From 3f619478f796eddbba6e39502fe941b285dd97b1 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 20:00:34 +0200 Subject: Adding upstream version 1:10.11.6. Signed-off-by: Daniel Baumann --- sql/CMakeLists.txt | 531 + sql/MSG00001.bin | Bin 0 -> 36 bytes sql/add_errmsg | 17 + sql/authors.h | 195 + sql/backup.cc | 654 + sql/backup.h | 47 + sql/bounded_queue.h | 196 + sql/client_settings.h | 53 + sql/compat56.cc | 465 + sql/compat56.h | 55 + sql/contributors.h | 65 + sql/create_options.cc | 879 + sql/create_options.h | 103 + sql/create_tmp_table.h | 80 + sql/cset_narrowing.cc | 35 + sql/cset_narrowing.h | 143 + sql/custom_conf.h | 28 + sql/datadict.cc | 273 + sql/datadict.h | 52 + sql/ddl_log.cc | 3587 ++++ sql/ddl_log.h | 358 + sql/debug.cc | 88 + sql/debug.h | 39 + sql/debug_sync.cc | 1835 ++ sql/debug_sync.h | 55 + sql/derived_handler.cc | 113 + sql/derived_handler.h | 85 + sql/derror.cc | 391 + sql/derror.h | 24 + sql/des_key_file.cc | 106 + sql/des_key_file.h | 40 + sql/discover.cc | 274 + sql/discover.h | 39 + sql/encryption.cc | 246 + sql/event_data_objects.cc | 1619 ++ sql/event_data_objects.h | 207 + sql/event_db_repository.cc | 1217 ++ sql/event_db_repository.h | 131 + sql/event_parse_data.cc | 587 + sql/event_parse_data.h | 132 + sql/event_queue.cc | 848 + sql/event_queue.h | 135 + sql/event_scheduler.cc | 842 + sql/event_scheduler.h | 157 + sql/events.cc | 1327 ++ sql/events.h | 164 + sql/field.cc | 11730 +++++++++++++ sql/field.h | 5941 +++++++ sql/field_comp.cc | 154 + sql/field_comp.h | 33 + sql/field_conv.cc | 922 + sql/filesort.cc | 3110 ++++ sql/filesort.h | 245 + sql/filesort_utils.cc | 188 + sql/filesort_utils.h | 275 + sql/gcalc_slicescan.cc | 2015 +++ sql/gcalc_slicescan.h | 607 + sql/gcalc_tools.cc | 1471 ++ sql/gcalc_tools.h | 359 + sql/gen_lex_hash.cc | 482 + sql/gen_lex_token.cc | 363 + sql/gen_win_tzname_data.ps1 | 12 + sql/gen_yy_files.cmake | 42 + sql/grant.cc | 108 + sql/grant.h | 99 + sql/group_by_handler.cc | 145 + 
sql/group_by_handler.h | 108 + sql/gstream.cc | 147 + sql/gstream.h | 92 + sql/ha_handler_stats.h | 59 + sql/ha_partition.cc | 12261 +++++++++++++ sql/ha_partition.h | 1654 ++ sql/ha_sequence.cc | 484 + sql/ha_sequence.h | 165 + sql/handle_connections_win.cc | 657 + sql/handle_connections_win.h | 21 + sql/handler.cc | 8789 ++++++++++ sql/handler.h | 5426 ++++++ sql/hash_filo.cc | 33 + sql/hash_filo.h | 214 + sql/hostname.cc | 986 ++ sql/hostname.h | 183 + sql/init.cc | 47 + sql/init.h | 21 + sql/innodb_priv.h | 34 + sql/item.cc | 11025 ++++++++++++ sql/item.h | 7957 +++++++++ sql/item_buff.cc | 252 + sql/item_cmpfunc.cc | 7946 +++++++++ sql/item_cmpfunc.h | 3840 ++++ sql/item_create.cc | 6143 +++++++ sql/item_create.h | 380 + sql/item_func.cc | 7309 ++++++++ sql/item_func.h | 4240 +++++ sql/item_geofunc.cc | 4082 +++++ sql/item_geofunc.h | 1288 ++ sql/item_jsonfunc.cc | 4738 +++++ sql/item_jsonfunc.h | 799 + sql/item_row.cc | 201 + sql/item_row.h | 156 + sql/item_strfunc.cc | 5974 +++++++ sql/item_strfunc.h | 2288 +++ sql/item_subselect.cc | 7028 ++++++++ sql/item_subselect.h | 1551 ++ sql/item_sum.cc | 4600 +++++ sql/item_sum.h | 2101 +++ sql/item_timefunc.cc | 3911 +++++ sql/item_timefunc.h | 2033 +++ sql/item_vers.cc | 196 + sql/item_vers.h | 150 + sql/item_windowfunc.cc | 578 + sql/item_windowfunc.h | 1398 ++ sql/item_xmlfunc.cc | 3130 ++++ sql/item_xmlfunc.h | 165 + sql/json_table.cc | 1478 ++ sql/json_table.h | 294 + sql/key.cc | 903 + sql/key.h | 44 + sql/keycaches.cc | 236 + sql/keycaches.h | 57 + sql/lex.h | 807 + sql/lex_charset.cc | 775 + sql/lex_charset.h | 802 + sql/lex_string.h | 165 + sql/lex_symbol.h | 48 + sql/lock.cc | 1259 ++ sql/lock.h | 54 + sql/log.cc | 12168 +++++++++++++ sql/log.h | 1271 ++ sql/log_event.cc | 4182 +++++ sql/log_event.h | 5931 +++++++ sql/log_event_client.cc | 4011 +++++ sql/log_event_old.cc | 2749 +++ sql/log_event_old.h | 569 + sql/log_event_server.cc | 9087 ++++++++++ sql/log_slow.h | 61 + sql/main.cc | 36 + sql/mariadb.h 
| 30 + sql/mdl.cc | 3341 ++++ sql/mdl.h | 1154 ++ sql/mem_root_array.h | 245 + sql/message.h | 54 + sql/message.mc | 15 + sql/message.rc | 2 + sql/mf_iocache.cc | 97 + sql/mf_iocache_encr.cc | 275 + sql/multi_range_read.cc | 2131 +++ sql/multi_range_read.h | 670 + sql/my_apc.cc | 239 + sql/my_apc.h | 161 + sql/my_decimal.cc | 437 + sql/my_decimal.h | 550 + sql/my_json_writer.cc | 496 + sql/my_json_writer.h | 793 + sql/myskel.m4.in | 13 + sql/mysql_install_db.cc | 983 ++ sql/mysql_upgrade_service.cc | 613 + sql/mysqld.cc | 9884 +++++++++++ sql/mysqld.h | 993 ++ sql/mysqld_suffix.h | 34 + sql/net_serv.cc | 1451 ++ sql/opt_histogram_json.cc | 1198 ++ sql/opt_histogram_json.h | 147 + sql/opt_index_cond_pushdown.cc | 442 + sql/opt_range.cc | 16784 ++++++++++++++++++ sql/opt_range.h | 2013 +++ sql/opt_range_mrr.cc | 408 + sql/opt_split.cc | 1415 ++ sql/opt_subselect.cc | 7288 ++++++++ sql/opt_subselect.h | 418 + sql/opt_sum.cc | 1096 ++ sql/opt_table_elimination.cc | 2157 +++ sql/opt_trace.cc | 770 + sql/opt_trace.h | 218 + sql/opt_trace_context.h | 135 + sql/parse_file.cc | 1004 ++ sql/parse_file.h | 122 + sql/partition_element.h | 178 + sql/partition_info.cc | 2935 ++++ sql/partition_info.h | 572 + sql/password.c | 524 + sql/plistsort.c | 166 + sql/privilege.h | 756 + sql/procedure.cc | 102 + sql/procedure.h | 193 + sql/protocol.cc | 1991 +++ sql/protocol.h | 332 + sql/proxy_protocol.cc | 584 + sql/proxy_protocol.h | 19 + sql/records.cc | 861 + sql/records.h | 104 + sql/repl_failsafe.cc | 250 + sql/repl_failsafe.h | 43 + sql/replication.h | 567 + sql/rowid_filter.cc | 687 + sql/rowid_filter.h | 485 + sql/rpl_constants.h | 112 + sql/rpl_filter.cc | 939 + sql/rpl_filter.h | 161 + sql/rpl_gtid.cc | 4062 +++++ sql/rpl_gtid.h | 936 + sql/rpl_injector.cc | 197 + sql/rpl_injector.h | 316 + sql/rpl_mi.cc | 2077 +++ sql/rpl_mi.h | 475 + sql/rpl_parallel.cc | 3453 ++++ sql/rpl_parallel.h | 479 + sql/rpl_record.cc | 511 + sql/rpl_record.h | 43 + sql/rpl_record_old.cc | 199 + 
sql/rpl_record_old.h | 35 + sql/rpl_reporting.cc | 87 + sql/rpl_reporting.h | 133 + sql/rpl_rli.cc | 2642 +++ sql/rpl_rli.h | 1054 ++ sql/rpl_tblmap.cc | 182 + sql/rpl_tblmap.h | 112 + sql/rpl_utility.cc | 344 + sql/rpl_utility.h | 309 + sql/rpl_utility_server.cc | 1187 ++ sql/scheduler.cc | 142 + sql/scheduler.h | 99 + sql/select_handler.cc | 173 + sql/select_handler.h | 86 + sql/semisync.cc | 32 + sql/semisync.h | 73 + sql/semisync_master.cc | 1419 ++ sql/semisync_master.h | 712 + sql/semisync_master_ack_receiver.cc | 303 + sql/semisync_master_ack_receiver.h | 240 + sql/semisync_slave.cc | 275 + sql/semisync_slave.h | 116 + sql/service_wsrep.cc | 416 + sql/session_tracker.cc | 1312 ++ sql/session_tracker.h | 498 + sql/set_var.cc | 1553 ++ sql/set_var.h | 489 + sql/share/CMakeLists.txt | 61 + sql/share/charsets/Index.xml | 736 + sql/share/charsets/README | 39 + sql/share/charsets/armscii8.xml | 139 + sql/share/charsets/ascii.xml | 139 + sql/share/charsets/cp1250.xml | 183 + sql/share/charsets/cp1251.xml | 214 + sql/share/charsets/cp1256.xml | 142 + sql/share/charsets/cp1257.xml | 228 + sql/share/charsets/cp850.xml | 139 + sql/share/charsets/cp852.xml | 139 + sql/share/charsets/cp866.xml | 142 + sql/share/charsets/dec8.xml | 140 + sql/share/charsets/geostd8.xml | 139 + sql/share/charsets/greek.xml | 144 + sql/share/charsets/hebrew.xml | 140 + sql/share/charsets/hp8.xml | 140 + sql/share/charsets/keybcs2.xml | 140 + sql/share/charsets/koi8r.xml | 139 + sql/share/charsets/koi8u.xml | 140 + sql/share/charsets/languages.html | 274 + sql/share/charsets/latin1.xml | 253 + sql/share/charsets/latin2.xml | 186 + sql/share/charsets/latin5.xml | 139 + sql/share/charsets/latin7.xml | 187 + sql/share/charsets/macce.xml | 207 + sql/share/charsets/macroman.xml | 200 + sql/share/charsets/swe7.xml | 141 + sql/share/errmsg-utf8.txt | 10761 ++++++++++++ sql/signal_handler.cc | 373 + sql/slave.cc | 8505 +++++++++ sql/slave.h | 321 + sql/sp.cc | 3097 ++++ sql/sp.h | 675 + 
sql/sp_cache.cc | 323 + sql/sp_cache.h | 66 + sql/sp_head.cc | 5607 ++++++ sql/sp_head.h | 2188 +++ sql/sp_pcontext.cc | 742 + sql/sp_pcontext.h | 808 + sql/sp_rcontext.cc | 909 + sql/sp_rcontext.h | 410 + sql/spatial.cc | 3804 ++++ sql/spatial.h | 655 + sql/sql_acl.cc | 15107 ++++++++++++++++ sql/sql_acl.h | 374 + sql/sql_acl_getsort.ic | 212 + sql/sql_admin.cc | 1656 ++ sql/sql_admin.h | 117 + sql/sql_alloc.h | 44 + sql/sql_alter.cc | 728 + sql/sql_alter.h | 538 + sql/sql_analyse.cc | 1240 ++ sql/sql_analyse.h | 367 + sql/sql_analyze_stmt.cc | 122 + sql/sql_analyze_stmt.h | 479 + sql/sql_array.h | 298 + sql/sql_audit.cc | 482 + sql/sql_audit.h | 441 + sql/sql_base.cc | 9738 +++++++++++ sql/sql_base.h | 678 + sql/sql_basic_types.h | 339 + sql/sql_binlog.cc | 471 + sql/sql_binlog.h | 23 + sql/sql_bitmap.h | 314 + sql/sql_bootstrap.cc | 159 + sql/sql_bootstrap.h | 55 + sql/sql_builtin.cc.in | 46 + sql/sql_cache.cc | 5330 ++++++ sql/sql_cache.h | 612 + sql/sql_callback.h | 42 + sql/sql_class.cc | 8505 +++++++++ sql/sql_class.h | 8144 +++++++++ sql/sql_client.cc | 44 + sql/sql_cmd.h | 279 + sql/sql_connect.cc | 1580 ++ sql/sql_connect.h | 123 + sql/sql_const.h | 302 + sql/sql_crypt.cc | 77 + sql/sql_crypt.h | 45 + sql/sql_cte.cc | 1738 ++ sql/sql_cte.h | 551 + sql/sql_cursor.cc | 458 + sql/sql_cursor.h | 74 + sql/sql_db.cc | 2165 +++ sql/sql_db.h | 56 + sql/sql_debug.h | 168 + sql/sql_delete.cc | 1677 ++ sql/sql_delete.h | 35 + sql/sql_derived.cc | 1675 ++ sql/sql_derived.h | 33 + sql/sql_digest.cc | 688 + sql/sql_digest.h | 129 + sql/sql_digest_stream.h | 51 + sql/sql_do.cc | 51 + sql/sql_do.h | 26 + sql/sql_error.cc | 1027 ++ sql/sql_error.h | 1336 ++ sql/sql_explain.cc | 2845 +++ sql/sql_explain.h | 1064 ++ sql/sql_expression_cache.cc | 344 + sql/sql_expression_cache.h | 164 + sql/sql_get_diagnostics.cc | 347 + sql/sql_get_diagnostics.h | 319 + sql/sql_handler.cc | 1292 ++ sql/sql_handler.h | 86 + sql/sql_help.cc | 1088 ++ sql/sql_help.h | 30 + sql/sql_hset.h | 114 
+ sql/sql_i_s.h | 349 + sql/sql_insert.cc | 5345 ++++++ sql/sql_insert.h | 54 + sql/sql_join_cache.cc | 4834 +++++ sql/sql_join_cache.h | 1455 ++ sql/sql_lex.cc | 11881 +++++++++++++ sql/sql_lex.h | 5190 ++++++ sql/sql_lifo_buffer.h | 359 + sql/sql_limit.h | 101 + sql/sql_list.cc | 68 + sql/sql_list.h | 874 + sql/sql_load.cc | 2099 +++ sql/sql_load.h | 34 + sql/sql_locale.cc | 3581 ++++ sql/sql_locale.h | 79 + sql/sql_manager.cc | 164 + sql/sql_manager.h | 23 + sql/sql_mode.cc | 34 + sql/sql_mode.h | 162 + sql/sql_parse.cc | 10462 +++++++++++ sql/sql_parse.h | 191 + sql/sql_partition.cc | 9195 ++++++++++ sql/sql_partition.h | 330 + sql/sql_partition_admin.cc | 1053 ++ sql/sql_partition_admin.h | 262 + sql/sql_plist.h | 297 + sql/sql_plugin.cc | 4534 +++++ sql/sql_plugin.h | 206 + sql/sql_plugin_compat.h | 65 + sql/sql_plugin_services.inl | 358 + sql/sql_prepare.cc | 6510 +++++++ sql/sql_prepare.h | 358 + sql/sql_priv.h | 434 + sql/sql_profile.cc | 691 + sql/sql_profile.h | 336 + sql/sql_reload.cc | 656 + sql/sql_reload.h | 26 + sql/sql_rename.cc | 549 + sql/sql_rename.h | 25 + sql/sql_repl.cc | 4851 +++++ sql/sql_repl.h | 80 + sql/sql_schema.cc | 141 + sql/sql_schema.h | 71 + sql/sql_select.cc | 32034 ++++++++++++++++++++++++++++++++++ sql/sql_select.h | 2595 +++ sql/sql_sequence.cc | 1046 ++ sql/sql_sequence.h | 169 + sql/sql_servers.cc | 1428 ++ sql/sql_servers.h | 52 + sql/sql_show.cc | 10669 +++++++++++ sql/sql_show.h | 268 + sql/sql_signal.cc | 480 + sql/sql_signal.h | 123 + sql/sql_sort.h | 717 + sql/sql_state.c | 55 + sql/sql_statistics.cc | 4493 +++++ sql/sql_statistics.h | 625 + sql/sql_string.cc | 1284 ++ sql/sql_string.h | 1280 ++ sql/sql_table.cc | 12720 ++++++++++++++ sql/sql_table.h | 223 + sql/sql_test.cc | 715 + sql/sql_test.h | 41 + sql/sql_time.cc | 1394 ++ sql/sql_time.h | 190 + sql/sql_trigger.cc | 2810 +++ sql/sql_trigger.h | 370 + sql/sql_truncate.cc | 585 + sql/sql_truncate.h | 71 + sql/sql_tvc.cc | 1209 ++ sql/sql_tvc.h | 76 + 
sql/sql_type.cc | 9457 ++++++++++ sql/sql_type.h | 7714 ++++++++ sql/sql_type_fixedbin.h | 1921 ++ sql/sql_type_fixedbin_storage.h | 171 + sql/sql_type_geom.cc | 973 ++ sql/sql_type_geom.h | 434 + sql/sql_type_int.h | 365 + sql/sql_type_json.cc | 243 + sql/sql_type_json.h | 165 + sql/sql_type_real.h | 47 + sql/sql_type_string.cc | 104 + sql/sql_type_string.h | 48 + sql/sql_udf.cc | 750 + sql/sql_udf.h | 169 + sql/sql_union.cc | 2902 +++ sql/sql_union.h | 29 + sql/sql_update.cc | 3142 ++++ sql/sql_update.h | 44 + sql/sql_view.cc | 2383 +++ sql/sql_view.h | 71 + sql/sql_window.cc | 3362 ++++ sql/sql_window.h | 260 + sql/sql_yacc.yy | 19510 +++++++++++++++++++++ sql/strfunc.cc | 410 + sql/strfunc.h | 49 + sql/structs.h | 1040 ++ sql/sys_vars.cc | 7030 ++++++++ sql/sys_vars.inl | 2820 +++ sql/sys_vars_shared.h | 87 + sql/table.cc | 10579 +++++++++++ sql/table.h | 3519 ++++ sql/table_cache.cc | 1315 ++ sql/table_cache.h | 117 + sql/temporary_tables.cc | 1595 ++ sql/thr_malloc.cc | 98 + sql/thr_malloc.h | 28 + sql/thread_cache.h | 210 + sql/thread_pool_info.cc | 364 + sql/threadpool.h | 166 + sql/threadpool_common.cc | 636 + sql/threadpool_generic.cc | 1759 ++ sql/threadpool_generic.h | 157 + sql/threadpool_win.cc | 447 + sql/threadpool_winsockets.cc | 268 + sql/threadpool_winsockets.h | 80 + sql/transaction.cc | 728 + sql/transaction.h | 44 + sql/tzfile.h | 142 + sql/tztime.cc | 3094 ++++ sql/tztime.h | 93 + sql/udf_example.c | 1292 ++ sql/udf_example.def | 36 + sql/uniques.cc | 833 + sql/uniques.h | 110 + sql/unireg.cc | 1259 ++ sql/unireg.h | 228 + sql/upgrade_conf_file.cc | 318 + sql/vers_string.h | 84 + sql/win_tzname_data.h | 140 + sql/winmain.cc | 373 + sql/winservice.c | 338 + sql/winservice.h | 212 + sql/wsrep_allowlist_service.cc | 56 + sql/wsrep_allowlist_service.h | 29 + sql/wsrep_applier.cc | 233 + sql/wsrep_applier.h | 44 + sql/wsrep_binlog.cc | 400 + sql/wsrep_binlog.h | 103 + sql/wsrep_check_opts.cc | 102 + sql/wsrep_client_service.cc | 380 + 
sql/wsrep_client_service.h | 74 + sql/wsrep_client_state.h | 47 + sql/wsrep_condition_variable.h | 53 + sql/wsrep_dummy.cc | 169 + sql/wsrep_high_priority_service.cc | 771 + sql/wsrep_high_priority_service.h | 134 + sql/wsrep_mutex.h | 51 + sql/wsrep_mysqld.cc | 3913 +++++ sql/wsrep_mysqld.h | 629 + sql/wsrep_mysqld_c.h | 30 + sql/wsrep_notify.cc | 105 + sql/wsrep_on.h | 58 + sql/wsrep_plugin.cc | 53 + sql/wsrep_priv.h | 43 + sql/wsrep_schema.cc | 1701 ++ sql/wsrep_schema.h | 172 + sql/wsrep_server_service.cc | 411 + sql/wsrep_server_service.h | 102 + sql/wsrep_server_state.cc | 99 + sql/wsrep_server_state.h | 83 + sql/wsrep_sst.cc | 2161 +++ sql/wsrep_sst.h | 107 + sql/wsrep_status.cc | 60 + sql/wsrep_status.h | 62 + sql/wsrep_storage_service.cc | 206 + sql/wsrep_storage_service.h | 49 + sql/wsrep_thd.cc | 586 + sql/wsrep_thd.h | 325 + sql/wsrep_trans_observer.h | 602 + sql/wsrep_types.h | 31 + sql/wsrep_utils.cc | 607 + sql/wsrep_utils.h | 447 + sql/wsrep_var.cc | 1132 ++ sql/wsrep_var.h | 120 + sql/wsrep_xid.cc | 254 + sql/wsrep_xid.h | 39 + sql/xa.cc | 1171 ++ sql/xa.h | 59 + 525 files changed, 713763 insertions(+) create mode 100644 sql/CMakeLists.txt create mode 100644 sql/MSG00001.bin create mode 100755 sql/add_errmsg create mode 100644 sql/authors.h create mode 100644 sql/backup.cc create mode 100644 sql/backup.h create mode 100644 sql/bounded_queue.h create mode 100644 sql/client_settings.h create mode 100644 sql/compat56.cc create mode 100644 sql/compat56.h create mode 100644 sql/contributors.h create mode 100644 sql/create_options.cc create mode 100644 sql/create_options.h create mode 100644 sql/create_tmp_table.h create mode 100644 sql/cset_narrowing.cc create mode 100644 sql/cset_narrowing.h create mode 100644 sql/custom_conf.h create mode 100644 sql/datadict.cc create mode 100644 sql/datadict.h create mode 100644 sql/ddl_log.cc create mode 100644 sql/ddl_log.h create mode 100644 sql/debug.cc create mode 100644 sql/debug.h create mode 100644 
sql/debug_sync.cc create mode 100644 sql/debug_sync.h create mode 100644 sql/derived_handler.cc create mode 100644 sql/derived_handler.h create mode 100644 sql/derror.cc create mode 100644 sql/derror.h create mode 100644 sql/des_key_file.cc create mode 100644 sql/des_key_file.h create mode 100644 sql/discover.cc create mode 100644 sql/discover.h create mode 100644 sql/encryption.cc create mode 100644 sql/event_data_objects.cc create mode 100644 sql/event_data_objects.h create mode 100644 sql/event_db_repository.cc create mode 100644 sql/event_db_repository.h create mode 100644 sql/event_parse_data.cc create mode 100644 sql/event_parse_data.h create mode 100644 sql/event_queue.cc create mode 100644 sql/event_queue.h create mode 100644 sql/event_scheduler.cc create mode 100644 sql/event_scheduler.h create mode 100644 sql/events.cc create mode 100644 sql/events.h create mode 100644 sql/field.cc create mode 100644 sql/field.h create mode 100644 sql/field_comp.cc create mode 100644 sql/field_comp.h create mode 100644 sql/field_conv.cc create mode 100644 sql/filesort.cc create mode 100644 sql/filesort.h create mode 100644 sql/filesort_utils.cc create mode 100644 sql/filesort_utils.h create mode 100644 sql/gcalc_slicescan.cc create mode 100644 sql/gcalc_slicescan.h create mode 100644 sql/gcalc_tools.cc create mode 100644 sql/gcalc_tools.h create mode 100644 sql/gen_lex_hash.cc create mode 100644 sql/gen_lex_token.cc create mode 100644 sql/gen_win_tzname_data.ps1 create mode 100644 sql/gen_yy_files.cmake create mode 100644 sql/grant.cc create mode 100644 sql/grant.h create mode 100644 sql/group_by_handler.cc create mode 100644 sql/group_by_handler.h create mode 100644 sql/gstream.cc create mode 100644 sql/gstream.h create mode 100644 sql/ha_handler_stats.h create mode 100644 sql/ha_partition.cc create mode 100644 sql/ha_partition.h create mode 100644 sql/ha_sequence.cc create mode 100644 sql/ha_sequence.h create mode 100644 sql/handle_connections_win.cc create mode 100644 
sql/handle_connections_win.h create mode 100644 sql/handler.cc create mode 100644 sql/handler.h create mode 100644 sql/hash_filo.cc create mode 100644 sql/hash_filo.h create mode 100644 sql/hostname.cc create mode 100644 sql/hostname.h create mode 100644 sql/init.cc create mode 100644 sql/init.h create mode 100644 sql/innodb_priv.h create mode 100644 sql/item.cc create mode 100644 sql/item.h create mode 100644 sql/item_buff.cc create mode 100644 sql/item_cmpfunc.cc create mode 100644 sql/item_cmpfunc.h create mode 100644 sql/item_create.cc create mode 100644 sql/item_create.h create mode 100644 sql/item_func.cc create mode 100644 sql/item_func.h create mode 100644 sql/item_geofunc.cc create mode 100644 sql/item_geofunc.h create mode 100644 sql/item_jsonfunc.cc create mode 100644 sql/item_jsonfunc.h create mode 100644 sql/item_row.cc create mode 100644 sql/item_row.h create mode 100644 sql/item_strfunc.cc create mode 100644 sql/item_strfunc.h create mode 100644 sql/item_subselect.cc create mode 100644 sql/item_subselect.h create mode 100644 sql/item_sum.cc create mode 100644 sql/item_sum.h create mode 100644 sql/item_timefunc.cc create mode 100644 sql/item_timefunc.h create mode 100644 sql/item_vers.cc create mode 100644 sql/item_vers.h create mode 100644 sql/item_windowfunc.cc create mode 100644 sql/item_windowfunc.h create mode 100644 sql/item_xmlfunc.cc create mode 100644 sql/item_xmlfunc.h create mode 100644 sql/json_table.cc create mode 100644 sql/json_table.h create mode 100644 sql/key.cc create mode 100644 sql/key.h create mode 100644 sql/keycaches.cc create mode 100644 sql/keycaches.h create mode 100644 sql/lex.h create mode 100644 sql/lex_charset.cc create mode 100644 sql/lex_charset.h create mode 100644 sql/lex_string.h create mode 100644 sql/lex_symbol.h create mode 100644 sql/lock.cc create mode 100644 sql/lock.h create mode 100644 sql/log.cc create mode 100644 sql/log.h create mode 100644 sql/log_event.cc create mode 100644 sql/log_event.h create mode 
100644 sql/log_event_client.cc create mode 100644 sql/log_event_old.cc create mode 100644 sql/log_event_old.h create mode 100644 sql/log_event_server.cc create mode 100644 sql/log_slow.h create mode 100644 sql/main.cc create mode 100644 sql/mariadb.h create mode 100644 sql/mdl.cc create mode 100644 sql/mdl.h create mode 100644 sql/mem_root_array.h create mode 100644 sql/message.h create mode 100644 sql/message.mc create mode 100644 sql/message.rc create mode 100644 sql/mf_iocache.cc create mode 100644 sql/mf_iocache_encr.cc create mode 100644 sql/multi_range_read.cc create mode 100644 sql/multi_range_read.h create mode 100644 sql/my_apc.cc create mode 100644 sql/my_apc.h create mode 100644 sql/my_decimal.cc create mode 100644 sql/my_decimal.h create mode 100644 sql/my_json_writer.cc create mode 100644 sql/my_json_writer.h create mode 100644 sql/myskel.m4.in create mode 100644 sql/mysql_install_db.cc create mode 100644 sql/mysql_upgrade_service.cc create mode 100644 sql/mysqld.cc create mode 100644 sql/mysqld.h create mode 100644 sql/mysqld_suffix.h create mode 100644 sql/net_serv.cc create mode 100644 sql/opt_histogram_json.cc create mode 100644 sql/opt_histogram_json.h create mode 100644 sql/opt_index_cond_pushdown.cc create mode 100644 sql/opt_range.cc create mode 100644 sql/opt_range.h create mode 100644 sql/opt_range_mrr.cc create mode 100644 sql/opt_split.cc create mode 100644 sql/opt_subselect.cc create mode 100644 sql/opt_subselect.h create mode 100644 sql/opt_sum.cc create mode 100644 sql/opt_table_elimination.cc create mode 100644 sql/opt_trace.cc create mode 100644 sql/opt_trace.h create mode 100644 sql/opt_trace_context.h create mode 100644 sql/parse_file.cc create mode 100644 sql/parse_file.h create mode 100644 sql/partition_element.h create mode 100644 sql/partition_info.cc create mode 100644 sql/partition_info.h create mode 100644 sql/password.c create mode 100644 sql/plistsort.c create mode 100644 sql/privilege.h create mode 100644 sql/procedure.cc 
create mode 100644 sql/procedure.h create mode 100644 sql/protocol.cc create mode 100644 sql/protocol.h create mode 100644 sql/proxy_protocol.cc create mode 100644 sql/proxy_protocol.h create mode 100644 sql/records.cc create mode 100644 sql/records.h create mode 100644 sql/repl_failsafe.cc create mode 100644 sql/repl_failsafe.h create mode 100644 sql/replication.h create mode 100644 sql/rowid_filter.cc create mode 100644 sql/rowid_filter.h create mode 100644 sql/rpl_constants.h create mode 100644 sql/rpl_filter.cc create mode 100644 sql/rpl_filter.h create mode 100644 sql/rpl_gtid.cc create mode 100644 sql/rpl_gtid.h create mode 100644 sql/rpl_injector.cc create mode 100644 sql/rpl_injector.h create mode 100644 sql/rpl_mi.cc create mode 100644 sql/rpl_mi.h create mode 100644 sql/rpl_parallel.cc create mode 100644 sql/rpl_parallel.h create mode 100644 sql/rpl_record.cc create mode 100644 sql/rpl_record.h create mode 100644 sql/rpl_record_old.cc create mode 100644 sql/rpl_record_old.h create mode 100644 sql/rpl_reporting.cc create mode 100644 sql/rpl_reporting.h create mode 100644 sql/rpl_rli.cc create mode 100644 sql/rpl_rli.h create mode 100644 sql/rpl_tblmap.cc create mode 100644 sql/rpl_tblmap.h create mode 100644 sql/rpl_utility.cc create mode 100644 sql/rpl_utility.h create mode 100644 sql/rpl_utility_server.cc create mode 100644 sql/scheduler.cc create mode 100644 sql/scheduler.h create mode 100644 sql/select_handler.cc create mode 100644 sql/select_handler.h create mode 100644 sql/semisync.cc create mode 100644 sql/semisync.h create mode 100644 sql/semisync_master.cc create mode 100644 sql/semisync_master.h create mode 100644 sql/semisync_master_ack_receiver.cc create mode 100644 sql/semisync_master_ack_receiver.h create mode 100644 sql/semisync_slave.cc create mode 100644 sql/semisync_slave.h create mode 100644 sql/service_wsrep.cc create mode 100644 sql/session_tracker.cc create mode 100644 sql/session_tracker.h create mode 100644 sql/set_var.cc create 
mode 100644 sql/set_var.h create mode 100644 sql/share/CMakeLists.txt create mode 100644 sql/share/charsets/Index.xml create mode 100644 sql/share/charsets/README create mode 100644 sql/share/charsets/armscii8.xml create mode 100644 sql/share/charsets/ascii.xml create mode 100644 sql/share/charsets/cp1250.xml create mode 100644 sql/share/charsets/cp1251.xml create mode 100644 sql/share/charsets/cp1256.xml create mode 100644 sql/share/charsets/cp1257.xml create mode 100644 sql/share/charsets/cp850.xml create mode 100644 sql/share/charsets/cp852.xml create mode 100644 sql/share/charsets/cp866.xml create mode 100644 sql/share/charsets/dec8.xml create mode 100644 sql/share/charsets/geostd8.xml create mode 100644 sql/share/charsets/greek.xml create mode 100644 sql/share/charsets/hebrew.xml create mode 100644 sql/share/charsets/hp8.xml create mode 100644 sql/share/charsets/keybcs2.xml create mode 100644 sql/share/charsets/koi8r.xml create mode 100644 sql/share/charsets/koi8u.xml create mode 100644 sql/share/charsets/languages.html create mode 100644 sql/share/charsets/latin1.xml create mode 100644 sql/share/charsets/latin2.xml create mode 100644 sql/share/charsets/latin5.xml create mode 100644 sql/share/charsets/latin7.xml create mode 100644 sql/share/charsets/macce.xml create mode 100644 sql/share/charsets/macroman.xml create mode 100644 sql/share/charsets/swe7.xml create mode 100644 sql/share/errmsg-utf8.txt create mode 100644 sql/signal_handler.cc create mode 100644 sql/slave.cc create mode 100644 sql/slave.h create mode 100644 sql/sp.cc create mode 100644 sql/sp.h create mode 100644 sql/sp_cache.cc create mode 100644 sql/sp_cache.h create mode 100644 sql/sp_head.cc create mode 100644 sql/sp_head.h create mode 100644 sql/sp_pcontext.cc create mode 100644 sql/sp_pcontext.h create mode 100644 sql/sp_rcontext.cc create mode 100644 sql/sp_rcontext.h create mode 100644 sql/spatial.cc create mode 100644 sql/spatial.h create mode 100644 sql/sql_acl.cc create mode 100644 
sql/sql_acl.h create mode 100644 sql/sql_acl_getsort.ic create mode 100644 sql/sql_admin.cc create mode 100644 sql/sql_admin.h create mode 100644 sql/sql_alloc.h create mode 100644 sql/sql_alter.cc create mode 100644 sql/sql_alter.h create mode 100644 sql/sql_analyse.cc create mode 100644 sql/sql_analyse.h create mode 100644 sql/sql_analyze_stmt.cc create mode 100644 sql/sql_analyze_stmt.h create mode 100644 sql/sql_array.h create mode 100644 sql/sql_audit.cc create mode 100644 sql/sql_audit.h create mode 100644 sql/sql_base.cc create mode 100644 sql/sql_base.h create mode 100644 sql/sql_basic_types.h create mode 100644 sql/sql_binlog.cc create mode 100644 sql/sql_binlog.h create mode 100644 sql/sql_bitmap.h create mode 100644 sql/sql_bootstrap.cc create mode 100644 sql/sql_bootstrap.h create mode 100644 sql/sql_builtin.cc.in create mode 100644 sql/sql_cache.cc create mode 100644 sql/sql_cache.h create mode 100644 sql/sql_callback.h create mode 100644 sql/sql_class.cc create mode 100644 sql/sql_class.h create mode 100644 sql/sql_client.cc create mode 100644 sql/sql_cmd.h create mode 100644 sql/sql_connect.cc create mode 100644 sql/sql_connect.h create mode 100644 sql/sql_const.h create mode 100644 sql/sql_crypt.cc create mode 100644 sql/sql_crypt.h create mode 100644 sql/sql_cte.cc create mode 100644 sql/sql_cte.h create mode 100644 sql/sql_cursor.cc create mode 100644 sql/sql_cursor.h create mode 100644 sql/sql_db.cc create mode 100644 sql/sql_db.h create mode 100644 sql/sql_debug.h create mode 100644 sql/sql_delete.cc create mode 100644 sql/sql_delete.h create mode 100644 sql/sql_derived.cc create mode 100644 sql/sql_derived.h create mode 100644 sql/sql_digest.cc create mode 100644 sql/sql_digest.h create mode 100644 sql/sql_digest_stream.h create mode 100644 sql/sql_do.cc create mode 100644 sql/sql_do.h create mode 100644 sql/sql_error.cc create mode 100644 sql/sql_error.h create mode 100644 sql/sql_explain.cc create mode 100644 sql/sql_explain.h create mode 
100644 sql/sql_expression_cache.cc create mode 100644 sql/sql_expression_cache.h create mode 100644 sql/sql_get_diagnostics.cc create mode 100644 sql/sql_get_diagnostics.h create mode 100644 sql/sql_handler.cc create mode 100644 sql/sql_handler.h create mode 100644 sql/sql_help.cc create mode 100644 sql/sql_help.h create mode 100644 sql/sql_hset.h create mode 100644 sql/sql_i_s.h create mode 100644 sql/sql_insert.cc create mode 100644 sql/sql_insert.h create mode 100644 sql/sql_join_cache.cc create mode 100644 sql/sql_join_cache.h create mode 100644 sql/sql_lex.cc create mode 100644 sql/sql_lex.h create mode 100644 sql/sql_lifo_buffer.h create mode 100644 sql/sql_limit.h create mode 100644 sql/sql_list.cc create mode 100644 sql/sql_list.h create mode 100644 sql/sql_load.cc create mode 100644 sql/sql_load.h create mode 100644 sql/sql_locale.cc create mode 100644 sql/sql_locale.h create mode 100644 sql/sql_manager.cc create mode 100644 sql/sql_manager.h create mode 100644 sql/sql_mode.cc create mode 100644 sql/sql_mode.h create mode 100644 sql/sql_parse.cc create mode 100644 sql/sql_parse.h create mode 100644 sql/sql_partition.cc create mode 100644 sql/sql_partition.h create mode 100644 sql/sql_partition_admin.cc create mode 100644 sql/sql_partition_admin.h create mode 100644 sql/sql_plist.h create mode 100644 sql/sql_plugin.cc create mode 100644 sql/sql_plugin.h create mode 100644 sql/sql_plugin_compat.h create mode 100644 sql/sql_plugin_services.inl create mode 100644 sql/sql_prepare.cc create mode 100644 sql/sql_prepare.h create mode 100644 sql/sql_priv.h create mode 100644 sql/sql_profile.cc create mode 100644 sql/sql_profile.h create mode 100644 sql/sql_reload.cc create mode 100644 sql/sql_reload.h create mode 100644 sql/sql_rename.cc create mode 100644 sql/sql_rename.h create mode 100644 sql/sql_repl.cc create mode 100644 sql/sql_repl.h create mode 100644 sql/sql_schema.cc create mode 100644 sql/sql_schema.h create mode 100644 sql/sql_select.cc create mode 
100644 sql/sql_select.h create mode 100644 sql/sql_sequence.cc create mode 100644 sql/sql_sequence.h create mode 100644 sql/sql_servers.cc create mode 100644 sql/sql_servers.h create mode 100644 sql/sql_show.cc create mode 100644 sql/sql_show.h create mode 100644 sql/sql_signal.cc create mode 100644 sql/sql_signal.h create mode 100644 sql/sql_sort.h create mode 100644 sql/sql_state.c create mode 100644 sql/sql_statistics.cc create mode 100644 sql/sql_statistics.h create mode 100644 sql/sql_string.cc create mode 100644 sql/sql_string.h create mode 100644 sql/sql_table.cc create mode 100644 sql/sql_table.h create mode 100644 sql/sql_test.cc create mode 100644 sql/sql_test.h create mode 100644 sql/sql_time.cc create mode 100644 sql/sql_time.h create mode 100644 sql/sql_trigger.cc create mode 100644 sql/sql_trigger.h create mode 100644 sql/sql_truncate.cc create mode 100644 sql/sql_truncate.h create mode 100644 sql/sql_tvc.cc create mode 100644 sql/sql_tvc.h create mode 100644 sql/sql_type.cc create mode 100644 sql/sql_type.h create mode 100644 sql/sql_type_fixedbin.h create mode 100644 sql/sql_type_fixedbin_storage.h create mode 100644 sql/sql_type_geom.cc create mode 100644 sql/sql_type_geom.h create mode 100644 sql/sql_type_int.h create mode 100644 sql/sql_type_json.cc create mode 100644 sql/sql_type_json.h create mode 100644 sql/sql_type_real.h create mode 100644 sql/sql_type_string.cc create mode 100644 sql/sql_type_string.h create mode 100644 sql/sql_udf.cc create mode 100644 sql/sql_udf.h create mode 100644 sql/sql_union.cc create mode 100644 sql/sql_union.h create mode 100644 sql/sql_update.cc create mode 100644 sql/sql_update.h create mode 100644 sql/sql_view.cc create mode 100644 sql/sql_view.h create mode 100644 sql/sql_window.cc create mode 100644 sql/sql_window.h create mode 100644 sql/sql_yacc.yy create mode 100644 sql/strfunc.cc create mode 100644 sql/strfunc.h create mode 100644 sql/structs.h create mode 100644 sql/sys_vars.cc create mode 100644 
sql/sys_vars.inl create mode 100644 sql/sys_vars_shared.h create mode 100644 sql/table.cc create mode 100644 sql/table.h create mode 100644 sql/table_cache.cc create mode 100644 sql/table_cache.h create mode 100644 sql/temporary_tables.cc create mode 100644 sql/thr_malloc.cc create mode 100644 sql/thr_malloc.h create mode 100644 sql/thread_cache.h create mode 100644 sql/thread_pool_info.cc create mode 100644 sql/threadpool.h create mode 100644 sql/threadpool_common.cc create mode 100644 sql/threadpool_generic.cc create mode 100644 sql/threadpool_generic.h create mode 100644 sql/threadpool_win.cc create mode 100644 sql/threadpool_winsockets.cc create mode 100644 sql/threadpool_winsockets.h create mode 100644 sql/transaction.cc create mode 100644 sql/transaction.h create mode 100644 sql/tzfile.h create mode 100644 sql/tztime.cc create mode 100644 sql/tztime.h create mode 100644 sql/udf_example.c create mode 100644 sql/udf_example.def create mode 100644 sql/uniques.cc create mode 100644 sql/uniques.h create mode 100644 sql/unireg.cc create mode 100644 sql/unireg.h create mode 100644 sql/upgrade_conf_file.cc create mode 100644 sql/vers_string.h create mode 100644 sql/win_tzname_data.h create mode 100644 sql/winmain.cc create mode 100644 sql/winservice.c create mode 100644 sql/winservice.h create mode 100644 sql/wsrep_allowlist_service.cc create mode 100644 sql/wsrep_allowlist_service.h create mode 100644 sql/wsrep_applier.cc create mode 100644 sql/wsrep_applier.h create mode 100644 sql/wsrep_binlog.cc create mode 100644 sql/wsrep_binlog.h create mode 100644 sql/wsrep_check_opts.cc create mode 100644 sql/wsrep_client_service.cc create mode 100644 sql/wsrep_client_service.h create mode 100644 sql/wsrep_client_state.h create mode 100644 sql/wsrep_condition_variable.h create mode 100644 sql/wsrep_dummy.cc create mode 100644 sql/wsrep_high_priority_service.cc create mode 100644 sql/wsrep_high_priority_service.h create mode 100644 sql/wsrep_mutex.h create mode 100644 
sql/wsrep_mysqld.cc create mode 100644 sql/wsrep_mysqld.h create mode 100644 sql/wsrep_mysqld_c.h create mode 100644 sql/wsrep_notify.cc create mode 100644 sql/wsrep_on.h create mode 100644 sql/wsrep_plugin.cc create mode 100644 sql/wsrep_priv.h create mode 100644 sql/wsrep_schema.cc create mode 100644 sql/wsrep_schema.h create mode 100644 sql/wsrep_server_service.cc create mode 100644 sql/wsrep_server_service.h create mode 100644 sql/wsrep_server_state.cc create mode 100644 sql/wsrep_server_state.h create mode 100644 sql/wsrep_sst.cc create mode 100644 sql/wsrep_sst.h create mode 100644 sql/wsrep_status.cc create mode 100644 sql/wsrep_status.h create mode 100644 sql/wsrep_storage_service.cc create mode 100644 sql/wsrep_storage_service.h create mode 100644 sql/wsrep_thd.cc create mode 100644 sql/wsrep_thd.h create mode 100644 sql/wsrep_trans_observer.h create mode 100644 sql/wsrep_types.h create mode 100644 sql/wsrep_utils.cc create mode 100644 sql/wsrep_utils.h create mode 100644 sql/wsrep_var.cc create mode 100644 sql/wsrep_var.h create mode 100644 sql/wsrep_xid.cc create mode 100644 sql/wsrep_xid.h create mode 100644 sql/xa.cc create mode 100644 sql/xa.h (limited to 'sql') diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt new file mode 100644 index 00000000..13d9d02c --- /dev/null +++ b/sql/CMakeLists.txt @@ -0,0 +1,531 @@ +# Copyright (c) 2006, 2014, Oracle and/or its affiliates. +# Copyright (c) 2010, 2022, MariaDB Corporation. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA + + +IF(WITH_WSREP AND NOT EMBEDDED_LIBRARY) + SET(WSREP_SOURCES + wsrep_client_service.cc + wsrep_high_priority_service.cc + wsrep_server_service.cc + wsrep_storage_service.cc + wsrep_server_state.cc + wsrep_status.cc + wsrep_allowlist_service.cc + wsrep_utils.cc + wsrep_xid.cc + wsrep_check_opts.cc + wsrep_mysqld.cc + wsrep_notify.cc + wsrep_sst.cc + wsrep_var.cc + wsrep_binlog.cc + wsrep_applier.cc + wsrep_thd.cc + wsrep_schema.cc + wsrep_plugin.cc + service_wsrep.cc + ) + MYSQL_ADD_PLUGIN(wsrep ${WSREP_SOURCES} MANDATORY NOT_EMBEDDED LINK_LIBRARIES wsrep-lib wsrep_api_v26) + IF(VISIBILITY_HIDDEN_FLAG AND TARGET wsrep) + # wsrep_info plugin needs some wsrep symbols from inside mysqld + # we have to remove -fvisibility=hidden from wsrep + GET_TARGET_PROPERTY(f wsrep COMPILE_FLAGS) + STRING(REPLACE "${VISIBILITY_HIDDEN_FLAG}" "" f ${f}) + SET_TARGET_PROPERTIES(wsrep PROPERTIES COMPILE_FLAGS "${f}") + ENDIF() +ELSE() + ADD_LIBRARY(wsrep STATIC wsrep_dummy.cc) + ADD_DEPENDENCIES(wsrep GenError) +ENDIF() + +INCLUDE_DIRECTORIES( +${CMAKE_SOURCE_DIR}/include +${CMAKE_SOURCE_DIR}/sql +${LIBFMT_INCLUDE_DIR} +${PCRE_INCLUDES} +${ZLIB_INCLUDE_DIR} +${SSL_INCLUDE_DIRS} +${CMAKE_BINARY_DIR}/sql +${CMAKE_SOURCE_DIR}/tpool +) + +ADD_CUSTOM_COMMAND( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/lex_token.h + COMMAND gen_lex_token > lex_token.h + DEPENDS gen_lex_token +) + +FIND_PACKAGE(BISON 2.4) + +ADD_CUSTOM_COMMAND( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/yy_mariadb.yy + ${CMAKE_CURRENT_BINARY_DIR}/yy_oracle.yy + COMMAND ${CMAKE_COMMAND} "-DVAL1=ORACLE" "-DVAL2=MARIADB" + "-DOUT1=${CMAKE_CURRENT_BINARY_DIR}/yy_oracle.yy" + "-DOUT2=${CMAKE_CURRENT_BINARY_DIR}/yy_mariadb.yy" + "-DIN=${CMAKE_CURRENT_SOURCE_DIR}/sql_yacc.yy" + "-DBISON_VERSION=${BISON_VERSION}" + -P 
${CMAKE_CURRENT_SOURCE_DIR}/gen_yy_files.cmake + COMMENT "Building yy_mariadb.yy and yy_oracle.yy from sql_yacc.yy" + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/sql_yacc.yy + ${CMAKE_CURRENT_SOURCE_DIR}/gen_yy_files.cmake +) + +ADD_DEFINITIONS(-DMYSQL_SERVER -DHAVE_EVENT_SCHEDULER) + +IF(SSL_DEFINES) + ADD_DEFINITIONS(${SSL_DEFINES}) +ENDIF() + +SET (SQL_SOURCE + ${CMAKE_CURRENT_BINARY_DIR}/yy_mariadb.cc + ${CMAKE_CURRENT_BINARY_DIR}/yy_oracle.cc + ../sql-common/client.c + cset_narrowing.cc + compat56.cc derror.cc des_key_file.cc + discover.cc ../sql-common/errmsg.c + field.cc field_conv.cc field_comp.cc + filesort_utils.cc + filesort.cc gstream.cc + signal_handler.cc + handler.cc + hostname.cc init.cc item.cc item_buff.cc item_cmpfunc.cc + item_create.cc item_func.cc item_geofunc.cc item_row.cc + item_strfunc.cc item_subselect.cc item_sum.cc item_timefunc.cc + key.cc log.cc lock.cc + log_event.cc log_event_server.cc + rpl_record.cc rpl_reporting.cc + log_event_old.cc rpl_record_old.cc + mf_iocache.cc my_decimal.cc + mysqld.cc net_serv.cc keycaches.cc + ../sql-common/client_plugin.c + opt_range.cc opt_sum.cc + ../sql-common/pack.c parse_file.cc password.c procedure.cc + protocol.cc records.cc repl_failsafe.cc rpl_filter.cc + session_tracker.cc + set_var.cc + slave.cc sp.cc sp_cache.cc sp_head.cc sp_pcontext.cc + sp_rcontext.cc spatial.cc sql_acl.cc sql_analyse.cc sql_base.cc + sql_cache.cc sql_class.cc sql_client.cc sql_crypt.cc + sql_cursor.cc sql_db.cc sql_delete.cc sql_derived.cc + sql_digest.cc sql_do.cc + sql_error.cc sql_handler.cc sql_get_diagnostics.cc + sql_help.cc sql_insert.cc sql_lex.cc + sql_list.cc sql_load.cc sql_manager.cc + sql_parse.cc sql_bootstrap.cc + sql_partition.cc sql_plugin.cc sql_prepare.cc sql_rename.cc + debug_sync.cc debug.cc + sql_repl.cc sql_select.cc sql_show.cc sql_state.c + group_by_handler.cc derived_handler.cc select_handler.cc + sql_statistics.cc sql_string.cc lex_string.h + sql_table.cc sql_test.cc sql_trigger.cc sql_udf.cc 
sql_union.cc + ddl_log.cc ddl_log.h + sql_update.cc sql_view.cc strfunc.cc table.cc thr_malloc.cc + sql_time.cc tztime.cc unireg.cc item_xmlfunc.cc + uniques.cc + rpl_tblmap.cc sql_binlog.cc event_scheduler.cc + event_data_objects.cc + event_queue.cc event_db_repository.cc + events.cc ../sql-common/my_user.c + partition_info.cc rpl_utility.cc rpl_utility_server.cc + rpl_injector.cc sql_locale.cc + rpl_rli.cc rpl_mi.cc sql_servers.cc sql_audit.cc + sql_connect.cc scheduler.cc sql_partition_admin.cc + sql_profile.cc event_parse_data.cc sql_alter.cc + sql_signal.cc mdl.cc sql_admin.cc + transaction.cc sys_vars.cc sql_truncate.cc datadict.cc + sql_reload.cc + + # added in MariaDB: + grant.cc + sql_explain.cc + sql_analyze_stmt.cc + sql_join_cache.cc + create_options.cc multi_range_read.cc + opt_histogram_json.cc + opt_index_cond_pushdown.cc opt_subselect.cc + opt_table_elimination.cc sql_expression_cache.cc + gcalc_slicescan.cc gcalc_tools.cc + my_apc.cc mf_iocache_encr.cc item_jsonfunc.cc + my_json_writer.cc + rpl_gtid.cc rpl_parallel.cc + semisync.cc semisync_master.cc semisync_slave.cc + semisync_master_ack_receiver.cc + sql_schema.cc + lex_charset.cc + sql_type.cc sql_mode.cc sql_type_json.cc + sql_type_string.cc + sql_type_geom.cc + item_windowfunc.cc sql_window.cc + sql_cte.cc + item_vers.cc + sql_sequence.cc sql_sequence.h ha_sequence.h + sql_tvc.cc sql_tvc.h + opt_split.cc + rowid_filter.cc rowid_filter.h + opt_trace.cc + table_cache.cc encryption.cc temporary_tables.cc + json_table.cc + proxy_protocol.cc backup.cc xa.cc + ${CMAKE_CURRENT_BINARY_DIR}/lex_hash.h + ${CMAKE_CURRENT_BINARY_DIR}/lex_token.h + ${GEN_SOURCES} + ${MYSYS_LIBWRAP_SOURCE} +) + +MY_CHECK_CXX_COMPILER_FLAG(-Wno-unused-but-set-variable) +IF(have_CXX__Wno_unused_but_set_variable) + ADD_COMPILE_FLAGS(${CMAKE_CURRENT_BINARY_DIR}/yy_mariadb.cc + ${CMAKE_CURRENT_BINARY_DIR}/yy_oracle.cc + COMPILE_FLAGS "-Wno-unused-but-set-variable") +ENDIF() + +IF ((CMAKE_SYSTEM_NAME MATCHES "Linux" OR + 
CMAKE_SYSTEM_NAME MATCHES "SunOS" OR + WIN32 OR + HAVE_KQUEUE) + AND (NOT DISABLE_THREADPOOL)) + ADD_DEFINITIONS(-DHAVE_POOL_OF_THREADS) + IF(WIN32) + SET(SQL_SOURCE ${SQL_SOURCE} threadpool_win.cc threadpool_winsockets.cc threadpool_winsockets.h) + ENDIF() + SET(SQL_SOURCE ${SQL_SOURCE} threadpool_generic.cc) + SET(SQL_SOURCE ${SQL_SOURCE} threadpool_common.cc) + MYSQL_ADD_PLUGIN(thread_pool_info thread_pool_info.cc DEFAULT STATIC_ONLY NOT_EMBEDDED) +ENDIF() + +IF(WIN32) + SET(SQL_SOURCE ${SQL_SOURCE} handle_connections_win.cc winmain.cc) +ENDIF() + +MYSQL_ADD_PLUGIN(partition ha_partition.cc STORAGE_ENGINE DEFAULT STATIC_ONLY +RECOMPILE_FOR_EMBEDDED) +MYSQL_ADD_PLUGIN(sql_sequence ha_sequence.cc STORAGE_ENGINE MANDATORY STATIC_ONLY +RECOMPILE_FOR_EMBEDDED) + +ADD_LIBRARY(sql STATIC ${SQL_SOURCE}) +MAYBE_DISABLE_IPO(sql) +DTRACE_INSTRUMENT(sql) +TARGET_LINK_LIBRARIES(sql + mysys mysys_ssl dbug strings vio pcre2-8 + tpool + ${LIBWRAP} ${LIBCRYPT} ${CMAKE_DL_LIBS} ${CMAKE_THREAD_LIBS_INIT} + ${SSL_LIBRARIES} + ${LIBSYSTEMD}) + +IF(TARGET pcre2) + ADD_DEPENDENCIES(sql pcre2) +ENDIF() + +FOREACH(se aria partition perfschema sql_sequence wsrep) + # These engines are used directly in sql sources. 
+ IF(TARGET ${se}) + TARGET_LINK_LIBRARIES(sql ${se}) + ENDIF() +ENDFOREACH() + +IF(VISIBILITY_HIDDEN_FLAG AND TARGET partition AND WITH_UBSAN) + # the spider plugin needs some partition symbols from inside mysqld + # when built with ubsan, in which case we need to remove + # -fvisibility=hidden from partition + GET_TARGET_PROPERTY(f partition COMPILE_FLAGS) + STRING(REPLACE "${VISIBILITY_HIDDEN_FLAG}" "" f ${f}) + SET_TARGET_PROPERTIES(partition PROPERTIES COMPILE_FLAGS "${f}") +ENDIF() + +IF(WIN32) + SET(MYSQLD_SOURCE main.cc message.rc) +ELSE() + SET(MYSQLD_SOURCE main.cc ${DTRACE_PROBES_ALL}) +ENDIF() + +IF(MSVC OR CMAKE_SYSTEM_NAME MATCHES AIX) + SET(libs_to_export_symbols sql mysys dbug strings) + # Create shared library of already compiled object + # Export all symbols from selected libraries, to be used + # by plugins + IF(MSVC) + SET(VERSIONINFO_RC ${PROJECT_BINARY_DIR}/versioninfo_dll.rc) + ELSE() + SET(VERSIONINFO_RC) + ENDIF() + ADD_LIBRARY(server SHARED + $ + $ + $ + $ + ${VERSIONINFO_RC} + ) + + # We need to add all dependencies of sql/mysys/dbug/strings + # to link the shared library + SET(all_deps) + FOREACH(lib ${libs_to_export_symbols}) + GET_TARGET_PROPERTY(deps ${lib} LINK_LIBRARIES) + IF(deps) + LIST(APPEND all_deps ${deps}) + ENDIF() + ENDFOREACH() + LIST(REMOVE_DUPLICATES all_deps) + FOREACH(lib ${libs_to_export_symbols}) + LIST(REMOVE_ITEM all_deps ${lib}) + ENDFOREACH() + + TARGET_LINK_LIBRARIES(server PRIVATE + ${all_deps} + sql_builtins + ) + IF(MSVC) + IF(NOT WITHOUT_DYNAMIC_PLUGINS) + SET_TARGET_PROPERTIES(server PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE) + ENDIF() + ELSE() + SET_TARGET_PROPERTIES(server PROPERTIES AIX_EXPORT_ALL_SYMBOLS TRUE) + ENDIF() + MYSQL_INSTALL_TARGETS(server DESTINATION ${INSTALL_BINDIR} COMPONENT Server) +ENDIF() + +ADD_LIBRARY(sql_builtins STATIC ${CMAKE_CURRENT_BINARY_DIR}/sql_builtin.cc) +TARGET_LINK_LIBRARIES(sql_builtins ${MYSQLD_STATIC_PLUGIN_LIBS}) + +MYSQL_ADD_EXECUTABLE(mariadbd ${MYSQLD_SOURCE} 
DESTINATION ${INSTALL_SBINDIR} COMPONENT Server) + +IF(APPLE) + # Add CoreServices framework since some dloadable plugins may need it + FIND_LIBRARY(CORESERVICES NAMES CoreServices) + IF(CORESERVICES) + TARGET_LINK_LIBRARIES(mariadbd LINK_PRIVATE ${CORESERVICES}) + ENDIF() +ENDIF() + +IF(NOT WITHOUT_DYNAMIC_PLUGINS) + IF(NOT MSVC) + SET_TARGET_PROPERTIES(mariadbd PROPERTIES ENABLE_EXPORTS TRUE) + ENDIF() + GET_TARGET_PROPERTY(mysqld_link_flags mariadbd LINK_FLAGS) + IF(NOT mysqld_link_flags) + SET(mysqld_link_flags) + ENDIF() +ENDIF(NOT WITHOUT_DYNAMIC_PLUGINS) + +IF(MSVC OR CMAKE_SYSTEM_NAME MATCHES AIX) + TARGET_LINK_LIBRARIES(mariadbd server) +ELSE() + TARGET_LINK_LIBRARIES(mariadbd LINK_PRIVATE sql sql_builtins) +ENDIF() + +# Provide plugins with minimal set of libraries +SET(INTERFACE_LIBS ${LIBRT}) +IF(INTERFACE_LIBS) + TARGET_LINK_LIBRARIES(mariadbd LINK_PUBLIC ${INTERFACE_LIBS}) +ENDIF() + +# On Solaris, some extra effort is required in order to get dtrace probes +# from static libraries +DTRACE_INSTRUMENT_STATIC_LIBS(mariadbd + "sql;mysys;mysys_ssl;${MYSQLD_STATIC_PLUGIN_LIBS}") + + +SET(WITH_MYSQLD_LDFLAGS "" CACHE STRING "Additional linker flags for mysqld") +MARK_AS_ADVANCED(WITH_MYSQLD_LDFLAGS) +IF(WITH_MYSQLD_LDFLAGS) + GET_TARGET_PROPERTY(MYSQLD_LINK_FLAGS mariadbd LINK_FLAGS) + IF(NOT MYSQLD_LINK_FLAGS) + SET(MYSQLD_LINK_FLAGS) + ENDIF() + SET_TARGET_PROPERTIES(mariadbd PROPERTIES LINK_FLAGS + "${MYSQLD_LINK_FLAGS} ${WITH_MYSQLD_LDFLAGS}") +ENDIF() + + +# Handle out-of-source build from source package with possibly broken +# bison. 
Copy bison output to from source to build directory, if not already +# there +IF (NOT BISON_FOUND) + IF (NOT ${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR}) + FOREACH(file yy_mariadb.cc yy_mariadb.hh yy_oracle.cc yy_oracle.hh) + IF(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${file} AND (NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/${file})) + CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/${file} + ${CMAKE_CURRENT_BINARY_DIR}/${file} COPYONLY) + ENDIF() + ENDFOREACH() + ENDIF() + + IF(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/yy_mariadb.cc) + # Output files are missing, bail out. + SET(ERRMSG + "Bison (GNU parser generator) is required to build MySQL." + "Please install bison." + ) + IF(WIN32) + SET(ERRMSG ${ERRMSG} + "You can download bison from http://gnuwin32.sourceforge.net/packages/bison.htm " + "Choose 'Complete package, except sources' installation. We recommend to " + "install bison into a directory without spaces, e.g C:\\GnuWin32.") + ENDIF() + MESSAGE(FATAL_ERROR ${ERRMSG}) + ENDIF() +ELSE() + CONFIGURE_FILE(myskel.m4.in myskel.m4) + BISON_TARGET(gen_mariadb_cc_hh ${CMAKE_CURRENT_BINARY_DIR}/yy_mariadb.yy + ${CMAKE_CURRENT_BINARY_DIR}/yy_mariadb.cc + COMPILE_FLAGS "-p MYSQL -S ${CMAKE_CURRENT_BINARY_DIR}/myskel.m4") + BISON_TARGET(gen_oracle_cc_hh ${CMAKE_CURRENT_BINARY_DIR}/yy_oracle.yy + ${CMAKE_CURRENT_BINARY_DIR}/yy_oracle.cc + COMPILE_FLAGS "-p ORA -S ${CMAKE_CURRENT_BINARY_DIR}/myskel.m4") +ENDIF() + +IF(NOT CMAKE_CROSSCOMPILING OR DEFINED CMAKE_CROSSCOMPILING_EMULATOR) + ADD_EXECUTABLE(gen_lex_token gen_lex_token.cc + ${CMAKE_CURRENT_BINARY_DIR}/yy_mariadb.hh) + ADD_EXECUTABLE(gen_lex_hash gen_lex_hash.cc) +ENDIF() + +ADD_CUSTOM_COMMAND( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/lex_hash.h + COMMAND gen_lex_hash > lex_hash.h + DEPENDS gen_lex_hash +) + +MYSQL_ADD_EXECUTABLE(mariadb-tzinfo-to-sql tztime.cc) +SET_TARGET_PROPERTIES(mariadb-tzinfo-to-sql PROPERTIES COMPILE_FLAGS "-DTZINFO2SQL") +TARGET_LINK_LIBRARIES(mariadb-tzinfo-to-sql mysys mysys_ssl) + 
+ADD_CUSTOM_TARGET( + GenServerSource + DEPENDS + ${CMAKE_CURRENT_BINARY_DIR}/lex_hash.h + ${CMAKE_CURRENT_BINARY_DIR}/lex_token.h + ${CMAKE_CURRENT_BINARY_DIR}/yy_mariadb.cc + ${CMAKE_CURRENT_BINARY_DIR}/yy_oracle.cc +) +ADD_DEPENDENCIES(sql GenServerSource) + +IF(TARGET libfmt) + ADD_DEPENDENCIES(sql libfmt) +ENDIF() + +IF(WIN32 OR HAVE_DLOPEN AND NOT DISABLE_SHARED) + ADD_LIBRARY(udf_example MODULE udf_example.c udf_example.def) + SET_TARGET_PROPERTIES(udf_example PROPERTIES PREFIX "") + TARGET_LINK_LIBRARIES(udf_example strings) +ENDIF() + +CONFIGURE_FILE( + ${CMAKE_SOURCE_DIR}/cmake/make_dist.cmake.in + ${CMAKE_BINARY_DIR}/make_dist.cmake @ONLY) + +ADD_CUSTOM_TARGET(dist + COMMAND ${CMAKE_COMMAND} -P ${CMAKE_BINARY_DIR}/make_dist.cmake + DEPENDS ${CMAKE_BINARY_DIR}/sql/yy_mariadb.cc ${CMAKE_BINARY_DIR}/sql/yy_mariadb.hh + DEPENDS ${CMAKE_BINARY_DIR}/sql/yy_oracle.cc ${CMAKE_BINARY_DIR}/sql/yy_oracle.hh + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} +) + +ADD_CUSTOM_TARGET(distclean + COMMAND ${CMAKE_COMMAND} -E echo WARNING: distclean target is not functional + COMMAND ${CMAKE_COMMAND} -E echo Use 'git clean -Xdf' instead + VERBATIM + ) + +# Install initial database (default on windows, optional target elsewhere) +IF(TARGET mariadbd AND (NOT CMAKE_CROSSCOMPILING OR DEFINED CMAKE_CROSSCOMPILING_EMULATOR)) + IF(GENERATOR_IS_MULTI_CONFIG) + SET (CONFIG_PARAM -DCONFIG=${CMAKE_CFG_INTDIR}) + ENDIF() + MAKE_DIRECTORY(${CMAKE_CURRENT_BINARY_DIR}/data) + ADD_CUSTOM_COMMAND( + OUTPUT initdb.dep + COMMAND ${CMAKE_COMMAND} -E remove_directory data + COMMAND ${CMAKE_COMMAND} -E make_directory data + COMMAND ${CMAKE_COMMAND} -E chdir data ${CMAKE_COMMAND} + ${CONFIG_PARAM} + -DTOP_SRCDIR="${CMAKE_BINARY_DIR}" + -DBINDIR="${CMAKE_CURRENT_BINARY_DIR}" + -DMYSQLD_EXECUTABLE="$" + -DCMAKE_CFG_INTDIR="${CMAKE_CFG_INTDIR}" + -P ${CMAKE_SOURCE_DIR}/cmake/create_initial_db.cmake + COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/initdb.dep + WORKING_DIRECTORY 
${CMAKE_CURRENT_BINARY_DIR}/ + DEPENDS mariadbd + ) + IF(WIN32) + SET(ALL_ON_WINDOWS ALL) + ELSE() + SET(ALL_ON_WINDOWS) + ENDIF() + ADD_CUSTOM_TARGET(initial_database + ${ALL_ON_WINDOWS} + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/initdb.dep + ) +ENDIF() + +IF(WIN32) + SET(my_bootstrap_sql ${CMAKE_CURRENT_BINARY_DIR}/my_bootstrap.sql) + FILE(TO_NATIVE_PATH ${my_bootstrap_sql} native_outfile) + + # Create bootstrapper SQL script + ADD_CUSTOM_COMMAND(OUTPUT + ${my_bootstrap_sql} + COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/scripts + cmd /c copy mysql_system_tables.sql+mysql_system_tables_data.sql+fill_help_tables.sql+mysql_performance_tables.sql+mysql_test_db.sql+mysql_sys_schema.sql ${native_outfile} + DEPENDS + ${CMAKE_SOURCE_DIR}/scripts/mysql_system_tables.sql + ${CMAKE_SOURCE_DIR}/scripts/mysql_system_tables_data.sql + ${CMAKE_SOURCE_DIR}/scripts/fill_help_tables.sql + ${CMAKE_SOURCE_DIR}/scripts/mysql_performance_tables.sql + ${CMAKE_SOURCE_DIR}/scripts/mysql_test_db.sql + ${CMAKE_BINARY_DIR}/scripts/mysql_sys_schema.sql + ) + + ADD_CUSTOM_COMMAND( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/mysql_bootstrap_sql.c + COMMAND comp_sql + mysql_bootstrap_sql + ${CMAKE_CURRENT_BINARY_DIR}/my_bootstrap.sql + mysql_bootstrap_sql.c + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS comp_sql ${my_bootstrap_sql} + ) + + MYSQL_ADD_EXECUTABLE(mariadb-install-db + mysql_install_db.cc + ${CMAKE_CURRENT_BINARY_DIR}/mysql_bootstrap_sql.c + password.c + COMPONENT Server + ) + SET_TARGET_PROPERTIES(mariadb-install-db PROPERTIES COMPILE_DEFINITIONS + "INSTALL_PLUGINDIR=${INSTALL_PLUGINDIR};INSTALL_SHAREDIR=${INSTALL_SHAREDIR}") + TARGET_LINK_LIBRARIES(mariadb-install-db mysys mysys_ssl shlwapi) + + ADD_LIBRARY(winservice STATIC winservice.c) + TARGET_LINK_LIBRARIES(winservice shell32) + + MYSQL_ADD_EXECUTABLE(mariadb-upgrade-service + mysql_upgrade_service.cc + upgrade_conf_file.cc + COMPONENT Server) + TARGET_LINK_LIBRARIES(mariadb-upgrade-service mysys winservice) 
+ENDIF(WIN32) + +IF(NOT WITH_WSREP) + SET(EXCL_WSREP "wsrep_[a-np-z]*.h") +ENDIF() +INSTALL(DIRECTORY . DESTINATION ${INSTALL_INCLUDEDIR}/server/private COMPONENT Development + FILES_MATCHING PATTERN "*.h" + PATTERN share EXCLUDE + PATTERN "${EXCL_WSREP}" EXCLUDE + PATTERN CMakeFiles EXCLUDE) diff --git a/sql/MSG00001.bin b/sql/MSG00001.bin new file mode 100644 index 00000000..5c1cd0ba Binary files /dev/null and b/sql/MSG00001.bin differ diff --git a/sql/add_errmsg b/sql/add_errmsg new file mode 100755 index 00000000..86226926 --- /dev/null +++ b/sql/add_errmsg @@ -0,0 +1,17 @@ +#!/bin/sh + +if test $# -ne 1 +then + echo "Copies # error messages from share/english/errmsg.txt to other message files" + echo "Usage: $0 number_of_messages_to_copy" + exit 1; +fi + +FILE=/tmp/add.$$ +tail -$1 share/english/errmsg-utf8.txt > $FILE +for i in `ls share/*/errmsg-utf8.txt | grep -v english` +do + cat $FILE >> $i +done +rm $FILE + diff --git a/sql/authors.h b/sql/authors.h new file mode 100644 index 00000000..cf0a4c5e --- /dev/null +++ b/sql/authors.h @@ -0,0 +1,195 @@ +#ifndef AUTHORS_INCLUDED +#define AUTHORS_INCLUDED + +/* Copyright (c) 2005, 2010, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* Structure of the name list */ + +struct show_table_authors_st { + const char *name; + const char *location; + const char *comment; +}; + +/* + Output from "SHOW AUTHORS" + + If you can update it, you get to be in it :) + + Don't be offended if your name is not in here, just add it! + + Active people in the MariaDB are listed first, active people in MySQL + then, not active last. + + Names should be encoded using UTF-8. + + See also https://mariadb.com/kb/en/log-of-mariadb-contributions/ +*/ + +struct show_table_authors_st show_table_authors[]= { + /* Active people on MariaDB */ + { "Michael (Monty) Widenius", "Tusby, Finland", + "Lead developer and main author" }, + { "Sergei Golubchik", "Kerpen, Germany", + "Architect, Full-text search, precision math, plugin framework, merges etc" }, + { "Igor Babaev", "Bellevue, USA", "Optimizer, keycache, core work"}, + { "Sergey Petrunia", "St. 
Petersburg, Russia", "Optimizer"}, + { "Oleksandr Byelkin", "Lugansk, Ukraine", + "Query Cache (4.0), Subqueries (4.1), Views (5.0)" }, + { "Timour Katchaounov", "Sofia , Bulgaria", "Optimizer"}, + { "Kristian Nielsen", "Copenhagen, Denmark", + "Replication, Async client prototocol, General buildbot stuff" }, + { "Alexander (Bar) Barkov", "Izhevsk, Russia", + "Unicode and character sets" }, + { "Alexey Botchkov (Holyfoot)", "Izhevsk, Russia", + "GIS extensions, embedded server, precision math"}, + { "Daniel Bartholomew", "Raleigh, USA", "MariaDB documentation, Buildbot, releases"}, + { "Colin Charles", "Selangor, Malesia", "MariaDB documentation, talks at a LOT of conferences"}, + { "Sergey Vojtovich", "Izhevsk, Russia", + "initial implementation of plugin architecture, maintained native storage engines (MyISAM, MEMORY, ARCHIVE, etc), rewrite of table cache"}, + { "Vladislav Vaintroub", "Mannheim, Germany", "MariaDB Java connector, new thread pool, Windows optimizations"}, + { "Elena Stepanova", "Sankt Petersburg, Russia", "QA, test cases"}, + { "Georg Richter", "Heidelberg, Germany", "New LGPL C connector, PHP connector"}, + { "Jan Lindström", "Ylämylly, Finland", "Working on InnoDB"}, + { "Lixun Peng", "Hangzhou, China", "Multi Source replication" }, + { "Olivier Bertrand", "Paris, France", "CONNECT storage engine"}, + { "Kentoku Shiba", "Tokyo, Japan", "Spider storage engine, metadata_lock_info Information schema"}, + { "Percona", "CA, USA", "XtraDB, microslow patches, extensions to slow log"}, + { "Vicentiu Ciorbaru", "Bucharest, Romania", "Roles"}, + { "Sudheera Palihakkara", "", "PCRE Regular Expressions" }, + { "Pavel Ivanov", "USA", "Some patches and bug fixes"}, + { "Konstantin Osipov", "Moscow, Russia", + "Prepared statements (4.1), Cursors (5.0), GET_LOCK (10.0)" }, + { "Ian Gilfillan", "South Africa", "MariaDB documentation"}, + { "Federico Razolli", "Italy", "MariaDB documentation Italian translation"}, + { "Vinchen", "Shenzhen, China", "Instant ADD 
Column for InnoDB, Spider engine optimization, from Tencent Game DBA Team" }, + { "Willhan", "Shenzhen, China", "Big Column Compression, Spider engine optimization, from Tencent Game DBA Team" }, + { "Anders Karlsson", "Ystad, Sweden", "Replication patch for enforcing triggers on slave"}, + { "Otto Kekäläinen", "Tampere, Finland", "Debian packaging, install/upgrade engineering, QA pipelines, documentation"}, + { "Daniel Black", "Canberra, Australia", "Modernising large page support, systemd, and bug fixes"}, + + /* People working on MySQL code base (not NDB) */ + { "Guilhem Bichot", "Bordeaux, France", "Replication (since 4.0)" }, + { "Andrei Elkin", "Espoo, Finland", "Replication" }, + { "Dmitri Lenev", "Moscow, Russia", + "Time zones support (4.1), Triggers (5.0)" }, + { "Marc Alff", "Denver, CO, USA", "Signal, Resignal, Performance schema" }, + { "Mikael Ronström", "Stockholm, Sweden", + "NDB Cluster, Partitioning, online alter table" }, + { "Ingo Strüwing", "Berlin, Germany", + "Bug fixing in MyISAM, Merge tables etc" }, + {"Marko Mäkelä", "Helsinki, Finland", "InnoDB core developer"}, + + /* People not active anymore */ + { "David Axmark", "London, England", + "MySQL founder; Small stuff long time ago, Monty ripped it out!" }, + { "Brian (Krow) Aker", "Seattle, WA, USA", + "Architecture, archive, blackhole, federated, bunch of little stuff :)" }, + { "Venu Anuganti", "", "Client/server protocol (4.1)" }, + { "Omer BarNir", "Sunnyvale, CA, USA", + "Testing (sometimes) and general QA stuff" }, + { "John Birrell", "", "Emulation of pthread_mutex() for OS/2" }, + { "Andreas F. 
Bobak", "", "AGGREGATE extension to user-defined functions" }, + { "Reggie Burnett", "Nashville, TN, USA", "Windows development, Connectors" }, + { "Kent Boortz", "Orebro, Sweden", "Test platform, and general build stuff" }, + { "Tim Bunce", "", "mysqlhotcopy" }, + { "Yves Carlier", "", "mysqlaccess" }, + { "Joshua Chamas", "Cupertino, CA, USA", + "Concurrent insert, extended date syntax" }, + { "Petr Chardin", "Moscow, Russia", + "Instance Manager (5.0), Server log tables (5.1)" }, + { "Wei-Jou Chen", "", "Chinese (Big5) character set" }, + { "Albert Chin-A-Young", "", + "Tru64 port, large file support, better TCP wrappers support" }, + { "Jorge del Conde", "Mexico City, Mexico", "Windows development" }, + { "Antony T. Curtis", "Norwalk, CA, USA", + "Parser, port to OS/2, storage engines and some random stuff" }, + { "Yuri Dario", "", "OS/2 port" }, + { "Patrick Galbraith", "Sharon, NH", "Federated Engine, mysqlslap" }, + { "Lenz Grimmer", "Hamburg, Germany", + "Production (build and release) engineering" }, + { "Nikolay Grishakin", "Austin, TX, USA", "Testing - Server" }, + { "Wei He", "", "Chinese (GBK) character set" }, + { "Eric Herman", "Amsterdam, Netherlands", "Bug fixing - federated" }, + { "Andrey Hristov", "Walldorf, Germany", "Event scheduler (5.1)" }, + { "Alexander (Alexi) Ivanov", "St. 
Petersburg, Russia", "Replication" }, + { "Mattias Jonsson", "Uppsala, Sweden", "Partitioning" }, + { "Alexander (Salle) Keremidarski", "Sofia, Bulgaria", + "Bug fixing" }, + { "Mats Kindahl", "Storvreta, Sweden", "Replication" }, + { "Serge Kozlov", "Velikie Luki, Russia", "Testing - Cluster" }, + { "Hakan Küçükyılmaz", "Walldorf, Germany", "Testing - Server" }, + { "Matthias Leich", "Berlin, Germany", "Testing - Server" }, + { "Arjen Lentz", "Brisbane, Australia", + "Documentation (2001-2004), Dutch error messages, LOG2()" }, + { "Marc Liyanage", "", "Created Mac OS X packages" }, + { "Kelly Long", "Denver, CO, USA", "Pool Of Threads" }, + { "Zarko Mocnik", "", "Sorting for Slovenian language" }, + { "Per-Erik Martin", "Uppsala, Sweden", "Stored Procedures (5.0)" }, + { "Alexis Mikhailov", "", "User-defined functions" }, + { "Sinisa Milivojevic", "Larnaca, Cyprus", + "UNION (4.0), Subqueries in FROM clause (4.1), many other features" }, + { "Jonathan (Jeb) Miller", "Kyle, TX, USA", + "Testing - Cluster, Replication" }, + { "Elliot Murphy", "Cocoa, FL, USA", "Replication and backup" }, + { "Pekka Nouisiainen", "Stockholm, Sweden", + "NDB Cluster: BLOB support, character set support, ordered indexes" }, + { "Alexander Nozdrin", "Moscow, Russia", + "Bug fixing (Stored Procedures, 5.0)" }, + { "Per Eric Olsson", "", "Testing of dynamic record format" }, + { "Jonas Oreland", "Stockholm, Sweden", + "NDB Cluster, Online Backup, lots of other things" }, + { "Alexander (Sasha) Pachev", "Provo, UT, USA", + "Statement-based replication, SHOW CREATE TABLE, mysql-bench" }, + { "Irena Pancirov", "", "Port to Windows with Borland compiler" }, + { "Jan Pazdziora", "", "Czech sorting order" }, + { "Benjamin Pflugmann", "", + "Extended MERGE storage engine to handle INSERT" }, + { "Igor Romanenko", "", + "mysqldump" }, + { "Tõnu Samuel", "Estonia", + "VIO interface, other miscellaneous features" }, + { "Carsten Segieth (Pino)", "Fredersdorf, Germany", "Testing - Server"}, + { 
"Martin Sköld", "Stockholm, Sweden", + "NDB Cluster: Unique indexes, integration into MySQL" }, + { "Timothy Smith", "Auckland, New Zealand", + "Dynamic character sets, parts of the build system, libmysqld"}, + { "Miguel Solorzano", "Florianopolis, Santa Catarina, Brazil", + "Windows development, Windows NT service"}, + { "Punita Srivastava", "Austin, TX, USA", "Testing - Merlin"}, + { "Alexey Stroganov (Ranger)", "Lugansk, Ukraine", "Testing - Benchmarks"}, + { "Magnus Svensson", "Öregrund, Sweden", + "NDB Cluster: Integration into MySQL, test framework" }, + { "Zeev Suraski", "", "FROM_UNIXTIME(), ENCRYPT()" }, + { "TAMITO", "", + "The _MB character set macros and UJIS and SJIS character sets" }, + { "Jani Tolonen", "Helsinki, Finland", + "mysqlimport, extensions to command-line clients, PROCEDURE ANALYSE()" }, + { "Lars Thalmann", "Stockholm, Sweden", + "Replication and cluster development" }, + { "Tomas Ulin", "Stockholm, Sweden", + "NDB Cluster: Configuration, installation" }, + { "Gianmassimo Vigazzola", "", "Initial Windows port" }, + { "Sergey Vojtovich", "Izhevsk, Russia", "Plugins infrastructure (5.1)" }, + { "Matt Wagner", "Northfield, MN, USA", "Bug fixing" }, + { "Jim Winstead Jr.", "Los Angeles, CA, USA", "Bug fixing" }, + { "Peter Zaitsev", "Tacoma, WA, USA", + "SHA1(), AES_ENCRYPT(), AES_DECRYPT(), bug fixing" }, + {"Mark Mark Callaghan", "Texas, USA", "Statistics patches"}, + {NULL, NULL, NULL} +}; + +#endif /* AUTHORS_INCLUDED */ diff --git a/sql/backup.cc b/sql/backup.cc new file mode 100644 index 00000000..5ce770c3 --- /dev/null +++ b/sql/backup.cc @@ -0,0 +1,654 @@ +/* Copyright (c) 2018, 2022, MariaDB Corporation. + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/* + Implementation of BACKUP STAGE, an interface for external backup tools. + + TODO: + - At backup_start() we call ha_prepare_for_backup() for all active + storage engines. If someone tries to load a new storage engine + that requires prepare_for_backup() for it to work, that storage + engines has to be blocked from loading until backup finishes. + As we currently don't have any loadable storage engine that + requires this and we have not implemented that part. + This can easily be done by adding a + PLUGIN_CANT_BE_LOADED_WHILE_BACKUP_IS_RUNNING flag to + maria_declare_plugin and check this before calling + plugin_initialize() +*/ + +#include "mariadb.h" +#include "sql_class.h" +#include "sql_base.h" // flush_tables +#include "sql_insert.h" // kill_delayed_threads +#include "sql_handler.h" // mysql_ha_cleanup_no_free +#include +#include // strconvert() +#include "debug_sync.h" +#ifdef WITH_WSREP +#include "wsrep_server_state.h" +#include "wsrep_mysqld.h" +#endif /* WITH_WSREP */ + +static const char *stage_names[]= +{"START", "FLUSH", "BLOCK_DDL", "BLOCK_COMMIT", "END", 0}; + +TYPELIB backup_stage_names= +{ array_elements(stage_names)-1, "", stage_names, 0 }; + +static MDL_ticket *backup_flush_ticket; +static File volatile backup_log= -1; +static int backup_log_error= 0; + +static bool backup_start(THD *thd); +static bool backup_flush(THD *thd); +static bool backup_block_ddl(THD *thd); +static bool backup_block_commit(THD *thd); +static bool start_ddl_logging(); +static void stop_ddl_logging(); + +/** + Run next 
  stage of backup.
*/

/*
  Reset the global backup state. Called once at server start, before any
  BACKUP STAGE can run.
*/

void backup_init()
{
  backup_flush_ticket= 0;
  backup_log= -1;
  backup_log_error= 0;
}

/**
  Run all backup stages up to and including 'stage'.

  The stages between the current one and 'stage' are executed in order
  (START, FLUSH, BLOCK_DDL, BLOCK_COMMIT, END), except that asking for
  BACKUP_END jumps directly to the end stage so a backup can be aborted
  quickly.

  @param thd    Connection running BACKUP STAGE
  @param stage  Target stage requested by the user

  @retval 0 ok
  @retval 1 error; my_error() has been called. On failure of a middle
            stage the session stays at the last successful stage so the
            user may retry or abort with BACKUP STAGE END.
*/

bool run_backup_stage(THD *thd, backup_stages stage)
{
  backup_stages next_stage;
  DBUG_ENTER("run_backup_stage");

  if (thd->current_backup_stage == BACKUP_FINISHED)
  {
    /* No backup running in this session: only START is acceptable */
    if (stage != BACKUP_START)
    {
      my_error(ER_BACKUP_NOT_RUNNING, MYF(0));
      DBUG_RETURN(1);
    }
    next_stage= BACKUP_START;
  }
  else
  {
    /* Stages may only move forward */
    if ((uint) thd->current_backup_stage >= (uint) stage)
    {
      my_error(ER_BACKUP_WRONG_STAGE, MYF(0), stage_names[stage],
               stage_names[thd->current_backup_stage]);
      DBUG_RETURN(1);
    }
    if (stage == BACKUP_END)
    {
      /*
        If end is given, jump directly to stage end. This is to allow one
        to abort backup quickly.
      */
      next_stage= stage;
    }
    else
    {
      /* Go through all not used stages until we reach 'stage' */
      next_stage= (backup_stages) ((uint) thd->current_backup_stage + 1);
    }
  }

  do
  {
    bool res= false;
    backup_stages previous_stage= thd->current_backup_stage;
    thd->current_backup_stage= next_stage;
    switch (next_stage) {
    case BACKUP_START:
      if (!(res= backup_start(thd)))
        break;
      /* Reset backup stage to start for next backup try */
      previous_stage= BACKUP_FINISHED;
      break;
    case BACKUP_FLUSH:
      res= backup_flush(thd);
      break;
    case BACKUP_WAIT_FOR_FLUSH:
      res= backup_block_ddl(thd);
      break;
    case BACKUP_LOCK_COMMIT:
      res= backup_block_commit(thd);
      break;
    case BACKUP_END:
      res= backup_end(thd);
      break;
    case BACKUP_FINISHED:
      DBUG_ASSERT(0);
    }
    if (res)
    {
      /* Restore state so the failed stage can be retried */
      thd->current_backup_stage= previous_stage;
      my_error(ER_BACKUP_STAGE_FAILED, MYF(0), stage_names[(uint) stage]);
      DBUG_RETURN(1);
    }
    next_stage= (backup_stages) ((uint) next_stage + 1);
  } while ((uint) next_stage <= (uint) stage);

  DBUG_RETURN(0);
}


/**
  Start the backup

  - Wait for previous backup to stop running
  - Start service to log changed tables (TODO)
  - Block purge of redo files (Required at least for
    Aria)
  - A handler can optionally do a checkpoint of all tables,
    to speed up the recovery stage of the backup.
*/

static bool backup_start(THD *thd)
{
  MDL_request mdl_request;
  DBUG_ENTER("backup_start");

  thd->current_backup_stage= BACKUP_FINISHED;   // For next test
  if (thd->has_read_only_protection())
    DBUG_RETURN(1);

  /* BACKUP STAGE is not allowed under LOCK TABLES */
  if (thd->locked_tables_mode)
  {
    my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0));
    DBUG_RETURN(1);
  }

  /* this will be reset if this stage fails */
  thd->current_backup_stage= BACKUP_START;

  /*
    Wait for old backup to finish and block ddl's so that we can start the
    ddl logger
  */
  MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_BLOCK_DDL,
                   MDL_EXPLICIT);
  if (thd->mdl_context.acquire_lock(&mdl_request,
                                    thd->variables.lock_wait_timeout))
    DBUG_RETURN(1);

  if (start_ddl_logging())
  {
    /* Could not create the ddl log file; undo the MDL lock */
    thd->mdl_context.release_lock(mdl_request.ticket);
    DBUG_RETURN(1);
  }

  DBUG_ASSERT(backup_flush_ticket == 0);
  backup_flush_ticket= mdl_request.ticket;

  /* Downgrade lock to only block other backups */
  backup_flush_ticket->downgrade_lock(MDL_BACKUP_START);

  ha_prepare_for_backup();
  DBUG_RETURN(0);
}

/**
  backup_flush()

  - FLUSH all changes for not active non transactional tables, except
    for statistics and log tables. Close the tables, to ensure they
    are marked as closed after backup.

  - BLOCK all NEW write locks for all non transactional tables
    (except statistics and log tables). Already granted locks are
    not affected (Running statements with non transaction tables will
    continue running).
+ + - The following DDL's doesn't have to be blocked as they can't set + the table in a non consistent state: + CREATE, RENAME, DROP +*/ + +static bool backup_flush(THD *thd) +{ + DBUG_ENTER("backup_flush"); + /* + Lock all non transactional normal tables to be used in new DML's + */ + if (thd->mdl_context.upgrade_shared_lock(backup_flush_ticket, + MDL_BACKUP_FLUSH, + thd->variables.lock_wait_timeout)) + DBUG_RETURN(1); + + /* + Free unused tables and table shares so that mariabackup knows what + is safe to copy + */ + tc_purge(); + tdc_purge(true); + + DBUG_RETURN(0); +} + +/** + backup_block_ddl() + + - Kill all insert delay handlers, to ensure that all non transactional + tables are closed (can be improved in the future). + + - Close handlers as other threads may wait for these, which can cause + deadlocks. + + - Wait for all statements using write locked non-transactional tables to end. + + - Mark all not used active non transactional tables (except + statistics and log tables) to be closed with + handler->extra(HA_EXTRA_FLUSH) + + - Block TRUNCATE TABLE, CREATE TABLE, DROP TABLE and RENAME + TABLE. Block also start of a new ALTER TABLE and the final rename + phase of ALTER TABLE. Running ALTER TABLES are not blocked. Both normal + and inline ALTER TABLE'S should be blocked when copying is completed but + before final renaming of the tables / new table is activated. + This will probably require a callback from the InnoDB code. 
*/

/*
  Retry to get initial lock.  With sleep_time starting at 0.1s and growing
  by a factor 5 per retry this waits at most
  0.1 + 0.5 + 2.5 + 12.5 + 62.5 = 78.1 sec
*/
#define MAX_RETRY_COUNT 5

static bool backup_block_ddl(THD *thd)
{
  PSI_stage_info org_stage;
  uint sleep_time;
  DBUG_ENTER("backup_block_ddl");

  kill_delayed_threads();
  mysql_ha_cleanup_no_free(thd);

  thd->backup_stage(&org_stage);
  THD_STAGE_INFO(thd, stage_waiting_for_flush);
  /* Wait until all non trans statements has ended */
  if (thd->mdl_context.upgrade_shared_lock(backup_flush_ticket,
                                           MDL_BACKUP_WAIT_FLUSH,
                                           thd->variables.lock_wait_timeout))
    goto err;

  /*
    Remove not used tables from the table share.  Flush all changes to
    non transaction tables and mark those that are not in use in write
    operations as closed.  From backup purposes it's not critical if
    flush_tables() returns an error.  It's ok to continue with next
    backup stage even if we got an error.
  */
  (void) flush_tables(thd, FLUSH_NON_TRANS_TABLES);
  thd->clear_error();

#ifdef WITH_WSREP
  DBUG_ASSERT(thd->wsrep_desynced_backup_stage == false);
  /*
    If the user is specifically choosing to allow BF aborting for the
    BACKUP STAGE BLOCK_DDL lock holder, then do not desync and pause the
    node from cluster replication.  e.g. mariabackup uses
    BACKUP STAGE BLOCK_DDL and will be abortable by this.
    But if the node is processing as SST donor or the
    WSREP_MODE_BF_MARIABACKUP mode is not set, we desync the node for
    BACKUP STAGE because applier threads bypass backup MDL locks
    (see MDL_lock::can_grant_lock).
  */
  if (WSREP_NNULL(thd))
  {
    Wsrep_server_state &server_state= Wsrep_server_state::instance();

    if (!wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP) ||
        server_state.state() == Wsrep_server_state::s_donor)
    {
      if (server_state.desync_and_pause().is_undefined())
      {
        DBUG_RETURN(1);
      }
      DEBUG_SYNC(thd, "wsrep_backup_stage_after_desync_and_pause");
      thd->wsrep_desynced_backup_stage= true;
    }
    else
      WSREP_INFO("Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used.");
  }
#endif /* WITH_WSREP */

  /*
    block new DDL's, in addition to all previous blocks
    We didn't do this lock above, as we wanted DDL's to be executed while
    we wait for non transactional tables (which may take a while).

    We do this lock in a loop as we can get a deadlock if there are
    multi-object ddl statements like
    RENAME TABLE t1 TO t2, t3 TO t3
    and the MDL happens in the middle of it.
  */
  THD_STAGE_INFO(thd, stage_waiting_for_ddl);
  sleep_time= 100;                              // Start with 0.1 seconds
  for (uint i= 0 ; i <= MAX_RETRY_COUNT ; i++)
  {
    if (!thd->mdl_context.upgrade_shared_lock(backup_flush_ticket,
                                              MDL_BACKUP_WAIT_DDL,
                                              thd->variables.lock_wait_timeout))
      break;
    if (thd->get_stmt_da()->sql_errno() != ER_LOCK_DEADLOCK || thd->killed ||
        i == MAX_RETRY_COUNT)
    {
      /*
        Could be a timeout.  Downgrade lock to what it was before this
        function was called so that this function can be called again
      */
      backup_flush_ticket->downgrade_lock(MDL_BACKUP_FLUSH);
      goto err;
    }
    thd->clear_error();                         // Forget the DEADLOCK error
    my_sleep(sleep_time);
    sleep_time*= 5;                             // Wait a bit longer next time
  }

  /* There can't be anything more that needs to be logged to ddl log */
  THD_STAGE_INFO(thd, org_stage);
  stop_ddl_logging();

  // Allow tests to block the backup thread
  DBUG_EXECUTE_IF("sync.after_mdl_block_ddl",
                  {
                    const char act[]=
                      "now "
                      "SIGNAL sync.after_mdl_block_ddl_reached "
                      "WAIT_FOR signal.after_mdl_block_ddl_continue";
                    DBUG_ASSERT(!debug_sync_set_action(thd,
                                                       STRING_WITH_LEN(act)));
                  };);

  DBUG_RETURN(0);
err:
  THD_STAGE_INFO(thd, org_stage);
  DBUG_RETURN(1);
}


/**
  backup_block_commit()

  Block commits, writes to log and statistics tables and binary log
*/

static bool backup_block_commit(THD *thd)
{
  DBUG_ENTER("backup_block_commit");
  if (thd->mdl_context.upgrade_shared_lock(backup_flush_ticket,
                                           MDL_BACKUP_WAIT_COMMIT,
                                           thd->variables.lock_wait_timeout))
    DBUG_RETURN(1);

  /* We can ignore errors from flush_tables() */
  (void) flush_tables(thd, FLUSH_SYS_TABLES);

  if (mysql_bin_log.is_open())
  {
    /* Sync the binary log under its lock so the backup sees a stable end */
    mysql_mutex_lock(mysql_bin_log.get_log_lock());
    mysql_file_sync(mysql_bin_log.get_log_file()->file, MYF(MY_WME));
    mysql_mutex_unlock(mysql_bin_log.get_log_lock());
  }
  thd->clear_error();

  DBUG_RETURN(0);
}


/**
  backup_end()

  Safe to run, even if backup has not been run by this thread.
  This is for example the case when a THD ends.
*/

bool backup_end(THD *thd)
{
  DBUG_ENTER("backup_end");

  if (thd->current_backup_stage != BACKUP_FINISHED)
  {
    DBUG_ASSERT(backup_flush_ticket);
    MDL_ticket *old_ticket= backup_flush_ticket;
    ha_end_backup();
    // This is needed as we may call backup_end without backup_block_commit
    stop_ddl_logging();
    /* Reset global/session state before releasing the lock */
    backup_flush_ticket= 0;
    thd->current_backup_stage= BACKUP_FINISHED;
    thd->mdl_context.release_lock(old_ticket);
#ifdef WITH_WSREP
    // If node was desynced, resume and resync
    if (thd->wsrep_desynced_backup_stage)
    {
      Wsrep_server_state &server_state= Wsrep_server_state::instance();
      THD_STAGE_INFO(thd, stage_waiting_flow);
      WSREP_DEBUG("backup_end: waiting for flow control for %s",
                  wsrep_thd_query(thd));
      server_state.resume_and_resync();
      thd->wsrep_desynced_backup_stage= false;
      DEBUG_SYNC(thd, "wsrep_backup_stage_after_resume_and_resync");
    }
#endif /* WITH_WSREP */
  }
  DBUG_RETURN(0);
}


/**
  backup_set_alter_copy_lock()

  @param thd
  @param table  From table that is part of ALTER TABLE.  This is only used
                for the assert to ensure we use this function correctly.

  Downgrades the MDL_BACKUP_DDL lock to MDL_BACKUP_ALTER_COPY to allow
  copy of altered table to proceed under MDL_BACKUP_WAIT_DDL

  Note that in some case when using non transactional tables,
  the lock may be of type MDL_BACKUP_DML.
+*/ + +void backup_set_alter_copy_lock(THD *thd, TABLE *table) +{ + MDL_ticket *ticket= thd->mdl_backup_ticket; + + /* Ticket maybe NULL in case of LOCK TABLES or for temporary tables*/ + DBUG_ASSERT(ticket || thd->locked_tables_mode || + table->s->tmp_table != NO_TMP_TABLE); + if (ticket) + ticket->downgrade_lock(MDL_BACKUP_ALTER_COPY); +} + +/** + backup_reset_alter_copy_lock + + Upgrade the lock of the original ALTER table MDL_BACKUP_DDL + Can fail if MDL lock was killed +*/ + +bool backup_reset_alter_copy_lock(THD *thd) +{ + bool res= 0; + MDL_ticket *ticket= thd->mdl_backup_ticket; + + /* Ticket maybe NULL in case of LOCK TABLES or for temporary tables*/ + if (ticket) + res= thd->mdl_context.upgrade_shared_lock(ticket, MDL_BACKUP_DDL, + thd->variables.lock_wait_timeout); + return res; +} + + +/***************************************************************************** + Interfaces for BACKUP LOCK + These functions are used by maria_backup to ensure that there are no active + ddl's on the object the backup is going to copy +*****************************************************************************/ + + +bool backup_lock(THD *thd, TABLE_LIST *table) +{ + /* We should leave the previous table unlocked in case of errors */ + backup_unlock(thd); + if (thd->locked_tables_mode) + { + my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0)); + return 1; + } + table->mdl_request.duration= MDL_EXPLICIT; + if (thd->mdl_context.acquire_lock(&table->mdl_request, + thd->variables.lock_wait_timeout)) + return 1; + thd->mdl_backup_lock= table->mdl_request.ticket; + return 0; +} + + +/* Release old backup lock if it exists */ + +void backup_unlock(THD *thd) +{ + if (thd->mdl_backup_lock) + thd->mdl_context.release_lock(thd->mdl_backup_lock); + thd->mdl_backup_lock= 0; +} + + +/***************************************************************************** + Logging of ddl statements to backup log +*****************************************************************************/ + 
+static bool start_ddl_logging() +{ + char name[FN_REFLEN]; + DBUG_ENTER("start_ddl_logging"); + + fn_format(name, "ddl", mysql_data_home, ".log", 0); + + backup_log_error= 0; + backup_log= mysql_file_create(key_file_log_ddl, name, CREATE_MODE, + O_TRUNC | O_WRONLY | O_APPEND | O_NOFOLLOW, + MYF(MY_WME)); + DBUG_RETURN(backup_log < 0); +} + +static void stop_ddl_logging() +{ + mysql_mutex_lock(&LOCK_backup_log); + if (backup_log >= 0) + { + mysql_file_close(backup_log, MYF(MY_WME)); + backup_log= -1; + } + backup_log_error= 0; + mysql_mutex_unlock(&LOCK_backup_log); +} + + +static inline char *add_str_to_buffer(char *ptr, const LEX_CSTRING *from) +{ + if (from->length) // If length == 0, str may be 0 + memcpy(ptr, from->str, from->length); + ptr[from->length]= '\t'; + return ptr+ from->length + 1; +} + +static char *add_name_to_buffer(char *ptr, const LEX_CSTRING *from) +{ + LEX_CSTRING tmp; + char buff[NAME_LEN*4]; + uint errors; + + tmp.str= buff; + tmp.length= strconvert(system_charset_info, from->str, from->length, + &my_charset_filename, buff, sizeof(buff), &errors); + return add_str_to_buffer(ptr, &tmp); +} + + +static char *add_id_to_buffer(char *ptr, const LEX_CUSTRING *from) +{ + LEX_CSTRING tmp; + char buff[MY_UUID_STRING_LENGTH]; + + if (!from->length) + return add_str_to_buffer(ptr, (LEX_CSTRING*) from); + + tmp.str= buff; + tmp.length= MY_UUID_STRING_LENGTH; + my_uuid2str(from->str, buff, 1); + return add_str_to_buffer(ptr, &tmp); +} + + +static char *add_bool_to_buffer(char *ptr, bool value) { + *(ptr++) = value ? '1' : '0'; + *(ptr++) = '\t'; + return ptr; +} + +/* + Write to backup log + + Sets backup_log_error in case of error. 
The backup thread could check this + to ensure that all logging had succeded +*/ + +void backup_log_ddl(const backup_log_info *info) +{ + if (backup_log >= 0 && backup_log_error == 0) + { + mysql_mutex_lock(&LOCK_backup_log); + if (backup_log < 0) + { + mysql_mutex_unlock(&LOCK_backup_log); + return; + } + /* Enough place for db.table *2 + query + engine_name * 2 + tabs+ uuids */ + char buff[NAME_CHAR_LEN*4+20+40*2+10+MY_UUID_STRING_LENGTH*2], *ptr= buff; + char timebuff[20]; + struct tm current_time; + LEX_CSTRING tmp_lex; + time_t tmp_time= my_time(0); + + localtime_r(&tmp_time, ¤t_time); + tmp_lex.str= timebuff; + tmp_lex.length= snprintf(timebuff, sizeof(timebuff), + "%4d-%02d-%02d %2d:%02d:%02d", + current_time.tm_year + 1900, + current_time.tm_mon+1, + current_time.tm_mday, + current_time.tm_hour, + current_time.tm_min, + current_time.tm_sec); + ptr= add_str_to_buffer(ptr, &tmp_lex); + + ptr= add_str_to_buffer(ptr, &info->query); + ptr= add_str_to_buffer(ptr, &info->org_storage_engine_name); + ptr= add_bool_to_buffer(ptr, info->org_partitioned); + ptr= add_name_to_buffer(ptr, &info->org_database); + ptr= add_name_to_buffer(ptr, &info->org_table); + ptr= add_id_to_buffer(ptr, &info->org_table_id); + + /* The following fields are only set in case of rename */ + ptr= add_str_to_buffer(ptr, &info->new_storage_engine_name); + ptr= add_bool_to_buffer(ptr, info->new_partitioned); + ptr= add_name_to_buffer(ptr, &info->new_database); + ptr= add_name_to_buffer(ptr, &info->new_table); + ptr= add_id_to_buffer(ptr, &info->new_table_id); + + ptr[-1]= '\n'; // Replace last tab with nl + if (mysql_file_write(backup_log, (uchar*) buff, (size_t) (ptr-buff), + MYF(MY_FNABP))) + backup_log_error= my_errno; + mysql_mutex_unlock(&LOCK_backup_log); + } +} diff --git a/sql/backup.h b/sql/backup.h new file mode 100644 index 00000000..2e5c3a58 --- /dev/null +++ b/sql/backup.h @@ -0,0 +1,47 @@ +#ifndef BACKUP_INCLUDED +#define BACKUP_INCLUDED +/* Copyright (c) 2018, MariaDB Corporation 

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */

/*
  Stages of BACKUP STAGE, in execution order.  BACKUP_FINISHED is the
  "no backup running" state.
*/
enum backup_stages
{
  BACKUP_START, BACKUP_FLUSH, BACKUP_WAIT_FOR_FLUSH, BACKUP_LOCK_COMMIT,
  BACKUP_END, BACKUP_FINISHED
};

/* Printable names for the stages above (see backup.cc) */
extern TYPELIB backup_stage_names;

/* One entry for the backup ddl log; written by backup_log_ddl() */
struct backup_log_info {
  LEX_CSTRING query;
  LEX_CUSTRING org_table_id;                 /* Unique id from frm */
  LEX_CSTRING org_database, org_table;
  LEX_CSTRING org_storage_engine_name;
  LEX_CSTRING new_database, new_table;
  LEX_CSTRING new_storage_engine_name;
  LEX_CUSTRING new_table_id;                 /* Unique id from frm */
  bool org_partitioned;
  bool new_partitioned;
};

void backup_init();
bool run_backup_stage(THD *thd, backup_stages stage);
bool backup_end(THD *thd);
void backup_set_alter_copy_lock(THD *thd, TABLE *altered_table);
bool backup_reset_alter_copy_lock(THD *thd);

bool backup_lock(THD *thd, TABLE_LIST *table);
void backup_unlock(THD *thd);
void backup_log_ddl(const backup_log_info *info);
#endif /* BACKUP_INCLUDED */

/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef BOUNDED_QUEUE_INCLUDED +#define BOUNDED_QUEUE_INCLUDED + +#include "my_base.h" +#include +#include "queues.h" +#include + +class Sort_param; + +/** + A priority queue with a fixed, limited size. + + This is a wrapper on top of QUEUE and the queue_xxx() functions. + It keeps the top-N elements which are inserted. + + Elements of type Element_type are pushed into the queue. + For each element, we call a user-supplied keymaker_function, + to generate a key of type Key_type for the element. + Instances of Key_type are compared with the user-supplied compare_function. + + The underlying QUEUE implementation needs one extra element for replacing + the lowest/highest element when pushing into a full queue. + */ +template +class Bounded_queue +{ +public: + Bounded_queue() + { + memset(&m_queue, 0, sizeof(m_queue)); + } + + ~Bounded_queue() + { + delete_queue(&m_queue); + } + + /** + Function for making sort-key from input data. + @param param Sort parameters. + @param to Where to put the key. + @param from The input data. + */ + typedef uint (*keymaker_function)(Sort_param *param, + Key_type *to, + Element_type *from, + bool packing_keys); + + /** + Function for comparing two keys. + @param n Pointer to number of bytes to compare. + @param a First key. + @param b Second key. 
+ @retval -1, 0, or 1 depending on whether the left argument is + less than, equal to, or greater than the right argument. + */ + typedef int (*compare_function)(size_t *n, Key_type **a, Key_type **b); + + /** + Initialize the queue. + + @param max_elements The size of the queue. + @param max_at_top Set to true if you want biggest element on top. + false: We keep the n largest elements. + pop() will return the smallest key in the result set. + true: We keep the n smallest elements. + pop() will return the largest key in the result set. + @param compare Compare function for elements, takes 3 arguments. + If NULL, we use get_ptr_compare(compare_length). + @param compare_length Length of the data (i.e. the keys) used for sorting. + @param keymaker Function which generates keys for elements. + @param sort_param Sort parameters. + @param sort_keys Array of pointers to keys to sort. + + @retval 0 OK, 1 Could not allocate memory. + + We do *not* take ownership of any of the input pointer arguments. + */ + int init(ha_rows max_elements, bool max_at_top, + compare_function compare, size_t compare_length, + keymaker_function keymaker, Sort_param *sort_param, + Key_type **sort_keys); + + /** + Pushes an element on the queue. + If the queue is already full, we discard one element. + Calls keymaker_function to generate a key for the element. + + @param element The element to be pushed. + */ + void push(Element_type *element); + + /** + Removes the top element from the queue. + + @retval Pointer to the (key of the) removed element. + + @note This function is for unit testing, where we push elements into to the + queue, and test that the appropriate keys are retained. + Interleaving of push() and pop() operations has not been tested. + */ + Key_type **pop() + { + // Don't return the extra element to the client code. 
+ if (queue_is_full((&m_queue))) + queue_remove(&m_queue, 0); + DBUG_ASSERT(m_queue.elements > 0); + if (m_queue.elements == 0) + return NULL; + return reinterpret_cast(queue_remove(&m_queue, 0)); + } + + /** + The number of elements in the queue. + */ + uint num_elements() const { return m_queue.elements; } + + /** + Is the queue initialized? + */ + bool is_initialized() const { return m_queue.max_elements > 0; } + +private: + Key_type **m_sort_keys; + size_t m_compare_length; + keymaker_function m_keymaker; + Sort_param *m_sort_param; + st_queue m_queue; +}; + + +template +int Bounded_queue::init(ha_rows max_elements, + bool max_at_top, + compare_function compare, + size_t compare_length, + keymaker_function keymaker, + Sort_param *sort_param, + Key_type **sort_keys) +{ + DBUG_ASSERT(sort_keys != NULL); + + m_sort_keys= sort_keys; + m_compare_length= compare_length; + m_keymaker= keymaker; + m_sort_param= sort_param; + // init_queue() takes an uint, and also does (max_elements + 1) + if (max_elements >= (UINT_MAX - 1)) + return 1; + if (compare == NULL) + compare= + reinterpret_cast(get_ptr_compare(compare_length)); + // We allocate space for one extra element, for replace when queue is full. + return init_queue(&m_queue, (uint) max_elements + 1, + 0, max_at_top, + reinterpret_cast(compare), + &m_compare_length, 0, 0); +} + + +template +void Bounded_queue::push(Element_type *element) +{ + DBUG_ASSERT(is_initialized()); + if (queue_is_full((&m_queue))) + { + // Replace top element with new key, and re-order the queue. + Key_type **pq_top= reinterpret_cast(queue_top(&m_queue)); + (void)(*m_keymaker)(m_sort_param, *pq_top, element, false); + queue_replace_top(&m_queue); + } else { + // Insert new key into the queue. 
+ (*m_keymaker)(m_sort_param, m_sort_keys[m_queue.elements], + element, false); + queue_insert(&m_queue, + reinterpret_cast(&m_sort_keys[m_queue.elements])); + } +} + +#endif // BOUNDED_QUEUE_INCLUDED diff --git a/sql/client_settings.h b/sql/client_settings.h new file mode 100644 index 00000000..cc4981ec --- /dev/null +++ b/sql/client_settings.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifndef CLIENT_SETTINGS_INCLUDED +#define CLIENT_SETTINGS_INCLUDED +#else +#error You have already included an client_settings.h and it should not be included twice +#endif /* CLIENT_SETTINGS_INCLUDED */ + +#include +#include + +/* + Note: CLIENT_CAPABILITIES is also defined in libmysql/client_settings.h. + When adding capabilities here, consider if they should be also added to + the libmysql version. 
+*/ +#define CLIENT_CAPABILITIES (CLIENT_MYSQL | \ + CLIENT_LONG_FLAG | \ + CLIENT_TRANSACTIONS | \ + CLIENT_PROTOCOL_41 | \ + CLIENT_SECURE_CONNECTION | \ + CLIENT_PLUGIN_AUTH | \ + CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA | \ + CLIENT_CONNECT_ATTRS) + +#define read_user_name(A) A[0]= 0 +#undef _CUSTOMCONFIG_ + +#define mysql_server_init(a,b,c) mysql_client_plugin_init() +#define mysql_server_end() mysql_client_plugin_deinit() + +#ifdef HAVE_REPLICATION +C_MODE_START +void slave_io_thread_detach_vio(); +C_MODE_END +#else +#define slave_io_thread_detach_vio() +#endif + diff --git a/sql/compat56.cc b/sql/compat56.cc new file mode 100644 index 00000000..3d857441 --- /dev/null +++ b/sql/compat56.cc @@ -0,0 +1,465 @@ +/* + Copyright (c) 2004, 2012, Oracle and/or its affiliates. + Copyright (c) 2013, MariaDB Foundation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "compat56.h" +#include "myisampack.h" +#include "my_time.h" + + +static const int my_max_usec_value[7] +{ + 0, + 900000, + 990000, + 999000, + 999900, + 999990, + 999999 +}; + + +/*** MySQL56 TIME low-level memory and disk representation routines ***/ + +/* + In-memory format: + + 1 bit sign (Used for sign, when on disk) + 1 bit unused (Reserved for wider hour range, e.g. 
       for intervals)
  10 bit hour   (0-836)
   6 bit minute (0-59)
   6 bit second (0-59)
  24 bits microseconds (0-999999)

  Total: 48 bits = 6 bytes
  Suhhhhhh.hhhhmmmm.mmssssss.ffffffff.ffffffff.ffffffff
*/


/**
  Convert time value to MySQL56 numeric packed representation.

  @param ltime  The value to convert.
  @return       Numeric packed representation.
*/
longlong TIME_to_longlong_time_packed(const MYSQL_TIME *ltime)
{
  DBUG_ASSERT(ltime->year == 0);
  DBUG_ASSERT(ltime->month == 0);
  // Mix days with hours: "1 00:10:10" -> "24:10:10"
  long hms= ((ltime->day * 24 + ltime->hour) << 12) |
            (ltime->minute << 6) | ltime->second;
  longlong tmp= MY_PACKED_TIME_MAKE(hms, ltime->second_part);
  return ltime->neg ? -tmp : tmp;
}



/**
  Convert MySQL56 time packed numeric representation to time.

  @param OUT ltime  The MYSQL_TIME variable to set.
  @param     tmp    The packed numeric representation.
*/
void TIME_from_longlong_time_packed(MYSQL_TIME *ltime, longlong tmp)
{
  long hms;
  if ((ltime->neg= (tmp < 0)))
    tmp= -tmp;
  hms= (long) MY_PACKED_TIME_GET_INT_PART(tmp);
  ltime->year=   (uint) 0;
  ltime->month=  (uint) 0;
  ltime->day=    (uint) 0;
  ltime->hour=   (uint) (hms >> 12) % (1 << 10); /* 10 bits starting at 12th */
  ltime->minute= (uint) (hms >> 6)  % (1 << 6);  /* 6 bits starting at 6th   */
  ltime->second= (uint)  hms        % (1 << 6);  /* 6 bits starting at 0th   */
  ltime->second_part= MY_PACKED_TIME_GET_FRAC_PART(tmp);
  ltime->time_type= MYSQL_TIMESTAMP_TIME;
}


/**
  Calculate binary size of MySQL56 packed numeric time representation.

  @param dec  Precision.
*/
uint my_time_binary_length(uint dec)
{
  DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS);
  return 3 + (dec + 1) / 2;
}


/*
  On disk we convert from signed representation to unsigned
  representation using TIMEF_OFS, so all values become binary comparable.
*/
#define TIMEF_OFS 0x800000000000LL
#define TIMEF_INT_OFS 0x800000LL


/**
  Convert MySQL56 in-memory numeric time representation to on-disk
  representation

  @param     nr   Value in packed numeric time format.
  @param OUT ptr  The buffer to put value at.
  @param     dec  Precision.
*/
void my_time_packed_to_binary(longlong nr, uchar *ptr, uint dec)
{
  DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS);
  /* Make sure the stored value was previously properly rounded or truncated */
  DBUG_ASSERT((MY_PACKED_TIME_GET_FRAC_PART(nr) %
               (int) log_10_int[TIME_SECOND_PART_DIGITS - dec]) == 0);

  switch (dec)
  {
  case 0:
  default:
    mi_int3store(ptr, TIMEF_INT_OFS + MY_PACKED_TIME_GET_INT_PART(nr));
    break;

  case 1:
  case 2:
    /* One extra byte holding (frac / 10000), i.e. 2 decimal digits */
    mi_int3store(ptr, TIMEF_INT_OFS + MY_PACKED_TIME_GET_INT_PART(nr));
    ptr[3]= (unsigned char) (char) (MY_PACKED_TIME_GET_FRAC_PART(nr) / 10000);
    break;

  case 4:
  case 3:
    /* Two extra bytes holding (frac / 100), i.e. 4 decimal digits */
    mi_int3store(ptr, TIMEF_INT_OFS + MY_PACKED_TIME_GET_INT_PART(nr));
    mi_int2store(ptr + 3, MY_PACKED_TIME_GET_FRAC_PART(nr) / 100);
    break;

  case 5:
  case 6:
    /* Full 6 bytes: store the packed value plus offset directly */
    mi_int6store(ptr, nr + TIMEF_OFS);
    break;
  }
}


/**
  Convert MySQL56 on-disk time representation to in-memory packed numeric
  representation.

  @param ptr  The pointer to read the value at.
  @param dec  Precision.
  @return     Packed numeric time representation.
*/
longlong my_time_packed_from_binary(const uchar *ptr, uint dec)
{
  DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS);

  switch (dec)
  {
  case 0:
  default:
    {
      longlong intpart= mi_uint3korr(ptr) - TIMEF_INT_OFS;
      return MY_PACKED_TIME_MAKE_INT(intpart);
    }
  case 1:
  case 2:
    {
      longlong intpart= mi_uint3korr(ptr) - TIMEF_INT_OFS;
      int frac= (uint) ptr[3];
      if (intpart < 0 && frac)
      {
        /*
          Negative values are stored with reverse fractional part order,
          for binary sort compatibility.

            Disk value  intpart frac   Time value   Memory value
            800000.00    0      0      00:00:00.00  0000000000.000000
            7FFFFF.FF   -1      255   -00:00:00.01  FFFFFFFFFF.FFD8F0
            7FFFFF.9D   -1      99    -00:00:00.99  FFFFFFFFFF.F0E4D0
            7FFFFF.00   -1      0     -00:00:01.00  FFFFFFFFFF.000000
            7FFFFE.FF   -1      255   -00:00:01.01  FFFFFFFFFE.FFD8F0
            7FFFFE.F6   -2      246   -00:00:01.10  FFFFFFFFFE.FE7960

          Formula to convert fractional part from disk format
          (now stored in "frac" variable) to absolute value: "0x100 - frac".
          To reconstruct in-memory value, we shift
          to the next integer value and then subtract the fractional part.
        */
        intpart++;    /* Shift to the next integer value */
        frac-= 0x100; /* -(0x100 - frac) */
      }
      return MY_PACKED_TIME_MAKE(intpart, frac * 10000);
    }

  case 3:
  case 4:
    {
      longlong intpart= mi_uint3korr(ptr) - TIMEF_INT_OFS;
      int frac= mi_uint2korr(ptr + 3);
      if (intpart < 0 && frac)
      {
        /*
          Fix reverse fractional part order: "0x10000 - frac".
          See comments for FSP=1 and FSP=2 above.
        */
        intpart++;      /* Shift to the next integer value */
        frac-= 0x10000; /* -(0x10000-frac) */
      }
      return MY_PACKED_TIME_MAKE(intpart, frac * 100);
    }

  case 5:
  case 6:
    return ((longlong) mi_uint6korr(ptr)) - TIMEF_OFS;
  }
}


/*** MySQL56 DATETIME low-level memory and disk representation routines ***/

/*
   1 bit  sign            (used when on disk)
  17 bits year*13+month   (year 0-9999, month 0-12)
   5 bits day             (0-31)
   5 bits hour            (0-23)
   6 bits minute          (0-59)
   6 bits second          (0-59)
  24 bits microseconds    (0-999999)

  Total: 64 bits = 8 bytes

  SYYYYYYY.YYYYYYYY.YYdddddh.hhhhmmmm.mmssssss.ffffffff.ffffffff.ffffffff
*/

/**
  Convert datetime to MySQL56 packed numeric datetime representation.
  @param ltime  The value to convert.
  @return       Packed numeric representation of ltime.
+*/ +longlong TIME_to_longlong_datetime_packed(const MYSQL_TIME *ltime) +{ + longlong ymd= ((ltime->year * 13 + ltime->month) << 5) | ltime->day; + longlong hms= (ltime->hour << 12) | (ltime->minute << 6) | ltime->second; + longlong tmp= MY_PACKED_TIME_MAKE(((ymd << 17) | hms), ltime->second_part); + DBUG_ASSERT(!check_datetime_range(ltime)); /* Make sure no overflow */ + return ltime->neg ? -tmp : tmp; +} + + +/** + Convert MySQL56 packed numeric datetime representation to MYSQL_TIME. + @param OUT ltime The datetime variable to convert to. + @param tmp The packed numeric datetime value. +*/ +void TIME_from_longlong_datetime_packed(MYSQL_TIME *ltime, longlong tmp) +{ + longlong ymd, hms; + longlong ymdhms, ym; + + DBUG_ASSERT(tmp != LONGLONG_MIN); + + if ((ltime->neg= (tmp < 0))) + tmp= -tmp; + + ltime->second_part= MY_PACKED_TIME_GET_FRAC_PART(tmp); + ymdhms= MY_PACKED_TIME_GET_INT_PART(tmp); + + ymd= ymdhms >> 17; + ym= ymd >> 5; + hms= ymdhms % (1 << 17); + + ltime->day= ymd % (1 << 5); + ltime->month= ym % 13; + ltime->year= (uint) (ym / 13); + + ltime->second= hms % (1 << 6); + ltime->minute= (hms >> 6) % (1 << 6); + ltime->hour= (uint) (hms >> 12); + + ltime->time_type= MYSQL_TIMESTAMP_DATETIME; +} + + +/** + Calculate binary size of MySQL56 packed datetime representation. + @param dec Precision. +*/ +uint my_datetime_binary_length(uint dec) +{ + DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS); + return 5 + (dec + 1) / 2; +} + + +/* + On disk we store as unsigned number with DATETIMEF_INT_OFS offset, + for HA_KETYPE_BINARY compatibility purposes. +*/ +#define DATETIMEF_INT_OFS 0x8000000000LL + + +/** + Convert MySQL56 on-disk datetime representation + to in-memory packed numeric representation. + + @param ptr The pointer to read value at. + @param dec Precision. + @return In-memory packed numeric datetime representation. 
+*/ +longlong my_datetime_packed_from_binary(const uchar *ptr, uint dec) +{ + longlong intpart= mi_uint5korr(ptr) - DATETIMEF_INT_OFS; + int frac; + DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS); + switch (dec) + { + case 0: + default: + return MY_PACKED_TIME_MAKE_INT(intpart); + case 1: + case 2: + frac= ((int) (signed char) ptr[5]) * 10000; + break; + case 3: + case 4: + frac= mi_sint2korr(ptr + 5) * 100; + break; + case 5: + case 6: + frac= mi_sint3korr(ptr + 5); + break; + } + return MY_PACKED_TIME_MAKE(intpart, frac); +} + + +/** + Store MySQL56 in-memory numeric packed datetime representation to disk. + + @param nr In-memory numeric packed datetime representation. + @param OUT ptr The pointer to store at. + @param dec Precision, 1-6. +*/ +void my_datetime_packed_to_binary(longlong nr, uchar *ptr, uint dec) +{ + DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS); + /* The value being stored must have been properly rounded or truncated */ + DBUG_ASSERT((MY_PACKED_TIME_GET_FRAC_PART(nr) % + (int) log_10_int[TIME_SECOND_PART_DIGITS - dec]) == 0); + + mi_int5store(ptr, MY_PACKED_TIME_GET_INT_PART(nr) + DATETIMEF_INT_OFS); + switch (dec) + { + case 0: + default: + break; + case 1: + case 2: + ptr[5]= (unsigned char) (char) (MY_PACKED_TIME_GET_FRAC_PART(nr) / 10000); + break; + case 3: + case 4: + mi_int2store(ptr + 5, MY_PACKED_TIME_GET_FRAC_PART(nr) / 100); + break; + case 5: + case 6: + mi_int3store(ptr + 5, MY_PACKED_TIME_GET_FRAC_PART(nr)); + } +} + + +/*** MySQL56 TIMESTAMP low-level memory and disk representation routines ***/ + +/** + Calculate on-disk size of a timestamp value. + + @param dec Precision. +*/ +uint my_timestamp_binary_length(uint dec) +{ + DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS); + return 4 + (dec + 1) / 2; +} + + +/** + Convert MySQL56 binary timestamp representation to in-memory representation. + + @param OUT tm The variable to convert to. + @param ptr The pointer to read the value from. + @param dec Precision. 
+*/ +void my_timestamp_from_binary(struct timeval *tm, const uchar *ptr, uint dec) +{ + DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS); + tm->tv_sec= mi_uint4korr(ptr); + switch (dec) + { + case 0: + default: + tm->tv_usec= 0; + return; + case 1: + case 2: + tm->tv_usec= ((int) ptr[4]) * 10000; + break; + case 3: + case 4: + tm->tv_usec= (uint) mi_uint2korr(ptr + 4) * 100; + break; + case 5: + case 6: + tm->tv_usec= (uint) mi_uint3korr(ptr + 4); + } + // The binary data my be corrupt. Cut fractional seconds to the valid range. + set_if_smaller(tm->tv_usec, my_max_usec_value[dec]); +} + + +/** + Convert MySQL56 in-memory timestamp representation to on-disk representation. + + @param tm The value to convert. + @param OUT ptr The pointer to store the value to. + @param dec Precision. +*/ +void my_timestamp_to_binary(const struct timeval *tm, uchar *ptr, uint dec) +{ + DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS); + /* Stored value must have been previously properly rounded or truncated */ + DBUG_ASSERT((tm->tv_usec % + (int) log_10_int[TIME_SECOND_PART_DIGITS - dec]) == 0); + mi_int4store(ptr, tm->tv_sec); + switch (dec) + { + case 0: + default: + break; + case 1: + case 2: + ptr[4]= (unsigned char) (char) (tm->tv_usec / 10000); + break; + case 3: + case 4: + mi_int2store(ptr + 4, tm->tv_usec / 100); + break; + /* Impossible second precision. Fall through */ + case 5: + case 6: + mi_int3store(ptr + 4, tm->tv_usec); + } +} + +/****************************************/ diff --git a/sql/compat56.h b/sql/compat56.h new file mode 100644 index 00000000..65cd36da --- /dev/null +++ b/sql/compat56.h @@ -0,0 +1,55 @@ +#ifndef COMPAT56_H_INCLUDED +#define COMPAT56_H_INCLUDED +/* + Copyright (c) 2004, 2012, Oracle and/or its affiliates. + Copyright (c) 2013 MariaDB Foundation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/** MySQL56 routines and macros **/ + +/* + Buffer size for a native TIMESTAMP representation, for use with NativBuffer. + 4 bytes for seconds + 3 bytes for microseconds + 1 byte for the trailing '\0' (class Native reserves extra 1 byte for '\0') +*/ +#define STRING_BUFFER_TIMESTAMP_BINARY_SIZE 8 /* 4 + 3 + 1 */ + +#define MY_PACKED_TIME_GET_INT_PART(x) ((x) >> 24) +#define MY_PACKED_TIME_GET_FRAC_PART(x) ((x) % (1LL << 24)) +#define MY_PACKED_TIME_MAKE(i, f) ((((ulonglong) (i)) << 24) + (f)) +#define MY_PACKED_TIME_MAKE_INT(i) ((((ulonglong) (i)) << 24)) + +longlong TIME_to_longlong_datetime_packed(const MYSQL_TIME *); +longlong TIME_to_longlong_time_packed(const MYSQL_TIME *); + +void TIME_from_longlong_datetime_packed(MYSQL_TIME *ltime, longlong nr); +void TIME_from_longlong_time_packed(MYSQL_TIME *ltime, longlong nr); + +void my_datetime_packed_to_binary(longlong nr, uchar *ptr, uint dec); +longlong my_datetime_packed_from_binary(const uchar *ptr, uint dec); +uint my_datetime_binary_length(uint dec); + +void my_time_packed_to_binary(longlong nr, uchar *ptr, uint dec); +longlong my_time_packed_from_binary(const uchar *ptr, uint dec); +uint my_time_binary_length(uint dec); + +void my_timestamp_to_binary(const struct timeval *tm, uchar *ptr, uint dec); +void my_timestamp_from_binary(struct timeval *tm, const uchar *ptr, uint dec); +uint my_timestamp_binary_length(uint dec); +/** End of MySQL routines and macros **/ + +#endif /* COMPAT56_H_INCLUDED */ diff --git a/sql/contributors.h b/sql/contributors.h new file 
mode 100644 index 00000000..bc8ba4ea --- /dev/null +++ b/sql/contributors.h @@ -0,0 +1,65 @@ +#ifndef CONTRIBUTORS_INCLUDED +#define CONTRIBUTORS_INCLUDED + +/* Copyright (c) 2006 MySQL AB, 2009 Sun Microsystems, Inc. + Use is subject to license terms. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* Structure of the name list */ + +struct show_table_contributors_st { + const char *name; + const char *location; + const char *comment; +}; + +/* + Output from "SHOW CONTRIBUTORS" + + Get permission before editing. + + Names should be encoded using UTF-8. 
+ + See also https://mariadb.com/kb/en/log-of-mariadb-contributions/ +*/ + +struct show_table_contributors_st show_table_contributors[]= { + /* MariaDB foundation sponsors, in contribution, size , time order */ + {"Alibaba Cloud", "https://www.alibabacloud.com/", "Platinum Sponsor of the MariaDB Foundation"}, + {"Tencent Cloud", "https://cloud.tencent.com", "Platinum Sponsor of the MariaDB Foundation"}, + {"Microsoft", "https://microsoft.com/", "Platinum Sponsor of the MariaDB Foundation"}, + {"MariaDB Corporation", "https://mariadb.com", "Founding member, Platinum Sponsor of the MariaDB Foundation"}, + {"ServiceNow", "https://servicenow.com", "Platinum Sponsor of the MariaDB Foundation"}, + {"Intel", "https://www.intel.com", "Platinum Sponsor of the MariaDB Foundation"}, + {"SIT", "https://sit.org", "Platinum Sponsor of the MariaDB Foundation"}, + {"Visma", "https://visma.com", "Gold Sponsor of the MariaDB Foundation"}, + {"DBS", "https://dbs.com", "Gold Sponsor of the MariaDB Foundation"}, + {"IBM", "https://www.ibm.com", "Gold Sponsor of the MariaDB Foundation"}, + {"Automattic", "https://automattic.com", "Silver Sponsor of the MariaDB Foundation"}, + {"Percona", "https://www.percona.com/", "Sponsor of the MariaDB Foundation"}, + {"Galera Cluster", "https://galeracluster.com", "Sponsor of the MariaDB Foundation"}, + + /* Sponsors of important features */ + {"Google", "USA", "Sponsoring encryption, parallel replication and GTID"}, + {"Facebook", "USA", "Sponsoring non-blocking API, LIMIT ROWS EXAMINED etc"}, + + /* Individual contributors, names in historical order, newer first */ + {"Ronald Bradford", "Brisbane, Australia", "EFF contribution for UC2006 Auction"}, + {"Sheeri Kritzer", "Boston, Mass. 
USA", "EFF contribution for UC2006 Auction"}, + {"Mark Shuttleworth", "London, UK.", "EFF contribution for UC2006 Auction"}, + {NULL, NULL, NULL} +}; + +#endif /* CONTRIBUTORS_INCLUDED */ diff --git a/sql/create_options.cc b/sql/create_options.cc new file mode 100644 index 00000000..cea5d7af --- /dev/null +++ b/sql/create_options.cc @@ -0,0 +1,879 @@ +/* Copyright (C) 2010, 2020, 2021, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + Engine defined options of tables/fields/keys in CREATE/ALTER TABLE. 
+*/ + +#include "mariadb.h" +#include "create_options.h" +#include "partition_info.h" +#include +#include "set_var.h" + +#define FRM_QUOTED_VALUE 0x8000U + +/** + Links this item to the given list end + + @param start The list beginning or NULL + @param end The list last element or does not matter +*/ + +void engine_option_value::link(engine_option_value **start, + engine_option_value **end) +{ + DBUG_ENTER("engine_option_value::link"); + DBUG_PRINT("enter", ("name: '%s' (%u) value: '%s' (%u)", + name.str, (uint) name.length, + value.str, (uint) value.length)); + engine_option_value *opt; + /* check duplicates to avoid writing them to frm*/ + for(opt= *start; + opt && ((opt->parsed && !opt->value.str) || + system_charset_info->strnncoll(name.str, name.length, + opt->name.str, opt->name.length)); + opt= opt->next) /* no-op */; + if (opt) + { + opt->value.str= NULL; /* remove previous value */ + opt->parsed= TRUE; /* and don't issue warnings for it anymore */ + } + /* + Add this option to the end of the list + + @note: We add even if it is opt->value.str == NULL because it can be + ALTER TABLE to remove the option. + */ + if (*start) + { + (*end)->next= this; + *end= this; + } + else + { + /* + note that is *start == 0, the value of *end does not matter, + it can be uninitialized. 
+ */ + *start= *end= this; + } + DBUG_VOID_RETURN; +} + +static bool report_wrong_value(THD *thd, const char *name, const char *val, + bool suppress_warning) +{ + if (suppress_warning) + return 0; + + if (!(thd->variables.sql_mode & MODE_IGNORE_BAD_TABLE_OPTIONS) && + !thd->slave_thread) + { + my_error(ER_BAD_OPTION_VALUE, MYF(0), val, name); + return 1; + } + + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_BAD_OPTION_VALUE, + ER_THD(thd, ER_BAD_OPTION_VALUE), val, name); + return 0; +} + +static bool report_unknown_option(THD *thd, engine_option_value *val, + bool suppress_warning) +{ + DBUG_ENTER("report_unknown_option"); + + if (val->parsed || suppress_warning || thd->slave_thread) + { + DBUG_PRINT("info", ("parsed => exiting")); + DBUG_RETURN(FALSE); + } + + if (!(thd->variables.sql_mode & MODE_IGNORE_BAD_TABLE_OPTIONS)) + { + my_error(ER_UNKNOWN_OPTION, MYF(0), val->name.str); + DBUG_RETURN(TRUE); + } + + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_UNKNOWN_OPTION, ER_THD(thd, ER_UNKNOWN_OPTION), + val->name.str); + DBUG_RETURN(FALSE); +} + +#define value_ptr(STRUCT,OPT) ((char*)(STRUCT) + (OPT)->offset) + +static bool set_one_value(ha_create_table_option *opt, + THD *thd, const LEX_CSTRING *value, void *base, + bool suppress_warning, + MEM_ROOT *root) +{ + DBUG_ENTER("set_one_value"); + DBUG_PRINT("enter", ("opt: %p type: %u name '%s' value: '%s'", + opt, + opt->type, opt->name, + (value->str ? 
value->str : ""))); + switch (opt->type) + { + case HA_OPTION_TYPE_SYSVAR: + // HA_OPTION_TYPE_SYSVAR's are replaced in resolve_sysvars() + break; // to DBUG_ASSERT(0) + case HA_OPTION_TYPE_ULL: + { + ulonglong *val= (ulonglong*)value_ptr(base, opt); + if (!value->str) + { + *val= opt->def_value; + DBUG_RETURN(0); + } + + my_option optp= + { opt->name, 1, 0, (uchar **)val, 0, 0, GET_ULL, + REQUIRED_ARG, (longlong)opt->def_value, (longlong)opt->min_value, + opt->max_value, 0, (long) opt->block_size, 0}; + + ulonglong orig_val= strtoull(value->str, NULL, 10); + my_bool unused; + *val= orig_val; + *val= getopt_ull_limit_value(*val, &optp, &unused); + if (*val == orig_val) + DBUG_RETURN(0); + + DBUG_RETURN(report_wrong_value(thd, opt->name, value->str, + suppress_warning)); + } + case HA_OPTION_TYPE_STRING: + { + char **val= (char **)value_ptr(base, opt); + if (!value->str) + { + *val= 0; + DBUG_RETURN(0); + } + + if (!(*val= strmake_root(root, value->str, value->length))) + DBUG_RETURN(1); + DBUG_RETURN(0); + } + case HA_OPTION_TYPE_ENUM: + { + uint *val= (uint *)value_ptr(base, opt), num; + + *val= (uint) opt->def_value; + if (!value->str) + DBUG_RETURN(0); + + const char *start= opt->values, *end; + + num= 0; + while (*start) + { + for (end=start; + *end && *end != ','; + end++) /* no-op */; + if (!system_charset_info->strnncoll(start, end-start, + value->str, value->length)) + { + *val= num; + DBUG_RETURN(0); + } + if (*end) + end++; + start= end; + num++; + } + + DBUG_RETURN(report_wrong_value(thd, opt->name, value->str, + suppress_warning)); + } + case HA_OPTION_TYPE_BOOL: + { + bool *val= (bool *)value_ptr(base, opt); + *val= opt->def_value; + + if (!value->str) + DBUG_RETURN(0); + + if (!system_charset_info->strnncoll("NO", 2, value->str, value->length) || + !system_charset_info->strnncoll("OFF", 3, value->str, value->length) || + !system_charset_info->strnncoll("0", 1, value->str, value->length)) + { + *val= FALSE; + DBUG_RETURN(FALSE); + } + + if 
(!system_charset_info->strnncoll("YES", 3, value->str, value->length) || + !system_charset_info->strnncoll("ON", 2, value->str, value->length) || + !system_charset_info->strnncoll("1", 1, value->str, value->length)) + { + *val= TRUE; + DBUG_RETURN(FALSE); + } + + DBUG_RETURN(report_wrong_value(thd, opt->name, value->str, + suppress_warning)); + } + } + DBUG_ASSERT(0); + my_error(ER_UNKNOWN_ERROR, MYF(0)); + DBUG_RETURN(1); +} + +static const size_t ha_option_type_sizeof[]= +{ sizeof(ulonglong), sizeof(char *), sizeof(uint), sizeof(bool)}; + +/** + Creates option structure and parses list of options in it + + @param thd thread handler + @param option_struct where to store pointer on the option struct + @param option_list list of options given by user + @param rules list of option description by engine + @param suppress_warning second parse so we do not need warnings + @param root MEM_ROOT where allocate memory + + @retval TRUE Error + @retval FALSE OK +*/ + +bool parse_option_list(THD* thd, handlerton *hton, void *option_struct_arg, + engine_option_value **option_list, + ha_create_table_option *rules, + bool suppress_warning, MEM_ROOT *root) +{ + ha_create_table_option *opt; + size_t option_struct_size= 0; + engine_option_value *val, *last; + void **option_struct= (void**)option_struct_arg; + DBUG_ENTER("parse_option_list"); + DBUG_PRINT("enter", + ("struct: %p list: %p rules: %p suppress_warning: %u root: %p", + *option_struct, *option_list, rules, + (uint) suppress_warning, root)); + + if (rules) + { + for (opt= rules; opt->name; opt++) + set_if_bigger(option_struct_size, opt->offset + + ha_option_type_sizeof[opt->type]); + + *option_struct= alloc_root(root, option_struct_size); + } + + for (opt= rules; rules && opt->name; opt++) + { + bool seen=false; + for (val= *option_list; val; val= val->next) + { + last= val; + if (system_charset_info->strnncoll(opt->name, opt->name_length, + val->name.str, val->name.length)) + continue; + + /* skip duplicates (see 
engine_option_value constructor above) */ + if (val->parsed && !val->value.str) + continue; + + if (set_one_value(opt, thd, &val->value, + *option_struct, suppress_warning || val->parsed, root)) + DBUG_RETURN(TRUE); + val->parsed= true; + seen=true; + break; + } + if (!seen || (opt->var && !last->value.str)) + { + LEX_CSTRING default_val= null_clex_str; + + /* + Okay, here's the logic for sysvar options: + 1. When we parse CREATE TABLE and sysvar option was not explicitly + mentioned we add it to the list as if it was specified with the + *current* value of the underlying sysvar. + 2. But only if the underlying sysvar value is different from the + sysvar's default. + 3. If it's ALTER TABLE or CREATE_SEQUENCE and the sysvar option was + not explicitly mentioned - do nothing, do not add it to the list. + 4. But if it was ALTER TABLE with sysvar option = DEFAULT, we + add it to the list (under the same condition #2). + 5. If we're here parsing the option list from the .frm file + for a normal open_table() and the sysvar option was not there - + do not add it to the list (makes no sense anyway) and + use the *default* value of the underlying sysvar. Because + sysvar value can change, but it should not affect existing tables. + This is how it's implemented: the current sysvar value is added + to the list if suppress_warning is FALSE (meaning a table is created, + that is CREATE TABLE or ALTER TABLE) and it's actually a CREATE TABLE + command or it's an ALTER TABLE and the option was seen (=DEFAULT). + + Note that if the option was set explicitly (not =DEFAULT) it wouldn't + have passes the if() condition above. 
+ */ + if (!suppress_warning && opt->var && + (thd->lex->sql_command == SQLCOM_CREATE_TABLE || seen)) + { + // take a value from the variable and add it to the list + sys_var *sysvar= find_hton_sysvar(hton, opt->var); + DBUG_ASSERT(sysvar); + + if (!sysvar->session_is_default(thd)) + { + char buf[256]; + String sbuf(buf, sizeof(buf), system_charset_info), *str; + if ((str= sysvar->val_str(&sbuf, thd, OPT_SESSION, &null_clex_str))) + { + LEX_CSTRING name= { opt->name, opt->name_length }; + default_val.str= strmake_root(root, str->ptr(), str->length()); + default_val.length= str->length(); + val= new (root) engine_option_value( + name, default_val, opt->type != HA_OPTION_TYPE_ULL); + if (!val) + DBUG_RETURN(TRUE); + val->link(option_list, &last); + val->parsed= true; + } + } + } + set_one_value(opt, thd, &default_val, *option_struct, + suppress_warning, root); + } + } + + for (val= *option_list; val; val= val->next) + { + if (report_unknown_option(thd, val, suppress_warning)) + DBUG_RETURN(TRUE); + val->parsed= true; + } + + DBUG_RETURN(FALSE); +} + + +/** + Resolves all HA_OPTION_TYPE_SYSVAR elements. + + This is done when an engine is loaded. 
+*/ +static bool resolve_sysvars(handlerton *hton, ha_create_table_option *rules) +{ + for (ha_create_table_option *opt= rules; rules && opt->name; opt++) + { + if (opt->type == HA_OPTION_TYPE_SYSVAR) + { + struct my_option optp; + plugin_opt_set_limits(&optp, opt->var); + switch(optp.var_type) { + case GET_ULL: + case GET_ULONG: + case GET_UINT: + opt->type= HA_OPTION_TYPE_ULL; + opt->def_value= (ulonglong)optp.def_value; + opt->min_value= (ulonglong)optp.min_value; + opt->max_value= (ulonglong)optp.max_value; + opt->block_size= (ulonglong)optp.block_size; + break; + case GET_STR: + case GET_STR_ALLOC: + opt->type= HA_OPTION_TYPE_STRING; + break; + case GET_BOOL: + opt->type= HA_OPTION_TYPE_BOOL; + opt->def_value= optp.def_value; + break; + case GET_ENUM: + { + opt->type= HA_OPTION_TYPE_ENUM; + opt->def_value= optp.def_value; + + char buf[256]; + String str(buf, sizeof(buf), system_charset_info); + str.length(0); + for (const char **s= optp.typelib->type_names; *s; s++) + { + if (str.append(*s, strlen(*s)) || str.append(',')) + return 1; + } + DBUG_ASSERT(str.length()); + opt->values= my_strndup(PSI_INSTRUMENT_ME, str.ptr(), str.length()-1, MYF(MY_WME)); + if (!opt->values) + return 1; + break; + } + default: + DBUG_ASSERT(0); + } + } + } + return 0; +} + +bool resolve_sysvar_table_options(handlerton *hton) +{ + return resolve_sysvars(hton, hton->table_options) || + resolve_sysvars(hton, hton->field_options) || + resolve_sysvars(hton, hton->index_options); +} + +/* + Restore HA_OPTION_TYPE_SYSVAR options back as they were + before resolve_sysvars(). + + This is done when the engine is unloaded, so that we could + call resolve_sysvars() if the engine is installed again. 
+*/ +static void free_sysvars(handlerton *hton, ha_create_table_option *rules) +{ + for (ha_create_table_option *opt= rules; rules && opt->name; opt++) + { + if (opt->var) + { + my_free(const_cast(opt->values)); + opt->type= HA_OPTION_TYPE_SYSVAR; + opt->def_value= 0; + opt->min_value= 0; + opt->max_value= 0; + opt->block_size= 0; + opt->values= 0; + } + } +} + +void free_sysvar_table_options(handlerton *hton) +{ + free_sysvars(hton, hton->table_options); + free_sysvars(hton, hton->field_options); + free_sysvars(hton, hton->index_options); +} + + +/** + Parses all table/fields/keys options + + @param thd thread handler + @param file handler of the table + @parem share descriptor of the table + + @retval TRUE Error + @retval FALSE OK +*/ + +bool parse_engine_table_options(THD *thd, handlerton *ht, TABLE_SHARE *share) +{ + MEM_ROOT *root= &share->mem_root; + DBUG_ENTER("parse_engine_table_options"); + + if (parse_option_list(thd, ht, &share->option_struct, & share->option_list, + ht->table_options, TRUE, root)) + DBUG_RETURN(TRUE); + + for (Field **field= share->field; *field; field++) + { + if (parse_option_list(thd, ht, &(*field)->option_struct, + & (*field)->option_list, + ht->field_options, TRUE, root)) + DBUG_RETURN(TRUE); + } + + for (uint index= 0; index < share->keys; index ++) + { + if (parse_option_list(thd, ht, &share->key_info[index].option_struct, + & share->key_info[index].option_list, + ht->index_options, TRUE, root)) + DBUG_RETURN(TRUE); + } + + DBUG_RETURN(FALSE); +} + +#ifdef WITH_PARTITION_STORAGE_ENGINE +/** + Parses engine-defined partition options + + @param [in] thd thread handler + @parem [in] table table with part_info + + @retval TRUE Error + @retval FALSE OK + + In the case of ALTER TABLE statements, table->part_info is set up + by mysql_unpack_partition(). So, one should not call the present + function before the call of mysql_unpack_partition(). 
+*/ +bool parse_engine_part_options(THD *thd, TABLE *table) +{ + MEM_ROOT *root= &table->mem_root; + TABLE_SHARE *share= table->s; + partition_info *part_info= table->part_info; + engine_option_value *tmp_option_list; + handlerton *ht; + DBUG_ENTER("parse_engine_part_options"); + + if (!part_info) + DBUG_RETURN(FALSE); + + List_iterator it(part_info->partitions); + while (partition_element *part_elem= it++) + { + if (merge_engine_options(share->option_list, part_elem->option_list, + &tmp_option_list, root)) + DBUG_RETURN(TRUE); + + if (!part_info->is_sub_partitioned()) + { + ht= part_elem->engine_type; + if (parse_option_list(thd, ht, &part_elem->option_struct, + &tmp_option_list, ht->table_options, TRUE, root)) + DBUG_RETURN(TRUE); + } + else + { + List_iterator sub_it(part_elem->subpartitions); + while (partition_element *sub_part_elem= sub_it++) + { + ht= sub_part_elem->engine_type; + if (parse_option_list(thd, ht, &sub_part_elem->option_struct, + &tmp_option_list, ht->table_options, TRUE, root)) + DBUG_RETURN(TRUE); + } + } + } + DBUG_RETURN(FALSE); +} +#endif + +bool engine_options_differ(void *old_struct, void *new_struct, + ha_create_table_option *rules) +{ + ha_create_table_option *opt; + for (opt= rules; rules && opt->name; opt++) + { + char **old_val= (char**)value_ptr(old_struct, opt); + char **new_val= (char**)value_ptr(new_struct, opt); + int neq; + if (opt->type == HA_OPTION_TYPE_STRING) + neq= (*old_val && *new_val) ? strcmp(*old_val, *new_val) : *old_val != *new_val; + else + neq= memcmp(old_val, new_val, ha_option_type_sizeof[opt->type]); + if (neq) + return true; + } + return false; +} + + +/** + Returns representation length of key and value in the frm file +*/ + +uint engine_option_value::frm_length() +{ + /* + 1 byte - name length + 2 bytes - value length + + if value.str is NULL, this option is not written to frm (=DEFAULT) + */ + return value.str ? 
(uint)(1 + name.length + 2 + value.length) : 0; +} + + +/** + Returns length of representation of option list in the frm file +*/ + +static uint option_list_frm_length(engine_option_value *opt) +{ + uint res= 0; + + for (; opt; opt= opt->next) + res+= opt->frm_length(); + + return res; +} + + +/** + Calculates length of options image in the .frm + + @param table_option_list list of table options + @param create_fields field descriptors list + @param keys number of keys + @param key_info array of key descriptors + + @returns length of image in frm +*/ + +uint engine_table_options_frm_length(engine_option_value *table_option_list, + List &create_fields, + uint keys, KEY *key_info) +{ + List_iterator it(create_fields); + Create_field *field; + uint res, index; + DBUG_ENTER("engine_table_options_frm_length"); + + res= option_list_frm_length(table_option_list); + + while ((field= it++)) + res+= option_list_frm_length(field->option_list); + + for (index= 0; index < keys; index++, key_info++) + res+= option_list_frm_length(key_info->option_list); + + /* + if there's at least one option somewhere (res > 0) + we write option lists for all fields and keys, zero-terminated. + If there're no options we write nothing at all (backward compatibility) + */ + DBUG_RETURN(res ? res + 1 + create_fields.elements + keys : 0); +} + + +/** + Writes image of the key and value to the frm image buffer + + @param buff pointer to the buffer free space beginning + + @returns pointer to byte after last recorded in the buffer +*/ + +uchar *engine_option_value::frm_image(uchar *buff) +{ + if (value.str) + { + DBUG_ASSERT(name.length <= 0xff); + *buff++= (uchar)name.length; + memcpy(buff, name.str, name.length); + buff+= name.length; + int2store(buff, value.length | (quoted_value ? 
FRM_QUOTED_VALUE : 0)); + buff+= 2; + memcpy(buff, (const uchar *) value.str, value.length); + buff+= value.length; + } + return buff; +} + +/** + Writes image of the key and value to the frm image buffer + + @param buff pointer to the buffer to store the options in + @param opt list of options; + + @returns pointer to the end of the stored data in the buffer +*/ +static uchar *option_list_frm_image(uchar *buff, engine_option_value *opt) +{ + for (; opt; opt= opt->next) + buff= opt->frm_image(buff); + + *buff++= 0; + return buff; +} + + +/** + Writes options image in the .frm buffer + + @param buff pointer to the buffer + @param table_option_list list of table options + @param create_fields field descriptors list + @param keys number of keys + @param key_info array of key descriptors + + @returns pointer to byte after last recorded in the buffer +*/ + +uchar *engine_table_options_frm_image(uchar *buff, + engine_option_value *table_option_list, + List &create_fields, + uint keys, KEY *key_info) +{ + List_iterator it(create_fields); + Create_field *field; + KEY *key_info_end= key_info + keys; + DBUG_ENTER("engine_table_options_frm_image"); + + buff= option_list_frm_image(buff, table_option_list); + + while ((field= it++)) + buff= option_list_frm_image(buff, field->option_list); + + while (key_info < key_info_end) + buff= option_list_frm_image(buff, (key_info++)->option_list); + + DBUG_RETURN(buff); +} + +/** + Reads name and value from buffer, then link it in the list + + @param buff the buffer to read from + @param start The list beginning or NULL + @param end The list last element or does not matter + @param root MEM_ROOT for allocating + + @returns pointer to byte after last recorded in the buffer +*/ +uchar *engine_option_value::frm_read(const uchar *buff, const uchar *buff_end, + engine_option_value **start, + engine_option_value **end, MEM_ROOT *root) +{ + LEX_CSTRING name, value; + uint len; +#define need_buff(N) if (buff + (N) >= buff_end) return NULL + + 
need_buff(3); + name.length= buff[0]; + buff++; + need_buff(name.length + 2); + if (!(name.str= strmake_root(root, (const char*)buff, name.length))) + return NULL; + buff+= name.length; + len= uint2korr(buff); + value.length= len & ~FRM_QUOTED_VALUE; + buff+= 2; + need_buff(value.length); + if (!(value.str= strmake_root(root, (const char*)buff, value.length))) + return NULL; + buff+= value.length; + + engine_option_value *ptr= + new (root) engine_option_value(name, value, len & FRM_QUOTED_VALUE); + if (!ptr) + return NULL; + ptr->link(start, end); + + return (uchar *)buff; +} + + +/** + Reads options from this buffer + + @param buff the buffer to read from + @param length buffer length + @param share table descriptor + @param root MEM_ROOT for allocating + + @retval TRUE Error + @retval FALSE OK +*/ + +bool engine_table_options_frm_read(const uchar *buff, size_t length, + TABLE_SHARE *share) +{ + const uchar *buff_end= buff + length; + engine_option_value *UNINIT_VAR(end); + MEM_ROOT *root= &share->mem_root; + uint count; + DBUG_ENTER("engine_table_options_frm_read"); + + while (buff < buff_end && *buff) + { + if (!(buff= engine_option_value::frm_read(buff, buff_end, + &share->option_list, &end, root))) + DBUG_RETURN(TRUE); + } + buff++; + + for (count=0; count < share->fields; count++) + { + while (buff < buff_end && *buff) + { + if (!(buff= engine_option_value::frm_read(buff, buff_end, + &share->field[count]->option_list, + &end, root))) + DBUG_RETURN(TRUE); + } + buff++; + } + + for (count=0; count < share->keys; count++) + { + while (buff < buff_end && *buff) + { + if (!(buff= engine_option_value::frm_read(buff, buff_end, + &share->key_info[count].option_list, + &end, root))) + DBUG_RETURN(TRUE); + } + buff++; + } + + if (buff < buff_end) + sql_print_warning("Table '%s' was created in a later MariaDB version - " + "unknown table attributes were ignored", + share->table_name.str); + + DBUG_RETURN(buff > buff_end); +} + +/** + Merges two lists of 
engine_option_value's with duplicate removal. + + @param [in] source option list + @param [in] changes option list whose options overwrite source's + @param [out] out new option list created by merging given two + @param [in] root MEM_ROOT for allocating memory + + @retval TRUE Error + @retval FALSE OK +*/ +bool merge_engine_options(engine_option_value *source, + engine_option_value *changes, + engine_option_value **out, MEM_ROOT *root) +{ + engine_option_value *UNINIT_VAR(end), *opt, *opt_copy; + *out= 0; + DBUG_ENTER("merge_engine_options"); + + /* Create copy of source list */ + for (opt= source; opt; opt= opt->next) + { + opt_copy= new (root) engine_option_value(opt); + if (!opt_copy) + DBUG_RETURN(TRUE); + opt_copy->link(out, &end); + } + + for (opt= changes; opt; opt= opt->next) + { + opt_copy= new (root) engine_option_value(opt); + if (!opt_copy) + DBUG_RETURN(TRUE); + opt_copy->link(out, &end); + } + DBUG_RETURN(FALSE); +} + +bool is_engine_option_known(engine_option_value *opt, + ha_create_table_option *rules) +{ + if (!rules) + return false; + + for (; rules->name; rules++) + { + if (!system_charset_info->strnncoll(rules->name, rules->name_length, + opt->name.str, opt->name.length)) + return true; + } + return false; +} + diff --git a/sql/create_options.h b/sql/create_options.h new file mode 100644 index 00000000..49612318 --- /dev/null +++ b/sql/create_options.h @@ -0,0 +1,103 @@ +/* Copyright (C) 2010, 2021, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + Engine defined options of tables/fields/keys in CREATE/ALTER TABLE. +*/ + +#ifndef SQL_CREATE_OPTIONS_INCLUDED +#define SQL_CREATE_OPTIONS_INCLUDED + +#include "sql_class.h" + +enum { ENGINE_OPTION_MAX_LENGTH=32767 }; + +class engine_option_value: public Sql_alloc +{ + public: + LEX_CSTRING name; + LEX_CSTRING value; + engine_option_value *next; ///< parser puts them in a FIFO linked list + bool parsed; ///< to detect unrecognized options + bool quoted_value; ///< option=VAL vs. option='VAL' + + engine_option_value(engine_option_value *src) : + name(src->name), value(src->value), + next(NULL), parsed(src->parsed), quoted_value(src->quoted_value) + { + } + engine_option_value(LEX_CSTRING &name_arg, LEX_CSTRING &value_arg, + bool quoted) : + name(name_arg), value(value_arg), + next(NULL), parsed(false), quoted_value(quoted) + { + } + engine_option_value(LEX_CSTRING &name_arg): + name(name_arg), value(null_clex_str), + next(NULL), parsed(false), quoted_value(false) + { + } + engine_option_value(LEX_CSTRING &name_arg, ulonglong value_arg, + MEM_ROOT *root) : + name(name_arg), next(NULL), parsed(false), quoted_value(false) + { + char *str; + if (likely((value.str= str= (char *)alloc_root(root, 22)))) + { + value.length= longlong10_to_str(value_arg, str, 10) - str; + } + } + static uchar *frm_read(const uchar *buff, const uchar *buff_end, + engine_option_value **start, + engine_option_value **end, MEM_ROOT *root); + void link(engine_option_value **start, engine_option_value **end); + uint frm_length(); + uchar *frm_image(uchar *buff); +}; + +typedef struct st_key KEY; +class Create_field; + +bool resolve_sysvar_table_options(handlerton *hton); +void free_sysvar_table_options(handlerton *hton); +bool parse_engine_table_options(THD *thd, 
handlerton *ht, TABLE_SHARE *share); +#ifdef WITH_PARTITION_STORAGE_ENGINE +bool parse_engine_part_options(THD *thd, TABLE *table); +#endif +bool parse_option_list(THD* thd, handlerton *hton, void *option_struct, + engine_option_value **option_list, + ha_create_table_option *rules, + bool suppress_warning, MEM_ROOT *root); +bool engine_table_options_frm_read(const uchar *buff, size_t length, + TABLE_SHARE *share); +bool merge_engine_options(engine_option_value *source, + engine_option_value *changes, + engine_option_value **out, MEM_ROOT *root); + +uint engine_table_options_frm_length(engine_option_value *table_option_list, + List &create_fields, + uint keys, KEY *key_info); +uchar *engine_table_options_frm_image(uchar *buff, + engine_option_value *table_option_list, + List &create_fields, + uint keys, KEY *key_info); + +bool engine_options_differ(void *old_struct, void *new_struct, + ha_create_table_option *rules); +bool is_engine_option_known(engine_option_value *opt, + ha_create_table_option *rules); +#endif diff --git a/sql/create_tmp_table.h b/sql/create_tmp_table.h new file mode 100644 index 00000000..ce86c945 --- /dev/null +++ b/sql/create_tmp_table.h @@ -0,0 +1,80 @@ +#ifndef CREATE_TMP_TABLE_INCLUDED +#define CREATE_TMP_TABLE_INCLUDED + +/* Copyright (c) 2021, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* + Class for creating internal tempory tables in sql_select.cc +*/ + +class Create_tmp_table: public Data_type_statistics +{ +protected: + // The following members are initialized only in start() + Field **m_from_field, **m_default_field; + KEY_PART_INFO *m_key_part_info; + uchar *m_group_buff, *m_bitmaps; + // The following members are initialized in ctor + uint m_alloced_field_count; + bool m_using_unique_constraint; + uint m_temp_pool_slot; + ORDER *m_group; + bool m_distinct; + bool m_save_sum_fields; + bool m_with_cycle; + ulonglong m_select_options; + ha_rows m_rows_limit; + uint m_group_null_items; + + // counter for distinct/other fields + uint m_field_count[2]; + // counter for distinct/other fields which can be NULL + uint m_null_count[2]; + // counter for distinct/other blob fields + uint m_blobs_count[2]; + // counter for "tails" of bit fields which do not fit in a byte + uint m_uneven_bit[2]; + +public: + enum counter {distinct, other}; + /* + shows which field we are processing: distinct/other (set in processing + cycles) + */ + counter current_counter; + Create_tmp_table(ORDER *group, bool distinct, bool save_sum_fields, + ulonglong select_options, ha_rows rows_limit); + virtual ~Create_tmp_table() {} + virtual bool choose_engine(THD *thd, TABLE *table, TMP_TABLE_PARAM *param); + void add_field(TABLE *table, Field *field, uint fieldnr, + bool force_not_null_cols); + TABLE *start(THD *thd, + TMP_TABLE_PARAM *param, + const LEX_CSTRING *table_alias); + bool add_fields(THD *thd, TABLE *table, + TMP_TABLE_PARAM *param, List &fields); + + bool add_schema_fields(THD *thd, TABLE *table, + TMP_TABLE_PARAM *param, + const ST_SCHEMA_TABLE &schema_table); + + bool finalize(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, + bool do_not_open, bool 
keep_row_order); + void cleanup_on_failure(THD *thd, TABLE *table); +}; + +#endif /* CREATE_TMP_TABLE_INCLUDED */ diff --git a/sql/cset_narrowing.cc b/sql/cset_narrowing.cc new file mode 100644 index 00000000..abdaec16 --- /dev/null +++ b/sql/cset_narrowing.cc @@ -0,0 +1,35 @@ +/* + Copyright (c) 2023, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_select.h" + +Charset_utf8narrow utf8mb3_from_mb4; + +bool Utf8_narrow::should_do_narrowing(const THD *thd, + CHARSET_INFO *field_cset, + CHARSET_INFO *value_cset) +{ + return optimizer_flag(thd, OPTIMIZER_SWITCH_CSET_NARROWING) && + field_cset == &my_charset_utf8mb3_general_ci && + value_cset == &my_charset_utf8mb4_general_ci; +} + diff --git a/sql/cset_narrowing.h b/sql/cset_narrowing.h new file mode 100644 index 00000000..bb0a3960 --- /dev/null +++ b/sql/cset_narrowing.h @@ -0,0 +1,143 @@ +/* + Copyright (c) 2023, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef CSET_NARROWING_H_INCLUDED +#define CSET_NARROWING_H_INCLUDED + +/* + A singleton class to provide "utf8mb3_from_mb4.charset()". + + This is a variant of utf8mb3_general_ci that one can use when they have data + in MB4 and want to make index lookup keys in MB3. +*/ +extern +class Charset_utf8narrow +{ + struct my_charset_handler_st cset_handler; + struct charset_info_st cset; +public: + Charset_utf8narrow() : + cset_handler(*my_charset_utf8mb3_general_ci.cset), + cset(my_charset_utf8mb3_general_ci) /* Copy the CHARSET_INFO structure */ + { + /* Insert our function wc_mb */ + cset_handler.wc_mb= my_wc_mb_utf8mb4_bmp_only; + cset.cset=&cset_handler; + + /* Charsets are compared by their name, so assign a different name */ + LEX_CSTRING tmp= {STRING_WITH_LEN("utf8_mb4_to_mb3")}; + cset.cs_name= tmp; + } + + CHARSET_INFO *charset() { return &cset; } + +} utf8mb3_from_mb4; + + +/* + A class to temporary change a field that uses utf8mb3_general_ci to enable + correct lookup key construction from string value in utf8mb4_general_ci + + Intended usage: + + // can do this in advance: + bool do_narrowing= Utf8_narrow::should_do_narrowing(field, value_cset); + ... 
+ + // This sets the field to do narrowing if necessary: + Utf8_narrow narrow(field, do_narrowing); + + // write to 'field' here + // item->save_in_field(field) or something else + + // Stop doing narrowing + narrow.stop(); +*/ + +class Utf8_narrow +{ + Field *field; + DTCollation save_collation; + +public: + static bool should_do_narrowing(const THD *thd, CHARSET_INFO *field_cset, + CHARSET_INFO *value_cset); + + static bool should_do_narrowing(const Field *field, CHARSET_INFO *value_cset) + { + CHARSET_INFO *field_cset= field->charset(); + THD *thd= field->table->in_use; + return should_do_narrowing(thd, field_cset, value_cset); + } + + Utf8_narrow(Field *field_arg, bool is_applicable) + { + field= NULL; + if (is_applicable) + { + DTCollation mb3_from_mb4= utf8mb3_from_mb4.charset(); + field= field_arg; + save_collation= field->dtcollation(); + field->change_charset(mb3_from_mb4); + } + } + + void stop() + { + if (field) + field->change_charset(save_collation); +#ifndef NDEBUG + field= NULL; +#endif + } + + ~Utf8_narrow() + { + DBUG_ASSERT(!field); + } +}; + + +/* + @brief + Check if two fields can participate in a multiple equality using charset + narrowing. + + @detail + Normally, check_simple_equality() checks this by calling: + + left_field->eq_def(right_field) + + This function does the same but takes into account we might use charset + narrowing: + - collations are not the same but rather an utf8mb{3,4}_general_ci pair + - for field lengths, should compare # characters, not #bytes. 
+*/ + +inline +bool fields_equal_using_narrowing(const THD *thd, const Field *left, const Field *right) +{ + return + dynamic_cast(left) && + dynamic_cast(right) && + left->real_type() == right->real_type() && + (Utf8_narrow::should_do_narrowing(left, right->charset()) || + Utf8_narrow::should_do_narrowing(right, left->charset())) && + left->char_length() == right->char_length(); +}; + + +#endif /* CSET_NARROWING_H_INCLUDED */ diff --git a/sql/custom_conf.h b/sql/custom_conf.h new file mode 100644 index 00000000..f0bb619a --- /dev/null +++ b/sql/custom_conf.h @@ -0,0 +1,28 @@ +/* Copyright (c) 2000, 2006 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef __MYSQL_CUSTOM_BUILD_CONFIG__ +#define __MYSQL_CUSTOM_BUILD_CONFIG__ + +#define MYSQL_PORT 5002 +#ifdef _WIN32 +#define MYSQL_NAMEDPIPE "SwSqlServer" +#define MYSQL_SERVICENAME "SwSqlServer" +#define KEY_SERVICE_PARAMETERS +"SYSTEM\\CurrentControlSet\\Services\\SwSqlServer\\Parameters" +#endif + +#endif /* __MYSQL_CUSTOM_BUILD_CONFIG__ */ diff --git a/sql/datadict.cc b/sql/datadict.cc new file mode 100644 index 00000000..e85478a7 --- /dev/null +++ b/sql/datadict.cc @@ -0,0 +1,273 @@ +/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2017, 2022, MariaDB corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "datadict.h" +#include "sql_priv.h" +#include "sql_class.h" +#include "sql_table.h" +#include "ha_sequence.h" + +static int read_string(File file, uchar**to, size_t length) +{ + DBUG_ENTER("read_string"); + + /* This can't use MY_THREAD_SPECIFIC as it's used on server start */ + if (!(*to= (uchar*) my_malloc(PSI_INSTRUMENT_ME, length+1,MYF(MY_WME))) || + mysql_file_read(file, *to, length, MYF(MY_NABP))) + { + my_free(*to); + *to= 0; + DBUG_RETURN(1); + } + *((char*) *to+length)= '\0'; // C-style safety + DBUG_RETURN (0); +} + + +/** + Check type of .frm if we are not going to parse it. + + @param[in] thd The current session. + @param[in] path path to FRM file. + @param[in/out] engine_name table engine name (length < NAME_CHAR_LEN) + + engine_name is a LEX_CSTRING, where engine_name->str must point to + a buffer of at least NAME_CHAR_LEN+1 bytes. 
+ If engine_name is 0, then the function will only test if the file is a + view or not + + @retval TABLE_TYPE_UNKNOWN error - file can't be opened + @retval TABLE_TYPE_NORMAL table + @retval TABLE_TYPE_SEQUENCE sequence table + @retval TABLE_TYPE_VIEW view +*/ + +Table_type dd_frm_type(THD *thd, char *path, LEX_CSTRING *engine_name, + LEX_CSTRING *partition_engine_name, + LEX_CUSTRING *table_version) +{ + File file; + uchar header[64+ MY_UUID_SIZE + 2]; // Header and uuid + size_t error; + Table_type type= TABLE_TYPE_UNKNOWN; + uchar dbt; + DBUG_ENTER("dd_frm_type"); + + file= mysql_file_open(key_file_frm, path, O_RDONLY | O_SHARE, MYF(0)); + if (file < 0) + DBUG_RETURN(TABLE_TYPE_UNKNOWN); + + /* + We return TABLE_TYPE_NORMAL if we can open the .frm file. This allows us + to drop a bad .frm file with DROP TABLE + */ + type= TABLE_TYPE_NORMAL; + + /* + Initialize engine name in case we are not able to find it out + The cast is safe, as engine_name->str points to a usable buffer. + */ + if (engine_name) + { + engine_name->length= 0; + ((char*) (engine_name->str))[0]= 0; + } + if (partition_engine_name) + { + partition_engine_name->length= 0; + partition_engine_name->str= 0; + } + if (table_version) + { + table_version->length= 0; + table_version->str= 0; // Allocated if needed + } + if (unlikely((error= mysql_file_read(file, (uchar*) header, sizeof(header), + MYF(MY_NABP))))) + goto err; + + if (unlikely((!strncmp((char*) header, "TYPE=VIEW\n", 10)))) + { + type= TABLE_TYPE_VIEW; + goto err; + } + + if (!is_binary_frm_header(header)) + goto err; + + dbt= header[3]; + + if ((header[39] & 0x30) == (HA_CHOICE_YES << 4)) + { + DBUG_PRINT("info", ("Sequence found")); + type= TABLE_TYPE_SEQUENCE; + } + + if (table_version) + { + /* Read the table version (if it is a 'new' frm file) */ + if (header[64] == EXTRA2_TABLEDEF_VERSION && header[65] == MY_UUID_SIZE) + if ((table_version->str= (uchar*) thd->memdup(header + 66, MY_UUID_SIZE))) + table_version->length= MY_UUID_SIZE; 
+ } + + /* cannot use ha_resolve_by_legacy_type without a THD */ + if (thd && dbt < DB_TYPE_FIRST_DYNAMIC) + { + handlerton *ht= ha_resolve_by_legacy_type(thd, (legacy_db_type) dbt); + if (ht) + { + if (engine_name) + *engine_name= hton2plugin[ht->slot]->name; +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (partition_engine_name && dbt == DB_TYPE_PARTITION_DB) + { + handlerton *p_ht; + legacy_db_type new_dbt= (legacy_db_type) header[61]; + if (new_dbt >= DB_TYPE_FIRST_DYNAMIC) + goto cont; + if (!(p_ht= ha_resolve_by_legacy_type(thd, new_dbt))) + goto err; + *partition_engine_name= *hton_name(p_ht); + } +#endif // WITH_PARTITION_STORAGE_ENGINE + goto err; + } + } + +#ifdef WITH_PARTITION_STORAGE_ENGINE +cont: +#endif + /* read the true engine name */ + if (engine_name) + { + MY_STAT state; + uchar *frm_image= 0; + uint n_length; + + if (mysql_file_fstat(file, &state, MYF(MY_WME))) + goto err; + + MSAN_STAT_WORKAROUND(&state); + + if (mysql_file_seek(file, 0, SEEK_SET, MYF(MY_WME))) + goto err; + + if (read_string(file, &frm_image, (size_t)state.st_size)) + goto err; + + /* The test for !engine_name->length is only true for partition engine */ + if (!engine_name->length && (n_length= uint4korr(frm_image+55))) + { + uint record_offset= uint2korr(frm_image+6)+ + ((uint2korr(frm_image+14) == 0xffff ? + uint4korr(frm_image+47) : uint2korr(frm_image+14))); + uint reclength= uint2korr(frm_image+16); + + uchar *next_chunk= frm_image + record_offset + reclength; + uchar *buff_end= next_chunk + n_length; + uint connect_string_length= uint2korr(next_chunk); + next_chunk+= connect_string_length + 2; + if (next_chunk + 2 < buff_end) + { + uint len= uint2korr(next_chunk); + if (len <= NAME_CHAR_LEN) + { + /* + The following cast is safe as the caller has allocated buffer + and it's up to this function to generate the name. 
+ */ + strmake((char*) engine_name->str, (char*)next_chunk + 2, + engine_name->length= len); + } + } + } + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (partition_engine_name && dbt == DB_TYPE_PARTITION_DB) + { + uint len; + const uchar *extra2; + /* Length of the MariaDB extra2 segment in the form file. */ + len = uint2korr(frm_image+4); + extra2= frm_image + 64; + if (*extra2 != '/') // old frm had '/' there + { + const uchar *e2end= extra2 + len; + while (extra2 + 3 <= e2end) + { + uchar type= *extra2++; + size_t length= *extra2++; + if (!length) + { + if (extra2 + 2 >= e2end) + break; + length= uint2korr(extra2); + extra2+= 2; + if (length < 256) + break; + } + if (extra2 + length > e2end) + break; + if (type == EXTRA2_DEFAULT_PART_ENGINE) + { + partition_engine_name->str= thd->strmake((char*)extra2, length); + partition_engine_name->length= length; + break; + } + extra2+= length; + } + } + } +#endif // WITH_PARTITION_STORAGE_ENGINE + my_free(frm_image); + } + + /* Probably a table. */ +err: + mysql_file_close(file, MYF(MY_WME)); + DBUG_RETURN(type); +} + + +/* + Regenerate a metadata locked table. + + @param thd Thread context. + @param db Name of the database to which the table belongs to. + @param name Table name. + + @retval FALSE Success. + @retval TRUE Error. +*/ + +bool dd_recreate_table(THD *thd, const char *db, const char *table_name) +{ + HA_CREATE_INFO create_info; + char path_buf[FN_REFLEN + 1]; + DBUG_ENTER("dd_recreate_table"); + + /* There should be a exclusive metadata lock on the table. */ + DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, db, table_name, + MDL_EXCLUSIVE)); + create_info.init(); + build_table_filename(path_buf, sizeof(path_buf) - 1, + db, table_name, "", 0); + /* Attempt to reconstruct the table. 
*/ + DBUG_RETURN(ha_create_table(thd, path_buf, db, table_name, &create_info, 0, 0)); +} + diff --git a/sql/datadict.h b/sql/datadict.h new file mode 100644 index 00000000..bec093aa --- /dev/null +++ b/sql/datadict.h @@ -0,0 +1,52 @@ +#ifndef DATADICT_INCLUDED +#define DATADICT_INCLUDED +/* Copyright (c) 2010, Oracle and/or its affiliates. + Copyright (c) 2017 MariaDB corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "handler.h" + +/* + Data dictionary API. +*/ + +enum Table_type +{ + TABLE_TYPE_UNKNOWN, + TABLE_TYPE_NORMAL, /* Normal table */ + TABLE_TYPE_SEQUENCE, + TABLE_TYPE_VIEW +}; + +/* + Take extra care when using dd_frm_type() - it only checks the .frm file, + and it won't work for any engine that supports discovery. + + Prefer to use ha_table_exists() instead. + To check whether it's an frm of a view, use dd_frm_is_view(). 
+*/ + +enum Table_type dd_frm_type(THD *thd, char *path, LEX_CSTRING *engine_name, + LEX_CSTRING *partition_engine_name, + LEX_CUSTRING *table_version); + +static inline bool dd_frm_is_view(THD *thd, char *path) +{ + return dd_frm_type(thd, path, NULL, NULL, NULL) == TABLE_TYPE_VIEW; +} + +bool dd_recreate_table(THD *thd, const char *db, const char *table_name); + +#endif // DATADICT_INCLUDED diff --git a/sql/ddl_log.cc b/sql/ddl_log.cc new file mode 100644 index 00000000..ffd2c24f --- /dev/null +++ b/sql/ddl_log.cc @@ -0,0 +1,3587 @@ +/* + Copyright (c) 2000, 2019, Oracle and/or its affiliates. + Copyright (c) 2010, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include "mariadb.h" +#include "mysqld.h" +#include "sql_class.h" // init_sql_alloc() +#include "log.h" // sql_print_error() +#include "ddl_log.h" +#include "ha_partition.h" // PAR_EXT +#include "sql_table.h" // build_table_filename +#include "sql_statistics.h" // rename_table_in_stats_tables +#include "sql_view.h" // mysql_rename_view() +#include "strfunc.h" // strconvert +#include "sql_show.h" // append_identifier() +#include "sql_db.h" // drop_database_objects() +#include // EE_LINK + + +/*-------------------------------------------------------------------------- + + MODULE: DDL log + ----------------- + + This module is used to ensure that we can recover from crashes that + occur in the middle of a meta-data operation in MySQL. E.g. DROP + TABLE t1, t2; We need to ensure that both t1 and t2 are dropped and + not only t1 and also that each table drop is entirely done and not + "half-baked". + + To support this we create log entries for each meta-data statement + in the ddl log while we are executing. These entries are dropped + when the operation is completed. + + At recovery those entries that were not completed will be executed. + + There is only one ddl log in the system and it is protected by a mutex + and there is a global struct that contains information about its current + state. + + DDL recovery after a crash works the following way: + + - ddl_log_initialize() initializes the global global_ddl_log variable + and opens the binary log if it exists. If it doesn't exists a new one + is created. + - ddl_log_close_binlogged_events() loops over all log events and checks if + their xid (stored in the EXECUTE_CODE event) is in the binary log. If xid + exists in the binary log the entry is marked as finished in the ddl log. 
+ - After a new binary log is created and is open for new entries, + ddl_log_execute_recovery() is executed on remaining open events: + - Loop over all events + - For each entry with DDL_LOG_ENTRY_CODE execute the remaining phases + in ddl_log_execute_entry_no_lock() + + The ddl_log.log file is created at startup and deleted when server goes down. + After the final recovery phase is done, the file is truncated. + + History: + First version written in 2006 by Mikael Ronstrom + Second version in 2020 by Monty +--------------------------------------------------------------------------*/ + +#define DDL_LOG_MAGIC_LENGTH 4 +/* How many times to try to execute a ddl log entry that causes crashes */ +#define DDL_LOG_MAX_RETRY 3 +#define DDL_LOG_RETRY_MASK 0xFF +#define DDL_LOG_RETRY_BITS 8 + +uchar ddl_log_file_magic[]= +{ (uchar) 254, (uchar) 254, (uchar) 11, (uchar) 2 }; + +/* Action names for ddl_log_action_code */ + +const char *ddl_log_action_name[DDL_LOG_LAST_ACTION]= +{ + "Unknown", "partitioning delete", "partitioning rename", + "partitioning replace", "partitioning exchange", + "rename table", "rename view", + "initialize drop table", "drop table", + "drop view", "drop trigger", "drop db", "create table", "create view", + "delete tmp file", "create trigger", "alter table", "store query" +}; + +/* Number of phases per entry */ +const uchar ddl_log_entry_phases[DDL_LOG_LAST_ACTION]= +{ + 0, 1, 1, 2, + (uchar) EXCH_PHASE_END, (uchar) DDL_RENAME_PHASE_END, 1, 1, + (uchar) DDL_DROP_PHASE_END, 1, 1, + (uchar) DDL_DROP_DB_PHASE_END, (uchar) DDL_CREATE_TABLE_PHASE_END, + (uchar) DDL_CREATE_VIEW_PHASE_END, 0, (uchar) DDL_CREATE_TRIGGER_PHASE_END, + DDL_ALTER_TABLE_PHASE_END, 1 +}; + + +struct st_global_ddl_log +{ + uchar *file_entry_buf; + DDL_LOG_MEMORY_ENTRY *first_free; + DDL_LOG_MEMORY_ENTRY *first_used; + File file_id; + uint num_entries; + uint name_pos; + uint io_size; + bool initialized; + bool open, backup_done, created; +}; + +/* + The following structure is only 
used during startup recovery + for writing queries to the binary log. + */ + +class st_ddl_recovery { +public: + String drop_table; + String drop_view; + String query; + String db; + size_t drop_table_init_length, drop_view_init_length; + char current_db[NAME_LEN]; + uint execute_entry_pos; + ulonglong xid; +}; + +static st_global_ddl_log global_ddl_log; +static st_ddl_recovery recovery_state; + +mysql_mutex_t LOCK_gdl; + +/* Positions to different data in a ddl log block */ +static constexpr unsigned DDL_LOG_ENTRY_TYPE_POS= 0; +/* + Note that ACTION_TYPE and PHASE_POS must be after each other. + See update_phase() +*/ +static constexpr unsigned DDL_LOG_ACTION_TYPE_POS= 1; +static constexpr unsigned DDL_LOG_PHASE_POS= 2; +static constexpr unsigned DDL_LOG_NEXT_ENTRY_POS= 4; +/* Flags to remember something unique about the query, like if .frm was used */ +static constexpr unsigned DDL_LOG_FLAG_POS= 8; +/* Used to store XID entry that was written to binary log */ +static constexpr unsigned DDL_LOG_XID_POS= 10; +/* Used to store unique uuid from the .frm file */ +static constexpr unsigned DDL_LOG_UUID_POS= 18; +/* ID_POS can be used to store something unique, like file size (4 bytes) */ +static constexpr unsigned DDL_LOG_ID_POS= DDL_LOG_UUID_POS + MY_UUID_SIZE; +static constexpr unsigned DDL_LOG_END_POS= DDL_LOG_ID_POS + 8; + +/* + Position to where names are stored in the ddl log blocks. The current + value is stored in the header and can thus be changed if we need more + space for constants in the header than what is between DDL_LOG_ID_POS and + DDL_LOG_TMP_NAME_POS. 
+*/ +static constexpr unsigned DDL_LOG_TMP_NAME_POS= 56; + +/* Definitions for the ddl log header, the first block in the file */ +/* IO_SIZE is stored in the header and can thus be changed */ +static constexpr unsigned DDL_LOG_IO_SIZE= IO_SIZE; + +/* Header is stored in positions 0-3 */ +static constexpr unsigned DDL_LOG_IO_SIZE_POS= 4; +static constexpr unsigned DDL_LOG_NAME_OFFSET_POS= 6; +/* Marks if we have done a backup of the ddl log */ +static constexpr unsigned DDL_LOG_BACKUP_OFFSET_POS= 8; +/* Sum of the above variables */ +static constexpr unsigned DDL_LOG_HEADER_SIZE= 4+2+2+1; + +static void ddl_log_free_lists(); + +/** + Sync the ddl log file. + + @return Operation status + @retval FALSE Success + @retval TRUE Error +*/ + +static bool ddl_log_sync_file() +{ + DBUG_ENTER("ddl_log_sync_file"); + DBUG_RETURN(mysql_file_sync(global_ddl_log.file_id, MYF(MY_WME))); +} + +/* Same as above, but ensure we have the LOCK_gdl locked */ + +static bool ddl_log_sync_no_lock() +{ + DBUG_ENTER("ddl_log_sync_no_lock"); + + mysql_mutex_assert_owner(&LOCK_gdl); + DBUG_RETURN(ddl_log_sync_file()); +} + + +/** + Create ddl log file name. + @param file_name Filename setup +*/ + +static inline void create_ddl_log_file_name(char *file_name, bool backup) +{ + fn_format(file_name, opt_ddl_recovery_file, mysql_data_home, + backup ? "-backup.log" : ".log", MYF(MY_REPLACE_EXT)); +} + + +/** + Write ddl log header. 
+ + @return Operation status + @retval TRUE Error + @retval FALSE Success +*/ + +static bool write_ddl_log_header() +{ + uchar header[DDL_LOG_HEADER_SIZE]; + DBUG_ENTER("write_ddl_log_header"); + + memcpy(&header, ddl_log_file_magic, DDL_LOG_MAGIC_LENGTH); + int2store(&header[DDL_LOG_IO_SIZE_POS], global_ddl_log.io_size); + int2store(&header[DDL_LOG_NAME_OFFSET_POS], global_ddl_log.name_pos); + header[DDL_LOG_BACKUP_OFFSET_POS]= 0; + + if (mysql_file_pwrite(global_ddl_log.file_id, + header, sizeof(header), 0, + MYF(MY_WME | MY_NABP))) + DBUG_RETURN(TRUE); + DBUG_RETURN(ddl_log_sync_file()); +} + + +/* + Mark in the ddl log file that we have made a backup of it +*/ + +static void mark_ddl_log_header_backup_done() +{ + uchar marker[1]; + marker[0]= 1; + (void) mysql_file_pwrite(global_ddl_log.file_id, + marker, sizeof(marker), DDL_LOG_BACKUP_OFFSET_POS, + MYF(MY_WME | MY_NABP)); +} + + +void ddl_log_create_backup_file() +{ + char org_file_name[FN_REFLEN]; + char backup_file_name[FN_REFLEN]; + + create_ddl_log_file_name(org_file_name, 0); + create_ddl_log_file_name(backup_file_name, 1); + + my_copy(org_file_name, backup_file_name, MYF(MY_WME)); + mark_ddl_log_header_backup_done(); +} + + +/** + Read one entry from ddl log file. + + @param entry_pos Entry number to read + + @return Operation status + @retval true Error + @retval false Success +*/ + +static bool read_ddl_log_file_entry(uint entry_pos) +{ + uchar *file_entry_buf= global_ddl_log.file_entry_buf; + size_t io_size= global_ddl_log.io_size; + DBUG_ENTER("read_ddl_log_file_entry"); + + mysql_mutex_assert_owner(&LOCK_gdl); + DBUG_RETURN (mysql_file_pread(global_ddl_log.file_id, + file_entry_buf, io_size, + io_size * entry_pos, + MYF(MY_WME | MY_NABP))); +} + + +/** + Write one entry to ddl log file. 
+ + @param entry_pos Entry number to write + + @return + @retval true Error + @retval false Success +*/ + +static bool write_ddl_log_file_entry(uint entry_pos) +{ + bool error= FALSE; + File file_id= global_ddl_log.file_id; + uchar *file_entry_buf= global_ddl_log.file_entry_buf; + DBUG_ENTER("write_ddl_log_file_entry"); + + mysql_mutex_assert_owner(&LOCK_gdl); // To be removed + DBUG_RETURN(mysql_file_pwrite(file_id, file_entry_buf, + global_ddl_log.io_size, + global_ddl_log.io_size * entry_pos, + MYF(MY_WME | MY_NABP))); + DBUG_RETURN(error); +} + + +/** + Update phase of ddl log entry + + @param entry_pos ddl_log entry to update + @param phase New phase + + @return + @retval 0 ok +  @retval 1 Write error. Error given + + This is done without locks as it's guaranteed to be atomic +*/ + +static bool update_phase(uint entry_pos, uchar phase) +{ + DBUG_ENTER("update_phase"); + DBUG_PRINT("ddl_log", ("pos: %u phase: %u", entry_pos, (uint) phase)); + + DBUG_RETURN(mysql_file_pwrite(global_ddl_log.file_id, &phase, 1, + global_ddl_log.io_size * entry_pos + + DDL_LOG_PHASE_POS, + MYF(MY_WME | MY_NABP)) || + ddl_log_sync_file()); +} + + +/* + Update flags in ddl log entry + + This is not synced as it usually followed by a phase change, which will sync. 
+*/ + +static bool update_flags(uint entry_pos, uint16 flags) +{ + uchar buff[2]; + DBUG_ENTER("update_flags"); + + int2store(buff, flags); + DBUG_RETURN(mysql_file_pwrite(global_ddl_log.file_id, buff, sizeof(buff), + global_ddl_log.io_size * entry_pos + + DDL_LOG_FLAG_POS, + MYF(MY_WME | MY_NABP))); +} + + +static bool update_next_entry_pos(uint entry_pos, uint next_entry) +{ + uchar buff[4]; + DBUG_ENTER("update_next_entry_pos"); + + DBUG_PRINT("ddl_log", ("pos: %u->%u", entry_pos, next_entry)); + + int4store(buff, next_entry); + DBUG_RETURN(mysql_file_pwrite(global_ddl_log.file_id, buff, sizeof(buff), + global_ddl_log.io_size * entry_pos + + DDL_LOG_NEXT_ENTRY_POS, + MYF(MY_WME | MY_NABP))); +} + + +static bool update_xid(uint entry_pos, ulonglong xid) +{ + uchar buff[8]; + DBUG_ENTER("update_xid"); + + int8store(buff, xid); + DBUG_RETURN(mysql_file_pwrite(global_ddl_log.file_id, buff, sizeof(buff), + global_ddl_log.io_size * entry_pos + + DDL_LOG_XID_POS, + MYF(MY_WME | MY_NABP)) || + ddl_log_sync_file()); +} + + +static bool update_unique_id(uint entry_pos, ulonglong id) +{ + uchar buff[8]; + DBUG_ENTER("update_unique_xid"); + + int8store(buff, id); + DBUG_RETURN(mysql_file_pwrite(global_ddl_log.file_id, buff, sizeof(buff), + global_ddl_log.io_size * entry_pos + + DDL_LOG_ID_POS, + MYF(MY_WME | MY_NABP)) || + ddl_log_sync_file()); +} + + +/* + Disable an execute entry + + @param entry_pos ddl_log entry to update + + Notes: + We don't need sync here as this is mainly done during + recover phase to mark already done entries. We instead sync all entries + at the same time. 
+*/ + +static bool disable_execute_entry(uint entry_pos) +{ + uchar buff[1]; + DBUG_ENTER("disable_execute_entry"); + DBUG_PRINT("ddl_log", ("pos: {%u}", entry_pos)); + + buff[0]= DDL_LOG_IGNORE_ENTRY_CODE; + DBUG_RETURN(mysql_file_pwrite(global_ddl_log.file_id, buff, sizeof(buff), + global_ddl_log.io_size * entry_pos + + DDL_LOG_ENTRY_TYPE_POS, + MYF(MY_WME | MY_NABP))); +} + +/* + Disable an execute entry +*/ + +bool ddl_log_disable_execute_entry(DDL_LOG_MEMORY_ENTRY **active_entry) +{ + bool res= disable_execute_entry((*active_entry)->entry_pos); + ddl_log_sync_no_lock(); + return res; +} + + +/* + Check if an executive entry is active + + @return 0 Entry is active + @return 1 Entry is not active +*/ + +static bool is_execute_entry_active(uint entry_pos) +{ + uchar buff[1]; + DBUG_ENTER("disable_execute_entry"); + + if (mysql_file_pread(global_ddl_log.file_id, buff, sizeof(buff), + global_ddl_log.io_size * entry_pos + + DDL_LOG_ENTRY_TYPE_POS, + MYF(MY_WME | MY_NABP))) + DBUG_RETURN(1); + DBUG_RETURN(buff[0] == (uchar) DDL_LOG_EXECUTE_CODE); +} + + +/** + Read header of ddl log file. + + When we read the ddl log header we get information about maximum sizes + of names in the ddl log and we also get information about the number + of entries in the ddl log. + + This is read only once at server startup, so no mutex is needed. + + @return Last entry in ddl log (0 if no entries). 
  @return -1 if log could not be opened or could not be read
*/

static int read_ddl_log_header(const char *file_name)
{
  uchar header[DDL_LOG_HEADER_SIZE];
  int max_entry;
  int file_id;
  uint io_size;
  DBUG_ENTER("read_ddl_log_header");

  if ((file_id= mysql_file_open(key_file_global_ddl_log,
                                file_name,
                                O_RDWR | O_BINARY, MYF(0))) < 0)
    DBUG_RETURN(-1);

  if (mysql_file_read(file_id,
                      header, sizeof(header), MYF(MY_WME | MY_NABP)))
  {
    /* Write message into error log */
    sql_print_error("DDL_LOG: Failed to read ddl log file '%s' during "
                    "recovery", file_name);
    goto err;
  }

  if (memcmp(header, ddl_log_file_magic, 4))
  {
    /* Probably upgrade from MariaDB 10.5 or earlier (old log format) */
    sql_print_warning("DDL_LOG: Wrong header in %s. Assuming it is an old "
                      "recovery file from MariaDB 10.5 or earlier. "
                      "Skipping DDL recovery", file_name);
    goto err;
  }

  io_size= uint2korr(&header[DDL_LOG_IO_SIZE_POS]);
  global_ddl_log.name_pos= uint2korr(&header[DDL_LOG_NAME_OFFSET_POS]);
  global_ddl_log.backup_done= header[DDL_LOG_BACKUP_OFFSET_POS];

  /* Number of entries is derived from the file size / block size */
  max_entry= (uint) (mysql_file_seek(file_id, 0L, MY_SEEK_END, MYF(0)) /
                     io_size);
  if (max_entry)
    max_entry--;                                // Don't count first block

  if (!(global_ddl_log.file_entry_buf= (uchar*)
        my_malloc(key_memory_DDL_LOG_MEMORY_ENTRY, io_size,
                  MYF(MY_WME | MY_ZEROFILL))))
    goto err;

  global_ddl_log.open= TRUE;
  global_ddl_log.created= 0;
  global_ddl_log.file_id= file_id;
  global_ddl_log.num_entries= max_entry;
  global_ddl_log.io_size= io_size;
  DBUG_RETURN(max_entry);

err:
  if (file_id >= 0)
    my_close(file_id, MYF(0));
  /* We return -1 to force the ddl log to be re-created */
  DBUG_RETURN(-1);
}


/*
  Store and read strings in ddl log buffers

  Format is:
    2 byte: length (not counting end \0)
    X byte: string value of length 'length'
    1 byte: \0
*/

/* Append one length-prefixed string; returns position after the string */

static uchar *store_string(uchar *pos, uchar *end, const LEX_CSTRING *str)
{
  uint32 length= (uint32) str->length;
  if (unlikely(pos + 2 + length + 1 > end))
  {
    DBUG_ASSERT(0);
    return end;                                 // Overflow
  }

  int2store(pos, length);
  if (likely(length))
    memcpy(pos+2, str->str, length);
  pos[2+length]= 0;                             // Store end \0
  return pos + 2 + length +1;
}


/*
  Read one length-prefixed string; advances *pos past it.
  On overflow *pos is set to end so all later calls also return "".
*/

static LEX_CSTRING get_string(uchar **pos, const uchar *end)
{
  LEX_CSTRING tmp;
  uint32 length;
  if (likely(*pos + 3 <= end))
  {
    length= uint2korr(*pos);
    if (likely(*pos + 2 + length + 1 <= end))
    {
      char *str= (char*) *pos+2;
      *pos= *pos + 2 + length + 1;
      tmp.str= str;
      tmp.length= length;
      return tmp;
    }
  }
  /*
    Overflow on read, should never happen
    Set *pos to end to ensure any future calls also returns empty string
  */
  DBUG_ASSERT(0);
  *pos= (uchar*) end;
  tmp.str= "";
  tmp.length= 0;
  return tmp;
}


/**
  Convert from ddl_log_entry struct to file_entry_buf binary blob.

  @param ddl_log_entry   filled in ddl_log_entry struct.
*/

static void set_global_from_ddl_log_entry(const DDL_LOG_ENTRY *ddl_log_entry)
{
  uchar *file_entry_buf= global_ddl_log.file_entry_buf, *pos, *end;

  mysql_mutex_assert_owner(&LOCK_gdl);

  /* Fixed-position fields first */
  file_entry_buf[DDL_LOG_ENTRY_TYPE_POS]= (uchar) ddl_log_entry->entry_type;
  file_entry_buf[DDL_LOG_ACTION_TYPE_POS]= (uchar) ddl_log_entry->action_type;
  file_entry_buf[DDL_LOG_PHASE_POS]= (uchar) ddl_log_entry->phase;
  int4store(file_entry_buf+DDL_LOG_NEXT_ENTRY_POS, ddl_log_entry->next_entry);
  int2store(file_entry_buf+DDL_LOG_FLAG_POS, ddl_log_entry->flags);
  int8store(file_entry_buf+DDL_LOG_XID_POS, ddl_log_entry->xid);
  memcpy(file_entry_buf+DDL_LOG_UUID_POS, ddl_log_entry->uuid, MY_UUID_SIZE);
  int8store(file_entry_buf+DDL_LOG_ID_POS, ddl_log_entry->unique_id);
  bzero(file_entry_buf+DDL_LOG_END_POS,
        global_ddl_log.name_pos - DDL_LOG_END_POS);

  /* Then the variable-length names, starting at name_pos */
  pos= file_entry_buf + global_ddl_log.name_pos;
  end= file_entry_buf + global_ddl_log.io_size;

  pos= store_string(pos, end, &ddl_log_entry->handler_name);
  pos= store_string(pos, end,
                    &ddl_log_entry->db);
  pos= store_string(pos, end, &ddl_log_entry->name);
  pos= store_string(pos, end, &ddl_log_entry->from_handler_name);
  pos= store_string(pos, end, &ddl_log_entry->from_db);
  pos= store_string(pos, end, &ddl_log_entry->from_name);
  pos= store_string(pos, end, &ddl_log_entry->tmp_name);
  pos= store_string(pos, end, &ddl_log_entry->extra_name);
  /* Zero the tail so the on-disk block content is deterministic */
  bzero(pos, global_ddl_log.io_size - (pos - file_entry_buf));
}


/*
  Calculate how much space we have left in the log entry for one string

  This can be used to check if we have space to store the query string
  in the block.
*/

static size_t ddl_log_free_space_in_entry(const DDL_LOG_ENTRY *ddl_log_entry)
{
  size_t length= global_ddl_log.name_pos + 3*7; // 3 byte per string below
  length+= ddl_log_entry->handler_name.length;
  length+= ddl_log_entry->db.length;
  length+= ddl_log_entry->name.length;
  length+= ddl_log_entry->from_handler_name.length;
  length+= ddl_log_entry->from_db.length;
  length+= ddl_log_entry->from_name.length;
  length+= ddl_log_entry->tmp_name.length;
  length+= ddl_log_entry->extra_name.length;
  return global_ddl_log.io_size - length - 3;   // 3 is for storing next string
}


/**
  Convert from file_entry_buf binary blob to ddl_log_entry struct.

  @param[out] ddl_log_entry  struct to fill in.
  @param read_entry          entry position the buffer was read from.

  @note Strings (names) are pointing to the global_ddl_log structure,
  so LOCK_gdl needs to be hold until they are read or copied.
*/

static void set_ddl_log_entry_from_global(DDL_LOG_ENTRY *ddl_log_entry,
                                          const uint read_entry)
{
  uchar *file_entry_buf= global_ddl_log.file_entry_buf, *pos;
  const uchar *end= file_entry_buf + global_ddl_log.io_size;
  uchar single_char;

  mysql_mutex_assert_owner(&LOCK_gdl);
  ddl_log_entry->entry_pos= read_entry;
  single_char= file_entry_buf[DDL_LOG_ENTRY_TYPE_POS];
  ddl_log_entry->entry_type= (enum ddl_log_entry_code) single_char;
  single_char= file_entry_buf[DDL_LOG_ACTION_TYPE_POS];
  ddl_log_entry->action_type= (enum ddl_log_action_code) single_char;
  ddl_log_entry->phase= file_entry_buf[DDL_LOG_PHASE_POS];
  ddl_log_entry->next_entry= uint4korr(&file_entry_buf[DDL_LOG_NEXT_ENTRY_POS]);
  ddl_log_entry->flags= uint2korr(file_entry_buf + DDL_LOG_FLAG_POS);
  ddl_log_entry->xid= uint8korr(file_entry_buf + DDL_LOG_XID_POS);
  ddl_log_entry->unique_id= uint8korr(file_entry_buf + DDL_LOG_ID_POS);
  memcpy(ddl_log_entry->uuid, file_entry_buf+ DDL_LOG_UUID_POS, MY_UUID_SIZE);

  /* Names are stored in the same order as written by store_string() */
  pos= file_entry_buf + global_ddl_log.name_pos;
  ddl_log_entry->handler_name= get_string(&pos, end);
  ddl_log_entry->db= get_string(&pos, end);
  ddl_log_entry->name= get_string(&pos, end);
  ddl_log_entry->from_handler_name= get_string(&pos, end);
  ddl_log_entry->from_db= get_string(&pos, end);
  ddl_log_entry->from_name= get_string(&pos, end);
  ddl_log_entry->tmp_name= get_string(&pos, end);
  ddl_log_entry->extra_name= get_string(&pos, end);
}


/**
  Read a ddl log entry.

  Read a specified entry in the ddl log.
+ + @param read_entry Number of entry to read + @param[out] entry_info Information from entry + + @return Operation status + @retval TRUE Error + @retval FALSE Success +*/ + +static bool read_ddl_log_entry(uint read_entry, DDL_LOG_ENTRY *ddl_log_entry) +{ + DBUG_ENTER("read_ddl_log_entry"); + + if (read_ddl_log_file_entry(read_entry)) + { + sql_print_error("DDL_LOG: Failed to read entry %u", read_entry); + DBUG_RETURN(TRUE); + } + set_ddl_log_entry_from_global(ddl_log_entry, read_entry); + DBUG_RETURN(FALSE); +} + + +/** + Create the ddl log file + + @return Operation status + @retval TRUE Error + @retval FALSE Success +*/ + +static bool create_ddl_log() +{ + char file_name[FN_REFLEN]; + DBUG_ENTER("create_ddl_log"); + + ddl_log_free_lists(); + global_ddl_log.open= 0; + global_ddl_log.created= 1; + global_ddl_log.num_entries= 0; + global_ddl_log.name_pos= DDL_LOG_TMP_NAME_POS; + global_ddl_log.num_entries= 0; + global_ddl_log.backup_done= 0; + + /* + Fix file_entry_buf if the old log had a different io_size or if open of old + log didn't succeed. 
+ */ + if (global_ddl_log.io_size != DDL_LOG_IO_SIZE) + { + uchar *ptr= (uchar*) + my_realloc(key_memory_DDL_LOG_MEMORY_ENTRY, + global_ddl_log.file_entry_buf, DDL_LOG_IO_SIZE, + MYF(MY_WME | MY_ALLOW_ZERO_PTR)); + if (ptr) // Resize succeded */ + { + global_ddl_log.file_entry_buf= ptr; + global_ddl_log.io_size= DDL_LOG_IO_SIZE; + } + if (!global_ddl_log.file_entry_buf) + DBUG_RETURN(TRUE); + } + DBUG_ASSERT(global_ddl_log.file_entry_buf); + bzero(global_ddl_log.file_entry_buf, global_ddl_log.io_size); + create_ddl_log_file_name(file_name, 0); + if ((global_ddl_log.file_id= + mysql_file_create(key_file_global_ddl_log, + file_name, CREATE_MODE, + O_RDWR | O_TRUNC | O_BINARY, + MYF(MY_WME | ME_ERROR_LOG))) < 0) + { + /* Couldn't create ddl log file, this is serious error */ + sql_print_error("DDL_LOG: Failed to create ddl log file: %s", file_name); + my_free(global_ddl_log.file_entry_buf); + global_ddl_log.file_entry_buf= 0; + DBUG_RETURN(TRUE); + } + if (write_ddl_log_header()) + { + (void) mysql_file_close(global_ddl_log.file_id, MYF(MY_WME)); + my_free(global_ddl_log.file_entry_buf); + global_ddl_log.file_entry_buf= 0; + DBUG_RETURN(TRUE); + } + global_ddl_log.open= TRUE; + DBUG_RETURN(FALSE); +} + + +/** + Open ddl log and initialise ddl log variables + Create a backuip of of +*/ + +bool ddl_log_initialize() +{ + char file_name[FN_REFLEN]; + DBUG_ENTER("ddl_log_initialize"); + + bzero(&global_ddl_log, sizeof(global_ddl_log)); + global_ddl_log.file_id= (File) -1; + global_ddl_log.initialized= 1; + + mysql_mutex_init(key_LOCK_gdl, &LOCK_gdl, MY_MUTEX_INIT_SLOW); + + create_ddl_log_file_name(file_name, 0); + if (unlikely(read_ddl_log_header(file_name) < 0)) + { + /* Fatal error, log not opened. Recreate it */ + if (create_ddl_log()) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +/** + @brief Deactivate an individual entry. + + @details For complex rename operations we need to deactivate individual + entries. 

  During replace operations where we start with an existing table called
  t1 and a replacement table called t1#temp or something else and where
  we want to delete t1 and rename t1#temp to t1 this is not possible to
  do in a safe manner unless the ddl log is informed of the phases in
  the change.

  Delete actions are 1-phase actions that can be ignored immediately after
  being executed.
  Rename actions from x to y is also a 1-phase action since there is no
  interaction with any other handlers named x and y.
  Replace action where drop y and x -> y happens needs to be a two-phase
  action. Thus the first phase will drop y and the second phase will
  rename x -> y.

  @param entry_pos  Entry position of record to change

  @return Operation status
  @retval TRUE   Error
  @retval FALSE  Success
*/

static bool ddl_log_increment_phase_no_lock(uint entry_pos)
{
  uchar *file_entry_buf= global_ddl_log.file_entry_buf;
  DBUG_ENTER("ddl_log_increment_phase_no_lock");

  mysql_mutex_assert_owner(&LOCK_gdl);
  if (!read_ddl_log_file_entry(entry_pos))
  {
    ddl_log_entry_code code= ((ddl_log_entry_code)
                              file_entry_buf[DDL_LOG_ENTRY_TYPE_POS]);
    ddl_log_action_code action= ((ddl_log_action_code)
                                 file_entry_buf[DDL_LOG_ACTION_TYPE_POS]);

    if (code == DDL_LOG_ENTRY_CODE && action < (uint) DDL_LOG_LAST_ACTION)
    {
      /*
        Log entry:
        Increase the phase by one. If complete mark it done (IGNORE).
      */
      char phase= file_entry_buf[DDL_LOG_PHASE_POS]+ 1;
      if (ddl_log_entry_phases[action] <= phase)
      {
        DBUG_ASSERT(phase == ddl_log_entry_phases[action]);
        /* Same effect as setting DDL_LOG_IGNORE_ENTRY_CODE */
        phase= DDL_LOG_FINAL_PHASE;
      }
      file_entry_buf[DDL_LOG_PHASE_POS]= phase;
      if (update_phase(entry_pos, phase))
        DBUG_RETURN(TRUE);
    }
    else
    {
      /*
        Trying to deactivate an execute entry or an already inactive entry.
        This should not happen
      */
      DBUG_ASSERT(0);
    }
  }
  else
  {
    sql_print_error("DDL_LOG: Failed in reading entry before updating it");
    DBUG_RETURN(TRUE);
  }
  DBUG_RETURN(FALSE);
}


/*
  Increment phase and sync ddl log. This expects LOCK_gdl to be locked
*/

static bool increment_phase(uint entry_pos)
{
  if (ddl_log_increment_phase_no_lock(entry_pos))
    return 1;
  ddl_log_sync_no_lock();
  return 0;
}


/*
  Ignore errors from the file system about:
  - Non existing tables or file (from drop table or delete file)
  - Error about tables files that already exists.
  - Error from delete table (from Drop_table_error_handler)
  - Wrong trigger definer (from Drop_table_error_handler)
*/

class ddl_log_error_handler : public Internal_error_handler
{
public:
  int handled_errors;                   // Count of suppressed errors
  int unhandled_errors;                 // Count of real (non-ignorable) errors
  int first_error;                      // First non-ignored error number
  bool only_ignore_non_existing_errors; // Restrict the ignored set

  ddl_log_error_handler() : handled_errors(0), unhandled_errors(0),
                            first_error(0), only_ignore_non_existing_errors(0)
  {}

  bool handle_condition(THD *thd,
                        uint sql_errno,
                        const char* sqlstate,
                        Sql_condition::enum_warning_level *level,
                        const char* msg,
                        Sql_condition ** cond_hdl)
  {
    *cond_hdl= NULL;
    if (non_existing_table_error(sql_errno) ||
        (!only_ignore_non_existing_errors &&
         (sql_errno == EE_LINK ||
          sql_errno == EE_DELETE || sql_errno == ER_TRG_NO_DEFINER)))
    {
      handled_errors++;
      return TRUE;                      // Condition handled (suppressed)
    }
    if (!first_error)
      first_error= sql_errno;

    if (*level == Sql_condition::WARN_LEVEL_ERROR)
      unhandled_errors++;
    return FALSE;
  }
  /* TRUE if only ignorable errors were trapped */
  bool safely_trapped_errors()
  {
    return (handled_errors > 0 && unhandled_errors == 0);
  }
};


/*
  Build a filename for a table, trigger file or .frm
  Delete also any temporary file suffixed with ~

  @return 0  Temporary file deleted
  @return 1  No temporary file found
*/

static bool build_filename_and_delete_tmp_file(char *path, size_t path_length,
                                               const LEX_CSTRING *db,
                                               const LEX_CSTRING *name,
                                               const
                                               char *ext,
                                               PSI_file_key psi_key)
{
  bool deleted;
  uint length= build_table_filename(path, path_length-1,
                                    db->str, name->str, ext, 0);
  path[length]= '~';
  path[length+1]= 0;
  /*
    NOTE(review): 'deleted' is true when mysql_file_delete() FAILED, i.e.
    when there was no temporary file to remove.  The name is misleading,
    but the value matches the return contract documented above.
  */
  deleted= mysql_file_delete(psi_key, path, MYF(0)) != 0;
  path[length]= 0;                      // Restore name without '~' suffix
  return deleted;
}


/* Comment appended to every binlog statement generated by ddl recovery */
static LEX_CSTRING end_comment=
{ STRING_WITH_LEN(" /* generated by ddl recovery */")};


/**
  Log DROP query to binary log with comment

  This function is only run during recovery.  LOCK_gdl is temporarily
  released while writing to the binary log so that the ddl log mutex is
  not held over a potentially slow binlog write.
*/

static void ddl_log_to_binary_log(THD *thd, String *query)
{
  LEX_CSTRING thd_db= thd->db;

  lex_string_set(&thd->db, recovery_state.current_db);
  query->length(query->length()-1);     // Remove end ','
  query->append(&end_comment);
  mysql_mutex_unlock(&LOCK_gdl);
  (void) thd->binlog_query(THD::STMT_QUERY_TYPE,
                           query->ptr(), query->length(),
                           TRUE, FALSE, FALSE, 0);
  mysql_mutex_lock(&LOCK_gdl);
  thd->db= thd_db;                      // Restore original current db
}


/**
  Log DROP TABLE/VIEW to binary log when needed

  @result 0 Nothing was done
  @result 1 Query was logged to binary log & query was reset

  Logging happens in the following cases
  - This is the last DROP entry
  - The query could be longer than max_packet_length if we would add another
    table name to the query

  When we log, we always log all found tables and views at the same time.
  This is done to simplify the execute code as otherwise we would have to
  keep information of what was logged.
*/

static bool ddl_log_drop_to_binary_log(THD *thd, DDL_LOG_ENTRY *ddl_log_entry,
                                       String *query)
{
  DBUG_ENTER("ddl_log_drop_to_binary_log");
  if (mysql_bin_log.is_open())
  {
    /*
      Log when this is the last entry in the chain, or when adding one more
      table name could push the query past max_allowed_packet.
    */
    if (!ddl_log_entry->next_entry ||
        query->length() + end_comment.length + NAME_LEN + 100 >
        thd->variables.max_allowed_packet)
    {
      if (recovery_state.drop_table.length() >
          recovery_state.drop_table_init_length)
      {
        ddl_log_to_binary_log(thd, &recovery_state.drop_table);
        recovery_state.drop_table.length(recovery_state.drop_table_init_length);
      }
      if (recovery_state.drop_view.length() >
          recovery_state.drop_view_init_length)
      {
        ddl_log_to_binary_log(thd, &recovery_state.drop_view);
        recovery_state.drop_view.length(recovery_state.drop_view_init_length);
      }
      DBUG_RETURN(1);
    }
  }
  DBUG_RETURN(0);
}

/*
  Create a new handler based on handlerton name

  @return handler object (init() already called) on success
  @return 0 on failure (unknown or disabled engine); error is logged
*/

static handler *create_handler(THD *thd, MEM_ROOT *mem_root,
                               LEX_CSTRING *name)
{
  handlerton *hton;
  handler *file;
  plugin_ref plugin= my_plugin_lock_by_name(thd, name,
                                            MYSQL_STORAGE_ENGINE_PLUGIN);
  if (!plugin)
  {
    my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(ME_ERROR_LOG), name->str);
    return 0;
  }
  hton= plugin_hton(plugin);
  if (!ha_storage_engine_is_enabled(hton))
  {
    my_error(ER_STORAGE_ENGINE_DISABLED, MYF(ME_ERROR_LOG), name->str);
    return 0;
  }
  if ((file= hton->create(hton, (TABLE_SHARE*) 0, mem_root)))
    file->init();
  return file;
}


/*
  Rename a table and its .frm file for a ddl_log_entry

  We first rename the table and then the .frm file as some engines,
  like connect, need the .frm file to exist to be able to do a rename.
*/

static void execute_rename_table(DDL_LOG_ENTRY *ddl_log_entry, handler *file,
                                 const LEX_CSTRING *from_db,
                                 const LEX_CSTRING *from_table,
                                 const LEX_CSTRING *to_db,
                                 const LEX_CSTRING *to_table,
                                 uint flags,
                                 char *from_path, char *to_path)
{
  uint to_length=0, fr_length=0;
  DBUG_ENTER("execute_rename_table");

  if (file->needs_lower_case_filenames())
  {
    build_lower_case_table_filename(from_path, FN_REFLEN,
                                    from_db, from_table,
                                    flags & FN_FROM_IS_TMP);
    build_lower_case_table_filename(to_path, FN_REFLEN,
                                    to_db, to_table, flags & FN_TO_IS_TMP);
  }
  else
  {
    fr_length= build_table_filename(from_path, FN_REFLEN,
                                    from_db->str, from_table->str, "",
                                    flags & FN_FROM_IS_TMP);
    to_length= build_table_filename(to_path, FN_REFLEN,
                                    to_db->str, to_table->str, "",
                                    flags & FN_TO_IS_TMP);
  }
  /* Rename the table in the storage engine first (see comment above) */
  file->ha_rename_table(from_path, to_path);
  if (file->needs_lower_case_filenames())
  {
    /*
      We have to rebuild the file names as the .frm file should be used
      without lower case conversion
    */
    fr_length= build_table_filename(from_path, FN_REFLEN,
                                    from_db->str, from_table->str, reg_ext,
                                    flags & FN_FROM_IS_TMP);
    to_length= build_table_filename(to_path, FN_REFLEN,
                                    to_db->str, to_table->str, reg_ext,
                                    flags & FN_TO_IS_TMP);
  }
  else
  {
    /* Append .frm extension to the already built paths */
    strmov(from_path+fr_length, reg_ext);
    strmov(to_path+to_length, reg_ext);
  }
  /* Rename the .frm file only if it exists */
  if (!access(from_path, F_OK))
    (void) mysql_file_rename(key_file_frm, from_path, to_path, MYF(MY_WME));
  DBUG_VOID_RETURN;
}


/*
  Update triggers

  If swap_tables == 0 (Restoring the original in case of failed rename)
    Convert triggers for db.name -> from_db.from_name
  else (Doing the rename in case of ALTER TABLE ...
RENAME)
    Convert triggers for from_db.from_name -> db.extra_name
*/

static void rename_triggers(THD *thd, DDL_LOG_ENTRY *ddl_log_entry,
                            bool swap_tables)
{
  LEX_CSTRING to_table, from_table, to_db, from_db, from_converted_name;
  char to_path[FN_REFLEN+1], from_path[FN_REFLEN+1], conv_path[FN_REFLEN+1];

  if (!swap_tables)
  {
    from_db= ddl_log_entry->db;
    from_table= ddl_log_entry->name;
    to_db= ddl_log_entry->from_db;
    to_table= ddl_log_entry->from_name;
  }
  else
  {
    from_db= ddl_log_entry->from_db;
    from_table= ddl_log_entry->from_name;
    to_db= ddl_log_entry->db;
    to_table= ddl_log_entry->extra_name;
  }

  /* Build .TRG paths and delete any left over '~' temporary files */
  build_filename_and_delete_tmp_file(from_path, sizeof(from_path),
                                     &from_db, &from_table,
                                     TRG_EXT, key_file_trg);
  build_filename_and_delete_tmp_file(to_path, sizeof(to_path),
                                     &to_db, &to_table,
                                     TRG_EXT, key_file_trg);
  if (lower_case_table_names)
  {
    uint errors;
    from_converted_name.str= conv_path;
    from_converted_name.length=
      strconvert(system_charset_info, from_table.str, from_table.length,
                 files_charset_info, conv_path, FN_REFLEN, &errors);
  }
  else
    from_converted_name= from_table;

  if (!access(to_path, F_OK))
  {
    /*
      The original file was never renamed or we crashed in recovery
      just after renaming back the file.
      In this case the current file is correct and we can remove any
      left over copied files
    */
    (void) mysql_file_delete(key_file_trg, from_path, MYF(0));
  }
  else if (!access(from_path, F_OK))
  {
    /* .TRG file was renamed.
       Rename it back */
    /*
      We have to create a MDL lock as change_table_names() checks that we
      have a mdl locks for the table
    */
    MDL_request mdl_request;
    TRIGGER_RENAME_PARAM trigger_param;
    int error __attribute__((unused));
    MDL_REQUEST_INIT(&mdl_request, MDL_key::TABLE,
                     from_db.str,
                     from_converted_name.str,
                     MDL_EXCLUSIVE, MDL_EXPLICIT);
    error= thd->mdl_context.acquire_lock(&mdl_request, 1);
    /* acquire_locks() should never fail during recovery */
    DBUG_ASSERT(error == 0);

    (void) Table_triggers_list::prepare_for_rename(thd,
                                                   &trigger_param,
                                                   &from_db,
                                                   &from_table,
                                                   &from_converted_name,
                                                   &to_db,
                                                   &to_table);
    (void) Table_triggers_list::change_table_name(thd,
                                                  &trigger_param,
                                                  &from_db,
                                                  &from_table,
                                                  &from_converted_name,
                                                  &to_db,
                                                  &to_table);
    thd->mdl_context.release_lock(mdl_request.ticket);
  }
}


/*
  Update stat tables

  If swap_tables == 0 (Restoring the original in case of failed rename)
    Rename stats for db.name -> from_db.from_name
  else (Doing the rename in case of ALTER TABLE ... RENAME)
    Rename stats for from_db.from_name -> db.extra_name

  NOTE(review): the original comment listed the rename directions reversed
  compared with the code below; this text follows the code, which mirrors
  rename_triggers().
*/

static void rename_in_stat_tables(THD *thd, DDL_LOG_ENTRY *ddl_log_entry,
                                  bool swap_tables)
{
  LEX_CSTRING from_table, to_table, from_db, to_db, from_converted_name;
  char conv_path[FN_REFLEN+1];

  if (!swap_tables)
  {
    from_db= ddl_log_entry->db;
    from_table= ddl_log_entry->name;
    to_db= ddl_log_entry->from_db;
    to_table= ddl_log_entry->from_name;
  }
  else
  {
    from_db= ddl_log_entry->from_db;
    from_table= ddl_log_entry->from_name;
    to_db= ddl_log_entry->db;
    to_table= ddl_log_entry->extra_name;
  }
  if (lower_case_table_names)
  {
    uint errors;
    from_converted_name.str= conv_path;
    from_converted_name.length=
      strconvert(system_charset_info, from_table.str, from_table.length,
                 files_charset_info, conv_path, FN_REFLEN, &errors);
  }
  else
    from_converted_name= from_table;

  (void) rename_table_in_stat_tables(thd,
                                     &from_db,
                                     &from_converted_name,
                                     &to_db,
                                     &to_table);
}


/**
  Execute one action in a ddl log entry

  @param
ddl_log_entry Information in action entry to execute + + @return Operation status + @retval TRUE Error + @retval FALSE Success +*/ + +static int ddl_log_execute_action(THD *thd, MEM_ROOT *mem_root, + DDL_LOG_ENTRY *ddl_log_entry) +{ + LEX_CSTRING handler_name; + handler *file= NULL; + char to_path[FN_REFLEN+1], from_path[FN_REFLEN+1]; + handlerton *hton= 0; + ddl_log_error_handler no_such_table_handler; + uint entry_pos= ddl_log_entry->entry_pos; + int error; + bool frm_action= FALSE; + DBUG_ENTER("ddl_log_execute_action"); + + mysql_mutex_assert_owner(&LOCK_gdl); + DBUG_PRINT("ddl_log", + ("pos: %u=>%u->%u type: %u action: %u (%s) phase: %u " + "handler: '%s' name: '%s' from_name: '%s' tmp_name: '%s'", + recovery_state.execute_entry_pos, + ddl_log_entry->entry_pos, + ddl_log_entry->next_entry, + (uint) ddl_log_entry->entry_type, + (uint) ddl_log_entry->action_type, + ddl_log_action_name[ddl_log_entry->action_type], + (uint) ddl_log_entry->phase, + ddl_log_entry->handler_name.str, + ddl_log_entry->name.str, + ddl_log_entry->from_name.str, + ddl_log_entry->tmp_name.str)); + + if (ddl_log_entry->entry_type == DDL_LOG_IGNORE_ENTRY_CODE || + ddl_log_entry->phase == DDL_LOG_FINAL_PHASE) + DBUG_RETURN(FALSE); + + handler_name= ddl_log_entry->handler_name; + thd->push_internal_handler(&no_such_table_handler); + + if (!strcmp(ddl_log_entry->handler_name.str, reg_ext)) + frm_action= TRUE; + else if (ddl_log_entry->handler_name.length) + { + if (!(file= create_handler(thd, mem_root, &handler_name))) + goto end; + hton= file->ht; + } + + switch (ddl_log_entry->action_type) { + case DDL_LOG_REPLACE_ACTION: + case DDL_LOG_DELETE_ACTION: + { + if (ddl_log_entry->phase == 0) + { + if (frm_action) + { + strxmov(to_path, ddl_log_entry->name.str, reg_ext, NullS); + if (unlikely((error= mysql_file_delete(key_file_frm, to_path, + MYF(MY_WME | + MY_IGNORE_ENOENT))))) + break; +#ifdef WITH_PARTITION_STORAGE_ENGINE + strxmov(to_path, ddl_log_entry->name.str, PAR_EXT, NullS); + (void) 
mysql_file_delete(key_file_partition_ddl_log, to_path, + MYF(0)); +#endif + } + else + { + if (unlikely((error= hton->drop_table(hton, ddl_log_entry->name.str)))) + { + if (!non_existing_table_error(error)) + break; + } + } + if (increment_phase(entry_pos)) + break; + error= 0; + if (ddl_log_entry->action_type == DDL_LOG_DELETE_ACTION) + break; + } + } + DBUG_ASSERT(ddl_log_entry->action_type == DDL_LOG_REPLACE_ACTION); + /* + Fall through and perform the rename action of the replace + action. We have already indicated the success of the delete + action in the log entry by stepping up the phase. + */ + /* fall through */ + case DDL_LOG_RENAME_ACTION: + { + error= TRUE; + if (frm_action) + { + strxmov(to_path, ddl_log_entry->name.str, reg_ext, NullS); + strxmov(from_path, ddl_log_entry->from_name.str, reg_ext, NullS); + (void) mysql_file_rename(key_file_frm, from_path, to_path, MYF(MY_WME)); +#ifdef WITH_PARTITION_STORAGE_ENGINE + strxmov(to_path, ddl_log_entry->name.str, PAR_EXT, NullS); + strxmov(from_path, ddl_log_entry->from_name.str, PAR_EXT, NullS); + (void) mysql_file_rename(key_file_partition_ddl_log, from_path, to_path, + MYF(MY_WME)); +#endif + } + else + (void) file->ha_rename_table(ddl_log_entry->from_name.str, + ddl_log_entry->name.str); + if (increment_phase(entry_pos)) + break; + break; + } + case DDL_LOG_EXCHANGE_ACTION: + { + /* We hold LOCK_gdl, so we can alter global_ddl_log.file_entry_buf */ + uchar *file_entry_buf= global_ddl_log.file_entry_buf; + /* not yet implemented for frm */ + DBUG_ASSERT(!frm_action); + /* + Using a case-switch here to revert all currently done phases, + since it will fall through until the first phase is undone. 
+ */ + switch (ddl_log_entry->phase) { + case EXCH_PHASE_TEMP_TO_FROM: + /* tmp_name -> from_name possibly done */ + (void) file->ha_rename_table(ddl_log_entry->from_name.str, + ddl_log_entry->tmp_name.str); + /* decrease the phase and sync */ + file_entry_buf[DDL_LOG_PHASE_POS]--; + if (write_ddl_log_file_entry(entry_pos)) + break; + (void) ddl_log_sync_no_lock(); + /* fall through */ + case EXCH_PHASE_FROM_TO_NAME: + /* from_name -> name possibly done */ + (void) file->ha_rename_table(ddl_log_entry->name.str, + ddl_log_entry->from_name.str); + /* decrease the phase and sync */ + file_entry_buf[DDL_LOG_PHASE_POS]--; + if (write_ddl_log_file_entry(entry_pos)) + break; + (void) ddl_log_sync_no_lock(); + /* fall through */ + case EXCH_PHASE_NAME_TO_TEMP: + /* name -> tmp_name possibly done */ + (void) file->ha_rename_table(ddl_log_entry->tmp_name.str, + ddl_log_entry->name.str); + /* disable the entry and sync */ + file_entry_buf[DDL_LOG_ENTRY_TYPE_POS]= DDL_LOG_IGNORE_ENTRY_CODE; + (void) write_ddl_log_file_entry(entry_pos); + (void) ddl_log_sync_no_lock(); + break; + } + break; + } + case DDL_LOG_RENAME_TABLE_ACTION: + { + /* + We should restore things by renaming from + 'entry->name' to 'entry->from_name' + */ + switch (ddl_log_entry->phase) { + case DDL_RENAME_PHASE_TRIGGER: + rename_triggers(thd, ddl_log_entry, 0); + if (increment_phase(entry_pos)) + break; + /* fall through */ + case DDL_RENAME_PHASE_STAT: + /* + Stat tables must be updated last so that we can handle a rename of + a stat table. 
For now we just rememeber that we have to update it + */ + update_flags(ddl_log_entry->entry_pos, DDL_LOG_FLAG_UPDATE_STAT); + ddl_log_entry->flags|= DDL_LOG_FLAG_UPDATE_STAT; + /* fall through */ + case DDL_RENAME_PHASE_TABLE: + /* Restore frm and table to original names */ + execute_rename_table(ddl_log_entry, file, + &ddl_log_entry->db, &ddl_log_entry->name, + &ddl_log_entry->from_db, &ddl_log_entry->from_name, + 0, + from_path, to_path); + + if (ddl_log_entry->flags & DDL_LOG_FLAG_UPDATE_STAT) + { + /* Update stat tables last */ + rename_in_stat_tables(thd, ddl_log_entry, 0); + } + + /* disable the entry and sync */ + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + break; + default: + DBUG_ASSERT(0); + break; + } + break; + } + case DDL_LOG_RENAME_VIEW_ACTION: + { + LEX_CSTRING from_table, to_table; + from_table= ddl_log_entry->from_name; + to_table= ddl_log_entry->name; + + /* Delete any left over .frm~ files */ + build_filename_and_delete_tmp_file(to_path, sizeof(to_path) - 1, + &ddl_log_entry->db, + &ddl_log_entry->name, + reg_ext, + key_file_fileparser); + build_filename_and_delete_tmp_file(from_path, sizeof(from_path) - 1, + &ddl_log_entry->from_db, + &ddl_log_entry->from_name, + reg_ext, key_file_fileparser); + + /* Rename view back if the original rename did succeed */ + if (!access(to_path, F_OK)) + (void) mysql_rename_view(thd, + &ddl_log_entry->from_db, &from_table, + &ddl_log_entry->db, &to_table); + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + } + break; + /* + Initialize variables for DROP TABLE and DROP VIEW + In normal cases a query only contains one action. However in case of + DROP DATABASE we may get a mix of both and we have to keep these + separate. 
+ */ + case DDL_LOG_DROP_INIT_ACTION: + { + LEX_CSTRING *comment= &ddl_log_entry->tmp_name; + recovery_state.drop_table.length(0); + recovery_state.drop_table.set_charset(system_charset_info); + recovery_state.drop_table.append(STRING_WITH_LEN("DROP TABLE IF EXISTS ")); + if (comment->length) + { + recovery_state.drop_table.append(comment); + recovery_state.drop_table.append(' '); + } + recovery_state.drop_table_init_length= recovery_state.drop_table.length(); + + recovery_state.drop_view.length(0); + recovery_state.drop_view.set_charset(system_charset_info); + recovery_state.drop_view.append(STRING_WITH_LEN("DROP VIEW IF EXISTS ")); + recovery_state.drop_view_init_length= recovery_state.drop_view.length(); + + strmake(recovery_state.current_db, + ddl_log_entry->from_db.str, sizeof(recovery_state.current_db)-1); + /* We don't increment phase as we want to retry this in case of crash */ + break; + } + case DDL_LOG_DROP_TABLE_ACTION: + { + LEX_CSTRING db, table, path; + db= ddl_log_entry->db; + table= ddl_log_entry->name; + /* Note that path is without .frm extension */ + path= ddl_log_entry->tmp_name; + + switch (ddl_log_entry->phase) { + case DDL_DROP_PHASE_TABLE: + if (hton) + { + no_such_table_handler.only_ignore_non_existing_errors= 1; + error= hton->drop_table(hton, path.str); + no_such_table_handler.only_ignore_non_existing_errors= 0; + if (error) + { + if (!non_existing_table_error(error)) + break; + error= -1; + } + } + else + error= ha_delete_table_force(thd, path.str, &db, &table); + if (error <= 0) + { + /* Not found or already deleted. 
Delete .frm if it exists */ + strxnmov(to_path, sizeof(to_path)-1, path.str, reg_ext, NullS); + mysql_file_delete(key_file_frm, to_path, MYF(MY_WME|MY_IGNORE_ENOENT)); + error= 0; + } + if (increment_phase(entry_pos)) + break; + /* Fall through */ + case DDL_DROP_PHASE_TRIGGER: + Table_triggers_list::drop_all_triggers(thd, &db, &table, + MYF(MY_WME | MY_IGNORE_ENOENT)); + if (increment_phase(entry_pos)) + break; + /* Fall through */ + case DDL_DROP_PHASE_BINLOG: + if (strcmp(recovery_state.current_db, db.str)) + { + append_identifier(thd, &recovery_state.drop_table, &db); + recovery_state.drop_table.append('.'); + } + append_identifier(thd, &recovery_state.drop_table, &table); + recovery_state.drop_table.append(','); + /* We don't increment phase as we want to retry this in case of crash */ + + if (ddl_log_drop_to_binary_log(thd, ddl_log_entry, + &recovery_state.drop_table)) + { + if (increment_phase(entry_pos)) + break; + } + break; + case DDL_DROP_PHASE_RESET: + /* We have already logged all previous drop's. 
Clear the query */ + recovery_state.drop_table.length(recovery_state.drop_table_init_length); + recovery_state.drop_view.length(recovery_state.drop_view_init_length); + break; + } + break; + } + case DDL_LOG_DROP_VIEW_ACTION: + { + LEX_CSTRING db, table, path; + db= ddl_log_entry->db; + table= ddl_log_entry->name; + /* Note that for views path is WITH .frm extension */ + path= ddl_log_entry->tmp_name; + + if (ddl_log_entry->phase == 0) + { + mysql_file_delete(key_file_frm, path.str, MYF(MY_WME|MY_IGNORE_ENOENT)); + if (strcmp(recovery_state.current_db, db.str)) + { + append_identifier(thd, &recovery_state.drop_view, &db); + recovery_state.drop_view.append('.'); + } + append_identifier(thd, &recovery_state.drop_view, &table); + recovery_state.drop_view.append(','); + + if (ddl_log_drop_to_binary_log(thd, ddl_log_entry, + &recovery_state.drop_view)) + { + if (increment_phase(entry_pos)) + break; + } + } + else + { + /* We have already logged all previous drop's. Clear the query */ + recovery_state.drop_table.length(recovery_state.drop_table_init_length); + recovery_state.drop_view.length(recovery_state.drop_table_init_length); + } + break; + } + case DDL_LOG_DROP_TRIGGER_ACTION: + { + MY_STAT stat_info; + off_t frm_length= 1; // Impossible length + LEX_CSTRING thd_db= thd->db; + + /* Delete trigger temporary file if it still exists */ + if (!build_filename_and_delete_tmp_file(to_path, sizeof(to_path) - 1, + &ddl_log_entry->db, + &ddl_log_entry->name, + TRG_EXT, + key_file_fileparser)) + { + /* Temporary file existed and was deleted, nothing left to do */ + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + break; + } + /* + We can use length of TRG file as an indication if trigger was removed. + If there is no file, then it means that this was the last trigger + and the file was removed. 
+ */ + if (my_stat(to_path, &stat_info, MYF(0))) + frm_length= (off_t) stat_info.st_size; + if (frm_length != (off_t) ddl_log_entry->unique_id && + mysql_bin_log.is_open()) + { + /* + File size changed and it was not binlogged (as this entry was + executed) + */ + (void) rm_trigname_file(to_path, &ddl_log_entry->db, + &ddl_log_entry->from_name, + MYF(0)); + + recovery_state.drop_table.length(0); + recovery_state.drop_table.set_charset(system_charset_info); + if (ddl_log_entry->tmp_name.length) + { + /* We can use the original query */ + recovery_state.drop_table.append(&ddl_log_entry->tmp_name); + } + else + { + /* Generate new query */ + recovery_state.drop_table.append(STRING_WITH_LEN("DROP TRIGGER IF " + "EXISTS ")); + append_identifier(thd, &recovery_state.drop_table, + &ddl_log_entry->from_name); + recovery_state.drop_table.append(&end_comment); + } + if (mysql_bin_log.is_open()) + { + mysql_mutex_unlock(&LOCK_gdl); + thd->db= ddl_log_entry->db; + (void) thd->binlog_query(THD::STMT_QUERY_TYPE, + recovery_state.drop_table.ptr(), + recovery_state.drop_table.length(), TRUE, FALSE, + FALSE, 0); + thd->db= thd_db; + mysql_mutex_lock(&LOCK_gdl); + } + } + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + break; + } + case DDL_LOG_DROP_DB_ACTION: + { + LEX_CSTRING db, path; + db= ddl_log_entry->db; + path= ddl_log_entry->tmp_name; + + switch (ddl_log_entry->phase) { + case DDL_DROP_DB_PHASE_INIT: + drop_database_objects(thd, &path, &db, + !my_strcasecmp(system_charset_info, + MYSQL_SCHEMA_NAME.str, db.str)); + + strxnmov(to_path, sizeof(to_path)-1, path.str, MY_DB_OPT_FILE, NullS); + mysql_file_delete_with_symlink(key_file_misc, to_path, "", MYF(0)); + + (void) rm_dir_w_symlink(path.str, 0); + if (increment_phase(entry_pos)) + break; + /* fall through */ + case DDL_DROP_DB_PHASE_LOG: + { + String *query= &recovery_state.drop_table; + + query->length(0); + query->append(STRING_WITH_LEN("DROP DATABASE IF EXISTS ")); + append_identifier(thd, query, &db); + 
query->append(&end_comment); + + if (mysql_bin_log.is_open()) + { + mysql_mutex_unlock(&LOCK_gdl); + (void) thd->binlog_query(THD::STMT_QUERY_TYPE, + query->ptr(), query->length(), + TRUE, FALSE, FALSE, 0); + mysql_mutex_lock(&LOCK_gdl); + } + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + break; + } + } + break; + } + case DDL_LOG_CREATE_TABLE_ACTION: + { + LEX_CSTRING db, table, path; + db= ddl_log_entry->db; + table= ddl_log_entry->name; + path= ddl_log_entry->tmp_name; + + /* Don't delete the table if we didn't create it */ + if (ddl_log_entry->flags == 0) + { + if (hton) + { + if ((error= hton->drop_table(hton, path.str))) + { + if (!non_existing_table_error(error)) + break; + error= -1; + } + } + else + error= ha_delete_table_force(thd, path.str, &db, &table); + } + strxnmov(to_path, sizeof(to_path)-1, path.str, reg_ext, NullS); + mysql_file_delete(key_file_frm, to_path, MYF(MY_WME|MY_IGNORE_ENOENT)); + if (ddl_log_entry->phase == DDL_CREATE_TABLE_PHASE_LOG) + { + /* + The server logged CREATE TABLE ... SELECT into binary log + before crashing. As the commit failed and we have delete the + table above, we have now to log the DROP of the created table. 
+ */ + + String *query= &recovery_state.drop_table; + query->length(0); + query->append(STRING_WITH_LEN("DROP TABLE IF EXISTS ")); + append_identifier(thd, query, &db); + query->append('.'); + append_identifier(thd, query, &table); + query->append(&end_comment); + + if (mysql_bin_log.is_open()) + { + mysql_mutex_unlock(&LOCK_gdl); + (void) thd->binlog_query(THD::STMT_QUERY_TYPE, + query->ptr(), query->length(), + TRUE, FALSE, FALSE, 0); + mysql_mutex_lock(&LOCK_gdl); + } + } + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + error= 0; + break; + } + case DDL_LOG_CREATE_VIEW_ACTION: + { + char *path= to_path; + size_t path_length= ddl_log_entry->tmp_name.length; + memcpy(path, ddl_log_entry->tmp_name.str, path_length+1); + path[path_length+1]= 0; // Prepare for extending + + /* Remove temporary parser file */ + path[path_length]='~'; + mysql_file_delete(key_file_fileparser, path, + MYF(MY_WME|MY_IGNORE_ENOENT)); + path[path_length]= 0; + + switch (ddl_log_entry->phase) { + case DDL_CREATE_VIEW_PHASE_NO_OLD_VIEW: + { + /* + No old view exists, so we can just delete the .frm and temporary files + */ + path[path_length]='-'; + mysql_file_delete(key_file_fileparser, path, + MYF(MY_WME|MY_IGNORE_ENOENT)); + path[path_length]= 0; + mysql_file_delete(key_file_frm, path, MYF(MY_WME|MY_IGNORE_ENOENT)); + break; + } + case DDL_CREATE_VIEW_PHASE_DELETE_VIEW_COPY: + { + /* + Old view existed. We crashed before we had done a copy and change + state to DDL_CREATE_VIEW_PHASE_OLD_VIEW_COPIED + */ + path[path_length]='-'; + mysql_file_delete(key_file_fileparser, path, + MYF(MY_WME|MY_IGNORE_ENOENT)); + path[path_length]= 0; + break; + } + case DDL_CREATE_VIEW_PHASE_OLD_VIEW_COPIED: + { + /* + Old view existed copied to '-' file. 
Restore it + */ + memcpy(from_path, path, path_length+2); + from_path[path_length]='-'; + if (!access(from_path, F_OK)) + mysql_file_rename(key_file_fileparser, from_path, path, MYF(MY_WME)); + break; + } + } + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + break; + } + case DDL_LOG_DELETE_TMP_FILE_ACTION: + { + LEX_CSTRING path= ddl_log_entry->tmp_name; + DBUG_ASSERT(ddl_log_entry->unique_id <= UINT_MAX32); + if (!ddl_log_entry->unique_id || + !is_execute_entry_active((uint) ddl_log_entry->unique_id)) + mysql_file_delete(key_file_fileparser, path.str, + MYF(MY_WME|MY_IGNORE_ENOENT)); + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + break; + } + case DDL_LOG_CREATE_TRIGGER_ACTION: + { + LEX_CSTRING db, table, trigger; + db= ddl_log_entry->db; + table= ddl_log_entry->name; + trigger= ddl_log_entry->tmp_name; + + /* Delete backup .TRG (trigger file) if it exists */ + (void) build_filename_and_delete_tmp_file(to_path, sizeof(to_path) - 1, + &db, &table, + TRG_EXT, + key_file_fileparser); + (void) build_filename_and_delete_tmp_file(to_path, sizeof(to_path) - 1, + &db, &trigger, + TRN_EXT, + key_file_fileparser); + switch (ddl_log_entry->phase) { + case DDL_CREATE_TRIGGER_PHASE_DELETE_COPY: + { + size_t length; + /* Delete copy of .TRN and .TRG files */ + length= build_table_filename(to_path, sizeof(to_path) - 1, + db.str, table.str, TRG_EXT, 0); + to_path[length]= '-'; + to_path[length+1]= 0; + mysql_file_delete(key_file_fileparser, to_path, + MYF(MY_WME|MY_IGNORE_ENOENT)); + + length= build_table_filename(to_path, sizeof(to_path) - 1, + db.str, trigger.str, TRN_EXT, 0); + to_path[length]= '-'; + to_path[length+1]= 0; + mysql_file_delete(key_file_fileparser, to_path, + MYF(MY_WME|MY_IGNORE_ENOENT)); + } + /* Nothing else to do */ + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + break; + case DDL_CREATE_TRIGGER_PHASE_OLD_COPIED: + { + LEX_CSTRING path= {to_path, 0}; + size_t length; + /* Restore old version if the .TRN and .TRG files */ + length= 
build_table_filename(to_path, sizeof(to_path) - 1, + db.str, table.str, TRG_EXT, 0); + to_path[length]='-'; + to_path[length+1]= 0; + path.length= length+1; + /* an old TRN file only exist in the case if REPLACE was used */ + if (!access(to_path, F_OK)) + sql_restore_definition_file(&path); + + length= build_table_filename(to_path, sizeof(to_path) - 1, + db.str, trigger.str, TRN_EXT, 0); + to_path[length]='-'; + to_path[length+1]= 0; + path.length= length+1; + if (!access(to_path, F_OK)) + sql_restore_definition_file(&path); + else + { + /* + There was originally no .TRN for this trigger. + Delete the newly created one. + */ + to_path[length]= 0; + mysql_file_delete(key_file_fileparser, to_path, + MYF(MY_WME|MY_IGNORE_ENOENT)); + } + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + break; + } + case DDL_CREATE_TRIGGER_PHASE_NO_OLD_TRIGGER: + { + /* No old trigger existed. We can just delete the .TRN and .TRG files */ + build_table_filename(to_path, sizeof(to_path) - 1, + db.str, table.str, TRG_EXT, 0); + mysql_file_delete(key_file_fileparser, to_path, + MYF(MY_WME|MY_IGNORE_ENOENT)); + build_table_filename(to_path, sizeof(to_path) - 1, + db.str, trigger.str, TRN_EXT, 0); + mysql_file_delete(key_file_fileparser, to_path, + MYF(MY_WME|MY_IGNORE_ENOENT)); + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + break; + } + } + break; + } + case DDL_LOG_ALTER_TABLE_ACTION: + { + handlerton *org_hton, *partition_hton; + handler *org_file; + bool is_renamed= ddl_log_entry->flags & DDL_LOG_FLAG_ALTER_RENAME; + bool new_version_ready= 0, new_version_unusable= 0; + LEX_CSTRING db, table; + db= ddl_log_entry->db; + table= ddl_log_entry->name; + + if (!(org_file= create_handler(thd, mem_root, + &ddl_log_entry->from_handler_name))) + goto end; + /* Handlerton of the final table and any temporary tables */ + org_hton= org_file->ht; + /* + partition_hton is the hton for the new file, or + in case of ALTER of a partitioned table, the underlying + table + */ + 
partition_hton= hton; + + if (ddl_log_entry->flags & DDL_LOG_FLAG_ALTER_PARTITION) + { + /* + The from and to tables where both using the partition engine. + */ + hton= org_hton; + } + switch (ddl_log_entry->phase) { + case DDL_ALTER_TABLE_PHASE_RENAME_FAILED: + /* + We come here when the final rename of temporary table (#sql-alter) to + the original name failed. Now we have to delete the temporary table + and restore the backup. + */ + quick_rm_table(thd, hton, &db, &table, FN_IS_TMP); + if (!is_renamed) + { + execute_rename_table(ddl_log_entry, file, + &ddl_log_entry->from_db, + &ddl_log_entry->extra_name, // #sql-backup + &ddl_log_entry->from_db, + &ddl_log_entry->from_name, + FN_FROM_IS_TMP, + from_path, to_path); + } + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + break; + case DDL_ALTER_TABLE_PHASE_PREPARE_INPLACE: + /* We crashed before ddl_log_update_unique_id() was called */ + new_version_unusable= 1; + /* fall through */ + case DDL_ALTER_TABLE_PHASE_INPLACE_COPIED: + /* The inplace alter table is committed and ready to be used */ + if (!new_version_unusable) + new_version_ready= 1; + /* fall through */ + case DDL_ALTER_TABLE_PHASE_INPLACE: + { + int fr_length, to_length; + /* + Inplace alter table was used. + On disk there are now a table with the original name, the + original .frm file and potentially a #sql-alter...frm file + with the new definition. + */ + fr_length= build_table_filename(from_path, sizeof(from_path) - 1, + ddl_log_entry->db.str, + ddl_log_entry->name.str, + reg_ext, 0); + to_length= build_table_filename(to_path, sizeof(to_path) - 1, + ddl_log_entry->from_db.str, + ddl_log_entry->from_name.str, + reg_ext, 0); + if (!access(from_path, F_OK)) // Does #sql-alter.. exists? + { + LEX_CUSTRING version= {ddl_log_entry->uuid, MY_UUID_SIZE}; + /* + Temporary .frm file exists. This means that that the table in + the storage engine can be of either old or new version. + If old version, delete the new .frm table and keep the old one. 
+ If new version, replace the old .frm with the new one. + */ + to_path[to_length - reg_ext_length]= 0; // Remove .frm + if (!new_version_unusable && + ( !partition_hton->check_version || new_version_ready || + !partition_hton->check_version(partition_hton, + to_path, &version, + ddl_log_entry->unique_id))) + { + /* Table is up to date */ + + /* + Update state so that if we crash and retry the ddl log entry, + we know that we can use the new table even if .frm is renamed. + */ + if (ddl_log_entry->phase != DDL_ALTER_TABLE_PHASE_INPLACE_COPIED) + (void) update_phase(entry_pos, + DDL_ALTER_TABLE_PHASE_INPLACE_COPIED); + /* Replace old .frm file with new one */ + to_path[to_length - reg_ext_length]= FN_EXTCHAR; + (void) mysql_file_rename(key_file_frm, from_path, to_path, + MYF(MY_WME)); + new_version_ready= 1; + } + else + { + DBUG_ASSERT(!new_version_ready); + /* + Use original version of the .frm file. + Remove temporary #sql-alter.frm file and the #sql-alter table. + We have also to remove the temporary table as some storage engines, + like InnoDB, may use it as an internal temporary table + during inplace alter table. 
+ */ + from_path[fr_length - reg_ext_length]= 0; + error= org_hton->drop_table(org_hton, from_path); + if (non_existing_table_error(error)) + error= 0; + from_path[fr_length - reg_ext_length]= FN_EXTCHAR; + mysql_file_delete(key_file_frm, from_path, + MYF(MY_WME|MY_IGNORE_ENOENT)); + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + break; + } + } + if (is_renamed && new_version_ready) + { + /* After the renames above, the original table is now in from_name */ + ddl_log_entry->name= ddl_log_entry->from_name; + /* Rename db.name -> db.extra_name */ + execute_rename_table(ddl_log_entry, file, + &ddl_log_entry->db, &ddl_log_entry->name, + &ddl_log_entry->db, &ddl_log_entry->extra_name, + 0, + from_path, to_path); + } + (void) update_phase(entry_pos, DDL_ALTER_TABLE_PHASE_UPDATE_TRIGGERS); + goto update_triggers; + } + case DDL_ALTER_TABLE_PHASE_COPIED: + { + char *from_end; + /* + New table is created and we have the query for the binary log. + We should remove the original table and in the next stage replace + it with the new one. + */ + build_table_filename(from_path, sizeof(from_path) - 1, + ddl_log_entry->from_db.str, + ddl_log_entry->from_name.str, + "", 0); + build_table_filename(to_path, sizeof(to_path) - 1, + ddl_log_entry->db.str, + ddl_log_entry->name.str, + "", 0); + from_end= strend(from_path); + if (likely(org_hton)) + { + error= org_hton->drop_table(org_hton, from_path); + if (non_existing_table_error(error)) + error= 0; + } + strmov(from_end, reg_ext); + mysql_file_delete(key_file_frm, from_path, + MYF(MY_WME|MY_IGNORE_ENOENT)); + *from_end= 0; // Remove extension + + (void) update_phase(entry_pos, DDL_ALTER_TABLE_PHASE_OLD_RENAMED); + } + /* fall through */ + case DDL_ALTER_TABLE_PHASE_OLD_RENAMED: + { + /* + The new table (from_path) is up to date. + Original table is either renamed as backup table (normal case), + only frm is renamed (in case of engine change) or deleted above. 
+ */ + if (!is_renamed) + { + uint length; + /* Rename new "temporary" table to the original wanted name */ + execute_rename_table(ddl_log_entry, file, + &ddl_log_entry->db, + &ddl_log_entry->name, + &ddl_log_entry->from_db, + &ddl_log_entry->from_name, + FN_FROM_IS_TMP, + from_path, to_path); + + /* + Remove backup (only happens if alter table used without rename). + Backup name is always in lower case, so there is no need for + converting table names. + */ + length= build_table_filename(from_path, sizeof(from_path) - 1, + ddl_log_entry->from_db.str, + ddl_log_entry->extra_name.str, + "", FN_IS_TMP); + if (likely(org_hton)) + { + if (ddl_log_entry->flags & DDL_LOG_FLAG_ALTER_ENGINE_CHANGED) + { + /* Only frm is renamed, storage engine files have original name */ + build_table_filename(to_path, sizeof(from_path) - 1, + ddl_log_entry->from_db.str, + ddl_log_entry->from_name.str, + "", 0); + error= org_hton->drop_table(org_hton, to_path); + } + else + error= org_hton->drop_table(org_hton, from_path); + if (non_existing_table_error(error)) + error= 0; + } + strmov(from_path + length, reg_ext); + mysql_file_delete(key_file_frm, from_path, + MYF(MY_WME|MY_IGNORE_ENOENT)); + } + else + execute_rename_table(ddl_log_entry, file, + &ddl_log_entry->db, &ddl_log_entry->name, + &ddl_log_entry->db, &ddl_log_entry->extra_name, + FN_FROM_IS_TMP, + from_path, to_path); + (void) update_phase(entry_pos, DDL_ALTER_TABLE_PHASE_UPDATE_TRIGGERS); + } + /* fall through */ + case DDL_ALTER_TABLE_PHASE_UPDATE_TRIGGERS: + update_triggers: + { + if (is_renamed) + { + // rename_triggers will rename from: from_db.from_name -> db.extra_name + rename_triggers(thd, ddl_log_entry, 1); + (void) update_phase(entry_pos, DDL_ALTER_TABLE_PHASE_UPDATE_STATS); + } + } + /* fall through */ + case DDL_ALTER_TABLE_PHASE_UPDATE_STATS: + if (is_renamed) + { + ddl_log_entry->name= ddl_log_entry->from_name; + ddl_log_entry->from_name= ddl_log_entry->extra_name; + rename_in_stat_tables(thd, ddl_log_entry, 1); + 
(void) update_phase(entry_pos, DDL_ALTER_TABLE_PHASE_UPDATE_STATS); + } + /* fall through */ + case DDL_ALTER_TABLE_PHASE_UPDATE_BINARY_LOG: + { + /* Write ALTER TABLE query to binary log */ + if (recovery_state.query.length() && mysql_bin_log.is_open()) + { + LEX_CSTRING save_db; + /* Reuse old xid value if possible */ + if (!recovery_state.xid) + recovery_state.xid= server_uuid_value(); + thd->binlog_xid= recovery_state.xid; + update_xid(recovery_state.execute_entry_pos, thd->binlog_xid); + + mysql_mutex_unlock(&LOCK_gdl); + save_db= thd->db; + lex_string_set3(&thd->db, recovery_state.db.ptr(), + recovery_state.db.length()); + (void) thd->binlog_query(THD::STMT_QUERY_TYPE, + recovery_state.query.ptr(), + recovery_state.query.length(), + TRUE, FALSE, FALSE, 0); + thd->binlog_xid= 0; + thd->db= save_db; + mysql_mutex_lock(&LOCK_gdl); + } + recovery_state.query.length(0); + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + break; + } + /* + The following cases are when alter table failed and we have to roll + back + */ + case DDL_ALTER_TABLE_PHASE_CREATED: + { + /* + Temporary table should have been created. Delete it. 
+ */ + if (likely(hton)) + { + error= hton->drop_table(hton, ddl_log_entry->tmp_name.str); + if (non_existing_table_error(error)) + error= 0; + } + (void) update_phase(entry_pos, DDL_ALTER_TABLE_PHASE_INIT); + } + /* fall through */ + case DDL_ALTER_TABLE_PHASE_INIT: + { + /* + A temporary .frm and possible a .par files should have been created + */ + strxmov(to_path, ddl_log_entry->tmp_name.str, reg_ext, NullS); + mysql_file_delete(key_file_frm, to_path, MYF(MY_WME|MY_IGNORE_ENOENT)); + strxmov(to_path, ddl_log_entry->tmp_name.str, PAR_EXT, NullS); + mysql_file_delete(key_file_partition_ddl_log, to_path, + MYF(MY_WME|MY_IGNORE_ENOENT)); + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + break; + } + } + delete org_file; + break; + } + case DDL_LOG_STORE_QUERY_ACTION: + { + /* + Read query for next ddl command + */ + if (ddl_log_entry->flags) + { + /* + First QUERY event. Allocate query string. + Query length is stored in unique_id + */ + if (recovery_state.query.alloc((size_t) (ddl_log_entry->unique_id+1))) + goto end; + recovery_state.query.length(0); + recovery_state.db.copy(ddl_log_entry->db.str, ddl_log_entry->db.length, + system_charset_info); + } + if (unlikely(recovery_state.query.length() + + ddl_log_entry->extra_name.length > + recovery_state.query.alloced_length())) + { + /* Impossible length. 
      Ignore query */
      recovery_state.query.length(0);
      error= 1;
      my_error(ER_INTERNAL_ERROR, MYF(0),
               "DDL log: QUERY event has impossible length");
      break;
    }
    /* Append this QUERY fragment (stored in extra_name) to the query */
    recovery_state.query.qs_append(&ddl_log_entry->extra_name);
    break;
  }
  default:
    DBUG_ASSERT(0);
    break;
  }

end:
  delete file;
  /* We are only interested in errors that were not ignored */
  if ((error= (no_such_table_handler.unhandled_errors > 0)))
    my_errno= no_such_table_handler.first_error;
  thd->pop_internal_handler();
  DBUG_RETURN(error);
}


/**
  Get a free entry in the ddl log

  Reuses an entry from the free list when one is available; otherwise
  allocates a new DDL_LOG_MEMORY_ENTRY and assigns it the next file
  position. The entry is then linked first into the used list.

  Caller holds LOCK_gdl (asserted by callers such as ddl_log_write_entry);
  the list manipulation below assumes exclusive access to global_ddl_log.

  @param[out] active_entry   A ddl log memory entry returned; set to 0
                             on allocation failure

  @return Operation status
  @retval TRUE               Error (out of memory)
  @retval FALSE              Success
*/

static bool ddl_log_get_free_entry(DDL_LOG_MEMORY_ENTRY **active_entry)
{
  DDL_LOG_MEMORY_ENTRY *used_entry;
  DDL_LOG_MEMORY_ENTRY *first_used= global_ddl_log.first_used;
  DBUG_ENTER("ddl_log_get_free_entry");

  if (global_ddl_log.first_free == NULL)
  {
    /* No entry to reuse; allocate one and give it the next position */
    if (!(used_entry= ((DDL_LOG_MEMORY_ENTRY*)
                       my_malloc(key_memory_DDL_LOG_MEMORY_ENTRY,
                                 sizeof(DDL_LOG_MEMORY_ENTRY), MYF(MY_WME)))))
    {
      sql_print_error("DDL_LOG: Failed to allocate memory for ddl log free "
                      "list");
      *active_entry= 0;
      DBUG_RETURN(TRUE);
    }
    global_ddl_log.num_entries++;
    used_entry->entry_pos= global_ddl_log.num_entries;
  }
  else
  {
    used_entry= global_ddl_log.first_free;
    global_ddl_log.first_free= used_entry->next_log_entry;
  }
  /*
    Move from free list to used list (linked in first)
  */
  used_entry->next_log_entry= first_used;
  used_entry->prev_log_entry= NULL;
  used_entry->next_active_log_entry= NULL;
  global_ddl_log.first_used= used_entry;
  if (first_used)
    first_used->prev_log_entry= used_entry;

  *active_entry= used_entry;
  DBUG_RETURN(FALSE);
}


/**
  Release a log memory entry.
  @param log_entry   Log memory entry to release
*/

void ddl_log_release_memory_entry(DDL_LOG_MEMORY_ENTRY *log_entry)
{
  DDL_LOG_MEMORY_ENTRY *next_log_entry= log_entry->next_log_entry;
  DDL_LOG_MEMORY_ENTRY *prev_log_entry= log_entry->prev_log_entry;
  DBUG_ENTER("ddl_log_release_memory_entry");

  mysql_mutex_assert_owner(&LOCK_gdl);
  /* Push the entry onto the free list */
  log_entry->next_log_entry= global_ddl_log.first_free;
  global_ddl_log.first_free= log_entry;

  /* Unlink it from the doubly linked used list */
  if (prev_log_entry)
    prev_log_entry->next_log_entry= next_log_entry;
  else
    global_ddl_log.first_used= next_log_entry;
  if (next_log_entry)
    next_log_entry->prev_log_entry= prev_log_entry;
  // Ensure we get a crash if we try to access this link again.
  log_entry->next_active_log_entry= (DDL_LOG_MEMORY_ENTRY*) 0x1;
  DBUG_VOID_RETURN;
}


/**
  Execute one entry in the ddl log.

  Executing an entry means executing a linked list of actions.

  @param first_entry           Reference to first action in entry

  @return Operation status
  @retval TRUE                 Error
  @retval FALSE                Success
*/

static bool ddl_log_execute_entry_no_lock(THD *thd, uint first_entry)
{
  DDL_LOG_ENTRY ddl_log_entry;
  uint read_entry= first_entry;
  MEM_ROOT mem_root;
  DBUG_ENTER("ddl_log_execute_entry_no_lock");

  mysql_mutex_assert_owner(&LOCK_gdl);
  init_sql_alloc(key_memory_gdl, &mem_root, TABLE_ALLOC_BLOCK_SIZE, 0,
                 MYF(MY_THREAD_SPECIFIC));
  do
  {
    if (read_ddl_log_entry(read_entry, &ddl_log_entry))
    {
      /* Error logged to error log.
Continue with next log entry */ + break; + } + DBUG_ASSERT(ddl_log_entry.entry_type == DDL_LOG_ENTRY_CODE || + ddl_log_entry.entry_type == DDL_LOG_IGNORE_ENTRY_CODE); + + if (ddl_log_execute_action(thd, &mem_root, &ddl_log_entry)) + { + uint action_type= ddl_log_entry.action_type; + if (action_type >= DDL_LOG_LAST_ACTION) + action_type= 0; + + /* Write to error log and continue with next log entry */ + sql_print_error("DDL_LOG: Got error %d when trying to execute action " + "for entry %u of type '%s'", + (int) my_errno, read_entry, + ddl_log_action_name[action_type]); + break; + } + read_entry= ddl_log_entry.next_entry; + } while (read_entry); + + free_root(&mem_root, MYF(0)); + DBUG_RETURN(FALSE); +} + + +/* + External interface methods for the DDL log Module + --------------------------------------------------- +*/ + +/** + Write a ddl log entry. + + A careful write of the ddl log is performed to ensure that we can + handle crashes occurring during CREATE and ALTER TABLE processing. + + @param ddl_log_entry Information about log entry + @param[out] entry_written Entry information written into + + @return Operation status + @retval TRUE Error + @retval FALSE Success +*/ + +bool ddl_log_write_entry(DDL_LOG_ENTRY *ddl_log_entry, + DDL_LOG_MEMORY_ENTRY **active_entry) +{ + bool error; + DBUG_ENTER("ddl_log_write_entry"); + + *active_entry= 0; + mysql_mutex_assert_owner(&LOCK_gdl); + DBUG_ASSERT(global_ddl_log.open); + if (unlikely(!global_ddl_log.open)) + { + my_error(ER_INTERNAL_ERROR, MYF(0), "ddl log not initialized"); + DBUG_RETURN(TRUE); + } + + ddl_log_entry->entry_type= DDL_LOG_ENTRY_CODE; + set_global_from_ddl_log_entry(ddl_log_entry); + if (ddl_log_get_free_entry(active_entry)) + DBUG_RETURN(TRUE); + + error= FALSE; + DBUG_PRINT("ddl_log", + ("pos: %u->%u action: %u (%s) phase: %u " + "handler: '%s' name: '%s' from_name: '%s' tmp_name: '%s'", + (*active_entry)->entry_pos, + (uint) ddl_log_entry->next_entry, + (uint) ddl_log_entry->action_type, + 
ddl_log_action_name[ddl_log_entry->action_type], + (uint) ddl_log_entry->phase, + ddl_log_entry->handler_name.str, + ddl_log_entry->name.str, + ddl_log_entry->from_name.str, + ddl_log_entry->tmp_name.str)); + + if (unlikely(write_ddl_log_file_entry((*active_entry)->entry_pos))) + { + sql_print_error("DDL_LOG: Failed to write entry %u", + (*active_entry)->entry_pos); + ddl_log_release_memory_entry(*active_entry); + *active_entry= 0; + error= TRUE; + } + DBUG_RETURN(error); +} + + +/** + @brief Write or update execute entry in the ddl log. + + @details An execute entry points to the first entry that should + be excuted during recovery. In some cases it's only written once, + in other cases it's updated for each log entry to point to the new + header for the list. + + When called, the previous log entries have already been written but not yet + synched to disk. We write a couple of log entries that describes + action to perform. This entries are set-up in a linked list, + however only when an execute entry is put as the first entry these will be + executed during recovery. + + @param first_entry First entry in linked list of entries + to execute. + @param cond_entry Check and don't execute if cond_entry is active + @param[in,out] active_entry Entry to execute, 0 = NULL if the entry + is written first time and needs to be + returned. In this case the entry written + is returned in this parameter + @return Operation status + @retval TRUE Error + @retval FALSE Success +*/ + +bool ddl_log_write_execute_entry(uint first_entry, + uint cond_entry, + DDL_LOG_MEMORY_ENTRY **active_entry) +{ + uchar *file_entry_buf= global_ddl_log.file_entry_buf; + bool got_free_entry= 0; + DBUG_ENTER("ddl_log_write_execute_entry"); + + mysql_mutex_assert_owner(&LOCK_gdl); + /* + We haven't synched the log entries yet, we sync them now before + writing the execute entry. 
+ */ + (void) ddl_log_sync_no_lock(); + bzero(file_entry_buf, global_ddl_log.io_size); + + file_entry_buf[DDL_LOG_ENTRY_TYPE_POS]= (uchar)DDL_LOG_EXECUTE_CODE; + int4store(file_entry_buf + DDL_LOG_NEXT_ENTRY_POS, first_entry); + int8store(file_entry_buf + DDL_LOG_ID_POS, ((ulonglong)cond_entry << DDL_LOG_RETRY_BITS)); + + if (!(*active_entry)) + { + if (ddl_log_get_free_entry(active_entry)) + DBUG_RETURN(TRUE); + got_free_entry= TRUE; + } + DBUG_PRINT("ddl_log", + ("pos: %u=>%u", + (*active_entry)->entry_pos, first_entry)); + if (write_ddl_log_file_entry((*active_entry)->entry_pos)) + { + sql_print_error("DDL_LOG: Error writing execute entry %u", + (*active_entry)->entry_pos); + if (got_free_entry) + { + ddl_log_release_memory_entry(*active_entry); + *active_entry= 0; + } + DBUG_RETURN(TRUE); + } + (void) ddl_log_sync_no_lock(); + DBUG_RETURN(FALSE); +} + + +/** + Increment phase for entry. Will deactivate entry after all phases are done + + @details see ddl_log_increment_phase_no_lock. + + @param entry_pos Entry position of record to change + + @return Operation status + @retval TRUE Error + @retval FALSE Success +*/ + +bool ddl_log_increment_phase(uint entry_pos) +{ + bool error; + DBUG_ENTER("ddl_log_increment_phase"); + DBUG_PRINT("ddl_log", ("pos: %u", entry_pos)); + + mysql_mutex_lock(&LOCK_gdl); + error= ddl_log_increment_phase_no_lock(entry_pos); + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(error); +} + + +/** + Sync ddl log file. + + @return Operation status + @retval TRUE Error + @retval FALSE Success +*/ + +bool ddl_log_sync() +{ + bool error; + DBUG_ENTER("ddl_log_sync"); + + mysql_mutex_lock(&LOCK_gdl); + error= ddl_log_sync_no_lock(); + mysql_mutex_unlock(&LOCK_gdl); + + DBUG_RETURN(error); +} + + +/** + Execute one entry in the ddl log. + + Executing an entry means executing a linked list of actions. + + This function is called for recovering partitioning in case of error. 
+ + @param first_entry Reference to first action in entry + + @return Operation status + @retval TRUE Error + @retval FALSE Success +*/ + +bool ddl_log_execute_entry(THD *thd, uint first_entry) +{ + bool error; + DBUG_ENTER("ddl_log_execute_entry"); + + mysql_mutex_lock(&LOCK_gdl); + error= ddl_log_execute_entry_no_lock(thd, first_entry); + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(error); +} + + +/** + Close the ddl log. +*/ + +static void close_ddl_log() +{ + DBUG_ENTER("close_ddl_log"); + if (global_ddl_log.file_id >= 0) + { + (void) mysql_file_close(global_ddl_log.file_id, MYF(MY_WME)); + global_ddl_log.file_id= (File) -1; + } + global_ddl_log.open= 0; + DBUG_VOID_RETURN; +} + + +/** + Loop over ddl log excute entries and mark those that are already stored + in the binary log as completed + + @return + @retval 0 ok + @return 1 fail (write error) + +*/ + +bool ddl_log_close_binlogged_events(HASH *xids) +{ + uint i; + DDL_LOG_ENTRY ddl_log_entry; + DBUG_ENTER("ddl_log_close_binlogged_events"); + + if (global_ddl_log.num_entries == 0 || xids->records == 0) + DBUG_RETURN(0); + + mysql_mutex_lock(&LOCK_gdl); + for (i= 1; i <= global_ddl_log.num_entries; i++) + { + if (read_ddl_log_entry(i, &ddl_log_entry)) + break; // Read error. Stop reading + DBUG_PRINT("xid",("xid: %llu", ddl_log_entry.xid)); + if (ddl_log_entry.entry_type == DDL_LOG_EXECUTE_CODE && + ddl_log_entry.xid != 0 && + my_hash_search(xids, (uchar*) &ddl_log_entry.xid, + sizeof(ddl_log_entry.xid))) + { + if (disable_execute_entry(i)) + { + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(1); // Write error. Fatal! + } + } + } + (void) ddl_log_sync_no_lock(); + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(0); +} + + +/** + Execute the ddl log at recovery of MySQL Server. + + @return + @retval 0 Ok. + @retval > 0 Fatal error. 
We have to abort (can't create ddl log) + @return < -1 Recovery failed, but new log exists and is usable + +*/ + +int ddl_log_execute_recovery() +{ + uint i, count= 0; + int error= 0; + THD *thd, *original_thd; + DDL_LOG_ENTRY ddl_log_entry; + static char recover_query_string[]= "INTERNAL DDL LOG RECOVER IN PROGRESS"; + DBUG_ENTER("ddl_log_execute_recovery"); + + if (!global_ddl_log.backup_done && !global_ddl_log.created) + ddl_log_create_backup_file(); + + if (global_ddl_log.num_entries == 0) + DBUG_RETURN(0); + + /* + To be able to run this from boot, we allocate a temporary THD + */ + if (!(thd=new THD(0))) + { + DBUG_ASSERT(0); // Fatal error + DBUG_RETURN(1); + } + original_thd= current_thd; // Probably NULL + thd->thread_stack= (char*) &thd; + thd->store_globals(); + thd->init(); // Needed for error messages + + thd->log_all_errors= (global_system_variables.log_warnings >= 3); + recovery_state.drop_table.free(); + recovery_state.drop_view.free(); + recovery_state.query.free(); + recovery_state.db.free(); + + thd->set_query(recover_query_string, strlen(recover_query_string)); + + mysql_mutex_lock(&LOCK_gdl); + for (i= 1; i <= global_ddl_log.num_entries; i++) + { + if (read_ddl_log_entry(i, &ddl_log_entry)) + { + error= -1; + continue; + } + if (ddl_log_entry.entry_type == DDL_LOG_EXECUTE_CODE) + { + /* + Remeber information about executive ddl log entry, + used for binary logging during recovery + */ + recovery_state.execute_entry_pos= i; + recovery_state.xid= ddl_log_entry.xid; + + /* purecov: begin tested */ + if ((ddl_log_entry.unique_id & DDL_LOG_RETRY_MASK) > DDL_LOG_MAX_RETRY) + { + error= -1; + continue; + } + update_unique_id(i, ++ddl_log_entry.unique_id); + if ((ddl_log_entry.unique_id & DDL_LOG_RETRY_MASK) > DDL_LOG_MAX_RETRY) + { + sql_print_error("DDL_LOG: Aborting executing entry %u after %llu " + "retries", i, ddl_log_entry.unique_id); + error= -1; + continue; + } + /* purecov: end tested */ + + uint cond_entry= (uint)(ddl_log_entry.unique_id >> 
DDL_LOG_RETRY_BITS); + + if (cond_entry && is_execute_entry_active(cond_entry)) + { + if (disable_execute_entry(i)) + error= -1; + continue; + } + + if (ddl_log_execute_entry_no_lock(thd, ddl_log_entry.next_entry)) + { + /* Real unpleasant scenario but we have to continue anyway */ + error= -1; + continue; + } + count++; + } + } + recovery_state.drop_table.free(); + recovery_state.drop_view.free(); + recovery_state.query.free(); + recovery_state.db.free(); + close_ddl_log(); + mysql_mutex_unlock(&LOCK_gdl); + thd->reset_query(); + delete thd; + set_current_thd(original_thd); + + /* + Create a new ddl_log to get rid of old stuff and ensure that header matches + the current source version + */ + if (create_ddl_log()) + error= 1; + if (count > 0) + sql_print_information("DDL_LOG: Crash recovery executed %u entries", + count); + + set_current_thd(original_thd); + DBUG_RETURN(error); +} + + +static void ddl_log_free_lists() +{ + DDL_LOG_MEMORY_ENTRY *free_list= global_ddl_log.first_free; + DDL_LOG_MEMORY_ENTRY *used_list= global_ddl_log.first_used; + + while (used_list) + { + DDL_LOG_MEMORY_ENTRY *tmp= used_list->next_log_entry; + my_free(used_list); + used_list= tmp; + } + while (free_list) + { + DDL_LOG_MEMORY_ENTRY *tmp= free_list->next_log_entry; + my_free(free_list); + free_list= tmp; + } + global_ddl_log.first_free= global_ddl_log.first_used= 0; +} + + +/** + Release all memory allocated to the ddl log and delete the ddl log +*/ + +void ddl_log_release() +{ + char file_name[FN_REFLEN]; + DBUG_ENTER("ddl_log_release"); + + if (!global_ddl_log.initialized) + DBUG_VOID_RETURN; + + global_ddl_log.initialized= 0; + ddl_log_free_lists(); + + my_free(global_ddl_log.file_entry_buf); + global_ddl_log.file_entry_buf= 0; + close_ddl_log(); + + create_ddl_log_file_name(file_name, 0); + (void) mysql_file_delete(key_file_global_ddl_log, file_name, MYF(0)); + mysql_mutex_destroy(&LOCK_gdl); + DBUG_VOID_RETURN; +} + + +/** + Methods for DDL_LOG_STATE +*/ + +void 
ddl_log_add_entry(DDL_LOG_STATE *state, DDL_LOG_MEMORY_ENTRY *log_entry) +{ + log_entry->next_active_log_entry= state->list; + state->main_entry= state->list= log_entry; +} + + +void ddl_log_release_entries(DDL_LOG_STATE *ddl_log_state) +{ + DDL_LOG_MEMORY_ENTRY *next; + for (DDL_LOG_MEMORY_ENTRY *log_entry= ddl_log_state->list; + log_entry; + log_entry= next) + { + next= log_entry->next_active_log_entry; + ddl_log_release_memory_entry(log_entry); + } + ddl_log_state->list= 0; + + if (ddl_log_state->execute_entry) + { + ddl_log_release_memory_entry(ddl_log_state->execute_entry); + ddl_log_state->execute_entry= 0; // Not needed but future safe + } +} + + +/**************************************************************************** + Implementations of common ddl entries +*****************************************************************************/ + +/** + Complete ddl logging. This is done when all statements has completed + successfully and we can disable the execute log entry. +*/ + +void ddl_log_complete(DDL_LOG_STATE *state) +{ + DBUG_ENTER("ddl_log_complete"); + + if (unlikely(!state->list)) + DBUG_VOID_RETURN; // ddl log not used + + mysql_mutex_lock(&LOCK_gdl); + if (likely(state->execute_entry)) + ddl_log_disable_execute_entry(&state->execute_entry); + ddl_log_release_entries(state); + mysql_mutex_unlock(&LOCK_gdl); + state->list= 0; + DBUG_VOID_RETURN; +}; + + +/** + Revert (execute) all entries in the ddl log + + This is called for failed rename table, create trigger or drop trigger. 
+*/ + +bool ddl_log_revert(THD *thd, DDL_LOG_STATE *state) +{ + bool res= 0; + DBUG_ENTER("ddl_log_revert"); + + if (unlikely(!state->list)) + DBUG_RETURN(0); // ddl log not used + + mysql_mutex_lock(&LOCK_gdl); + if (likely(state->execute_entry)) + { + res= ddl_log_execute_entry_no_lock(thd, state->list->entry_pos); + ddl_log_disable_execute_entry(&state->execute_entry); + } + ddl_log_release_entries(state); + mysql_mutex_unlock(&LOCK_gdl); + state->list= 0; + DBUG_RETURN(res); +} + + +/* + Update phase of main ddl log entry (usually the last one created, + except in case of query events, the one before the query event). +*/ + +bool ddl_log_update_phase(DDL_LOG_STATE *state, uchar phase) +{ + DBUG_ENTER("ddl_log_update_phase"); + if (likely(state->list)) + DBUG_RETURN(update_phase(state->main_entry->entry_pos, phase)); + DBUG_RETURN(0); +} + + +/* + Update flag bits in main ddl log entry (usually last created, except in case + of query events, the one before the query event. +*/ + +bool ddl_log_add_flag(DDL_LOG_STATE *state, uint16 flags) +{ + DBUG_ENTER("ddl_log_update_phase"); + if (likely(state->list)) + { + state->flags|= flags; + DBUG_RETURN(update_flags(state->main_entry->entry_pos, state->flags)); + } + DBUG_RETURN(0); +} + + +/** + Update unique_id (used for inplace alter table) +*/ + +bool ddl_log_update_unique_id(DDL_LOG_STATE *state, ulonglong id) +{ + DBUG_ENTER("ddl_log_update_unique_id"); + DBUG_PRINT("enter", ("id: %llu", id)); + /* The following may not be true in case of temporary tables */ + if (likely(state->list)) + DBUG_RETURN(update_unique_id(state->main_entry->entry_pos, id)); + DBUG_RETURN(0); +} + + +/** + Disable last ddl entry +*/ + +bool ddl_log_disable_entry(DDL_LOG_STATE *state) +{ + DBUG_ENTER("ddl_log_disable_entry"); + /* The following may not be true in case of temporary tables */ + if (likely(state->list)) + DBUG_RETURN(update_phase(state->list->entry_pos, DDL_LOG_FINAL_PHASE)); + DBUG_RETURN(0); +} + + +/** + Update XID for 
execute event +*/ + +bool ddl_log_update_xid(DDL_LOG_STATE *state, ulonglong xid) +{ + DBUG_ENTER("ddl_log_update_xid"); + DBUG_PRINT("enter", ("xid: %llu", xid)); + /* The following may not be true in case of temporary tables */ + if (likely(state->execute_entry)) + DBUG_RETURN(update_xid(state->execute_entry->entry_pos, xid)); + DBUG_RETURN(0); +} + + +/* + Write ddl_log_entry and write or update ddl_execute_entry + + Will update DDL_LOG_STATE->flags +*/ + +static bool ddl_log_write(DDL_LOG_STATE *ddl_state, + DDL_LOG_ENTRY *ddl_log_entry) +{ + int error; + DDL_LOG_MEMORY_ENTRY *log_entry; + DBUG_ENTER("ddl_log_write"); + + mysql_mutex_lock(&LOCK_gdl); + error= ((ddl_log_write_entry(ddl_log_entry, &log_entry)) || + ddl_log_write_execute_entry(log_entry->entry_pos, 0, + &ddl_state->execute_entry)); + mysql_mutex_unlock(&LOCK_gdl); + if (error) + { + if (log_entry) + ddl_log_release_memory_entry(log_entry); + DBUG_RETURN(1); + } + ddl_log_add_entry(ddl_state, log_entry); + ddl_state->flags|= ddl_log_entry->flags; // Update cache + DBUG_RETURN(0); +} + + +/** + Logging of rename table +*/ + +bool ddl_log_rename_table(DDL_LOG_STATE *ddl_state, + handlerton *hton, + const LEX_CSTRING *org_db, + const LEX_CSTRING *org_alias, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_alias) +{ + DDL_LOG_ENTRY ddl_log_entry; + DBUG_ENTER("ddl_log_rename_file"); + + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + + ddl_log_entry.action_type= DDL_LOG_RENAME_TABLE_ACTION; + ddl_log_entry.next_entry= ddl_state->list ? 
ddl_state->list->entry_pos : 0; + lex_string_set(&ddl_log_entry.handler_name, + ha_resolve_storage_engine_name(hton)); + ddl_log_entry.db= *const_cast(new_db); + ddl_log_entry.name= *const_cast(new_alias); + ddl_log_entry.from_db= *const_cast(org_db); + ddl_log_entry.from_name= *const_cast(org_alias); + ddl_log_entry.phase= DDL_RENAME_PHASE_TABLE; + + DBUG_RETURN(ddl_log_write(ddl_state, &ddl_log_entry)); +} + +/* + Logging of rename view +*/ + +bool ddl_log_rename_view(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *org_db, + const LEX_CSTRING *org_alias, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_alias) +{ + DDL_LOG_ENTRY ddl_log_entry; + DBUG_ENTER("ddl_log_rename_file"); + + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + + ddl_log_entry.action_type= DDL_LOG_RENAME_VIEW_ACTION; + ddl_log_entry.next_entry= ddl_state->list ? ddl_state->list->entry_pos : 0; + ddl_log_entry.db= *const_cast(new_db); + ddl_log_entry.name= *const_cast(new_alias); + ddl_log_entry.from_db= *const_cast(org_db); + ddl_log_entry.from_name= *const_cast(org_alias); + + DBUG_RETURN(ddl_log_write(ddl_state, &ddl_log_entry)); +} + + +/** + Logging of DROP TABLE and DROP VIEW + + Note that in contrast to rename, which are re-done in reverse order, + deletes are stored in a linked list according to delete order. This + is to ensure that the tables, for the query generated for binlog, + is in original delete order. 
+*/ + +static bool ddl_log_drop_init(DDL_LOG_STATE *ddl_state, + ddl_log_action_code action_code, + const LEX_CSTRING *db, + const LEX_CSTRING *comment) +{ + DDL_LOG_ENTRY ddl_log_entry; + DBUG_ENTER("ddl_log_drop_file"); + + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + + ddl_log_entry.action_type= action_code; + ddl_log_entry.from_db= *const_cast(db); + ddl_log_entry.tmp_name= *const_cast(comment); + + DBUG_RETURN(ddl_log_write(ddl_state, &ddl_log_entry)); +} + + +bool ddl_log_drop_table_init(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *db, + const LEX_CSTRING *comment) +{ + return ddl_log_drop_init(ddl_state, DDL_LOG_DROP_INIT_ACTION, + db, comment); +} + +bool ddl_log_drop_view_init(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *db) +{ + return ddl_log_drop_init(ddl_state, DDL_LOG_DROP_INIT_ACTION, + db, &empty_clex_str); +} + + +/** + Log DROP TABLE to the ddl log. + + This code does not call ddl_log_write() as we want the events to + be stored in call order instead of reverse order, which is the normal + case for all other events. + See also comment before ddl_log_drop_init(). 
+*/ + +static bool ddl_log_drop(DDL_LOG_STATE *ddl_state, + ddl_log_action_code action_code, + uint phase, + handlerton *hton, + const LEX_CSTRING *path, + const LEX_CSTRING *db, + const LEX_CSTRING *table) +{ + DDL_LOG_ENTRY ddl_log_entry; + DDL_LOG_MEMORY_ENTRY *log_entry; + DBUG_ENTER("ddl_log_drop"); + + DBUG_ASSERT(ddl_state->list); + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + + ddl_log_entry.action_type= action_code; + if (hton) + lex_string_set(&ddl_log_entry.handler_name, + ha_resolve_storage_engine_name(hton)); + ddl_log_entry.db= *const_cast(db); + ddl_log_entry.name= *const_cast(table); + ddl_log_entry.tmp_name= *const_cast(path); + ddl_log_entry.phase= (uchar) phase; + + mysql_mutex_lock(&LOCK_gdl); + if (ddl_log_write_entry(&ddl_log_entry, &log_entry)) + goto error; + + (void) ddl_log_sync_no_lock(); + if (update_next_entry_pos(ddl_state->list->entry_pos, + log_entry->entry_pos)) + { + ddl_log_release_memory_entry(log_entry); + goto error; + } + + mysql_mutex_unlock(&LOCK_gdl); + ddl_log_add_entry(ddl_state, log_entry); + DBUG_RETURN(0); + +error: + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(1); +} + + +bool ddl_log_drop_table(DDL_LOG_STATE *ddl_state, + handlerton *hton, + const LEX_CSTRING *path, + const LEX_CSTRING *db, + const LEX_CSTRING *table) +{ + DBUG_ENTER("ddl_log_drop_table"); + DBUG_RETURN(ddl_log_drop(ddl_state, + DDL_LOG_DROP_TABLE_ACTION, DDL_DROP_PHASE_TABLE, + hton, path, db, table)); +} + + +bool ddl_log_drop_view(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *path, + const LEX_CSTRING *db, + const LEX_CSTRING *table) +{ + DBUG_ENTER("ddl_log_drop_view"); + DBUG_RETURN(ddl_log_drop(ddl_state, + DDL_LOG_DROP_VIEW_ACTION, 0, + (handlerton*) 0, path, db, table)); +} + + +bool ddl_log_drop_trigger(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *db, + const LEX_CSTRING *table, + const LEX_CSTRING *trigger_name, + const LEX_CSTRING *query) +{ + DDL_LOG_ENTRY ddl_log_entry; + MY_STAT stat_info; + char path[FN_REFLEN+1]; + off_t 
frm_length= 0; + size_t max_query_length; + DBUG_ENTER("ddl_log_drop_trigger"); + + build_table_filename(path, sizeof(path)-1, db->str, table->str, TRG_EXT, 0); + + /* We can use length of frm file as an indication if trigger was removed */ + if (my_stat(path, &stat_info, MYF(MY_WME | ME_WARNING))) + frm_length= (off_t) stat_info.st_size; + + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + + ddl_log_entry.action_type= DDL_LOG_DROP_TRIGGER_ACTION; + ddl_log_entry.unique_id= (ulonglong) frm_length; + ddl_log_entry.db= *const_cast(db); + ddl_log_entry.name= *const_cast(table); + ddl_log_entry.from_name= *const_cast(trigger_name); + + /* + If we can store query as is, we store it. Otherwise it will be + re-generated on recovery + */ + + max_query_length= ddl_log_free_space_in_entry(&ddl_log_entry); + if (max_query_length >= query->length) + ddl_log_entry.tmp_name= *const_cast(query); + + DBUG_RETURN(ddl_log_write(ddl_state, &ddl_log_entry)); +} + + +/** + Log DROP DATABASE + + This is logged after all DROP TABLE's for the database. + As now know we are going to log DROP DATABASE to the binary log, we want + to ignore want to ignore all preceding DROP TABLE entries. We do that by + linking this entry directly after the execute entry and forgetting the + link to the previous entries (not setting ddl_log_entry.next_entry) +*/ + +bool ddl_log_drop_db(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *db, const LEX_CSTRING *path) +{ + DDL_LOG_ENTRY ddl_log_entry; + DBUG_ENTER("ddl_log_drop_db"); + + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + ddl_log_entry.action_type= DDL_LOG_DROP_DB_ACTION; + ddl_log_entry.db= *const_cast(db); + ddl_log_entry.tmp_name= *const_cast(path); + DBUG_RETURN(ddl_log_write(ddl_state, &ddl_log_entry)); +} + + +/** + Log CREATE TABLE + + @param only_frm On recovery, only drop the .frm. This is needed for + example when deleting a table that was discovered. 
+*/ + +bool ddl_log_create_table(DDL_LOG_STATE *ddl_state, + handlerton *hton, + const LEX_CSTRING *path, + const LEX_CSTRING *db, + const LEX_CSTRING *table, + bool only_frm) +{ + DDL_LOG_ENTRY ddl_log_entry; + DBUG_ENTER("ddl_log_create_table"); + + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + ddl_log_entry.action_type= DDL_LOG_CREATE_TABLE_ACTION; + if (hton) + lex_string_set(&ddl_log_entry.handler_name, + ha_resolve_storage_engine_name(hton)); + ddl_log_entry.db= *const_cast(db); + ddl_log_entry.name= *const_cast(table); + ddl_log_entry.tmp_name= *const_cast(path); + ddl_log_entry.flags= only_frm ? DDL_LOG_FLAG_ONLY_FRM : 0; + + DBUG_RETURN(ddl_log_write(ddl_state, &ddl_log_entry)); +} + + +/** + Log CREATE VIEW +*/ + +bool ddl_log_create_view(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *path, + enum_ddl_log_create_view_phase phase) +{ + DDL_LOG_ENTRY ddl_log_entry; + DBUG_ENTER("ddl_log_create_view"); + + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + ddl_log_entry.action_type= DDL_LOG_CREATE_VIEW_ACTION; + ddl_log_entry.tmp_name= *const_cast(path); + ddl_log_entry.phase= (uchar) phase; + DBUG_RETURN(ddl_log_write(ddl_state, &ddl_log_entry)); +} + + +/** + Log creation of temporary file that should be deleted during recovery + + @param ddl_log_state ddl_state + @param path Path to file to be deleted + @param depending_state If not NULL, then do not delete the temp file if this + entry exists and is active. +*/ + +bool ddl_log_delete_tmp_file(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *path, + DDL_LOG_STATE *depending_state) +{ + DDL_LOG_ENTRY ddl_log_entry; + DBUG_ENTER("ddl_log_delete_tmp_file"); + + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + ddl_log_entry.action_type= DDL_LOG_DELETE_TMP_FILE_ACTION; + ddl_log_entry.next_entry= ddl_state->list ? 
ddl_state->list->entry_pos : 0; + ddl_log_entry.tmp_name= *const_cast(path); + if (depending_state) + ddl_log_entry.unique_id= depending_state->execute_entry->entry_pos; + DBUG_RETURN(ddl_log_write(ddl_state, &ddl_log_entry)); +} + + +/** + Log CREATE TRIGGER +*/ + +bool ddl_log_create_trigger(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *db, const LEX_CSTRING *table, + const LEX_CSTRING *trigger_name, + enum_ddl_log_create_trigger_phase phase) +{ + DDL_LOG_ENTRY ddl_log_entry; + DBUG_ENTER("ddl_log_create_view"); + + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + ddl_log_entry.action_type= DDL_LOG_CREATE_TRIGGER_ACTION; + ddl_log_entry.db= *const_cast(db); + ddl_log_entry.name= *const_cast(table); + ddl_log_entry.tmp_name= *const_cast(trigger_name); + ddl_log_entry.phase= (uchar) phase; + DBUG_RETURN(ddl_log_write(ddl_state, &ddl_log_entry)); +} + + +/** + Log ALTER TABLE + + $param backup_name Name of backup table. In case of ALTER TABLE rename + this is the final table name +*/ + +bool ddl_log_alter_table(DDL_LOG_STATE *ddl_state, + handlerton *org_hton, + const LEX_CSTRING *db, const LEX_CSTRING *table, + handlerton *new_hton, + handlerton *partition_underlying_hton, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_table, + const LEX_CSTRING *frm_path, + const LEX_CSTRING *backup_name, + const LEX_CUSTRING *version, + ulonglong table_version, + bool is_renamed) +{ + DDL_LOG_ENTRY ddl_log_entry; + DBUG_ENTER("ddl_log_alter_table"); + DBUG_ASSERT(new_hton); + DBUG_ASSERT(org_hton); + + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + ddl_log_entry.action_type= DDL_LOG_ALTER_TABLE_ACTION; + if (new_hton) + lex_string_set(&ddl_log_entry.handler_name, + ha_resolve_storage_engine_name(new_hton)); + /* Store temporary table name */ + ddl_log_entry.db= *const_cast(new_db); + ddl_log_entry.name= *const_cast(new_table); + if (org_hton) + lex_string_set(&ddl_log_entry.from_handler_name, + ha_resolve_storage_engine_name(org_hton)); + ddl_log_entry.from_db= 
*const_cast(db); + ddl_log_entry.from_name= *const_cast(table); + ddl_log_entry.tmp_name= *const_cast(frm_path); + ddl_log_entry.extra_name= *const_cast(backup_name); + ddl_log_entry.flags= is_renamed ? DDL_LOG_FLAG_ALTER_RENAME : 0; + ddl_log_entry.unique_id= table_version; + + /* + If we are doing an inplace of a partition engine, we need to log the + underlaying engine. We store this is in ddl_log_entry.handler_name + */ + if (new_hton == org_hton && partition_underlying_hton != new_hton) + { + lex_string_set(&ddl_log_entry.handler_name, + ha_resolve_storage_engine_name(partition_underlying_hton)); + ddl_log_entry.flags|= DDL_LOG_FLAG_ALTER_PARTITION; + } + DBUG_ASSERT(version->length == MY_UUID_SIZE); + memcpy(ddl_log_entry.uuid, version->str, version->length); + DBUG_RETURN(ddl_log_write(ddl_state, &ddl_log_entry)); +} + + +/* + Store query that later should be logged to binary log + + The links of the query log event is + + execute_log_event -> first log_query_event [-> log_query_event...] -> + action_log_event (probably a LOG_ALTER_TABLE_ACTION event) + + This ensures that when we execute the log_query_event it can collect + the full query from the log_query_events and then execute the + action_log_event with the original query stored in 'recovery_state.query'. + + The query is stored in ddl_log_entry.extra_name as this is the last string + stored in the log block (makes it easier to check and debug). 
+*/ + +bool ddl_log_store_query(THD *thd, DDL_LOG_STATE *ddl_state, + const char *query, size_t length) +{ + DDL_LOG_ENTRY ddl_log_entry; + DDL_LOG_MEMORY_ENTRY *first_entry, *next_entry= 0; + DDL_LOG_MEMORY_ENTRY *original_entry= ddl_state->list; + size_t max_query_length; + uint entry_pos, next_entry_pos= 0, parent_entry_pos; + DBUG_ENTER("ddl_log_store_query"); + DBUG_ASSERT(length <= UINT_MAX32); + DBUG_ASSERT(length > 0); + DBUG_ASSERT(ddl_state->list); + + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + ddl_log_entry.action_type= DDL_LOG_STORE_QUERY_ACTION; + ddl_log_entry.unique_id= length; + ddl_log_entry.flags= 1; // First entry + ddl_log_entry.db= thd->db; // Current database + + max_query_length= ddl_log_free_space_in_entry(&ddl_log_entry); + + mysql_mutex_lock(&LOCK_gdl); + ddl_log_entry.entry_type= DDL_LOG_ENTRY_CODE; + + if (ddl_log_get_free_entry(&first_entry)) + goto err; + parent_entry_pos= ddl_state->list->entry_pos; + entry_pos= first_entry->entry_pos; + ddl_log_add_entry(ddl_state, first_entry); + + while (length) + { + size_t write_length= MY_MIN(length, max_query_length); + ddl_log_entry.extra_name.str= query; + ddl_log_entry.extra_name.length= write_length; + + query+= write_length; + length-= write_length; + + if (length > 0) + { + if (ddl_log_get_free_entry(&next_entry)) + goto err; + ddl_log_entry.next_entry= next_entry_pos= next_entry->entry_pos; + ddl_log_add_entry(ddl_state, next_entry); + } + else + { + /* point next link of last query_action event to the original action */ + ddl_log_entry.next_entry= parent_entry_pos; + } + set_global_from_ddl_log_entry(&ddl_log_entry); + if (unlikely(write_ddl_log_file_entry(entry_pos))) + goto err; + entry_pos= next_entry_pos; + ddl_log_entry.flags= 0; // Only first entry has this set + ddl_log_entry.db.length= 0; // Don't need DB anymore + ddl_log_entry.extra_name.length= 0; + max_query_length= ddl_log_free_space_in_entry(&ddl_log_entry); + } + if 
(ddl_log_write_execute_entry(first_entry->entry_pos, + &ddl_state->execute_entry)) + goto err; + + /* Set the original entry to be used for future PHASE updates */ + ddl_state->main_entry= original_entry; + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(0); +err: + /* + Allocated ddl_log entries will be released by the + ddl_log_release_entries() call in dl_log_complete() + */ + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(1); +} + + +/* + Log an delete frm file +*/ + +/* + TODO: Partitioning atomic DDL refactoring: this should be replaced with + ddl_log_create_table(). +*/ +bool ddl_log_delete_frm(DDL_LOG_STATE *ddl_state, const char *to_path) +{ + DDL_LOG_ENTRY ddl_log_entry; + DDL_LOG_MEMORY_ENTRY *log_entry; + DBUG_ENTER("ddl_log_delete_frm"); + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + ddl_log_entry.action_type= DDL_LOG_DELETE_ACTION; + ddl_log_entry.next_entry= ddl_state->list ? ddl_state->list->entry_pos : 0; + + lex_string_set(&ddl_log_entry.handler_name, reg_ext); + lex_string_set(&ddl_log_entry.name, to_path); + + mysql_mutex_assert_owner(&LOCK_gdl); + if (ddl_log_write_entry(&ddl_log_entry, &log_entry)) + DBUG_RETURN(1); + + ddl_log_add_entry(ddl_state, log_entry); + DBUG_RETURN(0); +} diff --git a/sql/ddl_log.h b/sql/ddl_log.h new file mode 100644 index 00000000..88f6695f --- /dev/null +++ b/sql/ddl_log.h @@ -0,0 +1,358 @@ +/* + Copyright (c) 2000, 2019, Oracle and/or its affiliates. + Copyright (c) 2010, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/* External interfaces to ddl log functions */ + +#ifndef DDL_LOG_INCLUDED +#define DDL_LOG_INCLUDED + +enum ddl_log_entry_code +{ + /* + DDL_LOG_UNKOWN + Here mainly to detect blocks that are all zero + + DDL_LOG_EXECUTE_CODE: + This is a code that indicates that this is a log entry to + be executed, from this entry a linked list of log entries + can be found and executed. + DDL_LOG_ENTRY_CODE: + An entry to be executed in a linked list from an execute log + entry. + DDL_LOG_IGNORE_ENTRY_CODE: + An entry that is to be ignored + */ + DDL_LOG_UNKNOWN= 0, + DDL_LOG_EXECUTE_CODE= 1, + DDL_LOG_ENTRY_CODE= 2, + DDL_LOG_IGNORE_ENTRY_CODE= 3, + DDL_LOG_ENTRY_CODE_LAST= 4 +}; + + +/* + When adding things below, also add an entry to ddl_log_action_names and + ddl_log_entry_phases in ddl_log.cc +*/ + +enum ddl_log_action_code +{ + /* + The type of action that a DDL_LOG_ENTRY_CODE entry is to + perform. + */ + DDL_LOG_UNKNOWN_ACTION= 0, + + /* Delete a .frm file or a table in the partition engine */ + DDL_LOG_DELETE_ACTION= 1, + + /* Rename a .frm fire a table in the partition engine */ + DDL_LOG_RENAME_ACTION= 2, + + /* + Rename an entity after removing the previous entry with the + new name, that is replace this entry. 
+ */ + DDL_LOG_REPLACE_ACTION= 3, + + /* Exchange two entities by renaming them a -> tmp, b -> a, tmp -> b */ + DDL_LOG_EXCHANGE_ACTION= 4, + /* + log do_rename(): Rename of .frm file, table, stat_tables and triggers + */ + DDL_LOG_RENAME_TABLE_ACTION= 5, + DDL_LOG_RENAME_VIEW_ACTION= 6, + DDL_LOG_DROP_INIT_ACTION= 7, + DDL_LOG_DROP_TABLE_ACTION= 8, + DDL_LOG_DROP_VIEW_ACTION= 9, + DDL_LOG_DROP_TRIGGER_ACTION= 10, + DDL_LOG_DROP_DB_ACTION=11, + DDL_LOG_CREATE_TABLE_ACTION=12, + DDL_LOG_CREATE_VIEW_ACTION=13, + DDL_LOG_DELETE_TMP_FILE_ACTION=14, + DDL_LOG_CREATE_TRIGGER_ACTION=15, + DDL_LOG_ALTER_TABLE_ACTION=16, + DDL_LOG_STORE_QUERY_ACTION=17, + DDL_LOG_LAST_ACTION /* End marker */ +}; + + +/* Number of phases for each ddl_log_action_code */ +extern const uchar ddl_log_entry_phases[DDL_LOG_LAST_ACTION]; + + +enum enum_ddl_log_exchange_phase { + EXCH_PHASE_NAME_TO_TEMP= 0, + EXCH_PHASE_FROM_TO_NAME= 1, + EXCH_PHASE_TEMP_TO_FROM= 2, + EXCH_PHASE_END +}; + +enum enum_ddl_log_rename_table_phase { + DDL_RENAME_PHASE_TRIGGER= 0, + DDL_RENAME_PHASE_STAT, + DDL_RENAME_PHASE_TABLE, + DDL_RENAME_PHASE_END +}; + +enum enum_ddl_log_drop_table_phase { + DDL_DROP_PHASE_TABLE=0, + DDL_DROP_PHASE_TRIGGER, + DDL_DROP_PHASE_BINLOG, + DDL_DROP_PHASE_RESET, /* Reset found list of dropped tables */ + DDL_DROP_PHASE_END +}; + +enum enum_ddl_log_drop_db_phase { + DDL_DROP_DB_PHASE_INIT=0, + DDL_DROP_DB_PHASE_LOG, + DDL_DROP_DB_PHASE_END +}; + +enum enum_ddl_log_create_table_phase { + DDL_CREATE_TABLE_PHASE_INIT=0, + DDL_CREATE_TABLE_PHASE_LOG, + DDL_CREATE_TABLE_PHASE_END +}; + +enum enum_ddl_log_create_view_phase { + DDL_CREATE_VIEW_PHASE_NO_OLD_VIEW, + DDL_CREATE_VIEW_PHASE_DELETE_VIEW_COPY, + DDL_CREATE_VIEW_PHASE_OLD_VIEW_COPIED, + DDL_CREATE_VIEW_PHASE_END +}; + +enum enum_ddl_log_create_trigger_phase { + DDL_CREATE_TRIGGER_PHASE_NO_OLD_TRIGGER, + DDL_CREATE_TRIGGER_PHASE_DELETE_COPY, + DDL_CREATE_TRIGGER_PHASE_OLD_COPIED, + DDL_CREATE_TRIGGER_PHASE_END +}; + +enum 
enum_ddl_log_alter_table_phase { + DDL_ALTER_TABLE_PHASE_INIT, + DDL_ALTER_TABLE_PHASE_RENAME_FAILED, + DDL_ALTER_TABLE_PHASE_INPLACE_COPIED, + DDL_ALTER_TABLE_PHASE_INPLACE, + DDL_ALTER_TABLE_PHASE_PREPARE_INPLACE, + DDL_ALTER_TABLE_PHASE_CREATED, + DDL_ALTER_TABLE_PHASE_COPIED, + DDL_ALTER_TABLE_PHASE_OLD_RENAMED, + DDL_ALTER_TABLE_PHASE_UPDATE_TRIGGERS, + DDL_ALTER_TABLE_PHASE_UPDATE_STATS, + DDL_ALTER_TABLE_PHASE_UPDATE_BINARY_LOG, + DDL_ALTER_TABLE_PHASE_END +}; + + +/* + Flags stored in DDL_LOG_ENTRY.flags + The flag values can be reused for different commands +*/ +#define DDL_LOG_FLAG_ALTER_RENAME (1 << 0) +#define DDL_LOG_FLAG_ALTER_ENGINE_CHANGED (1 << 1) +#define DDL_LOG_FLAG_ONLY_FRM (1 << 2) +#define DDL_LOG_FLAG_UPDATE_STAT (1 << 3) +/* + Set when using ALTER TABLE on a partitioned table and the table + engine is not changed +*/ +#define DDL_LOG_FLAG_ALTER_PARTITION (1 << 4) + +/* + Setting ddl_log_entry.phase to this has the same effect as setting + the phase to the maximum phase (..PHASE_END) for an entry. +*/ + +#define DDL_LOG_FINAL_PHASE ((uchar) 0xff) + +typedef struct st_ddl_log_entry +{ + LEX_CSTRING name; + LEX_CSTRING from_name; + LEX_CSTRING handler_name; + LEX_CSTRING db; + LEX_CSTRING from_db; + LEX_CSTRING from_handler_name; + LEX_CSTRING tmp_name; /* frm file or temporary file name */ + LEX_CSTRING extra_name; /* Backup table name */ + uchar uuid[MY_UUID_SIZE]; // UUID for new frm file + + ulonglong xid; // Xid stored in the binary log + /* + unique_id can be used to store a unique number to check current state. + Currently it is used to store new size of frm file, link to another ddl log + entry or store an a uniq version for a storage engine in alter table. + For execute entries this is reused as an execute counter to ensure we + don't repeat an entry too many times if executing the entry fails. 
+ */ + ulonglong unique_id; + uint next_entry; + uint entry_pos; // Set by write_dll_log_entry() + uint16 flags; // Flags unique for each command + enum ddl_log_entry_code entry_type; // Set automatically + enum ddl_log_action_code action_type; + /* + Most actions have only one phase. REPLACE does however have two + phases. The first phase removes the file with the new name if + there was one there before and the second phase renames the + old name to the new name. + */ + uchar phase; // set automatically +} DDL_LOG_ENTRY; + +typedef struct st_ddl_log_memory_entry +{ + uint entry_pos; + struct st_ddl_log_memory_entry *next_log_entry; + struct st_ddl_log_memory_entry *prev_log_entry; + struct st_ddl_log_memory_entry *next_active_log_entry; +} DDL_LOG_MEMORY_ENTRY; + + +/* + State of the ddl log during execution of a DDL. + + A ddl log state has one execute entry (main entry pointing to the first + action entry) and many 'action entries' linked in a list in the order + they should be executed. + One recovery the log is parsed and all execute entries will be executed. + + All entries are stored as separate blocks in the ddl recovery file. +*/ + +typedef struct st_ddl_log_state +{ + /* List of ddl log entries */ + DDL_LOG_MEMORY_ENTRY *list; + /* One execute entry per list */ + DDL_LOG_MEMORY_ENTRY *execute_entry; + /* + Entry used for PHASE updates. Normally same as first in 'list', but in + case of a query log event, this points to the main event. 
+ */ + DDL_LOG_MEMORY_ENTRY *main_entry; + uint16 flags; /* Cache for flags */ + bool is_active() { return list != 0; } +} DDL_LOG_STATE; + + +/* These functions are for recovery */ +bool ddl_log_initialize(); +void ddl_log_release(); +bool ddl_log_close_binlogged_events(HASH *xids); +int ddl_log_execute_recovery(); + +/* functions for updating the ddl log */ +bool ddl_log_write_entry(DDL_LOG_ENTRY *ddl_log_entry, + DDL_LOG_MEMORY_ENTRY **active_entry); + +bool ddl_log_write_execute_entry(uint first_entry, uint cond_entry, + DDL_LOG_MEMORY_ENTRY** active_entry); +inline +bool ddl_log_write_execute_entry(uint first_entry, + DDL_LOG_MEMORY_ENTRY **active_entry) +{ + return ddl_log_write_execute_entry(first_entry, 0, active_entry); +} +bool ddl_log_disable_execute_entry(DDL_LOG_MEMORY_ENTRY **active_entry); + +void ddl_log_complete(DDL_LOG_STATE *ddl_log_state); +bool ddl_log_revert(THD *thd, DDL_LOG_STATE *ddl_log_state); + +bool ddl_log_update_phase(DDL_LOG_STATE *entry, uchar phase); +bool ddl_log_add_flag(DDL_LOG_STATE *entry, uint16 flag); +bool ddl_log_update_unique_id(DDL_LOG_STATE *state, ulonglong id); +bool ddl_log_update_xid(DDL_LOG_STATE *state, ulonglong xid); +bool ddl_log_disable_entry(DDL_LOG_STATE *state); +bool ddl_log_increment_phase(uint entry_pos); +void ddl_log_release_memory_entry(DDL_LOG_MEMORY_ENTRY *log_entry); +bool ddl_log_sync(); +bool ddl_log_execute_entry(THD *thd, uint first_entry); + +void ddl_log_add_entry(DDL_LOG_STATE *state, DDL_LOG_MEMORY_ENTRY *log_entry); +void ddl_log_release_entries(DDL_LOG_STATE *ddl_log_state); +bool ddl_log_rename_table(DDL_LOG_STATE *ddl_state, + handlerton *hton, + const LEX_CSTRING *org_db, + const LEX_CSTRING *org_alias, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_alias); +bool ddl_log_rename_view(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *org_db, + const LEX_CSTRING *org_alias, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_alias); +bool ddl_log_drop_table_init(DDL_LOG_STATE 
*ddl_state, + const LEX_CSTRING *db, + const LEX_CSTRING *comment); +bool ddl_log_drop_view_init(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *db); +bool ddl_log_drop_table(DDL_LOG_STATE *ddl_state, + handlerton *hton, + const LEX_CSTRING *path, + const LEX_CSTRING *db, + const LEX_CSTRING *table); +bool ddl_log_drop_view(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *path, + const LEX_CSTRING *db, + const LEX_CSTRING *table); +bool ddl_log_drop_trigger(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *db, + const LEX_CSTRING *table, + const LEX_CSTRING *trigger_name, + const LEX_CSTRING *query); +bool ddl_log_drop_view(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *path, + const LEX_CSTRING *db, + const LEX_CSTRING *table); +bool ddl_log_drop_db(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *db, const LEX_CSTRING *path); +bool ddl_log_create_table(DDL_LOG_STATE *ddl_state, + handlerton *hton, + const LEX_CSTRING *path, + const LEX_CSTRING *db, + const LEX_CSTRING *table, + bool only_frm); +bool ddl_log_create_view(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *path, + enum_ddl_log_create_view_phase phase); +bool ddl_log_delete_tmp_file(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *path, + DDL_LOG_STATE *depending_state); +bool ddl_log_create_trigger(DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *db, const LEX_CSTRING *table, + const LEX_CSTRING *trigger_name, + enum_ddl_log_create_trigger_phase phase); +bool ddl_log_alter_table(DDL_LOG_STATE *ddl_state, + handlerton *org_hton, + const LEX_CSTRING *db, const LEX_CSTRING *table, + handlerton *new_hton, + handlerton *partition_underlying_hton, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_table, + const LEX_CSTRING *frm_path, + const LEX_CSTRING *backup_table_name, + const LEX_CUSTRING *version, + ulonglong table_version, + bool is_renamed); +bool ddl_log_store_query(THD *thd, DDL_LOG_STATE *ddl_log_state, + const char *query, size_t length); +bool ddl_log_delete_frm(DDL_LOG_STATE *ddl_state, const char 
*to_path); +extern mysql_mutex_t LOCK_gdl; +#endif /* DDL_LOG_INCLUDED */ diff --git a/sql/debug.cc b/sql/debug.cc new file mode 100644 index 00000000..a0e2340e --- /dev/null +++ b/sql/debug.cc @@ -0,0 +1,88 @@ +/* Copyright (c) 2021, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_class.h" +#include "debug.h" + +/** + Debug utility to do crash after a set number of executions + + The user variable, either @debug_crash_counter or @debug_error_counter, + is decremented each time debug_crash() or debug_simulate_error is called + if the keyword is set with @@debug_push, like + @@debug_push="d+frm_data_type_info_emulate" + + If the variable is not set or is not an integer it will be ignored. +*/ + +#ifndef DBUG_OFF + +static const LEX_CSTRING debug_crash_counter= +{ STRING_WITH_LEN("debug_crash_counter") }; +static const LEX_CSTRING debug_error_counter= +{ STRING_WITH_LEN("debug_error_counter") }; + +static bool debug_decrement_counter(const LEX_CSTRING *name) +{ + THD *thd= current_thd; + user_var_entry *entry= (user_var_entry*) + my_hash_search(&thd->user_vars, (uchar*) name->str, name->length); + if (!entry || entry->type != INT_RESULT || ! 
entry->value) + return 0; + (*(ulonglong*) entry->value)= (*(ulonglong*) entry->value)-1; + return !*(ulonglong*) entry->value; +} + +void debug_crash_here(const char *keyword) +{ + DBUG_ENTER("debug_crash_here"); + DBUG_PRINT("enter", ("keyword: %s", keyword)); + + DBUG_EXECUTE_IF(keyword, + if (debug_decrement_counter(&debug_crash_counter)) + { + my_printf_error(ER_INTERNAL_ERROR, + "Crashing at %s", + MYF(ME_ERROR_LOG | ME_NOTE), keyword); + DBUG_SUICIDE(); + }); + DBUG_VOID_RETURN; +} + +/* + This can be used as debug_counter to simulate an error at a specific + position. + + Typical usage would be + if (debug_simualte_error("keyword")) + error= 1; +*/ + +bool debug_simulate_error(const char *keyword, uint error) +{ + DBUG_ENTER("debug_crash_here"); + DBUG_PRINT("enter", ("keyword: %s", keyword)); + DBUG_EXECUTE_IF(keyword, + if (debug_decrement_counter(&debug_error_counter)) + { + my_printf_error(error, + "Simulating error for '%s'", + MYF(ME_ERROR_LOG), keyword); + DBUG_RETURN(1); + }); + DBUG_RETURN(0); +} +#endif /* DBUG_OFF */ diff --git a/sql/debug.h b/sql/debug.h new file mode 100644 index 00000000..48bae774 --- /dev/null +++ b/sql/debug.h @@ -0,0 +1,39 @@ +#ifndef DEBUG_INCLUDED +#define DEBUG_INCLUDED + +/* Copyright (c) 2021, MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + Declarations for debug_crash_here and other future mariadb server debug + functionality. +*/ + +/* debug_crash_here() functionallity. + See mysql_test/suite/atomic/create_table.test for an example of how it + can be used +*/ + +#ifndef DBUG_OFF +void debug_crash_here(const char *keyword); +bool debug_simulate_error(const char *keyword, uint error); +#else +#define debug_crash_here(A) do { } while(0) +#define debug_simulate_error(A, B) 0 +#endif + +#endif /* DEBUG_INCLUDED */ diff --git a/sql/debug_sync.cc b/sql/debug_sync.cc new file mode 100644 index 00000000..362463a7 --- /dev/null +++ b/sql/debug_sync.cc @@ -0,0 +1,1835 @@ +/* Copyright (c) 2009, 2013, Oracle and/or its affiliates. + Copyright (c) 2009, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* see include/mysql/service_debug_sync.h for debug sync documentation */ + +#include "mariadb.h" +#include "debug_sync.h" +#include + +#if defined(ENABLED_DEBUG_SYNC) + +/* + Due to weaknesses in our include files, we need to include + sql_priv.h here. To have THD declared, we need to include + sql_class.h. 
This includes log_event.h, which in turn requires + declarations from sql_priv.h (e.g. OPTION_AUTO_IS_NULL). + sql_priv.h includes almost everything, so is sufficient here. +*/ +#include "sql_priv.h" +#include "sql_parse.h" + +/* + Action to perform at a synchronization point. + NOTE: This structure is moved around in memory by realloc(), qsort(), + and memmove(). Do not add objects with non-trivial constructors + or destructors, which might prevent moving of this structure + with these functions. +*/ +struct st_debug_sync_action +{ + ulong activation_count; /* MY_MAX(hit_limit, execute) */ + ulong hit_limit; /* hits before kill query */ + ulong execute; /* executes before self-clear */ + ulong timeout; /* wait_for timeout */ + String signal; /* signal to emit */ + String wait_for; /* signal to wait for */ + String sync_point; /* sync point name */ + bool need_sort; /* if new action, array needs sort */ + bool clear_event; /* do not clear signal when waited + for if false. */ +}; + +/* Debug sync control. Referenced by THD. */ +struct st_debug_sync_control +{ + st_debug_sync_action *ds_action; /* array of actions */ + uint ds_active; /* # active actions */ + uint ds_allocated; /* # allocated actions */ + ulonglong dsp_hits; /* statistics */ + ulonglong dsp_executed; /* statistics */ + ulonglong dsp_max_active; /* statistics */ + /* + thd->proc_info points at unsynchronized memory. + It must not go away as long as the thread exists. + */ + char ds_proc_info[80]; /* proc_info string */ +}; + + + + +/** + Definitions for the debug sync facility. + 1. Global string variable to hold a set of of "signals". + 2. Global condition variable for signaling and waiting. + 3. Global mutex to synchronize access to the above. 
+*/ +struct st_debug_sync_globals +{ + Hash_set ds_signal_set; /* A set of active signals */ + mysql_cond_t ds_cond; /* condition variable */ + mysql_mutex_t ds_mutex; /* mutex variable */ + ulonglong dsp_hits; /* statistics */ + ulonglong dsp_executed; /* statistics */ + ulonglong dsp_max_active; /* statistics */ + + st_debug_sync_globals() : + ds_signal_set(PSI_NOT_INSTRUMENTED, signal_key), + dsp_hits (0), dsp_executed(0), dsp_max_active(0) {}; + ~st_debug_sync_globals() + { + clear_set(); + } + + void clear_set() + { + Hash_set::Iterator it{ds_signal_set}; + LEX_CSTRING *s; + while ((s= it++)) + my_free(s); + ds_signal_set.clear(); + } + + /* Hash key function for ds_signal_set. */ + static uchar *signal_key(const LEX_CSTRING *str, size_t *klen, my_bool) + { + *klen= str->length; + return (uchar*) str->str; + } + + /** + Return true if the signal is found in global signal list. + + @param signal_name Signal name identifying the signal. + + @note + If signal is found in the global signal set, it means that the + signal thread has signalled to the waiting thread. This method + must be called with the debug_sync_global.ds_mutex held. + + @retval true if signal is found in the global signal list. + @retval false otherwise. + */ + + inline bool is_signalled(const char *signal_name, size_t length) + { + return ds_signal_set.find(signal_name, length); + } + + void clear_signal(const String &signal_name) + { + DBUG_ENTER("clear_signal"); + LEX_CSTRING *record= ds_signal_set.find(signal_name.ptr(), + signal_name.length()); + if (record) + { + ds_signal_set.remove(record); + my_free(record); + } + DBUG_VOID_RETURN; + } + + bool set_signal(const char *signal_name, size_t length) + { + /* Need to check if the signal is already in the hash set, because + Hash_set doesn't differentiate between OOM and key already in. */ + if (is_signalled(signal_name, length)) + return FALSE; + /* LEX_CSTRING and the string allocated with only one malloc. 
*/ + LEX_CSTRING *s= (LEX_CSTRING *) my_malloc(PSI_NOT_INSTRUMENTED, + sizeof(LEX_CSTRING) + length + 1, + MYF(0)); + char *str= (char *)(s + 1); + memcpy(str, signal_name, length); + str[length]= '\0'; + + s->length= length; + s->str= str; + if (ds_signal_set.insert(s)) + return TRUE; + return FALSE; + } +}; + +static st_debug_sync_globals *debug_sync_global; /* All globals in one object */ + +/** + Callbacks from C files. +*/ +C_MODE_START +static void debug_sync(THD *thd, const char *sync_point_name, size_t name_len); +static int debug_sync_qsort_cmp(const void *, const void *); +C_MODE_END + +#ifdef HAVE_PSI_INTERFACE +static PSI_mutex_key key_debug_sync_globals_ds_mutex; + +static PSI_mutex_info all_debug_sync_mutexes[]= +{ + { &key_debug_sync_globals_ds_mutex, "DEBUG_SYNC::mutex", PSI_FLAG_GLOBAL} +}; + +static PSI_cond_key key_debug_sync_globals_ds_cond; + +static PSI_cond_info all_debug_sync_conds[]= +{ + { &key_debug_sync_globals_ds_cond, "DEBUG_SYNC::cond", PSI_FLAG_GLOBAL} +}; + +static void init_debug_sync_psi_keys(void) +{ + const char* category= "sql"; + int count; + + count= array_elements(all_debug_sync_mutexes); + mysql_mutex_register(category, all_debug_sync_mutexes, count); + + count= array_elements(all_debug_sync_conds); + mysql_cond_register(category, all_debug_sync_conds, count); +} +#endif /* HAVE_PSI_INTERFACE */ + + +/** + Set the THD::proc_info without instrumentation. + This method is private to DEBUG_SYNC, + and on purpose avoid any use of: + - the SHOW PROFILE instrumentation + - the PERFORMANCE_SCHEMA instrumentation + so that using DEBUG_SYNC() in the server code + does not cause the instrumentations to record + spurious data. +*/ +static const char* +debug_sync_thd_proc_info(THD *thd, const char* info) +{ + const char* old_proc_info= thd->proc_info; + thd->proc_info= info; + return old_proc_info; +} + + +/** + Initialize the debug sync facility at server start. 
+ + @return status + @retval 0 ok + @retval != 0 error +*/ + +int debug_sync_init(void) +{ + DBUG_ENTER("debug_sync_init"); + +#ifdef HAVE_PSI_INTERFACE + init_debug_sync_psi_keys(); +#endif + + if (!debug_sync_global) + debug_sync_global= new st_debug_sync_globals(); + + if (opt_debug_sync_timeout) + { + int rc; + + /* Initialize the global variables. */ + debug_sync_global->clear_set(); + if ((rc= mysql_cond_init(key_debug_sync_globals_ds_cond, + &debug_sync_global->ds_cond, NULL)) || + (rc= mysql_mutex_init(key_debug_sync_globals_ds_mutex, + &debug_sync_global->ds_mutex, + MY_MUTEX_INIT_FAST))) + DBUG_RETURN(rc); /* purecov: inspected */ + + /* Set the call back pointer in C files. */ + debug_sync_C_callback_ptr= debug_sync; + } + + DBUG_RETURN(0); +} + + +/** + End the debug sync facility. + + @description + This is called at server shutdown or after a thread initialization error. +*/ + +void debug_sync_end(void) +{ + DBUG_ENTER("debug_sync_end"); + + /* End the facility only if it had been initialized. */ + if (debug_sync_C_callback_ptr) + { + /* Clear the call back pointer in C files. */ + debug_sync_C_callback_ptr= NULL; + + /* Destroy the global variables. */ + debug_sync_global->clear_set(); + mysql_cond_destroy(&debug_sync_global->ds_cond); + mysql_mutex_destroy(&debug_sync_global->ds_mutex); + + /* Print statistics. */ + { + sql_print_information("Debug sync points hit: %lld", + debug_sync_global->dsp_hits); + sql_print_information("Debug sync points executed: %lld", + debug_sync_global->dsp_executed); + sql_print_information("Debug sync points max active per thread: %lld", + debug_sync_global->dsp_max_active); + } + } + + delete debug_sync_global; + /* Just to be safe */ + debug_sync_global= NULL; + + DBUG_VOID_RETURN; +} + + +/* purecov: begin tested */ + +/** + Disable the facility after lack of memory if no error can be returned. + + @note + Do not end the facility here because the global variables can + be in use by other threads. 
+*/ + +static void debug_sync_emergency_disable(void) +{ + DBUG_ENTER("debug_sync_emergency_disable"); + + opt_debug_sync_timeout= 0; + + DBUG_PRINT("debug_sync", + ("Debug Sync Facility disabled due to lack of memory.")); + sql_print_error("Debug Sync Facility disabled due to lack of memory."); + + DBUG_VOID_RETURN; +} + +/* purecov: end */ + + +/** + Initialize the debug sync facility at thread start. + + @param[in] thd thread handle +*/ + +void debug_sync_init_thread(THD *thd) +{ + DBUG_ENTER("debug_sync_init_thread"); + DBUG_ASSERT(thd); + + if (opt_debug_sync_timeout) + { + thd->debug_sync_control= (st_debug_sync_control*) + my_malloc(PSI_NOT_INSTRUMENTED, + sizeof(st_debug_sync_control), + MYF(MY_WME | MY_ZEROFILL | MY_THREAD_SPECIFIC)); + if (!thd->debug_sync_control) + { + /* + Error is reported by my_malloc(). + We must disable the facility. We have no way to return an error. + */ + debug_sync_emergency_disable(); /* purecov: tested */ + } + } + + DBUG_VOID_RETURN; +} + + +/** + Returns an allocated buffer containing a comma-separated C string of all + active signals. + + Buffer must be freed by the caller. +*/ +static const char *get_signal_set_as_string() +{ + mysql_mutex_assert_owner(&debug_sync_global->ds_mutex); + size_t req_size= 1; // In case of empty set for the end '\0' char. 
+ + for (size_t i= 0; i < debug_sync_global->ds_signal_set.size(); i++) + req_size+= debug_sync_global->ds_signal_set.at(i)->length + 1; + + char *buf= (char *) my_malloc(PSI_NOT_INSTRUMENTED, req_size, MYF(0)); + if (!buf) + return nullptr; + memset(buf, '\0', req_size); + + char *cur_pos= buf; + for (size_t i= 0; i < debug_sync_global->ds_signal_set.size(); i++) + { + const LEX_CSTRING *signal= debug_sync_global->ds_signal_set.at(i); + memcpy(cur_pos, signal->str, signal->length); + if (i != debug_sync_global->ds_signal_set.size() - 1) + cur_pos[signal->length]= ','; + else + cur_pos[signal->length] = '\0'; + cur_pos+= signal->length + 1; + } + return buf; +} + + +/** + End the debug sync facility at thread end. + + @param[in] thd thread handle +*/ + +void debug_sync_end_thread(THD *thd) +{ + DBUG_ENTER("debug_sync_end_thread"); + DBUG_ASSERT(thd); + + if (thd->debug_sync_control) + { + st_debug_sync_control *ds_control= thd->debug_sync_control; + + if (ds_control->ds_action) + { + st_debug_sync_action *action= ds_control->ds_action; + st_debug_sync_action *action_end= action + ds_control->ds_allocated; + for (; action < action_end; action++) + { + action->signal.free(); + action->wait_for.free(); + action->sync_point.free(); + } + my_free(ds_control->ds_action); + } + + /* Statistics. */ + /* + Protect access with debug_sync_global->ds_mutex only if + it had been initialized. + */ + if (debug_sync_C_callback_ptr) + mysql_mutex_lock(&debug_sync_global->ds_mutex); + + debug_sync_global->dsp_hits+= ds_control->dsp_hits; + debug_sync_global->dsp_executed+= ds_control->dsp_executed; + if (debug_sync_global->dsp_max_active < ds_control->dsp_max_active) + debug_sync_global->dsp_max_active= ds_control->dsp_max_active; + + /* + Protect access with debug_sync_global->ds_mutex only if + it had been initialized. 
+ */ + if (debug_sync_C_callback_ptr) + mysql_mutex_unlock(&debug_sync_global->ds_mutex); + + my_free(ds_control); + thd->debug_sync_control= NULL; + } + + DBUG_VOID_RETURN; +} + + +void debug_sync_reset_thread(THD *thd) +{ + if (thd->debug_sync_control) + { + /* + This synchronization point can be used to synchronize on thread end. + This is the latest point in a THD's life, where this can be done. + */ + DEBUG_SYNC(thd, "thread_end"); + thd->debug_sync_control->ds_active= 0; + } +} + + +/** + Move a string by length. + + @param[out] to buffer for the resulting string + @param[in] to_end end of buffer + @param[in] from source string + @param[in] length number of bytes to copy + + @return pointer to end of copied string +*/ + +static char *debug_sync_bmove_len(char *to, char *to_end, + const char *from, size_t length) +{ + DBUG_ASSERT(to); + DBUG_ASSERT(to_end); + DBUG_ASSERT(!length || from); + set_if_smaller(length, (size_t) (to_end - to)); + if (length) + memcpy(to, from, length); + return (to + length); +} + + +#ifdef DBUG_TRACE + +/** + Create a string that describes an action. + + @param[out] result buffer for the resulting string + @param[in] size size of result buffer + @param[in] action action to describe +*/ + +static void debug_sync_action_string(char *result, uint size, + st_debug_sync_action *action) +{ + char *wtxt= result; + char *wend= wtxt + size - 1; /* Allow emergency '\0'. */ + DBUG_ASSERT(result); + DBUG_ASSERT(action); + + /* If an execute count is present, signal or wait_for are needed too. 
*/ + DBUG_ASSERT(!action->execute || + action->signal.length() || action->wait_for.length()); + + if (action->execute) + { + if (action->signal.length()) + { + wtxt= debug_sync_bmove_len(wtxt, wend, STRING_WITH_LEN("SIGNAL ")); + wtxt= debug_sync_bmove_len(wtxt, wend, action->signal.ptr(), + action->signal.length()); + } + if (action->wait_for.length()) + { + if ((wtxt == result) && (wtxt < wend)) + *(wtxt++)= ' '; + wtxt= debug_sync_bmove_len(wtxt, wend, STRING_WITH_LEN(" WAIT_FOR ")); + wtxt= debug_sync_bmove_len(wtxt, wend, action->wait_for.ptr(), + action->wait_for.length()); + + if (action->timeout != opt_debug_sync_timeout) + { + wtxt+= my_snprintf(wtxt, wend - wtxt, " TIMEOUT %lu", action->timeout); + } + } + if (action->execute != 1) + { + wtxt+= my_snprintf(wtxt, wend - wtxt, " EXECUTE %lu", action->execute); + } + } + if (action->hit_limit) + { + wtxt+= my_snprintf(wtxt, wend - wtxt, "%sHIT_LIMIT %lu", + (wtxt == result) ? "" : " ", action->hit_limit); + } + + /* + If (wtxt == wend) string may not be terminated. + There is one byte left for an emergency termination. + */ + *wtxt= '\0'; +} + + +/** + Print actions. + + @param[in] thd thread handle +*/ + +static void debug_sync_print_actions(THD *thd) +{ + st_debug_sync_control *ds_control= thd->debug_sync_control; + uint idx; + DBUG_ENTER("debug_sync_print_actions"); + DBUG_ASSERT(thd); + + if (!ds_control) + DBUG_VOID_RETURN; + + for (idx= 0; idx < ds_control->ds_active; idx++) + { + const char *dsp_name= ds_control->ds_action[idx].sync_point.c_ptr(); + char action_string[256]; + + debug_sync_action_string(action_string, sizeof(action_string), + ds_control->ds_action + idx); + DBUG_PRINT("debug_sync_list", ("%s %s", dsp_name, action_string)); + } + + DBUG_VOID_RETURN; +} +#endif /* defined(DBUG_TRACE) */ + + +/** + Compare two actions by sync point name length, string. 
+
+  @param[in]    arg1            reference to action1
+  @param[in]    arg2            reference to action2
+
+  @return       difference
+    @retval     == 0            length1/string1 is same as length2/string2
+    @retval     < 0             length1/string1 is smaller
+    @retval     > 0             length1/string1 is bigger
+*/
+
+static int debug_sync_qsort_cmp(const void* arg1, const void* arg2)
+{
+  st_debug_sync_action *action1= (st_debug_sync_action*) arg1;
+  st_debug_sync_action *action2= (st_debug_sync_action*) arg2;
+  int diff;
+  DBUG_ASSERT(action1);
+  DBUG_ASSERT(action2);
+
+  /*
+    Order by name length first; only names of equal length need a byte
+    compare. debug_sync_find() below relies on exactly this ordering.
+  */
+  if (!(diff= action1->sync_point.length() - action2->sync_point.length()))
+    diff= memcmp(action1->sync_point.ptr(), action2->sync_point.ptr(),
+                 action1->sync_point.length());
+
+  return diff;
+}
+
+
+/**
+  Find a debug sync action.
+
+  @param[in]    actionarr       array of debug sync actions
+  @param[in]    quantity        number of actions in array
+  @param[in]    dsp_name        name of debug sync point to find
+  @param[in]    name_len        length of name of debug sync point
+
+  @return       action
+    @retval     != NULL         found sync point in array
+    @retval     NULL            not found
+
+  @description
+    Binary search. Array needs to be sorted by length, sync point name
+    (the order produced by debug_sync_qsort_cmp()).
+*/
+
+static st_debug_sync_action *debug_sync_find(st_debug_sync_action *actionarr,
+                                             int quantity,
+                                             const char *dsp_name,
+                                             size_t name_len)
+{
+  st_debug_sync_action  *action;
+  int                   low ;
+  int                   high ;
+  int                   mid ;
+  ssize_t               diff ;
+  DBUG_ASSERT(actionarr);
+  DBUG_ASSERT(dsp_name);
+  DBUG_ASSERT(name_len);
+
+  low= 0;
+  high= quantity;
+
+  while (low < high)
+  {
+    mid= (low + high) / 2;
+    action= actionarr + mid;
+    /*
+      Same ordering as debug_sync_qsort_cmp(): compare lengths first,
+      bytes only on equal length. NOTE: the subtraction is evaluated in
+      size_t and relies on both lengths fitting into ssize_t.
+    */
+    if (!(diff= name_len - action->sync_point.length()) &&
+        !(diff= memcmp(dsp_name, action->sync_point.ptr(), name_len)))
+      return action;
+    if (diff > 0)
+      low= mid + 1;
+    else
+      high= mid - 1;
+  }
+
+  /*
+    The loop shrinks 'high' to mid - 1 (not mid), so the element at
+    index 'low' can be skipped by the loop. Check it explicitly before
+    reporting "not found".
+  */
+  if (low < quantity)
+  {
+    action= actionarr + low;
+    if ((name_len == action->sync_point.length()) &&
+        !memcmp(dsp_name, action->sync_point.ptr(), name_len))
+      return action;
+  }
+
+  return NULL;
+}
+
+
+/**
+  Reset the debug sync facility.
+
+  @param[in]    thd             thread handle
+
+  @description
+    Remove all actions of this thread.
+    Clear the global signal.
+*/
+
+static void debug_sync_reset(THD *thd)
+{
+  st_debug_sync_control *ds_control= thd->debug_sync_control;
+  DBUG_ENTER("debug_sync_reset");
+  DBUG_ASSERT(thd);
+  DBUG_ASSERT(ds_control);
+
+  /* Remove all actions of this thread. */
+  ds_control->ds_active= 0;
+
+  /*
+    Clear the global signal set. ds_mutex serializes this against
+    concurrent readers/writers of the signal set in other threads.
+  */
+  mysql_mutex_lock(&debug_sync_global->ds_mutex);
+  debug_sync_global->clear_set();
+  mysql_mutex_unlock(&debug_sync_global->ds_mutex);
+
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Remove a debug sync action.
+
+  @param[in]    ds_control      control object
+  @param[in]    action          action to be removed
+
+  @description
+    Removing an action mainly means to decrement the ds_active counter.
+    But if the action is between other active actions in the array, then
+    the array needs to be shrunk. The active actions above the one to
+    be removed have to be moved down by one slot.
+*/
+
+static void debug_sync_remove_action(st_debug_sync_control *ds_control,
+                                     st_debug_sync_action *action)
+{
+  uint dsp_idx= (uint)(action - ds_control->ds_action);
+  DBUG_ENTER("debug_sync_remove_action");
+  DBUG_ASSERT(ds_control);
+  DBUG_ASSERT(ds_control == current_thd->debug_sync_control);
+  DBUG_ASSERT(action);
+  DBUG_ASSERT(dsp_idx < ds_control->ds_active);
+
+  /* Decrement the number of currently active actions. */
+  ds_control->ds_active--;
+
+  /*
+    If this was not the last active action in the array, we need to
+    shift remaining active actions down to keep the array gap-free.
+    Otherwise binary search might fail or take longer than necessary at
+    least. Also new actions are always put to the end of the array.
+  */
+  if (ds_control->ds_active > dsp_idx)
+  {
+    /*
+      Do not make save_action an object of class st_debug_sync_action.
+      Its destructor would tamper with the String pointers.
+ */ + uchar save_action[sizeof(st_debug_sync_action)]; + + /* + Copy the to-be-removed action object to temporary storage before + the shift copies the string pointers over. Do not use assignment + because it would use assignment operator methods for the Strings. + This would copy the strings. The shift below overwrite the string + pointers without freeing them first. By using memmove() we save + the pointers, which are overwritten by the shift. + */ + memmove(save_action, action, sizeof(st_debug_sync_action)); + + /* Move actions down. */ + memmove((void*)(ds_control->ds_action + dsp_idx), + ds_control->ds_action + dsp_idx + 1, + (ds_control->ds_active - dsp_idx) * + sizeof(st_debug_sync_action)); + + /* + Copy back the saved action object to the now free array slot. This + replaces the double references of String pointers that have been + produced by the shift. Again do not use an assignment operator to + avoid string allocation/copy. + */ + memmove((void*)(ds_control->ds_action + ds_control->ds_active), + save_action, sizeof(st_debug_sync_action)); + } + + DBUG_VOID_RETURN; +} + + +/** + Get a debug sync action. + + @param[in] thd thread handle + @param[in] dsp_name debug sync point name + @param[in] name_len length of sync point name + + @return action + @retval != NULL ok + @retval NULL error + + @description + Find the debug sync action for a debug sync point or make a new one. +*/ + +static st_debug_sync_action *debug_sync_get_action(THD *thd, + const char *dsp_name, + uint name_len) +{ + st_debug_sync_control *ds_control= thd->debug_sync_control; + st_debug_sync_action *action; + DBUG_ENTER("debug_sync_get_action"); + DBUG_ASSERT(thd); + DBUG_ASSERT(dsp_name); + DBUG_ASSERT(name_len); + DBUG_ASSERT(ds_control); + DBUG_PRINT("debug_sync", ("sync_point: '%.*s'", (int) name_len, dsp_name)); + DBUG_PRINT("debug_sync", ("active: %u allocated: %u", + ds_control->ds_active, ds_control->ds_allocated)); + + /* There cannot be more active actions than allocated. 
*/ + DBUG_ASSERT(ds_control->ds_active <= ds_control->ds_allocated); + /* If there are active actions, the action array must be present. */ + DBUG_ASSERT(!ds_control->ds_active || ds_control->ds_action); + + /* Try to reuse existing action if there is one for this sync point. */ + if (ds_control->ds_active && + (action= debug_sync_find(ds_control->ds_action, ds_control->ds_active, + dsp_name, name_len))) + { + /* Reuse an already active sync point action. */ + DBUG_ASSERT((uint)(action - ds_control->ds_action) < ds_control->ds_active); + DBUG_PRINT("debug_sync", ("reuse action idx: %ld", + (long) (action - ds_control->ds_action))); + } + else + { + /* Create a new action. */ + int dsp_idx= ds_control->ds_active++; + set_if_bigger(ds_control->dsp_max_active, ds_control->ds_active); + if (ds_control->ds_active > ds_control->ds_allocated) + { + uint new_alloc= ds_control->ds_active + 3; + void *new_action= my_realloc(PSI_NOT_INSTRUMENTED, ds_control->ds_action, + new_alloc * sizeof(st_debug_sync_action), + MYF(MY_WME | MY_ALLOW_ZERO_PTR)); + if (!new_action) + { + /* Error is reported by my_malloc(). */ + goto err; /* purecov: tested */ + } + ds_control->ds_action= (st_debug_sync_action*) new_action; + ds_control->ds_allocated= new_alloc; + /* Clear memory as we do not run string constructors here. */ + bzero((uchar*) (ds_control->ds_action + dsp_idx), + (new_alloc - dsp_idx) * sizeof(st_debug_sync_action)); + } + DBUG_PRINT("debug_sync", ("added action idx: %u", dsp_idx)); + action= ds_control->ds_action + dsp_idx; + if (action->sync_point.copy(dsp_name, name_len, system_charset_info)) + { + /* Error is reported by my_malloc(). 
*/ + goto err; /* purecov: tested */ + } + action->need_sort= TRUE; + } + DBUG_ASSERT(action >= ds_control->ds_action); + DBUG_ASSERT(action < ds_control->ds_action + ds_control->ds_active); + DBUG_PRINT("debug_sync", ("action: %p array: %p count: %u", + action, ds_control->ds_action, + ds_control->ds_active)); + + DBUG_RETURN(action); + + /* purecov: begin tested */ + err: + DBUG_RETURN(NULL); + /* purecov: end */ +} + + +/** + Set a debug sync action. + + @param[in] thd thread handle + @param[in] action synchronization action + + @return status + @retval FALSE ok + @retval TRUE error + + @description + This is called from the debug sync parser. It arms the action for + the requested sync point. If the action parsed into an empty action, + it is removed instead. + + Setting an action for a sync point means to make the sync point + active. When it is hit it will execute this action. + + Before parsing, we "get" an action object. This is placed at the + end of the thread's action array unless the requested sync point + has an action already. + + Then the parser fills the action object from the request string. + + Finally the action is "set" for the sync point. If it was parsed + to be empty, it is removed from the array. If it did belong to a + sync point before, the sync point becomes inactive. If the action + became non-empty and it did not belong to a sync point before (it + was added at the end of the action array), the action array needs + to be sorted by sync point. + + If the sync point name is "now", it is executed immediately. 
+*/ + +static bool debug_sync_set_action(THD *thd, st_debug_sync_action *action) +{ + st_debug_sync_control *ds_control= thd->debug_sync_control; + bool is_dsp_now= FALSE; + DBUG_ENTER("debug_sync_set_action"); + DBUG_ASSERT(thd); + DBUG_ASSERT(action); + DBUG_ASSERT(ds_control); + + action->activation_count= MY_MAX(action->hit_limit, action->execute); + if (!action->activation_count) + { + debug_sync_remove_action(ds_control, action); + DBUG_PRINT("debug_sync", ("action cleared")); + } + else + { + const char *dsp_name= action->sync_point.c_ptr(); +#ifdef DBUG_TRACE + DBUG_EXECUTE("debug_sync", { + /* Functions as DBUG_PRINT args can change keyword and line nr. */ + const char *sig_emit= action->signal.c_ptr(); + const char *sig_wait= action->wait_for.c_ptr(); + DBUG_PRINT("debug_sync", + ("sync_point: '%s' activation_count: %lu hit_limit: %lu " + "execute: %lu timeout: %lu signal: '%s' wait_for: '%s'", + dsp_name, action->activation_count, + action->hit_limit, action->execute, action->timeout, + sig_emit, sig_wait));}); +#endif + + /* Check this before sorting the array. action may move. */ + is_dsp_now= !my_strcasecmp(system_charset_info, dsp_name, "now"); + + if (action->need_sort) + { + action->need_sort= FALSE; + /* Sort actions by (name_len, name). */ + my_qsort(ds_control->ds_action, ds_control->ds_active, + sizeof(st_debug_sync_action), debug_sync_qsort_cmp); + } + } +#ifdef DBUG_TRACE + DBUG_EXECUTE("debug_sync_list", debug_sync_print_actions(thd);); +#endif + + /* Execute the special sync point 'now' if activated above. */ + if (is_dsp_now) + { + DEBUG_SYNC(thd, "now"); + /* + If HIT_LIMIT for sync point "now" was 1, the execution of the sync + point decremented it to 0. In this case the following happened: + + - an error message was reported with my_error() and + - the statement was killed with thd->killed= THD::KILL_QUERY. + + If a statement reports an error, it must not call send_ok(). 
+ The calling functions will not call send_ok(), if we return TRUE + from this function. + + thd->killed is also set if the wait is interrupted from a + KILL or KILL QUERY statement. In this case, no error is reported + and shall not be reported as a result of SET DEBUG_SYNC. + Hence, we check for the first condition above. + */ + if (unlikely(thd->is_error())) + DBUG_RETURN(TRUE); + } + + DBUG_RETURN(FALSE); +} + + +/** + Extract a token from a string. + + @param[out] token_p returns start of token + @param[out] token_length_p returns length of token + @param[in,out] ptr current string pointer, adds '\0' terminators + + @return string pointer or NULL + @retval != NULL ptr behind token terminator or at string end + @retval NULL no token found in remainder of string + + @note + This function assumes that the string is in system_charset_info, + that this charset is single byte for ASCII NUL ('\0'), that no + character except of ASCII NUL ('\0') contains a byte with value 0, + and that ASCII NUL ('\0') is used as the string terminator. + + This function needs to return tokens that are terminated with ASCII + NUL ('\0'). The tokens are used in my_strcasecmp(). Unfortunately + there is no my_strncasecmp(). + + To return the last token without copying it, we require the input + string to be nul terminated. + + @description + This function skips space characters at string begin. + + It returns a pointer to the first non-space character in *token_p. + + If no non-space character is found before the string terminator + ASCII NUL ('\0'), the function returns NULL. *token_p and + *token_length_p remain unchanged in this case (they are not set). + + The function takes a space character or an ASCII NUL ('\0') as a + terminator of the token. The space character could be multi-byte. + + It returns the length of the token in bytes, excluding the + terminator, in *token_length_p. 
+
+    If the terminator of the token is ASCII NUL ('\0'), it returns a
+    pointer to the terminator (string end).
+
+    If the terminator is a space character, it replaces the first
+    byte of the terminator character by ASCII NUL ('\0'), skips the (now
+    corrupted) terminator character, and skips all following space
+    characters. It returns a pointer to the next non-space character or
+    to the string terminator ASCII NUL ('\0').
+*/
+
+static char *debug_sync_token(char **token_p, uint *token_length_p,
+                              char *ptr, char *ptrend)
+{
+  DBUG_ASSERT(token_p);
+  DBUG_ASSERT(token_length_p);
+  DBUG_ASSERT(ptr);
+
+  /* Skip leading space */
+  ptr+= system_charset_info->scan(ptr, ptrend, MY_SEQ_SPACES);
+  /* Only spaces until the string terminator: no token to return. */
+  if (!*ptr)
+  {
+    ptr= NULL;
+    goto end;
+  }
+
+  /* Get token start. */
+  *token_p= ptr;
+
+  /* Find token end. */
+  ptr+= system_charset_info->scan(ptr, ptrend, MY_SEQ_NONSPACES);
+
+  /* Get token length. */
+  *token_length_p= (uint)(ptr - *token_p);
+
+  /* If necessary, terminate token. */
+  if (*ptr)
+  {
+    DBUG_ASSERT(ptr < ptrend);
+    /* Get terminator character length (the space may be multi-byte). */
+    uint mbspacelen= system_charset_info->charlen_fix(ptr, ptrend);
+
+    /* Terminate token in place; this is why the caller's string is mutated. */
+    *ptr= '\0';
+
+    /* Skip the terminator. */
+    ptr+= mbspacelen;
+
+    /* Skip trailing space */
+    ptr+= system_charset_info->scan(ptr, ptrend, MY_SEQ_SPACES);
+  }
+
+ end:
+  return ptr;
+}
+
+
+/**
+  Extract a number from a string.
+
+  @param[out]   number_p        returns number
+  @param[in]    actstrptr       current pointer in action string
+
+  @return       string pointer or NULL
+    @retval     != NULL         ptr behind token terminator or at string end
+    @retval     NULL            no token found or token is not valid number
+
+  @note
+    The same assumptions about charset apply as for debug_sync_token().
+
+  @note
+    NOTE(review): strtoul() range overflow (ERANGE) is not detected here;
+    an out-of-range number silently clamps to ULONG_MAX instead of
+    failing. Only trailing garbage after the digits is rejected.
+
+  @description
+    This function fetches a token from the string and converts it
+    into a number.
+
+    If there is no token left in the string, or the token is not a valid
+    decimal number, NULL is returned. The result in *number_p is
+    undefined in this case.
+*/
+
+static char *debug_sync_number(ulong *number_p, char *actstrptr,
+                               char *actstrend)
+{
+  char                  *ptr;
+  char                  *ept;
+  char                  *token;
+  uint                  token_length;
+  DBUG_ASSERT(number_p);
+  DBUG_ASSERT(actstrptr);
+
+  /* Get token from string. */
+  if (!(ptr= debug_sync_token(&token, &token_length, actstrptr, actstrend)))
+    goto end;
+
+  /* debug_sync_token() NUL-terminated the token, so strtoul() is safe. */
+  *number_p= strtoul(token, &ept, 10);
+  /* Reject tokens with non-digit trailing characters. */
+  if (*ept)
+    ptr= NULL;
+
+ end:
+  return ptr;
+}
+
+
+/**
+  Evaluate a debug sync action string.
+
+  @param[in]        thd             thread handle
+  @param[in,out]    action_str      action string to receive '\0' terminators
+
+  @return       status
+    @retval     FALSE           ok
+    @retval     TRUE            error
+
+  @description
+    This is called when the DEBUG_SYNC system variable is set.
+    Parse action string, build a debug sync action, activate it.
+
+    Before parsing, we "get" an action object. This is placed at the
+    end of the thread's action array unless the requested sync point
+    has an action already.
+
+    Then the parser fills the action object from the request string.
+
+    Finally the action is "set" for the sync point. This means that the
+    sync point becomes active or inactive, depending on the action
+    values.
+
+  @note
+    The input string needs to be ASCII NUL ('\0') terminated. We split
+    nul-terminated tokens in it without copy.
+
+  @note
+    The current implementation does not support two 'now SIGNAL xxx' commands
+    in a row for multiple threads as the first one can get lost while
+    the waiting threads are sleeping on mysql_cond_timedwait().
+    One reason for this is that the signal name is stored in a global variable
+    that is overwritten. A better way would be to store all signals in
+    an array together with a 'time' when the signal was sent. This array
+    should be checked on broadcast.
+
+  @see the function comment of debug_sync_token() for more constraints
+    for the string.
+*/ + +static bool debug_sync_eval_action(THD *thd, char *action_str, char *action_end) +{ + st_debug_sync_action *action= NULL; + const char *errmsg; + char *ptr; + char *token; + uint token_length= 0; + DBUG_ENTER("debug_sync_eval_action"); + DBUG_ASSERT(thd); + DBUG_ASSERT(action_str); + DBUG_PRINT("debug_sync", ("action_str: '%s'", action_str)); + + /* + Get debug sync point name. Or a special command. + */ + if (!(ptr= debug_sync_token(&token, &token_length, action_str, action_end))) + { + errmsg= "Missing synchronization point name"; + goto err; + } + + /* + If there is a second token, the first one is the sync point name. + */ + if (*ptr) + { + /* Get an action object to collect the requested action parameters. */ + action= debug_sync_get_action(thd, token, token_length); + if (!action) + { + /* Error message is sent. */ + DBUG_RETURN(TRUE); /* purecov: tested */ + } + } + + /* + Get kind of action to be taken at sync point. + */ + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) + { + /* No action present. Try special commands. Token unchanged. */ + + /* + Try RESET. + */ + if (!my_strcasecmp(system_charset_info, token, "RESET")) + { + /* It is RESET. Reset all actions and global signal. */ + debug_sync_reset(thd); + goto end; + } + + /* Token unchanged. It still contains sync point name. */ + errmsg= "Missing action after synchronization point name '%.*s'"; + goto err; + } + + /* + Check for pseudo actions first. Start with actions that work on + an existing action. + */ + DBUG_ASSERT(action); + + /* + Try TEST. + */ + if (!my_strcasecmp(system_charset_info, token, "TEST")) + { + /* It is TEST. Nothing must follow it. */ + if (*ptr) + { + errmsg= "Nothing must follow action TEST"; + goto err; + } + + /* Execute sync point. */ + debug_sync(thd, action->sync_point.ptr(), action->sync_point.length()); + /* Fix statistics. This was not a real hit of the sync point. 
*/ + thd->debug_sync_control->dsp_hits--; + goto end; + } + + /* + Now check for actions that define a new action. + Initialize action. Do not use bzero(). Strings may have malloced. + */ + action->activation_count= 0; + action->hit_limit= 0; + action->execute= 0; + action->timeout= 0; + action->signal.length(0); + action->wait_for.length(0); + + /* + Try CLEAR. + */ + if (!my_strcasecmp(system_charset_info, token, "CLEAR")) + { + /* It is CLEAR. Nothing must follow it. */ + if (*ptr) + { + errmsg= "Nothing must follow action CLEAR"; + goto err; + } + + /* Set (clear/remove) action. */ + goto set_action; + } + + /* + Now check for real sync point actions. + */ + + /* + Try SIGNAL. + */ + if (!my_strcasecmp(system_charset_info, token, "SIGNAL")) + { + /* It is SIGNAL. Signal name must follow. */ + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) + { + errmsg= "Missing signal name after action SIGNAL"; + goto err; + } + if (action->signal.copy(token, token_length, system_charset_info)) + { + /* Error is reported by my_malloc(). */ + /* purecov: begin tested */ + errmsg= NULL; + goto err; + /* purecov: end */ + } + + /* Set default for EXECUTE option. */ + action->execute= 1; + + /* Get next token. If none follows, set action. */ + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) + goto set_action; + } + + /* + Try WAIT_FOR. + */ + if (!my_strcasecmp(system_charset_info, token, "WAIT_FOR")) + { + /* It is WAIT_FOR. Wait_for signal name must follow. */ + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) + { + errmsg= "Missing signal name after action WAIT_FOR"; + goto err; + } + if (action->wait_for.copy(token, token_length, system_charset_info)) + { + /* Error is reported by my_malloc(). */ + /* purecov: begin tested */ + errmsg= NULL; + goto err; + /* purecov: end */ + } + + /* Set default for EXECUTE and TIMEOUT options. 
*/ + action->execute= 1; + action->timeout= opt_debug_sync_timeout; + action->clear_event= true; + + /* Get next token. If none follows, set action. */ + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) + goto set_action; + + /* + Try TIMEOUT. + */ + if (!my_strcasecmp(system_charset_info, token, "TIMEOUT")) + { + /* It is TIMEOUT. Number must follow. */ + if (!(ptr= debug_sync_number(&action->timeout, ptr, action_end))) + { + errmsg= "Missing valid number after TIMEOUT"; + goto err; + } + + /* Get next token. If none follows, set action. */ + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) + goto set_action; + } + } + + /* + Try EXECUTE. + */ + if (!my_strcasecmp(system_charset_info, token, "EXECUTE")) + { + /* + EXECUTE requires either SIGNAL and/or WAIT_FOR to be present. + In this case action->execute has been preset to 1. + */ + if (!action->execute) + { + errmsg= "Missing action before EXECUTE"; + goto err; + } + + /* Number must follow. */ + if (!(ptr= debug_sync_number(&action->execute, ptr, action_end))) + { + errmsg= "Missing valid number after EXECUTE"; + goto err; + } + + /* Get next token. If none follows, set action. */ + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) + goto set_action; + } + + /* + Try NO_CLEAR_EVENT. + */ + if (!my_strcasecmp(system_charset_info, token, "NO_CLEAR_EVENT")) + { + action->clear_event= false; + /* Get next token. If none follows, set action. */ + if (!(ptr = debug_sync_token(&token, &token_length, ptr, action_end))) goto set_action; + } + + /* + Try HIT_LIMIT. + */ + if (!my_strcasecmp(system_charset_info, token, "HIT_LIMIT")) + { + /* Number must follow. */ + if (!(ptr= debug_sync_number(&action->hit_limit, ptr, action_end))) + { + errmsg= "Missing valid number after HIT_LIMIT"; + goto err; + } + + /* Get next token. If none follows, set action. 
*/ + if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end))) + goto set_action; + } + + errmsg= "Illegal or out of order stuff: '%.*s'"; + + err: + if (errmsg) + { + /* + NOTE: errmsg must either have %.*s or none % at all. + It can be NULL if an error message is already reported + (e.g. by my_malloc()). + */ + set_if_smaller(token_length, 64); /* Limit error message length. */ + my_printf_error(ER_PARSE_ERROR, errmsg, MYF(0), token_length, token); + } + if (action) + debug_sync_remove_action(thd->debug_sync_control, action); + DBUG_RETURN(TRUE); + + set_action: + DBUG_RETURN(debug_sync_set_action(thd, action)); + + end: + DBUG_RETURN(FALSE); +} + +/** + Set the system variable 'debug_sync'. + + @param[in] thd thread handle + @param[in] var set variable request + + @return status + @retval FALSE ok, variable is set + @retval TRUE error, variable could not be set + + @note + "Setting" of the system variable 'debug_sync' does not mean to + assign a value to it as usual. Instead a debug sync action is parsed + from the input string and stored apart from the variable value. + + @note + For efficiency reasons, the action string parser places '\0' + terminators in the string. So we need to take a copy here. +*/ + +bool debug_sync_update(THD *thd, char *val_str, size_t len) +{ + DBUG_ENTER("debug_sync_update"); + DBUG_PRINT("debug_sync", ("set action: '%s'", val_str)); + + /* + debug_sync_eval_action() places '\0' in the string, which itself + must be '\0' terminated. + */ + DBUG_ASSERT(val_str[len] == '\0'); + DBUG_RETURN(opt_debug_sync_timeout ? + debug_sync_eval_action(thd, val_str, val_str + len) : + FALSE); +} + + +/** + Retrieve the value of the system variable 'debug_sync'. + + @param[in] thd thread handle + + @return string + @retval != NULL ok, string pointer + @retval NULL memory allocation error + + @note + The value of the system variable 'debug_sync' reflects if + the facility is enabled ("ON") or disabled (default, "OFF"). 
+ + When "ON", the current signal is added. +*/ + +uchar *debug_sync_value_ptr(THD *thd) +{ + char *value; + DBUG_ENTER("debug_sync_value_ptr"); + + if (opt_debug_sync_timeout) + { + static const char on[]= "ON - current signals: '"; + + // Ensure exclusive access to debug_sync_global.ds_signal + mysql_mutex_lock(&debug_sync_global->ds_mutex); + + size_t lgt= sizeof(on) + 1; /* +1 as we'll have to append ' at the end. */ + + for (size_t i= 0; i < debug_sync_global->ds_signal_set.size(); i++) + { + /* Assume each signal is separated by a comma, hence +1. */ + lgt+= debug_sync_global->ds_signal_set.at(i)->length + 1; + } + + char *vend; + char *vptr; + + if ((value= (char*) alloc_root(thd->mem_root, lgt))) + { + vend= value + lgt - 1; /* reserve space for '\0'. */ + vptr= debug_sync_bmove_len(value, vend, STRING_WITH_LEN(on)); + for (size_t i= 0; i < debug_sync_global->ds_signal_set.size(); i++) + { + const LEX_CSTRING *s= debug_sync_global->ds_signal_set.at(i); + vptr= debug_sync_bmove_len(vptr, vend, s->str, s->length); + if (i != debug_sync_global->ds_signal_set.size() - 1) + *(vptr++)= ','; + } + DBUG_ASSERT(vptr < vend); + *(vptr++)= '\''; + *vptr= '\0'; /* We have one byte reserved for the worst case. */ + } + mysql_mutex_unlock(&debug_sync_global->ds_mutex); + } + else + { + /* purecov: begin tested */ + value= const_cast("OFF"); + /* purecov: end */ + } + + DBUG_RETURN((uchar*) value); +} + + + + + +/** + Execute requested action at a synchronization point. + + @param[in] thd thread handle + @param[in] action action to be executed + + @note + This is to be called only if activation count > 0. 
+*/ + +static void debug_sync_execute(THD *thd, st_debug_sync_action *action) +{ +#ifdef DBUG_TRACE + const char *dsp_name= action->sync_point.c_ptr(); + const char *sig_emit= action->signal.c_ptr(); + const char *sig_wait= action->wait_for.c_ptr(); +#endif + DBUG_ENTER("debug_sync_execute"); + DBUG_ASSERT(thd); + DBUG_ASSERT(action); + DBUG_PRINT("debug_sync", + ("sync_point: '%s' activation_count: %lu hit_limit: %lu " + "execute: %lu timeout: %lu signal: '%s' wait_for: '%s'", + dsp_name, action->activation_count, action->hit_limit, + action->execute, action->timeout, sig_emit, sig_wait)); + + DBUG_ASSERT(action->activation_count); + action->activation_count--; + + if (action->execute) + { + const char *UNINIT_VAR(old_proc_info); + + action->execute--; + + /* + If we will be going to wait, set proc_info for the PROCESSLIST table. + Do this before emitting the signal, so other threads can see it + if they awake before we enter_cond() below. + */ + if (action->wait_for.length()) + { + st_debug_sync_control *ds_control= thd->debug_sync_control; + strxnmov(ds_control->ds_proc_info, sizeof(ds_control->ds_proc_info)-1, + "debug sync point: ", action->sync_point.c_ptr(), NullS); + old_proc_info= thd->proc_info; + debug_sync_thd_proc_info(thd, ds_control->ds_proc_info); + } + + /* + Take mutex to ensure that only one thread access + debug_sync_global.ds_signal at a time. Need to take mutex for + read access too, to create a memory barrier in order to avoid that + threads just reads an old cached version of the signal. + */ + + mysql_mutex_lock(&debug_sync_global->ds_mutex); + + if (action->signal.length()) + { + int offset= 0, pos; + bool error= false; + + /* This loop covers all signals in the list except for the last one. + Split the signal string by commas and set a signal in the global + variable for each one. 
*/ + while (!error && (pos= action->signal.strstr(",", 1, offset)) > 0) + { + error= debug_sync_global->set_signal(action->signal.ptr() + offset, + pos - offset); + offset= pos + 1; + } + + if (error || + /* The last signal in the list. */ + debug_sync_global->set_signal(action->signal.ptr() + offset, + action->signal.length() - offset)) + { + /* + Error is reported by my_malloc(). + We must disable the facility. We have no way to return an error. + */ + debug_sync_emergency_disable(); /* purecov: tested */ + } + /* Wake threads waiting in a sync point. */ + mysql_cond_broadcast(&debug_sync_global->ds_cond); + DBUG_PRINT("debug_sync_exec", ("signal '%s' at: '%s'", + sig_emit, dsp_name)); + } /* end if (action->signal.length()) */ + + if (action->wait_for.length()) + { + mysql_mutex_t *old_mutex= NULL; + mysql_cond_t *old_cond= NULL; + bool restore_current_mutex; + int error= 0; + struct timespec abstime; + + /* + We don't use enter_cond()/exit_cond(). They do not save old + mutex and cond. This would prohibit the use of DEBUG_SYNC + between other places of enter_cond() and exit_cond(). + + We need to check for existence of thd->mysys_var to also make + it possible to use DEBUG_SYNC framework in scheduler when this + variable has been set to NULL. + */ + if (thd->mysys_var) + { + old_mutex= thd->mysys_var->current_mutex; + old_cond= thd->mysys_var->current_cond; + restore_current_mutex = true; + thd->mysys_var->current_mutex= &debug_sync_global->ds_mutex; + thd->mysys_var->current_cond= &debug_sync_global->ds_cond; + } + else + restore_current_mutex = false; + + set_timespec(abstime, action->timeout); + DBUG_EXECUTE("debug_sync_exec", { + const char *signal_set= get_signal_set_as_string(); + if (!signal_set) + { + DBUG_PRINT("debug_sync_exec", + ("Out of memory when fetching signal set")); + } + else + { + /* Functions as DBUG_PRINT args can change keyword and line nr. 
*/ + DBUG_PRINT("debug_sync_exec", + ("wait for '%s' at: '%s', curr: '%s'", + sig_wait, dsp_name, signal_set)); + my_free((void *)signal_set); + }}); + + + /* + Wait until the signal set contains the wait_for string. + Interrupt when thread or query is killed or facility is disabled. + The facility can become disabled when some thread cannot get + the required dynamic memory allocated. + */ + while (!debug_sync_global->is_signalled(action->wait_for.ptr(), + action->wait_for.length()) && + !(thd->killed & KILL_HARD_BIT) && + opt_debug_sync_timeout) + { + error= mysql_cond_timedwait(&debug_sync_global->ds_cond, + &debug_sync_global->ds_mutex, + &abstime); + // TODO turn this into a for loop printing. + DBUG_EXECUTE("debug_sync", { + /* Functions as DBUG_PRINT args can change keyword and line nr. */ + DBUG_PRINT("debug_sync", + ("awoke from %s error: %d", + sig_wait, error));}); + if (unlikely(error == ETIMEDOUT || error == ETIME)) + { + // We should not make the statement fail, even if in strict mode. + Abort_on_warning_instant_set aws(thd, false); + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_DEBUG_SYNC_TIMEOUT, + ER_THD(thd, ER_DEBUG_SYNC_TIMEOUT)); + DBUG_EXECUTE_IF("debug_sync_abort_on_timeout", DBUG_ASSERT(0);); + break; + } + error= 0; + } + + if (action->clear_event) + debug_sync_global->clear_signal(action->wait_for); + + DBUG_EXECUTE("debug_sync_exec", + if (thd->killed) + DBUG_PRINT("debug_sync_exec", + ("killed %d from '%s' at: '%s'", + thd->killed, sig_wait, dsp_name)); + else + DBUG_PRINT("debug_sync_exec", + ("%s from '%s' at: '%s'", + error ? "timeout" : "resume", + sig_wait, dsp_name));); + + /* + We don't use enter_cond()/exit_cond(). They do not save old + mutex and cond. This would prohibit the use of DEBUG_SYNC + between other places of enter_cond() and exit_cond(). The + protected mutex must always unlocked _before_ mysys_var->mutex + is locked. (See comment in THD::exit_cond().) 
+ */ + mysql_mutex_unlock(&debug_sync_global->ds_mutex); + if (restore_current_mutex) + { + mysql_mutex_lock(&thd->mysys_var->mutex); + thd->mysys_var->current_mutex= old_mutex; + thd->mysys_var->current_cond= old_cond; + debug_sync_thd_proc_info(thd, old_proc_info); + mysql_mutex_unlock(&thd->mysys_var->mutex); + } + else + debug_sync_thd_proc_info(thd, old_proc_info); + } + else + { + /* In case we don't wait, we just release the mutex. */ + mysql_mutex_unlock(&debug_sync_global->ds_mutex); + } /* end if (action->wait_for.length()) */ + + } /* end if (action->execute) */ + + /* hit_limit is zero for infinite. Don't decrement unconditionally. */ + if (action->hit_limit) + { + if (!--action->hit_limit) + { + thd->set_killed(KILL_QUERY); + my_error(ER_DEBUG_SYNC_HIT_LIMIT, MYF(0)); + } + DBUG_PRINT("debug_sync_exec", ("hit_limit: %lu at: '%s'", + action->hit_limit, dsp_name)); + } + + DBUG_VOID_RETURN; +} + + +/** + Execute requested action at a synchronization point. + + @param[in] thd thread handle + @param[in] sync_point_name name of synchronization point + @param[in] name_len length of sync point name +*/ + +static void debug_sync(THD *thd, const char *sync_point_name, size_t name_len) +{ + if (!thd) + { + if (!(thd= current_thd)) + return; + } + + st_debug_sync_control *ds_control= thd->debug_sync_control; + st_debug_sync_action *action; + DBUG_ENTER("debug_sync"); + DBUG_PRINT("debug_sync_point", ("hit: '%s'", sync_point_name)); + DBUG_ASSERT(sync_point_name); + DBUG_ASSERT(name_len); + DBUG_ASSERT(ds_control); + + /* Statistics. */ + ds_control->dsp_hits++; + + if (ds_control->ds_active && + (action= debug_sync_find(ds_control->ds_action, ds_control->ds_active, + sync_point_name, name_len)) && + action->activation_count) + { + /* Sync point is active (action exists). */ + debug_sync_execute(thd, action); + + /* Statistics. */ + ds_control->dsp_executed++; + + /* If action became inactive, remove it to shrink the search array. 
*/ + if (!action->activation_count) + debug_sync_remove_action(ds_control, action); + } + + DBUG_VOID_RETURN; +} + +/** + Define debug sync action. + + @param[in] thd thread handle + @param[in] action_str action string + + @return status + @retval FALSE ok + @retval TRUE error + + @description + The function is similar to @c debug_sync_eval_action but is + to be called immediately from the server code rather than + to be triggered by setting a value to DEBUG_SYNC system variable. + + @note + The input string is copied prior to be fed to + @c debug_sync_eval_action to let the latter modify it. + + Caution. + The function allocates in THD::mem_root and therefore + is not recommended to be deployed inside big loops. +*/ + +bool debug_sync_set_action(THD *thd, const char *action_str, size_t len) +{ + bool rc; + char *value; + DBUG_ENTER("debug_sync_set_action"); + DBUG_ASSERT(thd); + DBUG_ASSERT(action_str); + + value= strmake_root(thd->mem_root, action_str, len); + rc= debug_sync_eval_action(thd, value, value + len); + DBUG_RETURN(rc); +} + + +#else /* defined(ENABLED_DEBUG_SYNC) */ +/* prevent linker/lib warning about file without public symbols */ +int debug_sync_dummy; +#endif /* defined(ENABLED_DEBUG_SYNC) */ diff --git a/sql/debug_sync.h b/sql/debug_sync.h new file mode 100644 index 00000000..831b86b6 --- /dev/null +++ b/sql/debug_sync.h @@ -0,0 +1,55 @@ +#ifndef DEBUG_SYNC_INCLUDED +#define DEBUG_SYNC_INCLUDED + +/* Copyright (c) 2009, 2010, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + Declarations for the Debug Sync Facility. See debug_sync.cc for details. +*/ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +class THD; + +#if defined(ENABLED_DEBUG_SYNC) + +/* Command line option --debug-sync-timeout. See mysqld.cc. */ +extern MYSQL_PLUGIN_IMPORT uint opt_debug_sync_timeout; + +/* Default WAIT_FOR timeout if command line option is given without argument. */ +#define DEBUG_SYNC_DEFAULT_WAIT_TIMEOUT 300 + +/* Debug Sync prototypes. See debug_sync.cc. */ +extern int debug_sync_init(void); +extern void debug_sync_end(void); +extern void debug_sync_init_thread(THD *thd); +extern void debug_sync_end_thread(THD *thd); +void debug_sync_reset_thread(THD *thd); +extern bool debug_sync_set_action(THD *thd, const char *action_str, size_t len); +extern bool debug_sync_update(THD *thd, char *val_str, size_t len); +extern uchar *debug_sync_value_ptr(THD *thd); +#else +static inline void debug_sync_init_thread(THD *thd) {} +static inline void debug_sync_end_thread(THD *thd) {} +static inline void debug_sync_reset_thread(THD *thd) {} +static inline bool debug_sync_set_action(THD *, const char *, size_t) +{ return false; } +#endif /* defined(ENABLED_DEBUG_SYNC) */ +#endif /* DEBUG_SYNC_INCLUDED */ diff --git a/sql/derived_handler.cc b/sql/derived_handler.cc new file mode 100644 index 00000000..cddd1200 --- /dev/null +++ b/sql/derived_handler.cc @@ -0,0 +1,113 @@ +/* + Copyright (c) 2018, 2019 MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_select.h" +#include "derived_handler.h" + + +/** + The methods of the Pushdown_derived class. + + The objects of this class are used for pushdown of the derived tables + into engines. The main method of the class is Pushdown_derived::execute() + that initiates execution of the query specifying a derived by a foreign + engine, receives the rows of the result set and put them in a temporary + table on the server side. + + The method uses only the functions of the derived_handle interface to do + this. The constructor of the class gets this interface as a parameter. + + Currently a derived tables pushed into an engine is always materialized. + It could be changed if the cases when the tables is used as driving table. 
+*/ + + +Pushdown_derived::Pushdown_derived(TABLE_LIST *tbl, derived_handler *h) + : derived(tbl), handler(h) +{ +} + + + +int Pushdown_derived::execute() +{ + int err; + THD *thd= handler->thd; + TABLE *table= handler->table; + TMP_TABLE_PARAM *tmp_table_param= handler->tmp_table_param; + + DBUG_ENTER("Pushdown_derived::execute"); + + if ((err= handler->init_scan())) + goto error; + + while (!(err= handler->next_row())) + { + if (unlikely(thd->check_killed())) + { + handler->end_scan(); + DBUG_RETURN(-1); + } + + if ((err= table->file->ha_write_tmp_row(table->record[0]))) + { + bool is_duplicate; + if (likely(!table->file->is_fatal_error(err, HA_CHECK_DUP))) + continue; // Distinct elimination + + if (create_internal_tmp_table_from_heap(thd, table, + tmp_table_param->start_recinfo, + &tmp_table_param->recinfo, + err, 1, &is_duplicate)) + DBUG_RETURN(1); + if (is_duplicate) + continue; + } + } + + if (err != 0 && err != HA_ERR_END_OF_FILE) + goto error; + + if ((err= handler->end_scan())) + goto error_2; + + DBUG_RETURN(0); + +error: + handler->end_scan(); +error_2: + handler->print_error(err, MYF(0)); + DBUG_RETURN(-1); // Error not sent to client +} + + +void derived_handler::print_error(int error, myf errflag) +{ + my_error(ER_GET_ERRNO, MYF(0), error, hton_name(ht)->str); +} + + +void derived_handler::set_derived(TABLE_LIST *tbl) +{ + derived= tbl; + table= tbl->table; + unit= tbl->derived; + select= unit->first_select(); + tmp_table_param= ((select_unit *)(unit->result))->get_tmp_table_param(); +} + diff --git a/sql/derived_handler.h b/sql/derived_handler.h new file mode 100644 index 00000000..f6feed8d --- /dev/null +++ b/sql/derived_handler.h @@ -0,0 +1,85 @@ +/* + Copyright (c) 2016, 2017 MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef DERIVED_HANDLER_INCLUDED +#define DERIVED_HANDLER_INCLUDED + +#include "mariadb.h" +#include "sql_priv.h" + +class TMP_TABLE_PARAM; + +typedef class st_select_lex_unit SELECT_LEX_UNIT; + +/** + @class derived_handler + + This interface class is to be used for execution of queries that specify + derived table by foreign engines +*/ + +class derived_handler +{ +public: + THD *thd; + handlerton *ht; + + TABLE_LIST *derived; + + /* + Temporary table where all results should be stored in record[0] + The table has a field for every item from the select list of + the specification of derived. + */ + TABLE *table; + + /* The parameters if the temporary table used at its creation */ + TMP_TABLE_PARAM *tmp_table_param; + + SELECT_LEX_UNIT *unit; // Specifies the derived table + + SELECT_LEX *select; // The first select of the specification + + derived_handler(THD *thd_arg, handlerton *ht_arg) + : thd(thd_arg), ht(ht_arg), derived(0),table(0), tmp_table_param(0), + unit(0), select(0) {} + virtual ~derived_handler() = default; + + /* + Functions to scan data. All these returns 0 if ok, error code in case + of error + */ + + /* Initialize the process of producing rows of the derived table */ + virtual int init_scan()= 0; + + /* + Put the next produced row of the derived in table->record[0] and return 0. + Return HA_ERR_END_OF_FILE if there are no more rows, return other error + number in case of fatal error. 
+ */ + virtual int next_row()= 0; + + /* End prodicing rows */ + virtual int end_scan()=0; + + /* Report errors */ + virtual void print_error(int error, myf errflag); + + void set_derived(TABLE_LIST *tbl); +}; + +#endif /* DERIVED_HANDLER_INCLUDED */ diff --git a/sql/derror.cc b/sql/derror.cc new file mode 100644 index 00000000..455e57fd --- /dev/null +++ b/sql/derror.cc @@ -0,0 +1,391 @@ +/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. + Copyright (C) 2011, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + @brief + Read language depeneded messagefile +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "derror.h" +#include "mysys_err.h" +#include "mysqld.h" // lc_messages_dir +#include "derror.h" // read_texts +#include "sql_class.h" // THD + +uint errors_per_range[MAX_ERROR_RANGES+1]; + +static bool check_error_mesg(const char *file_name, const char **errmsg); +static void init_myfunc_errs(void); + + +C_MODE_START +static const char **get_server_errmsgs(int nr) +{ + int section= (nr-ER_ERROR_FIRST) / ERRORS_PER_RANGE; + if (!current_thd) + return DEFAULT_ERRMSGS[section]; + return CURRENT_THD_ERRMSGS[section]; +} +C_MODE_END + +/** + Read messages from errorfile. + + This function can be called multiple times to reload the messages. 
+ + If it fails to load the messages: + - If we already have error messages loaded, keep the old ones and + return FALSE(ok) + - Initializing the errmesg pointer to an array of empty strings + and return TRUE (error) + + @retval + FALSE OK + @retval + TRUE Error +*/ + +static const char ***original_error_messages; + +bool init_errmessage(void) +{ + const char **errmsgs; + bool error= FALSE; + const char *lang= my_default_lc_messages->errmsgs->language; + my_bool use_english; + + DBUG_ENTER("init_errmessage"); + + free_error_messages(); + my_free(original_error_messages); + original_error_messages= 0; + + error_message_charset_info= system_charset_info; + + use_english= !strcmp(lang, "english"); + if (!use_english) + { + /* Read messages from file. */ + use_english= read_texts(ERRMSG_FILE,lang, &original_error_messages); + error= use_english != FALSE; + if (error) + sql_print_error("Could not load error messages for %s",lang); + } + + if (use_english) + { + static const struct + { + const char* name; + uint id; + const char* fmt; + } + english_msgs[]= + { + #include + }; + + memset(errors_per_range, 0, sizeof(errors_per_range)); + /* Calculate nr of messages per range. */ + for (size_t i= 0; i < array_elements(english_msgs); i++) + { + uint id= english_msgs[i].id; + + // We rely on the fact the array is sorted by id. 
+ DBUG_ASSERT(i == 0 || english_msgs[i-1].id < id); + + errors_per_range[id/ERRORS_PER_RANGE-1]= id%ERRORS_PER_RANGE + 1; + } + + size_t all_errors= 0; + for (size_t i= 0; i < MAX_ERROR_RANGES; i++) + all_errors+= errors_per_range[i]; + + if (!(original_error_messages= (const char***) + my_malloc(PSI_NOT_INSTRUMENTED, + (all_errors + MAX_ERROR_RANGES)*sizeof(void*), + MYF(MY_ZEROFILL)))) + DBUG_RETURN(TRUE); + + errmsgs= (const char**)(original_error_messages + MAX_ERROR_RANGES); + + original_error_messages[0]= errmsgs; + for (uint i= 1; i < MAX_ERROR_RANGES; i++) + { + original_error_messages[i]= + original_error_messages[i-1] + errors_per_range[i-1]; + } + + for (uint i= 0; i < array_elements(english_msgs); i++) + { + uint id= english_msgs[i].id; + original_error_messages[id/ERRORS_PER_RANGE-1][id%ERRORS_PER_RANGE]= + english_msgs[i].fmt; + } + } + + /* Register messages for use with my_error(). */ + for (uint i=0 ; i < MAX_ERROR_RANGES ; i++) + { + if (errors_per_range[i]) + { + if (my_error_register(get_server_errmsgs, (i+1)*ERRORS_PER_RANGE, + (i+1)*ERRORS_PER_RANGE + + errors_per_range[i]-1)) + { + my_free(original_error_messages); + original_error_messages= 0; + DBUG_RETURN(TRUE); + } + } + } + DEFAULT_ERRMSGS= original_error_messages; + init_myfunc_errs(); /* Init myfunc messages */ + DBUG_RETURN(error); +} + + +void free_error_messages() +{ + /* We don't need to free errmsg as it's done in cleanup_errmsg */ + for (uint i= 0 ; i < MAX_ERROR_RANGES ; i++) + { + if (errors_per_range[i]) + { + my_error_unregister((i+1)*ERRORS_PER_RANGE, + (i+1)*ERRORS_PER_RANGE + + errors_per_range[i]-1); + errors_per_range[i]= 0; + } + } +} + + +/** + Check the error messages array contains all relevant error messages +*/ + +static bool check_error_mesg(const char *file_name, const char **errmsg) +{ + /* + The last MySQL error message can't be an empty string; If it is, + it means that the error file doesn't contain all MySQL messages + and is probably from an older version 
of MySQL / MariaDB. + We also check that each section has enough error messages. + */ + if (errmsg[ER_LAST_MYSQL_ERROR_MESSAGE -1 - ER_ERROR_FIRST][0] == 0 || + (errors_per_range[0] < ER_ERROR_LAST_SECTION_2 - ER_ERROR_FIRST + 1) || + errors_per_range[1] != 0 || + (errors_per_range[2] < ER_ERROR_LAST_SECTION_4 - + ER_ERROR_FIRST_SECTION_4 +1) || + (errors_per_range[3] < ER_ERROR_LAST - ER_ERROR_FIRST_SECTION_5 + 1)) + { + sql_print_error("Error message file '%s' is probably from and older " + "version of MariaDB as it doesn't contain all " + "error messages", file_name); + return 1; + } + return 0; +} + + +struct st_msg_file +{ + uint sections; + uint max_error; + uint errors; + size_t text_length; +}; + +/** + Open file for packed textfile in language-directory. +*/ + +static File open_error_msg_file(const char *file_name, const char *language, + uint error_messages, struct st_msg_file *ret) +{ + int error_pos= 0; + File file; + char name[FN_REFLEN]; + char lang_path[FN_REFLEN]; + uchar head[32]; + DBUG_ENTER("open_error_msg_file"); + + convert_dirname(lang_path, language, NullS); + (void) my_load_path(lang_path, lang_path, lc_messages_dir); + if ((file= mysql_file_open(key_file_ERRMSG, + fn_format(name, file_name, lang_path, "", 4), + O_RDONLY | O_SHARE | O_BINARY, + MYF(0))) < 0) + { + /* + Trying pre-5.4 semantics of the --language parameter. 
+ It included the language-specific part, e.g.: + --language=/path/to/english/ + */ + if ((file= mysql_file_open(key_file_ERRMSG, + fn_format(name, file_name, lc_messages_dir, "", + 4), + O_RDONLY | O_SHARE | O_BINARY, + MYF(0))) < 0) + goto err; + if (global_system_variables.log_warnings > 2) + { + sql_print_warning("An old style --language or -lc-message-dir value with language specific part detected: %s", lc_messages_dir); + sql_print_warning("Use --lc-messages-dir without language specific part instead."); + } + } + error_pos=1; + if (mysql_file_read(file, (uchar*) head, 32, MYF(MY_NABP))) + goto err; + error_pos=2; + if (head[0] != (uchar) 254 || head[1] != (uchar) 254 || + head[2] != 2 || head[3] != 5) + goto err; /* purecov: inspected */ + + ret->text_length= uint4korr(head+6); + ret->max_error= uint2korr(head+10); + ret->errors= uint2korr(head+12); + ret->sections= uint2korr(head+14); + + if (unlikely(ret->max_error < error_messages || + ret->sections != MAX_ERROR_RANGES)) + { + sql_print_error("\ +Error message file '%s' had only %d error messages, but it should contain at least %d error messages.\nCheck that the above file is the right version for this program!", + name,ret->errors,error_messages); + (void) mysql_file_close(file, MYF(MY_WME)); + DBUG_RETURN(FERR); + } + DBUG_RETURN(file); + +err: + sql_print_error((error_pos == 2) ? + "Incompatible header in messagefile '%s'. Probably from " + "another version of MariaDB" : + ((error_pos == 1) ? "Can't read from messagefile '%s'" : + "Can't find messagefile '%s'"), name); + if (file != FERR) + (void) mysql_file_close(file, MYF(MY_WME)); + DBUG_RETURN(FERR); +} + + +/* + Define the number of normal and extra error messages in the errmsg.sys + file +*/ + +static const uint error_messages= ER_ERROR_LAST - ER_ERROR_FIRST+1; + +/** + Read text from packed textfile in language-directory. 
+*/ + +bool read_texts(const char *file_name, const char *language, + const char ****data) +{ + uint i, range_size; + const char **point; + size_t offset; + File file; + uchar *buff, *pos; + struct st_msg_file msg_file; + DBUG_ENTER("read_texts"); + + if (unlikely((file= open_error_msg_file(file_name, language, error_messages, + &msg_file)) == FERR)) + DBUG_RETURN(1); + + if (!(*data= (const char***) + my_malloc(key_memory_errmsgs, + (size_t) ((MAX_ERROR_RANGES+1) * sizeof(char**) + + MY_MAX(msg_file.text_length, msg_file.errors * 2)+ + msg_file.errors * sizeof(char*)), + MYF(MY_WME)))) + goto err; /* purecov: inspected */ + + point= (const char**) ((*data) + MAX_ERROR_RANGES); + buff= (uchar*) (point + msg_file.errors); + + if (mysql_file_read(file, buff, + (size_t) (msg_file.errors + msg_file.sections) * 2, + MYF(MY_NABP | MY_WME))) + goto err; + + pos= buff; + /* read in sections */ + for (i= 0, offset= 0; i < msg_file.sections ; i++) + { + (*data)[i]= point + offset; + errors_per_range[i]= range_size= uint2korr(pos); + offset+= range_size; + pos+= 2; + } + + /* Calculate pointers to text data */ + for (i=0, offset=0 ; i < msg_file.errors ; i++) + { + point[i]= (char*) buff+offset; + offset+=uint2korr(pos); + pos+=2; + } + + /* Read error message texts */ + if (mysql_file_read(file, buff, msg_file.text_length, MYF(MY_NABP | MY_WME))) + goto err; + + (void) mysql_file_close(file, MYF(MY_WME)); + + DBUG_RETURN(check_error_mesg(file_name, point)); + +err: + (void) mysql_file_close(file, MYF(0)); + DBUG_RETURN(1); +} /* read_texts */ + + +/** + Initiates error-messages used by my_func-library. 
+*/ + +static void init_myfunc_errs() +{ + init_glob_errs(); /* Initiate english errors */ + if (!(specialflag & SPECIAL_ENGLISH)) + { + EE(EE_FILENOTFOUND) = ER_DEFAULT(ER_FILE_NOT_FOUND); + EE(EE_CANTCREATEFILE) = ER_DEFAULT(ER_CANT_CREATE_FILE); + EE(EE_READ) = ER_DEFAULT(ER_ERROR_ON_READ); + EE(EE_WRITE) = ER_DEFAULT(ER_ERROR_ON_WRITE); + EE(EE_BADCLOSE) = ER_DEFAULT(ER_ERROR_ON_CLOSE); + EE(EE_OUTOFMEMORY) = ER_DEFAULT(ER_OUTOFMEMORY); + EE(EE_DELETE) = ER_DEFAULT(ER_CANT_DELETE_FILE); + EE(EE_LINK) = ER_DEFAULT(ER_ERROR_ON_RENAME); + EE(EE_EOFERR) = ER_DEFAULT(ER_UNEXPECTED_EOF); + EE(EE_CANTLOCK) = ER_DEFAULT(ER_CANT_LOCK); + EE(EE_DIR) = ER_DEFAULT(ER_CANT_READ_DIR); + EE(EE_STAT) = ER_DEFAULT(ER_CANT_GET_STAT); + EE(EE_GETWD) = ER_DEFAULT(ER_CANT_GET_WD); + EE(EE_SETWD) = ER_DEFAULT(ER_CANT_SET_WD); + EE(EE_DISK_FULL) = ER_DEFAULT(ER_DISK_FULL); + } +} diff --git a/sql/derror.h b/sql/derror.h new file mode 100644 index 00000000..34a71b73 --- /dev/null +++ b/sql/derror.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef DERROR_INCLUDED +#define DERROR_INCLUDED + +bool init_errmessage(void); +void free_error_messages(); +bool read_texts(const char *file_name, const char *language, + const char ****data); + +#endif /* DERROR_INCLUDED */ diff --git a/sql/des_key_file.cc b/sql/des_key_file.cc new file mode 100644 index 00000000..bfbe04f6 --- /dev/null +++ b/sql/des_key_file.cc @@ -0,0 +1,106 @@ +/* Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" // HAVE_* +#include "sql_priv.h" +#include "des_key_file.h" // st_des_keyschedule, st_des_keyblock +#include "log.h" // sql_print_error +#include + +#ifdef HAVE_OPENSSL + +struct st_des_keyschedule des_keyschedule[10]; +uint des_default_key; + +#define des_cs &my_charset_latin1 + +/** + Load DES keys from plaintext file into + memory on MySQL server startup and on command FLUSH DES_KEY_FILE. 
+ + @retval + 0 ok + @retval + 1 Error +*/ + + +bool +load_des_key_file(const char *file_name) +{ + bool result=1; + File file; + IO_CACHE io; + DBUG_ENTER("load_des_key_file"); + DBUG_PRINT("enter",("name: %s",file_name)); + + mysql_mutex_lock(&LOCK_des_key_file); + if ((file= mysql_file_open(key_file_des_key_file, file_name, + O_RDONLY | O_BINARY, MYF(MY_WME))) < 0 || + init_io_cache(&io, file, IO_SIZE*2, READ_CACHE, 0, 0, MYF(MY_WME))) + goto error; + + bzero((char*) des_keyschedule,sizeof(struct st_des_keyschedule) * 10); + des_default_key=15; // Impossible key + for (;;) + { + char *start, *end; + char buf[1024], offset; + st_des_keyblock keyblock; + size_t length; + + if (!(length=my_b_gets(&io,buf,sizeof(buf)-1))) + break; // End of file + offset=buf[0]; + if (offset >= '0' && offset <= '9') // If ok key + { + offset=(char) (offset - '0'); + // Remove newline and possible other control characters + for (start=buf+1 ; my_isspace(des_cs, *start) ; start++) ; + end=buf+length; + for (end=strend(buf) ; + end > start && !my_isgraph(des_cs, end[-1]) ; end--) ; + + if (start != end) + { + DES_cblock ivec; + bzero((char*) &ivec,sizeof(ivec)); + // We make good 24-byte (168 bit) key from given plaintext key with MD5 + EVP_BytesToKey(EVP_des_ede3_cbc(),EVP_md5(),NULL, + (uchar *) start, (int) (end-start),1, + (uchar *) &keyblock, + ivec); + DES_set_key_unchecked(&keyblock.key1,&(des_keyschedule[(int)offset].ks1)); + DES_set_key_unchecked(&keyblock.key2,&(des_keyschedule[(int)offset].ks2)); + DES_set_key_unchecked(&keyblock.key3,&(des_keyschedule[(int)offset].ks3)); + if (des_default_key == 15) + des_default_key= (uint) offset; // use first as def. 
+ } + } + else if (offset != '#') + sql_print_error("load_des_file: Found wrong key_number: %c",offset); + } + result=0; + +error: + if (file >= 0) + { + mysql_file_close(file, MYF(0)); + end_io_cache(&io); + } + mysql_mutex_unlock(&LOCK_des_key_file); + DBUG_RETURN(result); +} +#endif /* HAVE_OPENSSL */ diff --git a/sql/des_key_file.h b/sql/des_key_file.h new file mode 100644 index 00000000..847cd767 --- /dev/null +++ b/sql/des_key_file.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef DES_KEY_FILE_INCLUDED +#define DES_KEY_FILE_INCLUDED + +#ifdef HAVE_OPENSSL +#include + +#include "violite.h" /* DES_cblock, DES_key_schedule */ + +struct st_des_keyblock +{ + DES_cblock key1, key2, key3; +}; + +struct st_des_keyschedule +{ + DES_key_schedule ks1, ks2, ks3; +}; + +extern struct st_des_keyschedule des_keyschedule[10]; +extern uint des_default_key; + +bool load_des_key_file(const char *file_name); +#endif /* HAVE_OPENSSL */ + +#endif /* DES_KEY_FILE_INCLUDED */ diff --git a/sql/discover.cc b/sql/discover.cc new file mode 100644 index 00000000..20116935 --- /dev/null +++ b/sql/discover.cc @@ -0,0 +1,274 @@ +/* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2009, 2022, MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/** + @file + + @brief + Functions for discover of frm file from handler +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "discover.h" +#include + +/** + Read the contents of a .frm file. + + frmdata and len are set to 0 on error. + + @param name path to table-file "db/name" + @param frmdata frm data + @param len length of the read frmdata + + @retval + 0 ok + @retval + 1 Could not open file + @retval + 2 Could not stat file + @retval + 3 Could not allocate data for read. 
Could not read file +*/ + +int readfrm(const char *name, const uchar **frmdata, size_t *len) +{ + int error; + char index_file[FN_REFLEN]; + File file; + size_t read_len; + uchar *read_data; + MY_STAT state; + DBUG_ENTER("readfrm"); + DBUG_PRINT("enter",("name: '%s'",name)); + + *frmdata= NULL; // In case of errors + *len= 0; + error= 1; + if ((file= mysql_file_open(key_file_frm, + fn_format(index_file, name, "", reg_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT), + O_RDONLY | O_SHARE, + MYF(0))) < 0) + goto err_end; + + // Get length of file + error= 2; + if (mysql_file_fstat(file, &state, MYF(0))) + goto err; + MSAN_STAT_WORKAROUND(&state); + read_len= (size_t)MY_MIN(FRM_MAX_SIZE, state.st_size); // safety + + // Read whole frm file + error= 3; + if (!(read_data= (uchar*)my_malloc(key_memory_frm_string, read_len, + MYF(MY_WME)))) + goto err; + if (mysql_file_read(file, read_data, read_len, MYF(MY_NABP))) + { + my_free(read_data); + goto err; + } + + // Setup return data + *frmdata= (uchar*) read_data; + *len= read_len; + error= 0; + + err: + (void) mysql_file_close(file, MYF(MY_WME)); + + err_end: /* Here when no file */ + DBUG_RETURN (error); +} /* readfrm */ + + +/* + Write the content of a frm data pointer to a frm or par file. + + @param path full path to table-file "db/name.frm" or .par + @param db Database name. Only used for my_error() + @param table Table name. Only used for my_error() + @param data data to write to file + @param len length of the data + + @retval + 0 ok + @retval + <> 0 Could not write file. 
In this case the file is not created +*/ + +int writefile(const char *path, const char *db, const char *table, + bool tmp_table, const uchar *data, size_t len) +{ + int error; + int create_flags= O_RDWR | O_TRUNC; + DBUG_ENTER("writefile"); + DBUG_PRINT("enter",("name: '%s' len: %lu ",path, (ulong) len)); + + if (tmp_table) + create_flags|= O_EXCL | O_NOFOLLOW; + + File file= mysql_file_create(key_file_frm, path, + CREATE_MODE, create_flags, MYF(0)); + + if (unlikely((error= file < 0))) + { + if (my_errno == ENOENT) + my_error(ER_BAD_DB_ERROR, MYF(0), db); + else + my_error(ER_CANT_CREATE_TABLE, MYF(0), db, table, my_errno); + } + else + { + error= (int)mysql_file_write(file, data, len, MYF(MY_WME | MY_NABP)); + + if (!error && !tmp_table && opt_sync_frm) + error= mysql_file_sync(file, MYF(MY_WME)) || + my_sync_dir_by_file(path, MYF(MY_WME)); + + error|= mysql_file_close(file, MYF(MY_WME)); + if (error) + my_delete(path, MYF(0)); + } + DBUG_RETURN(error); +} /* writefile */ + + +static inline void advance(FILEINFO* &from, FILEINFO* &to, + FILEINFO* cur, bool &skip) +{ + if (skip) // if not copying + from= cur; // just advance the start pointer + else // if copying + if (to == from) // but to the same place, not shifting the data + from= to= cur; // advance both pointers + else // otherwise + while (from < cur) // have to copy [from...cur) to [to...) + *to++ = *from++; + skip= false; +} + +/** + Go through the directory listing looking for files with a specified + extension and add them to the result list + + @details + This function may be called many times on the same directory listing + but with different extensions. To avoid discovering the same table twice, + whenever a table file is discovered, all files with the same name + (independently from the extensions) are removed from the list. 
+ + Example: the list contained + { "db.opt", "t1.MYD", "t1.MYI", "t1.frm", "t2.ARZ", "t3.ARZ", "t3.frm" } + on discovering all ".frm" files, tables "t1" and "t3" will be found, + and list will become + { "db.opt", "t2.ARZ" } + and now ".ARZ" discovery can discover the table "t2" + + @note + This function assumes that the directory listing is sorted alphabetically. + + @note Partitioning makes this more complicated. A partitioned table t1 might + have files, like t1.frm, t1#P#part1.ibd, t1#P#foo.ibd, etc. + That means we need to compare file names only up to the first '#' or '.' + whichever comes first. +*/ +int extension_based_table_discovery(MY_DIR *dirp, const char *ext_meta, + handlerton::discovered_list *result) +{ + CHARSET_INFO *cs= character_set_filesystem; + size_t ext_meta_len= strlen(ext_meta); + FILEINFO *from, *to, *cur, *end; + bool skip= false; + + from= to= cur= dirp->dir_entry; + end= cur + dirp->number_of_files; + while (cur < end) + { + char *octothorp= strchr(cur->name + 1, '#'); + char *ext= strchr(octothorp ? octothorp : cur->name, FN_EXTCHAR); + + if (ext) + { + size_t len= (octothorp ? octothorp : ext) - cur->name; + if (from != cur && + (strlen(from->name) <= len || + cs->strnncoll(from->name, len, cur->name, len) || + (from->name[len] != FN_EXTCHAR && from->name[len] != '#'))) + advance(from, to, cur, skip); + + if (cs->strnncoll(ext, strlen(ext), + ext_meta, ext_meta_len) == 0) + { + *ext = 0; + if (result->add_file(cur->name)) + return 1; + *ext = FN_EXTCHAR; + skip= true; // table discovered, skip all files with the same name + } + } + else + { + advance(from, to, cur, skip); + from++; + } + + cur++; + } + advance(from, to, cur, skip); + dirp->number_of_files= to - dirp->dir_entry; + return 0; +} + +/** + Simple, not reusable file-based table discovery + + @details + simplified version of extension_based_table_discovery(), that does not + modify the list of files. 
It cannot be called many times for the same + directory listing, otherwise it'll produce duplicate results. +*/ +int ext_table_discovery_simple(MY_DIR *dirp, + handlerton::discovered_list *result) +{ + CHARSET_INFO *cs= character_set_filesystem; + FILEINFO *cur, *end; + + cur= dirp->dir_entry; + end= cur + dirp->number_of_files; + while (cur < end) + { + char *ext= strrchr(cur->name, FN_EXTCHAR); + + if (ext) + { + if (cs->strnncoll(ext, strlen(ext), + reg_ext, reg_ext_length) == 0) + { + *ext = 0; + if (result->add_file(cur->name)) + return 1; + } + } + cur++; + } + return 0; +} + diff --git a/sql/discover.h b/sql/discover.h new file mode 100644 index 00000000..750c2944 --- /dev/null +++ b/sql/discover.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef DISCOVER_INCLUDED +#define DISCOVER_INCLUDED + +int extension_based_table_discovery(MY_DIR *dirp, const char *ext, + handlerton::discovered_list *tl); + +#ifdef MYSQL_SERVER +int readfrm(const char *name, const uchar **data, size_t *length); +int writefile(const char *path, const char *db, const char *table, + bool tmp_table, const uchar *frmdata, size_t len); + +/* a helper to delete an frm file, given a path w/o .frm extension */ +inline void deletefrm(const char *path) +{ + char frm_name[FN_REFLEN]; + strxnmov(frm_name, sizeof(frm_name)-1, path, reg_ext, NullS); + mysql_file_delete(key_file_frm, frm_name, MYF(0)); +} + +int ext_table_discovery_simple(MY_DIR *dirp, + handlerton::discovered_list *result); +#endif + +#endif /* DISCOVER_INCLUDED */ diff --git a/sql/encryption.cc b/sql/encryption.cc new file mode 100644 index 00000000..3c7ba2e9 --- /dev/null +++ b/sql/encryption.cc @@ -0,0 +1,246 @@ +/* Copyright (C) 2015 MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include +#include "log.h" +#include "sql_plugin.h" +#include +#include + +/* there can be only one encryption plugin enabled */ +static plugin_ref encryption_manager= 0; +struct encryption_service_st encryption_handler; + +extern "C" { + +uint no_get_key(uint, uint, uchar*, uint*) +{ + return ENCRYPTION_KEY_VERSION_INVALID; +} +uint no_key(uint) +{ + return ENCRYPTION_KEY_VERSION_INVALID; +} +uint zero_size(uint,uint) +{ + return 0; +} + +static int ctx_init(void *ctx, const unsigned char* key, unsigned int klen, + const unsigned char* iv, unsigned int ivlen, int flags, + unsigned int key_id, unsigned int key_version) +{ + return my_aes_crypt_init(ctx, MY_AES_CBC, flags, key, klen, iv, ivlen); +} + +static unsigned int get_length(unsigned int slen, unsigned int key_id, + unsigned int key_version) +{ + return my_aes_get_size(MY_AES_CBC, slen); +} + +uint ctx_size(unsigned int, unsigned int) +{ + return MY_AES_CTX_SIZE; +} + +} /* extern "C" */ + +int initialize_encryption_plugin(st_plugin_int *plugin) +{ + if (encryption_manager) + return 1; + + vio_check_ssl_init(); + + if (plugin->plugin->init && plugin->plugin->init(plugin)) + { + sql_print_error("Plugin '%s' init function returned error.", + plugin->name.str); + return 1; + } + + encryption_manager= plugin_lock(NULL, plugin_int_to_ref(plugin)); + st_mariadb_encryption *handle= + (struct st_mariadb_encryption*) plugin->plugin->info; + + /* + Compiler on Spark doesn't like the '?' operator here as it + believes the (uint (*)...) implies the C++ call model. 
+ */ + if (handle->crypt_ctx_size) + encryption_handler.encryption_ctx_size_func= handle->crypt_ctx_size; + else + encryption_handler.encryption_ctx_size_func= ctx_size; + + encryption_handler.encryption_ctx_init_func= + handle->crypt_ctx_init ? handle->crypt_ctx_init : ctx_init; + + encryption_handler.encryption_ctx_update_func= + handle->crypt_ctx_update ? handle->crypt_ctx_update : my_aes_crypt_update; + + encryption_handler.encryption_ctx_finish_func= + handle->crypt_ctx_finish ? handle->crypt_ctx_finish : my_aes_crypt_finish; + + encryption_handler.encryption_encrypted_length_func= + handle->encrypted_length ? handle->encrypted_length : get_length; + + encryption_handler.encryption_key_get_func= + handle->get_key; + + encryption_handler.encryption_key_get_latest_version_func= + handle->get_latest_key_version; // must be the last + + return 0; +} + +int finalize_encryption_plugin(st_plugin_int *plugin) +{ + int deinit_status= 0; + bool used= plugin_ref_to_int(encryption_manager) == plugin; + + if (used) + { + encryption_handler.encryption_key_get_func= no_get_key; + encryption_handler.encryption_key_get_latest_version_func= no_key; + encryption_handler.encryption_ctx_size_func= zero_size; + } + + if (plugin && plugin->plugin->deinit) + deinit_status= plugin->plugin->deinit(NULL); + + if (used) + { + plugin_unlock(NULL, encryption_manager); + encryption_manager= 0; + } + return deinit_status; +} + +/****************************************************************** + Encryption Scheme service +******************************************************************/ +static uint scheme_get_key(st_encryption_scheme *scheme, + st_encryption_scheme_key *key) +{ + if (scheme->locker) + scheme->locker(scheme, 0); + + // Check if we already have key + for (uint i = 0; i < array_elements(scheme->key); i++) + { + if (scheme->key[i].version == 0) // no more keys + break; + + if (scheme->key[i].version == key->version) + { + *key= scheme->key[i]; + if (scheme->locker) + 
scheme->locker(scheme, 1); + return 0; + } + } + + // Not found! + scheme->keyserver_requests++; + + uchar global_key[MY_AES_MAX_KEY_LENGTH]; + uint global_key_len= sizeof(global_key), key_len; + + uint rc = encryption_key_get(scheme->key_id, key->version, + global_key, & global_key_len); + if (rc) + goto ret; + + /* Now generate the local key by encrypting IV using the global key */ + rc = my_aes_crypt(MY_AES_ECB, ENCRYPTION_FLAG_ENCRYPT | ENCRYPTION_FLAG_NOPAD, + scheme->iv, sizeof(scheme->iv), key->key, &key_len, + global_key, global_key_len, NULL, 0); + + DBUG_ASSERT(key_len == sizeof(key->key)); + + if (rc) + goto ret; + + // Rotate keys to make room for a new + for (uint i = array_elements(scheme->key) - 1; i; i--) + scheme->key[i] = scheme->key[i - 1]; + + scheme->key[0]= *key; + +ret: + if (scheme->locker) + scheme->locker(scheme, 1); + return rc; +} + +int do_crypt(const unsigned char* src, unsigned int slen, + unsigned char* dst, unsigned int* dlen, + struct st_encryption_scheme *scheme, + unsigned int key_version, unsigned int i32_1, + unsigned int i32_2, unsigned long long i64, + int flag) +{ + compile_time_assert(ENCRYPTION_SCHEME_KEY_INVALID == + (int)ENCRYPTION_KEY_VERSION_INVALID); + + // Maybe temporal solution for MDEV-8173 + // Rationale: scheme->type is currently global/object + // and when used here might not represent actual state + // of smaller granularity objects e.g. 
InnoDB page state + // as type is stored to tablespace (FIL) and could represent + // state where key rotation is trying to reach + //DBUG_ASSERT(scheme->type == 1); + + if (key_version == ENCRYPTION_KEY_VERSION_INVALID || + key_version == ENCRYPTION_KEY_NOT_ENCRYPTED) + return ENCRYPTION_SCHEME_KEY_INVALID; + + st_encryption_scheme_key key; + key.version= key_version; + uint rc= scheme_get_key(scheme, &key); + if (rc) + return (int)rc; + + unsigned char iv[4 + 4 + 8]; + int4store(iv + 0, i32_1); + int4store(iv + 4, i32_2); + int8store(iv + 8, i64); + + return encryption_crypt(src, slen, dst, dlen, key.key, sizeof(key.key), + iv, sizeof(iv), flag, scheme->key_id, key_version); +} + +int encryption_scheme_encrypt(const unsigned char* src, unsigned int slen, + unsigned char* dst, unsigned int* dlen, + struct st_encryption_scheme *scheme, + unsigned int key_version, unsigned int i32_1, + unsigned int i32_2, unsigned long long i64) +{ + return do_crypt(src, slen, dst, dlen, scheme, key_version, i32_1, + i32_2, i64, ENCRYPTION_FLAG_NOPAD | ENCRYPTION_FLAG_ENCRYPT); +} + + +int encryption_scheme_decrypt(const unsigned char* src, unsigned int slen, + unsigned char* dst, unsigned int* dlen, + struct st_encryption_scheme *scheme, + unsigned int key_version, unsigned int i32_1, + unsigned int i32_2, unsigned long long i64) +{ + return do_crypt(src, slen, dst, dlen, scheme, key_version, i32_1, + i32_2, i64, ENCRYPTION_FLAG_NOPAD | ENCRYPTION_FLAG_DECRYPT); +} diff --git a/sql/event_data_objects.cc b/sql/event_data_objects.cc new file mode 100644 index 00000000..bb552aa9 --- /dev/null +++ b/sql/event_data_objects.cc @@ -0,0 +1,1619 @@ +/* + Copyright (c) 2005, 2010, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#define MYSQL_LEX 1 +#include "mariadb.h" /* NO_EMBEDDED_ACCESS_CHECKS */ +#include "sql_priv.h" +#include "unireg.h" +#include "sql_parse.h" // parse_sql +#include "strfunc.h" // find_string_in_array +#include "sql_db.h" // get_default_db_collation +#include "sql_time.h" // interval_type_to_name, + // date_add_interval, + // calc_time_diff +#include "tztime.h" // my_tz_find, my_tz_OFFSET0, struct Time_zone +#include "sp.h" // load_charset, load_collation +#include "events.h" +#include "event_data_objects.h" +#include "event_db_repository.h" +#include "sp_head.h" +#include "sql_show.h" // append_definer, append_identifier +#include "mysql/psi/mysql_sp.h" +#include "wsrep_mysqld.h" +#ifdef WITH_WSREP +#include "wsrep_trans_observer.h" +#endif /* WITH_WSREP */ +/** + @addtogroup Event_Scheduler + @{ +*/ + +#ifdef HAVE_PSI_INTERFACE +void init_scheduler_psi_keys() +{ + const char *category= "scheduler"; + + PSI_server->register_statement(category, & Event_queue_element_for_exec::psi_info, 1); +} + +PSI_statement_info Event_queue_element_for_exec::psi_info= +{ 0, "event", 0}; +#endif + +/*************************************************************************/ + +/** + Event_creation_ctx -- creation context of events. 
+*/ + +class Event_creation_ctx :public Stored_program_creation_ctx, + public Sql_alloc +{ +public: + static bool load_from_db(THD *thd, + MEM_ROOT *event_mem_root, + const char *db_name, + const char *event_name, + TABLE *event_tbl, + Stored_program_creation_ctx **ctx); + +public: + virtual Stored_program_creation_ctx *clone(MEM_ROOT *mem_root) + { + return new (mem_root) + Event_creation_ctx(m_client_cs, m_connection_cl, m_db_cl); + } + +protected: + virtual Object_creation_ctx *create_backup_ctx(THD *thd) const + { + /* + We can avoid usual backup/restore employed in stored programs since we + know that this is a top level statement and the worker thread is + allocated exclusively to execute this event. + */ + + return NULL; + } + +private: + Event_creation_ctx(CHARSET_INFO *client_cs, + CHARSET_INFO *connection_cl, + CHARSET_INFO *db_cl) + : Stored_program_creation_ctx(client_cs, connection_cl, db_cl) + { } +}; + +/************************************************************************** + Event_creation_ctx implementation. +**************************************************************************/ + +bool +Event_creation_ctx::load_from_db(THD *thd, + MEM_ROOT *event_mem_root, + const char *db_name, + const char *event_name, + TABLE *event_tbl, + Stored_program_creation_ctx **ctx) +{ + /* Load character set/collation attributes. 
*/ + + CHARSET_INFO *client_cs; + CHARSET_INFO *connection_cl; + CHARSET_INFO *db_cl; + + bool invalid_creation_ctx= FALSE; + + if (load_charset(thd, event_mem_root, + event_tbl->field[ET_FIELD_CHARACTER_SET_CLIENT], + thd->variables.character_set_client, + &client_cs)) + { + sql_print_warning("Event '%s'.'%s': invalid value " + "in column mysql.event.character_set_client.", + (const char *) db_name, + (const char *) event_name); + + invalid_creation_ctx= TRUE; + } + + if (load_collation(thd, event_mem_root, + event_tbl->field[ET_FIELD_COLLATION_CONNECTION], + thd->variables.collation_connection, + &connection_cl)) + { + sql_print_warning("Event '%s'.'%s': invalid value " + "in column mysql.event.collation_connection.", + (const char *) db_name, + (const char *) event_name); + + invalid_creation_ctx= TRUE; + } + + if (load_collation(thd, event_mem_root, + event_tbl->field[ET_FIELD_DB_COLLATION], + NULL, + &db_cl)) + { + sql_print_warning("Event '%s'.'%s': invalid value " + "in column mysql.event.db_collation.", + (const char *) db_name, + (const char *) event_name); + + invalid_creation_ctx= TRUE; + } + + /* + If we failed to resolve the database collation, load the default one + from the disk. + */ + + if (!db_cl) + db_cl= get_default_db_collation(thd, db_name); + + /* Create the context. 
*/ + + *ctx= new Event_creation_ctx(client_cs, connection_cl, db_cl); + + return invalid_creation_ctx; +} + +/*************************************************************************/ + +/* + Initializes dbname and name of an Event_queue_element_for_exec + object + + SYNOPSIS + Event_queue_element_for_exec::init() + + RETURN VALUE + FALSE OK + TRUE Error (OOM) +*/ + +bool +Event_queue_element_for_exec::init(const LEX_CSTRING &db, const LEX_CSTRING &n) +{ + if (!(dbname.str= my_strndup(key_memory_Event_queue_element_for_exec_names, + db.str, dbname.length= db.length, MYF(MY_WME)))) + return TRUE; + if (!(name.str= my_strndup(key_memory_Event_queue_element_for_exec_names, + n.str, name.length= n.length, MYF(MY_WME)))) + { + my_free(const_cast(dbname.str)); + dbname.str= NULL; + return TRUE; + } + return FALSE; +} + + +/* + Destructor + + SYNOPSIS + Event_queue_element_for_exec::~Event_queue_element_for_exec() +*/ + +Event_queue_element_for_exec::~Event_queue_element_for_exec() +{ + my_free(const_cast(dbname.str)); + my_free(const_cast(name.str)); +} + + +/* + Constructor + + SYNOPSIS + Event_basic::Event_basic() +*/ + +Event_basic::Event_basic() +{ + DBUG_ENTER("Event_basic::Event_basic"); + /* init memory root */ + init_sql_alloc(key_memory_event_basic_root, &mem_root, 256, 512, MYF(0)); + dbname.str= name.str= NULL; + dbname.length= name.length= 0; + time_zone= NULL; + DBUG_VOID_RETURN; +} + + +/* + Destructor + + SYNOPSIS + Event_basic::Event_basic() +*/ + +Event_basic::~Event_basic() +{ + DBUG_ENTER("Event_basic::~Event_basic"); + free_root(&mem_root, MYF(0)); + DBUG_VOID_RETURN; +} + + +/* + Short function to load a char column into a LEX_CSTRING + + SYNOPSIS + Event_basic::load_string_field() + field_name The field( enum_events_table_field is not actually used + because it's unknown in event_data_objects.h) + fields The Field array + field_value The value +*/ + +bool +Event_basic::load_string_fields(Field **fields, ...) 
+{ + bool ret= FALSE; + va_list args; + enum enum_events_table_field field_name; + LEX_CSTRING *field_value; + + DBUG_ENTER("Event_basic::load_string_fields"); + + va_start(args, fields); + field_name= (enum enum_events_table_field) va_arg(args, int); + while (field_name < ET_FIELD_COUNT) + { + field_value= va_arg(args, LEX_CSTRING *); + if ((field_value->str= get_field(&mem_root, fields[field_name])) == NullS) + { + ret= TRUE; + break; + } + field_value->length= strlen(field_value->str); + + field_name= (enum enum_events_table_field) va_arg(args, int); + } + va_end(args); + + DBUG_RETURN(ret); +} + + +bool +Event_basic::load_time_zone(THD *thd, const LEX_CSTRING *tz_name) +{ + String str(tz_name->str, strlen(tz_name->str), &my_charset_latin1); + time_zone= my_tz_find(thd, &str); + + return (time_zone == NULL); +} + + +/* + Constructor + + SYNOPSIS + Event_queue_element::Event_queue_element() +*/ + +Event_queue_element::Event_queue_element(): + on_completion(Event_parse_data::ON_COMPLETION_DROP), + status(Event_parse_data::ENABLED), expression(0), dropped(FALSE), + execution_count(0) +{ + DBUG_ENTER("Event_queue_element::Event_queue_element"); + + starts= ends= execute_at= last_executed= 0; + starts_null= ends_null= execute_at_null= TRUE; + + DBUG_VOID_RETURN; +} + + +/* + Destructor + + SYNOPSIS + Event_queue_element::Event_queue_element() +*/ +Event_queue_element::~Event_queue_element() = default; + + +/* + Constructor + + SYNOPSIS + Event_timed::Event_timed() +*/ + +Event_timed::Event_timed(): + created(0), modified(0), sql_mode(0) +{ + DBUG_ENTER("Event_timed::Event_timed"); + init(); + DBUG_VOID_RETURN; +} + + +/* + Destructor + + SYNOPSIS + Event_timed::~Event_timed() +*/ + +Event_timed::~Event_timed() = default; + + +/* + Constructor + + SYNOPSIS + Event_job_data::Event_job_data() +*/ + +Event_job_data::Event_job_data() + :sql_mode(0) +{ +} + +/* + Init all member variables + + SYNOPSIS + Event_timed::init() +*/ + +void +Event_timed::init() +{ + 
DBUG_ENTER("Event_timed::init"); + + definer_user.str= definer_host.str= body.str= comment.str= NULL; + definer_user.length= definer_host.length= body.length= comment.length= 0; + + sql_mode= 0; + + DBUG_VOID_RETURN; +} + + +/** + Load an event's body from a row from mysql.event. + + @details This method is silent on errors and should behave like that. + Callers should handle throwing of error messages. The reason is that the + class should not know about how to deal with communication. + + @return Operation status + @retval FALSE OK + @retval TRUE Error +*/ + +bool +Event_job_data::load_from_row(THD *thd, TABLE *table) +{ + const char *ptr; + size_t len; + LEX_CSTRING tz_name; + + DBUG_ENTER("Event_job_data::load_from_row"); + + if (!table) + DBUG_RETURN(TRUE); + + if (table->s->fields < ET_FIELD_COUNT) + DBUG_RETURN(TRUE); + + if (load_string_fields(table->field, + ET_FIELD_DB, &dbname, + ET_FIELD_NAME, &name, + ET_FIELD_BODY, &body, + ET_FIELD_DEFINER, &definer, + ET_FIELD_TIME_ZONE, &tz_name, + ET_FIELD_COUNT)) + DBUG_RETURN(TRUE); + + if (load_time_zone(thd, &tz_name)) + DBUG_RETURN(TRUE); + + Event_creation_ctx::load_from_db(thd, &mem_root, dbname.str, name.str, table, + &creation_ctx); + + ptr= strchr(definer.str, '@'); + + if (! ptr) + ptr= definer.str; + + len= ptr - definer.str; + definer_user.str= strmake_root(&mem_root, definer.str, len); + definer_user.length= len; + len= definer.length - len - 1; + /* 1:because of @ */ + definer_host.str= strmake_root(&mem_root, ptr + 1, len); + definer_host.length= len; + + sql_mode= (sql_mode_t) table->field[ET_FIELD_SQL_MODE]->val_int(); + + DBUG_RETURN(FALSE); +} + + +/** + Load an event's body from a row from mysql.event. + + @details This method is silent on errors and should behave like that. + Callers should handle throwing of error messages. The reason is that the + class should not know about how to deal with communication. 
+ + @return Operation status + @retval FALSE OK + @retval TRUE Error +*/ + +bool +Event_queue_element::load_from_row(THD *thd, TABLE *table) +{ + const char *ptr; + MYSQL_TIME time; + LEX_CSTRING tz_name; + + DBUG_ENTER("Event_queue_element::load_from_row"); + + if (!table) + DBUG_RETURN(TRUE); + + if (table->s->fields < ET_FIELD_COUNT) + DBUG_RETURN(TRUE); + + if (load_string_fields(table->field, + ET_FIELD_DB, &dbname, + ET_FIELD_NAME, &name, + ET_FIELD_DEFINER, &definer, + ET_FIELD_TIME_ZONE, &tz_name, + ET_FIELD_COUNT)) + DBUG_RETURN(TRUE); + + if (load_time_zone(thd, &tz_name)) + DBUG_RETURN(TRUE); + + starts_null= table->field[ET_FIELD_STARTS]->is_null(); + uint not_used; + if (!starts_null) + { + /* + The expected data type for these columns in mysql.events: + starts, ends, execute_at, last_executed + is DATETIME. No nanosecond truncation should normally be needed, + unless the DBA changes them, e.g. to VARCHAR, DECIMAL, etc. + For this unexpected case let's use the default round mode, + according to the current session settings. + */ + table->field[ET_FIELD_STARTS]->get_date(&time, TIME_NO_ZERO_DATE | + thd->temporal_round_mode()); + starts= my_tz_OFFSET0->TIME_to_gmt_sec(&time,¬_used); + } + + ends_null= table->field[ET_FIELD_ENDS]->is_null(); + if (!ends_null) + { + table->field[ET_FIELD_ENDS]->get_date(&time, TIME_NO_ZERO_DATE | + thd->temporal_round_mode()); + ends= my_tz_OFFSET0->TIME_to_gmt_sec(&time,¬_used); + } + + if (!table->field[ET_FIELD_INTERVAL_EXPR]->is_null()) + expression= table->field[ET_FIELD_INTERVAL_EXPR]->val_int(); + else + expression= 0; + /* + If neigher STARTS and ENDS is set, then both fields are empty. + Hence, if ET_FIELD_EXECUTE_AT is empty there is an error. 
+ */ + execute_at_null= table->field[ET_FIELD_EXECUTE_AT]->is_null(); + DBUG_ASSERT(!(starts_null && ends_null && !expression && execute_at_null)); + if (!expression && !execute_at_null) + { + if (table->field[ET_FIELD_EXECUTE_AT]->get_date(&time, TIME_NO_ZERO_DATE | + thd->temporal_round_mode())) + DBUG_RETURN(TRUE); + execute_at= my_tz_OFFSET0->TIME_to_gmt_sec(&time,¬_used); + } + + /* + We load the interval type from disk as string and then map it to + an integer. This decouples the values of enum interval_type + and values actually stored on disk. Therefore the type can be + reordered without risking incompatibilities of data between versions. + */ + if (!table->field[ET_FIELD_TRANSIENT_INTERVAL]->is_null()) + { + int i; + char buff[MAX_FIELD_WIDTH]; + String str(buff, sizeof(buff), &my_charset_bin); + LEX_CSTRING tmp; + + table->field[ET_FIELD_TRANSIENT_INTERVAL]->val_str(&str); + if (!(tmp.length= str.length())) + DBUG_RETURN(TRUE); + + tmp.str= str.c_ptr_safe(); + + i= find_string_in_array(interval_type_to_name, &tmp, system_charset_info); + if (i < 0) + DBUG_RETURN(TRUE); + interval= (interval_type) i; + } + + if (!table->field[ET_FIELD_LAST_EXECUTED]->is_null()) + { + table->field[ET_FIELD_LAST_EXECUTED]->get_date(&time, TIME_NO_ZERO_DATE | + thd->temporal_round_mode()); + last_executed= my_tz_OFFSET0->TIME_to_gmt_sec(&time,¬_used); + } + + if ((ptr= get_field(&mem_root, table->field[ET_FIELD_STATUS])) == NullS) + DBUG_RETURN(TRUE); + + DBUG_PRINT("load_from_row", ("Event [%s] is [%s]", name.str, ptr)); + + /* Set event status (ENABLED | SLAVESIDE_DISABLED | DISABLED) */ + switch (ptr[0]) + { + case 'E' : + status = Event_parse_data::ENABLED; + break; + case 'S' : + status = Event_parse_data::SLAVESIDE_DISABLED; + break; + case 'D' : + default: + status = Event_parse_data::DISABLED; + break; + } + if ((ptr= get_field(&mem_root, table->field[ET_FIELD_ORIGINATOR])) == NullS) + DBUG_RETURN(TRUE); + originator = (uint32) 
table->field[ET_FIELD_ORIGINATOR]->val_int(); + + /* ToDo : Andrey . Find a way not to allocate ptr on event_mem_root */ + if ((ptr= get_field(&mem_root, + table->field[ET_FIELD_ON_COMPLETION])) == NullS) + DBUG_RETURN(TRUE); + + on_completion= (ptr[0]=='D'? Event_parse_data::ON_COMPLETION_DROP: + Event_parse_data::ON_COMPLETION_PRESERVE); + + DBUG_RETURN(FALSE); +} + + +/** + Load an event's body from a row from mysql.event. + + @details This method is silent on errors and should behave like that. + Callers should handle throwing of error messages. The reason is that the + class should not know about how to deal with communication. + + @return Operation status + @retval FALSE OK + @retval TRUE Error +*/ + +bool +Event_timed::load_from_row(THD *thd, TABLE *table) +{ + const char *ptr; + size_t len; + + DBUG_ENTER("Event_timed::load_from_row"); + + if (Event_queue_element::load_from_row(thd, table)) + DBUG_RETURN(TRUE); + + if (load_string_fields(table->field, + ET_FIELD_BODY, &body, + ET_FIELD_BODY_UTF8, &body_utf8, + ET_FIELD_COUNT)) + DBUG_RETURN(TRUE); + + if (Event_creation_ctx::load_from_db(thd, &mem_root, dbname.str, name.str, + table, &creation_ctx)) + { + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + ER_EVENT_INVALID_CREATION_CTX, + ER_THD(thd, ER_EVENT_INVALID_CREATION_CTX), + (const char *) dbname.str, + (const char *) name.str); + } + + ptr= strchr(definer.str, '@'); + + if (! 
ptr) + ptr= definer.str; + + len= ptr - definer.str; + definer_user.str= strmake_root(&mem_root, definer.str, len); + definer_user.length= len; + len= definer.length - len - 1; + /* 1:because of @ */ + definer_host.str= strmake_root(&mem_root, ptr + 1, len); + definer_host.length= len; + + created= table->field[ET_FIELD_CREATED]->val_int(); + modified= table->field[ET_FIELD_MODIFIED]->val_int(); + + comment.str= get_field(&mem_root, table->field[ET_FIELD_COMMENT]); + if (comment.str != NullS) + comment.length= strlen(comment.str); + else + comment.length= 0; + + sql_mode= (sql_mode_t) table->field[ET_FIELD_SQL_MODE]->val_int(); + + DBUG_RETURN(FALSE); +} + + +/* + add_interval() adds a specified interval to time 'ltime' in time + zone 'time_zone', and returns the result converted to the number of + seconds since epoch (aka Unix time; in UTC time zone). Zero result + means an error. +*/ +static +my_time_t +add_interval(MYSQL_TIME *ltime, const Time_zone *time_zone, + interval_type scale, INTERVAL interval) +{ + if (date_add_interval(current_thd, ltime, scale, interval)) + return 0; + + uint not_used; + return time_zone->TIME_to_gmt_sec(ltime, ¬_used); +} + + +/* + Computes the sum of a timestamp plus interval. + + SYNOPSIS + get_next_time() + time_zone event time zone + next the sum + start add interval_value to this time + time_now current time + i_value quantity of time type interval to add + i_type type of interval to add (SECOND, MINUTE, HOUR, WEEK ...) + + RETURN VALUE + 0 OK + 1 Error + + NOTES + 1) If the interval is conversible to SECOND, like MINUTE, HOUR, DAY, WEEK. + Then we use TIMEDIFF()'s implementation as underlying and number of + seconds as resolution for computation. 
+ 2) In all other cases - MONTH, QUARTER, YEAR we use MONTH as resolution + and PERIOD_DIFF()'s implementation +*/ + +static +bool get_next_time(const Time_zone *time_zone, my_time_t *next, + my_time_t start, my_time_t time_now, + int i_value, interval_type i_type) +{ + DBUG_ENTER("get_next_time"); + DBUG_PRINT("enter", ("start: %lu now: %lu", (long) start, (long) time_now)); + + DBUG_ASSERT(start <= time_now); + + longlong months=0, seconds=0; + + switch (i_type) { + case INTERVAL_YEAR: + months= i_value*12; + break; + case INTERVAL_QUARTER: + /* Has already been converted to months */ + case INTERVAL_YEAR_MONTH: + case INTERVAL_MONTH: + months= i_value; + break; + case INTERVAL_WEEK: + /* WEEK has already been converted to days */ + case INTERVAL_DAY: + seconds= i_value*24*3600; + break; + case INTERVAL_DAY_HOUR: + case INTERVAL_HOUR: + seconds= i_value*3600; + break; + case INTERVAL_DAY_MINUTE: + case INTERVAL_HOUR_MINUTE: + case INTERVAL_MINUTE: + seconds= i_value*60; + break; + case INTERVAL_DAY_SECOND: + case INTERVAL_HOUR_SECOND: + case INTERVAL_MINUTE_SECOND: + case INTERVAL_SECOND: + seconds= i_value; + break; + case INTERVAL_DAY_MICROSECOND: + case INTERVAL_HOUR_MICROSECOND: + case INTERVAL_MINUTE_MICROSECOND: + case INTERVAL_SECOND_MICROSECOND: + case INTERVAL_MICROSECOND: + /* + We should return an error here so SHOW EVENTS/ SELECT FROM I_S.EVENTS + would give an error then. + */ + DBUG_RETURN(1); + case INTERVAL_LAST: + DBUG_ASSERT(0); + } + DBUG_PRINT("info", ("seconds: %ld months: %ld", (long) seconds, (long) months)); + + MYSQL_TIME local_start; + MYSQL_TIME local_now; + + /* Convert times from UTC to local. 
*/ + { + time_zone->gmt_sec_to_TIME(&local_start, start); + time_zone->gmt_sec_to_TIME(&local_now, time_now); + } + + INTERVAL interval; + bzero(&interval, sizeof(interval)); + my_time_t next_time= 0; + + if (seconds) + { + ulonglong seconds_diff; + ulong microsec_diff; + bool negative= calc_time_diff(&local_now, &local_start, 1, + &seconds_diff, µsec_diff); + if (!negative) + { + /* + The formula below returns the interval that, when added to + local_start, will always give the time in the future. + */ + interval.second= seconds_diff - seconds_diff % seconds + seconds; + next_time= add_interval(&local_start, time_zone, + INTERVAL_SECOND, interval); + if (next_time == 0) + goto done; + } + + if (next_time <= time_now) + { + /* + If 'negative' is true above, then 'next_time == 0', and + 'next_time <= time_now' is also true. If negative is false, + then next_time was set, but perhaps to the value that is less + then time_now. See below for elaboration. + */ + DBUG_ASSERT(negative || next_time > 0); + + /* + If local_now < local_start, i.e. STARTS time is in the future + according to the local time (it always in the past according + to UTC---this is a prerequisite of this function), then + STARTS is almost always in the past according to the local + time too. However, in the time zone that has backward + Daylight Saving Time shift, the following may happen: suppose + we have a backward DST shift at certain date after 2:59:59, + i.e. local time goes 1:59:59, 2:00:00, ... , 2:59:59, (shift + here) 2:00:00 (again), ... , 2:59:59 (again), 3:00:00, ... . + Now suppose the time has passed the first 2:59:59, has been + shifted backward, and now is (the second) 2:20:00. The user + does CREATE EVENT with STARTS 'current-date 2:40:00'. Local + time 2:40:00 from create statement is treated by time + functions as the first such time, so according to UTC it comes + before the second 2:20:00. But according to local time it is + obviously in the future, so we end up in this branch. 
+ + Since we are in the second pass through 2:00:00--2:59:59, and + any local time form this interval is treated by system + functions as the time from the first pass, we have to find the + time for the next execution that is past the DST-affected + interval (past the second 2:59:59 for our example, + i.e. starting from 3:00:00). We do this in the loop until the + local time is mapped onto future UTC time. 'start' time is in + the past, so we may use 'do { } while' here, and add the first + interval right away. + + Alternatively, it could be that local_now >= local_start. Now + for the example above imagine we do CREATE EVENT with STARTS + 'current-date 2:10:00'. Local start 2:10 is in the past (now + is local 2:20), so we add an interval, and get next execution + time, say, 2:40. It is in the future according to local time, + but, again, since we are in the second pass through + 2:00:00--2:59:59, 2:40 will be converted into UTC time in the + past. So we will end up in this branch again, and may add + intervals in a 'do { } while' loop. + + Note that for any given event we may end up here only if event + next execution time will map to the time interval that is + passed twice, and only if the server was started during the + second pass, or the event is being created during the second + pass. After that, we never will get here (unless we again + start the server during the second pass). In other words, + such a condition is extremely rare. + */ + interval.second= seconds; + do + { + next_time= add_interval(&local_start, time_zone, + INTERVAL_SECOND, interval); + if (next_time == 0) + goto done; + } + while (next_time <= time_now); + } + } + else + { + long diff_months= ((long) local_now.year - (long) local_start.year)*12 + + ((long) local_now.month - (long) local_start.month); + + /* + Unlike for seconds above, the formula below returns the interval + that, when added to the local_start, will give the time in the + past, or somewhere in the current month. 
We are interested in + the latter case, to see if this time has already passed, or is + yet to come this month. + + Note that the time is guaranteed to be in the past unless + (diff_months % months == 0), but no good optimization is + possible here, because (diff_months % months == 0) is what will + happen most of the time, as get_next_time() will be called right + after the execution of the event. We could pass last_executed + time to this function, and see if the execution has already + happened this month, but for that we will have to convert + last_executed from seconds since epoch to local broken-down + time, and this will greatly reduce the effect of the + optimization. So instead we keep the code simple and clean. + */ + interval.month= (ulong) (diff_months - diff_months % months); + next_time= add_interval(&local_start, time_zone, + INTERVAL_MONTH, interval); + if (next_time == 0) + goto done; + + if (next_time <= time_now) + { + interval.month= (ulong) months; + next_time= add_interval(&local_start, time_zone, + INTERVAL_MONTH, interval); + if (next_time == 0) + goto done; + } + } + + DBUG_ASSERT(time_now < next_time); + + *next= next_time; + +done: + DBUG_PRINT("info", ("next_time: %ld", (long) next_time)); + DBUG_RETURN(next_time == 0); +} + + +/* + Computes next execution time. + + SYNOPSIS + Event_queue_element::compute_next_execution_time() + + RETURN VALUE + FALSE OK + TRUE Error + + NOTES + The time is set in execute_at, if no more executions the latter is + set to 0. 
+*/ + +bool +Event_queue_element::compute_next_execution_time() +{ + my_time_t time_now; + DBUG_ENTER("Event_queue_element::compute_next_execution_time"); + DBUG_PRINT("enter", ("starts: %lu ends: %lu last_executed: %lu this: %p", + (long) starts, (long) ends, (long) last_executed, + this)); + + if (status != Event_parse_data::ENABLED) + { + DBUG_PRINT("compute_next_execution_time", + ("Event %s is DISABLED", name.str)); + goto ret; + } + /* If one-time, no need to do computation */ + if (!expression) + { + /* Let's check whether it was executed */ + if (last_executed) + { + DBUG_PRINT("info",("One-time event %s.%s of was already executed", + dbname.str, name.str)); + dropped= (on_completion == Event_parse_data::ON_COMPLETION_DROP); + DBUG_PRINT("info",("One-time event will be dropped: %d.", dropped)); + + status= Event_parse_data::DISABLED; + } + goto ret; + } + + time_now= current_thd->query_start(); + + DBUG_PRINT("info",("NOW: [%lu]", (ulong) time_now)); + + /* if time_now is after ends don't execute anymore */ + if (!ends_null && ends < time_now) + { + DBUG_PRINT("info", ("NOW after ENDS, don't execute anymore")); + /* time_now is after ends. don't execute anymore */ + execute_at= 0; + execute_at_null= TRUE; + if (on_completion == Event_parse_data::ON_COMPLETION_DROP) + dropped= TRUE; + DBUG_PRINT("info", ("Dropped: %d", dropped)); + status= Event_parse_data::DISABLED; + + goto ret; + } + + /* + Here time_now is before or equals ends if the latter is set. + Let's check whether time_now is before starts. + If so schedule for starts. + */ + if (!starts_null && time_now <= starts) + { + if (time_now == starts && starts == last_executed) + { + /* + do nothing or we will schedule for second time execution at starts. + */ + } + else + { + DBUG_PRINT("info", ("STARTS is future, NOW <= STARTS,sched for STARTS")); + /* + starts is in the future + time_now before starts. 
Scheduling for starts + */ + execute_at= starts; + execute_at_null= FALSE; + goto ret; + } + } + + if (!starts_null && !ends_null) + { + /* + Both starts and m_ends are set and time_now is between them (incl.) + If last_executed is set then increase with m_expression. The new MYSQL_TIME is + after m_ends set execute_at to 0. And check for on_completion + If not set then schedule for now. + */ + DBUG_PRINT("info", ("Both STARTS & ENDS are set")); + if (!last_executed) + { + DBUG_PRINT("info", ("Not executed so far.")); + } + + { + my_time_t next_exec; + + if (get_next_time(time_zone, &next_exec, starts, time_now, + (int) expression, interval)) + goto err; + + /* There was previous execution */ + if (ends < next_exec) + { + DBUG_PRINT("info", ("Next execution of %s after ENDS. Stop executing.", + name.str)); + /* Next execution after ends. No more executions */ + execute_at= 0; + execute_at_null= TRUE; + if (on_completion == Event_parse_data::ON_COMPLETION_DROP) + dropped= TRUE; + status= Event_parse_data::DISABLED; + } + else + { + DBUG_PRINT("info",("Next[%lu]", (ulong) next_exec)); + execute_at= next_exec; + execute_at_null= FALSE; + } + } + goto ret; + } + else if (starts_null && ends_null) + { + /* starts is always set, so this is a dead branch !! */ + DBUG_PRINT("info", ("Neither STARTS nor ENDS are set")); + /* + Both starts and m_ends are not set, so we schedule for the next + based on last_executed. + */ + if (last_executed) + { + my_time_t next_exec; + if (get_next_time(time_zone, &next_exec, starts, time_now, + (int) expression, interval)) + goto err; + execute_at= next_exec; + DBUG_PRINT("info",("Next[%lu]", (ulong) next_exec)); + } + else + { + /* last_executed not set. Schedule the event for now */ + DBUG_PRINT("info", ("Execute NOW")); + execute_at= time_now; + } + execute_at_null= FALSE; + } + else + { + /* either starts or m_ends is set */ + if (!starts_null) + { + DBUG_PRINT("info", ("STARTS is set")); + /* + - starts is set. 
+ - starts is not in the future according to check made before + Hence schedule for starts + m_expression in case last_executed + is not set, otherwise to last_executed + m_expression + */ + if (!last_executed) + { + DBUG_PRINT("info", ("Not executed so far.")); + } + + { + my_time_t next_exec; + if (get_next_time(time_zone, &next_exec, starts, time_now, + (int) expression, interval)) + goto err; + execute_at= next_exec; + DBUG_PRINT("info",("Next[%lu]", (ulong) next_exec)); + } + execute_at_null= FALSE; + } + else + { + /* this is a dead branch, because starts is always set !!! */ + DBUG_PRINT("info", ("STARTS is not set. ENDS is set")); + /* + - m_ends is set + - m_ends is after time_now or is equal + Hence check for m_last_execute and increment with m_expression. + If last_executed is not set then schedule for now + */ + + if (!last_executed) + execute_at= time_now; + else + { + my_time_t next_exec; + + if (get_next_time(time_zone, &next_exec, starts, time_now, + (int) expression, interval)) + goto err; + + if (ends < next_exec) + { + DBUG_PRINT("info", ("Next execution after ENDS. Stop executing.")); + execute_at= 0; + execute_at_null= TRUE; + status= Event_parse_data::DISABLED; + if (on_completion == Event_parse_data::ON_COMPLETION_DROP) + dropped= TRUE; + } + else + { + DBUG_PRINT("info", ("Next[%lu]", (ulong) next_exec)); + execute_at= next_exec; + execute_at_null= FALSE; + } + } + } + goto ret; + } +ret: + DBUG_PRINT("info", ("ret: 0 execute_at: %lu", (long) execute_at)); + DBUG_RETURN(FALSE); +err: + DBUG_PRINT("info", ("ret=1")); + DBUG_RETURN(TRUE); +} + + +/* + Set the internal last_executed MYSQL_TIME struct to now. NOW is the + time according to thd->query_start(), so the THD's clock. 
+ + SYNOPSIS + Event_queue_element::mark_last_executed() + thd thread context +*/ + +void +Event_queue_element::mark_last_executed(THD *thd) +{ + last_executed= thd->query_start(); + + execution_count++; +} + + +static +void +append_datetime(String *buf, Time_zone *time_zone, my_time_t secs, + const char *name, uint len) +{ + char dtime_buff[20*2+32];/* +32 to make my_snprintf_{8bit|ucs2} happy */ + buf->append(STRING_WITH_LEN(" ")); + buf->append(name, len); + buf->append(STRING_WITH_LEN(" '")); + /* + Pass the buffer and the second param tells fills the buffer and + returns the number of chars to copy. + */ + MYSQL_TIME time; + time_zone->gmt_sec_to_TIME(&time, secs); + buf->append(dtime_buff, my_datetime_to_str(&time, dtime_buff, 0)); + buf->append(STRING_WITH_LEN("'")); +} + + +/* + Get SHOW CREATE EVENT as string + + SYNOPSIS + Event_timed::get_create_event(THD *thd, String *buf) + thd Thread + buf String*, should be already allocated. CREATE EVENT goes inside. + + RETURN VALUE + 0 OK + EVEX_MICROSECOND_UNSUP Error (for now if mysql.event has been + tampered and MICROSECONDS interval or + derivative has been put there. 
+*/ + +int +Event_timed::get_create_event(THD *thd, String *buf) +{ + char tmp_buf[2 * STRING_BUFFER_USUAL_SIZE]; + String expr_buf(tmp_buf, sizeof(tmp_buf), system_charset_info); + expr_buf.length(0); + + DBUG_ENTER("get_create_event"); + DBUG_PRINT("ret_info",("body_len=[%d]body=[%s]", + (int) body.length, body.str)); + + if (expression && Events::reconstruct_interval_expression(&expr_buf, interval, + expression)) + DBUG_RETURN(EVEX_MICROSECOND_UNSUP); + + buf->append(STRING_WITH_LEN("CREATE ")); + append_definer(thd, buf, &definer_user, &definer_host); + buf->append(STRING_WITH_LEN("EVENT ")); + append_identifier(thd, buf, &name); + + if (expression) + { + buf->append(STRING_WITH_LEN(" ON SCHEDULE EVERY ")); + buf->append(expr_buf); + buf->append(' '); + LEX_CSTRING *ival= &interval_type_to_name[interval]; + buf->append(ival->str, ival->length); + + if (!starts_null) + append_datetime(buf, time_zone, starts, STRING_WITH_LEN("STARTS")); + + if (!ends_null) + append_datetime(buf, time_zone, ends, STRING_WITH_LEN("ENDS")); + } + else + { + append_datetime(buf, time_zone, execute_at, + STRING_WITH_LEN("ON SCHEDULE AT")); + } + + if (on_completion == Event_parse_data::ON_COMPLETION_DROP) + buf->append(STRING_WITH_LEN(" ON COMPLETION NOT PRESERVE ")); + else + buf->append(STRING_WITH_LEN(" ON COMPLETION PRESERVE ")); + + if (status == Event_parse_data::ENABLED) + buf->append(STRING_WITH_LEN("ENABLE")); + else if (status == Event_parse_data::SLAVESIDE_DISABLED) + buf->append(STRING_WITH_LEN("DISABLE ON SLAVE")); + else + buf->append(STRING_WITH_LEN("DISABLE")); + + if (comment.length) + { + buf->append(STRING_WITH_LEN(" COMMENT ")); + append_unescaped(buf, comment.str, comment.length); + } + buf->append(STRING_WITH_LEN(" DO ")); + buf->append(&body); + + DBUG_RETURN(0); +} + + +/** + Get an artificial stored procedure to parse as an event definition. 
+*/ + +bool +Event_job_data::construct_sp_sql(THD *thd, String *sp_sql) +{ + LEX_CSTRING buffer; + const uint STATIC_SQL_LENGTH= 44; + + DBUG_ENTER("Event_job_data::construct_sp_sql"); + + /* + Allocate a large enough buffer on the thread execution memory + root to avoid multiple [re]allocations on system heap + */ + buffer.length= STATIC_SQL_LENGTH + name.length + body.length; + if (! (buffer.str= (char*) thd->alloc(buffer.length))) + DBUG_RETURN(TRUE); + + sp_sql->set(buffer.str, buffer.length, system_charset_info); + sp_sql->length(0); + + + sp_sql->append(STRING_WITH_LEN("CREATE ")); + sp_sql->append(STRING_WITH_LEN("PROCEDURE ")); + /* + Let's use the same name as the event name to perhaps produce a + better error message in case it is a part of some parse error. + We're using append_identifier here to successfully parse + events with reserved names. + */ + append_identifier(thd, sp_sql, &name); + + /* + The default SQL security of a stored procedure is DEFINER. We + have already activated the security context of the event, so + let's execute the procedure with the invoker rights to save on + resets of security contexts. + */ + sp_sql->append(STRING_WITH_LEN("() SQL SECURITY INVOKER ")); + + if (thd->variables.sql_mode & MODE_ORACLE) + sp_sql->append(STRING_WITH_LEN(" AS BEGIN ")); + sp_sql->append(&body); + if (thd->variables.sql_mode & MODE_ORACLE) + sp_sql->append(STRING_WITH_LEN("; END")); + + DBUG_RETURN(thd->is_fatal_error); +} + + +/** + Get DROP EVENT statement to binlog the drop of ON COMPLETION NOT + PRESERVE event. +*/ + +bool +Event_job_data::construct_drop_event_sql(THD *thd, String *sp_sql) +{ + LEX_CSTRING buffer; + const uint STATIC_SQL_LENGTH= 14; + + DBUG_ENTER("Event_job_data::construct_drop_event_sql"); + + buffer.length= STATIC_SQL_LENGTH + name.length*2 + dbname.length*2; + if (! 
(buffer.str= (char*) thd->alloc(buffer.length))) + DBUG_RETURN(TRUE); + + sp_sql->set(buffer.str, buffer.length, system_charset_info); + sp_sql->length(0); + + sp_sql->append(STRING_WITH_LEN("DROP EVENT ")); + append_identifier(thd, sp_sql, &dbname); + sp_sql->append('.'); + append_identifier(thd, sp_sql, &name); + + DBUG_RETURN(thd->is_fatal_error); +} + +/** + Compiles and executes the event (the underlying sp_head object) + + @retval TRUE error (reported to the error log) + @retval FALSE success +*/ + +bool +Event_job_data::execute(THD *thd, bool drop) +{ + String sp_sql; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + Security_context event_sctx, *save_sctx= NULL; +#endif + List empty_item_list; + bool ret= TRUE; + + DBUG_ENTER("Event_job_data::execute"); + + thd->reset_for_next_command(); + +#ifdef WITH_WSREP + wsrep_open(thd); + wsrep_before_command(thd); +#endif /* WITH_WSREP */ + /* + MySQL parser currently assumes that current database is either + present in THD or all names in all statements are fully specified. + And yet not fully specified names inside stored programs must be + be supported, even if the current database is not set: + CREATE PROCEDURE db1.p1() BEGIN CREATE TABLE t1; END// + -- in this example t1 should be always created in db1 and the statement + must parse even if there is no current database. + + To support this feature and still address the parser limitation, + we need to set the current database here. + We don't have to call mysql_change_db, since the checks performed + in it are unnecessary for the purpose of parsing, and + mysql_change_db will be invoked anyway later, to activate the + procedure database before it's executed. 
+ */ + thd->set_db(&dbname); + + lex_start(thd); + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (event_sctx.change_security_context(thd, + &definer_user, &definer_host, + &dbname, &save_sctx)) + { + sql_print_error("Event Scheduler: " + "[%s].[%s.%s] execution failed, " + "failed to authenticate the user.", + definer.str, dbname.str, name.str); + goto end; + } +#endif + + if (check_access(thd, EVENT_ACL, dbname.str, NULL, NULL, 0, 0)) + { + /* + This aspect of behavior is defined in the worklog, + and this is how triggers work too: if TRIGGER + privilege is revoked from trigger definer, + triggers are not executed. + */ + sql_print_error("Event Scheduler: " + "[%s].[%s.%s] execution failed, " + "user no longer has EVENT privilege.", + definer.str, dbname.str, name.str); + goto end; + } + + /* + Set up global thread attributes to reflect the properties of + this Event. We can simply reset these instead of usual + backup/restore employed in stored programs since we know that + this is a top level statement and the worker thread is + allocated exclusively to execute this event. + */ + + thd->variables.sql_mode= sql_mode; + thd->variables.time_zone= time_zone; + + if (construct_sp_sql(thd, &sp_sql)) + goto end; + + thd->set_query(sp_sql.c_ptr_safe(), sp_sql.length()); + + { + Parser_state parser_state; + sql_digest_state *parent_digest= thd->m_digest; + PSI_statement_locker *parent_locker= thd->m_statement_psi; + bool res; + + if (parser_state.init(thd, thd->query(), thd->query_length())) + goto end; + + thd->m_digest= NULL; + thd->m_statement_psi= NULL; + res= parse_sql(thd, & parser_state, creation_ctx); + thd->m_digest= parent_digest; + thd->m_statement_psi= parent_locker; + + if (res) + { + sql_print_error("Event Scheduler: %serror during compilation of %s.%s", + thd->is_fatal_error ? 
"fatal " : "", dbname.str, name.str); + goto end; + } + } + + { + sp_head *sphead= thd->lex->sphead; + + DBUG_ASSERT(sphead); + + sphead->m_flags|= sp_head::LOG_SLOW_STATEMENTS; + sphead->m_flags|= sp_head::LOG_GENERAL_LOG; + + /* + construct_sp_sql() + parse_sql() set suid to SP_IS_NOT_SUID, + because we have the security context already set to the event + definer here. See more comments in construct_sp_sql(). + */ + DBUG_ASSERT(sphead->suid() == SP_IS_NOT_SUID); + sphead->m_sql_mode= sql_mode; + sphead->set_creation_ctx(creation_ctx); + sphead->optimize(); + + sphead->m_sp_share= MYSQL_GET_SP_SHARE(SP_TYPE_EVENT, + dbname.str, static_cast(dbname.length), + name.str, static_cast(name.length)); + ret= sphead->execute_procedure(thd, &empty_item_list); + /* + There is no pre-locking and therefore there should be no + tables open and locked left after execute_procedure. + */ + } + +end: + if (drop && likely(!thd->is_fatal_error)) + { + /* + We must do it here since here we're under the right authentication + ID of the event definer. + */ + sql_print_information("Event Scheduler: Dropping %s.%s", + (const char *) dbname.str, (const char *) name.str); + /* + Construct a query for the binary log, to ensure the event is dropped + on the slave + */ + if (construct_drop_event_sql(thd, &sp_sql)) + ret= 1; + else + { + thd->set_query(sp_sql.c_ptr_safe(), sp_sql.length()); + + /* + NOTE: even if we run in read-only mode, we should be able to lock + the mysql.event table for writing. In order to achieve this, we + should call mysql_lock_tables() under the super-user. + + Same goes for transaction access mode. + Temporarily reset it to read-write. + */ + + privilege_t saved_master_access(thd->security_ctx->master_access); + thd->security_ctx->master_access |= PRIV_IGNORE_READ_ONLY; + bool save_tx_read_only= thd->tx_read_only; + thd->tx_read_only= false; + + /* + This code is processing event execution and does not have client + connection. 
Here, event execution will now execute a prepared + DROP EVENT statement, but thd->lex->sql_command is set to + SQLCOM_CREATE_PROCEDURE + DROP EVENT will be logged in binlog, and we have to + replicate it to make all nodes have consistent event definitions + Wsrep DDL replication is triggered inside Events::drop_event(), + and here we need to prepare the THD so that DDL replication is + possible, essentially it requires setting sql_command to + SQLCOMM_DROP_EVENT, we will switch sql_command for the duration + of DDL replication only. + */ + const enum_sql_command sql_command_save= thd->lex->sql_command; + const bool sql_command_set= WSREP(thd); + + if (sql_command_set) + thd->lex->sql_command = SQLCOM_DROP_EVENT; + + ret= Events::drop_event(thd, &dbname, &name, FALSE); + + if (sql_command_set) + { +#ifdef WITH_WSREP + wsrep_to_isolation_end(thd); +#endif + thd->lex->sql_command = sql_command_save; + } + + thd->tx_read_only= save_tx_read_only; + thd->security_ctx->master_access= saved_master_access; + } + } +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (save_sctx) + event_sctx.restore_security_context(thd, save_sctx); +#endif +#ifdef WITH_WSREP + wsrep_after_command_ignore_result(thd); + wsrep_close(thd); +#endif /* WITH_WSREP */ + thd->lex->unit.cleanup(); + thd->end_statement(); + thd->cleanup_after_query(); + /* Avoid races with SHOW PROCESSLIST */ + thd->reset_query(); + + DBUG_PRINT("info", ("EXECUTED %s.%s ret: %d", dbname.str, name.str, ret)); + + DBUG_RETURN(ret); +} + + +/* + Checks whether two events are in the same schema + + SYNOPSIS + event_basic_db_equal() + db Schema + et Compare et->dbname to `db` + + RETURN VALUE + TRUE Equal + FALSE Not equal +*/ + +bool +event_basic_db_equal(const LEX_CSTRING *db, Event_basic *et) +{ + return !sortcmp_lex_string(&et->dbname, db, system_charset_info); +} + + +/* + Checks whether an event has equal `db` and `name` + + SYNOPSIS + event_basic_identifier_equal() + db Schema + name Name + et The event object + + RETURN VALUE 
+ TRUE Equal + FALSE Not equal +*/ + +bool +event_basic_identifier_equal(const LEX_CSTRING *db, const LEX_CSTRING *name, + Event_basic *b) +{ + return !sortcmp_lex_string(name, &b->name, system_charset_info) && + !sortcmp_lex_string(db, &b->dbname, system_charset_info); +} + +/** + @} (End of group Event_Scheduler) +*/ diff --git a/sql/event_data_objects.h b/sql/event_data_objects.h new file mode 100644 index 00000000..c51d5433 --- /dev/null +++ b/sql/event_data_objects.h @@ -0,0 +1,207 @@ +#ifndef _EVENT_DATA_OBJECTS_H_ +#define _EVENT_DATA_OBJECTS_H_ +/* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @addtogroup Event_Scheduler + @{ + + @file event_data_objects.h +*/ + +#include "event_parse_data.h" +#include "thr_lock.h" /* thr_lock_type */ + +class Field; +class THD; +class Time_zone; +struct TABLE; + +void init_scheduler_psi_keys(void); + +class Event_queue_element_for_exec +{ +public: + Event_queue_element_for_exec() : dbname{nullptr, 0}, name{nullptr, 0} {} + ~Event_queue_element_for_exec(); + + bool + init(const LEX_CSTRING &dbname, const LEX_CSTRING &name); + + LEX_CSTRING dbname; + LEX_CSTRING name; + bool dropped; + THD *thd; + +private: + /* Prevent use of these */ + Event_queue_element_for_exec(const Event_queue_element_for_exec &); + void operator=(Event_queue_element_for_exec &); +#ifdef HAVE_PSI_INTERFACE +public: + PSI_statement_info* get_psi_info() + { + return & psi_info; + } + + static PSI_statement_info psi_info; +#endif +}; + + +class Event_basic +{ +protected: + MEM_ROOT mem_root; + +public: + + LEX_CSTRING dbname; + LEX_CSTRING name; + LEX_CSTRING definer;// combination of user and host + + Time_zone *time_zone; + + Event_basic(); + virtual ~Event_basic(); + + virtual bool + load_from_row(THD *thd, TABLE *table) = 0; + +protected: + bool + load_string_fields(Field **fields, ...); + + bool + load_time_zone(THD *thd, const LEX_CSTRING *tz_name); +}; + + + +class Event_queue_element : public Event_basic +{ +public: + int on_completion; + int status; + uint32 originator; + + my_time_t last_executed; + my_time_t execute_at; + my_time_t starts; + my_time_t ends; + bool starts_null; + bool ends_null; + bool execute_at_null; + + longlong expression; + interval_type interval; + + bool dropped; + + uint execution_count; + + Event_queue_element(); + virtual ~Event_queue_element(); + + virtual bool + load_from_row(THD *thd, TABLE 
*table); + + bool + compute_next_execution_time(); + + void + mark_last_executed(THD *thd); +}; + + +class Event_timed : public Event_queue_element +{ + Event_timed(const Event_timed &); /* Prevent use of these */ + void operator=(Event_timed &); + +public: + LEX_CSTRING body; + + LEX_CSTRING definer_user; + LEX_CSTRING definer_host; + + LEX_CSTRING comment; + + ulonglong created; + ulonglong modified; + + sql_mode_t sql_mode; + + class Stored_program_creation_ctx *creation_ctx; + LEX_CSTRING body_utf8; + + Event_timed(); + virtual ~Event_timed(); + + void + init(); + + virtual bool + load_from_row(THD *thd, TABLE *table); + + int + get_create_event(THD *thd, String *buf); +}; + + +class Event_job_data : public Event_basic +{ +public: + LEX_CSTRING body; + LEX_CSTRING definer_user; + LEX_CSTRING definer_host; + + sql_mode_t sql_mode; + + class Stored_program_creation_ctx *creation_ctx; + + Event_job_data(); + + virtual bool + load_from_row(THD *thd, TABLE *table); + + bool + execute(THD *thd, bool drop); +private: + bool + construct_sp_sql(THD *thd, String *sp_sql); + bool + construct_drop_event_sql(THD *thd, String *sp_sql); + + Event_job_data(const Event_job_data &); /* Prevent use of these */ + void operator=(Event_job_data &); +}; + + +/* Compares only the schema part of the identifier */ +bool +event_basic_db_equal(const LEX_CSTRING *db, Event_basic *et); + +/* Compares the whole identifier*/ +bool +event_basic_identifier_equal(const LEX_CSTRING *db, const LEX_CSTRING *name, + Event_basic *b); + +/** + @} (End of group Event_Scheduler) +*/ + +#endif /* _EVENT_DATA_OBJECTS_H_ */ diff --git a/sql/event_db_repository.cc b/sql/event_db_repository.cc new file mode 100644 index 00000000..ad9f1c2c --- /dev/null +++ b/sql/event_db_repository.cc @@ -0,0 +1,1217 @@ +/* + Copyright (c) 2006, 2011, Oracle and/or its affiliates. 
/*
  Copyright (c) 2006, 2011, Oracle and/or its affiliates.

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 of the License.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */

#include "mariadb.h"
#include "sql_priv.h"
#include "unireg.h"
#include "sql_base.h"                           // close_thread_tables
#include "sql_parse.h"
#include "event_db_repository.h"
#include "key.h"                                // key_copy
#include "sql_db.h"                             // get_default_db_collation
#include "sql_time.h"                           // interval_type_to_name
#include "tztime.h"                             // struct Time_zone
#include "records.h"                            // init_read_record, end_read_record
#include "sp_head.h"
#include "event_data_objects.h"
#include "events.h"
#include "sql_show.h"
#include "lock.h"                               // MYSQL_LOCK_IGNORE_TIMEOUT
#include "transaction.h"

/**
  @addtogroup Event_Scheduler
  @{
*/

/*
  Expected definition of every column of mysql.event, in
  enum_events_table_field order.  Used by table_intact.check() to detect
  a mysql.event table that does not match what this server expects.

  NOTE(review): several type/charset strings are deliberately truncated
  prefixes ("varchar(", "char(", "utf8mb") — the intact check appears to
  compare only the given prefix length, so e.g. "utf8mb" accepts both
  utf8mb3 and utf8mb4; confirm against Table_check_intact::check().
*/
static
const TABLE_FIELD_TYPE event_table_fields[ET_FIELD_COUNT] =
{
  {
    { STRING_WITH_LEN("db") },
    { STRING_WITH_LEN("char(64)") },
    { STRING_WITH_LEN("utf8mb") }
  },
  {
    { STRING_WITH_LEN("name") },
    { STRING_WITH_LEN("char(64)") },
    { STRING_WITH_LEN("utf8mb") }
  },
  {
    { STRING_WITH_LEN("body") },
    { STRING_WITH_LEN("longblob") },
    {NULL, 0}
  },
  {
    { STRING_WITH_LEN("definer") },
    { STRING_WITH_LEN("varchar(") },
    { STRING_WITH_LEN("utf8mb") }
  },
  {
    { STRING_WITH_LEN("execute_at") },
    { STRING_WITH_LEN("datetime") },
    {NULL, 0}
  },
  {
    { STRING_WITH_LEN("interval_value") },
    { STRING_WITH_LEN("int(11)") },
    {NULL, 0}
  },
  {
    { STRING_WITH_LEN("interval_field") },
    { STRING_WITH_LEN("enum('YEAR','QUARTER','MONTH','DAY',"
    "'HOUR','MINUTE','WEEK','SECOND','MICROSECOND','YEAR_MONTH','DAY_HOUR',"
    "'DAY_MINUTE','DAY_SECOND','HOUR_MINUTE','HOUR_SECOND','MINUTE_SECOND',"
    "'DAY_MICROSECOND','HOUR_MICROSECOND','MINUTE_MICROSECOND',"
    "'SECOND_MICROSECOND')") },
    {NULL, 0}
  },
  {
    { STRING_WITH_LEN("created") },
    { STRING_WITH_LEN("timestamp") },
    {NULL, 0}
  },
  {
    { STRING_WITH_LEN("modified") },
    { STRING_WITH_LEN("timestamp") },
    {NULL, 0}
  },
  {
    { STRING_WITH_LEN("last_executed") },
    { STRING_WITH_LEN("datetime") },
    {NULL, 0}
  },
  {
    { STRING_WITH_LEN("starts") },
    { STRING_WITH_LEN("datetime") },
    {NULL, 0}
  },
  {
    { STRING_WITH_LEN("ends") },
    { STRING_WITH_LEN("datetime") },
    {NULL, 0}
  },
  {
    { STRING_WITH_LEN("status") },
    { STRING_WITH_LEN("enum('ENABLED','DISABLED','SLAVESIDE_DISABLED')") },
    {NULL, 0}
  },
  {
    { STRING_WITH_LEN("on_completion") },
    { STRING_WITH_LEN("enum('DROP','PRESERVE')") },
    {NULL, 0}
  },
  {
    { STRING_WITH_LEN("sql_mode") },
    { STRING_WITH_LEN("set('REAL_AS_FLOAT','PIPES_AS_CONCAT','ANSI_QUOTES',"
    "'IGNORE_SPACE','IGNORE_BAD_TABLE_OPTIONS','ONLY_FULL_GROUP_BY',"
    "'NO_UNSIGNED_SUBTRACTION',"
    "'NO_DIR_IN_CREATE','POSTGRESQL','ORACLE','MSSQL','DB2','MAXDB',"
    "'NO_KEY_OPTIONS','NO_TABLE_OPTIONS','NO_FIELD_OPTIONS','MYSQL323','MYSQL40',"
    "'ANSI','NO_AUTO_VALUE_ON_ZERO','NO_BACKSLASH_ESCAPES','STRICT_TRANS_TABLES',"
    "'STRICT_ALL_TABLES','NO_ZERO_IN_DATE','NO_ZERO_DATE','INVALID_DATES',"
    "'ERROR_FOR_DIVISION_BY_ZERO','TRADITIONAL','NO_AUTO_CREATE_USER',"
    "'HIGH_NOT_PRECEDENCE','NO_ENGINE_SUBSTITUTION','PAD_CHAR_TO_FULL_LENGTH',"
    "'EMPTY_STRING_IS_NULL','SIMULTANEOUS_ASSIGNMENT')") },
    {NULL, 0}
  },
  {
    { STRING_WITH_LEN("comment") },
    { STRING_WITH_LEN("char(64)") },
    { STRING_WITH_LEN("utf8mb") }
  },
  {
    { STRING_WITH_LEN("originator") },
    { STRING_WITH_LEN("int(10)") },
    {NULL, 0}
  },
  {
    { STRING_WITH_LEN("time_zone") },
    { STRING_WITH_LEN("char(64)") },
    { STRING_WITH_LEN("latin1") }
  },
  {
    { STRING_WITH_LEN("character_set_client") },
    { STRING_WITH_LEN("char(32)") },
    { STRING_WITH_LEN("utf8mb") }
  },
  {
    { STRING_WITH_LEN("collation_connection") },
    { STRING_WITH_LEN("char(") },
    { STRING_WITH_LEN("utf8mb") }
  },
  {
    { STRING_WITH_LEN("db_collation") },
    { STRING_WITH_LEN("char(") },
    { STRING_WITH_LEN("utf8mb") }
  },
  {
    { STRING_WITH_LEN("body_utf8") },
    { STRING_WITH_LEN("longblob") },
    { NULL, 0 }
  }
};

/* Table name "event", paired with MYSQL_SCHEMA_NAME to open mysql.event */
static LEX_CSTRING MYSQL_EVENT_NAME= { STRING_WITH_LEN("event") };

static const TABLE_FIELD_DEF
event_table_def= {ET_FIELD_COUNT, event_table_fields, 0, (uint*) 0};

/** In case of an error, a message is printed to the error log. */
static Table_check_intact_log_error table_intact;
/**
  Puts some data common to CREATE and ALTER EVENT into a row.

  Used both when an event is created and when it is altered.

  @param thd        THD
  @param table      The row to fill out
  @param et         Event's data
  @param sp         Event stored routine
  @param sql_mode   sql_mode to store for the event body
  @param is_update  CREATE EVENT or ALTER EVENT

  @retval FALSE success
  @retval TRUE error

  Two error channels are used below:
  - truncation when storing a string field => goto err_truncate, reporting
    the field tracked in f_num;
  - any other store failure => accumulated in rs and reported once at the end.
*/

static bool
mysql_event_fill_row(THD *thd,
                     TABLE *table,
                     Event_parse_data *et,
                     sp_head *sp,
                     sql_mode_t sql_mode,
                     my_bool is_update)
{
  CHARSET_INFO *scs= system_charset_info;
  enum enum_events_table_field f_num;   // last string field stored, for error reporting
  Field **fields= table->field;
  int rs= FALSE;                        // OR-accumulated store() failures

  DBUG_ENTER("mysql_event_fill_row");

  DBUG_PRINT("info", ("dbname=[%s]", et->dbname.str));
  DBUG_PRINT("info", ("name  =[%s]", et->name.str));

  DBUG_ASSERT(et->on_completion != Event_parse_data::ON_COMPLETION_DEFAULT);

  if (table->s->fields < ET_FIELD_COUNT)
  {
    /*
      Safety: this can only happen if someone started the server
      and then altered mysql.event.
    */
    my_error(ER_COL_COUNT_DOESNT_MATCH_CORRUPTED_V2, MYF(0),
             table->s->db.str, table->alias.c_ptr(),
             (int) ET_FIELD_COUNT, table->s->fields);
    DBUG_RETURN(TRUE);
  }

  if (fields[f_num= ET_FIELD_DEFINER]->
                              store(et->definer.str, et->definer.length, scs))
    goto err_truncate;

  if (fields[f_num= ET_FIELD_DB]->store(et->dbname.str, et->dbname.length, scs))
    goto err_truncate;

  if (fields[f_num= ET_FIELD_NAME]->store(et->name.str, et->name.length, scs))
    goto err_truncate;

  /* ON_COMPLETION field is NOT NULL thus not calling set_notnull()*/
  rs|= fields[ET_FIELD_ON_COMPLETION]->store((longlong)et->on_completion, TRUE);

  /*
    Set STATUS value unconditionally in case of CREATE EVENT.
    For ALTER EVENT set it only if value of this field was changed.
    Since STATUS field is NOT NULL call to set_notnull() is not needed.
  */
  if (!is_update || et->status_changed)
    rs|= fields[ET_FIELD_STATUS]->store((longlong)et->status, TRUE);
  rs|= fields[ET_FIELD_ORIGINATOR]->store((longlong)et->originator, TRUE);

  if (!is_update)
    rs|= fields[ET_FIELD_CREATED]->set_time();

  /*
    Change the SQL_MODE only if body was present in an ALTER EVENT and of course
    always during CREATE EVENT.
  */
  if (et->body_changed)
  {
    DBUG_ASSERT(sp->m_body.str);

    rs|= fields[ET_FIELD_SQL_MODE]->store((longlong)sql_mode, TRUE);

    if (fields[f_num= ET_FIELD_BODY]->store(sp->m_body.str,
                                            sp->m_body.length,
                                            scs))
    {
      goto err_truncate;
    }
  }

  if (et->expression)
  {
    /* Recurring event: store interval + optional STARTS/ENDS, clear EXECUTE_AT */
    const String *tz_name= thd->variables.time_zone->get_name();
    if (!is_update || !et->starts_null)
    {
      fields[ET_FIELD_TIME_ZONE]->set_notnull();
      rs|= fields[ET_FIELD_TIME_ZONE]->store(tz_name->ptr(), tz_name->length(),
                                             tz_name->charset());
    }

    fields[ET_FIELD_INTERVAL_EXPR]->set_notnull();
    rs|= fields[ET_FIELD_INTERVAL_EXPR]->store((longlong)et->expression, TRUE);

    fields[ET_FIELD_TRANSIENT_INTERVAL]->set_notnull();

    rs|= fields[ET_FIELD_TRANSIENT_INTERVAL]->
                            store(interval_type_to_name[et->interval].str,
                                  interval_type_to_name[et->interval].length,
                                  scs);

    fields[ET_FIELD_EXECUTE_AT]->set_null();

    if (!et->starts_null)
    {
      MYSQL_TIME time;
      /* starts is stored normalized to UTC (offset-0 time zone) */
      my_tz_OFFSET0->gmt_sec_to_TIME(&time, et->starts);

      fields[ET_FIELD_STARTS]->set_notnull();
      fields[ET_FIELD_STARTS]->store_time(&time);
    }

    if (!et->ends_null)
    {
      MYSQL_TIME time;
      my_tz_OFFSET0->gmt_sec_to_TIME(&time, et->ends);

      fields[ET_FIELD_ENDS]->set_notnull();
      fields[ET_FIELD_ENDS]->store_time(&time);
    }
  }
  else if (et->execute_at)
  {
    /* One-shot event: store EXECUTE_AT, clear the recurring-event columns */
    const String *tz_name= thd->variables.time_zone->get_name();
    fields[ET_FIELD_TIME_ZONE]->set_notnull();
    rs|= fields[ET_FIELD_TIME_ZONE]->store(tz_name->ptr(), tz_name->length(),
                                           tz_name->charset());

    fields[ET_FIELD_INTERVAL_EXPR]->set_null();
    fields[ET_FIELD_TRANSIENT_INTERVAL]->set_null();
    fields[ET_FIELD_STARTS]->set_null();
    fields[ET_FIELD_ENDS]->set_null();

    MYSQL_TIME time;
    my_tz_OFFSET0->gmt_sec_to_TIME(&time, et->execute_at);

    fields[ET_FIELD_EXECUTE_AT]->set_notnull();
    fields[ET_FIELD_EXECUTE_AT]->store_time(&time);
  }
  else
  {
    DBUG_ASSERT(is_update);
    /*
      it is normal to be here when the action is update
      this is an error if the action is create. something is borked
    */
  }

  rs|= fields[ET_FIELD_MODIFIED]->set_time();

  if (et->comment.str)
  {
    if (fields[f_num= ET_FIELD_COMMENT]->
                  store(et->comment.str, et->comment.length, scs))
      goto err_truncate;
  }

  fields[ET_FIELD_CHARACTER_SET_CLIENT]->set_notnull();
  rs|= fields[ET_FIELD_CHARACTER_SET_CLIENT]->
                store(&thd->variables.character_set_client->cs_name,
                      system_charset_info);

  fields[ET_FIELD_COLLATION_CONNECTION]->set_notnull();
  rs|= fields[ET_FIELD_COLLATION_CONNECTION]->
                store(&thd->variables.collation_connection->coll_name,
                      system_charset_info);

  {
    CHARSET_INFO *db_cl= get_default_db_collation(thd, et->dbname.str);

    fields[ET_FIELD_DB_COLLATION]->set_notnull();
    rs|= fields[ET_FIELD_DB_COLLATION]->store(&db_cl->coll_name,
                                              system_charset_info);
  }

  if (et->body_changed)
  {
    fields[ET_FIELD_BODY_UTF8]->set_notnull();
    rs|= fields[ET_FIELD_BODY_UTF8]->store(&sp->m_body_utf8,
                                           system_charset_info);
  }

  if (rs)
  {
    /*
      NOTE(review): f_num here is the last truncate-checked field, which is
      not necessarily the field whose store() set rs — the reported field
      name may be approximate.
    */
    my_error(ER_EVENT_STORE_FAILED, MYF(0), fields[f_num]->field_name.str, rs);
    DBUG_RETURN(TRUE);
  }

  DBUG_RETURN(FALSE);

err_truncate:
  my_error(ER_EVENT_DATA_TOO_LONG, MYF(0), fields[f_num]->field_name.str);
  DBUG_RETURN(TRUE);
}
/*
  Performs an index scan of event_table (mysql.event) and fills schema_table.

  SYNOPSIS
    Event_db_repository::index_read_for_db_for_i_s()
      thd          Thread
      schema_table The I_S.EVENTS table
      event_table  The event table to use for loading (mysql.event)
      db           For which schema to do an index scan.

  RETURN VALUE
    0  OK
    1  Error

  Uses a prefix read on the primary key (db, name): only key part 0 (db)
  is filled in, then ha_index_next_same() walks all events of that schema.
*/

bool
Event_db_repository::index_read_for_db_for_i_s(THD *thd, TABLE *schema_table,
                                               TABLE *event_table,
                                               const char *db)
{
  CHARSET_INFO *scs= system_charset_info;
  KEY *key_info;
  uint key_len;
  uchar *key_buf;
  DBUG_ENTER("Event_db_repository::index_read_for_db_for_i_s");

  DBUG_PRINT("info", ("Using prefix scanning on PK"));

  int ret= event_table->file->ha_index_init(0, 1);
  if (ret)
  {
    event_table->file->print_error(ret, MYF(0));
    DBUG_RETURN(true);
  }

  key_info= event_table->key_info;

  if (key_info->user_defined_key_parts == 0 ||
      key_info->key_part[0].field != event_table->field[ET_FIELD_DB])
  {
    /* Corrupted table: no index or index on a wrong column */
    my_error(ER_CANNOT_LOAD_FROM_TABLE_V2, MYF(0), "mysql", "event");
    ret= 1;
    goto end;
  }

  event_table->field[ET_FIELD_DB]->store(db, strlen(db), scs);
  key_len= key_info->key_part[0].store_length;

  if (!(key_buf= (uchar *)alloc_root(thd->mem_root, key_len)))
  {
    /* Don't send error, it would be done by sql_alloc_error_handler() */
    ret= 1;
    goto end;
  }

  key_copy(key_buf, event_table->record[0], key_info, key_len);
  if (!(ret= event_table->file->ha_index_read_map(event_table->record[0],
                                                  key_buf,
                                                  (key_part_map)1,
                                                  HA_READ_KEY_EXACT)))
  {
    DBUG_PRINT("info",("Found rows. Let's retrieve them. ret=%d", ret));
    do
    {
      ret= copy_event_to_schema_table(thd, schema_table, event_table);
      if (ret == 0)
        ret= event_table->file->ha_index_next_same(event_table->record[0],
                                                   key_buf, key_len);
    } while (ret == 0);
  }
  DBUG_PRINT("info", ("Scan finished. ret=%d", ret));

  /* ret is guaranteed to be != 0; end-of-scan codes are not errors */
  if (ret == HA_ERR_END_OF_FILE || ret == HA_ERR_KEY_NOT_FOUND)
    ret= 0;
  else
    event_table->file->print_error(ret, MYF(0));

end:
  event_table->file->ha_index_end();

  DBUG_RETURN(MY_TEST(ret));
}
+ + SYNOPSIS + Events_db_repository::table_scan_all_for_i_s() + thd Thread + schema_table The I_S.EVENTS in memory table + event_table The event table to use for loading. + + RETURN VALUE + FALSE OK + TRUE Error +*/ + +bool +Event_db_repository::table_scan_all_for_i_s(THD *thd, TABLE *schema_table, + TABLE *event_table) +{ + int ret; + READ_RECORD read_record_info; + DBUG_ENTER("Event_db_repository::table_scan_all_for_i_s"); + + if (init_read_record(&read_record_info, thd, event_table, NULL, NULL, 1, 0, + FALSE)) + DBUG_RETURN(TRUE); + + /* + rr_sequential, in read_record(), returns 137==HA_ERR_END_OF_FILE, + but rr_handle_error returns -1 for that reason. Thus, read_record() + returns -1 eventually. + */ + do + { + ret= read_record_info.read_record(); + if (ret == 0) + ret= copy_event_to_schema_table(thd, schema_table, event_table); + } while (ret == 0); + + DBUG_PRINT("info", ("Scan finished. ret=%d", ret)); + end_read_record(&read_record_info); + + /* ret is guaranteed to be != 0 */ + DBUG_RETURN(ret == -1? FALSE:TRUE); +} + + +/** + Fills I_S.EVENTS with data loaded from mysql.event. Also used by + SHOW EVENTS + + The reason we reset and backup open tables here is that this + function may be called from any query that accesses + INFORMATION_SCHEMA - including a query that is issued from + a pre-locked statement, one that already has open and locked + tables. + + @retval FALSE success + @retval TRUE error +*/ + +bool +Event_db_repository::fill_schema_events(THD *thd, TABLE_LIST *i_s_table, + const char *db) +{ + TABLE *schema_table= i_s_table->table; + TABLE_LIST event_table; + int ret= 0; + DBUG_ENTER("Event_db_repository::fill_schema_events"); + DBUG_PRINT("info",("db=%s", db? 
/**
  Fills I_S.EVENTS with data loaded from mysql.event. Also used by
  SHOW EVENTS

  The reason we reset and backup open tables here is that this
  function may be called from any query that accesses
  INFORMATION_SCHEMA - including a query that is issued from
  a pre-locked statement, one that already has open and locked
  tables.

  @param thd       Thread
  @param i_s_table The I_S.EVENTS destination table
  @param db        Schema to filter on, or NULL for all schemas

  @retval FALSE  success
  @retval TRUE   error
*/

bool
Event_db_repository::fill_schema_events(THD *thd, TABLE_LIST *i_s_table,
                                        const char *db)
{
  TABLE *schema_table= i_s_table->table;
  TABLE_LIST event_table;
  int ret= 0;
  DBUG_ENTER("Event_db_repository::fill_schema_events");
  DBUG_PRINT("info",("db=%s", db? db:"(null)"));

  /* Run the load in a separate transaction, restored on exit */
  start_new_trans new_trans(thd);

  event_table.init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_EVENT_NAME, 0, TL_READ);

  if (open_system_tables_for_read(thd, &event_table))
  {
    new_trans.restore_old_transaction();
    DBUG_RETURN(TRUE);
  }

  if (table_intact.check(event_table.table, &event_table_def))
  {
    my_error(ER_EVENT_OPEN_TABLE_FAILED, MYF(0));
    ret= 1;
    goto err;
  }

  /*
    1. SELECT I_S => use table scan. I_S.EVENTS does not guarantee order
                     thus we won't order it. OTOH, SHOW EVENTS will be
                     ordered.
    2. SHOW EVENTS => PRIMARY KEY with prefix scanning on (db)
       Reasoning: Events are per schema, therefore a scan over an index
                  will save us from doing a table scan and comparing
                  every single row's `db` with the schema which we show.
  */
  if (db)
    ret= index_read_for_db_for_i_s(thd, schema_table, event_table.table, db);
  else
    ret= table_scan_all_for_i_s(thd, schema_table, event_table.table);

err:
  thd->commit_whole_transaction_and_close_tables();
  new_trans.restore_old_transaction();

  DBUG_PRINT("info", ("Return code=%d", ret));
  DBUG_RETURN(ret);
}
/**
  Open mysql.event table for read.

  It's assumed that the caller knows what they are doing:
  - whether it was necessary to reset-and-backup the open tables state
  - whether the requested lock does not lead to a deadlock
  - whether this open mode would work under LOCK TABLES, or inside a
    stored function or trigger.

  Note that if the table can't be locked successfully this operation will
  close it. Therefore it provides guarantee that it either opens and locks
  table or fails without leaving any tables open.

  @param[in]  thd        Thread context
  @param[in]  lock_type  How to lock the table
  @param[out] table      We will store the open table here

  @retval TRUE   open and lock failed - an error message is pushed into the
                 stack
  @retval FALSE  success
*/

bool
Event_db_repository::open_event_table(THD *thd, enum thr_lock_type lock_type,
                                      TABLE **table)
{
  TABLE_LIST tables;
  DBUG_ENTER("Event_db_repository::open_event_table");

  tables.init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_EVENT_NAME, 0, lock_type);

  if (open_and_lock_tables(thd, &tables, FALSE, MYSQL_LOCK_IGNORE_TIMEOUT))
    DBUG_RETURN(TRUE);

  *table= tables.table;
  tables.table->use_all_columns();
  /* NOTE: &tables pointer will be invalid after return */
  tables.table->pos_in_table_list= NULL;

  if (table_intact.check(*table, &event_table_def))
  {
    thd->commit_whole_transaction_and_close_tables();
    *table= 0;                                  // Table is now closed
    my_error(ER_EVENT_OPEN_TABLE_FAILED, MYF(0));
    DBUG_RETURN(TRUE);
  }

  DBUG_RETURN(FALSE);
}
/**
  Creates an event record in mysql.event table.

  Creates an event. Relies on mysql_event_fill_row which is shared with
  ::update_event.

  @pre All semantic checks must be performed outside. This function
  only creates a record on disk.
  @pre The thread handle has no open tables.

  @param[in,out] thd                   THD
  @param[in]     parse_data            Parsed event definition
  @param[out]    event_already_exists  When method is completed successfully
                                       set to true if event already exists else
                                       set to false
  @retval FALSE  success
  @retval TRUE   error
*/

bool
Event_db_repository::create_event(THD *thd, Event_parse_data *parse_data,
                                  bool *event_already_exists)
{
  int ret= 1;
  TABLE *table= NULL;
  sp_head *sp= thd->lex->sphead;
  sql_mode_t saved_mode= thd->variables.sql_mode;
  /*
    Take a savepoint to release only the lock on mysql.event
    table at the end but keep the global read lock and
    possible other locks taken by the caller.
  */
  MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint();

  DBUG_ENTER("Event_db_repository::create_event");

  DBUG_PRINT("info", ("open mysql.event for update"));
  DBUG_ASSERT(sp);

  /* Reset sql_mode during data dictionary operations. */
  thd->variables.sql_mode= 0;

  if (open_event_table(thd, TL_WRITE, &table))
    goto end;

  DBUG_PRINT("info", ("name: %.*s", (int) parse_data->name.length,
             parse_data->name.str));

  DBUG_PRINT("info", ("check existence of an event with the same name"));
  if (!find_named_event(&parse_data->dbname, &parse_data->name, table))
  {
    /* An event with this name exists; resolution depends on the clause used */
    if (thd->lex->create_info.or_replace())
    {
      *event_already_exists= false;      // Force the caller to update event_queue
      if ((ret= table->file->ha_delete_row(table->record[0])))
      {
        table->file->print_error(ret, MYF(0));
        goto end;
      }
    }
    else if (thd->lex->create_info.if_not_exists())
    {
      *event_already_exists= true;
      push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
                          ER_EVENT_ALREADY_EXISTS,
                          ER_THD(thd, ER_EVENT_ALREADY_EXISTS),
                          parse_data->name.str);
      ret= 0;
      goto end;
    }
    else
    {
      my_error(ER_EVENT_ALREADY_EXISTS, MYF(0), parse_data->name.str);
      goto end;
    }
  } else
    *event_already_exists= false;

  DBUG_PRINT("info", ("non-existent, go forward"));

  restore_record(table, s->default_values);     // Get default values for fields

  if (check_string_char_length(&parse_data->dbname, 0,
                               table->field[ET_FIELD_DB]->char_length(),
                               system_charset_info, 1))
  {
    my_error(ER_TOO_LONG_IDENT, MYF(0), parse_data->dbname.str);
    goto end;
  }

  if (check_string_char_length(&parse_data->name, 0,
                               table->field[ET_FIELD_NAME]->char_length(),
                               system_charset_info, 1))
  {
    my_error(ER_TOO_LONG_IDENT, MYF(0), parse_data->name.str);
    goto end;
  }

  if (sp->m_body.length > table->field[ET_FIELD_BODY]->field_length)
  {
    my_error(ER_TOO_LONG_BODY, MYF(0), parse_data->name.str);
    goto end;
  }

  /*
    mysql_event_fill_row() calls my_error() in case of error so no need to
    handle it here
  */
  if (mysql_event_fill_row(thd, table, parse_data, sp, saved_mode, FALSE))
    goto end;

  if ((ret= table->file->ha_write_row(table->record[0])))
  {
    table->file->print_error(ret, MYF(0));
    goto end;
  }

  ret= 0;

end:
  if (table)
    thd->commit_whole_transaction_and_close_tables();
  thd->mdl_context.rollback_to_savepoint(mdl_savepoint);

  thd->variables.sql_mode= saved_mode;
  DBUG_RETURN(MY_TEST(ret));
}
/**
  Used to execute ALTER EVENT. Pendant to Events::update_event().

  @param[in,out] thd         thread handle
  @param[in]     parse_data  parsed event definition
  @param[in]     new_dbname  not NULL if ALTER EVENT RENAME
                             points at a new database name
  @param[in]     new_name    not NULL if ALTER EVENT RENAME
                             points at a new event name

  @pre All semantic checks are performed outside this function,
       it only updates the event definition on disk.
  @pre We don't have any tables open in the given thread.

  @retval FALSE  success
  @retval TRUE   error (reported)
*/

bool
Event_db_repository::update_event(THD *thd, Event_parse_data *parse_data,
                                  LEX_CSTRING *new_dbname,
                                  LEX_CSTRING *new_name)
{
  CHARSET_INFO *scs= system_charset_info;
  TABLE *table= NULL;
  sp_head *sp= thd->lex->sphead;
  sql_mode_t saved_mode= thd->variables.sql_mode;
  /*
    Take a savepoint to release only the lock on mysql.event
    table at the end but keep the global read lock and
    possible other locks taken by the caller.
  */
  MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint();
  int ret= 1;
  DBUG_ENTER("Event_db_repository::update_event");

  /* None or both must be set */
  DBUG_ASSERT((new_dbname && new_name) || new_dbname == new_name);

  /* Reset sql_mode during data dictionary operations. */
  thd->variables.sql_mode= 0;

  if (open_event_table(thd, TL_WRITE, &table))
    goto end;

  DBUG_PRINT("info", ("dbname: %s", parse_data->dbname.str));
  DBUG_PRINT("info", ("name: %s", parse_data->name.str));
  DBUG_PRINT("info", ("user: %s", parse_data->definer.str));

  /* first look whether we overwrite */
  if (new_name)
  {
    DBUG_PRINT("info", ("rename to: %s@%s", new_dbname->str, new_name->str));
    if (!find_named_event(new_dbname, new_name, table))
    {
      my_error(ER_EVENT_ALREADY_EXISTS, MYF(0), new_name->str);
      goto end;
    }
  }
  /*
    ...and then if there is such an event. Don't exchange the blocks
    because you will get error 120 from table handler because new_name will
    overwrite the key and SE will tell us that it cannot find the already found
    row (copied into record[1] later)
  */
  if (find_named_event(&parse_data->dbname, &parse_data->name, table))
  {
    my_error(ER_EVENT_DOES_NOT_EXIST, MYF(0), parse_data->name.str);
    goto end;
  }

  store_record(table,record[1]);

  /*
    We check whether ALTER EVENT was given dates that are in the past.
    However to know how to react, we need the ON COMPLETION type. The
    check is deferred to this point because by now we have the previous
    setting (from the event-table) to fall back on if nothing was specified
    in the ALTER EVENT-statement.
  */

  if (parse_data->check_dates(thd,
                              (int) table->field[ET_FIELD_ON_COMPLETION]->val_int()))
    goto end;

  /*
    mysql_event_fill_row() calls my_error() in case of error so no need to
    handle it here
  */
  if (mysql_event_fill_row(thd, table, parse_data, sp, saved_mode, TRUE))
    goto end;

  if (new_dbname)
  {
    /* Apply the RENAME after the row is otherwise filled in */
    table->field[ET_FIELD_DB]->store(new_dbname->str, new_dbname->length, scs);
    table->field[ET_FIELD_NAME]->store(new_name->str, new_name->length, scs);
  }

  if ((ret= table->file->ha_update_row(table->record[1], table->record[0])))
  {
    table->file->print_error(ret, MYF(0));
    goto end;
  }
  ret= 0;

end:
  if (table)
    thd->commit_whole_transaction_and_close_tables();
  thd->mdl_context.rollback_to_savepoint(mdl_savepoint);

  thd->variables.sql_mode= saved_mode;
  DBUG_RETURN(MY_TEST(ret));
}
/**
  Delete event record from mysql.event table.

  @param[in,out] thd             thread handle
  @param[in]     db              Database name
  @param[in]     name            Event name
  @param[in]     drop_if_exists  DROP IF EXISTS clause was specified.
                                 If set, and the event does not exist,
                                 the error is downgraded to a warning.

  @retval FALSE  success
  @retval TRUE   error (reported)
*/

bool
Event_db_repository::drop_event(THD *thd, const LEX_CSTRING *db,
                                const LEX_CSTRING *name,
                                bool drop_if_exists)
{
  TABLE *table= NULL;
  /*
    Take a savepoint to release only the lock on mysql.event
    table at the end but keep the global read lock and
    possible other locks taken by the caller.
  */
  MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint();
  int ret= 1;

  DBUG_ENTER("Event_db_repository::drop_event");
  DBUG_PRINT("enter", ("%s@%s", db->str, name->str));

  if (open_event_table(thd, TL_WRITE, &table))
    goto end;

  if (!find_named_event(db, name, table))
  {
    /* Cursor is now positioned on the row to delete */
    if ((ret= table->file->ha_delete_row(table->record[0])))
      table->file->print_error(ret, MYF(0));
    goto end;
  }

  /* Event not found */
  if (!drop_if_exists)
  {
    my_error(ER_EVENT_DOES_NOT_EXIST, MYF(0), name->str);
    goto end;
  }

  push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
                      ER_SP_DOES_NOT_EXIST, ER_THD(thd, ER_SP_DOES_NOT_EXIST),
                      "Event", name->str);
  ret= 0;

end:
  if (table)
    thd->commit_whole_transaction_and_close_tables();
  thd->mdl_context.rollback_to_savepoint(mdl_savepoint);

  DBUG_RETURN(MY_TEST(ret));
}
+ */ + MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint(); + int ret= 1; + + DBUG_ENTER("Event_db_repository::drop_event"); + DBUG_PRINT("enter", ("%s@%s", db->str, name->str)); + + if (open_event_table(thd, TL_WRITE, &table)) + goto end; + + if (!find_named_event(db, name, table)) + { + if ((ret= table->file->ha_delete_row(table->record[0]))) + table->file->print_error(ret, MYF(0)); + goto end; + } + + /* Event not found */ + if (!drop_if_exists) + { + my_error(ER_EVENT_DOES_NOT_EXIST, MYF(0), name->str); + goto end; + } + + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_SP_DOES_NOT_EXIST, ER_THD(thd, ER_SP_DOES_NOT_EXIST), + "Event", name->str); + ret= 0; + +end: + if (table) + thd->commit_whole_transaction_and_close_tables(); + thd->mdl_context.rollback_to_savepoint(mdl_savepoint); + + DBUG_RETURN(MY_TEST(ret)); +} + + +/** + Positions the internal pointer of `table` to the place where (db, name) + is stored. + + In case search succeeded, the table cursor points at the found row. + + @param[in] db database name + @param[in] name event name + @param[in,out] table mysql.event table + + + @retval FALSE an event with such db/name key exists + @retval TRUE no record found or an error occurred. +*/ + +bool +Event_db_repository::find_named_event(const LEX_CSTRING *db, + const LEX_CSTRING *name, + TABLE *table) +{ + uchar key[MAX_KEY_LENGTH]; + DBUG_ENTER("Event_db_repository::find_named_event"); + DBUG_PRINT("enter", ("name: %.*s", (int) name->length, name->str)); + + /* + Create key to find row. We have to use field->store() to be able to + handle VARCHAR and CHAR fields. + Assumption here is that the two first fields in the table are + 'db' and 'name' and the first key is the primary key over the + same fields. 
/*
  Drops all events in the selected database, from mysql.event.

  SYNOPSIS
    Event_db_repository::drop_schema_events()
      thd     Thread
      schema  The database to clean from events

  Performs a full scan and deletes each row whose `db` column equals
  the given schema name.
*/

void
Event_db_repository::drop_schema_events(THD *thd, const LEX_CSTRING *schema)
{
  int ret= 0;
  TABLE *table= NULL;
  READ_RECORD read_record_info;
  enum enum_events_table_field field= ET_FIELD_DB;
  DBUG_ENTER("Event_db_repository::drop_schema_events");
  DBUG_PRINT("enter", ("field: %d schema: %s", field, schema->str));

  start_new_trans new_trans(thd);

  if (open_event_table(thd, TL_WRITE, &table))
  {
    new_trans.restore_old_transaction();
    DBUG_VOID_RETURN;
  }

  /* only enabled events are in memory, so we go now and delete the rest */
  if (init_read_record(&read_record_info, thd, table, NULL, NULL, 1, 0, FALSE))
    goto end;

  while (!ret && !(read_record_info.read_record()))
  {
    char *et_field= get_field(thd->mem_root, table->field[field]);

    /* et_field may be NULL if the table is corrupted or out of memory */
    if (et_field)
    {
      LEX_CSTRING et_field_lex= { et_field, strlen(et_field) };
      DBUG_PRINT("info", ("Current event %s name=%s", et_field,
                 get_field(thd->mem_root,
                           table->field[ET_FIELD_NAME])));

      if (!sortcmp_lex_string(&et_field_lex, schema, system_charset_info))
      {
        DBUG_PRINT("info", ("Dropping"));
        if ((ret= table->file->ha_delete_row(table->record[0])))
          table->file->print_error(ret, MYF(0));
      }
    }
  }
  end_read_record(&read_record_info);

end:
  thd->commit_whole_transaction_and_close_tables();
  new_trans.restore_old_transaction();
  DBUG_VOID_RETURN;
}
/**
  Looks for a named event in mysql.event and then loads it from
  the table.

  @pre The given thread does not have open tables.

  @param[in]  thd     Thread
  @param[in]  dbname  Schema of the event
  @param[in]  name    Name of the event
  @param[out] etn     Event_basic-derived object to load the row into

  @retval FALSE  success
  @retval TRUE   error
*/

bool
Event_db_repository::load_named_event(THD *thd, const LEX_CSTRING *dbname,
                                      const LEX_CSTRING *name,
                                      Event_basic *etn)
{
  bool ret;
  TABLE_LIST event_table;
  DBUG_ENTER("Event_db_repository::load_named_event");
  DBUG_PRINT("enter",("thd: %p  name: %*s", thd,
             (int) name->length, name->str));

  start_new_trans new_trans(thd);
  /* Reset sql_mode during data dictionary operations. */
  Sql_mode_instant_set sms(thd, 0);

  event_table.init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_EVENT_NAME, 0, TL_READ);

  /*
    We don't use open_event_table() here to make sure that SHOW
    CREATE EVENT works properly in transactional context, and
    does not release transactional metadata locks when the
    event table is closed.
  */
  if (!(ret= open_system_tables_for_read(thd, &event_table)))
  {
    if (table_intact.check(event_table.table, &event_table_def))
    {
      thd->commit_whole_transaction_and_close_tables();
      new_trans.restore_old_transaction();
      my_error(ER_EVENT_OPEN_TABLE_FAILED, MYF(0));
      DBUG_RETURN(TRUE);
    }

    if ((ret= find_named_event(dbname, name, event_table.table)))
      my_error(ER_EVENT_DOES_NOT_EXIST, MYF(0), name->str);
    else if ((ret= etn->load_from_row(thd, event_table.table)))
      my_error(ER_CANNOT_LOAD_FROM_TABLE_V2, MYF(0), "mysql", "event");
    thd->commit_whole_transaction_and_close_tables();
  }
  new_trans.restore_old_transaction();

  DBUG_RETURN(ret);
}
/**
  Update the event record in mysql.event table with a changed status
  and/or last execution time.

  @pre The thread handle does not have open tables.

  @param[in] thd            Thread
  @param[in] event_db_name  Schema of the event
  @param[in] event_name     Name of the event
  @param[in] last_executed  New last-execution time (UTC seconds)
  @param[in] status         New status value

  @retval FALSE  success
  @retval TRUE   error
*/

bool
Event_db_repository::
update_timing_fields_for_event(THD *thd,
                               const LEX_CSTRING *event_db_name,
                               const LEX_CSTRING *event_name,
                               my_time_t last_executed,
                               ulonglong status)
{
  TABLE *table= NULL;
  Field **fields;
  int ret= 1;
  MYSQL_TIME time;
  DBUG_ENTER("Event_db_repository::update_timing_fields_for_event");

  /* The scheduler must be allowed to write even under read_only */
  DBUG_ASSERT(thd->security_ctx->master_access & PRIV_IGNORE_READ_ONLY);

  /*
    Take a savepoint to release only the lock on mysql.event
    table at the end but keep the global read lock and
    possible other locks taken by the caller.
  */
  MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint();
  if (open_event_table(thd, TL_WRITE, &table))
    DBUG_RETURN(1);

  fields= table->field;
  /*
    Turn off row binlogging of event timing updates. These are not used
    for RBR of events replicated to the slave.
  */
  table->file->row_logging= 0;

  if (find_named_event(event_db_name, event_name, table))
    goto end;

  store_record(table, record[1]);

  /* last_executed is stored normalized to UTC (offset-0 time zone) */
  my_tz_OFFSET0->gmt_sec_to_TIME(&time, last_executed);
  fields[ET_FIELD_LAST_EXECUTED]->set_notnull();
  fields[ET_FIELD_LAST_EXECUTED]->store_time(&time);

  fields[ET_FIELD_STATUS]->set_notnull();
  fields[ET_FIELD_STATUS]->store(status, TRUE);

  if ((ret= table->file->ha_update_row(table->record[1], table->record[0])))
  {
    table->file->print_error(ret, MYF(0));
    goto end;
  }

  ret= 0;
end:
  if (thd->commit_whole_transaction_and_close_tables())
    ret= 1;
  thd->mdl_context.rollback_to_savepoint(mdl_savepoint);

  DBUG_RETURN(MY_TEST(ret));
}
+*/ + +bool +Event_db_repository::check_system_tables(THD *thd) +{ + TABLE_LIST tables; + int ret= FALSE; + DBUG_ENTER("Event_db_repository::check_system_tables"); + DBUG_PRINT("enter", ("thd: %p", thd)); + + /* Check mysql.event */ + tables.init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_EVENT_NAME, 0, TL_READ); + + if (open_and_lock_tables(thd, &tables, FALSE, MYSQL_LOCK_IGNORE_TIMEOUT)) + { + ret= 1; + sql_print_error("Cannot open mysql.event"); + } + else + { + if (table_intact.check(tables.table, &event_table_def)) + ret= 1; + close_mysql_tables(thd); + } + + DBUG_RETURN(MY_TEST(ret)); +} + +/** + @} (End of group Event_Scheduler) +*/ diff --git a/sql/event_db_repository.h b/sql/event_db_repository.h new file mode 100644 index 00000000..29b5031b --- /dev/null +++ b/sql/event_db_repository.h @@ -0,0 +1,131 @@ +#ifndef _EVENT_DB_REPOSITORY_H_ +#define _EVENT_DB_REPOSITORY_H_ +/* Copyright (c) 2006, 2011, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/** + @addtogroup Event_Scheduler + @{ + + @file event_db_repository.h + + Data Dictionary related operations of Event Scheduler. + + This is a private header file of Events module. Please do not include it + directly. All public declarations of Events module should be stored in + events.h and event_data_objects.h. 
#ifndef _EVENT_DB_REPOSITORY_H_
#define _EVENT_DB_REPOSITORY_H_
/* Copyright (c) 2006, 2011, Oracle and/or its affiliates.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
*/

/**
  @addtogroup Event_Scheduler
  @{

  @file event_db_repository.h

  Data Dictionary related operations of Event Scheduler.

  This is a private header file of Events module. Please do not include it
  directly. All public declarations of Events module should be stored in
  events.h and event_data_objects.h.
*/

/*
  Column positions of mysql.event; must stay in sync with the
  event_table_fields definition in event_db_repository.cc.
*/
enum enum_events_table_field
{
  ET_FIELD_DB = 0,
  ET_FIELD_NAME,
  ET_FIELD_BODY,
  ET_FIELD_DEFINER,
  ET_FIELD_EXECUTE_AT,
  ET_FIELD_INTERVAL_EXPR,
  ET_FIELD_TRANSIENT_INTERVAL,
  ET_FIELD_CREATED,
  ET_FIELD_MODIFIED,
  ET_FIELD_LAST_EXECUTED,
  ET_FIELD_STARTS,
  ET_FIELD_ENDS,
  ET_FIELD_STATUS,
  ET_FIELD_ON_COMPLETION,
  ET_FIELD_SQL_MODE,
  ET_FIELD_COMMENT,
  ET_FIELD_ORIGINATOR,
  ET_FIELD_TIME_ZONE,
  ET_FIELD_CHARACTER_SET_CLIENT,
  ET_FIELD_COLLATION_CONNECTION,
  ET_FIELD_DB_COLLATION,
  ET_FIELD_BODY_UTF8,
  ET_FIELD_COUNT /* a cool trick to count the number of fields :) */
};


/*
  NOTE(review): no definitions of these two functions are visible in this
  translation unit's .cc file — they may be vestigial declarations; verify
  before relying on them.
*/
int
events_table_index_read_for_db(THD *thd, TABLE *schema_table,
                               TABLE *event_table);

int
events_table_scan_all(THD *thd, TABLE *schema_table, TABLE *event_table);


class Event_basic;
class Event_parse_data;

/**
  Stateless facade over the mysql.event system table: create, update,
  drop, look up and enumerate event rows on disk.
*/
class Event_db_repository
{
public:
  Event_db_repository() = default;

  bool
  create_event(THD *thd, Event_parse_data *parse_data,
               bool *event_already_exists);
  bool
  update_event(THD *thd, Event_parse_data *parse_data, LEX_CSTRING *new_dbname,
               LEX_CSTRING *new_name);

  bool
  drop_event(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *name,
             bool drop_if_exists);

  void
  drop_schema_events(THD *thd, const LEX_CSTRING *schema);

  bool
  find_named_event(const LEX_CSTRING *db, const LEX_CSTRING *name,
                   TABLE *table);

  bool
  load_named_event(THD *thd, const LEX_CSTRING *dbname,
                   const LEX_CSTRING *name,
                   Event_basic *et);

  static bool
  open_event_table(THD *thd, enum thr_lock_type lock_type, TABLE **table);

  bool
  fill_schema_events(THD *thd, TABLE_LIST *tables, const char *db);

  bool
  update_timing_fields_for_event(THD *thd,
                                 const LEX_CSTRING *event_db_name,
                                 const LEX_CSTRING *event_name,
                                 my_time_t last_executed,
                                 ulonglong status);
public:
  static bool
  check_system_tables(THD *thd);
private:
  bool
  index_read_for_db_for_i_s(THD *thd, TABLE *schema_table, TABLE *event_table,
                            const char *db);

  bool
  table_scan_all_for_i_s(THD *thd, TABLE *schema_table, TABLE *event_table);

private:
  /* Prevent use of these */
  Event_db_repository(const Event_db_repository &);
  void operator=(Event_db_repository &);
};

/**
  @} (End of group Event_Scheduler)
*/
#endif /* _EVENT_DB_REPOSITORY_H_ */
char *db); + + bool + table_scan_all_for_i_s(THD *thd, TABLE *schema_table, TABLE *event_table); + +private: + /* Prevent use of these */ + Event_db_repository(const Event_db_repository &); + void operator=(Event_db_repository &); +}; + +/** + @} (End of group Event_Scheduler) +*/ +#endif /* _EVENT_DB_REPOSITORY_H_ */ diff --git a/sql/event_parse_data.cc b/sql/event_parse_data.cc new file mode 100644 index 00000000..80d2f9c9 --- /dev/null +++ b/sql/event_parse_data.cc @@ -0,0 +1,587 @@ +/* + Copyright (c) 2008, 2011, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "sp_head.h" +#include "event_parse_data.h" +#include "sql_time.h" // TIME_to_timestamp + +/* + Returns a new instance + + SYNOPSIS + Event_parse_data::new_instance() + + RETURN VALUE + Address or NULL in case of error + + NOTE + Created on THD's mem_root +*/ + +Event_parse_data * +Event_parse_data::new_instance(THD *thd) +{ + return new (thd->mem_root) Event_parse_data; +} + + +/* + Constructor + + SYNOPSIS + Event_parse_data::Event_parse_data() +*/ + +Event_parse_data::Event_parse_data() + :on_completion(Event_parse_data::ON_COMPLETION_DEFAULT), + status(Event_parse_data::ENABLED), status_changed(false), + do_not_create(FALSE), body_changed(FALSE), + item_starts(NULL), item_ends(NULL), 
item_execute_at(NULL), + starts_null(TRUE), ends_null(TRUE), execute_at_null(TRUE), + item_expression(NULL), expression(0) +{ + DBUG_ENTER("Event_parse_data::Event_parse_data"); + + /* Actually in the parser STARTS is always set */ + starts= ends= execute_at= 0; + + comment.str= NULL; + comment.length= 0; + + DBUG_VOID_RETURN; +} + + +/* + Set a name of the event + + SYNOPSIS + Event_parse_data::init_name() + thd THD + spn the name extracted in the parser +*/ + +void +Event_parse_data::init_name(THD *thd, sp_name *spn) +{ + DBUG_ENTER("Event_parse_data::init_name"); + + /* We have to copy strings to get them into the right memroot */ + dbname.length= spn->m_db.length; + dbname.str= thd->strmake(spn->m_db.str, spn->m_db.length); + name.length= spn->m_name.length; + name.str= thd->strmake(spn->m_name.str, spn->m_name.length); + + DBUG_VOID_RETURN; +} + + +/* + This function is called on CREATE EVENT or ALTER EVENT. When either + ENDS or AT is in the past, we are trying to create an event that + will never be executed. If it has ON COMPLETION NOT PRESERVE + (default), then it would normally be dropped already, so on CREATE + EVENT we give a warning, and do not create anything. On ALTER EVENT + we give a error, and do not change the event. + + If the event has ON COMPLETION PRESERVE, then we see if the event is + created or altered to the ENABLED (default) state. If so, then we + give a warning, and change the state to DISABLED. + + Otherwise it is a valid event in ON COMPLETION PRESERVE DISABLE + state. 
+*/ + +void +Event_parse_data::check_if_in_the_past(THD *thd, my_time_t ltime_utc) +{ + if (ltime_utc >= thd->query_start()) + return; + + /* + We'll come back later when we have the real on_completion value + */ + if (on_completion == Event_parse_data::ON_COMPLETION_DEFAULT) + return; + + if (on_completion == Event_parse_data::ON_COMPLETION_DROP) + { + switch (thd->lex->sql_command) { + case SQLCOM_CREATE_EVENT: + push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_EVENT_CANNOT_CREATE_IN_THE_PAST, + ER_THD(thd, ER_EVENT_CANNOT_CREATE_IN_THE_PAST)); + break; + case SQLCOM_ALTER_EVENT: + my_error(ER_EVENT_CANNOT_ALTER_IN_THE_PAST, MYF(0)); + break; + default: + DBUG_ASSERT(0); + } + + do_not_create= TRUE; + } + else if (status == Event_parse_data::ENABLED) + { + status= Event_parse_data::DISABLED; + status_changed= true; + push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_EVENT_EXEC_TIME_IN_THE_PAST, + ER_THD(thd, ER_EVENT_EXEC_TIME_IN_THE_PAST)); + } +} + + +/* + Check time/dates in ALTER EVENT + + We check whether ALTER EVENT was given dates that are in the past. + However to know how to react, we need the ON COMPLETION type. Hence, + the check is deferred until we have the previous ON COMPLETION type + from the event-db to fall back on if nothing was specified in the + ALTER EVENT-statement. + + SYNOPSIS + Event_parse_data::check_dates() + thd Thread + on_completion ON COMPLETION value currently in event-db. + Will be overridden by value in ALTER EVENT if given. + + RETURN VALUE + TRUE an error occurred, do not ALTER + FALSE OK +*/ + +bool +Event_parse_data::check_dates(THD *thd, int previous_on_completion) +{ + if (on_completion == Event_parse_data::ON_COMPLETION_DEFAULT) + { + on_completion= previous_on_completion; + if (!ends_null) + check_if_in_the_past(thd, ends); + if (!execute_at_null) + check_if_in_the_past(thd, execute_at); + } + return do_not_create; +} + + + +/* + Sets time for execution for one-time event. 
+ + SYNOPSIS + Event_parse_data::init_execute_at() + thd Thread + + RETURN VALUE + 0 OK + ER_WRONG_VALUE Wrong value for execute at (reported) +*/ + +int +Event_parse_data::init_execute_at(THD *thd) +{ + uint not_used; + MYSQL_TIME ltime; + my_time_t ltime_utc; + + DBUG_ENTER("Event_parse_data::init_execute_at"); + + if (!item_execute_at) + DBUG_RETURN(0); + + if (item_execute_at->fix_fields(thd, &item_execute_at)) + goto wrong_value; + + /* no starts and/or ends in case of execute_at */ + DBUG_PRINT("info", ("starts_null && ends_null should be 1 is %d", + (starts_null && ends_null))); + DBUG_ASSERT(starts_null && ends_null); + + /* + The expected data type is DATETIME. No nanoseconds truncation should + normally be needed. Using the default rounding mode. + See more comments in event_data_object.cc. + */ + if (item_execute_at->get_date(thd, <ime, TIME_NO_ZERO_DATE | + thd->temporal_round_mode())) + goto wrong_value; + + ltime_utc= TIME_to_timestamp(thd,<ime,¬_used); + if (!ltime_utc) + { + DBUG_PRINT("error", ("Execute AT after year 2037")); + goto wrong_value; + } + + check_if_in_the_past(thd, ltime_utc); + + execute_at_null= FALSE; + execute_at= ltime_utc; + DBUG_RETURN(0); + +wrong_value: + report_bad_value("AT", item_execute_at); + DBUG_RETURN(ER_WRONG_VALUE); +} + + +/* + Sets time for execution of multi-time event.s + + SYNOPSIS + Event_parse_data::init_interval() + thd Thread + + RETURN VALUE + 0 OK + EVEX_BAD_PARAMS Interval is not positive or MICROSECOND (reported) + ER_WRONG_VALUE Wrong value for interval (reported) +*/ + +int +Event_parse_data::init_interval(THD *thd) +{ + INTERVAL interval_tmp; + + DBUG_ENTER("Event_parse_data::init_interval"); + if (!item_expression) + DBUG_RETURN(0); + + switch (interval) { + case INTERVAL_MINUTE_MICROSECOND: + case INTERVAL_HOUR_MICROSECOND: + case INTERVAL_DAY_MICROSECOND: + case INTERVAL_SECOND_MICROSECOND: + case INTERVAL_MICROSECOND: + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "MICROSECOND"); + 
DBUG_RETURN(EVEX_BAD_PARAMS); + default: + break; + } + + if (item_expression->fix_fields(thd, &item_expression)) + goto wrong_value; + + if (get_interval_value(thd, item_expression, interval, &interval_tmp)) + goto wrong_value; + + expression= 0; + + switch (interval) { + case INTERVAL_YEAR: + expression= interval_tmp.year; + break; + case INTERVAL_QUARTER: + case INTERVAL_MONTH: + expression= interval_tmp.month; + break; + case INTERVAL_WEEK: + case INTERVAL_DAY: + expression= interval_tmp.day; + break; + case INTERVAL_HOUR: + expression= interval_tmp.hour; + break; + case INTERVAL_MINUTE: + expression= interval_tmp.minute; + break; + case INTERVAL_SECOND: + expression= interval_tmp.second; + break; + case INTERVAL_YEAR_MONTH: // Allow YEAR-MONTH YYYYYMM + expression= interval_tmp.year* 12 + interval_tmp.month; + break; + case INTERVAL_DAY_HOUR: + expression= interval_tmp.day* 24 + interval_tmp.hour; + break; + case INTERVAL_DAY_MINUTE: + expression= (interval_tmp.day* 24 + interval_tmp.hour) * 60 + + interval_tmp.minute; + break; + case INTERVAL_HOUR_SECOND: /* day is anyway 0 */ + case INTERVAL_DAY_SECOND: + /* DAY_SECOND having problems because of leap seconds? */ + expression= ((interval_tmp.day* 24 + interval_tmp.hour) * 60 + + interval_tmp.minute)*60 + + interval_tmp.second; + break; + case INTERVAL_HOUR_MINUTE: + expression= interval_tmp.hour * 60 + interval_tmp.minute; + break; + case INTERVAL_MINUTE_SECOND: + expression= interval_tmp.minute * 60 + interval_tmp.second; + break; + case INTERVAL_LAST: + DBUG_ASSERT(0); + default: + ;/* these are the microsec stuff */ + } + if (interval_tmp.neg || expression == 0 || + expression > EVEX_MAX_INTERVAL_VALUE) + { + my_error(ER_EVENT_INTERVAL_NOT_POSITIVE_OR_TOO_BIG, MYF(0)); + DBUG_RETURN(EVEX_BAD_PARAMS); + } + + DBUG_RETURN(0); + +wrong_value: + report_bad_value("INTERVAL", item_expression); + DBUG_RETURN(ER_WRONG_VALUE); +} + + +/* + Sets STARTS. + + SYNOPSIS + Event_parse_data::init_starts() + expr how much? 
+ + NOTES + Note that activation time is not execution time. + EVERY 5 MINUTE STARTS "2004-12-12 10:00:00" means that + the event will be executed every 5 minutes but this will + start at the date shown above. Expressions are possible : + DATE_ADD(NOW(), INTERVAL 1 DAY) -- start tomorrow at + same time. + + RETURN VALUE + 0 OK + ER_WRONG_VALUE Starts before now +*/ + +int +Event_parse_data::init_starts(THD *thd) +{ + uint not_used; + MYSQL_TIME ltime; + my_time_t ltime_utc; + + DBUG_ENTER("Event_parse_data::init_starts"); + if (!item_starts) + DBUG_RETURN(0); + + if (item_starts->fix_fields(thd, &item_starts)) + goto wrong_value; + + if (item_starts->get_date(thd, <ime, TIME_NO_ZERO_DATE | + thd->temporal_round_mode())) + goto wrong_value; + + ltime_utc= TIME_to_timestamp(thd, <ime, ¬_used); + if (!ltime_utc) + goto wrong_value; + + DBUG_PRINT("info",("now: %ld starts: %ld", + (long) thd->query_start(), (long) ltime_utc)); + + starts_null= FALSE; + starts= ltime_utc; + DBUG_RETURN(0); + +wrong_value: + report_bad_value("STARTS", item_starts); + DBUG_RETURN(ER_WRONG_VALUE); +} + + +/* + Sets ENDS (deactivation time). + + SYNOPSIS + Event_parse_data::init_ends() + thd THD + + NOTES + Note that activation time is not execution time. + EVERY 5 MINUTE ENDS "2004-12-12 10:00:00" means that + the event will be executed every 5 minutes but this will + end at the date shown above. Expressions are possible : + DATE_ADD(NOW(), INTERVAL 1 DAY) -- end tomorrow at + same time. 
+ + RETURN VALUE + 0 OK + EVEX_BAD_PARAMS Error (reported) +*/ + +int +Event_parse_data::init_ends(THD *thd) +{ + uint not_used; + MYSQL_TIME ltime; + my_time_t ltime_utc; + + DBUG_ENTER("Event_parse_data::init_ends"); + if (!item_ends) + DBUG_RETURN(0); + + if (item_ends->fix_fields(thd, &item_ends)) + goto error_bad_params; + + DBUG_PRINT("info", ("convert to TIME")); + if (item_ends->get_date(thd, <ime, TIME_NO_ZERO_DATE | + thd->temporal_round_mode())) + goto error_bad_params; + + ltime_utc= TIME_to_timestamp(thd, <ime, ¬_used); + if (!ltime_utc) + goto error_bad_params; + + /* Check whether ends is after starts */ + DBUG_PRINT("info", ("ENDS after STARTS?")); + if (!starts_null && starts >= ltime_utc) + goto error_bad_params; + + check_if_in_the_past(thd, ltime_utc); + + ends_null= FALSE; + ends= ltime_utc; + DBUG_RETURN(0); + +error_bad_params: + my_error(ER_EVENT_ENDS_BEFORE_STARTS, MYF(0)); + DBUG_RETURN(EVEX_BAD_PARAMS); +} + + +/* + Prints an error message about invalid value. Internally used + during input data verification + + SYNOPSIS + Event_parse_data::report_bad_value() + item_name The name of the parameter + bad_item The parameter +*/ + +void +Event_parse_data::report_bad_value(const char *item_name, Item *bad_item) +{ + char buff[120]; + String str(buff,(uint32) sizeof(buff), system_charset_info); + String *str2= bad_item->fixed() ? bad_item->val_str(&str) : NULL; + my_error(ER_WRONG_VALUE, MYF(0), item_name, str2? str2->c_ptr_safe():"NULL"); +} + + +/* + Checks for validity the data gathered during the parsing phase. 
+ + SYNOPSIS + Event_parse_data::check_parse_data() + thd Thread + + RETURN VALUE + FALSE OK + TRUE Error (reported) +*/ + +bool +Event_parse_data::check_parse_data(THD *thd) +{ + bool ret; + DBUG_ENTER("Event_parse_data::check_parse_data"); + DBUG_PRINT("info", ("execute_at: %p expr=%p starts=%p ends=%p", + item_execute_at, item_expression, + item_starts, item_ends)); + + init_name(thd, identifier); + + init_definer(thd); + + ret= init_execute_at(thd) || init_interval(thd) || init_starts(thd) || + init_ends(thd); + check_originator_id(thd); + DBUG_RETURN(ret); +} + + +/* + Inits definer (definer_user and definer_host) during parsing. + + SYNOPSIS + Event_parse_data::init_definer() + thd Thread +*/ + +void +Event_parse_data::init_definer(THD *thd) +{ + DBUG_ENTER("Event_parse_data::init_definer"); + + DBUG_ASSERT(thd->lex->definer); + + const char *definer_user= thd->lex->definer->user.str; + const char *definer_host= thd->lex->definer->host.str; + size_t definer_user_len= thd->lex->definer->user.length; + size_t definer_host_len= thd->lex->definer->host.length; + char *tmp; + DBUG_PRINT("info",("init definer_user thd->mem_root: %p " + "definer_user: %p", thd->mem_root, + definer_user)); + + /* + 1 for @ */ + DBUG_PRINT("info",("init definer as whole")); + definer.length= definer_user_len + definer_host_len + 1; + definer.str= tmp= (char*) thd->alloc(definer.length + 1); + + DBUG_PRINT("info",("copy the user")); + strmake(tmp, definer_user, definer_user_len); + tmp[definer_user_len]= '@'; + + DBUG_PRINT("info",("copy the host")); + strmake(tmp + definer_user_len + 1, definer_host, definer_host_len); + DBUG_PRINT("info",("definer [%s] initted", definer.str)); + + DBUG_VOID_RETURN; +} + + +/** + Set the originator id of the event to the server_id if executing on + the master or set to the server_id of the master if executing on + the slave. If executing on slave, also set status to SLAVESIDE_DISABLED. 
+ + SYNOPSIS + Event_parse_data::check_originator_id() +*/ +void Event_parse_data::check_originator_id(THD *thd) +{ + /* Disable replicated events on slave. */ + if ((WSREP(thd) && IF_WSREP(thd->wsrep_applier, 0)) || + (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL) || + (thd->system_thread == SYSTEM_THREAD_SLAVE_IO)) + { + DBUG_PRINT("info", ("Invoked object status set to SLAVESIDE_DISABLED.")); + if ((status == Event_parse_data::ENABLED) || + (status == Event_parse_data::DISABLED)) + { + status= Event_parse_data::SLAVESIDE_DISABLED; + status_changed= true; + } + originator = thd->variables.server_id; + } + else + originator = global_system_variables.server_id; +} diff --git a/sql/event_parse_data.h b/sql/event_parse_data.h new file mode 100644 index 00000000..4e68295a --- /dev/null +++ b/sql/event_parse_data.h @@ -0,0 +1,132 @@ +/* + Copyright (c) 2008, 2011, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef _EVENT_PARSE_DATA_H_ +#define _EVENT_PARSE_DATA_H_ + +#include "sql_alloc.h" + +class Item; +class THD; +class sp_name; + +#define EVEX_GET_FIELD_FAILED -2 +#define EVEX_BAD_PARAMS -5 +#define EVEX_MICROSECOND_UNSUP -6 +#define EVEX_MAX_INTERVAL_VALUE 1000000000L + +class Event_parse_data : public Sql_alloc +{ +public: + /* + ENABLED = feature can function normally (is turned on) + SLAVESIDE_DISABLED = feature is turned off on slave + DISABLED = feature is turned off + */ + enum enum_status + { + ENABLED = 1, + DISABLED, + SLAVESIDE_DISABLED + }; + + enum enum_on_completion + { + /* + On CREATE EVENT, DROP is the DEFAULT as per the docs. + On ALTER EVENT, "no change" is the DEFAULT. + */ + ON_COMPLETION_DEFAULT = 0, + ON_COMPLETION_DROP, + ON_COMPLETION_PRESERVE + }; + + int on_completion; + int status; + bool status_changed; + uint32 originator; + /* + do_not_create will be set if STARTS time is in the past and + on_completion == ON_COMPLETION_DROP. 
+ */ + bool do_not_create; + + bool body_changed; + + LEX_CSTRING dbname; + LEX_CSTRING name; + LEX_CSTRING definer;// combination of user and host + LEX_CSTRING comment; + + Item* item_starts; + Item* item_ends; + Item* item_execute_at; + + my_time_t starts; + my_time_t ends; + my_time_t execute_at; + bool starts_null; + bool ends_null; + bool execute_at_null; + + sp_name *identifier; + Item* item_expression; + longlong expression; + interval_type interval; + + static Event_parse_data * + new_instance(THD *thd); + + bool + check_parse_data(THD *thd); + + bool + check_dates(THD *thd, int previous_on_completion); + +private: + + void + init_definer(THD *thd); + + void + init_name(THD *thd, sp_name *spn); + + int + init_execute_at(THD *thd); + + int + init_interval(THD *thd); + + int + init_starts(THD *thd); + + int + init_ends(THD *thd); + + Event_parse_data(); + ~Event_parse_data(); + + void + report_bad_value(const char *item_name, Item *bad_item); + + void + check_if_in_the_past(THD *thd, my_time_t ltime_utc); + + Event_parse_data(const Event_parse_data &); /* Prevent use of these */ + void check_originator_id(THD *thd); + void operator=(Event_parse_data &); +}; +#endif diff --git a/sql/event_queue.cc b/sql/event_queue.cc new file mode 100644 index 00000000..ebd2dfee --- /dev/null +++ b/sql/event_queue.cc @@ -0,0 +1,848 @@ +/* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "event_queue.h" +#include "event_data_objects.h" +#include "event_db_repository.h" +#include "events.h" +#include "sql_audit.h" +#include "tztime.h" // my_tz_find, my_tz_OFFSET0, struct Time_zone +#include "log.h" // sql_print_error +#include "sql_class.h" // struct THD +#include "mysql/psi/mysql_sp.h" + +/** + @addtogroup Event_Scheduler + @{ +*/ + +#define EVENT_QUEUE_INITIAL_SIZE 30 +#define EVENT_QUEUE_EXTENT 30 + +#ifdef __GNUC__ +#if __GNUC__ >= 2 +#define SCHED_FUNC __FUNCTION__ +#endif +#else +#define SCHED_FUNC "" +#endif + +#define LOCK_QUEUE_DATA() lock_data(SCHED_FUNC, __LINE__) +#define UNLOCK_QUEUE_DATA() unlock_data(SCHED_FUNC, __LINE__) + +/* + Compares the execute_at members of two Event_queue_element instances. + Used as callback for the prioritized queue when shifting + elements inside. + + SYNOPSIS + event_queue_element_data_compare_q() + vptr Not used (set it to NULL) + a First Event_queue_element object + b Second Event_queue_element object + + RETURN VALUE + -1 a->execute_at < b->execute_at + 0 a->execute_at == b->execute_at + 1 a->execute_at > b->execute_at + + NOTES + execute_at.second_part is not considered during comparison +*/ + +extern "C" int event_queue_element_compare_q(void *, uchar *, uchar *); + +int event_queue_element_compare_q(void *vptr, uchar* a, uchar *b) +{ + Event_queue_element *left = (Event_queue_element *)a; + Event_queue_element *right = (Event_queue_element *)b; + my_time_t lhs = left->execute_at; + my_time_t rhs = right->execute_at; + + if (left->status == Event_parse_data::DISABLED) + return right->status != Event_parse_data::DISABLED; + + if (right->status == Event_parse_data::DISABLED) + return 1; + + return (lhs < rhs ? 
-1 : (lhs > rhs ? 1 : 0)); +} + + +/* + Constructor of class Event_queue. + + SYNOPSIS + Event_queue::Event_queue() +*/ + +Event_queue::Event_queue() + :next_activation_at(0), + mutex_last_locked_at_line(0), + mutex_last_unlocked_at_line(0), + mutex_last_attempted_lock_at_line(0), + mutex_last_locked_in_func("n/a"), + mutex_last_unlocked_in_func("n/a"), + mutex_last_attempted_lock_in_func("n/a"), + mutex_queue_data_locked(FALSE), + mutex_queue_data_attempting_lock(FALSE), + waiting_on_cond(FALSE) +{ + mysql_mutex_init(key_LOCK_event_queue, &LOCK_event_queue, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_queue_state, &COND_queue_state, NULL); +} + + +Event_queue::~Event_queue() +{ + deinit_queue(); + mysql_mutex_destroy(&LOCK_event_queue); + mysql_cond_destroy(&COND_queue_state); +} + + +/* + This is a queue's constructor. Until this method is called, the + queue is unusable. We don't use a C++ constructor instead in + order to be able to check the return value. The queue is + initialized once at server startup. Initialization can fail in + case of a failure reading events from the database or out of + memory. + + SYNOPSIS + Event_queue::init() + + RETURN VALUE + FALSE OK + TRUE Error +*/ + +bool +Event_queue::init_queue(THD *thd) +{ + DBUG_ENTER("Event_queue::init_queue"); + DBUG_PRINT("enter", ("this: %p", this)); + + LOCK_QUEUE_DATA(); + + if (::init_queue(&queue, EVENT_QUEUE_INITIAL_SIZE , 0 /*offset*/, + 0 /*max_on_top*/, event_queue_element_compare_q, + NullS, 0, EVENT_QUEUE_EXTENT)) + { + sql_print_error("Event Scheduler: Can't initialize the execution queue"); + goto err; + } + + UNLOCK_QUEUE_DATA(); + DBUG_RETURN(FALSE); + +err: + UNLOCK_QUEUE_DATA(); + DBUG_RETURN(TRUE); +} + + +/* + Deinits the queue. Remove all elements from it and destroys them + too. 
+ + SYNOPSIS + Event_queue::deinit_queue() +*/ + +void +Event_queue::deinit_queue() +{ + DBUG_ENTER("Event_queue::deinit_queue"); + + LOCK_QUEUE_DATA(); + empty_queue(); + delete_queue(&queue); + UNLOCK_QUEUE_DATA(); + + DBUG_VOID_RETURN; +} + + +/** + Adds an event to the queue. + + Compute the next execution time for an event, and if it is still + active, add it to the queue. Otherwise delete it. + The object is left intact in case of an error. Otherwise + the queue container assumes ownership of it. + + @param[in] thd thread handle + @param[in] new_element a new element to add to the queue + @param[out] created set to TRUE if no error and the element is + added to the queue, FALSE otherwise + + @retval TRUE an error occurred. The value of created is undefined, + the element was not deleted. + @retval FALSE success +*/ + +bool +Event_queue::create_event(THD *thd, Event_queue_element *new_element, + bool *created) +{ + DBUG_ENTER("Event_queue::create_event"); + DBUG_PRINT("enter", ("thd: %p et=%s.%s", thd, + new_element->dbname.str, new_element->name.str)); + + /* Will do nothing if the event is disabled */ + new_element->compute_next_execution_time(); + if (new_element->status != Event_parse_data::ENABLED) + { + delete new_element; + *created= FALSE; + DBUG_RETURN(FALSE); + } + + DBUG_PRINT("info", ("new event in the queue: %p", new_element)); + + LOCK_QUEUE_DATA(); + *created= (queue_insert_safe(&queue, (uchar *) new_element) == FALSE); + dbug_dump_queue(thd->query_start()); + mysql_cond_broadcast(&COND_queue_state); + UNLOCK_QUEUE_DATA(); + + DBUG_RETURN(!*created); +} + + +/* + Updates an event from the scheduler queue + + SYNOPSIS + Event_queue::update_event() + thd Thread + dbname Schema of the event + name Name of the event + new_schema New schema, in case of RENAME TO, otherwise NULL + new_name New name, in case of RENAME TO, otherwise NULL +*/ + +void +Event_queue::update_event(THD *thd, const LEX_CSTRING *dbname, + const LEX_CSTRING *name, + 
Event_queue_element *new_element) +{ + DBUG_ENTER("Event_queue::update_event"); + DBUG_PRINT("enter", ("thd: %p et: [%s.%s]", thd, dbname->str, + name->str)); + + if ((new_element->status == Event_parse_data::DISABLED) || + (new_element->status == Event_parse_data::SLAVESIDE_DISABLED)) + { + DBUG_PRINT("info", ("The event is disabled.")); + /* + Destroy the object but don't skip to end: because we may have to remove + object from the cache. + */ + delete new_element; + new_element= NULL; + } + else + new_element->compute_next_execution_time(); + + LOCK_QUEUE_DATA(); + find_n_remove_event(dbname, name); + + /* If not disabled event */ + if (new_element) + { + DBUG_PRINT("info", ("new event in the queue: %p", new_element)); + queue_insert_safe(&queue, (uchar *) new_element); + mysql_cond_broadcast(&COND_queue_state); + } + + dbug_dump_queue(thd->query_start()); + UNLOCK_QUEUE_DATA(); + + DBUG_VOID_RETURN; +} + + +/* + Drops an event from the queue + + SYNOPSIS + Event_queue::drop_event() + thd Thread + dbname Schema of the event to drop + name Name of the event to drop +*/ + +void +Event_queue::drop_event(THD *thd, const LEX_CSTRING *dbname, + const LEX_CSTRING *name) +{ + DBUG_ENTER("Event_queue::drop_event"); + DBUG_PRINT("enter", ("thd: %p db: %s name: %s", thd, + dbname->str, name->str)); + + LOCK_QUEUE_DATA(); + find_n_remove_event(dbname, name); + dbug_dump_queue(thd->query_start()); + UNLOCK_QUEUE_DATA(); + + /* + We don't signal here because the scheduler will catch the change + next time it wakes up. 
+ */ + + DBUG_VOID_RETURN; +} + + +/* + Drops all events from the in-memory queue and disk that match + certain pattern evaluated by a comparator function + + SYNOPSIS + Event_queue::drop_matching_events() + thd THD + pattern A pattern string + comparator The function to use for comparing + + RETURN VALUE + >=0 Number of dropped events + + NOTE + Expected is the caller to acquire lock on LOCK_event_queue +*/ + +void +Event_queue::drop_matching_events(THD *thd, const LEX_CSTRING *pattern, + bool (*comparator)(const LEX_CSTRING *, Event_basic *)) +{ + uint i; + DBUG_ENTER("Event_queue::drop_matching_events"); + DBUG_PRINT("enter", ("pattern: %s", pattern->str)); + + for (i= queue_first_element(&queue) ; + i <= queue_last_element(&queue) ; + ) + { + Event_queue_element *et= (Event_queue_element *) queue_element(&queue, i); + DBUG_PRINT("info", ("[%s.%s]?", et->dbname.str, et->name.str)); + if (comparator(pattern, et)) + { + /* + The queue is ordered. If we remove an element, then all elements + after it will shift one position to the left, if we imagine it as + an array from left to the right. In this case we should not + increment the counter and the (i <= queue_last_element() condition + is ok. + */ + queue_remove(&queue, i); + /* Drop statistics for this stored program from performance schema. */ + MYSQL_DROP_SP(SP_TYPE_EVENT, et->dbname.str, static_cast(et->dbname.length), + et->name.str, static_cast(et->name.length)); + delete et; + } + else + i++; + } + /* + We don't call mysql_cond_broadcast(&COND_queue_state); + If we remove the top event: + 1. The queue is empty. The scheduler will wake up at some time and + realize that the queue is empty. If create_event() comes in between + it will signal the scheduler + 2. The queue is not empty, but the next event after the previous top, + won't be executed any time sooner than the element we removed. Hence, + we may not notify the scheduler and it will realize the change when it + wakes up from timedwait. 
+ */ + + DBUG_VOID_RETURN; +} + + +/* + Drops all events from the in-memory queue and disk that are from + certain schema. + + SYNOPSIS + Event_queue::drop_schema_events() + thd HD + schema The schema name +*/ + +void +Event_queue::drop_schema_events(THD *thd, const LEX_CSTRING *schema) +{ + DBUG_ENTER("Event_queue::drop_schema_events"); + LOCK_QUEUE_DATA(); + drop_matching_events(thd, schema, event_basic_db_equal); + UNLOCK_QUEUE_DATA(); + DBUG_VOID_RETURN; +} + + +/* + Searches for an event in the queue + + SYNOPSIS + Event_queue::find_n_remove_event() + db The schema of the event to find + name The event to find + + NOTE + The caller should do the locking also the caller is responsible for + actual signalling in case an event is removed from the queue. +*/ + +void +Event_queue::find_n_remove_event(const LEX_CSTRING *db, + const LEX_CSTRING *name) +{ + uint i; + DBUG_ENTER("Event_queue::find_n_remove_event"); + + for (i= queue_first_element(&queue); + i <= queue_last_element(&queue); + i++) + { + Event_queue_element *et= (Event_queue_element *) queue_element(&queue, i); + DBUG_PRINT("info", ("[%s.%s]==[%s.%s]?", db->str, name->str, + et->dbname.str, et->name.str)); + if (event_basic_identifier_equal(db, name, et)) + { + queue_remove(&queue, i); + delete et; + break; + } + } + + DBUG_VOID_RETURN; +} + + +/* + Recalculates activation times in the queue. There is one reason for + that. Because the values (execute_at) by which the queue is ordered are + changed by calls to compute_next_execution_time() on a request from the + scheduler thread, if it is not running then the values won't be updated. + Once the scheduler is started again the values has to be recalculated + so they are right for the current time. 
+ + SYNOPSIS + Event_queue::recalculate_activation_times() + thd Thread +*/ + +void +Event_queue::recalculate_activation_times(THD *thd) +{ + uint i; + DBUG_ENTER("Event_queue::recalculate_activation_times"); + + LOCK_QUEUE_DATA(); + DBUG_PRINT("info", ("%u loaded events to be recalculated", queue.elements)); + for (i= queue_first_element(&queue); + i <= queue_last_element(&queue); + i++) + { + ((Event_queue_element*)queue_element(&queue, i))->compute_next_execution_time(); + } + queue_fix(&queue); + /* + The disabled elements are moved to the end during the `fix`. + Start from the end and remove all of the elements which are + disabled. When we find the first non-disabled one we break, as we + have removed all. The queue has been ordered in a way the disabled + events are at the end. + */ + for (i= queue_last_element(&queue); + (int) i >= (int) queue_first_element(&queue); + i--) + { + Event_queue_element *element= + (Event_queue_element*)queue_element(&queue, i); + if (element->status != Event_parse_data::DISABLED) + break; + /* + This won't cause queue re-order, because we remove + always the last element. + */ + queue_remove(&queue, i); + delete element; + } + UNLOCK_QUEUE_DATA(); + + /* + XXX: The events are dropped only from memory and not from disk + even if `drop_list[j]->dropped` is TRUE. There will be still on the + disk till next server restart. + Please add code here to do it. + */ + + DBUG_VOID_RETURN; +} + + +/* + Empties the queue and destroys the Event_queue_element objects in the + queue. + + SYNOPSIS + Event_queue::empty_queue() + + NOTE + Should be called with LOCK_event_queue locked +*/ + +void +Event_queue::empty_queue() +{ + uint i; + DBUG_ENTER("Event_queue::empty_queue"); + DBUG_PRINT("enter", ("Purging the queue. %u element(s)", queue.elements)); + + if (queue.elements) + sql_print_information("Event Scheduler: Purging the queue. 
%u events", + queue.elements); + /* empty the queue */ + for (i= queue_first_element(&queue); + i <= queue_last_element(&queue); + i++) + { + Event_queue_element *et= (Event_queue_element *) queue_element(&queue, i); + delete et; + } + resize_queue(&queue, 0); + DBUG_VOID_RETURN; +} + + +/* + Dumps the queue to the trace log. + + SYNOPSIS + Event_queue::dbug_dump_queue() + now Current timestamp +*/ + +void +Event_queue::dbug_dump_queue(my_time_t when) +{ +#ifdef DBUG_TRACE + my_time_t now= when; + Event_queue_element *et; + uint i; + DBUG_ENTER("Event_queue::dbug_dump_queue"); + DBUG_PRINT("info", ("Dumping queue . Elements=%u", queue.elements)); + for (i= queue_first_element(&queue); + i <= queue_last_element(&queue); + i++) + { + et= ((Event_queue_element*)queue_element(&queue, i)); + DBUG_PRINT("info", ("et: %p name: %s.%s", et, + et->dbname.str, et->name.str)); + DBUG_PRINT("info", ("exec_at: %lu starts: %lu ends: %lu execs_so_far: %u " + "expr: %ld et.exec_at: %ld now: %ld " + "(et.exec_at - now): %d if: %d", + (long) et->execute_at, (long) et->starts, + (long) et->ends, et->execution_count, + (long) et->expression, (long) et->execute_at, + (long) now, (int) (et->execute_at - now), + et->execute_at <= now)); + } + DBUG_VOID_RETURN; +#endif +} + +/* + Checks whether the top of the queue is elligible for execution and + returns an Event_job_data instance in case it should be executed. + `now` is compared against `execute_at` of the top element in the queue. + + SYNOPSIS + Event_queue::get_top_for_execution_if_time() + thd [in] Thread + event_name [out] The object to execute + + RETURN VALUE + FALSE No error. 
event_name != NULL + TRUE Serious error +*/ + +bool +Event_queue::get_top_for_execution_if_time(THD *thd, + Event_queue_element_for_exec **event_name) +{ + bool ret= FALSE; + *event_name= NULL; + my_time_t UNINIT_VAR(last_executed); + int UNINIT_VAR(status); + DBUG_ENTER("Event_queue::get_top_for_execution_if_time"); + + LOCK_QUEUE_DATA(); + for (;;) + { + Event_queue_element *top= NULL; + + /* Break loop if thd has been killed */ + if (thd->killed) + { + DBUG_PRINT("info", ("thd->killed=%d", thd->killed)); + goto end; + } + + if (!queue.elements) + { + /* There are no events in the queue */ + next_activation_at= 0; + + /* Release any held audit resources before waiting */ + mysql_audit_release(thd); + + /* Wait on condition until signaled. Release LOCK_queue while waiting. */ + cond_wait(thd, NULL, & stage_waiting_on_empty_queue, SCHED_FUNC, __FILE__, __LINE__); + + continue; + } + + top= (Event_queue_element*) queue_top(&queue); + + thd->set_start_time(); /* Get current time */ + + next_activation_at= top->execute_at; + if (next_activation_at > thd->query_start()) + { + /* + Not yet time for top event, wait on condition with + time or until signaled. Release LOCK_queue while waiting. 
+ */ + struct timespec top_time= { next_activation_at, 0 }; + + /* Release any held audit resources before waiting */ + mysql_audit_release(thd); + + cond_wait(thd, &top_time, &stage_waiting_for_next_activation, SCHED_FUNC, __FILE__, __LINE__); + + continue; + } + + if (!(*event_name= new Event_queue_element_for_exec()) || + (*event_name)->init(top->dbname, top->name)) + { + delete *event_name; + ret= TRUE; + break; + } + + DBUG_PRINT("info", ("Ready for execution")); + top->mark_last_executed(thd); + if (top->compute_next_execution_time()) + top->status= Event_parse_data::DISABLED; + DBUG_PRINT("info", ("event %s status is %d", top->name.str, top->status)); + + top->execution_count++; + (*event_name)->dropped= top->dropped; + /* + Save new values of last_executed timestamp and event status on stack + in order to be able to update event description in system table once + QUEUE_DATA lock is released. + */ + last_executed= top->last_executed; + status= top->status; + + if (top->status == Event_parse_data::DISABLED) + { + DBUG_PRINT("info", ("removing from the queue")); + sql_print_information("Event Scheduler: Last execution of %s.%s. %s", + top->dbname.str, top->name.str, + top->dropped? "Dropping.":""); + delete top; + queue_remove_top(&queue); + } + else + queue_replace_top(&queue); + + dbug_dump_queue(thd->query_start()); + break; + } +end: + UNLOCK_QUEUE_DATA(); + + DBUG_PRINT("info", ("returning %d et_new: %p ", + ret, *event_name)); + + if (*event_name) + { + DBUG_PRINT("info", ("db: %s name: %s", + (*event_name)->dbname.str, (*event_name)->name.str)); + + Event_db_repository *db_repository= Events::get_db_repository(); + (void) db_repository->update_timing_fields_for_event(thd, + &(*event_name)->dbname, &(*event_name)->name, + last_executed, (ulonglong) status); + } + + DBUG_RETURN(ret); +} + + +/* + Auxiliary function for locking LOCK_event_queue. 
Used by the + LOCK_QUEUE_DATA macro + + SYNOPSIS + Event_queue::lock_data() + func Which function is requesting mutex lock + line On which line mutex lock is requested +*/ + +void +Event_queue::lock_data(const char *func, uint line) +{ + DBUG_ENTER("Event_queue::lock_data"); + DBUG_PRINT("enter", ("func=%s line=%u", func, line)); + mutex_last_attempted_lock_in_func= func; + mutex_last_attempted_lock_at_line= line; + mutex_queue_data_attempting_lock= TRUE; + mysql_mutex_lock(&LOCK_event_queue); + mutex_last_attempted_lock_in_func= ""; + mutex_last_attempted_lock_at_line= 0; + mutex_queue_data_attempting_lock= FALSE; + + mutex_last_locked_in_func= func; + mutex_last_locked_at_line= line; + mutex_queue_data_locked= TRUE; + + DBUG_VOID_RETURN; +} + + +/* + Auxiliary function for unlocking LOCK_event_queue. Used by the + UNLOCK_QUEUE_DATA macro + + SYNOPSIS + Event_queue::unlock_data() + func Which function is requesting mutex unlock + line On which line mutex unlock is requested +*/ + +void +Event_queue::unlock_data(const char *func, uint line) +{ + DBUG_ENTER("Event_queue::unlock_data"); + DBUG_PRINT("enter", ("func=%s line=%u", func, line)); + mutex_last_unlocked_at_line= line; + mutex_queue_data_locked= FALSE; + mutex_last_unlocked_in_func= func; + mysql_mutex_unlock(&LOCK_event_queue); + DBUG_VOID_RETURN; +} + + +/* + Wrapper for mysql_cond_wait/timedwait + + SYNOPSIS + Event_queue::cond_wait() + thd Thread (Could be NULL during shutdown procedure) + msg Message for thd->proc_info + abstime If not null then call mysql_cond_timedwait() + func Which function is requesting cond_wait + line On which line cond_wait is requested +*/ + +void +Event_queue::cond_wait(THD *thd, struct timespec *abstime, const PSI_stage_info *stage, + const char *src_func, const char *src_file, uint src_line) +{ + DBUG_ENTER("Event_queue::cond_wait"); + waiting_on_cond= TRUE; + mutex_last_unlocked_at_line= src_line; + mutex_queue_data_locked= FALSE; + mutex_last_unlocked_in_func= src_func; + 
+ thd->enter_cond(&COND_queue_state, &LOCK_event_queue, stage, NULL, src_func, src_file, src_line); + + if (!thd->killed) + { + DBUG_PRINT("info", ("pthread_cond_%swait", abstime ? "timed" : "")); + if (!abstime) + mysql_cond_wait(&COND_queue_state, &LOCK_event_queue); + else + mysql_cond_timedwait(&COND_queue_state, &LOCK_event_queue, abstime); + } + + mutex_last_locked_in_func= src_func; + mutex_last_locked_at_line= src_line; + mutex_queue_data_locked= TRUE; + waiting_on_cond= FALSE; + + /* + This will free the lock so we need to relock. Not the best thing to + do but we need to obey cond_wait() + */ + thd->exit_cond(NULL, src_func, src_file, src_line); + lock_data(src_func, src_line); + + DBUG_VOID_RETURN; +} + + +/* + Dumps the internal status of the queue + + SYNOPSIS + Event_queue::dump_internal_status() +*/ + +void +Event_queue::dump_internal_status() +{ + DBUG_ENTER("Event_queue::dump_internal_status"); + + /* element count */ + puts(""); + puts("Event queue status:"); + printf("Element count : %u\n", queue.elements); + printf("Data locked : %s\n", mutex_queue_data_locked? "YES":"NO"); + printf("Attempting lock : %s\n", mutex_queue_data_attempting_lock? "YES":"NO"); + printf("LLA : %s:%u\n", mutex_last_locked_in_func, + mutex_last_locked_at_line); + printf("LUA : %s:%u\n", mutex_last_unlocked_in_func, + mutex_last_unlocked_at_line); + if (mutex_last_attempted_lock_at_line) + printf("Last lock attempt at: %s:%u\n", mutex_last_attempted_lock_in_func, + mutex_last_attempted_lock_at_line); + printf("WOC : %s\n", waiting_on_cond? 
"YES":"NO"); + + MYSQL_TIME time; + my_tz_OFFSET0->gmt_sec_to_TIME(&time, next_activation_at); + if (time.year != 1970) + printf("Next activation : %04d-%02d-%02d %02d:%02d:%02d\n", + time.year, time.month, time.day, time.hour, time.minute, time.second); + else + printf("Next activation : never"); + + DBUG_VOID_RETURN; +} + +/** + @} (End of group Event_Scheduler) +*/ diff --git a/sql/event_queue.h b/sql/event_queue.h new file mode 100644 index 00000000..2b6a0a59 --- /dev/null +++ b/sql/event_queue.h @@ -0,0 +1,135 @@ +#ifndef _EVENT_QUEUE_H_ +#define _EVENT_QUEUE_H_ +/* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + + @addtogroup Event_Scheduler + @{ + + @file event_queue.h + + Queue of events awaiting execution. +*/ + +#ifdef HAVE_PSI_INTERFACE +extern PSI_mutex_key key_LOCK_event_queue; +extern PSI_cond_key key_COND_queue_state; +#endif /* HAVE_PSI_INTERFACE */ + +#include "queues.h" // QUEUE +#include "sql_string.h" /* LEX_CSTRING */ +#include "my_time.h" /* my_time_t, interval_type */ + +class Event_basic; +class Event_queue_element; +class Event_queue_element_for_exec; + +class THD; + +/** + Queue of active events awaiting execution. 
+*/ + +class Event_queue +{ +public: + Event_queue(); + ~Event_queue(); + + bool + init_queue(THD *thd); + + /* Methods for queue management follow */ + + bool + create_event(THD *thd, Event_queue_element *new_element, + bool *created); + + void + update_event(THD *thd, const LEX_CSTRING *dbname, const LEX_CSTRING *name, + Event_queue_element *new_element); + + void + drop_event(THD *thd, const LEX_CSTRING *dbname, const LEX_CSTRING *name); + + void + drop_schema_events(THD *thd, const LEX_CSTRING *schema); + + void + recalculate_activation_times(THD *thd); + + bool + get_top_for_execution_if_time(THD *thd, + Event_queue_element_for_exec **event_name); + + + void + dump_internal_status(); + +private: + void + empty_queue(); + + void + deinit_queue(); + /* helper functions for working with mutexes & conditionals */ + void + lock_data(const char *func, uint line); + + void + unlock_data(const char *func, uint line); + + void + cond_wait(THD *thd, struct timespec *abstime, const PSI_stage_info *stage, + const char *src_func, const char *src_file, uint src_line); + + void + find_n_remove_event(const LEX_CSTRING *db, const LEX_CSTRING *name); + + + void + drop_matching_events(THD *thd, const LEX_CSTRING *pattern, + bool (*)(const LEX_CSTRING*, Event_basic *)); + + + void + dbug_dump_queue(my_time_t now); + + /* LOCK_event_queue is the mutex which protects the access to the queue. 
*/ + mysql_mutex_t LOCK_event_queue; + mysql_cond_t COND_queue_state; + + /* The sorted queue with the Event_queue_element objects */ + QUEUE queue; + + my_time_t next_activation_at; + + uint mutex_last_locked_at_line; + uint mutex_last_unlocked_at_line; + uint mutex_last_attempted_lock_at_line; + const char* mutex_last_locked_in_func; + const char* mutex_last_unlocked_in_func; + const char* mutex_last_attempted_lock_in_func; + bool mutex_queue_data_locked; + bool mutex_queue_data_attempting_lock; + bool waiting_on_cond; +}; +/** + @} (End of group Event_Scheduler) +*/ + +#endif /* _EVENT_QUEUE_H_ */ diff --git a/sql/event_scheduler.cc b/sql/event_scheduler.cc new file mode 100644 index 00000000..97529bd9 --- /dev/null +++ b/sql/event_scheduler.cc @@ -0,0 +1,842 @@ +/* Copyright (c) 2006, 2019, Oracle and/or its affiliates. + Copyright (c) 2009, 2020, MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "event_scheduler.h" +#include "events.h" +#include "event_data_objects.h" +#include "event_queue.h" +#include "event_db_repository.h" +#include "sql_connect.h" // init_new_connection_handler_thread +#include "sql_class.h" + +/** + @addtogroup Event_Scheduler + @{ +*/ + +#ifdef __GNUC__ +#if __GNUC__ >= 2 +#define SCHED_FUNC __FUNCTION__ +#endif +#else +#define SCHED_FUNC "" +#endif + +#define LOCK_DATA() lock_data(SCHED_FUNC, __LINE__) +#define UNLOCK_DATA() unlock_data(SCHED_FUNC, __LINE__) +#define COND_STATE_WAIT(mythd, abstime, stage) \ + cond_wait(mythd, abstime, stage, SCHED_FUNC, __FILE__, __LINE__) + +extern pthread_attr_t connection_attrib; +extern ulong event_executed; + +Event_db_repository *Event_worker_thread::db_repository; + + +static +const LEX_CSTRING scheduler_states_names[] = +{ + { STRING_WITH_LEN("INITIALIZED") }, + { STRING_WITH_LEN("RUNNING") }, + { STRING_WITH_LEN("STOPPING") } +}; + +struct scheduler_param { + THD *thd; + Event_scheduler *scheduler; +}; + + +/* + Prints the stack of infos, warnings, errors from thd to + the console so it can be fetched by the logs-into-tables and + checked later. 
+ + SYNOPSIS + evex_print_warnings + thd Thread used during the execution of the event + et The event itself +*/ + +void +Event_worker_thread::print_warnings(THD *thd, Event_job_data *et) +{ + const Sql_condition *err; + DBUG_ENTER("evex_print_warnings"); + if (thd->get_stmt_da()->is_warning_info_empty()) + DBUG_VOID_RETURN; + + char msg_buf[10 * STRING_BUFFER_USUAL_SIZE]; + char prefix_buf[5 * STRING_BUFFER_USUAL_SIZE]; + String prefix(prefix_buf, sizeof(prefix_buf), system_charset_info); + prefix.length(0); + prefix.append(STRING_WITH_LEN("Event Scheduler: [")); + + prefix.append(et->definer.str, et->definer.length, system_charset_info); + prefix.append(STRING_WITH_LEN("][")); + prefix.append(et->dbname.str, et->dbname.length, system_charset_info); + prefix.append('.'); + prefix.append(et->name.str, et->name.length, system_charset_info); + prefix.append(STRING_WITH_LEN("] ")); + + Diagnostics_area::Sql_condition_iterator it= + thd->get_stmt_da()->sql_conditions(); + while ((err= it++)) + { + String err_msg(msg_buf, sizeof(msg_buf), system_charset_info); + /* set it to 0 or we start adding at the end. That's the trick ;) */ + err_msg.length(0); + err_msg.append(prefix); + err_msg.append(err->get_message_text(), + err->get_message_octet_length(), system_charset_info); + DBUG_ASSERT(err->get_level() < 3); + (sql_print_message_handlers[err->get_level()])("%*s", err_msg.length(), + err_msg.c_ptr_safe()); + } + DBUG_VOID_RETURN; +} + + +/* + Performs post initialization of structures in a new thread. + + SYNOPSIS + post_init_event_thread() + thd Thread + + NOTES + Before this is called, one should not do any DBUG_XXX() calls. + +*/ + +bool +post_init_event_thread(THD *thd) +{ + (void) init_new_connection_handler_thread(); + if (init_thr_lock()) + { + thd->cleanup(); + return TRUE; + } + thd->store_globals(); + return FALSE; +} + + +/* + Cleans up the THD and the threaded environment of the thread. 
+ + SYNOPSIS + deinit_event_thread() + thd Thread +*/ + +void +deinit_event_thread(THD *thd) +{ + thd->proc_info= "Clearing"; + DBUG_PRINT("exit", ("Event thread finishing")); + server_threads.erase(thd); + delete thd; +} + + +/* + Performs pre- mysql_thread_create() initialisation of THD. Do this + in the thread that will pass THD to the child thread. In the + child thread call post_init_event_thread(). + + SYNOPSIS + pre_init_event_thread() + thd The THD of the thread. Has to be allocated by the caller. + + NOTES + 1. The host of the thead is my_localhost + 2. thd->net is initted with NULL - no communication. +*/ + +void +pre_init_event_thread(THD* thd) +{ + THD *orig_thd= current_thd; + DBUG_ENTER("pre_init_event_thread"); + + set_current_thd(thd); + thd->client_capabilities= 0; + thd->security_ctx->master_access= NO_ACL; + thd->security_ctx->db_access= NO_ACL; + thd->security_ctx->host_or_ip= (char*)my_localhost; + my_net_init(&thd->net, NULL, thd, MYF(MY_THREAD_SPECIFIC)); + thd->security_ctx->set_user((char*)"event_scheduler"); + thd->net.read_timeout= slave_net_timeout; + thd->variables.option_bits|= OPTION_AUTO_IS_NULL; + thd->client_capabilities|= CLIENT_MULTI_RESULTS; + server_threads.insert(thd); + + /* + Guarantees that we will see the thread in SHOW PROCESSLIST though its + vio is NULL. + */ + + thd->proc_info= "Initialized"; + thd->set_time(); + + /* Do not use user-supplied timeout value for system threads. 
*/ + thd->variables.lock_wait_timeout= LONG_TIMEOUT; + + set_current_thd(orig_thd); + DBUG_VOID_RETURN; +} + + +/* + Function that executes the scheduler, + + SYNOPSIS + event_scheduler_thread() + arg Pointer to `struct scheduler_param` + + RETURN VALUE + 0 OK +*/ + +pthread_handler_t +event_scheduler_thread(void *arg) +{ + /* needs to be first for thread_stack */ + THD *thd= (THD *) ((struct scheduler_param *) arg)->thd; + Event_scheduler *scheduler= ((struct scheduler_param *) arg)->scheduler; + bool res; + + thd->thread_stack= (char *)&thd; // remember where our stack is + + mysql_thread_set_psi_id(thd->thread_id); + + res= post_init_event_thread(thd); + + DBUG_ENTER("event_scheduler_thread"); + my_free(arg); + if (!res) + scheduler->run(thd); + + deinit_event_thread(thd); + DBUG_LEAVE; // Against gcc warnings + my_thread_end(); + return 0; +} + + +/** + Function that executes an event in a child thread. Setups the + environment for the event execution and cleans after that. + + SYNOPSIS + event_worker_thread() + arg The Event_job_data object to be processed + + RETURN VALUE + 0 OK +*/ + +pthread_handler_t +event_worker_thread(void *arg) +{ + THD *thd; + Event_queue_element_for_exec *event= (Event_queue_element_for_exec *)arg; + + thd= event->thd; + + mysql_thread_set_psi_id(thd->thread_id); + + Event_worker_thread worker_thread; + worker_thread.run(thd, event); + + my_thread_end(); + return 0; // Can't return anything here +} + + +/** + Function that executes an event in a child thread. Setups the + environment for the event execution and cleans after that. 
+ + SYNOPSIS + Event_worker_thread::run() + thd Thread context + event The Event_queue_element_for_exec object to be processed +*/ + +void +Event_worker_thread::run(THD *thd, Event_queue_element_for_exec *event) +{ + /* needs to be first for thread_stack */ + char my_stack; + Event_job_data job_data; + bool res; + + DBUG_ASSERT(thd->m_digest == NULL); + DBUG_ASSERT(thd->m_statement_psi == NULL); + +#ifdef HAVE_PSI_STATEMENT_INTERFACE + PSI_statement_locker_state state; + thd->m_statement_psi= MYSQL_START_STATEMENT(& state, + event->get_psi_info()->m_key, + event->dbname.str, + event->dbname.length, + thd->charset(), NULL); +#endif + + thd->thread_stack= &my_stack; // remember where our stack is + res= post_init_event_thread(thd); + + DBUG_ENTER("Event_worker_thread::run"); + DBUG_PRINT("info", ("Time is %u, THD: %p", (uint)my_time(0), thd)); + + if (res) + goto end; + + if ((res= db_repository->load_named_event(thd, &event->dbname, &event->name, + &job_data))) + { + DBUG_PRINT("error", ("Got error from load_named_event")); + goto end; + } + + thd->enable_slow_log= TRUE; + + res= job_data.execute(thd, event->dropped); + + print_warnings(thd, &job_data); + + if (res) + sql_print_information("Event Scheduler: " + "[%s].[%s.%s] event execution failed.", + job_data.definer.str, + job_data.dbname.str, job_data.name.str); +end: +#ifdef HAVE_PSI_STATEMENT_INTERFACE + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; +#endif + DBUG_ASSERT(thd->m_digest == NULL); + DBUG_PRINT("info", ("Done with Event %s.%s", event->dbname.str, + event->name.str)); + + delete event; + deinit_event_thread(thd); + + DBUG_VOID_RETURN; +} + + +Event_scheduler::Event_scheduler(Event_queue *queue_arg) + :state(INITIALIZED), + scheduler_thd(NULL), + queue(queue_arg), + mutex_last_locked_at_line(0), + mutex_last_unlocked_at_line(0), + mutex_last_locked_in_func("n/a"), + mutex_last_unlocked_in_func("n/a"), + mutex_scheduler_data_locked(FALSE), + 
waiting_on_cond(FALSE), + started_events(0) +{ + mysql_mutex_init(key_event_scheduler_LOCK_scheduler_state, + &LOCK_scheduler_state, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_event_scheduler_COND_state, &COND_state, NULL); + mysql_mutex_record_order(&LOCK_scheduler_state, &LOCK_global_system_variables); +} + + +Event_scheduler::~Event_scheduler() +{ + stop(); /* does nothing if not running */ + mysql_mutex_destroy(&LOCK_scheduler_state); + mysql_cond_destroy(&COND_state); +} + + +/** + Starts the scheduler (again). Creates a new THD and passes it to + a forked thread. Does not wait for acknowledgement from the new + thread that it has started. Asynchronous starting. Most of the + needed initializations are done in the current thread to minimize + the chance of failure in the spawned thread. + + @param[out] err_no - errno indicating type of error which caused + failure to start scheduler thread. + + @return + @retval false Success. + @retval true Error. +*/ + +bool +Event_scheduler::start(int *err_no) +{ + THD *new_thd= NULL; + bool ret= false; + pthread_t th; + struct scheduler_param *scheduler_param_value; + DBUG_ENTER("Event_scheduler::start"); + + LOCK_DATA(); + DBUG_PRINT("info", ("state before action %s", scheduler_states_names[state].str)); + if (state > INITIALIZED) + goto end; + + if (!(new_thd= new THD(next_thread_id()))) + { + sql_print_error("Event Scheduler: Cannot initialize the scheduler thread"); + ret= true; + goto end; + } + + pre_init_event_thread(new_thd); + new_thd->system_thread= SYSTEM_THREAD_EVENT_SCHEDULER; + new_thd->set_command(COM_DAEMON); + + /* + We should run the event scheduler thread under the super-user privileges. + In particular, this is needed to be able to lock the mysql.event table + for writing when the server is running in the read-only mode. + + Same goes for transaction access mode. Set it to read-write for this thd. 
+ */ + new_thd->security_ctx->master_access |= PRIV_IGNORE_READ_ONLY; + new_thd->variables.tx_read_only= false; + new_thd->tx_read_only= false; + + /* This should not be marked with MY_THREAD_SPECIFIC */ + scheduler_param_value= + (struct scheduler_param *)my_malloc(key_memory_Event_scheduler_scheduler_param, + sizeof(struct scheduler_param), MYF(0)); + scheduler_param_value->thd= new_thd; + scheduler_param_value->scheduler= this; + + scheduler_thd= new_thd; + DBUG_PRINT("info", ("Setting state go RUNNING")); + state= RUNNING; + DBUG_PRINT("info", ("Forking new thread for scheduler. THD: %p", new_thd)); + if ((*err_no= mysql_thread_create(key_thread_event_scheduler, + &th, &connection_attrib, + event_scheduler_thread, + (void*)scheduler_param_value))) + { + DBUG_PRINT("error", ("cannot create a new thread")); + sql_print_error("Event scheduler: Failed to start scheduler," + " Can not create thread for event scheduler (errno=%d)", + *err_no); + + state= INITIALIZED; + scheduler_thd= NULL; + deinit_event_thread(new_thd); + + my_free(scheduler_param_value); + ret= true; + } + +end: + UNLOCK_DATA(); + DBUG_RETURN(ret); +} + + +/* + The main loop of the scheduler. + + SYNOPSIS + Event_scheduler::run() + thd Thread + + RETURN VALUE + FALSE OK + TRUE Error (Serious error) +*/ + +bool +Event_scheduler::run(THD *thd) +{ + int res= FALSE; + DBUG_ENTER("Event_scheduler::run"); + + sql_print_information("Event Scheduler: scheduler thread started with id %lu", + (ulong) thd->thread_id); + /* + Recalculate the values in the queue because there could have been stops + in executions of the scheduler and some times could have passed by. + */ + queue->recalculate_activation_times(thd); + + while (is_running()) + { + Event_queue_element_for_exec *event_name; + + /* Gets a minimized version */ + if (queue->get_top_for_execution_if_time(thd, &event_name)) + { + sql_print_information("Event Scheduler: " + "Serious error during getting next " + "event to execute. 
Stopping"); + break; + } + + DBUG_PRINT("info", ("get_top_for_execution_if_time returned " + "event_name=%p", event_name)); + if (event_name) + { + if ((res= execute_top(event_name))) + break; + } + else + { + DBUG_ASSERT(thd->killed); + DBUG_PRINT("info", ("job_data is NULL, the thread was killed")); + } + DBUG_PRINT("info", ("state=%s", scheduler_states_names[state].str)); + free_root(thd->mem_root, MYF(0)); + /* Ensure we don't have any open tables or table locks */ + DBUG_ASSERT(thd->lock == 0); + } + + LOCK_DATA(); + scheduler_thd= NULL; + state= INITIALIZED; + DBUG_PRINT("info", ("Broadcasting COND_state back to the stoppers")); + mysql_cond_broadcast(&COND_state); + UNLOCK_DATA(); + + DBUG_RETURN(res); +} + + +/* + Creates a new THD instance and then forks a new thread, while passing + the THD pointer and job_data to it. + + SYNOPSIS + Event_scheduler::execute_top() + + RETURN VALUE + FALSE OK + TRUE Error (Serious error) +*/ + +bool +Event_scheduler::execute_top(Event_queue_element_for_exec *event_name) +{ + THD *new_thd; + pthread_t th; + int res= 0; + DBUG_ENTER("Event_scheduler::execute_top"); + + if (!(new_thd= new THD(next_thread_id()))) + goto error; + + pre_init_event_thread(new_thd); + new_thd->system_thread= SYSTEM_THREAD_EVENT_WORKER; + event_name->thd= new_thd; + DBUG_PRINT("info", ("Event %s@%s ready for start", + event_name->dbname.str, event_name->name.str)); + + /* + TODO: should use thread pool here, preferably with an upper limit + on number of threads: if too many events are scheduled for the + same time, starting all of them at once won't help them run truly + in parallel (because of the great amount of synchronization), so + we may as well execute them in sequence, keeping concurrency at a + reasonable level. 
+ */ + /* Major failure */ + if ((res= mysql_thread_create(key_thread_event_worker, + &th, &connection_attrib, event_worker_thread, + event_name))) + { + mysql_mutex_lock(&LOCK_global_system_variables); + Events::opt_event_scheduler= Events::EVENTS_OFF; + mysql_mutex_unlock(&LOCK_global_system_variables); + + sql_print_error("Event_scheduler::execute_top: Can not create event worker" + " thread (errno=%d). Stopping event scheduler", res); + + deinit_event_thread(new_thd); + goto error; + } + + started_events++; + executed_events++; // For SHOW STATUS + + DBUG_PRINT("info", ("Event is in THD: %p", new_thd)); + DBUG_RETURN(FALSE); + +error: + DBUG_PRINT("error", ("Event_scheduler::execute_top() res: %d", res)); + delete event_name; + DBUG_RETURN(TRUE); +} + + +/* + Checks whether the state of the scheduler is RUNNING + + SYNOPSIS + Event_scheduler::is_running() + + RETURN VALUE + TRUE RUNNING + FALSE Not RUNNING +*/ + +bool +Event_scheduler::is_running() +{ + LOCK_DATA(); + bool ret= (state == RUNNING); + UNLOCK_DATA(); + return ret; +} + + +/** + Stops the scheduler (again). Waits for acknowledgement from the + scheduler that it has stopped - synchronous stopping. + + Already running events will not be stopped. If the user needs + them stopped manual intervention is needed. + + SYNOPSIS + Event_scheduler::stop() + + RETURN VALUE + FALSE OK + TRUE Error (not reported) +*/ + +bool +Event_scheduler::stop() +{ + THD *thd= current_thd; + DBUG_ENTER("Event_scheduler::stop"); + DBUG_PRINT("enter", ("thd: %p", thd)); + + LOCK_DATA(); + DBUG_PRINT("info", ("state before action %s", scheduler_states_names[state].str)); + if (state != RUNNING) + { + /* Synchronously wait until the scheduler stops. */ + while (state != INITIALIZED) + COND_STATE_WAIT(thd, NULL, &stage_waiting_for_scheduler_to_stop); + goto end; + } + + /* Guarantee we don't catch spurious signals */ + do { + DBUG_PRINT("info", ("Waiting for COND_started_or_stopped from " + "the scheduler thread. 
Current value of state is %s . " + "workers count=%d", scheduler_states_names[state].str, + workers_count())); + /* + NOTE: We don't use kill_one_thread() because it can't kill COM_DEAMON + threads. In addition, kill_one_thread() requires THD but during shutdown + current_thd is NULL. Hence, if kill_one_thread should be used it has to + be modified to kill also daemons, by adding a flag, and also we have to + create artificial THD here. To save all this work, we just do what + kill_one_thread() does to kill a thread. See also sql_repl.cc for similar + usage. + */ + + state= STOPPING; + DBUG_PRINT("info", ("Scheduler thread has id %lu", + (ulong) scheduler_thd->thread_id)); + /* This will wake up the thread if it waits on Queue's conditional */ + sql_print_information("Event Scheduler: Killing the scheduler thread, " + "thread id %lu", + (ulong) scheduler_thd->thread_id); + scheduler_thd->awake(KILL_CONNECTION); + + /* thd could be 0x0, when shutting down */ + sql_print_information("Event Scheduler: " + "Waiting for the scheduler thread to reply"); + + /* + Wait only 2 seconds, as there is a small chance the thread missed the + above awake() call and we may have to do it again + */ + struct timespec top_time; + set_timespec(top_time, 2); + COND_STATE_WAIT(thd, &top_time, &stage_waiting_for_scheduler_to_stop); + } while (state == STOPPING); + DBUG_PRINT("info", ("Scheduler thread has cleaned up. Set state to INIT")); + sql_print_information("Event Scheduler: Stopped"); +end: + UNLOCK_DATA(); + DBUG_RETURN(FALSE); +} + + +/* + Returns the number of living event worker threads. 
+ + SYNOPSIS + Event_scheduler::workers_count() +*/ + +static my_bool workers_count_callback(THD *thd, uint32_t *count) +{ + if (thd->system_thread == SYSTEM_THREAD_EVENT_WORKER) + ++*count; + return 0; +} + + +uint +Event_scheduler::workers_count() +{ + uint32_t count= 0; + DBUG_ENTER("Event_scheduler::workers_count"); + server_threads.iterate(workers_count_callback, &count); + DBUG_RETURN(count); +} + + +/* + Auxiliary function for locking LOCK_scheduler_state. Used + by the LOCK_DATA macro. + + SYNOPSIS + Event_scheduler::lock_data() + func Which function is requesting mutex lock + line On which line mutex lock is requested +*/ + +void +Event_scheduler::lock_data(const char *func, uint line) +{ + DBUG_ENTER("Event_scheduler::lock_data"); + DBUG_PRINT("enter", ("func=%s line=%u", func, line)); + mysql_mutex_lock(&LOCK_scheduler_state); + mutex_last_locked_in_func= func; + mutex_last_locked_at_line= line; + mutex_scheduler_data_locked= TRUE; + DBUG_VOID_RETURN; +} + + +/* + Auxiliary function for unlocking LOCK_scheduler_state. Used + by the UNLOCK_DATA macro. 
+ + SYNOPSIS + Event_scheduler::unlock_data() + func Which function is requesting mutex unlock + line On which line mutex unlock is requested +*/ + +void +Event_scheduler::unlock_data(const char *func, uint line) +{ + DBUG_ENTER("Event_scheduler::unlock_data"); + DBUG_PRINT("enter", ("func=%s line=%u", func, line)); + mutex_last_unlocked_at_line= line; + mutex_scheduler_data_locked= FALSE; + mutex_last_unlocked_in_func= func; + mysql_mutex_unlock(&LOCK_scheduler_state); + DBUG_VOID_RETURN; +} + + +/* + Wrapper for mysql_cond_wait/timedwait + + SYNOPSIS + Event_scheduler::cond_wait() + thd Thread (Could be NULL during shutdown procedure) + abstime If not null then call mysql_cond_timedwait() + msg Message for thd->proc_info + func Which function is requesting cond_wait + line On which line cond_wait is requested +*/ + +void +Event_scheduler::cond_wait(THD *thd, struct timespec *abstime, const PSI_stage_info *stage, + const char *src_func, const char *src_file, uint src_line) +{ + DBUG_ENTER("Event_scheduler::cond_wait"); + waiting_on_cond= TRUE; + mutex_last_unlocked_at_line= src_line; + mutex_scheduler_data_locked= FALSE; + mutex_last_unlocked_in_func= src_func; + if (thd) + thd->enter_cond(&COND_state, &LOCK_scheduler_state, stage, + NULL, src_func, src_file, src_line); + + DBUG_PRINT("info", ("mysql_cond_%swait", abstime? "timed":"")); + if (!abstime) + mysql_cond_wait(&COND_state, &LOCK_scheduler_state); + else + mysql_cond_timedwait(&COND_state, &LOCK_scheduler_state, abstime); + if (thd) + { + /* + This will free the lock so we need to relock. 
Not the best thing to + do but we need to obey cond_wait() + */ + thd->exit_cond(NULL, src_func, src_file, src_line); + LOCK_DATA(); + } + mutex_last_locked_in_func= src_func; + mutex_last_locked_at_line= src_line; + mutex_scheduler_data_locked= TRUE; + waiting_on_cond= FALSE; + DBUG_VOID_RETURN; +} + + +/* + Dumps the internal status of the scheduler + + SYNOPSIS + Event_scheduler::dump_internal_status() +*/ + +void +Event_scheduler::dump_internal_status() +{ + DBUG_ENTER("Event_scheduler::dump_internal_status"); + + puts(""); + puts("Event scheduler status:"); + printf("State : %s\n", scheduler_states_names[state].str); + printf("Thread id : %lu\n", scheduler_thd ? + (ulong) scheduler_thd->thread_id : (ulong) 0); + printf("LLA : %s:%u\n", mutex_last_locked_in_func, + mutex_last_locked_at_line); + printf("LUA : %s:%u\n", mutex_last_unlocked_in_func, + mutex_last_unlocked_at_line); + printf("WOC : %s\n", waiting_on_cond? "YES":"NO"); + printf("Workers : %u\n", workers_count()); + printf("Executed : %lu\n", (ulong) started_events); + printf("Data locked: %s\n", mutex_scheduler_data_locked ? "YES":"NO"); + + DBUG_VOID_RETURN; +} + +/** + @} (End of group Event_Scheduler) +*/ diff --git a/sql/event_scheduler.h b/sql/event_scheduler.h new file mode 100644 index 00000000..59ac923c --- /dev/null +++ b/sql/event_scheduler.h @@ -0,0 +1,157 @@ +#ifndef _EVENT_SCHEDULER_H_ +#define _EVENT_SCHEDULER_H_ +/* Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @addtogroup Event_Scheduler + @{ +*/ +/** + @file + + Declarations of the scheduler thread class + and related functionality. + + This file is internal to Event_Scheduler module. Please do not + include it directly. All public declarations of Event_Scheduler + module are in events.h and event_data_objects.h. +*/ + + +class Event_queue; +class Event_job_data; +class Event_db_repository; +class Event_queue_element_for_exec; +class Events; +class THD; + +void +pre_init_event_thread(THD* thd); + +bool +post_init_event_thread(THD* thd); + +void +deinit_event_thread(THD *thd); + + +class Event_worker_thread +{ +public: + static void + init(Event_db_repository *db_repository_arg) + { + db_repository= db_repository_arg; + } + + void + run(THD *thd, Event_queue_element_for_exec *event); + +private: + void + print_warnings(THD *thd, Event_job_data *et); + + static Event_db_repository *db_repository; +}; + + +class Event_scheduler +{ +public: + Event_scheduler(Event_queue *event_queue_arg); + ~Event_scheduler(); + + + /* State changing methods follow */ + + bool + start(int *err_no); + + bool + stop(); + + /* + Need to be public because has to be called from the function + passed to pthread_create. 
+ */ + bool + run(THD *thd); + + + /* Information retrieving methods follow */ + bool + is_running(); + + void + dump_internal_status(); + +private: + uint + workers_count(); + + /* helper functions */ + bool + execute_top(Event_queue_element_for_exec *event_name); + + /* helper functions for working with mutexes & conditionals */ + void + lock_data(const char *func, uint line); + + void + unlock_data(const char *func, uint line); + + void + cond_wait(THD *thd, struct timespec *abstime, const PSI_stage_info *stage, + const char *src_func, const char *src_file, uint src_line); + + mysql_mutex_t LOCK_scheduler_state; + + enum enum_state + { + INITIALIZED = 0, + RUNNING, + STOPPING + }; + + /* This is the current status of the life-cycle of the scheduler. */ + enum enum_state state; + + THD *scheduler_thd; + + mysql_cond_t COND_state; + + Event_queue *queue; + + uint mutex_last_locked_at_line; + uint mutex_last_unlocked_at_line; + const char* mutex_last_locked_in_func; + const char* mutex_last_unlocked_in_func; + bool mutex_scheduler_data_locked; + bool waiting_on_cond; + + ulonglong started_events; + +private: + /* Prevent use of these */ + Event_scheduler(const Event_scheduler &); + void operator=(Event_scheduler &); +}; + +/** + @} (End of group Event_Scheduler) +*/ + +#endif /* _EVENT_SCHEDULER_H_ */ diff --git a/sql/events.cc b/sql/events.cc new file mode 100644 index 00000000..64ff2642 --- /dev/null +++ b/sql/events.cc @@ -0,0 +1,1327 @@ +/* + Copyright (c) 2005, 2013, Oracle and/or its affiliates. + Copyright (c) 2017, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "sql_parse.h" // check_access +#include "sql_base.h" // close_mysql_tables +#include "sql_show.h" // append_definer +#include "events.h" +#include "sql_db.h" // check_db_dir_existence +#include "sql_table.h" // write_bin_log +#include "tztime.h" // struct Time_zone +#include "sql_acl.h" // EVENT_ACL +#include "records.h" // init_read_record, end_read_record +#include "event_data_objects.h" +#include "event_db_repository.h" +#include "event_queue.h" +#include "event_scheduler.h" +#include "sp_head.h" // for Stored_program_creation_ctx +#include "set_var.h" +#include "lock.h" // lock_object_name +#include "mysql/psi/mysql_sp.h" +#include "wsrep_mysqld.h" + +/** + @addtogroup Event_Scheduler + @{ +*/ + +/* + TODO list : + - CREATE EVENT should not go into binary log! Does it now? The SQL statements + issued by the EVENT are replicated. + I have an idea how to solve the problem at failover. So the status field + will be ENUM('DISABLED', 'ENABLED', 'SLAVESIDE_DISABLED'). + In this case when CREATE EVENT is replicated it should go into the binary + as SLAVESIDE_DISABLED if it is ENABLED, when it's created as DISABLEd it + should be replicated as disabled. If an event is ALTERed as DISABLED the + query should go untouched into the binary log, when ALTERed as enable then + it should go as SLAVESIDE_DISABLED. This is regarding the SQL interface. + TT routines however modify mysql.event internally and this does not go the + log so in this case queries has to be injected into the log...somehow... 
or + maybe a solution is RBR for this case, because the event may go only from + ENABLED to DISABLED status change and this is safe for replicating. As well + an event may be deleted which is also safe for RBR. + + - Add logging to file + +*/ + + +/* + If the user (un)intentionally removes an event directly from mysql.event + the following sequence has to be used to be able to remove the in-memory + counterpart. + 1. CREATE EVENT the_name ON SCHEDULE EVERY 1 SECOND DISABLE DO SELECT 1; + 2. DROP EVENT the_name + + In other words, the first one will create a row in mysql.event . In the + second step because there will be a line, disk based drop will pass and + the scheduler will remove the memory counterpart. The reason is that + in-memory queue does not check whether the event we try to drop from memory + is disabled. Disabled events are not kept in-memory because they are not + eligible for execution. +*/ + +Event_queue *Events::event_queue; +Event_scheduler *Events::scheduler; +Event_db_repository *Events::db_repository; +ulong Events::opt_event_scheduler= Events::EVENTS_OFF; +ulong Events::startup_state= Events::EVENTS_OFF; +ulong Events::inited; + + +/* + Compares 2 LEX strings regarding case. + + SYNOPSIS + sortcmp_lex_string() + s First LEX_STRING + t Second LEX_STRING + cs Charset + + RETURN VALUE + -1 s < t + 0 s == t + 1 s > t +*/ + +int sortcmp_lex_string(const LEX_CSTRING *s, const LEX_CSTRING *t, + const CHARSET_INFO *cs) +{ + return cs->strnncollsp(s->str, s->length, + t->str, t->length); +} + + +/** + Push an error into the error stack if the system tables are + not up to date. 
+*/ + +bool Events::check_if_system_tables_error() +{ + DBUG_ENTER("Events::check_if_system_tables_error"); + + if (!inited) + { + my_error(ER_EVENTS_DB_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + + DBUG_RETURN(FALSE); +} + + +/** + Reconstructs interval expression from interval type and expression + value that is in form of a value of the smallest entity: + For + YEAR_MONTH - expression is in months + DAY_MINUTE - expression is in minutes + + SYNOPSIS + Events::reconstruct_interval_expression() + buf Preallocated String buffer to add the value to + interval The interval type (for instance YEAR_MONTH) + expression The value in the lowest entity + + RETURN VALUE + 0 OK + 1 Error +*/ + +int +Events::reconstruct_interval_expression(String *buf, interval_type interval, + longlong expression) +{ + ulonglong expr= expression; + char tmp_buff[128], *end; + bool close_quote= TRUE; + int multipl= 0; + char separator=':'; + + switch (interval) { + case INTERVAL_YEAR_MONTH: + multipl= 12; + separator= '-'; + goto common_1_lev_code; + case INTERVAL_DAY_HOUR: + multipl= 24; + separator= ' '; + goto common_1_lev_code; + case INTERVAL_HOUR_MINUTE: + case INTERVAL_MINUTE_SECOND: + multipl= 60; +common_1_lev_code: + buf->append('\''); + end= longlong10_to_str(expression/multipl, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff)); + expr= expr - (expr/multipl)*multipl; + break; + case INTERVAL_DAY_MINUTE: + { + ulonglong tmp_expr= expr; + + tmp_expr/=(24*60); + buf->append('\''); + end= longlong10_to_str(tmp_expr, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff));// days + buf->append(' '); + + tmp_expr= expr - tmp_expr*(24*60);//minutes left + end= longlong10_to_str(tmp_expr/60, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff));// hours + + expr= tmp_expr - (tmp_expr/60)*60; + /* the code after the switch will finish */ + break; + } + case INTERVAL_HOUR_SECOND: + { + ulonglong tmp_expr= expr; + + buf->append('\''); + end= 
longlong10_to_str(tmp_expr/3600, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff));// hours + buf->append(':'); + + tmp_expr= tmp_expr - (tmp_expr/3600)*3600; + end= longlong10_to_str(tmp_expr/60, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff));// minutes + + expr= tmp_expr - (tmp_expr/60)*60; + /* the code after the switch will finish */ + break; + } + case INTERVAL_DAY_SECOND: + { + ulonglong tmp_expr= expr; + + tmp_expr/=(24*3600); + buf->append('\''); + end= longlong10_to_str(tmp_expr, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff));// days + buf->append(' '); + + tmp_expr= expr - tmp_expr*(24*3600);//seconds left + end= longlong10_to_str(tmp_expr/3600, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff));// hours + buf->append(':'); + + tmp_expr= tmp_expr - (tmp_expr/3600)*3600; + end= longlong10_to_str(tmp_expr/60, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff));// minutes + + expr= tmp_expr - (tmp_expr/60)*60; + /* the code after the switch will finish */ + break; + } + case INTERVAL_DAY_MICROSECOND: + case INTERVAL_HOUR_MICROSECOND: + case INTERVAL_MINUTE_MICROSECOND: + case INTERVAL_SECOND_MICROSECOND: + case INTERVAL_MICROSECOND: + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "MICROSECOND"); + return 1; + case INTERVAL_QUARTER: + expr/= 3; + close_quote= FALSE; + break; + case INTERVAL_WEEK: + expr/= 7; + close_quote= FALSE; + break; + default: + close_quote= FALSE; + break; + } + if (close_quote) + buf->append(separator); + end= longlong10_to_str(expr, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff)); + if (close_quote) + buf->append('\''); + + return 0; +} + + +/** + Create a new query string for removing executable comments + for avoiding leak and keeping consistency of the execution + on master and slave. 
+ + @param[in] thd Thread handler + @param[in] buf Query string + + @return + 0 ok + 1 error +*/ +static int +create_query_string(THD *thd, String *buf) +{ + buf->length(0); + /* Append the "CREATE" part of the query */ + if (thd->lex->create_info.or_replace()) + { + if (buf->append(STRING_WITH_LEN("CREATE OR REPLACE "))) + return 1; + } + else if (buf->append(STRING_WITH_LEN("CREATE "))) + return 1; + /* Append definer */ + append_definer(thd, buf, &(thd->lex->definer->user), &(thd->lex->definer->host)); + /* Append the left part of thd->query after "DEFINER" part */ + if (buf->append(thd->lex->stmt_definition_begin, + thd->lex->stmt_definition_end - + thd->lex->stmt_definition_begin)) + return 1; + + return 0; +} + + +/** + Create a new event. + + @param[in,out] thd THD + @param[in] parse_data Event's data from parsing stage + + In case there is an event with the same name (db) and + IF NOT EXISTS is specified, an warning is put into the stack. + @sa Events::drop_event for the notes about locking, pre-locking + and Events DDL. + + @retval FALSE OK + @retval TRUE Error (reported) +*/ + +bool +Events::create_event(THD *thd, Event_parse_data *parse_data) +{ + bool ret; + bool event_already_exists; + enum_binlog_format save_binlog_format; + DBUG_ENTER("Events::create_event"); + + if (unlikely(check_if_system_tables_error())) + DBUG_RETURN(TRUE); + + /* + Perform semantic checks outside of Event_db_repository: + once CREATE EVENT is supported in prepared statements, the + checks will be moved to PREPARE phase. 
+ */ + if (parse_data->check_parse_data(thd)) + DBUG_RETURN(TRUE); + + /* At create, one of them must be set */ + DBUG_ASSERT(parse_data->expression || parse_data->execute_at); + + if (check_access(thd, EVENT_ACL, parse_data->dbname.str, NULL, NULL, 0, 0)) + DBUG_RETURN(TRUE); + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + if (lock_object_name(thd, MDL_key::EVENT, + parse_data->dbname.str, parse_data->name.str)) + DBUG_RETURN(TRUE); + + if (check_db_dir_existence(parse_data->dbname.str)) + { + my_error(ER_BAD_DB_ERROR, MYF(0), parse_data->dbname.str); + DBUG_RETURN(TRUE); + } + + if (parse_data->do_not_create) + DBUG_RETURN(FALSE); + /* + Turn off row binlogging of this statement and use statement-based + so that all supporting tables are updated for CREATE EVENT command. + */ + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + if (thd->lex->create_info.or_replace() && event_queue) + event_queue->drop_event(thd, &parse_data->dbname, &parse_data->name); + + /* On error conditions my_error() is called so no need to handle here */ + if (!(ret= db_repository->create_event(thd, parse_data, + &event_already_exists))) + { + Event_queue_element *new_element; + bool dropped= 0; + + if (!event_already_exists) + { + if (!(new_element= new Event_queue_element())) + ret= TRUE; // OOM + else if ((ret= db_repository->load_named_event(thd, &parse_data->dbname, + &parse_data->name, + new_element))) + { + if (!db_repository->drop_event(thd, &parse_data->dbname, + &parse_data->name, TRUE)) + dropped= 1; + delete new_element; + } + else + { + /* TODO: do not ignore the out parameter and a possible OOM error! */ + bool created; + if (event_queue) + event_queue->create_event(thd, new_element, &created); + } + } + /* + binlog the create event unless it's been successfully dropped + */ + if (!dropped) + { + /* Binlog the create event. 
*/ + DBUG_ASSERT(thd->query() && thd->query_length()); + char buffer[1024]; + String log_query(buffer, sizeof(buffer), &my_charset_bin); + if (create_query_string(thd, &log_query)) + { + my_message_sql(ER_STARTUP, + "Event Error: An error occurred while creating query " + "string, before writing it into binary log.", + MYF(ME_ERROR_LOG)); + ret= true; + } + else + { + /* + If the definer is not set or set to CURRENT_USER, the value + of CURRENT_USER will be written into the binary log as the + definer for the SQL thread. + */ + ret= write_bin_log(thd, TRUE, log_query.ptr(), log_query.length()); + } + } + } + + thd->restore_stmt_binlog_format(save_binlog_format); + + if (!ret && Events::opt_event_scheduler == Events::EVENTS_OFF) + { + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "Event scheduler is switched off, use SET GLOBAL event_scheduler=ON to enable it."); + } + + DBUG_RETURN(ret); +#ifdef WITH_WSREP +wsrep_error_label: + DBUG_RETURN(true); +#endif +} + + +/** + Alter an event. + + @param[in,out] thd THD + @param[in] parse_data Event's data from parsing stage + @param[in] new_dbname A new schema name for the event. Set in the case of + ALTER EVENT RENAME, otherwise is NULL. + @param[in] new_name A new name for the event. Set in the case of + ALTER EVENT RENAME + + Parameter 'et' contains data about dbname and event name. + Parameter 'new_name' is the new name of the event, if not null + this means that RENAME TO was specified in the query + @sa Events::drop_event for the locking notes. 
+ + @retval FALSE OK + @retval TRUE error (reported) +*/ + +bool +Events::update_event(THD *thd, Event_parse_data *parse_data, + LEX_CSTRING *new_dbname, LEX_CSTRING *new_name) +{ + int ret; + enum_binlog_format save_binlog_format; + Event_queue_element *new_element; + + DBUG_ENTER("Events::update_event"); + + if (unlikely(check_if_system_tables_error())) + DBUG_RETURN(TRUE); + + if (parse_data->check_parse_data(thd) || parse_data->do_not_create) + DBUG_RETURN(TRUE); + + if (check_access(thd, EVENT_ACL, parse_data->dbname.str, NULL, NULL, 0, 0)) + DBUG_RETURN(TRUE); + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + if (lock_object_name(thd, MDL_key::EVENT, + parse_data->dbname.str, parse_data->name.str)) + DBUG_RETURN(TRUE); + + if (check_db_dir_existence(parse_data->dbname.str)) + { + my_error(ER_BAD_DB_ERROR, MYF(0), parse_data->dbname.str); + DBUG_RETURN(TRUE); + } + + + if (new_dbname) /* It's a rename */ + { + /* Check that the new and the old names differ. */ + if ( !sortcmp_lex_string(&parse_data->dbname, new_dbname, + system_charset_info) && + !sortcmp_lex_string(&parse_data->name, new_name, + system_charset_info)) + { + my_error(ER_EVENT_SAME_NAME, MYF(0)); + DBUG_RETURN(TRUE); + } + + /* + And the user has sufficient privileges to use the target database. + Do it before checking whether the database exists: we don't want + to tell the user that a database doesn't exist if they can not + access it. + */ + if (check_access(thd, EVENT_ACL, new_dbname->str, NULL, NULL, 0, 0)) + DBUG_RETURN(TRUE); + + /* + Acquire mdl exclusive lock on target database name. 
+ */ + if (lock_object_name(thd, MDL_key::EVENT, + new_dbname->str, new_name->str)) + DBUG_RETURN(TRUE); + + /* Check that the target database exists */ + if (check_db_dir_existence(new_dbname->str)) + { + my_error(ER_BAD_DB_ERROR, MYF(0), new_dbname->str); + DBUG_RETURN(TRUE); + } + } + + /* + Turn off row binlogging of this statement and use statement-based + so that all supporting tables are updated for UPDATE EVENT command. + */ + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + /* On error conditions my_error() is called so no need to handle here */ + if (!(ret= db_repository->update_event(thd, parse_data, + new_dbname, new_name))) + { + LEX_CSTRING dbname= new_dbname ? *new_dbname : parse_data->dbname; + LEX_CSTRING name= new_name ? *new_name : parse_data->name; + + if (!(new_element= new Event_queue_element())) + ret= TRUE; // OOM + else if ((ret= db_repository->load_named_event(thd, &dbname, &name, + new_element))) + delete new_element; + else + { + /* + TODO: check if an update actually has inserted an entry + into the queue. + If not, and the element is ON COMPLETION NOT PRESERVE, delete + it right away. + */ + if (event_queue) + event_queue->update_event(thd, &parse_data->dbname, &parse_data->name, + new_element); + /* Binlog the alter event. */ + DBUG_ASSERT(thd->query() && thd->query_length()); + ret= write_bin_log(thd, TRUE, thd->query(), thd->query_length()); + } + } + + thd->restore_stmt_binlog_format(save_binlog_format); + DBUG_RETURN(ret); +#ifdef WITH_WSREP +wsrep_error_label: + DBUG_RETURN(true); +#endif +} + + +/** + Drops an event + + @param[in,out] thd THD + @param[in] dbname Event's schema + @param[in] name Event's name + @param[in] if_exists When this is set and the event does not exist + a warning is pushed into the warning stack. + Otherwise the operation produces an error. + + @note Similarly to DROP PROCEDURE, we do not allow DROP EVENT + under LOCK TABLES mode, unless table mysql.event is locked. 
To + ensure that, we do not reset & backup the open tables state in + this function - if in LOCK TABLES or pre-locking mode, this will + lead to an error 'Table mysql.event is not locked with LOCK + TABLES' unless it _is_ locked. In pre-locked mode there is + another barrier - DROP EVENT commits the current transaction, + and COMMIT/ROLLBACK is not allowed in stored functions and + triggers. + + @retval FALSE OK + @retval TRUE Error (reported) +*/ + +bool +Events::drop_event(THD *thd, const LEX_CSTRING *dbname, + const LEX_CSTRING *name, bool if_exists) +{ + int ret; + enum_binlog_format save_binlog_format; + DBUG_ENTER("Events::drop_event"); + + if (unlikely(check_if_system_tables_error())) + DBUG_RETURN(TRUE); + + if (check_access(thd, EVENT_ACL, dbname->str, NULL, NULL, 0, 0)) + DBUG_RETURN(TRUE); + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + /* + Turn off row binlogging of this statement and use statement-based so + that all supporting tables are updated for DROP EVENT command. + */ + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + if (lock_object_name(thd, MDL_key::EVENT, + dbname->str, name->str)) + DBUG_RETURN(TRUE); + /* On error conditions my_error() is called so no need to handle here */ + if (!(ret= db_repository->drop_event(thd, dbname, name, if_exists))) + { + if (event_queue) + event_queue->drop_event(thd, dbname, name); + /* Binlog the drop event. */ + DBUG_ASSERT(thd->query() && thd->query_length()); + ret= write_bin_log(thd, TRUE, thd->query(), thd->query_length()); + /* Drop statistics for this stored program from performance schema. 
*/ + MYSQL_DROP_SP(SP_TYPE_EVENT, + dbname->str, static_cast(dbname->length), name->str, static_cast(name->length)); + } + + thd->restore_stmt_binlog_format(save_binlog_format); + DBUG_RETURN(ret); +#ifdef WITH_WSREP +wsrep_error_label: + DBUG_RETURN(true); +#endif +} + + +/** + Drops all events from a schema + + @note We allow to drop all events in a schema even if the + scheduler is disabled. This is to not produce any warnings + in case of DROP DATABASE and a disabled scheduler. + + @param[in,out] thd Thread + @param[in] db ASCIIZ schema name +*/ + +void +Events::drop_schema_events(THD *thd, const char *db) +{ + const LEX_CSTRING db_lex= { db, strlen(db) }; + + DBUG_ENTER("Events::drop_schema_events"); + DBUG_PRINT("enter", ("dropping events from %s", db)); + + DBUG_SLOW_ASSERT(ok_for_lower_case_names(db)); + + /* + Sic: no check if the scheduler is disabled or system tables + are damaged, as intended. + */ + if (event_queue) + event_queue->drop_schema_events(thd, &db_lex); + if (db_repository) + db_repository->drop_schema_events(thd, &db_lex); + else + { + if ((db_repository= new Event_db_repository)) + { + db_repository->drop_schema_events(thd, &db_lex); + delete db_repository; + db_repository= 0; + } + } + DBUG_VOID_RETURN; +} + + +/** + A helper function to generate SHOW CREATE EVENT output from + a named event +*/ + +static bool +send_show_create_event(THD *thd, Event_timed *et, Protocol *protocol) +{ + char show_str_buf[10 * STRING_BUFFER_USUAL_SIZE]; + String show_str(show_str_buf, sizeof(show_str_buf), system_charset_info); + List field_list; + LEX_CSTRING sql_mode; + const String *tz_name; + MEM_ROOT *mem_root= thd->mem_root; + DBUG_ENTER("send_show_create_event"); + + show_str.length(0); + if (et->get_create_event(thd, &show_str)) + DBUG_RETURN(TRUE); + + field_list.push_back(new (mem_root) + Item_empty_string(thd, "Event", NAME_CHAR_LEN), + mem_root); + + if (sql_mode_string_representation(thd, et->sql_mode, &sql_mode)) + DBUG_RETURN(TRUE); + + 
field_list.push_back(new (mem_root) + Item_empty_string(thd, "sql_mode", + (uint) sql_mode.length), mem_root); + + tz_name= et->time_zone->get_name(); + + field_list.push_back(new (mem_root) + Item_empty_string(thd, "time_zone", tz_name->length()), + mem_root); + + field_list.push_back(new (mem_root) + Item_empty_string(thd, "Create Event", + show_str.length()), mem_root); + + field_list.push_back(new (mem_root) + Item_empty_string(thd, "character_set_client", + MY_CS_CHARACTER_SET_NAME_SIZE), + mem_root); + + field_list.push_back(new (mem_root) + Item_empty_string(thd, "collation_connection", + MY_CS_COLLATION_NAME_SIZE), mem_root); + + field_list.push_back(new (mem_root) + Item_empty_string(thd, "Database Collation", + MY_CS_COLLATION_NAME_SIZE), mem_root); + + if (protocol->send_result_set_metadata(&field_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) + DBUG_RETURN(TRUE); + + protocol->prepare_for_resend(); + + protocol->store(et->name.str, et->name.length, system_charset_info); + protocol->store(sql_mode.str, sql_mode.length, system_charset_info); + protocol->store(tz_name->ptr(), tz_name->length(), system_charset_info); + protocol->store(show_str.ptr(), show_str.length(), + et->creation_ctx->get_client_cs()); + protocol->store(&et->creation_ctx->get_client_cs()->cs_name, + system_charset_info); + protocol->store(&et->creation_ctx->get_connection_cl()->coll_name, + system_charset_info); + protocol->store(&et->creation_ctx->get_db_cl()->coll_name, + system_charset_info); + + if (protocol->write()) + DBUG_RETURN(TRUE); + + my_eof(thd); + + DBUG_RETURN(FALSE); +} + + +/** + Implement SHOW CREATE EVENT statement + + thd Thread context + spn The name of the event (db, name) + + @retval FALSE OK + @retval TRUE error (reported) +*/ + +bool +Events::show_create_event(THD *thd, const LEX_CSTRING *dbname, + const LEX_CSTRING *name) +{ + Event_timed et; + bool ret; + + DBUG_ENTER("Events::show_create_event"); + DBUG_PRINT("enter", ("name: %s@%s", dbname->str, 
name->str)); + + if (unlikely(check_if_system_tables_error())) + DBUG_RETURN(TRUE); + + if (check_access(thd, EVENT_ACL, dbname->str, NULL, NULL, 0, 0)) + DBUG_RETURN(TRUE); + + /* + We would like to allow SHOW CREATE EVENT under LOCK TABLES and + in pre-locked mode. mysql.event table is marked as a system table. + This flag reduces the set of its participation scenarios in LOCK TABLES + operation, and therefore an out-of-bound open of this table + for reading like the one below (sic, only for reading) is + more or less deadlock-free. For additional information about when a + deadlock can occur please refer to the description of 'system table' + flag. + */ + ret= db_repository->load_named_event(thd, dbname, name, &et); + + if (!ret) + ret= send_show_create_event(thd, &et, thd->protocol); + + DBUG_RETURN(ret); +} + + +/** + Check access rights and fill INFORMATION_SCHEMA.events table. + + @param[in,out] thd Thread context + @param[in] tables The temporary table to fill. + + In MySQL INFORMATION_SCHEMA tables are temporary tables that are + created and filled on demand. In this function, we fill + INFORMATION_SCHEMA.events. It is a callback for I_S module, invoked from + sql_show.cc + + @return Has to be integer, as such is the requirement of the I_S API + @retval 0 success + @retval 1 an error, pushed into the error stack +*/ + +int +Events::fill_schema_events(THD *thd, TABLE_LIST *tables, COND * /* cond */) +{ + const char *db= NULL; + int ret; + char db_tmp[SAFE_NAME_LEN]; + DBUG_ENTER("Events::fill_schema_events"); + + /* + If we didn't start events because of --skip-grant-tables, return an + empty set + */ + if (opt_noacl) + DBUG_RETURN(0); + + if (unlikely(check_if_system_tables_error())) + DBUG_RETURN(1); + + /* + If it's SHOW EVENTS then thd->lex->select_lex.db is guaranteed not to + be NULL. Let's do an assert anyway. 
+ */ + if (thd->lex->sql_command == SQLCOM_SHOW_EVENTS) + { + LEX_CSTRING *lexdb= &thd->lex->first_select_lex()->db; + DBUG_ASSERT(lexdb); + if (!is_infoschema_db(lexdb) && !is_perfschema_db(lexdb) && + check_access(thd, EVENT_ACL, lexdb->str, NULL, NULL, 0, 0)) + DBUG_RETURN(1); + db= normalize_db_name(lexdb->str, db_tmp, sizeof(db_tmp)); + } + ret= db_repository->fill_schema_events(thd, tables, db); + + DBUG_RETURN(ret); +} + + +/** + Initializes the scheduler's structures. + + @param THD or null (if called by init) + @param opt_noacl_or_bootstrap + TRUE if there is --skip-grant-tables or --bootstrap + option. In that case we disable the event scheduler. + + @note This function is not synchronized. + + @retval FALSE Perhaps there was an error, and the event scheduler + is disabled. But the error is not fatal and the + server start up can continue. + @retval TRUE Fatal error. Startup must terminate (call unireg_abort()). +*/ + +bool +Events::init(THD *thd, bool opt_noacl_or_bootstrap) +{ + int err_no; + bool res= FALSE; + bool had_thd= thd != 0; + DBUG_ENTER("Events::init"); + + DBUG_ASSERT(inited == 0); + + /* + Was disabled explicitly from the command line + */ + if (opt_event_scheduler == Events::EVENTS_DISABLED || + opt_noacl_or_bootstrap) + DBUG_RETURN(FALSE); + + /* We need a temporary THD during boot */ + if (!thd) + { + + if (!(thd= new THD(0))) + { + res= TRUE; + goto end; + } + /* + The thread stack does not start from this function but we cannot + guess the real value. So better some value that doesn't assert than + no value. + */ + thd->thread_stack= (char*) &thd; + thd->store_globals(); + /* + Set current time for the thread that handles events. + Current time is stored in data member start_time of THD class. + Subsequently, this value is used to check whether event was expired + when make loading events from storage. 
Check for event expiration time + is done at Event_queue_element::compute_next_execution_time() where + event's status set to Event_parse_data::DISABLED and dropped flag set + to true if event was expired. + */ + thd->set_time(); + } + + /* + We will need Event_db_repository anyway, even if the scheduler is + disabled - to perform events DDL. + */ + DBUG_ASSERT(db_repository == 0); + + if (!(db_repository= new Event_db_repository)) + { + res= TRUE; /* fatal error: request unireg_abort */ + goto end; + } + + /* + Since we allow event DDL even if the scheduler is disabled, + check the system tables, as we might need them. + + If run with --skip-grant-tables or --bootstrap, don't try to do the + check of system tables and don't complain: in these modes the tables + are most likely not there and we're going to disable the event + scheduler anyway. + */ + if (Event_db_repository::check_system_tables(thd)) + { + delete db_repository; + db_repository= 0; + my_message(ER_STARTUP, + "Event Scheduler: An error occurred when initializing " + "system tables. 
Disabling the Event Scheduler.", + MYF(ME_ERROR_LOG)); + /* Disable the scheduler since the system tables are not up to date */ + opt_event_scheduler= EVENTS_OFF; + goto end; + } + + + DBUG_ASSERT(opt_event_scheduler == Events::EVENTS_ON || + opt_event_scheduler == Events::EVENTS_OFF); + + if (!(event_queue= new Event_queue) || + !(scheduler= new Event_scheduler(event_queue))) + { + res= TRUE; /* fatal error: request unireg_abort */ + goto end; + } + + if (event_queue->init_queue(thd) || load_events_from_db(thd) || + (opt_event_scheduler == EVENTS_ON && scheduler->start(&err_no))) + { + my_message_sql(ER_STARTUP, + "Event Scheduler: Error while loading from mysql.event table.", + MYF(ME_ERROR_LOG)); + res= TRUE; /* fatal error: request unireg_abort */ + goto end; + } + Event_worker_thread::init(db_repository); + inited= 1; + +end: + if (res) + deinit(); + if (!had_thd) + delete thd; + + DBUG_RETURN(res); +} + +/* + Cleans up scheduler's resources. Called at server shutdown. + + SYNOPSIS + Events::deinit() + + NOTES + This function is not synchronized. 
+*/ + +void +Events::deinit() +{ + DBUG_ENTER("Events::deinit"); + + delete scheduler; + scheduler= NULL; /* For restart */ + delete event_queue; + event_queue= NULL; /* For restart */ + delete db_repository; + db_repository= NULL; /* For restart */ + + inited= 0; + DBUG_VOID_RETURN; +} + +#ifdef HAVE_PSI_INTERFACE +PSI_mutex_key key_LOCK_event_queue, + key_event_scheduler_LOCK_scheduler_state; + +static PSI_mutex_info all_events_mutexes[]= +{ + { &key_LOCK_event_queue, "LOCK_event_queue", PSI_FLAG_GLOBAL}, + { &key_event_scheduler_LOCK_scheduler_state, "Event_scheduler::LOCK_scheduler_state", PSI_FLAG_GLOBAL} +}; + +PSI_cond_key key_event_scheduler_COND_state, key_COND_queue_state; + +static PSI_cond_info all_events_conds[]= +{ + { &key_event_scheduler_COND_state, "Event_scheduler::COND_state", PSI_FLAG_GLOBAL}, + { &key_COND_queue_state, "COND_queue_state", PSI_FLAG_GLOBAL}, +}; + +PSI_thread_key key_thread_event_scheduler, key_thread_event_worker; + +static PSI_thread_info all_events_threads[]= +{ + { &key_thread_event_scheduler, "event_scheduler", PSI_FLAG_GLOBAL}, + { &key_thread_event_worker, "event_worker", 0} +}; +#endif /* HAVE_PSI_INTERFACE */ + +PSI_stage_info stage_waiting_on_empty_queue= { 0, "Waiting on empty queue", 0}; +PSI_stage_info stage_waiting_for_next_activation= { 0, "Waiting for next activation", 0}; +PSI_stage_info stage_waiting_for_scheduler_to_stop= { 0, "Waiting for the scheduler to stop", 0}; + +PSI_memory_key key_memory_event_basic_root; + +#ifdef HAVE_PSI_INTERFACE +PSI_stage_info *all_events_stages[]= +{ + & stage_waiting_on_empty_queue, + & stage_waiting_for_next_activation, + & stage_waiting_for_scheduler_to_stop +}; + +static PSI_memory_info all_events_memory[]= +{ + { &key_memory_event_basic_root, "Event_basic::mem_root", PSI_FLAG_GLOBAL} +}; + +static void init_events_psi_keys(void) +{ + const char* category= "sql"; + int count; + + count= array_elements(all_events_mutexes); + mysql_mutex_register(category, all_events_mutexes, 
count); + + count= array_elements(all_events_conds); + mysql_cond_register(category, all_events_conds, count); + + count= array_elements(all_events_threads); + mysql_thread_register(category, all_events_threads, count); + + count= array_elements(all_events_stages); + mysql_stage_register(category, all_events_stages, count); + + count= array_elements(all_events_memory); + mysql_memory_register(category, all_events_memory, count); + + init_scheduler_psi_keys(); +} +#endif /* HAVE_PSI_INTERFACE */ + +/** + Inits Events mutexes + + SYNOPSIS + Events::init_mutexes() + thd Thread +*/ + +void +Events::init_mutexes() +{ +#ifdef HAVE_PSI_INTERFACE + init_events_psi_keys(); +#endif +} + + +/* + Dumps the internal status of the scheduler and the memory cache + into a table with two columns - Name & Value. Different properties + which could be useful for debugging for instance deadlocks are + returned. + + SYNOPSIS + Events::dump_internal_status() +*/ + +void +Events::dump_internal_status() +{ + DBUG_ENTER("Events::dump_internal_status"); + puts("\n\n\nEvents status:"); + puts("LLA = Last Locked At LUA = Last Unlocked At"); + puts("WOC = Waiting On Condition DL = Data Locked"); + + /* + opt_event_scheduler should only be accessed while + holding LOCK_global_system_variables. + */ + mysql_mutex_lock(&LOCK_global_system_variables); + if (!inited) + puts("The Event Scheduler is disabled"); + else + { + scheduler->dump_internal_status(); + event_queue->dump_internal_status(); + } + + mysql_mutex_unlock(&LOCK_global_system_variables); + DBUG_VOID_RETURN; +} + +bool Events::start(int *err_no) +{ + DBUG_ASSERT(inited); + return scheduler->start(err_no); +} + +bool Events::stop() +{ + DBUG_ASSERT(inited); + return scheduler->stop(); +} + +/** + Loads all ENABLED events from mysql.event into a prioritized + queue. + + This function is called during the server start up. 
It reads + every event, computes the next execution time, and if the event + needs execution, adds it to a prioritized queue. Otherwise, if + ON COMPLETION DROP is specified, the event is automatically + removed from the table. + + @param[in,out] thd Thread context. Used for memory allocation in some cases. + + @retval FALSE success + @retval TRUE error, the load is aborted + + @note Reports the error to the console +*/ + +bool +Events::load_events_from_db(THD *thd) +{ + TABLE *table; + READ_RECORD read_record_info; + bool ret= TRUE; + uint count= 0; + DBUG_ENTER("Events::load_events_from_db"); + DBUG_PRINT("enter", ("thd: %p", thd)); + + /* + NOTE: even if we run in read-only mode, we should be able to lock the + mysql.event table for writing. In order to achieve this, we should call + mysql_lock_tables() under the super user. + + Same goes for transaction access mode. + Temporarily reset it to read-write. + */ + + privilege_t saved_master_access(thd->security_ctx->master_access); + thd->security_ctx->master_access |= PRIV_IGNORE_READ_ONLY; + bool save_tx_read_only= thd->tx_read_only; + thd->tx_read_only= false; + + ret= db_repository->open_event_table(thd, TL_WRITE, &table); + + thd->tx_read_only= save_tx_read_only; + thd->security_ctx->master_access= saved_master_access; + + if (ret) + { + my_message_sql(ER_STARTUP, + "Event Scheduler: Failed to open table mysql.event", + MYF(ME_ERROR_LOG)); + DBUG_RETURN(TRUE); + } + + if (init_read_record(&read_record_info, thd, table, NULL, NULL, 0, 1, FALSE)) + { + close_thread_tables(thd); + DBUG_RETURN(TRUE); + } + + while (!(read_record_info.read_record())) + { + Event_queue_element *et; + bool created, dropped; + + if (!(et= new Event_queue_element)) + goto end; + + DBUG_PRINT("info", ("Loading event from row.")); + + if (et->load_from_row(thd, table)) + { + my_message(ER_STARTUP, + "Event Scheduler: " + "Error while loading events from mysql.event. 
" + "The table probably contains bad data or is corrupted", + MYF(ME_ERROR_LOG)); + delete et; + goto end; + } + +#ifdef WITH_WSREP + /** + If SST is done from a galera node that is also acting as MASTER + newly synced node in galera eco-system will also copy-over the + event state enabling duplicate event in galera eco-system. + DISABLE such events if the current node is not event orginator. + (Also, make sure you skip disabling it if is already disabled to avoid + creation of redundant action) + NOTE: + This complete system relies on server-id. Ideally server-id should be + same for all nodes of galera eco-system but they aren't same. + Infact, based on galera use-case it seems like it recommends to have each + node with different server-id. + */ + if (WSREP(thd) && et->originator != thd->variables.server_id) + { + if (et->status == Event_parse_data::SLAVESIDE_DISABLED) + continue; + + store_record(table, record[1]); + table->field[ET_FIELD_STATUS]-> + store((longlong) Event_parse_data::SLAVESIDE_DISABLED, + TRUE); + + /* All the dmls to mysql.events tables are stmt bin-logged. */ + table->file->row_logging= 0; + (void) table->file->ha_update_row(table->record[1], table->record[0]); + + delete et; + continue; + } +#endif /* WITH_WSREP */ + + /** + Since the Event_queue_element object could be deleted inside + Event_queue::create_event we should save the value of dropped flag + into the temporary variable. + */ + dropped= et->dropped; + if (event_queue->create_event(thd, et, &created)) + { + /* Out of memory */ + delete et; + goto end; + } + if (created) + count++; + else if (dropped) + { + /* + If not created, a stale event - drop if immediately if + ON COMPLETION NOT PRESERVE. + XXX: This won't be replicated, thus the drop won't appear in + in the slave. When the slave is restarted it will drop events. 
+ However, as the slave will be "out of sync", it might happen that + an event created on the master, after master restart, won't be + replicated to the slave correctly, as the create will fail there. + */ + int rc= table->file->ha_delete_row(table->record[0]); + if (rc) + { + table->file->print_error(rc, MYF(0)); + goto end; + } + } + } + my_printf_error(ER_STARTUP, + "Event Scheduler: Loaded %d event%s", + MYF(ME_ERROR_LOG | + (global_system_variables.log_warnings) ? + ME_NOTE: 0), + count, (count == 1) ? "" : "s"); + ret= FALSE; + +end: + end_read_record(&read_record_info); + + close_mysql_tables(thd); + DBUG_RETURN(ret); +} + +#ifdef WITH_WSREP +int wsrep_create_event_query(THD *thd, uchar** buf, size_t* buf_len) +{ + char buffer[1024]; + String log_query(buffer, sizeof(buffer), &my_charset_bin); + + if (create_query_string(thd, &log_query)) + { + WSREP_WARN("events create string failed: schema: %s, query: %s", + thd->get_db(), thd->query()); + return 1; + } + return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); +} +#endif /* WITH_WSREP */ +/** + @} (End of group Event_Scheduler) +*/ diff --git a/sql/events.h b/sql/events.h new file mode 100644 index 00000000..2fb13d7c --- /dev/null +++ b/sql/events.h @@ -0,0 +1,164 @@ +#ifndef _EVENT_H_ +#define _EVENT_H_ +/* Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @defgroup Event_Scheduler Event Scheduler + @ingroup Runtime_Environment + @{ + + @file events.h + + A public interface of Events_Scheduler module. +*/ + +#ifdef HAVE_PSI_INTERFACE +extern PSI_mutex_key key_event_scheduler_LOCK_scheduler_state; +extern PSI_cond_key key_event_scheduler_COND_state; +extern PSI_thread_key key_thread_event_scheduler, key_thread_event_worker; +#endif /* HAVE_PSI_INTERFACE */ + +extern PSI_memory_key key_memory_event_basic_root; + +/* Always defined, for SHOW PROCESSLIST. */ +extern PSI_stage_info stage_waiting_on_empty_queue; +extern PSI_stage_info stage_waiting_for_next_activation; +extern PSI_stage_info stage_waiting_for_scheduler_to_stop; + +#include "sql_string.h" /* LEX_CSTRING */ +#include "my_time.h" /* interval_type */ + +class Event_db_repository; +class Event_parse_data; +class Event_queue; +class Event_scheduler; +struct TABLE_LIST; +class THD; +typedef class Item COND; + +int +sortcmp_lex_string(const LEX_CSTRING *s, const LEX_CSTRING *t, + const CHARSET_INFO *cs); + +/** + @brief A facade to the functionality of the Event Scheduler. + + Every public operation against the scheduler has to be executed via the + interface provided by a static method of this class. No instance of this + class is ever created and it has no non-static data members. + + The life cycle of the Events module is the following: + + At server start up: + init_mutexes() -> init() + When the server is running: + create_event(), drop_event(), start_or_stop_event_scheduler(), etc + At shutdown: + deinit(), destroy_mutexes(). + + The peculiar initialization and shutdown cycle is an adaptation to the + outside server startup/shutdown framework and mimics the rest of MySQL + subsystems (ACL, time zone tables, etc). 
+*/ + +class Events +{ +public: + /* + the following block is to support --event-scheduler command line option + and the @@global.event_scheduler SQL variable. + See sys_var.cc + */ + enum enum_opt_event_scheduler { EVENTS_OFF, EVENTS_ON, EVENTS_DISABLED, + EVENTS_ORIGINAL }; + /* Protected using LOCK_global_system_variables only. */ + static ulong opt_event_scheduler, startup_state; + static ulong inited; + static bool check_if_system_tables_error(); + static bool start(int *err_no); + static bool stop(); + +public: + /* A hack needed for Event_queue_element */ + static Event_db_repository * + get_db_repository() { return db_repository; } + + static bool init(THD *thd, bool opt_noacl); + + static void + deinit(); + + static void + init_mutexes(); + + static void + destroy_mutexes(); + + static bool + create_event(THD *thd, Event_parse_data *parse_data); + + static bool + update_event(THD *thd, Event_parse_data *parse_data, + LEX_CSTRING *new_dbname, LEX_CSTRING *new_name); + + static bool + drop_event(THD *thd, const LEX_CSTRING *dbname, const LEX_CSTRING *name, + bool if_exists); + + static void + drop_schema_events(THD *thd, const char *db); + + static bool + show_create_event(THD *thd, const LEX_CSTRING *dbname, + const LEX_CSTRING *name); + + /* Needed for both SHOW CREATE EVENT and INFORMATION_SCHEMA */ + static int + reconstruct_interval_expression(String *buf, interval_type interval, + longlong expression); + + static int + fill_schema_events(THD *thd, TABLE_LIST *tables, COND * /* cond */); + + static void + dump_internal_status(); + + static void set_original_state(ulong startup_state_org) + { + startup_state= startup_state_org; + } + +private: + + static bool + load_events_from_db(THD *thd); + +private: + static Event_queue *event_queue; + static Event_scheduler *scheduler; + static Event_db_repository *db_repository; + +private: + /* Prevent use of these */ + Events(const Events &); + void operator=(Events &); +}; + +/** + @} (end of group Event 
Scheduler) +*/ + +#endif /* _EVENT_H_ */ diff --git a/sql/field.cc b/sql/field.cc new file mode 100644 index 00000000..e94d5c19 --- /dev/null +++ b/sql/field.cc @@ -0,0 +1,11730 @@ +/* + Copyright (c) 2000, 2017, Oracle and/or its affiliates. + Copyright (c) 2008, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/** + @file + + @brief + This file implements classes defined in field.h +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_select.h" +#include "rpl_rli.h" // Pull in Relay_log_info +#include "slave.h" // Pull in rpl_master_has_bug() +#include "strfunc.h" // find_type2, find_set +#include "tztime.h" // struct Time_zone +#include "filesort.h" // change_double_for_sort +#include "log_event.h" // class Table_map_log_event +#include + +// Maximum allowed exponent value for converting string to decimal +#define MAX_EXPONENT 1024 + +/***************************************************************************** + Instantiate templates and static variables +*****************************************************************************/ + +static const char *zero_timestamp="0000-00-00 00:00:00.000000"; +LEX_CSTRING temp_lex_str= {STRING_WITH_LEN("temp")}; + +uchar Field_null::null[1]={1}; +const char field_separator=','; + +#define 
DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE FLOATING_POINT_BUFFER +#define LONGLONG_TO_STRING_CONVERSION_BUFFER_SIZE 128 +#define DECIMAL_TO_STRING_CONVERSION_BUFFER_SIZE 128 +#define BLOB_PACK_LENGTH_TO_MAX_LENGH(arg) \ + ((ulong) ((1LL << MY_MIN(arg, 4) * 8) - 1)) + +// Column marked for read or the field set to read out of record[0] +bool Field::marked_for_read() const +{ + return !table || + (!table->read_set || + bitmap_is_set(table->read_set, field_index) || + (!(ptr >= table->record[0] && + ptr < table->record[0] + table->s->reclength))); +} + +/* + The name of this function is a bit misleading as in 10.4 we don't + have to test anymore if the field is computed. Instead we mark + changed fields with DBUG_FIX_WRITE_SET() in table.cc +*/ + +bool Field::marked_for_write_or_computed() const +{ + return (!table || + (!table->write_set || + bitmap_is_set(table->write_set, field_index) || + (!(ptr >= table->record[0] && + ptr < table->record[0] + table->s->reclength)))); +} + + +#define FLAGSTR(S,F) ((S) & (F) ? #F " " : "") + +/* + Rules for merging different types of fields in UNION + + NOTE: to avoid 256*256 table, gap in table types numeration is skipped + following #defines describe that gap and how to canculate number of fields + and index of field in this array. +*/ +const int FIELDTYPE_TEAR_FROM= (MYSQL_TYPE_BIT + 1); +const int FIELDTYPE_TEAR_TO= (MYSQL_TYPE_NEWDECIMAL - 1); +const int FIELDTYPE_LAST= 254; +const int FIELDTYPE_NUM= FIELDTYPE_TEAR_FROM + (FIELDTYPE_LAST - + FIELDTYPE_TEAR_TO); + +static inline int merge_type2index(enum_field_types merge_type) +{ + DBUG_ASSERT(merge_type < FIELDTYPE_TEAR_FROM || + merge_type > FIELDTYPE_TEAR_TO); + DBUG_ASSERT(merge_type <= FIELDTYPE_LAST); + if (merge_type < FIELDTYPE_TEAR_FROM) + return merge_type; + return FIELDTYPE_TEAR_FROM + (merge_type - FIELDTYPE_TEAR_TO) - 1; +} + + +/** + Implements data type merge rules for the built-in traditional data types. 
+ Used for operations such as: + - UNION + - CASE and its abbreviations COALESCE, IF, IFNULL + - LEAST/GREATEST + + Given Fields A and B of real_types a and b, we find the result type of + COALESCE(A, B) by querying: + field_types_merge_rules[field_type_to_index(a)][field_type_to_index(b)]. +*/ +static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]= +{ + /* MYSQL_TYPE_DECIMAL -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_NEWDECIMAL, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_NEWDECIMAL, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_DOUBLE, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_NEWDECIMAL, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_TINY -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_TINY, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_SHORT, MYSQL_TYPE_LONG, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_FLOAT, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_TINY, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_LONGLONG, MYSQL_TYPE_INT24, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + 
MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_SHORT -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_SHORT, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_SHORT, MYSQL_TYPE_LONG, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_FLOAT, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_SHORT, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_LONGLONG, MYSQL_TYPE_INT24, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_SHORT, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_LONG -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_LONG, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_LONG, MYSQL_TYPE_LONG, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + 
MYSQL_TYPE_DOUBLE, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_LONG, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONG, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_LONG, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_FLOAT -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_DOUBLE, MYSQL_TYPE_FLOAT, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_FLOAT, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_FLOAT, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_FLOAT, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_DOUBLE, MYSQL_TYPE_FLOAT, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_FLOAT, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_DOUBLE, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + 
/* MYSQL_TYPE_DOUBLE -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_DOUBLE, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_DOUBLE, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_DOUBLE, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_DOUBLE, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_DOUBLE, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_DOUBLE, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_NULL -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_TINY, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_SHORT, MYSQL_TYPE_LONG, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_FLOAT, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_NULL, MYSQL_TYPE_TIMESTAMP, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONGLONG, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_NEWDATE, MYSQL_TYPE_TIME, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_DATETIME, MYSQL_TYPE_YEAR, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_NEWDATE, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_BIT, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_ENUM, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_SET, MYSQL_TYPE_TINY_BLOB, + 
//MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_TIMESTAMP -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_TIMESTAMP, MYSQL_TYPE_TIMESTAMP, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_DATETIME, MYSQL_TYPE_DATETIME, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_DATETIME, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_NEWDATE, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_LONGLONG -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_LONGLONG, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONGLONG, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_DOUBLE, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_LONGLONG, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONGLONG, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_LONGLONG, + //MYSQL_TYPE_NEWDATE 
MYSQL_TYPE_VARCHAR + MYSQL_TYPE_NEWDATE, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_INT24 -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_INT24, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_INT24, MYSQL_TYPE_LONG, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_FLOAT, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_INT24, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_LONGLONG, MYSQL_TYPE_INT24, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_INT24, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_NEWDATE, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_DATE -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_NEWDATE, MYSQL_TYPE_DATETIME, + 
//MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_NEWDATE, MYSQL_TYPE_DATETIME, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_DATETIME, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_NEWDATE, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_TIME -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_TIME, MYSQL_TYPE_DATETIME, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_DATETIME, MYSQL_TYPE_TIME, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_DATETIME, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_NEWDATE, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_DATETIME -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_VARCHAR, 
MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_DATETIME, MYSQL_TYPE_DATETIME, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_DATETIME, MYSQL_TYPE_DATETIME, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_DATETIME, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_NEWDATE, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_YEAR -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_TINY, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_SHORT, MYSQL_TYPE_LONG, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_FLOAT, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_YEAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_LONGLONG, MYSQL_TYPE_INT24, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_YEAR, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + 
MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_NEWDATE -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_NEWDATE, MYSQL_TYPE_DATETIME, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_NEWDATE, MYSQL_TYPE_DATETIME, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_DATETIME, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_NEWDATE, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_VARCHAR -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + 
//MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_VARCHAR + }, + /* MYSQL_TYPE_BIT -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_BIT, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_BIT, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_NEWDECIMAL -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_NEWDECIMAL, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_NEWDECIMAL, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_DOUBLE, MYSQL_TYPE_DOUBLE, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_NEWDECIMAL, 
MYSQL_TYPE_NEWDECIMAL, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_NEWDECIMAL, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_NEWDECIMAL, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_ENUM -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_ENUM, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_SET -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + 
MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_SET, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + }, + /* MYSQL_TYPE_TINY_BLOB -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_TINY_BLOB, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_TINY_BLOB, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_TINY_BLOB, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_TINY_BLOB, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_TINY_BLOB, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_TINY_BLOB, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_TINY_BLOB, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_TINY_BLOB, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_TINY_BLOB, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_TINY_BLOB, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, 
MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_STRING + MYSQL_TYPE_TINY_BLOB + }, + /* MYSQL_TYPE_MEDIUM_BLOB -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_MEDIUM_BLOB, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_MEDIUM_BLOB, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_MEDIUM_BLOB, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_MEDIUM_BLOB, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_MEDIUM_BLOB, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_MEDIUM_BLOB, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_MEDIUM_BLOB, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_MEDIUM_BLOB, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_MEDIUM_BLOB, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_MEDIUM_BLOB, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_MEDIUM_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_MEDIUM_BLOB, + //MYSQL_TYPE_STRING + MYSQL_TYPE_MEDIUM_BLOB + }, + /* MYSQL_TYPE_LONG_BLOB -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_LONG_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_LONG_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_LONG_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_LONG_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_LONG_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_LONG_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_LONG_BLOB, 
MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_LONG_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_LONG_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_LONG_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_LONG_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_LONG_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_STRING + MYSQL_TYPE_LONG_BLOB + }, + /* MYSQL_TYPE_BLOB -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_BLOB, MYSQL_TYPE_BLOB, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_BLOB, MYSQL_TYPE_BLOB, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_BLOB, MYSQL_TYPE_BLOB, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_BLOB, MYSQL_TYPE_BLOB, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_BLOB, MYSQL_TYPE_BLOB, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_BLOB, MYSQL_TYPE_BLOB, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_BLOB, MYSQL_TYPE_BLOB, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_BLOB, MYSQL_TYPE_BLOB, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_BLOB, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_BLOB, MYSQL_TYPE_BLOB, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_BLOB, MYSQL_TYPE_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_BLOB, + //MYSQL_TYPE_STRING + MYSQL_TYPE_BLOB + }, + /* MYSQL_TYPE_VAR_STRING -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + 
//MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_VARCHAR, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_VARCHAR + }, + /* MYSQL_TYPE_STRING -> */ + { + //MYSQL_TYPE_DECIMAL MYSQL_TYPE_TINY + MYSQL_TYPE_STRING, MYSQL_TYPE_STRING, + //MYSQL_TYPE_SHORT MYSQL_TYPE_LONG + MYSQL_TYPE_STRING, MYSQL_TYPE_STRING, + //MYSQL_TYPE_FLOAT MYSQL_TYPE_DOUBLE + MYSQL_TYPE_STRING, MYSQL_TYPE_STRING, + //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP + MYSQL_TYPE_STRING, MYSQL_TYPE_STRING, + //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 + MYSQL_TYPE_STRING, MYSQL_TYPE_STRING, + //MYSQL_TYPE_DATE MYSQL_TYPE_TIME + MYSQL_TYPE_STRING, MYSQL_TYPE_STRING, + //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR + MYSQL_TYPE_STRING, MYSQL_TYPE_STRING, + //MYSQL_TYPE_NEWDATE MYSQL_TYPE_VARCHAR + MYSQL_TYPE_STRING, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_BIT <16>-<245> + MYSQL_TYPE_STRING, + //MYSQL_TYPE_NEWDECIMAL MYSQL_TYPE_ENUM + MYSQL_TYPE_STRING, MYSQL_TYPE_STRING, + //MYSQL_TYPE_SET MYSQL_TYPE_TINY_BLOB + MYSQL_TYPE_STRING, MYSQL_TYPE_TINY_BLOB, + //MYSQL_TYPE_MEDIUM_BLOB MYSQL_TYPE_LONG_BLOB + MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_LONG_BLOB, + //MYSQL_TYPE_BLOB MYSQL_TYPE_VAR_STRING + MYSQL_TYPE_BLOB, MYSQL_TYPE_VARCHAR, + //MYSQL_TYPE_STRING + MYSQL_TYPE_STRING + } +}; + + +const Type_handler * +Type_handler::aggregate_for_result_traditional(const Type_handler *a, + const Type_handler 
*b) +{ + if (a == b) + { + /* + If two traditional handlers are equal, quickly return "a". + Some handlers (e.g. Type_handler_bool) pretend to be traditional, + but in fact they are not traditional in full extent, they are + only sub-types for now (and don't have a corresponding Field_xxx yet). + Here we preserve such handlers during aggregation. + As a result, COALESCE(true,true) preserves the "boolean" data type. + + Need to do this conversion for deprecated data types, + similar to what field_type_merge_rules[][] does. + */ + switch (a->field_type()) { + case MYSQL_TYPE_DECIMAL: return &type_handler_newdecimal; + case MYSQL_TYPE_DATE: return &type_handler_newdate; + case MYSQL_TYPE_VAR_STRING: return &type_handler_varchar; + default: break; + } + return a; + } + enum_field_types ta= a->traditional_merge_field_type(); + enum_field_types tb= b->traditional_merge_field_type(); + enum_field_types res= field_types_merge_rules[merge_type2index(ta)] + [merge_type2index(tb)]; + return Type_handler::get_handler_by_real_type(res); +} + + +bool Field::check_assignability_from(const Type_handler *from, + bool ignore) const +{ + /* + Using type_handler_for_item_field() here to get the data type handler + on both sides. This is needed to make sure aggregation for Field + works the same way with how Item_field aggregates for UNION or CASE, + so these statements: + SELECT a FROM t1 UNION SELECT b FROM t1; // Item_field vs Item_field + UPDATE t1 SET a=b; // Field vs Item_field + either both return "Illegal parameter data types" or both pass + the data type compatibility test. + For MariaDB standard data types, using type_handler_for_item_field() + turns ENUM/SET into just CHAR. 
+ */ + Type_handler_hybrid_field_type th(type_handler()-> + type_handler_for_item_field()); + if (th.aggregate_for_result(from->type_handler_for_item_field())) + { + bool error= (!ignore && get_thd()->is_strict_mode()) || + (type_handler()->is_scalar_type() != from->is_scalar_type()); + /* + Display fully qualified column name for table columns. + Display non-qualified names for other things, + e.g. SP variables, SP return values, SP and CURSOR parameters. + */ + if (table->s->db.str && table->s->table_name.str) + my_printf_error(ER_ILLEGAL_PARAMETER_DATA_TYPES2_FOR_OPERATION, + "Cannot cast '%s' as '%s' in assignment of %`s.%`s.%`s", + MYF(error ? 0 : ME_WARNING), + from->name().ptr(), type_handler()->name().ptr(), + table->s->db.str, table->s->table_name.str, + field_name.str); + else + my_printf_error(ER_ILLEGAL_PARAMETER_DATA_TYPES2_FOR_OPERATION, + "Cannot cast '%s' as '%s' in assignment of %`s", + MYF(error ? 0 : ME_WARNING), + from->name().ptr(), type_handler()->name().ptr(), + field_name.str); + return error; + } + return false; +} + + +/* + Test if the given string contains important data: + not spaces for character string, + or any data for binary string. + + SYNOPSIS + test_if_important_data() + cs Character set + str String to test + strend String end + + RETURN + FALSE - If string does not have important data + TRUE - If string has some important data +*/ + +static bool +test_if_important_data(CHARSET_INFO *cs, const char *str, const char *strend) +{ + if (cs != &my_charset_bin) + str+= cs->scan(str, strend, MY_SEQ_SPACES); + return (str < strend); +} + + +/** + Function to compare two unsigned integers for their relative order. + Used below. In an anonymous namespace to not clash with definitions + in other files. 
+ */ + +CPP_UNNAMED_NS_START + +int compare(unsigned int a, unsigned int b) +{ + if (a < b) + return -1; + if (b < a) + return 1; + return 0; +} + +CPP_UNNAMED_NS_END + + +/***************************************************************************** + Static help functions +*****************************************************************************/ + +/* + @brief + Create a fixed size sort key part + + @param buff buffer where values are written + @param length fixed size of the sort column +*/ + +void Field::make_sort_key_part(uchar *buff,uint length) +{ + if (maybe_null()) + { + if (is_null()) + { + bzero(buff, length + 1); + return; + } + *buff++= 1; + } + sort_string(buff, length); +} + + +/* + @brief + Create a packed sort key part + + @param buff buffer where values are written + @param sort_field sort column structure + + @retval + length of the bytes written, does not include the NULL bytes +*/ +uint +Field::make_packed_sort_key_part(uchar *buff, + const SORT_FIELD_ATTR *sort_field) +{ + if (maybe_null()) + { + if (is_null()) + { + *buff++= 0; + return 0; // For NULL values don't write any data + } + *buff++=1; + } + sort_string(buff, sort_field->original_length); + return sort_field->original_length; +} + + +uint +Field_longstr::make_packed_sort_key_part(uchar *buff, + const SORT_FIELD_ATTR *sort_field) +{ + if (maybe_null()) + { + if (is_null()) + { + *buff++= 0; + return 0; // For NULL values don't write any data + } + *buff++=1; + } + uchar *end= pack_sort_string(buff, sort_field); + return (uint) (end-buff); +} + + +uchar* +Field_longstr::pack_sort_string(uchar *to, const SORT_FIELD_ATTR *sort_field) +{ + StringBuffer buf; + val_str(&buf, &buf); + return to + sort_field->pack_sort_string(to, &buf, field_charset()); +} + + +/** + @brief + Determine the relative position of the field value in a numeric interval + + @details + The function returns a double number between 0.0 and 1.0 as the relative + position of the value of the this field in the 
numeric interval of [min,max]. + If the value is not in the interval the the function returns 0.0 when + the value is less than min, and, 1.0 when the value is greater than max. + + @param min value of the left end of the interval + @param max value of the right end of the interval + + @return + relative position of the field value in the numeric interval [min,max] +*/ + +double pos_in_interval_for_double(double midp_val, double min_val, + double max_val) +{ + double n, d; + n= midp_val - min_val; + if (n < 0) + return 0.0; + d= max_val - min_val; + if (d <= 0) + return 1.0; + return MY_MIN(n/d, 1.0); +} + + +double Field::pos_in_interval_val_real(Field *min, Field *max) +{ + return pos_in_interval_for_double(val_real(), min->val_real(), + max->val_real()); +} + + +static +inline ulonglong char_prefix_to_ulonglong(uchar *src) +{ + uint sz= sizeof(ulonglong); + for (uint i= 0; i < sz/2; i++) + { + uchar tmp= src[i]; + src[i]= src[sz-1-i]; + src[sz-1-i]= tmp; + } + return uint8korr(src); +} + +/* + Compute res = a - b, without losing precision and taking care that these are + unsigned numbers. +*/ +static inline double safe_substract(ulonglong a, ulonglong b) +{ + return (a > b)? double(a - b) : -double(b - a); +} + + +/** + @brief + Determine the relative position of the field value in a string interval + + @details + The function returns a double number between 0.0 and 1.0 as the relative + position of the value of the this field in the string interval of [min,max]. + If the value is not in the interval the the function returns 0.0 when + the value is less than min, and, 1.0 when the value is greater than max. + + @note + To calculate the relative position of the string value v in the interval + [min, max] the function first converts the beginning of these three + strings v, min, max into the strings that are used for byte comparison. + For each string not more sizeof(ulonglong) first bytes are taken + from the result of conversion. 
Then these bytes are interpreted as the + big-endian representation of an ulonglong integer. The values of these + integer numbers obtained for the strings v, min, max are used to calculate + the position of v in [min,max] in the same way is it's done for numeric + fields (see Field::pos_in_interval_val_real). + + @todo + Improve the procedure for the case when min and max have the same + beginning + + @param min value of the left end of the interval + @param max value of the right end of the interval + + @return + relative position of the field value in the string interval [min,max] +*/ + +double Field::pos_in_interval_val_str(Field *min, Field *max, uint data_offset) +{ + return pos_in_interval_for_string(charset(), + ptr + data_offset, data_length(), + min->ptr + data_offset, min->data_length(), + max->ptr + data_offset, max->data_length() + ); +} + + +double pos_in_interval_for_string(CHARSET_INFO *cset, + const uchar *midp_val, uint32 midp_len, + const uchar *min_val, uint32 min_len, + const uchar *max_val, uint32 max_len) +{ + uchar mp_prefix[sizeof(ulonglong)]; + uchar minp_prefix[sizeof(ulonglong)]; + uchar maxp_prefix[sizeof(ulonglong)]; + ulonglong mp, minp, maxp; + + cset->strnxfrm(mp_prefix, sizeof(mp), midp_val, midp_len); + cset->strnxfrm(minp_prefix, sizeof(minp), min_val, min_len); + cset->strnxfrm(maxp_prefix, sizeof(maxp), max_val, max_len); + + mp= char_prefix_to_ulonglong(mp_prefix); + minp= char_prefix_to_ulonglong(minp_prefix); + maxp= char_prefix_to_ulonglong(maxp_prefix); + + double n, d; + n= safe_substract(mp, minp); + if (n < 0) + return 0.0; + d= safe_substract(maxp, minp); + if (d <= 0) + return 1.0; + return MY_MIN(n/d, 1.0); +} + + +bool Field::test_if_equality_guarantees_uniqueness(const Item *item) const +{ + DBUG_ASSERT(cmp_type() != STRING_RESULT); // For STRING_RESULT see Field_str + /* + We use result_type() rather than cmp_type() in the below condition, + because it covers a special case that string literals guarantee 
uniqueness + for temporal columns, so the query: + WHERE temporal_column='string' + cannot return multiple distinct temporal values. + + TODO: perhaps we could allow INT/DECIMAL/DOUBLE types for temporal items. + */ + return result_type() == item->result_type(); +} + + +/** + Check whether a field item can be substituted for an equal item + + @details + The function checks whether a substitution of a field item for + an equal item is valid. + + @param arg *arg != NULL <-> the field is in the context + where substitution for an equal item is valid + + @note + The following statement is not always true: + @n + x=y => F(x)=F(x/y). + @n + This means substitution of an item for an equal item not always + yields an equavalent condition. Here's an example: + @code + 'a'='a ' + (LENGTH('a')=1) != (LENGTH('a ')=2) + @endcode + Such a substitution is surely valid if either the substituted + field is not of a STRING type or if it is an argument of + a comparison predicate. + + @retval + TRUE substitution is valid + @retval + FALSE otherwise +*/ + +bool Field::can_be_substituted_to_equal_item(const Context &ctx, + const Item_equal *item_equal) +{ + DBUG_ASSERT(item_equal->compare_type_handler()->cmp_type() != STRING_RESULT); + DBUG_ASSERT(cmp_type() != STRING_RESULT); + switch (ctx.subst_constraint()) { + case ANY_SUBST: + /* + Disable const propagation for items used in different comparison contexts. + This must be done because, for example, Item_hex_string->val_int() is not + the same as (Item_hex_string->val_str() in BINARY column)->val_int(). + We cannot simply disable the replacement in a particular context ( + e.g. = AND = ) since + Items don't know the context they are in and there are functions like + IF (, 'yes', 'no'). 
+ */ + return ctx.compare_type_handler() == item_equal->compare_type_handler(); + case IDENTITY_SUBST: + return true; + } + return false; +} + + +bool Field::cmp_is_done_using_type_handler_of_this(const Item_bool_func *cond, + const Item *item) const +{ + /* + We could eventually take comparison_type_handler() from cond, + instead of calculating it again. But only some descendants of + Item_bool_func has this method. So this needs some hierarchy changes. + Another option is to pass "class Context" to this method. + */ + Type_handler_hybrid_field_type cmp(type_handler_for_comparison()); + return !cmp.aggregate_for_comparison(item->type_handler_for_comparison()) && + cmp.type_handler() == type_handler_for_comparison(); +} + + +/* + This handles all numeric and BIT data types. +*/ +Data_type_compatibility +Field::can_optimize_keypart_ref(const Item_bool_func *cond, + const Item *item) const +{ + DBUG_ASSERT(cmp_type() != STRING_RESULT); + DBUG_ASSERT(cmp_type() != TIME_RESULT); + return item->cmp_type() != TIME_RESULT ? + Data_type_compatibility::OK : + Data_type_compatibility::INCOMPATIBLE_DATA_TYPE; +} + + +/* + This handles all numeric and BIT data types. +*/ +Data_type_compatibility +Field::can_optimize_group_min_max(const Item_bool_func *cond, + const Item *const_item) const +{ + DBUG_ASSERT(cmp_type() != STRING_RESULT); + DBUG_ASSERT(cmp_type() != TIME_RESULT); + return const_item->cmp_type() != TIME_RESULT ? + Data_type_compatibility::OK : + Data_type_compatibility::INCOMPATIBLE_DATA_TYPE; +} + + +/* + This covers all numeric types, BIT +*/ +Data_type_compatibility +Field::can_optimize_range(const Item_bool_func *cond, + const Item *item, + bool is_eq_func) const +{ + DBUG_ASSERT(cmp_type() != TIME_RESULT); // Handled in Field_temporal + DBUG_ASSERT(cmp_type() != STRING_RESULT); // Handled in Field_str descendants + return item->cmp_type() != TIME_RESULT ? 
+ Data_type_compatibility::OK : + Data_type_compatibility::INCOMPATIBLE_DATA_TYPE; +} + + +int Field::store_hex_hybrid(const char *str, size_t length) +{ + DBUG_ASSERT(result_type() != STRING_RESULT); + ulonglong nr; + + if (length > 8) + { + nr= flags & UNSIGNED_FLAG ? ULONGLONG_MAX : LONGLONG_MAX; + goto warn; + } + nr= (ulonglong) longlong_from_hex_hybrid(str, length); + if ((length == 8) && !(flags & UNSIGNED_FLAG) && (nr > LONGLONG_MAX)) + { + nr= LONGLONG_MAX; + goto warn; + } + return store((longlong) nr, true); // Assume hex numbers are unsigned + +warn: + if (!store((longlong) nr, true)) + set_warning(Sql_condition::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1); + return 1; +} + + +/** + If a field does not have a corresponding data, it's behavior can vary: + - In case of the fixed file format + it's set to the default value for the data type, + such as 0 for numbers or '' for strings. + - In case of a non-fixed format + it's set to NULL for nullable fields, and + it's set to the default value for the data type for NOT NULL fields. + This seems to be by design. +*/ +bool Field::load_data_set_no_data(THD *thd, bool fixed_format) +{ + reset(); // Do not use the DEFAULT value + if (fixed_format) + { + set_notnull(); + /* + We're loading a fixed format file, e.g.: + LOAD DATA INFILE 't1.txt' INTO TABLE t1 FIELDS TERMINATED BY ''; + Suppose the file ended unexpectedly and no data was provided for an + auto-increment column in the current row. + Historically, if sql_mode=NO_AUTO_VALUE_ON_ZERO, then the column value + is set to 0 in such case (the next auto_increment value is not used). + This behaviour was introduced by the fix for "bug#12053" in mysql-4.1. + Note, loading a delimited file works differently: + "no data" is not converted to 0 on NO_AUTO_VALUE_ON_ZERO: + it's considered as equal to setting the column to NULL, + which is then replaced to the next auto_increment value. + This difference seems to be intentional. 
+ */ + if (this == table->next_number_field) + table->auto_increment_field_not_null= true; + } + set_has_explicit_value(); // Do not auto-update this field + return false; +} + + +bool Field::load_data_set_null(THD *thd) +{ + reset(); + set_null(); + if (!maybe_null()) + { + if (this != table->next_number_field) + set_warning(Sql_condition::WARN_LEVEL_WARN, ER_WARN_NULL_TO_NOTNULL, 1); + } + set_has_explicit_value(); // Do not auto-update this field + return false; +} + + +void Field::load_data_set_value(const char *pos, uint length, + CHARSET_INFO *cs) +{ + /* + Mark field as not null, we should do this for each row because of + restore_record... + */ + set_notnull(); + if (this == table->next_number_field) + table->auto_increment_field_not_null= true; + store(pos, length, cs); + set_has_explicit_value(); // Do not auto-update this field +} + + +bool Field::sp_prepare_and_store_item(THD *thd, Item **value) +{ + DBUG_ENTER("Field::sp_prepare_and_store_item"); + DBUG_ASSERT(value); + + Item *expr_item; + + if (!(expr_item= thd->sp_fix_func_item_for_assignment(this, value))) + goto error; + + /* Save the value in the field. Convert the value if needed. */ + + expr_item->save_in_field(this, 0); + + if (likely(!thd->is_error())) + DBUG_RETURN(false); + +error: + /* + In case of error during evaluation, leave the result field set to NULL. + Sic: we can't do it in the beginning of the function because the + result field might be needed for its own re-evaluation, e.g. case of + set x = x + 1; + */ + set_null(); + DBUG_ASSERT(thd->is_error()); + DBUG_RETURN(true); +} + + +void Field::error_generated_column_function_is_not_allowed(THD *thd, + bool error) const +{ + StringBuffer<64> tmp; + vcol_info->expr->print(&tmp, (enum_query_type) + (QT_TO_SYSTEM_CHARSET | + QT_ITEM_IDENT_SKIP_DB_NAMES | + QT_ITEM_IDENT_SKIP_TABLE_NAMES)); + my_error(ER_GENERATED_COLUMN_FUNCTION_IS_NOT_ALLOWED, + MYF(error ? 
0 : ME_WARNING), + tmp.c_ptr_safe(), vcol_info->get_vcol_type_name(), + const_cast(field_name.str)); +} + + +/* + Check if an indexed or a persistent virtual column depends on sql_mode flags + that it cannot handle. + See sql_mode.h for details. +*/ +bool Field::check_vcol_sql_mode_dependency(THD *thd, vcol_init_mode mode) const +{ + DBUG_ASSERT(vcol_info); + if ((flags & PART_KEY_FLAG) != 0 || stored_in_db()) + { + Sql_mode_dependency valdep= vcol_info->expr->value_depends_on_sql_mode(); + sql_mode_t cnvdep= conversion_depends_on_sql_mode(thd, vcol_info->expr); + Sql_mode_dependency dep= + (valdep | Sql_mode_dependency(0, cnvdep)) & + Sql_mode_dependency(~0, ~can_handle_sql_mode_dependency_on_store()); + if (dep) + { + bool error= (mode & VCOL_INIT_DEPENDENCY_FAILURE_IS_ERROR) != 0; + error_generated_column_function_is_not_allowed(thd, error); + dep.push_dependency_warnings(thd); + return error; + } + } + return false; +} + + +bool Field::make_empty_rec_store_default_value(THD *thd, Item *item) +{ + DBUG_ASSERT(!(flags & BLOB_FLAG)); + int res= item->save_in_field(this, true); + return res != 0 && res != 3; +} + + +/** + Numeric fields base class constructor. 
+*/ +Field_num::Field_num(uchar *ptr_arg,uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, + decimal_digits_t dec_arg, bool zero_arg, bool unsigned_arg) + :Field(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg), + dec(dec_arg),zerofill(zero_arg),unsigned_flag(unsigned_arg) +{ + if (zerofill) + flags|=ZEROFILL_FLAG; + if (unsigned_flag) + flags|=UNSIGNED_FLAG; +} + + +void Field_num::prepend_zeros(String *value) const +{ + int diff; + if ((diff= (int) (field_length - value->length())) > 0) + { + const bool error= value->realloc(field_length); + if (likely(!error)) + { + bmove_upp((uchar*) value->ptr()+field_length, + (uchar*) value->ptr()+value->length(), + value->length()); + bfill((uchar*) value->ptr(),diff,'0'); + value->length(field_length); + } + } +} + + +sql_mode_t Field_num::can_handle_sql_mode_dependency_on_store() const +{ + return MODE_PAD_CHAR_TO_FULL_LENGTH; +} + + +Item *Field_num::get_equal_zerofill_const_item(THD *thd, const Context &ctx, + Item *const_item) +{ + switch (ctx.subst_constraint()) { + case IDENTITY_SUBST: + return NULL; // Not safe to propagate if not in comparison. See MDEV-8369. + case ANY_SUBST: + break; + } + DBUG_ASSERT(const_item->const_item()); + DBUG_ASSERT(ctx.compare_type_handler()->cmp_type() != STRING_RESULT); + return const_item; +} + + +/** +Construct warning parameters using thd->no_errors + to determine whether to generate or suppress warnings. + We can get here in a query like this: + SELECT COUNT(@@basedir); + from Item_func_get_system_var::update_null_value(). 
+*/ +Value_source::Warn_filter::Warn_filter(const THD *thd) + :m_want_warning_edom(!thd->no_errors), + m_want_note_truncated_spaces(!thd->no_errors) +{ } + + +/** + Check string-to-number conversion and produce a warning if + - could not convert any digits (EDOM-alike error) + - found garbage at the end of the string + - found trailing spaces (a note) + See also Field_num::check_edom_and_truncation() for a similar function. + + @param thd - the thread + @param filter - which warnings/notes are allowed + @param type - name of the data type (e.g. "INTEGER", "DECIMAL", "DOUBLE") + @param cs - character set of the original string + @param str - the original string + @param end - the end of the string + + Unlike Field_num::check_edom_and_truncation(), this function does not + distinguish between EDOM and truncation and reports the same warning for + both cases. Perhaps we should eventually print different warnings, to make + the explicit CAST work closer to the implicit cast in Field_xxx::store(). +*/ +void +Value_source::Converter_string_to_number::check_edom_and_truncation(THD *thd, + Warn_filter filter, + const char *type, + CHARSET_INFO *cs, + const char *str, + size_t length) const +{ + DBUG_ASSERT(str <= m_end_of_num); + DBUG_ASSERT(m_end_of_num <= str + length); + if (m_edom || (m_end_of_num < str + length && + !check_if_only_end_space(cs, m_end_of_num, str + length))) + { + // EDOM or important trailing data truncation + if (filter.want_warning_edom()) + { + /* + We can use err.ptr() here as ErrConvString is guaranteed to put an + end \0 here. + */ + THD *wthd= thd ? thd : current_thd; + push_warning_printf(wthd, Sql_condition::WARN_LEVEL_WARN, + ER_TRUNCATED_WRONG_VALUE, + ER_THD(wthd, ER_TRUNCATED_WRONG_VALUE), type, + ErrConvString(str, length, cs).ptr()); + } + } + else if (m_end_of_num < str + length) + { + // Unimportant trailing data (spaces) truncation + if (filter.want_note_truncated_spaces()) + { + THD *wthd= thd ? 
thd : current_thd; + push_warning_printf(wthd, Sql_condition::WARN_LEVEL_NOTE, + ER_TRUNCATED_WRONG_VALUE, + ER_THD(wthd, ER_TRUNCATED_WRONG_VALUE), type, + ErrConvString(str, length, cs).ptr()); + } + } +} + + +/** + Check a string-to-number conversion routine result and generate warnings + in case when it: + - could not convert any digits + - found garbage at the end of the string. + + @param type Data type name (e.g. "decimal", "integer", "double") + @param edom Indicates that the string-to-number routine returned + an error code equivalent to EDOM (value out of domain), + i.e. the string fully consisted of garbage and the + conversion routine could not get any digits from it. + @param str The original string + @param length Length of 'str' + @param cs Character set + @param end Pointer to char after last used digit + + @note + This is called after one has called one of the following functions: + - strntoull10rnd() + - strntod() + - str2my_decimal() + + @retval + 0 OK + @retval + 1 error: could not scan any digits (EDOM), + e.g. empty string, or garbage. + @retval + 2 error: scanned some digits, + but then found garbage at the end of the string. +*/ + + +int Field_num::check_edom_and_important_data_truncation(const char *type, + bool edom, + CHARSET_INFO *cs, + const char *str, size_t length, + const char *end) +{ + /* Test if we get an empty string or garbage */ + if (edom) + { + ErrConvString err(str, length, cs); + set_warning_truncated_wrong_value(type, err.ptr()); + return 1; + } + /* Test if we have garbage at the end of the given string. 
*/ + if (test_if_important_data(cs, end, str + length)) + { + set_warning(WARN_DATA_TRUNCATED, 1); + return 2; + } + return 0; +} + + +int Field_num::check_edom_and_truncation(const char *type, bool edom, + CHARSET_INFO *cs, + const char *str, size_t length, + const char *end) +{ + int rc= check_edom_and_important_data_truncation(type, edom, + cs, str, length, end); + if (!rc && end < str + length) + set_note(WARN_DATA_TRUNCATED, 1); + return rc; +} + + +/* + Convert a string to an integer then check bounds. + + SYNOPSIS + Field_num::get_int + cs Character set + from String to convert + len Length of the string + rnd OUT longlong value + unsigned_max max unsigned value + signed_min min signed value + signed_max max signed value + + DESCRIPTION + The function calls strntoull10rnd() to get an integer value then + check bounds and errors returned. In case of any error a warning + is raised. + + RETURN + 0 ok + 1 error +*/ + +bool Field_num::get_int(CHARSET_INFO *cs, const char *from, size_t len, + longlong *rnd, ulonglong unsigned_max, + longlong signed_min, longlong signed_max) +{ + char *end; + int error; + + *rnd= (longlong) cs->strntoull10rnd(from, len, + unsigned_flag, &end, + &error); + if (unsigned_flag) + { + + if ((((ulonglong) *rnd > unsigned_max) && + (*rnd= (longlong) unsigned_max)) || + error == MY_ERRNO_ERANGE) + { + goto out_of_range; + } + } + else + { + if (*rnd < signed_min) + { + *rnd= signed_min; + goto out_of_range; + } + else if (*rnd > signed_max) + { + *rnd= signed_max; + goto out_of_range; + } + } + if (get_thd()->count_cuted_fields > CHECK_FIELD_EXPRESSION && + check_int(cs, from, len, end, error)) + return 1; + + return error && get_thd()->count_cuted_fields == CHECK_FIELD_EXPRESSION; + +out_of_range: + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + return 1; +} + + +double Field_real::get_double(const char *str, size_t length, CHARSET_INFO *cs, + int *error) +{ + char *end; + double nr= cs->strntod((char*) str, length, &end, error); + if 
(unlikely(*error)) + { + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + *error= 1; + } + else if (get_thd()->count_cuted_fields > CHECK_FIELD_EXPRESSION && + check_edom_and_truncation("double", str == end, + cs, str, length, end)) + *error= 1; + return nr; +} + + +/** + Process decimal library return codes and issue warnings for overflow and + truncation. + + @param op_result decimal library return code (E_DEC_* see include/decimal.h) + + @retval + 1 there was overflow + @retval + 0 no error or some other errors except overflow +*/ + +int Field::warn_if_overflow(int op_result) +{ + if (op_result == E_DEC_OVERFLOW) + { + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + return 1; + } + if (op_result == E_DEC_TRUNCATED) + { + set_note(WARN_DATA_TRUNCATED, 1); + /* We return 0 here as this is not a critical issue */ + } + return 0; +} + + +/** + Interpret field value as an integer but return the result as a string. + + This is used for printing bit_fields as numbers while debugging. +*/ + +String *Field::val_int_as_str(String *val_buffer, bool unsigned_val) +{ + DBUG_ASSERT(marked_for_read()); + CHARSET_INFO *cs= &my_charset_bin; + uint length; + longlong value= val_int(); + + if (val_buffer->alloc(MY_INT64_NUM_DECIMAL_DIGITS)) + return 0; + length= (uint) (cs->longlong10_to_str)((char*) val_buffer->ptr(), + MY_INT64_NUM_DECIMAL_DIGITS, + unsigned_val ? 
10 : -10, + value); + val_buffer->length(length); + return val_buffer; +} + + +/// This is used as a table name when the table structure is not set up +Field::Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg, + uchar null_bit_arg, + utype unireg_check_arg, const LEX_CSTRING *field_name_arg) + :ptr(ptr_arg), + null_ptr(null_ptr_arg), table(0), orig_table(0), + table_name(0), field_name(*field_name_arg), option_list(0), + option_struct(0), key_start(0), part_of_key(0), + part_of_key_not_clustered(0), part_of_sortkey(0), + unireg_check(unireg_check_arg), invisible(VISIBLE), field_length(length_arg), + null_bit(null_bit_arg), is_created_from_null_item(FALSE), + read_stats(NULL), collected_stats(0), vcol_info(0), check_constraint(0), + default_value(0) +{ + flags=null_ptr ? 0: NOT_NULL_FLAG; + comment.str= (char*) ""; + comment.length=0; + field_index= 0; + cond_selectivity= 1.0; + next_equal_field= NULL; +} + + +void Field::hash_not_null(Hasher *hasher) +{ + DBUG_ASSERT(marked_for_read()); + DBUG_ASSERT(!is_null()); + hasher->add(sort_charset(), ptr, pack_length()); +} + +size_t +Field::do_last_null_byte() const +{ + DBUG_ASSERT(null_ptr == NULL || null_ptr >= table->record[0]); + if (null_ptr) + return (size_t) (null_ptr - table->record[0]) + 1; + return LAST_NULL_BYTE_UNDEF; +} + + +void Field::copy_from_tmp(int row_offset) +{ + memcpy(ptr,ptr+row_offset,pack_length()); + if (null_ptr) + { + *null_ptr= (uchar) ((null_ptr[0] & (uchar) ~(uint) null_bit) | + (null_ptr[row_offset] & (uchar) null_bit)); + } +} + + +bool Field::send(Protocol *protocol) +{ + StringBuffer tmp(charset()); + val_str(&tmp); + return protocol->store(tmp.ptr(), tmp.length(), tmp.charset()); +} + + +bool Field_num::send_numeric_zerofill_str(Protocol_text *protocol, + protocol_send_type_t send_type) +{ + DBUG_ASSERT(marked_for_read()); + StringBuffer tmp(&my_charset_latin1); + val_str(&tmp); + return protocol->store_numeric_zerofill_str(tmp.ptr(), + tmp.length(), + send_type); +} + + +/** 
+ Check to see if field size is compatible with destination. + + This method is used in row-based replication to verify that the + slave's field size is less than or equal to the master's field + size. The encoded field metadata (from the master or source) is + decoded and compared to the size of this field (the slave or + destination). + + @note + + The comparison is made so that if the source data (from the master) + is less than the target data (on the slave), -1 is returned in @c + *order_var. This implies that a conversion is + necessary, but that it is lossy and can result in truncation of the + value. + + If the source data is strictly greater than the target data, 1 is + returned in *order_var. This implies that the source + type can is contained in the target type and that a conversion is + necessary but is non-lossy. + + If no conversion is required to fit the source type in the target + type, 0 is returned in *order_var. + + @param field_metadata Encoded size in field metadata + @param mflags Flags from the table map event for the table. + @param order_var Pointer to variable where the order + between the source field and this field + will be returned. + + @return @c true if this field's size is compatible with the + master's field size, @c false otherwise. 
+*/ +bool Field::compatible_field_size(uint field_metadata, + const Relay_log_info *rli_arg __attribute__((unused)), + uint16 mflags __attribute__((unused)), + int *order_var) const +{ + uint const source_size= pack_length_from_metadata(field_metadata); + uint const destination_size= row_pack_length(); + DBUG_PRINT("debug", ("real_type: %d, source_size: %u, destination_size: %u", + real_type(), source_size, destination_size)); + *order_var = compare(source_size, destination_size); + return true; +} + + +int Field::store(const char *to, size_t length, CHARSET_INFO *cs, + enum_check_fields check_level) +{ + Check_level_instant_set tmp_level(get_thd(), check_level); + return store(to, length, cs); +} + + +int Field::store_text(const char *to, size_t length, CHARSET_INFO *cs, + enum_check_fields check_level) +{ + Check_level_instant_set tmp_level(get_thd(), check_level); + return store_text(to, length, cs); +} + + +int Field::store_timestamp_dec(const timeval &ts, uint dec) +{ + return store_time_dec(Datetime(get_thd(), ts).get_mysql_time(), dec); +} + + +int Field::store_to_statistical_minmax_field(Field *field, String *val) +{ + val_str(val); + size_t length= Well_formed_prefix(val->charset(), val->ptr(), + MY_MIN(val->length(), field->field_length)).length(); + return field->store(val->ptr(), length, &my_charset_bin); +} + + +int Field::store_from_statistical_minmax_field(Field *stat_field, String *str, + MEM_ROOT *mem) +{ + stat_field->val_str(str); + return store_text(str->ptr(), str->length(), &my_charset_bin); +} + + +/* + Same as above, but store the string in the statistics mem_root to make it + easy to free everything by just freeing the mem_root. 
+*/ + +int Field_blob::store_from_statistical_minmax_field(Field *stat_field, + String *str, + MEM_ROOT *mem) +{ + String *tmp= stat_field->val_str(str); + uchar *ptr; + if (!(ptr= (uchar*) memdup_root(mem, tmp->ptr(), tmp->length()))) + { + set_ptr((uint32) 0, NULL); + return 1; + } + set_ptr(tmp->length(), ptr); + return 0; +} + + +/** + Pack the field into a format suitable for storage and transfer. + + To implement packing functionality, only the virtual function + should be overridden. The other functions are just convenience + functions and hence should not be overridden. + + @note The default method for packing fields just copy the raw bytes + of the record into the destination, but never more than + max_length characters. + + @param to + Pointer to memory area where representation of field should be put. + + @param from + Pointer to memory area where record representation of field is + stored. + + @param max_length + Maximum length of the field, as given in the column definition. For + example, for CHAR(1000), the max_length + is 1000. This information is sometimes needed to decide how to pack + the data. + +*/ +uchar * +Field::pack(uchar *to, const uchar *from, uint max_length) +{ + uint32 length= pack_length(); + set_if_smaller(length, max_length); + memcpy(to, from, length); + return to+length; +} + +/** + Unpack a field from row data. + + This method is used to unpack a field from a master whose size of + the field is less than that of the slave. + + The param_data parameter is a two-byte integer (stored + in the least significant 16 bits of the unsigned integer) usually + consisting of two parts: the real type in the most significant byte + and a original pack length in the least significant byte. + + The exact layout of the param_data field is given by + the Table_map_log_event::save_field_metadata(). + + This is the default method for unpacking a field. 
It just copies
+  the memory block in byte order (of original pack length bytes or
+  length of field, whichever is smaller).
+
+  @param   to         Destination of the data
+  @param   from       Source of the data
+  @param   param_data Real type and original pack length of the field
+                      data
+
+  @return  New pointer into memory based on from + length of the data
+  @return 0 if wrong data
+*/
+const uchar *
+Field::unpack(uchar* to, const uchar *from, const uchar *from_end,
+              uint param_data)
+{
+  uint length=pack_length(), len;
+  int from_type= 0;
+  /*
+    If from length is > 255, it has encoded data in the upper bits. Need
+    to mask it out.
+  */
+  if (param_data > 255)
+  {
+    from_type= (param_data & 0xff00) >> 8U;  // real_type.
+    param_data= param_data & 0x00ff;        // length.
+  }
+
+  /*
+    No usable metadata, identical sizes, or a different source real_type:
+    copy this field's own pack_length() bytes verbatim.
+  */
+  if ((param_data == 0) ||
+      (length == param_data) ||
+      (from_type != real_type()))
+  {
+    if (from + length > from_end)
+      return 0;                               // Error in data
+
+    memcpy(to, from, length);
+    return from+length;
+  }
+
+  /* Sizes differ: copy the smaller of the master's and our own length */
+  len= (param_data && (param_data < length)) ?
param_data : length;
+
+  if (from + len > from_end)
+    return 0;                                 // Error in data
+
+  memcpy(to, from, len);
+  return from+len;
+}
+
+
+/* Append " unsigned" / " zerofill" attributes to a numeric type name */
+void Field_num::add_zerofill_and_unsigned(String &res) const
+{
+  if (unsigned_flag)
+    res.append(STRING_WITH_LEN(" unsigned"));
+  if (zerofill)
+    res.append(STRING_WITH_LEN(" zerofill"));
+}
+
+
+/*
+  Fill a Send_field with this field's client-visible metadata:
+  database/table/column names, display length, type handler and flags.
+*/
+void Field::make_send_field(Send_field *field)
+{
+  if (orig_table && orig_table->s->db.str && *orig_table->s->db.str)
+  {
+    field->db_name= orig_table->s->db;
+    /* For INFORMATION_SCHEMA tables report the schema table name */
+    if (orig_table->pos_in_table_list &&
+        orig_table->pos_in_table_list->schema_table)
+      field->org_table_name= Lex_cstring_strlen(orig_table->pos_in_table_list->
+                                                schema_table->table_name);
+    else
+      field->org_table_name= orig_table->s->table_name;
+  }
+  else
+    field->org_table_name= field->db_name= empty_clex_str;
+  if (orig_table && orig_table->alias.ptr())
+  {
+    orig_table->alias.get_value(&field->table_name);
+    field->org_col_name= field_name;
+  }
+  else
+  {
+    field->table_name= empty_clex_str;
+    field->org_col_name= empty_clex_str;
+  }
+  field->col_name= field_name;
+  field->length=field_length;
+  field->set_handler(type_handler());
+  /* Outer joins etc. may make a NOT NULL field nullable in the result */
+  field->flags=table->maybe_null ? (flags & ~NOT_NULL_FLAG) : flags;
+  field->decimals= 0;
+}
+
+
+/**
+  Conversion from decimal to longlong with checking overflow and
+  setting correct value (min/max) in case of overflow.
+ + @param val value which have to be converted + @param unsigned_flag type of integer in which we convert val + @param err variable to pass error code + + @return + value converted from val +*/ +longlong Field::convert_decimal2longlong(const my_decimal *val, + bool unsigned_flag, int *err) +{ + longlong i; + if (unsigned_flag) + { + if (val->sign()) + { + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + i= 0; + *err= 1; + } + else if (warn_if_overflow(my_decimal2int((E_DEC_ERROR & + ~E_DEC_OVERFLOW & + ~E_DEC_TRUNCATED), + val, TRUE, &i))) + { + i= ~(longlong) 0; + *err= 1; + } + } + else if (warn_if_overflow(my_decimal2int((E_DEC_ERROR & + ~E_DEC_OVERFLOW & + ~E_DEC_TRUNCATED), + val, FALSE, &i))) + { + i= (val->sign() ? LONGLONG_MIN : LONGLONG_MAX); + *err= 1; + } + return i; +} + + +/** + Storing decimal in integer fields. + + @param val value for storing + + @note + This method is used by all integer fields, real/decimal redefine it + + @retval + 0 OK + @retval + !=0 error +*/ + +int Field_int::store_decimal(const my_decimal *val) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int err= 0; + longlong i= convert_decimal2longlong(val, unsigned_flag, &err); + return MY_TEST(err | store(i, unsigned_flag)); +} + + +/** + Return decimal value of integer field. + + @param decimal_value buffer for storing decimal value + + @note + This method is used by all integer fields, real/decimal redefine it. 
+ All longlong values fit in our decimal buffer which cal store 8*9=72 + digits of integer number + + @return + pointer to decimal buffer with value of field +*/ + +my_decimal* Field_int::val_decimal(my_decimal *decimal_value) +{ + DBUG_ASSERT(marked_for_read()); + longlong nr= val_int(); + int2my_decimal(E_DEC_FATAL_ERROR, nr, unsigned_flag, decimal_value); + return decimal_value; +} + + +bool Field_int::get_date(MYSQL_TIME *ltime,date_mode_t fuzzydate) +{ + DBUG_ASSERT(marked_for_read()); + Longlong_hybrid nr(val_int(), (flags & UNSIGNED_FLAG)); + return int_to_datetime_with_warn(get_thd(), nr, ltime, + fuzzydate, table->s, field_name.str); +} + + +bool Field_vers_trx_id::get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate, ulonglong trx_id) +{ + DBUG_ASSERT(marked_for_read()); + DBUG_ASSERT(ltime); + if (!table || !table->s) + return true; + DBUG_ASSERT(table->versioned(VERS_TRX_ID) || + (table->versioned() && table->s->table_category == TABLE_CATEGORY_TEMPORARY)); + if (!trx_id) + return true; + + THD *thd= get_thd(); + DBUG_ASSERT(thd); + if (trx_id == ULONGLONG_MAX) + { + thd->variables.time_zone->gmt_sec_to_TIME(ltime, TIMESTAMP_MAX_VALUE); + ltime->second_part= TIME_MAX_SECOND_PART; + return false; + } + if (cached == trx_id) + { + *ltime= cache; + return false; + } + + TR_table trt(thd); + bool found= trt.query(trx_id); + if (found) + { + trt[TR_table::FLD_COMMIT_TS]->get_date(&cache, fuzzydate); + *ltime= cache; + cached= trx_id; + return false; + } + + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_VERS_NO_TRX_ID, ER_THD(thd, ER_VERS_NO_TRX_ID), + (longlong) trx_id); + return true; +} + + +Field_str::Field_str(uchar *ptr_arg,uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, + const DTCollation &collation) + :Field(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg) +{ + m_collation= collation; + if (collation.collation->state & MY_CS_BINSORT) 
+ flags|=BINARY_FLAG; +} + + +bool Field_str::test_if_equality_guarantees_uniqueness(const Item *item) const +{ + /* + Can't guarantee uniqueness when comparing a CHAR/VARCHAR/TEXT, + BINARY/VARBINARY/BLOB, ENUM,SET columns to an item with cmp_type() + of INT_RESULT, DOUBLE_RESULT, DECIMAL_RESULT or TIME_RESULT. + Example: + SELECT * FROM t1 WHERE varchar_column=DATE'2001-01-01' + return non-unuque values, e.g. '2001-01-01' and '2001-01-01x'. + */ + if (!field_charset()->propagate(0, 0) || + item->cmp_type() != STRING_RESULT) + return false; + /* + Can't guarantee uniqueness when comparing to + an item of a different collation. + Example: + SELECT * FROM t1 + WHERE latin1_bin_column = _latin1'A' COLLATE latin1_swedish_ci + return non-unique values 'a' and 'A'. + */ + DTCollation tmp(dtcollation()); + return !tmp.aggregate(item->collation) && tmp.collation == field_charset(); +} + + +bool Field_str::can_be_substituted_to_equal_item(const Context &ctx, + const Item_equal *item_equal) +{ + DBUG_ASSERT(item_equal->compare_type_handler()->cmp_type() == STRING_RESULT); + switch (ctx.subst_constraint()) { + case ANY_SUBST: + return ctx.compare_type_handler() == item_equal->compare_type_handler() && + (ctx.compare_type_handler()->cmp_type() != STRING_RESULT || + ctx.compare_collation() == item_equal->compare_collation()); + case IDENTITY_SUBST: + return ((charset()->state & MY_CS_BINSORT) && + (charset()->state & MY_CS_NOPAD)); + } + return false; +} + + +void Field_str::change_charset(const DTCollation &new_cs) +{ + if (!has_charset()) + return; + + field_length= (field_length * new_cs.collation->mbmaxlen) / + m_collation.collation->mbmaxlen; + m_collation= new_cs; +} + + +void Field_num::make_send_field(Send_field *field) +{ + Field::make_send_field(field); + field->decimals= dec; +} + +/** + Decimal representation of Field_str. + + @param d value for storing + + @note + Field_str is the base class for fields like Field_enum, + Field_date and some similar. 
Some dates use fraction and also + string value should be converted to floating point value according + our rules, so we use double to store value of decimal in string. + + @todo + use decimal2string? + + @retval + 0 OK + @retval + !=0 error +*/ + +int Field_str::store_decimal(const my_decimal *d) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + double val; + /* TODO: use decimal2string? */ + int err= warn_if_overflow(my_decimal2double(E_DEC_FATAL_ERROR & + ~E_DEC_OVERFLOW, d, &val)); + return err | store(val); +} + + +my_decimal *Field_str::val_decimal(my_decimal *decimal_value) +{ + DBUG_ASSERT(marked_for_read()); + longlong nr= val_int(); + int2my_decimal(E_DEC_FATAL_ERROR, nr, 0, decimal_value); + return decimal_value; +} + + +uint Field::fill_cache_field(CACHE_FIELD *copy) +{ + uint store_length; + copy->str= ptr; + copy->length= pack_length_in_rec(); + copy->field= this; + if (flags & BLOB_FLAG) + { + copy->type= CACHE_BLOB; + copy->length-= portable_sizeof_char_ptr; + return copy->length; + } + else if (!zero_pack() && + (type() == MYSQL_TYPE_STRING && copy->length >= 4 && + copy->length < 256)) + { + copy->type= CACHE_STRIPPED; /* Remove end space */ + store_length= 2; + } + else if (type() == MYSQL_TYPE_VARCHAR) + { + copy->type= pack_length()-row_pack_length() == 1 ? 
CACHE_VARSTR1: + CACHE_VARSTR2; + store_length= 0; + } + else + { + copy->type= 0; + store_length= 0; + } + return copy->length + store_length; +} + + +bool Field::get_date(MYSQL_TIME *to, date_mode_t mode) +{ + StringBuffer<40> tmp; + Temporal::Warn_push warn(get_thd(), nullptr, nullptr, nullptr, to, mode); + Temporal_hybrid *t= new(to) Temporal_hybrid(get_thd(), &warn, + val_str(&tmp), mode); + return !t->is_valid_temporal(); +} + + +longlong Field::val_datetime_packed(THD *thd) +{ + MYSQL_TIME ltime, tmp; + if (get_date(<ime, Datetime::Options_cmp(thd))) + return 0; + if (ltime.time_type != MYSQL_TIMESTAMP_TIME) + return pack_time(<ime); + if (time_to_datetime_with_warn(thd, <ime, &tmp, TIME_CONV_NONE)) + return 0; + return pack_time(&tmp); +} + + +longlong Field::val_time_packed(THD *thd) +{ + MYSQL_TIME ltime; + Time::Options_cmp opt(thd); + if (get_date(<ime, opt)) + return 0; + if (ltime.time_type == MYSQL_TIMESTAMP_TIME) + return pack_time(<ime); + // Conversion from DATETIME or DATE to TIME is needed + return Time(thd, <ime, opt).to_packed(); +} + + +/** + This is called when storing a date in a string. + + @note + Needs to be changed if/when we want to support different time formats. +*/ + +int Field::store_time_dec(const MYSQL_TIME *ltime, uint dec) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + char buff[MAX_DATE_STRING_REP_LENGTH]; + uint length= (uint) my_TIME_to_str(ltime, buff, dec); + /* Avoid conversion when field character set is ASCII compatible */ + return store(buff, length, (charset()->state & MY_CS_NONASCII) ? 
+ &my_charset_latin1 : charset()); +} + + +bool Field::optimize_range(uint idx, uint part) const +{ + return MY_TEST(table->file->index_flags(idx, part, 1) & HA_READ_RANGE); +} + + +Field *Field::make_new_field(MEM_ROOT *root, TABLE *new_table, + bool keep_type __attribute__((unused))) +{ + Field *tmp; + if (!(tmp= (Field*) memdup_root(root,(char*) this,size_of()))) + return 0; + + if (tmp->table->maybe_null) + tmp->flags&= ~NOT_NULL_FLAG; + tmp->table= new_table; + tmp->key_start.init(0); + tmp->part_of_key.init(0); + tmp->part_of_sortkey.init(0); + tmp->read_stats= NULL; + /* + TODO: it is not clear why this method needs to reset unireg_check. + Try not to reset it, or explain why it needs to be reset. + */ + tmp->unireg_check= Field::NONE; + tmp->flags&= (NOT_NULL_FLAG | BLOB_FLAG | UNSIGNED_FLAG | + ZEROFILL_FLAG | BINARY_FLAG | ENUM_FLAG | SET_FLAG); + tmp->reset_fields(); + tmp->invisible= VISIBLE; + return tmp; +} + + +Field *Field::new_key_field(MEM_ROOT *root, TABLE *new_table, + uchar *new_ptr, uint32 length, + uchar *new_null_ptr, uint new_null_bit) +{ + Field *tmp; + if ((tmp= make_new_field(root, new_table, table == new_table))) + { + tmp->ptr= new_ptr; + tmp->null_ptr= new_null_ptr; + tmp->null_bit= new_null_bit; + } + return tmp; +} + + +/** + Create field for temporary table from given field. + + @param thd Thread handler + @param table Temporary table + @param maybe_null_arg If the result field should be NULL-able, + even if the original field is NOT NULL, e.g. for: + - OUTER JOIN fields + - WITH ROLLUP fields + - arguments of aggregate functions, e.g. SUM(column1) + @retval NULL, on error + @retval pointer to the new field created, on success. 
+*/ + +Field *Field::create_tmp_field(MEM_ROOT *mem_root, TABLE *new_table, + bool maybe_null_arg) +{ + Field *new_field; + + if ((new_field= make_new_field(mem_root, new_table, new_table == table))) + { + new_field->init_for_tmp_table(this, new_table); + new_field->flags|= flags & NO_DEFAULT_VALUE_FLAG; + if (maybe_null_arg) + new_field->flags&= ~NOT_NULL_FLAG; // Because of outer join + } + return new_field; +} + + +/* This is used to generate a field in TABLE from TABLE_SHARE */ + +Field *Field::clone(MEM_ROOT *root, TABLE *new_table) +{ + Field *tmp; + if ((tmp= (Field*) memdup_root(root,(char*) this,size_of()))) + { + tmp->init(new_table); + tmp->move_field_offset((my_ptrdiff_t) (new_table->record[0] - + new_table->s->default_values)); + } + return tmp; +} + + +Field *Field::clone(MEM_ROOT *root, TABLE *new_table, my_ptrdiff_t diff) +{ + Field *tmp; + if ((tmp= (Field*) memdup_root(root,(char*) this,size_of()))) + { + if (new_table) + tmp->init(new_table); + tmp->move_field_offset(diff); + } + return tmp; +} + + +int Field::set_default() +{ + if (default_value) + { + Query_arena backup_arena; + /* + TODO: this may impose memory leak until table flush. + See comment in + TABLE::update_virtual_fields(handler *, enum_vcol_update_mode). 
+ */ + table->in_use->set_n_backup_active_arena(table->expr_arena, &backup_arena); + int rc= default_value->expr->save_in_field(this, 0); + table->in_use->restore_active_arena(table->expr_arena, &backup_arena); + return rc; + } + /* Copy constant value stored in s->default_values */ + my_ptrdiff_t l_offset= (my_ptrdiff_t) (table->s->default_values - + table->record[0]); + memcpy(ptr, ptr + l_offset, pack_length_in_rec()); + if (maybe_null_in_table()) + *null_ptr= ((*null_ptr & (uchar) ~null_bit) | + (null_ptr[l_offset] & null_bit)); + return 0; +} + + +/**************************************************************************** + Field_null, a field that always return NULL +****************************************************************************/ + +void Field_null::sql_type(String &res) const +{ + res.set_ascii(STRING_WITH_LEN("null")); +} + + +bool Field_null::is_equal(const Column_definition &new_field) const +{ + DBUG_ASSERT(!compression_method()); + return new_field.type_handler() == type_handler() && + new_field.charset == field_charset() && + new_field.length == max_display_length(); +} + + +/**************************************************************************** + Field_row, e.g. for ROW-type SP variables +****************************************************************************/ + +Field_row::~Field_row() +{ + delete m_table; +} + + +bool Field_row::sp_prepare_and_store_item(THD *thd, Item **value) +{ + DBUG_ENTER("Field_row::sp_prepare_and_store_item"); + + if (value[0]->type() == Item::NULL_ITEM) + { + /* + We're in a auto-generated sp_inst_set, to assign + the explicit default NULL value to a ROW variable. + */ + m_table->set_all_fields_to_null(); + DBUG_RETURN(false); + } + + /** + - In case if we're assigning a ROW variable from another ROW variable, + value[0] points to Item_splocal. sp_fix_func_item() will return the + fixed underlying Item_field pointing to Field_row. 
+ - In case if we're assigning from a ROW() value, src and value[0] will + point to the same Item_row. + - In case if we're assigning from a subselect, src and value[0] also + point to the same Item_singlerow_subselect. + */ + Item *src; + if (!(src= thd->sp_fix_func_item(value)) || + src->cmp_type() != ROW_RESULT || + src->cols() != m_table->s->fields) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), m_table->s->fields); + m_table->set_all_fields_to_null(); + DBUG_RETURN(true); + } + + src->bring_value(); + DBUG_RETURN(m_table->sp_set_all_fields_from_item(thd, src)); +} + + +/**************************************************************************** + Functions for the Field_decimal class + This is an number stored as a pre-space (or pre-zero) string +****************************************************************************/ + +int +Field_decimal::reset(void) +{ + Field_decimal::store(STRING_WITH_LEN("0"),&my_charset_bin); + return 0; +} + +void Field_decimal::overflow(bool negative) +{ + uint len=field_length; + uchar *to=ptr, filler= '9'; + + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + if (negative) + { + if (!unsigned_flag) + { + /* Put - sign as a first digit so we'll have -999..999 or 999..999 */ + *to++ = '-'; + len--; + } + else + { + filler= '0'; // Fill up with 0 + if (!zerofill) + { + /* + Handle unsigned integer without zerofill, in which case + the number should be of format ' 0' or ' 0.000' + */ + uint whole_part=field_length- (dec ? 
dec+2 : 1); + // Fill with spaces up to the first digit + bfill(to, whole_part, ' '); + to+= whole_part; + len-= whole_part; + // The main code will also handle the 0 before the decimal point + } + } + } + bfill(to, len, filler); + if (dec) + ptr[field_length-dec-1]='.'; + return; +} + + +int Field_decimal::store(const char *from_arg, size_t len, CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + char buff[STRING_BUFFER_USUAL_SIZE]; + String tmp(buff,sizeof(buff), &my_charset_bin); + const uchar *from= (uchar*) from_arg; + + /* Convert character set if the old one is multi uchar */ + if (cs->mbmaxlen > 1) + { + uint dummy_errors; + tmp.copy((char*) from, len, cs, &my_charset_bin, &dummy_errors); + from= (uchar*) tmp.ptr(); + len= tmp.length(); + } + + const uchar *end= from+len; + /* The pointer where the field value starts (i.e., "where to write") */ + uchar *to= ptr; + uint tmp_dec, tmp_uint; + /* + The sign of the number : will be 0 (means positive but sign not + specified), '+' or '-' + */ + uchar sign_char=0; + /* The pointers where prezeros start and stop */ + const uchar *pre_zeros_from, *pre_zeros_end; + /* The pointers where digits at the left of '.' start and stop */ + const uchar *int_digits_from, *int_digits_end; + /* The pointers where digits at the right of '.' start and stop */ + const uchar *frac_digits_from, *frac_digits_end; + /* The sign of the exponent : will be 0 (means no exponent), '+' or '-' */ + char expo_sign_char=0; + uint exponent=0; // value of the exponent + /* + Pointers used when digits move from the left of the '.' to the + right of the '.' (explained below) + */ + const uchar *UNINIT_VAR(int_digits_tail_from); + /* Number of 0 that need to be added at the left of the '.' (1E3: 3 zeros) */ + uint UNINIT_VAR(int_digits_added_zeros); + /* + Pointer used when digits move from the right of the '.' to the left + of the '.' 
+ */ + const uchar *UNINIT_VAR(frac_digits_head_end); + /* Number of 0 that need to be added at the right of the '.' (for 1E-3) */ + uint UNINIT_VAR(frac_digits_added_zeros); + uchar *pos,*tmp_left_pos,*tmp_right_pos; + /* Pointers that are used as limits (begin and end of the field buffer) */ + uchar *left_wall,*right_wall; + uchar tmp_char; + /* + To remember if get_thd()->cuted_fields has already been incremented, + to do that only once + */ + bool is_cuted_fields_incr=0; + + /* + There are three steps in this function : + - parse the input string + - modify the position of digits around the decimal dot '.' + according to the exponent value (if specified) + - write the formatted number + */ + + if ((tmp_dec=dec)) + tmp_dec++; + + /* skip pre-space */ + while (from != end && my_isspace(&my_charset_bin,*from)) + from++; + if (from == end) + { + set_warning(WARN_DATA_TRUNCATED, 1); + is_cuted_fields_incr=1; + } + else if (*from == '+' || *from == '-') // Found some sign ? + { + sign_char= *from++; + /* + We allow "+" for unsigned decimal unless defined different + Both options allowed as one may wish not to have "+" for unsigned numbers + because of data processing issues + */ + if (unsigned_flag) + { + if (sign_char=='-') + { + Field_decimal::overflow(1); + return 1; + } + /* + Defining this will not store "+" for unsigned decimal type even if + it is passed in numeric string. This will make some tests to fail + */ +#ifdef DONT_ALLOW_UNSIGNED_PLUS + else + sign_char=0; +#endif + } + } + + pre_zeros_from= from; + for (; from!=end && *from == '0'; from++) ; // Read prezeros + pre_zeros_end=int_digits_from=from; + /* Read non zero digits at the left of '.'*/ + for (; from != end && my_isdigit(&my_charset_bin, *from) ; from++) ; + int_digits_end=from; + if (from!=end && *from == '.') // Some '.' ? + from++; + frac_digits_from= from; + /* Read digits at the right of '.' 
*/ + for (;from!=end && my_isdigit(&my_charset_bin, *from); from++) ; + frac_digits_end=from; + // Some exponentiation symbol ? + if (from != end && (*from == 'e' || *from == 'E')) + { + from++; + if (from != end && (*from == '+' || *from == '-')) // Some exponent sign ? + expo_sign_char= *from++; + else + expo_sign_char= '+'; + /* + Read digits of the exponent and compute its value. We must care about + 'exponent' overflow, because as unsigned arithmetic is "modulo", big + exponents will become small (e.g. 1e4294967296 will become 1e0, and the + field will finally contain 1 instead of its max possible value). + */ + for (;from!=end && my_isdigit(&my_charset_bin, *from); from++) + { + exponent=10*exponent+(*from-'0'); + if (exponent>MAX_EXPONENT) + break; + } + } + + /* + We only have to generate warnings if count_cuted_fields is set. + This is to avoid extra checks of the number when they are not needed. + Even if this flag is not set, it's OK to increment warnings, if + it makes the code easier to read. + */ + + if (get_thd()->count_cuted_fields > CHECK_FIELD_EXPRESSION) + { + // Skip end spaces + for (;from != end && my_isspace(&my_charset_bin, *from); from++) ; + if (from != end) // If still something left, warn + { + set_warning(WARN_DATA_TRUNCATED, 1); + is_cuted_fields_incr=1; + } + } + + /* + Now "move" digits around the decimal dot according to the exponent value, + and add necessary zeros. + Examples : + - 1E+3 : needs 3 more zeros at the left of '.' (int_digits_added_zeros=3) + - 1E-3 : '1' moves at the right of '.', and 2 more zeros are needed + between '.' and '1' + - 1234.5E-3 : '234' moves at the right of '.' + These moves are implemented with pointers which point at the begin + and end of each moved segment. Examples : + - 1234.5E-3 : before the code below is executed, the int_digits part is + from '1' to '4' and the frac_digits part from '5' to '5'. 
After the code + below, the int_digits part is from '1' to '1', the frac_digits_head + part is from '2' to '4', and the frac_digits part from '5' to '5'. + - 1234.5E3 : before the code below is executed, the int_digits part is + from '1' to '4' and the frac_digits part from '5' to '5'. After the code + below, the int_digits part is from '1' to '4', the int_digits_tail + part is from '5' to '5', the frac_digits part is empty, and + int_digits_added_zeros=2 (to make 1234500). + */ + + /* + Below tmp_uint cannot overflow with small enough MAX_EXPONENT setting, + as int_digits_added_zeros<=exponent<4G and + (int_digits_end-int_digits_from)<=max_allowed_packet<=2G and + (frac_digits_from-int_digits_tail_from)<=max_allowed_packet<=2G + */ + + if (!expo_sign_char) + tmp_uint=tmp_dec+(uint)(int_digits_end-int_digits_from); + else if (expo_sign_char == '-') + { + tmp_uint=MY_MIN(exponent,(uint)(int_digits_end-int_digits_from)); + frac_digits_added_zeros=exponent-tmp_uint; + int_digits_end -= tmp_uint; + frac_digits_head_end=int_digits_end+tmp_uint; + tmp_uint=tmp_dec+(uint)(int_digits_end-int_digits_from); + } + else // (expo_sign_char=='+') + { + tmp_uint=MY_MIN(exponent,(uint)(frac_digits_end-frac_digits_from)); + int_digits_added_zeros=exponent-tmp_uint; + int_digits_tail_from=frac_digits_from; + frac_digits_from=frac_digits_from+tmp_uint; + /* + We "eat" the heading zeros of the + int_digits.int_digits_tail.int_digits_added_zeros concatenation + (for example 0.003e3 must become 3 and not 0003) + */ + if (int_digits_from == int_digits_end) + { + /* + There was nothing in the int_digits part, so continue + eating int_digits_tail zeros + */ + for (; int_digits_tail_from != frac_digits_from && + *int_digits_tail_from == '0'; int_digits_tail_from++) ; + if (int_digits_tail_from == frac_digits_from) + { + // there were only zeros in int_digits_tail too + int_digits_added_zeros=0; + } + } + tmp_uint= (uint) (tmp_dec+(int_digits_end-int_digits_from)+ + 
(uint)(frac_digits_from-int_digits_tail_from)+ + int_digits_added_zeros); + } + + /* + Now write the formatted number + + First the digits of the int_% parts. + Do we have enough room to write these digits ? + If the sign is defined and '-', we need one position for it + */ + + if (field_length < tmp_uint + (int) (sign_char == '-')) + { + // too big number, change to max or min number + Field_decimal::overflow(sign_char == '-'); + return 1; + } + + /* + Tmp_left_pos is the position where the leftmost digit of + the int_% parts will be written + */ + tmp_left_pos=pos=to+(uint)(field_length-tmp_uint); + + // Write all digits of the int_% parts + while (int_digits_from != int_digits_end) + *pos++ = *int_digits_from++ ; + + if (expo_sign_char == '+') + { + while (int_digits_tail_from != frac_digits_from) + *pos++= *int_digits_tail_from++; + while (int_digits_added_zeros-- >0) + *pos++= '0'; + } + /* + Note the position where the rightmost digit of the int_% parts has been + written (this is to later check if the int_% parts contained nothing, + meaning an extra 0 is needed). + */ + tmp_right_pos=pos; + + /* + Step back to the position of the leftmost digit of the int_% parts, + to write sign and fill with zeros or blanks or prezeros. 
+ */ + pos=tmp_left_pos-1; + if (zerofill) + { + left_wall=to-1; + while (pos > left_wall) // Fill with zeros + *pos--='0'; + } + else + { + left_wall=to+(sign_char != 0)-1; + if (!expo_sign_char) // If exponent was specified, ignore prezeros + { + for (;pos > left_wall && pre_zeros_from !=pre_zeros_end; + pre_zeros_from++) + *pos--= '0'; + } + if (pos == tmp_right_pos-1) + *pos--= '0'; // no 0 has ever been written, so write one + left_wall= to-1; + if (sign_char && pos != left_wall) + { + /* Write sign if possible (it is if sign is '-') */ + *pos--= sign_char; + } + while (pos != left_wall) + *pos--=' '; //fill with blanks + } + + /* + Write digits of the frac_% parts ; + Depending on get_thd()->count_cuted_fields, we may also want + to know if some non-zero tail of these parts will + be truncated (for example, 0.002->0.00 will generate a warning, + while 0.000->0.00 will not) + (and 0E1000000000 will not, while 1E-1000000000 will) + */ + + pos=to+(uint)(field_length-tmp_dec); // Calculate post to '.' + right_wall=to+field_length; + if (pos != right_wall) + *pos++='.'; + + if (expo_sign_char == '-') + { + while (frac_digits_added_zeros-- > 0) + { + if (pos == right_wall) + { + if (get_thd()->count_cuted_fields > CHECK_FIELD_EXPRESSION && + !is_cuted_fields_incr) + break; // Go on below to see if we lose non zero digits + return 0; + } + *pos++='0'; + } + while (int_digits_end != frac_digits_head_end) + { + tmp_char= *int_digits_end++; + if (pos == right_wall) + { + if (tmp_char != '0') // Losing a non zero digit ? + { + if (!is_cuted_fields_incr) + set_warning(WARN_DATA_TRUNCATED, 1); + return 0; + } + continue; + } + *pos++= tmp_char; + } + } + + for (;frac_digits_from!=frac_digits_end;) + { + tmp_char= *frac_digits_from++; + if (pos == right_wall) + { + if (tmp_char != '0') // Losing a non zero digit ? 
+ { + if (!is_cuted_fields_incr) + { + /* + This is a note, not a warning, as we don't want to abort + when we cut decimals in strict mode + */ + set_note(WARN_DATA_TRUNCATED, 1); + } + return 0; + } + continue; + } + *pos++= tmp_char; + } + + while (pos != right_wall) + *pos++='0'; // Fill with zeros at right of '.' + return 0; +} + + +int Field_decimal::store(double nr) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + if (unsigned_flag && nr < 0) + { + overflow(1); + return 1; + } + + if (!std::isfinite(nr)) // Handle infinity as special case + { + overflow(nr < 0.0); + return 1; + } + + size_t length; + uchar fyllchar,*to; + char buff[DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE]; + + fyllchar = zerofill ? (char) '0' : (char) ' '; + length= my_fcvt(nr, dec, buff, NULL); + + if (length > field_length) + { + overflow(nr < 0.0); + return 1; + } + else + { + to=ptr; + for (size_t i=field_length-length ; i-- > 0 ;) + *to++ = fyllchar; + memcpy(to,buff,length); + return 0; + } +} + + +int Field_decimal::store(longlong nr, bool unsigned_val) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + char buff[22]; + uint length, int_part; + char fyllchar; + uchar *to; + + if (nr < 0 && unsigned_flag && !unsigned_val) + { + overflow(1); + return 1; + } + length= (uint) (longlong10_to_str(nr,buff,unsigned_val ? 10 : -10) - buff); + int_part= field_length- (dec ? dec+1 : 0); + + if (length > int_part) + { + overflow(!unsigned_val && nr < 0L); /* purecov: inspected */ + return 1; + } + + fyllchar = zerofill ? 
(char) '0' : (char) ' '; + to= ptr; + for (uint i=int_part-length ; i-- > 0 ;) + *to++ = fyllchar; + memcpy(to,buff,length); + if (dec) + { + to[length]='.'; + bfill(to+length+1,dec,'0'); + } + return 0; +} + + +double Field_decimal::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + int not_used; + char *end_not_used; + return my_charset_bin.strntod((char*) ptr, field_length, &end_not_used, ¬_used); +} + +longlong Field_decimal::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + int not_used; + if (unsigned_flag) + return my_charset_bin.strntoull((char*) ptr, field_length, 10, NULL, ¬_used); + return my_charset_bin.strntoll((char*) ptr, field_length, 10, NULL, ¬_used); +} + + +String *Field_decimal::val_str(String *val_buffer __attribute__((unused)), + String *val_ptr) +{ + DBUG_ASSERT(marked_for_read()); + uchar *str; + size_t tmp_length; + + for (str=ptr ; *str == ' ' ; str++) ; + val_ptr->set_charset(&my_charset_numeric); + tmp_length= (size_t) (str-ptr); + if (field_length < tmp_length) // Error in data + val_ptr->length(0); + else + val_ptr->set_ascii((const char*) str, field_length-tmp_length); + return val_ptr; +} + +/** + Should be able to handle at least the following fixed decimal formats: + 5.00 , -1.0, 05, -05, +5 with optional pre/end space +*/ + +int Field_decimal::cmp(const uchar *a_ptr,const uchar *b_ptr) const +{ + const uchar *end; + int swap=0; + /* First remove prefixes '0', ' ', and '-' */ + for (end=a_ptr+field_length; + a_ptr != end && + (*a_ptr == *b_ptr || + ((my_isspace(&my_charset_bin,*a_ptr) || *a_ptr == '+' || + *a_ptr == '0') && + (my_isspace(&my_charset_bin,*b_ptr) || *b_ptr == '+' || + *b_ptr == '0'))); + a_ptr++,b_ptr++) + { + if (*a_ptr == '-') // If both numbers are negative + swap= -1 ^ 1; // Swap result + } + if (a_ptr == end) + return 0; + if (*a_ptr == '-') + return -1; + if (*b_ptr == '-') + return 1; + + while (a_ptr != end) + { + if (*a_ptr++ != *b_ptr++) + return swap ^ (a_ptr[-1] < b_ptr[-1] ? 
-1 : 1); // compare digits + } + return 0; +} + + +void Field_decimal::sort_string(uchar *to,uint length) +{ + uchar *str,*end; + for (str=ptr,end=ptr+length; + str != end && + ((my_isspace(&my_charset_bin,*str) || *str == '+' || + *str == '0')) ; + str++) + *to++=' '; + if (str == end) + return; /* purecov: inspected */ + + if (*str == '-') + { + *to++=1; // Smaller than any number + str++; + while (str != end) + if (my_isdigit(&my_charset_bin,*str)) + *to++= (char) ('9' - *str++); + else + *to++= *str++; + } + else memcpy(to,str,(uint) (end-str)); +} + + +void Field_decimal::sql_type(String &res) const +{ + CHARSET_INFO *cs=res.charset(); + uint tmp=field_length; + if (!unsigned_flag) + tmp--; + if (dec) + tmp--; + res.length(cs->cset->snprintf(cs,(char*) res.ptr(),res.alloced_length(), + "decimal(%d,%d)/*old*/",tmp,dec)); + add_zerofill_and_unsigned(res); +} + + +Field *Field_decimal::make_new_field(MEM_ROOT *root, TABLE *new_table, + bool keep_type) +{ + if (keep_type) + return Field_real::make_new_field(root, new_table, keep_type); + + Field *field= new (root) Field_new_decimal(NULL, field_length, + maybe_null() ? 
(uchar*) "" : 0, 0, + NONE, &field_name, + dec, flags & ZEROFILL_FLAG, + unsigned_flag); + if (field) + field->init_for_make_new_field(new_table, orig_table); + return field; +} + + +/**************************************************************************** +** Field_new_decimal +****************************************************************************/ + +static decimal_digits_t get_decimal_precision(uint len, decimal_digits_t dec, + bool unsigned_val) +{ + uint precision= my_decimal_length_to_precision(len, dec, unsigned_val); + return (decimal_digits_t) MY_MIN(precision, DECIMAL_MAX_PRECISION); +} + +Field_new_decimal::Field_new_decimal(uchar *ptr_arg, + uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, + enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, + decimal_digits_t dec_arg,bool zero_arg, + bool unsigned_arg) + :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, + MY_MIN(dec_arg, DECIMAL_MAX_SCALE), zero_arg, unsigned_arg) +{ + precision= get_decimal_precision(len_arg, dec_arg, unsigned_arg); + DBUG_ASSERT(precision <= DECIMAL_MAX_PRECISION); + DBUG_ASSERT(dec <= DECIMAL_MAX_SCALE); + bin_size= my_decimal_get_binary_size(precision, dec); +} + + +int Field_new_decimal::reset(void) +{ + store_value(&decimal_zero); + return 0; +} + + +/** + Generate max/min decimal value in case of overflow. + + @param decimal_value buffer for value + @param sign sign of value which caused overflow +*/ + +void Field_new_decimal::set_value_on_overflow(my_decimal *decimal_value, + bool sign) +{ + DBUG_ENTER("Field_new_decimal::set_value_on_overflow"); + max_my_decimal(decimal_value, precision, decimals()); + if (sign) + { + if (unsigned_flag) + my_decimal_set_zero(decimal_value); + else + decimal_value->sign(TRUE); + } + DBUG_VOID_RETURN; +} + + +/** + Store decimal value in the binary buffer. + + Checks if decimal_value fits into field size. 
+ If it does, stores the decimal in the buffer using binary format. + Otherwise sets maximal number that can be stored in the field. + + @param decimal_value my_decimal + @param [OUT] native_error the error returned by my_decimal::to_binary(). + + @retval + 0 ok + @retval + 1 error +*/ + +bool Field_new_decimal::store_value(const my_decimal *decimal_value, + int *native_error) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error= 0; + DBUG_ENTER("Field_new_decimal::store_value"); +#ifdef DBUG_TRACE + { + char dbug_buff[DECIMAL_MAX_STR_LENGTH+2]; + DBUG_PRINT("enter", ("value: %s", dbug_decimal_as_string(dbug_buff, decimal_value))); + } +#endif + + /* check that we do not try to write negative value in unsigned field */ + if (unsigned_flag && decimal_value->sign()) + { + DBUG_PRINT("info", ("unsigned overflow")); + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + decimal_value= &decimal_zero; + } +#ifdef DBUG_TRACE + { + char dbug_buff[DECIMAL_MAX_STR_LENGTH+2]; + DBUG_PRINT("info", ("saving with precision %d scale: %d value %s", + (int)precision, (int)dec, + dbug_decimal_as_string(dbug_buff, decimal_value))); + } +#endif + + *native_error= decimal_value->to_binary(ptr, precision, dec, + E_DEC_FATAL_ERROR & ~E_DEC_OVERFLOW); + + if (unlikely(*native_error == E_DEC_OVERFLOW)) + { + my_decimal buff; + DBUG_PRINT("info", ("overflow")); + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + set_value_on_overflow(&buff, decimal_value->sign()); + buff.to_binary(ptr, precision, dec); + error= 1; + } + DBUG_EXECUTE("info", print_decimal_buff(decimal_value, (uchar *) ptr, + bin_size);); + DBUG_RETURN(error); +} + + +bool Field_new_decimal::store_value(const my_decimal *decimal_value) +{ + int native_error; + bool rc= store_value(decimal_value, &native_error); + if (unlikely(!rc && native_error == E_DEC_TRUNCATED)) + set_note(WARN_DATA_TRUNCATED, 1); + return rc; +} + + +int Field_new_decimal::store(const char *from, size_t length, + CHARSET_INFO *charset_arg) +{ + 
DBUG_ASSERT(marked_for_write_or_computed()); + my_decimal decimal_value; + THD *thd= get_thd(); + DBUG_ENTER("Field_new_decimal::store(char*)"); + + const char *end; + int err= str2my_decimal(E_DEC_FATAL_ERROR & + ~(E_DEC_OVERFLOW | E_DEC_BAD_NUM), + from, length, charset_arg, + &decimal_value, &end); + + if (err == E_DEC_OVERFLOW) // Too many digits (>81) in the integer part + { + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + if (!thd->abort_on_warning) + { + set_value_on_overflow(&decimal_value, decimal_value.sign()); + store_decimal(&decimal_value); + } + DBUG_RETURN(1); + } + + if (thd->count_cuted_fields > CHECK_FIELD_EXPRESSION) + { + if (check_edom_and_important_data_truncation("decimal", + err && err != E_DEC_TRUNCATED, + charset_arg, + from, length, end)) + { + if (!thd->abort_on_warning) + { + if (err && err != E_DEC_TRUNCATED) + { + /* + If check_decimal() failed because of EDOM-alike error, + (e.g. E_DEC_BAD_NUM), we have to initialize decimal_value to zero. + Note: if check_decimal() failed because of truncation, + decimal_value is already properly initialized. + */ + my_decimal_set_zero(&decimal_value); + /* + TODO: check str2my_decimal() with HF. It seems to do + decimal_make_zero() on fatal errors, so my_decimal_set_zero() + is probably not needed here. + */ + } + store_decimal(&decimal_value); + } + DBUG_RETURN(1); + } + } + +#ifdef DBUG_TRACE + char dbug_buff[DECIMAL_MAX_STR_LENGTH+2]; + DBUG_PRINT("enter", ("value: %s", + dbug_decimal_as_string(dbug_buff, &decimal_value))); +#endif + int err2; + if (store_value(&decimal_value, &err2)) + DBUG_RETURN(1); + + /* + E_DEC_TRUNCATED means minor truncation, a note should be enough: + - in err: str2my_decimal() truncated '1e-1000000000000' to 0.0 + - in err2: store_value() truncated 1.123 to 1.12, e.g. 
for DECIMAL(10,2) + Also, we send a note if a string had some trailing spaces: '1.12 ' + */ + if (thd->count_cuted_fields > CHECK_FIELD_EXPRESSION && + (err == E_DEC_TRUNCATED || + err2 == E_DEC_TRUNCATED || + end < from + length)) + set_note(WARN_DATA_TRUNCATED, 1); + DBUG_RETURN(0); +} + + +/** + @todo + Fix following when double2my_decimal when double2decimal + will return E_DEC_TRUNCATED always correctly +*/ + +int Field_new_decimal::store(double nr) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + my_decimal decimal_value; + int err; + THD *thd= get_thd(); + DBUG_ENTER("Field_new_decimal::store(double)"); + + err= double2my_decimal(E_DEC_FATAL_ERROR & ~E_DEC_OVERFLOW, nr, + &decimal_value); + if (err) + { + if (check_overflow(err)) + set_value_on_overflow(&decimal_value, decimal_value.sign()); + /* Only issue a warning if store_value doesn't issue an warning */ + thd->got_warning= 0; + } + if (store_value(&decimal_value)) + err= 1; + else if (err && !thd->got_warning) + err= warn_if_overflow(err); + DBUG_RETURN(err); +} + + +int Field_new_decimal::store(longlong nr, bool unsigned_val) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + my_decimal decimal_value; + int err; + + if ((err= int2my_decimal(E_DEC_FATAL_ERROR & ~E_DEC_OVERFLOW, + nr, unsigned_val, &decimal_value))) + { + if (check_overflow(err)) + set_value_on_overflow(&decimal_value, decimal_value.sign()); + /* Only issue a warning if store_value doesn't issue an warning */ + get_thd()->got_warning= 0; + } + if (store_value(&decimal_value)) + err= 1; + else if (err && !get_thd()->got_warning) + err= warn_if_overflow(err); + return err; +} + + +int Field_new_decimal::store_decimal(const my_decimal *decimal_value) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + return store_value(decimal_value); +} + + +int Field_new_decimal::store_time_dec(const MYSQL_TIME *ltime, uint dec_arg) +{ + my_decimal decimal_value; + return store_value(date2my_decimal(ltime, &decimal_value)); +} + + +my_decimal* 
Field_new_decimal::val_decimal(my_decimal *decimal_value) +{ + DBUG_ASSERT(marked_for_read()); + DBUG_ENTER("Field_new_decimal::val_decimal"); + binary2my_decimal(E_DEC_FATAL_ERROR, ptr, decimal_value, + precision, dec); + DBUG_EXECUTE("info", print_decimal_buff(decimal_value, (uchar *) ptr, + bin_size);); + DBUG_RETURN(decimal_value); +} + + +int Field_new_decimal::cmp(const uchar *a,const uchar*b) const +{ + return memcmp(a, b, bin_size); +} + + +void Field_new_decimal::sort_string(uchar *buff, uint length) +{ + memcpy(buff, ptr, length); +} + + +void Field_new_decimal::sql_type(String &str) const +{ + CHARSET_INFO *cs= str.charset(); + str.length(cs->cset->snprintf(cs, (char*) str.ptr(), str.alloced_length(), + "decimal(%d,%d)", precision, (int)dec)); + add_zerofill_and_unsigned(str); +} + + +/** + Save the field metadata for new decimal fields. + + Saves the precision in the first byte and decimals() in the second + byte of the field metadata array at index of *metadata_ptr and + *(metadata_ptr + 1). + + @param metadata_ptr First byte of field metadata + + @returns number of bytes written to metadata_ptr +*/ + +Binlog_type_info Field_new_decimal::binlog_type_info() const +{ + DBUG_ASSERT(Field_new_decimal::type() == binlog_type()); + return Binlog_type_info(Field_new_decimal::type(), precision + + (decimals() << 8), 2, binlog_signedness()); +} + + +/** + Returns the number of bytes field uses in row-based replication + row packed size. + + This method is used in row-based replication to determine the number + of bytes that the field consumes in the row record format. This is + used to skip fields in the master that do not exist on the slave. + + @param field_metadata Encoded size in field metadata + + @returns The size of the field based on the field metadata. 
+*/ +uint Field_new_decimal::pack_length_from_metadata(uint field_metadata) const +{ + uint const source_precision= (field_metadata >> 8U) & 0x00ff; + uint const source_decimal= field_metadata & 0x00ff; + uint const source_size= my_decimal_get_binary_size(source_precision, + source_decimal); + return (source_size); +} + + +bool Field_new_decimal::compatible_field_size(uint field_metadata, + const Relay_log_info * __attribute__((unused)), + uint16 mflags __attribute__((unused)), + int *order_var) const +{ + uint const source_precision= (field_metadata >> 8U) & 0x00ff; + uint const source_decimal= field_metadata & 0x00ff; + int order= compare(source_precision, precision); + *order_var= order != 0 ? order : compare(source_decimal, dec); + return true; +} + + +bool Field_new_decimal::is_equal(const Column_definition &new_field) const +{ + return ((new_field.type_handler() == type_handler()) && + ((new_field.flags & UNSIGNED_FLAG) == + (uint) (flags & UNSIGNED_FLAG)) && + ((new_field.flags & AUTO_INCREMENT_FLAG) <= + (uint) (flags & AUTO_INCREMENT_FLAG)) && + (new_field.length == max_display_length()) && + (new_field.decimals == dec)); +} + + +/** + Unpack a decimal field from row data. + + This method is used to unpack a decimal or numeric field from a master + whose size of the field is less than that of the slave. 
+ + @param to Destination of the data + @param from Source of the data + @param param_data Precision (upper) and decimal (lower) values + + @return New pointer into memory based on from + length of the data +*/ +const uchar * +Field_new_decimal::unpack(uchar* to, const uchar *from, const uchar *from_end, + uint param_data) +{ + if (param_data == 0) + return Field::unpack(to, from, from_end, param_data); + + uint from_precision= (param_data & 0xff00) >> 8U; + uint from_decimal= param_data & 0x00ff; + uint length=pack_length(); + uint from_pack_len= my_decimal_get_binary_size(from_precision, from_decimal); + uint len= (param_data && (from_pack_len < length)) ? + from_pack_len : length; + if ((from_pack_len && (from_pack_len < length)) || + (from_precision < precision) || + (from_decimal < decimals())) + { + /* + If the master's data is smaller than the slave, we need to convert + the binary to decimal then resize the decimal converting it back to + a decimal and write that to the raw data buffer. + */ + decimal_digit_t dec_buf[DECIMAL_MAX_PRECISION]; + decimal_t dec_val; + dec_val.len= from_precision; + dec_val.buf= dec_buf; + /* + Note: bin2decimal does not change the length of the field. So it is + just the first step the resizing operation. The second step does the + resizing using the precision and decimals from the slave. + */ + bin2decimal((uchar *)from, &dec_val, from_precision, from_decimal); + decimal2bin(&dec_val, to, precision, decimals()); + } + else + { + if (from + len > from_end) + return 0; // Wrong data + memcpy(to, from, len); // Sizes are the same, just copy the data. 
+ } + return from+len; +} + + +Item *Field_new_decimal::get_equal_const_item(THD *thd, const Context &ctx, + Item *const_item) +{ + if (flags & ZEROFILL_FLAG) + return Field_num::get_equal_zerofill_const_item(thd, ctx, const_item); + switch (ctx.subst_constraint()) { + case IDENTITY_SUBST: + if (const_item->field_type() != MYSQL_TYPE_NEWDECIMAL || + const_item->decimal_scale() != decimals()) + { + VDec val(const_item); + if (val.is_null()) + { + DBUG_ASSERT(0); + return const_item; + } + /* + Truncate or extend the decimal value to the scale of the field. + See comments about truncation in the same place in + Field_time::get_equal_const_item(). + */ + my_decimal tmp; + val.round_to(&tmp, decimals(), TRUNCATE); + return new (thd->mem_root) Item_decimal(thd, field_name.str, &tmp, + decimals(), field_length); + } + break; + case ANY_SUBST: + break; + } + return const_item; +} + + +int Field_int::store_time_dec(const MYSQL_TIME *ltime, uint dec_arg) +{ + longlong v= TIME_to_ulonglong(ltime); + if (ltime->neg == 0) + return store(v, true); + return store(-v, false); +} + + +void Field_int::sql_type(String &res) const +{ + CHARSET_INFO *cs=res.charset(); + Name name= type_handler()->type_handler_signed()->name(); + res.length(cs->cset->snprintf(cs,(char*) res.ptr(),res.alloced_length(), + "%.*s(%d)", (int) name.length(), name.ptr(), + (int) field_length)); + add_zerofill_and_unsigned(res); +} + + +/**************************************************************************** +** tiny int +****************************************************************************/ + +int Field_tiny::store(const char *from,size_t len,CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error; + longlong rnd; + + error= get_int(cs, from, len, &rnd, 255, -128, 127); + ptr[0]= unsigned_flag ? 
(char) (ulonglong) rnd : (char) rnd; + return error; +} + + +int Field_tiny::store(double nr) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error= 0; + nr=rint(nr); + if (unsigned_flag) + { + if (nr < 0.0) + { + *ptr=0; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else if (nr > 255.0) + { + *ptr= (uchar) 255; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else + *ptr= (uchar) nr; + } + else + { + if (nr < -128.0) + { + *ptr= (uchar) -128; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else if (nr > 127.0) + { + *ptr=127; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else + *ptr=(uchar) (int) nr; + } + return error; +} + + +int Field_tiny::store(longlong nr, bool unsigned_val) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error= 0; + + if (unsigned_flag) + { + if (nr < 0 && !unsigned_val) + { + *ptr= 0; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else if ((ulonglong) nr > (ulonglong) 255) + { + *ptr= (char) 255; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else + *ptr=(char) nr; + } + else + { + if (nr < 0 && unsigned_val) + nr= 256; // Generate overflow + if (nr < -128) + { + *ptr= (char) -128; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else if (nr > 127) + { + *ptr=127; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else + *ptr=(char) nr; + } + return error; +} + + +double Field_tiny::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + int tmp= unsigned_flag ? (int) ptr[0] : + (int) ((signed char*) ptr)[0]; + return (double) tmp; +} + + +longlong Field_tiny::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + int tmp= unsigned_flag ? (int) ptr[0] : + (int) ((signed char*) ptr)[0]; + return (longlong) tmp; +} + + +String *Field_tiny::val_str(String *val_buffer, + String *val_ptr __attribute__((unused))) +{ + DBUG_ASSERT(marked_for_read()); + long nr= unsigned_flag ? 
(long) ptr[0] : (long) ((signed char*) ptr)[0]; + return val_str_from_long(val_buffer, 5, -10, nr); +} + +bool Field_tiny::send(Protocol *protocol) +{ + DBUG_ASSERT(marked_for_read()); + Protocol_text *txt; + if (unlikely(zerofill) && (txt= dynamic_cast(protocol))) + return send_numeric_zerofill_str(txt, PROTOCOL_SEND_TINY); + return protocol->store_tiny(Field_tiny::val_int()); +} + + +int Field_tiny::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + signed char a,b; + a=(signed char) a_ptr[0]; b= (signed char) b_ptr[0]; + if (unsigned_flag) + return ((uchar) a < (uchar) b) ? -1 : ((uchar) a > (uchar) b) ? 1 : 0; + return (a < b) ? -1 : (a > b) ? 1 : 0; +} + +void Field_tiny::sort_string(uchar *to,uint length __attribute__((unused))) +{ + if (unsigned_flag) + *to= *ptr; + else + to[0] = (char) (ptr[0] ^ (uchar) 128); /* Revers signbit */ +} + +/**************************************************************************** + Field type short int (2 byte) +****************************************************************************/ + +int Field_short::store(const char *from,size_t len,CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int store_tmp; + int error; + longlong rnd; + + error= get_int(cs, from, len, &rnd, UINT_MAX16, INT_MIN16, INT_MAX16); + store_tmp= unsigned_flag ? 
(int) (ulonglong) rnd : (int) rnd; + int2store(ptr, store_tmp); + return error; +} + + +int Field_short::store(double nr) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error= 0; + int16 res; + nr=rint(nr); + if (unsigned_flag) + { + if (nr < 0) + { + res=0; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else if (nr > (double) UINT_MAX16) + { + res=(int16) UINT_MAX16; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else + res=(int16) (uint16) nr; + } + else + { + if (nr < (double) INT_MIN16) + { + res=INT_MIN16; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else if (nr > (double) INT_MAX16) + { + res=INT_MAX16; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else + res=(int16) (int) nr; + } + int2store(ptr,res); + return error; +} + + +int Field_short::store(longlong nr, bool unsigned_val) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error= 0; + int16 res; + + if (unsigned_flag) + { + if (nr < 0L && !unsigned_val) + { + res=0; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else if ((ulonglong) nr > (ulonglong) UINT_MAX16) + { + res=(int16) UINT_MAX16; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else + res=(int16) (uint16) nr; + } + else + { + if (nr < 0 && unsigned_val) + nr= UINT_MAX16+1; // Generate overflow + + if (nr < INT_MIN16) + { + res=INT_MIN16; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else if (nr > (longlong) INT_MAX16) + { + res=INT_MAX16; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else + res=(int16) nr; + } + int2store(ptr,res); + return error; +} + + +double Field_short::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + short j; + j=sint2korr(ptr); + return unsigned_flag ? (double) (unsigned short) j : (double) j; +} + +longlong Field_short::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + short j; + j=sint2korr(ptr); + return unsigned_flag ? 
(longlong) (unsigned short) j : (longlong) j; +} + + +String *Field_short::val_str(String *val_buffer, + String *val_ptr __attribute__((unused))) +{ + DBUG_ASSERT(marked_for_read()); + short j= sint2korr(ptr); + long nr= unsigned_flag ? (long) (unsigned short) j : (long) j; + return val_str_from_long(val_buffer, 7, -10, nr); +} + + +bool Field_short::send(Protocol *protocol) +{ + DBUG_ASSERT(marked_for_read()); + Protocol_text *txt; + if (unlikely(zerofill) && (txt= dynamic_cast(protocol))) + return send_numeric_zerofill_str(txt, PROTOCOL_SEND_SHORT); + return protocol->store_short(Field_short::val_int()); +} + + +int Field_short::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + short a,b; + a=sint2korr(a_ptr); + b=sint2korr(b_ptr); + + if (unsigned_flag) + return ((unsigned short) a < (unsigned short) b) ? -1 : + ((unsigned short) a > (unsigned short) b) ? 1 : 0; + return (a < b) ? -1 : (a > b) ? 1 : 0; +} + +void Field_short::sort_string(uchar *to,uint length __attribute__((unused))) +{ + if (unsigned_flag) + to[0] = ptr[1]; + else + to[0] = (char) (ptr[1] ^ 128); /* Revers signbit */ + to[1] = ptr[0]; +} + +/**************************************************************************** + Field type medium int (3 byte) +****************************************************************************/ + +int Field_medium::store(const char *from,size_t len,CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int store_tmp; + int error; + longlong rnd; + + error= get_int(cs, from, len, &rnd, UINT_MAX24, INT_MIN24, INT_MAX24); + store_tmp= unsigned_flag ? 
(int) (ulonglong) rnd : (int) rnd; + int3store(ptr, store_tmp); + return error; +} + + +int Field_medium::store(double nr) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error= 0; + nr=rint(nr); + if (unsigned_flag) + { + if (nr < 0) + { + int3store(ptr,0); + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else if (nr >= (double) (long) (1L << 24)) + { + uint32 tmp=(uint32) (1L << 24)-1L; + int3store(ptr,tmp); + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else + int3store(ptr,(uint32) nr); + } + else + { + if (nr < (double) INT_MIN24) + { + long tmp=(long) INT_MIN24; + int3store(ptr,tmp); + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else if (nr > (double) INT_MAX24) + { + long tmp=(long) INT_MAX24; + int3store(ptr,tmp); + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else + int3store(ptr,(long) nr); + } + return error; +} + + +int Field_medium::store(longlong nr, bool unsigned_val) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error= 0; + + if (unsigned_flag) + { + if (nr < 0 && !unsigned_val) + { + int3store(ptr,0); + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else if ((ulonglong) nr >= (ulonglong) (long) (1L << 24)) + { + long tmp= (long) (1L << 24)-1L; + int3store(ptr,tmp); + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else + int3store(ptr,(uint32) nr); + } + else + { + if (nr < 0 && unsigned_val) + nr= (ulonglong) (long) (1L << 24); // Generate overflow + + if (nr < (longlong) INT_MIN24) + { + long tmp= (long) INT_MIN24; + int3store(ptr,tmp); + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else if (nr > (longlong) INT_MAX24) + { + long tmp=(long) INT_MAX24; + int3store(ptr,tmp); + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else + int3store(ptr,(long) nr); + } + return error; +} + + +double Field_medium::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + long j= unsigned_flag ? 
(long) uint3korr(ptr) : sint3korr(ptr); + return (double) j; +} + + +longlong Field_medium::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + long j= unsigned_flag ? (long) uint3korr(ptr) : sint3korr(ptr); + return (longlong) j; +} + + +String *Field_medium::val_str(String *val_buffer, + String *val_ptr __attribute__((unused))) +{ + DBUG_ASSERT(marked_for_read()); + long nr= unsigned_flag ? (long) uint3korr(ptr) : sint3korr(ptr); + return val_str_from_long(val_buffer, 10, -10, nr); +} + + +String *Field_int::val_str_from_long(String *val_buffer, + uint max_char_length, + int radix, long nr) +{ + CHARSET_INFO *cs= &my_charset_numeric; + uint length; + uint mlength= MY_MAX(field_length + 1, max_char_length * cs->mbmaxlen); + val_buffer->alloc(mlength); + char *to=(char*) val_buffer->ptr(); + length= (uint) cs->long10_to_str(to, mlength, radix, nr); + val_buffer->length(length); + if (zerofill) + prepend_zeros(val_buffer); /* purecov: inspected */ + val_buffer->set_charset(cs); + return val_buffer; +} + + +bool Field_medium::send(Protocol *protocol) +{ + DBUG_ASSERT(marked_for_read()); + Protocol_text *txt; + if (unlikely(zerofill) && (txt= dynamic_cast(protocol))) + return send_numeric_zerofill_str(txt, PROTOCOL_SEND_LONG); + return protocol->store_long(Field_medium::val_int()); +} + + +int Field_medium::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + long a,b; + if (unsigned_flag) + { + a=uint3korr(a_ptr); + b=uint3korr(b_ptr); + } + else + { + a=sint3korr(a_ptr); + b=sint3korr(b_ptr); + } + return (a < b) ? -1 : (a > b) ? 
1 : 0; +} + +void Field_medium::sort_string(uchar *to,uint length __attribute__((unused))) +{ + if (unsigned_flag) + to[0] = ptr[2]; + else + to[0] = (uchar) (ptr[2] ^ 128); /* Revers signbit */ + to[1] = ptr[1]; + to[2] = ptr[0]; +} + + +/**************************************************************************** +** long int +****************************************************************************/ + +int Field_long::store(const char *from,size_t len,CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + long store_tmp; + int error; + longlong rnd; + + error= get_int(cs, from, len, &rnd, UINT_MAX32, INT_MIN32, INT_MAX32); + store_tmp= unsigned_flag ? (long) (ulonglong) rnd : (long) rnd; + int4store(ptr, store_tmp); + return error; +} + + +int Field_long::store(double nr) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error= 0; + int32 res; + nr=rint(nr); + if (unsigned_flag) + { + if (nr < 0) + { + res=0; + error= 1; + } + else if (nr > (double) UINT_MAX32) + { + res= UINT_MAX32; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else + res=(int32) (ulong) nr; + } + else + { + if (nr < (double) INT_MIN32) + { + res=(int32) INT_MIN32; + error= 1; + } + else if (nr > (double) INT_MAX32) + { + res=(int32) INT_MAX32; + error= 1; + } + else + res=(int32) (longlong) nr; + } + if (unlikely(error)) + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + + int4store(ptr,res); + return error; +} + + +int Field_long::store(longlong nr, bool unsigned_val) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error= 0; + int32 res; + + if (unsigned_flag) + { + if (nr < 0 && !unsigned_val) + { + res=0; + error= 1; + } + else if ((ulonglong) nr >= (1LL << 32)) + { + res=(int32) (uint32) ~0L; + error= 1; + } + else + res=(int32) (uint32) nr; + } + else + { + if (nr < 0 && unsigned_val) + nr= ((longlong) INT_MAX32) + 1; // Generate overflow + if (nr < (longlong) INT_MIN32) + { + res=(int32) INT_MIN32; + error= 1; + } + else if (nr > (longlong) 
INT_MAX32) + { + res=(int32) INT_MAX32; + error= 1; + } + else + res=(int32) nr; + } + if (unlikely(error)) + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + + int4store(ptr,res); + return error; +} + + +double Field_long::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + int32 j; + j=sint4korr(ptr); + return unsigned_flag ? (double) (uint32) j : (double) j; +} + +longlong Field_long::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + int32 j; + /* See the comment in Field_long::store(long long) */ + DBUG_ASSERT(!table || table->in_use == current_thd); + j=sint4korr(ptr); + return unsigned_flag ? (longlong) (uint32) j : (longlong) j; +} + + +String *Field_long::val_str(String *val_buffer, + String *val_ptr __attribute__((unused))) +{ + DBUG_ASSERT(marked_for_read()); + long nr= unsigned_flag ? (long) uint4korr(ptr) : sint4korr(ptr); + return val_str_from_long(val_buffer, 12, unsigned_flag ? 10 : -10, nr); +} + + +bool Field_long::send(Protocol *protocol) +{ + DBUG_ASSERT(marked_for_read()); + Protocol_text *txt; + if (unlikely(zerofill) && (txt= dynamic_cast(protocol))) + return send_numeric_zerofill_str(txt, PROTOCOL_SEND_LONG); + return protocol->store_long(Field_long::val_int()); +} + + +int Field_long::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + int32 a,b; + a=sint4korr(a_ptr); + b=sint4korr(b_ptr); + if (unsigned_flag) + return ((uint32) a < (uint32) b) ? -1 : ((uint32) a > (uint32) b) ? 1 : 0; + return (a < b) ? -1 : (a > b) ? 
1 : 0; +} + +void Field_long::sort_string(uchar *to,uint length __attribute__((unused))) +{ + if (unsigned_flag) + to[0] = ptr[3]; + else + to[0] = (char) (ptr[3] ^ 128); /* Revers signbit */ + to[1] = ptr[2]; + to[2] = ptr[1]; + to[3] = ptr[0]; +} + + +/**************************************************************************** + Field type longlong int (8 bytes) +****************************************************************************/ + +int Field_longlong::store(const char *from,size_t len,CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error= 0; + char *end; + ulonglong tmp; + + tmp= cs->strntoull10rnd(from, len, unsigned_flag, &end, &error); + if (unlikely(error == MY_ERRNO_ERANGE)) + { + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + else if (get_thd()->count_cuted_fields > CHECK_FIELD_EXPRESSION && + check_int(cs, from, len, end, error)) + error= 1; + else + error= 0; + int8store(ptr,tmp); + return error; +} + + +int Field_longlong::store(double nr) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + Converter_double_to_longlong conv(nr, unsigned_flag); + + if (unlikely(conv.error())) + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + + int8store(ptr, conv.result()); + return conv.error(); +} + + +int Field_longlong::store(longlong nr, bool unsigned_val) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error= 0; + + if (unlikely(nr < 0)) // Only possible error + { + /* + if field is unsigned and value is signed (< 0) or + if field is signed and value is unsigned we have an overflow + */ + if (unsigned_flag != unsigned_val) + { + nr= unsigned_flag ? 
(ulonglong) 0 : (ulonglong) LONGLONG_MAX; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + error= 1; + } + } + + int8store(ptr,nr); + return error; +} + + +double Field_longlong::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + longlong j; + j=sint8korr(ptr); + /* The following is open coded to avoid a bug in gcc 3.3 */ + if (unsigned_flag) + { + ulonglong tmp= (ulonglong) j; + return ulonglong2double(tmp); + } + return (double) j; +} + + +longlong Field_longlong::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + longlong j; + j=sint8korr(ptr); + return j; +} + + +String *Field_longlong::val_str(String *val_buffer, + String *val_ptr __attribute__((unused))) +{ + CHARSET_INFO *cs= &my_charset_numeric; + uint length; + uint mlength=MY_MAX(field_length+1,22*cs->mbmaxlen); + val_buffer->alloc(mlength); + char *to=(char*) val_buffer->ptr(); + longlong j; + j=sint8korr(ptr); + + length=(uint) (cs->longlong10_to_str)(to, mlength, + unsigned_flag ? 10 : -10, j); + val_buffer->length(length); + if (zerofill) + prepend_zeros(val_buffer); + val_buffer->set_charset(cs); + return val_buffer; +} + + +bool Field_longlong::send(Protocol *protocol) +{ + DBUG_ASSERT(marked_for_read()); + Protocol_text *txt; + if (unlikely(zerofill) && (txt= dynamic_cast(protocol))) + return send_numeric_zerofill_str(txt, PROTOCOL_SEND_LONGLONG); + return protocol->store_longlong(Field_longlong::val_int(), unsigned_flag); +} + + +int Field_longlong::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + longlong a,b; + a=sint8korr(a_ptr); + b=sint8korr(b_ptr); + if (unsigned_flag) + return ((ulonglong) a < (ulonglong) b) ? -1 : + ((ulonglong) a > (ulonglong) b) ? 1 : 0; + return (a < b) ? -1 : (a > b) ? 
1 : 0; +} + +void Field_longlong::sort_string(uchar *to,uint length __attribute__((unused))) +{ + if (unsigned_flag) + to[0] = ptr[7]; + else + to[0] = (char) (ptr[7] ^ 128); /* Revers signbit */ + to[1] = ptr[6]; + to[2] = ptr[5]; + to[3] = ptr[4]; + to[4] = ptr[3]; + to[5] = ptr[2]; + to[6] = ptr[1]; + to[7] = ptr[0]; +} + + +void Field_longlong::set_max() +{ + DBUG_ASSERT(marked_for_write_or_computed()); + set_notnull(); + int8store(ptr, unsigned_flag ? ULONGLONG_MAX : LONGLONG_MAX); +} + +bool Field_longlong::is_max() +{ + DBUG_ASSERT(marked_for_read()); + if (unsigned_flag) + { + ulonglong j; + j= uint8korr(ptr); + return j == ULONGLONG_MAX; + } + longlong j; + j= sint8korr(ptr); + return j == LONGLONG_MAX; +} + +/* + Floating-point numbers + */ + +/**************************************************************************** + single precision float +****************************************************************************/ + +Field_float::Field_float(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, + enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, + decimal_digits_t dec_arg, + bool zero_arg, bool unsigned_arg) + :Field_real(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, + (dec_arg >= FLOATING_POINT_DECIMALS ? NOT_FIXED_DEC : dec_arg), + zero_arg, unsigned_arg) +{ +} + +Field_float::Field_float(uint32 len_arg, bool maybe_null_arg, + const LEX_CSTRING *field_name_arg, + decimal_digits_t dec_arg) + :Field_real((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0, (uint) 0, + NONE, field_name_arg, + (dec_arg >= FLOATING_POINT_DECIMALS ? 
NOT_FIXED_DEC : dec_arg), + 0, 0) +{ +} + + +int Field_float::store(const char *from,size_t len,CHARSET_INFO *cs) +{ + int error; + Field_float::store(get_double(from, len, cs, &error)); + return error; +} + + +int Field_float::store(double nr) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error= truncate_double(&nr, field_length, + not_fixed ? NOT_FIXED_DEC : dec, + unsigned_flag, FLT_MAX); + if (unlikely(error)) + { + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + if (error < 0) // Wrong double value + { + error= 1; + set_null(); + } + } + float j= (float)nr; + + float4store(ptr,j); + return error; +} + + +int Field_float::store(longlong nr, bool unsigned_val) +{ + return Field_float::store(unsigned_val ? ulonglong2double((ulonglong) nr) : + (double) nr); +} + + +double Field_float::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + float j; + float4get(j,ptr); + return ((double) j); +} + +longlong Field_float::val_int(void) +{ + float j; + float4get(j,ptr); + return Converter_double_to_longlong(j, false).result(); +} + + +String *Field_float::val_str(String *val_buffer, + String *val_ptr __attribute__((unused))) +{ + DBUG_ASSERT(marked_for_read()); + DBUG_ASSERT(!zerofill || field_length <= MAX_FIELD_CHARLENGTH); + + if (Float(ptr).to_string(val_buffer, dec)) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return val_buffer; + } + + if (zerofill) + prepend_zeros(val_buffer); + return val_buffer; +} + + +int Field_float::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + float a,b; + float4get(a,a_ptr); + float4get(b,b_ptr); + return (a < b) ? -1 : (a > b) ? 
1 : 0; +} + +#define FLT_EXP_DIG (sizeof(float)*8-FLT_MANT_DIG) + +void Field_float::sort_string(uchar *to,uint length __attribute__((unused))) +{ + float nr; + float4get(nr,ptr); + + uchar *tmp= to; + if (nr == (float) 0.0) + { /* Change to zero string */ + tmp[0]=(uchar) 128; + bzero((char*) tmp+1,sizeof(nr)-1); + } + else + { +#ifdef WORDS_BIGENDIAN + memcpy(tmp, &nr, sizeof(nr)); +#else + tmp[0]= ptr[3]; tmp[1]=ptr[2]; tmp[2]= ptr[1]; tmp[3]=ptr[0]; +#endif + if (tmp[0] & 128) /* Negative */ + { /* make complement */ + uint i; + for (i=0 ; i < sizeof(nr); i++) + tmp[i]= (uchar) (tmp[i] ^ (uchar) 255); + } + else + { + ushort exp_part=(((ushort) tmp[0] << 8) | (ushort) tmp[1] | + (ushort) 32768); + exp_part+= (ushort) 1 << (16-1-FLT_EXP_DIG); + tmp[0]= (uchar) (exp_part >> 8); + tmp[1]= (uchar) exp_part; + } + } +} + + +bool Field_float::send(Protocol *protocol) +{ + DBUG_ASSERT(marked_for_read()); + Protocol_text *txt; + if (unlikely(zerofill) && (txt= dynamic_cast(protocol))) + return send_numeric_zerofill_str(txt, PROTOCOL_SEND_FLOAT); + return protocol->store_float((float) Field_float::val_real(), dec); +} + + +/** + Save the field metadata for float fields. + + Saves the pack length in the first byte. 
+ + @param metadata_ptr First byte of field metadata + + @returns number of bytes written to metadata_ptr +*/ +Binlog_type_info Field_float::binlog_type_info() const +{ + DBUG_ASSERT(Field_float::type() == binlog_type()); + return Binlog_type_info(Field_float::type(), pack_length(), 1, + binlog_signedness()); +} + + +/**************************************************************************** + double precision floating point numbers +****************************************************************************/ + +Field_double::Field_double(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, + enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, + decimal_digits_t dec_arg, + bool zero_arg, bool unsigned_arg) + :Field_real(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, + (dec_arg >= FLOATING_POINT_DECIMALS ? NOT_FIXED_DEC : dec_arg), + zero_arg, unsigned_arg) +{ +} + +Field_double::Field_double(uint32 len_arg, bool maybe_null_arg, + const LEX_CSTRING *field_name_arg, + decimal_digits_t dec_arg) + :Field_real((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "" : 0, (uint) 0, + NONE, field_name_arg, + (dec_arg >= FLOATING_POINT_DECIMALS ? NOT_FIXED_DEC : dec_arg), + 0, 0) +{ +} + +Field_double::Field_double(uint32 len_arg, bool maybe_null_arg, + const LEX_CSTRING *field_name_arg, + decimal_digits_t dec_arg, bool not_fixed_arg) + :Field_real((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "" : 0, (uint) 0, + NONE, field_name_arg, + (dec_arg >= FLOATING_POINT_DECIMALS ? NOT_FIXED_DEC : dec_arg), + 0, 0) +{ + not_fixed= not_fixed_arg; +} + +int Field_double::store(const char *from,size_t len,CHARSET_INFO *cs) +{ + int error; + Field_double::store(get_double(from, len, cs, &error)); + return error; +} + + +int Field_double::store(double nr) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error= truncate_double(&nr, field_length, + not_fixed ? 
NOT_FIXED_DEC : dec, + unsigned_flag, DBL_MAX); + if (unlikely(error)) + { + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + if (error < 0) // Wrong double value + { + error= 1; + set_null(); + } + } + + float8store(ptr,nr); + return error; +} + + +int Field_double::store(longlong nr, bool unsigned_val) +{ + return Field_double::store(unsigned_val ? ulonglong2double((ulonglong) nr) : + (double) nr); +} + +/* + If a field has fixed length, truncate the double argument pointed to by 'nr' + appropriately. + Also ensure that the argument is within [-max_value; max_value] range. + + return + 0 ok + -1 Illegal double value + 1 Value was truncated +*/ + +int truncate_double(double *nr, uint field_length, decimal_digits_t dec, + bool unsigned_flag, double max_value) +{ + int error= 0; + double res= *nr; + + if (std::isnan(res)) + { + *nr= 0; + return -1; + } + else if (unsigned_flag && res < 0) + { + *nr= 0; + return 1; + } + + if (dec < FLOATING_POINT_DECIMALS) + { + uint order= field_length - dec; + uint step= array_elements(log_10) - 1; + double max_value_by_dec= 1.0; + for (; order > step; order-= step) + max_value_by_dec*= log_10[step]; + max_value_by_dec*= log_10[order]; + max_value_by_dec-= 1.0 / log_10[dec]; + set_if_smaller(max_value, max_value_by_dec); + + /* Check for infinity so we don't get NaN in calculations */ + if (!std::isinf(res)) + { + double tmp= rint((res - floor(res)) * log_10[dec]) / log_10[dec]; + res= floor(res) + tmp; + } + } + + if (res < -max_value) + { + res= -max_value; + error= 1; + } + else if (res > max_value) + { + res= max_value; + error= 1; + } + + *nr= res; + return error; +} + +/* + Convert double to longlong / ulonglong. + If double is outside of the supported range, + adjust m_result and set m_error. 
+ + @param nr Number to convert + @param unsigned_flag true if result is unsigned +*/ + +Value_source:: +Converter_double_to_longlong::Converter_double_to_longlong(double nr, + bool unsigned_flag) + :m_error(false) +{ + nr= rint(nr); + if (unsigned_flag) + { + if (nr < 0) + { + m_result= 0; + m_error= true; + } + else if (nr >= (double) ULONGLONG_MAX) + { + m_result= ~(longlong) 0; + m_error= true; + } + else + m_result= (longlong) double2ulonglong(nr); + } + else + { + if (nr <= (double) LONGLONG_MIN) + { + m_result= LONGLONG_MIN; + m_error= (nr < (double) LONGLONG_MIN); + } + else if (nr >= (double) (ulonglong) LONGLONG_MAX) + { + m_result= LONGLONG_MAX; + m_error= (nr > (double) LONGLONG_MAX); + } + else + m_result= (longlong) nr; + } +} + + +void Value_source:: +Converter_double_to_longlong::push_warning(THD *thd, + double nr, + bool unsigned_flag) +{ + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_DATA_OVERFLOW, ER_THD(thd, ER_DATA_OVERFLOW), + ErrConvDouble(nr).ptr(), + unsigned_flag ? 
"UNSIGNED INT" : "INT"); +} + + +int Field_real::store_time_dec(const MYSQL_TIME *ltime, uint) +{ + return store(TIME_to_double(ltime)); +} + + +double Field_double::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + double j; + float8get(j,ptr); + return j; +} + + +longlong Field_double::val_int_from_real(bool want_unsigned_result) +{ + Converter_double_to_longlong conv(val_real(), want_unsigned_result); + if (unlikely(!want_unsigned_result && conv.error())) + conv.push_warning(get_thd(), Field_double::val_real(), false); + return conv.result(); +} + + +my_decimal *Field_real::val_decimal(my_decimal *decimal_value) +{ + DBUG_ASSERT(marked_for_read()); + double2my_decimal(E_DEC_FATAL_ERROR, val_real(), decimal_value); + return decimal_value; +} + + +bool Field_real::get_date(MYSQL_TIME *ltime,date_mode_t fuzzydate) +{ + DBUG_ASSERT(marked_for_read()); + double nr= val_real(); + return double_to_datetime_with_warn(get_thd(), nr, ltime, fuzzydate, + table->s, field_name.str); +} + + +Item *Field_real::get_equal_const_item(THD *thd, const Context &ctx, + Item *const_item) +{ + if (flags & ZEROFILL_FLAG) + return Field_num::get_equal_zerofill_const_item(thd, ctx, const_item); + switch (ctx.subst_constraint()) { + case IDENTITY_SUBST: + if (const_item->decimal_scale() != Field_real::decimals()) + { + double val= const_item->val_real(); + return new (thd->mem_root) Item_float(thd, val, Field_real::decimals()); + } + break; + case ANY_SUBST: + break; + } + return const_item; +} + + +void Field_real::sql_type(String &res) const +{ + const Name name= type_handler()->name(); + if (dec >= FLOATING_POINT_DECIMALS) + { + res.set_ascii(name.ptr(), name.length()); + } + else + { + CHARSET_INFO *cs= res.charset(); + res.length(cs->cset->snprintf(cs,(char*) res.ptr(),res.alloced_length(), + "%.*s(%d,%d)", (int) name.length(), name.ptr(), + (int) field_length,dec)); + } + add_zerofill_and_unsigned(res); +} + + +String *Field_double::val_str(String *val_buffer, + String *val_ptr 
__attribute__((unused))) +{ + DBUG_ASSERT(marked_for_read()); + DBUG_ASSERT(!zerofill || field_length <= MAX_FIELD_CHARLENGTH); + double nr; + float8get(nr,ptr); + + uint to_length= DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE; + if (val_buffer->alloc(to_length)) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return val_buffer; + } + + char *to=(char*) val_buffer->ptr(); + size_t len; + + if (dec >= FLOATING_POINT_DECIMALS) + len= my_gcvt(nr, MY_GCVT_ARG_DOUBLE, to_length - 1, to, NULL); + else + len= my_fcvt(nr, dec, to, NULL); + + val_buffer->length((uint) len); + if (zerofill) + prepend_zeros(val_buffer); + val_buffer->set_charset(&my_charset_numeric); + return val_buffer; +} + +bool Field_double::send(Protocol *protocol) +{ + DBUG_ASSERT(marked_for_read()); + Protocol_text *txt; + if (unlikely(zerofill) && (txt= dynamic_cast(protocol))) + return send_numeric_zerofill_str(txt, PROTOCOL_SEND_DOUBLE); + return protocol->store_double(Field_double::val_real(), dec); +} + + +int Field_double::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + double a,b; + float8get(a,a_ptr); + float8get(b,b_ptr); + return (a < b) ? -1 : (a > b) ? 1 : 0; +} + + +#define DBL_EXP_DIG (sizeof(double)*8-DBL_MANT_DIG) + +/* The following should work for IEEE */ + +void Field_double::sort_string(uchar *to,uint length __attribute__((unused))) +{ + double nr; + float8get(nr,ptr); + change_double_for_sort(nr, to); +} + + +/** + Save the field metadata for double fields. + + Saves the pack length in the first byte of the field metadata array + at index of *metadata_ptr. 
+ + @param metadata_ptr First byte of field metadata + + @returns number of bytes written to metadata_ptr +*/ +Binlog_type_info Field_double::binlog_type_info() const +{ + DBUG_ASSERT(Field_double::type() == binlog_type()); + return Binlog_type_info(Field_double::type(), pack_length(), 1, + binlog_signedness()); +} + + +/** + TIMESTAMP type holds datetime values in range from 1970-01-01 00:00:01 UTC to + 2038-01-01 00:00:00 UTC stored as number of seconds since Unix + Epoch in UTC. + + Actually SQL-99 says that we should allow niladic functions (like NOW()) + as defaults for any field. The current limitation (only NOW() and only + for TIMESTAMP and DATETIME fields) are because of restricted binary .frm + format and should go away in the future. + + Also because of this limitation of binary .frm format we use 5 different + unireg_check values with TIMESTAMP field to distinguish various cases of + DEFAULT or ON UPDATE values. These values are: + + TIMESTAMP_OLD_FIELD - old timestamp, if there was not any fields with + auto-set-on-update (or now() as default) in this table before, then this + field has NOW() as default and is updated when row changes, else it is + field which has 0 as default value and is not automatically updated. + TIMESTAMP_DN_FIELD - field with NOW() as default but not set on update + automatically (TIMESTAMP DEFAULT NOW()), not used in Field since 10.2.2 + TIMESTAMP_UN_FIELD - field which is set on update automatically but has not + NOW() as default (but it may has 0 or some other const timestamp as + default) (TIMESTAMP ON UPDATE NOW()). + TIMESTAMP_DNUN_FIELD - field which has now() as default and is auto-set on + update. (TIMESTAMP DEFAULT NOW() ON UPDATE NOW()), not used in Field since 10.2.2 + NONE - field which is not auto-set on update with some other than NOW() + default value (TIMESTAMP DEFAULT 0). + + Note that TIMESTAMP_OLD_FIELDs are never created explicitly now, they are + left only for preserving ability to read old tables. 
Such fields replaced + with their newer analogs in CREATE TABLE and in SHOW CREATE TABLE. This is + because we want to prefer NONE unireg_check before TIMESTAMP_OLD_FIELD for + "TIMESTAMP DEFAULT 'Const'" field. (Old timestamps allowed such + specification too but ignored default value for first timestamp, which of + course is non-standard.) In most cases user won't notice any change, only + exception is different behavior of old/new timestamps during ALTER TABLE. + */ + +Field_timestamp::Field_timestamp(uchar *ptr_arg, uint32 len_arg, + uchar *null_ptr_arg, uchar null_bit_arg, + enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, + TABLE_SHARE *share) + :Field_temporal(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg) +{ + /* For 4.0 MYD and 4.0 InnoDB compatibility */ + flags|= UNSIGNED_FLAG; + if (unireg_check != NONE) + { + /* + We mark the flag with TIMESTAMP_FLAG to indicate to the client that + this field will be automatically updated on insert. + */ + flags|= TIMESTAMP_FLAG; + if (unireg_check != TIMESTAMP_DN_FIELD) + flags|= ON_UPDATE_NOW_FLAG; + } +} + + +sql_mode_t +Field_timestamp::conversion_depends_on_sql_mode(THD *thd, Item *expr) const +{ + return expr->datetime_precision(thd) > decimals() ? + MODE_TIME_ROUND_FRACTIONAL : 0; +} + + +int Field_timestamp::save_in_field(Field *to) +{ + ulong sec_part; + my_time_t ts= get_timestamp(&sec_part); + return to->store_timestamp_dec(Timeval(ts, sec_part), decimals()); +} + +my_time_t Field_timestamp0::get_timestamp(const uchar *pos, + ulong *sec_part) const +{ + DBUG_ASSERT(marked_for_read()); + *sec_part= 0; + return sint4korr(pos); +} + + +bool Field_timestamp0::val_native(Native *to) +{ + DBUG_ASSERT(marked_for_read()); + my_time_t sec= (my_time_t) sint4korr(ptr); + return Timestamp_or_zero_datetime(Timestamp(sec, 0), sec == 0). 
+ to_native(to, 0); +} + + +int Field_timestamp::store_TIME_with_warning(THD *thd, const Datetime *dt, + const ErrConv *str, int was_cut) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + static const Timestamp zero(0, 0); + + // Handle totally bad values + if (!dt->is_valid_datetime()) + { + set_datetime_warning(WARN_DATA_TRUNCATED, str, "datetime", 1); + store_TIMESTAMP(zero); + return 1; + } + + // Handle values that do not need DATETIME to TIMESTAMP conversion + if (!dt->get_mysql_time()->month) + { + /* + Zero date is allowed by the current sql_mode. Store zero timestamp. + Return success or a warning about non-fatal truncation, e.g.: + INSERT INTO t1 (ts) VALUES ('0000-00-00 00:00:00 some tail'); + */ + store_TIMESTAMP(zero); + return store_TIME_return_code_with_warnings(was_cut, str, "datetime"); + } + + // Convert DATETIME to TIMESTAMP + uint conversion_error; + const MYSQL_TIME *l_time= dt->get_mysql_time(); + my_time_t timestamp= TIME_to_timestamp(thd, l_time, &conversion_error); + if (timestamp == 0 && l_time->second_part == 0) + { + set_datetime_warning(ER_WARN_DATA_OUT_OF_RANGE, str, "datetime", 1); + store_TIMESTAMP(zero); + return 1; // date was fine but pointed to a DST gap + } + + // Store the value + DBUG_ASSERT(!dt->fraction_remainder(decimals())); + store_TIMESTAMP(Timestamp(timestamp, l_time->second_part)); + + // Calculate return value and send warnings if needed + if (unlikely(conversion_error)) // e.g. 
DATETIME in the DST gap + { + set_datetime_warning(conversion_error, str, "datetime", 1); + return 1; + } + return store_TIME_return_code_with_warnings(was_cut, str, "datetime"); +} + + +date_conv_mode_t Timestamp::sql_mode_for_timestamp(THD *thd) +{ + // We don't want to store invalid or fuzzy datetime values in TIMESTAMP + return date_conv_mode_t((thd->variables.sql_mode & MODE_NO_ZERO_DATE) | + MODE_NO_ZERO_IN_DATE); +} + + +int Field_timestamp::store_time_dec(const MYSQL_TIME *ltime, uint dec) +{ + int warn; + ErrConvTime str(ltime); + THD *thd= get_thd(); + Datetime dt(thd, &warn, ltime, Timestamp::DatetimeOptions(thd), decimals()); + return store_TIME_with_warning(thd, &dt, &str, warn); +} + + +int Field_timestamp::store(const char *from,size_t len,CHARSET_INFO *cs) +{ + ErrConvString str(from, len, cs); + THD *thd= get_thd(); + MYSQL_TIME_STATUS st; + Datetime dt(thd, &st, from, len, cs, Timestamp::DatetimeOptions(thd), decimals()); + return store_TIME_with_warning(thd, &dt, &str, st.warnings); +} + + +int Field_timestamp::store(double nr) +{ + int error; + ErrConvDouble str(nr); + THD *thd= get_thd(); + Datetime dt(thd, &error, nr, Timestamp::DatetimeOptions(thd), decimals()); + return store_TIME_with_warning(thd, &dt, &str, error); +} + + +int Field_timestamp::store(longlong nr, bool unsigned_val) +{ + int error; + Longlong_hybrid tmp(nr, unsigned_val); + ErrConvInteger str(tmp); + THD *thd= get_thd(); + Datetime dt(&error, tmp, Timestamp::DatetimeOptions(thd)); + return store_TIME_with_warning(thd, &dt, &str, error); +} + + +int Field_timestamp::store_timestamp_dec(const timeval &ts, uint dec) +{ + int warn= 0; + time_round_mode_t mode= Datetime::default_round_mode(get_thd()); + store_TIMESTAMP(Timestamp(ts).round(decimals(), mode, &warn)); + if (warn) + { + /* + We're here if rounding would overflow outside of the supported TIMESTAMP + range, so truncation happened instead: + CREATE TABLE t1 (a TIMESTAMP(6)); + INSERT INTO t1 VALUES 
('maximum-possible-timestamp.999999'); + ALTER TABLE t1 MODIFY a TIMESTAMP(5); + SELECT * FROM t1; --> 'maximum-possible-timestamp.99999' (5 digits) + Raise a warning, like DATETIME does for '9999-12-31 23:59:59.999999'. + */ + set_warning(Sql_condition::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1); + } + if (ts.tv_sec == 0 && ts.tv_usec == 0 && + get_thd()->variables.sql_mode & (ulonglong) TIME_NO_ZERO_DATE) + return zero_time_stored_return_code_with_warning(); + return 0; +} + + +int Field_timestamp::zero_time_stored_return_code_with_warning() +{ + if (get_thd()->variables.sql_mode & (ulonglong) TIME_NO_ZERO_DATE) + { + ErrConvString s( + STRING_WITH_LEN("0000-00-00 00:00:00.000000") - (decimals() ? 6 - decimals() : 7), + system_charset_info); + set_datetime_warning(WARN_DATA_TRUNCATED, &s, "datetime", 1); + return 1; + } + return 0; + +} + + +int Field_timestamp::store_native(const Native &value) +{ + if (!value.length()) // Zero datetime + { + reset(); + return zero_time_stored_return_code_with_warning(); + } + /* + The exact second precision is not important here. + Field_timestamp*::store_timestamp_dec() do not use the "dec" parameter. + Passing TIME_SECOND_PART_DIGITS is OK. 
+ */ + return store_timestamp_dec(Timestamp(value).tv(), TIME_SECOND_PART_DIGITS); +} + + +longlong Field_timestamp::val_int(void) +{ + MYSQL_TIME ltime; + if (get_date(<ime, Datetime::Options(TIME_NO_ZERO_DATE, get_thd()))) + return 0; + + return ltime.year * 10000000000LL + ltime.month * 100000000LL + + ltime.day * 1000000L + ltime.hour * 10000L + + ltime.minute * 100 + ltime.second; +} + + +String *Field_timestamp::val_str(String *val_buffer, String *val_ptr) +{ + MYSQL_TIME ltime; + uint32 temp, temp2; + uint dec; + char *to; + + val_buffer->alloc(field_length+1); + to= (char*) val_buffer->ptr(); + val_buffer->length(field_length); + + if (get_date(<ime, Datetime::Options(TIME_NO_ZERO_DATE, get_thd()))) + { /* Zero time is "000000" */ + val_ptr->set(zero_timestamp, field_length, &my_charset_numeric); + return val_ptr; + } + val_buffer->set_charset(&my_charset_numeric); // Safety + + temp= ltime.year % 100; + if (temp < YY_PART_YEAR - 1) + { + *to++= '2'; + *to++= '0'; + } + else + { + *to++= '1'; + *to++= '9'; + } + temp2=temp/10; temp=temp-temp2*10; + *to++= (char) ('0'+(char) (temp2)); + *to++= (char) ('0'+(char) (temp)); + *to++= '-'; + temp=ltime.month; + temp2=temp/10; temp=temp-temp2*10; + *to++= (char) ('0'+(char) (temp2)); + *to++= (char) ('0'+(char) (temp)); + *to++= '-'; + temp=ltime.day; + temp2=temp/10; temp=temp-temp2*10; + *to++= (char) ('0'+(char) (temp2)); + *to++= (char) ('0'+(char) (temp)); + *to++= ' '; + temp=ltime.hour; + temp2=temp/10; temp=temp-temp2*10; + *to++= (char) ('0'+(char) (temp2)); + *to++= (char) ('0'+(char) (temp)); + *to++= ':'; + temp=ltime.minute; + temp2=temp/10; temp=temp-temp2*10; + *to++= (char) ('0'+(char) (temp2)); + *to++= (char) ('0'+(char) (temp)); + *to++= ':'; + temp=ltime.second; + temp2=temp/10; temp=temp-temp2*10; + *to++= (char) ('0'+(char) (temp2)); + *to++= (char) ('0'+(char) (temp)); + *to= 0; + val_buffer->set_charset(&my_charset_numeric); + + if ((dec= decimals())) + { + ulong sec_part= (ulong) 
sec_part_shift(ltime.second_part, dec); + char *buf= const_cast(val_buffer->ptr() + MAX_DATETIME_WIDTH); + for (int i= dec; i > 0; i--, sec_part/= 10) + buf[i]= (char)(sec_part % 10) + '0'; + buf[0]= '.'; + buf[dec + 1]= 0; + } + return val_buffer; +} + + +bool +Field_timestamp::validate_value_in_record(THD *thd, const uchar *record) const +{ + DBUG_ASSERT(!is_null_in_record(record)); + ulong sec_part; + return !get_timestamp(ptr_in_record(record), &sec_part) && !sec_part && + bool(sql_mode_for_dates(thd) & TIME_NO_ZERO_DATE) != false; +} + + +bool Field_timestamp::get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + ulong sec_part; + my_time_t ts= get_timestamp(&sec_part); + return get_thd()->timestamp_to_TIME(ltime, ts, sec_part, fuzzydate); +} + + +bool Field_timestamp0::send(Protocol *protocol) +{ + MYSQL_TIME ltime; + Field_timestamp0::get_date(<ime, date_mode_t(0)); + return protocol->store_datetime(<ime, 0); +} + + +int Field_timestamp0::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + int32 a,b; + a=sint4korr(a_ptr); + b=sint4korr(b_ptr); + return ((uint32) a < (uint32) b) ? -1 : ((uint32) a > (uint32) b) ? 1 : 0; +} + + +void Field_timestamp0::sort_string(uchar *to,uint length __attribute__((unused))) +{ + to[0] = ptr[3]; + to[1] = ptr[2]; + to[2] = ptr[1]; + to[3] = ptr[0]; +} + + +int Field_timestamp0::set_time() +{ + set_notnull(); + store_TIMESTAMP(Timestamp(get_thd()->query_start(), 0)); + return 0; +} + + +bool Field_timestamp::load_data_set_no_data(THD *thd, bool fixed_format) +{ + if (!maybe_null()) + { + /* + Timestamp fields that are NOT NULL are autoupdated if there is no + corresponding value in the data file. + */ + set_time(); + set_has_explicit_value(); + return false; + } + return Field::load_data_set_no_data(thd, fixed_format); +} + + +bool Field_timestamp::load_data_set_null(THD *thd) +{ + if (!maybe_null()) + { + /* + Timestamp fields that are NOT NULL are autoupdated if there is no + corresponding value in the data file. 
+ */ + set_time(); + } + else + { + reset(); + set_null(); + } + set_has_explicit_value(); // Do not auto-update this field + return false; +} + + +#ifdef NOT_USED +static void store_native(ulonglong num, uchar *to, uint bytes) +{ + switch(bytes) { + case 1: *to= (uchar)num; break; + case 2: shortstore(to, (ushort)num); break; + case 3: int3store(to, num); /* Sic!*/ break; + case 4: longstore(to, (ulong)num); break; + case 8: longlongstore(to, num); break; + default: DBUG_ASSERT(0); + } +} + +static longlong read_native(const uchar *from, uint bytes) +{ + switch(bytes) { + case 1: return from[0]; + case 2: { uint16 tmp; shortget(tmp, from); return tmp; } + case 3: return uint3korr(from); + case 4: { uint32 tmp; longget(tmp, from); return tmp; } + case 8: { longlong tmp; longlongget(tmp, from); return tmp; } + default: DBUG_ASSERT(0); return 0; + } +} +#endif + + +void Field_timestamp_hires::store_TIMEVAL(const timeval &tv) +{ + mi_int4store(ptr, tv.tv_sec); + store_bigendian(sec_part_shift(tv.tv_usec, dec), ptr+4, sec_part_bytes(dec)); +} + +my_time_t Field_timestamp_hires::get_timestamp(const uchar *pos, + ulong *sec_part) const +{ + DBUG_ASSERT(marked_for_read()); + *sec_part= (long)sec_part_unshift(read_bigendian(pos+4, sec_part_bytes(dec)), dec); + return mi_uint4korr(pos); +} + + +bool Field_timestamp_hires::val_native(Native *to) +{ + DBUG_ASSERT(marked_for_read()); + struct timeval tm; + tm.tv_sec= mi_uint4korr(ptr); + tm.tv_usec= (ulong) sec_part_unshift(read_bigendian(ptr+4, sec_part_bytes(dec)), dec); + return Timestamp_or_zero_datetime(Timestamp(tm), tm.tv_sec == 0). 
+ to_native(to, dec); +} + + +double Field_timestamp_with_dec::val_real(void) +{ + MYSQL_TIME ltime; + if (get_date(<ime, Datetime::Options(TIME_NO_ZERO_DATE, get_thd()))) + return 0; + + return ltime.year * 1e10 + ltime.month * 1e8 + + ltime.day * 1e6 + ltime.hour * 1e4 + + ltime.minute * 1e2 + ltime.second + ltime.second_part*1e-6; +} + +my_decimal *Field_timestamp_with_dec::val_decimal(my_decimal *d) +{ + MYSQL_TIME ltime; + get_date(<ime, date_mode_t(0)); + return TIME_to_my_decimal(<ime, d); +} + +int Field_timestamp::store_decimal(const my_decimal *d) +{ + int error; + THD *thd= get_thd(); + ErrConvDecimal str(d); + Datetime dt(thd, &error, d, Timestamp::DatetimeOptions(thd), decimals()); + return store_TIME_with_warning(thd, &dt, &str, error); +} + +int Field_timestamp_with_dec::set_time() +{ + THD *thd= get_thd(); + set_notnull(); + // Avoid writing microseconds into binlog for FSP=0 + ulong msec= decimals() ? thd->query_start_sec_part() : 0; + store_TIMESTAMP(Timestamp(thd->query_start(), msec).trunc(decimals())); + return 0; +} + +bool Field_timestamp_with_dec::send(Protocol *protocol) +{ + MYSQL_TIME ltime; + Field_timestamp::get_date(<ime, date_mode_t(0)); + return protocol->store_datetime(<ime, dec); +} + + +int Field_timestamp_hires::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + int32 a,b; + ulong a_sec_part, b_sec_part; + a= mi_uint4korr(a_ptr); + a_sec_part= (ulong)read_bigendian(a_ptr+4, sec_part_bytes(dec)); + b= mi_uint4korr(b_ptr); + b_sec_part= (ulong)read_bigendian(b_ptr+4, sec_part_bytes(dec)); + return ((uint32) a < (uint32) b) ? -1 : ((uint32) a > (uint32) b) ? 1 : + a_sec_part < b_sec_part ? -1 : a_sec_part > b_sec_part ? 
1 : 0; +} + + +void Field_timestamp_with_dec::make_send_field(Send_field *field) +{ + Field::make_send_field(field); + field->decimals= dec; +} + + +/************************************************************* +** MySQL-5.6 compatible TIMESTAMP(N) +**************************************************************/ + +void Field_timestampf::store_TIMEVAL(const timeval &tm) +{ + my_timestamp_to_binary(&tm, ptr, dec); +} + +void Field_timestampf::set_max() +{ + DBUG_ENTER("Field_timestampf::set_max"); + DBUG_ASSERT(marked_for_write_or_computed()); + DBUG_ASSERT(dec == TIME_SECOND_PART_DIGITS); + + set_notnull(); + mi_int4store(ptr, TIMESTAMP_MAX_VALUE); + mi_int3store(ptr + 4, TIME_MAX_SECOND_PART); + + DBUG_VOID_RETURN; +} + +bool Field_timestampf::is_max() +{ + DBUG_ENTER("Field_timestampf::is_max"); + DBUG_ASSERT(marked_for_read()); + + DBUG_RETURN(mi_sint4korr(ptr) == TIMESTAMP_MAX_VALUE && + mi_sint3korr(ptr + 4) == TIME_MAX_SECOND_PART); +} + +my_time_t Field_timestampf::get_timestamp(const uchar *pos, + ulong *sec_part) const +{ + struct timeval tm; + my_timestamp_from_binary(&tm, pos, dec); + *sec_part= tm.tv_usec; + return tm.tv_sec; +} + + +bool Field_timestampf::val_native(Native *to) +{ + DBUG_ASSERT(marked_for_read()); + // Check if it's '0000-00-00 00:00:00' rather than a real timestamp + if (ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) + { + to->length(0); + return false; + } + return Field::val_native(to); +} + +Binlog_type_info Field_timestampf::binlog_type_info() const +{ + return Binlog_type_info(Field_timestampf::binlog_type(), decimals(), 1); +} + + +/*************************************************************/ +sql_mode_t Field_temporal::can_handle_sql_mode_dependency_on_store() const +{ + return MODE_PAD_CHAR_TO_FULL_LENGTH; +} + + +bool Field_temporal::is_equal(const Column_definition &new_field) const +{ + return new_field.type_handler() == type_handler() && + new_field.length == max_display_length(); +} + + +void 
Field_temporal::set_warnings(Sql_condition::enum_warning_level trunc_level, + const ErrConv *str, int was_cut, + const char *typestr) +{ + /* + error code logic: + MYSQL_TIME_WARN_TRUNCATED means that the value was not a date/time at all. + it will be stored as zero date/time. + MYSQL_TIME_WARN_OUT_OF_RANGE means that the value was a date/time, + that is, it was parsed as such, but the value was invalid. + + Also, MYSQL_TIME_WARN_TRUNCATED is used when storing a DATETIME in + a DATE field and non-zero time part is thrown away. + */ + if (was_cut & MYSQL_TIME_WARN_TRUNCATED) + set_datetime_warning(trunc_level, WARN_DATA_TRUNCATED, str, typestr, 1); + if (was_cut & MYSQL_TIME_WARN_OUT_OF_RANGE) + set_datetime_warning(ER_WARN_DATA_OUT_OF_RANGE, str, typestr, 1); +} + + +void Field_temporal::sql_type_dec_comment(String &res, + const Name &name, + uint dec, + const Name &comment) const +{ + CHARSET_INFO *cs=res.charset(); + res.length(cs->cset->snprintf(cs, (char*) res.ptr(), res.alloced_length(), + "%.*s(%u)%s%.*s%s", + (uint) name.length(), name.ptr(), + dec, + comment.length() ? " /* " : "", + (uint) comment.length(), comment.ptr(), + comment.length() ? " */" : "")); +} + + +void Field_temporal::sql_type_comment(String &res, + const Name &name, + const Name &comment) const +{ + CHARSET_INFO *cs=res.charset(); + res.length(cs->cset->snprintf(cs, (char*) res.ptr(), res.alloced_length(), + "%.*s%s%.*s%s", + (uint) name.length(), name.ptr(), + comment.length() ? " /* " : "", + (uint) comment.length(), comment.ptr(), + comment.length() ? " */" : "")); +} + + +const Name & Field_temporal::type_version_mysql56() +{ + DBUG_EXECUTE_IF("sql_type", return Type_handler::version_mysql56(); ); + static Name none(NULL, 0); + return none; +} + + +/* + Store string into a date/time field + + RETURN + 0 ok + 1 Value was cut during conversion + 2 value was out of range + 3 Datetime value that was cut (warning level NOTE) + This is used by opt_range.cc:get_mm_leaf(). 
+*/ +int Field_datetime::store_TIME_with_warning(const Datetime *dt, + const ErrConv *str, + int was_cut) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + // Handle totally bad values + if (!dt->is_valid_datetime()) + return store_invalid_with_warning(str, was_cut, "datetime"); + // Store the value + DBUG_ASSERT(!dt->fraction_remainder(decimals())); + store_datetime(*dt); + // Caclulate return value and send warnings if needed + return store_TIME_return_code_with_warnings(was_cut, str, "datetime"); +} + + +int Field_datetime::store(const char *from, size_t len, CHARSET_INFO *cs) +{ + MYSQL_TIME_STATUS st; + ErrConvString str(from, len, cs); + THD *thd= get_thd(); + Datetime dt(thd, &st, from, len, cs, Datetime::Options(thd), decimals()); + return store_TIME_with_warning(&dt, &str, st.warnings); +} + +int Field_datetime::store(double nr) +{ + int error; + ErrConvDouble str(nr); + THD *thd= get_thd(); + Datetime dt(thd, &error, nr, Datetime::Options(thd), decimals()); + return store_TIME_with_warning(&dt, &str, error); +} + + +int Field_datetime::store(longlong nr, bool unsigned_val) +{ + int error; + Longlong_hybrid tmp(nr, unsigned_val); + ErrConvInteger str(tmp); + THD *thd= get_thd(); + Datetime dt(&error, tmp, Datetime::Options(thd)); + return store_TIME_with_warning(&dt, &str, error); +} + +int Field_datetime::store_time_dec(const MYSQL_TIME *ltime, uint dec) +{ + int error; + ErrConvTime str(ltime); + THD *thd= get_thd(); + Datetime dt(thd, &error, ltime, Datetime::Options(thd), decimals()); + return store_TIME_with_warning(&dt, &str, error); +} + + +int Field_datetime::store_decimal(const my_decimal *d) +{ + int error; + ErrConvDecimal str(d); + THD *thd= get_thd(); + Datetime tm(thd, &error, d, Datetime::Options(thd), decimals()); + return store_TIME_with_warning(&tm, &str, error); +} + + +bool +Field_temporal_with_date::validate_value_in_record(THD *thd, + const uchar *record) const +{ + DBUG_ASSERT(!is_null_in_record(record)); + MYSQL_TIME ltime; + 
return get_TIME(<ime, ptr_in_record(record), Datetime::Options(thd)); +} + + +my_decimal *Field_temporal::val_decimal(my_decimal *d) +{ + MYSQL_TIME ltime; + if (get_date(<ime, date_mode_t(0))) + { + bzero(<ime, sizeof(ltime)); + ltime.time_type= type_handler()->mysql_timestamp_type(); + } + return TIME_to_my_decimal(<ime, d); +} + + +Data_type_compatibility +Field_temporal::can_optimize_keypart_ref(const Item_bool_func *cond, + const Item *value) const +{ + // Field is of TIME_RESULT, which supersedes everything else. + return Data_type_compatibility::OK; +} + + +Data_type_compatibility +Field_temporal::can_optimize_group_min_max(const Item_bool_func *cond, + const Item *const_item) const +{ + // Field is of TIME_RESULT, which supersedes everything else. + return Data_type_compatibility::OK; +} + + +Item *Field_temporal::get_equal_const_item_datetime(THD *thd, + const Context &ctx, + Item *const_item) +{ + switch (ctx.subst_constraint()) { + case IDENTITY_SUBST: + if ((const_item->field_type() != MYSQL_TYPE_DATETIME && + const_item->field_type() != MYSQL_TYPE_TIMESTAMP) || + const_item->decimals != decimals()) + { + Datetime::Options opt(TIME_CONV_NONE, thd); + Datetime dt(thd, const_item, opt, decimals()); + if (!dt.is_valid_datetime()) + return NULL; + /* + See comments about truncation in the same place in + Field_time::get_equal_const_item(). + */ + return new (thd->mem_root) Item_datetime_literal(thd, &dt, decimals()); + } + break; + case ANY_SUBST: + if (!is_temporal_type_with_date(const_item->field_type())) + { + Datetime dt= type_handler()->field_type() == MYSQL_TYPE_TIMESTAMP ? + Datetime(thd, const_item, Timestamp::DatetimeOptions(thd)) : + Datetime(thd, const_item, Datetime::Options_cmp(thd)); + if (!dt.is_valid_datetime()) + return NULL; + return new (thd->mem_root) + Item_datetime_literal_for_invalid_dates(thd, &dt, + dt.get_mysql_time()-> + second_part ? 
+ TIME_SECOND_PART_DIGITS : 0); + } + break; + } + return const_item; +} + + +/**************************************************************************** +** time type +** In string context: HH:MM:SS +** In number context: HHMMSS +** Stored as a 3 byte unsigned int +****************************************************************************/ +sql_mode_t +Field_time::conversion_depends_on_sql_mode(THD *thd, Item *expr) const +{ + return expr->time_precision(thd) > decimals() ? + MODE_TIME_ROUND_FRACTIONAL : 0; +} + + +int Field_time::store_TIME_with_warning(const Time *t, + const ErrConv *str, int warn) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + // Handle totally bad values + if (!t->is_valid_time()) + return store_invalid_with_warning(str, warn, "time"); + // Store the value + DBUG_ASSERT(!t->fraction_remainder(decimals())); + store_TIME(*t); + // Calculate return value and send warnings if needed + return store_TIME_return_code_with_warnings(warn, str, "time"); +} + + +void Field_time0::store_TIME(const MYSQL_TIME *ltime) +{ + DBUG_ASSERT(ltime->year == 0); + DBUG_ASSERT(ltime->month == 0); + long tmp= (ltime->day*24L+ltime->hour)*10000L + + (ltime->minute*100+ltime->second); + if (ltime->neg) + tmp= -tmp; + int3store(ptr,tmp); +} + +int Field_time::store(const char *from,size_t len,CHARSET_INFO *cs) +{ + ErrConvString str(from, len, cs); + MYSQL_TIME_STATUS st; + THD *thd= get_thd(); + /* + Unlike number-to-time conversion, we need to additionally pass + MODE_NO_ZERO_DATE here (if it presents in the current sql_mode): + SET sql_mode='STRICT_ALL_TABLES,NO_ZERO_DATE'; + INSERT INTO t1 VALUES ('0000-00-00 00:00:00'); -- error + INSERT INTO t1 VALUES (0); -- ok + In the first INSERT we have a zero date. + In the second INSERT we don't have a zero date (it is just a zero time). 
+ */ + Time::Options opt(sql_mode_for_dates(thd), thd); + Time tm(thd, &st, from, len, cs, opt, decimals()); + return store_TIME_with_warning(&tm, &str, st.warnings); +} + + +int Field_time::store_time_dec(const MYSQL_TIME *ltime, uint dec) +{ + ErrConvTime str(ltime); + int warn; + Time tm(&warn, ltime, curdays, Time::Options(get_thd()), decimals()); + return store_TIME_with_warning(&tm, &str, warn); +} + + +int Field_time::store(double nr) +{ + ErrConvDouble str(nr); + int was_cut; + Time tm(get_thd(), &was_cut, nr, Time::Options(get_thd()), decimals()); + return store_TIME_with_warning(&tm, &str, was_cut); +} + + +int Field_time::store(longlong nr, bool unsigned_val) +{ + Longlong_hybrid tmp(nr, unsigned_val); + ErrConvInteger str(tmp); + int was_cut; + THD *thd= get_thd(); + /* + Need fractional digit truncation if nr overflows to '838:59:59.999999'. + The constructor used below will always truncate (never round). + We don't need to care to overwrite the default session rounding mode + from HALF_UP to TRUNCATE. 
+ */ + Time tm(thd, &was_cut, tmp, Time::Options(thd), decimals()); + return store_TIME_with_warning(&tm, &str, was_cut); +} + + +void Field_time::set_curdays(THD *thd) +{ + MYSQL_TIME ltime; + set_current_date(thd, <ime); + curdays= calc_daynr(ltime.year, ltime.month, ltime.day); +} + + +Field *Field_time::new_key_field(MEM_ROOT *root, TABLE *new_table, + uchar *new_ptr, uint32 length, + uchar *new_null_ptr, uint new_null_bit) +{ + THD *thd= get_thd(); + Field_time *res= + (Field_time*) Field::new_key_field(root, new_table, new_ptr, length, + new_null_ptr, new_null_bit); + if (!(thd->variables.old_behavior & OLD_MODE_ZERO_DATE_TIME_CAST) && res) + res->set_curdays(thd); + return res; +} + + +double Field_time0::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + uint32 j= (uint32) uint3korr(ptr); + return (double) j; +} + +longlong Field_time0::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + return (longlong) sint3korr(ptr); +} + + +/** + @note + This function is multi-byte safe as the result string is always of type + my_charset_bin +*/ + +String *Field_time::val_str(String *str, + String *unused __attribute__((unused))) +{ + DBUG_ASSERT(marked_for_read()); + MYSQL_TIME ltime; + get_date(<ime, Datetime::Options(TIME_TIME_ONLY, get_thd())); + str->alloc(field_length + 1); + str->length(my_time_to_str(<ime, const_cast(str->ptr()), decimals())); + str->set_charset(&my_charset_numeric); + return str; +} + + +bool Field_time::check_zero_in_date_with_warn(date_mode_t fuzzydate) +{ + date_conv_mode_t tmp= date_conv_mode_t(fuzzydate); + if (!(tmp & TIME_TIME_ONLY) && (tmp & TIME_NO_ZERO_IN_DATE)) + { + THD *thd= get_thd(); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_DATA_OUT_OF_RANGE, + ER_THD(thd, ER_WARN_DATA_OUT_OF_RANGE), field_name.str, + thd->get_stmt_da()->current_row_for_warning()); + return true; + } + return false; +} + + +/** + @note + Normally we would not consider 'time' as a valid date, but we allow + get_date() here to be 
able to do things like + DATE_FORMAT(time, "%l.%i %p") +*/ + +bool Field_time0::get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + if (check_zero_in_date_with_warn(fuzzydate)) + return true; + long tmp=(long) sint3korr(ptr); + ltime->neg=0; + if (tmp < 0) + { + ltime->neg= 1; + tmp=-tmp; + } + ltime->year= ltime->month= ltime->day= 0; + ltime->hour= (int) (tmp/10000); + tmp-=ltime->hour*10000; + ltime->minute= (int) tmp/100; + ltime->second= (int) tmp % 100; + ltime->second_part=0; + ltime->time_type= MYSQL_TIMESTAMP_TIME; + return 0; +} + + +int Field_time::store_native(const Native &value) +{ + Time t(value); + DBUG_ASSERT(t.is_valid_time()); + store_TIME(t); + return 0; +} + + +bool Field_time::val_native(Native *to) +{ + MYSQL_TIME ltime; + get_date(<ime, date_mode_t(0)); + int warn; + return Time(&warn, <ime, 0).to_native(to, decimals()); +} + + +bool Field_time::send(Protocol *protocol) +{ + MYSQL_TIME ltime; + get_date(<ime, Time::Options(TIME_TIME_ONLY, get_thd())); + return protocol->store_time(<ime, decimals()); +} + + +int Field_time0::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + int32 a,b; + a=(int32) sint3korr(a_ptr); + b=(int32) sint3korr(b_ptr); + return (a < b) ? -1 : (a > b) ? 
1 : 0; +} + +void Field_time0::sort_string(uchar *to,uint length __attribute__((unused))) +{ + to[0] = (uchar) (ptr[2] ^ 128); + to[1] = ptr[1]; + to[2] = ptr[0]; +} + + +int Field_time_hires::reset() +{ + store_bigendian(zero_point, ptr, Field_time_hires::pack_length()); + return 0; +} + + +void Field_time_hires::store_TIME(const MYSQL_TIME *ltime) +{ + DBUG_ASSERT(ltime->year == 0); + DBUG_ASSERT(ltime->month == 0); + ulonglong packed= sec_part_shift(pack_time(ltime), dec) + zero_point; + store_bigendian(packed, ptr, Field_time_hires::pack_length()); +} + +int Field_time::store_decimal(const my_decimal *d) +{ + ErrConvDecimal str(d); + int was_cut; + Time tm(get_thd(), &was_cut, d, Time::Options(get_thd()), decimals()); + return store_TIME_with_warning(&tm, &str, was_cut); +} + + +bool Field_time::can_be_substituted_to_equal_item(const Context &ctx, + const Item_equal *item_equal) +{ + DBUG_ASSERT(item_equal->compare_type_handler()->cmp_type() != STRING_RESULT); + switch (ctx.subst_constraint()) { + case ANY_SUBST: + /* + A TIME field in a DATETIME comparison can be substituted to + Item_equal with TIME comparison. 
+ + SET timestamp=UNIX_TIMESTAMP('2015-08-30 10:20:30'); + CREATE OR REPLACE TABLE t1 (a TIME); + INSERT INTO t1 VALUES ('00:00:00'),('00:00:01'); + SELECT * FROM t1 WHERE a>=TIMESTAMP'2015-08-30 00:00:00' + AND a='00:00:00'; + + The above query can be simplified to: + SELECT * FROM t1 WHERE TIME'00:00:00'>=TIMESTAMP'2015-08-30 00:00:00' + AND a='00:00:00'; + And further to: + SELECT * FROM t1 WHERE a=TIME'00:00:00'; + */ + if (ctx.compare_type_handler() == &type_handler_datetime && + item_equal->compare_type_handler() == &type_handler_time) + return true; + return ctx.compare_type_handler() == item_equal->compare_type_handler(); + case IDENTITY_SUBST: + return true; + } + return false; +} + + +Item *Field_time::get_equal_const_item(THD *thd, const Context &ctx, + Item *const_item) +{ + /* + Old mode conversion from DATETIME with non-zero YYYYMMDD part + to TIME works very inconsistently. Possible variants: + - truncate the YYYYMMDD part + - add (MM*33+DD)*24 to hours + - add (MM*31+DD)*24 to hours + Let's disallow propagation of DATETIME with non-zero YYYYMMDD + as an equal constant for a TIME field. + */ + Time::datetime_to_time_mode_t mode= + (thd->variables.old_behavior & OLD_MODE_ZERO_DATE_TIME_CAST) ? + Time::DATETIME_TO_TIME_YYYYMMDD_00000000_ONLY : + Time::DATETIME_TO_TIME_MINUS_CURRENT_DATE; + + switch (ctx.subst_constraint()) { + case ANY_SUBST: + if (const_item->field_type() != MYSQL_TYPE_TIME) + { + // Get the value of const_item with conversion from DATETIME to TIME + Time tm(get_thd(), const_item, Time::Options_cmp(thd, mode)); + if (!tm.is_valid_time()) + return NULL; + /* + Replace a DATE/DATETIME constant to a TIME constant: + WHERE LENGTH(time_column)=8 + AND time_column=TIMESTAMP'2015-08-30 10:20:30'; + to: + WHERE LENGTH(time_column)=10 + AND time_column=TIME'10:20:30' + + (assuming CURRENT_DATE is '2015-08-30' + */ + return new (thd->mem_root) Item_time_literal(thd, &tm, + tm.get_mysql_time()-> + second_part ? 
+ TIME_SECOND_PART_DIGITS : + 0); + } + break; + case IDENTITY_SUBST: + if (const_item->field_type() != MYSQL_TYPE_TIME || + const_item->decimals != decimals()) + { + /* + Note, the value returned in "ltime" can have more fractional + digits that decimals(). The Item_time_literal constructor will + truncate these digits. We could disallow propagation is such + cases, but it's still useful (and safe) to optimize: + WHERE time0_column='00:00:00.123' AND LENGTH(a)=12 + to + WHERE time0_column='00:00:00.123' AND LENGTH(TIME'00:00:00')=12 + and then to + WHERE FALSE + The original WHERE would do the full table scan (in case of no keys). + The optimized WHERE will return with "Impossible WHERE", without + having to do the full table scan. + */ + Time tm(thd, const_item, Time::Options(TIME_TIME_ONLY, thd, mode), + decimals()); + if (!tm.is_valid_time()) + return NULL; + return new (thd->mem_root) Item_time_literal(thd, &tm, decimals()); + } + break; + } + return const_item; +} + + +longlong Field_time_with_dec::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + MYSQL_TIME ltime; + get_date(<ime, Time::Options(TIME_TIME_ONLY, get_thd())); + longlong val= TIME_to_ulonglong_time(<ime); + return ltime.neg ? 
-val : val; +} + +double Field_time_with_dec::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + MYSQL_TIME ltime; + get_date(<ime, Time::Options(TIME_TIME_ONLY, get_thd())); + return TIME_to_double(<ime); +} + +bool Field_time_hires::get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + if (check_zero_in_date_with_warn(fuzzydate)) + return true; + uint32 len= pack_length(); + longlong packed= read_bigendian(ptr, len); + + packed= sec_part_unshift(packed - zero_point, dec); + + unpack_time(packed, ltime, MYSQL_TIMESTAMP_TIME); + return false; +} + + +int Field_time_hires::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + ulonglong a=read_bigendian(a_ptr, Field_time_hires::pack_length()); + ulonglong b=read_bigendian(b_ptr, Field_time_hires::pack_length()); + return (a < b) ? -1 : (a > b) ? 1 : 0; +} + +void Field_time_hires::sort_string(uchar *to,uint length __attribute__((unused))) +{ + DBUG_ASSERT(length == Field_time_hires::pack_length()); + memcpy(to, ptr, length); + to[0]^= 128; +} + +void Field_time_with_dec::make_send_field(Send_field *field) +{ + Field::make_send_field(field); + field->decimals= dec; +} + +/**************************************************************************** +** time type with fsp (MySQL-5.6 version) +** In string context: HH:MM:SS.FFFFFF +** In number context: HHMMSS.FFFFFF +****************************************************************************/ + +int Field_timef::reset() +{ + my_time_packed_to_binary(0, ptr, dec); + return 0; +} + +void Field_timef::store_TIME(const MYSQL_TIME *ltime) +{ + longlong tmp= TIME_to_longlong_time_packed(ltime); + my_time_packed_to_binary(tmp, ptr, dec); +} + +bool Field_timef::get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + if (check_zero_in_date_with_warn(fuzzydate)) + return true; + longlong tmp= my_time_packed_from_binary(ptr, dec); + TIME_from_longlong_time_packed(ltime, tmp); + return false; +} +Binlog_type_info Field_timef::binlog_type_info() const +{ + return 
Binlog_type_info(Field_timef::binlog_type(), decimals(), 1); +} + + +longlong Field_timef::val_time_packed(THD *thd) +{ + DBUG_ASSERT(marked_for_read()); + longlong tmp= my_time_packed_from_binary(ptr, dec); + MYSQL_TIME ltime; + TIME_from_longlong_time_packed(<ime, tmp); + return pack_time(<ime); +} + + +int Field_timef::store_native(const Native &value) +{ + DBUG_ASSERT(value.length() == my_time_binary_length(dec)); + DBUG_ASSERT(Time(value).is_valid_time()); + memcpy(ptr, value.ptr(), value.length()); + return 0; +} + + +bool Field_timef::val_native(Native *to) +{ + uint32 binlen= my_time_binary_length(dec); + return to->copy((const char*) ptr, binlen); +} + + +/**************************************************************************** +** year type +** Save in a byte the year 0, 1901->2155 +** Can handle 2 byte or 4 byte years! +****************************************************************************/ + +int Field_year::store(const char *from, size_t len,CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + THD *thd= get_thd(); + char *end; + int error; + longlong nr= cs->strntoull10rnd(from, len, 0, &end, &error); + + if (nr < 0 || (nr >= 100 && nr <= 1900) || nr > 2155 || + error == MY_ERRNO_ERANGE) + { + *ptr=0; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + return 1; + } + + if (thd->count_cuted_fields <= CHECK_FIELD_EXPRESSION && error == MY_ERRNO_EDOM) + { + *ptr= 0; + return 1; + } + + if (thd->count_cuted_fields > CHECK_FIELD_EXPRESSION && + (error= check_int(cs, from, len, end, error))) + { + if (unlikely(error == 1) /* empty or incorrect string */) + { + *ptr= 0; + return 1; + } + error= 1; + } + + if (nr != 0 || len != 4) + { + if (nr < YY_PART_YEAR) + nr+=100; // 2000 - 2069 + else if (nr > 1900) + nr-= 1900; + } + *ptr= (char) (uchar) nr; + return error; +} + + +int Field_year::store(double nr) +{ + if (nr < 0.0 || nr > 2155.0) + { + (void) Field_year::store((longlong) -1, FALSE); + return 1; + } + return 
Field_year::store((longlong) nr, FALSE); +} + + +int Field_year::store(longlong nr, bool unsigned_val) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + if (nr < 0 || (nr >= 100 && nr <= 1900) || nr > 2155) + { + *ptr= 0; + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + return 1; + } + if (nr != 0 || field_length != 4) // 0000 -> 0; 00 -> 2000 + { + if (nr < YY_PART_YEAR) + nr+=100; // 2000 - 2069 + else if (nr > 1900) + nr-= 1900; + } + *ptr= (char) (uchar) nr; + return 0; +} + + +int Field_year::store_time_dec(const MYSQL_TIME *ltime, uint) +{ + ErrConvTime str(ltime); + if (Field_year::store(ltime->year, 0)) + return 1; + + const char *typestr= Temporal::type_name_by_timestamp_type(ltime->time_type); + set_datetime_warning(WARN_DATA_TRUNCATED, &str, typestr, 1); + return 0; +} + +bool Field_year::send(Protocol *protocol) +{ + DBUG_ASSERT(marked_for_read()); + Protocol_text *txt; + if ((txt= dynamic_cast(protocol))) + return send_numeric_zerofill_str(txt, PROTOCOL_SEND_SHORT); + ulonglong tmp= Field_year::val_int(); + return protocol->store_short(tmp); +} + + +double Field_year::val_real(void) +{ + return (double) Field_year::val_int(); +} + + +longlong Field_year::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + DBUG_ASSERT(field_length == 2 || field_length == 4); + int tmp= (int) ptr[0]; + if (field_length != 4) + tmp%=100; // Return last 2 char + else if (tmp) + tmp+=1900; + return (longlong) tmp; +} + + +String *Field_year::val_str(String *val_buffer, + String *val_ptr __attribute__((unused))) +{ + DBUG_ASSERT(field_length < 5); + val_buffer->alloc(5); + val_buffer->length(field_length); + char *to=(char*) val_buffer->ptr(); + sprintf(to,field_length == 2 ? 
"%02d" : "%04d",(int) Field_year::val_int()); + val_buffer->set_charset(&my_charset_numeric); + return val_buffer; +} + + +bool Field_year::get_date(MYSQL_TIME *ltime,date_mode_t fuzzydate) +{ + int tmp= (int) ptr[0]; + if (tmp || field_length != 4) + tmp+= 1900; + return int_to_datetime_with_warn(get_thd(), + Longlong_hybrid(tmp * 10000, true), + ltime, fuzzydate, table->s, field_name.str); +} + + +void Field_year::sql_type(String &res) const +{ + CHARSET_INFO *cs=res.charset(); + res.length(cs->cset->snprintf(cs,(char*)res.ptr(),res.alloced_length(), + "year(%d)",(int) field_length)); +} + + +/*****************************************************************************/ + +int Field_date_common::store_TIME_with_warning(const Datetime *dt, + const ErrConv *str, + int was_cut) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + // Handle totally bad values + if (!dt->is_valid_datetime()) + return store_invalid_with_warning(str, was_cut, "date"); + // Store the value + if (!dt->hhmmssff_is_zero()) + was_cut|= MYSQL_TIME_NOTE_TRUNCATED; + store_datetime(*dt); + // Caclulate return value and send warnings if needed + return store_TIME_return_code_with_warnings(was_cut, str, "date"); +} + +int Field_date_common::store(const char *from, size_t len, CHARSET_INFO *cs) +{ + MYSQL_TIME_STATUS st; + ErrConvString str(from, len, cs); + THD *thd= get_thd(); + Datetime dt(thd, &st, from, len, cs, Date::Options(thd), 0); + return store_TIME_with_warning(&dt, &str, st.warnings); +} + +int Field_date_common::store(double nr) +{ + int error; + ErrConvDouble str(nr); + THD *thd= get_thd(); + Datetime dt(thd, &error, nr, Date::Options(thd), 0); + return store_TIME_with_warning(&dt, &str, error); +} + +int Field_date_common::store(longlong nr, bool unsigned_val) +{ + int error; + Longlong_hybrid tmp(nr, unsigned_val); + ErrConvInteger str(tmp); + THD *thd= get_thd(); + Datetime dt(&error, tmp, Date::Options(thd)); + return store_TIME_with_warning(&dt, &str, error); +} + +int 
Field_date_common::store_time_dec(const MYSQL_TIME *ltime, uint dec) +{ + int error; + ErrConvTime str(ltime); + THD *thd= get_thd(); + Datetime dt(thd, &error, ltime, Date::Options(thd), 0); + return store_TIME_with_warning(&dt, &str, error); +} + +int Field_date_common::store_decimal(const my_decimal *d) +{ + int error; + ErrConvDecimal str(d); + THD *thd= get_thd(); + Datetime tm(thd, &error, d, Date::Options(thd), 0); + return store_TIME_with_warning(&tm, &str, error); +} + + +/**************************************************************************** +** date type +** In string context: YYYY-MM-DD +** In number context: YYYYMMDD +** Stored as a 4 byte unsigned int +****************************************************************************/ + +void Field_date::store_TIME(const MYSQL_TIME *ltime) +{ + uint tmp= ltime->year*10000L + ltime->month*100+ltime->day; + int4store(ptr,tmp); +} + +bool Field_date::send(Protocol *protocol) +{ + longlong tmp= Field_date::val_int(); + MYSQL_TIME tm; + tm.year= (uint32) tmp/10000L % 10000; + tm.month= (uint32) tmp/100 % 100; + tm.day= (uint32) tmp % 100; + return protocol->store_date(&tm); +} + + +double Field_date::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + int32 j; + j=sint4korr(ptr); + return (double) (uint32) j; +} + + +longlong Field_date::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + int32 j; + j=sint4korr(ptr); + return (longlong) (uint32) j; +} + + +bool Field_date::get_TIME(MYSQL_TIME *ltime, const uchar *pos, + date_mode_t fuzzydate) const +{ + DBUG_ASSERT(marked_for_read()); + int32 tmp= sint4korr(pos); + ltime->year= (int) ((uint32) tmp/10000L % 10000); + ltime->month= (int) ((uint32) tmp/100 % 100); + ltime->day= (int) ((uint32) tmp % 100); + ltime->time_type= MYSQL_TIMESTAMP_DATE; + ltime->hour= ltime->minute= ltime->second= ltime->second_part= ltime->neg= 0; + return validate_MMDD(tmp, ltime->month, ltime->day, fuzzydate); +} + + +String *Field_date::val_str(String *val_buffer, + String 
*val_ptr __attribute__((unused))) +{ + MYSQL_TIME ltime; + get_TIME(<ime, ptr, date_mode_t(0)); + val_buffer->alloc(MAX_DATE_STRING_REP_LENGTH); + uint length= (uint) my_date_to_str(<ime, + const_cast(val_buffer->ptr())); + val_buffer->length(length); + val_buffer->set_charset(&my_charset_numeric); + + return val_buffer; +} + + +int Field_date::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + int32 a,b; + a=sint4korr(a_ptr); + b=sint4korr(b_ptr); + return ((uint32) a < (uint32) b) ? -1 : ((uint32) a > (uint32) b) ? 1 : 0; +} + + +void Field_date::sort_string(uchar *to,uint length __attribute__((unused))) +{ + to[0] = ptr[3]; + to[1] = ptr[2]; + to[2] = ptr[1]; + to[3] = ptr[0]; +} + +void Field_date::sql_type(String &res) const +{ + res.set_ascii(STRING_WITH_LEN("date")); +} + + +/**************************************************************************** +** The new date type +** This is identical to the old date type, but stored on 3 bytes instead of 4 +** In number context: YYYYMMDD +****************************************************************************/ + +void Field_newdate::store_TIME(const MYSQL_TIME *ltime) +{ + uint tmp= ltime->year*16*32 + ltime->month*32+ltime->day; + int3store(ptr,tmp); +} + + +bool Field_newdate::send(Protocol *protocol) +{ + MYSQL_TIME tm; + Field_newdate::get_date(&tm, date_mode_t(0)); + return protocol->store_date(&tm); +} + + +double Field_newdate::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + return (double) Field_newdate::val_int(); +} + + +longlong Field_newdate::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + ulong j= uint3korr(ptr); + j= (j % 32L)+(j / 32L % 16L)*100L + (j/(16L*32L))*10000L; + return (longlong) j; +} + + +String *Field_newdate::val_str(String *val_buffer, + String *val_ptr __attribute__((unused))) +{ + DBUG_ASSERT(marked_for_read()); + val_buffer->alloc(field_length+1); + val_buffer->length(field_length); + uint32 tmp=(uint32) uint3korr(ptr); + int part; + char *pos=(char*) 
val_buffer->ptr()+10; + + /* Open coded to get more speed */ + *pos--=0; // End NULL + part=(int) (tmp & 31); + *pos--= (char) ('0'+part%10); + *pos--= (char) ('0'+part/10); + *pos--= '-'; + part=(int) (tmp >> 5 & 15); + *pos--= (char) ('0'+part%10); + *pos--= (char) ('0'+part/10); + *pos--= '-'; + part=(int) (tmp >> 9); + *pos--= (char) ('0'+part%10); part/=10; + *pos--= (char) ('0'+part%10); part/=10; + *pos--= (char) ('0'+part%10); part/=10; + *pos= (char) ('0'+part); + val_buffer->set_charset(&my_charset_numeric); + return val_buffer; +} + + +bool Field_newdate::get_TIME(MYSQL_TIME *ltime, const uchar *pos, + date_mode_t fuzzydate) const +{ + DBUG_ASSERT(marked_for_read()); + uint32 tmp=(uint32) uint3korr(pos); + ltime->day= tmp & 31; + ltime->month= (tmp >> 5) & 15; + ltime->year= (tmp >> 9); + ltime->time_type= MYSQL_TIMESTAMP_DATE; + ltime->hour= ltime->minute= ltime->second= ltime->second_part= ltime->neg= 0; + return validate_MMDD(tmp, ltime->month, ltime->day, fuzzydate); +} + + +longlong Field_newdate::val_datetime_packed(THD *thd) +{ + MYSQL_TIME ltime; + Field_newdate::get_date(<ime, date_mode_t(0)); + return pack_time(<ime); +} + + +int Field_newdate::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + uint32 a,b; + a=(uint32) uint3korr(a_ptr); + b=(uint32) uint3korr(b_ptr); + return (a < b) ? -1 : (a > b) ? 
1 : 0; +} + + +void Field_newdate::sort_string(uchar *to,uint length __attribute__((unused))) +{ + to[0] = ptr[2]; + to[1] = ptr[1]; + to[2] = ptr[0]; +} + + +void Field_newdate::sql_type(String &res) const +{ + res.set_ascii(STRING_WITH_LEN("date")); +} + + +Item *Field_newdate::get_equal_const_item(THD *thd, const Context &ctx, + Item *const_item) +{ + switch (ctx.subst_constraint()) { + case ANY_SUBST: + if (!is_temporal_type_with_date(const_item->field_type())) + { + /* + DATE is compared to DATETIME-alike non-temporal values + (such as VARCHAR, DECIMAL) as DATETIME, e.g.: + WHERE date_column=20010101235959.0000009 + So here we convert the constant to DATETIME normally. + In case if TIME_ROUND_FRACTIONAL is enabled, nanoseconds will round. + */ + Datetime dt(thd, const_item, Datetime::Options_cmp(thd)); + if (!dt.is_valid_datetime()) + return NULL; + /* + Replace the constant to a DATE or DATETIME constant. + Example: + WHERE LENGTH(date_column)=10 + AND date_column=TIME'10:20:30'; + to: + WHERE LENGTH(date_column)=10 + AND date_column=TIMESTAMP'2015-08-30 10:20:30' + + (assuming CURRENT_DATE is '2015-08-30' + */ + if (!dt.hhmmssff_is_zero()) + return new (thd->mem_root) + Item_datetime_literal_for_invalid_dates(thd, &dt, + dt.get_mysql_time()-> + second_part ? + TIME_SECOND_PART_DIGITS : 0); + Date d(&dt); + return new (thd->mem_root) Item_date_literal_for_invalid_dates(thd, &d); + } + break; + case IDENTITY_SUBST: + if (const_item->field_type() != MYSQL_TYPE_DATE) + { + /* + DATE is compared to non-temporal as DATETIME. + We need to convert to DATETIME first, taking into account the + current session rounding mode (even though this is IDENTITY_SUBSTS!), + then convert the result to DATE. 
+ */ + Datetime dt(thd, const_item, Datetime::Options(TIME_CONV_NONE, thd)); + if (!dt.is_valid_datetime()) + return NULL; + Date d(&dt); + return new (thd->mem_root) Item_date_literal(thd, &d); + } + break; + } + return const_item; +} + + +/**************************************************************************** +** datetime type +** In string context: YYYY-MM-DD HH:MM:DD +** In number context: YYYYMMDDHHMMDD +** Stored as a 8 byte unsigned int. Should sometimes be change to a 6 byte int. +****************************************************************************/ + +void Field_datetime0::store_TIME(const MYSQL_TIME *ltime) +{ + ulonglong tmp= TIME_to_ulonglong_datetime(ltime); + int8store(ptr,tmp); +} + + +sql_mode_t +Field_datetime::conversion_depends_on_sql_mode(THD *thd, Item *expr) const +{ + return expr->datetime_precision(thd) > decimals() ? + MODE_TIME_ROUND_FRACTIONAL : 0; +} + + +bool Field_datetime0::send(Protocol *protocol) +{ + MYSQL_TIME tm; + Field_datetime0::get_date(&tm, date_mode_t(0)); + return protocol->store_datetime(&tm, 0); +} + + +longlong Field_datetime0::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + longlong j; + j=sint8korr(ptr); + return j; +} + + +String *Field_datetime0::val_str(String *val_buffer, + String *val_ptr __attribute__((unused))) +{ + val_buffer->alloc(field_length+1); + val_buffer->length(field_length); + + DBUG_ASSERT(marked_for_read()); + ulonglong tmp; + long part1,part2; + char *pos; + int part3; + + tmp= Field_datetime0::val_int(); + + /* + Avoid problem with slow longlong arithmetic and sprintf + */ + + part1=(long) (tmp/1000000LL); + part2=(long) (tmp - (ulonglong) part1*1000000LL); + + pos=(char*) val_buffer->ptr() + MAX_DATETIME_WIDTH; + *pos--=0; + *pos--= (char) ('0'+(char) (part2%10)); part2/=10; + *pos--= (char) ('0'+(char) (part2%10)); part3= (int) (part2 / 10); + *pos--= ':'; + *pos--= (char) ('0'+(char) (part3%10)); part3/=10; + *pos--= (char) ('0'+(char) (part3%10)); part3/=10; + *pos--= ':'; 
+ *pos--= (char) ('0'+(char) (part3%10)); part3/=10; + *pos--= (char) ('0'+(char) part3); + *pos--= ' '; + *pos--= (char) ('0'+(char) (part1%10)); part1/=10; + *pos--= (char) ('0'+(char) (part1%10)); part1/=10; + *pos--= '-'; + *pos--= (char) ('0'+(char) (part1%10)); part1/=10; + *pos--= (char) ('0'+(char) (part1%10)); part3= (int) (part1/10); + *pos--= '-'; + *pos--= (char) ('0'+(char) (part3%10)); part3/=10; + *pos--= (char) ('0'+(char) (part3%10)); part3/=10; + *pos--= (char) ('0'+(char) (part3%10)); part3/=10; + *pos=(char) ('0'+(char) part3); + val_buffer->set_charset(&my_charset_numeric); + return val_buffer; +} + +bool Field_datetime0::get_TIME(MYSQL_TIME *ltime, const uchar *pos, + date_mode_t fuzzydate) const +{ + DBUG_ASSERT(marked_for_read()); + longlong tmp= sint8korr(pos); + uint32 part1,part2; + part1=(uint32) (tmp/1000000LL); + part2=(uint32) (tmp - (ulonglong) part1*1000000LL); + + ltime->time_type= MYSQL_TIMESTAMP_DATETIME; + ltime->neg= 0; + ltime->second_part= 0; + ltime->second= (int) (part2%100); + ltime->minute= (int) (part2/100%100); + ltime->hour= (int) (part2/10000); + ltime->day= (int) (part1%100); + ltime->month= (int) (part1/100%100); + ltime->year= (int) (part1/10000); + return validate_MMDD(tmp, ltime->month, ltime->day, fuzzydate); +} + + +int Field_datetime0::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + longlong a,b; + a=sint8korr(a_ptr); + b=sint8korr(b_ptr); + return ((ulonglong) a < (ulonglong) b) ? -1 : + ((ulonglong) a > (ulonglong) b) ? 
1 : 0; +} + +void Field_datetime0::sort_string(uchar *to,uint length __attribute__((unused))) +{ + to[0] = ptr[7]; + to[1] = ptr[6]; + to[2] = ptr[5]; + to[3] = ptr[4]; + to[4] = ptr[3]; + to[5] = ptr[2]; + to[6] = ptr[1]; + to[7] = ptr[0]; +} + + +int Field_datetime::set_time() +{ + THD *thd= table->in_use; + set_notnull(); + // Here we always truncate (not round), no matter what sql_mode is + if (decimals()) + store_datetime(Datetime(thd, Timeval(thd->query_start(), + thd->query_start_sec_part()) + ).trunc(decimals())); + else + store_datetime(Datetime(thd, Timeval(thd->query_start(), 0))); + return 0; +} + + +void Field_datetime_hires::store_TIME(const MYSQL_TIME *ltime) +{ + ulonglong packed= sec_part_shift(pack_time(ltime), dec); + store_bigendian(packed, ptr, Field_datetime_hires::pack_length()); +} + +bool Field_datetime_with_dec::send(Protocol *protocol) +{ + MYSQL_TIME ltime; + get_date(<ime, date_mode_t(0)); + return protocol->store_datetime(<ime, dec); +} + + +double Field_datetime_with_dec::val_real(void) +{ + MYSQL_TIME ltime; + get_date(<ime, date_mode_t(0)); + return TIME_to_double(<ime); +} + +longlong Field_datetime_with_dec::val_int(void) +{ + MYSQL_TIME ltime; + get_date(<ime, date_mode_t(0)); + return TIME_to_ulonglong_datetime(<ime); +} + + +String *Field_datetime_with_dec::val_str(String *str, + String *unused __attribute__((unused))) +{ + MYSQL_TIME ltime; + get_date(<ime, date_mode_t(0)); + str->alloc(field_length+1); + str->length(field_length); + my_datetime_to_str(<ime, (char*) str->ptr(), dec); + str->set_charset(&my_charset_numeric); + return str; +} + + +bool Field_datetime_hires::get_TIME(MYSQL_TIME *ltime, const uchar *pos, + date_mode_t fuzzydate) const +{ + DBUG_ASSERT(marked_for_read()); + ulonglong packed= read_bigendian(pos, Field_datetime_hires::pack_length()); + unpack_time(sec_part_unshift(packed, dec), ltime, MYSQL_TIMESTAMP_DATETIME); + return validate_MMDD(packed, ltime->month, ltime->day, fuzzydate); +} + + +int 
Field_datetime_hires::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + ulonglong a=read_bigendian(a_ptr, Field_datetime_hires::pack_length()); + ulonglong b=read_bigendian(b_ptr, Field_datetime_hires::pack_length()); + return a < b ? -1 : a > b ? 1 : 0; +} + +void Field_datetime_with_dec::make_send_field(Send_field *field) +{ + Field::make_send_field(field); + field->decimals= dec; +} + + +/**************************************************************************** +** MySQL-5.6 compatible DATETIME(N) +** +****************************************************************************/ +int Field_datetimef::reset() +{ + my_datetime_packed_to_binary(0, ptr, dec); + return 0; +} + +void Field_datetimef::store_TIME(const MYSQL_TIME *ltime) +{ + longlong tmp= TIME_to_longlong_datetime_packed(ltime); + my_datetime_packed_to_binary(tmp, ptr, dec); +} + +bool Field_datetimef::get_TIME(MYSQL_TIME *ltime, const uchar *pos, + date_mode_t fuzzydate) const +{ + DBUG_ASSERT(marked_for_read()); + longlong tmp= my_datetime_packed_from_binary(pos, dec); + TIME_from_longlong_datetime_packed(ltime, tmp); + return validate_MMDD(tmp, ltime->month, ltime->day, fuzzydate); +} +Binlog_type_info Field_datetimef::binlog_type_info() const +{ + return Binlog_type_info(Field_datetimef::binlog_type(), decimals(), 1); +} + +longlong Field_datetimef::val_datetime_packed(THD *thd) +{ + DBUG_ASSERT(marked_for_read()); + longlong tmp= my_datetime_packed_from_binary(ptr, dec); + MYSQL_TIME ltime; + TIME_from_longlong_datetime_packed(<ime, tmp); + return pack_time(<ime); +} + + +/**************************************************************************** +** string type +** A string may be varchar or binary +****************************************************************************/ + +/* + Report "not well formed" or "cannot convert" error + after storing a character string info a field. 
+ + SYNOPSIS + check_string_copy_error() + copier - the conversion status + end - the very end of the source string + that was just copied + cs - character set of the string + + NOTES + As of version 5.0 both cases return the same error: + + "Invalid string value: 'xxx' for column 't' at row 1" + + Future versions will possibly introduce a new error message: + + "Cannot convert character string: 'xxx' for column 't' at row 1" + + RETURN + FALSE - If errors didn't happen + TRUE - If an error happened +*/ + +bool +Field_longstr::check_string_copy_error(const String_copier *copier, + const char *end, + CHARSET_INFO *cs) +{ + const char *pos; + char tmp[32]; + + if (likely(!(pos= copier->most_important_error_pos()))) + return FALSE; + + /* Ignore errors from internal expressions */ + if (get_thd()->count_cuted_fields > CHECK_FIELD_EXPRESSION) + { + DBUG_ASSERT(sizeof(tmp) >= convert_to_printable_required_length(6)); + convert_to_printable(tmp, sizeof(tmp), pos, (end - pos), cs, 6); + set_warning_truncated_wrong_value("string", tmp); + } + return TRUE; +} + + +/* + Check if we lost any important data and send a truncation error/warning + + SYNOPSIS + Field_longstr::report_if_important_data() + pstr - Truncated rest of string + end - End of truncated string + count_spaces - Treat trailing spaces as important data + + RETURN VALUES + 0 - None was truncated (or we don't count cut fields) + 2 - Some bytes were truncated + + NOTE + Check if we lost any important data (anything in a binary string, + or any non-space in others). If only trailing spaces were lost, + send a truncation note, otherwise send a truncation error. + Silently ignore trailing spaces if the count_spaces parameter is FALSE. 
+*/ + +int +Field_longstr::report_if_important_data(const char *pstr, const char *end, + bool count_spaces) +{ + THD *thd; + if ((pstr < end) && + (thd= get_thd())->count_cuted_fields > CHECK_FIELD_EXPRESSION) + { + if (test_if_important_data(field_charset(), pstr, end)) + { + if (thd->abort_on_warning) + set_warning(ER_DATA_TOO_LONG, 1); + else + set_warning(WARN_DATA_TRUNCATED, 1); + return 2; + } + else if (count_spaces) + { + /* If we lost only spaces then produce a NOTE, not a WARNING */ + set_note(WARN_DATA_TRUNCATED, 1); + return 2; + } + } + return 0; +} + + +/* + This is JSON specific. + We should eventually add Field_json_varchar and Field_json_blob + and move make_send_field() to the new classes. +*/ +void Field_longstr::make_send_field(Send_field *field) +{ + Field_str::make_send_field(field); + if (check_constraint) + { + /* + Append the format that is implicitly implied by the CHECK CONSTRAINT. + For example: + CREATE TABLE t1 (js longtext DEFAULT NULL CHECK (json_valid(a))); + SELECT j FROM t1; + will add "format=json" to the extended type info metadata for t1.js. + */ + check_constraint->expr->set_format_by_check_constraint(field); + } +} + + +/* + An optimized version that uses less stack than Field::send(). +*/ +bool Field_longstr::send(Protocol *protocol) +{ + String tmp; + val_str(&tmp, &tmp); + /* + Ensure this function is only used with classes that do not allocate + memory in val_str() + */ + DBUG_ASSERT(tmp.alloced_length() == 0); + return protocol->store(tmp.ptr(), tmp.length(), tmp.charset()); +} + + +const Type_handler *Field_string::type_handler() const +{ + if (is_var_string()) + return &type_handler_var_string; + /* + This is a temporary solution and will be fixed soon (in 10.9?). + Type_handler_string_json will provide its own Field_string_json. 
+ */ + if (Type_handler_json_common::has_json_valid_constraint(this)) + return &type_handler_string_json; + return &type_handler_string; +} + + /* Copy a string and fill with space */ + +int Field_string::store(const char *from, size_t length,CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + uint copy_length; + int rc; + + /* See the comment for Field_long::store(long long) */ + DBUG_ASSERT(!table || table->in_use == current_thd); + + rc= well_formed_copy_with_check((char*) ptr, field_length, + cs, from, length, + Field_string::char_length(), + false, ©_length); + + /* Append spaces if the string was shorter than the field. */ + if (copy_length < field_length) + field_charset()->fill((char*) ptr + copy_length, + field_length - copy_length, + field_charset()->pad_char); + + return rc; +} + + +int Field_str::store(longlong nr, bool unsigned_val) +{ + char buff[64]; + uint length; + length= (uint) (field_charset()->longlong10_to_str)(buff, sizeof(buff), + (unsigned_val ? 10: -10), + nr); + return store(buff, length, field_charset()); +} + + +/** + Store double value in Field_string or Field_varstring. + + Pretty prints double number into field_length characters buffer. + + @param nr number +*/ + +int Field_str::store(double nr) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + char buff[DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE]; + uint local_char_length= MY_MIN(sizeof(buff), Field_str::char_length()); + size_t length= 0; + my_bool error= (local_char_length == 0); + + // my_gcvt() requires width > 0, and we may have a CHAR(0) column. 
+ if (likely(!error)) + length= my_gcvt(nr, MY_GCVT_ARG_DOUBLE, local_char_length, buff, &error); + + if (unlikely(error)) + { + if (get_thd()->abort_on_warning) + set_warning(ER_DATA_TOO_LONG, 1); + else + set_warning(WARN_DATA_TRUNCATED, 1); + } + return store(buff, (uint)length, &my_charset_numeric); +} + +bool Field_string::is_equal(const Column_definition &new_field) const +{ + DBUG_ASSERT(!compression_method()); + return new_field.type_handler() == type_handler() && + new_field.char_length == char_length() && + new_field.charset == field_charset() && + new_field.length == max_display_length(); +} + + +int Field_longstr::store_decimal(const my_decimal *d) +{ + StringBuffer str; + d->to_string(&str); + return store(str.ptr(), str.length(), str.charset()); +} + +uint32 Field_longstr::max_data_length() const +{ + return field_length + (field_length > 255 ? 2 : 1); +} + + +Data_type_compatibility +Field_longstr::cmp_to_string_with_same_collation(const Item_bool_func *cond, + const Item *item) const +{ + return !cmp_is_done_using_type_handler_of_this(cond, item) ? + Data_type_compatibility::INCOMPATIBLE_DATA_TYPE : + charset() != cond->compare_collation() ? + Data_type_compatibility::INCOMPATIBLE_COLLATION : + Data_type_compatibility::OK; +} + + +Data_type_compatibility +Field_longstr::cmp_to_string_with_stricter_collation(const Item_bool_func *cond, + const Item *item) const +{ + return !cmp_is_done_using_type_handler_of_this(cond, item) ? + Data_type_compatibility::INCOMPATIBLE_DATA_TYPE : + (charset() != cond->compare_collation() && + !(cond->compare_collation()->state & MY_CS_BINSORT) && + !Utf8_narrow::should_do_narrowing(this, cond->compare_collation())) ? 
+ Data_type_compatibility::INCOMPATIBLE_COLLATION : + Data_type_compatibility::OK; +} + + +Data_type_compatibility +Field_longstr::can_optimize_keypart_ref(const Item_bool_func *cond, + const Item *item) const +{ + DBUG_ASSERT(cmp_type() == STRING_RESULT); + /* + So, we have an equality: tbl.string_key = 'abc' + + The comparison is the string comparison. Can we use index lookups to + find matching rows? We can do that when: + - The comparison uses the same collation as tbl.string_key + - the comparison uses binary collation, while tbl.string_key + uses some other collation. + In this case, we will find matches in some collation. For example, for + 'abc' we may find 'abc', 'ABC', and 'äbc'. + But we're certain that will find the row with the identical binary, 'abc'. + */ + return cmp_to_string_with_stricter_collation(cond, item); +} + + +Data_type_compatibility +Field_longstr::can_optimize_hash_join(const Item_bool_func *cond, + const Item *item) const +{ + DBUG_ASSERT(cmp_type() == STRING_RESULT); + return cmp_to_string_with_same_collation(cond, item); +} + + +Data_type_compatibility +Field_longstr::can_optimize_group_min_max(const Item_bool_func *cond, + const Item *const_item) const +{ + /* + Can't use indexes when comparing a string to a number or a date + Don't use an index when comparing strings of different collations. + */ + DBUG_ASSERT(cmp_type() == STRING_RESULT); + return cmp_to_string_with_same_collation(cond, const_item); +} + + +Data_type_compatibility +Field_longstr::can_optimize_range(const Item_bool_func *cond, + const Item *item, + bool is_eq_func) const +{ + return is_eq_func ? + cmp_to_string_with_stricter_collation(cond, item) : + cmp_to_string_with_same_collation(cond, item); +} + + +/** + This overrides the default behavior of the parent constructor + Warn_filter(thd) to suppress notes about trailing spaces in case of CHAR(N), + as they are truncated during val_str(). 
+ We still do want truncation notes in case of BINARY(N), + as trailing spaces are not truncated in val_str(). +*/ +Field_string::Warn_filter_string::Warn_filter_string(const THD *thd, + const Field_string *field) + :Warn_filter(!thd->no_errors, + !thd->no_errors && + field->field_charset() == &my_charset_bin) +{ } + + +double Field_string::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + THD *thd= get_thd(); + return Converter_strntod_with_warn(get_thd(), + Warn_filter_string(thd, this), + Field_string::charset(), + (const char *) ptr, + field_length).result(); +} + + +longlong Field_string::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + THD *thd= get_thd(); + return Converter_strntoll_with_warn(thd, Warn_filter_string(thd, this), + Field_string::charset(), + (const char *) ptr, + field_length).result(); +} + + +sql_mode_t Field_string::value_depends_on_sql_mode() const +{ + return has_charset() ? MODE_PAD_CHAR_TO_FULL_LENGTH : sql_mode_t(0); +}; + + +sql_mode_t Field_string::can_handle_sql_mode_dependency_on_store() const +{ + return has_charset() ? 
MODE_PAD_CHAR_TO_FULL_LENGTH : sql_mode_t(0); +} + + +String *Field_string::val_str(String *val_buffer __attribute__((unused)), + String *val_ptr) +{ + DBUG_ASSERT(marked_for_read()); + /* See the comment for Field_long::store(long long) */ + DBUG_ASSERT(!table || table->in_use == current_thd); + size_t length; + if (get_thd()->variables.sql_mode & + MODE_PAD_CHAR_TO_FULL_LENGTH) + length= field_charset()->charpos(ptr, ptr + field_length, + Field_string::char_length()); + else + length= field_charset()->lengthsp((const char*) ptr, field_length); + val_ptr->set((const char*) ptr, length, field_charset()); + return val_ptr; +} + + +my_decimal *Field_string::val_decimal(my_decimal *decimal_value) +{ + DBUG_ASSERT(marked_for_read()); + THD *thd= get_thd(); + Converter_str2my_decimal_with_warn(thd, + Warn_filter_string(thd, this), + E_DEC_FATAL_ERROR & ~E_DEC_BAD_NUM, + Field_string::charset(), + (const char *) ptr, + field_length, decimal_value); + return decimal_value; +} + + +struct Check_field_param { + const Field *field; +}; + +#ifdef HAVE_REPLICATION +static bool +check_field_for_37426(const void *param_arg) +{ + Check_field_param *param= (Check_field_param*) param_arg; + DBUG_ASSERT(param->field->real_type() == MYSQL_TYPE_STRING); + DBUG_PRINT("debug", ("Field %s - type: %d, size: %d", + param->field->field_name.str, + param->field->real_type(), + param->field->row_pack_length())); + return param->field->row_pack_length() > 255; +} +#endif + +bool +Field_string::compatible_field_size(uint field_metadata, + const Relay_log_info *rli_arg, + uint16 mflags __attribute__((unused)), + int *order_var) const +{ +#ifdef HAVE_REPLICATION + const Check_field_param check_param = { this }; + if (rpl_master_has_bug(rli_arg, 37426, TRUE, + check_field_for_37426, &check_param)) + return FALSE; // Not compatible field sizes +#endif + return Field::compatible_field_size(field_metadata, rli_arg, mflags, order_var); +} + + +int Field_string::cmp(const uchar *a_ptr, const uchar 
*b_ptr) const +{ + return field_charset()->coll->strnncollsp_nchars(field_charset(), + a_ptr, field_length, + b_ptr, field_length, + Field_string::char_length(), + MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES); +} + + +void Field_string::sort_string(uchar *to,uint length) +{ +#ifdef DBUG_ASSERT_EXISTS + size_t tmp= +#endif + field_charset()->strnxfrm(to, length, + char_length() * field_charset()->strxfrm_multiply, + ptr, field_length, + MY_STRXFRM_PAD_WITH_SPACE | + MY_STRXFRM_PAD_TO_MAXLEN); + DBUG_ASSERT(tmp == length); +} + + +void Field_string::sql_type(String &res) const +{ + THD *thd= table->in_use; + CHARSET_INFO *cs=res.charset(); + size_t length; + + length= cs->cset->snprintf(cs,(char*) res.ptr(), + res.alloced_length(), "%s(%d)%s", + (type() == MYSQL_TYPE_VAR_STRING ? + (has_charset() ? "varchar" : "varbinary") : + (has_charset() ? "char" : "binary")), + (int) field_length / charset()->mbmaxlen, + type() == MYSQL_TYPE_VAR_STRING ? "/*old*/" : ""); + res.length(length); + if ((thd->variables.sql_mode & (MODE_MYSQL323 | MODE_MYSQL40)) && + has_charset() && (charset()->state & MY_CS_BINSORT)) + res.append(STRING_WITH_LEN(" binary")); +} + +/** + For fields which are associated with character sets their length is provided + in octets and their character set information is also provided as part of + type information. + + @param res String which contains filed type and length. 
+*/ +void Field_string::sql_rpl_type(String *res) const +{ + if (Field_string::has_charset()) + { + CHARSET_INFO *cs= res->charset(); + DBUG_ASSERT(cs->mbminlen == 1); + size_t length= cs->cset->snprintf(cs, (char*) res->ptr(), + res->alloced_length(), + "char(%u octets) character set %s", + field_length, + charset()->cs_name.str); + res->length(length); + } + else + Field_string::sql_type(*res); + } + +uchar *Field_string::pack(uchar *to, const uchar *from, uint max_length) +{ + DBUG_PRINT("debug", ("Packing field '%s'", field_name.str)); + return StringPack(field_charset(), field_length).pack(to, from, max_length); +} + + +/** + Unpack a string field from row data. + + This method is used to unpack a string field from a master whose size + of the field is less than that of the slave. Note that there can be a + variety of field types represented with this class. Certain types like + ENUM or SET are processed differently. Hence, the upper byte of the + @c param_data argument contains the result of field->real_type() from + the master. + + @note For information about how the length is packed, see @c + Field_string::save_field_metadata + + @param to Destination of the data + @param from Source of the data + @param param_data Real type (upper) and length (lower) values + + @return New pointer into memory based on from + length of the data +*/ +const uchar * +Field_string::unpack(uchar *to, const uchar *from, const uchar *from_end, + uint param_data) +{ + return StringPack(field_charset(), field_length).unpack(to, from, from_end, + param_data); +} + + +/** + Save the field metadata for string fields. + + Saves the real type in the first byte and the field length in the + second byte of the field metadata array at index of *metadata_ptr and + *(metadata_ptr + 1). 
+ + @note In order to be able to handle lengths exceeding 255 and be + backwards-compatible with pre-5.1.26 servers, an extra two bits of + the length has been added to the metadata in such a way that if + they are set, a new unrecognized type is generated. This will + cause pre-5.1-26 servers to stop due to a field type mismatch, + while new servers will be able to extract the extra bits. If the + length is <256, there will be no difference and both a new and an + old server will be able to handle it. + + @note The extra two bits are added to bits 13 and 14 of the + parameter data (with 1 being the least siginficant bit and 16 the + most significant bit of the word) by xoring the extra length bits + with the real type. Since all allowable types have 0xF as most + significant bits of the metadata word, lengths <256 will not affect + the real type at all, while all other values will result in a + non-existent type in the range 17-244. + + @see Field_string::unpack + + @param metadata_ptr First byte of field metadata + + @returns number of bytes written to metadata_ptr +*/ + +Binlog_type_info_fixed_string::Binlog_type_info_fixed_string(uchar type_code, + uint32 octets, + CHARSET_INFO *cs) + :Binlog_type_info(type_code, 0, 2, cs) +{ + DBUG_ASSERT(octets < 1024); + DBUG_ASSERT((type_code & 0xF0) == 0xF0); + DBUG_PRINT("debug", ("octets: %u, type_code: %u", octets, type_code)); + m_metadata= (type_code ^ ((octets & 0x300) >> 4)) + + (((uint)(octets & 0xFF)) << 8); +} + + +Binlog_type_info Field_string::binlog_type_info() const +{ + DBUG_ASSERT(Field_string::type() == binlog_type()); + return Binlog_type_info_fixed_string(Field_string::binlog_type(), + field_length, charset()); +} + + +uint Field_string::packed_col_length(const uchar *data_ptr, uint length) +{ + return StringPack::packed_col_length(data_ptr, length); +} + + +uint Field_string::max_packed_col_length(uint max_length) +{ + return StringPack::max_packed_col_length(max_length); +} + + +uint 
Field_string::get_key_image(uchar *buff, uint length, const uchar *ptr_arg, + imagetype type_arg) const +{ + size_t bytes= field_charset()->charpos((char*) ptr_arg, + (char*) ptr_arg + field_length, + length / mbmaxlen()); + memcpy(buff, ptr_arg, bytes); + if (bytes < length) + field_charset()->fill((char*) buff + bytes, + length - bytes, + field_charset()->pad_char); + return (uint)bytes; +} + + +Field *Field_string::make_new_field(MEM_ROOT *root, TABLE *new_table, + bool keep_type) +{ + Field *field; + if (type() != MYSQL_TYPE_VAR_STRING || keep_type) + field= Field::make_new_field(root, new_table, keep_type); + else if ((field= new (root) Field_varstring(field_length, maybe_null(), + &field_name, + new_table->s, charset()))) + { + /* + Old VARCHAR field which should be modified to a VARCHAR on copy + This is done to ensure that ALTER TABLE will convert old VARCHAR fields + to now VARCHAR fields. + */ + field->init_for_make_new_field(new_table, orig_table); + } + return field; +} + + +en_fieldtype Field_string::tmp_engine_column_type(bool use_packed_rows) const +{ + return field_length >= MIN_STRING_LENGTH_TO_PACK_ROWS ? FIELD_SKIP_ENDSPACE : + FIELD_NORMAL; +} + +/**************************************************************************** + VARCHAR type + Data in field->ptr is stored as: + 1 or 2 bytes length-prefix-header (from Field_varstring::length_bytes) + data + + NOTE: + When VARCHAR is stored in a key (for handler::index_read() etc) it's always + stored with a 2 byte prefix. (Just like blob keys). + + Normally length_bytes is calculated as (field_length < 256 : 1 ? 2) + The exception is if there is a prefix key field that is part of a long + VARCHAR, in which case field_length for this may be 1 but the length_bytes + is 2. 
+****************************************************************************/ + +const uint Field_varstring::MAX_SIZE= UINT_MAX16; + + +const Type_handler *Field_varstring::type_handler() const +{ + /* + This is a temporary solution and will be fixed soon (in 10.9?). + Type_handler_varchar_json will provide its own Field_varstring_json + and Field_varstring_compressed_json + */ + if (Type_handler_json_common::has_json_valid_constraint(this)) + return &type_handler_varchar_json; + return &type_handler_varchar; +} + + +/** + Save the field metadata for varstring fields. + + Saves the field length in the first byte. Note: may consume + 2 bytes. Caller must ensure second byte is contiguous with + first byte (e.g. array index 0,1). + + @param metadata_ptr First byte of field metadata + + @returns number of bytes written to metadata_ptr +*/ +Binlog_type_info Field_varstring::binlog_type_info() const +{ + DBUG_ASSERT(Field_varstring::type() == binlog_type()); + return Binlog_type_info(Field_varstring::type(), field_length, 2, charset()); +} + + +bool Field_varstring::memcpy_field_possible(const Field *from) const +{ + return (Field_str::memcpy_field_possible(from) && + !compression_method() == !from->compression_method() && + length_bytes == ((Field_varstring*) from)->length_bytes && + (table->file && !(table->file->ha_table_flags() & + HA_RECORD_MUST_BE_CLEAN_ON_WRITE))); +} + + +int Field_varstring::store(const char *from,size_t length,CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + uint copy_length; + int rc; + + rc= well_formed_copy_with_check((char*) get_data(), field_length, + cs, from, length, + Field_varstring::char_length(), + true, ©_length); + + store_length(copy_length); + + return rc; +} + + +double Field_varstring::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + THD *thd= get_thd(); + return Converter_strntod_with_warn(thd, Warn_filter(thd), + Field_varstring::charset(), + (const char *) get_data(), + get_length()).result(); +} + 
+ +longlong Field_varstring::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + THD *thd= get_thd(); + return Converter_strntoll_with_warn(thd, Warn_filter(thd), + Field_varstring::charset(), + (const char *) get_data(), + get_length()).result(); +} + + +String *Field_varstring::val_str(String *val_buffer __attribute__((unused)), + String *val_ptr) +{ + DBUG_ASSERT(marked_for_read()); + val_ptr->set((const char*) get_data(), get_length(), field_charset()); + return val_ptr; +} + + +my_decimal *Field_varstring::val_decimal(my_decimal *decimal_value) +{ + DBUG_ASSERT(marked_for_read()); + THD *thd= get_thd(); + Converter_str2my_decimal_with_warn(thd, Warn_filter(thd), + E_DEC_FATAL_ERROR & ~E_DEC_BAD_NUM, + Field_varstring::charset(), + (const char *) get_data(), + get_length(), decimal_value); + return decimal_value; + +} + + +/* + An optimized version that uses less stack and less temporary + variable initialization than Field_longstr::send() +*/ +bool Field_varstring::send(Protocol *protocol) +{ + return protocol->store((const char *) get_data(), get_length(), + field_charset()); +} + + +#ifdef HAVE_MEM_CHECK + +/* + Mark the unused part of the varstring as defined. + + This function is only used be Unique when calculating statistics. + + The marking is needed as we write the whole tree to disk in case of + overflows. For using or comparing values the undefined value part + is never used. We could also use bzero() here, but it would be + slower in production environments. 
+ This function is tested by main.stat_tables-enospc +*/ + +void Field_varstring::mark_unused_memory_as_defined() +{ + uint used_length __attribute__((unused)) = get_length(); + MEM_MAKE_DEFINED(get_data() + used_length, field_length - used_length); +} +#endif + + +int Field_varstring::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + uint a_length, b_length; + int diff; + + if (length_bytes == 1) + { + a_length= (uint) *a_ptr; + b_length= (uint) *b_ptr; + } + else + { + a_length= uint2korr(a_ptr); + b_length= uint2korr(b_ptr); + } + set_if_smaller(a_length, field_length); + set_if_smaller(b_length, field_length); + diff= field_charset()->strnncollsp(a_ptr + length_bytes, a_length, + b_ptr + length_bytes, b_length); + return diff; +} + + +int Field_varstring::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr, + size_t prefix_char_len) const +{ + /* avoid more expensive strnncollsp_nchars() if possible */ + if (prefix_char_len * field_charset()->mbmaxlen == + table->field[field_index]->field_length) + return Field_varstring::cmp(a_ptr, b_ptr); + + size_t a_length, b_length; + + if (length_bytes == 1) + { + a_length= *a_ptr; + b_length= *b_ptr; + } + else + { + a_length= uint2korr(a_ptr); + b_length= uint2korr(b_ptr); + } + return field_charset()->coll->strnncollsp_nchars(field_charset(), + a_ptr + length_bytes, + a_length, + b_ptr + length_bytes, + b_length, + prefix_char_len, + 0); +} + + +/** + @note + varstring and blob keys are ALWAYS stored with a 2 byte length prefix +*/ + +int Field_varstring::key_cmp(const uchar *key_ptr, uint max_key_length) const +{ + size_t length= length_bytes == 1 ? 
(uint) *ptr : uint2korr(ptr); + size_t local_char_length= max_key_length / mbmaxlen(); + + local_char_length= field_charset()->charpos(ptr + length_bytes, + ptr + length_bytes + length, + local_char_length); + set_if_smaller(length, local_char_length); + return field_charset()->strnncollsp(ptr + length_bytes, + length, + key_ptr + HA_KEY_BLOB_LENGTH, + uint2korr(key_ptr)); +} + + +/** + Compare to key segments (always 2 byte length prefix). + + @note + This is used only to compare key segments created for index_read(). + (keys are created and compared in key.cc) +*/ + +int Field_varstring::key_cmp(const uchar *a,const uchar *b) const +{ + return field_charset()->strnncollsp(a + HA_KEY_BLOB_LENGTH, uint2korr(a), + b + HA_KEY_BLOB_LENGTH, uint2korr(b)); +} + + +void Field_varstring::sort_string(uchar *to,uint length) +{ + String buf; + + val_str(&buf, &buf); + + if (field_charset() == &my_charset_bin) + { + /* Store length last in high-byte order to sort longer strings first */ + if (length_bytes == 1) + to[length - 1]= buf.length(); + else + mi_int2store(to + length - 2, buf.length()); + length-= length_bytes; + } + +#ifdef DBUG_ASSERT_EXISTS + size_t rc= +#endif + field_charset()->strnxfrm(to, length, + char_length() * field_charset()->strxfrm_multiply, + (const uchar *) buf.ptr(), buf.length(), + MY_STRXFRM_PAD_WITH_SPACE | + MY_STRXFRM_PAD_TO_MAXLEN); + DBUG_ASSERT(rc == length); +} + + +enum ha_base_keytype Field_varstring::key_type() const +{ + enum ha_base_keytype res; + + if (binary()) + res= length_bytes == 1 ? HA_KEYTYPE_VARBINARY1 : HA_KEYTYPE_VARBINARY2; + else + res= length_bytes == 1 ? HA_KEYTYPE_VARTEXT1 : HA_KEYTYPE_VARTEXT2; + return res; +} + + +/* + Compressed columns need one extra byte to store the compression method. + This byte is invisible to the end user, but not for the storage engine. 
+*/ + +void Field_varstring::sql_type(String &res) const +{ + THD *thd= table->in_use; + CHARSET_INFO *cs=res.charset(); + size_t length; + + length= cs->cset->snprintf(cs,(char*) res.ptr(), + res.alloced_length(), "%s(%u)", + (has_charset() ? "varchar" : "varbinary"), + (uint) char_length()); + res.length(length); + if ((thd->variables.sql_mode & (MODE_MYSQL323 | MODE_MYSQL40)) && + has_charset() && (charset()->state & MY_CS_BINSORT)) + res.append(STRING_WITH_LEN(" binary")); +} + +/** + For fields which are associated with character sets their length is provided + in octets and their character set information is also provided as part of + type information. + + @param res String which contains filed type and length. +*/ +void Field_varstring::sql_rpl_type(String *res) const +{ + if (Field_varstring::has_charset()) + { + CHARSET_INFO *cs= res->charset(); + DBUG_ASSERT(cs->mbminlen == 1); + size_t length= cs->cset->snprintf(cs, (char*) res->ptr(), + res->alloced_length(), + "varchar(%u octets) character set %s", + field_length, + charset()->cs_name.str); + res->length(length); + } + else + Field_varstring::sql_type(*res); +} + + +uint32 Field_varstring::data_length() +{ + return length_bytes == 1 ? (uint32) *ptr : uint2korr(ptr); +} + +/* + Functions to create a packed row. + Here the number of length bytes are depending on the given max_length +*/ + +uchar *Field_varstring::pack(uchar *to, const uchar *from, uint max_length) +{ + uint length= length_bytes == 1 ? (uint) *from : uint2korr(from); + set_if_smaller(max_length, field_length); + if (length > max_length) + length=max_length; + + /* Length always stored little-endian */ + *to++= length & 0xFF; + if (max_length > 255) + *to++= (length >> 8) & 0xFF; + + /* Store bytes of string */ + if (length > 0) + memcpy(to, from+length_bytes, length); + return to+length; +} + + +/** + Unpack a varstring field from row data. 
+ + This method is used to unpack a varstring field from a master + whose size of the field is less than that of the slave. + + @note + The string length is always packed little-endian. + + @param to Destination of the data + @param from Source of the data + @param param_data Length bytes from the master's field data + + @return New pointer into memory based on from + length of the data +*/ +const uchar * +Field_varstring::unpack(uchar *to, const uchar *from, const uchar *from_end, + uint param_data) +{ + uint length; + uint l_bytes= (param_data && (param_data < field_length)) ? + (param_data <= 255) ? 1 : 2 : length_bytes; + + if (from + l_bytes > from_end) + return 0; // Error in data + + if (l_bytes == 1) + { + to[0]= *from++; + length= to[0]; + if (length_bytes == 2) + to[1]= 0; + } + else /* l_bytes == 2 */ + { + length= uint2korr(from); + to[0]= *from++; + to[1]= *from++; + } + if (length) + { + if (from + length > from_end || length > field_length) + return 0; // Error in data + memcpy(to+ length_bytes, from, length); + } + return from+length; +} + + +uint Field_varstring::packed_col_length(const uchar *data_ptr, uint length) +{ + if (length > 255) + return uint2korr(data_ptr)+2; + return (uint) *data_ptr + 1; +} + + +uint Field_varstring::max_packed_col_length(uint max_length) +{ + return (max_length > 255 ? 
2 : 1)+max_length; +} + +void Field_varstring::val_str_from_ptr(String *val, const uchar *ptr) const +{ + val->set((const char*) get_data(ptr), get_length(ptr), field_charset()); +} + +uint Field_varstring::get_key_image(uchar *buff, uint length, + const uchar *ptr_arg, + imagetype type_arg) const +{ + String val; + val_str_from_ptr(&val, ptr_arg); + + uint local_char_length= val.charpos(length / mbmaxlen()); + if (local_char_length < val.length()) + val.length(local_char_length); + /* Key is always stored with 2 bytes */ + int2store(buff, val.length()); + memcpy(buff + HA_KEY_BLOB_LENGTH, val.ptr(), val.length()); + if (val.length() < length) + { + /* + Must clear this as we do a memcmp in opt_range.cc to detect + identical keys + */ + memset(buff + HA_KEY_BLOB_LENGTH + val.length(), 0, length - val.length()); + } + return HA_KEY_BLOB_LENGTH + val.length(); +} + + +void Field_varstring::set_key_image(const uchar *buff,uint length) +{ + length= uint2korr(buff); // Real length is here + (void) store((const char*) buff + HA_KEY_BLOB_LENGTH, length, field_charset()); +} + + +int Field_varstring::cmp_binary(const uchar *a_ptr, const uchar *b_ptr, + uint32 max_length) const +{ + uint32 a_length,b_length; + + if (length_bytes == 1) + { + a_length= (uint) *a_ptr; + b_length= (uint) *b_ptr; + } + else + { + a_length= uint2korr(a_ptr); + b_length= uint2korr(b_ptr); + } + set_if_smaller(a_length, max_length); + set_if_smaller(b_length, max_length); + if (a_length != b_length) + return 1; + return memcmp(a_ptr+length_bytes, b_ptr+length_bytes, a_length); +} + + +Field *Field_varstring::make_new_field(MEM_ROOT *root, TABLE *new_table, + bool keep_type) +{ + Field_varstring *res= (Field_varstring*) Field::make_new_field(root, + new_table, + keep_type); + if (res) + res->length_bytes= length_bytes; + return res; +} + + +Field *Field_varstring::new_key_field(MEM_ROOT *root, TABLE *new_table, + uchar *new_ptr, uint32 length, + uchar *new_null_ptr, uint new_null_bit) +{ + 
Field_varstring *res; + if ((res= (Field_varstring*) Field::new_key_field(root, new_table, + new_ptr, length, + new_null_ptr, new_null_bit))) + { + /* Keys length prefixes are always packed with 2 bytes */ + res->length_bytes= 2; + } + return res; +} + +bool Field_varstring::is_equal(const Column_definition &new_field) const +{ + return new_field.type_handler() == type_handler() && + new_field.length == field_length && + new_field.char_length == char_length() && + !new_field.compression_method() == !compression_method() && + new_field.charset == field_charset(); +} + + +void Field_varstring::hash_not_null(Hasher *hasher) +{ + DBUG_ASSERT(marked_for_read()); + DBUG_ASSERT(!is_null()); + uint len= length_bytes == 1 ? (uint) *ptr : uint2korr(ptr); + hasher->add(charset(), ptr + length_bytes, len); +} + + +/** + Compress field + + @param[out] to destination buffer for compressed data + @param[in] to_length size of to + @param[in] from data to compress + @param[in] length from length + @param[in] max_length truncate `from' to this length + @param[out] out_length compessed data length + @param[in] cs from character set + @param[in] nchars copy no more than "nchars" characters + + In worst case (no compression performed) storage requirement is increased by + 1 byte to store header. If it exceeds field length, normal data truncation is + performed. + + Generic compressed header format (1 byte): + + Bits 1-4: method specific bits + Bits 5-8: compression method + + If compression method is 0 then header is immediately followed by + uncompressed data. + + If compression method is zlib: + + Bits 1-3: number of bytes occupied by original data length + Bits 4: true if zlib wrapper not present + Bits 5-8: store 8 (zlib) + + Header is immediately followed by original data length, + followed by compressed data. 
+*/ + +int Field_longstr::compress(char *to, uint to_length, + const char *from, uint length, + uint max_length, + uint *out_length, + CHARSET_INFO *cs, size_t nchars) +{ + THD *thd= get_thd(); + char *buf; + uint buf_length; + int rc= 0; + + if (String::needs_conversion_on_storage(length, cs, field_charset()) || + max_length < length) + { + set_if_smaller(max_length, static_cast(mbmaxlen()) * length + 1); + if (!(buf= (char*) my_malloc(PSI_INSTRUMENT_ME, max_length, MYF(MY_WME)))) + { + *out_length= 0; + return -1; + } + + rc= well_formed_copy_with_check(buf, max_length, cs, from, length, + nchars, true, &buf_length); + } + else + { + buf= const_cast(from); + buf_length= length; + } + + if (buf_length == 0) + *out_length= 0; + else if (buf_length >= thd->variables.column_compression_threshold && + (*out_length= compression_method()->compress(thd, to, buf, buf_length))) + status_var_increment(thd->status_var.column_compressions); + else + { + /* Store uncompressed */ + to[0]= 0; + if (buf_length < to_length) + memcpy(to + 1, buf, buf_length); + else + { + /* Storing string at blob capacity, e.g. 255 bytes string to TINYBLOB. */ + rc= well_formed_copy_with_check(to + 1, to_length - 1, cs, from, length, + nchars, true, &buf_length); + } + *out_length= buf_length + 1; + } + + if (buf != from) + my_free(buf); + return rc; +} + + +/* + Memory is allocated only when original data was actually compressed. + Otherwise val_ptr points at data located immediately after header. + + Data can be stored uncompressed if data was shorter than threshold + or compressed data was longer than original data. 
+*/ + +String *Field_longstr::uncompress(String *val_buffer, String *val_ptr, + const uchar *from, uint from_length) const +{ + if (from_length) + { + uchar method= (*from & 0xF0) >> 4; + + /* Uncompressed data */ + if (!method) + { + val_ptr->set((const char*) from + 1, from_length - 1, field_charset()); + return val_ptr; + } + + if (compression_methods[method].uncompress) + { + if (!compression_methods[method].uncompress(val_buffer, from, from_length, + field_length)) + { + val_buffer->set_charset(field_charset()); + status_var_increment(get_thd()->status_var.column_decompressions); + return val_buffer; + } + } + } + + /* + It would be better to return 0 in case of errors, but to take the + safer route, let's return a zero string and let the general + handler catch the error. + */ + val_ptr->set("", 0, field_charset()); + return val_ptr; +} + + +int Field_varstring_compressed::store(const char *from, size_t length, + CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + uint compressed_length; + int rc= compress((char*) get_data(), field_length, from, (uint) length, + Field_varstring_compressed::max_display_length(), + &compressed_length, cs, + Field_varstring_compressed::char_length()); + store_length(compressed_length); + return rc; +} + +void Field_varstring_compressed::val_str_from_ptr(String *val, const uchar *ptr) const +{ + uncompress(val, val, get_data(ptr), get_length(ptr)); +} + + +String *Field_varstring_compressed::val_str(String *val_buffer, String *val_ptr) +{ + DBUG_ASSERT(marked_for_read()); + return uncompress(val_buffer, val_ptr, get_data(), get_length()); +} + + +double Field_varstring_compressed::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + THD *thd= get_thd(); + String buf; + val_str(&buf, &buf); + return Converter_strntod_with_warn(thd, Warn_filter(thd), field_charset(), + buf.ptr(), buf.length()).result(); +} + + +longlong Field_varstring_compressed::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + THD *thd= 
get_thd(); + String buf; + val_str(&buf, &buf); + return Converter_strntoll_with_warn(thd, Warn_filter(thd), field_charset(), + buf.ptr(), buf.length()).result(); +} + + +int Field_varstring_compressed::cmp(const uchar *a_ptr, + const uchar *b_ptr) const +{ + String a, b; + uint a_length, b_length; + + if (length_bytes == 1) + { + a_length= (uint) *a_ptr; + b_length= (uint) *b_ptr; + } + else + { + a_length= uint2korr(a_ptr); + b_length= uint2korr(b_ptr); + } + + uncompress(&a, &a, a_ptr + length_bytes, a_length); + uncompress(&b, &b, b_ptr + length_bytes, b_length); + + return sortcmp(&a, &b, field_charset()); +} + + +Binlog_type_info Field_varstring_compressed::binlog_type_info() const +{ + return Binlog_type_info(Field_varstring_compressed::binlog_type(), + field_length, 2, charset()); +} + + +/**************************************************************************** +** blob type +** A blob is saved as a length and a pointer. The length is stored in the +** packlength slot and may be from 1-4. +****************************************************************************/ + +Field_blob::Field_blob(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, + enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, + TABLE_SHARE *share, uint blob_pack_length, + const DTCollation &collation) + :Field_longstr(ptr_arg, BLOB_PACK_LENGTH_TO_MAX_LENGH(blob_pack_length), + null_ptr_arg, null_bit_arg, unireg_check_arg, field_name_arg, + collation), + packlength(blob_pack_length) +{ + DBUG_ASSERT(blob_pack_length <= 4); // Only pack lengths 1-4 supported currently + flags|= BLOB_FLAG; + share->blob_fields++; + /* TODO: why do not fill table->s->blob_field array here? 
*/ +} + + +void Field_blob::store_length(uchar *i_ptr, uint i_packlength, uint32 i_number) +{ + store_lowendian(i_number, i_ptr, i_packlength); +} + + +uint32 Field_blob::get_length(const uchar *pos, uint packlength_arg) const +{ + return (uint32)read_lowendian(pos, packlength_arg); +} + + +/** + Copy a value from another BLOB field of the same character set. + This method is used by Copy_field, e.g. during ALTER TABLE. +*/ +int Field_blob::copy_value(Field_blob *from) +{ + DBUG_ASSERT(field_charset() == from->charset()); + DBUG_ASSERT(!compression_method() == !from->compression_method()); + int rc= 0; + uint32 length= from->get_length(); + uchar *data= from->get_ptr(); + if (packlength < from->packlength) + { + set_if_smaller(length, Field_blob::max_data_length()); + length= (uint32) Well_formed_prefix(field_charset(), + (const char *) data, length).length(); + rc= report_if_important_data((const char *) data + length, + (const char *) data + from->get_length(), + true); + } + store_length(length); + bmove(ptr + packlength, (uchar*) &data, sizeof(char*)); + return rc; +} + + +int Field_blob::store(const char *from,size_t length,CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + size_t copy_length, new_length; + uint copy_len; + char *tmp; + char buff[STRING_BUFFER_USUAL_SIZE]; + String tmpstr(buff,sizeof(buff), &my_charset_bin); + int rc; + + if (!length) + { + bzero(ptr,Field_blob::pack_length()); + return 0; + } + + /* + For min/max fields of statistical data 'table' is set to NULL. + It could not be otherwise as this data is shared by many instances + of the same base table. 
+ */ + + if (table && table->blob_storage) // GROUP_CONCAT with ORDER BY | DISTINCT + { + DBUG_ASSERT(!f_is_hex_escape(flags)); + DBUG_ASSERT(field_charset() == cs); + DBUG_ASSERT(length <= max_data_length()); + + new_length= length; + copy_length= table->in_use->variables.group_concat_max_len; + if (new_length > copy_length) + { + new_length= Well_formed_prefix(cs, + from, copy_length, new_length).length(); + table->blob_storage->set_truncated_value(true); + } + if (!(tmp= table->blob_storage->store(from, new_length))) + goto oom_error; + + Field_blob::store_length(new_length); + bmove(ptr + packlength, (uchar*) &tmp, sizeof(char*)); + return 0; + } + + /* + If the 'from' address is in the range of the temporary 'value'- + object we need to copy the content to a different location or it will be + invalidated when the 'value'-object is reallocated to make room for + the new character set. + */ + if (from >= value.ptr() && from <= value.ptr()+value.length()) + { + /* + If content of the 'from'-address is cached in the 'value'-object + it is possible that the content needs a character conversion. 
+ */ + if (!String::needs_conversion_on_storage(length, cs, field_charset())) + { + Field_blob::store_length(length); + bmove(ptr + packlength, &from, sizeof(char*)); + return 0; + } + if (tmpstr.copy(from, length, cs)) + goto oom_error; + from= tmpstr.ptr(); + } + + new_length= MY_MIN(max_data_length(), mbmaxlen() * length); + if (value.alloc(new_length)) + goto oom_error; + tmp= const_cast(value.ptr()); + + if (f_is_hex_escape(flags)) + { + copy_length= my_copy_with_hex_escaping(field_charset(), + tmp, new_length, + from, length); + Field_blob::store_length(copy_length); + bmove(ptr + packlength, (uchar*) &tmp, sizeof(char*)); + return 0; + } + rc= well_formed_copy_with_check((char*) value.ptr(), (uint) new_length, + cs, from, length, + length, true, ©_len); + value.length(copy_len); + Field_blob::store_length(copy_len); + bmove(ptr+packlength,(uchar*) &tmp,sizeof(char*)); + + return rc; + +oom_error: + /* Fatal OOM error */ + bzero(ptr,Field_blob::pack_length()); + return -1; +} + + +void Field_blob::hash_not_null(Hasher *hasher) +{ + DBUG_ASSERT(marked_for_read()); + DBUG_ASSERT(!is_null()); + char *blob; + memcpy(&blob, ptr + packlength, sizeof(char*)); + if (blob) + hasher->add(Field_blob::charset(), blob, get_length(ptr)); +} + + +double Field_blob::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + char *blob; + memcpy(&blob, ptr+packlength, sizeof(char*)); + if (!blob) + return 0.0; + THD *thd= get_thd(); + return Converter_strntod_with_warn(thd, Warn_filter(thd), + Field_blob::charset(), + blob, get_length(ptr)).result(); +} + + +longlong Field_blob::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + char *blob; + memcpy(&blob, ptr+packlength, sizeof(char*)); + if (!blob) + return 0; + THD *thd= get_thd(); + return Converter_strntoll_with_warn(thd, Warn_filter(thd), + Field_blob::charset(), + blob, get_length(ptr)).result(); +} + + +String *Field_blob::val_str(String *val_buffer __attribute__((unused)), + String *val_ptr) +{ + 
DBUG_ASSERT(marked_for_read()); + char *blob; + memcpy(&blob, ptr+packlength, sizeof(char*)); + if (!blob) + val_ptr->set("",0,charset()); // A bit safer than ->length(0) + else + val_ptr->set((const char*) blob,get_length(ptr),charset()); + return val_ptr; +} + + +my_decimal *Field_blob::val_decimal(my_decimal *decimal_value) +{ + DBUG_ASSERT(marked_for_read()); + const char *blob; + size_t length; + memcpy(&blob, ptr+packlength, sizeof(const uchar*)); + if (!blob) + { + blob= ""; + length= 0; + } + else + length= get_length(ptr); + + THD *thd= get_thd(); + Converter_str2my_decimal_with_warn(thd, Warn_filter(thd), + E_DEC_FATAL_ERROR & ~E_DEC_BAD_NUM, + Field_blob::charset(), + blob, length, decimal_value); + return decimal_value; +} + + +int Field_blob::cmp(const uchar *a,uint32 a_length, const uchar *b, + uint32 b_length) const +{ + return field_charset()->strnncollsp(a, a_length, b, b_length); +} + + +int Field_blob::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + uchar *blob1,*blob2; + memcpy(&blob1, a_ptr+packlength, sizeof(char*)); + memcpy(&blob2, b_ptr+packlength, sizeof(char*)); + size_t a_len= get_length(a_ptr), b_len= get_length(b_ptr); + return cmp(blob1, (uint32)a_len, blob2, (uint32)b_len); +} + + +int Field_blob::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr, + size_t prefix_char_len) const +{ + uchar *blob1,*blob2; + memcpy(&blob1, a_ptr+packlength, sizeof(char*)); + memcpy(&blob2, b_ptr+packlength, sizeof(char*)); + size_t a_len= get_length(a_ptr), b_len= get_length(b_ptr); + return field_charset()->coll->strnncollsp_nchars(field_charset(), + blob1, a_len, + blob2, b_len, + prefix_char_len, + 0); +} + + +int Field_blob::cmp_binary(const uchar *a_ptr, const uchar *b_ptr, + uint32 max_length) const +{ + char *a,*b; + uint diff; + uint32 a_length,b_length; + memcpy(&a, a_ptr+packlength, sizeof(char*)); + memcpy(&b, b_ptr+packlength, sizeof(char*)); + a_length=get_length(a_ptr); + if (a_length > max_length) + a_length=max_length; + 
b_length=get_length(b_ptr); + if (b_length > max_length) + b_length=max_length; + if (uint32 len= MY_MIN(a_length,b_length)) + diff= memcmp(a,b,len); + else + diff= 0; + return diff ? diff : (int) (a_length - b_length); +} + + +/* The following is used only when comparing a key */ + +uint Field_blob::get_key_image_itRAW(const uchar *ptr_arg, uchar *buff, + uint length) const +{ + size_t blob_length= get_length(ptr_arg); + const uchar *blob= get_ptr(ptr_arg); + size_t local_char_length= length / mbmaxlen(); + local_char_length= field_charset()->charpos(blob, blob + blob_length, + local_char_length); + set_if_smaller(blob_length, local_char_length); + + if (length > blob_length) + { + /* + Must clear this as we do a memcmp in opt_range.cc to detect + identical keys + */ + bzero(buff+HA_KEY_BLOB_LENGTH+blob_length, (length-blob_length)); + length=(uint) blob_length; + } + int2store(buff,length); + if (length) + memcpy(buff+HA_KEY_BLOB_LENGTH, blob, length); + return HA_KEY_BLOB_LENGTH+length; +} + + +void Field_blob::set_key_image(const uchar *buff,uint length) +{ + length= uint2korr(buff); + (void) Field_blob::store((const char*) buff+HA_KEY_BLOB_LENGTH, length, + field_charset()); +} + + +int Field_blob::key_cmp(const uchar *key_ptr, uint max_key_length) const +{ + uchar *blob1; + size_t blob_length=get_length(ptr); + memcpy(&blob1, ptr+packlength, sizeof(char*)); + CHARSET_INFO *cs= charset(); + size_t local_char_length= max_key_length / cs->mbmaxlen; + local_char_length= cs->charpos(blob1, blob1+blob_length, + local_char_length); + set_if_smaller(blob_length, local_char_length); + return Field_blob::cmp(blob1, (uint32)blob_length, + key_ptr+HA_KEY_BLOB_LENGTH, + uint2korr(key_ptr)); +} + +int Field_blob::key_cmp(const uchar *a,const uchar *b) const +{ + return Field_blob::cmp(a+HA_KEY_BLOB_LENGTH, uint2korr(a), + b+HA_KEY_BLOB_LENGTH, uint2korr(b)); +} + + +Field *Field_blob::new_key_field(MEM_ROOT *root, TABLE *new_table, + uchar *new_ptr, uint32 length, + uchar 
*new_null_ptr, uint new_null_bit) +{ + Field_varstring *res= new (root) Field_varstring(new_ptr, length, 2, + new_null_ptr, + new_null_bit, Field::NONE, + &field_name, + table->s, charset()); + res->init(new_table); + return res; +} + + +/** + Save the field metadata for blob fields. + + Saves the pack length in the first byte of the field metadata array + at index of *metadata_ptr. + + @param metadata_ptr First byte of field metadata + + @returns number of bytes written to metadata_ptr +*/ +Binlog_type_info Field_blob::binlog_type_info() const +{ + DBUG_ASSERT(Field_blob::type() == binlog_type()); + return Binlog_type_info(Field_blob::type(), pack_length_no_ptr(), 1, + charset()); +} + + +uint32 Field_blob::sort_length() const +{ + return packlength == 4 ? + UINT_MAX32 : + (uint32) field_length + sort_suffix_length(); +} + + +uint32 Field_blob::sort_suffix_length() const +{ + return field_charset() == &my_charset_bin ? packlength : 0; +} + + +void Field_blob::sort_string(uchar *to,uint length) +{ + String buf; + + val_str(&buf, &buf); + if (!buf.length() && field_charset()->pad_char == 0) + bzero(to,length); + else + { + if (field_charset() == &my_charset_bin) + { + /* + Store length of blob last in blob to shorter blobs before longer blobs + */ + length-= packlength; + store_bigendian(buf.length(), to + length, packlength); + } + +#ifdef DBUG_ASSERT_EXISTS + size_t rc= +#endif + field_charset()->strnxfrm(to, length, length, + (const uchar *) buf.ptr(), buf.length(), + MY_STRXFRM_PAD_WITH_SPACE | + MY_STRXFRM_PAD_TO_MAXLEN); + DBUG_ASSERT(rc == length); + } +} + + +/* + Return the data type handler, according to packlength. + Implemented in field.cc rather than in field.h + to avoid exporting type_handler_xxx with MYSQL_PLUGIN_IMPORT. +*/ +const Type_handler *Field_blob::type_handler() const +{ + /* + This is a temporary solution and will be fixed soon (in 10.9?). + Type_handler_*blob_json will provide its own Field_blob_json + and Field_blob_compressed_json. 
+ */ + if (Type_handler_json_common::has_json_valid_constraint(this)) + return Type_handler_json_common:: + json_blob_type_handler_by_length_bytes(packlength); + + switch (packlength) { + case 1: return &type_handler_tiny_blob; + case 2: return &type_handler_blob; + case 3: return &type_handler_medium_blob; + } + return &type_handler_long_blob; +} + + +void Field_blob::sql_type(String &res) const +{ + const char *str; + uint length; + switch (packlength) { + default: str="tiny"; length=4; break; + case 2: str=""; length=0; break; + case 3: str="medium"; length= 6; break; + case 4: str="long"; length=4; break; + } + res.set_ascii(str,length); + if (charset() == &my_charset_bin) + { + res.append(STRING_WITH_LEN("blob")); + if (packlength == 2 && (get_thd()->variables.sql_mode & MODE_ORACLE)) + res.append(STRING_WITH_LEN("(65535)")); + } + else + { + res.append(STRING_WITH_LEN("text")); + } +} + +uchar *Field_blob::pack(uchar *to, const uchar *from, uint max_length) +{ + uint32 length=get_length(from, packlength); // Length of from string + + /* + Store max length, which will occupy packlength bytes. If the max + length given is smaller than the actual length of the blob, we + just store the initial bytes of the blob. + */ + store_length(to, packlength, MY_MIN(length, max_length)); + + /* + Store the actual blob data, which will occupy 'length' bytes. + */ + if (length > 0) + { + from= get_ptr(from); + memcpy(to+packlength, from,length); + } + return to+packlength+length; +} + + +/** + Unpack a blob field from row data. + + This method is used to unpack a blob field from a master whose size of + the field is less than that of the slave. Note: This method is included + to satisfy inheritance rules, but is not needed for blob fields. It + simply is used as a pass-through to the original unpack() method for + blob fields. 
+ + @param to Destination of the data + @param from Source of the data + @param param_data @c TRUE if base types should be stored in little- + endian format, @c FALSE if native format should + be used. + + @return New pointer into memory based on from + length of the data +*/ + +const uchar *Field_blob::unpack(uchar *to, const uchar *from, + const uchar *from_end, uint param_data) +{ + DBUG_ENTER("Field_blob::unpack"); + DBUG_PRINT("enter", ("to: %p; from: %p; param_data: %u", + to, from, param_data)); + uint const master_packlength= + param_data > 0 ? param_data & 0xFF : packlength; + if (from + master_packlength > from_end) + DBUG_RETURN(0); // Error in data + uint32 const length= get_length(from, master_packlength); + DBUG_DUMP("packed", from, length + master_packlength); + if (from + master_packlength + length > from_end) + DBUG_RETURN(0); + set_ptr(length, const_cast (from) + master_packlength); + DBUG_RETURN(from + master_packlength + length); +} + + +uint Field_blob::packed_col_length(const uchar *data_ptr, uint length) +{ + if (length > 255) + return uint2korr(data_ptr)+2; + return (uint) *data_ptr + 1; +} + + +uint Field_blob::max_packed_col_length(uint max_length) +{ + return (max_length > 255 ? 2 : 1)+max_length; +} + + +/* + Blob fields are regarded equal if they have same character set, + same blob store length and if either both are compressed or both are + uncompressed. + The logic for compression is that we don't have to uncompress and compress + again an already compressed field just because compression method changes. 
+*/ + +bool Field_blob::is_equal(const Column_definition &new_field) const +{ + return new_field.type_handler() == type_handler() && + !new_field.compression_method() == !compression_method() && + new_field.pack_length == pack_length() && + new_field.charset == field_charset(); +} + + +void Field_blob::make_send_field(Send_field *field) +{ + /* + Historically all BLOB variant Fields are displayed as MYSQL_TYPE_BLOB + in the result set metadata. Note, Item can work differently and + display the exact BLOB type, such as + MYSQL_TYPE_{TINY_BLOB|BLOB|MEDIUM_BLOB|LONG_BLOB}. + QQ: this should be made consistent eventually. + */ + Field_longstr::make_send_field(field); + field->set_handler(&type_handler_blob); +} + + +bool Field_blob::make_empty_rec_store_default_value(THD *thd, Item *item) +{ + DBUG_ASSERT(flags & BLOB_FLAG); + int res= item->save_in_field(this, true); + DBUG_ASSERT(res != 3); // Field_blob never returns 3 + if (res) + return true; // E.g. truncation happened + reset(); // Clear the pointer to a String, it should not be written to frm + return false; +} + + +int Field_blob_compressed::store(const char *from, size_t length, + CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + uint compressed_length; + uint max_length= max_data_length(); + uint to_length= (uint) MY_MIN(max_length, mbmaxlen() * length + 1); + String tmp(from, length, cs); + int rc; + + if (from >= value.ptr() && from <= value.end() && tmp.copy(from, length, cs)) + goto oom; + + if (value.alloc(to_length)) + goto oom; + + rc= compress((char*) value.ptr(), to_length, tmp.ptr(), (uint) length, + max_length, &compressed_length, cs, (uint) length); + set_ptr(compressed_length, (uchar*) value.ptr()); + return rc; + +oom: + set_ptr((uint32) 0, NULL); + return -1; +} + + +String *Field_blob_compressed::val_str(String *val_buffer, String *val_ptr) +{ + DBUG_ASSERT(marked_for_read()); + return uncompress(val_buffer, val_ptr, get_ptr(), get_length()); +} + + +double 
Field_blob_compressed::val_real(void) +{ + DBUG_ASSERT(marked_for_read()); + THD *thd= get_thd(); + String buf; + val_str(&buf, &buf); + return Converter_strntod_with_warn(thd, Warn_filter(thd), field_charset(), + buf.ptr(), buf.length()).result(); +} + + +longlong Field_blob_compressed::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + THD *thd= get_thd(); + String buf; + val_str(&buf, &buf); + return Converter_strntoll_with_warn(thd, Warn_filter(thd), field_charset(), + buf.ptr(), buf.length()).result(); +} + +Binlog_type_info Field_blob_compressed::binlog_type_info() const +{ + return Binlog_type_info(Field_blob_compressed::binlog_type(), + pack_length_no_ptr(), 1, charset()); +} + +/**************************************************************************** +** enum type. +** This is a string which only can have a selection of different values. +** If one uses this string in a number context one gets the type number. +****************************************************************************/ + +sql_mode_t Field_enum::can_handle_sql_mode_dependency_on_store() const +{ + return MODE_PAD_CHAR_TO_FULL_LENGTH; +} + + +enum ha_base_keytype Field_enum::key_type() const +{ + switch (packlength) { + default: return HA_KEYTYPE_BINARY; + case 2: return HA_KEYTYPE_USHORT_INT; + case 3: return HA_KEYTYPE_UINT24; + case 4: return HA_KEYTYPE_ULONG_INT; + case 8: return HA_KEYTYPE_ULONGLONG; + } +} + +void Field_enum::store_type(ulonglong value) +{ + store_lowendian(value, ptr, packlength); +} + + +/** + @note + Storing a empty string in a enum field gives a warning + (if there isn't a empty value in the enum) +*/ + +int Field_enum::store(const char *from,size_t length,CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int err= 0; + char buff[STRING_BUFFER_USUAL_SIZE]; + String tmpstr(buff,sizeof(buff), &my_charset_bin); + + /* Convert character set if necessary */ + if (String::needs_conversion_on_storage(length, cs, field_charset())) + { + uint 
dummy_errors; + tmpstr.copy(from, length, cs, field_charset(), &dummy_errors); + from= tmpstr.ptr(); + length= tmpstr.length(); + } + + /* Remove end space */ + length= (uint) field_charset()->lengthsp(from, length); + uint tmp=find_type2(typelib, from, length, field_charset()); + if (!tmp) + { + if (length < 6) // Can't be more than 99999 enums + { + /* This is for reading numbers with LOAD DATA INFILE */ + char *end; + tmp=(uint) cs->strntoul(from,length,10,&end,&err); + if (err || end != from+length || tmp > typelib->count) + { + tmp=0; + set_warning(WARN_DATA_TRUNCATED, 1); + err= 1; + } + if ((get_thd()->count_cuted_fields <= CHECK_FIELD_EXPRESSION) && !length) + err= 0; + } + else + { + set_warning(WARN_DATA_TRUNCATED, 1); + err= 1; + } + } + store_type((ulonglong) tmp); + return err; +} + + +int Field_enum::store(double nr) +{ + return Field_enum::store((longlong) nr, FALSE); +} + + +int Field_enum::store(longlong nr, bool unsigned_val) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error= 0; + if ((ulonglong) nr > typelib->count || nr == 0) + { + set_warning(WARN_DATA_TRUNCATED, 1); + if (nr != 0 || get_thd()->count_cuted_fields > CHECK_FIELD_EXPRESSION) + { + nr= 0; + error= 1; + } + } + store_type((ulonglong) (uint) nr); + return error; +} + + +double Field_enum::val_real(void) +{ + return (double) Field_enum::val_int(); +} + + +longlong Field_enum::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + return val_int(ptr); +} + +longlong Field_enum::val_int(const uchar *real_ptr) const +{ + return read_lowendian(real_ptr, packlength); +} + +/** + Save the field metadata for enum fields. + + Saves the real type in the first byte and the pack length in the + second byte of the field metadata array at index of *metadata_ptr and + *(metadata_ptr + 1). 
+ + @param metadata_ptr First byte of field metadata + + @returns number of bytes written to metadata_ptr +*/ +Binlog_type_info Field_enum::binlog_type_info() const +{ + DBUG_ASSERT(Field_enum::type() == binlog_type()); + return Binlog_type_info(Field_enum::type(), real_type() + (pack_length() << 8), + 2, charset(), (TYPELIB *)get_typelib(), NULL); +} + + +String *Field_enum::val_str(String *val_buffer __attribute__((unused)), + String *val_ptr) +{ + uint tmp=(uint) Field_enum::val_int(); + if (!tmp || tmp > typelib->count) + val_ptr->set("", 0, field_charset()); + else + val_ptr->set((const char*) typelib->type_names[tmp-1], + typelib->type_lengths[tmp-1], + field_charset()); + return val_ptr; +} + +int Field_enum::cmp(const uchar *a_ptr, const uchar *b_ptr) const +{ + ulonglong a=Field_enum::val_int(a_ptr); + ulonglong b=Field_enum::val_int(b_ptr); + return (a < b) ? -1 : (a > b) ? 1 : 0; +} + +void Field_enum::sort_string(uchar *to,uint length __attribute__((unused))) +{ + ulonglong value=Field_enum::val_int(); + to+=packlength-1; + for (uint i=0 ; i < packlength ; i++) + { + *to-- = (uchar) (value & 255); + value>>=8; + } +} + + +void Field_enum::sql_type(String &res) const +{ + char buffer[255]; + String enum_item(buffer, sizeof(buffer), res.charset()); + + res.length(0); + res.append(STRING_WITH_LEN("enum(")); + + bool flag=0; + uint *len= typelib->type_lengths; + for (const char **pos= typelib->type_names; *pos; pos++, len++) + { + uint dummy_errors; + if (flag) + res.append(','); + /* convert to res.charset() == utf8, then quote */ + enum_item.copy(*pos, *len, charset(), res.charset(), &dummy_errors); + append_unescaped(&res, enum_item.ptr(), enum_item.length()); + flag= 1; + } + res.append(')'); +} + + +Field *Field_enum::make_new_field(MEM_ROOT *root, TABLE *new_table, + bool keep_type) +{ + Field_enum *res= (Field_enum*) Field::make_new_field(root, new_table, + keep_type); + if (res) + res->typelib= copy_typelib(root, typelib); + return res; +} + + +/* + 
set type. + This is a string which can have a collection of different values. + Each string value is separated with a ','. + For example "One,two,five" + If one uses this string in a number context one gets the bits as a longlong + number. +*/ + + +int Field_set::store(const char *from,size_t length,CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + bool got_warning= 0; + int err= 0; + char *not_used; + uint not_used2; + char buff[STRING_BUFFER_USUAL_SIZE]; + String tmpstr(buff,sizeof(buff), &my_charset_bin); + + /* Convert character set if necessary */ + if (String::needs_conversion_on_storage(length, cs, field_charset())) + { + uint dummy_errors; + tmpstr.copy(from, length, cs, field_charset(), &dummy_errors); + from= tmpstr.ptr(); + length= tmpstr.length(); + } + ulonglong tmp= find_set(typelib, from, length, field_charset(), + ¬_used, ¬_used2, &got_warning); + if (!tmp && length && length < 22) + { + /* This is for reading numbers with LOAD DATA INFILE */ + char *end; + tmp= cs->strntoull(from, length, 10, &end, &err); + if (err || end != from + length) + { + set_warning(WARN_DATA_TRUNCATED, 1); + store_type(0); + return 1; + } + return Field_set::store((longlong) tmp, true/*unsigned*/); + } + else if (got_warning) + set_warning(WARN_DATA_TRUNCATED, 1); + store_type(tmp); + return err; +} + + +int Field_set::store(longlong nr, bool unsigned_val) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int error= 0; + ulonglong max_nr; + + if (sizeof(ulonglong)*8 <= typelib->count) + max_nr= ULONGLONG_MAX; + else + max_nr= (1ULL << typelib->count) - 1; + + if ((ulonglong) nr > max_nr) + { + nr&= max_nr; + set_warning(WARN_DATA_TRUNCATED, 1); + error=1; + } + store_type((ulonglong) nr); + return error; +} + + +String *Field_set::val_str(String *val_buffer, + String *val_ptr __attribute__((unused))) +{ + ulonglong tmp=(ulonglong) Field_enum::val_int(); + uint bitnr=0; + + val_buffer->set_charset(field_charset()); + val_buffer->length(0); + + while 
(tmp && bitnr < (uint) typelib->count) + { + if (tmp & 1) + { + if (val_buffer->length()) + val_buffer->append(&field_separator, 1, &my_charset_latin1); + String str(typelib->type_names[bitnr], typelib->type_lengths[bitnr], + field_charset()); + val_buffer->append(str); + } + tmp>>=1; + bitnr++; + } + return val_buffer; +} + + +void Field_set::sql_type(String &res) const +{ + char buffer[255]; + String set_item(buffer, sizeof(buffer), res.charset()); + + res.length(0); + res.append(STRING_WITH_LEN("set(")); + + bool flag=0; + uint *len= typelib->type_lengths; + for (const char **pos= typelib->type_names; *pos; pos++, len++) + { + uint dummy_errors; + if (flag) + res.append(','); + /* convert to res.charset() == utf8, then quote */ + set_item.copy(*pos, *len, charset(), res.charset(), &dummy_errors); + append_unescaped(&res, set_item.ptr(), set_item.length()); + flag= 1; + } + res.append(')'); +} + +Binlog_type_info Field_set::binlog_type_info() const +{ + DBUG_ASSERT(Field_set::type() == binlog_type()); + return Binlog_type_info(Field_set::type(), real_type() + + (pack_length() << 8), 2, charset(), NULL, (TYPELIB *)get_typelib()); +} + +/** + @retval + 1 if the fields are equally defined + @retval + 0 if the fields are unequally defined +*/ + +bool Field::eq_def(const Field *field) const +{ + if (real_type() != field->real_type() || charset() != field->charset() || + pack_length() != field->pack_length()) + return 0; + return 1; +} + + +/** + Compare the first t1::count type names. + + @return TRUE if the type names of t1 match those of t2. FALSE otherwise. 
+*/ + +static bool compare_type_names(CHARSET_INFO *charset, const TYPELIB *t1, + const TYPELIB *t2) +{ + for (uint i= 0; i < t1->count; i++) + if (charset->strnncoll(t1->type_names[i], t1->type_lengths[i], + t2->type_names[i], t2->type_lengths[i])) + return FALSE; + return TRUE; +} + +/** + @return + returns 1 if the fields are equally defined +*/ + +bool Field_enum::eq_def(const Field *field) const +{ + const TYPELIB *values; + + if (!Field::eq_def(field)) + return FALSE; + + values= ((Field_enum*) field)->typelib; + + /* Definition must be strictly equal. */ + if (typelib->count != values->count) + return FALSE; + + return compare_type_names(field_charset(), typelib, values); +} + + +/** + Check whether two fields can be considered 'equal' for table + alteration purposes. Fields are equal if they retain the same + pack length and if new members are added to the end of the list. + + @return true if fields are compatible. + false otherwise. +*/ + +bool Field_enum::is_equal(const Column_definition &new_field) const +{ + const TYPELIB *values= new_field.interval; + + /* + The fields are compatible if they have the same flags, + type, charset and have the same underlying length. + */ + if (new_field.type_handler() != type_handler() || + new_field.charset != field_charset() || + new_field.pack_length != pack_length()) + return false; + + /* + Changing the definition of an ENUM or SET column by adding a new + enumeration or set members to the end of the list of valid member + values only alters table metadata and not table data. + */ + if (typelib->count > values->count) + return false; + + /* Check whether there are modification before the end. */ + if (! 
compare_type_names(field_charset(), typelib, new_field.interval)) + return false; + + return true; +} + + +uchar *Field_enum::pack(uchar *to, const uchar *from, uint max_length) +{ + DBUG_ENTER("Field_enum::pack"); + DBUG_PRINT("debug", ("packlength: %d", packlength)); + DBUG_DUMP("from", from, packlength); + DBUG_RETURN(pack_int(to, from, packlength)); +} + +const uchar *Field_enum::unpack(uchar *to, const uchar *from, + const uchar *from_end, uint param_data) +{ + DBUG_ENTER("Field_enum::unpack"); + DBUG_PRINT("debug", ("packlength: %d", packlength)); + DBUG_DUMP("from", from, packlength); + DBUG_RETURN(unpack_int(to, from, from_end, packlength)); +} + + +/** + @return + returns 1 if the fields are equally defined +*/ +bool Field_num::eq_def(const Field *field) const +{ + if (!Field::eq_def(field)) + return 0; + Field_num *from_num= (Field_num*) field; + + if (unsigned_flag != from_num->unsigned_flag || + (zerofill && !from_num->zerofill && !zero_pack()) || + dec != from_num->dec) + return 0; + return 1; +} + + +/** + Check whether two numeric fields can be considered 'equal' for table + alteration purposes. Fields are equal if they are of the same type + and retain the same pack length. 
+*/ + +bool Field_num::is_equal(const Column_definition &new_field) const +{ + if (((new_field.flags & UNSIGNED_FLAG) != (flags & UNSIGNED_FLAG)) || + ((new_field.flags & AUTO_INCREMENT_FLAG) > (flags & AUTO_INCREMENT_FLAG))) + return false; + + const Type_handler *th= type_handler(), *new_th = new_field.type_handler(); + + if (th == new_th && new_field.pack_length == pack_length()) + return true; + /* FIXME: Test and consider returning true for the following: + TINYINT UNSIGNED to BIT(8) + SMALLINT UNSIGNED to BIT(16) + MEDIUMINT UNSIGNED to BIT(24) + INT UNSIGNED to BIT(32) + BIGINT UNSIGNED to BIT(64) + + BIT(1..7) to TINYINT, or BIT(1..8) to TINYINT UNSIGNED + BIT(9..15) to SMALLINT, or BIT(9..16) to SMALLINT UNSIGNED + BIT(17..23) to MEDIUMINT, or BIT(17..24) to MEDIUMINT UNSIGNED + BIT(25..31) to INT, or BIT(25..32) to INT UNSIGNED + BIT(57..63) to BIGINT, or BIT(57..64) to BIGINT UNSIGNED + + Note: InnoDB stores integers in big-endian format, and BIT appears + to use big-endian format. For storage engines that use little-endian + format for integers, we can only return true for the TINYINT + conversion. */ + + return false; +} + + +Data_type_compatibility +Field_enum::can_optimize_range_or_keypart_ref(const Item_bool_func *cond, + const Item *item) const +{ + switch (item->cmp_type()) + { + case TIME_RESULT: + return Data_type_compatibility::INCOMPATIBLE_DATA_TYPE; + case INT_RESULT: + case DECIMAL_RESULT: + case REAL_RESULT: + return Data_type_compatibility::OK; + case STRING_RESULT: + return charset() == cond->compare_collation() ? + Data_type_compatibility::OK : + Data_type_compatibility::INCOMPATIBLE_COLLATION; + case ROW_RESULT: + DBUG_ASSERT(0); + break; + } + return Data_type_compatibility::INCOMPATIBLE_DATA_TYPE; +} + + +/* + Bit field. + + We store the first 0 - 6 uneven bits among the null bits + at the start of the record. The rest bytes are stored in + the record itself. 
+ + For example: + + CREATE TABLE t1 (a int, b bit(17), c bit(21) not null, d bit(8)); + We would store data as follows in the record: + + Byte Bit + 1 7 - reserve for delete + 6 - null bit for 'a' + 5 - null bit for 'b' + 4 - first (high) bit of 'b' + 3 - first (high) bit of 'c' + 2 - second bit of 'c' + 1 - third bit of 'c' + 0 - fourth bit of 'c' + 2 7 - fifth bit of 'c' + 6 - null bit for 'd' + 3 - 6 four bytes for 'a' + 7 - 8 two bytes for 'b' + 9 - 10 two bytes for 'c' + 11 one byte for 'd' +*/ + +Field_bit::Field_bit(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, uchar *bit_ptr_arg, uchar bit_ofs_arg, + enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg) + : Field(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg), + bit_ptr(bit_ptr_arg), bit_ofs(bit_ofs_arg), bit_len(len_arg & 7), + bytes_in_rec(len_arg / 8) +{ + DBUG_ENTER("Field_bit::Field_bit"); + DBUG_PRINT("enter", ("ptr_arg: %p, null_ptr_arg: %p, len_arg: %u, bit_len: %u, bytes_in_rec: %u", + ptr_arg, null_ptr_arg, len_arg, bit_len, bytes_in_rec)); + flags|= UNSIGNED_FLAG; + /* + Ensure that Field::eq() can distinguish between two different bit fields. + (two bit fields that are not null, may have same ptr and null_ptr) + */ + if (!null_ptr_arg) + null_bit= bit_ofs_arg; + DBUG_VOID_RETURN; +} + + +const DTCollation & Field_bit::dtcollation() const +{ + static DTCollation tmp(&my_charset_bin, + DERIVATION_IMPLICIT, MY_REPERTOIRE_UNICODE30); + return tmp; +} + + +/* + This method always calculates hash over 8 bytes. + This is different from how the HEAP engine calculates hash: + HEAP takes into account the actual octet size, so say for BIT(18) + it calculates hash over three bytes only: + - the incomplete byte with bits 16..17 + - the two full bytes with bits 0..15 + See hp_rec_hashnr(), hp_hashnr() for details. + + The HEAP way is more efficient, especially for short lengths. 
+ Let's consider fixing Field_bit eventually to do it in the HEAP way, + with proper measures to upgrade partitioned tables easy. +*/ +void Field_bit::hash_not_null(Hasher *hasher) +{ + DBUG_ASSERT(marked_for_read()); + DBUG_ASSERT(!is_null()); + longlong value= Field_bit::val_int(); + uchar tmp[8]; + mi_int8store(tmp,value); + hasher->add(&my_charset_bin, tmp, 8); +} + + +size_t +Field_bit::do_last_null_byte() const +{ + /* + Code elsewhere is assuming that bytes are 8 bits, so I'm using + that value instead of the correct one: CHAR_BIT. + + REFACTOR SUGGESTION (Matz): Change to use the correct number of + bits. On systems with CHAR_BIT > 8 (not very common), the storage + will lose the extra bits. + */ + DBUG_PRINT("test", ("bit_ofs: %d, bit_len: %d bit_ptr: %p", + bit_ofs, bit_len, bit_ptr)); + uchar *result; + if (bit_len == 0) + result= null_ptr; + else if (bit_ofs + bit_len > 8) + result= bit_ptr + 1; + else + result= bit_ptr; + + if (result) + return (size_t) (result - table->record[0]) + 1; + return LAST_NULL_BYTE_UNDEF; +} + + +Field *Field_bit::new_key_field(MEM_ROOT *root, TABLE *new_table, + uchar *new_ptr, uint32 length, + uchar *new_null_ptr, uint new_null_bit) +{ + Field_bit *res; + if ((res= (Field_bit*) Field::new_key_field(root, new_table, new_ptr, length, + new_null_ptr, new_null_bit))) + { + /* Move bits normally stored in null_pointer to new_ptr */ + res->bit_ptr= new_ptr; + res->bit_ofs= 0; + if (bit_len) + res->ptr++; // Store rest of data here + } + return res; +} + + +bool Field_bit::is_equal(const Column_definition &new_field) const +{ + return new_field.type_handler() == type_handler() && + new_field.length == max_display_length(); +} + + +int Field_bit::store(const char *from, size_t length, CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int delta; + + for (; length && !*from; from++, length--) // skip left 0's + ; + delta= (int)(bytes_in_rec - length); + + if (delta < -1 || + (delta == -1 && (uchar) *from > ((1 << 
bit_len) - 1)) || + (!bit_len && delta < 0)) + { + set_rec_bits((1 << bit_len) - 1, bit_ptr, bit_ofs, bit_len); + memset(ptr, 0xff, bytes_in_rec); + if (get_thd()->really_abort_on_warning()) + set_warning(ER_DATA_TOO_LONG, 1); + else + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + return 1; + } + /* delta is >= -1 here */ + if (delta > 0) + { + if (bit_len) + clr_rec_bits(bit_ptr, bit_ofs, bit_len); + bzero(ptr, delta); + memcpy(ptr + delta, from, length); + } + else if (delta == 0) + { + if (bit_len) + clr_rec_bits(bit_ptr, bit_ofs, bit_len); + memcpy(ptr, from, length); + } + else + { + if (bit_len) + { + set_rec_bits((uchar) *from, bit_ptr, bit_ofs, bit_len); + from++; + } + memcpy(ptr, from, bytes_in_rec); + } + return 0; +} + + +int Field_bit::store(double nr) +{ + return Field_bit::store((longlong) nr, FALSE); +} + + +int Field_bit::store(longlong nr, bool unsigned_val) +{ + char buf[8]; + + mi_int8store(buf, nr); + return store(buf, 8, NULL); +} + + +int Field_bit::store_decimal(const my_decimal *val) +{ + int err= 0; + longlong i= convert_decimal2longlong(val, 1, &err); + return MY_TEST(err | store(i, TRUE)); +} + + +double Field_bit::val_real(void) +{ + return (double) Field_bit::val_int(); +} + + +longlong Field_bit::val_int(void) +{ + DBUG_ASSERT(marked_for_read()); + ulonglong bits= 0; + if (bit_len) + { + bits= get_rec_bits(bit_ptr, bit_ofs, bit_len); + bits<<= (bytes_in_rec * 8); + } + + switch (bytes_in_rec) { + case 0: return bits; + case 1: return bits | (ulonglong) ptr[0]; + case 2: return bits | mi_uint2korr(ptr); + case 3: return bits | mi_uint3korr(ptr); + case 4: return bits | mi_uint4korr(ptr); + case 5: return bits | mi_uint5korr(ptr); + case 6: return bits | mi_uint6korr(ptr); + case 7: return bits | mi_uint7korr(ptr); + default: return mi_uint8korr(ptr + bytes_in_rec - sizeof(longlong)); + } +} + + +String *Field_bit::val_str(String *val_buffer, + String *val_ptr __attribute__((unused))) +{ + DBUG_ASSERT(marked_for_read()); + char 
buff[sizeof(longlong)]; + uint length= MY_MIN(pack_length(), sizeof(longlong)); + ulonglong bits= val_int(); + mi_int8store(buff,bits); + + val_buffer->alloc(length); + memcpy((char *) val_buffer->ptr(), buff+8-length, length); + val_buffer->length(length); + val_buffer->set_charset(&my_charset_bin); + return val_buffer; +} + + +my_decimal *Field_bit::val_decimal(my_decimal *deciaml_value) +{ + DBUG_ASSERT(marked_for_read()); + int2my_decimal(E_DEC_FATAL_ERROR, val_int(), 1, deciaml_value); + return deciaml_value; +} + + +/* + Compare two bit fields using pointers within the record. + SYNOPSIS + cmp_prefix() + a Pointer to field->ptr in first record + b Pointer to field->ptr in second record + prefix_char_len Maximum prefix length used in index + DESCRIPTION + This method is used from key_rec_cmp used by merge sorts used + by partitioned index read and later other similar places. + The a and b pointer must be pointers to the field in a record + (not the table->record[0] necessarily) +*/ +int Field_bit::cmp_prefix(const uchar *a, const uchar *b, + size_t prefix_char_len) const +{ + my_ptrdiff_t a_diff= a - ptr; + my_ptrdiff_t b_diff= b - ptr; + if (bit_len) + { + int flag; + uchar bits_a= get_rec_bits(bit_ptr+a_diff, bit_ofs, bit_len); + uchar bits_b= get_rec_bits(bit_ptr+b_diff, bit_ofs, bit_len); + if ((flag= (int) (bits_a - bits_b))) + return flag; + } + if (!bytes_in_rec) + return 0; + return memcmp(a, b, bytes_in_rec); +} + + +int Field_bit::key_cmp(const uchar *str, uint) const +{ + if (bit_len) + { + int flag; + uchar bits= get_rec_bits(bit_ptr, bit_ofs, bit_len); + if ((flag= (int) (bits - *str))) + return flag; + str++; + } + return memcmp(ptr, str, bytes_in_rec); +} + + +int Field_bit::cmp_offset(my_ptrdiff_t row_offset) +{ + if (bit_len) + { + int flag; + uchar bits_a= get_rec_bits(bit_ptr, bit_ofs, bit_len); + uchar bits_b= get_rec_bits(bit_ptr + row_offset, bit_ofs, bit_len); + if ((flag= (int) (bits_a - bits_b))) + return flag; + } + return memcmp(ptr, ptr + row_offset, 
bytes_in_rec); +} + + +uint Field_bit::get_key_image(uchar *buff, uint length, const uchar *ptr_arg, imagetype type_arg) const +{ + if (bit_len) + { + const uchar *bit_ptr_for_arg= ptr_arg + (bit_ptr - ptr); + uchar bits= get_rec_bits(bit_ptr_for_arg, bit_ofs, bit_len); + *buff++= bits; + length--; + } + uint tmp_data_length = MY_MIN(length, bytes_in_rec); + memcpy(buff, ptr, tmp_data_length); + return tmp_data_length + 1; +} + + +/** + Returns the number of bytes field uses in row-based replication + row packed size. + + This method is used in row-based replication to determine the number + of bytes that the field consumes in the row record format. This is + used to skip fields in the master that do not exist on the slave. + + @param field_metadata Encoded size in field metadata + + @returns The size of the field based on the field metadata. +*/ +uint Field_bit::pack_length_from_metadata(uint field_metadata) const +{ + uint const from_len= (field_metadata >> 8U) & 0x00ff; + uint const from_bit_len= field_metadata & 0x00ff; + uint const source_size= from_len + ((from_bit_len > 0) ? 1 : 0); + return (source_size); +} + + +bool +Field_bit::compatible_field_size(uint field_metadata, + const Relay_log_info * __attribute__((unused)), + uint16 mflags, + int *order_var) const +{ + DBUG_ENTER("Field_bit::compatible_field_size"); + DBUG_ASSERT((field_metadata >> 16) == 0); + uint from_bit_len= + 8 * (field_metadata >> 8) + (field_metadata & 0xff); + uint to_bit_len= max_display_length(); + DBUG_PRINT("debug", ("from_bit_len: %u, to_bit_len: %u", + from_bit_len, to_bit_len)); + /* + If the bit length exact flag is clear, we are dealing with an old + master, so we allow some less strict behaviour if replicating by + moving both bit lengths to an even multiple of 8. + + We do this by computing the number of bytes to store the field + instead, and then compare the result. 
+ */ + if (!(mflags & Table_map_log_event::TM_BIT_LEN_EXACT_F)) { + from_bit_len= (from_bit_len + 7) / 8; + to_bit_len= (to_bit_len + 7) / 8; + } + + *order_var= compare(from_bit_len, to_bit_len); + DBUG_RETURN(TRUE); +} + + + +void Field_bit::sql_type(String &res) const +{ + CHARSET_INFO *cs= res.charset(); + size_t length= cs->cset->snprintf(cs, (char*) res.ptr(), res.alloced_length(), + "bit(%d)", (int) field_length); + res.length(length); +} + + +uchar * +Field_bit::pack(uchar *to, const uchar *from, uint max_length) +{ + DBUG_ASSERT(max_length > 0); + uint length; + if (bit_len > 0) + { + /* + We have the following: + + ptr Points into a field in record R1 + from Points to a field in a record R2 + bit_ptr Points to the byte (in the null bytes) that holds the + odd bits of R1 + from_bitp Points to the byte that holds the odd bits of R2 + + We have the following: + + ptr - bit_ptr = from - from_bitp + + We want to isolate 'from_bitp', so this gives: + + ptr - bit_ptr - from = - from_bitp + - ptr + bit_ptr + from = from_bitp + bit_ptr + from - ptr = from_bitp + */ + uchar bits= get_rec_bits(bit_ptr + (from - ptr), bit_ofs, bit_len); + *to++= bits; + } + length= MY_MIN(bytes_in_rec, max_length - (bit_len > 0)); + memcpy(to, from, length); + return to + length; +} + + +/** + Unpack a bit field from row data. + + This method is used to unpack a bit field from a master whose size + of the field is less than that of the slave. 
+ + @param to Destination of the data + @param from Source of the data + @param param_data Bit length (upper) and length (lower) values + + @return New pointer into memory based on from + length of the data +*/ +const uchar * +Field_bit::unpack(uchar *to, const uchar *from, const uchar *from_end, + uint param_data) +{ + DBUG_ENTER("Field_bit::unpack"); + DBUG_PRINT("enter", ("to: %p, from: %p, param_data: 0x%x", + to, from, param_data)); + DBUG_PRINT("debug", ("bit_ptr: %p, bit_len: %u, bit_ofs: %u", + bit_ptr, bit_len, bit_ofs)); + uint const from_len= (param_data >> 8U) & 0x00ff; + uint const from_bit_len= param_data & 0x00ff; + DBUG_PRINT("debug", ("from_len: %u, from_bit_len: %u", + from_len, from_bit_len)); + /* + If the parameter data is zero (i.e., undefined), or if the master + and slave have the same sizes, then use the old unpack() method. + */ + if (param_data == 0 || + ((from_bit_len == bit_len) && (from_len == bytes_in_rec))) + { + if (from + bytes_in_rec + MY_TEST(bit_len) > from_end) + return 0; // Error in data + + if (bit_len > 0) + { + /* + set_rec_bits is a macro, don't put the post-increment in the + argument since that might cause strange side-effects. + + For the choice of the second argument, see the explanation for + Field_bit::pack(). + */ + set_rec_bits(*from, bit_ptr + (to - ptr), bit_ofs, bit_len); + from++; + } + memcpy(to, from, bytes_in_rec); + DBUG_RETURN(from + bytes_in_rec); + } + + /* + We are converting a smaller bit field to a larger one here. + To do that, we first need to construct a raw value for the original + bit value stored in the from buffer. Then that needs to be converted + to the larger field then sent to store() for writing to the field. + Lastly the odd bits need to be masked out if the bytes_in_rec > 0. + Otherwise stray bits can cause spurious values. + */ + + uint len= from_len + ((from_bit_len > 0) ? 
1 : 0); + uint new_len= (field_length + 7) / 8; + + if (from + len > from_end || new_len < len) + return 0; // Error in data + + char *value= (char *)my_alloca(new_len); + bzero(value, new_len); + + memcpy(value + (new_len - len), from, len); + /* + Mask out the unused bits in the partial byte. + TODO: Add code to the master to always mask these bits and remove + the following. + */ + if ((from_bit_len > 0) && (from_len > 0)) + value[new_len - len]= value[new_len - len] & ((1U << from_bit_len) - 1); + bitmap_set_bit(table->write_set,field_index); + store(value, new_len, system_charset_info); + my_afree(value); + DBUG_RETURN(from + len); +} + + +int Field_bit::set_default() +{ + if (bit_len > 0) + { + my_ptrdiff_t const col_offset= table->s->default_values - table->record[0]; + uchar bits= get_rec_bits(bit_ptr + col_offset, bit_ofs, bit_len); + set_rec_bits(bits, bit_ptr, bit_ofs, bit_len); + } + return Field::set_default(); +} + +/* + Bit field support for non-MyISAM tables. +*/ + +Field_bit_as_char::Field_bit_as_char(uchar *ptr_arg, uint32 len_arg, + uchar *null_ptr_arg, uchar null_bit_arg, + enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg) + :Field_bit(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, 0, 0, + unireg_check_arg, field_name_arg) +{ + flags|= UNSIGNED_FLAG; + bit_len= 0; + bytes_in_rec= (len_arg + 7) / 8; +} + + +int Field_bit_as_char::store(const char *from, size_t length, CHARSET_INFO *cs) +{ + DBUG_ASSERT(marked_for_write_or_computed()); + int delta; + uchar bits= (uchar) (field_length & 7); + + for (; length && !*from; from++, length--) // skip left 0's + ; + delta= (int)(bytes_in_rec - length); + + if (delta < 0 || + (delta == 0 && bits && (uint) (uchar) *from >= (uint) (1 << bits))) + { + memset(ptr, 0xff, bytes_in_rec); + if (bits) + *ptr&= ((1 << bits) - 1); /* set first uchar */ + if (get_thd()->really_abort_on_warning()) + set_warning(ER_DATA_TOO_LONG, 1); + else + set_warning(ER_WARN_DATA_OUT_OF_RANGE, 1); + return 1; + } + 
bzero(ptr, delta); + memcpy(ptr + delta, from, length); + return 0; +} + + +void Field_bit_as_char::sql_type(String &res) const +{ + CHARSET_INFO *cs= res.charset(); + size_t length= cs->cset->snprintf(cs, (char*) res.ptr(), res.alloced_length(), + "bit(%d)", (int) field_length); + res.length(length); +} + + +/***************************************************************************** + Handling of field and Create_field +*****************************************************************************/ + +bool Column_definition::create_interval_from_interval_list(MEM_ROOT *mem_root, + bool reuse_interval_list_values) +{ + DBUG_ENTER("Column_definition::create_interval_from_interval_list"); + DBUG_ASSERT(!interval); + TYPELIB *tmpint; + if (!(interval= tmpint= (TYPELIB*) alloc_root(mem_root, sizeof(TYPELIB)))) + DBUG_RETURN(true); // EOM + + List_iterator it(interval_list); + StringBuffer<64> conv; + char comma_buf[5]; /* 5 bytes for 'filename' charset */ + DBUG_ASSERT(sizeof(comma_buf) >= charset->mbmaxlen); + int comma_length= charset->wc_mb(',', + (uchar*) comma_buf, + (uchar*) comma_buf + sizeof(comma_buf)); + DBUG_ASSERT(comma_length >= 0 && comma_length <= (int) sizeof(comma_buf)); + + if (!multi_alloc_root(mem_root, + &tmpint->type_names, + sizeof(char*) * (interval_list.elements + 1), + &tmpint->type_lengths, + sizeof(uint) * (interval_list.elements + 1), + NullS)) + goto err; // EOM + + tmpint->name= ""; + tmpint->count= interval_list.elements; + + for (uint i= 0; i < interval_list.elements; i++) + { + uint32 dummy; + String *tmp= it++; + LEX_CSTRING value; + if (String::needs_conversion(tmp->length(), tmp->charset(), + charset, &dummy)) + { + uint cnv_errs; + conv.copy(tmp->ptr(), tmp->length(), tmp->charset(), charset, &cnv_errs); + value.str= strmake_root(mem_root, conv.ptr(), conv.length()); + value.length= conv.length(); + } + else + { + value.str= reuse_interval_list_values ? 
tmp->ptr() : + strmake_root(mem_root, + tmp->ptr(), + tmp->length()); + value.length= tmp->length(); + } + if (!value.str) + goto err; // EOM + + // Strip trailing spaces. + value.length= charset->lengthsp(value.str, value.length); + ((char*) value.str)[value.length]= '\0'; + + if (real_field_type() == MYSQL_TYPE_SET) + { + if (charset->instr(value.str, value.length, + comma_buf, comma_length, NULL, 0)) + { + ErrConvString err(tmp); + my_error(ER_ILLEGAL_VALUE_FOR_TYPE, MYF(0), "set", err.ptr()); + goto err; + } + } + tmpint->type_names[i]= value.str; + tmpint->type_lengths[i]= (uint)value.length; + } + tmpint->type_names[interval_list.elements]= 0; // End marker + tmpint->type_lengths[interval_list.elements]= 0; + interval_list.empty(); // Don't need interval_list anymore + DBUG_RETURN(false); +err: + interval= NULL; // Avoid having both non-empty interval_list and interval + DBUG_RETURN(true); +} + + +bool Column_definition::prepare_interval_field(MEM_ROOT *mem_root, + bool reuse_interval_list_values) +{ + DBUG_ENTER("Column_definition::prepare_interval_field"); + DBUG_ASSERT(real_field_type() == MYSQL_TYPE_ENUM || + real_field_type() == MYSQL_TYPE_SET); + /* + Interval values are either in "interval" or in "interval_list", + but not in both at the same time, and are not empty at the same time. + - Values are in "interval_list" when we're coming from the parser + in CREATE TABLE or in CREATE {FUNCTION|PROCEDURE}. + - Values are in "interval" when we're in ALTER TABLE. + + In a corner case with an empty set like SET(''): + - after the parser we have interval_list.elements==1 + - in ALTER TABLE we have a non-NULL interval with interval->count==1, + with interval->type_names[0]=="" and interval->type_lengths[0]==0. + So the assert is still valid for this corner case. + + ENUM and SET with no values at all (e.g. 
ENUM(), SET()) are not possible, + as the parser requires at least one element, so for a ENUM or SET field it + should never happen that both internal_list.elements and interval are 0. + */ + DBUG_ASSERT((interval == NULL) == (interval_list.elements > 0)); + + /* + Create typelib from interval_list, and if necessary + convert strings from client character set to the + column character set. + */ + if (interval_list.elements && + create_interval_from_interval_list(mem_root, + reuse_interval_list_values)) + DBUG_RETURN(true); + + if (!reuse_interval_list_values) + { + /* + We're initializing from an existing table or view Field_enum + (e.g. for a %TYPE variable) rather than from the parser. + The constructor Column_definition(THD*,Field*,Field*) has already + copied the TYPELIB pointer from the original Field_enum. + Now we need to make a permanent copy of that TYPELIB, + as the original field can be freed before the end of the life + cycle of "this". + */ + DBUG_ASSERT(interval); + if (!(interval= copy_typelib(mem_root, interval))) + DBUG_RETURN(true); + } + prepare_interval_field_calc_length(); + DBUG_RETURN(false); +} + + +bool Column_definition::set_attributes(THD *thd, + const Lex_field_type_st &def, + column_definition_type_t type) +{ + DBUG_ASSERT(type_handler() == &type_handler_null); + DBUG_ASSERT(length == 0); + DBUG_ASSERT(decimals == 0); + + set_handler(def.type_handler()); + return type_handler()->Column_definition_set_attributes(thd, this, + def, type); +} + + +void +Column_definition_attributes::set_length_and_dec(const Lex_length_and_dec_st + &type) +{ + if (type.has_explicit_length()) + length= type.length_overflowed() ? 
(ulonglong) UINT_MAX32 + 1 : + (ulonglong) type.length(); + + if (type.has_explicit_dec()) + decimals= type.dec(); +} + + +void Column_definition::create_length_to_internal_length_bit() +{ + if (f_bit_as_char(pack_flag)) + { + pack_length= ((length + 7) & ~7) / 8; + } + else + { + pack_length= (uint) length / 8; + } +} + + +void Column_definition::create_length_to_internal_length_newdecimal() +{ + DBUG_ASSERT(length < UINT_MAX32); + decimal_digit_t prec= get_decimal_precision((uint)length, decimals, + flags & UNSIGNED_FLAG); + pack_length= my_decimal_get_binary_size(prec, decimals); +} + + +bool check_expression(Virtual_column_info *vcol, const LEX_CSTRING *name, + enum_vcol_info_type type, Alter_info *alter_info) + +{ + bool ret; + Item::vcol_func_processor_result res; + res.alter_info= alter_info; + + if (!vcol->name.length) + vcol->name= *name; + + /* + Walk through the Item tree checking if all items are valid + to be part of the virtual column + */ + ret= vcol->expr->walk(&Item::check_vcol_func_processor, 0, &res); + vcol->flags= res.errors; + + uint filter= VCOL_IMPOSSIBLE; + if (type != VCOL_GENERATED_VIRTUAL && type != VCOL_DEFAULT) + filter|= VCOL_NOT_STRICTLY_DETERMINISTIC; + if (type != VCOL_DEFAULT) + filter|= VCOL_NEXTVAL; + + if (unlikely(ret || (res.errors & filter))) + { + my_error(ER_GENERATED_COLUMN_FUNCTION_IS_NOT_ALLOWED, MYF(0), res.name, + vcol_type_name(type), name->str); + return TRUE; + } + /* + Safe to call before fix_fields as long as vcol's don't include sub + queries (which is now checked in check_vcol_func_processor) + */ + if (vcol->expr->check_cols(1)) + return TRUE; + return FALSE; +} + + +bool Column_definition::check_length(uint mysql_errno, uint limit) const +{ + if (length <= limit) + return false; + my_error(mysql_errno, MYF(0), field_name.str, static_cast(limit)); + return true; +} + + +bool Column_definition::fix_attributes_int(uint default_length) +{ + if (length) + return check_length(ER_TOO_BIG_DISPLAYWIDTH, 
MAX_FIELD_CHARLENGTH); + length= default_length; + return false; +} + + +bool Column_definition::fix_attributes_real(uint default_length) +{ + /* change FLOAT(precision) to FLOAT or DOUBLE */ + if (!length && !decimals) + { + length= default_length; + decimals= NOT_FIXED_DEC; + } + if (length < decimals && decimals != NOT_FIXED_DEC) + { + my_error(ER_M_BIGGER_THAN_D, MYF(0), field_name.str); + return true; + } + if (decimals != NOT_FIXED_DEC && decimals >= FLOATING_POINT_DECIMALS) + { + my_error(ER_TOO_BIG_SCALE, MYF(0), + field_name.str, static_cast(FLOATING_POINT_DECIMALS-1)); + return true; + } + return check_length(ER_TOO_BIG_DISPLAYWIDTH, MAX_FIELD_CHARLENGTH); +} + + +bool Column_definition::fix_attributes_decimal() +{ + if (decimals >= NOT_FIXED_DEC) + { + my_error(ER_TOO_BIG_SCALE, MYF(0), + field_name.str, static_cast(NOT_FIXED_DEC - 1)); + return true; + } + my_decimal_trim(&length, &decimals); + if (length > DECIMAL_MAX_PRECISION) + { + my_error(ER_TOO_BIG_PRECISION, MYF(0), field_name.str, + DECIMAL_MAX_PRECISION); + return true; + } + if (length < decimals) + { + my_error(ER_M_BIGGER_THAN_D, MYF(0), field_name.str); + return true; + } + length= my_decimal_precision_to_length((uint) length, decimals, + flags & UNSIGNED_FLAG); + pack_length= my_decimal_get_binary_size((uint) length, decimals); + return false; +} + + +bool Column_definition::fix_attributes_bit() +{ + if (!length) + length= 1; + pack_length= ((uint) length + 7) / 8; + return check_length(ER_TOO_BIG_DISPLAYWIDTH, MAX_BIT_FIELD_LENGTH); +} + + +bool Column_definition::fix_attributes_temporal_with_time(uint int_part_length) +{ + if (length > MAX_DATETIME_PRECISION) + { + my_error(ER_TOO_BIG_PRECISION, MYF(0), field_name.str, + MAX_DATETIME_PRECISION); + return true; + } + decimals= (uint) length; + length+= int_part_length + (length ? 
1 : 0); + return false; +} + + +bool Column_definition::validate_check_constraint(THD *thd) +{ + return check_constraint && + check_expression(check_constraint, &field_name, VCOL_CHECK_FIELD); +} + + +bool Column_definition::check(THD *thd) +{ + DBUG_ENTER("Column_definition::check"); + + /* Initialize data for a computed field */ + if (vcol_info) + { + DBUG_ASSERT(vcol_info->expr); + vcol_info->set_handler(type_handler()); + if (check_expression(vcol_info, &field_name, vcol_info->stored_in_db + ? VCOL_GENERATED_STORED : VCOL_GENERATED_VIRTUAL)) + DBUG_RETURN(TRUE); + } + + if (type_handler()->Column_definition_validate_check_constraint(thd, this)) + DBUG_RETURN(TRUE); + + if (default_value) + { + Item *def_expr= default_value->expr; + if (check_expression(default_value, &field_name, VCOL_DEFAULT)) + DBUG_RETURN(TRUE); + + /* Constant's are stored in the 'empty_record', except for blobs */ + if (def_expr->basic_const_item()) + { + if (def_expr->type() == Item::NULL_ITEM) + { + default_value= 0; + if ((flags & (NOT_NULL_FLAG | AUTO_INCREMENT_FLAG)) == NOT_NULL_FLAG) + { + my_error(ER_INVALID_DEFAULT, MYF(0), field_name.str); + DBUG_RETURN(1); + } + } + } + } + + if (default_value && (flags & AUTO_INCREMENT_FLAG)) + { + my_error(ER_INVALID_DEFAULT, MYF(0), field_name.str); + DBUG_RETURN(1); + } + + if (default_value && !default_value->expr->basic_const_item() && + mysql_timestamp_type() == MYSQL_TIMESTAMP_DATETIME && + default_value->expr->type() == Item::FUNC_ITEM) + { + /* + Special case: NOW() for TIMESTAMP and DATETIME fields are handled + as in MariaDB 10.1 by marking them in unireg_check. 
+ */ + Item_func *fn= static_cast(default_value->expr); + if (fn->functype() == Item_func::NOW_FUNC && + (fn->decimals == 0 || fn->decimals >= length)) + { + default_value= 0; + unireg_check= Field::TIMESTAMP_DN_FIELD; + } + } + + if (on_update) + { + if (mysql_timestamp_type() != MYSQL_TIMESTAMP_DATETIME || + on_update->decimals < length) + { + my_error(ER_INVALID_ON_UPDATE, MYF(0), field_name.str); + DBUG_RETURN(TRUE); + } + unireg_check= unireg_check == Field::NONE ? Field::TIMESTAMP_UN_FIELD + : Field::TIMESTAMP_DNUN_FIELD; + } + else if (flags & AUTO_INCREMENT_FLAG) + unireg_check= Field::NEXT_NUMBER; + + if (type_handler()->Column_definition_fix_attributes(this)) + DBUG_RETURN(true); + + /* Remember the value of length */ + char_length= (uint)length; + + /* + Set NO_DEFAULT_VALUE_FLAG if this field doesn't have a default value and + it is NOT NULL, not an AUTO_INCREMENT field. + We need to do this check here and in mysql_create_prepare_table() as + sp_head::fill_field_definition() calls this function. + */ + if (!default_value && unireg_check == Field::NONE && (flags & NOT_NULL_FLAG)) + { + /* + TIMESTAMP columns get implicit DEFAULT value when + explicit_defaults_for_timestamp is not set. 
+ */ + if (((thd->variables.option_bits & OPTION_EXPLICIT_DEF_TIMESTAMP) || + !is_timestamp_type()) && !vers_sys_field()) + { + flags|= NO_DEFAULT_VALUE_FLAG; + } + } + + + if ((flags & AUTO_INCREMENT_FLAG) && + !type_handler()->type_can_have_auto_increment_attribute()) + { + my_error(ER_WRONG_FIELD_SPEC, MYF(0), field_name.str); + DBUG_RETURN(TRUE); + } + + DBUG_RETURN(FALSE); /* success */ +} + +enum_field_types get_blob_type_from_length(ulong length) +{ + enum_field_types type; + if (length < 256) + type= MYSQL_TYPE_TINY_BLOB; + else if (length < 65536) + type= MYSQL_TYPE_BLOB; + else if (length < 256L*256L*256L) + type= MYSQL_TYPE_MEDIUM_BLOB; + else + type= MYSQL_TYPE_LONG_BLOB; + return type; +} + + +uint pack_length_to_packflag(uint type) +{ + switch (type) { + case 1: return f_settype((uint) MYSQL_TYPE_TINY); + case 2: return f_settype((uint) MYSQL_TYPE_SHORT); + case 3: return f_settype((uint) MYSQL_TYPE_INT24); + case 4: return f_settype((uint) MYSQL_TYPE_LONG); + case 8: return f_settype((uint) MYSQL_TYPE_LONGLONG); + } + return 0; // This shouldn't happen +} + + +uint Column_definition_attributes::pack_flag_to_pack_length() const +{ + uint type= f_packtype(pack_flag); // 0..15 + DBUG_ASSERT(type < 16); + switch (type) { + case MYSQL_TYPE_TINY: return 1; + case MYSQL_TYPE_SHORT: return 2; + case MYSQL_TYPE_LONG: return 4; + case MYSQL_TYPE_LONGLONG: return 8; + case MYSQL_TYPE_INT24: return 3; + } + return 0; // This should not happen +} + + +Field *Column_definition_attributes::make_field(TABLE_SHARE *share, + MEM_ROOT *mem_root, + const Record_addr *rec, + const Type_handler *handler, + const LEX_CSTRING *field_name, + uint32 flags) + const +{ + DBUG_ASSERT(length <= UINT_MAX32); + DBUG_PRINT("debug", ("field_type: %s, field_length: %u, interval: %p, pack_flag: %s%s%s%s%s", + handler->name().ptr(), (uint) length, interval, + FLAGSTR(pack_flag, FIELDFLAG_BINARY), + FLAGSTR(pack_flag, FIELDFLAG_INTERVAL), + FLAGSTR(pack_flag, FIELDFLAG_NUMBER), + 
FLAGSTR(pack_flag, FIELDFLAG_PACK), + FLAGSTR(pack_flag, FIELDFLAG_BLOB))); + + Record_addr addr(rec->ptr(), f_maybe_null(pack_flag) ? rec->null() : + Bit_addr()); + /* + Special code for the BIT-alike data types + who store data bits together with NULL-bits. + */ + Bit_addr bit(rec->null()); + if (f_maybe_null(pack_flag)) + bit.inc(); + return handler->make_table_field_from_def(share, mem_root, field_name, + addr, bit, this, flags); +} + + +bool Field_vers_trx_id::test_if_equality_guarantees_uniqueness(const Item* item) const +{ + return item->is_of_type(Item::CONST_ITEM, TIME_RESULT); +} + + +Column_definition_attributes::Column_definition_attributes(const Field *field) + :length(field->character_octet_length() / field->charset()->mbmaxlen), + interval(NULL), + charset(field->charset()), // May be NULL ptr + srid(0), + pack_flag(0), + decimals(field->decimals()), + unireg_check(field->unireg_check) +{} + + +Column_definition_attributes:: + Column_definition_attributes(const Type_all_attributes &attr) + :length(attr.max_length), + interval(attr.get_typelib()), + charset(attr.collation.collation), + srid(0), + pack_flag(attr.unsigned_flag ? 0 : FIELDFLAG_DECIMAL), + decimals(attr.decimals), + unireg_check(Field::NONE) +{} + + +/** Create a field suitable for create of table. 
*/ + +Column_definition::Column_definition(THD *thd, Field *old_field, + Field *orig_field) + :Column_definition_attributes(old_field) +{ + srid= 0; + on_update= NULL; + field_name= old_field->field_name; + flags= old_field->flags; + pack_length=old_field->pack_length(); + set_handler(old_field->type_handler()); + comment= old_field->comment; + vcol_info= old_field->vcol_info; + option_list= old_field->option_list; + explicitly_nullable= !(old_field->flags & NOT_NULL_FLAG); + compression_method_ptr= 0; + versioning= VERSIONING_NOT_SET; + invisible= old_field->invisible; + interval_list.empty(); // prepare_interval_field() needs this + char_length= (uint) length; + + if (orig_field) + { + default_value= orig_field->default_value; + check_constraint= orig_field->check_constraint; + if (orig_field->unireg_check == Field::TMYSQL_COMPRESSED) + { + unireg_check= Field::TMYSQL_COMPRESSED; + compression_method_ptr= zlib_compression_method; + } + } + else + { + default_value= 0; + check_constraint= 0; + } + + type_handler()->Column_definition_reuse_fix_attributes(thd, this, old_field); + + /* + Copy the default (constant/function) from the column object orig_field, if + supplied. We do this if all these conditions are met: + + - The column allows a default. + + - The column type is not a BLOB type (as BLOB's doesn't have constant + defaults) + + - The original column (old_field) was properly initialized with a record + buffer pointer. 
+ + - The column didn't have a default expression + */ + if (!(flags & (NO_DEFAULT_VALUE_FLAG | BLOB_FLAG)) && + old_field->ptr != NULL && orig_field != NULL) + { + if (orig_field->unireg_check != Field::NEXT_NUMBER) + unireg_check= orig_field->unireg_check; + + /* Get the value from default_values */ + const uchar *dv= orig_field->table->s->default_values; + if (!default_value && !orig_field->is_null_in_record(dv)) + { + StringBuffer tmp(charset); + String *res= orig_field->val_str(&tmp, orig_field->ptr_in_record(dv)); + char *pos= (char*) thd->strmake(res->ptr(), res->length()); + default_value= new (thd->mem_root) Virtual_column_info(); + default_value->expr= + new (thd->mem_root) Item_string(thd, pos, res->length(), charset); + default_value->utf8= 0; + } + } +} + + +/** + The common part for data type redefinition: + CREATE TABLE t1 (a INT) AS SELECT a FROM t2; + See Type_handler::Column_definition_redefine_stage1() + for data type specific code. + + @param this - The field definition corresponding to the expression + in the "AS SELECT.." part. + + @param dup_field - The field definition from the "CREATE TABLE (...)" part. + It has already underwent prepare_stage1(), so + must be fully initialized: + -- dup_field->charset is set and BINARY + sorting style is applied, see find_bin_collation(). 
+ + @param file - The table handler +*/ +void +Column_definition::redefine_stage1_common(const Column_definition *dup_field, + const handler *file) +{ + set_handler(dup_field->type_handler()); + default_value= dup_field->default_value; + DBUG_ASSERT(dup_field->charset); // Set by prepare_stage1() + charset= dup_field->charset; + length= dup_field->char_length; + pack_length= dup_field->pack_length; + decimals= dup_field->decimals; + unireg_check= dup_field->unireg_check; + flags= dup_field->flags; + interval= dup_field->interval; + vcol_info= dup_field->vcol_info; + invisible= dup_field->invisible; + check_constraint= dup_field->check_constraint; + comment= dup_field->comment; + option_list= dup_field->option_list; + versioning= dup_field->versioning; +} + + + +/** + maximum possible character length for blob. + + This method is used in Item_field::set_field to calculate + max_length for Item. + + For example: + CREATE TABLE t2 SELECT CONCAT(tinyblob_utf8_column) FROM t1; + must create a "VARCHAR(255) CHARACTER SET utf8" column. 
+ + @return + length +*/ + +uint32 Field_blob::char_length() const +{ + return Field_blob::character_octet_length(); +} + + +uint32 Field_blob::character_octet_length() const +{ + switch (packlength) + { + case 1: + return 255; + case 2: + return 65535; + case 3: + return 16777215; + case 4: + return (uint32) UINT_MAX32; + default: + DBUG_ASSERT(0); // we should never go here + return 0; + } +} + + +/** + Makes a clone of this object for ALTER/CREATE TABLE + + @param mem_root MEM_ROOT where to clone the field +*/ + +Create_field *Create_field::clone(MEM_ROOT *mem_root) const +{ + Create_field *res= new (mem_root) Create_field(*this); + return res; +} + +/** + Return true if default is an expression that must be saved explicitly + + This is: + - Not basic constants + - If field is a BLOB (Which doesn't support normal DEFAULT) +*/ + +bool Column_definition::has_default_expression() +{ + return (default_value && + (!default_value->expr->basic_const_item() || + (flags & BLOB_FLAG))); +} + + +bool Column_definition::set_compressed(const char *method) +{ + if (!method || !strcmp(method, zlib_compression_method->name)) + { + unireg_check= Field::TMYSQL_COMPRESSED; + compression_method_ptr= zlib_compression_method; + return false; + } + my_error(ER_UNKNOWN_COMPRESSION_METHOD, MYF(0), method); + return true; +} + + +bool Column_definition::set_compressed_deprecated(THD *thd, const char *method) +{ + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_DEPRECATED_SYNTAX, + ER_THD(thd, ER_WARN_DEPRECATED_SYNTAX), + " ... COMPRESSED...", + "' COMPRESSED... 
...'"); + return set_compressed(method); +} + + +bool +Column_definition::set_compressed_deprecated_column_attribute(THD *thd, + const char *pos, + const char *method) +{ + if (compression_method_ptr) + { + /* + Compression method has already been set, e.g.: + a VARCHAR(10) COMPRESSED DEFAULT 10 COMPRESSED + */ + thd->parse_error(ER_SYNTAX_ERROR, pos); + return true; + } + enum enum_field_types sql_type= real_field_type(); + /* We can't use f_is_blob here as pack_flag is not yet set */ + if (sql_type == MYSQL_TYPE_VARCHAR || sql_type == MYSQL_TYPE_TINY_BLOB || + sql_type == MYSQL_TYPE_BLOB || sql_type == MYSQL_TYPE_MEDIUM_BLOB || + sql_type == MYSQL_TYPE_LONG_BLOB) + return set_compressed_deprecated(thd, method); + else + my_error(ER_WRONG_FIELD_SPEC, MYF(0), field_name.str); + return true; +} + + +bool Column_definition::check_vcol_for_key(THD *thd) const +{ + if (vcol_info && (vcol_info->flags & VCOL_NOT_STRICTLY_DETERMINISTIC)) + { + /* use check_expression() to report an error */ + check_expression(vcol_info, &field_name, VCOL_GENERATED_STORED); + DBUG_ASSERT(thd->is_error()); + return true; + } + return false; +} + + +Send_field::Send_field(THD *thd, Item *item) +{ + item->make_send_field(thd, this); + normalize(); +} + + +/** + maximum possible display length for blob. + + @return + length +*/ + +uint32 Field_blob::max_display_length() const +{ + switch (packlength) + { + case 1: + return 255 * mbmaxlen(); + case 2: + return 65535 * mbmaxlen(); + case 3: + return 16777215 * mbmaxlen(); + case 4: + return (uint32) UINT_MAX32; + default: + DBUG_ASSERT(0); // we should never go here + return 0; + } +} + + +/***************************************************************************** + Warning handling +*****************************************************************************/ + +/** +* Produce warning or note about data saved into field. 
+ + @param level - level of message (Note/Warning/Error) + @param code - error code of message to be produced + @param cut_increment - whenever we should increase cut fields count + @current_row - current row number + + @note + This function won't produce warning or notes or increase cut fields counter + if count_cuted_fields == CHECK_FIELD_IGNORE or CHECK_FIELD_EXPRESSION + for the current thread. + + This allows us to avoid notes in optimisation, like + convert_constant_item(). + + @retval + 1 if count_cuted_fields == CHECK_FIELD_IGNORE and error level is not NOTE + @retval + 0 otherwise +*/ + +bool +Field::set_warning(Sql_condition::enum_warning_level level, uint code, + int cut_increment, ulong current_row) const +{ + /* + If this field was created only for type conversion purposes it + will have table == NULL. + */ + THD *thd= get_thd(); + + /* + In INPLACE ALTER, server can't know which row has generated + the warning, so the value of current row is supplied by the engine. + */ + if (current_row) + thd->get_stmt_da()->reset_current_row_for_warning(current_row); + + if (thd->count_cuted_fields > CHECK_FIELD_EXPRESSION) + { + thd->cuted_fields+= cut_increment; + push_warning_printf(thd, level, code, ER_THD(thd, code), field_name.str, + current_row ? current_row + : thd->get_stmt_da()->current_row_for_warning()); + return 0; + } + return level >= Sql_condition::WARN_LEVEL_WARN; +} + + +/** + Produce warning or note about datetime string data saved into field. + + @param level level of message (Note/Warning/Error) + @param code error code of message to be produced + @param str string value which we tried to save + @param ts_type type of datetime value (datetime/date/time) + @param cuted_increment whenever we should increase cut fields count or not + + @note + This function will always produce some warning but won't increase cut + fields counter if count_cuted_fields ==FIELD_CHECK_IGNORE for current + thread. 
+ + See also bug#2336 + +*/ + +void Field::set_datetime_warning(Sql_condition::enum_warning_level level, + uint code, const ErrConv *str, + const char *typestr, int cuted_increment) + const +{ + THD *thd= get_thd(); + if (thd->really_abort_on_warning() && level >= Sql_condition::WARN_LEVEL_WARN) + { + /* + field_name.str can be NULL when field is not in the select list: + SET SESSION SQL_MODE= 'STRICT_ALL_TABLES,NO_ZERO_DATE'; + CREATE OR REPLACE TABLE t2 SELECT 1 AS f FROM t1 GROUP BY FROM_DAYS(d); + Can't call push_warning_truncated_value_for_field() directly here, + as it expect a non-NULL name. + */ + thd->push_warning_wrong_or_truncated_value(level, false, typestr, + str->ptr(), + table->s->db.str, + table->s->table_name.str, + field_name.str); + } + else + set_warning(level, code, cuted_increment); +} + + +void Field::set_warning_truncated_wrong_value(const char *type_arg, + const char *value) +{ + THD *thd= get_thd(); + const char *db_name; + const char *table_name; + /* + table has in the past been 0 in case of wrong calls when processing + statistics tables. Let's protect against that. + */ + DBUG_ASSERT(table); + + db_name= (table && table->s->db.str) ? table->s->db.str : ""; + table_name= (table && table->s->table_name.str) ? 
+ table->s->table_name.str : ""; + + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_TRUNCATED_WRONG_VALUE_FOR_FIELD, + ER_THD(thd, ER_TRUNCATED_WRONG_VALUE_FOR_FIELD), + type_arg, value, db_name, table_name, field_name.str, + static_cast(thd->get_stmt_da()-> + current_row_for_warning())); +} + + +/* + Give warning for unusable key + + Note that the caller is responsible to call it only under ther + right note_verbosity level +*/ + +void Field::raise_note_cannot_use_key_part(THD *thd, + uint keynr, uint part, + const LEX_CSTRING &op, + Item *value, + Data_type_compatibility reason) + const +{ + StringBuffer<128> value_buffer; + const LEX_CSTRING keyname= table->s->key_info[keynr].name; + size_t value_length; + + value->print(&value_buffer, QT_EXPLAIN); + value_length= Well_formed_prefix(value_buffer.charset(), + value_buffer.ptr(), + MY_MIN(value_buffer.length(), 64)).length(); + /* + We must use c_ptr() here for the 'T' argument as it only works with + zero terminated strings. 
+ */ + switch (reason){ + case Data_type_compatibility::INCOMPATIBLE_COLLATION: + { + const LEX_CSTRING colf(charset()->coll_name); + const LEX_CSTRING colv(value->collation.collation->coll_name); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_UNKNOWN_ERROR, + "Cannot use key %`.*s part[%u] for lookup: " + "%`.*s.%`.*s.%`.*s of collation %`.*s " + "%.*s \"%.*T\" of collation %`.*s", + (int) keyname.length, keyname.str, + part, + (int) table->s->db.length, table->s->db.str, + (int) table->s->table_name.length, + table->s->table_name.str, + (int) field_name.length, field_name.str, + (int) colf.length, colf.str, + (int) op.length, op.str, + (int) value_length, value_buffer.c_ptr_safe(), + (int) colv.length, colv.str); + } + break; + case Data_type_compatibility::OK: + DBUG_ASSERT(0); + /* fall through */ + case Data_type_compatibility::INCOMPATIBLE_DATA_TYPE: + { + const LEX_CSTRING dtypef(type_handler()->name().lex_cstring()); + const LEX_CSTRING dtypev(value->type_handler()->name().lex_cstring()); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_UNKNOWN_ERROR, + "Cannot use key %`.*s part[%u] for lookup: " + "%`.*s.%`.*s.%`.*s of type %`.*s " + "%.*s \"%.*T\" of type %`.*s", + (int) keyname.length, keyname.str, + part, + (int) table->s->db.length, table->s->db.str, + (int) table->s->table_name.length, + table->s->table_name.str, + (int) field_name.length, field_name.str, + (int) dtypef.length, dtypef.str, + (int) op.length, op.str, + (int) value_length, value_buffer.c_ptr_safe(), + (int) dtypev.length, dtypev.str); + } + break; + } +} + + +/* + Give warning for unusable key + + Note that the caller is responsible to call it only under ther + right note_verbosity level +*/ + +void Field::raise_note_key_become_unused(THD *thd, const String &expr) const +{ + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_NOTE, ER_UNKNOWN_ERROR, + "Cannot use key parts with %`.*s.%`.*s.%`.*s " + "in the rewritten condition: %`.*s", + (int) 
table->s->db.length, table->s->db.str, + (int) table->s->table_name.length, table->s->table_name.str, + (int) field_name.length, field_name.str, + (int) expr.length(), expr.ptr()); +} + + +/* + @brief + Return possible keys for a field + + @details + Return bit map of keys over this field which can be used by the range + optimizer. For a field of a generic table such keys are all keys that starts + from this field. For a field of a materialized derived table/view such keys + are all keys in which this field takes a part. This is less restrictive as + keys for a materialized derived table/view are generated on the fly from + present fields, thus the case when a field for the beginning of a key is + absent is impossible. + + @return map of possible keys +*/ + +key_map Field::get_possible_keys() +{ + DBUG_ASSERT(table->pos_in_table_list); + return (table->pos_in_table_list->is_materialized_derived() ? + part_of_key : key_start); +} + + +bool Field::validate_value_in_record_with_warn(THD *thd, const uchar *record) +{ + MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, &table->read_set); + bool rc; + if ((rc= validate_value_in_record(thd, record))) + { + // Get and report val_str() for the DEFAULT value + StringBuffer tmp; + val_str(&tmp, ptr_in_record(record)); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_INVALID_DEFAULT_VALUE_FOR_FIELD, + ER_THD(thd, ER_INVALID_DEFAULT_VALUE_FOR_FIELD), + ErrConvString(&tmp).ptr(), field_name.str); + } + dbug_tmp_restore_column_map(&table->read_set, old_map); + return rc; +} + + +bool Field::save_in_field_default_value(bool view_error_processing) +{ + THD *thd= table->in_use; + + /* + TODO: MDEV-19597 Refactor TABLE::vers_update_fields() via stored virtual columns + This condition will go away as well as other conditions with vers_sys_field(). 
+ */ + if (vers_sys_field()) + { + if (flags & VERS_ROW_START) + set_time(); + else + set_max(); + return false; + } + + if (unlikely(flags & NO_DEFAULT_VALUE_FLAG && + real_type() != MYSQL_TYPE_ENUM)) + { + if (reset()) + { + my_message(ER_CANT_CREATE_GEOMETRY_OBJECT, + ER_THD(thd, ER_CANT_CREATE_GEOMETRY_OBJECT), MYF(0)); + return true; + } + + if (view_error_processing) + { + TABLE_LIST *view= table->pos_in_table_list->top_table(); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_NO_DEFAULT_FOR_VIEW_FIELD, + ER_THD(thd, ER_NO_DEFAULT_FOR_VIEW_FIELD), + view->view_db.str, + view->view_name.str); + } + else + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_NO_DEFAULT_FOR_FIELD, + ER_THD(thd, ER_NO_DEFAULT_FOR_FIELD), + field_name.str); + } + return true; + } + set_default(); + return + !is_null() && + validate_value_in_record_with_warn(thd, table->record[0]) && + thd->is_error(); +} + + +bool Field::save_in_field_ignore_value(bool view_error_processing) +{ + enum_sql_command com= table->in_use->lex->sql_command; + // All insert-like commands + if (com == SQLCOM_INSERT || com == SQLCOM_REPLACE || + com == SQLCOM_INSERT_SELECT || com == SQLCOM_REPLACE_SELECT || + com == SQLCOM_LOAD) + return save_in_field_default_value(view_error_processing); + return 0; // ignore +} + + +void Field::register_field_in_read_map() +{ + if (vcol_info) + { + Item *vcol_item= vcol_info->expr; + vcol_item->walk(&Item::register_field_in_read_map, 1, 0); + } + bitmap_set_bit(table->read_set, field_index); +} + + +bool Field::val_str_nopad(MEM_ROOT *mem_root, LEX_CSTRING *to) +{ + StringBuffer str; + bool rc= false; + THD *thd= get_thd(); + Sql_mode_instant_remove sms(thd, MODE_PAD_CHAR_TO_FULL_LENGTH); + + val_str(&str); + if (!(to->length= str.length())) + *to= empty_clex_str; + else if ((rc= !(to->str= strmake_root(mem_root, str.ptr(), str.length())))) + to->length= 0; + + return rc; +} + + +void Field::print_key_value(String *out, uint32 length) +{ + if 
(charset() == &my_charset_bin) + print_key_value_binary(out, ptr, length); + else + val_str(out); +} + + +void Field_string::print_key_value(String *out, uint32 length) +{ + if (charset() == &my_charset_bin) + { + size_t len= field_charset()->lengthsp((const char*) ptr, length); + print_key_value_binary(out, ptr, static_cast(len)); + } + else + { + THD *thd= get_thd(); + sql_mode_t sql_mode_backup= thd->variables.sql_mode; + thd->variables.sql_mode&= ~MODE_PAD_CHAR_TO_FULL_LENGTH; + val_str(out,out); + thd->variables.sql_mode= sql_mode_backup; + } +} + + +void Field_varstring::print_key_value(String *out, uint32 length) +{ + if (charset() == &my_charset_bin) + print_key_value_binary(out, get_data(), get_length()); + else + val_str(out,out); +} + + +void Field_blob::print_key_value(String *out, uint32 length) +{ + if (charset() == &my_charset_bin) + { + uchar *blob; + memcpy(&blob, ptr+packlength, sizeof(uchar*)); + print_key_value_binary(out, blob, get_length()); + } + else + val_str(out, out); +} + + +/* + @brief Print value of the key part + + @param + out Output string + key value of the key + length Length of field in bytes, + excluding NULL flag and length bytes +*/ + + +void +Field::print_key_part_value(String *out, const uchar* key, uint32 length) +{ + StringBuffer<128> tmp(system_charset_info); + uint null_byte= 0; + if (real_maybe_null()) + { + /* + Byte 0 of key is the null-byte. If set, key is NULL. 
+ Otherwise, print the key value starting immediately after the + null-byte + */ + if (*key) + { + out->append(NULL_clex_str); + return; + } + null_byte++; // Skip null byte + } + + set_key_image(key + null_byte, length); + print_key_value(&tmp, length); + if (charset() == &my_charset_bin) + out->append(tmp.ptr(), tmp.length(), tmp.charset()); + else + tmp.print(out, system_charset_info); +} + + +void Field::print_key_value_binary(String *out, const uchar* key, uint32 length) +{ + out->append_semi_hex((const char*)key, length, charset()); +} + + +Virtual_column_info* Virtual_column_info::clone(THD *thd) +{ + Virtual_column_info* dst= new (thd->mem_root) Virtual_column_info(*this); + if (!dst) + return NULL; + if (expr) + { + dst->expr= expr->build_clone(thd); + if (!dst->expr) + return NULL; + } + if (!thd->make_lex_string(&dst->name, name.str, name.length)) + return NULL; + return dst; +}; diff --git a/sql/field.h b/sql/field.h new file mode 100644 index 00000000..c456b2e7 --- /dev/null +++ b/sql/field.h @@ -0,0 +1,5941 @@ +#ifndef FIELD_INCLUDED +#define FIELD_INCLUDED +/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. + Copyright (c) 2008, 2021, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Because of the function make_new_field() all field classes that have static + variables must declare the size_of() member function. 
+*/ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include "mysqld.h" /* system_charset_info */ +#include "table.h" /* TABLE */ +#include "sql_string.h" /* String */ +#include "my_decimal.h" /* my_decimal */ +#include "sql_error.h" /* Sql_condition */ +#include "compat56.h" +#include "sql_type.h" /* Type_std_attributes */ +#include "field_comp.h" + +class Send_field; +class Copy_field; +class Protocol; +class Protocol_text; +class Create_field; +class Relay_log_info; +class Field; +class Column_statistics; +class Column_statistics_collected; +class Item_func; +class Item_bool_func; +class Item_equal; +class Virtual_tmp_table; +class Qualified_column_ident; +class Table_ident; +class SEL_ARG; +class RANGE_OPT_PARAM; +struct KEY_PART; +struct SORT_FIELD; +struct SORT_FIELD_ATTR; + +enum enum_check_fields +{ + CHECK_FIELD_IGNORE, + CHECK_FIELD_EXPRESSION, + CHECK_FIELD_WARN, + CHECK_FIELD_ERROR_FOR_NULL, +}; + + +enum enum_conv_type +{ + CONV_TYPE_PRECISE, + CONV_TYPE_VARIANT, + CONV_TYPE_SUBSET_TO_SUPERSET, + CONV_TYPE_SUPERSET_TO_SUBSET, + CONV_TYPE_IMPOSSIBLE +}; + + +class Conv_param +{ + uint16 m_table_def_flags; +public: + Conv_param(uint16 table_def_flags) + :m_table_def_flags(table_def_flags) + { } + uint16 table_def_flags() const { return m_table_def_flags; } +}; + + +class Conv_source: public Type_handler_hybrid_field_type +{ + uint16 m_metadata; + CHARSET_INFO *m_cs; +public: + Conv_source(const Type_handler *h, uint16 metadata, CHARSET_INFO *cs) + :Type_handler_hybrid_field_type(h), + m_metadata(metadata), + m_cs(cs) + { + DBUG_ASSERT(cs); + } + uint16 metadata() const { return m_metadata; } + uint mbmaxlen() const { return m_cs->mbmaxlen; } +}; + + +/* + Common declarations for Field and Item +*/ +class Value_source +{ +protected: + + // Parameters for warning and note generation + class Warn_filter + { + bool m_want_warning_edom; + bool m_want_note_truncated_spaces; + public: + Warn_filter(bool 
want_warning_edom, bool want_note_truncated_spaces) : + m_want_warning_edom(want_warning_edom), + m_want_note_truncated_spaces(want_note_truncated_spaces) + { } + Warn_filter(const THD *thd); + bool want_warning_edom() const + { return m_want_warning_edom; } + bool want_note_truncated_spaces() const + { return m_want_note_truncated_spaces; } + }; + class Warn_filter_all: public Warn_filter + { + public: + Warn_filter_all() :Warn_filter(true, true) { } + }; + + class Converter_double_to_longlong + { + protected: + bool m_error; + longlong m_result; + public: + Converter_double_to_longlong(double nr, bool unsigned_flag); + longlong result() const { return m_result; } + bool error() const { return m_error; } + void push_warning(THD *thd, double nr, bool unsigned_flag); + }; + class Converter_double_to_longlong_with_warn: + public Converter_double_to_longlong + { + public: + Converter_double_to_longlong_with_warn(THD *thd, double nr, + bool unsigned_flag) + :Converter_double_to_longlong(nr, unsigned_flag) + { + if (m_error) + push_warning(thd, nr, unsigned_flag); + } + Converter_double_to_longlong_with_warn(double nr, bool unsigned_flag) + :Converter_double_to_longlong(nr, unsigned_flag) + { + if (m_error) + push_warning(current_thd, nr, unsigned_flag); + } + }; + + // String-to-number converters + class Converter_string_to_number + { + protected: + char *m_end_of_num; // Where the low-level conversion routine stopped + int m_error; // The error code returned by the low-level routine + bool m_edom; // If EDOM-alike error happened during conversion + /** + Check string-to-number conversion and produce a warning if + - could not convert any digits (EDOM-alike error) + - found garbage at the end of the string + - found extra spaces at the end (a note) + See also Field_num::check_edom_and_truncation() for a similar function. + + @param thd - the thread that will be used to generate warnings. 
+ Can be NULL (which means current_thd will be used + if a warning is really necessary). + @param type - name of the data type + (e.g. "INTEGER", "DECIMAL", "DOUBLE") + @param cs - character set of the original string + @param str - the original string + @param end - the end of the string + @param allow_notes - tells if trailing space notes should be displayed + or suppressed. + + Unlike Field_num::check_edom_and_truncation(), this function does not + distinguish between EDOM and truncation and reports the same warning for + both cases. Perhaps we should eventually print different warnings, + to make the explicit CAST work closer to the implicit cast in + Field_xxx::store(). + */ + void check_edom_and_truncation(THD *thd, Warn_filter filter, + const char *type, + CHARSET_INFO *cs, + const char *str, + size_t length) const; + public: + int error() const { return m_error; } + }; + + class Converter_strntod: public Converter_string_to_number + { + double m_result; + public: + Converter_strntod(CHARSET_INFO *cs, const char *str, size_t length) + { + m_result= cs->strntod((char *) str, length, &m_end_of_num, &m_error); + // strntod() does not set an error if the input string was empty + m_edom= m_error !=0 || str == m_end_of_num; + } + double result() const { return m_result; } + }; + + class Converter_string_to_longlong: public Converter_string_to_number + { + protected: + longlong m_result; + public: + longlong result() const { return m_result; } + }; + + class Converter_strntoll: public Converter_string_to_longlong + { + public: + Converter_strntoll(CHARSET_INFO *cs, const char *str, size_t length) + { + m_result= cs->strntoll(str, length, 10, &m_end_of_num, &m_error); + /* + All non-zero errors means EDOM error. + strntoll() does not set an error if the input string was empty. + Check it here. + Notice the different with the same condition in Converter_strntoll10. 
+ */ + m_edom= m_error != 0 || str == m_end_of_num; + } + }; + + class Converter_strtoll10: public Converter_string_to_longlong + { + public: + Converter_strtoll10(CHARSET_INFO *cs, const char *str, size_t length) + { + m_end_of_num= (char *) str + length; + m_result= cs->strtoll10(str, &m_end_of_num, &m_error); + /* + Negative error means "good negative number". + Only a positive m_error value means a real error. + strtoll10() sets error to MY_ERRNO_EDOM in case of an empty string, + so we don't have to additionally catch empty strings here. + */ + m_edom= m_error > 0; + } + }; + + class Converter_str2my_decimal: public Converter_string_to_number + { + public: + Converter_str2my_decimal(uint mask, + CHARSET_INFO *cs, const char *str, size_t length, + my_decimal *buf) + { + DBUG_ASSERT(length < UINT_MAX32); + m_error= str2my_decimal(mask, str, length, cs, + buf, (const char **) &m_end_of_num); + // E_DEC_TRUNCATED means a very minor truncation: '1e-100' -> 0 + m_edom= m_error && m_error != E_DEC_TRUNCATED; + } + }; + + + // String-to-number converters with automatic warning generation + class Converter_strntod_with_warn: public Converter_strntod + { + public: + Converter_strntod_with_warn(THD *thd, Warn_filter filter, + CHARSET_INFO *cs, + const char *str, size_t length) + :Converter_strntod(cs, str, length) + { + check_edom_and_truncation(thd, filter, "DOUBLE", cs, str, length); + } + }; + + class Converter_strntoll_with_warn: public Converter_strntoll + { + public: + Converter_strntoll_with_warn(THD *thd, Warn_filter filter, + CHARSET_INFO *cs, + const char *str, size_t length) + :Converter_strntoll(cs, str, length) + { + check_edom_and_truncation(thd, filter, "INTEGER", cs, str, length); + } + }; + + class Converter_strtoll10_with_warn: public Converter_strtoll10 + { + public: + Converter_strtoll10_with_warn(THD *thd, Warn_filter filter, + CHARSET_INFO *cs, + const char *str, size_t length) + :Converter_strtoll10(cs, str, length) + { + 
check_edom_and_truncation(thd, filter, "INTEGER", cs, str, length); + } + }; + + class Converter_str2my_decimal_with_warn: public Converter_str2my_decimal + { + public: + Converter_str2my_decimal_with_warn(THD *thd, Warn_filter filter, + uint mask, CHARSET_INFO *cs, + const char *str, size_t length, + my_decimal *buf) + :Converter_str2my_decimal(mask, cs, str, length, buf) + { + check_edom_and_truncation(thd, filter, "DECIMAL", cs, str, length); + } + }; + + + // String-to-number conversion methods for the old code compatibility + longlong longlong_from_string_with_check(CHARSET_INFO *cs, const char *cptr, + const char *end) const + { + /* + TODO: Give error if we wanted a signed integer and we got an unsigned + one + + Notice, longlong_from_string_with_check() honors thd->no_error, because + it's used to handle queries like this: + SELECT COUNT(@@basedir); + and is called when Item_func_get_system_var::update_null_value() + suppresses warnings and then calls val_int(). + The other methods {double|decimal}_from_string_with_check() ignore + thd->no_errors, because they are not used for update_null_value() + and they always allow all kind of warnings. 
+ */ + THD *thd= current_thd; + return Converter_strtoll10_with_warn(thd, Warn_filter(thd), + cs, cptr, end - cptr).result(); + } + + double double_from_string_with_check(CHARSET_INFO *cs, const char *cptr, + const char *end) const + { + return Converter_strntod_with_warn(NULL, Warn_filter_all(), + cs, cptr, end - cptr).result(); + } + my_decimal *decimal_from_string_with_check(my_decimal *decimal_value, + CHARSET_INFO *cs, + const char *cptr, + const char *end) + { + Converter_str2my_decimal_with_warn(NULL, Warn_filter_all(), + E_DEC_FATAL_ERROR & ~E_DEC_BAD_NUM, + cs, cptr, end - cptr, decimal_value); + return decimal_value; + } + + longlong longlong_from_hex_hybrid(const char *str, size_t length) + { + const char *end= str + length; + const char *ptr= end - MY_MIN(length, sizeof(longlong)); + ulonglong value= 0; + for ( ; ptr != end ; ptr++) + value= (value << 8) + (ulonglong) (uchar) *ptr; + return (longlong) value; + } + + longlong longlong_from_string_with_check(const String *str) const + { + return longlong_from_string_with_check(str->charset(), + str->ptr(), str->end()); + } + double double_from_string_with_check(const String *str) const + { + return double_from_string_with_check(str->charset(), + str->ptr(), str->end()); + } + my_decimal *decimal_from_string_with_check(my_decimal *decimal_value, + const String *str) + { + return decimal_from_string_with_check(decimal_value, str->charset(), + str->ptr(), str->end()); + } + // End of String-to-number conversion methods + +public: + /* + The enumeration Subst_constraint is currently used only in implementations + of the virtual function subst_argument_checker. + */ + enum Subst_constraint + { + ANY_SUBST, /* Any substitution for a field is allowed */ + IDENTITY_SUBST /* Substitution for a field is allowed if any two + different values of the field type are not equal */ + }; + /* + Item context attributes. + Comparison functions pass their attributes to propagate_equal_fields(). 
+ For example, for string comparison, the collation of the comparison + operation is important inside propagate_equal_fields(). + */ + class Context + { + /* + Which type of propagation is allowed: + - ANY_SUBST (loose equality, according to the collation), or + - IDENTITY_SUBST (strict binary equality). + */ + Subst_constraint m_subst_constraint; + /* + Comparison type. + Important only when ANY_SUBSTS. + */ + const Type_handler *m_compare_handler; + /* + Collation of the comparison operation. + Important only when ANY_SUBST. + */ + CHARSET_INFO *m_compare_collation; + public: + Context(Subst_constraint subst, const Type_handler *h, CHARSET_INFO *cs) + :m_subst_constraint(subst), + m_compare_handler(h), + m_compare_collation(cs) + { DBUG_ASSERT(h == h->type_handler_for_comparison()); } + Subst_constraint subst_constraint() const { return m_subst_constraint; } + const Type_handler *compare_type_handler() const + { + DBUG_ASSERT(m_subst_constraint == ANY_SUBST); + return m_compare_handler; + } + CHARSET_INFO *compare_collation() const + { + DBUG_ASSERT(m_subst_constraint == ANY_SUBST); + return m_compare_collation; + } + }; + class Context_identity: public Context + { // Use this to request only exact value, no invariants. 
+ public: + Context_identity() + :Context(IDENTITY_SUBST, &type_handler_long_blob, &my_charset_bin) { } + }; + class Context_boolean: public Context + { // Use this when an item is [a part of] a boolean expression + public: + Context_boolean() + :Context(ANY_SUBST, &type_handler_slonglong, &my_charset_bin) { } + }; +}; + + +#define STORAGE_TYPE_MASK 7 +#define COLUMN_FORMAT_MASK 7 +#define COLUMN_FORMAT_SHIFT 3 + +/* The length of the header part for each virtual column in the .frm file */ +#define FRM_VCOL_OLD_HEADER_SIZE(b) (3 + MY_TEST(b)) +#define FRM_VCOL_NEW_BASE_SIZE 16 +#define FRM_VCOL_NEW_HEADER_SIZE 6 + +class Count_distinct_field; + +struct ha_field_option_struct; + +struct st_cache_field; +int field_conv(Field *to,Field *from); +int truncate_double(double *nr, uint field_length, decimal_digits_t dec, + bool unsigned_flag, double max_value); + +inline uint get_enum_pack_length(int elements) +{ + return elements < 256 ? 1 : 2; +} + +inline uint get_set_pack_length(int elements) +{ + uint len= (elements + 7) / 8; + return len > 4 ? 8 : len; +} + + +/** + Tests if field type is temporal and has date part, + i.e. represents DATE, DATETIME or TIMESTAMP types in SQL. + + @param type Field type, as returned by field->type(). + @retval true If field type is temporal type with date part. + @retval false If field type is not temporal type with date part. +*/ +inline bool is_temporal_type_with_date(enum_field_types type) +{ + switch (type) + { + case MYSQL_TYPE_DATE: + case MYSQL_TYPE_DATETIME: + case MYSQL_TYPE_TIMESTAMP: + return true; + case MYSQL_TYPE_DATETIME2: + case MYSQL_TYPE_TIMESTAMP2: + DBUG_ASSERT(0); // field->real_type() should not get to here. 
+ return false; + default: + return false; + } +} + + +enum enum_vcol_info_type +{ + VCOL_GENERATED_VIRTUAL, VCOL_GENERATED_STORED, + VCOL_DEFAULT, VCOL_CHECK_FIELD, VCOL_CHECK_TABLE, + VCOL_USING_HASH, + /* Additional types should be added here */ + + VCOL_GENERATED_VIRTUAL_INDEXED, // this is never written in .frm + /* Following is the highest value last */ + VCOL_TYPE_NONE = 127 // Since the 0 value is already in use +}; + +static inline const char *vcol_type_name(enum_vcol_info_type type) +{ + switch (type) + { + case VCOL_GENERATED_VIRTUAL: + case VCOL_GENERATED_VIRTUAL_INDEXED: + case VCOL_GENERATED_STORED: + return "GENERATED ALWAYS AS"; + case VCOL_DEFAULT: + return "DEFAULT"; + case VCOL_CHECK_FIELD: + case VCOL_CHECK_TABLE: + return "CHECK"; + case VCOL_USING_HASH: + return "USING HASH"; + case VCOL_TYPE_NONE: + return "UNTYPED"; + } + return 0; +} + +/* + Flags for Virtual_column_info. If none is set, the expression must be + a constant with no side-effects, so it's calculated at CREATE TABLE time, + stored in table->record[2], and not recalculated for every statement. +*/ +#define VCOL_FIELD_REF 1 +#define VCOL_NON_DETERMINISTIC 2 +#define VCOL_SESSION_FUNC 4 /* uses session data, e.g. 
USER or DAYNAME */ +#define VCOL_TIME_FUNC 8 /* safe for SBR */ +#define VCOL_AUTO_INC 16 +#define VCOL_IMPOSSIBLE 32 +#define VCOL_NEXTVAL 64 /* NEXTVAL is not implemented for vcols */ + +#define VCOL_NOT_STRICTLY_DETERMINISTIC \ + (VCOL_NON_DETERMINISTIC | VCOL_TIME_FUNC | VCOL_SESSION_FUNC) + +/* + Virtual_column_info is the class to contain additional + characteristics that is specific for a virtual/computed + field such as: + - the defining expression that is evaluated to compute the value + of the field + - whether the field is to be stored in the database + - whether the field is used in a partitioning expression +*/ + +class Virtual_column_info: public Sql_alloc, + private Type_handler_hybrid_field_type +{ +private: + enum_vcol_info_type vcol_type; /* Virtual column expression type */ + /* + The following data is only updated by the parser and read + when a Create_field object is created/initialized. + */ + /* Flag indicating that the field used in a partitioning expression */ + bool in_partitioning_expr; + +public: + /* Flag indicating that the field is physically stored in the database */ + bool stored_in_db; + bool utf8; /* Already in utf8 */ + bool automatic_name; + bool if_not_exists; + Item *expr; + Lex_ident name; /* Name of constraint */ + /* see VCOL_* (VCOL_FIELD_REF, ...) 
*/ + uint flags; + + Virtual_column_info() + :Type_handler_hybrid_field_type(&type_handler_null), + vcol_type((enum_vcol_info_type)VCOL_TYPE_NONE), + in_partitioning_expr(FALSE), stored_in_db(FALSE), + utf8(TRUE), automatic_name(FALSE), expr(NULL), flags(0) + { + name.str= NULL; + name.length= 0; + }; + Virtual_column_info* clone(THD *thd); + ~Virtual_column_info() = default; + enum_vcol_info_type get_vcol_type() const + { + return vcol_type; + } + void set_vcol_type(enum_vcol_info_type v_type) + { + vcol_type= v_type; + } + const char *get_vcol_type_name() const + { + DBUG_ASSERT(vcol_type != VCOL_TYPE_NONE); + return vcol_type_name(vcol_type); + } + void set_handler(const Type_handler *handler) + { + /* Calling this function can only be done once. */ + DBUG_ASSERT(type_handler() == &type_handler_null); + Type_handler_hybrid_field_type::set_handler(handler); + } + bool is_stored() const + { + return stored_in_db; + } + void set_stored_in_db_flag(bool stored) + { + stored_in_db= stored; + } + bool is_in_partitioning_expr() const + { + return in_partitioning_expr; + } + void mark_as_in_partitioning_expr() + { + in_partitioning_expr= TRUE; + } + bool need_refix() const + { + return flags & VCOL_SESSION_FUNC; + } + bool fix_expr(THD *thd); + bool fix_session_expr(THD *thd); + bool cleanup_session_expr(); + bool fix_and_check_expr(THD *thd, TABLE *table); + inline bool is_equal(const Virtual_column_info* vcol) const; + inline void print(String*); +}; + +class Binlog_type_info +{ +public: + enum binlog_sign_t + { + SIGN_SIGNED, + SIGN_UNSIGNED, + SIGN_NOT_APPLICABLE // for non-numeric types + }; + /** + Retrieve the field metadata for fields. 
+ */ + CHARSET_INFO *m_cs; // NULL if not relevant + TYPELIB *m_enum_typelib; // NULL if not relevant + TYPELIB *m_set_typelib; // NULL if not relevant + binlog_sign_t m_signedness; + uint16 m_metadata; + uint8 m_metadata_size; + uchar m_type_code; // according to Field::binlog_type() + uchar m_geom_type; // Non-geometry fields can return 0 + + Binlog_type_info(uchar type_code, + uint16 metadata, + uint8 metadata_size) + :m_cs(NULL), + m_enum_typelib(NULL), + m_set_typelib(NULL), + m_signedness(SIGN_NOT_APPLICABLE), + m_metadata(metadata), + m_metadata_size(metadata_size), + m_type_code(type_code), + m_geom_type(0) + {}; + Binlog_type_info(uchar type_code, uint16 metadata, + uint8 metadata_size, + binlog_sign_t signedness) + : m_cs(NULL), + m_enum_typelib(NULL), + m_set_typelib(NULL), + m_signedness(signedness), + m_metadata(metadata), + m_metadata_size(metadata_size), + m_type_code(type_code), + m_geom_type(0) + {}; + Binlog_type_info(uchar type_code, uint16 metadata, + uint8 metadata_size, CHARSET_INFO *cs) + :m_cs(cs), + m_enum_typelib(NULL), + m_set_typelib(NULL), + m_signedness(SIGN_NOT_APPLICABLE), + m_metadata(metadata), + m_metadata_size(metadata_size), + m_type_code(type_code), + m_geom_type(0) + {}; + Binlog_type_info(uchar type_code, uint16 metadata, + uint8 metadata_size, + CHARSET_INFO *cs, + TYPELIB *t_enum, TYPELIB *t_set) + :m_cs(cs), + m_enum_typelib(t_enum), + m_set_typelib(t_set), + m_signedness(SIGN_NOT_APPLICABLE), + m_metadata(metadata), + m_metadata_size(metadata_size), + m_type_code(type_code), + m_geom_type(0) + {}; + Binlog_type_info(uchar type_code, uint16 metadata, + uint8 metadata_size, CHARSET_INFO *cs, + uchar geom_type) + :m_cs(cs), + m_enum_typelib(NULL), + m_set_typelib(NULL), + m_signedness(SIGN_NOT_APPLICABLE), + m_metadata(metadata), + m_metadata_size(metadata_size), + m_type_code(type_code), + m_geom_type(geom_type) + {}; + static void *operator new(size_t size, MEM_ROOT *mem_root) throw () + { return alloc_root(mem_root, 
size); } +}; + + +class Binlog_type_info_fixed_string: public Binlog_type_info +{ +public: + Binlog_type_info_fixed_string(uchar type_code, + uint32 octet_length, + CHARSET_INFO *cs); +}; + + +class Field: public Value_source +{ + Field(const Item &); /* Prevent use of these */ + void operator=(Field &); +protected: + int save_in_field_str(Field *to) + { + StringBuffer result(charset()); + val_str(&result); + return to->store(result.ptr(), result.length(), charset()); + } + void error_generated_column_function_is_not_allowed(THD *thd, bool error) + const; + static void do_field_eq(Copy_field *copy); + static void do_field_int(Copy_field *copy); + static void do_field_real(Copy_field *copy); + static void do_field_string(Copy_field *copy); + static void do_field_date(Copy_field *copy); + static void do_field_temporal(Copy_field *copy, date_mode_t fuzzydate); + static void do_field_datetime(Copy_field *copy); + static void do_field_timestamp(Copy_field *copy); + static void do_field_decimal(Copy_field *copy); +public: + static void *operator new(size_t size, MEM_ROOT *mem_root) throw () + { return alloc_root(mem_root, size); } + static void *operator new(size_t size) throw () + { + DBUG_ASSERT(size < UINT_MAX32); + return thd_alloc(current_thd, (uint) size); + } + static void operator delete(void *ptr_arg, size_t size) { TRASH_FREE(ptr_arg, size); } + static void operator delete(void *ptr, MEM_ROOT *mem_root) + { DBUG_ASSERT(0); } + + bool marked_for_read() const; + bool marked_for_write_or_computed() const; + + /** + Used by System Versioning. + */ + virtual void set_max() + { DBUG_ASSERT(0); } + virtual bool is_max() + { DBUG_ASSERT(0); return false; } + + uchar *ptr; // Position to field in record + + /** + Byte where the @c NULL bit is stored inside a record. If this Field is a + @c NOT @c NULL field, this member is @c NULL. 
+ */ + uchar *null_ptr; + /* + Note that you can use table->in_use as replacement for current_thd member + only inside of val_*() and store() members (e.g. you can't use it in cons) + */ + TABLE *table; // Pointer for table + TABLE *orig_table; // Pointer to original table + const char * const *table_name; // Pointer to alias in TABLE + LEX_CSTRING field_name; + LEX_CSTRING comment; + /** reference to the list of options or NULL */ + engine_option_value *option_list; + ha_field_option_struct *option_struct; /* structure with parsed options */ + /* Field is part of the following keys */ + key_map key_start, part_of_key, part_of_key_not_clustered; + + /* + Bitmap of indexes that have records ordered by col1, ... this_field, ... + + For example, INDEX (col(prefix_n)) is not present in col.part_of_sortkey. + */ + key_map part_of_sortkey; + /* + We use three additional unireg types for TIMESTAMP to overcome limitation + of current binary format of .frm file. We'd like to be able to support + NOW() as default and on update value for such fields but unable to hold + this info anywhere except unireg_check field. This issue will be resolved + in more clean way with transition to new text based .frm format. + See also comment for Field_timestamp::Field_timestamp(). 
+ */ + enum __attribute__((packed)) utype { + NONE=0, + NEXT_NUMBER=15, // AUTO_INCREMENT + TIMESTAMP_OLD_FIELD=18, // TIMESTAMP created before 4.1.3 + TIMESTAMP_DN_FIELD=21, // TIMESTAMP DEFAULT NOW() + TIMESTAMP_UN_FIELD=22, // TIMESTAMP ON UPDATE NOW() + TIMESTAMP_DNUN_FIELD=23, // TIMESTAMP DEFAULT NOW() ON UPDATE NOW() + TMYSQL_COMPRESSED= 24, // Compatibility with TMySQL + }; + enum imagetype { itRAW, itMBR}; + + utype unireg_check; + field_visibility_t invisible; + uint32 field_length; // Length of field + uint32 flags; + field_index_t field_index; // field number in fields array + uchar null_bit; // Bit used to test null bit + + /** + If true, this field was created in create_tmp_field_from_item from a NULL + value. This means that the type of the field is just a guess, and the type + may be freely coerced to another type. + + @see create_tmp_field_from_item + @see Item_type_holder::get_real_type + + */ + bool is_created_from_null_item; + + /* + Selectivity of the range condition over this field. + When calculating this selectivity a range predicate + is taken into account only if: + - it is extracted from the WHERE clause + - it depends only on the table the field belongs to + */ + double cond_selectivity; + + /* + The next field in the class of equal fields at the top AND level + of the WHERE clause + */ + Field *next_equal_field; + + /* + This structure is used for statistical data on the column + that has been read from the statistical table column_stat + */ + Column_statistics *read_stats; + /* + This structure is used for statistical data on the column that + is collected by the function collect_statistics_for_table + */ + Column_statistics_collected *collected_stats; + + /* + This is additional data provided for any computed(virtual) field, + default function or check constraint. + In particular it includes a pointer to the item by which this field + can be computed from other fields. 
+ */ + Virtual_column_info *vcol_info, *check_constraint, *default_value; + + Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg, + uchar null_bit_arg, utype unireg_check_arg, + const LEX_CSTRING *field_name_arg); + virtual ~Field() = default; + + virtual Type_numeric_attributes type_numeric_attributes() const + { + return Type_numeric_attributes(field_length, decimals(), is_unsigned()); + } + Type_std_attributes type_std_attributes() const + { + return Type_std_attributes(type_numeric_attributes(), dtcollation()); + } + + bool is_unsigned() const { return flags & UNSIGNED_FLAG; } + + bool check_assignability_from(const Type_handler *from, bool ignore) const; + bool check_assignability_from(const Field *from, bool ignore) const + { + return check_assignability_from(from->type_handler(), ignore); + } + + /** + Convenience definition of a copy function returned by + Field::get_copy_func() + */ + typedef void Copy_func(Copy_field*); + virtual Copy_func *get_copy_func(const Field *from) const= 0; + virtual Copy_func *get_copy_func_to(const Field *to) const + { + return to->get_copy_func(this); + } + /* Store functions returns 1 on overflow and -1 on fatal error */ + virtual int store_field(Field *from) { return from->save_in_field(this); } + virtual int save_in_field(Field *to)= 0; + /** + Check if it is possible just copy the value + of the field 'from' to the field 'this', e.g. 
for + INSERT INTO t1 (field1) SELECT field2 FROM t2; + @param from - The field to copy from + @retval true - it is possible to just copy value of 'from' to 'this' + @retval false - conversion is needed + */ + virtual bool memcpy_field_possible(const Field *from) const= 0; + virtual bool make_empty_rec_store_default_value(THD *thd, Item *item); + virtual void make_empty_rec_reset(THD *thd) + { + reset(); + } + virtual int store(const char *to, size_t length,CHARSET_INFO *cs)=0; + /* + This is used by engines like CSV and Federated to signal the field + that the data is going to be in text (rather than binary) representation, + even if cs points to &my_charset_bin. + + If a Field distinguishes between text and binary formats (e.g. INET6), + we cannot call store(str,length,&my_charset_bin), + to avoid "field" mis-interpreting the data format as binary. + */ + virtual int store_text(const char *to, size_t length, CHARSET_INFO *cs) + { + return store(to, length, cs); + } + virtual int store_binary(const char *to, size_t length) + { + return store(to, length, &my_charset_bin); + } + virtual int store_hex_hybrid(const char *str, size_t length); + virtual int store(double nr)=0; + virtual int store(longlong nr, bool unsigned_val)=0; + virtual int store_decimal(const my_decimal *d)=0; + virtual int store_time_dec(const MYSQL_TIME *ltime, uint dec); + virtual int store_timestamp_dec(const timeval &ts, uint dec); + int store_timestamp(my_time_t timestamp, ulong sec_part) + { + return store_timestamp_dec(Timeval(timestamp, sec_part), + TIME_SECOND_PART_DIGITS); + } + /** + Store a value represented in native format + */ + virtual int store_native(const Native &value) + { + DBUG_ASSERT(0); + reset(); + return 0; + } + int store_time(const MYSQL_TIME *ltime) + { return store_time_dec(ltime, TIME_SECOND_PART_DIGITS); } + int store(const char *to, size_t length, CHARSET_INFO *cs, + enum_check_fields check_level); + int store_text(const char *to, size_t length, CHARSET_INFO *cs, + 
                 enum_check_fields check_level);
+  int store(const LEX_STRING *ls, CHARSET_INFO *cs)
+  {
+    DBUG_ASSERT(ls->length < UINT_MAX32);
+    return store(ls->str, (uint) ls->length, cs);
+  }
+  int store(const LEX_CSTRING *ls, CHARSET_INFO *cs)
+  {
+    DBUG_ASSERT(ls->length < UINT_MAX32);
+    return store(ls->str, (uint) ls->length, cs);
+  }
+  int store(const LEX_CSTRING &ls, CHARSET_INFO *cs)
+  {
+    DBUG_ASSERT(ls.length < UINT_MAX32);
+    return store(ls.str, (uint) ls.length, cs);
+  }
+
+  /*
+    @brief
+    Store minimum/maximum value of a column in the statistics table.
+    @param
+    field  statistical table field
+    str    value buffer
+  */
+  virtual int store_to_statistical_minmax_field(Field *field, String *str);
+
+  /*
+    @brief
+    Store minimum/maximum value of a column from the statistical table.
+    @param
+    field  statistical table field
+    str    value buffer
+  */
+  virtual int store_from_statistical_minmax_field(Field *field, String *str,
+                                                  MEM_ROOT *mem);
+
+#ifdef HAVE_MEM_CHECK
+  /**
+    Mark unused memory in the field as defined. Mainly used to ensure
+    that if we write the full field to disk (for example in
+    Count_distinct_field::add()), we don't write uninitialized data to
+    disk which would confuse valgrind or MSAN.
+  */
+  virtual void mark_unused_memory_as_defined() {}
+#else
+  void mark_unused_memory_as_defined() {}
+#endif
+
+  virtual double val_real()=0;
+  virtual longlong val_int()=0;
+  /*
+    Get ulonglong representation.
+    Negative values are truncated to 0.
+  */
+  virtual ulonglong val_uint(void)
+  {
+    longlong nr= val_int();
+    return nr < 0 ?
0 : (ulonglong) nr; + } + virtual bool val_bool()= 0; + virtual my_decimal *val_decimal(my_decimal *)=0; + inline String *val_str(String *str) { return val_str(str, str); } + /* + val_str(buf1, buf2) gets two buffers and should use them as follows: + if it needs a temp buffer to convert result to string - use buf1 + example Field_tiny::val_str() + if the value exists as a string already - use buf2 + example Field_string::val_str() + consequently, buf2 may be created as 'String buf;' - no memory + will be allocated for it. buf1 will be allocated to hold a + value if it's too small. Using allocated buffer for buf2 may result in + an unnecessary free (and later, may be an alloc). + This trickery is used to decrease a number of malloc calls. + */ + virtual String *val_str(String*,String *)=0; + virtual bool val_native(Native *to) + { + DBUG_ASSERT(!is_null()); + return to->copy((const char *) ptr, pack_length()); + } + String *val_int_as_str(String *val_buffer, bool unsigned_flag); + /* + Return the field value as a LEX_CSTRING, without padding to full length + (MODE_PAD_CHAR_TO_FULL_LENGTH is temporarily suppressed during the call). + + In case of an empty value, to[0] is assigned to empty_clex_string, + memory is not allocated. + In case of a non-empty value, the memory is allocated on mem_root. + In case of a memory allocation failure, to[0] is assigned to {NULL,0}. + + @param [IN] mem_root store non-empty values here + @param [OUT to return the string here + @retval false (success) + @retval true (EOM) + */ + bool val_str_nopad(MEM_ROOT *mem_root, LEX_CSTRING *to); + fast_field_copier get_fast_field_copier(const Field *from); + /* + str_needs_quotes() returns TRUE if the value returned by val_str() needs + to be quoted when used in constructing an SQL query. 
+ */ + virtual bool str_needs_quotes() const { return false; } + const Type_handler *type_handler_for_comparison() const + { + return type_handler()->type_handler_for_comparison(); + } + Item_result result_type () const + { + return type_handler()->result_type(); + } + Item_result cmp_type () const + { + return type_handler()->cmp_type(); + } + virtual bool eq(Field *field) + { + return (ptr == field->ptr && null_ptr == field->null_ptr && + null_bit == field->null_bit && field->type() == type()); + } + virtual bool eq_def(const Field *field) const; + + /* + pack_length() returns size (in bytes) used to store field data in memory + (i.e. it returns the maximum size of the field in a row of the table, + which is located in RAM). + */ + virtual uint32 pack_length() const { return (uint32) field_length; } + + /* + pack_length_in_rec() returns size (in bytes) used to store field data on + storage (i.e. it returns the maximal size of the field in a row of the + table, which is located on disk). + */ + virtual uint32 pack_length_in_rec() const { return pack_length(); } + virtual bool compatible_field_size(uint metadata, const Relay_log_info *rli, + uint16 mflags, int *order) const; + virtual uint pack_length_from_metadata(uint field_metadata) const + { + DBUG_ENTER("Field::pack_length_from_metadata"); + DBUG_RETURN(field_metadata); + } + virtual uint row_pack_length() const { return 0; } + + /* + data_length() return the "real size" of the data in memory. + */ + virtual uint32 data_length() { return pack_length(); } + virtual uint32 sort_length() const { return pack_length(); } + + /* + sort_suffix_length() return the length bytes needed to store the length + for binary charset + */ + virtual uint32 sort_suffix_length() const { return 0; } + + /* + Get the number bytes occupied by the value in the field. + CHAR values are stripped of trailing spaces. + Flexible values are stripped of their length. 
+ */ + virtual uint32 value_length() + { + uint len; + if (!zero_pack() && + (type() == MYSQL_TYPE_STRING && + (len= pack_length()) >= 4 && len < 256)) + { + uchar *str, *end; + for (str= ptr, end= str+len; end > str && end[-1] == ' '; end--) {} + len=(uint) (end-str); + return len; + } + return data_length(); + } + + /** + Get the maximum size of the data in packed format. + + @return Maximum data length of the field when packed using the + Field::pack() function. + */ + virtual uint32 max_data_length() const { + return pack_length(); + }; + + virtual int reset() { bzero(ptr,pack_length()); return 0; } + virtual void reset_fields() {} + const uchar *ptr_in_record(const uchar *record) const + { + my_ptrdiff_t l_offset= (my_ptrdiff_t) (ptr - table->record[0]); + DBUG_ASSERT(l_offset >= 0 && table->s->rec_buff_length - l_offset > 0); + return record + l_offset; + } + virtual int set_default(); + + bool has_update_default_function() const + { + return flags & ON_UPDATE_NOW_FLAG; + } + bool has_default_now_unireg_check() const + { + return unireg_check == TIMESTAMP_DN_FIELD + || unireg_check == TIMESTAMP_DNUN_FIELD; + } + + /* + Mark the field as having a value supplied by the client, thus it should + not be auto-updated. 
+  */
+  void set_has_explicit_value()
+  {
+    bitmap_set_bit(&table->has_value_set, field_index);
+  }
+  bool has_explicit_value()
+  {
+    return bitmap_is_set(&table->has_value_set, field_index);
+  }
+  void clear_has_explicit_value()
+  {
+    bitmap_clear_bit(&table->has_value_set, field_index);
+  }
+
+  virtual my_time_t get_timestamp(const uchar *pos, ulong *sec_part) const
+  { DBUG_ASSERT(0); return 0; }
+  my_time_t get_timestamp(ulong *sec_part) const
+  {
+    return get_timestamp(ptr, sec_part);
+  }
+
+  virtual bool binary() const { return 1; }
+  virtual bool zero_pack() const { return 1; }
+  virtual enum ha_base_keytype key_type() const { return HA_KEYTYPE_BINARY; }
+  virtual uint16 key_part_flag() const { return 0; }
+  virtual uint16 key_part_length_bytes() const { return 0; }
+  virtual uint32 key_length() const { return pack_length(); }
+  virtual const Type_handler *type_handler() const = 0;
+  virtual enum_field_types type() const
+  {
+    return type_handler()->field_type();
+  }
+  virtual enum_field_types real_type() const
+  {
+    return type_handler()->real_field_type();
+  }
+  virtual enum_field_types binlog_type() const
+  {
+    /*
+      Binlog stores field->type() as type code by default. For example,
+      it puts MYSQL_TYPE_STRING in case of CHAR, VARCHAR, SET and ENUM,
+      with extra data type details put into metadata.
+
+      Binlog behaviour slightly differs between various MySQL and MariaDB
+      versions for the temporal data types TIME, DATETIME and TIMESTAMP.
+
+      MySQL prior to 5.6 uses MYSQL_TYPE_TIME, MYSQL_TYPE_DATETIME
+      and MYSQL_TYPE_TIMESTAMP type codes in binlog and stores no
+      additional metadata.
+
+      MariaDB-5.3 implements new versions for TIME, DATETIME, TIMESTAMP
+      with fractional second precision, but uses the old format for the
+      types TIME(0), DATETIME(0), TIMESTAMP(0), and it still stores
+      MYSQL_TYPE_TIME, MYSQL_TYPE_DATETIME and MYSQL_TYPE_TIMESTAMP in binlog,
+      with no additional metadata.
+ So row-based replication between temporal data types of + different precision is not possible in MariaDB. + + MySQL-5.6 also implements a new version of TIME, DATETIME, TIMESTAMP + which support fractional second precision 0..6, and use the new + format even for the types TIME(0), DATETIME(0), TIMESTAMP(0). + For these new data types, MySQL-5.6 stores new type codes + MYSQL_TYPE_TIME2, MYSQL_TYPE_DATETIME2, MYSQL_TYPE_TIMESTAMP2 in binlog, + with fractional precision 0..6 put into metadata. + This makes it in theory possible to do row-based replication between + columns of different fractional precision (e.g. from TIME(1) on master + to TIME(6) on slave). However, it's not currently fully implemented yet. + MySQL-5.6 can only do row-based replication from the old types + TIME, DATETIME, TIMESTAMP (represented by MYSQL_TYPE_TIME, + MYSQL_TYPE_DATETIME and MYSQL_TYPE_TIMESTAMP type codes in binlog) + to the new corresponding types TIME(0), DATETIME(0), TIMESTAMP(0). + + Note: MariaDB starting from the version 10.0 understands the new + MySQL-5.6 type codes MYSQL_TYPE_TIME2, MYSQL_TYPE_DATETIME2, + MYSQL_TYPE_TIMESTAMP2. When started over MySQL-5.6 tables both on + master and on slave, MariaDB-10.0 can also do row-based replication + from the old types TIME, DATETIME, TIMESTAMP to the new MySQL-5.6 + types TIME(0), DATETIME(0), TIMESTAMP(0). + + Note: perhaps binlog should eventually be modified to store + real_type() instead of type() for all column types. + */ + return type(); + } + virtual Binlog_type_info binlog_type_info() const + { + DBUG_ASSERT(Field::type() == binlog_type()); + return Binlog_type_info(Field::type(), 0, 0); + } + virtual en_fieldtype tmp_engine_column_type(bool use_packed_rows) const + { + return FIELD_NORMAL; + } + /* + Conversion type for from the source to the current field. 
+ */ + virtual enum_conv_type rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) + const= 0; + enum_conv_type rpl_conv_type_from_same_data_type(uint16 metadata, + const Relay_log_info *rli, + const Conv_param ¶m) + const; + inline int cmp(const uchar *str) const { return cmp(ptr,str); } + /* + The following method is used for comparing prefix keys. + Currently it's only used in partitioning. + */ + virtual int cmp_prefix(const uchar *a, const uchar *b, + size_t prefix_char_len) const + { return cmp(a, b); } + virtual int cmp(const uchar *,const uchar *) const=0; + virtual int cmp_binary(const uchar *a,const uchar *b, uint32 max_length=~0U) const + { return memcmp(a,b,pack_length()); } + virtual int cmp_offset(my_ptrdiff_t row_offset) + { return cmp(ptr,ptr+row_offset); } + virtual int cmp_binary_offset(uint row_offset) + { return cmp_binary(ptr, ptr+row_offset); }; + virtual int key_cmp(const uchar *a,const uchar *b) const + { return cmp(a, b); } + virtual int key_cmp(const uchar *str, uint length) const + { return cmp(ptr,str); } + /* + Update the value m of the 'min_val' field with the current value v + of this field if force_update is set to TRUE or if v < m. + Return TRUE if the value has been updated. + */ + virtual bool update_min(Field *min_val, bool force_update) + { + bool update_fl= force_update || cmp(ptr, min_val->ptr) < 0; + if (update_fl) + { + min_val->set_notnull(); + memcpy(min_val->ptr, ptr, pack_length()); + } + return update_fl; + } + /* + Update the value m of the 'max_val' field with the current value v + of this field if force_update is set to TRUE or if v > m. + Return TRUE if the value has been updated. 
+ */ + virtual bool update_max(Field *max_val, bool force_update) + { + bool update_fl= force_update || cmp(ptr, max_val->ptr) > 0; + if (update_fl) + { + max_val->set_notnull(); + memcpy(max_val->ptr, ptr, pack_length()); + } + return update_fl; + } + virtual void store_field_value(uchar *val, uint len) + { + memcpy(ptr, val, len); + } + virtual decimal_digits_t decimals() const { return 0; } + virtual Information_schema_numeric_attributes + information_schema_numeric_attributes() const + { + return Information_schema_numeric_attributes(); + } + virtual Information_schema_character_attributes + information_schema_character_attributes() const + { + return Information_schema_character_attributes(); + } + virtual void update_data_type_statistics(Data_type_statistics *st) const + { } + /* + Caller beware: sql_type can change str.Ptr, so check + ptr() to see if it changed if you are using your own buffer + in str and restore it with set() if needed + */ + virtual void sql_type(String &str) const =0; + virtual void sql_rpl_type(String *str) const { sql_type(*str); } + virtual uint size_of() const =0; // For new field + inline bool is_null(my_ptrdiff_t row_offset= 0) const + { + /* + The table may have been marked as containing only NULL values + for all fields if it is a NULL-complemented row of an OUTER JOIN + or if the query is an implicitly grouped query (has aggregate + functions but no GROUP BY clause) with no qualifying rows. If + this is the case (in which TABLE::null_row is true), the field + is considered to be NULL. + + Note that if a table->null_row is set then also all null_bits are + set for the row. + + In the case of the 'result_field' for GROUP BY, table->null_row might + refer to the *next* row in the table (when the algorithm is: read the + next row, see if any of group column values have changed, send the + result - grouped - row to the client if yes). 
So, table->null_row might + be wrong, but such a result_field is always nullable (that's defined by + original_field->maybe_null()) and we trust its null bit. + */ + return null_ptr ? null_ptr[row_offset] & null_bit : table->null_row; + } + inline bool is_real_null(my_ptrdiff_t row_offset= 0) const + { return null_ptr && (null_ptr[row_offset] & null_bit); } + inline bool is_null_in_record(const uchar *record) const + { + if (maybe_null_in_table()) + return record[(uint) (null_ptr - table->record[0])] & null_bit; + return 0; + } + inline void set_null(my_ptrdiff_t row_offset= 0) + { if (null_ptr) null_ptr[row_offset]|= null_bit; } + inline void set_notnull(my_ptrdiff_t row_offset= 0) + { if (null_ptr) null_ptr[row_offset]&= (uchar) ~null_bit; } + inline bool maybe_null(void) const + { return null_ptr != 0 || table->maybe_null; } + // Set to NULL on LOAD DATA or LOAD XML + virtual bool load_data_set_null(THD *thd); + // Reset when a LOAD DATA file ended unexpectedly + virtual bool load_data_set_no_data(THD *thd, bool fixed_format); + void load_data_set_value(const char *pos, uint length, CHARSET_INFO *cs); + + /* @return true if this field is NULL-able (even if temporarily) */ + inline bool real_maybe_null() const { return null_ptr != 0; } + uint null_offset(const uchar *record) const + { return (uint) (null_ptr - record); } + /* + For a NULL-able field (that can actually store a NULL value in a table) + null_ptr points to the "null bitmap" in the table->record[0] header. 
For + NOT NULL fields it is either 0 or points outside table->record[0] into the + table->triggers->extra_null_bitmap (so that the field can store a NULL + value temporarily, only in memory) + */ + bool maybe_null_in_table() const + { return null_ptr >= table->record[0] && null_ptr <= ptr; } + + uint null_offset() const + { return null_offset(table->record[0]); } + void set_null_ptr(uchar *p_null_ptr, uint p_null_bit) + { + null_ptr= p_null_ptr; + null_bit= static_cast(p_null_bit); + } + + bool stored_in_db() const { return !vcol_info || vcol_info->stored_in_db; } + bool check_vcol_sql_mode_dependency(THD *, vcol_init_mode mode) const; + + virtual sql_mode_t value_depends_on_sql_mode() const + { + return 0; + } + virtual sql_mode_t conversion_depends_on_sql_mode(THD *thd, + Item *expr) const + { + return (sql_mode_t) 0; + } + virtual sql_mode_t can_handle_sql_mode_dependency_on_store() const + { + return 0; + } + + inline THD *get_thd() const + { return likely(table) ? table->in_use : current_thd; } + + enum { + LAST_NULL_BYTE_UNDEF= 0 + }; + + /* + Find the position of the last null byte for the field. + + SYNOPSIS + last_null_byte() + + DESCRIPTION + Return a pointer to the last byte of the null bytes where the + field conceptually is placed. + + RETURN VALUE + The position of the last null byte relative to the beginning of + the record. If the field does not use any bits of the null + bytes, the value 0 (LAST_NULL_BYTE_UNDEF) is returned. + */ + size_t last_null_byte() const { + size_t bytes= do_last_null_byte(); + DBUG_PRINT("debug", ("last_null_byte() ==> %ld", (long) bytes)); + DBUG_ASSERT(bytes <= table->s->null_bytes); + return bytes; + } + + /* + Create mem-comparable sort key part for a sort key + */ + void make_sort_key_part(uchar *buff, uint length); + + /* + create a compact sort key which can be compared with a comparison + function. 
They are called packed sort keys + */ + virtual uint make_packed_sort_key_part(uchar *buff, + const SORT_FIELD_ATTR *sort_field); + + virtual void make_send_field(Send_field *); + + /* + Some implementations actually may write up to 8 bytes regardless of what + size was requested. This is due to the minimum value of the system variable + max_sort_length. + */ + + virtual void sort_string(uchar *buff,uint length)=0; + virtual bool optimize_range(uint idx, uint part) const; + virtual void free() {} + virtual Field *make_new_field(MEM_ROOT *root, TABLE *new_table, + bool keep_type); + virtual Field *new_key_field(MEM_ROOT *root, TABLE *new_table, + uchar *new_ptr, uint32 length, + uchar *new_null_ptr, uint new_null_bit); + Field *create_tmp_field(MEM_ROOT *root, TABLE *new_table, + bool maybe_null_arg); + Field *create_tmp_field(MEM_ROOT *root, TABLE *new_table) + { + return create_tmp_field(root, new_table, maybe_null()); + } + Field *clone(MEM_ROOT *mem_root, TABLE *new_table); + Field *clone(MEM_ROOT *mem_root, TABLE *new_table, my_ptrdiff_t diff); + inline void move_field(uchar *ptr_arg,uchar *null_ptr_arg,uchar null_bit_arg) + { + ptr=ptr_arg; null_ptr=null_ptr_arg; null_bit=null_bit_arg; + } + inline void move_field(uchar *ptr_arg) { ptr=ptr_arg; } + inline uchar *record_ptr() // record[0] or wherever the field was moved to + { + my_ptrdiff_t offset= table->s->field[field_index]->ptr - table->s->default_values; + return ptr - offset; + } + virtual void move_field_offset(my_ptrdiff_t ptr_diff) + { + ptr=ADD_TO_PTR(ptr,ptr_diff, uchar*); + if (null_ptr) + null_ptr=ADD_TO_PTR(null_ptr,ptr_diff,uchar*); + } + + /* + Copy the Field's value to buff. The value will be in table->record[] + format. 
+ */ + void get_image(uchar *buff, uint length, CHARSET_INFO *cs) const + { get_image(buff, length, ptr, cs); } + virtual void get_image(uchar *buff, uint length, + const uchar *ptr_arg, CHARSET_INFO *cs) const + { memcpy(buff,ptr_arg,length); } + + /* + Set Field's value to the value in *buf. + */ + virtual void set_image(const uchar *buff,uint length, CHARSET_INFO *cs) + { memcpy(ptr,buff,length); } + + + /* + Copy a field part into an output buffer. + + SYNOPSIS + Field::get_key_image() + buff [out] output buffer + length output buffer size + type itMBR for geometry blobs, otherwise itRAW + + DESCRIPTION + This function makes a copy of field part of size equal to or + less than "length" parameter value. + For fields of string types (CHAR, VARCHAR, TEXT) the rest of buffer + is padded by zero byte. + + NOTES + For variable length character fields (i.e. UTF-8) the "length" + parameter means a number of output buffer bytes as if all field + characters have maximal possible size (mbmaxlen). In the other words, + "length" parameter is a number of characters multiplied by + field_charset->mbmaxlen. + + RETURN + Number of copied bytes (excluding padded zero bytes -- see above). 
+ */ + + uint get_key_image(uchar *buff, uint length, imagetype type_arg) const + { return get_key_image(buff, length, ptr, type_arg); } + virtual uint get_key_image(uchar *buff, uint length, const uchar *ptr_arg, imagetype type_arg) const + { + get_image(buff, length, ptr_arg, &my_charset_bin); + return length; + } + virtual void set_key_image(const uchar *buff,uint length) + { set_image(buff,length, &my_charset_bin); } + inline longlong val_int_offset(uint row_offset) + { + ptr+=row_offset; + longlong tmp=val_int(); + ptr-=row_offset; + return tmp; + } + inline longlong val_int(const uchar *new_ptr) + { + uchar *old_ptr= ptr; + longlong return_value; + ptr= (uchar*) new_ptr; + return_value= val_int(); + ptr= old_ptr; + return return_value; + } + inline String *val_str(String *str, const uchar *new_ptr) + { + uchar *old_ptr= ptr; + ptr= (uchar*) new_ptr; + val_str(str); + ptr= old_ptr; + return str; + } + virtual bool send(Protocol *protocol); + + virtual uchar *pack(uchar *to, const uchar *from, uint max_length); + /** + @overload Field::pack(uchar*, const uchar*, uint, bool) + */ + uchar *pack(uchar *to, const uchar *from) + { + DBUG_ENTER("Field::pack"); + uchar *result= this->pack(to, from, UINT_MAX); + DBUG_RETURN(result); + } + + virtual const uchar *unpack(uchar* to, const uchar *from, + const uchar *from_end, uint param_data=0); + + virtual uint packed_col_length(const uchar *to, uint length) + { return length;} + virtual uint max_packed_col_length(uint max_length) + { return max_length;} + virtual bool is_packable() const { return false; } + + uint offset(const uchar *record) const + { + return (uint) (ptr - record); + } + void copy_from_tmp(int offset); + uint fill_cache_field(struct st_cache_field *copy); + virtual bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate); + virtual longlong val_datetime_packed(THD *thd); + virtual longlong val_time_packed(THD *thd); + virtual const TYPELIB *get_typelib() const { return NULL; } + virtual CHARSET_INFO 
*charset() const= 0; + /* returns TRUE if the new charset differs. */ + virtual void change_charset(const DTCollation &new_cs) {} + virtual const DTCollation &dtcollation() const= 0; + virtual CHARSET_INFO *charset_for_protocol(void) const + { return binary() ? &my_charset_bin : charset(); } + virtual CHARSET_INFO *sort_charset(void) const { return charset(); } + virtual bool has_charset(void) const { return FALSE; } + virtual int set_time() { return 1; } + bool set_warning(Sql_condition::enum_warning_level, unsigned int code, + int cuted_increment, ulong current_row=0) const; + virtual void print_key_value(String *out, uint32 length); + void print_key_part_value(String *out, const uchar *key, uint32 length); + void print_key_value_binary(String *out, const uchar* key, uint32 length); + void raise_note_cannot_use_key_part(THD *thd, uint keynr, uint part, + const LEX_CSTRING &op, + Item *value, + const Data_type_compatibility reason) + const; + void raise_note_key_become_unused(THD *thd, const String &expr) const; +protected: + bool set_warning(unsigned int code, int cuted_increment) const + { + return set_warning(Sql_condition::WARN_LEVEL_WARN, code, cuted_increment); + } + bool set_note(unsigned int code, int cuted_increment) const + { + return set_warning(Sql_condition::WARN_LEVEL_NOTE, code, cuted_increment); + } + void set_datetime_warning(Sql_condition::enum_warning_level, uint code, + const ErrConv *str, const char *typestr, + int cuted_increment) const; + void set_datetime_warning(uint code, + const ErrConv *str, const char *typestr, + int cuted_increment) const + { + set_datetime_warning(Sql_condition::WARN_LEVEL_WARN, code, str, typestr, + cuted_increment); + } + void set_warning_truncated_wrong_value(const char *type, const char *value); + inline bool check_overflow(int op_result) + { + return (op_result == E_DEC_OVERFLOW); + } + int warn_if_overflow(int op_result); + Copy_func *get_identical_copy_func() const; + bool 
cmp_is_done_using_type_handler_of_this(const Item_bool_func *cond, + const Item *item) const; + Data_type_compatibility can_optimize_scalar_range( + const RANGE_OPT_PARAM *param, + const KEY_PART *key_part, + const Item_bool_func *cond, + scalar_comparison_op op, + Item *value) const; + uchar *make_key_image(MEM_ROOT *mem_root, const KEY_PART *key_part); + SEL_ARG *get_mm_leaf_int(RANGE_OPT_PARAM *param, KEY_PART *key_part, + const Item_bool_func *cond, + scalar_comparison_op op, Item *value, + bool unsigned_field); + /* + Make a leaf tree for the cases when the value was stored + to the field exactly, without any truncation, rounding or adjustments. + For example, if we stored an INT value into an INT column, + and value->save_in_field_no_warnings() returned 0, + we know that the value was stored exactly. + */ + SEL_ARG *stored_field_make_mm_leaf_exact(RANGE_OPT_PARAM *param, + KEY_PART *key_part, + scalar_comparison_op op, + Item *value); + /* + Make a leaf tree for the cases when we don't know if + the value was stored to the field without any data loss, + or was modified to a smaller or a greater value. + Used for the data types whose methods Field::store*() + silently adjust the value. This is the most typical case. + */ + SEL_ARG *stored_field_make_mm_leaf(RANGE_OPT_PARAM *param, + KEY_PART *key_part, + scalar_comparison_op op, Item *value); + /* + Make a leaf tree when an INT value was stored into a field of INT type, + and some truncation happened. Tries to adjust the range search condition + when possible, e.g. "tinytint < 300" -> "tinyint <= 127". + Can also return SEL_ARG_IMPOSSIBLE(), and NULL (not sargable). 
+ */ + SEL_ARG *stored_field_make_mm_leaf_bounded_int(RANGE_OPT_PARAM *param, + KEY_PART *key_part, + scalar_comparison_op op, + Item *value, + bool unsigned_field); + /* + Make a leaf tree when some truncation happened during + value->save_in_field_no_warning(this), and we cannot yet adjust the range + search condition for the current combination of the field and the value + data types. + Returns SEL_ARG_IMPOSSIBLE() for "=" and "<=>". + Returns NULL (not sargable) for other comparison operations. + */ + SEL_ARG *stored_field_make_mm_leaf_truncated(RANGE_OPT_PARAM *prm, + scalar_comparison_op, + Item *value); +public: + void set_table_name(String *alias) + { + table_name= &alias->Ptr; + } + void init(TABLE *table_arg) + { + orig_table= table= table_arg; + set_table_name(&table_arg->alias); + } + virtual void init_for_tmp_table(Field *org_field, TABLE *new_table) + { + init(new_table); + orig_table= org_field->orig_table; + vcol_info= 0; + cond_selectivity= 1.0; + next_equal_field= NULL; + option_list= NULL; + option_struct= NULL; + if (org_field->type() == MYSQL_TYPE_VAR_STRING || + org_field->type() == MYSQL_TYPE_VARCHAR) + new_table->s->db_create_options|= HA_OPTION_PACK_RECORD; + } + void init_for_make_new_field(TABLE *new_table_arg, TABLE *orig_table_arg) + { + init(new_table_arg); + /* + Normally orig_table is different from table only if field was + created via ::make_new_field. Here we alter the type of field, + so ::make_new_field is not applicable. But we still need to + preserve the original field metadata for the client-server + protocol. + */ + orig_table= orig_table_arg; + } + + /* maximum possible display length */ + virtual uint32 max_display_length() const= 0; + /** + Whether a field being created has the samle type. 
+ Used by the ALTER TABLE + */ + virtual bool is_equal(const Column_definition &new_field) const= 0; + /* convert decimal to longlong with overflow check */ + longlong convert_decimal2longlong(const my_decimal *val, bool unsigned_flag, + int *err); + /* + Maximum number of bytes in character representation. + - For string types it is equal to the field capacity, in bytes. + - For non-string types it represents the longest possible string length + after conversion to string. + */ + virtual uint32 character_octet_length() const + { + return field_length; + } + /* The max. number of characters */ + virtual uint32 char_length() const + { + return field_length / charset()->mbmaxlen; + } + + ha_storage_media field_storage_type() const + { + return (ha_storage_media) + ((flags >> FIELD_FLAGS_STORAGE_MEDIA) & 3); + } + + void set_storage_type(ha_storage_media storage_type_arg) + { + DBUG_ASSERT(field_storage_type() == HA_SM_DEFAULT); + flags |= static_cast(storage_type_arg) << + FIELD_FLAGS_STORAGE_MEDIA; + } + + column_format_type column_format() const + { + return (column_format_type) + ((flags >> FIELD_FLAGS_COLUMN_FORMAT) & 3); + } + + void set_column_format(column_format_type column_format_arg) + { + DBUG_ASSERT(column_format() == COLUMN_FORMAT_TYPE_DEFAULT); + flags |= static_cast(column_format_arg) << + FIELD_FLAGS_COLUMN_FORMAT; + } + + bool vers_sys_field() const + { + return flags & (VERS_ROW_START | VERS_ROW_END); + } + + bool vers_update_unversioned() const + { + return flags & VERS_UPDATE_UNVERSIONED_FLAG; + } + + /* + Validate a non-null field value stored in the given record + according to the current thread settings, e.g. sql_mode. 
+ @param thd - the thread + @param record - the record to check in + */ + virtual bool validate_value_in_record(THD *thd, const uchar *record) const + { return false; } + bool validate_value_in_record_with_warn(THD *thd, const uchar *record); + key_map get_possible_keys(); + + /* Hash value */ + void hash(Hasher *hasher) + { + if (is_null()) + hasher->add_null(); + else + hash_not_null(hasher); + } + virtual void hash_not_null(Hasher *hasher); + + /** + Get the upper limit of the MySQL integral and floating-point type. + + @return maximum allowed value for the field + */ + virtual ulonglong get_max_int_value() const + { + DBUG_ASSERT(false); + return 0ULL; + } + +/** + Checks whether a string field is part of write_set. + + @return + FALSE - If field is not char/varchar/.... + - If field is char/varchar/.. and is not part of write set. + TRUE - If field is char/varchar/.. and is part of write set. +*/ + virtual bool is_varchar_and_in_write_set() const { return FALSE; } + + /* Check whether the field can be used as a join attribute in hash join */ + virtual bool hash_join_is_possible() { return TRUE; } + virtual bool eq_cmp_as_binary() { return TRUE; } + + /* Position of the field value within the interval of [min, max] */ + virtual double pos_in_interval(Field *min, Field *max) + { + return (double) 0.5; + } + virtual bool pos_through_val_str() { return false;} + + /* + Check if comparison between the field and an item unambiguously + identifies a distinct field value. + + Example1: SELECT * FROM t1 WHERE int_column=10; + This example returns distinct integer value of 10. + + Example2: SELECT * FROM t1 WHERE varchar_column=DATE'2001-01-01' + This example returns non-distinct values. + Comparison as DATE will return '2001-01-01' and '2001-01-01x', + but these two values are not equal to each other as VARCHARs. + See also the function with the same name in sql_select.cc. 
+ */ + virtual bool test_if_equality_guarantees_uniqueness(const Item *const_item) + const; + virtual bool can_be_substituted_to_equal_item(const Context &ctx, + const Item_equal *item); + virtual Item *get_equal_const_item(THD *thd, const Context &ctx, + Item *const_item) + { + return const_item; + } + virtual Data_type_compatibility can_optimize_keypart_ref( + const Item_bool_func *cond, + const Item *item) const; + virtual Data_type_compatibility can_optimize_hash_join( + const Item_bool_func *cond, + const Item *item) const + { + return can_optimize_keypart_ref(cond, item); + } + virtual Data_type_compatibility can_optimize_group_min_max( + const Item_bool_func *cond, + const Item *const_item) const; + /** + Test if Field can use range optimizer for a standard comparison operation: + <=, <, =, <=>, >, >= + Note, this method does not cover spatial operations. + */ + virtual Data_type_compatibility can_optimize_range(const Item_bool_func *cond, + const Item *item, + bool is_eq_func) const; + + virtual SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param, KEY_PART *key_part, + const Item_bool_func *cond, + scalar_comparison_op op, Item *value)= 0; + + Data_type_compatibility can_optimize_outer_join_table_elimination( + const Item_bool_func *cond, + const Item *item) const + { + // Exactly the same rules with REF access + return can_optimize_keypart_ref(cond, item); + } + + bool save_in_field_default_value(bool view_eror_processing); + bool save_in_field_ignore_value(bool view_error_processing); + + /* Mark field in read map. 
Updates also virtual fields */ + void register_field_in_read_map(); + + virtual Compression_method *compression_method() const { return 0; } + + virtual Virtual_tmp_table **virtual_tmp_table_addr() + { + return NULL; + } + virtual bool sp_prepare_and_store_item(THD *thd, Item **value); + + friend int cre_myisam(char * name, TABLE *form, uint options, + ulonglong auto_increment_value); + friend class Copy_field; + friend class Item_avg_field; + friend class Item_std_field; + friend class Item_sum_num; + friend class Item_sum_sum; + friend class Item_sum_count; + friend class Item_sum_avg; + friend class Item_sum_std; + friend class Item_sum_min; + friend class Item_sum_max; + friend class Item_func_group_concat; + +private: + /* + Primitive for implementing last_null_byte(). + + SYNOPSIS + do_last_null_byte() + + DESCRIPTION + Primitive for the implementation of the last_null_byte() + function. This represents the inheritance interface and can be + overridden by subclasses. + */ + virtual size_t do_last_null_byte() const; + +protected: + uchar *pack_int(uchar *to, const uchar *from, size_t size) + { + memcpy(to, from, size); + return to + size; + } + + const uchar *unpack_int(uchar* to, const uchar *from, + const uchar *from_end, size_t size) + { + if (from + size > from_end) + return 0; + memcpy(to, from, size); + return from + size; + } + + uchar *pack_int16(uchar *to, const uchar *from) + { return pack_int(to, from, 2); } + const uchar *unpack_int16(uchar* to, const uchar *from, const uchar *from_end) + { return unpack_int(to, from, from_end, 2); } + uchar *pack_int24(uchar *to, const uchar *from) + { return pack_int(to, from, 3); } + const uchar *unpack_int24(uchar* to, const uchar *from, const uchar *from_end) + { return unpack_int(to, from, from_end, 3); } + uchar *pack_int32(uchar *to, const uchar *from) + { return pack_int(to, from, 4); } + const uchar *unpack_int32(uchar* to, const uchar *from, const uchar *from_end) + { return unpack_int(to, from, 
from_end, 4); } + uchar *pack_int64(uchar* to, const uchar *from) + { return pack_int(to, from, 8); } + const uchar *unpack_int64(uchar* to, const uchar *from, const uchar *from_end) + { return unpack_int(to, from, from_end, 8); } + + double pos_in_interval_val_real(Field *min, Field *max); + double pos_in_interval_val_str(Field *min, Field *max, uint data_offset); +}; + + +class Field_num :public Field { +protected: + int check_edom_and_important_data_truncation(const char *type, bool edom, + CHARSET_INFO *cs, + const char *str, size_t length, + const char *end_of_num); + int check_edom_and_truncation(const char *type, bool edom, + CHARSET_INFO *cs, + const char *str, size_t length, + const char *end_of_num); + int check_int(CHARSET_INFO *cs, const char *str, size_t length, + const char *int_end, int error) + { + return check_edom_and_truncation("integer", + error == MY_ERRNO_EDOM || str == int_end, + cs, str, length, int_end); + } + bool get_int(CHARSET_INFO *cs, const char *from, size_t len, + longlong *rnd, ulonglong unsigned_max, + longlong signed_min, longlong signed_max); + void prepend_zeros(String *value) const; + Item *get_equal_zerofill_const_item(THD *thd, const Context &ctx, + Item *const_item); + Binlog_type_info::binlog_sign_t binlog_signedness() const + { + return (flags & UNSIGNED_FLAG) ? 
Binlog_type_info::SIGN_UNSIGNED : + Binlog_type_info::SIGN_SIGNED; + } + bool send_numeric_zerofill_str(Protocol_text *protocol, + protocol_send_type_t send_type); + +public: + const decimal_digits_t dec; + bool zerofill,unsigned_flag; // Purify cannot handle bit fields + Field_num(uchar *ptr_arg,uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, + decimal_digits_t dec_arg, bool zero_arg, bool unsigned_arg); + CHARSET_INFO *charset() const override + { + return DTCollation_numeric::singleton().collation; + } + const DTCollation &dtcollation() const override + { + return DTCollation_numeric::singleton(); + } + sql_mode_t can_handle_sql_mode_dependency_on_store() const override; + Item *get_equal_const_item(THD *thd, const Context &ctx, Item *const_item) + override + { + return (flags & ZEROFILL_FLAG) ? + get_equal_zerofill_const_item(thd, ctx, const_item) : + const_item; + } + void add_zerofill_and_unsigned(String &res) const; + friend class Create_field; + void make_send_field(Send_field *) override; + decimal_digits_t decimals() const override { return dec; } + uint size_of() const override { return sizeof(*this); } + bool eq_def(const Field *field) const override; + Copy_func *get_copy_func(const Field *from) const override + { + if (unsigned_flag && from->cmp_type() == DECIMAL_RESULT) + return do_field_decimal; + return do_field_int; + } + int save_in_field(Field *to) override + { + return to->store(val_int(), MY_TEST(flags & UNSIGNED_FLAG)); + } + bool is_equal(const Column_definition &new_field) const override; + uint row_pack_length() const override { return pack_length(); } + uint32 pack_length_from_metadata(uint field_metadata) const override + { + uint32 length= pack_length(); + DBUG_PRINT("result", ("pack_length_from_metadata(%d): %u", + field_metadata, length)); + return length; + } + double pos_in_interval(Field *min, Field *max) override + { + return pos_in_interval_val_real(min, 
max); + } + SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param, KEY_PART *key_part, + const Item_bool_func *cond, + scalar_comparison_op op, Item *value) override; + Binlog_type_info binlog_type_info() const override + { + DBUG_ASSERT(Field_num::type() == binlog_type()); + return Binlog_type_info(Field_num::type(), 0, 0, binlog_signedness()); + } +}; + + +class Field_str :public Field { +protected: + DTCollation m_collation; + // A short alias for m_collation.collation with non-virtual linkage + const CHARSET_INFO *field_charset() const { return m_collation.collation; } + uint mbmaxlen() const { return m_collation.collation->mbmaxlen; } +public: + bool can_be_substituted_to_equal_item(const Context &ctx, + const Item_equal *item_equal) override; + Field_str(uchar *ptr_arg,uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, + const DTCollation &collation); + decimal_digits_t decimals() const override + { return is_created_from_null_item ? 
0 : NOT_FIXED_DEC; } + int save_in_field(Field *to) override { return save_in_field_str(to); } + bool memcpy_field_possible(const Field *from) const override + { + return real_type() == from->real_type() && + pack_length() == from->pack_length() && + charset() == from->charset(); + } + int store(double nr) override; + int store(longlong nr, bool unsigned_val) override; + int store_decimal(const my_decimal *) override; + int store(const char *to,size_t length,CHARSET_INFO *cs) override=0; + int store_hex_hybrid(const char *str, size_t length) override + { + return store(str, length, &my_charset_bin); + } + CHARSET_INFO *charset() const override { return m_collation.collation; } + const DTCollation &dtcollation() const override + { + return m_collation; + } + void change_charset(const DTCollation &new_cs) override; + bool binary() const override { return field_charset() == &my_charset_bin; } + uint32 max_display_length() const override { return field_length; } + uint32 character_octet_length() const override { return field_length; } + uint32 char_length() const override + { + return field_length / mbmaxlen(); + } + Information_schema_character_attributes + information_schema_character_attributes() const override + { + return Information_schema_character_attributes(max_display_length(), + char_length()); + } + friend class Create_field; + my_decimal *val_decimal(my_decimal *) override; + bool val_bool() override { return val_real() != 0e0; } + bool str_needs_quotes() const override { return true; } + bool eq_cmp_as_binary() override { return MY_TEST(flags & BINARY_FLAG); } + virtual uint length_size() const { return 0; } + double pos_in_interval(Field *min, Field *max) override + { + return pos_in_interval_val_str(min, max, length_size()); + } + bool pos_through_val_str() override {return true;} + + bool test_if_equality_guarantees_uniqueness(const Item *const_item) const + override; + SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param, KEY_PART *key_part, + const 
Item_bool_func *cond, + scalar_comparison_op op, Item *value) override; + Binlog_type_info binlog_type_info() const override + { + DBUG_ASSERT(Field_str::type() == binlog_type()); + return Binlog_type_info(Field_str::type(), 0, 0, charset()); + } +}; + +/* base class for Field_string, Field_varstring and Field_blob */ + +class Field_longstr :public Field_str +{ +protected: + int report_if_important_data(const char *ptr, const char *end, + bool count_spaces); + bool check_string_copy_error(const String_copier *copier, + const char *end, CHARSET_INFO *cs); + int check_conversion_status(const String_copier *copier, + const char *end, CHARSET_INFO *cs, + bool count_spaces) + { + if (check_string_copy_error(copier, end, cs)) + return 2; + return report_if_important_data(copier->source_end_pos(), + end, count_spaces); + } + int well_formed_copy_with_check(char *to, size_t to_length, + CHARSET_INFO *from_cs, + const char *from, size_t from_length, + size_t nchars, bool count_spaces, + uint *copy_length) + { + String_copier copier; + + *copy_length= copier.well_formed_copy(field_charset(), to, to_length, + from_cs, from, from_length, + nchars); + + return check_conversion_status(&copier, from + from_length, from_cs, count_spaces); + } + Data_type_compatibility cmp_to_string_with_same_collation( + const Item_bool_func *cond, + const Item *item) const; + Data_type_compatibility cmp_to_string_with_stricter_collation( + const Item_bool_func *cond, + const Item *item) const; + int compress(char *to, uint to_length, + const char *from, uint length, + uint max_length, + uint *out_length, + CHARSET_INFO *cs, size_t nchars); + String *uncompress(String *val_buffer, String *val_ptr, + const uchar *from, uint from_length) const; +public: + Field_longstr(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, + const DTCollation &collation) + :Field_str(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, 
+ unireg_check_arg, + field_name_arg, collation) + {} + enum_conv_type rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param &param) const override; + int store_decimal(const my_decimal *d) override; + uint32 max_data_length() const override; + void make_send_field(Send_field *) override; + bool send(Protocol *protocol) override; + + bool is_varchar_and_in_write_set() const override + { + DBUG_ASSERT(table && table->write_set); + return bitmap_is_set(table->write_set, field_index); + } + bool match_collation_to_optimize_range() const { return true; } + + Data_type_compatibility can_optimize_keypart_ref(const Item_bool_func *cond, + const Item *item) + const override; + Data_type_compatibility can_optimize_hash_join(const Item_bool_func *cond, + const Item *item) + const override; + Data_type_compatibility can_optimize_group_min_max(const Item_bool_func *cond, + const Item *const_item) + const override; + Data_type_compatibility can_optimize_range(const Item_bool_func *cond, + const Item *item, + bool is_eq_func) const override; + bool is_packable() const override { return true; } + uint make_packed_sort_key_part(uchar *buff, + const SORT_FIELD_ATTR *sort_field)override; + uchar* pack_sort_string(uchar *to, const SORT_FIELD_ATTR *sort_field); +}; + +/* base class for float and double and decimal (old one) */ +class Field_real :public Field_num { +protected: + double get_double(const char *str, size_t length, CHARSET_INFO *cs, int *err); +public: + bool not_fixed; + + Field_real(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, + decimal_digits_t dec_arg, bool zero_arg, bool unsigned_arg) + :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, unireg_check_arg, + field_name_arg, dec_arg, zero_arg, unsigned_arg), + not_fixed(dec_arg >= FLOATING_POINT_DECIMALS) + {} + Copy_func *get_copy_func(const Field *from) const override + { + return 
do_field_real; + } + enum_conv_type rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const override; + Information_schema_numeric_attributes + information_schema_numeric_attributes() const override + { + return dec == NOT_FIXED_DEC ? + Information_schema_numeric_attributes(field_length) : + Information_schema_numeric_attributes(field_length, dec); + } + void sql_type(String &str) const override; + int save_in_field(Field *to) override { return to->store(val_real()); } + bool memcpy_field_possible(const Field *from) const override + { + /* + Cannot do memcpy from a longer field to a shorter field, + e.g. a DOUBLE(53,10) into a DOUBLE(10,10). + But it should be OK the other way around. + */ + return real_type() == from->real_type() && + pack_length() == from->pack_length() && + is_unsigned() <= from->is_unsigned() && + decimals() == from->decimals() && + field_length >= from->field_length; + } + int store_decimal(const my_decimal *dec) override + { return store(dec->to_double()); } + int store_time_dec(const MYSQL_TIME *ltime, uint dec) override; + bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + my_decimal *val_decimal(my_decimal *) override; + bool val_bool() override { return val_real() != 0e0; } + uint32 max_display_length() const override { return field_length; } + uint size_of() const override { return sizeof *this; } + Item *get_equal_const_item(THD *thd, const Context &ctx, Item *const_item) + override; +}; + + +class Field_decimal final :public Field_real { +public: + Field_decimal(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, + enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg, + decimal_digits_t dec_arg, bool zero_arg,bool unsigned_arg) + :Field_real(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, + dec_arg, zero_arg, unsigned_arg) + {} + Field *make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type) + 
override; + const Type_handler *type_handler() const override + { return &type_handler_olddecimal; } + enum ha_base_keytype key_type() const override + { return zerofill ? HA_KEYTYPE_BINARY : HA_KEYTYPE_NUM; } + Information_schema_numeric_attributes + information_schema_numeric_attributes() const override + { + uint tmp= dec ? 2 : 1; // The sign and the decimal point + return Information_schema_numeric_attributes(field_length - tmp, dec); + } + Copy_func *get_copy_func(const Field *from) const override + { + return eq_def(from) ? get_identical_copy_func() : do_field_string; + } + int reset() override; + int store(const char *to,size_t length,CHARSET_INFO *charset) override; + int store(double nr) override; + int store(longlong nr, bool unsigned_val) override; + double val_real() override; + longlong val_int() override; + String *val_str(String *, String *) override; + int cmp(const uchar *,const uchar *) const override; + void sort_string(uchar *buff,uint length) override; + void overflow(bool negative); + bool zero_pack() const override { return false; } + void sql_type(String &str) const override; + uchar *pack(uchar* to, const uchar *from, uint max_length) override + { + return Field::pack(to, from, max_length); + } +}; + + +/* New decimal/numeric field which use fixed point arithmetic */ +class Field_new_decimal final :public Field_num { +public: + /* The maximum number of decimal digits can be stored */ + decimal_digits_t precision; + uint32 bin_size; + /* + Constructors take max_length of the field as a parameter - not the + precision as the number of decimal digits allowed. 
+ So for example we need to count length from precision handling + CREATE TABLE ( DECIMAL(x,y)) + */ + Field_new_decimal(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, + enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, + decimal_digits_t dec_arg, bool zero_arg, bool unsigned_arg); + const Type_handler *type_handler() const override + { return &type_handler_newdecimal; } + enum ha_base_keytype key_type() const override { return HA_KEYTYPE_BINARY; } + Copy_func *get_copy_func(const Field *from) const override + { + // if (from->real_type() == MYSQL_TYPE_BIT) // QQ: why? + // return do_field_int; + return do_field_decimal; + } + int save_in_field(Field *to) override + { + my_decimal tmp(ptr, precision, dec); + return to->store_decimal(&tmp); + } + bool memcpy_field_possible(const Field *from) const override + { + return real_type() == from->real_type() && + pack_length() == from->pack_length() && + is_unsigned() <= from->is_unsigned() && + decimals() == from->decimals() && + field_length == from->field_length; + } + enum_conv_type rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const override; + int reset() override; + bool store_value(const my_decimal *decimal_value); + bool store_value(const my_decimal *decimal_value, int *native_error); + void set_value_on_overflow(my_decimal *decimal_value, bool sign); + int store(const char *to, size_t length, CHARSET_INFO *charset) override; + int store(double nr) override; + int store(longlong nr, bool unsigned_val) override; + int store_time_dec(const MYSQL_TIME *ltime, uint dec) override; + int store_decimal(const my_decimal *) override; + double val_real() override + { + return my_decimal(ptr, precision, dec).to_double(); + } + longlong val_int() override + { + return my_decimal(ptr, precision, dec).to_longlong(unsigned_flag); + } + ulonglong val_uint() override + { + return (ulonglong) my_decimal(ptr, precision, 
dec).to_longlong(true); + } + my_decimal *val_decimal(my_decimal *) override; + String *val_str(String *val_buffer, String *) override + { + uint fixed_precision= zerofill ? precision : 0; + return my_decimal(ptr, precision, dec). + to_string(val_buffer, fixed_precision, dec, '0'); + } + bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + my_decimal nr(ptr, precision, dec); + return decimal_to_datetime_with_warn(get_thd(), &nr, ltime, + fuzzydate, table->s, field_name.str); + } + bool val_bool() override + { + return my_decimal(ptr, precision, dec).to_bool(); + } + int cmp(const uchar *, const uchar *) const override; + void sort_string(uchar *buff, uint length) override; + bool zero_pack() const override { return false; } + void sql_type(String &str) const override; + uint32 max_display_length() const override { return field_length; } + Information_schema_numeric_attributes + information_schema_numeric_attributes() const override + { + return Information_schema_numeric_attributes(precision, dec); + } + uint size_of() const override { return sizeof *this; } + uint32 pack_length() const override { return bin_size; } + uint pack_length_from_metadata(uint field_metadata) const override; + uint row_pack_length() const override { return pack_length(); } + bool compatible_field_size(uint field_metadata, const Relay_log_info *rli, + uint16 mflags, int *order_var) const override; + bool is_equal(const Column_definition &new_field) const override; + const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end, + uint param_data) override; + Item *get_equal_const_item(THD *thd, const Context &ctx, Item *const_item) + override; + Binlog_type_info binlog_type_info() const override; +}; + + +class Field_int :public Field_num +{ +protected: + String *val_str_from_long(String *val_buffer, uint max_char_length, + int radix, long nr); +public: + Field_int(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, enum utype 
unireg_check_arg, + const LEX_CSTRING *field_name_arg, bool zero_arg, bool unsigned_arg) + :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, 0, zero_arg, unsigned_arg) + {} + enum_conv_type rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const override; + bool memcpy_field_possible(const Field *from) const override + { + return real_type() == from->real_type() && + pack_length() == from->pack_length() && + is_unsigned() == from->is_unsigned(); + } + int store_decimal(const my_decimal *) override; + my_decimal *val_decimal(my_decimal *) override; + bool val_bool() override { return val_int() != 0; } + ulonglong val_uint() override + { + longlong nr= val_int(); + return nr < 0 && !unsigned_flag ? 0 : (ulonglong) nr; + } + int store_time_dec(const MYSQL_TIME *ltime, uint dec) override; + bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + virtual const Type_limits_int *type_limits_int() const= 0; + uint32 max_display_length() const override + { + return type_limits_int()->char_length(); + } + Type_numeric_attributes type_numeric_attributes() const override + { + /* + For integer data types, the user-specified length does not constrain the + supported range, so e.g. a column of the INT(1) data type supports the + full integer range anyway. 
+ Choose the maximum from the user-specified length and the maximum + possible length determined by the data type capacity: + INT(1) -> 11 + INT(10) -> 11 + INT(40) -> 40 + */ + uint32 length1= max_display_length(); + uint32 length2= field_length; + return Type_numeric_attributes(MY_MAX(length1, length2), + decimals(), is_unsigned()); + } + Information_schema_numeric_attributes + information_schema_numeric_attributes() const override + { + uint32 prec= type_limits_int()->precision(); + return Information_schema_numeric_attributes(prec, 0); + } + void sql_type(String &str) const override; + SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param, KEY_PART *key_part, + const Item_bool_func *cond, + scalar_comparison_op op, Item *value) override + { + return get_mm_leaf_int(param, key_part, cond, op, value, unsigned_flag); + } +}; + + +class Field_tiny :public Field_int +{ + const Type_handler_general_purpose_int *type_handler_priv() const + { + if (is_unsigned()) + return &type_handler_utiny; + return &type_handler_stiny; + } +public: + Field_tiny(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, + enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg, + bool zero_arg, bool unsigned_arg) + :Field_int(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, zero_arg, unsigned_arg) + {} + const Type_handler *type_handler() const override + { return type_handler_priv(); } + enum ha_base_keytype key_type() const override + { return unsigned_flag ? 
HA_KEYTYPE_BINARY : HA_KEYTYPE_INT8; } + int store(const char *to,size_t length,CHARSET_INFO *charset) override; + int store(double nr) override; + int store(longlong nr, bool unsigned_val) override; + int reset() override { ptr[0]=0; return 0; } + double val_real() override; + longlong val_int() override; + String *val_str(String *, String *) override; + bool send(Protocol *protocol) override; + int cmp(const uchar *,const uchar *) const override; + void sort_string(uchar *buff,uint length) override; + uint32 pack_length() const override { return 1; } + const Type_limits_int *type_limits_int() const override + { + return type_handler_priv()->type_limits_int(); + } + + uchar *pack(uchar* to, const uchar *from, uint max_length) override + { + *to= *from; + return to + 1; + } + + const uchar *unpack(uchar* to, const uchar *from, + const uchar *from_end, uint param_data) override + { + if (from == from_end) + return 0; + *to= *from; + return from + 1; + } + ulonglong get_max_int_value() const override + { + return unsigned_flag ? 0xFFULL : 0x7FULL; + } +}; + + +class Field_short final :public Field_int +{ + const Type_handler_general_purpose_int *type_handler_priv() const + { + if (is_unsigned()) + return &type_handler_ushort; + return &type_handler_sshort; + } +public: + Field_short(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, + enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg, + bool zero_arg, bool unsigned_arg) + :Field_int(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, zero_arg, unsigned_arg) + {} + Field_short(uint32 len_arg,bool maybe_null_arg, + const LEX_CSTRING *field_name_arg, + bool unsigned_arg) + :Field_int((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0,0, + NONE, field_name_arg, 0, unsigned_arg) + {} + const Type_handler *type_handler() const override + { return type_handler_priv(); } + enum ha_base_keytype key_type() const override + { return unsigned_flag ? 
HA_KEYTYPE_USHORT_INT : HA_KEYTYPE_SHORT_INT;} + int store(const char *to,size_t length,CHARSET_INFO *charset) override; + int store(double nr) override; + int store(longlong nr, bool unsigned_val) override; + int reset() override { ptr[0]=ptr[1]=0; return 0; } + double val_real() override; + longlong val_int() override; + String *val_str(String *, String *) override; + bool send(Protocol *protocol) override; + int cmp(const uchar *,const uchar *) const override; + void sort_string(uchar *buff,uint length) override; + uint32 pack_length() const override { return 2; } + const Type_limits_int *type_limits_int() const override + { + return type_handler_priv()->type_limits_int(); + } + uchar *pack(uchar* to, const uchar *from, uint) override + { return pack_int16(to, from); } + + const uchar *unpack(uchar* to, const uchar *from, + const uchar *from_end, uint) override + { return unpack_int16(to, from, from_end); } + ulonglong get_max_int_value() const override + { + return unsigned_flag ? 0xFFFFULL : 0x7FFFULL; + } +}; + +class Field_medium final :public Field_int +{ + const Type_handler_general_purpose_int *type_handler_priv() const + { + if (is_unsigned()) + return &type_handler_uint24; + return &type_handler_sint24; + } +public: + Field_medium(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, + enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg, + bool zero_arg, bool unsigned_arg) + :Field_int(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, zero_arg, unsigned_arg) + {} + const Type_handler *type_handler() const override + { return type_handler_priv(); } + enum ha_base_keytype key_type() const override + { return unsigned_flag ? 
HA_KEYTYPE_UINT24 : HA_KEYTYPE_INT24; } + int store(const char *to,size_t length,CHARSET_INFO *charset) override; + int store(double nr) override; + int store(longlong nr, bool unsigned_val) override; + int reset() override { ptr[0]=ptr[1]=ptr[2]=0; return 0; } + double val_real() override; + longlong val_int() override; + String *val_str(String *, String *) override; + bool send(Protocol *protocol) override; + int cmp(const uchar *,const uchar *) const override; + void sort_string(uchar *buff,uint length) override; + uint32 pack_length() const override { return 3; } + const Type_limits_int *type_limits_int() const override + { + return type_handler_priv()->type_limits_int(); + } + uchar *pack(uchar* to, const uchar *from, uint max_length) override + { + return Field::pack(to, from, max_length); + } + ulonglong get_max_int_value() const override + { + return unsigned_flag ? 0xFFFFFFULL : 0x7FFFFFULL; + } +}; + + +class Field_long final :public Field_int +{ + const Type_handler_general_purpose_int *type_handler_priv() const + { + if (is_unsigned()) + return &type_handler_ulong; + return &type_handler_slong; + } +public: + Field_long(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, + enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg, + bool zero_arg, bool unsigned_arg) + :Field_int(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, zero_arg, unsigned_arg) + {} + Field_long(uint32 len_arg,bool maybe_null_arg, + const LEX_CSTRING *field_name_arg, + bool unsigned_arg) + :Field_int((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0,0, + NONE, field_name_arg, 0, unsigned_arg) + {} + const Type_handler *type_handler() const override + { return type_handler_priv(); } + enum ha_base_keytype key_type() const override + { return unsigned_flag ? 
HA_KEYTYPE_ULONG_INT : HA_KEYTYPE_LONG_INT; } + int store(const char *to,size_t length,CHARSET_INFO *charset) override; + int store(double nr) override; + int store(longlong nr, bool unsigned_val) override; + int reset() override { ptr[0]=ptr[1]=ptr[2]=ptr[3]=0; return 0; } + double val_real() override; + longlong val_int() override; + bool send(Protocol *protocol) override; + String *val_str(String *, String *) override; + int cmp(const uchar *,const uchar *) const override; + void sort_string(uchar *buff,uint length) override; + uint32 pack_length() const override { return 4; } + const Type_limits_int *type_limits_int() const override + { + return type_handler_priv()->type_limits_int(); + } + uchar *pack(uchar* to, const uchar *from, uint) override + { + return pack_int32(to, from); + } + const uchar *unpack(uchar* to, const uchar *from, + const uchar *from_end, uint) override + { + return unpack_int32(to, from, from_end); + } + ulonglong get_max_int_value() const override + { + return unsigned_flag ? 0xFFFFFFFFULL : 0x7FFFFFFFULL; + } +}; + + +class Field_longlong :public Field_int +{ + const Type_handler_general_purpose_int *type_handler_priv() const + { + if (is_unsigned()) + return &type_handler_ulonglong; + return &type_handler_slonglong; + } +public: + Field_longlong(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, + enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg, + bool zero_arg, bool unsigned_arg) + :Field_int(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, zero_arg, unsigned_arg) + {} + Field_longlong(uint32 len_arg,bool maybe_null_arg, + const LEX_CSTRING *field_name_arg, + bool unsigned_arg) + :Field_int((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0,0, + NONE, field_name_arg, 0, unsigned_arg) + {} + const Type_handler *type_handler() const override + { return type_handler_priv(); } + enum ha_base_keytype key_type() const override + { return unsigned_flag ? 
HA_KEYTYPE_ULONGLONG : HA_KEYTYPE_LONGLONG; } + int store(const char *to,size_t length,CHARSET_INFO *charset) override; + int store(double nr) override; + int store(longlong nr, bool unsigned_val) override; + int reset() override + { + ptr[0]=ptr[1]=ptr[2]=ptr[3]=ptr[4]=ptr[5]=ptr[6]=ptr[7]=0; + return 0; + } + double val_real() override; + longlong val_int() override; + String *val_str(String *, String *) override; + bool send(Protocol *protocol) override; + int cmp(const uchar *,const uchar *) const override; + void sort_string(uchar *buff,uint length) override; + uint32 pack_length() const override { return 8; } + const Type_limits_int *type_limits_int() const override + { + return type_handler_priv()->type_limits_int(); + } + uchar *pack(uchar* to, const uchar *from, uint) override + { + return pack_int64(to, from); + } + const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end, + uint) override + { + return unpack_int64(to, from, from_end); + } + void set_max() override; + bool is_max() override; + ulonglong get_max_int_value() const override + { + return unsigned_flag ? 
0xFFFFFFFFFFFFFFFFULL : 0x7FFFFFFFFFFFFFFFULL; + } +}; + + +class Field_vers_trx_id :public Field_longlong { + MYSQL_TIME cache; + ulonglong cached; +public: + Field_vers_trx_id(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, bool zero_arg, + bool unsigned_arg) + : Field_longlong(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, zero_arg, + unsigned_arg), + cached(0) + {} + const Type_handler *type_handler() const override + { return &type_handler_vers_trx_id; } + uint size_of() const override { return sizeof *this; } + bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate, ulonglong trx_id); + bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + return get_date(ltime, fuzzydate, (ulonglong) val_int()); + } + bool test_if_equality_guarantees_uniqueness(const Item *item) const override; + Data_type_compatibility can_optimize_keypart_ref(const Item_bool_func *, + const Item *) + const override + { + return Data_type_compatibility::OK; + } + + Data_type_compatibility can_optimize_group_min_max(const Item_bool_func *, + const Item *) + const override + { + return Data_type_compatibility::OK; + } + Data_type_compatibility can_optimize_range(const Item_bool_func *, + const Item *, bool is_eq_func) + const override + { + return Data_type_compatibility::OK; + } + /* cmp_type() cannot be TIME_RESULT, because we want to compare this field against + integers. But in all other cases we treat it as TIME_RESULT! 
*/ +}; + + +class Field_float final :public Field_real { +public: + Field_float(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, + enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg, + decimal_digits_t dec_arg,bool zero_arg,bool unsigned_arg); + Field_float(uint32 len_arg, bool maybe_null_arg, + const LEX_CSTRING *field_name_arg, decimal_digits_t dec_arg); + const Type_handler *type_handler() const override + { return &type_handler_float; } + enum ha_base_keytype key_type() const override { return HA_KEYTYPE_FLOAT; } + int store(const char *to,size_t length,CHARSET_INFO *charset) override; + int store(double nr) override; + int store(longlong nr, bool unsigned_val) override; + int reset() override { bzero(ptr,sizeof(float)); return 0; } + double val_real() override; + longlong val_int() override; + String *val_str(String *, String *) override; + bool send(Protocol *protocol) override; + int cmp(const uchar *,const uchar *) const override; + void sort_string(uchar *buff, uint length) override; + uint32 pack_length() const override { return sizeof(float); } + uint row_pack_length() const override { return pack_length(); } + ulonglong get_max_int_value() const override + { + /* + We use the maximum as per IEEE754-2008 standard, 2^24 + */ + return 0x1000000ULL; + } + Binlog_type_info binlog_type_info() const override; +}; + + +class Field_double :public Field_real { + longlong val_int_from_real(bool want_unsigned_result); +public: + Field_double(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, + enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg, + decimal_digits_t dec_arg,bool zero_arg,bool unsigned_arg); + Field_double(uint32 len_arg, bool maybe_null_arg, + const LEX_CSTRING *field_name_arg, decimal_digits_t dec_arg); + Field_double(uint32 len_arg, bool maybe_null_arg, + const LEX_CSTRING *field_name_arg, + decimal_digits_t dec_arg, bool not_fixed_arg); + void init_for_tmp_table(Field 
*org_field, TABLE *new_table) override + { + Field::init_for_tmp_table(org_field, new_table); + not_fixed= true; + } + const Type_handler *type_handler() const override + { return &type_handler_double; } + enum ha_base_keytype key_type() const override final { return HA_KEYTYPE_DOUBLE; } + int store(const char *to,size_t length,CHARSET_INFO *charset) override final; + int store(double nr) override final; + int store(longlong nr, bool unsigned_val) override final; + int reset() override final { bzero(ptr,sizeof(double)); return 0; } + double val_real() override final; + longlong val_int() override final { return val_int_from_real(false); } + ulonglong val_uint() override final { return (ulonglong) val_int_from_real(true); } + String *val_str(String *, String *) override final; + bool send(Protocol *protocol) override; + int cmp(const uchar *,const uchar *) const override final; + void sort_string(uchar *buff, uint length) override final; + uint32 pack_length() const override final { return sizeof(double); } + uint row_pack_length() const override final { return pack_length(); } + ulonglong get_max_int_value() const override final + { + /* + We use the maximum as per IEEE754-2008 standard, 2^53 + */ + return 0x20000000000000ULL; + } + Binlog_type_info binlog_type_info() const override final; +}; + + +/* Everything saved in this will disappear. 
It will always return NULL */ + +class Field_null :public Field_str { + static uchar null[1]; +public: + Field_null(uchar *ptr_arg, uint32 len_arg, + enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg, + const DTCollation &collation) + :Field_str(ptr_arg, len_arg, null, 1, + unireg_check_arg, field_name_arg, collation) + {} + const Type_handler *type_handler() const override + { return &type_handler_null; } + enum_conv_type rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const override; + Information_schema_character_attributes + information_schema_character_attributes() const override + { + return Information_schema_character_attributes(); + } + Copy_func *get_copy_func(const Field *from) const override + { + return do_field_string; + } + int store(const char *to, size_t length, CHARSET_INFO *cs) override final + { null[0]=1; return 0; } + int store(double nr) override final { null[0]=1; return 0; } + int store(longlong nr, bool unsigned_val) override final { null[0]=1; return 0; } + int store_decimal(const my_decimal *d) override final { null[0]=1; return 0; } + int reset() override final { return 0; } + double val_real() override final { return 0.0;} + longlong val_int() override final { return 0;} + bool val_bool() override final { return false; } + my_decimal *val_decimal(my_decimal *) override final { return 0; } + String *val_str(String *value,String *value2) override final + { value2->length(0); return value2;} + bool is_equal(const Column_definition &new_field) const override final; + int cmp(const uchar *a, const uchar *b) const override final { return 0;} + void sort_string(uchar *buff, uint length) override final {} + uint32 pack_length() const override final { return 0; } + void sql_type(String &str) const override final; + uint size_of() const override final { return sizeof *this; } + uint32 max_display_length() const override final { return 4; } + void move_field_offset(my_ptrdiff_t 
ptr_diff) override final {} + Data_type_compatibility can_optimize_keypart_ref(const Item_bool_func *cond, + const Item *item) + const override final + { + return Data_type_compatibility::INCOMPATIBLE_DATA_TYPE; + } + Data_type_compatibility can_optimize_group_min_max(const Item_bool_func *cond, + const Item *const_item) + const override final + { + return Data_type_compatibility::INCOMPATIBLE_DATA_TYPE; + } +}; + + +class Field_temporal :public Field { +protected: + Item *get_equal_const_item_datetime(THD *thd, const Context &ctx, + Item *const_item); + void set_warnings(Sql_condition::enum_warning_level trunc_level, + const ErrConv *str, int was_cut, const char *typestr); + int store_TIME_return_code_with_warnings(int warn, const ErrConv *str, + const char *typestr) + { + if (!MYSQL_TIME_WARN_HAVE_WARNINGS(warn) && + MYSQL_TIME_WARN_HAVE_NOTES(warn)) + { + set_warnings(Sql_condition::WARN_LEVEL_NOTE, str, + warn | MYSQL_TIME_WARN_TRUNCATED, typestr); + return 3; + } + set_warnings(Sql_condition::WARN_LEVEL_WARN, str, warn, typestr); + return warn ? 
2 : 0; + } + int store_invalid_with_warning(const ErrConv *str, int was_cut, + const char *typestr) + { + DBUG_ASSERT(was_cut); + reset(); + Sql_condition::enum_warning_level level= Sql_condition::WARN_LEVEL_WARN; + if (was_cut & MYSQL_TIME_WARN_ZERO_DATE) + { + set_warnings(level, str, MYSQL_TIME_WARN_OUT_OF_RANGE, typestr); + return 2; + } + set_warnings(level, str, MYSQL_TIME_WARN_TRUNCATED, typestr); + return 1; + } + void sql_type_comment(String &str, + const Name &name, + const Name &comment) const; + void sql_type_dec_comment(String &str, + const Name &name, uint dec, + const Name &comment) const; + void sql_type_opt_dec_comment(String &str, + const Name &name, uint dec, + const Name &comment) const + { + if (dec) + sql_type_dec_comment(str, name, dec, comment); + else + sql_type_comment(str, name, comment); + } + static const Name &type_version_mysql56(); +public: + Field_temporal(uchar *ptr_arg,uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, utype unireg_check_arg, + const LEX_CSTRING *field_name_arg) + :Field(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, unireg_check_arg, + field_name_arg) + { flags|= BINARY_FLAG; } + int store_hex_hybrid(const char *str, size_t length) override + { + return store(str, length, &my_charset_bin); + } + sql_mode_t can_handle_sql_mode_dependency_on_store() const override; + Copy_func *get_copy_func(const Field *from) const override; + int save_in_field(Field *to) override + { + MYSQL_TIME ltime; + // For temporal types no truncation needed. Rounding mode is not important. 
+ if (get_date(<ime, TIME_CONV_NONE | TIME_FRAC_NONE)) + return to->reset(); + return to->store_time_dec(<ime, decimals()); + } + bool memcpy_field_possible(const Field *from) const override; + uint32 max_display_length() const override { return field_length; } + bool str_needs_quotes() const override { return true; } + CHARSET_INFO *charset() const override + { + return DTCollation_numeric::singleton().collation; + } + const DTCollation &dtcollation() const override + { + return DTCollation_numeric::singleton(); + } + CHARSET_INFO *sort_charset() const override { return &my_charset_bin; } + bool binary() const override { return true; } + bool val_bool() override { return val_real() != 0e0; } + bool is_equal(const Column_definition &new_field) const override; + bool eq_def(const Field *field) const override + { + return (Field::eq_def(field) && decimals() == field->decimals()); + } + my_decimal *val_decimal(my_decimal*) override; + double pos_in_interval(Field *min, Field *max) override + { + return pos_in_interval_val_real(min, max); + } + Data_type_compatibility can_optimize_keypart_ref(const Item_bool_func *cond, + const Item *item) + const override; + Data_type_compatibility can_optimize_group_min_max(const Item_bool_func *cond, + const Item *const_item) + const override; + Data_type_compatibility can_optimize_range(const Item_bool_func *cond, + const Item *item, + bool is_eq_func) const override + { + return Data_type_compatibility::OK; + } + SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param, KEY_PART *key_part, + const Item_bool_func *cond, + scalar_comparison_op op, Item *value) override; +}; + + +/** + Abstract class for: + - DATE + - DATETIME + - DATETIME(1..6) + - DATETIME(0..6) - MySQL56 version +*/ +class Field_temporal_with_date :public Field_temporal { +protected: + virtual void store_TIME(const MYSQL_TIME *ltime) = 0; + void store_datetime(const Datetime &dt) + { + return store_TIME(dt.get_mysql_time()); + } + virtual bool get_TIME(MYSQL_TIME *ltime, const 
uchar *pos, + date_mode_t fuzzydate) const = 0; + bool validate_MMDD(bool not_zero_date, uint month, uint day, + date_mode_t fuzzydate) const + { + if (!not_zero_date) + return bool(fuzzydate & TIME_NO_ZERO_DATE); + if (!month || !day) + return bool(fuzzydate & TIME_NO_ZERO_IN_DATE); + return false; + } +public: + Field_temporal_with_date(uchar *ptr_arg, uint32 len_arg, + uchar *null_ptr_arg, uchar null_bit_arg, + utype unireg_check_arg, + const LEX_CSTRING *field_name_arg) + :Field_temporal(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg) + {} + bool validate_value_in_record(THD *thd, const uchar *record) const; +}; + + +class Field_timestamp :public Field_temporal { +protected: + int store_TIME_with_warning(THD *, const Datetime *, + const ErrConv *, int warn); + virtual void store_TIMEVAL(const timeval &tv)= 0; + void store_TIMESTAMP(const Timestamp &ts) + { + store_TIMEVAL(ts.tv()); + } + int zero_time_stored_return_code_with_warning(); +public: + Field_timestamp(uchar *ptr_arg, uint32 len_arg, + uchar *null_ptr_arg, uchar null_bit_arg, + enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, + TABLE_SHARE *share); + const Type_handler *type_handler() const override + { return &type_handler_timestamp; } + enum_conv_type rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const override; + Copy_func *get_copy_func(const Field *from) const override; + sql_mode_t conversion_depends_on_sql_mode(THD *, Item *) const override; + int store(const char *to,size_t length,CHARSET_INFO *charset) override; + int store(double nr) override; + int store(longlong nr, bool unsigned_val) override; + int store_time_dec(const MYSQL_TIME *ltime, uint dec) override; + int store_decimal(const my_decimal *) override; + int store_timestamp_dec(const timeval &ts, uint dec) override; + int save_in_field(Field *to) override; + longlong val_int() override; + String *val_str(String *, String *) 
override; + bool zero_pack() const override { return false; } + /* + This method is used by storage/perfschema and + Item_func_now_local::save_in_field(). + */ + void store_TIME(my_time_t ts, ulong sec_part) + { + int warn; + time_round_mode_t mode= Datetime::default_round_mode(get_thd()); + store_TIMESTAMP(Timestamp(ts, sec_part).round(decimals(), mode, &warn)); + } + bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + int store_native(const Native &value) override; + bool validate_value_in_record(THD *thd, const uchar *record) const override; + Item *get_equal_const_item(THD *thd, const Context &ctx, Item *const_item) + override + { + return get_equal_const_item_datetime(thd, ctx, const_item); + } + bool load_data_set_null(THD *thd) override; + bool load_data_set_no_data(THD *thd, bool fixed_format) override; +}; + + +class Field_timestamp0 :public Field_timestamp +{ + void store_TIMEVAL(const timeval &tv) override + { + int4store(ptr, tv.tv_sec); + } +public: + Field_timestamp0(uchar *ptr_arg, uint32 len_arg, + uchar *null_ptr_arg, uchar null_bit_arg, + enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, + TABLE_SHARE *share) + :Field_timestamp(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, share) + { } + enum ha_base_keytype key_type() const override + { return HA_KEYTYPE_ULONG_INT; } + void sql_type(String &str) const override + { + sql_type_comment(str, Field_timestamp0::type_handler()->name(), + Type_handler::version_mariadb53()); + } + double val_real() override + { + return (double) Field_timestamp0::val_int(); + } + bool send(Protocol *protocol) override; + int cmp(const uchar *,const uchar *) const override; + void sort_string(uchar *buff,uint length) override; + uint32 pack_length() const override { return 4; } + int set_time() override; + /* Get TIMESTAMP field value as seconds since begging of Unix Epoch */ + my_time_t get_timestamp(const uchar *pos, ulong *sec_part) const override; + 
bool val_native(Native *to) override;
  uchar *pack(uchar *to, const uchar *from, uint) override
  {
    return pack_int32(to, from);  /* 4-byte integer representation */
  }
  const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end,
                      uint) override
  {
    return unpack_int32(to, from, from_end);
  }
  uint size_of() const override { return sizeof *this; }
};


/**
  Abstract class for:
  - TIMESTAMP(1..6)
  - TIMESTAMP(0..6) - MySQL56 version
*/
class Field_timestamp_with_dec :public Field_timestamp {
protected:
  decimal_digits_t dec;  /* number of fractional-second digits */
public:
  Field_timestamp_with_dec(uchar *ptr_arg,
                           uchar *null_ptr_arg, uchar null_bit_arg,
                           enum utype unireg_check_arg,
                           const LEX_CSTRING *field_name_arg,
                           TABLE_SHARE *share, decimal_digits_t dec_arg) :
    Field_timestamp(ptr_arg,
                    MAX_DATETIME_WIDTH + dec_arg + MY_TEST(dec_arg),
                    null_ptr_arg,
                    null_bit_arg, unireg_check_arg, field_name_arg, share),
    dec(dec_arg)
  {
    DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS);
  }
  decimal_digits_t decimals() const override { return dec; }
  enum ha_base_keytype key_type() const override { return HA_KEYTYPE_BINARY; }
  uchar *pack(uchar *to, const uchar *from, uint max_length) override
  { return Field::pack(to, from, max_length); }
  const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end,
                      uint param_data) override
  { return Field::unpack(to, from, from_end, param_data); }
  void make_send_field(Send_field *field) override;
  void sort_string(uchar *to, uint length) override
  {
    /* stored bytes are copied verbatim for sorting (HA_KEYTYPE_BINARY) */
    DBUG_ASSERT(length == pack_length());
    memcpy(to, ptr, length);
  }
  bool send(Protocol *protocol) override;
  double val_real() override;
  my_decimal* val_decimal(my_decimal*) override;
  int set_time() override;
};


/* TIMESTAMP(1..6): the MariaDB high-resolution format */
class Field_timestamp_hires :public Field_timestamp_with_dec {
  uint sec_part_bytes(uint dec) const
  {
    return Type_handler_timestamp::sec_part_bytes(dec);
  }
  void store_TIMEVAL(const timeval &tv) override;
public:
  Field_timestamp_hires(uchar *ptr_arg,
                        uchar
*null_ptr_arg, uchar null_bit_arg,
                        enum utype unireg_check_arg,
                        const LEX_CSTRING *field_name_arg,
                        TABLE_SHARE *share, decimal_digits_t dec_arg) :
    Field_timestamp_with_dec(ptr_arg, null_ptr_arg, null_bit_arg,
                             unireg_check_arg, field_name_arg, share, dec_arg)
  {
    DBUG_ASSERT(dec);  /* dec == 0 is served by Field_timestamp0 instead */
  }
  void sql_type(String &str) const override
  {
    sql_type_dec_comment(str, Field_timestamp_hires::type_handler()->name(),
                         dec, Type_handler::version_mariadb53());
  }
  bool val_native(Native *to) override;
  my_time_t get_timestamp(const uchar *pos, ulong *sec_part) const override;
  int cmp(const uchar *,const uchar *) const override;
  uint32 pack_length() const override { return 4 + sec_part_bytes(dec); }
  uint size_of() const override { return sizeof *this; }
};


/**
  TIMESTAMP(0..6) - MySQL56 version
*/
class Field_timestampf :public Field_timestamp_with_dec {
  void store_TIMEVAL(const timeval &tv) override;
public:
  Field_timestampf(uchar *ptr_arg,
                   uchar *null_ptr_arg, uchar null_bit_arg,
                   enum utype unireg_check_arg,
                   const LEX_CSTRING *field_name_arg,
                   TABLE_SHARE *share, decimal_digits_t dec_arg) :
    Field_timestamp_with_dec(ptr_arg, null_ptr_arg, null_bit_arg,
                             unireg_check_arg, field_name_arg, share, dec_arg)
  {}
  const Type_handler *type_handler() const override
  { return &type_handler_timestamp2; }
  enum_field_types binlog_type() const override
  { return MYSQL_TYPE_TIMESTAMP2; }
  void sql_type(String &str) const override
  {
    sql_type_opt_dec_comment(str, Field_timestampf::type_handler()->name(),
                             dec, type_version_mysql56());
  }
  enum_conv_type rpl_conv_type_from(const Conv_source &source,
                                    const Relay_log_info *rli,
                                    const Conv_param &param) const override;
  uint32 pack_length() const override
  {
    return my_timestamp_binary_length(dec);
  }
  uint row_pack_length() const override { return pack_length(); }
  uint pack_length_from_metadata(uint field_metadata) const override
  {
DBUG_ENTER("Field_timestampf::pack_length_from_metadata");
    uint tmp= my_timestamp_binary_length(field_metadata);
    DBUG_RETURN(tmp);
  }
  int cmp(const uchar *a_ptr,const uchar *b_ptr) const override
  {
    return memcmp(a_ptr, b_ptr, pack_length());
  }
  void set_max() override;
  bool is_max() override;
  my_time_t get_timestamp(const uchar *pos, ulong *sec_part) const override;
  bool val_native(Native *to) override;
  uint size_of() const override { return sizeof *this; }
  Binlog_type_info binlog_type_info() const override;
};


/*
  YEAR: stored like TINYINT; a field_length of 2 selects the YEAR(2)
  type handler, anything else the 4-digit YEAR handler.
*/
class Field_year final :public Field_tiny {
public:
  Field_year(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg,
             uchar null_bit_arg,
             enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg)
    :Field_tiny(ptr_arg, len_arg, null_ptr_arg, null_bit_arg,
                unireg_check_arg, field_name_arg, 1, 1)
  {}
  const Type_handler *type_handler() const override
  {
    return field_length == 2 ? &type_handler_year2 : &type_handler_year;
  }
  enum_conv_type rpl_conv_type_from(const Conv_source &source,
                                    const Relay_log_info *rli,
                                    const Conv_param &param) const override;
  Copy_func *get_copy_func(const Field *from) const override
  {
    if (eq_def(from))
      return get_identical_copy_func();
    switch (from->cmp_type()) {
    case STRING_RESULT:
    {
      /* ENUM/SET copy as integers; any other string type copies as text */
      const Type_handler *handler= from->type_handler();
      if (handler == &type_handler_enum || handler == &type_handler_set)
        return do_field_int;
      return do_field_string;
    }
    case TIME_RESULT:
      return do_field_date;
    case DECIMAL_RESULT:
      return do_field_decimal;
    case REAL_RESULT:
      return do_field_real;
    case INT_RESULT:
      break;  /* fall through to do_field_int below */
    case ROW_RESULT:
    default:
      DBUG_ASSERT(0);
      break;
    }
    return do_field_int;
  }
  int store(const char *to,size_t length,CHARSET_INFO *charset) override;
  int store(double nr) override;
  int store(longlong nr, bool unsigned_val) override;
  int store_time_dec(const MYSQL_TIME *ltime, uint dec) override;
  double val_real() override;
  longlong
val_int() override;
  String *val_str(String *, String *) override;
  bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) override;
  bool send(Protocol *protocol) override;
  Information_schema_numeric_attributes
    information_schema_numeric_attributes() const override
  {
    return Information_schema_numeric_attributes();
  }
  uint32 max_display_length() const override { return field_length; }
  void sql_type(String &str) const override;
};


/*
  Common base for the DATE representations: shared store() logic and
  range-optimizer (get_mm_leaf) support.
*/
class Field_date_common :public Field_temporal_with_date
{
protected:
  int store_TIME_with_warning(const Datetime *ltime, const ErrConv *str,
                              int was_cut);
public:
  Field_date_common(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
                    enum utype unireg_check_arg,
                    const LEX_CSTRING *field_name_arg)
    :Field_temporal_with_date(ptr_arg, MAX_DATE_WIDTH,
                              null_ptr_arg, null_bit_arg,
                              unireg_check_arg, field_name_arg)
  {}
  Copy_func *get_copy_func(const Field *from) const override;
  SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param, KEY_PART *key_part,
                       const Item_bool_func *cond,
                       scalar_comparison_op op, Item *value) override;
  int store(const char *to, size_t length, CHARSET_INFO *charset) override;
  int store(double nr) override;
  int store(longlong nr, bool unsigned_val) override;
  int store_time_dec(const MYSQL_TIME *ltime, uint dec) override;
  int store_decimal(const my_decimal *) override;
};


/* DATE: the original 4-byte format */
class Field_date final :public Field_date_common
{
  void store_TIME(const MYSQL_TIME *ltime) override;
  bool get_TIME(MYSQL_TIME *ltime, const uchar *pos, date_mode_t fuzzydate)
    const override;
public:
  Field_date(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
             enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg)
    :Field_date_common(ptr_arg, null_ptr_arg, null_bit_arg,
                       unireg_check_arg, field_name_arg) {}
  const Type_handler *type_handler() const override
  { return &type_handler_date; }
  enum ha_base_keytype key_type() const override
  { return
HA_KEYTYPE_ULONG_INT; }
  enum_conv_type rpl_conv_type_from(const Conv_source &source,
                                    const Relay_log_info *rli,
                                    const Conv_param &param) const override;
  int reset() override { ptr[0]=ptr[1]=ptr[2]=ptr[3]=0; return 0; }
  bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) override
  { return Field_date::get_TIME(ltime, ptr, fuzzydate); }
  double val_real() override;
  longlong val_int() override;
  String *val_str(String *, String *) override;
  bool send(Protocol *protocol) override;
  int cmp(const uchar *,const uchar *) const override;
  void sort_string(uchar *buff,uint length) override;
  uint32 pack_length() const override { return 4; }
  void sql_type(String &str) const override;
  uchar *pack(uchar* to, const uchar *from, uint) override
  {
    return pack_int32(to, from);
  }
  const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end,
                      uint) override
  {
    return unpack_int32(to, from, from_end);
  }
  uint size_of() const override { return sizeof *this; }
};


/* NEWDATE: the compact 3-byte DATE format */
class Field_newdate final :public Field_date_common
{
  void store_TIME(const MYSQL_TIME *ltime) override;
  bool get_TIME(MYSQL_TIME *ltime, const uchar *pos, date_mode_t fuzzydate)
    const override;
public:
  Field_newdate(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
                enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg)
    :Field_date_common(ptr_arg, null_ptr_arg, null_bit_arg,
                       unireg_check_arg, field_name_arg)
  {}
  const Type_handler *type_handler() const override
  { return &type_handler_newdate; }
  enum ha_base_keytype key_type() const override { return HA_KEYTYPE_UINT24; }
  enum_conv_type rpl_conv_type_from(const Conv_source &source,
                                    const Relay_log_info *rli,
                                    const Conv_param &param) const override;
  int reset() override { ptr[0]=ptr[1]=ptr[2]=0; return 0; }
  double val_real() override;
  longlong val_int() override;
  String *val_str(String *, String *) override;
  bool send(Protocol *protocol) override;
  int cmp(const uchar
*,const uchar *) const override;
  void sort_string(uchar *buff,uint length) override;
  uint32 pack_length() const override { return 3; }
  void sql_type(String &str) const override;
  bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) override
  { return Field_newdate::get_TIME(ltime, ptr, fuzzydate); }
  longlong val_datetime_packed(THD *thd) override;
  uint size_of() const override { return sizeof *this; }
  Item *get_equal_const_item(THD *thd, const Context &ctx, Item *const_item)
    override;
};


/* Base class for all TIME representations */
class Field_time :public Field_temporal {
  /*
    when this Field_time instance is used for storing values for index lookups
    (see class store_key, Field::new_key_field(), etc), the following
    might be set to TO_DAYS(CURDATE()). See also Field_time::store_time_dec()
  */
  long curdays;
protected:
  virtual void store_TIME(const MYSQL_TIME *ltime)= 0;
  void store_TIME(const Time &t) { return store_TIME(t.get_mysql_time()); }
  int store_TIME_with_warning(const Time *ltime, const ErrConv *str, int warn);
  bool check_zero_in_date_with_warn(date_mode_t fuzzydate);
  static void do_field_time(Copy_field *copy);
public:
  Field_time(uchar *ptr_arg, uint length_arg, uchar *null_ptr_arg,
             uchar null_bit_arg, enum utype unireg_check_arg,
             const LEX_CSTRING *field_name_arg)
    :Field_temporal(ptr_arg, length_arg, null_ptr_arg, null_bit_arg,
                    unireg_check_arg, field_name_arg), curdays(0)
  {}
  bool can_be_substituted_to_equal_item(const Context &ctx,
                                        const Item_equal *item_equal) override;
  const Type_handler *type_handler() const override
  { return &type_handler_time; }
  enum_conv_type rpl_conv_type_from(const Conv_source &source,
                                    const Relay_log_info *rli,
                                    const Conv_param &param) const override;
  Copy_func *get_copy_func(const Field *from) const override
  {
    return from->cmp_type() == REAL_RESULT ? do_field_string : // MDEV-9344
           from->type() == MYSQL_TYPE_YEAR ? do_field_int :
           from->type() == MYSQL_TYPE_BIT ? do_field_int :
           eq_def(from) ?
get_identical_copy_func() :
           do_field_time;
  }
  bool memcpy_field_possible(const Field *from) const override
  {
    return real_type() == from->real_type() &&
           decimals() == from->decimals();
  }
  sql_mode_t conversion_depends_on_sql_mode(THD *, Item *) const override;
  int store_native(const Native &value) override;
  bool val_native(Native *to) override;
  int store_time_dec(const MYSQL_TIME *ltime, uint dec) override;
  int store(const char *to,size_t length,CHARSET_INFO *charset) override;
  int store(double nr) override;
  int store(longlong nr, bool unsigned_val) override;
  int store_decimal(const my_decimal *) override;
  String *val_str(String *, String *) override;
  bool send(Protocol *protocol) override;
  void set_curdays(THD *thd);
  Field *new_key_field(MEM_ROOT *root, TABLE *new_table,
                       uchar *new_ptr, uint32 length,
                       uchar *new_null_ptr, uint new_null_bit) override;
  Item *get_equal_const_item(THD *thd, const Context &ctx, Item *const_item)
    override;
};


/* TIME(0): the original 3-byte format without fractional seconds */
class Field_time0 final :public Field_time
{
protected:
  void store_TIME(const MYSQL_TIME *ltime) override;
public:
  Field_time0(uchar *ptr_arg, uint length_arg, uchar *null_ptr_arg,
              uchar null_bit_arg, enum utype unireg_check_arg,
              const LEX_CSTRING *field_name_arg)
    :Field_time(ptr_arg, length_arg, null_ptr_arg, null_bit_arg,
                unireg_check_arg, field_name_arg)
  { }
  enum ha_base_keytype key_type() const override { return HA_KEYTYPE_INT24; }
  void sql_type(String &str) const override
  {
    sql_type_comment(str, Field_time0::type_handler()->name(),
                     Type_handler::version_mariadb53());
  }
  double val_real() override;
  longlong val_int() override;
  bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) override;
  int cmp(const uchar *,const uchar *) const override;
  void sort_string(uchar *buff,uint length) override;
  uint32 pack_length() const override { return 3; }
  uint size_of() const override { return sizeof *this; }
};


/**
  Abstract class for:
  -
TIME(1..6)
  - TIME(0..6) - MySQL56 version
*/
class Field_time_with_dec :public Field_time {
protected:
  decimal_digits_t dec;  /* number of fractional-second digits */
public:
  Field_time_with_dec(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
                      enum utype unireg_check_arg,
                      const LEX_CSTRING *field_name_arg,
                      decimal_digits_t dec_arg)
    :Field_time(ptr_arg, MIN_TIME_WIDTH + dec_arg + MY_TEST(dec_arg),
                null_ptr_arg, null_bit_arg, unireg_check_arg, field_name_arg),
     dec(dec_arg)
  {
    DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS);
  }
  decimal_digits_t decimals() const override { return dec; }
  enum ha_base_keytype key_type() const override { return HA_KEYTYPE_BINARY; }
  longlong val_int() override;
  double val_real() override;
  void make_send_field(Send_field *) override;
};


/**
  TIME(1..6)
*/
class Field_time_hires final :public Field_time_with_dec {
  /*
    NOTE(review): computed from (TIME_MAX_VALUE_SECONDS+1)*TIME_SECOND_PART_FACTOR
    shifted by dec; appears to be the bias mapping the signed TIME range onto
    unsigned storage — confirm against store_TIME() in field.cc.
  */
  longlong zero_point;
  void store_TIME(const MYSQL_TIME *) override;
public:
  Field_time_hires(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
                   enum utype unireg_check_arg,
                   const LEX_CSTRING *field_name_arg,
                   decimal_digits_t dec_arg)
    :Field_time_with_dec(ptr_arg, null_ptr_arg,
                         null_bit_arg, unireg_check_arg, field_name_arg,
                         dec_arg)
  {
    DBUG_ASSERT(dec);  /* dec == 0 is served by Field_time0 instead */
    zero_point= sec_part_shift(
      ((TIME_MAX_VALUE_SECONDS+1LL)*TIME_SECOND_PART_FACTOR), dec);
  }
  void sql_type(String &str) const override
  {
    sql_type_dec_comment(str, Field_time_hires::type_handler()->name(),
                         dec, Type_handler::version_mariadb53());
  }
  int reset() override;
  bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) override;
  int cmp(const uchar *,const uchar *) const override;
  void sort_string(uchar *buff,uint length) override;
  uint32 pack_length() const override
  { return Type_handler_time::hires_bytes(dec); }
  uint size_of() const override { return sizeof *this; }
};


/**
  TIME(0..6) - MySQL56 version
*/
class Field_timef final :public Field_time_with_dec {
  void store_TIME(const MYSQL_TIME *ltime)
override;
public:
  Field_timef(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
              enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg,
              decimal_digits_t dec_arg)
    :Field_time_with_dec(ptr_arg, null_ptr_arg,
                         null_bit_arg, unireg_check_arg, field_name_arg,
                         dec_arg)
  {
    DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS);
  }
  const Type_handler *type_handler() const override
  { return &type_handler_time2; }
  enum_field_types binlog_type() const override { return MYSQL_TYPE_TIME2; }
  void sql_type(String &str) const override
  {
    sql_type_opt_dec_comment(str, Field_timef::type_handler()->name(),
                             dec, type_version_mysql56());
  }
  enum_conv_type rpl_conv_type_from(const Conv_source &source,
                                    const Relay_log_info *rli,
                                    const Conv_param &param) const override;
  uint32 pack_length() const override
  {
    return my_time_binary_length(dec);
  }
  uint row_pack_length() const override { return pack_length(); }
  uint pack_length_from_metadata(uint field_metadata) const override
  {
    DBUG_ENTER("Field_timef::pack_length_from_metadata");
    uint tmp= my_time_binary_length(field_metadata);
    DBUG_RETURN(tmp);
  }
  void sort_string(uchar *to, uint length) override
  {
    /* stored bytes are copied verbatim for sorting (HA_KEYTYPE_BINARY) */
    DBUG_ASSERT(length == Field_timef::pack_length());
    memcpy(to, ptr, length);
  }
  int cmp(const uchar *a_ptr, const uchar *b_ptr) const override
  {
    return memcmp(a_ptr, b_ptr, pack_length());
  }
  int reset() override;
  bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) override;
  longlong val_time_packed(THD *thd) override;
  int store_native(const Native &value) override;
  bool val_native(Native *to) override;
  uint size_of() const override { return sizeof *this; }
  Binlog_type_info binlog_type_info() const override;
};


/* Base class for all DATETIME representations */
class Field_datetime :public Field_temporal_with_date {
protected:
  int store_TIME_with_warning(const Datetime *ltime, const ErrConv *str,
                              int was_cut);
public:
  Field_datetime(uchar *ptr_arg, uint length_arg, uchar *null_ptr_arg,
                 uchar
null_bit_arg, enum utype unireg_check_arg,
                 const LEX_CSTRING *field_name_arg)
    :Field_temporal_with_date(ptr_arg, length_arg, null_ptr_arg, null_bit_arg,
                              unireg_check_arg, field_name_arg)
  {
    /*
      TIMESTAMP-style "update now" unireg checks are mapped to
      ON_UPDATE_NOW_FLAG in the field flags.
    */
    if (unireg_check == TIMESTAMP_UN_FIELD ||
        unireg_check == TIMESTAMP_DNUN_FIELD)
      flags|= ON_UPDATE_NOW_FLAG;
  }
  const Type_handler *type_handler() const override
  { return &type_handler_datetime; }
  sql_mode_t conversion_depends_on_sql_mode(THD *, Item *) const override;
  enum_conv_type rpl_conv_type_from(const Conv_source &source,
                                    const Relay_log_info *rli,
                                    const Conv_param &param) const override;
  int store(const char *to, size_t length, CHARSET_INFO *charset) override;
  int store(double nr) override;
  int store(longlong nr, bool unsigned_val) override;
  int store_time_dec(const MYSQL_TIME *ltime, uint dec) override;
  int store_decimal(const my_decimal *) override;
  int set_time() override;
  Item *get_equal_const_item(THD *thd, const Context &ctx, Item *const_item)
    override
  {
    return get_equal_const_item_datetime(thd, ctx, const_item);
  }
};


/*
  Stored as an 8 byte unsigned int.
Should sometimes be changed to a 6 byte
*/

class Field_datetime0 final :public Field_datetime
{
  void store_TIME(const MYSQL_TIME *ltime) override;
  bool get_TIME(MYSQL_TIME *ltime, const uchar *pos, date_mode_t fuzzydate)
    const override;
public:
  Field_datetime0(uchar *ptr_arg, uint length_arg, uchar *null_ptr_arg,
                  uchar null_bit_arg, enum utype unireg_check_arg,
                  const LEX_CSTRING *field_name_arg)
    :Field_datetime(ptr_arg, length_arg, null_ptr_arg, null_bit_arg,
                    unireg_check_arg, field_name_arg)
  {}
  enum ha_base_keytype key_type() const override
  { return HA_KEYTYPE_ULONGLONG; }
  void sql_type(String &str) const override
  {
    sql_type_comment(str, Field_datetime0::type_handler()->name(),
                     Type_handler::version_mariadb53());
  }
  double val_real() override
  {
    return (double) Field_datetime0::val_int();
  }
  longlong val_int() override;
  String *val_str(String *, String *) override;
  bool send(Protocol *protocol) override;
  int cmp(const uchar *,const uchar *) const override;
  void sort_string(uchar *buff,uint length) override;
  uint32 pack_length() const override { return 8; }
  bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) override
  { return Field_datetime0::get_TIME(ltime, ptr, fuzzydate); }
  uchar *pack(uchar* to, const uchar *from, uint) override
  {
    return pack_int64(to, from);
  }
  const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end,
                      uint) override
  {
    return unpack_int64(to, from, from_end);
  }
  uint size_of() const override { return sizeof *this; }
};


/**
  Abstract class for:
  - DATETIME(1..6)
  - DATETIME(0..6) - MySQL56 version
*/
class Field_datetime_with_dec :public Field_datetime {
protected:
  decimal_digits_t dec;  /* number of fractional-second digits */
public:
  Field_datetime_with_dec(uchar *ptr_arg, uchar *null_ptr_arg,
                          uchar null_bit_arg, enum utype unireg_check_arg,
                          const LEX_CSTRING *field_name_arg,
                          decimal_digits_t dec_arg)
    :Field_datetime(ptr_arg, MAX_DATETIME_WIDTH + dec_arg +
MY_TEST(dec_arg),
                    null_ptr_arg, null_bit_arg, unireg_check_arg,
                    field_name_arg), dec(dec_arg)
  {
    DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS);
  }
  /* All leaf DATETIME formats share these implementations, hence "final" */
  decimal_digits_t decimals() const override final { return dec; }
  enum ha_base_keytype key_type() const override final
  { return HA_KEYTYPE_BINARY; }
  void make_send_field(Send_field *field) override final;
  bool send(Protocol *protocol) override final;
  uchar *pack(uchar *to, const uchar *from, uint max_length) override final
  { return Field::pack(to, from, max_length); }
  const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end,
                      uint param_data) override final
  { return Field::unpack(to, from, from_end, param_data); }
  void sort_string(uchar *to, uint length) override final
  {
    /* stored bytes are copied verbatim for sorting (HA_KEYTYPE_BINARY) */
    DBUG_ASSERT(length == pack_length());
    memcpy(to, ptr, length);
  }
  double val_real() override final;
  longlong val_int() override final;
  String *val_str(String *, String *) override final;
};


/**
  DATETIME(1..6)
*/
class Field_datetime_hires final :public Field_datetime_with_dec {
  void store_TIME(const MYSQL_TIME *ltime) override;
  bool get_TIME(MYSQL_TIME *ltime, const uchar *pos, date_mode_t fuzzydate)
    const override;
public:
  Field_datetime_hires(uchar *ptr_arg, uchar *null_ptr_arg,
                       uchar null_bit_arg, enum utype unireg_check_arg,
                       const LEX_CSTRING *field_name_arg,
                       decimal_digits_t dec_arg)
    :Field_datetime_with_dec(ptr_arg, null_ptr_arg, null_bit_arg,
                             unireg_check_arg, field_name_arg, dec_arg)
  {
    DBUG_ASSERT(dec);  /* dec == 0 is served by Field_datetime0 instead */
  }
  void sql_type(String &str) const override
  {
    sql_type_dec_comment(str, Field_datetime_hires::type_handler()->name(),
                         dec, Type_handler::version_mariadb53());
  }
  int cmp(const uchar *,const uchar *) const override;
  uint32 pack_length() const override
  { return Type_handler_datetime::hires_bytes(dec); }
  bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) override
  { return Field_datetime_hires::get_TIME(ltime, ptr, fuzzydate); }
  uint
size_of() const override { return sizeof *this; }
};


/**
  DATETIME(0..6) - MySQL56 version
*/

class Field_datetimef final :public Field_datetime_with_dec {
  void store_TIME(const MYSQL_TIME *ltime) override;
  bool get_TIME(MYSQL_TIME *ltime, const uchar *pos, date_mode_t fuzzydate)
    const override;
public:
  Field_datetimef(uchar *ptr_arg, uchar *null_ptr_arg,
                  uchar null_bit_arg, enum utype unireg_check_arg,
                  const LEX_CSTRING *field_name_arg,
                  decimal_digits_t dec_arg)
    :Field_datetime_with_dec(ptr_arg, null_ptr_arg, null_bit_arg,
                             unireg_check_arg, field_name_arg, dec_arg)
  {}
  const Type_handler *type_handler() const override
  { return &type_handler_datetime2; }
  enum_field_types binlog_type() const override
  { return MYSQL_TYPE_DATETIME2; }
  void sql_type(String &str) const override
  {
    sql_type_opt_dec_comment(str, Field_datetimef::type_handler()->name(),
                             dec, type_version_mysql56());
  }
  enum_conv_type rpl_conv_type_from(const Conv_source &source,
                                    const Relay_log_info *rli,
                                    const Conv_param &param) const override;
  uint32 pack_length() const override
  {
    return my_datetime_binary_length(dec);
  }
  uint row_pack_length() const override { return pack_length(); }
  uint pack_length_from_metadata(uint field_metadata) const override
  {
    DBUG_ENTER("Field_datetimef::pack_length_from_metadata");
    uint tmp= my_datetime_binary_length(field_metadata);
    DBUG_RETURN(tmp);
  }
  int cmp(const uchar *a_ptr, const uchar *b_ptr) const override
  {
    return memcmp(a_ptr, b_ptr, pack_length());
  }
  int reset() override;
  bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) override
  { return Field_datetimef::get_TIME(ltime, ptr, fuzzydate); }
  longlong val_datetime_packed(THD *thd) override;
  uint size_of() const override { return sizeof *this; }
  Binlog_type_info binlog_type_info() const override;
};


/*
  Factory: choose between the 4-byte TIMESTAMP(0) class and the
  high-resolution class depending on the requested precision.
*/
static inline Field_timestamp *
new_Field_timestamp(MEM_ROOT *root,uchar *ptr, uchar *null_ptr, uchar null_bit,
                    enum
Field::utype unireg_check,
                    const LEX_CSTRING *field_name,
                    TABLE_SHARE *share, decimal_digits_t dec)
{
  if (dec==0)
    return new (root)
      Field_timestamp0(ptr, MAX_DATETIME_WIDTH, null_ptr,
                       null_bit, unireg_check, field_name, share);
  /* dec >= FLOATING_POINT_DECIMALS: clamp to the maximum precision */
  if (dec >= FLOATING_POINT_DECIMALS)
    dec= MAX_DATETIME_PRECISION;
  return new (root)
    Field_timestamp_hires(ptr, null_ptr, null_bit, unireg_check,
                          field_name, share, dec);
}

/* Factory: TIME(0) vs TIME(1..6), mirroring new_Field_timestamp() */
static inline Field_time *
new_Field_time(MEM_ROOT *root, uchar *ptr, uchar *null_ptr, uchar null_bit,
               enum Field::utype unireg_check, const LEX_CSTRING *field_name,
               decimal_digits_t dec)
{
  if (dec == 0)
    return new (root)
      Field_time0(ptr, MIN_TIME_WIDTH, null_ptr, null_bit, unireg_check,
                  field_name);
  if (dec >= FLOATING_POINT_DECIMALS)
    dec= MAX_DATETIME_PRECISION;
  return new (root)
    Field_time_hires(ptr, null_ptr, null_bit, unireg_check, field_name, dec);
}

/* Factory: DATETIME(0) vs DATETIME(1..6), mirroring new_Field_timestamp() */
static inline Field_datetime *
new_Field_datetime(MEM_ROOT *root, uchar *ptr, uchar *null_ptr, uchar null_bit,
                   enum Field::utype unireg_check,
                   const LEX_CSTRING *field_name, decimal_digits_t dec)
{
  if (dec == 0)
    return new (root)
      Field_datetime0(ptr, MAX_DATETIME_WIDTH, null_ptr, null_bit,
                      unireg_check, field_name);
  if (dec >= FLOATING_POINT_DECIMALS)
    dec= MAX_DATETIME_PRECISION;
  return new (root)
    Field_datetime_hires(ptr, null_ptr, null_bit,
                         unireg_check, field_name, dec);
}

/* CHAR(N): fixed-length strings, padded on store (see reset() below) */
class Field_string final :public Field_longstr {
  class Warn_filter_string: public Warn_filter
  {
  public:
    Warn_filter_string(const THD *thd, const Field_string *field);
  };
  /*
    NOTE(review): appears to detect columns from old-format
    (frm_version < FRM_VER_TRUE_VARCHAR) packed-record tables — confirm.
  */
  bool is_var_string() const
  {
    return can_alter_field_type &&
           orig_table &&
           (orig_table->s->db_create_options & HA_OPTION_PACK_RECORD) &&
           field_length >= 4 &&
           orig_table->s->frm_version < FRM_VER_TRUE_VARCHAR;
  }
public:
  bool can_alter_field_type;
  Field_string(uchar *ptr_arg, uint32 len_arg,uchar *null_ptr_arg,
               uchar null_bit_arg,
               enum utype unireg_check_arg, const
LEX_CSTRING *field_name_arg,
               const DTCollation &collation)
    :Field_longstr(ptr_arg, len_arg, null_ptr_arg, null_bit_arg,
                   unireg_check_arg, field_name_arg, collation),
     can_alter_field_type(1) {};
  Field_string(uint32 len_arg,bool maybe_null_arg,
               const LEX_CSTRING *field_name_arg,
               const DTCollation &collation)
    :Field_longstr((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0, 0,
                   NONE, field_name_arg, collation),
     can_alter_field_type(1) {};

  const Type_handler *type_handler() const override;
  enum ha_base_keytype key_type() const override
  { return binary() ? HA_KEYTYPE_BINARY : HA_KEYTYPE_TEXT; }
  en_fieldtype tmp_engine_column_type(bool use_packed_rows) const override;
  bool zero_pack() const override { return false; }
  Copy_func *get_copy_func(const Field *from) const override;
  int reset() override
  {
    /* CHAR padding: spaces for character data, 0x00 for binary */
    charset()->fill((char*) ptr, field_length, (has_charset() ? ' ' : 0));
    return 0;
  }
  int store(const char *to,size_t length,CHARSET_INFO *charset) override;
  using Field_str::store;
  double val_real() override;
  longlong val_int() override;
  String *val_str(String *, String *) override;
  my_decimal *val_decimal(my_decimal *) override;
  int cmp(const uchar *,const uchar *) const override;
  void sort_string(uchar *buff,uint length) override;
  void update_data_type_statistics(Data_type_statistics *st) const override
  {
    st->m_fixed_string_count++;
    st->m_fixed_string_total_length+= pack_length();
  }
  void sql_type(String &str) const override;
  void sql_rpl_type(String*) const override;
  bool is_equal(const Column_definition &new_field) const override;
  uchar *pack(uchar *to, const uchar *from, uint max_length) override;
  const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end,
                      uint param_data) override;
  uint pack_length_from_metadata(uint field_metadata) const override
  {
    DBUG_PRINT("debug", ("field_metadata: 0x%04x", field_metadata));
    if (field_metadata == 0)
      return row_pack_length();
return (((field_metadata >> 4) & 0x300) ^ 0x300) + (field_metadata & 0x00ff);
  }
  bool compatible_field_size(uint field_metadata, const Relay_log_info *rli,
                             uint16 mflags, int *order_var) const override;
  uint row_pack_length() const override { return field_length; }
  int pack_cmp(const uchar *a,const uchar *b,uint key_length,
               bool insert_or_update);
  int pack_cmp(const uchar *b,uint key_length,bool insert_or_update);
  uint packed_col_length(const uchar *to, uint length) override;
  uint max_packed_col_length(uint max_length) override;
  uint size_of() const override { return sizeof *this; }
  bool has_charset() const override { return charset() != &my_charset_bin; }
  Field *make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type)
    override;
  uint get_key_image(uchar *buff, uint length,
                     const uchar *ptr_arg, imagetype type) const override;
  sql_mode_t value_depends_on_sql_mode() const override;
  sql_mode_t can_handle_sql_mode_dependency_on_store() const override;
  void print_key_value(String *out, uint32 length) override;
  Binlog_type_info binlog_type_info() const override;
};


/* VARCHAR: a 1- or 2-byte length prefix followed by the data bytes */
class Field_varstring :public Field_longstr {
public:
  /* Pointer to the actual data, past the length prefix */
  const uchar *get_data() const
  {
    return get_data(ptr);
  }
  const uchar *get_data(const uchar *ptr_arg) const
  {
    return ptr_arg + length_bytes;
  }
  /* Current data length in bytes, read from the length prefix */
  uint get_length() const
  {
    return get_length(ptr);
  }
  uint get_length(const uchar *ptr_arg) const
  {
    return length_bytes == 1 ? (uint) *ptr_arg : uint2korr(ptr_arg);
  }
protected:
  void store_length(uint32 number)
  {
    if (length_bytes == 1)
      *ptr= (uchar) number;
    else
      int2store(ptr, number);
  }
  virtual void val_str_from_ptr(String *val, const uchar *ptr) const;
public:
  /*
    The maximum space available in a Field_varstring, in bytes. See
    length_bytes.
*/
  static const uint MAX_SIZE;
  /* Store number of bytes used to store length (1 or 2) */
  uint32 length_bytes;
  Field_varstring(uchar *ptr_arg,
                  uint32 len_arg, uint length_bytes_arg,
                  uchar *null_ptr_arg, uchar null_bit_arg,
                  enum utype unireg_check_arg,
                  const LEX_CSTRING *field_name_arg,
                  TABLE_SHARE *share, const DTCollation &collation)
    :Field_longstr(ptr_arg, len_arg, null_ptr_arg, null_bit_arg,
                   unireg_check_arg, field_name_arg, collation),
     length_bytes(length_bytes_arg)
  {
    share->varchar_fields++;
  }
  Field_varstring(uint32 len_arg,bool maybe_null_arg,
                  const LEX_CSTRING *field_name_arg,
                  TABLE_SHARE *share, const DTCollation &collation)
    :Field_longstr((uchar*) 0,len_arg, maybe_null_arg ? (uchar*) "": 0, 0,
                   NONE, field_name_arg, collation),
     length_bytes(len_arg < 256 ? 1 :2)  /* 1-byte prefix fits up to 255 */
  {
    share->varchar_fields++;
  }

  const Type_handler *type_handler() const override;
  en_fieldtype tmp_engine_column_type(bool use_packed_rows) const override
  {
    return FIELD_VARCHAR;
  }
  enum ha_base_keytype key_type() const override;
  uint16 key_part_flag() const override { return HA_VAR_LENGTH_PART; }
  uint16 key_part_length_bytes() const override { return HA_KEY_BLOB_LENGTH; }
  uint row_pack_length() const override { return field_length; }
  bool zero_pack() const override { return false; }
  int reset() override { bzero(ptr,field_length+length_bytes); return 0; }
  uint32 pack_length() const override
  { return (uint32) field_length+length_bytes; }
  uint32 key_length() const override { return (uint32) field_length; }
  uint32 sort_length() const override
  {
    return (uint32) field_length + sort_suffix_length();
  }
  uint32 sort_suffix_length() const override
  {
    return (field_charset() == &my_charset_bin ?
length_bytes : 0); + } + Copy_func *get_copy_func(const Field *from) const override; + bool memcpy_field_possible(const Field *from) const override; + void update_data_type_statistics(Data_type_statistics *st) const override + { + st->m_variable_string_count++; + st->m_variable_string_total_length+= pack_length(); + } + int store(const char *to,size_t length,CHARSET_INFO *charset) override; + using Field_str::store; +#ifdef HAVE_MEM_CHECK + void mark_unused_memory_as_defined() override; +#endif + double val_real() override; + longlong val_int() override; + String *val_str(String *, String *) override; + my_decimal *val_decimal(my_decimal *) override; + bool send(Protocol *protocol) override; + int cmp(const uchar *a,const uchar *b) const override; + int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_char_len) const + override; + void sort_string(uchar *buff,uint length) override; + uint get_key_image(uchar *buff, uint length, + const uchar *ptr_arg, imagetype type) const override; + void set_key_image(const uchar *buff,uint length) override; + void sql_type(String &str) const override; + void sql_rpl_type(String*) const override; + uchar *pack(uchar *to, const uchar *from, uint max_length) override; + const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end, + uint param_data) override; + int cmp_binary(const uchar *a,const uchar *b, uint32 max_length=~0U) const + override; + int key_cmp(const uchar *,const uchar*) const override; + int key_cmp(const uchar *str, uint length) const override; + uint packed_col_length(const uchar *to, uint length) override; + uint max_packed_col_length(uint max_length) override; + uint32 data_length() override; + uint size_of() const override { return sizeof *this; } + bool has_charset() const override + { return charset() == &my_charset_bin ? 
FALSE : TRUE; } + Field *make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type) + override; + Field *new_key_field(MEM_ROOT *root, TABLE *new_table, + uchar *new_ptr, uint32 length, + uchar *new_null_ptr, uint new_null_bit) override; + bool is_equal(const Column_definition &new_field) const override; + void hash_not_null(Hasher *hasher) override; + uint length_size() const override { return length_bytes; } + void print_key_value(String *out, uint32 length) override; + Binlog_type_info binlog_type_info() const override; +}; + + +class Field_varstring_compressed final :public Field_varstring { +public: + Field_varstring_compressed(uchar *ptr_arg, + uint32 len_arg, uint length_bytes_arg, + uchar *null_ptr_arg, uchar null_bit_arg, + enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, + TABLE_SHARE *share, const DTCollation &collation, + Compression_method *compression_method_arg): + Field_varstring(ptr_arg, len_arg, length_bytes_arg, null_ptr_arg, + null_bit_arg, unireg_check_arg, field_name_arg, + share, collation), + compression_method_ptr(compression_method_arg) { DBUG_ASSERT(len_arg > 0); } + Compression_method *compression_method() const override + { return compression_method_ptr; } +private: + Compression_method *compression_method_ptr; + void val_str_from_ptr(String *val, const uchar *ptr) const override; + int store(const char *to, size_t length, CHARSET_INFO *charset) override; + using Field_str::store; + String *val_str(String *, String *) override; + double val_real() override; + longlong val_int() override; + uint size_of() const override { return sizeof *this; } + /* + We use the default Field::send() implementation, + because the derived optimized version (from Field_longstr) + is not suitable for compressed fields. 
+ */ + bool send(Protocol *protocol) override + { + return Field::send(protocol); + } + enum_field_types binlog_type() const override + { return MYSQL_TYPE_VARCHAR_COMPRESSED; } + void sql_type(String &str) const override + { + Field_varstring::sql_type(str); + str.append(STRING_WITH_LEN(" /*!100301 COMPRESSED*/")); + } + uint32 max_display_length() const override { return field_length - 1; } + uint32 character_octet_length() const override { return field_length - 1; } + uint32 char_length() const override + { + return (field_length - 1) / mbmaxlen(); + } + int cmp(const uchar *a_ptr, const uchar *b_ptr) const override; + + /* + Compressed fields can't have keys as two rows may have different + compression methods or compression levels. + */ + + int key_cmp(const uchar *str, uint length) const override + { DBUG_ASSERT(0); return 0; } + using Field_varstring::key_cmp; + Binlog_type_info binlog_type_info() const override; +}; + + +static inline uint8 number_storage_requirement(uint32 n) +{ + return n < 256 ? 1 : n < 65536 ? 2 : n < 16777216 ? 
3 : 4; +} + + +static inline void store_bigendian(ulonglong num, uchar *to, uint bytes) +{ + switch(bytes) { + case 1: mi_int1store(to, num); break; + case 2: mi_int2store(to, num); break; + case 3: mi_int3store(to, num); break; + case 4: mi_int4store(to, num); break; + case 5: mi_int5store(to, num); break; + case 6: mi_int6store(to, num); break; + case 7: mi_int7store(to, num); break; + case 8: mi_int8store(to, num); break; + default: DBUG_ASSERT(0); + } +} + + +static inline longlong read_bigendian(const uchar *from, uint bytes) +{ + switch(bytes) { + case 1: return mi_uint1korr(from); + case 2: return mi_uint2korr(from); + case 3: return mi_uint3korr(from); + case 4: return mi_uint4korr(from); + case 5: return mi_uint5korr(from); + case 6: return mi_uint6korr(from); + case 7: return mi_uint7korr(from); + case 8: return mi_sint8korr(from); + default: DBUG_ASSERT(0); return 0; + } +} + +static inline void store_lowendian(ulonglong num, uchar *to, uint bytes) +{ + switch(bytes) { + case 1: *to= (uchar)num; break; + case 2: int2store(to, num); break; + case 3: int3store(to, num); break; + case 4: int4store(to, num); break; + case 8: int8store(to, num); break; + default: DBUG_ASSERT(0); + } +} + +static inline longlong read_lowendian(const uchar *from, uint bytes) +{ + switch(bytes) { + case 1: return from[0]; + case 2: return uint2korr(from); + case 3: return uint3korr(from); + case 4: return uint4korr(from); + case 8: return sint8korr(from); + default: DBUG_ASSERT(0); return 0; + } +} + + +extern LEX_CSTRING temp_lex_str; + +class Field_blob :public Field_longstr { +protected: + /** + The number of bytes used to represent the length of the blob. + */ + uint packlength; + + /** + The 'value'-object is a cache fronting the storage engine. + */ + String value; + /** + Cache for blob values when reading a row with a virtual blob + field. This is needed to not destroy the old cached value when + updating the blob with a new value when creating the new row. 
+ */ + String read_value; + + static void do_copy_blob(Copy_field *copy); + static void do_conv_blob(Copy_field *copy); + uint get_key_image_itRAW(const uchar *ptr_arg, uchar *buff, uint length) const; +public: + Field_blob(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, + enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg, + TABLE_SHARE *share, uint blob_pack_length, + const DTCollation &collation); + Field_blob(uint32 len_arg,bool maybe_null_arg, const LEX_CSTRING *field_name_arg, + const DTCollation &collation) + :Field_longstr((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0, 0, + NONE, field_name_arg, collation), + packlength(4) + { + flags|= BLOB_FLAG; + } + Field_blob(uint32 len_arg,bool maybe_null_arg, + const LEX_CSTRING *field_name_arg, + const DTCollation &collation, bool set_packlength) + :Field_longstr((uchar*) 0,len_arg, maybe_null_arg ? (uchar*) "": 0, 0, + NONE, field_name_arg, collation) + { + flags|= BLOB_FLAG; + packlength= set_packlength ? number_storage_requirement(len_arg) : 4; + } + Field_blob(uint32 packlength_arg) + :Field_longstr((uchar*) 0, 0, (uchar*) "", 0, NONE, &temp_lex_str, + system_charset_info), + packlength(packlength_arg) {} + const Type_handler *type_handler() const override; + /* Note that the default copy constructor is used, in clone() */ + enum_field_types type() const override + { + /* + We cannot return type_handler()->field_type() here. + Some pieces of the code (e.g. in engines) rely on the fact + that Field::type(), Field::real_type() and Item_field::field_type() + return MYSQL_TYPE_BLOB for all blob variants. + We should eventually fix all such code pieces to expect + all BLOB type codes. + */ + return MYSQL_TYPE_BLOB; + } + enum_field_types real_type() const override + { + return MYSQL_TYPE_BLOB; + } + enum ha_base_keytype key_type() const override + { return binary() ? 
HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2; } + uint16 key_part_flag() const override { return HA_BLOB_PART; } + uint16 key_part_length_bytes() const override { return HA_KEY_BLOB_LENGTH; } + en_fieldtype tmp_engine_column_type(bool use_packed_rows) const override + { + return FIELD_BLOB; + } + Type_numeric_attributes type_numeric_attributes() const override + { + return Type_numeric_attributes(Field_blob::max_display_length(), + decimals(), is_unsigned()); + } + Information_schema_character_attributes + information_schema_character_attributes() const override + { + uint32 octets= Field_blob::character_octet_length(); + uint32 chars= octets / field_charset()->mbminlen; + return Information_schema_character_attributes(octets, chars); + } + void update_data_type_statistics(Data_type_statistics *st) const override + { + st->m_blob_count++; + } + void make_send_field(Send_field *) override; + Copy_func *get_copy_func(const Field *from) const override + { + /* + TODO: MDEV-9331 + if (from->type() == MYSQL_TYPE_BIT) + return do_field_int; + */ + if (!(from->flags & BLOB_FLAG) || from->charset() != charset() || + !from->compression_method() != !compression_method()) + return do_conv_blob; + if (from->pack_length() != Field_blob::pack_length()) + return do_copy_blob; + return get_identical_copy_func(); + } + int store_field(Field *from) override + { // Be sure the value is stored + if (field_charset() == &my_charset_bin && + from->type_handler()->convert_to_binary_using_val_native()) + { + NativeBuffer<64> tmp; + from->val_native(&tmp); + value.copy(tmp.ptr(), tmp.length(), &my_charset_bin); + return store(value.ptr(), value.length(), &my_charset_bin); + } + from->val_str(&value); + if (table->copy_blobs || + (!value.is_alloced() && from->is_varchar_and_in_write_set())) + value.copy(); + return store(value.ptr(), value.length(), from->charset()); + } + bool memcpy_field_possible(const Field *from) const override + { + return Field_str::memcpy_field_possible(from) && + 
!compression_method() == !from->compression_method() && + !table->copy_blobs; + } + bool make_empty_rec_store_default_value(THD *thd, Item *item) override; + int store(const char *to, size_t length, CHARSET_INFO *charset) override; + int store_from_statistical_minmax_field(Field *stat_field, String *str, + MEM_ROOT *mem) override; + using Field_str::store; + void hash_not_null(Hasher *hasher) override; + double val_real() override; + longlong val_int() override; + String *val_str(String *, String *) override; + my_decimal *val_decimal(my_decimal *) override; + int cmp(const uchar *a, const uchar *b) const override; + int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_char_len) const + override; + int cmp(const uchar *a, uint32 a_length, const uchar *b, uint32 b_length) + const; + int cmp_binary(const uchar *a,const uchar *b, uint32 max_length=~0U) const + override; + int key_cmp(const uchar *,const uchar*) const override; + int key_cmp(const uchar *str, uint length) const override; + /* Never update the value of min_val for a blob field */ + bool update_min(Field *min_val, bool force_update) override { return false; } + /* Never update the value of max_val for a blob field */ + bool update_max(Field *max_val, bool force_update) override { return false; } + uint32 key_length() const override { return 0; } + void sort_string(uchar *buff,uint length) override; + uint32 pack_length() const override + { return (uint32) (packlength + portable_sizeof_char_ptr); } + + /** + Return the packed length without the pointer size added. + + This is used to determine the size of the actual data in the row + buffer. + + @returns The length of the raw data itself without the pointer. 
+ */ + uint32 pack_length_no_ptr() const + { return (uint32) (packlength); } + uint row_pack_length() const override { return pack_length_no_ptr(); } + uint32 sort_length() const override; + uint32 sort_suffix_length() const override; + uint32 value_length() override { return get_length(); } + uint32 max_data_length() const override + { + return (uint32) (((ulonglong) 1 << (packlength*8)) -1); + } + int reset() override { bzero(ptr, packlength+sizeof(uchar*)); return 0; } + void reset_fields() override + { + bzero((uchar*) &value, sizeof value); + bzero((uchar*) &read_value, sizeof read_value); + } + uint32 get_field_buffer_size() { return value.alloced_length(); } + void store_length(uchar *i_ptr, uint i_packlength, uint32 i_number); + void store_length(size_t number) + { + DBUG_ASSERT(number < UINT_MAX32); + store_length(ptr, packlength, (uint32)number); + } + inline uint32 get_length(my_ptrdiff_t row_offset= 0) const + { return get_length(ptr+row_offset, this->packlength); } + uint32 get_length(const uchar *ptr, uint packlength) const; + uint32 get_length(const uchar *ptr_arg) const + { return get_length(ptr_arg, this->packlength); } + inline uchar *get_ptr() const { return get_ptr(ptr); } + inline uchar *get_ptr(const uchar *ptr_arg) const + { + uchar *s; + memcpy(&s, ptr_arg + packlength, sizeof(uchar*)); + return s; + } + inline void set_ptr(uchar *length, uchar *data) + { + memcpy(ptr,length,packlength); + memcpy(ptr+packlength, &data,sizeof(char*)); + } + void set_ptr_offset(my_ptrdiff_t ptr_diff, uint32 length, const uchar *data) + { + uchar *ptr_ofs= ADD_TO_PTR(ptr,ptr_diff,uchar*); + store_length(ptr_ofs, packlength, length); + memcpy(ptr_ofs+packlength, &data, sizeof(char*)); + } + inline void set_ptr(uint32 length, uchar *data) + { + set_ptr_offset(0, length, data); + } + int copy_value(Field_blob *from); + uint get_key_image(uchar *buff, uint length, + const uchar *ptr_arg, imagetype type) const override + { + DBUG_ASSERT(type == itRAW); + return 
get_key_image_itRAW(ptr_arg, buff, length); + } + void set_key_image(const uchar *buff,uint length) override; + Field *new_key_field(MEM_ROOT *root, TABLE *new_table, + uchar *new_ptr, uint32 length, + uchar *new_null_ptr, uint new_null_bit) override; + void sql_type(String &str) const override; + /** + Copy blob buffer into internal storage "value" and update record pointer. + + @retval true Memory allocation error + @retval false Success + */ + bool copy() + { + uchar *tmp= get_ptr(); + if (value.copy((char*) tmp, get_length(), charset())) + { + Field_blob::reset(); + return 1; + } + tmp=(uchar*) value.ptr(); + memcpy(ptr+packlength, &tmp, sizeof(char*)); + return 0; + } + void swap(String &inout, bool set_read_value) + { + if (set_read_value) + read_value.swap(inout); + else + value.swap(inout); + } + /** + Return pointer to blob cache or NULL if not cached. + */ + String * cached(bool *set_read_value) + { + char *tmp= (char *) get_ptr(); + if (!value.is_empty() && tmp == value.ptr()) + { + *set_read_value= false; + return &value; + } + + if (!read_value.is_empty() && tmp == read_value.ptr()) + { + *set_read_value= true; + return &read_value; + } + + return NULL; + } + /* store value for the duration of the current read record */ + inline void swap_value_and_read_value() + { + read_value.swap(value); + } + inline void set_value(uchar *data) + { + /* Set value pointer. 
Lengths are not important */ + value.reset((char*) data, 1, 1, &my_charset_bin); + } + uchar *pack(uchar *to, const uchar *from, uint max_length) override; + const uchar *unpack(uchar *to, const uchar *from, const uchar *from_end, + uint param_data) override; + uint packed_col_length(const uchar *col_ptr, uint length) override; + uint max_packed_col_length(uint max_length) override; + void free() override + { + value.free(); + read_value.free(); + } + inline void clear_temporary() + { + uchar *tmp= get_ptr(); + if (likely(value.ptr() == (char*) tmp)) + bzero((uchar*) &value, sizeof(value)); + else + { + /* + Currently read_value should never point to tmp, the following code + is mainly here to make things future proof. + */ + if (unlikely(read_value.ptr() == (char*) tmp)) + bzero((uchar*) &read_value, sizeof(read_value)); + } + } + uint size_of() const override { return sizeof *this; } + bool has_charset() const override { return charset() != &my_charset_bin; } + uint32 max_display_length() const override; + uint32 char_length() const override; + uint32 character_octet_length() const override; + bool is_equal(const Column_definition &new_field) const override; + void print_key_value(String *out, uint32 length) override; + Binlog_type_info binlog_type_info() const override; + + friend void TABLE::remember_blob_values(String *blob_storage); + friend void TABLE::restore_blob_values(String *blob_storage); +}; + + +class Field_blob_compressed final :public Field_blob { +public: + Field_blob_compressed(uchar *ptr_arg, uchar *null_ptr_arg, + uchar null_bit_arg, enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, TABLE_SHARE *share, + uint blob_pack_length, const DTCollation &collation, + Compression_method *compression_method_arg): + Field_blob(ptr_arg, null_ptr_arg, null_bit_arg, unireg_check_arg, + field_name_arg, share, blob_pack_length, collation), + compression_method_ptr(compression_method_arg) {} + Compression_method *compression_method() const 
override + { return compression_method_ptr; } +private: + Compression_method *compression_method_ptr; + int store(const char *to, size_t length, CHARSET_INFO *charset) override; + using Field_str::store; + String *val_str(String *, String *) override; + double val_real() override; + longlong val_int() override; + /* + We use the default Field::send() implementation, + because the derived optimized version (from Field_longstr) + is not suitable for compressed fields. + */ + bool send(Protocol *protocol) override + { + return Field::send(protocol); + } + uint size_of() const override { return sizeof *this; } + enum_field_types binlog_type() const override + { return MYSQL_TYPE_BLOB_COMPRESSED; } + void sql_type(String &str) const override + { + Field_blob::sql_type(str); + str.append(STRING_WITH_LEN(" /*!100301 COMPRESSED*/")); + } + + /* + Compressed fields can't have keys as two rows may have different + compression methods or compression levels. + */ + + uint get_key_image(uchar *buff, uint length, + const uchar *ptr_arg, imagetype type_arg) const override + { DBUG_ASSERT(0); return 0; } + void set_key_image(const uchar *, uint) override + { DBUG_ASSERT(0); } + int key_cmp(const uchar *, const uchar *) const override + { DBUG_ASSERT(0); return 0; } + int key_cmp(const uchar *, uint) const override + { DBUG_ASSERT(0); return 0; } + Field *new_key_field(MEM_ROOT *, TABLE *, uchar *, uint32, uchar *, uint) + override + { DBUG_ASSERT(0); return 0; } + Binlog_type_info binlog_type_info() const override; +}; + + +class Field_enum :public Field_str { + static void do_field_enum(Copy_field *copy_field); + longlong val_int(const uchar *) const; + Data_type_compatibility can_optimize_range_or_keypart_ref( + const Item_bool_func *cond, + const Item *item) const; +protected: + uint packlength; +public: + const TYPELIB *typelib; + Field_enum(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, + enum utype unireg_check_arg, const LEX_CSTRING 
*field_name_arg, + uint packlength_arg, + const TYPELIB *typelib_arg, + const DTCollation &collation) + :Field_str(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, collation), + packlength(packlength_arg),typelib(typelib_arg) + { + flags|=ENUM_FLAG; + } + Field *make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type) + override; + const Type_handler *type_handler() const override + { return &type_handler_enum; } + enum ha_base_keytype key_type() const override; + sql_mode_t can_handle_sql_mode_dependency_on_store() const override; + enum_conv_type rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const override; + Copy_func *get_copy_func(const Field *from) const override + { + if (eq_def(from)) + return get_identical_copy_func(); + if (real_type() == MYSQL_TYPE_ENUM && + from->real_type() == MYSQL_TYPE_ENUM) + return do_field_enum; + if (from->result_type() == STRING_RESULT) + return do_field_string; + return do_field_int; + } + int store_field(Field *from) override + { + if (from->real_type() == MYSQL_TYPE_ENUM && from->val_int() == 0) + { + store_type(0); + return 0; + } + return from->save_in_field(this); + } + int save_in_field(Field *to) override + { + if (to->result_type() != STRING_RESULT) + return to->store(val_int(), 0); + return save_in_field_str(to); + } + bool memcpy_field_possible(const Field *from) const override + { return false; } + void make_empty_rec_reset(THD *) override + { + if (flags & NOT_NULL_FLAG) + { + set_notnull(); + store((longlong) 1, true); + } + else + reset(); + } + int store(const char *to,size_t length,CHARSET_INFO *charset) override; + int store(double nr) override; + int store(longlong nr, bool unsigned_val) override; + double val_real() override; + longlong val_int() override; + String *val_str(String *, String *) override; + int cmp(const uchar *,const uchar *) const override; + void sort_string(uchar *buff,uint length) override; + 
uint32 pack_length() const override { return (uint32) packlength; } + void store_type(ulonglong value); + void sql_type(String &str) const override; + uint size_of() const override { return sizeof *this; } + uint pack_length_from_metadata(uint field_metadata) const override + { return (field_metadata & 0x00ff); } + uint row_pack_length() const override { return pack_length(); } + bool zero_pack() const override { return false; } + bool optimize_range(uint, uint) const override { return false; } + bool eq_def(const Field *field) const override; + bool has_charset() const override { return true; } + /* enum and set are sorted as integers */ + CHARSET_INFO *sort_charset() const override { return &my_charset_bin; } + decimal_digits_t decimals() const override { return 0; } + const TYPELIB *get_typelib() const override { return typelib; } + + uchar *pack(uchar *to, const uchar *from, uint max_length) override; + const uchar *unpack(uchar *to, const uchar *from, const uchar *from_end, + uint param_data) override; + + Data_type_compatibility can_optimize_keypart_ref(const Item_bool_func *cond, + const Item *item) + const override + { + return can_optimize_range_or_keypart_ref(cond, item); + } + Data_type_compatibility can_optimize_group_min_max(const Item_bool_func *cond, + const Item *const_item) + const override + { + /* + Can't use GROUP_MIN_MAX optimization for ENUM and SET, + because the values are stored as numbers in index, + while MIN() and MAX() work as strings. + It would return the records with min and max enum numeric indexes. + "Bug#45300 MAX() and ENUM type" should be fixed first. 
+ */ + return Data_type_compatibility::INCOMPATIBLE_DATA_TYPE; + } + Data_type_compatibility can_optimize_range(const Item_bool_func *cond, + const Item *item, + bool is_eq_func) const override + { + return can_optimize_range_or_keypart_ref(cond, item); + } + Binlog_type_info binlog_type_info() const override; +private: + bool is_equal(const Column_definition &new_field) const override; +}; + + +class Field_set final :public Field_enum { +public: + Field_set(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, uint32 packlength_arg, + const TYPELIB *typelib_arg, const DTCollation &collation) + :Field_enum(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, unireg_check_arg, + field_name_arg, packlength_arg, typelib_arg, collation) + { + flags=(flags & ~ENUM_FLAG) | SET_FLAG; + } + void make_empty_rec_reset(THD *thd) override + { + Field::make_empty_rec_reset(thd); + } + + int store_field(Field *from) override { return from->save_in_field(this); } + int store(const char *to,size_t length,CHARSET_INFO *charset) override; + int store(double nr) override + { return Field_set::store((longlong) nr, FALSE); } + int store(longlong nr, bool unsigned_val) override; + + bool zero_pack() const override { return true; } + String *val_str(String *, String *) override; + void sql_type(String &str) const override; + uint size_of() const override { return sizeof *this; } + const Type_handler *type_handler() const override + { return &type_handler_set; } + bool has_charset() const override { return true; } + Binlog_type_info binlog_type_info() const override; +}; + + +/* + Note: + To use Field_bit::cmp_binary() you need to copy the bits stored in + the beginning of the record (the NULL bytes) to each memory you + want to compare (where the arguments point). + + This is the reason: + - Field_bit::cmp_binary() is only implemented in the base class + (Field::cmp_binary()). 
+ - Field::cmp_binary() currently uses pack_length() to calculate how + long the data is. + - pack_length() includes size of the bits stored in the NULL bytes + of the record. +*/ +class Field_bit :public Field { +public: + uchar *bit_ptr; // position in record where 'uneven' bits store + uchar bit_ofs; // offset to 'uneven' high bits + uint bit_len; // number of 'uneven' high bits + uint bytes_in_rec; + Field_bit(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, uchar *bit_ptr_arg, uchar bit_ofs_arg, + enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg); + const Type_handler *type_handler() const override + { return &type_handler_bit; } + enum ha_base_keytype key_type() const override { return HA_KEYTYPE_BIT; } + uint16 key_part_flag() const override { return HA_BIT_PART; } + uint32 key_length() const override + { return (uint32) (field_length + 7) / 8; } + uint32 max_data_length() const override { return key_length(); } + uint32 max_display_length() const override { return field_length; } + enum_conv_type rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const override; + CHARSET_INFO *charset() const override { return &my_charset_bin; } + const DTCollation & dtcollation() const override; + Information_schema_numeric_attributes + information_schema_numeric_attributes() const override + { + return Information_schema_numeric_attributes(field_length); + } + void update_data_type_statistics(Data_type_statistics *st) const override + { + st->m_uneven_bit_length+= field_length & 7; + } + uint size_of() const override { return sizeof *this; } + int reset() override + { + bzero(ptr, bytes_in_rec); + if (bit_ptr && (bit_len > 0)) // reset odd bits among null bits + clr_rec_bits(bit_ptr, bit_ofs, bit_len); + return 0; + } + Copy_func *get_copy_func(const Field *from) const override + { + if (from->cmp_type() == DECIMAL_RESULT) + return do_field_decimal; + return do_field_int; + } + int 
save_in_field(Field *to) override { return to->store(val_int(), true); } + bool memcpy_field_possible(const Field *from) const override{ return false; } + int store(const char *to, size_t length, CHARSET_INFO *charset) override; + int store(double nr) override; + int store(longlong nr, bool unsigned_val) override; + int store_decimal(const my_decimal *) override; + double val_real() override; + longlong val_int() override; + String *val_str(String*, String *) override; + bool str_needs_quotes() const override { return true; } + my_decimal *val_decimal(my_decimal *) override; + bool val_bool() override { return val_int() != 0; } + int cmp(const uchar *a, const uchar *b) const override + { + DBUG_ASSERT(ptr == a || ptr == b); + if (ptr == a) + return Field_bit::key_cmp(b, bytes_in_rec + MY_TEST(bit_len)); + else + return Field_bit::key_cmp(a, bytes_in_rec + MY_TEST(bit_len)) * -1; + } + int cmp_binary_offset(uint row_offset) override + { return cmp_offset(row_offset); } + int cmp_prefix(const uchar *a, const uchar *b, + size_t prefix_char_length) const override; + int key_cmp(const uchar *a, const uchar *b) const override + { return cmp_binary((uchar *) a, (uchar *) b); } + int key_cmp(const uchar *str, uint length) const override; + int cmp_offset(my_ptrdiff_t row_offset) override; + bool update_min(Field *min_val, bool force_update) override + { + longlong val= val_int(); + bool update_fl= force_update || val < min_val->val_int(); + if (update_fl) + { + min_val->set_notnull(); + min_val->store(val, FALSE); + } + return update_fl; + } + bool update_max(Field *max_val, bool force_update) override + { + longlong val= val_int(); + bool update_fl= force_update || val > max_val->val_int(); + if (update_fl) + { + max_val->set_notnull(); + max_val->store(val, FALSE); + } + return update_fl; + } + void store_field_value(uchar *val, uint) override + { + store(*((longlong *)val), TRUE); + } + double pos_in_interval(Field *min, Field *max) override + { + return 
pos_in_interval_val_real(min, max); + } + void get_image(uchar *buff, uint length, + const uchar *ptr_arg, CHARSET_INFO *cs) const override + { get_key_image(buff, length, ptr_arg, itRAW); } + void set_image(const uchar *buff,uint length, CHARSET_INFO *cs) override + { Field_bit::store((char *) buff, length, cs); } + uint get_key_image(uchar *buff, uint length, + const uchar *ptr_arg, imagetype type) const override; + void set_key_image(const uchar *buff, uint length) override + { Field_bit::store((char*) buff, length, &my_charset_bin); } + void sort_string(uchar *buff, uint length) override + { get_key_image(buff, length, ptr, itRAW); } + uint32 pack_length() const override + { return (uint32) (field_length + 7) / 8; } + uint32 pack_length_in_rec() const override { return bytes_in_rec; } + uint pack_length_from_metadata(uint field_metadata) const override; + uint row_pack_length() const override + { return (bytes_in_rec + ((bit_len > 0) ? 1 : 0)); } + bool compatible_field_size(uint metadata, const Relay_log_info *rli, + uint16 mflags, int *order_var) const override; + void sql_type(String &str) const override; + uchar *pack(uchar *to, const uchar *from, uint max_length) override; + const uchar *unpack(uchar *to, const uchar *from, + const uchar *from_end, uint param_data) override; + int set_default() override; + + Field *new_key_field(MEM_ROOT *root, TABLE *new_table, + uchar *new_ptr, uint32 length, + uchar *new_null_ptr, uint new_null_bit) override; + void set_bit_ptr(uchar *bit_ptr_arg, uchar bit_ofs_arg) + { + bit_ptr= bit_ptr_arg; + bit_ofs= bit_ofs_arg; + } + bool eq(Field *field) override + { + return (Field::eq(field) && + bit_ptr == ((Field_bit *)field)->bit_ptr && + bit_ofs == ((Field_bit *)field)->bit_ofs); + } + bool is_equal(const Column_definition &new_field) const override; + void move_field_offset(my_ptrdiff_t ptr_diff) override + { + Field::move_field_offset(ptr_diff); + + /* + clang does not like when things are added to a null pointer, even if 
+ it is never referenced. + */ + if (bit_ptr) + bit_ptr= ADD_TO_PTR(bit_ptr, ptr_diff, uchar*); + } + void hash_not_null(Hasher *hasher) override; + + SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param, KEY_PART *key_part, + const Item_bool_func *cond, + scalar_comparison_op op, Item *value) override + { + return get_mm_leaf_int(param, key_part, cond, op, value, true); + } + void print_key_value(String *out, uint32 length) override + { + val_int_as_str(out, 1); + } + /** + Save the field metadata for bit fields. + Saves the bit length in the first byte and bytes in record in the + second byte of the field metadata array at index of *metadata_ptr and + *(metadata_ptr + 1). + + @param metadata_ptr First byte of field metadata + + @returns number of bytes written to metadata_ptr + */ + Binlog_type_info binlog_type_info() const override + { + DBUG_PRINT("debug", ("bit_len: %d, bytes_in_rec: %d", + bit_len, bytes_in_rec)); + /* + Since this class and Field_bit_as_char have different ideas of + what should be stored here, we compute the values of the metadata + explicitly using the field_length. + */ + return Binlog_type_info(type(), + static_cast((field_length & 7) | + ((field_length / 8) << 8)), 2); + } + +private: + size_t do_last_null_byte() const override; +}; + + +/** + BIT field represented as chars for non-MyISAM tables. + + @todo The inheritance relationship is backwards since Field_bit is + an extended version of Field_bit_as_char and not the other way + around. Hence, we should refactor it to fix the hierarchy order. 
+ */ +class Field_bit_as_char final :public Field_bit { +public: + Field_bit_as_char(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, + enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg); + enum ha_base_keytype key_type() const override { return HA_KEYTYPE_BINARY; } + uint size_of() const override { return sizeof *this; } + int store(const char *to, size_t length, CHARSET_INFO *charset) override; + int store(double nr) override { return Field_bit::store(nr); } + int store(longlong nr, bool unsigned_val) override + { return Field_bit::store(nr, unsigned_val); } + void sql_type(String &str) const override; +}; + + +class Field_row final :public Field_null +{ + class Virtual_tmp_table *m_table; +public: + Field_row(uchar *ptr_arg, const LEX_CSTRING *field_name_arg) + :Field_null(ptr_arg, 0, Field::NONE, field_name_arg, &my_charset_bin), + m_table(NULL) + {} + ~Field_row(); + en_fieldtype tmp_engine_column_type(bool use_packed_rows) const + { + DBUG_ASSERT(0); + return Field::tmp_engine_column_type(use_packed_rows); + } + enum_conv_type rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const + { + DBUG_ASSERT(0); + return CONV_TYPE_IMPOSSIBLE; + } + Virtual_tmp_table **virtual_tmp_table_addr() { return &m_table; } + bool sp_prepare_and_store_item(THD *thd, Item **value); +}; + + +extern const LEX_CSTRING null_clex_str; + +class Column_definition_attributes +{ +public: + /* + At various stages in execution this can be length of field in bytes or + max number of characters. 
+ */ + ulonglong length; + const TYPELIB *interval; + CHARSET_INFO *charset; + uint32 srid; + uint32 pack_flag; + decimal_digits_t decimals; + Field::utype unireg_check; + Column_definition_attributes() + :length(0), + interval(NULL), + charset(&my_charset_bin), + srid(0), + pack_flag(0), + decimals(0), + unireg_check(Field::NONE) + { } + Column_definition_attributes(const Field *field); + Column_definition_attributes(const Type_all_attributes &attr); + Field *make_field(TABLE_SHARE *share, MEM_ROOT *mem_root, + const Record_addr *rec, + const Type_handler *handler, + const LEX_CSTRING *field_name, + uint32 flags) const; + uint temporal_dec(uint intlen) const + { + return (uint) (length > intlen ? length - intlen - 1 : 0); + } + uint pack_flag_to_pack_length() const; + void frm_pack_basic(uchar *buff) const; + void frm_pack_charset(uchar *buff) const; + void frm_pack_numeric_with_dec(uchar *buff) const; + void frm_unpack_basic(const uchar *buff); + bool frm_unpack_charset(TABLE_SHARE *share, const uchar *buff); + bool frm_unpack_numeric_with_dec(TABLE_SHARE *share, const uchar *buff); + bool frm_unpack_temporal_with_dec(TABLE_SHARE *share, uint intlen, + const uchar *buff); + void set_length_and_dec(const Lex_length_and_dec_st &attr); + CHARSET_INFO *explicit_or_derived_charset(const Column_derived_attributes + *derived_attr) const + { + return charset ? charset : derived_attr->charset(); + } +}; + + +/* + Create field class for CREATE TABLE +*/ +class Column_definition: public Sql_alloc, + public Type_handler_hybrid_field_type, + public Column_definition_attributes +{ + /** + Create "interval" from "interval_list". 
+ @param mem_root - memory root to create the TYPELIB + instance and its values on + @param reuse_interval_list_values - determines if TYPELIB can reuse strings + from interval_list, or should always + allocate a copy on mem_root, even if + character set conversion is not needed + @retval false on success + @retval true on error (bad values, or EOM) + */ + bool create_interval_from_interval_list(MEM_ROOT *mem_root, + bool reuse_interval_list_values); + + /* + Calculate TYPELIB (set or enum) max and total lengths + + @param cs charset+collation pair of the interval + @param max_length length of the longest item + @param tot_length sum of the item lengths + + After this method call: + - ENUM uses max_length + - SET uses tot_length. + */ + void calculate_interval_lengths(uint32 *max_length, uint32 *tot_length) + { + const char **pos; + uint *len; + *max_length= *tot_length= 0; + for (pos= interval->type_names, len= interval->type_lengths; + *pos ; pos++, len++) + { + size_t length= charset->numchars(*pos, *pos + *len); + DBUG_ASSERT(length < UINT_MAX32); + *tot_length+= (uint) length; + set_if_bigger(*max_length, (uint32)length); + } + } + bool prepare_stage1_check_typelib_default(); + bool prepare_stage1_convert_default(THD *, MEM_ROOT *, CHARSET_INFO *to); + const Type_handler *field_type() const; // Prevent using this + Compression_method *compression_method_ptr; +public: + Lex_ident field_name; + LEX_CSTRING comment; // Comment for field + enum enum_column_versioning + { + VERSIONING_NOT_SET, + WITH_VERSIONING, + WITHOUT_VERSIONING + }; + Item *on_update; // ON UPDATE NOW() + field_visibility_t invisible; + /* + The value of `length' as set by parser: is the number of characters + for most of the types, or of bytes for BLOBs or numeric types. + */ + uint32 char_length; + uint flags, pack_length; + List interval_list; + engine_option_value *option_list; + bool explicitly_nullable; + + /* + This is additinal data provided for any computed(virtual) field. 
+ In particular it includes a pointer to the item by which this field + can be computed from other fields. + */ + Virtual_column_info + *vcol_info, // Virtual field + *default_value, // Default value + *check_constraint; // Check constraint + + enum_column_versioning versioning; + + Table_period_info *period; + + Column_definition() + :Type_handler_hybrid_field_type(&type_handler_null), + compression_method_ptr(0), + comment(null_clex_str), + on_update(NULL), invisible(VISIBLE), char_length(0), + flags(0), pack_length(0), + option_list(NULL), explicitly_nullable(false), + vcol_info(0), default_value(0), check_constraint(0), + versioning(VERSIONING_NOT_SET), period(NULL) + { + interval_list.empty(); + } + + Column_definition(THD *thd, Field *field, Field *orig_field); + bool set_attributes(THD *thd, + const Lex_field_type_st &attr, + column_definition_type_t type); + void create_length_to_internal_length_null() + { + DBUG_ASSERT(length == 0); + pack_length= 0; + } + void create_length_to_internal_length_simple() + { + pack_length= type_handler()->calc_pack_length((uint32) length); + } + void create_length_to_internal_length_string() + { + length*= charset->mbmaxlen; + if (real_field_type() == MYSQL_TYPE_VARCHAR && compression_method()) + length++; + set_if_smaller(length, UINT_MAX32); + pack_length= type_handler()->calc_pack_length((uint32) length); + } + void create_length_to_internal_length_typelib() + { + /* Pack_length already calculated in sql_parse.cc */ + length*= charset->mbmaxlen; + } + bool vers_sys_field() const + { + return flags & (VERS_ROW_START | VERS_ROW_END); + } + void create_length_to_internal_length_bit(); + void create_length_to_internal_length_newdecimal(); + + /* + Prepare the "charset" member for string data types, + such as CHAR, VARCHAR, TEXT, ENUM, SET: + - derive the charset if not specified explicitly + - find a _bin collation if the BINARY comparison style was specified, e.g.: + CREATE TABLE t1 (a VARCHAR(10) BINARY) CHARSET utf8; + */ 
+ bool prepare_charset_for_string(const Column_derived_attributes *dattr); + + /** + Prepare a SET/ENUM field. + Create "interval" from "interval_list" if needed, and adjust "length". + @param mem_root - Memory root to allocate TYPELIB and + its values on + @param reuse_interval_list_values - determines if TYPELIB can reuse value + buffers from interval_list, or should + always allocate a copy on mem_root, + even if character set conversion + is not needed + */ + bool prepare_interval_field(MEM_ROOT *mem_root, + bool reuse_interval_list_values); + + void prepare_interval_field_calc_length() + { + uint32 field_length, dummy; + if (real_field_type() == MYSQL_TYPE_SET) + { + calculate_interval_lengths(&dummy, &field_length); + length= field_length + (interval->count - 1); + } + else /* MYSQL_TYPE_ENUM */ + { + calculate_interval_lengths(&field_length, &dummy); + length= field_length; + } + set_if_smaller(length, MAX_FIELD_WIDTH - 1); + } + + bool prepare_blob_field(THD *thd); + + bool sp_prepare_create_field(THD *thd, MEM_ROOT *mem_root); + + bool prepare_stage1(THD *thd, MEM_ROOT *mem_root, + column_definition_type_t type, + const Column_derived_attributes *derived_attr); + void prepare_stage1_simple(CHARSET_INFO *cs) + { + charset= cs; + create_length_to_internal_length_simple(); + } + bool prepare_stage1_typelib(THD *thd, MEM_ROOT *mem_root, + column_definition_type_t deftype); + bool prepare_stage1_string(THD *thd, MEM_ROOT *mem_root); + bool prepare_stage1_bit(THD *thd, MEM_ROOT *mem_root); + + bool bulk_alter(const Column_derived_attributes *derived_attr, + const Column_bulk_alter_attributes *bulk_attr) + { + return type_handler()->Column_definition_bulk_alter(this, + derived_attr, + bulk_attr); + } + void redefine_stage1_common(const Column_definition *dup_field, + const handler *file); + bool redefine_stage1(const Column_definition *dup_field, const handler *file) + { + const Type_handler *handler= dup_field->type_handler(); + return 
handler->Column_definition_redefine_stage1(this, dup_field, file); + } + bool prepare_stage2(handler *handler, ulonglong table_flags); + bool prepare_stage2_blob(handler *handler, + ulonglong table_flags, uint field_flags); + bool prepare_stage2_varchar(ulonglong table_flags); + bool prepare_stage2_typelib(const char *type_name, uint field_flags, + uint *dup_val_count); + uint pack_flag_numeric() const; + uint sign_length() const { return flags & UNSIGNED_FLAG ? 0 : 1; } + bool check_length(uint mysql_errno, uint max_allowed_length) const; + bool fix_attributes_real(uint default_length); + bool fix_attributes_int(uint default_length); + bool fix_attributes_decimal(); + bool fix_attributes_temporal_with_time(uint int_part_length); + bool fix_attributes_bit(); + + bool check(THD *thd); + bool validate_check_constraint(THD *thd); + + bool stored_in_db() const { return !vcol_info || vcol_info->stored_in_db; } + + ha_storage_media field_storage_type() const + { + return (ha_storage_media) + ((flags >> FIELD_FLAGS_STORAGE_MEDIA) & 3); + } + + column_format_type column_format() const + { + return (column_format_type) + ((flags >> FIELD_FLAGS_COLUMN_FORMAT) & 3); + } + + bool has_default_function() const + { + return unireg_check != Field::NONE; + } + + Field *make_field(TABLE_SHARE *share, MEM_ROOT *mem_root, + const Record_addr *addr, + const LEX_CSTRING *field_name_arg) const + { + return Column_definition_attributes::make_field(share, mem_root, addr, + type_handler(), + field_name_arg, flags); + } + Field *make_field(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *field_name_arg) const + { + Record_addr addr(true); + return make_field(share, mem_root, &addr, field_name_arg); + } + /* Return true if default is an expression that must be saved explicitly */ + bool has_default_expression(); + + bool has_default_now_unireg_check() const + { + return unireg_check == Field::TIMESTAMP_DN_FIELD + || unireg_check == Field::TIMESTAMP_DNUN_FIELD; + } + + void 
set_type(const Column_definition &other) + { + set_handler(other.type_handler()); + length= other.length; + char_length= other.char_length; + decimals= other.decimals; + flags= other.flags; + pack_length= other.pack_length; + unireg_check= other.unireg_check; + interval= other.interval; + charset= other.charset; + srid= other.srid; + pack_flag= other.pack_flag; + } + + // Replace the entire value by another definition + void set_column_definition(const Column_definition *def) + { + *this= *def; + } + bool set_compressed(const char *method); + bool set_compressed_deprecated(THD *thd, const char *method); + bool set_compressed_deprecated_column_attribute(THD *thd, + const char *pos, + const char *method); + void set_compression_method(Compression_method *compression_method_arg) + { compression_method_ptr= compression_method_arg; } + Compression_method *compression_method() const + { return compression_method_ptr; } + + bool check_vcol_for_key(THD *thd) const; + + void set_charset_collation_attrs(const + Lex_column_charset_collation_attrs_st &lc) + { + charset= lc.charset_info(); + if (lc.is_contextually_typed_collation()) + flags|= CONTEXT_COLLATION_FLAG; + else + flags&= ~CONTEXT_COLLATION_FLAG; + } + Lex_column_charset_collation_attrs charset_collation_attrs() const + { + if (!charset) + return Lex_column_charset_collation_attrs(); + if (flags & CONTEXT_COLLATION_FLAG) + return Lex_column_charset_collation_attrs(Lex_context_collation(charset)); + return Lex_column_charset_collation_attrs(Lex_exact_collation(charset)); + } +}; + + +/** + List of ROW element definitions, e.g.: + DECLARE a ROW(a INT,b VARCHAR(10)) +*/ +class Row_definition_list: public List +{ +public: + inline bool eq_name(const Spvar_definition *def, const LEX_CSTRING *name) const; + /** + Find a ROW field by name. 
+ @param [IN] name - the name + @param [OUT] offset - if the ROW field found, its offset it returned here + @retval NULL - the ROW field was not found + @retval !NULL - the pointer to the found ROW field + */ + Spvar_definition *find_row_field_by_name(const LEX_CSTRING *name, uint *offset) const + { + // Cast-off the "const" qualifier + List_iterator it(*((List*)this)); + Spvar_definition *def; + for (*offset= 0; (def= it++); (*offset)++) + { + if (eq_name(def, name)) + return def; + } + return 0; + } + static Row_definition_list *make(MEM_ROOT *mem_root, Spvar_definition *var) + { + Row_definition_list *list; + if (!(list= new (mem_root) Row_definition_list())) + return NULL; + return list->push_back(var, mem_root) ? NULL : list; + } + bool append_uniq(MEM_ROOT *thd, Spvar_definition *var); + bool adjust_formal_params_to_actual_params(THD *thd, List *args); + bool adjust_formal_params_to_actual_params(THD *thd, + Item **args, uint arg_count); + bool resolve_type_refs(THD *); +}; + +/** + This class is used during a stored routine or a trigger execution, + at sp_rcontext::create() time. + Currently it can represent: + - variables with explicit data types: DECLARE a INT; + - variables with data type references: DECLARE a t1.a%TYPE; + - ROW type variables + + Notes: + - Scalar variables have m_field_definitions==NULL. + - ROW variables are defined as having MYSQL_TYPE_NULL, + with a non-empty m_field_definitions. 
+ + Data type references to other object types will be added soon, e.g.: + - DECLARE a table_name%ROWTYPE; + - DECLARE a cursor_name%ROWTYPE; + - DECLARE a record_name%TYPE; + - DECLARE a variable_name%TYPE; +*/ +class Spvar_definition: public Column_definition +{ + Qualified_column_ident *m_column_type_ref; // for %TYPE + Table_ident *m_table_rowtype_ref; // for table%ROWTYPE + bool m_cursor_rowtype_ref; // for cursor%ROWTYPE + uint m_cursor_rowtype_offset; // for cursor%ROWTYPE + Row_definition_list *m_row_field_definitions; // for ROW +public: + Spvar_definition() + :m_column_type_ref(NULL), + m_table_rowtype_ref(NULL), + m_cursor_rowtype_ref(false), + m_cursor_rowtype_offset(0), + m_row_field_definitions(NULL) + { } + Spvar_definition(THD *thd, Field *field) + :Column_definition(thd, field, NULL), + m_column_type_ref(NULL), + m_table_rowtype_ref(NULL), + m_cursor_rowtype_ref(false), + m_cursor_rowtype_offset(0), + m_row_field_definitions(NULL) + { } + const Type_handler *type_handler() const + { + return Type_handler_hybrid_field_type::type_handler(); + } + bool is_column_type_ref() const { return m_column_type_ref != 0; } + bool is_table_rowtype_ref() const { return m_table_rowtype_ref != 0; } + bool is_cursor_rowtype_ref() const { return m_cursor_rowtype_ref; } + bool is_explicit_data_type() const + { + return !is_column_type_ref() && + !is_table_rowtype_ref() && + !is_cursor_rowtype_ref(); + } + Qualified_column_ident *column_type_ref() const + { + return m_column_type_ref; + } + void set_column_type_ref(Qualified_column_ident *ref) + { + m_column_type_ref= ref; + } + + Table_ident *table_rowtype_ref() const + { + return m_table_rowtype_ref; + } + void set_table_rowtype_ref(Table_ident *ref) + { + DBUG_ASSERT(ref); + set_handler(&type_handler_row); + m_table_rowtype_ref= ref; + } + + uint cursor_rowtype_offset() const + { + return m_cursor_rowtype_offset; + } + void set_cursor_rowtype_ref(uint offset) + { + set_handler(&type_handler_row); + 
m_cursor_rowtype_ref= true; + m_cursor_rowtype_offset= offset; + } + + /* + Find a ROW field by name. + See Row_field_list::find_row_field_by_name() for details. + */ + Spvar_definition *find_row_field_by_name(const LEX_CSTRING *name, uint *offset) const + { + DBUG_ASSERT(m_row_field_definitions); + return m_row_field_definitions->find_row_field_by_name(name, offset); + } + uint is_row() const + { + return m_row_field_definitions != NULL; + } + // Check if "this" defines a ROW variable with n elements + uint is_row(uint n) const + { + return m_row_field_definitions != NULL && + m_row_field_definitions->elements == n; + } + Row_definition_list *row_field_definitions() const + { + return m_row_field_definitions; + } + void set_row_field_definitions(Row_definition_list *list) + { + DBUG_ASSERT(list); + set_handler(&type_handler_row); + m_row_field_definitions= list; + } + +}; + + +inline bool Row_definition_list::eq_name(const Spvar_definition *def, + const LEX_CSTRING *name) const +{ + return def->field_name.length == name->length && my_strcasecmp(system_charset_info, def->field_name.str, name->str) == 0; +} + + +class Create_field :public Column_definition +{ +public: + LEX_CSTRING change; // Old column name if column is renamed by ALTER + LEX_CSTRING after; // Put column after this one + Field *field; // For alter table + const TYPELIB *save_interval; // Temporary copy for the above + // Used only for UCS2 intervals + + /** structure with parsed options (for comparing fields in ALTER TABLE) */ + ha_field_option_struct *option_struct; + uint offset; + uint8 interval_id; + bool create_if_not_exists; // Used in ALTER TABLE IF NOT EXISTS + + Create_field(): + Column_definition(), + field(0), option_struct(NULL), + create_if_not_exists(false) + { + change= after= null_clex_str; + } + Create_field(THD *thd, Field *old_field, Field *orig_field): + Column_definition(thd, old_field, orig_field), + change(old_field->field_name), + field(old_field), 
option_struct(old_field->option_struct), + create_if_not_exists(false) + { + after= null_clex_str; + } + /* Used to make a clone of this object for ALTER/CREATE TABLE */ + Create_field *clone(MEM_ROOT *mem_root) const; + static void upgrade_data_types(List &list) + { + List_iterator it(list); + while (Create_field *f= it++) + f->type_handler()->Column_definition_implicit_upgrade(f); + } +}; + + +/* + A class for sending info to the client +*/ + +class Send_field :public Sql_alloc, + public Type_handler_hybrid_field_type, + public Send_field_extended_metadata +{ +public: + LEX_CSTRING db_name; + LEX_CSTRING table_name, org_table_name; + LEX_CSTRING col_name, org_col_name; + ulong length; + uint flags; + decimal_digits_t decimals; + Send_field(Field *field) + { + field->make_send_field(this); + DBUG_ASSERT(table_name.str != 0); + normalize(); + } + Send_field(THD *thd, Item *item); + Send_field(Field *field, + const LEX_CSTRING &db_name_arg, + const LEX_CSTRING &table_name_arg) + :Type_handler_hybrid_field_type(field->type_handler()), + db_name(db_name_arg), + table_name(table_name_arg), + org_table_name(table_name_arg), + col_name(field->field_name), + org_col_name(field->field_name), + length(field->field_length), + flags(field->table->maybe_null ? + (field->flags & ~NOT_NULL_FLAG) : field->flags), + decimals(field->decimals()) + { + normalize(); + } + +private: + void normalize() + { + /* limit number of decimals for float and double */ + if (type_handler()->field_type() == MYSQL_TYPE_FLOAT || + type_handler()->field_type() == MYSQL_TYPE_DOUBLE) + set_if_smaller(decimals, FLOATING_POINT_DECIMALS); + } +public: + // This should move to Type_handler eventually + uint32 max_char_length(CHARSET_INFO *cs) const + { + return type_handler()->field_type() >= MYSQL_TYPE_TINY_BLOB && + type_handler()->field_type() <= MYSQL_TYPE_BLOB + ? 
static_cast(length / cs->mbminlen) + : static_cast(length / cs->mbmaxlen); + } + uint32 max_octet_length(CHARSET_INFO *from, CHARSET_INFO *to) const + { + /* + For TEXT/BLOB columns, field_length describes the maximum data + length in bytes. There is no limit to the number of characters + that a TEXT column can store, as long as the data fits into + the designated space. + For the rest of textual columns, field_length is evaluated as + char_count * mbmaxlen, where character count is taken from the + definition of the column. In other words, the maximum number + of characters here is limited by the column definition. + + When one has a LONG TEXT column with a single-byte + character set, and the connection character set is multi-byte, the + client may get fields longer than UINT_MAX32, due to + -> conversion. + In that case column max length would not fit into the 4 bytes + reserved for it in the protocol. So we cut it here to UINT_MAX32. + */ + return char_to_byte_length_safe(max_char_length(from), to->mbmaxlen); + } + + // This should move to Type_handler eventually + bool is_sane_float() const + { + return (decimals <= FLOATING_POINT_DECIMALS || + (type_handler()->field_type() != MYSQL_TYPE_FLOAT && + type_handler()->field_type() != MYSQL_TYPE_DOUBLE)); + } + bool is_sane_signess() const + { + if (type_handler() == type_handler()->type_handler_signed() && + type_handler() == type_handler()->type_handler_unsigned()) + return true; // Any signess is allowed, e.g. DOUBLE, DECIMAL + /* + We are here e.g. in case of INT data type. + The UNSIGNED_FLAG bit must match in flags and in the type handler. 
+ */ + return ((bool) (flags & UNSIGNED_FLAG)) == type_handler()->is_unsigned(); + } + bool is_sane() const + { + return is_sane_float() && is_sane_signess(); + } +}; + + +/* + A class for quick copying data to fields +*/ + +class Copy_field :public Sql_alloc { +public: + uchar *from_ptr,*to_ptr; + uchar *from_null_ptr,*to_null_ptr; + bool *null_row; + uint from_bit,to_bit; + /** + Number of bytes in the fields pointed to by 'from_ptr' and + 'to_ptr'. Usually this is the number of bytes that are copied from + 'from_ptr' to 'to_ptr'. + + For variable-length fields (VARCHAR), the first byte(s) describe + the actual length of the text. For VARCHARs with length + < 256 there is 1 length byte + >= 256 there is 2 length bytes + Thus, if from_field is VARCHAR(10), from_length (and in most cases + to_length) is 11. For VARCHAR(1024), the length is 1026. @see + Field_varstring::length_bytes + + Note that for VARCHARs, do_copy() will be do_varstring*() which + only copies the length-bytes (1 or 2) + the actual length of the + text instead of from/to_length bytes. 
+ */ + uint from_length,to_length; + Field *from_field,*to_field; + String tmp; // For items + + Copy_field() = default; + ~Copy_field() = default; + void set(Field *to,Field *from,bool save); // Field to field + void set(uchar *to,Field *from); // Field to string + void (*do_copy)(Copy_field *); + void (*do_copy2)(Copy_field *); // Used to handle null values +}; + + +uint pack_length_to_packflag(uint type); +enum_field_types get_blob_type_from_length(ulong length); +int set_field_to_null(Field *field); +int set_field_to_null_with_conversions(Field *field, bool no_conversions); +int convert_null_to_field_value_or_error(Field *field); +bool check_expression(Virtual_column_info *vcol, const LEX_CSTRING *name, + enum_vcol_info_type type, Alter_info *alter_info= NULL); + +/* + The following are for the interface with the .frm file +*/ + +#define FIELDFLAG_DECIMAL 1U +#define FIELDFLAG_BINARY 1U // Shares same flag +#define FIELDFLAG_NUMBER 2U +#define FIELDFLAG_ZEROFILL 4U +#define FIELDFLAG_PACK 120U // Bits used for packing +#define FIELDFLAG_INTERVAL 256U // mangled with decimals! +#define FIELDFLAG_BITFIELD 512U // mangled with decimals! +#define FIELDFLAG_BLOB 1024U // mangled with decimals! +#define FIELDFLAG_GEOM 2048U // mangled with decimals! 
+ +#define FIELDFLAG_TREAT_BIT_AS_CHAR 4096U /* use Field_bit_as_char */ +#define FIELDFLAG_LONG_DECIMAL 8192U +#define FIELDFLAG_NO_DEFAULT 16384U /* sql */ +#define FIELDFLAG_MAYBE_NULL 32768U // sql +#define FIELDFLAG_HEX_ESCAPE 0x10000U +#define FIELDFLAG_PACK_SHIFT 3 +#define FIELDFLAG_DEC_SHIFT 8 +#define FIELDFLAG_MAX_DEC 63U + +#define FIELDFLAG_DEC_MASK 0x3F00U + +#define MTYP_TYPENR(type) ((type) & 127U) // Remove bits from type + +#define f_is_dec(x) ((x) & FIELDFLAG_DECIMAL) +#define f_is_num(x) ((x) & FIELDFLAG_NUMBER) +#define f_is_zerofill(x) ((x) & FIELDFLAG_ZEROFILL) +#define f_is_packed(x) ((x) & FIELDFLAG_PACK) +#define f_packtype(x) (((x) >> FIELDFLAG_PACK_SHIFT) & 15) +#define f_decimals(x) ((uint8) (((x) >> FIELDFLAG_DEC_SHIFT) & FIELDFLAG_MAX_DEC)) +#define f_is_alpha(x) (!f_is_num(x)) +#define f_is_binary(x) ((x) & FIELDFLAG_BINARY) // 4.0- compatibility +#define f_is_enum(x) (((x) & (FIELDFLAG_INTERVAL | FIELDFLAG_NUMBER)) == FIELDFLAG_INTERVAL) +#define f_is_bitfield(x) (((x) & (FIELDFLAG_BITFIELD | FIELDFLAG_NUMBER)) == FIELDFLAG_BITFIELD) +#define f_is_blob(x) (((x) & (FIELDFLAG_BLOB | FIELDFLAG_NUMBER)) == FIELDFLAG_BLOB) +#define f_is_geom(x) (((x) & (FIELDFLAG_GEOM | FIELDFLAG_NUMBER)) == FIELDFLAG_GEOM) +#define f_settype(x) (((uint) (x)) << FIELDFLAG_PACK_SHIFT) +#define f_maybe_null(x) ((x) & FIELDFLAG_MAYBE_NULL) +#define f_no_default(x) ((x) & FIELDFLAG_NO_DEFAULT) +#define f_bit_as_char(x) ((x) & FIELDFLAG_TREAT_BIT_AS_CHAR) +#define f_is_hex_escape(x) ((x) & FIELDFLAG_HEX_ESCAPE) +#define f_visibility(x) (static_cast ((x) & INVISIBLE_MAX_BITS)) + +inline +ulonglong TABLE::vers_end_id() const +{ + DBUG_ASSERT(versioned(VERS_TRX_ID)); + return static_cast(vers_end_field()->val_int()); +} + +inline +ulonglong TABLE::vers_start_id() const +{ + DBUG_ASSERT(versioned(VERS_TRX_ID)); + return static_cast(vers_start_field()->val_int()); +} + +double pos_in_interval_for_string(CHARSET_INFO *cset, + const uchar *midp_val, uint32 midp_len, 
+ const uchar *min_val, uint32 min_len, + const uchar *max_val, uint32 max_len); + +double pos_in_interval_for_double(double midp_val, + double min_val, double max_val); + +#endif /* FIELD_INCLUDED */ diff --git a/sql/field_comp.cc b/sql/field_comp.cc new file mode 100644 index 00000000..ab97c8cc --- /dev/null +++ b/sql/field_comp.cc @@ -0,0 +1,154 @@ +/* Copyright (C) 2017 MariaDB Foundation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + + +#include +#include "sql_string.h" +#include "sql_class.h" +#include "field_comp.h" +#include + + +/** + Compresses string using zlib + + @param[out] to destination buffer for compressed data + @param[in] from data to compress + @param[in] length from length + + Requirement is such that string stored at `to' must not exceed `from' length. + Otherwise 0 is returned and caller stores string uncompressed. + + `to' must be large enough to hold `length' bytes. + + length == 1 is an edge case that may break stream.avail_out calculation: at + least 2 bytes required to store metadata. +*/ + +static uint compress_zlib(THD *thd, char *to, const char *from, uint length) +{ + uint level= thd->variables.column_compression_zlib_level; + + /* Caller takes care of empty strings. */ + DBUG_ASSERT(length); + + if (level > 0 && length > 1) + { + z_stream stream; + int wbits= thd->variables.column_compression_zlib_wrap ? 
MAX_WBITS : + -MAX_WBITS; + uint strategy= thd->variables.column_compression_zlib_strategy; + /* Store only meaningful bytes of original data length. */ + uchar original_pack_length= number_storage_requirement(length); + + *to= 0x80 + original_pack_length + (wbits < 0 ? 8 : 0); + store_bigendian(length, (uchar*) to + 1, original_pack_length); + + stream.avail_in= length; + stream.next_in= (Bytef*) from; + + DBUG_ASSERT(length >= static_cast(original_pack_length) + 1); + stream.avail_out= length - original_pack_length - 1; + stream.next_out= (Bytef*) to + original_pack_length + 1; + + stream.zalloc= 0; + stream.zfree= 0; + stream.opaque= 0; + + if (deflateInit2(&stream, level, Z_DEFLATED, wbits, 8, strategy) == Z_OK) + { + int res= deflate(&stream, Z_FINISH); + if (deflateEnd(&stream) == Z_OK && res == Z_STREAM_END) + return (uint) (stream.next_out - (Bytef*) to); + } + } + return 0; +} + + +static int uncompress_zlib(String *to, const uchar *from, uint from_length, + uint field_length) +{ + z_stream stream; + uchar original_pack_length; + int wbits; + ulonglong avail_out; + + original_pack_length= *from & 0x07; + wbits= *from & 8 ? 
-MAX_WBITS : MAX_WBITS; + + from++; + from_length--; + + if (from_length < original_pack_length) + { + my_error(ER_ZLIB_Z_DATA_ERROR, MYF(0)); + return 1; + } + + avail_out= (ulonglong)read_bigendian(from, original_pack_length); + + if (avail_out > field_length) + { + my_error(ER_ZLIB_Z_DATA_ERROR, MYF(0)); + return 1; + } + + stream.avail_out= (uint)avail_out; + if (to->alloc(stream.avail_out)) + return 1; + + stream.next_out= (Bytef*) to->ptr(); + + stream.avail_in= from_length - original_pack_length; + stream.next_in= (Bytef*) from + original_pack_length; + + stream.zalloc= 0; + stream.zfree= 0; + stream.opaque= 0; + + if (inflateInit2(&stream, wbits) == Z_OK) + { + int res= inflate(&stream, Z_FINISH); + if (inflateEnd(&stream) == Z_OK && res == Z_STREAM_END) + { + to->length(stream.total_out); + return 0; + } + } + my_error(ER_ZLIB_Z_DATA_ERROR, MYF(0)); + return 1; +} + + +Compression_method compression_methods[MAX_COMPRESSION_METHODS]= +{ + { 0, 0, 0 }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { "zlib", compress_zlib, uncompress_zlib }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { 0, 0, 0 } +}; diff --git a/sql/field_comp.h b/sql/field_comp.h new file mode 100644 index 00000000..7eb8ab1e --- /dev/null +++ b/sql/field_comp.h @@ -0,0 +1,33 @@ +#ifndef FIELD_COMP_H_INCLUDED +#define FIELD_COMP_H_INCLUDED +/* Copyright (C) 2017 MariaDB Foundation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + + +#define MAX_COMPRESSION_METHODS 16 + +struct Compression_method +{ + const char *name; + uint (*compress)(THD *thd, char *to, const char *from, uint length); + int (*uncompress)(String *to, const uchar *from, uint from_length, + uint field_length); +}; + + +extern Compression_method compression_methods[MAX_COMPRESSION_METHODS]; +#define zlib_compression_method (&compression_methods[8]) + +#endif diff --git a/sql/field_conv.cc b/sql/field_conv.cc new file mode 100644 index 00000000..94b6bf0c --- /dev/null +++ b/sql/field_conv.cc @@ -0,0 +1,922 @@ +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. + Copyright (c) 2010, 2020, MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/** + @file + + @brief + Functions to copy data to or from fields + + This could be done with a single short function but opencoding this + gives much more speed. 
+*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_class.h" // THD +#include + +void Field::do_field_eq(Copy_field *copy) +{ + memcpy(copy->to_ptr,copy->from_ptr,copy->from_length); +} + +static void do_field_1(Copy_field *copy) +{ + copy->to_ptr[0]=copy->from_ptr[0]; +} + +static void do_field_2(Copy_field *copy) +{ + copy->to_ptr[0]=copy->from_ptr[0]; + copy->to_ptr[1]=copy->from_ptr[1]; +} + +static void do_field_3(Copy_field *copy) +{ + copy->to_ptr[0]=copy->from_ptr[0]; + copy->to_ptr[1]=copy->from_ptr[1]; + copy->to_ptr[2]=copy->from_ptr[2]; +} + +static void do_field_4(Copy_field *copy) +{ + copy->to_ptr[0]=copy->from_ptr[0]; + copy->to_ptr[1]=copy->from_ptr[1]; + copy->to_ptr[2]=copy->from_ptr[2]; + copy->to_ptr[3]=copy->from_ptr[3]; +} + +static void do_field_6(Copy_field *copy) +{ // For blob field + copy->to_ptr[0]=copy->from_ptr[0]; + copy->to_ptr[1]=copy->from_ptr[1]; + copy->to_ptr[2]=copy->from_ptr[2]; + copy->to_ptr[3]=copy->from_ptr[3]; + copy->to_ptr[4]=copy->from_ptr[4]; + copy->to_ptr[5]=copy->from_ptr[5]; +} + +static void do_field_8(Copy_field *copy) +{ + copy->to_ptr[0]=copy->from_ptr[0]; + copy->to_ptr[1]=copy->from_ptr[1]; + copy->to_ptr[2]=copy->from_ptr[2]; + copy->to_ptr[3]=copy->from_ptr[3]; + copy->to_ptr[4]=copy->from_ptr[4]; + copy->to_ptr[5]=copy->from_ptr[5]; + copy->to_ptr[6]=copy->from_ptr[6]; + copy->to_ptr[7]=copy->from_ptr[7]; +} + + +static void do_field_to_null_str(Copy_field *copy) +{ + if (*copy->from_null_ptr & copy->from_bit) + { + bzero(copy->to_ptr,copy->from_length); + copy->to_null_ptr[0]=1; // Always bit 1 + } + else + { + copy->to_null_ptr[0]=0; + memcpy(copy->to_ptr,copy->from_ptr,copy->from_length); + } +} + + +static void do_outer_field_to_null_str(Copy_field *copy) +{ + if (*copy->null_row || + (copy->from_null_ptr && (*copy->from_null_ptr & copy->from_bit))) + { + bzero(copy->to_ptr,copy->from_length); + copy->to_null_ptr[0]=1; // Always bit 1 + } + else + { + copy->to_null_ptr[0]=0; + 
memcpy(copy->to_ptr,copy->from_ptr,copy->from_length); + } +} + + +static int set_bad_null_error(Field *field, int err) +{ + switch (field->table->in_use->count_cuted_fields) { + case CHECK_FIELD_WARN: + field->set_warning(Sql_condition::WARN_LEVEL_WARN, err, 1); + /* fall through */ + case CHECK_FIELD_IGNORE: + case CHECK_FIELD_EXPRESSION: + return 0; + case CHECK_FIELD_ERROR_FOR_NULL: + if (!field->table->in_use->no_errors) + my_error(ER_BAD_NULL_ERROR, MYF(0), field->field_name.str); + return -1; + } + DBUG_ASSERT(0); // impossible + return -1; +} + + +int set_field_to_null(Field *field) +{ + if (field->table->null_catch_flags & CHECK_ROW_FOR_NULLS_TO_REJECT) + { + field->table->null_catch_flags|= REJECT_ROW_DUE_TO_NULL_FIELDS; + return -1; + } + if (field->real_maybe_null()) + { + field->set_null(); + field->reset(); + return 0; + } + field->reset(); + return set_bad_null_error(field, WARN_DATA_TRUNCATED); +} + + +/** + Set TIMESTAMP to NOW(), AUTO_INCREMENT to the next number, or report an error + + @param field Field to update + + @retval + 0 Field could take 0 or an automatic conversion was used + @retval + -1 Field could not take NULL and no conversion was used. + If no_conversion was not set, an error message is printed +*/ + +int convert_null_to_field_value_or_error(Field *field) +{ + if (field->type() == MYSQL_TYPE_TIMESTAMP) + { + field->set_time(); + return 0; + } + + field->reset(); // Note: we ignore any potential failure of reset() here. + + if (field == field->table->next_number_field) + { + field->table->auto_increment_field_not_null= FALSE; + return 0; // field is set in fill_record() + } + return set_bad_null_error(field, ER_BAD_NULL_ERROR); +} + +/** + Set field to NULL or TIMESTAMP or to next auto_increment number. + + @param field Field to update + @param no_conversions Set to 1 if we should return 1 if field can't + take null values. + If set to 0 we will do store the 'default value' + if the field is a special field. 
If not we will + give an error. + + @retval + 0 Field could take 0 or an automatic conversion was used + @retval + -1 Field could not take NULL and no conversion was used. + If no_conversion was not set, an error message is printed +*/ + +int +set_field_to_null_with_conversions(Field *field, bool no_conversions) +{ + if (field->table->null_catch_flags & CHECK_ROW_FOR_NULLS_TO_REJECT) + { + field->table->null_catch_flags|= REJECT_ROW_DUE_TO_NULL_FIELDS; + return -1; + } + if (field->real_maybe_null()) + { + field->set_null(); + field->reset(); + return 0; + } + if (no_conversions) + return -1; + + return convert_null_to_field_value_or_error(field); +} + + +static void do_skip(Copy_field *copy __attribute__((unused))) +{ +} + + +/* + Copy: (NULLable field) -> (NULLable field) + + note: if the record we're copying from is NULL-complemetned (i.e. + from_field->table->null_row==1), it will also have all NULLable columns to be + set to NULLs, so we don't need to check table->null_row here. +*/ + +static void do_copy_null(Copy_field *copy) +{ + if (*copy->from_null_ptr & copy->from_bit) + { + *copy->to_null_ptr|=copy->to_bit; + copy->to_field->reset(); + } + else + { + *copy->to_null_ptr&= ~copy->to_bit; + (copy->do_copy2)(copy); + } +} + +/* + Copy: (not-NULL field in table that can be NULL-complemented) -> (NULLable + field) +*/ + +static void do_outer_field_null(Copy_field *copy) +{ + if (*copy->null_row || + (copy->from_null_ptr && (*copy->from_null_ptr & copy->from_bit))) + { + *copy->to_null_ptr|=copy->to_bit; + copy->to_field->reset(); + } + else + { + *copy->to_null_ptr&= ~copy->to_bit; + (copy->do_copy2)(copy); + } +} + +/* + Copy: (not-NULL field in table that can be NULL-complemented) -> (not-NULL + field) +*/ +static void do_copy_nullable_row_to_notnull(Copy_field *copy) +{ + if (*copy->null_row || + (copy->from_null_ptr && (*copy->from_null_ptr & copy->from_bit))) + { + copy->to_field->set_warning(Sql_condition::WARN_LEVEL_WARN, + WARN_DATA_TRUNCATED, 1); + 
copy->to_field->reset(); + } + else + { + (copy->do_copy2)(copy); + } + +} + +/* Copy: (NULL-able field) -> (not NULL-able field) */ +static void do_copy_not_null(Copy_field *copy) +{ + if (*copy->from_null_ptr & copy->from_bit) + { + copy->to_field->set_warning(Sql_condition::WARN_LEVEL_WARN, + WARN_DATA_TRUNCATED, 1); + copy->to_field->reset(); + } + else + (copy->do_copy2)(copy); +} + + +/* Copy: (non-NULLable field) -> (NULLable field) */ +static void do_copy_maybe_null(Copy_field *copy) +{ + *copy->to_null_ptr&= ~copy->to_bit; + (copy->do_copy2)(copy); +} + +/* timestamp and next_number has special handling in case of NULL values */ + +static void do_copy_timestamp(Copy_field *copy) +{ + if (*copy->from_null_ptr & copy->from_bit) + { + /* Same as in set_field_to_null_with_conversions() */ + copy->to_field->set_time(); + } + else + (copy->do_copy2)(copy); +} + + +static void do_copy_next_number(Copy_field *copy) +{ + if (*copy->from_null_ptr & copy->from_bit) + { + /* Same as in set_field_to_null_with_conversions() */ + copy->to_field->table->auto_increment_field_not_null= FALSE; + copy->to_field->reset(); + } + else + (copy->do_copy2)(copy); +} + + +void Field_blob::do_copy_blob(Copy_field *copy) +{ + ((Field_blob*) copy->to_field)->copy_value(((Field_blob*) copy->from_field)); +} + +void Field_blob::do_conv_blob(Copy_field *copy) +{ + copy->from_field->val_str(©->tmp); + ((Field_blob *) copy->to_field)->store(copy->tmp.ptr(), + copy->tmp.length(), + copy->tmp.charset()); +} + +/** Save blob in copy->tmp for GROUP BY. 
*/ + +static void do_save_blob(Copy_field *copy) +{ + char buff[MAX_FIELD_WIDTH]; + String res(buff,sizeof(buff),copy->tmp.charset()); + copy->from_field->val_str(&res); + copy->tmp.copy(res); + ((Field_blob *) copy->to_field)->store(copy->tmp.ptr(), + copy->tmp.length(), + copy->tmp.charset()); +} + + +void Field::do_field_string(Copy_field *copy) +{ + char buff[MAX_FIELD_WIDTH]; + String res(buff, sizeof(buff), copy->from_field->charset()); + res.length(0U); + + copy->from_field->val_str(&res); + copy->to_field->store(res.ptr(), res.length(), res.charset()); +} + + +void Field_enum::do_field_enum(Copy_field *copy) +{ + if (copy->from_field->val_int() == 0) + ((Field_enum *) copy->to_field)->store_type((ulonglong) 0); + else + do_field_string(copy); +} + + +static void do_field_varbinary_pre50(Copy_field *copy) +{ + char buff[MAX_FIELD_WIDTH]; + copy->tmp.set_buffer_if_not_allocated(buff,sizeof(buff),copy->tmp.charset()); + copy->from_field->val_str(©->tmp); + + /* Use the same function as in 4.1 to trim trailing spaces */ + size_t length= my_lengthsp_8bit(&my_charset_bin, copy->tmp.ptr(), + copy->from_field->field_length); + + copy->to_field->store(copy->tmp.ptr(), length, + copy->tmp.charset()); +} + + +void Field::do_field_int(Copy_field *copy) +{ + longlong value= copy->from_field->val_int(); + copy->to_field->store(value, + MY_TEST(copy->from_field->flags & UNSIGNED_FLAG)); +} + +void Field::do_field_real(Copy_field *copy) +{ + double value=copy->from_field->val_real(); + copy->to_field->store(value); +} + + +void Field::do_field_decimal(Copy_field *copy) +{ + my_decimal value(copy->from_field); + copy->to_field->store_decimal(&value); +} + + +void Field::do_field_timestamp(Copy_field *copy) +{ + // XXX why couldn't we do it everywhere? 
+ copy->from_field->save_in_field(copy->to_field); +} + + +void Field::do_field_temporal(Copy_field *copy, date_mode_t fuzzydate) +{ + MYSQL_TIME ltime; + // TODO: we now need to check result + if (copy->from_field->get_date(<ime, fuzzydate)) + copy->to_field->reset(); + else + copy->to_field->store_time_dec(<ime, copy->from_field->decimals()); +} + + +void Field::do_field_datetime(Copy_field *copy) +{ + return do_field_temporal(copy, Datetime::Options(TIME_CONV_NONE, current_thd)); +} + + +void Field::do_field_date(Copy_field *copy) +{ + return do_field_temporal(copy, Date::Options(TIME_CONV_NONE)); +} + + +void Field_time::do_field_time(Copy_field *copy) +{ + return do_field_temporal(copy, Time::Options(current_thd)); +} + + +/** + string copy for single byte characters set when to string is shorter than + from string. +*/ + +static void do_cut_string(Copy_field *copy) +{ + CHARSET_INFO *cs= copy->from_field->charset(); + memcpy(copy->to_ptr,copy->from_ptr,copy->to_length); + + /* Check if we loosed any important characters */ + if (cs->scan((char*) copy->from_ptr + copy->to_length, + (char*) copy->from_ptr + copy->from_length, + MY_SEQ_SPACES) < copy->from_length - copy->to_length) + { + copy->to_field->set_warning(Sql_condition::WARN_LEVEL_WARN, + WARN_DATA_TRUNCATED, 1); + } +} + + +/** + string copy for multi byte characters set when to string is shorter than + from string. 
+*/ + +static void do_cut_string_complex(Copy_field *copy) +{ // Shorter string field + CHARSET_INFO *cs= copy->from_field->charset(); + const uchar *from_end= copy->from_ptr + copy->from_length; + Well_formed_prefix prefix(cs, + (char*) copy->from_ptr, + (char*) from_end, + copy->to_length / cs->mbmaxlen); + size_t copy_length= prefix.length(); + if (copy->to_length < copy_length) + copy_length= copy->to_length; + memcpy(copy->to_ptr, copy->from_ptr, copy_length); + + /* Check if we lost any important characters */ + if (unlikely(prefix.well_formed_error_pos() || + cs->scan((char*) copy->from_ptr + copy_length, + (char*) from_end, + MY_SEQ_SPACES) < + (copy->from_length - copy_length))) + { + copy->to_field->set_warning(Sql_condition::WARN_LEVEL_WARN, + WARN_DATA_TRUNCATED, 1); + } + + if (copy_length < copy->to_length) + cs->fill((char*) copy->to_ptr + copy_length, + copy->to_length - copy_length, ' '); +} + + + + +static void do_expand_binary(Copy_field *copy) +{ + CHARSET_INFO *cs= copy->from_field->charset(); + memcpy(copy->to_ptr,copy->from_ptr,copy->from_length); + cs->fill((char*) copy->to_ptr+copy->from_length, + copy->to_length-copy->from_length, '\0'); +} + + + +static void do_expand_string(Copy_field *copy) +{ + CHARSET_INFO *cs= copy->from_field->charset(); + memcpy(copy->to_ptr,copy->from_ptr,copy->from_length); + cs->fill((char*) copy->to_ptr+copy->from_length, + copy->to_length-copy->from_length, ' '); +} + + +/* + Copy from a Field_varstring with length_bytes==1 + into another Field_varstring with length_bytes==1 + when the target column is not shorter than the source column. + We don't need to calculate the prefix in this case. 
It works for + - non-compressed and compressed columns + - single byte and multi-byte character sets +*/ +static void do_varstring1_no_truncation(Copy_field *copy) +{ + uint length= (uint) *(uchar*) copy->from_ptr; + DBUG_ASSERT(length <= copy->to_length - 1); + *(uchar*) copy->to_ptr= (uchar) length; + memcpy(copy->to_ptr+1, copy->from_ptr + 1, length); +} + +/* + Copy from a Field_varstring with length_bytes==2 + into another Field_varstring with length_bytes==2 + when the target column is not shorter than the source column. + We don't need to calculate the prefix in this case. It works for + - non-compressed and compressed columns + - single byte and multi-byte character sets +*/ +static void do_varstring2_no_truncation(Copy_field *copy) +{ + uint length= uint2korr(copy->from_ptr); + DBUG_ASSERT(length <= copy->to_length - HA_KEY_BLOB_LENGTH); + int2store(copy->to_ptr, length); + memcpy(copy->to_ptr + HA_KEY_BLOB_LENGTH, + copy->from_ptr + HA_KEY_BLOB_LENGTH, length); +} + + +static void do_varstring1(Copy_field *copy) +{ + uint length= (uint) *(uchar*) copy->from_ptr; + if (length > copy->to_length- 1) + { + length=copy->to_length - 1; + if (copy->from_field->table->in_use->count_cuted_fields > + CHECK_FIELD_EXPRESSION && + copy->to_field) + copy->to_field->set_warning(Sql_condition::WARN_LEVEL_WARN, + WARN_DATA_TRUNCATED, 1); + } + *(uchar*) copy->to_ptr= (uchar) length; + memcpy(copy->to_ptr+1, copy->from_ptr + 1, length); +} + + +static void do_varstring1_mb(Copy_field *copy) +{ + CHARSET_INFO *cs= copy->from_field->charset(); + uint from_length= (uint) *(uchar*) copy->from_ptr; + const uchar *from_ptr= copy->from_ptr + 1; + uint to_char_length= (copy->to_length - 1) / cs->mbmaxlen; + Well_formed_prefix prefix(cs, (char*) from_ptr, from_length, to_char_length); + if (prefix.length() < from_length) + { + if (current_thd->count_cuted_fields > CHECK_FIELD_EXPRESSION) + copy->to_field->set_warning(Sql_condition::WARN_LEVEL_WARN, + WARN_DATA_TRUNCATED, 1); + } + 
*copy->to_ptr= (uchar) prefix.length(); + memcpy(copy->to_ptr + 1, from_ptr, prefix.length()); +} + + +static void do_varstring2(Copy_field *copy) +{ + uint length=uint2korr(copy->from_ptr); + if (length > copy->to_length- HA_KEY_BLOB_LENGTH) + { + length=copy->to_length-HA_KEY_BLOB_LENGTH; + if (copy->from_field->table->in_use->count_cuted_fields > + CHECK_FIELD_EXPRESSION && + copy->to_field) + copy->to_field->set_warning(Sql_condition::WARN_LEVEL_WARN, + WARN_DATA_TRUNCATED, 1); + } + int2store(copy->to_ptr,length); + memcpy(copy->to_ptr+HA_KEY_BLOB_LENGTH, copy->from_ptr + HA_KEY_BLOB_LENGTH, + length); +} + + +static void do_varstring2_mb(Copy_field *copy) +{ + CHARSET_INFO *cs= copy->from_field->charset(); + uint char_length= (copy->to_length - HA_KEY_BLOB_LENGTH) / cs->mbmaxlen; + uint from_length= uint2korr(copy->from_ptr); + const uchar *from_beg= copy->from_ptr + HA_KEY_BLOB_LENGTH; + Well_formed_prefix prefix(cs, (char*) from_beg, from_length, char_length); + if (prefix.length() < from_length) + { + if (current_thd->count_cuted_fields > CHECK_FIELD_EXPRESSION) + copy->to_field->set_warning(Sql_condition::WARN_LEVEL_WARN, + WARN_DATA_TRUNCATED, 1); + } + int2store(copy->to_ptr, prefix.length()); + memcpy(copy->to_ptr+HA_KEY_BLOB_LENGTH, from_beg, prefix.length()); +} + + +/*************************************************************************** +** The different functions that fills in a Copy_field class +***************************************************************************/ + +/** + copy of field to maybe null string. + If field is null then the all bytes are set to 0. + if field is not null then the first byte is set to 1 and the rest of the + string is the field value. 
+ The 'to' buffer should have a size of field->pack_length()+1 +*/ + +void Copy_field::set(uchar *to,Field *from) +{ + from_ptr=from->ptr; + to_ptr=to; + from_length=from->pack_length_in_rec(); + if (from->maybe_null()) + { + from_null_ptr=from->null_ptr; + from_bit= from->null_bit; + to_ptr[0]= 1; // Null as default value + to_null_ptr= (uchar*) to_ptr++; + to_bit= 1; + if (from->table->maybe_null) + { + null_row= &from->table->null_row; + do_copy= do_outer_field_to_null_str; + } + else + do_copy= do_field_to_null_str; + } + else + { + to_null_ptr= 0; // For easy debugging + do_copy= Field::do_field_eq; + } +} + + +/* + To do: + + If 'save' is set to true and the 'from' is a blob field, do_copy is set to + do_save_blob rather than do_conv_blob. The only differences between them + appears to be: + + - do_save_blob allocates and uses an intermediate buffer before calling + Field_blob::store. Is this in order to trigger the call to + well_formed_copy_nchars, by changing the pointer copy->tmp.ptr()? + That call will take place anyway in all known cases. 
+ */ +void Copy_field::set(Field *to,Field *from,bool save) +{ + if (to->type() == MYSQL_TYPE_NULL) + { + to_null_ptr=0; // For easy debugging + to_ptr=0; + do_copy=do_skip; + return; + } + from_field=from; + to_field=to; + from_ptr=from->ptr; + from_length=from->pack_length_in_rec(); + to_ptr= to->ptr; + to_length=to_field->pack_length_in_rec(); + + // set up null handling + from_null_ptr=to_null_ptr=0; + if (from->maybe_null()) + { + from_null_ptr= from->null_ptr; + from_bit= from->null_bit; + if (to_field->real_maybe_null()) + { + to_null_ptr= to->null_ptr; + to_bit= to->null_bit; + if (from_null_ptr) + do_copy= do_copy_null; + else + { + null_row= &from->table->null_row; + do_copy= do_outer_field_null; + } + } + else + { + if (to_field->type() == MYSQL_TYPE_TIMESTAMP) + do_copy= do_copy_timestamp; // Automatic timestamp + else if (to_field == to_field->table->next_number_field) + do_copy= do_copy_next_number; + else + { + if (!from_null_ptr) + { + null_row= &from->table->null_row; + do_copy= do_copy_nullable_row_to_notnull; + } + else + do_copy= do_copy_not_null; + } + } + } + else if (to_field->real_maybe_null()) + { + to_null_ptr= to->null_ptr; + to_bit= to->null_bit; + do_copy= do_copy_maybe_null; + } + else + do_copy=0; + + if ((to->flags & BLOB_FLAG) && save) + do_copy2= do_save_blob; + else + do_copy2= from->get_copy_func_to(to); + if (!do_copy) // Not null + do_copy=do_copy2; +} + + +Field::Copy_func *Field_timestamp::get_copy_func(const Field *from) const +{ + Field::Copy_func *copy= Field_temporal::get_copy_func(from); + if (copy == do_field_datetime && from->type() == MYSQL_TYPE_TIMESTAMP) + return do_field_timestamp; + else + return copy; +} + + +Field::Copy_func *Field_date_common::get_copy_func(const Field *from) const +{ + Field::Copy_func *copy= Field_temporal::get_copy_func(from); + return copy == do_field_datetime ? 
do_field_date : copy; +} + + +Field::Copy_func *Field_temporal::get_copy_func(const Field *from) const +{ + /* If types are not 100 % identical then convert trough get_date() */ + if (from->cmp_type() == REAL_RESULT) + return do_field_string; // TODO: MDEV-9344 + if (from->type() == MYSQL_TYPE_YEAR) + return do_field_string; // TODO: MDEV-9343 + if (from->type() == MYSQL_TYPE_BIT) + return do_field_int; + if (!eq_def(from) || + (table->in_use->variables.sql_mode & + (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE))) + return do_field_datetime; + return get_identical_copy_func(); +} + + +Field::Copy_func *Field_varstring::get_copy_func(const Field *from) const +{ + if (from->type() == MYSQL_TYPE_BIT) + return do_field_int; + /* + Detect copy from pre 5.0 varbinary to varbinary as of 5.0 and + use special copy function that removes trailing spaces and thus + repairs data. + */ + if (from->type() == MYSQL_TYPE_VAR_STRING && !from->has_charset() && + !Field_varstring::has_charset()) + return do_field_varbinary_pre50; + if (Field_varstring::real_type() != from->real_type() || + Field_varstring::charset() != from->charset() || + length_bytes != ((const Field_varstring*) from)->length_bytes || + !compression_method() != !from->compression_method()) + return do_field_string; + + if (field_length >= from->field_length) + return length_bytes == 1 ? do_varstring1_no_truncation : + do_varstring2_no_truncation; + + if (compression_method()) + { + /* + Truncation is going to happen, so we need to calculate prefixes. + Can't calculate prefixes directly on compressed data, + need to go through val_str() to uncompress. + */ + return do_field_string; + } + + return length_bytes == 1 ? + (from->charset()->mbmaxlen == 1 ? do_varstring1 : do_varstring1_mb) : + (from->charset()->mbmaxlen == 1 ? 
do_varstring2 : do_varstring2_mb); +} + + +Field::Copy_func *Field_string::get_copy_func(const Field *from) const +{ + if (from->type() == MYSQL_TYPE_BIT) + return do_field_int; + if (Field_string::type_handler() != from->type_handler() || + Field_string::charset() != from->charset()) + return do_field_string; + if (Field_string::pack_length() < from->pack_length()) + return (Field_string::charset()->mbmaxlen == 1 ? + do_cut_string : do_cut_string_complex); + if (Field_string::pack_length() > from->pack_length()) + return Field_string::charset() == &my_charset_bin ? do_expand_binary : + do_expand_string; + return get_identical_copy_func(); +} + + +Field::Copy_func *Field::get_identical_copy_func() const +{ + /* Identical field types */ + switch (pack_length()) { + case 1: return do_field_1; + case 2: return do_field_2; + case 3: return do_field_3; + case 4: return do_field_4; + case 6: return do_field_6; + case 8: return do_field_8; + } + return do_field_eq; +} + + +bool Field_temporal::memcpy_field_possible(const Field *from) const +{ + return real_type() == from->real_type() && + decimals() == from->decimals() && + !sql_mode_for_dates(table->in_use); +} + + +static int field_conv_memcpy(Field *to, Field *from) +{ + /* + This may happen if one does 'UPDATE ... SET x=x' + The test is here mostly for valgrind, but can also be relevant + if memcpy() is implemented with prefetch-write + */ + if (to->ptr != from->ptr) + memcpy(to->ptr,from->ptr, to->pack_length()); + return 0; +} + + +/** + Copy value of the field with conversion. + + @note Impossibility of simple copy should be checked before this call. 
+ + @param to The field to copy to + + @retval TRUE ERROR + @retval FALSE OK + +*/ +static int field_conv_incompatible(Field *to, Field *from) +{ + return to->store_field(from); +} + + +/** + Simple quick field converter that is called on insert, e.g.: + INSERT INTO t1 (field1) SELECT field2 FROM t2; +*/ + +int field_conv(Field *to,Field *from) +{ + return to->memcpy_field_possible(from) ? + field_conv_memcpy(to, from) : + field_conv_incompatible(to, from); +} + + +fast_field_copier Field::get_fast_field_copier(const Field *from) +{ + DBUG_ENTER("Field::get_fast_field_copier"); + DBUG_RETURN(memcpy_field_possible(from) ? + &field_conv_memcpy : + &field_conv_incompatible); +} diff --git a/sql/filesort.cc b/sql/filesort.cc new file mode 100644 index 00000000..d4c290f2 --- /dev/null +++ b/sql/filesort.cc @@ -0,0 +1,3110 @@ +/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. + Copyright (c) 2009, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/** + @file + + @brief + Sorts a database +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "filesort.h" +#include +#include "sql_sort.h" +#include "probes_mysql.h" +#include "sql_base.h" +#include "sql_test.h" // TEST_filesort +#include "opt_range.h" // SQL_SELECT +#include "bounded_queue.h" +#include "filesort_utils.h" +#include "sql_select.h" +#include "debug_sync.h" + + /* functions defined in this file */ + +static uchar *read_buffpek_from_file(IO_CACHE *buffer_file, uint count, + uchar *buf); +static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, + SORT_INFO *fs_info, + IO_CACHE *buffer_file, + IO_CACHE *tempfile, + Bounded_queue *pq, + ha_rows *found_rows); +static bool write_keys(Sort_param *param, SORT_INFO *fs_info, + uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile); +static uint make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos, + bool using_packed_sortkeys= false); +static uint make_sortkey(Sort_param *param, uchar *to); +static uint make_packed_sortkey(Sort_param *param, uchar *to); + +static void register_used_fields(Sort_param *param); +static bool save_index(Sort_param *param, uint count, + SORT_INFO *table_sort); +static uint suffix_length(ulong string_length); +static uint sortlength(THD *thd, Sort_keys *sortorder, + bool *allow_packing_for_sortkeys); +static Addon_fields *get_addon_fields(TABLE *table, uint sortlength, + uint *addon_length, + uint *m_packable_length); + +static bool check_if_pq_applicable(Sort_param *param, SORT_INFO *info, + TABLE *table, + ha_rows records, size_t memory_available); + +static void store_key_part_length(uint32 num, uchar *to, uint bytes) +{ + switch(bytes) { + case 1: *to= (uchar)num; break; + case 2: int2store(to, num); break; + case 3: 
int3store(to, num); break; + case 4: int4store(to, num); break; + default: DBUG_ASSERT(0); + } +} + + +static uint32 read_keypart_length(const uchar *from, uint bytes) +{ + switch(bytes) { + case 1: return from[0]; + case 2: return uint2korr(from); + case 3: return uint3korr(from); + case 4: return uint4korr(from); + default: DBUG_ASSERT(0); return 0; + } +} + + +// @param sortlen [Maximum] length of the sort key +void Sort_param::init_for_filesort(uint sortlen, TABLE *table, + ha_rows maxrows, Filesort *filesort) +{ + DBUG_ASSERT(addon_fields == NULL); + + sort_length= sortlen; + ref_length= table->file->ref_length; + accepted_rows= filesort->accepted_rows; + + if (!(table->file->ha_table_flags() & HA_FAST_KEY_READ) && + !table->fulltext_searched && !filesort->sort_positions) + { + /* + Get the descriptors of all fields whose values are appended + to sorted fields and get its total length in addon_buf.length + */ + addon_fields= get_addon_fields(table, sort_length, &addon_length, + &m_packable_length); + } + if (using_addon_fields()) + { + DBUG_ASSERT(addon_length < UINT_MAX32); + res_length= addon_length; + } + else + { + res_length= ref_length; + /* + The reference to the record is considered + as an additional sorted field + */ + sort_length+= ref_length; + } + rec_length= sort_length + addon_length; + max_rows= maxrows; +} + + +void Sort_param::try_to_pack_addons(ulong max_length_for_sort_data) +{ + if (!using_addon_fields() || // no addons, or + using_packed_addons()) // already packed + return; + + if (!Addon_fields::can_pack_addon_fields(res_length)) + return; + + const uint sz= Addon_fields::size_of_length_field; + + // Heuristic: skip packing if potential savings are less than 10 bytes. 
+ if (m_packable_length < (10 + sz)) + return; + + SORT_ADDON_FIELD *addonf= addon_fields->begin(); + for (;addonf != addon_fields->end(); ++addonf) + { + addonf->offset+= sz; + addonf->null_offset+= sz; + } + + addon_fields->set_using_packed_addons(true); + m_using_packed_addons= true; + m_packed_format= true; + + addon_length+= sz; + res_length+= sz; + rec_length+= sz; +} + +/** + Sort a table. + Creates a set of pointers that can be used to read the rows + in sorted order. This should be done with the functions + in records.cc. + + Before calling filesort, one must have done + table->file->info(HA_STATUS_VARIABLE) + + The result set is stored in + filesort_info->io_cache or + filesort_info->record_pointers. + + @param thd Current thread + @param table Table to sort + @param filesort How to sort the table + @param[out] found_rows Store the number of found rows here. + This is the number of found rows after + applying WHERE condition. + @note + If we sort by position (like if filesort->sort_positions==true) + filesort() will call table->prepare_for_position(). 
+ + @retval + 0 Error + # SORT_INFO +*/ + +SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, + Filesort_tracker* tracker, JOIN *join, + table_map first_table_bit) +{ + int error; + DBUG_ASSERT(thd->variables.sortbuff_size <= SIZE_T_MAX); + size_t memory_available= (size_t)thd->variables.sortbuff_size; + uint maxbuffer; + Merge_chunk *buffpek; + ha_rows num_rows= HA_POS_ERROR, not_used=0; + IO_CACHE tempfile, buffpek_pointers, *outfile; + Sort_param param; + bool allow_packing_for_sortkeys; + Bounded_queue pq; + SQL_SELECT *const select= filesort->select; + ha_rows max_rows= filesort->limit; + uint s_length= 0, sort_len; + Sort_keys *sort_keys; + DBUG_ENTER("filesort"); + + if (!(sort_keys= filesort->make_sortorder(thd, join, first_table_bit))) + DBUG_RETURN(NULL); /* purecov: inspected */ + + s_length= static_cast(sort_keys->size()); + + DBUG_EXECUTE("info",TEST_filesort(filesort->sortorder, s_length);); +#ifdef SKIP_DBUG_IN_FILESORT + DBUG_PUSH_EMPTY; /* No DBUG here */ +#endif + SORT_INFO *sort; + TABLE_LIST *tab= table->pos_in_table_list; + Item_subselect *subselect= tab ? tab->containing_subselect() : 0; + MYSQL_FILESORT_START(table->s->db.str, table->s->table_name.str); + DEBUG_SYNC(thd, "filesort_start"); + + if (!(sort= new SORT_INFO)) // Note that this is not automatically freed! 
+ return 0; + + if (subselect && subselect->filesort_buffer.is_allocated()) + { + // Reuse cache from last call + sort->filesort_buffer= subselect->filesort_buffer; + sort->buffpek= subselect->sortbuffer; + subselect->filesort_buffer.reset(); + subselect->sortbuffer.str=0; + } + + DBUG_ASSERT(sort->sorted_result_in_fsbuf == FALSE || + sort->record_pointers == NULL); + + outfile= &sort->io_cache; + + my_b_clear(&tempfile); + my_b_clear(&buffpek_pointers); + buffpek=0; + error= 1; + sort->found_rows= HA_POS_ERROR; + + param.sort_keys= sort_keys; + sort_len= sortlength(thd, sort_keys, &allow_packing_for_sortkeys); + param.init_for_filesort(sort_len, table, max_rows, filesort); + if (!param.accepted_rows) + param.accepted_rows= ¬_used; + + param.set_all_read_bits= filesort->set_all_read_bits; + param.unpack= filesort->unpack; + + sort->addon_fields= param.addon_fields; + sort->sort_keys= param.sort_keys; + + if (select && select->quick) + thd->inc_status_sort_range(); + else + thd->inc_status_sort_scan(); + thd->query_plan_flags|= QPLAN_FILESORT; + tracker->report_use(thd, max_rows); + + // If number of rows is not known, use as much of sort buffer as possible. + num_rows= table->file->estimate_rows_upper_bound(); + + if (check_if_pq_applicable(¶m, sort, + table, num_rows, memory_available)) + { + DBUG_PRINT("info", ("filesort PQ is applicable")); + thd->query_plan_flags|= QPLAN_FILESORT_PRIORITY_QUEUE; + status_var_increment(thd->status_var.filesort_pq_sorts_); + tracker->incr_pq_used(); + param.using_pq= true; + const size_t compare_length= param.sort_length; + DBUG_ASSERT(param.using_packed_sortkeys() == false); + /* + For PQ queries (with limit) we know exactly how many pointers/records + we have in the buffer, so to simplify things, we initialize + all pointers here. (We cannot pack fields anyways, so there is no + point in doing lazy initialization). 
+ */ + sort->init_record_pointers(); + if (pq.init(param.max_rows, + true, // max_at_top + NULL, // compare_function + compare_length, + &make_sortkey, ¶m, sort->get_sort_keys())) + { + /* + If we fail to init pq, we have to give up: + out of memory means my_malloc() will call my_error(). + */ + DBUG_PRINT("info", ("failed to allocate PQ")); + DBUG_ASSERT(thd->is_error()); + goto err; + } + } + else + { + DBUG_PRINT("info", ("filesort PQ is not applicable")); + + if (allow_packing_for_sortkeys) + param.try_to_pack_sortkeys(); + + param.try_to_pack_addons(thd->variables.max_length_for_sort_data); + tracker->report_sort_keys_format(param.using_packed_sortkeys()); + param.using_pq= false; + + size_t min_sort_memory= MY_MAX(MIN_SORT_MEMORY, + param.sort_length*MERGEBUFF2); + set_if_bigger(min_sort_memory, sizeof(Merge_chunk*)*MERGEBUFF2); + while (memory_available >= min_sort_memory) + { + ulonglong keys= memory_available / (param.rec_length + sizeof(char*)); + param.max_keys_per_buffer= (uint) MY_MAX(MERGEBUFF2, + MY_MIN(num_rows, keys)); + sort->alloc_sort_buffer(param.max_keys_per_buffer, param.rec_length); + if (sort->sort_buffer_size() > 0) + break; + size_t old_memory_available= memory_available; + memory_available= memory_available/4*3; + if (memory_available < min_sort_memory && + old_memory_available > min_sort_memory) + memory_available= min_sort_memory; + } + if (memory_available < min_sort_memory) + { + my_error(ER_OUT_OF_SORTMEMORY,MYF(ME_ERROR_LOG + ME_FATAL)); + goto err; + } + tracker->report_sort_buffer_size(sort->sort_buffer_size()); + } + + if (param.using_addon_fields()) + { + // report information whether addon fields are packed or not + tracker->report_addon_fields_format(param.using_packed_addons()); + } + + if (param.tmp_buffer.alloc(param.sort_length)) + goto err; + + if (open_cached_file(&buffpek_pointers,mysql_tmpdir,TEMP_PREFIX, + DISK_BUFFER_SIZE, MYF(MY_WME))) + goto err; + + param.sort_form= table; + param.local_sortorder= + 
Bounds_checked_array(filesort->sortorder, s_length); + + num_rows= find_all_keys(thd, ¶m, select, + sort, + &buffpek_pointers, + &tempfile, + pq.is_initialized() ? &pq : NULL, + &sort->found_rows); + if (num_rows == HA_POS_ERROR) + goto err; + + maxbuffer= (uint) (my_b_tell(&buffpek_pointers)/sizeof(*buffpek)); + tracker->report_merge_passes_at_start(thd->query_plan_fsort_passes); + tracker->report_row_numbers(param.examined_rows, sort->found_rows, num_rows); + + if (maxbuffer == 0) // The whole set is in memory + { + if (save_index(¶m, (uint) num_rows, sort)) + goto err; + } + else + { + /* filesort cannot handle zero-length records during merge. */ + DBUG_ASSERT(param.sort_length != 0); + + if (sort->buffpek.str && sort->buffpek.length < maxbuffer) + { + my_free(sort->buffpek.str); + sort->buffpek.str= 0; + } + + if (param.using_addon_fields()) + { + DBUG_ASSERT(sort->addon_fields); + if (!sort->addon_fields->allocate_addon_buf(param.addon_length)) + goto err; + } + + if (!(sort->buffpek.str= + (char *) read_buffpek_from_file(&buffpek_pointers, maxbuffer, + (uchar*) sort->buffpek.str))) + goto err; + sort->buffpek.length= maxbuffer; + buffpek= (Merge_chunk *) sort->buffpek.str; + close_cached_file(&buffpek_pointers); + /* Open cached file if it isn't open */ + if (! my_b_inited(outfile) && + open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER, + MYF(MY_WME))) + goto err; + if (reinit_io_cache(outfile,WRITE_CACHE,0L,0,0)) + goto err; + + /* + Use also the space previously used by string pointers in sort_buffer + for temporary key storage. 
+ */ + + param.max_keys_per_buffer= static_cast(sort->sort_buffer_size()) / + param.rec_length; + set_if_bigger(param.max_keys_per_buffer, 1); + maxbuffer--; // Offset from 0 + + if (merge_many_buff(¶m, sort->get_raw_buf(), + buffpek,&maxbuffer, + &tempfile)) + goto err; + if (flush_io_cache(&tempfile) || + reinit_io_cache(&tempfile,READ_CACHE,0L,0,0)) + goto err; + if (merge_index(¶m, + sort->get_raw_buf(), + buffpek, + maxbuffer, + &tempfile, + outfile)) + goto err; + } + + if (num_rows > param.max_rows) + { + // If find_all_keys() produced more results than the query LIMIT. + num_rows= param.max_rows; + } + error= 0; + + err: + param.tmp_buffer.free(); + if (!subselect || !subselect->is_uncacheable()) + { + if (!param.using_addon_fields()) + sort->free_sort_buffer(); + my_free(sort->buffpek.str); + } + else + { + /* Remember sort buffers for next subquery call */ + subselect->filesort_buffer= sort->filesort_buffer; + subselect->sortbuffer= sort->buffpek; + sort->filesort_buffer.reset(); // Don't free this*/ + } + sort->buffpek.str= 0; + + close_cached_file(&tempfile); + close_cached_file(&buffpek_pointers); + if (my_b_inited(outfile)) + { + if (flush_io_cache(outfile)) + error=1; + { + my_off_t save_pos=outfile->pos_in_file; + /* For following reads */ + if (reinit_io_cache(outfile,READ_CACHE,0L,0,0)) + error=1; + outfile->end_of_file=save_pos; + } + } + tracker->report_merge_passes_at_end(thd, thd->query_plan_fsort_passes); + if (unlikely(error)) + { + int kill_errno= thd->killed_errno(); + DBUG_ASSERT(thd->is_error() || kill_errno || thd->killed == ABORT_QUERY); + + my_printf_error(ER_FILSORT_ABORT, + "%s: %s", + MYF(0), + ER_THD(thd, ER_FILSORT_ABORT), + kill_errno ? ER_THD(thd, kill_errno) : + thd->killed == ABORT_QUERY ? 
"" : + thd->get_stmt_da()->message()); + + if ((thd->killed == ABORT_QUERY || kill_errno) && + global_system_variables.log_warnings > 1) + { + sql_print_warning("%s, host: %s, user: %s, thread: %lu, query: %-.4096s", + ER_THD(thd, ER_FILSORT_ABORT), + thd->security_ctx->host_or_ip, + &thd->security_ctx->priv_user[0], + (ulong) thd->thread_id, + thd->query()); + } + } + else + thd->inc_status_sort_rows(num_rows); + + sort->examined_rows= param.examined_rows; + sort->return_rows= num_rows; +#ifdef SKIP_DBUG_IN_FILESORT + DBUG_POP_EMPTY; /* Ok to DBUG */ +#endif + + DBUG_PRINT("exit", + ("num_rows: %lld examined_rows: %lld found_rows: %lld", + (longlong) sort->return_rows, (longlong) sort->examined_rows, + (longlong) sort->found_rows)); + MYSQL_FILESORT_DONE(error, num_rows); + + if (unlikely(error)) + { + delete sort; + sort= 0; + } + DBUG_RETURN(sort); +} /* filesort */ + + +void Filesort::cleanup() +{ + if (select && own_select) + { + select->cleanup(); + select= NULL; + } +} + + +/* + Create the Sort_keys array and fill the sort_keys[i]->{item|field}. + + This indicates which field/item values will be used as sort keys. + Attributes like lengths are not filled yet. 
+*/ + +Sort_keys* +Filesort::make_sortorder(THD *thd, JOIN *join, table_map first_table_bit) +{ + uint count; + SORT_FIELD *sort,*pos; + ORDER *ord; + DBUG_ENTER("make_sortorder"); + + count=0; + for (ord = order; ord; ord= ord->next) + count++; + + if (sortorder) + DBUG_RETURN(sort_keys); + + DBUG_ASSERT(sort_keys == NULL); + + sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD) * count); + pos= sort= sortorder; + + if (!pos) + DBUG_RETURN(0); + + sort_keys= new Sort_keys(sortorder, count); + + if (!sort_keys) + DBUG_RETURN(0); + + pos= sort_keys->begin(); + for (ord= order; ord; ord= ord->next, pos++) + { + Item *first= ord->item[0]; + /* + It is possible that the query plan is to read table t1, while the + sort criteria actually has "ORDER BY t2.col" and the WHERE clause has + a multi-equality(t1.col, t2.col, ...). + The optimizer detects such cases (grep for + UseMultipleEqualitiesToRemoveTempTable to see where), but doesn't + perform equality substitution in the order->item. We need to do the + substitution here ourselves. + */ + table_map item_map= first->used_tables(); + if (join && (item_map & ~join->const_table_map) && + !(item_map & first_table_bit) && join->cond_equal && + first->get_item_equal()) + { + /* + Ok, this is the case descibed just above. Get the first element of the + multi-equality. 
+ */ + Item_equal *item_eq= first->get_item_equal(); + first= item_eq->get_first(NO_PARTICULAR_TAB, NULL); + } + + Item *item= first->real_item(); + pos->field= 0; pos->item= 0; + if (item->type() == Item::FIELD_ITEM) + pos->field= ((Item_field*) item)->field; + else if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item()) + { + // Aggregate, or Item_aggregate_ref + DBUG_ASSERT(first->type() == Item::SUM_FUNC_ITEM || + (first->type() == Item::REF_ITEM && + static_cast(first)->ref_type() == + Item_ref::AGGREGATE_REF)); + pos->field= first->get_tmp_table_field(); + } + else if (item->type() == Item::COPY_STR_ITEM) + { // Blob patch + pos->item= ((Item_copy*) item)->get_item(); + } + else + pos->item= *ord->item; + pos->reverse= (ord->direction == ORDER::ORDER_DESC); + DBUG_ASSERT(pos->field != NULL || pos->item != NULL); + } + DBUG_RETURN(sort_keys); +} + + +/** Read 'count' number of buffer pointers into memory. */ + +static uchar *read_buffpek_from_file(IO_CACHE *buffpek_pointers, uint count, + uchar *buf) +{ + size_t length= sizeof(Merge_chunk)*count; + uchar *tmp= buf; + DBUG_ENTER("read_buffpek_from_file"); + if (count > UINT_MAX/sizeof(Merge_chunk)) + return 0; /* sizeof(BUFFPEK)*count will overflow */ + if (!tmp) + tmp= (uchar *)my_malloc(key_memory_Filesort_info_merge, length, + MYF(MY_WME | MY_THREAD_SPECIFIC)); + if (tmp) + { + if (reinit_io_cache(buffpek_pointers,READ_CACHE,0L,0,0) || + my_b_read(buffpek_pointers, (uchar*) tmp, length)) + { + my_free(tmp); + tmp=0; + } + } + DBUG_RETURN(tmp); +} + +#ifndef DBUG_OFF + +/* Buffer where record is returned */ +char dbug_print_row_buff[512]; + +/* Temporary buffer for printing a column */ +char dbug_print_row_buff_tmp[512]; + +/* + Print table's current row into a buffer and return a pointer to it. 
+ + This is intended to be used from gdb: + + (gdb) p dbug_print_table_row(table) + $33 = "SUBQUERY2_t1(col_int_key,col_varchar_nokey)=(7,c)" + (gdb) + + Only columns in table->read_set are printed +*/ + +const char* dbug_print_table_row(TABLE *table) +{ + Field **pfield; + String tmp(dbug_print_row_buff_tmp, + sizeof(dbug_print_row_buff_tmp),&my_charset_bin); + + String output(dbug_print_row_buff, sizeof(dbug_print_row_buff), + &my_charset_bin); + + output.length(0); + output.append(table->alias); + output.append('('); + bool first= true; + + for (pfield= table->field; *pfield ; pfield++) + { + const LEX_CSTRING *name; + if (table->read_set && !bitmap_is_set(table->read_set, (*pfield)->field_index)) + continue; + + if (first) + first= false; + else + output.append(','); + + name= (*pfield)->field_name.str ? &(*pfield)->field_name: &NULL_clex_str; + output.append(name); + } + + output.append(STRING_WITH_LEN(")=(")); + + first= true; + for (pfield= table->field; *pfield ; pfield++) + { + Field *field= *pfield; + + if (table->read_set && !bitmap_is_set(table->read_set, (*pfield)->field_index)) + continue; + + if (first) + first= false; + else + output.append(','); + + if (field->is_null()) + output.append(&NULL_clex_str); + else + { + if (field->type() == MYSQL_TYPE_BIT) + (void) field->val_int_as_str(&tmp, 1); + else + field->val_str(&tmp); + output.append(tmp.ptr(), tmp.length()); + } + } + output.append(')'); + + return output.c_ptr_safe(); +} + + +const char* dbug_print_row(TABLE *table, uchar *rec) +{ + table->move_fields(table->field, rec, table->record[0]); + const char* ret= dbug_print_table_row(table); + table->move_fields(table->field, table->record[0], rec); + return ret; +} + + +/* + Print a text, SQL-like record representation into dbug trace. 
+ + Note: this function is a work in progress: at the moment + - column read bitmap is ignored (can print garbage for unused columns) + - there is no quoting +*/ +static void dbug_print_record(TABLE *table, bool print_rowid) +{ + char buff[1024]; + Field **pfield; + String tmp(buff,sizeof(buff),&my_charset_bin); + DBUG_LOCK_FILE; + + fprintf(DBUG_FILE, "record ("); + for (pfield= table->field; *pfield ; pfield++) + fprintf(DBUG_FILE, "%s%s", (*pfield)->field_name.str, + (pfield[1])? ", ":""); + fprintf(DBUG_FILE, ") = "); + + fprintf(DBUG_FILE, "("); + for (pfield= table->field; *pfield ; pfield++) + { + Field *field= *pfield; + + if (field->is_null()) + fwrite("NULL", sizeof(char), 4, DBUG_FILE); + + if (field->type() == MYSQL_TYPE_BIT) + (void) field->val_int_as_str(&tmp, 1); + else + field->val_str(&tmp); + + fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE); + if (pfield[1]) + fwrite(", ", sizeof(char), 2, DBUG_FILE); + } + fprintf(DBUG_FILE, ")"); + if (print_rowid) + { + fprintf(DBUG_FILE, " rowid "); + for (uint i=0; i < table->file->ref_length; i++) + { + fprintf(DBUG_FILE, "%x", (uchar)table->file->ref[i]); + } + } + fprintf(DBUG_FILE, "\n"); + DBUG_UNLOCK_FILE; +} + +#endif + + +/** + Search after sort_keys, and write them into tempfile + (if we run out of space in the sort_keys buffer). + All produced sequences are guaranteed to be non-empty. + + @param param Sorting parameter + @param select Use this to get source data + @param sort_keys Array of pointers to sort key + addon buffers. + @param buffpek_pointers File to write BUFFPEKs describing sorted segments + in tempfile. + @param tempfile File to write sorted sequences of sortkeys to. + @param pq If !NULL, use it for keeping top N elements + @param [out] found_rows The number of FOUND_ROWS(). + For a query with LIMIT, this value will typically + be larger than the function return value. 
+ + @note + Basic idea: + @verbatim + while (get_next_sortkey()) + { + if (using priority queue) + push sort key into queue + else + { + if (no free space in sort_keys buffers) + { + sort sort_keys buffer; + dump sorted sequence to 'tempfile'; + dump BUFFPEK describing sequence location into 'buffpek_pointers'; + } + put sort key into 'sort_keys'; + } + } + if (sort_keys has some elements && dumped at least once) + sort-dump-dump as above; + else + don't sort, leave sort_keys array to be sorted by caller. + @endverbatim + + @retval + Number of records written on success. + @retval + HA_POS_ERROR on error. +*/ + +static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, + SORT_INFO *fs_info, + IO_CACHE *buffpek_pointers, + IO_CACHE *tempfile, + Bounded_queue *pq, + ha_rows *found_rows) +{ + int error, quick_select; + uint idx, indexpos; + uchar *ref_pos, *next_pos, ref_buff[MAX_REFLENGTH]; + TABLE *sort_form; + handler *file; + MY_BITMAP *save_read_set, *save_write_set; + Item *sort_cond; + ha_rows num_records= 0; + const bool packed_format= param->is_packed_format(); + const bool using_packed_sortkeys= param->using_packed_sortkeys(); + + DBUG_ENTER("find_all_keys"); + DBUG_PRINT("info",("using: %s", + (select ? select->quick ? 
"ranges" : "where": + "every row"))); + + idx=indexpos=0; + error=quick_select=0; + sort_form=param->sort_form; + file=sort_form->file; + ref_pos= ref_buff; + quick_select=select && select->quick; + *found_rows= 0; + ref_pos= &file->ref[0]; + next_pos=ref_pos; + + DBUG_EXECUTE_IF("show_explain_in_find_all_keys", + dbug_serve_apcs(thd, 1); + ); + + if (!quick_select) + { + next_pos=(uchar*) 0; /* Find records in sequence */ + DBUG_EXECUTE_IF("bug14365043_1", + DBUG_SET("+d,ha_rnd_init_fail");); + if (unlikely(file->ha_rnd_init_with_error(1))) + DBUG_RETURN(HA_POS_ERROR); + file->extra_opt(HA_EXTRA_CACHE, thd->variables.read_buff_size); + } + + /* Remember original bitmaps */ + save_read_set= sort_form->read_set; + save_write_set= sort_form->write_set; + + /* Set up temporary column read map for columns used by sort */ + DBUG_ASSERT(save_read_set != &sort_form->tmp_set); + bitmap_clear_all(&sort_form->tmp_set); + sort_form->column_bitmaps_set(&sort_form->tmp_set, &sort_form->tmp_set); + register_used_fields(param); + if (quick_select) + select->quick->add_used_key_part_to_set(); + + sort_cond= (!select ? 0 : + (!select->pre_idx_push_select_cond ? 
+ select->cond : select->pre_idx_push_select_cond)); + if (sort_cond) + sort_cond->walk(&Item::register_field_in_read_map, 1, sort_form); + sort_form->file->column_bitmaps_signal(); + + if (quick_select) + { + if (select->quick->reset()) + goto err; + } + + if (param->set_all_read_bits) + sort_form->column_bitmaps_set(save_read_set, save_write_set); + DEBUG_SYNC(thd, "after_index_merge_phase1"); + + for (;;) + { + if (quick_select) + error= select->quick->get_next(); + else /* Not quick-select */ + { + error= file->ha_rnd_next(sort_form->record[0]); + if (param->unpack) + param->unpack(sort_form); + } + if (unlikely(error)) + break; + file->position(sort_form->record[0]); + DBUG_EXECUTE_IF("debug_filesort", dbug_print_record(sort_form, TRUE);); + + if (unlikely(thd->check_killed())) + { + DBUG_PRINT("info",("Sort killed by user")); + if (!quick_select) + { + (void) file->extra(HA_EXTRA_NO_CACHE); + file->ha_rnd_end(); + } + goto err; /* purecov: inspected */ + } + + bool write_record= false; + if (likely(error == 0)) + { + param->examined_rows++; + if (select && select->cond) + { + /* + If the condition 'select->cond' contains a subquery, restore the + original read/write sets of the table 'sort_form' because when + SQL_SELECT::skip_record evaluates this condition. it may include a + correlated subquery predicate, such that some field in the subquery + refers to 'sort_form'. + + PSergey-todo: discuss the above with Timour. 
+ */ + MY_BITMAP *tmp_read_set= sort_form->read_set; + MY_BITMAP *tmp_write_set= sort_form->write_set; + + if (select->cond->with_subquery()) + sort_form->column_bitmaps_set(save_read_set, save_write_set); + write_record= (select->skip_record(thd) > 0); + if (select->cond->with_subquery()) + sort_form->column_bitmaps_set(tmp_read_set, tmp_write_set); + } + else + write_record= true; + } + + if (write_record) + { + if (pq) + pq->push(ref_pos); + else + { + if (fs_info->isfull()) + { + if (write_keys(param, fs_info, idx, buffpek_pointers, tempfile)) + goto err; + idx= 0; + indexpos++; + } + if (idx == 0) + fs_info->init_next_record_pointer(); + uchar *start_of_rec= fs_info->get_next_record_pointer(); + + const uint rec_sz= make_sortkey(param, start_of_rec, + ref_pos, using_packed_sortkeys); + if (packed_format && rec_sz != param->rec_length) + fs_info->adjust_next_record_pointer(rec_sz); + idx++; + } + num_records++; + (*param->accepted_rows)++; + } + + /* It does not make sense to read more keys in case of a fatal error */ + if (unlikely(thd->is_error())) + break; + + /* + We need to this after checking the error as the transaction may have + rolled back in case of a deadlock + */ + if (!write_record) + file->unlock_row(); + } + if (!quick_select) + { + (void) file->extra(HA_EXTRA_NO_CACHE); /* End caching of records */ + if (!next_pos) + file->ha_rnd_end(); + } + + /* Signal we should use original column read and write maps */ + sort_form->column_bitmaps_set(save_read_set, save_write_set); + + if (unlikely(thd->is_error())) + DBUG_RETURN(HA_POS_ERROR); + + DBUG_PRINT("test",("error: %d indexpos: %d",error,indexpos)); + if (unlikely(error != HA_ERR_END_OF_FILE)) + { + file->print_error(error,MYF(ME_ERROR_LOG)); + DBUG_RETURN(HA_POS_ERROR); + } + if (indexpos && idx && + write_keys(param, fs_info, idx, buffpek_pointers, tempfile)) + DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ + + (*found_rows)= num_records; + if (pq) + num_records= pq->num_elements(); + + + 
DBUG_PRINT("info", ("find_all_keys return %llu", (ulonglong) num_records)); + DBUG_RETURN(num_records); + +err: + sort_form->column_bitmaps_set(save_read_set, save_write_set); + DBUG_RETURN(HA_POS_ERROR); +} /* find_all_keys */ + + +/** + @details + Sort the buffer and write: + -# the sorted sequence to tempfile + -# a BUFFPEK describing the sorted sequence position to buffpek_pointers + + (was: Skriver en buffert med nycklar till filen) + + @param param Sort parameters + @param sort_keys Array of pointers to keys to sort + @param count Number of elements in sort_keys array + @param buffpek_pointers One 'BUFFPEK' struct will be written into this file. + The BUFFPEK::{file_pos, count} will indicate where + the sorted data was stored. + @param tempfile The sorted sequence will be written into this file. + + @retval + 0 OK + @retval + 1 Error +*/ + +static bool +write_keys(Sort_param *param, SORT_INFO *fs_info, uint count, + IO_CACHE *buffpek_pointers, IO_CACHE *tempfile) +{ + Merge_chunk buffpek; + DBUG_ENTER("write_keys"); + + fs_info->sort_buffer(param, count); + + if (!my_b_inited(tempfile) && + open_cached_file(tempfile, mysql_tmpdir, TEMP_PREFIX, DISK_BUFFER_SIZE, + MYF(MY_WME))) + DBUG_RETURN(1); /* purecov: inspected */ + /* check we won't have more buffpeks than we can possibly keep in memory */ + if (my_b_tell(buffpek_pointers) + sizeof(Merge_chunk) > (ulonglong)UINT_MAX) + DBUG_RETURN(1); + + buffpek.set_file_position(my_b_tell(tempfile)); + if ((ha_rows) count > param->max_rows) + count=(uint) param->max_rows; /* purecov: inspected */ + buffpek.set_rowcount(static_cast(count)); + + for (uint ix= 0; ix < count; ++ix) + { + uchar *record= fs_info->get_sorted_record(ix); + + + if (my_b_write(tempfile, record, param->get_record_length(record))) + DBUG_RETURN(1); /* purecov: inspected */ + } + + if (my_b_write(buffpek_pointers, (uchar*) &buffpek, sizeof(buffpek))) + DBUG_RETURN(1); + + DBUG_RETURN(0); + +} /* write_keys */ + + +/** + Store length in 
high-byte-first order. +*/ +void store_length(uchar *to, uint length, uint pack_length) +{ + switch (pack_length) { + case 1: + *to= (uchar) length; + break; + case 2: + mi_int2store(to, length); + break; + case 3: + mi_int3store(to, length); + break; + default: + mi_int4store(to, length); + break; + } +} + + +void +Type_handler_string_result::make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp_buffer) const +{ + CHARSET_INFO *cs= item->collation.collation; + bool maybe_null= item->maybe_null(); + + if (maybe_null) + *to++= 1; + + Binary_string *res= item->str_result(tmp_buffer); + if (!res) + { + if (maybe_null) + memset(to - 1, 0, sort_field->length + 1); + else + { + /* purecov: begin deadcode */ + /* + This should only happen during extreme conditions if we run out + of memory or have an item marked not null when it can be null. + This code is here mainly to avoid a hard crash in this case. + */ + DBUG_ASSERT(0); + DBUG_PRINT("warning", + ("Got null on something that shouldn't be null")); + memset(to, 0, sort_field->length); // Avoid crash + /* purecov: end */ + } + return; + } + + if (use_strnxfrm(cs)) + { +#ifdef DBUG_ASSERT_EXISTS + size_t tmp_length= +#endif + cs->strnxfrm(to, sort_field->length, + item->max_char_length() * cs->strxfrm_multiply, + (uchar*) res->ptr(), res->length(), + MY_STRXFRM_PAD_WITH_SPACE | + MY_STRXFRM_PAD_TO_MAXLEN); + DBUG_ASSERT(tmp_length == sort_field->length); + } + else + { + uint diff; + uint sort_field_length= sort_field->length - sort_field->suffix_length; + uint length= res->length(); + if (sort_field_length < length) + { + diff= 0; + length= sort_field_length; + } + else + diff= sort_field_length - length; + if (sort_field->suffix_length) + { + /* Store length last in result_string */ + store_length(to + sort_field_length, length, sort_field->suffix_length); + } + /* apply cs->sort_order for case-insensitive comparison if needed */ + cs->strnxfrm((uchar*)to, length, (const uchar*) 
res->ptr(), length); + char fill_char= ((cs->state & MY_CS_BINSORT) ? (char) 0 : ' '); + cs->fill((char *) to + length, diff, fill_char); + } +} + + +void +Type_handler_int_result::make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp_buffer) const +{ + longlong value= item->val_int_result(); + make_sort_key_longlong(to, item->maybe_null(), item->null_value, + item->unsigned_flag, value); +} + + +void +Type_handler_temporal_result::make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp_buffer) const +{ + MYSQL_TIME buf; + // This is a temporal type. No nanoseconds. Rounding mode is not important. + DBUG_ASSERT(item->cmp_type() == TIME_RESULT); + static const Temporal::Options opt(TIME_INVALID_DATES, TIME_FRAC_NONE); + if (item->get_date_result(current_thd, &buf, opt)) + { + DBUG_ASSERT(item->maybe_null()); + DBUG_ASSERT(item->null_value); + make_sort_key_longlong(to, item->maybe_null(), true, + item->unsigned_flag, 0); + } + else + make_sort_key_longlong(to, item->maybe_null(), false, + item->unsigned_flag, pack_time(&buf)); +} + + +void +Type_handler_timestamp_common::make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp_buffer) const +{ + THD *thd= current_thd; + uint binlen= my_timestamp_binary_length(item->decimals); + Timestamp_or_zero_datetime_native_null native(thd, item); + if (native.is_null() || native.is_zero_datetime()) + { + // NULL or '0000-00-00 00:00:00' + bzero(to, item->maybe_null() ? binlen + 1 : binlen); + } + else + { + if (item->maybe_null()) + *to++= 1; + if (native.length() != binlen) + { + /* + Some items can return native representation with a different + number of fractional digits, e.g.: GREATEST(ts_3, ts_4) can + return a value with 3 fractional digits, although its fractional + precision is 4. Re-pack with a proper precision now. 
+ */ + Timestamp(native).to_native(&native, item->datetime_precision(thd)); + } + DBUG_ASSERT(native.length() == binlen); + memcpy((char *) to, native.ptr(), binlen); + } +} + + +void +Type_handler::store_sort_key_longlong(uchar *to, bool unsigned_flag, + longlong value) const +{ + to[7]= (uchar) value; + to[6]= (uchar) (value >> 8); + to[5]= (uchar) (value >> 16); + to[4]= (uchar) (value >> 24); + to[3]= (uchar) (value >> 32); + to[2]= (uchar) (value >> 40); + to[1]= (uchar) (value >> 48); + if (unsigned_flag) /* Fix sign */ + to[0]= (uchar) (value >> 56); + else + to[0]= (uchar) (value >> 56) ^ 128; /* Reverse signbit */ +} + + +void +Type_handler::make_sort_key_longlong(uchar *to, + bool maybe_null, + bool null_value, + bool unsigned_flag, + longlong value) const + +{ + if (maybe_null) + { + if (null_value) + { + memset(to, 0, 9); + return; + } + *to++= 1; + } + store_sort_key_longlong(to, unsigned_flag, value); +} + + +uint +Type_handler::make_packed_sort_key_longlong(uchar *to, bool maybe_null, + bool null_value, bool unsigned_flag, + longlong value, + const SORT_FIELD_ATTR *sort_field) const +{ + if (maybe_null) + { + if (null_value) + { + *to++= 0; + return 0; + } + *to++= 1; + } + store_sort_key_longlong(to, unsigned_flag, value); + DBUG_ASSERT(sort_field->original_length == sort_field->length); + return sort_field->original_length; +} + + +void +Type_handler_decimal_result::make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp_buffer) const +{ + my_decimal dec_buf, *dec_val= item->val_decimal_result(&dec_buf); + if (item->maybe_null()) + { + if (item->null_value) + { + memset(to, 0, sort_field->length + 1); + return; + } + *to++= 1; + } + dec_val->to_binary(to, item->max_length - (item->decimals ? 
1 : 0), + item->decimals); +} + + +void +Type_handler_real_result::make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp_buffer) const +{ + double value= item->val_result(); + if (item->maybe_null()) + { + if (item->null_value) + { + memset(to, 0, sort_field->length + 1); + return; + } + *to++= 1; + } + change_double_for_sort(value, to); +} + + +/** Make a sort-key from record. */ + +static uint make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos, + bool using_packed_sortkeys) +{ + uchar *orig_to= to; + + to+= using_packed_sortkeys ? + make_packed_sortkey(param, to) : + make_sortkey(param, to); + + if (param->using_addon_fields()) + { + /* + Save field values appended to sorted fields. + First null bit indicators are appended then field values follow. + In this implementation we use fixed layout for field values - + the same for all records. + */ + SORT_ADDON_FIELD *addonf= param->addon_fields->begin(); + uchar *nulls= to; + uchar *p_len= to; + DBUG_ASSERT(addonf != 0); + const bool packed_addon_fields= param->addon_fields->using_packed_addons(); + uint32 res_len= addonf->offset; + memset(nulls, 0, addonf->offset); + to+= addonf->offset; + for ( ; addonf != param->addon_fields->end() ; addonf++) + { + Field *field= addonf->field; + if (addonf->null_bit && field->is_null()) + { + nulls[addonf->null_offset]|= addonf->null_bit; + if (!packed_addon_fields) + to+= addonf->length; + } + else + { + uchar *end= field->pack(to, field->ptr); + DBUG_ASSERT(end >= to); + uint sz= static_cast(end - to); + res_len += sz; + if (packed_addon_fields) + to+= sz; + else + { + if (addonf->length > sz) + bzero(end, addonf->length - sz); // Make Valgrind/MSAN happy + to+= addonf->length; + } + } + } + if (packed_addon_fields) + Addon_fields::store_addon_length(p_len, res_len); + } + else + { + /* Save filepos last */ + memcpy((uchar*) to, ref_pos, (size_t) param->ref_length); + to+= param->ref_length; + } + return static_cast(to - orig_to); 
+} + + +/* + Register fields used by sorting in the sorted table's read set +*/ + +static void register_used_fields(Sort_param *param) +{ + SORT_FIELD *sort_field; + TABLE *table=param->sort_form; + + for (sort_field= param->local_sortorder.begin() ; + sort_field != param->local_sortorder.end() ; + sort_field++) + { + Field *field; + if ((field= sort_field->field)) + { + if (field->table == table) + field->register_field_in_read_map(); + } + else + { // Item + sort_field->item->walk(&Item::register_field_in_read_map, 1, table); + } + } + + if (param->using_addon_fields()) + { + SORT_ADDON_FIELD *addonf= param->addon_fields->begin(); + for ( ; (addonf != param->addon_fields->end()) ; addonf++) + { + Field *field= addonf->field; + field->register_field_in_read_map(); + } + } + else + { + /* Save filepos last */ + table->prepare_for_position(); + } +} + + +static bool save_index(Sort_param *param, uint count, + SORT_INFO *table_sort) +{ + uint offset,res_length, length; + uchar *to; + DBUG_ENTER("save_index"); + DBUG_ASSERT(table_sort->record_pointers == 0); + + table_sort->sort_buffer(param, count); + + if (param->using_addon_fields()) + { + table_sort->sorted_result_in_fsbuf= TRUE; + table_sort->set_sort_length(param->sort_length); + DBUG_RETURN(0); + } + + bool using_packed_sortkeys= param->using_packed_sortkeys(); + res_length= param->res_length; + offset= param->rec_length-res_length; + if (!(to= table_sort->record_pointers= + (uchar*) my_malloc(key_memory_Filesort_info_record_pointers, + res_length*count, MYF(MY_WME | MY_THREAD_SPECIFIC)))) + DBUG_RETURN(1); /* purecov: inspected */ + for (uint ix= 0; ix < count; ++ix) + { + uchar *record= table_sort->get_sorted_record(ix); + + length= using_packed_sortkeys ? + Sort_keys::read_sortkey_length(record) : offset; + + memcpy(to, record + length, res_length); + to+= res_length; + } + DBUG_RETURN(0); +} + + +/** + Test whether priority queue is worth using to get top elements of an + ordered result set. 
  If it is, then allocates buffer for required amount of records

  @param param            Sort parameters.
  @param filesort_info    Filesort information.
  @param table            Table to sort.
  @param num_rows         Estimate of number of rows in source record set.
  @param memory_available Memory available for sorting.

  DESCRIPTION
    Given a query like this:
      SELECT ... FROM t ORDER BY a1,...,an LIMIT max_rows;
    This function tests whether a priority queue should be used to keep
    the result. Necessary conditions are:
    - estimate that it is actually cheaper than merge-sort
    - enough memory to store the records.

    If we don't have space for records, but we *do* have
    space for keys, we may rewrite 'table' to sort with
    references to records instead of additional data.
    (again, based on estimates that it will actually be cheaper).

  @retval
   true  - if it's ok to use PQ
   false - PQ will be slower than merge-sort, or there is not enough memory.
*/

static bool check_if_pq_applicable(Sort_param *param,
                                   SORT_INFO *filesort_info,
                                   TABLE *table, ha_rows num_rows,
                                   size_t memory_available)
{
  DBUG_ENTER("check_if_pq_applicable");

  /*
    How much Priority Queue sort is slower than qsort.
    Measurements (see unit test) indicate that PQ is roughly 3 times slower.
  */
  const double PQ_slowness= 3.0;

  /* No LIMIT: a bounded queue makes no sense */
  if (param->max_rows == HA_POS_ERROR)
  {
    DBUG_PRINT("info", ("No LIMIT"));
    DBUG_RETURN(false);
  }

  /* LIMIT (+1 extra slot) must fit in the uint max_keys_per_buffer below */
  if (param->max_rows + 2 >= UINT_MAX)
  {
    DBUG_PRINT("info", ("Too large LIMIT"));
    DBUG_RETURN(false);
  }

  size_t num_available_keys=
    memory_available / (param->rec_length + sizeof(char*));
  // We need 1 extra record in the buffer, when using PQ.
  param->max_keys_per_buffer= (uint) param->max_rows + 1;

  if (num_rows < num_available_keys)
  {
    // The whole source set fits into memory.
    if (param->max_rows < num_rows/PQ_slowness )
    {
      filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
                                       param->rec_length);
      DBUG_RETURN(filesort_info->sort_buffer_size() != 0);
    }
    else
    {
      // PQ will be slower.
      DBUG_RETURN(false);
    }
  }

  // Do we have space for LIMIT rows in memory?
  if (param->max_keys_per_buffer < num_available_keys)
  {
    filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
                                     param->rec_length);
    DBUG_RETURN(filesort_info->sort_buffer_size() != 0);
  }

  // Try to strip off addon fields.
  if (param->addon_fields)
  {
    /* Row size when sorting key + row reference only (no addon data) */
    const size_t row_length=
      param->sort_length + param->ref_length + sizeof(char*);
    num_available_keys= memory_available / row_length;

    // Can we fit all the keys in memory?
    if (param->max_keys_per_buffer < num_available_keys)
    {
      const double sort_merge_cost=
        get_merge_many_buffs_cost_fast(num_rows,
                                       num_available_keys,
                                       (uint)row_length);
      /*
        PQ has cost:
        (insert + qsort) * log(queue size) / TIME_FOR_COMPARE_ROWID +
        cost of file lookup afterwards.
        The lookup cost is a bit pessimistic: we take scan_time and assume
        that on average we find the row after scanning half of the file.
        A better estimate would be lookup cost, but note that we are doing
        random lookups here, rather than sequential scan.
      */
      const double pq_cpu_cost=
        (PQ_slowness * num_rows + param->max_keys_per_buffer) *
        log((double) param->max_keys_per_buffer) / TIME_FOR_COMPARE_ROWID;
      const double pq_io_cost=
        param->max_rows * table->file->scan_time() / 2.0;
      const double pq_cost= pq_cpu_cost + pq_io_cost;

      if (sort_merge_cost < pq_cost)
        DBUG_RETURN(false);

      filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
                                       param->sort_length + param->ref_length);

      if (filesort_info->sort_buffer_size() > 0)
      {
        /* Make attached data to be references instead of fields. */
        my_free(filesort_info->addon_fields);
        filesort_info->addon_fields= NULL;
        param->addon_fields= NULL;

        param->res_length= param->ref_length;
        param->sort_length+= param->ref_length;
        param->rec_length= param->sort_length;

        DBUG_RETURN(true);
      }
    }
  }
  DBUG_RETURN(false);
}


/** Merge buffers to make < MERGEBUFF2 buffers. */

int merge_many_buff(Sort_param *param, Sort_buffer sort_buffer,
                    Merge_chunk *buffpek, uint *maxbuffer, IO_CACHE *t_file)
{
  uint i;
  IO_CACHE t_file2,*from_file,*to_file,*temp;
  Merge_chunk *lastbuff;
  DBUG_ENTER("merge_many_buff");

  if (*maxbuffer < MERGEBUFF2)
    DBUG_RETURN(0);                             /* purecov: inspected */
  if (flush_io_cache(t_file) ||
      open_cached_file(&t_file2,mysql_tmpdir,TEMP_PREFIX,DISK_BUFFER_SIZE,
                       MYF(MY_WME)))
    DBUG_RETURN(1);                             /* purecov: inspected */

  /*
    Ping-pong merge: repeatedly merge groups of MERGEBUFF chunks from
    from_file into to_file, swapping the roles of the two files each pass,
    until fewer than MERGEBUFF2 chunks remain.
  */
  from_file= t_file ; to_file= &t_file2;
  while (*maxbuffer >= MERGEBUFF2)
  {
    if (reinit_io_cache(from_file,READ_CACHE,0L,0,0))
      goto cleanup;
    if (reinit_io_cache(to_file,WRITE_CACHE,0L,0,0))
      goto cleanup;
    lastbuff=buffpek;
    for (i=0 ; i <= *maxbuffer-MERGEBUFF*3/2 ; i+=MERGEBUFF)
    {
      if (merge_buffers(param,from_file,to_file,sort_buffer, lastbuff++,
                        buffpek+i,buffpek+i+MERGEBUFF-1,0))
        goto cleanup;
    }
    /* Merge the remaining tail of chunks (between MERGEBUFF/2 and
       MERGEBUFF*3/2 of them) in one final group for this pass */
    if (merge_buffers(param,from_file,to_file,sort_buffer, lastbuff++,
                      buffpek+i,buffpek+ *maxbuffer,0))
      break;                                    /* purecov: inspected */
    if (flush_io_cache(to_file))
      break;                                    /* purecov: inspected */
    temp=from_file; from_file=to_file; to_file=temp;
    *maxbuffer= (uint) (lastbuff-buffpek)-1;
  }
cleanup:
  close_cached_file(to_file);                   // This holds old result
  if (to_file == t_file)
  {
    *t_file=t_file2;                            // Copy result file
  }

  DBUG_RETURN(*maxbuffer >= MERGEBUFF2);        /* Return 1 if interrupted */
} /* merge_many_buff */


/**
  Read data to buffer.
+ + @retval Number of bytes read + (ulong)-1 if something goes wrong +*/ + +ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek, + Sort_param *param, bool packed_format) +{ + ha_rows count; + uint rec_length= param->rec_length; + + if ((count= MY_MIN(buffpek->max_keys(),buffpek->rowcount()))) + { + size_t bytes_to_read; + if (packed_format) + { + count= buffpek->rowcount(); + bytes_to_read= MY_MIN(buffpek->buffer_size(), + static_cast(fromfile->end_of_file - + buffpek->file_position())); + } + else + bytes_to_read= rec_length * static_cast(count); + + if (unlikely(my_b_pread(fromfile, buffpek->buffer_start(), + bytes_to_read, buffpek->file_position()))) + return ((ulong) -1); + + size_t num_bytes_read; + + if (packed_format) + { + /* + The last record read is most likely not complete here. + We need to loop through all the records, reading the length fields, + and then "chop off" the final incomplete record. + */ + uchar *record= buffpek->buffer_start(); + uint ix= 0; + uint size_of_addon_length= param->using_packed_addons() ? + Addon_fields::size_of_length_field : 0; + + uint size_of_sort_length= param->using_packed_sortkeys() ? + Sort_keys::size_of_length_field : 0; + + for (; ix < count; ++ix) + { + if (record + size_of_sort_length > buffpek->buffer_end()) + break; + uint sort_length= param->using_packed_sortkeys() ? + Sort_keys::read_sortkey_length(record) : + param->sort_length; + + DBUG_ASSERT(sort_length <= param->sort_length); + + if (record + sort_length + size_of_addon_length > + buffpek->buffer_end()) + break; // Incomplete record. + + uchar *plen= record + sort_length; + uint res_length= param->get_result_length(plen); + if (plen + res_length > buffpek->buffer_end()) + break; // Incomplete record. 
+ DBUG_ASSERT(res_length > 0); + DBUG_ASSERT(sort_length + res_length <= param->rec_length); + record+= sort_length; + record+= res_length; + } + DBUG_ASSERT(ix > 0); + count= ix; + num_bytes_read= record - buffpek->buffer_start(); + DBUG_PRINT("info", ("read %llu bytes of complete records", + static_cast(bytes_to_read))); + } + else + num_bytes_read= bytes_to_read; + + buffpek->init_current_key(); + buffpek->advance_file_position(num_bytes_read); /* New filepos */ + buffpek->decrement_rowcount(count); + buffpek->set_mem_count(count); + return (ulong) num_bytes_read; + } + return 0; +} /* read_to_buffer */ + + +/** + Put all room used by freed buffer to use in adjacent buffer. + + Note, that we can't simply distribute memory evenly between all buffers, + because new areas must not overlap with old ones. + + @param[in] queue list of non-empty buffers, without freed buffer + @param[in] reuse empty buffer + @param[in] key_length key length +*/ + +void reuse_freed_buff(QUEUE *queue, Merge_chunk *reuse, uint key_length) +{ + for (uint i= queue_first_element(queue); + i <= queue_last_element(queue); + i++) + { + Merge_chunk *bp= (Merge_chunk *) queue_element(queue, i); + if (reuse->merge_freed_buff(bp)) + return; + } + DBUG_ASSERT(0); +} + + +/** + Merge buffers to one buffer. + + @param param Sort parameter + @param from_file File with source data (BUFFPEKs point to this file) + @param to_file File to write the sorted result data. + @param sort_buffer Buffer for data to store up to MERGEBUFF2 sort keys. 
  @param lastbuff   OUT Store here BUFFPEK describing data written to to_file
  @param Fb         First element in source BUFFPEKs array
  @param Tb         Last element in source BUFFPEKs array
  @param flag       0 <=> write {sort_key, addon_fields} pairs as further
                        sorting will be performed
                    1 <=> write just addon_fields as this is the final
                        merge pass

  @retval
    0   OK
  @retval
    1   ERROR
*/

bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
                   IO_CACHE *to_file, Sort_buffer sort_buffer,
                   Merge_chunk *lastbuff, Merge_chunk *Fb, Merge_chunk *Tb,
                   int flag)
{
  bool error= 0;
  uint rec_length,res_length,offset;
  size_t sort_length;
  ulong maxcount, bytes_read;
  ha_rows max_rows,org_max_rows;
  my_off_t to_start_filepos;
  uchar *strpos;
  Merge_chunk *buffpek;
  QUEUE queue;
  qsort2_cmp cmp;
  void *first_cmp_arg;
  element_count dupl_count= 0;
  uchar *src;
  uchar *unique_buff= param->unique_buff;
  const bool killable= !param->not_killable;
  THD* const thd=current_thd;
  DBUG_ENTER("merge_buffers");

  thd->inc_status_sort_merge_passes();
  thd->query_plan_fsort_passes++;

  rec_length= param->rec_length;
  res_length= param->res_length;
  sort_length= param->sort_length;
  /* Duplicate counter (when used) is stored at the tail of each record. */
  uint dupl_count_ofs= rec_length-sizeof(element_count);
  uint min_dupl_count= param->min_dupl_count;
  bool check_dupl_count= flag && min_dupl_count;
  offset= (rec_length-
           (flag && min_dupl_count ? sizeof(dupl_count) : 0)-res_length);
  uint wr_len= flag ? res_length : rec_length;
  uint wr_offset= flag ? offset : 0;

  const bool using_packed_sortkeys= param->using_packed_sortkeys();
  bool offset_for_packing= (flag == 1 && using_packed_sortkeys);
  const bool packed_format= param->is_packed_format();

  maxcount= (ulong) (param->max_keys_per_buffer/((uint) (Tb-Fb) +1));
  to_start_filepos= my_b_tell(to_file);
  strpos= sort_buffer.array();
  org_max_rows=max_rows= param->max_rows;

  set_if_bigger(maxcount, 1);

  if (unique_buff)
  {
    cmp= param->compare;
    first_cmp_arg= (void *) &param->cmp_context;
  }
  else
  {
    cmp= param->get_compare_function();
    first_cmp_arg= param->get_compare_argument(&sort_length);
  }
  if (unlikely(init_queue(&queue, (uint) (Tb-Fb)+1,
                          offsetof(Merge_chunk,m_current_key), 0,
                          (queue_compare) cmp, first_cmp_arg, 0, 0)))
    DBUG_RETURN(1);                             /* purecov: inspected */
  /* Split the shared sort buffer evenly between the source chunks. */
  const size_t chunk_sz = (sort_buffer.size()/((uint) (Tb-Fb) +1));
  for (buffpek= Fb ; buffpek <= Tb ; buffpek++)
  {
    buffpek->set_buffer(strpos, strpos + chunk_sz);
    buffpek->set_max_keys(maxcount);
    bytes_read= read_to_buffer(from_file, buffpek, param, packed_format);
    if (unlikely(bytes_read == (ulong) -1))
      goto err;                                 /* purecov: inspected */
    strpos+= chunk_sz;
    // If less data in buffers than expected
    buffpek->set_max_keys(buffpek->mem_count());
    queue_insert(&queue, (uchar*) buffpek);
  }

  if (unique_buff)
  {
    /*
       Called by Unique::get()
       Copy the first argument to unique_buff for unique removal.
       Store it also in 'to_file'.
    */
    buffpek= (Merge_chunk*) queue_top(&queue);
    memcpy(unique_buff, buffpek->current_key(), rec_length);
    if (min_dupl_count)
      memcpy(&dupl_count, unique_buff+dupl_count_ofs,
             sizeof(dupl_count));
    buffpek->advance_current_key(rec_length);
    buffpek->decrement_mem_count();
    if (buffpek->mem_count() == 0)
    {
      if (unlikely(!(bytes_read= read_to_buffer(from_file, buffpek,
                                                param, packed_format))))
      {
        /* Chunk exhausted: give its buffer space to a neighbour. */
        (void) queue_remove_top(&queue);
        reuse_freed_buff(&queue, buffpek, rec_length);
      }
      else if (unlikely(bytes_read == (ulong) -1))
        goto err;                               /* purecov: inspected */
    }
    queue_replace_top(&queue);                  // Top element has been used
  }
  else
    cmp= 0;                                     // Not unique

  while (queue.elements > 1)
  {
    if (killable && unlikely(thd->check_killed()))
      goto err;                                 /* purecov: inspected */

    for (;;)
    {
      buffpek= (Merge_chunk*) queue_top(&queue);
      src= buffpek->current_key();
      if (cmp)                                  // Remove duplicates
      {
        uchar *current_key= buffpek->current_key();
        if (!(*cmp)(first_cmp_arg, &unique_buff, &current_key))
        {
          if (min_dupl_count)
          {
            element_count cnt;
            memcpy(&cnt, buffpek->current_key() + dupl_count_ofs, sizeof(cnt));
            dupl_count+= cnt;
          }
          goto skip_duplicate;
        }
        if (min_dupl_count)
        {
          memcpy(unique_buff+dupl_count_ofs, &dupl_count,
                 sizeof(dupl_count));
        }
        src= unique_buff;
      }

      {
        param->get_rec_and_res_len(buffpek->current_key(),
                                   &rec_length, &res_length);
        const uint bytes_to_write= (flag == 0) ? rec_length : res_length;

        /*
          Do not write into the output file if this is the final merge called
          for a Unique object used for intersection and dupl_count is less
          than min_dupl_count.
          If the Unique object is used to intersect N sets of unique elements
          then for any element:
          dupl_count >= N <=> the element is occurred in each of these N sets.
        */
        if (!check_dupl_count || dupl_count >= min_dupl_count)
        {
          if(my_b_write(to_file,
                        src + (offset_for_packing ?
                               rec_length - res_length : // sort length
                               wr_offset),
                        bytes_to_write))
            goto err;                           /* purecov: inspected */
        }
        if (cmp)
        {
          memcpy(unique_buff, buffpek->current_key(), rec_length);
          if (min_dupl_count)
            memcpy(&dupl_count, unique_buff+dupl_count_ofs,
                   sizeof(dupl_count));
        }
        if (!--max_rows)
        {
          /* Nothing more to do */
          goto end;                             /* purecov: inspected */
        }
      }
    skip_duplicate:
      buffpek->advance_current_key(rec_length);
      buffpek->decrement_mem_count();

      if (buffpek->mem_count() == 0)
      {
        if (unlikely(!(bytes_read= read_to_buffer(from_file, buffpek,
                                                  param, packed_format))))
        {
          (void) queue_remove_top(&queue);
          reuse_freed_buff(&queue, buffpek, rec_length);
          break;                        /* One buffer have been removed */
        }
        else if (unlikely(bytes_read == (ulong) -1))
          goto err;                             /* purecov: inspected */
      }
      queue_replace_top(&queue);        /* Top element has been replaced */
    }
  }
  /* Only one chunk left: stream the remainder through the whole buffer. */
  buffpek= (Merge_chunk*) queue_top(&queue);
  buffpek->set_buffer(sort_buffer.array(),
                      sort_buffer.array() + sort_buffer.size());
  buffpek->set_max_keys(param->max_keys_per_buffer);

  /*
    As we know all entries in the buffer are unique, we only have to
    check if the first one is the same as the last one we wrote
  */
  if (cmp)
  {
    uchar *current_key= buffpek->current_key();
    if (!(*cmp)(first_cmp_arg, &unique_buff, &current_key))
    {
      if (min_dupl_count)
      {
        element_count cnt;
        memcpy(&cnt, buffpek->current_key() + dupl_count_ofs, sizeof(cnt));
        dupl_count+= cnt;
      }
      buffpek->advance_current_key(rec_length);
      buffpek->decrement_mem_count();
    }

    if (min_dupl_count)
      memcpy(unique_buff+dupl_count_ofs, &dupl_count,
             sizeof(dupl_count));

    if (!check_dupl_count || dupl_count >= min_dupl_count)
    {
      src= unique_buff;
      if (my_b_write(to_file, src+wr_offset, wr_len))
        goto err;                               /* purecov: inspected */
      if (!--max_rows)
        goto end;
    }
  }

  do
  {
    if (buffpek->mem_count() > max_rows)
    {                                   /* Don't write too many records */
      buffpek->set_mem_count(max_rows);
      buffpek->set_rowcount(0);                 /* Don't read more */
    }
    max_rows-= buffpek->mem_count();
    for (uint ix= 0; ix < buffpek->mem_count(); ++ix)
    {
      uchar *src= buffpek->current_key();
      param->get_rec_and_res_len(src,
                                 &rec_length, &res_length);
      const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
      if (check_dupl_count)
      {
        memcpy((uchar *) &dupl_count,
               buffpek->current_key() + offset + dupl_count_ofs,
               sizeof(dupl_count));
        if (dupl_count < min_dupl_count)
          continue;
      }
      if(my_b_write(to_file,
                    src + (offset_for_packing ?
                           rec_length - res_length : // sort length
                           wr_offset),
                    bytes_to_write))
        goto err;
      buffpek->advance_current_key(rec_length);
    }
  }
  while (likely(!(error=
                  (bytes_read= read_to_buffer(from_file, buffpek, param,
                                              packed_format)) == (ulong) -1)) &&
         bytes_read != 0);

end:
  lastbuff->set_rowcount(MY_MIN(org_max_rows-max_rows, param->max_rows));
  lastbuff->set_file_position(to_start_filepos);

cleanup:
  delete_queue(&queue);
  DBUG_RETURN(error);

err:
  error= 1;
  goto cleanup;

} /* merge_buffers */


/* Do a merge to output-file (save only positions) */

int merge_index(Sort_param *param, Sort_buffer sort_buffer,
                Merge_chunk *buffpek, uint maxbuffer,
                IO_CACHE *tempfile, IO_CACHE *outfile)
{
  DBUG_ENTER("merge_index");
  /* flag=1: final pass, write only the result (row references/addons). */
  if (merge_buffers(param, tempfile, outfile, sort_buffer, buffpek, buffpek,
                    buffpek + maxbuffer, 1))
    DBUG_RETURN(1);                             /* purecov: inspected */
  DBUG_RETURN(0);
} /* merge_index */


/*
  Number of bytes needed to store 'string_length' as a length suffix
  for sorting blob/varbinary values (1..4 bytes).
*/
static uint suffix_length(ulong string_length)
{
  if (string_length < 256)
    return 1;
  if (string_length < 256L*256L)
    return 2;
  if (string_length < 256L*256L*256L)
    return 3;
  return 4;                                     // Can't sort longer than 4G
}


void
Type_handler_string_result::sort_length(THD *thd,
                                        const Type_std_attributes *item,
                                        SORT_FIELD_ATTR *sortorder) const
{
  CHARSET_INFO *cs;
  sortorder->set_length_and_original_length(thd, item->max_length);

if (use_strnxfrm((cs= item->collation.collation))) + { + sortorder->length= (uint) cs->strnxfrmlen(sortorder->length); + } + else if (cs == &my_charset_bin) + { + /* Store length last to be able to sort blob/varbinary */ + sortorder->suffix_length= suffix_length(item->max_length); + DBUG_ASSERT(sortorder->length <= UINT_MAX32 - sortorder->suffix_length); + sortorder->length+= sortorder->suffix_length; + if (sortorder->original_length >= UINT_MAX32 - sortorder->suffix_length) + sortorder->original_length= UINT_MAX32; + else + sortorder->original_length+= sortorder->suffix_length; + } +} + + +void +Type_handler_temporal_result::sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *sortorder) const +{ + sortorder->original_length= sortorder->length= 8; // Sizof intern longlong +} + + +void +Type_handler_timestamp_common::sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *sortorder) const +{ + sortorder->length= my_timestamp_binary_length(item->decimals); + sortorder->original_length= sortorder->length; +} + + +void +Type_handler_int_result::sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *sortorder) const +{ + sortorder->original_length= sortorder->length= 8; // Sizof intern longlong +} + + +void +Type_handler_real_result::sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *sortorder) const +{ + sortorder->original_length= sortorder->length= sizeof(double); +} + + +void +Type_handler_decimal_result::sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *sortorder) const +{ + sortorder->length= + my_decimal_get_binary_size(item->max_length - (item->decimals ? 1 : 0), + item->decimals); + sortorder->original_length= sortorder->length; +} + + +/** + Calculate length of sort key. 
  @param thd                        Thread handler
  @param sortorder                  Order of items to sort
  @param s_length                   Number of items to sort
  @param allow_packing_for_sortkeys [out] set to false if packing sort keys
                                    is not allowed

  @note
   * sortorder->length and other members are updated for each sort item.
   * TODO what is the meaning of this value if some fields are using packing
     while others are not?

  @return
    Total length of sort buffer in bytes
*/

static uint
sortlength(THD *thd, Sort_keys *sort_keys, bool *allow_packing_for_sortkeys)
{
  uint length;
  *allow_packing_for_sortkeys= true;
  bool allow_packing_for_keys= true;

  length=0;
  uint nullable_cols=0;

  /* Results are cached on the Sort_keys object across calls. */
  if (sort_keys->is_parameters_computed())
  {
    *allow_packing_for_sortkeys= sort_keys->using_packed_sortkeys();
    return sort_keys->get_sort_length_with_memcmp_values();
  }

  for (SORT_FIELD *sortorder= sort_keys->begin();
       sortorder != sort_keys->end();
       sortorder++)
  {
    sortorder->suffix_length= 0;
    sortorder->length_bytes= 0;
    if (sortorder->field)
    {
      /* Sorting directly on a table column. */
      Field *field= sortorder->field;
      CHARSET_INFO *cs= sortorder->field->sort_charset();
      sortorder->type= field->is_packable() ?
                       SORT_FIELD_ATTR::VARIABLE_SIZE :
                       SORT_FIELD_ATTR::FIXED_SIZE;
      sortorder->set_length_and_original_length(thd, field->sort_length());
      sortorder->suffix_length= sortorder->field->sort_suffix_length();
      sortorder->cs= cs;

      if (use_strnxfrm((cs=sortorder->field->sort_charset())))
        sortorder->length= (uint) cs->strnxfrmlen(sortorder->length);

      if (sortorder->is_variable_sized() && allow_packing_for_keys)
      {
        allow_packing_for_keys= sortorder->check_if_packing_possible(thd);
        sortorder->length_bytes=
          number_storage_requirement(MY_MIN(sortorder->original_length,
                                            thd->variables.max_sort_length));
      }

      if ((sortorder->maybe_null= sortorder->field->maybe_null()))
        nullable_cols++;                        // Place for NULL marker
    }
    else
    {
      /* Sorting on an expression; delegate length calc to the type handler. */
      sortorder->type= sortorder->item->type_handler()->is_packable() ?
                       SORT_FIELD_ATTR::VARIABLE_SIZE :
                       SORT_FIELD_ATTR::FIXED_SIZE;
      sortorder->item->type_handler()->sort_length(thd, sortorder->item,
                                                   sortorder);
      sortorder->cs= sortorder->item->collation.collation;
      if (sortorder->is_variable_sized() && allow_packing_for_keys)
      {
        allow_packing_for_keys= sortorder->check_if_packing_possible(thd);
        sortorder->length_bytes=
          number_storage_requirement(MY_MIN(sortorder->original_length,
                                            thd->variables.max_sort_length));
      }

      if ((sortorder->maybe_null= sortorder->item->maybe_null()))
        nullable_cols++;                        // Place for NULL marker
    }
    if (sortorder->is_variable_sized())
    {
      set_if_smaller(sortorder->length, thd->variables.max_sort_length);
      set_if_smaller(sortorder->original_length,
                     thd->variables.max_sort_length);
    }
    DBUG_ASSERT(length < UINT_MAX32 - sortorder->length);
    length+= sortorder->length;

    sort_keys->increment_size_of_packable_fields(sortorder->length_bytes);
    sort_keys->increment_original_sort_length(sortorder->original_length);
  }
  // add bytes for nullable_cols
  sort_keys->increment_original_sort_length(nullable_cols);
  *allow_packing_for_sortkeys= allow_packing_for_keys;
  sort_keys->set_sort_length_with_memcmp_values(length + nullable_cols);
  sort_keys->set_parameters_computed(true);
  DBUG_PRINT("info",("sort_length: %d",length));
  return length + nullable_cols;
}


/*
  Check whether addon fields can be used or not.
+ + @param table Table structure + @param sortlength Length of sort key [strxfrm form] + @param length [OUT] Max length of addon fields + @param fields [OUT] Number of addon fields + @param null_fields [OUT] Number of nullable addon fields + @param packable_length [OUT] Max length of addon fields that can be + packed + + @retval + TRUE Addon fields can be used + FALSE Otherwise +*/ + +bool filesort_use_addons(TABLE *table, uint sortlength, + uint *length, uint *fields, uint *null_fields, + uint *packable_length) +{ + Field **pfield, *field; + *length= *fields= *null_fields= *packable_length= 0; + uint field_length=0; + + for (pfield= table->field; (field= *pfield) ; pfield++) + { + if (!bitmap_is_set(table->read_set, field->field_index)) + continue; + if (field->flags & BLOB_FLAG) + return false; + field_length= field->max_packed_col_length(field->pack_length()); + (*length)+= field_length; + + if (field->maybe_null() || field->is_packable()) + (*packable_length)+= field_length; + + if (field->maybe_null()) + (*null_fields)++; + (*fields)++; + } + if (!*fields) + return false; + (*length)+= (*null_fields+7)/8; + + /* + sortlength used here is unpacked key length (the strxfrm form). This is + done because unpacked key length is a good upper bound for packed sort + key length. + But for some collations the max packed length may be greater than the + length obtained from the strxfrm form. + Example: for utf8_general_ci, the original string form can be longer than + its mem-comparable form (note that this is rarely achieved in practice). + */ + return *length + sortlength < + table->in_use->variables.max_length_for_sort_data; +} + +/** + Get descriptors of fields appended to sorted fields and + calculate its total length. + + The function first finds out what fields are used in the result set. + Then it calculates the length of the buffer to store the values of + these fields together with the value of sort values. 
  If the calculated length is not greater than max_length_for_sort_data
  the function allocates memory for an array of descriptors containing
  layouts for the values of the non-sorted fields in the buffer and
  fills them.

  @param table                    Table structure
  @param sortlength               Total length of sorted fields
  @param addon_length      [OUT]  Length of addon fields
  @param m_packable_length [OUT]  Length of the addon fields that can be
                                  packed
  @note
    The null bits for the appended values are supposed to be put together
    and stored the buffer just ahead of the value of the first field.

  @return
    Pointer to the layout descriptors for the appended fields, if any
  @retval
    NULL   if we do not store field values with sort data.
*/

static Addon_fields*
get_addon_fields(TABLE *table, uint sortlength,
                 uint *addon_length, uint *m_packable_length)
{
  Field **pfield;
  Field *field;
  uint length, fields, null_fields, packable_length;
  MY_BITMAP *read_set= table->read_set;
  DBUG_ENTER("get_addon_fields");

  /*
    If there is a reference to a field in the query add it
    to the set of appended fields.
    Note for future refinement:
    This is a too strong condition.
    Actually we need only the fields referred in the
    result set. And for some of them it makes sense to use
    the values directly from sorted fields.
    But beware the case when item->cmp_type() != item->result_type()
  */

  // see remove_const() for HA_SLOW_RND_POS explanation
  if (table->file->ha_table_flags() & HA_SLOW_RND_POS)
    sortlength= 0;

  void *raw_mem_addon_field, *raw_mem;

  if (!filesort_use_addons(table, sortlength, &length, &fields, &null_fields,
                           &packable_length) ||
      !(my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME | MY_THREAD_SPECIFIC),
                        &raw_mem, sizeof(Addon_fields),
                        &raw_mem_addon_field,
                        sizeof(SORT_ADDON_FIELD) * fields,
                        NullS)))
    DBUG_RETURN(0);

  Addon_fields_array
    addon_array(static_cast<SORT_ADDON_FIELD*>(raw_mem_addon_field), fields);
  Addon_fields *addon_fields= new (raw_mem) Addon_fields(addon_array);

  DBUG_ASSERT(addon_fields);

  (*addon_length)= length;
  (*m_packable_length)= packable_length;

  /* The null bitmap precedes the first field value in each record. */
  length= (null_fields+7)/8;
  null_fields= 0;
  SORT_ADDON_FIELD* addonf= addon_fields->begin();
  for (pfield= table->field; (field= *pfield) ; pfield++)
  {
    if (!bitmap_is_set(read_set, field->field_index))
      continue;
    addonf->field= field;
    addonf->offset= length;
    if (field->maybe_null())
    {
      addonf->null_offset= null_fields/8;
      addonf->null_bit= 1<<(null_fields & 7);
      null_fields++;
    }
    else
    {
      addonf->null_offset= 0;
      addonf->null_bit= 0;
    }
    addonf->length= field->max_packed_col_length(field->pack_length());
    length+= addonf->length;
    addonf++;
  }

  DBUG_PRINT("info",("addon_length: %d",length));
  DBUG_RETURN(addon_fields);
}


/*
** functions to change a double or float to a sortable string
** The following should work for IEEE
*/

#define DBL_EXP_DIG (sizeof(double)*8-DBL_MANT_DIG)

void change_double_for_sort(double nr,uchar *to)
{
  uchar *tmp=(uchar*) to;
  if (nr == 0.0)
  {                                             /* Change to zero string */
    tmp[0]=(uchar) 128;
    memset(tmp+1, 0, sizeof(nr)-1);
  }
  else
  {
#ifdef WORDS_BIGENDIAN
    memcpy(tmp, &nr, sizeof(nr));
#else
    {
      uchar *ptr= (uchar*) &nr;
#if defined(__FLOAT_WORD_ORDER) && (__FLOAT_WORD_ORDER == __BIG_ENDIAN)
      tmp[0]= ptr[3]; tmp[1]=ptr[2]; tmp[2]= ptr[1]; tmp[3]=ptr[0];
      tmp[4]= ptr[7]; tmp[5]=ptr[6]; tmp[6]= ptr[5]; tmp[7]=ptr[4];
#else
      tmp[0]= ptr[7]; tmp[1]=ptr[6]; tmp[2]= ptr[5]; tmp[3]=ptr[4];
      tmp[4]= ptr[3]; tmp[5]=ptr[2]; tmp[6]= ptr[1]; tmp[7]=ptr[0];
#endif
    }
#endif
    if (tmp[0] & 128)                           /* Negative */
    {                                           /* make complement */
      uint i;
      for (i=0 ; i < sizeof(nr); i++)
        tmp[i]=tmp[i] ^ (uchar) 255;
    }
    else
    {                                   /* Set high and move exponent one up */
      ushort exp_part=(((ushort) tmp[0] << 8) | (ushort) tmp[1] |
                       (ushort) 32768);
      exp_part+= (ushort) 1 << (16-1-DBL_EXP_DIG);
      tmp[0]= (uchar) (exp_part >> 8);
      tmp[1]= (uchar) exp_part;
    }
  }
}

bool SORT_INFO::using_packed_addons()
{
  return addon_fields != NULL && addon_fields->using_packed_addons();
}

void SORT_INFO::free_addon_buff()
{
  if (addon_fields)
    addon_fields->free_addon_buff();
}

/*
  Check if packed sortkeys are used or not
*/
bool SORT_INFO::using_packed_sortkeys()
{
  return sort_keys != NULL && sort_keys->using_packed_sortkeys();
}

/**
  Free SORT_INFO
*/

SORT_INFO::~SORT_INFO()
{
  DBUG_ENTER("~SORT_INFO::SORT_INFO()");
  free_data();
  DBUG_VOID_RETURN;
}


void Sort_param::try_to_pack_sortkeys()
{
#ifdef WITHOUT_PACKED_SORT_KEYS
  return;
#endif

  uint size_of_packable_fields= sort_keys->get_size_of_packable_fields();

  /*
    Disable packing when all fields are fixed-size fields.
  */
  if (size_of_packable_fields == 0)
    return;

  const uint sz= Sort_keys::size_of_length_field;
  uint sort_len= sort_keys->get_sort_length_with_original_values();

  /*
    Heuristic introduced, skip packing sort keys if saving less than 128 bytes
  */
  if (sort_len < 128 + sz + size_of_packable_fields)
    return;

  sort_keys->set_using_packed_sortkeys(true);
  m_packed_format= true;
  m_using_packed_sortkeys= true;
  sort_length= sort_len + sz + size_of_packable_fields +
               (using_addon_fields() ? 0 : res_length);
  /*
    Only the record length needs to be updated, the res_length does not need
    to be updated
  */
  rec_length= sort_length + addon_length;
}


uint
Type_handler_string_result::make_packed_sort_key_part(uchar *to, Item *item,
                                            const SORT_FIELD_ATTR *sort_field,
                                            String *tmp) const
{
  CHARSET_INFO *cs= item->collation.collation;
  bool maybe_null= item->maybe_null();

  if (maybe_null)
    *to++= 1;                   // Optimistic non-NULL marker; fixed up below

  Binary_string *res= item->str_result(tmp);
  if (!res)
  {
    if (maybe_null)
    {
      *(to-1)= 0;               // Rewrite marker: value is NULL
      return 0;
    }
    else
    {
      /* purecov: begin deadcode */
      /*
        This should only happen during extreme conditions if we run out
        of memory or have an item marked not null when it can be null.
        This code is here mainly to avoid a hard crash in this case.
      */
      DBUG_ASSERT(0);
      DBUG_PRINT("warning",
                 ("Got null on something that shouldn't be null"));
      memset(to, 0, sort_field->length);  // Avoid crash
      /* purecov: end */
      return sort_field->original_length;
    }
  }
  return sort_field->pack_sort_string(to, res, cs);
}


uint
Type_handler_int_result::make_packed_sort_key_part(uchar *to, Item *item,
                                            const SORT_FIELD_ATTR *sort_field,
                                            String *tmp) const
{
  longlong value= item->val_int_result();
  return make_packed_sort_key_longlong(to, item->maybe_null(),
                                       item->null_value, item->unsigned_flag,
                                       value, sort_field);
}


uint
Type_handler_decimal_result::make_packed_sort_key_part(uchar *to, Item *item,
                                            const SORT_FIELD_ATTR *sort_field,
                                            String *tmp) const
{
  my_decimal dec_buf, *dec_val= item->val_decimal_result(&dec_buf);
  if (item->maybe_null())
  {
    if (item->null_value)
    {
      *to++=0;
      return 0;
    }
    *to++= 1;
  }
  dec_val->to_binary(to, item->max_length - (item->decimals ? 1 : 0),
                     item->decimals);
  DBUG_ASSERT(sort_field->original_length == sort_field->length);
  return sort_field->original_length;
}


uint
Type_handler_real_result::make_packed_sort_key_part(uchar *to, Item *item,
                                            const SORT_FIELD_ATTR *sort_field,
                                            String *tmp) const
{
  double value= item->val_result();
  if (item->maybe_null())
  {
    if (item->null_value)
    {
      *to++=0;
      return 0;
    }
    *to++= 1;
  }
  change_double_for_sort(value, to);
  DBUG_ASSERT(sort_field->original_length == sort_field->length);
  return sort_field->original_length;
}


uint
Type_handler_temporal_result::make_packed_sort_key_part(uchar *to, Item *item,
                                            const SORT_FIELD_ATTR *sort_field,
                                            String *tmp) const
{
  MYSQL_TIME buf;
  // This is a temporal type. No nanoseconds. Rounding mode is not important.
  DBUG_ASSERT(item->cmp_type() == TIME_RESULT);
  static const Temporal::Options opt(TIME_INVALID_DATES, TIME_FRAC_NONE);
  if (item->get_date_result(current_thd, &buf, opt))
  {
    DBUG_ASSERT(item->maybe_null());
    DBUG_ASSERT(item->null_value);
    return make_packed_sort_key_longlong(to, item->maybe_null(), true,
                                         item->unsigned_flag, 0, sort_field);
  }
  return make_packed_sort_key_longlong(to, item->maybe_null(), false,
                                       item->unsigned_flag, pack_time(&buf),
                                       sort_field);
}


uint
Type_handler_timestamp_common::make_packed_sort_key_part(uchar *to, Item *item,
                                            const SORT_FIELD_ATTR *sort_field,
                                            String *tmp) const
{
  THD *thd= current_thd;
  uint binlen= my_timestamp_binary_length(item->decimals);
  Timestamp_or_zero_datetime_native_null native(thd, item);
  if (native.is_null() || native.is_zero_datetime())
  {
    // NULL or '0000-00-00 00:00:00'
    if (item->maybe_null())
    {
      *to++=0;
      return 0;
    }
    else
    {
      bzero(to, binlen);
      return binlen;
    }
  }
  else
  {
    if (item->maybe_null())
      *to++= 1;
    if (native.length() != binlen)
    {
      /*
        Some items can return native representation with a different
        number of fractional digits, e.g.:
        GREATEST(ts_3, ts_4) can return a value with 3 fractional digits,
        although its fractional precision is 4.
        Re-pack with a proper precision now.
      */
      Timestamp(native).to_native(&native, item->datetime_precision(thd));
    }
    DBUG_ASSERT(native.length() == binlen);
    memcpy((char *) to, native.ptr(), binlen);
    return binlen;
  }
}


/*
  @brief
    Reverse the key for DESC clause

  @param to          buffer where values are written
  @param sort_field  Sort field structure

  @details
    used for mem-comparable sort keys
*/

void reverse_key(uchar *to, const SORT_FIELD_ATTR *sort_field)
{
  uint length;
  if (sort_field->maybe_null && (to[-1]= !to[-1]))
  {
    to+= sort_field->length; // don't waste the time reversing all 0's
    return;
  }
  length=sort_field->length;
  while (length--)
  {
    *to = (uchar) (~ *to);
    to++;
  }
}


/*
  @brief
    Check if packing sort keys is allowed

  @param THD                 thread structure

  @retval
    TRUE  packing allowed
    FALSE packing not allowed
*/
bool SORT_FIELD_ATTR::check_if_packing_possible(THD *thd) const
{
  /*
    Packing not allowed when original length is greater than max_sort_length
    and we have a complex collation because cutting a prefix is not safe in
    such a case
  */
  if (original_length > thd->variables.max_sort_length &&
      cs->state & MY_CS_NON1TO1)
    return false;
  return true;
}


void SORT_FIELD_ATTR::set_length_and_original_length(THD *thd, uint length_arg)
{
  length= length_arg;
  /* Only variable-sized keys may be truncated to max_sort_length. */
  if (is_variable_sized())
    set_if_smaller(length, thd->variables.max_sort_length);
  original_length= length_arg;
}


/*
  Compare function used for packing sort keys
*/

qsort2_cmp get_packed_keys_compare_ptr()
{
  return (qsort2_cmp) compare_packed_sort_keys;
}


/*
  Compare two varstrings.

  The strings are in this data format:

    [null_byte] [length of string + suffix_bytes] [the string] [suffix_bytes]

  suffix_bytes are used only for binary columns.
*/

int SORT_FIELD_ATTR::compare_packed_varstrings(uchar *a, size_t *a_len,
                                               uchar *b, size_t *b_len)
{
  int retval;
  size_t a_length, b_length;
  if (maybe_null)
  {
    *a_len= *b_len= 1;          // NULL bytes are always stored
    if (*a != *b)
    {
      // Note we don't return a proper value in *{a|b}_len for the non-NULL
      // value but that's ok
      if (*a == 0)
        return -1;
      else
        return 1;
    }
    else
    {
      if (*a == 0)
        return 0;
    }
    a++;
    b++;
  }
  else
    *a_len= *b_len= 0;

  a_length= read_keypart_length(a, length_bytes);
  b_length= read_keypart_length(b, length_bytes);

  *a_len+= length_bytes + a_length;
  *b_len+= length_bytes + b_length;

  retval= cs->strnncollsp(a + length_bytes,
                          a_length - suffix_length,
                          b + length_bytes,
                          b_length - suffix_length);

  if (!retval && suffix_length)
  {
    DBUG_ASSERT(cs == &my_charset_bin);
    // comparing the length stored in suffix bytes for binary strings
    a= a + length_bytes + a_length - suffix_length;
    b= b + length_bytes + b_length - suffix_length;
    retval= memcmp(a, b, suffix_length);
  }

  return retval;
}


/*
  A value comparison function that has a signature that's suitable for
  comparing packed values, but actually compares fixed-size values with
  memcmp.

  This is used for ordering fixed-size columns when the sorting procedure
  used packed-value format.
*/

int SORT_FIELD_ATTR::compare_packed_fixed_size_vals(uchar *a, size_t *a_len,
                                                    uchar *b, size_t *b_len)
{
  if (maybe_null)
  {
    *a_len=1;
    *b_len=1;
    if (*a != *b)
    {
      if (*a == 0)
        return -1;
      else
        return 1;
    }
    else
    {
      if (*a == 0)
        return 0;
    }
    a++;
    b++;
  }
  else
    *a_len= *b_len= 0;

  *a_len+= length;
  *b_len+= length;
  return memcmp(a,b, length);
}


/*
  @brief
    Comparison function to compare two packed sort keys

  @param sort_param    cmp argument
  @param a_ptr         packed sort key
  @param b_ptr         packed sort key

  @retval
    >0   key a_ptr greater than b_ptr
    =0   key a_ptr equal to b_ptr
    <0   key a_ptr less than b_ptr
*/

int compare_packed_sort_keys(void *sort_param,
                             unsigned char **a_ptr, unsigned char **b_ptr)
{
  int retval= 0;
  size_t a_len, b_len;
  Sort_param *param= (Sort_param*)sort_param;
  Sort_keys *sort_keys= param->sort_keys;
  uchar *a= *a_ptr;
  uchar *b= *b_ptr;

  /* Skip the total-length prefix of each packed key. */
  a+= Sort_keys::size_of_length_field;
  b+= Sort_keys::size_of_length_field;
  for (SORT_FIELD *sort_field= sort_keys->begin();
       sort_field != sort_keys->end(); sort_field++)
  {
    retval= sort_field->is_variable_sized() ?
            sort_field->compare_packed_varstrings(a, &a_len, b, &b_len) :
            sort_field->compare_packed_fixed_size_vals(a, &a_len, b, &b_len);

    if (retval)
      return sort_field->reverse ? -retval : retval;

    a+= a_len;
    b+= b_len;

  }
  /*
    this comparison is done for the case when the sort keys is appended with
    the ROW_ID pointer. For such cases we don't have addon fields
    so we can make a memcmp check over both the sort keys
  */
  if (!param->using_addon_fields())
    retval= memcmp(a, b, param->res_length);
  return retval;
}


/*
  @brief
    Store a packed string in the buffer

  @param to            buffer
  @param str           packed string value
  @param cs            character set

  @details
    This function writes to the buffer the packed value of a key_part
    of the sort key.
+ + The values written to the buffer are in this order + - value for null byte + - length of the string + - value of the string + - suffix length (for binary character set) +*/ + +uint +SORT_FIELD_ATTR::pack_sort_string(uchar *to, const Binary_string *str, + CHARSET_INFO *cs) const +{ + uchar *orig_to= to; + uint32 length, data_length; + DBUG_ASSERT(str->length() <= UINT32_MAX); + length= (uint32) str->length(); + + if (length + suffix_length <= original_length) + data_length= length; + else + data_length= original_length - suffix_length; + + // length stored in lowendian form + store_key_part_length(data_length + suffix_length, to, length_bytes); + to+= length_bytes; + // copying data length bytes to the buffer + memcpy(to, (uchar*)str->ptr(), data_length); + to+= data_length; + + if (cs == &my_charset_bin && suffix_length) + { + // suffix length stored in bigendian form + store_bigendian(length, to, suffix_length); + to+= suffix_length; + } + return static_cast(to - orig_to); +} + + +/* + @brief + Create a mem-comparable sort key + + @param param sort param structure + @param to buffer where values are written + + @retval + length of the bytes written including the NULL bytes +*/ + +static uint make_sortkey(Sort_param *param, uchar *to) +{ + Field *field; + SORT_FIELD *sort_field; + uchar *orig_to= to; + + for (sort_field=param->local_sortorder.begin() ; + sort_field != param->local_sortorder.end() ; + sort_field++) + { + bool maybe_null=0; + if ((field=sort_field->field)) + { + // Field + field->make_sort_key_part(to, sort_field->length); + if ((maybe_null= field->maybe_null())) + to++; + } + else + { // Item + sort_field->item->type_handler()->make_sort_key_part(to, + sort_field->item, + sort_field, + ¶m->tmp_buffer); + if ((maybe_null= sort_field->item->maybe_null())) + to++; + } + + if (sort_field->reverse) + reverse_key(to, sort_field); + to+= sort_field->length; + } + + DBUG_ASSERT(static_cast(to - orig_to) <= param->sort_length); + return static_cast(to - 
orig_to); +} + + +/* + @brief + create a compact sort key which can be compared with a comparison + function. They are called packed sort keys + + @param param sort param structure + @param to buffer where values are written + + @retval + length of the bytes written including the NULL bytes +*/ + +static uint make_packed_sortkey(Sort_param *param, uchar *to) +{ + Field *field; + SORT_FIELD *sort_field; + uint length; + uchar *orig_to= to; + + to+= Sort_keys::size_of_length_field; + + for (sort_field=param->local_sortorder.begin() ; + sort_field != param->local_sortorder.end() ; + sort_field++) + { + bool maybe_null=0; + if ((field=sort_field->field)) + { + // Field + length= field->make_packed_sort_key_part(to, sort_field); + if ((maybe_null= field->maybe_null())) + to++; + } + else + { // Item + Item *item= sort_field->item; + length= item->type_handler()->make_packed_sort_key_part(to, item, + sort_field, + ¶m->tmp_buffer); + if ((maybe_null= sort_field->item->maybe_null())) + to++; + } + to+= length; + } + + length= static_cast(to - orig_to); + DBUG_ASSERT(length <= param->sort_length); + Sort_keys::store_sortkey_length(orig_to, length); + return length; +} diff --git a/sql/filesort.h b/sql/filesort.h new file mode 100644 index 00000000..ebb521e2 --- /dev/null +++ b/sql/filesort.h @@ -0,0 +1,245 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef FILESORT_INCLUDED +#define FILESORT_INCLUDED + +#include "my_base.h" /* ha_rows */ +#include "sql_alloc.h" +#include "filesort_utils.h" + +class SQL_SELECT; +class THD; +struct TABLE; +class Filesort_tracker; +struct SORT_FIELD; +struct SORT_FIELD_ATTR; +typedef struct st_order ORDER; +class JOIN; +class Addon_fields; +class Sort_keys; + + +/** + Sorting related info. + To be extended by another WL to include complete filesort implementation. +*/ +class Filesort: public Sql_alloc +{ +public: + /** List of expressions to order the table by */ + ORDER *order; + /** Number of records to return */ + ha_rows limit; + /** ORDER BY list with some precalculated info for filesort */ + SORT_FIELD *sortorder; + /* Used with ROWNUM. Contains the number of rows filesort has found so far */ + ha_rows *accepted_rows; + /** select to use for getting records */ + SQL_SELECT *select; + + /** TRUE <=> free select on destruction */ + bool own_select; + /** TRUE means we are using Priority Queue for order by with limit. */ + bool using_pq; + /* + TRUE means sort operation must produce table rowids. + FALSE means that it halso has an option of producing {sort_key, + addon_fields} pairs. + */ + bool sort_positions; + /* + TRUE means all the fields of table of whose bitmap read_set is set + need to be read while reading records in the sort buffer. 
+ FALSE otherwise + */ + bool set_all_read_bits; + + Filesort_tracker *tracker; + Sort_keys *sort_keys; + + /* Unpack temp table columns to base table columns*/ + void (*unpack)(TABLE *); + + Filesort(ORDER *order_arg, ha_rows limit_arg, bool sort_positions_arg, + SQL_SELECT *select_arg): + order(order_arg), + limit(limit_arg), + sortorder(NULL), + accepted_rows(0), + select(select_arg), + own_select(false), + using_pq(false), + sort_positions(sort_positions_arg), + set_all_read_bits(false), + sort_keys(NULL), + unpack(NULL) + { + DBUG_ASSERT(order); + }; + + ~Filesort() { cleanup(); } + /* Prepare ORDER BY list for sorting. */ + Sort_keys* make_sortorder(THD *thd, JOIN *join, table_map first_table_bit); + +private: + void cleanup(); +}; + + +class SORT_INFO +{ + /// Buffer for sorting keys. + Filesort_buffer filesort_buffer; + +public: + SORT_INFO() + :addon_fields(NULL), record_pointers(0), + sort_keys(NULL), + sorted_result_in_fsbuf(FALSE) + { + buffpek.str= 0; + my_b_clear(&io_cache); + } + + ~SORT_INFO(); + + void free_data() + { + close_cached_file(&io_cache); + free_addon_buff(); + my_free(record_pointers); + my_free(buffpek.str); + my_free(addon_fields); + free_sort_buffer(); + } + + void reset() + { + free_data(); + record_pointers= 0; + buffpek.str= 0; + addon_fields= 0; + sorted_result_in_fsbuf= false; + } + + void free_addon_buff(); + + IO_CACHE io_cache; /* If sorted through filesort */ + LEX_STRING buffpek; /* Buffer for buffpek structures */ + Addon_fields *addon_fields; /* Addon field descriptors */ + uchar *record_pointers; /* If sorted in memory */ + Sort_keys *sort_keys; /* Sort key descriptors*/ + + /** + If the entire result of filesort fits in memory, we skip the merge phase. + We may leave the result in filesort_buffer + (indicated by sorted_result_in_fsbuf), or we may strip away + the sort keys, and copy the sorted result into a new buffer. + @see save_index() + */ + bool sorted_result_in_fsbuf; + + /* + How many rows in final result. 
+ Also how many rows in record_pointers, if used + */ + ha_rows return_rows; + ha_rows examined_rows; /* How many rows read */ + ha_rows found_rows; /* How many rows was accepted */ + + /** Sort filesort_buffer */ + void sort_buffer(Sort_param *param, uint count) + { filesort_buffer.sort_buffer(param, count); } + + uchar **get_sort_keys() + { return filesort_buffer.get_sort_keys(); } + + uchar *get_sorted_record(uint ix) + { return filesort_buffer.get_sorted_record(ix); } + + uchar *alloc_sort_buffer(uint num_records, uint record_length) + { return filesort_buffer.alloc_sort_buffer(num_records, record_length); } + + void free_sort_buffer() + { filesort_buffer.free_sort_buffer(); } + + bool isfull() const + { return filesort_buffer.isfull(); } + void init_record_pointers() + { filesort_buffer.init_record_pointers(); } + void init_next_record_pointer() + { filesort_buffer.init_next_record_pointer(); } + uchar *get_next_record_pointer() + { return filesort_buffer.get_next_record_pointer(); } + void adjust_next_record_pointer(uint val) + { filesort_buffer.adjust_next_record_pointer(val); } + + Bounds_checked_array get_raw_buf() + { return filesort_buffer.get_raw_buf(); } + + size_t sort_buffer_size() const + { return filesort_buffer.sort_buffer_size(); } + + bool is_allocated() const + { return filesort_buffer.is_allocated(); } + void set_sort_length(uint val) + { filesort_buffer.set_sort_length(val); } + uint get_sort_length() const + { return filesort_buffer.get_sort_length(); } + + bool has_filesort_result_in_memory() const + { + return record_pointers || sorted_result_in_fsbuf; + } + + /// Are we using "addon fields"? + bool using_addon_fields() const + { + return addon_fields != NULL; + } + + /// Are we using "packed addon fields"? + bool using_packed_addons(); + + /** + Copies (unpacks) values appended to sorted fields from a buffer back to + their regular positions specified by the Field::ptr pointers. 
+ @param buff Buffer which to unpack the value from + */ + template + inline void unpack_addon_fields(uchar *buff); + + bool using_packed_sortkeys(); + + friend SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, + Filesort_tracker* tracker, JOIN *join, + table_map first_table_bit); +}; + +SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, + Filesort_tracker* tracker, JOIN *join=NULL, + table_map first_table_bit=0); + +bool filesort_use_addons(TABLE *table, uint sortlength, + uint *length, uint *fields, uint *null_fields, + uint *m_packable_length); + +void change_double_for_sort(double nr,uchar *to); +void store_length(uchar *to, uint length, uint pack_length); +void +reverse_key(uchar *to, const SORT_FIELD_ATTR *sort_field); + +#endif /* FILESORT_INCLUDED */ diff --git a/sql/filesort_utils.cc b/sql/filesort_utils.cc new file mode 100644 index 00000000..5a51300a --- /dev/null +++ b/sql/filesort_utils.cc @@ -0,0 +1,188 @@ +/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2012, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "filesort_utils.h" +#include "sql_const.h" +#include "sql_sort.h" +#include "table.h" + + +PSI_memory_key key_memory_Filesort_buffer_sort_keys; + +namespace { +/** + A local helper function. See comments for get_merge_buffers_cost(). 
+ */ +double get_merge_cost(ha_rows num_elements, ha_rows num_buffers, uint elem_size) +{ + return + 2.0 * ((double) num_elements * elem_size) / IO_SIZE + + (double) num_elements * log((double) num_buffers) / + (TIME_FOR_COMPARE_ROWID * M_LN2); +} +} + +/** + This is a simplified, and faster version of @see get_merge_many_buffs_cost(). + We calculate the cost of merging buffers, by simulating the actions + of @see merge_many_buff. For explanations of formulas below, + see comments for get_merge_buffers_cost(). + TODO: Use this function for Unique::get_use_cost(). +*/ +double get_merge_many_buffs_cost_fast(ha_rows num_rows, + ha_rows num_keys_per_buffer, + uint elem_size) +{ + ha_rows num_buffers= num_rows / num_keys_per_buffer; + ha_rows last_n_elems= num_rows % num_keys_per_buffer; + double total_cost; + + // Calculate CPU cost of sorting buffers. + total_cost= + ((num_buffers * num_keys_per_buffer * log(1.0 + num_keys_per_buffer) + + last_n_elems * log(1.0 + last_n_elems)) / + TIME_FOR_COMPARE_ROWID); + + // Simulate behavior of merge_many_buff(). + while (num_buffers >= MERGEBUFF2) + { + // Calculate # of calls to merge_buffers(). + const ha_rows loop_limit= num_buffers - MERGEBUFF*3/2; + const ha_rows num_merge_calls= 1 + loop_limit/MERGEBUFF; + const ha_rows num_remaining_buffs= + num_buffers - num_merge_calls * MERGEBUFF; + + // Cost of merge sort 'num_merge_calls'. + total_cost+= + num_merge_calls * + get_merge_cost(num_keys_per_buffer * MERGEBUFF, MERGEBUFF, elem_size); + + // # of records in remaining buffers. + last_n_elems+= num_remaining_buffs * num_keys_per_buffer; + + // Cost of merge sort of remaining buffers. + total_cost+= + get_merge_cost(last_n_elems, 1 + num_remaining_buffs, elem_size); + + num_buffers= num_merge_calls; + num_keys_per_buffer*= MERGEBUFF; + } + + // Simulate final merge_buff call. 
+ last_n_elems+= num_keys_per_buffer * num_buffers; + total_cost+= get_merge_cost(last_n_elems, 1 + num_buffers, elem_size); + return total_cost; +} + +/* + alloc_sort_buffer() + + Allocate buffer for sorting keys. + Try to reuse old buffer if possible. + + @return + 0 Error + # Pointer to allocated buffer +*/ + +uchar *Filesort_buffer::alloc_sort_buffer(uint num_records, + uint record_length) +{ + size_t buff_size; + DBUG_ENTER("alloc_sort_buffer"); + DBUG_EXECUTE_IF("alloc_sort_buffer_fail", + DBUG_SET("+d,simulate_out_of_memory");); + + buff_size= ALIGN_SIZE(num_records * (record_length + sizeof(uchar*))); + + if (m_rawmem) + { + /* + Reuse old buffer if exists and is large enough + Note that we don't make the buffer smaller, as we want to be + prepared for next subquery iteration. + */ + if (buff_size > m_size_in_bytes) + { + /* + Better to free and alloc than realloc as we don't have to remember + the old values + */ + my_free(m_rawmem); + if (!(m_rawmem= (uchar*) my_malloc(key_memory_Filesort_buffer_sort_keys, + buff_size, MYF(MY_THREAD_SPECIFIC)))) + { + m_size_in_bytes= 0; + DBUG_RETURN(0); + } + } + } + else + { + if (!(m_rawmem= (uchar*) my_malloc(key_memory_Filesort_buffer_sort_keys, + buff_size, MYF(MY_THREAD_SPECIFIC)))) + { + m_size_in_bytes= 0; + DBUG_RETURN(0); + } + + } + + m_size_in_bytes= buff_size; + m_record_pointers= reinterpret_cast(m_rawmem) + + ((m_size_in_bytes / sizeof(uchar*)) - 1); + m_num_records= num_records; + m_record_length= record_length; + m_idx= 0; + DBUG_RETURN(m_rawmem); +} + + +void Filesort_buffer::free_sort_buffer() +{ + my_free(m_rawmem); + *this= Filesort_buffer(); +} + + +void Filesort_buffer::sort_buffer(const Sort_param *param, uint count) +{ + size_t size= param->sort_length; + m_sort_keys= get_sort_keys(); + + if (count <= 1 || size == 0) + return; + + // don't reverse for PQ, it is already done + if (!param->using_pq) + reverse_record_pointers(); + + uchar **buffer= NULL; + if (!param->using_packed_sortkeys() && + 
radixsort_is_appliccable(count, param->sort_length) && + (buffer= (uchar**) my_malloc(PSI_INSTRUMENT_ME, count*sizeof(char*), + MYF(MY_THREAD_SPECIFIC)))) + { + radixsort_for_str_ptr(m_sort_keys, count, param->sort_length, buffer); + my_free(buffer); + return; + } + + my_qsort2(m_sort_keys, count, sizeof(uchar*), + param->get_compare_function(), + param->get_compare_argument(&size)); +} diff --git a/sql/filesort_utils.h b/sql/filesort_utils.h new file mode 100644 index 00000000..946b1cb4 --- /dev/null +++ b/sql/filesort_utils.h @@ -0,0 +1,275 @@ +/* Copyright (c) 2010, 2012 Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef FILESORT_UTILS_INCLUDED +#define FILESORT_UTILS_INCLUDED + +#include "my_base.h" +#include "sql_array.h" + +class Sort_param; +/* + Calculate cost of merge sort + + @param num_rows Total number of rows. + @param num_keys_per_buffer Number of keys per buffer. + @param elem_size Size of each element. + + Calculates cost of merge sort by simulating call to merge_many_buff(). + + @retval + Computed cost of merge sort in disk seeks. + + @note + Declared here in order to be able to unit test it, + since library dependencies have not been sorted out yet. + + See also comments get_merge_many_buffs_cost(). 
+*/ + +double get_merge_many_buffs_cost_fast(ha_rows num_rows, + ha_rows num_keys_per_buffer, + uint elem_size); + + +/** + A wrapper class around the buffer used by filesort(). + The sort buffer is a contiguous chunk of memory, + containing both records to be sorted, and pointers to said records: + + + |rec 0|record 1 |rec 2| ............ |ptr to rec2|ptr to rec1|ptr to rec0| + + Records will be inserted "left-to-right". Records are not necessarily + fixed-size, they can be packed and stored without any "gaps". + + Record pointers will be inserted "right-to-left", as a side-effect + of inserting the actual records. + + We wrap the buffer in order to be able to do lazy initialization of the + pointers: the buffer is often much larger than what we actually need. + + With this allocation scheme, and lazy initialization of the pointers, + we are able to pack variable-sized records in the buffer, + and thus possibly have space for more records than we initially estimated. + + The buffer must be kept available for multiple executions of the + same sort operation, so we have explicit allocate and free functions, + rather than doing alloc/free in CTOR/DTOR. +*/ + +class Filesort_buffer +{ +public: + Filesort_buffer() : + m_next_rec_ptr(NULL), m_rawmem(NULL), m_record_pointers(NULL), + m_sort_keys(NULL), + m_num_records(0), m_record_length(0), + m_sort_length(0), + m_size_in_bytes(0), m_idx(0) + {} + + /** Sort me... */ + void sort_buffer(const Sort_param *param, uint count); + + /** + Reverses the record pointer array, to avoid recording new results for + non-deterministic mtr tests. + */ + void reverse_record_pointers() + { + if (m_idx < 2) // There is nothing to swap. + return; + uchar **keys= get_sort_keys(); + const longlong count= m_idx - 1; + for (longlong ix= 0; ix <= count/2; ++ix) + { + uchar *tmp= keys[count - ix]; + keys[count - ix] = keys[ix]; + keys[ix]= tmp; + } + } + + /** + Initializes all the record pointers. 
+ */ + void init_record_pointers() + { + init_next_record_pointer(); + while (m_idx < m_num_records) + (void) get_next_record_pointer(); + reverse_record_pointers(); + } + + /** + Prepares the buffer for the next batch of records to process. + */ + void init_next_record_pointer() + { + m_idx= 0; + m_next_rec_ptr= m_rawmem; + m_sort_keys= NULL; + } + + /** + @returns the number of bytes currently in use for data. + */ + size_t space_used_for_data() const + { + return m_next_rec_ptr ? m_next_rec_ptr - m_rawmem : 0; + } + + /** + @returns the number of bytes left in the buffer. + */ + size_t spaceleft() const + { + DBUG_ASSERT(m_next_rec_ptr >= m_rawmem); + const size_t spaceused= + (m_next_rec_ptr - m_rawmem) + + (static_cast(m_idx) * sizeof(uchar*)); + return m_size_in_bytes - spaceused; + } + + /** + Is the buffer full? + */ + bool isfull() const + { + if (m_idx < m_num_records) + return false; + return spaceleft() < (m_record_length + sizeof(uchar*)); + } + + /** + Where should the next record be stored? + */ + uchar *get_next_record_pointer() + { + uchar *retval= m_next_rec_ptr; + // Save the return value in the record pointer array. + m_record_pointers[-m_idx]= m_next_rec_ptr; + // Prepare for the subsequent request. + m_idx++; + m_next_rec_ptr+= m_record_length; + return retval; + } + + /** + Adjusts for actual record length. get_next_record_pointer() above was + pessimistic, and assumed that the record could not be packed. + */ + void adjust_next_record_pointer(uint val) + { + m_next_rec_ptr-= (m_record_length - val); + } + + /// Returns total size: pointer array + record buffers. + size_t sort_buffer_size() const + { + return m_size_in_bytes; + } + + bool is_allocated() const + { + return m_rawmem; + } + + /** + Allocates the buffer, but does *not* initialize pointers. + Total size = (num_records * record_length) + (num_records * sizeof(pointer)) + space for records space for pointer to records + Caller is responsible for raising an error if allocation fails. 
+ + @param num_records Number of records. + @param record_length (maximum) size of each record. + @returns Pointer to allocated area, or NULL in case of out-of-memory. + */ + uchar *alloc_sort_buffer(uint num_records, uint record_length); + + /// Frees the buffer. + void free_sort_buffer(); + + void reset() + { + m_rawmem= NULL; + } + /** + Used to access the "right-to-left" array of record pointers as an ordinary + "left-to-right" array, so that we can pass it directly on to std::sort(). + */ + uchar **get_sort_keys() + { + if (m_idx == 0) + return NULL; + return &m_record_pointers[1 - m_idx]; + } + + /** + Gets sorted record number ix. @see get_sort_keys() + Only valid after buffer has been sorted! + */ + uchar *get_sorted_record(uint ix) + { + return m_sort_keys[ix]; + } + + /** + @returns The entire buffer, as a character array. + This is for reusing the memory for merge buffers. + */ + Bounds_checked_array get_raw_buf() + { + return Bounds_checked_array(m_rawmem, m_size_in_bytes); + } + + /** + We need an assignment operator, see filesort(). + This happens to have the same semantics as the one that would be + generated by the compiler. + Note that this is a shallow copy. We have two objects sharing the same + array. + */ + Filesort_buffer &operator=(const Filesort_buffer &rhs) = default; + + uint get_sort_length() const { return m_sort_length; } + void set_sort_length(uint val) { m_sort_length= val; } + +private: + uchar *m_next_rec_ptr; /// The next record will be inserted here. + uchar *m_rawmem; /// The raw memory buffer. + uchar **m_record_pointers; /// The "right-to-left" array of record pointers. + uchar **m_sort_keys; /// Caches the value of get_sort_keys() + uint m_num_records; /// Saved value from alloc_sort_buffer() + uint m_record_length; /// Saved value from alloc_sort_buffer() + uint m_sort_length; /// The length of the sort key. + size_t m_size_in_bytes; /// Size of raw buffer, in bytes. 
+ + /** + This is the index in the "right-to-left" array of the next record to + be inserted into the buffer. It is signed, because we use it in signed + expressions like: + m_record_pointers[-m_idx]; + It is longlong rather than int, to ensure that it covers UINT_MAX32 + without any casting/warning. + */ + longlong m_idx; +}; + +int compare_packed_sort_keys(void *sort_keys, unsigned char **a, + unsigned char **b); +qsort2_cmp get_packed_keys_compare_ptr(); + +#endif // FILESORT_UTILS_INCLUDED diff --git a/sql/gcalc_slicescan.cc b/sql/gcalc_slicescan.cc new file mode 100644 index 00000000..f94c7190 --- /dev/null +++ b/sql/gcalc_slicescan.cc @@ -0,0 +1,2015 @@ +/* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved. + Copyright (C) 2011 Monty Program Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#include "mariadb.h" +#include +#include + +#ifdef HAVE_SPATIAL + +#include "gcalc_slicescan.h" + + +#define PH_DATA_OFFSET 8 +#define coord_to_float(d) ((double) d) +#define coord_eq(a, b) (a == b) + +typedef int (*sc_compare_func)(const void*, const void*); + +#define LS_LIST_ITEM Gcalc_dyn_list::Item +#define LS_COMPARE_FUNC_DECL sc_compare_func compare, +#define LS_COMPARE_FUNC_CALL(list_el1, list_el2) (*compare)(list_el1, list_el2) +#define LS_NEXT(A) (A)->next +#define LS_SET_NEXT(A,val) (A)->next= val +#define LS_P_NEXT(A) &(A)->next +#define LS_NAME sort_list +#define LS_SCOPE static +#define LS_STRUCT_NAME sort_list_stack_struct +#include "plistsort.c" + + +#define GCALC_COORD_MINUS 0x80000000 +#define FIRST_DIGIT(d) ((d) & 0x7FFFFFFF) +#define GCALC_SIGN(d) ((d) & 0x80000000) + +static Gcalc_scan_iterator::point *eq_sp(const Gcalc_heap::Info *pi) +{ + GCALC_DBUG_ASSERT(pi->type == Gcalc_heap::nt_eq_node); + return (Gcalc_scan_iterator::point *) pi->node.eq.data; +} + + +static Gcalc_scan_iterator::intersection_info *i_data(const Gcalc_heap::Info *pi) +{ + GCALC_DBUG_ASSERT(pi->type == Gcalc_heap::nt_intersection); + return (Gcalc_scan_iterator::intersection_info *) pi->node.intersection.data; +} + + +#ifndef GCALC_DBUG_OFF + +int gcalc_step_counter= 0; + +void GCALC_DBUG_CHECK_COUNTER() +{ + if (++gcalc_step_counter == 0) + GCALC_DBUG_PRINT(("step_counter_0")); + else + GCALC_DBUG_PRINT(("%d step_counter", gcalc_step_counter)); +} + + +const char *gcalc_ev_name(int ev) +{ + switch (ev) + { + case scev_none: + return "n"; + case scev_thread: + return "t"; + case scev_two_threads: + return "tt"; + case scev_end: + return "e"; + case scev_two_ends: + return "ee"; + case scev_intersection: + return "i"; + case scev_point: + return "p"; + case 
scev_single_point: + return "sp"; + default:; + }; + GCALC_DBUG_ASSERT(0); + return "unk"; +} + + +static int gcalc_pi_str(char *str, const Gcalc_heap::Info *pi, const char *postfix) +{ + return sprintf(str, "%s %d %d | %s %d %d%s", + GCALC_SIGN(pi->node.shape.ix[0]) ? "-":"", FIRST_DIGIT(pi->node.shape.ix[0]),pi->node.shape.ix[1], + GCALC_SIGN(pi->node.shape.iy[0]) ? "-":"", FIRST_DIGIT(pi->node.shape.iy[0]),pi->node.shape.iy[1], + postfix); + +} + + +static void GCALC_DBUG_PRINT_PI(const Gcalc_heap::Info *pi) +{ + char buf[128]; + int n_buf; + if (pi->type == Gcalc_heap::nt_intersection) + { +#ifdef DBUG_TRACE + const Gcalc_scan_iterator::intersection_info *ic= i_data(pi); + + GCALC_DBUG_PRINT(("intersection point %d %d", + ic->edge_a->thread, ic->edge_b->thread)); +#endif + return; + } + if (pi->type == Gcalc_heap::nt_eq_node) + { +#ifdef DBUG_TRACE + const Gcalc_scan_iterator::point *e= eq_sp(pi); + GCALC_DBUG_PRINT(("eq point %d", e->thread)); +#endif + return; + } + n_buf= gcalc_pi_str(buf, pi, ""); + buf[n_buf]= 0; + GCALC_DBUG_PRINT(("%s", buf)); +} + + +static void GCALC_DBUG_PRINT_SLICE(const char *header, + const Gcalc_scan_iterator::point *slice) +{ + size_t nbuf; + char buf[1024]; + nbuf= strlen(header); + strcpy(buf, header); + for (; slice; slice= slice->get_next()) + { + size_t lnbuf= nbuf; + lnbuf+= sprintf(buf + lnbuf, "%d\t", slice->thread); + lnbuf+= sprintf(buf + lnbuf, "%s\t", gcalc_ev_name(slice->event)); + + lnbuf+= gcalc_pi_str(buf + lnbuf, slice->pi, "\t"); + if (slice->is_bottom()) + lnbuf+= sprintf(buf+lnbuf, "bt\t"); + else + lnbuf+= gcalc_pi_str(buf+lnbuf, slice->next_pi, "\t"); + buf[lnbuf]= 0; + GCALC_DBUG_PRINT(("%s", buf)); + } +} + + +#else +#define GCALC_DBUG_CHECK_COUNTER() do { } while(0) +#define GCALC_DBUG_PRINT_PI(pi) do { } while(0) +#define GCALC_DBUG_PRINT_SLICE(a, b) do { } while(0) +#define GCALC_DBUG_PRINT_INTERSECTIONS(a) do { } while(0) +#define GCALC_DBUG_PRINT_STATE(a) do { } while(0) +#endif /*GCALC_DBUG_OFF*/ + + 
+Gcalc_dyn_list::Gcalc_dyn_list(size_t blk_size, size_t sizeof_item): + m_blk_size(blk_size), + m_sizeof_item(ALIGN_SIZE(sizeof_item)), + m_points_per_blk((uint)((m_blk_size - PH_DATA_OFFSET) / m_sizeof_item)), + m_blk_hook(&m_first_blk), + m_free(NULL), + m_keep(NULL) +{} + + +Gcalc_dyn_list::Gcalc_dyn_list(const Gcalc_dyn_list &dl) +{ + m_blk_size= dl.m_blk_size; + m_sizeof_item= dl.m_sizeof_item; + m_points_per_blk= dl.m_points_per_blk; + m_blk_hook= &m_first_blk; + m_free= NULL; + m_keep= NULL; +} + + +void Gcalc_dyn_list::format_blk(void* block) +{ + Item *pi_end, *cur_pi, *first_pi; + GCALC_DBUG_ASSERT(m_free == NULL); + first_pi= cur_pi= (Item *)(((char *)block) + PH_DATA_OFFSET); + pi_end= ptr_add(first_pi, m_points_per_blk - 1); + do { + cur_pi= cur_pi->next= ptr_add(cur_pi, 1); + } while (cur_pinext= m_free; + m_free= first_pi; +} + + +Gcalc_dyn_list::Item *Gcalc_dyn_list::alloc_new_blk() +{ + void *new_block= my_malloc(PSI_INSTRUMENT_ME, m_blk_size, MYF(MY_WME)); + if (!new_block) + return NULL; + *m_blk_hook= new_block; + m_blk_hook= (void**)new_block; + format_blk(new_block); + return new_item(); +} + + +static void free_blk_list(void *list) +{ + void *next_blk; + while (list) + { + next_blk= *((void **)list); + my_free(list); + list= next_blk; + } +} + + +void Gcalc_dyn_list::cleanup() +{ + *m_blk_hook= NULL; + free_blk_list(m_first_blk); + m_first_blk= NULL; + m_blk_hook= &m_first_blk; + m_free= NULL; +} + + +Gcalc_dyn_list::~Gcalc_dyn_list() +{ + cleanup(); +} + + +void Gcalc_dyn_list::reset() +{ + *m_blk_hook= NULL; + if (m_first_blk) + { + free_blk_list(*((void **)m_first_blk)); + m_blk_hook= (void**)m_first_blk; + m_free= NULL; + format_blk(m_first_blk); + } +} + + +/* Internal coordinate operations implementations */ + +void gcalc_set_zero(Gcalc_internal_coord *d, int d_len) +{ + do + { + d[--d_len]= 0; + } while (d_len); +} + + +int gcalc_is_zero(const Gcalc_internal_coord *d, int d_len) +{ + do + { + if (d[--d_len] != 0) + return 0; + } while 
(d_len); + return 1; +} + + +#ifdef GCALC_CHECK_WITH_FLOAT +static double *gcalc_coord_extent= NULL; + +long double gcalc_get_double(const Gcalc_internal_coord *d, int d_len) +{ + int n= 1; + long double res= (long double) FIRST_DIGIT(d[0]); + do + { + res*= (long double) GCALC_DIG_BASE; + res+= (long double) d[n]; + } while(++n < d_len); + + n= 0; + do + { + if ((n & 1) && gcalc_coord_extent) + res/= *gcalc_coord_extent; + } while(++n < d_len); + + if (GCALC_SIGN(d[0])) + res*= -1.0; + return res; +} +#endif /*GCALC_CHECK_WITH_FLOAT*/ + + +static void do_add(Gcalc_internal_coord *result, int result_len, + const Gcalc_internal_coord *a, + const Gcalc_internal_coord *b) +{ + int n_digit= result_len-1; + gcalc_digit_t carry= 0; + + do + { + if ((result[n_digit]= + a[n_digit] + b[n_digit] + carry) >= GCALC_DIG_BASE) + { + carry= 1; + result[n_digit]-= GCALC_DIG_BASE; + } + else + carry= 0; + } while (--n_digit); + + result[0]= (a[0] + FIRST_DIGIT(b[0]) + carry); + + GCALC_DBUG_ASSERT(FIRST_DIGIT(result[0]) < GCALC_DIG_BASE); +} + + +static void do_sub(Gcalc_internal_coord *result, int result_len, + const Gcalc_internal_coord *a, + const Gcalc_internal_coord *b) +{ + int n_digit= result_len-1; + gcalc_digit_t carry= 0; + gcalc_digit_t cur_b, cur_a; + + do + { + cur_b= b[n_digit] + carry; + cur_a= a[n_digit]; + if (cur_a < cur_b) + { + carry= 1; + result[n_digit]= (GCALC_DIG_BASE - cur_b) + cur_a; + } + else + { + carry= 0; + result[n_digit]= cur_a - cur_b; + } + } while (--n_digit); + + + result[0]= a[0] - FIRST_DIGIT(b[0]) - carry; + + GCALC_DBUG_ASSERT(FIRST_DIGIT(a[0]) >= FIRST_DIGIT(b[0]) + carry); + GCALC_DBUG_ASSERT(!gcalc_is_zero(result, result_len)); +} +/* +static void do_sub(Gcalc_internal_coord *result, int result_len, + const Gcalc_internal_coord *a, + const Gcalc_internal_coord *b) +{ + int n_digit= result_len-1; + gcalc_digit_t carry= 0; + + do + { + if ((result[n_digit]= a[n_digit] - b[n_digit] - carry) < 0) + { + carry= 1; + result[n_digit]+= 
GCALC_DIG_BASE; + } + else + carry= 0; + } while (--n_digit); + + + result[0]= a[0] - FIRST_DIGIT(b[0]) - carry; + + GCALC_DBUG_ASSERT(FIRST_DIGIT(a[0]) - FIRST_DIGIT(b[0]) - carry >= 0); + GCALC_DBUG_ASSERT(!gcalc_is_zero(result, result_len)); +} +*/ + +static int do_cmp(const Gcalc_internal_coord *a, + const Gcalc_internal_coord *b, int len) +{ + int n_digit= 1; + + if ((FIRST_DIGIT(a[0]) != FIRST_DIGIT(b[0]))) + return FIRST_DIGIT(a[0]) > FIRST_DIGIT(b[0]) ? 1 : -1; + + do + { + if ((a[n_digit] != b[n_digit])) + return a[n_digit] > b[n_digit] ? 1 : -1; + } while (++n_digit < len); + + return 0; +} + + +#ifdef GCALC_CHECK_WITH_FLOAT +static int de_weak_check(long double a, long double b, long double ex) +{ + long double d= a - b; + if (d < ex && d > -ex) + return 1; + + d/= fabsl(a) + fabsl(b); + if (d < ex && d > -ex) + return 1; + return 0; +} + +static int de_check(long double a, long double b) +{ + return de_weak_check(a, b, (long double) 1e-9); +} +#endif /*GCALC_CHECK_WITH_FLOAT*/ + + +void gcalc_mul_coord(Gcalc_internal_coord *result, int result_len, + const Gcalc_internal_coord *a, int a_len, + const Gcalc_internal_coord *b, int b_len) +{ + GCALC_DBUG_ASSERT(result_len == a_len + b_len); + GCALC_DBUG_ASSERT(a_len >= b_len); + int n_a, n_b, n_res; + gcalc_digit_t carry= 0; + + gcalc_set_zero(result, result_len); + + n_a= a_len - 1; + do + { + gcalc_coord2 cur_a= n_a ? a[n_a] : FIRST_DIGIT(a[0]); + n_b= b_len - 1; + do + { + gcalc_coord2 cur_b= n_b ? 
b[n_b] : FIRST_DIGIT(b[0]); + gcalc_coord2 mul= cur_a * cur_b + carry + result[n_a + n_b + 1]; + result[n_a + n_b + 1]= mul % GCALC_DIG_BASE; + carry= (gcalc_digit_t) (mul / (gcalc_coord2) GCALC_DIG_BASE); + } while (n_b--); + if (carry) + { + for (n_res= n_a; (result[n_res]+= carry) >= GCALC_DIG_BASE; + n_res--) + { + result[n_res]-= GCALC_DIG_BASE; + carry= 1; + } + carry= 0; + } + } while (n_a--); + if (!gcalc_is_zero(result, result_len)) + result[0]|= GCALC_SIGN(a[0] ^ b[0]); +#ifdef GCALC_CHECK_WITH_FLOAT + GCALC_DBUG_ASSERT(de_check(gcalc_get_double(a, a_len) * + gcalc_get_double(b, b_len), + gcalc_get_double(result, result_len))); +#endif /*GCALC_CHECK_WITH_FLOAT*/ +} + + +inline void gcalc_mul_coord1(Gcalc_coord1 result, + const Gcalc_coord1 a, const Gcalc_coord1 b) +{ + return gcalc_mul_coord(result, GCALC_COORD_BASE2, + a, GCALC_COORD_BASE, b, GCALC_COORD_BASE); +} + + +void gcalc_add_coord(Gcalc_internal_coord *result, int result_len, + const Gcalc_internal_coord *a, + const Gcalc_internal_coord *b) +{ + if (GCALC_SIGN(a[0]) == GCALC_SIGN(b[0])) + do_add(result, result_len, a, b); + else + { + int cmp_res= do_cmp(a, b, result_len); + if (cmp_res == 0) + gcalc_set_zero(result, result_len); + else if (cmp_res > 0) + do_sub(result, result_len, a, b); + else + do_sub(result, result_len, b, a); + } +#ifdef GCALC_CHECK_WITH_FLOAT + GCALC_DBUG_ASSERT(de_check(gcalc_get_double(a, result_len) + + gcalc_get_double(b, result_len), + gcalc_get_double(result, result_len))); +#endif /*GCALC_CHECK_WITH_FLOAT*/ +} + + +void gcalc_sub_coord(Gcalc_internal_coord *result, int result_len, + const Gcalc_internal_coord *a, + const Gcalc_internal_coord *b) +{ + if (GCALC_SIGN(a[0] ^ b[0])) + do_add(result, result_len, a, b); + else + { + int cmp_res= do_cmp(a, b, result_len); + if (cmp_res == 0) + gcalc_set_zero(result, result_len); + else if (cmp_res > 0) + do_sub(result, result_len, a, b); + else + { + do_sub(result, result_len, b, a); + result[0]^= GCALC_COORD_MINUS; + } + 
} +#ifdef GCALC_CHECK_WITH_FLOAT + GCALC_DBUG_ASSERT(de_check(gcalc_get_double(a, result_len) - + gcalc_get_double(b, result_len), + gcalc_get_double(result, result_len))); +#endif /*GCALC_CHECK_WITH_FLOAT*/ +} + + +inline void gcalc_sub_coord1(Gcalc_coord1 result, + const Gcalc_coord1 a, const Gcalc_coord1 b) +{ + return gcalc_sub_coord(result, GCALC_COORD_BASE, a, b); +} + + +int gcalc_cmp_coord(const Gcalc_internal_coord *a, + const Gcalc_internal_coord *b, int len) +{ + int n_digit= 0; + int result= 0; + + do + { + if (a[n_digit] == b[n_digit]) + { + n_digit++; + continue; + } + if (a[n_digit] > b[n_digit]) + result= GCALC_SIGN(a[0]) ? -1 : 1; + else + result= GCALC_SIGN(b[0]) ? 1 : -1; + break; + + } while (n_digit < len); + +#ifdef GCALC_CHECK_WITH_FLOAT + if (result == 0) + GCALC_DBUG_ASSERT(de_check(gcalc_get_double(a, len), + gcalc_get_double(b, len))); + else if (result == 1) + GCALC_DBUG_ASSERT(de_check(gcalc_get_double(a, len), + gcalc_get_double(b, len)) || + gcalc_get_double(a, len) > gcalc_get_double(b, len)); + else + GCALC_DBUG_ASSERT(de_check(gcalc_get_double(a, len), + gcalc_get_double(b, len)) || + gcalc_get_double(a, len) < gcalc_get_double(b, len)); +#endif /*GCALC_CHECK_WITH_FLOAT*/ + return result; +} + + +#define gcalc_cmp_coord1(a, b) gcalc_cmp_coord(a, b, GCALC_COORD_BASE) + +int gcalc_set_double(Gcalc_internal_coord *c, double d, double ext) +{ + int sign; + double ds= d * ext; + if ((sign= ds < 0)) + ds= -ds; + c[0]= (gcalc_digit_t) (ds / (double) GCALC_DIG_BASE); + c[1]= (gcalc_digit_t) (ds - ((double) c[0]) * (double) GCALC_DIG_BASE); + if (c[1] >= GCALC_DIG_BASE) + { + c[1]= 0; + c[0]++; + } + if (sign && (c[0] | c[1])) + c[0]|= GCALC_COORD_MINUS; +#ifdef GCALC_CHECK_WITH_FLOAT + GCALC_DBUG_ASSERT(de_check(d, gcalc_get_double(c, 2))); +#endif /*GCALC_CHECK_WITH_FLOAT*/ + return 0; +} + + +typedef gcalc_digit_t Gcalc_coord4[GCALC_COORD_BASE*4]; +typedef gcalc_digit_t Gcalc_coord5[GCALC_COORD_BASE*5]; + + +void 
Gcalc_scan_iterator::intersection_info::do_calc_t() +{ + Gcalc_coord1 a2_a1x, a2_a1y; + Gcalc_coord2 x1y2, x2y1; + + gcalc_sub_coord1(a2_a1x, edge_b->pi->node.shape.ix, edge_a->pi->node.shape.ix); + gcalc_sub_coord1(a2_a1y, edge_b->pi->node.shape.iy, edge_a->pi->node.shape.iy); + + GCALC_DBUG_ASSERT(!gcalc_is_zero(edge_a->dy, GCALC_COORD_BASE) || + !gcalc_is_zero(edge_b->dy, GCALC_COORD_BASE)); + + gcalc_mul_coord1(x1y2, edge_a->dx, edge_b->dy); + gcalc_mul_coord1(x2y1, edge_a->dy, edge_b->dx); + gcalc_sub_coord(t_b, GCALC_COORD_BASE2, x1y2, x2y1); + + + gcalc_mul_coord1(x1y2, a2_a1x, edge_b->dy); + gcalc_mul_coord1(x2y1, a2_a1y, edge_b->dx); + gcalc_sub_coord(t_a, GCALC_COORD_BASE2, x1y2, x2y1); + t_calculated= 1; +} + + +void Gcalc_scan_iterator::intersection_info::do_calc_y() +{ + GCALC_DBUG_ASSERT(t_calculated); + + Gcalc_coord3 a_tb, b_ta; + + gcalc_mul_coord(a_tb, GCALC_COORD_BASE3, + t_b, GCALC_COORD_BASE2, edge_a->pi->node.shape.iy, GCALC_COORD_BASE); + gcalc_mul_coord(b_ta, GCALC_COORD_BASE3, + t_a, GCALC_COORD_BASE2, edge_a->dy, GCALC_COORD_BASE); + + gcalc_add_coord(y_exp, GCALC_COORD_BASE3, a_tb, b_ta); + y_calculated= 1; +} + + +void Gcalc_scan_iterator::intersection_info::do_calc_x() +{ + GCALC_DBUG_ASSERT(t_calculated); + + Gcalc_coord3 a_tb, b_ta; + + gcalc_mul_coord(a_tb, GCALC_COORD_BASE3, + t_b, GCALC_COORD_BASE2, edge_a->pi->node.shape.ix, GCALC_COORD_BASE); + gcalc_mul_coord(b_ta, GCALC_COORD_BASE3, + t_a, GCALC_COORD_BASE2, edge_a->dx, GCALC_COORD_BASE); + + gcalc_add_coord(x_exp, GCALC_COORD_BASE3, a_tb, b_ta); + x_calculated= 1; +} + + +static int cmp_node_isc(const Gcalc_heap::Info *node, + const Gcalc_heap::Info *isc) +{ + GCALC_DBUG_ASSERT(node->type == Gcalc_heap::nt_shape_node); + Gcalc_scan_iterator::intersection_info *inf= i_data(isc); + Gcalc_coord3 exp; + int result; + + inf->calc_t(); + inf->calc_y_exp(); + + gcalc_mul_coord(exp, GCALC_COORD_BASE3, + inf->t_b, GCALC_COORD_BASE2, node->node.shape.iy, GCALC_COORD_BASE); + + result= 
gcalc_cmp_coord(exp, inf->y_exp, GCALC_COORD_BASE3); +#ifdef GCALC_CHECK_WITH_FLOAT + long double int_x, int_y; + isc->calc_xy_ld(&int_x, &int_y); + if (result < 0) + { + if (!de_check(int_y, node->node.shape.y) && node->node.shape.y > int_y) + GCALC_DBUG_PRINT(("floatcheck cmp_nod_iscy %g < %LG", node->node.shape.y, int_y)); + } + else if (result > 0) + { + if (!de_check(int_y, node->node.shape.y) && node->node.shape.y < int_y) + GCALC_DBUG_PRINT(("floatcheck cmp_nod_iscy %g > %LG", node->node.shape.y, int_y)); + } + else + { + if (!de_check(int_y, node->node.shape.y)) + GCALC_DBUG_PRINT(("floatcheck cmp_nod_iscy %g == %LG", node->node.shape.y, int_y)); + } +#endif /*GCALC_CHECK_WITH_FLOAT*/ + if (result) + goto exit; + + + inf->calc_x_exp(); + gcalc_mul_coord(exp, GCALC_COORD_BASE3, + inf->t_b, GCALC_COORD_BASE2, node->node.shape.ix, GCALC_COORD_BASE); + + result= gcalc_cmp_coord(exp, inf->x_exp, GCALC_COORD_BASE3); +#ifdef GCALC_CHECK_WITH_FLOAT + if (result < 0) + { + if (!de_check(int_x, node->node.shape.x) && node->node.shape.x > int_x) + GCALC_DBUG_PRINT(("floatcheck cmp_nod_iscx failed %g < %LG", + node->node.shape.x, int_x)); + } + else if (result > 0) + { + if (!de_check(int_x, node->node.shape.x) && node->node.shape.x < int_x) + GCALC_DBUG_PRINT(("floatcheck cmp_nod_iscx failed %g > %LG", + node->node.shape.x, int_x)); + } + else + { + if (!de_check(int_x, node->node.shape.x)) + GCALC_DBUG_PRINT(("floatcheck cmp_nod_iscx failed %g == %LG", + node->node.shape.x, int_x)); + } +#endif /*GCALC_CHECK_WITH_FLOAT*/ +exit: + return result; +} + + +static int cmp_intersections(const Gcalc_heap::Info *i1, + const Gcalc_heap::Info *i2) +{ + Gcalc_scan_iterator::intersection_info *inf1= i_data(i1); + Gcalc_scan_iterator::intersection_info *inf2= i_data(i2); + Gcalc_coord5 exp_a, exp_b; + int result; + + inf1->calc_t(); + inf2->calc_t(); + + inf1->calc_y_exp(); + inf2->calc_y_exp(); + + gcalc_mul_coord(exp_a, GCALC_COORD_BASE5, + inf1->y_exp, GCALC_COORD_BASE3, 
inf2->t_b, GCALC_COORD_BASE2); + gcalc_mul_coord(exp_b, GCALC_COORD_BASE5, + inf2->y_exp, GCALC_COORD_BASE3, inf1->t_b, GCALC_COORD_BASE2); + + result= gcalc_cmp_coord(exp_a, exp_b, GCALC_COORD_BASE5); +#ifdef GCALC_CHECK_WITH_FLOAT + long double x1, y1, x2, y2; + i1->calc_xy_ld(&x1, &y1); + i2->calc_xy_ld(&x2, &y2); + + if (result < 0) + { + if (!de_check(y1, y2) && y2 > y1) + GCALC_DBUG_PRINT(("floatcheck cmp_intersections_y failed %LG < %LG", + y2, y1)); + } + else if (result > 0) + { + if (!de_check(y1, y2) && y2 < y1) + GCALC_DBUG_PRINT(("floatcheck cmp_intersections_y failed %LG > %LG", + y2, y1)); + } + else + { + if (!de_check(y1, y2)) + GCALC_DBUG_PRINT(("floatcheck cmp_intersections_y failed %LG == %LG", + y2, y1)); + } +#endif /*GCALC_CHECK_WITH_FLOAT*/ + + if (result != 0) + return result; + + + inf1->calc_x_exp(); + inf2->calc_x_exp(); + gcalc_mul_coord(exp_a, GCALC_COORD_BASE5, + inf1->x_exp, GCALC_COORD_BASE3, inf2->t_b, GCALC_COORD_BASE2); + gcalc_mul_coord(exp_b, GCALC_COORD_BASE5, + inf2->x_exp, GCALC_COORD_BASE3, inf1->t_b, GCALC_COORD_BASE2); + + result= gcalc_cmp_coord(exp_a, exp_b, GCALC_COORD_BASE5); +#ifdef GCALC_CHECK_WITH_FLOAT + if (result < 0) + { + if (!de_check(x1, x2) && x2 > x1) + GCALC_DBUG_PRINT(("floatcheck cmp_intersectionsx failed %LG < %LG", + x2, x1)); + } + else if (result > 0) + { + if (!de_check(x1, x2) && x2 < x1) + GCALC_DBUG_PRINT(("floatcheck cmp_intersectionsx failed %LG > %LG", + x2, x1)); + } + else + { + if (!de_check(x1, x2)) + GCALC_DBUG_PRINT(("floatcheck cmp_intersectionsx failed %LG == %LG", + x2, x1)); + } +#endif /*GCALC_CHECK_WITH_FLOAT*/ + return result; +} +/* Internal coordinates implementation end */ + + +#define GCALC_SCALE_1 1e18 + +static double find_scale(double extent) +{ + double scale= 1e-2; + while (scale < extent) + scale*= (double ) 10; + return GCALC_SCALE_1 / scale / 10; +} + + +void Gcalc_heap::set_extent(double xmin, double xmax, double ymin, double ymax) +{ + xmin= fabs(xmin); + xmax= 
fabs(xmax); + ymin= fabs(ymin); + ymax= fabs(ymax); + + if (xmax < xmin) + xmax= xmin; + if (ymax < ymin) + ymax= ymin; + + coord_extent= xmax > ymax ? xmax : ymax; + coord_extent= find_scale(coord_extent); +#ifdef GCALC_CHECK_WITH_FLOAT + gcalc_coord_extent= &coord_extent; +#endif /*GCALC_CHECK_WITH_FLOAT*/ +} + + +void Gcalc_heap::free_point_info(Gcalc_heap::Info *i, + Gcalc_dyn_list::Item **i_hook) +{ + if (m_hook == &i->next) + m_hook= i_hook; + *i_hook= i->next; + free_item(i); + m_n_points--; +} + + +Gcalc_heap::Info *Gcalc_heap::new_point_info(double x, double y, + gcalc_shape_info shape) +{ + Info *result= (Info *)new_item(); + if (!result) + return NULL; + *m_hook= result; + m_hook= &result->next; + result->node.shape.x= x; + result->node.shape.y= y; + result->node.shape.shape= shape; + result->node.shape.top_node= 1; + result->type= nt_shape_node; + gcalc_set_double(result->node.shape.ix, x, coord_extent); + gcalc_set_double(result->node.shape.iy, y, coord_extent); + + m_n_points++; + return result; +} + + +static Gcalc_heap::Info *new_intersection( + Gcalc_heap *heap, Gcalc_scan_iterator::intersection_info *ii) +{ + Gcalc_heap::Info *isc= (Gcalc_heap::Info *)heap->new_item(); + if (!isc) + return 0; + isc->type= Gcalc_heap::nt_intersection; + isc->node.intersection.p1= ii->edge_a->pi; + isc->node.intersection.p2= ii->edge_a->next_pi; + isc->node.intersection.p3= ii->edge_b->pi; + isc->node.intersection.p4= ii->edge_b->next_pi; + isc->node.intersection.data= ii; + return isc; +} + + +static Gcalc_heap::Info *new_eq_point( + Gcalc_heap *heap, const Gcalc_heap::Info *p, + Gcalc_scan_iterator::point *edge) +{ + Gcalc_heap::Info *eqp= (Gcalc_heap::Info *)heap->new_item(); + if (!eqp) + return 0; + eqp->type= Gcalc_heap::nt_eq_node; + eqp->node.eq.node= p; + eqp->node.eq.data= edge; + return eqp; +} + + +void Gcalc_heap::Info::calc_xy(double *x, double *y) const +{ + double b0_x= node.intersection.p2->node.shape.x - node.intersection.p1->node.shape.x; + double 
b0_y= node.intersection.p2->node.shape.y - node.intersection.p1->node.shape.y; + double b1_x= node.intersection.p4->node.shape.x - node.intersection.p3->node.shape.x; + double b1_y= node.intersection.p4->node.shape.y - node.intersection.p3->node.shape.y; + double b0xb1= b0_x * b1_y - b0_y * b1_x; + double t= (node.intersection.p3->node.shape.x - node.intersection.p1->node.shape.x) * b1_y - (node.intersection.p3->node.shape.y - node.intersection.p1->node.shape.y) * b1_x; + + t/= b0xb1; + + *x= node.intersection.p1->node.shape.x + b0_x * t; + *y= node.intersection.p1->node.shape.y + b0_y * t; +} + + +#ifdef GCALC_CHECK_WITH_FLOAT +void Gcalc_heap::Info::calc_xy_ld(long double *x, long double *y) const +{ + long double b0_x= ((long double) p2->node.shape.x) - p1->node.shape.x; + long double b0_y= ((long double) p2->node.shape.y) - p1->node.shape.y; + long double b1_x= ((long double) p4->node.shape.x) - p3->node.shape.x; + long double b1_y= ((long double) p4->node.shape.y) - p3->node.shape.y; + long double b0xb1= b0_x * b1_y - b0_y * b1_x; + long double ax= ((long double) p3->node.shape.x) - p1->node.shape.x; + long double ay= ((long double) p3->node.shape.y) - p1->node.shape.y; + long double t_a= ax * b1_y - ay * b1_x; + long double hx= (b0xb1 * (long double) p1->node.shape.x + b0_x * t_a); + long double hy= (b0xb1 * (long double) p1->node.shape.y + b0_y * t_a); + + if (fabs(b0xb1) < 1e-15) + { + *x= p1->node.shape.x; + *y= p1->node.shape.y; + return; + } + + *x= hx/b0xb1; + *y= hy/b0xb1; +} +#endif /*GCALC_CHECK_WITH_FLOAT*/ + + +static int cmp_point_info(const Gcalc_heap::Info *i0, + const Gcalc_heap::Info *i1) +{ + int cmp_y= gcalc_cmp_coord1(i0->node.shape.iy, i1->node.shape.iy); + if (cmp_y) + return cmp_y; + return gcalc_cmp_coord1(i0->node.shape.ix, i1->node.shape.ix); +} + + +static inline void trim_node(Gcalc_heap::Info *node, Gcalc_heap::Info *prev_node) +{ + if (!node) + return; + node->node.shape.top_node= 0; + GCALC_DBUG_ASSERT((node->node.shape.left == 
prev_node) || (node->node.shape.right == prev_node)); + if (node->node.shape.left == prev_node) + node->node.shape.left= node->node.shape.right; + node->node.shape.right= NULL; + GCALC_DBUG_ASSERT(cmp_point_info(node, prev_node)); +} + + +static int compare_point_info(const void *e0, const void *e1) +{ + const Gcalc_heap::Info *i0= (const Gcalc_heap::Info *)e0; + const Gcalc_heap::Info *i1= (const Gcalc_heap::Info *)e1; + return cmp_point_info(i0, i1) > 0; +} + + +void Gcalc_heap::prepare_operation() +{ + Info *cur; + GCALC_DBUG_ASSERT(m_hook); + *m_hook= NULL; + m_hook= NULL; /* just to check it's not called twice */ + m_first= sort_list(compare_point_info, m_first, m_n_points); + + /* TODO - move this to the 'normal_scan' loop */ + for (cur= get_first(); cur; cur= cur->get_next()) + { + trim_node(cur->node.shape.left, cur); + trim_node(cur->node.shape.right, cur); + } +} + + +void Gcalc_heap::reset() +{ + if (m_n_points) + { + if (m_hook) + *m_hook= NULL; + free_list(m_first); + m_n_points= 0; + } + m_hook= &m_first; +} + + +int Gcalc_shape_transporter::int_single_point(gcalc_shape_info Info, + double x, double y) +{ + Gcalc_heap::Info *point= m_heap->new_point_info(x, y, Info); + if (!point) + return 1; + point->node.shape.left= point->node.shape.right= 0; + return 0; +} + + +int Gcalc_shape_transporter::int_add_point(gcalc_shape_info Info, + double x, double y) +{ + Gcalc_heap::Info *point; + Gcalc_dyn_list::Item **hook; + + hook= m_heap->get_cur_hook(); + + if (!(point= m_heap->new_point_info(x, y, Info))) + return 1; + if (m_first) + { + if (cmp_point_info(m_prev, point) == 0) + { + /* Coinciding points, do nothing */ + m_heap->free_point_info(point, hook); + return 0; + } + GCALC_DBUG_ASSERT(!m_prev || m_prev->node.shape.x != x || m_prev->node.shape.y != y); + m_prev->node.shape.left= point; + point->node.shape.right= m_prev; + } + else + m_first= point; + m_prev= point; + m_prev_hook= hook; + return 0; +} + + +void Gcalc_shape_transporter::int_complete() +{ 
+ GCALC_DBUG_ASSERT(m_shape_started == 1 || m_shape_started == 3); + + if (!m_first) + return; + + /* simple point */ + if (m_first == m_prev) + { + m_first->node.shape.right= m_first->node.shape.left= NULL; + return; + } + + /* line */ + if (m_shape_started == 1) + { + m_first->node.shape.right= NULL; + m_prev->node.shape.left= m_prev->node.shape.right; + m_prev->node.shape.right= NULL; + return; + } + + /* polygon */ + if (cmp_point_info(m_first, m_prev) == 0) + { + /* Coinciding points, remove the last one from the list */ + m_prev->node.shape.right->node.shape.left= m_first; + m_first->node.shape.right= m_prev->node.shape.right; + m_heap->free_point_info(m_prev, m_prev_hook); + } + else + { + GCALC_DBUG_ASSERT(m_prev->node.shape.x != m_first->node.shape.x || m_prev->node.shape.y != m_first->node.shape.y); + m_first->node.shape.right= m_prev; + m_prev->node.shape.left= m_first; + } +} + + +inline void calc_dx_dy(Gcalc_scan_iterator::point *p) +{ + gcalc_sub_coord1(p->dx, p->next_pi->node.shape.ix, p->pi->node.shape.ix); + gcalc_sub_coord1(p->dy, p->next_pi->node.shape.iy, p->pi->node.shape.iy); + if (GCALC_SIGN(p->dx[0])) + { + p->l_border= &p->next_pi->node.shape.ix; + p->r_border= &p->pi->node.shape.ix; + } + else + { + p->r_border= &p->next_pi->node.shape.ix; + p->l_border= &p->pi->node.shape.ix; + } +} + + +Gcalc_scan_iterator::Gcalc_scan_iterator(size_t blk_size) : + Gcalc_dyn_list(blk_size, sizeof(point) > sizeof(intersection_info) ? 
+ sizeof(point) : + sizeof(intersection_info)) +{ + state.slice= NULL; + m_bottom_points= NULL; + m_bottom_hook= &m_bottom_points; +} + + +void Gcalc_scan_iterator::init(Gcalc_heap *points) +{ + GCALC_DBUG_ASSERT(points->ready()); + GCALC_DBUG_ASSERT(!state.slice); + + if (!(m_cur_pi= points->get_first())) + return; + m_heap= points; + state.event_position_hook= &state.slice; + state.event_end= NULL; +#ifndef GCALC_DBUG_OFF + m_cur_thread= 0; +#endif /*GCALC_DBUG_OFF*/ + GCALC_SET_TERMINATED(killed, 0); +} + +void Gcalc_scan_iterator::reset() +{ + state.slice= NULL; + m_bottom_points= NULL; + m_bottom_hook= &m_bottom_points; + Gcalc_dyn_list::reset(); +} + + +int Gcalc_scan_iterator::point::cmp_dx_dy(const Gcalc_coord1 dx_a, + const Gcalc_coord1 dy_a, + const Gcalc_coord1 dx_b, + const Gcalc_coord1 dy_b) +{ + Gcalc_coord2 dx_a_dy_b; + Gcalc_coord2 dy_a_dx_b; + gcalc_mul_coord1(dx_a_dy_b, dx_a, dy_b); + gcalc_mul_coord1(dy_a_dx_b, dy_a, dx_b); + + return gcalc_cmp_coord(dx_a_dy_b, dy_a_dx_b, GCALC_COORD_BASE2); +} + + +int Gcalc_scan_iterator::point::cmp_dx_dy(const Gcalc_heap::Info *p1, + const Gcalc_heap::Info *p2, + const Gcalc_heap::Info *p3, + const Gcalc_heap::Info *p4) +{ + Gcalc_coord1 dx_a, dy_a, dx_b, dy_b; + gcalc_sub_coord1(dx_a, p2->node.shape.ix, p1->node.shape.ix); + gcalc_sub_coord1(dy_a, p2->node.shape.iy, p1->node.shape.iy); + gcalc_sub_coord1(dx_b, p4->node.shape.ix, p3->node.shape.ix); + gcalc_sub_coord1(dy_b, p4->node.shape.iy, p3->node.shape.iy); + return cmp_dx_dy(dx_a, dy_a, dx_b, dy_b); +} + + +int Gcalc_scan_iterator::point::cmp_dx_dy(const point *p) const +{ + GCALC_DBUG_ASSERT(!is_bottom()); + return cmp_dx_dy(dx, dy, p->dx, p->dy); +} + + +#ifdef GCALC_CHECK_WITH_FLOAT +void Gcalc_scan_iterator::point::calc_x(long double *x, long double y, + long double ix) const +{ + long double ddy= gcalc_get_double(dy, GCALC_COORD_BASE); + if (fabsl(ddy) < (long double) 1e-20) + { + *x= ix; + } + else + *x= (ddy * (long double) pi->node.shape.x + 
gcalc_get_double(dx, GCALC_COORD_BASE) * + (y - pi->node.shape.y)) / ddy; +} +#endif /*GCALC_CHECK_WITH_FLOAT*/ + + +static int compare_events(const void *e0, const void *e1) +{ + const Gcalc_scan_iterator::point *p0= (const Gcalc_scan_iterator::point *)e0; + const Gcalc_scan_iterator::point *p1= (const Gcalc_scan_iterator::point *)e1; + return p0->cmp_dx_dy(p1) > 0; +} + + +int Gcalc_scan_iterator::arrange_event(int do_sorting, int n_intersections) +{ + int ev_counter; + point *sp; + point **sp_hook; + + ev_counter= 0; + + *m_bottom_hook= NULL; + for (sp= m_bottom_points; sp; sp= sp->get_next()) + sp->ev_next= sp->get_next(); + + for (sp= state.slice, sp_hook= &state.slice; + sp; sp_hook= sp->next_ptr(), sp= sp->get_next()) + { + if (sp->event) + { + state.event_position_hook= sp_hook; + break; + } + } + + for (sp= *(sp_hook= state.event_position_hook); + sp && sp->event; sp_hook= sp->next_ptr(), sp= sp->get_next()) + { + ev_counter++; + if (sp->get_next() && sp->get_next()->event) + sp->ev_next= sp->get_next(); + else + sp->ev_next= m_bottom_points; + } + +#ifndef GCALC_DBUG_OFF + { + point *cur_p= sp; + for (; cur_p; cur_p= cur_p->get_next()) + GCALC_DBUG_ASSERT(!cur_p->event); + } +#endif /*GCALC_DBUG_OFF*/ + + state.event_end= sp; + + if (ev_counter == 2 && n_intersections == 1) + { + /* If we had only intersection, just swap the two points. */ + sp= *state.event_position_hook; + *state.event_position_hook= sp->get_next(); + sp->next= (*state.event_position_hook)->next; + (*state.event_position_hook)->next= sp; + + /* The list of the events should be restored. */ + (*state.event_position_hook)->ev_next= sp; + sp->ev_next= m_bottom_points; + } + else if (ev_counter == 2 && get_events()->event == scev_two_threads) + { + /* Do nothing. 
*/ + } + else if (ev_counter > 1 && do_sorting) + { + point *cur_p; + *sp_hook= NULL; + sp= (point *) sort_list(compare_events, *state.event_position_hook, + ev_counter); + /* Find last item in the list, it's changed after the sorting. */ + for (cur_p= sp->get_next(); cur_p->get_next(); + cur_p= cur_p->get_next()) + {} + cur_p->next= state.event_end; + *state.event_position_hook= sp; + /* The list of the events should be restored. */ + for (; sp && sp->event; sp= sp->get_next()) + { + if (sp->get_next() && sp->get_next()->event) + sp->ev_next= sp->get_next(); + else + sp->ev_next= m_bottom_points; + } + } + +#ifndef GCALC_DBUG_OFF + { + const event_point *ev= get_events(); + for (; ev && ev->get_next(); ev= ev->get_next()) + { + if (ev->is_bottom() || ev->get_next()->is_bottom()) + break; + GCALC_DBUG_ASSERT(ev->cmp_dx_dy(ev->get_next()) <= 0); + } + } +#endif /*GCALC_DBUG_OFF*/ + return 0; +} + + +int Gcalc_heap::Info::equal_pi(const Info *pi) const +{ + if (type == nt_intersection) + return node.intersection.equal; + if (pi->type == nt_eq_node) + return 1; + if (type == nt_eq_node || pi->type == nt_intersection) + return 0; + return cmp_point_info(this, pi) == 0; +} + +int Gcalc_scan_iterator::step() +{ + int result= 0; + int do_sorting= 0; + int n_intersections= 0; + point *sp; + GCALC_DBUG_ENTER("Gcalc_scan_iterator::step"); + GCALC_DBUG_ASSERT(more_points()); + + if (GCALC_TERMINATED(killed)) + GCALC_DBUG_RETURN(0xFFFF); + + /* Clear the old event marks. 
*/ + if (m_bottom_points) + { + free_list((Gcalc_dyn_list::Item **) &m_bottom_points, + (Gcalc_dyn_list::Item **) m_bottom_hook); + m_bottom_points= NULL; + m_bottom_hook= &m_bottom_points; + } + for (sp= *state.event_position_hook; + sp != state.event_end; sp= sp->get_next()) + sp->event= scev_none; + +//#ifndef GCALC_DBUG_OFF + state.event_position_hook= NULL; + state.pi= NULL; +//#endif /*GCALC_DBUG_OFF*/ + + do + { +#ifndef GCALC_DBUG_OFF + if (m_cur_pi->type == Gcalc_heap::nt_intersection && + m_cur_pi->get_next()->type == Gcalc_heap::nt_intersection && + m_cur_pi->node.intersection.equal) + GCALC_DBUG_ASSERT(cmp_intersections(m_cur_pi, m_cur_pi->get_next()) == 0); +#endif /*GCALC_DBUG_OFF*/ + GCALC_DBUG_CHECK_COUNTER(); + GCALC_DBUG_PRINT_SLICE("step:", state.slice); + GCALC_DBUG_PRINT_PI(m_cur_pi); + if (m_cur_pi->type == Gcalc_heap::nt_shape_node) + { + if (m_cur_pi->is_top()) + { + result= insert_top_node(); + if (!m_cur_pi->is_bottom()) + do_sorting++; + } + else if (m_cur_pi->is_bottom()) + remove_bottom_node(); + else + { + do_sorting++; + result= node_scan(); + } + if (result) + GCALC_DBUG_RETURN(result); + state.pi= m_cur_pi; + } + else if (m_cur_pi->type == Gcalc_heap::nt_eq_node) + { + do_sorting++; + eq_scan(); + } + else + { + /* nt_intersection */ + do_sorting++; + n_intersections++; + intersection_scan(); + if (!state.pi || state.pi->type == Gcalc_heap::nt_intersection) + state.pi= m_cur_pi; + } + + m_cur_pi= m_cur_pi->get_next(); + } while (m_cur_pi && state.pi->equal_pi(m_cur_pi)); + + GCALC_DBUG_RETURN(arrange_event(do_sorting, n_intersections)); +} + + +static int node_on_right(const Gcalc_heap::Info *node, + const Gcalc_heap::Info *edge_a, const Gcalc_heap::Info *edge_b) +{ + Gcalc_coord1 a_x, a_y; + Gcalc_coord1 b_x, b_y; + Gcalc_coord2 ax_by, ay_bx; + int result; + + gcalc_sub_coord1(a_x, node->node.shape.ix, edge_a->node.shape.ix); + gcalc_sub_coord1(a_y, node->node.shape.iy, edge_a->node.shape.iy); + gcalc_sub_coord1(b_x, 
edge_b->node.shape.ix, edge_a->node.shape.ix); + gcalc_sub_coord1(b_y, edge_b->node.shape.iy, edge_a->node.shape.iy); + gcalc_mul_coord1(ax_by, a_x, b_y); + gcalc_mul_coord1(ay_bx, a_y, b_x); + result= gcalc_cmp_coord(ax_by, ay_bx, GCALC_COORD_BASE2); +#ifdef GCALC_CHECK_WITH_FLOAT + { + long double dx= gcalc_get_double(edge_b->node.shape.ix, GCALC_COORD_BASE) - + gcalc_get_double(edge_a->node.shape.ix, GCALC_COORD_BASE); + long double dy= gcalc_get_double(edge_b->node.shape.iy, GCALC_COORD_BASE) - + gcalc_get_double(edge_a->node.shape.iy, GCALC_COORD_BASE); + long double ax= gcalc_get_double(node->node.shape.ix, GCALC_COORD_BASE) - + gcalc_get_double(edge_a->node.shape.ix, GCALC_COORD_BASE); + long double ay= gcalc_get_double(node->node.shape.iy, GCALC_COORD_BASE) - + gcalc_get_double(edge_a->node.shape.iy, GCALC_COORD_BASE); + long double d= ax * dy - ay * dx; + if (result == 0) + GCALC_DBUG_ASSERT(de_check(d, 0.0)); + else if (result < 0) + GCALC_DBUG_ASSERT(de_check(d, 0.0) || d < 0); + else + GCALC_DBUG_ASSERT(de_check(d, 0.0) || d > 0); + } +#endif /*GCALC_CHECK_WITH_FLOAT*/ + return result; +} + + +static int cmp_tops(const Gcalc_heap::Info *top_node, + const Gcalc_heap::Info *edge_a, const Gcalc_heap::Info *edge_b) +{ + int cmp_res_a, cmp_res_b; + + cmp_res_a= gcalc_cmp_coord1(edge_a->node.shape.ix, top_node->node.shape.ix); + cmp_res_b= gcalc_cmp_coord1(edge_b->node.shape.ix, top_node->node.shape.ix); + + if (cmp_res_a <= 0 && cmp_res_b > 0) + return -1; + if (cmp_res_b <= 0 && cmp_res_a > 0) + return 1; + if (cmp_res_a == 0 && cmp_res_b == 0) + return 0; + + return node_on_right(edge_a, top_node, edge_b); +} + + +int Gcalc_scan_iterator::insert_top_node() +{ + point *sp= state.slice; + point **prev_hook= &state.slice; + point *sp1= NULL; + point *sp0= new_slice_point(); + int cmp_res; + + GCALC_DBUG_ENTER("Gcalc_scan_iterator::insert_top_node"); + if (!sp0) + GCALC_DBUG_RETURN(1); + sp0->pi= m_cur_pi; + sp0->next_pi= m_cur_pi->node.shape.left; +#ifndef 
GCALC_DBUG_OFF + sp0->thread= m_cur_thread++; +#endif /*GCALC_DBUG_OFF*/ + if (m_cur_pi->node.shape.left) + { + calc_dx_dy(sp0); + if (m_cur_pi->node.shape.right) + { + if (!(sp1= new_slice_point())) + GCALC_DBUG_RETURN(1); + sp1->event= sp0->event= scev_two_threads; + sp1->pi= m_cur_pi; + sp1->next_pi= m_cur_pi->node.shape.right; +#ifndef GCALC_DBUG_OFF + sp1->thread= m_cur_thread++; +#endif /*GCALC_DBUG_OFF*/ + calc_dx_dy(sp1); + /* We have two threads so should decide which one will be first */ + cmp_res= cmp_tops(m_cur_pi, m_cur_pi->node.shape.left, m_cur_pi->node.shape.right); + if (cmp_res > 0) + { + point *tmp= sp0; + sp0= sp1; + sp1= tmp; + } + else if (cmp_res == 0) + { + /* Exactly same direction of the edges. */ + cmp_res= gcalc_cmp_coord1(m_cur_pi->node.shape.left->node.shape.iy, m_cur_pi->node.shape.right->node.shape.iy); + if (cmp_res != 0) + { + if (cmp_res < 0) + { + if (add_eq_node(sp0->next_pi, sp1)) + GCALC_DBUG_RETURN(1); + } + else + { + if (add_eq_node(sp1->next_pi, sp0)) + GCALC_DBUG_RETURN(1); + } + } + else + { + cmp_res= gcalc_cmp_coord1(m_cur_pi->node.shape.left->node.shape.ix, m_cur_pi->node.shape.right->node.shape.ix); + if (cmp_res != 0) + { + if (cmp_res < 0) + { + if (add_eq_node(sp0->next_pi, sp1)) + GCALC_DBUG_RETURN(1); + } + else + { + if (add_eq_node(sp1->next_pi, sp0)) + GCALC_DBUG_RETURN(1); + } + } + } + } + } + else + sp0->event= scev_thread; + } + else + sp0->event= scev_single_point; + + + /* Check if we already have an event - then we'll place the node there */ + for (; sp && !sp->event; prev_hook= sp->next_ptr(), sp=sp->get_next()) + {} + if (!sp) + { + sp= state.slice; + prev_hook= &state.slice; + /* We need to find the place to insert. */ + for (; sp; prev_hook= sp->next_ptr(), sp=sp->get_next()) + { + if (sp->event || gcalc_cmp_coord1(*sp->r_border, m_cur_pi->node.shape.ix) < 0) + continue; + cmp_res= node_on_right(m_cur_pi, sp->pi, sp->next_pi); + if (cmp_res == 0) + { + /* The top node lies on the edge. 
*/ + /* Nodes of that edge will be handled in other places. */ + sp->event= scev_intersection; + } + else if (cmp_res < 0) + break; + } + } + + if (sp0->event == scev_single_point) + { + /* Add single point to the bottom list. */ + *m_bottom_hook= sp0; + m_bottom_hook= sp0->next_ptr(); + state.event_position_hook= prev_hook; + } + else + { + *prev_hook= sp0; + sp0->next= sp; + if (add_events_for_node(sp0)) + GCALC_DBUG_RETURN(1); + + if (sp0->event == scev_two_threads) + { + *prev_hook= sp1; + sp1->next= sp; + if (add_events_for_node(sp1)) + GCALC_DBUG_RETURN(1); + + sp0->next= sp1; + *prev_hook= sp0; + } + } + + GCALC_DBUG_RETURN(0); +} + + +void Gcalc_scan_iterator::remove_bottom_node() +{ + point *sp= state.slice; + point **sp_hook= &state.slice; + point *first_bottom_point= NULL; + + GCALC_DBUG_ENTER("Gcalc_scan_iterator::remove_bottom_node"); + for (; sp; sp= sp->get_next()) + { + if (sp->next_pi == m_cur_pi) + { + *sp_hook= sp->get_next(); + sp->pi= m_cur_pi; + sp->next_pi= NULL; + if (first_bottom_point) + { + first_bottom_point->event= sp->event= scev_two_ends; + break; + } + first_bottom_point= sp; + sp->event= scev_end; + state.event_position_hook= sp_hook; + } + else + sp_hook= sp->next_ptr(); + } + GCALC_DBUG_ASSERT(first_bottom_point); + *m_bottom_hook= first_bottom_point; + m_bottom_hook= first_bottom_point->next_ptr(); + if (sp) + { + *m_bottom_hook= sp; + m_bottom_hook= sp->next_ptr(); + } + + GCALC_DBUG_VOID_RETURN; +} + + +int Gcalc_scan_iterator::add_events_for_node(point *sp_node) +{ + point *sp= state.slice; + int cur_pi_r, sp_pi_r; + + GCALC_DBUG_ENTER("Gcalc_scan_iterator::add_events_for_node"); + + /* Scan to the event point. 
*/ + for (; sp != sp_node; sp= sp->get_next()) + { + GCALC_DBUG_ASSERT(!sp->is_bottom()); + GCALC_DBUG_PRINT(("left cut_edge %d", sp->thread)); + if (sp->next_pi == sp_node->next_pi || + gcalc_cmp_coord1(*sp->r_border, *sp_node->l_border) < 0) + continue; + sp_pi_r= node_on_right(sp->next_pi, sp_node->pi, sp_node->next_pi); + if (sp_pi_r < 0) + continue; + cur_pi_r= node_on_right(sp_node->next_pi, sp->pi, sp->next_pi); + if (cur_pi_r > 0) + continue; + if (cur_pi_r == 0 && sp_pi_r == 0) + { + int cmp_res= cmp_point_info(sp->next_pi, sp_node->next_pi); + if (cmp_res > 0) + { + if (add_eq_node(sp_node->next_pi, sp)) + GCALC_DBUG_RETURN(1); + } + else if (cmp_res < 0) + { + if (add_eq_node(sp->next_pi, sp_node)) + GCALC_DBUG_RETURN(1); + } + continue; + } + + if (cur_pi_r == 0) + { + if (add_eq_node(sp_node->next_pi, sp)) + GCALC_DBUG_RETURN(1); + continue; + } + else if (sp_pi_r == 0) + { + if (add_eq_node(sp->next_pi, sp_node)) + GCALC_DBUG_RETURN(1); + continue; + } + + if (sp->event) + { +#ifndef GCALC_DBUG_OFF + cur_pi_r= node_on_right(sp_node->pi, sp->pi, sp->next_pi); + GCALC_DBUG_ASSERT(cur_pi_r == 0); +#endif /*GCALC_DBUG_OFF*/ + continue; + } + cur_pi_r= node_on_right(sp_node->pi, sp->pi, sp->next_pi); + GCALC_DBUG_ASSERT(cur_pi_r >= 0); + //GCALC_DBUG_ASSERT(cur_pi_r > 0); /* Is it ever violated? 
*/ + if (cur_pi_r > 0 && add_intersection(sp, sp_node, m_cur_pi)) + GCALC_DBUG_RETURN(1); + } + + /* Scan to the end of the slice */ + sp= sp->get_next(); + + for (; sp; sp= sp->get_next()) + { + GCALC_DBUG_ASSERT(!sp->is_bottom()); + GCALC_DBUG_PRINT(("right cut_edge %d", sp->thread)); + if (sp->next_pi == sp_node->next_pi || + gcalc_cmp_coord1(*sp_node->r_border, *sp->l_border) < 0) + continue; + sp_pi_r= node_on_right(sp->next_pi, sp_node->pi, sp_node->next_pi); + if (sp_pi_r > 0) + continue; + cur_pi_r= node_on_right(sp_node->next_pi, sp->pi, sp->next_pi); + if (cur_pi_r < 0) + continue; + if (cur_pi_r == 0 && sp_pi_r == 0) + { + int cmp_res= cmp_point_info(sp->next_pi, sp_node->next_pi); + if (cmp_res > 0) + { + if (add_eq_node(sp_node->next_pi, sp)) + GCALC_DBUG_RETURN(1); + } + else if (cmp_res < 0) + { + if (add_eq_node(sp->next_pi, sp_node)) + GCALC_DBUG_RETURN(1); + } + continue; + } + if (cur_pi_r == 0) + { + if (add_eq_node(sp_node->next_pi, sp)) + GCALC_DBUG_RETURN(1); + continue; + } + else if (sp_pi_r == 0) + { + if (add_eq_node(sp->next_pi, sp_node)) + GCALC_DBUG_RETURN(1); + continue; + } + + if (sp->event) + { +#ifndef GCALC_DBUG_OFF + cur_pi_r= node_on_right(sp_node->pi, sp->pi, sp->next_pi); + GCALC_DBUG_ASSERT(cur_pi_r == 0); +#endif /*GCALC_DBUG_OFF*/ + continue; + } + cur_pi_r= node_on_right(sp_node->pi, sp->pi, sp->next_pi); + GCALC_DBUG_ASSERT(cur_pi_r <= 0); + //GCALC_DBUG_ASSERT(cur_pi_r < 0); /* Is it ever violated? */ + if (cur_pi_r < 0 && add_intersection(sp_node, sp, m_cur_pi)) + GCALC_DBUG_RETURN(1); + } + + GCALC_DBUG_RETURN(0); +} + + +int Gcalc_scan_iterator::node_scan() +{ + point *sp= state.slice; + Gcalc_heap::Info *cur_pi= m_cur_pi; + + GCALC_DBUG_ENTER("Gcalc_scan_iterator::node_scan"); + + /* Scan to the event point. */ + /* Can be avoided if we add link to the sp to the Info. */ + for (; sp->next_pi != cur_pi; sp= sp->get_next()) + {} + + GCALC_DBUG_PRINT(("node for %d", sp->thread)); + /* Handle the point itself. 
*/ + sp->pi= cur_pi; + sp->next_pi= cur_pi->node.shape.left; + sp->event= scev_point; + calc_dx_dy(sp); + + GCALC_DBUG_RETURN(add_events_for_node(sp)); +} + + +void Gcalc_scan_iterator::eq_scan() +{ + point *sp= eq_sp(m_cur_pi); + GCALC_DBUG_ENTER("Gcalc_scan_iterator::eq_scan"); + +#ifndef GCALC_DBUG_OFF + { + point *cur_p= state.slice; + for (; cur_p && cur_p != sp; cur_p= cur_p->get_next()) + {} + GCALC_DBUG_ASSERT(cur_p); + } +#endif /*GCALC_DBUG_OFF*/ + if (!sp->event) + { + sp->event= scev_intersection; + sp->ev_pi= m_cur_pi; + } + + GCALC_DBUG_VOID_RETURN; +} + + +void Gcalc_scan_iterator::intersection_scan() +{ + intersection_info *ii= i_data(m_cur_pi); + GCALC_DBUG_ENTER("Gcalc_scan_iterator::intersection_scan"); + +#ifndef GCALC_DBUG_OFF + { + point *sp= state.slice; + for (; sp && sp != ii->edge_a; sp= sp->get_next()) + {} + GCALC_DBUG_ASSERT(sp); + for (; sp && sp != ii->edge_b; sp= sp->get_next()) + {} + GCALC_DBUG_ASSERT(sp); + } +#endif /*GCALC_DBUG_OFF*/ + + ii->edge_a->event= ii->edge_b->event= scev_intersection; + ii->edge_a->ev_pi= ii->edge_b->ev_pi= m_cur_pi; + free_item(ii); + m_cur_pi->node.intersection.data= NULL; + + GCALC_DBUG_VOID_RETURN; +} + + +int Gcalc_scan_iterator::add_intersection(point *sp_a, point *sp_b, + Gcalc_heap::Info *pi_from) +{ + Gcalc_heap::Info *ii; + intersection_info *i_calc; + int cmp_res; + int skip_next= 0; + + GCALC_DBUG_ENTER("Gcalc_scan_iterator::add_intersection"); + if (!(i_calc= new_intersection_info(sp_a, sp_b)) || + !(ii= new_intersection(m_heap, i_calc))) + GCALC_DBUG_RETURN(1); + + ii->node.intersection.equal= 0; + + for (; + pi_from->get_next() != sp_a->next_pi && + pi_from->get_next() != sp_b->next_pi; + pi_from= pi_from->get_next()) + { + Gcalc_heap::Info *cur= pi_from->get_next(); + if (skip_next) + { + if (cur->type == Gcalc_heap::nt_intersection) + skip_next= cur->node.intersection.equal; + else + skip_next= 0; + continue; + } + if (cur->type == Gcalc_heap::nt_intersection) + { + cmp_res= 
cmp_intersections(cur, ii); + skip_next= cur->node.intersection.equal; + } + else if (cur->type == Gcalc_heap::nt_eq_node) + continue; + else + cmp_res= cmp_node_isc(cur, ii); + if (cmp_res == 0) + { + ii->node.intersection.equal= 1; + break; + } + else if (cmp_res > 0) + break; + } + + /* Intersection inserted before the equal point. */ + ii->next= pi_from->get_next(); + pi_from->next= ii; + + GCALC_DBUG_RETURN(0); +} + + +int Gcalc_scan_iterator::add_eq_node(Gcalc_heap::Info *node, point *sp) +{ + Gcalc_heap::Info *en; + + GCALC_DBUG_ENTER("Gcalc_scan_iterator::add_intersection"); + en= new_eq_point(m_heap, node, sp); + if (!en) + GCALC_DBUG_RETURN(1); + + /* eq_node inserted after the equal point. */ + en->next= node->get_next(); + node->next= en; + + GCALC_DBUG_RETURN(0); +} + + +void calc_t(Gcalc_coord2 t_a, Gcalc_coord2 t_b, + Gcalc_coord1 dxa, Gcalc_coord1 dxb, + const Gcalc_heap::Info *p1, const Gcalc_heap::Info *p2, + const Gcalc_heap::Info *p3, const Gcalc_heap::Info *p4) +{ + Gcalc_coord1 a2_a1x, a2_a1y; + Gcalc_coord2 x1y2, x2y1; + Gcalc_coord1 dya, dyb; + + gcalc_sub_coord1(a2_a1x, p3->node.shape.ix, p1->node.shape.ix); + gcalc_sub_coord1(a2_a1y, p3->node.shape.iy, p1->node.shape.iy); + + gcalc_sub_coord1(dxa, p2->node.shape.ix, p1->node.shape.ix); + gcalc_sub_coord1(dya, p2->node.shape.iy, p1->node.shape.iy); + gcalc_sub_coord1(dxb, p4->node.shape.ix, p3->node.shape.ix); + gcalc_sub_coord1(dyb, p4->node.shape.iy, p3->node.shape.iy); + + gcalc_mul_coord1(x1y2, dxa, dyb); + gcalc_mul_coord1(x2y1, dya, dxb); + gcalc_sub_coord(t_b, GCALC_COORD_BASE2, x1y2, x2y1); + + + gcalc_mul_coord1(x1y2, a2_a1x, dyb); + gcalc_mul_coord1(x2y1, a2_a1y, dxb); + gcalc_sub_coord(t_a, GCALC_COORD_BASE2, x1y2, x2y1); +} + + +double Gcalc_scan_iterator::get_y() const +{ + if (state.pi->type == Gcalc_heap::nt_intersection) + { + Gcalc_coord1 dxa, dya; + Gcalc_coord2 t_a, t_b; + Gcalc_coord3 a_tb, b_ta, y_exp; + calc_t(t_a, t_b, dxa, dya, + state.pi->node.intersection.p1, 
state.pi->node.intersection.p2, state.pi->node.intersection.p3, state.pi->node.intersection.p4); + + + gcalc_mul_coord(a_tb, GCALC_COORD_BASE3, + t_b, GCALC_COORD_BASE2, state.pi->node.intersection.p1->node.shape.iy, GCALC_COORD_BASE); + gcalc_mul_coord(b_ta, GCALC_COORD_BASE3, + t_a, GCALC_COORD_BASE2, dya, GCALC_COORD_BASE); + + gcalc_add_coord(y_exp, GCALC_COORD_BASE3, a_tb, b_ta); + + return (get_pure_double(y_exp, GCALC_COORD_BASE3) / + get_pure_double(t_b, GCALC_COORD_BASE2)) / m_heap->coord_extent; + } + else + return state.pi->node.shape.y; +} + + +double Gcalc_scan_iterator::get_event_x() const +{ + if (state.pi->type == Gcalc_heap::nt_intersection) + { + Gcalc_coord1 dxa, dya; + Gcalc_coord2 t_a, t_b; + Gcalc_coord3 a_tb, b_ta, x_exp; + calc_t(t_a, t_b, dxa, dya, + state.pi->node.intersection.p1, state.pi->node.intersection.p2, state.pi->node.intersection.p3, state.pi->node.intersection.p4); + + + gcalc_mul_coord(a_tb, GCALC_COORD_BASE3, + t_b, GCALC_COORD_BASE2, state.pi->node.intersection.p1->node.shape.ix, GCALC_COORD_BASE); + gcalc_mul_coord(b_ta, GCALC_COORD_BASE3, + t_a, GCALC_COORD_BASE2, dxa, GCALC_COORD_BASE); + + gcalc_add_coord(x_exp, GCALC_COORD_BASE3, a_tb, b_ta); + + return (get_pure_double(x_exp, GCALC_COORD_BASE3) / + get_pure_double(t_b, GCALC_COORD_BASE2)) / m_heap->coord_extent; + } + else + return state.pi->node.shape.x; +} + +double Gcalc_scan_iterator::get_h() const +{ + double cur_y= get_y(); + double next_y; + if (state.pi->type == Gcalc_heap::nt_intersection) + { + double x; + state.pi->calc_xy(&x, &next_y); + } + else + next_y= state.pi->next ? 
state.pi->get_next()->node.shape.y : 0.0; + return next_y - cur_y; +} + + +double Gcalc_scan_iterator::get_sp_x(const point *sp) const +{ + double dy; + if (sp->event & (scev_end | scev_two_ends | scev_point)) + return sp->pi->node.shape.x; + dy= sp->next_pi->node.shape.y - sp->pi->node.shape.y; + if (fabs(dy) < 1e-12) + return sp->pi->node.shape.x; + return sp->pi->node.shape.x + (sp->next_pi->node.shape.x - sp->pi->node.shape.x) * dy; +} + + +double Gcalc_scan_iterator::get_pure_double(const Gcalc_internal_coord *d, + int d_len) +{ + int n= 1; + long double res= (long double) FIRST_DIGIT(d[0]); + do + { + res*= (long double) GCALC_DIG_BASE; + res+= (long double) d[n]; + } while(++n < d_len); + + if (GCALC_SIGN(d[0])) + res*= -1.0; + return res; +} + + +#endif /* HAVE_SPATIAL */ diff --git a/sql/gcalc_slicescan.h b/sql/gcalc_slicescan.h new file mode 100644 index 00000000..37e887e8 --- /dev/null +++ b/sql/gcalc_slicescan.h @@ -0,0 +1,607 @@ +/* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved. + Copyright (C) 2011 Monty Program Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifndef GCALC_SLICESCAN_INCLUDED +#define GCALC_SLICESCAN_INCLUDED + +#ifndef DBUG_OFF +// #define GCALC_CHECK_WITH_FLOAT +#else +#define GCALC_DBUG_OFF +#endif /*DBUG_OFF*/ + +#ifndef GCALC_DBUG_OFF +#define GCALC_DBUG_PRINT(b) DBUG_PRINT("Gcalc", b) +#define GCALC_DBUG_ENTER(a) DBUG_ENTER("Gcalc " a) +#define GCALC_DBUG_RETURN(r) DBUG_RETURN(r) +#define GCALC_DBUG_VOID_RETURN DBUG_VOID_RETURN +#define GCALC_DBUG_ASSERT(r) DBUG_ASSERT(r) +#else +#define GCALC_DBUG_PRINT(b) do {} while(0) +#define GCALC_DBUG_ENTER(a) do {} while(0) +#define GCALC_DBUG_RETURN(r) return (r) +#define GCALC_DBUG_VOID_RETURN do {} while(0) +#define GCALC_DBUG_ASSERT(r) do {} while(0) +#endif /*GCALC_DBUG_OFF*/ + +#define GCALC_TERMINATED(state_var) (state_var && (*state_var)) +#define GCALC_SET_TERMINATED(state_var, val) state_var= val +#define GCALC_DECL_TERMINATED_STATE(varname) \ + volatile int *varname; + +/* + Gcalc_dyn_list class designed to manage long lists of same-size objects + with the possible efficiency. + It allocates fixed-size blocks of memory (blk_size specified at the time + of creation). When new object is added to the list, it occupies part of + this block until it's full. Then the new block is allocated. + Freed objects are chained to the m_free list, and if it's not empty, the + newly added object is taken from this list instead the block. 
+*/ + +class Gcalc_dyn_list +{ +public: + class Item + { + public: + Item *next; + }; + + Gcalc_dyn_list(size_t blk_size, size_t sizeof_item); + Gcalc_dyn_list(const Gcalc_dyn_list &dl); + ~Gcalc_dyn_list(); + Item *new_item() + { + Item *result; + if (m_free) + { + result= m_free; + m_free= m_free->next; + } + else + result= alloc_new_blk(); + + return result; + } + inline void free_item(Item *item) + { + item->next= m_free; + m_free= item; + } + inline void free_list(Item **list, Item **hook) + { + *hook= m_free; + m_free= *list; + } + + void free_list(Item *list) + { + Item **hook= &list; + while (*hook) + hook= &(*hook)->next; + free_list(&list, hook); + } + + void reset(); + void cleanup(); + +protected: + size_t m_blk_size; + size_t m_sizeof_item; + unsigned int m_points_per_blk; + void *m_first_blk; + void **m_blk_hook; + Item *m_free; + Item *m_keep; + + Item *alloc_new_blk(); + void format_blk(void* block); + inline Item *ptr_add(Item *ptr, int n_items) + { + return (Item *)(((char*)ptr) + n_items * m_sizeof_item); + } +}; + +/* Internal Gcalc coordinates to provide the precise calculations */ + +#define GCALC_DIG_BASE 1000000000 +typedef uint32 gcalc_digit_t; +typedef unsigned long long gcalc_coord2; +typedef gcalc_digit_t Gcalc_internal_coord; +#define GCALC_COORD_BASE 2 +#define GCALC_COORD_BASE2 4 +#define GCALC_COORD_BASE3 6 +#define GCALC_COORD_BASE4 8 +#define GCALC_COORD_BASE5 10 + +typedef gcalc_digit_t Gcalc_coord1[GCALC_COORD_BASE]; +typedef gcalc_digit_t Gcalc_coord2[GCALC_COORD_BASE*2]; +typedef gcalc_digit_t Gcalc_coord3[GCALC_COORD_BASE*3]; + + +void gcalc_mul_coord(Gcalc_internal_coord *result, int result_len, + const Gcalc_internal_coord *a, int a_len, + const Gcalc_internal_coord *b, int b_len); + +void gcalc_add_coord(Gcalc_internal_coord *result, int result_len, + const Gcalc_internal_coord *a, + const Gcalc_internal_coord *b); + +void gcalc_sub_coord(Gcalc_internal_coord *result, int result_len, + const Gcalc_internal_coord *a, + const 
Gcalc_internal_coord *b); + +int gcalc_cmp_coord(const Gcalc_internal_coord *a, + const Gcalc_internal_coord *b, int len); + +/* Internal coordinates declarations end. */ + + +typedef uint gcalc_shape_info; + +/* + Gcalc_heap represents the 'dynamic list' of Info objects, that + contain information about vertexes of all the shapes that take + part in some spatial calculation. Can become quite long. + After filled, the list is usually sorted and then walked through + in the slicescan algorithm. + The Gcalc_heap and the algorithm can only operate with two + kinds of shapes - polygon and polyline. So all the spatial + objects should be represented as sets of these two. +*/ + +class Gcalc_heap : public Gcalc_dyn_list +{ +public: + enum node_type + { + nt_shape_node, + nt_intersection, + nt_eq_node + }; + class Info : public Gcalc_dyn_list::Item + { + public: + node_type type; + union + { + struct + { + /* nt_shape_node */ + gcalc_shape_info shape; + Info *left; + Info *right; + double x,y; + Gcalc_coord1 ix, iy; + int top_node; + } shape; + struct + { + /* nt_intersection */ + /* Line p1-p2 supposed to intersect line p3-p4 */ + const Info *p1; + const Info *p2; + const Info *p3; + const Info *p4; + void *data; + int equal; + } intersection; + struct + { + /* nt_eq_node */ + const Info *node; + void *data; + } eq; + } node; + + bool is_bottom() const + { GCALC_DBUG_ASSERT(type == nt_shape_node); return !node.shape.left; } + bool is_top() const + { GCALC_DBUG_ASSERT(type == nt_shape_node); return node.shape.top_node; } + bool is_single_node() const + { return is_bottom() && is_top(); } + + void calc_xy(double *x, double *y) const; + int equal_pi(const Info *pi) const; +#ifdef GCALC_CHECK_WITH_FLOAT + void calc_xy_ld(long double *x, long double *y) const; +#endif /*GCALC_CHECK_WITH_FLOAT*/ + + Info *get_next() { return (Info *)next; } + const Info *get_next() const { return (const Info *)next; } + }; + + Gcalc_heap(size_t blk_size=8192) : + Gcalc_dyn_list(blk_size, 
sizeof(Info)), + m_hook(&m_first), m_n_points(0) + {} + + Gcalc_heap(const Gcalc_heap &gh) : + Gcalc_dyn_list(gh), + m_hook(&m_first), m_n_points(0) + {} + + void set_extent(double xmin, double xmax, double ymin, double ymax); + Info *new_point_info(double x, double y, gcalc_shape_info shape); + void free_point_info(Info *i, Gcalc_dyn_list::Item **i_hook); + Info *new_intersection(const Info *p1, const Info *p2, + const Info *p3, const Info *p4); + void prepare_operation(); + inline bool ready() const { return m_hook == NULL; } + Info *get_first() { return (Info *)m_first; } + const Info *get_first() const { return (const Info *)m_first; } + Gcalc_dyn_list::Item **get_last_hook() { return m_hook; } + void reset(); +#ifdef GCALC_CHECK_WITH_FLOAT + long double get_double(const Gcalc_internal_coord *c) const; +#endif /*GCALC_CHECK_WITH_FLOAT*/ + double coord_extent; + Gcalc_dyn_list::Item **get_cur_hook() { return m_hook; } + +private: + Gcalc_dyn_list::Item *m_first; + Gcalc_dyn_list::Item **m_hook; + int m_n_points; +}; + + +/* + the spatial object has to be represented as a set of + simple polygones and polylines to be sent to the slicescan. + + Gcalc_shape_transporter class and his descendants are used to + simplify storing the information about the shape into necessary structures. + This base class only fills the Gcalc_heap with the information about + shapes and vertices. + + Normally the Gcalc_shape_transporter family object is sent as a parameter + to the 'get_shapes' method of an 'spatial' object so it can pass + the spatial information about itself. The virtual methods are + treating this data in a way the caller needs. 
+*/ + +class Gcalc_shape_transporter +{ +private: + Gcalc_heap::Info *m_first; + Gcalc_heap::Info *m_prev; + Gcalc_dyn_list::Item **m_prev_hook; + int m_shape_started; + void int_complete(); +protected: + Gcalc_heap *m_heap; + int int_single_point(gcalc_shape_info Info, double x, double y); + int int_add_point(gcalc_shape_info Info, double x, double y); + void int_start_line() + { + DBUG_ASSERT(!m_shape_started); + m_shape_started= 1; + m_first= m_prev= NULL; + } + void int_complete_line() + { + DBUG_ASSERT(m_shape_started== 1); + int_complete(); + m_shape_started= 0; + } + void int_start_ring() + { + DBUG_ASSERT(m_shape_started== 2); + m_shape_started= 3; + m_first= m_prev= NULL; + } + void int_complete_ring() + { + DBUG_ASSERT(m_shape_started== 3); + int_complete(); + m_shape_started= 2; + } + void int_start_poly() + { + DBUG_ASSERT(!m_shape_started); + m_shape_started= 2; + } + void int_complete_poly() + { + DBUG_ASSERT(m_shape_started== 2); + m_shape_started= 0; + } + bool line_started() { return m_shape_started == 1; }; +public: + Gcalc_shape_transporter(Gcalc_heap *heap) : + m_shape_started(0), m_heap(heap) {} + + virtual int single_point(double x, double y)=0; + virtual int start_line()=0; + virtual int complete_line()=0; + virtual int start_poly()=0; + virtual int complete_poly()=0; + virtual int start_ring()=0; + virtual int complete_ring()=0; + virtual int add_point(double x, double y)=0; + virtual int start_collection(int n_objects) { return 0; } + virtual int empty_shape() { return 0; } + int start_simple_poly() + { + return start_poly() || start_ring(); + } + int complete_simple_poly() + { + return complete_ring() || complete_poly(); + } + virtual ~Gcalc_shape_transporter() = default; +}; + + +enum Gcalc_scan_events +{ + scev_none= 0, + scev_point= 1, /* Just a new point in thread */ + scev_thread= 2, /* Start of the new thread */ + scev_two_threads= 4, /* A couple of new threads started */ + scev_intersection= 8, /* Intersection happened */ + 
scev_end= 16, /* Single thread finished */ + scev_two_ends= 32, /* A couple of threads finished */ + scev_single_point= 64 /* Got single point */ +}; + + +/* + Gcalc_scan_iterator incapsulates the slicescan algorithm. + It takes filled Gcalc_heap as a datasource. Then can be + iterated through the vertexes and intersection points with + the step() method. After the 'step()' one usually observes + the current 'slice' to do the necessary calculations, like + looking for intersections, calculating the area, whatever. +*/ + +class Gcalc_scan_iterator : public Gcalc_dyn_list +{ +public: + class point : public Gcalc_dyn_list::Item + { + public: + Gcalc_coord1 dx; + Gcalc_coord1 dy; + Gcalc_heap::Info *pi; + Gcalc_heap::Info *next_pi; + Gcalc_heap::Info *ev_pi; + const Gcalc_coord1 *l_border; + const Gcalc_coord1 *r_border; + point *ev_next; + + Gcalc_scan_events event; + + inline const point *c_get_next() const + { return (const point *)next; } + inline bool is_bottom() const { return !next_pi; } + gcalc_shape_info get_shape() const { return pi->node.shape.shape; } + inline point *get_next() { return (point *)next; } + inline const point *get_next() const { return (const point *)next; } + /* Compare the dx_dy parameters regarding the horiz_dir */ + /* returns -1 if less, 0 if equal, 1 if bigger */ + static int cmp_dx_dy(const Gcalc_coord1 dx_a, + const Gcalc_coord1 dy_a, + const Gcalc_coord1 dx_b, + const Gcalc_coord1 dy_b); + static int cmp_dx_dy(const Gcalc_heap::Info *p1, + const Gcalc_heap::Info *p2, + const Gcalc_heap::Info *p3, + const Gcalc_heap::Info *p4); + int cmp_dx_dy(const point *p) const; + point **next_ptr() { return (point **) &next; } +#ifndef GCALC_DBUG_OFF + unsigned int thread; +#endif /*GCALC_DBUG_OFF*/ +#ifdef GCALC_CHECK_WITH_FLOAT + void calc_x(long double *x, long double y, long double ix) const; +#endif /*GCALC_CHECK_WITH_FLOAT*/ + }; + + /* That class introduced mostly for the 'typecontrol' reason. 
*/ + /* only difference from the point classis the get_next() function. */ + class event_point : public point + { + public: + inline const event_point *get_next() const + { return (const event_point*) ev_next; } + int simple_event() const + { + return !ev_next ? (event & (scev_point | scev_end)) : + (!ev_next->ev_next && event == scev_two_ends); + } + }; + + class intersection_info : public Gcalc_dyn_list::Item + { + public: + point *edge_a; + point *edge_b; + + Gcalc_coord2 t_a; + Gcalc_coord2 t_b; + int t_calculated; + Gcalc_coord3 x_exp; + int x_calculated; + Gcalc_coord3 y_exp; + int y_calculated; + void calc_t() + {if (!t_calculated) do_calc_t(); } + void calc_y_exp() + { if (!y_calculated) do_calc_y(); } + void calc_x_exp() + { if (!x_calculated) do_calc_x(); } + + void do_calc_t(); + void do_calc_x(); + void do_calc_y(); + }; + + + class slice_state + { + public: + point *slice; + point **event_position_hook; + point *event_end; + const Gcalc_heap::Info *pi; + }; + +public: + Gcalc_scan_iterator(size_t blk_size= 8192); + + GCALC_DECL_TERMINATED_STATE(killed) + + void init(Gcalc_heap *points); /* Iterator can be reused */ + void reset(); + int step(); + + Gcalc_heap::Info *more_points() { return m_cur_pi; } + bool more_trapezoids() + { return m_cur_pi && m_cur_pi->next; } + + const point *get_bottom_points() const + { return m_bottom_points; } + const point *get_event_position() const + { return *state.event_position_hook; } + const point *get_event_end() const + { return state.event_end; } + const event_point *get_events() const + { return (const event_point *) + (*state.event_position_hook == state.event_end ? 
+ m_bottom_points : *state.event_position_hook); } + const point *get_b_slice() const { return state.slice; } + double get_h() const; + double get_y() const; + double get_event_x() const; + double get_sp_x(const point *sp) const; + int intersection_step() const + { return state.pi->type == Gcalc_heap::nt_intersection; } + const Gcalc_heap::Info *get_cur_pi() const + { + return state.pi; + } + +private: + Gcalc_heap *m_heap; + Gcalc_heap::Info *m_cur_pi; + slice_state state; + +#ifndef GCALC_DBUG_OFF + unsigned int m_cur_thread; +#endif /*GCALC_DBUG_OFF*/ + + point *m_bottom_points; + point **m_bottom_hook; + + int node_scan(); + void eq_scan(); + void intersection_scan(); + void remove_bottom_node(); + int insert_top_node(); + int add_intersection(point *sp_a, point *sp_b, + Gcalc_heap::Info *pi_from); + int add_eq_node(Gcalc_heap::Info *node, point *sp); + int add_events_for_node(point *sp_node); + + point *new_slice_point() + { + point *new_point= (point *)new_item(); + return new_point; + } + intersection_info *new_intersection_info(point *a, point *b) + { + intersection_info *ii= (intersection_info *)new_item(); + ii->edge_a= a; + ii->edge_b= b; + ii->t_calculated= ii->x_calculated= ii->y_calculated= 0; + return ii; + } + int arrange_event(int do_sorting, int n_intersections); + static double get_pure_double(const Gcalc_internal_coord *d, int d_len); +}; + + +/* + Gcalc_trapezoid_iterator simplifies the calculations on + the current slice of the Gcalc_scan_iterator. + One can walk through the trapezoids formed between + previous and current slices. 
+*/ + +#ifdef TMP_BLOCK +class Gcalc_trapezoid_iterator +{ +protected: + const Gcalc_scan_iterator::point *sp0; + const Gcalc_scan_iterator::point *sp1; +public: + Gcalc_trapezoid_iterator(const Gcalc_scan_iterator *scan_i) : + sp0(scan_i->get_b_slice()), + sp1(scan_i->get_t_slice()) + {} + + inline bool more() const { return sp1 && sp1->next; } + + const Gcalc_scan_iterator::point *lt() const { return sp1; } + const Gcalc_scan_iterator::point *lb() const { return sp0; } + const Gcalc_scan_iterator::point *rb() const + { + const Gcalc_scan_iterator::point *result= sp0; + while ((result= result->c_get_next())->is_bottom()) + {} + return result; + } + const Gcalc_scan_iterator::point *rt() const + { return sp1->c_get_next(); } + + void operator++() + { + sp0= rb(); + sp1= rt(); + } +}; +#endif /*TMP_BLOCK*/ + + +/* + Gcalc_point_iterator simplifies the calculations on + the current slice of the Gcalc_scan_iterator. + One can walk through the points on the current slice. +*/ + +class Gcalc_point_iterator +{ +protected: + const Gcalc_scan_iterator::point *sp; +public: + Gcalc_point_iterator(const Gcalc_scan_iterator *scan_i): + sp(scan_i->get_b_slice()) + {} + + inline bool more() const { return sp != NULL; } + inline void operator++() { sp= sp->c_get_next(); } + inline const Gcalc_scan_iterator::point *point() const { return sp; } + inline const Gcalc_heap::Info *get_pi() const { return sp->pi; } + inline gcalc_shape_info get_shape() const { return sp->get_shape(); } + inline void restart(const Gcalc_scan_iterator *scan_i) + { sp= scan_i->get_b_slice(); } +}; + +#endif /*GCALC_SLICESCAN_INCLUDED*/ + diff --git a/sql/gcalc_tools.cc b/sql/gcalc_tools.cc new file mode 100644 index 00000000..25c80a7a --- /dev/null +++ b/sql/gcalc_tools.cc @@ -0,0 +1,1471 @@ +/* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved. + Copyright (C) 2011 Monty Program Ab. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#include "mariadb.h" + +#ifdef HAVE_SPATIAL + +#include "gcalc_tools.h" +#include "spatial.h" + +#define float_to_coord(d) ((double) d) + + +/* + Adds new shape to the relation. + After that it can be used as an argument of an operation. +*/ + +gcalc_shape_info Gcalc_function::add_new_shape(uint32 shape_id, + shape_type shape_kind) +{ + shapes_buffer.q_append((uint32) shape_kind); + return n_shapes++; +} + + +/* + Adds new operation to the constructed relation. + To construct the complex relation one has to specify operations + in prefix style. +*/ + +void Gcalc_function::add_operation(uint operation, uint32 n_operands) +{ + uint32 op_code= (uint32 ) operation + n_operands; + function_buffer.q_append(op_code); +} + + +/* + Sometimes the number of arguments is unknown at the moment the operation + is added. That allows to specify it later. +*/ + +void Gcalc_function::add_operands_to_op(uint32 operation_pos, uint32 n_operands) +{ + uint32 op_code= uint4korr(function_buffer.ptr() + operation_pos) + n_operands; + function_buffer.write_at_position(operation_pos, op_code); +} + + +/* + Just like the add_operation() but the result will be the inverted + value of an operation. 
+*/ + +void Gcalc_function::add_not_operation(op_type operation, uint32 n_operands) +{ + uint32 op_code= ((uint32) op_not | (uint32 ) operation) + n_operands; + function_buffer.q_append(op_code); +} + + +int Gcalc_function::single_shape_op(shape_type shape_kind, gcalc_shape_info *si) +{ + if (reserve_shape_buffer(1) || reserve_op_buffer(1)) + return 1; + *si= add_new_shape(0, shape_kind); + add_operation(op_shape, *si); + return 0; +} + + +int Gcalc_function::repeat_expression(uint32 exp_pos) +{ + if (reserve_op_buffer(1)) + return 1; + add_operation(op_repeat, exp_pos); + return 0; +} + + +/* + Specify how many arguments we're going to have. +*/ + +int Gcalc_function::reserve_shape_buffer(uint n_shapes) +{ + return shapes_buffer.reserve(n_shapes * 4, 512); +} + + +/* + Specify how many operations we're going to have. +*/ + +int Gcalc_function::reserve_op_buffer(uint n_ops) +{ + return function_buffer.reserve(n_ops * 4, 512); +} + + +int Gcalc_function::alloc_states() +{ + if (function_buffer.reserve((n_shapes+1) * 2 * sizeof(int))) + return 1; + i_states= (int *) (function_buffer.ptr() + ALIGN_SIZE(function_buffer.length())); + b_states= i_states + (n_shapes + 1); + return 0; +} + + +int Gcalc_function::count_internal(const char *cur_func, uint set_type, + const char **end) +{ + uint c_op= uint4korr(cur_func); + op_type next_func= (op_type) (c_op & op_any); + int mask= (c_op & op_not) ? 1:0; + uint n_ops= c_op & ~(op_any | op_not | v_mask); + uint n_shape= c_op & ~(op_any | op_not | v_mask); /* same as n_ops */ + op_type v_state= (op_type) (c_op & v_mask); + int result= 0; + const char *sav_cur_func= cur_func; + + // GCALC_DBUG_ENTER("Gcalc_function::count_internal"); + + cur_func+= 4; + if (next_func == op_shape) + { + if (set_type == 0) + result= i_states[n_shape] | b_states[n_shape]; + /* the last call for the count_internal outside of all shapes. 
*/ + else if (set_type == 1) + result= 0; + else if (set_type == op_border) + result= b_states[n_shape]; + else if (set_type == op_internals) + result= i_states[n_shape] && !b_states[n_shape]; + goto exit; + } + + if (next_func == op_false) + { + result= 0; + goto exit; + } + + if (next_func == op_border || next_func == op_internals) + { + result= count_internal(cur_func, + (set_type == 1) ? set_type : next_func, &cur_func); + goto exit; + } + + if (next_func == op_repeat) + { + result= count_internal(function_buffer.ptr() + n_ops, set_type, 0); + goto exit; + } + + if (n_ops == 0) + return mask; + //GCALC_DBUG_RETURN(mask); + + result= count_internal(cur_func, set_type, &cur_func); + + while (--n_ops) + { + int next_res= count_internal(cur_func, set_type, &cur_func); + switch (next_func) + { + case op_union: + if (result == result_true || next_res == result_true) + result= result_true; + else if (result == result_unknown || next_res == result_unknown) + result= result_unknown; + else + result= result_false; + break; + case op_intersection: + if (result == result_false || next_res == result_false) + result= result_false; + else if (result == result_unknown || next_res == result_unknown) + result= result_unknown; + else + result= result_true; + break; + case op_symdifference: + if (result == result_unknown || next_res == result_unknown) + result= result_unknown; + else + result= result ^ next_res; + break; + case op_difference: + if (result == result_false || next_res == result_true) + result= result_false; + else if (result == result_unknown || next_res == result_unknown) + result= result_unknown; + else + result= result_true; + break; + default: + GCALC_DBUG_ASSERT(FALSE); + }; + } + +exit: + if (result != result_unknown) + result^= mask; + if (v_state != v_empty) + { + switch (v_state) + { + case v_find_t: + if (result == result_true) + { + c_op= (c_op & ~v_mask) | v_t_found; + int4store(sav_cur_func, c_op); + } + else + { + if (set_type != 1) + result= 
result_unknown; + } + break; + case v_find_f: + if (result == result_false) + { + c_op= (c_op & ~v_mask) | v_f_found; + int4store(sav_cur_func, c_op); + } + else + { + if (set_type != 1) + result= result_unknown; + } + break; + case v_t_found: + result= 1; + break; + case v_f_found: + result= 0; + break; + default: + GCALC_DBUG_ASSERT(0); + }; + } + + if (end) + *end= cur_func; + return result; + //GCALC_DBUG_RETURN(result); +} + + +void Gcalc_function::clear_i_states() +{ + for (uint i= 0; i < n_shapes; i++) + i_states[i]= 0; +} + + +void Gcalc_function::clear_b_states() +{ + for (uint i= 0; i < n_shapes; i++) + b_states[i]= 0; +} + + +/* + Clear the state of the object. +*/ + +void Gcalc_function::reset() +{ + n_shapes= 0; + shapes_buffer.length(0); + function_buffer.length(0); +} + + +int Gcalc_function::check_function(Gcalc_scan_iterator &scan_it) +{ + const Gcalc_scan_iterator::point *eq_start, *cur_eq; + const Gcalc_scan_iterator::event_point *events; + int result; + GCALC_DBUG_ENTER("Gcalc_function::check_function"); + + while (scan_it.more_points()) + { + if (scan_it.step()) + GCALC_DBUG_RETURN(-1); + events= scan_it.get_events(); + + /* these kinds of events don't change the function */ + Gcalc_point_iterator pit(&scan_it); + clear_b_states(); + clear_i_states(); + /* Walk to the event, marking polygons we met */ + for (; pit.point() != scan_it.get_event_position(); ++pit) + { + gcalc_shape_info si= pit.point()->get_shape(); + if ((get_shape_kind(si) == Gcalc_function::shape_polygon)) + invert_i_state(si); + } + if (events->simple_event()) + { + if (events->event == scev_end) + set_b_state(events->get_shape()); + + if ((result= count()) != result_unknown) + GCALC_DBUG_RETURN(result); + clear_b_states(); + continue; + } + + /* Check the status of the event point */ + for (; events; events= events->get_next()) + { + gcalc_shape_info si= events->get_shape(); + if (events->event == scev_thread || + events->event == scev_end || + (get_shape_kind(si) == 
Gcalc_function::shape_polygon)) + set_b_state(si); + else if (events->event == scev_single_point || + get_shape_kind(si) == Gcalc_function::shape_line) + set_i_state(si); + } + + if ((result= count()) != result_unknown) + GCALC_DBUG_RETURN(result); + + /* Set back states changed in the loop above. */ + for (events= scan_it.get_events(); events; events= events->get_next()) + { + gcalc_shape_info si= events->get_shape(); + if (events->event == scev_thread || + events->event == scev_end || + get_shape_kind(si) == Gcalc_function::shape_polygon) + clear_b_state(si); + else if (events->event == scev_single_point || + get_shape_kind(si) == Gcalc_function::shape_line) + clear_i_state(si); + } + + if (scan_it.get_event_position() == scan_it.get_event_end()) + continue; + + /* Check the status after the event */ + eq_start= pit.point(); + do + { + ++pit; + if (pit.point() != scan_it.get_event_end() && + eq_start->cmp_dx_dy(pit.point()) == 0) + continue; + for (cur_eq= eq_start; cur_eq != pit.point(); + cur_eq= cur_eq->get_next()) + { + gcalc_shape_info si= cur_eq->get_shape(); + if (get_shape_kind(si) == Gcalc_function::shape_polygon) + set_b_state(si); + else + invert_i_state(si); + } + if ((result= count()) != result_unknown) + GCALC_DBUG_RETURN(result); + + for (cur_eq= eq_start; cur_eq != pit.point(); cur_eq= cur_eq->get_next()) + { + gcalc_shape_info si= cur_eq->get_shape(); + if ((get_shape_kind(si) == Gcalc_function::shape_polygon)) + { + clear_b_state(si); + invert_i_state(si); + } + else + invert_i_state(cur_eq->get_shape()); + } + if ((result= count()) != result_unknown) + GCALC_DBUG_RETURN(result); + + eq_start= pit.point(); + } while (pit.point() != scan_it.get_event_end()); + } + GCALC_DBUG_RETURN(count_last()); +} + + +int Gcalc_operation_transporter::single_point(double x, double y) +{ + gcalc_shape_info si; + return m_fn->single_shape_op(Gcalc_function::shape_point, &si) || + int_single_point(si, x, y); +} + + +int Gcalc_operation_transporter::start_line() +{ 
+ int_start_line(); + return m_fn->single_shape_op(Gcalc_function::shape_line, &m_si); +} + + +int Gcalc_operation_transporter::complete_line() +{ + int_complete_line(); + return 0; +} + + +int Gcalc_operation_transporter::start_poly() +{ + int_start_poly(); + return m_fn->single_shape_op(Gcalc_function::shape_polygon, &m_si); +} + + +int Gcalc_operation_transporter::complete_poly() +{ + int_complete_poly(); + return 0; +} + + +int Gcalc_operation_transporter::start_ring() +{ + int_start_ring(); + return 0; +} + + +int Gcalc_operation_transporter::complete_ring() +{ + int_complete_ring(); + return 0; +} + + +int Gcalc_operation_transporter::add_point(double x, double y) +{ + return int_add_point(m_si, x, y); +} + + +int Gcalc_operation_transporter::start_collection(int n_objects) +{ + if (m_fn->reserve_shape_buffer(n_objects) || m_fn->reserve_op_buffer(1)) + return 1; + m_fn->add_operation(Gcalc_function::op_union, n_objects); + return 0; +} + + +int Gcalc_operation_transporter::empty_shape() +{ + if (m_fn->reserve_op_buffer(1)) + return 1; + m_fn->add_operation(Gcalc_function::op_false, 0); + return 0; +} + + +int Gcalc_result_receiver::start_shape(Gcalc_function::shape_type shape) +{ + GCALC_DBUG_ENTER("Gcalc_result_receiver::start_shape"); + if (buffer.reserve(4*2, 512)) + GCALC_DBUG_RETURN(1); + cur_shape= shape; + shape_pos= buffer.length(); + buffer.length(shape_pos + ((shape == Gcalc_function::shape_point) ? 
4:8)); + n_points= 0; + shape_area= 0.0; + + GCALC_DBUG_RETURN(0); +} + + +int Gcalc_result_receiver::add_point(double x, double y) +{ + GCALC_DBUG_ENTER("Gcalc_result_receiver::add_point"); + if (n_points && x == prev_x && y == prev_y) + GCALC_DBUG_RETURN(0); + + if (!n_points++) + { + prev_x= first_x= x; + prev_y= first_y= y; + GCALC_DBUG_RETURN(0); + } + + shape_area+= prev_x*y - prev_y*x; + + if (buffer.reserve(8*2, 512)) + GCALC_DBUG_RETURN(1); + buffer.q_append(prev_x); + buffer.q_append(prev_y); + prev_x= x; + prev_y= y; + GCALC_DBUG_RETURN(0); +} + + +int Gcalc_result_receiver::complete_shape() +{ + GCALC_DBUG_ENTER("Gcalc_result_receiver::complete_shape"); + if (n_points == 0) + { + buffer.length(shape_pos); + GCALC_DBUG_RETURN(0); + } + if (n_points == 1) + { + if (cur_shape != Gcalc_function::shape_point) + { + if (cur_shape == Gcalc_function::shape_hole) + { + buffer.length(shape_pos); + GCALC_DBUG_RETURN(0); + } + cur_shape= Gcalc_function::shape_point; + buffer.length(buffer.length()-4); + } + } + else + { + GCALC_DBUG_ASSERT(cur_shape != Gcalc_function::shape_point); + if (cur_shape == Gcalc_function::shape_hole) + { + shape_area+= prev_x*first_y - prev_y*first_x; + if (fabs(shape_area) < 1e-8) + { + buffer.length(shape_pos); + GCALC_DBUG_RETURN(0); + } + } + + if ((cur_shape == Gcalc_function::shape_polygon || + cur_shape == Gcalc_function::shape_hole) && + prev_x == first_x && prev_y == first_y) + { + n_points--; + buffer.write_at_position(shape_pos+4, n_points); + goto do_complete; + } + buffer.write_at_position(shape_pos+4, n_points); + } + + if (buffer.reserve(8*2, 512)) + GCALC_DBUG_RETURN(1); + buffer.q_append(prev_x); + buffer.q_append(prev_y); + +do_complete: + buffer.write_at_position(shape_pos, (uint32) cur_shape); + + if (!n_shapes++) + { + GCALC_DBUG_ASSERT(cur_shape != Gcalc_function::shape_hole); + common_shapetype= cur_shape; + } + else if (cur_shape == Gcalc_function::shape_hole) + { + ++n_holes; + } + else if (!collection_result && 
(cur_shape != common_shapetype)) + { + collection_result= true; + } + GCALC_DBUG_RETURN(0); +} + + +int Gcalc_result_receiver::single_point(double x, double y) +{ + return start_shape(Gcalc_function::shape_point) || + add_point(x, y) || + complete_shape(); +} + + +int Gcalc_result_receiver::done() +{ + return 0; +} + + +void Gcalc_result_receiver::reset() +{ + buffer.length(0); + collection_result= FALSE; + n_shapes= n_holes= 0; +} + + +int Gcalc_result_receiver::get_result_typeid() +{ + if (!n_shapes || collection_result) + return Geometry::wkb_geometrycollection; + + switch (common_shapetype) + { + case Gcalc_function::shape_polygon: + return (n_shapes - n_holes == 1) ? + Geometry::wkb_polygon : Geometry::wkb_multipolygon; + case Gcalc_function::shape_point: + return (n_shapes == 1) ? Geometry::wkb_point : Geometry::wkb_multipoint; + case Gcalc_function::shape_line: + return (n_shapes == 1) ? Geometry::wkb_linestring : + Geometry::wkb_multilinestring; + default: + GCALC_DBUG_ASSERT(0); + } + return 0; +} + + +int Gcalc_result_receiver::move_hole(uint32 dest_position, uint32 source_position, + uint32 *position_shift) +{ + char *ptr; + int source_len; + GCALC_DBUG_ENTER("Gcalc_result_receiver::move_hole"); + GCALC_DBUG_PRINT(("ps %d %d", dest_position, source_position)); + + *position_shift= source_len= buffer.length() - source_position; + + if (dest_position == source_position) + GCALC_DBUG_RETURN(0); + + if (buffer.reserve(source_len, MY_ALIGN(source_len, 512))) + GCALC_DBUG_RETURN(1); + + ptr= (char *) buffer.ptr(); + memmove(ptr + dest_position + source_len, ptr + dest_position, + buffer.length() - dest_position); + memcpy(ptr + dest_position, ptr + buffer.length(), source_len); + GCALC_DBUG_RETURN(0); +} + + +Gcalc_operation_reducer::Gcalc_operation_reducer(size_t blk_size) : + Gcalc_dyn_list(blk_size, sizeof(res_point)), +#ifndef GCALC_DBUG_OFF + n_res_points(0), +#endif /*GCALC_DBUG_OFF*/ + m_res_hook((Gcalc_dyn_list::Item **)&m_result), + 
m_first_active_thread(NULL) +{} + + +Gcalc_operation_reducer::Gcalc_operation_reducer( + const Gcalc_operation_reducer &gor) : + Gcalc_dyn_list(gor), +#ifndef GCALC_DBUG_OFF + n_res_points(0), +#endif /*GCALC_DBUG_OFF*/ + m_res_hook((Gcalc_dyn_list::Item **)&m_result), + m_first_active_thread(NULL) +{} + + +void Gcalc_operation_reducer::init(Gcalc_function *fn, modes mode) +{ + m_fn= fn; + m_mode= mode; + m_first_active_thread= NULL; + m_lines= NULL; + m_lines_hook= (Gcalc_dyn_list::Item **) &m_lines; + m_poly_borders= NULL; + m_poly_borders_hook= (Gcalc_dyn_list::Item **) &m_poly_borders; + GCALC_SET_TERMINATED(killed, 0); +} + + +Gcalc_operation_reducer:: +Gcalc_operation_reducer(Gcalc_function *fn, modes mode, size_t blk_size) : + Gcalc_dyn_list(blk_size, sizeof(res_point)), + m_res_hook((Gcalc_dyn_list::Item **)&m_result) +{ + init(fn, mode); +} + + +void Gcalc_operation_reducer::res_point::set(const Gcalc_scan_iterator *si) +{ + intersection_point= si->intersection_step(); + pi= si->get_cur_pi(); +} + + +Gcalc_operation_reducer::res_point * + Gcalc_operation_reducer::add_res_point(Gcalc_function::shape_type type) +{ + GCALC_DBUG_ENTER("Gcalc_operation_reducer::add_res_point"); + res_point *result= (res_point *)new_item(); + *m_res_hook= result; + result->prev_hook= m_res_hook; + m_res_hook= &result->next; + result->type= type; +#ifndef GCALC_DBUG_OFF + result->point_n= n_res_points++; +#endif /*GCALC_DBUG_OFF*/ + GCALC_DBUG_RETURN(result); +} + +int Gcalc_operation_reducer::add_line(int incoming, active_thread *t, + const Gcalc_scan_iterator::point *p) +{ + line *l= new_line(); + GCALC_DBUG_ENTER("Gcalc_operation_reducer::add_line"); + if (!l) + GCALC_DBUG_RETURN(1); + l->incoming= incoming; + l->t= t; + l->p= p; + *m_lines_hook= l; + m_lines_hook= &l->next; + GCALC_DBUG_RETURN(0); +} + + +int Gcalc_operation_reducer::add_poly_border(int incoming, + active_thread *t, int prev_state, const Gcalc_scan_iterator::point *p) +{ + poly_border *b= new_poly_border(); + 
GCALC_DBUG_ENTER("Gcalc_operation_reducer::add_poly_border"); + if (!b) + GCALC_DBUG_RETURN(1); + b->incoming= incoming; + b->t= t; + b->prev_state= prev_state; + b->p= p; + *m_poly_borders_hook= b; + m_poly_borders_hook= &b->next; + GCALC_DBUG_RETURN(0); +} + + +int Gcalc_operation_reducer::continue_range(active_thread *t, + const Gcalc_heap::Info *p, + const Gcalc_heap::Info *p_next) +{ + res_point *rp= add_res_point(t->rp->type); + GCALC_DBUG_ENTER("Gcalc_operation_reducer::continue_range"); + if (!rp) + GCALC_DBUG_RETURN(1); + rp->glue= NULL; + rp->down= t->rp; + t->rp->up= rp; + rp->intersection_point= false; + rp->pi= p; + t->rp= rp; + t->p1= p; + t->p2= p_next; + GCALC_DBUG_RETURN(0); +} + + +inline int Gcalc_operation_reducer::continue_i_range(active_thread *t, + const Gcalc_heap::Info *ii) +{ + res_point *rp= add_res_point(t->rp->type); + GCALC_DBUG_ENTER("Gcalc_operation_reducer::continue_i_range"); + if (!rp) + GCALC_DBUG_RETURN(1); + rp->glue= NULL; + rp->down= t->rp; + t->rp->up= rp; + rp->intersection_point= true; + rp->pi= ii; + t->rp= rp; + GCALC_DBUG_RETURN(0); +} + +int Gcalc_operation_reducer::end_couple(active_thread *t0, active_thread *t1, + const Gcalc_heap::Info *p) +{ + res_point *rp0, *rp1; + GCALC_DBUG_ENTER("Gcalc_operation_reducer::end_couple"); + GCALC_DBUG_ASSERT(t0->rp->type == t1->rp->type); + if (!(rp0= add_res_point(t0->rp->type)) || + !(rp1= add_res_point(t0->rp->type))) + GCALC_DBUG_RETURN(1); + rp0->down= t0->rp; + rp1->down= t1->rp; + rp1->glue= rp0; + rp0->glue= rp1; + rp0->up= rp1->up= NULL; + t0->rp->up= rp0; + t1->rp->up= rp1; + rp0->intersection_point= rp1->intersection_point= false; + rp0->pi= rp1->pi= p; + GCALC_DBUG_RETURN(0); +} + + +int Gcalc_operation_reducer::count_slice(Gcalc_scan_iterator *si) +{ + Gcalc_point_iterator pi(si); + int prev_state= 0; + int sav_prev_state; + active_thread *prev_range= NULL; + const Gcalc_scan_iterator::event_point *events; + const Gcalc_scan_iterator::point *eq_start; + active_thread 
**cur_t_hook= &m_first_active_thread; + active_thread **starting_t_hook; + active_thread *bottom_threads= NULL; + active_thread *eq_thread, *point_thread;; + GCALC_DBUG_ENTER("Gcalc_operation_reducer::count_slice"); + + m_fn->clear_i_states(); + /* Walk to the event, remembering what is needed. */ + for (; pi.point() != si->get_event_position(); + ++pi, cur_t_hook= (active_thread **) &(*cur_t_hook)->next) + { + active_thread *cur_t= *cur_t_hook; + if (cur_t->enabled() && + cur_t->rp->type == Gcalc_function::shape_polygon) + { + prev_state^= 1; + prev_range= prev_state ? cur_t : 0; + } + if (m_fn->get_shape_kind(pi.get_shape()) == Gcalc_function::shape_polygon) + m_fn->invert_i_state(pi.get_shape()); + } + + events= si->get_events(); + if (events->simple_event()) + { + active_thread *cur_t= *cur_t_hook; + switch (events->event) + { + case scev_point: + { + if (cur_t->enabled() && + continue_range(cur_t, events->pi, events->next_pi)) + GCALC_DBUG_RETURN(1); + break; + } + case scev_end: + { + if (cur_t->enabled() && end_line(cur_t, si)) + GCALC_DBUG_RETURN(1); + *cur_t_hook= cur_t->get_next(); + free_item(cur_t); + break; + } + case scev_two_ends: + { + if (cur_t->enabled() && cur_t->get_next()->enabled()) + { + /* When two threads are ended here */ + if (end_couple(cur_t, cur_t->get_next(), events->pi)) + GCALC_DBUG_RETURN(1); + } + else if (cur_t->enabled() || cur_t->get_next()->enabled()) + { + /* Rare case when edges of a polygon coincide */ + if (end_line(cur_t->enabled() ? 
cur_t : cur_t->get_next(), si)) + GCALC_DBUG_RETURN(1); + } + *cur_t_hook= cur_t->get_next()->get_next(); + free_item(cur_t->next); + free_item(cur_t); + break; + } + default: + GCALC_DBUG_ASSERT(0); + } + GCALC_DBUG_RETURN(0); + } + + starting_t_hook= cur_t_hook; + sav_prev_state= prev_state; + + /* Walk through the event, collecting all the 'incoming' threads */ + for (; events; events= events->get_next()) + { + active_thread *cur_t= *cur_t_hook; + + if (events->event == scev_single_point) + continue; + + if (events->event == scev_thread || + events->event == scev_two_threads) + { + active_thread *new_t= new_active_thread(); + if (!new_t) + GCALC_DBUG_RETURN(1); + new_t->rp= NULL; + /* Insert into the main thread list before the current */ + new_t->next= cur_t; + *cur_t_hook= new_t; + cur_t_hook= (active_thread **) &new_t->next; + } + else + { + if (events->is_bottom()) + { + /* Move thread from the main list to the bottom_threads. */ + *cur_t_hook= cur_t->get_next(); + cur_t->next= bottom_threads; + bottom_threads= cur_t; + } + if (cur_t->enabled()) + { + if (cur_t->rp->type == Gcalc_function::shape_line) + { + GCALC_DBUG_ASSERT(!prev_state); + add_line(1, cur_t, events); + } + else + { + add_poly_border(1, cur_t, prev_state, events); + prev_state^= 1; + } + if (!events->is_bottom()) + { + active_thread *new_t= new_active_thread(); + if (!new_t) + GCALC_DBUG_RETURN(1); + new_t->rp= NULL; + /* Replace the current thread with the new. 
*/ + new_t->next= cur_t->next; + *cur_t_hook= new_t; + cur_t_hook= (active_thread **) &new_t->next; + /* And move old to the bottom list */ + cur_t->next= bottom_threads; + bottom_threads= cur_t; + } + } + else if (!events->is_bottom()) + cur_t_hook= (active_thread **) &cur_t->next; + } + } + prev_state= sav_prev_state; + cur_t_hook= starting_t_hook; + + eq_start= pi.point(); + eq_thread= point_thread= *starting_t_hook; + m_fn->clear_b_states(); + while (eq_start != si->get_event_end()) + { + const Gcalc_scan_iterator::point *cur_eq; + int in_state, after_state; + + ++pi; + point_thread= point_thread->get_next(); + + if (pi.point() != si->get_event_end() && + eq_start->cmp_dx_dy(pi.point()) == 0) + continue; + + for (cur_eq= eq_start; cur_eq != pi.point(); cur_eq= cur_eq->get_next()) + m_fn->set_b_state(cur_eq->get_shape()); + in_state= m_fn->count(); + + m_fn->clear_b_states(); + for (cur_eq= eq_start; cur_eq != pi.point(); cur_eq= cur_eq->get_next()) + { + gcalc_shape_info si= cur_eq->get_shape(); + if ((m_fn->get_shape_kind(si) == Gcalc_function::shape_polygon)) + m_fn->invert_i_state(si); + } + after_state= m_fn->count(); + if (prev_state != after_state) + { + if (add_poly_border(0, eq_thread, prev_state, eq_start)) + GCALC_DBUG_RETURN(1); + } + else if (!prev_state /* &&!after_state */ && in_state) + { + if (add_line(0, eq_thread, eq_start)) + GCALC_DBUG_RETURN(1); + } + + prev_state= after_state; + eq_start= pi.point(); + eq_thread= point_thread; + } + + if (!sav_prev_state && !m_poly_borders && !m_lines) + { + /* Check if we need to add the event point itself */ + m_fn->clear_i_states(); + /* b_states supposed to be clean already */ + for (pi.restart(si); pi.point() != si->get_event_position(); ++pi) + { + if (m_fn->get_shape_kind(pi.get_shape()) == Gcalc_function::shape_polygon) + m_fn->invert_i_state(pi.get_shape()); + } + for (events= si->get_events(); events; events= events->get_next()) + m_fn->set_b_state(events->get_shape()); + + 
GCALC_DBUG_RETURN(m_fn->count() ? add_single_point(si) : 0); + } + + if (m_poly_borders) + { + *m_poly_borders_hook= NULL; + while (m_poly_borders) + { + poly_border *pb1, *pb2; + pb1= m_poly_borders; + GCALC_DBUG_ASSERT(m_poly_borders->next); + + pb2= get_pair_border(pb1); + /* Remove pb1 from the list. The pb2 already removed in get_pair_border. */ + m_poly_borders= pb1->get_next(); + if (connect_threads(pb1->incoming, pb2->incoming, + pb1->t, pb2->t, pb1->p, pb2->p, + prev_range, si, Gcalc_function::shape_polygon)) + GCALC_DBUG_RETURN(1); + + free_item(pb1); + free_item(pb2); + } + m_poly_borders_hook= (Gcalc_dyn_list::Item **) &m_poly_borders; + m_poly_borders= NULL; + } + + if (m_lines) + { + *m_lines_hook= NULL; + if (m_lines->get_next() && + !m_lines->get_next()->get_next()) + { + if (connect_threads(m_lines->incoming, m_lines->get_next()->incoming, + m_lines->t, m_lines->get_next()->t, + m_lines->p, m_lines->get_next()->p, + NULL, si, Gcalc_function::shape_line)) + GCALC_DBUG_RETURN(1); + } + else + { + for (line *cur_line= m_lines; cur_line; cur_line= cur_line->get_next()) + { + if (cur_line->incoming) + { + if (end_line(cur_line->t, si)) + GCALC_DBUG_RETURN(1); + } + else + start_line(cur_line->t, cur_line->p, si); + } + } + free_list(m_lines); + m_lines= NULL; + m_lines_hook= (Gcalc_dyn_list::Item **) &m_lines; + } + + if (bottom_threads) + free_list(bottom_threads); + + GCALC_DBUG_RETURN(0); +} + + +int Gcalc_operation_reducer::add_single_point(const Gcalc_scan_iterator *si) +{ + res_point *rp= add_res_point(Gcalc_function::shape_point); + GCALC_DBUG_ENTER("Gcalc_operation_reducer::add_single_point"); + if (!rp) + GCALC_DBUG_RETURN(1); + rp->glue= rp->up= rp->down= NULL; + rp->set(si); + GCALC_DBUG_RETURN(0); +} + + +Gcalc_operation_reducer::poly_border + *Gcalc_operation_reducer::get_pair_border(poly_border *b1) +{ + poly_border *prev_b= b1; + poly_border *result= b1->get_next(); + GCALC_DBUG_ENTER("Gcalc_operation_reducer::get_pair_border"); + if 
(b1->prev_state) + { + if (b1->incoming) + { + /* Find the first outgoing, otherwise the last one. */ + while (result->incoming && result->get_next()) + { + prev_b= result; + result= result->get_next(); + } + } + else + { + /* Get the last one */ + while (result->get_next()) + { + prev_b= result; + result= result->get_next(); + } + } + } + else /* !b1->prev_state */ + { + if (b1->incoming) + { + /* Get the next incoming, otherwise the last one. */ + while (!result->incoming && result->get_next()) + { + prev_b= result; + result= result->get_next(); + } + } + else + { + /* Just pick the next one */ + } + } + /* Delete the result from the list. */ + prev_b->next= result->next; + GCALC_DBUG_RETURN(result); +} + + +int Gcalc_operation_reducer::connect_threads( + int incoming_a, int incoming_b, + active_thread *ta, active_thread *tb, + const Gcalc_scan_iterator::point *pa, const Gcalc_scan_iterator::point *pb, + active_thread *prev_range, + const Gcalc_scan_iterator *si, Gcalc_function::shape_type s_t) +{ + GCALC_DBUG_ENTER("Gcalc_operation_reducer::connect_threads"); + GCALC_DBUG_PRINT(("incoming %d %d", incoming_a, incoming_b)); + if (incoming_a && incoming_b) + { + res_point *rpa, *rpb; + GCALC_DBUG_ASSERT(ta->rp->type == tb->rp->type); + if (!(rpa= add_res_point(ta->rp->type)) || + !(rpb= add_res_point(ta->rp->type))) + GCALC_DBUG_RETURN(1); + rpa->down= ta->rp; + rpb->down= tb->rp; + rpb->glue= rpa; + rpa->glue= rpb; + rpa->up= rpb->up= NULL; + ta->rp->up= rpa; + tb->rp->up= rpb; + rpa->set(si); + rpb->set(si); + ta->rp= tb->rp= NULL; + GCALC_DBUG_RETURN(0); + } + if (!incoming_a) + { + GCALC_DBUG_ASSERT(!incoming_b); + + res_point *rp0, *rp1; + if (!(rp0= add_res_point(s_t)) || !(rp1= add_res_point(s_t))) + GCALC_DBUG_RETURN(1); + rp0->glue= rp1; + rp1->glue= rp0; + rp0->set(si); + rp1->set(si); + rp0->down= rp1->down= NULL; + ta->rp= rp0; + tb->rp= rp1; + ta->p1= pa->pi; + ta->p2= pa->next_pi; + + tb->p1= pb->pi; + tb->p2= pb->next_pi; + + if (prev_range) + { + 
rp0->outer_poly= prev_range->thread_start; + tb->thread_start= prev_range->thread_start; + /* Check if needed */ + ta->thread_start= prev_range->thread_start; + } + else + { + rp0->outer_poly= 0; + ta->thread_start= rp0; + /* Check if needed */ + tb->thread_start= rp0; + } + GCALC_DBUG_RETURN(0); + } + /* else, if only ta is incoming */ + + GCALC_DBUG_ASSERT(tb != ta); + tb->rp= ta->rp; + tb->thread_start= ta->thread_start; + if (Gcalc_scan_iterator::point:: + cmp_dx_dy(ta->p1, ta->p2, pb->pi, pb->next_pi) != 0) + { + if (si->intersection_step() ? + continue_i_range(tb, si->get_cur_pi()) : + continue_range(tb, si->get_cur_pi(), pb->next_pi)) + GCALC_DBUG_RETURN(1); + } + tb->p1= pb->pi; + tb->p2= pb->next_pi; + + GCALC_DBUG_RETURN(0); +} + + +int Gcalc_operation_reducer::start_line(active_thread *t, + const Gcalc_scan_iterator::point *p, + const Gcalc_scan_iterator *si) +{ + res_point *rp= add_res_point(Gcalc_function::shape_line); + GCALC_DBUG_ENTER("Gcalc_operation_reducer::start_line"); + if (!rp) + GCALC_DBUG_RETURN(1); + rp->glue= rp->down= NULL; + rp->set(si); + t->rp= rp; + t->p1= p->pi; + t->p2= p->next_pi; + GCALC_DBUG_RETURN(0); +} + + +int Gcalc_operation_reducer::end_line(active_thread *t, + const Gcalc_scan_iterator *si) +{ + GCALC_DBUG_ENTER("Gcalc_operation_reducer::end_line"); + GCALC_DBUG_ASSERT(t->rp->type == Gcalc_function::shape_line); + res_point *rp= add_res_point(Gcalc_function::shape_line); + if (!rp) + GCALC_DBUG_RETURN(1); + rp->glue= rp->up= NULL; + rp->down= t->rp; + rp->set(si); + t->rp->up= rp; + t->rp= NULL; + + GCALC_DBUG_RETURN(0); +} + + +int Gcalc_operation_reducer::count_all(Gcalc_heap *hp) +{ + Gcalc_scan_iterator si; + GCALC_DBUG_ENTER("Gcalc_operation_reducer::count_all"); + si.init(hp); + GCALC_SET_TERMINATED(si.killed, killed); + while (si.more_points()) + { + if (si.step()) + GCALC_DBUG_RETURN(1); + if (count_slice(&si)) + GCALC_DBUG_RETURN(1); + } + GCALC_DBUG_RETURN(0); +} + +inline void 
Gcalc_operation_reducer::free_result(res_point *res) +{ + if ((*res->prev_hook= res->next)) + { + res->get_next()->prev_hook= res->prev_hook; + } + free_item(res); +} + + +inline int Gcalc_operation_reducer::get_single_result(res_point *res, + Gcalc_result_receiver *storage) +{ + GCALC_DBUG_ENTER("Gcalc_operation_reducer::get_single_result"); + if (res->intersection_point) + { + double x, y; + res->pi->calc_xy(&x, &y); + if (storage->single_point(x,y)) + GCALC_DBUG_RETURN(1); + } + else + if (storage->single_point(res->pi->node.shape.x, res->pi->node.shape.y)) + GCALC_DBUG_RETURN(1); + free_result(res); + GCALC_DBUG_RETURN(0); +} + + +int Gcalc_operation_reducer::get_result_thread(res_point *cur, + Gcalc_result_receiver *storage, + int move_upward, + res_point *first_poly_node) +{ + res_point *next; + bool glue_step= false; + double x, y; + GCALC_DBUG_ENTER("Gcalc_operation_reducer::get_result_thread"); + while (cur) + { + if (!glue_step) + { + if (cur->intersection_point) + { + cur->pi->calc_xy(&x, &y); + } + else + { + x= cur->pi->node.shape.x; + y= cur->pi->node.shape.y; + } + if (storage->add_point(x, y)) + GCALC_DBUG_RETURN(1); + } + + next= move_upward ? 
cur->up : cur->down; + if (!next && !glue_step) + { + next= cur->glue; + move_upward^= 1; + glue_step= true; + if (next) + next->glue= NULL; + } + else + glue_step= false; + + cur->first_poly_node= first_poly_node; + free_result(cur); + cur= next; + } + GCALC_DBUG_RETURN(0); +} + + +int Gcalc_operation_reducer::get_polygon_result(res_point *cur, + Gcalc_result_receiver *storage, + res_point *first_poly_node) +{ + GCALC_DBUG_ENTER("Gcalc_operation_reducer::get_polygon_result"); + res_point *glue= cur->glue; + glue->up->down= NULL; + free_result(glue); + GCALC_DBUG_RETURN(get_result_thread(cur, storage, 1, first_poly_node) || + storage->complete_shape()); +} + + +int Gcalc_operation_reducer::get_line_result(res_point *cur, + Gcalc_result_receiver *storage) +{ + res_point *next; + res_point *cur_orig= cur; + int move_upward= 1; + GCALC_DBUG_ENTER("Gcalc_operation_reducer::get_line_result"); + if (cur->glue) + { + /* Here we have to find the beginning of the line */ + next= cur->up; + move_upward= 1; + while (next) + { + cur= next; + next= move_upward ? 
next->up : next->down; + if (!next) + { + next= cur->glue; + if (next == cur_orig) + { + /* It's the line loop */ + cur= cur_orig; + cur->glue->glue= NULL; + move_upward= 1; + break; + } + move_upward^= 1; + } + } + } + + GCALC_DBUG_RETURN(get_result_thread(cur, storage, move_upward, 0) || + storage->complete_shape()); +} + + +int Gcalc_operation_reducer::get_result(Gcalc_result_receiver *storage) +{ + poly_instance *polygons= NULL; + + GCALC_DBUG_ENTER("Gcalc_operation_reducer::get_result"); + *m_res_hook= NULL; + + /* This is to workaround an old gcc's bug */ + if (m_res_hook == (Gcalc_dyn_list::Item **) &m_result) + goto done; + + while (m_result) + { + Gcalc_function::shape_type shape= m_result->type; + if (shape == Gcalc_function::shape_point) + { + if (get_single_result(m_result, storage)) + GCALC_DBUG_RETURN(1); + continue; + } + if (shape == Gcalc_function::shape_polygon) + { + if (m_result->outer_poly) + { + uint32 insert_position, hole_position, position_shift; + poly_instance *cur_poly; + insert_position= m_result->outer_poly->first_poly_node->poly_position; + GCALC_DBUG_ASSERT(insert_position); + hole_position= storage->position(); + storage->start_shape(Gcalc_function::shape_hole); + if (get_polygon_result(m_result, storage, + m_result->outer_poly->first_poly_node) || + storage->move_hole(insert_position, hole_position, + &position_shift)) + GCALC_DBUG_RETURN(1); + for (cur_poly= polygons; + cur_poly && *cur_poly->after_poly_position >= insert_position; + cur_poly= cur_poly->get_next()) + *cur_poly->after_poly_position+= position_shift; + } + else + { + uint32 *poly_position= &m_result->poly_position; + poly_instance *p= new_poly(); + p->after_poly_position= poly_position; + p->next= polygons; + polygons= p; + storage->start_shape(Gcalc_function::shape_polygon); + if (get_polygon_result(m_result, storage, m_result)) + GCALC_DBUG_RETURN(1); + *poly_position= storage->position(); + } + } + else + { + storage->start_shape(shape); + if 
(get_line_result(m_result, storage)) + GCALC_DBUG_RETURN(1); + } + } + +done: + m_res_hook= (Gcalc_dyn_list::Item **)&m_result; + storage->done(); + GCALC_DBUG_RETURN(0); +} + + +void Gcalc_operation_reducer::reset() +{ + free_list((Gcalc_heap::Item **) &m_result, m_res_hook); + m_res_hook= (Gcalc_dyn_list::Item **)&m_result; + free_list(m_first_active_thread); +} + +#endif /*HAVE_SPATIAL*/ + diff --git a/sql/gcalc_tools.h b/sql/gcalc_tools.h new file mode 100644 index 00000000..bb1f473e --- /dev/null +++ b/sql/gcalc_tools.h @@ -0,0 +1,359 @@ +/* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved. + Copyright (C) 2011 Monty Program Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifndef GCALC_TOOLS_INCLUDED +#define GCALC_TOOLS_INCLUDED + +#include "gcalc_slicescan.h" +#include "sql_string.h" + + +/* + The Gcalc_function class objects are used to check for a binary relation. + The relation can be constructed with the prefix notation using predicates as + op_not (as !A) + op_union ( A || B || C... ) + op_intersection ( A && B && C ... ) + op_symdifference ( A+B+C+... == 1 ) + op_difference ( A && !(B||C||..)) + with the calls of the add_operation(operation, n_operands) method. + The relation is calculated over a set of shapes, that in turn have + to be added with the add_new_shape() method. 
All the 'shapes' can + be set to 0 with clear_shapes() method and single value + can be changed with the invert_state() method. + Then the value of the relation can be calculated with the count() method. + Frequently used method is find_function(Gcalc_scan_iterator it) that + iterates through the 'it' until the relation becomes TRUE. +*/ + +class Gcalc_function +{ +private: + String shapes_buffer; + String function_buffer; + int *i_states; + int *b_states; + uint32 cur_object_id; + uint n_shapes; + int count_internal(const char *cur_func, uint set_type, + const char **end); +public: + enum op_type + { + v_empty= 0x00000000, + v_find_t= 0x01000000, + v_find_f= 0x02000000, + v_t_found= 0x03000000, + v_f_found= 0x04000000, + v_mask= 0x07000000, + + op_not= 0x80000000, + op_shape= 0x00000000, + op_union= 0x10000000, + op_intersection= 0x20000000, + op_symdifference= 0x30000000, + op_difference= 0x40000000, + op_repeat= 0x50000000, + op_border= 0x60000000, + op_internals= 0x70000000, + op_false= 0x08000000, + op_any= 0x78000000 /* The mask to get any of the operations */ + }; + enum shape_type + { + shape_point= 0, + shape_line= 1, + shape_polygon= 2, + shape_hole= 3 + }; + enum count_result + { + result_false= 0, + result_true= 1, + result_unknown= 2 + }; + Gcalc_function() : n_shapes(0) {} + gcalc_shape_info add_new_shape(uint32 shape_id, shape_type shape_kind); + /* + Adds the leaf operation that returns the shape value. + Also adds the shape to the list of operands. 
+ */ + int single_shape_op(shape_type shape_kind, gcalc_shape_info *si); + void add_operation(uint operation, uint32 n_operands); + void add_not_operation(op_type operation, uint32 n_operands); + uint32 get_next_expression_pos() { return function_buffer.length(); } + void add_operands_to_op(uint32 operation_pos, uint32 n_operands); + int repeat_expression(uint32 exp_pos); + void set_cur_obj(uint32 cur_obj) { cur_object_id= cur_obj; } + int reserve_shape_buffer(uint n_shapes); + int reserve_op_buffer(uint n_ops); + uint get_nshapes() const { return n_shapes; } + shape_type get_shape_kind(gcalc_shape_info si) const + { + return (shape_type) uint4korr(shapes_buffer.ptr() + (si*4)); + } + + void set_states(int *shape_states) { i_states= shape_states; } + int alloc_states(); + void invert_i_state(gcalc_shape_info shape) { i_states[shape]^= 1; } + void set_i_state(gcalc_shape_info shape) { i_states[shape]= 1; } + void clear_i_state(gcalc_shape_info shape) { i_states[shape]= 0; } + void set_b_state(gcalc_shape_info shape) { b_states[shape]= 1; } + void clear_b_state(gcalc_shape_info shape) { b_states[shape]= 0; } + int get_state(gcalc_shape_info shape) + { return i_states[shape] | b_states[shape]; } + int get_i_state(gcalc_shape_info shape) { return i_states[shape]; } + int get_b_state(gcalc_shape_info shape) { return b_states[shape]; } + int count() + { return count_internal(function_buffer.ptr(), 0, 0); } + int count_last() + { return count_internal(function_buffer.ptr(), 1, 0); } + void clear_i_states(); + void clear_b_states(); + void reset(); + + int check_function(Gcalc_scan_iterator &scan_it); +}; + + +/* + Gcalc_operation_transporter class extends the Gcalc_shape_transporter. + In addition to the parent's functionality, it fills the Gcalc_function + object so it has the function that determines the proper shape. + For example Multipolyline will be represented as an union of polylines. 
+*/ + +class Gcalc_operation_transporter : public Gcalc_shape_transporter +{ +protected: + Gcalc_function *m_fn; + gcalc_shape_info m_si; +public: + Gcalc_operation_transporter(Gcalc_function *fn, Gcalc_heap *heap) : + Gcalc_shape_transporter(heap), m_fn(fn) {} + + int single_point(double x, double y); + int start_line(); + int complete_line(); + int start_poly(); + int complete_poly(); + int start_ring(); + int complete_ring(); + int add_point(double x, double y); + int start_collection(int n_objects); + int empty_shape(); +}; + + +/* + When we calculate the result of an spatial operation like + Union or Intersection, we receive vertexes of the result + one-by-one, and probably need to treat them in variative ways. + So, the Gcalc_result_receiver class designed to get these + vertexes and construct shapes/objects out of them. + and to store the result in an appropriate format +*/ + +class Gcalc_result_receiver +{ + String buffer; + uint32 n_points; + Gcalc_function::shape_type common_shapetype; + bool collection_result; + uint32 n_shapes; + uint32 n_holes; + + Gcalc_function::shape_type cur_shape; + uint32 shape_pos; + double first_x, first_y, prev_x, prev_y; + double shape_area; +public: +Gcalc_result_receiver() : + n_points(0), + common_shapetype(Gcalc_function::shape_point), + collection_result(FALSE), n_shapes(0), n_holes(0), + cur_shape(Gcalc_function::shape_point), shape_pos(0) + {} + int start_shape(Gcalc_function::shape_type shape); + int add_point(double x, double y); + int complete_shape(); + int single_point(double x, double y); + int done(); + void reset(); + + const char *result() { return buffer.ptr(); } + uint length() { return buffer.length(); } + int get_nshapes() { return n_shapes; } + int get_nholes() { return n_holes; } + int get_result_typeid(); + uint32 position() { return buffer.length(); } + int move_hole(uint32 dest_position, uint32 source_position, + uint32 *position_shift); +}; + + +/* + Gcalc_operation_reducer class incapsulates the 
spatial + operation functionality. It analyses the slices generated by + the slicescan and calculates the shape of the result defined + by some Gcalc_function. +*/ + +class Gcalc_operation_reducer : public Gcalc_dyn_list +{ +public: + enum modes + { + /* Numeric values important here - careful with changing */ + default_mode= 0, + prefer_big_with_holes= 1, + polygon_selfintersections_allowed= 2, /* allowed in the result */ + line_selfintersections_allowed= 4 /* allowed in the result */ + }; + + Gcalc_operation_reducer(size_t blk_size=8192); + Gcalc_operation_reducer(const Gcalc_operation_reducer &gor); + void init(Gcalc_function *fn, modes mode= default_mode); + Gcalc_operation_reducer(Gcalc_function *fn, modes mode= default_mode, + size_t blk_size=8192); + GCALC_DECL_TERMINATED_STATE(killed) + int count_slice(Gcalc_scan_iterator *si); + int count_all(Gcalc_heap *hp); + int get_result(Gcalc_result_receiver *storage); + void reset(); + +#ifndef GCALC_DBUG_OFF + int n_res_points; +#endif /*GCALC_DBUG_OFF*/ + class res_point : public Gcalc_dyn_list::Item + { + public: + int intersection_point; + union + { + const Gcalc_heap::Info *pi; + res_point *first_poly_node; + }; + union + { + res_point *outer_poly; + uint32 poly_position; + }; + res_point *up; + res_point *down; + res_point *glue; + Gcalc_function::shape_type type; + Gcalc_dyn_list::Item **prev_hook; +#ifndef GCALC_DBUG_OFF + int point_n; +#endif /*GCALC_DBUG_OFF*/ + void set(const Gcalc_scan_iterator *si); + res_point *get_next() { return (res_point *)next; } + }; + + class active_thread : public Gcalc_dyn_list::Item + { + public: + res_point *rp; + res_point *thread_start; + + const Gcalc_heap::Info *p1, *p2; + res_point *enabled() { return rp; } + active_thread *get_next() { return (active_thread *)next; } + }; + + class poly_instance : public Gcalc_dyn_list::Item + { + public: + uint32 *after_poly_position; + poly_instance *get_next() { return (poly_instance *)next; } + }; + + class line : public 
Gcalc_dyn_list::Item + { + public: + active_thread *t; + int incoming; + const Gcalc_scan_iterator::point *p; + line *get_next() { return (line *)next; } + }; + + class poly_border : public Gcalc_dyn_list::Item + { + public: + active_thread *t; + int incoming; + int prev_state; + const Gcalc_scan_iterator::point *p; + poly_border *get_next() { return (poly_border *)next; } + }; + + line *m_lines; + Gcalc_dyn_list::Item **m_lines_hook; + poly_border *m_poly_borders; + Gcalc_dyn_list::Item **m_poly_borders_hook; + line *new_line() { return (line *) new_item(); } + poly_border *new_poly_border() { return (poly_border *) new_item(); } + int add_line(int incoming, active_thread *t, + const Gcalc_scan_iterator::point *p); + int add_poly_border(int incoming, active_thread *t, int prev_state, + const Gcalc_scan_iterator::point *p); + +protected: + Gcalc_function *m_fn; + Gcalc_dyn_list::Item **m_res_hook; + res_point *m_result; + int m_mode; + + res_point *result_heap; + active_thread *m_first_active_thread; + + res_point *add_res_point(Gcalc_function::shape_type type); + active_thread *new_active_thread() { return (active_thread *)new_item(); } + + poly_instance *new_poly() { return (poly_instance *) new_item(); } + +private: + int start_line(active_thread *t, const Gcalc_scan_iterator::point *p, + const Gcalc_scan_iterator *si); + int end_line(active_thread *t, const Gcalc_scan_iterator *si); + int connect_threads(int incoming_a, int incoming_b, + active_thread *ta, active_thread *tb, + const Gcalc_scan_iterator::point *pa, + const Gcalc_scan_iterator::point *pb, + active_thread *prev_range, + const Gcalc_scan_iterator *si, + Gcalc_function::shape_type s_t); + int add_single_point(const Gcalc_scan_iterator *si); + poly_border *get_pair_border(poly_border *b1); + int continue_range(active_thread *t, const Gcalc_heap::Info *p, + const Gcalc_heap::Info *p_next); + int continue_i_range(active_thread *t, + const Gcalc_heap::Info *ii); + int end_couple(active_thread *t0, 
active_thread *t1, const Gcalc_heap::Info *p); + int get_single_result(res_point *res, Gcalc_result_receiver *storage); + int get_result_thread(res_point *cur, Gcalc_result_receiver *storage, + int move_upward, res_point *first_poly_node); + int get_polygon_result(res_point *cur, Gcalc_result_receiver *storage, + res_point *first_poly_node); + int get_line_result(res_point *cur, Gcalc_result_receiver *storage); + + void free_result(res_point *res); +}; + +#endif /*GCALC_TOOLS_INCLUDED*/ + diff --git a/sql/gen_lex_hash.cc b/sql/gen_lex_hash.cc new file mode 100644 index 00000000..98e6205e --- /dev/null +++ b/sql/gen_lex_hash.cc @@ -0,0 +1,482 @@ +/* + Copyright (c) 2000, 2012, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + @details +@verbatim +The idea of presented algorithm see in +"The Art of Computer Programming" by Donald E. 
Knuth +Volume 3 "Sorting and searching" +(chapter 6.3 "Digital searching" - name and number of chapter + is back translation from Russian edition :)) + +as illustration of data structures, imagine next table: + +static SYMBOL symbols[] = { + { "ADD", SYM(ADD),0,0}, + { "AND", SYM(AND),0,0}, + { "DAY", SYM(DAY_SYM),0,0}, +}; + +for this structure, presented program generate next searching-structure: + ++-----------+-+-+-+ +| len |1|2|3| ++-----------+-+-+-+ +|first_char |0|0|a| +|last_char |0|0|d| +|link |0|0|+| + | + V + +----------+-+-+-+--+ + | 1 char|a|b|c|d | + +----------+-+-+-+--+ + |first_char|b|0|0|0 | + |last_char |n|0|0|-1| + |link |+|0|0|+ | + | | + | V + | symbols[2] ( "DAY" ) + V ++----------+--+-+-+-+-+-+-+-+-+-+--+ +| 2 char|d |e|f|j|h|i|j|k|l|m|n | ++----------+--+-+-+-+-+-+-+-+-+-+--+ +|first_char|0 |0|0|0|0|0|0|0|0|0|0 | +|last_char |-1|0|0|0|0|0|0|0|0|0|-1| +|link |+ |0|0|0|0|0|0|0|0|0|+ | + | | + V V + symbols[0] ( "ADD" ) symbols[1] ( "AND" ) + +for optimization, link is the 16-bit index in 'symbols' or 'sql_functions' +or search-array.. + +So, we can read full search-structure as 32-bit word +@endverbatim + +@todo + use instead to_upper_lex, special array + (substitute chars) without skip codes.. +@todo + try use reverse order of comparing.. 
+ +*/ + +#define NO_YACC_SYMBOLS +#undef CHECK_UNLIKELY +#include "mariadb.h" +#include "mysql_version.h" +#include "lex.h" +#include + +#include /* ORACLE_WELCOME_COPYRIGHT_NOTICE */ + +struct hash_lex_struct +{ + int first_char; + char last_char; + union{ + hash_lex_struct *char_tails; + int iresult; + }; + int ithis; +}; + +hash_lex_struct *get_hash_struct_by_len(hash_lex_struct **root_by_len, + int len, int *max_len) +{ + if (*max_lenfirst_char= 0; + *max_len= len; + } + return (*root_by_len)+(len-1); +} + +void insert_into_hash(hash_lex_struct *root, const char *name, + int len_from_begin, int index, int function) +{ + hash_lex_struct *end, *cur, *tails; + + if (!root->first_char) + { + root->first_char= -1; + root->iresult= index; + return; + } + + if (root->first_char == -1) + { + int index2= root->iresult; + const char *name2= (index2 < 0 ? sql_functions[-index2-1] : + symbols[index2]).name + len_from_begin; + root->first_char= (int) (uchar) name2[0]; + root->last_char= (char) root->first_char; + tails= (hash_lex_struct*)malloc(sizeof(hash_lex_struct)); + root->char_tails= tails; + tails->first_char= -1; + tails->iresult= index2; + } + + size_t real_size= (root->last_char-root->first_char+1); + + if (root->first_char>(*name)) + { + size_t new_size= root->last_char-(*name)+1; + if (unlikely(new_sizechar_tails; + tails= (hash_lex_struct*)realloc((char*)tails, + sizeof(hash_lex_struct)*new_size); + root->char_tails= tails; + memmove(tails+(new_size-real_size),tails,real_size*sizeof(hash_lex_struct)); + end= tails + new_size - real_size; + for (cur= tails; curfirst_char= 0; + root->first_char= (int) (uchar) *name; + } + + if (root->last_char<(*name)) + { + size_t new_size= (*name)-root->first_char+1; + if (unlikely(new_sizechar_tails; + tails= (hash_lex_struct*)realloc((char*)tails, + sizeof(hash_lex_struct)*new_size); + root->char_tails= tails; + end= tails + new_size; + for (cur= tails+real_size; curfirst_char= 0; + root->last_char= (*name); + } + + 
insert_into_hash(root->char_tails+(*name)-root->first_char, + name+1,len_from_begin+1,index,function); +} + + +hash_lex_struct *root_by_len= 0; +int max_len=0; + +hash_lex_struct *root_by_len2= 0; +int max_len2=0; + +void insert_symbols() +{ + size_t i= 0; + SYMBOL *cur; + for (cur= symbols; ilength,&max_len); + insert_into_hash(root,cur->name,0,(uint) i,0); + } +} + +void insert_sql_functions() +{ + int i= 0; + SYMBOL *cur; + for (cur= sql_functions; i < (int) array_elements(sql_functions); cur++, i++) + { + hash_lex_struct *root= + get_hash_struct_by_len(&root_by_len,cur->length,&max_len); + insert_into_hash(root,cur->name,0,-i-1,1); + } +} + +void calc_length() +{ + SYMBOL *cur, *end= symbols + array_elements(symbols); + for (cur= symbols; cur < end; cur++) + cur->length=(uchar) strlen(cur->name); + end= sql_functions + array_elements(sql_functions); + for (cur= sql_functions; curlength=(uchar) strlen(cur->name); +} + +void generate_find_structs() +{ + root_by_len= 0; + max_len=0; + + insert_symbols(); + + root_by_len2= root_by_len; + max_len2= max_len; + + root_by_len= 0; + max_len= 0; + + insert_symbols(); + insert_sql_functions(); +} + +char *hash_map= 0; +int size_hash_map= 0; + +void add_struct_to_map(hash_lex_struct *st) +{ + st->ithis= size_hash_map/4; + size_hash_map+= 4; + hash_map= (char*)realloc((char*)hash_map,size_hash_map); + hash_map[size_hash_map-4]= (char) (st->first_char == -1 ? 0 : + st->first_char); + hash_map[size_hash_map-3]= (char) (st->first_char == -1 || + st->first_char == 0 ? 
0 : st->last_char); + if (st->first_char == -1) + { + hash_map[size_hash_map-2]= ((unsigned int)(int16)st->iresult)&255; + hash_map[size_hash_map-1]= ((unsigned int)(int16)st->iresult)>>8; + } + else if (st->first_char == 0) + { + hash_map[size_hash_map-2]= ((unsigned int)(int16)array_elements(symbols))&255; + hash_map[size_hash_map-1]= ((unsigned int)(int16)array_elements(symbols))>>8; + } +} + + +void add_structs_to_map(hash_lex_struct *st, int len) +{ + hash_lex_struct *cur, *end= st+len; + for (cur= st; curfirst_char && cur->first_char != -1) + add_structs_to_map(cur->char_tails,cur->last_char-cur->first_char+1); + } +} + +void set_links(hash_lex_struct *st, int len) +{ + hash_lex_struct *cur, *end= st+len; + for (cur= st; curfirst_char != 0 && cur->first_char != -1) + { + int ilink= cur->char_tails->ithis; + hash_map[cur->ithis*4+2]= ilink%256; + hash_map[cur->ithis*4+3]= ilink/256; + set_links(cur->char_tails,cur->last_char-cur->first_char+1); + } + } +} + + +void print_hash_map(const char *name) +{ + char *cur; + int i; + + printf("static uchar %s[%d]= {\n",name,size_hash_map); + for (i=0, cur= hash_map; ilength!=s2->length || strncmp(s1->name,s2->name,s1->length)) + return 0; + + const char *err_tmpl= "\ngen_lex_hash fatal error : \ +Unfortunately gen_lex_hash can not generate a hash,\n since \ +your lex.h has duplicate definition for a symbol \"%s\"\n\n"; + printf (err_tmpl,s1->name); + fprintf (stderr,err_tmpl,s1->name); + + return 1; +} + + +int check_duplicates() +{ + SYMBOL *cur1, *cur2, *s_end, *f_end; + + s_end= symbols + array_elements(symbols); + f_end= sql_functions + array_elements(sql_functions); + + for (cur1= symbols; cur1sql_functions_max_len) return 0;\n\ + hash_map= sql_functions_map;\n\ + uint32 cur_struct= uint4korr(hash_map+((len-1)*4));\n\ +\n\ + for (;;){\n\ + uchar first_char= (uchar)cur_struct;\n\ +\n\ + if (first_char == 0)\n\ + {\n\ + int16 ires= (int16)(cur_struct>>16);\n\ + if (ires==array_elements(symbols)) return 0;\n\ + SYMBOL 
*res;\n\ + if (ires>=0) \n\ + res= symbols+ires;\n\ + else\n\ + res= sql_functions-ires-1;\n\ + uint count= (uint) (cur_str - s);\n\ + return lex_casecmp(cur_str,res->name+count,len-count) ? 0 : res;\n\ + }\n\ +\n\ + uchar cur_char= (uchar)to_upper_lex[(uchar)*cur_str];\n\ + if (cur_char>=8;\n\ + if (cur_char>(uchar)cur_struct) return 0;\n\ +\n\ + cur_struct>>=8;\n\ + cur_struct= uint4korr(hash_map+\n\ + (((uint16)cur_struct + cur_char - first_char)*4));\n\ + cur_str++;\n\ + }\n" +); + + printf("\ + }else{\n\ + if (len>symbols_max_len) return 0;\n\ + hash_map= symbols_map;\n\ + uint32 cur_struct= uint4korr(hash_map+((len-1)*4));\n\ +\n\ + for (;;){\n\ + uchar first_char= (uchar)cur_struct;\n\ +\n\ + if (first_char==0) {\n\ + int16 ires= (int16)(cur_struct>>16);\n\ + if (ires==array_elements(symbols)) return 0;\n\ + SYMBOL *res= symbols+ires;\n\ + uint count= (uint) (cur_str - s);\n\ + return lex_casecmp(cur_str,res->name+count,len-count)!=0 ? 0 : res;\n\ + }\n\ +\n\ + uchar cur_char= (uchar)to_upper_lex[(uchar)*cur_str];\n\ + if (cur_char>=8;\n\ + if (cur_char>(uchar)cur_struct) return 0;\n\ +\n\ + cur_struct>>=8;\n\ + cur_struct= uint4korr(hash_map+\n\ + (((uint16)cur_struct + cur_char - first_char)*4));\n\ + cur_str++;\n\ + }\n\ + }\n\ +}\n" +); + exit(0); +} + diff --git a/sql/gen_lex_token.cc b/sql/gen_lex_token.cc new file mode 100644 index 00000000..40145459 --- /dev/null +++ b/sql/gen_lex_token.cc @@ -0,0 +1,363 @@ +/* + Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include + +/* We only need the tokens here */ +#define YYSTYPE_IS_DECLARED +#include +#include + +#include /* ORACLE_WELCOME_COPYRIGHT_NOTICE */ + +/* + This is a tool used during build only, + so MY_MAX_TOKEN does not need to be exact, + only big enough to hold: + - 256 character terminal tokens + - YYNTOKENS named terminal tokens + from bison. + See also YYMAXUTOK. +*/ +#define MY_MAX_TOKEN 1100 +/** Generated token. */ +struct gen_lex_token_string +{ + const char *m_token_string; + int m_token_length; + bool m_append_space; + bool m_start_expr; +}; + +gen_lex_token_string compiled_token_array[MY_MAX_TOKEN]; +int max_token_seen= 0; + +char char_tokens[256]; + +int tok_generic_value= 0; +int tok_generic_value_list= 0; +int tok_row_single_value= 0; +int tok_row_single_value_list= 0; +int tok_row_multiple_value= 0; +int tok_row_multiple_value_list= 0; +int tok_ident= 0; +int tok_unused= 0; + +void set_token(int tok, const char *str) +{ + if (tok <= 0) + { + fprintf(stderr, "Bad token found\n"); + exit(1); + } + + if (tok > max_token_seen) + { + max_token_seen= tok; + } + + if (max_token_seen >= MY_MAX_TOKEN) + { + fprintf(stderr, "Added that many new keywords ? Increase MY_MAX_TOKEN\n"); + exit(1); + } + + compiled_token_array[tok].m_token_string= str; + compiled_token_array[tok].m_token_length= (int)strlen(str); + compiled_token_array[tok].m_append_space= true; + compiled_token_array[tok].m_start_expr= false; +} + +void set_start_expr_token(int tok) +{ + compiled_token_array[tok].m_start_expr= true; +} + +void compute_tokens() +{ + int tok; + unsigned int i; + char *str; + + /* + Default value. 
+ */ + for (tok= 0; tok < MY_MAX_TOKEN; tok++) + { + compiled_token_array[tok].m_token_string= "(unknown)"; + compiled_token_array[tok].m_token_length= 9; + compiled_token_array[tok].m_append_space= true; + compiled_token_array[tok].m_start_expr= false; + } + + /* + Tokens made of just one terminal character + */ + for (tok=0; tok < 256; tok++) + { + str= & char_tokens[tok]; + str[0]= (char) tok; + compiled_token_array[tok].m_token_string= str; + compiled_token_array[tok].m_token_length= 1; + compiled_token_array[tok].m_append_space= true; + } + + max_token_seen= 255; + + /* + String terminal tokens, used in sql_yacc.yy + */ + set_token(NEG, "~"); + set_token(TABLE_REF_PRIORITY, "TABLE_REF_PRIORITY"); + + /* + Tokens hard coded in sql_lex.cc + */ + + set_token(WITH_CUBE_SYM, "WITH CUBE"); + set_token(WITH_ROLLUP_SYM, "WITH ROLLUP"); + set_token(WITH_SYSTEM_SYM, "WITH SYSTEM"); + set_token(FOR_SYSTEM_TIME_SYM, "FOR SYSTEM_TIME"); + set_token(VALUES_IN_SYM, "VALUES IN"); + set_token(VALUES_LESS_SYM, "VALUES LESS"); + set_token(NOT2_SYM, "!"); + set_token(OR2_SYM, "|"); + set_token(PARAM_MARKER, "?"); + set_token(SET_VAR, ":="); + set_token(UNDERSCORE_CHARSET, "(_charset)"); + set_token(END_OF_INPUT, ""); + + /* + Values. + These tokens are all normalized later, + so this strings will never be displayed. + */ + set_token(BIN_NUM, "(bin)"); + set_token(DECIMAL_NUM, "(decimal)"); + set_token(FLOAT_NUM, "(float)"); + set_token(HEX_NUM, "(hex)"); + set_token(LEX_HOSTNAME, "(hostname)"); + set_token(LONG_NUM, "(long)"); + set_token(NUM, "(num)"); + set_token(TEXT_STRING, "(text)"); + set_token(NCHAR_STRING, "(nchar)"); + set_token(ULONGLONG_NUM, "(ulonglong)"); + + /* + Identifiers. 
+ */ + set_token(IDENT, "(id)"); + set_token(IDENT_QUOTED, "(id_quoted)"); + + /* + Unused tokens + */ + set_token(LOCATOR_SYM, "LOCATOR"); + set_token(SERVER_OPTIONS, "SERVER_OPTIONS"); + set_token(UDF_RETURNS_SYM, "UDF_RETURNS"); + + /* + See symbols[] in sql/lex.h + */ + for (i= 0; i< sizeof(symbols)/sizeof(symbols[0]); i++) + { + set_token(symbols[i].tok, symbols[i].name); + } + + /* + See sql_functions[] in sql/lex.h + */ + for (i= 0; i< sizeof(sql_functions)/sizeof(sql_functions[0]); i++) + { + set_token(sql_functions[i].tok, sql_functions[i].name); + } + + /* + Additional FAKE tokens, + used internally to normalize a digest text. + */ + + max_token_seen++; + tok_generic_value= max_token_seen; + set_token(tok_generic_value, "?"); + + max_token_seen++; + tok_generic_value_list= max_token_seen; + set_token(tok_generic_value_list, "?, ..."); + + max_token_seen++; + tok_row_single_value= max_token_seen; + set_token(tok_row_single_value, "(?)"); + + max_token_seen++; + tok_row_single_value_list= max_token_seen; + set_token(tok_row_single_value_list, "(?) /* , ... */"); + + max_token_seen++; + tok_row_multiple_value= max_token_seen; + set_token(tok_row_multiple_value, "(...)"); + + max_token_seen++; + tok_row_multiple_value_list= max_token_seen; + set_token(tok_row_multiple_value_list, "(...) /* , ... */"); + + max_token_seen++; + tok_ident= max_token_seen; + set_token(tok_ident, "(tok_id)"); + + max_token_seen++; + tok_unused= max_token_seen; + set_token(tok_unused, "UNUSED"); + + /* + Fix whitespace for some special tokens. + */ + + /* + The lexer parses "@@variable" as '@', '@', 'variable', + returning a token for '@' alone. + + This is incorrect, '@' is not really a token, + because the syntax "@ @ variable" (with spaces) is not accepted: + The lexer keeps some internal state after the '@' fake token. + + To work around this, digest text are printed as "@@variable". 
+ */ + compiled_token_array[(int) '@'].m_append_space= false; + + /* + Define additional properties for tokens. + + List all the token that are followed by an expression. + This is needed to differentiate unary from binary + '+' and '-' operators, because we want to: + - reduce to , + - preserve <...> as is. + */ + set_start_expr_token('('); + set_start_expr_token(','); + set_start_expr_token(EVERY_SYM); + set_start_expr_token(AT_SYM); + set_start_expr_token(STARTS_SYM); + set_start_expr_token(ENDS_SYM); + set_start_expr_token(DEFAULT); + set_start_expr_token(RETURN_MARIADB_SYM); + set_start_expr_token(RETURN_ORACLE_SYM); + set_start_expr_token(IF_SYM); + set_start_expr_token(ELSEIF_MARIADB_SYM); + set_start_expr_token(ELSEIF_ORACLE_SYM); + set_start_expr_token(CASE_SYM); + set_start_expr_token(WHEN_SYM); + set_start_expr_token(WHILE_SYM); + set_start_expr_token(UNTIL_SYM); + set_start_expr_token(SELECT_SYM); + + set_start_expr_token(OR_SYM); + set_start_expr_token(OR2_SYM); + set_start_expr_token(XOR); + set_start_expr_token(AND_SYM); + set_start_expr_token(AND_AND_SYM); + set_start_expr_token(NOT_SYM); + set_start_expr_token(BETWEEN_SYM); + set_start_expr_token(LIKE); + set_start_expr_token(REGEXP); + + set_start_expr_token('|'); + set_start_expr_token('&'); + set_start_expr_token(SHIFT_LEFT); + set_start_expr_token(SHIFT_RIGHT); + set_start_expr_token('+'); + set_start_expr_token('-'); + set_start_expr_token(INTERVAL_SYM); + set_start_expr_token('*'); + set_start_expr_token('/'); + set_start_expr_token('%'); + set_start_expr_token(DIV_SYM); + set_start_expr_token(MOD_SYM); + set_start_expr_token('^'); +} + +void print_tokens() +{ + int tok; + + printf("#ifdef LEX_TOKEN_WITH_DEFINITION\n"); + printf("lex_token_string lex_token_array[]=\n"); + printf("{\n"); + printf("/* PART 1: character tokens. */\n"); + + for (tok= 0; tok<256; tok++) + { + printf("/* %03d */ { \"\\x%02x\", 1, %s, %s},\n", + tok, + tok, + compiled_token_array[tok].m_append_space ? 
"true" : "false", + compiled_token_array[tok].m_start_expr ? "true" : "false"); + } + + printf("/* PART 2: named tokens. */\n"); + + for (tok= 256; tok<= max_token_seen; tok++) + { + printf("/* %03d */ { \"%s\", %d, %s, %s},\n", + tok, + compiled_token_array[tok].m_token_string, + compiled_token_array[tok].m_token_length, + compiled_token_array[tok].m_append_space ? "true" : "false", + compiled_token_array[tok].m_start_expr ? "true" : "false"); + } + + printf("/* DUMMY */ { \"\", 0, false, false}\n"); + printf("};\n"); + printf("#endif /* LEX_TOKEN_WITH_DEFINITION */\n"); + + printf("/* DIGEST specific tokens. */\n"); + printf("#define TOK_GENERIC_VALUE %d\n", tok_generic_value); + printf("#define TOK_GENERIC_VALUE_LIST %d\n", tok_generic_value_list); + printf("#define TOK_ROW_SINGLE_VALUE %d\n", tok_row_single_value); + printf("#define TOK_ROW_SINGLE_VALUE_LIST %d\n", tok_row_single_value_list); + printf("#define TOK_ROW_MULTIPLE_VALUE %d\n", tok_row_multiple_value); + printf("#define TOK_ROW_MULTIPLE_VALUE_LIST %d\n", tok_row_multiple_value_list); + printf("#define TOK_IDENT %d\n", tok_ident); + printf("#define TOK_UNUSED %d\n", tok_unused); +} + +int main(int argc,char **argv) +{ + puts("/*"); + puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2011")); + puts("*/"); + + printf("/*\n"); + printf(" This file is generated, do not edit.\n"); + printf(" See file sql/gen_lex_token.cc.\n"); + printf("*/\n"); + printf("struct lex_token_string\n"); + printf("{\n"); + printf(" const char *m_token_string;\n"); + printf(" int m_token_length;\n"); + printf(" bool m_append_space;\n"); + printf(" bool m_start_expr;\n"); + printf("};\n"); + printf("typedef struct lex_token_string lex_token_string;\n"); + + compute_tokens(); + print_tokens(); + + return 0; +} + diff --git a/sql/gen_win_tzname_data.ps1 b/sql/gen_win_tzname_data.ps1 new file mode 100644 index 00000000..474ab889 --- /dev/null +++ b/sql/gen_win_tzname_data.ps1 @@ -0,0 +1,12 @@ +# Generates a header file for converting between 
Windows timezone names to tzdb names +# using CLDR data. +# Usage: powershell -File gen_win_tzname_data.ps1 > win_tzname_data.h + +write-output "/* This file was generated using gen_win_tzname_data.ps1 */" +$xdoc = new-object System.Xml.XmlDocument +[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 +$xdoc.load("https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml") +$nodes = $xdoc.SelectNodes("//mapZone[@territory='001']") # use default territory (001) +foreach ($node in $nodes) { + write-output ('{L"'+ $node.other + '","'+ $node.type+'"},') +} diff --git a/sql/gen_yy_files.cmake b/sql/gen_yy_files.cmake new file mode 100644 index 00000000..3ceb60a9 --- /dev/null +++ b/sql/gen_yy_files.cmake @@ -0,0 +1,42 @@ +if(POLICY CMP0054) + cmake_policy(SET CMP0054 NEW) +endif() +file(READ "${IN}" data) +file(WRITE "${OUT1}" "") +file(WRITE "${OUT2}" "") +set(where 0) +if(NOT(BISON_VERSION VERSION_LESS "3.0.0")) + string(REPLACE "\n%pure-parser" "\n%define api.pure" data "${data}") +endif() +string(REGEX REPLACE "/\\* sql_yacc\\.yy \\*/" "/* DON'T EDIT THIS FILE. IT'S GENERATED. 
EDIT sql_yacc.yy INSTEAD */" data "${data}") +while(NOT data STREQUAL "") + string(REGEX MATCH "^(%[ie][^\n]*\n)|((%[^ie\n]|[^%\n])[^\n]*\n)+|\n+" line "${data}") + string(LENGTH "${line}" ll) + string(SUBSTRING "${data}" ${ll} -1 data) + + if (line MATCHES "^%ifdef +${VAL1} *\n") + set(where 1) + set(line "\n") + elseif(line MATCHES "^%ifdef +${VAL2} *\n") + set(where 2) + set(line "\n") + elseif(line MATCHES "^%else( *| +.*)\n" AND where GREATER 0) + math(EXPR where "3-${where}") + set(line "\n") + elseif(line MATCHES "^%endif( *| +.*)\n") + set(where 0) + set(line "\n") + endif() + if(where STREQUAL 1) + file(APPEND "${OUT1}" "${line}") + string(REGEX REPLACE "[^\n]+" "" line "${line}") + file(APPEND "${OUT2}" "${line}") + elseif(where STREQUAL 2) + file(APPEND "${OUT2}" "${line}") + string(REGEX REPLACE "[^\n]+" "" line "${line}") + file(APPEND "${OUT1}" "${line}") + else() + file(APPEND "${OUT1}" "${line}") + file(APPEND "${OUT2}" "${line}") + endif() +endwhile() diff --git a/sql/grant.cc b/sql/grant.cc new file mode 100644 index 00000000..1ba197bc --- /dev/null +++ b/sql/grant.cc @@ -0,0 +1,108 @@ +/* + Copyright (c) 2009, 2020, MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_acl.h" + + +bool Grant_privilege::add_column_privilege(THD *thd, + const Lex_ident_sys &name, + privilege_t which_grant) +{ + String *new_str= new (thd->mem_root) String((const char*) name.str, + name.length, + system_charset_info); + if (unlikely(new_str == NULL)) + return true; + List_iterator iter(m_columns); + class LEX_COLUMN *point; + while ((point=iter++)) + { + if (!my_strcasecmp(system_charset_info, + point->column.c_ptr(), new_str->c_ptr())) + break; + } + m_column_privilege_total|= which_grant; + if (point) + { + point->rights |= which_grant; + return false; + } + + LEX_COLUMN *col= new (thd->mem_root) LEX_COLUMN(*new_str, which_grant); + if (unlikely(col == NULL)) + return true; + return m_columns.push_back(col, thd->mem_root); +} + + +bool Grant_privilege::add_column_list_privilege(THD *thd, + List &list, + privilege_t privilege) +{ + Lex_ident_sys *col; + List_iterator it(list); + while ((col= it++)) + { + if (add_column_privilege(thd, *col, privilege)) + return true; + } + return false; +} + + +privilege_t Grant_object_name::all_privileges_by_type() const +{ + switch (m_type) { + case STAR: return DB_ACLS & ~GRANT_ACL; + case IDENT_STAR: return DB_ACLS & ~GRANT_ACL; + case STAR_STAR: return GLOBAL_ACLS & ~GRANT_ACL; + case TABLE_IDENT: return TABLE_ACLS & ~GRANT_ACL; + } + return NO_ACL; +} + + +bool Grant_privilege::set_object_name(THD *thd, + const Grant_object_name &ident, + SELECT_LEX *sel, + privilege_t with_grant_option) +{ + DBUG_ASSERT(!m_all_privileges || !m_columns.elements); + + m_db= ident.m_db; + if (m_all_privileges) + m_object_privilege= ident.all_privileges_by_type(); + m_object_privilege|= with_grant_option; + switch (ident.m_type) + { + case Lex_grant_object_name::STAR: + case 
Lex_grant_object_name::IDENT_STAR: + case Lex_grant_object_name::STAR_STAR: + if (!m_all_privileges && m_columns.elements) + { + // e.g. GRANT SELECT (a) ON db.* + my_error(ER_ILLEGAL_GRANT_FOR_TABLE, MYF(0)); + return true; + } + return false; + case Lex_grant_object_name::TABLE_IDENT: + m_db= ident.m_table_ident->db; + return !sel->add_table_to_list(thd, ident.m_table_ident, + NULL, TL_OPTION_UPDATING); + } + return false; // Make gcc happy +} diff --git a/sql/grant.h b/sql/grant.h new file mode 100644 index 00000000..5fbec446 --- /dev/null +++ b/sql/grant.h @@ -0,0 +1,99 @@ +/* + Copyright (c) 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_GRANT_INCLUDED +#define SQL_GRANT_INCLUDED + +#include "lex_string.h" +#include "privilege.h" + +class LEX_COLUMN; +class Lex_ident_sys; +class Table_ident; + +/* + Represents the object name in this standard SQL grammar: + GRANT ON +*/ +class Grant_object_name +{ +public: + enum Type + { + STAR, // ON * + IDENT_STAR, // ON db.* + STAR_STAR, // ON *.* + TABLE_IDENT // ON db.name + }; + Lex_cstring m_db; + Table_ident *m_table_ident; + Type m_type; +public: + Grant_object_name(Table_ident *table_ident) + :m_table_ident(table_ident), + m_type(TABLE_IDENT) + { } + Grant_object_name(const LEX_CSTRING &db, Type type) + :m_db(db), + m_table_ident(NULL), + m_type(type) + { } + privilege_t all_privileges_by_type() const; +}; + + + +/* + Represents standard SQL statements described by: + - + - +*/ +class Grant_privilege +{ +protected: + List m_columns; + Lex_cstring m_db; + privilege_t m_object_privilege; + privilege_t m_column_privilege_total; + bool m_all_privileges; +public: + Grant_privilege() + :m_object_privilege(NO_ACL), + m_column_privilege_total(NO_ACL), + m_all_privileges(false) + { } + Grant_privilege(privilege_t privilege, bool all_privileges) + :m_object_privilege(privilege), + m_column_privilege_total(NO_ACL), + m_all_privileges(all_privileges) + { } + void add_object_privilege(privilege_t privilege) + { + m_object_privilege|= privilege; + } + bool add_column_privilege(THD *thd, const Lex_ident_sys &col, + privilege_t privilege); + bool add_column_list_privilege(THD *thd, List &list, + privilege_t privilege); + bool set_object_name(THD *thd, + const Grant_object_name &ident, + SELECT_LEX *sel, + privilege_t with_grant_option); + const List & columns() const { return m_columns; } +}; + + +#endif // SQL_GRANT_INCLUDED diff --git 
a/sql/group_by_handler.cc b/sql/group_by_handler.cc new file mode 100644 index 00000000..7b998494 --- /dev/null +++ b/sql/group_by_handler.cc @@ -0,0 +1,145 @@ +/* + Copyright (c) 2014, 2015 SkySQL Ab & MariaDB Foundation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + This file implements the group_by_handler code. This interface + can be used by storage handlers that can intercept summary or GROUP + BY queries from MariaDB and itself return the result to the user or + upper level. 
+*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_select.h" + +/* + Same return values as do_select(); + + @retval + 0 if ok + @retval + 1 if error is sent + @retval + -1 if error should be sent +*/ + +int Pushdown_query::execute(JOIN *join) +{ + int err; + ha_rows max_limit; + bool reset_limit= FALSE; + Item **reset_item= 0; + THD *thd= handler->thd; + TABLE *table= handler->table; + DBUG_ENTER("Pushdown_query::execute"); + + if ((err= handler->init_scan())) + goto error; + + if (store_data_in_temp_table) + { + max_limit= join->tmp_table_param.end_write_records; + reset_limit= TRUE; + } + else + { + max_limit= join->unit->lim.get_select_limit(); + if (join->unit->fake_select_lex) + reset_item= &join->unit->fake_select_lex->limit_params.select_limit; + } + + while (!(err= handler->next_row())) + { + if (unlikely(thd->check_killed())) + { + handler->end_scan(); + DBUG_RETURN(-1); + } + + /* Check if we can accept the row */ + if (!having || having->val_bool()) + { + if (store_data_in_temp_table) + { + if ((err= table->file->ha_write_tmp_row(table->record[0]))) + { + bool is_duplicate; + if (likely(!table->file->is_fatal_error(err, HA_CHECK_DUP))) + continue; // Distinct elimination + + if (create_internal_tmp_table_from_heap(thd, table, + join->tmp_table_param. + start_recinfo, + &join->tmp_table_param. + recinfo, + err, 1, &is_duplicate)) + DBUG_RETURN(1); + if (is_duplicate) + continue; + } + } + else + { + if (join->do_send_rows) + { + int error; + /* result < 0 if row was not accepted and should not be counted */ + if (unlikely((error= + join->result->send_data_with_check(*join->fields, + join->unit, + join->send_records)))) + { + handler->end_scan(); + DBUG_RETURN(error < 0 ? 
0 : -1); + } + } + } + + /* limit handling */ + if (++join->send_records >= max_limit && join->do_send_rows) + { + if (!(join->select_options & OPTION_FOUND_ROWS)) + break; // LIMIT reached + join->do_send_rows= 0; // Calculate FOUND_ROWS() + if (reset_limit) + join->unit->lim.set_unlimited(); + if (reset_item) + *reset_item= 0; + } + } + } + if (err != 0 && err != HA_ERR_END_OF_FILE) + goto error; + + if ((err= handler->end_scan())) + goto error_2; + if (!store_data_in_temp_table && join->result->send_eof()) + DBUG_RETURN(1); // Don't send error to client + + DBUG_RETURN(0); + +error: + handler->end_scan(); +error_2: + handler->print_error(err, MYF(0)); + DBUG_RETURN(-1); // Error not sent to client +} + + +void group_by_handler::print_error(int error, myf errflag) +{ + my_error(ER_GET_ERRNO, MYF(0), error, hton_name(ht)->str); +} diff --git a/sql/group_by_handler.h b/sql/group_by_handler.h new file mode 100644 index 00000000..5457cb77 --- /dev/null +++ b/sql/group_by_handler.h @@ -0,0 +1,108 @@ +/* + Copyright (c) 2014, 2015 SkySQL Ab & MariaDB Foundation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef GROUP_BY_HANDLER_INCLUDED +#define GROUP_BY_HANDLER_INCLUDED + +class Select_limit_counters; +/* + This file implements the group_by_handler interface. 
This interface + can be used by storage handlers that can intercept summary or GROUP + BY queries from MariaDB and itself return the result to the user or + upper level. It is part of the Storage Engine API + + Both main and sub queries are supported. Here are some examples of what the + storage engine could intersept: + + SELECT count(*) FROM t1; + SELECT a,count(*) FROM t1 group by a; + SELECT a,count(*) as sum FROM t1 where b > 10 group by a, order by sum; + SELECT a,count(*) FROM t1,t2; + SELECT a, (select sum(*) from t2 where t1.a=t2.a) from t2; +*/ + +/** + The structure describing various parts of the query + + The engine is supposed to take out parts that it can do internally. + For example, if the engine can return results sorted according to + the specified order_by clause, it sets Query::order_by=NULL before + returning. + + At the moment the engine must take group_by (or return an error), and + optionally can take distinct, where, order_by, and having. + + The engine should not modify the select list. It is the extended SELECT + clause (extended, because it has more items than the original + user-specified SELECT clause) and it contains all aggregate functions, + used in the query. +*/ +struct Query +{ + List *select; + bool distinct; + TABLE_LIST *from; + Item *where; + ORDER *group_by; + ORDER *order_by; + Item *having; + // LIMIT + Select_limit_counters *limit; +}; + +class group_by_handler +{ +public: + THD *thd; + handlerton *ht; + + /* + Temporary table where all results should be stored in record[0] + The table has a field for every item from the Query::select list. + */ + TABLE *table; + + group_by_handler(THD *thd_arg, handlerton *ht_arg) + : thd(thd_arg), ht(ht_arg), table(0) {} + virtual ~group_by_handler() = default; + + /* + Functions to scan data. All these returns 0 if ok, error code in case + of error + */ + + /* + Initialize group_by scan, prepare for next_row(). 
+ If this is a sub query with group by, this can be called many times for + a query. + */ + virtual int init_scan()= 0; + + /* + Return next group by result in table->record[0]. + Return 0 if row found, HA_ERR_END_OF_FILE if last row and other error + number in case of fatal error. + */ + virtual int next_row()= 0; + + /* End scanning */ + virtual int end_scan()=0; + + /* Report errors */ + virtual void print_error(int error, myf errflag); +}; + +#endif //GROUP_BY_HANDLER_INCLUDED diff --git a/sql/gstream.cc b/sql/gstream.cc new file mode 100644 index 00000000..f8e84e70 --- /dev/null +++ b/sql/gstream.cc @@ -0,0 +1,147 @@ +/* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2009, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Functions to read and parse geometrical data. + NOTE: These functions assumes that the string is end \0 terminated! 
+*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "gstream.h" +#include "m_string.h" // LEX_STRING +#include "mysqld.h" + +enum Gis_read_stream::enum_tok_types Gis_read_stream::get_next_toc_type() +{ + skip_space(); + if (m_cur >= m_limit) + return eostream; + if (my_isvar_start(&my_charset_bin, *m_cur)) + return word; + if ((*m_cur >= '0' && *m_cur <= '9') || *m_cur == '-' || *m_cur == '+') + return numeric; + if (*m_cur == '(') + return l_bra; + if (*m_cur == ')') + return r_bra; + if (*m_cur == ',') + return comma; + return unknown; +} + + +bool Gis_read_stream::lookup_next_word(LEX_STRING *res) +{ + const char *cur= m_cur; + + skip_space(); + res->str= (char*) cur; + /* The following will also test for \0 */ + if ((cur >= m_limit) || !my_isvar_start(&my_charset_bin, *cur)) + return 1; + + /* + We can't combine the following increment with my_isvar() because + my_isvar() is a macro that would cause side effects + */ + cur++; + while ((cur < m_limit) && my_isvar(&my_charset_bin, *cur)) + cur++; + + res->length= (uint32) (cur - res->str); + return 0; +} + + +bool Gis_read_stream::get_next_word(LEX_STRING *res) +{ + skip_space(); + res->str= (char*) m_cur; + /* The following will also test for \0 */ + if ((m_cur >= m_limit) || !my_isvar_start(&my_charset_bin, *m_cur)) + return 1; + + /* + We can't combine the following increment with my_isvar() because + my_isvar() is a macro that would cause side effects + */ + m_cur++; + while ((m_cur < m_limit) && my_isvar(&my_charset_bin, *m_cur)) + m_cur++; + + res->length= (uint32) (m_cur - res->str); + return 0; +} + + +/* + Read a floating point number + + NOTE: Number must start with a digit or sign. 
It can't start with a decimal + point +*/ + +bool Gis_read_stream::get_next_number(double *d) +{ + char *endptr; + int err; + + skip_space(); + + if ((m_cur >= m_limit) || + ((*m_cur < '0' || *m_cur > '9') && *m_cur != '-' && *m_cur != '+')) + { + set_error_msg("Numeric constant expected"); + return 1; + } + + *d = m_charset->strntod((char *)m_cur, (uint) (m_limit-m_cur), &endptr, &err); + if (err) + return 1; + if (endptr) + m_cur = endptr; + return 0; +} + + +bool Gis_read_stream::check_next_symbol(char symbol) +{ + skip_space(); + if ((m_cur >= m_limit) || (*m_cur != symbol)) + { + char buff[32]; + strmov(buff, "'?' expected"); + buff[2]= symbol; + set_error_msg(buff); + return 1; + } + m_cur++; + return 0; +} + + +/* + Remember error message. +*/ + +void Gis_read_stream::set_error_msg(const char *msg) +{ + size_t len= strlen(msg); // ok in this context + m_err_msg= (char *) my_realloc(key_memory_Gis_read_stream_err_msg, + m_err_msg, (uint) len + 1, MYF(MY_ALLOW_ZERO_PTR)); + memcpy(m_err_msg, msg, len + 1); +} diff --git a/sql/gstream.h b/sql/gstream.h new file mode 100644 index 00000000..c5c71539 --- /dev/null +++ b/sql/gstream.h @@ -0,0 +1,92 @@ +#ifndef GSTREAM_INCLUDED +#define GSTREAM_INCLUDED + +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#include /* MY_ALLOW_ZERO_PTR */ +#include "m_ctype.h" /* my_charset_latin1, my_charset_bin */ + +class Gis_read_stream +{ +public: + enum enum_tok_types + { + unknown, + eostream, + word, + numeric, + l_bra, + r_bra, + comma + }; + + Gis_read_stream(CHARSET_INFO *charset, const char *buffer, int size) + :m_cur(buffer), m_limit(buffer + size), m_err_msg(NULL), m_charset(charset) + {} + Gis_read_stream(): m_cur(NullS), m_limit(NullS), m_err_msg(NullS) + {} + ~Gis_read_stream() + { + my_free(m_err_msg); + } + + enum enum_tok_types get_next_toc_type(); + bool lookup_next_word(LEX_STRING *res); + bool get_next_word(LEX_STRING *); + bool get_next_number(double *); + bool check_next_symbol(char); + + inline void skip_space() + { + while ((m_cur < m_limit) && my_isspace(&my_charset_latin1, *m_cur)) + m_cur++; + } + /* Skip next character, if match. Return 1 if no match */ + inline bool skip_char(char skip) + { + skip_space(); + if ((m_cur >= m_limit) || *m_cur != skip) + return 1; /* Didn't find char */ + m_cur++; + return 0; + } + /* Returns the next notempty character. */ + char next_symbol() + { + skip_space(); + if (m_cur >= m_limit) + return 0; /* EOL meet. 
*/ + return *m_cur; + } + void set_error_msg(const char *msg); + + // caller should free this pointer + char *get_error_msg() + { + char *err_msg = m_err_msg; + m_err_msg= NullS; + return err_msg; + } + +protected: + const char *m_cur; + const char *m_limit; + char *m_err_msg; + CHARSET_INFO *m_charset; +}; + +#endif /* GSTREAM_INCLUDED */ diff --git a/sql/ha_handler_stats.h b/sql/ha_handler_stats.h new file mode 100644 index 00000000..726ba604 --- /dev/null +++ b/sql/ha_handler_stats.h @@ -0,0 +1,59 @@ +#ifndef HA_HANDLER_STATS_INCLUDED +#define HA_HANDLER_STATS_INCLUDED +/* + Copyright (c) 2023, MariaDB Foundation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/* Definitions for parameters to do with handler-routines */ + +class ha_handler_stats +{ +public: + ulonglong pages_accessed; /* Pages accessed from page cache */ + ulonglong pages_updated; /* Pages changed in page cache */ + ulonglong pages_read_count; /* Pages read from disk */ + ulonglong pages_read_time; /* Time reading pages, in microsec. 
*/ + ulonglong undo_records_read; + ulonglong engine_time; /* Time spent in engine in microsec */ + uint active; /* <> 0 if status has to be updated */ +#define first_stat pages_accessed +#define last_stat engine_time + inline void reset() + { + bzero((void*) this, sizeof(*this)); + } + inline void add(ha_handler_stats *stats) + { + ulonglong *to= &first_stat; + ulonglong *from= &stats->first_stat; + do + { + (*to)+= *from++; + } while (to++ != &last_stat); + } + inline bool has_stats() + { + ulonglong *to= &first_stat; + do + { + if (*to) + return 1; + } while (to++ != &last_stat); + return 0; + } +}; +#endif /* HA_HANDLER_STATS_INCLUDED */ diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc new file mode 100644 index 00000000..5eadbe7e --- /dev/null +++ b/sql/ha_partition.cc @@ -0,0 +1,12261 @@ +/* + Copyright (c) 2005, 2019, Oracle and/or its affiliates. + Copyright (c) 2009, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/* + This handler was developed by Mikael Ronstrom for version 5.1 of MySQL. + It is an abstraction layer on top of other handlers such as MyISAM, + InnoDB, Federated, Berkeley DB and so forth. Partitioned tables can also + be handled by a storage engine. The current example of this is NDB + Cluster that has internally handled partitioning. This have benefits in + that many loops needed in the partition handler can be avoided. 
+ + Partitioning has an inherent feature which in some cases is positive and + in some cases is negative. It splits the data into chunks. This makes + the data more manageable, queries can easily be parallelised towards the + parts and indexes are split such that there are less levels in the + index trees. The inherent disadvantage is that to use a split index + one has to scan all index parts which is ok for large queries but for + small queries it can be a disadvantage. + + Partitioning lays the foundation for more manageable databases that are + extremely large. It does also lay the foundation for more parallelism + in the execution of queries. This functionality will grow with later + versions of MySQL/MariaDB. + + The partition is setup to use table locks. It implements an partition "SHARE" + that is inserted into a hash by table name. You can use this to store + information of state that any partition handler object will be able to see + if it is using the same table. + + Please read the object definition in ha_partition.h before reading the rest + if this file. 
+*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_parse.h" // append_file_to_dir +#include "create_options.h" + +#ifdef WITH_PARTITION_STORAGE_ENGINE +#include "ha_partition.h" +#include "sql_table.h" // tablename_to_filename +#include "key.h" +#include "sql_plugin.h" +#include "sql_show.h" // append_identifier +#include "sql_admin.h" // SQL_ADMIN_MSG_TEXT_SIZE +#include "sql_select.h" +#include "ddl_log.h" + +#include "debug_sync.h" + +/* First 4 bytes in the .par file is the number of 32-bit words in the file */ +#define PAR_WORD_SIZE 4 +/* offset to the .par file checksum */ +#define PAR_CHECKSUM_OFFSET 4 +/* offset to the total number of partitions */ +#define PAR_NUM_PARTS_OFFSET 8 +/* offset to the engines array */ +#define PAR_ENGINES_OFFSET 12 +#define PARTITION_ENABLED_TABLE_FLAGS (HA_FILE_BASED | \ + HA_REC_NOT_IN_SEQ | \ + HA_CAN_REPAIR | \ + HA_REUSES_FILE_NAMES) +#define PARTITION_DISABLED_TABLE_FLAGS (HA_CAN_GEOMETRY | \ + HA_DUPLICATE_POS | \ + HA_CAN_INSERT_DELAYED | \ + HA_READ_BEFORE_WRITE_REMOVAL |\ + HA_CAN_TABLES_WITHOUT_ROLLBACK) + +static const char *ha_par_ext= PAR_EXT; + +/**************************************************************************** + MODULE create/delete handler object +****************************************************************************/ + +static handler *partition_create_handler(handlerton *hton, + TABLE_SHARE *share, + MEM_ROOT *mem_root); +static uint partition_flags(); +static alter_table_operations alter_table_flags(alter_table_operations flags); + + +int ha_partition::notify_tabledef_changed(LEX_CSTRING *db, + LEX_CSTRING *org_table_name, + LEX_CUSTRING *frm, + LEX_CUSTRING *version) +{ + char from_buff[FN_REFLEN + 1], from_lc_buff[FN_REFLEN + 1]; + const char *from_path, *name_buffer_ptr, *from; + int res= 0; + handler **file= m_file; + DBUG_ENTER("ha_partition::notify_tabledef_changed"); + + from= table->s->normalized_path.str; + + /* setup m_name_buffer_ptr */ + if 
(read_par_file(table->s->normalized_path.str)) + DBUG_RETURN(1); + + from_path= get_canonical_filename(*file, from, from_lc_buff); + name_buffer_ptr= m_name_buffer_ptr; + do + { + LEX_CSTRING table_name; + const char *table_name_ptr; + if (create_partition_name(from_buff, sizeof(from_buff), + from_path, name_buffer_ptr, + NORMAL_PART_NAME, FALSE)) + res=1; + table_name_ptr= from_buff + dirname_length(from_buff); + + lex_string_set3(&table_name, table_name_ptr, strlen(table_name_ptr)); + + if (((*file)->ht)->notify_tabledef_changed((*file)->ht, db, &table_name, + frm, version, *file)) + res=1; + name_buffer_ptr= strend(name_buffer_ptr) + 1; + } while (*(++file)); + DBUG_RETURN(res); +} + + +static int +partition_notify_tabledef_changed(handlerton *, + LEX_CSTRING *db, + LEX_CSTRING *table, + LEX_CUSTRING *frm, + LEX_CUSTRING *version, + handler *file) +{ + DBUG_ENTER("partition_notify_tabledef_changed"); + DBUG_RETURN(static_cast + (file)->notify_tabledef_changed(db, table, frm, version)); +} + + +/* + If frm_error() is called then we will use this to to find out what file + extensions exist for the storage engine. This is also used by the default + rename_table and delete_table method in handler.cc. 
+*/ +static const char *ha_partition_ext[]= +{ + ha_par_ext, NullS +}; + +static PSI_memory_key key_memory_Partition_share; +static PSI_memory_key key_memory_partition_sort_buffer; +static PSI_memory_key key_memory_Partition_admin; + +static PSI_memory_key key_memory_ha_partition_file; +//static PSI_memory_key key_memory_ha_partition_engine_array; +static PSI_memory_key key_memory_ha_partition_part_ids; + +#ifdef HAVE_PSI_INTERFACE +PSI_mutex_key key_partition_auto_inc_mutex; +PSI_file_key key_file_ha_partition_par; + +static PSI_mutex_info all_partition_mutexes[]= +{ + { &key_partition_auto_inc_mutex, "Partition_share::auto_inc_mutex", 0} +}; +static PSI_memory_info all_partitioning_memory[]= +{ { &key_memory_Partition_share, "Partition_share", 0}, + { &key_memory_partition_sort_buffer, "partition_sort_buffer", 0}, + { &key_memory_Partition_admin, "Partition_admin", 0}, + { &key_memory_ha_partition_file, "ha_partition::file", 0}, +// { &key_memory_ha_partition_engine_array, "ha_partition::engine_array", 0}, + { &key_memory_ha_partition_part_ids, "ha_partition::part_ids", 0} }; +static PSI_file_info all_partition_file[]= +{ { &key_file_ha_partition_par, "ha_partition::parfile", 0} }; + +static void init_partition_psi_keys(void) +{ + const char* category= "partition"; + int count; + + count= array_elements(all_partitioning_memory); + mysql_memory_register(category, all_partitioning_memory, count); + count= array_elements(all_partition_mutexes); + mysql_mutex_register(category, all_partition_mutexes, count); + count= array_elements(all_partition_file); + mysql_file_register(category, all_partition_file, count); +} +#endif /* HAVE_PSI_INTERFACE */ + +static int partition_initialize(void *p) +{ + handlerton *partition_hton; + partition_hton= (handlerton *)p; + + partition_hton->db_type= DB_TYPE_PARTITION_DB; + partition_hton->create= partition_create_handler; + + partition_hton->partition_flags= partition_flags; + partition_hton->notify_tabledef_changed= 
partition_notify_tabledef_changed; + partition_hton->alter_table_flags= alter_table_flags; + partition_hton->flags= HTON_NOT_USER_SELECTABLE | + HTON_HIDDEN | + HTON_TEMPORARY_NOT_SUPPORTED; + partition_hton->tablefile_extensions= ha_partition_ext; + +#ifdef HAVE_PSI_INTERFACE + init_partition_psi_keys(); +#endif + return 0; +} + + +/** + Initialize and allocate space for partitions shares. + + @param num_parts Number of partitions to allocate storage for. + + @return Operation status. + @retval true Failure (out of memory). + @retval false Success. +*/ + +bool Partition_share::init(uint num_parts) +{ + DBUG_ENTER("Partition_share::init"); + auto_inc_initialized= false; + partition_name_hash_initialized= false; + next_auto_inc_val= 0; + if (partitions_share_refs.init(num_parts)) + { + DBUG_RETURN(true); + } + DBUG_RETURN(false); +} + + +/* + Create new partition handler + + SYNOPSIS + partition_create_handler() + table Table object + + RETURN VALUE + New partition object +*/ + +static handler *partition_create_handler(handlerton *hton, + TABLE_SHARE *share, + MEM_ROOT *mem_root) +{ + ha_partition *file= new (mem_root) ha_partition(hton, share); + if (file && file->initialize_partition(mem_root)) + { + delete file; + file= 0; + } + return file; +} + +static uint partition_flags() +{ + return HA_CAN_PARTITION; +} + +static alter_table_operations alter_table_flags(alter_table_operations flags __attribute__((unused))) +{ + return (HA_PARTITION_FUNCTION_SUPPORTED | + HA_FAST_CHANGE_PARTITION); +} + +/* + Constructor method + + SYNOPSIS + ha_partition() + table Table object + + RETURN VALUE + NONE +*/ + +ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share) + :handler(hton, share) +{ + DBUG_ENTER("ha_partition::ha_partition(table)"); + ha_partition_init(); + DBUG_VOID_RETURN; +} + + +/* Initialize all partition variables */ + +void ha_partition::ha_partition_init() +{ + DBUG_ENTER("ha_partition::ha_partition_init"); + init_alloc_root(PSI_INSTRUMENT_ME, 
&m_mem_root, 512, 512, MYF(0)); + init_handler_variables(); + DBUG_VOID_RETURN; +} + +/* + Constructor method + + SYNOPSIS + ha_partition() + part_info Partition info + + RETURN VALUE + NONE +*/ + +ha_partition::ha_partition(handlerton *hton, partition_info *part_info) + :handler(hton, NULL) +{ + DBUG_ENTER("ha_partition::ha_partition(part_info)"); + DBUG_ASSERT(part_info); + ha_partition_init(); + m_part_info= part_info; + m_create_handler= TRUE; + m_is_sub_partitioned= m_part_info->is_sub_partitioned(); + DBUG_VOID_RETURN; +} + +/** + ha_partition constructor method used by ha_partition::clone() + + @param hton Handlerton (partition_hton) + @param share Table share object + @param part_info_arg partition_info to use + @param clone_arg ha_partition to clone + @param clme_mem_root_arg MEM_ROOT to use + + @return New partition handler +*/ + +ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share, + partition_info *part_info_arg, + ha_partition *clone_arg, + MEM_ROOT *clone_mem_root_arg) + :handler(hton, share) +{ + DBUG_ENTER("ha_partition::ha_partition(clone)"); + ha_partition_init(); + m_part_info= part_info_arg; + m_create_handler= TRUE; + m_is_sub_partitioned= m_part_info->is_sub_partitioned(); + m_is_clone_of= clone_arg; + m_clone_mem_root= clone_mem_root_arg; + part_share= clone_arg->part_share; + m_tot_parts= clone_arg->m_tot_parts; + DBUG_VOID_RETURN; +} + +/* + Initialize handler object + + SYNOPSIS + init_handler_variables() + + RETURN VALUE + NONE +*/ + +void ha_partition::init_handler_variables() +{ + active_index= MAX_KEY; + m_mode= 0; + m_open_test_lock= 0; + m_file_buffer= NULL; + m_name_buffer_ptr= NULL; + m_engine_array= NULL; + m_connect_string= NULL; + m_file= NULL; + m_file_tot_parts= 0; + m_reorged_file= NULL; + m_new_file= NULL; + m_reorged_parts= 0; + m_added_file= NULL; + m_tot_parts= 0; + m_part_spec.start_part= NO_CURRENT_PART_ID; + m_scan_value= 2; + m_ref_length= 0; + m_part_spec.end_part= NO_CURRENT_PART_ID; + m_index_scan_type= 
partition_no_index_scan; + m_start_key.key= NULL; + m_start_key.length= 0; + m_myisam= FALSE; + m_innodb= FALSE; + m_extra_cache= FALSE; + m_extra_cache_size= 0; + m_extra_prepare_for_update= FALSE; + m_extra_cache_part_id= NO_CURRENT_PART_ID; + m_handler_status= handler_not_initialized; + m_part_field_array= NULL; + m_ordered_rec_buffer= NULL; + m_top_entry= NO_CURRENT_PART_ID; + m_rec_length= 0; + m_last_part= 0; + m_rec0= 0; + m_err_rec= NULL; + m_curr_key_info[0]= NULL; + m_curr_key_info[1]= NULL; + m_part_func_monotonicity_info= NON_MONOTONIC; + m_key_not_found= FALSE; + auto_increment_lock= FALSE; + auto_increment_safe_stmt_log_lock= FALSE; + /* + this allows blackhole to work properly + */ + m_num_locks= 0; + m_part_info= NULL; + m_create_handler= FALSE; + m_is_sub_partitioned= 0; + m_is_clone_of= NULL; + m_clone_mem_root= NULL; + part_share= NULL; + m_new_partitions_share_refs.empty(); + m_part_ids_sorted_by_num_of_records= NULL; + m_partitions_to_open= NULL; + + m_range_info= NULL; + m_mrr_full_buffer_size= 0; + m_mrr_new_full_buffer_size= 0; + m_mrr_full_buffer= NULL; + m_mrr_range_first= NULL; + + m_pre_calling= FALSE; + m_pre_call_use_parallel= FALSE; + + ft_first= ft_current= NULL; + bulk_access_executing= FALSE; // For future + + /* + Clear bitmaps to allow on one to call my_bitmap_free() on them at any time + */ + my_bitmap_clear(&m_bulk_insert_started); + my_bitmap_clear(&m_locked_partitions); + my_bitmap_clear(&m_partitions_to_reset); + my_bitmap_clear(&m_key_not_found_partitions); + my_bitmap_clear(&m_mrr_used_partitions); + my_bitmap_clear(&m_opened_partitions); + m_file_sample= NULL; + +#ifdef DONT_HAVE_TO_BE_INITALIZED + m_start_key.flag= 0; + m_ordered= TRUE; +#endif +} + +const char *ha_partition::real_table_type() const +{ + // we can do this since we only support a single engine type + return m_file[0]->table_type(); +} + +/* + Destructor method + + SYNOPSIS + ~ha_partition() + + RETURN VALUE + NONE +*/ + +ha_partition::~ha_partition() +{ + 
DBUG_ENTER("ha_partition::~ha_partition"); + if (m_new_partitions_share_refs.elements) + m_new_partitions_share_refs.delete_elements(); + if (m_file != NULL) + { + uint i; + for (i= 0; i < m_tot_parts; i++) + delete m_file[i]; + } + destroy_record_priority_queue(); + my_free(m_part_ids_sorted_by_num_of_records); + + if (m_added_file) + { + for (handler **ph= m_added_file; *ph; ph++) + delete (*ph); + } + clear_handler_file(); + free_root(&m_mem_root, MYF(0)); + + DBUG_VOID_RETURN; +} + + +/* + Initialize partition handler object + + SYNOPSIS + initialize_partition() + mem_root Allocate memory through this + + RETURN VALUE + 1 Error + 0 Success + + DESCRIPTION + + The partition handler is only a layer on top of other engines. Thus it + can't really perform anything without the underlying handlers. Thus we + add this method as part of the allocation of a handler object. + + 1) Allocation of underlying handlers + If we have access to the partition info we will allocate one handler + instance for each partition. + 2) Allocation without partition info + The cases where we don't have access to this information is when called + in preparation for delete_table and rename_table and in that case we + only need to set HA_FILE_BASED. In that case we will use the .par file + that contains information about the partitions and their engines and + the names of each partition. + 3) Table flags initialisation + We need also to set table flags for the partition handler. This is not + static since it depends on what storage engines are used as underlying + handlers. + The table flags is set in this routine to simulate the behaviour of a + normal storage engine + The flag HA_FILE_BASED will be set independent of the underlying handlers + 4) Index flags initialisation + When knowledge exists on the indexes it is also possible to initialize the + index flags. Again the index flags must be initialized by using the under- + lying handlers since this is storage engine dependent. 
+ The flag HA_READ_ORDER will be reset for the time being to indicate no + ordered output is available from partition handler indexes. Later a merge + sort will be performed using the underlying handlers. + 5) has_transactions are calculated here. + +*/ + +bool ha_partition::initialize_partition(MEM_ROOT *mem_root) +{ + handler **file_array, *file; + ulonglong check_table_flags; + DBUG_ENTER("ha_partition::initialize_partition"); + + if (m_create_handler) + { + m_tot_parts= m_part_info->get_tot_partitions(); + DBUG_ASSERT(m_tot_parts > 0); + if (new_handlers_from_part_info(mem_root)) + DBUG_RETURN(1); + } + else if (!table_share || !table_share->normalized_path.str) + { + /* + Called with dummy table share (delete, rename and alter table). + Don't need to set-up anything. + */ + DBUG_RETURN(0); + } + else if (get_from_handler_file(table_share->normalized_path.str, + mem_root, false)) + { + my_error(ER_FAILED_READ_FROM_PAR_FILE, MYF(0)); + DBUG_RETURN(1); + } + /* + We create all underlying table handlers here. We do it in this special + method to be able to report allocation errors. + + Set up has_transactions since they are called often in all kinds of places, + other parameters are calculated on demand. + Verify that all partitions have the same table_flags. + */ + check_table_flags= m_file[0]->ha_table_flags(); + file_array= m_file; + do + { + file= *file_array; + if (check_table_flags != file->ha_table_flags()) + { + my_error(ER_MIX_HANDLER_ERROR, MYF(0)); + DBUG_RETURN(1); + } + file->handler_stats= handler_stats; + } while (*(++file_array)); + m_handler_status= handler_initialized; + DBUG_RETURN(0); +} + +/**************************************************************************** + MODULE meta data changes +****************************************************************************/ +/* + Delete a table + + SYNOPSIS + delete_table() + name Full path of table name + + RETURN VALUE + >0 Error + 0 Success + + DESCRIPTION + Used to delete a table. 
By the time delete_table() has been called all + opened references to this table will have been closed (and your globally + shared references released. The variable name will just be the name of + the table. You will need to remove any files you have created at this + point. + + If you do not implement this, the default delete_table() is called from + handler.cc and it will delete all files with the file extensions returned + by bas_ext(). + + Called from handler.cc by delete_table and ha_create_table(). Only used + during create if the table_flag HA_DROP_BEFORE_CREATE was specified for + the storage engine. +*/ + +int ha_partition::delete_table(const char *name) +{ + DBUG_ENTER("ha_partition::delete_table"); + + DBUG_RETURN(del_ren_table(name, NULL)); +} + + +/* + Rename a table + + SYNOPSIS + rename_table() + from Full path of old table name + to Full path of new table name + + RETURN VALUE + >0 Error + 0 Success + + DESCRIPTION + Renames a table from one name to another from alter table call. + + If you do not implement this, the default rename_table() is called from + handler.cc and it will rename all files with the file extensions returned + by bas_ext(). + + Called from sql_table.cc by mysql_rename_table(). +*/ + +int ha_partition::rename_table(const char *from, const char *to) +{ + DBUG_ENTER("ha_partition::rename_table"); + + DBUG_RETURN(del_ren_table(from, to)); +} + + +/* + Create the handler file (.par-file) + + SYNOPSIS + create_partitioning_metadata() + path Path to the new frm file (without ext) + old_p Path to the old frm file (without ext) + create_info Create info generated for CREATE TABLE + + RETURN VALUE + >0 Error + 0 Success + + DESCRIPTION + create_partitioning_metadata is called to create any handler specific files + before opening the file with openfrm to later call ::create on the + file object. + In the partition handler this is used to store the names of partitions + and types of engines in the partitions. 
+*/ + +int ha_partition::create_partitioning_metadata(const char *path, + const char *old_path, + chf_create_flags action_flag) +{ + partition_element *part; + DBUG_ENTER("ha_partition::create_partitioning_metadata"); + + /* + We need to update total number of parts since we might write the handler + file as part of a partition management command + */ + if (action_flag == CHF_DELETE_FLAG || + action_flag == CHF_RENAME_FLAG) + { + char name[FN_REFLEN]; + char old_name[FN_REFLEN]; + + strxmov(name, path, ha_par_ext, NullS); + strxmov(old_name, old_path, ha_par_ext, NullS); + if ((action_flag == CHF_DELETE_FLAG && + mysql_file_delete(key_file_ha_partition_par, name, MYF(MY_WME))) || + (action_flag == CHF_RENAME_FLAG && + mysql_file_rename(key_file_ha_partition_par, old_name, name, + MYF(MY_WME)))) + { + DBUG_RETURN(TRUE); + } + } + else if (action_flag == CHF_CREATE_FLAG) + { + if (create_handler_file(path)) + { + my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0)); + DBUG_RETURN(1); + } + } + + /* m_part_info is only NULL when we failed to create a partition table */ + if (m_part_info) + { + part= m_part_info->partitions.head(); + /* part->engine_type may be 0 when we failed to create the partition */ + if (part->engine_type && + (part->engine_type)->create_partitioning_metadata && + ((part->engine_type)->create_partitioning_metadata)(path, old_path, + action_flag)) + { + my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0)); + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +/* + Create a partitioned table + + SYNOPSIS + create() + name Full path of table name + table_arg Table object + create_info Create info generated for CREATE TABLE + + RETURN VALUE + >0 Error + 0 Success + + DESCRIPTION + create() is called to create a table. The variable name will have the name + of the table. When create() is called you do not need to worry about + opening the table. Also, the FRM file will have already been created so + adjusting create_info will not do you any good. 
You can overwrite the frm + file at this point if you wish to change the table definition, but there + are no methods currently provided for doing that. + + Called from handler.cc by ha_create_table(). +*/ + +int ha_partition::create(const char *name, TABLE *table_arg, + HA_CREATE_INFO *create_info) +{ + int error; + THD *thd= ha_thd(); + char name_buff[FN_REFLEN + 1], name_lc_buff[FN_REFLEN]; + char *name_buffer_ptr; + const char *path; + uint i; + List_iterator_fast part_it(m_part_info->partitions); + partition_element *part_elem; + handler **file, **abort_file; + DBUG_ENTER("ha_partition::create"); + DBUG_PRINT("enter", ("name: '%s'", name)); + + DBUG_ASSERT(!fn_frm_ext(name)); + + /* Not allowed to create temporary partitioned tables */ + if (create_info && create_info->tmp_table()) + { + my_error(ER_FEATURE_NOT_SUPPORTED_WITH_PARTITIONING, MYF(0), "CREATE TEMPORARY TABLE"); + DBUG_RETURN(TRUE); + } + /* + The following block should be removed once the table-level data directory + specification is supported by the partitioning engine (MDEV-28108). + */ + if (thd_sql_command(thd) == SQLCOM_ALTER_TABLE && create_info) + { + if (create_info->data_file_name) + { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, WARN_OPTION_IGNORED, + " table option of old schema is ignored"); + } + if (create_info->index_file_name) + { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, WARN_OPTION_IGNORED, + " table option of old schema is ignored"); + } + } + + if (get_from_handler_file(name, thd->mem_root, false)) + DBUG_RETURN(TRUE); + DBUG_ASSERT(m_file_buffer); + name_buffer_ptr= m_name_buffer_ptr; + file= m_file; + /* + Since ha_partition has HA_FILE_BASED, it must alter underlying table names + if they do not have HA_FILE_BASED and lower_case_table_names == 2. + See Bug#37402, for Mac OS X. + The appended #P#[#SP#] will remain in current case. + Using the first partitions handler, since mixing handlers is not allowed. 
+ */ + path= get_canonical_filename(*file, name, name_lc_buff); + for (i= 0; i < m_part_info->num_parts; i++) + { + part_elem= part_it++; + if (m_is_sub_partitioned) + { + uint j; + List_iterator_fast sub_it(part_elem->subpartitions); + for (j= 0; j < m_part_info->num_subparts; j++) + { + part_elem= sub_it++; + if (unlikely((error= create_partition_name(name_buff, + sizeof(name_buff), path, + name_buffer_ptr, + NORMAL_PART_NAME, FALSE)))) + goto create_error; + if (unlikely((error= set_up_table_before_create(table_arg, name_buff, + create_info, + part_elem)) || + ((error= (*file)->ha_create(name_buff, table_arg, + create_info))))) + goto create_error; + + name_buffer_ptr= strend(name_buffer_ptr) + 1; + file++; + } + } + else + { + if (unlikely((error= create_partition_name(name_buff, sizeof(name_buff), + path, name_buffer_ptr, + NORMAL_PART_NAME, FALSE)))) + goto create_error; + if (unlikely((error= set_up_table_before_create(table_arg, name_buff, + create_info, + part_elem)) || + ((error= (*file)->ha_create(name_buff, table_arg, + create_info))))) + goto create_error; + + name_buffer_ptr= strend(name_buffer_ptr) + 1; + file++; + } + } + DBUG_RETURN(0); + +create_error: + name_buffer_ptr= m_name_buffer_ptr; + for (abort_file= file, file= m_file; file < abort_file; file++) + { + if (!create_partition_name(name_buff, sizeof(name_buff), path, + name_buffer_ptr, NORMAL_PART_NAME, FALSE)) + (void) (*file)->delete_table((const char*) name_buff); + name_buffer_ptr= strend(name_buffer_ptr) + 1; + } + handler::delete_table(name); + DBUG_RETURN(error); +} + + +/* + Drop partitions as part of ALTER TABLE of partitions + + SYNOPSIS + drop_partitions() + path Complete path of db and table name + + RETURN VALUE + >0 Failure + 0 Success + + DESCRIPTION + Use part_info object on handler object to deduce which partitions to + drop (each partition has a state attached to it) +*/ + +int ha_partition::drop_partitions(const char *path) +{ + List_iterator 
part_it(m_part_info->partitions); + char part_name_buff[FN_REFLEN + 1]; + uint num_parts= m_part_info->partitions.elements; + uint num_subparts= m_part_info->num_subparts; + uint i= 0; + uint name_variant; + int ret_error; + int error= 0; + DBUG_ENTER("ha_partition::drop_partitions"); + + /* + Assert that it works without HA_FILE_BASED and lower_case_table_name = 2. + We use m_file[0] as long as all partitions have the same storage engine. + */ + DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path, + part_name_buff))); + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_TO_BE_DROPPED) + { + handler *file; + /* + This part is to be dropped, meaning the part or all its subparts. + */ + name_variant= NORMAL_PART_NAME; + if (m_is_sub_partitioned) + { + List_iterator sub_it(part_elem->subpartitions); + uint j= 0, part; + do + { + partition_element *sub_elem= sub_it++; + part= i * num_subparts + j; + if (unlikely((ret_error= + create_subpartition_name(part_name_buff, + sizeof(part_name_buff), path, + part_elem->partition_name, + sub_elem->partition_name, + name_variant)))) + error= ret_error; + file= m_file[part]; + DBUG_PRINT("info", ("Drop subpartition %s", part_name_buff)); + if (unlikely((ret_error= file->delete_table(part_name_buff)))) + error= ret_error; + if (unlikely(ddl_log_increment_phase(sub_elem->log_entry-> + entry_pos))) + error= 1; + } while (++j < num_subparts); + } + else + { + if ((ret_error= create_partition_name(part_name_buff, + sizeof(part_name_buff), path, + part_elem->partition_name, name_variant, TRUE))) + error= ret_error; + else + { + file= m_file[i]; + DBUG_PRINT("info", ("Drop partition %s", part_name_buff)); + if (unlikely((ret_error= file->delete_table(part_name_buff)))) + error= ret_error; + if (unlikely(ddl_log_increment_phase(part_elem->log_entry-> + entry_pos))) + error= 1; + } + } + if (part_elem->part_state == PART_IS_CHANGED) + part_elem->part_state= PART_NORMAL; + else + 
part_elem->part_state= PART_IS_DROPPED; + } + } while (++i < num_parts); + (void) ddl_log_sync(); + DBUG_RETURN(error); +} + + +/* + Rename partitions as part of ALTER TABLE of partitions + + SYNOPSIS + rename_partitions() + path Complete path of db and table name + + RETURN VALUE + TRUE Failure + FALSE Success + + DESCRIPTION + When reorganising partitions, adding hash partitions and coalescing + partitions it can be necessary to rename partitions while holding + an exclusive lock on the table. + Which partitions to rename is given by state of partitions found by the + partition info struct referenced from the handler object +*/ + +int ha_partition::rename_partitions(const char *path) +{ + List_iterator part_it(m_part_info->partitions); + List_iterator temp_it(m_part_info->temp_partitions); + char part_name_buff[FN_REFLEN + 1]; + char norm_name_buff[FN_REFLEN + 1]; + uint num_parts= m_part_info->partitions.elements; + uint part_count= 0; + uint num_subparts= m_part_info->num_subparts; + uint i= 0; + uint j= 0; + int error= 0; + int ret_error; + uint temp_partitions= m_part_info->temp_partitions.elements; + handler *file; + partition_element *part_elem, *sub_elem; + DBUG_ENTER("ha_partition::rename_partitions"); + + /* + Assert that it works without HA_FILE_BASED and lower_case_table_name = 2. + We use m_file[0] as long as all partitions have the same storage engine. + */ + DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path, + norm_name_buff))); + + DEBUG_SYNC(ha_thd(), "before_rename_partitions"); + if (temp_partitions) + { + /* + These are the reorganised partitions that have already been copied. + We delete the partitions and log the delete by inactivating the + delete log entry in the table log. We only need to synchronise + these writes before moving to the next loop since there is no + interaction among reorganised partitions, they cannot have the + same name. 
+ */ + do + { + part_elem= temp_it++; + if (m_is_sub_partitioned) + { + List_iterator sub_it(part_elem->subpartitions); + j= 0; + do + { + sub_elem= sub_it++; + file= m_reorged_file[part_count++]; + if (unlikely((ret_error= + create_subpartition_name(norm_name_buff, + sizeof(norm_name_buff), path, + part_elem->partition_name, + sub_elem->partition_name, + NORMAL_PART_NAME)))) + error= ret_error; + DBUG_PRINT("info", ("Delete subpartition %s", norm_name_buff)); + if (unlikely((ret_error= file->delete_table(norm_name_buff)))) + error= ret_error; + else if (unlikely(ddl_log_increment_phase(sub_elem->log_entry-> + entry_pos))) + error= 1; + else + sub_elem->log_entry= NULL; /* Indicate success */ + } while (++j < num_subparts); + } + else + { + file= m_reorged_file[part_count++]; + if (unlikely((ret_error= + create_partition_name(norm_name_buff, + sizeof(norm_name_buff), path, + part_elem->partition_name, + NORMAL_PART_NAME, TRUE)))) + error= ret_error; + else + { + DBUG_PRINT("info", ("Delete partition %s", norm_name_buff)); + if (unlikely((ret_error= file->delete_table(norm_name_buff)))) + error= ret_error; + else if (unlikely(ddl_log_increment_phase(part_elem->log_entry-> + entry_pos))) + error= 1; + else + part_elem->log_entry= NULL; /* Indicate success */ + } + } + } while (++i < temp_partitions); + (void) ddl_log_sync(); + } + i= 0; + do + { + /* + When state is PART_IS_CHANGED it means that we have created a new + TEMP partition that is to be renamed to normal partition name and + we are to delete the old partition with currently the normal name. + + We perform this operation by + 1) Delete old partition with normal partition name + 2) Signal this in table log entry + 3) Synch table log to ensure we have consistency in crashes + 4) Rename temporary partition name to normal partition name + 5) Signal this to table log entry + It is not necessary to synch the last state since a new rename + should not corrupt things if there was no temporary partition. 
+ + The only other parts we need to cater for are new parts that + replace reorganised parts. The reorganised parts were deleted + by the code above that goes through the temp_partitions list. + Thus the synch above makes it safe to simply perform step 4 and 5 + for those entries. + */ + part_elem= part_it++; + if (part_elem->part_state == PART_IS_CHANGED || + part_elem->part_state == PART_TO_BE_DROPPED || + (part_elem->part_state == PART_IS_ADDED && temp_partitions)) + { + if (m_is_sub_partitioned) + { + List_iterator sub_it(part_elem->subpartitions); + uint part; + + j= 0; + do + { + sub_elem= sub_it++; + part= i * num_subparts + j; + if (unlikely((ret_error= + create_subpartition_name(norm_name_buff, + sizeof(norm_name_buff), path, + part_elem->partition_name, + sub_elem->partition_name, + NORMAL_PART_NAME)))) + error= ret_error; + if (part_elem->part_state == PART_IS_CHANGED) + { + file= m_reorged_file[part_count++]; + DBUG_PRINT("info", ("Delete subpartition %s", norm_name_buff)); + if (unlikely((ret_error= file->delete_table(norm_name_buff)))) + error= ret_error; + else if (unlikely(ddl_log_increment_phase(sub_elem->log_entry-> + entry_pos))) + error= 1; + (void) ddl_log_sync(); + } + file= m_new_file[part]; + if (unlikely((ret_error= + create_subpartition_name(part_name_buff, + sizeof(part_name_buff), path, + part_elem->partition_name, + sub_elem->partition_name, + TEMP_PART_NAME)))) + error= ret_error; + DBUG_PRINT("info", ("Rename subpartition from %s to %s", + part_name_buff, norm_name_buff)); + if (unlikely((ret_error= file->ha_rename_table(part_name_buff, + norm_name_buff)))) + error= ret_error; + else if (unlikely(ddl_log_increment_phase(sub_elem->log_entry-> + entry_pos))) + error= 1; + else + sub_elem->log_entry= NULL; + } while (++j < num_subparts); + } + else + { + if (unlikely((ret_error= + create_partition_name(norm_name_buff, + sizeof(norm_name_buff), path, + part_elem->partition_name, + NORMAL_PART_NAME, TRUE)) || + (ret_error= 
create_partition_name(part_name_buff, + sizeof(part_name_buff), + path, + part_elem-> + partition_name, + TEMP_PART_NAME, TRUE)))) + error= ret_error; + else + { + if (part_elem->part_state == PART_IS_CHANGED) + { + file= m_reorged_file[part_count++]; + DBUG_PRINT("info", ("Delete partition %s", norm_name_buff)); + if (unlikely((ret_error= file->delete_table(norm_name_buff)))) + error= ret_error; + else if (unlikely(ddl_log_increment_phase(part_elem->log_entry-> + entry_pos))) + error= 1; + (void) ddl_log_sync(); + } + file= m_new_file[i]; + DBUG_PRINT("info", ("Rename partition from %s to %s", + part_name_buff, norm_name_buff)); + if (unlikely((ret_error= file->ha_rename_table(part_name_buff, + norm_name_buff)))) + error= ret_error; + else if (unlikely(ddl_log_increment_phase(part_elem->log_entry-> + entry_pos))) + error= 1; + else + part_elem->log_entry= NULL; + } + } + } + } while (++i < num_parts); + (void) ddl_log_sync(); + DBUG_RETURN(error); +} + + +#define OPTIMIZE_PARTS 1 +#define ANALYZE_PARTS 2 +#define CHECK_PARTS 3 +#define REPAIR_PARTS 4 +#define ASSIGN_KEYCACHE_PARTS 5 +#define PRELOAD_KEYS_PARTS 6 + +static const LEX_CSTRING opt_op_name[]= +{ + { NULL, 0}, + { STRING_WITH_LEN("optimize") }, + { STRING_WITH_LEN("analyze") }, + { STRING_WITH_LEN("check") }, + { STRING_WITH_LEN("repair") }, + { STRING_WITH_LEN("assign_to_keycache") }, + { STRING_WITH_LEN("preload_keys") } +}; + + +static const LEX_CSTRING msg_warning= { STRING_WITH_LEN("warning") }; +#define msg_error error_clex_str + + +/* + Optimize table + + SYNOPSIS + optimize() + thd Thread object + check_opt Check/analyze/repair/optimize options + + RETURN VALUES + >0 Error + 0 Success +*/ + +int ha_partition::optimize(THD *thd, HA_CHECK_OPT *check_opt) +{ + DBUG_ENTER("ha_partition::optimize"); + + DBUG_RETURN(handle_opt_partitions(thd, check_opt, OPTIMIZE_PARTS)); +} + + +/* + Analyze table + + SYNOPSIS + analyze() + thd Thread object + check_opt Check/analyze/repair/optimize options + + RETURN 
VALUES + >0 Error + 0 Success +*/ + +int ha_partition::analyze(THD *thd, HA_CHECK_OPT *check_opt) +{ + DBUG_ENTER("ha_partition::analyze"); + + int result= handle_opt_partitions(thd, check_opt, ANALYZE_PARTS); + + if ((result == 0) && m_file[0] + && (m_file[0]->ha_table_flags() & HA_ONLINE_ANALYZE)) + { + /* If this is ANALYZE TABLE that will not force table definition cache + eviction, update statistics for the partition handler. */ + this->info(HA_STATUS_CONST | HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); + } + + DBUG_RETURN(result); +} + + +/* + Check table + + SYNOPSIS + check() + thd Thread object + check_opt Check/analyze/repair/optimize options + + RETURN VALUES + >0 Error + 0 Success +*/ + +int ha_partition::check(THD *thd, HA_CHECK_OPT *check_opt) +{ + DBUG_ENTER("ha_partition::check"); + + DBUG_RETURN(handle_opt_partitions(thd, check_opt, CHECK_PARTS)); +} + + +/* + Repair table + + SYNOPSIS + repair() + thd Thread object + check_opt Check/analyze/repair/optimize options + + RETURN VALUES + >0 Error + 0 Success +*/ + +int ha_partition::repair(THD *thd, HA_CHECK_OPT *check_opt) +{ + DBUG_ENTER("ha_partition::repair"); + + int res= handle_opt_partitions(thd, check_opt, REPAIR_PARTS); + DBUG_RETURN(res); +} + +/** + Assign to keycache + + @param thd Thread object + @param check_opt Check/analyze/repair/optimize options + + @return + @retval >0 Error + @retval 0 Success +*/ + +int ha_partition::assign_to_keycache(THD *thd, HA_CHECK_OPT *check_opt) +{ + DBUG_ENTER("ha_partition::assign_to_keycache"); + + DBUG_RETURN(handle_opt_partitions(thd, check_opt, ASSIGN_KEYCACHE_PARTS)); +} + + +/** + Preload to keycache + + @param thd Thread object + @param check_opt Check/analyze/repair/optimize options + + @return + @retval >0 Error + @retval 0 Success +*/ + +int ha_partition::preload_keys(THD *thd, HA_CHECK_OPT *check_opt) +{ + DBUG_ENTER("ha_partition::preload_keys"); + + DBUG_RETURN(handle_opt_partitions(thd, check_opt, PRELOAD_KEYS_PARTS)); +} + + +/* + Handle 
optimize/analyze/check/repair of one partition + + SYNOPSIS + handle_opt_part() + thd Thread object + check_opt Options + file Handler object of partition + flag Optimize/Analyze/Check/Repair flag + + RETURN VALUE + >0 Failure + 0 Success +*/ + +int ha_partition::handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt, + uint part_id, uint flag) +{ + int error; + handler *file= m_file[part_id]; + DBUG_ENTER("handle_opt_part"); + DBUG_PRINT("enter", ("flag: %u", flag)); + + if (flag == OPTIMIZE_PARTS) + error= file->ha_optimize(thd, check_opt); + else if (flag == ANALYZE_PARTS) + error= file->ha_analyze(thd, check_opt); + else if (flag == CHECK_PARTS) + { + error= file->ha_check(thd, check_opt); + if (!error || + error == HA_ADMIN_ALREADY_DONE || + error == HA_ADMIN_NOT_IMPLEMENTED) + { + if (check_opt->flags & (T_MEDIUM | T_EXTEND)) + error= check_misplaced_rows(part_id, false); + } + } + else if (flag == REPAIR_PARTS) + { + error= file->ha_repair(thd, check_opt); + if (!error || + error == HA_ADMIN_ALREADY_DONE || + error == HA_ADMIN_NOT_IMPLEMENTED) + { + if (check_opt->flags & (T_MEDIUM | T_EXTEND)) + error= check_misplaced_rows(part_id, true); + } + } + else if (flag == ASSIGN_KEYCACHE_PARTS) + error= file->assign_to_keycache(thd, check_opt); + else if (flag == PRELOAD_KEYS_PARTS) + error= file->preload_keys(thd, check_opt); + else + { + DBUG_ASSERT(FALSE); + error= 1; + } + if (error == HA_ADMIN_ALREADY_DONE) + error= 0; + DBUG_RETURN(error); +} + + +/* + print a message row formatted for ANALYZE/CHECK/OPTIMIZE/REPAIR TABLE + (modelled after mi_check_print_msg) + TODO: move this into the handler, or rewrite mysql_admin_table. +*/ +bool print_admin_msg(THD* thd, uint len, + const LEX_CSTRING *msg_type, + const char* db_name, String &table_name, + const LEX_CSTRING *op_name, const char *fmt, ...) 
+ ATTRIBUTE_FORMAT(printf, 7, 8); +bool print_admin_msg(THD* thd, uint len, + const LEX_CSTRING *msg_type, + const char* db_name, String &table_name, + const LEX_CSTRING *op_name, const char *fmt, ...) +{ + va_list args; + Protocol *protocol= thd->protocol; + size_t length; + size_t msg_length; + char name[NAME_LEN*2+2]; + char *msgbuf; + bool error= true; + + if (!(msgbuf= (char*) my_malloc(key_memory_Partition_admin, len, MYF(0)))) + return true; + va_start(args, fmt); + msg_length= my_vsnprintf(msgbuf, len, fmt, args); + va_end(args); + if (msg_length >= (len - 1)) + goto err; + msgbuf[len - 1]= 0; // healthy paranoia + + + if (!thd->vio_ok()) + { + sql_print_error("%s", msgbuf); + goto err; + } + + length=(size_t)(strxmov(name, db_name, ".", table_name.c_ptr_safe(), NullS) - name); + /* + TODO: switch from protocol to push_warning here. The main reason we didn't + it yet is parallel repair, which threads have no THD object accessible via + current_thd. + + Also we likely need to lock mutex here (in both cases with protocol and + push_warning). 
+ */ + DBUG_PRINT("info",("print_admin_msg: %s, %s, %s, %s", name, op_name, + msg_type, msgbuf)); + protocol->prepare_for_resend(); + protocol->store(name, length, system_charset_info); + protocol->store(op_name, system_charset_info); + protocol->store(msg_type, system_charset_info); + protocol->store(msgbuf, msg_length, system_charset_info); + if (protocol->write()) + { + sql_print_error("Failed on my_net_write, writing to stderr instead: %s", + msgbuf); + goto err; + } + error= false; +err: + my_free(msgbuf); + return error; +} + + +/* + Handle optimize/analyze/check/repair of partitions + + SYNOPSIS + handle_opt_partitions() + thd Thread object + check_opt Options + flag Optimize/Analyze/Check/Repair flag + + RETURN VALUE + >0 Failure + 0 Success +*/ + +int ha_partition::handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt, + uint flag) +{ + List_iterator part_it(m_part_info->partitions); + uint num_parts= m_part_info->num_parts; + uint num_subparts= m_part_info->num_subparts; + uint i= 0; + int error; + DBUG_ENTER("ha_partition::handle_opt_partitions"); + DBUG_PRINT("enter", ("flag= %u", flag)); + + do + { + partition_element *part_elem= part_it++; + /* + when ALTER TABLE PARTITION ... 
+ it should only do named partitions, otherwise all partitions + */ + if (!(thd->lex->alter_info.partition_flags & ALTER_PARTITION_ADMIN) || + part_elem->part_state == PART_ADMIN) + { + if (m_is_sub_partitioned) + { + List_iterator subpart_it(part_elem->subpartitions); + partition_element *sub_elem; + uint j= 0, part; + do + { + sub_elem= subpart_it++; + part= i * num_subparts + j; + DBUG_PRINT("info", ("Optimize subpartition %u (%s)", + part, sub_elem->partition_name)); + if (unlikely((error= handle_opt_part(thd, check_opt, part, flag)))) + { + /* print a line which partition the error belongs to */ + if (error != HA_ADMIN_NOT_IMPLEMENTED && + error != HA_ADMIN_ALREADY_DONE && + error != HA_ADMIN_TRY_ALTER && + error != HA_ERR_TABLE_READONLY) + { + print_admin_msg(thd, MYSQL_ERRMSG_SIZE, &msg_error, + table_share->db.str, table->alias, + &opt_op_name[flag], + "Subpartition %s returned error", + sub_elem->partition_name); + } + /* reset part_state for the remaining partitions */ + do + { + if (part_elem->part_state == PART_ADMIN) + part_elem->part_state= PART_NORMAL; + } while ((part_elem= part_it++)); + DBUG_RETURN(error); + } + } while (++j < num_subparts); + } + else + { + DBUG_PRINT("info", ("Optimize partition %u (%s)", i, + part_elem->partition_name)); + if (unlikely((error= handle_opt_part(thd, check_opt, i, flag)))) + { + /* print a line which partition the error belongs to */ + if (error != HA_ADMIN_NOT_IMPLEMENTED && + error != HA_ADMIN_ALREADY_DONE && + error != HA_ADMIN_TRY_ALTER) + { + print_admin_msg(thd, MYSQL_ERRMSG_SIZE, &msg_error, + table_share->db.str, table->alias, + &opt_op_name[flag], "Partition %s returned error", + part_elem->partition_name); + } + /* reset part_state for the remaining partitions */ + do + { + if (part_elem->part_state == PART_ADMIN) + part_elem->part_state= PART_NORMAL; + } while ((part_elem= part_it++)); + DBUG_RETURN(error); + } + } + part_elem->part_state= PART_NORMAL; + } + } while (++i < num_parts); + 
DBUG_RETURN(FALSE); +} + + +/** + @brief Check and repair the table if necessary + + @param thd Thread object + + @retval TRUE Error/Not supported + @retval FALSE Success + + @note Called if open_table_from_share fails and ::is_crashed(). +*/ + +bool ha_partition::check_and_repair(THD *thd) +{ + handler **file= m_file; + DBUG_ENTER("ha_partition::check_and_repair"); + + do + { + if ((*file)->ha_check_and_repair(thd)) + DBUG_RETURN(TRUE); + } while (*(++file)); + DBUG_RETURN(FALSE); +} + + +/** + @breif Check if the table can be automatically repaired + + @retval TRUE Can be auto repaired + @retval FALSE Cannot be auto repaired +*/ + +bool ha_partition::auto_repair(int error) const +{ + DBUG_ENTER("ha_partition::auto_repair"); + + /* + As long as we only support one storage engine per table, + we can use the first partition for this function. + */ + DBUG_RETURN(m_file[0]->auto_repair(error)); +} + + +/** + @breif Check if the table is crashed + + @retval TRUE Crashed + @retval FALSE Not crashed +*/ + +bool ha_partition::is_crashed() const +{ + handler **file= m_file; + DBUG_ENTER("ha_partition::is_crashed"); + + do + { + if ((*file)->is_crashed()) + DBUG_RETURN(TRUE); + } while (*(++file)); + DBUG_RETURN(FALSE); +} + + +/* + Prepare by creating a new partition + + SYNOPSIS + prepare_new_partition() + table Table object + create_info Create info from CREATE TABLE + file Handler object of new partition + part_name partition name + + RETURN VALUE + >0 Error + 0 Success +*/ + +int ha_partition::prepare_new_partition(TABLE *tbl, + HA_CREATE_INFO *create_info, + handler *file, const char *part_name, + partition_element *p_elem, + uint disable_non_uniq_indexes) +{ + int error; + DBUG_ENTER("prepare_new_partition"); + + /* + This call to set_up_table_before_create() is done for an alter table. + So this may be the second time around for this partition_element, + depending on how many partitions and subpartitions there were before, + and how many there are now. 
+ The first time, on the CREATE, data_file_name and index_file_name + came from the parser. They did not have the file name attached to + the end. But if this partition is less than the total number of + previous partitions, it's data_file_name has the filename attached. + So we need to take the partition filename off if it exists. + That file name may be different from part_name, which will be + attached in append_file_to_dir(). + */ + truncate_partition_filename((char*) p_elem->data_file_name); + truncate_partition_filename((char*) p_elem->index_file_name); + + if (unlikely((error= set_up_table_before_create(tbl, part_name, create_info, + p_elem)))) + goto error_create; + + if (!(file->ht->flags & HTON_CAN_READ_CONNECT_STRING_IN_PARTITION)) + tbl->s->connect_string= p_elem->connect_string; + create_info->options|= HA_CREATE_TMP_ALTER; + if ((error= file->ha_create(part_name, tbl, create_info))) + { + /* + Added for safety, InnoDB reports HA_ERR_FOUND_DUPP_KEY + if the table/partition already exists. + If we return that error code, then print_error would try to + get_dup_key on a non-existing partition. + So return a more reasonable error code. + */ + if (error == HA_ERR_FOUND_DUPP_KEY) + error= HA_ERR_TABLE_EXIST; + goto error_create; + } + DBUG_PRINT("info", ("partition %s created", part_name)); + if (unlikely((error= file->ha_open(tbl, part_name, m_mode, + m_open_test_lock | HA_OPEN_NO_PSI_CALL | + HA_OPEN_FOR_CREATE)))) + goto error_open; + DBUG_PRINT("info", ("partition %s opened", part_name)); + + /* + Note: if you plan to add another call that may return failure, + better to do it before external_lock() as cleanup_new_partition() + assumes that external_lock() is last call that may fail here. + Otherwise see description for cleanup_new_partition(). 
+ */ + if (unlikely((error= file->ha_external_lock(ha_thd(), F_WRLCK)))) + goto error_external_lock; + DBUG_PRINT("info", ("partition %s external locked", part_name)); + + if (disable_non_uniq_indexes) + file->ha_disable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE); + + DBUG_RETURN(0); +error_external_lock: + (void) file->ha_close(); +error_open: + (void) file->delete_table(part_name); +error_create: + DBUG_RETURN(error); +} + + +/* + Cleanup by removing all created partitions after error + + SYNOPSIS + cleanup_new_partition() + part_count Number of partitions to remove + + RETURN VALUE + NONE + + DESCRIPTION + This function is called immediately after prepare_new_partition() in + case the latter fails. + + In prepare_new_partition() last call that may return failure is + external_lock(). That means if prepare_new_partition() fails, + partition does not have external lock. Thus no need to call + external_lock(F_UNLCK) here. + + TODO: + We must ensure that in the case that we get an error during the process + that we call external_lock with F_UNLCK, close the table and delete the + table in the case where we have been successful with prepare_handler. + We solve this by keeping an array of successful calls to prepare_handler + which can then be used to undo the call. 
+*/ + +void ha_partition::cleanup_new_partition(uint part_count) +{ + DBUG_ENTER("ha_partition::cleanup_new_partition"); + + if (m_added_file) + { + THD *thd= ha_thd(); + handler **file= m_added_file; + while ((part_count > 0) && (*file)) + { + (*file)->ha_external_unlock(thd); + (*file)->ha_close(); + + /* Leave the (*file)->delete_table(part_name) to the ddl-log */ + + file++; + part_count--; + } + m_added_file= NULL; + } + DBUG_VOID_RETURN; +} + +/* + Implement the partition changes defined by ALTER TABLE of partitions + + SYNOPSIS + change_partitions() + create_info HA_CREATE_INFO object describing all + fields and indexes in table + path Complete path of db and table name + out: copied Output parameter where number of copied + records are added + out: deleted Output parameter where number of deleted + records are added + pack_frm_data Reference to packed frm file + pack_frm_len Length of packed frm file + + RETURN VALUE + >0 Failure + 0 Success + + DESCRIPTION + Add and copy if needed a number of partitions, during this operation + no other operation is ongoing in the server. This is used by + ADD PARTITION all types as well as by REORGANIZE PARTITION. For + one-phased implementations it is used also by DROP and COALESCE + PARTITIONs. + One-phased implementation needs the new frm file, other handlers will + get zero length and a NULL reference here. 
+*/ + +int ha_partition::change_partitions(HA_CREATE_INFO *create_info, + const char *path, + ulonglong * const copied, + ulonglong * const deleted, + const uchar *pack_frm_data + __attribute__((unused)), + size_t pack_frm_len + __attribute__((unused))) +{ + List_iterator part_it(m_part_info->partitions); + List_iterator t_it(m_part_info->temp_partitions); + char part_name_buff[FN_REFLEN + 1]; + uint num_parts= m_part_info->partitions.elements; + uint num_subparts= m_part_info->num_subparts; + uint i= 0; + uint num_remain_partitions, part_count, orig_count; + handler **new_file_array; + int error= 1; + bool first; + uint temp_partitions= m_part_info->temp_partitions.elements; + THD *thd= ha_thd(); + DBUG_ENTER("ha_partition::change_partitions"); + + /* + Assert that it works without HA_FILE_BASED and lower_case_table_name = 2. + We use m_file[0] as long as all partitions have the same storage engine. + */ + DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path, + part_name_buff))); + m_reorged_parts= 0; + if (!m_part_info->is_sub_partitioned()) + num_subparts= 1; + + /* + Step 1: + Calculate number of reorganised partitions and allocate space for + their handler references. + */ + if (temp_partitions) + { + m_reorged_parts= temp_partitions * num_subparts; + } + else + { + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_CHANGED || + part_elem->part_state == PART_REORGED_DROPPED) + { + m_reorged_parts+= num_subparts; + } + } while (++i < num_parts); + } + if (m_reorged_parts && + !(m_reorged_file= (handler**) thd->calloc(sizeof(handler*)* + (m_reorged_parts + 1)))) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + + /* + Step 2: + Calculate number of partitions after change and allocate space for + their handler references. 
+ */ + num_remain_partitions= 0; + if (temp_partitions) + { + num_remain_partitions= num_parts * num_subparts; + } + else + { + part_it.rewind(); + i= 0; + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_NORMAL || + part_elem->part_state == PART_TO_BE_ADDED || + part_elem->part_state == PART_CHANGED) + { + num_remain_partitions+= num_subparts; + } + } while (++i < num_parts); + } + if (!(new_file_array= ((handler**) + thd->calloc(sizeof(handler*)* + (2*(num_remain_partitions + 1)))))) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + m_added_file= &new_file_array[num_remain_partitions + 1]; + + /* + Step 3: + Fill m_reorged_file with handler references and NULL at the end + */ + if (m_reorged_parts) + { + i= 0; + part_count= 0; + first= TRUE; + part_it.rewind(); + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_CHANGED || + part_elem->part_state == PART_REORGED_DROPPED) + { + memcpy((void*)&m_reorged_file[part_count], + (void*)&m_file[i*num_subparts], + sizeof(handler*)*num_subparts); + part_count+= num_subparts; + } + else if (first && temp_partitions && + part_elem->part_state == PART_TO_BE_ADDED) + { + /* + When doing an ALTER TABLE REORGANIZE PARTITION a number of + partitions is to be reorganised into a set of new partitions. + The reorganised partitions are in this case in the temp_partitions + list. We copy all of them in one batch and thus we only do this + until we find the first partition with state PART_TO_BE_ADDED + since this is where the new partitions go in and where the old + ones used to be. + */ + first= FALSE; + DBUG_ASSERT(((i*num_subparts) + m_reorged_parts) <= m_file_tot_parts); + memcpy((void*)m_reorged_file, &m_file[i*num_subparts], + sizeof(handler*)*m_reorged_parts); + } + } while (++i < num_parts); + } + + /* + Step 4: + Fill new_array_file with handler references. Create the handlers if + needed. 
+ */ + i= 0; + part_count= 0; + orig_count= 0; + first= TRUE; + part_it.rewind(); + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_NORMAL) + { + DBUG_ASSERT(orig_count + num_subparts <= m_file_tot_parts); + memcpy((void*)&new_file_array[part_count], (void*)&m_file[orig_count], + sizeof(handler*)*num_subparts); + part_count+= num_subparts; + orig_count+= num_subparts; + } + else if (part_elem->part_state == PART_CHANGED || + part_elem->part_state == PART_TO_BE_ADDED) + { + uint j= 0; + Parts_share_refs *p_share_refs; + /* + The Handler_shares for each partition's handler can be allocated + within this handler, since there will not be any more instances of the + new partitions, until the table is reopened after the ALTER succeeded. + */ + p_share_refs= new Parts_share_refs; + if (!p_share_refs) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + if (p_share_refs->init(num_subparts)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + if (m_new_partitions_share_refs.push_back(p_share_refs, thd->mem_root)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + do + { + handler **new_file= &new_file_array[part_count++]; + if (!(*new_file= + get_new_handler(table->s, + thd->mem_root, + part_elem->engine_type))) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + if ((*new_file)->set_ha_share_ref(&p_share_refs->ha_shares[j])) + { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + } while (++j < num_subparts); + if (part_elem->part_state == PART_CHANGED) + orig_count+= num_subparts; + else if (temp_partitions && first) + { + orig_count+= (num_subparts * temp_partitions); + first= FALSE; + } + } + } while (++i < num_parts); + first= FALSE; + /* + Step 5: + Create the new partitions and also open, lock and call external_lock + on them to prepare them for copy phase and also for later close + calls + */ + + /* + Before creating new partitions check whether indexes are disabled + in the partitions. 
+ */ + + uint disable_non_uniq_indexes= indexes_are_disabled(); + + i= 0; + part_count= 0; + part_it.rewind(); + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_TO_BE_ADDED || + part_elem->part_state == PART_CHANGED) + { + /* + A new partition needs to be created PART_TO_BE_ADDED means an + entirely new partition and PART_CHANGED means a changed partition + that will still exist with either more or less data in it. + */ + uint name_variant= NORMAL_PART_NAME; + if (part_elem->part_state == PART_CHANGED || + (part_elem->part_state == PART_TO_BE_ADDED && temp_partitions)) + name_variant= TEMP_PART_NAME; + if (m_part_info->is_sub_partitioned()) + { + List_iterator sub_it(part_elem->subpartitions); + uint j= 0, part; + do + { + partition_element *sub_elem= sub_it++; + if (unlikely((error= + create_subpartition_name(part_name_buff, + sizeof(part_name_buff), path, + part_elem->partition_name, + sub_elem->partition_name, + name_variant)))) + { + cleanup_new_partition(part_count); + DBUG_RETURN(error); + } + part= i * num_subparts + j; + DBUG_PRINT("info", ("Add subpartition %s", part_name_buff)); + if (unlikely((error= + prepare_new_partition(table, create_info, + new_file_array[part], + (const char *)part_name_buff, + sub_elem, + disable_non_uniq_indexes)))) + { + cleanup_new_partition(part_count); + DBUG_RETURN(error); + } + + m_added_file[part_count++]= new_file_array[part]; + } while (++j < num_subparts); + } + else + { + if (unlikely((error= + create_partition_name(part_name_buff, + sizeof(part_name_buff), path, + part_elem->partition_name, + name_variant, TRUE)))) + { + cleanup_new_partition(part_count); + DBUG_RETURN(error); + } + + DBUG_PRINT("info", ("Add partition %s", part_name_buff)); + if (unlikely((error= + prepare_new_partition(table, create_info, + new_file_array[i], + (const char *)part_name_buff, + part_elem, + disable_non_uniq_indexes)))) + { + cleanup_new_partition(part_count); + DBUG_RETURN(error); + } + + 
m_added_file[part_count++]= new_file_array[i]; + } + } + } while (++i < num_parts); + + /* + Step 6: + State update to prepare for next write of the frm file. + */ + i= 0; + part_it.rewind(); + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_TO_BE_ADDED) + part_elem->part_state= PART_IS_ADDED; + else if (part_elem->part_state == PART_CHANGED) + part_elem->part_state= PART_IS_CHANGED; + else if (part_elem->part_state == PART_REORGED_DROPPED) + part_elem->part_state= PART_TO_BE_DROPPED; + } while (++i < num_parts); + for (i= 0; i < temp_partitions; i++) + { + partition_element *part_elem= t_it++; + DBUG_ASSERT(part_elem->part_state == PART_TO_BE_REORGED); + part_elem->part_state= PART_TO_BE_DROPPED; + } + DBUG_ASSERT(m_new_file == 0); + m_new_file= new_file_array; + for (i= 0; i < part_count; i++) + m_added_file[i]->extra(HA_EXTRA_BEGIN_ALTER_COPY); + error= copy_partitions(copied, deleted); + for (i= 0; i < part_count; i++) + m_added_file[i]->extra(HA_EXTRA_END_ALTER_COPY); + if (unlikely(error)) + { + /* + Close and unlock the new temporary partitions. + They will later be deleted through the ddl-log. + */ + cleanup_new_partition(part_count); + m_new_file= 0; + } + DBUG_RETURN(error); +} + + +/* + Copy partitions as part of ALTER TABLE of partitions + + SYNOPSIS + copy_partitions() + out:copied Number of records copied + out:deleted Number of records deleted + + RETURN VALUE + >0 Error code + 0 Success + + DESCRIPTION + change_partitions has done all the preparations, now it is time to + actually copy the data from the reorganised partitions to the new + partitions. 
+*/ + +int ha_partition::copy_partitions(ulonglong * const copied, + ulonglong * const deleted) +{ + uint reorg_part= 0; + int result= 0; + longlong func_value; + DBUG_ENTER("ha_partition::copy_partitions"); + + if (m_part_info->linear_hash_ind) + { + if (m_part_info->part_type == HASH_PARTITION) + set_linear_hash_mask(m_part_info, m_part_info->num_parts); + else + set_linear_hash_mask(m_part_info, m_part_info->num_subparts); + } + else if (m_part_info->part_type == VERSIONING_PARTITION) + { + if (m_part_info->check_constants(ha_thd(), m_part_info)) + goto init_error; + } + + while (reorg_part < m_reorged_parts) + { + handler *file= m_reorged_file[reorg_part]; + uint32 new_part; + + late_extra_cache(reorg_part); + if (unlikely((result= file->ha_rnd_init_with_error(1)))) + goto init_error; + while (TRUE) + { + if ((result= file->ha_rnd_next(m_rec0))) + { + if (result != HA_ERR_END_OF_FILE) + goto error; + /* + End-of-file reached, break out to continue with next partition or + end the copy process. + */ + break; + } + /* Found record to insert into new handler */ + if (m_part_info->get_partition_id(m_part_info, &new_part, + &func_value)) + { + /* + This record is in the original table but will not be in the new + table since it doesn't fit into any partition any longer due to + changed partitioning ranges or list values. 
+ */ + (*deleted)++; + } + else + { + /* Copy record to new handler */ + (*copied)++; + DBUG_ASSERT(!m_new_file[new_part]->row_logging); + result= m_new_file[new_part]->ha_write_row(m_rec0); + if (result) + goto error; + } + } + late_extra_no_cache(reorg_part); + file->ha_rnd_end(); + reorg_part++; + } + DBUG_EXECUTE_IF("debug_abort_copy_partitions", + DBUG_RETURN(HA_ERR_UNSUPPORTED); ); + DBUG_RETURN(FALSE); +error: + m_reorged_file[reorg_part]->ha_rnd_end(); +init_error: + DBUG_RETURN(result); +} + +/* + Update create info as part of ALTER TABLE + + SYNOPSIS + update_create_info() + create_info Create info from ALTER TABLE + + RETURN VALUE + NONE + + DESCRIPTION + Forward this handler call to the storage engine foreach + partition handler. The data_file_name for each partition may + need to be reset if the tablespace was moved. Use a dummy + HA_CREATE_INFO structure and transfer necessary data. +*/ + +void ha_partition::update_create_info(HA_CREATE_INFO *create_info) +{ + DBUG_ENTER("ha_partition::update_create_info"); + + /* + Fix for bug#38751, some engines needs info-calls in ALTER. + Archive need this since it flushes in ::info. + HA_STATUS_AUTO is optimized so it will not always be forwarded + to all partitions, but HA_STATUS_VARIABLE will. + */ + info(HA_STATUS_VARIABLE | HA_STATUS_OPEN); + + info(HA_STATUS_AUTO); + + if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) + create_info->auto_increment_value= stats.auto_increment_value; + + /* + DATA DIRECTORY and INDEX DIRECTORY are never applied to the whole + partitioned table, only its parts. + */ + my_bool from_alter= (create_info->data_file_name == (const char*) -1); + create_info->data_file_name= create_info->index_file_name= NULL; + + if (!(m_file[0]->ht->flags & HTON_CAN_READ_CONNECT_STRING_IN_PARTITION)) + create_info->connect_string= null_clex_str; + + /* + We do not need to update the individual partition DATA DIRECTORY settings + since they can be changed by ALTER TABLE ... 
REORGANIZE PARTITIONS. + */ + if (from_alter) + DBUG_VOID_RETURN; + + /* + send Handler::update_create_info() to the storage engine for each + partition that currently has a handler object. Using a dummy + HA_CREATE_INFO structure to collect DATA and INDEX DIRECTORYs. + */ + + List_iterator part_it(m_part_info->partitions); + partition_element *part_elem, *sub_elem; + uint num_subparts= m_part_info->num_subparts; + uint num_parts= (num_subparts ? m_file_tot_parts / num_subparts : + m_file_tot_parts); + HA_CREATE_INFO dummy_info; + dummy_info.init(); + + /* + Since update_create_info() can be called from mysql_prepare_alter_table() + when not all handlers are set up, we look for that condition first. + If all handlers are not available, do not call update_create_info for any. + */ + uint i, j, part; + for (i= 0; i < num_parts; i++) + { + part_elem= part_it++; + if (!part_elem) + DBUG_VOID_RETURN; + if (m_is_sub_partitioned) + { + List_iterator subpart_it(part_elem->subpartitions); + for (j= 0; j < num_subparts; j++) + { + sub_elem= subpart_it++; + if (!sub_elem) + DBUG_VOID_RETURN; + part= i * num_subparts + j; + if (part >= m_file_tot_parts || !m_file[part]) + DBUG_VOID_RETURN; + } + } + else + { + if (!m_file[i]) + DBUG_VOID_RETURN; + } + } + part_it.rewind(); + + for (i= 0; i < num_parts; i++) + { + part_elem= part_it++; + DBUG_ASSERT(part_elem); + if (m_is_sub_partitioned) + { + List_iterator subpart_it(part_elem->subpartitions); + for (j= 0; j < num_subparts; j++) + { + sub_elem= subpart_it++; + DBUG_ASSERT(sub_elem); + part= i * num_subparts + j; + DBUG_ASSERT(part < m_file_tot_parts); + DBUG_ASSERT(m_file[part]); + dummy_info.data_file_name= dummy_info.index_file_name = NULL; + m_file[part]->update_create_info(&dummy_info); + sub_elem->data_file_name = (char*) dummy_info.data_file_name; + sub_elem->index_file_name = (char*) dummy_info.index_file_name; + } + } + else + { + DBUG_ASSERT(m_file[i]); + dummy_info.data_file_name= dummy_info.index_file_name= NULL; + 
m_file[i]->update_create_info(&dummy_info); + part_elem->data_file_name = (char*) dummy_info.data_file_name; + part_elem->index_file_name = (char*) dummy_info.index_file_name; + } + } + DBUG_VOID_RETURN; +} + + +/** + Change the internal TABLE_SHARE pointer + + @param table_arg TABLE object + @param share New share to use + + @note Is used in error handling in delete_table. + All handlers should exist (lock_partitions should not be used) +*/ + +void ha_partition::change_table_ptr(TABLE *table_arg, TABLE_SHARE *share) +{ + handler **file_array; + table= table_arg; + table_share= share; + /* + m_file can be NULL when using an old cached table in DROP TABLE, when the + table just has REMOVED PARTITIONING, see Bug#42438 + */ + if (m_file) + { + file_array= m_file; + DBUG_ASSERT(*file_array); + do + { + (*file_array)->change_table_ptr(table_arg, share); + } while (*(++file_array)); + } + + if (m_added_file && m_added_file[0]) + { + /* if in middle of a drop/rename etc */ + file_array= m_added_file; + do + { + (*file_array)->change_table_ptr(table_arg, share); + } while (*(++file_array)); + } +} + + +/** + Handle delete and rename table + + @param from Full path of old table + @param to Full path of new table. May be NULL in case of delete + + @return Operation status + @retval >0 Error + @retval 0 Success + + @note Common routine to handle delete_table and rename_table. + The routine uses the partition handler file to get the + names of the partition instances. Both these routines + are called after creating the handler without table + object and thus the file is needed to discover the + names of the partitions and the underlying storage engines. 
+*/ + +uint ha_partition::del_ren_table(const char *from, const char *to) +{ + int save_error= 0; + int error; + char from_buff[FN_REFLEN + 1], to_buff[FN_REFLEN + 1], + from_lc_buff[FN_REFLEN], to_lc_buff[FN_REFLEN]; + char *name_buffer_ptr; + const char *from_path; + const char *to_path= NULL; + handler **file, **abort_file; + THD *thd= ha_thd(); + DBUG_ENTER("ha_partition::del_ren_table"); + + if (get_from_handler_file(from, thd->mem_root, false)) + DBUG_RETURN(my_errno ? my_errno : ENOENT); + DBUG_ASSERT(m_file_buffer); + DBUG_PRINT("enter", ("from: (%s) to: (%s)", from, to ? to : "(nil)")); + name_buffer_ptr= m_name_buffer_ptr; + + file= m_file; + /* The command should be logged with IF EXISTS if using a shared table */ + if (m_file[0]->ht->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE) + thd->replication_flags|= OPTION_IF_EXISTS; + + if (to == NULL) + { + /* + Delete table, start by delete the .par file. If error, break, otherwise + delete as much as possible. + */ + if (unlikely((error= handler::delete_table(from)))) + DBUG_RETURN(error); + } + + if (ha_check_if_updates_are_ignored(thd, partition_ht(), + to ? "RENAME" : "DROP")) + DBUG_RETURN(0); + + /* + Since ha_partition has HA_FILE_BASED, it must alter underlying table names + if they do not have HA_FILE_BASED and lower_case_table_names == 2. + See Bug#37402, for Mac OS X. + The appended #P#[#SP#] will remain in current case. + Using the first partitions handler, since mixing handlers is not allowed. 
+ */ + from_path= get_canonical_filename(*file, from, from_lc_buff); + if (to != NULL) + to_path= get_canonical_filename(*file, to, to_lc_buff); + do + { + if (unlikely((error= create_partition_name(from_buff, sizeof(from_buff), + from_path, name_buffer_ptr, + NORMAL_PART_NAME, FALSE)))) + goto rename_error; + + if (to != NULL) + { // Rename branch + if (unlikely((error= create_partition_name(to_buff, sizeof(to_buff), + to_path, name_buffer_ptr, + NORMAL_PART_NAME, FALSE)))) + goto rename_error; + error= (*file)->ha_rename_table(from_buff, to_buff); + if (unlikely(error)) + goto rename_error; + } + else // delete branch + { + error= (*file)->delete_table(from_buff); + } + name_buffer_ptr= strend(name_buffer_ptr) + 1; + if (unlikely(error)) + save_error= error; + } while (*(++file)); + if (to != NULL) + { + if (unlikely((error= handler::rename_table(from, to)))) + { + /* Try to revert everything, ignore errors */ + (void) handler::rename_table(to, from); + goto rename_error; + } + } + + /* Update .par file in the handlers that supports it */ + if ((*m_file)->ht->create_partitioning_metadata) + { + error= (*m_file)->ht->create_partitioning_metadata(to, from, + to == NULL ? 
+ CHF_DELETE_FLAG : + CHF_RENAME_FLAG); + DBUG_EXECUTE_IF("failed_create_partitioning_metadata", + { my_message_sql(ER_OUT_OF_RESOURCES,"Simulated crash",MYF(0)); + error= 1; + }); + if (error) + { + if (to) + { + (void) handler::rename_table(to, from); + (void) (*m_file)->ht->create_partitioning_metadata(from, to, + CHF_RENAME_FLAG); + goto rename_error; + } + else + save_error=error; + } + } + DBUG_RETURN(save_error); + +rename_error: + name_buffer_ptr= m_name_buffer_ptr; + for (abort_file= file, file= m_file; file < abort_file; file++) + { + /* Revert the rename, back from 'to' to the original 'from' */ + if (!create_partition_name(from_buff, sizeof(from_buff), from_path, + name_buffer_ptr, NORMAL_PART_NAME, FALSE) && + !create_partition_name(to_buff, sizeof(to_buff), to_path, + name_buffer_ptr, NORMAL_PART_NAME, FALSE)) + { + /* Ignore error here */ + (void) (*file)->ha_rename_table(to_buff, from_buff); + } + name_buffer_ptr= strend(name_buffer_ptr) + 1; + } + DBUG_RETURN(error); +} + +uint ha_partition::count_query_cache_dependant_tables(uint8 *tables_type) +{ + DBUG_ENTER("ha_partition::count_query_cache_dependant_tables"); + /* Here we rely on the fact that all tables are of the same type */ + uint8 type= m_file[0]->table_cache_type(); + (*tables_type)|= type; + DBUG_PRINT("enter", ("cnt: %u", (uint) m_tot_parts)); + /* + We need save underlying tables only for HA_CACHE_TBL_ASKTRANSACT: + HA_CACHE_TBL_NONTRANSACT - because all changes goes through partition table + HA_CACHE_TBL_NOCACHE - because will not be cached + HA_CACHE_TBL_TRANSACT - QC need to know that such type present + */ + DBUG_RETURN(type == HA_CACHE_TBL_ASKTRANSACT ? 
m_tot_parts : 0); +} + +my_bool ha_partition:: +reg_query_cache_dependant_table(THD *thd, + char *engine_key, uint engine_key_len, + char *cache_key, uint cache_key_len, + uint8 type, + Query_cache *cache, + Query_cache_block_table **block_table, + handler *file, + uint *n) +{ + DBUG_ENTER("ha_partition::reg_query_cache_dependant_table"); + qc_engine_callback engine_callback; + ulonglong engine_data; + /* ask undelying engine */ + if (!file->register_query_cache_table(thd, engine_key, + engine_key_len, + &engine_callback, + &engine_data)) + { + DBUG_PRINT("qcache", ("Handler does not allow caching for %.*s", + engine_key_len, engine_key)); + /* + As this can change from call to call, don't reset set + thd->lex->safe_to_cache_query + */ + thd->query_cache_is_applicable= 0; // Query can't be cached + DBUG_RETURN(TRUE); + } + (++(*block_table))->n= ++(*n); + if (!cache->insert_table(thd, cache_key_len, + cache_key, (*block_table), + (uint32) table_share->db.length, + (uint8) (cache_key_len - + table_share->table_cache_key.length), + type, + engine_callback, engine_data, + FALSE)) + DBUG_RETURN(TRUE); + DBUG_RETURN(FALSE); +} + + +my_bool ha_partition:: +register_query_cache_dependant_tables(THD *thd, + Query_cache *cache, + Query_cache_block_table **block_table, + uint *n) +{ + char *engine_key_end, *query_cache_key_end; + uint i; + uint num_parts= m_part_info->num_parts; + uint num_subparts= m_part_info->num_subparts; + int diff_length; + List_iterator part_it(m_part_info->partitions); + char engine_key[FN_REFLEN], query_cache_key[FN_REFLEN]; + DBUG_ENTER("ha_partition::register_query_cache_dependant_tables"); + + /* see ha_partition::count_query_cache_dependant_tables */ + if (m_file[0]->table_cache_type() != HA_CACHE_TBL_ASKTRANSACT) + DBUG_RETURN(FALSE); // nothing to register + + /* prepare static part of the key */ + memcpy(engine_key, table_share->normalized_path.str, + table_share->normalized_path.length); + memcpy(query_cache_key, 
table_share->table_cache_key.str, + table_share->table_cache_key.length); + + diff_length= ((int) table_share->table_cache_key.length - + (int) table_share->normalized_path.length -1); + + engine_key_end= engine_key + table_share->normalized_path.length; + query_cache_key_end= query_cache_key + table_share->table_cache_key.length -1; + + engine_key_end[0]= engine_key_end[2]= query_cache_key_end[0]= + query_cache_key_end[2]= '#'; + query_cache_key_end[1]= engine_key_end[1]= 'P'; + engine_key_end+= 3; + query_cache_key_end+= 3; + + i= 0; + do + { + partition_element *part_elem= part_it++; + char *engine_pos= strmov(engine_key_end, part_elem->partition_name); + if (m_is_sub_partitioned) + { + List_iterator subpart_it(part_elem->subpartitions); + partition_element *sub_elem; + uint j= 0, part; + engine_pos[0]= engine_pos[3]= '#'; + engine_pos[1]= 'S'; + engine_pos[2]= 'P'; + engine_pos += 4; + do + { + char *end; + uint length; + sub_elem= subpart_it++; + part= i * num_subparts + j; + /* we store the end \0 as part of the key */ + end= strmov(engine_pos, sub_elem->partition_name) + 1; + length= (uint)(end - engine_key); + /* Copy the suffix and end 0 to query cache key */ + memcpy(query_cache_key_end, engine_key_end, (end - engine_key_end)); + if (reg_query_cache_dependant_table(thd, engine_key, length, + query_cache_key, + length + diff_length, + m_file[part]->table_cache_type(), + cache, + block_table, m_file[part], + n)) + DBUG_RETURN(TRUE); + } while (++j < num_subparts); + } + else + { + char *end= engine_pos+1; // copy end \0 + uint length= (uint)(end - engine_key); + /* Copy the suffix and end 0 to query cache key */ + memcpy(query_cache_key_end, engine_key_end, (end - engine_key_end)); + if (reg_query_cache_dependant_table(thd, engine_key, length, + query_cache_key, + length + diff_length, + m_file[i]->table_cache_type(), + cache, + block_table, m_file[i], + n)) + DBUG_RETURN(TRUE); + } + } while (++i < num_parts); + DBUG_PRINT("info", ("cnt: %u", 
(uint)m_tot_parts)); + DBUG_RETURN(FALSE); +} + + +/** + Set up table share object before calling create on underlying handler + + @param table Table object + @param info Create info + @param part_elem[in,out] Pointer to used partition_element, searched if NULL + + @return status + @retval TRUE Error + @retval FALSE Success + + @details + Set up + 1) Comment on partition + 2) MAX_ROWS, MIN_ROWS on partition + 3) Index file name on partition + 4) Data file name on partition + 5) Engine-defined attributes on partition +*/ + +int ha_partition::set_up_table_before_create(TABLE *tbl, + const char *partition_name_with_path, + HA_CREATE_INFO *info, + partition_element *part_elem) +{ + int error= 0; + LEX_CSTRING part_name; + THD *thd= ha_thd(); + DBUG_ENTER("set_up_table_before_create"); + + DBUG_ASSERT(part_elem); + + if (!part_elem) + DBUG_RETURN(1); + tbl->s->max_rows= part_elem->part_max_rows; + tbl->s->min_rows= part_elem->part_min_rows; + part_name.str= strrchr(partition_name_with_path, FN_LIBCHAR)+1; + part_name.length= strlen(part_name.str); + if ((part_elem->index_file_name && + (error= append_file_to_dir(thd, + (const char**)&part_elem->index_file_name, + &part_name))) || + (part_elem->data_file_name && + (error= append_file_to_dir(thd, + (const char**)&part_elem->data_file_name, + &part_name)))) + { + DBUG_RETURN(error); + } + info->index_file_name= part_elem->index_file_name; + info->data_file_name= part_elem->data_file_name; + info->connect_string= part_elem->connect_string; + if (info->connect_string.length) + info->used_fields|= HA_CREATE_USED_CONNECTION; + tbl->s->connect_string= part_elem->connect_string; + if (part_elem->option_list) + tbl->s->option_list= part_elem->option_list; + if (part_elem->option_struct) + tbl->s->option_struct= part_elem->option_struct; + DBUG_RETURN(0); +} + + +/* + Add two names together + + SYNOPSIS + name_add() + out:dest Destination string + first_name First name + sec_name Second name + + RETURN VALUE + >0 Error + 0 Success 
+ + DESCRIPTION + Routine used to add two names with '_' in between then. Service routine + to create_handler_file + Include the NULL in the count of characters since it is needed as separator + between the partition names. +*/ + +static uint name_add(char *dest, const char *first_name, const char *sec_name) +{ + return (uint) (strxmov(dest, first_name, "#SP#", sec_name, NullS) -dest) + 1; +} + + +/** + Create the special .par file + + @param name Full path of table name + + @return Operation status + @retval FALSE Error code + @retval TRUE Success + + @note + Method used to create handler file with names of partitions, their + engine types and the number of partitions. +*/ + +bool ha_partition::create_handler_file(const char *name) +{ + partition_element *part_elem, *subpart_elem; + size_t i, j, part_name_len, subpart_name_len; + size_t tot_partition_words, tot_name_len, num_parts; + size_t tot_parts= 0; + size_t tot_len_words, tot_len_byte, chksum, tot_name_words; + char *name_buffer_ptr; + uchar *file_buffer, *engine_array; + bool result= TRUE; + char file_name[FN_REFLEN]; + char part_name[FN_REFLEN]; + char subpart_name[FN_REFLEN]; + File file; + List_iterator_fast part_it(m_part_info->partitions); + DBUG_ENTER("create_handler_file"); + + num_parts= m_part_info->partitions.elements; + DBUG_PRINT("enter", ("table name: %s num_parts: %zu", name, num_parts)); + tot_name_len= 0; + for (i= 0; i < num_parts; i++) + { + part_elem= part_it++; + if (part_elem->part_state != PART_NORMAL && + part_elem->part_state != PART_TO_BE_ADDED && + part_elem->part_state != PART_CHANGED) + continue; + tablename_to_filename(part_elem->partition_name, part_name, + FN_REFLEN); + part_name_len= strlen(part_name); + if (!m_is_sub_partitioned) + { + tot_name_len+= part_name_len + 1; + tot_parts++; + } + else + { + List_iterator_fast sub_it(part_elem->subpartitions); + for (j= 0; j < m_part_info->num_subparts; j++) + { + subpart_elem= sub_it++; + 
tablename_to_filename(subpart_elem->partition_name, + subpart_name, + FN_REFLEN); + subpart_name_len= strlen(subpart_name); + tot_name_len+= part_name_len + subpart_name_len + 5; + tot_parts++; + } + } + } + /* + File format: + Length in words 4 byte + Checksum 4 byte + Total number of partitions 4 byte + Array of engine types n * 4 bytes where + n = (m_tot_parts + 3)/4 + Length of name part in bytes 4 bytes + (Names in filename format) + Name part m * 4 bytes where + m = ((length_name_part + 3)/4)*4 + + All padding bytes are zeroed + */ + tot_partition_words= (tot_parts + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE; + tot_name_words= (tot_name_len + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE; + /* 4 static words (tot words, checksum, tot partitions, name length) */ + tot_len_words= 4 + tot_partition_words + tot_name_words; + tot_len_byte= PAR_WORD_SIZE * tot_len_words; + if (!(file_buffer= (uchar *) my_malloc(key_memory_ha_partition_file, + tot_len_byte, MYF(MY_ZEROFILL)))) + DBUG_RETURN(TRUE); + engine_array= (file_buffer + PAR_ENGINES_OFFSET); + name_buffer_ptr= (char*) (engine_array + tot_partition_words * PAR_WORD_SIZE + + PAR_WORD_SIZE); + part_it.rewind(); + for (i= 0; i < num_parts; i++) + { + part_elem= part_it++; + if (part_elem->part_state != PART_NORMAL && + part_elem->part_state != PART_TO_BE_ADDED && + part_elem->part_state != PART_CHANGED) + continue; + if (!m_is_sub_partitioned) + { + tablename_to_filename(part_elem->partition_name, part_name, FN_REFLEN); + name_buffer_ptr= strmov(name_buffer_ptr, part_name)+1; + *engine_array= (uchar) ha_legacy_type(part_elem->engine_type); + DBUG_PRINT("info", ("engine: %u", *engine_array)); + engine_array++; + } + else + { + List_iterator_fast sub_it(part_elem->subpartitions); + for (j= 0; j < m_part_info->num_subparts; j++) + { + subpart_elem= sub_it++; + tablename_to_filename(part_elem->partition_name, part_name, + FN_REFLEN); + tablename_to_filename(subpart_elem->partition_name, subpart_name, + FN_REFLEN); + name_buffer_ptr+= 
name_add(name_buffer_ptr, + part_name, + subpart_name); + *engine_array= (uchar) ha_legacy_type(subpart_elem->engine_type); + DBUG_PRINT("info", ("engine: %u", *engine_array)); + engine_array++; + } + } + } + chksum= 0; + int4store(file_buffer, tot_len_words); + int4store(file_buffer + PAR_NUM_PARTS_OFFSET, tot_parts); + int4store(file_buffer + PAR_ENGINES_OFFSET + + (tot_partition_words * PAR_WORD_SIZE), + tot_name_len); + for (i= 0; i < tot_len_words; i++) + chksum^= uint4korr(file_buffer + PAR_WORD_SIZE * i); + int4store(file_buffer + PAR_CHECKSUM_OFFSET, chksum); + /* + Add .par extension to the file name. + Create and write and close file + to be used at open, delete_table and rename_table + */ + fn_format(file_name, name, "", ha_par_ext, MY_APPEND_EXT); + if ((file= mysql_file_create(key_file_ha_partition_par, + file_name, CREATE_MODE, O_RDWR | O_TRUNC, + MYF(MY_WME))) >= 0) + { + result= mysql_file_write(file, (uchar *) file_buffer, tot_len_byte, + MYF(MY_WME | MY_NABP)) != 0; + + /* Write connection information (for federatedx engine) */ + part_it.rewind(); + for (i= 0; i < num_parts && !result; i++) + { + uchar buffer[4]; + part_elem= part_it++; + size_t length= part_elem->connect_string.length; + int4store(buffer, length); + if (my_write(file, buffer, 4, MYF(MY_WME | MY_NABP)) || + my_write(file, (uchar *) part_elem->connect_string.str, length, + MYF(MY_WME | MY_NABP))) + { + result= TRUE; + break; + } + } + (void) mysql_file_close(file, MYF(0)); + if (result) + mysql_file_delete(key_file_ha_partition_par, file_name, MYF(MY_WME)); + } + else + result= TRUE; + my_free(file_buffer); + DBUG_RETURN(result); +} + + +/** + Clear handler variables and free some memory +*/ + +void ha_partition::clear_handler_file() +{ + if (m_engine_array) + plugin_unlock_list(NULL, m_engine_array, m_tot_parts); + free_root(&m_mem_root, MYF(MY_KEEP_PREALLOC)); + m_file_buffer= NULL; + m_engine_array= NULL; + m_connect_string= NULL; +} + + +/** + Create underlying handler objects 
+ + @param mem_root Allocate memory through this + + @return Operation status + @retval TRUE Error + @retval FALSE Success +*/ + +bool ha_partition::create_handlers(MEM_ROOT *mem_root) +{ + uint i; + uint alloc_len= (m_tot_parts + 1) * sizeof(handler*); + handlerton *hton0; + DBUG_ENTER("create_handlers"); + + if (!(m_file= (handler **) alloc_root(mem_root, alloc_len))) + DBUG_RETURN(TRUE); + m_file_tot_parts= m_tot_parts; + bzero((char*) m_file, alloc_len); + for (i= 0; i < m_tot_parts; i++) + { + handlerton *hton= plugin_data(m_engine_array[i], handlerton*); + if (!(m_file[i]= get_new_handler(table_share, mem_root, hton))) + DBUG_RETURN(TRUE); + DBUG_PRINT("info", ("engine_type: %u", hton->db_type)); + } + /* For the moment we only support partition over the same table engine */ + hton0= plugin_data(m_engine_array[0], handlerton*); + if (hton0 == myisam_hton) + { + DBUG_PRINT("info", ("MyISAM")); + m_myisam= TRUE; + } + /* INNODB may not be compiled in... */ + else if (ha_legacy_type(hton0) == DB_TYPE_INNODB) + { + DBUG_PRINT("info", ("InnoDB")); + m_innodb= TRUE; + } + DBUG_RETURN(FALSE); +} + + +/* + Create underlying handler objects from partition info + + SYNOPSIS + new_handlers_from_part_info() + mem_root Allocate memory through this + + RETURN VALUE + TRUE Error + FALSE Success +*/ + +bool ha_partition::new_handlers_from_part_info(MEM_ROOT *mem_root) +{ + uint i, j, part_count; + partition_element *part_elem; + uint alloc_len= (m_tot_parts + 1) * sizeof(handler*); + List_iterator_fast part_it(m_part_info->partitions); + DBUG_ENTER("ha_partition::new_handlers_from_part_info"); + + if (!(m_file= (handler **) alloc_root(mem_root, alloc_len))) + goto error; + + m_file_tot_parts= m_tot_parts; + bzero((char*) m_file, alloc_len); + DBUG_ASSERT(m_part_info->num_parts > 0); + + i= 0; + part_count= 0; + /* + Don't know the size of the underlying storage engine, invent a number of + bytes allocated for error message if allocation fails + */ + do + { + part_elem= 
part_it++; + if (m_is_sub_partitioned) + { + for (j= 0; j < m_part_info->num_subparts; j++) + { + if (!(m_file[part_count++]= get_new_handler(table_share, mem_root, + part_elem->engine_type))) + goto error; + DBUG_PRINT("info", ("engine_type: %u", + (uint) ha_legacy_type(part_elem->engine_type))); + } + } + else + { + if (!(m_file[part_count++]= get_new_handler(table_share, mem_root, + part_elem->engine_type))) + goto error; + DBUG_PRINT("info", ("engine_type: %u", + (uint) ha_legacy_type(part_elem->engine_type))); + } + } while (++i < m_part_info->num_parts); + if (part_elem->engine_type == myisam_hton) + { + DBUG_PRINT("info", ("MyISAM")); + m_myisam= TRUE; + } + DBUG_RETURN(FALSE); +error: + DBUG_RETURN(TRUE); +} + + +/** + Read the .par file to get the partitions engines and names + + @param name Name of table file (without extension) + + @return Operation status + @retval 0 success + @retval 1 no par file + @retval # other error + + @note On success, m_file_buffer is allocated and must be + freed by the caller. m_name_buffer_ptr and m_tot_parts is also set. 
*/

int ha_partition::read_par_file(const char *name)
{
  char buff[FN_REFLEN];
  uchar *tot_name_len_offset;
  File file;
  uchar *file_buffer;
  uint i, len_bytes, len_words, tot_partition_words, tot_name_words, chksum;
  DBUG_ENTER("ha_partition::read_par_file");
  DBUG_PRINT("enter", ("table name: '%s'", name));

  /* Already read: nothing to do */
  if (m_file_buffer)
    DBUG_RETURN(0);
  fn_format(buff, name, "", ha_par_ext, MY_APPEND_EXT);

  /* Following could be done with mysql_file_stat to read in whole file */
  if ((file= mysql_file_open(key_file_ha_partition_par,
                             buff, O_RDONLY | O_SHARE, MYF(0))) < 0)
    DBUG_RETURN(1);
  /* First word of the file is its total length in 4-byte words */
  if (mysql_file_read(file, (uchar *) &buff[0], PAR_WORD_SIZE, MYF(MY_NABP)))
    goto err1;
  len_words= uint4korr(buff);
  len_bytes= PAR_WORD_SIZE * len_words;
  if (mysql_file_seek(file, 0, MY_SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR)
    goto err1;
  if (!(file_buffer= (uchar*) alloc_root(&m_mem_root, len_bytes)))
    goto err1;
  if (mysql_file_read(file, file_buffer, len_bytes, MYF(MY_NABP)))
    goto err2;

  /*
    The checksum word is stored so that XOR-ing all words of the file
    yields 0; a non-zero result means the file is corrupt.
  */
  chksum= 0;
  for (i= 0; i < len_words; i++)
    chksum ^= uint4korr((file_buffer) + PAR_WORD_SIZE * i);
  if (chksum)
    goto err2;
  m_tot_parts= uint4korr((file_buffer) + PAR_NUM_PARTS_OFFSET);
  DBUG_PRINT("info", ("No of parts: %u", m_tot_parts));
  /* One byte of engine type per partition, rounded up to whole words */
  tot_partition_words= (m_tot_parts + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;

  tot_name_len_offset= file_buffer + PAR_ENGINES_OFFSET +
                       PAR_WORD_SIZE * tot_partition_words;
  tot_name_words= (uint4korr(tot_name_len_offset) + PAR_WORD_SIZE - 1) /
                  PAR_WORD_SIZE;
  /*
    Verify the total length = tot size word, checksum word, num parts word +
    engines array + name length word + name array.
  */
  if (len_words != (tot_partition_words + tot_name_words + 4))
    goto err2;
  m_file_buffer= file_buffer;          // Will be freed in clear_handler_file()
  m_name_buffer_ptr= (char*) (tot_name_len_offset + PAR_WORD_SIZE);

  if (!(m_connect_string= (LEX_CSTRING*)
        alloc_root(&m_mem_root, m_tot_parts * sizeof(LEX_CSTRING))))
    goto err2;
  bzero(m_connect_string, m_tot_parts * sizeof(LEX_CSTRING));

  /* Read connection arguments (for federated X engine) */
  for (i= 0; i < m_tot_parts; i++)
  {
    LEX_CSTRING connect_string;
    uchar buffer[4];
    char *tmp;
    if (my_read(file, buffer, 4, MYF(MY_NABP)))
    {
      /* No extra options; Probably not a federatedx engine */
      break;
    }
    connect_string.length= uint4korr(buffer);
    connect_string.str= tmp= (char*) alloc_root(&m_mem_root,
                                                connect_string.length+1);
    if (my_read(file, (uchar*) connect_string.str, connect_string.length,
                MYF(MY_NABP)))
      break;
    tmp[connect_string.length]= 0;   // NUL-terminate for later C-string use
    m_connect_string[i]= connect_string;
  }

  (void) mysql_file_close(file, MYF(0));
  DBUG_RETURN(0);

err2:
err1:
  /* alloc_root() memory is owned by m_mem_root; only the fd needs closing */
  (void) mysql_file_close(file, MYF(0));
  DBUG_RETURN(2);
}


/**
  Setup m_engine_array

  @param mem_root  MEM_ROOT to use for allocating new handlers

  @return Operation status
    @retval false  Success
    @retval true   Failure
*/

bool ha_partition::setup_engine_array(MEM_ROOT *mem_root,
                                      handlerton *first_engine)
{
  uint i;
  uchar *buff;
  handlerton **engine_array;
  enum legacy_db_type db_type, first_db_type;

  DBUG_ASSERT(!m_file);
  DBUG_ENTER("ha_partition::setup_engine_array");
  engine_array= (handlerton **) my_alloca(m_tot_parts * sizeof(handlerton*));
  if (!engine_array)
    DBUG_RETURN(true);

  /* Per-partition legacy engine-type bytes follow the .par header */
  buff= (uchar *) (m_file_buffer + PAR_ENGINES_OFFSET);

  first_db_type= (enum legacy_db_type) buff[0];
  if (!(m_engine_array= (plugin_ref*)
        alloc_root(&m_mem_root, m_tot_parts * sizeof(plugin_ref))))
    goto err;

  for (i= 0; i < m_tot_parts; i++)
  {
    db_type= (enum legacy_db_type) buff[i];
    /* All partitions must use the same storage engine */
    if (db_type != first_db_type)
    {
      DBUG_PRINT("error", ("partition %u engine %d is not same as "
                           "first partition %d", i, db_type,
                           (int) first_db_type));
      DBUG_ASSERT(0);
      clear_handler_file();
      goto err;
    }
    /* Take one plugin lock per partition; released in clear_handler_file() */
    m_engine_array[i]= ha_lock_engine(NULL, first_engine);
    if (!m_engine_array[i])
    {
      clear_handler_file();
      goto err;
    }
  }

  my_afree(engine_array);

  if (create_handlers(mem_root))
  {
    clear_handler_file();
    DBUG_RETURN(true);
  }

  DBUG_RETURN(false);

err:
  my_afree(engine_array);
  DBUG_RETURN(true);
}


/*
  Determine the default partition engine for this table.

  Returns the engine from table_share when available; otherwise (e.g. on
  DROP TABLE with no share) tries to read it from the .frm file's extra2
  section, falling back to the legacy engine-type byte of the .par file.
*/

handlerton *ha_partition::get_def_part_engine(const char *name)
{
  if (table_share)
  {
    if (table_share->default_part_plugin)
      return plugin_data(table_share->default_part_plugin, handlerton *);
  }
  else
  {
    // DROP TABLE, for example
    char buff[FN_REFLEN];
    File file;
    MY_STAT state;
    uchar *frm_image= 0;
    handlerton *hton= 0;
    bool use_legacy_type= false;

    fn_format(buff, name, "", reg_ext, MY_APPEND_EXT);

    file= mysql_file_open(key_file_frm, buff, O_RDONLY | O_SHARE, MYF(0));
    if (file < 0)
      return NULL;

    if (mysql_file_fstat(file, &state, MYF(MY_WME)))
      goto err;
    /* 64 bytes is the fixed .frm header size; anything smaller is invalid */
    if (state.st_size <= 64)
      goto err;
    if (!(frm_image= (uchar*)my_malloc(key_memory_Partition_share,
                                       state.st_size, MYF(MY_WME))))
      goto err;
    if (mysql_file_read(file, frm_image, state.st_size, MYF(MY_NABP)))
      goto err;

    if (frm_image[64] != '/')
    {
      /* Scan the extra2 segment for a default-partition-engine record */
      const uchar *e2= frm_image + 64;
      const uchar *e2end = e2 + uint2korr(frm_image + 4);
      if (e2end > frm_image + state.st_size)
        goto err;
      while (e2 + 3 < e2end)
      {
        uchar type= *e2++;
        size_t length= extra2_read_len(&e2, e2end);
        if (!length)
          goto err;
        if (type == EXTRA2_DEFAULT_PART_ENGINE)
        {
          LEX_CSTRING name= { (char*)e2, length };
          plugin_ref plugin= ha_resolve_by_name(ha_thd(), &name, false);
          if (plugin)
            hton= plugin_data(plugin, handlerton *);
          goto err;                  // Found the record; stop scanning
        }
        e2+= length;
      }
    }
    use_legacy_type= true;
err:
    my_free(frm_image);
    mysql_file_close(file, MYF(0));
    if (!use_legacy_type)
      return hton;                   // May be NULL if resolution failed
  }

  /* Fall back to the legacy engine-type byte stored in the .par file */
  return ha_resolve_by_legacy_type(ha_thd(),
           (enum legacy_db_type)m_file_buffer[PAR_ENGINES_OFFSET]);
}


/**
  Get info about partition engines and their names from the .par file

  @param name      Full path of table name
  @param mem_root  Allocate memory through this
  @param is_clone  If it is a clone, don't create new handlers

  @return Operation status
    @retval true   Error
    @retval false  Success

  @note Open handler file to get partition names, engine types and number of
  partitions.
*/

bool ha_partition::get_from_handler_file(const char *name, MEM_ROOT *mem_root,
                                         bool is_clone)
{
  int error;
  DBUG_ENTER("ha_partition::get_from_handler_file");
  DBUG_PRINT("enter", ("table name: '%s'", name));

  if (m_file_buffer)
    DBUG_RETURN(false);

  if ((error= read_par_file(name)))
  {
    /*
      error == 1 means the .par file is missing; for non-clones try to
      regenerate it from the .frm partition SQL (MySQL 5.6/5.7 upgrade path).
    */
    if (error != 1 || is_clone || re_create_par_file(name))
      DBUG_RETURN(true);
    if (read_par_file(name))                    // Test file
      DBUG_RETURN(true);
  }

  handlerton *default_engine= get_def_part_engine(name);
  if (!default_engine)
    DBUG_RETURN(true);

  if (!is_clone && setup_engine_array(mem_root, default_engine))
    DBUG_RETURN(true);

  DBUG_RETURN(false);
}


/*
  Create .par file from SQL syntax.

  This is only used with partitioned tables from MySQL 5.6 or 5.7
  which do not have a .par file.
*/

bool ha_partition::re_create_par_file(const char *name)
{
  THD *thd= current_thd;
  TABLE table;                       // Dummy TABLE used only for unpacking
  TABLE_SHARE *share= table_share;
  Query_arena *backup_stmt_arena_ptr= thd->stmt_arena;
  Query_arena backup_arena;
  uint8 save_context_analysis_only= thd->lex->context_analysis_only;
  bool work_part_info_used;
  bool tmp;
  DBUG_ENTER("ha_partition:re_create_par_file");

  /* Share can be NULL in case of delete of non existing table */
  if (!share ||
      !(share->mysql_version >= 50600 && share->mysql_version <= 50799))
    DBUG_RETURN(1);

  bzero((char*) &table, sizeof(table));
  table.in_use= thd;
  table.s= share;
  table.file= this;
  init_sql_alloc(key_memory_TABLE, &table.mem_root, TABLE_ALLOC_BLOCK_SIZE,
                 0, MYF(0));

  /*
    Items created while parsing the partition clause must live on the dummy
    table's mem_root, so temporarily switch the THD's active arena.
  */
  Query_arena part_func_arena(&table.mem_root,
                              Query_arena::STMT_INITIALIZED);
  thd->set_n_backup_active_arena(&part_func_arena, &backup_arena);
  thd->stmt_arena= &part_func_arena;

  tmp= mysql_unpack_partition(thd, share->partition_info_str,
                              share->partition_info_str_len,
                              &table, 0,
                              plugin_hton(share->default_part_plugin),
                              &work_part_info_used);

  if (!tmp && m_part_info->partitions.elements == 0)
  {
    tmp= m_part_info->set_up_defaults_for_partitioning(thd, this,
                                                       (HA_CREATE_INFO*) 0,
                                                       0);
    if (m_part_info->partitions.elements == 0)
    {
      /* We did not succed in creating default partitions */
      tmp= 1;
    }
  }

  /* Restore the arena before writing the file */
  thd->stmt_arena= backup_stmt_arena_ptr;
  thd->restore_active_arena(&part_func_arena, &backup_arena);
  if (!tmp)
  {
    tmp= create_handler_file(name);
  }

  if (table.part_info)
    free_items(table.part_info->item_free_list);
  thd->lex->context_analysis_only= save_context_analysis_only;
  if (table.expr_arena)
    table.expr_arena->free_items();
  free_root(&table.mem_root, MYF(0));
  DBUG_RETURN(tmp);
}



/****************************************************************************
                MODULE open/close object
****************************************************************************/

/**
Get the partition name. + + @param part Struct containing name and length + @param[out] length Length of the name + + @return Partition name +*/ + +static uchar *get_part_name(PART_NAME_DEF *part, size_t *length, + my_bool not_used __attribute__((unused))) +{ + *length= part->length; + return part->partition_name; +} + + +/** + Insert a partition name in the partition_name_hash. + + @param name Name of partition + @param part_id Partition id (number) + @param is_subpart Set if the name belongs to a subpartition + + @return Operation status + @retval true Failure + @retval false Success +*/ + +bool ha_partition::insert_partition_name_in_hash(const char *name, uint part_id, + bool is_subpart) +{ + PART_NAME_DEF *part_def; + uchar *part_name; + size_t part_name_length; + DBUG_ENTER("ha_partition::insert_partition_name_in_hash"); + /* + Calculate and store the length here, to avoid doing it when + searching the hash. + */ + part_name_length= strlen(name); + /* + Must use memory that lives as long as table_share. + Freed in the Partition_share destructor. + Since we use my_multi_malloc, then my_free(part_def) will also free + part_name, as a part of my_hash_free. + */ + if (!my_multi_malloc(key_memory_Partition_share, MY_WME, + &part_def, sizeof(PART_NAME_DEF), + &part_name, part_name_length + 1, + NULL)) + DBUG_RETURN(true); + memcpy(part_name, name, part_name_length + 1); + part_def->partition_name= part_name; + part_def->length= (uint)part_name_length; + part_def->part_id= part_id; + part_def->is_subpart= is_subpart; + if (my_hash_insert(&part_share->partition_name_hash, (uchar *) part_def)) + { + my_free(part_def); + DBUG_RETURN(true); + } + DBUG_RETURN(false); +} + + +/** + Populate the partition_name_hash in part_share. +*/ + +bool ha_partition::populate_partition_name_hash() +{ + List_iterator part_it(m_part_info->partitions); + uint num_parts= m_part_info->num_parts; + uint num_subparts= m_is_sub_partitioned ? 
m_part_info->num_subparts : 1; + uint tot_names; + uint i= 0; + DBUG_ASSERT(part_share); + + DBUG_ENTER("ha_partition::populate_partition_name_hash"); + + /* + partition_name_hash is only set once and never changed + -> OK to check without locking. + */ + + if (part_share->partition_name_hash_initialized) + DBUG_RETURN(false); + lock_shared_ha_data(); + if (part_share->partition_name_hash_initialized) + { + unlock_shared_ha_data(); + DBUG_RETURN(false); + } + tot_names= m_is_sub_partitioned ? m_tot_parts + num_parts : num_parts; + if (my_hash_init(key_memory_Partition_share, + &part_share->partition_name_hash, system_charset_info, + tot_names, 0, 0, (my_hash_get_key) get_part_name, my_free, + HASH_UNIQUE)) + { + unlock_shared_ha_data(); + DBUG_RETURN(TRUE); + } + + do + { + partition_element *part_elem= part_it++; + DBUG_ASSERT(part_elem->part_state == PART_NORMAL); + if (part_elem->part_state == PART_NORMAL) + { + if (insert_partition_name_in_hash(part_elem->partition_name, + i * num_subparts, false)) + goto err; + if (m_is_sub_partitioned) + { + List_iterator + subpart_it(part_elem->subpartitions); + partition_element *sub_elem; + uint j= 0; + do + { + sub_elem= subpart_it++; + if (insert_partition_name_in_hash(sub_elem->partition_name, + i * num_subparts + j, true)) + goto err; + + } while (++j < num_subparts); + } + } + } while (++i < num_parts); + + part_share->partition_name_hash_initialized= true; + unlock_shared_ha_data(); + + DBUG_RETURN(FALSE); +err: + my_hash_free(&part_share->partition_name_hash); + unlock_shared_ha_data(); + + DBUG_RETURN(TRUE); +} + + +/** + Set Handler_share pointer and allocate Handler_share pointers + for each partition and set those. + + @param ha_share_arg Where to store/retrieve the Partitioning_share pointer + to be shared by all instances of the same table. 
+ + @return Operation status + @retval true Failure + @retval false Success +*/ + +bool ha_partition::set_ha_share_ref(Handler_share **ha_share_arg) +{ + Handler_share **ha_shares; + uint i; + DBUG_ENTER("ha_partition::set_ha_share_ref"); + + DBUG_ASSERT(!part_share); + DBUG_ASSERT(table_share); + DBUG_ASSERT(!m_is_clone_of); + DBUG_ASSERT(m_tot_parts); + if (handler::set_ha_share_ref(ha_share_arg)) + DBUG_RETURN(true); + if (!(part_share= get_share())) + DBUG_RETURN(true); + DBUG_ASSERT(part_share->partitions_share_refs.num_parts >= m_tot_parts); + ha_shares= part_share->partitions_share_refs.ha_shares; + for (i= 0; i < m_tot_parts; i++) + { + if (m_file[i]->set_ha_share_ref(&ha_shares[i])) + DBUG_RETURN(true); + } + DBUG_RETURN(false); +} + + +/** + Get the PARTITION_SHARE for the table. + + @return Operation status + @retval true Error + @retval false Success + + @note Gets or initializes the Partition_share object used by partitioning. + The Partition_share is used for handling the auto_increment etc. +*/ + +Partition_share *ha_partition::get_share() +{ + Partition_share *tmp_share; + DBUG_ENTER("ha_partition::get_share"); + DBUG_ASSERT(table_share); + + lock_shared_ha_data(); + if (!(tmp_share= static_cast(get_ha_share_ptr()))) + { + tmp_share= new Partition_share; + if (!tmp_share) + goto err; + if (tmp_share->init(m_tot_parts)) + { + delete tmp_share; + tmp_share= NULL; + goto err; + } + set_ha_share_ptr(static_cast(tmp_share)); + } +err: + unlock_shared_ha_data(); + DBUG_RETURN(tmp_share); +} + + + +/** + Helper function for freeing all internal bitmaps. 
*/

void ha_partition::free_partition_bitmaps()
{
  /* Initialize the bitmap we use to minimize ha_start_bulk_insert calls */
  my_bitmap_free(&m_bulk_insert_started);
  my_bitmap_free(&m_locked_partitions);
  my_bitmap_free(&m_partitions_to_reset);
  my_bitmap_free(&m_key_not_found_partitions);
  my_bitmap_free(&m_opened_partitions);
  my_bitmap_free(&m_mrr_used_partitions);
}


/**
  Helper function for initializing all internal bitmaps.

  Note:
  All bitmaps, including partially allocated, are freed in
  free_partion_bitmaps()
*/

bool ha_partition::init_partition_bitmaps()
{
  DBUG_ENTER("ha_partition::init_partition_bitmaps");

  /* Initialize the bitmap we use to minimize ha_start_bulk_insert calls */
  if (my_bitmap_init(&m_bulk_insert_started, NULL, m_tot_parts + 1))
    DBUG_RETURN(true);

  /* Initialize the bitmap we use to keep track of locked partitions */
  if (my_bitmap_init(&m_locked_partitions, NULL, m_tot_parts))
    DBUG_RETURN(true);

  /*
    Initialize the bitmap we use to keep track of partitions which may have
    something to reset in ha_reset().
  */
  if (my_bitmap_init(&m_partitions_to_reset, NULL, m_tot_parts))
    DBUG_RETURN(true);

  /*
    Initialize the bitmap we use to keep track of partitions which returned
    HA_ERR_KEY_NOT_FOUND from index_read_map.
  */
  if (my_bitmap_init(&m_key_not_found_partitions, NULL, m_tot_parts))
    DBUG_RETURN(true);

  if (my_bitmap_init(&m_mrr_used_partitions, NULL, m_tot_parts))
    DBUG_RETURN(true);

  if (my_bitmap_init(&m_opened_partitions, NULL, m_tot_parts))
    DBUG_RETURN(true);

  m_file_sample= NULL;

  /* Initialize the bitmap for read/lock_partitions */
  if (!m_is_clone_of)
  {
    DBUG_ASSERT(!m_clone_mem_root);
    if (m_part_info->set_partition_bitmaps(NULL))
      DBUG_RETURN(true);
  }
  DBUG_RETURN(false);
}


/*
  Open handler object

  SYNOPSIS
    open()
    name                  Full path of table name
    mode                  Open mode flags
    test_if_locked        ?

  RETURN VALUE
    >0                    Error
    0                     Success

  DESCRIPTION
    Used for opening tables. The name will be the name of the file.
    A table is opened when it needs to be opened. For instance
    when a request comes in for a select on the table (tables are not
    open and closed for each request, they are cached).

    Called from handler.cc by handler::ha_open(). The server opens all tables
    by calling ha_open() which then calls the handler specific open().
*/

int ha_partition::open(const char *name, int mode, uint test_if_locked)
{
  int error= HA_ERR_INITIALIZATION;
  handler **file;
  char name_buff[FN_REFLEN + 1];
  ulonglong check_table_flags;
  DBUG_ENTER("ha_partition::open");

  DBUG_ASSERT(table->s == table_share);
  ref_length= 0;
  m_mode= mode;
  m_open_test_lock= test_if_locked;
  m_part_field_array= m_part_info->full_part_field_array;
  /* Read .par file (engines/names); for clones, skip handler creation */
  if (get_from_handler_file(name, &table->mem_root, MY_TEST(m_is_clone_of)))
    DBUG_RETURN(error);
  if (populate_partition_name_hash())
  {
    DBUG_RETURN(HA_ERR_INITIALIZATION);
  }
  m_start_key.length= 0;
  m_rec0= table->record[0];
  m_rec_length= table_share->reclength;
  if (!m_part_ids_sorted_by_num_of_records)
  {
    if (!(m_part_ids_sorted_by_num_of_records=
          (uint32*) my_malloc(key_memory_ha_partition_part_ids,
                              m_tot_parts * sizeof(uint32), MYF(MY_WME))))
      DBUG_RETURN(error);
    uint32 i;
    /* Initialize it with all partition ids. */
    for (i= 0; i < m_tot_parts; i++)
      m_part_ids_sorted_by_num_of_records[i]= i;
  }

  if (init_partition_bitmaps())
    goto err_alloc;

  if (!MY_TEST(m_is_clone_of) &&
      unlikely((error=
                m_part_info->set_partition_bitmaps(m_partitions_to_open))))
    goto err_alloc;

  /* Allocate memory used with MMR */
  if (!(m_range_info= (void **)
        my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME),
                        &m_range_info, sizeof(range_id_t) * m_tot_parts,
                        &m_stock_range_seq, sizeof(uint) * m_tot_parts,
                        &m_mrr_buffer, sizeof(HANDLER_BUFFER) * m_tot_parts,
                        &m_mrr_buffer_size, sizeof(uint) * m_tot_parts,
                        &m_part_mrr_range_length, sizeof(uint) * m_tot_parts,
                        &m_part_mrr_range_first,
                        sizeof(PARTITION_PART_KEY_MULTI_RANGE *) * m_tot_parts,
                        &m_part_mrr_range_current,
                        sizeof(PARTITION_PART_KEY_MULTI_RANGE *) * m_tot_parts,
                        &m_partition_part_key_multi_range_hld,
                        sizeof(PARTITION_PART_KEY_MULTI_RANGE_HLD) *
                        m_tot_parts,
                        NullS)))
    goto err_alloc;

  bzero(m_mrr_buffer, m_tot_parts * sizeof(HANDLER_BUFFER));
  bzero(m_part_mrr_range_first,
        sizeof(PARTITION_PART_KEY_MULTI_RANGE *) * m_tot_parts);

  if (m_is_clone_of)
  {
    uint i, alloc_len;
    char *name_buffer_ptr;
    DBUG_ASSERT(m_clone_mem_root);
    /* Allocate an array of handler pointers for the partitions handlers. */
    alloc_len= (m_tot_parts + 1) * sizeof(handler*);
    if (!(m_file= (handler **) alloc_root(m_clone_mem_root, alloc_len)))
    {
      error= HA_ERR_INITIALIZATION;
      goto err_alloc;
    }
    memset(m_file, 0, alloc_len);
    name_buffer_ptr= m_name_buffer_ptr;
    /*
      Populate them by cloning the original partitions. This also opens them.
      Note that file->ref is allocated too.
    */
    file= m_is_clone_of->m_file;
    for (i= 0; i < m_tot_parts; i++)
    {
      if (!bitmap_is_set(&m_is_clone_of->m_opened_partitions, i))
      {
        /* Here we should just create the handler instance, not open it. */
        if (!(m_file[i]= get_new_handler(table->s, m_clone_mem_root,
                                         file[i]->ht)))
        {
          error= HA_ERR_INITIALIZATION;
          file= &m_file[i];
          goto err_handler;
        }
        if (m_file[i]->set_ha_share_ref(file[i]->ha_share))
        {
          error= HA_ERR_INITIALIZATION;
          goto err_handler;
        }
        continue;
      }

      if (unlikely((error= create_partition_name(name_buff, sizeof(name_buff),
                                                 name, name_buffer_ptr,
                                                 NORMAL_PART_NAME, FALSE))))
        goto err_handler;
      /* ::clone() will also set ha_share from the original. */
      if (!(m_file[i]= file[i]->clone(name_buff, m_clone_mem_root)))
      {
        error= HA_ERR_INITIALIZATION;
        file= &m_file[i];
        goto err_handler;
      }
      if (!m_file_sample)
        m_file_sample= m_file[i];
      /* Partition names are stored back-to-back as NUL-terminated strings */
      name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
      bitmap_set_bit(&m_opened_partitions, i);
    }
  }
  else
  {
    check_insert_or_replace_autoincrement();
    if (unlikely((error= open_read_partitions(name_buff, sizeof(name_buff)))))
      goto err_handler;
    m_num_locks= m_file_sample->lock_count();
  }
  /*
    We want to know the upper bound for locks, to allocate enough memory.
    There is no performance lost if we simply return in lock_count() the
    maximum number locks needed, only some minor over allocation of memory
    in get_lock_data().
  */
  m_num_locks*= m_tot_parts;

  file= m_file;
  ref_length= get_open_file_sample()->ref_length;
  check_table_flags= ((get_open_file_sample()->ha_table_flags() &
                       ~(PARTITION_DISABLED_TABLE_FLAGS)) |
                      (PARTITION_ENABLED_TABLE_FLAGS));
  while (*(++file))
  {
    if (!bitmap_is_set(&m_opened_partitions, (uint)(file - m_file)))
      continue;
    /* MyISAM can have smaller ref_length for partitions with MAX_ROWS set */
    set_if_bigger(ref_length, ((*file)->ref_length));
    /*
      Verify that all partitions have the same set of table flags.
      Mask all flags that partitioning enables/disables.
    */
    if (check_table_flags != (((*file)->ha_table_flags() &
                               ~(PARTITION_DISABLED_TABLE_FLAGS)) |
                              (PARTITION_ENABLED_TABLE_FLAGS)))
    {
      error= HA_ERR_INITIALIZATION;
      /* set file to last handler, so all of them are closed */
      file= &m_file[m_tot_parts - 1];
      goto err_handler;
    }
  }
  key_used_on_scan= get_open_file_sample()->key_used_on_scan;
  implicit_emptied= get_open_file_sample()->implicit_emptied;
  /*
    Add 2 bytes for partition id in position ref length.
    ref_length=max_in_all_partitions(ref_length) + PARTITION_BYTES_IN_POS
  */
  ref_length+= PARTITION_BYTES_IN_POS;
  m_ref_length= ref_length;

  /*
    Release buffer read from .par file. It will not be reused again after
    being opened once.
  */
  clear_handler_file();

  DBUG_ASSERT(part_share);
  lock_shared_ha_data();
  /* Protect against cloned file, for which we don't need engine name */
  if (m_file[0])
    part_share->partition_engine_name= real_table_type();
  else
    part_share->partition_engine_name= 0;   // Checked in ha_table_exists()
  unlock_shared_ha_data();

  /*
    Some handlers update statistics as part of the open call. This will in
    some cases corrupt the statistics of the partition handler and thus
    to ensure we have correct statistics we call info from open after
    calling open on all individual handlers.
  */
  m_handler_status= handler_opened;
  if (m_part_info->part_expr)
    m_part_func_monotonicity_info=
      m_part_info->part_expr->get_monotonicity_info();
  else if (m_part_info->list_of_part_fields)
    m_part_func_monotonicity_info= MONOTONIC_STRICT_INCREASING;
  info(HA_STATUS_VARIABLE | HA_STATUS_CONST | HA_STATUS_OPEN);
  DBUG_RETURN(0);

err_handler:
  DEBUG_SYNC(ha_thd(), "partition_open_error");
  DBUG_ASSERT(m_tot_parts > 0);
  /* Close every opened partition, counting down to avoid uint underflow */
  for (uint i= m_tot_parts - 1; ; --i)
  {
    if (bitmap_is_set(&m_opened_partitions, i))
      m_file[i]->ha_close();
    if (!i)
      break;
  }
err_alloc:
  free_partition_bitmaps();
  my_free(m_range_info);
  m_range_info= 0;

  DBUG_RETURN(error);
}


/*
  Disabled since it is not possible to prune yet.
  without pruning, it need to rebind/unbind every partition in every
  statement which uses a table from the table cache. Will also use
  as many PSI_tables as there are partitions.
*/

#ifdef HAVE_M_PSI_PER_PARTITION
void ha_partition::unbind_psi()
{
  uint i;

  DBUG_ENTER("ha_partition::unbind_psi");
  handler::unbind_psi();
  for (i= 0; i < m_tot_parts; i++)
  {
    DBUG_ASSERT(m_file[i] != NULL);
    m_file[i]->unbind_psi();
  }
  DBUG_VOID_RETURN;
}

int ha_partition::rebind()
{
  uint i;

  DBUG_ENTER("ha_partition::rebind");
  if (int error= handler::rebind())
    DBUG_RETURN(error);
  for (i= 0; i < m_tot_parts; i++)
  {
    DBUG_ASSERT(m_file[i] != NULL);
    if (int error= m_file[i]->rebind())
    {
      /* Roll back: unbind the partitions already rebound, then ourselves */
      while (i)
        m_file[--i]->unbind_psi();
      handler::unbind_psi();
      DBUG_RETURN(error);
    }
  }
  DBUG_RETURN(0);
}
#endif /* HAVE_M_PSI_PER_PARTITION */


/*
  Check if the table definition has changed for the part tables
  We use the first partition for the check.
*/

int ha_partition::discover_check_version()
{
  return m_file[0]->discover_check_version();
}

/* Callback for loop_partitions(): point one partition at the shared stats */
static int set_part_handler_stats(handler *h, void *stats)
{
  h->handler_stats= (ha_handler_stats*) stats;
  return 0;
}


/**
  Clone the open and locked partitioning handler.

  @param mem_root  MEM_ROOT to use.

  @return Pointer to the successfully created clone or NULL

  @details
  This function creates a new ha_partition handler as a clone/copy. The
  original (this) must already be opened and locked. The clone will use
  the originals m_part_info.
  It also allocates memory for ref + ref_dup.
  In ha_partition::open() it will clone its original handlers partitions
  which will allocate then on the correct MEM_ROOT and also open them.
*/

handler *ha_partition::clone(const char *name, MEM_ROOT *mem_root)
{
  ha_partition *new_handler;

  DBUG_ENTER("ha_partition::clone");
  new_handler= new (mem_root) ha_partition(ht, table_share, m_part_info,
                                           this, mem_root);
  if (!new_handler)
    DBUG_RETURN(NULL);

  /*
    We will not clone each partition's handler here, it will be done in
    ha_partition::open() for clones. Also set_ha_share_ref is not needed
    here, since 1) ha_share is copied in the constructor used above
    2) each partition's cloned handler will set it from its original.
  */

  /*
    Allocate new_handler->ref here because otherwise ha_open will allocate it
    on this->table->mem_root and we will not be able to reclaim that memory
    when the clone handler object is destroyed.
  */
  if (!(new_handler->ref= (uchar*) alloc_root(mem_root,
                                              ALIGN_SIZE(m_ref_length)*2)))
    goto err;

  if (new_handler->ha_open(table, name,
                           table->db_stat,
                           HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_NO_PSI_CALL))
    goto err;

  if (handler_stats)
    new_handler->loop_partitions(set_part_handler_stats, handler_stats);

  DBUG_RETURN((handler*) new_handler);

err:
  delete new_handler;
  DBUG_RETURN(NULL);
}


/*
  Update all sub partitions to point to handler stats
*/

void ha_partition::handler_stats_updated()
{
  loop_partitions(set_part_handler_stats, handler_stats);
}


/*
  Close handler object

  SYNOPSIS
    close()

  RETURN VALUE
    >0                   Error code
    0                    Success

  DESCRIPTION
    Called from sql_base.cc, sql_select.cc, and table.cc.
    In sql_select.cc it is only used to close up temporary tables or during
    the process where a temporary table is converted over to being a
    myisam table.
    For sql_base.cc look at close_data_tables().
*/

int ha_partition::close(void)
{
  bool first= TRUE;
  handler **file;
  uint i;
  st_partition_ft_info *tmp_ft_info;
  DBUG_ENTER("ha_partition::close");
  DBUG_ASSERT(table->s == table_share);
  DBUG_ASSERT(m_part_info);

  destroy_record_priority_queue();

  /* Free the full-text search info chain */
  for (; ft_first ; ft_first= tmp_ft_info)
  {
    tmp_ft_info= ft_first->next;
    my_free(ft_first);
  }

  /* Free active mrr_ranges */
  for (i= 0; i < m_tot_parts; i++)
  {
    if (m_part_mrr_range_first[i])
    {
      PARTITION_PART_KEY_MULTI_RANGE *tmp_mrr_range_first=
        m_part_mrr_range_first[i];
      do
      {
        PARTITION_PART_KEY_MULTI_RANGE *tmp_mrr_range_current;
        tmp_mrr_range_current= tmp_mrr_range_first;
        tmp_mrr_range_first= tmp_mrr_range_first->next;
        my_free(tmp_mrr_range_current);
      } while (tmp_mrr_range_first);
    }
  }
  if (m_mrr_range_first)
  {
    do
    {
      m_mrr_range_current= m_mrr_range_first;
      m_mrr_range_first= m_mrr_range_first->next;
      if (m_mrr_range_current->key[0])
        my_free(m_mrr_range_current->key[0]);
      if (m_mrr_range_current->key[1])
        my_free(m_mrr_range_current->key[1]);
      my_free(m_mrr_range_current);
    } while (m_mrr_range_first);
  }
  my_free(m_range_info);
  m_range_info= NULL;                            // Safety

  if (m_mrr_full_buffer)
  {
    my_free(m_mrr_full_buffer);
    m_mrr_full_buffer= NULL;
    m_mrr_full_buffer_size= 0;
  }
  file= m_file;

repeat:
  do
  {
    /* Added-file handlers (second pass) are closed unconditionally */
    if (!first || bitmap_is_set(&m_opened_partitions, (uint)(file - m_file)))
      (*file)->ha_close();
  } while (*(++file));

  free_partition_bitmaps();

  /* Second pass: handlers added by an in-progress ALTER (m_added_file) */
  if (first && m_added_file && m_added_file[0])
  {
    file= m_added_file;
    first= FALSE;
    goto repeat;
  }

  m_handler_status= handler_closed;
  DBUG_RETURN(0);
}

/****************************************************************************
                MODULE start/end statement
****************************************************************************/
/*
  A number of methods to define various constants for the handler. In
  the case of the partition handler we need to use some max and min
  of the underlying handlers in most cases.
*/

/*
  Set external locks on table

  SYNOPSIS
    external_lock()
    thd                   Thread object
    lock_type             Type of external lock

  RETURN VALUE
    >0                   Error code
    0                    Success

  DESCRIPTION
    First you should go read the section "locking functions for mysql" in
    lock.cc to understand this.
    This create a lock on the table. If you are implementing a storage engine
    that can handle transactions look at ha_berkeley.cc to see how you will
    want to go about doing this. Otherwise you should consider calling
    flock() here.
    Originally this method was used to set locks on file level to enable
    several MySQL Servers to work on the same data. For transactional
    engines it has been "abused" to also mean start and end of statements
    to enable proper rollback of statements and transactions.
    When LOCK
    TABLES has been issued the start_stmt method takes over the role of
    indicating start of statement but in this case there is no end of
    statement indicator(?).

    Called from lock.cc by lock_external() and unlock_external(). Also called
    from sql_table.cc by copy_data_between_tables().
*/

int ha_partition::external_lock(THD *thd, int lock_type)
{
  int error;
  uint i, first_used_partition;
  MY_BITMAP *used_partitions;
  DBUG_ENTER("ha_partition::external_lock");

  DBUG_ASSERT(!auto_increment_lock);
  DBUG_ASSERT(!auto_increment_safe_stmt_log_lock);

  /*
    On unlock, only the partitions we actually locked are visited;
    on lock, the pruned set of partitions to lock is used.
  */
  if (lock_type == F_UNLCK)
    used_partitions= &m_locked_partitions;
  else
    used_partitions= &(m_part_info->lock_partitions);

  first_used_partition= bitmap_get_first_set(used_partitions);

  for (i= first_used_partition;
       i < m_tot_parts;
       i= bitmap_get_next_set(used_partitions, i))
  {
    DBUG_PRINT("info", ("external_lock(thd, %d) part %u", lock_type, i));
    if (unlikely((error= m_file[i]->ha_external_lock(thd, lock_type))))
    {
      /* Unlock errors are ignored; lock errors roll back prior locks */
      if (lock_type != F_UNLCK)
        goto err_handler;
    }
    DBUG_PRINT("info", ("external_lock part %u lock %d", i, lock_type));
    if (lock_type != F_UNLCK)
      bitmap_set_bit(&m_locked_partitions, i);
  }
  if (lock_type == F_UNLCK)
  {
    bitmap_clear_all(used_partitions);
    if (m_lock_type == F_WRLCK && m_part_info->vers_require_hist_part(thd))
      m_part_info->vers_check_limit(thd);
  }
  else
  {
    /* Add touched partitions to be included in reset(). */
    bitmap_union(&m_partitions_to_reset, used_partitions);
  }

  if (m_added_file && m_added_file[0])
  {
    handler **file= m_added_file;
    DBUG_ASSERT(lock_type == F_UNLCK);
    do
    {
      (void) (*file)->ha_external_lock(thd, lock_type);
    } while (*(++file));
  }
  if (lock_type == F_WRLCK && m_part_info->part_expr)
    m_part_info->part_expr->walk(&Item::register_field_in_read_map, 1, 0);

  DBUG_RETURN(0);

err_handler:
  uint j;
  /* Roll back: unlock every partition locked so far in this call */
  for (j= first_used_partition;
       j < i;
       j= bitmap_get_next_set(&m_locked_partitions, j))
  {
    (void) m_file[j]->ha_external_unlock(thd);
  }
  bitmap_clear_all(&m_locked_partitions);
  DBUG_RETURN(error);
}


/*
  Get the lock(s) for the table and perform conversion of locks if needed

  SYNOPSIS
    store_lock()
    thd                   Thread object
    to                    Lock object array
    lock_type             Table lock type

  RETURN VALUE
    >0                   Error code
    0                    Success

  DESCRIPTION
    The idea with handler::store_lock() is the following:

    The statement decided which locks we should need for the table
    for updates/deletes/inserts we get WRITE locks, for SELECT... we get
    read locks.

    Before adding the lock into the table lock handler (see thr_lock.c)
    mysqld calls store lock with the requested locks.  Store lock can now
    modify a write lock to a read lock (or some other lock), ignore the
    lock (if we don't want to use MySQL table locks at all) or add locks
    for many tables (like we do when we are using a MERGE handler).

    Berkeley DB for partition changes all WRITE locks to TL_WRITE_ALLOW_WRITE
    (which signals that we are doing WRITES, but we are still allowing other
    readers and writers).

    When releasing locks, store_lock() is also called. In this case one
    usually doesn't have to do anything.

    store_lock is called when holding a global mutex to ensure that only
    one thread at a time changes the locking information of tables.

    In some exceptional cases MySQL may send a request for a TL_IGNORE;
    This means that we are requesting the same lock as last time and this
    should also be ignored. (This may happen when someone does a flush
    table when we have opened a part of the tables, in which case mysqld
    closes and reopens the tables and tries to get the same locks as last
    time). In the future we will probably try to remove this.

    Called from lock.cc by get_lock_data().
*/

THR_LOCK_DATA **ha_partition::store_lock(THD *thd,
                                         THR_LOCK_DATA **to,
                                         enum thr_lock_type lock_type)
{
  uint i;
  DBUG_ENTER("ha_partition::store_lock");
  DBUG_ASSERT(thd == current_thd);

  /*
    This can be called from get_lock_data() in mysql_lock_abort_for_thread(),
    even when thd != table->in_use. In that case don't use partition pruning,
    but use all partitions instead to avoid using another threads structures.
  */
  if (thd != table->in_use)
  {
    for (i= 0; i < m_tot_parts; i++)
      to= m_file[i]->store_lock(thd, to, lock_type);
  }
  else
  {
    /* Releasing/ignoring: visit locked partitions; otherwise pruned set */
    MY_BITMAP *used_partitions= lock_type == TL_UNLOCK ||
                                lock_type == TL_IGNORE ?
                                  &m_locked_partitions :
                                  &m_part_info->lock_partitions;

    for (i= bitmap_get_first_set(used_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(used_partitions, i))
    {
      DBUG_PRINT("info", ("store lock %u iteration", i));
      to= m_file[i]->store_lock(thd, to, lock_type);
    }
  }
  DBUG_RETURN(to);
}

/*
  Start a statement when table is locked

  SYNOPSIS
    start_stmt()
    thd                  Thread object
    lock_type            Type of external lock

  RETURN VALUE
    >0                   Error code
    0                    Success

  DESCRIPTION
    This method is called instead of external lock when the table is locked
    before the statement is executed.
*/

int ha_partition::start_stmt(THD *thd, thr_lock_type lock_type)
{
  int error= 0;
  uint i;
  /* Assert that read_partitions is included in lock_partitions */
  DBUG_ASSERT(bitmap_is_subset(&m_part_info->read_partitions,
                               &m_part_info->lock_partitions));
  /*
    m_locked_partitions is set in previous external_lock/LOCK TABLES.
    Current statement's lock requests must not include any partitions
    not previously locked.
  */
  DBUG_ASSERT(bitmap_is_subset(&m_part_info->lock_partitions,
                               &m_locked_partitions));
  DBUG_ENTER("ha_partition::start_stmt");

  for (i= bitmap_get_first_set(&(m_part_info->lock_partitions));
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
  {
    if (unlikely((error= m_file[i]->start_stmt(thd, lock_type))))
      DBUG_RETURN(error);
    /* Add partition to be called in reset(). */
    bitmap_set_bit(&m_partitions_to_reset, i);
  }
  if (lock_type >= TL_FIRST_WRITE)
  {
    /* Writes need the partition-expression fields in the read map */
    if (m_part_info->part_expr)
      m_part_info->part_expr->walk(&Item::register_field_in_read_map, 1, 0);
  }
  DBUG_RETURN(error);
}


/**
  Get number of lock objects returned in store_lock

  @returns Number of locks returned in call to store_lock

  @desc
    Returns the maximum possible number of store locks needed in call to
    store lock.
*/

uint ha_partition::lock_count() const
{
  DBUG_ENTER("ha_partition::lock_count");
  DBUG_RETURN(m_num_locks);
}


/*
  Unlock last accessed row

  SYNOPSIS
    unlock_row()

  RETURN VALUE
    NONE

  DESCRIPTION
    Record currently processed was not in the result set of the statement
    and is thus unlocked. Used for UPDATE and DELETE queries.
*/

void ha_partition::unlock_row()
{
  DBUG_ENTER("ha_partition::unlock_row");
  /* Forward to the partition the last row was read from */
  m_file[m_last_part]->unlock_row();
  DBUG_VOID_RETURN;
}

/**
  Check if semi consistent read was used

  SYNOPSIS
    was_semi_consistent_read()

  RETURN VALUE
    TRUE   Previous read was a semi consistent read
    FALSE  Previous read was not a semi consistent read

  DESCRIPTION
    See handler.h:
    In an UPDATE or DELETE, if the row under the cursor was locked by another
    transaction, and the engine used an optimistic read of the last
    committed row value under the cursor, then the engine returns 1 from this
    function. MySQL must NOT try to update this optimistic value. If the
    optimistic value does not match the WHERE condition, MySQL can decide to
    skip over this row. Currently only works for InnoDB. This can be used to
    avoid unnecessary lock waits.

    If this method returns nonzero, it will also signal the storage
    engine that the next read will be a locking re-read of the row.
*/
bool ha_partition::was_semi_consistent_read()
{
  DBUG_ENTER("ha_partition::was_semi_consistent_read");
  DBUG_ASSERT(m_last_part < m_tot_parts);
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
  DBUG_RETURN(m_file[m_last_part]->was_semi_consistent_read());
}

/**
  Use semi consistent read if possible

  SYNOPSIS
    try_semi_consistent_read()
    yes   Turn on semi consistent read

  RETURN VALUE
    NONE

  DESCRIPTION
    See handler.h:
    Tell the engine whether it should avoid unnecessary lock waits.
    If yes, in an UPDATE or DELETE, if the row under the cursor was locked
    by another transaction, the engine may try an optimistic read of
    the last committed row value under the cursor.
    Note: prune_partitions are already called before this call, so using
    pruning is OK.
*/
void ha_partition::try_semi_consistent_read(bool yes)
{
  uint i;
  DBUG_ENTER("ha_partition::try_semi_consistent_read");

  /* Forward the setting to every partition in the pruned read set */
  i= bitmap_get_first_set(&(m_part_info->read_partitions));
  DBUG_ASSERT(i != MY_BIT_NONE);
  for (;
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    m_file[i]->try_semi_consistent_read(yes);
  }
  DBUG_VOID_RETURN;
}


/****************************************************************************
                MODULE change record
****************************************************************************/

/*
  Insert a row to the table

  SYNOPSIS
    write_row()
    buf                        The row in MySQL Row Format

  RETURN VALUE
    >0                         Error code
    0                          Success

  DESCRIPTION
    write_row() inserts a row. buf() is a byte array of data, normally
    record[0].

    You can use the field information to extract the data from the native byte
    array type.

    Example of this would be:
    for (Field **field=table->field ; *field ; field++)
    {
      ...
    }

    See ha_tina.cc for a variant of extracting all of the data as strings.
    ha_berkeley.cc has a variant of how to store it intact by "packing" it
    for ha_berkeley's own native storage type.

    Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc,
    sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc.

    ADDITIONAL INFO:

    We have to set auto_increment fields, because those may be used in
    determining which partition the row should be written to.
*/

int ha_partition::write_row(const uchar * buf)
{
  uint32 part_id;
  int error;
  longlong func_value;
  bool have_auto_increment= table->next_number_field && buf == table->record[0];
  MY_BITMAP *old_map;
  THD *thd= ha_thd();
  sql_mode_t org_sql_mode= thd->variables.sql_mode;
  bool saved_auto_inc_field_not_null= table->auto_increment_field_not_null;
  DBUG_ENTER("ha_partition::write_row");
  DBUG_PRINT("enter", ("partition this: %p", this));

  /*
    If we have an auto_increment column and we are writing a changed row
    or a new row, then update the auto_increment value in the record.
  */
  if (have_auto_increment)
  {
    if (!table_share->next_number_keypart)
      if (unlikely(error= update_next_auto_inc_val()))
        goto exit;

    /*
      If we have failed to set the auto-increment value for this row,
      it is highly likely that we will not be able to insert it into
      the correct partition. We must check and fail if necessary.
    */
    if (unlikely(error= update_auto_increment()))
      goto exit;

    /*
      Don't allow generation of auto_increment value by the partitions
      handler. If a partitions handler would change the value, then it might
      not match the partition any longer.
      This can occur if 'SET INSERT_ID = 0; INSERT (NULL)',
      So allow this by adding 'MODE_NO_AUTO_VALUE_ON_ZERO' to sql_mode.
      The partitions handler::next_insert_id must always be 0. Otherwise
      we need to forward release_auto_increment, or reset it for all
      partitions.
    */
    if (table->next_number_field->val_int() == 0)
    {
      table->auto_increment_field_not_null= TRUE;
      thd->variables.sql_mode|= MODE_NO_AUTO_VALUE_ON_ZERO;
    }
  }
  /* Temporarily allow reading all columns while computing the partition id */
  old_map= dbug_tmp_use_all_columns(table, &table->read_set);
  error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
  dbug_tmp_restore_column_map(&table->read_set, old_map);
  if (unlikely(error))
  {
    m_part_info->err_value= func_value;
    goto exit;
  }
  if (!bitmap_is_set(&(m_part_info->lock_partitions), part_id))
  {
    DBUG_PRINT("info", ("Write to non-locked partition %u (func_value: %ld)",
                        part_id, (long) func_value));
    error= HA_ERR_NOT_IN_LOCK_PARTITIONS;
    goto exit;
  }
  m_last_part= part_id;
  DBUG_PRINT("info", ("Insert in partition %u", part_id));

  start_part_bulk_insert(thd, part_id);

  DBUG_ASSERT(!m_file[part_id]->row_logging);
  error= m_file[part_id]->ha_write_row(buf);
  if (!error && have_auto_increment && !table->s->next_number_keypart)
    set_auto_increment_if_higher(table->next_number_field);

exit:
  /* Always restore the saved THD/table state, also on the error paths */
  table->auto_increment_field_not_null= saved_auto_inc_field_not_null;
  thd->variables.sql_mode= org_sql_mode;
  DBUG_RETURN(error);
}


/*
  Update an existing row

  SYNOPSIS
    update_row()
    old_data                 Old record in MySQL Row Format
    new_data                 New record in MySQL Row Format

  RETURN VALUE
    >0                         Error code
    0                          Success

  DESCRIPTION
    Yes, update_row() does what you expect, it updates a row. old_data will
    have the previous row record in it, while new_data will have the newest
    data in it.
    Keep in mind that the server can do updates based on ordering if an
    ORDER BY clause was used. Consecutive ordering is not guaranteed.

    Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
    new_data is always record[0]
    old_data is always record[1]
*/

int ha_partition::update_row(const uchar *old_data, const uchar *new_data)
{
  THD *thd= ha_thd();
  uint32 new_part_id, old_part_id= m_last_part;
  int error= 0;
  DBUG_ENTER("ha_partition::update_row");
  m_err_rec= NULL;

  // Need to read partition-related columns, to locate the row's partition:
  DBUG_ASSERT(bitmap_is_subset(&m_part_info->full_part_field_set,
                               table->read_set));
#ifndef DBUG_OFF
  /*
    The protocol for updating a row is:
    1) position the handler (cursor) on the row to be updated,
       either through the last read row (rnd or index) or by rnd_pos.
    2) call update_row with both old and new full records as arguments.

    This means that m_last_part should already be set to actual partition
    where the row was read from. And if that is not the same as the
    calculated part_id we found a misplaced row, we return an error to
    notify the user that something is broken in the row distribution
    between partitions! Since we don't check all rows on read, we return an
    error instead of correcting m_last_part, to make the user aware of the
    problem!

    Notice that HA_READ_BEFORE_WRITE_REMOVAL does not require this protocol,
    so this is not supported for this engine.
  */
  error= get_part_for_buf(old_data, m_rec0, m_part_info, &old_part_id);
  DBUG_ASSERT(!error);
  DBUG_ASSERT(old_part_id == m_last_part);
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), old_part_id));
#endif

  if (unlikely((error= get_part_for_buf(new_data, m_rec0, m_part_info,
                                        &new_part_id))))
    goto exit;
  if (unlikely(!bitmap_is_set(&(m_part_info->lock_partitions), new_part_id)))
  {
    error= HA_ERR_NOT_IN_LOCK_PARTITIONS;
    goto exit;
  }


  m_last_part= new_part_id;
  start_part_bulk_insert(thd, new_part_id);
  DBUG_ASSERT(!m_file[new_part_id]->row_logging);
  if (new_part_id == old_part_id)
  {
    /* Row stays in the same partition: plain in-place update */
    DBUG_PRINT("info", ("Update in partition %u", (uint) new_part_id));
    error= m_file[new_part_id]->ha_update_row(old_data, new_data);
    goto exit;
  }
  else
  {
    /* Row moves between partitions: write into new, delete from old */
    Field *saved_next_number_field= table->next_number_field;
    /*
      Don't allow generation of auto_increment value for update.
      table->next_number_field is never set on UPDATE.
      But is set for INSERT ... ON DUPLICATE KEY UPDATE,
      and since update_row() does not generate or update an auto_inc value,
      we cannot have next_number_field set when moving a row
      to another partition with write_row(), since that could
      generate/update the auto_inc value.
      This gives the same behavior for partitioned vs non partitioned tables.
    */
    table->next_number_field= NULL;
    DBUG_PRINT("info", ("Update from partition %u to partition %u",
                        (uint) old_part_id, (uint) new_part_id));
    error= m_file[new_part_id]->ha_write_row((uchar*) new_data);
    table->next_number_field= saved_next_number_field;
    if (unlikely(error))
      goto exit;

    error= m_file[old_part_id]->ha_delete_row(old_data);
    if (unlikely(error))
      goto exit;
  }

exit:
  /*
    if updating an auto_increment column, update
    part_share->next_auto_inc_val if needed.
    (not to be used if auto_increment on secondary field in a multi-column
    index)
    mysql_update does not set table->next_number_field, so we use
    table->found_next_number_field instead.
    Also checking that the field is marked in the write set.
  */
  if (table->found_next_number_field &&
      new_data == table->record[0] &&
      !table->s->next_number_keypart &&
      bitmap_is_set(table->write_set,
                    table->found_next_number_field->field_index))
  {
    update_next_auto_inc_val();
    if (part_share->auto_inc_initialized)
      set_auto_increment_if_higher(table->found_next_number_field);
  }
  DBUG_RETURN(error);
}


/*
  Remove an existing row

  SYNOPSIS
    delete_row
    buf                      Deleted row in MySQL Row Format

  RETURN VALUE
    >0                       Error Code
    0                        Success

  DESCRIPTION
    This will delete a row. buf will contain a copy of the row to be deleted.
    The server will call this right after the current row has been read
    (from either a previous rnd_xxx() or index_xxx() call).
    If you keep a pointer to the last row or can access a primary key it will
    make doing the deletion quite a bit easier.
    Keep in mind that the server does no guarantee consecutive deletions.
    ORDER BY clauses can be used.

    Called in sql_acl.cc and sql_udf.cc to manage internal table information.
    Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select
    it is used for removing duplicates while in insert it is used for REPLACE
    calls.

    buf is either record[0] or record[1]
*/

int ha_partition::delete_row(const uchar *buf)
{
  int error;
  DBUG_ENTER("ha_partition::delete_row");
  m_err_rec= NULL;

  DBUG_ASSERT(bitmap_is_subset(&m_part_info->full_part_field_set,
                               table->read_set));
#ifndef DBUG_OFF
  THD* thd = ha_thd();
  /*
    The protocol for deleting a row is:
    1) position the handler (cursor) on the row to be deleted,
       either through the last read row (rnd or index) or by rnd_pos.
    2) call delete_row with the full record as argument.

    This means that m_last_part should already be set to actual partition
    where the row was read from. And if that is not the same as the
    calculated part_id we found a misplaced row, we return an error to
    notify the user that something is broken in the row distribution
    between partitions! Since we don't check all rows on read, we return an
    error instead of forwarding the delete to the correct (m_last_part)
    partition!

    Notice that HA_READ_BEFORE_WRITE_REMOVAL does not require this protocol,
    so this is not supported for this engine.

    For partitions by system_time, get_part_for_buf() is always either current
    or last historical partition, but DELETE HISTORY can delete from any
    historical partition. So, skip the check in this case.
  */
  if (!thd->lex->vers_conditions.delete_history)
  {
    uint32 part_id;
    error= get_part_for_buf(buf, m_rec0, m_part_info, &part_id);
    DBUG_ASSERT(!error);
    DBUG_ASSERT(part_id == m_last_part);
  }
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->lock_partitions), m_last_part));
#endif

  if (!bitmap_is_set(&(m_part_info->lock_partitions), m_last_part))
    DBUG_RETURN(HA_ERR_NOT_IN_LOCK_PARTITIONS);

  DBUG_ASSERT(!m_file[m_last_part]->row_logging);
  error= m_file[m_last_part]->ha_delete_row(buf);
  DBUG_RETURN(error);
}


/*
  Delete all rows in a table

  SYNOPSIS
    delete_all_rows()

  RETURN VALUE
    >0                       Error Code
    0                        Success

  DESCRIPTION
    Used to delete all rows in a table. Both for cases of truncate and
    for cases where the optimizer realizes that all rows will be
    removed as a result of a SQL statement.

    Called from item_sum.cc by Item_func_group_concat::clear(),
    Item_sum_count::clear(), and Item_func_group_concat::clear().
    Called from sql_delete.cc by mysql_delete().
    Called from sql_select.cc by JOIN::reset().
    Called from sql_union.cc by st_select_lex_unit::exec().
*/

int ha_partition::delete_all_rows()
{
  int error;
  uint i;
  DBUG_ENTER("ha_partition::delete_all_rows");

  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    /* Can be pruned, like DELETE FROM t PARTITION (pX) */
    if (unlikely((error= m_file[i]->ha_delete_all_rows())))
      DBUG_RETURN(error);
  }
  DBUG_RETURN(0);
}


/**
  Manually truncate the table.

  @retval  0    Success.
  @retval  > 0  Error code.
*/

int ha_partition::truncate()
{
  int error;
  handler **file;
  DBUG_ENTER("ha_partition::truncate");

  /*
    TRUNCATE also means resetting auto_increment. Hence, reset
    it so that it will be initialized again at the next use.
  */
  lock_auto_increment();
  part_share->next_auto_inc_val= 0;
  part_share->auto_inc_initialized= false;
  unlock_auto_increment();

  file= m_file;
  do
  {
    if (unlikely((error= (*file)->ha_truncate())))
      DBUG_RETURN(error);
  } while (*(++file));
  DBUG_RETURN(0);
}


/**
  Truncate a set of specific partitions.

  @remark Auto increment value will be truncated in that partition as well!

  ALTER TABLE t TRUNCATE PARTITION ...
*/

int ha_partition::truncate_partition(Alter_info *alter_info, bool *binlog_stmt)
{
  int error= 0;
  /* NOTE(review): template argument (likely <partition_element>) appears
     lost in extraction here and for subpart_it below — confirm upstream */
  List_iterator part_it(m_part_info->partitions);
  uint num_parts= m_part_info->num_parts;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  DBUG_ENTER("ha_partition::truncate_partition");

  /* Only binlog when it starts any call to the partitions handlers */
  *binlog_stmt= false;

  if (set_part_state(alter_info, m_part_info, PART_ADMIN))
    DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);

  /*
    TRUNCATE also means resetting auto_increment. Hence, reset
    it so that it will be initialized again at the next use.
  */
  lock_auto_increment();
  part_share->next_auto_inc_val= 0;
  part_share->auto_inc_initialized= FALSE;
  unlock_auto_increment();

  *binlog_stmt= true;

  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_ADMIN)
    {
      if (m_is_sub_partitioned)
      {
        List_iterator
          subpart_it(part_elem->subpartitions);
        partition_element *sub_elem;
        uint j= 0, part;
        do
        {
          sub_elem= subpart_it++;
          part= i * num_subparts + j;
          DBUG_PRINT("info", ("truncate subpartition %u (%s)",
                              part, sub_elem->partition_name));
          if (unlikely((error= m_file[part]->ha_truncate())))
            break;
          sub_elem->part_state= PART_NORMAL;
        } while (++j < num_subparts);
      }
      else
      {
        DBUG_PRINT("info", ("truncate partition %u (%s)", i,
                            part_elem->partition_name));
        error= m_file[i]->ha_truncate();
      }
      part_elem->part_state= PART_NORMAL;
    }
  } while (!error && (++i < num_parts));
  DBUG_RETURN(error);
}


/*
  Start a large batch of insert rows

  SYNOPSIS
    start_bulk_insert()
    rows                  Number of rows to insert
    flags                 Flags to control index creation

  RETURN VALUE
    NONE

  DESCRIPTION
    rows == 0 means we will probably insert many rows
*/
void ha_partition::start_bulk_insert(ha_rows rows, uint flags)
{
  DBUG_ENTER("ha_partition::start_bulk_insert");

  m_bulk_inserted_rows= 0;
  bitmap_clear_all(&m_bulk_insert_started);
  /* use the last bit for marking if bulk_insert_started was called */
  bitmap_set_bit(&m_bulk_insert_started, m_tot_parts);
  DBUG_VOID_RETURN;
}


/*
  Check if start_bulk_insert has been called for this partition,
  if not, call it and mark it called
*/
void ha_partition::start_part_bulk_insert(THD *thd, uint part_id)
{
  long old_buffer_size;
  if (!bitmap_is_set(&m_bulk_insert_started, part_id) &&
      bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
  {
    DBUG_ASSERT(bitmap_is_set(&(m_part_info->lock_partitions), part_id));
    old_buffer_size= thd->variables.read_buff_size;
    /* Update read_buffer_size for this partition */
    thd->variables.read_buff_size= estimate_read_buffer_size(old_buffer_size);
    m_file[part_id]->ha_start_bulk_insert(guess_bulk_insert_rows());
    bitmap_set_bit(&m_bulk_insert_started, part_id);
    thd->variables.read_buff_size= old_buffer_size;
  }
  m_bulk_inserted_rows++;
}

/*
  Estimate the read buffer size for each partition.
  SYNOPSIS
    ha_partition::estimate_read_buffer_size()
    original_size  read buffer size originally set for the server
  RETURN VALUE
    estimated buffer size.
  DESCRIPTION
    If the estimated number of rows to insert is less than 10 (but not 0)
    the new buffer size is same as original buffer size.
    In case of first partition or when partition function is monotonic
    new buffer size is same as the original buffer size.
    For rest of the partition total buffer of 10*original_size is divided
    equally if number of partitions is more than 10, otherwise each partition
    will be allowed to use original buffer size.
*/
long ha_partition::estimate_read_buffer_size(long original_size)
{
  /*
    If number of rows to insert is less than 10, but not 0,
    return original buffer size.
  */
  if (estimation_rows_to_insert && (estimation_rows_to_insert < 10))
    return (original_size);
  /*
    If first insert/partition and monotonic partition function,
    allow using buffer size originally set.
  */
  if (!m_bulk_inserted_rows &&
      m_part_func_monotonicity_info != NON_MONOTONIC &&
      m_tot_parts > 1)
    return original_size;
  /*
    Allow total buffer used in all partition to go up to 10*read_buffer_size.
    11*read_buffer_size in case of monotonic partition function.
  */

  if (m_tot_parts < 10)
    return original_size;
  return (original_size * 10 / m_tot_parts);
}

/*
  Try to predict the number of inserts into this partition.

  If less than 10 rows (including 0 which means Unknown)
    just give that as a guess
  If monotonic partitioning function was used
    guess that 50 % of the inserts goes to the first partition
  For all other cases, guess on equal distribution between the partitions
*/
ha_rows ha_partition::guess_bulk_insert_rows()
{
  DBUG_ENTER("guess_bulk_insert_rows");

  if (estimation_rows_to_insert < 10)
    DBUG_RETURN(estimation_rows_to_insert);

  /* If first insert/partition and monotonic partition function, guess 50%. */
  if (!m_bulk_inserted_rows &&
      m_part_func_monotonicity_info != NON_MONOTONIC &&
      m_tot_parts > 1)
    DBUG_RETURN(estimation_rows_to_insert / 2);

  /* Else guess on equal distribution (+1 is to avoid returning 0/Unknown) */
  if (m_bulk_inserted_rows < estimation_rows_to_insert)
    DBUG_RETURN(((estimation_rows_to_insert - m_bulk_inserted_rows)
                 / m_tot_parts) + 1);
  /* The estimation was wrong, must say 'Unknown' */
  DBUG_RETURN(0);
}


/* Accumulate one partition's row-copy counters into the total */
void ha_partition::sum_copy_info(handler *file)
{
  copy_info.records+= file->copy_info.records;
  copy_info.touched+= file->copy_info.touched;
  copy_info.copied+= file->copy_info.copied;
  copy_info.deleted+= file->copy_info.deleted;
  copy_info.updated+= file->copy_info.updated;
}


/* Sum copy counters over all opened partitions into this handler */
void ha_partition::sum_copy_infos()
{
  handler **file_array;
  bzero(&copy_info, sizeof(copy_info));
  file_array= m_file;
  do
  {
    if (bitmap_is_set(&(m_opened_partitions), (uint)(file_array - m_file)))
      sum_copy_info(*file_array);
  } while (*(++file_array));
}

/* Clear copy counters of this handler and of all opened partitions */
void ha_partition::reset_copy_info()
{
  handler **file_array;
  bzero(&copy_info, sizeof(copy_info));
  file_array= m_file;
  do
  {
    if (bitmap_is_set(&(m_opened_partitions), (uint)(file_array - m_file)))
      bzero(&(*file_array)->copy_info, sizeof(copy_info));
  } while (*(++file_array));
}



/*
  Finish a large batch of insert rows

  SYNOPSIS
    end_bulk_insert()

  RETURN VALUE
    >0                      Error code
    0                       Success

  Note: end_bulk_insert can be called
  without start_bulk_insert
  being called, see bug#44108.

*/

int ha_partition::end_bulk_insert()
{
  int error= 0;
  uint i;
  DBUG_ENTER("ha_partition::end_bulk_insert");

  /* Marker bit (m_tot_parts) unset => start_bulk_insert was never called */
  if (!bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
    DBUG_RETURN(error);

  for (i= bitmap_get_first_set(&m_bulk_insert_started);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_bulk_insert_started, i))
  {
    int tmp;
    /* Remember the first error but still end bulk insert on all parts */
    if ((tmp= m_file[i]->ha_end_bulk_insert()))
      error= tmp;
    sum_copy_info(m_file[i]);
  }
  bitmap_clear_all(&m_bulk_insert_started);
  DBUG_RETURN(error);
}


/****************************************************************************
                MODULE full table scan
****************************************************************************/
/*
  Initialize engine for random reads

  SYNOPSIS
    ha_partition::rnd_init()
    scan        0  Initialize for random reads through rnd_pos()
                1  Initialize for random scan through rnd_next()

  RETURN VALUE
    >0          Error code
    0           Success

  DESCRIPTION
    rnd_init() is called when the server wants the storage engine to do a
    table scan or when the server wants to access data through rnd_pos.

    When scan is used we will scan one handler partition at a time.
    When preparing for rnd_pos we will init all handler partitions.
    No extra cache handling is needed when scanning is not performed.

    Before initialising we will call rnd_end to ensure that we clean up from
    any previous incarnation of a table scan.
    Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
    sql_table.cc, and sql_update.cc.
*/

int ha_partition::rnd_init(bool scan)
{
  int error;
  uint i= 0;
  uint32 part_id;
  DBUG_ENTER("ha_partition::rnd_init");

  /*
    For operations that may need to change data, we may need to extend
    read_set.
  */
  if (get_lock_type() == F_WRLCK)
  {
    /*
      If write_set contains any of the fields used in partition and
      subpartition expression, we need to set all bits in read_set because
      the row may need to be inserted in a different [sub]partition. In
      other words update_row() can be converted into write_row(), which
      requires a complete record.
    */
    if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
                              table->write_set))
    {
      DBUG_PRINT("info", ("partition set full bitmap"));
      bitmap_set_all(table->read_set);
    }
    else
    {
      /*
        Some handlers only read fields as specified by the bitmap for the
        read set. For partitioned handlers we always require that the
        fields of the partition functions are read such that we can
        calculate the partition id to place updated and deleted records.
      */
      DBUG_PRINT("info", ("partition set part_field bitmap"));
      bitmap_union(table->read_set, &m_part_info->full_part_field_set);
    }
  }

  /* Now we see what the index of our first important partition is */
  DBUG_PRINT("info", ("m_part_info->read_partitions: %p",
                      m_part_info->read_partitions.bitmap));
  part_id= bitmap_get_first_set(&(m_part_info->read_partitions));
  DBUG_PRINT("info", ("m_part_spec.start_part: %u", (uint) part_id));

  if (part_id == MY_BIT_NONE)
  {
    error= 0;
    goto err1;
  }

  /*
    We have a partition and we are scanning with rnd_next
    so we bump our cache
  */
  DBUG_PRINT("info", ("rnd_init on partition: %u", (uint) part_id));
  if (scan)
  {
    /*
      rnd_end() is needed for partitioning to reset internal data if scan
      is already in use
    */
    rnd_end();
    late_extra_cache(part_id);

    m_index_scan_type= partition_no_index_scan;
  }

  for (i= part_id;
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if (unlikely((error= m_file[i]->ha_rnd_init(scan))))
      goto err;
  }

  m_scan_value= scan;
  m_part_spec.start_part= part_id;
  m_part_spec.end_part= m_tot_parts - 1;
  m_rnd_init_and_first= TRUE;
  DBUG_PRINT("info", ("m_scan_value: %u", m_scan_value));
  DBUG_RETURN(0);

err:
  if (scan)
    late_extra_no_cache(part_id);

  /* Call rnd_end for all previously inited partitions. */
  for (;
       part_id < i;
       part_id= bitmap_get_next_set(&m_part_info->read_partitions, part_id))
  {
    m_file[part_id]->ha_rnd_end();
  }
err1:
  m_scan_value= 2;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(error);
}


/*
  End of a table scan

  SYNOPSIS
    rnd_end()

  RETURN VALUE
    >0          Error code
    0           Success
*/

int ha_partition::rnd_end()
{
  DBUG_ENTER("ha_partition::rnd_end");
  switch (m_scan_value) {
  case 2:                                       // Error
    break;
  case 1:                                       // Table scan
    if (m_part_spec.start_part != NO_CURRENT_PART_ID)
      late_extra_no_cache(m_part_spec.start_part);
    /* fall through */
  case 0:
    uint i;
    for (i= bitmap_get_first_set(&m_part_info->read_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      m_file[i]->ha_rnd_end();
    }
    break;
  }
  m_scan_value= 2;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(0);
}


/*
  read next row during full table scan (scan in random row order)

  SYNOPSIS
    rnd_next()
    buf         buffer that should be filled with data

  RETURN VALUE
    >0          Error code
    0           Success

  DESCRIPTION
    This is called for each row of the table scan. When you run out of records
    you should return HA_ERR_END_OF_FILE.
    The Field structure for the table is the key to getting data into buf
    in a manner that will allow the server to understand it.

    Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
    sql_table.cc, and sql_update.cc.
*/

int ha_partition::rnd_next(uchar *buf)
{
  handler *file;
  int result= HA_ERR_END_OF_FILE, error;
  /* Resume the scan in the partition where the previous call stopped. */
  uint part_id= m_part_spec.start_part;
  DBUG_ENTER("ha_partition::rnd_next");
  DBUG_PRINT("enter", ("partition this: %p", this));

  /* upper level will increment this once again at end of call */
  decrement_statistics(&SSV::ha_read_rnd_next_count);

  if (part_id == NO_CURRENT_PART_ID)
  {
    /*
      The original set of partitions to scan was empty and thus we report
      the result here.
    */
    goto end;
  }

  DBUG_ASSERT(m_scan_value == 1);

  if (m_rnd_init_and_first)
  {
    /* First rnd_next() after rnd_init(): kick off any parallel pre-scan. */
    m_rnd_init_and_first= FALSE;
    error= handle_pre_scan(FALSE, check_parallel_search());
    if (m_pre_calling || error)
      DBUG_RETURN(error);
  }

  file= m_file[part_id];

  /* Scan partitions in bitmap order, advancing when one is exhausted. */
  while (TRUE)
  {
    result= file->ha_rnd_next(buf);
    if (!result)
    {
      m_last_part= part_id;
      DBUG_PRINT("info", ("partition m_last_part: %u", (uint) m_last_part));
      m_part_spec.start_part= part_id;
      table->status= 0;
      DBUG_RETURN(0);
    }

    /*
      if we get here, then the current partition ha_rnd_next returned failure
    */
    if (result != HA_ERR_END_OF_FILE)
      goto end_dont_reset_start_part;           // Return error

    /* End current partition */
    late_extra_no_cache(part_id);
    /* Shift to next partition */
    part_id= bitmap_get_next_set(&m_part_info->read_partitions, part_id);
    if (part_id >= m_tot_parts)
    {
      result= HA_ERR_END_OF_FILE;
      break;
    }
    m_last_part= part_id;
    DBUG_PRINT("info", ("partition m_last_part: %u", (uint) m_last_part));
    m_part_spec.start_part= part_id;
    file= m_file[part_id];
    late_extra_cache(part_id);
  }

end:
  DBUG_PRINT("exit", ("reset start_part"));
  m_part_spec.start_part= NO_CURRENT_PART_ID;
end_dont_reset_start_part:
  DBUG_RETURN(result);
}


/*
  Save position of current row

  SYNOPSIS
    position()
    record             Current record in MySQL Row Format

  RETURN VALUE
    NONE

  DESCRIPTION
    position() is called after each call to rnd_next() if the data needs
    to be
ordered. You can do something like the following to store
    the position:
    ha_store_ptr(ref, ref_length, current_position);

    The server uses ref to store data. ref_length in the above case is
    the size needed to store current_position. ref is just a byte array
    that the server will maintain. If you are using offsets to mark rows, then
    current_position should be the offset. If it is a primary key like in
    BDB, then it needs to be a primary key.

    Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
*/

void ha_partition::position(const uchar *record)
{
  handler *file= m_file[m_last_part];
  size_t pad_length;
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
  DBUG_ENTER("ha_partition::position");

  /*
    Layout of ref: [2 bytes partition id][child handler ref][zero padding].
    Padding is needed because m_ref_length is sized for the largest child
    ref, while each child may use a shorter one.
  */
  file->position(record);
  int2store(ref, m_last_part);
  memcpy((ref + PARTITION_BYTES_IN_POS), file->ref, file->ref_length);
  pad_length= m_ref_length - PARTITION_BYTES_IN_POS - file->ref_length;
  if (pad_length)
    memset((ref + PARTITION_BYTES_IN_POS + file->ref_length), 0, pad_length);

  DBUG_VOID_RETURN;
}


/*
  Read row using position

  SYNOPSIS
    rnd_pos()
    out:buf                     Row read in MySQL Row Format
    position                    Position of read row

  RETURN VALUE
    >0          Error code
    0           Success

  DESCRIPTION
    This is like rnd_next, but you are given a position to use
    to determine the row. The position will be of the type that you stored in
    ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key
    or position you saved when position() was called.
    Called from filesort.cc records.cc sql_insert.cc sql_select.cc
    sql_update.cc.
*/

int ha_partition::rnd_pos(uchar * buf, uchar *pos)
{
  uint part_id;
  handler *file;
  DBUG_ENTER("ha_partition::rnd_pos");
  decrement_statistics(&SSV::ha_read_rnd_count);

  /* First two bytes of the stored ref are the partition id (see position()). */
  part_id= uint2korr((const uchar *) pos);
  DBUG_ASSERT(part_id < m_tot_parts);
  file= m_file[part_id];
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id));
  m_last_part= part_id;
  DBUG_RETURN(file->ha_rnd_pos(buf, (pos + PARTITION_BYTES_IN_POS)));
}


/*
  Read row using position using given record to find

  SYNOPSIS
    rnd_pos_by_record()
    record             Current record in MySQL Row Format

  RETURN VALUE
    >0          Error code
    0           Success

  DESCRIPTION
    this works as position()+rnd_pos() functions, but does some extra work,
    calculating m_last_part - the partition to where the 'record'
    should go.

    called from replication (log_event.cc)
*/

int ha_partition::rnd_pos_by_record(uchar *record)
{
  DBUG_ENTER("ha_partition::rnd_pos_by_record");

  /* Compute which partition 'record' belongs to; sets m_last_part. */
  if (unlikely(get_part_for_buf(record, m_rec0, m_part_info, &m_last_part)))
    DBUG_RETURN(1);

  int err= m_file[m_last_part]->rnd_pos_by_record(record);
  DBUG_RETURN(err);
}


/****************************************************************************
                MODULE index scan
****************************************************************************/
/*
  Positions an index cursor to the index specified in the handle. Fetches the
  row if available. If the key value is null, begin at the first key of the
  index.

  There are loads of optimisations possible here for the partition handler.
  The same optimisations can also be checked for full table scan although
  only through conditions and not from index ranges.
  Phase one optimisations:
    Check if the fields of the partition function are bound. If so only use
    the single partition it becomes bound to.
  Phase two optimisations:
    If it can be deduced through range or list partitioning that only a
    subset of the partitions are used, then only use those partitions.
*/


/**
  Setup the ordered record buffer and the priority queue.

  One record slot per used partition is allocated; each slot starts with the
  2-byte partition id (and optionally blob storage pointers) followed by the
  row image, so the priority queue can merge-sort rows across partitions.
*/

bool ha_partition::init_record_priority_queue()
{
  DBUG_ENTER("ha_partition::init_record_priority_queue");
  DBUG_ASSERT(!m_ordered_rec_buffer);
  /*
    Initialize the ordered record buffer.
  */
  size_t alloc_len;
  uint used_parts= bitmap_bits_set(&m_part_info->read_partitions);

  if (used_parts == 0) /* Do nothing since no records expected. */
    DBUG_RETURN(false);

  /* Allocate record buffer for each used partition. */
  m_priority_queue_rec_len= m_rec_length + ORDERED_REC_OFFSET;
  if (!m_using_extended_keys)
    m_priority_queue_rec_len+= get_open_file_sample()->ref_length;
  alloc_len= used_parts * m_priority_queue_rec_len;
  /* Allocate a key for temporary use when setting up the scan. */
  alloc_len+= table_share->max_key_length;
  Ordered_blob_storage **blob_storage;
  Ordered_blob_storage *objs;
  /* One blob-storage object per blob field per used partition. */
  const size_t n_all= used_parts * table->s->blob_fields;

  if (!my_multi_malloc(key_memory_partition_sort_buffer, MYF(MY_WME),
                       &m_ordered_rec_buffer, alloc_len,
                       &blob_storage, n_all * sizeof *blob_storage,
                       &objs, n_all * sizeof *objs, NULL))
    DBUG_RETURN(true);

  /*
    We set-up one record per partition and each record has 2 bytes in
    front where the partition id is written. This is used by ordered
    index_read.
    We also set-up a reference to the first record for temporary use in
    setting up the scan.
  */
  char *ptr= (char*) m_ordered_rec_buffer;
  uint i;
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    DBUG_PRINT("info", ("init rec-buf for part %u", i));
    if (table->s->blob_fields)
    {
      /* Placement-new the blob storage objects into the shared allocation. */
      for (uint j= 0; j < table->s->blob_fields; ++j, ++objs)
        blob_storage[j]= new (objs) Ordered_blob_storage;
      *((Ordered_blob_storage ***) ptr)= blob_storage;
      blob_storage+= table->s->blob_fields;
    }
    int2store(ptr + sizeof(String **), i);
    ptr+= m_priority_queue_rec_len;
  }
  /* The slot after the last record doubles as the temporary start key. */
  m_start_key.key= (const uchar*)ptr;

  /* Initialize priority queue, initialized to reading forward. */
  int (*cmp_func)(void *, uchar *, uchar *);
  void *cmp_arg= (void*) this;
  if (!m_using_extended_keys && !(table_flags() & HA_SLOW_CMP_REF))
    cmp_func= cmp_key_rowid_part_id;
  else
    cmp_func= cmp_key_part_id;
  DBUG_PRINT("info", ("partition queue_init(1) used_parts: %u", used_parts));
  if (init_queue(&m_queue, used_parts, ORDERED_PART_NUM_OFFSET,
                 0, cmp_func, cmp_arg, 0, 0))
  {
    my_free(m_ordered_rec_buffer);
    m_ordered_rec_buffer= NULL;
    DBUG_RETURN(true);
  }
  DBUG_RETURN(false);
}


/**
  Destroy the ordered record buffer and the priority queue.
*/

void ha_partition::destroy_record_priority_queue()
{
  DBUG_ENTER("ha_partition::destroy_record_priority_queue");
  if (m_ordered_rec_buffer)
  {
    if (table->s->blob_fields)
    {
      /* Free any blob values still held per record slot. */
      char *ptr= (char *) m_ordered_rec_buffer;
      for (uint i= bitmap_get_first_set(&m_part_info->read_partitions);
           i < m_tot_parts;
           i= bitmap_get_next_set(&m_part_info->read_partitions, i))
      {
        Ordered_blob_storage **blob_storage= *((Ordered_blob_storage ***) ptr);
        for (uint b= 0; b < table->s->blob_fields; ++b)
          blob_storage[b]->blob.free();
        ptr+= m_priority_queue_rec_len;
      }
    }

    delete_queue(&m_queue);
    my_free(m_ordered_rec_buffer);
    m_ordered_rec_buffer= NULL;
  }
  DBUG_VOID_RETURN;
}


/*
  Initialize handler before start of index scan

  SYNOPSIS
    index_init()
    inx                Index number
    sorted             Is rows to be returned in sorted order

  RETURN VALUE
    >0                 Error code
    0                  Success

  DESCRIPTION
    index_init is always called before starting index scans (except when
    starting through index_read_idx and using read_range variants).
*/

int ha_partition::index_init(uint inx, bool sorted)
{
  int error= 0;
  uint i;
  DBUG_ENTER("ha_partition::index_init");
  DBUG_PRINT("enter", ("partition this: %p inx: %u sorted: %u", this, inx, sorted));

  active_index= inx;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  m_start_key.length= 0;
  m_ordered= sorted;
  m_ordered_scan_ongoing= FALSE;
  m_curr_key_info[0]= table->key_info+inx;
  if (pk_is_clustering_key(table->s->primary_key))
  {
    /*
      if PK is clustered, then the key cmp must use the pk to
      differentiate between equal key in given index.
    */
    DBUG_PRINT("info", ("Clustered pk, using pk as secondary cmp"));
    m_curr_key_info[1]= table->key_info+table->s->primary_key;
    m_curr_key_info[2]= NULL;
    m_using_extended_keys= TRUE;
  }
  else
  {
    m_curr_key_info[1]= NULL;
    m_using_extended_keys= FALSE;
  }

  if (init_record_priority_queue())
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);

  /*
    Some handlers only read fields as specified by the bitmap for the
    read set. For partitioned handlers we always require that the
    fields of the partition functions are read such that we can
    calculate the partition id to place updated and deleted records.
    But this is required for operations that may need to change data only.
  */
  if (get_lock_type() == F_WRLCK)
  {
    DBUG_PRINT("info", ("partition set part_field bitmap"));
    bitmap_union(table->read_set, &m_part_info->full_part_field_set);
  }
  if (sorted)
  {
    /*
      An ordered scan is requested. We must make sure all fields of the
      used index are in the read set, as partitioning requires them for
      sorting (see ha_partition::handle_ordered_index_scan).

      The SQL layer may request an ordered index scan without having index
      fields in the read set when
      - it needs to do an ordered scan over an index prefix.
      - it evaluates ORDER BY with SELECT COUNT(*) FROM t1.

      TODO: handle COUNT(*) queries via unordered scan.
    */
    KEY **key_info= m_curr_key_info;
    do
    {
      for (i= 0; i < (*key_info)->user_defined_key_parts; i++)
        (*key_info)->key_part[i].field->register_field_in_read_map();
    } while (*(++key_info));
  }
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if (unlikely((error= m_file[i]->ha_index_init(inx, sorted))))
      goto err;

    DBUG_EXECUTE_IF("ha_partition_fail_index_init", {
      i++;
      error= HA_ERR_NO_PARTITION_FOUND;
      goto err;
    });
  }
err:
  if (unlikely(error))
  {
    /* End the previously initialized indexes.
*/
    uint j;
    for (j= bitmap_get_first_set(&m_part_info->read_partitions);
         j < i;
         j= bitmap_get_next_set(&m_part_info->read_partitions, j))
    {
      (void) m_file[j]->ha_index_end();
    }
    destroy_record_priority_queue();
  }
  DBUG_RETURN(error);
}


/*
  End of index scan

  SYNOPSIS
    index_end()

  RETURN VALUE
    >0                 Error code
    0                  Success

  DESCRIPTION
    index_end is called at the end of an index scan to clean up any
    things needed to clean up.
*/

int ha_partition::index_end()
{
  int error= 0;
  handler **file;
  DBUG_ENTER("ha_partition::index_end");

  active_index= MAX_KEY;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  file= m_file;
  /* End index (or MRR-initiated rnd) scans in ALL partitions; keep the
     first error but still close the rest. */
  do
  {
    if ((*file)->inited == INDEX)
    {
      int tmp;
      if ((tmp= (*file)->ha_index_end()))
        error= tmp;
    }
    else if ((*file)->inited == RND)
    {
      // Possible due to MRR
      int tmp;
      if ((tmp= (*file)->ha_rnd_end()))
        error= tmp;
    }
  } while (*(++file));
  destroy_record_priority_queue();
  DBUG_RETURN(error);
}


/*
  Read one record in an index scan and start an index scan

  SYNOPSIS
    index_read_map()
    buf                    Read row in MySQL Row Format
    key                    Key parts in consecutive order
    keypart_map            Which part of key is used
    find_flag              What type of key condition is used

  RETURN VALUE
    >0                 Error code
    0                  Success

  DESCRIPTION
    index_read_map starts a new index scan using a start key. The MySQL Server
    will check the end key on its own. Thus to function properly the
    partitioned handler need to ensure that it delivers records in the sort
    order of the MySQL Server.
    index_read_map can be restarted without calling index_end on the previous
    index scan and without calling index_init. In this case the index_read_map
    is on the same index as the previous index_scan. This is particularly
    used in conjunction with multi read ranges.
*/

int ha_partition::index_read_map(uchar *buf, const uchar *key,
                                 key_part_map keypart_map,
                                 enum ha_rkey_function find_flag)
{
  DBUG_ENTER("ha_partition::index_read_map");
  decrement_statistics(&SSV::ha_read_key_count);
  end_range= 0;
  m_index_scan_type= partition_index_read;
  m_start_key.key= key;
  m_start_key.keypart_map= keypart_map;
  m_start_key.flag= find_flag;
  DBUG_RETURN(common_index_read(buf, TRUE));
}


/* Compare two part_no partition numbers */
static int cmp_part_ids(uchar *ref1, uchar *ref2)
{
  uint32 diff2= uint2korr(ref2);
  uint32 diff1= uint2korr(ref1);
  if (diff2 > diff1)
    return -1;
  if (diff2 < diff1)
    return 1;
  return 0;
}


/*
  @brief
    Provide ordering by (key_value, part_no).
*/

extern "C" int cmp_key_part_id(void *ptr, uchar *ref1, uchar *ref2)
{
  ha_partition *file= (ha_partition*)ptr;
  if (int res= key_rec_cmp(file->m_curr_key_info,
                           ref1 + PARTITION_BYTES_IN_POS,
                           ref2 + PARTITION_BYTES_IN_POS))
    return res;
  /* Equal keys: fall back to partition number for a stable total order. */
  return cmp_part_ids(ref1, ref2);
}

/*
  @brief
    Provide ordering by (key_value, underlying_table_rowid, part_no).
*/
extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2)
{
  ha_partition *file= (ha_partition*)ptr;
  int res;

  if ((res= key_rec_cmp(file->m_curr_key_info, ref1 + PARTITION_BYTES_IN_POS,
                        ref2 + PARTITION_BYTES_IN_POS)))
  {
    return res;
  }
  /* Equal keys: compare the stored row references of the underlying table. */
  if ((res= file->get_open_file_sample()->cmp_ref(ref1 +
            PARTITION_BYTES_IN_POS + file->m_rec_length,
            ref2 + PARTITION_BYTES_IN_POS + file->m_rec_length)))
  {
    return res;
  }
  return cmp_part_ids(ref1, ref2);
}


/**
  Common routine for a number of index_read variants

  @param buf             Buffer where the record should be returned.
  @param have_start_key  TRUE <=> the left endpoint is available, i.e.
                         we're in index_read call or in read_range_first
                         call and the range has left endpoint.
                         FALSE <=> there is no left endpoint (we're in
                         read_range_first() call and the range has no left
                         endpoint).

  @return Operation status
  @retval 0                    OK
  @retval HA_ERR_END_OF_FILE   Whole index scanned, without finding the record.
  @retval HA_ERR_KEY_NOT_FOUND Record not found, but index cursor positioned.
  @retval other                error code.

  @details
    Start scanning the range (when invoked from read_range_first()) or doing
    an index lookup (when invoked from index_read_XXX):
     - If possible, perform partition selection
     - Find the set of partitions we're going to use
     - Depending on whether we need ordering:
        NO:  Get the first record from first used partition (see
             handle_unordered_scan_next_partition)
        YES: Fill the priority queue and get the record that is the first in
             the ordering
*/

int ha_partition::common_index_read(uchar *buf, bool have_start_key)
{
  int error;
  uint UNINIT_VAR(key_len); /* used if have_start_key==TRUE */
  bool reverse_order= FALSE;
  DBUG_ENTER("ha_partition::common_index_read");

  DBUG_PRINT("info", ("m_ordered %u m_ordered_scan_ong %u",
                      m_ordered, m_ordered_scan_ongoing));

  if (have_start_key)
  {
    m_start_key.length= key_len= calculate_key_len(table, active_index,
                                                   m_start_key.key,
                                                   m_start_key.keypart_map);
    DBUG_PRINT("info", ("have_start_key map %lu find_flag %u len %u",
                        m_start_key.keypart_map, m_start_key.flag, key_len));
    DBUG_ASSERT(key_len);
  }
  /* Prune partitions and decide ordered vs unordered scan. */
  if (unlikely((error= partition_scan_set_up(buf, have_start_key))))
  {
    DBUG_RETURN(error);
  }

  if (have_start_key &&
      (m_start_key.flag == HA_READ_PREFIX_LAST ||
       m_start_key.flag == HA_READ_PREFIX_LAST_OR_PREV ||
       m_start_key.flag == HA_READ_BEFORE_KEY))
  {
    /* Backwards-reading flags force an ordered (merged) scan. */
    reverse_order= TRUE;
    m_ordered_scan_ongoing= TRUE;
  }
  DBUG_PRINT("info", ("m_ordered %u m_o_scan_ong %u have_start_key %u",
                      m_ordered, m_ordered_scan_ongoing, have_start_key));
  if (!m_ordered_scan_ongoing)
  {
    /*
      We use unordered index scan when read_range is used and flag
      is set to not use ordered.
      We also use an unordered index scan when the number of partitions to
      scan is only one.
      The unordered index scan will use the partition set created.
    */
    DBUG_PRINT("info", ("doing unordered scan"));
    error= handle_pre_scan(FALSE, FALSE);
    if (likely(!error))
      error= handle_unordered_scan_next_partition(buf);
  }
  else
  {
    /*
      In all other cases we will use the ordered index scan. This will use
      the partition set created by the get_partition_set method.
    */
    error= handle_ordered_index_scan(buf, reverse_order);
  }
  DBUG_RETURN(error);
}


/*
  Start an index scan from leftmost record and return first record

  SYNOPSIS
    index_first()
    buf                Read row in MySQL Row Format

  RETURN VALUE
    >0          Error code
    0           Success

  DESCRIPTION
    index_first() asks for the first key in the index.
    This is similar to index_read except that there is no start key since
    the scan starts from the leftmost entry and proceeds forward with
    index_next.

    Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
    and sql_select.cc.
*/

int ha_partition::index_first(uchar * buf)
{
  DBUG_ENTER("ha_partition::index_first");
  decrement_statistics(&SSV::ha_read_first_count);

  end_range= 0;
  m_index_scan_type= partition_index_first;
  DBUG_RETURN(common_first_last(buf));
}


/*
  Start an index scan from rightmost record and return first record

  SYNOPSIS
    index_last()
    buf                Read row in MySQL Row Format

  RETURN VALUE
    >0          Error code
    0           Success

  DESCRIPTION
    index_last() asks for the last key in the index.
    This is similar to index_read except that there is no start key since
    the scan starts from the rightmost entry and proceeds forward with
    index_prev.

    Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
    and sql_select.cc.
*/

int ha_partition::index_last(uchar * buf)
{
  DBUG_ENTER("ha_partition::index_last");
  decrement_statistics(&SSV::ha_read_last_count);

  m_index_scan_type= partition_index_last;
  DBUG_RETURN(common_first_last(buf));
}

/*
  Common routine for index_first/index_last

  SYNOPSIS
    ha_partition::common_first_last()

    see index_first for rest
*/

int ha_partition::common_first_last(uchar *buf)
{
  int error;

  if (table->all_partitions_pruned_away)
    return HA_ERR_END_OF_FILE;                  // No rows matching WHERE

  if (unlikely((error= partition_scan_set_up(buf, FALSE))))
    return error;
  /* index_last must always be ordered (reads from the end of the index). */
  if (!m_ordered_scan_ongoing &&
      m_index_scan_type != partition_index_last)
  {
    if (unlikely((error= handle_pre_scan(FALSE, check_parallel_search()))))
      return error;
    return handle_unordered_scan_next_partition(buf);
  }
  return handle_ordered_index_scan(buf, FALSE);
}


/*
  Optimization of the default implementation to take advantage of dynamic
  partition pruning.
*/
int ha_partition::index_read_idx_map(uchar *buf, uint index,
                                     const uchar *key,
                                     key_part_map keypart_map,
                                     enum ha_rkey_function find_flag)
{
  int error= HA_ERR_KEY_NOT_FOUND;
  DBUG_ENTER("ha_partition::index_read_idx_map");
  decrement_statistics(&SSV::ha_read_key_count);

  if (find_flag == HA_READ_KEY_EXACT)
  {
    uint part;
    m_start_key.key= key;
    m_start_key.keypart_map= keypart_map;
    m_start_key.flag= find_flag;
    m_start_key.length= calculate_key_len(table, index, m_start_key.key,
                                          m_start_key.keypart_map);

    get_partition_set(table, buf, index, &m_start_key, &m_part_spec);

    /* The start part must be marked as used.
*/
    DBUG_ASSERT(m_part_spec.start_part > m_part_spec.end_part ||
                bitmap_is_set(&(m_part_info->read_partitions),
                              m_part_spec.start_part));

    /* Probe the pruned partitions in order until one has the key. */
    for (part= m_part_spec.start_part;
         part <= m_part_spec.end_part;
         part= bitmap_get_next_set(&m_part_info->read_partitions, part))
    {
      error= m_file[part]->ha_index_read_idx_map(buf, index, key,
                                                 keypart_map, find_flag);
      if (likely(error != HA_ERR_KEY_NOT_FOUND &&
                 error != HA_ERR_END_OF_FILE))
        break;
    }
    if (part <= m_part_spec.end_part)
      m_last_part= part;
  }
  else
  {
    /*
      If not only used with READ_EXACT, we should investigate if possible
      to optimize for other find_flag's as well.
    */
    DBUG_ASSERT(0);
    /* fall back on the default implementation */
    error= handler::index_read_idx_map(buf, index, key, keypart_map, find_flag);
  }
  DBUG_RETURN(error);
}


/*
  Read next record in a forward index scan

  SYNOPSIS
    index_next()
    buf                Read row in MySQL Row Format

  RETURN VALUE
    >0          Error code
    0           Success

  DESCRIPTION
    Used to read forward through the index.
*/

int ha_partition::index_next(uchar * buf)
{
  DBUG_ENTER("ha_partition::index_next");
  decrement_statistics(&SSV::ha_read_next_count);

  /*
    TODO(low priority):
    If we want partition to work with the HANDLER commands, we
    must be able to do index_last() -> index_prev() -> index_next()
    and if direction changes, we must step back those partitions in
    the record queue so we don't return a value from the wrong direction.
  */
  if (m_index_scan_type == partition_index_last)
    DBUG_RETURN(HA_ERR_WRONG_COMMAND);
  if (!m_ordered_scan_ongoing)
  {
    DBUG_RETURN(handle_unordered_next(buf, FALSE));
  }
  DBUG_RETURN(handle_ordered_next(buf, FALSE));
}


/*
  Read next record special

  SYNOPSIS
    index_next_same()
    buf                Read row in MySQL Row Format
    key                Key
    keylen             Length of key

  RETURN VALUE
    >0          Error code
    0           Success

  DESCRIPTION
    This routine is used to read the next but only if the key is the same
    as supplied in the call.
*/

int ha_partition::index_next_same(uchar *buf, const uchar *key, uint keylen)
{
  DBUG_ENTER("ha_partition::index_next_same");
  decrement_statistics(&SSV::ha_read_next_count);

  DBUG_ASSERT(keylen == m_start_key.length);
  if (m_index_scan_type == partition_index_last)
    DBUG_RETURN(HA_ERR_WRONG_COMMAND);
  if (!m_ordered_scan_ongoing)
    DBUG_RETURN(handle_unordered_next(buf, TRUE));
  DBUG_RETURN(handle_ordered_next(buf, TRUE));
}


/* Read the last row matching the key prefix (HA_READ_PREFIX_LAST). */
int ha_partition::index_read_last_map(uchar *buf,
                                      const uchar *key,
                                      key_part_map keypart_map)
{
  DBUG_ENTER("ha_partition::index_read_last_map");

  m_ordered= true;                              // Safety measure
  end_range= NULL;
  m_index_scan_type= partition_index_read_last;
  m_start_key.key= key;
  m_start_key.keypart_map= keypart_map;
  m_start_key.flag= HA_READ_PREFIX_LAST;
  DBUG_RETURN(common_index_read(buf, true));
}


/*
  Read next record when performing index scan backwards

  SYNOPSIS
    index_prev()
    buf                Read row in MySQL Row Format

  RETURN VALUE
    >0          Error code
    0           Success

  DESCRIPTION
    Used to read backwards through the index.
*/

int ha_partition::index_prev(uchar * buf)
{
  DBUG_ENTER("ha_partition::index_prev");
  decrement_statistics(&SSV::ha_read_prev_count);

  /* TODO: read comment in index_next */
  if (m_index_scan_type == partition_index_first)
    DBUG_RETURN(HA_ERR_WRONG_COMMAND);
  DBUG_RETURN(handle_ordered_prev(buf));
}


/*
  Start a read of one range with start and end key

  SYNOPSIS
    read_range_first()
    start_key           Specification of start key
    end_key             Specification of end key
    eq_range_arg        Is it equal range
    sorted              Should records be returned in sorted order

  RETURN VALUE
    >0                    Error code
    0                     Success

  DESCRIPTION
    We reimplement read_range_first since we don't want the compare_key
    check at the end. This is already performed in the partition handler.
    read_range_next is very much different due to that we need to scan
    all underlying handlers.
*/

int ha_partition::read_range_first(const key_range *start_key,
                                   const key_range *end_key,
                                   bool eq_range_arg, bool sorted)
{
  int error;
  DBUG_ENTER("ha_partition::read_range_first");

  m_ordered= sorted;
  eq_range= eq_range_arg;
  set_end_range(end_key);

  range_key_part= m_curr_key_info[0]->key_part;
  if (start_key)
    m_start_key= *start_key;
  else
    m_start_key.key= NULL;

  m_index_scan_type= partition_read_range;
  error= common_index_read(m_rec0, MY_TEST(start_key));
  DBUG_RETURN(error);
}


/*
  Read next record in read of a range with start and end key

  SYNOPSIS
    read_range_next()

  RETURN VALUE
    >0                    Error code
    0                     Success
*/

int ha_partition::read_range_next()
{
  DBUG_ENTER("ha_partition::read_range_next");

  if (m_ordered_scan_ongoing)
  {
    DBUG_RETURN(handle_ordered_next(table->record[0], eq_range));
  }
  DBUG_RETURN(handle_unordered_next(table->record[0], eq_range));
}

/**
  Create a copy of all keys used by multi_range_read()

  @retval 0 ok
  @retval HA_ERR_END_OF_FILE no keys in range
  @retval other value: error

  TODO to save memory:
  - If
(mrr_mode & HA_MRR_MATERIALIZED_KEYS) is set then the keys data is
    stable and we don't have to copy the keys, only store a pointer to the
    key.
  - When allocating key data, store things in a MEM_ROOT buffer instead of
    a malloc() per key. This will simplify and speed up the current code
    and use less memory.
*/

int ha_partition::multi_range_key_create_key(RANGE_SEQ_IF *seq,
                                             range_seq_t seq_it)
{
  uint i, length;
  key_range *start_key, *end_key;
  KEY_MULTI_RANGE *range;
  DBUG_ENTER("ha_partition::multi_range_key_create_key");

  bitmap_clear_all(&m_mrr_used_partitions);
  m_mrr_range_length= 0;
  bzero(m_part_mrr_range_length,
        sizeof(*m_part_mrr_range_length) * m_tot_parts);
  /*
    The global range list and the per-partition range lists are reused
    between MRR invocations; allocate the heads lazily on first use,
    otherwise rewind to the start of the existing lists.
  */
  if (!m_mrr_range_first)
  {
    if (!(m_mrr_range_first= (PARTITION_KEY_MULTI_RANGE *)
          my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME),
                          &m_mrr_range_current, sizeof(PARTITION_KEY_MULTI_RANGE),
                          NullS)))
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);

    m_mrr_range_first->id= 1;
    m_mrr_range_first->key[0]= NULL;
    m_mrr_range_first->key[1]= NULL;
    m_mrr_range_first->next= NULL;
  }
  else
    m_mrr_range_current= m_mrr_range_first;

  for (i= 0; i < m_tot_parts; i++)
  {
    if (!m_part_mrr_range_first[i])
    {
      if (!(m_part_mrr_range_first[i]= (PARTITION_PART_KEY_MULTI_RANGE *)
            my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME | MY_ZEROFILL),
                            &m_part_mrr_range_current[i], sizeof(PARTITION_PART_KEY_MULTI_RANGE),
                            NullS)))
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);
    }
    else
    {
      m_part_mrr_range_current[i]= m_part_mrr_range_first[i];
      m_part_mrr_range_current[i]->partition_key_multi_range= NULL;
    }
  }
  m_mrr_range_current->key_multi_range.start_key.key= NULL;
  m_mrr_range_current->key_multi_range.end_key.key= NULL;

  /* Consume the caller's range sequence, deep-copying each range's keys. */
  while (!seq->next(seq_it, &m_mrr_range_current->key_multi_range))
  {
    m_mrr_range_length++;
    range= &m_mrr_range_current->key_multi_range;

    /* Copy start key */
    start_key= &range->start_key;
    DBUG_PRINT("info",("partition range->range_flag: %u", range->range_flag));
    DBUG_PRINT("info",("partition start_key->key: %p", start_key->key));
    DBUG_PRINT("info",("partition start_key->length: %u", start_key->length));
    DBUG_PRINT("info",("partition start_key->keypart_map: %lu",
                       start_key->keypart_map));
    DBUG_PRINT("info",("partition start_key->flag: %u", start_key->flag));

    if (start_key->key)
    {
      /* Grow the cached key buffer only when the new key is longer. */
      length= start_key->length;
      if (!m_mrr_range_current->key[0] ||
          m_mrr_range_current->length[0] < length)
      {
        if (m_mrr_range_current->key[0])
          my_free(m_mrr_range_current->key[0]);
        if (!(m_mrr_range_current->key[0]=
              (uchar *) my_malloc(PSI_INSTRUMENT_ME, length, MYF(MY_WME))))
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        m_mrr_range_current->length[0]= length;
      }
      memcpy(m_mrr_range_current->key[0], start_key->key, length);
      start_key->key= m_mrr_range_current->key[0];
    }

    /* Copy end key */
    end_key= &range->end_key;
    DBUG_PRINT("info",("partition end_key->key: %p", end_key->key));
    DBUG_PRINT("info",("partition end_key->length: %u", end_key->length));
    DBUG_PRINT("info",("partition end_key->keypart_map: %lu",
                       end_key->keypart_map));
    DBUG_PRINT("info",("partition end_key->flag: %u", end_key->flag));
    if (end_key->key)
    {
      length= end_key->length;
      if (!m_mrr_range_current->key[1] ||
          m_mrr_range_current->length[1] < length)
      {
        if (m_mrr_range_current->key[1])
          my_free(m_mrr_range_current->key[1]);
        if (!(m_mrr_range_current->key[1]=
              (uchar *) my_malloc(PSI_INSTRUMENT_ME, length, MYF(MY_WME))))
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        m_mrr_range_current->length[1]= length;
      }
      memcpy(m_mrr_range_current->key[1], end_key->key, length);
      end_key->key= m_mrr_range_current->key[1];
    }

    /* Save caller's range id and substitute our own range object as ptr. */
    m_mrr_range_current->ptr= m_mrr_range_current->key_multi_range.ptr;
    m_mrr_range_current->key_multi_range.ptr= m_mrr_range_current;

    if (start_key->key && (start_key->flag == HA_READ_KEY_EXACT))
      get_partition_set(table, table->record[0], active_index,
                        start_key, &m_part_spec);
    else
    {
      m_part_spec.start_part= 0;
      m_part_spec.end_part= m_tot_parts - 1;
    }

    /* Copy key to those partitions that need it */
    for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
    {
      if (bitmap_is_set(&(m_part_info->read_partitions), i))
      {
        bitmap_set_bit(&m_mrr_used_partitions, i);
        m_part_mrr_range_length[i]++;
        m_part_mrr_range_current[i]->partition_key_multi_range=
          m_mrr_range_current;

        if (!m_part_mrr_range_current[i]->next)
        {
          PARTITION_PART_KEY_MULTI_RANGE *tmp_part_mrr_range;
          if (!(tmp_part_mrr_range= (PARTITION_PART_KEY_MULTI_RANGE *)
                my_malloc(PSI_INSTRUMENT_ME, sizeof(PARTITION_PART_KEY_MULTI_RANGE),
                          MYF(MY_WME | MY_ZEROFILL))))
            DBUG_RETURN(HA_ERR_OUT_OF_MEM);

          m_part_mrr_range_current[i]->next= tmp_part_mrr_range;
          m_part_mrr_range_current[i]= tmp_part_mrr_range;
        }
        else
        {
          m_part_mrr_range_current[i]= m_part_mrr_range_current[i]->next;
          m_part_mrr_range_current[i]->partition_key_multi_range= NULL;
        }
      }
    }

    if (!m_mrr_range_current->next)
    {
      /* Add end of range sentinel */
      PARTITION_KEY_MULTI_RANGE *tmp_mrr_range;
      if (!(tmp_mrr_range= (PARTITION_KEY_MULTI_RANGE *)
            my_malloc(PSI_INSTRUMENT_ME, sizeof(PARTITION_KEY_MULTI_RANGE), MYF(MY_WME))))
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);

      tmp_mrr_range->id= m_mrr_range_current->id + 1;
      tmp_mrr_range->key[0]= NULL;
      tmp_mrr_range->key[1]= NULL;
      tmp_mrr_range->next= NULL;
      m_mrr_range_current->next= tmp_mrr_range;
    }
    m_mrr_range_current= m_mrr_range_current->next;
  }

  if (!m_mrr_range_length)
  {
    DBUG_PRINT("Warning",("No keys to use for mrr"));
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }

  /* set start and end part */
  m_part_spec.start_part= bitmap_get_first_set(&m_mrr_used_partitions);

  for (i= m_tot_parts; i-- > 0;)
  {
    if (bitmap_is_set(&m_mrr_used_partitions, i))
    {
      m_part_spec.end_part= i;
      break;
    }
  }
  for (i= 0; i < m_tot_parts; i++)
  {
    m_partition_part_key_multi_range_hld[i].partition= this;
m_partition_part_key_multi_range_hld[i].part_id= i; + m_partition_part_key_multi_range_hld[i].partition_part_key_multi_range= + m_part_mrr_range_first[i]; + } + DBUG_PRINT("return",("OK")); + DBUG_RETURN(0); +} + + +static void partition_multi_range_key_get_key_info(void *init_params, + uint *length, + key_part_map *map) +{ + PARTITION_PART_KEY_MULTI_RANGE_HLD *hld= + (PARTITION_PART_KEY_MULTI_RANGE_HLD *)init_params; + ha_partition *partition= hld->partition; + key_range *start_key= (&partition->m_mrr_range_first-> + key_multi_range.start_key); + DBUG_ENTER("partition_multi_range_key_get_key_info"); + *length= start_key->length; + *map= start_key->keypart_map; + DBUG_VOID_RETURN; +} + + +static range_seq_t partition_multi_range_key_init(void *init_params, + uint n_ranges, + uint flags) +{ + PARTITION_PART_KEY_MULTI_RANGE_HLD *hld= + (PARTITION_PART_KEY_MULTI_RANGE_HLD *)init_params; + ha_partition *partition= hld->partition; + uint i= hld->part_id; + DBUG_ENTER("partition_multi_range_key_init"); + // not used: partition->m_mrr_range_init_flags= flags; + hld->partition_part_key_multi_range= partition->m_part_mrr_range_first[i]; + DBUG_RETURN(init_params); +} + + +static bool partition_multi_range_key_next(range_seq_t seq, + KEY_MULTI_RANGE *range) +{ + PARTITION_PART_KEY_MULTI_RANGE_HLD *hld= + (PARTITION_PART_KEY_MULTI_RANGE_HLD *)seq; + PARTITION_KEY_MULTI_RANGE *partition_key_multi_range= + hld->partition_part_key_multi_range->partition_key_multi_range; + DBUG_ENTER("partition_multi_range_key_next"); + if (!partition_key_multi_range) + DBUG_RETURN(TRUE); + *range= partition_key_multi_range->key_multi_range; + hld->partition_part_key_multi_range= + hld->partition_part_key_multi_range->next; + DBUG_RETURN(FALSE); +} + + +static bool partition_multi_range_key_skip_record(range_seq_t seq, + range_id_t range_info, + uchar *rowid) +{ + PARTITION_PART_KEY_MULTI_RANGE_HLD *hld= + (PARTITION_PART_KEY_MULTI_RANGE_HLD *)seq; + PARTITION_KEY_MULTI_RANGE *pkmr= 
(PARTITION_KEY_MULTI_RANGE *)range_info; + DBUG_ENTER("partition_multi_range_key_skip_record"); + DBUG_RETURN(hld->partition->m_seq_if->skip_record(hld->partition->m_seq, + pkmr->ptr, rowid)); +} + + +static bool partition_multi_range_key_skip_index_tuple(range_seq_t seq, + range_id_t range_info) +{ + PARTITION_PART_KEY_MULTI_RANGE_HLD *hld= + (PARTITION_PART_KEY_MULTI_RANGE_HLD *)seq; + PARTITION_KEY_MULTI_RANGE *pkmr= (PARTITION_KEY_MULTI_RANGE *)range_info; + DBUG_ENTER("partition_multi_range_key_skip_index_tuple"); + DBUG_RETURN(hld->partition->m_seq_if->skip_index_tuple(hld->partition->m_seq, + pkmr->ptr)); +} + +ha_rows ha_partition::multi_range_read_info_const(uint keyno, + RANGE_SEQ_IF *seq, + void *seq_init_param, + uint n_ranges, uint *bufsz, + uint *mrr_mode, + Cost_estimate *cost) +{ + int error; + uint i; + handler **file; + ha_rows rows= 0; + uint ret_mrr_mode= 0; + range_seq_t seq_it; + part_id_range save_part_spec; + Cost_estimate part_cost; + DBUG_ENTER("ha_partition::multi_range_read_info_const"); + DBUG_PRINT("enter", ("partition this: %p", this)); + + m_mrr_new_full_buffer_size= 0; + save_part_spec= m_part_spec; + + cost->reset(); + + seq_it= seq->init(seq_init_param, n_ranges, *mrr_mode); + if (unlikely((error= multi_range_key_create_key(seq, seq_it)))) + { + if (likely(error == HA_ERR_END_OF_FILE)) // No keys in range + { + rows= 0; + goto end; + } + /* + This error means that we can't do multi_range_read for the moment + (probably running out of memory) and we need to fallback to + normal reads + */ + m_part_spec= save_part_spec; + DBUG_RETURN(HA_POS_ERROR); + } + m_part_seq_if.get_key_info= + seq->get_key_info ? partition_multi_range_key_get_key_info : NULL; + m_part_seq_if.init= partition_multi_range_key_init; + m_part_seq_if.next= partition_multi_range_key_next; + m_part_seq_if.skip_record= (seq->skip_record ? + partition_multi_range_key_skip_record : NULL); + m_part_seq_if.skip_index_tuple= (seq->skip_index_tuple ? 
+ partition_multi_range_key_skip_index_tuple : + NULL); + file= m_file; + do + { + i= (uint)(file - m_file); + DBUG_PRINT("info",("partition part_id: %u", i)); + if (bitmap_is_set(&m_mrr_used_partitions, i)) + { + ha_rows tmp_rows; + uint tmp_mrr_mode; + m_mrr_buffer_size[i]= 0; + part_cost.reset(); + tmp_mrr_mode= *mrr_mode; + tmp_rows= (*file)-> + multi_range_read_info_const(keyno, &m_part_seq_if, + &m_partition_part_key_multi_range_hld[i], + m_part_mrr_range_length[i], + &m_mrr_buffer_size[i], + &tmp_mrr_mode, &part_cost); + if (tmp_rows == HA_POS_ERROR) + { + m_part_spec= save_part_spec; + DBUG_RETURN(HA_POS_ERROR); + } + cost->add(&part_cost); + rows+= tmp_rows; + ret_mrr_mode|= tmp_mrr_mode; + m_mrr_new_full_buffer_size+= m_mrr_buffer_size[i]; + } + } while (*(++file)); + *mrr_mode= ret_mrr_mode; + +end: + m_part_spec= save_part_spec; + DBUG_RETURN(rows); +} + + +ha_rows ha_partition::multi_range_read_info(uint keyno, uint n_ranges, + uint keys, + uint key_parts, uint *bufsz, + uint *mrr_mode, + Cost_estimate *cost) +{ + uint i; + handler **file; + ha_rows rows= 0; + Cost_estimate part_cost; + DBUG_ENTER("ha_partition::multi_range_read_info"); + DBUG_PRINT("enter", ("partition this: %p", this)); + + cost->reset(); + + m_mrr_new_full_buffer_size= 0; + file= m_file; + do + { + i= (uint)(file - m_file); + if (bitmap_is_set(&(m_part_info->read_partitions), (i))) + { + ha_rows tmp_rows; + m_mrr_buffer_size[i]= 0; + part_cost.reset(); + if ((tmp_rows= (*file)->multi_range_read_info(keyno, n_ranges, keys, + key_parts, + &m_mrr_buffer_size[i], + mrr_mode, &part_cost))) + DBUG_RETURN(rows); + cost->add(&part_cost); + rows+= tmp_rows; + m_mrr_new_full_buffer_size+= m_mrr_buffer_size[i]; + } + } while (*(++file)); + + DBUG_RETURN(0); +} + + +int ha_partition::multi_range_read_init(RANGE_SEQ_IF *seq, + void *seq_init_param, + uint n_ranges, uint mrr_mode, + HANDLER_BUFFER *buf) +{ + int error; + uint i; + handler **file; + uchar *tmp_buffer; + 
DBUG_ENTER("ha_partition::multi_range_read_init"); + DBUG_PRINT("enter", ("partition this: %p", this)); + + eq_range= 0; + m_seq_if= seq; + m_seq= seq->init(seq_init_param, n_ranges, mrr_mode); + if (unlikely((error= multi_range_key_create_key(seq, m_seq)))) + DBUG_RETURN(0); + + m_part_seq_if.get_key_info= (seq->get_key_info ? + partition_multi_range_key_get_key_info : + NULL); + m_part_seq_if.init= partition_multi_range_key_init; + m_part_seq_if.next= partition_multi_range_key_next; + m_part_seq_if.skip_record= (seq->skip_record ? + partition_multi_range_key_skip_record : + NULL); + m_part_seq_if.skip_index_tuple= (seq->skip_index_tuple ? + partition_multi_range_key_skip_index_tuple : + NULL); + + /* m_mrr_new_full_buffer_size was calculated in multi_range_read_info */ + if (m_mrr_full_buffer_size < m_mrr_new_full_buffer_size) + { + if (m_mrr_full_buffer) + my_free(m_mrr_full_buffer); + if (!(m_mrr_full_buffer= + (uchar *) my_malloc(PSI_INSTRUMENT_ME, m_mrr_new_full_buffer_size, MYF(MY_WME)))) + { + m_mrr_full_buffer_size= 0; + error= HA_ERR_OUT_OF_MEM; + goto error; + } + m_mrr_full_buffer_size= m_mrr_new_full_buffer_size; + } + + tmp_buffer= m_mrr_full_buffer; + file= m_file; + do + { + i= (uint)(file - m_file); + DBUG_PRINT("info",("partition part_id: %u", i)); + if (bitmap_is_set(&m_mrr_used_partitions, i)) + { + if (m_mrr_new_full_buffer_size) + { + if (m_mrr_buffer_size[i]) + { + m_mrr_buffer[i].buffer= tmp_buffer; + m_mrr_buffer[i].end_of_used_area= tmp_buffer; + tmp_buffer+= m_mrr_buffer_size[i]; + m_mrr_buffer[i].buffer_end= tmp_buffer; + } + } + else + m_mrr_buffer[i]= *buf; + + if (unlikely((error= (*file)-> + multi_range_read_init(&m_part_seq_if, + &m_partition_part_key_multi_range_hld[i], + m_part_mrr_range_length[i], + mrr_mode, + &m_mrr_buffer[i])))) + goto error; + m_stock_range_seq[i]= 0; + } + } while (*(++file)); + + m_multi_range_read_first= TRUE; + m_mrr_range_current= m_mrr_range_first; + m_index_scan_type= partition_read_multi_range; + 
m_mrr_mode= mrr_mode; + m_mrr_n_ranges= n_ranges; + DBUG_RETURN(0); + +error: + DBUG_RETURN(error); +} + + +int ha_partition::multi_range_read_next(range_id_t *range_info) +{ + int error; + DBUG_ENTER("ha_partition::multi_range_read_next"); + DBUG_PRINT("enter", ("partition this: %p partition m_mrr_mode: %u", + this, m_mrr_mode)); + + if ((m_mrr_mode & HA_MRR_SORTED)) + { + if (m_multi_range_read_first) + { + if (unlikely((error= handle_ordered_index_scan(table->record[0], + FALSE)))) + DBUG_RETURN(error); + if (!m_pre_calling) + m_multi_range_read_first= FALSE; + } + else if (unlikely((error= handle_ordered_next(table->record[0], + eq_range)))) + DBUG_RETURN(error); + *range_info= m_mrr_range_current->ptr; + } + else + { + if (unlikely(m_multi_range_read_first)) + { + if (unlikely((error= + handle_unordered_scan_next_partition(table->record[0])))) + DBUG_RETURN(error); + if (!m_pre_calling) + m_multi_range_read_first= FALSE; + } + else if (unlikely((error= handle_unordered_next(table->record[0], FALSE)))) + DBUG_RETURN(error); + + if (!(m_mrr_mode & HA_MRR_NO_ASSOCIATION)) + { + *range_info= + ((PARTITION_KEY_MULTI_RANGE *) m_range_info[m_last_part])->ptr; + } + } + DBUG_RETURN(0); +} + + +int ha_partition::multi_range_read_explain_info(uint mrr_mode, char *str, + size_t size) +{ + DBUG_ENTER("ha_partition::multi_range_read_explain_info"); + DBUG_RETURN(get_open_file_sample()-> + multi_range_read_explain_info(mrr_mode, str, size)); +} + + +/** + Find and retrieve the Full Text Search relevance ranking for a search string + in a full text index. 
+ + @param handler Full Text Search handler + @param record Search string + @param length Length of the search string + + @retval Relevance value +*/ + +float partition_ft_find_relevance(FT_INFO *handler, + uchar *record, uint length) +{ + st_partition_ft_info *info= (st_partition_ft_info *)handler; + uint m_last_part= ((ha_partition*) info->file)->last_part(); + FT_INFO *m_handler= info->part_ft_info[m_last_part]; + DBUG_ENTER("partition_ft_find_relevance"); + if (!m_handler) + DBUG_RETURN((float)-1.0); + DBUG_RETURN(m_handler->please->find_relevance(m_handler, record, length)); +} + + +/** + Retrieve the Full Text Search relevance ranking for the current + full text search. + + @param handler Full Text Search handler + + @retval Relevance value +*/ + +float partition_ft_get_relevance(FT_INFO *handler) +{ + st_partition_ft_info *info= (st_partition_ft_info *)handler; + uint m_last_part= ((ha_partition*) info->file)->last_part(); + FT_INFO *m_handler= info->part_ft_info[m_last_part]; + DBUG_ENTER("partition_ft_get_relevance"); + if (!m_handler) + DBUG_RETURN((float)-1.0); + DBUG_RETURN(m_handler->please->get_relevance(m_handler)); +} + + +/** + Free the memory for a full text search handler. + + @param handler Full Text Search handler +*/ + +void partition_ft_close_search(FT_INFO *handler) +{ + st_partition_ft_info *info= (st_partition_ft_info *)handler; + info->file->ft_close_search(handler); +} + + +/** + Free the memory for a full text search handler. 
+ + @param handler Full Text Search handler +*/ + +void ha_partition::ft_close_search(FT_INFO *handler) +{ + uint i; + st_partition_ft_info *info= (st_partition_ft_info *)handler; + DBUG_ENTER("ha_partition::ft_close_search"); + + for (i= 0; i < m_tot_parts; i++) + { + FT_INFO *m_handler= info->part_ft_info[i]; + DBUG_ASSERT(!m_handler || + (m_handler->please && m_handler->please->close_search)); + if (m_handler && + m_handler->please && + m_handler->please->close_search) + m_handler->please->close_search(m_handler); + } + DBUG_VOID_RETURN; +} + + +/* Partition Full Text search function table */ +_ft_vft partition_ft_vft = +{ + NULL, // partition_ft_read_next + partition_ft_find_relevance, + partition_ft_close_search, + partition_ft_get_relevance, + NULL // partition_ft_reinit_search +}; + + +/** + Initialize a full text search. +*/ + +int ha_partition::ft_init() +{ + int error; + uint i= 0; + uint32 part_id; + DBUG_ENTER("ha_partition::ft_init"); + DBUG_PRINT("info", ("partition this: %p", this)); + + /* + For operations that may need to change data, we may need to extend + read_set. + */ + if (get_lock_type() == F_WRLCK) + { + /* + If write_set contains any of the fields used in partition and + subpartition expression, we need to set all bits in read_set because + the row may need to be inserted in a different [sub]partition. In + other words update_row() can be converted into write_row(), which + requires a complete record. + */ + if (bitmap_is_overlapping(&m_part_info->full_part_field_set, + table->write_set)) + bitmap_set_all(table->read_set); + else + { + /* + Some handlers only read fields as specified by the bitmap for the + read set. For partitioned handlers we always require that the + fields of the partition functions are read such that we can + calculate the partition id to place updated and deleted records. 
+ */ + bitmap_union(table->read_set, &m_part_info->full_part_field_set); + } + } + + /* Now we see what the index of our first important partition is */ + DBUG_PRINT("info", ("m_part_info->read_partitions: %p", + (void *) m_part_info->read_partitions.bitmap)); + part_id= bitmap_get_first_set(&(m_part_info->read_partitions)); + DBUG_PRINT("info", ("m_part_spec.start_part %u", (uint) part_id)); + + if (part_id == MY_BIT_NONE) + { + error= 0; + goto err1; + } + + DBUG_PRINT("info", ("ft_init on partition %u", (uint) part_id)); + /* + ft_end() is needed for partitioning to reset internal data if scan + is already in use + */ + if (m_pre_calling) + { + if (unlikely((error= pre_ft_end()))) + goto err1; + } + else + ft_end(); + m_index_scan_type= partition_ft_read; + for (i= part_id; i < m_tot_parts; i++) + { + if (bitmap_is_set(&(m_part_info->read_partitions), i)) + { + error= m_pre_calling ? m_file[i]->pre_ft_init() : m_file[i]->ft_init(); + if (unlikely(error)) + goto err2; + } + } + m_scan_value= 1; + m_part_spec.start_part= part_id; + m_part_spec.end_part= m_tot_parts - 1; + m_ft_init_and_first= TRUE; + DBUG_PRINT("info", ("m_scan_value: %u", m_scan_value)); + DBUG_RETURN(0); + +err2: + late_extra_no_cache(part_id); + while ((int)--i >= (int)part_id) + { + if (bitmap_is_set(&(m_part_info->read_partitions), i)) + { + if (m_pre_calling) + m_file[i]->pre_ft_end(); + else + m_file[i]->ft_end(); + } + } +err1: + m_scan_value= 2; + m_part_spec.start_part= NO_CURRENT_PART_ID; + DBUG_RETURN(error); +} + + +/** + Initialize a full text search during a bulk access request. +*/ + +int ha_partition::pre_ft_init() +{ + bool save_m_pre_calling; + int error; + DBUG_ENTER("ha_partition::pre_ft_init"); + save_m_pre_calling= m_pre_calling; + m_pre_calling= TRUE; + error= ft_init(); + m_pre_calling= save_m_pre_calling; + DBUG_RETURN(error); +} + + +/** + Terminate a full text search. 
+*/ + +void ha_partition::ft_end() +{ + handler **file; + DBUG_ENTER("ha_partition::ft_end"); + DBUG_PRINT("info", ("partition this: %p", this)); + + switch (m_scan_value) { + case 2: // Error + break; + case 1: // Table scan + if (NO_CURRENT_PART_ID != m_part_spec.start_part) + late_extra_no_cache(m_part_spec.start_part); + file= m_file; + do + { + if (bitmap_is_set(&(m_part_info->read_partitions), (uint)(file - m_file))) + { + if (m_pre_calling) + (*file)->pre_ft_end(); + else + (*file)->ft_end(); + } + } while (*(++file)); + break; + } + m_scan_value= 2; + m_part_spec.start_part= NO_CURRENT_PART_ID; + ft_current= 0; + DBUG_VOID_RETURN; +} + + +/** + Terminate a full text search during a bulk access request. +*/ + +int ha_partition::pre_ft_end() +{ + bool save_m_pre_calling; + DBUG_ENTER("ha_partition::pre_ft_end"); + save_m_pre_calling= m_pre_calling; + m_pre_calling= TRUE; + ft_end(); + m_pre_calling= save_m_pre_calling; + DBUG_RETURN(0); +} + + +void ha_partition::swap_blobs(uchar * rec_buf, Ordered_blob_storage ** storage, bool restore) +{ + uint *ptr, *end; + uint blob_n= 0; + table->move_fields(table->field, rec_buf, table->record[0]); + for (ptr= table->s->blob_field, end= ptr + table->s->blob_fields; + ptr != end; ++ptr, ++blob_n) + { + DBUG_ASSERT(*ptr < table->s->fields); + Field_blob *blob= (Field_blob*) table->field[*ptr]; + DBUG_ASSERT(blob->flags & BLOB_FLAG); + DBUG_ASSERT(blob->field_index == *ptr); + if (!bitmap_is_set(table->read_set, *ptr) || blob->is_null()) + continue; + + Ordered_blob_storage &s= *storage[blob_n]; + + if (restore) + { + /* + We protect only blob cache (value or read_value). If the cache was + empty that doesn't mean the blob was empty. Blobs allocated by a + storage engine should work just fine. 
+ */ + if (!s.blob.is_empty()) + blob->swap(s.blob, s.set_read_value); + } + else + { + bool set_read_value; + String *cached= blob->cached(&set_read_value); + if (cached) + { + cached->swap(s.blob); + s.set_read_value= set_read_value; + } + } + } + table->move_fields(table->field, table->record[0], rec_buf); +} + + +/** + Initialize a full text search using the extended API. + + @param flags Search flags + @param inx Key number + @param key Key value + + @return FT_INFO structure if successful + NULL otherwise +*/ + +FT_INFO *ha_partition::ft_init_ext(uint flags, uint inx, String *key) +{ + FT_INFO *ft_handler; + handler **file; + st_partition_ft_info *ft_target, **parent; + DBUG_ENTER("ha_partition::ft_init_ext"); + + if (ft_current) + parent= &ft_current->next; + else + parent= &ft_first; + + if (!(ft_target= *parent)) + { + FT_INFO **tmp_ft_info; + if (!(ft_target= (st_partition_ft_info *) + my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME | MY_ZEROFILL), + &ft_target, sizeof(st_partition_ft_info), + &tmp_ft_info, sizeof(FT_INFO *) * m_tot_parts, + NullS))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATAL)); + DBUG_RETURN(NULL); + } + ft_target->part_ft_info= tmp_ft_info; + (*parent)= ft_target; + } + + ft_current= ft_target; + file= m_file; + do + { + if (bitmap_is_set(&(m_part_info->read_partitions), (uint)(file - m_file))) + { + if ((ft_handler= (*file)->ft_init_ext(flags, inx, key))) + (*file)->ft_handler= ft_handler; + else + (*file)->ft_handler= NULL; + ft_target->part_ft_info[file - m_file]= ft_handler; + } + else + { + (*file)->ft_handler= NULL; + ft_target->part_ft_info[file - m_file]= NULL; + } + } while (*(++file)); + + ft_target->please= &partition_ft_vft; + ft_target->file= this; + DBUG_RETURN((FT_INFO*)ft_target); +} + + +/** + Return the next record from the FT result set during an ordered index + pre-scan + + @param use_parallel Is it a parallel search + + @return >0 Error code + 0 Success +*/ + +int ha_partition::pre_ft_read(bool use_parallel) +{ 
+ bool save_m_pre_calling; + int error; + DBUG_ENTER("ha_partition::pre_ft_read"); + DBUG_PRINT("info", ("partition this: %p", this)); + save_m_pre_calling= m_pre_calling; + m_pre_calling= TRUE; + m_pre_call_use_parallel= use_parallel; + error= ft_read(table->record[0]); + m_pre_calling= save_m_pre_calling; + DBUG_RETURN(error); +} + + +/** + Return the first or next record in a full text search. + + @param buf Buffer where the record should be returned + + @return >0 Error code + 0 Success +*/ + +int ha_partition::ft_read(uchar *buf) +{ + handler *file; + int result= HA_ERR_END_OF_FILE, error; + uint part_id= m_part_spec.start_part; + DBUG_ENTER("ha_partition::ft_read"); + DBUG_PRINT("info", ("partition this: %p", this)); + DBUG_PRINT("info", ("part_id: %u", part_id)); + + if (part_id == NO_CURRENT_PART_ID) + { + /* + The original set of partitions to scan was empty and thus we report + the result here. + */ + DBUG_PRINT("info", ("NO_CURRENT_PART_ID")); + goto end; + } + + DBUG_ASSERT(m_scan_value == 1); + + if (m_ft_init_and_first) // First call to ft_read() + { + m_ft_init_and_first= FALSE; + if (!bulk_access_executing) + { + error= handle_pre_scan(FALSE, check_parallel_search()); + if (m_pre_calling || error) + DBUG_RETURN(error); + } + late_extra_cache(part_id); + } + + file= m_file[part_id]; + + while (TRUE) + { + if (!(result= file->ft_read(buf))) + { + /* Found row: remember position and return it. 
*/ + m_part_spec.start_part= m_last_part= part_id; + table->status= 0; + DBUG_RETURN(0); + } + + /* + if we get here, then the current partition ft_next returned failure + */ + if (result != HA_ERR_END_OF_FILE) + goto end_dont_reset_start_part; // Return error + + /* End current partition */ + late_extra_no_cache(part_id); + DBUG_PRINT("info", ("stopping using partition %u", (uint) part_id)); + + /* Shift to next partition */ + while (++part_id < m_tot_parts && + !bitmap_is_set(&(m_part_info->read_partitions), part_id)) + ; + if (part_id >= m_tot_parts) + { + result= HA_ERR_END_OF_FILE; + break; + } + m_part_spec.start_part= m_last_part= part_id; + file= m_file[part_id]; + DBUG_PRINT("info", ("now using partition %u", (uint) part_id)); + late_extra_cache(part_id); + } + +end: + m_part_spec.start_part= NO_CURRENT_PART_ID; +end_dont_reset_start_part: + table->status= STATUS_NOT_FOUND; + DBUG_RETURN(result); +} + + +/* + Common routine to set up index scans + + SYNOPSIS + ha_partition::partition_scan_set_up() + buf Buffer to later return record in (this function + needs it to calculcate partitioning function + values) + + idx_read_flag TRUE <=> m_start_key has range start endpoint which + probably can be used to determine the set of partitions + to scan. + FALSE <=> there is no start endpoint. + + DESCRIPTION + Find out which partitions we'll need to read when scanning the specified + range. + + If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE + as we will not need to do merge ordering. 
+ + RETURN VALUE + >0 Error code + 0 Success +*/ + +int ha_partition::partition_scan_set_up(uchar * buf, bool idx_read_flag) +{ + DBUG_ENTER("ha_partition::partition_scan_set_up"); + + if (idx_read_flag) + get_partition_set(table, buf, active_index, &m_start_key, &m_part_spec); + else + { + m_part_spec.start_part= 0; + m_part_spec.end_part= m_tot_parts - 1; + } + if (m_part_spec.start_part > m_part_spec.end_part) + { + /* + We discovered a partition set but the set was empty so we report + key not found. + */ + DBUG_PRINT("info", ("scan with no partition to scan")); + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + if (m_part_spec.start_part == m_part_spec.end_part) + { + /* + We discovered a single partition to scan, this never needs to be + performed using the ordered index scan. + */ + DBUG_PRINT("info", ("index scan using the single partition %u", + (uint) m_part_spec.start_part)); + m_ordered_scan_ongoing= FALSE; + } + else + { + /* + Set m_ordered_scan_ongoing according how the scan should be done + Only exact partitions are discovered atm by get_partition_set. + Verify this, also bitmap must have at least one bit set otherwise + the result from this table is the empty set. 
+ */ + uint start_part= bitmap_get_first_set(&(m_part_info->read_partitions)); + if (start_part == MY_BIT_NONE) + { + DBUG_PRINT("info", ("scan with no partition to scan")); + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + if (start_part > m_part_spec.start_part) + m_part_spec.start_part= start_part; + DBUG_ASSERT(m_part_spec.start_part < m_tot_parts); + m_ordered_scan_ongoing= m_ordered; + } + DBUG_ASSERT(m_part_spec.start_part < m_tot_parts); + DBUG_ASSERT(m_part_spec.end_part < m_tot_parts); + DBUG_RETURN(0); +} + +/** + Check if we can search partitions in parallel + + @retval TRUE yes + @retval FALSE no +*/ + +bool ha_partition::check_parallel_search() +{ + TABLE_LIST *table_list= table->pos_in_table_list; + st_select_lex *select_lex; + JOIN *join; + DBUG_ENTER("ha_partition::check_parallel_search"); + if (!table_list) + goto not_parallel; + + while (table_list->parent_l) + table_list= table_list->parent_l; + + select_lex= table_list->select_lex; + DBUG_PRINT("info",("partition select_lex: %p", select_lex)); + if (!select_lex) + goto not_parallel; + if (!select_lex->limit_params.explicit_limit) + { + DBUG_PRINT("info",("partition not using explicit_limit")); + goto parallel; + } + + join= select_lex->join; + DBUG_PRINT("info",("partition join: %p", join)); + if (join && join->skip_sort_order) + { + DBUG_PRINT("info",("partition order_list.elements: %u", + select_lex->order_list.elements)); + if (select_lex->order_list.elements) + { + Item *item= *select_lex->order_list.first->item; + DBUG_PRINT("info",("partition item: %p", item)); + DBUG_PRINT("info",("partition item->type(): %u", item->type())); + DBUG_PRINT("info",("partition m_part_info->part_type: %u", + m_part_info->part_type)); + DBUG_PRINT("info",("partition m_is_sub_partitioned: %s", + m_is_sub_partitioned ? 
"TRUE" : "FALSE")); + DBUG_PRINT("info",("partition m_part_info->part_expr: %p", + m_part_info->part_expr)); + if (item->type() == Item::FIELD_ITEM && + m_part_info->part_type == RANGE_PARTITION && + !m_is_sub_partitioned && + (!m_part_info->part_expr || + m_part_info->part_expr->type() == Item::FIELD_ITEM)) + { + Field *order_field= ((Item_field *)item)->field; + DBUG_PRINT("info",("partition order_field: %p", order_field)); + if (order_field && order_field->table == table_list->table) + { + Field *part_field= m_part_info->full_part_field_array[0]; + DBUG_PRINT("info",("partition order_field: %p", order_field)); + DBUG_PRINT("info",("partition part_field: %p", part_field)); + if (part_field == order_field) + { + /* + We are using ORDER BY partition_field LIMIT # + In this case, let's not do things in parallel as it's + likely that the query can be satisfied from the first + partition + */ + DBUG_PRINT("info",("partition with ORDER on partition field")); + goto not_parallel; + } + } + } + DBUG_PRINT("info",("partition have order")); + goto parallel; + } + + DBUG_PRINT("info",("partition group_list.elements: %u", + select_lex->group_list.elements)); + if (select_lex->group_list.elements) + { + Item *item= *select_lex->group_list.first->item; + DBUG_PRINT("info",("partition item: %p", item)); + DBUG_PRINT("info",("partition item->type(): %u", item->type())); + DBUG_PRINT("info",("partition m_part_info->part_type: %u", + m_part_info->part_type)); + DBUG_PRINT("info",("partition m_is_sub_partitioned: %s", + m_is_sub_partitioned ? 
"TRUE" : "FALSE")); + DBUG_PRINT("info",("partition m_part_info->part_expr: %p", + m_part_info->part_expr)); + if (item->type() == Item::FIELD_ITEM && + m_part_info->part_type == RANGE_PARTITION && + !m_is_sub_partitioned && + (!m_part_info->part_expr || + m_part_info->part_expr->type() == Item::FIELD_ITEM)) + { + Field *group_field= ((Item_field *)item)->field; + DBUG_PRINT("info",("partition group_field: %p", group_field)); + if (group_field && group_field->table == table_list->table) + { + Field *part_field= m_part_info->full_part_field_array[0]; + DBUG_PRINT("info",("partition group_field: %p", group_field)); + DBUG_PRINT("info",("partition part_field: %p", part_field)); + if (part_field == group_field) + { + DBUG_PRINT("info",("partition with GROUP BY on partition field")); + goto not_parallel; + } + } + } + DBUG_PRINT("info",("partition with GROUP BY")); + goto parallel; + } + } + else if (select_lex->order_list.elements || + select_lex->group_list.elements) + { + DBUG_PRINT("info",("partition is not skip_order")); + DBUG_PRINT("info",("partition order_list.elements: %u", + select_lex->order_list.elements)); + DBUG_PRINT("info",("partition group_list.elements: %u", + select_lex->group_list.elements)); + goto parallel; + } + DBUG_PRINT("info",("partition is not skip_order")); + +not_parallel: + DBUG_PRINT("return",("partition FALSE")); + DBUG_RETURN(FALSE); + +parallel: + DBUG_PRINT("return",("partition TRUE")); + DBUG_RETURN(TRUE); +} + + +int ha_partition::handle_pre_scan(bool reverse_order, bool use_parallel) +{ + uint i; + DBUG_ENTER("ha_partition::handle_pre_scan"); + DBUG_PRINT("enter", + ("m_part_spec.start_part: %u m_part_spec.end_part: %u", + (uint) m_part_spec.start_part, (uint) m_part_spec.end_part)); + + for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++) + { + if (!(bitmap_is_set(&(m_part_info->read_partitions), i))) + continue; + int error; + handler *file= m_file[i]; + + switch (m_index_scan_type) { + case partition_index_read: + 
error= file->pre_index_read_map(m_start_key.key, + m_start_key.keypart_map, + m_start_key.flag, + use_parallel); + break; + case partition_index_first: + error= file->pre_index_first(use_parallel); + break; + case partition_index_last: + error= file->pre_index_last(use_parallel); + break; + case partition_index_read_last: + error= file->pre_index_read_last_map(m_start_key.key, + m_start_key.keypart_map, + use_parallel); + break; + case partition_read_range: + error= file->pre_read_range_first(m_start_key.key? &m_start_key: NULL, + end_range, eq_range, TRUE, use_parallel); + break; + case partition_read_multi_range: + if (!bitmap_is_set(&m_mrr_used_partitions, i)) + continue; + error= file->pre_multi_range_read_next(use_parallel); + break; + case partition_ft_read: + error= file->pre_ft_read(use_parallel); + break; + case partition_no_index_scan: + error= file->pre_rnd_next(use_parallel); + break; + default: + DBUG_ASSERT(FALSE); + DBUG_RETURN(0); + } + if (error == HA_ERR_END_OF_FILE) + error= 0; + if (unlikely(error)) + DBUG_RETURN(error); + } + table->status= 0; + DBUG_RETURN(0); +} + + +/**************************************************************************** + Unordered Index Scan Routines +****************************************************************************/ +/* + Common routine to handle index_next with unordered results + + SYNOPSIS + handle_unordered_next() + out:buf Read row in MySQL Row Format + next_same Called from index_next_same + + RETURN VALUE + HA_ERR_END_OF_FILE End of scan + 0 Success + other Error code + + DESCRIPTION + These routines are used to scan partitions without considering order. + This is performed in two situations. + 1) In read_multi_range this is the normal case + 2) When performing any type of index_read, index_first, index_last where + all fields in the partition function is bound. In this case the index + scan is performed on only one partition and thus it isn't necessary to + perform any sort. 
+*/ + +int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same) +{ + handler *file; + int error; + DBUG_ENTER("ha_partition::handle_unordered_next"); + + if (m_part_spec.start_part >= m_tot_parts) + { + /* Should never happen! */ + DBUG_ASSERT(0); + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + file= m_file[m_part_spec.start_part]; + + /* + We should consider if this should be split into three functions as + partition_read_range is_next_same are always local constants + */ + + if (m_index_scan_type == partition_read_multi_range) + { + if (likely(!(error= file-> + multi_range_read_next(&m_range_info[m_part_spec.start_part])))) + { + m_last_part= m_part_spec.start_part; + DBUG_RETURN(0); + } + } + else if (m_index_scan_type == partition_read_range) + { + if (likely(!(error= file->read_range_next()))) + { + m_last_part= m_part_spec.start_part; + DBUG_RETURN(0); + } + } + else if (is_next_same) + { + if (likely(!(error= file->ha_index_next_same(buf, m_start_key.key, + m_start_key.length)))) + { + m_last_part= m_part_spec.start_part; + DBUG_RETURN(0); + } + } + else + { + if (likely(!(error= file->ha_index_next(buf)))) + { + m_last_part= m_part_spec.start_part; + DBUG_RETURN(0); // Row was in range + } + } + + if (unlikely(error == HA_ERR_END_OF_FILE)) + { + m_part_spec.start_part++; // Start using next part + error= handle_unordered_scan_next_partition(buf); + } + DBUG_RETURN(error); +} + + +/* + Handle index_next when changing to new partition + + SYNOPSIS + handle_unordered_scan_next_partition() + buf Read row in MariaDB Row Format + + RETURN VALUE + HA_ERR_END_OF_FILE End of scan + 0 Success + other Error code + + DESCRIPTION + This routine is used to start the index scan on the next partition. + Both initial start and after completing scan on one partition. 
*/

int ha_partition::handle_unordered_scan_next_partition(uchar * buf)
{
  uint i= m_part_spec.start_part;
  int saved_error= HA_ERR_END_OF_FILE;
  DBUG_ENTER("ha_partition::handle_unordered_scan_next_partition");

  /* Read next partition that includes start_part */
  if (i)
    i= bitmap_get_next_set(&m_part_info->read_partitions, i - 1);
  else
    i= bitmap_get_first_set(&m_part_info->read_partitions);

  for (;
       i <= m_part_spec.end_part;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    int error;
    handler *file= m_file[i];
    m_part_spec.start_part= i;

    /* Issue the scan-type-specific "first row" call on this partition */
    switch (m_index_scan_type) {
    case partition_read_multi_range:
      if (!bitmap_is_set(&m_mrr_used_partitions, i))
        continue;
      DBUG_PRINT("info", ("read_multi_range on partition %u", i));
      error= file->multi_range_read_next(&m_range_info[i]);
      break;
    case partition_read_range:
      DBUG_PRINT("info", ("read_range_first on partition %u", i));
      error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
                                    end_range, eq_range, FALSE);
      break;
    case partition_index_read:
      DBUG_PRINT("info", ("index_read on partition %u", i));
      error= file->ha_index_read_map(buf, m_start_key.key,
                                     m_start_key.keypart_map,
                                     m_start_key.flag);
      break;
    case partition_index_first:
      DBUG_PRINT("info", ("index_first on partition %u", i));
      error= file->ha_index_first(buf);
      break;
    default:
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(1);
    }
    if (likely(!error))
    {
      m_last_part= i;
      DBUG_RETURN(0);
    }
    if (likely((error != HA_ERR_END_OF_FILE) &&
               (error != HA_ERR_KEY_NOT_FOUND)))
      DBUG_RETURN(error);

    /*
      If HA_ERR_KEY_NOT_FOUND, we must return that error instead of
      HA_ERR_END_OF_FILE, to be able to continue search.
    */
    if (saved_error != HA_ERR_KEY_NOT_FOUND)
      saved_error= error;
    DBUG_PRINT("info", ("END_OF_FILE/KEY_NOT_FOUND on partition %u", i));
  }
  if (saved_error == HA_ERR_END_OF_FILE)
    m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(saved_error);
}


/**
  Common routine to start index scan with ordered results.

  @param[out] buf  Read row in MariaDB Row Format

  @return Operation status
    @retval HA_ERR_END_OF_FILE    End of scan
    @retval HA_ERR_KEY_NOT_FOUND  End of scan
    @retval 0                     Success
    @retval other                 Error code

  @details
    This part contains the logic to handle index scans that require ordered
    output. This includes all except those started by read_range_first with
    the flag ordered set to FALSE. Thus most direct index_read and all
    index_first and index_last.

    We implement ordering by keeping one record plus a key buffer for each
    partition. Every time a new entry is requested we will fetch a new
    entry from the partition that is currently not filled with an entry.
    Then the entry is put into its proper sort position.

    Returning a record is done by getting the top record, copying the
    record to the request buffer and setting the partition as empty on
    entries.
*/

int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
{
  int error;
  uint i;
  uint j= queue_first_element(&m_queue);
  uint smallest_range_seq= 0;
  bool found= FALSE;
  uchar *part_rec_buf_ptr= m_ordered_rec_buffer;
  int saved_error= HA_ERR_END_OF_FILE;
  DBUG_ENTER("ha_partition::handle_ordered_index_scan");
  DBUG_PRINT("enter", ("partition this: %p", this));

  if (m_pre_calling)
    error= handle_pre_scan(reverse_order, m_pre_call_use_parallel);
  else
    error= handle_pre_scan(reverse_order, check_parallel_search());
  if (unlikely(error))
    DBUG_RETURN(error);

  if (m_key_not_found)
  {
    /* m_key_not_found was set in the previous call to this function */
    m_key_not_found= false;
    bitmap_clear_all(&m_key_not_found_partitions);
  }
  m_top_entry= NO_CURRENT_PART_ID;
  DBUG_PRINT("info", ("partition queue_remove_all(1)"));
  queue_remove_all(&m_queue);
  DBUG_ASSERT(bitmap_is_set(&m_part_info->read_partitions,
                            m_part_spec.start_part));

  /*
    Position part_rec_buf_ptr to point to the first used partition >=
    start_part. There may be partitions marked in read_partitions that
    are before start_part. These partitions have allocated record buffers
    but are dynamically pruned, so those buffers must be skipped.
  */
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_part_spec.start_part;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    part_rec_buf_ptr+= m_priority_queue_rec_len;
  }
  DBUG_PRINT("info", ("m_part_spec.start_part %u first_used_part %u",
                      m_part_spec.start_part, i));
  /* Fetch one candidate row per used partition and stage it in the queue */
  for (/* continue from above */ ;
       i <= m_part_spec.end_part ;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i),
       part_rec_buf_ptr+= m_priority_queue_rec_len)
  {
    DBUG_PRINT("info", ("reading from part %u (scan_type: %u)",
                        i, m_index_scan_type));
    DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr + ORDERED_PART_NUM_OFFSET));
    uchar *rec_buf_ptr= part_rec_buf_ptr + ORDERED_REC_OFFSET;
    handler *file= m_file[i];

    switch (m_index_scan_type) {
    case partition_index_read:
      error= file->ha_index_read_map(rec_buf_ptr,
                                     m_start_key.key,
                                     m_start_key.keypart_map,
                                     m_start_key.flag);
      /* Caller has specified reverse_order */
      break;
    case partition_index_first:
      error= file->ha_index_first(rec_buf_ptr);
      reverse_order= FALSE;
      break;
    case partition_index_last:
      error= file->ha_index_last(rec_buf_ptr);
      reverse_order= TRUE;
      break;
    case partition_read_range:
    {
      /*
        This can only read record to table->record[0], as it was set when
        the table was being opened. We have to memcpy data ourselves.
      */
      error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
                                    end_range, eq_range, TRUE);
      if (likely(!error))
        memcpy(rec_buf_ptr, table->record[0], m_rec_length);
      reverse_order= FALSE;
      break;
    }
    case partition_read_multi_range:
    {
      if (!bitmap_is_set(&m_mrr_used_partitions, i))
        continue;
      DBUG_PRINT("info", ("partition %u", i));
      error= file->multi_range_read_next(&m_range_info[i]);
      DBUG_PRINT("info", ("error: %d", error));
      if (error == HA_ERR_KEY_NOT_FOUND || error == HA_ERR_END_OF_FILE)
      {
        /* Partition has no rows for any remaining range; drop it from MRR */
        bitmap_clear_bit(&m_mrr_used_partitions, i);
        continue;
      }
      if (likely(!error))
      {
        memcpy(rec_buf_ptr, table->record[0], m_rec_length);
        reverse_order= FALSE;
        m_stock_range_seq[i]= (((PARTITION_KEY_MULTI_RANGE *)
                                m_range_info[i])->id);
        /* Test if the key is in the first key range */
        if (m_stock_range_seq[i] != m_mrr_range_current->id)
        {
          /*
            smallest_range_seq contains the smallest key range we have seen
            so far
          */
          if (!smallest_range_seq || smallest_range_seq > m_stock_range_seq[i])
            smallest_range_seq= m_stock_range_seq[i];
          continue;
        }
      }
      break;
    }
    default:
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    if (likely(!error))
    {
      found= TRUE;
      if (!m_using_extended_keys)
      {
        /* Save row position so ties can be broken by physical row ref */
        file->position(rec_buf_ptr);
        memcpy(rec_buf_ptr + m_rec_length, file->ref, file->ref_length);
      }
      /*
        Initialize queue without order first, simply insert
      */
      queue_element(&m_queue, j++)= part_rec_buf_ptr;
      if (table->s->blob_fields)
      {
        Ordered_blob_storage **storage=
          *((Ordered_blob_storage ***) part_rec_buf_ptr);
        swap_blobs(rec_buf_ptr, storage, false);
      }
    }
    else if (error == HA_ERR_KEY_NOT_FOUND)
    {
      DBUG_PRINT("info", ("HA_ERR_KEY_NOT_FOUND from partition %u", i));
      bitmap_set_bit(&m_key_not_found_partitions, i);
      m_key_not_found= true;
      saved_error= error;
    }
    else if (error != HA_ERR_END_OF_FILE)
    {
      DBUG_RETURN(error);
    }
  }

  if (!found && smallest_range_seq)
  {
    /* We know that there is an existing row based on code above */
    found= TRUE;
    part_rec_buf_ptr= m_ordered_rec_buffer;

    /*
      No key found in the first key range
      Collect all partitions that has a key in smallest_range_seq
    */
    DBUG_PRINT("info", ("partition !found && smallest_range_seq"));
    for (i= bitmap_get_first_set(&m_part_info->read_partitions);
         i <= m_part_spec.end_part;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      DBUG_PRINT("info", ("partition current_part: %u", i));
      if (i < m_part_spec.start_part)
      {
        part_rec_buf_ptr+= m_priority_queue_rec_len;
        DBUG_PRINT("info", ("partition i < m_part_spec.start_part"));
        continue;
      }
      if (!bitmap_is_set(&m_mrr_used_partitions, i))
      {
        part_rec_buf_ptr+= m_priority_queue_rec_len;
        DBUG_PRINT("info", ("partition !bitmap_is_set(&m_mrr_used_partitions, i)"));
        continue;
      }
      DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr + ORDERED_PART_NUM_OFFSET));
      if (smallest_range_seq == m_stock_range_seq[i])
      {
        m_stock_range_seq[i]= 0;
        queue_element(&m_queue, j++)= (uchar *) part_rec_buf_ptr;
        DBUG_PRINT("info", ("partition smallest_range_seq == m_stock_range_seq[i]"));
      }
      part_rec_buf_ptr+= m_priority_queue_rec_len;
    }

    /* Update global m_mrr_range_current to the current range */
    while (m_mrr_range_current->id < smallest_range_seq)
      m_mrr_range_current= m_mrr_range_current->next;
  }
  if (found)
  {
    /*
      We found at least one partition with data, now sort all entries and
      after that read the first entry and copy it to the buffer to return in.
    */
    queue_set_max_at_top(&m_queue, reverse_order);
    queue_set_cmp_arg(&m_queue, (void*) this);
    m_queue.elements= j - queue_first_element(&m_queue);
    queue_fix(&m_queue);
    return_top_record(buf);
    DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
    DBUG_RETURN(0);
  }
  DBUG_RETURN(saved_error);
}


/*
  Return the top record in sort order

  SYNOPSIS
    return_top_record()
    out:buf                   Row returned in MySQL Row Format

  RETURN VALUE
    NONE
*/

void ha_partition::return_top_record(uchar *buf)
{
  uint part_id;
  uchar *key_buffer= queue_top(&m_queue);
  uchar *rec_buffer= key_buffer + ORDERED_REC_OFFSET;
  DBUG_ENTER("ha_partition::return_top_record");
  DBUG_PRINT("enter", ("partition this: %p", this));

  /* The partition number is stored in front of the record in the buffer */
  part_id= uint2korr(key_buffer + ORDERED_PART_NUM_OFFSET);
  memcpy(buf, rec_buffer, m_rec_length);
  if (table->s->blob_fields)
  {
    Ordered_blob_storage **storage= *((Ordered_blob_storage ***) key_buffer);
    swap_blobs(buf, storage, true);
  }
  m_last_part= part_id;
  DBUG_PRINT("info", ("partition m_last_part: %u", m_last_part));
  m_top_entry= part_id;
  table->status= 0;                             // Found an existing row
  m_file[part_id]->return_record_by_parent();
  DBUG_VOID_RETURN;
}

/*
  This function is only used if the partitioned table has own partitions.
  This can happen if the partitioned VP engine is used (part of spider).
*/

void ha_partition::return_record_by_parent()
{
  m_file[m_last_part]->return_record_by_parent();
  DBUG_ASSERT(0);
}


/**
  Add index_next/prev from partitions without exact match.

  If there were any partitions that returned HA_ERR_KEY_NOT_FOUND when
  ha_index_read_map was done, those partitions must be included in the
  following index_next/prev call.
*/

int ha_partition::handle_ordered_index_scan_key_not_found()
{
  int error;
  uint i, old_elements= m_queue.elements;
  uchar *part_buf= m_ordered_rec_buffer;
  uchar *curr_rec_buf= NULL;
  DBUG_ENTER("ha_partition::handle_ordered_index_scan_key_not_found");
  DBUG_PRINT("enter", ("partition this: %p", this));
  DBUG_ASSERT(m_key_not_found);
  /*
    Loop over all used partitions to get the correct offset
    into m_ordered_rec_buffer.
  */
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if (bitmap_is_set(&m_key_not_found_partitions, i))
    {
      /*
        This partition is used and did return HA_ERR_KEY_NOT_FOUND
        in index_read_map.
      */
      curr_rec_buf= part_buf + ORDERED_REC_OFFSET;
      error= m_file[i]->ha_index_next(curr_rec_buf);
      /* HA_ERR_KEY_NOT_FOUND is not allowed from index_next! */
      DBUG_ASSERT(error != HA_ERR_KEY_NOT_FOUND);
      if (likely(!error))
      {
        DBUG_PRINT("info", ("partition queue_insert(1)"));
        queue_insert(&m_queue, part_buf);
      }
      else if (error != HA_ERR_END_OF_FILE && error != HA_ERR_KEY_NOT_FOUND)
        DBUG_RETURN(error);
    }
    part_buf += m_priority_queue_rec_len;
  }
  DBUG_ASSERT(curr_rec_buf);
  bitmap_clear_all(&m_key_not_found_partitions);
  m_key_not_found= false;

  if (m_queue.elements > old_elements)
  {
    /* Update m_top_entry, which may have changed. */
    uchar *key_buffer= queue_top(&m_queue);
    m_top_entry= uint2korr(key_buffer);
  }
  DBUG_RETURN(0);
}


/*
  Common routine to handle index_next with ordered results

  SYNOPSIS
    handle_ordered_next()
    out:buf                       Read row in MySQL Row Format
    next_same                     Called from index_next_same

  RETURN VALUE
    HA_ERR_END_OF_FILE            End of scan
    0                             Success
    other                         Error code
*/

int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
{
  int error;
  DBUG_ENTER("ha_partition::handle_ordered_next");

  if (m_top_entry == NO_CURRENT_PART_ID)
    DBUG_RETURN(HA_ERR_END_OF_FILE);

  uint part_id= m_top_entry;
  uchar *part_rec_buf_ptr= queue_top(&m_queue);
  uchar *rec_buf= part_rec_buf_ptr + ORDERED_REC_OFFSET;
  handler *file;

  if (m_key_not_found)
  {
    if (is_next_same)
    {
      /* Only rows which match the key. */
      m_key_not_found= false;
      bitmap_clear_all(&m_key_not_found_partitions);
    }
    else
    {
      /* There are partitions not included in the index record queue. */
      uint old_elements= m_queue.elements;
      if (unlikely((error= handle_ordered_index_scan_key_not_found())))
        DBUG_RETURN(error);
      /*
        If the queue top changed, i.e. one of the partitions that gave
        HA_ERR_KEY_NOT_FOUND in index_read_map found the next record,
        return it.
        Otherwise replace the old with a call to index_next (fall through).
      */
      if (old_elements != m_queue.elements && part_id != m_top_entry)
      {
        return_top_record(buf);
        DBUG_RETURN(0);
      }
    }
  }
  if (part_id >= m_tot_parts)
  {
    /* This should never happen! */
    DBUG_ASSERT(0);
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }

  file= m_file[part_id];

  if (m_index_scan_type == partition_read_range)
  {
    error= file->read_range_next();
    if (likely(!error))
    {
      memcpy(rec_buf, table->record[0], m_rec_length);
      if (table->s->blob_fields)
      {
        Ordered_blob_storage **storage=
          *((Ordered_blob_storage ***) part_rec_buf_ptr);
        swap_blobs(rec_buf, storage, false);
      }
    }
  }
  else if (m_index_scan_type == partition_read_multi_range)
  {
    DBUG_PRINT("info", ("partition_read_multi_range route"));
    DBUG_PRINT("info", ("part_id: %u", part_id));
    bool get_next= FALSE;
    error= file->multi_range_read_next(&m_range_info[part_id]);
    DBUG_PRINT("info", ("error: %d", error));
    if (unlikely(error == HA_ERR_KEY_NOT_FOUND))
      error= HA_ERR_END_OF_FILE;
    if (unlikely(error == HA_ERR_END_OF_FILE))
    {
      /* No more rows in this partition; drop it and serve buffered rows */
      bitmap_clear_bit(&m_mrr_used_partitions, part_id);
      DBUG_PRINT("info", ("partition m_queue.elements: %u", m_queue.elements));
      if (m_queue.elements)
      {
        DBUG_PRINT("info", ("partition queue_remove_top(1)"));
        queue_remove_top(&m_queue);
        if (m_queue.elements)
        {
          return_top_record(buf);
          DBUG_PRINT("info", ("Record returned from partition %u (3)",
                              m_top_entry));
          DBUG_RETURN(0);
        }
      }
      get_next= TRUE;
    }
    else if (likely(!error))
    {
      DBUG_PRINT("info", ("m_range_info[%u])->id: %u", part_id,
                          ((PARTITION_KEY_MULTI_RANGE *)
                           m_range_info[part_id])->id));
      DBUG_PRINT("info", ("m_mrr_range_current->id: %u",
                          m_mrr_range_current->id));
      memcpy(rec_buf, table->record[0], m_rec_length);
      if (table->s->blob_fields)
      {
        Ordered_blob_storage **storage= *((Ordered_blob_storage ***) part_rec_buf_ptr);
        swap_blobs(rec_buf, storage, false);
      }
      if (((PARTITION_KEY_MULTI_RANGE *) m_range_info[part_id])->id !=
          m_mrr_range_current->id)
      {
        /* Row belongs to a later key range; stash its range id for later */
        m_stock_range_seq[part_id]=
          ((PARTITION_KEY_MULTI_RANGE *) m_range_info[part_id])->id;
        DBUG_PRINT("info", ("partition queue_remove_top(2)"));
        queue_remove_top(&m_queue);
        if (!m_queue.elements)
          get_next= TRUE;
      }
    }
    if (get_next)
    {
      /*
        Current key range exhausted in all partitions. Find the smallest
        stashed range id and rebuild the queue from the partitions that
        already have a row in that range.
      */
      DBUG_PRINT("info", ("get_next route"));
      uint i, j= 0, smallest_range_seq= UINT_MAX32;
      for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
      {
        if (!(bitmap_is_set(&(m_part_info->read_partitions), i)))
          continue;
        if (!bitmap_is_set(&m_mrr_used_partitions, i))
          continue;
        if (smallest_range_seq > m_stock_range_seq[i])
          smallest_range_seq= m_stock_range_seq[i];
      }

      DBUG_PRINT("info", ("smallest_range_seq: %u", smallest_range_seq));
      if (smallest_range_seq != UINT_MAX32)
      {
        uchar *part_rec_buf_ptr= m_ordered_rec_buffer;
        DBUG_PRINT("info", ("partition queue_remove_all(2)"));
        queue_remove_all(&m_queue);
        DBUG_PRINT("info", ("m_part_spec.start_part: %u",
                            m_part_spec.start_part));

        for (i= bitmap_get_first_set(&m_part_info->read_partitions);
             i <= m_part_spec.end_part;
             i= bitmap_get_next_set(&m_part_info->read_partitions, i),
             part_rec_buf_ptr+= m_priority_queue_rec_len)
        {
          DBUG_PRINT("info",("partition part_id: %u", i));
          if (i < m_part_spec.start_part)
          {
            DBUG_PRINT("info",("partition i < m_part_spec.start_part"));
            continue;
          }
          if (!bitmap_is_set(&m_mrr_used_partitions, i))
          {
            DBUG_PRINT("info",("partition !bitmap_is_set(&m_mrr_used_partitions, i)"));
            continue;
          }
          DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr +
                                     ORDERED_PART_NUM_OFFSET));
          DBUG_PRINT("info", ("partition m_stock_range_seq[%u]: %u",
                              i, m_stock_range_seq[i]));
          if (smallest_range_seq == m_stock_range_seq[i])
          {
            m_stock_range_seq[i]= 0;
            DBUG_PRINT("info", ("partition queue_insert(2)"));
            queue_insert(&m_queue, part_rec_buf_ptr);
            j++;
          }
        }
        while (m_mrr_range_current->id < smallest_range_seq)
          m_mrr_range_current= m_mrr_range_current->next;

        DBUG_PRINT("info",("partition m_mrr_range_current: %p",
                           m_mrr_range_current));
        DBUG_PRINT("info",("partition m_mrr_range_current->id: %u",
                           m_mrr_range_current ? m_mrr_range_current->id : 0));
        queue_set_max_at_top(&m_queue, FALSE);
        queue_set_cmp_arg(&m_queue, (void*) this);
        m_queue.elements= j;
        queue_fix(&m_queue);
        return_top_record(buf);
        DBUG_PRINT("info", ("Record returned from partition %u (4)",
                            m_top_entry));
        DBUG_RETURN(0);
      }
    }
  }
  else if (!is_next_same)
    error= file->ha_index_next(rec_buf);
  else
    error= file->ha_index_next_same(rec_buf, m_start_key.key,
                                    m_start_key.length);

  if (unlikely(error))
  {
    if (error == HA_ERR_END_OF_FILE && m_queue.elements)
    {
      /* Return next buffered row */
      DBUG_PRINT("info", ("partition queue_remove_top(3)"));
      queue_remove_top(&m_queue);
      if (m_queue.elements)
      {
        return_top_record(buf);
        DBUG_PRINT("info", ("Record returned from partition %u (2)",
                            m_top_entry));
        error= 0;
      }
    }
    DBUG_RETURN(error);
  }

  if (!m_using_extended_keys)
  {
    file->position(rec_buf);
    memcpy(rec_buf + m_rec_length, file->ref, file->ref_length);
  }

  queue_replace_top(&m_queue);
  return_top_record(buf);
  DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
  DBUG_RETURN(0);
}


/*
  Common routine to handle index_prev with ordered results

  SYNOPSIS
    handle_ordered_prev()
    out:buf                       Read row in MySQL Row Format

  RETURN VALUE
    HA_ERR_END_OF_FILE            End of scan
    0                             Success
    other                         Error code
*/

int ha_partition::handle_ordered_prev(uchar *buf)
{
  int error;
  DBUG_ENTER("ha_partition::handle_ordered_prev");
  DBUG_PRINT("enter", ("partition: %p", this));

  if (m_top_entry == NO_CURRENT_PART_ID)
    DBUG_RETURN(HA_ERR_END_OF_FILE);

  uint part_id= m_top_entry;
  uchar *rec_buf= queue_top(&m_queue) + ORDERED_REC_OFFSET;
  handler *file= m_file[part_id];

  if (unlikely((error= file->ha_index_prev(rec_buf))))
  {
    if (error == HA_ERR_END_OF_FILE && m_queue.elements)
    {
      DBUG_PRINT("info", ("partition queue_remove_top(4)"));
      queue_remove_top(&m_queue);
      if (m_queue.elements)
      {
        return_top_record(buf);
        DBUG_PRINT("info", ("Record returned from partition %u (2)",
                            m_top_entry));
        error= 0;
      }
    }
    DBUG_RETURN(error);
  }
  queue_replace_top(&m_queue);
  return_top_record(buf);
  DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
  DBUG_RETURN(0);
}


/****************************************************************************
                MODULE information calls
****************************************************************************/

/*
  These are all first approximations of the extra, info, scan_time
  and read_time calls
*/

/**
  Helper function for sorting according to number of rows in descending order.
*/

int ha_partition::compare_number_of_records(ha_partition *me,
                                            const uint32 *a,
                                            const uint32 *b)
{
  handler **file= me->m_file;
  /* Note: sorting in descending order! */
  if (file[*a]->stats.records > file[*b]->stats.records)
    return -1;
  if (file[*a]->stats.records < file[*b]->stats.records)
    return 1;
  return 0;
}


/*
  General method to gather info from handler

  SYNOPSIS
    info()
    flag              Specifies what info is requested

  RETURN VALUE
    NONE

  DESCRIPTION
    ::info() is used to return information to the optimizer.
    Currently this table handler doesn't implement most of the fields
    really needed. SHOW also makes use of this data
    Another note, if your handler doesn't provide exact record count,
    you will probably want to have the following in your code:
    if (records < 2)
      records = 2;
    The reason is that the server will optimize for cases of only a single
    record. If in a table scan you don't know the number of records
    it will probably be better to set records to two so you can return
    as many records as you need.

    Along with records a few more variables you may wish to set are:
      records
      deleted
      data_file_length
      index_file_length
      delete_length
      check_time
    Take a look at the public variables in handler.h for more information.

  Called in:
    filesort.cc
    ha_heap.cc
    item_sum.cc
    opt_sum.cc
    sql_delete.cc
    sql_delete.cc
    sql_derived.cc
    sql_select.cc
    sql_select.cc
    sql_select.cc
    sql_select.cc
    sql_select.cc
    sql_show.cc
    sql_show.cc
    sql_show.cc
    sql_show.cc
    sql_table.cc
    sql_union.cc
    sql_update.cc

  Some flags that are not implemented
    HA_STATUS_POS:
      This parameter is never used from the MySQL Server. It is checked in a
      place in MyISAM so could potentially be used by MyISAM specific
      programs.
    HA_STATUS_NO_LOCK:
      This is declared and often used. It's only used by MyISAM.
      It means that MySQL doesn't need the absolute latest statistics
      information. This may save the handler from doing internal locks while
      retrieving statistics data.
*/

int ha_partition::info(uint flag)
{
  int error;
  uint no_lock_flag= flag & HA_STATUS_NO_LOCK;
  uint extra_var_flag= flag & HA_STATUS_VARIABLE_EXTRA;
  DBUG_ENTER("ha_partition::info");

#ifndef DBUG_OFF
  if (bitmap_is_set_all(&(m_part_info->read_partitions)))
    DBUG_PRINT("info", ("All partitions are used"));
#endif /* DBUG_OFF */
  if (flag & HA_STATUS_AUTO)
  {
    bool auto_inc_is_first_in_idx= (table_share->next_number_keypart == 0);
    bool all_parts_opened= true;
    DBUG_PRINT("info", ("HA_STATUS_AUTO"));
    if (!table->found_next_number_field)
      stats.auto_increment_value= 0;
    else if (part_share->auto_inc_initialized)
    {
      lock_auto_increment();
      stats.auto_increment_value= part_share->next_auto_inc_val;
      unlock_auto_increment();
    }
    else
    {
      lock_auto_increment();
      /* to avoid two concurrent initializations, check again when locked */
      if (part_share->auto_inc_initialized)
        stats.auto_increment_value= part_share->next_auto_inc_val;
      else
      {
        /*
          The auto-inc mutex in the table_share is locked, so we do not need
          to have the handlers locked.
          HA_STATUS_NO_LOCK is not checked, since we cannot skip locking
          the mutex, because it is initialized.
        */
        handler *file, **file_array;
        ulonglong auto_increment_value= 0;
        file_array= m_file;
        DBUG_PRINT("info",
                   ("checking all partitions for auto_increment_value"));
        /* The maximum over all partitions is the table's next auto-inc */
        do
        {
          if (!bitmap_is_set(&m_opened_partitions, (uint)(file_array - m_file)))
          {
            /*
              Some partitions aren't opened.
              So we can't calculate the autoincrement.
            */
            all_parts_opened= false;
            break;
          }
          file= *file_array;
          if ((error= file->info(HA_STATUS_AUTO | no_lock_flag)))
          {
            unlock_auto_increment();
            DBUG_RETURN(error);
          }
          set_if_bigger(auto_increment_value,
                        file->stats.auto_increment_value);
        } while (*(++file_array));

        DBUG_ASSERT(auto_increment_value);
        stats.auto_increment_value= auto_increment_value;
        if (all_parts_opened && auto_inc_is_first_in_idx)
        {
          set_if_bigger(part_share->next_auto_inc_val,
                        auto_increment_value);
          if (can_use_for_auto_inc_init())
            part_share->auto_inc_initialized= true;
          DBUG_PRINT("info", ("initializing next_auto_inc_val to %lu",
                              (ulong) part_share->next_auto_inc_val));
        }
      }
      unlock_auto_increment();
    }
  }
  if (flag & HA_STATUS_VARIABLE)
  {
    uint i;
    DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
    /*
      Calculates statistical variables
      records:            Estimate of number records in table
                          We report sum (always at least 2 if not empty)
      deleted:            Estimate of number holes in the table due to
                          deletes. We report sum
      data_file_length:   Length of data file, in principle bytes in table
                          We report sum
      index_file_length:  Length of index file, in principle bytes in
                          indexes in the table. We report sum
      delete_length:      Length of free space easily used by new records
                          in table. We report sum
      mean_record_length: Mean record length in the table. We calculate this
      check_time:         Time of last check (only applicable to MyISAM)
                          We report last time of all underlying handlers
    */
    handler *file;
    stats.records= 0;
    stats.deleted= 0;
    stats.data_file_length= 0;
    stats.index_file_length= 0;
    stats.delete_length= 0;
    stats.check_time= 0;
    stats.checksum= 0;
    stats.checksum_null= TRUE;
    for (i= bitmap_get_first_set(&m_part_info->read_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      file= m_file[i];
      if ((error= file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag)))
        DBUG_RETURN(error);
      stats.records+= file->stats.records;
      stats.deleted+= file->stats.deleted;
      stats.data_file_length+= file->stats.data_file_length;
      stats.index_file_length+= file->stats.index_file_length;
      stats.delete_length+= file->stats.delete_length;
      if (file->stats.check_time > stats.check_time)
        stats.check_time= file->stats.check_time;
      if (!file->stats.checksum_null)
      {
        stats.checksum+= file->stats.checksum;
        stats.checksum_null= FALSE;
      }
    }
    if (stats.records && stats.records < 2 &&
        !(m_file[0]->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT))
      stats.records= 2;
    if (stats.records > 0)
      stats.mean_rec_length= (ulong) (stats.data_file_length / stats.records);
    else
      stats.mean_rec_length= 0;
  }
  if (flag & HA_STATUS_CONST)
  {
    DBUG_PRINT("info", ("HA_STATUS_CONST"));
    /*
      Recalculate loads of constant variables. MyISAM also sets things
      directly on the table share object.

      Check whether this should be fixed since handlers should not
      change things directly on the table object.

      Monty comment: This should NOT be changed! It's the handlers
      responsibility to correct table->s->keys_xxxx information if keys
      have been disabled.

      The most important parameters set here is records per key on
      all indexes. block_size and primary key ref_length.

      For each index there is an array of rec_per_key.
      As an example if we have an index with three attributes a,b and c
      we will have an array of 3 rec_per_key.
      rec_per_key[0] is an estimate of number of records divided by
      number of unique values of the field a.
      rec_per_key[1] is an estimate of the number of records divided
      by the number of unique combinations of the fields a and b.
      rec_per_key[2] is an estimate of the number of records divided
      by the number of unique combinations of the fields a,b and c.

      Many handlers only set the value of rec_per_key when all fields
      are bound (rec_per_key[2] in the example above).

      If the handler doesn't support statistics, it should set all of the
      above to 0.

      We first scan through all partitions to get the one holding most rows.
      We will then allow the handler with the most rows to set
      the rec_per_key and use this as an estimate on the total table.

      max_data_file_length:  Maximum data file length
                             We ignore it, is only used in
                             SHOW TABLE STATUS
      max_index_file_length: Maximum index file length
                             We ignore it since it is never used
      block_size:            Block size used
                             We set it to the value of the first handler
      ref_length:            We set this to the value calculated
                             and stored in local object
      create_time:           Creation time of table

      So we calculate these constants by using the variables from the
      handler with most rows.
    */
    handler *file, **file_array;
    ulonglong max_records= 0;
    uint32 i= 0;
    uint32 handler_instance= 0;
    bool handler_instance_set= 0;

    file_array= m_file;
    do
    {
      file= *file_array;
      if (bitmap_is_set(&(m_opened_partitions), (uint)(file_array - m_file)))
      {
        /* Get variables if not already done */
        if (!(flag & HA_STATUS_VARIABLE) ||
            !bitmap_is_set(&(m_part_info->read_partitions),
                           (uint) (file_array - m_file)))
          if ((error= file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag)))
            DBUG_RETURN(error);
        if (file->stats.records > max_records || !handler_instance_set)
        {
          handler_instance_set= 1;
          max_records= file->stats.records;
          handler_instance= i;
        }
      }
      i++;
    } while (*(++file_array));
    /*
      Sort the array of part_ids by number of records in
      descending order.
    */
    my_qsort2((void*) m_part_ids_sorted_by_num_of_records,
              m_tot_parts,
              sizeof(uint32),
              (qsort2_cmp) compare_number_of_records,
              this);

    file= m_file[handler_instance];
    if ((error= file->info(HA_STATUS_CONST | no_lock_flag)))
      DBUG_RETURN(error);
    stats.block_size= file->stats.block_size;
    stats.create_time= file->stats.create_time;
    ref_length= m_ref_length;
  }
  if (flag & HA_STATUS_ERRKEY)
  {
    handler *file= m_file[m_last_part];
    DBUG_PRINT("info", ("info: HA_STATUS_ERRKEY"));
    /*
      This flag is used to get index number of the unique index that
      reported duplicate key
      We will report the errkey on the last handler used and ignore the rest
      Note: not all engines support HA_STATUS_ERRKEY, so set errkey.
    */
    file->errkey= errkey;
    if ((error= file->info(HA_STATUS_ERRKEY | no_lock_flag)))
      DBUG_RETURN(error);
    errkey= file->errkey;
  }
  if (flag & HA_STATUS_TIME)
  {
    handler *file, **file_array;
    DBUG_PRINT("info", ("info: HA_STATUS_TIME"));
    /*
      This flag is used to set the latest update time of the table.
      Used by SHOW commands
      We will report the maximum of these times
    */
    stats.update_time= 0;
    file_array= m_file;
    do
    {
      file= *file_array;
      if ((error= file->info(HA_STATUS_TIME | no_lock_flag)))
        DBUG_RETURN(error);
      if (file->stats.update_time > stats.update_time)
        stats.update_time= file->stats.update_time;
    } while (*(++file_array));
  }
  DBUG_RETURN(0);
}


void ha_partition::get_dynamic_partition_info(PARTITION_STATS *stat_info,
                                              uint part_id)
{
  handler *file= m_file[part_id];
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id));
  file->info(HA_STATUS_TIME | HA_STATUS_VARIABLE |
             HA_STATUS_VARIABLE_EXTRA | HA_STATUS_NO_LOCK);

  /* Copy the per-partition statistics to the caller's structure */
  stat_info->records= file->stats.records;
  stat_info->mean_rec_length= file->stats.mean_rec_length;
  stat_info->data_file_length= file->stats.data_file_length;
  stat_info->max_data_file_length= file->stats.max_data_file_length;
  stat_info->index_file_length= file->stats.index_file_length;
  stat_info->max_index_file_length= file->stats.max_index_file_length;
  stat_info->delete_length= file->stats.delete_length;
  stat_info->create_time= file->stats.create_time;
  stat_info->update_time= file->stats.update_time;
  stat_info->check_time= file->stats.check_time;
  stat_info->check_sum= file->stats.checksum;
  stat_info->check_sum_null= file->stats.checksum_null;
}


/*
  Remember the list of partition names to open for the next open call.
  NOTE(review): the template argument of List appears to have been lost in
  extraction (upstream uses List<String>) — confirm against the header.
*/
void ha_partition::set_partitions_to_open(List *partition_names)
{
  m_partitions_to_open= partition_names;
}


int ha_partition::open_read_partitions(char *name_buff, size_t name_buff_size)
{
  handler **file;
  char *name_buffer_ptr;
  int error= 0;

  name_buffer_ptr= m_name_buffer_ptr;
  file= m_file;
  m_file_sample= NULL;
  do
  {
    int n_file= (int)(file-m_file);
    int is_open= bitmap_is_set(&m_opened_partitions, n_file);
    int should_be_open= bitmap_is_set(&m_part_info->read_partitions, n_file);

    /*
      TODO: we can close some opened partitions if they're not
      used in the query. It probably should be syncronized with the
      table_open_cache value.

      if (is_open && !should_be_open)
      {
        if (unlikely((error= (*file)->ha_close())))
          goto err_handler;
        bitmap_clear_bit(&m_opened_partitions, n_file);
      }
      else
    */
    if (!is_open && should_be_open)
    {
      LEX_CSTRING save_connect_string= table->s->connect_string;
      if (unlikely((error=
                    create_partition_name(name_buff, name_buff_size,
                                          table->s->normalized_path.str,
                                          name_buffer_ptr, NORMAL_PART_NAME,
                                          FALSE))))
        goto err_handler;
      if (!((*file)->ht->flags & HTON_CAN_READ_CONNECT_STRING_IN_PARTITION))
        table->s->connect_string= m_connect_string[(uint)(file-m_file)];
      error= (*file)->ha_open(table, name_buff, m_mode,
                              m_open_test_lock | HA_OPEN_NO_PSI_CALL);
      /* Always restore the share's connect string, even on error */
      table->s->connect_string= save_connect_string;
      if (error)
        goto err_handler;
      bitmap_set_bit(&m_opened_partitions, n_file);
      m_last_part= n_file;
    }
    if (!m_file_sample && should_be_open)
      m_file_sample= *file;
    /* Partition names are stored back-to-back, NUL-separated */
    name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
  } while (*(++file));

err_handler:
  return error;
}


/*
  NOTE(review): as with set_partitions_to_open, the List template argument
  appears lost in extraction (upstream: List<String>) — confirm.
*/
int ha_partition::change_partitions_to_open(List *partition_names)
{
  char name_buff[FN_REFLEN+1];
  int error= 0;

  if (m_is_clone_of)
    return 0;

  m_partitions_to_open= partition_names;
  if (unlikely((error= m_part_info->set_partition_bitmaps(partition_names))))
    goto err_handler;

  if (m_lock_type != F_UNLCK)
  {
    /*
      That happens after the LOCK TABLE statement.
      Do nothing in this case.
    */
    return 0;
  }

  check_insert_or_replace_autoincrement();
  if (bitmap_cmp(&m_opened_partitions, &m_part_info->read_partitions) != 0)
    return 0;

  if (unlikely((error= read_par_file(table->s->normalized_path.str)) ||
               (error= open_read_partitions(name_buff, sizeof(name_buff)))))
    goto err_handler;

  clear_handler_file();

err_handler:
  return error;
}


/* Callback: forward an extra() hint to one underlying partition handler */
static int extra_cb(handler *h, void *operation)
{
  return h->extra(*(enum ha_extra_function*)operation);
}


/* Callback: start key-read (index-only read) on one partition handler */
static int start_keyread_cb(handler* h, void *p)
{
  return h->ha_start_keyread(*(uint*)p);
}


/* Callback: end key-read mode on one partition handler */
static int end_keyread_cb(handler* h, void *unused)
{
  return h->ha_end_keyread();
}


/**
  General function to prepare handler for certain behavior.

  @param[in]    operation      operation to execute

  @return       status
    @retval     0              success
    @retval     >0             error code

  @detail

  extra() is called whenever the server wishes to send a hint to
  the storage engine. The MyISAM engine implements the most hints.

  We divide the parameters into the following categories:
  1) Operations used by most handlers
  2) Operations used by some non-MyISAM handlers
  3) Operations used only by MyISAM
  4) Operations only used by temporary tables for query processing
  5) Operations only used by MyISAM internally
  6) Operations not used at all
  7) Operations only used by federated tables for query processing
  8) Operations only used by NDB
  9) Operations only used by MERGE

  The partition handler need to handle category 1), 2) and 3).

  1) Operations used by most handlers
  -----------------------------------
  HA_EXTRA_RESET:
    This option is used by most handlers and it resets the handler state
    to the same state as after an open call. This includes releasing
    any READ CACHE or WRITE CACHE or other internal buffer used.

    It is called from the reset method in the handler interface. There are
    three instances where this is called.
    1) After completing a INSERT ... SELECT ...
query the handler for the + table inserted into is reset + 2) It is called from close_thread_table which in turn is called from + close_thread_tables except in the case where the tables are locked + in which case ha_commit_stmt is called instead. + It is only called from here if refresh_version hasn't changed and the + table is not an old table when calling close_thread_table. + close_thread_tables is called from many places as a general clean up + function after completing a query. + 3) It is called when deleting the QUICK_RANGE_SELECT object if the + QUICK_RANGE_SELECT object had its own handler object. It is called + immediately before close of this local handler object. + HA_EXTRA_KEYREAD: + HA_EXTRA_NO_KEYREAD: + These parameters are used to provide an optimisation hint to the handler. + If HA_EXTRA_KEYREAD is set it is enough to read the index fields, for + many handlers this means that the index-only scans can be used and it + is not necessary to use the real records to satisfy this part of the + query. Index-only scans is a very important optimisation for disk-based + indexes. For main-memory indexes most indexes contain a reference to the + record and thus KEYREAD only says that it is enough to read key fields. + HA_EXTRA_NO_KEYREAD disables this for the handler, also HA_EXTRA_RESET + will disable this option. + The handler will set HA_KEYREAD_ONLY in its table flags to indicate this + feature is supported. + HA_EXTRA_FLUSH: + Indication to flush tables to disk, is supposed to be used to + ensure disk based tables are flushed at end of query execution. + Currently is never used. + + HA_EXTRA_FORCE_REOPEN: + Only used by MyISAM and Archive, called when altering table, + closing tables to enforce a reopen of the table files. + + 2) Operations used by some non-MyISAM handlers + ---------------------------------------------- + HA_EXTRA_KEYREAD_PRESERVE_FIELDS: + This is a strictly InnoDB feature that is more or less undocumented. 
+ When it is activated InnoDB copies field by field from its fetch + cache instead of all fields in one memcpy. Have no idea what the + purpose of this is. + Cut from include/my_base.h: + When using HA_EXTRA_KEYREAD, overwrite only key member fields and keep + other fields intact. When this is off (by default) InnoDB will use memcpy + to overwrite entire row. + HA_EXTRA_IGNORE_DUP_KEY: + HA_EXTRA_NO_IGNORE_DUP_KEY: + Informs the handler to we will not stop the transaction if we get an + duplicate key errors during insert/update. + Always called in pair, triggered by INSERT IGNORE and other similar + SQL constructs. + Not used by MyISAM. + + 3) Operations used only by MyISAM + --------------------------------- + HA_EXTRA_NORMAL: + Only used in MyISAM to reset quick mode, not implemented by any other + handler. Quick mode is also reset in MyISAM by HA_EXTRA_RESET. + + It is called after completing a successful DELETE query if the QUICK + option is set. + + HA_EXTRA_QUICK: + When the user does DELETE QUICK FROM table where-clause; this extra + option is called before the delete query is performed and + HA_EXTRA_NORMAL is called after the delete query is completed. + Temporary tables used internally in MySQL always set this option + + The meaning of quick mode is that when deleting in a B-tree no merging + of leafs is performed. This is a common method and many large DBMS's + actually only support this quick mode since it is very difficult to + merge leaves in a tree used by many threads concurrently. + + HA_EXTRA_CACHE: + This flag is usually set with extra_opt along with a cache size. + The size of this buffer is set by the user variable + record_buffer_size. The value of this cache size is the amount of + data read from disk in each fetch when performing a table scan. + This means that before scanning a table it is normal to call + extra with HA_EXTRA_CACHE and when the scan is completed to call + HA_EXTRA_NO_CACHE to release the cache memory. 
+ + Some special care is taken when using this extra parameter since there + could be a write ongoing on the table in the same statement. In this + one has to take special care since there might be a WRITE CACHE as + well. HA_EXTRA_CACHE specifies using a READ CACHE and using + READ CACHE and WRITE CACHE at the same time is not possible. + + Only MyISAM currently uses this option. + + It is set when doing full table scans using rr_sequential and + reset when completing such a scan with end_read_record + (resetting means calling extra with HA_EXTRA_NO_CACHE). + + It is set in filesort.cc for MyISAM internal tables and it is set in + a multi-update where HA_EXTRA_CACHE is called on a temporary result + table and after that ha_rnd_init(0) on table to be updated + and immediately after that HA_EXTRA_NO_CACHE on table to be updated. + + Apart from that it is always used from init_read_record but not when + used from UPDATE statements. It is not used from DELETE statements + with ORDER BY and LIMIT but it is used in normal scan loop in DELETE + statements. The reason here is that DELETE's in MyISAM doesn't move + existing data rows. + + It is also set in copy_data_between_tables when scanning the old table + to copy over to the new table. + And it is set in join_init_read_record where quick objects are used + to perform a scan on the table. In this case the full table scan can + even be performed multiple times as part of the nested loop join. + + For purposes of the partition handler it is obviously necessary to have + special treatment of this extra call. If we would simply pass this + extra call down to each handler we would allocate + cache size * no of partitions amount of memory and this is not + necessary since we will only scan one partition at a time when doing + full table scans.
+ + Thus we treat it by first checking whether we have MyISAM handlers in + the table, if not we simply ignore the call and if we have we will + record the call but will not call any underlying handler yet. Then + when performing the sequential scan we will check this recorded value + and call extra_opt whenever we start scanning a new partition. + + HA_EXTRA_NO_CACHE: + When performing a UNION SELECT HA_EXTRA_NO_CACHE is called from the + flush method in the select_union class. + It is used to some extent when insert delayed inserts. + See HA_EXTRA_RESET_STATE for use in conjunction with delete_all_rows(). + + It should be ok to call HA_EXTRA_NO_CACHE on all underlying handlers + if they are MyISAM handlers. Other handlers we can ignore the call + for. If no cache is in use they will quickly return after finding + this out. And we also ensure that all caches are disabled and no one + is left by mistake. + In the future this call will probably be deleted and we will instead call + ::reset(); + + HA_EXTRA_WRITE_CACHE: + See above, called from various places. It is mostly used when we + do INSERT ... SELECT + No special handling to save cache space is developed currently. + + HA_EXTRA_PREPARE_FOR_UPDATE: + This is called as part of a multi-table update. When the table to be + updated is also scanned then this informs MyISAM handler to drop any + caches if dynamic records are used (fixed size records do not care + about this call). We pass this along to the first partition to scan, and + flag that it is to be called after HA_EXTRA_CACHE when moving to the next + partition to scan. + + HA_EXTRA_PREPARE_FOR_DROP: + Only used by MyISAM, called in preparation for a DROP TABLE. + It's used mostly by Windows that cannot handle dropping an open file. + On other platforms it has the same effect as HA_EXTRA_FORCE_REOPEN. + + HA_EXTRA_PREPARE_FOR_RENAME: + Informs the handler we are about to attempt a rename of the table. 
+ For handlers that have share open files (MyISAM key-file and + Archive writer) they must close the files before rename is possible + on Windows. + + HA_EXTRA_READCHECK: + HA_EXTRA_NO_READCHECK: + Only one call to HA_EXTRA_NO_READCHECK from ha_open where it says that + this is not needed in SQL. The reason for this call is that MyISAM sets + the READ_CHECK_USED in the open call so the call is needed for MyISAM + to reset this feature. + The idea with this parameter was to inform of doing/not doing a read + check before applying an update. Since SQL always performs a read before + applying the update No Read Check is needed in MyISAM as well. + + This is a cut from Docs/myisam.txt + Sometimes you might want to force an update without checking whether + another user has changed the record since you last read it. This is + somewhat dangerous, so it should ideally not be used. That can be + accomplished by wrapping the mi_update() call in two calls to mi_extra(), + using these functions: + HA_EXTRA_NO_READCHECK=5 No readcheck on update + HA_EXTRA_READCHECK=6 Use readcheck (def) + + HA_EXTRA_REMEMBER_POS: + HA_EXTRA_RESTORE_POS: + System versioning needs this for MyISAM and Aria tables. + On DELETE using PRIMARY KEY: + 1) handler::ha_index_read_map() saves rowid used for row delete/update + 2) handler::ha_update_row() can rewrite saved rowid + 3) handler::ha_delete_row()/handler::ha_update_row() expects saved but got + different rowid and operation fails + Using those flags prevents harmful side effect of 2) + + 4) Operations only used by temporary tables for query processing + ---------------------------------------------------------------- + HA_EXTRA_RESET_STATE: + Same as reset() except that buffers are not released. If there is + a READ CACHE it is reinit'ed. A cache is reinit'ed to restart reading + or to change type of cache between READ CACHE and WRITE CACHE. 
+ + This extra function is always called immediately before calling + delete_all_rows on the handler for temporary tables. + There are cases however when HA_EXTRA_RESET_STATE isn't called in + a similar case for a temporary table in sql_union.cc and in two other + cases HA_EXTRA_NO_CACHE is called before and HA_EXTRA_WRITE_CACHE + called afterwards. + The case with HA_EXTRA_NO_CACHE and HA_EXTRA_WRITE_CACHE means + disable caching, delete all rows and enable WRITE CACHE. This is + used for temporary tables containing distinct sums and a + functional group. + + The only case that delete_all_rows is called on non-temporary tables + is in sql_delete.cc when DELETE FROM table; is called by a user. + In this case no special extra calls are performed before or after this + call. + + The partition handler should not need to bother about this one. It + should never be called. + + HA_EXTRA_NO_ROWS: + Don't insert rows indication to HEAP and MyISAM, only used by temporary + tables used in query processing. + Not handled by partition handler. + + 5) Operations only used by MyISAM internally + -------------------------------------------- + HA_EXTRA_REINIT_CACHE: + This call reinitializes the READ CACHE described above if there is one + and otherwise the call is ignored. + + We can thus safely call it on all underlying handlers if they are + MyISAM handlers. It is however never called so we don't handle it at all. + HA_EXTRA_FLUSH_CACHE: + Flush WRITE CACHE in MyISAM. It is only from one place in the code. + This is in sql_insert.cc where it is called if the table_flags doesn't + contain HA_DUPLICATE_POS. The only handler having the HA_DUPLICATE_POS + set is the MyISAM handler and so the only handler not receiving this + call is MyISAM. + Thus in effect this call is called but never used. Could be removed + from sql_insert.cc + HA_EXTRA_NO_USER_CHANGE: + Only used by MyISAM, never called. + Simulates lock_type as locked. 
+ HA_EXTRA_WAIT_LOCK: + HA_EXTRA_WAIT_NOLOCK: + Only used by MyISAM, called from MyISAM handler but never from server + code on top of the handler. + Sets lock_wait on/off + HA_EXTRA_NO_KEYS: + Only used by MyISAM, only used internally in MyISAM handler, never called + from server level. + HA_EXTRA_KEYREAD_CHANGE_POS: + HA_EXTRA_PRELOAD_BUFFER_SIZE: + HA_EXTRA_CHANGE_KEY_TO_DUP: + HA_EXTRA_CHANGE_KEY_TO_UNIQUE: + Only used by MyISAM, never called. + + 6) Operations not used at all + ----------------------------- + HA_EXTRA_KEY_CACHE: + HA_EXTRA_NO_KEY_CACHE: + These parameters are no longer used and could be removed. + + 7) Operations only used by federated tables for query processing + ---------------------------------------------------------------- + HA_EXTRA_INSERT_WITH_UPDATE: + Inform handler that an "INSERT...ON DUPLICATE KEY UPDATE" will be + executed. This condition is unset by HA_EXTRA_NO_IGNORE_DUP_KEY. + + 8) Operations only used by NDB + ------------------------------ + HA_EXTRA_DELETE_CANNOT_BATCH: + HA_EXTRA_UPDATE_CANNOT_BATCH: + Inform handler that delete_row()/update_row() cannot batch deletes/updates + and should perform them immediately. This may be needed when table has + AFTER DELETE/UPDATE triggers which access the subject table. + These flags are reset by the handler::extra(HA_EXTRA_RESET) call. + + 9) Operations only used by MERGE + ------------------------------ + HA_EXTRA_ADD_CHILDREN_LIST: + HA_EXTRA_ATTACH_CHILDREN: + HA_EXTRA_IS_ATTACHED_CHILDREN: + HA_EXTRA_DETACH_CHILDREN: + Special actions for MERGE tables. Ignore.
+*/ + +int ha_partition::extra(enum ha_extra_function operation) +{ + DBUG_ENTER("ha_partition:extra"); + DBUG_PRINT("enter", ("operation: %d", (int) operation)); + + switch (operation) { + /* Category 1), used by most handlers */ + case HA_EXTRA_NO_KEYREAD: + DBUG_RETURN(loop_partitions(end_keyread_cb, NULL)); + case HA_EXTRA_KEYREAD: + case HA_EXTRA_FLUSH: + case HA_EXTRA_PREPARE_FOR_FORCED_CLOSE: + DBUG_RETURN(loop_partitions(extra_cb, &operation)); + case HA_EXTRA_PREPARE_FOR_RENAME: + case HA_EXTRA_FORCE_REOPEN: + DBUG_RETURN(loop_extra_alter(operation)); + break; + + /* Category 2), used by non-MyISAM handlers */ + case HA_EXTRA_IGNORE_DUP_KEY: + case HA_EXTRA_NO_IGNORE_DUP_KEY: + case HA_EXTRA_KEYREAD_PRESERVE_FIELDS: + { + if (!m_myisam) + DBUG_RETURN(loop_partitions(extra_cb, &operation)); + } + break; + + /* Category 3), used by MyISAM handlers */ + case HA_EXTRA_PREPARE_FOR_UPDATE: + /* + Needs to be run on the first partition in the range now, and + later in late_extra_cache, when switching to a new partition to scan. + */ + m_extra_prepare_for_update= TRUE; + if (m_part_spec.start_part != NO_CURRENT_PART_ID) + { + if (!m_extra_cache) + m_extra_cache_part_id= m_part_spec.start_part; + DBUG_ASSERT(m_extra_cache_part_id == m_part_spec.start_part); + (void) m_file[m_part_spec.start_part]->extra(HA_EXTRA_PREPARE_FOR_UPDATE); + } + break; + case HA_EXTRA_NORMAL: + case HA_EXTRA_QUICK: + case HA_EXTRA_PREPARE_FOR_DROP: + case HA_EXTRA_FLUSH_CACHE: + case HA_EXTRA_PREPARE_FOR_ALTER_TABLE: + case HA_EXTRA_REMEMBER_POS: + case HA_EXTRA_RESTORE_POS: + { + DBUG_RETURN(loop_partitions(extra_cb, &operation)); + } + case HA_EXTRA_NO_READCHECK: + { + /* + This is only done as a part of ha_open, which is also used in + ha_partition::open, so no need to do anything. 
+ */ + break; + } + case HA_EXTRA_CACHE: + { + prepare_extra_cache(0); + break; + } + case HA_EXTRA_NO_CACHE: + { + int ret= 0; + if (m_extra_cache_part_id != NO_CURRENT_PART_ID) + ret= m_file[m_extra_cache_part_id]->extra(HA_EXTRA_NO_CACHE); + m_extra_cache= FALSE; + m_extra_cache_size= 0; + m_extra_prepare_for_update= FALSE; + m_extra_cache_part_id= NO_CURRENT_PART_ID; + DBUG_RETURN(ret); + } + case HA_EXTRA_WRITE_CACHE: + { + m_extra_cache= FALSE; + m_extra_cache_size= 0; + m_extra_prepare_for_update= FALSE; + m_extra_cache_part_id= NO_CURRENT_PART_ID; + DBUG_RETURN(loop_partitions(extra_cb, &operation)); + } + case HA_EXTRA_IGNORE_NO_KEY: + case HA_EXTRA_NO_IGNORE_NO_KEY: + { + /* + Ignore as these are specific to NDB for handling + idempotency + */ + break; + } + case HA_EXTRA_WRITE_CAN_REPLACE: + case HA_EXTRA_WRITE_CANNOT_REPLACE: + { + /* + Informs handler that write_row() can replace rows which conflict + with row being inserted by PK/unique key without reporting error + to the SQL-layer. 
+ + At this time, this is safe by limitation of ha_partition + */ + DBUG_RETURN(loop_partitions(extra_cb, &operation)); + } + /* Category 7), used by federated handlers */ + case HA_EXTRA_INSERT_WITH_UPDATE: + DBUG_RETURN(loop_partitions(extra_cb, &operation)); + /* Category 8) Operations only used by NDB */ + case HA_EXTRA_DELETE_CANNOT_BATCH: + case HA_EXTRA_UPDATE_CANNOT_BATCH: + { + /* Currently only NDB use the *_CANNOT_BATCH */ + break; + } + /* Category 9) Operations only used by MERGE */ + case HA_EXTRA_ADD_CHILDREN_LIST: + DBUG_RETURN(loop_partitions(extra_cb, &operation)); + case HA_EXTRA_ATTACH_CHILDREN: + { + int result; + uint num_locks; + handler **file; + if ((result= loop_partitions(extra_cb, &operation))) + DBUG_RETURN(result); + + /* Recalculate lock count as each child may have different set of locks */ + num_locks= 0; + file= m_file; + do + { + num_locks+= (*file)->lock_count(); + } while (*(++file)); + + m_num_locks= num_locks; + break; + } + case HA_EXTRA_IS_ATTACHED_CHILDREN: + DBUG_RETURN(loop_partitions(extra_cb, &operation)); + case HA_EXTRA_DETACH_CHILDREN: + DBUG_RETURN(loop_partitions(extra_cb, &operation)); + case HA_EXTRA_MARK_AS_LOG_TABLE: + /* + http://dev.mysql.com/doc/refman/5.1/en/partitioning-limitations.html + says we no longer support logging to partitioned tables, so we fail + here. + */ + DBUG_RETURN(ER_UNSUPORTED_LOG_ENGINE); + case HA_EXTRA_STARTING_ORDERED_INDEX_SCAN: + case HA_EXTRA_BEGIN_ALTER_COPY: + case HA_EXTRA_END_ALTER_COPY: + DBUG_RETURN(loop_partitions(extra_cb, &operation)); + default: + { + /* Temporary crash to discover what is wrong */ + DBUG_ASSERT(0); + break; + } + } + DBUG_RETURN(1); +} + + +/** + Special extra call to reset extra parameters + + @return Operation status. + @retval >0 Error code + @retval 0 Success + + @note Called at end of each statement to reset buffers. 
+ To avoid excessive calls, the m_partitions_to_reset bitmap keep records + of which partitions that have been used in extra(), external_lock() or + start_stmt() and is needed to be called. +*/ + +int ha_partition::reset(void) +{ + int result= 0; + int tmp; + uint i; + DBUG_ENTER("ha_partition::reset"); + + for (i= bitmap_get_first_set(&m_partitions_to_reset); + i < m_tot_parts; + i= bitmap_get_next_set(&m_partitions_to_reset, i)) + { + if (bitmap_is_set(&m_opened_partitions, i) && + (tmp= m_file[i]->ha_reset())) + result= tmp; + } + bitmap_clear_all(&m_partitions_to_reset); + m_extra_prepare_for_update= FALSE; + DBUG_RETURN(result); +} + +/** + Special extra method with additional parameter + See @ref ha_partition::extra + + @param[in] operation operation to execute + @param[in] arg extra argument + + @return status + @retval 0 success + @retval >0 error code + + @detail + Operations supported by extra_opt: + HA_EXTRA_KEYREAD: + arg is interpreted as key index + HA_EXTRA_CACHE: + arg is interpreted as size of cache in full table scan + + For detailed description refer to @ref ha_partition::extra +*/ + +int ha_partition::extra_opt(enum ha_extra_function operation, ulong arg) +{ + DBUG_ENTER("ha_partition::extra_opt"); + + switch (operation) + { + case HA_EXTRA_KEYREAD: + DBUG_RETURN(loop_partitions(start_keyread_cb, &arg)); + case HA_EXTRA_CACHE: + prepare_extra_cache(arg); + DBUG_RETURN(0); + default: + DBUG_ASSERT(0); + } + DBUG_RETURN(1); +} + + +/* + Call extra on handler with HA_EXTRA_CACHE and cachesize + + SYNOPSIS + prepare_extra_cache() + cachesize Size of cache for full table scan + + RETURN VALUE + NONE +*/ + +void ha_partition::prepare_extra_cache(uint cachesize) +{ + DBUG_ENTER("ha_partition::prepare_extra_cache"); + DBUG_PRINT("enter", ("cachesize %u", cachesize)); + + m_extra_cache= TRUE; + m_extra_cache_size= cachesize; + if (m_part_spec.start_part != NO_CURRENT_PART_ID) + { + DBUG_ASSERT(bitmap_is_set(&m_partitions_to_reset, + 
m_part_spec.start_part)); + bitmap_set_bit(&m_partitions_to_reset, m_part_spec.start_part); + late_extra_cache(m_part_spec.start_part); + } + DBUG_VOID_RETURN; +} + + +/** + Prepares our new and reorged handlers for rename or delete. + + @param operation Operation to forward + + @return Operation status + @retval 0 Success + @retval !0 Error +*/ + +int ha_partition::loop_extra_alter(enum ha_extra_function operation) +{ + int result= 0, tmp; + handler **file; + DBUG_ENTER("ha_partition::loop_extra_alter"); + DBUG_ASSERT(operation == HA_EXTRA_PREPARE_FOR_RENAME || + operation == HA_EXTRA_FORCE_REOPEN); + + if (m_new_file != NULL) + { + for (file= m_new_file; *file; file++) + if ((tmp= (*file)->extra(operation))) + result= tmp; + } + if (m_reorged_file != NULL) + { + for (file= m_reorged_file; *file; file++) + if ((tmp= (*file)->extra(operation))) + result= tmp; + } + if ((tmp= loop_partitions(extra_cb, &operation))) + result= tmp; + DBUG_RETURN(result); +} + + +/** + Call callback(part, param) on all partitions + + @param callback a callback to call for each partition + @param param a void*-parameter passed to callback + + @return Operation status + @retval >0 Error code + @retval 0 Success +*/ + +int ha_partition::loop_partitions(handler_callback callback, void *param) +{ + int result= 0, tmp; + uint i; + DBUG_ENTER("ha_partition::loop_partitions"); + + for (i= bitmap_get_first_set(&m_part_info->lock_partitions); + i < m_tot_parts; + i= bitmap_get_next_set(&m_part_info->lock_partitions, i)) + { + /* + This can be called after an error in ha_open. + In this case calling 'extra' can crash. + */ + if (bitmap_is_set(&m_opened_partitions, i) && + (tmp= callback(m_file[i], param))) + result= tmp; + } + /* Add all used partitions to be called in reset(). 
*/ + bitmap_union(&m_partitions_to_reset, &m_part_info->lock_partitions); + DBUG_RETURN(result); +} + + +/* + Call extra(HA_EXTRA_CACHE) on next partition_id + + SYNOPSIS + late_extra_cache() + partition_id Partition id to call extra on + + RETURN VALUE + NONE +*/ + +void ha_partition::late_extra_cache(uint partition_id) +{ + handler *file; + DBUG_ENTER("ha_partition::late_extra_cache"); + DBUG_PRINT("enter", ("extra_cache %u prepare %u partid %u size %u", + m_extra_cache, m_extra_prepare_for_update, + partition_id, m_extra_cache_size)); + + if (!m_extra_cache && !m_extra_prepare_for_update) + DBUG_VOID_RETURN; + file= m_file[partition_id]; + if (m_extra_cache) + { + if (m_extra_cache_size == 0) + (void) file->extra(HA_EXTRA_CACHE); + else + (void) file->extra_opt(HA_EXTRA_CACHE, m_extra_cache_size); + } + if (m_extra_prepare_for_update) + { + (void) file->extra(HA_EXTRA_PREPARE_FOR_UPDATE); + } + m_extra_cache_part_id= partition_id; + DBUG_VOID_RETURN; +} + + +/* + Call extra(HA_EXTRA_NO_CACHE) on next partition_id + + SYNOPSIS + late_extra_no_cache() + partition_id Partition id to call extra on + + RETURN VALUE + NONE +*/ + +void ha_partition::late_extra_no_cache(uint partition_id) +{ + handler *file; + DBUG_ENTER("ha_partition::late_extra_no_cache"); + + if (!m_extra_cache && !m_extra_prepare_for_update) + DBUG_VOID_RETURN; + file= m_file[partition_id]; + (void) file->extra(HA_EXTRA_NO_CACHE); + DBUG_ASSERT(partition_id == m_extra_cache_part_id); + m_extra_cache_part_id= NO_CURRENT_PART_ID; + DBUG_VOID_RETURN; +} + + +/**************************************************************************** + MODULE optimiser support +****************************************************************************/ + +/** + Get keys to use for scanning. + + @return key_map of keys usable for scanning + + @note No need to use read_partitions here, since it does not depend on + which partitions is used, only which storage engine used. 
+*/ + +const key_map *ha_partition::keys_to_use_for_scanning() +{ + DBUG_ENTER("ha_partition::keys_to_use_for_scanning"); + DBUG_RETURN(get_open_file_sample()->keys_to_use_for_scanning()); +} + + +/** + Minimum number of rows to base optimizer estimate on. +*/ + +ha_rows ha_partition::min_rows_for_estimate() +{ + uint i, max_used_partitions, tot_used_partitions; + DBUG_ENTER("ha_partition::min_rows_for_estimate"); + + tot_used_partitions= bitmap_bits_set(&m_part_info->read_partitions); + + /* + All partitions might have been left as unused during partition pruning + due to, for example, an impossible WHERE condition. Nonetheless, the + optimizer might still attempt to perform (e.g. range) analysis where an + estimate of the number of rows is calculated using records_in_range. + Hence, to handle this and other possible cases, use zero as the minimum + number of rows to base the estimate on if no partition is being used. + */ + if (!tot_used_partitions) + DBUG_RETURN(0); + + /* + Allow O(log2(tot_partitions)) increase in number of used partitions. + This gives O(tot_rows/log2(tot_partitions)) rows to base the estimate on. + I.e when the total number of partitions doubles, allow one more + partition to be checked. + */ + i= 2; + max_used_partitions= 1; + while (i < m_tot_parts) + { + max_used_partitions++; + i= i << 1; + } + if (max_used_partitions > tot_used_partitions) + max_used_partitions= tot_used_partitions; + + /* stats.records is already updated by the info(HA_STATUS_VARIABLE) call. */ + DBUG_PRINT("info", ("max_used_partitions: %u tot_rows: %lu", + max_used_partitions, + (ulong) stats.records)); + DBUG_PRINT("info", ("tot_used_partitions: %u min_rows_to_check: %lu", + tot_used_partitions, + (ulong) stats.records * max_used_partitions + / tot_used_partitions)); + DBUG_RETURN(stats.records * max_used_partitions / tot_used_partitions); +} + + +/** + Get the biggest used partition.
+ + Starting at the N:th biggest partition and skips all non used + partitions, returning the biggest used partition found + + @param[in,out] part_index Skip the *part_index biggest partitions + + @return The biggest used partition with index not lower than *part_index. + @retval NO_CURRENT_PART_ID No more partition used. + @retval != NO_CURRENT_PART_ID partition id of biggest used partition with + index >= *part_index supplied. Note that + *part_index will be updated to the next + partition index to use. +*/ + +uint ha_partition::get_biggest_used_partition(uint *part_index) +{ + uint part_id; + while ((*part_index) < m_tot_parts) + { + part_id= m_part_ids_sorted_by_num_of_records[(*part_index)++]; + if (bitmap_is_set(&m_part_info->read_partitions, part_id)) + return part_id; + } + return NO_CURRENT_PART_ID; +} + + +/* + Return time for a scan of the table + + SYNOPSIS + scan_time() + + RETURN VALUE + time for scan +*/ + +double ha_partition::scan_time() +{ + double scan_time= 0; + uint i; + DBUG_ENTER("ha_partition::scan_time"); + + for (i= bitmap_get_first_set(&m_part_info->read_partitions); + i < m_tot_parts; + i= bitmap_get_next_set(&m_part_info->read_partitions, i)) + scan_time+= m_file[i]->scan_time(); + DBUG_RETURN(scan_time); +} + + +/** + @brief + Caculate time to scan the given index (index only scan) + + @param inx Index number to scan + + @return time for scanning index inx +*/ + +double ha_partition::key_scan_time(uint inx) +{ + double scan_time= 0; + uint i; + DBUG_ENTER("ha_partition::key_scan_time"); + for (i= bitmap_get_first_set(&m_part_info->read_partitions); + i < m_tot_parts; + i= bitmap_get_next_set(&m_part_info->read_partitions, i)) + scan_time+= m_file[i]->key_scan_time(inx); + DBUG_RETURN(scan_time); +} + + +double ha_partition::keyread_time(uint inx, uint ranges, ha_rows rows) +{ + double read_time= 0; + uint i; + DBUG_ENTER("ha_partition::keyread_time"); + if (!ranges) + DBUG_RETURN(handler::keyread_time(inx, ranges, rows)); + for (i= 
bitmap_get_first_set(&m_part_info->read_partitions); + i < m_tot_parts; + i= bitmap_get_next_set(&m_part_info->read_partitions, i)) + read_time+= m_file[i]->keyread_time(inx, ranges, rows); + DBUG_RETURN(read_time); +} + + +/** + Find number of records in a range. + @param inx Index number + @param min_key Start of range + @param max_key End of range + + @return Number of rows in range. + + Given a starting key, and an ending key estimate the number of rows that + will exist between the two. max_key may be empty which in case determine + if start_key matches any rows. +*/ + +ha_rows ha_partition::records_in_range(uint inx, const key_range *min_key, + const key_range *max_key, + page_range *pages) +{ + ha_rows min_rows_to_check, rows, estimated_rows=0, checked_rows= 0; + uint partition_index= 0, part_id; + page_range ignore_pages; + DBUG_ENTER("ha_partition::records_in_range"); + + /* Don't calculate pages of more than one active partition */ + if (bitmap_bits_set(&m_part_info->read_partitions) != 1) + pages= &ignore_pages; + + min_rows_to_check= min_rows_for_estimate(); + + while ((part_id= get_biggest_used_partition(&partition_index)) + != NO_CURRENT_PART_ID) + { + rows= m_file[part_id]->records_in_range(inx, min_key, max_key, pages); + + DBUG_PRINT("info", ("part %u match %lu rows of %lu", part_id, (ulong) rows, + (ulong) m_file[part_id]->stats.records)); + + if (rows == HA_POS_ERROR) + DBUG_RETURN(HA_POS_ERROR); + estimated_rows+= rows; + checked_rows+= m_file[part_id]->stats.records; + /* + Returning 0 means no rows can be found, so we must continue + this loop as long as we have estimated_rows == 0. + Also many engines return 1 to indicate that there may exist + a matching row, we do not normalize this by dividing by number of + used partitions, but leave it to be returned as a sum, which will + reflect that we will need to scan each partition's index. 
+ + Note that this statistics may not always be correct, so we must + continue even if the current partition has 0 rows, since we might have + deleted rows from the current partition, or inserted to the next + partition. + */ + if (estimated_rows && checked_rows && + checked_rows >= min_rows_to_check) + { + DBUG_PRINT("info", + ("records_in_range(inx %u): %lu (%lu * %lu / %lu)", + inx, + (ulong) (estimated_rows * stats.records / checked_rows), + (ulong) estimated_rows, + (ulong) stats.records, + (ulong) checked_rows)); + DBUG_RETURN(estimated_rows * stats.records / checked_rows); + } + } + DBUG_PRINT("info", ("records_in_range(inx %u): %lu", + inx, + (ulong) estimated_rows)); + DBUG_RETURN(estimated_rows); +} + + +/** + Estimate upper bound of number of rows. + + @return Number of rows. +*/ + +ha_rows ha_partition::estimate_rows_upper_bound() +{ + ha_rows rows, tot_rows= 0; + handler **file= m_file; + DBUG_ENTER("ha_partition::estimate_rows_upper_bound"); + + do + { + if (bitmap_is_set(&(m_part_info->read_partitions), (uint)(file - m_file))) + { + rows= (*file)->estimate_rows_upper_bound(); + if (rows == HA_POS_ERROR) + DBUG_RETURN(HA_POS_ERROR); + tot_rows+= rows; + } + } while (*(++file)); + DBUG_RETURN(tot_rows); +} + + +/* + Get time to read + + SYNOPSIS + read_time() + index Index number used + ranges Number of ranges + rows Number of rows + + RETURN VALUE + time for read + + DESCRIPTION + This will be optimised later to include whether or not the index can + be used with partitioning. To achieve we need to add another parameter + that specifies how many of the index fields that are bound in the ranges. + Possibly added as a new call to handlers. +*/ + +double ha_partition::read_time(uint index, uint ranges, ha_rows rows) +{ + DBUG_ENTER("ha_partition::read_time"); + + DBUG_RETURN(get_open_file_sample()->read_time(index, ranges, rows)); +} + + +/** + Number of rows in table. see handler.h + + @return Number of records in the table (after pruning!) 
*/

ha_rows ha_partition::records()
{
  ha_rows tot_rows= 0;
  uint i;
  DBUG_ENTER("ha_partition::records");

  /* Sum row counts over only the partitions that survived pruning. */
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if (unlikely(m_file[i]->pre_records()))
      DBUG_RETURN(HA_POS_ERROR);
    const ha_rows rows= m_file[i]->records();
    if (unlikely(rows == HA_POS_ERROR))
      DBUG_RETURN(HA_POS_ERROR);
    tot_rows+= rows;
  }
  DBUG_PRINT("exit", ("records: %lld", (longlong) tot_rows));
  DBUG_RETURN(tot_rows);
}


/*
  Is it ok to switch to a new engine for this table

  SYNOPSIS
    can_switch_engines()

  RETURN VALUE
    TRUE                    Ok
    FALSE                   Not ok

  DESCRIPTION
    Used to ensure that tables with foreign key constraints are not moved
    to engines without foreign key support.
*/

bool ha_partition::can_switch_engines()
{
  handler **file;
  DBUG_ENTER("ha_partition::can_switch_engines");

  file= m_file;
  /* Every underlying handler must agree; a single refusal vetoes the switch. */
  do
  {
    if (!(*file)->can_switch_engines())
      DBUG_RETURN(FALSE);
  } while (*(++file));
  DBUG_RETURN(TRUE);
}


/*
  Is table cache supported

  SYNOPSIS
    table_cache_type()

  DESCRIPTION
    Delegates to a sample open partition handler.
*/

uint8 ha_partition::table_cache_type()
{
  DBUG_ENTER("ha_partition::table_cache_type");

  DBUG_RETURN(get_open_file_sample()->table_cache_type());
}


/**
  Calculate hash value for KEY partitioning using an array of fields.

  @param field_array   An array of the fields in KEY partitioning

  @return hash_value calculated

  @note Uses the hash function on the character set of the field.
  Integer and floating point fields use the binary character set by default.
+*/ + +uint32 ha_partition::calculate_key_hash_value(Field **field_array) +{ + Hasher hasher; + bool use_51_hash; + use_51_hash= MY_TEST((*field_array)->table->part_info->key_algorithm == + partition_info::KEY_ALGORITHM_51); + + do + { + Field *field= *field_array; + if (use_51_hash) + { + switch (field->real_type()) { + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_LONG: + case MYSQL_TYPE_FLOAT: + case MYSQL_TYPE_DOUBLE: + case MYSQL_TYPE_NEWDECIMAL: + case MYSQL_TYPE_TIMESTAMP: + case MYSQL_TYPE_LONGLONG: + case MYSQL_TYPE_INT24: + case MYSQL_TYPE_TIME: + case MYSQL_TYPE_DATETIME: + case MYSQL_TYPE_YEAR: + case MYSQL_TYPE_NEWDATE: + { + if (field->is_null()) + { + hasher.add_null(); + continue; + } + /* Force this to my_hash_sort_bin, which was used in 5.1! */ + uint len= field->pack_length(); + hasher.add(&my_charset_bin, field->ptr, len); + /* Done with this field, continue with next one. */ + continue; + } + case MYSQL_TYPE_STRING: + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_BIT: + /* Not affected, same in 5.1 and 5.5 */ + break; + /* + ENUM/SET uses my_hash_sort_simple in 5.1 (i.e. my_charset_latin1) + and my_hash_sort_bin in 5.5! + */ + case MYSQL_TYPE_ENUM: + case MYSQL_TYPE_SET: + { + if (field->is_null()) + { + hasher.add_null(); + continue; + } + /* Force this to my_hash_sort_bin, which was used in 5.1! */ + uint len= field->pack_length(); + hasher.add(&my_charset_latin1, field->ptr, len); + continue; + } + /* New types in mysql-5.6. */ + case MYSQL_TYPE_DATETIME2: + case MYSQL_TYPE_TIME2: + case MYSQL_TYPE_TIMESTAMP2: + /* Not affected, 5.6+ only! */ + break; + + /* These types should not be allowed for partitioning! */ + case MYSQL_TYPE_NULL: + case MYSQL_TYPE_DECIMAL: + case MYSQL_TYPE_DATE: + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_GEOMETRY: + /* fall through */ + default: + DBUG_ASSERT(0); // New type? 
+ /* Fall through for default hashing (5.5). */ + } + /* fall through, use collation based hashing. */ + } + field->hash(&hasher); + } while (*(++field_array)); + return (uint32) hasher.finalize(); +} + + +/**************************************************************************** + MODULE print messages +****************************************************************************/ + +const char *ha_partition::index_type(uint inx) +{ + uint first_used_partition; + DBUG_ENTER("ha_partition::index_type"); + + first_used_partition= bitmap_get_first_set(&(m_part_info->read_partitions)); + + if (first_used_partition == MY_BIT_NONE) + { + DBUG_ASSERT(0); // How can this happen? + DBUG_RETURN(handler::index_type(inx)); + } + + DBUG_RETURN(m_file[first_used_partition]->index_type(inx)); +} + + +enum row_type ha_partition::get_row_type() const +{ + uint i; + enum row_type type; + DBUG_ENTER("ha_partition::get_row_type"); + + i= bitmap_get_first_set(&m_part_info->read_partitions); + DBUG_ASSERT(i < m_tot_parts); + if (i >= m_tot_parts) + DBUG_RETURN(ROW_TYPE_NOT_USED); + + type= m_file[i]->get_row_type(); + DBUG_PRINT("info", ("partition %u, row_type: %d", i, type)); + + for (i= bitmap_get_next_set(&m_part_info->lock_partitions, i); + i < m_tot_parts; + i= bitmap_get_next_set(&m_part_info->lock_partitions, i)) + { + enum row_type part_type= m_file[i]->get_row_type(); + DBUG_PRINT("info", ("partition %u, row_type: %d", i, type)); + if (part_type != type) + DBUG_RETURN(ROW_TYPE_NOT_USED); + } + + DBUG_RETURN(type); +} + + +void ha_partition::append_row_to_str(String &str) +{ + const uchar *rec; + bool is_rec0= !m_err_rec || m_err_rec == table->record[0]; + if (is_rec0) + rec= table->record[0]; + else + rec= m_err_rec; + // If PK, use full PK instead of full part field array! 
+ if (table->s->primary_key != MAX_KEY) + { + KEY *key= table->key_info + table->s->primary_key; + KEY_PART_INFO *key_part= key->key_part; + KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts; + if (!is_rec0) + set_key_field_ptr(key, rec, table->record[0]); + for (; key_part != key_part_end; key_part++) + { + Field *field= key_part->field; + str.append(' '); + str.append(&field->field_name); + str.append(':'); + field_unpack(&str, field, rec, 0, false); + } + if (!is_rec0) + set_key_field_ptr(key, table->record[0], rec); + } + else + { + Field **field_ptr; + if (!is_rec0) + table->move_fields(m_part_info->full_part_field_array, rec, + table->record[0]); + /* No primary key, use full partition field array. */ + for (field_ptr= m_part_info->full_part_field_array; + *field_ptr; + field_ptr++) + { + Field *field= *field_ptr; + str.append(' '); + str.append(&field->field_name); + str.append(':'); + field_unpack(&str, field, rec, 0, false); + } + if (!is_rec0) + table->move_fields(m_part_info->full_part_field_array, table->record[0], + rec); + } +} + + +void ha_partition::print_error(int error, myf errflag) +{ + THD *thd= ha_thd(); + DBUG_ENTER("ha_partition::print_error"); + DBUG_PRINT("enter", ("error: %d", error)); + + /* Should probably look for my own errors first */ + if ((error == HA_ERR_NO_PARTITION_FOUND) && + ! (thd->lex->alter_info.partition_flags & ALTER_PARTITION_TRUNCATE)) + { + m_part_info->print_no_partition_found(table, errflag); + DBUG_VOID_RETURN; + } + else if (error == HA_ERR_ROW_IN_WRONG_PARTITION) + { + /* Should only happen on DELETE or UPDATE! 
*/ + DBUG_ASSERT(thd_sql_command(thd) == SQLCOM_DELETE || + thd_sql_command(thd) == SQLCOM_DELETE_MULTI || + thd_sql_command(thd) == SQLCOM_UPDATE || + thd_sql_command(thd) == SQLCOM_UPDATE_MULTI); + DBUG_ASSERT(m_err_rec); + if (m_err_rec) + { + uint max_length; + char buf[MAX_KEY_LENGTH]; + String str(buf,sizeof(buf),system_charset_info); + uint32 part_id; + str.length(0); + str.append('('); + str.append_ulonglong(m_last_part); + str.append(STRING_WITH_LEN(" != ")); + if (get_part_for_buf(m_err_rec, m_rec0, m_part_info, &part_id)) + str.append('?'); + else + str.append_ulonglong(part_id); + str.append(')'); + append_row_to_str(str); + + /* Log this error, so the DBA can notice it and fix it! */ + sql_print_error("Table '%-192s' corrupted: row in wrong partition: %s" + "Please REPAIR the table!", + table->s->table_name.str, + str.c_ptr_safe()); + + max_length= (MYSQL_ERRMSG_SIZE - + (uint) strlen(ER_THD(thd, ER_ROW_IN_WRONG_PARTITION))); + if (str.length() >= max_length) + { + str.length(max_length-4); + str.append(STRING_WITH_LEN("...")); + } + my_error(ER_ROW_IN_WRONG_PARTITION, MYF(0), str.c_ptr_safe()); + m_err_rec= NULL; + DBUG_VOID_RETURN; + } + /* fall through to generic error handling. 
*/ + } + + /* + We choose a main handler's print_error if: + * m_file has not been initialized, like in bug#42438 + * lookup_errkey is set, which means that an error has occurred in the + main handler, not in individual partitions + */ + if (m_file && lookup_errkey == (uint)-1) + { + if (m_last_part >= m_tot_parts) + { + DBUG_ASSERT(0); + m_last_part= 0; + } + m_file[m_last_part]->print_error(error, errflag); + } + else + handler::print_error(error, errflag); + DBUG_VOID_RETURN; +} + + +bool ha_partition::get_error_message(int error, String *buf) +{ + DBUG_ENTER("ha_partition::get_error_message"); + + /* Should probably look for my own errors first */ + + /* In case m_file has not been initialized, like in bug#42438 */ + if (m_file) + DBUG_RETURN(m_file[m_last_part]->get_error_message(error, buf)); + DBUG_RETURN(handler::get_error_message(error, buf)); + +} + + +/**************************************************************************** + MODULE in-place ALTER +****************************************************************************/ +/** + Get table flags. +*/ + +handler::Table_flags ha_partition::table_flags() const +{ + uint first_used_partition= 0; + DBUG_ENTER("ha_partition::table_flags"); + if (m_handler_status < handler_initialized || + m_handler_status >= handler_closed) + DBUG_RETURN(PARTITION_ENABLED_TABLE_FLAGS); + + if (get_lock_type() != F_UNLCK) + { + /* + The flags are cached after external_lock, and may depend on isolation + level. So we should use a locked partition to get the correct flags. + */ + first_used_partition= bitmap_get_first_set(&m_part_info->lock_partitions); + if (first_used_partition == MY_BIT_NONE) + first_used_partition= 0; + } + DBUG_RETURN((m_file[first_used_partition]->ha_table_flags() & + ~(PARTITION_DISABLED_TABLE_FLAGS)) | + (PARTITION_ENABLED_TABLE_FLAGS)); +} + + +/** + alter_table_flags must be on handler/table level, not on hton level + due to the ha_partition hton does not know what the underlying hton is. 
+*/ + +alter_table_operations ha_partition::alter_table_flags(alter_table_operations flags) +{ + alter_table_operations flags_to_return; + DBUG_ENTER("ha_partition::alter_table_flags"); + + flags_to_return= ht->alter_table_flags(flags); + flags_to_return|= m_file[0]->alter_table_flags(flags); + + DBUG_RETURN(flags_to_return); +} + + +/** + check if copy of data is needed in alter table. +*/ +bool ha_partition::check_if_incompatible_data(HA_CREATE_INFO *create_info, + uint table_changes) +{ + /* + The check for any partitioning related changes have already been done + in mysql_alter_table (by fix_partition_func), so it is only up to + the underlying handlers. + */ + List_iterator part_it(m_part_info->partitions); + HA_CREATE_INFO dummy_info= *create_info; + uint i=0; + while (partition_element *part_elem= part_it++) + { + if (m_is_sub_partitioned) + { + List_iterator subpart_it(part_elem->subpartitions); + while (partition_element *sub_elem= subpart_it++) + { + dummy_info.data_file_name= sub_elem->data_file_name; + dummy_info.index_file_name= sub_elem->index_file_name; + if (m_file[i++]->check_if_incompatible_data(&dummy_info, table_changes)) + return COMPATIBLE_DATA_NO; + } + } + else + { + dummy_info.data_file_name= part_elem->data_file_name; + dummy_info.index_file_name= part_elem->index_file_name; + if (m_file[i++]->check_if_incompatible_data(&dummy_info, table_changes)) + return COMPATIBLE_DATA_NO; + } + } + return COMPATIBLE_DATA_YES; +} + + +/** + Support of in-place alter table. 
+*/ + +/** + Helper class for in-place alter, see handler.h +*/ + +class ha_partition_inplace_ctx : public inplace_alter_handler_ctx +{ +public: + inplace_alter_handler_ctx **handler_ctx_array; +private: + uint m_tot_parts; + +public: + ha_partition_inplace_ctx(THD *thd, uint tot_parts) + : inplace_alter_handler_ctx(), + handler_ctx_array(NULL), + m_tot_parts(tot_parts) + {} + + ~ha_partition_inplace_ctx() + { + if (handler_ctx_array) + { + for (uint index= 0; index < m_tot_parts; index++) + delete handler_ctx_array[index]; + } + } +}; + + +enum_alter_inplace_result +ha_partition::check_if_supported_inplace_alter(TABLE *altered_table, + Alter_inplace_info *ha_alter_info) +{ + uint index= 0; + enum_alter_inplace_result result; + alter_table_operations orig_ops; + ha_partition_inplace_ctx *part_inplace_ctx; + bool first_is_set= false; + THD *thd= ha_thd(); + + DBUG_ENTER("ha_partition::check_if_supported_inplace_alter"); + /* + Support inplace change of KEY () -> KEY ALGORITHM = N (). + Any other change would set partition_changed in + prep_alter_part_table() in mysql_alter_table(). + */ + if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO) + { + DBUG_ASSERT(ha_alter_info->alter_info->flags == 0); + DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK); + } + + part_inplace_ctx= + new (thd->mem_root) ha_partition_inplace_ctx(thd, m_tot_parts); + if (!part_inplace_ctx) + DBUG_RETURN(HA_ALTER_ERROR); + + part_inplace_ctx->handler_ctx_array= (inplace_alter_handler_ctx **) + thd->alloc(sizeof(inplace_alter_handler_ctx *) * (m_tot_parts + 1)); + if (!part_inplace_ctx->handler_ctx_array) + DBUG_RETURN(HA_ALTER_ERROR); + + do { + result= HA_ALTER_INPLACE_NO_LOCK; + /* Set all to NULL, including the terminating one. 
*/ + for (index= 0; index <= m_tot_parts; index++) + part_inplace_ctx->handler_ctx_array[index]= NULL; + + ha_alter_info->handler_flags |= ALTER_PARTITIONED; + orig_ops= ha_alter_info->handler_flags; + for (index= 0; index < m_tot_parts; index++) + { + enum_alter_inplace_result p_result= + m_file[index]->check_if_supported_inplace_alter(altered_table, + ha_alter_info); + part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx; + + if (index == 0) + first_is_set= (ha_alter_info->handler_ctx != NULL); + else if (first_is_set != (ha_alter_info->handler_ctx != NULL)) + { + /* Either none or all partitions must set handler_ctx! */ + DBUG_ASSERT(0); + DBUG_RETURN(HA_ALTER_ERROR); + } + if (p_result < result) + result= p_result; + if (result == HA_ALTER_ERROR) + break; + } + } while (orig_ops != ha_alter_info->handler_flags); + + ha_alter_info->handler_ctx= part_inplace_ctx; + /* + To indicate for future inplace calls that there are several + partitions/handlers that need to be committed together, + we set group_commit_ctx to the NULL terminated array of + the partitions handlers. + */ + ha_alter_info->group_commit_ctx= part_inplace_ctx->handler_ctx_array; + + DBUG_RETURN(result); +} + + +bool ha_partition::prepare_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info) +{ + uint index= 0; + bool error= false; + ha_partition_inplace_ctx *part_inplace_ctx; + + DBUG_ENTER("ha_partition::prepare_inplace_alter_table"); + + /* + Changing to similar partitioning, only update metadata. + Non allowed changes would be caought in prep_alter_part_table(). 
+ */ + if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO) + { + DBUG_ASSERT(ha_alter_info->alter_info->flags == 0); + DBUG_RETURN(false); + } + + part_inplace_ctx= + static_cast(ha_alter_info->handler_ctx); + + for (index= 0; index < m_tot_parts && !error; index++) + { + ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[index]; + if (m_file[index]->ha_prepare_inplace_alter_table(altered_table, + ha_alter_info)) + error= true; + part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx; + } + ha_alter_info->handler_ctx= part_inplace_ctx; + + DBUG_RETURN(error); +} + + +bool ha_partition::inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info) +{ + uint index= 0; + bool error= false; + ha_partition_inplace_ctx *part_inplace_ctx; + + DBUG_ENTER("ha_partition::inplace_alter_table"); + + /* + Changing to similar partitioning, only update metadata. + Non allowed changes would be caught in prep_alter_part_table(). + */ + if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO) + { + DBUG_ASSERT(ha_alter_info->alter_info->flags == 0); + DBUG_RETURN(false); + } + + part_inplace_ctx= + static_cast(ha_alter_info->handler_ctx); + + for (index= 0; index < m_tot_parts && !error; index++) + { + if ((ha_alter_info->handler_ctx= + part_inplace_ctx->handler_ctx_array[index]) != NULL + && index != 0) + ha_alter_info->handler_ctx->set_shared_data + (*part_inplace_ctx->handler_ctx_array[index - 1]); + + if (m_file[index]->ha_inplace_alter_table(altered_table, + ha_alter_info)) + error= true; + part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx; + } + ha_alter_info->handler_ctx= part_inplace_ctx; + + DBUG_RETURN(error); +} + + +/* + Note that this function will try rollback failed ADD INDEX by + executing DROP INDEX for the indexes that were committed (if any) + before the error occurred. 
This means that the underlying storage + engine must be able to drop index in-place with X-lock held. + (As X-lock will be held here if new indexes are to be committed) +*/ +bool ha_partition::commit_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info, + bool commit) +{ + ha_partition_inplace_ctx *part_inplace_ctx; + bool error= false; + + DBUG_ENTER("ha_partition::commit_inplace_alter_table"); + + /* + Changing to similar partitioning, only update metadata. + Non allowed changes would be caught in prep_alter_part_table(). + */ + if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO) + { + DBUG_ASSERT(ha_alter_info->alter_info->flags == 0); + DBUG_RETURN(false); + } + + part_inplace_ctx= + static_cast(ha_alter_info->handler_ctx); + + if (commit) + { + DBUG_ASSERT(ha_alter_info->group_commit_ctx == + part_inplace_ctx->handler_ctx_array); + ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[0]; + error= m_file[0]->ha_commit_inplace_alter_table(altered_table, + ha_alter_info, commit); + if (unlikely(error)) + goto end; + if (ha_alter_info->group_commit_ctx) + { + /* + If ha_alter_info->group_commit_ctx is not set to NULL, + then the engine did only commit the first partition! + The engine is probably new, since both innodb and the default + implementation of handler::commit_inplace_alter_table sets it to NULL + and simply return false, since it allows metadata changes only. + Loop over all other partitions as to follow the protocol! + */ + uint i; + /* + InnoDB does not set ha_alter_info->group_commit_ctx to NULL in the + case if autoincrement attribute is necessary to reset for all + partitions for INNOBASE_INPLACE_IGNORE handler flags. It does not + affect durability, because it is solely about updating the InnoDB data + dictionary caches (one InnoDB dict_table_t per partition or + sub-partition). 
+ */ + DBUG_ASSERT(table->found_next_number_field + && !altered_table->found_next_number_field); + for (i= 1; i < m_tot_parts; i++) + { + ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[i]; + error|= m_file[i]->ha_commit_inplace_alter_table(altered_table, + ha_alter_info, + true); + } + } + } + else + { + uint i; + for (i= 0; i < m_tot_parts; i++) + { + /* Rollback, commit == false, is done for each partition! */ + ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[i]; + if (m_file[i]->ha_commit_inplace_alter_table(altered_table, + ha_alter_info, false)) + error= true; + } + } +end: + ha_alter_info->handler_ctx= part_inplace_ctx; + + DBUG_RETURN(error); +} + + +uint ha_partition::min_of_the_max_uint( + uint (handler::*operator_func)(void) const) const +{ + handler **file; + uint min_of_the_max= ((*m_file)->*operator_func)(); + + for (file= m_file+1; *file; file++) + { + uint tmp= ((*file)->*operator_func)(); + set_if_smaller(min_of_the_max, tmp); + } + return min_of_the_max; +} + + +uint ha_partition::max_supported_key_parts() const +{ + return min_of_the_max_uint(&handler::max_supported_key_parts); +} + + +uint ha_partition::max_supported_key_length() const +{ + return min_of_the_max_uint(&handler::max_supported_key_length); +} + + +uint ha_partition::max_supported_key_part_length() const +{ + return min_of_the_max_uint(&handler::max_supported_key_part_length); +} + + +uint ha_partition::max_supported_record_length() const +{ + return min_of_the_max_uint(&handler::max_supported_record_length); +} + + +uint ha_partition::max_supported_keys() const +{ + return min_of_the_max_uint(&handler::max_supported_keys); +} + + +uint ha_partition::min_record_length(uint options) const +{ + handler **file; + uint max= (*m_file)->min_record_length(options); + + for (file= m_file, file++; *file; file++) + if (max < (*file)->min_record_length(options)) + max= (*file)->min_record_length(options); + return max; +} + 
/****************************************************************************
                MODULE compare records
****************************************************************************/
/*
  Compare two positions

  SYNOPSIS
    cmp_ref()
    ref1                    First position
    ref2                    Second position

  RETURN VALUE
    <0                      ref1 < ref2
    0                       Equal
    >0                      ref1 > ref2

  DESCRIPTION
    We get two references and need to check if those records are the same.
    If they belong to different partitions we decide that they are not
    the same record. Otherwise we use the particular handler to decide if
    they are the same. Sort in partition id order if not equal.

  MariaDB note:
    Please don't merge the code from MySQL that does this:

    We get two references and need to check if those records are the same.
    If they belong to different partitions we decide that they are not
    the same record. Otherwise we use the particular handler to decide if
    they are the same. Sort in partition id order if not equal.

    It is incorrect, MariaDB has an alternative fix.
*/

int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2)
{
  int cmp;
  uint32 diff1, diff2;
  DBUG_ENTER("ha_partition::cmp_ref");

  /* First compare the engine-level row references within the partitions. */
  cmp= get_open_file_sample()->cmp_ref((ref1 + PARTITION_BYTES_IN_POS),
                                       (ref2 + PARTITION_BYTES_IN_POS));
  if (cmp)
    DBUG_RETURN(cmp);

  /* The first PARTITION_BYTES_IN_POS bytes of a ref hold the partition id. */
  diff2= uint2korr(ref2);
  diff1= uint2korr(ref1);

  if (diff1 == diff2)
  {
    /* This means that the references are same and are in same partition.*/
    DBUG_RETURN(0);
  }

  /*
    In Innodb we compare with either primary key value or global DB_ROW_ID so
    it is not possible that the two references are equal and are in different
    partitions, but in myisam it is possible since we are comparing offsets.
    Remove this assert if DB_ROW_ID is changed to be per partition.
  */
  DBUG_ASSERT(!m_innodb);
  DBUG_RETURN(diff2 > diff1 ? -1 : 1);
}


/****************************************************************************
                MODULE auto increment
****************************************************************************/


/**
  Retrieve new values for part_share->next_auto_inc_val if needed

  This is needed if the value has not been initialized or if one of
  the underlying partitions require that the value should be re-calculated
*/

int ha_partition::update_next_auto_inc_val()
{
  if (!part_share->auto_inc_initialized || need_info_for_auto_inc())
    return info(HA_STATUS_AUTO);
  return 0;
}


/**
  Determine whether a partition needs auto-increment initialization.

  @return
    TRUE                    A partition needs auto-increment initialization
    FALSE                   No partition needs auto-increment initialization

  Resets part_share->auto_inc_initialized if next auto_increment needs to be
  recalculated.
*/

bool ha_partition::need_info_for_auto_inc()
{
  handler **file= m_file;
  DBUG_ENTER("ha_partition::need_info_for_auto_inc");

  do
  {
    if ((*file)->need_info_for_auto_inc())
    {
      /* We have to get new auto_increment values from handler */
      part_share->auto_inc_initialized= FALSE;
      DBUG_RETURN(TRUE);
    }
  } while (*(++file));
  DBUG_RETURN(FALSE);
}


/**
  Determine if all partitions can use the current auto-increment value for
  auto-increment initialization.

  @return
    TRUE                    All partitions can use the current auto-increment
                            value for auto-increment initialization
    FALSE                   All partitions cannot use the current
                            auto-increment value for auto-increment
                            initialization

  Notes
    This function is only called for ::info(HA_STATUS_AUTO) and is
    mainly used by the Spider engine, which returns false
    except in the case of DROP TABLE or ALTER TABLE when it returns TRUE.
    Other engines always returns TRUE for this call.
+*/ + +bool ha_partition::can_use_for_auto_inc_init() +{ + handler **file= m_file; + DBUG_ENTER("ha_partition::can_use_for_auto_inc_init"); + + do + { + if (!(*file)->can_use_for_auto_inc_init()) + DBUG_RETURN(FALSE); + } while (*(++file)); + DBUG_RETURN(TRUE); +} + + +int ha_partition::reset_auto_increment(ulonglong value) +{ + handler **file= m_file; + int res; + DBUG_ENTER("ha_partition::reset_auto_increment"); + lock_auto_increment(); + part_share->auto_inc_initialized= false; + part_share->next_auto_inc_val= 0; + do + { + if ((res= (*file)->ha_reset_auto_increment(value)) != 0) + break; + } while (*(++file)); + unlock_auto_increment(); + DBUG_RETURN(res); +} + + +/** + This method is called by update_auto_increment which in turn is called + by the individual handlers as part of write_row. We use the + part_share->next_auto_inc_val, or search all + partitions for the highest auto_increment_value if not initialized or + if auto_increment field is a secondary part of a key, we must search + every partition when holding a mutex to be sure of correctness. +*/ + +void ha_partition::get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *first_value, + ulonglong *nb_reserved_values) +{ + DBUG_ENTER("ha_partition::get_auto_increment"); + DBUG_PRINT("enter", ("offset: %lu inc: %lu desired_values: %lu " + "first_value: %lu", (ulong) offset, (ulong) increment, + (ulong) nb_desired_values, (ulong) *first_value)); + DBUG_ASSERT(increment); + DBUG_ASSERT(nb_desired_values); + *first_value= 0; + if (table->s->next_number_keypart) + { + /* + next_number_keypart is != 0 if the auto_increment column is a secondary + column in the index (it is allowed in MyISAM) + */ + DBUG_PRINT("info", ("next_number_keypart != 0")); + ulonglong nb_reserved_values_part; + ulonglong first_value_part, max_first_value; + handler **file= m_file; + first_value_part= max_first_value= *first_value; + /* Must find highest value among all partitions. 
*/ + do + { + /* Only nb_desired_values = 1 makes sense */ + (*file)->get_auto_increment(offset, increment, 1, + &first_value_part, &nb_reserved_values_part); + if (unlikely(first_value_part == ULONGLONG_MAX)) // error in one partition + { + *first_value= first_value_part; + /* log that the error was between table/partition handler */ + sql_print_error("Partition failed to reserve auto_increment value"); + DBUG_VOID_RETURN; + } + DBUG_PRINT("info", ("first_value_part: %lu", (ulong) first_value_part)); + set_if_bigger(max_first_value, first_value_part); + } while (*(++file)); + *first_value= max_first_value; + *nb_reserved_values= 1; + } + else + { + THD *thd= ha_thd(); + /* + This is initialized in the beginning of the first write_row call. + */ + DBUG_ASSERT(part_share->auto_inc_initialized); + /* + Get a lock for handling the auto_increment in part_share + for avoiding two concurrent statements getting the same number. + */ + + lock_auto_increment(); + + /* + In a multi-row insert statement like INSERT SELECT and LOAD DATA + where the number of candidate rows to insert is not known in advance + we must hold a lock/mutex for the whole statement if we have statement + based replication. Because the statement-based binary log contains + only the first generated value used by the statement, and slaves assumes + all other generated values used by this statement were consecutive to + this first one, we must exclusively lock the generator until the + statement is done. 
+ */ + if (!auto_increment_safe_stmt_log_lock && + thd->lex->sql_command != SQLCOM_INSERT && + mysql_bin_log.is_open() && + !thd->is_current_stmt_binlog_format_row() && + (thd->variables.option_bits & OPTION_BIN_LOG)) + { + DBUG_PRINT("info", ("locking auto_increment_safe_stmt_log_lock")); + auto_increment_safe_stmt_log_lock= TRUE; + } + + /* this gets corrected (for offset/increment) in update_auto_increment */ + *first_value= part_share->next_auto_inc_val; + part_share->next_auto_inc_val+= nb_desired_values * increment; + + unlock_auto_increment(); + DBUG_PRINT("info", ("*first_value: %lu", (ulong) *first_value)); + *nb_reserved_values= nb_desired_values; + } + DBUG_VOID_RETURN; +} + +void ha_partition::release_auto_increment() +{ + DBUG_ENTER("ha_partition::release_auto_increment"); + + if (table->s->next_number_keypart) + { + uint i; + for (i= bitmap_get_first_set(&m_part_info->lock_partitions); + i < m_tot_parts; + i= bitmap_get_next_set(&m_part_info->lock_partitions, i)) + { + m_file[i]->ha_release_auto_increment(); + } + } + else + { + lock_auto_increment(); + if (next_insert_id) + { + ulonglong next_auto_inc_val= part_share->next_auto_inc_val; + /* + If the current auto_increment values is lower than the reserved + value, and the reserved value was reserved by this thread, + we can lower the reserved value. + */ + if (next_insert_id < next_auto_inc_val && + auto_inc_interval_for_cur_row.maximum() >= next_auto_inc_val) + { + THD *thd= ha_thd(); + /* + Check that we do not lower the value because of a failed insert + with SET INSERT_ID, i.e. forced/non generated values. + */ + if (thd->auto_inc_intervals_forced.maximum() < next_insert_id) + part_share->next_auto_inc_val= next_insert_id; + } + DBUG_PRINT("info", ("part_share->next_auto_inc_val: %lu", + (ulong) part_share->next_auto_inc_val)); + } + /* + Unlock the multi-row statement lock taken in get_auto_increment. 
+ These actions must be performed even if the next_insert_id field + contains zero, otherwise if the update_auto_increment fails then + an unnecessary lock will remain: + */ + if (auto_increment_safe_stmt_log_lock) + { + auto_increment_safe_stmt_log_lock= FALSE; + DBUG_PRINT("info", ("unlocking auto_increment_safe_stmt_log_lock")); + } + + unlock_auto_increment(); + } + DBUG_VOID_RETURN; +} + +/**************************************************************************** + MODULE initialize handler for HANDLER call +****************************************************************************/ + +void ha_partition::init_table_handle_for_HANDLER() +{ + return; +} + + +/** + Calculate the checksum of the table (all partitions) +*/ + +int ha_partition::pre_calculate_checksum() +{ + int error; + DBUG_ENTER("ha_partition::pre_calculate_checksum"); + m_pre_calling= TRUE; + if ((table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM))) + { + handler **file= m_file; + do + { + if ((error= (*file)->pre_calculate_checksum())) + { + DBUG_RETURN(error); + } + } while (*(++file)); + } + DBUG_RETURN(0); +} + + +int ha_partition::calculate_checksum() +{ + int error; + stats.checksum= 0; + stats.checksum_null= TRUE; + + DBUG_ENTER("ha_partition::calculate_checksum"); + if (!m_pre_calling) + { + if ((error= pre_calculate_checksum())) + { + m_pre_calling= FALSE; + DBUG_RETURN(error); + } + } + m_pre_calling= FALSE; + + handler **file= m_file; + do + { + if ((error= (*file)->calculate_checksum())) + { + DBUG_RETURN(error); + } + if (!(*file)->stats.checksum_null) + { + stats.checksum+= (*file)->stats.checksum; + stats.checksum_null= FALSE; + } + } while (*(++file)); + DBUG_RETURN(0); +} + + +/**************************************************************************** + MODULE enable/disable indexes +****************************************************************************/ + +/* + Disable indexes for a while + SYNOPSIS + disable_indexes() + mode Mode + RETURN VALUES + 0 Success 
+ != 0 Error +*/ + +int ha_partition::disable_indexes(uint mode) +{ + handler **file; + int error= 0; + + DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions))); + for (file= m_file; *file; file++) + { + if (unlikely((error= (*file)->ha_disable_indexes(mode)))) + break; + } + return error; +} + + +/* + Enable indexes again + SYNOPSIS + enable_indexes() + mode Mode + RETURN VALUES + 0 Success + != 0 Error +*/ + +int ha_partition::enable_indexes(uint mode) +{ + handler **file; + int error= 0; + + DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions))); + for (file= m_file; *file; file++) + { + if (unlikely((error= (*file)->ha_enable_indexes(mode)))) + break; + } + return error; +} + + +/* + Check if indexes are disabled + SYNOPSIS + indexes_are_disabled() + + RETURN VALUES + 0 Indexes are enabled + != 0 Indexes are disabled +*/ + +int ha_partition::indexes_are_disabled(void) +{ + handler **file; + int error= 0; + + DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions))); + for (file= m_file; *file; file++) + { + if (unlikely((error= (*file)->indexes_are_disabled()))) + break; + } + return error; +} + + +/** + Check/fix misplaced rows. + + @param read_part_id Partition to check/fix. + @param repair If true, move misplaced rows to correct partition. + + @return Operation status. + @retval HA_ADMIN_OK Success + @retval != HA_ADMIN_OK Error +*/ + +int ha_partition::check_misplaced_rows(uint read_part_id, bool do_repair) +{ + int result= 0; + uint32 correct_part_id; + longlong func_value; + longlong num_misplaced_rows= 0; + + DBUG_ENTER("ha_partition::check_misplaced_rows"); + + DBUG_ASSERT(m_file); + + if (m_part_info->vers_info && + read_part_id != m_part_info->vers_info->now_part->id && + !m_part_info->vers_info->interval.is_set()) + { + /* Skip this check as it is not supported for non-INTERVAL history partitions. */ + DBUG_RETURN(HA_ADMIN_OK); + } + + if (do_repair) + { + /* We must read the full row, if we need to move it! 
*/ + bitmap_set_all(table->read_set); + bitmap_set_all(table->write_set); + } + else + { + /* Only need to read the partitioning fields. */ + bitmap_union(table->read_set, &m_part_info->full_part_field_set); + } + + if ((result= m_file[read_part_id]->ha_rnd_init(1))) + DBUG_RETURN(result); + + while (true) + { + if ((result= m_file[read_part_id]->ha_rnd_next(m_rec0))) + { + if (result != HA_ERR_END_OF_FILE) + break; + + if (num_misplaced_rows > 0) + { + print_admin_msg(ha_thd(), MYSQL_ERRMSG_SIZE, &msg_warning, + table_share->db.str, table->alias, + &opt_op_name[REPAIR_PARTS], + "Moved %lld misplaced rows", + num_misplaced_rows); + } + /* End-of-file reached, all rows are now OK, reset result and break. */ + result= 0; + break; + } + + result= m_part_info->get_partition_id(m_part_info, &correct_part_id, + &func_value); + if (result) + break; + + if (correct_part_id != read_part_id) + { + num_misplaced_rows++; + if (!do_repair) + { + /* Check. */ + print_admin_msg(ha_thd(), MYSQL_ERRMSG_SIZE, &msg_error, + table_share->db.str, table->alias, + &opt_op_name[CHECK_PARTS], + "Found a misplaced row"); + /* Break on first misplaced row! */ + result= HA_ADMIN_NEEDS_UPGRADE; + break; + } + else + { + DBUG_PRINT("info", ("Moving row from partition %u to %u", + (uint) read_part_id, (uint) correct_part_id)); + + /* + Insert row into correct partition. Notice that there are no commit + for every N row, so the repair will be one large transaction! + */ + if ((result= m_file[correct_part_id]->ha_write_row(m_rec0))) + { + /* + We have failed to insert a row, it might have been a duplicate! 
+ */ + char buf[MAX_KEY_LENGTH]; + String str(buf,sizeof(buf),system_charset_info); + str.length(0); + if (result == HA_ERR_FOUND_DUPP_KEY) + { + str.append(STRING_WITH_LEN("Duplicate key found, " + "please update or delete the " + "record:\n")); + result= HA_ADMIN_CORRUPT; + } + m_err_rec= NULL; + append_row_to_str(str); + + /* + If the engine supports transactions, the failure will be + rolled back + */ + if (!m_file[correct_part_id]->has_transactions_and_rollback()) + { + /* Log this error, so the DBA can notice it and fix it! */ + sql_print_error("Table '%-192s' failed to move/insert a row" + " from part %u into part %u:\n%s", + table->s->table_name.str, + (uint) read_part_id, + (uint) correct_part_id, + str.c_ptr_safe()); + } + print_admin_msg(ha_thd(), MYSQL_ERRMSG_SIZE, &msg_error, + table_share->db.str, table->alias, + &opt_op_name[REPAIR_PARTS], + "Failed to move/insert a row" + " from part %u into part %u:\n%s", + (uint) read_part_id, + (uint) correct_part_id, + str.c_ptr_safe()); + break; + } + + /* Delete row from wrong partition. */ + if ((result= m_file[read_part_id]->ha_delete_row(m_rec0))) + { + if (m_file[correct_part_id]->has_transactions_and_rollback()) + break; + /* + We have introduced a duplicate, since we failed to remove it + from the wrong partition. + */ + char buf[MAX_KEY_LENGTH]; + String str(buf,sizeof(buf),system_charset_info); + str.length(0); + m_err_rec= NULL; + append_row_to_str(str); + + /* Log this error, so the DBA can notice it and fix it! */ + sql_print_error("Table '%-192s': Delete from part %u failed with" + " error %d. But it was already inserted into" + " part %u, when moving the misplaced row!" + "\nPlease manually fix the duplicate row:\n%s", + table->s->table_name.str, + (uint) read_part_id, + result, + (uint) correct_part_id, + str.c_ptr_safe()); + break; + } + } + } + } + + int tmp_result= m_file[read_part_id]->ha_rnd_end(); + DBUG_RETURN(result ? 
result : tmp_result); +} + + +#define KEY_PARTITIONING_CHANGED_STR \ + "KEY () partitioning changed, please run:\n" \ + "ALTER TABLE %s.%s ALGORITHM = INPLACE %s" + +int ha_partition::check_for_upgrade(HA_CHECK_OPT *check_opt) +{ + int error= HA_ADMIN_NEEDS_CHECK; + DBUG_ENTER("ha_partition::check_for_upgrade"); + + /* + This is called even without FOR UPGRADE, + if the .frm version is lower than the current version. + In that case return that it needs checking! + */ + if (!(check_opt->sql_flags & TT_FOR_UPGRADE)) + DBUG_RETURN(error); + + /* + Partitions will be checked for during their ha_check! + + Check if KEY (sub)partitioning was used and any field's hash calculation + differs from 5.1, see bug#14521864. + */ + if (table->s->mysql_version < 50503 && // 5.1 table (<5.5.3) + ((m_part_info->part_type == HASH_PARTITION && // KEY partitioned + m_part_info->list_of_part_fields) || + (m_is_sub_partitioned && // KEY subpartitioned + m_part_info->list_of_subpart_fields))) + { + Field **field; + if (m_is_sub_partitioned) + { + field= m_part_info->subpart_field_array; + } + else + { + field= m_part_info->part_field_array; + } + for (; *field; field++) + { + switch ((*field)->real_type()) { + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_LONG: + case MYSQL_TYPE_FLOAT: + case MYSQL_TYPE_DOUBLE: + case MYSQL_TYPE_NEWDECIMAL: + case MYSQL_TYPE_TIMESTAMP: + case MYSQL_TYPE_LONGLONG: + case MYSQL_TYPE_INT24: + case MYSQL_TYPE_TIME: + case MYSQL_TYPE_DATETIME: + case MYSQL_TYPE_YEAR: + case MYSQL_TYPE_NEWDATE: + case MYSQL_TYPE_ENUM: + case MYSQL_TYPE_SET: + { + THD *thd= ha_thd(); + char *part_buf; + String db_name, table_name; + uint part_buf_len; + bool skip_generation= false; + partition_info::enum_key_algorithm old_algorithm; + old_algorithm= m_part_info->key_algorithm; + error= HA_ADMIN_FAILED; + append_identifier(ha_thd(), &db_name, &table_share->db); + append_identifier(ha_thd(), &table_name, &table_share->table_name); + if 
(m_part_info->key_algorithm != partition_info::KEY_ALGORITHM_NONE) + { + /* + Only possible when someone tampered with .frm files, + like during tests :) + */ + skip_generation= true; + } + m_part_info->key_algorithm= partition_info::KEY_ALGORITHM_51; + if (skip_generation || + !(part_buf= generate_partition_syntax_for_frm(thd, m_part_info, + &part_buf_len, + NULL, NULL)) || + print_admin_msg(thd, SQL_ADMIN_MSG_TEXT_SIZE + 1, &msg_error, + table_share->db.str, + table->alias, + &opt_op_name[CHECK_PARTS], + KEY_PARTITIONING_CHANGED_STR, + db_name.c_ptr_safe(), + table_name.c_ptr_safe(), + part_buf)) + { + /* Error creating admin message (too long string?). */ + print_admin_msg(thd, MYSQL_ERRMSG_SIZE, &msg_error, + table_share->db.str, table->alias, + &opt_op_name[CHECK_PARTS], + KEY_PARTITIONING_CHANGED_STR, + db_name.c_ptr_safe(), table_name.c_ptr_safe(), + ", but add ALGORITHM = 1" + " between 'KEY' and '(' to change the metadata" + " without the need of a full table rebuild."); + } + m_part_info->key_algorithm= old_algorithm; + DBUG_RETURN(error); + } + default: + /* Not affected! */ + ; + } + } + } + + DBUG_RETURN(error); +} + + +TABLE_LIST *ha_partition::get_next_global_for_child() +{ + handler **file; + DBUG_ENTER("ha_partition::get_next_global_for_child"); + for (file= m_file; *file; file++) + { + TABLE_LIST *table_list; + if ((table_list= (*file)->get_next_global_for_child())) + DBUG_RETURN(table_list); + } + DBUG_RETURN(0); +} + + +/** + Push an engine condition to the condition stack of the storage engine + for each partition. + + @param cond Pointer to the engine condition to be pushed. + + @return NULL Underlying engine will not return rows that + do not match the passed condition. + <> NULL 'Remainder' condition that the caller must use + to filter out records. 
*/

const COND *ha_partition::cond_push(const COND *cond)
{
  uint i;
  COND *res_cond= NULL;
  DBUG_ENTER("ha_partition::cond_push");

  /*
    Visit only partitions tracked for reset, and among those only the ones
    that are actually open (closed partitions have no handler to push to).
  */
  for (i= bitmap_get_first_set(&m_partitions_to_reset);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_partitions_to_reset, i))
  {
    if (bitmap_is_set(&m_opened_partitions, i))
    {
      if (m_file[i]->pushed_cond != cond)
      {
        /*
          A non-zero return from the underlying cond_push means that engine
          could not absorb the condition; in that case report the condition
          back as the 'remainder' the SQL layer must evaluate itself.
        */
        if (m_file[i]->cond_push(cond))
          res_cond= (COND *) cond;
        else
          m_file[i]->pushed_cond= cond;
      }
    }
  }
  DBUG_RETURN(res_cond);
}


/**
  Pop the top condition from the condition stack of the storage engine
  for each partition.
*/

void ha_partition::cond_pop()
{
  uint i;
  DBUG_ENTER("ha_partition::cond_pop");

  /* Mirror of cond_push: pop only from open partitions marked for reset. */
  for (i= bitmap_get_first_set(&m_partitions_to_reset);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_partitions_to_reset, i))
  {
    if (bitmap_is_set(&m_opened_partitions, i))
    {
      m_file[i]->cond_pop();
    }
  }
  DBUG_VOID_RETURN;
}


/**
  Perform bulk update preparation on each partition.

  SYNOPSIS
    start_bulk_update()

  RETURN VALUE
    TRUE                         Error
    FALSE                        Success
*/

bool ha_partition::start_bulk_update()
{
  handler **file= m_file;
  DBUG_ENTER("ha_partition::start_bulk_update");

  /*
    If the update writes any field used by the partitioning function, a row
    may have to move between partitions, so bulk update cannot be used.
  */
  if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
                            table->write_set))
    DBUG_RETURN(TRUE);

  /* Clear per-partition copy statistics, then prepare every partition. */
  do
  {
    bzero(&(*file)->copy_info, sizeof((*file)->copy_info));
    if ((*file)->start_bulk_update())
      DBUG_RETURN(TRUE);
  } while (*(++file));
  DBUG_RETURN(FALSE);
}


/**
  Perform bulk update execution on each partition. A bulk update allows
  a handler to batch the updated rows instead of performing the updates
  one row at a time.
+ + SYNOPSIS + exec_bulk_update() + + RETURN VALUE + TRUE Error + FALSE Success +*/ + +int ha_partition::exec_bulk_update(ha_rows *dup_key_found) +{ + int error; + handler **file= m_file; + DBUG_ENTER("ha_partition::exec_bulk_update"); + + do + { + if (unlikely((error= (*file)->exec_bulk_update(dup_key_found)))) + DBUG_RETURN(error); + } while (*(++file)); + DBUG_RETURN(0); +} + + +/** + Perform bulk update cleanup on each partition. + + SYNOPSIS + end_bulk_update() + + RETURN VALUE + NONE +*/ + +int ha_partition::end_bulk_update() +{ + int error= 0; + handler **file= m_file; + DBUG_ENTER("ha_partition::end_bulk_update"); + + do + { + int tmp; + if ((tmp= (*file)->end_bulk_update())) + error= tmp; + } while (*(++file)); + sum_copy_infos(); + DBUG_RETURN(error); +} + + +/** + Add the row to the bulk update on the partition on which the row is stored. + A bulk update allows a handler to batch the updated rows instead of + performing the updates one row at a time. + + SYNOPSIS + bulk_update_row() + old_data Old record + new_data New record + dup_key_found Number of duplicate keys found + + RETURN VALUE + >1 Error + 1 Bulk update not used, normal operation used + 0 Bulk update used by handler +*/ + +int ha_partition::bulk_update_row(const uchar *old_data, const uchar *new_data, + ha_rows *dup_key_found) +{ + int error= 0; + uint32 part_id; + longlong func_value; + DBUG_ENTER("ha_partition::bulk_update_row"); + + MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, &table->read_set); + error= m_part_info->get_partition_id(m_part_info, &part_id, + &func_value); + dbug_tmp_restore_column_map(&table->read_set, old_map); + if (unlikely(error)) + { + m_part_info->err_value= func_value; + goto end; + } + + error= m_file[part_id]->ha_bulk_update_row(old_data, new_data, + dup_key_found); + +end: + DBUG_RETURN(error); +} + + +/** + Perform bulk delete preparation on each partition. 
+ + SYNOPSIS + start_bulk_delete() + + RETURN VALUE + TRUE Error + FALSE Success +*/ + +bool ha_partition::start_bulk_delete() +{ + handler **file= m_file; + DBUG_ENTER("ha_partition::start_bulk_delete"); + + do + { + if ((*file)->start_bulk_delete()) + DBUG_RETURN(TRUE); + } while (*(++file)); + DBUG_RETURN(FALSE); +} + + +/** + Perform bulk delete cleanup on each partition. + + SYNOPSIS + end_bulk_delete() + + RETURN VALUE + >0 Error + 0 Success +*/ + +int ha_partition::end_bulk_delete() +{ + int error= 0; + handler **file= m_file; + DBUG_ENTER("ha_partition::end_bulk_delete"); + + do + { + int tmp; + if ((tmp= (*file)->end_bulk_delete())) + error= tmp; + } while (*(++file)); + sum_copy_infos(); + DBUG_RETURN(error); +} + + +bool ha_partition::check_if_updates_are_ignored(const char *op) const +{ + return (handler::check_if_updates_are_ignored(op) || + ha_check_if_updates_are_ignored(table->in_use, partition_ht(), op)); +} + +/** + Perform initialization for a direct update request. + + SYNOPSIS + direct_update_rows_init() + update fields Pointer to the list of fields to update + + RETURN VALUE + >0 Error + 0 Success +*/ + +int ha_partition::direct_update_rows_init(List *update_fields) +{ + int error; + uint i, found; + handler *file; + DBUG_ENTER("ha_partition::direct_update_rows_init"); + + if (bitmap_is_overlapping(&m_part_info->full_part_field_set, + table->write_set)) + { + DBUG_PRINT("info", ("partition FALSE by updating part_key")); + DBUG_RETURN(HA_ERR_WRONG_COMMAND); + } + + m_part_spec.start_part= 0; + m_part_spec.end_part= m_tot_parts - 1; + m_direct_update_part_spec= m_part_spec; + + found= 0; + for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++) + { + if (bitmap_is_set(&(m_part_info->read_partitions), i) && + bitmap_is_set(&(m_part_info->lock_partitions), i)) + { + file= m_file[i]; + if (unlikely((error= (m_pre_calling ? 
+ file->pre_direct_update_rows_init(update_fields) : + file->direct_update_rows_init(update_fields))))) + { + DBUG_PRINT("info", ("partition FALSE by storage engine")); + DBUG_RETURN(error); + } + found++; + } + } + + TABLE_LIST *table_list= table->pos_in_table_list; + if (found != 1 && table_list) + { + while (table_list->parent_l) + table_list= table_list->parent_l; + st_select_lex *select_lex= table_list->select_lex; + DBUG_PRINT("info", ("partition select_lex: %p", select_lex)); + if (select_lex && select_lex->limit_params.explicit_limit) + { + DBUG_PRINT("info", ("partition explicit_limit=TRUE")); + DBUG_PRINT("info", ("partition offset_limit: %p", + select_lex->limit_params.offset_limit)); + DBUG_PRINT("info", ("partition select_limit: %p", + select_lex->limit_params.select_limit)); + DBUG_PRINT("info", ("partition FALSE by select_lex")); + DBUG_RETURN(HA_ERR_WRONG_COMMAND); + } + } + DBUG_PRINT("info", ("partition OK")); + DBUG_RETURN(0); +} + + +/** + Do initialization for performing parallel direct update + for a handlersocket update request. + + SYNOPSIS + pre_direct_update_rows_init() + update fields Pointer to the list of fields to update + + RETURN VALUE + >0 Error + 0 Success +*/ + +int ha_partition::pre_direct_update_rows_init(List *update_fields) +{ + bool save_m_pre_calling; + int error; + DBUG_ENTER("ha_partition::pre_direct_update_rows_init"); + save_m_pre_calling= m_pre_calling; + m_pre_calling= TRUE; + error= direct_update_rows_init(update_fields); + m_pre_calling= save_m_pre_calling; + DBUG_RETURN(error); +} + + +/** + Execute a direct update request. A direct update request updates all + qualified rows in a single operation, rather than one row at a time. + The direct update operation is pushed down to each individual + partition. 

  SYNOPSIS
    direct_update_rows()
    update_rows                  Number of updated rows

  RETURN VALUE
    >0                           Error
    0                            Success
*/

int ha_partition::direct_update_rows(ha_rows *update_rows_result,
                                     ha_rows *found_rows_result)
{
  int error;
  bool rnd_seq= FALSE;
  ha_rows update_rows= 0;
  ha_rows found_rows= 0;
  uint32 i;
  DBUG_ENTER("ha_partition::direct_update_rows");

  /* If first call to direct_update_rows with RND scan */
  if ((m_pre_calling ? pre_inited : inited) == RND && m_scan_value == 1)
  {
    rnd_seq= TRUE;
    m_scan_value= 2;
  }

  /* Reset the caller's totals before accumulating per-partition counts. */
  *update_rows_result= 0;
  *found_rows_result= 0;
  for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
  {
    handler *file= m_file[i];
    /* Only partitions that are both read and locked take part. */
    if (bitmap_is_set(&(m_part_info->read_partitions), i) &&
        bitmap_is_set(&(m_part_info->lock_partitions), i))
    {
      /* Lazily start the RND scan on a partition that has not been inited. */
      if (rnd_seq && (m_pre_calling ? file->pre_inited : file->inited) == NONE)
      {
        if (unlikely((error= (m_pre_calling ?
                              file->ha_pre_rnd_init(TRUE) :
                              file->ha_rnd_init(TRUE)))))
          DBUG_RETURN(error);
      }
      if (unlikely((error= (m_pre_calling ?
                            (file)->pre_direct_update_rows() :
                            (file)->ha_direct_update_rows(&update_rows,
                                                          &found_rows)))))
      {
        /* End the scan we started above before propagating the error. */
        if (rnd_seq)
        {
          if (m_pre_calling)
            file->ha_pre_rnd_end();
          else
            file->ha_rnd_end();
        }
        DBUG_RETURN(error);
      }
      /* Aggregate this partition's counters into the caller's totals. */
      *update_rows_result+= update_rows;
      *found_rows_result+= found_rows;
    }
    if (rnd_seq)
    {
      if (unlikely((error= (m_pre_calling ?
                            file->ha_pre_index_or_rnd_end() :
                            file->ha_index_or_rnd_end()))))
        DBUG_RETURN(error);
    }
  }
  DBUG_RETURN(0);
}


/**
  Start parallel execution of a direct update for a handlersocket update
  request. A direct update request updates all qualified rows in a single
  operation, rather than one row at a time. The direct update operation
  is pushed down to each individual partition.
+ + SYNOPSIS + pre_direct_update_rows() + + RETURN VALUE + >0 Error + 0 Success +*/ + +int ha_partition::pre_direct_update_rows() +{ + bool save_m_pre_calling; + int error; + ha_rows not_used= 0; + DBUG_ENTER("ha_partition::pre_direct_update_rows"); + save_m_pre_calling= m_pre_calling; + m_pre_calling= TRUE; + error= direct_update_rows(¬_used, ¬_used); + m_pre_calling= save_m_pre_calling; + DBUG_RETURN(error); +} + + +/** + Perform initialization for a direct delete request. + + SYNOPSIS + direct_delete_rows_init() + + RETURN VALUE + >0 Error + 0 Success +*/ + +int ha_partition::direct_delete_rows_init() +{ + int error; + uint i, found; + DBUG_ENTER("ha_partition::direct_delete_rows_init"); + + m_part_spec.start_part= 0; + m_part_spec.end_part= m_tot_parts - 1; + m_direct_update_part_spec= m_part_spec; + + found= 0; + for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++) + { + if (bitmap_is_set(&(m_part_info->read_partitions), i) && + bitmap_is_set(&(m_part_info->lock_partitions), i)) + { + handler *file= m_file[i]; + if (unlikely((error= (m_pre_calling ? 
+ file->pre_direct_delete_rows_init() : + file->direct_delete_rows_init())))) + { + DBUG_PRINT("exit", ("error in direct_delete_rows_init")); + DBUG_RETURN(error); + } + found++; + } + } + + TABLE_LIST *table_list= table->pos_in_table_list; + if (found != 1 && table_list) + { + while (table_list->parent_l) + table_list= table_list->parent_l; + st_select_lex *select_lex= table_list->select_lex; + DBUG_PRINT("info", ("partition select_lex: %p", select_lex)); + if (select_lex && select_lex->limit_params.explicit_limit) + { + DBUG_PRINT("info", ("partition explicit_limit: TRUE")); + DBUG_PRINT("info", ("partition offset_limit: %p", + select_lex->limit_params.offset_limit)); + DBUG_PRINT("info", ("partition select_limit: %p", + select_lex->limit_params.select_limit)); + DBUG_PRINT("info", ("partition FALSE by select_lex")); + DBUG_RETURN(HA_ERR_WRONG_COMMAND); + } + } + DBUG_PRINT("exit", ("OK")); + DBUG_RETURN(0); +} + + +/** + Do initialization for performing parallel direct delete + for a handlersocket delete request. + + SYNOPSIS + pre_direct_delete_rows_init() + + RETURN VALUE + >0 Error + 0 Success +*/ + +int ha_partition::pre_direct_delete_rows_init() +{ + bool save_m_pre_calling; + int error; + DBUG_ENTER("ha_partition::pre_direct_delete_rows_init"); + save_m_pre_calling= m_pre_calling; + m_pre_calling= TRUE; + error= direct_delete_rows_init(); + m_pre_calling= save_m_pre_calling; + DBUG_RETURN(error); +} + + +/** + Execute a direct delete request. A direct delete request deletes all + qualified rows in a single operation, rather than one row at a time. + The direct delete operation is pushed down to each individual + partition. 
+ + SYNOPSIS + direct_delete_rows() + delete_rows Number of deleted rows + + RETURN VALUE + >0 Error + 0 Success +*/ + +int ha_partition::direct_delete_rows(ha_rows *delete_rows_result) +{ + int error; + bool rnd_seq= FALSE; + ha_rows delete_rows= 0; + uint32 i; + handler *file; + DBUG_ENTER("ha_partition::direct_delete_rows"); + + if ((m_pre_calling ? pre_inited : inited) == RND && m_scan_value == 1) + { + rnd_seq= TRUE; + m_scan_value= 2; + } + + *delete_rows_result= 0; + m_part_spec= m_direct_update_part_spec; + for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++) + { + file= m_file[i]; + if (bitmap_is_set(&(m_part_info->read_partitions), i) && + bitmap_is_set(&(m_part_info->lock_partitions), i)) + { + if (rnd_seq && (m_pre_calling ? file->pre_inited : file->inited) == NONE) + { + if (unlikely((error= (m_pre_calling ? + file->ha_pre_rnd_init(TRUE) : + file->ha_rnd_init(TRUE))))) + DBUG_RETURN(error); + } + if ((error= (m_pre_calling ? + file->pre_direct_delete_rows() : + file->ha_direct_delete_rows(&delete_rows)))) + { + if (m_pre_calling) + file->ha_pre_rnd_end(); + else + file->ha_rnd_end(); + DBUG_RETURN(error); + } + delete_rows_result+= delete_rows; + } + if (rnd_seq) + { + if (unlikely((error= (m_pre_calling ? + file->ha_pre_index_or_rnd_end() : + file->ha_index_or_rnd_end())))) + DBUG_RETURN(error); + } + } + DBUG_RETURN(0); +} + + +/** + Start parallel execution of a direct delete for a handlersocket delete + request. A direct delete request deletes all qualified rows in a single + operation, rather than one row at a time. The direct delete operation + is pushed down to each individual partition. 
+ + SYNOPSIS + pre_direct_delete_rows() + + RETURN VALUE + >0 Error + 0 Success +*/ + +int ha_partition::pre_direct_delete_rows() +{ + bool save_m_pre_calling; + int error; + ha_rows not_used; + DBUG_ENTER("ha_partition::pre_direct_delete_rows"); + save_m_pre_calling= m_pre_calling; + m_pre_calling= TRUE; + error= direct_delete_rows(¬_used); + m_pre_calling= save_m_pre_calling; + DBUG_RETURN(error); +} + +/** + Push metadata for the current operation down to each partition. + + SYNOPSIS + info_push() + + RETURN VALUE + >0 Error + 0 Success +*/ + +int ha_partition::info_push(uint info_type, void *info) +{ + int error= 0, tmp; + uint i; + DBUG_ENTER("ha_partition::info_push"); + + for (i= bitmap_get_first_set(&m_partitions_to_reset); + i < m_tot_parts; + i= bitmap_get_next_set(&m_partitions_to_reset, i)) + { + if (bitmap_is_set(&m_opened_partitions, i)) + { + if ((tmp= m_file[i]->info_push(info_type, info))) + { + error= tmp; + } + } + } + DBUG_RETURN(error); +} + + +bool +ha_partition::can_convert_nocopy(const Field &field, + const Column_definition &new_type) const +{ + for (uint index= 0; index < m_tot_parts; index++) + { + if (!m_file[index]->can_convert_nocopy(field, new_type)) + return false; + } + return true; +} + +struct st_mysql_storage_engine partition_storage_engine= +{ MYSQL_HANDLERTON_INTERFACE_VERSION }; + +maria_declare_plugin(partition) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &partition_storage_engine, + "partition", + "Mikael Ronstrom, MySQL AB", + "Partition Storage Engine Helper", + PLUGIN_LICENSE_GPL, + partition_initialize, /* Plugin Init */ + NULL, /* Plugin Deinit */ + 0x0100, /* 1.0 */ + NULL, /* status variables */ + NULL, /* system variables */ + "1.0", /* string version */ + MariaDB_PLUGIN_MATURITY_STABLE /* maturity */ +} +maria_declare_plugin_end; + +#endif diff --git a/sql/ha_partition.h b/sql/ha_partition.h new file mode 100644 index 00000000..49e212f6 --- /dev/null +++ b/sql/ha_partition.h @@ -0,0 +1,1654 @@ +#ifndef HA_PARTITION_INCLUDED 
+#define HA_PARTITION_INCLUDED + +/* + Copyright (c) 2005, 2012, Oracle and/or its affiliates. + Copyright (c) 2009, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "sql_partition.h" /* part_id_range, partition_element */ +#include "queues.h" /* QUEUE */ + +struct Ordered_blob_storage +{ + String blob; + bool set_read_value; + Ordered_blob_storage() : set_read_value(false) + {} +}; + +#define PAR_EXT ".par" +#define PARTITION_BYTES_IN_POS 2 +#define ORDERED_PART_NUM_OFFSET sizeof(Ordered_blob_storage **) +#define ORDERED_REC_OFFSET (ORDERED_PART_NUM_OFFSET + PARTITION_BYTES_IN_POS) + + +/** Struct used for partition_name_hash */ +typedef struct st_part_name_def +{ + uchar *partition_name; + uint length; + uint32 part_id; + my_bool is_subpart; +} PART_NAME_DEF; + +/** class where to save partitions Handler_share's */ +class Parts_share_refs +{ +public: + uint num_parts; /**< Size of ha_share array */ + Handler_share **ha_shares; /**< Storage for each part */ + Parts_share_refs() + { + num_parts= 0; + ha_shares= NULL; + } + ~Parts_share_refs() + { + uint i; + for (i= 0; i < num_parts; i++) + delete ha_shares[i]; + delete[] ha_shares; + } + bool init(uint arg_num_parts) + { + DBUG_ASSERT(!num_parts && !ha_shares); + num_parts= arg_num_parts; + /* Allocate an array of Handler_share pointers */ + ha_shares= new Handler_share 
*[num_parts]; + if (!ha_shares) + { + num_parts= 0; + return true; + } + memset(ha_shares, 0, sizeof(Handler_share*) * num_parts); + return false; + } +}; + +class ha_partition; + +/* Partition Full Text Search info */ +struct st_partition_ft_info +{ + struct _ft_vft *please; + st_partition_ft_info *next; + ha_partition *file; + FT_INFO **part_ft_info; +}; + + +#ifdef HAVE_PSI_MUTEX_INTERFACE +extern PSI_mutex_key key_partition_auto_inc_mutex; +#endif + +/** + Partition specific Handler_share. +*/ +class Partition_share : public Handler_share +{ +public: + bool auto_inc_initialized; + mysql_mutex_t auto_inc_mutex; /**< protecting auto_inc val */ + ulonglong next_auto_inc_val; /**< first non reserved value */ + /** + Hash of partition names. Initialized in the first ha_partition::open() + for the table_share. After that it is read-only, i.e. no locking required. + */ + bool partition_name_hash_initialized; + HASH partition_name_hash; + const char *partition_engine_name; + /** Storage for each partitions Handler_share */ + Parts_share_refs partitions_share_refs; + Partition_share() + : auto_inc_initialized(false), + next_auto_inc_val(0), + partition_name_hash_initialized(false), + partition_engine_name(NULL), + partition_names(NULL) + { + mysql_mutex_init(key_partition_auto_inc_mutex, + &auto_inc_mutex, + MY_MUTEX_INIT_FAST); + } + + ~Partition_share() + { + mysql_mutex_destroy(&auto_inc_mutex); + if (partition_names) + { + my_free(partition_names); + } + if (partition_name_hash_initialized) + { + my_hash_free(&partition_name_hash); + } + } + + bool init(uint num_parts); + + /** + Release reserved auto increment values not used. + @param thd Thread. + @param table_share Table Share + @param next_insert_id Next insert id (first non used auto inc value). + @param max_reserved End of reserved auto inc range. 
+ */ + void release_auto_inc_if_possible(THD *thd, TABLE_SHARE *table_share, + const ulonglong next_insert_id, + const ulonglong max_reserved); + + /** lock mutex protecting auto increment value next_auto_inc_val. */ + inline void lock_auto_inc() + { + mysql_mutex_lock(&auto_inc_mutex); + } + /** unlock mutex protecting auto increment value next_auto_inc_val. */ + inline void unlock_auto_inc() + { + mysql_mutex_unlock(&auto_inc_mutex); + } + /** + Populate partition_name_hash with partition and subpartition names + from part_info. + @param part_info Partition info containing all partitions metadata. + + @return Operation status. + @retval false Success. + @retval true Failure. + */ + bool populate_partition_name_hash(partition_info *part_info); + /** Get partition name. + + @param part_id Partition id (for subpartitioned table only subpartition + names will be returned.) + + @return partition name or NULL if error. + */ + const char *get_partition_name(size_t part_id) const; +private: + const uchar **partition_names; + /** + Insert [sub]partition name into partition_name_hash + @param name Partition name. + @param part_id Partition id. + @param is_subpart True if subpartition else partition. + + @return Operation status. + @retval false Success. + @retval true Failure. + */ + bool insert_partition_name_in_hash(const char *name, + uint part_id, + bool is_subpart); +}; + + +/* + List of ranges to be scanned by ha_partition's MRR implementation + + This object is + - A KEY_MULTI_RANGE structure (the MRR range) + - Storage for the range endpoints that the KEY_MULTI_RANGE has pointers to + - list of such ranges (connected through the "next" pointer). +*/ + +typedef struct st_partition_key_multi_range +{ + /* + Number of the range. The ranges are numbered in the order RANGE_SEQ_IF has + emitted them, starting from 1. The numbering in used by ordered MRR scans. + */ + uint id; + uchar *key[2]; + /* + Sizes of allocated memory in key[]. 
These may be larger then the actual + values as this structure is reused across MRR scans + */ + uint length[2]; + + /* + The range. + key_multi_range.ptr is a pointer to the this PARTITION_KEY_MULTI_RANGE + object + */ + KEY_MULTI_RANGE key_multi_range; + + // Range id from the SQL layer + range_id_t ptr; + + // The next element in the list of MRR ranges. + st_partition_key_multi_range *next; +} PARTITION_KEY_MULTI_RANGE; + + +/* + List of ranges to be scanned in a certain [sub]partition + + The idea is that there's a list of ranges to be scanned in the table + (formed by PARTITION_KEY_MULTI_RANGE structures), + and for each [sub]partition, we only need to scan a subset of that list. + + PKMR1 --> PKMR2 --> PKMR3 -->... // list of PARTITION_KEY_MULTI_RANGE + ^ ^ + | | + PPKMR1 ----------> PPKMR2 -->... // list of PARTITION_PART_KEY_MULTI_RANGE + + This way, per-partition lists of PARTITION_PART_KEY_MULTI_RANGE have pointers + to the elements of the global list of PARTITION_KEY_MULTI_RANGE. +*/ + +typedef struct st_partition_part_key_multi_range +{ + PARTITION_KEY_MULTI_RANGE *partition_key_multi_range; + st_partition_part_key_multi_range *next; +} PARTITION_PART_KEY_MULTI_RANGE; + + +class ha_partition; + +/* + The structure holding information about range sequence to be used with one + partition. 
+ (pointer to this is used as seq_init_param for RANGE_SEQ_IF structure when + invoking MRR for an individual partition) +*/ + +typedef struct st_partition_part_key_multi_range_hld +{ + /* Owner object */ + ha_partition *partition; + + /* id of the the partition this structure is for */ + uint32 part_id; + + /* Current range we're iterating through */ + PARTITION_PART_KEY_MULTI_RANGE *partition_part_key_multi_range; +} PARTITION_PART_KEY_MULTI_RANGE_HLD; + + +extern "C" int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2); +extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); + +class ha_partition final :public handler +{ +private: + enum partition_index_scan_type + { + partition_index_read= 0, + partition_index_first= 1, + partition_index_last= 3, + partition_index_read_last= 4, + partition_read_range = 5, + partition_no_index_scan= 6, + partition_read_multi_range = 7, + partition_ft_read= 8 + }; + /* Data for the partition handler */ + int m_mode; // Open mode + uint m_open_test_lock; // Open test_if_locked + uchar *m_file_buffer; // Content of the .par file + char *m_name_buffer_ptr; // Pointer to first partition name + MEM_ROOT m_mem_root; + plugin_ref *m_engine_array; // Array of types of the handlers + handler **m_file; // Array of references to handler inst. + uint m_file_tot_parts; // Debug + handler **m_new_file; // Array of references to new handlers + handler **m_reorged_file; // Reorganised partitions + handler **m_added_file; // Added parts kept for errors + LEX_CSTRING *m_connect_string; + partition_info *m_part_info; // local reference to partition + Field **m_part_field_array; // Part field array locally to save acc + uchar *m_ordered_rec_buffer; // Row and key buffer for ord. idx scan + st_partition_ft_info *ft_first; + st_partition_ft_info *ft_current; + /* + Current index. + When used in key_rec_cmp: If clustered pk, index compare + must compare pk if given index is same for two rows. 
+ So normally m_curr_key_info[0]= current index and m_curr_key[1]= NULL, + and if clustered pk, [0]= current index, [1]= pk, [2]= NULL + */ + KEY *m_curr_key_info[3]; // Current index + uchar *m_rec0; // table->record[0] + const uchar *m_err_rec; // record which gave error + QUEUE m_queue; // Prio queue used by sorted read + + /* + Length of an element in m_ordered_rec_buffer. The elements are composed of + + [part_no] [table->record copy] [underlying_table_rowid] + + underlying_table_rowid is only stored when the table has no extended keys. + */ + size_t m_priority_queue_rec_len; + + /* + If true, then sorting records by key value also sorts them by their + underlying_table_rowid. + */ + bool m_using_extended_keys; + + /* + Since the partition handler is a handler on top of other handlers, it + is necessary to keep information about what the underlying handler + characteristics is. It is not possible to keep any handler instances + for this since the MySQL Server sometimes allocating the handler object + without freeing them. 
+ */ + enum enum_handler_status + { + handler_not_initialized= 0, + handler_initialized, + handler_opened, + handler_closed + }; + enum_handler_status m_handler_status; + + uint m_reorged_parts; // Number of reorganised parts + uint m_tot_parts; // Total number of partitions; + uint m_num_locks; // For engines like ha_blackhole, which needs no locks + uint m_last_part; // Last file that we update,write,read + part_id_range m_part_spec; // Which parts to scan + uint m_scan_value; // Value passed in rnd_init + // call + uint m_ref_length; // Length of position in this + // handler object + key_range m_start_key; // index read key range + enum partition_index_scan_type m_index_scan_type;// What type of index + // scan + uint m_top_entry; // Which partition is to + // deliver next result + uint m_rec_length; // Local copy of record length + + bool m_ordered; // Ordered/Unordered index scan + bool m_create_handler; // Handler used to create table + bool m_is_sub_partitioned; // Is subpartitioned + bool m_ordered_scan_ongoing; + bool m_rnd_init_and_first; + bool m_ft_init_and_first; + + /* + If set, this object was created with ha_partition::clone and doesn't + "own" the m_part_info structure. + */ + ha_partition *m_is_clone_of; + MEM_ROOT *m_clone_mem_root; + + /* + We keep track if all underlying handlers are MyISAM since MyISAM has a + great number of extra flags not needed by other handlers. + */ + bool m_myisam; // Are all underlying handlers + // MyISAM + /* + We keep track of InnoDB handlers below since it requires proper setting + of query_id in fields at index_init and index_read calls. + */ + bool m_innodb; // Are all underlying handlers + // InnoDB + /* + When calling extra(HA_EXTRA_CACHE) we do not pass this to the underlying + handlers immediately. Instead we cache it and call the underlying + immediately before starting the scan on the partition. 
This is to + prevent allocating a READ CACHE for each partition in parallel when + performing a full table scan on MyISAM partitioned table. + This state is cleared by extra(HA_EXTRA_NO_CACHE). + */ + bool m_extra_cache; + uint m_extra_cache_size; + /* The same goes for HA_EXTRA_PREPARE_FOR_UPDATE */ + bool m_extra_prepare_for_update; + /* Which partition has active cache */ + uint m_extra_cache_part_id; + + void init_handler_variables(); + /* + Variables for lock structures. + */ + + bool auto_increment_lock; /**< lock reading/updating auto_inc */ + /** + Flag to keep the auto_increment lock through out the statement. + This to ensure it will work with statement based replication. + */ + bool auto_increment_safe_stmt_log_lock; + /** For optimizing ha_start_bulk_insert calls */ + MY_BITMAP m_bulk_insert_started; + ha_rows m_bulk_inserted_rows; + /** used for prediction of start_bulk_insert rows */ + enum_monotonicity_info m_part_func_monotonicity_info; + part_id_range m_direct_update_part_spec; + bool m_pre_calling; + bool m_pre_call_use_parallel; + /* Keep track of bulk access requests */ + bool bulk_access_executing; + + /** keep track of locked partitions */ + MY_BITMAP m_locked_partitions; + /** Stores shared auto_increment etc. */ + Partition_share *part_share; + void sum_copy_info(handler *file); + void sum_copy_infos(); + void reset_copy_info() override; + /** Temporary storage for new partitions Handler_shares during ALTER */ + List m_new_partitions_share_refs; + /** Sorted array of partition ids in descending order of number of rows. */ + uint32 *m_part_ids_sorted_by_num_of_records; + /* Compare function for my_qsort2, for reversed order. */ + static int compare_number_of_records(ha_partition *me, + const uint32 *a, + const uint32 *b); + /** keep track of partitions to call ha_reset */ + MY_BITMAP m_partitions_to_reset; + /** partitions that returned HA_ERR_KEY_NOT_FOUND. 
*/ + MY_BITMAP m_key_not_found_partitions; + bool m_key_not_found; + List *m_partitions_to_open; + MY_BITMAP m_opened_partitions; + /** This is one of the m_file-s that it guaranteed to be opened. */ + /** It is set in open_read_partitions() */ + handler *m_file_sample; +public: + handler **get_child_handlers() + { + return m_file; + } + ha_partition *get_clone_source() + { + return m_is_clone_of; + } + virtual part_id_range *get_part_spec() + { + return &m_part_spec; + } + virtual uint get_no_current_part_id() + { + return NO_CURRENT_PART_ID; + } + Partition_share *get_part_share() { return part_share; } + handler *clone(const char *name, MEM_ROOT *mem_root) override; + void set_part_info(partition_info *part_info) override + { + m_part_info= part_info; + m_is_sub_partitioned= part_info->is_sub_partitioned(); + } + + void return_record_by_parent() override; + + bool vers_can_native(THD *thd) override + { + if (thd->lex->part_info) + { + // PARTITION BY SYSTEM_TIME is not supported for now + return thd->lex->part_info->part_type != VERSIONING_PARTITION; + } + else + { + bool can= true; + for (uint i= 0; i < m_tot_parts && can; i++) + can= can && m_file[i]->vers_can_native(thd); + return can; + } + } + + /* + ------------------------------------------------------------------------- + MODULE create/delete handler object + ------------------------------------------------------------------------- + Object create/delete method. Normally called when a table object + exists. There is also a method to create the handler object with only + partition information. This is used from mysql_create_table when the + table is to be created and the engine type is deduced to be the + partition handler. 
+ ------------------------------------------------------------------------- + */ + ha_partition(handlerton *hton, TABLE_SHARE * table); + ha_partition(handlerton *hton, partition_info * part_info); + ha_partition(handlerton *hton, TABLE_SHARE *share, + partition_info *part_info_arg, + ha_partition *clone_arg, + MEM_ROOT *clone_mem_root_arg); + ~ha_partition(); + void ha_partition_init(); + /* + A partition handler has no characteristics in itself. It only inherits + those from the underlying handlers. Here we set-up those constants to + enable later calls of the methods to retrieve constants from the under- + lying handlers. Returns false if not successful. + */ + bool initialize_partition(MEM_ROOT *mem_root); + + /* + ------------------------------------------------------------------------- + MODULE meta data changes + ------------------------------------------------------------------------- + Meta data routines to CREATE, DROP, RENAME table and often used at + ALTER TABLE (update_create_info used from ALTER TABLE and SHOW ..). + + create_partitioning_metadata is called before opening a new handler object + with openfrm to call create. 
It is used to create any local handler + object needed in opening the object in openfrm + ------------------------------------------------------------------------- + */ + int delete_table(const char *from) override; + int rename_table(const char *from, const char *to) override; + int create(const char *name, TABLE *form, + HA_CREATE_INFO *create_info) override; + int create_partitioning_metadata(const char *name, + const char *old_name, + chf_create_flags action_flag) + override; + bool check_if_updates_are_ignored(const char *op) const override; + void update_create_info(HA_CREATE_INFO *create_info) override; + int change_partitions(HA_CREATE_INFO *create_info, const char *path, + ulonglong * const copied, ulonglong * const deleted, + const uchar *pack_frm_data, size_t pack_frm_len) + override; + int drop_partitions(const char *path) override; + int rename_partitions(const char *path) override; + bool get_no_parts(const char *, uint *num_parts) override + { + DBUG_ENTER("ha_partition::get_no_parts"); + *num_parts= m_tot_parts; + DBUG_RETURN(0); + } + void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share) override; + bool check_if_incompatible_data(HA_CREATE_INFO *create_info, + uint table_changes) override; + void update_part_create_info(HA_CREATE_INFO *create_info, uint part_id) + { + m_file[part_id]->update_create_info(create_info); + } +private: + int copy_partitions(ulonglong * const copied, ulonglong * const deleted); + void cleanup_new_partition(uint part_count); + int prepare_new_partition(TABLE *table, HA_CREATE_INFO *create_info, + handler *file, const char *part_name, + partition_element *p_elem, + uint disable_non_uniq_indexes); + /* + delete_table and rename_table uses very similar logic which + is packed into this routine. + */ + uint del_ren_table(const char *from, const char *to); + /* + One method to create the table_name.par file containing the names of the + underlying partitions, their engine and the number of partitions. 
+ And one method to read it in. + */ + bool create_handler_file(const char *name); + bool setup_engine_array(MEM_ROOT *mem_root, handlerton *first_engine); + int read_par_file(const char *name); + handlerton *get_def_part_engine(const char *name); + bool get_from_handler_file(const char *name, MEM_ROOT *mem_root, + bool is_clone); + bool re_create_par_file(const char *name); + bool new_handlers_from_part_info(MEM_ROOT *mem_root); + bool create_handlers(MEM_ROOT *mem_root); + void clear_handler_file(); + int set_up_table_before_create(TABLE *table_arg, + const char *partition_name_with_path, + HA_CREATE_INFO *info, + partition_element *p_elem); + partition_element *find_partition_element(uint part_id); + bool insert_partition_name_in_hash(const char *name, uint part_id, + bool is_subpart); + bool populate_partition_name_hash(); + Partition_share *get_share(); + bool set_ha_share_ref(Handler_share **ha_share) override; + void fix_data_dir(char* path); + bool init_partition_bitmaps(); + void free_partition_bitmaps(); + +public: + + /* + ------------------------------------------------------------------------- + MODULE open/close object + ------------------------------------------------------------------------- + Open and close handler object to ensure all underlying files and + objects allocated and deallocated for query handling is handled + properly. + ------------------------------------------------------------------------- + + A handler object is opened as part of its initialisation and before + being used for normal queries (not before meta-data changes always. + If the object was opened it will also be closed before being deleted. 
+ */ + int open(const char *name, int mode, uint test_if_locked) override; + int close() override; + + /* + ------------------------------------------------------------------------- + MODULE start/end statement + ------------------------------------------------------------------------- + This module contains methods that are used to understand start/end of + statements, transaction boundaries, and aid for proper concurrency + control. + The partition handler need not implement abort and commit since this + will be handled by any underlying handlers implementing transactions. + There is only one call to each handler type involved per transaction + and these go directly to the handlers supporting transactions + ------------------------------------------------------------------------- + */ + THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to, + enum thr_lock_type lock_type) override; + int external_lock(THD * thd, int lock_type) override; + LEX_CSTRING *engine_name() override { return hton_name(partition_ht()); } + /* + When table is locked a statement is started by calling start_stmt + instead of external_lock + */ + int start_stmt(THD * thd, thr_lock_type lock_type) override; + /* + Lock count is number of locked underlying handlers (I assume) + */ + uint lock_count() const override; + /* + Call to unlock rows not to be updated in transaction + */ + void unlock_row() override; + /* + Check if semi consistent read + */ + bool was_semi_consistent_read() override; + /* + Call to hint about semi consistent read + */ + void try_semi_consistent_read(bool) override; + + /* + NOTE: due to performance and resource issues with many partitions, + we only use the m_psi on the ha_partition handler, excluding all + partitions m_psi. + */ +#ifdef HAVE_M_PSI_PER_PARTITION + /* + Bind the table/handler thread to track table i/o. 
+ */ + virtual void unbind_psi(); + virtual int rebind(); +#endif + int discover_check_version() override; + /* + ------------------------------------------------------------------------- + MODULE change record + ------------------------------------------------------------------------- + This part of the handler interface is used to change the records + after INSERT, DELETE, UPDATE, REPLACE method calls but also other + special meta-data operations as ALTER TABLE, LOAD DATA, TRUNCATE. + ------------------------------------------------------------------------- + + These methods are used for insert (write_row), update (update_row) + and delete (delete_row). All methods to change data always work on + one row at a time. update_row and delete_row also contains the old + row. + delete_all_rows will delete all rows in the table in one call as a + special optimisation for DELETE from table; + + Bulk inserts are supported if all underlying handlers support it. + start_bulk_insert and end_bulk_insert is called before and after a + number of calls to write_row. 
+ */ + int write_row(const uchar * buf) override; + bool start_bulk_update() override; + int exec_bulk_update(ha_rows *dup_key_found) override; + int end_bulk_update() override; + int bulk_update_row(const uchar *old_data, const uchar *new_data, + ha_rows *dup_key_found) override; + int update_row(const uchar * old_data, const uchar * new_data) override; + int direct_update_rows_init(List *update_fields) override; + int pre_direct_update_rows_init(List *update_fields) override; + int direct_update_rows(ha_rows *update_rows, ha_rows *found_rows) override; + int pre_direct_update_rows() override; + bool start_bulk_delete() override; + int end_bulk_delete() override; + int delete_row(const uchar * buf) override; + int direct_delete_rows_init() override; + int pre_direct_delete_rows_init() override; + int direct_delete_rows(ha_rows *delete_rows) override; + int pre_direct_delete_rows() override; + int delete_all_rows() override; + int truncate() override; + void start_bulk_insert(ha_rows rows, uint flags) override; + int end_bulk_insert() override; +private: + ha_rows guess_bulk_insert_rows(); + void start_part_bulk_insert(THD *thd, uint part_id); + long estimate_read_buffer_size(long original_size); +public: + + /* + Method for truncating a specific partition. + (i.e. ALTER TABLE t1 TRUNCATE PARTITION p). + + @remark This method is a partitioning-specific hook + and thus not a member of the general SE API. + */ + int truncate_partition(Alter_info *, bool *binlog_stmt); + + bool is_fatal_error(int error, uint flags) override + { + if (!handler::is_fatal_error(error, flags) || + error == HA_ERR_NO_PARTITION_FOUND || + error == HA_ERR_NOT_IN_LOCK_PARTITIONS) + return FALSE; + return TRUE; + } + + + /* + ------------------------------------------------------------------------- + MODULE full table scan + ------------------------------------------------------------------------- + This module is used for the most basic access method for any table + handler. 
This is to fetch all data through a full table scan. No + indexes are needed to implement this part. + It contains one method to start the scan (rnd_init) that can also be + called multiple times (typical in a nested loop join). Then proceeding + to the next record (rnd_next) and closing the scan (rnd_end). + To remember a record for later access there is a method (position) + and there is a method used to retrieve the record based on the stored + position. + The position can be a file position, a primary key, a ROWID dependent + on the handler below. + ------------------------------------------------------------------------- + */ + /* + unlike index_init(), rnd_init() can be called two times + without rnd_end() in between (it only makes sense if scan=1). + then the second call should prepare for the new table scan + (e.g if rnd_init allocates the cursor, second call should + position it to the start of the table, no need to deallocate + and allocate it again + */ + int rnd_init(bool scan) override; + int rnd_end() override; + int rnd_next(uchar * buf) override; + int rnd_pos(uchar * buf, uchar * pos) override; + int rnd_pos_by_record(uchar *record) override; + void position(const uchar * record) override; + + /* + ------------------------------------------------------------------------- + MODULE index scan + ------------------------------------------------------------------------- + This part of the handler interface is used to perform access through + indexes. The interface is defined as a scan interface but the handler + can also use key lookup if the index is a unique index or a primary + key index. + Index scans are mostly useful for SELECT queries but are an important + part also of UPDATE, DELETE, REPLACE and CREATE TABLE table AS SELECT + and so forth. + Naturally an index is needed for an index scan and indexes can either + be ordered, hash based. Some ordered indexes can return data in order + but not necessarily all of them. 
+ There are many flags that define the behavior of indexes in the + various handlers. These methods are found in the optimizer module. + ------------------------------------------------------------------------- + + index_read is called to start a scan of an index. The find_flag defines + the semantics of the scan. These flags are defined in + include/my_base.h + index_read_idx is the same but also initializes index before calling doing + the same thing as index_read. Thus it is similar to index_init followed + by index_read. This is also how we implement it. + + index_read/index_read_idx does also return the first row. Thus for + key lookups, the index_read will be the only call to the handler in + the index scan. + + index_init initializes an index before using it and index_end does + any end processing needed. + */ + int index_read_map(uchar * buf, const uchar * key, + key_part_map keypart_map, + enum ha_rkey_function find_flag) override; + int index_init(uint idx, bool sorted) override; + int index_end() override; + + /** + @breif + Positions an index cursor to the index specified in the handle. Fetches the + row if available. If the key value is null, begin at first key of the + index. + */ + int index_read_idx_map(uchar *buf, uint index, const uchar *key, + key_part_map keypart_map, + enum ha_rkey_function find_flag) override; + /* + These methods are used to jump to next or previous entry in the index + scan. There are also methods to jump to first and last entry. + */ + int index_next(uchar * buf) override; + int index_prev(uchar * buf) override; + int index_first(uchar * buf) override; + int index_last(uchar * buf) override; + int index_next_same(uchar * buf, const uchar * key, uint keylen) override; + + int index_read_last_map(uchar *buf, + const uchar *key, + key_part_map keypart_map) override; + + /* + read_first_row is virtual method but is only implemented by + handler.cc, no storage engine has implemented it so neither + will the partition handler. 
+ + int read_first_row(uchar *buf, uint primary_key) override; + */ + + + int read_range_first(const key_range * start_key, + const key_range * end_key, + bool eq_range, bool sorted) override; + int read_range_next() override; + + + HANDLER_BUFFER *m_mrr_buffer; + uint *m_mrr_buffer_size; + uchar *m_mrr_full_buffer; + uint m_mrr_full_buffer_size; + uint m_mrr_new_full_buffer_size; + MY_BITMAP m_mrr_used_partitions; + uint *m_stock_range_seq; + /* not used: uint m_current_range_seq; */ + + /* Value of mrr_mode passed to ha_partition::multi_range_read_init */ + uint m_mrr_mode; + + /* Value of n_ranges passed to ha_partition::multi_range_read_init */ + uint m_mrr_n_ranges; + + /* + Ordered MRR mode: m_range_info[N] has the range_id of the last record that + we've got from partition N + */ + range_id_t *m_range_info; + + /* + TRUE <=> This ha_partition::multi_range_read_next() call is the first one + */ + bool m_multi_range_read_first; + + /* not used: uint m_mrr_range_init_flags; */ + + /* Number of elements in the list pointed by m_mrr_range_first. Not used */ + uint m_mrr_range_length; + + /* Linked list of ranges to scan */ + PARTITION_KEY_MULTI_RANGE *m_mrr_range_first; + PARTITION_KEY_MULTI_RANGE *m_mrr_range_current; + + /* + For each partition: number of ranges MRR scan will scan in the partition + */ + uint *m_part_mrr_range_length; + + /* For each partition: List of ranges to scan in this partition */ + PARTITION_PART_KEY_MULTI_RANGE **m_part_mrr_range_first; + PARTITION_PART_KEY_MULTI_RANGE **m_part_mrr_range_current; + PARTITION_PART_KEY_MULTI_RANGE_HLD *m_partition_part_key_multi_range_hld; + + /* + Sequence of ranges to be scanned (TODO: why not store this in + handler::mrr_{iter,funcs}?) 
+ */ + range_seq_t m_seq; + RANGE_SEQ_IF *m_seq_if; + + /* Range iterator structure to be supplied to partitions */ + RANGE_SEQ_IF m_part_seq_if; + + virtual int multi_range_key_create_key( + RANGE_SEQ_IF *seq, + range_seq_t seq_it + ); + ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, + void *seq_init_param, + uint n_ranges, uint *bufsz, + uint *mrr_mode, + Cost_estimate *cost) override; + ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, + uint key_parts, uint *bufsz, + uint *mrr_mode, Cost_estimate *cost) override; + int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, + uint n_ranges, uint mrr_mode, + HANDLER_BUFFER *buf) override; + int multi_range_read_next(range_id_t *range_info) override; + int multi_range_read_explain_info(uint mrr_mode, char *str, size_t size) + override; + uint last_part() { return m_last_part; } + +private: + bool init_record_priority_queue(); + void destroy_record_priority_queue(); + int common_index_read(uchar * buf, bool have_start_key); + int common_first_last(uchar * buf); + int partition_scan_set_up(uchar * buf, bool idx_read_flag); + bool check_parallel_search(); + int handle_pre_scan(bool reverse_order, bool use_parallel); + int handle_unordered_next(uchar * buf, bool next_same); + int handle_unordered_scan_next_partition(uchar * buf); + int handle_ordered_index_scan(uchar * buf, bool reverse_order); + int handle_ordered_index_scan_key_not_found(); + int handle_ordered_next(uchar * buf, bool next_same); + int handle_ordered_prev(uchar * buf); + void return_top_record(uchar * buf); + void swap_blobs(uchar* rec_buf, Ordered_blob_storage ** storage, bool restore); +public: + /* + ------------------------------------------------------------------------- + MODULE information calls + ------------------------------------------------------------------------- + This calls are used to inform the handler of specifics of the ongoing + scans and other actions. 
Most of these are used for optimisation + purposes. + ------------------------------------------------------------------------- + */ + int info(uint) override; + void get_dynamic_partition_info(PARTITION_STATS *stat_info, uint part_id) + override; + void set_partitions_to_open(List *partition_names) override; + int change_partitions_to_open(List *partition_names) override; + int open_read_partitions(char *name_buff, size_t name_buff_size); + int extra(enum ha_extra_function operation) override; + int extra_opt(enum ha_extra_function operation, ulong arg) override; + int reset() override; + uint count_query_cache_dependant_tables(uint8 *tables_type) override; + my_bool register_query_cache_dependant_tables(THD *thd, + Query_cache *cache, + Query_cache_block_table **block, + uint *n) override; + +private: + typedef int handler_callback(handler *, void *); + + my_bool reg_query_cache_dependant_table(THD *thd, + char *engine_key, + uint engine_key_len, + char *query_key, uint query_key_len, + uint8 type, + Query_cache *cache, + Query_cache_block_table + **block_table, + handler *file, uint *n); + static const uint NO_CURRENT_PART_ID= NOT_A_PARTITION_ID; + int loop_partitions(handler_callback callback, void *param); + int loop_extra_alter(enum ha_extra_function operations); + void late_extra_cache(uint partition_id); + void late_extra_no_cache(uint partition_id); + void prepare_extra_cache(uint cachesize); + handler *get_open_file_sample() const { return m_file_sample; } +public: + + /* + ------------------------------------------------------------------------- + MODULE optimiser support + ------------------------------------------------------------------------- + ------------------------------------------------------------------------- + */ + + /* + NOTE !!!!!! 
+ ------------------------------------------------------------------------- + ------------------------------------------------------------------------- + One important part of the public handler interface that is not depicted in + the methods is the attribute records + + which is defined in the base class. This is looked upon directly and is + set by calling info(HA_STATUS_INFO) ? + ------------------------------------------------------------------------- + */ + +private: + /* Helper functions for optimizer hints. */ + ha_rows min_rows_for_estimate(); + uint get_biggest_used_partition(uint *part_index); +public: + + /* + keys_to_use_for_scanning can probably be implemented as the + intersection of all underlying handlers if mixed handlers are used. + This method is used to derive whether an index can be used for + index-only scanning when performing an ORDER BY query. + Only called from one place in sql_select.cc + */ + const key_map *keys_to_use_for_scanning() override; + + /* + Called in test_quick_select to determine if indexes should be used. + */ + double scan_time() override; + + double key_scan_time(uint inx) override; + + double keyread_time(uint inx, uint ranges, ha_rows rows) override; + + /* + The next method will never be called if you do not implement indexes. + */ + double read_time(uint index, uint ranges, ha_rows rows) override; + /* + For the given range how many records are estimated to be in this range. + Used by optimiser to calculate cost of using a particular index. + */ + ha_rows records_in_range(uint inx, + const key_range * min_key, + const key_range * max_key, + page_range *pages) override; + + /* + Upper bound of number records returned in scan is sum of all + underlying handlers. + */ + ha_rows estimate_rows_upper_bound() override; + + /* + table_cache_type is implemented by the underlying handler but all + underlying handlers must have the same implementation for it to work. 
+ */ + uint8 table_cache_type() override; + ha_rows records() override; + + /* Calculate hash value for PARTITION BY KEY tables. */ + static uint32 calculate_key_hash_value(Field **field_array); + + /* + ------------------------------------------------------------------------- + MODULE print messages + ------------------------------------------------------------------------- + This module contains various methods that returns text messages for + table types, index type and error messages. + ------------------------------------------------------------------------- + */ + /* + The name of the index type that will be used for display + Here we must ensure that all handlers use the same index type + for each index created. + */ + const char *index_type(uint inx) override; + + /* The name of the table type that will be used for display purposes */ + const char *real_table_type() const override; + /* The name of the row type used for the underlying tables. */ + enum row_type get_row_type() const override; + + /* + Handler specific error messages + */ + void print_error(int error, myf errflag) override; + bool get_error_message(int error, String * buf) override; + /* + ------------------------------------------------------------------------- + MODULE handler characteristics + ------------------------------------------------------------------------- + This module contains a number of methods defining limitations and + characteristics of the handler. The partition handler will calculate + this characteristics based on underlying handler characteristics. + ------------------------------------------------------------------------- + + This is a list of flags that says what the storage engine + implements. The current table flags are documented in handler.h + The partition handler will support whatever the underlying handlers + support except when specifically mentioned below about exceptions + to this rule. 
+ NOTE: This cannot be cached since it can depend on TRANSACTION ISOLATION + LEVEL which is dynamic, see bug#39084. + + HA_TABLE_SCAN_ON_INDEX: + Used to avoid scanning full tables on an index. If this flag is set then + the handler always has a primary key (hidden if not defined) and this + index is used for scanning rather than a full table scan in all + situations. + (InnoDB, Federated) + + HA_REC_NOT_IN_SEQ: + This flag is set for handlers that cannot guarantee that the rows are + returned according to incremental positions (0, 1, 2, 3...). + This also means that rnd_next() should return HA_ERR_RECORD_DELETED + if it finds a deleted row. + (MyISAM (not fixed length row), HEAP, InnoDB) + + HA_CAN_GEOMETRY: + Can the storage engine handle spatial data. + Used to check that no spatial attributes are declared unless + the storage engine is capable of handling it. + (MyISAM) + + HA_FAST_KEY_READ: + Setting this flag indicates that the handler is equally fast in + finding a row by key as by position. + This flag is used in a very special situation in conjunction with + filesort's. For further explanation see intro to init_read_record. + (HEAP, InnoDB) + + HA_NULL_IN_KEY: + Is NULL values allowed in indexes. + If this is not allowed then it is not possible to use an index on a + NULLable field. + (HEAP, MyISAM, InnoDB) + + HA_DUPLICATE_POS: + Tells that we can the position for the conflicting duplicate key + record is stored in table->file->dupp_ref. (insert uses rnd_pos() on + this to find the duplicated row) + (MyISAM) + + HA_CAN_INDEX_BLOBS: + Is the storage engine capable of defining an index of a prefix on + a BLOB attribute. + (Federated, MyISAM, InnoDB) + + HA_AUTO_PART_KEY: + Auto increment fields can be part of a multi-part key. 
For second part + auto-increment keys, the auto_incrementing is done in handler.cc + (Federated, MyISAM) + + HA_REQUIRE_PRIMARY_KEY: + Can't define a table without primary key (and cannot handle a table + with hidden primary key) + (No handler has this limitation currently) + + HA_STATS_RECORDS_IS_EXACT: + Does the counter of records after the info call specify an exact + value or not. If it does this flag is set. + Only MyISAM and HEAP uses exact count. + + HA_CAN_INSERT_DELAYED: + Can the storage engine support delayed inserts. + To start with the partition handler will not support delayed inserts. + Further investigation needed. + (HEAP, MyISAM) + + HA_PRIMARY_KEY_IN_READ_INDEX: + This parameter is set when the handler will also return the primary key + when doing read-only-key on another index. + + HA_NOT_DELETE_WITH_CACHE: + Seems to be an old MyISAM feature that is no longer used. No handler + has it defined but it is checked in init_read_record. + Further investigation needed. + (No handler defines it) + + HA_NO_PREFIX_CHAR_KEYS: + Indexes on prefixes of character fields is not allowed. + (Federated) + + HA_CAN_FULLTEXT: + Does the storage engine support fulltext indexes + The partition handler will start by not supporting fulltext indexes. + (MyISAM) + + HA_CAN_SQL_HANDLER: + Can the HANDLER interface in the MySQL API be used towards this + storage engine. + (MyISAM, InnoDB) + + HA_NO_AUTO_INCREMENT: + Set if the storage engine does not support auto increment fields. + (Currently not set by any handler) + + HA_HAS_CHECKSUM: + Special MyISAM feature. Has special SQL support in CREATE TABLE. + No special handling needed by partition handler. + (MyISAM) + + HA_FILE_BASED: + Should file names always be in lower case (used by engines + that map table names to file names. + Since partition handler has a local file this flag is set. + (Federated, MyISAM) + + HA_CAN_BIT_FIELD: + Is the storage engine capable of handling bit fields? 
+ (MyISAM) + + HA_NEED_READ_RANGE_BUFFER: + Is Read Multi-Range supported => need multi read range buffer + This parameter specifies whether a buffer for read multi range + is needed by the handler. Whether the handler supports this + feature or not is dependent of whether the handler implements + read_multi_range* calls or not. The only handler currently + supporting this feature is NDB so the partition handler need + not handle this call. There are methods in handler.cc that will + transfer those calls into index_read and other calls in the + index scan module. + (No handler defines it) + + HA_PRIMARY_KEY_REQUIRED_FOR_POSITION: + Does the storage engine need a PK for position? + (InnoDB) + + HA_FILE_BASED is always set for partition handler since we use a + special file for handling names of partitions, engine types. + HA_REC_NOT_IN_SEQ is always set for partition handler since we cannot + guarantee that the records will be returned in sequence. + HA_DUPLICATE_POS, + HA_CAN_INSERT_DELAYED, HA_PRIMARY_KEY_REQUIRED_FOR_POSITION is disabled + until further investigated. + */ + Table_flags table_flags() const override; + + /* + This is a bitmap of flags that says how the storage engine + implements indexes. The current index flags are documented in + handler.h. If you do not implement indexes, just return zero + here. + + part is the key part to check. First key part is 0 + If all_parts it's set, MySQL want to know the flags for the combined + index up to and including 'part'. + + HA_READ_NEXT: + Does the index support read next, this is assumed in the server + code and never checked so all indexes must support this. + Note that the handler can be used even if it doesn't have any index. + (HEAP, MyISAM, Federated, InnoDB) + + HA_READ_PREV: + Can the index be used to scan backwards. + (HEAP, MyISAM, InnoDB) + + HA_READ_ORDER: + Can the index deliver its record in index order. Typically true for + all ordered indexes and not true for hash indexes. 
+ In first step this is not true for partition handler until a merge + sort has been implemented in partition handler. + Used to set keymap part_of_sortkey + This keymap is only used to find indexes usable for resolving an ORDER BY + in the query. Thus in most cases index_read will work just fine without + order in result production. When this flag is set it is however safe to + order all output started by index_read since most engines do this. With + read_multi_range calls there is a specific flag setting order or not + order so in those cases ordering of index output can be avoided. + (InnoDB, HEAP, MyISAM) + + HA_READ_RANGE: + Specify whether index can handle ranges, typically true for all + ordered indexes and not true for hash indexes. + Used by optimiser to check if ranges (as key >= 5) can be optimised + by index. + (InnoDB, MyISAM, HEAP) + + HA_ONLY_WHOLE_INDEX: + Can't use part key searches. This is typically true for hash indexes + and typically not true for ordered indexes. + (Federated, HEAP) + + HA_KEYREAD_ONLY: + Does the storage engine support index-only scans on this index. + Enables use of HA_EXTRA_KEYREAD and HA_EXTRA_NO_KEYREAD + Used to set key_map keys_for_keyread and to check in optimiser for + index-only scans. When doing a read under HA_EXTRA_KEYREAD the handler + only have to fill in the columns the key covers. If + HA_PRIMARY_KEY_IN_READ_INDEX is set then also the PRIMARY KEY columns + must be updated in the row. + (InnoDB, MyISAM) + */ + ulong index_flags(uint inx, uint part, bool all_parts) const override + { + /* + The following code is not safe if you are using different + storage engines or different index types per partition. + */ + ulong part_flags= m_file[0]->index_flags(inx, part, all_parts); + + /* + The underlying storage engine might support Rowid Filtering. But + ha_partition does not forward the needed SE API calls, so the feature + will not be used. 
+ + Note: It's the same with IndexConditionPushdown, except for its variant + of IndexConditionPushdown+BatchedKeyAccess (that one works). Because of + that, we do not clear HA_DO_INDEX_COND_PUSHDOWN here. + */ + return part_flags & ~HA_DO_RANGE_FILTER_PUSHDOWN; + } + + /** + wrapper function for handlerton alter_table_flags, since + the ha_partition_hton cannot know all its capabilities + */ + alter_table_operations alter_table_flags(alter_table_operations flags) + override; + /* + unireg.cc will call the following to make sure that the storage engine + can handle the data it is about to send. + + The maximum supported values is the minimum of all handlers in the table + */ + uint min_of_the_max_uint(uint (handler::*operator_func)(void) const) const; + uint max_supported_record_length() const override; + uint max_supported_keys() const override; + uint max_supported_key_parts() const override; + uint max_supported_key_length() const override; + uint max_supported_key_part_length() const override; + uint min_record_length(uint options) const override; + + /* + ------------------------------------------------------------------------- + MODULE compare records + ------------------------------------------------------------------------- + cmp_ref checks if two references are the same. For most handlers this is + a simple memcmp of the reference. However some handlers use primary key + as reference and this can be the same even if memcmp says they are + different. This is due to character sets and end spaces and so forth. + For the partition handler the reference is first two bytes providing the + partition identity of the referred record and then the reference of the + underlying handler. + Thus cmp_ref for the partition handler always returns FALSE for records + not in the same partition and uses cmp_ref on the underlying handler + to check whether the rest of the reference part is also the same. 
+ ------------------------------------------------------------------------- + */ + int cmp_ref(const uchar * ref1, const uchar * ref2) override; + /* + ------------------------------------------------------------------------- + MODULE auto increment + ------------------------------------------------------------------------- + This module is used to handle the support of auto increments. + + This variable in the handler is used as part of the handler interface + It is maintained by the parent handler object and should not be + touched by child handler objects (see handler.cc for its use). + + auto_increment_column_changed + ------------------------------------------------------------------------- + */ + bool need_info_for_auto_inc() override; + bool can_use_for_auto_inc_init() override; + void get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *first_value, + ulonglong *nb_reserved_values) override; + void release_auto_increment() override; +private: + int reset_auto_increment(ulonglong value) override; + int update_next_auto_inc_val(); + virtual void lock_auto_increment() + { + /* lock already taken */ + if (auto_increment_safe_stmt_log_lock) + return; + if (table_share->tmp_table == NO_TMP_TABLE) + { + part_share->lock_auto_inc(); + DBUG_ASSERT(!auto_increment_lock); + auto_increment_lock= TRUE; + } + } + virtual void unlock_auto_increment() + { + /* + If auto_increment_safe_stmt_log_lock is true, we have to keep the lock. + It will be set to false and thus unlocked at the end of the statement by + ha_partition::release_auto_increment. + */ + if (auto_increment_lock && !auto_increment_safe_stmt_log_lock) + { + auto_increment_lock= FALSE; + part_share->unlock_auto_inc(); + } + } + virtual void set_auto_increment_if_higher(Field *field) + { + ulonglong nr= (((Field_num*) field)->unsigned_flag || + field->val_int() > 0) ? 
field->val_int() : 0; + lock_auto_increment(); + DBUG_ASSERT(part_share->auto_inc_initialized || + !can_use_for_auto_inc_init()); + /* must check when the mutex is taken */ + if (nr >= part_share->next_auto_inc_val) + part_share->next_auto_inc_val= nr + 1; + unlock_auto_increment(); + } + + void check_insert_or_replace_autoincrement() + { + /* + If we INSERT or REPLACE into the table having the AUTO_INCREMENT column, + we have to read all partitions for the next autoincrement value + unless we already did it. + */ + if (!part_share->auto_inc_initialized && + (ha_thd()->lex->sql_command == SQLCOM_INSERT || + ha_thd()->lex->sql_command == SQLCOM_INSERT_SELECT || + ha_thd()->lex->sql_command == SQLCOM_REPLACE || + ha_thd()->lex->sql_command == SQLCOM_REPLACE_SELECT) && + table->found_next_number_field) + bitmap_set_all(&m_part_info->read_partitions); + } + +public: + + /* + ------------------------------------------------------------------------- + MODULE initialize handler for HANDLER call + ------------------------------------------------------------------------- + This method is a special InnoDB method called before a HANDLER query. + ------------------------------------------------------------------------- + */ + void init_table_handle_for_HANDLER() override; + + /* + The remainder of this file defines the handler methods not implemented + by the partition handler + */ + + /* + ------------------------------------------------------------------------- + MODULE foreign key support + ------------------------------------------------------------------------- + The following methods are used to implement foreign keys as supported by + InnoDB. Implement this ?? + get_foreign_key_create_info is used by SHOW CREATE TABLE to get a textual + description of how the CREATE TABLE part to define FOREIGN KEY's is done. + free_foreign_key_create_info is used to free the memory area that provided + this description. 
+ can_switch_engines checks if it is ok to switch to a new engine based on + the foreign key info in the table. + ------------------------------------------------------------------------- + + virtual char* get_foreign_key_create_info() + virtual void free_foreign_key_create_info(char* str) + + virtual int get_foreign_key_list(THD *thd, + List *f_key_list) + virtual uint referenced_by_foreign_key() + */ + bool can_switch_engines() override; + /* + ------------------------------------------------------------------------- + MODULE fulltext index + ------------------------------------------------------------------------- + */ + void ft_close_search(FT_INFO *handler); + int ft_init() override; + int pre_ft_init() override; + void ft_end() override; + int pre_ft_end() override; + FT_INFO *ft_init_ext(uint flags, uint inx, String *key) override; + int ft_read(uchar *buf) override; + int pre_ft_read(bool use_parallel) override; + + /* + ------------------------------------------------------------------------- + MODULE restart full table scan at position (MyISAM) + ------------------------------------------------------------------------- + The following method is only used by MyISAM when used as + temporary tables in a join. + int restart_rnd_next(uchar *buf, uchar *pos) override; + */ + + /* + ------------------------------------------------------------------------- + MODULE in-place ALTER TABLE + ------------------------------------------------------------------------- + These methods are in the handler interface. 
(used by innodb-plugin) + They are used for in-place alter table: + ------------------------------------------------------------------------- + */ + enum_alter_inplace_result + check_if_supported_inplace_alter(TABLE *altered_table, + Alter_inplace_info *ha_alter_info) + override; + bool prepare_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info) + override; + bool inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info) override; + bool commit_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info, + bool commit) override; + /* + ------------------------------------------------------------------------- + MODULE tablespace support + ------------------------------------------------------------------------- + Admin of table spaces is not applicable to the partition handler (InnoDB) + This means that the following method is not implemented: + ------------------------------------------------------------------------- + virtual int discard_or_import_tablespace(my_bool discard) + */ + + /* + ------------------------------------------------------------------------- + MODULE admin MyISAM + ------------------------------------------------------------------------- + + ------------------------------------------------------------------------- + OPTIMIZE TABLE, CHECK TABLE, ANALYZE TABLE and REPAIR TABLE are + mapped to a routine that handles looping over a given set of + partitions and those routines send a flag indicating to execute on + all partitions. 
+ ------------------------------------------------------------------------- + */ + int optimize(THD* thd, HA_CHECK_OPT *check_opt) override; + int analyze(THD* thd, HA_CHECK_OPT *check_opt) override; + int check(THD* thd, HA_CHECK_OPT *check_opt) override; + int repair(THD* thd, HA_CHECK_OPT *check_opt) override; + bool check_and_repair(THD *thd) override; + bool auto_repair(int error) const override; + bool is_crashed() const override; + int check_for_upgrade(HA_CHECK_OPT *check_opt) override; + + /* + ------------------------------------------------------------------------- + MODULE condition pushdown + ------------------------------------------------------------------------- + */ + const COND *cond_push(const COND *cond) override; + void cond_pop() override; + int info_push(uint info_type, void *info) override; + + private: + int handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt, uint flags); + int handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt, uint part_id, + uint flag); + /** + Check if the rows are placed in the correct partition. If the given + argument is true, then move the rows to the correct partition. + */ + int check_misplaced_rows(uint read_part_id, bool repair); + void append_row_to_str(String &str); + public: + + int pre_calculate_checksum() override; + int calculate_checksum() override; + + /* Enabled keycache for performance reasons, WL#4571 */ + int assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt) override; + int preload_keys(THD* thd, HA_CHECK_OPT* check_opt) override; + TABLE_LIST *get_next_global_for_child() override; + + /* + ------------------------------------------------------------------------- + MODULE enable/disable indexes + ------------------------------------------------------------------------- + Enable/Disable Indexes are only supported by HEAP and MyISAM. 
+ ------------------------------------------------------------------------- + */ + int disable_indexes(uint mode) override; + int enable_indexes(uint mode) override; + int indexes_are_disabled() override; + + /* + ------------------------------------------------------------------------- + MODULE append_create_info + ------------------------------------------------------------------------- + append_create_info is only used by MyISAM MERGE tables and the partition + handler will not support this handler as underlying handler. + Implement this?? + ------------------------------------------------------------------------- + virtual void append_create_info(String *packet) + */ + + /* + the following heavily relies on the fact that all partitions + are in the same storage engine. + + When this limitation is lifted, the following hack should go away, + and a proper interface for engines needs to be introduced: + + an PARTITION_SHARE structure that has a pointer to the TABLE_SHARE. + is given to engines everywhere where TABLE_SHARE is used now + has members like option_struct, ha_data + perhaps TABLE needs to be split the same way too... + + this can also be done before partition will support a mix of engines, + but preferably together with other incompatible API changes. + */ + handlerton *partition_ht() const override + { + handlerton *h= m_file[0]->ht; + for (uint i=1; i < m_tot_parts; i++) + DBUG_ASSERT(h == m_file[i]->ht); + return h; + } + + bool partition_engine() override { return 1;} + + /** + Get the number of records in part_elem and its subpartitions, if any. + */ + ha_rows part_records(partition_element *part_elem) + { + DBUG_ASSERT(m_part_info); + uint32 sub_factor= m_part_info->num_subparts ? 
m_part_info->num_subparts : 1; + uint32 part_id= part_elem->id * sub_factor; + uint32 part_id_end= part_id + sub_factor; + DBUG_ASSERT(part_id_end <= m_tot_parts); + ha_rows part_recs= 0; + for (; part_id < part_id_end; ++part_id) + { + handler *file= m_file[part_id]; + file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK | HA_STATUS_OPEN); + part_recs+= file->stats.records; + } + return part_recs; + } + + int notify_tabledef_changed(LEX_CSTRING *db, LEX_CSTRING *table, + LEX_CUSTRING *frm, LEX_CUSTRING *version); + + friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); + friend int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2); + + bool can_convert_nocopy(const Field &field, + const Column_definition &new_field) const override; + void handler_stats_updated() override; +}; +#endif /* HA_PARTITION_INCLUDED */ diff --git a/sql/ha_sequence.cc b/sql/ha_sequence.cc new file mode 100644 index 00000000..bab06147 --- /dev/null +++ b/sql/ha_sequence.cc @@ -0,0 +1,484 @@ +/* + Copyright (c) 2017, Aliyun and/or its affiliates. + Copyright (c) 2017, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "mariadb.h" +#include "sql_list.h" +#include "table.h" +#include "sql_table.h" +#include "sql_sequence.h" +#include "ha_sequence.h" +#include "sql_plugin.h" +#include "mysql/plugin.h" +#include "sql_priv.h" +#include "sql_parse.h" +#include "sql_update.h" +#include "sql_base.h" +#include "log_event.h" + +#ifdef WITH_WSREP +#include "wsrep_trans_observer.h" /* wsrep_start_transaction() */ +#endif + +/* + Table flags we should inherit and disable from the original engine. + We add HA_STATS_RECORDS_IS_EXACT as ha_sequence::info() will ensure + that records is always 1 +*/ + +#define SEQUENCE_ENABLED_TABLE_FLAGS (HA_STATS_RECORDS_IS_EXACT | \ + HA_PERSISTENT_TABLE) +#define SEQUENCE_DISABLED_TABLE_FLAGS (HA_CAN_SQL_HANDLER | \ + HA_CAN_INSERT_DELAYED | \ + HA_BINLOG_STMT_CAPABLE) +handlerton *sql_sequence_hton; + +/* + Create a sequence handler +*/ + +ha_sequence::ha_sequence(handlerton *hton, TABLE_SHARE *share) + :handler(hton, share), write_locked(0) +{ + sequence= share->sequence; + DBUG_ASSERT(share->sequence); +} + +/** + Destructor method must remove the underlying handler +*/ +ha_sequence::~ha_sequence() +{ + delete file; +} + +/** + Sequence table open method + + @param name Path to file (dbname and tablename) + @param mode mode + @param flags Flags how to open file + + RETURN VALUES + @retval 0 Success + @retval != 0 Failure +*/ + +int ha_sequence::open(const char *name, int mode, uint flags) +{ + int error; + DBUG_ENTER("ha_sequence::open"); + DBUG_ASSERT(table->s == table_share && file); + + file->table= table; + if (likely(!(error= file->open(name, mode, flags)))) + { + /* + Allocate ref in table's mem_root. We can't use table's ref + as it's allocated by ha_ caller that allocates this. 
+ */ + ref_length= file->ref_length; + if (!(ref= (uchar*) alloc_root(&table->mem_root,ALIGN_SIZE(ref_length)*2))) + { + file->ha_close(); + error=HA_ERR_OUT_OF_MEM; + DBUG_RETURN(error); + } + file->ref= ref; + file->dup_ref= dup_ref= ref+ALIGN_SIZE(file->ref_length); + + /* + ha_open() sets the following for us. We have to set this for the + underlying handler + */ + file->cached_table_flags= (file->table_flags() | HA_REUSES_FILE_NAMES); + + file->reset_statistics(); + internal_tmp_table= file->internal_tmp_table= + MY_TEST(flags & HA_OPEN_INTERNAL_TABLE); + reset_statistics(); + + /* + Don't try to read the initial row if the call is part of CREATE, REPAIR + or FLUSH + */ + if (!(flags & (HA_OPEN_FOR_CREATE | HA_OPEN_FOR_REPAIR | + HA_OPEN_FOR_FLUSH))) + { + if (unlikely((error= table->s->sequence->read_initial_values(table)))) + file->ha_close(); + } + else if (!table->s->tmp_table) + table->internal_set_needs_reopen(true); + + /* + The following is needed to fix comparison of rows in + ha_update_first_row() for InnoDB + */ + if (!error) + memcpy(table->record[1], table->s->default_values, table->s->reclength); + } + DBUG_RETURN(error); +} + +/* + Clone the sequence. Needed if table is used by range optimization + (Very, very unlikely) +*/ + +handler *ha_sequence::clone(const char *name, MEM_ROOT *mem_root) +{ + ha_sequence *new_handler; + DBUG_ENTER("ha_sequence::clone"); + if (!(new_handler= new (mem_root) ha_sequence(ht, table_share))) + DBUG_RETURN(NULL); + + /* + Allocate new_handler->ref here because otherwise ha_open will allocate it + on this->table->mem_root and we will not be able to reclaim that memory + when the clone handler object is destroyed. 
+ */ + + if (!(new_handler->ref= (uchar*) alloc_root(mem_root, + ALIGN_SIZE(ref_length)*2))) + goto err; + + if (new_handler->ha_open(table, name, + table->db_stat, + HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_NO_PSI_CALL)) + goto err; + + /* Reuse original storage engine data for duplicate key reference */ + new_handler->ref= file->ref; + new_handler->ref_length= file->ref_length; + new_handler->dup_ref= file->dup_ref; + + DBUG_RETURN((handler*) new_handler); + +err: + delete new_handler; + DBUG_RETURN(NULL); +} + + +/* + Map the create table to the original storage engine +*/ + +int ha_sequence::create(const char *name, TABLE *form, + HA_CREATE_INFO *create_info) +{ + DBUG_ASSERT(create_info->sequence); + /* Sequence tables has one and only one row */ + create_info->max_rows= create_info->min_rows= 1; + return (file->create(name, form, create_info)); +} + +/** + Sequence write row method. + + A sequence table has only one row. Any inserts in the table + will update this row. + + @retval 0 Success + @retval != 0 Failure + + NOTES: + write_locked is set if we are called from SEQUENCE::next_value + In this case the mutex is already locked and we should not update + the sequence with 'buf' as the sequence object is already up to date. 
+*/ + +int ha_sequence::write_row(const uchar *buf) +{ + int error; + sequence_definition tmp_seq; + bool sequence_locked; + THD *thd= table->in_use; + DBUG_ENTER("ha_sequence::write_row"); + DBUG_ASSERT(table->record[0] == buf); + + /* + Log to binary log even if this function has been called before + (The function ends by setting row_logging to 0) + */ + row_logging= row_logging_init; + if (unlikely(sequence->initialized == SEQUENCE::SEQ_IN_PREPARE)) + { + /* This calls is from ha_open() as part of create table */ + DBUG_RETURN(file->write_row(buf)); + } + if (unlikely(sequence->initialized == SEQUENCE::SEQ_IN_ALTER)) + { + int error= 0; + /* This is called from alter table */ + tmp_seq.read_fields(table); + if (tmp_seq.check_and_adjust(0)) + DBUG_RETURN(HA_ERR_SEQUENCE_INVALID_DATA); + sequence->copy(&tmp_seq); + if (likely(!(error= file->write_row(buf)))) + sequence->initialized= SEQUENCE::SEQ_READY_TO_USE; + row_logging= 0; + DBUG_RETURN(error); + } + if (unlikely(sequence->initialized != SEQUENCE::SEQ_READY_TO_USE)) + DBUG_RETURN(HA_ERR_WRONG_COMMAND); + + sequence_locked= write_locked; + if (!write_locked) // If not from next_value() + { + /* + User tries to write a full row directly to the sequence table with + INSERT or LOAD DATA. + + - Get an exclusive lock for the table. This is needed to ensure that + we excute all full inserts (same as ALTER SEQUENCE) in same order + on master and slaves + - Check that the new row is an accurate SEQUENCE object + */ + /* mark a full binlog image insert to force non-parallel slave */ + thd->transaction->stmt.mark_trans_did_ddl(); + if (table->s->tmp_table == NO_TMP_TABLE && + thd->mdl_context.upgrade_shared_lock(table->mdl_ticket, + MDL_EXCLUSIVE, + thd->variables. 
+ lock_wait_timeout)) + DBUG_RETURN(ER_LOCK_WAIT_TIMEOUT); + + tmp_seq.read_fields(table); + if (tmp_seq.check_and_adjust(0)) + DBUG_RETURN(HA_ERR_SEQUENCE_INVALID_DATA); + + /* + Lock sequence to ensure that no one can come in between + while sequence, table and binary log are updated. + */ + sequence->write_lock(table); + } + +#ifdef WITH_WSREP + /* We need to start Galera transaction for select NEXT VALUE FOR + sequence if it is not yet started. Note that ALTER is handled + as TOI. */ + if (WSREP_ON && WSREP(thd) && + !thd->wsrep_trx().active() && + wsrep_thd_is_local(thd)) + wsrep_start_transaction(thd, thd->wsrep_next_trx_id()); +#endif + + if (likely(!(error= file->update_first_row(buf)))) + { + Log_func *log_func= Write_rows_log_event::binlog_row_logging_function; + if (!sequence_locked) + sequence->copy(&tmp_seq); + rows_changed++; + /* We have to do the logging while we hold the sequence mutex */ + if (row_logging) + error= binlog_log_row(table, 0, buf, log_func); + } + + /* Row is already logged, don't log it again in ha_write_row() */ + row_logging= 0; + sequence->all_values_used= 0; + if (!sequence_locked) + sequence->write_unlock(table); + DBUG_RETURN(error); +} + + +/* + Inherit the sequence base table flags. 
+*/ + +handler::Table_flags ha_sequence::table_flags() const +{ + DBUG_ENTER("ha_sequence::table_flags"); + DBUG_RETURN((file->table_flags() & ~SEQUENCE_DISABLED_TABLE_FLAGS) | + SEQUENCE_ENABLED_TABLE_FLAGS); +} + + +int ha_sequence::info(uint flag) +{ + DBUG_ENTER("ha_sequence::info"); + file->info(flag); + /* Inform optimizer that we have always only one record */ + stats= file->stats; + stats.records= 1; + DBUG_RETURN(false); +} + + +int ha_sequence::extra(enum ha_extra_function operation) +{ + if (operation == HA_EXTRA_PREPARE_FOR_ALTER_TABLE) + { + /* In case of ALTER TABLE allow ::write_row() to copy rows */ + sequence->initialized= SEQUENCE::SEQ_IN_ALTER; + } + return file->extra(operation); +} + +bool ha_sequence::check_if_incompatible_data(HA_CREATE_INFO *create_info, + uint table_changes) +{ + /* Table definition is locked for SEQUENCE tables */ + return(COMPATIBLE_DATA_YES); +} + + +int ha_sequence::external_lock(THD *thd, int lock_type) +{ + int error= file->external_lock(thd, lock_type); + + /* + Copy lock flag to satisfy DBUG_ASSERT checks in ha_* functions in + handler.cc when we later call it with file->ha_..() + */ + if (!error) + file->m_lock_type= lock_type; + return error; +} + +/* + Squence engine error deal method +*/ + +void ha_sequence::print_error(int error, myf errflag) +{ + const char *sequence_db= table_share->db.str; + const char *sequence_name= table_share->table_name.str; + DBUG_ENTER("ha_sequence::print_error"); + + switch (error) { + case HA_ERR_SEQUENCE_INVALID_DATA: + { + my_error(ER_SEQUENCE_INVALID_DATA, MYF(errflag), sequence_db, + sequence_name); + DBUG_VOID_RETURN; + } + case HA_ERR_SEQUENCE_RUN_OUT: + { + my_error(ER_SEQUENCE_RUN_OUT, MYF(errflag), sequence_db, sequence_name); + DBUG_VOID_RETURN; + } + case HA_ERR_WRONG_COMMAND: + my_error(ER_ILLEGAL_HA, MYF(0), "SEQUENCE", sequence_db, sequence_name); + DBUG_VOID_RETURN; + case ER_WRONG_INSERT_INTO_SEQUENCE: + my_error(error, MYF(0)); + DBUG_VOID_RETURN; + } + 
file->print_error(error, errflag); + DBUG_VOID_RETURN; +} + +/***************************************************************************** + Sequence plugin interface +*****************************************************************************/ + +/* + Create an new handler +*/ + +static handler *sequence_create_handler(handlerton *hton, + TABLE_SHARE *share, + MEM_ROOT *mem_root) +{ + DBUG_ENTER("sequence_create_handler"); + if (unlikely(!share)) + { + /* + This can happen if we call get_new_handler with a non existing share + */ + DBUG_RETURN(0); + } + DBUG_RETURN(new (mem_root) ha_sequence(hton, share)); +} + + +/* + Sequence engine end. + + SYNOPSIS + sequence_end() + p handlerton. + type panic type. + RETURN VALUES + 0 Success + !=0 Failure +*/ +static int sequence_end(handlerton* hton, + ha_panic_function type __attribute__((unused))) +{ + DBUG_ENTER("sequence_end"); + DBUG_RETURN(0); +} + + +/* + Sequence engine init. + + SYNOPSIS + sequence_initialize() + + @param p handlerton. 
+ + retval 0 Success + retval !=0 Failure +*/ + +static int sequence_initialize(void *p) +{ + handlerton *local_sequence_hton= (handlerton *)p; + DBUG_ENTER("sequence_initialize"); + + local_sequence_hton->db_type= DB_TYPE_SEQUENCE; + local_sequence_hton->create= sequence_create_handler; + local_sequence_hton->panic= sequence_end; + local_sequence_hton->flags= (HTON_NOT_USER_SELECTABLE | + HTON_HIDDEN | + HTON_TEMPORARY_NOT_SUPPORTED | + HTON_ALTER_NOT_SUPPORTED | +#ifdef WITH_WSREP + HTON_WSREP_REPLICATION | +#endif + HTON_NO_PARTITION); + DBUG_RETURN(0); +} + + +static struct st_mysql_storage_engine sequence_storage_engine= +{ MYSQL_HANDLERTON_INTERFACE_VERSION }; + +maria_declare_plugin(sql_sequence) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &sequence_storage_engine, + "SQL_SEQUENCE", + "jianwei.zhao @ Aliyun & Monty @ MariaDB corp", + "Sequence Storage Engine for CREATE SEQUENCE", + PLUGIN_LICENSE_GPL, + sequence_initialize, /* Plugin Init */ + NULL, /* Plugin Deinit */ + 0x0100, /* 1.0 */ + NULL, /* status variables */ + NULL, /* system variables */ + "1.0", /* string version */ + MariaDB_PLUGIN_MATURITY_STABLE /* maturity */ +} +maria_declare_plugin_end; diff --git a/sql/ha_sequence.h b/sql/ha_sequence.h new file mode 100644 index 00000000..72e59a40 --- /dev/null +++ b/sql/ha_sequence.h @@ -0,0 +1,165 @@ +#ifndef HA_SEQUENCE_INCLUDED +#define HA_SEQUENCE_INCLUDED +/* + Copyright (c) 2017 Aliyun and/or its affiliates. + Copyright (c) 2017 MariaDB corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "sql_sequence.h" +#include "table.h" +#include "handler.h" + +extern handlerton *sql_sequence_hton; + +/* + Sequence engine handler. + + The sequence engine is a logic engine. It doesn't store any data. + All the sequence data stored into the base table which must support + non rollback writes (HA_CAN_TABLES_WITHOUT_ROLLBACK) + + The sequence data (SEQUENCE class) is stored in TABLE_SHARE->sequence + + TABLE RULES: + 1. When table is created, one row is automaticlly inserted into + the table. The table will always have one and only one row. + 2. Any inserts or updates to the table will be validated. + 3. Inserts will overwrite the original row. + 4. DELETE and TRUNCATE will not affect the table. + Instead a warning will be given. + 5. Cache will be reset for any updates. + + CACHE RULES: + SEQUENCE class is used to cache values that sequence defined. + 1. If hit cache, we can query back the sequence nextval directly + instead of reading the underlying table. + + 2. When run out of values, the sequence engine will reserve new values + in update the base table. + + 3. The cache is invalidated if any update on based table. 
+*/ + +class ha_sequence :public handler +{ +private: + handler *file; + SEQUENCE *sequence; /* From table_share->sequence */ + +public: + /* Set when handler is write locked */ + bool write_locked; + + ha_sequence(handlerton *hton, TABLE_SHARE *share); + ~ha_sequence(); + + /* virtual function that are re-implemented for sequence */ + int open(const char *name, int mode, uint test_if_locked); + int create(const char *name, TABLE *form, + HA_CREATE_INFO *create_info); + handler *clone(const char *name, MEM_ROOT *mem_root); + int write_row(const uchar *buf); + Table_flags table_flags() const; + /* One can't update or delete from sequence engine */ + int update_row(const uchar *old_data, const uchar *new_data) + { return HA_ERR_WRONG_COMMAND; } + int delete_row(const uchar *buf) + { return HA_ERR_WRONG_COMMAND; } + /* One can't delete from sequence engine */ + int truncate() + { return HA_ERR_WRONG_COMMAND; } + /* Can't use query cache */ + uint8 table_cache_type() + { return HA_CACHE_TBL_NOCACHE; } + void print_error(int error, myf errflag); + int info(uint); + LEX_CSTRING *engine_name() { return hton_name(file->ht); } + int external_lock(THD *thd, int lock_type); + int extra(enum ha_extra_function operation); + /* For ALTER ONLINE TABLE */ + bool check_if_incompatible_data(HA_CREATE_INFO *create_info, + uint table_changes); + void write_lock() { write_locked= 1;} + void unlock() { write_locked= 0; } + bool is_locked() { return write_locked; } + + /* Functions that are directly mapped to the underlying handler */ + int rnd_init(bool scan) + { return file->rnd_init(scan); } + /* + We need to have a lock here to protect engines like MyISAM from + simultaneous read and write. For sequence's this is not critical + as this function is used extremely seldom. 
+ */ + int rnd_next(uchar *buf) + { + int error; + table->s->sequence->read_lock(table); + error= file->rnd_next(buf); + table->s->sequence->read_unlock(table); + return error; + } + int rnd_end() + { return file->rnd_end(); } + int rnd_pos(uchar *buf, uchar *pos) + { + int error; + table->s->sequence->read_lock(table); + error= file->rnd_pos(buf, pos); + table->s->sequence->read_unlock(table); + return error; + } + void position(const uchar *record) + { return file->position(record); } + const char *table_type() const + { return file->table_type(); } + ulong index_flags(uint inx, uint part, bool all_parts) const + { return file->index_flags(inx, part, all_parts); } + THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, + enum thr_lock_type lock_type) + { return file->store_lock(thd, to, lock_type); } + int close(void) + { return file->close(); } + const char **bas_ext() const + { return file->bas_ext(); } + int delete_table(const char*name) + { return file->delete_table(name); } + int rename_table(const char *from, const char *to) + { return file->rename_table(from, to); } + void unbind_psi() + { file->unbind_psi(); } + void rebind_psi() + { file->rebind_psi(); } + + bool auto_repair(int error) const + { return file->auto_repair(error); } + int repair(THD* thd, HA_CHECK_OPT* check_opt) + { return file->repair(thd, check_opt); } + bool check_and_repair(THD *thd) + { return file->check_and_repair(thd); } + bool is_crashed() const + { return file->is_crashed(); } + void column_bitmaps_signal() + { return file->column_bitmaps_signal(); } + + /* New methods */ + void register_original_handler(handler *file_arg) + { + file= file_arg; + init(); /* Update cached_table_flags */ + } +}; +#endif diff --git a/sql/handle_connections_win.cc b/sql/handle_connections_win.cc new file mode 100644 index 00000000..ffacfcab --- /dev/null +++ b/sql/handle_connections_win.cc @@ -0,0 +1,657 @@ +/* Copyright (c) 2018 MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ + +/* Accepting connections on Windows */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* From mysqld.cc */ +extern HANDLE hEventShutdown; +extern Dynamic_array listen_sockets; +#ifdef HAVE_POOL_OF_THREADS +extern PTP_CALLBACK_ENVIRON get_threadpool_win_callback_environ(); +extern void tp_win_callback_prolog(); +#else +#define get_threadpool_win_callback_environ() 0 +#define tp_win_callback_prolog() do{}while(0) +#endif +static SECURITY_ATTRIBUTES pipe_security; + +/** + Abstract base class for accepting new connection, + asynchronously (i.e the accept() operation can be posted, + and result is retrieved later) , and creating a new connection. +*/ + +struct Listener +{ + /** Windows handle of the Listener. + Subclasses would use SOCKET or named pipe handle + */ + HANDLE m_handle; + /** Required for all async IO*/ + OVERLAPPED m_overlapped; + + /** Create new listener + @param handle - @see m_handle + @param wait_handle - usually, event handle or INVALID_HANDLE_VALUE + @see wait_handle + */ + Listener(HANDLE handle, HANDLE wait_handle): + m_handle(handle), m_overlapped() + { + m_overlapped.hEvent= wait_handle; + } + + /** + if not NULL, this handle can be be used in WaitForSingle/MultipleObject(s). + This handle will be closed when object is destroyed. 
+ + If NULL, the completion notification happens in threadpool. + */ + HANDLE wait_handle() + { + return m_overlapped.hEvent; + } + + /* Start waiting for new client connection. */ + virtual void begin_accept()= 0; + + /** + Completion callback, called whenever IO posted by begin_accept is finished + Listener needs to create a new THD then (or, call scheduler so it creates one) + + @param success - whether IO completed successfully + */ + virtual void completion_callback(bool success)= 0; + + /** + Completion callback for Listener, that uses events for waiting + to IO. Not suitable for threadpool etc. Retrieves the status of + completed IO from the OVERLAPPED structure + */ + void completion_callback() + { + DBUG_ASSERT(wait_handle() && (wait_handle() != INVALID_HANDLE_VALUE)); + DWORD bytes; + return completion_callback( + GetOverlappedResult(wait_handle(), &m_overlapped, &bytes, FALSE)); + } + + /** Cancel an in-progress IO. Useful for threadpool-bound IO */ + void cancel() + { + CancelIoEx(m_handle, &m_overlapped); + } + + /* Destructor. Closes wait handle, if it was passed in constructor */ + virtual ~Listener() + { + if (m_overlapped.hEvent) + CloseHandle(m_overlapped.hEvent); + }; +}; + +/* Winsock extension functions. */ +static LPFN_ACCEPTEX my_AcceptEx; +static LPFN_GETACCEPTEXSOCKADDRS my_GetAcceptExSockaddrs; + +/** + Listener that handles socket connections. + Can be threadpool-bound (i.e the completion is executed in threadpool thread), + or use events for waits. + + Threadpool-bound listener should be used with threadpool scheduler, for better + performance. +*/ +struct Socket_Listener: public Listener +{ + /** Client socket passed to AcceptEx() call.*/ + SOCKET m_client_socket; + + /** Listening socket. 
*/ + MYSQL_SOCKET m_listen_socket; + + /** Buffer for sockaddrs passed to AcceptEx()/GetAcceptExSockaddrs() */ + char m_buffer[2 * sizeof(sockaddr_storage) + 32]; + + /* Threadpool IO struct.*/ + PTP_IO m_tp_io; + + /** + Callback for Windows threadpool's StartThreadpoolIo() function. + */ + static void CALLBACK tp_accept_completion_callback( + PTP_CALLBACK_INSTANCE, PVOID context, PVOID , ULONG io_result, + ULONG_PTR, PTP_IO io) + { + tp_win_callback_prolog(); + Listener *listener= (Listener *)context; + + if (io_result == ERROR_OPERATION_ABORTED) + { + /* ERROR_OPERATION_ABORTED caused by CancelIoEx()*/ + CloseThreadpoolIo(io); + delete listener; + return; + } + listener->completion_callback(io_result == 0); + } + + /** + Constructor + @param listen_socket - listening socket + @PTP_CALLBACK_ENVIRON callback_environ - threadpool environment, or NULL + if threadpool is not used for completion callbacks. + */ + Socket_Listener(MYSQL_SOCKET listen_socket, PTP_CALLBACK_ENVIRON callback_environ) : + Listener((HANDLE)listen_socket.fd,0), + m_client_socket(INVALID_SOCKET), + m_listen_socket(listen_socket) + { + if (callback_environ) + { + /* Accept executed in threadpool. */ + m_tp_io= CreateThreadpoolIo(m_handle, + tp_accept_completion_callback, this, callback_environ); + } + else + { + /* Completion signaled via event. 
*/ + m_tp_io= 0; + m_overlapped.hEvent= CreateEvent(0, FALSE , FALSE, 0); + } + } + + /* + Use AcceptEx to asynchronously wait for new connection; + */ + void begin_accept() + { +retry : + m_client_socket= socket(m_listen_socket.address_family, SOCK_STREAM, + IPPROTO_TCP); + if (m_client_socket == INVALID_SOCKET) + { + sql_perror("socket() call failed."); + unireg_abort(1); + } + + DWORD bytes_received; + if (m_tp_io) + StartThreadpoolIo(m_tp_io); + + BOOL ret= my_AcceptEx( + (SOCKET)m_handle, + m_client_socket, + m_buffer, + 0, + sizeof(sockaddr_storage) + 16, + sizeof(sockaddr_storage) + 16, + &bytes_received, + &m_overlapped); + + DWORD last_error= ret? 0: WSAGetLastError(); + if (last_error == WSAECONNRESET || last_error == ERROR_NETNAME_DELETED) + { + if (m_tp_io) + CancelThreadpoolIo(m_tp_io); + closesocket(m_client_socket); + goto retry; + } + + if (ret || last_error == ERROR_IO_PENDING || abort_loop) + return; + + sql_print_error("my_AcceptEx failed, last error %u", last_error); + abort(); + } + + /* Create new socket connection.*/ + void completion_callback(bool success) + { + if (!success) + { + /* my_AcceptEx() returned error */ + closesocket(m_client_socket); + begin_accept(); + return; + } + + MYSQL_SOCKET s_client{m_client_socket}; + +#ifdef HAVE_PSI_SOCKET_INTERFACE + /* Parse socket addresses buffer filled by AcceptEx(), + only needed for PSI instrumentation. */ + sockaddr *local_addr, *remote_addr; + int local_addr_len, remote_addr_len; + + my_GetAcceptExSockaddrs(m_buffer, + 0, sizeof(sockaddr_storage) + 16, sizeof(sockaddr_storage) + 16, + &local_addr, &local_addr_len, &remote_addr, &remote_addr_len); + + s_client.m_psi= PSI_SOCKET_CALL(init_socket) + (key_socket_client_connection, (const my_socket*)&m_listen_socket.fd, + remote_addr, remote_addr_len); +#endif + + /* Start accepting new connection. After this point, do not use + any member data, they could be used by a different (threadpool) thread. 
*/ + begin_accept(); + + /* Some chores post-AcceptEx() that we need to create a normal socket.*/ + if (setsockopt(s_client.fd, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT, + (char *)&m_listen_socket.fd, sizeof(m_listen_socket.fd))) + { + if (!abort_loop) + { + sql_perror("setsockopt(SO_UPDATE_ACCEPT_CONTEXT) failed."); + abort(); + } + } + + /* Create a new connection.*/ + handle_accepted_socket(s_client, m_listen_socket); + } + + ~Socket_Listener() + { + if (m_client_socket != INVALID_SOCKET) + closesocket(m_client_socket); + } + + /* + Retrieve the pointer to the Winsock extension functions + AcceptEx and GetAcceptExSockaddrs. + */ + static void init_winsock_extensions() + { + if (listen_sockets.size() == 0) { + /* --skip-networking was used*/ + return; + } + + SOCKET s= mysql_socket_getfd(listen_sockets.at(0)); + GUID guid_AcceptEx= WSAID_ACCEPTEX; + GUID guid_GetAcceptExSockaddrs= WSAID_GETACCEPTEXSOCKADDRS; + + GUID *guids[]= { &guid_AcceptEx, &guid_GetAcceptExSockaddrs }; + void *funcs[]= { &my_AcceptEx, &my_GetAcceptExSockaddrs }; + DWORD bytes; + for (int i= 0; i < array_elements(guids); i++) + { + if (WSAIoctl(s, + SIO_GET_EXTENSION_FUNCTION_POINTER, + guids[i], sizeof(GUID), + funcs[i], sizeof(void *), + &bytes, 0, 0) == -1) + { + sql_print_error("WSAIoctl(SIO_GET_EXTENSION_FUNCTION_POINTER) failed"); + unireg_abort(1); + } + } + } +}; + +/* + Create a security descriptor for pipe. + - Use low integrity level, so that it is possible to connect + from any process. + - Give current user read/write access to pipe. 
+ - Give Everyone read/write access to pipe minus FILE_CREATE_PIPE_INSTANCE +*/ +static void init_pipe_security_descriptor() +{ +#define SDDL_FMT "S:(ML;; NW;;; LW) D:(A;; 0x%08x;;; WD)(A;; FRFW;;; %s)" +#define EVERYONE_PIPE_ACCESS_MASK \ + (FILE_READ_DATA | FILE_READ_EA | FILE_READ_ATTRIBUTES | READ_CONTROL | \ + SYNCHRONIZE | FILE_WRITE_DATA | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES) + +#ifndef SECURITY_MAX_SID_STRING_CHARACTERS +/* Old SDK does not have this constant */ +#define SECURITY_MAX_SID_STRING_CHARACTERS 187 +#endif + + /* + Figure out SID of the user that runs the server, then create SDDL string + for pipe permissions, and convert it to the security descriptor. + */ + char sddl_string[sizeof(SDDL_FMT) + 8 + SECURITY_MAX_SID_STRING_CHARACTERS]; + struct + { + TOKEN_USER token_user; + BYTE buffer[SECURITY_MAX_SID_SIZE]; + } token_buffer; + HANDLE token; + DWORD tmp; + + if (!OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &token)) + goto fail; + + if (!GetTokenInformation(token, TokenUser, &token_buffer, + (DWORD) sizeof(token_buffer), &tmp)) + goto fail; + + CloseHandle(token); + + char *current_user_string_sid; + if (!ConvertSidToStringSid(token_buffer.token_user.User.Sid, + &current_user_string_sid)) + goto fail; + + snprintf(sddl_string, sizeof(sddl_string), SDDL_FMT, + (unsigned int)EVERYONE_PIPE_ACCESS_MASK, + current_user_string_sid); + LocalFree(current_user_string_sid); + + if (ConvertStringSecurityDescriptorToSecurityDescriptor(sddl_string, + SDDL_REVISION_1, &pipe_security.lpSecurityDescriptor, 0)) + return; + +fail: + sql_perror("Can't start server : Initialize security descriptor"); + unireg_abort(1); +} + +/** + Pipe Listener. 
+ Only event notification mode is implemented, no threadpool +*/ +struct Pipe_Listener : public Listener +{ + PTP_CALLBACK_ENVIRON m_tp_env; + Pipe_Listener(): + Listener(create_named_pipe(), CreateEvent(0, FALSE, FALSE, 0)), + m_tp_env(get_threadpool_win_callback_environ()) + { + } + + /* + Creates local named pipe instance \\.\pipe\$socket for named pipe connection. + */ + static HANDLE create_named_pipe() + { + static bool first_instance= true; + static char pipe_name[512]; + DWORD open_mode= PIPE_ACCESS_DUPLEX | + FILE_FLAG_OVERLAPPED; + + if (first_instance) + { + snprintf(pipe_name, sizeof(pipe_name), "\\\\.\\pipe\\%s", mysqld_unix_port); + open_mode |= FILE_FLAG_FIRST_PIPE_INSTANCE; + init_pipe_security_descriptor(); + pipe_security.nLength= sizeof(SECURITY_ATTRIBUTES); + pipe_security.bInheritHandle= FALSE; + } + HANDLE pipe_handle= CreateNamedPipe(pipe_name, + open_mode, + PIPE_TYPE_BYTE | PIPE_READMODE_BYTE | PIPE_WAIT, + PIPE_UNLIMITED_INSTANCES, + (int)global_system_variables.net_buffer_length, + (int)global_system_variables.net_buffer_length, + NMPWAIT_USE_DEFAULT_WAIT, + &pipe_security); + if (pipe_handle == INVALID_HANDLE_VALUE) + { + sql_perror("Create named pipe failed"); + sql_print_error("Aborting"); + unireg_abort(1); + } + first_instance= false; + return pipe_handle; + } + + static void create_pipe_connection(HANDLE pipe) + { + if (auto connect= new CONNECT(pipe)) + create_new_thread(connect); + else + { + CloseHandle(pipe); + statistic_increment(aborted_connects, &LOCK_status); + statistic_increment(connection_errors_internal, &LOCK_status); + } + } + + /* Threadpool callback.*/ + static void CALLBACK tp_create_pipe_connection( + PTP_CALLBACK_INSTANCE,void *Context) + { + tp_win_callback_prolog(); + create_pipe_connection(Context); + } + + void begin_accept() + { + BOOL connected= ConnectNamedPipe(m_handle, &m_overlapped); + if (connected) + { + /* Overlapped ConnectNamedPipe should return zero. 
*/ + sql_perror("Overlapped ConnectNamedPipe() already connected."); + abort(); + } + DWORD last_error= GetLastError(); + switch (last_error) + { + case ERROR_PIPE_CONNECTED: + /* Client is already connected, so signal an event.*/ + { + /* + Cleanup overlapped (so that subsequent GetOverlappedResult() + does not show results of previous IO + */ + HANDLE e= m_overlapped.hEvent; + memset(&m_overlapped, 0, sizeof(m_overlapped)); + m_overlapped.hEvent = e; + } + if (!SetEvent(m_overlapped.hEvent)) + { + sql_perror("SetEvent() failed for connected pipe."); + abort(); + } + break; + case ERROR_IO_PENDING: + break; + default: + sql_perror("ConnectNamedPipe() failed."); + abort(); + break; + } + } + + void completion_callback(bool success) + { + if (!success) + { +#ifdef DBUG_OFF + sql_print_warning("ConnectNamedPipe completed with %u", GetLastError()); +#endif + CloseHandle(m_handle); + m_handle= create_named_pipe(); + begin_accept(); + return; + } + HANDLE pipe= m_handle; + m_handle= create_named_pipe(); + begin_accept(); + // If threadpool is on, create connection in threadpool thread + if (!m_tp_env || !TrySubmitThreadpoolCallback(tp_create_pipe_connection, pipe, m_tp_env)) + create_pipe_connection(pipe); + } + + ~Pipe_Listener() + { + if (m_handle != INVALID_HANDLE_VALUE) + { + CloseHandle(m_handle); + } + } + + static void cleanup() + { + LocalFree(pipe_security.lpSecurityDescriptor); + } +}; + + /* The shutdown event, which is set whenever*/ +static void create_shutdown_event() +{ + char shutdown_event_name[40]; + sprintf_s(shutdown_event_name, "MySQLShutdown%u", GetCurrentProcessId()); + if (!(hEventShutdown= CreateEvent(0, FALSE, FALSE, shutdown_event_name))) + { + sql_print_error("Can't create shutdown event, Windows error %u", GetLastError()); + unireg_abort(1); + } +} + +/** + Accept new client connections on Windows. + + Since we deal with pipe and sockets, they cannot be put into a select/loop. 
+ But we can use asynchronous IO, and WaitForMultipleObject() loop. + + In addition, for slightly better performance, if we're using threadpool, + socket connections are accepted directly in the threadpool. + + The mode of operation is therefore + + 1. There is WaitForMultipleObject() loop that waits for shutdown notification + (hEventShutdown), and possibly pipes and sockets(e.g if threadpool is not used) + This loop ends when shutdown notification is detected. + + 2. If threadpool is used, new socket connections are accepted there. +*/ + + +#define NUM_PIPE_LISTENERS 24 +#define SHUTDOWN_IDX 0 +#define LISTENER_START_IDX 1 + +static std::vector<Listener *> all_listeners; +static std::vector<HANDLE> wait_events; + +void network_init_win() +{ + Socket_Listener::init_winsock_extensions(); + + /* Listen for TCP connections on "extra-port" (no threadpool).*/ + for (uint i= 0 ; i < listen_sockets.elements() ; i++) + { + MYSQL_SOCKET *sock= listen_sockets.get_pos(i); + if (sock->is_extra_port) + all_listeners.push_back(new Socket_Listener(*sock, 0)); + } + + /* Listen for named pipe connections */ + if (mysqld_unix_port[0] && !opt_bootstrap && opt_enable_named_pipe) + { + /* + Use several listeners for pipe, to reduce ERROR_PIPE_BUSY on client side. 
+ */ + for (int i= 0; i < NUM_PIPE_LISTENERS; i++) + all_listeners.push_back(new Pipe_Listener()); + } + + for (uint i= 0 ; i < listen_sockets.elements() ; i++) + { + MYSQL_SOCKET *sock= listen_sockets.get_pos(i); + if (sock->is_extra_port) + continue; + /* Wait for TCP connections.*/ + SetFileCompletionNotificationModes((HANDLE) sock->fd, + FILE_SKIP_SET_EVENT_ON_HANDLE); + all_listeners.push_back( + new Socket_Listener(*sock, get_threadpool_win_callback_environ())); + } + + if (all_listeners.size() == 0 && !opt_bootstrap) + { + sql_print_error("Either TCP connections or named pipe connections must be enabled."); + unireg_abort(1); + } +} + +void handle_connections_win() +{ + int n_waits; + + create_shutdown_event(); + wait_events.push_back(hEventShutdown); + n_waits= 1; + + for (size_t i= 0; i < all_listeners.size(); i++) + { + HANDLE wait_handle= all_listeners[i]->wait_handle(); + if (wait_handle) + { + DBUG_ASSERT((i == 0) || (all_listeners[i - 1]->wait_handle() != 0)); + wait_events.push_back(wait_handle); + } + all_listeners[i]->begin_accept(); + } + + mysqld_win_set_startup_complete(); + + // WaitForMultipleObjects can't wait on more than MAXIMUM_WAIT_OBJECTS + // handles simultaneously. Since MAXIMUM_WAIT_OBJECTS is only 64, there is + // a theoretical possiblity of exceeding that limit on installations where + // host name resolves to a lot of addresses. + if (wait_events.size() > MAXIMUM_WAIT_OBJECTS) + { + sql_print_warning( + "Too many wait events (%lu). Some connection listeners won't be handled. 
" + "Try to switch \"thread-handling\" to \"pool-of-threads\" and/or disable " + "\"extra-port\".", static_cast(wait_events.size())); + wait_events.resize(MAXIMUM_WAIT_OBJECTS); + } + + for (;;) + { + DBUG_ASSERT(wait_events.size() <= MAXIMUM_WAIT_OBJECTS); + DWORD idx = WaitForMultipleObjects((DWORD)wait_events.size(), + wait_events.data(), FALSE, INFINITE); + DBUG_ASSERT((int)idx >= 0 && (int)idx < (int)wait_events.size()); + + if (idx == SHUTDOWN_IDX) + break; + + all_listeners[idx - LISTENER_START_IDX]->completion_callback(); + } + + mysqld_win_initiate_shutdown(); + + /* Cleanup */ + for (size_t i= 0; i < all_listeners.size(); i++) + { + Listener *listener= all_listeners[i]; + if (listener->wait_handle()) + delete listener; + else + // Threadpool-bound listener will be deleted in threadpool + // Do not call destructor, because callback maybe running. + listener->cancel(); + } + Pipe_Listener::cleanup(); +} diff --git a/sql/handle_connections_win.h b/sql/handle_connections_win.h new file mode 100644 index 00000000..bf66c081 --- /dev/null +++ b/sql/handle_connections_win.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2018 MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ + +/** + Handles incoming socket and pipe connections, on Windows. + Creates new (THD) connections.. 
+*/ +extern void handle_connections_win(); +extern void network_init_win(); diff --git a/sql/handler.cc b/sql/handler.cc new file mode 100644 index 00000000..230bcf5e --- /dev/null +++ b/sql/handler.cc @@ -0,0 +1,8789 @@ +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. + Copyright (c) 2009, 2023, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** @file handler.cc + + @brief + Handler-calling-functions +*/ + +#include "mariadb.h" +#include +#include "sql_priv.h" +#include "unireg.h" +#include "rpl_rli.h" +#include "sql_cache.h" // query_cache, query_cache_* +#include "sql_connect.h" // global_table_stats +#include "key.h" // key_copy, key_unpack, key_cmp_if_same, key_cmp +#include "sql_table.h" // build_table_filename +#include "sql_parse.h" // check_stack_overrun +#include "sql_base.h" // TDC_element +#include "discover.h" // extension_based_table_discovery, etc +#include "log_event.h" // *_rows_log_event +#include "create_options.h" +#include +#include "transaction.h" +#include "myisam.h" +#include "probes_mysql.h" +#include +#include +#include +#include "debug_sync.h" // DEBUG_SYNC +#include "sql_audit.h" +#include "ha_sequence.h" +#include "rowid_filter.h" +#include "mysys_err.h" + +#ifdef WITH_PARTITION_STORAGE_ENGINE +#include "ha_partition.h" +#endif + +#ifdef WITH_ARIA_STORAGE_ENGINE +#include "../storage/maria/ha_maria.h" 
+#endif +#include "semisync_master.h" + +#include "wsrep_mysqld.h" +#ifdef WITH_WSREP +#include "wsrep_binlog.h" +#include "wsrep_xid.h" +#include "wsrep_thd.h" +#include "wsrep_trans_observer.h" /* wsrep transaction hooks */ +#include "wsrep_var.h" /* wsrep_hton_check() */ +#endif /* WITH_WSREP */ + +/** + @def MYSQL_TABLE_LOCK_WAIT + Instrumentation helper for table io_waits. + @param OP the table operation to be performed + @param FLAGS per table operation flags. + @param PAYLOAD the code to instrument. + @sa MYSQL_END_TABLE_WAIT. +*/ +#ifdef HAVE_PSI_TABLE_INTERFACE + #define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD) \ + { \ + if (m_psi != NULL) \ + { \ + PSI_table_locker *locker; \ + PSI_table_locker_state state; \ + locker= PSI_TABLE_CALL(start_table_lock_wait) \ + (& state, m_psi, OP, FLAGS, \ + __FILE__, __LINE__); \ + PAYLOAD \ + if (locker != NULL) \ + PSI_TABLE_CALL(end_table_lock_wait)(locker); \ + } \ + else \ + { \ + PAYLOAD \ + } \ + } +#else + #define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD) \ + PAYLOAD +#endif + + +/* + While we have legacy_db_type, we have this array to + check for dups and to find handlerton from legacy_db_type. + Remove when legacy_db_type is finally gone +*/ +st_plugin_int *hton2plugin[MAX_HA]; + +static handlerton *installed_htons[128]; + +#define BITMAP_STACKBUF_SIZE (128/8) + +KEY_CREATE_INFO default_key_create_info= +{ HA_KEY_ALG_UNDEF, 0, 0, {NullS, 0}, {NullS, 0}, false }; + +/* number of entries in handlertons[] */ +ulong total_ha= 0; +/* number of storage engines (from handlertons[]) that support 2pc */ +ulong total_ha_2pc= 0; +#ifdef DBUG_ASSERT_EXISTS +/* + Number of non-mandatory 2pc handlertons whose initialization failed + to estimate total_ha_2pc value under supposition of the failures + have not occcured. 
+*/ +ulong failed_ha_2pc= 0; +#endif +/* size of savepoint storage area (see ha_init) */ +ulong savepoint_alloc_size= 0; + +static const LEX_CSTRING sys_table_aliases[]= +{ + { STRING_WITH_LEN("INNOBASE") }, { STRING_WITH_LEN("INNODB") }, + { STRING_WITH_LEN("HEAP") }, { STRING_WITH_LEN("MEMORY") }, + { STRING_WITH_LEN("MERGE") }, { STRING_WITH_LEN("MRG_MYISAM") }, + { STRING_WITH_LEN("Maria") }, { STRING_WITH_LEN("Aria") }, + {NullS, 0} +}; + +const LEX_CSTRING ha_row_type[]= +{ + { STRING_WITH_LEN("") }, + { STRING_WITH_LEN("FIXED") }, + { STRING_WITH_LEN("DYNAMIC") }, + { STRING_WITH_LEN("COMPRESSED") }, + { STRING_WITH_LEN("REDUNDANT") }, + { STRING_WITH_LEN("COMPACT") }, + { STRING_WITH_LEN("PAGE") } +}; + +const char *tx_isolation_names[]= +{ "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE", + NullS}; +TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"", + tx_isolation_names, NULL}; + +static TYPELIB known_extensions= {0,"known_exts", NULL, NULL}; +uint known_extensions_id= 0; + + +class Table_exists_error_handler : public Internal_error_handler +{ +public: + Table_exists_error_handler() + : m_handled_errors(0), m_unhandled_errors(0) + {} + + bool handle_condition(THD *thd, + uint sql_errno, + const char* sqlstate, + Sql_condition::enum_warning_level *level, + const char* msg, + Sql_condition ** cond_hdl) + { + *cond_hdl= NULL; + if (non_existing_table_error(sql_errno)) + { + m_handled_errors++; + return TRUE; + } + + if (*level == Sql_condition::WARN_LEVEL_ERROR) + m_unhandled_errors++; + return FALSE; + } + + bool safely_trapped_errors() + { + return ((m_handled_errors > 0) && (m_unhandled_errors == 0)); + } + +private: + int m_handled_errors; + int m_unhandled_errors; +}; + + +static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, + bool is_real_trans); + + +static plugin_ref ha_default_plugin(THD *thd) +{ + if (thd->variables.table_plugin) + return thd->variables.table_plugin; + return 
my_plugin_lock(thd, global_system_variables.table_plugin); +} + +static plugin_ref ha_default_tmp_plugin(THD *thd) +{ + if (thd->variables.tmp_table_plugin) + return thd->variables.tmp_table_plugin; + if (global_system_variables.tmp_table_plugin) + return my_plugin_lock(thd, global_system_variables.tmp_table_plugin); + return ha_default_plugin(thd); +} + + +/** @brief + Return the default storage engine handlerton for thread + + SYNOPSIS + ha_default_handlerton(thd) + thd current thread + + RETURN + pointer to handlerton +*/ +handlerton *ha_default_handlerton(THD *thd) +{ + plugin_ref plugin= ha_default_plugin(thd); + DBUG_ASSERT(plugin); + handlerton *hton= plugin_hton(plugin); + DBUG_ASSERT(hton); + return hton; +} + + +handlerton *ha_default_tmp_handlerton(THD *thd) +{ + plugin_ref plugin= ha_default_tmp_plugin(thd); + DBUG_ASSERT(plugin); + handlerton *hton= plugin_hton(plugin); + DBUG_ASSERT(hton); + return hton; +} + + +/** @brief + Return the storage engine handlerton for the supplied name + + SYNOPSIS + ha_resolve_by_name(thd, name) + thd current thread + name name of storage engine + + RETURN + pointer to storage engine plugin handle +*/ +plugin_ref ha_resolve_by_name(THD *thd, const LEX_CSTRING *name, + bool tmp_table) +{ + const LEX_CSTRING *table_alias; + plugin_ref plugin; + +redo: + if (thd && !my_charset_latin1.strnncoll( + (const uchar *)name->str, name->length, + (const uchar *)STRING_WITH_LEN("DEFAULT"), 0)) + return tmp_table ? ha_default_tmp_plugin(thd) : ha_default_plugin(thd); + + if ((plugin= my_plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN))) + { + handlerton *hton= plugin_hton(plugin); + if (hton && !(hton->flags & HTON_NOT_USER_SELECTABLE)) + return plugin; + + /* + unlocking plugin immediately after locking is relatively low cost. + */ + plugin_unlock(thd, plugin); + } + + /* + We check for the historical aliases. 
+ */ + for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2) + { + if (!my_charset_latin1.strnncoll( + (const uchar *)name->str, name->length, + (const uchar *)table_alias->str, table_alias->length)) + { + name= table_alias + 1; + goto redo; + } + } + + return NULL; +} + + +bool +Storage_engine_name::resolve_storage_engine_with_error(THD *thd, + handlerton **ha, + bool tmp_table) +{ + if (plugin_ref plugin= ha_resolve_by_name(thd, &m_storage_engine_name, + tmp_table)) + { + *ha= plugin_hton(plugin); + return false; + } + + *ha= NULL; + if (thd->variables.sql_mode & MODE_NO_ENGINE_SUBSTITUTION) + { + my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), m_storage_engine_name.str); + return true; + } + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_UNKNOWN_STORAGE_ENGINE, + ER_THD(thd, ER_UNKNOWN_STORAGE_ENGINE), + m_storage_engine_name.str); + return false; +} + + +plugin_ref ha_lock_engine(THD *thd, const handlerton *hton) +{ + if (hton) + { + st_plugin_int *plugin= hton2plugin[hton->slot]; + return my_plugin_lock(thd, plugin_int_to_ref(plugin)); + } + return NULL; +} + + +handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type) +{ + plugin_ref plugin; + switch (db_type) { + case DB_TYPE_DEFAULT: + return ha_default_handlerton(thd); + default: + if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT && + (plugin= ha_lock_engine(thd, installed_htons[db_type]))) + return plugin_hton(plugin); + /* fall through */ + case DB_TYPE_UNKNOWN: + return NULL; + } +} + + +/** + Use other database handler if databasehandler is not compiled in. 
+*/ +handlerton *ha_checktype(THD *thd, handlerton *hton, bool no_substitute) +{ + if (ha_storage_engine_is_enabled(hton)) + return hton; + + if (no_substitute) + return NULL; + + return ha_default_handlerton(thd); +} /* ha_checktype */ + + +handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc, + handlerton *db_type) +{ + handler *file; + DBUG_ENTER("get_new_handler"); + DBUG_PRINT("enter", ("alloc: %p", alloc)); + + if (ha_storage_engine_is_enabled(db_type)) + { + if ((file= db_type->create(db_type, share, alloc))) + file->init(); + DBUG_RETURN(file); + } + /* + Try the default table type + Here the call to current_thd() is ok as we call this function a lot of + times but we enter this branch very seldom. + */ + file= get_new_handler(share, alloc, ha_default_handlerton(current_thd)); + DBUG_RETURN(file); +} + + +#ifdef WITH_PARTITION_STORAGE_ENGINE +handler *get_ha_partition(partition_info *part_info) +{ + ha_partition *partition; + DBUG_ENTER("get_ha_partition"); + if ((partition= new ha_partition(partition_hton, part_info))) + { + if (partition->initialize_partition(current_thd->mem_root)) + { + delete partition; + partition= 0; + } + else + partition->init(); + } + else + { + my_error(ER_OUTOFMEMORY, MYF(ME_FATAL), + static_cast(sizeof(ha_partition))); + } + DBUG_RETURN(((handler*) partition)); +} +#endif + +static const char **handler_errmsgs; + +C_MODE_START +static const char **get_handler_errmsgs(int nr) +{ + return handler_errmsgs; +} +C_MODE_END + + +/** + Register handler error messages for use with my_error(). + + @retval + 0 OK + @retval + !=0 Error +*/ + +int ha_init_errors(void) +{ +#define SETMSG(nr, msg) handler_errmsgs[(nr) - HA_ERR_FIRST]= (msg) + + /* Allocate a pointer array for the error message strings. */ + /* Zerofill it to avoid uninitialized gaps. */ + if (! 
(handler_errmsgs= (const char**) my_malloc(key_memory_handler_errmsgs, + HA_ERR_ERRORS * sizeof(char*), + MYF(MY_WME | MY_ZEROFILL)))) + return 1; + + /* Set the dedicated error messages. */ + SETMSG(HA_ERR_KEY_NOT_FOUND, ER_DEFAULT(ER_KEY_NOT_FOUND)); + SETMSG(HA_ERR_FOUND_DUPP_KEY, ER_DEFAULT(ER_DUP_KEY)); + SETMSG(HA_ERR_RECORD_CHANGED, "Update which is recoverable"); + SETMSG(HA_ERR_WRONG_INDEX, "Wrong index given to function"); + SETMSG(HA_ERR_CRASHED, ER_DEFAULT(ER_NOT_KEYFILE)); + SETMSG(HA_ERR_WRONG_IN_RECORD, ER_DEFAULT(ER_CRASHED_ON_USAGE)); + SETMSG(HA_ERR_OUT_OF_MEM, "Table handler out of memory"); + SETMSG(HA_ERR_NOT_A_TABLE, "Incorrect file format '%.64s'"); + SETMSG(HA_ERR_WRONG_COMMAND, "Command not supported"); + SETMSG(HA_ERR_OLD_FILE, ER_DEFAULT(ER_OLD_KEYFILE)); + SETMSG(HA_ERR_NO_ACTIVE_RECORD, "No record read in update"); + SETMSG(HA_ERR_RECORD_DELETED, "Intern record deleted"); + SETMSG(HA_ERR_RECORD_FILE_FULL, ER_DEFAULT(ER_RECORD_FILE_FULL)); + SETMSG(HA_ERR_INDEX_FILE_FULL, "No more room in index file '%.64s'"); + SETMSG(HA_ERR_END_OF_FILE, "End in next/prev/first/last"); + SETMSG(HA_ERR_UNSUPPORTED, ER_DEFAULT(ER_ILLEGAL_HA)); + SETMSG(HA_ERR_TO_BIG_ROW, "Too big row"); + SETMSG(HA_WRONG_CREATE_OPTION, "Wrong create option"); + SETMSG(HA_ERR_FOUND_DUPP_UNIQUE, ER_DEFAULT(ER_DUP_UNIQUE)); + SETMSG(HA_ERR_UNKNOWN_CHARSET, "Can't open charset"); + SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF, ER_DEFAULT(ER_WRONG_MRG_TABLE)); + SETMSG(HA_ERR_CRASHED_ON_REPAIR, ER_DEFAULT(ER_CRASHED_ON_REPAIR)); + SETMSG(HA_ERR_CRASHED_ON_USAGE, ER_DEFAULT(ER_CRASHED_ON_USAGE)); + SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT, ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT)); + SETMSG(HA_ERR_LOCK_TABLE_FULL, ER_DEFAULT(ER_LOCK_TABLE_FULL)); + SETMSG(HA_ERR_READ_ONLY_TRANSACTION, ER_DEFAULT(ER_READ_ONLY_TRANSACTION)); + SETMSG(HA_ERR_LOCK_DEADLOCK, ER_DEFAULT(ER_LOCK_DEADLOCK)); + SETMSG(HA_ERR_CANNOT_ADD_FOREIGN, ER_DEFAULT(ER_CANNOT_ADD_FOREIGN)); + SETMSG(HA_ERR_NO_REFERENCED_ROW, 
ER_DEFAULT(ER_NO_REFERENCED_ROW_2)); + SETMSG(HA_ERR_ROW_IS_REFERENCED, ER_DEFAULT(ER_ROW_IS_REFERENCED_2)); + SETMSG(HA_ERR_NO_SAVEPOINT, "No savepoint with that name"); + SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE, "Non unique key block size"); + SETMSG(HA_ERR_NO_SUCH_TABLE, "No such table: '%.64s'"); + SETMSG(HA_ERR_TABLE_EXIST, ER_DEFAULT(ER_TABLE_EXISTS_ERROR)); + SETMSG(HA_ERR_NO_CONNECTION, "Could not connect to storage engine"); + SETMSG(HA_ERR_TABLE_DEF_CHANGED, ER_DEFAULT(ER_TABLE_DEF_CHANGED)); + SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY, "FK constraint would lead to duplicate key"); + SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE, ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE)); + SETMSG(HA_ERR_TABLE_READONLY, ER_DEFAULT(ER_OPEN_AS_READONLY)); + SETMSG(HA_ERR_AUTOINC_READ_FAILED, ER_DEFAULT(ER_AUTOINC_READ_FAILED)); + SETMSG(HA_ERR_AUTOINC_ERANGE, ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE)); + SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS)); + SETMSG(HA_ERR_INDEX_COL_TOO_LONG, ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG)); + SETMSG(HA_ERR_INDEX_CORRUPT, ER_DEFAULT(ER_INDEX_CORRUPT)); + SETMSG(HA_FTS_INVALID_DOCID, "Invalid InnoDB FTS Doc ID"); + SETMSG(HA_ERR_DISK_FULL, ER_DEFAULT(ER_DISK_FULL)); + SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE, "Too many words in a FTS phrase or proximity search"); + SETMSG(HA_ERR_FK_DEPTH_EXCEEDED, "Foreign key cascade delete/update exceeds"); + SETMSG(HA_ERR_TABLESPACE_MISSING, ER_DEFAULT(ER_TABLESPACE_MISSING)); + + /* Register the error messages for use with my_error(). */ + return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST); +} + + +/** + Unregister handler error messages. + + @retval + 0 OK + @retval + !=0 Error +*/ +static int ha_finish_errors(void) +{ + /* Allocate a pointer array for the error message strings. 
*/ + my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST); + my_free(handler_errmsgs); + handler_errmsgs= 0; + return 0; +} + +static Atomic_counter need_full_discover_for_existence(0); +static Atomic_counter engines_with_discover_file_names(0); +static Atomic_counter engines_with_discover(0); + +static int full_discover_for_existence(handlerton *, const char *, const char *) +{ return 0; } + +static int ext_based_existence(handlerton *, const char *, const char *) +{ return 0; } + +static int hton_ext_based_table_discovery(handlerton *hton, LEX_CSTRING *db, + MY_DIR *dir, handlerton::discovered_list *result) +{ + /* + tablefile_extensions[0] is the metadata file, see + the comment above tablefile_extensions declaration + */ + return extension_based_table_discovery(dir, hton->tablefile_extensions[0], + result); +} + +static void update_discovery_counters(handlerton *hton, int val) +{ + if (hton->discover_table_existence == full_discover_for_existence) + need_full_discover_for_existence+= val; + + if (hton->discover_table_names && hton->tablefile_extensions[0]) + engines_with_discover_file_names+= val; + + if (hton->discover_table) + engines_with_discover+= val; +} + +int ha_drop_table(THD *thd, handlerton *hton, const char *path) +{ + if (ha_check_if_updates_are_ignored(thd, hton, "DROP")) + return 0; // Simulate dropped + return hton->drop_table(hton, path); +} + +static int hton_drop_table(handlerton *hton, const char *path) +{ + char tmp_path[FN_REFLEN]; + handler *file= get_new_handler(nullptr, current_thd->mem_root, hton); + if (!file) + { + /* + If file is not defined it means that the engine can't create a + handler if share is not set or we got an out of memory error + */ + return my_errno == ENOMEM ? 
ENOMEM : ENOENT; + } + path= get_canonical_filename(file, path, tmp_path); + int error= file->delete_table(path); + delete file; + return error; +} + + +int ha_finalize_handlerton(st_plugin_int *plugin) +{ + int deinit_status= 0; + handlerton *hton= (handlerton *)plugin->data; + DBUG_ENTER("ha_finalize_handlerton"); + + /* hton can be NULL here, if ha_initialize_handlerton() failed. */ + if (!hton) + goto end; + + if (installed_htons[hton->db_type] == hton) + installed_htons[hton->db_type]= NULL; + + if (hton->panic) + hton->panic(hton, HA_PANIC_CLOSE); + + if (plugin->plugin->deinit) + deinit_status= plugin->plugin->deinit(NULL); + + free_sysvar_table_options(hton); + update_discovery_counters(hton, -1); + + /* + In case a plugin is uninstalled and re-installed later, it should + reuse an array slot. Otherwise the number of uninstall/install + cycles would be limited. + */ + if (hton->slot != HA_SLOT_UNDEF) + { + /* Make sure we are not unpluging another plugin */ + DBUG_ASSERT(hton2plugin[hton->slot] == plugin); + DBUG_ASSERT(hton->slot < MAX_HA); + hton2plugin[hton->slot]= NULL; + } + + my_free(hton); + + end: + DBUG_RETURN(deinit_status); +} + + +const char *hton_no_exts[]= { 0 }; + + +int ha_initialize_handlerton(st_plugin_int *plugin) +{ + handlerton *hton; + int ret= 0; + DBUG_ENTER("ha_initialize_handlerton"); + DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str)); + + hton= (handlerton *)my_malloc(key_memory_handlerton, sizeof(handlerton), + MYF(MY_WME | MY_ZEROFILL)); + if (hton == NULL) + { + sql_print_error("Unable to allocate memory for plugin '%s' handlerton.", + plugin->name.str); + ret= 1; + goto err_no_hton_memory; + } + + hton->tablefile_extensions= hton_no_exts; + hton->discover_table_names= hton_ext_based_table_discovery; + hton->drop_table= hton_drop_table; + + hton->slot= HA_SLOT_UNDEF; + /* Historical Requirement */ + plugin->data= hton; // shortcut for the future + /* [remove after merge] notes on merge conflict (MDEV-31400): 
     10.6-10.11: 13ba00ff4933cfc1712676f323587504e453d1b5
     11.0-11.2: 42f8be10f18163c4025710cf6a212e82bddb2f62
     The 10.11->11.0 conflict is trivial, but the reference commit also
     contains different non-conflict changes needs to be applied to 11.0
     (and beyond).
  */
  if (plugin->plugin->init && (ret= plugin->plugin->init(hton)))
    goto err;

  // hton_ext_based_table_discovery() works only when discovery
  // is supported and the engine if file-based.
  if (hton->discover_table_names == hton_ext_based_table_discovery &&
      (!hton->discover_table || !hton->tablefile_extensions[0]))
    hton->discover_table_names= NULL;

  // default discover_table_existence implementation
  if (!hton->discover_table_existence && hton->discover_table)
  {
    if (hton->tablefile_extensions[0])
      hton->discover_table_existence= ext_based_existence;
    else
      hton->discover_table_existence= full_discover_for_existence;
  }

  uint tmp;
  ulong fslot;

  /* Debug hook: force dynamic typecode assignment to test conflict handling */
  DBUG_EXECUTE_IF("unstable_db_type", {
    static int i= (int) DB_TYPE_FIRST_DYNAMIC;
    hton->db_type= (enum legacy_db_type)++i;
  });

  /* now check the db_type for conflict */
  if (hton->db_type <= DB_TYPE_UNKNOWN ||
      hton->db_type >= DB_TYPE_DEFAULT ||
      installed_htons[hton->db_type])
  {
    /* Pick the first free dynamic typecode instead */
    int idx= (int) DB_TYPE_FIRST_DYNAMIC;

    while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
      idx++;

    if (idx == (int) DB_TYPE_DEFAULT)
    {
      sql_print_warning("Too many storage engines!");
      ret= 1;
      goto err_deinit;
    }
    if (hton->db_type != DB_TYPE_UNKNOWN)
      sql_print_warning("Storage engine '%s' has conflicting typecode. "
                        "Assigning value %d.", plugin->plugin->name, idx);
    hton->db_type= (enum legacy_db_type) idx;
  }

  /*
    In case a plugin is uninstalled and re-installed later, it should
    reuse an array slot. Otherwise the number of uninstall/install
    cycles would be limited. So look for a free slot.
  */
  DBUG_PRINT("plugin", ("total_ha: %lu", total_ha));
  for (fslot= 0; fslot < total_ha; fslot++)
  {
    if (!hton2plugin[fslot])
      break;
  }
  if (fslot < total_ha)
    hton->slot= fslot;
  else
  {
    if (total_ha >= MAX_HA)
    {
      sql_print_error("Too many plugins loaded. Limit is %lu. "
                      "Failed on '%s'", (ulong) MAX_HA, plugin->name.str);
      ret= 1;
      goto err_deinit;
    }
    hton->slot= total_ha++;
  }
  installed_htons[hton->db_type]= hton;
  /* Reserve this engine's per-savepoint data area inside SAVEPOINT */
  tmp= hton->savepoint_offset;
  hton->savepoint_offset= savepoint_alloc_size;
  savepoint_alloc_size+= tmp;
  hton2plugin[hton->slot]=plugin;
  if (hton->prepare)
  {
    total_ha_2pc++;
    if (tc_log && tc_log != get_tc_log_implementation())
    {
      /* tc-log cannot be enabled at run-time: disable XA for this engine */
      total_ha_2pc--;
      hton->prepare= 0;
      push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                          ER_UNKNOWN_ERROR,
                          "Cannot enable tc-log at run-time. "
                          "XA features of %s are disabled",
                          plugin->name.str);
    }
  }

  /*
    This is entirely for legacy. We will create a new "disk based" hton and a
    "memory" hton which will be configurable longterm. We should be able to
    remove partition.
  */
  switch (hton->db_type) {
  case DB_TYPE_HEAP:
    heap_hton= hton;
    break;
  case DB_TYPE_MYISAM:
    myisam_hton= hton;
    break;
  case DB_TYPE_PARTITION_DB:
    partition_hton= hton;
    break;
  case DB_TYPE_SEQUENCE:
    sql_sequence_hton= hton;
    break;
  default:
    break;
  };

  resolve_sysvar_table_options(hton);
  update_discovery_counters(hton, 1);

  DBUG_RETURN(ret);

err_deinit:
  /*
    Let plugin do its inner deinitialization as plugin->init()
    was successfully called before.
  */
  if (plugin->plugin->deinit)
    (void) plugin->plugin->deinit(NULL);

err:
#ifdef DBUG_ASSERT_EXISTS
  if (hton->prepare)
    failed_ha_2pc++;
#endif
  my_free(hton);
err_no_hton_memory:
  plugin->data= NULL;
  DBUG_RETURN(ret);
}

/* Late server-side initialization of the handler layer. */
int ha_init()
{
  int error= 0;
  DBUG_ENTER("ha_init");

  DBUG_ASSERT(total_ha < MAX_HA);
  /*
    Check if there is a transaction-capable storage engine besides the
    binary log (which is considered a transaction-capable storage engine in
    counting total_ha)
  */
  opt_using_transactions= total_ha > (ulong) opt_bin_log;
  savepoint_alloc_size+= sizeof(SAVEPOINT);
  DBUG_RETURN(error);
}

/* Shutdown counterpart of ha_init(): releases the error-message table. */
int ha_end()
{
  int error= 0;
  DBUG_ENTER("ha_end");

  /*
    This should be eventually based on the graceful shutdown flag.
    So if flag is equal to HA_PANIC_CLOSE, the deallocate
    the errors.
  */
  if (unlikely(ha_finish_errors()))
    error= 1;

  DBUG_RETURN(error);
}

/* plugin_foreach callback: forward DROP DATABASE to one engine. */
static my_bool dropdb_handlerton(THD *unused1, plugin_ref plugin,
                                 void *path)
{
  handlerton *hton= plugin_hton(plugin);
  if (hton->drop_database)
    hton->drop_database(hton, (char *)path);
  return FALSE;
}


void ha_drop_database(const char* path)
{
  plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
                 (char*) path);
}


/* plugin_foreach callback: toggle checkpointing in one engine. */
static my_bool checkpoint_state_handlerton(THD *unused1, plugin_ref plugin,
                                           void *disable)
{
  handlerton *hton= plugin_hton(plugin);
  if (hton->checkpoint_state)
    hton->checkpoint_state(hton, (int) *(bool*) disable);
  return FALSE;
}


void ha_checkpoint_state(bool disable)
{
  plugin_foreach(NULL, checkpoint_state_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &disable);
}


/* Arguments passed through plugin_foreach to each engine's checkpoint hook */
struct st_commit_checkpoint_request {
  void *cookie;
  void (*pre_hook)(void *);
};

static my_bool commit_checkpoint_request_handlerton(THD *unused1, plugin_ref plugin,
                                                    void *data)
{
  st_commit_checkpoint_request *st= (st_commit_checkpoint_request *)data;
  handlerton *hton= plugin_hton(plugin);
  if
(hton->commit_checkpoint_request) + { + void *cookie= st->cookie; + if (st->pre_hook) + (*st->pre_hook)(cookie); + (*hton->commit_checkpoint_request)(cookie); + } + return FALSE; +} + + +/* + Invoke commit_checkpoint_request() in all storage engines that implement it. + + If pre_hook is non-NULL, the hook will be called prior to each invocation. +*/ +void +ha_commit_checkpoint_request(void *cookie, void (*pre_hook)(void *)) +{ + st_commit_checkpoint_request st; + st.cookie= cookie; + st.pre_hook= pre_hook; + plugin_foreach(NULL, commit_checkpoint_request_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, &st); +} + + +/** + @note + don't bother to rollback here, it's done already + + there's no need to rollback here as all transactions must + be rolled back already +*/ +void ha_close_connection(THD* thd) +{ + for (auto i= 0; i < MAX_HA; i++) + { + if (plugin_ref plugin= thd->ha_data[i].lock) + { + thd->ha_data[i].lock= NULL; + handlerton *hton= plugin_hton(plugin); + if (hton->close_connection) + hton->close_connection(hton, thd); + thd_set_ha_data(thd, hton, 0); + plugin_unlock(NULL, plugin); + } + DBUG_ASSERT(!thd->ha_data[i].ha_ptr); + } +} + +static my_bool kill_handlerton(THD *thd, plugin_ref plugin, + void *level) +{ + handlerton *hton= plugin_hton(plugin); + + mysql_mutex_assert_owner(&thd->LOCK_thd_data); + if (hton->kill_query && thd_get_ha_data(thd, hton)) + hton->kill_query(hton, thd, *(enum thd_kill_levels *) level); + return FALSE; +} + +void ha_kill_query(THD* thd, enum thd_kill_levels level) +{ + DBUG_ENTER("ha_kill_query"); + plugin_foreach(thd, kill_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &level); + DBUG_VOID_RETURN; +} + + +static my_bool plugin_disable_internal_writes(THD *, plugin_ref plugin, + void *disable) +{ + if (void(*diw)(bool)= plugin_hton(plugin)->disable_internal_writes) + diw(*static_cast(disable)); + return FALSE; +} + + +void ha_disable_internal_writes(bool disable) +{ + plugin_foreach(NULL, plugin_disable_internal_writes, + 
MYSQL_STORAGE_ENGINE_PLUGIN, &disable); +} + + +static my_bool signal_ddl_recovery_done(THD *, plugin_ref plugin, void *) +{ + handlerton *hton= plugin_hton(plugin); + if (hton->signal_ddl_recovery_done) + (hton->signal_ddl_recovery_done)(hton); + return 0; +} + + +void ha_signal_ddl_recovery_done() +{ + DBUG_ENTER("ha_signal_ddl_recovery_done"); + plugin_foreach(NULL, signal_ddl_recovery_done, MYSQL_STORAGE_ENGINE_PLUGIN, + NULL); + DBUG_VOID_RETURN; +} + + +/***************************************************************************** + Backup functions +******************************************************************************/ + +static my_bool plugin_prepare_for_backup(THD *unused1, plugin_ref plugin, + void *not_used) +{ + handlerton *hton= plugin_hton(plugin); + if (hton->prepare_for_backup) + hton->prepare_for_backup(); + return FALSE; +} + +void ha_prepare_for_backup() +{ + plugin_foreach_with_mask(0, plugin_prepare_for_backup, + MYSQL_STORAGE_ENGINE_PLUGIN, + PLUGIN_IS_DELETED|PLUGIN_IS_READY, 0); +} + +static my_bool plugin_end_backup(THD *unused1, plugin_ref plugin, + void *not_used) +{ + handlerton *hton= plugin_hton(plugin); + if (hton->end_backup) + hton->end_backup(); + return FALSE; +} + +void ha_end_backup() +{ + plugin_foreach_with_mask(0, plugin_end_backup, + MYSQL_STORAGE_ENGINE_PLUGIN, + PLUGIN_IS_DELETED|PLUGIN_IS_READY, 0); +} + +/* + Take a lock to block MDL_BACKUP_DDL (used by maria-backup) until + the DDL operation is taking place +*/ + +bool handler::log_not_redoable_operation(const char *operation) +{ + DBUG_ENTER("log_not_redoable_operation"); + if (table->s->tmp_table == NO_TMP_TABLE) + { + /* + Take a lock to ensure that mariadb-backup will notice the + new log entry (and re-copy the table if needed). 
    */
    THD *thd= table->in_use;
    MDL_request mdl_backup;
    backup_log_info ddl_log;

    MDL_REQUEST_INIT(&mdl_backup, MDL_key::BACKUP, "", "", MDL_BACKUP_DDL,
                     MDL_STATEMENT);
    if (thd->mdl_context.acquire_lock(&mdl_backup,
                                      thd->variables.lock_wait_timeout))
      DBUG_RETURN(1);

    /* Describe the operation for the backup log */
    bzero(&ddl_log, sizeof(ddl_log));
    lex_string_set(&ddl_log.query, operation);
    /*
      We can't use partition_engine() here as this function is called
      directly by the handler for the underlaying partition table
    */
#ifdef WITH_PARTITION_STORAGE_ENGINE
    ddl_log.org_partitioned= table->s->partition_info_str != 0;
#endif
    lex_string_set(&ddl_log.org_storage_engine_name, table_type());
    ddl_log.org_database= table->s->db;
    ddl_log.org_table= table->s->table_name;
    ddl_log.org_table_id= table->s->tabledef_version;
    backup_log_ddl(&ddl_log);
  }
  DBUG_RETURN(0);
}

/*
  Inform plugin of the server shutdown.
  Called after all connections are down.

  Under some circumstances, storage engine might need to
  do some work, before deinit() can be safely called.
  (an example is Innodb purge that might call into server
  to calculate virtual columns, which might potentially also
  invoke other plugins, such as audit)
*/
static my_bool plugin_pre_shutdown(THD *, plugin_ref plugin, void *)
{
  handlerton *hton= plugin_hton(plugin);
  if (hton->pre_shutdown)
    hton->pre_shutdown();
  return FALSE;
}


void ha_pre_shutdown()
{
  plugin_foreach_with_mask(0, plugin_pre_shutdown,
                           MYSQL_STORAGE_ENGINE_PLUGIN,
                           PLUGIN_IS_DELETED | PLUGIN_IS_READY, 0);
}


/* ========================================================================
 ======================= TRANSACTIONS ===================================*/

/**
  Transaction handling in the server
  ==================================

  In each client connection, MySQL maintains two transactional
  states:
  - a statement transaction,
  - a standard, also called normal transaction.
+ + Historical note + --------------- + "Statement transaction" is a non-standard term that comes + from the times when MySQL supported BerkeleyDB storage engine. + + First of all, it should be said that in BerkeleyDB auto-commit + mode auto-commits operations that are atomic to the storage + engine itself, such as a write of a record, and are too + high-granular to be atomic from the application perspective + (MySQL). One SQL statement could involve many BerkeleyDB + auto-committed operations and thus BerkeleyDB auto-commit was of + little use to MySQL. + + Secondly, instead of SQL standard savepoints, BerkeleyDB + provided the concept of "nested transactions". In a nutshell, + transactions could be arbitrarily nested, but when the parent + transaction was committed or aborted, all its child (nested) + transactions were handled committed or aborted as well. + Commit of a nested transaction, in turn, made its changes + visible, but not durable: it destroyed the nested transaction, + all its changes would become available to the parent and + currently active nested transactions of this parent. + + So the mechanism of nested transactions was employed to + provide "all or nothing" guarantee of SQL statements + required by the standard. + A nested transaction would be created at start of each SQL + statement, and destroyed (committed or aborted) at statement + end. Such nested transaction was internally referred to as + a "statement transaction" and gave birth to the term. + + (Historical note ends) + + Since then a statement transaction is started for each statement + that accesses transactional tables or uses the binary log. If + the statement succeeds, the statement transaction is committed. + If the statement fails, the transaction is rolled back. 
Commits + of statement transactions are not durable -- each such + transaction is nested in the normal transaction, and if the + normal transaction is rolled back, the effects of all enclosed + statement transactions are undone as well. Technically, + a statement transaction can be viewed as a savepoint which is + maintained automatically in order to make effects of one + statement atomic. + + The normal transaction is started by the user and is ended + usually upon a user request as well. The normal transaction + encloses transactions of all statements issued between + its beginning and its end. + In autocommit mode, the normal transaction is equivalent + to the statement transaction. + + Since MySQL supports PSEA (pluggable storage engine + architecture), more than one transactional engine can be + active at a time. Hence transactions, from the server + point of view, are always distributed. In particular, + transactional state is maintained independently for each + engine. In order to commit a transaction the two phase + commit protocol is employed. + + Not all statements are executed in context of a transaction. + Administrative and status information statements do not modify + engine data, and thus do not start a statement transaction and + also have no effect on the normal transaction. Examples of such + statements are SHOW STATUS and RESET SLAVE. + + Similarly DDL statements are not transactional, + and therefore a transaction is [almost] never started for a DDL + statement. The difference between a DDL statement and a purely + administrative statement though is that a DDL statement always + commits the current transaction before proceeding, if there is + any. + + At last, SQL statements that work with non-transactional + engines also have no effect on the transaction state of the + connection. 
Even though they are written to the binary log, + and the binary log is, overall, transactional, the writes + are done in "write-through" mode, directly to the binlog + file, followed with a OS cache sync, in other words, + bypassing the binlog undo log (translog). + They do not commit the current normal transaction. + A failure of a statement that uses non-transactional tables + would cause a rollback of the statement transaction, but + in case there no non-transactional tables are used, + no statement transaction is started. + + Data layout + ----------- + + The server stores its transaction-related data in + thd->transaction. This structure has two members of type + THD_TRANS. These members correspond to the statement and + normal transactions respectively: + + - thd->transaction.stmt contains a list of engines + that are participating in the given statement + - thd->transaction.all contains a list of engines that + have participated in any of the statement transactions started + within the context of the normal transaction. + Each element of the list contains a pointer to the storage + engine, engine-specific transactional data, and engine-specific + transaction flags. + + In autocommit mode thd->transaction.all is empty. + Instead, data of thd->transaction.stmt is + used to commit/rollback the normal transaction. + + The list of registered engines has a few important properties: + - no engine is registered in the list twice + - engines are present in the list a reverse temporal order -- + new participants are always added to the beginning of the list. + + Transaction life cycle + ---------------------- + + When a new connection is established, thd->transaction + members are initialized to an empty state. + If a statement uses any tables, all affected engines + are registered in the statement engine list. In + non-autocommit mode, the same engines are registered in + the normal transaction list. 
+ At the end of the statement, the server issues a commit + or a roll back for all engines in the statement list. + At this point transaction flags of an engine, if any, are + propagated from the statement list to the list of the normal + transaction. + When commit/rollback is finished, the statement list is + cleared. It will be filled in again by the next statement, + and emptied again at the next statement's end. + + The normal transaction is committed in a similar way + (by going over all engines in thd->transaction.all list) + but at different times: + - upon COMMIT SQL statement is issued by the user + - implicitly, by the server, at the beginning of a DDL statement + or SET AUTOCOMMIT={0|1} statement. + + The normal transaction can be rolled back as well: + - if the user has requested so, by issuing ROLLBACK SQL + statement + - if one of the storage engines requested a rollback + by setting thd->transaction_rollback_request. This may + happen in case, e.g., when the transaction in the engine was + chosen a victim of the internal deadlock resolution algorithm + and rolled back internally. When such a situation happens, there + is little the server can do and the only option is to rollback + transactions in all other participating engines. In this case + the rollback is accompanied by an error sent to the user. + + As follows from the use cases above, the normal transaction + is never committed when there is an outstanding statement + transaction. In most cases there is no conflict, since + commits of the normal transaction are issued by a stand-alone + administrative or DDL statement, thus no outstanding statement + transaction of the previous statement exists. Besides, + all statements that manipulate with the normal transaction + are prohibited in stored functions and triggers, therefore + no conflicting situation can occur in a sub-statement either. 
+ The remaining rare cases when the server explicitly has + to commit the statement transaction prior to committing the normal + one cover error-handling scenarios (see for example + SQLCOM_LOCK_TABLES). + + When committing a statement or a normal transaction, the server + either uses the two-phase commit protocol, or issues a commit + in each engine independently. The two-phase commit protocol + is used only if: + - all participating engines support two-phase commit (provide + handlerton::prepare PSEA API call) and + - transactions in at least two engines modify data (i.e. are + not read-only). + + Note that the two phase commit is used for + statement transactions, even though they are not durable anyway. + This is done to ensure logical consistency of data in a multiple- + engine transaction. + For example, imagine that some day MySQL supports unique + constraint checks deferred till the end of statement. In such + case a commit in one of the engines may yield ER_DUP_KEY, + and MySQL should be able to gracefully abort statement + transactions of other participants. + + After the normal transaction has been committed, + thd->transaction.all list is cleared. + + When a connection is closed, the current normal transaction, if + any, is rolled back. + + Roles and responsibilities + -------------------------- + + The server has no way to know that an engine participates in + the statement and a transaction has been started + in it unless the engine says so. Thus, in order to be + a part of a transaction, the engine must "register" itself. + This is done by invoking trans_register_ha() server call. + Normally the engine registers itself whenever handler::external_lock() + is called. trans_register_ha() can be invoked many times: if + an engine is already registered, the call does nothing. + In case autocommit is not set, the engine must register itself + twice -- both in the statement list and in the normal transaction + list. 
+ In which list to register is a parameter of trans_register_ha(). + + Note, that although the registration interface in itself is + fairly clear, the current usage practice often leads to undesired + effects. E.g. since a call to trans_register_ha() in most engines + is embedded into implementation of handler::external_lock(), some + DDL statements start a transaction (at least from the server + point of view) even though they are not expected to. E.g. + CREATE TABLE does not start a transaction, since + handler::external_lock() is never called during CREATE TABLE. But + CREATE TABLE ... SELECT does, since handler::external_lock() is + called for the table that is being selected from. This has no + practical effects currently, but must be kept in mind + nevertheless. + + Once an engine is registered, the server will do the rest + of the work. + + During statement execution, whenever any of data-modifying + PSEA API methods is used, e.g. handler::write_row() or + handler::update_row(), the read-write flag is raised in the + statement transaction for the involved engine. + Currently All PSEA calls are "traced", and the data can not be + changed in a way other than issuing a PSEA call. Important: + unless this invariant is preserved the server will not know that + a transaction in a given engine is read-write and will not + involve the two-phase commit protocol! + + At the end of a statement, server call trans_commit_stmt is + invoked. This call in turn invokes handlerton::prepare() + for every involved engine. Prepare is followed by a call + to handlerton::commit_one_phase() If a one-phase commit + will suffice, handlerton::prepare() is not invoked and + the server only calls handlerton::commit_one_phase(). + At statement commit, the statement-related read-write + engine flag is propagated to the corresponding flag in the + normal transaction. When the commit is complete, the list + of registered engines is cleared. + + Rollback is handled in a similar fashion. 
+ + Additional notes on DDL and the normal transaction. + --------------------------------------------------- + + DDLs and operations with non-transactional engines + do not "register" in thd->transaction lists, and thus do not + modify the transaction state. Besides, each DDL in + MySQL is prefixed with an implicit normal transaction commit + (a call to trans_commit_implicit()), and thus leaves nothing + to modify. + However, as it has been pointed out with CREATE TABLE .. SELECT, + some DDL statements can start a *new* transaction. + + Behaviour of the server in this case is currently badly + defined. + DDL statements use a form of "semantic" logging + to maintain atomicity: if CREATE TABLE .. SELECT failed, + the newly created table is deleted. + In addition, some DDL statements issue interim transaction + commits: e.g. ALTER TABLE issues a commit after data is copied + from the original table to the internal temporary table. Other + statements, e.g. CREATE TABLE ... SELECT do not always commit + after itself. + And finally there is a group of DDL statements such as + RENAME/DROP TABLE that doesn't start a new transaction + and doesn't commit. + + This diversity makes it hard to say what will happen if + by chance a stored function is invoked during a DDL -- + whether any modifications it makes will be committed or not + is not clear. Fortunately, SQL grammar of few DDLs allows + invocation of a stored function. + + A consistent behaviour is perhaps to always commit the normal + transaction after all DDLs, just like the statement transaction + is always committed at the end of all statements. +*/ + +/** + Register a storage engine for a transaction. + + Every storage engine MUST call this function when it starts + a transaction or a statement (that is it must be called both for the + "beginning of transaction" and "beginning of statement"). + Only storage engines registered for the transaction/statement + will know when to commit/rollback it. 
+ + @note + trans_register_ha is idempotent - storage engine may register many + times per transaction. + +*/ +void trans_register_ha(THD *thd, bool all, handlerton *ht_arg, ulonglong trxid) +{ + THD_TRANS *trans; + Ha_trx_info *ha_info; + DBUG_ENTER("trans_register_ha"); + DBUG_PRINT("enter",("%s", all ? "all" : "stmt")); + + if (all) + { + trans= &thd->transaction->all; + thd->server_status|= SERVER_STATUS_IN_TRANS; + if (thd->tx_read_only) + thd->server_status|= SERVER_STATUS_IN_TRANS_READONLY; + DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS")); + } + else + trans= &thd->transaction->stmt; + + ha_info= thd->ha_data[ht_arg->slot].ha_info + (all ? 1 : 0); + + if (ha_info->is_started()) + DBUG_VOID_RETURN; /* already registered, return */ + + ha_info->register_ha(trans, ht_arg); + + trans->no_2pc|=(ht_arg->prepare==0); + + /* Set implicit xid even if there's explicit XA, it will be ignored anyway. */ + if (thd->transaction->implicit_xid.is_null()) + thd->transaction->implicit_xid.set(thd->query_id); + +/* + Register transaction start in performance schema if not done already. + By doing this, we handle cases when the transaction is started implicitly in + autocommit=0 mode, and cases when we are in normal autocommit=1 mode and the + executed statement is a single-statement transaction. + + Explicitly started transactions are handled in trans_begin(). + + Do not register transactions in which binary log is the only participating + transactional storage engine. 
+*/ + if (thd->m_transaction_psi == NULL && ht_arg->db_type != DB_TYPE_BINLOG) + { + thd->m_transaction_psi= MYSQL_START_TRANSACTION(&thd->m_transaction_state, + thd->get_xid(), trxid, thd->tx_isolation, thd->tx_read_only, + !thd->in_multi_stmt_transaction_mode()); + DEBUG_SYNC(thd, "after_set_transaction_psi_before_set_transaction_gtid"); + //gtid_set_performance_schema_values(thd); + } + DBUG_VOID_RETURN; +} + + +static int prepare_or_error(handlerton *ht, THD *thd, bool all) +{ +#ifdef WITH_WSREP + const bool run_wsrep_hooks= wsrep_run_commit_hook(thd, all); + if (run_wsrep_hooks && ht->flags & HTON_WSREP_REPLICATION && + wsrep_before_prepare(thd, all)) + { + return(1); + } +#endif /* WITH_WSREP */ + + int err= ht->prepare(ht, thd, all); + status_var_increment(thd->status_var.ha_prepare_count); + if (err) + { + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); + } +#ifdef WITH_WSREP + if (run_wsrep_hooks && !err && ht->flags & HTON_WSREP_REPLICATION && + wsrep_after_prepare(thd, all)) + { + err= 1; + } +#endif /* WITH_WSREP */ + + return err; +} + + +/** + @retval + 0 ok + @retval + 1 error, transaction was rolled back +*/ +int ha_prepare(THD *thd) +{ + int error=0, all=1; + THD_TRANS *trans=all ? 
&thd->transaction->all : &thd->transaction->stmt; + Ha_trx_info *ha_info= trans->ha_list; + DBUG_ENTER("ha_prepare"); + + if (ha_info) + { + for (; ha_info; ha_info= ha_info->next()) + { + handlerton *ht= ha_info->ht(); + if (ht->prepare) + { + if (unlikely(prepare_or_error(ht, thd, all))) + { + ha_rollback_trans(thd, all); + error=1; + break; + } + } + else + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_GET_ERRNO, ER_THD(thd, ER_GET_ERRNO), + HA_ERR_WRONG_COMMAND, + ha_resolve_storage_engine_name(ht)); + + } + } + + DEBUG_SYNC(thd, "at_unlog_xa_prepare"); + + if (tc_log->unlog_xa_prepare(thd, all)) + { + ha_rollback_trans(thd, all); + error=1; + } + } + + DBUG_RETURN(error); +} + +/* + Like ha_check_and_coalesce_trx_read_only to return counted number of + read-write transaction participants limited to two, but works in the 'all' + context. + Also returns the last found rw ha_info through the 2nd argument. +*/ +uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info) +{ + unsigned rw_ha_count= 0; + + for (auto ha_info= thd->transaction->all.ha_list; ha_info; + ha_info= ha_info->next()) + { + if (ha_info->is_trx_read_write()) + { + *ptr_ha_info= ha_info; + if (++rw_ha_count > 1) + break; + } + } + return rw_ha_count; +} + +/* + Returns counted number of + read-write recoverable transaction participants. +*/ +uint ha_count_rw_2pc(THD *thd, bool all) +{ + unsigned rw_ha_count= 0; + THD_TRANS *trans=all ? &thd->transaction->all : &thd->transaction->stmt; + + for (Ha_trx_info * ha_info= trans->ha_list; ha_info; + ha_info= ha_info->next()) + { + if (ha_info->is_trx_read_write() && ha_info->ht()->recover) + ++rw_ha_count; + } + return rw_ha_count; +} + +/** + Check if we can skip the two-phase commit. + + A helper function to evaluate if two-phase commit is mandatory. + As a side effect, propagates the read-only/read-write flags + of the statement transaction to its enclosing normal transaction. 
+ + If we have at least two engines with read-write changes we must + run a two-phase commit. Otherwise we can run several independent + commits as the only transactional engine has read-write changes + and others are read-only. + + @retval 0 All engines are read-only. + @retval 1 We have the only engine with read-write changes. + @retval >1 More than one engine have read-write changes. + Note: return value might NOT be the exact number of + engines with read-write changes. +*/ + +uint +ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list, + bool all) +{ + /* The number of storage engines that have actual changes. */ + unsigned rw_ha_count= 0; + Ha_trx_info *ha_info; + + for (ha_info= ha_list; ha_info; ha_info= ha_info->next()) + { + if (ha_info->is_trx_read_write()) + ++rw_ha_count; + + if (! all) + { + Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1]; + DBUG_ASSERT(ha_info != ha_info_all); + /* + Merge read-only/read-write information about statement + transaction to its enclosing normal transaction. Do this + only if in a real transaction -- that is, if we know + that ha_info_all is registered in thd->transaction.all. + Since otherwise we only clutter the normal transaction flags. + */ + if (ha_info_all->is_started()) /* FALSE if autocommit. */ + ha_info_all->coalesce_trx_with(ha_info); + } + else if (rw_ha_count > 1) + { + /* + It is a normal transaction, so we don't need to merge read/write + information up, and the need for two-phase commit has been + already established. Break the loop prematurely. + */ + break; + } + } + return rw_ha_count; +} + + +/** + @retval + 0 ok + @retval + 1 transaction was rolled back + @retval + 2 error during commit, data may be inconsistent + + @todo + Since we don't support nested statement transactions in 5.0, + we can't commit or rollback stmt transactions while we are inside + stored functions or triggers. So we simply do nothing now. 
+ TODO: This should be fixed in later ( >= 5.1) releases. +*/ +int ha_commit_trans(THD *thd, bool all) +{ + int error= 0, cookie; + /* + 'all' means that this is either an explicit commit issued by + user, or an implicit commit issued by a DDL. + */ + THD_TRANS *trans= all ? &thd->transaction->all : &thd->transaction->stmt; + /* + "real" is a nick name for a transaction for which a commit will + make persistent changes. E.g. a 'stmt' transaction inside an 'all' + transaction is not 'real': even though it's possible to commit it, + the changes are not durable as they might be rolled back if the + enclosing 'all' transaction is rolled back. + */ + bool is_real_trans= ((all || thd->transaction->all.ha_list == 0) && + !(thd->variables.option_bits & OPTION_GTID_BEGIN)); + Ha_trx_info *ha_info= trans->ha_list; + bool need_prepare_ordered, need_commit_ordered; + my_xid xid; +#ifdef WITH_WSREP + const bool run_wsrep_hooks= wsrep_run_commit_hook(thd, all); +#endif /* WITH_WSREP */ + DBUG_ENTER("ha_commit_trans"); + DBUG_PRINT("info",("thd: %p option_bits: %lu all: %d", + thd, (ulong) thd->variables.option_bits, all)); + + /* Just a random warning to test warnings pushed during autocommit. */ + DBUG_EXECUTE_IF("warn_during_ha_commit_trans", + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARNING_NOT_COMPLETE_ROLLBACK, + ER_THD(thd, ER_WARNING_NOT_COMPLETE_ROLLBACK));); + + DBUG_PRINT("info", + ("all: %d thd->in_sub_stmt: %d ha_info: %p is_real_trans: %d", + all, thd->in_sub_stmt, ha_info, is_real_trans)); + /* + We must not commit the normal transaction if a statement + transaction is pending. Otherwise statement transaction + flags will not get propagated to its normal transaction's + counterpart. 
+ */ + DBUG_ASSERT(thd->transaction->stmt.ha_list == NULL || + trans == &thd->transaction->stmt); + + DBUG_ASSERT(!thd->in_sub_stmt); + + if (thd->in_sub_stmt) + { + /* + Since we don't support nested statement transactions in 5.0, + we can't commit or rollback stmt transactions while we are inside + stored functions or triggers. So we simply do nothing now. + TODO: This should be fixed in later ( >= 5.1) releases. + */ + if (!all) + DBUG_RETURN(0); + /* + We assume that all statements which commit or rollback main transaction + are prohibited inside of stored functions or triggers. So they should + bail out with error even before ha_commit_trans() call. To be 100% safe + let us throw error in non-debug builds. + */ + my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0)); + DBUG_RETURN(2); + } + + if (!ha_info) + { + /* + Free resources and perform other cleanup even for 'empty' transactions. + */ + if (is_real_trans) + { + thd->transaction->cleanup(); + MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi); + thd->m_transaction_psi= NULL; + } +#ifdef WITH_WSREP + if (wsrep_is_active(thd) && is_real_trans && !error) + wsrep_commit_empty(thd, all); +#endif /* WITH_WSREP */ + + DBUG_RETURN(0); + } + + DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE();); + + /* Close all cursors that can not survive COMMIT */ + if (is_real_trans) /* not a statement commit */ + thd->stmt_map.close_transient_cursors(); + + uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all); + /* rw_trans is TRUE when we in a transaction changing data */ + bool rw_trans= is_real_trans && rw_ha_count > 0; + MDL_request mdl_backup; + DBUG_PRINT("info", ("is_real_trans: %d rw_trans: %d rw_ha_count: %d", + is_real_trans, rw_trans, rw_ha_count)); + + if (rw_trans) + { + /* + Acquire a metadata lock which will ensure that COMMIT is blocked + by an active FLUSH TABLES WITH READ LOCK (and vice versa: + COMMIT in progress blocks FTWRL). 
+ + We allow the owner of FTWRL to COMMIT; we assume that it knows + what it does. + */ + MDL_REQUEST_INIT(&mdl_backup, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, + MDL_EXPLICIT); + + if (!WSREP(thd)) + { + if (thd->mdl_context.acquire_lock(&mdl_backup, + thd->variables.lock_wait_timeout)) + { + my_error(ER_ERROR_DURING_COMMIT, MYF(0), 1); + ha_rollback_trans(thd, all); + DBUG_RETURN(1); + } + thd->backup_commit_lock= &mdl_backup; + } + DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock"); + } + + if (rw_trans && thd->is_read_only_ctx()) + { + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only"); + goto err; + } + +#if 1 // FIXME: This should be done in ha_prepare(). + if (rw_trans || (thd->lex->sql_command == SQLCOM_ALTER_TABLE && + thd->lex->alter_info.flags & ALTER_ADD_SYSTEM_VERSIONING && + is_real_trans)) + { + ulonglong trx_start_id= 0, trx_end_id= 0; + for (Ha_trx_info *ha_info= trans->ha_list; ha_info; ha_info= ha_info->next()) + { + if (ha_info->ht()->prepare_commit_versioned) + { + trx_end_id= ha_info->ht()->prepare_commit_versioned(thd, &trx_start_id); + + if (trx_end_id == ULONGLONG_MAX) + { + my_error(ER_ERROR_DURING_COMMIT, MYF(0), 1); + goto err; + } + + if (trx_end_id) + break; // FIXME: use a common ID for cross-engine transactions + } + } + + if (trx_end_id) + { + if (!TR_table::use_transaction_registry) + { + my_error(ER_VERS_TRT_IS_DISABLED, MYF(0)); + goto err; + } + DBUG_ASSERT(trx_start_id); +#ifdef WITH_WSREP + bool saved_wsrep_on= thd->variables.wsrep_on; + thd->variables.wsrep_on= false; +#endif + TR_table trt(thd, true); + if (trt.update(trx_start_id, trx_end_id)) + { +#ifdef WITH_WSREP + thd->variables.wsrep_on= saved_wsrep_on; +#endif + (void) trans_rollback_stmt(thd); + goto err; + } + trt.table->file->extra(HA_EXTRA_RESET_STATE); + // Here, the call will not commit inside InnoDB. It is only working + // around closing thd->transaction.stmt open by TR_table::open(). 
+ if (all) + commit_one_phase_2(thd, false, &thd->transaction->stmt, false); +#ifdef WITH_WSREP + thd->variables.wsrep_on= saved_wsrep_on; +#endif + } + } +#endif + + if (trans->no_2pc || (rw_ha_count <= 1)) + { +#ifdef WITH_WSREP + /* + This commit will not go through log_and_order() where wsrep commit + ordering is normally done. Commit ordering must be done here. + */ + if (run_wsrep_hooks) + { + // This commit involves more than one storage engine and requires + // two phases, but some engines don't support it. + // Issue a message to the client and roll back the transaction. + if (trans->no_2pc && rw_ha_count > 1) + { + my_message(ER_ERROR_DURING_COMMIT, "Transactional commit not supported " + "by involved engine(s)", MYF(0)); + error= 1; + } + else + error= wsrep_before_commit(thd, all); + } + if (error) + { + ha_rollback_trans(thd, FALSE); + goto wsrep_err; + } +#endif /* WITH_WSREP */ + error= ha_commit_one_phase(thd, all); +#ifdef WITH_WSREP + // Here in case of error we must return 2 for inconsistency + if (run_wsrep_hooks && !error) + error= wsrep_after_commit(thd, all) ? 2 : 0; +#endif /* WITH_WSREP */ + goto done; + } + + need_prepare_ordered= FALSE; + need_commit_ordered= FALSE; + + for (Ha_trx_info *hi= ha_info; hi; hi= hi->next()) + { + handlerton *ht= hi->ht(); + /* + Do not call two-phase commit if this particular + transaction is read-only. This allows for simpler + implementation in engines that are always read-only. + */ + if (! hi->is_trx_read_write()) + continue; + /* + Sic: we know that prepare() is not NULL since otherwise + trans->no_2pc would have been set. 
+ */ + if (unlikely(prepare_or_error(ht, thd, all))) + goto err; + + need_prepare_ordered|= (ht->prepare_ordered != NULL); + need_commit_ordered|= (ht->commit_ordered != NULL); + } + DEBUG_SYNC(thd, "ha_commit_trans_after_prepare"); + DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE();); + + if (!is_real_trans) + { + error= commit_one_phase_2(thd, all, trans, is_real_trans); + goto done; + } + + DBUG_ASSERT(thd->transaction->implicit_xid.get_my_xid() == + thd->transaction->implicit_xid.quick_get_my_xid()); + DBUG_ASSERT(!thd->transaction->xid_state.is_explicit_XA() || + thd->lex->xa_opt == XA_ONE_PHASE); + xid= thd->transaction->implicit_xid.quick_get_my_xid(); + +#ifdef WITH_WSREP + if (run_wsrep_hooks && !error) + { + wsrep::seqno const s= wsrep_xid_seqno(thd->wsrep_xid); + if (!s.is_undefined()) + { + // xid was rewritten by wsrep + xid= s.get(); + } + } + if (run_wsrep_hooks && (error = wsrep_before_commit(thd, all))) + goto wsrep_err; +#endif /* WITH_WSREP */ + DEBUG_SYNC(thd, "ha_commit_trans_before_log_and_order"); + cookie= tc_log->log_and_order(thd, xid, all, need_prepare_ordered, + need_commit_ordered); + if (!cookie) + { + WSREP_DEBUG("log_and_order has failed %llu %d", thd->thread_id, cookie); + goto err; + } + DEBUG_SYNC(thd, "ha_commit_trans_after_log_and_order"); + DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE();); + + error= commit_one_phase_2(thd, all, trans, is_real_trans) ? 
2 : 0; +#ifdef WITH_WSREP + if (run_wsrep_hooks && + (error || (error = wsrep_after_commit(thd, all)))) + { + error = 2; + mysql_mutex_lock(&thd->LOCK_thd_data); + if (wsrep_must_abort(thd)) + { + mysql_mutex_unlock(&thd->LOCK_thd_data); + (void)tc_log->unlog(cookie, xid); + goto wsrep_err; + } + mysql_mutex_unlock(&thd->LOCK_thd_data); + } +#endif /* WITH_WSREP */ + DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE();); + if (tc_log->unlog(cookie, xid)) + error= 2; /* Error during commit */ + +done: + if (is_real_trans) + { + MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi); + thd->m_transaction_psi= NULL; + } + + DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE();); + + mysql_mutex_assert_not_owner(&LOCK_prepare_ordered); + mysql_mutex_assert_not_owner(mysql_bin_log.get_log_lock()); + mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync); + mysql_mutex_assert_not_owner(&LOCK_commit_ordered); +#ifdef HAVE_REPLICATION + repl_semisync_master.wait_after_commit(thd, all); + DEBUG_SYNC(thd, "after_group_after_commit"); +#endif + goto end; + + /* Come here if error and we need to rollback. */ +#ifdef WITH_WSREP +wsrep_err: + mysql_mutex_lock(&thd->LOCK_thd_data); + if (run_wsrep_hooks && wsrep_must_abort(thd)) + { + WSREP_DEBUG("BF abort has happened after prepare & certify"); + mysql_mutex_unlock(&thd->LOCK_thd_data); + ha_rollback_trans(thd, TRUE); + } + else + mysql_mutex_unlock(&thd->LOCK_thd_data); + +#endif /* WITH_WSREP */ +err: + error= 1; /* Transaction was rolled back */ + /* + In parallel replication, rollback is delayed, as there is extra replication + book-keeping to be done before rolling back and allowing a conflicting + transaction to continue (MDEV-7458). + */ + if (!(thd->rgi_slave && thd->rgi_slave->is_parallel_exec)) + ha_rollback_trans(thd, all); + else + { + /* + We are not really doing a rollback here, but the code in trans_commit() + requres that m_transaction_psi is 0 when we return from this function. 
+ */ + MYSQL_ROLLBACK_TRANSACTION(thd->m_transaction_psi); + thd->m_transaction_psi= NULL; + WSREP_DEBUG("rollback skipped %p %d",thd->rgi_slave, + thd->rgi_slave->is_parallel_exec); + } +end: + if (mdl_backup.ticket) + { + /* + We do not always immediately release transactional locks + after ha_commit_trans() (see uses of ha_enable_transaction()), + thus we release the commit blocker lock as soon as it's + not needed. + */ + thd->mdl_context.release_lock(mdl_backup.ticket); + } + thd->backup_commit_lock= 0; +#ifdef WITH_WSREP + if (wsrep_is_active(thd) && is_real_trans && !error && + (rw_ha_count == 0 || all) && + wsrep_not_committed(thd)) + { + wsrep_commit_empty(thd, all); + } +#endif /* WITH_WSREP */ + + DBUG_RETURN(error); +} + +/** + @note + This function does not care about global read lock or backup locks, + the caller should. + + @param[in] all Is set in case of explicit commit + (COMMIT statement), or implicit commit + issued by DDL. Is not set when called + at the end of statement, even if + autocommit=1. +*/ + +int ha_commit_one_phase(THD *thd, bool all) +{ + THD_TRANS *trans=all ? &thd->transaction->all : &thd->transaction->stmt; + /* + "real" is a nick name for a transaction for which a commit will + make persistent changes. E.g. a 'stmt' transaction inside a 'all' + transaction is not 'real': even though it's possible to commit it, + the changes are not durable as they might be rolled back if the + enclosing 'all' transaction is rolled back. + We establish the value of 'is_real_trans' by checking + if it's an explicit COMMIT/BEGIN statement, or implicit + commit issued by DDL (all == TRUE), or if we're running + in autocommit mode (it's only in the autocommit mode + ha_commit_one_phase() can be called with an empty + transaction.all.ha_list, see why in trans_register_ha()). 
+ */ + bool is_real_trans= ((all || thd->transaction->all.ha_list == 0) && + !(thd->variables.option_bits & OPTION_GTID_BEGIN)); + int res; + DBUG_ENTER("ha_commit_one_phase"); + if (is_real_trans) + { + DEBUG_SYNC(thd, "ha_commit_one_phase"); + if ((res= thd->wait_for_prior_commit())) + DBUG_RETURN(res); + } + res= commit_one_phase_2(thd, all, trans, is_real_trans); + DBUG_RETURN(res); +} + +static bool is_ro_1pc_trans(THD *thd, Ha_trx_info *ha_info, bool all, + bool is_real_trans) +{ + uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all); + bool rw_trans= is_real_trans && + (rw_ha_count > (thd->is_current_stmt_binlog_disabled()?0U:1U)); + + return !rw_trans; +} + +static bool has_binlog_hton(Ha_trx_info *ha_info) +{ + bool rc; + for (rc= false; ha_info && !rc; ha_info= ha_info->next()) + rc= ha_info->ht() == binlog_hton; + + return rc; +} + +static int +commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans) +{ + int error= 0; + uint count= 0; + Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; + DBUG_ENTER("commit_one_phase_2"); + if (is_real_trans) + DEBUG_SYNC(thd, "commit_one_phase_2"); + + if (ha_info) + { + int err; + + if (has_binlog_hton(ha_info) && + (err= binlog_commit(thd, all, + is_ro_1pc_trans(thd, ha_info, all, is_real_trans)))) + { + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); + error= 1; + } + for (; ha_info; ha_info= ha_info_next) + { + handlerton *ht= ha_info->ht(); + if ((err= ht->commit(ht, thd, all))) + { + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); + error=1; + } + /* Should this be done only if is_real_trans is set ? 
*/ + status_var_increment(thd->status_var.ha_commit_count); + if (is_real_trans && ht != binlog_hton && ha_info->is_trx_read_write()) + ++count; + ha_info_next= ha_info->next(); + ha_info->reset(); /* keep it conveniently zero-filled */ + } + trans->ha_list= 0; + trans->no_2pc=0; + if (all) + { +#ifdef HAVE_QUERY_CACHE + if (thd->transaction->changed_tables) + query_cache.invalidate(thd, thd->transaction->changed_tables); +#endif + } + } + + /* Free resources and perform other cleanup even for 'empty' transactions. */ + if (is_real_trans) + { + thd->has_waiter= false; + thd->transaction->cleanup(); + if (count >= 2) + statistic_increment(transactions_multi_engine, LOCK_status); + } + + DBUG_RETURN(error); +} + + +int ha_rollback_trans(THD *thd, bool all) +{ + int error=0; + THD_TRANS *trans=all ? &thd->transaction->all : &thd->transaction->stmt; + Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; + /* + "real" is a nick name for a transaction for which a commit will + make persistent changes. E.g. a 'stmt' transaction inside a 'all' + transaction is not 'real': even though it's possible to commit it, + the changes are not durable as they might be rolled back if the + enclosing 'all' transaction is rolled back. + We establish the value of 'is_real_trans' by checking + if it's an explicit COMMIT or BEGIN statement, or implicit + commit issued by DDL (in these cases all == TRUE), + or if we're running in autocommit mode (it's only in the autocommit mode + ha_commit_one_phase() is called with an empty + transaction.all.ha_list, see why in trans_register_ha()). + */ + bool is_real_trans=all || thd->transaction->all.ha_list == 0; + DBUG_ENTER("ha_rollback_trans"); + + /* + We must not rollback the normal transaction if a statement + transaction is pending. 
+ */ + DBUG_ASSERT(thd->transaction->stmt.ha_list == NULL || + trans == &thd->transaction->stmt); + +#ifdef HAVE_REPLICATION + if (is_real_trans) + { + /* + In parallel replication, if we need to rollback during commit, we must + first inform following transactions that we are going to abort our commit + attempt. Otherwise those following transactions can run too early, and + possibly cause replication to fail. See comments in retry_event_group(). + + (This concerns rollbacks due to temporary errors where the transaction + will be retried afterwards. For non-recoverable errors, following + transactions will not start but just be skipped as the worker threads + perform the error stop). + + There were several bugs with this in the past that were very hard to + track down (MDEV-7458, MDEV-8302). So we add here an assertion for + rollback without signalling following transactions. And in release + builds, we explicitly do the signalling before rolling back. + */ + DBUG_ASSERT( + !(thd->rgi_slave && + !thd->rgi_slave->worker_error && + thd->rgi_slave->did_mark_start_commit) || + (thd->transaction->xid_state.is_explicit_XA() || + (thd->rgi_slave->gtid_ev_flags2 & Gtid_log_event::FL_PREPARED_XA))); + + if (thd->rgi_slave && + !thd->rgi_slave->worker_error && + thd->rgi_slave->did_mark_start_commit) + thd->rgi_slave->unmark_start_commit(); + } +#endif + + if (thd->in_sub_stmt) + { + DBUG_ASSERT(0); + /* + If we are inside stored function or trigger we should not commit or + rollback current statement transaction. See comment in ha_commit_trans() + call for more information. 
+ */ + if (!all) + DBUG_RETURN(0); + my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0)); + DBUG_RETURN(1); + } + +#ifdef WITH_WSREP + (void) wsrep_before_rollback(thd, all); +#endif /* WITH_WSREP */ + if (ha_info) + { + /* Close all cursors that can not survive ROLLBACK */ + if (is_real_trans) /* not a statement commit */ + thd->stmt_map.close_transient_cursors(); + + for (; ha_info; ha_info= ha_info_next) + { + int err; + handlerton *ht= ha_info->ht(); + if ((err= ht->rollback(ht, thd, all))) + { + // cannot happen + my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err); + error=1; +#ifdef WITH_WSREP + WSREP_WARN("handlerton rollback failed, thd %lld %lld conf %d SQL %s", + thd->thread_id, thd->query_id, thd->wsrep_trx().state(), + thd->query()); +#endif /* WITH_WSREP */ + } + status_var_increment(thd->status_var.ha_rollback_count); + ha_info_next= ha_info->next(); + ha_info->reset(); /* keep it conveniently zero-filled */ + } + trans->ha_list= 0; + trans->no_2pc=0; + } + +#ifdef WITH_WSREP + if (thd->is_error()) + { + WSREP_DEBUG("ha_rollback_trans(%lld, %s) rolled back: %s: %s; is_real %d", + thd->thread_id, all?"TRUE":"FALSE", wsrep_thd_query(thd), + thd->get_stmt_da()->message(), is_real_trans); + } + (void) wsrep_after_rollback(thd, all); +#endif /* WITH_WSREP */ + + if (all || !thd->in_active_multi_stmt_transaction()) + { + MYSQL_ROLLBACK_TRANSACTION(thd->m_transaction_psi); + thd->m_transaction_psi= NULL; + } + + /* Always cleanup. Even if nht==0. There may be savepoints. */ + if (is_real_trans) + { + /* + Thanks to possibility of MDL deadlock rollback request can come even if + transaction hasn't been started in any transactional storage engine. 
+ */ + if (thd->transaction_rollback_request && + thd->transaction->xid_state.is_explicit_XA()) + thd->transaction->xid_state.set_error(thd->get_stmt_da()->sql_errno()); + + thd->has_waiter= false; + thd->transaction->cleanup(); + } + if (all) + thd->transaction_rollback_request= FALSE; + + /* + If a non-transactional table was updated, warn; don't warn if this is a + slave thread (because when a slave thread executes a ROLLBACK, it has + been read from the binary log, so it's 100% sure and normal to produce + error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning to the + slave SQL thread, it would not stop the thread but just be printed in + the error log; but we don't want users to wonder why they have this + message in the error log, so we don't send it. + + We don't have to test for thd->killed == KILL_SYSTEM_THREAD as + it doesn't matter if a warning is pushed to a system thread or not: + No one will see it... + */ + if (is_real_trans && thd->transaction->all.modified_non_trans_table && + !thd->slave_thread && thd->killed < KILL_CONNECTION) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARNING_NOT_COMPLETE_ROLLBACK, + ER_THD(thd, ER_WARNING_NOT_COMPLETE_ROLLBACK)); +#ifdef HAVE_REPLICATION + repl_semisync_master.wait_after_rollback(thd, all); +#endif + DBUG_RETURN(error); +} + + +struct xahton_st { + XID *xid; + int result; +}; + +static my_bool xacommit_handlerton(THD *unused1, plugin_ref plugin, + void *arg) +{ + handlerton *hton= plugin_hton(plugin); + if (hton->recover) + { + hton->commit_by_xid(hton, ((struct xahton_st *)arg)->xid); + ((struct xahton_st *)arg)->result= 0; + } + return FALSE; +} + +static my_bool xarollback_handlerton(THD *unused1, plugin_ref plugin, + void *arg) +{ + handlerton *hton= plugin_hton(plugin); + if (hton->recover) + { + hton->rollback_by_xid(hton, ((struct xahton_st *)arg)->xid); + ((struct xahton_st *)arg)->result= 0; + } + return FALSE; +} + + +int ha_commit_or_rollback_by_xid(XID *xid, bool commit) +{ + 
struct xahton_st xaop; + xaop.xid= xid; + xaop.result= 1; + + /* + When the binlogging service is enabled complete the transaction + by it first. + */ + if (commit) + binlog_commit_by_xid(binlog_hton, xid); + else + binlog_rollback_by_xid(binlog_hton, xid); + + plugin_foreach(NULL, commit ? xacommit_handlerton : xarollback_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, &xaop); + + return xaop.result; +} + + +#ifndef DBUG_OFF +/** Converts XID to string. + +@param[out] buf output buffer +@param[in] xid XID to convert + +@return pointer to converted string + +@note This does not need to be multi-byte safe or anything */ +static char *xid_to_str(char *buf, const XID &xid) +{ + int i; + char *s=buf; + *s++='\''; + for (i= 0; i < xid.gtrid_length + xid.bqual_length; i++) + { + uchar c= (uchar) xid.data[i]; + /* is_next_dig is set if next character is a number */ + bool is_next_dig= FALSE; + if (i < XIDDATASIZE) + { + char ch= xid.data[i + 1]; + is_next_dig= (ch >= '0' && ch <='9'); + } + if (i == xid.gtrid_length) + { + *s++='\''; + if (xid.bqual_length) + { + *s++='.'; + *s++='\''; + } + } + if (c < 32 || c > 126) + { + *s++='\\'; + /* + If next character is a number, write current character with + 3 octal numbers to ensure that the next number is not seen + as part of the octal number + */ + if (c > 077 || is_next_dig) + *s++=_dig_vec_lower[c >> 6]; + if (c > 007 || is_next_dig) + *s++=_dig_vec_lower[(c >> 3) & 7]; + *s++=_dig_vec_lower[c & 7]; + } + else + { + if (c == '\'' || c == '\\') + *s++='\\'; + *s++=c; + } + } + *s++='\''; + *s=0; + return buf; +} +#endif + +static my_xid wsrep_order_and_check_continuity(XID *list, int len) +{ +#ifdef WITH_WSREP + wsrep_sort_xid_array(list, len); + wsrep::gtid cur_position= wsrep_get_SE_checkpoint(); + long long cur_seqno= cur_position.seqno().get(); + for (int i= 0; i < len; ++i) + { + if (!wsrep_is_wsrep_xid(list + i) || + wsrep_xid_seqno(list + i) != cur_seqno + 1) + { + WSREP_WARN("Discovered discontinuity in recovered wsrep 
" + "transaction XIDs. Truncating the recovery list to " + "%d entries", i); + break; + } + ++cur_seqno; + } + WSREP_INFO("Last wsrep seqno to be recovered %lld", cur_seqno); + return (cur_seqno < 0 ? 0 : cur_seqno); +#else + return 0; +#endif /* WITH_WSREP */ +} +/** + recover() step of xa. + + @note + there are four modes of operation: + - automatic recover after a crash + in this case commit_list != 0, tc_heuristic_recover==0 + all xids from commit_list are committed, others are rolled back + - manual (heuristic) recover + in this case commit_list==0, tc_heuristic_recover != 0 + DBA has explicitly specified that all prepared transactions should + be committed (or rolled back). + - no recovery (MySQL did not detect a crash) + in this case commit_list==0, tc_heuristic_recover == 0 + there should be no prepared transactions in this case. + - automatic recovery for the semisync slave server: uncommitted + transactions are rolled back and when they are in binlog it gets + truncated to the first uncommitted transaction start offset. +*/ +struct xarecover_st +{ + int len, found_foreign_xids, found_my_xids; + XID *list; + HASH *commit_list; + bool dry_run; + MEM_ROOT *mem_root; + bool error; +}; + +/** + Inserts a new hash member. + + returns a successfully created and inserted @c xid_recovery_member + into hash @c hash_arg, + or NULL. +*/ +static xid_recovery_member* +xid_member_insert(HASH *hash_arg, my_xid xid_arg, MEM_ROOT *ptr_mem_root, + XID *full_xid_arg, decltype(::server_id) server_id_arg) +{ + xid_recovery_member *member= (xid_recovery_member *) + alloc_root(ptr_mem_root, sizeof(xid_recovery_member)); + XID *xid_full= NULL; + + if (full_xid_arg) + xid_full= (XID*) alloc_root(ptr_mem_root, sizeof(XID)); + + if (!member || (full_xid_arg && !xid_full)) + return NULL; + + if (full_xid_arg) + *xid_full= *full_xid_arg; + *member= xid_recovery_member(xid_arg, 1, false, xid_full, server_id_arg); + + return + my_hash_insert(hash_arg, (uchar*) member) ? 
NULL : member; +} + +/* + Inserts a new or updates an existing hash member to increment + the member's prepare counter. + + returns false on success, + true otherwise. +*/ +static bool xid_member_replace(HASH *hash_arg, my_xid xid_arg, + MEM_ROOT *ptr_mem_root, + XID *full_xid_arg, + decltype(::server_id) server_id_arg) +{ + xid_recovery_member* member; + if ((member= (xid_recovery_member *) + my_hash_search(hash_arg, (uchar *)& xid_arg, sizeof(xid_arg)))) + member->in_engine_prepare++; + else + member= xid_member_insert(hash_arg, xid_arg, ptr_mem_root, full_xid_arg, server_id_arg); + + return member == NULL; +} + +/* + A "transport" type for recovery completion with ha_recover_complete() +*/ +struct xarecover_complete_arg +{ + xid_recovery_member* member; + Binlog_offset *binlog_coord; + uint count; +}; + +/* + Flagged to commit member confirms to get committed. + Otherwise when + A. ptr_commit_max is NULL (implies the normal recovery), or + B. it's not NULL (can only be so in the semisync slave case) + and the value referenced is not greater than the member's coordinate + the decision is to rollback. + When both A,B do not hold - which is the semisync slave recovery + case - the decision is to commit. + + Returns true as commmit decision + false as rollback one +*/ +static bool xarecover_decide_to_commit(xid_recovery_member* member, + Binlog_offset *ptr_commit_max) +{ + return + member->decided_to_commit ? true : + !ptr_commit_max ? false : + (member->binlog_coord < *ptr_commit_max ? // semisync slave recovery + true : false); +} + +/* + Helper function for xarecover_do_commit_or_rollback_handlerton. + For a given hton decides what to do with a xid passed in the 2nd arg + and carries out the decision. 
+*/
+static void xarecover_do_commit_or_rollback(handlerton *hton,
+                                            xarecover_complete_arg *arg)
+{
+  xid_t x;
+  my_bool rc;
+  xid_recovery_member *member= arg->member;
+  Binlog_offset *ptr_commit_max= arg->binlog_coord;
+
+  if (!member->full_xid)
+    // Populate xid using the server_id from original transaction
+    x.set(member->xid, member->server_id);
+  else
+    x= *member->full_xid;
+
+  /* Carry out the decision made by xarecover_decide_to_commit(). */
+  rc= xarecover_decide_to_commit(member, ptr_commit_max) ?
+    hton->commit_by_xid(hton, &x) : hton->rollback_by_xid(hton, &x);
+
+  /*
+    It's fine to have non-zero rc which would be from transaction
+    non-participant hton:s.
+  */
+  DBUG_ASSERT(rc || member->in_engine_prepare > 0);
+
+  if (!rc)
+  {
+    /*
+      This block relies on Engine to report XAER_NOTA at
+      "complete"_by_xid for unknown xid.
+    */
+    member->in_engine_prepare--;
+    if (global_system_variables.log_warnings > 2)
+      sql_print_information("%s transaction with xid %llu",
+                            member->decided_to_commit ? "Committed" :
+                            "Rolled back", (ulonglong) member->xid);
+  }
+}
+
+/*
+  Per hton recovery decider function: plugin_foreach() callback that
+  applies the commit/rollback decision to one storage engine.  Only
+  engines implementing the recover() method (i.e. 2PC participants)
+  are considered.
+*/
+static my_bool xarecover_do_commit_or_rollback_handlerton(THD *unused,
+                                                          plugin_ref plugin,
+                                                          void *arg)
+{
+  handlerton *hton= plugin_hton(plugin);
+
+  if (hton->recover)
+  {
+    xarecover_do_commit_or_rollback(hton, (xarecover_complete_arg *) arg);
+  }
+
+  return FALSE;
+}
+
+/*
+  Completes binlog recovery for an input xid in the passed
+  member_arg to invoke decider functions for each handlerton.
+
+  Returns always FALSE.
+*/ +static my_bool xarecover_complete_and_count(void *member_arg, + void *param_arg) +{ + xid_recovery_member *member= (xid_recovery_member*) member_arg; + xarecover_complete_arg *complete_params= + (xarecover_complete_arg*) param_arg; + complete_params->member= member; + + (void) plugin_foreach(NULL, xarecover_do_commit_or_rollback_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, complete_params); + + if (member->in_engine_prepare) + { + complete_params->count++; + if (global_system_variables.log_warnings > 2) + sql_print_warning("Found prepared transaction with xid %llu", + (ulonglong) member->xid); + } + + return false; +} + +/* + Completes binlog recovery to invoke decider functions for + each xid. + Returns the number of transactions remained doubtful. +*/ +uint ha_recover_complete(HASH *commit_list, Binlog_offset *coord) +{ + xarecover_complete_arg complete= { NULL, coord, 0 }; + (void) my_hash_iterate(commit_list, xarecover_complete_and_count, &complete); + + return complete.count; +} + +static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, + void *arg) +{ + handlerton *hton= plugin_hton(plugin); + struct xarecover_st *info= (struct xarecover_st *) arg; + int got; + + if (hton->recover) + { + while ((got= hton->recover(hton, info->list, info->len)) > 0 ) + { + sql_print_information("Found %d prepared transaction(s) in %s", + got, hton_name(hton)->str); + /* If wsrep_on=ON, XIDs are first ordered and then the range of + recovered XIDs is checked for continuity. All the XIDs which + are in continuous range can be safely committed if binlog + is off since they have already ordered and certified in the + cluster. + + The discontinuity of wsrep XIDs may happen because the GTID + is assigned for transaction in wsrep_before_prepare(), but the + commit order is entered in wsrep_before_commit(). This means that + transactions may run prepare step out of order and may + result in gap in wsrep XIDs. 
This can be the case for example + if we have T1 with seqno 1 and T2 with seqno 2 and the server + crashes after T2 finishes prepare step but before T1 starts + the prepare. + */ + my_xid wsrep_limit __attribute__((unused))= 0; + + /* Note that we could call this for binlog also that + will not have WSREP(thd) but global wsrep on might + be true. + */ + if (WSREP_ON) + wsrep_limit= wsrep_order_and_check_continuity(info->list, got); + + for (int i=0; i < got; i ++) + { + my_xid x= info->list[i].get_my_xid(); + bool is_server_xid= x > 0; + +#ifdef WITH_WSREP + if (!is_server_xid && wsrep_is_wsrep_xid(&info->list[i])) + x= wsrep_xid_seqno(&info->list[i]); +#endif + if (!x) // not "mine" - that is generated by external TM + { + DBUG_EXECUTE("info",{ + char buf[XIDDATASIZE*4+6]; + _db_doprnt_("ignore xid %s", xid_to_str(buf, info->list[i])); + }); + xid_cache_insert(info->list + i); + info->found_foreign_xids++; + continue; + } + if (IF_WSREP(!(wsrep_emulate_bin_log && + wsrep_is_wsrep_xid(info->list + i) && + x <= wsrep_limit) && info->dry_run, + info->dry_run)) + { + info->found_my_xids++; + continue; + } + + /* + Regular and semisync slave server recovery only collects + xids to make decisions on them later by the caller. + */ + if (info->mem_root) + { + // remember "full" xid too when it's not in mysql format. + // Also record the transaction's original server_id. It will be used for + // populating the input XID to be searched in hash. + if (xid_member_replace(info->commit_list, x, info->mem_root, + is_server_xid? NULL : &info->list[i], + is_server_xid? 
info->list[i].get_trx_server_id() : server_id))
+        {
+          info->error= true;
+          sql_print_error("Error in memory allocation at xarecover_handlerton");
+          break;
+        }
+      }
+      if (IF_WSREP((wsrep_emulate_bin_log &&
+                    wsrep_is_wsrep_xid(info->list + i) &&
+                    x <= wsrep_limit), false) ||
+          tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT)
+      {
+        int rc= hton->commit_by_xid(hton, info->list+i);
+        if (rc == 0)
+        {
+          DBUG_EXECUTE("info",{
+            char buf[XIDDATASIZE*4+6];
+            _db_doprnt_("commit xid %s", xid_to_str(buf, info->list[i]));
+            });
+        }
+      }
+      else if (!info->mem_root)
+      {
+        int rc= hton->rollback_by_xid(hton, info->list+i);
+        if (rc == 0)
+        {
+          DBUG_EXECUTE("info",{
+            char buf[XIDDATASIZE*4+6];
+            _db_doprnt_("rollback xid %s", xid_to_str(buf, info->list[i]));
+            });
+        }
+      }
+    }
+    if (got < info->len)
+      break;
+  }
+  }
+  return FALSE;
+}
+
+/**
+  Scan all 2PC-capable engines for transactions left in the prepared
+  state and complete (or, depending on the mode described above
+  struct xarecover_st, just collect) them.
+
+  @param commit_list   hash of xids to commit, or NULL
+  @param arg_mem_root  mem_root for collecting xids so the caller can
+                       decide on them later (semisync slave recovery),
+                       or NULL
+
+  @return 0 on success, 1 on failure
+*/
+int ha_recover(HASH *commit_list, MEM_ROOT *arg_mem_root)
+{
+  struct xarecover_st info;
+  DBUG_ENTER("ha_recover");
+  info.found_foreign_xids= info.found_my_xids= 0;
+  info.commit_list= commit_list;
+  info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0);
+  info.list= NULL;
+  info.mem_root= arg_mem_root;
+  info.error= false;
+
+  /* commit_list and tc_heuristic_recover cannot be set both */
+  DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0);
+  /* if either is set, total_ha_2pc must be set too */
+  DBUG_ASSERT(info.dry_run ||
+              (failed_ha_2pc + total_ha_2pc) > (ulong)opt_bin_log);
+
+  if (total_ha_2pc <= (ulong)opt_bin_log)
+    DBUG_RETURN(0);
+
+  if (info.commit_list)
+    sql_print_information("Starting table crash recovery...");
+
+  /* Try the largest xid buffer first, halving the size on malloc failure. */
+  for (info.len= MAX_XID_LIST_SIZE ;
+       info.list==0 && info.len > MIN_XID_LIST_SIZE; info.len/=2)
+  {
+    DBUG_EXECUTE_IF("min_xa_len", info.len = 16;);
+    info.list=(XID *)my_malloc(key_memory_XID, info.len*sizeof(XID), MYF(0));
+  }
+  if (!info.list)
+  {
+    /*
+      Fix: static_cast was missing its target type (would not compile);
+      ER_OUTOFMEMORY expects the requested byte count as an int.
+    */
+    sql_print_error(ER(ER_OUTOFMEMORY),
+                    static_cast<int>(info.len*sizeof(XID)));
+    DBUG_RETURN(1);
+  }
+
+  plugin_foreach(NULL, xarecover_handlerton,
+                 MYSQL_STORAGE_ENGINE_PLUGIN, &info);
+
+  my_free(info.list);
+  if (info.found_foreign_xids)
+    sql_print_warning("Found %d prepared XA transactions",
+                      info.found_foreign_xids);
+  if (info.dry_run && info.found_my_xids)
+  {
+    sql_print_error("Found %d prepared transactions! It means that server was "
+                    "not shut down properly last time and critical recovery "
+                    "information (last binlog or %s file) was manually deleted "
+                    "after a crash. You have to start server with "
+                    "--tc-heuristic-recover switch to commit or rollback "
+                    "pending transactions.",
+                    info.found_my_xids, opt_tc_log_file);
+    DBUG_RETURN(1);
+  }
+  if (info.error)
+    DBUG_RETURN(1);
+
+  if (info.commit_list)
+    sql_print_information("Crash table recovery finished.");
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Called by engine to notify TC that a new commit checkpoint has been reached.
+  See comments on handlerton method commit_checkpoint_request() for details.
+*/
+void commit_checkpoint_notify_ha(void *cookie)
+{
+  tc_log->commit_checkpoint_notify(cookie);
+}
+
+
+/**
+  Check if all storage engines used in transaction agree that after
+  rollback to savepoint it is safe to release MDL locks acquired after
+  savepoint creation.
+
+  @param thd    The client thread that executes the transaction.
+
+  @return true  - It is safe to release MDL locks.
+          false - If it is not.
+*/
+bool ha_rollback_to_savepoint_can_release_mdl(THD *thd)
+{
+  Ha_trx_info *ha_info;
+  THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction->stmt :
+                     &thd->transaction->all);
+
+  DBUG_ENTER("ha_rollback_to_savepoint_can_release_mdl");
+
+  /**
+    Checking whether it is safe to release metadata locks after rollback to
+    savepoint in all the storage engines that are part of the transaction.
+ */ + for (ha_info= trans->ha_list; ha_info; ha_info= ha_info->next()) + { + handlerton *ht= ha_info->ht(); + DBUG_ASSERT(ht); + + if (ht->savepoint_rollback_can_release_mdl == 0 || + ht->savepoint_rollback_can_release_mdl(ht, thd) == false) + DBUG_RETURN(false); + } + + DBUG_RETURN(true); +} + +int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv) +{ + int error=0; + THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction->stmt : + &thd->transaction->all); + Ha_trx_info *ha_info, *ha_info_next; + + DBUG_ENTER("ha_rollback_to_savepoint"); + + trans->no_2pc=0; + /* + rolling back to savepoint in all storage engines that were part of the + transaction when the savepoint was set + */ + for (ha_info= sv->ha_list; ha_info; ha_info= ha_info->next()) + { + int err; + handlerton *ht= ha_info->ht(); + DBUG_ASSERT(ht); + DBUG_ASSERT(ht->savepoint_set != 0); + if ((err= ht->savepoint_rollback(ht, thd, + (uchar *)(sv+1)+ht->savepoint_offset))) + { // cannot happen + my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err); + error=1; + } + status_var_increment(thd->status_var.ha_savepoint_rollback_count); + trans->no_2pc|= ht->prepare == 0; + } + /* + rolling back the transaction in all storage engines that were not part of + the transaction when the savepoint was set + */ + for (ha_info= trans->ha_list; ha_info != sv->ha_list; + ha_info= ha_info_next) + { + int err; + handlerton *ht= ha_info->ht(); +#ifdef WITH_WSREP + if (WSREP(thd) && ht->flags & HTON_WSREP_REPLICATION) + { + WSREP_DEBUG("ha_rollback_to_savepoint: run before_rollbackha_rollback_trans hook"); + (void) wsrep_before_rollback(thd, !thd->in_sub_stmt); + + } +#endif // WITH_WSREP + if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt))) + { // cannot happen + my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err); + error=1; + } +#ifdef WITH_WSREP + if (WSREP(thd) && ht->flags & HTON_WSREP_REPLICATION) + { + WSREP_DEBUG("ha_rollback_to_savepoint: run after_rollback hook"); + (void) wsrep_after_rollback(thd, !thd->in_sub_stmt); + } 
+#endif // WITH_WSREP + status_var_increment(thd->status_var.ha_rollback_count); + ha_info_next= ha_info->next(); + ha_info->reset(); /* keep it conveniently zero-filled */ + } + trans->ha_list= sv->ha_list; + + if (thd->m_transaction_psi != NULL) + MYSQL_INC_TRANSACTION_ROLLBACK_TO_SAVEPOINT(thd->m_transaction_psi, 1); + + DBUG_RETURN(error); +} + +/** + @note + according to the sql standard (ISO/IEC 9075-2:2003) + section "4.33.4 SQL-statements and transaction states", + SAVEPOINT is *not* transaction-initiating SQL-statement +*/ +int ha_savepoint(THD *thd, SAVEPOINT *sv) +{ +#ifdef WITH_WSREP + /* + Register binlog hton for savepoint processing if wsrep binlog + emulation is on. + */ + if (WSREP_EMULATE_BINLOG(thd) && wsrep_thd_is_local(thd)) + { + wsrep_register_binlog_handler(thd, thd->in_multi_stmt_transaction_mode()); + } +#endif /* WITH_WSREP */ + int error=0; + THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction->stmt : + &thd->transaction->all); + Ha_trx_info *ha_info= trans->ha_list; + DBUG_ENTER("ha_savepoint"); + + for (; ha_info; ha_info= ha_info->next()) + { + int err; + handlerton *ht= ha_info->ht(); + DBUG_ASSERT(ht); + if (! ht->savepoint_set) + { + my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT"); + error=1; + break; + } + if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset))) + { // cannot happen + my_error(ER_GET_ERRNO, MYF(0), err, hton_name(ht)->str); + error=1; + } + status_var_increment(thd->status_var.ha_savepoint_count); + } + /* + Remember the list of registered storage engines. All new + engines are prepended to the beginning of the list. 
+ */ + sv->ha_list= trans->ha_list; + + if (!error && thd->m_transaction_psi != NULL) + MYSQL_INC_TRANSACTION_SAVEPOINTS(thd->m_transaction_psi, 1); + + DBUG_RETURN(error); +} + +int ha_release_savepoint(THD *thd, SAVEPOINT *sv) +{ + int error=0; + Ha_trx_info *ha_info= sv->ha_list; + DBUG_ENTER("ha_release_savepoint"); + + for (; ha_info; ha_info= ha_info->next()) + { + int err; + handlerton *ht= ha_info->ht(); + /* Savepoint life time is enclosed into transaction life time. */ + DBUG_ASSERT(ht); + if (!ht->savepoint_release) + continue; + if ((err= ht->savepoint_release(ht, thd, + (uchar *)(sv+1) + ht->savepoint_offset))) + { // cannot happen + my_error(ER_GET_ERRNO, MYF(0), err, hton_name(ht)->str); + error=1; + } + } + + if (thd->m_transaction_psi != NULL) + MYSQL_INC_TRANSACTION_RELEASE_SAVEPOINT(thd->m_transaction_psi, 1); + + DBUG_RETURN(error); +} + + +static my_bool snapshot_handlerton(THD *thd, plugin_ref plugin, + void *arg) +{ + handlerton *hton= plugin_hton(plugin); + if (hton->start_consistent_snapshot) + { + if (hton->start_consistent_snapshot(hton, thd)) + return TRUE; + *((bool *)arg)= false; + } + return FALSE; +} + +int ha_start_consistent_snapshot(THD *thd) +{ + bool err, warn= true; + + /* + Holding the LOCK_commit_ordered mutex ensures that we get the same + snapshot for all engines (including the binary log). This allows us + among other things to do backups with + START TRANSACTION WITH CONSISTENT SNAPSHOT and + have a consistent binlog position. 
+ */ + mysql_mutex_lock(&LOCK_commit_ordered); + err= plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn); + mysql_mutex_unlock(&LOCK_commit_ordered); + + if (err) + { + ha_rollback_trans(thd, true); + return 1; + } + + /* + Same idea as when one wants to CREATE TABLE in one engine which does not + exist: + */ + if (warn) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "This MariaDB server does not support any " + "consistent-read capable storage engine"); + return 0; +} + + +static my_bool flush_handlerton(THD *thd, plugin_ref plugin, + void *arg) +{ + handlerton *hton= plugin_hton(plugin); + return hton->flush_logs && hton->flush_logs(hton); +} + + +bool ha_flush_logs() +{ + return plugin_foreach(NULL, flush_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, 0); +} + + +/** + @brief make canonical filename + + @param[in] file table handler + @param[in] path original path + @param[out] tmp_path buffer for canonized path + + @details Lower case db name and table name path parts for + non file based tables when lower_case_table_names + is 2 (store as is, compare in lower case). + Filesystem path prefix (mysql_data_home or tmpdir) + is left intact. + + @note tmp_path may be left intact if no conversion was + performed. + + @retval canonized path + + @todo This may be done more efficiently when table path + gets built. Convert this function to something like + ASSERT_CANONICAL_FILENAME. 
+*/ +const char *get_canonical_filename(handler *file, const char *path, + char *tmp_path) +{ + uint i; + if (!file->needs_lower_case_filenames()) + return path; + + for (i= 0; i <= mysql_tmpdir_list.max; i++) + { + if (is_prefix(path, mysql_tmpdir_list.list[i])) + return path; + } + + /* Ensure that table handler get path in lower case */ + if (tmp_path != path) + strmov(tmp_path, path); + + /* + we only should turn into lowercase database/table part + so start the process after homedirectory + */ + my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len); + return tmp_path; +} + + +/** + Delete a table in the engine + + @return 0 Table was deleted + @return -1 Table didn't exists, no error given + @return # Error from table handler + + @note + ENOENT and HA_ERR_NO_SUCH_TABLE are not considered errors. + The .frm file should be deleted by the caller only if we return <= 0. +*/ + +int ha_delete_table(THD *thd, handlerton *hton, const char *path, + const LEX_CSTRING *db, const LEX_CSTRING *alias, + bool generate_warning) +{ + int error; + bool is_error= thd->is_error(); + DBUG_ENTER("ha_delete_table"); + + /* hton is NULL in ALTER TABLE when renaming only .frm files */ + if (hton == NULL || hton == view_pseudo_hton) + DBUG_RETURN(0); + + if (ha_check_if_updates_are_ignored(thd, hton, "DROP")) + DBUG_RETURN(0); + + error= hton->drop_table(hton, path); + if (error > 0) + { + /* + It's not an error if the table doesn't exist in the engine. + warn the user, but still report DROP being a success + */ + bool intercept= non_existing_table_error(error); + + if ((!intercept || generate_warning) && ! 
thd->is_error()) + { + TABLE dummy_table; + TABLE_SHARE dummy_share; + handler *file= get_new_handler(nullptr, thd->mem_root, hton); + if (file) { + bzero((char*) &dummy_table, sizeof(dummy_table)); + bzero((char*) &dummy_share, sizeof(dummy_share)); + dummy_share.path.str= (char*) path; + dummy_share.path.length= strlen(path); + dummy_share.normalized_path= dummy_share.path; + dummy_share.db= *db; + dummy_share.table_name= *alias; + dummy_table.s= &dummy_share; + dummy_table.alias.set(alias->str, alias->length, table_alias_charset); + file->change_table_ptr(&dummy_table, &dummy_share); + file->print_error(error, MYF(intercept ? ME_WARNING : 0)); + delete file; + } + } + if (intercept) + { + /* Clear error if we got it in this function */ + if (!is_error) + thd->clear_error(); + error= -1; + } + } + if (error) + DBUG_PRINT("exit", ("error: %d", error)); + DBUG_RETURN(error); +} + +/**************************************************************************** +** General handler functions +****************************************************************************/ + + +/** + Clone a handler + + @param name name of new table instance + @param mem_root Where 'this->ref' should be allocated. It can't be + in this->table->mem_root as otherwise we will not be + able to reclaim that memory when the clone handler + object is destroyed. +*/ + +handler *handler::clone(const char *name, MEM_ROOT *mem_root) +{ + handler *new_handler= get_new_handler(table->s, mem_root, ht); + + if (!new_handler) + return NULL; + if (new_handler->set_ha_share_ref(ha_share)) + goto err; + + /* + TODO: Implement a more efficient way to have more than one index open for + the same table instance. The ha_open call is not cacheable for clone. + + This is not critical as the engines already have the table open + and should be able to use the original instance of the table. 
+ */ + if (new_handler->ha_open(table, name, table->db_stat, + HA_OPEN_IGNORE_IF_LOCKED, mem_root)) + goto err; + new_handler->handler_stats= handler_stats; + + return new_handler; + +err: + delete new_handler; + return NULL; +} + + +/** + clone of current handler. + + Creates a clone of handler used for unique hash key and WITHOUT OVERLAPS. + @return error code +*/ +int handler::create_lookup_handler() +{ + handler *tmp; + if (lookup_handler != this) + return 0; + if (!(tmp= clone(table->s->normalized_path.str, table->in_use->mem_root))) + return 1; + lookup_handler= tmp; + return lookup_handler->ha_external_lock(table->in_use, F_RDLCK); +} + +LEX_CSTRING *handler::engine_name() +{ + return hton_name(ht); +} + + +/* + It is assumed that the value of the parameter 'ranges' can be only 0 or 1. + If ranges == 1 then the function returns the cost of index only scan + by index 'keyno' of one range containing 'rows' key entries. + If ranges == 0 then the function returns only the cost of copying + those key entries into the engine buffers. +*/ + +double handler::keyread_time(uint index, uint ranges, ha_rows rows) +{ + DBUG_ASSERT(ranges == 0 || ranges == 1); + size_t len= table->key_info[index].key_length + ref_length; + if (table->file->is_clustering_key(index)) + len= table->s->stored_rec_length; + double cost= (double)rows*len/(stats.block_size+1)*IDX_BLOCK_COPY_COST; + if (ranges) + { + uint keys_per_block= (uint) (stats.block_size*3/4/len+1); + ulonglong blocks= (rows+ keys_per_block- 1)/keys_per_block; + cost+= blocks; + } + return cost; +} + + +THD *handler::ha_thd(void) const +{ + DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd); + return (table && table->in_use) ? table->in_use : current_thd; +} + +void handler::unbind_psi() +{ + /* + Notify the instrumentation that this table is not owned + by this thread any more. 
+ */ + PSI_CALL_unbind_table(m_psi); +} + +void handler::rebind_psi() +{ + /* + Notify the instrumentation that this table is now owned + by this thread. + */ + m_psi= PSI_CALL_rebind_table(ha_table_share_psi(), this, m_psi); +} + + +void handler::start_psi_batch_mode() +{ +#ifdef HAVE_PSI_TABLE_INTERFACE + DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_NONE); + DBUG_ASSERT(m_psi_locker == NULL); + m_psi_batch_mode= PSI_BATCH_MODE_STARTING; + m_psi_numrows= 0; +#endif +} + +void handler::end_psi_batch_mode() +{ +#ifdef HAVE_PSI_TABLE_INTERFACE + DBUG_ASSERT(m_psi_batch_mode != PSI_BATCH_MODE_NONE); + if (m_psi_locker != NULL) + { + DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_STARTED); + PSI_TABLE_CALL(end_table_io_wait)(m_psi_locker, m_psi_numrows); + m_psi_locker= NULL; + } + m_psi_batch_mode= PSI_BATCH_MODE_NONE; +#endif +} + +PSI_table_share *handler::ha_table_share_psi() const +{ + return table_share->m_psi; +} + +/** @brief + Open database-handler. + + IMPLEMENTATION + Try O_RDONLY if cannot open as O_RDWR + Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set +*/ +int handler::ha_open(TABLE *table_arg, const char *name, int mode, + uint test_if_locked, MEM_ROOT *mem_root, + List *partitions_to_open) +{ + int error; + DBUG_ENTER("handler::ha_open"); + DBUG_PRINT("enter", + ("name: %s db_type: %d db_stat: %d mode: %d lock_test: %d", + name, ht->db_type, table_arg->db_stat, mode, + test_if_locked)); + + table= table_arg; + DBUG_ASSERT(table->s == table_share); + DBUG_ASSERT(m_lock_type == F_UNLCK); + DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK)); + DBUG_ASSERT(alloc_root_inited(&table->mem_root)); + + set_partitions_to_open(partitions_to_open); + + if (unlikely((error=open(name,mode,test_if_locked)))) + { + if ((error == EACCES || error == EROFS) && mode == O_RDWR && + (table->db_stat & HA_TRY_READ_ONLY)) + { + table->db_stat|=HA_READ_ONLY; + error=open(name,O_RDONLY,test_if_locked); + } + } + if (unlikely(error)) + { + 
my_errno= error; /* Safeguard */ + DBUG_PRINT("error",("error: %d errno: %d",error,errno)); + } + else + { + DBUG_ASSERT(m_psi == NULL); + DBUG_ASSERT(table_share != NULL); + /* + Do not call this for partitions handlers, since it may take too much + resources. + So only use the m_psi on table level, not for individual partitions. + */ + if (!(test_if_locked & HA_OPEN_NO_PSI_CALL)) + { + m_psi= PSI_CALL_open_table(ha_table_share_psi(), this); + } + + if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA) + table->db_stat|=HA_READ_ONLY; + (void) extra(HA_EXTRA_NO_READCHECK); // Not needed in SQL + + /* Allocate ref in thd or on the table's mem_root */ + if (!(ref= (uchar*) alloc_root(mem_root ? mem_root : &table->mem_root, + ALIGN_SIZE(ref_length)*2))) + { + ha_close(); + error=HA_ERR_OUT_OF_MEM; + } + else + dup_ref=ref+ALIGN_SIZE(ref_length); + cached_table_flags= table_flags(); + } + reset_statistics(); + internal_tmp_table= MY_TEST(test_if_locked & HA_OPEN_INTERNAL_TABLE); + DBUG_RETURN(error); +} + +int handler::ha_close(void) +{ + DBUG_ENTER("ha_close"); + /* + Increment global statistics for temporary tables. + In_use is 0 for tables that was closed from the table cache. 
+ */ + if (table->in_use) + status_var_add(table->in_use->status_var.rows_tmp_read, rows_tmp_read); + PSI_CALL_close_table(table_share, m_psi); + m_psi= NULL; /* instrumentation handle, invalid after close_table() */ + DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_NONE); + DBUG_ASSERT(m_psi_locker == NULL); + + /* Detach from ANALYZE tracker */ + tracker= NULL; + /* We use ref as way to check that open succeded */ + ref= 0; + + DBUG_ASSERT(m_lock_type == F_UNLCK); + DBUG_ASSERT(inited == NONE); + DBUG_RETURN(close()); +} + + +int handler::ha_rnd_next(uchar *buf) +{ + int result; + DBUG_ENTER("handler::ha_rnd_next"); + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type != F_UNLCK); + DBUG_ASSERT(inited == RND); + + do + { + TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, MAX_KEY, result, + { result= rnd_next(buf); }) + if (result != HA_ERR_RECORD_DELETED) + break; + status_var_increment(table->in_use->status_var.ha_read_rnd_deleted_count); + } while (!table->in_use->check_killed(1)); + + if (result == HA_ERR_RECORD_DELETED) + result= HA_ERR_ABORTED_BY_USER; + else + { + if (!result) + { + update_rows_read(); + if (table->vfield && buf == table->record[0]) + table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ); + } + increment_statistics(&SSV::ha_read_rnd_next_count); + } + + table->status=result ? 
STATUS_NOT_FOUND: 0; + DBUG_RETURN(result); +} + +int handler::ha_rnd_pos(uchar *buf, uchar *pos) +{ + int result; + DBUG_ENTER("handler::ha_rnd_pos"); + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type != F_UNLCK); + DBUG_ASSERT(inited == RND); + + TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, MAX_KEY, result, + { result= rnd_pos(buf, pos); }) + increment_statistics(&SSV::ha_read_rnd_count); + if (result == HA_ERR_RECORD_DELETED) + result= HA_ERR_KEY_NOT_FOUND; + else if (!result) + { + update_rows_read(); + if (table->vfield && buf == table->record[0]) + table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ); + } + table->status=result ? STATUS_NOT_FOUND: 0; + DBUG_RETURN(result); +} + +int handler::ha_index_read_map(uchar *buf, const uchar *key, + key_part_map keypart_map, + enum ha_rkey_function find_flag) +{ + int result; + DBUG_ENTER("handler::ha_index_read_map"); + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type != F_UNLCK); + DBUG_ASSERT(inited==INDEX); + + TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result, + { result= index_read_map(buf, key, keypart_map, find_flag); }) + increment_statistics(&SSV::ha_read_key_count); + if (!result) + { + update_index_statistics(); + if (table->vfield && buf == table->record[0]) + table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ); + } + table->status=result ? STATUS_NOT_FOUND: 0; + DBUG_RETURN(result); +} + +/* + @note: Other index lookup/navigation functions require prior + handler->index_init() call. This function is different, it requires + that the scan is not initialized, and accepts "uint index" as an argument. 
*/

int handler::ha_index_read_idx_map(uchar *buf, uint index, const uchar *key,
                                   key_part_map keypart_map,
                                   enum ha_rkey_function find_flag)
{
  int result;
  DBUG_ASSERT(inited==NONE);
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(end_range == NULL);
  TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, index, result,
    { result= index_read_idx_map(buf, index, key, keypart_map, find_flag); })
  increment_statistics(&SSV::ha_read_key_count);
  if (!result)
  {
    update_rows_read();
    /* Per-index read accounting; done by hand as the index is explicit here */
    index_rows_read[index]++;
    if (table->vfield && buf == table->record[0])
      table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
  }
  table->status=result ? STATUS_NOT_FOUND: 0;
  return result;
}

/**
  Public wrapper around index_next(): step forward in the active index scan.
  Requires a prior ha_index_init() call (asserted via inited == INDEX).
*/
int handler::ha_index_next(uchar * buf)
{
  int result;
  DBUG_ENTER("handler::ha_index_next");
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited==INDEX);

  TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
    { result= index_next(buf); })
  increment_statistics(&SSV::ha_read_next_count);
  if (!result)
  {
    update_index_statistics();
    if (table->vfield && buf == table->record[0])
      table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
  }
  table->status=result ? STATUS_NOT_FOUND: 0;

  DEBUG_SYNC(ha_thd(), "handler_ha_index_next_end");

  DBUG_RETURN(result);
}

/**
  Public wrapper around index_prev(): step backward in the active index scan.
*/
int handler::ha_index_prev(uchar * buf)
{
  int result;
  DBUG_ENTER("handler::ha_index_prev");
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited==INDEX);

  TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
    { result= index_prev(buf); })
  increment_statistics(&SSV::ha_read_prev_count);
  if (!result)
  {
    update_index_statistics();
    if (table->vfield && buf == table->record[0])
      table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
  }
  table->status=result ? STATUS_NOT_FOUND: 0;
  DBUG_RETURN(result);
}

/**
  Public wrapper around index_first(): position on the first row of the
  active index.
*/
int handler::ha_index_first(uchar * buf)
{
  int result;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited==INDEX);

  TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
    { result= index_first(buf); })
  increment_statistics(&SSV::ha_read_first_count);
  if (!result)
  {
    update_index_statistics();
    if (table->vfield && buf == table->record[0])
      table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
  }
  table->status=result ? STATUS_NOT_FOUND: 0;
  return result;
}

/**
  Public wrapper around index_last(): position on the last row of the
  active index.
*/
int handler::ha_index_last(uchar * buf)
{
  int result;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited==INDEX);

  TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
    { result= index_last(buf); })
  increment_statistics(&SSV::ha_read_last_count);
  if (!result)
  {
    update_index_statistics();
    if (table->vfield && buf == table->record[0])
      table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
  }
  table->status=result ? STATUS_NOT_FOUND: 0;
  return result;
}

/**
  Public wrapper around index_next_same(): read the next row having the
  same key value as the current one.

  @param buf     destination record buffer
  @param key     key value the scan must keep matching
  @param keylen  length of the key value
*/
int handler::ha_index_next_same(uchar *buf, const uchar *key, uint keylen)
{
  int result;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited==INDEX);

  TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
    { result= index_next_same(buf, key, keylen); })
  increment_statistics(&SSV::ha_read_next_count);
  if (!result)
  {
    update_index_statistics();
    if (table->vfield && buf == table->record[0])
      table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
  }
  table->status=result ? STATUS_NOT_FOUND: 0;
  return result;
}


/**
  Public wrapper around was_semi_consistent_read(); counts retries of
  semi-consistent reads in ha_read_retry_count.
*/
bool handler::ha_was_semi_consistent_read()
{
  bool result= was_semi_consistent_read();
  if (result)
    increment_statistics(&SSV::ha_read_retry_count);
  return result;
}

/* Initialize handler for random reading, with error handling */

int handler::ha_rnd_init_with_error(bool scan)
{
  int error;
  if (likely(!(error= ha_rnd_init(scan))))
    return 0;
  /* Report the failure to the client before propagating the error code */
  table->file->print_error(error, MYF(0));
  return error;
}


/**
  Read first row (only) from a table. Used for reading tables with
  only one row, either based on table statistics or if table is a SEQUENCE.

  This is never called for normal InnoDB tables, as these table types
  does not have HA_STATS_RECORDS_IS_EXACT set.

  @param buf          destination record buffer
  @param primary_key  index to use for ordered retrieval, if eligible
*/
int handler::read_first_row(uchar * buf, uint primary_key)
{
  int error;
  DBUG_ENTER("handler::read_first_row");

  /*
    If there are very few deleted rows in the table, find the first row by
    scanning the table.
    TODO remove the test for HA_READ_ORDER
  */
  if (stats.deleted < 10 || primary_key >= MAX_KEY ||
      !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
  {
    if (likely(!(error= ha_rnd_init(1))))
    {
      error= ha_rnd_next(buf);
      /* End the scan even on error; keep the first error seen */
      const int end_error= ha_rnd_end();
      if (likely(!error))
        error= end_error;
    }
  }
  else
  {
    /* Find the first row through the primary key */
    if (likely(!(error= ha_index_init(primary_key, 0))))
    {
      error= ha_index_first(buf);
      const int end_error= ha_index_end();
      if (likely(!error))
        error= end_error;
    }
  }
  DBUG_RETURN(error);
}

/**
  Generate the next auto-increment number based on increment and offset.
  computes the lowest number
  - strictly greater than "nr"
  - of the form: auto_increment_offset + N * auto_increment_increment
  If overflow happened then return MAX_ULONGLONG value as an
  indication of overflow.
  In most cases increment= offset= 1, in which case we get:
  @verbatim 1,2,3,4,5,...
@endverbatim + If increment=10 and offset=5 and previous number is 1, we get: + @verbatim 1,5,15,25,35,... @endverbatim +*/ +inline ulonglong +compute_next_insert_id(ulonglong nr,struct system_variables *variables) +{ + const ulonglong save_nr= nr; + + if (variables->auto_increment_increment == 1) + nr= nr + 1; // optimization of the formula below + else + { + /* + Calculating the number of complete auto_increment_increment extents: + */ + nr= (nr + variables->auto_increment_increment - + variables->auto_increment_offset) / + (ulonglong) variables->auto_increment_increment; + /* + Adding an offset to the auto_increment_increment extent boundary: + */ + nr= nr * (ulonglong) variables->auto_increment_increment + + variables->auto_increment_offset; + } + + if (unlikely(nr <= save_nr)) + return ULONGLONG_MAX; + + return nr; +} + + +void handler::adjust_next_insert_id_after_explicit_value(ulonglong nr) +{ + /* + If we have set THD::next_insert_id previously and plan to insert an + explicitly-specified value larger than this, we need to increase + THD::next_insert_id to be greater than the explicit value. + */ + if ((next_insert_id > 0) && (nr >= next_insert_id)) + set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables)); +} + + +/** @brief + Computes the largest number X: + - smaller than or equal to "nr" + - of the form: auto_increment_offset + N * auto_increment_increment + where N>=0. + + SYNOPSIS + prev_insert_id + nr Number to "round down" + variables variables struct containing auto_increment_increment and + auto_increment_offset + + RETURN + The number X if it exists, "nr" otherwise. +*/ +inline ulonglong +prev_insert_id(ulonglong nr, struct system_variables *variables) +{ + if (unlikely(nr < variables->auto_increment_offset)) + { + /* + There's nothing good we can do here. That is a pathological case, where + the offset is larger than the column's max possible value, i.e. not even + the first sequence value may be inserted. 
User will receive warning. + */ + DBUG_PRINT("info",("auto_increment: nr: %lu cannot honour " + "auto_increment_offset: %lu", + (ulong) nr, variables->auto_increment_offset)); + return nr; + } + if (variables->auto_increment_increment == 1) + return nr; // optimization of the formula below + /* + Calculating the number of complete auto_increment_increment extents: + */ + nr= (nr - variables->auto_increment_offset) / + (ulonglong) variables->auto_increment_increment; + /* + Adding an offset to the auto_increment_increment extent boundary: + */ + return (nr * (ulonglong) variables->auto_increment_increment + + variables->auto_increment_offset); +} + + +/** + Update the auto_increment field if necessary. + + Updates columns with type NEXT_NUMBER if: + + - If column value is set to NULL (in which case + auto_increment_field_not_null is 0) + - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not + set. In the future we will only set NEXT_NUMBER fields if one sets them + to NULL (or they are not included in the insert list). + + In those cases, we check if the currently reserved interval still has + values we have not used. If yes, we pick the smallest one and use it. + Otherwise: + + - If a list of intervals has been provided to the statement via SET + INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the + first unused interval from this list, consider it as reserved. + + - Otherwise we set the column for the first row to the value + next_insert_id(get_auto_increment(column))) which is usually + max-used-column-value+1. + We call get_auto_increment() for the first row in a multi-row + statement. get_auto_increment() will tell us the interval of values it + reserved for us. + + - In both cases, for the following rows we use those reserved values without + calling the handler again (we just progress in the interval, computing + each new value from the previous one). 
Until we have exhausted them, then + we either take the next provided interval or call get_auto_increment() + again to reserve a new interval. + + - In both cases, the reserved intervals are remembered in + thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based + binlogging; the last reserved interval is remembered in + auto_inc_interval_for_cur_row. The number of reserved intervals is + remembered in auto_inc_intervals_count. It differs from the number of + elements in thd->auto_inc_intervals_in_cur_stmt_for_binlog() because the + latter list is cumulative over all statements forming one binlog event + (when stored functions and triggers are used), and collapses two + contiguous intervals in one (see its append() method). + + The idea is that generated auto_increment values are predictable and + independent of the column values in the table. This is needed to be + able to replicate into a table that already has rows with a higher + auto-increment value than the one that is inserted. + + After we have already generated an auto-increment number and the user + inserts a column with a higher value than the last used one, we will + start counting from the inserted value. + + This function's "outputs" are: the table's auto_increment field is filled + with a value, thd->next_insert_id is filled with the value to use for the + next row, if a value was autogenerated for the current row it is stored in + thd->insert_id_for_cur_row, if get_auto_increment() was called + thd->auto_inc_interval_for_cur_row is modified, if that interval is not + present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to + this list. + + @todo + Replace all references to "next number" or NEXT_NUMBER to + "auto_increment", everywhere (see below: there is + table->auto_increment_field_not_null, and there also exists + table->next_number_field, it's not consistent). 
+ + @retval + 0 ok + @retval + HA_ERR_AUTOINC_READ_FAILED get_auto_increment() was called and + returned ~(ulonglong) 0 + @retval + HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode + failure. +*/ + +#define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here +#define AUTO_INC_DEFAULT_NB_MAX_BITS 16 +#define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1) + +int handler::update_auto_increment() +{ + ulonglong nr, nb_reserved_values; + bool append= FALSE; + THD *thd= table->in_use; + struct system_variables *variables= &thd->variables; + int result=0, tmp; + DBUG_ENTER("handler::update_auto_increment"); + + /* + next_insert_id is a "cursor" into the reserved interval, it may go greater + than the interval, but not smaller. + */ + DBUG_ASSERT(next_insert_id >= auto_inc_interval_for_cur_row.minimum()); + + if ((nr= table->next_number_field->val_int()) != 0 || + (table->auto_increment_field_not_null && + thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO)) + { + + /* + There could be an error reported because value was truncated + when strict mode is enabled. + */ + if (thd->is_error()) + DBUG_RETURN(HA_ERR_AUTOINC_ERANGE); + /* + Update next_insert_id if we had already generated a value in this + statement (case of INSERT VALUES(null),(3763),(null): + the last NULL needs to insert 3764, not the value of the first NULL plus + 1). + Ignore negative values. 
+ */ + if ((longlong) nr > 0 || (table->next_number_field->flags & UNSIGNED_FLAG)) + adjust_next_insert_id_after_explicit_value(nr); + insert_id_for_cur_row= 0; // didn't generate anything + DBUG_RETURN(0); + } + + if (table->versioned()) + { + Field *end= table->vers_end_field(); + DBUG_ASSERT(end); + bitmap_set_bit(table->read_set, end->field_index); + if (!end->is_max()) + { + if (thd->lex->sql_command == SQLCOM_ALTER_TABLE) + { + if (!table->next_number_field->real_maybe_null()) + DBUG_RETURN(HA_ERR_UNSUPPORTED); + table->next_number_field->set_null(); + } + DBUG_RETURN(0); + } + } + + // ALTER TABLE ... ADD COLUMN ... AUTO_INCREMENT + if (thd->lex->sql_command == SQLCOM_ALTER_TABLE) + table->next_number_field->set_notnull(); + + if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum()) + { + /* next_insert_id is beyond what is reserved, so we reserve more. */ + const Discrete_interval *forced= + thd->auto_inc_intervals_forced.get_next(); + if (forced != NULL) + { + nr= forced->minimum(); + nb_reserved_values= forced->values(); + } + else + { + /* + handler::estimation_rows_to_insert was set by + handler::ha_start_bulk_insert(); if 0 it means "unknown". + */ + ulonglong nb_desired_values; + /* + If an estimation was given to the engine: + - use it. + - if we already reserved numbers, it means the estimation was + not accurate, then we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd + time, twice that the 3rd time etc. + If no estimation was given, use those increasing defaults from the + start, starting from AUTO_INC_DEFAULT_NB_ROWS. + Don't go beyond a max to not reserve "way too much" (because + reservation means potentially losing unused values). + Note that in prelocked mode no estimation is given. 
+ */ + + if ((auto_inc_intervals_count == 0) && (estimation_rows_to_insert > 0)) + nb_desired_values= estimation_rows_to_insert; + else if ((auto_inc_intervals_count == 0) && + (thd->lex->many_values.elements > 0)) + { + /* + For multi-row inserts, if the bulk inserts cannot be started, the + handler::estimation_rows_to_insert will not be set. But we still + want to reserve the autoinc values. + */ + nb_desired_values= thd->lex->many_values.elements; + } + else /* go with the increasing defaults */ + { + /* avoid overflow in formula, with this if() */ + if (auto_inc_intervals_count <= AUTO_INC_DEFAULT_NB_MAX_BITS) + { + nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS * + (1 << auto_inc_intervals_count); + set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX); + } + else + nb_desired_values= AUTO_INC_DEFAULT_NB_MAX; + } + get_auto_increment(variables->auto_increment_offset, + variables->auto_increment_increment, + nb_desired_values, &nr, + &nb_reserved_values); + if (nr == ULONGLONG_MAX) + DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED); // Mark failure + + /* + That rounding below should not be needed when all engines actually + respect offset and increment in get_auto_increment(). But they don't + so we still do it. Wonder if for the not-first-in-index we should do + it. Hope that this rounding didn't push us out of the interval; even + if it did we cannot do anything about it (calling the engine again + will not help as we inserted no row). + */ + nr= compute_next_insert_id(nr-1, variables); + } + + if (table->s->next_number_keypart == 0) + { + /* We must defer the appending until "nr" has been possibly truncated */ + append= TRUE; + } + else + { + /* + For such auto_increment there is no notion of interval, just a + singleton. The interval is not even stored in + thd->auto_inc_interval_for_cur_row, so we are sure to call the engine + for next row. 
+ */ + DBUG_PRINT("info",("auto_increment: special not-first-in-index")); + } + } + + if (unlikely(nr == ULONGLONG_MAX)) + DBUG_RETURN(HA_ERR_AUTOINC_ERANGE); + + DBUG_ASSERT(nr != 0); + DBUG_PRINT("info",("auto_increment: %llu nb_reserved_values: %llu", + nr, append ? nb_reserved_values : 0)); + + /* Store field without warning (Warning will be printed by insert) */ + { + Check_level_instant_set check_level_save(thd, CHECK_FIELD_IGNORE); + tmp= table->next_number_field->store((longlong)nr, TRUE); + } + + if (unlikely(tmp)) // Out of range value in store + { + /* + First, test if the query was aborted due to strict mode constraints + or new field value greater than maximum integer value: + */ + if (thd->killed == KILL_BAD_DATA || + nr > table->next_number_field->get_max_int_value()) + { + /* + It's better to return an error here than getting a confusing + 'duplicate key error' later. + */ + result= HA_ERR_AUTOINC_ERANGE; + } + else + { + /* + Field refused this value (overflow) and truncated it, use the result + of the truncation (which is going to be inserted); however we try to + decrease it to honour auto_increment_* variables. + That will shift the left bound of the reserved interval, we don't + bother shifting the right bound (anyway any other value from this + interval will cause a duplicate key). + */ + nr= prev_insert_id(table->next_number_field->val_int(), variables); + if (unlikely(table->next_number_field->store((longlong)nr, TRUE))) + nr= table->next_number_field->val_int(); + } + } + if (append) + { + auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values, + variables->auto_increment_increment); + auto_inc_intervals_count++; + /* Row-based replication does not need to store intervals in binlog */ + if (((WSREP_NNULL(thd) && wsrep_emulate_bin_log) || + mysql_bin_log.is_open()) && + !thd->is_current_stmt_binlog_format_row()) + thd->auto_inc_intervals_in_cur_stmt_for_binlog. 
+ append(auto_inc_interval_for_cur_row.minimum(), + auto_inc_interval_for_cur_row.values(), + variables->auto_increment_increment); + } + + /* + Record this autogenerated value. If the caller then + succeeds to insert this value, it will call + record_first_successful_insert_id_in_cur_stmt() + which will set first_successful_insert_id_in_cur_stmt if it's not + already set. + */ + insert_id_for_cur_row= nr; + + if (result) // overflow + DBUG_RETURN(result); + + /* + Set next insert id to point to next auto-increment value to be able to + handle multi-row statements. + */ + set_next_insert_id(compute_next_insert_id(nr, variables)); + + DBUG_RETURN(0); +} + + +/** @brief + MySQL signal that it changed the column bitmap + + USAGE + This is for handlers that needs to setup their own column bitmaps. + Normally the handler should set up their own column bitmaps in + index_init() or rnd_init() and in any column_bitmaps_signal() call after + this. + + The handler is allowed to do changes to the bitmap after a index_init or + rnd_init() call is made as after this, MySQL will not use the bitmap + for any program logic checking. +*/ +void handler::column_bitmaps_signal() +{ + DBUG_ENTER("column_bitmaps_signal"); + if (table) + DBUG_PRINT("info", ("read_set: %p write_set: %p", + table->read_set, table->write_set)); + DBUG_VOID_RETURN; +} + + +/** @brief + Reserves an interval of auto_increment values from the handler. + + SYNOPSIS + get_auto_increment() + offset + increment + nb_desired_values how many values we want + first_value (OUT) the first value reserved by the handler + nb_reserved_values (OUT) how many values the handler reserved + + offset and increment means that we want values to be of the form + offset + N * increment, where N>=0 is integer. + If the function sets *first_value to ~(ulonglong)0 it means an error. + If the function sets *nb_reserved_values to ULONGLONG_MAX it means it has + reserved to "positive infinite". 
+*/ +void handler::get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *first_value, + ulonglong *nb_reserved_values) +{ + ulonglong nr; + int error; + MY_BITMAP *old_read_set; + bool rnd_inited= (inited == RND); + bool rev= table->key_info[table->s->next_number_index]. + key_part[table->s->next_number_keypart].key_part_flag & + HA_REVERSE_SORT; + + if (rnd_inited && ha_rnd_end()) + return; + + old_read_set= table->prepare_for_keyread(table->s->next_number_index); + + if (ha_index_init(table->s->next_number_index, 1)) + { + /* This should never happen, assert in debug, and fail in release build */ + DBUG_ASSERT(0); + (void) extra(HA_EXTRA_NO_KEYREAD); + *first_value= ULONGLONG_MAX; + if (rnd_inited && ha_rnd_init_with_error(0)) + { + //TODO: it would be nice to return here an error + } + return; + } + + if (table->s->next_number_keypart == 0) + { // Autoincrement at key-start + error= rev ? ha_index_first(table->record[1]) + : ha_index_last(table->record[1]); + /* + MySQL implicitly assumes such method does locking (as MySQL decides to + use nr+increment without checking again with the handler, in + handler::update_auto_increment()), so reserves to infinite. + */ + *nb_reserved_values= ULONGLONG_MAX; + } + else + { + uchar key[MAX_KEY_LENGTH]; + key_copy(key, table->record[0], + table->key_info + table->s->next_number_index, + table->s->next_number_key_offset); + error= ha_index_read_map(table->record[1], key, + make_prev_keypart_map(table->s->next_number_keypart), + rev ? HA_READ_KEY_EXACT : HA_READ_PREFIX_LAST); + /* + MySQL needs to call us for next row: assume we are inserting ("a",null) + here, we return 3, and next this statement will want to insert + ("b",null): there is no reason why ("b",3+1) would be the good row to + insert: maybe it already exists, maybe 3+1 is too large... 
+ */ + *nb_reserved_values= 1; + } + + if (unlikely(error)) + { + if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND) + /* No entry found, that's fine */; + else + print_error(error, MYF(0)); + nr= 1; + } + else + nr= ((ulonglong) table->next_number_field-> + val_int_offset(table->s->rec_buff_length)+1); + ha_index_end(); + table->restore_column_maps_after_keyread(old_read_set); + *first_value= nr; + if (rnd_inited && ha_rnd_init_with_error(0)) + { + //TODO: it would be nice to return here an error + } + return; +} + + +void handler::ha_release_auto_increment() +{ + DBUG_ENTER("ha_release_auto_increment"); + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type != F_UNLCK || + (!next_insert_id && !insert_id_for_cur_row)); + release_auto_increment(); + insert_id_for_cur_row= 0; + auto_inc_interval_for_cur_row.replace(0, 0, 0); + auto_inc_intervals_count= 0; + if (next_insert_id > 0) + { + next_insert_id= 0; + /* + this statement used forced auto_increment values if there were some, + wipe them away for other statements. + */ + table->in_use->auto_inc_intervals_forced.empty(); + } + DBUG_VOID_RETURN; +} + + +/** + Construct and emit duplicate key error message using information + from table's record buffer. + + @param table TABLE object which record buffer should be used as + source for column values. + @param key Key description. + @param msg Error message template to which key value should be + added. + @param errflag Flags for my_error() call. + + @notes + The error message is from ER_DUP_ENTRY_WITH_KEY_NAME but to keep things compatibly + with old code, the error number is ER_DUP_ENTRY +*/ + +void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag) +{ + /* Write the duplicated key in the error message */ + char key_buff[MAX_KEY_LENGTH]; + String str(key_buff,sizeof(key_buff),system_charset_info); + + if (key == NULL) + { + /* + Key is unknown. 
Should only happen if storage engine reports wrong + duplicate key number. + */ + my_printf_error(ER_DUP_ENTRY, msg, errflag, "", "*UNKNOWN*"); + } + else + { + if (key->algorithm == HA_KEY_ALG_LONG_HASH) + setup_keyinfo_hash(key); + /* Table is opened and defined at this point */ + key_unpack(&str,table, key); + uint max_length=MYSQL_ERRMSG_SIZE-(uint) strlen(msg); + if (str.length() >= max_length) + { + str.length(max_length-4); + str.append(STRING_WITH_LEN("...")); + } + my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(), + key->name.str); + if (key->algorithm == HA_KEY_ALG_LONG_HASH) + re_setup_keyinfo_hash(key); + } +} + +/** + Construct and emit duplicate key error message using information + from table's record buffer. + + @sa print_keydup_error(table, key, msg, errflag). +*/ + +void print_keydup_error(TABLE *table, KEY *key, myf errflag) +{ + print_keydup_error(table, key, + ER_THD(table->in_use, ER_DUP_ENTRY_WITH_KEY_NAME), + errflag); +} + +/** + Print error that we got from handler function. 
+ + @note + In case of delete table it's only safe to use the following parts of + the 'table' structure: + - table->s->path + - table->alias +*/ + +#define SET_FATAL_ERROR fatal_error=1 + +void handler::print_error(int error, myf errflag) +{ + bool fatal_error= 0; + DBUG_ENTER("handler::print_error"); + DBUG_PRINT("enter",("error: %d",error)); + + if (ha_thd()->transaction_rollback_request) + { + /* Ensure this becomes a true error */ + errflag&= ~(ME_WARNING | ME_NOTE); + } + + int textno= -1; // impossible value + switch (error) { + case EACCES: + textno=ER_OPEN_AS_READONLY; + break; + case EAGAIN: + textno=ER_FILE_USED; + break; + case ENOENT: + case ENOTDIR: + case ELOOP: + textno=ER_FILE_NOT_FOUND; + break; + case ENOSPC: + case HA_ERR_DISK_FULL: + textno= ER_DISK_FULL; + SET_FATAL_ERROR; // Ensure error is logged + break; + case HA_ERR_KEY_NOT_FOUND: + case HA_ERR_NO_ACTIVE_RECORD: + case HA_ERR_RECORD_DELETED: + case HA_ERR_END_OF_FILE: + /* + This errors is not not normally fatal (for example for reads). However + if you get it during an update or delete, then its fatal. + As the user is calling print_error() (which is not done on read), we + assume something when wrong with the update or delete. 
+ */ + SET_FATAL_ERROR; + textno=ER_KEY_NOT_FOUND; + break; + case HA_ERR_ABORTED_BY_USER: + { + DBUG_ASSERT(ha_thd()->killed); + ha_thd()->send_kill_message(); + DBUG_VOID_RETURN; + } + case HA_ERR_WRONG_MRG_TABLE_DEF: + textno=ER_WRONG_MRG_TABLE; + break; + case HA_ERR_FOUND_DUPP_KEY: + { + if (table) + { + uint key_nr=get_dup_key(error); + if ((int) key_nr >= 0 && key_nr < table->s->keys) + { + print_keydup_error(table, &table->key_info[key_nr], errflag); + table->file->lookup_errkey= -1; + DBUG_VOID_RETURN; + } + } + textno=ER_DUP_KEY; + break; + } + case HA_ERR_FOREIGN_DUPLICATE_KEY: + { + char rec_buf[MAX_KEY_LENGTH]; + String rec(rec_buf, sizeof(rec_buf), system_charset_info); + /* Table is opened and defined at this point */ + + /* + Just print the subset of fields that are part of the first index, + printing the whole row from there is not easy. + */ + key_unpack(&rec, table, &table->key_info[0]); + + char child_table_name[NAME_LEN + 1]; + char child_key_name[NAME_LEN + 1]; + if (get_foreign_dup_key(child_table_name, sizeof(child_table_name), + child_key_name, sizeof(child_key_name))) + { + my_error(ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO, errflag, + table_share->table_name.str, rec.c_ptr_safe(), + child_table_name, child_key_name); + } + else + { + my_error(ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO, errflag, + table_share->table_name.str, rec.c_ptr_safe()); + } + DBUG_VOID_RETURN; + } + case HA_ERR_NULL_IN_SPATIAL: + my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag); + DBUG_VOID_RETURN; + case HA_ERR_FOUND_DUPP_UNIQUE: + textno=ER_DUP_UNIQUE; + break; + case HA_ERR_RECORD_CHANGED: + /* + This is not fatal error when using HANDLER interface + SET_FATAL_ERROR; + */ + textno=ER_CHECKREAD; + break; + case HA_ERR_CRASHED: + SET_FATAL_ERROR; + textno=ER_NOT_KEYFILE; + break; + case HA_ERR_WRONG_IN_RECORD: + SET_FATAL_ERROR; + textno= ER_CRASHED_ON_USAGE; + break; + case HA_ERR_CRASHED_ON_USAGE: + SET_FATAL_ERROR; + textno=ER_CRASHED_ON_USAGE; + break; + case 
HA_ERR_NOT_A_TABLE: + textno= error; + break; + case HA_ERR_CRASHED_ON_REPAIR: + SET_FATAL_ERROR; + textno=ER_CRASHED_ON_REPAIR; + break; + case HA_ERR_OUT_OF_MEM: + textno=ER_OUT_OF_RESOURCES; + break; + case HA_ERR_WRONG_COMMAND: + my_error(ER_ILLEGAL_HA, MYF(0), table_type(), table_share->db.str, + table_share->table_name.str); + DBUG_VOID_RETURN; + break; + case HA_ERR_OLD_FILE: + textno=ER_OLD_KEYFILE; + break; + case HA_ERR_UNSUPPORTED: + textno=ER_UNSUPPORTED_EXTENSION; + break; + case HA_ERR_RECORD_FILE_FULL: + { + textno=ER_RECORD_FILE_FULL; + /* Write the error message to error log */ + errflag|= ME_ERROR_LOG; + break; + } + case HA_ERR_INDEX_FILE_FULL: + { + textno=ER_INDEX_FILE_FULL; + /* Write the error message to error log */ + errflag|= ME_ERROR_LOG; + break; + } + case HA_ERR_LOCK_WAIT_TIMEOUT: + textno=ER_LOCK_WAIT_TIMEOUT; + break; + case HA_ERR_LOCK_TABLE_FULL: + textno=ER_LOCK_TABLE_FULL; + break; + case HA_ERR_LOCK_DEADLOCK: + { + String str, full_err_msg(ER_DEFAULT(ER_LOCK_DEADLOCK), + strlen(ER_DEFAULT(ER_LOCK_DEADLOCK)), + system_charset_info); + + get_error_message(error, &str); + full_err_msg.append(str); + my_printf_error(ER_LOCK_DEADLOCK, "%s", errflag, full_err_msg.c_ptr_safe()); + DBUG_VOID_RETURN; + } + case HA_ERR_READ_ONLY_TRANSACTION: + textno=ER_READ_ONLY_TRANSACTION; + break; + case HA_ERR_CANNOT_ADD_FOREIGN: + textno=ER_CANNOT_ADD_FOREIGN; + break; + case HA_ERR_ROW_IS_REFERENCED: + { + String str; + get_error_message(error, &str); + my_printf_error(ER_ROW_IS_REFERENCED_2, + ER(str.length() ? ER_ROW_IS_REFERENCED_2 : ER_ROW_IS_REFERENCED), + errflag, str.c_ptr_safe()); + DBUG_VOID_RETURN; + } + case HA_ERR_NO_REFERENCED_ROW: + { + String str; + get_error_message(error, &str); + my_printf_error(ER_NO_REFERENCED_ROW_2, + ER(str.length() ? 
ER_NO_REFERENCED_ROW_2 : ER_NO_REFERENCED_ROW), + errflag, str.c_ptr_safe()); + DBUG_VOID_RETURN; + } + case HA_ERR_TABLE_DEF_CHANGED: + textno=ER_TABLE_DEF_CHANGED; + break; + case HA_ERR_NO_SUCH_TABLE: + my_error(ER_NO_SUCH_TABLE_IN_ENGINE, errflag, table_share->db.str, + table_share->table_name.str); + DBUG_VOID_RETURN; + case HA_ERR_RBR_LOGGING_FAILED: + textno= ER_BINLOG_ROW_LOGGING_FAILED; + break; + case HA_ERR_DROP_INDEX_FK: + { + const char *ptr= "???"; + uint key_nr= get_dup_key(error); + if ((int) key_nr >= 0) + ptr= table->key_info[key_nr].name.str; + my_error(ER_DROP_INDEX_FK, errflag, ptr); + DBUG_VOID_RETURN; + } + case HA_ERR_TABLE_NEEDS_UPGRADE: + textno= ER_TABLE_NEEDS_UPGRADE; + my_error(ER_TABLE_NEEDS_UPGRADE, errflag, + "TABLE", table_share->table_name.str); + DBUG_VOID_RETURN; + case HA_ERR_NO_PARTITION_FOUND: + textno=ER_WRONG_PARTITION_NAME; + break; + case HA_ERR_TABLE_READONLY: + textno= ER_OPEN_AS_READONLY; + break; + case HA_ERR_AUTOINC_READ_FAILED: + textno= ER_AUTOINC_READ_FAILED; + break; + case HA_ERR_AUTOINC_ERANGE: + textno= error; + my_error(textno, errflag, table->found_next_number_field->field_name.str, + table->in_use->get_stmt_da()->current_row_for_warning()); + DBUG_VOID_RETURN; + break; + case HA_ERR_TOO_MANY_CONCURRENT_TRXS: + textno= ER_TOO_MANY_CONCURRENT_TRXS; + break; + case HA_ERR_INDEX_COL_TOO_LONG: + textno= ER_INDEX_COLUMN_TOO_LONG; + break; + case HA_ERR_NOT_IN_LOCK_PARTITIONS: + textno=ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET; + break; + case HA_ERR_INDEX_CORRUPT: + textno= ER_INDEX_CORRUPT; + break; + case HA_ERR_UNDO_REC_TOO_BIG: + textno= ER_UNDO_RECORD_TOO_BIG; + break; + case HA_ERR_COMMIT_ERROR: + textno= ER_ERROR_DURING_COMMIT; + break; + case HA_ERR_PARTITION_LIST: + my_error(ER_VERS_NOT_ALLOWED, errflag, table->s->db.str, table->s->table_name.str); + DBUG_VOID_RETURN; + default: + { + /* The error was "unknown" to this function. 
+ Ask handler if it has got a message for this error */ + bool temporary= FALSE; + String str; + temporary= get_error_message(error, &str); + if (!str.is_empty()) + { + const char* engine= table_type(); + if (temporary) + my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.c_ptr(), + engine); + else + { + SET_FATAL_ERROR; + my_error(ER_GET_ERRMSG, errflag, error, str.c_ptr(), engine); + } + } + else + { + if (!temporary) + my_error(ER_GET_ERRNO, errflag, error, table_type()); + /* else no error message. */ + } + DBUG_VOID_RETURN; + } + } + DBUG_ASSERT(textno > 0); + if (unlikely(fatal_error)) + { + /* Ensure this becomes a true error */ + errflag&= ~(ME_WARNING | ME_NOTE); + if ((debug_assert_if_crashed_table || + global_system_variables.log_warnings > 1)) + { + /* + Log error to log before we crash or if extended warnings are requested + */ + errflag|= ME_ERROR_LOG; + } + } + + /* if we got an OS error from a file-based engine, specify a path of error */ + if (error < HA_ERR_FIRST && bas_ext()[0]) + { + char buff[FN_REFLEN]; + strxnmov(buff, sizeof(buff), + table_share->normalized_path.str, bas_ext()[0], NULL); + my_error(textno, errflag, buff, error); + } + else + my_error(textno, errflag, table_share->table_name.str, error); + DBUG_VOID_RETURN; +} + + +/** + Return an error message specific to this handler. + + @param error error code previously returned by handler + @param buf pointer to String where to add error message + + @return + Returns true if this is a temporary error +*/ +bool handler::get_error_message(int error, String* buf) +{ + DBUG_EXECUTE_IF("external_lock_failure", + buf->set_ascii(STRING_WITH_LEN("KABOOM!"));); + return FALSE; +} + +/** + Check for incompatible collation changes. + + @retval + HA_ADMIN_NEEDS_UPGRADE Table may have data requiring upgrade. + @retval + 0 No upgrade required. 
+*/ + +int handler::check_collation_compatibility() +{ + ulong mysql_version= table->s->mysql_version; + + if (mysql_version < Charset::latest_mariadb_version_with_collation_change()) + { + KEY *key= table->key_info; + KEY *key_end= key + table->s->keys; + for (; key < key_end; key++) + { + KEY_PART_INFO *key_part= key->key_part; + KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts; + for (; key_part < key_part_end; key_part++) + { + if (!key_part->fieldnr) + continue; + Field *field= table->field[key_part->fieldnr - 1]; + uint cs_number= field->charset()->number; + if (Charset::collation_changed_order(mysql_version, cs_number)) + return HA_ADMIN_NEEDS_UPGRADE; + } + } + } + + return 0; +} + + +int handler::check_long_hash_compatibility() const +{ + if (!table->s->old_long_hash_function()) + return 0; + KEY *key= table->key_info; + KEY *key_end= key + table->s->keys; + for ( ; key < key_end; key++) + { + if (key->algorithm == HA_KEY_ALG_LONG_HASH) + { + /* + The old (pre-MDEV-27653) hash function was wrong. + So the long hash unique constraint can have some + duplicate records. REPAIR TABLE can't fix this, + it will fail on a duplicate key error. + Only "ALTER IGNORE TABLE .. FORCE" can fix this. + So we need to return HA_ADMIN_NEEDS_ALTER here, + (not HA_ADMIN_NEEDS_UPGRADE which is used elsewhere), + to properly send the error message text corresponding + to ER_TABLE_NEEDS_REBUILD (rather than to ER_TABLE_NEEDS_UPGRADE) + to the user. 
+ */ + return HA_ADMIN_NEEDS_ALTER; + } + } + return 0; +} + + +int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt) +{ + int error; + KEY *keyinfo, *keyend; + KEY_PART_INFO *keypart, *keypartend; + + if (table->s->incompatible_version) + return HA_ADMIN_NEEDS_ALTER; + + if (!table->s->mysql_version) + { + /* check for blob-in-key error */ + keyinfo= table->key_info; + keyend= table->key_info + table->s->keys; + for (; keyinfo < keyend; keyinfo++) + { + keypart= keyinfo->key_part; + keypartend= keypart + keyinfo->user_defined_key_parts; + for (; keypart < keypartend; keypart++) + { + if (!keypart->fieldnr) + continue; + Field *field= table->field[keypart->fieldnr-1]; + if (field->type() == MYSQL_TYPE_BLOB) + { + if (check_opt->sql_flags & TT_FOR_UPGRADE) + check_opt->flags= T_MEDIUM; + return HA_ADMIN_NEEDS_CHECK; + } + } + } + } + if (table->s->frm_version < FRM_VER_TRUE_VARCHAR) + return HA_ADMIN_NEEDS_ALTER; + + if (unlikely((error= check_collation_compatibility()))) + return error; + + if (unlikely((error= check_long_hash_compatibility()))) + return error; + + return check_for_upgrade(check_opt); +} + + +int handler::check_old_types() +{ + Field** field; + + if (!table->s->mysql_version) + { + /* check for bad DECIMAL field */ + for (field= table->field; (*field); field++) + { + if ((*field)->type() == MYSQL_TYPE_NEWDECIMAL) + { + return HA_ADMIN_NEEDS_ALTER; + } + if ((*field)->type() == MYSQL_TYPE_VAR_STRING) + { + return HA_ADMIN_NEEDS_ALTER; + } + } + } + return 0; +} + + +static bool update_frm_version(TABLE *table) +{ + char path[FN_REFLEN]; + File file; + int result= 1; + DBUG_ENTER("update_frm_version"); + + /* + No need to update frm version in case table was created or checked + by server with the same version. This also ensures that we do not + update frm version for temporary tables as this code doesn't support + temporary tables. + + keep_original_mysql_version is set if the table version cannot be + changed without rewriting the frm file. 
+ */ + if (table->s->mysql_version == MYSQL_VERSION_ID || + table->s->keep_original_mysql_version) + DBUG_RETURN(0); + + strxmov(path, table->s->normalized_path.str, reg_ext, NullS); + + if ((file= mysql_file_open(key_file_frm, + path, O_RDWR|O_BINARY, MYF(MY_WME))) >= 0) + { + uchar version[4]; + + int4store(version, MYSQL_VERSION_ID); + + if ((result= (int)mysql_file_pwrite(file, (uchar*) version, 4, 51L, + MYF(MY_WME+MY_NABP)))) + goto err; + + table->s->mysql_version= MYSQL_VERSION_ID; + } +err: + if (file >= 0) + (void) mysql_file_close(file, MYF(MY_WME)); + DBUG_RETURN(result); +} + + + +/** + @return + key if error because of duplicated keys +*/ +uint handler::get_dup_key(int error) +{ + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK); + DBUG_ENTER("handler::get_dup_key"); + + if (lookup_errkey != (uint)-1) + DBUG_RETURN(errkey= lookup_errkey); + + errkey= (uint)-1; + if (error == HA_ERR_FOUND_DUPP_KEY || + error == HA_ERR_FOREIGN_DUPLICATE_KEY || + error == HA_ERR_FOUND_DUPP_UNIQUE || error == HA_ERR_NULL_IN_SPATIAL || + error == HA_ERR_DROP_INDEX_FK) + info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK); + DBUG_RETURN(errkey); +} + + +/** + Delete all files with extension from bas_ext(). + + @param name Base name of table + + @note + We assume that the handler may return more extensions than + was actually used for the file. We also assume that the first + extension is the most important one (see the comment near + handlerton::tablefile_extensions). If this exist and we can't delete + that it, we will abort the delete. 
If the first one doesn't exist, we have to try to delete all other
*/ + for (; ext >= start_ext; ext--) + rename_file_ext(to, from, *ext); + } + return error; +} + + +void handler::drop_table(const char *name) +{ + ha_close(); + delete_table(name); +} + + +/** + Return true if the error from drop table means that the + table didn't exists +*/ + +bool non_existing_table_error(int error) +{ + return (error == ENOENT || + (error == EE_DELETE && my_errno == ENOENT) || + error == EE_FILENOTFOUND || + error == HA_ERR_NO_SUCH_TABLE || + error == HA_ERR_UNSUPPORTED || + error == ER_NO_SUCH_TABLE || + error == ER_NO_SUCH_TABLE_IN_ENGINE || + error == ER_WRONG_OBJECT); +} + + +/** + Performs checks upon the table. + + @param thd thread doing CHECK TABLE operation + @param check_opt options from the parser + + @retval + HA_ADMIN_OK Successful upgrade + @retval + HA_ADMIN_NEEDS_UPGRADE Table has structures requiring upgrade + @retval + HA_ADMIN_NEEDS_ALTER Table has structures requiring ALTER TABLE + @retval + HA_ADMIN_NOT_IMPLEMENTED +*/ + +int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt) +{ + int error; + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type != F_UNLCK); + + if ((table->s->mysql_version >= MYSQL_VERSION_ID) && + (check_opt->sql_flags & TT_FOR_UPGRADE)) + return 0; + + if (table->s->mysql_version < MYSQL_VERSION_ID) + { + if (unlikely((error= check_old_types()))) + return error; + error= ha_check_for_upgrade(check_opt); + if (unlikely(error && (error != HA_ADMIN_NEEDS_CHECK))) + return error; + if (unlikely(!error && (check_opt->sql_flags & TT_FOR_UPGRADE))) + return 0; + } + if (unlikely((error= check(thd, check_opt)))) + return error; + /* Skip updating frm version if not main handler. */ + if (table->file != this) + return error; + return update_frm_version(table); +} + +/** + A helper function to mark a transaction read-write, + if it is started. 
+*/ + +void handler::mark_trx_read_write_internal() +{ + Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0]; + /* + When a storage engine method is called, the transaction must + have been started, unless it's a DDL call, for which the + storage engine starts the transaction internally, and commits + it internally, without registering in the ha_list. + Unfortunately here we can't know know for sure if the engine + has registered the transaction or not, so we must check. + */ + if (ha_info->is_started()) + { + /* + table_share can be NULL, for example, in ha_delete_table() or + ha_rename_table(). + */ + if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE) + ha_info->set_trx_read_write(); + } +} + + +/** + Repair table: public interface. + + @sa handler::repair() +*/ + +int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt) +{ + int result; + + mark_trx_read_write(); + + result= repair(thd, check_opt); + DBUG_ASSERT(result == HA_ADMIN_NOT_IMPLEMENTED || + ha_table_flags() & HA_CAN_REPAIR); + + if (result == HA_ADMIN_OK) + result= update_frm_version(table); + return result; +} + + +/** + End bulk insert +*/ + +int handler::ha_end_bulk_insert() +{ + DBUG_ENTER("handler::ha_end_bulk_insert"); + DBUG_EXECUTE_IF("crash_end_bulk_insert", + { extra(HA_EXTRA_FLUSH) ; DBUG_SUICIDE();}); + estimation_rows_to_insert= 0; + DBUG_RETURN(end_bulk_insert()); +} + +/** + Bulk update row: public interface. + + @sa handler::bulk_update_row() +*/ + +int +handler::ha_bulk_update_row(const uchar *old_data, const uchar *new_data, + ha_rows *dup_key_found) +{ + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type == F_WRLCK); + mark_trx_read_write(); + + return bulk_update_row(old_data, new_data, dup_key_found); +} + + +/** + Delete all rows: public interface. 
+ + @sa handler::delete_all_rows() +*/ + +int +handler::ha_delete_all_rows() +{ + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type == F_WRLCK); + mark_trx_read_write(); + + return delete_all_rows(); +} + + +/** + Truncate table: public interface. + + @sa handler::truncate() +*/ + +int +handler::ha_truncate() +{ + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type == F_WRLCK); + mark_trx_read_write(); + + return truncate(); +} + + +/** + Reset auto increment: public interface. + + @sa handler::reset_auto_increment() +*/ + +int +handler::ha_reset_auto_increment(ulonglong value) +{ + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type == F_WRLCK); + mark_trx_read_write(); + + return reset_auto_increment(value); +} + + +/** + Optimize table: public interface. + + @sa handler::optimize() +*/ + +int +handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt) +{ + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type == F_WRLCK); + mark_trx_read_write(); + + return optimize(thd, check_opt); +} + + +/** + Analyze table: public interface. + + @sa handler::analyze() +*/ + +int +handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt) +{ + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type != F_UNLCK); + mark_trx_read_write(); + + return analyze(thd, check_opt); +} + + +/** + Check and repair table: public interface. + + @sa handler::check_and_repair() +*/ + +bool +handler::ha_check_and_repair(THD *thd) +{ + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type == F_UNLCK); + mark_trx_read_write(); + + return check_and_repair(thd); +} + + +/** + Disable indexes: public interface. + + @sa handler::disable_indexes() +*/ + +int +handler::ha_disable_indexes(uint mode) +{ + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type != F_UNLCK); + mark_trx_read_write(); + + return disable_indexes(mode); +} + + +/** + Enable indexes: public interface. 
+ + @sa handler::enable_indexes() +*/ + +int +handler::ha_enable_indexes(uint mode) +{ + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type != F_UNLCK); + mark_trx_read_write(); + + return enable_indexes(mode); +} + + +/** + Discard or import tablespace: public interface. + + @sa handler::discard_or_import_tablespace() +*/ + +int +handler::ha_discard_or_import_tablespace(my_bool discard) +{ + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type == F_WRLCK); + mark_trx_read_write(); + + return discard_or_import_tablespace(discard); +} + + +bool handler::ha_prepare_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info) +{ + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type != F_UNLCK); + mark_trx_read_write(); + + return prepare_inplace_alter_table(altered_table, ha_alter_info); +} + + +bool handler::ha_commit_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info, + bool commit) +{ + /* + At this point we should have an exclusive metadata lock on the table. + The exception is if we're about to roll back changes (commit= false). + In this case, we might be rolling back after a failed lock upgrade, + so we could be holding the same lock level as for inplace_alter_table(). 
+ */ + DBUG_ASSERT(ha_thd()->mdl_context.is_lock_owner(MDL_key::TABLE, + table->s->db.str, + table->s->table_name.str, + MDL_EXCLUSIVE) || + !commit); + + return commit_inplace_alter_table(altered_table, ha_alter_info, commit); +} + + +/* + Default implementation to support in-place alter table + and old online add/drop index API +*/ + +enum_alter_inplace_result +handler::check_if_supported_inplace_alter(TABLE *altered_table, + Alter_inplace_info *ha_alter_info) +{ + DBUG_ENTER("handler::check_if_supported_inplace_alter"); + + HA_CREATE_INFO *create_info= ha_alter_info->create_info; + + if (altered_table->versioned(VERS_TIMESTAMP)) + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + + alter_table_operations inplace_offline_operations= + ALTER_COLUMN_TYPE_CHANGE_BY_ENGINE | + ALTER_COLUMN_NAME | + ALTER_RENAME_COLUMN | + ALTER_CHANGE_COLUMN_DEFAULT | + ALTER_COLUMN_DEFAULT | + ALTER_COLUMN_OPTION | + ALTER_CHANGE_CREATE_OPTION | + ALTER_DROP_CHECK_CONSTRAINT | + ALTER_PARTITIONED | + ALTER_VIRTUAL_GCOL_EXPR | + ALTER_RENAME | + ALTER_RENAME_INDEX | + ALTER_INDEX_IGNORABILITY; + + /* Is there at least one operation that requires copy algorithm? */ + if (ha_alter_info->handler_flags & ~inplace_offline_operations) + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + + /* + The following checks for changes related to ALTER_OPTIONS + + ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. and + ALTER TABLE table_name DEFAULT CHARSET = .. most likely + change column charsets and so not supported in-place through + old API. + + Changing of PACK_KEYS, MAX_ROWS and ROW_FORMAT options were + not supported as in-place operations in old API either. 
+ */ + if (create_info->used_fields & (HA_CREATE_USED_CHARSET | + HA_CREATE_USED_DEFAULT_CHARSET | + HA_CREATE_USED_PACK_KEYS | + HA_CREATE_USED_CHECKSUM | + HA_CREATE_USED_MAX_ROWS) || + (table->s->row_type != create_info->row_type)) + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + + uint table_changes= (ha_alter_info->handler_flags & + ALTER_COLUMN_TYPE_CHANGE_BY_ENGINE) ? + IS_EQUAL_PACK_LENGTH : IS_EQUAL_YES; + if (table->file->check_if_incompatible_data(create_info, table_changes) + == COMPATIBLE_DATA_YES) + DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK); + + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); +} + +Alter_inplace_info::Alter_inplace_info(HA_CREATE_INFO *create_info_arg, + Alter_info *alter_info_arg, + KEY *key_info_arg, uint key_count_arg, + partition_info *modified_part_info_arg, + bool ignore_arg, bool error_non_empty) + : create_info(create_info_arg), + alter_info(alter_info_arg), + key_info_buffer(key_info_arg), + key_count(key_count_arg), + rename_keys(current_thd->mem_root), + modified_part_info(modified_part_info_arg), + ignore(ignore_arg), + error_if_not_empty(error_non_empty) + {} + +void Alter_inplace_info::report_unsupported_error(const char *not_supported, + const char *try_instead) const +{ + if (unsupported_reason == NULL) + my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0), + not_supported, try_instead); + else + my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0), + not_supported, unsupported_reason, try_instead); +} + + +/** + Rename table: public interface. + + @sa handler::rename_table() +*/ + +int +handler::ha_rename_table(const char *from, const char *to) +{ + DBUG_ASSERT(m_lock_type == F_UNLCK); + mark_trx_read_write(); + + return rename_table(from, to); +} + + +/** + Drop table in the engine: public interface. + + @sa handler::drop_table() + + The difference between this and delete_table() is that the table is open in + drop_table(). 
+*/ + +void +handler::ha_drop_table(const char *name) +{ + DBUG_ASSERT(m_lock_type == F_UNLCK); + if (check_if_updates_are_ignored("DROP")) + return; + + mark_trx_read_write(); + drop_table(name); +} + + +/** + Structure used during force drop table. +*/ + +struct st_force_drop_table_params +{ + const char *path; + const LEX_CSTRING *db; + const LEX_CSTRING *alias; + int error; + bool discovering; +}; + + +/** + Try to delete table from a given plugin + Table types with discovery is ignored as these .frm files would have + been created during discovery and thus doesn't need to be found + for drop table force +*/ + +static my_bool delete_table_force(THD *thd, plugin_ref plugin, void *arg) +{ + handlerton *hton = plugin_hton(plugin); + st_force_drop_table_params *param = (st_force_drop_table_params *)arg; + + if (param->discovering == (hton->discover_table != NULL) && + !(thd->slave_thread && (hton->flags & HTON_IGNORE_UPDATES))) + { + int error; + error= ha_delete_table(thd, hton, param->path, param->db, param->alias, 0); + if (error > 0 && !non_existing_table_error(error)) + param->error= error; + if (error == 0) + { + if (hton && hton->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE) + thd->replication_flags |= OPTION_IF_EXISTS; + param->error= 0; + return TRUE; // Table was deleted + } + } + return FALSE; +} + +/** + @brief + Traverse all plugins to delete table when .frm file is missing. 
+ + @return -1 Table was not found in any engine + @return 0 Table was found in some engine and delete succeded + @return # Error from first engine that had a table but didn't succeed to + delete the table + @return HA_ERR_ROW_IS_REFERENCED if foreign key reference is encountered, + +*/ + +int ha_delete_table_force(THD *thd, const char *path, const LEX_CSTRING *db, + const LEX_CSTRING *alias) +{ + st_force_drop_table_params param; + Table_exists_error_handler no_such_table_handler; + DBUG_ENTER("ha_delete_table_force"); + + param.path= path; + param.db= db; + param.alias= alias; + param.error= -1; // Table not found + param.discovering= true; + + thd->push_internal_handler(&no_such_table_handler); + if (plugin_foreach(thd, delete_table_force, MYSQL_STORAGE_ENGINE_PLUGIN, + ¶m)) + param.error= 0; // Delete succeded + else + { + param.discovering= false; + if (plugin_foreach(thd, delete_table_force, MYSQL_STORAGE_ENGINE_PLUGIN, + ¶m)) + param.error= 0; // Delete succeded + } + thd->pop_internal_handler(); + DBUG_RETURN(param.error); +} + + +/** + Create a table in the engine: public interface. + + @sa handler::create() +*/ + +int +handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info_arg) +{ + DBUG_ASSERT(m_lock_type == F_UNLCK); + mark_trx_read_write(); + int error= create(name, form, info_arg); + if (!error && + !(info_arg->options & (HA_LEX_CREATE_TMP_TABLE | HA_CREATE_TMP_ALTER))) + mysql_audit_create_table(form); + return error; +} + + +/** + Create handler files for CREATE TABLE: public interface. + + @sa handler::create_partitioning_metadata() +*/ + +int +handler::ha_create_partitioning_metadata(const char *name, + const char *old_name, + chf_create_flags action_flag) +{ + /* + Normally this is done when unlocked, but in fast_alter_partition_table, + it is done on an already locked handler when preparing to alter/rename + partitions. 
+ */ + DBUG_ASSERT(m_lock_type == F_UNLCK || + (!old_name && strcmp(name, table_share->path.str))); + + + mark_trx_read_write(); + return create_partitioning_metadata(name, old_name, action_flag); +} + + +/** + Change partitions: public interface. + + @sa handler::change_partitions() +*/ + +int +handler::ha_change_partitions(HA_CREATE_INFO *create_info, + const char *path, + ulonglong * const copied, + ulonglong * const deleted, + const uchar *pack_frm_data, + size_t pack_frm_len) +{ + /* + Must have at least RDLCK or be a TMP table. Read lock is needed to read + from current partitions and write lock will be taken on new partitions. + */ + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type != F_UNLCK); + + mark_trx_read_write(); + + return change_partitions(create_info, path, copied, deleted, + pack_frm_data, pack_frm_len); +} + + +/** + Drop partitions: public interface. + + @sa handler::drop_partitions() +*/ + +int +handler::ha_drop_partitions(const char *path) +{ + DBUG_ASSERT(!table->db_stat); + + mark_trx_read_write(); + + return drop_partitions(path); +} + + +/** + Rename partitions: public interface. + + @sa handler::rename_partitions() +*/ + +int +handler::ha_rename_partitions(const char *path) +{ + DBUG_ASSERT(!table->db_stat); + + mark_trx_read_write(); + + return rename_partitions(path); +} + + +/** + Tell the storage engine that it is allowed to "disable transaction" in the + handler. It is a hint that ACID is not required - it was used in NDB for + ALTER TABLE, for example, when data are copied to temporary table. + A storage engine may treat this hint any way it likes. NDB for example + started to commit every now and then automatically. + This hint can be safely ignored. +*/ +int ha_enable_transaction(THD *thd, bool on) +{ + int error=0; + DBUG_ENTER("ha_enable_transaction"); + DBUG_PRINT("enter", ("on: %d", (int) on)); + + if ((thd->transaction->on= on)) + { + /* + Now all storage engines should have transaction handling enabled. 
+ But some may have it enabled all the time - "disabling" transactions + is an optimization hint that storage engine is free to ignore. + So, let's commit an open transaction (if any) now. + */ + if (likely(!(error= ha_commit_trans(thd, 0)))) + error= trans_commit_implicit(thd); + } + DBUG_RETURN(error); +} + +int handler::index_next_same(uchar *buf, const uchar *key, uint keylen) +{ + int error; + DBUG_ENTER("handler::index_next_same"); + if (!(error=index_next(buf))) + { + my_ptrdiff_t ptrdiff= buf - table->record[0]; + uchar *UNINIT_VAR(save_record_0); + KEY *UNINIT_VAR(key_info); + KEY_PART_INFO *UNINIT_VAR(key_part); + KEY_PART_INFO *UNINIT_VAR(key_part_end); + + /* + key_cmp_if_same() compares table->record[0] against 'key'. + In parts it uses table->record[0] directly, in parts it uses + field objects with their local pointers into table->record[0]. + If 'buf' is distinct from table->record[0], we need to move + all record references. This is table->record[0] itself and + the field pointers of the fields used in this key. + */ + if (ptrdiff) + { + save_record_0= table->record[0]; + table->record[0]= buf; + key_info= table->key_info + active_index; + key_part= key_info->key_part; + key_part_end= key_part + key_info->user_defined_key_parts; + for (; key_part < key_part_end; key_part++) + { + DBUG_ASSERT(key_part->field); + key_part->field->move_field_offset(ptrdiff); + } + } + + if (key_cmp_if_same(table, key, active_index, keylen)) + { + table->status=STATUS_NOT_FOUND; + error=HA_ERR_END_OF_FILE; + } + + /* Move back if necessary. 
*/ + if (ptrdiff) + { + table->record[0]= save_record_0; + for (key_part= key_info->key_part; key_part < key_part_end; key_part++) + key_part->field->move_field_offset(-ptrdiff); + } + } + DBUG_PRINT("return",("%i", error)); + DBUG_RETURN(error); +} + + +void handler::get_dynamic_partition_info(PARTITION_STATS *stat_info, + uint part_id) +{ + info(HA_STATUS_CONST | HA_STATUS_TIME | HA_STATUS_VARIABLE | + HA_STATUS_NO_LOCK); + stat_info->records= stats.records; + stat_info->mean_rec_length= stats.mean_rec_length; + stat_info->data_file_length= stats.data_file_length; + stat_info->max_data_file_length= stats.max_data_file_length; + stat_info->index_file_length= stats.index_file_length; + stat_info->max_index_file_length=stats.max_index_file_length; + stat_info->delete_length= stats.delete_length; + stat_info->create_time= stats.create_time; + stat_info->update_time= stats.update_time; + stat_info->check_time= stats.check_time; + stat_info->check_sum= stats.checksum; + stat_info->check_sum_null= stats.checksum_null; +} + + +/* + Updates the global table stats with the TABLE this handler represents +*/ + +void handler::update_global_table_stats() +{ + TABLE_STATS * table_stats; + + status_var_add(table->in_use->status_var.rows_read, rows_read); + DBUG_ASSERT(rows_tmp_read == 0); + + if (!table->in_use->userstat_running) + { + rows_read= rows_changed= 0; + return; + } + + if (rows_read + rows_changed == 0) + return; // Nothing to update. + + DBUG_ASSERT(table->s); + DBUG_ASSERT(table->s->table_cache_key.str); + + mysql_mutex_lock(&LOCK_global_table_stats); + /* Gets the global table stats, creating one if necessary. 
*/ + if (!(table_stats= (TABLE_STATS*) + my_hash_search(&global_table_stats, + (uchar*) table->s->table_cache_key.str, + table->s->table_cache_key.length))) + { + if (!(table_stats = ((TABLE_STATS*) + my_malloc(PSI_INSTRUMENT_ME, sizeof(TABLE_STATS), + MYF(MY_WME | MY_ZEROFILL))))) + { + /* Out of memory error already given */ + goto end; + } + memcpy(table_stats->table, table->s->table_cache_key.str, + table->s->table_cache_key.length); + table_stats->table_name_length= (uint)table->s->table_cache_key.length; + table_stats->engine_type= ht->db_type; + /* No need to set variables to 0, as we use MY_ZEROFILL above */ + + if (my_hash_insert(&global_table_stats, (uchar*) table_stats)) + { + /* Out of memory error is already given */ + my_free(table_stats); + goto end; + } + } + // Updates the global table stats. + table_stats->rows_read+= rows_read; + table_stats->rows_changed+= rows_changed; + table_stats->rows_changed_x_indexes+= (rows_changed * + (table->s->keys ? table->s->keys : + 1)); + rows_read= rows_changed= 0; +end: + mysql_mutex_unlock(&LOCK_global_table_stats); +} + + +/* + Updates the global index stats with this handler's accumulated index reads. +*/ + +void handler::update_global_index_stats() +{ + DBUG_ASSERT(table->s); + + if (!table->in_use->userstat_running) + { + /* Reset all index read values */ + bzero(index_rows_read, sizeof(index_rows_read[0]) * table->s->keys); + return; + } + + for (uint index = 0; index < table->s->keys; index++) + { + if (index_rows_read[index]) + { + INDEX_STATS* index_stats; + size_t key_length; + KEY *key_info = &table->key_info[index]; // Rows were read using this + + DBUG_ASSERT(key_info->cache_name); + if (!key_info->cache_name) + continue; + key_length= table->s->table_cache_key.length + key_info->name.length + 1; + mysql_mutex_lock(&LOCK_global_index_stats); + // Gets the global index stats, creating one if necessary. 
+ if (!(index_stats= (INDEX_STATS*) my_hash_search(&global_index_stats, + key_info->cache_name, + key_length))) + { + if (!(index_stats = ((INDEX_STATS*) + my_malloc(PSI_INSTRUMENT_ME, sizeof(INDEX_STATS), + MYF(MY_WME | MY_ZEROFILL))))) + goto end; // Error is already given + + memcpy(index_stats->index, key_info->cache_name, key_length); + index_stats->index_name_length= key_length; + if (my_hash_insert(&global_index_stats, (uchar*) index_stats)) + { + my_free(index_stats); + goto end; + } + } + /* Updates the global index stats. */ + index_stats->rows_read+= index_rows_read[index]; + index_rows_read[index]= 0; +end: + mysql_mutex_unlock(&LOCK_global_index_stats); + } + } +} + + +static void flush_checksum(ha_checksum *row_crc, uchar **checksum_start, + size_t *checksum_length) +{ + if (*checksum_start) + { + *row_crc= my_checksum(*row_crc, *checksum_start, *checksum_length); + *checksum_start= NULL; + *checksum_length= 0; + } +} + + +/* calculating table's checksum */ +int handler::calculate_checksum() +{ + int error; + THD *thd=ha_thd(); + DBUG_ASSERT(table->s->last_null_bit_pos < 8); + uchar null_mask= table->s->last_null_bit_pos + ? 256 - (1 << table->s->last_null_bit_pos) : 0; + + table->use_all_stored_columns(); + stats.checksum= 0; + + if ((error= ha_rnd_init(1))) + return error; + + for (;;) + { + if (thd->killed) + return HA_ERR_ABORTED_BY_USER; + + ha_checksum row_crc= 0; + error= ha_rnd_next(table->record[0]); + if (error) + break; + + if (table->s->null_bytes) + { + /* fix undefined null bits */ + table->record[0][table->s->null_bytes-1] |= null_mask; + if (!(table->s->db_create_options & HA_OPTION_PACK_RECORD)) + table->record[0][0] |= 1; + + row_crc= my_checksum(row_crc, table->record[0], table->s->null_bytes); + } + + uchar *checksum_start= NULL; + size_t checksum_length= 0; + for (uint i= 0; i < table->s->fields; i++ ) + { + Field *f= table->field[i]; + if (!f->stored_in_db()) + continue; + + + if (! 
(thd->variables.old_behavior & OLD_MODE_COMPAT_5_1_CHECKSUM) && + f->is_real_null(0)) + { + flush_checksum(&row_crc, &checksum_start, &checksum_length); + continue; + } + /* + BLOB and VARCHAR have pointers in their field, we must convert + to string; GEOMETRY is implemented on top of BLOB. + BIT may store its data among NULL bits, convert as well. + */ + switch (f->type()) { + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_GEOMETRY: + case MYSQL_TYPE_BIT: + { + flush_checksum(&row_crc, &checksum_start, &checksum_length); + String tmp; + f->val_str(&tmp); + row_crc= my_checksum(row_crc, (uchar*) tmp.ptr(), tmp.length()); + break; + } + default: + if (!checksum_start) + checksum_start= f->ptr; + DBUG_ASSERT(checksum_start + checksum_length == f->ptr); + checksum_length+= f->pack_length(); + break; + } + } + flush_checksum(&row_crc, &checksum_start, &checksum_length); + + stats.checksum+= row_crc; + } + ha_rnd_end(); + return error == HA_ERR_END_OF_FILE ? 0 : error; +} + + +/**************************************************************************** +** Some general functions that isn't in the handler class +****************************************************************************/ + +/** + Initiates table-file and calls appropriate database-creator. 
+ + @retval + 0 ok + @retval + 1 error +*/ +int ha_create_table(THD *thd, const char *path, const char *db, + const char *table_name, HA_CREATE_INFO *create_info, + LEX_CUSTRING *frm, bool skip_frm_file) +{ + int error= 1; + TABLE table; + char name_buff[FN_REFLEN]; + const char *name; + TABLE_SHARE share; + Abort_on_warning_instant_set old_abort_on_warning(thd, 0); + bool temp_table __attribute__((unused)) = + create_info->options & (HA_LEX_CREATE_TMP_TABLE | HA_CREATE_TMP_ALTER); + DBUG_ENTER("ha_create_table"); + + init_tmp_table_share(thd, &share, db, 0, table_name, path); + + if (frm) + { + bool write_frm_now= (!create_info->db_type->discover_table && + !create_info->tmp_table() && !skip_frm_file); + + share.frm_image= frm; + + // open an frm image + if (share.init_from_binary_frm_image(thd, write_frm_now, + frm->str, frm->length)) + goto err; + } + else + { + // open an frm file + share.db_plugin= ha_lock_engine(thd, create_info->db_type); + + if (open_table_def(thd, &share)) + goto err; + } + + share.m_psi= PSI_CALL_get_table_share(temp_table, &share); + + if (open_table_from_share(thd, &share, &empty_clex_str, 0, READ_ALL, 0, + &table, true)) + goto err; + + update_create_info_from_table(create_info, &table); + + name= get_canonical_filename(table.file, share.path.str, name_buff); + + error= table.file->ha_create(name, &table, create_info); + + if (unlikely(error)) + { + if (!thd->is_error()) + my_error(ER_CANT_CREATE_TABLE, MYF(0), db, table_name, error); + table.file->print_error(error, MYF(ME_WARNING)); + PSI_CALL_drop_table_share(temp_table, share.db.str, (uint)share.db.length, + share.table_name.str, (uint)share.table_name.length); + } + + (void) closefrm(&table); + +err: + free_table_share(&share); + DBUG_RETURN(error != 0); +} + +void st_ha_check_opt::init() +{ + flags= sql_flags= 0; + start_time= my_time(0); +} + + +/***************************************************************************** + Key cache handling. 
+ + This code is only relevant for ISAM/MyISAM tables + + key_cache->cache may be 0 only in the case where a key cache is not + initialized or when we where not able to init the key cache in a previous + call to ha_init_key_cache() (probably out of memory) +*****************************************************************************/ + +/** + Init a key cache if it has not been initied before. +*/ +int ha_init_key_cache(const char *name, KEY_CACHE *key_cache, void *unused + __attribute__((unused))) +{ + DBUG_ENTER("ha_init_key_cache"); + + if (!key_cache->key_cache_inited) + { + mysql_mutex_lock(&LOCK_global_system_variables); + size_t tmp_buff_size= (size_t) key_cache->param_buff_size; + uint tmp_block_size= (uint) key_cache->param_block_size; + uint division_limit= (uint)key_cache->param_division_limit; + uint age_threshold= (uint)key_cache->param_age_threshold; + uint partitions= (uint)key_cache->param_partitions; + uint changed_blocks_hash_size= (uint)key_cache->changed_blocks_hash_size; + mysql_mutex_unlock(&LOCK_global_system_variables); + DBUG_RETURN(!init_key_cache(key_cache, + tmp_block_size, + tmp_buff_size, + division_limit, age_threshold, + changed_blocks_hash_size, + partitions)); + } + DBUG_RETURN(0); +} + + +/** + Resize key cache. 
+*/ +int ha_resize_key_cache(KEY_CACHE *key_cache) +{ + DBUG_ENTER("ha_resize_key_cache"); + + if (key_cache->key_cache_inited) + { + mysql_mutex_lock(&LOCK_global_system_variables); + size_t tmp_buff_size= (size_t) key_cache->param_buff_size; + long tmp_block_size= (long) key_cache->param_block_size; + uint division_limit= (uint)key_cache->param_division_limit; + uint age_threshold= (uint)key_cache->param_age_threshold; + uint changed_blocks_hash_size= (uint)key_cache->changed_blocks_hash_size; + mysql_mutex_unlock(&LOCK_global_system_variables); + DBUG_RETURN(!resize_key_cache(key_cache, tmp_block_size, + tmp_buff_size, + division_limit, age_threshold, + changed_blocks_hash_size)); + } + DBUG_RETURN(0); +} + + +/** + Change parameters for key cache (like division_limit) +*/ +int ha_change_key_cache_param(KEY_CACHE *key_cache) +{ + DBUG_ENTER("ha_change_key_cache_param"); + + if (key_cache->key_cache_inited) + { + mysql_mutex_lock(&LOCK_global_system_variables); + uint division_limit= (uint)key_cache->param_division_limit; + uint age_threshold= (uint)key_cache->param_age_threshold; + mysql_mutex_unlock(&LOCK_global_system_variables); + change_key_cache_param(key_cache, division_limit, age_threshold); + } + DBUG_RETURN(0); +} + + +/** + Repartition key cache +*/ +int ha_repartition_key_cache(KEY_CACHE *key_cache) +{ + DBUG_ENTER("ha_repartition_key_cache"); + + if (key_cache->key_cache_inited) + { + mysql_mutex_lock(&LOCK_global_system_variables); + size_t tmp_buff_size= (size_t) key_cache->param_buff_size; + long tmp_block_size= (long) key_cache->param_block_size; + uint division_limit= (uint)key_cache->param_division_limit; + uint age_threshold= (uint)key_cache->param_age_threshold; + uint partitions= (uint)key_cache->param_partitions; + uint changed_blocks_hash_size= (uint)key_cache->changed_blocks_hash_size; + mysql_mutex_unlock(&LOCK_global_system_variables); + DBUG_RETURN(!repartition_key_cache(key_cache, tmp_block_size, + tmp_buff_size, + division_limit, 
age_threshold, + changed_blocks_hash_size, + partitions)); + } + DBUG_RETURN(0); +} + + +/** + Move all tables from one key cache to another one. +*/ +int ha_change_key_cache(KEY_CACHE *old_key_cache, + KEY_CACHE *new_key_cache) +{ + mi_change_key_cache(old_key_cache, new_key_cache); + return 0; +} + + +static my_bool discover_handlerton(THD *thd, plugin_ref plugin, + void *arg) +{ + TABLE_SHARE *share= (TABLE_SHARE *)arg; + handlerton *hton= plugin_hton(plugin); + if (hton->discover_table) + { + share->db_plugin= plugin; + int error= hton->discover_table(hton, thd, share); + if (error != HA_ERR_NO_SUCH_TABLE) + { + if (unlikely(error)) + { + if (!share->error) + { + share->error= OPEN_FRM_ERROR_ALREADY_ISSUED; + plugin_unlock(0, share->db_plugin); + } + + /* + report an error, unless it is "generic" and a more + specific one was already reported + */ + if (error != HA_ERR_GENERIC || !thd->is_error()) + my_error(ER_GET_ERRNO, MYF(0), error, plugin_name(plugin)->str); + share->db_plugin= 0; + } + else + share->error= OPEN_FRM_OK; + + status_var_increment(thd->status_var.ha_discover_count); + return TRUE; // abort the search + } + share->db_plugin= 0; + } + + DBUG_ASSERT(share->error == OPEN_FRM_OPEN_ERROR); + return FALSE; // continue with the next engine +} + +int ha_discover_table(THD *thd, TABLE_SHARE *share) +{ + DBUG_ENTER("ha_discover_table"); + int found; + + DBUG_ASSERT(share->error == OPEN_FRM_OPEN_ERROR); // share is not OK yet + + if (!engines_with_discover) + found= FALSE; + else if (share->db_plugin) + found= discover_handlerton(thd, share->db_plugin, share); + else + found= plugin_foreach(thd, discover_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, share); + + if (thd->lex->query_tables && thd->lex->query_tables->sequence && !found) + my_error(ER_UNKNOWN_SEQUENCES, MYF(0),share->table_name.str); + if (!found) + open_table_error(share, OPEN_FRM_OPEN_ERROR, ENOENT); // not found + + DBUG_RETURN(share->error != OPEN_FRM_OK); +} + +static my_bool 
file_ext_exists(char *path, size_t path_len, const char *ext) +{ + strmake(path + path_len, ext, FN_REFLEN - path_len); + return !access(path, F_OK); +} + +struct st_discover_existence_args +{ + char *path; + size_t path_len; + const char *db, *table_name; + handlerton *hton; + bool frm_exists; +}; + +static my_bool discover_existence(THD *thd, plugin_ref plugin, + void *arg) +{ + st_discover_existence_args *args= (st_discover_existence_args*)arg; + handlerton *ht= plugin_hton(plugin); + if (!ht->discover_table_existence) + return args->frm_exists; + + args->hton= ht; + + if (ht->discover_table_existence == ext_based_existence) + return file_ext_exists(args->path, args->path_len, + ht->tablefile_extensions[0]); + + return ht->discover_table_existence(ht, args->db, args->table_name); +} + + +/** + Check if a given table exists, without doing a full discover, if possible + + If the 'hton' is not NULL, it's set to the handlerton of the storage engine + of this table, or to view_pseudo_hton if the frm belongs to a view. + + This function takes discovery correctly into account. If frm is found, + it discovers the table to make sure it really exists in the engine. + If no frm is found it discovers the table, in case it still exists in + the engine. + + While it tries to cut corners (don't open .frm if no discovering engine is + enabled, no full discovery if all discovering engines support + discover_table_existence, etc), it still *may* be quite expensive + and must be used sparingly. + + @retval true Table exists (even if the error occurred, like bad frm) + @retval false Table does not exist (one can do CREATE TABLE table_name) + + @note if frm exists and the table in engine doesn't, *hton will be set, + but the return value will be false. + + @note if frm file exists, but the table cannot be opened (engine not + loaded, frm is invalid), the return value will be true, but + *hton will be NULL. 
+*/ + +bool ha_table_exists(THD *thd, const LEX_CSTRING *db, + const LEX_CSTRING *table_name, LEX_CUSTRING *table_id, + LEX_CSTRING *partition_engine_name, + handlerton **hton, bool *is_sequence) +{ + handlerton *dummy; + bool dummy2; + DBUG_ENTER("ha_table_exists"); + + if (hton) + *hton= 0; + else if (engines_with_discover) + hton= &dummy; + if (!is_sequence) + is_sequence= &dummy2; + *is_sequence= 0; + if (table_id) + { + table_id->str= 0; + table_id->length= 0; + } + + TDC_element *element= tdc_lock_share(thd, db->str, table_name->str); + if (element && element != MY_ERRPTR) + { + if (!hton) + hton= &dummy; + *hton= element->share->db_type(); +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (partition_engine_name && element->share->db_type() == partition_hton) + { + if (!static_cast(element->share->ha_share)-> + partition_engine_name) + { + /* Partition engine found, but table has never been opened */ + tdc_unlock_share(element); + goto retry_from_frm; + } + lex_string_set(partition_engine_name, + static_cast(element->share->ha_share)-> + partition_engine_name); + } +#endif + *is_sequence= element->share->table_type == TABLE_TYPE_SEQUENCE; + if (*hton != view_pseudo_hton && element->share->tabledef_version.length && + table_id && + (table_id->str= (uchar*) + thd->memdup(element->share->tabledef_version.str, MY_UUID_SIZE))) + table_id->length= MY_UUID_SIZE; + tdc_unlock_share(element); + DBUG_RETURN(TRUE); + } + +#ifdef WITH_PARTITION_STORAGE_ENGINE +retry_from_frm: +#endif + char path[FN_REFLEN + 1]; + size_t path_len = build_table_filename(path, sizeof(path) - 1, + db->str, table_name->str, "", 0); + st_discover_existence_args args= {path, path_len, db->str, table_name->str, 0, true}; + + if (file_ext_exists(path, path_len, reg_ext)) + { + bool exists= true; + if (hton) + { + char engine_buf[NAME_CHAR_LEN + 1]; + LEX_CSTRING engine= { engine_buf, 0 }; + Table_type type= dd_frm_type(thd, path, &engine, + partition_engine_name, + table_id); + + switch (type) { + case 
TABLE_TYPE_UNKNOWN: + DBUG_PRINT("exit", ("Exist, cannot be opened")); + DBUG_RETURN(true); // Frm exists + case TABLE_TYPE_VIEW: + *hton= view_pseudo_hton; + DBUG_PRINT("exit", ("Exist, view")); + DBUG_RETURN(true); // Frm exists + case TABLE_TYPE_SEQUENCE: + *is_sequence= true; + /* fall through */ + case TABLE_TYPE_NORMAL: + { + plugin_ref p= plugin_lock_by_name(thd, &engine, + MYSQL_STORAGE_ENGINE_PLUGIN); + *hton= p ? plugin_hton(p) : NULL; + if (*hton) // verify that the table really exists + exists= discover_existence(thd, p, &args); + } + } + } + DBUG_PRINT("exit", (exists ? "Exists" : "Does not exist")); + DBUG_RETURN(exists); + } + + args.frm_exists= false; + if (plugin_foreach(thd, discover_existence, MYSQL_STORAGE_ENGINE_PLUGIN, + &args)) + { + if (hton) + *hton= args.hton; + DBUG_PRINT("exit", ("discovery found file")); + DBUG_RETURN(TRUE); + } + + if (need_full_discover_for_existence) + { + TABLE_LIST table; + bool exists; + uint flags = GTS_TABLE | GTS_VIEW; + + if (!hton) + flags|= GTS_NOLOCK; + + Table_exists_error_handler no_such_table_handler; + thd->push_internal_handler(&no_such_table_handler); + table.init_one_table(db, table_name, 0, TL_READ); + TABLE_SHARE *share= tdc_acquire_share(thd, &table, flags); + thd->pop_internal_handler(); + + if (hton && share) + { + *hton= share->db_type(); + if (table_id && share->tabledef_version.length && + (table_id->str= + (uchar*) thd->memdup(share->tabledef_version.str, MY_UUID_SIZE))) + table_id->length= MY_UUID_SIZE; + tdc_release_share(share); + } + + // the table doesn't exist if we've caught ER_NO_SUCH_TABLE and nothing else + exists= !no_such_table_handler.safely_trapped_errors(); + DBUG_PRINT("exit", (exists ? 
"Exists" : "Does not exist")); + DBUG_RETURN(exists); + } + + DBUG_PRINT("exit", ("Does not exist")); + DBUG_RETURN(FALSE); +} + + +/* + Check if the CREATE/ALTER table should be ignored + This could happen for slaves where the table is shared between master + and slave + + If statement is ignored, write a note +*/ + +bool handler::check_if_updates_are_ignored(const char *op) const +{ + return ha_check_if_updates_are_ignored(table->in_use, ht, op); +} + + +bool ha_check_if_updates_are_ignored(THD *thd, handlerton *hton, + const char *op) +{ + DBUG_ENTER("ha_check_if_updates_are_ignored"); + if (!thd->slave_thread || !(hton= ha_checktype(thd, hton, 1))) + DBUG_RETURN(0); // Not slave or no engine + if (!(hton->flags & HTON_IGNORE_UPDATES)) + DBUG_RETURN(0); // Not shared table + my_error(ER_SLAVE_IGNORED_SHARED_TABLE, MYF(ME_NOTE), op); + DBUG_RETURN(1); +} + + +/** + Discover all table names in a given database +*/ +extern "C" { + +static int cmp_file_names(const void *a, const void *b) +{ + CHARSET_INFO *cs= character_set_filesystem; + char *aa= ((FILEINFO *)a)->name; + char *bb= ((FILEINFO *)b)->name; + return cs->strnncoll(aa, strlen(aa), bb, strlen(bb)); +} + +static int cmp_table_names(LEX_CSTRING * const *a, LEX_CSTRING * const *b) +{ + return my_charset_bin.strnncoll((*a)->str, (*a)->length, + (*b)->str, (*b)->length); +} + +#ifndef DBUG_OFF +static int cmp_table_names_desc(LEX_CSTRING * const *a, LEX_CSTRING * const *b) +{ + return -cmp_table_names(a, b); +} +#endif + +} + +Discovered_table_list::Discovered_table_list(THD *thd_arg, + Dynamic_array *tables_arg, + const LEX_CSTRING *wild_arg) : + thd(thd_arg), with_temps(false), tables(tables_arg) +{ + if (wild_arg->str && wild_arg->str[0]) + { + wild= wild_arg->str; + wend= wild + wild_arg->length; + } + else + wild= 0; +} + +bool Discovered_table_list::add_table(const char *tname, size_t tlen) +{ + /* + TODO Check with_temps and filter out temp tables. 
    Implement the check, when we'll have at least one affected engine (with
    custom discover_table_names() method, that calls add_table() directly).
    Note: avoid comparing the same name twice (here and in add_file).
  */
  if (wild && table_alias_charset->wildcmp(tname, tname + tlen, wild, wend,
                                           wild_prefix, wild_one, wild_many))
    return 0;                                   // filtered out by wildcard

  LEX_CSTRING *name= thd->make_clex_string(tname, tlen);
  if (!name || tables->append(name))
    return 1;                                   // OOM or append failure
  return 0;
}

/*
  Add one directory entry: decode the on-disk file name to a table name
  (dropping temp tables unless with_temps is set) and forward to add_table().
*/
bool Discovered_table_list::add_file(const char *fname)
{
  bool is_temp= strncmp(fname, STRING_WITH_LEN(tmp_file_prefix)) == 0;

  if (is_temp && !with_temps)
    return 0;

  char tname[SAFE_NAME_LEN + 1];
  size_t tlen= filename_to_tablename(fname, tname, sizeof(tname), is_temp);
  return add_table(tname, tlen);
}


/* Sort collected names in binary order (see cmp_table_names) */
void Discovered_table_list::sort()
{
  tables->sort(cmp_table_names);
}


#ifndef DBUG_OFF
void Discovered_table_list::sort_desc()
{
  tables->sort(cmp_table_names_desc);
}
#endif


/*
  Sort the list, then compact it in place so each name appears once.
  Uses the classic sorted-dedup two-pointer sweep (src = last kept,
  dst = candidate).
*/
void Discovered_table_list::remove_duplicates()
{
  LEX_CSTRING **src= tables->front();
  LEX_CSTRING **dst= src;
  sort();
  while (++dst <= tables->back())
  {
    LEX_CSTRING *s= *src, *d= *dst;
    /* sorted ascending, so a prefix-compare must not decrease */
    DBUG_ASSERT(strncmp(s->str, d->str, MY_MIN(s->length, d->length)) <= 0);
    if ((s->length != d->length || strncmp(s->str, d->str, d->length)))
    {
      src++;
      if (src != dst)
        *src= *dst;
    }
  }
  tables->elements(src - tables->front() + 1);
}

/* Shared state for the discover_names() plugin_foreach() callback */
struct st_discover_names_args
{
  LEX_CSTRING *db;
  MY_DIR *dirp;
  Discovered_table_list *result;
  uint possible_duplicates;   // how many names may also have a .frm file
};

/*
  plugin_foreach() callback: let one engine append its discovered table
  names to the result list, tracking how many of them might duplicate
  frm-based names.
*/
static my_bool discover_names(THD *thd, plugin_ref plugin,
                              void *arg)
{
  st_discover_names_args *args= (st_discover_names_args *)arg;
  handlerton *ht= plugin_hton(plugin);

  if (ht->discover_table_names)
  {
    size_t old_elements= args->result->tables->elements();
    if (ht->discover_table_names(ht, args->db, args->dirp, args->result))
      return 1;

    /*
      hton_ext_based_table_discovery never discovers a table that has
      a corresponding .frm file; but custom engine discover methods might
    */
    if (ht->discover_table_names != hton_ext_based_table_discovery)
      args->possible_duplicates+=
        (uint)(args->result->tables->elements() - old_elements);
  }

  return 0;
}

/**
  Return the list of tables

  @param thd
  @param db         database to look into
  @param dirp       list of files in this database (as returned by my_dir())
  @param result     the object to return the list of files in
  @param reusable   if true, on return, 'dirp' will be a valid list of all
                    non-table files. If false, discovery will work much faster,
                    but it will leave 'dirp' corrupted and completely unusable,
                    only good for my_dirend().

  Normally, reusable=false for SHOW and INFORMATION_SCHEMA, and reusable=true
  for DROP DATABASE (as it needs to know and delete non-table files).
*/

int ha_discover_table_names(THD *thd, LEX_CSTRING *db, MY_DIR *dirp,
                            Discovered_table_list *result, bool reusable)
{
  int error;
  DBUG_ENTER("ha_discover_table_names");

  if (engines_with_discover_file_names == 0 && !reusable)
  {
    /* Fast path: no engine needs the directory listing */
    st_discover_names_args args= {db, NULL, result, 0};
    error= ext_table_discovery_simple(dirp, result) ||
           plugin_foreach(thd, discover_names,
                          MYSQL_STORAGE_ENGINE_PLUGIN, &args);
    if (args.possible_duplicates > 0)
      result->remove_duplicates();
  }
  else
  {
    st_discover_names_args args= {db, dirp, result, 0};

    /* extension_based_table_discovery relies on dirp being sorted */
    my_qsort(dirp->dir_entry, dirp->number_of_files,
             sizeof(FILEINFO), cmp_file_names);

    error= extension_based_table_discovery(dirp, reg_ext, result) ||
           plugin_foreach(thd, discover_names,
                          MYSQL_STORAGE_ENGINE_PLUGIN, &args);
    if (args.possible_duplicates > 0)
      result->remove_duplicates();
  }

  DBUG_RETURN(error);
}


/*
int handler::pre_read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
                                        KEY_MULTI_RANGE *ranges,
                                        uint range_count,
                                        bool sorted, HANDLER_BUFFER *buffer,
                                        bool
use_parallel)
{
  int result;
  DBUG_ENTER("handler::pre_read_multi_range_first");
  result = pre_read_range_first(ranges->start_key.keypart_map ?
                                &ranges->start_key : 0,
                                ranges->end_key.keypart_map ?
                                &ranges->end_key : 0,
                                test(ranges->range_flag & EQ_RANGE),
                                sorted,
                                use_parallel);
  DBUG_RETURN(result);
}
*/


/**
  Read first row between two ranges.
  Store ranges for future calls to read_range_next.

  @param start_key		Start key. Is 0 if no min range
  @param end_key		End key.  Is 0 if no max range
  @param eq_range_arg	        Set to 1 if start_key == end_key
  @param sorted		Set to 1 if result should be sorted per key

  @note
    Record is read into table->record[0]

  @retval
    0			Found row
  @retval
    HA_ERR_END_OF_FILE	No rows in range
  @retval
    \#			Error code
*/
int handler::read_range_first(const key_range *start_key,
                              const key_range *end_key,
                              bool eq_range_arg, bool sorted)
{
  int result;
  DBUG_ENTER("handler::read_range_first");

  /* Remember the range bounds for subsequent read_range_next() calls */
  eq_range= eq_range_arg;
  set_end_range(end_key);
  range_key_part= table->key_info[active_index].key_part;

  if (!start_key)			// Read first record
    result= ha_index_first(table->record[0]);
  else
    result= ha_index_read_map(table->record[0],
                              start_key->key,
                              start_key->keypart_map,
                              start_key->flag);
  if (result)
    /* A missing start key is not an error for a range scan: map it to EOF */
    DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND)
                ? HA_ERR_END_OF_FILE
                : result);

  if (compare_key(end_range) <= 0)
  {
    DBUG_RETURN(0);
  }
  else
  {
    /*
      The last read row does not fall in the range. So request
      storage engine to release row lock if possible.
    */
    unlock_row();
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }
}


/**
  Read next row between two ranges.

  @note
    Record is read into table->record[0]

  @retval
    0			Found row
  @retval
    HA_ERR_END_OF_FILE	No rows in range
  @retval
    \#			Error code
*/
int handler::read_range_next()
{
  int result;
  DBUG_ENTER("handler::read_range_next");

  if (eq_range)
  {
    /* We trust that index_next_same always gives a row in range */
    DBUG_RETURN(ha_index_next_same(table->record[0],
                                   end_range->key,
                                   end_range->length));
  }
  result= ha_index_next(table->record[0]);
  if (result)
    DBUG_RETURN(result);

  if (compare_key(end_range) <= 0)
  {
    DBUG_RETURN(0);
  }
  else
  {
    /*
      The last read row does not fall in the range. So request
      storage engine to release row lock if possible.
    */
    unlock_row();
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }
}


/*
  Store the end-of-range key (if any) and precompute how a key equal to
  the bound should compare: before-key => end is exclusive (treat equal
  as "past end"), after-key => inclusive.
*/
void handler::set_end_range(const key_range *end_key)
{
  end_range= 0;
  if (end_key)
  {
    end_range= &save_end_range;
    save_end_range= *end_key;
    key_compare_result_on_equal=
      ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
       (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
  }
}


/**
  Compare if found key (in row) is over max-value.

  @param range		range to compare to row. May be 0 for no range

  @see also
    key.cc::key_cmp()

  @return
    The return value is SIGN(key_in_row - range_key):

    - 0   : Key is equal to range or 'range' == 0 (no range)
    - -1  : Key is less than range
    - 1   : Key is larger than range
*/
int handler::compare_key(key_range *range)
{
  int cmp;
  /* With ICP the engine already checked the range; skip the re-check */
  if (!range || in_range_check_pushed_down)
    return 0;					// No max range
  cmp= key_cmp(range_key_part, range->key, range->length);
  if (!cmp)
    cmp= key_compare_result_on_equal;
  return cmp;
}


/*
  Same as compare_key() but doesn't check have in_range_check_pushed_down.
  This is used by index condition pushdown implementation.
*/

int handler::compare_key2(key_range *range) const
{
  int cmp;
  if (!range)
    return 0;					// no max range
  cmp= key_cmp(range_key_part, range->key, range->length);
  if (!cmp)
    cmp= key_compare_result_on_equal;
  return cmp;
}


/**
  ICP callback - to be called by an engine to check the pushed condition
*/
extern "C" check_result_t handler_index_cond_check(void* h_arg)
{
  handler *h= (handler*)h_arg;
  THD *thd= h->table->in_use;
  check_result_t res;

  DEBUG_SYNC(thd, "handler_index_cond_check");
  /* Engines that can roll back may abort softly; others must stop ASAP */
  enum thd_kill_levels abort_at= h->has_rollback() ?
    THD_ABORT_SOFTLY : THD_ABORT_ASAP;
  if (thd_kill_level(thd) > abort_at)
    return CHECK_ABORTED_BY_USER;

  if (h->end_range && h->compare_key2(h->end_range) > 0)
    return CHECK_OUT_OF_RANGE;
  h->increment_statistics(&SSV::ha_icp_attempts);
  if ((res= h->pushed_idx_cond->val_int()? CHECK_POS : CHECK_NEG) ==
      CHECK_POS)
    h->increment_statistics(&SSV::ha_icp_match);
  return res;
}


/**
  Rowid filter callback - to be called by an engine to check rowid / primary
  keys of the rows whose data is to be fetched against the used rowid filter
*/

extern "C"
check_result_t handler_rowid_filter_check(void *h_arg)
{
  handler *h= (handler*) h_arg;
  TABLE *tab= h->get_table();

  /*
    Check for out-of-range and killed conditions only if we haven't done it
    already in the pushed index condition check
  */
  if (!h->pushed_idx_cond)
  {
    THD *thd= h->table->in_use;
    DEBUG_SYNC(thd, "handler_rowid_filter_check");
    enum thd_kill_levels abort_at= h->has_transactions() ?
      THD_ABORT_SOFTLY : THD_ABORT_ASAP;
    if (thd_kill_level(thd) > abort_at)
      return CHECK_ABORTED_BY_USER;

    if (h->end_range && h->compare_key2(h->end_range) > 0)
      return CHECK_OUT_OF_RANGE;
  }

  h->position(tab->record[0]);
  return h->pushed_rowid_filter->check((char*)h->ref)?
CHECK_POS: CHECK_NEG; +} + + +/** + Callback function for an engine to check whether the used rowid filter + has been already built +*/ + +extern "C" int handler_rowid_filter_is_active(void *h_arg) +{ + if (!h_arg) + return false; + handler *h= (handler*) h_arg; + return h->rowid_filter_is_active; +} + + +int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key, + key_part_map keypart_map, + enum ha_rkey_function find_flag) +{ + int error, UNINIT_VAR(error1); + + error= ha_index_init(index, 0); + if (likely(!error)) + { + error= index_read_map(buf, key, keypart_map, find_flag); + error1= ha_index_end(); + } + return error ? error : error1; +} + + +/** + Returns a list of all known extensions. + + No mutexes, worst case race is a minor surplus memory allocation + We have to recreate the extension map if mysqld is restarted (for example + within libmysqld) + + @retval + pointer pointer to TYPELIB structure +*/ +static my_bool exts_handlerton(THD *unused, plugin_ref plugin, + void *arg) +{ + List *found_exts= (List *) arg; + handlerton *hton= plugin_hton(plugin); + List_iterator_fast it(*found_exts); + const char **ext, *old_ext; + + for (ext= hton->tablefile_extensions; *ext; ext++) + { + while ((old_ext= it++)) + { + if (!strcmp(old_ext, *ext)) + break; + } + if (!old_ext) + found_exts->push_back((char *) *ext); + + it.rewind(); + } + return FALSE; +} + +TYPELIB *ha_known_exts(void) +{ + if (!known_extensions.type_names || mysys_usage_id != known_extensions_id) + { + List found_exts; + const char **ext, *old_ext; + + known_extensions_id= mysys_usage_id; + found_exts.push_back((char*) TRG_EXT); + found_exts.push_back((char*) TRN_EXT); + + plugin_foreach(NULL, exts_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts); + + ext= (const char **) my_once_alloc(sizeof(char *)* + (found_exts.elements+1), + MYF(MY_WME | MY_FAE)); + + DBUG_ASSERT(ext != 0); + known_extensions.count= found_exts.elements; + known_extensions.type_names= ext; + + 
List_iterator_fast it(found_exts); + while ((old_ext= it++)) + *ext++= old_ext; + *ext= 0; + } + return &known_extensions; +} + + +static bool stat_print(THD *thd, const char *type, size_t type_len, + const char *file, size_t file_len, + const char *status, size_t status_len) +{ + Protocol *protocol= thd->protocol; + protocol->prepare_for_resend(); + protocol->store(type, type_len, system_charset_info); + protocol->store(file, file_len, system_charset_info); + protocol->store(status, status_len, system_charset_info); + if (protocol->write()) + return TRUE; + return FALSE; +} + + +static my_bool showstat_handlerton(THD *thd, plugin_ref plugin, + void *arg) +{ + enum ha_stat_type stat= *(enum ha_stat_type *) arg; + handlerton *hton= plugin_hton(plugin); + if (hton->show_status && + hton->show_status(hton, thd, stat_print, stat)) + return TRUE; + return FALSE; +} + +bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat) +{ + List field_list; + Protocol *protocol= thd->protocol; + MEM_ROOT *mem_root= thd->mem_root; + bool result; + + field_list.push_back(new (mem_root) Item_empty_string(thd, "Type", 10), + mem_root); + field_list.push_back(new (mem_root) + Item_empty_string(thd, "Name", FN_REFLEN), mem_root); + field_list.push_back(new (mem_root) + Item_empty_string(thd, "Status", 10), + mem_root); + + if (protocol->send_result_set_metadata(&field_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) + return TRUE; + + if (db_type == NULL) + { + result= plugin_foreach(thd, showstat_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, &stat); + } + else + { + result= db_type->show_status && + db_type->show_status(db_type, thd, stat_print, stat) ? 1 : 0; + } + + /* + We also check thd->is_error() as Innodb may return 0 even if + there was an error. 
+ */ + if (likely(!result && !thd->is_error())) + my_eof(thd); + else if (!thd->is_error()) + my_error(ER_GET_ERRNO, MYF(0), errno, hton_name(db_type)->str); + return result; +} + +/* + Function to check if the conditions for row-based binlogging is + correct for the table. + + A row in the given table should be replicated if: + - It's not called by partition engine + - Row-based replication is enabled in the current thread + - The binlog is enabled + - It is not a temporary table + - The binary log is open + - The database the table resides in shall be binlogged (binlog_*_db rules) + - table is not mysql.event + + RETURN VALUE + 0 No binary logging in row format + 1 Row needs to be logged +*/ + +bool handler::check_table_binlog_row_based() +{ + if (unlikely((!check_table_binlog_row_based_done))) + { + check_table_binlog_row_based_done= 1; + check_table_binlog_row_based_result= + check_table_binlog_row_based_internal(); + } + return check_table_binlog_row_based_result; +} + +bool handler::check_table_binlog_row_based_internal() +{ + THD *thd= table->in_use; + +#ifdef WITH_WSREP + if (!thd->variables.sql_log_bin && + wsrep_thd_is_applying(table->in_use)) + { + /* + wsrep patch sets sql_log_bin to silence binlogging from high + priority threads + */ + return 0; + } +#endif + return (table->s->can_do_row_logging && + !table->versioned(VERS_TRX_ID) && + !(thd->variables.option_bits & OPTION_BIN_TMP_LOG_OFF) && + thd->is_current_stmt_binlog_format_row() && + /* + Wsrep partially enables binary logging if it have not been + explicitly turned on. As a result we return 'true' if we are in + wsrep binlog emulation mode and the current thread is not a wsrep + applier or replayer thread. This decision is not affected by + @@sql_log_bin as we want the events to make into the binlog + cache only to filter them later before they make into binary log + file. + + However, we do return 'false' if binary logging was temporarily + turned off (see tmp_disable_binlog(A)). 
            Otherwise, return 'true' if binary logging is on.
          */
          IF_WSREP(((WSREP_EMULATE_BINLOG_NNULL(thd) &&
                     wsrep_thd_is_local(thd)) ||
                    ((WSREP_NNULL(thd) ||
                      (thd->variables.option_bits & OPTION_BIN_LOG)) &&
                     mysql_bin_log.is_open())),
                   (thd->variables.option_bits & OPTION_BIN_LOG) &&
                   mysql_bin_log.is_open()));
}


/*
  Write one row change to the binary log, emitting the table-map events
  first if this statement has not written them yet.
  Returns 0 or HA_ERR_RBR_LOGGING_FAILED.
*/
int handler::binlog_log_row(TABLE *table,
                            const uchar *before_record,
                            const uchar *after_record,
                            Log_func *log_func)
{
  bool error;
  THD *thd= table->in_use;
  DBUG_ENTER("binlog_log_row");

  /* Table-map events must precede the first row event of the statement */
  if (!thd->binlog_table_maps &&
      thd->binlog_write_table_maps())
    DBUG_RETURN(HA_ERR_RBR_LOGGING_FAILED);

  error= (*log_func)(thd, table, row_logging_has_trans,
                     before_record, after_record);
  DBUG_RETURN(error ? HA_ERR_RBR_LOGGING_FAILED : 0);
}


int handler::ha_external_lock(THD *thd, int lock_type)
{
  int error;
  DBUG_ENTER("handler::ha_external_lock");
  /*
    Whether this is lock or unlock, this should be true, and is to verify that
    if get_auto_increment() was called (thus may have reserved intervals or
    taken a table lock), ha_release_auto_increment() was too.
  */
  DBUG_ASSERT(next_insert_id == 0);
  /* Consecutive calls for lock without unlocking in between is not allowed */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              ((lock_type != F_UNLCK && m_lock_type == F_UNLCK) ||
               lock_type == F_UNLCK));
  /* SQL HANDLER call locks/unlock while scanning (RND/INDEX). */
  DBUG_ASSERT(inited == NONE || table->open_by_handler);

  /* Performance-schema / DTrace "lock start" probes, one per lock kind */
  if (MYSQL_HANDLER_RDLOCK_START_ENABLED() ||
      MYSQL_HANDLER_WRLOCK_START_ENABLED() ||
      MYSQL_HANDLER_UNLOCK_START_ENABLED())
  {
    if (lock_type == F_RDLCK)
    {
      MYSQL_HANDLER_RDLOCK_START(table_share->db.str,
                                 table_share->table_name.str);
    }
    else if (lock_type == F_WRLCK)
    {
      MYSQL_HANDLER_WRLOCK_START(table_share->db.str,
                                 table_share->table_name.str);
    }
    else if (lock_type == F_UNLCK)
    {
      MYSQL_HANDLER_UNLOCK_START(table_share->db.str,
                                 table_share->table_name.str);
    }
  }

  /*
    We cache the table flags if the locking succeeded. Otherwise, we
    keep them as they were when they were fetched in ha_open().
  */
  MYSQL_TABLE_LOCK_WAIT(PSI_TABLE_EXTERNAL_LOCK, lock_type,
                        { error= external_lock(thd, lock_type); })

  DBUG_EXECUTE_IF("external_lock_failure", error= HA_ERR_GENERIC;);

  if (likely(error == 0 || lock_type == F_UNLCK))
  {
    m_lock_type= lock_type;
    cached_table_flags= table_flags();
    if (table_share->tmp_table == NO_TMP_TABLE)
      mysql_audit_external_lock(thd, table_share, lock_type);
  }

  /* Matching "lock done" probes */
  if (MYSQL_HANDLER_RDLOCK_DONE_ENABLED() ||
      MYSQL_HANDLER_WRLOCK_DONE_ENABLED() ||
      MYSQL_HANDLER_UNLOCK_DONE_ENABLED())
  {
    if (lock_type == F_RDLCK)
    {
      MYSQL_HANDLER_RDLOCK_DONE(error);
    }
    else if (lock_type == F_WRLCK)
    {
      MYSQL_HANDLER_WRLOCK_DONE(error);
    }
    else if (lock_type == F_UNLCK)
    {
      MYSQL_HANDLER_UNLOCK_DONE(error);
    }
  }
  DBUG_RETURN(error);
}


/** @brief
  Check handler usage and reset state of file to after 'open'
*/
int handler::ha_reset()
{
  DBUG_ENTER("ha_reset");

  /* Check that we have called all proper deallocation functions */
  DBUG_ASSERT((uchar*) table->def_read_set.bitmap +
              table->s->column_bitmap_size ==
              (uchar*) table->def_write_set.bitmap);
  DBUG_ASSERT(bitmap_is_set_all(&table->s->all_set));
  DBUG_ASSERT(!table->file->keyread_enabled());
  /* ensure that ha_index_end / ha_rnd_end has been called */
  DBUG_ASSERT(inited == NONE);
  /* reset the bitmaps to point to defaults */
  table->default_column_bitmaps();
  pushed_cond= NULL;
  tracker= NULL;
  mark_trx_read_write_done= 0;
  /*
    Disable row logging.
  */
  row_logging= row_logging_init= 0;
  clear_cached_table_binlog_row_based_flag();
  /* Reset information about pushed index conditions */
  cancel_pushed_idx_cond();
  /* Reset information about pushed rowid filter */
  cancel_pushed_rowid_filter();
  /* Drop the auxiliary handler used for long-unique / overlap lookups */
  if (lookup_handler != this)
  {
    lookup_handler->ha_external_unlock(table->in_use);
    lookup_handler->close();
    delete lookup_handler;
    lookup_handler= this;
  }
  DBUG_RETURN(reset());
}

#ifdef WITH_WSREP
static int wsrep_after_row(THD *thd)
{
  DBUG_ENTER("wsrep_after_row");
  if (thd->internal_transaction())
    DBUG_RETURN(0);

  /* enforce wsrep_max_ws_rows */
  thd->wsrep_affected_rows++;
  if (wsrep_max_ws_rows &&
      thd->wsrep_affected_rows > wsrep_max_ws_rows &&
      wsrep_thd_is_local(thd))
  {
    /*
      If we are inside stored function or trigger we should not commit or
      rollback current statement transaction. See comment in ha_commit_trans()
      call for more information.
+ */ + if (!thd->in_sub_stmt) + trans_rollback_stmt(thd) || trans_rollback(thd); + my_message(ER_ERROR_DURING_COMMIT, "wsrep_max_ws_rows exceeded", MYF(0)); + DBUG_RETURN(ER_ERROR_DURING_COMMIT); + } + else if (wsrep_after_row_internal(thd)) + { + DBUG_RETURN(ER_LOCK_DEADLOCK); + } + DBUG_RETURN(0); +} +#endif /* WITH_WSREP */ + + +/** + Check if there is a conflicting unique hash key +*/ + +int handler::check_duplicate_long_entry_key(const uchar *new_rec, uint key_no) +{ + int result, error= 0; + KEY *key_info= table->key_info + key_no; + Field *hash_field= key_info->key_part->field; + uchar ptr[HA_HASH_KEY_LENGTH_WITH_NULL]; + String *blob_storage; + DBUG_ENTER("handler::check_duplicate_long_entry_key"); + + DBUG_ASSERT((key_info->flags & HA_NULL_PART_KEY && + key_info->key_length == HA_HASH_KEY_LENGTH_WITH_NULL) || + key_info->key_length == HA_HASH_KEY_LENGTH_WITHOUT_NULL); + + if (hash_field->is_real_null()) + DBUG_RETURN(0); + + key_copy(ptr, new_rec, key_info, key_info->key_length, false); + + result= lookup_handler->ha_index_init(key_no, 0); + if (result) + DBUG_RETURN(result); + blob_storage= (String*)alloca(sizeof(String)*table->s->virtual_not_stored_blob_fields); + table->remember_blob_values(blob_storage); + store_record(table, file->lookup_buffer); + result= lookup_handler->ha_index_read_map(table->record[0], ptr, + HA_WHOLE_KEY, HA_READ_KEY_EXACT); + if (!result) + { + bool is_same; + Field * t_field; + Item_func_hash * temp= (Item_func_hash *)hash_field->vcol_info->expr; + Item ** arguments= temp->arguments(); + uint arg_count= temp->argument_count(); + // restore pointers after swap_values in TABLE::update_virtual_fields() + for (Field **vf= table->vfield; *vf; vf++) + { + if (!(*vf)->stored_in_db() && (*vf)->flags & BLOB_FLAG && + bitmap_is_set(table->read_set, (*vf)->field_index)) + ((Field_blob*)*vf)->swap_value_and_read_value(); + } + do + { + my_ptrdiff_t diff= table->file->lookup_buffer - new_rec; + is_same= true; + for (uint j=0; is_same && j 
< arg_count; j++) + { + DBUG_ASSERT(arguments[j]->type() == Item::FIELD_ITEM || + // this one for left(fld_name,length) + arguments[j]->type() == Item::FUNC_ITEM); + if (arguments[j]->type() == Item::FIELD_ITEM) + { + t_field= static_cast(arguments[j])->field; + if (t_field->cmp_offset(diff)) + is_same= false; + } + else + { + Item_func_left *fnc= static_cast(arguments[j]); + DBUG_ASSERT(!my_strcasecmp(system_charset_info, "left", fnc->func_name())); + DBUG_ASSERT(fnc->arguments()[0]->type() == Item::FIELD_ITEM); + t_field= static_cast(fnc->arguments()[0])->field; + uint length= (uint)fnc->arguments()[1]->val_int(); + if (t_field->cmp_prefix(t_field->ptr, t_field->ptr + diff, length)) + is_same= false; + } + } + } + while (!is_same && + !(result= lookup_handler->ha_index_next_same(table->record[0], + ptr, key_info->key_length))); + if (is_same) + error= HA_ERR_FOUND_DUPP_KEY; + goto exit; + } + if (result != HA_ERR_KEY_NOT_FOUND) + error= result; +exit: + if (error == HA_ERR_FOUND_DUPP_KEY) + { + table->file->lookup_errkey= key_no; + if (ha_table_flags() & HA_DUPLICATE_POS) + { + lookup_handler->position(table->record[0]); + memcpy(table->file->dup_ref, lookup_handler->ref, ref_length); + } + } + restore_record(table, file->lookup_buffer); + table->restore_blob_values(blob_storage); + lookup_handler->ha_index_end(); + DBUG_RETURN(error); +} + +void handler::alloc_lookup_buffer() +{ + if (!lookup_buffer) + lookup_buffer= (uchar*)alloc_root(&table->mem_root, + table_share->max_unique_length + + table_share->null_fields + + table_share->reclength); +} + +/** @brief + check whether inserted records breaks the + unique constraint on long columns. 
+ @returns 0 if no duplicate else returns error + */ +int handler::check_duplicate_long_entries(const uchar *new_rec) +{ + lookup_errkey= (uint)-1; + for (uint i= 0; i < table->s->keys; i++) + { + int result; + if (table->key_info[i].algorithm == HA_KEY_ALG_LONG_HASH && + (result= check_duplicate_long_entry_key(new_rec, i))) + return result; + } + return 0; +} + + +/** @brief + check whether updated records breaks the + unique constraint on long columns. + In the case of update we just need to check the specic key + reason for that is consider case + create table t1(a blob , b blob , x blob , y blob ,unique(a,b) + ,unique(x,y)) + and update statement like this + update t1 set a=23+a; in this case if we try to scan for + whole keys in table then index scan on x_y will return 0 + because data is same so in the case of update we take + key as a parameter in normal insert key should be -1 + @returns 0 if no duplicate else returns error + */ +int handler::check_duplicate_long_entries_update(const uchar *new_rec) +{ + Field *field; + uint key_parts; + KEY *keyinfo; + KEY_PART_INFO *keypart; + /* + Here we are comparing whether new record and old record are same + with respect to fields in hash_str + */ + uint reclength= (uint) (table->record[1] - table->record[0]); + + for (uint i= 0; i < table->s->keys; i++) + { + keyinfo= table->key_info + i; + if (keyinfo->algorithm == HA_KEY_ALG_LONG_HASH) + { + key_parts= fields_in_hash_keyinfo(keyinfo); + keypart= keyinfo->key_part - key_parts; + for (uint j= 0; j < key_parts; j++, keypart++) + { + int error; + field= keypart->field; + /* + Compare fields if they are different then check for duplicates + cmp_binary_offset cannot differentiate between null and empty string + So also check for that too + */ + if((field->is_null(0) != field->is_null(reclength)) || + field->cmp_binary_offset(reclength)) + { + if((error= check_duplicate_long_entry_key(new_rec, i))) + return error; + /* + break because check_duplicate_long_entries_key 
will + take care of remaining fields + */ + break; + } + } + } + } + return 0; +} + + +int handler::ha_check_overlaps(const uchar *old_data, const uchar* new_data) +{ + DBUG_ASSERT(new_data); + if (this != table->file) + return 0; + if (!table_share->period.unique_keys) + return 0; + if (table->versioned() && !table->vers_end_field()->is_max()) + return 0; + + const bool is_update= old_data != NULL; + uchar *record_buffer= lookup_buffer + table_share->max_unique_length + + table_share->null_fields; + + // Needed to compare record refs later + if (is_update) + position(old_data); + + DBUG_ASSERT(!keyread_enabled()); + + int error= 0; + lookup_errkey= (uint)-1; + + for (uint key_nr= 0; key_nr < table_share->keys && !error; key_nr++) + { + const KEY &key_info= table->key_info[key_nr]; + const uint key_parts= key_info.user_defined_key_parts; + if (!key_info.without_overlaps) + continue; + + if (is_update) + { + bool key_used= false; + for (uint k= 0; k < key_parts && !key_used; k++) + key_used= bitmap_is_set(table->write_set, + key_info.key_part[k].fieldnr - 1); + if (!key_used) + continue; + } + + error= lookup_handler->ha_index_init(key_nr, 0); + if (error) + return error; + + error= lookup_handler->ha_start_keyread(key_nr); + DBUG_ASSERT(!error); + + const uint period_field_length= key_info.key_part[key_parts - 1].length; + const uint key_base_length= key_info.key_length - 2 * period_field_length; + + key_copy(lookup_buffer, new_data, &key_info, 0); + + /* Copy period_start to period_end. 
+ the value in period_start field is not significant, but anyway let's leave + it defined to avoid uninitialized memory access + */ + memcpy(lookup_buffer + key_base_length, + lookup_buffer + key_base_length + period_field_length, + period_field_length); + + /* Find row with period_end > (period_start of new_data) */ + error = lookup_handler->ha_index_read_map(record_buffer, lookup_buffer, + key_part_map((1 << (key_parts - 1)) - 1), + HA_READ_AFTER_KEY); + + if (!error && is_update) + { + /* In case of update it could happen that the nearest neighbour is + a record we are updating. It means, that there are no overlaps + from this side. + */ + DBUG_ASSERT(lookup_handler != this); + DBUG_ASSERT(ref_length == lookup_handler->ref_length); + + lookup_handler->position(record_buffer); + if (memcmp(ref, lookup_handler->ref, ref_length) == 0) + error= lookup_handler->ha_index_next(record_buffer); + } + + if (!error && table->check_period_overlaps(key_info, new_data, record_buffer)) + error= HA_ERR_FOUND_DUPP_KEY; + + if (error == HA_ERR_KEY_NOT_FOUND || error == HA_ERR_END_OF_FILE) + error= 0; + + if (error == HA_ERR_FOUND_DUPP_KEY) + lookup_errkey= key_nr; + + int end_error= lookup_handler->ha_end_keyread(); + DBUG_ASSERT(!end_error); + + end_error= lookup_handler->ha_index_end(); + if (!error && end_error) + error= end_error; + } + + return error; +} + + +/** + Check if galera disables binary logging for this table + + @return 0 Binary logging disabled + @return 1 Binary logging can be enabled +*/ + + +static inline bool wsrep_check_if_binlog_row(TABLE *table) +{ +#ifdef WITH_WSREP + THD *const thd= table->in_use; + + /* only InnoDB tables will be replicated through binlog emulation */ + if ((WSREP_EMULATE_BINLOG(thd) && + !(table->file->partition_ht()->flags & HTON_WSREP_REPLICATION)) || + thd->wsrep_ignore_table == true) + return 0; +#endif + return 1; +} + + +/** + Prepare handler for row logging + + @return 0 if handler will not participate in row logging + @return 1 
handler will participate in row logging + + This function is always safe to call on an opened table. +*/ + +bool handler::prepare_for_row_logging() +{ + DBUG_ENTER("handler::prepare_for_row_logging"); + + /* Check if we should have row logging */ + if (wsrep_check_if_binlog_row(table) && + check_table_binlog_row_based()) + { + /* + Row logging enabled. Intialize all variables and write + annotated and table maps + */ + row_logging= row_logging_init= 1; + + /* + We need to have a transactional behavior for SQLCOM_CREATE_TABLE + (e.g. CREATE TABLE... SELECT * FROM TABLE) in order to keep a + compatible behavior with the STMT based replication even when + the table is not transactional. In other words, if the operation + fails while executing the insert phase nothing is written to the + binlog. + */ + row_logging_has_trans= + ((sql_command_flags[table->in_use->lex->sql_command] & + (CF_SCHEMA_CHANGE | CF_ADMIN_COMMAND)) || + table->file->has_transactions_and_rollback()); + } + else + { + /* Check row_logging has not been properly cleared from previous command */ + DBUG_ASSERT(row_logging == 0); + } + DBUG_RETURN(row_logging); +} + + +/* + Do all initialization needed for insert +*/ + +int handler::prepare_for_insert(bool do_create) +{ + /* Preparation for unique of blob's */ + if (table->s->long_unique_table || table->s->period.unique_keys) + { + if (do_create && create_lookup_handler()) + return 1; + alloc_lookup_buffer(); + } + return 0; +} + + +int handler::ha_write_row(const uchar *buf) +{ + int error; + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type == F_WRLCK); + DBUG_ENTER("handler::ha_write_row"); + DEBUG_SYNC_C("ha_write_row_start"); + + if ((error= ha_check_overlaps(NULL, buf))) + DBUG_RETURN(error); + + /* + NOTE: this != table->file is true in 3 cases: + + 1. under copy_partitions() (REORGANIZE PARTITION): that does not + require long unique check as it does not introduce new rows or new index. + 2. 
under partition's ha_write_row() (INSERT): check_duplicate_long_entries() + was already done by ha_partition::ha_write_row(), no need to check it + again for each single partition. + 3. under ha_mroonga::wrapper_write_row() + */ + + if (table->s->long_unique_table && this == table->file) + { + DBUG_ASSERT(inited == NONE || lookup_handler != this); + if ((error= check_duplicate_long_entries(buf))) + DBUG_RETURN(error); + } + + MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str); + mark_trx_read_write(); + increment_statistics(&SSV::ha_write_count); + + TABLE_IO_WAIT(tracker, PSI_TABLE_WRITE_ROW, MAX_KEY, error, + { error= write_row(buf); }) + + MYSQL_INSERT_ROW_DONE(error); + if (likely(!error)) + { + rows_changed++; + if (row_logging) + { + Log_func *log_func= Write_rows_log_event::binlog_row_logging_function; + error= binlog_log_row(table, 0, buf, log_func); + } +#ifdef WITH_WSREP + if (WSREP_NNULL(ha_thd()) && table_share->tmp_table == NO_TMP_TABLE && + ht->flags & HTON_WSREP_REPLICATION && + !error && (error= wsrep_after_row(ha_thd()))) + { + DBUG_RETURN(error); + } +#endif /* WITH_WSREP */ + } + + DEBUG_SYNC_C("ha_write_row_end"); + DBUG_RETURN(error); +} + + +int handler::ha_update_row(const uchar *old_data, const uchar *new_data) +{ + int error; + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type == F_WRLCK); + /* + Some storage engines require that the new record is in record[0] + (and the old record is in record[1]). + */ + DBUG_ASSERT(new_data == table->record[0]); + DBUG_ASSERT(old_data == table->record[1]); + + uint saved_status= table->status; + error= ha_check_overlaps(old_data, new_data); + + /* + NOTE: this != table->file is true under partition's ha_update_row(): + check_duplicate_long_entries_update() was already done by + ha_partition::ha_update_row(), no need to check it again for each single + partition. Same applies to ha_mroonga wrapper. 
+ */ + + if (!error && table->s->long_unique_table && this == table->file) + error= check_duplicate_long_entries_update(new_data); + table->status= saved_status; + + if (error) + return error; + + MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str); + mark_trx_read_write(); + increment_statistics(&SSV::ha_update_count); + + TABLE_IO_WAIT(tracker, PSI_TABLE_UPDATE_ROW, active_index, 0, + { error= update_row(old_data, new_data);}) + + MYSQL_UPDATE_ROW_DONE(error); + if (likely(!error)) + { + rows_changed++; + if (row_logging) + { + Log_func *log_func= Update_rows_log_event::binlog_row_logging_function; + error= binlog_log_row(table, old_data, new_data, log_func); + } +#ifdef WITH_WSREP + THD *thd= ha_thd(); + if (WSREP_NNULL(thd)) + { + /* for streaming replication, the following wsrep_after_row() + may replicate a fragment, so we have to declare potential PA + unsafe before that */ + if (table->s->primary_key == MAX_KEY && wsrep_thd_is_local(thd)) + { + WSREP_DEBUG("marking trx as PA unsafe pk %d", table->s->primary_key); + if (thd->wsrep_cs().mark_transaction_pa_unsafe()) + WSREP_DEBUG("session does not have active transaction," + " can not mark as PA unsafe"); + } + + if (!error && table_share->tmp_table == NO_TMP_TABLE && + ht->flags & HTON_WSREP_REPLICATION) + error= wsrep_after_row(thd); + } +#endif /* WITH_WSREP */ + } + return error; +} + +/* + Update first row. 
Only used by sequence tables +*/ + +int handler::update_first_row(const uchar *new_data) +{ + int error; + if (likely(!(error= ha_rnd_init(1)))) + { + int end_error; + if (likely(!(error= ha_rnd_next(table->record[1])))) + { + /* + We have to do the memcmp as otherwise we may get error 169 from InnoDB + */ + if (memcmp(new_data, table->record[1], table->s->reclength)) + error= update_row(table->record[1], new_data); + } + end_error= ha_rnd_end(); + if (likely(!error)) + error= end_error; + /* Logging would be wrong if update_row works but ha_rnd_end fails */ + DBUG_ASSERT(!end_error || error != 0); + } + return error; +} + + +int handler::ha_delete_row(const uchar *buf) +{ + int error; + DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || + m_lock_type == F_WRLCK); + /* + Normally table->record[0] is used, but sometimes table->record[1] is used. + */ + DBUG_ASSERT(buf == table->record[0] || + buf == table->record[1]); + + MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str); + mark_trx_read_write(); + increment_statistics(&SSV::ha_delete_count); + + TABLE_IO_WAIT(tracker, PSI_TABLE_DELETE_ROW, active_index, error, + { error= delete_row(buf);}) + MYSQL_DELETE_ROW_DONE(error); + if (likely(!error)) + { + rows_changed++; + if (row_logging) + { + Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function; + error= binlog_log_row(table, buf, 0, log_func); + } +#ifdef WITH_WSREP + THD *thd= ha_thd(); + if (WSREP_NNULL(thd)) + { + /* for streaming replication, the following wsrep_after_row() + may replicate a fragment, so we have to declare potential PA + unsafe before that */ + if (table->s->primary_key == MAX_KEY && wsrep_thd_is_local(thd)) + { + WSREP_DEBUG("marking trx as PA unsafe pk %d", table->s->primary_key); + if (thd->wsrep_cs().mark_transaction_pa_unsafe()) + WSREP_DEBUG("session does not have active transaction," + " can not mark as PA unsafe"); + } + + if (!error && table_share->tmp_table == NO_TMP_TABLE && + ht->flags & 
HTON_WSREP_REPLICATION) + error= wsrep_after_row(thd); + } +#endif /* WITH_WSREP */ + } + return error; +} + + +/** + Execute a direct update request. A direct update request updates all + qualified rows in a single operation, rather than one row at a time. + In a Spider cluster the direct update operation is pushed down to the + child levels of the cluster. + + Note that this can't be used in case of statment logging + + @param update_rows Number of updated rows. + + @retval 0 Success. + @retval != 0 Failure. +*/ + +int handler::ha_direct_update_rows(ha_rows *update_rows, ha_rows *found_rows) +{ + int error; + MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str); + mark_trx_read_write(); + + error= direct_update_rows(update_rows, found_rows); + MYSQL_UPDATE_ROW_DONE(error); + return error; +} + + +/** + Execute a direct delete request. A direct delete request deletes all + qualified rows in a single operation, rather than one row at a time. + In a Spider cluster the direct delete operation is pushed down to the + child levels of the cluster. + + @param delete_rows Number of deleted rows. + + @retval 0 Success. + @retval != 0 Failure. +*/ + +int handler::ha_direct_delete_rows(ha_rows *delete_rows) +{ + int error; + /* Ensure we are not using binlog row */ + DBUG_ASSERT(!table->in_use->is_current_stmt_binlog_format_row()); + + MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str); + mark_trx_read_write(); + + error = direct_delete_rows(delete_rows); + MYSQL_DELETE_ROW_DONE(error); + return error; +} + + +/** @brief + use_hidden_primary_key() is called in case of an update/delete when + (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined + but we don't have a primary key +*/ +void handler::use_hidden_primary_key() +{ + /* fallback to use all columns in the table to identify row */ + table->column_bitmaps_set(&table->s->all_set, table->write_set); +} + + +/** + Get an initialized ha_share. 
+ + @return Initialized ha_share + @retval NULL ha_share is not yet initialized. + @retval != NULL previous initialized ha_share. + + @note + If not a temp table, then LOCK_ha_data must be held. +*/ + +Handler_share *handler::get_ha_share_ptr() +{ + DBUG_ENTER("handler::get_ha_share_ptr"); + DBUG_ASSERT(ha_share); + DBUG_ASSERT(table_share); + +#ifndef DBUG_OFF + if (table_share->tmp_table == NO_TMP_TABLE) + mysql_mutex_assert_owner(&table_share->LOCK_ha_data); +#endif + + DBUG_RETURN(*ha_share); +} + + +/** + Set ha_share to be used by all instances of the same table/partition. + + @param ha_share Handler_share to be shared. + + @note + If not a temp table, then LOCK_ha_data must be held. +*/ + +void handler::set_ha_share_ptr(Handler_share *arg_ha_share) +{ + DBUG_ENTER("handler::set_ha_share_ptr"); + DBUG_ASSERT(ha_share); +#ifndef DBUG_OFF + if (table_share->tmp_table == NO_TMP_TABLE) + mysql_mutex_assert_owner(&table_share->LOCK_ha_data); +#endif + + *ha_share= arg_ha_share; + DBUG_VOID_RETURN; +} + + +/** + Take a lock for protecting shared handler data. +*/ + +void handler::lock_shared_ha_data() +{ + DBUG_ASSERT(table_share); + if (table_share->tmp_table == NO_TMP_TABLE) + mysql_mutex_lock(&table_share->LOCK_ha_data); +} + + +/** + Release lock for protecting ha_share. 
+*/ + +void handler::unlock_shared_ha_data() +{ + DBUG_ASSERT(table_share); + if (table_share->tmp_table == NO_TMP_TABLE) + mysql_mutex_unlock(&table_share->LOCK_ha_data); +} + +void handler::set_lock_type(enum thr_lock_type lock) +{ + table->reginfo.lock_type= lock; +} + +Compare_keys handler::compare_key_parts(const Field &old_field, + const Column_definition &new_field, + const KEY_PART_INFO &old_part, + const KEY_PART_INFO &new_part) const +{ + if (!old_field.is_equal(new_field)) + return Compare_keys::NotEqual; + + if (old_part.length != new_part.length) + return Compare_keys::NotEqual; + + return Compare_keys::Equal; +} + +#ifdef WITH_WSREP +/** + @details + This function makes the storage engine to force the victim transaction + to abort. Currently, only innodb has this functionality, but any SE + implementing the wsrep API should provide this service to support + multi-master operation. + + @note Aborting the transaction does NOT end it, it still has to + be rolled back with hton->rollback(). + + @note It is safe to abort from one thread (bf_thd) the transaction, + running in another thread (victim_thd), because InnoDB's lock_sys and + trx_mutex guarantee the necessary protection. However, its not safe + to access victim_thd->transaction, because it's not protected from + concurrent accesses. And it's an overkill to take LOCK_plugin and + iterate the whole installed_htons[] array every time. + + @note Object victim_thd is not guaranteed to exist after this + function returns. 
+ + @param bf_thd brute force THD asking for the abort + @param victim_thd victim THD to be aborted + + @return + always 0 +*/ + +int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal) +{ + DBUG_ENTER("ha_abort_transaction"); + if (!WSREP(bf_thd) && + !(bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU && + wsrep_thd_is_toi(bf_thd))) { + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); + mysql_mutex_unlock(&victim_thd->LOCK_thd_kill); + DBUG_RETURN(0); + } + + handlerton *hton= installed_htons[DB_TYPE_INNODB]; + if (hton && hton->abort_transaction) + { + hton->abort_transaction(hton, bf_thd, victim_thd, signal); + } + else + { + WSREP_WARN("Cannot abort InnoDB transaction"); + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); + mysql_mutex_unlock(&victim_thd->LOCK_thd_kill); + } + + DBUG_RETURN(0); +} +#endif /* WITH_WSREP */ + + +/* Remove all indexes for a given table from global index statistics */ + +static +int del_global_index_stats_for_table(THD *thd, uchar* cache_key, size_t cache_key_length) +{ + int res = 0; + uint to_delete_counter= 0; + INDEX_STATS *index_stats_to_delete[MAX_INDEXES]; + DBUG_ENTER("del_global_index_stats_for_table"); + + mysql_mutex_lock(&LOCK_global_index_stats); + + for (uint i= 0; i < global_index_stats.records; i++) + { + INDEX_STATS *index_stats = + (INDEX_STATS*) my_hash_element(&global_index_stats, i); + + /* We search correct db\0table_name\0 string */ + if (index_stats && + index_stats->index_name_length >= cache_key_length && + !memcmp(index_stats->index, cache_key, cache_key_length)) + { + index_stats_to_delete[to_delete_counter++]= index_stats; + } + } + + for (uint i= 0; i < to_delete_counter; i++) + res= my_hash_delete(&global_index_stats, (uchar*)index_stats_to_delete[i]); + + mysql_mutex_unlock(&LOCK_global_index_stats); + DBUG_RETURN(res); +} + +/* Remove a table from global table statistics */ + +int del_global_table_stat(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *table) +{ + TABLE_STATS 
*table_stats; + int res = 0; + uchar *cache_key; + size_t cache_key_length; + DBUG_ENTER("del_global_table_stat"); + + cache_key_length= db->length + 1 + table->length + 1; + + if(!(cache_key= (uchar *)my_malloc(PSI_INSTRUMENT_ME, cache_key_length, + MYF(MY_WME | MY_ZEROFILL)))) + { + /* Out of memory error already given */ + res = 1; + goto end; + } + + memcpy(cache_key, db->str, db->length); + memcpy(cache_key + db->length + 1, table->str, table->length); + + res= del_global_index_stats_for_table(thd, cache_key, cache_key_length); + + mysql_mutex_lock(&LOCK_global_table_stats); + + if((table_stats= (TABLE_STATS*) my_hash_search(&global_table_stats, + cache_key, + cache_key_length))) + res= my_hash_delete(&global_table_stats, (uchar*)table_stats); + + my_free(cache_key); + mysql_mutex_unlock(&LOCK_global_table_stats); + +end: + DBUG_RETURN(res); +} + +/* Remove a index from global index statistics */ + +int del_global_index_stat(THD *thd, TABLE* table, KEY* key_info) +{ + INDEX_STATS *index_stats; + size_t key_length= table->s->table_cache_key.length + key_info->name.length + 1; + int res = 0; + DBUG_ENTER("del_global_index_stat"); + mysql_mutex_lock(&LOCK_global_index_stats); + + if((index_stats= (INDEX_STATS*) my_hash_search(&global_index_stats, + key_info->cache_name, + key_length))) + res= my_hash_delete(&global_index_stats, (uchar*)index_stats); + + mysql_mutex_unlock(&LOCK_global_index_stats); + DBUG_RETURN(res); +} + +/***************************************************************************** + VERSIONING functions +******************************************************************************/ + +bool Vers_parse_info::is_start(const char *name) const +{ + DBUG_ASSERT(name); + return as_row.start && as_row.start.streq(name); +} +bool Vers_parse_info::is_end(const char *name) const +{ + DBUG_ASSERT(name); + return as_row.end && as_row.end.streq(name); +} +bool Vers_parse_info::is_start(const Create_field &f) const +{ + return f.flags & VERS_ROW_START; +} 
+bool Vers_parse_info::is_end(const Create_field &f) const +{ + return f.flags & VERS_ROW_END; +} + +static Create_field *vers_init_sys_field(THD *thd, const char *field_name, int flags, bool integer) +{ + Create_field *f= new (thd->mem_root) Create_field(); + if (!f) + return NULL; + + f->field_name.str= field_name; + f->field_name.length= strlen(field_name); + f->charset= system_charset_info; + f->flags= flags | NO_DEFAULT_VALUE_FLAG | NOT_NULL_FLAG; + if (integer) + { + f->set_handler(&type_handler_vers_trx_id); + f->length= MY_INT64_NUM_DECIMAL_DIGITS - 1; + f->flags|= UNSIGNED_FLAG; + } + else + { + f->set_handler(&type_handler_timestamp2); + f->length= MAX_DATETIME_PRECISION; + } + f->invisible= DBUG_IF("sysvers_show") ? VISIBLE : INVISIBLE_SYSTEM; + + if (f->check(thd)) + return NULL; + + return f; +} + +bool Vers_parse_info::create_sys_field(THD *thd, const char *field_name, + Alter_info *alter_info, int flags) +{ + DBUG_ASSERT(can_native >= 0); /* Requires vers_check_native() called */ + Create_field *f= vers_init_sys_field(thd, field_name, flags, + DBUG_IF("sysvers_force_trx") && + can_native); + if (!f) + return true; + + alter_info->flags|= ALTER_PARSER_ADD_COLUMN; + alter_info->create_list.push_back(f); + + return false; +} + +const Lex_ident Vers_parse_info::default_start= "row_start"; +const Lex_ident Vers_parse_info::default_end= "row_end"; + +bool Vers_parse_info::fix_implicit(THD *thd, Alter_info *alter_info) +{ + // If user specified some of these he must specify the others too. Do nothing. 
+ if (*this) + return false; + + alter_info->flags|= ALTER_PARSER_ADD_COLUMN; + + period= start_end_t(default_start, default_end); + as_row= period; + + if (create_sys_field(thd, default_start, alter_info, VERS_ROW_START) || + create_sys_field(thd, default_end, alter_info, VERS_ROW_END)) + { + return true; + } + return false; +} + + +void Table_scope_and_contents_source_st::vers_check_native() +{ + vers_info.can_native= (db_type->db_type == DB_TYPE_PARTITION_DB || + ha_check_storage_engine_flag(db_type, + HTON_NATIVE_SYS_VERSIONING)); +} + + +bool Table_scope_and_contents_source_st::vers_fix_system_fields( + THD *thd, Alter_info *alter_info, const TABLE_LIST &create_table) +{ + DBUG_ASSERT(!(alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING)); + + if (DBUG_IF("sysvers_force") || DBUG_IF("sysvers_force_trx")) + { + alter_info->flags|= ALTER_ADD_SYSTEM_VERSIONING; + options|= HA_VERSIONED_TABLE; + } + + if (!vers_info.need_check(alter_info)) + return false; + + const bool add_versioning= alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING; + + if (!vers_info.versioned_fields && vers_info.unversioned_fields && !add_versioning) + { + // All is correct but this table is not versioned. 
+ options&= ~HA_VERSIONED_TABLE; + return false; + } + + if (!add_versioning && vers_info && !vers_info.versioned_fields) + { + my_error(ER_MISSING, MYF(0), create_table.table_name.str, + "WITH SYSTEM VERSIONING"); + return true; + } + + List_iterator it(alter_info->create_list); + while (Create_field *f= it++) + { + if (f->vers_sys_field()) + continue; + if ((f->versioning == Column_definition::VERSIONING_NOT_SET && !add_versioning) || + f->versioning == Column_definition::WITHOUT_VERSIONING) + { + f->flags|= VERS_UPDATE_UNVERSIONED_FLAG; + } + } // while + + vers_check_native(); + + if (vers_info.fix_implicit(thd, alter_info)) + return true; + + return false; +} + + +bool Table_scope_and_contents_source_st::vers_check_system_fields( + THD *thd, Alter_info *alter_info, const Lex_table_name &table_name, + const Lex_table_name &db, int select_count) +{ + if (!(options & HA_VERSIONED_TABLE)) + return false; + + uint versioned_fields= 0; + + if (!(alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING)) + { + uint fieldnr= 0; + List_iterator field_it(alter_info->create_list); + while (Create_field *f= field_it++) + { + /* + The field from the CREATE part can be duplicated in the SELECT part of + CREATE...SELECT. In that case double counts should be avoided. + select_create::create_table_from_items just pushes the fields back into + the create_list, without additional manipulations, so the fields from + SELECT go last there. 
+ */ + bool is_dup= false; + if (fieldnr >= alter_info->create_list.elements - select_count) + { + List_iterator dup_it(alter_info->create_list); + for (Create_field *dup= dup_it++; !is_dup && dup != f; dup= dup_it++) + is_dup= Lex_ident(dup->field_name).streq(f->field_name); + } + + if (!(f->flags & VERS_UPDATE_UNVERSIONED_FLAG) && !is_dup) + versioned_fields++; + fieldnr++; + } + if (versioned_fields == VERSIONING_FIELDS) + { + my_error(ER_VERS_TABLE_MUST_HAVE_COLUMNS, MYF(0), table_name.str); + return true; + } + } + + if (!(alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING) && !versioned_fields) + return false; + + return vers_info.check_sys_fields(table_name, db, alter_info); +} + + +bool Vers_parse_info::fix_alter_info(THD *thd, Alter_info *alter_info, + HA_CREATE_INFO *create_info, TABLE *table) +{ + TABLE_SHARE *share= table->s; + const char *table_name= share->table_name.str; + + if (!need_check(alter_info) && !share->versioned) + return false; + + if (share->tmp_table && + !DBUG_IF("sysvers_force") && !DBUG_IF("sysvers_force_trx")) + { + my_error(ER_VERS_NOT_SUPPORTED, MYF(0), "CREATE TEMPORARY TABLE"); + return true; + } + + if (alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING && + table->versioned()) + { + my_error(ER_VERS_ALREADY_VERSIONED, MYF(0), table_name); + return true; + } + + if (alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING) + { + if (!share->versioned) + { + my_error(ER_VERS_NOT_VERSIONED, MYF(0), table_name); + return true; + } +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (table->part_info && + table->part_info->part_type == VERSIONING_PARTITION) + { + my_error(ER_DROP_VERSIONING_SYSTEM_TIME_PARTITION, MYF(0), table_name); + return true; + } +#endif + + return false; + } + + if (!(alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING)) + { + List_iterator_fast it(alter_info->create_list); + while (Create_field *f= it++) + { + if (f->flags & VERS_SYSTEM_FIELD) + { + if (!table->versioned()) + { + my_error(ER_VERS_NOT_VERSIONED, MYF(0), 
table->s->table_name.str); + return true; + } + my_error(ER_VERS_DUPLICATE_ROW_START_END, MYF(0), + f->flags & VERS_ROW_START ? "START" : "END", f->field_name.str); + return true; + } + } + } + + if ((alter_info->flags & ALTER_DROP_PERIOD || + versioned_fields || unversioned_fields) && !share->versioned) + { + my_error(ER_VERS_NOT_VERSIONED, MYF(0), table_name); + return true; + } + + if (share->versioned) + { + if (alter_info->flags & ALTER_ADD_PERIOD) + { + my_error(ER_VERS_ALREADY_VERSIONED, MYF(0), table_name); + return true; + } + + // copy info from existing table + create_info->options|= HA_VERSIONED_TABLE; + + DBUG_ASSERT(share->vers_start_field()); + DBUG_ASSERT(share->vers_end_field()); + Lex_ident start(share->vers_start_field()->field_name); + Lex_ident end(share->vers_end_field()->field_name); + DBUG_ASSERT(start.str); + DBUG_ASSERT(end.str); + + as_row= start_end_t(start, end); + period= as_row; + + if (alter_info->create_list.elements) + { + List_iterator_fast it(alter_info->create_list); + while (Create_field *f= it++) + { + if (f->versioning == Column_definition::WITHOUT_VERSIONING) + f->flags|= VERS_UPDATE_UNVERSIONED_FLAG; + + if (f->change.str && (start.streq(f->change) || end.streq(f->change))) + { + my_error(ER_VERS_ALTER_SYSTEM_FIELD, MYF(0), f->change.str); + return true; + } + } + } + + return false; + } + + if (fix_implicit(thd, alter_info)) + return true; + + if (alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING) + { + if (check_sys_fields(table_name, share->db, alter_info)) + return true; + } + + return false; +} + +bool +Vers_parse_info::fix_create_like(Alter_info &alter_info, HA_CREATE_INFO &create_info, + TABLE_LIST &src_table, TABLE_LIST &table) +{ + List_iterator it(alter_info.create_list); + List_iterator key_it(alter_info.key_list); + List_iterator kp_it; + Create_field *f, *f_start=NULL, *f_end= NULL; + + DBUG_ASSERT(alter_info.create_list.elements > 2); + + if (create_info.tmp_table()) + { + int remove= 2; + while (remove && (f= 
it++)) + { + if (f->flags & VERS_SYSTEM_FIELD) + { + it.remove(); + remove--; + } + key_it.rewind(); + while (Key *key= key_it++) + { + kp_it.init(key->columns); + while (Key_part_spec *kp= kp_it++) + { + if (0 == lex_string_cmp(system_charset_info, &kp->field_name, + &f->field_name)) + { + kp_it.remove(); + } + } + if (0 == key->columns.elements) + { + key_it.remove(); + } + } + } + DBUG_ASSERT(remove == 0); + push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_UNKNOWN_ERROR, + "System versioning is stripped from temporary `%s.%s`", + table.db.str, table.table_name.str); + return false; + } + + while ((f= it++)) + { + if (f->flags & VERS_ROW_START) + { + f_start= f; + if (f_end) + break; + } + else if (f->flags & VERS_ROW_END) + { + f_end= f; + if (f_start) + break; + } + } + + if (!f_start || !f_end) + { + my_error(ER_MISSING, MYF(0), src_table.table_name.str, + f_start ? "AS ROW END" : "AS ROW START"); + return true; + } + + as_row= start_end_t(f_start->field_name, f_end->field_name); + period= as_row; + + create_info.options|= HA_VERSIONED_TABLE; + return false; +} + +bool Vers_parse_info::need_check(const Alter_info *alter_info) const +{ + return versioned_fields || unversioned_fields || + alter_info->flags & ALTER_ADD_PERIOD || + alter_info->flags & ALTER_DROP_PERIOD || + alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING || + alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING || *this; +} + +bool Vers_parse_info::check_conditions(const Lex_table_name &table_name, + const Lex_table_name &db) const +{ + if (!as_row.start || !as_row.end) + { + my_error(ER_MISSING, MYF(0), table_name.str, + as_row.start ? 
"AS ROW END" : "AS ROW START"); + return true; + } + + if (!period.start || !period.end) + { + my_error(ER_MISSING, MYF(0), table_name.str, "PERIOD FOR SYSTEM_TIME"); + return true; + } + + if (!as_row.start.streq(period.start) || + !as_row.end.streq(period.end)) + { + my_error(ER_VERS_PERIOD_COLUMNS, MYF(0), as_row.start.str, as_row.end.str); + return true; + } + + if (db.streq(MYSQL_SCHEMA_NAME)) + { + my_error(ER_VERS_DB_NOT_SUPPORTED, MYF(0), MYSQL_SCHEMA_NAME.str); + return true; + } + return false; +} + +static bool is_versioning_timestamp(const Column_definition *f) +{ + return f->type_handler() == &type_handler_timestamp2 && + f->length == MAX_DATETIME_FULL_WIDTH; +} + +static bool is_some_bigint(const Column_definition *f) +{ + return f->type_handler() == &type_handler_slonglong || + f->type_handler() == &type_handler_ulonglong || + f->type_handler() == &type_handler_vers_trx_id; +} + +static bool is_versioning_bigint(const Column_definition *f) +{ + return is_some_bigint(f) && f->flags & UNSIGNED_FLAG && + f->length == MY_INT64_NUM_DECIMAL_DIGITS - 1; +} + +static void require_timestamp_error(const char *field, const char *table) +{ + my_error(ER_VERS_FIELD_WRONG_TYPE, MYF(0), field, "TIMESTAMP(6)", table); +} + +static void require_trx_id_error(const char *field, const char *table) +{ + my_error(ER_VERS_FIELD_WRONG_TYPE, MYF(0), field, "BIGINT(20) UNSIGNED", + table); +} + + +bool Vers_type_timestamp::check_sys_fields(const LEX_CSTRING &table_name, + const Column_definition *row_start, + const Column_definition *row_end) const +{ + if (!is_versioning_timestamp(row_start)) + { + require_timestamp_error(row_start->field_name.str, table_name.str); + return true; + } + + if (row_end->type_handler()->vers() != this || + !is_versioning_timestamp(row_end)) + { + require_timestamp_error(row_end->field_name.str, table_name.str); + return true; + } + + return false; +} + + +bool Vers_type_trx::check_sys_fields(const LEX_CSTRING &table_name, + const 
Column_definition *row_start, + const Column_definition *row_end) const +{ + if (!is_versioning_bigint(row_start)) + { + require_trx_id_error(row_start->field_name.str, table_name.str); + return true; + } + + if (row_end->type_handler()->vers() != this || + !is_versioning_bigint(row_end)) + { + require_trx_id_error(row_end->field_name.str, table_name.str); + return true; + } + + if (!is_some_bigint(row_start)) + { + require_timestamp_error(row_start->field_name.str, table_name.str); + return true; + } + + if (!TR_table::use_transaction_registry) + { + my_error(ER_VERS_TRT_IS_DISABLED, MYF(0)); + return true; + } + + return false; +} + + +bool Vers_parse_info::check_sys_fields(const Lex_table_name &table_name, + const Lex_table_name &db, + Alter_info *alter_info) const +{ + if (check_conditions(table_name, db)) + return true; + + List_iterator it(alter_info->create_list); + const Create_field *row_start= nullptr; + const Create_field *row_end= nullptr; + while (const Create_field *f= it++) + { + if (f->flags & VERS_ROW_START && !row_start) + row_start= f; + if (f->flags & VERS_ROW_END && !row_end) + row_end= f; + } + + if (!row_start || !row_end) + { + my_error(ER_VERS_PERIOD_COLUMNS, MYF(0), as_row.start.str, as_row.end.str); + return true; + } + + const Vers_type_handler *row_start_vers= row_start->type_handler()->vers(); + + if (!row_start_vers) + { + require_timestamp_error(row_start->field_name.str, table_name); + return true; + } + + return row_start_vers->check_sys_fields(table_name, row_start, row_end); +} + +bool Table_period_info::check_field(const Create_field* f, + const Lex_ident& f_name) const +{ + bool res= false; + if (!f) + { + my_error(ER_BAD_FIELD_ERROR, MYF(0), f_name.str, name.str); + res= true; + } + else if (f->type_handler()->mysql_timestamp_type() != MYSQL_TIMESTAMP_DATE && + f->type_handler()->mysql_timestamp_type() != MYSQL_TIMESTAMP_DATETIME) + { + my_error(ER_WRONG_FIELD_SPEC, MYF(0), f->field_name.str); + res= true; + } + else if 
(f->vcol_info || f->flags & VERS_SYSTEM_FIELD) + { + my_error(ER_PERIOD_FIELD_WRONG_ATTRIBUTES, MYF(0), + f->field_name.str, "GENERATED ALWAYS AS"); + res= true; + } + + return res; +} + +bool Table_scope_and_contents_source_st::check_fields( + THD *thd, Alter_info *alter_info, + const Lex_table_name &table_name, const Lex_table_name &db, int select_count) +{ + return vers_check_system_fields(thd, alter_info, + table_name, db, select_count) || + check_period_fields(thd, alter_info); +} + +bool Table_scope_and_contents_source_st::check_period_fields( + THD *thd, Alter_info *alter_info) +{ + if (!period_info.name) + return false; + + if (tmp_table()) + { + my_error(ER_PERIOD_TEMPORARY_NOT_ALLOWED, MYF(0)); + return true; + } + + Table_period_info::start_end_t &period= period_info.period; + const Create_field *row_start= NULL; + const Create_field *row_end= NULL; + List_iterator it(alter_info->create_list); + while (const Create_field *f= it++) + { + if (period.start.streq(f->field_name)) row_start= f; + else if (period.end.streq(f->field_name)) row_end= f; + + if (period_info.name.streq(f->field_name)) + { + my_error(ER_DUP_FIELDNAME, MYF(0), f->field_name.str); + return true; + } + } + + bool res= period_info.check_field(row_start, period.start.str) + || period_info.check_field(row_end, period.end.str); + if (res) + return true; + + if (row_start->type_handler() != row_end->type_handler() + || row_start->length != row_end->length) + { + my_error(ER_PERIOD_TYPES_MISMATCH, MYF(0), period_info.name.str); + res= true; + } + + return res; +} + +bool +Table_scope_and_contents_source_st::fix_create_fields(THD *thd, + Alter_info *alter_info, + const TABLE_LIST &create_table) +{ + return vers_fix_system_fields(thd, alter_info, create_table) + || fix_period_fields(thd, alter_info); +} + +bool +Table_scope_and_contents_source_st::fix_period_fields(THD *thd, + Alter_info *alter_info) +{ + if (!period_info.name) + return false; + + Table_period_info::start_end_t &period= 
period_info.period; + List_iterator it(alter_info->create_list); + while (Create_field *f= it++) + { + if (period.start.streq(f->field_name) || period.end.streq(f->field_name)) + { + f->period= &period_info; + f->flags|= NOT_NULL_FLAG; + } + } + return false; +} diff --git a/sql/handler.h b/sql/handler.h new file mode 100644 index 00000000..50ec0ed1 --- /dev/null +++ b/sql/handler.h @@ -0,0 +1,5426 @@ +#ifndef HANDLER_INCLUDED +#define HANDLER_INCLUDED +/* + Copyright (c) 2000, 2019, Oracle and/or its affiliates. + Copyright (c) 2009, 2023, MariaDB + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/* Definitions for parameters to do with handler-routines */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include "sql_const.h" +#include "sql_basic_types.h" +#include "mysqld.h" /* server_id */ +#include "sql_plugin.h" /* plugin_ref, st_plugin_int, plugin */ +#include "thr_lock.h" /* thr_lock_type, THR_LOCK_DATA */ +#include "sql_cache.h" +#include "structs.h" /* SHOW_COMP_OPTION */ +#include "sql_array.h" /* Dynamic_array<> */ +#include "mdl.h" +#include "vers_string.h" +#include "ha_handler_stats.h" + +#include "sql_analyze_stmt.h" // for Exec_time_tracker + +#include +#include +#include +#include +#include "sql_sequence.h" +#include "mem_root_array.h" +#include // pair +#include /* __attribute__ */ + +class Alter_info; +class Virtual_column_info; +class sequence_definition; +class Rowid_filter; +class Field_string; +class Field_varstring; +class Field_blob; +class Column_definition; + +// the following is for checking tables + +#define HA_ADMIN_ALREADY_DONE 1 +#define HA_ADMIN_OK 0 +#define HA_ADMIN_NOT_IMPLEMENTED -1 +#define HA_ADMIN_FAILED -2 +#define HA_ADMIN_CORRUPT -3 +#define HA_ADMIN_INTERNAL_ERROR -4 +#define HA_ADMIN_INVALID -5 +#define HA_ADMIN_REJECT -6 +#define HA_ADMIN_TRY_ALTER -7 +#define HA_ADMIN_WRONG_CHECKSUM -8 +#define HA_ADMIN_NOT_BASE_TABLE -9 +#define HA_ADMIN_NEEDS_UPGRADE -10 +#define HA_ADMIN_NEEDS_ALTER -11 +#define HA_ADMIN_NEEDS_CHECK -12 +#define HA_ADMIN_COMMIT_ERROR -13 + +/** + Return values for check_if_supported_inplace_alter(). + + @see check_if_supported_inplace_alter() for description of + the individual values. 
+*/ +enum enum_alter_inplace_result { + HA_ALTER_ERROR, + HA_ALTER_INPLACE_COPY_NO_LOCK, + HA_ALTER_INPLACE_COPY_LOCK, + HA_ALTER_INPLACE_NOCOPY_LOCK, + HA_ALTER_INPLACE_NOCOPY_NO_LOCK, + HA_ALTER_INPLACE_INSTANT, + HA_ALTER_INPLACE_NOT_SUPPORTED, + HA_ALTER_INPLACE_EXCLUSIVE_LOCK, + HA_ALTER_INPLACE_SHARED_LOCK, + HA_ALTER_INPLACE_NO_LOCK +}; + +/* Flags for create_partitioning_metadata() */ + +enum chf_create_flags { + CHF_CREATE_FLAG, + CHF_DELETE_FLAG, + CHF_RENAME_FLAG, + CHF_INDEX_FLAG +}; + +/* Bits in table_flags() to show what database can do */ + +#define HA_NO_TRANSACTIONS (1ULL << 0) /* Doesn't support transactions */ +#define HA_PARTIAL_COLUMN_READ (1ULL << 1) /* read may not return all columns */ +#define HA_TABLE_SCAN_ON_INDEX (1ULL << 2) /* No separate data/index file */ +/* + The following should be set if the following is not true when scanning + a table with rnd_next() + - We will see all rows (including deleted ones) + - Row positions are 'table->s->db_record_offset' apart + If this flag is not set, filesort will do a position() call for each matched + row to be able to find the row later. 
+*/ +#define HA_REC_NOT_IN_SEQ (1ULL << 3) +#define HA_CAN_GEOMETRY (1ULL << 4) +/* + Reading keys in random order is as fast as reading keys in sort order + (Used in records.cc to decide if we should use a record cache and by + filesort to decide if we should sort key + data or key + pointer-to-row +*/ +#define HA_FAST_KEY_READ (1ULL << 5) +/* + Set the following flag if we on delete should force all key to be read + and on update read all keys that changes +*/ +#define HA_REQUIRES_KEY_COLUMNS_FOR_DELETE (1ULL << 6) +#define HA_NULL_IN_KEY (1ULL << 7) /* One can have keys with NULL */ +#define HA_DUPLICATE_POS (1ULL << 8) /* ha_position() gives dup row */ +#define HA_NO_BLOBS (1ULL << 9) /* Doesn't support blobs */ +#define HA_CAN_INDEX_BLOBS (1ULL << 10) +#define HA_AUTO_PART_KEY (1ULL << 11) /* auto-increment in multi-part key */ +/* + The engine requires every table to have a user-specified PRIMARY KEY. + Do not set the flag if the engine can generate a hidden primary key internally. + This flag is ignored if a SEQUENCE is created (which, in turn, needs + HA_CAN_TABLES_WITHOUT_ROLLBACK flag) +*/ +#define HA_REQUIRE_PRIMARY_KEY (1ULL << 12) +#define HA_STATS_RECORDS_IS_EXACT (1ULL << 13) /* stats.records is exact */ +/* + INSERT_DELAYED only works with handlers that uses MySQL internal table + level locks +*/ +#define HA_CAN_INSERT_DELAYED (1ULL << 14) +/* + If we get the primary key columns for free when we do an index read + (usually, it also implies that HA_PRIMARY_KEY_REQUIRED_FOR_POSITION + flag is set). +*/ +#define HA_PRIMARY_KEY_IN_READ_INDEX (1ULL << 15) +/* + If HA_PRIMARY_KEY_REQUIRED_FOR_POSITION is set, it means that to position() + uses a primary key given by the record argument. + Without primary key, we can't call position(). + If not set, the position is returned as the current rows position + regardless of what argument is given. 
+*/ +#define HA_PRIMARY_KEY_REQUIRED_FOR_POSITION (1ULL << 16) +#define HA_CAN_RTREEKEYS (1ULL << 17) +#define HA_NOT_DELETE_WITH_CACHE (1ULL << 18) /* unused */ +/* + The following is we need to a primary key to delete (and update) a row. + If there is no primary key, all columns needs to be read on update and delete +*/ +#define HA_PRIMARY_KEY_REQUIRED_FOR_DELETE (1ULL << 19) +#define HA_NO_PREFIX_CHAR_KEYS (1ULL << 20) +#define HA_CAN_FULLTEXT (1ULL << 21) +#define HA_CAN_SQL_HANDLER (1ULL << 22) +#define HA_NO_AUTO_INCREMENT (1ULL << 23) +/* Has automatic checksums and uses the old checksum format */ +#define HA_HAS_OLD_CHECKSUM (1ULL << 24) +/* Table data are stored in separate files (for lower_case_table_names) */ +#define HA_FILE_BASED (1ULL << 26) +#define HA_CAN_BIT_FIELD (1ULL << 28) /* supports bit fields */ +#define HA_NEED_READ_RANGE_BUFFER (1ULL << 29) /* for read_multi_range */ +#define HA_ANY_INDEX_MAY_BE_UNIQUE (1ULL << 30) +#define HA_NO_COPY_ON_ALTER (1ULL << 31) +#define HA_HAS_RECORDS (1ULL << 32) /* records() gives exact count*/ +/* Has it's own method of binlog logging */ +#define HA_HAS_OWN_BINLOGGING (1ULL << 33) +/* + Engine is capable of row-format and statement-format logging, + respectively +*/ +#define HA_BINLOG_ROW_CAPABLE (1ULL << 34) +#define HA_BINLOG_STMT_CAPABLE (1ULL << 35) + +/* + When a multiple key conflict happens in a REPLACE command mysql + expects the conflicts to be reported in the ascending order of + key names. + + For e.g. + + CREATE TABLE t1 (a INT, UNIQUE (a), b INT NOT NULL, UNIQUE (b), c INT NOT + NULL, INDEX(c)); + + REPLACE INTO t1 VALUES (1,1,1),(2,2,2),(2,1,3); + + MySQL expects the conflict with 'a' to be reported before the conflict with + 'b'. + + If the underlying storage engine does not report the conflicting keys in + ascending order, it causes unexpected errors when the REPLACE command is + executed. + + This flag helps the underlying SE to inform the server that the keys are not + ordered. 
+*/ +#define HA_DUPLICATE_KEY_NOT_IN_ORDER (1ULL << 36) + +/* + Engine supports REPAIR TABLE. Used by CHECK TABLE FOR UPGRADE if an + incompatible table is detected. If this flag is set, CHECK TABLE FOR UPGRADE + will report ER_TABLE_NEEDS_UPGRADE, otherwise ER_TABLE_NEED_REBUILD. +*/ +#define HA_CAN_REPAIR (1ULL << 37) + +/* Has automatic checksums and uses the new checksum format */ +#define HA_HAS_NEW_CHECKSUM (1ULL << 38) +#define HA_CAN_VIRTUAL_COLUMNS (1ULL << 39) +#define HA_MRR_CANT_SORT (1ULL << 40) +/* All of VARCHAR is stored, including bytes after real varchar data */ +#define HA_RECORD_MUST_BE_CLEAN_ON_WRITE (1ULL << 41) + +/* + This storage engine supports condition pushdown +*/ +#define HA_CAN_TABLE_CONDITION_PUSHDOWN (1ULL << 42) +/* old name for the same flag */ +#define HA_MUST_USE_TABLE_CONDITION_PUSHDOWN HA_CAN_TABLE_CONDITION_PUSHDOWN + +/** + The handler supports read before write removal optimization + + Read before write removal may be used for storage engines which support + write without previous read of the row to be updated. Handler returning + this flag must implement start_read_removal() and end_read_removal(). + The handler may return "fake" rows constructed from the key of the row + asked for. This is used to optimize UPDATE and DELETE by reducing the + number of roundtrips between handler and storage engine. + + Example: + UPDATE a=1 WHERE pk IN () + + mysql_update() + { + if () + start_read_removal() + -> handler returns true if read removal supported for this table/query + + while(read_record("pk=")) + -> handler returns fake row with column "pk" set to + + ha_update_row() + -> handler sends write "a=1" for row with "pk=" + + end_read_removal() + -> handler returns the number of rows actually written + } + + @note This optimization in combination with batching may be used to + remove even more roundtrips. 
+*/ +#define HA_READ_BEFORE_WRITE_REMOVAL (1ULL << 43) + +/* + Engine supports extended fulltext API + */ +#define HA_CAN_FULLTEXT_EXT (1ULL << 44) + +/* + Storage engine supports table export using the + FLUSH TABLE FOR EXPORT statement + (meaning, after this statement one can copy table files out of the + datadir and later "import" (somehow) in another MariaDB instance) + */ +#define HA_CAN_EXPORT (1ULL << 45) + +/* + Storage engine does not require an exclusive metadata lock + on the table during optimize. (TODO and repair?). + It can allow other connections to open the table. + (it does not necessarily mean that other connections can + read or modify the table - this is defined by THR locks and the + ::store_lock() method). +*/ +#define HA_CONCURRENT_OPTIMIZE (1ULL << 46) + +/* + If the storage engine support tables that will not roll back on commit + In addition the table should not lock rows and support READ and WRITE + UNCOMMITTED. + This is useful for implementing things like SEQUENCE but can also in + the future be useful to do logging that should never roll back. +*/ +#define HA_CAN_TABLES_WITHOUT_ROLLBACK (1ULL << 47) + +/* + Mainly for usage by SEQUENCE engine. Setting this flag means + that the table will never roll back and that all operations + for this table should stored in the non transactional log + space that will always be written, even on rollback. +*/ + +#define HA_PERSISTENT_TABLE (1ULL << 48) + +/* + If storage engine uses another engine as a base + This flag is also needed if the table tries to open the .frm file + as part of drop table. +*/ +#define HA_REUSES_FILE_NAMES (1ULL << 49) + +/* + Set of all binlog flags. Currently only contain the capabilities + flags. 
+ */ +#define HA_BINLOG_FLAGS (HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE) + +/* The following are used by Spider */ +#define HA_CAN_FORCE_BULK_UPDATE (1ULL << 50) +#define HA_CAN_FORCE_BULK_DELETE (1ULL << 51) +#define HA_CAN_DIRECT_UPDATE_AND_DELETE (1ULL << 52) + +/* The following is for partition handler */ +#define HA_CAN_MULTISTEP_MERGE (1LL << 53) + +/* calling cmp_ref() on the engine is expensive */ +#define HA_SLOW_CMP_REF (1ULL << 54) +#define HA_CMP_REF_IS_EXPENSIVE HA_SLOW_CMP_REF + +/** + Some engines are unable to provide an efficient implementation for rnd_pos(). + Server will try to avoid it, if possible + + TODO better to do it with cost estimates, not with an explicit flag +*/ +#define HA_SLOW_RND_POS (1ULL << 55) + +/* Safe for online backup */ +#define HA_CAN_ONLINE_BACKUPS (1ULL << 56) + +/* Support native hash index */ +#define HA_CAN_HASH_KEYS (1ULL << 57) +#define HA_CRASH_SAFE (1ULL << 58) + +/* + There is no need to evict the table from the table definition cache having + run ANALYZE TABLE on it + */ +#define HA_ONLINE_ANALYZE (1ULL << 59) +/* + Rowid's are not comparable. This is set if the rowid is unique to the + current open handler, like it is with federated where the rowid is a + pointer to a local result set buffer. The effect of having this set is + that the optimizer will not consider the following optimizations for + the table: + ror scans, filtering or duplicate weedout +*/ +#define HA_NON_COMPARABLE_ROWID (1ULL << 60) + +/* Implements SELECT ... 
FOR UPDATE SKIP LOCKED */ +#define HA_CAN_SKIP_LOCKED (1ULL << 61) + +#define HA_LAST_TABLE_FLAG HA_CAN_SKIP_LOCKED + + +/* bits in index_flags(index_number) for what you can do with index */ +#define HA_READ_NEXT 1 /* TODO really use this flag */ +#define HA_READ_PREV 2 /* supports ::index_prev */ +#define HA_READ_ORDER 4 /* index_next/prev follow sort order */ +#define HA_READ_RANGE 8 /* can find all records in a range */ +#define HA_ONLY_WHOLE_INDEX 16 /* Can't use part key searches */ +#define HA_KEYREAD_ONLY 64 /* Support HA_EXTRA_KEYREAD */ + +/* + Index scan will not return records in rowid order. Not guaranteed to be + set for unordered (e.g. HASH) indexes. +*/ +#define HA_KEY_SCAN_NOT_ROR 128 +#define HA_DO_INDEX_COND_PUSHDOWN 256 /* Supports Index Condition Pushdown */ +/* + Data is clustered on this key. This means that when you read the key + you also get the row data without any additional disk reads. +*/ +#define HA_CLUSTERED_INDEX 512 + +#define HA_DO_RANGE_FILTER_PUSHDOWN 1024 + +/* + bits in alter_table_flags: +*/ +/* + These bits are set if different kinds of indexes can be created or dropped + in-place without re-creating the table using a temporary table. + NO_READ_WRITE indicates that the handler needs concurrent reads and writes + of table data to be blocked. + Partitioning needs both ADD and DROP to be supported by its underlying + handlers, due to error handling, see bug#57778. +*/ +#define HA_INPLACE_ADD_INDEX_NO_READ_WRITE (1UL << 0) +#define HA_INPLACE_DROP_INDEX_NO_READ_WRITE (1UL << 1) +#define HA_INPLACE_ADD_UNIQUE_INDEX_NO_READ_WRITE (1UL << 2) +#define HA_INPLACE_DROP_UNIQUE_INDEX_NO_READ_WRITE (1UL << 3) +#define HA_INPLACE_ADD_PK_INDEX_NO_READ_WRITE (1UL << 4) +#define HA_INPLACE_DROP_PK_INDEX_NO_READ_WRITE (1UL << 5) +/* + These are set if different kinds of indexes can be created or dropped + in-place while still allowing concurrent reads (but not writes) of table + data. 
If a handler is capable of one or more of these, it should also set + the corresponding *_NO_READ_WRITE bit(s). +*/ +#define HA_INPLACE_ADD_INDEX_NO_WRITE (1UL << 6) +#define HA_INPLACE_DROP_INDEX_NO_WRITE (1UL << 7) +#define HA_INPLACE_ADD_UNIQUE_INDEX_NO_WRITE (1UL << 8) +#define HA_INPLACE_DROP_UNIQUE_INDEX_NO_WRITE (1UL << 9) +#define HA_INPLACE_ADD_PK_INDEX_NO_WRITE (1UL << 10) +#define HA_INPLACE_DROP_PK_INDEX_NO_WRITE (1UL << 11) +/* + HA_PARTITION_FUNCTION_SUPPORTED indicates that the function is + supported at all. + HA_FAST_CHANGE_PARTITION means that optimised variants of the changes + exists but they are not necessarily done online. + + HA_ONLINE_DOUBLE_WRITE means that the handler supports writing to both + the new partition and to the old partitions when updating through the + old partitioning schema while performing a change of the partitioning. + This means that we can support updating of the table while performing + the copy phase of the change. For no lock at all also a double write + from new to old must exist and this is not required when this flag is + set. + This is actually removed even before it was introduced the first time. + The new idea is that handlers will handle the lock level already in + store_lock for ALTER TABLE partitions. + + HA_PARTITION_ONE_PHASE is a flag that can be set by handlers that take + care of changing the partitions online and in one phase. Thus all phases + needed to handle the change are implemented inside the storage engine. + The storage engine must also support auto-discovery since the frm file + is changed as part of the change and this change must be controlled by + the storage engine. A typical engine to support this is NDB (through + WL #2498). 
+*/ +#define HA_PARTITION_FUNCTION_SUPPORTED (1UL << 12) +#define HA_FAST_CHANGE_PARTITION (1UL << 13) +#define HA_PARTITION_ONE_PHASE (1UL << 14) + +/* operations for disable/enable indexes */ +#define HA_KEY_SWITCH_NONUNIQ 0 +#define HA_KEY_SWITCH_ALL 1 +#define HA_KEY_SWITCH_NONUNIQ_SAVE 2 +#define HA_KEY_SWITCH_ALL_SAVE 3 + +/* + Note: the following includes binlog and closing 0. + TODO remove the limit, use dynarrays +*/ +#define MAX_HA 64 + +/* + Use this instead of 0 as the initial value for the slot number of + handlerton, so that we can distinguish uninitialized slot number + from slot 0. +*/ +#define HA_SLOT_UNDEF ((uint)-1) + +/* + Parameters for open() (in register form->filestat) + HA_GET_INFO does an implicit HA_ABORT_IF_LOCKED +*/ + +#define HA_OPEN_KEYFILE 1U +#define HA_READ_ONLY 16U /* File opened as readonly */ +/* Try readonly if can't open with read and write */ +#define HA_TRY_READ_ONLY 32U + + /* Some key definitions */ +#define HA_KEY_NULL_LENGTH 1 +#define HA_KEY_BLOB_LENGTH 2 + +/* Maximum length of any index lookup key, in bytes */ + +#define MAX_KEY_LENGTH (MAX_DATA_LENGTH_FOR_KEY \ + +(MAX_REF_PARTS \ + *(HA_KEY_NULL_LENGTH + HA_KEY_BLOB_LENGTH))) + +#define HA_LEX_CREATE_TMP_TABLE 1U +#define HA_CREATE_TMP_ALTER 8U +#define HA_LEX_CREATE_SEQUENCE 16U +#define HA_VERSIONED_TABLE 32U +#define HA_SKIP_KEY_SORT 64U + +#define HA_MAX_REC_LENGTH 65535 + +/* Table caching type */ +#define HA_CACHE_TBL_NONTRANSACT 0 +#define HA_CACHE_TBL_NOCACHE 1U +#define HA_CACHE_TBL_ASKTRANSACT 2U +#define HA_CACHE_TBL_TRANSACT 4U + +/** + Options for the START TRANSACTION statement. + + Note that READ ONLY and READ WRITE are logically mutually exclusive. + This is enforced by the parser and depended upon by trans_begin(). + + We need two flags instead of one in order to differentiate between + situation when no READ WRITE/ONLY clause were given and thus transaction + is implicitly READ WRITE and the case when READ WRITE clause was used + explicitly. 
+*/ + +// WITH CONSISTENT SNAPSHOT option +static const uint MYSQL_START_TRANS_OPT_WITH_CONS_SNAPSHOT = 1; +// READ ONLY option +static const uint MYSQL_START_TRANS_OPT_READ_ONLY = 2; +// READ WRITE option +static const uint MYSQL_START_TRANS_OPT_READ_WRITE = 4; + +/* Flags for method is_fatal_error */ +#define HA_CHECK_DUP_KEY 1U +#define HA_CHECK_DUP_UNIQUE 2U +#define HA_CHECK_FK_ERROR 4U +#define HA_CHECK_DUP (HA_CHECK_DUP_KEY + HA_CHECK_DUP_UNIQUE) +#define HA_CHECK_ALL (~0U) + +/* Options for info_push() */ +#define INFO_KIND_UPDATE_FIELDS 101 +#define INFO_KIND_UPDATE_VALUES 102 +#define INFO_KIND_FORCE_LIMIT_BEGIN 103 +#define INFO_KIND_FORCE_LIMIT_END 104 + +enum legacy_db_type +{ + /* note these numerical values are fixed and can *not* be changed */ + DB_TYPE_UNKNOWN=0, + DB_TYPE_HEAP=6, + DB_TYPE_MYISAM=9, + DB_TYPE_MRG_MYISAM=10, + DB_TYPE_INNODB=12, + DB_TYPE_EXAMPLE_DB=15, + DB_TYPE_ARCHIVE_DB=16, + DB_TYPE_CSV_DB=17, + DB_TYPE_FEDERATED_DB=18, + DB_TYPE_BLACKHOLE_DB=19, + DB_TYPE_PARTITION_DB=20, + DB_TYPE_BINLOG=21, + DB_TYPE_PBXT=23, + DB_TYPE_PERFORMANCE_SCHEMA=28, + DB_TYPE_S3=41, + DB_TYPE_ARIA=42, + DB_TYPE_TOKUDB=43, /* disabled in MariaDB Server 10.5, removed in 10.6 */ + DB_TYPE_SEQUENCE=44, + DB_TYPE_FIRST_DYNAMIC=45, + DB_TYPE_DEFAULT=127 // Must be last +}; +/* + Better name for DB_TYPE_UNKNOWN. Should be used for engines that do not have + a hard-coded type value here. 
+ */ +#define DB_TYPE_AUTOASSIGN DB_TYPE_UNKNOWN + +enum row_type { ROW_TYPE_NOT_USED=-1, ROW_TYPE_DEFAULT, ROW_TYPE_FIXED, + ROW_TYPE_DYNAMIC, ROW_TYPE_COMPRESSED, + ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT, ROW_TYPE_PAGE }; + +/* not part of the enum, so that it shouldn't be in switch(row_type) */ +#define ROW_TYPE_MAX ((uint)ROW_TYPE_PAGE + 1) + +/* Specifies data storage format for individual columns */ +enum column_format_type { + COLUMN_FORMAT_TYPE_DEFAULT= 0, /* Not specified (use engine default) */ + COLUMN_FORMAT_TYPE_FIXED= 1, /* FIXED format */ + COLUMN_FORMAT_TYPE_DYNAMIC= 2 /* DYNAMIC format */ +}; + +enum enum_binlog_func { + BFN_RESET_LOGS= 1, + BFN_RESET_SLAVE= 2, + BFN_BINLOG_WAIT= 3, + BFN_BINLOG_END= 4, + BFN_BINLOG_PURGE_FILE= 5 +}; + +enum enum_binlog_command { + LOGCOM_CREATE_TABLE, + LOGCOM_ALTER_TABLE, + LOGCOM_RENAME_TABLE, + LOGCOM_DROP_TABLE, + LOGCOM_CREATE_DB, + LOGCOM_ALTER_DB, + LOGCOM_DROP_DB +}; + +/* struct to hold information about the table that should be created */ + +/* Bits in used_fields */ +#define HA_CREATE_USED_AUTO (1UL << 0) +#define HA_CREATE_USED_RAID (1UL << 1) //RAID is no longer available +#define HA_CREATE_USED_UNION (1UL << 2) +#define HA_CREATE_USED_INSERT_METHOD (1UL << 3) +#define HA_CREATE_USED_MIN_ROWS (1UL << 4) +#define HA_CREATE_USED_MAX_ROWS (1UL << 5) +#define HA_CREATE_USED_AVG_ROW_LENGTH (1UL << 6) +#define HA_CREATE_USED_PACK_KEYS (1UL << 7) +#define HA_CREATE_USED_CHARSET (1UL << 8) +#define HA_CREATE_USED_DEFAULT_CHARSET (1UL << 9) +#define HA_CREATE_USED_DATADIR (1UL << 10) +#define HA_CREATE_USED_INDEXDIR (1UL << 11) +#define HA_CREATE_USED_ENGINE (1UL << 12) +#define HA_CREATE_USED_CHECKSUM (1UL << 13) +#define HA_CREATE_USED_DELAY_KEY_WRITE (1UL << 14) +#define HA_CREATE_USED_ROW_FORMAT (1UL << 15) +#define HA_CREATE_USED_COMMENT (1UL << 16) +#define HA_CREATE_USED_PASSWORD (1UL << 17) +#define HA_CREATE_USED_CONNECTION (1UL << 18) +#define HA_CREATE_USED_KEY_BLOCK_SIZE (1UL << 19) +/* The following 
two are used by Maria engine: */ +#define HA_CREATE_USED_TRANSACTIONAL (1UL << 20) +#define HA_CREATE_USED_PAGE_CHECKSUM (1UL << 21) +/** This is set whenever STATS_PERSISTENT=0|1|default has been +specified in CREATE/ALTER TABLE. See also HA_OPTION_STATS_PERSISTENT in +include/my_base.h. It is possible to distinguish whether +STATS_PERSISTENT=default has been specified or no STATS_PERSISTENT= is +given at all. */ +#define HA_CREATE_USED_STATS_PERSISTENT (1UL << 22) +/** + This is set whenever STATS_AUTO_RECALC=0|1|default has been + specified in CREATE/ALTER TABLE. See enum_stats_auto_recalc. + It is possible to distinguish whether STATS_AUTO_RECALC=default + has been specified or no STATS_AUTO_RECALC= is given at all. +*/ +#define HA_CREATE_USED_STATS_AUTO_RECALC (1UL << 23) +/** + This is set whenever STATS_SAMPLE_PAGES=N|default has been + specified in CREATE/ALTER TABLE. It is possible to distinguish whether + STATS_SAMPLE_PAGES=default has been specified or no STATS_SAMPLE_PAGES= is + given at all. +*/ +#define HA_CREATE_USED_STATS_SAMPLE_PAGES (1UL << 24) + +/* Create a sequence */ +#define HA_CREATE_USED_SEQUENCE (1UL << 25) +/* Tell binlog_show_create_table to print all engine options */ +#define HA_CREATE_PRINT_ALL_OPTIONS (1UL << 26) + +typedef ulonglong alter_table_operations; +typedef bool Log_func(THD*, TABLE*, bool, const uchar*, const uchar*); + +/* + These flags are set by the parser and describes the type of + operation(s) specified by the ALTER TABLE statement. 
+*/ + +// Set by parser for ADD [COLUMN] +#define ALTER_PARSER_ADD_COLUMN (1ULL << 0) +// Set by parser for DROP [COLUMN] +#define ALTER_PARSER_DROP_COLUMN (1ULL << 1) +// Set for CHANGE [COLUMN] | MODIFY [CHANGE] & mysql_recreate_table +#define ALTER_CHANGE_COLUMN (1ULL << 2) +// Set for ADD INDEX | ADD KEY | ADD PRIMARY KEY | ADD UNIQUE KEY | +// ADD UNIQUE INDEX | ALTER ADD [COLUMN] +#define ALTER_ADD_INDEX (1ULL << 3) +// Set for DROP PRIMARY KEY | DROP FOREIGN KEY | DROP KEY | DROP INDEX +#define ALTER_DROP_INDEX (1ULL << 4) +// Set for RENAME [TO] +#define ALTER_RENAME (1ULL << 5) +// Set for ORDER BY +#define ALTER_ORDER (1ULL << 6) +// Set for table_options, like table comment +#define ALTER_OPTIONS (1ULL << 7) +// Set for ALTER [COLUMN] ... SET DEFAULT ... | DROP DEFAULT +#define ALTER_CHANGE_COLUMN_DEFAULT (1ULL << 8) +// Set for DISABLE KEYS | ENABLE KEYS +#define ALTER_KEYS_ONOFF (1ULL << 9) +// Set for FORCE, ENGINE(same engine), by mysql_recreate_table() +#define ALTER_RECREATE (1ULL << 10) +// Set for CONVERT TO +#define ALTER_CONVERT_TO (1ULL << 11) +// Set for DROP ... ADD some_index +#define ALTER_RENAME_INDEX (1ULL << 12) +// Set for ADD FOREIGN KEY +#define ALTER_ADD_FOREIGN_KEY (1ULL << 21) +// Set for DROP FOREIGN KEY +#define ALTER_DROP_FOREIGN_KEY (1ULL << 22) +#define ALTER_CHANGE_INDEX_COMMENT (1ULL << 23) +// Set for ADD [COLUMN] FIRST | AFTER +#define ALTER_COLUMN_ORDER (1ULL << 25) +#define ALTER_ADD_CHECK_CONSTRAINT (1ULL << 27) +#define ALTER_DROP_CHECK_CONSTRAINT (1ULL << 28) +#define ALTER_RENAME_COLUMN (1ULL << 29) +#define ALTER_COLUMN_UNVERSIONED (1ULL << 30) +#define ALTER_ADD_SYSTEM_VERSIONING (1ULL << 31) +#define ALTER_DROP_SYSTEM_VERSIONING (1ULL << 32) +#define ALTER_ADD_PERIOD (1ULL << 33) +#define ALTER_DROP_PERIOD (1ULL << 34) + +/* + Following defines are used by ALTER_INPLACE_TABLE + + They do describe in more detail the type operation(s) to be executed + by the storage engine. 
For example, which type of type of index to be + added/dropped. These are set by fill_alter_inplace_info(). +*/ + +#define ALTER_RECREATE_TABLE ALTER_RECREATE +#define ALTER_CHANGE_CREATE_OPTION ALTER_OPTIONS +#define ALTER_ADD_COLUMN (ALTER_ADD_VIRTUAL_COLUMN | \ + ALTER_ADD_STORED_BASE_COLUMN | \ + ALTER_ADD_STORED_GENERATED_COLUMN) +#define ALTER_DROP_COLUMN (ALTER_DROP_VIRTUAL_COLUMN | \ + ALTER_DROP_STORED_COLUMN) +#define ALTER_COLUMN_DEFAULT ALTER_CHANGE_COLUMN_DEFAULT + +// Add non-unique, non-primary index +#define ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX (1ULL << 35) + +// Drop non-unique, non-primary index +#define ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX (1ULL << 36) + +// Add unique, non-primary index +#define ALTER_ADD_UNIQUE_INDEX (1ULL << 37) + +// Drop unique, non-primary index +#define ALTER_DROP_UNIQUE_INDEX (1ULL << 38) + +// Add primary index +#define ALTER_ADD_PK_INDEX (1ULL << 39) + +// Drop primary index +#define ALTER_DROP_PK_INDEX (1ULL << 40) + +// Virtual generated column +#define ALTER_ADD_VIRTUAL_COLUMN (1ULL << 41) +// Stored base (non-generated) column +#define ALTER_ADD_STORED_BASE_COLUMN (1ULL << 42) +// Stored generated column +#define ALTER_ADD_STORED_GENERATED_COLUMN (1ULL << 43) + +// Drop column +#define ALTER_DROP_VIRTUAL_COLUMN (1ULL << 44) +#define ALTER_DROP_STORED_COLUMN (1ULL << 45) + +// Rename column (verified; ALTER_RENAME_COLUMN may use original name) +#define ALTER_COLUMN_NAME (1ULL << 46) + +// Change column datatype +#define ALTER_VIRTUAL_COLUMN_TYPE (1ULL << 47) +#define ALTER_STORED_COLUMN_TYPE (1ULL << 48) + + +// Engine can handle type change by itself in ALGORITHM=INPLACE +#define ALTER_COLUMN_TYPE_CHANGE_BY_ENGINE (1ULL << 49) + +// Reorder column +#define ALTER_STORED_COLUMN_ORDER (1ULL << 50) + +// Reorder column +#define ALTER_VIRTUAL_COLUMN_ORDER (1ULL << 51) + +// Change column from NOT NULL to NULL +#define ALTER_COLUMN_NULLABLE (1ULL << 52) + +// Change column from NULL to NOT NULL +#define 
ALTER_COLUMN_NOT_NULLABLE (1ULL << 53) + +// Change column generation expression +#define ALTER_VIRTUAL_GCOL_EXPR (1ULL << 54) +#define ALTER_STORED_GCOL_EXPR (1ULL << 55) + +// column's engine options changed, something in field->option_struct +#define ALTER_COLUMN_OPTION (1ULL << 56) + +// MySQL alias for the same thing: +#define ALTER_COLUMN_STORAGE_TYPE ALTER_COLUMN_OPTION + +// Change the column format of column +#define ALTER_COLUMN_COLUMN_FORMAT (1ULL << 57) + +/** + Changes in generated columns that affect storage, + for example, when a vcol type or expression changes + and this vcol is indexed or used in a partitioning expression +*/ +#define ALTER_COLUMN_VCOL (1ULL << 58) + +/** + ALTER TABLE for a partitioned table. The engine needs to commit + online alter of all partitions atomically (using group_commit_ctx) +*/ +#define ALTER_PARTITIONED (1ULL << 59) + +/** + Change in index length such that it doesn't require index rebuild. +*/ +#define ALTER_COLUMN_INDEX_LENGTH (1ULL << 60) + +/** + Indicate that index order might have been changed. Disables inplace algorithm + by default (not for InnoDB). +*/ +#define ALTER_INDEX_ORDER (1ULL << 61) + +/** + Means that the ignorability of an index is changed. +*/ +#define ALTER_INDEX_IGNORABILITY (1ULL << 62) + +/* + Flags set in partition_flags when altering partitions +*/ + +// Set for ADD PARTITION +#define ALTER_PARTITION_ADD (1ULL << 1) +// Set for DROP PARTITION +#define ALTER_PARTITION_DROP (1ULL << 2) +// Set for COALESCE PARTITION +#define ALTER_PARTITION_COALESCE (1ULL << 3) +// Set for REORGANIZE PARTITION ... INTO +#define ALTER_PARTITION_REORGANIZE (1ULL << 4) +// Set for partition_options +#define ALTER_PARTITION_INFO (1ULL << 5) +// Set for LOAD INDEX INTO CACHE ... PARTITION +// Set for CACHE INDEX ... 
PARTITION +#define ALTER_PARTITION_ADMIN (1ULL << 6) +// Set for REBUILD PARTITION +#define ALTER_PARTITION_REBUILD (1ULL << 7) +// Set for partitioning operations specifying ALL keyword +#define ALTER_PARTITION_ALL (1ULL << 8) +// Set for REMOVE PARTITIONING +#define ALTER_PARTITION_REMOVE (1ULL << 9) +// Set for EXCHANGE PARITION +#define ALTER_PARTITION_EXCHANGE (1ULL << 10) +// Set by Sql_cmd_alter_table_truncate_partition::execute() +#define ALTER_PARTITION_TRUNCATE (1ULL << 11) +// Set for REORGANIZE PARTITION +#define ALTER_PARTITION_TABLE_REORG (1ULL << 12) +#define ALTER_PARTITION_CONVERT_IN (1ULL << 13) +#define ALTER_PARTITION_CONVERT_OUT (1ULL << 14) +// Set for vers_add_auto_hist_parts() operation +#define ALTER_PARTITION_AUTO_HIST (1ULL << 15) + +/* + This is master database for most of system tables. However there + can be other databases which can hold system tables. Respective + storage engines define their own system database names. +*/ +extern const char *mysqld_system_database; + +/* + Structure to hold list of system_database.system_table. + This is used at both mysqld and storage engine layer. +*/ +struct st_system_tablename +{ + const char *db; + const char *tablename; +}; + + +typedef ulonglong my_xid; // this line is the same as in log_event.h +#define MYSQL_XID_PREFIX "MySQLXid" +#define MYSQL_XID_PREFIX_LEN 8 // must be a multiple of 8 +#define MYSQL_XID_OFFSET (MYSQL_XID_PREFIX_LEN+sizeof(server_id)) +#define MYSQL_XID_GTRID_LEN (MYSQL_XID_OFFSET+sizeof(my_xid)) + +#define XIDDATASIZE MYSQL_XIDDATASIZE +#define MAXGTRIDSIZE 64 +#define MAXBQUALSIZE 64 + +#define COMPATIBLE_DATA_YES 0 +#define COMPATIBLE_DATA_NO 1 + +/** + struct xid_t is binary compatible with the XID structure as + in the X/Open CAE Specification, Distributed Transaction Processing: + The XA Specification, X/Open Company Ltd., 1991. 
+ http://www.opengroup.org/bookstore/catalog/c193.htm + + @see MYSQL_XID in mysql/plugin.h +*/ +struct xid_t { + long formatID; + long gtrid_length; + long bqual_length; + char data[XIDDATASIZE]; // not \0-terminated ! + + xid_t() = default; /* Remove gcc warning */ + bool eq(struct xid_t *xid) const + { return !xid->is_null() && eq(xid->gtrid_length, xid->bqual_length, xid->data); } + bool eq(long g, long b, const char *d) const + { return !is_null() && g == gtrid_length && b == bqual_length && !memcmp(d, data, g+b); } + void set(struct xid_t *xid) + { memcpy(this, xid, xid->length()); } + void set(long f, const char *g, long gl, const char *b, long bl) + { + formatID= f; + if ((gtrid_length= gl)) + memcpy(data, g, gl); + if ((bqual_length= bl)) + memcpy(data+gl, b, bl); + } + // Populate server_id if it's specified, otherwise use the current server_id + void set(ulonglong xid, decltype(::server_id) trx_server_id= server_id) + { + my_xid tmp; + formatID= 1; + set(MYSQL_XID_PREFIX_LEN, 0, MYSQL_XID_PREFIX); + memcpy(data+MYSQL_XID_PREFIX_LEN, &trx_server_id, sizeof(trx_server_id)); + tmp= xid; + memcpy(data+MYSQL_XID_OFFSET, &tmp, sizeof(tmp)); + gtrid_length=MYSQL_XID_GTRID_LEN; + } + void set(long g, long b, const char *d) + { + formatID= 1; + gtrid_length= g; + bqual_length= b; + memcpy(data, d, g+b); + } + bool is_null() const { return formatID == -1; } + void null() { formatID= -1; } + my_xid quick_get_my_xid() + { + my_xid tmp; + memcpy(&tmp, data+MYSQL_XID_OFFSET, sizeof(tmp)); + return tmp; + } + my_xid get_my_xid() + { + return gtrid_length == MYSQL_XID_GTRID_LEN && bqual_length == 0 && + !memcmp(data, MYSQL_XID_PREFIX, MYSQL_XID_PREFIX_LEN) ? 
+ quick_get_my_xid() : 0; + } + decltype(::server_id) get_trx_server_id() + { + decltype(::server_id) trx_server_id; + memcpy(&trx_server_id, data+MYSQL_XID_PREFIX_LEN, sizeof(trx_server_id)); + return trx_server_id; + } + uint length() + { + return static_cast<uint>(sizeof(formatID)) + key_length(); + } + uchar *key() const + { + return (uchar *)&gtrid_length; + } + uint key_length() const + { + return static_cast<uint>(sizeof(gtrid_length)+sizeof(bqual_length)+ + gtrid_length+bqual_length); + } +}; +typedef struct xid_t XID; + +/* + Enumerates a sequence in the order of + their creation that is in the top-down order of the index file. + Ranges from zero through MAX_binlog_id. + Not confuse the value with the binlog file numerical suffix, + neither with the binlog file line in the binlog index file. +*/ +typedef uint Binlog_file_id; +const Binlog_file_id MAX_binlog_id= UINT_MAX; +const my_off_t MAX_off_t = (~(my_off_t) 0); +/* + Compound binlog-id and byte offset of transaction's first event + in a sequence (e.g the recovery sequence) of binlog files. + Binlog_offset(0,0) is the minimum value to mean + the first byte of the first binlog file. +*/ +typedef std::pair<Binlog_file_id, my_off_t> Binlog_offset; + +/* binlog-based recovery transaction descriptor */ +struct xid_recovery_member +{ + my_xid xid; + uint in_engine_prepare; // number of engines that have xid prepared + bool decided_to_commit; + /* + Semisync recovery binlog offset. It's initialized with the maximum + unreachable offset. The max value will remain for any transaction + not found in binlog to yield its rollback decision as it's guaranteed + to be within a truncated tail part of the binlog. 
+ */ + Binlog_offset binlog_coord; + XID *full_xid; // needed by wsrep or past it recovery + decltype(::server_id) server_id; // server id of orginal server + + xid_recovery_member(my_xid xid_arg, uint prepare_arg, bool decided_arg, + XID *full_xid_arg, decltype(::server_id) server_id_arg) + : xid(xid_arg), in_engine_prepare(prepare_arg), + decided_to_commit(decided_arg), + binlog_coord(Binlog_offset(MAX_binlog_id, MAX_off_t)), + full_xid(full_xid_arg), server_id(server_id_arg) {}; +}; + +/* for recover() handlerton call */ +#define MIN_XID_LIST_SIZE 128 +#define MAX_XID_LIST_SIZE (1024*128) + +/* Statistics about batch operations like bulk_insert */ +struct ha_copy_info +{ + ha_rows records; /* Used to check if rest of variables can be used */ + ha_rows touched; + ha_rows copied; + ha_rows deleted; + ha_rows updated; +}; + +/* The handler for a table type. Will be included in the TABLE structure */ + +struct TABLE; + +/* + Make sure that the order of schema_tables and enum_schema_tables are the same. 
+*/ +enum enum_schema_tables +{ + SCH_ALL_PLUGINS, + SCH_APPLICABLE_ROLES, + SCH_CHARSETS, + SCH_CHECK_CONSTRAINTS, + SCH_COLLATIONS, + SCH_COLLATION_CHARACTER_SET_APPLICABILITY, + SCH_COLUMNS, + SCH_COLUMN_PRIVILEGES, + SCH_ENABLED_ROLES, + SCH_ENGINES, + SCH_EVENTS, + SCH_EXPLAIN_TABULAR, + SCH_EXPLAIN_JSON, + SCH_ANALYZE_TABULAR, + SCH_ANALYZE_JSON, + SCH_FILES, + SCH_GLOBAL_STATUS, + SCH_GLOBAL_VARIABLES, + SCH_KEYWORDS, + SCH_KEY_CACHES, + SCH_KEY_COLUMN_USAGE, + SCH_OPEN_TABLES, + SCH_OPT_TRACE, + SCH_PARAMETERS, + SCH_PARTITIONS, + SCH_PLUGINS, + SCH_PROCESSLIST, + SCH_PROFILES, + SCH_REFERENTIAL_CONSTRAINTS, + SCH_PROCEDURES, + SCH_SCHEMATA, + SCH_SCHEMA_PRIVILEGES, + SCH_SESSION_STATUS, + SCH_SESSION_VARIABLES, + SCH_STATISTICS, + SCH_SQL_FUNCTIONS, + SCH_SYSTEM_VARIABLES, + SCH_TABLES, + SCH_TABLESPACES, + SCH_TABLE_CONSTRAINTS, + SCH_TABLE_NAMES, + SCH_TABLE_PRIVILEGES, + SCH_TRIGGERS, + SCH_USER_PRIVILEGES, + SCH_VIEWS +}; + +struct TABLE_SHARE; +struct HA_CREATE_INFO; +struct st_foreign_key_info; +typedef struct st_foreign_key_info FOREIGN_KEY_INFO; +typedef bool (stat_print_fn)(THD *thd, const char *type, size_t type_len, + const char *file, size_t file_len, + const char *status, size_t status_len); +enum ha_stat_type { HA_ENGINE_STATUS, HA_ENGINE_LOGS, HA_ENGINE_MUTEX }; +extern MYSQL_PLUGIN_IMPORT st_plugin_int *hton2plugin[MAX_HA]; + +struct handlerton; +#define view_pseudo_hton ((handlerton *)1) + +/* + Definitions for engine-specific table/field/index options in the CREATE TABLE. + + Options are declared with HA_*OPTION_* macros (HA_TOPTION_NUMBER, + HA_FOPTION_ENUM, HA_IOPTION_STRING, etc). + + Every macros takes the option name, and the name of the underlying field of + the appropriate C structure. The "appropriate C structure" is + ha_table_option_struct for table level options, + ha_field_option_struct for field level options, + ha_index_option_struct for key level options. 
The engine either + defines a structure of this name, or uses #define's to map + these "appropriate" names to the actual structure type name. + + ULL options use a ulonglong as the backing store. + HA_*OPTION_NUMBER() takes the option name, the structure field name, + the default value for the option, min, max, and blk_siz values. + + STRING options use a char* as a backing store. + HA_*OPTION_STRING takes the option name and the structure field name. + The default value will be 0. + + ENUM options use a uint as a backing store (not enum!!!). + HA_*OPTION_ENUM takes the option name, the structure field name, + the default value for the option as a number, and a string with the + permitted values for this enum - one string with comma separated values, + for example: "gzip,bzip2,lzma" + + BOOL options use a bool as a backing store. + HA_*OPTION_BOOL takes the option name, the structure field name, + and the default value for the option. + From the SQL, BOOL options accept YES/NO, ON/OFF, and 1/0. + + The name of the option is limited to 255 bytes, + the value (for string options) - to the 32767 bytes. + + See ha_example.cc for an example. 
+*/ + +struct ha_table_option_struct; +struct ha_field_option_struct; +struct ha_index_option_struct; + +enum ha_option_type { HA_OPTION_TYPE_ULL, /* unsigned long long */ + HA_OPTION_TYPE_STRING, /* char * */ + HA_OPTION_TYPE_ENUM, /* uint */ + HA_OPTION_TYPE_BOOL, /* bool */ + HA_OPTION_TYPE_SYSVAR};/* type of the sysval */ + +#define HA_xOPTION_NUMBER(name, struc, field, def, min, max, blk_siz) \ + { HA_OPTION_TYPE_ULL, name, sizeof(name)-1, \ + offsetof(struc, field), def, min, max, blk_siz, 0, 0 } +#define HA_xOPTION_STRING(name, struc, field) \ + { HA_OPTION_TYPE_STRING, name, sizeof(name)-1, \ + offsetof(struc, field), 0, 0, 0, 0, 0, 0} +#define HA_xOPTION_ENUM(name, struc, field, values, def) \ + { HA_OPTION_TYPE_ENUM, name, sizeof(name)-1, \ + offsetof(struc, field), def, 0, \ + sizeof(values)-1, 0, values, 0 } +#define HA_xOPTION_BOOL(name, struc, field, def) \ + { HA_OPTION_TYPE_BOOL, name, sizeof(name)-1, \ + offsetof(struc, field), def, 0, 1, 0, 0, 0 } +#define HA_xOPTION_SYSVAR(name, struc, field, sysvar) \ + { HA_OPTION_TYPE_SYSVAR, name, sizeof(name)-1, \ + offsetof(struc, field), 0, 0, 0, 0, 0, MYSQL_SYSVAR(sysvar) } +#define HA_xOPTION_END { HA_OPTION_TYPE_ULL, 0, 0, 0, 0, 0, 0, 0, 0, 0 } + +#define HA_TOPTION_NUMBER(name, field, def, min, max, blk_siz) \ + HA_xOPTION_NUMBER(name, ha_table_option_struct, field, def, min, max, blk_siz) +#define HA_TOPTION_STRING(name, field) \ + HA_xOPTION_STRING(name, ha_table_option_struct, field) +#define HA_TOPTION_ENUM(name, field, values, def) \ + HA_xOPTION_ENUM(name, ha_table_option_struct, field, values, def) +#define HA_TOPTION_BOOL(name, field, def) \ + HA_xOPTION_BOOL(name, ha_table_option_struct, field, def) +#define HA_TOPTION_SYSVAR(name, field, sysvar) \ + HA_xOPTION_SYSVAR(name, ha_table_option_struct, field, sysvar) +#define HA_TOPTION_END HA_xOPTION_END + +#define HA_FOPTION_NUMBER(name, field, def, min, max, blk_siz) \ + HA_xOPTION_NUMBER(name, ha_field_option_struct, field, def, min, max, 
blk_siz) +#define HA_FOPTION_STRING(name, field) \ + HA_xOPTION_STRING(name, ha_field_option_struct, field) +#define HA_FOPTION_ENUM(name, field, values, def) \ + HA_xOPTION_ENUM(name, ha_field_option_struct, field, values, def) +#define HA_FOPTION_BOOL(name, field, def) \ + HA_xOPTION_BOOL(name, ha_field_option_struct, field, def) +#define HA_FOPTION_SYSVAR(name, field, sysvar) \ + HA_xOPTION_SYSVAR(name, ha_field_option_struct, field, sysvar) +#define HA_FOPTION_END HA_xOPTION_END + +#define HA_IOPTION_NUMBER(name, field, def, min, max, blk_siz) \ + HA_xOPTION_NUMBER(name, ha_index_option_struct, field, def, min, max, blk_siz) +#define HA_IOPTION_STRING(name, field) \ + HA_xOPTION_STRING(name, ha_index_option_struct, field) +#define HA_IOPTION_ENUM(name, field, values, def) \ + HA_xOPTION_ENUM(name, ha_index_option_struct, field, values, def) +#define HA_IOPTION_BOOL(name, field, def) \ + HA_xOPTION_BOOL(name, ha_index_option_struct, field, def) +#define HA_IOPTION_SYSVAR(name, field, sysvar) \ + HA_xOPTION_SYSVAR(name, ha_index_option_struct, field, sysvar) +#define HA_IOPTION_END HA_xOPTION_END + +typedef struct st_ha_create_table_option { + enum ha_option_type type; + const char *name; + size_t name_length; + ptrdiff_t offset; + ulonglong def_value; + ulonglong min_value, max_value, block_size; + const char *values; + struct st_mysql_sys_var *var; +} ha_create_table_option; + +class handler; +class group_by_handler; +class derived_handler; +class select_handler; +struct Query; +typedef class st_select_lex SELECT_LEX; +typedef struct st_order ORDER; + +/* + handlerton is a singleton structure - one instance per storage engine - + to provide access to storage engine functionality that works on the + "global" level (unlike handler class that works on a per-table basis) + + usually handlerton instance is defined statically in ha_xxx.cc as + + static handlerton { ... } xxx_hton; + + savepoint_*, prepare, recover, and *_by_xid pointers can be 0. 
+*/ +struct handlerton +{ + /* + Historical number used for frm file to determine the correct + storage engine. This is going away and new engines will just use + "name" for this. + */ + enum legacy_db_type db_type; + /* + each storage engine has it's own memory area (actually a pointer) + in the thd, for storing per-connection information. + It is accessed as + + thd->ha_data[xxx_hton.slot] + + slot number is initialized by MySQL after xxx_init() is called. + */ + uint slot; + /* + to store per-savepoint data storage engine is provided with an area + of a requested size (0 is ok here). + savepoint_offset must be initialized statically to the size of + the needed memory to store per-savepoint information. + After xxx_init it is changed to be an offset to savepoint storage + area and need not be used by storage engine. + see binlog_hton and binlog_savepoint_set/rollback for an example. + */ + uint savepoint_offset; + /* + handlerton methods: + + close_connection is only called if + thd->ha_data[xxx_hton.slot] is non-zero, so even if you don't need + this storage area - set it to something, so that MySQL would know + this storage engine was accessed in this connection + */ + int (*close_connection)(handlerton *hton, THD *thd); + /* + Tell handler that query has been killed. + */ + void (*kill_query)(handlerton *hton, THD *thd, enum thd_kill_levels level); + /* + sv points to an uninitialized storage area of requested size + (see savepoint_offset description) + */ + int (*savepoint_set)(handlerton *hton, THD *thd, void *sv); + /* + sv points to a storage area, that was earlier passed + to the savepoint_set call + */ + int (*savepoint_rollback)(handlerton *hton, THD *thd, void *sv); + /** + Check if storage engine allows to release metadata locks which were + acquired after the savepoint if rollback to savepoint is done. + @return true - If it is safe to release MDL locks. + false - If it is not. 
+ */ + bool (*savepoint_rollback_can_release_mdl)(handlerton *hton, THD *thd); + int (*savepoint_release)(handlerton *hton, THD *thd, void *sv); + /* + 'all' is true if it's a real commit, that makes persistent changes + 'all' is false if it's not in fact a commit but an end of the + statement that is part of the transaction. + NOTE 'all' is also false in auto-commit mode where 'end of statement' + and 'real commit' mean the same event. + */ + int (*commit)(handlerton *hton, THD *thd, bool all); + /* + The commit_ordered() method is called prior to the commit() method, after + the transaction manager has decided to commit (not rollback) the + transaction. Unlike commit(), commit_ordered() is called only when the + full transaction is committed, not for each commit of statement + transaction in a multi-statement transaction. + + Not that like prepare(), commit_ordered() is only called when 2-phase + commit takes place. Ie. when no binary log and only a single engine + participates in a transaction, one commit() is called, no + commit_ordered(). So engines must be prepared for this. + + The calls to commit_ordered() in multiple parallel transactions is + guaranteed to happen in the same order in every participating + handler. This can be used to ensure the same commit order among multiple + handlers (eg. in table handler and binlog). So if transaction T1 calls + into commit_ordered() of handler A before T2, then T1 will also call + commit_ordered() of handler B before T2. + + Engines that implement this method should during this call make the + transaction visible to other transactions, thereby making the order of + transaction commits be defined by the order of commit_ordered() calls. + + The intention is that commit_ordered() should do the minimal amount of + work that needs to happen in consistent commit order among handlers. 
To + preserve ordering, calls need to be serialised on a global mutex, so + doing any time-consuming or blocking operations in commit_ordered() will + limit scalability. + + Handlers can rely on commit_ordered() calls to be serialised (no two + calls can run in parallel, so no extra locking on the handler part is + required to ensure this). + + Note that commit_ordered() can be called from a different thread than the + one handling the transaction! So it can not do anything that depends on + thread local storage, in particular it can not call my_error() and + friends (instead it can store the error code and delay the call of + my_error() to the commit() method). + + Similarly, since commit_ordered() returns void, any return error code + must be saved and returned from the commit() method instead. + + The commit_ordered method is optional, and can be left unset if not + needed in a particular handler (then there will be no ordering guarantees + wrt. other engines and binary log). + */ + void (*commit_ordered)(handlerton *hton, THD *thd, bool all); + int (*rollback)(handlerton *hton, THD *thd, bool all); + int (*prepare)(handlerton *hton, THD *thd, bool all); + /* + The prepare_ordered method is optional. If set, it will be called after + successful prepare() in all handlers participating in 2-phase + commit. Like commit_ordered(), it is called only when the full + transaction is committed, not for each commit of statement transaction. + + The calls to prepare_ordered() among multiple parallel transactions are + ordered consistently with calls to commit_ordered(). This means that + calls to prepare_ordered() effectively define the commit order, and that + each handler will see the same sequence of transactions calling into + prepare_ordered() and commit_ordered(). + + Thus, prepare_ordered() can be used to define commit order for handlers + that need to do this in the prepare step (like binlog). 
It can also be + used to release transaction's locks early in an order consistent with the + order transactions will be eventually committed. + + Like commit_ordered(), prepare_ordered() calls are serialised to maintain + ordering, so the intention is that they should execute fast, with only + the minimal amount of work needed to define commit order. Handlers can + rely on this serialisation, and do not need to do any extra locking to + avoid two prepare_ordered() calls running in parallel. + + Like commit_ordered(), prepare_ordered() is not guaranteed to be called + in the context of the thread handling the rest of the transaction. So it + cannot invoke code that relies on thread local storage, in particular it + cannot call my_error(). + + prepare_ordered() cannot cause a rollback by returning an error, all + possible errors must be handled in prepare() (the prepare_ordered() + method returns void). In case of some fatal error, a record of the error + must be made internally by the engine and returned from commit() later. + + Note that for user-level XA SQL commands, no consistent ordering among + prepare_ordered() and commit_ordered() is guaranteed (as that would + require blocking all other commits for an indefinite time). + + When 2-phase commit is not used (eg. only one engine (and no binlog) in + transaction), neither prepare() nor prepare_ordered() is called. + */ + void (*prepare_ordered)(handlerton *hton, THD *thd, bool all); + int (*recover)(handlerton *hton, XID *xid_list, uint len); + int (*commit_by_xid)(handlerton *hton, XID *xid); + int (*rollback_by_xid)(handlerton *hton, XID *xid); + /* + The commit_checkpoint_request() handlerton method is used to checkpoint + the XA recovery process for storage engines that support two-phase + commit. 
+ + The method is optional - an engine that does not implemented is expected + to work the traditional way, where every commit() durably flushes the + transaction to disk in the engine before completion, so XA recovery will + no longer be needed for that transaction. + + An engine that does implement commit_checkpoint_request() is also + expected to implement commit_ordered(), so that ordering of commits is + consistent between 2pc participants. Such engine is no longer required to + durably flush to disk transactions in commit(), provided that the + transaction has been successfully prepare()d and commit_ordered(); thus + potentionally saving one fsync() call. (Engine must still durably flush + to disk in commit() when no prepare()/commit_ordered() steps took place, + at least if durable commits are wanted; this happens eg. if binlog is + disabled). + + The TC will periodically (eg. once per binlog rotation) call + commit_checkpoint_request(). When this happens, the engine must arrange + for all transaction that have completed commit_ordered() to be durably + flushed to disk (this does not include transactions that might be in the + middle of executing commit_ordered()). When such flush has completed, the + engine must call commit_checkpoint_notify_ha(), passing back the opaque + "cookie". + + The flush and call of commit_checkpoint_notify_ha() need not happen + immediately - it can be scheduled and performed asynchronously (ie. as + part of next prepare(), or sync every second, or whatever), but should + not be postponed indefinitely. It is however also permissible to do it + immediately, before returning from commit_checkpoint_request(). + + When commit_checkpoint_notify_ha() is called, the TC will know that the + transactions are durably committed, and thus no longer require XA + recovery. It uses that to reduce the work needed for any subsequent XA + recovery process. 
+ */ + void (*commit_checkpoint_request)(void *cookie); + /* + "Disable or enable checkpointing internal to the storage engine. This is + used for FLUSH TABLES WITH READ LOCK AND DISABLE CHECKPOINT to ensure that + the engine will never start any recovery from a time between + FLUSH TABLES ... ; UNLOCK TABLES. + + While checkpointing is disabled, the engine should pause any background + write activity (such as tablespace checkpointing) that require consistency + between different files (such as transaction log and tablespace files) for + crash recovery to succeed. The idea is to use this to make safe + multi-volume LVM snapshot backups. + */ + int (*checkpoint_state)(handlerton *hton, bool disabled); + void *(*create_cursor_read_view)(handlerton *hton, THD *thd); + void (*set_cursor_read_view)(handlerton *hton, THD *thd, void *read_view); + void (*close_cursor_read_view)(handlerton *hton, THD *thd, void *read_view); + handler *(*create)(handlerton *hton, TABLE_SHARE *table, MEM_ROOT *mem_root); + void (*drop_database)(handlerton *hton, char* path); + /* + return 0 if dropped successfully, + -1 if nothing was done by design (as in e.g. blackhole) + an error code (e.g. HA_ERR_NO_SUCH_TABLE) otherwise + */ + int (*drop_table)(handlerton *hton, const char* path); + int (*panic)(handlerton *hton, enum ha_panic_function flag); + int (*start_consistent_snapshot)(handlerton *hton, THD *thd); + bool (*flush_logs)(handlerton *hton); + bool (*show_status)(handlerton *hton, THD *thd, stat_print_fn *print, enum ha_stat_type stat); + uint (*partition_flags)(); + alter_table_operations (*alter_table_flags)(alter_table_operations flags); + int (*fill_is_table)(handlerton *hton, THD *thd, TABLE_LIST *tables, + class Item *cond, + enum enum_schema_tables); + uint32 flags; /* global handler flags */ + /* + Those handlerton functions below are properly initialized at handler + init. 
+ */ + int (*binlog_func)(handlerton *hton, THD *thd, enum_binlog_func fn, void *arg); + void (*binlog_log_query)(handlerton *hton, THD *thd, + enum_binlog_command binlog_command, + const char *query, uint query_length, + const char *db, const char *table_name); + + void (*abort_transaction)(handlerton *hton, THD *bf_thd, THD *victim_thd, + my_bool signal) __attribute__((nonnull)); + int (*set_checkpoint)(handlerton *hton, const XID *xid); + int (*get_checkpoint)(handlerton *hton, XID* xid); + /** + Check if the version of the table matches the version in the .frm + file. + + This is mainly used to verify in recovery to check if an inplace + ALTER TABLE succeded. + Storage engines that does not support inplace alter table does not + have to implement this function. + + @param hton handlerton + @param path Path for table + @param version The unique id that is stored in the .frm file for + CREATE and updated for each ALTER TABLE (but not for + simple renames). + This is the ID used for the final table. + @param create_id The value returned from handler->table_version() for + the original table (before ALTER TABLE). + + @retval 0 If id matches or table is newer than create_id (depending + on what version check the engine supports. This means that + The (inplace) alter table did succeed. + @retval # > 0 Alter table did not succeed. + + Related to handler::discover_check_version(). 
+ */ + int (*check_version)(handlerton *hton, const char *path, + const LEX_CUSTRING *version, ulonglong create_id); + + /* Called for all storage handlers after ddl recovery is done */ + void (*signal_ddl_recovery_done)(handlerton *hton); + + /* + Optional clauses in the CREATE/ALTER TABLE + */ + ha_create_table_option *table_options; // table level options + ha_create_table_option *field_options; // these are specified per field + ha_create_table_option *index_options; // these are specified per index + + /** + The list of extensions of files created for a single table in the + database directory (datadir/db_name/). + + Used by open_table_error(), by the default rename_table and delete_table + handler methods, and by the default discovery implementation. + + For engines that have more than one file name extensions (separate + metadata, index, and/or data files), the order of elements is relevant. + First element of engine file name extensions array should be metadata + file extention. This is implied by the open_table_error() + and the default discovery implementation. + + Second element - data file extension. This is implied + assumed by REPAIR TABLE ... USE_FRM implementation. + */ + const char **tablefile_extensions; // by default - empty list + + /********************************************************************** + Functions to intercept queries + **********************************************************************/ + + /* + Create and return a group_by_handler, if the storage engine can execute + the summary / group by query. + If the storage engine can't do that, return NULL. + + The server guaranteeds that all tables in the list belong to this + storage engine. + */ + group_by_handler *(*create_group_by)(THD *thd, Query *query); + + /* + Create and return a derived_handler if the storage engine can execute + the derived table 'derived', otherwise return NULL. + In a general case 'derived' may contain tables not from the engine. 
+ If the engine cannot handle or does not want to handle such pushed derived + the function create_group_by has to return NULL. + */ + derived_handler *(*create_derived)(THD *thd, TABLE_LIST *derived); + + /* + Create and return a select_handler if the storage engine can execute + the select statement 'select, otherwise return NULL + */ + select_handler *(*create_select) (THD *thd, SELECT_LEX *select); + + /********************************************************************* + Table discovery API. + It allows the server to "discover" tables that exist in the storage + engine, without user issuing an explicit CREATE TABLE statement. + **********************************************************************/ + + /* + This method is required for any engine that supports automatic table + discovery, there is no default implementation. + + Given a TABLE_SHARE discover_table() fills it in with a correct table + structure using one of the TABLE_SHARE::init_from_* methods. + + Returns HA_ERR_NO_SUCH_TABLE if the table did not exist in the engine, + zero if the table was discovered successfully, or any other + HA_ERR_* error code as appropriate if the table existed, but the + discovery failed. + */ + int (*discover_table)(handlerton *hton, THD* thd, TABLE_SHARE *share); + + /* + The discover_table_names method tells the server + about all tables in the specified database that the engine + knows about. Tables (or file names of tables) are added to + the provided discovered_list collector object using + add_table() or add_file() methods. 
+ */ + class discovered_list + { + public: + virtual bool add_table(const char *tname, size_t tlen) = 0; + virtual bool add_file(const char *fname) = 0; + protected: virtual ~discovered_list() = default; + }; + + /* + By default (if not implemented by the engine, but the discover_table() is + implemented) it will perform a file-based discovery: + + - if tablefile_extensions[0] is not null, this will discovers all tables + with the tablefile_extensions[0] extension. + + Returns 0 on success and 1 on error. + */ + int (*discover_table_names)(handlerton *hton, LEX_CSTRING *db, MY_DIR *dir, + discovered_list *result); + + /* + This is a method that allows to server to check if a table exists without + an overhead of the complete discovery. + + By default (if not implemented by the engine, but the discovery_table() is + implemented) it will try to perform a file-based discovery: + + - if tablefile_extensions[0] is not null this will look for a file name + with the tablefile_extensions[0] extension. + + - if tablefile_extensions[0] is null, this will resort to discover_table(). + + Note that resorting to discover_table() is slow and the engine + should probably implement its own discover_table_existence() method, + if its tablefile_extensions[0] is null. + + Returns 1 if the table exists and 0 if it does not. + */ + int (*discover_table_existence)(handlerton *hton, const char *db, + const char *table_name); + + /* + This is the assisted table discovery method. Unlike the fully + automatic discovery as above, here a user is expected to issue an + explicit CREATE TABLE with the appropriate table attributes to + "assist" the discovery of a table. But this "discovering" CREATE TABLE + statement will not specify the table structure - the engine discovers + it using this method. 
For example, FederatedX uses it in + + CREATE TABLE t1 ENGINE=FEDERATED CONNECTION="mysql://foo/bar/t1"; + + Given a TABLE_SHARE discover_table_structure() fills it in with a correct + table structure using one of the TABLE_SHARE::init_from_* methods. + + Assisted discovery works independently from the automatic discover. + An engine is allowed to support only assisted discovery and not + support automatic one. Or vice versa. + */ + int (*discover_table_structure)(handlerton *hton, THD* thd, + TABLE_SHARE *share, HA_CREATE_INFO *info); + + /* + Notify the storage engine that the definition of the table (and the .frm + file) has changed. Returns 0 if ok. + */ + int (*notify_tabledef_changed)(handlerton *hton, LEX_CSTRING *db, + LEX_CSTRING *table_name, LEX_CUSTRING *frm, + LEX_CUSTRING *org_tabledef_version, + handler *file); + + /* + System Versioning + */ + /** Determine if system-versioned data was modified by the transaction. + @param[in,out] thd current session + @param[out] trx_id transaction start ID + @return transaction commit ID + @retval 0 if no system-versioned data was affected by the transaction */ + ulonglong (*prepare_commit_versioned)(THD *thd, ulonglong *trx_id); + + /** Disable or enable the internal writes of a storage engine */ + void (*disable_internal_writes)(bool disable); + + /* backup */ + void (*prepare_for_backup)(void); + void (*end_backup)(void); + + /* Server shutdown early notification.*/ + void (*pre_shutdown)(void); + + /* + Inform handler that partitioning engine has changed the .frm and the .par + files + */ + int (*create_partitioning_metadata)(const char *path, + const char *old_path, + chf_create_flags action_flag); +}; + + +extern const char *hton_no_exts[]; + +static inline LEX_CSTRING *hton_name(const handlerton *hton) +{ + return &(hton2plugin[hton->slot]->name); +} + +static inline handlerton *plugin_hton(plugin_ref plugin) +{ + return plugin_data(plugin, handlerton *); +} + +static inline sys_var 
*find_hton_sysvar(handlerton *hton, st_mysql_sys_var *var) +{ + return find_plugin_sysvar(hton2plugin[hton->slot], var); +} + +handlerton *ha_default_handlerton(THD *thd); +handlerton *ha_default_tmp_handlerton(THD *thd); + +/* Possible flags of a handlerton (there can be 32 of them) */ +#define HTON_NO_FLAGS 0 +#define HTON_CLOSE_CURSORS_AT_COMMIT (1 << 0) +#define HTON_ALTER_NOT_SUPPORTED (1 << 1) //Engine does not support alter +#define HTON_CAN_RECREATE (1 << 2) //Delete all is used for truncate +#define HTON_HIDDEN (1 << 3) //Engine does not appear in lists +#define HTON_NOT_USER_SELECTABLE (1 << 5) +#define HTON_TEMPORARY_NOT_SUPPORTED (1 << 6) //Having temporary tables not supported +#define HTON_SUPPORT_LOG_TABLES (1 << 7) //Engine supports log tables +#define HTON_NO_PARTITION (1 << 8) //Not partition of these tables + +/* + This flag should be set when deciding that the engine does not allow + row based binary logging (RBL) optimizations. + + Currently, setting this flag, means that table's read/write_set will + be left untouched when logging changes to tables in this engine. In + practice this means that the server will not mess around with + table->write_set and/or table->read_set when using RBL and deciding + whether to log full or minimal rows. + + It's valuable for instance for virtual tables, eg: Performance + Schema which have no meaning for replication. +*/ +#define HTON_NO_BINLOG_ROW_OPT (1 << 9) +#define HTON_SUPPORTS_EXTENDED_KEYS (1 <<10) //supports extended keys +#define HTON_NATIVE_SYS_VERSIONING (1 << 11) //Engine supports System Versioning + +// MySQL compatibility. Unused. +#define HTON_SUPPORTS_FOREIGN_KEYS (1 << 0) //Foreign key constraint supported. 
+ +#define HTON_CAN_MERGE (1 <<11) //Merge type table +// Engine needs to access the main connect string in partitions +#define HTON_CAN_READ_CONNECT_STRING_IN_PARTITION (1 <<12) + +/* can be replicated by wsrep replication provider plugin */ +#define HTON_WSREP_REPLICATION (1 << 13) + +/* + Set this on the *slave* that's connected to a shared with a master storage. + The slave will ignore any CREATE TABLE, DROP or updates for this engine. +*/ +#define HTON_IGNORE_UPDATES (1 << 14) + +/* + Set this on the *master* that's connected to a shared with a slave storage. + The table may not exists on the slave. The effects of having this flag are: + - ALTER TABLE that changes engine from this table to another engine will + be replicated as CREATE + INSERT + - CREATE ... LIKE shared_table will be replicated as a full CREATE TABLE + - ALTER TABLE for this engine will have "IF EXISTS" added. + - RENAME TABLE for this engine will have "IF EXISTS" added. + - DROP TABLE for this engine will have "IF EXISTS" added. +*/ +#define HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE (1 << 15) + +/* + True if handler cannot rollback transactions. If not true, the transaction + will be put in the transactional binlog cache. + For some engines, like Aria, the rollback can happen in case of crash, but + not trough a handler rollback call. +*/ +#define HTON_NO_ROLLBACK (1 << 16) + +/* + This storage engine can support both transactional and non transactional + tables +*/ +#define HTON_TRANSACTIONAL_AND_NON_TRANSACTIONAL (1 << 17) + +/* + Table requires and close and reopen after truncate + If the handler has HTON_CAN_RECREATE, this flag is not used +*/ +#define HTON_REQUIRES_CLOSE_AFTER_TRUNCATE (1 << 18) + +/* Truncate requires that all other handlers are closed */ +#define HTON_TRUNCATE_REQUIRES_EXCLUSIVE_USE (1 << 19) +/* + Used by mysql_inplace_alter_table() to decide if we should call + hton->notify_tabledef_changed() before commit (MyRocks) or after (InnoDB). 
+*/ +#define HTON_REQUIRES_NOTIFY_TABLEDEF_CHANGED_AFTER_COMMIT (1 << 20) + +class Ha_trx_info; + +struct THD_TRANS +{ + /* true is not all entries in the ht[] support 2pc */ + bool no_2pc; + /* storage engines that registered in this transaction */ + Ha_trx_info *ha_list; + /* + The purpose of this flag is to keep track of non-transactional + tables that were modified in scope of: + - transaction, when the variable is a member of + THD::transaction.all + - top-level statement or sub-statement, when the variable is a + member of THD::transaction.stmt + This member has the following life cycle: + * stmt.modified_non_trans_table is used to keep track of + modified non-transactional tables of top-level statements. At + the end of the previous statement and at the beginning of the session, + it is reset to FALSE. If such functions + as mysql_insert, mysql_update, mysql_delete etc modify a + non-transactional table, they set this flag to TRUE. At the + end of the statement, the value of stmt.modified_non_trans_table + is merged with all.modified_non_trans_table and gets reset. + * all.modified_non_trans_table is reset at the end of transaction + + * Since we do not have a dedicated context for execution of a + sub-statement, to keep track of non-transactional changes in a + sub-statement, we re-use stmt.modified_non_trans_table. + At entrance into a sub-statement, a copy of the value of + stmt.modified_non_trans_table (containing the changes of the + outer statement) is saved on stack. Then + stmt.modified_non_trans_table is reset to FALSE and the + substatement is executed. Then the new value is merged with the + saved value. 
+ */ + bool modified_non_trans_table; + + void reset() { + no_2pc= FALSE; + modified_non_trans_table= FALSE; + m_unsafe_rollback_flags= 0; + } + bool is_empty() const { return ha_list == NULL; } + THD_TRANS() = default; /* Remove gcc warning */ + + unsigned int m_unsafe_rollback_flags; + /* + Define the type of statements which cannot be rolled back safely. + Each type occupies one bit in m_unsafe_rollback_flags. + MODIFIED_NON_TRANS_TABLE is limited to mark only the temporary + non-transactional table *when* it's cached along with the transactional + events; the regular table is covered by the "namesake" bool var. + */ + enum unsafe_statement_types + { + MODIFIED_NON_TRANS_TABLE= 1, + CREATED_TEMP_TABLE= 2, + DROPPED_TEMP_TABLE= 4, + DID_WAIT= 8, + DID_DDL= 0x10, + EXECUTED_TABLE_ADMIN_CMD= 0x20 + }; + + void mark_modified_non_trans_temp_table() + { + m_unsafe_rollback_flags|= MODIFIED_NON_TRANS_TABLE; + } + bool has_modified_non_trans_temp_table() const + { + return (m_unsafe_rollback_flags & MODIFIED_NON_TRANS_TABLE) != 0; + } + void mark_executed_table_admin_cmd() + { + DBUG_PRINT("debug", ("mark_executed_table_admin_cmd")); + m_unsafe_rollback_flags|= EXECUTED_TABLE_ADMIN_CMD; + } + bool trans_executed_admin_cmd() + { + return (m_unsafe_rollback_flags & EXECUTED_TABLE_ADMIN_CMD) != 0; + } + void mark_created_temp_table() + { + DBUG_PRINT("debug", ("mark_created_temp_table")); + m_unsafe_rollback_flags|= CREATED_TEMP_TABLE; + } + void mark_dropped_temp_table() + { + DBUG_PRINT("debug", ("mark_dropped_temp_table")); + m_unsafe_rollback_flags|= DROPPED_TEMP_TABLE; + } + bool has_created_dropped_temp_table() const { + return + (m_unsafe_rollback_flags & (CREATED_TEMP_TABLE|DROPPED_TEMP_TABLE)) != 0; + } + void mark_trans_did_wait() { m_unsafe_rollback_flags|= DID_WAIT; } + bool trans_did_wait() const { + return (m_unsafe_rollback_flags & DID_WAIT) != 0; + } + bool is_trx_read_write() const; + void mark_trans_did_ddl() { m_unsafe_rollback_flags|= DID_DDL; } + bool 
trans_did_ddl() const { + return (m_unsafe_rollback_flags & DID_DDL) != 0; + } + +}; + + +/** + Either statement transaction or normal transaction - related + thread-specific storage engine data. + + If a storage engine participates in a statement/transaction, + an instance of this class is present in + thd->transaction.{stmt|all}.ha_list. The addition to + {stmt|all}.ha_list is made by trans_register_ha(). + + When it's time to commit or rollback, each element of ha_list + is used to access storage engine's prepare()/commit()/rollback() + methods, and also to evaluate if a full two phase commit is + necessary. + + @sa General description of transaction handling in handler.cc. +*/ + +class Ha_trx_info +{ +public: + /** Register this storage engine in the given transaction context. */ + void register_ha(THD_TRANS *trans, handlerton *ht_arg) + { + DBUG_ASSERT(m_flags == 0); + DBUG_ASSERT(m_ht == NULL); + DBUG_ASSERT(m_next == NULL); + + m_ht= ht_arg; + m_flags= (int) TRX_READ_ONLY; /* Assume read-only at start. */ + + m_next= trans->ha_list; + trans->ha_list= this; + } + + /** Clear, prepare for reuse. */ + void reset() + { + m_next= NULL; + m_ht= NULL; + m_flags= 0; + } + + Ha_trx_info() { reset(); } + + void set_trx_read_write() + { + DBUG_ASSERT(is_started()); + m_flags|= (int) TRX_READ_WRITE; + } + bool is_trx_read_write() const + { + DBUG_ASSERT(is_started()); + return m_flags & (int) TRX_READ_WRITE; + } + bool is_started() const { return m_ht != NULL; } + /** Mark this transaction read-write if the argument is read-write. */ + void coalesce_trx_with(const Ha_trx_info *stmt_trx) + { + /* + Must be called only after the transaction has been started. + Can be called many times, e.g. when we have many + read-write statements in a transaction. 
+ */ + DBUG_ASSERT(is_started()); + if (stmt_trx->is_trx_read_write()) + set_trx_read_write(); + } + Ha_trx_info *next() const + { + DBUG_ASSERT(is_started()); + return m_next; + } + handlerton *ht() const + { + DBUG_ASSERT(is_started()); + return m_ht; + } +private: + enum { TRX_READ_ONLY= 0, TRX_READ_WRITE= 1 }; + /** Auxiliary, used for ha_list management */ + Ha_trx_info *m_next; + /** + Although a given Ha_trx_info instance is currently always used + for the same storage engine, 'ht' is not-NULL only when the + corresponding storage is a part of a transaction. + */ + handlerton *m_ht; + /** + Transaction flags related to this engine. + Not-null only if this instance is a part of transaction. + May assume a combination of enum values above. + */ + uchar m_flags; +}; + + +inline bool THD_TRANS::is_trx_read_write() const +{ + Ha_trx_info *ha_info; + for (ha_info= ha_list; ha_info; ha_info= ha_info->next()) + if (ha_info->is_trx_read_write()) + return TRUE; + return FALSE; +} + + +enum enum_tx_isolation { ISO_READ_UNCOMMITTED, ISO_READ_COMMITTED, + ISO_REPEATABLE_READ, ISO_SERIALIZABLE}; + + +typedef struct { + ulonglong data_file_length; + ulonglong max_data_file_length; + ulonglong index_file_length; + ulonglong max_index_file_length; + ulonglong delete_length; + ha_rows records; + ulong mean_rec_length; + time_t create_time; + time_t check_time; + time_t update_time; + ulonglong check_sum; + bool check_sum_null; +} PARTITION_STATS; + +#define UNDEF_NODEGROUP 65535 +class Item; +struct st_table_log_memory_entry; + +class partition_info; + +struct st_partition_iter; + +enum ha_choice { HA_CHOICE_UNDEF, HA_CHOICE_NO, HA_CHOICE_YES, HA_CHOICE_MAX }; + +enum enum_stats_auto_recalc { HA_STATS_AUTO_RECALC_DEFAULT= 0, + HA_STATS_AUTO_RECALC_ON, + HA_STATS_AUTO_RECALC_OFF }; + +/** + A helper struct for schema DDL statements: + CREATE SCHEMA [IF NOT EXISTS] name [ schema_specification... ] + ALTER SCHEMA name [ schema_specification... 
] + + It stores the "schema_specification" part of the CREATE/ALTER statements and + is passed to mysql_create_db() and mysql_alter_db(). + Currently consists of the schema default character set, collation + and schema_comment. +*/ +struct Schema_specification_st +{ + CHARSET_INFO *default_table_charset; + LEX_CSTRING *schema_comment; + void init() + { + bzero(this, sizeof(*this)); + } +}; + +class Create_field; + +struct Table_period_info: Sql_alloc +{ + Table_period_info() : + create_if_not_exists(false), + constr(NULL), + unique_keys(0) {} + Table_period_info(const char *name_arg, size_t size) : + name(name_arg, size), + create_if_not_exists(false), + constr(NULL), + unique_keys(0){} + + Lex_ident name; + + struct start_end_t + { + start_end_t() = default; + start_end_t(const LEX_CSTRING& _start, const LEX_CSTRING& _end) : + start(_start), + end(_end) {} + Lex_ident start; + Lex_ident end; + }; + start_end_t period; + bool create_if_not_exists; + Virtual_column_info *constr; + uint unique_keys; + + bool is_set() const + { + DBUG_ASSERT(bool(period.start) == bool(period.end)); + return period.start; + } + + void set_period(const Lex_ident& start, const Lex_ident& end) + { + period.start= start; + period.end= end; + } + bool check_field(const Create_field* f, const Lex_ident& f_name) const; +}; + +struct Vers_parse_info: public Table_period_info +{ + Vers_parse_info() : + Table_period_info(STRING_WITH_LEN("SYSTEM_TIME")), + versioned_fields(false), + unversioned_fields(false), + can_native(-1) + {} + + Table_period_info::start_end_t as_row; + + friend struct Table_scope_and_contents_source_st; + void set_start(const LEX_CSTRING field_name) + { + as_row.start= field_name; + period.start= field_name; + } + void set_end(const LEX_CSTRING field_name) + { + as_row.end= field_name; + period.end= field_name; + } + +protected: + bool is_start(const char *name) const; + bool is_end(const char *name) const; + bool is_start(const Create_field &f) const; + bool is_end(const 
Create_field &f) const; + bool fix_implicit(THD *thd, Alter_info *alter_info); + operator bool() const + { + return as_row.start || as_row.end || period.start || period.end; + } + bool need_check(const Alter_info *alter_info) const; + bool check_conditions(const Lex_table_name &table_name, + const Lex_table_name &db) const; + bool create_sys_field(THD *thd, const char *field_name, + Alter_info *alter_info, int flags); + +public: + static const Lex_ident default_start; + static const Lex_ident default_end; + + bool fix_alter_info(THD *thd, Alter_info *alter_info, + HA_CREATE_INFO *create_info, TABLE *table); + bool fix_create_like(Alter_info &alter_info, HA_CREATE_INFO &create_info, + TABLE_LIST &src_table, TABLE_LIST &table); + bool check_sys_fields(const Lex_table_name &table_name, + const Lex_table_name &db, Alter_info *alter_info) const; + + /** + At least one field was specified 'WITH/WITHOUT SYSTEM VERSIONING'. + Useful for error handling. + */ + bool versioned_fields : 1; + bool unversioned_fields : 1; + int can_native; +}; + +/** + A helper struct for table DDL statements, e.g.: + CREATE [OR REPLACE] [TEMPORARY] + TABLE [IF NOT EXISTS] tbl_name table_contents_source; + + Represents a combinations of: + 1. The scope, i.e. TEMPORARY or not TEMPORARY + 2. The "table_contents_source" part of the table DDL statements, + which can be initialized from either of these: + - table_element_list ... // Explicit definition (column and key list) + - LIKE another_table_name ... // Copy structure from another table + - [AS] SELECT ... 
                                   // Copy structure from a subquery
*/

struct Table_scope_and_contents_source_pod_st // For trivial members
{
  CHARSET_INFO *alter_table_convert_to_charset;
  LEX_CUSTRING tabledef_version;
  LEX_CUSTRING org_tabledef_version;        /* version of dropped table */
  LEX_CSTRING connect_string;
  LEX_CSTRING comment;
  LEX_CSTRING alias;
  LEX_CSTRING org_storage_engine_name, new_storage_engine_name;
  const char *password, *tablespace;
  const char *data_file_name, *index_file_name;
  ulonglong max_rows,min_rows;
  ulonglong auto_increment_value;
  ulong table_options;                      ///< HA_OPTION_ values
  ulong avg_row_length;
  ulong used_fields;                        ///< bitmap of HA_CREATE_USED_ flags
  ulong key_block_size;
  ulong expression_length;
  ulong field_check_constraints;
  /*
    number of pages to sample during
    stats estimation, if used, otherwise 0.
  */
  uint stats_sample_pages;
  uint null_bits;                           /* NULL bits at start of record */
  uint options;                             /* OR of HA_CREATE_ options */
  uint merge_insert_method;
  uint extra_size;                          /* length of extra data segment */
  handlerton *db_type;
  /**
    Row type of the table definition.

    Defaults to ROW_TYPE_DEFAULT for all non-ALTER statements.
    For ALTER TABLE defaults to ROW_TYPE_NOT_USED (means "keep the current").

    Can be changed either explicitly by the parser.
    If nothing specified inherits the value of the original table (if present).
  */
  enum row_type row_type;
  enum ha_choice transactional;
  enum ha_storage_media storage_media;      ///< DEFAULT, DISK or MEMORY
  enum ha_choice page_checksum;             ///< If we have page_checksums
  engine_option_value *option_list;         ///< list of table create options
  enum_stats_auto_recalc stats_auto_recalc;
  bool varchar;                             ///< 1 if table has a VARCHAR
  bool sequence;                            // If SEQUENCE=1 was used

  /* NOTE(review): template argument restored; extraction stripped <...> */
  List<Virtual_column_info> *check_constraint_list;

  /* the following three are only for ALTER TABLE, check_if_incompatible_data() */
  ha_table_option_struct *option_struct;          ///< structure with parsed table options
  ha_field_option_struct **fields_option_struct;  ///< array of field option structures
  ha_index_option_struct **indexes_option_struct; ///< array of index option structures

  /* The following is used to remember the old state for CREATE OR REPLACE */
  TABLE *table;
  TABLE_LIST *pos_in_locked_tables;
  TABLE_LIST *merge_list;
  MDL_ticket *mdl_ticket;
  bool table_was_deleted;
  sequence_definition *seq_create_info;

  /* Zero-initialize the trivial (POD) part */
  void init()
  {
    bzero(this, sizeof(*this));
  }
  bool tmp_table() const { return options & HA_LEX_CREATE_TMP_TABLE; }
  /* Pick the session default engine, honouring TEMPORARY scope */
  void use_default_db_type(THD *thd)
  {
    db_type= tmp_table() ? ha_default_tmp_handlerton(thd)
                         : ha_default_handlerton(thd);
  }

  bool versioned() const
  {
    return options & HA_VERSIONED_TABLE;
  }
};


/*
  Non-trivial extension of the POD part above: adds the versioning and
  application-period parse info, plus the checks that resolve them.
*/
struct Table_scope_and_contents_source_st:
         public Table_scope_and_contents_source_pod_st
{
  Vers_parse_info vers_info;
  Table_period_info period_info;

  void init()
  {
    Table_scope_and_contents_source_pod_st::init();
    vers_info= {};
    period_info= {};
  }

  bool fix_create_fields(THD *thd, Alter_info *alter_info,
                         const TABLE_LIST &create_table);
  bool fix_period_fields(THD *thd, Alter_info *alter_info);
  bool check_fields(THD *thd, Alter_info *alter_info,
                    const Lex_table_name &table_name,
                    const Lex_table_name &db,
                    int select_count= 0);
  bool check_period_fields(THD *thd, Alter_info *alter_info);

  void vers_check_native();
  bool vers_fix_system_fields(THD *thd, Alter_info *alter_info,
                              const TABLE_LIST &create_table);

  bool vers_check_system_fields(THD *thd, Alter_info *alter_info,
                                const Lex_table_name &table_name,
                                const Lex_table_name &db,
                                int select_count= 0);
};


/**
  This struct is passed to handler table routines, e.g. ha_create().
  It does not include the "OR REPLACE" and "IF NOT EXISTS" parts, as these
  parts are handled on the SQL level and are not needed on the handler level.
*/
struct HA_CREATE_INFO: public Table_scope_and_contents_source_st,
                       public Schema_specification_st
{
  /* TODO: remove after MDEV-20865 */
  Alter_info *alter_info;

  void init()
  {
    Table_scope_and_contents_source_st::init();
    Schema_specification_st::init();
    alter_info= NULL;
  }
  /*
    DYNAMIC and PAGE row formats imply packed records; fold that into
    the returned option bits so engines see a consistent value.
  */
  ulong table_options_with_row_type()
  {
    if (row_type == ROW_TYPE_DYNAMIC || row_type == ROW_TYPE_PAGE)
      return table_options | HA_OPTION_PACK_RECORD;
    else
      return table_options;
  }
  bool resolve_to_charset_collation_context(THD *thd,
         const Lex_table_charset_collation_attrs_st &default_cscl,
         const Lex_table_charset_collation_attrs_st &convert_cscl,
         const Charset_collation_context &ctx);
};


/**
  This struct is passed to mysql_create_table() and similar creation functions,
  as well as to show_create_table().
*/
struct Table_specification_st: public HA_CREATE_INFO,
                               public DDL_options_st
{
  Lex_table_charset_collation_attrs_st default_charset_collation;
  Lex_table_charset_collation_attrs_st convert_charset_collation;

  // Deep initialization
  void init()
  {
    HA_CREATE_INFO::init();
    DDL_options_st::init();
    default_charset_collation.init();
    convert_charset_collation.init();
  }
  void init(DDL_options_st::Options options_arg)
  {
    HA_CREATE_INFO::init();
    DDL_options_st::init(options_arg);
    default_charset_collation.init();
    convert_charset_collation.init();
  }
  /*
    Quick initialization, for parser.
    Most of the HA_CREATE_INFO is left uninitialized.
    It gets fully initialized in sql_yacc.yy, only when the parser
    scans a related keyword (e.g. CREATE, ALTER).
  */
  void lex_start()
  {
    HA_CREATE_INFO::options= 0;
    DDL_options_st::init();
    default_charset_collation.init();
    convert_charset_collation.init();
  }

  bool add_table_option_convert_charset(CHARSET_INFO *cs)
  {
    // cs can be NULL, e.g.: ALTER TABLE t1 CONVERT TO CHARACTER SET DEFAULT;
    used_fields|= (HA_CREATE_USED_CHARSET | HA_CREATE_USED_DEFAULT_CHARSET);
    return cs ?
      convert_charset_collation.merge_exact_charset(Lex_exact_charset(cs)) :
      convert_charset_collation.merge_charset_default();
  }
  bool add_table_option_convert_collation(const Lex_extended_collation_st &cl)
  {
    used_fields|= (HA_CREATE_USED_CHARSET | HA_CREATE_USED_DEFAULT_CHARSET);
    return convert_charset_collation.merge_collation(cl);
  }

  bool add_table_option_default_charset(CHARSET_INFO *cs)
  {
    // cs can be NULL, e.g.: CREATE TABLE t1 (..) CHARACTER SET DEFAULT;
    used_fields|= HA_CREATE_USED_DEFAULT_CHARSET;
    return cs ?
      default_charset_collation.merge_exact_charset(Lex_exact_charset(cs)) :
      default_charset_collation.merge_charset_default();
  }
  bool add_table_option_default_collation(const Lex_extended_collation_st &cl)
  {
    used_fields|= HA_CREATE_USED_DEFAULT_CHARSET;
    return default_charset_collation.merge_collation(cl);
  }

  /* Resolve the collected charset/collation clauses against the context */
  bool resolve_to_charset_collation_context(THD *thd,
                                            const Charset_collation_context &ctx)
  {
    return HA_CREATE_INFO::
             resolve_to_charset_collation_context(thd,
                                                  default_charset_collation,
                                                  convert_charset_collation,
                                                  ctx);
  }
};


/**
  Structure describing changes to an index to be caused by ALTER TABLE.
*/

struct KEY_PAIR
{
  /**
    Pointer to KEY object describing old version of index in
    TABLE::key_info array for TABLE instance representing old
    version of table.
  */
  KEY *old_key;
  /**
    Pointer to KEY object describing new version of index in
    Alter_inplace_info::key_info_buffer array.
  */
  KEY *new_key;
};


/**
  In-place alter handler context.

  This is a superclass intended to be subclassed by individual handlers
  in order to store handler unique context between in-place alter API calls.

  The handler is responsible for creating the object. This can be done
  as early as during check_if_supported_inplace_alter().

  The SQL layer is responsible for destroying the object.
  The class extends Sql_alloc so the memory will be mem root allocated.

  @see Alter_inplace_info
*/

class inplace_alter_handler_ctx : public Sql_alloc
{
public:
  inplace_alter_handler_ctx() = default;

  virtual ~inplace_alter_handler_ctx() = default;
  virtual void set_shared_data(const inplace_alter_handler_ctx& ctx) {}
};


/**
  Class describing changes to be done by ALTER TABLE.
  Instance of this class is passed to storage engine in order
  to determine if this ALTER TABLE can be done using in-place
  algorithm. It is also used for executing the ALTER TABLE
  using in-place algorithm.
*/

class Alter_inplace_info
{
public:

  /**
    Create options (like MAX_ROWS) for the new version of table.

    @note The referenced instance of HA_CREATE_INFO object was already
          used to create new .FRM file for table being altered. So it
          has been processed by mysql_prepare_create_table() already.
          For example, this means that it has HA_OPTION_PACK_RECORD
          flag in HA_CREATE_INFO::table_options member correctly set.
  */
  HA_CREATE_INFO *create_info;

  /**
    Alter options, fields and keys for the new version of table.

    @note The referenced instance of Alter_info object was already
          used to create new .FRM file for table being altered. So it
          has been processed by mysql_prepare_create_table() already.
          In particular, this means that in Create_field objects for
          fields which were present in some form in the old version
          of table, Create_field::field member points to corresponding
          Field instance for old version of table.
  */
  Alter_info *alter_info;

  /**
    Array of KEYs for new version of table - including KEYs to be added.

    @note Currently this array is produced as result of
          mysql_prepare_create_table() call.
          This means that it follows different convention for
          KEY_PART_INFO::fieldnr values than objects in TABLE::key_info
          array.

    @todo This is mainly due to the fact that we need to keep compatibility
          with removed handler::add_index() call. We plan to switch to
          TABLE::key_info numbering later.

    KEYs are sorted - see sort_keys().
  */
  KEY *key_info_buffer;

  /** Size of key_info_buffer array. */
  uint key_count;

  /** Size of index_drop_buffer array. */
  uint index_drop_count= 0;

  /**
    Array of pointers to KEYs to be dropped belonging to the TABLE instance
    for the old version of the table.
  */
  KEY **index_drop_buffer= nullptr;

  /** Size of index_add_buffer array. */
  uint index_add_count= 0;

  /**
    Array of indexes into key_info_buffer for KEYs to be added,
    sorted in increasing order.
  */
  uint *index_add_buffer= nullptr;

  KEY_PAIR *index_altered_ignorability_buffer= nullptr;

  /** Size of index_altered_ignorability_buffer array. */
  uint index_altered_ignorability_count= 0;

  /**
    Old and new index names. Used for index rename.
  */
  struct Rename_key_pair
  {
    Rename_key_pair(const KEY *old_key, const KEY *new_key)
        : old_key(old_key), new_key(new_key)
    {
    }
    const KEY *old_key;
    const KEY *new_key;
  };
  /**
    Vector of key pairs from DROP/ADD index which can be renamed.
    NOTE(review): template arguments restored; extraction stripped <...>.
  */
  typedef Mem_root_array<Rename_key_pair, true> Rename_keys_vector;

  /**
    A list of indexes which should be renamed.
    Index definitions stays the same.
  */
  Rename_keys_vector rename_keys;

  /**
    Context information to allow handlers to keep context between in-place
    alter API calls.

    @see inplace_alter_handler_ctx for information about object lifecycle.
  */
  inplace_alter_handler_ctx *handler_ctx= nullptr;

  /**
    If the table uses several handlers, like ha_partition uses one handler
    per partition, this contains a Null terminated array of ctx pointers
    that should all be committed together.
    Or NULL if only handler_ctx should be committed.
    Set to NULL if the low level handler::commit_inplace_alter_table uses it,
    to signal to the main handler that everything was committed as atomically.

    @see inplace_alter_handler_ctx for information about object lifecycle.
  */
  inplace_alter_handler_ctx **group_commit_ctx= nullptr;

  /**
    Flags describing in detail which operations the storage engine is to
    execute. Flags are defined in sql_alter.h
  */
  alter_table_operations handler_flags= 0;

  /* Alter operations involving partitions are stored here */
  ulong partition_flags;

  /**
    Partition_info taking into account the partition changes to be performed.
    Contains all partitions which are present in the old version of the table
    with partitions to be dropped or changed marked as such + all partitions
    to be added in the new version of table marked as such.
  */
  partition_info * const modified_part_info;

  /** true for ALTER IGNORE TABLE ... */
  const bool ignore;

  /** true for online operation (LOCK=NONE) */
  bool online= false;

  /**
    When ha_commit_inplace_alter_table() is called the engine can
    set this to a function to be called after the ddl log
    is committed.
  */
  typedef void (inplace_alter_table_commit_callback)(void *);
  inplace_alter_table_commit_callback *inplace_alter_table_committed= nullptr;

  /* This will be used as the argument to the above function when called */
  void *inplace_alter_table_committed_argument= nullptr;

  /** which ALGORITHM and LOCK are supported by the storage engine */
  enum_alter_inplace_result inplace_supported;

  /**
    Can be set by handler to describe why a given operation cannot be done
    in-place (HA_ALTER_INPLACE_NOT_SUPPORTED) or why it cannot be done
    online (HA_ALTER_INPLACE_NO_LOCK or HA_ALTER_INPLACE_COPY_NO_LOCK)
    If set, it will be used with ER_ALTER_OPERATION_NOT_SUPPORTED_REASON if
    results from handler::check_if_supported_inplace_alter() doesn't match
    requirements set by user. If not set, the more generic
    ER_ALTER_OPERATION_NOT_SUPPORTED will be used.

    Please set to a properly localized string, for example using
    my_get_err_msg(), so that the error message as a whole is localized.
  */
  const char *unsupported_reason= nullptr;

  /** true when InnoDB should abort the alter when table is not empty */
  const bool error_if_not_empty;

  /** True when DDL should avoid downgrading the MDL */
  bool mdl_exclusive_after_prepare= false;

  Alter_inplace_info(HA_CREATE_INFO *create_info_arg,
                     Alter_info *alter_info_arg,
                     KEY *key_info_arg, uint key_count_arg,
                     partition_info *modified_part_info_arg,
                     bool ignore_arg, bool error_non_empty);

  ~Alter_inplace_info()
  {
    delete handler_ctx;
  }

  /**
    Used after check_if_supported_inplace_alter() to report
    error if the result does not match the LOCK/ALGORITHM
    requirements set by the user.

    @param not_supported  Part of statement that was not supported.
    @param try_instead    Suggestion as to what the user should
                          replace not_supported with.
  */
  void report_unsupported_error(const char *not_supported,
                                const char *try_instead) const;
  /* Record an index whose IGNORED attribute changed between old and new */
  void add_altered_index_ignorability(KEY *old_key, KEY *new_key)
  {
    KEY_PAIR *key_pair= index_altered_ignorability_buffer +
                        index_altered_ignorability_count++;
    key_pair->old_key= old_key;
    key_pair->new_key= new_key;
    DBUG_PRINT("info", ("index had ignorability altered: %i to %i",
                        old_key->is_ignored,
                        new_key->is_ignored));
  }


};


typedef struct st_key_create_information
{
  enum ha_key_alg algorithm;
  ulong block_size;
  uint flags;                                   /* HA_USE.. flags */
  LEX_CSTRING parser_name;
  LEX_CSTRING comment;
  bool is_ignored;
} KEY_CREATE_INFO;


typedef struct st_savepoint SAVEPOINT;
extern ulong savepoint_alloc_size;
extern KEY_CREATE_INFO default_key_create_info;

/* Forward declaration for condition pushdown to storage engine */
typedef class Item COND;

typedef struct st_ha_check_opt
{
  st_ha_check_opt() = default;                  /* Remove gcc warning */
  uint flags;       /* isam layer flags (e.g. for myisamchk) */
  uint sql_flags;   /* sql layer flags - for something myisamchk cannot do */
  time_t start_time;                            /* When check/repair starts */
  KEY_CACHE *key_cache; /* new key cache when changing key cache */
  void init();
} HA_CHECK_OPT;


/********************************************************************************
 * MRR
 ********************************************************************************/

typedef void *range_seq_t;

typedef struct st_range_seq_if
{
  /*
    Get key information

    SYNOPSIS
      get_key_info()
        init_params  The seq_init_param parameter
        length       OUT length of the keys in this range sequence
        map          OUT key_part_map of the keys in this range sequence

    DESCRIPTION
      This function is set only when using HA_MRR_FIXED_KEY mode. In that mode,
      all ranges are single-point equality ranges that use the same set of key
      parts.
      This function allows the MRR implementation to get the length of
      a key, and which keyparts it uses.
  */
  void (*get_key_info)(void *init_params, uint *length, key_part_map *map);

  /*
    Initialize the traversal of range sequence

    SYNOPSIS
      init()
        init_params  The seq_init_param parameter
        n_ranges     The number of ranges obtained
        flags        A combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY

    RETURN
      An opaque value to be used as RANGE_SEQ_IF::next() parameter
  */
  range_seq_t (*init)(void *init_params, uint n_ranges, uint flags);


  /*
    Get the next range in the range sequence

    SYNOPSIS
      next()
        seq    The value returned by RANGE_SEQ_IF::init()
        range  OUT Information about the next range

    RETURN
      FALSE - Ok, the range structure filled with info about the next range
      TRUE  - No more ranges
  */
  bool (*next) (range_seq_t seq, KEY_MULTI_RANGE *range);

  /*
    Check whether range_info orders to skip the next record

    SYNOPSIS
      skip_record()
        seq         The value returned by RANGE_SEQ_IF::init()
        range_info  Information about the next range
                    (Ignored if MRR_NO_ASSOCIATION is set)
        rowid       Rowid of the record to be checked (ignored if set to 0)

    RETURN
      1 - Record with this range_info and/or this rowid shall be filtered
          out from the stream of records returned by multi_range_read_next()
      0 - The record shall be left in the stream
  */
  bool (*skip_record) (range_seq_t seq, range_id_t range_info, uchar *rowid);

  /*
    Check if the record combination matches the index condition
    SYNOPSIS
      skip_index_tuple()
        seq         The value returned by RANGE_SEQ_IF::init()
        range_info  Information about the next range

    RETURN
      0 - The record combination satisfies the index condition
      1 - Otherwise
  */
  bool (*skip_index_tuple) (range_seq_t seq, range_id_t range_info);
} RANGE_SEQ_IF;

typedef bool (*SKIP_INDEX_TUPLE_FUNC) (range_seq_t seq, range_id_t range_info);

/*
  Aggregated cost of an operation, split into I/O, CPU, memory and
  "import" (remote-operation) components so the components can be
  weighted separately via the *_COEFF constants.
*/
class Cost_estimate
{
public:
  double io_count;        /* number of I/O to fetch records */
  double avg_io_cost;     /* cost of an average I/O oper. to fetch records */
  double idx_io_count;    /* number of I/O to read keys */
  double idx_avg_io_cost; /* cost of an average I/O oper. to fetch records */
  double cpu_cost;        /* total cost of operations in CPU */
  double idx_cpu_cost;    /* cost of operations in CPU for index */
  double import_cost;     /* cost of remote operations */
  double mem_cost;        /* cost of used memory */

  static constexpr double IO_COEFF= 1;
  static constexpr double CPU_COEFF= 1;
  static constexpr double MEM_COEFF= 1;
  static constexpr double IMPORT_COEFF= 1;

  Cost_estimate()
  {
    reset();
  }

  /* Weighted sum of all cost components */
  double total_cost() const
  {
    return IO_COEFF*io_count*avg_io_cost +
           IO_COEFF*idx_io_count*idx_avg_io_cost +
           CPU_COEFF*(cpu_cost + idx_cpu_cost) +
           MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost;
  }

  /* Cost of reading the index only (no record fetch) */
  double index_only_cost()
  {
    return IO_COEFF*idx_io_count*idx_avg_io_cost +
           CPU_COEFF*idx_cpu_cost;
  }

  /**
    Whether or not all costs in the object are zero

    @return true if all costs are zero, false otherwise
  */
  bool is_zero() const
  {
    return io_count == 0.0 && idx_io_count == 0.0 && cpu_cost == 0.0 &&
      import_cost == 0.0 && mem_cost == 0.0;
  }

  /* Reset counts to zero; average I/O costs default to 1.0 */
  void reset()
  {
    avg_io_cost= 1.0;
    idx_avg_io_cost= 1.0;
    io_count= idx_io_count= cpu_cost= idx_cpu_cost= mem_cost= import_cost= 0.0;
  }

  void multiply(double m)
  {
    io_count *= m;
    cpu_cost *= m;
    idx_io_count *= m;
    idx_cpu_cost *= m;
    import_cost *= m;
    /* Don't multiply mem_cost */
  }

  /*
    Accumulate another estimate; average I/O costs are combined as a
    weighted (by I/O count) average.
  */
  void add(const Cost_estimate* cost)
  {
    if (cost->io_count != 0.0)
    {
      double io_count_sum= io_count + cost->io_count;
      avg_io_cost= (io_count * avg_io_cost +
                    cost->io_count * cost->avg_io_cost)
                   /io_count_sum;
      io_count= io_count_sum;
    }
    if (cost->idx_io_count != 0.0)
    {
      double idx_io_count_sum= idx_io_count + cost->idx_io_count;
      idx_avg_io_cost= (idx_io_count * idx_avg_io_cost +
                        cost->idx_io_count * cost->idx_avg_io_cost)
                       /idx_io_count_sum;
      idx_io_count= idx_io_count_sum;
    }
    cpu_cost += cost->cpu_cost;
    idx_cpu_cost += cost->idx_cpu_cost;
    import_cost += cost->import_cost;
  }

  void add_io(double add_io_cnt, double add_avg_cost)
  {
    /* In edge cases add_io_cnt may be zero */
    if (add_io_cnt > 0)
    {
      double io_count_sum= io_count + add_io_cnt;
      avg_io_cost= (io_count * avg_io_cost +
                    add_io_cnt * add_avg_cost) / io_count_sum;
      io_count= io_count_sum;
    }
  }

  /// Add to CPU cost
  void add_cpu(double add_cpu_cost) { cpu_cost+= add_cpu_cost; }

  /// Add to import cost
  void add_import(double add_import_cost) { import_cost+= add_import_cost; }

  /// Add to memory cost
  void add_mem(double add_mem_cost) { mem_cost+= add_mem_cost; }

  /*
    To be used when we go from old single value-based cost calculations to
    the new Cost_estimate-based.
  */
  void convert_from_cost(double cost)
  {
    reset();
    io_count= cost;
  }
};

void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
                         Cost_estimate *cost);

/*
  Indicates that all scanned ranges will be singlepoint (aka equality) ranges.
  The ranges may not use the full key but all of them will use the same number
  of key parts.
*/
#define HA_MRR_SINGLE_POINT 1U
#define HA_MRR_FIXED_KEY 2U

/*
  Indicates that RANGE_SEQ_IF::next(&range) doesn't need to fill in the
  'range' parameter.
*/
#define HA_MRR_NO_ASSOCIATION 4U

/*
  The MRR user will provide ranges in key order, and MRR implementation
  must return rows in key order.
*/
#define HA_MRR_SORTED 8U

/* MRR implementation doesn't have to retrieve full records */
#define HA_MRR_INDEX_ONLY 16U

/*
  The passed memory buffer is of maximum possible size, the caller can't
  assume larger buffer.
*/
#define HA_MRR_LIMITS 32U


/*
  Flag set <=> default MRR implementation is used
  (The choice is made by **_info[_const]() function which may set this
   flag.
SQL layer remembers the flag value and then passes it to + multi_read_range_init(). +*/ +#define HA_MRR_USE_DEFAULT_IMPL 64U + +/* + Used only as parameter to multi_range_read_info(): + Flag set <=> the caller guarantees that the bounds of the scanned ranges + will not have NULL values. +*/ +#define HA_MRR_NO_NULL_ENDPOINTS 128U + +/* + The MRR user has materialized range keys somewhere in the user's buffer. + This can be used for optimization of the procedure that sorts these keys + since in this case key values don't have to be copied into the MRR buffer. + + In other words, it is guaranteed that after RANGE_SEQ_IF::next() call the + pointer in range->start_key.key will point to a key value that will remain + there until the end of the MRR scan. +*/ +#define HA_MRR_MATERIALIZED_KEYS 256U + +/* + The following bits are reserved for use by MRR implementation. The intended + use scenario: + + * sql layer calls handler->multi_range_read_info[_const]() + - MRR implementation figures out what kind of scan it will perform, saves + the result in *mrr_mode parameter. + * sql layer remembers what was returned in *mrr_mode + + * the optimizer picks the query plan (which may or may not include the MRR + scan that was estimated by the multi_range_read_info[_const] call) + + * if the query is an EXPLAIN statement, sql layer will call + handler->multi_range_read_explain_info(mrr_mode) to get a text description + of the picked MRR scan; the description will be a part of EXPLAIN output. +*/ +#define HA_MRR_IMPLEMENTATION_FLAG1 512U +#define HA_MRR_IMPLEMENTATION_FLAG2 1024U +#define HA_MRR_IMPLEMENTATION_FLAG3 2048U +#define HA_MRR_IMPLEMENTATION_FLAG4 4096U +#define HA_MRR_IMPLEMENTATION_FLAG5 8192U +#define HA_MRR_IMPLEMENTATION_FLAG6 16384U + +#define HA_MRR_IMPLEMENTATION_FLAGS \ + (512U | 1024U | 2048U | 4096U | 8192U | 16384U) + +/* + This is a buffer area that the handler can use to store rows. 
+ 'end_of_used_area' should be kept updated after calls to + read-functions so that other parts of the code can use the + remaining area (until next read calls is issued). +*/ + +typedef struct st_handler_buffer +{ + /* const? */uchar *buffer; /* Buffer one can start using */ + /* const? */uchar *buffer_end; /* End of buffer */ + uchar *end_of_used_area; /* End of area that was used by handler */ +} HANDLER_BUFFER; + +typedef struct system_status_var SSV; + +class ha_statistics +{ +public: + ulonglong data_file_length; /* Length off data file */ + ulonglong max_data_file_length; /* Length off data file */ + ulonglong index_file_length; + ulonglong max_index_file_length; + ulonglong delete_length; /* Free bytes */ + ulonglong auto_increment_value; + /* + The number of records in the table. + 0 - means the table has exactly 0 rows + other - if (table_flags() & HA_STATS_RECORDS_IS_EXACT) + the value is the exact number of records in the table + else + it is an estimate + */ + ha_rows records; + ha_rows deleted; /* Deleted records */ + ulong mean_rec_length; /* physical reclength */ + time_t create_time; /* When table was created */ + time_t check_time; + time_t update_time; + uint block_size; /* index block size */ + ha_checksum checksum; + bool checksum_null; + + /* + number of buffer bytes that native mrr implementation needs, + */ + uint mrr_length_per_rec; + + ha_statistics(): + data_file_length(0), max_data_file_length(0), + index_file_length(0), max_index_file_length(0), delete_length(0), + auto_increment_value(0), records(0), deleted(0), mean_rec_length(0), + create_time(0), check_time(0), update_time(0), block_size(8192), + checksum(0), checksum_null(FALSE), mrr_length_per_rec(0) + {} +}; + +extern "C" check_result_t handler_index_cond_check(void* h_arg); + +extern "C" check_result_t handler_rowid_filter_check(void* h_arg); +extern "C" int handler_rowid_filter_is_active(void* h_arg); + +uint calculate_key_len(TABLE *, uint, const uchar *, key_part_map); +/* + 
bitmap with first N+1 bits set + (keypart_map for a key prefix of [0..N] keyparts) +*/ +#define make_keypart_map(N) (((key_part_map)2 << (N)) - 1) +/* + bitmap with first N bits set + (keypart_map for a key prefix of [0..N-1] keyparts) +*/ +#define make_prev_keypart_map(N) (((key_part_map)1 << (N)) - 1) + + +/** Base class to be used by handlers different shares */ +class Handler_share +{ +public: + Handler_share() = default; + virtual ~Handler_share() = default; +}; + +enum class Compare_keys : uint32_t +{ + Equal= 0, + EqualButKeyPartLength, + EqualButComment, + NotEqual +}; + +/** + The handler class is the interface for dynamically loadable + storage engines. Do not add ifdefs and take care when adding or + changing virtual functions to avoid vtable confusion + + Functions in this class accept and return table columns data. Two data + representation formats are used: + 1. TableRecordFormat - Used to pass [partial] table records to/from + storage engine + + 2. KeyTupleFormat - used to pass index search tuples (aka "keys") to + storage engine. See opt_range.cc for description of this format. + + TableRecordFormat + ================= + [Warning: this description is work in progress and may be incomplete] + The table record is stored in a fixed-size buffer: + + record: null_bytes, column1_data, column2_data, ... + + The offsets of the parts of the buffer are also fixed: every column has + an offset to its column{i}_data, and if it is nullable it also has its own + bit in null_bytes. + + The record buffer only includes data about columns that are marked in the + relevant column set (table->read_set and/or table->write_set, depending on + the situation). + It could be that it is required that null bits of non-present + columns are set to 1 + + VARIOUS EXCEPTIONS AND SPECIAL CASES + + If the table has no nullable columns, then null_bytes is still + present, its length is one byte which must be set to 0xFF + at all times. 
+ + If the table has columns of type BIT, then certain bits from those columns + may be stored in null_bytes as well. Grep around for Field_bit for + details. + + For blob columns (see Field_blob), the record buffer stores length of the + data, following by memory pointer to the blob data. The pointer is owned + by the storage engine and is valid until the next operation. + + If a blob column has NULL value, then its length and blob data pointer + must be set to 0. +*/ + +class handler :public Sql_alloc +{ +public: + typedef ulonglong Table_flags; +protected: + TABLE_SHARE *table_share; /* The table definition */ + TABLE *table; /* The current open table */ + Table_flags cached_table_flags; /* Set on init() and open() */ + + ha_rows estimation_rows_to_insert; + handler *lookup_handler; + /* Statistics for the query. Updated if handler_stats.in_use is set */ + ha_handler_stats active_handler_stats; + void set_handler_stats(); +public: + handlerton *ht; /* storage engine of this handler */ + uchar *ref; /* Pointer to current row */ + uchar *dup_ref; /* Pointer to duplicate row */ + uchar *lookup_buffer; + + /* General statistics for the table like number of row, file sizes etc */ + ha_statistics stats; + /* + Collect query stats here if pointer is != NULL. + This is a pointer because if we do a clone of the handler, we want to + use the original handler for collecting statistics. 
+ */ + ha_handler_stats *handler_stats; + + /** MultiRangeRead-related members: */ + range_seq_t mrr_iter; /* Iterator to traverse the range sequence */ + RANGE_SEQ_IF mrr_funcs; /* Range sequence traversal functions */ + HANDLER_BUFFER *multi_range_buffer; /* MRR buffer info */ + uint ranges_in_seq; /* Total number of ranges in the traversed sequence */ + /** Current range (the one we're now returning rows from) */ + KEY_MULTI_RANGE mrr_cur_range; + + /** The following are for read_range() */ + key_range save_end_range, *end_range; + KEY_PART_INFO *range_key_part; + int key_compare_result_on_equal; + + /* TRUE <=> source MRR ranges and the output are ordered */ + bool mrr_is_output_sorted; + /** TRUE <=> we're currently traversing a range in mrr_cur_range. */ + bool mrr_have_range; + bool eq_range; + bool internal_tmp_table; /* If internal tmp table */ + bool implicit_emptied; /* Can be !=0 only if HEAP */ + bool mark_trx_read_write_done; /* mark_trx_read_write was called */ + bool check_table_binlog_row_based_done; /* check_table_binlog.. was called */ + bool check_table_binlog_row_based_result; /* cached check_table_binlog... */ + /* + TRUE <=> the engine guarantees that returned records are within the range + being scanned. 
+ */ + bool in_range_check_pushed_down; + + uint lookup_errkey; + uint errkey; /* Last dup key */ + uint key_used_on_scan; + uint active_index, keyread; + + /** Length of ref (1-8 or the clustered key length) */ + uint ref_length; + FT_INFO *ft_handler; + enum init_stat { NONE=0, INDEX, RND }; + init_stat inited, pre_inited; + + const COND *pushed_cond; + /** + next_insert_id is the next value which should be inserted into the + auto_increment column: in a inserting-multi-row statement (like INSERT + SELECT), for the first row where the autoinc value is not specified by the + statement, get_auto_increment() called and asked to generate a value, + next_insert_id is set to the next value, then for all other rows + next_insert_id is used (and increased each time) without calling + get_auto_increment(). + */ + ulonglong next_insert_id; + /** + insert id for the current row (*autogenerated*; if not + autogenerated, it's 0). + At first successful insertion, this variable is stored into + THD::first_successful_insert_id_in_cur_stmt. + */ + ulonglong insert_id_for_cur_row; + /** + Interval returned by get_auto_increment() and being consumed by the + inserter. 
+ */ + /* Statistics variables */ + ulonglong rows_read; + ulonglong rows_tmp_read; + ulonglong rows_changed; + /* One bigger than needed to avoid to test if key == MAX_KEY */ + ulonglong index_rows_read[MAX_KEY+1]; + ha_copy_info copy_info; + +private: + /* ANALYZE time tracker, if present */ + Exec_time_tracker *tracker; +public: + void set_time_tracker(Exec_time_tracker *tracker_arg) { tracker=tracker_arg;} + Exec_time_tracker *get_time_tracker() { return tracker; } + + Item *pushed_idx_cond; + uint pushed_idx_cond_keyno; /* The index which the above condition is for */ + + /* Rowid filter pushed into the engine */ + Rowid_filter *pushed_rowid_filter; + /* true when the pushed rowid filter has been already filled */ + bool rowid_filter_is_active; + /* Used for disabling/enabling pushed_rowid_filter */ + Rowid_filter *save_pushed_rowid_filter; + bool save_rowid_filter_is_active; + + Discrete_interval auto_inc_interval_for_cur_row; + /** + Number of reserved auto-increment intervals. Serves as a heuristic + when we have no estimation of how many records the statement will insert: + the more intervals we have reserved, the bigger the next one. Reset in + handler::ha_release_auto_increment(). + */ + uint auto_inc_intervals_count; + + /** + Instrumented table associated with this handler. + This member should be set to NULL when no instrumentation is in place, + so that linking an instrumented/non instrumented server/plugin works. + For example: + - the server is compiled with the instrumentation. + The server expects either NULL or valid pointers in m_psi. + - an engine plugin is compiled without instrumentation. + The plugin can not leave this pointer uninitialized, + or can not leave a trash value on purpose in this pointer, + as this would crash the server. + */ + PSI_table *m_psi; + +private: + /** Internal state of the batch instrumentation. */ + enum batch_mode_t + { + /** Batch mode not used. 
*/ + PSI_BATCH_MODE_NONE, + /** Batch mode used, before first table io. */ + PSI_BATCH_MODE_STARTING, + /** Batch mode used, after first table io. */ + PSI_BATCH_MODE_STARTED + }; + /** + Batch mode state. + @sa start_psi_batch_mode. + @sa end_psi_batch_mode. + */ + batch_mode_t m_psi_batch_mode; + /** + The number of rows in the batch. + @sa start_psi_batch_mode. + @sa end_psi_batch_mode. + */ + ulonglong m_psi_numrows; + /** + The current event in a batch. + @sa start_psi_batch_mode. + @sa end_psi_batch_mode. + */ + PSI_table_locker *m_psi_locker; + /** + Storage for the event in a batch. + @sa start_psi_batch_mode. + @sa end_psi_batch_mode. + */ + PSI_table_locker_state m_psi_locker_state; + +public: + virtual void unbind_psi(); + virtual void rebind_psi(); + /* Return error if definition doesn't match for already opened table */ + virtual int discover_check_version() { return 0; } + + /** + Put the handler in 'batch' mode when collecting + table io instrumented events. + When operating in batch mode: + - a single start event is generated in the performance schema. + - all table io performed between @c start_psi_batch_mode + and @c end_psi_batch_mode is not instrumented: + the number of rows affected is counted instead in @c m_psi_numrows. + - a single end event is generated in the performance schema + when the batch mode ends with @c end_psi_batch_mode. + */ + void start_psi_batch_mode(); + /** End a batch started with @c start_psi_batch_mode. */ + void end_psi_batch_mode(); + + /* If we have row logging enabled for this table */ + bool row_logging, row_logging_init; + /* If the row logging should be done in transaction cache */ + bool row_logging_has_trans; + +private: + /** + The lock type set by when calling::ha_external_lock(). This is + propagated down to the storage engine. The reason for also storing + it here, is that when doing MRR we need to create/clone a second handler + object. This cloned handler object needs to know about the lock_type used. 
+ */ + int m_lock_type; + /** + Pointer where to store/retrieve the Handler_share pointer. + For non partitioned handlers this is &TABLE_SHARE::ha_share. + */ + Handler_share **ha_share; + +public: + handler(handlerton *ht_arg, TABLE_SHARE *share_arg) + :table_share(share_arg), table(0), + estimation_rows_to_insert(0), + lookup_handler(this), + ht(ht_arg), ref(0), lookup_buffer(NULL), handler_stats(NULL), + end_range(NULL), implicit_emptied(0), + mark_trx_read_write_done(0), + check_table_binlog_row_based_done(0), + check_table_binlog_row_based_result(0), + in_range_check_pushed_down(FALSE), lookup_errkey(-1), errkey(-1), + key_used_on_scan(MAX_KEY), + active_index(MAX_KEY), keyread(MAX_KEY), + ref_length(sizeof(my_off_t)), + ft_handler(0), inited(NONE), pre_inited(NONE), + pushed_cond(0), next_insert_id(0), insert_id_for_cur_row(0), + tracker(NULL), + pushed_idx_cond(NULL), + pushed_idx_cond_keyno(MAX_KEY), + pushed_rowid_filter(NULL), + rowid_filter_is_active(0), + save_pushed_rowid_filter(NULL), + save_rowid_filter_is_active(false), + auto_inc_intervals_count(0), + m_psi(NULL), + m_psi_batch_mode(PSI_BATCH_MODE_NONE), + m_psi_numrows(0), + m_psi_locker(NULL), + row_logging(0), row_logging_init(0), + m_lock_type(F_UNLCK), ha_share(NULL) + { + DBUG_PRINT("info", + ("handler created F_UNLCK %d F_RDLCK %d F_WRLCK %d", + F_UNLCK, F_RDLCK, F_WRLCK)); + reset_statistics(); + } + virtual ~handler(void) + { + DBUG_ASSERT(m_lock_type == F_UNLCK); + DBUG_ASSERT(inited == NONE); + } + /* To check if table has been properely opened */ + bool is_open() + { + return ref != 0; + } + virtual handler *clone(const char *name, MEM_ROOT *mem_root); + /** This is called after create to allow us to set up cached variables */ + void init() + { + cached_table_flags= table_flags(); + } + /* ha_ methods: public wrappers for private virtual API */ + + int ha_open(TABLE *table, const char *name, int mode, uint test_if_locked, + MEM_ROOT *mem_root= 0, List *partitions_to_open=NULL); + int 
ha_index_init(uint idx, bool sorted) + { + DBUG_EXECUTE_IF("ha_index_init_fail", return HA_ERR_TABLE_DEF_CHANGED;); + int result; + DBUG_ENTER("ha_index_init"); + DBUG_ASSERT(inited==NONE); + if (!(result= index_init(idx, sorted))) + { + inited= INDEX; + active_index= idx; + end_range= NULL; + } + DBUG_RETURN(result); + } + int ha_index_end() + { + DBUG_ENTER("ha_index_end"); + DBUG_ASSERT(inited==INDEX); + inited= NONE; + active_index= MAX_KEY; + end_range= NULL; + DBUG_RETURN(index_end()); + } + /* This is called after index_init() if we need to do a index scan */ + virtual int prepare_index_scan() { return 0; } + virtual int prepare_index_key_scan_map(const uchar * key, key_part_map keypart_map) + { + uint key_len= calculate_key_len(table, active_index, key, keypart_map); + return prepare_index_key_scan(key, key_len); + } + virtual int prepare_index_key_scan( const uchar * key, uint key_len ) + { return 0; } + virtual int prepare_range_scan(const key_range *start_key, const key_range *end_key) + { return 0; } + + int ha_rnd_init(bool scan) __attribute__ ((warn_unused_result)) + { + DBUG_EXECUTE_IF("ha_rnd_init_fail", return HA_ERR_TABLE_DEF_CHANGED;); + int result; + DBUG_ENTER("ha_rnd_init"); + DBUG_ASSERT(inited==NONE || (inited==RND && scan)); + inited= (result= rnd_init(scan)) ? NONE: RND; + end_range= NULL; + DBUG_RETURN(result); + } + int ha_rnd_end() + { + DBUG_ENTER("ha_rnd_end"); + DBUG_ASSERT(inited==RND); + inited=NONE; + end_range= NULL; + DBUG_RETURN(rnd_end()); + } + int ha_rnd_init_with_error(bool scan) __attribute__ ((warn_unused_result)); + int ha_reset(); + /* this is necessary in many places, e.g. in HANDLER command */ + int ha_index_or_rnd_end() + { + return inited == INDEX ? ha_index_end() : inited == RND ? 
ha_rnd_end() : 0; + } + /** + The cached_table_flags is set at ha_open and ha_external_lock + */ + Table_flags ha_table_flags() const + { + DBUG_ASSERT(cached_table_flags < (HA_LAST_TABLE_FLAG << 1)); + return cached_table_flags; + } + /** + These functions represent the public interface to *users* of the + handler class, hence they are *not* virtual. For the inheritance + interface, see the (private) functions write_row(), update_row(), + and delete_row() below. + */ + int ha_external_lock(THD *thd, int lock_type); + int ha_external_unlock(THD *thd) { return ha_external_lock(thd, F_UNLCK); } + int ha_write_row(const uchar * buf); + int ha_update_row(const uchar * old_data, const uchar * new_data); + int ha_delete_row(const uchar * buf); + void ha_release_auto_increment(); + + bool keyread_enabled() { return keyread < MAX_KEY; } + int ha_start_keyread(uint idx) + { + int res= keyread_enabled() ? 0 : extra_opt(HA_EXTRA_KEYREAD, idx); + keyread= idx; + return res; + } + int ha_end_keyread() + { + if (!keyread_enabled()) + return 0; + keyread= MAX_KEY; + return extra(HA_EXTRA_NO_KEYREAD); + } + + int check_collation_compatibility(); + int check_long_hash_compatibility() const; + int ha_check_for_upgrade(HA_CHECK_OPT *check_opt); + /** to be actually called to get 'check()' functionality*/ + int ha_check(THD *thd, HA_CHECK_OPT *check_opt); + int ha_repair(THD* thd, HA_CHECK_OPT* check_opt); + void ha_start_bulk_insert(ha_rows rows, uint flags= 0) + { + DBUG_ENTER("handler::ha_start_bulk_insert"); + estimation_rows_to_insert= rows; + bzero(©_info,sizeof(copy_info)); + start_bulk_insert(rows, flags); + DBUG_VOID_RETURN; + } + int ha_end_bulk_insert(); + int ha_bulk_update_row(const uchar *old_data, const uchar *new_data, + ha_rows *dup_key_found); + int ha_delete_all_rows(); + int ha_truncate(); + int ha_reset_auto_increment(ulonglong value); + int ha_optimize(THD* thd, HA_CHECK_OPT* check_opt); + int ha_analyze(THD* thd, HA_CHECK_OPT* check_opt); + bool 
ha_check_and_repair(THD *thd); + int ha_disable_indexes(uint mode); + int ha_enable_indexes(uint mode); + int ha_discard_or_import_tablespace(my_bool discard); + int ha_rename_table(const char *from, const char *to); + void ha_drop_table(const char *name); + + int ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info); + + int ha_create_partitioning_metadata(const char *name, const char *old_name, + chf_create_flags action_flag); + + int ha_change_partitions(HA_CREATE_INFO *create_info, + const char *path, + ulonglong * const copied, + ulonglong * const deleted, + const uchar *pack_frm_data, + size_t pack_frm_len); + int ha_drop_partitions(const char *path); + int ha_rename_partitions(const char *path); + + void adjust_next_insert_id_after_explicit_value(ulonglong nr); + int update_auto_increment(); + virtual void print_error(int error, myf errflag); + virtual bool get_error_message(int error, String *buf); + uint get_dup_key(int error); + /** + Retrieves the names of the table and the key for which there was a + duplicate entry in the case of HA_ERR_FOREIGN_DUPLICATE_KEY. + + If any of the table or key name is not available this method will return + false and will not change any of child_table_name or child_key_name. + + @param child_table_name[out] Table name + @param child_table_name_len[in] Table name buffer size + @param child_key_name[out] Key name + @param child_key_name_len[in] Key name buffer size + + @retval true table and key names were available + and were written into the corresponding + out parameters. + @retval false table and key names were not available, + the out parameters were not touched. 
+ */ + virtual bool get_foreign_dup_key(char *child_table_name, + uint child_table_name_len, + char *child_key_name, + uint child_key_name_len) + { DBUG_ASSERT(false); return(false); } + void reset_statistics() + { + rows_read= rows_changed= rows_tmp_read= 0; + bzero(index_rows_read, sizeof(index_rows_read)); + bzero(©_info, sizeof(copy_info)); + } + virtual void reset_copy_info() {} + void ha_reset_copy_info() + { + bzero(©_info, sizeof(copy_info)); + reset_copy_info(); + } + virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share) + { + table= table_arg; + table_share= share; + reset_statistics(); + } + virtual double scan_time() + { + return ((ulonglong2double(stats.data_file_length) / stats.block_size + 2) * + avg_io_cost()); + } + + virtual double key_scan_time(uint index) + { + return keyread_time(index, 1, records()); + } + + virtual double avg_io_cost() + { + return 1.0; + } + + /** + The cost of reading a set of ranges from the table using an index + to access it. + + @param index The index number. + @param ranges The number of ranges to be read. If 0, it means that + we calculate separately the cost of reading the key. + @param rows Total number of rows to be read. + + This method can be used to calculate the total cost of scanning a table + using an index by calling it using read_time(index, 1, table_size). + */ + virtual double read_time(uint index, uint ranges, ha_rows rows) + { return rows2double(ranges+rows); } + + /** + Calculate cost of 'keyread' scan for given index and number of records. 
+ + @param index index to read + @param ranges #of ranges to read + @param rows #of records to read + */ + virtual double keyread_time(uint index, uint ranges, ha_rows rows); + + virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; } + + /* + True if changes to the table is persistent (if there are no rollback) + This is used to decide: + - If the table is stored in the transaction or non transactional binary + log + - How things are tracked in trx and in add_changed_table(). + - If we can combine several statements under one commit in the binary log. + */ + bool has_transactions() const + { + return ((ha_table_flags() & (HA_NO_TRANSACTIONS | HA_PERSISTENT_TABLE)) + == 0); + } + /* + True if table has both transactions and rollback. This is used to decide + if we should write the changes to the binary log. If this is true, + we don't have to write failed statements to the log as they can be + rolled back. + */ + bool has_transactions_and_rollback() const + { + return has_transactions() && has_rollback(); + } + /* + True if the underlaying table support transactions and rollback + */ + bool has_transaction_manager() const + { + return ((ha_table_flags() & HA_NO_TRANSACTIONS) == 0 && has_rollback()); + } + + /* + True if the underlaying table support TRANSACTIONAL table option + */ + bool has_transactional_option() const + { + extern handlerton *maria_hton; + return partition_ht() == maria_hton || has_transaction_manager(); + } + + /* + True if table has rollback. Used to check if an update on the table + can be killed fast. + */ + + bool has_rollback() const + { + return ((ht->flags & HTON_NO_ROLLBACK) == 0); + } + + /** + This method is used to analyse the error to see whether the error + is ignorable or not, certain handlers can have more error that are + ignorable than others. E.g. the partition handler can get inserts + into a range where there is no partition and this is an ignorable + error. 
+ HA_ERR_FOUND_DUP_UNIQUE is a special case in MyISAM that means the + same thing as HA_ERR_FOUND_DUP_KEY but can in some cases lead to + a slightly different error message. + */ + virtual bool is_fatal_error(int error, uint flags) + { + if (!error || + ((flags & HA_CHECK_DUP_KEY) && + (error == HA_ERR_FOUND_DUPP_KEY || + error == HA_ERR_FOUND_DUPP_UNIQUE)) || + error == HA_ERR_AUTOINC_ERANGE || + ((flags & HA_CHECK_FK_ERROR) && + (error == HA_ERR_ROW_IS_REFERENCED || + error == HA_ERR_NO_REFERENCED_ROW))) + return FALSE; + return TRUE; + } + + /** + Number of rows in table. It will only be called if + (table_flags() & (HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT)) != 0 + */ + virtual int pre_records() { return 0; } + virtual ha_rows records() { return stats.records; } + /** + Return upper bound of current number of records in the table + (max. of how many records one will retrieve when doing a full table scan) + If upper bound is not known, HA_POS_ERROR should be returned as a max + possible upper bound. + */ + virtual ha_rows estimate_rows_upper_bound() + { return stats.records+EXTRA_RECORDS; } + + /** + Get the row type from the storage engine. If this method returns + ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used. + */ + virtual enum row_type get_row_type() const { return ROW_TYPE_NOT_USED; } + + virtual const char *index_type(uint key_number) { DBUG_ASSERT(0); return "";} + + + /** + Signal that the table->read_set and table->write_set table maps changed + The handler is allowed to set additional bits in the above map in this + call. Normally the handler should ignore all calls until we have done + a ha_rnd_init() or ha_index_init(), write_row(), update_row or delete_row() + as there may be several calls to this routine. + */ + virtual void column_bitmaps_signal(); + /* + We have to check for inited as some engines, like innodb, sets + active_index during table scan. + */ + uint get_index(void) const + { return inited == INDEX ? 
active_index : MAX_KEY; } + int ha_close(void); + + /** + @retval 0 Bulk update used by handler + @retval 1 Bulk update not used, normal operation used + */ + virtual bool start_bulk_update() { return 1; } + /** + @retval 0 Bulk delete used by handler + @retval 1 Bulk delete not used, normal operation used + */ + virtual bool start_bulk_delete() { return 1; } + /** + After this call all outstanding updates must be performed. The number + of duplicate key errors are reported in the duplicate key parameter. + It is allowed to continue to the batched update after this call, the + handler has to wait until end_bulk_update with changing state. + + @param dup_key_found Number of duplicate keys found + + @retval 0 Success + @retval >0 Error code + */ + virtual int exec_bulk_update(ha_rows *dup_key_found) + { + DBUG_ASSERT(FALSE); + return HA_ERR_WRONG_COMMAND; + } + /** + Perform any needed clean-up, no outstanding updates are there at the + moment. + */ + virtual int end_bulk_update() { return 0; } + /** + Execute all outstanding deletes and close down the bulk delete. 
+ + @retval 0 Success + @retval >0 Error code + */ + virtual int end_bulk_delete() + { + DBUG_ASSERT(FALSE); + return HA_ERR_WRONG_COMMAND; + } + virtual int pre_index_read_map(const uchar *key, + key_part_map keypart_map, + enum ha_rkey_function find_flag, + bool use_parallel) + { return 0; } + virtual int pre_index_first(bool use_parallel) + { return 0; } + virtual int pre_index_last(bool use_parallel) + { return 0; } + virtual int pre_index_read_last_map(const uchar *key, + key_part_map keypart_map, + bool use_parallel) + { return 0; } +/* + virtual int pre_read_multi_range_first(KEY_MULTI_RANGE **found_range_p, + KEY_MULTI_RANGE *ranges, + uint range_count, + bool sorted, HANDLER_BUFFER *buffer, + bool use_parallel); +*/ + virtual int pre_multi_range_read_next(bool use_parallel) + { return 0; } + virtual int pre_read_range_first(const key_range *start_key, + const key_range *end_key, + bool eq_range, bool sorted, + bool use_parallel) + { return 0; } + virtual int pre_ft_read(bool use_parallel) + { return 0; } + virtual int pre_rnd_next(bool use_parallel) + { return 0; } + int ha_pre_rnd_init(bool scan) + { + int result; + DBUG_ENTER("ha_pre_rnd_init"); + DBUG_ASSERT(pre_inited==NONE || (pre_inited==RND && scan)); + pre_inited= (result= pre_rnd_init(scan)) ? 
NONE: RND; + DBUG_RETURN(result); + } + int ha_pre_rnd_end() + { + DBUG_ENTER("ha_pre_rnd_end"); + DBUG_ASSERT(pre_inited==RND); + pre_inited=NONE; + DBUG_RETURN(pre_rnd_end()); + } + virtual int pre_rnd_init(bool scan) { return 0; } + virtual int pre_rnd_end() { return 0; } + virtual int pre_index_init(uint idx, bool sorted) { return 0; } + virtual int pre_index_end() { return 0; } + int ha_pre_index_init(uint idx, bool sorted) + { + int result; + DBUG_ENTER("ha_pre_index_init"); + DBUG_ASSERT(pre_inited==NONE); + if (!(result= pre_index_init(idx, sorted))) + pre_inited=INDEX; + DBUG_RETURN(result); + } + int ha_pre_index_end() + { + DBUG_ENTER("ha_pre_index_end"); + DBUG_ASSERT(pre_inited==INDEX); + pre_inited=NONE; + DBUG_RETURN(pre_index_end()); + } + int ha_pre_index_or_rnd_end() + { + return (pre_inited == INDEX ? + ha_pre_index_end() : + pre_inited == RND ? ha_pre_rnd_end() : 0 ); + } + virtual bool vers_can_native(THD *thd) + { + return ht->flags & HTON_NATIVE_SYS_VERSIONING; + } + + /** + @brief + Positions an index cursor to the index specified in the + handle. Fetches the row if available. If the key value is null, + begin at the first key of the index. + */ +protected: + virtual int index_read_map(uchar * buf, const uchar * key, + key_part_map keypart_map, + enum ha_rkey_function find_flag) + { + uint key_len= calculate_key_len(table, active_index, key, keypart_map); + return index_read(buf, key, key_len, find_flag); + } + /** + @brief + Positions an index cursor to the index specified in the + handle. Fetches the row if available. If the key value is null, + begin at the first key of the index. 
+ */ + virtual int index_read_idx_map(uchar * buf, uint index, const uchar * key, + key_part_map keypart_map, + enum ha_rkey_function find_flag); + virtual int index_next(uchar * buf) + { return HA_ERR_WRONG_COMMAND; } + virtual int index_prev(uchar * buf) + { return HA_ERR_WRONG_COMMAND; } + virtual int index_first(uchar * buf) + { return HA_ERR_WRONG_COMMAND; } + virtual int index_last(uchar * buf) + { return HA_ERR_WRONG_COMMAND; } + virtual int index_next_same(uchar *buf, const uchar *key, uint keylen); + /** + @brief + The following functions works like index_read, but it find the last + row with the current key value or prefix. + @returns @see index_read_map(). + */ + virtual int index_read_last_map(uchar * buf, const uchar * key, + key_part_map keypart_map) + { + uint key_len= calculate_key_len(table, active_index, key, keypart_map); + return index_read_last(buf, key, key_len); + } + virtual int close(void)=0; + inline void update_rows_read() + { + if (likely(!internal_tmp_table)) + rows_read++; + else + rows_tmp_read++; + } + inline void update_index_statistics() + { + index_rows_read[active_index]++; + update_rows_read(); + } +public: + + int ha_index_read_map(uchar * buf, const uchar * key, + key_part_map keypart_map, + enum ha_rkey_function find_flag); + int ha_index_read_idx_map(uchar * buf, uint index, const uchar * key, + key_part_map keypart_map, + enum ha_rkey_function find_flag); + int ha_index_next(uchar * buf); + int ha_index_prev(uchar * buf); + int ha_index_first(uchar * buf); + int ha_index_last(uchar * buf); + int ha_index_next_same(uchar *buf, const uchar *key, uint keylen); + /* + TODO: should we make for those functions non-virtual ha_func_name wrappers, + too? 
+ */ + virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, + void *seq_init_param, + uint n_ranges, uint *bufsz, + uint *mrr_mode, + Cost_estimate *cost); + virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, + uint key_parts, uint *bufsz, + uint *mrr_mode, Cost_estimate *cost); + virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, + uint n_ranges, uint mrr_mode, + HANDLER_BUFFER *buf); + virtual int multi_range_read_next(range_id_t *range_info); + /* + Return string representation of the MRR plan. + + This is intended to be used for EXPLAIN, via the following scenario: + 1. SQL layer calls handler->multi_range_read_info(). + 1.1. Storage engine figures out whether it will use some non-default + MRR strategy, sets appropritate bits in *mrr_mode, and returns + control to SQL layer + 2. SQL layer remembers the returned mrr_mode + 3. SQL layer compares various options and choses the final query plan. As + a part of that, it makes a choice of whether to use the MRR strategy + picked in 1.1 + 4. EXPLAIN code converts the query plan to its text representation. If MRR + strategy is part of the plan, it calls + multi_range_read_explain_info(mrr_mode) to get a text representation of + the picked MRR strategy. + + @param mrr_mode Mode which was returned by multi_range_read_info[_const] + @param str INOUT string to be printed for EXPLAIN + @param str_end End of the string buffer. The function is free to put the + string into [str..str_end] memory range. 
+ */ + virtual int multi_range_read_explain_info(uint mrr_mode, char *str, + size_t size) + { return 0; } + + virtual int read_range_first(const key_range *start_key, + const key_range *end_key, + bool eq_range, bool sorted); + virtual int read_range_next(); + void set_end_range(const key_range *end_key); + int compare_key(key_range *range); + int compare_key2(key_range *range) const; + virtual int ft_init() { return HA_ERR_WRONG_COMMAND; } + virtual int pre_ft_init() { return HA_ERR_WRONG_COMMAND; } + virtual void ft_end() {} + virtual int pre_ft_end() { return 0; } + virtual FT_INFO *ft_init_ext(uint flags, uint inx,String *key) + { return NULL; } +public: + virtual int ft_read(uchar *buf) { return HA_ERR_WRONG_COMMAND; } + virtual int rnd_next(uchar *buf)=0; + virtual int rnd_pos(uchar * buf, uchar *pos)=0; + /** + This function only works for handlers having + HA_PRIMARY_KEY_REQUIRED_FOR_POSITION set. + It will return the row with the PK given in the record argument. + */ + virtual int rnd_pos_by_record(uchar *record) + { + int error; + DBUG_ASSERT(table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION); + + error = ha_rnd_init(false); + if (error != 0) + return error; + + position(record); + error = ha_rnd_pos(record, ref); + ha_rnd_end(); + return error; + } + virtual int read_first_row(uchar *buf, uint primary_key); +public: + + /* Same as above, but with statistics */ + inline int ha_ft_read(uchar *buf); + inline void ha_ft_end() { ft_end(); ft_handler=NULL; } + int ha_rnd_next(uchar *buf); + int ha_rnd_pos(uchar *buf, uchar *pos); + inline int ha_rnd_pos_by_record(uchar *buf); + inline int ha_read_first_row(uchar *buf, uint primary_key); + + /** + The following 2 function is only needed for tables that may be + internal temporary tables during joins. 
+ */ + virtual int remember_rnd_pos() + { return HA_ERR_WRONG_COMMAND; } + virtual int restart_rnd_next(uchar *buf) + { return HA_ERR_WRONG_COMMAND; } + + virtual ha_rows records_in_range(uint inx, const key_range *min_key, + const key_range *max_key, + page_range *res) + { return (ha_rows) 10; } + /* + If HA_PRIMARY_KEY_REQUIRED_FOR_POSITION is set, then it sets ref + (reference to the row, aka position, with the primary key given in + the record). + Otherwise it set ref to the current row. + */ + virtual void position(const uchar *record)=0; + virtual int info(uint)=0; // see my_base.h for full description + virtual void get_dynamic_partition_info(PARTITION_STATS *stat_info, + uint part_id); + virtual void set_partitions_to_open(List *partition_names) {} + virtual bool check_if_updates_are_ignored(const char *op) const; + virtual int change_partitions_to_open(List *partition_names) + { return 0; } + virtual int extra(enum ha_extra_function operation) + { return 0; } + virtual int extra_opt(enum ha_extra_function operation, ulong arg) + { return extra(operation); } + /* + Table version id for the the table. This should change for each + sucessfull ALTER TABLE. + This is used by the handlerton->check_version() to ask the engine + if the table definition has been updated. + Storage engines that does not support inplace alter table does not + have to support this call. + */ + virtual ulonglong table_version() const { return 0; } + + /** + In an UPDATE or DELETE, if the row under the cursor was locked by another + transaction, and the engine used an optimistic read of the last + committed row value under the cursor, then the engine returns 1 from this + function. MySQL must NOT try to update this optimistic value. If the + optimistic value does not match the WHERE condition, MySQL can decide to + skip over this row. Currently only works for InnoDB. This can be used to + avoid unnecessary lock waits. 
+ + If this method returns nonzero, it will also signal the storage + engine that the next read will be a locking re-read of the row. + */ + bool ha_was_semi_consistent_read(); + virtual bool was_semi_consistent_read() { return 0; } + /** + Tell the engine whether it should avoid unnecessary lock waits. + If yes, in an UPDATE or DELETE, if the row under the cursor was locked + by another transaction, the engine may try an optimistic read of + the last committed row value under the cursor. + */ + virtual void try_semi_consistent_read(bool) {} + virtual void unlock_row() {} + virtual int start_stmt(THD *thd, thr_lock_type lock_type) {return 0;} + virtual bool need_info_for_auto_inc() { return 0; } + virtual bool can_use_for_auto_inc_init() { return 1; } + virtual void get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *first_value, + ulonglong *nb_reserved_values); + void set_next_insert_id(ulonglong id) + { + DBUG_PRINT("info",("auto_increment: next value %lu", (ulong)id)); + next_insert_id= id; + } + virtual void restore_auto_increment(ulonglong prev_insert_id) + { + /* + Insertion of a row failed, re-use the lastly generated auto_increment + id, for the next row. This is achieved by resetting next_insert_id to + what it was before the failed insertion (that old value is provided by + the caller). If that value was 0, it was the first row of the INSERT; + then if insert_id_for_cur_row contains 0 it means no id was generated + for this first row, so no id was generated since the INSERT started, so + we should set next_insert_id to 0; if insert_id_for_cur_row is not 0, it + is the generated id of the first and failed row, so we use it. + */ + next_insert_id= (prev_insert_id > 0) ? 
prev_insert_id : + insert_id_for_cur_row; + } + + virtual void update_create_info(HA_CREATE_INFO *create_info) {} + int check_old_types(); + virtual int assign_to_keycache(THD* thd, HA_CHECK_OPT* check_opt) + { return HA_ADMIN_NOT_IMPLEMENTED; } + virtual int preload_keys(THD* thd, HA_CHECK_OPT* check_opt) + { return HA_ADMIN_NOT_IMPLEMENTED; } + /* end of the list of admin commands */ + + virtual int indexes_are_disabled(void) {return 0;} + virtual void append_create_info(String *packet) {} + /** + If index == MAX_KEY then a check for table is made and if index < + MAX_KEY then a check is made if the table has foreign keys and if + a foreign key uses this index (and thus the index cannot be dropped). + + @param index Index to check if foreign key uses it + + @retval TRUE Foreign key defined on table or index + @retval FALSE No foreign key defined + */ + virtual bool is_fk_defined_on_table_or_index(uint index) + { return FALSE; } + virtual char* get_foreign_key_create_info() + { return(NULL);} /* gets foreign key create string from InnoDB */ + /** + Used in ALTER TABLE to check if changing storage engine is allowed. + + @note Called without holding thr_lock.c lock. + + @retval true Changing storage engine is allowed. + @retval false Changing storage engine not allowed. + */ + virtual bool can_switch_engines() { return true; } + virtual int can_continue_handler_scan() { return 0; } + /** + Get the list of foreign keys in this table. + + @remark Returns the set of foreign keys where this table is the + dependent or child table. + + @param thd The thread handle. + @param f_key_list[out] The list of foreign keys. + + @return The handler error code or zero for success. + */ + virtual int + get_foreign_key_list(THD *thd, List *f_key_list) + { return 0; } + /** + Get the list of foreign keys referencing this table. + + @remark Returns the set of foreign keys where this table is the + referenced or parent table. + + @param thd The thread handle. 
+ @param f_key_list[out] The list of foreign keys. + + @return The handler error code or zero for success. + */ + virtual int + get_parent_foreign_key_list(THD *thd, List *f_key_list) + { return 0; } + virtual uint referenced_by_foreign_key() { return 0;} + virtual void init_table_handle_for_HANDLER() + { return; } /* prepare InnoDB for HANDLER */ + virtual void free_foreign_key_create_info(char* str) {} + /** The following can be called without an open handler */ + virtual const char *table_type() const { return hton_name(ht)->str; } + /* The following is same as table_table(), except for partition engine */ + virtual const char *real_table_type() const { return hton_name(ht)->str; } + const char **bas_ext() const { return ht->tablefile_extensions; } + + virtual int get_default_no_partitions(HA_CREATE_INFO *create_info) + { return 1;} + virtual void set_auto_partitions(partition_info *part_info) { return; } + virtual bool get_no_parts(const char *name, + uint *no_parts) + { + *no_parts= 0; + return 0; + } + virtual void set_part_info(partition_info *part_info) {return;} + virtual void return_record_by_parent() { return; } + + /* Information about index. 
Both index and part starts from 0 */ + virtual ulong index_flags(uint idx, uint part, bool all_parts) const =0; + + uint max_record_length() const + { return MY_MIN(HA_MAX_REC_LENGTH, max_supported_record_length()); } + uint max_keys() const + { return MY_MIN(MAX_KEY, max_supported_keys()); } + uint max_key_parts() const + { return MY_MIN(MAX_REF_PARTS, max_supported_key_parts()); } + uint max_key_length() const + { return MY_MIN(MAX_DATA_LENGTH_FOR_KEY, max_supported_key_length()); } + uint max_key_part_length() const + { return MY_MIN(MAX_DATA_LENGTH_FOR_KEY, max_supported_key_part_length()); } + + virtual uint max_supported_record_length() const { return HA_MAX_REC_LENGTH; } + virtual uint max_supported_keys() const { return 0; } + virtual uint max_supported_key_parts() const { return MAX_REF_PARTS; } + virtual uint max_supported_key_length() const { return MAX_DATA_LENGTH_FOR_KEY; } + virtual uint max_supported_key_part_length() const { return 255; } + virtual uint min_record_length(uint options) const { return 1; } + + virtual int pre_calculate_checksum() { return 0; } + virtual int calculate_checksum(); + virtual bool is_crashed() const { return 0; } + virtual bool auto_repair(int error) const { return 0; } + + void update_global_table_stats(); + void update_global_index_stats(); + + /** + @note lock_count() can return > 1 if the table is MERGE or partitioned. + */ + virtual uint lock_count(void) const { return 1; } + /** + Get the lock(s) for the table and perform conversion of locks if needed. + + Is not invoked for non-transactional temporary tables. + + @note store_lock() can return more than one lock if the table is MERGE + or partitioned. + + @note that one can NOT rely on table->in_use in store_lock(). It may + refer to a different thread if called from mysql_lock_abort_for_thread(). + + @note If the table is MERGE, store_lock() can return less locks + than lock_count() claimed. 
This can happen when the MERGE children + are not attached when this is called from another thread. + */ + virtual THR_LOCK_DATA **store_lock(THD *thd, + THR_LOCK_DATA **to, + enum thr_lock_type lock_type)=0; + + /** Type of table for caching query */ + virtual uint8 table_cache_type() { return HA_CACHE_TBL_NONTRANSACT; } + + + /** + @brief Register a named table with a call back function to the query cache. + + @param thd The thread handle + @param table_key A pointer to the table name in the table cache + @param key_length The length of the table name + @param[out] engine_callback The pointer to the storage engine call back + function + @param[out] engine_data Storage engine specific data which could be + anything + + This method offers the storage engine, the possibility to store a reference + to a table name which is going to be used with query cache. + The method is called each time a statement is written to the cache and can + be used to verify if a specific statement is cacheable. It also offers + the possibility to register a generic (but static) call back function which + is called each time a statement is matched against the query cache. + + @note If engine_data supplied with this function is different from + engine_data supplied with the callback function, and the callback returns + FALSE, a table invalidation on the current table will occur. + + @return Upon success the engine_callback will point to the storage engine + call back function, if any, and engine_data will point to any storage + engine data used in the specific implementation. 
+ @retval TRUE Success + @retval FALSE The specified table or current statement should not be + cached + */ + + virtual my_bool register_query_cache_table(THD *thd, const char *table_key, + uint key_length, + qc_engine_callback *callback, + ulonglong *engine_data) + { + *callback= 0; + return TRUE; + } + + /* + Count tables invisible from all tables list on which current one built + (like myisammrg and partitioned tables) + + tables_type mask for the tables should be added herdde + + returns number of such tables + */ + + virtual uint count_query_cache_dependant_tables(uint8 *tables_type + __attribute__((unused))) + { + return 0; + } + + /* + register tables invisible from all tables list on which current one built + (like myisammrg and partitioned tables). + + @note they should be counted by method above + + cache Query cache pointer + block Query cache block to write the table + n Number of the table + + @retval FALSE - OK + @retval TRUE - Error + */ + + virtual my_bool + register_query_cache_dependant_tables(THD *thd + __attribute__((unused)), + Query_cache *cache + __attribute__((unused)), + Query_cache_block_table **block + __attribute__((unused)), + uint *n __attribute__((unused))) + { + return FALSE; + } + + /* + Check if the key is a clustering key + + - Data is stored together with the primary key (no secondary lookup + needed to find the row data). The optimizer uses this to find out + the cost of fetching data. + + Note that in many cases a clustered key is also a reference key. + This means that: + + - The key is part of each secondary key and is used + to find the row data in the primary index when reading trough + secondary indexes. + - When doing a HA_KEYREAD_ONLY we get also all the primary key parts + into the row. This is critical property used by index_merge. + + All the above is usually true for engines that store the row + data in the primary key index (e.g. in a b-tree), and use the key + key value as a position(). 
InnoDB is an example of such an engine. + + For a clustered (primary) key, the following should also hold: + index_flags() should contain HA_CLUSTERED_INDEX + table_flags() should contain HA_TABLE_SCAN_ON_INDEX + + For a reference key the following should also hold: + table_flags() should contain HA_PRIMARY_KEY_IS_READ_INDEX. + + @retval TRUE yes + @retval FALSE No. + */ + + /* The following code is for primary keys */ + bool pk_is_clustering_key(uint index) const + { + /* + We have to check for MAX_INDEX as table->s->primary_key can be + MAX_KEY in the case where there is no primary key. + */ + return index != MAX_KEY && is_clustering_key(index); + } + /* Same as before but for other keys, in which case we can skip the check */ + bool is_clustering_key(uint index) const + { + DBUG_ASSERT(index != MAX_KEY); + return (index_flags(index, 0, 1) & HA_CLUSTERED_INDEX); + } + + virtual int cmp_ref(const uchar *ref1, const uchar *ref2) + { + return memcmp(ref1, ref2, ref_length); + } + + /* + Condition pushdown to storage engines + */ + + /** + Push condition down to the table handler. + + @param cond Condition to be pushed. The condition tree must not be + modified by the by the caller. + + @return + The 'remainder' condition that caller must use to filter out records. + NULL means the handler will not return rows that do not match the + passed condition. + + @note + The pushed conditions form a stack (from which one can remove the + last pushed condition using cond_pop). + The table handler filters out rows using (pushed_cond1 AND pushed_cond2 + AND ... AND pushed_condN) + or less restrictive condition, depending on handler's capabilities. + + handler->ha_reset() call empties the condition stack. + Calls to rnd_init/rnd_end, index_init/index_end etc do not affect the + condition stack. + */ + virtual const COND *cond_push(const COND *cond) { return cond; }; + /** + Pop the top condition from the condition stack of the handler instance. 
+ + Pops the top if condition stack, if stack is not empty. + */ + virtual void cond_pop() { return; }; + + /** + Push metadata for the current operation down to the table handler. + */ + virtual int info_push(uint info_type, void *info) { return 0; }; + + /** + Push down an index condition to the handler. + + The server will use this method to push down a condition it wants + the handler to evaluate when retrieving records using a specified + index. The pushed index condition will only refer to fields from + this handler that is contained in the index (but it may also refer + to fields in other handlers). Before the handler evaluates the + condition it must read the content of the index entry into the + record buffer. + + The handler is free to decide if and how much of the condition it + will take responsibility for evaluating. Based on this evaluation + it should return the part of the condition it will not evaluate. + If it decides to evaluate the entire condition it should return + NULL. If it decides not to evaluate any part of the condition it + should return a pointer to the same condition as given as argument. 
+ + @param keyno the index number to evaluate the condition on + @param idx_cond the condition to be evaluated by the handler + + @return The part of the pushed condition that the handler decides + not to evaluate + */ + virtual Item *idx_cond_push(uint keyno, Item* idx_cond) { return idx_cond; } + + /** Reset information about pushed index conditions */ + virtual void cancel_pushed_idx_cond() + { + pushed_idx_cond= NULL; + pushed_idx_cond_keyno= MAX_KEY; + in_range_check_pushed_down= false; + } + + virtual void cancel_pushed_rowid_filter() + { + pushed_rowid_filter= NULL; + rowid_filter_is_active= false; + } + + virtual void disable_pushed_rowid_filter() + { + DBUG_ASSERT(pushed_rowid_filter != NULL && + save_pushed_rowid_filter == NULL); + save_pushed_rowid_filter= pushed_rowid_filter; + if (rowid_filter_is_active) + save_rowid_filter_is_active= rowid_filter_is_active; + pushed_rowid_filter= NULL; + rowid_filter_is_active= false; + } + + virtual void enable_pushed_rowid_filter() + { + DBUG_ASSERT(save_pushed_rowid_filter != NULL && + pushed_rowid_filter == NULL); + pushed_rowid_filter= save_pushed_rowid_filter; + if (save_rowid_filter_is_active) + rowid_filter_is_active= true; + save_pushed_rowid_filter= NULL; + } + + virtual bool rowid_filter_push(Rowid_filter *rowid_filter) { return true; } + + /* Needed for partition / spider */ + virtual TABLE_LIST *get_next_global_for_child() { return NULL; } + + /** + Part of old, deprecated in-place ALTER API. + */ + virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info, + uint table_changes) + { return COMPATIBLE_DATA_NO; } + + /* On-line/in-place ALTER TABLE interface. */ + + /* + Here is an outline of on-line/in-place ALTER TABLE execution through + this interface. + + Phase 1 : Initialization + ======================== + During this phase we determine which algorithm should be used + for execution of ALTER TABLE and what level concurrency it will + require. 
+ + *) This phase starts by opening the table and preparing description + of the new version of the table. + *) Then we check if it is impossible even in theory to carry out + this ALTER TABLE using the in-place algorithm. For example, because + we need to change storage engine or the user has explicitly requested + usage of the "copy" algorithm. + *) If in-place ALTER TABLE is theoretically possible, we continue + by compiling differences between old and new versions of the table + in the form of HA_ALTER_FLAGS bitmap. We also build a few + auxiliary structures describing requested changes and store + all these data in the Alter_inplace_info object. + *) Then the handler::check_if_supported_inplace_alter() method is called + in order to find if the storage engine can carry out changes requested + by this ALTER TABLE using the in-place algorithm. To determine this, + the engine can rely on data in HA_ALTER_FLAGS/Alter_inplace_info + passed to it as well as on its own checks. If the in-place algorithm + can be used for this ALTER TABLE, the level of required concurrency for + its execution is also returned. + If any errors occur during the handler call, ALTER TABLE is aborted + and no further handler functions are called. + *) Locking requirements of the in-place algorithm are compared to any + concurrency requirements specified by user. If there is a conflict + between them, we either switch to the copy algorithm or emit an error. + + Phase 2 : Execution + =================== + + In this phase the operations are executed. + + *) As the first step, we acquire a lock corresponding to the concurrency + level which was returned by handler::check_if_supported_inplace_alter() + and requested by the user. This lock is held for most of the + duration of in-place ALTER (if HA_ALTER_INPLACE_COPY_LOCK + or HA_ALTER_INPLACE_COPY_NO_LOCK were returned we acquire an + exclusive lock for duration of the next step only). 
+ *) After that we call handler::ha_prepare_inplace_alter_table() to give the + storage engine a chance to update its internal structures with a higher + lock level than the one that will be used for the main step of algorithm. + After that we downgrade the lock if it is necessary. + *) After that, the main step of this phase and algorithm is executed. + We call the handler::ha_inplace_alter_table() method, which carries out the + changes requested by ALTER TABLE but does not make them visible to other + connections yet. + *) We ensure that no other connection uses the table by upgrading our + lock on it to exclusive. + *) a) If the previous step succeeds, handler::ha_commit_inplace_alter_table() is + called to allow the storage engine to do any final updates to its structures, + to make all earlier changes durable and visible to other connections. + b) If we have failed to upgrade lock or any errors have occurred during the + handler functions calls (including commit), we call + handler::ha_commit_inplace_alter_table() + to rollback all changes which were done during previous steps. + + Phase 3 : Final + =============== + + In this phase we: + + *) Update SQL-layer data-dictionary by installing .FRM file for the new version + of the table. + *) Inform the storage engine about this change by calling the + hton::notify_table_changed() + *) Destroy the Alter_inplace_info and handler_ctx objects. + + */ + + /** + Check if a storage engine supports a particular alter table in-place + + @param altered_table TABLE object for new version of table. + @param ha_alter_info Structure describing changes to be done + by ALTER TABLE and holding data used + during in-place alter. + + @retval HA_ALTER_ERROR Unexpected error. + @retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported, must use copy. + @retval HA_ALTER_INPLACE_EXCLUSIVE_LOCK Supported, but requires X lock. + @retval HA_ALTER_INPLACE_COPY_LOCK + Supported, but requires SNW lock + during main phase. 
Prepare phase + requires X lock. + @retval HA_ALTER_INPLACE_SHARED_LOCK Supported, but requires SNW lock. + @retval HA_ALTER_INPLACE_COPY_NO_LOCK + Supported, concurrent reads/writes + allowed. However, prepare phase + requires X lock. + @retval HA_ALTER_INPLACE_NO_LOCK Supported, concurrent + reads/writes allowed. + + @note The default implementation uses the old in-place ALTER API + to determine if the storage engine supports in-place ALTER or not. + + @note Called without holding thr_lock.c lock. + */ + virtual enum_alter_inplace_result + check_if_supported_inplace_alter(TABLE *altered_table, + Alter_inplace_info *ha_alter_info); + + + /** + Public functions wrapping the actual handler call. + @see prepare_inplace_alter_table() + */ + bool ha_prepare_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info); + + + /** + Public function wrapping the actual handler call. + @see inplace_alter_table() + */ + bool ha_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info) + { + return inplace_alter_table(altered_table, ha_alter_info); + } + + + /** + Public function wrapping the actual handler call. + Allows us to enforce asserts regardless of handler implementation. + @see commit_inplace_alter_table() + */ + bool ha_commit_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info, + bool commit); + + +protected: + /** + Allows the storage engine to update internal structures with concurrent + writes blocked. If check_if_supported_inplace_alter() returns + HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_COPY_LOCK, + this function is called with exclusive lock otherwise the same level + of locking as for inplace_alter_table() will be used. + + @note Storage engines are responsible for reporting any errors by + calling my_error()/print_error() + + @note If this function reports error, commit_inplace_alter_table() + will be called with commit= false. 
+ + @note For partitioning, failing to prepare one partition, means that + commit_inplace_alter_table() will be called to roll back changes for + all partitions. This means that commit_inplace_alter_table() might be + called without prepare_inplace_alter_table() having been called first + for a given partition. + + @param altered_table TABLE object for new version of table. + @param ha_alter_info Structure describing changes to be done + by ALTER TABLE and holding data used + during in-place alter. + + @retval true Error + @retval false Success + */ + virtual bool prepare_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info) + { return false; } + + + /** + Alter the table structure in-place with operations specified using HA_ALTER_FLAGS + and Alter_inplace_info. The level of concurrency allowed during this + operation depends on the return value from check_if_supported_inplace_alter(). + + @note Storage engines are responsible for reporting any errors by + calling my_error()/print_error() + + @note If this function reports error, commit_inplace_alter_table() + will be called with commit= false. + + @param altered_table TABLE object for new version of table. + @param ha_alter_info Structure describing changes to be done + by ALTER TABLE and holding data used + during in-place alter. + + @retval true Error + @retval false Success + */ + virtual bool inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info) + { return false; } + + + /** + Commit or rollback the changes made during prepare_inplace_alter_table() + and inplace_alter_table() inside the storage engine. + Note that in case of rollback the allowed level of concurrency during + this operation will be the same as for inplace_alter_table() and thus + might be higher than during prepare_inplace_alter_table(). (For example, + concurrent writes were blocked during prepare, but might not be during + rollback). 
+ + @note Storage engines are responsible for reporting any errors by + calling my_error()/print_error() + + @note If this function with commit= true reports error, it will be called + again with commit= false. + + @note In case of partitioning, this function might be called for rollback + without prepare_inplace_alter_table() having been called first. + Also partitioned tables sets ha_alter_info->group_commit_ctx to a NULL + terminated array of the partitions handlers and if all of them are + committed as one, then group_commit_ctx should be set to NULL to indicate + to the partitioning handler that all partitions handlers are committed. + @see prepare_inplace_alter_table(). + + @param altered_table TABLE object for new version of table. + @param ha_alter_info Structure describing changes to be done + by ALTER TABLE and holding data used + during in-place alter. + @param commit True => Commit, False => Rollback. + + @retval true Error + @retval false Success + */ + virtual bool commit_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info, + bool commit) +{ + /* Nothing to commit/rollback, mark all handlers committed! */ + ha_alter_info->group_commit_ctx= NULL; + return false; +} + +public: + /* End of On-line/in-place ALTER TABLE interface. */ + + + /** + use_hidden_primary_key() is called in case of an update/delete when + (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined + but we don't have a primary key + */ + virtual void use_hidden_primary_key(); + virtual alter_table_operations alter_table_flags(alter_table_operations flags) + { + if (ht->alter_table_flags) + return ht->alter_table_flags(flags); + return 0; + } + + virtual LEX_CSTRING *engine_name(); + + TABLE* get_table() { return table; } + TABLE_SHARE* get_table_share() { return table_share; } +protected: + /* Service methods for use by storage engines. */ + THD *ha_thd(void) const; + + /** + Acquire the instrumented table information from a table share. 
+ @return an instrumented table share, or NULL. + */ + PSI_table_share *ha_table_share_psi() const; + + /** + Default rename_table() and delete_table() rename/delete files with a + given name and extensions from bas_ext(). + + These methods can be overridden, but their default implementation + provide useful functionality. + */ + virtual int rename_table(const char *from, const char *to); + + +public: + /** + Delete a table in the engine. Called for base as well as temporary + tables. + */ + virtual int delete_table(const char *name); + bool check_table_binlog_row_based(); + bool prepare_for_row_logging(); + int prepare_for_insert(bool do_create); + int binlog_log_row(TABLE *table, + const uchar *before_record, + const uchar *after_record, + Log_func *log_func); + + inline void clear_cached_table_binlog_row_based_flag() + { + check_table_binlog_row_based_done= 0; + } + virtual void handler_stats_updated() {} + + inline void ha_handler_stats_reset() + { + handler_stats= &active_handler_stats; + active_handler_stats.reset(); + active_handler_stats.active= 1; + handler_stats_updated(); + } + inline void ha_handler_stats_disable() + { + handler_stats= 0; + active_handler_stats.active= 0; + handler_stats_updated(); + } + +private: + /* Cache result to avoid extra calls */ + inline void mark_trx_read_write() + { + if (unlikely(!mark_trx_read_write_done)) + { + mark_trx_read_write_done= 1; + mark_trx_read_write_internal(); + } + } + +private: + void mark_trx_read_write_internal(); + bool check_table_binlog_row_based_internal(); + + int create_lookup_handler(); + void alloc_lookup_buffer(); + int check_duplicate_long_entries(const uchar *new_rec); + int check_duplicate_long_entries_update(const uchar *new_rec); + int check_duplicate_long_entry_key(const uchar *new_rec, uint key_no); + /** PRIMARY KEY/UNIQUE WITHOUT OVERLAPS check */ + int ha_check_overlaps(const uchar *old_data, const uchar* new_data); + +protected: + /* + These are intended to be used only by 
handler::ha_xxxx() functions + However, engines that implement read_range_XXX() (like MariaRocks) + or embed other engines (like ha_partition) may need to call these also + */ + inline void increment_statistics(ulong SSV::*offset) const; + inline void decrement_statistics(ulong SSV::*offset) const; + +private: + /* + Low-level primitives for storage engines. These should be + overridden by the storage engine class. To call these methods, use + the corresponding 'ha_*' method above. + */ + + virtual int open(const char *name, int mode, uint test_if_locked)=0; + /* Note: ha_index_read_idx_map() may bypass index_init() */ + virtual int index_init(uint idx, bool sorted) { return 0; } + virtual int index_end() { return 0; } + /** + rnd_init() can be called two times without rnd_end() in between + (it only makes sense if scan=1). + then the second call should prepare for the new table scan (e.g + if rnd_init allocates the cursor, second call should position it + to the start of the table, no need to deallocate and allocate it again + */ + virtual int rnd_init(bool scan)= 0; + virtual int rnd_end() { return 0; } + virtual int write_row(const uchar *buf __attribute__((unused))) + { + return HA_ERR_WRONG_COMMAND; + } + + /** + Update a single row. + + Note: If HA_ERR_FOUND_DUPP_KEY is returned, the handler must read + all columns of the row so MySQL can create an error message. If + the columns required for the error message are not read, the error + message will contain garbage. + */ + virtual int update_row(const uchar *old_data __attribute__((unused)), + const uchar *new_data __attribute__((unused))) + { + return HA_ERR_WRONG_COMMAND; + } + + /* + Optimized function for updating the first row. 
Only used by sequence + tables + */ + virtual int update_first_row(const uchar *new_data); + + virtual int delete_row(const uchar *buf __attribute__((unused))) + { + return HA_ERR_WRONG_COMMAND; + } + + /* Perform initialization for a direct update request */ +public: + int ha_direct_update_rows(ha_rows *update_rows, ha_rows *found_rows); + virtual int direct_update_rows_init(List *update_fields) + { + return HA_ERR_WRONG_COMMAND; + } +private: + virtual int pre_direct_update_rows_init(List *update_fields) + { + return HA_ERR_WRONG_COMMAND; + } + virtual int direct_update_rows(ha_rows *update_rows __attribute__((unused)), + ha_rows *found_rows __attribute__((unused))) + { + return HA_ERR_WRONG_COMMAND; + } + virtual int pre_direct_update_rows() + { + return HA_ERR_WRONG_COMMAND; + } + + /* Perform initialization for a direct delete request */ +public: + int ha_direct_delete_rows(ha_rows *delete_rows); + virtual int direct_delete_rows_init() + { + return HA_ERR_WRONG_COMMAND; + } +private: + virtual int pre_direct_delete_rows_init() + { + return HA_ERR_WRONG_COMMAND; + } + virtual int direct_delete_rows(ha_rows *delete_rows __attribute__((unused))) + { + return HA_ERR_WRONG_COMMAND; + } + virtual int pre_direct_delete_rows() + { + return HA_ERR_WRONG_COMMAND; + } + + /** + Reset state of file to after 'open'. + This function is called after every statement for all tables used + by that statement. + */ + virtual int reset() { return 0; } + virtual Table_flags table_flags(void) const= 0; + /** + Is not invoked for non-transactional temporary tables. + + Tells the storage engine that we intend to read or write data + from the table. This call is prefixed with a call to handler::store_lock() + and is invoked only for those handler instances that stored the lock. + + Calls to rnd_init/index_init are prefixed with this call. When table + IO is complete, we call external_lock(F_UNLCK). 
+ A storage engine writer should expect that each call to + ::external_lock(F_[RD|WR]LOCK is followed by a call to + ::external_lock(F_UNLCK). If it is not, it is a bug in MySQL. + + The name and signature originate from the first implementation + in MyISAM, which would call fcntl to set/clear an advisory + lock on the data file in this method. + + @param lock_type F_RDLCK, F_WRLCK, F_UNLCK + + @return non-0 in case of failure, 0 in case of success. + When lock_type is F_UNLCK, the return value is ignored. + */ + virtual int external_lock(THD *thd __attribute__((unused)), + int lock_type __attribute__((unused))) + { + return 0; + } + virtual void release_auto_increment() { return; }; + /** admin commands - called from mysql_admin_table */ + virtual int check_for_upgrade(HA_CHECK_OPT *check_opt) + { return 0; } + virtual int check(THD* thd, HA_CHECK_OPT* check_opt) + { return HA_ADMIN_NOT_IMPLEMENTED; } + + /** + In this method check_opt can be modified + to specify CHECK option to use to call check() + upon the table. + */ + virtual int repair(THD* thd, HA_CHECK_OPT* check_opt) + { + DBUG_ASSERT(!(ha_table_flags() & HA_CAN_REPAIR)); + return HA_ADMIN_NOT_IMPLEMENTED; + } +protected: + virtual void start_bulk_insert(ha_rows rows, uint flags) {} + virtual int end_bulk_insert() { return 0; } + virtual int index_read(uchar * buf, const uchar * key, uint key_len, + enum ha_rkey_function find_flag) + { return HA_ERR_WRONG_COMMAND; } + virtual int index_read_last(uchar * buf, const uchar * key, uint key_len) + { + my_errno= HA_ERR_WRONG_COMMAND; + return HA_ERR_WRONG_COMMAND; + } + friend class ha_partition; + friend class ha_sequence; +public: + /** + This method is similar to update_row, however the handler doesn't need + to execute the updates at this point in time. The handler can be certain + that another call to bulk_update_row will occur OR a call to + exec_bulk_update before the set of updates in this query is concluded. 
+ + @param old_data Old record + @param new_data New record + @param dup_key_found Number of duplicate keys found + + @retval 0 Bulk delete used by handler + @retval 1 Bulk delete not used, normal operation used + */ + virtual int bulk_update_row(const uchar *old_data, const uchar *new_data, + ha_rows *dup_key_found) + { + DBUG_ASSERT(FALSE); + return HA_ERR_WRONG_COMMAND; + } + /** + This is called to delete all rows in a table + If the handler don't support this, then this function will + return HA_ERR_WRONG_COMMAND and MySQL will delete the rows one + by one. + */ + virtual int delete_all_rows() + { return (my_errno=HA_ERR_WRONG_COMMAND); } + /** + Quickly remove all rows from a table. + + @remark This method is responsible for implementing MySQL's TRUNCATE + TABLE statement, which is a DDL operation. As such, a engine + can bypass certain integrity checks and in some cases avoid + fine-grained locking (e.g. row locks) which would normally be + required for a DELETE statement. + + @remark Typically, truncate is not used if it can result in integrity + violation. For example, truncate is not used when a foreign + key references the table, but it might be used if foreign key + checks are disabled. + + @remark Engine is responsible for resetting the auto-increment counter. + + @remark The table is locked in exclusive mode. + */ + virtual int truncate() + { + int error= delete_all_rows(); + return error ? error : reset_auto_increment(0); + } + /** + Reset the auto-increment counter to the given value, i.e. the next row + inserted will get the given value. 
+ */ + virtual int reset_auto_increment(ulonglong value) + { return 0; } + virtual int optimize(THD* thd, HA_CHECK_OPT* check_opt) + { return HA_ADMIN_NOT_IMPLEMENTED; } + virtual int analyze(THD* thd, HA_CHECK_OPT* check_opt) + { return HA_ADMIN_NOT_IMPLEMENTED; } + virtual bool check_and_repair(THD *thd) { return TRUE; } + virtual int disable_indexes(uint mode) { return HA_ERR_WRONG_COMMAND; } + virtual int enable_indexes(uint mode) { return HA_ERR_WRONG_COMMAND; } + virtual int discard_or_import_tablespace(my_bool discard) + { return (my_errno=HA_ERR_WRONG_COMMAND); } + virtual void drop_table(const char *name); + virtual int create(const char *name, TABLE *form, HA_CREATE_INFO *info)=0; + + virtual int create_partitioning_metadata(const char *name, + const char *old_name, + chf_create_flags action_flag) + { return FALSE; } + + virtual int change_partitions(HA_CREATE_INFO *create_info, + const char *path, + ulonglong * const copied, + ulonglong * const deleted, + const uchar *pack_frm_data, + size_t pack_frm_len) + { return HA_ERR_WRONG_COMMAND; } + /* @return true if it's necessary to switch current statement log format from + STATEMENT to ROW if binary log format is MIXED and autoincrement values + are changed in the statement */ + virtual bool autoinc_lock_mode_stmt_unsafe() const + { return false; } + virtual int drop_partitions(const char *path) + { return HA_ERR_WRONG_COMMAND; } + virtual int rename_partitions(const char *path) + { return HA_ERR_WRONG_COMMAND; } + virtual bool set_ha_share_ref(Handler_share **arg_ha_share) + { + DBUG_ASSERT(!ha_share); + DBUG_ASSERT(arg_ha_share); + if (ha_share || !arg_ha_share) + return true; + ha_share= arg_ha_share; + return false; + } + void set_table(TABLE* table_arg) { table= table_arg; } + int get_lock_type() const { return m_lock_type; } +public: + /* XXX to be removed, see ha_partition::partition_ht() */ + virtual handlerton *partition_ht() const + { return ht; } + virtual bool partition_engine() { return 0;} + 
inline int ha_write_tmp_row(uchar *buf); + inline int ha_delete_tmp_row(uchar *buf); + inline int ha_update_tmp_row(const uchar * old_data, uchar * new_data); + + virtual void set_lock_type(enum thr_lock_type lock); + friend check_result_t handler_index_cond_check(void* h_arg); + friend check_result_t handler_rowid_filter_check(void *h_arg); + + /** + Find unique record by index or unique constraint + + @param record record to find (also will be filled with + actual record fields) + @param unique_ref index or unique constraint number (depends + on what is used in the engine) + + @retval -1 Error + @retval 1 Not found + @retval 0 Found + */ + virtual int find_unique_row(uchar *record, uint unique_ref) + { return -1; /*unsupported */} + + bool native_versioned() const + { DBUG_ASSERT(ht); return partition_ht()->flags & HTON_NATIVE_SYS_VERSIONING; } + virtual void update_partition(uint part_id) + {} + + /** + Some engines can perform column type conversion with ALGORITHM=INPLACE. + These functions check for such possibility. + Implementation could be based on Field_xxx::is_equal() + */ + virtual bool can_convert_nocopy(const Field &, + const Column_definition &) const + { + return false; + } + /* If the table is using sql level unique constraints on some column */ + inline bool has_long_unique(); + + /* Used for ALTER TABLE. + Some engines can handle some differences in indexes by themselves. */ + virtual Compare_keys compare_key_parts(const Field &old_field, + const Column_definition &new_field, + const KEY_PART_INFO &old_part, + const KEY_PART_INFO &new_part) const; + + +/* + If lower_case_table_names == 2 (case-preserving but case-insensitive + file system) and the storage is not HA_FILE_BASED, we need to provide + a lowercase file name for the engine. 
+*/ + inline bool needs_lower_case_filenames() + { + return (lower_case_table_names == 2 && !(ha_table_flags() & HA_FILE_BASED)); + } + + bool log_not_redoable_operation(const char *operation); + +protected: + Handler_share *get_ha_share_ptr(); + void set_ha_share_ptr(Handler_share *arg_ha_share); + void lock_shared_ha_data(); + void unlock_shared_ha_data(); +}; + +#include "multi_range_read.h" +#include "group_by_handler.h" + +bool key_uses_partial_cols(TABLE_SHARE *table, uint keyno); + + /* Some extern variables used with handlers */ + +extern const LEX_CSTRING ha_row_type[]; +extern MYSQL_PLUGIN_IMPORT const char *tx_isolation_names[]; +extern MYSQL_PLUGIN_IMPORT const char *binlog_format_names[]; +extern TYPELIB tx_isolation_typelib; +extern const char *myisam_stats_method_names[]; +extern ulong total_ha, total_ha_2pc; + +/* lookups */ +plugin_ref ha_resolve_by_name(THD *thd, const LEX_CSTRING *name, bool tmp_table); +plugin_ref ha_lock_engine(THD *thd, const handlerton *hton); +handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type); +handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc, + handlerton *db_type); +handlerton *ha_checktype(THD *thd, handlerton *hton, bool no_substitute); + +static inline handlerton *ha_checktype(THD *thd, enum legacy_db_type type, + bool no_substitute = 0) +{ + return ha_checktype(thd, ha_resolve_by_legacy_type(thd, type), no_substitute); +} + +static inline enum legacy_db_type ha_legacy_type(const handlerton *db_type) +{ + return (db_type == NULL) ? DB_TYPE_UNKNOWN : db_type->db_type; +} + +static inline const char *ha_resolve_storage_engine_name(const handlerton *db_type) +{ + return (db_type == NULL ? "UNKNOWN" : + db_type == view_pseudo_hton ? 
"VIEW" : hton_name(db_type)->str); +} + +static inline bool ha_check_storage_engine_flag(const handlerton *db_type, uint32 flag) +{ + return db_type && (db_type->flags & flag); +} + +static inline bool ha_storage_engine_is_enabled(const handlerton *db_type) +{ + return db_type && db_type->create; +} + +/* basic stuff */ +int ha_init_errors(void); +int ha_init(void); +int ha_end(void); +int ha_initialize_handlerton(st_plugin_int *plugin); +int ha_finalize_handlerton(st_plugin_int *plugin); + +TYPELIB *ha_known_exts(void); +int ha_panic(enum ha_panic_function flag); +void ha_close_connection(THD* thd); +void ha_kill_query(THD* thd, enum thd_kill_levels level); +void ha_signal_ddl_recovery_done(); +bool ha_flush_logs(); +void ha_drop_database(const char* path); +void ha_checkpoint_state(bool disable); +void ha_commit_checkpoint_request(void *cookie, void (*pre_hook)(void *)); +int ha_create_table(THD *thd, const char *path, const char *db, + const char *table_name, HA_CREATE_INFO *create_info, + LEX_CUSTRING *frm, bool skip_frm_file); +int ha_delete_table(THD *thd, handlerton *db_type, const char *path, + const LEX_CSTRING *db, const LEX_CSTRING *alias, + bool generate_warning); +int ha_delete_table_force(THD *thd, const char *path, const LEX_CSTRING *db, + const LEX_CSTRING *alias); + +void ha_prepare_for_backup(); +void ha_end_backup(); +void ha_pre_shutdown(); + +void ha_disable_internal_writes(bool disable); + +/* statistics and info */ +bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat); + +/* discovery */ +#ifdef MYSQL_SERVER +class Discovered_table_list: public handlerton::discovered_list +{ + THD *thd; + const char *wild, *wend; + bool with_temps; // whether to include temp tables in the result +public: + Dynamic_array *tables; + + Discovered_table_list(THD *thd_arg, Dynamic_array *tables_arg, + const LEX_CSTRING *wild_arg); + Discovered_table_list(THD *thd_arg, Dynamic_array *tables_arg) + : thd(thd_arg), wild(NULL), with_temps(true), 
tables(tables_arg) {} + ~Discovered_table_list() = default; + + bool add_table(const char *tname, size_t tlen); + bool add_file(const char *fname); + + void sort(); + void remove_duplicates(); // assumes that the list is sorted +#ifndef DBUG_OFF + /* + Used to find unstable mtr tests querying + INFORMATION_SCHEMA.TABLES without ORDER BY. + */ + void sort_desc(); +#endif /* DBUG_OFF */ +}; + +int ha_discover_table(THD *thd, TABLE_SHARE *share); +int ha_discover_table_names(THD *thd, LEX_CSTRING *db, MY_DIR *dirp, + Discovered_table_list *result, bool reusable); +bool ha_table_exists(THD *thd, const LEX_CSTRING *db, + const LEX_CSTRING *table_name, + LEX_CUSTRING *table_version= 0, + LEX_CSTRING *partition_engine_name= 0, + handlerton **hton= 0, bool *is_sequence= 0); +bool ha_check_if_updates_are_ignored(THD *thd, handlerton *hton, + const char *op); +#endif /* MYSQL_SERVER */ + +/* key cache */ +extern "C" int ha_init_key_cache(const char *name, KEY_CACHE *key_cache, void *); +int ha_resize_key_cache(KEY_CACHE *key_cache); +int ha_change_key_cache_param(KEY_CACHE *key_cache); +int ha_repartition_key_cache(KEY_CACHE *key_cache); +int ha_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache); + +/* transactions: interface to handlerton functions */ +int ha_start_consistent_snapshot(THD *thd); +int ha_commit_or_rollback_by_xid(XID *xid, bool commit); +int ha_commit_one_phase(THD *thd, bool all); +int ha_commit_trans(THD *thd, bool all); +int ha_rollback_trans(THD *thd, bool all); +int ha_prepare(THD *thd); +int ha_recover(HASH *commit_list, MEM_ROOT *mem_root= NULL); +uint ha_recover_complete(HASH *commit_list, Binlog_offset *coord= NULL); + +/* transactions: these functions never call handlerton functions directly */ +int ha_enable_transaction(THD *thd, bool on); + +/* savepoints */ +int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv); +bool ha_rollback_to_savepoint_can_release_mdl(THD *thd); +int ha_savepoint(THD *thd, SAVEPOINT *sv); +int 
ha_release_savepoint(THD *thd, SAVEPOINT *sv); +#ifdef WITH_WSREP +int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal); +#endif + +/* these are called by storage engines */ +void trans_register_ha(THD *thd, bool all, handlerton *ht, + ulonglong trxid); + +/* + Storage engine has to assume the transaction will end up with 2pc if + - there is more than one 2pc-capable storage engine available + - in the current transaction 2pc was not disabled yet +*/ +#define trans_need_2pc(thd, all) ((total_ha_2pc > 1) && \ + !((all ? &thd->transaction.all : &thd->transaction.stmt)->no_2pc)) + +const char *get_canonical_filename(handler *file, const char *path, + char *tmp_path); +void commit_checkpoint_notify_ha(void *cookie); + +inline const LEX_CSTRING *table_case_name(HA_CREATE_INFO *info, const LEX_CSTRING *name) +{ + return ((lower_case_table_names == 2 && info->alias.str) ? &info->alias : name); +} + +typedef bool Log_func(THD*, TABLE*, bool, const uchar*, const uchar*); +int binlog_log_row(TABLE* table, + const uchar *before_record, + const uchar *after_record, + Log_func *log_func); + +/** + @def MYSQL_TABLE_IO_WAIT + Instrumentation helper for table io_waits. + Note that this helper is intended to be used from + within the handler class only, as it uses members + from @c handler + Performance schema events are instrumented as follows: + - in non batch mode, one event is generated per call + - in batch mode, the number of rows affected is saved + in @c m_psi_numrows, so that @c end_psi_batch_mode() + generates a single event for the batch. + @param OP the table operation to be performed + @param INDEX the table index used if any, or MAX_KEY. + @param PAYLOAD instrumented code to execute + @sa handler::end_psi_batch_mode. 
+*/ +#ifdef HAVE_PSI_TABLE_INTERFACE + #define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD) \ + { \ + if (m_psi != NULL) \ + { \ + switch (m_psi_batch_mode) \ + { \ + case PSI_BATCH_MODE_NONE: \ + { \ + PSI_table_locker *sub_locker= NULL; \ + PSI_table_locker_state reentrant_safe_state; \ + sub_locker= PSI_TABLE_CALL(start_table_io_wait) \ + (& reentrant_safe_state, m_psi, OP, INDEX, \ + __FILE__, __LINE__); \ + PAYLOAD \ + if (sub_locker != NULL) \ + PSI_TABLE_CALL(end_table_io_wait) \ + (sub_locker, 1); \ + break; \ + } \ + case PSI_BATCH_MODE_STARTING: \ + { \ + m_psi_locker= PSI_TABLE_CALL(start_table_io_wait) \ + (& m_psi_locker_state, m_psi, OP, INDEX, \ + __FILE__, __LINE__); \ + PAYLOAD \ + if (!RESULT) \ + m_psi_numrows++; \ + m_psi_batch_mode= PSI_BATCH_MODE_STARTED; \ + break; \ + } \ + case PSI_BATCH_MODE_STARTED: \ + default: \ + { \ + DBUG_ASSERT(m_psi_batch_mode \ + == PSI_BATCH_MODE_STARTED); \ + PAYLOAD \ + if (!RESULT) \ + m_psi_numrows++; \ + break; \ + } \ + } \ + } \ + else \ + { \ + PAYLOAD \ + } \ + } +#else + #define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD) \ + PAYLOAD +#endif + +#define TABLE_IO_WAIT(TRACKER, OP, INDEX, RESULT, PAYLOAD) \ + { \ + Exec_time_tracker *this_tracker; \ + if (unlikely((this_tracker= tracker))) \ + tracker->start_tracking(table->in_use); \ + \ + MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD); \ + \ + if (unlikely(this_tracker)) \ + tracker->stop_tracking(table->in_use); \ + } +void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag); +void print_keydup_error(TABLE *table, KEY *key, myf errflag); + +int del_global_index_stat(THD *thd, TABLE* table, KEY* key_info); +int del_global_table_stat(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *table); +uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info); +bool non_existing_table_error(int error); +uint ha_count_rw_2pc(THD *thd, bool all); +uint ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list, + bool all); + 
+#endif /* HANDLER_INCLUDED */ diff --git a/sql/hash_filo.cc b/sql/hash_filo.cc new file mode 100644 index 00000000..085c12f1 --- /dev/null +++ b/sql/hash_filo.cc @@ -0,0 +1,33 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* +** A class for static sized hash tables where old entries are deleted according +** to usage. +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +#include "hash_filo.h" + +#ifdef _WIN32 +// Remove linker warning 4221 about empty file +namespace { char dummy; }; +#endif // _WIN32 diff --git a/sql/hash_filo.h b/sql/hash_filo.h new file mode 100644 index 00000000..4dba104f --- /dev/null +++ b/sql/hash_filo.h @@ -0,0 +1,214 @@ +/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+
+/*
+** A class for static sized hash tables where old entries are deleted in
+** first-in-last-out order according to usage.
+*/
+
+#ifndef HASH_FILO_H
+#define HASH_FILO_H
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface                       /* gcc class interface */
+#endif
+
+#include "hash.h"        /* my_hash_get_key, my_hash_free_key, HASH */
+#include "m_string.h"    /* bzero */
+#include "mysqld.h"      /* key_hash_filo_lock */
+
+/*
+  One entry of the FILO cache.  Entries form an intrusive doubly-linked
+  "used" chain (most-recently-used at the head); only hash_filo itself
+  may relink them.
+*/
+class hash_filo_element
+{
+private:
+  hash_filo_element *next_used,*prev_used;
+ public:
+  hash_filo_element() = default;
+  hash_filo_element *next()
+  { return next_used; }
+  hash_filo_element *prev()
+  { return prev_used; }
+
+  friend class hash_filo;
+};
+
+
+/*
+  Fixed-size hash cache.  When full, add() evicts the least recently
+  used entry (the tail of the used chain).  All lookups/mutations except
+  clear()/resize() require the caller to hold 'lock' (asserted via
+  mysql_mutex_assert_owner).
+*/
+class hash_filo
+{
+private:
+  PSI_memory_key m_psi_key;
+  const uint key_offset, key_length;
+  const my_hash_get_key get_key;
+  /** Size of this hash table. */
+  uint m_size;
+  my_hash_free_key free_element;
+  bool init;                            /* true once 'lock' is initialized */
+  CHARSET_INFO *hash_charset;
+
+  /* Head (MRU) and tail (LRU) of the used chain. */
+  hash_filo_element *first_link,*last_link;
+public:
+  mysql_mutex_t lock;
+  HASH cache;
+
+  hash_filo(PSI_memory_key psi_key, uint size_arg, uint key_offset_arg,
+            uint key_length_arg, my_hash_get_key get_key_arg,
+            my_hash_free_key free_element_arg, CHARSET_INFO *hash_charset_arg)
+    : m_psi_key(psi_key), key_offset(key_offset_arg),
+    key_length(key_length_arg), get_key(get_key_arg), m_size(size_arg),
+    free_element(free_element_arg),init(0), hash_charset(hash_charset_arg),
+    first_link(NULL), last_link(NULL)
+  {
+    bzero((char*) &cache,sizeof(cache));
+  }
+
+  ~hash_filo()
+  {
+    if (init)
+    {
+      if (cache.array.buffer)  /* Avoid problems with thread library */
+        (void) my_hash_free(&cache);
+      mysql_mutex_destroy(&lock);
+    }
+  }
+
+  /*
+    Empty the cache and (re)create the underlying HASH.  Initializes the
+    mutex on first use.  Pass locked=true if the caller already holds
+    'lock' (as resize() does).
+  */
+  void clear(bool locked=0)
+  {
+    if (!init)
+    {
+      init=1;
+      mysql_mutex_init(key_hash_filo_lock, &lock, MY_MUTEX_INIT_FAST);
+    }
+    if (!locked)
+      mysql_mutex_lock(&lock);
+    first_link= NULL;
+    last_link= NULL;
+    (void) my_hash_free(&cache);
+    (void) my_hash_init(m_psi_key, &cache,hash_charset,m_size,key_offset,
+                        key_length, get_key, free_element, 0);
+    if (!locked)
+      mysql_mutex_unlock(&lock);
+  }
+
+  /* Most recently used entry; caller must hold 'lock'. */
+  hash_filo_element *first()
+  {
+    mysql_mutex_assert_owner(&lock);
+    return first_link;
+  }
+
+  /* Least recently used entry; caller must hold 'lock'. */
+  hash_filo_element *last()
+  {
+    mysql_mutex_assert_owner(&lock);
+    return last_link;
+  }
+
+  /*
+    Look up an entry by key.  On a hit the entry is moved to the head of
+    the used chain (it becomes the most recently used).  Caller must
+    hold 'lock'.  Returns NULL when the key is not cached.
+  */
+  hash_filo_element *search(uchar* key, size_t length)
+  {
+    mysql_mutex_assert_owner(&lock);
+
+    hash_filo_element *entry=(hash_filo_element*)
+      my_hash_search(&cache,(uchar*) key,length);
+    if (entry)
+    {                                   // Found; link it first
+      DBUG_ASSERT(first_link != NULL);
+      DBUG_ASSERT(last_link != NULL);
+      if (entry != first_link)
+      {                                 // Relink used-chain
+        if (entry == last_link)
+        {
+          last_link= last_link->prev_used;
+          /*
+            The list must have at least 2 elements,
+            otherwise entry would be equal to first_link.
+          */
+          DBUG_ASSERT(last_link != NULL);
+          last_link->next_used= NULL;
+        }
+        else
+        {
+          DBUG_ASSERT(entry->next_used != NULL);
+          DBUG_ASSERT(entry->prev_used != NULL);
+          entry->next_used->prev_used = entry->prev_used;
+          entry->prev_used->next_used = entry->next_used;
+        }
+        entry->prev_used= NULL;
+        entry->next_used= first_link;
+
+        first_link->prev_used= entry;
+        first_link=entry;
+      }
+    }
+    return entry;
+  }
+
+  /*
+    Insert a new entry at the head of the chain, evicting the LRU entry
+    first when the cache is full.  Returns 1 on failure (zero-sized
+    cache or my_hash_insert error; the entry is freed via free_element
+    in the latter case), 0 on success.
+  */
+  bool add(hash_filo_element *entry)
+  {
+    if (!m_size) return 1;
+    if (cache.records == m_size)
+    {
+      hash_filo_element *tmp=last_link;
+      last_link= last_link->prev_used;
+      if (last_link != NULL)
+      {
+        last_link->next_used= NULL;
+      }
+      else
+      {
+        /* Pathological case, m_size == 1 */
+        first_link= NULL;
+      }
+      my_hash_delete(&cache,(uchar*) tmp);
+    }
+    if (my_hash_insert(&cache,(uchar*) entry))
+    {
+      if (free_element)
+        (*free_element)(entry);         // This should never happen
+      return 1;
+    }
+    entry->prev_used= NULL;
+    entry->next_used= first_link;
+    if (first_link != NULL)
+      first_link->prev_used= entry;
+    else
+      last_link= entry;
+    first_link= entry;
+
+    return 0;
+  }
+
+  uint size()
+  { return m_size; }
+
+  /* Change capacity; drops all cached entries via clear(). */
+  void resize(uint new_size)
+  {
+    mysql_mutex_lock(&lock);
+    m_size= new_size;
+    clear(true);
+    mysql_mutex_unlock(&lock);
+  }
+};
+
+/*
+  Thin typed wrapper: same behaviour as hash_filo, but first()/last()/
+  search() return T* instead of hash_filo_element*.
+  NOTE: the extraction of this patch had stripped the "<class T>"
+  parameter list (HTML-style garbling); restored here.
+*/
+template <class T> class Hash_filo: public hash_filo
+{
+public:
+  Hash_filo(PSI_memory_key psi_key, uint size_arg, uint key_offset_arg, uint
+            key_length_arg, my_hash_get_key get_key_arg, my_hash_free_key
+            free_element_arg, CHARSET_INFO *hash_charset_arg) :
+    hash_filo(psi_key, size_arg, key_offset_arg, key_length_arg,
+              get_key_arg, free_element_arg, hash_charset_arg) {}
+  T* first() { return (T*)hash_filo::first(); }
+  T* last()  { return (T*)hash_filo::last(); }
+  T* search(uchar* key, size_t len) { return (T*)hash_filo::search(key, len); }
+};
+
+#endif
diff --git a/sql/hostname.cc b/sql/hostname.cc
new file mode 100644
index 00000000..d5f342b9
--- /dev/null
+++ b/sql/hostname.cc
@@ -0,0 +1,986 @@
+/* Copyright (c) 2000, 2013, Oracle
and/or its affiliates. + Copyright (c) 2011, 2014, SkySQL Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/** + @file + + @brief + Get hostname for an IP address. + + Hostnames are checked with reverse name lookup and checked that they + doesn't resemble an IP address. +*/ +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" // SPECIAL_NO_HOST_CACHE +#include "hostname.h" +#ifndef _WIN32 +#include // getservbyname, servent +#endif +#include "hash_filo.h" +#include +#include "log.h" // sql_print_warning, + // sql_print_information +#include "violite.h" // vio_getnameinfo, + // vio_get_normalized_ip_string +#ifdef __cplusplus +extern "C" { // Because of SCO 3.2V4.2 +#endif +#if !defined( _WIN32) +#ifdef HAVE_SYS_UN_H +#include +#endif +#include +#endif // _WIN32 +#ifdef __cplusplus +} +#endif + +Host_errors::Host_errors() +: m_connect(0), + m_host_blocked(0), + m_nameinfo_transient(0), + m_nameinfo_permanent(0), + m_format(0), + m_addrinfo_transient(0), + m_addrinfo_permanent(0), + m_FCrDNS(0), + m_host_acl(0), + m_no_auth_plugin(0), + m_auth_plugin(0), + m_handshake(0), + m_proxy_user(0), + m_proxy_user_acl(0), + m_authentication(0), + m_ssl(0), + m_max_user_connection(0), + m_max_user_connection_per_hour(0), + m_default_database(0), + m_init_connect(0), + m_local(0) +{} + +Host_errors::~Host_errors() = default; + +void Host_errors::reset() +{ + 
m_connect= 0; + m_host_blocked= 0; + m_nameinfo_transient= 0; + m_nameinfo_permanent= 0; + m_format= 0; + m_addrinfo_transient= 0; + m_addrinfo_permanent= 0; + m_FCrDNS= 0; + m_host_acl= 0; + m_no_auth_plugin= 0; + m_auth_plugin= 0; + m_handshake= 0; + m_proxy_user= 0; + m_proxy_user_acl= 0; + m_authentication= 0; + m_ssl= 0; + m_max_user_connection= 0; + m_max_user_connection_per_hour= 0; + m_default_database= 0; + m_init_connect= 0; + m_local= 0; +} + +void Host_errors::aggregate(const Host_errors *errors) +{ + m_connect+= errors->m_connect; + m_host_blocked+= errors->m_host_blocked; + m_nameinfo_transient+= errors->m_nameinfo_transient; + m_nameinfo_permanent+= errors->m_nameinfo_permanent; + m_format+= errors->m_format; + m_addrinfo_transient+= errors->m_addrinfo_transient; + m_addrinfo_permanent+= errors->m_addrinfo_permanent; + m_FCrDNS+= errors->m_FCrDNS; + m_host_acl+= errors->m_host_acl; + m_no_auth_plugin+= errors->m_no_auth_plugin; + m_auth_plugin+= errors->m_auth_plugin; + m_handshake+= errors->m_handshake; + m_proxy_user+= errors->m_proxy_user; + m_proxy_user_acl+= errors->m_proxy_user_acl; + m_authentication+= errors->m_authentication; + m_ssl+= errors->m_ssl; + m_max_user_connection+= errors->m_max_user_connection; + m_max_user_connection_per_hour+= errors->m_max_user_connection_per_hour; + m_default_database+= errors->m_default_database; + m_init_connect+= errors->m_init_connect; + m_local+= errors->m_local; +} + +static Hash_filo *hostname_cache; +ulong host_cache_size; + +void hostname_cache_refresh() +{ + hostname_cache->clear(); +} + +uint hostname_cache_size() +{ + return hostname_cache->size(); +} + +void hostname_cache_resize(uint size) +{ + hostname_cache->resize(size); +} + +bool hostname_cache_init() +{ + Host_entry tmp; + uint key_offset= (uint) ((char*) (&tmp.ip_key) - (char*) &tmp); + + if (!(hostname_cache= new Hash_filo(key_memory_host_cache_hostname, + host_cache_size, key_offset, HOST_ENTRY_KEY_SIZE, + NULL, (my_hash_free_key) 
my_free, &my_charset_bin))) + return 1; + + hostname_cache->clear(); + + return 0; +} + +void hostname_cache_free() +{ + delete hostname_cache; + hostname_cache= NULL; +} + +void hostname_cache_lock() +{ + mysql_mutex_lock(&hostname_cache->lock); +} + +void hostname_cache_unlock() +{ + mysql_mutex_unlock(&hostname_cache->lock); +} + +static void prepare_hostname_cache_key(const char *ip_string, + char *ip_key) +{ + size_t ip_string_length= strlen(ip_string); + DBUG_ASSERT(ip_string_length < HOST_ENTRY_KEY_SIZE); + + memset(ip_key, 0, HOST_ENTRY_KEY_SIZE); + memcpy(ip_key, ip_string, ip_string_length); +} + +Host_entry *hostname_cache_first() +{ return hostname_cache->first(); } + +static inline Host_entry *hostname_cache_search(const char *ip_key) +{ + return hostname_cache->search((uchar *) ip_key, HOST_ENTRY_KEY_SIZE); +} + +static void add_hostname_impl(const char *ip_key, const char *hostname, + bool validated, Host_errors *errors, + ulonglong now) +{ + Host_entry *entry; + bool need_add= false; + + entry= hostname_cache_search(ip_key); + + if (likely(entry == NULL)) + { + entry= (Host_entry *) my_malloc(key_memory_host_cache_hostname, + sizeof (Host_entry), 0); + if (entry == NULL) + return; + + need_add= true; + memcpy(&entry->ip_key, ip_key, HOST_ENTRY_KEY_SIZE); + entry->m_errors.reset(); + entry->m_hostname_length= 0; + entry->m_host_validated= false; + entry->m_first_seen= now; + entry->m_last_seen= now; + entry->m_first_error_seen= 0; + entry->m_last_error_seen= 0; + } + else + { + entry->m_last_seen= now; + } + + if (validated) + { + if (hostname != NULL) + { + size_t len= strlen(hostname); + if (len > sizeof(entry->m_hostname) - 1) + len= sizeof(entry->m_hostname) - 1; + memcpy(entry->m_hostname, hostname, len); + entry->m_hostname[len]= '\0'; + entry->m_hostname_length= (uint)len; + + DBUG_PRINT("info", + ("Adding/Updating '%s' -> '%s' (validated) to the hostname cache...'", + (const char *) ip_key, + (const char *) entry->m_hostname)); + } + else + { 
+ entry->m_hostname_length= 0; + DBUG_PRINT("info", + ("Adding/Updating '%s' -> NULL (validated) to the hostname cache...'", + (const char *) ip_key)); + } + entry->m_host_validated= true; + /* + New errors that are considered 'blocking', + that will eventually cause the IP to be black listed and blocked. + */ + errors->sum_connect_errors(); + } + else + { + entry->m_hostname_length= 0; + entry->m_host_validated= false; + /* Do not count new blocking errors during DNS failures. */ + errors->clear_connect_errors(); + DBUG_PRINT("info", + ("Adding/Updating '%s' -> NULL (not validated) to the hostname cache...'", + (const char *) ip_key)); + } + + if (errors->has_error()) + entry->set_error_timestamps(now); + + entry->m_errors.aggregate(errors); + + if (need_add) + hostname_cache->add(entry); + + return; +} + +static void add_hostname(const char *ip_key, const char *hostname, + bool validated, Host_errors *errors) +{ + if (specialflag & SPECIAL_NO_HOST_CACHE) + return; + + ulonglong now= my_hrtime().val; + + mysql_mutex_lock(&hostname_cache->lock); + + add_hostname_impl(ip_key, hostname, validated, errors, now); + + mysql_mutex_unlock(&hostname_cache->lock); + + return; +} + +void inc_host_errors(const char *ip_string, Host_errors *errors) +{ + if (!ip_string) + return; + + ulonglong now= my_hrtime().val; + char ip_key[HOST_ENTRY_KEY_SIZE]; + prepare_hostname_cache_key(ip_string, ip_key); + + mysql_mutex_lock(&hostname_cache->lock); + + Host_entry *entry= hostname_cache_search(ip_key); + + if (entry) + { + if (entry->m_host_validated) + errors->sum_connect_errors(); + else + errors->clear_connect_errors(); + + entry->m_errors.aggregate(errors); + entry->set_error_timestamps(now); + } + + mysql_mutex_unlock(&hostname_cache->lock); +} + +void reset_host_connect_errors(const char *ip_string) +{ + if (!ip_string) + return; + + char ip_key[HOST_ENTRY_KEY_SIZE]; + prepare_hostname_cache_key(ip_string, ip_key); + + mysql_mutex_lock(&hostname_cache->lock); + + Host_entry 
*entry= hostname_cache_search(ip_key); + + if (entry) + entry->m_errors.clear_connect_errors(); + + mysql_mutex_unlock(&hostname_cache->lock); +} + +static inline bool is_ip_loopback(const struct sockaddr *ip) +{ + switch (ip->sa_family) { + case AF_INET: + { + /* Check for IPv4 127.0.0.1. */ + struct in_addr *ip4= &((struct sockaddr_in *) ip)->sin_addr; + return ntohl(ip4->s_addr) == INADDR_LOOPBACK; + } + +#ifdef HAVE_IPV6 + case AF_INET6: + { + /* Check for IPv6 ::1. */ + struct in6_addr *ip6= &((struct sockaddr_in6 *) ip)->sin6_addr; + return IN6_IS_ADDR_LOOPBACK(ip6); + } +#endif /* HAVE_IPV6 */ + + default: + return FALSE; + } +} + +static inline bool is_hostname_valid(const char *hostname) +{ + /* + A hostname is invalid if it starts with a number followed by a dot + (IPv4 address). + */ + + if (!my_isdigit(&my_charset_latin1, hostname[0])) + return TRUE; + + const char *p= hostname + 1; + + while (my_isdigit(&my_charset_latin1, *p)) + ++p; + + return *p != '.'; +} + +/** + Resolve IP-address to host name. + + This function does the following things: + - resolves IP-address; + - employs Forward Confirmed Reverse DNS technique to validate IP-address; + - returns host name if IP-address is validated; + - set value to out-variable connect_errors -- this variable represents the + number of connection errors from the specified IP-address. + - update the host_cache statistics + + NOTE: connect_errors are counted (are supported) only for the clients + where IP-address can be resolved and FCrDNS check is passed. + + @param [in] ip_storage IP address (sockaddr). Must be set. + @param [in] ip_string IP address (string). Must be set. + @param [out] hostname + @param [out] connect_errors + + @return Error status + @retval 0 Success + @retval RC_BLOCKED_HOST The host is blocked. + + The function does not set/report MySQL server error in case of failure. + It's caller's responsibility to handle failures of this function + properly. 
+*/ + +int ip_to_hostname(struct sockaddr_storage *ip_storage, + const char *ip_string, + const char **hostname, + uint *connect_errors) +{ + const struct sockaddr *ip= (const sockaddr *) ip_storage; + int err_code; + bool err_status __attribute__((unused)); + Host_errors errors; + + DBUG_ENTER("ip_to_hostname"); + DBUG_PRINT("info", ("IP address: '%s'; family: %d.", + (const char *) ip_string, + (int) ip->sa_family)); + + /* Default output values, for most cases. */ + *hostname= NULL; + *connect_errors= 0; + + /* Check if we have loopback address (127.0.0.1 or ::1). */ + + if (is_ip_loopback(ip)) + { + DBUG_PRINT("info", ("Loopback address detected.")); + + /* Do not count connect errors from localhost. */ + *hostname= my_localhost; + + DBUG_RETURN(0); + } + + /* Prepare host name cache key. */ + + char ip_key[HOST_ENTRY_KEY_SIZE]; + prepare_hostname_cache_key(ip_string, ip_key); + + /* Check first if we have host name in the cache. */ + + if (!(specialflag & SPECIAL_NO_HOST_CACHE)) + { + ulonglong now= my_hrtime().val; + + mysql_mutex_lock(&hostname_cache->lock); + + Host_entry *entry= hostname_cache_search(ip_key); + + if (entry) + { + entry->m_last_seen= now; + *connect_errors= entry->m_errors.m_connect; + + if (unlikely(entry->m_errors.m_connect >= max_connect_errors)) + { + entry->m_errors.m_host_blocked++; + entry->set_error_timestamps(now); + mysql_mutex_unlock(&hostname_cache->lock); + DBUG_RETURN(RC_BLOCKED_HOST); + } + + /* + If there is an IP -> HOSTNAME association in the cache, + but for a hostname that was not validated, + do not return that hostname: perform the network validation again. + */ + if (entry->m_host_validated) + { + if (entry->m_hostname_length) + *hostname= my_strdup(key_memory_host_cache_hostname, + entry->m_hostname, MYF(0)); + + DBUG_PRINT("info",("IP (%s) has been found in the cache. " + "Hostname: '%s'", + (const char *) ip_key, + (const char *) (*hostname? 
*hostname : "null") + )); + + mysql_mutex_unlock(&hostname_cache->lock); + + DBUG_RETURN(0); + } + } + + mysql_mutex_unlock(&hostname_cache->lock); + } + + /* + Resolve host name. Return an error if a host name can not be resolved + (instead of returning the numeric form of the host name). + */ + + char hostname_buffer[NI_MAXHOST]; + + DBUG_PRINT("info", ("Resolving '%s'...", (const char *) ip_key)); + + err_code= vio_getnameinfo(ip, hostname_buffer, NI_MAXHOST, NULL, 0, + NI_NAMEREQD); + + /* + =========================================================================== + DEBUG code only (begin) + Simulate various output from vio_getnameinfo(). + =========================================================================== + */ + + DBUG_EXECUTE_IF("getnameinfo_error_noname", + { + strcpy(hostname_buffer, ""); + err_code= EAI_NONAME; + } + ); + + DBUG_EXECUTE_IF("getnameinfo_error_again", + { + strcpy(hostname_buffer, ""); + err_code= EAI_AGAIN; + } + ); + + DBUG_EXECUTE_IF("getnameinfo_fake_ipv4", + { + strcpy(hostname_buffer, "santa.claus.ipv4.example.com"); + err_code= 0; + } + ); + + DBUG_EXECUTE_IF("getnameinfo_fake_ipv6", + { + strcpy(hostname_buffer, "santa.claus.ipv6.example.com"); + err_code= 0; + } + ); + + DBUG_EXECUTE_IF("getnameinfo_format_ipv4", + { + strcpy(hostname_buffer, "12.12.12.12"); + err_code= 0; + } + ); + + DBUG_EXECUTE_IF("getnameinfo_format_ipv6", + { + strcpy(hostname_buffer, "12:DEAD:BEEF:0"); + err_code= 0; + } + ); + + DBUG_EXECUTE_IF("getnameinfo_fake_long_host", + { + strcpy(hostname_buffer, "host5678901_345678902_345678903_345678904_345678905_345678906_345678907_345678908_345678909_345678910_345678911_345678912_345678913_345678914_345678915_345678916_345678917_345678918_345678919_345678920_345678921_345678922_345678923_345678924_345678925_345"); + err_code= 0; + } + ); + + /* + =========================================================================== + DEBUG code only (end) + 
=========================================================================== + */ + + if (err_code) + { + // NOTE: gai_strerror() returns a string ending by a dot. + + DBUG_PRINT("error", ("IP address '%s' could not be resolved: %s", + (const char *) ip_key, + (const char *) gai_strerror(err_code))); + + sql_print_warning("IP address '%s' could not be resolved: %s", + (const char *) ip_key, + (const char *) gai_strerror(err_code)); + + bool validated; + if (vio_is_no_name_error(err_code)) + { + /* + The no-name error means that there is no reverse address mapping + for the IP address. A host name can not be resolved. + */ + errors.m_nameinfo_permanent= 1; + validated= true; + } + else + { + /* + If it is not the no-name error, we should not cache the hostname + (or rather its absence), because the failure might be transient. + Only the ip error statistics are cached. + */ + errors.m_nameinfo_transient= 1; + validated= false; + } + add_hostname(ip_key, NULL, validated, &errors); + + DBUG_RETURN(0); + } + + DBUG_PRINT("info", ("IP '%s' resolved to '%s'.", + (const char *) ip_key, + (const char *) hostname_buffer)); + + /* + Validate hostname: the server does not accept host names, which + resemble IP addresses. + + The thing is that theoretically, a host name can be in a form of IPv4 + address (123.example.org, or 1.2 or even 1.2.3.4). We have to deny such + host names because ACL-systems is not designed to work with them. + + For example, it is possible to specify a host name mask (like + 192.168.1.%) for an ACL rule. Then, if IPv4-like hostnames are allowed, + there is a security hole: instead of allowing access for + 192.168.1.0/255 network (which was assumed by the user), the access + will be allowed for host names like 192.168.1.example.org. 
+ */ + + if (!is_hostname_valid(hostname_buffer)) + { + DBUG_PRINT("error", ("IP address '%s' has been resolved " + "to the host name '%s', which resembles " + "IPv4-address itself.", + (const char *) ip_key, + (const char *) hostname_buffer)); + + sql_print_warning("IP address '%s' has been resolved " + "to the host name '%s', which resembles " + "IPv4-address itself.", + (const char *) ip_key, + (const char *) hostname_buffer); + + errors.m_format= 1; + add_hostname(ip_key, hostname_buffer, false, &errors); + + DBUG_RETURN(false); + } + + /* Get IP-addresses for the resolved host name (FCrDNS technique). */ + + struct addrinfo hints; + struct addrinfo *addr_info_list; + /* + Makes fault injection with DBUG_EXECUTE_IF easier. + Invoking free_addr_info(NULL) crashes on some platforms. + */ + bool free_addr_info_list= false; + + memset(&hints, 0, sizeof (struct addrinfo)); + hints.ai_flags= AI_PASSIVE; + hints.ai_socktype= SOCK_STREAM; + hints.ai_family= AF_UNSPEC; + + DBUG_PRINT("info", ("Getting IP addresses for hostname '%s'...", + (const char *) hostname_buffer)); + + err_code= getaddrinfo(hostname_buffer, NULL, &hints, &addr_info_list); + if (err_code == 0) + free_addr_info_list= true; + + /* + =========================================================================== + DEBUG code only (begin) + Simulate various output from getaddrinfo(). 
+ =========================================================================== + */ + DBUG_EXECUTE_IF("getaddrinfo_error_noname", + { + if (free_addr_info_list) + freeaddrinfo(addr_info_list); + + addr_info_list= NULL; + err_code= EAI_NONAME; + free_addr_info_list= false; + } + ); + + DBUG_EXECUTE_IF("getaddrinfo_error_again", + { + if (free_addr_info_list) + freeaddrinfo(addr_info_list); + + addr_info_list= NULL; + err_code= EAI_AGAIN; + free_addr_info_list= false; + } + ); + + DBUG_EXECUTE_IF("getaddrinfo_fake_bad_ipv4", + { + if (free_addr_info_list) + freeaddrinfo(addr_info_list); + + struct sockaddr_in *debug_addr; + /* + Not thread safe, which is ok. + Only one connection at a time is tested with + fault injection. + */ + static struct sockaddr_in debug_sock_addr[2]; + static struct addrinfo debug_addr_info[2]; + /* Simulating ipv4 192.0.2.126 */ + debug_addr= & debug_sock_addr[0]; + debug_addr->sin_family= AF_INET; + inet_pton(AF_INET,"192.0.2.126", &debug_addr->sin_addr); + + /* Simulating ipv4 192.0.2.127 */ + debug_addr= & debug_sock_addr[1]; + debug_addr->sin_family= AF_INET; + inet_pton(AF_INET,"192.0.2.127", &debug_addr->sin_addr); + + debug_addr_info[0].ai_addr= (struct sockaddr*) & debug_sock_addr[0]; + debug_addr_info[0].ai_addrlen= sizeof (struct sockaddr_in); + debug_addr_info[0].ai_next= & debug_addr_info[1]; + + debug_addr_info[1].ai_addr= (struct sockaddr*) & debug_sock_addr[1]; + debug_addr_info[1].ai_addrlen= sizeof (struct sockaddr_in); + debug_addr_info[1].ai_next= NULL; + + addr_info_list= & debug_addr_info[0]; + err_code= 0; + free_addr_info_list= false; + } + ); + + DBUG_EXECUTE_IF("getaddrinfo_fake_good_ipv4", + { + if (free_addr_info_list) + freeaddrinfo(addr_info_list); + + struct sockaddr_in *debug_addr; + static struct sockaddr_in debug_sock_addr[2]; + static struct addrinfo debug_addr_info[2]; + /* Simulating ipv4 192.0.2.5 */ + debug_addr= & debug_sock_addr[0]; + debug_addr->sin_family= AF_INET; + inet_pton(AF_INET,"192.0.2.5", 
&debug_addr->sin_addr); + + /* Simulating ipv4 192.0.2.4 */ + debug_addr= & debug_sock_addr[1]; + debug_addr->sin_family= AF_INET; + inet_pton(AF_INET,"192.0.2.4", &debug_addr->sin_addr); + + debug_addr_info[0].ai_addr= (struct sockaddr*) & debug_sock_addr[0]; + debug_addr_info[0].ai_addrlen= sizeof (struct sockaddr_in); + debug_addr_info[0].ai_next= & debug_addr_info[1]; + + debug_addr_info[1].ai_addr= (struct sockaddr*) & debug_sock_addr[1]; + debug_addr_info[1].ai_addrlen= sizeof (struct sockaddr_in); + debug_addr_info[1].ai_next= NULL; + + addr_info_list= & debug_addr_info[0]; + err_code= 0; + free_addr_info_list= false; + } + ); + +#ifdef HAVE_IPV6 + DBUG_EXECUTE_IF("getaddrinfo_fake_bad_ipv6", + { + if (free_addr_info_list) + freeaddrinfo(addr_info_list); + + struct sockaddr_in6 *debug_addr; + struct in6_addr *ip6; + /* + Not thread safe, which is ok. + Only one connection at a time is tested with + fault injection. + */ + static struct sockaddr_in6 debug_sock_addr[2]; + static struct addrinfo debug_addr_info[2]; + /* Simulating ipv6 2001:DB8::6:7E */ + debug_addr= & debug_sock_addr[0]; + debug_addr->sin6_family= AF_INET6; + ip6= & debug_addr->sin6_addr; + inet_pton(AF_INET6,"2001:DB8::6:7E",ip6); + + /* Simulating ipv6 2001:DB8::6:7F */ + debug_addr= & debug_sock_addr[1]; + debug_addr->sin6_family= AF_INET6; + ip6= & debug_addr->sin6_addr; + inet_pton(AF_INET6,"2001:DB8::6:7F",ip6); + + debug_addr_info[0].ai_addr= (struct sockaddr*) & debug_sock_addr[0]; + debug_addr_info[0].ai_addrlen= sizeof (struct sockaddr_in6); + debug_addr_info[0].ai_next= & debug_addr_info[1]; + + debug_addr_info[1].ai_addr= (struct sockaddr*) & debug_sock_addr[1]; + debug_addr_info[1].ai_addrlen= sizeof (struct sockaddr_in6); + debug_addr_info[1].ai_next= NULL; + + addr_info_list= & debug_addr_info[0]; + err_code= 0; + free_addr_info_list= false; + } + ); + + DBUG_EXECUTE_IF("getaddrinfo_fake_good_ipv6", + { + if (free_addr_info_list) + freeaddrinfo(addr_info_list); + + struct 
sockaddr_in6 *debug_addr; + struct in6_addr *ip6; + /* + Not thread safe, which is ok. + Only one connection at a time is tested with + fault injection. + */ + static struct sockaddr_in6 debug_sock_addr[2]; + static struct addrinfo debug_addr_info[2]; + /* Simulating ipv6 2001:DB8::6:7 */ + debug_addr= & debug_sock_addr[0]; + debug_addr->sin6_family= AF_INET6; + ip6= & debug_addr->sin6_addr; + ip6->s6_addr[ 0] = 0x20; + ip6->s6_addr[ 1] = 0x01; + ip6->s6_addr[ 2] = 0x0d; + ip6->s6_addr[ 3] = 0xb8; + ip6->s6_addr[ 4] = 0x00; + ip6->s6_addr[ 5] = 0x00; + ip6->s6_addr[ 6] = 0x00; + ip6->s6_addr[ 7] = 0x00; + ip6->s6_addr[ 8] = 0x00; + ip6->s6_addr[ 9] = 0x00; + ip6->s6_addr[10] = 0x00; + ip6->s6_addr[11] = 0x00; + ip6->s6_addr[12] = 0x00; + ip6->s6_addr[13] = 0x06; + ip6->s6_addr[14] = 0x00; + ip6->s6_addr[15] = 0x07; + + /* Simulating ipv6 2001:DB8::6:6 */ + debug_addr= & debug_sock_addr[1]; + debug_addr->sin6_family= AF_INET6; + ip6= & debug_addr->sin6_addr; + ip6->s6_addr[ 0] = 0x20; + ip6->s6_addr[ 1] = 0x01; + ip6->s6_addr[ 2] = 0x0d; + ip6->s6_addr[ 3] = 0xb8; + ip6->s6_addr[ 4] = 0x00; + ip6->s6_addr[ 5] = 0x00; + ip6->s6_addr[ 6] = 0x00; + ip6->s6_addr[ 7] = 0x00; + ip6->s6_addr[ 8] = 0x00; + ip6->s6_addr[ 9] = 0x00; + ip6->s6_addr[10] = 0x00; + ip6->s6_addr[11] = 0x00; + ip6->s6_addr[12] = 0x00; + ip6->s6_addr[13] = 0x06; + ip6->s6_addr[14] = 0x00; + ip6->s6_addr[15] = 0x06; + + debug_addr_info[0].ai_addr= (struct sockaddr*) & debug_sock_addr[0]; + debug_addr_info[0].ai_addrlen= sizeof (struct sockaddr_in6); + debug_addr_info[0].ai_next= & debug_addr_info[1]; + + debug_addr_info[1].ai_addr= (struct sockaddr*) & debug_sock_addr[1]; + debug_addr_info[1].ai_addrlen= sizeof (struct sockaddr_in6); + debug_addr_info[1].ai_next= NULL; + + addr_info_list= & debug_addr_info[0]; + err_code= 0; + free_addr_info_list= false; + } + ); +#endif /* HAVE_IPV6 */ + + /* + =========================================================================== + DEBUG code only (end) + 
=========================================================================== + */ + + if (err_code != 0) + { + sql_print_warning("Host name '%s' could not be resolved: %s", + (const char *) hostname_buffer, + (const char *) gai_strerror(err_code)); + + bool validated; + + if (err_code == EAI_NONAME) + { + errors.m_addrinfo_permanent= 1; + validated= true; + } + else + { + /* + Don't cache responses when the DNS server is down, as otherwise + transient DNS failure may leave any number of clients (those + that attempted to connect during the outage) unable to connect + indefinitely. + Only cache error statistics. + */ + errors.m_addrinfo_transient= 1; + validated= false; + } + add_hostname(ip_key, NULL, validated, &errors); + + DBUG_RETURN(false); + } + + /* Check that getaddrinfo() returned the used IP (FCrDNS technique). */ + + DBUG_PRINT("info", ("The following IP addresses found for '%s':", + (const char *) hostname_buffer)); + + for (struct addrinfo *addr_info= addr_info_list; + addr_info; addr_info= addr_info->ai_next) + { + char ip_buffer[HOST_ENTRY_KEY_SIZE]; + + { + err_status= + vio_get_normalized_ip_string(addr_info->ai_addr, (int)addr_info->ai_addrlen, + ip_buffer, sizeof (ip_buffer)); + DBUG_ASSERT(!err_status); + } + + DBUG_PRINT("info", (" - '%s'", (const char *) ip_buffer)); + + if (strcasecmp(ip_key, ip_buffer) == 0) + { + /* Copy host name string to be stored in the cache. */ + + *hostname= my_strdup(key_memory_host_cache_hostname, + hostname_buffer, MYF(0)); + + if (!*hostname) + { + DBUG_PRINT("error", ("Out of memory.")); + + if (free_addr_info_list) + freeaddrinfo(addr_info_list); + DBUG_RETURN(true); + } + + break; + } + } + + /* Log resolved IP-addresses if no match was found. 
*/ + + if (!*hostname) + { + errors.m_FCrDNS= 1; + + sql_print_warning("Hostname '%s' does not resolve to '%s'.", + (const char *) hostname_buffer, + (const char *) ip_key); + sql_print_information("Hostname '%s' has the following IP addresses:", + (const char *) hostname_buffer); + + for (struct addrinfo *addr_info= addr_info_list; + addr_info; addr_info= addr_info->ai_next) + { + char ip_buffer[HOST_ENTRY_KEY_SIZE]; + + err_status= + vio_get_normalized_ip_string(addr_info->ai_addr, (int)addr_info->ai_addrlen, + ip_buffer, sizeof (ip_buffer)); + DBUG_ASSERT(!err_status); + + sql_print_information(" - %s", (const char *) ip_buffer); + } + } + + /* Add an entry for the IP to the cache. */ + add_hostname(ip_key, *hostname, true, &errors); + + /* Free the result of getaddrinfo(). */ + if (free_addr_info_list) + freeaddrinfo(addr_info_list); + + DBUG_RETURN(false); +} diff --git a/sql/hostname.h b/sql/hostname.h new file mode 100644 index 00000000..3a371dce --- /dev/null +++ b/sql/hostname.h @@ -0,0 +1,183 @@ +/* Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef HOSTNAME_INCLUDED +#define HOSTNAME_INCLUDED + +#include "my_net.h" +#include "hash_filo.h" + +struct Host_errors +{ +public: + Host_errors(); + ~Host_errors(); + + void reset(); + void aggregate(const Host_errors *errors); + + /** Number of connect errors. */ + ulong m_connect; + + /** Number of host blocked errors. */ + ulong m_host_blocked; + /** Number of transient errors from getnameinfo(). */ + ulong m_nameinfo_transient; + /** Number of permanent errors from getnameinfo(). */ + ulong m_nameinfo_permanent; + /** Number of errors from is_hostname_valid(). */ + ulong m_format; + /** Number of transient errors from getaddrinfo(). */ + ulong m_addrinfo_transient; + /** Number of permanent errors from getaddrinfo(). */ + ulong m_addrinfo_permanent; + /** Number of errors from Forward-Confirmed reverse DNS checks. */ + ulong m_FCrDNS; + /** Number of errors from host grants. */ + ulong m_host_acl; + /** Number of errors from missing auth plugin. */ + ulong m_no_auth_plugin; + /** Number of errors from auth plugin. */ + ulong m_auth_plugin; + /** Number of errors from authentication plugins. */ + ulong m_handshake; + /** Number of errors from proxy user. */ + ulong m_proxy_user; + /** Number of errors from proxy user acl. */ + ulong m_proxy_user_acl; + /** Number of errors from authentication. */ + ulong m_authentication; + /** Number of errors from ssl. */ + ulong m_ssl; + /** Number of errors from max user connection. */ + ulong m_max_user_connection; + /** Number of errors from max user connection per hour. */ + ulong m_max_user_connection_per_hour; + /** Number of errors from the default database. */ + ulong m_default_database; + /** Number of errors from init_connect. */ + ulong m_init_connect; + /** Number of errors from the server itself. 
*/ + ulong m_local; + + bool has_error() const + { + return ((m_host_blocked != 0) + || (m_nameinfo_transient != 0) + || (m_nameinfo_permanent != 0) + || (m_format != 0) + || (m_addrinfo_transient != 0) + || (m_addrinfo_permanent != 0) + || (m_FCrDNS != 0) + || (m_host_acl != 0) + || (m_no_auth_plugin != 0) + || (m_auth_plugin != 0) + || (m_handshake != 0) + || (m_proxy_user != 0) + || (m_proxy_user_acl != 0) + || (m_authentication != 0) + || (m_ssl != 0) + || (m_max_user_connection != 0) + || (m_max_user_connection_per_hour != 0) + || (m_default_database != 0) + || (m_init_connect != 0) + || (m_local != 0)); + } + + void sum_connect_errors() + { + /* Current (historical) behavior: */ + m_connect= m_handshake; + } + + void clear_connect_errors() + { + m_connect= 0; + } +}; + +/** Size of IP address string in the hash cache. */ +#define HOST_ENTRY_KEY_SIZE INET6_ADDRSTRLEN + +/** + An entry in the hostname hash table cache. + + Host name cache does two things: + - caches host names to save DNS look ups; + - counts errors from IP. + + Host name can be empty (that means DNS look up failed), + but errors still are counted. +*/ +class Host_entry : public hash_filo_element +{ +public: + Host_entry *next() + { return (Host_entry*) hash_filo_element::next(); } + + /** + Client IP address. This is the key used with the hash table. + + The client IP address is always expressed in IPv6, even when the + network IPv6 stack is not present. + + This IP address is never used to connect to a socket. + */ + char ip_key[HOST_ENTRY_KEY_SIZE]; + + /** + One of the host names for the IP address. May be a zero length string. + */ + char m_hostname[HOSTNAME_LENGTH + 1]; + /** Length in bytes of @c m_hostname. */ + uint m_hostname_length; + /** The hostname is validated and used for authorization. */ + bool m_host_validated; + ulonglong m_first_seen; + ulonglong m_last_seen; + ulonglong m_first_error_seen; + ulonglong m_last_error_seen; + /** Error statistics. 
*/ + Host_errors m_errors; + + void set_error_timestamps(ulonglong now) + { + if (m_first_error_seen == 0) + m_first_error_seen= now; + m_last_error_seen= now; + } +}; + +/** The size of the host_cache. */ +extern ulong host_cache_size; + +#define RC_OK 0 +#define RC_BLOCKED_HOST 1 +int ip_to_hostname(struct sockaddr_storage *ip_storage, + const char *ip_string, + const char **hostname, uint *connect_errors); + +void inc_host_errors(const char *ip_string, Host_errors *errors); +void reset_host_connect_errors(const char *ip_string); +bool hostname_cache_init(); +void hostname_cache_free(); +void hostname_cache_refresh(void); +uint hostname_cache_size(); +void hostname_cache_resize(uint size); +void hostname_cache_lock(); +void hostname_cache_unlock(); +Host_entry *hostname_cache_first(); + +#endif /* HOSTNAME_INCLUDED */ diff --git a/sql/init.cc b/sql/init.cc new file mode 100644 index 00000000..47351787 --- /dev/null +++ b/sql/init.cc @@ -0,0 +1,47 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/** + @file + + @brief + Init and dummy functions for interface with unireg +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "init.h" +#include "mysqld.h" +#include "my_time.h" // my_init_time +#include "unireg.h" // SPECIAL_SAME_DB_NAME +#include + +void unireg_init(ulong options) +{ + DBUG_ENTER("unireg_init"); + + error_handler_hook = my_message_stderr; + my_disable_async_io=1; /* aioread is only in shared library */ + wild_many='%'; wild_one='_'; wild_prefix='\\'; /* Change to sql syntax */ + + current_pid=(ulong) getpid(); /* Save for later ref */ + my_init_time(); /* Init time-functions (read zone) */ + + (void) strmov(reg_ext,".frm"); + reg_ext_length= 4; + specialflag=SPECIAL_SAME_DB_NAME | options; /* Set options from argv */ + DBUG_VOID_RETURN; +} diff --git a/sql/init.h b/sql/init.h new file mode 100644 index 00000000..b9d5053f --- /dev/null +++ b/sql/init.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef INIT_INCLUDED +#define INIT_INCLUDED + +void unireg_init(ulong options); + +#endif /* INIT_INCLUDED */ diff --git a/sql/innodb_priv.h b/sql/innodb_priv.h new file mode 100644 index 00000000..bec63a83 --- /dev/null +++ b/sql/innodb_priv.h @@ -0,0 +1,34 @@ +/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef INNODB_PRIV_INCLUDED +#define INNODB_PRIV_INCLUDED + +/** @file Declaring server-internal functions that are used by InnoDB. */ + +#include +#include /* strconvert */ + +class THD; + +int get_quote_char_for_identifier(THD *thd, const char *name, size_t length); +bool schema_table_store_record(THD *thd, TABLE *table); +void localtime_to_TIME(MYSQL_TIME *to, struct tm *from); + +void sql_print_error(const char *format, ...); + +#define thd_binlog_pos(X, Y, Z) mysql_bin_log_commit_pos(X, Z, Y) + +#endif /* INNODB_PRIV_INCLUDED */ diff --git a/sql/item.cc b/sql/item.cc new file mode 100644 index 00000000..c6d51e47 --- /dev/null +++ b/sql/item.cc @@ -0,0 +1,11025 @@ +/* + Copyright (c) 2000, 2018, Oracle and/or its affiliates. 
+ Copyright (c) 2010, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif +#include "mariadb.h" /* NO_EMBEDDED_ACCESS_CHECKS */ +#include "sql_priv.h" +#include +#include +#include "my_dir.h" +#include "sp_rcontext.h" +#include "sp_head.h" +#include "sql_trigger.h" +#include "sql_select.h" +#include "sql_show.h" // append_identifier +#include "sql_view.h" // VIEW_ANY_SQL +#include "sql_time.h" // str_to_datetime_with_warn, + // make_truncated_value_warning +#include "sql_acl.h" // get_column_grant, + // SELECT_ACL, UPDATE_ACL, + // INSERT_ACL, + // check_grant_column +#include "sql_base.h" // enum_resolution_type, + // REPORT_EXCEPT_NOT_FOUND, + // find_item_in_list, + // RESOLVED_AGAINST_ALIAS, ... +#include "sql_expression_cache.h" +#include "sql_lex.h" // empty_clex_str + +const String my_null_string("NULL", 4, default_charset_info); +const String my_default_string("DEFAULT", 7, default_charset_info); + +/* + item_empty_name is used when calling Item::set_name with NULL + pointer, to make it easier to use the name in printf. + item_used_name is used when calling Item::set_name with a 0 length + string. 
+*/ +const char *item_empty_name=""; +const char *item_used_name= "\0"; + +static int save_field_in_field(Field *, bool *, Field *, bool); +Item_bool_static *Item_false; +Item_bool_static *Item_true; + +/** + Compare two Items for List::add_unique() +*/ + +bool cmp_items(Item *a, Item *b) +{ + return a->eq(b, FALSE); +} + + +/** + Set max_sum_func_level if it is needed +*/ +inline void set_max_sum_func_level(THD *thd, SELECT_LEX *select) +{ + if (thd->lex->in_sum_func && + thd->lex->in_sum_func->nest_level >= select->nest_level) + set_if_bigger(thd->lex->in_sum_func->max_sum_func_level, + select->nest_level - 1); +} + + +MEM_ROOT *get_thd_memroot(THD *thd) +{ + return thd->mem_root; +} + +/***************************************************************************** +** Item functions +*****************************************************************************/ + +/** + Init all special items. +*/ + +void item_init(void) +{ + item_func_sleep_init(); + uuid_short_init(); +} + + +void Item::raise_error_not_evaluable() +{ + Item::Print tmp(this, QT_ORDINARY); + my_error(ER_NOT_ALLOWED_IN_THIS_CONTEXT, MYF(0), tmp.ptr()); +} + + +void Item::push_note_converted_to_negative_complement(THD *thd) +{ + push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, ER_UNKNOWN_ERROR, + "Cast to signed converted positive out-of-range integer to " + "it's negative complement"); +} + + +void Item::push_note_converted_to_positive_complement(THD *thd) +{ + push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, ER_UNKNOWN_ERROR, + "Cast to unsigned converted negative integer to it's " + "positive complement"); +} + + +longlong Item::val_datetime_packed_result(THD *thd) +{ + MYSQL_TIME ltime, tmp; + if (get_date_result(thd, <ime, Datetime::Options_cmp(thd))) + return 0; + if (ltime.time_type != MYSQL_TIMESTAMP_TIME) + return pack_time(<ime); + if ((null_value= time_to_datetime_with_warn(thd, <ime, &tmp, + TIME_CONV_NONE))) + return 0; + return pack_time(&tmp); +} + + +longlong 
Item::val_time_packed_result(THD *thd) +{ + MYSQL_TIME ltime; + if (get_date_result(thd, <ime, Time::Options_cmp(thd))) + return 0; + if (ltime.time_type == MYSQL_TIMESTAMP_TIME) + return pack_time(<ime); + int warn= 0; + Time tmp(&warn, <ime, 0); + DBUG_ASSERT(tmp.is_valid_time()); + return tmp.to_packed(); +} + + +/* + For the items which don't have its own fast val_str_ascii() + implementation we provide a generic slower version, + which converts from the Item character set to ASCII. + For better performance conversion happens only in + case of a "tricky" Item character set (e.g. UCS2). + Normally conversion does not happen. +*/ +String *Item::val_str_ascii(String *str) +{ + DBUG_ASSERT(str != &str_value); + + uint errors; + String *res= val_str(&str_value); + if (!res) + return 0; + + if (!(res->charset()->state & MY_CS_NONASCII)) + str= res; + else + { + if ((null_value= str->copy(res->ptr(), res->length(), collation.collation, + &my_charset_latin1, &errors))) + return 0; + } + + return str; +} + + +String *Item::val_str_ascii_revert_empty_string_is_null(THD *thd, String *str) +{ + String *res= val_str_ascii(str); + if (!res && (thd->variables.sql_mode & MODE_EMPTY_STRING_IS_NULL)) + { + null_value= false; + str->set("", 0, &my_charset_latin1); + return str; + } + return res; +} + + +String *Item::val_str(String *str, String *converter, CHARSET_INFO *cs) +{ + String *res= val_str(str); + if (null_value) + return (String *) 0; + + if (!cs) + return res; + + uint errors; + if ((null_value= converter->copy(res->ptr(), res->length(), + collation.collation, cs, &errors))) + return (String *) 0; + + return converter; +} + + +String *Item::val_string_from_real(String *str) +{ + double nr= val_real(); + if (null_value) + return 0; /* purecov: inspected */ + str->set_real(nr,decimals, &my_charset_numeric); + return str; +} + + +String *Item::val_string_from_int(String *str) +{ + longlong nr= val_int(); + if (null_value) + return 0; + str->set_int(nr, unsigned_flag, 
&my_charset_numeric); + return str; +} + + +longlong Item::val_int_from_str(int *error) +{ + char buff[MAX_FIELD_WIDTH]; + String tmp(buff,sizeof(buff), &my_charset_bin), *res; + + /* + For a string result, we must first get the string and then convert it + to a longlong + */ + if (!(res= val_str(&tmp))) + { + *error= 0; + return 0; + } + Converter_strtoll10_with_warn cnv(NULL, Warn_filter_all(), + res->charset(), res->ptr(), res->length()); + *error= cnv.error(); + return cnv.result(); +} + + +longlong Item::val_int_signed_typecast_from_str() +{ + int error; + longlong value= val_int_from_str(&error); + if (unlikely(!null_value && value < 0 && error == 0)) + push_note_converted_to_negative_complement(current_thd); + return value; +} + + +longlong Item::val_int_unsigned_typecast_from_str() +{ + int error; + longlong value= val_int_from_str(&error); + if (unlikely(!null_value && error < 0)) + push_note_converted_to_positive_complement(current_thd); + return value; +} + + +longlong Item::val_int_signed_typecast_from_real() +{ + double nr= val_real(); + if (null_value) + return 0; + Converter_double_to_longlong conv(nr, false); + if (conv.error()) + { + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_DATA_OVERFLOW, ER_THD(thd, ER_DATA_OVERFLOW), + ErrConvDouble(nr).ptr(), "SIGNED BIGINT"); + } + return conv.result(); +} + + +longlong Item::val_int_unsigned_typecast_from_real() +{ + double nr= val_real(); + if (null_value) + return 0; + Converter_double_to_longlong conv(nr, true); + if (conv.error()) + { + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_DATA_OVERFLOW, ER_THD(thd, ER_DATA_OVERFLOW), + ErrConvDouble(nr).ptr(), "UNSIGNED BIGINT"); + } + return conv.result(); +} + + +longlong Item::val_int_signed_typecast_from_int() +{ + longlong value= val_int(); + if (!null_value && unsigned_flag && value < 0) + push_note_converted_to_negative_complement(current_thd); + return value; +} + + 
+longlong Item::val_int_unsigned_typecast_from_int() +{ + longlong value= val_int(); + if (!null_value && unsigned_flag == 0 && value < 0) + push_note_converted_to_positive_complement(current_thd); + return value; +} + + +my_decimal *Item::val_decimal_from_real(my_decimal *decimal_value) +{ + double nr= val_real(); + if (null_value) + return 0; + double2my_decimal(E_DEC_FATAL_ERROR, nr, decimal_value); + return (decimal_value); +} + + +my_decimal *Item::val_decimal_from_int(my_decimal *decimal_value) +{ + DBUG_ASSERT(fixed()); + longlong nr= val_int(); + if (null_value) + return 0; + int2my_decimal(E_DEC_FATAL_ERROR, nr, unsigned_flag, decimal_value); + return decimal_value; +} + + +my_decimal *Item::val_decimal_from_string(my_decimal *decimal_value) +{ + String *res; + + if (!(res= val_str(&str_value))) + return 0; + + return decimal_from_string_with_check(decimal_value, res); +} + + +int Item::save_time_in_field(Field *field, bool no_conversions) +{ + MYSQL_TIME ltime; + if (get_time(field->table->in_use, <ime)) + return set_field_to_null_with_conversions(field, no_conversions); + field->set_notnull(); + return field->store_time_dec(<ime, decimals); +} + + +int Item::save_date_in_field(Field *field, bool no_conversions) +{ + MYSQL_TIME ltime; + THD *thd= field->table->in_use; + if (get_date(thd, <ime, Datetime::Options(thd))) + return set_field_to_null_with_conversions(field, no_conversions); + field->set_notnull(); + return field->store_time_dec(<ime, decimals); +} + + +/* + Store the string value in field directly + + SYNOPSIS + Item::save_str_value_in_field() + field a pointer to field where to store + result the pointer to the string value to be stored + + DESCRIPTION + The method is used by Item_*::save_in_field implementations + when we don't need to calculate the value to store + See Item_string::save_in_field() implementation for example + + IMPLEMENTATION + Check if the Item is null and stores the NULL or the + result value in the field accordingly. 
+ + RETURN + Nonzero value if error +*/ + +int Item::save_str_value_in_field(Field *field, String *result) +{ + if (null_value) + return set_field_to_null(field); + field->set_notnull(); + return field->store(result->ptr(), result->length(), + collation.collation); +} + + +Item::Item(THD *thd): + name(null_clex_str), orig_name(0), is_expensive_cache(-1) +{ + DBUG_ASSERT(thd); + base_flags= item_base_t::FIXED; + with_flags= item_with_t::NONE; + null_value= 0; + marker= MARKER_UNUSED; + + /* Initially this item is not attached to any JOIN_TAB. */ + join_tab_idx= MAX_TABLES; + + /* Put item in free list so that we can free all items at end */ + next= thd->free_list; + thd->free_list= this; + /* + Item constructor can be called during execution other then SQL_COM + command => we should check thd->lex->current_select on zero (thd->lex + can be uninitialised) + */ + if (thd->lex->current_select) + { + enum_parsing_place place= + thd->lex->current_select->parsing_place; + if (place == SELECT_LIST || place == IN_HAVING) + thd->lex->current_select->select_n_having_items++; + } +} + +/* + This is only used for static const items +*/ + +Item::Item(): + name(null_clex_str), orig_name(0), is_expensive_cache(-1) +{ + DBUG_ASSERT(!mysqld_server_started); // Created early + base_flags= item_base_t::FIXED; + with_flags= item_with_t::NONE; + null_value= 0; + marker= MARKER_UNUSED; + join_tab_idx= MAX_TABLES; +} + + +const TABLE_SHARE *Item::field_table_or_null() +{ + if (real_item()->type() != Item::FIELD_ITEM) + return NULL; + + return ((Item_field *) this)->field->table->s; +} + + +/** + Constructor used by Item_field, Item_ref & aggregate (sum) + functions. + + Used for duplicating lists in processing queries with temporary + tables. 
+*/ +Item::Item(THD *thd, Item *item): + Type_all_attributes(*item), + str_value(item->str_value), + name(item->name), + orig_name(item->orig_name), + base_flags(item->base_flags & ~item_base_t::FIXED), + with_flags(item->with_flags), + marker(item->marker), + null_value(item->null_value), + is_expensive_cache(-1), + join_tab_idx(item->join_tab_idx) +{ + next= thd->free_list; // Put in free list + thd->free_list= this; +} + + +void Item::print_parenthesised(String *str, enum_query_type query_type, + enum precedence parent_prec) +{ + bool need_parens= precedence() < parent_prec; + if (need_parens) + str->append('('); + print(str, query_type); + if (need_parens) + str->append(')'); +} + + +void Item::print(String *str, enum_query_type query_type) +{ + str->append(full_name_cstring()); +} + + +void Item::print_item_w_name(String *str, enum_query_type query_type) +{ + print(str, query_type); + + if (name.str) + { + DBUG_ASSERT(name.length == strlen(name.str)); + THD *thd= current_thd; + str->append(STRING_WITH_LEN(" AS ")); + append_identifier(thd, str, &name); + } +} + + +void Item::print_value(String *str) +{ + char buff[MAX_FIELD_WIDTH]; + String *ptr, tmp(buff,sizeof(buff),str->charset()); + ptr= val_str(&tmp); + if (!ptr) + str->append(NULL_clex_str); + else + { + switch (cmp_type()) { + case STRING_RESULT: + case TIME_RESULT: + append_unescaped(str, ptr->ptr(), ptr->length()); + break; + case DECIMAL_RESULT: + case REAL_RESULT: + case INT_RESULT: + str->append(*ptr); + break; + case ROW_RESULT: + DBUG_ASSERT(0); + } + } +} + + +void Item::cleanup() +{ + DBUG_ENTER("Item::cleanup"); + DBUG_PRINT("enter", ("this: %p", this)); + marker= MARKER_UNUSED; + join_tab_idx= MAX_TABLES; + if (orig_name) + { + name.str= orig_name; + name.length= strlen(orig_name); + } + DBUG_VOID_RETURN; +} + + +/** + cleanup() item if it is 'fixed'. 
  @param arg   a dummy parameter, is not used here
*/

bool Item::cleanup_processor(void *arg)
{
  if (fixed())
    cleanup();
  return FALSE;                                 // Continue the walk
}


/**
  Traverse item tree possibly transforming it (replacing items).

  This function is designed to ease transformation of Item trees.
  Re-execution note: every such transformation is registered for
  rollback by THD::change_item_tree() and is rolled back at the end
  of execution by THD::rollback_item_tree_changes().

  Therefore:
  - this function can not be used at prepared statement prepare
    (in particular, in fix_fields!), as only permanent
    transformation of Item trees are allowed at prepare.
  - the transformer function shall allocate new Items in execution
    memory root (thd->mem_root) and not anywhere else: allocated
    items will be gone in the end of execution.

  If you don't need to transform an item tree, but only traverse
  it, please use Item::walk() instead.


  @param transformer    functor that performs transformation of a subtree
  @param arg            opaque argument passed to the functor

  @return
    Returns pointer to the new subtree root.  THD::change_item_tree()
    should be called for it if transformation took place, i.e. if a
    pointer to newly allocated item is returned.
*/

Item* Item::transform(THD *thd, Item_transformer transformer, uchar *arg)
{
  /* Transformations are forbidden during prepared statement prepare. */
  DBUG_ASSERT(!thd->stmt_arena->is_stmt_prepare());

  return (this->*transformer)(thd, arg);
}


/**
  Create and set up an expression cache for this item

  @param thd             Thread handle
  @param depends_on      List of the expression parameters

  @details
  The function creates an expression cache for an item and its parameters
  specified by the 'depends_on' list. Then the expression cache is placed
  into a cache wrapper that is returned as the result of the function.
  @returns
    A pointer to created wrapper item if successful, NULL - otherwise
*/

Item* Item::set_expr_cache(THD *thd)
{
  DBUG_ENTER("Item::set_expr_cache");
  Item_cache_wrapper *wrapper;
  if (likely((wrapper= new (thd->mem_root) Item_cache_wrapper(thd, this))) &&
      likely(!wrapper->fix_fields(thd, (Item**)&wrapper)))
  {
    if (likely(!wrapper->set_cache(thd)))
      DBUG_RETURN(wrapper);
  }
  /* Allocation, fixing, or cache setup failed. */
  DBUG_RETURN(NULL);
}


Item_ident::Item_ident(THD *thd, Name_resolution_context *context_arg,
                       const LEX_CSTRING &db_name_arg,
                       const LEX_CSTRING &table_name_arg,
                       const LEX_CSTRING &field_name_arg)
  :Item_result_field(thd), orig_db_name(db_name_arg),
   orig_table_name(table_name_arg),
   orig_field_name(field_name_arg), context(context_arg),
   db_name(db_name_arg), table_name(table_name_arg),
   field_name(field_name_arg),
   cached_table(NULL), depended_from(NULL),
   cached_field_index(NO_CACHED_FIELD_INDEX),
   can_be_depended(TRUE), alias_name_used(FALSE)
{
  name= field_name_arg;
}


/* Constructor for a column of a view: the view alias becomes the table name. */
Item_ident::Item_ident(THD *thd, TABLE_LIST *view_arg,
                       const LEX_CSTRING &field_name_arg)
  :Item_result_field(thd), orig_db_name(null_clex_str),
   orig_table_name(view_arg->table_name),
   orig_field_name(field_name_arg),
   /* TODO: suspicious use of first_select_lex */
   context(&view_arg->view->first_select_lex()->context),
   db_name(null_clex_str), table_name(view_arg->alias),
   field_name(field_name_arg),
   cached_table(NULL), depended_from(NULL),
   cached_field_index(NO_CACHED_FIELD_INDEX),
   can_be_depended(TRUE), alias_name_used(FALSE)
{
  name= field_name_arg;
}


/**
  Constructor used by Item_field & Item_*_ref (see Item comment)
*/

Item_ident::Item_ident(THD *thd, Item_ident *item)
  :Item_result_field(thd, item),
   orig_db_name(item->orig_db_name),
   orig_table_name(item->orig_table_name),
   orig_field_name(item->orig_field_name),
   context(item->context),
   db_name(item->db_name),
   table_name(item->table_name),
   field_name(item->field_name),
   cached_table(item->cached_table),
   depended_from(item->depended_from),
   cached_field_index(item->cached_field_index),
   can_be_depended(item->can_be_depended),
   alias_name_used(item->alias_name_used)
{}

void Item_ident::cleanup()
{
  DBUG_ENTER("Item_ident::cleanup");
  bool was_fixed= fixed();
  Item_result_field::cleanup();
  /* Restore the names as they appeared in the original query. */
  db_name= orig_db_name;
  table_name= orig_table_name;
  field_name= orig_field_name;
  /* Store if this Item was depended */
  if (was_fixed)
  {
    /*
      We can trust that depended_from set correctly only if this item
      was fixed
    */
    can_be_depended= MY_TEST(depended_from);
  }
  DBUG_VOID_RETURN;
}

/* Drop the dependency on the SELECT passed in 'arg' and adopt its context. */
bool Item_ident::remove_dependence_processor(void * arg)
{
  DBUG_ENTER("Item_ident::remove_dependence_processor");
  if (get_depended_from() == (st_select_lex *) arg)
    depended_from= 0;
  context= &((st_select_lex *) arg)->context;
  DBUG_RETURN(0);
}

/*
  Collect (or merely count, depending on prm->collect) outer references
  whose dependency lies in an enclosing select of the same nest base.
*/
bool Item_ident::collect_outer_ref_processor(void *param)
{
  Collect_deps_prm *prm= (Collect_deps_prm *)param;
  if (depended_from &&
      depended_from->nest_level_base == prm->nest_level_base &&
      depended_from->nest_level < prm->nest_level)
  {
    if (prm->collect)
      prm->parameters->add_unique(this, &cmp_items);
    else
      prm->count++;
  }
  return FALSE;
}


/**
  Store the pointer to this item field into a list if not already there.

  The method is used by Item::walk to collect all unique Item_field objects
  from a tree of Items into a set of items represented as a list.

  Item_cond::walk() and Item_func::walk() stop the evaluation of the
  processor function for its arguments once the processor returns
  true. Therefore in order to force this method being called for all item
  arguments in a condition the method must return false.

  @param arg  pointer to a List<Item_field>

  @return
    FALSE to force the evaluation of collect_item_field_processor
    for the subsequent items.
+*/ + +bool Item_field::collect_item_field_processor(void *arg) +{ + DBUG_ENTER("Item_field::collect_item_field_processor"); + DBUG_PRINT("info", ("%s", field->field_name.str ? + field->field_name.str : "noname")); + List *item_list= (List*) arg; + List_iterator item_list_it(*item_list); + Item_field *curr_item; + while ((curr_item= item_list_it++)) + { + if (curr_item->eq(this, 1)) + DBUG_RETURN(FALSE); /* Already in the set. */ + } + item_list->push_back(this); + DBUG_RETURN(FALSE); +} + + +void Item_ident::undeclared_spvar_error() const +{ + /* + We assume this is an unknown SP variable, possibly a ROW variable. + Print the leftmost name in the error: + SET var=a; -> a + SET var=a.b; -> a + SET var=a.b.c; -> a + */ + my_error(ER_SP_UNDECLARED_VAR, MYF(0), db_name.str ? db_name.str : + table_name.str ? table_name.str : + field_name.str); +} + +bool Item_field::unknown_splocal_processor(void *arg) +{ + DBUG_ENTER("Item_field::unknown_splocal_processor"); + DBUG_ASSERT(type() == FIELD_ITEM); + undeclared_spvar_error(); + DBUG_RETURN(true); +} + + +bool Item_field::add_field_to_set_processor(void *arg) +{ + DBUG_ENTER("Item_field::add_field_to_set_processor"); + DBUG_PRINT("info", ("%s", field->field_name.str ? 
field->field_name.str : + "noname")); + TABLE *table= (TABLE *) arg; + if (field->table == table) + bitmap_set_bit(&table->tmp_set, field->field_index); + DBUG_RETURN(FALSE); +} + + +/** + Rename fields in an expression to new field name as speficied by ALTER TABLE +*/ + +bool Item_field::rename_fields_processor(void *arg) +{ + Item::func_processor_rename *rename= (Item::func_processor_rename*) arg; + List_iterator def_it(rename->fields); + Create_field *def; + + while ((def=def_it++)) + { + if (def->change.str && + (!db_name.str || !db_name.str[0] || + !my_strcasecmp(table_alias_charset, db_name.str, rename->db_name.str)) && + (!table_name.str || !table_name.str[0] || + !my_strcasecmp(table_alias_charset, table_name.str, rename->table_name.str)) && + !my_strcasecmp(system_charset_info, field_name.str, def->change.str)) + { + field_name= def->field_name; + break; + } + } + return 0; +} + + +/** + Check if an Item_field references some field from a list of fields. + + Check whether the Item_field represented by 'this' references any + of the fields in the keyparts passed via 'arg'. Used with the + method Item::walk() to test whether any keypart in a sequence of + keyparts is referenced in an expression. 
  @param arg   Field being compared, arg must be of type Field

  @retval
    TRUE if 'this' references the field 'arg'
  @retval
    FALSE otherwise
*/

bool Item_field::find_item_in_field_list_processor(void *arg)
{
  /* 'arg' points at two KEY_PART_INFO pointers: [first, last). */
  KEY_PART_INFO *first_non_group_part= *((KEY_PART_INFO **) arg);
  KEY_PART_INFO *last_part= *(((KEY_PART_INFO **) arg) + 1);
  KEY_PART_INFO *cur_part;

  for (cur_part= first_non_group_part; cur_part != last_part; cur_part++)
  {
    if (field->eq(cur_part->field))
      return TRUE;
  }
  return FALSE;
}


/*
  Mark field in read_map

  NOTES
    This is used by filesort to register used fields in a temporary
    column read set or to register used fields in a view or check constraint
*/

bool Item_field::register_field_in_read_map(void *arg)
{
  TABLE *table= (TABLE *) arg;
  int res= 0;
  /* When a table is given, only fields of that table are registered. */
  if (table && table != field->table)
    return res;

  if (field->vcol_info &&
      !bitmap_fast_test_and_set(field->table->read_set, field->field_index))
  {
    /* First time we see this virtual column: also register its base fields. */
    res= field->vcol_info->expr->walk(&Item::register_field_in_read_map,1,arg);
  }
  else
    bitmap_set_bit(field->table->read_set, field->field_index);
  return res;
}

/*
  @brief
  Mark field in bitmap supplied as *arg
*/

bool Item_field::register_field_in_bitmap(void *arg)
{
  MY_BITMAP *bitmap= (MY_BITMAP *) arg;
  DBUG_ASSERT(bitmap);
  bitmap_set_bit(bitmap, field->field_index);
  return 0;
}


/*
  Mark field in write_map

  NOTES
    This is used by UPDATE to register underlying fields of used view fields.
*/

bool Item_field::register_field_in_write_map(void *arg)
{
  TABLE *table= (TABLE *) arg;
  /* A NULL table means: register regardless of which table the field is in. */
  if (field->table == table || !table)
    bitmap_set_bit(field->table->write_set, field->field_index);
  return 0;
}

/**
  Check that we are not referring to any not yet initialized fields

  Fields are initialized in this order:
  - All fields that have default value as a constant are initialized first.
  - Then user-specified values from the INSERT list
  - Then all fields that has a default expression, in field_index order.
  - Then all virtual fields, in field_index order.
  - Then auto-increment values

  This means:
  - For default fields we can't access the same field or a field after
    itself that doesn't have a non-constant default value.
  - A virtual field can't access itself or a virtual field after itself.
  - user-specified values will not see virtual fields or default expressions,
    as in INSERT t1 (a) VALUES (b);
  - no virtual fields can access auto-increment values

  This is used by fix_vcol_expr() when a table is opened

  We don't have to check non-virtual fields that are marked as
  NO_DEFAULT_VALUE as the upper level will ensure that all these
  will be given a value.
*/

bool Item_field::check_field_expression_processor(void *arg)
{
  Field *org_field= (Field*) arg;
  /* Non-virtual NO_DEFAULT_VALUE fields are handled by the upper level. */
  if (field->flags & NO_DEFAULT_VALUE_FLAG && !field->vcol_info)
    return 0;
  if ((field->default_value && field->default_value->flags) || field->vcol_info)
  {
    /*
      Error cases per the ordering rules above: self-reference, a default
      expression referring to a virtual field, or a same-kind field that
      is initialized at the same time or later (field_index order).
    */
    if (field == org_field ||
        (!org_field->vcol_info && field->vcol_info) ||
        (((field->vcol_info && org_field->vcol_info) ||
          (!field->vcol_info && !org_field->vcol_info)) &&
         field->field_index >= org_field->field_index))
    {
      my_error(ER_EXPRESSION_REFERS_TO_UNINIT_FIELD, MYF(0),
               org_field->field_name.str, field->field_name.str);
      return 1;
    }
  }
  return 0;
}

/*
  Recompute this virtual column (and, recursively, the virtual columns
  its expression uses), marking each visited field in the bitmap to
  process every one only once.
*/
bool Item_field::update_vcol_processor(void *arg)
{
  MY_BITMAP *map= (MY_BITMAP *) arg;
  if (field->vcol_info &&
      !bitmap_fast_test_and_set(map, field->field_index))
  {
    field->vcol_info->expr->walk(&Item::update_vcol_processor, 0, arg);
    field->vcol_info->expr->save_in_field(field, 0);
  }
  return 0;
}


/* A scalar item provides one column: error unless exactly 1 was requested. */
bool Item::check_cols(uint c)
{
  if (c != 1)
  {
    my_error(ER_OPERAND_COLUMNS, MYF(0), c);
    return 1;
  }
  return 0;
}


/*
  Accept the expected type handler, or any general-purpose string type
  with binary collation; otherwise raise
  ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION.
*/
bool Item::check_type_or_binary(const LEX_CSTRING &opname,
                                const Type_handler *expect) const
{
  const Type_handler *handler= type_handler();
  if (handler == expect ||
      (handler->is_general_purpose_string_type() &&
       collation.collation == &my_charset_bin))
    return false;
  my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0),
           handler->name().ptr(), opname.str);
  return true;
}


/*
  The check_type_*() family below all follow the same pattern: return
  false (OK) when the item's type handler satisfies the named predicate,
  otherwise raise ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION naming the
  handler and the operation, and return true (error).
*/

bool Item::check_type_general_purpose_string(const LEX_CSTRING &opname) const
{
  const Type_handler *handler= type_handler();
  if (handler->is_general_purpose_string_type())
    return false;
  my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0),
           handler->name().ptr(), opname.str);
  return true;
}


bool Item::check_type_traditional_scalar(const LEX_CSTRING &opname) const
{
  const Type_handler *handler= type_handler();
  if (handler->is_traditional_scalar_type())
    return false;
  my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0),
           handler->name().ptr(), opname.str);
  return true;
}


bool Item::check_type_can_return_int(const LEX_CSTRING &opname) const
{
  const Type_handler *handler= type_handler();
  if (handler->can_return_int())
    return false;
  my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0),
           handler->name().ptr(), opname.str);
  return true;
}


bool Item::check_type_can_return_decimal(const LEX_CSTRING &opname) const
{
  const Type_handler *handler= type_handler();
  if (handler->can_return_decimal())
    return false;
  my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0),
           handler->name().ptr(), opname.str);
  return true;
}


bool Item::check_type_can_return_real(const LEX_CSTRING &opname) const
{
  const Type_handler *handler= type_handler();
  if (handler->can_return_real())
    return false;
  my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0),
           handler->name().ptr(), opname.str);
  return true;
}


bool Item::check_type_can_return_date(const LEX_CSTRING &opname) const
{
  const Type_handler *handler= type_handler();
  if (handler->can_return_date())
    return false;
  my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0),
           handler->name().ptr(), opname.str);
  return true;
}


bool Item::check_type_can_return_time(const LEX_CSTRING &opname) const
{
  const Type_handler *handler= type_handler();
  if (handler->can_return_time())
    return false;
  my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0),
           handler->name().ptr(), opname.str);
  return true;
}


bool Item::check_type_can_return_str(const LEX_CSTRING &opname) const
{
  const Type_handler *handler= type_handler();
  if (handler->can_return_str())
    return false;
  my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0),
           handler->name().ptr(), opname.str);
  return true;
}


bool Item::check_type_can_return_text(const LEX_CSTRING &opname) const
{
  const Type_handler *handler= type_handler();
  if (handler->can_return_text())
    return false;
  my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0),
           handler->name().ptr(), opname.str);
  return true;
}


bool Item::check_type_scalar(const LEX_CSTRING &opname) const
{
  /*
    fixed==true usually means than the Item has an initialized
    and reliable data type handler and attributes.
    Item_outer_ref is an exception. It copies the data type and the attributes
    from the referenced Item in the constructor, but then sets "fixed" to false,
    and re-fixes itself again in fix_inner_refs().
    This hack in Item_outer_ref should probably be refactored eventually.
    Discuss with Sanja.
  */
  DBUG_ASSERT(fixed() || type() == REF_ITEM);
  const Type_handler *handler= type_handler();
  if (handler->is_scalar_type())
    return false;
  my_error(ER_OPERAND_COLUMNS, MYF(0), 1);
  return true;
}


extern "C" {

/*
  All values greater than MY_NAME_BINARY_VALUE are
  interpreted as binary bytes.
  The exact constant value does not matter,
  but it must be greater than 0x10FFFF,
  which is the maximum possible character in Unicode.
+*/ +#define MY_NAME_BINARY_VALUE 0x200000 + +/* + Print all binary bytes as well as zero character U+0000 in hex notation. + Print other characters normally. +*/ +static int +my_wc_mb_item_name(CHARSET_INFO *cs, my_wc_t wc, uchar *str, uchar *end) +{ + if (wc == 0 || wc >= MY_NAME_BINARY_VALUE) + { + if (str + 4 >= end) + return MY_CS_TOOSMALL3; + str[0]= '\\'; + str[1]= 'x'; + str[2]= _dig_vec_upper[(uchar) (wc >> 4)]; + str[3]= _dig_vec_upper[(uchar) wc & 0x0F]; + return 4; + } + return my_charset_utf8mb3_handler.wc_mb(cs, wc, str, end); +} + + +/* + Scan characters and mark all illegal sequences as binary byte values, + to have my_wc_mb_utf8_escape_name() print them using HEX notation. +*/ +static int +my_mb_wc_item_name(CHARSET_INFO *cs, my_wc_t *pwc, + const uchar *str, const uchar *end) +{ + int rc= cs->cset->mb_wc(cs, pwc, str, end); + if (rc == MY_CS_ILSEQ) + { + *pwc= MY_NAME_BINARY_VALUE + *str; + return 1; + } + return rc; +} + +} + + +static LEX_CSTRING +make_name(THD *thd, + const char *str, size_t length, CHARSET_INFO *cs, + size_t max_octet_length) +{ + uint errors; + size_t dst_nbytes= length * system_charset_info->mbmaxlen; + set_if_smaller(dst_nbytes, max_octet_length); + char *dst= (char*) thd->alloc(dst_nbytes + 1); + if (!dst) + return null_clex_str; + uint32 cnv_length= my_convert_using_func(dst, dst_nbytes, system_charset_info, + my_wc_mb_item_name, + str, length, + cs == &my_charset_bin ? + system_charset_info : cs, + my_mb_wc_item_name, &errors); + dst[cnv_length]= '\0'; + return Lex_cstring(dst, cnv_length); +} + + +void Item::set_name(THD *thd, const char *str, size_t length, CHARSET_INFO *cs) +{ + if (!length) + { + /* + Null string are replaced by item_empty_name. This is used by AS or + internal function like last_insert_id() to detect if we need to + change the name later. + Used by sql_yacc.yy in select_alias handling + */ + name.str= str ? 
item_used_name : item_empty_name; + name.length= 0; + return; + } + + const char *str_start= str; + if (!cs->m_ctype || cs->mbminlen > 1) + { + str+= cs->scan(str, str + length, MY_SEQ_SPACES); + length-= (uint)(str - str_start); + } + else + { + /* + This will probably need a better implementation in the future: + a function in CHARSET_INFO structure. + */ + while (length && !my_isgraph(cs,*str)) + { // Fix problem with yacc + length--; + str++; + } + } + if (str != str_start && is_explicit_name()) + { + char buff[SAFE_NAME_LEN]; + + strmake(buff, str_start, + MY_MIN(sizeof(buff)-1, length + (int) (str-str_start))); + + if (length == 0) + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_NAME_BECOMES_EMPTY, + ER_THD(thd, ER_NAME_BECOMES_EMPTY), + buff); + else + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_REMOVED_SPACES, ER_THD(thd, ER_REMOVED_SPACES), + buff); + } + name= make_name(thd, str, length, cs, MAX_ALIAS_NAME - 1); +} + + +void Item::set_name_no_truncate(THD *thd, const char *str, uint length, + CHARSET_INFO *cs) +{ + name= make_name(thd, str, length, cs, UINT_MAX - 1); +} + + +/** + @details + This function is called when: + - Comparing items in the WHERE clause (when doing where optimization) + - When trying to find an ORDER BY/GROUP BY item in the SELECT part +*/ + +bool Item::eq(const Item *item, bool binary_cmp) const +{ + /* + Note, that this is never TRUE if item is a Item_param: + for all basic constants we have special checks, and Item_param's + type() can be only among basic constant types. + */ + return type() == item->type() && name.str && item->name.str && + !lex_string_cmp(system_charset_info, &name, &item->name); +} + + +Item *Item::safe_charset_converter(THD *thd, CHARSET_INFO *tocs) +{ + if (!needs_charset_converter(tocs)) + return this; + Item_func_conv_charset *conv= new (thd->mem_root) Item_func_conv_charset(thd, this, tocs, 1); + return conv && conv->safe ? 
conv : NULL; +} + + +/** + Some pieces of the code do not support changing of + Item_cache to other Item types. + + Example: + Item_singlerow_subselect has "Item_cache **row". + Creating of Item_func_conv_charset followed by THD::change_item_tree() + should not change row[i] from Item_cache directly to Item_func_conv_charset, + because Item_singlerow_subselect later calls Item_cache-specific methods, + e.g. row[i]->store() and row[i]->cache_value(). + + Let's wrap Item_func_conv_charset in a new Item_cache, + so the Item_cache-specific methods can still be used for + Item_singlerow_subselect::row[i] safely. + + As a bonus we cache the converted value, instead of converting every time + + TODO: we should eventually check all other use cases of change_item_tree(). + Perhaps some more potentially dangerous substitution examples exist. +*/ + +Item *Item_cache::safe_charset_converter(THD *thd, CHARSET_INFO *tocs) +{ + if (!example) + return Item::safe_charset_converter(thd, tocs); + Item *conv= example->safe_charset_converter(thd, tocs); + if (conv == example) + return this; + if (!conv || conv->fix_fields(thd, (Item **) NULL)) + return NULL; // Safe conversion is not possible, or OOM + setup(thd, conv); + thd->change_item_tree(&example, conv); + return this; +} + + +/** + @details + Created mostly for mysql_prepare_table(). Important + when a string ENUM/SET column is described with a numeric default value: + + CREATE TABLE t1(a SET('a') DEFAULT 1); + + We cannot use generic Item::safe_charset_converter(), because + the latter returns a non-fixed Item, so val_str() crashes afterwards. + Override Item_num method, to return a fixed item. +*/ + +Item *Item_num::safe_charset_converter(THD *thd, CHARSET_INFO *tocs) +{ + /* + Item_num returns pure ASCII result, + so conversion is needed only in case of "tricky" character + sets like UCS2. If tocs is not "tricky", return the item itself. 
+ */ + if (!(tocs->state & MY_CS_NONASCII)) + return this; + + Item *conv; + if ((conv= const_charset_converter(thd, tocs, true))) + conv->fix_char_length(max_char_length()); + return conv; +} + + +/** + Create character set converter for constant items + using Item_null, Item_string or Item_static_string_func. + + @param tocs Character set to to convert the string to. + @param lossless Whether data loss is acceptable. + @param func_name Function name, or NULL. + + @return this, if conversion is not needed, + NULL, if safe conversion is not possible, or + a new item representing the converted constant. +*/ +Item *Item::const_charset_converter(THD *thd, CHARSET_INFO *tocs, + bool lossless, + const char *func_name) +{ + DBUG_ASSERT(const_item()); + DBUG_ASSERT(fixed()); + StringBuffer<64>tmp; + String *s= val_str(&tmp); + MEM_ROOT *mem_root= thd->mem_root; + + if (!s) + return new (mem_root) Item_null(thd, (char *) func_name, tocs); + + if (!needs_charset_converter(s->length(), tocs)) + { + if (collation.collation == &my_charset_bin && tocs != &my_charset_bin && + !this->check_well_formed_result(s, true)) + return NULL; + return this; + } + + uint conv_errors; + Item_string *conv= (func_name ? + new (mem_root) + Item_static_string_func(thd, Lex_cstring_strlen(func_name), + s, tocs, &conv_errors, + collation.derivation, + collation.repertoire) : + new (mem_root) + Item_string(thd, s, tocs, &conv_errors, + collation.derivation, + collation.repertoire)); + + if (unlikely(!conv || (conv_errors && lossless))) + { + /* + Safe conversion is not possible (or EOM). + We could not convert a string into the requested character set + without data loss. The target charset does not cover all the + characters from the string. Operation cannot be done correctly. 
+ */ + return NULL; + } + if (s->charset() == &my_charset_bin && tocs != &my_charset_bin && + !conv->check_well_formed_result(true)) + return NULL; + return conv; +} + + +Item *Item_param::safe_charset_converter(THD *thd, CHARSET_INFO *tocs) +{ + /* + Return "this" if in prepare. result_type may change at execition time, + to it's possible that the converter will not be needed at all: + + PREPARE stmt FROM 'SELECT * FROM t1 WHERE field = ?'; + SET @arg= 1; + EXECUTE stmt USING @arg; + + In the above example result_type is STRING_RESULT at prepare time, + and INT_RESULT at execution time. + */ + return !const_item() || state == NULL_VALUE ? + this : const_charset_converter(thd, tocs, true); +} + + +/** + Get the value of the function as a MYSQL_TIME structure. + As a extra convenience the time structure is reset on error or NULL values! +*/ + +bool Item::get_date_from_int(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + Longlong_hybrid value(val_int(), unsigned_flag); + return null_value || int_to_datetime_with_warn(thd, value, + ltime, fuzzydate, + field_table_or_null(), + field_name_or_null()); +} + + +bool Item::get_date_from_real(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + double value= val_real(); + return null_value || double_to_datetime_with_warn(thd, value, + ltime, fuzzydate, + field_table_or_null(), + field_name_or_null()); +} + + +bool Item::get_date_from_string(THD *thd, MYSQL_TIME *to, date_mode_t mode) +{ + StringBuffer tmp; + const TABLE_SHARE *s = field_table_or_null(); + Temporal::Warn_push warn(thd, s ? s->db.str : nullptr, + s ? 
s->table_name.str : nullptr, + field_name_or_null(), to, mode); + Temporal_hybrid *t= new(to) Temporal_hybrid(thd, &warn, val_str(&tmp), mode); + return !t->is_valid_temporal(); +} + + +const MY_LOCALE *Item::locale_from_val_str() +{ + StringBuffer tmp; + String *locale_name= val_str_ascii(&tmp); + const MY_LOCALE *lc; + if (!locale_name || + !(lc= my_locale_by_name(locale_name->c_ptr_safe()))) + { + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_UNKNOWN_LOCALE, + ER_THD(thd, ER_UNKNOWN_LOCALE), + locale_name ? locale_name->c_ptr_safe() : "NULL"); + lc= &my_locale_en_US; + } + return lc; +} + + +CHARSET_INFO *Item::default_charset() +{ + return current_thd->variables.collation_connection; +} + + +/* + Save value in field, but don't give any warnings + + NOTES + This is used to temporary store and retrieve a value in a column, + for example in opt_range to adjust the key value to fit the column. +*/ + +int Item::save_in_field_no_warnings(Field *field, bool no_conversions) +{ + int res; + TABLE *table= field->table; + THD *thd= table->in_use; + enum_check_fields org_count_cuted_fields= thd->count_cuted_fields; + MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, &table->write_set); + Use_relaxed_field_copy urfc(table->in_use); + res= save_in_field(field, no_conversions); + + thd->count_cuted_fields= org_count_cuted_fields; + dbug_tmp_restore_column_map(&table->write_set, old_map); + return res; +} + + +#ifndef DBUG_OFF +static inline +void dbug_mark_unsupported_func(const char *where, const char *processor_name) +{ + char buff[64]; + my_snprintf(buff, sizeof(buff), "%s::%s", where ? 
where: "", processor_name); + DBUG_ENTER(buff); + my_snprintf(buff, sizeof(buff), "%s returns TRUE: unsupported function", processor_name); + DBUG_PRINT("info", ("%s", buff)); + DBUG_VOID_RETURN; +} +#else +#define dbug_mark_unsupported_func(X,Y) {} +#endif + +bool mark_unsupported_function(const char *where, void *store, uint result) +{ + Item::vcol_func_processor_result *res= + (Item::vcol_func_processor_result*) store; + uint old_errors= res->errors; + dbug_mark_unsupported_func(where, "check_vcol_func_processor"); + res->errors|= result; /* Store type of expression */ + /* Store the name to the highest violation (normally VCOL_IMPOSSIBLE) */ + if (result > old_errors) + res->name= where ? where : ""; + return false; +} + +/* convenience helper for mark_unsupported_function() above */ +bool mark_unsupported_function(const char *w1, const char *w2, + void *store, uint result) +{ + char *ptr= (char*)current_thd->alloc(strlen(w1) + strlen(w2) + 1); + if (ptr) + strxmov(ptr, w1, w2, NullS); + return mark_unsupported_function(ptr, store, result); +} + + +bool Item_field::check_vcol_func_processor(void *arg) +{ + uint r= VCOL_FIELD_REF; + context= 0; + vcol_func_processor_result *res= (vcol_func_processor_result *) arg; + if (res && res->alter_info) + r|= res->alter_info->check_vcol_field(this); + else if (field) + { + if (field->unireg_check == Field::NEXT_NUMBER) + r|= VCOL_AUTO_INC; + if (field->vcol_info && + field->vcol_info->flags & (VCOL_NOT_STRICTLY_DETERMINISTIC | VCOL_AUTO_INC)) + r|= VCOL_NON_DETERMINISTIC; + } + return mark_unsupported_function(field_name.str, arg, r); +} + + +Query_fragment::Query_fragment(THD *thd, sp_head *sphead, + const char *start, const char *end) +{ + DBUG_ASSERT(start <= end); + if (thd->lex->clone_spec_offset) + { + Lex_input_stream *lip= (& thd->m_parser_state->m_lip); + DBUG_ASSERT(lip->get_buf() <= start); + DBUG_ASSERT(end <= lip->get_end_of_query()); + set(start - lip->get_buf(), end - start); + } + else if (sphead) + { + if 
(sphead->m_tmp_query) + { + // Normal SP statement + DBUG_ASSERT(sphead->m_tmp_query <= start); + set(start - sphead->m_tmp_query, end - start); + } + else + { + /* + We're in the "if" expression of a compound query: + if (expr) + do_something; + end if; + sphead->m_tmp_query is not set yet at this point, because + the "if" part of such statements is never put into the binary log. + Values of Rewritable_query_parameter::pos_in_query and + Rewritable_query_parameter:len_in_query will not be important, + so setting both to 0 should be fine. + */ + set(0, 0); + } + } + else + { + // Non-SP statement + DBUG_ASSERT(thd->query() <= start); + DBUG_ASSERT(end <= thd->query_end()); + set(start - thd->query(), end - start); + } +} + + +/***************************************************************************** + Item_sp_variable methods +*****************************************************************************/ + +Item_sp_variable::Item_sp_variable(THD *thd, const LEX_CSTRING *sp_var_name) + :Item_fixed_hybrid(thd), m_thd(0), m_name(*sp_var_name) +#ifndef DBUG_OFF + , m_sp(0) +#endif +{ +} + + +bool Item_sp_variable::fix_fields_from_item(THD *thd, Item **, const Item *it) +{ + m_thd= thd; /* NOTE: this must be set before any this_xxx() */ + + DBUG_ASSERT(it->fixed()); + + max_length= it->max_length; + decimals= it->decimals; + unsigned_flag= it->unsigned_flag; + base_flags|= item_base_t::FIXED; + with_flags|= item_with_t::SP_VAR; + if (thd->lex->current_select && thd->lex->current_select->master_unit()->item) + thd->lex->current_select->master_unit()->item->with_flags|= item_with_t::SP_VAR; + collation.set(it->collation.collation, it->collation.derivation); + + return FALSE; +} + + +double Item_sp_variable::val_real() +{ + DBUG_ASSERT(fixed()); + Item *it= this_item(); + double ret= it->val_real(); + null_value= it->null_value; + return ret; +} + + +longlong Item_sp_variable::val_int() +{ + DBUG_ASSERT(fixed()); + Item *it= this_item(); + longlong ret= it->val_int(); 
+ null_value= it->null_value; + return ret; +} + + +String *Item_sp_variable::val_str(String *sp) +{ + DBUG_ASSERT(fixed()); + Item *it= this_item(); + String *res= it->val_str(sp); + + null_value= it->null_value; + + if (!res) + return NULL; + + /* + This way we mark returned value of val_str as const, + so that various functions (e.g. CONCAT) won't try to + modify the value of the Item. Analogous mechanism is + implemented for Item_param. + Without this trick Item_splocal could be changed as a + side-effect of expression computation. Here is an example + of what happens without it: suppose x is varchar local + variable in a SP with initial value 'ab' Then + select concat(x,'c'); + would change x's value to 'abc', as Item_func_concat::val_str() + would use x's internal buffer to compute the result. + This is intended behaviour of Item_func_concat. Comments to + Item_param class contain some more details on the topic. + */ + + if (res != &str_value) + str_value.set(res->ptr(), res->length(), res->charset()); + else + res->mark_as_const(); + + return &str_value; +} + + +bool Item_sp_variable::val_native(THD *thd, Native *to) +{ + return val_native_from_item(thd, this_item(), to); +} + + +my_decimal *Item_sp_variable::val_decimal(my_decimal *decimal_value) +{ + DBUG_ASSERT(fixed()); + Item *it= this_item(); + my_decimal *val= it->val_decimal(decimal_value); + null_value= it->null_value; + return val; +} + + +bool Item_sp_variable::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + DBUG_ASSERT(fixed()); + Item *it= this_item(); + bool val= it->get_date(thd, ltime, fuzzydate); + null_value= it->null_value; + return val; +} + + +bool Item_sp_variable::is_null() +{ + return this_item()->is_null(); +} + +void Item_sp_variable::make_send_field(THD *thd, Send_field *field) +{ + Item *it= this_item(); + + it->make_send_field(thd, field); + if (name.str) + field->col_name= name; + else + field->col_name= m_name; +} + 
+/***************************************************************************** + Item_splocal methods +*****************************************************************************/ + +Item_splocal::Item_splocal(THD *thd, + const Sp_rcontext_handler *rh, + const LEX_CSTRING *sp_var_name, + uint sp_var_idx, + const Type_handler *handler, + uint pos_in_q, uint len_in_q): + Item_sp_variable(thd, sp_var_name), + Rewritable_query_parameter(pos_in_q, len_in_q), + Type_handler_hybrid_field_type(handler), + m_rcontext_handler(rh), + m_var_idx(sp_var_idx), + m_type(handler == &type_handler_row ? ROW_ITEM : CONST_ITEM) +{ + set_maybe_null(); +} + + +sp_rcontext *Item_splocal::get_rcontext(sp_rcontext *local_ctx) const +{ + return m_rcontext_handler->get_rcontext(local_ctx); +} + + +Item_field *Item_splocal::get_variable(sp_rcontext *ctx) const +{ + return get_rcontext(ctx)->get_variable(m_var_idx); +} + + +bool Item_splocal::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed() == 0); + Item *item= get_variable(thd->spcont); + set_handler(item->type_handler()); + return fix_fields_from_item(thd, ref, item); +} + + +Item * +Item_splocal::this_item() +{ + DBUG_ASSERT(m_sp == m_thd->spcont->m_sp); + DBUG_ASSERT(fixed()); + return get_variable(m_thd->spcont); +} + +const Item * +Item_splocal::this_item() const +{ + DBUG_ASSERT(m_sp == m_thd->spcont->m_sp); + DBUG_ASSERT(fixed()); + return get_variable(m_thd->spcont); +} + + +Item ** +Item_splocal::this_item_addr(THD *thd, Item **) +{ + DBUG_ASSERT(m_sp == thd->spcont->m_sp); + DBUG_ASSERT(fixed()); + return get_rcontext(thd->spcont)->get_variable_addr(m_var_idx); +} + + +void Item_splocal::print(String *str, enum_query_type) +{ + const LEX_CSTRING *prefix= m_rcontext_handler->get_name_prefix(); + str->reserve(m_name.length + 8 + prefix->length); + str->append(prefix); + str->append(&m_name); + str->append('@'); + str->qs_append(m_var_idx); +} + + +bool Item_splocal::set_value(THD *thd, sp_rcontext *ctx, Item **it) +{ + 
return get_rcontext(ctx)->set_variable(thd, get_var_idx(), it); +} + + +/** + These two declarations are different: + x INT; + ROW(x INT); + A ROW with one elements should not be comparable to scalar value. + + TODO: Currently we don't support one argument with the function ROW(), so + this query returns a syntax error, meaning that more arguments are expected: + SELECT ROW(1); + + Therefore, all around the code we assume that cols()==1 means a scalar value + and cols()>1 means a ROW value. With adding ROW SP variables this + assumption is not true any more. ROW variables with one element are + now possible. + + To implement Item::check_cols() correctly, we now should extend it to + know if a ROW or a scalar value is being tested. For example, + these new prototypes should work: + virtual bool check_cols(Item_result result, uint c); + or + virtual bool check_cols(const Type_handler *type, uint c); + + The current implementation of Item_splocal::check_cols() is a compromise + that should be more or less fine until we extend check_cols(). + It disallows ROW variables to appear in a scalar context. + The "|| n == 1" part of the conditon is responsible for this. + For example, it disallows ROW variables to appear in SELECT list: + +DELIMITER $$; +CREATE PROCEDURE p1() +AS + a ROW (a INT); +BEGIN + SELECT a; +END; +$$ +DELIMITER ;$$ +--error ER_OPERAND_COLUMNS +CALL p1(); + + But is produces false negatives with ROW variables consisting of one element. + For example, this script fails: + +SET sql_mode=ORACLE; +DROP PROCEDURE IF EXISTS p1; +DELIMITER $$ +CREATE PROCEDURE p1 +AS + a ROW(a INT); + b ROW(a INT); +BEGIN + SELECT a=b; +END; +$$ +DELIMITER ; +CALL p1(); + + and returns "ERROR 1241 (21000): Operand should contain 1 column(s)". + This will be fixed that we change check_cols(). 
+*/ + +bool Item_splocal::check_cols(uint n) +{ + DBUG_ASSERT(m_thd->spcont); + if (Type_handler_hybrid_field_type::cmp_type() != ROW_RESULT) + return Item::check_cols(n); + + if (n != this_item()->cols() || n == 1) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), n); + return true; + } + return false; +} + + +bool Item_splocal_row_field::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed() == 0); + Item *item= get_variable(thd->spcont)->element_index(m_field_idx); + return fix_fields_from_item(thd, ref, item); +} + + +Item * +Item_splocal_row_field::this_item() +{ + DBUG_ASSERT(m_sp == m_thd->spcont->m_sp); + DBUG_ASSERT(fixed()); + return get_variable(m_thd->spcont)->element_index(m_field_idx); +} + + +const Item * +Item_splocal_row_field::this_item() const +{ + DBUG_ASSERT(m_sp == m_thd->spcont->m_sp); + DBUG_ASSERT(fixed()); + return get_variable(m_thd->spcont)->element_index(m_field_idx); +} + + +Item ** +Item_splocal_row_field::this_item_addr(THD *thd, Item **) +{ + DBUG_ASSERT(m_sp == thd->spcont->m_sp); + DBUG_ASSERT(fixed()); + return get_variable(thd->spcont)->addr(m_field_idx); +} + + +void Item_splocal_row_field::print(String *str, enum_query_type) +{ + const LEX_CSTRING *prefix= m_rcontext_handler->get_name_prefix(); + str->reserve(m_name.length + m_field_name.length + 8 + prefix->length); + str->append(prefix); + str->append(&m_name); + str->append('.'); + str->append(&m_field_name); + str->append('@'); + str->qs_append(m_var_idx); + str->append('['); + str->qs_append(m_field_idx); + str->append(']'); +} + + +bool Item_splocal_row_field::set_value(THD *thd, sp_rcontext *ctx, Item **it) +{ + return get_rcontext(ctx)->set_variable_row_field(thd, m_var_idx, m_field_idx, + it); +} + + +bool Item_splocal_row_field_by_name::fix_fields(THD *thd, Item **it) +{ + DBUG_ASSERT(fixed() == 0); + m_thd= thd; + if (get_rcontext(thd->spcont)->find_row_field_by_name_or_error(&m_field_idx, + m_var_idx, + m_field_name)) + return true; + Item *item= 
get_variable(thd->spcont)->element_index(m_field_idx); + set_handler(item->type_handler()); + return fix_fields_from_item(thd, it, item); +} + + +void Item_splocal_row_field_by_name::print(String *str, enum_query_type) +{ + const LEX_CSTRING *prefix= m_rcontext_handler->get_name_prefix(); + // +16 should be enough for .NNN@[""] + if (str->reserve(m_name.length + 2 * m_field_name.length + + prefix->length + 16)) + return; + str->qs_append(prefix); + str->qs_append(&m_name); + str->qs_append('.'); + str->qs_append(&m_field_name); + str->qs_append('@'); + str->qs_append(m_var_idx); + str->qs_append("[\"", 2); + str->qs_append(&m_field_name); + str->qs_append("\"]", 2); +} + + +bool Item_splocal_row_field_by_name::set_value(THD *thd, sp_rcontext *ctx, Item **it) +{ + DBUG_ASSERT(fixed()); // Make sure m_field_idx is already set + return Item_splocal_row_field::set_value(thd, ctx, it); +} + + +/***************************************************************************** + Item_case_expr methods +*****************************************************************************/ + +LEX_CSTRING str_case_expr= { STRING_WITH_LEN("case_expr") }; + +Item_case_expr::Item_case_expr(THD *thd, uint case_expr_id): + Item_sp_variable(thd, &str_case_expr), + m_case_expr_id(case_expr_id) +{ +} + + +bool Item_case_expr::fix_fields(THD *thd, Item **ref) +{ + Item *item= thd->spcont->get_case_expr(m_case_expr_id); + return fix_fields_from_item(thd, ref, item); +} + + +Item * +Item_case_expr::this_item() +{ + DBUG_ASSERT(m_sp == m_thd->spcont->m_sp); + + return m_thd->spcont->get_case_expr(m_case_expr_id); +} + + + +const Item * +Item_case_expr::this_item() const +{ + DBUG_ASSERT(m_sp == m_thd->spcont->m_sp); + + return m_thd->spcont->get_case_expr(m_case_expr_id); +} + + +Item ** +Item_case_expr::this_item_addr(THD *thd, Item **) +{ + DBUG_ASSERT(m_sp == thd->spcont->m_sp); + + return thd->spcont->get_case_expr_addr(m_case_expr_id); +} + + +void Item_case_expr::print(String *str, 
enum_query_type) +{ + if (str->reserve(MAX_INT_WIDTH + sizeof("case_expr@"))) + return; /* purecov: inspected */ + (void) str->append(STRING_WITH_LEN("case_expr@")); + str->qs_append(m_case_expr_id); +} + + +/***************************************************************************** + Item_name_const methods +*****************************************************************************/ + +double Item_name_const::val_real() +{ + DBUG_ASSERT(fixed()); + double ret= value_item->val_real(); + null_value= value_item->null_value; + return ret; +} + + +longlong Item_name_const::val_int() +{ + DBUG_ASSERT(fixed()); + longlong ret= value_item->val_int(); + null_value= value_item->null_value; + return ret; +} + + +String *Item_name_const::val_str(String *sp) +{ + DBUG_ASSERT(fixed()); + String *ret= value_item->val_str(sp); + null_value= value_item->null_value; + return ret; +} + + +my_decimal *Item_name_const::val_decimal(my_decimal *decimal_value) +{ + DBUG_ASSERT(fixed()); + my_decimal *val= value_item->val_decimal(decimal_value); + null_value= value_item->null_value; + return val; +} + +bool Item_name_const::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + DBUG_ASSERT(fixed()); + bool rc= value_item->get_date(thd, ltime, fuzzydate); + null_value= value_item->null_value; + return rc; +} + +bool Item_name_const::val_native(THD *thd, Native *to) +{ + return val_native_from_item(thd, value_item, to); +} + +bool Item_name_const::is_null() +{ + return value_item->is_null(); +} + + +Item_name_const::Item_name_const(THD *thd, Item *name_arg, Item *val): + Item_fixed_hybrid(thd), value_item(val), name_item(name_arg) +{ + StringBuffer<128> name_buffer; + String *name_str; + + set_maybe_null(); + if (name_item->basic_const_item() && + (name_str= name_item->val_str(&name_buffer))) // Can't have a NULL name + set_name(thd, name_str); +} + + +Item::Type Item_name_const::type() const +{ + /* + + We are guarenteed that value_item->basic_const_item(), if not + an 
error is thrown that WRONG ARGUMENTS are supplied to + NAME_CONST function. + If type is FUNC_ITEM, then we have a fudged item_func_neg() + on our hands and return the underlying type. + For Item_func_set_collation() + e.g. NAME_CONST('name', 'value' COLLATE collation) we return its + 'value' argument type. + */ + Item::Type value_type= value_item->type(); + if (value_type == FUNC_ITEM) + { + /* + The second argument of NAME_CONST('name', 'value') must be + a simple constant item or a NEG_FUNC/COLLATE_FUNC. + */ + DBUG_ASSERT(((Item_func *) value_item)->functype() == + Item_func::NEG_FUNC || + ((Item_func *) value_item)->functype() == + Item_func::COLLATE_FUNC); + return ((Item_func *) value_item)->key_item()->type(); + } + return value_type; +} + + +bool Item_name_const::fix_fields(THD *thd, Item **ref) +{ + if (value_item->fix_fields_if_needed(thd, &value_item) || + name_item->fix_fields_if_needed(thd, &name_item) || + !value_item->const_item() || + !name_item->const_item()) + { + my_error(ER_RESERVED_SYNTAX, MYF(0), "NAME_CONST"); + return TRUE; + } + if (value_item->collation.derivation == DERIVATION_NUMERIC) + collation= DTCollation_numeric(); + else + collation.set(value_item->collation.collation, DERIVATION_IMPLICIT); + max_length= value_item->max_length; + decimals= value_item->decimals; + unsigned_flag= value_item->unsigned_flag; + base_flags|= item_base_t::FIXED; + return FALSE; +} + + +void Item_name_const::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("NAME_CONST(")); + name_item->print(str, query_type); + str->append(','); + value_item->print(str, query_type); + str->append(')'); +} + + +/* + need a special class to adjust printing : references to aggregate functions + must not be printed as refs because the aggregate functions that are added to + the front of select list are not printed as well. 
*/
class Item_aggregate_ref : public Item_ref
{
public:
  Item_aggregate_ref(THD *thd, Name_resolution_context *context_arg,
                     Item **item, const LEX_CSTRING &table_name_arg,
                     const LEX_CSTRING &field_name_arg):
    Item_ref(thd, context_arg, item, table_name_arg, field_name_arg) {}

  /* Print the referenced item itself, not a reference to it. */
  void print (String *str, enum_query_type query_type) override
  {
    if (ref)
      (*ref)->print(str, query_type);
    else
      Item_ident::print(str, query_type);
  }
  Ref_Type ref_type() override final { return AGGREGATE_REF; }
};


/**
  Move SUM items out from item tree and replace with reference.

  @param thd                Thread handler
  @param ref_pointer_array  Pointer to array of reference fields
  @param fields             All fields in select
  @param ref                Pointer to item
  @param split_flags        Zero or more of the following flags
                            SPLIT_SUM_SKIP_REGISTERED:
                            Function must be skipped for registered
                            SUM items
                            SPLIT_SUM_SELECT
                            We are called on the select level and have to
                            register items operated on sum function

  @note
    All found SUM items are added FIRST in the fields list and
    we replace the item with a reference.

    If this is an item in the SELECT list then we also have to split out
    all arguments to functions used together with the sum function.
    For example in case of SELECT A*sum(B) we have to split out both
    A and sum(B).
    This is not needed for ORDER BY, GROUP BY or HAVING as all references
    to items in the select list are already of type REF

    thd->fatal_error() may be called if we are out of memory
*/

void Item::split_sum_func2(THD *thd, Ref_ptr_array ref_pointer_array,
                           List<Item> &fields, Item **ref,
                           uint split_flags)
{
  if (unlikely(type() == SUM_FUNC_ITEM))
  {
    /* An item of type Item_sum is registered if ref_by != 0 */
    if ((split_flags & SPLIT_SUM_SKIP_REGISTERED) &&
        ((Item_sum *) this)->ref_by)
      return;
  }
  else if (type() == WINDOW_FUNC_ITEM || with_window_func())
  {
    /*
      Skip the else part, window functions are very special functions:
      they need to have their own fields in the temp. table, but they
      need to be processed differently than regular aggregate functions

      Call split_sum_func here so that each argument gets its fields to
      point to the temporary table.
    */
    split_sum_func(thd, ref_pointer_array, fields, split_flags);
    if (type() == FUNC_ITEM) {
      return;
    }
  }
  else if (type() == FUNC_ITEM &&
           ((Item_func*)this)->functype() == Item_func::ROWNUM_FUNC)
  {
  }
  else
  {
    /* Not a SUM() function */
    if (!with_sum_func() && !with_rownum_func() &&
        !(split_flags & SPLIT_SUM_SELECT))
    {
      /*
        This is not a SUM function and there are no SUM functions inside.
        Nothing more to do.
      */
      return;
    }
    if (likely(with_sum_func() ||
               (type() == FUNC_ITEM &&
                (((Item_func *) this)->functype() ==
                 Item_func::ISNOTNULLTEST_FUNC ||
                 ((Item_func *) this)->functype() ==
                 Item_func::TRIG_COND_FUNC))))
    {
      /* Will call split_sum_func2() for all items */
      split_sum_func(thd, ref_pointer_array, fields, split_flags);
      return;
    }

    if (unlikely((!(used_tables() & ~PARAM_TABLE_BIT) ||
                  (type() == REF_ITEM &&
                   ((Item_ref*)this)->ref_type() != Item_ref::VIEW_REF &&
                   ((Item_ref*)this)->ref_type() != Item_ref::DIRECT_REF))))
      return;
  }

  /*
    Replace item with a reference so that we can easily calculate
    it (in case of sum functions) or copy it (in case of fields)

    The test above is to ensure we don't do a reference for things
    that are constants (PARAM_TABLE_BIT is in effect a constant)
    or already referenced (for example an item in HAVING)
    Exception is Item_direct_view_ref which we need to convert to
    Item_ref to allow fields from view being stored in tmp table.
  */
  Item_ref *item_ref;
  uint el= fields.elements;
  /*
    If this is an item_ref, get the original item
    This is a safety measure if this is called for things that are
    already a reference.
  */
  Item *real_itm= real_item();
  ref_pointer_array[el]= real_itm;
  if (type() == WINDOW_FUNC_ITEM)
  {
    if (!(item_ref= (new (thd->mem_root)
                     Item_direct_ref(thd,
                                     &thd->lex->current_select->context,
                                     &ref_pointer_array[el],
                                     null_clex_str, name))))
      return;                                   // fatal_error is set
  }
  else
  {
    if (!(item_ref= (new (thd->mem_root)
                     Item_aggregate_ref(thd,
                                        &thd->lex->current_select->context,
                                        &ref_pointer_array[el],
                                        null_clex_str, name))))
      return;                                   // fatal_error is set
  }
  if (type() == SUM_FUNC_ITEM)
    item_ref->depended_from= ((Item_sum *) this)->depended_from();
  fields.push_front(real_itm);
  thd->change_item_tree(ref, item_ref);
}


/*
  Decide whether the left collation can absorb the right one without data
  loss: conversion to a Unicode superset, or conversion from pure ASCII.
*/
static bool
left_is_superset(const DTCollation *left, const DTCollation *right)
{
  /* Allow convert to Unicode */
  if (left->collation->state & MY_CS_UNICODE &&
      (left->derivation < right->derivation ||
       (left->derivation == right->derivation &&
        (!(right->collation->state & MY_CS_UNICODE) ||
         /* The code below makes 4-byte utf8 a superset over 3-byte utf8 */
         (left->collation->state & MY_CS_UNICODE_SUPPLEMENT &&
          !(right->collation->state & MY_CS_UNICODE_SUPPLEMENT) &&
          left->collation->mbmaxlen > right->collation->mbmaxlen &&
          left->collation->mbminlen == right->collation->mbminlen)))))
    return TRUE;
  /* Allow convert from ASCII */
  if (right->repertoire == MY_REPERTOIRE_ASCII &&
      (left->derivation < right->derivation ||
       (left->derivation == right->derivation &&
        !(left->repertoire == MY_REPERTOIRE_ASCII))))
    return TRUE;
  /* Disallow conversion otherwise */
  return FALSE;
}

/**
  Aggregate two collations together taking
  into account their coercibility (aka derivation):.

  0 == DERIVATION_EXPLICIT  - an explicitly written COLLATE clause @n
  1 == DERIVATION_NONE      - a mix of two different collations @n
  2 == DERIVATION_IMPLICIT  - a column @n
  3 == DERIVATION_COERCIBLE - a string constant.
  The most important rules are:
  -# If collations are the same:
    choose this collation, and the strongest derivation.
  -# If collations are different:
    - Character sets may differ, but only if conversion without
      data loss is possible. The caller provides flags whether
      character set conversion attempts should be done. If no
      flags are substituted, then the character sets must be the same.
      Currently processed flags are:
        MY_COLL_ALLOW_SUPERSET_CONV  - allow conversion to a superset
        MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value
    - two EXPLICIT collations produce an error, e.g. this is wrong:
      CONCAT(expr1 collate latin1_swedish_ci, expr2 collate latin1_german_ci)
    - the side with smaller derivation value wins,
      i.e. a column is stronger than a string constant,
      an explicit COLLATE clause is stronger than a column.
    - if derivations are the same, we have DERIVATION_NONE,
      we'll wait for an explicit COLLATE clause which possibly can
      come from another argument later: for example, this is valid,
      but we don't know yet when collecting the first two arguments:
        @code
          CONCAT(latin1_swedish_ci_column,
                 latin1_german1_ci_column,
                 expr COLLATE latin1_german2_ci)
        @endcode
*/

bool DTCollation::aggregate(const DTCollation &dt, uint flags)
{
  if (!my_charset_same(collation, dt.collation))
  {
    /*
      We do allow to use binary strings (like BLOBS)
      together with character strings.
      Binaries have more precedence than a character
      string of the same derivation.
    */
    if (collation == &my_charset_bin)
    {
      if (derivation <= dt.derivation)
      {
        /* Do nothing */
      }
      else
      {
        set(dt);
      }
    }
    else if (dt.collation == &my_charset_bin)
    {
      if (dt.derivation <= derivation)
      {
        set(dt);
      }
    }
    else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
             left_is_superset(this, &dt))
    {
      /* Do nothing */
    }
    else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
             left_is_superset(&dt, this))
    {
      set(dt);
    }
    else if ((flags & MY_COLL_ALLOW_COERCIBLE_CONV) &&
             derivation < dt.derivation &&
             dt.derivation >= DERIVATION_SYSCONST)
    {
      /* Do nothing */
    }
    else if ((flags & MY_COLL_ALLOW_COERCIBLE_CONV) &&
             dt.derivation < derivation &&
             derivation >= DERIVATION_SYSCONST)
    {
      set(dt);
    }
    else
    {
      // Cannot apply conversion
      set(&my_charset_bin, DERIVATION_NONE,
          (dt.repertoire|repertoire));
      return 1;
    }
  }
  else if (derivation < dt.derivation)
  {
    /* Do nothing */
  }
  else if (dt.derivation < derivation)
  {
    set(dt);
  }
  else
  {
    if (collation == dt.collation)
    {
      /* Do nothing */
    }
    else
    {
      if (derivation == DERIVATION_EXPLICIT)
      {
        set(0, DERIVATION_NONE, MY_REPERTOIRE_NONE);
        return 1;
      }
      if (collation->state & MY_CS_BINSORT &&
          dt.collation->state & MY_CS_BINSORT)
        return 1;
      if (collation->state & MY_CS_BINSORT)
        return 0;
      if (dt.collation->state & MY_CS_BINSORT)
      {
        set(dt);
        return 0;
      }
      /* Same charset, different non-binary collations: fall back to the
         charset's binary collation. */
      THD *thd = current_thd;
      myf utf8_flag= thd ? thd->get_utf8_flag()
                         : global_system_variables.old_behavior &
                           OLD_MODE_UTF8_IS_UTF8MB3;
      CHARSET_INFO *bin= get_charset_by_csname(collation->cs_name.str,
                                               MY_CS_BINSORT,MYF(utf8_flag));
      set(bin, DERIVATION_NONE);
    }
  }
  repertoire|= dt.repertoire;
  return 0;
}

/******************************/

/* Report "Illegal mix of collations" for two collations. */
static
void my_coll_agg_error(const DTCollation &c1, const DTCollation &c2,
                       const char *fname)
{
  my_error(ER_CANT_AGGREGATE_2COLLATIONS,MYF(0),
           c1.collation->coll_name.str, c1.derivation_name(),
           c2.collation->coll_name.str, c2.derivation_name(),
           fname);
}


/* Report "Illegal mix of collations" for three collations. */
static
void my_coll_agg_error(DTCollation &c1, DTCollation &c2, DTCollation &c3,
                       const char *fname)
{
  my_error(ER_CANT_AGGREGATE_3COLLATIONS,MYF(0),
           c1.collation->coll_name.str, c1.derivation_name(),
           c2.collation->coll_name.str, c2.derivation_name(),
           c3.collation->coll_name.str, c3.derivation_name(),
           fname);
}


/*
  Report the collation-mix error for an argument array; item_sep is the
  stride between the arguments that participate in the aggregation.
*/
static
void my_coll_agg_error(Item** args, uint count, const char *fname,
                       int item_sep)
{
  if (count == 2)
    my_coll_agg_error(args[0]->collation, args[item_sep]->collation, fname);
  else if (count == 3)
    my_coll_agg_error(args[0]->collation, args[item_sep]->collation,
                      args[2*item_sep]->collation, fname);
  else
    my_error(ER_CANT_AGGREGATE_NCOLLATIONS,MYF(0),fname);
}


bool Type_std_attributes::agg_item_collations(DTCollation &c,
                                              const LEX_CSTRING &fname,
                                              Item **av, uint count,
                                              uint flags, int item_sep)
{
  uint i;
  Item **arg;
  bool unknown_cs= 0;

  c.set(av[0]->collation);
  for (i= 1, arg= &av[item_sep]; i < count; i++, arg+= item_sep)
  {
    if (c.aggregate((*arg)->collation, flags))
    {
      if (c.derivation == DERIVATION_NONE &&
          c.collation == &my_charset_bin)
      {
        unknown_cs= 1;
        continue;
      }
      my_coll_agg_error(av, count, fname.str, item_sep);
      return TRUE;
    }
  }

  if (unknown_cs &&
      c.derivation != DERIVATION_EXPLICIT)
  {
    my_coll_agg_error(av, count, fname.str, item_sep);
    return TRUE;
  }

  if ((flags &
MY_COLL_DISALLOW_NONE) &&
      c.derivation == DERIVATION_NONE)
  {
    my_coll_agg_error(av, count, fname.str, item_sep);
    return TRUE;
  }

  /* If all arguments were numbers, reset to @@collation_connection */
  if (flags & MY_COLL_ALLOW_NUMERIC_CONV &&
      c.derivation == DERIVATION_NUMERIC)
    c.set(Item::default_charset(), DERIVATION_COERCIBLE, MY_REPERTOIRE_NUMERIC);

  return FALSE;
}


/*
  Wrap arguments into character-set converters towards 'coll' where needed.

  @param single_err  When nargs==1, use *single_err as the second aggregated
                     collation when producing error message.
*/

bool Type_std_attributes::agg_item_set_converter(const DTCollation &coll,
                                                 const LEX_CSTRING &fname,
                                                 Item **args, uint nargs,
                                                 uint flags, int item_sep,
                                                 const Single_coll_err
                                                 *single_err)
{
  THD *thd= current_thd;
  if (thd->lex->is_ps_or_view_context_analysis())
    return false;
  Item **arg, *safe_args[2]= {NULL, NULL};

  /*
    For better error reporting: save the first and the second argument.
    We need this only if the number of args is 3 or 2:
    - for a longer argument list, "Illegal mix of collations"
      doesn't display each argument's characteristics.
    - if nargs is 1, then this error cannot happen.
  */
  if (nargs >=2 && nargs <= 3)
  {
    safe_args[0]= args[0];
    safe_args[1]= args[item_sep];
  }

  uint i;

  DBUG_ASSERT(!thd->stmt_arena->is_stmt_prepare());

  for (i= 0, arg= args; i < nargs; i++, arg+= item_sep)
  {
    Item* conv= (*arg)->safe_charset_converter(thd, coll.collation);
    if (conv == *arg)
      continue;

    if (!conv)
    {
      if (nargs >=2 && nargs <= 3)
      {
        /* restore the original arguments for better error message */
        args[0]= safe_args[0];
        args[item_sep]= safe_args[1];
      }
      if (nargs == 1 && single_err)
      {
        /*
          Use *single_err to produce an error message mentioning two
          collations.
        */
        if (single_err->first)
          my_coll_agg_error(args[0]->collation, single_err->coll, fname.str);
        else
          my_coll_agg_error(single_err->coll, args[0]->collation, fname.str);
      }
      else
        my_coll_agg_error(args, nargs, fname.str, item_sep);
      return TRUE;
    }

    if (conv->fix_fields_if_needed(thd, arg))
      return TRUE;

    if (!thd->stmt_arena->is_conventional())
    {
      /*
        Executing a prepared statement or stored routine: allocate the
        converter wrapper on the statement arena so it survives re-execution.
      */
      Query_arena *arena, backup;
      arena= thd->activate_stmt_arena_if_needed(&backup);

      Item_direct_ref_to_item *ref=
        new (thd->mem_root) Item_direct_ref_to_item(thd, *arg);
      if ((ref == NULL) || ref->fix_fields(thd, (Item **)&ref))
      {
        if (arena)
          thd->restore_active_arena(arena, &backup);
        return TRUE;
      }
      *arg= ref;
      if (arena)
        thd->restore_active_arena(arena, &backup);
      ref->change_item(thd, conv);
    }
    else
      thd->change_item_tree(arg, conv);
  }
  return FALSE;
}


/**
  @brief
  Building clone for Item_func_or_sum

  @param thd        thread handle
  @param mem_root   part of the memory for the clone

  @details
  This method first builds clones of the arguments. If it is successful with
  building the clones then it constructs a copy of this Item_func_or_sum object
  and attaches to it the built clones of the arguments.
  @return clone of the item
  @retval 0 on a failure
*/

Item* Item_func_or_sum::build_clone(THD *thd)
{
  Item *copy_tmp_args[2]= {0,0};
  Item **copy_args= copy_tmp_args;
  if (arg_count > 2)
  {
    copy_args= static_cast<Item**>
      (alloc_root(thd->mem_root, sizeof(Item*) * arg_count));
    if (unlikely(!copy_args))
      return 0;
  }
  for (uint i= 0; i < arg_count; i++)
  {
    Item *arg_clone= args[i]->build_clone(thd);
    if (unlikely(!arg_clone))
      return 0;
    copy_args[i]= arg_clone;
  }
  Item_func_or_sum *copy= static_cast<Item_func_or_sum *>(get_copy(thd));
  if (unlikely(!copy))
    return 0;
  if (arg_count > 2)
    copy->args= copy_args;
  else if (arg_count > 0)
  {
    /* Small argument lists live in the embedded tmp_arg array. */
    copy->args= copy->tmp_arg;
    memcpy(copy->args, copy_args, sizeof(Item *) * arg_count);
  }
  return copy;
}

/*
  Set up the shared dummy TABLE/TABLE_SHARE/Query_arena trio used to host
  the stored routine's result field; all three live in one calloc'ed chunk.
*/
Item_sp::Item_sp(THD *thd, Name_resolution_context *context_arg,
                 sp_name *name_arg) :
  context(context_arg), m_name(name_arg), m_sp(NULL), func_ctx(NULL),
  sp_result_field(NULL)
{
  dummy_table= (TABLE*) thd->calloc(sizeof(TABLE) + sizeof(TABLE_SHARE) +
                                    sizeof(Query_arena));
  dummy_table->s= (TABLE_SHARE*) (dummy_table + 1);
  sp_query_arena= new(dummy_table->s + 1) Query_arena();
  memset(&sp_mem_root, 0, sizeof(sp_mem_root));
}

/* Copy constructor variant: shares name/context/sp, fresh dummy table. */
Item_sp::Item_sp(THD *thd, Item_sp *item):
  context(item->context), m_name(item->m_name),
  m_sp(item->m_sp), func_ctx(NULL), sp_result_field(NULL)
{
  dummy_table= (TABLE*) thd->calloc(sizeof(TABLE)+ sizeof(TABLE_SHARE) +
                                    sizeof(Query_arena));
  dummy_table->s= (TABLE_SHARE*) (dummy_table+1);
  sp_query_arena= new(dummy_table->s + 1) Query_arena();
  memset(&sp_mem_root, 0, sizeof(sp_mem_root));
}

/*
  Build the routine's fully qualified, quoted name, e.g. `db`.`func`
  or `db`.`pkg`.`func` for package functions.
*/
LEX_CSTRING
Item_sp::func_name_cstring(THD *thd, bool is_package_function) const
{
  /* Calculate length to avoid reallocation of string for sure */
  size_t len= (((m_name->m_explicit_name ? m_name->m_db.length : 0) +
                m_name->m_name.length)*2 + //characters*quoting
               2 +                         // quotes for the function name
               2 +                         // quotes for the package name
               (m_name->m_explicit_name ?
                3 : 0) +                   // '`', '`' and '.' for the db
               1 +                         // '.' between package and function
               1 +                         // end of string
               ALIGN_SIZE(1));             // to avoid String reallocation
  String qname((char *)alloc_root(thd->mem_root, len), len,
               system_charset_info);

  qname.length(0);
  if (m_name->m_explicit_name)
  {
    append_identifier(thd, &qname, &m_name->m_db);
    qname.append('.');
  }
  if (is_package_function)
  {
    /*
      In case of a package function split `pkg.func` and print
      quoted `pkg` and `func` separately, so the entire result looks like:
        `db`.`pkg`.`func`
    */
    Database_qualified_name tmp= Database_qualified_name::split(m_name->m_name);
    DBUG_ASSERT(tmp.m_db.length);
    append_identifier(thd, &qname, &tmp.m_db);
    qname.append('.');
    append_identifier(thd, &qname, &tmp.m_name);
  }
  else
    append_identifier(thd, &qname, &m_name->m_name);
  return { qname.c_ptr_safe(), qname.length() };
}

/* Release per-execution state: result field, function context, mem root. */
void
Item_sp::cleanup()
{
  delete sp_result_field;
  sp_result_field= NULL;
  m_sp= NULL;
  delete func_ctx;
  func_ctx= NULL;
  free_root(&sp_mem_root, MYF(0));
  dummy_table->alias.free();
}

/**
  @brief Checks if requested access to function can be granted to user.
         If function isn't found yet, it searches function first.
         If function can't be found or user doesn't have requested access
         error is raised.

  @param thd thread handler

  @return Indication if the access was granted or not.
  @retval FALSE Access is granted.
  @retval TRUE Requested access can't be granted or function doesn't exist.

*/
bool
Item_sp::sp_check_access(THD *thd)
{
  DBUG_ENTER("Item_sp::sp_check_access");
  DBUG_ASSERT(m_sp);
  DBUG_RETURN(m_sp->check_execute_access(thd));
}

/**
  @brief Execute function & store value in field.

  @return Function returns error status.
  @retval FALSE on success.
  @retval TRUE if an error occurred.
*/
bool Item_sp::execute(THD *thd, bool *null_value, Item **args, uint arg_count)
{
  if (unlikely(execute_impl(thd, args, arg_count)))
  {
    *null_value= 1;
    process_error(thd);
    if (thd->killed)
      thd->send_kill_message();
    return true;
  }

  /* Check that the field (the value) is not NULL. */

  *null_value= sp_result_field->is_null();
  return (*null_value);
}

/**
  @brief Execute function and store the return value in the field.

  @note This function was intended to be the concrete implementation of
        the interface function execute. This was never realized.

  @return The error state.
  @retval FALSE on success
  @retval TRUE if an error occurred.
*/
bool
Item_sp::execute_impl(THD *thd, Item **args, uint arg_count)
{
  Sub_statement_state statement_state;
  Security_context *save_security_ctx= thd->security_ctx;
  enum enum_sp_data_access access=
    (m_sp->daccess() == SP_DEFAULT_ACCESS) ?
     SP_DEFAULT_ACCESS_MAPPING : m_sp->daccess();

  DBUG_ENTER("Item_sp::execute_impl");

  if (context && context->security_ctx)
  {
    /* Set view definer security context */
    thd->security_ctx= context->security_ctx;
  }

  if (unlikely(sp_check_access(thd)))
  {
    thd->security_ctx= save_security_ctx;
    DBUG_RETURN(TRUE);
  }

  /*
    Throw an error if a non-deterministic function is called while
    statement-based replication (SBR) is active.
  */

  if (unlikely(!m_sp->detistic() && !trust_function_creators &&
               (access == SP_CONTAINS_SQL || access == SP_MODIFIES_SQL_DATA) &&
               (mysql_bin_log.is_open() &&
                thd->variables.binlog_format == BINLOG_FORMAT_STMT)))
  {
    my_error(ER_BINLOG_UNSAFE_ROUTINE, MYF(0));
    thd->security_ctx= save_security_ctx;
    DBUG_RETURN(TRUE);
  }

  /*
    Disable the binlogging if this is not a SELECT statement. If this is a
    SELECT, leave binlogging on, so execute_function() code writes the
    function call into binlog.
  */
  thd->reset_sub_statement_state(&statement_state, SUB_STMT_FUNCTION);

  /*
    If this function is an aggregate function, we want to initialise the
    mem_root only once per group. For a regular stored function, we will
    initialise once for each call to execute_function.
  */
  m_sp->agg_type();
  DBUG_ASSERT(m_sp->agg_type() == GROUP_AGGREGATE ||
              (m_sp->agg_type() == NOT_AGGREGATE && !func_ctx));
  if (!func_ctx)
  {
    init_sql_alloc(key_memory_sp_head_call_root, &sp_mem_root,
                   MEM_ROOT_BLOCK_SIZE, 0, MYF(0));
    *sp_query_arena= Query_arena(&sp_mem_root,
                                 Query_arena::STMT_SP_QUERY_ARGUMENTS);
  }

  bool err_status= m_sp->execute_function(thd, args, arg_count,
                                          sp_result_field, &func_ctx,
                                          sp_query_arena);
  /*
    We free the function context when the function finished executing normally
    (quit_func == TRUE) or the function has exited with an error.
  */
  if (err_status || func_ctx->quit_func)
  {
    /* Free Items allocated during function execution. */
    delete func_ctx;
    func_ctx= NULL;
    sp_query_arena->free_items();
    free_root(&sp_mem_root, MYF(0));
    memset(&sp_mem_root, 0, sizeof(sp_mem_root));
  }
  thd->restore_sub_statement_state(&statement_state);

  thd->security_ctx= save_security_ctx;
  DBUG_RETURN(err_status);
}


/**
  @brief Initialize the result field by creating a temporary dummy table
         and assign it to a newly created field object. Meta data used to
         create the field is fetched from the sp_head belonging to the stored
         procedure found in the stored procedure function cache.

  @note This function should be called from fix_fields to init the result
        field. It is somewhat related to Item_field.

  @see Item_field

  @param thd A pointer to the session and thread context.

  @return Function return error status.
  @retval TRUE is returned on an error
  @retval FALSE is returned on success.
*/

bool
Item_sp::init_result_field(THD *thd, uint max_length, uint maybe_null,
                           bool *null_value, LEX_CSTRING *name)
{
  DBUG_ENTER("Item_sp::init_result_field");

  DBUG_ASSERT(m_sp != NULL);
  DBUG_ASSERT(sp_result_field == NULL);

  /*
    A Field needs to be attached to a Table.
    Below we "create" a dummy table by initializing
    the needed pointers.
  */
  dummy_table->alias.set("", 0, table_alias_charset);
  dummy_table->in_use= thd;
  dummy_table->copy_blobs= TRUE;
  dummy_table->s->table_cache_key= empty_clex_str;
  dummy_table->s->table_name= empty_clex_str;
  dummy_table->maybe_null= maybe_null;

  if (!(sp_result_field= m_sp->create_result_field(max_length, name,
                                                   dummy_table)))
    DBUG_RETURN(TRUE);

  /* Large values go to a heap buffer; small ones use the embedded buffer. */
  if (sp_result_field->pack_length() > sizeof(result_buf))
  {
    void *tmp;
    if (!(tmp= thd->alloc(sp_result_field->pack_length())))
      DBUG_RETURN(TRUE);
    sp_result_field->move_field((uchar*) tmp);
  }
  else
    sp_result_field->move_field(result_buf);

  /* The field's NULL indicator aliases the caller-provided flag. */
  sp_result_field->null_ptr= (uchar *) null_value;
  sp_result_field->null_bit= 1;

  DBUG_RETURN(FALSE);
}

/**
  @brief
  Building clone for Item_ref

  @param thd        thread handle
  @param mem_root   part of the memory for the clone

  @details
  This method gets copy of the current item and also
  builds clone for its reference.

  @retval
    clone of the item
    0 if an error occurred
*/

Item* Item_ref::build_clone(THD *thd)
{
  Item_ref *copy= (Item_ref *) get_copy(thd);
  if (unlikely(!copy) ||
      unlikely(!(copy->ref= (Item**) alloc_root(thd->mem_root,
                                                sizeof(Item*)))) ||
      unlikely(!(*copy->ref= (* ref)->build_clone(thd))))
    return 0;
  return copy;
}


/**********************************************/

Item_field::Item_field(THD *thd, Field *f)
  :Item_ident(thd, 0, null_clex_str,
              Lex_cstring_strlen(*f->table_name), f->field_name),
   item_equal(0),
   have_privileges(NO_ACL), any_privileges(0)
{
  set_field(f);
  /*
    field_name and table_name should not point to garbage
    if this item is to be reused
  */
  orig_table_name= table_name;
  orig_field_name= field_name;
  with_flags|= item_with_t::FIELD;
}


/**
  Constructor used inside setup_wild().

  Ensures that field, table, and database names will live as long as
  Item_field (this is important in prepared statements).
*/

Item_field::Item_field(THD *thd, Name_resolution_context *context_arg,
                       Field *f)
  :Item_ident(thd, context_arg, f->table->s->db,
              Lex_cstring_strlen(*f->table_name), f->field_name),
   item_equal(0), have_privileges(NO_ACL), any_privileges(0)
{
  /*
    We always need to provide Item_field with a fully qualified field
    name to avoid ambiguity when executing prepared statements like
    SELECT * from d1.t1, d2.t1; (assuming d1.t1 and d2.t1 have columns
    with same names).
    This is because prepared statements never deal with wildcards in
    select list ('*') and always fix fields using fully specified path
    (i.e. db.table.column).
    No check for OOM: if db_name is NULL, we'll just get
    "Field not found" error.
    We need to copy db_name, table_name and field_name because they must
    be allocated in the statement memory, not in table memory (the table
    structure can go away and pop up again between subsequent executions
    of a prepared statement or after the close_tables_for_reopen() call
    in mysql_multi_update_prepare() or due to wildcard expansion in stored
    procedures).
  */
  {
    if (db_name.str)
      orig_db_name= thd->strmake_lex_cstring(db_name);
    if (table_name.str)
      orig_table_name= thd->strmake_lex_cstring(table_name);
    if (field_name.str)
      orig_field_name= thd->strmake_lex_cstring(field_name);
    /*
      We don't restore 'name' in cleanup because it's not changed
      during execution. Still we need it to point to persistent
      memory if this item is to be reused.
    */
    name= orig_field_name;
  }
  set_field(f);
  with_flags|= item_with_t::FIELD;
}


/* Name-only constructor: the Field is resolved later during fix_fields. */
Item_field::Item_field(THD *thd, Name_resolution_context *context_arg,
                       const LEX_CSTRING &db_arg,
                       const LEX_CSTRING &table_name_arg,
                       const LEX_CSTRING &field_name_arg)
  :Item_ident(thd, context_arg, db_arg, table_name_arg, field_name_arg),
   field(0), item_equal(0),
   have_privileges(NO_ACL), any_privileges(0)
{
  SELECT_LEX *select= thd->lex->current_select;
  collation.set(DERIVATION_IMPLICIT);
  if (select && select->parsing_place != IN_HAVING)
    select->select_n_where_fields++;
  with_flags|= item_with_t::FIELD;
}

/**
  Constructor need to process subselect with temporary tables (see Item)
*/

Item_field::Item_field(THD *thd, Item_field *item)
  :Item_ident(thd, item),
   field(item->field),
   item_equal(item->item_equal),
   have_privileges(item->have_privileges),
   any_privileges(item->any_privileges)
{
  collation.set(DERIVATION_IMPLICIT);
  with_flags|= item_with_t::FIELD;
}


/*
  Bind this item to a concrete Field: copy its type attributes, names and
  nullability, and mark the item as fixed.
*/
void Item_field::set_field(Field *field_par)
{
  field=result_field=field_par;           // for easy coding with fields
  set_maybe_null(field->maybe_null());
  Type_std_attributes::set(field_par->type_std_attributes());
  table_name= Lex_cstring_strlen(*field_par->table_name);
  field_name= field_par->field_name;
  db_name= field_par->table->s->db;
  alias_name_used= field_par->table->alias_name_used;

  base_flags|= item_base_t::FIXED;
  if (field->table->s->tmp_table == SYSTEM_TMP_TABLE)
    any_privileges= 0;

  if (field->table->s->tmp_table == SYSTEM_TMP_TABLE ||
      field->table->s->tmp_table == INTERNAL_TMP_TABLE)
    set_refers_to_temp_table();
}


/**
  Reset this item to point to a field from the new temporary table.
  This is used when we create a new temporary table for each execution
  of prepared statement.
*/

void Item_field::reset_field(Field *f)
{
  set_field(f);
  /* 'name' is pointing at field->field_name of old field */
  name= f->field_name;
}


void Item_field::load_data_print_for_log_event(THD *thd, String *to) const
{
  append_identifier(thd, to, name.str, name.length);
}


bool Item_field::load_data_set_no_data(THD *thd, const Load_data_param *param)
{
  if (field->load_data_set_no_data(thd, param->is_fixed_length()))
    return true;
  /*
    TODO: We probably should not throw warning for each field.
    But how about intention to always have the same number
    of warnings in THD::cuted_fields (and get rid of cuted_fields
    in the end ?)
  */
  thd->cuted_fields++;
  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                      ER_WARN_TOO_FEW_RECORDS,
                      ER_THD(thd, ER_WARN_TOO_FEW_RECORDS),
                      thd->get_stmt_da()->current_row_for_warning());
  return false;
}


bool Item_field::enumerate_field_refs_processor(void *arg)
{
  Field_enumerator *fe= (Field_enumerator*)arg;
  fe->visit_field(this);
  return FALSE;
}

bool Item_field::update_table_bitmaps_processor(void *arg)
{
  update_table_bitmaps();
  return FALSE;
}

/*
  Redirect *field to the corresponding field of the new (nullable) table,
  but only when both point at the same record buffer position.
*/
static inline void set_field_to_new_field(Field **field, Field **new_field)
{
  if (*field && (*field)->table == new_field[0]->table)
  {
    Field *newf= new_field[(*field)->field_index];
    if ((*field)->ptr == newf->ptr)
      *field= newf;
  }
}

bool Item_field::switch_to_nullable_fields_processor(void *arg)
{
  Field **new_fields= (Field **)arg;
  set_field_to_new_field(&field, new_fields);
  set_field_to_new_field(&result_field, new_fields);
  set_maybe_null(field && field->maybe_null());
  return 0;
}

/* Build "db.table.field" (or the longest available suffix of it). */
LEX_CSTRING Item_ident::full_name_cstring() const
{
  char *tmp;
  size_t length;
  if (!table_name.str || !field_name.str)
  {
    if (field_name.str)
      return field_name;
    if (name.str)
      return name;
    return { STRING_WITH_LEN("tmp_field") };
  }
  if (db_name.str && db_name.str[0])
  {
    THD *thd= current_thd;
    tmp=(char*) thd->alloc((uint) db_name.length+ (uint) table_name.length +
                           (uint) field_name.length+3);
    length= (strxmov(tmp,db_name.str,".",table_name.str,".",field_name.str,
                     NullS) - tmp);
  }
  else
  {
    if (!table_name.str[0])
      return field_name;

    THD *thd= current_thd;
    tmp= (char*) thd->alloc((uint) table_name.length +
                            field_name.length + 2);
    length= (strxmov(tmp, table_name.str, ".", field_name.str, NullS) - tmp);
  }
  return {tmp, length};
}

void Item_ident::print(String *str, enum_query_type query_type)
{
  THD *thd= current_thd;
  char d_name_buff[MAX_ALIAS_NAME], t_name_buff[MAX_ALIAS_NAME];
  LEX_CSTRING d_name= db_name;
  LEX_CSTRING t_name= table_name;
  bool use_table_name= table_name.str && table_name.str[0];
  bool use_db_name= use_table_name && db_name.str && db_name.str[0] &&
                    !alias_name_used;

  if (use_db_name && (query_type & QT_ITEM_IDENT_SKIP_DB_NAMES))
    use_db_name= !thd->db.str || strcmp(thd->db.str, db_name.str);

  if (use_db_name)
    use_db_name= !(cached_table && cached_table->belong_to_view &&
                   cached_table->belong_to_view->compact_view_format);

  if (use_table_name && (query_type & QT_ITEM_IDENT_SKIP_TABLE_NAMES))
  {
    /*
      Don't print the table name if it's the only table in the context
      XXX technically, that's a sufficient, but too strong condition
    */
    if (!context)
      use_db_name= use_table_name= false;
    else if (context->outer_context)
      use_table_name= true;
    else if (context->last_name_resolution_table ==
             context->first_name_resolution_table)
      use_db_name= use_table_name= false;
    else if (!context->last_name_resolution_table &&
             !context->first_name_resolution_table->next_name_resolution_table)
      use_db_name= use_table_name= false;
  }

  if ((query_type & QT_ITEM_IDENT_DISABLE_DB_TABLE_NAMES))
  {
    // Don't print db or table name irrespective of any other settings.
    use_db_name= use_table_name= false;
  }

  if (!field_name.str || !field_name.str[0])
  {
    append_identifier(thd, str, STRING_WITH_LEN("tmp_field"));
    return;
  }

  if (lower_case_table_names== 1 ||
      (lower_case_table_names == 2 && !alias_name_used))
  {
    if (use_table_name)
    {
      strmov(t_name_buff, table_name.str);
      my_casedn_str(files_charset_info, t_name_buff);
      t_name= Lex_cstring_strlen(t_name_buff);
    }
    if (use_db_name)
    {
      strmov(d_name_buff, db_name.str);
      my_casedn_str(files_charset_info, d_name_buff);
      d_name= Lex_cstring_strlen(d_name_buff);
    }
  }

  if (use_db_name)
  {
    append_identifier(thd, str, d_name.str, (uint) d_name.length);
    str->append('.');
    DBUG_ASSERT(use_table_name);
  }
  if (use_table_name)
  {
    append_identifier(thd, str, t_name.str, (uint) t_name.length);
    str->append('.');
  }
  append_identifier(thd, str, &field_name);
}

/* ARGSUSED */
String *Item_field::val_str(String *str)
{
  DBUG_ASSERT(fixed());
  if ((null_value=field->is_null()))
    return 0;
  str->set_charset(str_value.charset());
  return field->val_str(str,&str_value);
}


double Item_field::val_real()
{
  DBUG_ASSERT(fixed());
  if ((null_value=field->is_null()))
    return 0.0;
  return field->val_real();
}


longlong Item_field::val_int()
{
  DBUG_ASSERT(fixed());
  if ((null_value=field->is_null()))
    return 0;
  return field->val_int();
}


my_decimal *Item_field::val_decimal(my_decimal *decimal_value)
{
  if ((null_value= field->is_null()))
    return 0;
  return field->val_decimal(decimal_value);
}


String *Item_field::str_result(String *str)
{
  if ((null_value=result_field->is_null()))
    return 0;
  str->set_charset(str_value.charset());
  return result_field->val_str(str,&str_value);
}

bool Item_field::get_date(THD *thd, MYSQL_TIME *ltime,date_mode_t fuzzydate)
{
  if ((null_value=field->is_null()) || field->get_date(ltime,fuzzydate))
  {
    bzero((char*) ltime,sizeof(*ltime));
    return 1;
  }
  return 0;
}
/*
  Like Item_field::get_date(), but reads from result_field (the field this
  item's value was saved to, e.g. in a temporary table) instead of field.
  On NULL or conversion failure the output MYSQL_TIME is zeroed.
*/
bool Item_field::get_date_result(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate)
{
  if ((null_value= result_field->is_null()) ||
      result_field->get_date(ltime, fuzzydate))
  {
    bzero((char*) ltime,sizeof(*ltime));
    return true;
  }
  return false;
}


/* Read the native representation from the underlying field. */
bool Item_field::val_native(THD *thd, Native *to)
{
  return val_native_from_field(field, to);
}


/* Same as val_native(), but from result_field. */
bool Item_field::val_native_result(THD *thd, Native *to)
{
  return val_native_from_field(result_field, to);
}


/* Packed DATETIME value of the field; 0 (with null_value set) on NULL. */
longlong Item_field::val_datetime_packed(THD *thd)
{
  DBUG_ASSERT(fixed());
  if ((null_value= field->is_null()))
    return 0;
  return field->val_datetime_packed(thd);
}


/* Packed TIME value of the field; 0 (with null_value set) on NULL. */
longlong Item_field::val_time_packed(THD *thd)
{
  DBUG_ASSERT(fixed());
  if ((null_value= field->is_null()))
    return 0;
  return field->val_time_packed(thd);
}


/* Copy the value stored in result_field into 'to'. */
void Item_field::save_result(Field *to)
{
  save_field_in_field(result_field, &null_value, to, TRUE);
}


/*
  The val_*_result() family below mirrors val_real()/val_int()/... but
  reads from result_field rather than field.
*/

double Item_field::val_result()
{
  if ((null_value=result_field->is_null()))
    return 0.0;
  return result_field->val_real();
}

longlong Item_field::val_int_result()
{
  if ((null_value=result_field->is_null()))
    return 0;
  return result_field->val_int();
}


my_decimal *Item_field::val_decimal_result(my_decimal *decimal_value)
{
  if ((null_value= result_field->is_null()))
    return 0;
  return result_field->val_decimal(decimal_value);
}


bool Item_field::val_bool_result()
{
  if ((null_value= result_field->is_null()))
    return false;
  return result_field->val_bool();
}


bool Item_field::is_null_result()
{
  return (null_value=result_field->is_null());
}


/*
  Two Item_fields are equal if they point at the same Field object;
  when one side is not fixed yet, fall back to comparing names
  (see the comment inside for why this relaxation is acceptable).
*/
bool Item_field::eq(const Item *item, bool binary_cmp) const
{
  const Item *real_item2= item->real_item();
  if (real_item2->type() != FIELD_ITEM)
    return 0;

  Item_field *item_field= (Item_field*) real_item2;
  if (item_field->field && field)
    return item_field->field == field;
  /*
    We may come here when we are trying to find a function in a GROUP BY
    clause from the select list.
    In this case the '100 % correct' way to do this would be to first
    run fix_fields() on the GROUP BY item and then retry this function, but
    I think it's better to relax the checking a bit as we will in
    most cases do the correct thing by just checking the field name.
    (In cases where we would choose wrong we would have to generate a
    ER_NON_UNIQ_ERROR).
  */
  return (!lex_string_cmp(system_charset_info, &item_field->name,
                          &field_name) &&
          (!item_field->table_name.str || !table_name.str ||
           (!my_strcasecmp(table_alias_charset, item_field->table_name.str,
                           table_name.str) &&
            (!item_field->db_name.str || !db_name.str ||
             (item_field->db_name.str && !strcmp(item_field->db_name.str,
                                                 db_name.str))))));
}


table_map Item_field::used_tables() const
{
  if (field->table->const_table)
    return 0;                                   // const item
  return (get_depended_from() ? OUTER_REF_TABLE_BIT : field->table->map);
}

table_map Item_field::all_used_tables() const
{
  return (get_depended_from() ? OUTER_REF_TABLE_BIT : field->table->map);
}


/*
  Mark this field in its table's tmp_set if it is nullable and belongs to
  the current select (used for NOT NULL inference). Always returns false.
*/
bool Item_field::find_not_null_fields(table_map allowed)
{
  if (field->table->const_table)
    return false;
  if (!get_depended_from() && field->real_maybe_null())
    bitmap_set_bit(&field->table->tmp_set, field->field_index);
  return false;
}


/*
  @Note thd->fatal_error can be set in case of OOM
*/

void Item_field::fix_after_pullout(st_select_lex *new_parent, Item **ref,
                                   bool merge)
{
  if (new_parent == get_depended_from())
    depended_from= NULL;
  if (context)
  {
    bool need_change= false;
    /*
      Suppose there are nested selects:

      select_id=1
        select_id=2
          select_id=3 <----+
            select_id=4 -+
            select_id=5 --+

      Suppose, pullout operation has moved anything that had select_id=4 or 5
      in to select_id=3.

      If this Item_field had a name resolution context pointing into select_lex
      with id=4 or id=5, it needs a new name resolution context.

      However, it could also be that this object is a part of outer reference:
      Item_ref(Item_field(field in select with select_id=1))).
      - The Item_ref object has a context with select_id=5, and so needs a new
        name resolution context.
      - The Item_field object has a context with select_id=1, and doesn't need
        a new name resolution context.

      So, the following loop walks from Item_field's current context upwards.
      If we find that the select we've been pulled out to is up there, we
      create the new name resolution context. Otherwise, we don't.
    */
    for (Name_resolution_context *ct= context; ct; ct= ct->outer_context)
    {
      if (new_parent == ct->select_lex)
      {
        need_change= true;
        break;
      }
    }
    if (!need_change)
      return;

    if (!merge)
    {
      /*
        It is transformation without merge.
        This field was "outer" for the inner SELECT where it was taken and
        moved up.
        "Outer" fields uses normal SELECT_LEX context of upper SELECTs for
        name resolution, so we can switch everything to it safely.
      */
      this->context= &new_parent->context;
      return;
    }

    Name_resolution_context *ctx= new Name_resolution_context();
    if (!ctx)
      return;                                   // Fatal error set
    if (context->select_lex == new_parent)
    {
      /*
        This field was pushed in then pulled out
        (for example left part of IN)
      */
      ctx->outer_context= context->outer_context;
    }
    else if (context->outer_context)
    {
      /* just pull to the upper context */
      ctx->outer_context= context->outer_context->outer_context;
    }
    /* Copy the remaining resolution state from the old context. */
    ctx->table_list= context->first_name_resolution_table;
    ctx->select_lex= new_parent;
    if (context->select_lex == NULL)
      ctx->select_lex= NULL;
    ctx->first_name_resolution_table= context->first_name_resolution_table;
    ctx->last_name_resolution_table= context->last_name_resolution_table;
    ctx->error_processor= context->error_processor;
    ctx->error_processor_data= context->error_processor_data;
    ctx->resolve_in_select_list= context->resolve_in_select_list;
    ctx->security_ctx= context->security_ctx;
    this->context=ctx;
  }
}


/*
  Clone this item for use against a temporary table: the clone reads from
  what is currently this item's result_field.
*/
Item *Item_field::get_tmp_table_item(THD *thd)
{
  Item_field *new_item= new (thd->mem_root) Item_field(thd, this);
  if (new_item)
  {
    new_item->field= new_item->result_field;
    new_item->set_refers_to_temp_table();
  }
  return new_item;
}

longlong Item_field::val_int_endpoint(bool left_endp, bool *incl_endp)
{
  longlong res= val_int();
  return null_value? LONGLONG_MIN : res;
}

void Item_field::set_refers_to_temp_table()
{
  /*
    Derived temp. tables have non-zero derived_select_number.
    We don't need to distinguish between other kinds of temp.tables currently.
  */
  refers_to_temp_table= (field->table->derived_select_number != 0)?
                        REFERS_TO_DERIVED_TMP : REFERS_TO_OTHER_TMP;
}


/*
  Equality of two constant items: both must expose an Item_const, agree on
  comparison type handlers (see comment inside), and have equal values
  (two NULLs compare equal here).
*/
bool Item_basic_value::eq(const Item *item, bool binary_cmp) const
{
  const Item_const *c0, *c1;
  const Type_handler *h0, *h1;
  /*
    - Test get_item_const() for NULL filters out Item_param
      bound in a way that needs a data type conversion
      (e.g. non-integer value in a LIMIT clause).
      Item_param::get_item_const() return NULL in such cases.
    - Test for type_handler_for_comparison() equality makes sure
      that values of different data type groups do not get detected
      as equal (e.g. numbers vs strings, time vs datetime).
    - Test for cast_to_int_type_handler() equality distinguishes
      values with dual properties. For example, VARCHAR 'abc' and hex
      hybrid 0x616263 are equal in string context, but they are not equal
      if the hybrid appears in integer context (it behaves as integer then).
      Here we have no full information about the context, so treat them
      as not equal.
    QQ: We could pass Value_source::Context here instead of
        "bool binary_cmp", to make substitution more delicate.
        See Field::get_equal_const_item().
  */
  bool res= (c0= get_item_const()) &&
            (c1= item->get_item_const()) &&
            (h0= type_handler())->type_handler_for_comparison() ==
            (h1= item->type_handler())->type_handler_for_comparison() &&
            h0->cast_to_int_type_handler()->type_handler_for_comparison() ==
            h1->cast_to_int_type_handler()->type_handler_for_comparison();
  if (res)
  {
    switch (c0->const_is_null() + c1->const_is_null()) {
    case 2: // Two NULLs
      res= true;
      break;
    case 1: // NULL and non-NULL
      res= false;
      break;
    case 0: // Two non-NULLs
      res= h0->Item_const_eq(c0, c1, binary_cmp);
    }
  }
  DBUG_EXECUTE_IF("Item_basic_value",
                  push_warning_printf(current_thd,
                                      Sql_condition::WARN_LEVEL_NOTE,
                                      ER_UNKNOWN_ERROR, "%seq=%d a=%s b=%s",
                                      binary_cmp ? "bin_" : "", (int) res,
                                      DbugStringItemTypeValue(current_thd, this).c_ptr(),
                                      DbugStringItemTypeValue(current_thd, item).c_ptr()
                                      ););
  return res;
}


/**
  Create an item from a string we KNOW points to a valid longlong
  end \0 terminated number string.
  This is always 'signed'.
  Unsigned values are created with Item_uint()
*/

Item_int::Item_int(THD *thd, const char *str_arg, size_t length):
  Item_num(thd)
{
  char *end_ptr= (char*) str_arg + length;
  int error;
  value= my_strtoll10(str_arg, &end_ptr, &error);
  max_length= (uint) (end_ptr - str_arg);
  name.str= str_arg;
  /*
    We can't trust max_length as in show_routine_code we are using "Pos" as
    the field name.
  */
  name.length= !str_arg[max_length] ? max_length : strlen(str_arg);
}


my_decimal *Item_int::val_decimal(my_decimal *decimal_value)
{
  int2my_decimal(E_DEC_FATAL_ERROR, value, unsigned_flag, decimal_value);
  return decimal_value;
}

String *Item_int::val_str(String *str)
{
  str->set_int(value, unsigned_flag, collation.collation);
  return str;
}


void Item_int::print(String *str, enum_query_type query_type)
{
  /*
    NOTE(review): the template argument of StringBuffer appears to have been
    lost in extraction (upstream declares a sized StringBuffer<N>); confirm
    against the original sql/item.cc before relying on this line.
  */
  StringBuffer buf;
  // my_charset_bin is good enough for numbers
  buf.set_int(value, unsigned_flag, &my_charset_bin);
  str->append(buf);
}


/* NOT (TRUE) -> FALSE and vice versa: negate in place and drop the name. */
Item *Item_bool::neg_transformer(THD *thd)
{
  value= !value;
  name= null_clex_str;
  return this;
}


Item_uint::Item_uint(THD *thd, const char *str_arg, size_t length):
  Item_int(thd, str_arg, length)
{
  unsigned_flag= 1;
}


Item_uint::Item_uint(THD *thd, const char *str_arg, longlong i, uint length):
  Item_int(thd, str_arg, i, length)
{
  unsigned_flag= 1;
}


/* Construct a decimal literal from its source text. */
Item_decimal::Item_decimal(THD *thd, const char *str_arg, size_t length,
                           CHARSET_INFO *charset):
  Item_num(thd)
{
  str2my_decimal(E_DEC_FATAL_ERROR, str_arg, length, charset, &decimal_value);
  name.str= str_arg;
  name.length= safe_strlen(str_arg);
  decimals= (uint8) decimal_value.frac;
  max_length= my_decimal_precision_to_length_no_truncation(decimal_value.intg +
                                                           decimals,
                                                           decimals,
                                                           unsigned_flag);
}

/* Construct a decimal from a (possibly unsigned) integer value. */
Item_decimal::Item_decimal(THD *thd, longlong val, bool unsig):
  Item_num(thd)
{
  int2my_decimal(E_DEC_FATAL_ERROR, val, unsig, &decimal_value);
  decimals= (uint8) decimal_value.frac;
  max_length= my_decimal_precision_to_length_no_truncation(decimal_value.intg +
                                                           decimals,
                                                           decimals,
                                                           unsigned_flag);
}


/* Construct a decimal from a double. */
Item_decimal::Item_decimal(THD *thd, double val, int precision, int scale):
  Item_num(thd)
{
  double2my_decimal(E_DEC_FATAL_ERROR, val, &decimal_value);
  decimals= (uint8) decimal_value.frac;
  max_length= my_decimal_precision_to_length_no_truncation(decimal_value.intg +
                                                           decimals,
                                                           decimals,
                                                           unsigned_flag);
}


/* Construct from an existing my_decimal with explicit name and metadata. */
Item_decimal::Item_decimal(THD *thd, const char *str, const my_decimal *val_arg,
                           uint decimal_par, uint length):
  Item_num(thd)
{
  my_decimal2decimal(val_arg, &decimal_value);
  name.str= str;
  name.length= safe_strlen(str);
  decimals= (uint8) decimal_par;
  max_length= length;
}


Item_decimal::Item_decimal(THD *thd, const my_decimal *value_par):
  Item_num(thd)
{
  my_decimal2decimal(value_par, &decimal_value);
  decimals= (uint8) decimal_value.frac;
  max_length= my_decimal_precision_to_length_no_truncation(decimal_value.intg +
                                                           decimals,
                                                           decimals,
                                                           unsigned_flag);
}


/* Construct from the binary (on-disk) decimal representation. */
Item_decimal::Item_decimal(THD *thd, const uchar *bin, int precision, int scale):
  Item_num(thd),
  decimal_value(bin, precision, scale)
{
  decimals= (uint8) decimal_value.frac;
  max_length= my_decimal_precision_to_length_no_truncation(precision, decimals,
                                                           unsigned_flag);
}


/* Replace the stored value, refreshing derived metadata. */
void Item_decimal::set_decimal_value(my_decimal *value_par)
{
  my_decimal2decimal(value_par, &decimal_value);
  decimals= (uint8) decimal_value.frac;
  unsigned_flag= !decimal_value.sign();
  max_length= my_decimal_precision_to_length_no_truncation(decimal_value.intg +
                                                           decimals,
                                                           decimals,
                                                           unsigned_flag);
}


Item *Item_decimal::clone_item(THD *thd)
{
  return new (thd->mem_root) Item_decimal(thd, name.str, &decimal_value, decimals,
                                          max_length);
}


String *Item_float::val_str(String *str)
{
  str->set_real(value, decimals, &my_charset_numeric);
  return str;
}


my_decimal *Item_float::val_decimal(my_decimal *decimal_value)
{
  double2my_decimal(E_DEC_FATAL_ERROR, value, decimal_value);
  return (decimal_value);
}


Item *Item_float::clone_item(THD *thd)
{
  return new (thd->mem_root) Item_float(thd, name.str, value, decimals,
                                        max_length);
}


/*
  Print this string literal back as parseable SQL, optionally with a
  character set introducer, undoing parse-time charset conversion where
  needed (see the per-branch comments).
*/
void Item_string::print(String *str, enum_query_type query_type)
{
  const bool print_introducer=
    !(query_type & QT_WITHOUT_INTRODUCERS) && is_cs_specified();
  if (print_introducer)
  {
    str->append('_');
    str->append(collation.collation->cs_name);
  }

  str->append('\'');

  if (query_type & QT_TO_SYSTEM_CHARSET)
  {
    if (print_introducer)
    {
      /*
        Because we wrote an introducer, we must print str_value in its
        charset, and the resulting bytes must not be changed until they
        reach the end client.
        But the caller is asking for system_charset_info, and may later
        convert into character_set_results. That means two conversions: we
        must ensure that they don't change our printed bytes.
        So we print str_value in the least common denominator of the three
        charsets involved: ASCII. Non-ASCII characters are printed as \xFF
        sequences (which is ASCII too). This way, our bytes will not be
        changed.
      */
      ErrConvString tmp(str_value.ptr(), str_value.length(), &my_charset_bin);
      str->append(tmp.lex_cstring());
    }
    else
    {
      str_value.print(str, system_charset_info);
    }
  }
  else
  {
    /*
      We're restoring a parse-able statement from an Item tree.
      Make sure to revert character set conversions that previously
      happened in the parser when Item_string was created.
    */
    if (print_introducer)
    {
      /*
        Print the string as is, without conversion:
        Strings with introducers are not converted in the parser.
      */
      str_value.print(str);
    }
    else
    {
      /*
        Print the string with conversion.
        Strings without introducers are converted in the parser,
        from character_set_client to character_set_connection.

        When restoring a CREATE VIEW statement,
        - str_value.charsets() contains parse time character_set_connection
        - str->charset() contains parse time character_set_client
        So we convert the string back from parse-time character_set_connection
        to parse time character_set_client.

        In some cases, e.g. SHOW PROCEDURE CODE, it's also possible
        that str->charset() is "utf8mb3" instead of parse time
        character_set_client. In these cases we convert
        here from the parse-time character_set_connection to utf8mb3.

        QQ: perhaps the code behind SHOW PROCEDURE CODE should
        also request the result in the parse-time character_set_client
        (like the code restoring CREATE VIEW statements does),
        rather than in utf8mb3:
        - utf8mb3 does not work well with non-BMP characters (e.g. emoji).
        - Simply changing utf8mb3 to utf8mb4 will not fully help:
          some character sets have unassigned characters,
          they get lost during cs->utf8mb4->cs round trip.
      */
      str_value.print_with_conversion(str, str->charset());
    }
  }

  str->append('\'');
}


double Item_string::val_real()
{
  return double_from_string_with_check(&str_value);
}


/**
  @todo
  Give error if we wanted a signed integer and we got an unsigned one
*/
longlong Item_string::val_int()
{
  return longlong_from_string_with_check(&str_value);
}


my_decimal *Item_string::val_decimal(my_decimal *decimal_value)
{
  return val_decimal_from_string(decimal_value);
}


/* Item_null: every accessor sets null_value and returns a "zero" value. */

double Item_null::val_real()
{
  null_value=1;
  return 0.0;
}
longlong Item_null::val_int()
{
  null_value=1;
  return 0;
}
/* ARGSUSED */
String *Item_null::val_str(String *str)
{
  null_value=1;
  return 0;
}

my_decimal *Item_null::val_decimal(my_decimal *decimal_value)
{
  return 0;
}


longlong Item_null::val_datetime_packed(THD *)
{
  null_value= true;
  return 0;
}


longlong Item_null::val_time_packed(THD *)
{
  null_value= true;
  return 0;
}


bool Item_null::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate)
{
  set_zero_time(ltime, MYSQL_TIMESTAMP_NONE);
  return (null_value= true);
}


Item *Item_null::safe_charset_converter(THD *thd, CHARSET_INFO *tocs)
{
  return this;
}

Item *Item_null::clone_item(THD *thd)
{
  return new (thd->mem_root) Item_null(thd, name.str);
}


/*
  Used under MODE_EMPTY_STRING_IS_NULL: concatenating a string literal to
  NULL yields a string literal when the literal is non-empty, else stays NULL.
*/
Item_basic_constant *
Item_null::make_string_literal_concat(THD *thd, const LEX_CSTRING *str)
{
  DBUG_ASSERT(thd->variables.sql_mode & MODE_EMPTY_STRING_IS_NULL);
  if (str->length)
  {
    CHARSET_INFO *cs= thd->variables.collation_connection;
    my_repertoire_t repertoire= my_string_repertoire(cs, str->str, str->length);
    return new (thd->mem_root) Item_string(thd,
                                           str->str, (uint) str->length, cs,
                                           DERIVATION_COERCIBLE, repertoire);
  }
  return this;
}


/*********************** Item_param related ******************************/

Item_param::Item_param(THD *thd, const LEX_CSTRING *name_arg,
                       uint pos_in_query_arg, uint len_in_query_arg):
  Item_basic_value(thd),
  Rewritable_query_parameter(pos_in_query_arg, len_in_query_arg),
  /*
    Set handler to type_handler_null. Its data type test methods such as:
    - is_scalar_type()
    - can_return_int()
    - can_return_real(),
    - is_general_purpose_string_type()
    all return "true". This is needed to avoid any "illegal parameter type"
    errors in Item::check_type_xxx() at PS prepare time.
  */
  Type_handler_hybrid_field_type(&type_handler_null),
  state(NO_VALUE),
  m_empty_string_is_null(false),
  indicator(STMT_INDICATOR_NONE),
  m_out_param_info(NULL),
  /*
    Set m_is_settable_routine_parameter to "true" by default.
    This is needed for client-server protocol,
    whose parameters are always settable.
    For dynamic SQL, settability depends on the type of Item passed
    as an actual parameter. See Item_param::set_from_item().
  */
  m_is_settable_routine_parameter(true),
  m_clones(thd->mem_root)
{
  name= *name_arg;
  /*
    Since we can't say until mysql_stmt_execute() whether this item can be
    NULL or not, we assume that it can be NULL until a value is set.
  */
  set_maybe_null();
  with_flags= with_flags | item_with_t::PARAM;
}


/* Add reference to Item_param used in a copy of CTE to its master as a clone */

bool Item_param::add_as_clone(THD *thd)
{
  LEX *lex= thd->lex;
  my_ptrdiff_t master_pos= pos_in_query + lex->clone_spec_offset;
  /*
    NOTE(review): the element-type template argument of List_iterator_fast
    appears to have been lost in extraction (upstream iterates Item_param
    pointers); confirm against the original sql/item.cc.
  */
  List_iterator_fast it(lex->param_list);
  Item_param *master_param;
  while ((master_param = it++))
  {
    if (master_pos == master_param->pos_in_query)
      return master_param->register_clone(this);
  }
  DBUG_ASSERT(false);
  return false;
}


/* Update all clones of Item_param to sync their values with the item's value */

void Item_param::sync_clones()
{
  Item_param **c_ptr= m_clones.begin();
  Item_param **end= m_clones.end();
  for ( ; c_ptr < end; c_ptr++)
  {
    Item_param *c= *c_ptr;
    /* Scalar-type members: */
    c->copy_flags(this, item_base_t::MAYBE_NULL);
    c->null_value= null_value;
    c->Type_std_attributes::operator=(*this);
    c->Type_handler_hybrid_field_type::operator=(*this);

    c->state= state;
    c->m_empty_string_is_null= m_empty_string_is_null;

    c->value.PValue_simple::operator=(value);
    c->value.Type_handler_hybrid_field_type::operator=(value);
    type_handler()->Item_param_setup_conversion(current_thd, c);

    /* Class-type members: */
    c->value.m_decimal= value.m_decimal;
    /*
      Note that String's assignment op properly sets m_is_alloced to 'false',
      which is correct here: c->str_value doesn't own anything.
    */
    c->value.m_string= value.m_string;
    c->value.m_string_ptr= value.m_string_ptr;
  }
}


void Item_param::set_null()
{
  DBUG_ENTER("Item_param::set_null");
  /*
    These are cleared after each execution by reset() method or by setting
    other value.
  */
  null_value= 1;
  /*
    Because of NULL and string values we need to set max_length for each new
    placeholder value: user can submit NULL for any placeholder type, and
    string length can be different in each execution.
  */
  max_length= 0;
  decimals= 0;
  state= NULL_VALUE;
  DBUG_VOID_RETURN;
}

void Item_param::set_int(longlong i, uint32 max_length_arg)
{
  DBUG_ENTER("Item_param::set_int");
  DBUG_ASSERT(value.type_handler()->cmp_type() == INT_RESULT);
  value.integer= (longlong) i;
  state= SHORT_DATA_VALUE;
  collation= DTCollation_numeric();
  max_length= max_length_arg;
  decimals= 0;
  base_flags&= ~item_base_t::MAYBE_NULL;
  null_value= 0;
  DBUG_VOID_RETURN;
}

void Item_param::set_double(double d)
{
  DBUG_ENTER("Item_param::set_double");
  DBUG_ASSERT(value.type_handler()->cmp_type() == REAL_RESULT);
  value.real= d;
  state= SHORT_DATA_VALUE;
  collation= DTCollation_numeric();
  max_length= DBL_DIG + 8;
  decimals= NOT_FIXED_DEC;
  base_flags&= ~item_base_t::MAYBE_NULL;
  null_value= 0;
  DBUG_VOID_RETURN;
}


/**
  Set decimal parameter value from string.

  @param str    character string
  @param length string length

  @note
    As we use character strings to send decimal values in
    binary protocol, we use str2my_decimal to convert it to
    internal decimal value.
*/

void Item_param::set_decimal(const char *str, ulong length)
{
  char *end;
  DBUG_ENTER("Item_param::set_decimal");
  DBUG_ASSERT(value.type_handler()->cmp_type() == DECIMAL_RESULT);

  end= (char*) str+length;
  str2my_decimal(E_DEC_FATAL_ERROR, str, &value.m_decimal, &end);
  state= SHORT_DATA_VALUE;
  decimals= value.m_decimal.frac;
  collation= DTCollation_numeric();
  max_length=
    my_decimal_precision_to_length_no_truncation(value.m_decimal.precision(),
                                                 decimals, unsigned_flag);
  base_flags&= ~item_base_t::MAYBE_NULL;
  null_value= 0;
  DBUG_VOID_RETURN;
}

/* Set decimal parameter value from an already-parsed my_decimal. */
void Item_param::set_decimal(const my_decimal *dv, bool unsigned_arg)
{
  DBUG_ASSERT(value.type_handler()->cmp_type() == DECIMAL_RESULT);
  state= SHORT_DATA_VALUE;

  my_decimal2decimal(dv, &value.m_decimal);

  decimals= (uint8) value.m_decimal.frac;
  collation= DTCollation_numeric();
  unsigned_flag= unsigned_arg;
  max_length= my_decimal_precision_to_length(value.m_decimal.intg + decimals,
                                             decimals, unsigned_flag);
  base_flags&= ~item_base_t::MAYBE_NULL;
  null_value= 0;
}


/* Common metadata setup shared by the temporal set_time() variants. */
void Item_param::fix_temporal(uint32 max_length_arg, uint decimals_arg)
{
  state= SHORT_DATA_VALUE;
  collation= DTCollation_numeric();
  max_length= max_length_arg;
  decimals= decimals_arg;
  base_flags&= ~item_base_t::MAYBE_NULL;
  null_value= 0;
}


/* Set a temporal value that is already known to be valid (no range check). */
void Item_param::set_time(const MYSQL_TIME *tm,
                          uint32 max_length_arg, uint decimals_arg)
{
  DBUG_ASSERT(value.type_handler()->cmp_type() == TIME_RESULT);
  value.time= *tm;
  base_flags&= ~item_base_t::MAYBE_NULL;
  null_value= 0;
  fix_temporal(max_length_arg, decimals_arg);
}


/**
  Set parameter value from MYSQL_TIME value.

  @param tm             datetime value to set (time_type is ignored)
  @param type           type of datetime value
  @param max_length_arg max length of datetime value as string

  @note
    If the value to be stored is not normalized, zero value will be stored
    instead and proper warning will be produced.
    This function relies on
    the fact that even wrong value sent over binary protocol fits into
    MAX_DATE_STRING_REP_LENGTH buffer.
*/
void Item_param::set_time(MYSQL_TIME *tm, timestamp_type time_type,
                          uint32 max_length_arg)
{
  DBUG_ENTER("Item_param::set_time");
  DBUG_ASSERT(value.type_handler()->cmp_type() == TIME_RESULT);

  value.time= *tm;
  value.time.time_type= time_type;

  /* Out-of-range input is replaced by zero-time plus a truncation warning. */
  if (check_datetime_range(&value.time))
  {
    ErrConvTime str(&value.time);
    make_truncated_value_warning(current_thd, Sql_condition::WARN_LEVEL_WARN,
                                 &str, time_type, NULL, NULL, NULL);
    set_zero_time(&value.time, time_type);
  }
  base_flags&= ~item_base_t::MAYBE_NULL;
  null_value= 0;
  fix_temporal(max_length_arg,
               tm->second_part > 0 ? TIME_SECOND_PART_DIGITS : 0);
  DBUG_VOID_RETURN;
}


/*
  Set a string parameter value.  The data is copied as-is (charset
  conversion is deferred until after binlog write; see comments inside).
  Returns TRUE on out-of-memory.
*/
bool Item_param::set_str(const char *str, ulong length,
                         CHARSET_INFO *fromcs, CHARSET_INFO *tocs)
{
  DBUG_ENTER("Item_param::set_str");
  DBUG_ASSERT(value.type_handler()->cmp_type() == STRING_RESULT);
  /*
    Assign string with no conversion: data is converted only after it's
    been written to the binary log.
  */
  uint dummy_errors;
  if (unlikely(value.m_string.copy(str, length, fromcs, tocs, &dummy_errors)))
    DBUG_RETURN(TRUE);
  /*
    Set str_value_ptr to make sure it's in sync with str_value.
    This is needed in case if we're called from Item_param::set_value(),
    from the code responsible for setting OUT parameters in
    sp_head::execute_procedure(). This makes sure that
    Protocol_binary::send_out_parameters() later gets a valid value
    from Item_param::val_str().
    Note, for IN parameters, Item_param::convert_str_value() will be called
    later, which will convert the value from the client character set to the
    connection character set, and will reset both str_value and str_value_ptr.
  */
  value.m_string_ptr.set(value.m_string.ptr(),
                         value.m_string.length(),
                         value.m_string.charset());
  state= SHORT_DATA_VALUE;
  collation.set(tocs, DERIVATION_COERCIBLE);
  max_length= length;
  base_flags&= ~item_base_t::MAYBE_NULL;
  null_value= 0;
  /* max_length and decimals are set after charset conversion */
  /* sic: str may be not null-terminated, don't add DBUG_PRINT here */
  DBUG_RETURN(FALSE);
}


/*
  Append one mysql_stmt_send_long_data() chunk to the accumulated value.
  Fails (with an error message) when the total would exceed
  max_allowed_packet.
*/
bool Item_param::set_longdata(const char *str, ulong length)
{
  DBUG_ENTER("Item_param::set_longdata");
  DBUG_ASSERT(value.type_handler()->cmp_type() == STRING_RESULT);

  /*
    If client character set is multibyte, end of long data packet
    may hit at the middle of a multibyte character. Additionally,
    if binary log is open we must write long data value to the
    binary log in character set of client. This is why we can't
    convert long data to connection character set as it comes
    (here), and first have to concatenate all pieces together,
    write query to the binary log and only then perform conversion.
  */
  if (value.m_string.length() + length > current_thd->variables.max_allowed_packet)
  {
    my_message(ER_UNKNOWN_ERROR,
               "Parameter of prepared statement which is set through "
               "mysql_send_long_data() is longer than "
               "'max_allowed_packet' bytes",
               MYF(0));
    DBUG_RETURN(true);
  }

  if (value.m_string.append(str, length, &my_charset_bin))
    DBUG_RETURN(TRUE);
  state= LONG_DATA_VALUE;
  base_flags&= ~item_base_t::MAYBE_NULL;
  null_value= 0;

  DBUG_RETURN(FALSE);
}


/* Record the placeholder's charset and the target charset for conversion. */
void Item_param::CONVERSION_INFO::set(THD *thd, CHARSET_INFO *fromcs)
{
  CHARSET_INFO *tocs= thd->variables.collation_connection;

  character_set_of_placeholder= fromcs;
  character_set_client= thd->variables.character_set_client;
  /*
    Setup source and destination character sets so that they
    are different only if conversion is necessary: this will
    make later checks easier.
  */
  uint32 dummy_offset;
  final_character_set_of_str_value=
    String::needs_conversion(0, fromcs, tocs, &dummy_offset) ?
    tocs : fromcs;
}


bool Item_param::CONVERSION_INFO::convert(THD *thd, String *str)
{
  return thd->convert_string(str,
                             character_set_of_placeholder,
                             final_character_set_of_str_value);
}


/**
  Set parameter value from Item.

  @param thd   Current thread
  @param item  Item

  @retval
    0 OK
  @retval
    1 Out of memory
*/

bool Item_param::set_from_item(THD *thd, Item *item)
{
  DBUG_ENTER("Item_param::set_from_item");
  m_is_settable_routine_parameter= item->get_settable_routine_parameter();
  if (limit_clause_param)
  {
    /* LIMIT parameters are restricted to integer values. */
    longlong val= item->val_int();
    if (item->null_value)
    {
      set_null();
      DBUG_RETURN(false);
    }
    else
    {
      unsigned_flag= item->unsigned_flag;
      set_handler(item->type_handler());
      DBUG_RETURN(set_limit_clause_param(val));
    }
  }
  st_value tmp;
  if (!item->save_in_value(thd, &tmp))
  {
    const Type_handler *h= item->type_handler();
    set_handler(h);
    DBUG_RETURN(set_value(thd, item, &tmp, h));
  }
  else
    set_null();

  DBUG_RETURN(0);
}

/**
  Resets parameter after execution.

  @note
    We clear null_value here instead of setting it in set_* methods,
    because we want more easily handle case for long data.
*/

void Item_param::reset()
{
  DBUG_ENTER("Item_param::reset");
  /* Shrink string buffer if it's bigger than max possible CHAR column */
  if (value.m_string.alloced_length() > MAX_CHAR_WIDTH)
    value.m_string.free();
  else
    value.m_string.length(0);
  value.m_string_ptr.length(0);
  /*
    We must prevent all charset conversions until data has been written
    to the binary log.
  */
  value.m_string.set_charset(&my_charset_bin);
  collation.set(&my_charset_bin, DERIVATION_COERCIBLE);
  state= NO_VALUE;
  set_maybe_null();
  null_value= 0;
  DBUG_VOID_RETURN;
}


/* Store the current parameter value into 'field', honoring state. */
int Item_param::save_in_field(Field *field, bool no_conversions)
{
  field->set_notnull();

  /*
    There's no "default" intentionally, to make compiler complain
    when adding a new XXX_VALUE value.
    Garbage (e.g. in case of a memory overrun) is handled after the switch.
  */
  switch (state) {
  case SHORT_DATA_VALUE:
  case LONG_DATA_VALUE:
    return value.type_handler()->Item_save_in_field(this, field, no_conversions);
  case NULL_VALUE:
    return set_field_to_null_with_conversions(field, no_conversions);
  case DEFAULT_VALUE:
    return field->save_in_field_default_value(field->table->pos_in_table_list->
                                              top_table() !=
                                              field->table->pos_in_table_list);
  case IGNORE_VALUE:
    return field->save_in_field_ignore_value(field->table->pos_in_table_list->
                                             top_table() !=
                                             field->table->pos_in_table_list);
  case NO_VALUE:
    DBUG_ASSERT(0); // Should not be possible
    return true;
  }
  DBUG_ASSERT(0); // Garbage
  return 1;
}


bool Item_param::is_evaluable_expression() const
{
  switch (state) {
  case SHORT_DATA_VALUE:
  case LONG_DATA_VALUE:
  case NULL_VALUE:
    return true;
  case NO_VALUE:
    return true; // Not assigned yet, so we don't know
  case IGNORE_VALUE:
  case DEFAULT_VALUE:
    break;
  }
  return false;
}


bool Item_param::check_assignability_to(const Field *to, bool ignore) const
{
  switch (state) {
  case SHORT_DATA_VALUE:
  case LONG_DATA_VALUE:
  case NULL_VALUE:
    return to->check_assignability_from(type_handler(), ignore);
  case NO_VALUE:
  case IGNORE_VALUE:
  case DEFAULT_VALUE:
    break;
  }
  return false;
}


bool Item_param::can_return_value() const
{
  // There's no "default". See comments in Item_param::save_in_field().
  switch (state) {
  case SHORT_DATA_VALUE:
  case LONG_DATA_VALUE:
    return true;
  case IGNORE_VALUE:
  case DEFAULT_VALUE:
    invalid_default_param();
    // fall through
  case NULL_VALUE:
    return false;
  case NO_VALUE:
    DBUG_ASSERT(0); // Should not be possible
    return false;
  }
  DBUG_ASSERT(0); // Garbage
  return false;
}


void Item_param::invalid_default_param() const
{
  my_message(ER_INVALID_DEFAULT_PARAM,
             ER_THD(current_thd, ER_INVALID_DEFAULT_PARAM), MYF(0));
}


bool Item_param::get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate)
{
  /*
    LIMIT clause parameter should not call get_date()
    For non-LIMIT parameters, handlers must be the same.
  */
  DBUG_ASSERT(type_handler()->result_type() ==
              value.type_handler()->result_type());
  if (state == SHORT_DATA_VALUE &&
      value.type_handler()->cmp_type() == TIME_RESULT)
  {
    *res= value.time;
    return 0;
  }
  return type_handler()->Item_get_date_with_warn(thd, this, res, fuzzydate);
}


/* Convert the stored value to double according to its comparison type. */
double Item_param::PValue::val_real(const Type_std_attributes *attr) const
{
  switch (type_handler()->cmp_type()) {
  case REAL_RESULT:
    return real;
  case INT_RESULT:
    return attr->unsigned_flag
      ? (double) (ulonglong) integer
      : (double) integer;
  case DECIMAL_RESULT:
    return m_decimal.to_double();
  case STRING_RESULT:
    return double_from_string_with_check(&m_string);
  case TIME_RESULT:
    /*
      This works for example when user says SELECT ?+0.0 and supplies
      time value for the placeholder.
    */
    return TIME_to_double(&time);
  case ROW_RESULT:
    DBUG_ASSERT(0);
    break;
  }
  return 0.0;
}


/* Convert the stored value to longlong according to its comparison type. */
longlong Item_param::PValue::val_int(const Type_std_attributes *attr) const
{
  switch (type_handler()->cmp_type()) {
  case REAL_RESULT:
    return Converter_double_to_longlong(real, attr->unsigned_flag).result();
  case INT_RESULT:
    return integer;
  case DECIMAL_RESULT:
    return m_decimal.to_longlong(attr->unsigned_flag);
  case STRING_RESULT:
    return longlong_from_string_with_check(&m_string);
  case TIME_RESULT:
    return (longlong) TIME_to_ulonglong(&time);
  case ROW_RESULT:
    DBUG_ASSERT(0);
    break;
  }
  return 0;
}


/* Convert the stored value to my_decimal; NULL only for ROW (invalid). */
my_decimal *Item_param::PValue::val_decimal(my_decimal *dec,
                                            const Type_std_attributes *attr)
{
  switch (type_handler()->cmp_type()) {
  case DECIMAL_RESULT:
    return &m_decimal;
  case REAL_RESULT:
    double2my_decimal(E_DEC_FATAL_ERROR, real, dec);
    return dec;
  case INT_RESULT:
    int2my_decimal(E_DEC_FATAL_ERROR, integer, attr->unsigned_flag, dec);
    return dec;
  case STRING_RESULT:
    return decimal_from_string_with_check(dec, &m_string);
  case TIME_RESULT:
    return TIME_to_my_decimal(&time, dec);
  case ROW_RESULT:
    DBUG_ASSERT(0);
    break;
  }
  return 0;
}


/* Convert the stored value to a String; NULL on failure or ROW. */
String *Item_param::PValue::val_str(String *str,
                                    const Type_std_attributes *attr)
{
  switch (type_handler()->cmp_type()) {
  case STRING_RESULT:
    return &m_string_ptr;
  case REAL_RESULT:
    str->set_real(real, NOT_FIXED_DEC, &my_charset_bin);
    return str;
  case INT_RESULT:
    str->set_int(integer, attr->unsigned_flag, &my_charset_bin);
    return str;
  case DECIMAL_RESULT:
    if (m_decimal.to_string_native(str, 0, 0, 0) <= 1)
      return str;
    return NULL;
  case TIME_RESULT:
  {
    if (str->reserve(MAX_DATE_STRING_REP_LENGTH))
      return NULL;
    str->length((uint) my_TIME_to_str(&time, (char*) str->ptr(),
                                      attr->decimals));
    str->set_charset(&my_charset_bin);
    return str;
  }
  case ROW_RESULT:
    DBUG_ASSERT(0);
    break;
  }
  return NULL;
}


/**
Return Param item values in string format, for generating the dynamic + query used in update/binary logs. + + @todo + - Change interface and implementation to fill log data in place + and avoid one more memcpy/alloc between str and log string. + - In case of error we need to notify replication + that binary log contains wrong statement +*/ + +const String *Item_param::value_query_val_str(THD *thd, String *str) const +{ + switch (value.type_handler()->cmp_type()) { + case INT_RESULT: + str->set_int(value.integer, unsigned_flag, &my_charset_bin); + return str; + case REAL_RESULT: + str->set_real(value.real, NOT_FIXED_DEC, &my_charset_bin); + return str; + case DECIMAL_RESULT: + if (value.m_decimal.to_string_native(str, 0, 0, 0) > 1) + return &my_null_string; + return str; + case TIME_RESULT: + { + static const uint32 typelen= 9; // "TIMESTAMP" is the longest type name + char *buf, *ptr; + str->length(0); + /* + TODO: in case of error we need to notify replication + that binary log contains wrong statement + */ + if (str->reserve(MAX_DATE_STRING_REP_LENGTH + 3 + typelen)) + return NULL; + + /* Create date string inplace */ + switch (value.time.time_type) { + case MYSQL_TIMESTAMP_DATE: + str->append(STRING_WITH_LEN("DATE")); + break; + case MYSQL_TIMESTAMP_TIME: + str->append(STRING_WITH_LEN("TIME")); + break; + case MYSQL_TIMESTAMP_DATETIME: + str->append(STRING_WITH_LEN("TIMESTAMP")); + break; + case MYSQL_TIMESTAMP_ERROR: + case MYSQL_TIMESTAMP_NONE: + break; + } + DBUG_ASSERT(str->length() <= typelen); + buf= (char*) str->ptr(); + ptr= buf + str->length(); + *ptr++= '\''; + ptr+= (uint) my_TIME_to_str(&value.time, ptr, decimals); + *ptr++= '\''; + str->length((uint32) (ptr - buf)); + return str; + } + case STRING_RESULT: + { + str->length(0); + append_query_string(value.cs_info.character_set_client, str, + value.m_string.ptr(), value.m_string.length(), + thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES); + return str; + } + case ROW_RESULT: + DBUG_ASSERT(0); + 
break; + } + return NULL; +} + + +const String *Item_param::query_val_str(THD *thd, String* str) const +{ + // There's no "default". See comments in Item_param::save_in_field(). + switch (state) { + case SHORT_DATA_VALUE: + case LONG_DATA_VALUE: + return value_query_val_str(thd, str); + case IGNORE_VALUE: + case DEFAULT_VALUE: + return &my_default_string; + case NULL_VALUE: + return &my_null_string; + case NO_VALUE: + DBUG_ASSERT(0); // Should not be possible + return NULL; + } + DBUG_ASSERT(0); // Garbage + return NULL; +} + + +/** + Convert string from client character set to the character set of + connection. +*/ + +bool Item_param::convert_str_value(THD *thd) +{ + bool rc= FALSE; + if ((state == SHORT_DATA_VALUE || state == LONG_DATA_VALUE) && + value.type_handler()->cmp_type() == STRING_RESULT) + { + rc= value.cs_info.convert_if_needed(thd, &value.m_string); + /* Here str_value is guaranteed to be in final_character_set_of_str_value */ + + /* + str_value_ptr is returned from val_str(). It must be not alloced + to prevent it's modification by val_str() invoker. + */ + value.m_string_ptr.set(value.m_string.ptr(), value.m_string.length(), + value.m_string.charset()); + /* Synchronize item charset and length with value charset */ + fix_charset_and_length_from_str_value(value.m_string, DERIVATION_COERCIBLE); + } + return rc; +} + + +bool Item_param::basic_const_item() const +{ + switch (state) { + case LONG_DATA_VALUE: + case NULL_VALUE: + return true; + case SHORT_DATA_VALUE: + return type_handler()->cmp_type() != TIME_RESULT; + case DEFAULT_VALUE: + case IGNORE_VALUE: + invalid_default_param(); + return false; + case NO_VALUE: + break; + } + return false; +} + + +Item *Item_param::value_clone_item(THD *thd) +{ + MEM_ROOT *mem_root= thd->mem_root; + switch (value.type_handler()->cmp_type()) { + case INT_RESULT: + return (unsigned_flag ? 
+ new (mem_root) Item_uint(thd, name.str, value.integer, max_length) : + new (mem_root) Item_int(thd, name.str, value.integer, max_length)); + case REAL_RESULT: + return new (mem_root) Item_float(thd, name.str, value.real, decimals, + max_length); + case DECIMAL_RESULT: + return 0; // Should create Item_decimal. See MDEV-11361. + case STRING_RESULT: + return new (mem_root) Item_string(thd, name, + Lex_cstring(value.m_string.ptr(), + value.m_string.length()), + value.m_string.charset(), + collation.derivation, + collation.repertoire); + case TIME_RESULT: + break; + case ROW_RESULT: + DBUG_ASSERT(0); + break; + } + return 0; +} + + +/* see comments in the header file */ + +Item * +Item_param::clone_item(THD *thd) +{ + // There's no "default". See comments in Item_param::save_in_field(). + switch (state) { + case IGNORE_VALUE: + case DEFAULT_VALUE: + invalid_default_param(); + // fall through + case NULL_VALUE: + return new (thd->mem_root) Item_null(thd, name.str); + case SHORT_DATA_VALUE: + case LONG_DATA_VALUE: + { + DBUG_ASSERT(type_handler()->cmp_type() == value.type_handler()->cmp_type()); + return value_clone_item(thd); + } + case NO_VALUE: + return 0; + } + DBUG_ASSERT(0); // Garbage + return 0; +} + + +/* End of Item_param related */ + +void Item_param::print(String *str, enum_query_type query_type) +{ + if (state == NO_VALUE) + { + str->append('?'); + } + else if (state == DEFAULT_VALUE) + { + str->append(STRING_WITH_LEN("default")); + } + else if (state == IGNORE_VALUE) + { + str->append(STRING_WITH_LEN("ignore")); + } + else + { + char buffer[STRING_BUFFER_USUAL_SIZE]; + String tmp(buffer, sizeof(buffer), &my_charset_bin); + const String *res; + res= query_val_str(current_thd, &tmp); + str->append(*res); + } +} + + +/** + Preserve the original parameter types and values + when re-preparing a prepared statement. 
+ + @details Copy parameter type information and conversion + function pointers from a parameter of the old statement + to the corresponding parameter of the new one. + + Move parameter values from the old parameters to the new + one. We simply "exchange" the values, which allows + to save on allocation and character set conversion in + case a parameter is a string or a blob/clob. + + The old parameter gets the value of this one, which + ensures that all memory of this parameter is freed + correctly. + + @param[in] src parameter item of the original + prepared statement +*/ + +void +Item_param::set_param_type_and_swap_value(Item_param *src) +{ + Type_std_attributes::set(src); + set_handler(src->type_handler()); + + copy_flags(src, item_base_t::MAYBE_NULL); + null_value= src->null_value; + state= src->state; + + value.swap(src->value); +} + + +void Item_param::set_default() +{ + m_is_settable_routine_parameter= false; + state= DEFAULT_VALUE; + /* + When Item_param is set to DEFAULT_VALUE: + - its val_str() and val_decimal() return NULL + - get_date() returns true + It's important also to have null_value==true for DEFAULT_VALUE. + Otherwise the callers of val_xxx() and get_date(), e.g. Item::send(), + can misbehave (e.g. crash on asserts). + */ + null_value= true; +} + +void Item_param::set_ignore() +{ + m_is_settable_routine_parameter= false; + state= IGNORE_VALUE; + null_value= true; +} + +/** + This operation is intended to store some item value in Item_param to be + used later. + + @param thd thread context + @param ctx stored procedure runtime context + @param it a pointer to an item in the tree + + @return Error status + @retval TRUE on error + @retval FALSE on success +*/ + +bool +Item_param::set_value(THD *thd, sp_rcontext *ctx, Item **it) +{ + Item *arg= *it; + st_value tmp; + /* + The OUT parameter is bound to some data type. 
+ It's important not to touch m_type_handler, + to make sure the next mysql_stmt_execute() + correctly fetches the value from the client-server protocol, + using set_param_func(). + */ + if (arg->save_in_value(thd, &tmp) || + set_value(thd, arg, &tmp, arg->type_handler())) + { + set_null(); + return false; + } + /* It is a wrapper => other set_* should set null_value */ + DBUG_ASSERT(null_value == false); + return false; +} + + +/** + Setter of Item_param::m_out_param_info. + + m_out_param_info is used to store information about stored routine + OUT-parameters, such as stored routine name, database, stored routine + variable name. It is supposed to be set in sp_head::execute() after + Item_param::set_value() is called. +*/ + +void +Item_param::set_out_param_info(Send_field *info) +{ + m_out_param_info= info; + set_handler(m_out_param_info->type_handler()); +} + + +/** + Getter of Item_param::m_out_param_info. + + m_out_param_info is used to store information about stored routine + OUT-parameters, such as stored routine name, database, stored routine + variable name. It is supposed to be retrieved in + Protocol_binary::send_out_parameters() during creation of OUT-parameter + result set. +*/ + +const Send_field * +Item_param::get_out_param_info() const +{ + return m_out_param_info; +} + + +/** + Fill meta-data information for the corresponding column in a result set. + If this is an OUT-parameter of a stored procedure, preserve meta-data of + stored-routine variable. + + @param field container for meta-data to be filled +*/ + +void Item_param::make_send_field(THD *thd, Send_field *field) +{ + Item::make_send_field(thd, field); + + if (!m_out_param_info) + return; + + /* + This is an OUT-parameter of a stored procedure. We should use + OUT-parameter info to fill out the names. 
+ */ + + *field= *m_out_param_info; +} + +bool Item_param::append_for_log(THD *thd, String *str) +{ + StringBuffer buf; + const String *val= query_val_str(thd, &buf); + return str->append(*val); +} + + +/**************************************************************************** + Item_copy_string +****************************************************************************/ + +double Item_copy_string::val_real() +{ + int err_not_used; + char *end_not_used; + return (null_value ? 0.0 : + str_value.charset()->strntod((char*) str_value.ptr(), + str_value.length(), + &end_not_used, &err_not_used)); +} + +longlong Item_copy_string::val_int() +{ + int err; + return null_value ? 0 : str_value.charset()->strntoll(str_value.ptr(), + str_value.length(), 10, + (char**) 0, &err); +} + + +int Item_copy_string::save_in_field(Field *field, bool no_conversions) +{ + return save_str_value_in_field(field, &str_value); +} + + +void Item_copy_string::copy() +{ + String *res=item->val_str(&str_value); + if (res && res != &str_value) + str_value.copy(*res); + null_value=item->null_value; +} + +/* ARGSUSED */ +String *Item_copy_string::val_str(String *str) +{ + // Item_copy_string is used without fix_fields call + if (null_value) + return (String*) 0; + return &str_value; +} + + +my_decimal *Item_copy_string::val_decimal(my_decimal *decimal_value) +{ + // Item_copy_string is used without fix_fields call + if (null_value) + return (my_decimal *) 0; + string2my_decimal(E_DEC_FATAL_ERROR, &str_value, decimal_value); + return (decimal_value); +} + + +/* + Functions to convert item to field (for send_result_set_metadata) +*/ + +void Item_ref_null_helper::save_val(Field *to) +{ + DBUG_ASSERT(fixed()); + (*ref)->save_val(to); + owner->was_null|= null_value= (*ref)->null_value; +} + + +double Item_ref_null_helper::val_real() +{ + DBUG_ASSERT(fixed()); + double tmp= (*ref)->val_result(); + owner->was_null|= null_value= (*ref)->null_value; + return tmp; +} + + +longlong 
Item_ref_null_helper::val_int() +{ + DBUG_ASSERT(fixed()); + longlong tmp= (*ref)->val_int_result(); + owner->was_null|= null_value= (*ref)->null_value; + return tmp; +} + + +my_decimal *Item_ref_null_helper::val_decimal(my_decimal *decimal_value) +{ + DBUG_ASSERT(fixed()); + my_decimal *val= (*ref)->val_decimal_result(decimal_value); + owner->was_null|= null_value= (*ref)->null_value; + return val; +} + + +bool Item_ref_null_helper::val_bool() +{ + DBUG_ASSERT(fixed()); + bool val= (*ref)->val_bool_result(); + owner->was_null|= null_value= (*ref)->null_value; + return val; +} + + +String* Item_ref_null_helper::val_str(String* s) +{ + DBUG_ASSERT(fixed()); + String* tmp= (*ref)->str_result(s); + owner->was_null|= null_value= (*ref)->null_value; + return tmp; +} + + +bool Item_ref_null_helper::val_native(THD *thd, Native *to) +{ + return (owner->was_null|= val_native_from_item(thd, *ref, to)); +} + + +bool Item_ref_null_helper::get_date(THD *thd, MYSQL_TIME *ltime, + date_mode_t fuzzydate) +{ + return (owner->was_null|= null_value= (*ref)->get_date_result(thd, ltime, + fuzzydate)); +} + + +/** + Mark item and SELECT_LEXs as dependent if item was resolved in + outer SELECT. + + @param thd thread handler + @param last select from which current item depend + @param current current select + @param resolved_item item which was resolved in outer SELECT(for warning) + @param mark_item item which should be marked (can be differ in case of + substitution) + @param suppress_warning_output flag specifying whether to suppress output of + a warning message +*/ + +static bool mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current, + Item_ident *resolved_item, + Item_ident *mark_item, + bool suppress_warning_output) +{ + DBUG_ENTER("mark_as_dependent"); + DBUG_PRINT("info", ("current select: %d (%p) last: %d (%p)", + current->select_number, current, + (last ? 
last->select_number : 0), last)); + + /* store pointer on SELECT_LEX from which item is dependent */ + if (mark_item && mark_item->can_be_depended) + { + DBUG_PRINT("info", ("mark_item: %p lex: %p", mark_item, last)); + mark_item->depended_from= last; + } + if (current->mark_as_dependent(thd, last, + /** resolved_item psergey-thu **/ mark_item)) + DBUG_RETURN(TRUE); + if ((thd->lex->describe & DESCRIBE_EXTENDED) && !suppress_warning_output) + { + const char *db_name= (resolved_item->db_name.str ? + resolved_item->db_name.str : ""); + const char *table_name= (resolved_item->table_name.str ? + resolved_item->table_name.str : ""); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_WARN_FIELD_RESOLVED, + ER_THD(thd,ER_WARN_FIELD_RESOLVED), + db_name, (db_name[0] ? "." : ""), + table_name, (table_name [0] ? "." : ""), + resolved_item->field_name.str, + current->select_number, last->select_number); + } + DBUG_RETURN(FALSE); +} + + +/** + Mark range of selects and resolved identifier (field/reference) + item as dependent. + + @param thd thread handler + @param last_select select where resolved_item was resolved + @param current_sel current select (select where resolved_item was placed) + @param found_field field which was found during resolving + @param found_item Item which was found during resolving (if resolved + identifier belongs to VIEW) + @param resolved_item Identifier which was resolved + @param suppress_warning_output flag specifying whether to suppress output of + a warning message + + @note + We have to mark all items between current_sel (including) and + last_select (excluding) as dependent (select before last_select should + be marked with actual table mask used by resolved item, all other with + OUTER_REF_TABLE_BIT) and also write dependence information to Item of + resolved identifier. 
+*/ + +void mark_select_range_as_dependent(THD *thd, SELECT_LEX *last_select, + SELECT_LEX *current_sel, + Field *found_field, Item *found_item, + Item_ident *resolved_item, + bool suppress_warning_output) +{ + /* + Go from current SELECT to SELECT where field was resolved (it + have to be reachable from current SELECT, because it was already + done once when we resolved this field and cached result of + resolving) + */ + SELECT_LEX *previous_select= current_sel; + for (; previous_select->context.outer_select() != last_select; + previous_select= previous_select->context.outer_select()) + { + Item_subselect *prev_subselect_item= + previous_select->master_unit()->item; + prev_subselect_item->used_tables_cache|= OUTER_REF_TABLE_BIT; + prev_subselect_item->const_item_cache= 0; + } + + Item_subselect *prev_subselect_item= + previous_select->master_unit()->item; + Item_ident *dependent= resolved_item; + if (found_field == view_ref_found) + { + Item::Type type= found_item->type(); + prev_subselect_item->used_tables_cache|= + found_item->used_tables(); + dependent= ((type == Item::REF_ITEM || type == Item::FIELD_ITEM) ? + (Item_ident*) found_item : + 0); + } + else + prev_subselect_item->used_tables_cache|= + found_field->table->map; + prev_subselect_item->const_item_cache= 0; + mark_as_dependent(thd, last_select, current_sel, resolved_item, + dependent, suppress_warning_output); +} + + +/** + Search a GROUP BY clause for a field with a certain name. + + Search the GROUP BY list for a column named as find_item. When searching + preference is given to columns that are qualified with the same table (and + database) name as the one being searched for. 
+ + @param find_item the item being searched for + @param group_list GROUP BY clause + + @return + - the found item on success + - NULL if find_item is not in group_list +*/ + +static Item** find_field_in_group_list(Item *find_item, ORDER *group_list) +{ + LEX_CSTRING db_name; + LEX_CSTRING table_name; + LEX_CSTRING field_name; + ORDER *found_group= NULL; + int found_match_degree= 0; + char name_buff[SAFE_NAME_LEN+1]; + + if (find_item->type() == Item::FIELD_ITEM || + find_item->type() == Item::REF_ITEM) + { + db_name= ((Item_ident*) find_item)->db_name; + table_name= ((Item_ident*) find_item)->table_name; + field_name= ((Item_ident*) find_item)->field_name; + } + else + return NULL; + + if (db_name.str && lower_case_table_names) + { + /* Convert database to lower case for comparison */ + strmake_buf(name_buff, db_name.str); + my_casedn_str(files_charset_info, name_buff); + db_name= Lex_cstring_strlen(name_buff); + } + + DBUG_ASSERT(field_name.str != 0); + + for (ORDER *cur_group= group_list ; cur_group ; cur_group= cur_group->next) + { + int cur_match_degree= 0; + + /* SELECT list element with explicit alias */ + if ((*(cur_group->item))->name.str && !table_name.str && + (*(cur_group->item))->is_explicit_name() && + !lex_string_cmp(system_charset_info, + &(*(cur_group->item))->name, &field_name)) + { + ++cur_match_degree; + } + /* Reference on the field or view/derived field. */ + else if ((*(cur_group->item))->type() == Item::FIELD_ITEM || + (*(cur_group->item))->type() == Item::REF_ITEM ) + { + Item_ident *cur_field= (Item_ident*) *cur_group->item; + const char *l_db_name= cur_field->db_name.str; + const char *l_table_name= cur_field->table_name.str; + LEX_CSTRING *l_field_name= &cur_field->field_name; + + DBUG_ASSERT(l_field_name->str != 0); + + if (!lex_string_cmp(system_charset_info, + l_field_name, &field_name)) + ++cur_match_degree; + else + continue; + + if (l_table_name && table_name.str) + { + /* If field_name is qualified by a table name. 
*/ + if (my_strcasecmp(table_alias_charset, l_table_name, table_name.str)) + /* Same field names, different tables. */ + return NULL; + + ++cur_match_degree; + if (l_db_name && db_name.str) + { + /* If field_name is also qualified by a database name. */ + if (strcmp(l_db_name, db_name.str)) + /* Same field names, different databases. */ + return NULL; + ++cur_match_degree; + } + } + } + else + continue; + + if (cur_match_degree > found_match_degree) + { + found_match_degree= cur_match_degree; + found_group= cur_group; + } + else if (found_group && (cur_match_degree == found_match_degree) && + !(*(found_group->item))->eq((*(cur_group->item)), 0)) + { + /* + If the current resolve candidate matches equally well as the current + best match, they must reference the same column, otherwise the field + is ambiguous. + */ + my_error(ER_NON_UNIQ_ERROR, MYF(0), + find_item->full_name(), current_thd->where); + return NULL; + } + } + + if (found_group) + return found_group->item; + else + return NULL; +} + + +/** + Resolve a column reference in a sub-select. + + Resolve a column reference (usually inside a HAVING clause) against the + SELECT and GROUP BY clauses of the query described by 'select'. The name + resolution algorithm searches both the SELECT and GROUP BY clauses, and in + case of a name conflict prefers GROUP BY column names over SELECT names. If + both clauses contain different fields with the same names, a warning is + issued that name of 'ref' is ambiguous. We extend ANSI SQL in that when no + GROUP BY column is found, then a HAVING name is resolved as a possibly + derived SELECT column. This extension is allowed only if the + MODE_ONLY_FULL_GROUP_BY sql mode isn't enabled. + + @param thd current thread + @param ref column reference being resolved + @param select the select that ref is resolved against + + @note + The resolution procedure is: + - Search for a column or derived column named col_ref_i [in table T_j] + in the SELECT clause of Q. 
+ - Search for a column named col_ref_i [in table T_j] + in the GROUP BY clause of Q. + - If found different columns with the same name in GROUP BY and SELECT: + - if the condition that uses this column name is pushed down into + the HAVING clause return the SELECT column + - else issue a warning and return the GROUP BY column. + - Otherwise + - if the MODE_ONLY_FULL_GROUP_BY mode is enabled return error + - else return the found SELECT column. + + + @return + - NULL - there was an error, and the error was already reported + - not_found_item - the item was not resolved, no error was reported + - resolved item - if the item was resolved +*/ + +static Item** +resolve_ref_in_select_and_group(THD *thd, Item_ident *ref, SELECT_LEX *select) +{ + Item **group_by_ref= NULL; + Item **select_ref= NULL; + ORDER *group_list= select->group_list.first; + bool ambiguous_fields= FALSE; + uint counter; + enum_resolution_type resolution; + + /* + Search for a column or derived column named as 'ref' in the SELECT + clause of the current select. + */ + if (!(select_ref= find_item_in_list(ref, *(select->get_item_list()), + &counter, REPORT_EXCEPT_NOT_FOUND, + &resolution))) + return NULL; /* Some error occurred. */ + if (resolution == RESOLVED_AGAINST_ALIAS) + ref->alias_name_used= TRUE; + + /* If this is a non-aggregated field inside HAVING, search in GROUP BY. */ + if (select->having_fix_field && !ref->with_sum_func() && group_list) + { + group_by_ref= find_field_in_group_list(ref, group_list); + + /* Check if the fields found in SELECT and GROUP BY are the same field. 
*/ + if (group_by_ref && (select_ref != not_found_item) && + !((*group_by_ref)->eq(*select_ref, 0)) && + (!select->having_fix_field_for_pushed_cond)) + { + ambiguous_fields= TRUE; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_NON_UNIQ_ERROR, + ER_THD(thd,ER_NON_UNIQ_ERROR), ref->full_name(), + thd->where); + + } + } + + if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY && + select->having_fix_field && + select_ref != not_found_item && !group_by_ref && + !ref->alias_name_used) + { + /* + Report the error if fields was found only in the SELECT item list and + the strict mode is enabled. + */ + my_error(ER_NON_GROUPING_FIELD_USED, MYF(0), + ref->name.str, "HAVING"); + return NULL; + } + if (select_ref != not_found_item || group_by_ref) + { + if (select_ref != not_found_item && !ambiguous_fields) + { + DBUG_ASSERT(*select_ref != 0); + if (!select->ref_pointer_array[counter]) + { + my_error(ER_ILLEGAL_REFERENCE, MYF(0), + ref->name.str, "forward reference in item list"); + return NULL; + } + DBUG_ASSERT((*select_ref)->fixed()); + return &select->ref_pointer_array[counter]; + } + if (group_by_ref) + return group_by_ref; + DBUG_ASSERT(FALSE); + return NULL; /* So there is no compiler warning. */ + } + + return (Item**) not_found_item; +} + + +/* + @brief + Whether a table belongs to an outer select. + + @param table table to check + @param select current select + + @details + Try to find select the table belongs to by ascending the derived tables chain. 
+*/ + +static +bool is_outer_table(TABLE_LIST *table, SELECT_LEX *select) +{ + DBUG_ASSERT(table->select_lex != select); + TABLE_LIST *tl; + + if (table->belong_to_view && + table->belong_to_view->select_lex == select) + return FALSE; + + for (tl= select->master_unit()->derived; + tl && tl->is_merged_derived(); + select= tl->select_lex, tl= select->master_unit()->derived) + { + if (tl->select_lex == table->select_lex) + return FALSE; + } + return TRUE; +} + + +/** + Resolve the name of an outer select column reference. + + @param[in] thd current thread + @param[in,out] from_field found field reference or (Field*)not_found_field + @param[in,out] reference view column if this item was resolved to a + view column + + @description + The method resolves the column reference represented by 'this' as a column + present in outer selects that contain current select. + + In prepared statements, because of cache, find_field_in_tables() + can resolve fields even if they don't belong to current context. + In this case this method only finds appropriate context and marks + current select as dependent. The found reference of field should be + provided in 'from_field'. + + The cache is critical for prepared statements of type: + + SELECT a FROM (SELECT a FROM test.t1) AS s1 NATURAL JOIN t2 AS s2; + + This is internally converted to a join similar to + + SELECT a FROM t1 AS s1,t2 AS s2 WHERE t2.a=t1.a; + + Without the cache, we would on re-prepare not know if 'a' did match + s1.a or s2.a. + + @note + This is the inner loop of Item_field::fix_fields: + @code + for each outer query Q_k beginning from the inner-most one + { + search for a column or derived column named col_ref_i + [in table T_j] in the FROM clause of Q_k; + + if such a column is not found + Search for a column or derived column named col_ref_i + [in table T_j] in the SELECT and GROUP clauses of Q_k. + } + @endcode + + @retval + 1 column successfully resolved and fix_fields() should continue. 
+ @retval + 0 column fully fixed and fix_fields() should return FALSE + @retval + -1 error occurred +*/ + +int +Item_field::fix_outer_field(THD *thd, Field **from_field, Item **reference) +{ + enum_parsing_place place= NO_MATTER; + bool field_found= (*from_field != not_found_field); + bool upward_lookup= FALSE; + TABLE_LIST *table_list; + + /* Calculate the TABLE_LIST for the table */ + table_list= (cached_table ? cached_table : + field_found && (*from_field) != view_ref_found ? + (*from_field)->table->pos_in_table_list : 0); + /* + If there are outer contexts (outer selects, but current select is + not derived table or view) try to resolve this reference in the + outer contexts. + + We treat each subselect as a separate namespace, so that different + subselects may contain columns with the same names. The subselects + are searched starting from the innermost. + */ + Name_resolution_context *last_checked_context= context; + Item **ref= (Item **) not_found_item; + SELECT_LEX *current_sel= context->select_lex; + Name_resolution_context *outer_context= 0; + SELECT_LEX *select= 0; + + if (current_sel->master_unit()->outer_select()) + outer_context= context->outer_context; + + /* + This assert is to ensure we have an outer context when *from_field + is set. + If this would not be the case, we would assert in mark_as_dependent + as last_checked_context == context + */ + DBUG_ASSERT(outer_context || !*from_field || + *from_field == not_found_field); + for (; + outer_context; + outer_context= outer_context->outer_context) + { + select= outer_context->select_lex; + Item_subselect *prev_subselect_item= + last_checked_context->select_lex->master_unit()->item; + last_checked_context= outer_context; + upward_lookup= TRUE; + + place= prev_subselect_item->parsing_place; + /* + If outer_field is set, field was already found by first call + to find_field_in_tables(). Only need to find appropriate context. 
+ */ + if (field_found && outer_context->select_lex != + table_list->select_lex) + continue; + /* + In case of a view, find_field_in_tables() writes the pointer to + the found view field into '*reference', in other words, it + substitutes this Item_field with the found expression. + */ + if (field_found || (*from_field= find_field_in_tables(thd, this, + outer_context-> + first_name_resolution_table, + outer_context-> + last_name_resolution_table, + outer_context-> + ignored_tables, + reference, + IGNORE_EXCEPT_NON_UNIQUE, + TRUE, TRUE)) != + not_found_field) + { + if (*from_field) + { + if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY && + select->cur_pos_in_select_list != UNDEF_POS) + { + /* + As this is an outer field it should be added to the list of + non aggregated fields of the outer select. + */ + if (select->join) + { + marker= select->cur_pos_in_select_list; + select->join->non_agg_fields.push_back(this, thd->mem_root); + } + else + { + /* + join is absent if it is upper SELECT_LEX of non-select + command + */ + DBUG_ASSERT(select->master_unit()->outer_select() == NULL && + (thd->lex->sql_command != SQLCOM_SELECT && + thd->lex->sql_command != SQLCOM_UPDATE_MULTI && + thd->lex->sql_command != SQLCOM_DELETE_MULTI && + thd->lex->sql_command != SQLCOM_INSERT_SELECT && + thd->lex->sql_command != SQLCOM_REPLACE_SELECT)); + } + } + if (*from_field != view_ref_found) + { + prev_subselect_item->used_tables_cache|= (*from_field)->table->map; + prev_subselect_item->const_item_cache= 0; + set_field(*from_field); + if (!last_checked_context->select_lex->having_fix_field && + select->group_list.elements && + (place == SELECT_LIST || place == IN_HAVING)) + { + Item_outer_ref *rf; + /* + If an outer field is resolved in a grouping select then it + is replaced for an Item_outer_ref object. Otherwise an + Item_field object is used. + The new Item_outer_ref object is saved in the inner_refs_list of + the outer select. Here it is only created. 
It can be fixed only + after the original field has been fixed and this is done in the + fix_inner_refs() function. + */ + ; + if (!(rf= new (thd->mem_root) Item_outer_ref(thd, context, this))) + return -1; + thd->change_item_tree(reference, rf); + select->inner_refs_list.push_back(rf, thd->mem_root); + rf->in_sum_func= thd->lex->in_sum_func; + } + /* + A reference is resolved to a nest level that's outer or the same as + the nest level of the enclosing set function : adjust the value of + max_arg_level for the function if it's needed. + */ + if (thd->lex->in_sum_func && + last_checked_context->select_lex->parent_lex == + context->select_lex->parent_lex && + thd->lex->in_sum_func->nest_level >= select->nest_level) + { + Item::Type ref_type= (*reference)->type(); + set_if_bigger(thd->lex->in_sum_func->max_arg_level, + select->nest_level); + set_field(*from_field); + base_flags|= item_base_t::FIXED; + mark_as_dependent(thd, last_checked_context->select_lex, + context->select_lex, this, + ((ref_type == REF_ITEM || + ref_type == FIELD_ITEM) ? + (Item_ident*) (*reference) : 0), false); + return 0; + } + } + else + { + Item::Type ref_type= (*reference)->type(); + prev_subselect_item->used_tables_and_const_cache_join(*reference); + mark_as_dependent(thd, last_checked_context->select_lex, + context->select_lex, this, + ((ref_type == REF_ITEM || ref_type == FIELD_ITEM) ? + (Item_ident*) (*reference) : + 0), false); + if (thd->lex->in_sum_func && + last_checked_context->select_lex->parent_lex == + context->select_lex->parent_lex && + thd->lex->in_sum_func->nest_level >= select->nest_level) + { + set_if_bigger(thd->lex->in_sum_func->max_arg_level, + select->nest_level); + } + /* + A reference to a view field had been found and we + substituted it instead of this Item (find_field_in_tables + does it by assigning the new value to *reference), so now + we can return from this function. 
+ */ + return 0; + } + } + break; + } + + /* Search in SELECT and GROUP lists of the outer select. */ + if (place != IN_WHERE && place != IN_ON) + { + if (!(ref= resolve_ref_in_select_and_group(thd, this, select))) + return -1; /* Some error occurred (e.g. ambiguous names). */ + if (ref != not_found_item) + { + DBUG_ASSERT(*ref && (*ref)->fixed()); + prev_subselect_item->used_tables_and_const_cache_join(*ref); + break; + } + } + + /* + Reference is not found in this select => this subquery depend on + outer select (or we just trying to find wrong identifier, in this + case it does not matter which used tables bits we set) + */ + prev_subselect_item->used_tables_cache|= OUTER_REF_TABLE_BIT; + prev_subselect_item->const_item_cache= 0; + } + + DBUG_ASSERT(ref != 0); + if (!*from_field) + return -1; + if (ref == not_found_item && *from_field == not_found_field) + { + if (upward_lookup) + { + // We can't say exactly what absent table or field + my_error(ER_BAD_FIELD_ERROR, MYF(0), full_name(), thd->where); + } + else + { + /* Call find_field_in_tables only to report the error */ + find_field_in_tables(thd, this, + context->first_name_resolution_table, + context->last_name_resolution_table, + context->ignored_tables, + reference, REPORT_ALL_ERRORS, + !any_privileges, TRUE); + } + return -1; + } + else if (ref != not_found_item) + { + Item *save; + Item_ref *rf; + + /* Should have been checked in resolve_ref_in_select_and_group(). */ + DBUG_ASSERT(*ref && (*ref)->fixed()); + /* + Here, a subset of actions performed by Item_ref::set_properties + is not enough. So we pass ptr to NULL into Item_[direct]_ref + constructor, so no initialization is performed, and call + fix_fields() below. + */ + save= *ref; + *ref= NULL; // Don't call set_properties() + rf= (place == IN_HAVING ? + new (thd->mem_root) + Item_ref(thd, context, ref, table_name, + field_name, alias_name_used) : + (!select->group_list.elements ? 
+ new (thd->mem_root) + Item_direct_ref(thd, context, ref, table_name, + field_name, alias_name_used) : + new (thd->mem_root) + Item_outer_ref(thd, context, ref, table_name, + field_name, alias_name_used))); + *ref= save; + if (!rf) + return -1; + + if (place != IN_HAVING && select->group_list.elements) + { + outer_context->select_lex->inner_refs_list.push_back((Item_outer_ref*)rf, + thd->mem_root); + ((Item_outer_ref*)rf)->in_sum_func= thd->lex->in_sum_func; + } + thd->change_item_tree(reference, rf); + /* + rf is Item_ref => never substitute other items (in this case) + during fix_fields() => we can use rf after fix_fields() + */ + DBUG_ASSERT(!rf->fixed()); // Assured by Item_ref() + if (rf->fix_fields(thd, reference) || rf->check_cols(1)) + return -1; + + /* + We can not "move" aggregate function in the place where + its arguments are not defined. + */ + set_max_sum_func_level(thd, select); + mark_as_dependent(thd, last_checked_context->select_lex, + context->select_lex, rf, + rf, false); + + return 0; + } + else + { + /* + We can not "move" aggregate function in the place where + its arguments are not defined. + */ + set_max_sum_func_level(thd, select); + mark_as_dependent(thd, last_checked_context->select_lex, + context->select_lex, + this, (Item_ident*)*reference, false); + if (last_checked_context->select_lex->having_fix_field) + { + Item_ref *rf; + rf= new (thd->mem_root) Item_ref(thd, context, + (*from_field)->table->s->db, + Lex_cstring_strlen((*from_field)-> + table->alias.c_ptr()), + field_name); + if (!rf) + return -1; + thd->change_item_tree(reference, rf); + /* + rf is Item_ref => never substitute other items (in this case) + during fix_fields() => we can use rf after fix_fields() + */ + DBUG_ASSERT(!rf->fixed()); // Assured by Item_ref() + if (rf->fix_fields(thd, reference) || rf->check_cols(1)) + return -1; + return 0; + } + } + return 1; +} + + +/** + Resolve the name of a column reference. 
+ + The method resolves the column reference represented by 'this' as a column + present in one of: FROM clause, SELECT clause, GROUP BY clause of a query + Q, or in outer queries that contain Q. + + The name resolution algorithm used is (where [T_j] is an optional table + name that qualifies the column name): + + @code + resolve_column_reference([T_j].col_ref_i) + { + search for a column or derived column named col_ref_i + [in table T_j] in the FROM clause of Q; + + if such a column is NOT found AND // Lookup in outer queries. + there are outer queries + { + for each outer query Q_k beginning from the inner-most one + { + search for a column or derived column named col_ref_i + [in table T_j] in the FROM clause of Q_k; + + if such a column is not found + Search for a column or derived column named col_ref_i + [in table T_j] in the SELECT and GROUP clauses of Q_k. + } + } + } + @endcode + + Notice that compared to Item_ref::fix_fields, here we first search the FROM + clause, and then we search the SELECT and GROUP BY clauses. + + @param[in] thd current thread + @param[in,out] reference view column if this item was resolved to a + view column + + @retval + TRUE if error + @retval + FALSE on success +*/ + +bool Item_field::fix_fields(THD *thd, Item **reference) +{ + DBUG_ASSERT(fixed() == 0); + Field *from_field= (Field *)not_found_field; + bool outer_fixed= false; + SELECT_LEX *select; + if (context) + { + select= context->select_lex; + } + else + { + // No real name resolution, used somewhere in SP + DBUG_ASSERT(field); + select= NULL; + } + + + if (select && select->in_tvc) + { + my_error(ER_FIELD_REFERENCE_IN_TVC, MYF(0), full_name()); + return(1); + } + + if (!field) // If field is not checked + { + TABLE_LIST *table_list; + /* + In case of view, find_field_in_tables() write pointer to view field + expression to 'reference', i.e. 
it substitute that expression instead + of this Item_field + */ + DBUG_ASSERT(context); + if ((from_field= find_field_in_tables(thd, this, + context->first_name_resolution_table, + context->last_name_resolution_table, + context->ignored_tables, + reference, + thd->lex->use_only_table_context ? + REPORT_ALL_ERRORS : + IGNORE_EXCEPT_NON_UNIQUE, + !any_privileges, + TRUE)) == + not_found_field) + { + int ret; + + /* Look up in current select's item_list to find aliased fields */ + if (select && select->is_item_list_lookup) + { + uint counter; + enum_resolution_type resolution; + Item** res= find_item_in_list(this, + select->item_list, + &counter, REPORT_EXCEPT_NOT_FOUND, + &resolution); + if (!res) + return 1; + if (resolution == RESOLVED_AGAINST_ALIAS) + alias_name_used= TRUE; + if (res != (Item **)not_found_item) + { + if ((*res)->type() == Item::FIELD_ITEM) + { + /* + It's an Item_field referencing another Item_field in the select + list. + Use the field from the Item_field in the select list and leave + the Item_field instance in place. + */ + + Field *new_field= (*((Item_field**)res))->field; + + if (unlikely(new_field == NULL)) + { + /* The column to which we link isn't valid. */ + my_error(ER_BAD_FIELD_ERROR, MYF(0), (*res)->name.str, + thd->where); + return(1); + } + + /* + We can not "move" aggregate function in the place where + its arguments are not defined. + */ + set_max_sum_func_level(thd, select); + set_field(new_field); + depended_from= (*((Item_field**)res))->depended_from; + return 0; + } + else + { + /* + It's not an Item_field in the select list so we must make a new + Item_ref to point to the Item in the select list and replace the + Item_field created by the parser with the new Item_ref. 
+ */ + Item_ref *rf= new (thd->mem_root) + Item_ref(thd, context, db_name, table_name, field_name); + if (!rf) + return 1; + bool err= rf->fix_fields(thd, (Item **) &rf) || rf->check_cols(1); + if (err) + return TRUE; + + thd->change_item_tree(reference, + select->context_analysis_place == + IN_GROUP_BY && + alias_name_used ? *rf->ref : rf); + + /* + We can not "move" aggregate function in the place where + its arguments are not defined. + */ + set_max_sum_func_level(thd, select); + return FALSE; + } + } + } + + if (unlikely(!select)) + { + my_error(ER_BAD_FIELD_ERROR, MYF(0), full_name(), thd->where); + goto error; + } + if ((ret= fix_outer_field(thd, &from_field, reference)) < 0) + goto error; + outer_fixed= TRUE; + if (!ret) + goto mark_non_agg_field; + } + else if (!from_field) + goto error; + + table_list= (cached_table ? cached_table : + from_field != view_ref_found ? + from_field->table->pos_in_table_list : 0); + if (!outer_fixed && table_list && table_list->select_lex && + context->select_lex && + table_list->select_lex != context->select_lex && + !context->select_lex->is_merged_child_of(table_list->select_lex) && + is_outer_table(table_list, context->select_lex)) + { + int ret; + if ((ret= fix_outer_field(thd, &from_field, reference)) < 0) + goto error; + outer_fixed= 1; + if (!ret) + goto mark_non_agg_field; + } + + if (select && !thd->lex->current_select->no_wrap_view_item && + thd->lex->in_sum_func && + thd->lex->in_sum_func->nest_level == + select->nest_level) + set_if_bigger(thd->lex->in_sum_func->max_arg_level, + select->nest_level); + /* + if it is not expression from merged VIEW we will set this field. + + We can leave expression substituted from view for next PS/SP rexecution + (i.e. do not register this substitution for reverting on cleanup() + (register_item_tree_changing())), because this subtree will be + fix_field'ed during setup_tables()->setup_underlying() (i.e. 
before + all other expressions of query, and references on tables which do + not present in query will not make problems. + + Also we suppose that view can't be changed during PS/SP life. + */ + if (from_field == view_ref_found) + return FALSE; + + set_field(from_field); + } + else if (should_mark_column(thd->column_usage)) + { + TABLE *table= field->table; + MY_BITMAP *current_bitmap, *other_bitmap; + if (thd->column_usage == MARK_COLUMNS_READ) + { + current_bitmap= table->read_set; + other_bitmap= table->write_set; + } + else + { + current_bitmap= table->write_set; + other_bitmap= table->read_set; + } + if (!bitmap_fast_test_and_set(current_bitmap, field->field_index)) + { + if (!bitmap_is_set(other_bitmap, field->field_index)) + { + /* First usage of column */ + table->used_fields++; // Used to optimize loops + /* purecov: begin inspected */ + table->covering_keys.intersect(field->part_of_key); + /* purecov: end */ + } + } + } +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (any_privileges) + { + const char *db, *tab; + db= field->table->s->db.str; + tab= field->table->s->table_name.str; + if (!(have_privileges= (get_column_grant(thd, &field->table->grant, + db, tab, field_name.str) & + VIEW_ANY_ACL))) + { + my_error(ER_COLUMNACCESS_DENIED_ERROR, MYF(0), + "ANY", thd->security_ctx->priv_user, + thd->security_ctx->host_or_ip, field_name.str, tab); + goto error; + } + } +#endif + base_flags|= item_base_t::FIXED; + if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY && + !outer_fixed && !thd->lex->in_sum_func && + select && + select->cur_pos_in_select_list != UNDEF_POS && + select->join) + { + select->join->non_agg_fields.push_back(this, thd->mem_root); + marker= select->cur_pos_in_select_list; + } +mark_non_agg_field: + /* + table->pos_in_table_list can be 0 when fixing partition functions + or virtual fields. 
+ */ + if (fixed() && (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY) && + field->table->pos_in_table_list) + { + /* + Mark selects according to presence of non aggregated fields. + Fields from outer selects added to the aggregate function + outer_fields list as it's unknown at the moment whether it's + aggregated or not. + We're using the select lex of the cached table (if present). + */ + SELECT_LEX *select_lex; + if (cached_table) + select_lex= cached_table->select_lex; + else if (!(select_lex= field->table->pos_in_table_list->select_lex)) + { + /* + This can only happen when there is no real table in the query. + We are using the field's resolution context. context->select_lex is eee + safe for use because it's either the SELECT we want to use + (the current level) or a stub added by non-SELECT queries. + */ + select_lex= context->select_lex; + } + if (!thd->lex->in_sum_func) + select_lex->set_non_agg_field_used(true); + else + { + if (outer_fixed) + thd->lex->in_sum_func->outer_fields.push_back(this, thd->mem_root); + else if (thd->lex->in_sum_func->nest_level != + select->nest_level) + select_lex->set_non_agg_field_used(true); + } + } + return FALSE; + +error: + context->process_error(thd); + return TRUE; +} + +bool Item_field::post_fix_fields_part_expr_processor(void *int_arg) +{ + DBUG_ASSERT(fixed()); + if (field->vcol_info) + field->vcol_info->mark_as_in_partitioning_expr(); + /* + Update table_name to be real table name, not the alias. 
Because alias is + reallocated for every statement, and this item has a long life time */ + table_name= field->table->s->table_name; + return FALSE; +} + +bool Item_field::check_valid_arguments_processor(void *bool_arg) +{ + Virtual_column_info *vcol= field->vcol_info; + if (!vcol) + return FALSE; + return vcol->expr->walk(&Item::check_partition_func_processor, 0, NULL) + || vcol->expr->walk(&Item::check_valid_arguments_processor, 0, NULL); +} + +void Item_field::cleanup() +{ + DBUG_ENTER("Item_field::cleanup"); + Item_ident::cleanup(); + depended_from= NULL; + /* + Even if this object was created by direct link to field in setup_wild() + it will be linked correctly next time by name of field and table alias. + I.e. we can drop 'field'. + */ + field= 0; + item_equal= NULL; + null_value= FALSE; + refers_to_temp_table= NO_TEMP_TABLE; + DBUG_VOID_RETURN; +} + +/** + Find a field among specified multiple equalities. + + The function first searches the field among multiple equalities + of the current level (in the cond_equal->current_level list). + If it fails, it continues searching in upper levels accessed + through a pointer cond_equal->upper_levels. + The search terminates as soon as a multiple equality containing + the field is found. + + @param cond_equal reference to list of multiple equalities where + the field (this object) is to be looked for + + @return + - First Item_equal containing the field, if success + - 0, otherwise +*/ + +Item_equal *Item_field::find_item_equal(COND_EQUAL *cond_equal) +{ + Item_equal *item= 0; + while (cond_equal) + { + List_iterator_fast li(cond_equal->current_level); + while ((item= li++)) + { + if (item->contains(field)) + return item; + } + /* + The field is not found in any of the multiple equalities + of the current level. Look for it in upper levels + */ + cond_equal= cond_equal->upper_levels; + } + return 0; +} + + +/** + Set a pointer to the multiple equality the field reference belongs to + (if any). 
+ + The function looks for a multiple equality containing the field item + among those referenced by arg. + In the case such equality exists the function does the following. + If the found multiple equality contains a constant, then the field + reference is substituted for this constant, otherwise it sets a pointer + to the multiple equality in the field item. + + + @param arg reference to list of multiple equalities where + the field (this object) is to be looked for + + @note + This function is supposed to be called as a callback parameter in calls + of the compile method. + + @return + - pointer to the replacing constant item, if the field item was substituted + - pointer to the field item, otherwise. +*/ + +Item *Item_field::propagate_equal_fields(THD *thd, + const Context &ctx, + COND_EQUAL *arg) +{ + if (!(item_equal= find_item_equal(arg))) + return this; + if (!field->can_be_substituted_to_equal_item(ctx, item_equal)) + { + item_equal= NULL; + return this; + } + Item *item= item_equal->get_const(); + if (!item) + { + /* + The found Item_equal is Okey, but it does not have a constant + item yet. Keep this->item_equal point to the found Item_equal. + */ + return this; + } + if (!(item= field->get_equal_const_item(thd, ctx, item))) + { + /* + Could not do safe conversion from the original constant item + to a field-compatible constant item. + For example, we tried to optimize: + WHERE date_column=' garbage ' AND LENGTH(date_column)=8; + to + WHERE date_column=' garbage ' AND LENGTH(DATE'XXXX-YY-ZZ'); + but failed to create a valid DATE literal from the given string literal. + + Do not do constant propagation in such cases and unlink + "this" from the found Item_equal (as this equality not useful). + */ + item_equal= NULL; + return this; + } + return item; +} + + +/** + Replace an Item_field for an equal Item_field that evaluated earlier + (if any). 
+ + If this->item_equal points to some item and coincides with arg then + the function returns a pointer to an item that is taken from + the very beginning of the item_equal list which the Item_field + object refers to (belongs to) unless item_equal contains a constant + item. In this case the function returns this constant item, + (if the substitution does not require conversion). + If the Item_field object does not refer any Item_equal object + 'this' is returned . + + @param arg NULL or points to so some item of the Item_equal type + + + @note + This function is supposed to be called as a callback parameter in calls + of the transformer method. + + @return + - pointer to a replacement Item_field if there is a better equal item or + a pointer to a constant equal item; + - this - otherwise. +*/ + +Item *Item_field::replace_equal_field(THD *thd, uchar *arg) +{ + REPLACE_EQUAL_FIELD_ARG* param= (REPLACE_EQUAL_FIELD_ARG*)arg; + if (item_equal && item_equal == param->item_equal) + { + Item *const_item2= item_equal->get_const(); + if (const_item2) + { + /* + Currently we don't allow to create Item_equal with compare_type() + different from its Item_field's cmp_type(). + Field_xxx::test_if_equality_guarantees_uniqueness() prevent this. + Also, Item_field::propagate_equal_fields() does not allow to assign + this->item_equal to any instances of Item_equal if "this" is used + in a non-native comparison context, or with an incompatible collation. + So the fact that we have (item_equal != NULL) means that the currently + processed function (the owner of "this") uses the field in its native + comparison context, and it's safe to replace it to the constant from + item_equal. 
+ */ + DBUG_ASSERT(type_handler_for_comparison()->cmp_type() == + item_equal->compare_type_handler()->cmp_type()); + return const_item2; + } + Item_ident *subst= + (Item_ident *) (item_equal->get_first(param->context_tab, this)); + if (subst) + { + Item_field *subst2= (Item_field *) (subst->real_item()); + if (subst2 && !field->eq(subst2->field)) + return subst2; + } + } + return this; +} + + +void Item::init_make_send_field(Send_field *tmp_field, + const Type_handler *h) +{ + tmp_field->db_name= empty_clex_str; + tmp_field->org_table_name= empty_clex_str; + tmp_field->org_col_name= empty_clex_str; + tmp_field->table_name= empty_clex_str; + tmp_field->col_name= name; + tmp_field->flags= (maybe_null() ? 0 : NOT_NULL_FLAG) | + (my_binary_compare(charset_for_protocol()) ? + BINARY_FLAG : 0); + tmp_field->set_handler(h); + tmp_field->length=max_length; + tmp_field->decimals=decimals; + if (unsigned_flag) + tmp_field->flags |= UNSIGNED_FLAG; + static_cast(*tmp_field)= + Send_field_extended_metadata(); + h->Item_append_extended_type_info(tmp_field, this); +} + +void Item::make_send_field(THD *thd, Send_field *tmp_field) +{ + init_make_send_field(tmp_field, type_handler()); +} + + +void Item_empty_string::make_send_field(THD *thd, Send_field *tmp_field) +{ + init_make_send_field(tmp_field, string_type_handler()); +} + + +/** + Verifies that the input string is well-formed according to its character set. + @param send_error If true, call my_error if string is not well-formed. + + Will truncate input string if it is not well-formed. + + @return + If well-formed: input string. 
+ If not well-formed: + if strict mode: NULL pointer and we set this Item's value to NULL + if not strict mode: input string truncated up to last good character + */ +String *Item::check_well_formed_result(String *str, bool send_error) +{ + /* Check whether we got a well-formed string */ + CHARSET_INFO *cs= str->charset(); + uint wlen= str->well_formed_length(); + null_value= false; + if (unlikely(wlen < str->length())) + { + THD *thd= current_thd; + char hexbuf[7]; + uint diff= str->length() - wlen; + set_if_smaller(diff, 3); + octet2hex(hexbuf, str->ptr() + wlen, diff); + if (send_error) + { + my_error(ER_INVALID_CHARACTER_STRING, MYF(0), + cs->cs_name.str, hexbuf); + return 0; + } + if (thd->is_strict_mode()) + { + null_value= 1; + str= 0; + } + else + { + str->length(wlen); + } + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_INVALID_CHARACTER_STRING, + ER_THD(thd, ER_INVALID_CHARACTER_STRING), cs->cs_name.str, + hexbuf); + } + return str; +} + + +/** + Copy a string with optional character set conversion. +*/ +bool +String_copier_for_item::copy_with_warn(CHARSET_INFO *dstcs, String *dst, + CHARSET_INFO *srccs, const char *src, + uint32 src_length, uint32 nchars) +{ + if (unlikely((dst->copy(dstcs, srccs, src, src_length, nchars, this)))) + return true; // EOM + const char *pos; + if (unlikely(pos= well_formed_error_pos())) + { + ErrConvString err(pos, src_length - (pos - src), &my_charset_bin); + push_warning_printf(m_thd, Sql_condition::WARN_LEVEL_WARN, + ER_INVALID_CHARACTER_STRING, + ER_THD(m_thd, ER_INVALID_CHARACTER_STRING), + srccs == &my_charset_bin ? 
+ dstcs->cs_name.str : srccs->cs_name.str, + err.ptr()); + return false; + } + if (unlikely(pos= cannot_convert_error_pos())) + { + char buf[16]; + int mblen= srccs->charlen(pos, src + src_length); + DBUG_ASSERT(mblen > 0 && mblen * 2 + 1 <= (int) sizeof(buf)); + octet2hex(buf, pos, mblen); + push_warning_printf(m_thd, Sql_condition::WARN_LEVEL_WARN, + ER_CANNOT_CONVERT_CHARACTER, + ER_THD(m_thd, ER_CANNOT_CONVERT_CHARACTER), + srccs->cs_name.str, buf, dstcs->cs_name.str); + return false; + } + return false; +} + + +/* + Compare two items using a given collation + + SYNOPSIS + eq_by_collation() + item item to compare with + binary_cmp TRUE <-> compare as binaries + cs collation to use when comparing strings + + DESCRIPTION + This method works exactly as Item::eq if the collation cs coincides with + the collation of the compared objects. Otherwise, first the collations that + differ from cs are replaced for cs and then the items are compared by + Item::eq. After the comparison the original collations of items are + restored. 
+ + RETURN + 1 compared items has been detected as equal + 0 otherwise +*/ + +bool Item::eq_by_collation(Item *item, bool binary_cmp, CHARSET_INFO *cs) +{ + CHARSET_INFO *save_cs= 0; + CHARSET_INFO *save_item_cs= 0; + if (collation.collation != cs) + { + save_cs= collation.collation; + collation.collation= cs; + } + if (item->collation.collation != cs) + { + save_item_cs= item->collation.collation; + item->collation.collation= cs; + } + bool res= eq(item, binary_cmp); + if (save_cs) + collation.collation= save_cs; + if (save_item_cs) + item->collation.collation= save_item_cs; + return res; +} + + +/* ARGSUSED */ +void Item_field::make_send_field(THD *thd, Send_field *tmp_field) +{ + field->make_send_field(tmp_field); + DBUG_ASSERT(tmp_field->table_name.str != 0); + if (name.str) + { + DBUG_ASSERT(name.length == strlen(name.str)); + tmp_field->col_name= name; // Use user supplied name + } + if (table_name.str) + tmp_field->table_name= table_name; + if (db_name.str) + tmp_field->db_name= db_name; +} + + +/** + Save a field value in another field + + @param from Field to take the value from + @param [out] null_value Pointer to the null_value flag to set + @param to Field to save the value in + @param no_conversions How to deal with NULL value + + @details + The function takes the value of the field 'from' and, if this value + is not null, it saves in the field 'to' setting off the flag referenced + by 'null_value'. Otherwise this flag is set on and field 'to' is + also set to null possibly with conversion. 
+ + @note + This function is used by the functions Item_field::save_in_field, + Item_field::save_org_in_field and Item_ref::save_in_field + + @retval FALSE OK + @retval TRUE Error + +*/ + +static int save_field_in_field(Field *from, bool *null_value, + Field *to, bool no_conversions) +{ + int res; + DBUG_ENTER("save_field_in_field"); + if (from->is_null()) + { + (*null_value)= 1; + DBUG_RETURN(set_field_to_null_with_conversions(to, no_conversions)); + } + to->set_notnull(); + (*null_value)= 0; + + /* + If we're setting the same field as the one we're reading from there's + nothing to do. This can happen in 'SET x = x' type of scenarios. + */ + if (to == from) + DBUG_RETURN(0); + + res= field_conv(to, from); + DBUG_RETURN(res); +} + + +fast_field_copier Item_field::setup_fast_field_copier(Field *to) +{ + return to->get_fast_field_copier(field); +} + +void Item_field::save_in_result_field(bool no_conversions) +{ + bool unused; + save_field_in_field(field, &unused, result_field, no_conversions); +} + +/** + Set a field's value from a item. +*/ + +void Item_field::save_org_in_field(Field *to, + fast_field_copier fast_field_copier_func) +{ + DBUG_ENTER("Item_field::save_org_in_field"); + DBUG_PRINT("enter", ("setup: %p data: %p", + to, fast_field_copier_func)); + if (fast_field_copier_func) + { + if (field->is_null()) + { + null_value= TRUE; + set_field_to_null_with_conversions(to, TRUE); + DBUG_VOID_RETURN; + } + to->set_notnull(); + if (to == field) + { + null_value= 0; + DBUG_VOID_RETURN; + } + (*fast_field_copier_func)(to, field); + } + else + save_field_in_field(field, &null_value, to, TRUE); + DBUG_VOID_RETURN; +} + + +int Item_field::save_in_field(Field *to, bool no_conversions) +{ + return save_field_in_field(result_field, &null_value, to, no_conversions); +} + + +/** + Store null in field. + + This is used on INSERT. + Allow NULL to be inserted in timestamp and auto_increment values. 
+ + @param field Field where we want to store NULL + + @retval + 0 ok + @retval + 1 Field doesn't support NULL values and can't handle 'field = NULL' +*/ + +int Item_null::save_in_field(Field *field, bool no_conversions) +{ + return set_field_to_null_with_conversions(field, no_conversions); +} + + +/** + Store null in field. + + @param field Field where we want to store NULL + + @retval + 0 OK + @retval + 1 Field doesn't support NULL values +*/ + +int Item_null::save_safe_in_field(Field *field) +{ + return set_field_to_null(field); +} + + +/* + This implementation can lose str_value content, so if the + Item uses str_value to store something, it should + reimplement it's ::save_in_field() as Item_string, for example, does. + + Note: all Item_XXX::val_str(str) methods must NOT assume that + str != str_value. For example, see fix for bug #44743. +*/ +int Item::save_str_in_field(Field *field, bool no_conversions) +{ + String *result; + CHARSET_INFO *cs= collation.collation; + char buff[MAX_FIELD_WIDTH]; // Alloc buffer for small columns + str_value.set_buffer_if_not_allocated(buff, sizeof(buff), cs); + result=val_str(&str_value); + if (null_value) + { + str_value.set_buffer_if_not_allocated(0, 0, cs); + return set_field_to_null_with_conversions(field, no_conversions); + } + + /* NOTE: If null_value == FALSE, "result" must be not NULL. 
*/ + + field->set_notnull(); + int error= field->store(result->ptr(),result->length(),cs); + str_value.set_buffer_if_not_allocated(0, 0, cs); + return error; +} + + +int Item::save_real_in_field(Field *field, bool no_conversions) +{ + double nr= val_real(); + if (null_value) + return set_field_to_null_with_conversions(field, no_conversions); + field->set_notnull(); + return field->store(nr); +} + + +int Item::save_decimal_in_field(Field *field, bool no_conversions) +{ + VDec value(this); + if (value.is_null()) + return set_field_to_null_with_conversions(field, no_conversions); + field->set_notnull(); + return field->store_decimal(value.ptr()); +} + + +int Item::save_int_in_field(Field *field, bool no_conversions) +{ + longlong nr= val_int(); + if (null_value) + return set_field_to_null_with_conversions(field, no_conversions); + field->set_notnull(); + return field->store(nr, unsigned_flag); +} + + +int Item::save_in_field(Field *field, bool no_conversions) +{ + int error= type_handler()->Item_save_in_field(this, field, no_conversions); + return error ? error : (field->table->in_use->is_error() ? 1 : 0); +} + + +bool Item::save_in_param(THD *thd, Item_param *param) +{ + return param->set_from_item(thd, this); +} + + +int Item_string::save_in_field(Field *field, bool no_conversions) +{ + String *result; + result=val_str(&str_value); + return save_str_value_in_field(field, result); +} + + +Item *Item_string::clone_item(THD *thd) +{ + LEX_CSTRING val; + str_value.get_value(&val); + return new (thd->mem_root) Item_string(thd, name, val, collation.collation); +} + + +Item_basic_constant * +Item_string::make_string_literal_concat(THD *thd, const LEX_CSTRING *str) +{ + append(str->str, (uint32) str->length); + if (!(collation.repertoire & MY_REPERTOIRE_EXTENDED)) + { + // If the string has been pure ASCII so far, check the new part. 
+ CHARSET_INFO *cs= thd->variables.collation_connection; + collation.repertoire|= my_string_repertoire(cs, str->str, str->length); + } + return this; +} + + +/* + If "this" is a reasonably short pure ASCII string literal, + try to parse known ODBC-style date, time or timestamp literals, + e.g: + SELECT {d'2001-01-01'}; + SELECT {t'10:20:30'}; + SELECT {ts'2001-01-01 10:20:30'}; +*/ +Item *Item_string::make_odbc_literal(THD *thd, const LEX_CSTRING *typestr) +{ + Item_literal *res; + const Type_handler *h; + if (collation.repertoire == MY_REPERTOIRE_ASCII && + str_value.length() < MAX_DATE_STRING_REP_LENGTH * 4 && + (h= Type_handler::odbc_literal_type_handler(typestr)) && + (res= h->create_literal_item(thd, val_str(NULL), false))) + return res; + /* + h->create_literal_item() returns NULL if failed to parse the string, + or the string format did not match the type, e.g.: {d'2001-01-01 10:10:10'} + */ + return this; +} + + +static int save_int_value_in_field (Field *field, longlong nr, + bool null_value, bool unsigned_flag) +{ + if (null_value) + return set_field_to_null(field); + field->set_notnull(); + return field->store(nr, unsigned_flag); +} + + +int Item_int::save_in_field(Field *field, bool no_conversions) +{ + return save_int_value_in_field (field, val_int(), null_value, unsigned_flag); +} + + +Item *Item_int::clone_item(THD *thd) +{ + return new (thd->mem_root) Item_int(thd, name.str, value, max_length, unsigned_flag); +} + + +void Item_datetime::set(longlong packed, enum_mysql_timestamp_type ts_type) +{ + unpack_time(packed, <ime, ts_type); +} + +int Item_datetime::save_in_field(Field *field, bool no_conversions) +{ + field->set_notnull(); + return field->store_time_dec(<ime, decimals); +} + +longlong Item_datetime::val_int() +{ + return TIME_to_ulonglong(<ime); +} + +int Item_decimal::save_in_field(Field *field, bool no_conversions) +{ + field->set_notnull(); + return field->store_decimal(&decimal_value); +} + + +Item *Item_int_with_ref::clone_item(THD 
*thd) +{ + DBUG_ASSERT(ref->const_item()); + /* + We need to evaluate the constant to make sure it works with + parameter markers. + */ + return (ref->unsigned_flag ? + new (thd->mem_root) + Item_uint(thd, ref->name.str, ref->val_int(), ref->max_length) : + new (thd->mem_root) + Item_int(thd, ref->name.str, ref->val_int(), ref->max_length)); +} + + +Item *Item::neg(THD *thd) +{ + return new (thd->mem_root) Item_func_neg(thd, this); +} + +Item *Item_int::neg(THD *thd) +{ + /* + The following if should never be true with code generated by + our parser as LONGLONG_MIN values will be stored as decimal. + The code is here in case someone generates an int from inside + MariaDB + */ + if (unlikely(value == LONGLONG_MIN)) + { + /* Precision for int not big enough; Convert constant to decimal */ + Item_decimal *item= new (thd->mem_root) Item_decimal(thd, value, 0); + return item ? item->neg(thd) : item; + } + if (value > 0) + max_length++; + else if (value < 0 && max_length) + max_length--; + value= -value; + name= null_clex_str; + return this; +} + +Item *Item_decimal::neg(THD *thd) +{ + my_decimal_neg(&decimal_value); + unsigned_flag= 0; + name= null_clex_str; + max_length= my_decimal_precision_to_length_no_truncation( + decimal_value.intg + decimals, decimals, unsigned_flag); + return this; +} + +Item *Item_float::neg(THD *thd) +{ + if (value > 0) + max_length++; + else if (value < 0 && max_length) + max_length--; + value= -value; + presentation= 0; + name= null_clex_str; + return this; +} + +Item *Item_uint::neg(THD *thd) +{ + Item_decimal *item; + if (((ulonglong)value) <= LONGLONG_MAX) + return new (thd->mem_root) Item_int(thd, -value, max_length+1); + if (value == LONGLONG_MIN) + return new (thd->mem_root) Item_int(thd, value, max_length+1); + if (!(item= new (thd->mem_root) Item_decimal(thd, value, 1))) + return 0; + return item->neg(thd); +} + + +Item *Item_uint::clone_item(THD *thd) +{ + return new (thd->mem_root) Item_uint(thd, name.str, value, max_length); +} + 
+static uint nr_of_decimals(const char *str, const char *end) +{ + const char *decimal_point; + + /* Find position for '.' */ + for (;;) + { + if (str == end) + return 0; + if (*str == 'e' || *str == 'E') + return NOT_FIXED_DEC; + if (*str++ == '.') + break; + } + decimal_point= str; + for ( ; str < end && my_isdigit(system_charset_info, *str) ; str++) + ; + if (str < end && (*str == 'e' || *str == 'E')) + return NOT_FIXED_DEC; + /* + QQ: + The number of decimal digist in fact should be (str - decimal_point - 1). + But it seems the result of nr_of_decimals() is never used! + + In case of 'e' and 'E' nr_of_decimals returns NOT_FIXED_DEC. + In case if there is no 'e' or 'E' parser code in sql_yacc.yy + never calls Item_float::Item_float() - it creates Item_decimal instead. + + The only piece of code where we call Item_float::Item_float(str, len) + without having 'e' or 'E' is item_xmlfunc.cc, but this Item_float + never appears in metadata itself. Changing the code to return + (str - decimal_point - 1) does not make any changes in the test results. + + This should be addressed somehow. + Looks like a reminder from before real DECIMAL times. + */ + return (uint) (str - decimal_point); +} + + +/** + This function is only called during parsing: + - when parsing SQL query from sql_yacc.yy + - when parsing XPath query from item_xmlfunc.cc + We will signal an error if value is not a true double value (overflow): + eng: Illegal %s '%-.192s' value found during parsing + + Note: the string is NOT null terminated when called from item_xmlfunc.cc, + so this->name will contain some SQL query tail behind the "length" bytes. + This is Ok for now, as this Item is never seen in SHOW, + or EXPLAIN, or anywhere else in metadata. + Item->name should be fixed to use LEX_STRING eventually. 
+*/ + +Item_float::Item_float(THD *thd, const char *str_arg, size_t length): + Item_num(thd) +{ + int error; + char *end_not_used; + value= my_charset_bin.strntod((char*) str_arg, length, &end_not_used, &error); + if (unlikely(error)) + { + char tmp[NAME_LEN + 2]; + my_snprintf(tmp, sizeof(tmp), "%.*s", static_cast(length), str_arg); + my_error(ER_ILLEGAL_VALUE_FOR_TYPE, MYF(0), "double", tmp); + } + presentation= name.str= str_arg; + name.length= strlen(str_arg); + decimals=(uint8) nr_of_decimals(str_arg, str_arg+length); + max_length=(uint32)length; +} + + +int Item_float::save_in_field(Field *field, bool no_conversions) +{ + double nr= val_real(); + if (null_value) + return set_field_to_null(field); + field->set_notnull(); + return field->store(nr); +} + + +void Item_float::print(String *str, enum_query_type query_type) +{ + if (presentation) + { + str->append(presentation, strlen(presentation)); + return; + } + char buffer[20]; + String num(buffer, sizeof(buffer), &my_charset_bin); + num.set_real(value, decimals, &my_charset_bin); + str->append(num); +} + + +inline uint char_val(char X) +{ + return (uint) (X >= '0' && X <= '9' ? X-'0' : + X >= 'A' && X <= 'Z' ? 
X-'A'+10 : + X-'a'+10); +} + + +void Item_hex_constant::hex_string_init(THD *thd, const char *str, size_t str_length) +{ + max_length=(uint)((str_length+1)/2); + char *ptr=(char*) thd->alloc(max_length+1); + if (!ptr) + { + str_value.set("", 0, &my_charset_bin); + return; + } + str_value.set(ptr,max_length,&my_charset_bin); + char *end=ptr+max_length; + if (max_length*2 != str_length) + *ptr++=char_val(*str++); // Not even, assume 0 prefix + while (ptr != end) + { + *ptr++= (char) (char_val(str[0])*16+char_val(str[1])); + str+=2; + } + *ptr=0; // Keep purify happy + collation.set(&my_charset_bin, DERIVATION_COERCIBLE); + unsigned_flag= 1; +} + + +void Item_hex_hybrid::print(String *str, enum_query_type query_type) +{ + uint32 len= MY_MIN(str_value.length(), sizeof(longlong)); + const char *ptr= str_value.ptr() + str_value.length() - len; + str->append("0x",2); + str->append_hex(ptr, len); +} + + +decimal_digits_t Item_hex_hybrid::decimal_precision() const +{ + switch (max_length) {// HEX DEC + case 0: // ---- --- + case 1: return 3; // 0xFF 255 + case 2: return 5; // 0xFFFF 65535 + case 3: return 8; // 0xFFFFFF 16777215 + case 4: return 10; // 0xFFFFFFFF 4294967295 + case 5: return 13; // 0xFFFFFFFFFF 1099511627775 + case 6: return 15; // 0xFFFFFFFFFFFF 281474976710655 + case 7: return 17; // 0xFFFFFFFFFFFFFF 72057594037927935 + } + return 20; // 0xFFFFFFFFFFFFFFFF 18446744073709551615 +} + + +void Item_hex_string::print(String *str, enum_query_type query_type) +{ + str->append("X'",2); + str->append_hex(str_value.ptr(), str_value.length()); + str->append('\''); +} + + +/* + bin item. + In string context this is a binary string. + In number context this is a longlong value. 
+*/ + +Item_bin_string::Item_bin_string(THD *thd, const char *str, size_t str_length): + Item_hex_hybrid(thd) +{ + const char *end= str + str_length - 1; + char *ptr; + uchar bits= 0; + uint power= 1; + + max_length= (uint)((str_length + 7) >> 3); + if (!(ptr= (char*) thd->alloc(max_length + 1))) + return; + str_value.set(ptr, max_length, &my_charset_bin); + + if (max_length > 0) + { + ptr+= max_length - 1; + ptr[1]= 0; // Set end null for string + for (; end >= str; end--) + { + if (power == 256) + { + power= 1; + *ptr--= bits; + bits= 0; + } + if (*end == '1') + bits|= power; + power<<= 1; + } + *ptr= (char) bits; + } + else + ptr[0]= 0; + + collation.set(&my_charset_bin, DERIVATION_COERCIBLE); +} + + +void Item_bin_string::print(String *str, enum_query_type query_type) +{ + if (!str_value.length()) + { + /* + Historically a bit string such as b'01100001' + prints itself in the hex hybrid notation: 0x61 + In case of an empty bit string b'', the hex hybrid + notation would result in a bad syntax: 0x + So let's print empty bit strings using bit string notation: b'' + */ + static const LEX_CSTRING empty_bit_string= {STRING_WITH_LEN("b''")}; + str->append(empty_bit_string); + } + else + Item_hex_hybrid::print(str, query_type); +} + + +void Item_date_literal::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("DATE'")); + char buf[MAX_DATE_STRING_REP_LENGTH]; + int length= my_date_to_str(cached_time.get_mysql_time(), buf); + str->append(buf, length); + str->append('\''); +} + + +Item *Item_date_literal::clone_item(THD *thd) +{ + return new (thd->mem_root) Item_date_literal(thd, &cached_time); +} + + +bool Item_date_literal::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + fuzzydate |= sql_mode_for_dates(thd); + cached_time.copy_to_mysql_time(ltime); + return (null_value= check_date_with_warn(thd, ltime, fuzzydate, + MYSQL_TIMESTAMP_ERROR)); +} + + +void Item_datetime_literal::print(String *str, enum_query_type query_type) 
+{ + str->append(STRING_WITH_LEN("TIMESTAMP'")); + char buf[MAX_DATE_STRING_REP_LENGTH]; + int length= my_datetime_to_str(cached_time.get_mysql_time(), buf, decimals); + str->append(buf, length); + str->append('\''); +} + + +Item *Item_datetime_literal::clone_item(THD *thd) +{ + return new (thd->mem_root) Item_datetime_literal(thd, &cached_time, decimals); +} + + +bool Item_datetime_literal::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + fuzzydate |= sql_mode_for_dates(thd); + cached_time.copy_to_mysql_time(ltime); + return (null_value= check_date_with_warn(thd, ltime, fuzzydate, + MYSQL_TIMESTAMP_ERROR)); +} + + +void Item_time_literal::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("TIME'")); + char buf[MAX_DATE_STRING_REP_LENGTH]; + int length= my_time_to_str(cached_time.get_mysql_time(), buf, decimals); + str->append(buf, length); + str->append('\''); +} + + +Item *Item_time_literal::clone_item(THD *thd) +{ + return new (thd->mem_root) Item_time_literal(thd, &cached_time, decimals); +} + + +bool Item_time_literal::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + cached_time.copy_to_mysql_time(ltime); + if (fuzzydate & TIME_TIME_ONLY) + return (null_value= false); + return (null_value= check_date_with_warn(thd, ltime, fuzzydate, + MYSQL_TIMESTAMP_ERROR)); +} + + + +/** + Pack data in buffer for sending. +*/ + +bool Item_null::send(Protocol *protocol, st_value *buffer) +{ + return protocol->store_null(); +} + + +/** + Check if an item is a constant one and can be cached. + + @param arg [out] TRUE <=> Cache this item. + + @return TRUE Go deeper in item tree. + @return FALSE Don't go deeper in item tree. +*/ + +bool Item::cache_const_expr_analyzer(uchar **arg) +{ + uchar *cache_flag= *arg; + if (!*cache_flag) + { + Item *item= real_item(); + /* + Cache constant items unless it's a basic constant, constant field or + a subselect (they use their own cache). 
+ */ + if (const_item() && + !(basic_const_item() || item->basic_const_item() || + item->type() == Item::NULL_ITEM || /* Item_name_const hack */ + item->type() == Item::FIELD_ITEM || + item->type() == SUBSELECT_ITEM || + item->type() == CACHE_ITEM || + /* + Do not cache GET_USER_VAR() function as its const_item() may + return TRUE for the current thread but it still may change + during the execution. + */ + (item->type() == Item::FUNC_ITEM && + ((Item_func*)item)->functype() == Item_func::GUSERVAR_FUNC))) + *cache_flag= TRUE; + return TRUE; + } + return FALSE; +} + + +/** + Cache item if needed. + + @param arg TRUE <=> Cache this item. + + @return cache if cache needed. + @return this otherwise. +*/ + +Item* Item::cache_const_expr_transformer(THD *thd, uchar *arg) +{ + if (*arg) + { + *arg= FALSE; + Item_cache *cache= get_cache(thd); + if (!cache) + return NULL; + cache->setup(thd, this); + cache->store(this); + return cache; + } + return this; +} + +/** + Find Item by reference in the expression +*/ +bool Item::find_item_processor(void *arg) +{ + return (this == ((Item *) arg)); +} + +bool Item_field::send(Protocol *protocol, st_value *buffer) +{ + return protocol->store(result_field); +} + + +Item* Item::propagate_equal_fields_and_change_item_tree(THD *thd, + const Context &ctx, + COND_EQUAL *cond, + Item **place) +{ + Item *item= propagate_equal_fields(thd, ctx, cond); + if (item && item != this) + thd->change_item_tree(place, item); + return item; +} + + +void Item_field::update_null_value() +{ + /* + need to set no_errors to prevent warnings about type conversion + popping up. + */ + THD *thd= field->table->in_use; + int no_errors; + + no_errors= thd->no_errors; + thd->no_errors= 1; + type_handler()->Item_update_null_value(this); + thd->no_errors= no_errors; +} + + +/* + Add the field to the select list and substitute it for the reference to + the field. 
+ + SYNOPSIS + Item_field::update_value_transformer() + select_arg current select + + DESCRIPTION + If the field doesn't belong to the table being inserted into then it is + added to the select list, pointer to it is stored in the ref_pointer_array + of the select and the field itself is substituted for the Item_ref object. + This is done in order to get correct values from update fields that + belongs to the SELECT part in the INSERT .. SELECT .. ON DUPLICATE KEY + UPDATE statement. + + RETURN + 0 if error occurred + ref if all conditions are met + this field otherwise +*/ + +Item *Item_field::update_value_transformer(THD *thd, uchar *select_arg) +{ + SELECT_LEX *select= (SELECT_LEX*)select_arg; + DBUG_ASSERT(fixed()); + + if (field->table != select->context.table_list->table && + type() != Item::TRIGGER_FIELD_ITEM) + { + List *all_fields= &select->join->all_fields; + Ref_ptr_array &ref_pointer_array= select->ref_pointer_array; + int el= all_fields->elements; + Item_ref *ref; + + ref_pointer_array[el]= (Item*)this; + all_fields->push_front((Item*)this, thd->mem_root); + ref= new (thd->mem_root) + Item_ref(thd, &select->context, &ref_pointer_array[el], + table_name, field_name); + return ref; + } + return this; +} + + +/** + @brief + Prepare AND/OR formula for extraction of a pushable condition + + @param checker the checker callback function to be applied to the nodes + of the tree of the object + @param arg parameter to be passed to the checker + + @details + This method recursively traverses this AND/OR condition and for each + subformula of the condition it checks whether it can be usable for the + extraction of a pushable condition. The criteria of pushability of + a subformula is checked by the callback function 'checker' with one + parameter arg. The subformulas that are not usable are marked with + the flag MARKER_NO_EXTRACTION. + @note + This method is called before any call of build_pushable_cond. 
+ The flag MARKER_NO_EXTRACTION set in a subformula allows to avoid building + clones for the subformulas that are not used in the pushable condition. + @note + This method is called for pushdown conditions into materialized + derived tables/views optimization. + Item::pushable_cond_checker_for_derived() is passed as the actual callback + function. + Also it is called for pushdown conditions in materialized IN subqueries. + Item::pushable_cond_checker_for_subquery is passed as the actual + callback function. +*/ + +void Item::check_pushable_cond(Pushdown_checker checker, uchar *arg) +{ + clear_extraction_flag(); + if (type() == Item::COND_ITEM) + { + bool and_cond= ((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC; + List_iterator li(*((Item_cond*) this)->argument_list()); + uint count= 0; + Item *item; + while ((item=li++)) + { + item->check_pushable_cond(checker, arg); + if (item->get_extraction_flag() != MARKER_NO_EXTRACTION) + count++; + else if (!and_cond) + break; + } + if ((and_cond && count == 0) || item) + { + set_extraction_flag(MARKER_NO_EXTRACTION); + if (and_cond) + li.rewind(); + while ((item= li++)) + item->clear_extraction_flag(); + } + } + else if (!((this->*checker) (arg))) + set_extraction_flag(MARKER_NO_EXTRACTION); +} + + +/** + @brief + Build condition extractable from this condition for pushdown + + @param thd the thread handle + @param checker the checker callback function to be applied to the nodes + of the tree of the object to check if multiple equality + elements can be used to create equalities + @param arg parameter to be passed to the checker + + @details + This method finds out what condition that can be pushed down can be + extracted from this condition. If such condition C exists the + method builds the item for it. The method uses the flag MARKER_NO_EXTRACTION + set by the preliminary call of the method check_pushable_cond() to figure + out whether a subformula is pushable or not. 
+ In the case when this item is a multiple equality a checker method is + called to find the equal fields to build a new equality that can be + pushed down. + @note + The built condition C is always implied by the condition cond + (cond => C). The method tries to build the most restrictive such + condition (i.e. for any other condition C' such that cond => C' + we have C => C'). + @note + The build item is not ready for usage: substitution for the field items + has to be done and it has to be re-fixed. + @note + This method is called for pushdown conditions into materialized + derived tables/views optimization. + Item::pushable_equality_checker_for_derived() is passed as the actual + callback function. + Also it is called for pushdown conditions into materialized IN subqueries. + Item::pushable_equality_checker_for_subquery() is passed as the actual + callback function. + + @retval + the built condition pushable into if such a condition exists + NULL if there is no such a condition +*/ + +Item *Item::build_pushable_cond(THD *thd, + Pushdown_checker checker, + uchar *arg) +{ + bool is_multiple_equality= type() == Item::FUNC_ITEM && + ((Item_func*) this)->functype() == Item_func::MULT_EQUAL_FUNC; + + if (get_extraction_flag() == MARKER_NO_EXTRACTION) + return 0; + + if (type() == Item::COND_ITEM) + { + bool cond_and= false; + Item_cond *new_cond; + if (((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC) + { + cond_and= true; + new_cond= new (thd->mem_root) Item_cond_and(thd); + } + else + new_cond= new (thd->mem_root) Item_cond_or(thd); + if (!new_cond) + return 0; + List_iterator li(*((Item_cond*) this)->argument_list()); + Item *item; + bool is_fix_needed= false; + + while ((item=li++)) + { + if (item->get_extraction_flag() == MARKER_NO_EXTRACTION) + { + if (!cond_and) + return 0; + continue; + } + Item *fix= item->build_pushable_cond(thd, checker, arg); + if (!fix && !cond_and) + return 0; + if (!fix) + continue; + + if (fix->type() == Item::COND_ITEM && 
+ ((Item_cond*) fix)->functype() == Item_func::COND_AND_FUNC) + is_fix_needed= true; + + if (new_cond->argument_list()->push_back(fix, thd->mem_root)) + return 0; + } + if (is_fix_needed && new_cond->fix_fields(thd, 0)) + return 0; + + switch (new_cond->argument_list()->elements) + { + case 0: + return 0; + case 1: + return new_cond->argument_list()->head(); + default: + return new_cond; + } + } + else if (is_multiple_equality) + { + List equalities; + Item *new_cond= NULL; + if (((Item_equal *)this)->create_pushable_equalities(thd, &equalities, + checker, arg, true) || + (equalities.elements == 0)) + return 0; + + switch (equalities.elements) + { + case 0: + return 0; + case 1: + new_cond= equalities.head(); + break; + default: + new_cond= new (thd->mem_root) Item_cond_and(thd, equalities); + break; + } + if (new_cond && new_cond->fix_fields(thd, &new_cond)) + return 0; + return new_cond; + } + else if (get_extraction_flag() != MARKER_NO_EXTRACTION) + return build_clone(thd); + return 0; +} + + +static +Item *get_field_item_for_having(THD *thd, Item *item, st_select_lex *sel) +{ + DBUG_ASSERT(item->type() == Item::FIELD_ITEM || + (item->type() == Item::REF_ITEM && + ((Item_ref *) item)->ref_type() == Item_ref::VIEW_REF)); + Item_field *field_item= NULL; + table_map map= sel->master_unit()->derived->table->map; + Item_equal *item_equal= item->get_item_equal(); + if (!item_equal) + field_item= (Item_field *)(item->real_item()); + else + { + Item_equal_fields_iterator li(*item_equal); + Item *equal_item; + while ((equal_item= li++)) + { + if (equal_item->used_tables() == map) + { + field_item= (Item_field *)(equal_item->real_item()); + break; + } + } + } + if (field_item) + { + Item_ref *ref= new (thd->mem_root) Item_ref(thd, &sel->context, + field_item->field_name); + return ref; + } + DBUG_ASSERT(0); + return NULL; +} + + +Item *Item_field::derived_field_transformer_for_having(THD *thd, uchar *arg) +{ + st_select_lex *sel= (st_select_lex *)arg; + table_map tab_map= 
sel->master_unit()->derived->table->map; + if (item_equal && !(item_equal->used_tables() & tab_map)) + return this; + if (!item_equal && used_tables() != tab_map) + return this; + Item *item= get_field_item_for_having(thd, this, sel); + if (item) + item->marker|= MARKER_SUBSTITUTION; + return item; +} + + +Item *Item_direct_view_ref::derived_field_transformer_for_having(THD *thd, + uchar *arg) +{ + st_select_lex *sel= (st_select_lex *)arg; + context= &sel->context; + if ((*ref)->marker & MARKER_SUBSTITUTION) + { + this->marker|= MARKER_SUBSTITUTION; + return this; + } + table_map tab_map= sel->master_unit()->derived->table->map; + if ((item_equal && !(item_equal->used_tables() & tab_map)) || + !item_equal) + return this; + return get_field_item_for_having(thd, this, sel); +} + + +static +Item *find_producing_item(Item *item, st_select_lex *sel) +{ + DBUG_ASSERT(item->type() == Item::FIELD_ITEM || + (item->type() == Item::REF_ITEM && + ((Item_ref *) item)->ref_type() == Item_ref::VIEW_REF)); + Item_field *field_item= NULL; + Item_equal *item_equal= item->get_item_equal(); + table_map tab_map= sel->master_unit()->derived->table->map; + if (item->used_tables() == tab_map) + field_item= (Item_field *) (item->real_item()); + if (!field_item && item_equal) + { + Item_equal_fields_iterator it(*item_equal); + Item *equal_item; + while ((equal_item= it++)) + { + if (equal_item->used_tables() == tab_map) + { + field_item= (Item_field *) (equal_item->real_item()); + break; + } + } + } + List_iterator_fast li(sel->item_list); + if (field_item) + { + Item *producing_item= NULL; + uint field_no= field_item->field->field_index; + for (uint i= 0; i <= field_no; i++) + producing_item= li++; + return producing_item; + } + return NULL; +} + +Item *Item_field::derived_field_transformer_for_where(THD *thd, uchar *arg) +{ + st_select_lex *sel= (st_select_lex *)arg; + Item *producing_item= find_producing_item(this, sel); + if (producing_item) + { + Item *producing_clone= 
producing_item->build_clone(thd); + if (producing_clone) + producing_clone->marker|= MARKER_SUBSTITUTION; + return producing_clone; + } + return this; +} + +Item *Item_direct_view_ref::derived_field_transformer_for_where(THD *thd, + uchar *arg) +{ + if ((*ref)->marker & MARKER_SUBSTITUTION) + return (*ref); + if (item_equal) + { + st_select_lex *sel= (st_select_lex *)arg; + Item *producing_item= find_producing_item(this, sel); + DBUG_ASSERT (producing_item != NULL); + return producing_item->build_clone(thd); + } + return (*ref); +} + + +Item *Item_field::grouping_field_transformer_for_where(THD *thd, uchar *arg) +{ + st_select_lex *sel= (st_select_lex *)arg; + Field_pair *gr_field= find_matching_field_pair(this, sel->grouping_tmp_fields); + if (gr_field) + { + Item *producing_clone= + gr_field->corresponding_item->build_clone(thd); + if (producing_clone) + producing_clone->marker|= MARKER_SUBSTITUTION; + return producing_clone; + } + return this; +} + + +Item * +Item_direct_view_ref::grouping_field_transformer_for_where(THD *thd, + uchar *arg) +{ + if ((*ref)->marker & MARKER_SUBSTITUTION) + { + this->marker|= MARKER_SUBSTITUTION; + return this; + } + if (!item_equal) + return this; + st_select_lex *sel= (st_select_lex *)arg; + Field_pair *gr_field= find_matching_field_pair(this, + sel->grouping_tmp_fields); + return gr_field->corresponding_item->build_clone(thd); +} + +void Item_field::print(String *str, enum_query_type query_type) +{ + /* + If the field refers to a constant table, print the value. + There are two exceptions: + 1. For temporary (aka "work") tables, we can only access the derived temp. + tables. Other kinds of tables might already have been dropped. + 2. Don't print constants if QT_NO_DATA_EXPANSION or QT_VIEW_INTERNAL is + specified. 
+ */ + if ((refers_to_temp_table != REFERS_TO_OTHER_TMP) && // (1) + !(query_type & (QT_NO_DATA_EXPANSION | QT_VIEW_INTERNAL)) && // (2) + field && field->table->const_table) + { + print_value(str); + return; + } + /* + Item_ident doesn't have references to the underlying Field/TABLE objects, + so it's safe to make the following call even when the table is not + available already: + */ + Item_ident::print(str, query_type); +} + + +Item_ref::Item_ref(THD *thd, Name_resolution_context *context_arg, + Item **item, const LEX_CSTRING &table_name_arg, + const LEX_CSTRING &field_name_arg, + bool alias_name_used_arg): + Item_ident(thd, context_arg, null_clex_str, table_name_arg, field_name_arg), + ref(item), reference_trough_name(0) +{ + alias_name_used= alias_name_used_arg; + /* + This constructor used to create some internals references over fixed items + */ + if ((set_properties_only= (ref && *ref && (*ref)->fixed()))) + set_properties(); +} + +/* + A Field_enumerator-compatible class that invokes mark_as_dependent() for + each field that is a reference to some ancestor of current_select. +*/ +class Dependency_marker: public Field_enumerator +{ +public: + THD *thd; + st_select_lex *current_select; + virtual void visit_field(Item_field *item) + { + // Find which select the field is in. This is achieved by walking up + // the select tree and looking for the table of interest. + st_select_lex *sel; + for (sel= current_select; + sel ; + sel= (sel->context.outer_context ? 
+ sel->context.outer_context->select_lex: + NULL)) + { + List_iterator li(sel->leaf_tables); + TABLE_LIST *tbl; + while ((tbl= li++)) + { + if (tbl->table == item->field->table) + { + if (sel != current_select) + mark_as_dependent(thd, sel, current_select, item, item, false); + return; + } + } + } + } +}; + +Item_ref::Item_ref(THD *thd, TABLE_LIST *view_arg, Item **item, + const LEX_CSTRING &field_name_arg, + bool alias_name_used_arg): + Item_ident(thd, view_arg, field_name_arg), + ref(item), reference_trough_name(0) +{ + alias_name_used= alias_name_used_arg; + /* + This constructor is used to create some internal references over fixed items + */ + if ((set_properties_only= (ref && *ref && (*ref)->fixed()))) + set_properties(); +} + + +/** + Resolve the name of a reference to a column reference. + + The method resolves the column reference represented by 'this' as a column + present in one of: GROUP BY clause, SELECT clause, outer queries. It is + used typically for columns in the HAVING clause which are not under + aggregate functions. + + POSTCONDITION @n + Item_ref::ref is 0 or points to a valid item. + + @note + The name resolution algorithm used is (where [T_j] is an optional table + name that qualifies the column name): + + @code + resolve_extended([T_j].col_ref_i) + { + Search for a column or derived column named col_ref_i [in table T_j] + in the SELECT and GROUP clauses of Q. + + if such a column is NOT found AND // Lookup in outer queries. + there are outer queries + { + for each outer query Q_k beginning from the inner-most one + { + Search for a column or derived column named col_ref_i + [in table T_j] in the SELECT and GROUP clauses of Q_k. 
+ + if such a column is not found AND + - Q_k is not a group query AND + - Q_k is not inside an aggregate function + OR + - Q_(k-1) is not in a HAVING or SELECT clause of Q_k + { + search for a column or derived column named col_ref_i + [in table T_j] in the FROM clause of Q_k; + } + } + } + } + @endcode + @n + This procedure treats GROUP BY and SELECT clauses as one namespace for + column references in HAVING. Notice that compared to + Item_field::fix_fields, here we first search the SELECT and GROUP BY + clauses, and then we search the FROM clause. + + @param[in] thd current thread + @param[in,out] reference view column if this item was resolved to a + view column + + @todo + Here we could first find the field anyway, and then test this + condition, so that we can give a better error message - + ER_WRONG_FIELD_WITH_GROUP, instead of the less informative + ER_BAD_FIELD_ERROR which we produce now. + + @retval + TRUE if error + @retval + FALSE on success +*/ + +bool Item_ref::fix_fields(THD *thd, Item **reference) +{ + enum_parsing_place place= NO_MATTER; + DBUG_ASSERT(fixed() == 0); + SELECT_LEX *current_sel= context->select_lex; + + if (set_properties_only) + { + /* do nothing */ + } + else if (!ref || ref == not_found_item) + { + DBUG_ASSERT(reference_trough_name != 0); + if (!(ref= resolve_ref_in_select_and_group(thd, this, context->select_lex))) + goto error; /* Some error occurred (e.g. ambiguous names). */ + + if (ref == not_found_item) /* This reference was not resolved. */ + { + Name_resolution_context *last_checked_context= context; + Name_resolution_context *outer_context= context->outer_context; + Field *from_field; + ref= 0; + + if (unlikely(!outer_context)) + { + /* The current reference cannot be resolved in this query. 
*/ + my_error(ER_BAD_FIELD_ERROR,MYF(0), full_name(), thd->where); + goto error; + } + + /* + If there is an outer context (select), and it is not a derived table + (which do not support the use of outer fields for now), try to + resolve this reference in the outer select(s). + + We treat each subselect as a separate namespace, so that different + subselects may contain columns with the same names. The subselects are + searched starting from the innermost. + */ + from_field= (Field*) not_found_field; + + do + { + SELECT_LEX *select= outer_context->select_lex; + Item_subselect *prev_subselect_item= + last_checked_context->select_lex->master_unit()->item; + last_checked_context= outer_context; + + /* Search in the SELECT and GROUP lists of the outer select. */ + if (outer_context->resolve_in_select_list) + { + if (!(ref= resolve_ref_in_select_and_group(thd, this, select))) + goto error; /* Some error occurred (e.g. ambiguous names). */ + if (ref != not_found_item) + { + DBUG_ASSERT(*ref && (*ref)->fixed()); + prev_subselect_item->used_tables_and_const_cache_join(*ref); + break; + } + /* + Set ref to 0 to ensure that we get an error in case we replaced + this item with another item and still use this item in some + other place of the parse tree. + */ + ref= 0; + } + + place= prev_subselect_item->parsing_place; + /* + Check table fields only if the subquery is used somewhere out of + HAVING or the outer SELECT does not use grouping (i.e. tables are + accessible). + TODO: + Here we could first find the field anyway, and then test this + condition, so that we can give a better error message - + ER_WRONG_FIELD_WITH_GROUP, instead of the less informative + ER_BAD_FIELD_ERROR which we produce now. + */ + if ((place != IN_HAVING || + (!select->with_sum_func && + select->group_list.elements == 0))) + { + /* + In case of view, find_field_in_tables() write pointer to view + field expression to 'reference', i.e. 
it substitute that + expression instead of this Item_ref + */ + from_field= find_field_in_tables(thd, this, + outer_context-> + first_name_resolution_table, + outer_context-> + last_name_resolution_table, + outer_context->ignored_tables, + reference, + IGNORE_EXCEPT_NON_UNIQUE, + TRUE, TRUE); + if (! from_field) + goto error; + if (from_field == view_ref_found) + { + Item::Type refer_type= (*reference)->type(); + prev_subselect_item->used_tables_and_const_cache_join(*reference); + DBUG_ASSERT((*reference)->type() == REF_ITEM); + mark_as_dependent(thd, last_checked_context->select_lex, + context->select_lex, this, + ((refer_type == REF_ITEM || + refer_type == FIELD_ITEM) ? + (Item_ident*) (*reference) : + 0), false); + /* + view reference found, we substituted it instead of this + Item, so can quit + */ + return FALSE; + } + if (from_field != not_found_field) + { + if (cached_table && cached_table->select_lex && + outer_context->select_lex && + cached_table->select_lex != outer_context->select_lex) + { + /* + Due to cache, find_field_in_tables() can return field which + doesn't belong to provided outer_context. In this case we have + to find proper field context in order to fix field correctly. + */ + do + { + outer_context= outer_context->outer_context; + select= outer_context->select_lex; + prev_subselect_item= + last_checked_context->select_lex->master_unit()->item; + last_checked_context= outer_context; + } while (outer_context && outer_context->select_lex && + cached_table->select_lex != outer_context->select_lex); + } + prev_subselect_item->used_tables_cache|= from_field->table->map; + prev_subselect_item->const_item_cache= 0; + break; + } + } + DBUG_ASSERT(from_field == not_found_field); + + /* Reference is not found => depend on outer (or just error). 
*/ + prev_subselect_item->used_tables_cache|= OUTER_REF_TABLE_BIT; + prev_subselect_item->const_item_cache= 0; + + outer_context= outer_context->outer_context; + } while (outer_context); + + DBUG_ASSERT(from_field != 0 && from_field != view_ref_found); + if (from_field != not_found_field) + { + Item_field* fld; + if (!(fld= new (thd->mem_root) Item_field(thd, context, from_field))) + goto error; + thd->change_item_tree(reference, fld); + mark_as_dependent(thd, last_checked_context->select_lex, + current_sel, fld, fld, false); + /* + A reference is resolved to a nest level that's outer or the same as + the nest level of the enclosing set function : adjust the value of + max_arg_level for the function if it's needed. + */ + if (thd->lex->in_sum_func && + last_checked_context->select_lex->parent_lex == + context->select_lex->parent_lex && + thd->lex->in_sum_func->nest_level >= + last_checked_context->select_lex->nest_level) + set_if_bigger(thd->lex->in_sum_func->max_arg_level, + last_checked_context->select_lex->nest_level); + return FALSE; + } + if (unlikely(ref == 0)) + { + /* The item was not a table field and not a reference */ + my_error(ER_BAD_FIELD_ERROR, MYF(0), + this->full_name(), thd->where); + goto error; + } + /* Should be checked in resolve_ref_in_select_and_group(). */ + DBUG_ASSERT(*ref && (*ref)->fixed()); + mark_as_dependent(thd, last_checked_context->select_lex, + context->select_lex, this, this, false); + /* + A reference is resolved to a nest level that's outer or the same as + the nest level of the enclosing set function : adjust the value of + max_arg_level for the function if it's needed. 
+ */ + if (thd->lex->in_sum_func && + last_checked_context->select_lex->parent_lex == + context->select_lex->parent_lex && + thd->lex->in_sum_func->nest_level >= + last_checked_context->select_lex->nest_level) + set_if_bigger(thd->lex->in_sum_func->max_arg_level, + last_checked_context->select_lex->nest_level); + } + } + + DBUG_ASSERT(*ref); + /* + Check if this is an incorrect reference in a group function or forward + reference. Do not issue an error if this is: + 1. outer reference (will be fixed later by the fix_inner_refs function); + 2. an unnamed reference inside an aggregate function. + */ + if (!set_properties_only && + !((*ref)->type() == REF_ITEM && + ((Item_ref *)(*ref))->ref_type() == OUTER_REF) && + (((*ref)->with_sum_func() && name.str && + !(current_sel->get_linkage() != GLOBAL_OPTIONS_TYPE && + current_sel->having_fix_field)) || + !(*ref)->fixed())) + { + my_error(ER_ILLEGAL_REFERENCE, MYF(0), + name.str, ((*ref)->with_sum_func() ? + "reference to group function": + "forward reference in item list")); + goto error; + } + + set_properties(); + + if ((*ref)->check_cols(1)) + goto error; + return FALSE; + +error: + context->process_error(thd); + return TRUE; +} + + +void Item_ref::set_properties() +{ + Type_std_attributes::set(*ref); + /* + We have to remember if we refer to a sum function, to ensure that + split_sum_func() doesn't try to change the reference. 
+ */ + with_flags= (*ref)->with_flags; + base_flags|= (item_base_t::FIXED | + ((*ref)->base_flags & item_base_t::MAYBE_NULL)); + + if (alias_name_used) + return; + if ((*ref)->type() == FIELD_ITEM) + alias_name_used= ((Item_ident *) (*ref))->alias_name_used; + else + alias_name_used= TRUE; // it is not field, so it is was resolved by alias +} + + +void Item_ref::cleanup() +{ + DBUG_ENTER("Item_ref::cleanup"); + Item_ident::cleanup(); + if (reference_trough_name) + { + /* We have to reset the reference as it may been freed */ + ref= 0; + } + DBUG_VOID_RETURN; +} + + +/** + Transform an Item_ref object with a transformer callback function. + + The function first applies the transform method to the item + referenced by this Item_ref object. If this returns a new item the + old item is substituted for a new one. After this the transformer + is applied to the Item_ref object. + + @param transformer the transformer callback function to be applied to + the nodes of the tree of the object + @param argument parameter to be passed to the transformer + + @return Item returned as the result of transformation of the Item_ref object + @retval !NULL The transformation was successful + @retval NULL Out of memory error +*/ + +Item* Item_ref::transform(THD *thd, Item_transformer transformer, uchar *arg) +{ + DBUG_ASSERT(!thd->stmt_arena->is_stmt_prepare()); + DBUG_ASSERT((*ref) != NULL); + + /* Transform the object we are referencing. */ + Item *new_item= (*ref)->transform(thd, transformer, arg); + if (!new_item) + return NULL; + + /* + THD::change_item_tree() should be called only if the tree was + really transformed, i.e. when a new item has been created. + Otherwise we'll be allocating a lot of unnecessary memory for + change records at each execution. + */ + if (*ref != new_item) + thd->change_item_tree(ref, new_item); + + /* Transform the item ref object. 
*/ + return (this->*transformer)(thd, arg); +} + + +/** + Compile an Item_ref object with a processor and a transformer + callback functions. + + First the function applies the analyzer to the Item_ref object. Then + if the analyzer succeeds we first apply the compile method to the + object the Item_ref object is referencing. If this returns a new + item the old item is substituted for a new one. After this the + transformer is applied to the Item_ref object itself. + The compile function is not called if the analyzer returns NULL + in the parameter arg_p. + + @param analyzer the analyzer callback function to be applied to the + nodes of the tree of the object + @param[in,out] arg_p parameter to be passed to the processor + @param transformer the transformer callback function to be applied to the + nodes of the tree of the object + @param arg_t parameter to be passed to the transformer + + @return Item returned as the result of transformation of the Item_ref object +*/ + +Item* Item_ref::compile(THD *thd, Item_analyzer analyzer, uchar **arg_p, + Item_transformer transformer, uchar *arg_t) +{ + /* Analyze this Item object. */ + if (!(this->*analyzer)(arg_p)) + return NULL; + + /* Compile the Item we are referencing. */ + DBUG_ASSERT((*ref) != NULL); + if (*arg_p) + { + uchar *arg_v= *arg_p; + Item *new_item= (*ref)->compile(thd, analyzer, &arg_v, transformer, arg_t); + if (new_item && *ref != new_item) + thd->change_item_tree(ref, new_item); + } + + /* Transform this Item object. 
*/ + return (this->*transformer)(thd, arg_t); +} + + +void Item_ref::print(String *str, enum_query_type query_type) +{ + if (ref) + { + if ((*ref)->type() != Item::CACHE_ITEM && + (*ref)->type() != Item::WINDOW_FUNC_ITEM && + ref_type() != VIEW_REF && + !table_name.str && name.str && alias_name_used) + { + THD *thd= current_thd; + append_identifier(thd, str, &(*ref)->real_item()->name); + } + else + (*ref)->print(str, query_type); + } + else + Item_ident::print(str, query_type); +} + + +bool Item_ref::send(Protocol *prot, st_value *buffer) +{ + if (result_field) + return prot->store(result_field); + return (*ref)->send(prot, buffer); +} + + +double Item_ref::val_result() +{ + if (result_field) + { + if ((null_value= result_field->is_null())) + return 0.0; + return result_field->val_real(); + } + return val_real(); +} + + +bool Item_ref::is_null_result() +{ + if (result_field) + return (null_value=result_field->is_null()); + + return is_null(); +} + + +longlong Item_ref::val_int_result() +{ + if (result_field) + { + if ((null_value= result_field->is_null())) + return 0; + return result_field->val_int(); + } + return val_int(); +} + + +String *Item_ref::str_result(String* str) +{ + if (result_field) + { + if ((null_value= result_field->is_null())) + return 0; + str->set_charset(str_value.charset()); + return result_field->val_str(str, &str_value); + } + return val_str(str); +} + + +bool Item_ref::val_native_result(THD *thd, Native *to) +{ + return result_field ? 
+ val_native_from_field(result_field, to) : + val_native(thd, to); +} + + +my_decimal *Item_ref::val_decimal_result(my_decimal *decimal_value) +{ + if (result_field) + { + if ((null_value= result_field->is_null())) + return 0; + return result_field->val_decimal(decimal_value); + } + return val_decimal(decimal_value); +} + + +bool Item_ref::val_bool_result() +{ + if (result_field) + { + if ((null_value= result_field->is_null())) + return false; + return result_field->val_bool(); + } + return val_bool(); +} + + +void Item_ref::save_result(Field *to) +{ + if (result_field) + { + save_field_in_field(result_field, &null_value, to, TRUE); + return; + } + (*ref)->save_result(to); + null_value= (*ref)->null_value; +} + + +void Item_ref::save_val(Field *to) +{ + (*ref)->save_result(to); + null_value= (*ref)->null_value; +} + + +double Item_ref::val_real() +{ + DBUG_ASSERT(fixed()); + double tmp=(*ref)->val_result(); + null_value=(*ref)->null_value; + return tmp; +} + + +longlong Item_ref::val_int() +{ + DBUG_ASSERT(fixed()); + longlong tmp=(*ref)->val_int_result(); + null_value=(*ref)->null_value; + return tmp; +} + + +bool Item_ref::val_bool() +{ + DBUG_ASSERT(fixed()); + bool tmp= (*ref)->val_bool_result(); + null_value= (*ref)->null_value; + return tmp; +} + + +String *Item_ref::val_str(String* tmp) +{ + DBUG_ASSERT(fixed()); + tmp=(*ref)->str_result(tmp); + null_value=(*ref)->null_value; + return tmp; +} + + +bool Item_ref::is_null() +{ + DBUG_ASSERT(fixed()); + bool tmp=(*ref)->is_null_result(); + null_value=(*ref)->null_value; + return tmp; +} + + +bool Item_ref::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + return (null_value=(*ref)->get_date_result(thd, ltime, fuzzydate)); +} + + +bool Item_ref::val_native(THD *thd, Native *to) +{ + return val_native_from_item(thd, *ref, to); +} + + +longlong Item_ref::val_datetime_packed(THD *thd) +{ + DBUG_ASSERT(fixed()); + longlong tmp= (*ref)->val_datetime_packed_result(thd); + null_value= 
(*ref)->null_value; + return tmp; +} + + +longlong Item_ref::val_time_packed(THD *thd) +{ + DBUG_ASSERT(fixed()); + longlong tmp= (*ref)->val_time_packed_result(thd); + null_value= (*ref)->null_value; + return tmp; +} + + +my_decimal *Item_ref::val_decimal(my_decimal *decimal_value) +{ + my_decimal *val= (*ref)->val_decimal_result(decimal_value); + null_value= (*ref)->null_value; + return val; +} + +int Item_ref::save_in_field(Field *to, bool no_conversions) +{ + int res; + if (result_field) + { + if (result_field->is_null()) + { + null_value= 1; + res= set_field_to_null_with_conversions(to, no_conversions); + return res; + } + to->set_notnull(); + res= field_conv(to, result_field); + null_value= 0; + return res; + } + res= (*ref)->save_in_field(to, no_conversions); + null_value= (*ref)->null_value; + return res; +} + + +void Item_ref::save_org_in_field(Field *field, fast_field_copier optimizer_data) +{ + (*ref)->save_org_in_field(field, optimizer_data); +} + + +void Item_ref::make_send_field(THD *thd, Send_field *field) +{ + (*ref)->make_send_field(thd, field); + /* Non-zero in case of a view */ + if (name.str) + field->col_name= name; + if (table_name.str) + field->table_name= table_name; + if (db_name.str) + field->db_name= db_name; + if (orig_field_name.str) + field->org_col_name= orig_field_name; + if (orig_table_name.str) + field->org_table_name= orig_table_name; +} + + +Item *Item_ref::get_tmp_table_item(THD *thd) +{ + if (!result_field) + return (*ref)->get_tmp_table_item(thd); + + Item_field *item= new (thd->mem_root) Item_field(thd, result_field); + if (item) + { + item->table_name= table_name; + item->db_name= db_name; + } + return item; +} + + +void Item_ref_null_helper::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("(")); + if (ref) + (*ref)->print(str, query_type); + else + str->append('?'); + str->append(')'); +} + + +void Item_direct_ref::save_val(Field *to) +{ + (*ref)->save_val(to); + 
null_value=(*ref)->null_value; +} + + +double Item_direct_ref::val_real() +{ + double tmp=(*ref)->val_real(); + null_value=(*ref)->null_value; + return tmp; +} + + +longlong Item_direct_ref::val_int() +{ + longlong tmp=(*ref)->val_int(); + null_value=(*ref)->null_value; + return tmp; +} + + +String *Item_direct_ref::val_str(String* tmp) +{ + tmp=(*ref)->val_str(tmp); + null_value=(*ref)->null_value; + return tmp; +} + + +my_decimal *Item_direct_ref::val_decimal(my_decimal *decimal_value) +{ + my_decimal *tmp= (*ref)->val_decimal(decimal_value); + null_value=(*ref)->null_value; + return tmp; +} + + +bool Item_direct_ref::val_bool() +{ + bool tmp= (*ref)->val_bool(); + null_value=(*ref)->null_value; + return tmp; +} + + +bool Item_direct_ref::is_null() +{ + return (*ref)->is_null(); +} + + +bool Item_direct_ref::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + return (null_value=(*ref)->get_date(thd, ltime, fuzzydate)); +} + + +bool Item_direct_ref::val_native(THD *thd, Native *to) +{ + return val_native_from_item(thd, *ref, to); +} + + +longlong Item_direct_ref::val_time_packed(THD *thd) +{ + longlong tmp = (*ref)->val_time_packed(thd); + null_value= (*ref)->null_value; + return tmp; +} + + +longlong Item_direct_ref::val_datetime_packed(THD *thd) +{ + longlong tmp = (*ref)->val_datetime_packed(thd); + null_value= (*ref)->null_value; + return tmp; +} + + +Item_cache_wrapper::~Item_cache_wrapper() +{ + DBUG_ASSERT(expr_cache == 0); +} + +Item_cache_wrapper::Item_cache_wrapper(THD *thd, Item *item_arg): + Item_result_field(thd), orig_item(item_arg), expr_cache(NULL), expr_value(NULL) +{ + DBUG_ASSERT(orig_item->fixed()); + Type_std_attributes::set(orig_item); + + base_flags|= (item_base_t::FIXED | + (orig_item->base_flags & item_base_t::MAYBE_NULL)); + with_flags|= orig_item->with_flags; + + name= item_arg->name; + + if ((expr_value= orig_item->get_cache(thd))) + expr_value->setup(thd, orig_item); +} + + +/** + Initialize the cache if it is needed +*/ 
+ +void Item_cache_wrapper::init_on_demand() +{ + if (!expr_cache->is_inited()) + { + orig_item->get_cache_parameters(parameters); + expr_cache->init(); + } +} + + +void Item_cache_wrapper::print(String *str, enum_query_type query_type) +{ + if (query_type & QT_ITEM_CACHE_WRAPPER_SKIP_DETAILS) + { + /* Don't print the cache in EXPLAIN EXTENDED */ + orig_item->print(str, query_type); + return; + } + + str->append(STRING_WITH_LEN("")); + if (expr_cache) + { + init_on_demand(); + expr_cache->print(str, query_type); + } + else + str->append(STRING_WITH_LEN("<>")); + str->append('('); + orig_item->print(str, query_type); + str->append(')'); +} + + +/** + Prepare the expression cache wrapper (do nothing) + + @retval FALSE OK +*/ + +bool Item_cache_wrapper::fix_fields(THD *thd __attribute__((unused)), + Item **it __attribute__((unused))) +{ + DBUG_ASSERT(orig_item->fixed()); + DBUG_ASSERT(fixed()); + return FALSE; +} + +bool Item_cache_wrapper::send(Protocol *protocol, st_value *buffer) +{ + if (result_field) + return protocol->store(result_field); + return Item::send(protocol, buffer); +} + +/** + Clean the expression cache wrapper up before reusing it. +*/ + +void Item_cache_wrapper::cleanup() +{ + DBUG_ENTER("Item_cache_wrapper::cleanup"); + Item_result_field::cleanup(); + delete expr_cache; + expr_cache= 0; + /* expr_value is Item so it will be destroyed from list of Items */ + expr_value= 0; + parameters.empty(); + DBUG_VOID_RETURN; +} + + +/** + Create an expression cache that uses a temporary table + + @param thd Thread handle + @param depends_on Parameters of the expression to create cache for + + @details + The function takes 'depends_on' as the list of all parameters for + the expression wrapped into this object and creates an expression + cache in a temporary table containing the field for the parameters + and the result of the expression. 
+ + @retval FALSE OK + @retval TRUE Error +*/ + +bool Item_cache_wrapper::set_cache(THD *thd) +{ + DBUG_ENTER("Item_cache_wrapper::set_cache"); + DBUG_ASSERT(expr_cache == 0); + expr_cache= new Expression_cache_tmptable(thd, parameters, expr_value); + DBUG_RETURN(expr_cache == NULL); +} + +Expression_cache_tracker* Item_cache_wrapper::init_tracker(MEM_ROOT *mem_root) +{ + if (expr_cache) + { + Expression_cache_tracker* tracker= + new(mem_root) Expression_cache_tracker(expr_cache); + if (tracker) + ((Expression_cache_tmptable *)expr_cache)->set_tracker(tracker); + return tracker; + } + return NULL; +} + + +/** + Check if the current values of the parameters are in the expression cache + + @details + The function checks whether the current set of the parameters of the + referenced item can be found in the expression cache. If so the function + returns the item by which the result of the expression can be easily + extracted from the cache with the corresponding val_* method. + + @retval NULL - parameters are not in the cache + @retval - item providing the result of the expression found in cache +*/ + +Item *Item_cache_wrapper::check_cache() +{ + DBUG_ENTER("Item_cache_wrapper::check_cache"); + if (expr_cache) + { + Expression_cache_tmptable::result res; + Item *cached_value; + init_on_demand(); + res= expr_cache->check_value(&cached_value); + if (res == Expression_cache_tmptable::HIT) + DBUG_RETURN(cached_value); + } + DBUG_RETURN(NULL); +} + + +/** + Get the value of the cached expression and put it in the cache +*/ + +inline void Item_cache_wrapper::cache() +{ + expr_value->store(orig_item); + expr_value->cache_value(); + expr_cache->put_value(expr_value); // put in expr_cache +} + + +/** + Get the value of the possibly cached item into the field. 
+*/ + +void Item_cache_wrapper::save_val(Field *to) +{ + Item *cached_value; + DBUG_ENTER("Item_cache_wrapper::val_int"); + if (!expr_cache) + { + orig_item->save_val(to); + null_value= orig_item->null_value; + DBUG_VOID_RETURN; + } + + if ((cached_value= check_cache())) + { + cached_value->save_val(to); + null_value= cached_value->null_value; + DBUG_VOID_RETURN; + } + cache(); + null_value= expr_value->null_value; + expr_value->save_val(to); + DBUG_VOID_RETURN; +} + + +/** + Get the integer value of the possibly cached item. +*/ + +longlong Item_cache_wrapper::val_int() +{ + Item *cached_value; + DBUG_ENTER("Item_cache_wrapper::val_int"); + if (!expr_cache) + { + longlong tmp= orig_item->val_int(); + null_value= orig_item->null_value; + DBUG_RETURN(tmp); + } + + if ((cached_value= check_cache())) + { + longlong tmp= cached_value->val_int(); + null_value= cached_value->null_value; + DBUG_RETURN(tmp); + } + cache(); + null_value= expr_value->null_value; + DBUG_RETURN(expr_value->val_int()); +} + + +/** + Get the real value of the possibly cached item +*/ + +double Item_cache_wrapper::val_real() +{ + Item *cached_value; + DBUG_ENTER("Item_cache_wrapper::val_real"); + if (!expr_cache) + { + double tmp= orig_item->val_real(); + null_value= orig_item->null_value; + DBUG_RETURN(tmp); + } + + if ((cached_value= check_cache())) + { + double tmp= cached_value->val_real(); + null_value= cached_value->null_value; + DBUG_RETURN(tmp); + } + cache(); + null_value= expr_value->null_value; + DBUG_RETURN(expr_value->val_real()); +} + + +/** + Get the string value of the possibly cached item +*/ + +String *Item_cache_wrapper::val_str(String* str) +{ + Item *cached_value; + DBUG_ENTER("Item_cache_wrapper::val_str"); + if (!expr_cache) + { + String *tmp= orig_item->val_str(str); + null_value= orig_item->null_value; + DBUG_RETURN(tmp); + } + + if ((cached_value= check_cache())) + { + String *tmp= cached_value->val_str(str); + null_value= cached_value->null_value; + DBUG_RETURN(tmp); + 
} + cache(); + if ((null_value= expr_value->null_value)) + DBUG_RETURN(NULL); + DBUG_RETURN(expr_value->val_str(str)); +} + + +/** + Get the native value of the possibly cached item +*/ + +bool Item_cache_wrapper::val_native(THD *thd, Native* to) +{ + Item *cached_value; + DBUG_ENTER("Item_cache_wrapper::val_native"); + if (!expr_cache) + DBUG_RETURN(val_native_from_item(thd, orig_item, to)); + + if ((cached_value= check_cache())) + DBUG_RETURN(val_native_from_item(thd, cached_value, to)); + + cache(); + if ((null_value= expr_value->null_value)) + DBUG_RETURN(true); + DBUG_RETURN(expr_value->val_native(thd, to)); +} + + + +/** + Get the decimal value of the possibly cached item +*/ + +my_decimal *Item_cache_wrapper::val_decimal(my_decimal* decimal_value) +{ + Item *cached_value; + DBUG_ENTER("Item_cache_wrapper::val_decimal"); + if (!expr_cache) + { + my_decimal *tmp= orig_item->val_decimal(decimal_value); + null_value= orig_item->null_value; + DBUG_RETURN(tmp); + } + + if ((cached_value= check_cache())) + { + my_decimal *tmp= cached_value->val_decimal(decimal_value); + null_value= cached_value->null_value; + DBUG_RETURN(tmp); + } + cache(); + if ((null_value= expr_value->null_value)) + DBUG_RETURN(NULL); + DBUG_RETURN(expr_value->val_decimal(decimal_value)); +} + + +/** + Get the boolean value of the possibly cached item +*/ + +bool Item_cache_wrapper::val_bool() +{ + Item *cached_value; + DBUG_ENTER("Item_cache_wrapper::val_bool"); + if (!expr_cache) + { + bool tmp= orig_item->val_bool(); + null_value= orig_item->null_value; + DBUG_RETURN(tmp); + } + + if ((cached_value= check_cache())) + { + bool tmp= cached_value->val_bool(); + null_value= cached_value->null_value; + DBUG_RETURN(tmp); + } + cache(); + null_value= expr_value->null_value; + DBUG_RETURN(expr_value->val_bool()); +} + + +/** + Check for NULL the value of the possibly cached item +*/ + +bool Item_cache_wrapper::is_null() +{ + Item *cached_value; + DBUG_ENTER("Item_cache_wrapper::is_null"); + if 
(!expr_cache) + { + bool tmp= orig_item->is_null(); + null_value= orig_item->null_value; + DBUG_RETURN(tmp); + } + + if ((cached_value= check_cache())) + { + bool tmp= cached_value->is_null(); + null_value= cached_value->null_value; + DBUG_RETURN(tmp); + } + cache(); + DBUG_RETURN((null_value= expr_value->null_value)); +} + + +/** + Get the date value of the possibly cached item +*/ + +bool Item_cache_wrapper::get_date(THD *thd, MYSQL_TIME *ltime, + date_mode_t fuzzydate) +{ + Item *cached_value; + DBUG_ENTER("Item_cache_wrapper::get_date"); + if (!expr_cache) + DBUG_RETURN((null_value= orig_item->get_date(thd, ltime, fuzzydate))); + + if ((cached_value= check_cache())) + DBUG_RETURN((null_value= cached_value->get_date(thd, ltime, fuzzydate))); + + cache(); + DBUG_RETURN((null_value= expr_value->get_date(thd, ltime, fuzzydate))); +} + + +int Item_cache_wrapper::save_in_field(Field *to, bool no_conversions) +{ + int res; + DBUG_ASSERT(!result_field); + res= orig_item->save_in_field(to, no_conversions); + null_value= orig_item->null_value; + return res; +} + + +Item* Item_cache_wrapper::get_tmp_table_item(THD *thd) +{ + if (!orig_item->with_sum_func() && !orig_item->const_item()) + { + auto item_field= new (thd->mem_root) Item_field(thd, result_field); + if (item_field) + item_field->set_refers_to_temp_table(); + return item_field; + } + return copy_or_same(thd); +} + + +bool Item_direct_view_ref::send(Protocol *protocol, st_value *buffer) +{ + if (check_null_ref()) + return protocol->store_null(); + return Item_direct_ref::send(protocol, buffer); +} + +/** + Prepare referenced field then call usual Item_direct_ref::fix_fields . 
+ + @param thd thread handler + @param reference reference on reference where this item stored + + @retval + FALSE OK + @retval + TRUE Error +*/ + +bool Item_direct_view_ref::fix_fields(THD *thd, Item **reference) +{ + /* view fild reference must be defined */ + DBUG_ASSERT(*ref); + /* (*ref)->check_cols() will be made in Item_direct_ref::fix_fields */ + if ((*ref)->fixed()) + { + Item *ref_item= (*ref)->real_item(); + if (ref_item->type() == Item::FIELD_ITEM) + { + /* + In some cases we need to update table read set(see bug#47150). + If ref item is FIELD_ITEM and fixed then field and table + have proper values. So we can use them for update. + */ + Field *fld= ((Item_field*) ref_item)->field; + DBUG_ASSERT(fld && fld->table); + if (thd->column_usage == MARK_COLUMNS_READ) + bitmap_set_bit(fld->table->read_set, fld->field_index); + } + } + else if ((*ref)->fix_fields_if_needed(thd, ref)) + return TRUE; + + if (Item_direct_ref::fix_fields(thd, reference)) + return TRUE; + if (view->table && view->table->maybe_null) + set_maybe_null(); + set_null_ref_table(); + return FALSE; +} + +/* + Prepare referenced outer field then call usual Item_direct_ref::fix_fields + + SYNOPSIS + Item_outer_ref::fix_fields() + thd thread handler + reference reference on reference where this item stored + + RETURN + FALSE OK + TRUE Error +*/ + +bool Item_outer_ref::fix_fields(THD *thd, Item **reference) +{ + bool err; + /* outer_ref->check_cols() will be made in Item_direct_ref::fix_fields */ + if ((*ref) && (*ref)->fix_fields_if_needed(thd, reference)) + return TRUE; + err= Item_direct_ref::fix_fields(thd, reference); + if (!outer_ref) + outer_ref= *ref; + if ((*ref)->type() == Item::FIELD_ITEM) + table_name= ((Item_field*)outer_ref)->table_name; + return err; +} + + +void Item_outer_ref::fix_after_pullout(st_select_lex *new_parent, + Item **ref_arg, bool merge) +{ + if (get_depended_from() == new_parent) + { + *ref_arg= outer_ref; + (*ref_arg)->fix_after_pullout(new_parent, ref_arg, 
merge); + } +} + +void Item_ref::fix_after_pullout(st_select_lex *new_parent, Item **refptr, + bool merge) +{ + (*ref)->fix_after_pullout(new_parent, ref, merge); + if (get_depended_from() == new_parent) + depended_from= NULL; +} + + +/** + Mark references from inner selects used in group by clause + + The method is used by the walk method when called for the expressions + from the group by clause. The callsare occurred in the function + fix_inner_refs invoked by JOIN::prepare. + The parameter passed to Item_outer_ref::check_inner_refs_processor + is the iterator over the list of inner references from the subselects + of the select to be prepared. The function marks those references + from this list whose occurrences are encountered in the group by + expressions passed to the walk method. + + @param arg pointer to the iterator over a list of inner references + + @return + FALSE always +*/ + +bool Item_outer_ref::check_inner_refs_processor(void *arg) +{ + List_iterator_fast *it= + ((List_iterator_fast *) arg); + Item_outer_ref *tmp_ref; + while ((tmp_ref= (*it)++)) + { + if (tmp_ref == this) + { + tmp_ref->found_in_group_by= 1; + break; + } + } + (*it).rewind(); + return FALSE; +} + + +/** + Compare two view column references for equality. + + A view column reference is considered equal to another column + reference if the second one is a view column and if both column + references resolve to the same item. It is assumed that both + items are of the same type. 
+ + @param item item to compare with + @param binary_cmp make binary comparison + + @retval + TRUE Referenced item is equal to given item + @retval + FALSE otherwise +*/ + +bool Item_direct_view_ref::eq(const Item *item, bool binary_cmp) const +{ + if (item->type() == REF_ITEM) + { + Item_ref *item_ref= (Item_ref*) item; + if (item_ref->ref_type() == VIEW_REF) + { + Item *item_ref_ref= *(item_ref->ref); + return ((*ref)->real_item() == item_ref_ref->real_item()); + } + } + return FALSE; +} + + +Item_equal *Item_direct_view_ref::find_item_equal(COND_EQUAL *cond_equal) +{ + Item* field_item= real_item(); + if (field_item->type() != FIELD_ITEM) + return NULL; + return ((Item_field *) field_item)->find_item_equal(cond_equal); +} + + +/** + Set a pointer to the multiple equality the view field reference belongs to + (if any). + + @details + The function looks for a multiple equality containing this item of the type + Item_direct_view_ref among those referenced by arg. + In the case such equality exists the function does the following. + If the found multiple equality contains a constant, then the item + is substituted for this constant, otherwise the function sets a pointer + to the multiple equality in the item. + + @param arg reference to list of multiple equalities where + the item (this object) is to be looked for + + @note + This function is supposed to be called as a callback parameter in calls + of the compile method. + + @note + The function calls Item_field::propagate_equal_fields() for the field item + this->real_item() to do the job. Then it takes the pointer to equal_item + from this field item and assigns it to this->item_equal. + + @return + - pointer to the replacing constant item, if the field item was substituted + - pointer to the field item, otherwise. 
+*/ + +Item *Item_direct_view_ref::propagate_equal_fields(THD *thd, + const Context &ctx, + COND_EQUAL *cond) +{ + Item *field_item= real_item(); + if (field_item->type() != FIELD_ITEM) + return this; + Item *item= field_item->propagate_equal_fields(thd, ctx, cond); + set_item_equal(field_item->get_item_equal()); + field_item->set_item_equal(NULL); + if (item != field_item) + return item; + return this; +} + + +Item *Item_ref::propagate_equal_fields(THD *thd, const Context &ctx, + COND_EQUAL *cond) +{ + Item *field_item= real_item(); + if (field_item->type() != FIELD_ITEM) + return this; + Item *item= field_item->propagate_equal_fields(thd, ctx, cond); + if (item != field_item) + return item; + return this; +} + + +/** + Replace an Item_direct_view_ref for an equal Item_field evaluated earlier + (if any). + + @details + If this->item_equal points to some item and coincides with arg then + the function returns a pointer to a field item that is referred to by the + first element of the item_equal list which the Item_direct_view_ref + object belongs to unless item_equal contains a constant item. In this + case the function returns this constant item (if the substitution does + not require conversion). + If the Item_direct_view_ref object does not refer any Item_equal object + 'this' is returned . + + @param arg NULL or points to so some item of the Item_equal type + + @note + This function is supposed to be called as a callback parameter in calls + of the transformer method. + + @note + The function calls Item_field::replace_equal_field for the field item + this->real_item() to do the job. + + @return + - pointer to a replacement Item_field if there is a better equal item or + a pointer to a constant equal item; + - this - otherwise. 
+*/ + +Item *Item_direct_view_ref::replace_equal_field(THD *thd, uchar *arg) +{ + Item *field_item= real_item(); + if (field_item->type() != FIELD_ITEM) + return this; + field_item->set_item_equal(item_equal); + Item *item= field_item->replace_equal_field(thd, arg); + field_item->set_item_equal(0); + return item != field_item ? item : this; +} + + +bool Item_field::excl_dep_on_table(table_map tab_map) +{ + return used_tables() == tab_map || + (item_equal && (item_equal->used_tables() & tab_map)); +} + + +bool +Item_field::excl_dep_on_grouping_fields(st_select_lex *sel) +{ + return find_matching_field_pair(this, sel->grouping_tmp_fields) != NULL; +} + + +bool Item_direct_view_ref::excl_dep_on_table(table_map tab_map) +{ + table_map used= used_tables(); + if (used & (OUTER_REF_TABLE_BIT | RAND_TABLE_BIT)) + return false; + if (!(used & ~tab_map)) + return true; + if (item_equal) + { + DBUG_ASSERT(real_item()->type() == Item::FIELD_ITEM); + return item_equal->used_tables() & tab_map; + } + return (*ref)->excl_dep_on_table(tab_map); +} + + +bool Item_direct_view_ref::excl_dep_on_grouping_fields(st_select_lex *sel) +{ + if (item_equal) + { + DBUG_ASSERT(real_item()->type() == Item::FIELD_ITEM); + return (find_matching_field_pair(this, sel->grouping_tmp_fields) != NULL); + } + return (*ref)->excl_dep_on_grouping_fields(sel); +} + + +bool Item_args::excl_dep_on_grouping_fields(st_select_lex *sel) +{ + for (uint i= 0; i < arg_count; i++) + { + if (args[i]->type() == Item::FUNC_ITEM && + ((Item_func *)args[i])->functype() == Item_func::UDF_FUNC) + return false; + if (args[i]->const_item()) + continue; + if (!args[i]->excl_dep_on_grouping_fields(sel)) + return false; + } + return true; +} + + +double Item_direct_view_ref::val_result() +{ + double tmp=(*ref)->val_result(); + null_value=(*ref)->null_value; + return tmp; +} + + +longlong Item_direct_view_ref::val_int_result() +{ + longlong tmp=(*ref)->val_int_result(); + null_value=(*ref)->null_value; + return tmp; +} + + 
+String *Item_direct_view_ref::str_result(String* tmp) +{ + tmp=(*ref)->str_result(tmp); + null_value=(*ref)->null_value; + return tmp; +} + + +my_decimal *Item_direct_view_ref::val_decimal_result(my_decimal *val) +{ + my_decimal *tmp= (*ref)->val_decimal_result(val); + null_value=(*ref)->null_value; + return tmp; +} + + +bool Item_direct_view_ref::val_bool_result() +{ + bool tmp= (*ref)->val_bool_result(); + null_value=(*ref)->null_value; + return tmp; +} + + +bool Item_default_value::eq(const Item *item, bool binary_cmp) const +{ + return item->type() == DEFAULT_VALUE_ITEM && + ((Item_default_value *)item)->arg->eq(arg, binary_cmp); +} + + +bool Item_default_value::check_field_expression_processor(void *) +{ + return Item_default_value::update_func_default_processor(0); +} + +bool Item_default_value::update_func_default_processor(void *) +{ + field->default_value= ((Item_field *)(arg->real_item()))->field->default_value; + return 0; +} + +bool Item_default_value::fix_fields(THD *thd, Item **items) +{ + Item *real_arg; + Item_field *field_arg; + Field *def_field; + DBUG_ASSERT(fixed() == 0); + DBUG_ASSERT(arg); + + /* + DEFAULT() do not need table field so should not ask handler to bring + field value (mark column for read) + */ + enum_column_usage save_column_usage= thd->column_usage; + /* + Fields which has defult value could be read, so it is better hide system + invisible columns. 
+ */ + thd->column_usage= COLUMNS_WRITE; + if (arg->fix_fields_if_needed(thd, &arg)) + { + thd->column_usage= save_column_usage; + goto error; + } + thd->column_usage= save_column_usage; + + real_arg= arg->real_item(); + if (real_arg->type() != FIELD_ITEM) + { + my_error(ER_NO_DEFAULT_FOR_FIELD, MYF(0), arg->name.str); + goto error; + } + + field_arg= (Item_field *)real_arg; + if ((field_arg->field->flags & NO_DEFAULT_VALUE_FLAG)) + { + my_error(ER_NO_DEFAULT_FOR_FIELD, MYF(0), + field_arg->field->field_name.str); + goto error; + } + if (!(def_field= (Field*) thd->alloc(field_arg->field->size_of()))) + goto error; + memcpy((void *)def_field, (void *)field_arg->field, + field_arg->field->size_of()); + def_field->reset_fields(); + // If non-constant default value expression or a blob + if (def_field->default_value && + (def_field->default_value->flags || (def_field->flags & BLOB_FLAG))) + { + uchar *newptr= (uchar*) thd->alloc(1+def_field->pack_length()); + if (!newptr) + goto error; + if (should_mark_column(thd->column_usage)) + def_field->default_value->expr->update_used_tables(); + def_field->move_field(newptr+1, def_field->maybe_null() ? 
newptr : 0, 1); + } + else + def_field->move_field_offset((my_ptrdiff_t) + (def_field->table->s->default_values - + def_field->table->record[0])); + set_field(def_field); + return FALSE; + +error: + context->process_error(thd); + return TRUE; +} + +void Item_default_value::cleanup() +{ + delete field; // Free cached blob data + Item_field::cleanup(); +} + +void Item_default_value::print(String *str, enum_query_type query_type) +{ + DBUG_ASSERT(arg); + str->append(STRING_WITH_LEN("default(")); + /* + We take DEFAULT from a field so do not need it value in case of const + tables but its name so we set QT_NO_DATA_EXPANSION (as we print for + table definition, also we do not need table and database name) + */ + query_type= (enum_query_type) (query_type | QT_NO_DATA_EXPANSION); + arg->print(str, query_type); + str->append(')'); +} + +void Item_default_value::calculate() +{ + DBUG_ASSERT(arg); + if (field->default_value) + field->set_default(); + DEBUG_SYNC(field->table->in_use, "after_Item_default_value_calculate"); +} + +bool Item_default_value::val_native(THD *thd, Native *to) +{ + calculate(); + return Item_field::val_native(thd, to); +} + +String *Item_default_value::val_str(String *str) +{ + calculate(); + return Item_field::val_str(str); +} + +double Item_default_value::val_real() +{ + calculate(); + return Item_field::val_real(); +} + +longlong Item_default_value::val_int() +{ + calculate(); + return Item_field::val_int(); +} + +my_decimal *Item_default_value::val_decimal(my_decimal *decimal_value) +{ + calculate(); + return Item_field::val_decimal(decimal_value); +} + +bool Item_default_value::get_date(THD *thd, MYSQL_TIME *ltime, + date_mode_t fuzzydate) +{ + calculate(); + return Item_field::get_date(thd, ltime, fuzzydate); +} + +bool Item_default_value::send(Protocol *protocol, st_value *buffer) +{ + calculate(); + return Item_field::send(protocol, buffer); +} + +int Item_default_value::save_in_field(Field *field_arg, bool no_conversions) +{ + calculate(); + 
return Item_field::save_in_field(field_arg, no_conversions); +} + +void Item_default_value::save_in_result_field(bool no_conversions) +{ + calculate(); + Item_field::save_in_result_field(no_conversions); +} + +double Item_default_value::val_result() +{ + calculate(); + return Item_field::val_result(); +} + +longlong Item_default_value::val_int_result() +{ + calculate(); + return Item_field::val_int_result(); +} + +String *Item_default_value::str_result(String* tmp) +{ + calculate(); + return Item_field::str_result(tmp); +} + +bool Item_default_value::val_bool_result() +{ + calculate(); + return Item_field::val_bool_result(); +} + +bool Item_default_value::is_null_result() +{ + calculate(); + return Item_field::is_null_result(); +} + +my_decimal *Item_default_value::val_decimal_result(my_decimal *decimal_value) +{ + calculate(); + return Item_field::val_decimal_result(decimal_value); +} + +bool Item_default_value::get_date_result(THD *thd, MYSQL_TIME *ltime, + date_mode_t fuzzydate) +{ + calculate(); + return Item_field::get_date_result(thd, ltime, fuzzydate); +} + +bool Item_default_value::val_native_result(THD *thd, Native *to) +{ + calculate(); + return Item_field::val_native_result(thd, to); +} + + +table_map Item_default_value::used_tables() const +{ + if (!field || !field->default_value) + return static_cast(0); + if (!field->default_value->expr) // not fully parsed field + return static_cast(RAND_TABLE_BIT); + return field->default_value->expr->used_tables(); +} + +bool Item_default_value::register_field_in_read_map(void *arg) +{ + TABLE *table= (TABLE *) arg; + int res= 0; + if (!table || (table && table == field->table)) + { + if (field->default_value && field->default_value->expr) + res= field->default_value->expr->walk(&Item::register_field_in_read_map,1,arg); + } + else if (result_field && table == result_field->table) + { + bitmap_set_bit(table->read_set, result_field->field_index); + } + + return res; +} + +/** + This method like the walk method 
traverses the item tree, but at the + same time it can replace some nodes in the tree. +*/ + +Item *Item_default_value::transform(THD *thd, Item_transformer transformer, + uchar *args) +{ + DBUG_ASSERT(!thd->stmt_arena->is_stmt_prepare()); + DBUG_ASSERT(arg); + + Item *new_item= arg->transform(thd, transformer, args); + if (!new_item) + return 0; + + /* + THD::change_item_tree() should be called only if the tree was + really transformed, i.e. when a new item has been created. + Otherwise we'll be allocating a lot of unnecessary memory for + change records at each execution. + */ + if (arg != new_item) + thd->change_item_tree(&arg, new_item); + return (this->*transformer)(thd, args); +} + + +bool Item_insert_value::eq(const Item *item, bool binary_cmp) const +{ + return item->type() == INSERT_VALUE_ITEM && + ((Item_insert_value *)item)->arg->eq(arg, binary_cmp); +} + + +bool Item_insert_value::fix_fields(THD *thd, Item **items) +{ + DBUG_ASSERT(fixed() == 0); + /* We should only check that arg is in first table */ + if (!arg->fixed()) + { + bool res; + TABLE_LIST *orig_next_table= context->last_name_resolution_table; + context->last_name_resolution_table= context->first_name_resolution_table; + res= arg->fix_fields(thd, &arg); + context->last_name_resolution_table= orig_next_table; + if (res) + return TRUE; + } + + if (arg->type() == REF_ITEM) + arg= static_cast(arg)->ref[0]; + if (unlikely(arg->type() != FIELD_ITEM)) + { + my_error(ER_BAD_FIELD_ERROR, MYF(0), "", "VALUES() function"); + return TRUE; + } + + Item_field *field_arg= (Item_field *)arg; + + if (field_arg->field->table->insert_values) + { + Field *def_field= (Field*) thd->alloc(field_arg->field->size_of()); + if (!def_field) + return TRUE; + memcpy((void *)def_field, (void *)field_arg->field, + field_arg->field->size_of()); + def_field->move_field_offset((my_ptrdiff_t) + (def_field->table->insert_values - + def_field->table->record[0])); + set_field(def_field); + } + else + { + static uchar null_bit=1; + 
/* charset doesn't matter here */ + Field *tmp_field= new Field_string(0, 0, &null_bit, 1, Field::NONE, + &field_arg->field->field_name, &my_charset_bin); + if (tmp_field) + { + tmp_field->init(field_arg->field->table); + set_field(tmp_field); + // the index is important when read bits set + tmp_field->field_index= field_arg->field->field_index; + } + } + return FALSE; +} + +void Item_insert_value::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("value(")); + arg->print(str, query_type); + str->append(')'); +} + + +/** + Find index of Field object which will be appropriate for item + representing field of row being changed in trigger. + + @param thd current thread context + @param table table of trigger (and where we looking for fields) + @param table_grant_info GRANT_INFO of the subject table + + @note + This function does almost the same as fix_fields() for Item_field + but is invoked right after trigger definition parsing. Since at + this stage we can't say exactly what Field object (corresponding + to TABLE::record[0] or TABLE::record[1]) should be bound to this + Item, we only find out index of the Field and then select concrete + Field object in fix_fields() (by that time Table_triggers_list::old_field/ + new_field should point to proper array of Fields). + It also binds Item_trigger_field to Table_triggers_list object for + table of trigger which uses this item. +*/ + +void Item_trigger_field::setup_field(THD *thd, TABLE *table, + GRANT_INFO *table_grant_info) +{ + /* + It is too early to mark fields used here, because before execution + of statement that will invoke trigger other statements may use same + TABLE object, so all such mark-up will be wiped out. + So instead we do it in Table_triggers_list::mark_fields_used() + method which is called during execution of these statements. + */ + enum_column_usage saved_column_usage= thd->column_usage; + thd->column_usage= want_privilege == SELECT_ACL ? 
COLUMNS_READ + : COLUMNS_WRITE; + /* + Try to find field by its name and if it will be found + set field_idx properly. + */ + (void)find_field_in_table(thd, table, field_name.str, field_name.length, + 0, &field_idx); + thd->column_usage= saved_column_usage; + triggers= table->triggers; + table_grants= table_grant_info; +} + + +bool Item_trigger_field::eq(const Item *item, bool binary_cmp) const +{ + return item->type() == TRIGGER_FIELD_ITEM && + row_version == ((Item_trigger_field *)item)->row_version && + !lex_string_cmp(system_charset_info, &field_name, + &((Item_trigger_field *)item)->field_name); +} + + +void Item_trigger_field::set_required_privilege(bool rw) +{ + /* + Require SELECT and UPDATE privilege if this field will be read and + set, and only UPDATE privilege for setting the field. + */ + want_privilege= (rw ? SELECT_ACL | UPDATE_ACL : UPDATE_ACL); +} + + +bool Item_trigger_field::set_value(THD *thd, sp_rcontext * /*ctx*/, Item **it) +{ + if (fix_fields_if_needed(thd, NULL)) + return true; + + Item *item= thd->sp_fix_func_item_for_assignment(field, it); + if (!item) + return true; + if (field->vers_sys_field()) + return false; + + // NOTE: field->table->copy_blobs should be false here, but let's + // remember the value at runtime to avoid subtle bugs. + bool copy_blobs_saved= field->table->copy_blobs; + + field->table->copy_blobs= true; + + int err_code= item->save_in_field(field, 0); + + field->table->copy_blobs= copy_blobs_saved; + field->set_has_explicit_value(); + + return err_code < 0; +} + + +bool Item_trigger_field::fix_fields(THD *thd, Item **items) +{ + /* + Since trigger is object tightly associated with TABLE object most + of its set up can be performed during trigger loading i.e. trigger + parsing! So we have little to do in fix_fields. :) + */ + + DBUG_ASSERT(fixed() == 0); + + /* Set field. */ + + if (likely(field_idx != NO_CACHED_FIELD_INDEX)) + { +#ifndef NO_EMBEDDED_ACCESS_CHECKS + /* + Check access privileges for the subject table. 
We check privileges only + in runtime. + */ + + if (table_grants) + { + table_grants->want_privilege= want_privilege; + + if (check_grant_column(thd, table_grants, + triggers->trigger_table->s->db.str, + triggers->trigger_table->s->table_name.str, + field_name.str, field_name.length, + thd->security_ctx)) + return TRUE; + } +#endif // NO_EMBEDDED_ACCESS_CHECKS + + field= (row_version == OLD_ROW) ? triggers->old_field[field_idx] : + triggers->new_field[field_idx]; + set_field(field); + base_flags|= item_base_t::FIXED; + return FALSE; + } + + my_error(ER_BAD_FIELD_ERROR, MYF(0), field_name.str, + (row_version == NEW_ROW) ? "NEW" : "OLD"); + return TRUE; +} + + +void Item_trigger_field::print(String *str, enum_query_type query_type) +{ + str->append((row_version == NEW_ROW) ? "NEW" : "OLD", 3); + str->append('.'); + str->append(&field_name); +} + + +bool Item_trigger_field::check_vcol_func_processor(void *arg) +{ + const char *ver= row_version == NEW_ROW ? "NEW." : "OLD."; + return mark_unsupported_function(ver, field_name.str, arg, VCOL_IMPOSSIBLE); +} + + +void Item_trigger_field::cleanup() +{ + want_privilege= original_privilege; + /* + Since special nature of Item_trigger_field we should not do most of + things from Item_field::cleanup() or Item_ident::cleanup() here. 
+ */ + Item_fixed_hybrid::cleanup(); +} + + +Item_result item_cmp_type(Item_result a,Item_result b) +{ + if (a == b) + return a; + if (a == ROW_RESULT || b == ROW_RESULT) + return ROW_RESULT; + if (a == TIME_RESULT || b == TIME_RESULT) + return TIME_RESULT; + if ((a == INT_RESULT || a == DECIMAL_RESULT) && + (b == INT_RESULT || b == DECIMAL_RESULT)) + return DECIMAL_RESULT; + return REAL_RESULT; +} + + +void resolve_const_item(THD *thd, Item **ref, Item *comp_item) +{ + Item *item= *ref; + if (item->basic_const_item()) + return; // Can't be better + Type_handler_hybrid_field_type cmp(comp_item->type_handler_for_comparison()); + if (!cmp.aggregate_for_comparison(item->type_handler_for_comparison())) + { + Item *new_item= cmp.type_handler()-> + make_const_item_for_comparison(thd, item, comp_item); + if (new_item) + thd->change_item_tree(ref, new_item); + } +} + +/** + Compare the value stored in field with the expression from the query. + + @param field Field which the Item is stored in after conversion + @param item Original expression from query + + @return Returns an integer greater than, equal to, or less than 0 if + the value stored in the field is greater than, equal to, + or less than the original Item. A 0 may also be returned if + out of memory. + + @note We use this in the range optimizer/partition pruning, + because in some cases we can't store the value in the field + without some precision/character loss. + + We similarly use it to verify that expressions like + BIGINT_FIELD + is done correctly (as int/decimal/float according to literal type). 
+ + @todo rewrite it to use Arg_comparator (currently it's a simplified and + incomplete version of it) +*/ + +int stored_field_cmp_to_item(THD *thd, Field *field, Item *item) +{ + Type_handler_hybrid_field_type cmp(field->type_handler_for_comparison()); + if (cmp.aggregate_for_comparison(item->type_handler_for_comparison())) + { + // At fix_fields() time we checked that "field" and "item" are comparable + DBUG_ASSERT(0); + return 0; + } + return cmp.type_handler()->stored_field_cmp_to_item(thd, field, item); +} + + +void Item_cache::store(Item *item) +{ + example= item; + if (!item) + null_value= TRUE; + value_cached= FALSE; +} + +void Item_cache::print(String *str, enum_query_type query_type) +{ + if (example && // There is a cached item + (query_type & QT_NO_DATA_EXPANSION)) // Caller is show-create-table + { + // Instead of "cache" or the cached value, print the cached item name + example->print(str, query_type); + return; + } + + if (value_cached) + { + print_value(str); + return; + } + str->append(STRING_WITH_LEN("(")); + if (example) + example->print(str, query_type); + else + Item::print(str, query_type); + str->append(')'); +} + +/** + Assign to this cache NULL value if it is possible +*/ + +void Item_cache::set_null() +{ + if (maybe_null()) + { + null_value= TRUE; + value_cached= TRUE; + } +} + + +bool Item_cache_int::cache_value() +{ + if (!example) + return FALSE; + value_cached= TRUE; + value= example->val_int_result(); + null_value_inside= null_value= example->null_value; + unsigned_flag= example->unsigned_flag; + return TRUE; +} + + +String *Item_cache_int::val_str(String *str) +{ + if (!has_value()) + return NULL; + str->set_int(value, unsigned_flag, default_charset()); + return str; +} + + +my_decimal *Item_cache_int::val_decimal(my_decimal *decimal_val) +{ + if (!has_value()) + return NULL; + int2my_decimal(E_DEC_FATAL_ERROR, value, unsigned_flag, decimal_val); + return decimal_val; +} + +double Item_cache_int::val_real() +{ + if (!has_value()) + 
return 0.0; + return (double) value; +} + +longlong Item_cache_int::val_int() +{ + if (!has_value()) + return 0; + return value; +} + +int Item_cache_int::save_in_field(Field *field, bool no_conversions) +{ + int error; + if (!has_value()) + return set_field_to_null_with_conversions(field, no_conversions); + + field->set_notnull(); + error= field->store(value, unsigned_flag); + + return error ? error : field->table->in_use->is_error() ? 1 : 0; +} + + +Item *Item_cache_int::convert_to_basic_const_item(THD *thd) +{ + Item *new_item; + DBUG_ASSERT(value_cached || example != 0); + if (!value_cached) + cache_value(); + new_item= null_value ? + (Item*) new (thd->mem_root) Item_null(thd) : + (Item*) new (thd->mem_root) Item_int(thd, val_int(), max_length); + return new_item; +} + + +Item_cache_temporal::Item_cache_temporal(THD *thd, const Type_handler *handler) + :Item_cache_int(thd, handler) +{ + if (mysql_timestamp_type() == MYSQL_TIMESTAMP_ERROR) + set_handler(&type_handler_datetime2); +} + + +bool Item_cache_temporal::cache_value() +{ + if (!example) + return false; + value_cached= true; + value= example->val_datetime_packed_result(current_thd); + null_value_inside= null_value= example->null_value; + return true; +} + + +bool Item_cache_time::cache_value() +{ + if (!example) + return false; + value_cached= true; + value= example->val_time_packed_result(current_thd); + null_value_inside= null_value= example->null_value; + return true; +} + + +bool Item_cache_temporal::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + if (!has_value()) + { + bzero((char*) ltime,sizeof(*ltime)); + return (null_value= true); + } + + unpack_time(value, ltime, mysql_timestamp_type()); + return 0; +} + + +int Item_cache_temporal::save_in_field(Field *field, bool no_conversions) +{ + MYSQL_TIME ltime; + // This is a temporal type. No nanoseconds, so round mode is not important. 
+ if (get_date(field->get_thd(), <ime, TIME_CONV_NONE | TIME_FRAC_NONE)) + return set_field_to_null_with_conversions(field, no_conversions); + field->set_notnull(); + int error= field->store_time_dec(<ime, decimals); + return error ? error : field->table->in_use->is_error() ? 1 : 0; +} + + +void Item_cache_temporal::store_packed(longlong val_arg, Item *example_arg) +{ + /* An explicit value is given, save it. */ + store(example_arg); + value_cached= true; + value= val_arg; + null_value= false; +} + + +Item *Item_cache_temporal::clone_item(THD *thd) +{ + Item_cache *tmp= type_handler()->Item_get_cache(thd, this); + Item_cache_temporal *item= static_cast(tmp); + item->store_packed(value, example); + return item; +} + + +Item *Item_cache_temporal::convert_to_basic_const_item(THD *thd) +{ + DBUG_ASSERT(value_cached || example != 0); + if (!value_cached) + cache_value(); + if (null_value) + return new (thd->mem_root) Item_null(thd); + return make_literal(thd); +} + +Item *Item_cache_datetime::make_literal(THD *thd) +{ + Datetime dt(thd, this, TIME_CONV_NONE | TIME_FRAC_NONE); + return new (thd->mem_root) Item_datetime_literal(thd, &dt, decimals); +} + +Item *Item_cache_date::make_literal(THD *thd) +{ + Date d(thd, this, TIME_CONV_NONE | TIME_FRAC_NONE); + return new (thd->mem_root) Item_date_literal(thd, &d); +} + +Item *Item_cache_time::make_literal(THD *thd) +{ + Time t(thd, this); + return new (thd->mem_root) Item_time_literal(thd, &t, decimals); +} + + +int Item_cache_timestamp::save_in_field(Field *field, bool no_conversions) +{ + if (!has_value()) + return set_field_to_null_with_conversions(field, no_conversions); + return m_native.save_in_field(field, decimals); +} + + +bool Item_cache_timestamp::val_native(THD *thd, Native *to) +{ + if (!has_value()) + { + null_value= true; + return true; + } + return (null_value= to->copy(m_native)); +} + + +Datetime Item_cache_timestamp::to_datetime(THD *thd) +{ + DBUG_ASSERT(fixed() == 1); + if (!has_value()) + { + 
null_value= true; + return Datetime(); + } + return m_native.to_datetime(thd); +} + + +bool Item_cache_timestamp::get_date(THD *thd, MYSQL_TIME *ltime, + date_mode_t fuzzydate) +{ + if (!has_value()) + { + set_zero_time(ltime, MYSQL_TIMESTAMP_DATETIME); + return true; + } + Timestamp_or_zero_datetime tm(m_native); + return (null_value= tm.to_TIME(thd, ltime, fuzzydate)); +} + + +bool Item_cache_timestamp::cache_value() +{ + if (!example) + return false; + value_cached= true; + null_value_inside= null_value= + example->val_native_with_conversion_result(current_thd, &m_native, type_handler()); + return true; +} + + +bool Item_cache_real::cache_value() +{ + if (!example) + return FALSE; + value_cached= TRUE; + value= example->val_result(); + null_value_inside= null_value= example->null_value; + return TRUE; +} + + +double Item_cache_real::val_real() +{ + if (!has_value()) + return 0.0; + return value; +} + +longlong Item_cache_real::val_int() +{ + if (!has_value()) + return 0; + return Converter_double_to_longlong(value, unsigned_flag).result(); +} + + +String* Item_cache_double::val_str(String *str) +{ + if (!has_value()) + return NULL; + str->set_real(value, decimals, default_charset()); + return str; +} + + +String* Item_cache_float::val_str(String *str) +{ + if (!has_value()) + return NULL; + Float(value).to_string(str, decimals); + return str; +} + + +my_decimal *Item_cache_real::val_decimal(my_decimal *decimal_val) +{ + if (!has_value()) + return NULL; + double2my_decimal(E_DEC_FATAL_ERROR, value, decimal_val); + return decimal_val; +} + + +Item *Item_cache_real::convert_to_basic_const_item(THD *thd) +{ + Item *new_item; + DBUG_ASSERT(value_cached || example != 0); + if (!value_cached) + cache_value(); + new_item= null_value ? 
+ (Item*) new (thd->mem_root) Item_null(thd) : + (Item*) new (thd->mem_root) Item_float(thd, val_real(), + decimals); + return new_item; +} + + +bool Item_cache_decimal::cache_value() +{ + if (!example) + return FALSE; + value_cached= TRUE; + my_decimal *val= example->val_decimal_result(&decimal_value); + if (!(null_value_inside= null_value= example->null_value) && + val != &decimal_value) + my_decimal2decimal(val, &decimal_value); + return TRUE; +} + +double Item_cache_decimal::val_real() +{ + return !has_value() ? 0.0 : decimal_value.to_double(); +} + +longlong Item_cache_decimal::val_int() +{ + return !has_value() ? 0 : decimal_value.to_longlong(unsigned_flag); +} + +String* Item_cache_decimal::val_str(String *str) +{ + return !has_value() ? NULL : + decimal_value.to_string_round(str, decimals, &decimal_value); +} + +my_decimal *Item_cache_decimal::val_decimal(my_decimal *val) +{ + if (!has_value()) + return NULL; + return &decimal_value; +} + + +Item *Item_cache_decimal::convert_to_basic_const_item(THD *thd) +{ + Item *new_item; + DBUG_ASSERT(value_cached || example != 0); + if (!value_cached) + cache_value(); + if (null_value) + new_item= (Item*) new (thd->mem_root) Item_null(thd); + else + { + VDec tmp(this); + new_item= (Item*) new (thd->mem_root) Item_decimal(thd, tmp.ptr()); + } + return new_item; +} + + +bool Item_cache_str::cache_value() +{ + if (!example) + { + DBUG_ASSERT(value_cached == FALSE); + return FALSE; + } + value_cached= TRUE; + value_buff.set(buffer, sizeof(buffer), example->collation.collation); + value= example->str_result(&value_buff); + if ((null_value= null_value_inside= example->null_value)) + value= 0; + else if (value != &value_buff) + { + /* + We copy string value to avoid changing value if 'item' is table field + in queries like following (where t1.c is varchar): + select a, + (select a,b,c from t1 where t1.a=t2.a) = ROW(a,2,'a'), + (select c from t1 where a=t2.a) + from t2; + */ + value_buff.copy(*value); + value= &value_buff; + } 
+ else + value_buff.copy(); + return TRUE; +} + +double Item_cache_str::val_real() +{ + if (!has_value()) + return 0.0; + return value ? double_from_string_with_check(value) : 0.0; +} + + +longlong Item_cache_str::val_int() +{ + if (!has_value()) + return 0; + return value ? longlong_from_string_with_check(value) : 0; +} + + +String* Item_cache_str::val_str(String *str) +{ + if (!has_value()) + return 0; + return value; +} + + +my_decimal *Item_cache_str::val_decimal(my_decimal *decimal_val) +{ + if (!has_value()) + return NULL; + return value ? decimal_from_string_with_check(decimal_val, value) : 0; +} + + +int Item_cache_str::save_in_field(Field *field, bool no_conversions) +{ + if (!has_value()) + return set_field_to_null_with_conversions(field, no_conversions); + int res= Item_cache::save_in_field(field, no_conversions); + return (is_varbinary && field->type() == MYSQL_TYPE_STRING && + value->length() < field->field_length) ? 1 : res; +} + + +bool Item_cache_row::allocate(THD *thd, uint num) +{ + item_count= num; + return (!values && + !(values= + (Item_cache **) thd->calloc(sizeof(Item_cache *)*item_count))); +} + + +Item *Item_cache_str::convert_to_basic_const_item(THD *thd) +{ + Item *new_item; + DBUG_ASSERT(value_cached || example != 0); + if (!value_cached) + cache_value(); + if (null_value) + new_item= (Item*) new (thd->mem_root) Item_null(thd); + else + { + char buff[MAX_FIELD_WIDTH]; + String tmp(buff, sizeof(buff), value->charset()); + String *result= val_str(&tmp); + uint length= result->length(); + char *tmp_str= thd->strmake(result->ptr(), length); + new_item= new (thd->mem_root) Item_string(thd, tmp_str, length, + result->charset()); + } + return new_item; +} + + +bool Item_cache_row::setup(THD *thd, Item *item) +{ + example= item; + null_value= true; + + if (!values && allocate(thd, item->cols())) + return 1; + for (uint i= 0; i < item_count; i++) + { + Item *el= item->element_index(i); + + if ((!values[i]) && !(values[i]= el->get_cache(thd))) + 
return 1; + + values[i]->setup(thd, el); + } + return 0; +} + + +void Item_cache_row::store(Item * item) +{ + example= item; + if (!item) + { + null_value= TRUE; + return; + } + for (uint i= 0; i < item_count; i++) + values[i]->store(item->element_index(i)); +} + + +bool Item_cache_row::cache_value() +{ + if (!example) + return FALSE; + value_cached= TRUE; + null_value= TRUE; + null_value_inside= false; + example->bring_value(); + + /* + For Item_cache_row null_value is set to TRUE only when ALL the values + inside the cache are NULL + */ + for (uint i= 0; i < item_count; i++) + { + values[i]->cache_value(); + null_value&= values[i]->null_value; + null_value_inside|= values[i]->null_value; + } + return TRUE; +} + + +void Item_cache_row::illegal_method_call(const char *method) +{ + DBUG_ENTER("Item_cache_row::illegal_method_call"); + DBUG_PRINT("error", ("!!! %s method was called for row item", method)); + DBUG_ASSERT(0); + my_error(ER_OPERAND_COLUMNS, MYF(0), 1); + DBUG_VOID_RETURN; +} + + +bool Item_cache_row::check_cols(uint c) +{ + if (c != item_count) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), c); + return 1; + } + return 0; +} + + +bool Item_cache_row::null_inside() +{ + for (uint i= 0; i < item_count; i++) + { + if (values[i]->cols() > 1) + { + if (values[i]->null_inside()) + return 1; + } + else + { + values[i]->update_null_value(); + if (values[i]->null_value) + return 1; + } + } + return 0; +} + + +void Item_cache_row::bring_value() +{ + if (!example) + return; + example->bring_value(); + null_value= example->null_value; + for (uint i= 0; i < item_count; i++) + values[i]->bring_value(); +} + + +/** + Assign to this cache NULL value if it is possible +*/ + +void Item_cache_row::set_null() +{ + Item_cache::set_null(); + if (!values) + return; + for (uint i= 0; i < item_count; i++) + values[i]->set_null(); +}; + + +double Item_type_holder::val_real() +{ + DBUG_ASSERT(0); // should never be called + return 0.0; +} + + +longlong Item_type_holder::val_int() +{ + 
DBUG_ASSERT(0); // should never be called + return 0; +} + +my_decimal *Item_type_holder::val_decimal(my_decimal *) +{ + DBUG_ASSERT(0); // should never be called + return 0; +} + +String *Item_type_holder::val_str(String*) +{ + DBUG_ASSERT(0); // should never be called + return 0; +} + +bool Item_type_holder::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + DBUG_ASSERT(0); // should never be called + return true; +} + +void Item_result_field::cleanup() +{ + DBUG_ENTER("Item_result_field::cleanup()"); + Item_fixed_hybrid::cleanup(); + result_field= 0; + DBUG_VOID_RETURN; +} + +/** + Dummy error processor used by default by Name_resolution_context. + + @note + do nothing +*/ + +void dummy_error_processor(THD *thd, void *data) +{} + +/** + Wrapper of hide_view_error call for Name_resolution_context error + processor. + + @note + hide view underlying tables details in error messages +*/ + +void view_error_processor(THD *thd, void *data) +{ + ((TABLE_LIST *)data)->hide_view_error(thd); +} + + +st_select_lex *Item_ident::get_depended_from() const +{ + st_select_lex *dep; + if ((dep= depended_from)) + for ( ; dep->merged_into; dep= dep->merged_into) ; + return dep; +} + + +table_map Item_ref::used_tables() const +{ + return get_depended_from() ? OUTER_REF_TABLE_BIT : (*ref)->used_tables(); +} + + +void Item_ref::update_used_tables() +{ + if (!get_depended_from()) + (*ref)->update_used_tables(); +} + +void Item_direct_view_ref::update_used_tables() +{ + set_null_ref_table(); + Item_direct_ref::update_used_tables(); +} + + +table_map Item_direct_view_ref::used_tables() const +{ + DBUG_ASSERT(fixed()); + + if (get_depended_from()) + return OUTER_REF_TABLE_BIT; + + if (view->is_merged_derived() || view->merged || !view->table) + { + table_map used= (*ref)->used_tables(); + return (used ? + used : + (null_ref_table != NO_NULL_TABLE && !null_ref_table->const_table ? 
+ null_ref_table->map : + (table_map)0 )); + } + return view->table->map; +} + +table_map Item_direct_view_ref::not_null_tables() const +{ + if (get_depended_from()) + return 0; + if (!( view->merged || !view->table)) + return view->table->map; + TABLE *tab= get_null_ref_table(); + if (tab == NO_NULL_TABLE || (*ref)->used_tables()) + return (*ref)->not_null_tables(); + return get_null_ref_table()->map; +} + +/* + we add RAND_TABLE_BIT to prevent moving this item from HAVING to WHERE +*/ +table_map Item_ref_null_helper::used_tables() const +{ + return (get_depended_from() ? + OUTER_REF_TABLE_BIT : + (*ref)->used_tables() | RAND_TABLE_BIT); +} + + +#ifndef DBUG_OFF + +/* Debugger help function */ +static char dbug_item_print_buf[2048]; + +const char *dbug_print_item(Item *item) +{ + char *buf= dbug_item_print_buf; + String str(buf, sizeof(dbug_item_print_buf), &my_charset_bin); + str.length(0); + if (!item) + return "(Item*)NULL"; + + THD *thd= current_thd; + ulonglong save_option_bits= thd->variables.option_bits; + thd->variables.option_bits &= ~OPTION_QUOTE_SHOW_CREATE; + + item->print(&str, QT_EXPLAIN); + + thd->variables.option_bits= save_option_bits; + + if (str.c_ptr_safe() == buf) + return buf; + else + return "Couldn't fit into buffer"; +} + +const char *dbug_print_select(SELECT_LEX *sl) +{ + char *buf= dbug_item_print_buf; + String str(buf, sizeof(dbug_item_print_buf), &my_charset_bin); + str.length(0); + if (!sl) + return "(SELECT_LEX*)NULL"; + + THD *thd= current_thd; + ulonglong save_option_bits= thd->variables.option_bits; + thd->variables.option_bits &= ~OPTION_QUOTE_SHOW_CREATE; + + sl->print(thd, &str, QT_EXPLAIN); + + thd->variables.option_bits= save_option_bits; + + if (str.c_ptr() == buf) + return buf; + else + return "Couldn't fit into buffer"; +} + +const char *dbug_print_unit(SELECT_LEX_UNIT *un) +{ + char *buf= dbug_item_print_buf; + String str(buf, sizeof(dbug_item_print_buf), &my_charset_bin); + str.length(0); + if (!un) + return 
"(SELECT_LEX_UNIT*)NULL"; + + THD *thd= current_thd; + ulonglong save_option_bits= thd->variables.option_bits; + thd->variables.option_bits &= ~OPTION_QUOTE_SHOW_CREATE; + + un->print(&str, QT_EXPLAIN); + + thd->variables.option_bits= save_option_bits; + + if (str.c_ptr() == buf) + return buf; + else + return "Couldn't fit into buffer"; +} + +const char *dbug_print(Item *x) { return dbug_print_item(x); } +const char *dbug_print(SELECT_LEX *x) { return dbug_print_select(x); } +const char *dbug_print(SELECT_LEX_UNIT *x) { return dbug_print_unit(x); } + +#endif /*DBUG_OFF*/ + +void Item::register_in(THD *thd) +{ + next= thd->free_list; + thd->free_list= this; +} + + +Item_direct_ref_to_item::Item_direct_ref_to_item(THD *thd, Item *item) + : Item_direct_ref(thd, NULL, NULL, empty_clex_str, empty_clex_str) +{ + m_item= item; + ref= (Item**)&m_item; +} + +bool Item_direct_ref_to_item::fix_fields(THD *thd, Item **) +{ + DBUG_ASSERT(m_item != NULL); + if (m_item->fix_fields_if_needed_for_scalar(thd, ref)) + return TRUE; + set_properties(); + return FALSE; +} + +void Item_direct_ref_to_item::print(String *str, enum_query_type query_type) +{ + m_item->print(str, query_type); +} + +Item *Item_direct_ref_to_item::safe_charset_converter(THD *thd, + CHARSET_INFO *tocs) +{ + Item *conv= m_item->safe_charset_converter(thd, tocs); + if (conv != m_item) + { + if (conv== NULL || conv->fix_fields(thd, &conv)) + return NULL; + change_item(thd, conv); + } + return this; +} + +void Item_direct_ref_to_item::change_item(THD *thd, Item *i) +{ + DBUG_ASSERT(i->fixed()); + thd->change_item_tree(ref, i); + set_properties(); +} + + +bool Item::cleanup_excluding_immutables_processor (void *arg) +{ + if (!(get_extraction_flag() == MARKER_IMMUTABLE)) + return cleanup_processor(arg); + else + { + clear_extraction_flag(); + return false; + } +} + + +bool ignored_list_includes_table(ignored_tables_list_t list, TABLE_LIST *tbl) +{ + if (!list) + return false; + List_iterator it(*list); + TABLE_LIST 
*list_tbl; + while ((list_tbl = it++)) + { + if (list_tbl == tbl) + return true; + } + return false; +} diff --git a/sql/item.h b/sql/item.h new file mode 100644 index 00000000..b967b4cc --- /dev/null +++ b/sql/item.h @@ -0,0 +1,7957 @@ +#ifndef SQL_ITEM_INCLUDED +#define SQL_ITEM_INCLUDED + +/* Copyright (c) 2000, 2017, Oracle and/or its affiliates. + Copyright (c) 2009, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include "sql_priv.h" /* STRING_BUFFER_USUAL_SIZE */ +#include "unireg.h" +#include "sql_const.h" /* RAND_TABLE_BIT, MAX_FIELD_NAME */ +#include "field.h" /* Derivation */ +#include "sql_type.h" +#include "sql_time.h" +#include "mem_root_array.h" + +#include "cset_narrowing.h" + +C_MODE_START +#include + +/* + A prototype for a C-compatible structure to store a value of any data type. + Currently it has to stay in /sql, as it depends on String and my_decimal. + We'll do the following changes: + 1. add pure C "struct st_string" and "struct st_my_decimal" + 2. change type of m_string to struct st_string and move inside the union + 3. change type of m_decmal to struct st_my_decimal and move inside the union + 4. 
move the definition to some file in /include +*/ +class st_value +{ +public: + st_value() {} + st_value(char *buffer, size_t buffer_size) : + m_string(buffer, buffer_size, &my_charset_bin) + {} + enum enum_dynamic_column_type m_type; + union + { + longlong m_longlong; + double m_double; + MYSQL_TIME m_time; + } value; + String m_string; + my_decimal m_decimal; +}; + +C_MODE_END + + +class Value: public st_value +{ +public: + Value(char *buffer, size_t buffer_size) : st_value(buffer, buffer_size) + {} + Value() + {} + bool is_null() const { return m_type == DYN_COL_NULL; } + bool is_longlong() const + { + return m_type == DYN_COL_UINT || m_type == DYN_COL_INT; + } + bool is_double() const { return m_type == DYN_COL_DOUBLE; } + bool is_temporal() const { return m_type == DYN_COL_DATETIME; } + bool is_string() const { return m_type == DYN_COL_STRING; } + bool is_decimal() const { return m_type == DYN_COL_DECIMAL; } +}; + + +template +class ValueBuffer: public Value +{ + char buffer[buffer_size]; +public: + ValueBuffer(): Value(buffer, buffer_size) + {} + void reset_buffer() + { + m_string.set_buffer_if_not_allocated(buffer, buffer_size, &my_charset_bin); + } +}; + + +#ifdef DBUG_OFF +static inline const char *dbug_print_item(Item *item) { return NULL; } +#else +const char *dbug_print_item(Item *item); +#endif + +class Virtual_tmp_table; +class sp_head; +class Protocol; +struct TABLE_LIST; +void item_init(void); /* Init item functions */ +class Item_basic_value; +class Item_result_field; +class Item_field; +class Item_ref; +class Item_param; +class user_var_entry; +class JOIN; +struct KEY_FIELD; +struct SARGABLE_PARAM; +class RANGE_OPT_PARAM; +class SEL_TREE; + +enum precedence { + LOWEST_PRECEDENCE, + ASSIGN_PRECEDENCE, // := + OR_PRECEDENCE, // OR, || (unless PIPES_AS_CONCAT) + XOR_PRECEDENCE, // XOR + AND_PRECEDENCE, // AND, && + NOT_PRECEDENCE, // NOT (unless HIGH_NOT_PRECEDENCE) + CMP_PRECEDENCE, // =, <=>, >=, >, <=, <, <>, !=, IS + BETWEEN_PRECEDENCE, // BETWEEN 
+ IN_PRECEDENCE, // IN, LIKE, REGEXP + BITOR_PRECEDENCE, // | + BITAND_PRECEDENCE, // & + SHIFT_PRECEDENCE, // <<, >> + INTERVAL_PRECEDENCE, // first argument in +INTERVAL + ADD_PRECEDENCE, // +, - + MUL_PRECEDENCE, // *, /, DIV, %, MOD + BITXOR_PRECEDENCE, // ^ + PIPES_PRECEDENCE, // || (if PIPES_AS_CONCAT) + NEG_PRECEDENCE, // unary -, ~, !, NOT (if HIGH_NOT_PRECEDENCE) + COLLATE_PRECEDENCE, // BINARY, COLLATE + DEFAULT_PRECEDENCE, + HIGHEST_PRECEDENCE +}; + +bool mark_unsupported_function(const char *where, void *store, uint result); + +/* convenience helper for mark_unsupported_function() above */ +bool mark_unsupported_function(const char *w1, const char *w2, + void *store, uint result); + +/* Bits for the split_sum_func() function */ +#define SPLIT_SUM_SKIP_REGISTERED 1 /* Skip registered funcs */ +#define SPLIT_SUM_SELECT 2 /* SELECT item; Split all parts */ + +/* + Values for item->marker for cond items in the WHERE clause as used + by the optimizer. + + Note that for Item_fields, the marker contains + 'select->cur_pos_in_select_list +*/ +/* Used to check GROUP BY list in the MODE_ONLY_FULL_GROUP_BY mode */ +#define MARKER_UNDEF_POS -1 +#define MARKER_UNUSED 0 +#define MARKER_CHANGE_COND 1 +#define MARKER_PROCESSED 2 +#define MARKER_CHECK_ON_READ 3 +#define MARKER_NULL_KEY 4 +#define MARKER_FOUND_IN_ORDER 6 + +/* Used as bits in marker by Item::check_pushable_cond() */ +#define MARKER_NO_EXTRACTION (1 << 6) +#define MARKER_FULL_EXTRACTION (1 << 7) +#define MARKER_DELETION (1 << 8) +#define MARKER_IMMUTABLE (1 << 9) +#define MARKER_SUBSTITUTION (1 << 10) + +/* Used as bits in marker by window functions */ +#define MARKER_SORTORDER_CHANGE (1 << 11) +#define MARKER_PARTITION_CHANGE (1 << 12) +#define MARKER_FRAME_CHANGE (1 << 13) +#define MARKER_EXTRACTION_MASK \ + (MARKER_NO_EXTRACTION | MARKER_FULL_EXTRACTION | MARKER_DELETION | \ + MARKER_IMMUTABLE) + +extern const char *item_empty_name; + +void dummy_error_processor(THD *thd, void *data); + +void 
view_error_processor(THD *thd, void *data); + +typedef List* ignored_tables_list_t; +bool ignored_list_includes_table(ignored_tables_list_t list, TABLE_LIST *tbl); + +/* + Instances of Name_resolution_context store the information necessary for + name resolution of Items and other context analysis of a query made in + fix_fields(). + + This structure is a part of SELECT_LEX, a pointer to this structure is + assigned when an item is created (which happens mostly during parsing + (sql_yacc.yy)), but the structure itself will be initialized after parsing + is complete + + TODO: move subquery of INSERT ... SELECT and CREATE ... SELECT to + separate SELECT_LEX which allow to remove tricks of changing this + structure before and after INSERT/CREATE and its SELECT to make correct + field name resolution. +*/ +struct Name_resolution_context: Sql_alloc +{ + /* + The name resolution context to search in when an Item cannot be + resolved in this context (the context of an outer select) + */ + Name_resolution_context *outer_context= nullptr; + + /* + List of tables used to resolve the items of this context. Usually these + are tables from the FROM clause of SELECT statement. The exceptions are + INSERT ... SELECT and CREATE ... SELECT statements, where SELECT + subquery is not moved to a separate SELECT_LEX. For these types of + statements we have to change this member dynamically to ensure correct + name resolution of different parts of the statement. + */ + TABLE_LIST *table_list= nullptr; + /* + In most cases the two table references below replace 'table_list' above + for the purpose of name resolution. The first and last name resolution + table references allow us to search only in a sub-tree of the nested + join tree in a FROM clause. This is needed for NATURAL JOIN, JOIN ... USING + and JOIN ... ON. + */ + TABLE_LIST *first_name_resolution_table= nullptr; + /* + Last table to search in the list of leaf table references that begins + with first_name_resolution_table. 
+ */ + TABLE_LIST *last_name_resolution_table= nullptr; + + /* Cache first_name_resolution_table in setup_natural_join_row_types */ + TABLE_LIST *natural_join_first_table= nullptr; + /* + SELECT_LEX item belong to, in case of merged VIEW it can differ from + SELECT_LEX where item was created, so we can't use table_list/field_list + from there + */ + st_select_lex *select_lex= nullptr; + + /* + Processor of errors caused during Item name resolving, now used only to + hide underlying tables in errors about views (i.e. it substitute some + errors for views) + */ + void (*error_processor)(THD *, void *)= &dummy_error_processor; + void *error_processor_data= nullptr; + + /* + When TRUE items are resolved in this context both against the + SELECT list and this->table_list. If FALSE, items are resolved + only against this->table_list. + */ + bool resolve_in_select_list= false; + + /* + Bitmap of tables that should be ignored when doing name resolution. + Normally it is {0}. Non-zero values are used by table functions. + */ + ignored_tables_list_t ignored_tables= nullptr; + + /* + Security context of this name resolution context. It's used for views + and is non-zero only if the view is defined with SQL SECURITY DEFINER. 
+ */ + Security_context *security_ctx= nullptr; + + Name_resolution_context() = default; + + /** + Name resolution context with resolution in only one table + */ + Name_resolution_context(TABLE_LIST *table) : + first_name_resolution_table(table), last_name_resolution_table(table) + {} + + void init() + { + resolve_in_select_list= FALSE; + error_processor= &dummy_error_processor; + ignored_tables= nullptr; + first_name_resolution_table= nullptr; + last_name_resolution_table= nullptr; + } + + void resolve_in_table_list_only(TABLE_LIST *tables) + { + table_list= first_name_resolution_table= tables; + resolve_in_select_list= FALSE; + } + + void process_error(THD *thd) + { + (*error_processor)(thd, error_processor_data); + } + st_select_lex *outer_select() + { + return (outer_context ? + outer_context->select_lex : + NULL); + } +}; + + +/* + Store and restore the current state of a name resolution context. +*/ + +class Name_resolution_context_state +{ +private: + TABLE_LIST *save_table_list; + TABLE_LIST *save_first_name_resolution_table; + TABLE_LIST *save_next_name_resolution_table; + bool save_resolve_in_select_list; + TABLE_LIST *save_next_local; + +public: + Name_resolution_context_state() = default; /* Remove gcc warning */ + +public: + /* Save the state of a name resolution context. */ + void save_state(Name_resolution_context *context, TABLE_LIST *table_list) + { + save_table_list= context->table_list; + save_first_name_resolution_table= context->first_name_resolution_table; + save_resolve_in_select_list= context->resolve_in_select_list; + save_next_local= table_list->next_local; + save_next_name_resolution_table= table_list->next_name_resolution_table; + } + + /* Restore a name resolution context from saved state. 
*/ + void restore_state(Name_resolution_context *context, TABLE_LIST *table_list) + { + table_list->next_local= save_next_local; + table_list->next_name_resolution_table= save_next_name_resolution_table; + context->table_list= save_table_list; + context->first_name_resolution_table= save_first_name_resolution_table; + context->resolve_in_select_list= save_resolve_in_select_list; + } + + TABLE_LIST *get_first_name_resolution_table() + { + return save_first_name_resolution_table; + } +}; + +class Name_resolution_context_backup +{ + Name_resolution_context &ctx; + TABLE_LIST &table_list; + table_map save_map; + Name_resolution_context_state ctx_state; + +public: + Name_resolution_context_backup(Name_resolution_context &_ctx, TABLE_LIST &_table_list) + : ctx(_ctx), table_list(_table_list), save_map(_table_list.map) + { + ctx_state.save_state(&ctx, &table_list); + ctx.table_list= &table_list; + ctx.first_name_resolution_table= &table_list; + } + ~Name_resolution_context_backup() + { + ctx_state.restore_state(&ctx, &table_list); + table_list.map= save_map; + } +}; + + +/* + This enum is used to report information about monotonicity of function + represented by Item* tree. + Monotonicity is defined only for Item* trees that represent table + partitioning expressions (i.e. have no subselects/user vars/PS parameters + etc etc). An Item* tree is assumed to have the same monotonicity properties + as its corresponding function F: + + [signed] longlong F(field1, field2, ...) { + put values of field_i into table record buffer; + return item->val_int(); + } + + NOTE + At the moment function monotonicity is not well defined (and so may be + incorrect) for Item trees with parameters/return types that are different + from INT_RESULT, may be NULL, or are unsigned. + It will be possible to address this issue once the related partitioning bugs + (BUG#16002, BUG#15447, BUG#13436) are fixed. 
+ + The NOT_NULL enums are used in TO_DAYS, since TO_DAYS('2001-00-00') returns + NULL which puts those rows into the NULL partition, but + '2000-12-31' < '2001-00-00' < '2001-01-01'. So special handling is needed + for this (see Bug#20577). +*/ + +typedef enum monotonicity_info +{ + NON_MONOTONIC, /* none of the below holds */ + MONOTONIC_INCREASING, /* F() is unary and (x < y) => (F(x) <= F(y)) */ + MONOTONIC_INCREASING_NOT_NULL, /* But only for valid/real x and y */ + MONOTONIC_STRICT_INCREASING,/* F() is unary and (x < y) => (F(x) < F(y)) */ + MONOTONIC_STRICT_INCREASING_NOT_NULL /* But only for valid/real x and y */ +} enum_monotonicity_info; + +/*************************************************************************/ + +class sp_rcontext; + +/** + A helper class to collect different behavior of various kinds of SP variables: + - local SP variables and SP parameters + - PACKAGE BODY routine variables + - (there will be more kinds in the future) +*/ + +class Sp_rcontext_handler +{ +public: + virtual ~Sp_rcontext_handler() = default; + /** + A prefix used for SP variable names in queries: + - EXPLAIN EXTENDED + - SHOW PROCEDURE CODE + Local variables and SP parameters have empty prefixes. + Package body variables are marked with a special prefix. + This improves readability of the output of these queries, + especially when a local variable or a parameter has the same + name with a package body variable. + */ + virtual const LEX_CSTRING *get_name_prefix() const= 0; + /** + At execution time THD->spcont points to the run-time context (sp_rcontext) + of the currently executed routine. + Local variables store their data in the sp_rcontext pointed by thd->spcont. + Package body variables store data in separate sp_rcontext that belongs + to the package. + This method provides access to the proper sp_rcontext structure, + depending on the SP variable kind. 
+ */ + virtual sp_rcontext *get_rcontext(sp_rcontext *ctx) const= 0; +}; + + +class Sp_rcontext_handler_local: public Sp_rcontext_handler +{ +public: + const LEX_CSTRING *get_name_prefix() const; + sp_rcontext *get_rcontext(sp_rcontext *ctx) const; +}; + + +class Sp_rcontext_handler_package_body: public Sp_rcontext_handler +{ +public: + const LEX_CSTRING *get_name_prefix() const; + sp_rcontext *get_rcontext(sp_rcontext *ctx) const; +}; + + +extern MYSQL_PLUGIN_IMPORT + Sp_rcontext_handler_local sp_rcontext_handler_local; + + +extern MYSQL_PLUGIN_IMPORT + Sp_rcontext_handler_package_body sp_rcontext_handler_package_body; + + + +class Item_equal; + +struct st_join_table* const NO_PARTICULAR_TAB= (struct st_join_table*)0x1; + +typedef struct replace_equal_field_arg +{ + Item_equal *item_equal; + struct st_join_table *context_tab; +} REPLACE_EQUAL_FIELD_ARG; + +class Settable_routine_parameter +{ +public: + /* + Set required privileges for accessing the parameter. + + SYNOPSIS + set_required_privilege() + rw if 'rw' is true then we are going to read and set the + parameter, so SELECT and UPDATE privileges might be + required, otherwise we only reading it and SELECT + privilege might be required. + */ + Settable_routine_parameter() = default; + virtual ~Settable_routine_parameter() = default; + virtual void set_required_privilege(bool rw) {}; + + /* + Set parameter value. + + SYNOPSIS + set_value() + thd thread handle + ctx context to which parameter belongs (if it is local + variable). + it item which represents new value + + RETURN + FALSE if parameter value has been set, + TRUE if error has occurred. 
+ */ + virtual bool set_value(THD *thd, sp_rcontext *ctx, Item **it)= 0; + + virtual void set_out_param_info(Send_field *info) {} + + virtual const Send_field *get_out_param_info() const + { return NULL; } + + virtual Item_param *get_item_param() { return 0; } +}; + + +/* + A helper class to calculate offset and length of a query fragment + - outside of SP + - inside an SP + - inside a compound block +*/ +class Query_fragment +{ + uint m_pos; + uint m_length; + void set(size_t pos, size_t length) + { + DBUG_ASSERT(pos < UINT_MAX32); + DBUG_ASSERT(length < UINT_MAX32); + m_pos= (uint) pos; + m_length= (uint) length; + } +public: + Query_fragment(THD *thd, sp_head *sphead, const char *start, const char *end); + uint pos() const { return m_pos; } + uint length() const { return m_length; } +}; + + +/** + This is used for items in the query that needs to be rewritten + before binlogging + + At the moment this applies to Item_param and Item_splocal +*/ +class Rewritable_query_parameter +{ + public: + /* + Offset inside the query text. + Value of 0 means that this object doesn't have to be replaced + (for example SP variables in control statements) + */ + my_ptrdiff_t pos_in_query; + + /* + Byte length of parameter name in the statement. This is not + Item::name.length because name.length contains byte length of UTF8-encoded + name, but the query string is in the client charset. 
+ */ + uint len_in_query; + + bool limit_clause_param; + + Rewritable_query_parameter(uint pos_in_q= 0, uint len_in_q= 0) + : pos_in_query(pos_in_q), len_in_query(len_in_q), + limit_clause_param(false) + { } + + virtual ~Rewritable_query_parameter() = default; + + virtual bool append_for_log(THD *thd, String *str) = 0; +}; + +class Copy_query_with_rewrite +{ + THD *thd; + const char *src; + size_t src_len, from; + String *dst; + + bool copy_up_to(size_t bytes) + { + DBUG_ASSERT(bytes >= from); + return dst->append(src + from, uint32(bytes - from)); + } + +public: + + Copy_query_with_rewrite(THD *t, const char *s, size_t l, String *d) + :thd(t), src(s), src_len(l), from(0), dst(d) { } + + bool append(Rewritable_query_parameter *p) + { + if (copy_up_to(p->pos_in_query) || p->append_for_log(thd, dst)) + return true; + from= p->pos_in_query + p->len_in_query; + return false; + } + + bool finalize() + { return copy_up_to(src_len); } +}; + +struct st_dyncall_create_def +{ + Item *key, *value; + CHARSET_INFO *cs; + uint len, frac; + DYNAMIC_COLUMN_TYPE type; +}; + +typedef struct st_dyncall_create_def DYNCALL_CREATE_DEF; + + +typedef bool (Item::*Item_processor) (void *arg); +/* + Analyzer function + SYNOPSIS + argp in/out IN: Analysis parameter + OUT: Parameter to be passed to the transformer + + RETURN + TRUE Invoke the transformer + FALSE Don't do it + +*/ +typedef bool (Item::*Item_analyzer) (uchar **argp); +typedef Item* (Item::*Item_transformer) (THD *thd, uchar *arg); +typedef void (*Cond_traverser) (const Item *item, void *arg); +typedef bool (Item::*Pushdown_checker) (uchar *arg); + +struct st_cond_statistic; + +struct find_selective_predicates_list_processor_data +{ + TABLE *table; + List list; +}; + +class MY_LOCALE; + +class Item_equal; +class COND_EQUAL; + +class st_select_lex_unit; + +class Item_func_not; +class Item_splocal; + +/** + String_copier that sends Item specific warnings. 
+*/ +class String_copier_for_item: public String_copier +{ + THD *m_thd; +public: + bool copy_with_warn(CHARSET_INFO *dstcs, String *dst, + CHARSET_INFO *srccs, const char *src, + uint32 src_length, uint32 nchars); + String_copier_for_item(THD *thd): m_thd(thd) { } +}; + + +/** + A helper class describing what kind of Item created a temporary field. + - If m_field is set, then the temporary field was created from Field + (e.g. when the Item was Item_field, or Item_ref pointing to Item_field) + - If m_default_field is set, then there is a usable DEFAULT value. + (e.g. when the Item is Item_field) + - If m_item_result_field is set, then the temporary field was created + from certain sub-types of Item_result_field (e.g. Item_func) + See create_tmp_field() in sql_select.cc for details. +*/ + +class Tmp_field_src +{ + Field *m_field; + Field *m_default_field; + Item_result_field *m_item_result_field; +public: + Tmp_field_src() + :m_field(0), + m_default_field(0), + m_item_result_field(0) + { } + Field *field() const { return m_field; } + Field *default_field() const { return m_default_field; } + Item_result_field *item_result_field() const { return m_item_result_field; } + void set_field(Field *field) { m_field= field; } + void set_default_field(Field *field) { m_default_field= field; } + void set_item_result_field(Item_result_field *item) + { m_item_result_field= item; } +}; + + +/** + Parameters for create_tmp_field_ex(). + See create_tmp_field() in sql_select.cc for details. 
+*/ + +class Tmp_field_param +{ + bool m_group; + bool m_modify_item; + bool m_table_cant_handle_bit_fields; + bool m_make_copy_field; +public: + Tmp_field_param(bool group, + bool modify_item, + bool table_cant_handle_bit_fields, + bool make_copy_field) + :m_group(group), + m_modify_item(modify_item), + m_table_cant_handle_bit_fields(table_cant_handle_bit_fields), + m_make_copy_field(make_copy_field) + { } + bool group() const { return m_group; } + bool modify_item() const { return m_modify_item; } + bool table_cant_handle_bit_fields() const + { return m_table_cant_handle_bit_fields; } + bool make_copy_field() const { return m_make_copy_field; } + void set_modify_item(bool to) { m_modify_item= to; } +}; + + +class Item_const +{ +public: + virtual ~Item_const() = default; + virtual const Type_all_attributes *get_type_all_attributes_from_const() const= 0; + virtual bool const_is_null() const { return false; } + virtual const longlong *const_ptr_longlong() const { return NULL; } + virtual const double *const_ptr_double() const { return NULL; } + virtual const my_decimal *const_ptr_my_decimal() const { return NULL; } + virtual const MYSQL_TIME *const_ptr_mysql_time() const { return NULL; } + virtual const String *const_ptr_string() const { return NULL; } +}; + + +/****************************************************************************/ + +#define STOP_PTR ((void *) 1) + +/* Base flags (including IN) for an item */ + +typedef uint8 item_flags_t; + +enum class item_base_t : item_flags_t +{ + NONE= 0, +#define ITEM_FLAGS_MAYBE_NULL_SHIFT 0 // Must match MAYBE_NULL + MAYBE_NULL= (1<<0), // May be NULL. + IN_ROLLUP= (1<<1), // Appears in GROUP BY list + // of a query with ROLLUP. + FIXED= (1<<2), // Was fixed with fix_fields(). + IS_EXPLICIT_NAME= (1<<3), // The name of this Item was set by the user + // (or was auto generated otherwise) + IS_IN_WITH_CYCLE= (1<<4), // This item is in CYCLE clause of WITH. 
+ AT_TOP_LEVEL= (1<<5) // At top (AND) level of item tree +}; + + +/* Flags that tells us what kind of items the item contains */ + +enum class item_with_t : item_flags_t +{ + NONE= 0, + SP_VAR= (1<<0), // If Item contains a stored procedure variable + WINDOW_FUNC= (1<<1), // If item contains a window func + FIELD= (1<<2), // If any item except Item_sum contains a field. + SUM_FUNC= (1<<3), // If item contains a sum func + SUBQUERY= (1<<4), // If item containts a sub query + ROWNUM_FUNC= (1<<5), // If ROWNUM function was used + PARAM= (1<<6) // If user parameter was used +}; + + +/* Make operations in item_base_t and item_with_t work like 'int' */ +static inline item_base_t operator&(const item_base_t a, const item_base_t b) +{ + return (item_base_t) (((item_flags_t) a) & ((item_flags_t) b)); +} + +static inline item_base_t & operator&=(item_base_t &a, item_base_t b) +{ + a= (item_base_t) (((item_flags_t) a) & (item_flags_t) b); + return a; +} + +static inline item_base_t operator|(const item_base_t a, const item_base_t b) +{ + return (item_base_t) (((item_flags_t) a) | ((item_flags_t) b)); +} + +static inline item_base_t & operator|=(item_base_t &a, item_base_t b) +{ + a= (item_base_t) (((item_flags_t) a) | (item_flags_t) b); + return a; +} + +static inline item_base_t operator~(const item_base_t a) +{ + return (item_base_t) ~(item_flags_t) a; +} + +static inline item_with_t operator&(const item_with_t a, const item_with_t b) +{ + return (item_with_t) (((item_flags_t) a) & ((item_flags_t) b)); +} + +static inline item_with_t & operator&=(item_with_t &a, item_with_t b) +{ + a= (item_with_t) (((item_flags_t) a) & (item_flags_t) b); + return a; +} + +static inline item_with_t operator|(const item_with_t a, const item_with_t b) +{ + return (item_with_t) (((item_flags_t) a) | ((item_flags_t) b)); +} + +static inline item_with_t & operator|=(item_with_t &a, item_with_t b) +{ + a= (item_with_t) (((item_flags_t) a) | (item_flags_t) b); + return a; +} + +static inline 
item_with_t operator~(const item_with_t a) +{ + return (item_with_t) ~(item_flags_t) a; +} + + +class Item :public Value_source, + public Type_all_attributes +{ + static void *operator new(size_t size); + +public: + static void *operator new(size_t size, MEM_ROOT *mem_root) throw () + { return alloc_root(mem_root, size); } + static void operator delete(void *ptr,size_t size) { TRASH_FREE(ptr, size); } + static void operator delete(void *ptr, MEM_ROOT *mem_root) {} + + enum Type {FIELD_ITEM= 0, FUNC_ITEM, SUM_FUNC_ITEM, + WINDOW_FUNC_ITEM, + /* + NOT NULL literal-alike constants, which do not change their + value during an SQL statement execution, but can optionally + change their value between statements: + - Item_literal - real NOT NULL constants + - Item_param - can change between statements + - Item_splocal - can change between statements + - Item_user_var_as_out_param - hack + Note, Item_user_var_as_out_param actually abuses the type code. + It should be moved out of the Item tree eventually. + */ + CONST_ITEM, + NULL_ITEM, // Item_null or Item_param bound to NULL + COPY_STR_ITEM, FIELD_AVG_ITEM, DEFAULT_VALUE_ITEM, + CONTEXTUALLY_TYPED_VALUE_ITEM, + PROC_ITEM,COND_ITEM, REF_ITEM, FIELD_STD_ITEM, + FIELD_VARIANCE_ITEM, INSERT_VALUE_ITEM, + SUBSELECT_ITEM, ROW_ITEM, CACHE_ITEM, TYPE_HOLDER, + PARAM_ITEM, TRIGGER_FIELD_ITEM, + EXPR_CACHE_ITEM}; + + enum cond_result { COND_UNDEF,COND_OK,COND_TRUE,COND_FALSE }; + enum traverse_order { POSTFIX, PREFIX }; + +protected: + SEL_TREE *get_mm_tree_for_const(RANGE_OPT_PARAM *param); + + /** + Create a field based on the exact data type handler. + */ + Field *create_table_field_from_handler(MEM_ROOT *root, TABLE *table) + { + const Type_handler *h= type_handler(); + return h->make_and_init_table_field(root, &name, + Record_addr(maybe_null()), + *this, table); + } + /** + Create a field based on field_type of argument. 
+ This is used to create a field for + - IFNULL(x,something) + - time functions + - prepared statement placeholders + - SP variables with data type references: DECLARE a TYPE OF t1.a; + @retval NULL error + @retval !NULL on success + */ + Field *tmp_table_field_from_field_type(MEM_ROOT *root, TABLE *table) + { + DBUG_ASSERT(fixed()); + const Type_handler *h= type_handler()->type_handler_for_tmp_table(this); + return h->make_and_init_table_field(root, &name, + Record_addr(maybe_null()), + *this, table); + } + /** + Create a temporary field for a simple Item, which does not + need any special action after the field creation: + - is not an Item_field descendant (and not a reference to Item_field) + - is not an Item_result_field descendant + - does not need to copy any DEFAULT value to the result Field + - does not need to set Field::is_created_from_null_item for the result + See create_tmp_field_ex() for details on parameters and return values. + */ + Field *create_tmp_field_ex_simple(MEM_ROOT *root, + TABLE *table, + Tmp_field_src *src, + const Tmp_field_param *param) + { + DBUG_ASSERT(!param->make_copy_field()); + DBUG_ASSERT(!is_result_field()); + DBUG_ASSERT(type() != NULL_ITEM); + return tmp_table_field_from_field_type(root, table); + } + Field *create_tmp_field_int(MEM_ROOT *root, TABLE *table, + uint convert_int_length); + Field *tmp_table_field_from_field_type_maybe_null(MEM_ROOT *root, + TABLE *table, + Tmp_field_src *src, + const Tmp_field_param *param, + bool is_explicit_null); + + void raise_error_not_evaluable(); + void push_note_converted_to_negative_complement(THD *thd); + void push_note_converted_to_positive_complement(THD *thd); + + /* Helper methods, to get an Item value from another Item */ + double val_real_from_item(Item *item) + { + DBUG_ASSERT(fixed()); + double value= item->val_real(); + null_value= item->null_value; + return value; + } + longlong val_int_from_item(Item *item) + { + DBUG_ASSERT(fixed()); + longlong value= item->val_int(); + 
null_value= item->null_value; + return value; + } + String *val_str_from_item(Item *item, String *str) + { + DBUG_ASSERT(fixed()); + String *res= item->val_str(str); + if (res) + res->set_charset(collation.collation); + if ((null_value= item->null_value)) + res= NULL; + return res; + } + bool val_native_from_item(THD *thd, Item *item, Native *to) + { + DBUG_ASSERT(fixed()); + null_value= item->val_native(thd, to); + DBUG_ASSERT(null_value == item->null_value); + return null_value; + } + bool val_native_from_field(Field *field, Native *to) + { + if ((null_value= field->is_null())) + return true; + return (null_value= field->val_native(to)); + } + bool val_native_with_conversion_from_item(THD *thd, Item *item, Native *to, + const Type_handler *handler) + { + DBUG_ASSERT(fixed()); + return (null_value= item->val_native_with_conversion(thd, to, handler)); + } + my_decimal *val_decimal_from_item(Item *item, my_decimal *decimal_value) + { + DBUG_ASSERT(fixed()); + my_decimal *value= item->val_decimal(decimal_value); + if ((null_value= item->null_value)) + value= NULL; + return value; + } + bool get_date_from_item(THD *thd, Item *item, + MYSQL_TIME *ltime, date_mode_t fuzzydate) + { + bool rc= item->get_date(thd, ltime, fuzzydate); + null_value= MY_TEST(rc || item->null_value); + return rc; + } +public: + + /* + Cache val_str() into the own buffer, e.g. to evaluate constant + expressions with subqueries in the ORDER/GROUP clauses. + */ + String *val_str() { return val_str(&str_value); } + virtual Item_func *get_item_func() { return NULL; } + + const MY_LOCALE *locale_from_val_str(); + + /* All variables for the Item class */ + + /** + Intrusive list pointer for free list. If not null, points to the next + Item on some Query_arena's free list. For instance, stored procedures + have their own Query_arena's. + + @see Query_arena::free_list + */ + Item *next; + + /* + str_values's main purpose is to be used to cache the value in + save_in_field. 
Calling full_name() for Item_field will also use str_value. + */ + String str_value; + + LEX_CSTRING name; /* Name of item */ + /* Original item name (if it was renamed)*/ + const char *orig_name; + + /* All common bool variables for an Item is stored here */ + item_base_t base_flags; + item_with_t with_flags; + + /* Marker is used in some functions to temporary mark an item */ + int16 marker; + + /* + Tells is the val() value of the item is/was null. + This should not be part of the bit flags as it's changed a lot and also + we use pointers to it + */ + bool null_value; + /* Cache of the result of is_expensive(). */ + int8 is_expensive_cache; + /** + The index in the JOIN::join_tab array of the JOIN_TAB this Item + is attached to. Items are attached (or 'pushed') to JOIN_TABs + during optimization by the make_cond_for_table procedure. During + query execution, this item is evaluated when the join loop reaches + the corresponding JOIN_TAB. + + If the value of join_tab_idx >= MAX_TABLES, this means that there is no + corresponding JOIN_TAB. 
+ */ + uint8 join_tab_idx; + + inline bool maybe_null() const + { return (bool) (base_flags & item_base_t::MAYBE_NULL); } + inline bool in_rollup() const + { return (bool) (base_flags & item_base_t::IN_ROLLUP); } + inline bool fixed() const + { return (bool) (base_flags & item_base_t::FIXED); } + inline bool is_explicit_name() const + { return (bool) (base_flags & item_base_t::IS_EXPLICIT_NAME); } + inline bool is_in_with_cycle() const + { return (bool) (base_flags & item_base_t::IS_IN_WITH_CYCLE); } + + inline bool with_sp_var() const + { return (bool) (with_flags & item_with_t::SP_VAR); } + inline bool with_window_func() const + { return (bool) (with_flags & item_with_t::WINDOW_FUNC); } + inline bool with_field() const + { return (bool) (with_flags & item_with_t::FIELD); } + inline bool with_sum_func() const + { return (bool) (with_flags & item_with_t::SUM_FUNC); } + inline bool with_subquery() const + { return (bool) (with_flags & item_with_t::SUBQUERY); } + inline bool with_rownum_func() const + { return (bool) (with_flags & item_with_t::ROWNUM_FUNC); } + inline bool with_param() const + { return (bool) (with_flags & item_with_t::PARAM); } + inline void copy_flags(const Item *org, item_base_t mask) + { + base_flags= (item_base_t) (((item_flags_t) base_flags & + ~(item_flags_t) mask) | + ((item_flags_t) org->base_flags & + (item_flags_t) mask)); + } + inline void copy_flags(const Item *org, item_with_t mask) + { + with_flags= (item_with_t) (((item_flags_t) with_flags & + ~(item_flags_t) mask) | + ((item_flags_t) org->with_flags & + (item_flags_t) mask)); + } + + // alloc & destruct is done as start of select on THD::mem_root + Item(THD *thd); + /* + Constructor used by Item_field, Item_ref & aggregate (sum) functions. 
+ Used for duplicating lists in processing queries with temporary + tables + Also it used for Item_cond_and/Item_cond_or for creating + top AND/OR structure of WHERE clause to protect it of + optimisation changes in prepared statements + */ + Item(THD *thd, Item *item); + Item(); /* For const item */ + virtual ~Item() + { +#ifdef EXTRA_DEBUG + name.str= 0; + name.length= 0; +#endif + } /*lint -e1509 */ + void set_name(THD *thd, const char *str, size_t length, CHARSET_INFO *cs); + void set_name(THD *thd, String *str) + { + set_name(thd, str->ptr(), str->length(), str->charset()); + } + void set_name(THD *thd, const LEX_CSTRING &str, + CHARSET_INFO *cs= system_charset_info) + { + set_name(thd, str.str, str.length, cs); + } + void set_name_no_truncate(THD *thd, const char *str, uint length, + CHARSET_INFO *cs); + void init_make_send_field(Send_field *tmp_field, const Type_handler *h); + void share_name_with(const Item *item) + { + name= item->name; + copy_flags(item, item_base_t::IS_EXPLICIT_NAME); + } + virtual void cleanup(); + virtual void make_send_field(THD *thd, Send_field *field); + + bool fix_fields_if_needed(THD *thd, Item **ref) + { + return fixed() ? false : fix_fields(thd, ref); + } + + /* + fix_fields_if_needed_for_scalar() is used where we need to filter items + that can't be scalars and want to return error for it. + */ + bool fix_fields_if_needed_for_scalar(THD *thd, Item **ref) + { + return fix_fields_if_needed(thd, ref) || check_cols(1); + } + bool fix_fields_if_needed_for_bool(THD *thd, Item **ref) + { + return fix_fields_if_needed_for_scalar(thd, ref); + } + bool fix_fields_if_needed_for_order_by(THD *thd, Item **ref) + { + return fix_fields_if_needed_for_scalar(thd, ref); + } + /* + By default we assume that an Item is fixed by the constructor + */ + virtual bool fix_fields(THD *, Item **) + { + /* + This should not normally be called, because usually before + fix_fields() we check fixed() to be false. 
+ But historically we allow fix_fields() to be called for Items + who return basic_const_item()==true. + */ + DBUG_ASSERT(fixed()); + DBUG_ASSERT(basic_const_item()); + return false; + } + virtual void unfix_fields() + { + DBUG_ASSERT(0); + } + + /* + Fix after some tables has been pulled out. Basically re-calculate all + attributes that are dependent on the tables. + */ + virtual void fix_after_pullout(st_select_lex *new_parent, Item **ref, + bool merge) + {}; + + /* + This is for items that require a fixup after the JOIN::prepare() + is done. + */ + virtual void fix_after_optimize(THD *thd) + {} + /* + This method should be used in case where we are sure that we do not need + complete fix_fields() procedure. + Usually this method is used by the optimizer when it has to create a new + item out of other already fixed items. For example, if the optimizer has + to create a new Item_func for an inferred equality whose left and right + parts are already fixed items. In some cases the optimizer cannot use + directly fixed items as the arguments of the created functional item, + but rather uses intermediate type conversion items. Then the method is + supposed to be applied recursively. 
+ */ + virtual void quick_fix_field() + { + DBUG_ASSERT(0); + } + + bool save_in_value(THD *thd, st_value *value) + { + return type_handler()->Item_save_in_value(thd, this, value); + } + + /* Function returns 1 on overflow and -1 on fatal errors */ + int save_in_field_no_warnings(Field *field, bool no_conversions); + virtual int save_in_field(Field *field, bool no_conversions); + virtual bool save_in_param(THD *thd, Item_param *param); + virtual void save_org_in_field(Field *field, + fast_field_copier data + __attribute__ ((__unused__))) + { (void) save_in_field(field, 1); } + virtual fast_field_copier setup_fast_field_copier(Field *field) + { return NULL; } + virtual int save_safe_in_field(Field *field) + { return save_in_field(field, 1); } + virtual bool send(Protocol *protocol, st_value *buffer) + { + return type_handler()->Item_send(this, protocol, buffer); + } + virtual bool eq(const Item *, bool binary_cmp) const; + enum_field_types field_type() const + { + return type_handler()->field_type(); + } + virtual const Type_handler *type_handler() const= 0; + /** + Detects if an Item has a fixed data type which is known + even before fix_fields(). + Currently it's important only to find Items with a fixed boolean + data type. More item types can be marked in the future as having + a fixed data type (e.g. all literals, all fixed type functions, etc). + + @retval NULL if the Item type is not known before fix_fields() + @retval the pointer to the data type handler, if the data type + is known before fix_fields(). 
+ */ + virtual const Type_handler *fixed_type_handler() const + { + return NULL; + } + const Type_handler *type_handler_for_comparison() const + { + return type_handler()->type_handler_for_comparison(); + } + virtual const Type_handler *real_type_handler() const + { + return type_handler(); + } + const Type_handler *cast_to_int_type_handler() const + { + return real_type_handler()->cast_to_int_type_handler(); + } + /* result_type() of an item specifies how the value should be returned */ + Item_result result_type() const + { + return type_handler()->result_type(); + } + /* ... while cmp_type() specifies how it should be compared */ + Item_result cmp_type() const + { + return type_handler()->cmp_type(); + } + const Type_handler *string_type_handler() const + { + return Type_handler::string_type_handler(max_length); + } + /* + Calculate the maximum length of an expression. + This method is used in data type aggregation for UNION, e.g.: + SELECT 'b' UNION SELECT COALESCE(double_10_3_field) FROM t1; + + The result is usually equal to max_length, except for some numeric types. + In case of the INT, FLOAT, DOUBLE data types Item::max_length and + Item::decimals are ignored, so the returned value depends only on the + data type itself. E.g. for an expression of the DOUBLE(10,3) data type, + the result is always 53 (length 10 and precision 3 do not matter). + + max_length is ignored for these numeric data types because the length limit + means only "expected maximum length", it is not a hard limit, so it does + not impose any data truncation. E.g. a column of the type INT(4) can + normally store big values up to 2147483647 without truncation. When we're + aggregating such column for UNION it's important to create a long enough + result column, not to lose any data. + + For detailed behaviour of various data types see implementations of + the corresponding Type_handler_xxx::max_display_length(). 
+ + Note, Item_field::max_display_length() overrides this to get + max_display_length() from the underlying field. + */ + virtual uint32 max_display_length() const + { + return type_handler()->max_display_length(this); + } + const TYPELIB *get_typelib() const override { return NULL; } + /* optimized setting of maybe_null without jumps. Minimizes code size */ + inline void set_maybe_null(bool maybe_null_arg) + { + base_flags= ((item_base_t) ((base_flags & ~item_base_t::MAYBE_NULL)) | + (item_base_t) (maybe_null_arg << + ITEM_FLAGS_MAYBE_NULL_SHIFT)); + } + /* This is used a lot, so make it simpler to use */ + void set_maybe_null() + { + base_flags|= item_base_t::MAYBE_NULL; + } + /* This is used when calling Type_all_attributes::set_type_maybe_null() */ + void set_type_maybe_null(bool maybe_null_arg) override + { + set_maybe_null(maybe_null_arg); + } + /* + Mark the item that it is a top level item, or part of a top level AND item, + for WHERE and ON clauses: + Example: ... WHERE a=5 AND b=6; Both a=5 and b=6 are top level items + + This is used to indicate that there is no distinction between if the + value of the item is FALSE or NULL.. + This enables Item_cond_and and subquery related items to do special + "top level" optimizations. + */ + virtual void top_level_item() + { + base_flags|= item_base_t::AT_TOP_LEVEL; + } + /* + Return TRUE if this item of top WHERE level (AND/OR) + */ + bool is_top_level_item() const + { return (bool) (base_flags & item_base_t::AT_TOP_LEVEL); } + + void set_typelib(const TYPELIB *typelib) override + { + // Non-field Items (e.g. hybrid functions) never have ENUM/SET types yet. + DBUG_ASSERT(0); + } + Item_cache* get_cache(THD *thd) const + { + return type_handler()->Item_get_cache(thd, this); + } + virtual enum Type type() const =0; + bool is_of_type(Type t, Item_result cmp) const + { + return type() == t && cmp_type() == cmp; + } + /* + real_type() is the type of base item. 
This is same as type() for + most items, except Item_ref() and Item_cache_wrapper() where it + shows the type for the underlying item. + */ + virtual enum Type real_type() const { return type(); } + + /* + Return information about function monotonicity. See comment for + enum_monotonicity_info for details. This function can only be called + after fix_fields() call. + */ + virtual enum_monotonicity_info get_monotonicity_info() const + { return NON_MONOTONIC; } + + /* + Convert "func_arg $CMP$ const" half-interval into + "FUNC(func_arg) $CMP2$ const2" + + SYNOPSIS + val_int_endpoint() + left_endp FALSE <=> The interval is "x < const" or "x <= const" + TRUE <=> The interval is "x > const" or "x >= const" + + incl_endp IN FALSE <=> the comparison is '<' or '>' + TRUE <=> the comparison is '<=' or '>=' + OUT The same but for the "F(x) $CMP$ F(const)" comparison + + DESCRIPTION + This function is defined only for unary monotonic functions. The caller + supplies the source half-interval + + x $CMP$ const + + The value of const is supplied implicitly as the value this item's + argument, the form of $CMP$ comparison is specified through the + function's arguments. The calle returns the result interval + + F(x) $CMP2$ F(const) + + passing back F(const) as the return value, and the form of $CMP2$ + through the out parameter. NULL values are assumed to be comparable and + be less than any non-NULL values. + + RETURN + The output range bound, which equal to the value of val_int() + - If the value of the function is NULL then the bound is the + smallest possible value of LONGLONG_MIN + */ + virtual longlong val_int_endpoint(bool left_endp, bool *incl_endp) + { DBUG_ASSERT(0); return 0; } + + + /* valXXX methods must return NULL or 0 or 0.0 if null_value is set. */ + /* + Return double precision floating point representation of item. + + SYNOPSIS + val_real() + + RETURN + In case of NULL value return 0.0 and set null_value flag to TRUE. 
+ If value is not null null_value flag will be reset to FALSE. + */ + virtual double val_real()=0; + Double_null to_double_null() + { + // val_real() must be caleed on a separate line. See to_longlong_null() + double nr= val_real(); + return Double_null(nr, null_value); + } + /* + Return integer representation of item. + + SYNOPSIS + val_int() + + RETURN + In case of NULL value return 0 and set null_value flag to TRUE. + If value is not null null_value flag will be reset to FALSE. + */ + virtual longlong val_int()=0; + Longlong_hybrid to_longlong_hybrid() + { + return Longlong_hybrid(val_int(), unsigned_flag); + } + Longlong_null to_longlong_null() + { + longlong nr= val_int(); + /* + C++ does not guarantee the order of parameter evaluation, + so to make sure "null_value" is passed to the constructor + after the val_int() call, val_int() is caled on a separate line. + */ + return Longlong_null(nr, null_value); + } + Longlong_hybrid_null to_longlong_hybrid_null() + { + return Longlong_hybrid_null(to_longlong_null(), unsigned_flag); + } + /** + Get a value for CAST(x AS SIGNED). + Too large positive unsigned integer values are converted + to negative complements. + Values of non-integer data types are adjusted to the SIGNED range. + */ + virtual longlong val_int_signed_typecast() + { + return cast_to_int_type_handler()->Item_val_int_signed_typecast(this); + } + longlong val_int_signed_typecast_from_str(); + /** + Get a value for CAST(x AS UNSIGNED). + Negative signed integer values are converted + to positive complements. + Values of non-integer data types are adjusted to the UNSIGNED range. + */ + virtual longlong val_int_unsigned_typecast() + { + return cast_to_int_type_handler()->Item_val_int_unsigned_typecast(this); + } + longlong val_int_unsigned_typecast_from_int(); + longlong val_int_unsigned_typecast_from_str(); + longlong val_int_unsigned_typecast_from_real(); + + /** + Get a value for CAST(x AS UNSIGNED). 
+ Huge positive unsigned values are converted to negative complements. + */ + longlong val_int_signed_typecast_from_int(); + longlong val_int_signed_typecast_from_real(); + + /* + This is just a shortcut to avoid the cast. You should still use + unsigned_flag to check the sign of the item. + */ + inline ulonglong val_uint() { return (ulonglong) val_int(); } + + virtual bool hash_not_null(Hasher *hasher) + { + DBUG_ASSERT(0); + return true; + } + + /* + Return string representation of this item object. + + SYNOPSIS + val_str() + str an allocated buffer this or any nested Item object can use to + store return value of this method. + + NOTE + The caller can modify the returned String, if it's not marked + "const" (with the String::mark_as_const() method). That means that + if the item returns its own internal buffer (e.g. tmp_value), it + *must* be marked "const" [1]. So normally it's preferable to + return the result value in the String, that was passed as an + argument. But, for example, SUBSTR() returns a String that simply + points into the buffer of SUBSTR()'s args[0]->val_str(). Such a + String is always "const", so it's ok to use tmp_value for that and + avoid reallocating/copying of the argument String. + + [1] consider SELECT CONCAT(f, ":", f) FROM (SELECT func() AS f); + here the return value of f() is used twice in the top-level + select, and if they share the same tmp_value buffer, modifying the + first one will implicitly modify the second too. + + RETURN + In case of NULL value return 0 (NULL pointer) and set null_value flag + to TRUE. + If value is not null null_value flag will be reset to FALSE. 
+ */ + virtual String *val_str(String *str)=0; + + + bool val_native_with_conversion(THD *thd, Native *to, const Type_handler *th) + { + return th->Item_val_native_with_conversion(thd, this, to); + } + bool val_native_with_conversion_result(THD *thd, Native *to, + const Type_handler *th) + { + return th->Item_val_native_with_conversion_result(thd, this, to); + } + + virtual bool val_native(THD *thd, Native *to) + { + /* + The default implementation for the Items that do not need native format: + - Item_basic_value (default implementation) + - Item_copy + - Item_exists_subselect + - Item_sum_field + - Item_sum_or_func (default implementation) + - Item_proc + - Item_type_holder (as val_xxx() are never called for it); + + These hybrid Item types override val_native(): + - Item_field + - Item_param + - Item_sp_variable + - Item_ref + - Item_cache_wrapper + - Item_direct_ref + - Item_direct_view_ref + - Item_ref_null_helper + - Item_name_const + - Item_time_literal + - Item_sum_or_func + Note, these hybrid type Item_sum_or_func descendants + override the default implementation: + * Item_sum_hybrid + * Item_func_hybrid_field_type + * Item_func_min_max + * Item_func_sp + * Item_func_last_value + * Item_func_rollup_const + */ + DBUG_ASSERT(0); + return (null_value= 1); + } + virtual bool val_native_result(THD *thd, Native *to) + { + return val_native(thd, to); + } + + /* + Returns string representation of this item in ASCII format. + + SYNOPSIS + val_str_ascii() + str - similar to val_str(); + + NOTE + This method is introduced for performance optimization purposes. + + 1. val_str() result of some Items in string context + depends on @@character_set_results. + @@character_set_results can be set to a "real multibyte" character + set like UCS2, UTF16, UTF32. (We'll use only UTF32 in the examples + below for convenience.) + + So the default string result of such functions + in these circumstances is real multi-byte character set, like UTF32. 
+ + For example, all numbers in string context + return result in @@character_set_results: + + SELECT CONCAT(20010101); -> UTF32 + + We do sprintf() first (to get ASCII representation) + and then convert to UTF32; + + So these kind "data sources" can use ASCII representation + internally, but return multi-byte data only because + @@character_set_results wants so. + Therefore, conversion from ASCII to UTF32 is applied internally. + + + 2. Some other functions need in fact ASCII input. + + For example, + inet_aton(), GeometryFromText(), Convert_TZ(), GET_FORMAT(). + + Similar, fields of certain type, like DATE, TIME, + when you insert string data into them, expect in fact ASCII input. + If they get non-ASCII input, for example UTF32, they + convert input from UTF32 to ASCII, and then use ASCII + representation to do further processing. + + + 3. Now imagine we pass result of a data source of the first type + to a data destination of the second type. + + What happens: + a. data source converts data from ASCII to UTF32, because + @@character_set_results wants so and passes the result to + data destination. + b. data destination gets UTF32 string. + c. data destination converts UTF32 string to ASCII, + because it needs ASCII representation to be able to handle data + correctly. + + As a result we get two steps of unnecessary conversion: + From ASCII to UTF32, then from UTF32 to ASCII. + + A better way to handle these situations is to pass ASCII + representation directly from the source to the destination. + + This is why val_str_ascii() introduced. + + RETURN + Similar to val_str() + */ + virtual String *val_str_ascii(String *str); + + /* + Returns the result of val_str_ascii(), translating NULLs back + to empty strings (if MODE_EMPTY_STRING_IS_NULL is set). + */ + String *val_str_ascii_revert_empty_string_is_null(THD *thd, String *str); + + /* + Returns the val_str() value converted to the given character set. 
+ */ + String *val_str(String *str, String *converter, CHARSET_INFO *to); + + virtual String *val_json(String *str) { return val_str(str); } + /* + Return decimal representation of item with fixed point. + + SYNOPSIS + val_decimal() + decimal_buffer buffer which can be used by Item for returning value + (but can be not) + + NOTE + Returned value should not be changed if it is not the same which was + passed via argument. + + RETURN + Return pointer on my_decimal (it can be other then passed via argument) + if value is not NULL (null_value flag will be reset to FALSE). + In case of NULL value it return 0 pointer and set null_value flag + to TRUE. + */ + virtual my_decimal *val_decimal(my_decimal *decimal_buffer)= 0; + /* + Return boolean value of item. + + RETURN + FALSE value is false or NULL + TRUE value is true (not equal to 0) + */ + virtual bool val_bool() + { + return type_handler()->Item_val_bool(this); + } + + bool eval_const_cond() + { + DBUG_ASSERT(const_item()); + DBUG_ASSERT(!is_expensive()); + return val_bool(); + } + bool can_eval_in_optimize() + { + return const_item() && !is_expensive(); + } + + /* + save_val() is method of val_* family which stores value in the given + field. + */ + virtual void save_val(Field *to) { save_org_in_field(to, NULL); } + /* + save_result() is method of val*result() family which stores value in + the given field. 
+ */ + virtual void save_result(Field *to) { save_val(to); } + /* Helper functions, see item_sum.cc */ + String *val_string_from_real(String *str); + String *val_string_from_int(String *str); + my_decimal *val_decimal_from_real(my_decimal *decimal_value); + my_decimal *val_decimal_from_int(my_decimal *decimal_value); + my_decimal *val_decimal_from_string(my_decimal *decimal_value); + longlong val_int_from_real() + { + DBUG_ASSERT(fixed()); + return Converter_double_to_longlong_with_warn(val_real(), false).result(); + } + longlong val_int_from_str(int *error); + + /* + Returns true if this item can be calculated during + value_depends_on_sql_mode() + */ + bool value_depends_on_sql_mode_const_item() + { + /* + Currently we use value_depends_on_sql_mode() only for virtual + column expressions. They should not contain any expensive items. + If we ever get a crash on the assert below, it means + check_vcol_func_processor() is badly implemented for this item. + */ + DBUG_ASSERT(!is_expensive()); + /* + It should return const_item() actually. + But for some reasons Item_field::const_item() returns true + at value_depends_on_sql_mode() call time. + This should be checked and fixed. 
+ */ + return basic_const_item(); + } + virtual Sql_mode_dependency value_depends_on_sql_mode() const + { + return Sql_mode_dependency(); + } + + int save_time_in_field(Field *field, bool no_conversions); + int save_date_in_field(Field *field, bool no_conversions); + int save_str_in_field(Field *field, bool no_conversions); + int save_real_in_field(Field *field, bool no_conversions); + int save_int_in_field(Field *field, bool no_conversions); + int save_decimal_in_field(Field *field, bool no_conversions); + + int save_str_value_in_field(Field *field, String *result); + + virtual Field *get_tmp_table_field() { return 0; } + virtual Field *create_field_for_create_select(MEM_ROOT *root, TABLE *table); + inline const char *full_name() const { return full_name_cstring().str; } + virtual LEX_CSTRING full_name_cstring() const + { + if (name.str) + return name; + return { STRING_WITH_LEN("???") }; + } + const char *field_name_or_null() + { return real_item()->type() == Item::FIELD_ITEM ? name.str : NULL; } + const TABLE_SHARE *field_table_or_null(); + + /* + *result* family of methods is analog of *val* family (see above) but + return value of result_field of item if it is present. If Item have not + result field, it return val(). This methods set null_value flag in same + way as *val* methods do it. 
+ */ + virtual double val_result() { return val_real(); } + virtual longlong val_int_result() { return val_int(); } + virtual String *str_result(String* tmp) { return val_str(tmp); } + virtual my_decimal *val_decimal_result(my_decimal *val) + { return val_decimal(val); } + virtual bool val_bool_result() { return val_bool(); } + virtual bool is_null_result() { return is_null(); } + /* + Returns 1 if result type and collation for val_str() can change between + calls + */ + virtual bool dynamic_result() { return 0; } + /* + Bitmap of tables used by item + (note: if you need to check dependencies on individual columns, check out + class Field_enumerator) + */ + virtual table_map used_tables() const { return (table_map) 0L; } + virtual table_map all_used_tables() const { return used_tables(); } + /* + Return table map of tables that can't be NULL tables (tables that are + used in a context where if they would contain a NULL row generated + by a LEFT or RIGHT join, the item would not be true). + This expression is used on WHERE item to determinate if a LEFT JOIN can be + converted to a normal join. + Generally this function should return used_tables() if the function + would return null if any of the arguments are null + As this is only used in the beginning of optimization, the value don't + have to be updated in update_used_tables() + */ + virtual table_map not_null_tables() const { return used_tables(); } + /* + Returns true if this is a simple constant item like an integer, not + a constant expression. Used in the optimizer to propagate basic constants. + */ + virtual bool basic_const_item() const { return 0; } + /** + Determines if the expression is allowed as + a virtual column assignment source: + INSERT INTO t1 (vcol) VALUES (10) -> error + INSERT INTO t1 (vcol) VALUES (NULL) -> ok + */ + virtual bool vcol_assignment_allowed_value() const { return false; } + /** + Test if "this" is an ORDER position (rather than an expression). 
+ Notes: + - can be called before fix_fields(). + - local SP variables (even of integer types) are always expressions, not + positions. (And they can't be used before fix_fields is called for them). + */ + virtual bool is_order_clause_position() const { return false; } + /* + Determines if the Item is an evaluable expression, that is + it can return a value, so we can call methods val_xxx(), get_date(), etc. + Most items are evaluable expressions. + Examples of non-evaluable expressions: + - Item_contextually_typed_value_specification (handling DEFAULT and IGNORE) + - Item_type_param bound to DEFAULT and IGNORE + We cannot call the mentioned methods for these Items, + their method implementations typically have DBUG_ASSERT(0). + */ + virtual bool is_evaluable_expression() const { return true; } + + virtual bool check_assignability_to(const Field *to, bool ignore) const + { + /* + "this" must be neither DEFAULT/IGNORE, + nor Item_param bound to DEFAULT/IGNORE. + */ + DBUG_ASSERT(is_evaluable_expression()); + return to->check_assignability_from(type_handler(), ignore); + } + + /** + * Check whether the item is a parameter ('?') of stored routine. + * Default implementation returns false. Method is overridden in the class + * Item_param where it returns true. + */ + virtual bool is_stored_routine_parameter() const { return false; } + + bool check_is_evaluable_expression_or_error() + { + if (is_evaluable_expression()) + return false; // Ok + raise_error_not_evaluable(); + return true; // Error + } + /* cloning of constant items (0 if it is not const) */ + virtual Item *clone_item(THD *thd) { return 0; } + /* deep copy item */ + virtual Item* build_clone(THD *thd) { return get_copy(thd); } + virtual cond_result eq_cmp_result() const { return COND_OK; } + inline uint float_length(uint decimals_par) const + { return decimals < FLOATING_POINT_DECIMALS ? 
(DBL_DIG+2+decimals_par) : DBL_DIG+8;} + /* Returns total number of decimal digits */ + decimal_digits_t decimal_precision() const override + { + return type_handler()->Item_decimal_precision(this); + } + /* Returns the number of integer part digits only */ + inline decimal_digits_t decimal_int_part() const + { return (decimal_digits_t) my_decimal_int_part(decimal_precision(), decimals); } + /* + Returns the number of fractional digits only. + NOT_FIXED_DEC is replaced to the maximum possible number + of fractional digits, taking into account the data type. + */ + decimal_digits_t decimal_scale() const + { + return type_handler()->Item_decimal_scale(this); + } + /* + Returns how many digits a divisor adds into a division result. + This is important when the integer part of the divisor can be 0. + In this example: + SELECT 1 / 0.000001; -> 1000000.0000 + the divisor adds 5 digits into the result precision. + + Currently this method only replaces NOT_FIXED_DEC to + TIME_SECOND_PART_DIGITS for temporal data types. + This method can be made virtual, to create more efficient (smaller) + data types for division results. + For example, in + SELECT 1/1.000001; + the divisor could provide no additional precision into the result, + so could any other items that are know to return a result + with non-zero integer part. + */ + uint divisor_precision_increment() const + { + return type_handler()->Item_divisor_precision_increment(this); + } + /** + TIME or DATETIME precision of the item: 0..6 + */ + uint time_precision(THD *thd) + { + return const_item() ? type_handler()->Item_time_precision(thd, this) : + MY_MIN(decimals, TIME_SECOND_PART_DIGITS); + } + uint datetime_precision(THD *thd) + { + return const_item() ? type_handler()->Item_datetime_precision(thd, this) : + MY_MIN(decimals, TIME_SECOND_PART_DIGITS); + } + virtual longlong val_int_min() const + { + return LONGLONG_MIN; + } + /* + Returns true if this is constant (during query execution, i.e. 
its value + will not change until next fix_fields) and its value is known. + */ + virtual bool const_item() const { return used_tables() == 0; } + /* + Returns true if this is constant but its value may be not known yet. + (Can be used for parameters of prep. stmts or of stored procedures.) + */ + virtual bool const_during_execution() const + { return (used_tables() & ~PARAM_TABLE_BIT) == 0; } + + /** + This method is used for to: + - to generate a view definition query (SELECT-statement); + - to generate a SQL-query for EXPLAIN EXTENDED; + - to generate a SQL-query to be shown in INFORMATION_SCHEMA; + - debug. + + For more information about view definition query, INFORMATION_SCHEMA + query and why they should be generated from the Item-tree, @see + mysql_register_view(). + */ + virtual enum precedence precedence() const { return DEFAULT_PRECEDENCE; } + enum precedence higher_precedence() const + { return (enum precedence)(precedence() + 1); } + void print_parenthesised(String *str, enum_query_type query_type, + enum precedence parent_prec); + /** + This helper is used to print expressions as a part of a table definition, + in particular for + - generated columns + - check constraints + - default value expressions + - partitioning expressions + */ + void print_for_table_def(String *str) + { + print_parenthesised(str, + (enum_query_type)(QT_ITEM_ORIGINAL_FUNC_NULLIF | + QT_ITEM_IDENT_SKIP_DB_NAMES | + QT_ITEM_IDENT_SKIP_TABLE_NAMES | + QT_NO_DATA_EXPANSION | + QT_TO_SYSTEM_CHARSET), + LOWEST_PRECEDENCE); + } + virtual void print(String *str, enum_query_type query_type); + class Print: public String + { + public: + Print(Item *item, enum_query_type type) + { + item->print(this, type); + } + }; + + void print_item_w_name(String *str, enum_query_type query_type); + void print_value(String *str); + + virtual void update_used_tables() {} + virtual COND *build_equal_items(THD *thd, COND_EQUAL *inheited, + bool link_item_fields, + COND_EQUAL **cond_equal_ref) + { + 
update_used_tables(); + DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]); + return this; + } + virtual COND *remove_eq_conds(THD *thd, Item::cond_result *cond_value, + bool top_level); + virtual void add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, + table_map usable_tables, + SARGABLE_PARAM **sargables) + { + return; + } + /* + Make a select tree for all keys in a condition or a condition part + @param param Context + @param cond_ptr[OUT] Store a replacement item here if the condition + can be simplified, e.g.: + WHERE part1 OR part2 OR part3 + with one of the partN evaluating to SEL_TREE::ALWAYS. + */ + virtual SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr); + /* + Checks whether the item is: + - a simple equality (field=field_item or field=constant_item), or + - a row equality + and form multiple equality predicates. + */ + virtual bool check_equality(THD *thd, COND_EQUAL *cond, List *eq_list) + { + return false; + } + virtual void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List &fields, uint flags) {} + /* Called for items that really have to be split */ + void split_sum_func2(THD *thd, Ref_ptr_array ref_pointer_array, + List &fields, + Item **ref, uint flags); + virtual bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate)= 0; + bool get_date_from_int(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate); + bool get_date_from_real(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate); + bool get_date_from_string(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate); + bool get_time(THD *thd, MYSQL_TIME *ltime) + { return get_date(thd, ltime, Time::Options(thd)); } + // Get a DATE or DATETIME value in numeric packed format for comparison + virtual longlong val_datetime_packed(THD *thd) + { + return Datetime(thd, this, Datetime::Options_cmp(thd)).to_packed(); + } + // Get a TIME value in numeric packed format for comparison + virtual longlong val_time_packed(THD *thd) + { + return Time(thd, this, 
Time::Options_cmp(thd)).to_packed(); + } + longlong val_datetime_packed_result(THD *thd); + longlong val_time_packed_result(THD *thd); + + virtual bool get_date_result(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) + { return get_date(thd, ltime,fuzzydate); } + + /* + The method allows to determine nullness of a complex expression + without fully evaluating it, instead of calling val/result*() then + checking null_value. Used in Item_func_isnull/Item_func_isnotnull + and Item_sum_count. + Any new item which can be NULL must implement this method. + */ + virtual bool is_null() { return 0; } + + /* + Make sure the null_value member has a correct value. + */ + virtual void update_null_value () + { + return type_handler()->Item_update_null_value(this); + } + /* + return IN/ALL/ANY subquery or NULL + */ + virtual Item_in_subselect* get_IN_subquery() + { return NULL; /* in is not IN/ALL/ANY */ } + /* + set field of temporary table for Item which can be switched on temporary + table during query processing (grouping and so on) + */ + virtual bool is_result_field() { return 0; } + virtual bool is_bool_literal() const { return false; } + /* This is to handle printing of default values */ + virtual bool need_parentheses_in_default() { return false; } + virtual void save_in_result_field(bool no_conversions) {} + /* + Data type format implied by the CHECK CONSTRAINT, + to be sent to the client in the result set metadata. 
+ */ + virtual bool set_format_by_check_constraint(Send_field_extended_metadata *) + const + { + return false; + } + /* + set value of aggregate function in case of no rows for grouping were found + */ + virtual void no_rows_in_result() {} + virtual void restore_to_before_no_rows_in_result() {} + virtual Item *copy_or_same(THD *thd) { return this; } + virtual Item *copy_andor_structure(THD *thd) { return this; } + virtual Item *real_item() { return this; } + const Item *real_item() const { return const_cast(this)->real_item(); } + virtual Item *get_tmp_table_item(THD *thd) { return copy_or_same(thd); } + virtual Item *make_odbc_literal(THD *thd, const LEX_CSTRING *typestr) + { + return this; + } + + static CHARSET_INFO *default_charset(); + + CHARSET_INFO *charset_for_protocol(void) const + { + return type_handler()->charset_for_protocol(this); + }; + + virtual bool walk(Item_processor processor, bool walk_subquery, void *arg) + { + return (this->*processor)(arg); + } + + virtual Item* transform(THD *thd, Item_transformer transformer, uchar *arg); + virtual Item* top_level_transform(THD *thd, Item_transformer transformer, + uchar *arg) + { + return transform(thd, transformer, arg); + } + + /* + This function performs a generic "compilation" of the Item tree. + The process of compilation is assumed to go as follows: + + compile() + { + if (this->*some_analyzer(...)) + { + compile children if any; + this->*some_transformer(...); + } + } + + i.e. analysis is performed top-down while transformation is done + bottom-up. 
+ */ + virtual Item* compile(THD *thd, Item_analyzer analyzer, uchar **arg_p, + Item_transformer transformer, uchar *arg_t) + { + if ((this->*analyzer) (arg_p)) + return ((this->*transformer) (thd, arg_t)); + return 0; + } + virtual Item* top_level_compile(THD *thd, Item_analyzer analyzer, uchar **arg_p, + Item_transformer transformer, uchar *arg_t) + { + return compile(thd, analyzer, arg_p, transformer, arg_t); + } + + virtual void traverse_cond(Cond_traverser traverser, + void *arg, traverse_order order) + { + (*traverser)(this, arg); + } + + /*========= Item processors, to be used with Item::walk() ========*/ + virtual bool remove_dependence_processor(void *arg) { return 0; } + virtual bool cleanup_processor(void *arg); + virtual bool cleanup_excluding_fields_processor (void *arg) + { return cleanup_processor(arg); } + bool cleanup_excluding_immutables_processor (void *arg); + virtual bool cleanup_excluding_const_fields_processor (void *arg) + { return cleanup_processor(arg); } + virtual bool collect_item_field_processor(void *arg) { return 0; } + virtual bool unknown_splocal_processor(void *arg) { return 0; } + virtual bool collect_outer_ref_processor(void *arg) {return 0; } + virtual bool check_inner_refs_processor(void *arg) { return 0; } + virtual bool find_item_in_field_list_processor(void *arg) { return 0; } + virtual bool find_item_processor(void *arg); + virtual bool change_context_processor(void *arg) { return 0; } + virtual bool reset_query_id_processor(void *arg) { return 0; } + virtual bool is_expensive_processor(void *arg) { return 0; } + + // FIXME reduce the number of "add field to bitmap" processors + virtual bool add_field_to_set_processor(void *arg) { return 0; } + virtual bool register_field_in_read_map(void *arg) { return 0; } + virtual bool register_field_in_write_map(void *arg) { return 0; } + virtual bool register_field_in_bitmap(void *arg) { return 0; } + virtual bool update_table_bitmaps_processor(void *arg) { return 0; } + + virtual 
bool enumerate_field_refs_processor(void *arg) { return 0; } + virtual bool mark_as_eliminated_processor(void *arg) { return 0; } + virtual bool eliminate_subselect_processor(void *arg) { return 0; } + virtual bool view_used_tables_processor(void *arg) { return 0; } + virtual bool eval_not_null_tables(void *arg) { return 0; } + virtual bool is_subquery_processor(void *arg) { return 0; } + virtual bool count_sargable_conds(void *arg) { return 0; } + virtual bool limit_index_condition_pushdown_processor(void *arg) { return 0; } + virtual bool exists2in_processor(void *arg) { return 0; } + virtual bool find_selective_predicates_list_processor(void *arg) { return 0; } + virtual bool cleanup_is_expensive_cache_processor(void *arg) + { + is_expensive_cache= (int8)(-1); + return 0; + } + + virtual bool set_extraction_flag_processor(void *arg) + { + set_extraction_flag(*(int16*)arg); + return 0; + } + + /* + TRUE if the expression depends only on the table indicated by tab_map + or can be converted to such an exression using equalities. + Not to be used for AND/OR formulas. + */ + virtual bool excl_dep_on_table(table_map tab_map) { return false; } + /* + TRUE if the expression depends only on grouping fields of sel + or can be converted to such an expression using equalities. + It also checks if the expression doesn't contain stored procedures, + subqueries or randomly generated elements. + Not to be used for AND/OR formulas. + */ + virtual bool excl_dep_on_grouping_fields(st_select_lex *sel) + { return false; } + /* + TRUE if the expression depends only on fields from the left part of + IN subquery or can be converted to such an expression using equalities. + Not to be used for AND/OR formulas. 
+ */ + virtual bool excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred) + { return false; } + + virtual bool switch_to_nullable_fields_processor(void *arg) { return 0; } + virtual bool find_function_processor (void *arg) { return 0; } + /* + Check if a partition function is allowed + SYNOPSIS + check_partition_func_processor() + int_arg Ignored + RETURN VALUE + TRUE Partition function not accepted + FALSE Partition function accepted + + DESCRIPTION + check_partition_func_processor is used to check if a partition function + uses an allowed function. An allowed function will always ensure that + X=Y guarantees that also part_function(X)=part_function(Y) where X is + a set of partition fields and so is Y. The problems comes mainly from + character sets where two equal strings can be quite unequal. E.g. the + german character for double s is equal to 2 s. + + The default is that an item is not allowed + in a partition function. Allowed functions + can never depend on server version, they cannot depend on anything + related to the environment. They can also only depend on a set of + fields in the table itself. They cannot depend on other tables and + cannot contain any queries and cannot contain udf's or similar. + If a new Item class is defined and it inherits from a class that is + allowed in a partition function then it is very important to consider + whether this should be inherited to the new class. If not the function + below should be defined in the new Item class. + + The general behaviour is that most integer functions are allowed. + If the partition function contains any multi-byte collations then + the function check_part_func_fields will report an error on the + partition function independent of what functions are used. So the + only character sets allowed are single character collation and + even for those only a limited set of functions are allowed. 
The + problem with multi-byte collations is that almost every string + function has the ability to change things such that two strings + that are equal will not be equal after manipulated by a string + function. E.g. two strings one contains a double s, there is a + special german character that is equal to two s. Now assume a + string function removes one character at this place, then in + one the double s will be removed and in the other there will + still be one s remaining and the strings are no longer equal + and thus the partition function will not sort equal strings into + the same partitions. + + So the check if a partition function is valid is two steps. First + check that the field types are valid, next check that the partition + function is valid. The current set of partition functions valid + assumes that there are no multi-byte collations amongst the partition + fields. + */ + virtual bool check_partition_func_processor(void *arg) { return true; } + virtual bool post_fix_fields_part_expr_processor(void *arg) { return 0; } + virtual bool rename_fields_processor(void *arg) { return 0; } + /* + TRUE if the function is knowingly TRUE or FALSE. + Not to be used for AND/OR formulas. 
+ */ + virtual bool is_simplified_cond_processor(void *arg) { return false; } + + /** Processor used to check acceptability of an item in the defining + expression for a virtual column + + @param arg always ignored + + @retval 0 the item is accepted in the definition of a virtual column + @retval 1 otherwise + */ + struct vcol_func_processor_result + { + uint errors; /* Bits of possible errors */ + const char *name; /* Not supported function */ + Alter_info *alter_info; + vcol_func_processor_result() : + errors(0), name(NULL), alter_info(NULL) {} + }; + struct func_processor_rename + { + LEX_CSTRING db_name; + LEX_CSTRING table_name; + List fields; + }; + virtual bool check_vcol_func_processor(void *arg) + { + return mark_unsupported_function(full_name(), arg, VCOL_IMPOSSIBLE); + } + virtual bool check_handler_func_processor(void *arg) { return 0; } + virtual bool check_field_expression_processor(void *arg) { return 0; } + virtual bool check_func_default_processor(void *arg) { return 0; } + virtual bool update_func_default_processor(void *arg) { return 0; } + /* + Check if an expression value has allowed arguments, like DATE/DATETIME + for date functions. Also used by partitioning code to reject + timezone-dependent expressions in a (sub)partitioning function. + */ + virtual bool check_valid_arguments_processor(void *arg) { return 0; } + virtual bool update_vcol_processor(void *arg) { return 0; } + virtual bool set_fields_as_dependent_processor(void *arg) { return 0; } + /* + Find if some of the key parts of table keys (the reference on table is + passed as an argument) participate in the expression. + If there is some, sets a bit for this key in the proper key map. 
+ */ + virtual bool check_index_dependence(void *arg) { return 0; } + /*============== End of Item processor list ======================*/ + + /* + Given a condition P from the WHERE clause or from an ON expression of + the processed SELECT S and a set of join tables from S marked in the + parameter 'allowed'={T} a call of P->find_not_null_fields({T}) has to + find the set fields {F} of the tables from 'allowed' such that: + - each field from {F} is declared as nullable + - each record of table t from {T} that contains NULL as the value for at + at least one field from {F} can be ignored when building the result set + for S + It is assumed here that the condition P is conjunctive and all its column + references belong to T. + + Examples: + CREATE TABLE t1 (a int, b int); + CREATE TABLE t2 (a int, b int); + + SELECT * FROM t1,t2 WHERE t1.a=t2.a and t1.b > 5; + A call of find_not_null_fields() for the whole WHERE condition and {t1,t2} + should find {t1.a,t1.b,t2.a} + + SELECT * FROM t1 LEFT JOIN ON (t1.a=t2.a and t2.a > t2.b); + A call of find_not_null_fields() for the ON expression and {t2} + should find {t2.a,t2.b} + + The function returns TRUE if it succeeds to prove that all records of + a table from {T} can be ignored. Otherwise it always returns FALSE. + + Example: + SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t2.a IS NULL; + A call of find_not_null_fields() for the WHERE condition and {t1,t2} + will return TRUE. + + It is assumed that the implementation of this virtual function saves + the info on the found set of fields in the structures associates with + tables from {T}. + */ + virtual bool find_not_null_fields(table_map allowed) { return false; } + + /* + Does not guarantee deep copy (depends on copy ctor). + See build_clone() for deep copy. 
+ */ + virtual Item *get_copy(THD *thd)=0; + + bool cache_const_expr_analyzer(uchar **arg); + Item* cache_const_expr_transformer(THD *thd, uchar *arg); + + virtual Item* propagate_equal_fields(THD*, const Context &, COND_EQUAL *) + { + return this; + }; + + Item* propagate_equal_fields_and_change_item_tree(THD *thd, + const Context &ctx, + COND_EQUAL *cond, + Item **place); + + /* arg points to REPLACE_EQUAL_FIELD_ARG object */ + virtual Item *replace_equal_field(THD *thd, uchar *arg) { return this; } + + struct Collect_deps_prm + { + List *parameters; + /* unit from which we count nest_level */ + st_select_lex_unit *nest_level_base; + uint count; + int nest_level; + bool collect; + }; + + /* + For SP local variable returns pointer to Item representing its + current value and pointer to current Item otherwise. + */ + virtual Item *this_item() { return this; } + virtual const Item *this_item() const { return this; } + + /* + For SP local variable returns address of pointer to Item representing its + current value and pointer passed via parameter otherwise. 
+ */ + virtual Item **this_item_addr(THD *thd, Item **addr_arg) { return addr_arg; } + + // Row emulation + virtual uint cols() const { return 1; } + virtual Item* element_index(uint i) { return this; } + virtual Item** addr(uint i) { return 0; } + virtual bool check_cols(uint c); + bool check_type_traditional_scalar(const LEX_CSTRING &opname) const; + bool check_type_scalar(const LEX_CSTRING &opname) const; + bool check_type_or_binary(const LEX_CSTRING &opname, + const Type_handler *handler) const; + bool check_type_general_purpose_string(const LEX_CSTRING &opname) const; + bool check_type_can_return_int(const LEX_CSTRING &opname) const; + bool check_type_can_return_decimal(const LEX_CSTRING &opname) const; + bool check_type_can_return_real(const LEX_CSTRING &opname) const; + bool check_type_can_return_str(const LEX_CSTRING &opname) const; + bool check_type_can_return_text(const LEX_CSTRING &opname) const; + bool check_type_can_return_date(const LEX_CSTRING &opname) const; + bool check_type_can_return_time(const LEX_CSTRING &opname) const; + // It is not row => null inside is impossible + virtual bool null_inside() { return 0; } + // used in row subselects to get value of elements + virtual void bring_value() {} + + const Type_handler *type_handler_long_or_longlong() const + { + return Type_handler::type_handler_long_or_longlong(max_char_length(), + unsigned_flag); + } + + /** + Create field for temporary table. 
+ @param table Temporary table + @param [OUT] src Who created the fields + @param param Create parameters + @retval NULL (on error) + @retval a pointer to a newly create Field (on success) + */ + virtual Field *create_tmp_field_ex(MEM_ROOT *root, + TABLE *table, + Tmp_field_src *src, + const Tmp_field_param *param)= 0; + virtual Item_field *field_for_view_update() { return 0; } + + virtual Item *neg_transformer(THD *thd) { return NULL; } + virtual Item *update_value_transformer(THD *thd, uchar *select_arg) + { return this; } + virtual Item *expr_cache_insert_transformer(THD *thd, uchar *unused) + { return this; } + virtual Item *derived_field_transformer_for_having(THD *thd, uchar *arg) + { return this; } + virtual Item *derived_field_transformer_for_where(THD *thd, uchar *arg) + { return this; } + virtual Item *grouping_field_transformer_for_where(THD *thd, uchar *arg) + { return this; } + /* Now is not used. */ + virtual Item *in_subq_field_transformer_for_where(THD *thd, uchar *arg) + { return this; } + virtual Item *in_subq_field_transformer_for_having(THD *thd, uchar *arg) + { return this; } + virtual Item *in_predicate_to_in_subs_transformer(THD *thd, uchar *arg) + { return this; } + virtual Item *in_predicate_to_equality_transformer(THD *thd, uchar *arg) + { return this; } + virtual Item *field_transformer_for_having_pushdown(THD *thd, uchar *arg) + { return this; } + virtual Item *multiple_equality_transformer(THD *thd, uchar *arg) + { return this; } + virtual bool expr_cache_is_needed(THD *) { return FALSE; } + virtual Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs); + bool needs_charset_converter(uint32 length, CHARSET_INFO *tocs) const + { + /* + This will return "true" if conversion happens: + - between two non-binary different character sets + - from "binary" to "unsafe" character set + (those that can have non-well-formed string) + - from "binary" to UCS2-alike character set with mbminlen>1, + when prefix left-padding is needed for an 
incomplete character: + binary 0xFF -> ucs2 0x00FF) + */ + if (!String::needs_conversion_on_storage(length, + collation.collation, tocs)) + return false; + /* + No needs to add converter if an "arg" is NUMERIC or DATETIME + value (which is pure ASCII) and at the same time target DTCollation + is ASCII-compatible. For example, no needs to rewrite: + SELECT * FROM t1 WHERE datetime_field = '2010-01-01'; + to + SELECT * FROM t1 WHERE CONVERT(datetime_field USING cs) = '2010-01-01'; + + TODO: avoid conversion of any values with + repertoire ASCII and 7bit-ASCII-compatible, + not only numeric/datetime origin. + */ + if (collation.derivation == DERIVATION_NUMERIC && + collation.repertoire == MY_REPERTOIRE_ASCII && + !(collation.collation->state & MY_CS_NONASCII) && + !(tocs->state & MY_CS_NONASCII)) + return false; + return true; + } + bool needs_charset_converter(CHARSET_INFO *tocs) + { + // Pass 1 as length to force conversion if tocs->mbminlen>1. + return needs_charset_converter(1, tocs); + } + Item *const_charset_converter(THD *thd, CHARSET_INFO *tocs, bool lossless, + const char *func_name); + Item *const_charset_converter(THD *thd, CHARSET_INFO *tocs, bool lossless) + { return const_charset_converter(thd, tocs, lossless, NULL); } + void delete_self() + { + cleanup(); + delete this; + } + + virtual const Item_const *get_item_const() const { return NULL; } + virtual Item_splocal *get_item_splocal() { return 0; } + virtual Rewritable_query_parameter *get_rewritable_query_parameter() + { return 0; } + + /* + Return Settable_routine_parameter interface of the Item. Return 0 + if this Item is not Settable_routine_parameter. 
+ */ + virtual Settable_routine_parameter *get_settable_routine_parameter() + { + return 0; + } + + virtual Load_data_outvar *get_load_data_outvar() + { + return 0; + } + Load_data_outvar *get_load_data_outvar_or_error() + { + Load_data_outvar *dst= get_load_data_outvar(); + if (dst) + return dst; + my_error(ER_NONUPDATEABLE_COLUMN, MYF(0), name.str); + return NULL; + } + + /** + Test whether an expression is expensive to compute. Used during + optimization to avoid computing expensive expressions during this + phase. Also used to force temp tables when sorting on expensive + functions. + @todo + Normally we should have a method: + cost Item::execution_cost(), + where 'cost' is either 'double' or some structure of various cost + parameters. + + @note + This function is now used to prevent evaluation of expensive subquery + predicates during the optimization phase. It also prevents evaluation + of predicates that are not computable at this moment. + */ + virtual bool is_expensive() + { + if (is_expensive_cache < 0) + is_expensive_cache= walk(&Item::is_expensive_processor, 0, NULL); + return MY_TEST(is_expensive_cache); + } + String *check_well_formed_result(String *str, bool send_error= 0); + bool eq_by_collation(Item *item, bool binary_cmp, CHARSET_INFO *cs); + bool too_big_for_varchar() const + { return max_char_length() > CONVERT_IF_BIGGER_TO_BLOB; } + void fix_length_and_charset(uint32 max_char_length_arg, CHARSET_INFO *cs) + { + max_length= char_to_byte_length_safe(max_char_length_arg, cs->mbmaxlen); + collation.collation= cs; + } + void fix_char_length(size_t max_char_length_arg) + { + max_length= char_to_byte_length_safe(max_char_length_arg, + collation.collation->mbmaxlen); + } + /* + Return TRUE if the item points to a column of an outer-joined table. 
+ */ + virtual bool is_outer_field() const { DBUG_ASSERT(fixed()); return FALSE; } + + Item* set_expr_cache(THD *thd); + + virtual Item_equal *get_item_equal() { return NULL; } + virtual void set_item_equal(Item_equal *item_eq) {}; + virtual Item_equal *find_item_equal(COND_EQUAL *cond_equal) { return NULL; } + /** + Set the join tab index to the minimal (left-most) JOIN_TAB to which this + Item is attached. The number is an index is depth_first_tab() traversal + order. + */ + virtual void set_join_tab_idx(uint8 join_tab_idx_arg) + { + if (join_tab_idx_arg < join_tab_idx) + join_tab_idx= join_tab_idx_arg; + } + uint get_join_tab_idx() const { return join_tab_idx; } + + table_map view_used_tables(TABLE_LIST *view) + { + view->view_used_tables= 0; + walk(&Item::view_used_tables_processor, 0, view); + return view->view_used_tables; + } + + /** + Collect and add to the list cache parameters for this Item. + + @note Now implemented only for subqueries and in_optimizer, + if we need it for general function then this method should + be defined for Item_func. + */ + virtual void get_cache_parameters(List ¶meters) { }; + + virtual void mark_as_condition_AND_part(TABLE_LIST *embedding) {}; + + /* how much position should be reserved for Exists2In transformation */ + virtual uint exists2in_reserved_items() { return 0; }; + + virtual Item *neg(THD *thd); + + /** + Inform the item that it is located under a NOT, which is a top-level item. + */ + virtual void under_not(Item_func_not * upper + __attribute__((unused))) {}; + /* + If Item_field is wrapped in Item_direct_wrep remove this Item_direct_ref + wrapper. 
+ */ + virtual Item *remove_item_direct_ref() { return this; } + + + void register_in(THD *thd); + + bool depends_only_on(table_map view_map) + { return get_extraction_flag() & MARKER_FULL_EXTRACTION; } + int get_extraction_flag() const + { + if (basic_const_item()) + return MARKER_FULL_EXTRACTION; + else + return marker & MARKER_EXTRACTION_MASK; + } + void set_extraction_flag(int16 flags) + { + if (!basic_const_item()) + { + marker= marker & ~MARKER_EXTRACTION_MASK; + marker|= flags; + } + } + void clear_extraction_flag() + { + if (!basic_const_item()) + marker= marker & ~MARKER_EXTRACTION_MASK; + } + void check_pushable_cond(Pushdown_checker excl_dep_func, uchar *arg); + bool pushable_cond_checker_for_derived(uchar *arg) + { + return excl_dep_on_table(*((table_map *)arg)); + } + bool pushable_cond_checker_for_subquery(uchar *arg) + { + DBUG_ASSERT(((Item*) arg)->get_IN_subquery()); + return excl_dep_on_in_subq_left_part(((Item*)arg)->get_IN_subquery()); + } + Item *build_pushable_cond(THD *thd, + Pushdown_checker checker, + uchar *arg); + /* + Checks if this item depends only on the arg table + */ + bool pushable_equality_checker_for_derived(uchar *arg) + { + return (used_tables() == *((table_map *)arg)); + } + /* + Checks if this item consists in the left part of arg IN subquery predicate + */ + bool pushable_equality_checker_for_subquery(uchar *arg); +}; + +MEM_ROOT *get_thd_memroot(THD *thd); + +template +inline Item* get_item_copy (THD *thd, T* item) +{ + Item *copy= new (get_thd_memroot(thd)) T(*item); + if (likely(copy)) + copy->register_in(thd); + return copy; +} + + +#ifndef DBUG_OFF +/** + A helper class to print the data type and the value for an Item + in debug builds. 
+*/ +class DbugStringItemTypeValue: public StringBuffer<128> +{ +public: + DbugStringItemTypeValue(THD *thd, const Item *item) + { + append('('); + Name Item_name= item->type_handler()->name(); + append(Item_name.ptr(), Item_name.length()); + append(')'); + const_cast(item)->print(this, QT_EXPLAIN); + /* Append end \0 to allow usage of c_ptr() */ + append('\0'); + str_length--; + } +}; +#endif /* DBUG_OFF */ + + +/** + Compare two Items for List::add_unique() +*/ + +bool cmp_items(Item *a, Item *b); + + +/** + Array of items, e.g. function or aggerate function arguments. +*/ +class Item_args +{ +protected: + Item **args, *tmp_arg[2]; + uint arg_count; + void set_arguments(THD *thd, List &list); + bool walk_args(Item_processor processor, bool walk_subquery, void *arg) + { + for (uint i= 0; i < arg_count; i++) + { + if (args[i]->walk(processor, walk_subquery, arg)) + return true; + } + return false; + } + bool transform_args(THD *thd, Item_transformer transformer, uchar *arg); + void propagate_equal_fields(THD *, const Item::Context &, COND_EQUAL *); + bool excl_dep_on_table(table_map tab_map) + { + for (uint i= 0; i < arg_count; i++) + { + if (args[i]->const_item()) + continue; + if (!args[i]->excl_dep_on_table(tab_map)) + return false; + } + return true; + } + bool excl_dep_on_grouping_fields(st_select_lex *sel); + bool eq(const Item_args *other, bool binary_cmp) const + { + for (uint i= 0; i < arg_count ; i++) + { + if (!args[i]->eq(other->args[i], binary_cmp)) + return false; + } + return true; + } + bool excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred) + { + for (uint i= 0; i < arg_count; i++) + { + if (args[i]->const_item()) + continue; + if (!args[i]->excl_dep_on_in_subq_left_part(subq_pred)) + return false; + } + return true; + } +public: + Item_args(void) + :args(NULL), arg_count(0) + { } + Item_args(Item *a) + :args(tmp_arg), arg_count(1) + { + args[0]= a; + } + Item_args(Item *a, Item *b) + :args(tmp_arg), arg_count(2) + { + args[0]= a; args[1]= 
b; + } + Item_args(THD *thd, Item *a, Item *b, Item *c) + { + arg_count= 0; + if (likely((args= (Item**) thd_alloc(thd, sizeof(Item*) * 3)))) + { + arg_count= 3; + args[0]= a; args[1]= b; args[2]= c; + } + } + Item_args(THD *thd, Item *a, Item *b, Item *c, Item *d) + { + arg_count= 0; + if (likely((args= (Item**) thd_alloc(thd, sizeof(Item*) * 4)))) + { + arg_count= 4; + args[0]= a; args[1]= b; args[2]= c; args[3]= d; + } + } + Item_args(THD *thd, Item *a, Item *b, Item *c, Item *d, Item* e) + { + arg_count= 5; + if (likely((args= (Item**) thd_alloc(thd, sizeof(Item*) * 5)))) + { + arg_count= 5; + args[0]= a; args[1]= b; args[2]= c; args[3]= d; args[4]= e; + } + } + Item_args(THD *thd, List &list) + { + set_arguments(thd, list); + } + Item_args(THD *thd, const Item_args *other); + bool alloc_arguments(THD *thd, uint count); + void add_argument(Item *item) + { + args[arg_count++]= item; + } + /** + Extract row elements from the given position. + For example, for this input: (1,2),(3,4),(5,6) + pos=0 will extract (1,3,5) + pos=1 will extract (2,4,6) + @param thd - current thread, to allocate memory on its mem_root + @param rows - an array of compatible ROW-type items + @param pos - the element position to extract + */ + bool alloc_and_extract_row_elements(THD *thd, const Item_args *rows, uint pos) + { + DBUG_ASSERT(rows->argument_count() > 0); + DBUG_ASSERT(rows->arguments()[0]->cols() > pos); + if (alloc_arguments(thd, rows->argument_count())) + return true; + for (uint i= 0; i < rows->argument_count(); i++) + { + DBUG_ASSERT(rows->arguments()[0]->cols() == rows->arguments()[i]->cols()); + Item *arg= rows->arguments()[i]->element_index(pos); + add_argument(arg); + } + DBUG_ASSERT(argument_count() == rows->argument_count()); + return false; + } + inline Item **arguments() const { return args; } + inline uint argument_count() const { return arg_count; } + inline void remove_arguments() { arg_count=0; } + Sql_mode_dependency value_depends_on_sql_mode_bit_or() const; 
+}; + + +/* + Class to be used to enumerate all field references in an item tree. This + includes references to outside but not fields of the tables within a + subquery. + Suggested usage: + + class My_enumerator : public Field_enumerator + { + virtual void visit_field() { ... your actions ...} + } + + My_enumerator enumerator; + item->walk(Item::enumerate_field_refs_processor, ...,&enumerator); + + This is similar to Visitor pattern. +*/ + +class Field_enumerator +{ +public: + virtual void visit_field(Item_field *field)= 0; + virtual ~Field_enumerator() = default;; /* purecov: inspected */ + Field_enumerator() = default; /* Remove gcc warning */ +}; + +class Item_string; + + +class Item_fixed_hybrid: public Item +{ +public: + Item_fixed_hybrid(THD *thd): Item(thd) + { + base_flags&= ~item_base_t::FIXED; + } + Item_fixed_hybrid(THD *thd, Item_fixed_hybrid *item) + :Item(thd, item) + { + base_flags|= (item->base_flags & item_base_t::FIXED); + } + bool fix_fields(THD *thd, Item **ref) override + { + DBUG_ASSERT(!fixed()); + base_flags|= item_base_t::FIXED; + return false; + } + void cleanup() override + { + Item::cleanup(); + base_flags&= ~item_base_t::FIXED; + } + void quick_fix_field() override + { base_flags|= item_base_t::FIXED; } + void unfix_fields() override + { base_flags&= ~item_base_t::FIXED; } +}; + + +/** + A common class for Item_basic_constant and Item_param +*/ +class Item_basic_value :public Item, + public Item_const +{ +protected: + // Value metadata, e.g. 
to make string processing easier + class Metadata: private MY_STRING_METADATA + { + public: + Metadata(const String *str) + { + my_string_metadata_get(this, str->charset(), str->ptr(), str->length()); + } + Metadata(const String *str, my_repertoire_t repertoire_arg) + { + MY_STRING_METADATA::repertoire= repertoire_arg; + MY_STRING_METADATA::char_length= str->numchars(); + } + my_repertoire_t repertoire() const + { + return MY_STRING_METADATA::repertoire; + } + size_t char_length() const { return MY_STRING_METADATA::char_length; } + }; + void fix_charset_and_length(CHARSET_INFO *cs, + Derivation dv, Metadata metadata) + { + /* + We have to have a different max_length than 'length' here to + ensure that we get the right length if we do use the item + to create a new table. In this case max_length must be the maximum + number of chars for a string of this type because we in Create_field:: + divide the max_length with mbmaxlen). + */ + collation.set(cs, dv, metadata.repertoire()); + fix_char_length(metadata.char_length()); + decimals= NOT_FIXED_DEC; + } + void fix_charset_and_length_from_str_value(const String &str, Derivation dv) + { + fix_charset_and_length(str.charset(), dv, Metadata(&str)); + } + Item_basic_value(THD *thd): Item(thd) {} + Item_basic_value(): Item() {} +public: + Field *create_tmp_field_ex(MEM_ROOT *root, + TABLE *table, Tmp_field_src *src, + const Tmp_field_param *param) override + { + + /* + create_tmp_field_ex() for this type of Items is called for: + - CREATE TABLE ... 
SELECT + - In ORDER BY: SELECT max(a) FROM t1 GROUP BY a ORDER BY 'const'; + - In CURSORS: + DECLARE c CURSOR FOR SELECT 'test'; + OPEN c; + */ + return tmp_table_field_from_field_type_maybe_null(root, + table, src, param, + type() == Item::NULL_ITEM); + } + bool eq(const Item *item, bool binary_cmp) const override; + const Type_all_attributes *get_type_all_attributes_from_const() const + override + { return this; } +}; + + +class Item_basic_constant :public Item_basic_value +{ +public: + Item_basic_constant(THD *thd): Item_basic_value(thd) {}; + Item_basic_constant(): Item_basic_value() {}; + bool check_vcol_func_processor(void *arg) { return false; } + const Item_const *get_item_const() const { return this; } + virtual Item_basic_constant *make_string_literal_concat(THD *thd, + const LEX_CSTRING *) + { + DBUG_ASSERT(0); + return this; + } +}; + + +/***************************************************************************** + The class is a base class for representation of stored routine variables in + the Item-hierarchy. There are the following kinds of SP-vars: + - local variables (Item_splocal); + - CASE expression (Item_case_expr); +*****************************************************************************/ + +class Item_sp_variable :public Item_fixed_hybrid +{ +protected: + /* + THD, which is stored in fix_fields() and is used in this_item() to avoid + current_thd use. + */ + THD *m_thd; + + bool fix_fields_from_item(THD *thd, Item **, const Item *); +public: + LEX_CSTRING m_name; + +public: +#ifdef DBUG_ASSERT_EXISTS + /* + Routine to which this Item_splocal belongs. Used for checking if correct + runtime context is used for variable handling. 
+ */ + const sp_head *m_sp; +#endif + +public: + Item_sp_variable(THD *thd, const LEX_CSTRING *sp_var_name); + +public: + bool fix_fields(THD *thd, Item **) override= 0; + double val_real() override; + longlong val_int() override; + String *val_str(String *sp) override; + my_decimal *val_decimal(my_decimal *decimal_value) override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + bool val_native(THD *thd, Native *to) override; + bool is_null() override; + +public: + void make_send_field(THD *thd, Send_field *field) override; + bool const_item() const override { return true; } + Field *create_tmp_field_ex(MEM_ROOT *root, + TABLE *table, Tmp_field_src *src, + const Tmp_field_param *param) override + { + return create_tmp_field_ex_simple(root, table, src, param); + } + inline int save_in_field(Field *field, bool no_conversions) override; + inline bool send(Protocol *protocol, st_value *buffer) override; + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(m_name.str, arg, VCOL_IMPOSSIBLE); + } +}; + +/***************************************************************************** + Item_sp_variable inline implementation. +*****************************************************************************/ + +inline int Item_sp_variable::save_in_field(Field *field, bool no_conversions) +{ + return this_item()->save_in_field(field, no_conversions); +} + +inline bool Item_sp_variable::send(Protocol *protocol, st_value *buffer) +{ + return this_item()->send(protocol, buffer); +} + + +/***************************************************************************** + A reference to local SP variable (incl. reference to SP parameter), used in + runtime. 
+*****************************************************************************/
+
+class Item_splocal :public Item_sp_variable,
+                    private Settable_routine_parameter,
+                    public Rewritable_query_parameter,
+                    public Type_handler_hybrid_field_type
+{
+protected:
+  /*
+    Determines in which kind of runtime context (see get_rcontext())
+    this variable is looked up.
+  */
+  const Sp_rcontext_handler *m_rcontext_handler;
+
+  /* Position of this variable inside its runtime context (see get_var_idx()) */
+  uint m_var_idx;
+
+  /* The Item::Type reported by type() */
+  Type m_type;
+
+  bool append_value_for_log(THD *thd, String *str);
+
+  /* Resolve the runtime context holding this variable, using m_rcontext_handler */
+  sp_rcontext *get_rcontext(sp_rcontext *local_ctx) const;
+  /* The Item_field that stores the variable's current value inside ctx */
+  Item_field *get_variable(sp_rcontext *ctx) const;
+
+public:
+  Item_splocal(THD *thd, const Sp_rcontext_handler *rh,
+               const LEX_CSTRING *sp_var_name, uint sp_var_idx,
+               const Type_handler *handler,
+               uint pos_in_q= 0, uint len_in_q= 0);
+
+  bool fix_fields(THD *, Item **) override;
+  Item *this_item() override;
+  const Item *this_item() const override;
+  Item **this_item_addr(THD *thd, Item **) override;
+
+  void print(String *str, enum_query_type query_type) override;
+
+public:
+  inline const LEX_CSTRING *my_name() const;
+
+  inline uint get_var_idx() const;
+
+  Type type() const override { return m_type; }
+  const Type_handler *type_handler() const override
+  { return Type_handler_hybrid_field_type::type_handler(); }
+  /* ROW-related methods delegate to the underlying value item */
+  uint cols() const override { return this_item()->cols(); }
+  Item* element_index(uint i) override
+  { return this_item()->element_index(i); }
+  Item** addr(uint i) override { return this_item()->addr(i); }
+  bool check_cols(uint c) override;
+
+private:
+  bool set_value(THD *thd, sp_rcontext *ctx, Item **it) override;
+
+public:
+  Item_splocal *get_item_splocal() override { return this; }
+
+  Rewritable_query_parameter *get_rewritable_query_parameter() override
+  { return this; }
+
+  Settable_routine_parameter *get_settable_routine_parameter() override
+  { return this; }
+
+  bool append_for_log(THD *thd, String *str) override;
+
+  Item *get_copy(THD *) override { return nullptr; }
+
+  /*
+    Override the inherited create_field_for_create_select(),
+    because we want to preserve the exact data type for:
+      DECLARE a1 INT;
+      DECLARE a2 TYPE OF t1.a2;
+      CREATE TABLE t1 AS SELECT a1, a2;
+    The inherited implementation would create a column
+    based on result_type(), which is less exact.
+  */
+  Field *create_field_for_create_select(MEM_ROOT *root, TABLE *table) override
+  { return create_table_field_from_handler(root, table); }
+
+  bool is_valid_limit_clause_variable_with_error() const
+  {
+    /*
+      In case if the variable has an anchored data type, e.g.:
+        DECLARE a TYPE OF t1.a;
+      type_handler() is set to &type_handler_null and this
+      function detects such variable as not valid in LIMIT.
+    */
+    if (type_handler()->is_limit_clause_valid_type())
+      return true;
+    my_error(ER_WRONG_SPVAR_TYPE_IN_LIMIT, MYF(0));
+    return false;
+  }
+};
+
+
+/**
+  An Item_splocal variant whose data type becomes known only at
+  sp_rcontext creation time, e.g. "DECLARE var1 t1.col1%TYPE".
+*/
+class Item_splocal_with_delayed_data_type: public Item_splocal
+{
+public:
+  Item_splocal_with_delayed_data_type(THD *thd,
+                                      const Sp_rcontext_handler *rh,
+                                      const LEX_CSTRING *sp_var_name,
+                                      uint sp_var_idx,
+                                      uint pos_in_q, uint len_in_q)
+   :Item_splocal(thd, rh, sp_var_name, sp_var_idx, &type_handler_null,
+                 pos_in_q, len_in_q)
+  { }
+};
+
+
+/**
+  SP variables that are fields of a ROW.
+ DELCARE r ROW(a INT,b INT); + SELECT r.a; -- This is handled by Item_splocal_row_field +*/ +class Item_splocal_row_field :public Item_splocal +{ +protected: + LEX_CSTRING m_field_name; + uint m_field_idx; + bool set_value(THD *thd, sp_rcontext *ctx, Item **it) override; +public: + Item_splocal_row_field(THD *thd, + const Sp_rcontext_handler *rh, + const LEX_CSTRING *sp_var_name, + const LEX_CSTRING *sp_field_name, + uint sp_var_idx, uint sp_field_idx, + const Type_handler *handler, + uint pos_in_q= 0, uint len_in_q= 0) + :Item_splocal(thd, rh, sp_var_name, sp_var_idx, handler, pos_in_q, len_in_q), + m_field_name(*sp_field_name), + m_field_idx(sp_field_idx) + { } + bool fix_fields(THD *thd, Item **) override; + Item *this_item() override; + const Item *this_item() const override; + Item **this_item_addr(THD *thd, Item **) override; + bool append_for_log(THD *thd, String *str) override; + void print(String *str, enum_query_type query_type) override; +}; + + +class Item_splocal_row_field_by_name :public Item_splocal_row_field +{ + bool set_value(THD *thd, sp_rcontext *ctx, Item **it) override; +public: + Item_splocal_row_field_by_name(THD *thd, + const Sp_rcontext_handler *rh, + const LEX_CSTRING *sp_var_name, + const LEX_CSTRING *sp_field_name, + uint sp_var_idx, + const Type_handler *handler, + uint pos_in_q= 0, uint len_in_q= 0) + :Item_splocal_row_field(thd, rh, sp_var_name, sp_field_name, + sp_var_idx, 0 /* field index will be set later */, + handler, pos_in_q, len_in_q) + { } + bool fix_fields(THD *thd, Item **it) override; + void print(String *str, enum_query_type query_type) override; +}; + + +/***************************************************************************** + Item_splocal inline implementation. 
+*****************************************************************************/ + +inline const LEX_CSTRING *Item_splocal::my_name() const +{ + return &m_name; +} + +inline uint Item_splocal::get_var_idx() const +{ + return m_var_idx; +} + +/***************************************************************************** + A reference to case expression in SP, used in runtime. +*****************************************************************************/ + +class Item_case_expr :public Item_sp_variable +{ +public: + Item_case_expr(THD *thd, uint case_expr_id); + +public: + bool fix_fields(THD *thd, Item **) override; + Item *this_item() override; + const Item *this_item() const override; + Item **this_item_addr(THD *thd, Item **) override; + + Type type() const override; + const Type_handler *type_handler() const override + { return this_item()->type_handler(); } + +public: + /* + NOTE: print() is intended to be used from views and for debug. + Item_case_expr can not occur in views, so here it is only for debug + purposes. + */ + void print(String *str, enum_query_type query_type) override; + Item *get_copy(THD *) override { return nullptr; } + +private: + uint m_case_expr_id; +}; + +/***************************************************************************** + Item_case_expr inline implementation. +*****************************************************************************/ + +inline enum Item::Type Item_case_expr::type() const +{ + return this_item()->type(); +} + +/* + NAME_CONST(given_name, const_value). + This 'function' has all properties of the supplied const_value (which is + assumed to be a literal constant), and the name given_name. + + This is used to replace references to SP variables when we write PROCEDURE + statements into the binary log. + + TODO + Together with Item_splocal and Item::this_item() we can actually extract + common a base of this class and Item_splocal. Maybe it is possible to + extract a common base with class Item_ref, too. 
+*/
+
+class Item_name_const : public Item_fixed_hybrid
+{
+  Item *value_item;   /* the constant this item evaluates to */
+  Item *name_item;    /* the name to report for the constant */
+public:
+  Item_name_const(THD *thd, Item *name_arg, Item *val);
+
+  bool fix_fields(THD *, Item **) override;
+
+  Type type() const override;
+  /* All value methods are forwarded to value_item */
+  double val_real() override;
+  longlong val_int() override;
+  String *val_str(String *sp) override;
+  my_decimal *val_decimal(my_decimal *) override;
+  bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override;
+  bool val_native(THD *thd, Native *to) override;
+  bool is_null() override;
+  void print(String *str, enum_query_type query_type) override;
+
+  const Type_handler *type_handler() const override
+  {
+    return value_item->type_handler();
+  }
+
+  bool const_item() const override { return true; }
+
+  Field *create_tmp_field_ex(MEM_ROOT *root,
+                             TABLE *table, Tmp_field_src *src,
+                             const Tmp_field_param *param) override
+  {
+    /*
+      We can get to here when using a CURSOR for a query with NAME_CONST():
+        DECLARE c CURSOR FOR SELECT NAME_CONST('x','y') FROM t1;
+        OPEN c;
+    */
+    return tmp_table_field_from_field_type_maybe_null(root, table, src, param,
+                                                      type() == Item::NULL_ITEM);
+  }
+  int save_in_field(Field *field, bool no_conversions) override
+  {
+    return value_item->save_in_field(field, no_conversions);
+  }
+
+  bool send(Protocol *protocol, st_value *buffer) override
+  {
+    return value_item->send(protocol, buffer);
+  }
+  bool check_vcol_func_processor(void *arg) override
+  {
+    return mark_unsupported_function("name_const()", arg, VCOL_IMPOSSIBLE);
+  }
+  Item *get_copy(THD *thd) override
+  { return get_item_copy<Item_name_const>(thd, this); }
+};
+
+
+/**
+  Base class for literal constants: a CONST_ITEM that is never expensive
+  to evaluate.
+*/
+class Item_literal: public Item_basic_constant
+{
+public:
+  Item_literal(THD *thd): Item_basic_constant(thd)
+  { }
+  Item_literal(): Item_basic_constant()
+  {}
+  Type type() const override { return CONST_ITEM; }
+  bool check_partition_func_processor(void *int_arg) override { return false;}
+  bool const_item() const override { return true; }
+  bool basic_const_item() const override { return true; }
+  bool is_expensive() override { return false; }
+  bool cleanup_is_expensive_cache_processor(void *arg) override { return 0; }
+};
+
+
+/**
+  Base class for numeric literals; uses the numeric (binary) collation.
+*/
+class Item_num: public Item_literal
+{
+public:
+  Item_num(THD *thd): Item_literal(thd) { collation= DTCollation_numeric(); }
+  Item_num(): Item_literal() { collation= DTCollation_numeric(); }
+  Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs) override;
+  bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override
+  {
+    return type_handler()->Item_get_date_with_warn(thd, this, ltime, fuzzydate);
+  }
+};
+
+/* Sentinel value meaning "field position is not cached (yet)" */
+#define NO_CACHED_FIELD_INDEX ((field_index_t) ~0U)
+
+class st_select_lex;
+
+
+class Item_result_field :public Item_fixed_hybrid /* Item with result field */
+{
+protected:
+  Field *create_tmp_field_ex_from_handler(MEM_ROOT *root, TABLE *table,
+                                          Tmp_field_src *src,
+                                          const Tmp_field_param *param,
+                                          const Type_handler *h);
+public:
+  Field *result_field;                          /* Save result here */
+  Item_result_field(THD *thd): Item_fixed_hybrid(thd), result_field(0) {}
+  // Constructor used for Item_sum/Item_cond_and/or (see Item comment)
+  Item_result_field(THD *thd, Item_result_field *item):
+    Item_fixed_hybrid(thd, item), result_field(item->result_field)
+  {}
+  ~Item_result_field() = default;
+  Field *get_tmp_table_field() override { return result_field; }
+  Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src *src,
+                             const Tmp_field_param *param) override
+  {
+    DBUG_ASSERT(fixed());
+    const Type_handler *h= type_handler()->type_handler_for_tmp_table(this);
+    return create_tmp_field_ex_from_handler(root, table, src, param, h);
+  }
+  void get_tmp_field_src(Tmp_field_src *src, const Tmp_field_param *param);
+  /*
+    This implementation of used_tables() used by Item_avg_field and
+    Item_variance_field which work when only temporary table left, so they
+    return table map of the temporary table.
+  */
+  table_map used_tables() const override { return 1; }
+  bool is_result_field() override { return true; }
+  void save_in_result_field(bool no_conversions) override
+  {
+    save_in_field(result_field, no_conversions);
+  }
+  void cleanup() override;
+  bool check_vcol_func_processor(void *) override { return false; }
+};
+
+
+/**
+  Base class for items referencing a named database object:
+  db_name.table_name.field_name (e.g. Item_field).
+*/
+class Item_ident :public Item_result_field
+{
+protected:
+  /*
+    We have to store initial values of db_name, table_name and field_name
+    to be able to restore them during cleanup() because they can be
+    updated during fix_fields() to values from Field object and life-time
+    of those is shorter than life-time of Item_field.
+  */
+  Lex_table_name orig_db_name;
+  Lex_table_name orig_table_name;
+  Lex_ident orig_field_name;
+
+  void undeclared_spvar_error() const;
+
+public:
+  Name_resolution_context *context;
+  Lex_table_name db_name;
+  Lex_table_name table_name;
+  Lex_ident field_name;
+  /*
+    Cached pointer to table which contains this field, used for the same reason
+    by prep. stmt. too in case when we have a not-fully qualified field.
+    0 - means no cached value.
+  */
+  TABLE_LIST *cached_table;
+  st_select_lex *depended_from;
+  /*
+    Cached value of index for this field in table->field array, used by prepared
+    stmts for speeding up their re-execution. Holds NO_CACHED_FIELD_INDEX
+    if index value is not known.
+  */
+  field_index_t cached_field_index;
+  /*
+    Some Items resolved in another select should not be marked as dependency
+    of the subquery where they are. During normal name resolution, we check
+    this. Stored procedures and prepared statements first try to resolve an
+    ident item using a cached table reference and field position from the
+    previous query execution (cached_table/cached_field_index). If the
+    tables were not changed, the ident matches the table/field, and we have
+    faster resolution of the ident without looking through all tables and
+    fields in the query. But in this case, we can not check all conditions
+    about this ident item dependency, so we should cache the condition in
+    this variable.
+  */
+  bool can_be_depended;
+  /*
+    NOTE: came from TABLE::alias_name_used and this is only a hint!
+    See comment for TABLE::alias_name_used.
+  */
+  bool alias_name_used; /* true if item was resolved against alias */
+
+  Item_ident(THD *thd, Name_resolution_context *context_arg,
+             const LEX_CSTRING &db_name_arg, const LEX_CSTRING &table_name_arg,
+             const LEX_CSTRING &field_name_arg);
+  Item_ident(THD *thd, Item_ident *item);
+  Item_ident(THD *thd, TABLE_LIST *view_arg, const LEX_CSTRING &field_name_arg);
+  LEX_CSTRING full_name_cstring() const override;
+  void cleanup() override;
+  st_select_lex *get_depended_from() const;
+  bool remove_dependence_processor(void * arg) override;
+  void print(String *str, enum_query_type query_type) override;
+  bool change_context_processor(void *cntx) override
+    { context= (Name_resolution_context *)cntx; return FALSE; }
+  /**
+    Collect outer references
+  */
+  bool collect_outer_ref_processor(void *arg) override;
+  friend bool insert_fields(THD *thd, Name_resolution_context *context,
+                            const char *db_name,
+                            const char *table_name, List_iterator<Item> *it,
+                            bool any_privileges, bool returning_field);
+};
+
+
+class Item_field :public Item_ident,
+                  public Load_data_outvar
+{
+protected:
+  void set_field(Field *field);
+public:
+  Field *field;
+  Item_equal *item_equal;
+  /*
+    if any_privileges set to TRUE then here real effective privileges will
+    be stored
+  */
+  privilege_t have_privileges;
+  /* field need any privileges (for VIEW creation) */
+  bool any_privileges;
+
+private:
+  /*
+    Indicates whether this Item_field refers to a regular or some kind of
+    temporary table.
+    This is needed for print() to work: it may be called even after the table
+    referred by the Item_field has been dropped.
+
+    See ExplainDataStructureLifetime in sql_explain.h for details.
+  */
+  enum {
+    NO_TEMP_TABLE= 0,
+    REFERS_TO_DERIVED_TMP= 1,
+    REFERS_TO_OTHER_TMP=2
+  } refers_to_temp_table = NO_TEMP_TABLE;
+
+public:
+  Item_field(THD *thd, Name_resolution_context *context_arg,
+             const LEX_CSTRING &db_arg, const LEX_CSTRING &table_name_arg,
+             const LEX_CSTRING &field_name_arg);
+  Item_field(THD *thd, Name_resolution_context *context_arg,
+             const LEX_CSTRING &field_name_arg)
+   :Item_field(thd, context_arg, null_clex_str, null_clex_str, field_name_arg)
+  { }
+  Item_field(THD *thd, Name_resolution_context *context_arg)
+   :Item_field(thd, context_arg, null_clex_str, null_clex_str, null_clex_str)
+  { }
+  /*
+    Constructor needed to process subselect with temporary tables (see Item)
+  */
+  Item_field(THD *thd, Item_field *item);
+  /*
+    Constructor used inside setup_wild(), ensures that field, table,
+    and database names will live as long as Item_field (this is important
+    in prepared statements).
+  */
+  Item_field(THD *thd, Name_resolution_context *context_arg, Field *field);
+  /*
+    If this constructor is used, fix_fields() won't work, because
+    db_name, table_name and column_name are unknown. It's necessary to call
+    reset_field() before fix_fields() for all fields created this way.
+  */
+  Item_field(THD *thd, Field *field);
+  Type type() const override { return FIELD_ITEM; }
+  bool eq(const Item *item, bool binary_cmp) const override;
+  double val_real() override;
+  longlong val_int() override;
+  my_decimal *val_decimal(my_decimal *) override;
+  String *val_str(String*) override;
+  void save_result(Field *to) override;
+  double val_result() override;
+  longlong val_int_result() override;
+  bool val_native(THD *thd, Native *to) override;
+  bool val_native_result(THD *thd, Native *to) override;
+  String *str_result(String* tmp) override;
+  my_decimal *val_decimal_result(my_decimal *) override;
+  bool val_bool_result() override;
+  bool is_null_result() override;
+  bool send(Protocol *protocol, st_value *buffer) override;
+  /* Load_data_outvar interface: LOAD DATA writes directly into 'field' */
+  Load_data_outvar *get_load_data_outvar() override { return this; }
+  bool load_data_set_null(THD *thd, const Load_data_param *param) override
+  {
+    return field->load_data_set_null(thd);
+  }
+  bool load_data_set_value(THD *thd, const char *pos, uint length,
+                           const Load_data_param *param) override
+  {
+    field->load_data_set_value(pos, length, param->charset());
+    return false;
+  }
+  bool load_data_set_no_data(THD *thd, const Load_data_param *param) override;
+  void load_data_print_for_log_event(THD *thd, String *to) const override;
+  bool load_data_add_outvar(THD *thd, Load_data_param *param) const override
+  {
+    return param->add_outvar_field(thd, field);
+  }
+  uint load_data_fixed_length() const override
+  {
+    return field->field_length;
+  }
+  void reset_field(Field *f);
+  bool fix_fields(THD *, Item **) override;
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge)
+    override;
+  void make_send_field(THD *thd, Send_field *tmp_field) override;
+  int save_in_field(Field *field,bool no_conversions) override;
+  void save_org_in_field(Field *field, fast_field_copier optimizer_data)
+    override;
+  fast_field_copier setup_fast_field_copier(Field *field) override;
+  table_map used_tables() const override;
+
table_map all_used_tables() const override;
+  const Type_handler *type_handler() const override
+  {
+    const Type_handler *handler= field->type_handler();
+    return handler->type_handler_for_item_field();
+  }
+  const Type_handler *real_type_handler() const override
+  {
+    if (field->is_created_from_null_item)
+      return &type_handler_null;
+    return field->type_handler();
+  }
+  Field *create_tmp_field_from_item_field(MEM_ROOT *root, TABLE *new_table,
+                                          Item_ref *orig_item,
+                                          const Tmp_field_param *param);
+  Field *create_tmp_field_ex(MEM_ROOT *root,
+                             TABLE *table, Tmp_field_src *src,
+                             const Tmp_field_param *param) override;
+  const TYPELIB *get_typelib() const override { return field->get_typelib(); }
+  enum_monotonicity_info get_monotonicity_info() const override
+  {
+    return MONOTONIC_STRICT_INCREASING;
+  }
+  Sql_mode_dependency value_depends_on_sql_mode() const override
+  {
+    return Sql_mode_dependency(0, field->value_depends_on_sql_mode());
+  }
+  bool hash_not_null(Hasher *hasher) override
+  {
+    if (field->is_null())
+      return true;
+    field->hash_not_null(hasher);
+    return false;
+  }
+  longlong val_int_endpoint(bool left_endp, bool *incl_endp) override;
+  bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override;
+  bool get_date_result(THD *thd, MYSQL_TIME *ltime,date_mode_t fuzzydate)
+    override;
+  longlong val_datetime_packed(THD *thd) override;
+  longlong val_time_packed(THD *thd) override;
+  bool is_null() override { return field->is_null(); }
+  void update_null_value() override;
+  /* Record that 'field' is read: narrow covering_keys, mark column deps */
+  void update_table_bitmaps()
+  {
+    if (field && field->table)
+    {
+      TABLE *tab= field->table;
+      tab->covering_keys.intersect(field->part_of_key);
+      if (tab->read_set)
+        tab->mark_column_with_deps(field);
+    }
+  }
+  void update_used_tables() override
+  {
+    update_table_bitmaps();
+  }
+  COND *build_equal_items(THD *thd, COND_EQUAL *inherited,
+                          bool link_item_fields,
+                          COND_EQUAL **cond_equal_ref) override
+  {
+    /*
+      normalize_cond() replaced all conditions of type
+         WHERE/HAVING field
+      to:
+         WHERE/HAVING field<>0
+      By the time of a build_equal_items() call, all such conditions should
+      already be replaced. No Item_field are possible.
+      Note, some Item_field derivants are still possible.
+      Item_insert_value:
+        SELECT * FROM t1 WHERE VALUES(a);
+      Item_default_value:
+        SELECT * FROM t1 WHERE DEFAULT(a);
+    */
+    DBUG_ASSERT(type() != FIELD_ITEM);
+    return Item_ident::build_equal_items(thd, inherited, link_item_fields,
+                                         cond_equal_ref);
+  }
+  bool is_result_field() override { return false; }
+  void save_in_result_field(bool no_conversions) override;
+  Item *get_tmp_table_item(THD *thd) override;
+  bool find_not_null_fields(table_map allowed) override;
+  bool collect_item_field_processor(void * arg) override;
+  bool unknown_splocal_processor(void *arg) override;
+  bool add_field_to_set_processor(void * arg) override;
+  bool find_item_in_field_list_processor(void *arg) override;
+  bool register_field_in_read_map(void *arg) override;
+  bool register_field_in_write_map(void *arg) override;
+  bool register_field_in_bitmap(void *arg) override;
+  bool check_partition_func_processor(void *) override {return false;}
+  bool post_fix_fields_part_expr_processor(void *bool_arg) override;
+  bool check_valid_arguments_processor(void *bool_arg) override;
+  bool check_field_expression_processor(void *arg) override;
+  bool enumerate_field_refs_processor(void *arg) override;
+  bool update_table_bitmaps_processor(void *arg) override;
+  bool switch_to_nullable_fields_processor(void *arg) override;
+  bool update_vcol_processor(void *arg) override;
+  bool rename_fields_processor(void *arg) override;
+  bool check_vcol_func_processor(void *arg) override;
+  bool set_fields_as_dependent_processor(void *arg) override
+  {
+    if (!(used_tables() & OUTER_REF_TABLE_BIT))
+    {
+      depended_from= (st_select_lex *) arg;
+      item_equal= NULL;
+    }
+    return 0;
+  }
+  void cleanup() override;
+  Item_equal *get_item_equal() override { return item_equal; }
+  void set_item_equal(Item_equal *item_eq) override { item_equal= item_eq; }
+  Item_equal *find_item_equal(COND_EQUAL *cond_equal) override;
+  Item* propagate_equal_fields(THD *, const Context &, COND_EQUAL *) override;
+  Item *replace_equal_field(THD *thd, uchar *arg) override;
+  uint32 max_display_length() const override
+  { return field->max_display_length(); }
+  Item_field *field_for_view_update() override { return this; }
+  int fix_outer_field(THD *thd, Field **field, Item **reference);
+  Item *update_value_transformer(THD *thd, uchar *select_arg) override;
+  Item *derived_field_transformer_for_having(THD *thd, uchar *arg) override;
+  Item *derived_field_transformer_for_where(THD *thd, uchar *arg) override;
+  Item *grouping_field_transformer_for_where(THD *thd, uchar *arg) override;
+  Item *in_subq_field_transformer_for_where(THD *thd, uchar *arg) override;
+  Item *in_subq_field_transformer_for_having(THD *thd, uchar *arg) override;
+  void print(String *str, enum_query_type query_type) override;
+  bool excl_dep_on_table(table_map tab_map) override;
+  bool excl_dep_on_grouping_fields(st_select_lex *sel) override;
+  bool excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred) override;
+  bool cleanup_excluding_fields_processor(void *arg) override
+  { return field ? 0 : cleanup_processor(arg); }
+  bool cleanup_excluding_const_fields_processor(void *arg) override
+  { return field && const_item() ?
0 : cleanup_processor(arg); }
+
+  Item *get_copy(THD *thd) override
+  { return get_item_copy<Item_field>(thd, this); }
+  bool is_outer_field() const override
+  {
+    DBUG_ASSERT(fixed());
+    return field->table->pos_in_table_list->outer_join;
+  }
+  bool check_index_dependence(void *arg) override;
+  void set_refers_to_temp_table();
+  friend class Item_default_value;
+  friend class Item_insert_value;
+  friend class st_select_lex_unit;
+};
+
+
+/**
+  Item_field for the ROW data type
+*/
+class Item_field_row: public Item_field,
+                      public Item_args
+{
+public:
+  Item_field_row(THD *thd, Field *field)
+   :Item_field(thd, field),
+    Item_args()
+  { }
+  Item *get_copy(THD *thd) override
+  { return get_item_copy<Item_field_row>(thd, this); }
+
+  const Type_handler *type_handler() const override
+  { return &type_handler_row; }
+  /* ROW elements are kept in Item_args; a scalar if arg_count == 0 */
+  uint cols() const override { return arg_count; }
+  Item* element_index(uint i) override { return arg_count ? args[i] : this; }
+  Item** addr(uint i) override { return arg_count ? args + i : NULL; }
+  bool check_cols(uint c) override
+  {
+    if (cols() != c)
+    {
+      my_error(ER_OPERAND_COLUMNS, MYF(0), c);
+      return true;
+    }
+    return false;
+  }
+  bool row_create_items(THD *thd, List<Spvar_definition> *list);
+};
+
+
+/**
+  A literal SQL NULL (optionally carrying a name and character set).
+*/
+class Item_null :public Item_basic_constant
+{
+public:
+  Item_null(THD *thd, const char *name_par=0, CHARSET_INFO *cs= &my_charset_bin):
+    Item_basic_constant(thd)
+  {
+    set_maybe_null();
+    null_value= TRUE;
+    max_length= 0;
+    name.str= name_par ? name_par : "NULL";
+    name.length= strlen(name.str);
+    collation.set(cs, DERIVATION_IGNORABLE, MY_REPERTOIRE_ASCII);
+  }
+  Type type() const override { return NULL_ITEM; }
+  bool vcol_assignment_allowed_value() const override { return true; }
+  double val_real() override;
+  longlong val_int() override;
+  String *val_str(String *str) override;
+  my_decimal *val_decimal(my_decimal *) override;
+  bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override;
+  longlong val_datetime_packed(THD *) override;
+  longlong val_time_packed(THD *) override;
+  int save_in_field(Field *field, bool no_conversions) override;
+  int save_safe_in_field(Field *field) override;
+  bool send(Protocol *protocol, st_value *buffer) override;
+  const Type_handler *type_handler() const override
+  { return &type_handler_null; }
+  bool basic_const_item() const override { return true; }
+  Item *clone_item(THD *thd) override;
+  bool const_is_null() const override { return true; }
+  bool is_null() override { return true; }
+
+  void print(String *str, enum_query_type) override
+  {
+    str->append(NULL_clex_str);
+  }
+
+  Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs) override;
+  bool check_partition_func_processor(void *) override { return false; }
+  Item_basic_constant *make_string_literal_concat(THD *thd,
+                                                  const LEX_CSTRING *)
+    override;
+  Item *get_copy(THD *thd) override
+  { return get_item_copy<Item_null>(thd, this); }
+};
+
+/**
+  An Item_null that can store its value into a result_field; its type
+  follows result_field's type when one has been assigned.
+*/
+class Item_null_result :public Item_null
+{
+public:
+  Field *result_field;
+  Item_null_result(THD *thd): Item_null(thd), result_field(0) {}
+  bool is_result_field() override { return result_field != 0; }
+  const Type_handler *type_handler() const override
+  {
+    if (result_field)
+      return result_field->type_handler();
+    return &type_handler_null;
+  }
+  Field *create_tmp_field_ex(MEM_ROOT *, TABLE *, Tmp_field_src *,
+                             const Tmp_field_param *) override
+  {
+    DBUG_ASSERT(0);
+    return NULL;
+  }
+  void save_in_result_field(bool no_conversions) override
+  {
+
save_in_field(result_field, no_conversions); + } + bool check_partition_func_processor(void *int_arg) override { return true; } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(full_name(), arg, VCOL_IMPOSSIBLE); + } +}; + +/* + Item represents one placeholder ('?') of prepared statement + + Notes: + Item_param::field_type() is used when this item is in a temporary table. + This is NOT placeholder metadata sent to client, as this value + is assigned after sending metadata (in setup_one_conversion_function). + For example in case of 'SELECT ?' you'll get MYSQL_TYPE_STRING both + in result set and placeholders metadata, no matter what type you will + supply for this placeholder in mysql_stmt_execute. + + Item_param has two Type_handler pointers, + which can point to different handlers: + + 1. In the Type_handler_hybrid_field_type member + It's initialized in: + - Item_param::setup_conversion(), for client-server PS protocol, + according to the bind type. + - Item_param::set_from_item(), for EXECUTE and EXECUTE IMMEDIATE, + according to the actual parameter data type. + + 2. In the "value" member. + It's initialized in: + - Item_param::set_param_func(), for client-server PS protocol. + - Item_param::set_from_item(), for EXECUTE and EXECUTE IMMEDIATE. +*/ + +class Item_param :public Item_basic_value, + private Settable_routine_parameter, + public Rewritable_query_parameter, + private Type_handler_hybrid_field_type +{ + /* + NO_VALUE is a special value meaning that the parameter has not been + assigned yet. Item_param::state is assigned to NO_VALUE in constructor + and is used at prepare time. + + 1. At prepare time + Item_param::fix_fields() sets "fixed" to true, + but as Item_param::state is still NO_VALUE, + Item_param::basic_const_item() returns false. This prevents various + optimizations to happen at prepare time fix_fields(). 
+ For example, in this query: + PREPARE stmt FROM 'SELECT FORMAT(10000,2,?)'; + Item_param::basic_const_item() is tested from + Item_func_format::fix_length_and_dec(). + + 2. At execute time: + When Item_param gets a value + (or a pseudo-value like DEFAULT_VALUE or IGNORE_VALUE): + - Item_param::state changes from NO_VALUE to something else + - Item_param::fixed is changed to true + All Item_param::set_xxx() make sure to do so. + In the state with an assigned value: + - Item_param::basic_const_item() returns true + - Item::type() returns NULL_ITEM or CONST_ITEM, + depending on the value assigned. + So in this state Item_param behaves in many cases like a literal. + + When Item_param::cleanup() is called: + - Item_param::state does not change + - Item_param::fixed changes to false + Note, this puts Item_param into an inconsistent state: + - Item_param::basic_const_item() still returns "true" + - Item_param::type() still pretends to be a basic constant Item + Both are not expected in combination with fixed==false. + However, these methods are not really called in this state, + see asserts in Item_param::basic_const_item() and Item_param::type(). + + When Item_param::reset() is called: + - Item_param::state changes to NO_VALUE + - Item_param::fixed changes to false + */ + enum enum_item_param_state + { + NO_VALUE, NULL_VALUE, SHORT_DATA_VALUE, LONG_DATA_VALUE, + DEFAULT_VALUE, IGNORE_VALUE + } state; + + void fix_temporal(uint32 max_length_arg, uint decimals_arg); + + struct CONVERSION_INFO + { + /* + Character sets conversion info for string values. + Character sets of client and connection defined at bind time are used + for all conversions, even if one of them is later changed (i.e. + between subsequent calls to mysql_stmt_execute). + */ + CHARSET_INFO *character_set_client; + CHARSET_INFO *character_set_of_placeholder; + /* + This points at character set of connection if conversion + to it is required (i. e. if placeholder typecode is not BLOB). 
+ Otherwise it's equal to character_set_client (to simplify + check in convert_str_value()). + */ + CHARSET_INFO *final_character_set_of_str_value; + private: + bool needs_conversion() const + { + return final_character_set_of_str_value != + character_set_of_placeholder; + } + bool convert(THD *thd, String *str); + public: + void set(THD *thd, CHARSET_INFO *cs); + bool convert_if_needed(THD *thd, String *str) + { + /* + Check is so simple because all charsets were set up properly + in setup_one_conversion_function, where typecode of + placeholder was also taken into account: the variables are different + here only if conversion is really necessary. + */ + if (needs_conversion()) + return convert(thd, str); + str->set_charset(final_character_set_of_str_value); + return false; + } + }; + + bool m_empty_string_is_null; + + class PValue_simple + { + public: + union + { + longlong integer; + double real; + CONVERSION_INFO cs_info; + MYSQL_TIME time; + }; + void swap(PValue_simple &other) + { + swap_variables(PValue_simple, *this, other); + } + }; + + class PValue: public Type_handler_hybrid_field_type, + public PValue_simple, + public Value_source + { + public: + PValue(): Type_handler_hybrid_field_type(&type_handler_null) {} + my_decimal m_decimal; + String m_string; + /* + A buffer for string and long data values. Historically all allocated + values returned from val_str() were treated as eligible to + modification. I. e. in some cases Item_func_concat can append it's + second argument to return value of the first one. Because of that we + can't return the original buffer holding string data from val_str(), + and have to have one buffer for data and another just pointing to + the data. This is the latter one and it's returned from val_str(). + Can not be declared inside the union as it's not a POD type. 
+ */ + String m_string_ptr; + + void swap(PValue &other) + { + Type_handler_hybrid_field_type::swap(other); + PValue_simple::swap(other); + m_decimal.swap(other.m_decimal); + m_string.swap(other.m_string); + m_string_ptr.swap(other.m_string_ptr); + } + double val_real(const Type_std_attributes *attr) const; + longlong val_int(const Type_std_attributes *attr) const; + my_decimal *val_decimal(my_decimal *dec, const Type_std_attributes *attr); + String *val_str(String *str, const Type_std_attributes *attr); + }; + + PValue value; + + const String *value_query_val_str(THD *thd, String* str) const; + Item *value_clone_item(THD *thd); + bool is_evaluable_expression() const override; + bool check_assignability_to(const Field *field, bool ignore) const override; + bool can_return_value() const; + +public: + /* + Used for bulk protocol only. + */ + enum enum_indicator_type indicator; + + const Type_handler *type_handler() const override + { return Type_handler_hybrid_field_type::type_handler(); } + + bool vcol_assignment_allowed_value() const override + { + switch (state) { + case NULL_VALUE: + case DEFAULT_VALUE: + case IGNORE_VALUE: + return true; + case NO_VALUE: + case SHORT_DATA_VALUE: + case LONG_DATA_VALUE: + break; + } + return false; + } + + Item_param(THD *thd, const LEX_CSTRING *name_arg, + uint pos_in_query_arg, uint len_in_query_arg); + + Type type() const override + { + // Don't pretend to be a constant unless value for this item is set. 
+    switch (state) {
+    case NO_VALUE:         return PARAM_ITEM;
+    case NULL_VALUE:       return NULL_ITEM;
+    case SHORT_DATA_VALUE: return CONST_ITEM;
+    case LONG_DATA_VALUE:  return CONST_ITEM;
+    case DEFAULT_VALUE:    return PARAM_ITEM;
+    case IGNORE_VALUE:     return PARAM_ITEM;
+    }
+    DBUG_ASSERT(0);
+    return PARAM_ITEM;
+  }
+
+  bool is_order_clause_position() const override
+  {
+    return state == SHORT_DATA_VALUE &&
+           type_handler()->is_order_clause_position_type();
+  }
+
+  /*
+    Expose this item as a constant only when it holds an actual value
+    (including SQL NULL); unset/DEFAULT/IGNORE states are not constants.
+  */
+  const Item_const *get_item_const() const override
+  {
+    switch (state) {
+    case SHORT_DATA_VALUE:
+    case LONG_DATA_VALUE:
+    case NULL_VALUE:
+      return this;
+    case IGNORE_VALUE:
+    case DEFAULT_VALUE:
+    case NO_VALUE:
+      break;
+    }
+    return NULL;
+  }
+
+  bool const_is_null() const override { return state == NULL_VALUE; }
+  /*
+    True when a value is set and both the value's and the item's
+    comparison types match the requested one.
+  */
+  bool can_return_const_value(Item_result type) const
+  {
+    return can_return_value() &&
+           value.type_handler()->cmp_type() == type &&
+           type_handler()->cmp_type() == type;
+  }
+  /* Typed access to the stored value; NULL when the type doesn't match. */
+  const longlong *const_ptr_longlong() const override
+  { return can_return_const_value(INT_RESULT) ? &value.integer : NULL; }
+  const double *const_ptr_double() const override
+  { return can_return_const_value(REAL_RESULT) ? &value.real : NULL; }
+  const my_decimal *const_ptr_my_decimal() const override
+  { return can_return_const_value(DECIMAL_RESULT) ? &value.m_decimal : NULL; }
+  const MYSQL_TIME *const_ptr_mysql_time() const override
+  { return can_return_const_value(TIME_RESULT) ? &value.time : NULL; }
+  const String *const_ptr_string() const override
+  { return can_return_const_value(STRING_RESULT) ? &value.m_string : NULL; }
+
+  double val_real() override
+  {
+    return can_return_value() ? value.val_real(this) : 0e0;
+  }
+  longlong val_int() override
+  {
+    return can_return_value() ? value.val_int(this) : 0;
+  }
+  my_decimal *val_decimal(my_decimal *dec) override
+  {
+    return can_return_value() ? value.val_decimal(dec, this) : NULL;
+  }
+  String *val_str(String *str) override
+  {
+    return can_return_value() ?
+           value.val_str(str, this) : NULL;
+  }
+  bool get_date(THD *thd, MYSQL_TIME *tm, date_mode_t fuzzydate) override;
+  bool val_native(THD *thd, Native *to) override
+  {
+    return Item_param::type_handler()->Item_param_val_native(thd, this, to);
+  }
+
+  int save_in_field(Field *field, bool no_conversions) override;
+
+  /* Value assignment from the server side (one per value kind). */
+  void set_default();
+  void set_ignore();
+  void set_null();
+  void set_int(longlong i, uint32 max_length_arg);
+  void set_double(double i);
+  void set_decimal(const char *str, ulong length);
+  void set_decimal(const my_decimal *dv, bool unsigned_arg);
+  bool set_str(const char *str, ulong length,
+               CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
+  bool set_longdata(const char *str, ulong length);
+  void set_time(MYSQL_TIME *tm, timestamp_type type, uint32 max_length_arg);
+  void set_time(const MYSQL_TIME *tm, uint32 max_length_arg, uint decimals_arg);
+  bool set_from_item(THD *thd, Item *item);
+  void reset();
+
+  /*
+    Readers for the binary protocol: each decodes one wire type from
+    *pos and stores it as the parameter value.
+  */
+  void set_param_tiny(uchar **pos, ulong len);
+  void set_param_short(uchar **pos, ulong len);
+  void set_param_int32(uchar **pos, ulong len);
+  void set_param_int64(uchar **pos, ulong len);
+  void set_param_float(uchar **pos, ulong len);
+  void set_param_double(uchar **pos, ulong len);
+  void set_param_decimal(uchar **pos, ulong len);
+  void set_param_time(uchar **pos, ulong len);
+  void set_param_datetime(uchar **pos, ulong len);
+  void set_param_date(uchar **pos, ulong len);
+  void set_param_str(uchar **pos, ulong len);
+
+  void setup_conversion(THD *thd, uchar param_type);
+  void setup_conversion_blob(THD *thd);
+  void setup_conversion_string(THD *thd, CHARSET_INFO *fromcs);
+
+  /*
+    Assign placeholder value from bind data.
+    Note, that 'len' has different semantics in embedded library (as we
+    don't need to check that packet is not broken there). See
+    sql_prepare.cc for details.
+  */
+  void set_param_func(uchar **pos, ulong len)
+  {
+    /*
+      To avoid Item_param::set_xxx() asserting on data type mismatch,
+      we set the value type handler here:
+      - It can not be initialized yet after Item_param::setup_conversion().
+      - Also, for LIMIT clause parameters, the value type handler might have
+        changed from the real type handler to type_handler_longlong.
+        So here we'll restore it.
+    */
+    const Type_handler *h= Item_param::type_handler();
+    value.set_handler(h);
+    h->Item_param_set_param_func(this, pos, len);
+  }
+
+  bool set_value(THD *thd, const Type_all_attributes *attr,
+                 const st_value *val, const Type_handler *h)
+  {
+    value.set_handler(h); // See comments in set_param_func()
+    return h->Item_param_set_from_value(thd, this, attr, val);
+  }
+
+  /*
+    Bind a value used in a LIMIT/OFFSET clause. Returns true (error)
+    for a negative signed value, which is invalid there.
+  */
+  bool set_limit_clause_param(longlong nr)
+  {
+    value.set_handler(&type_handler_slonglong);
+    set_int(nr, MY_INT64_NUM_DECIMAL_DIGITS);
+    return !unsigned_flag && value.integer < 0;
+  }
+  const String *query_val_str(THD *thd, String *str) const;
+
+  bool convert_str_value(THD *thd);
+
+  /*
+    If value for parameter was not set we treat it as non-const
+    so no one will use parameters value in fix_fields still
+    parameter is constant during execution.
+  */
+  bool const_item() const override
+  {
+    return state != NO_VALUE;
+  }
+  table_map used_tables() const override
+  {
+    return state != NO_VALUE ?
+           (table_map)0 : PARAM_TABLE_BIT;
+  }
+  void print(String *str, enum_query_type query_type) override;
+  bool is_null() override
+  { DBUG_ASSERT(state != NO_VALUE); return state == NULL_VALUE; }
+  bool basic_const_item() const override;
+  /* Simple predicates over the parameter's state. */
+  bool has_no_value() const
+  {
+    return state == NO_VALUE;
+  }
+  bool has_long_data_value() const
+  {
+    return state == LONG_DATA_VALUE;
+  }
+  bool has_int_value() const
+  {
+    return state == SHORT_DATA_VALUE &&
+           value.type_handler()->cmp_type() == INT_RESULT;
+  }
+  bool is_stored_routine_parameter() const override { return true; }
+  /*
+    This method is used to make a copy of a basic constant item when
+    propagating constants in the optimizer. The reason to create a new
+    item and not use the existing one is not precisely known (2005/04/16).
+    Probably we are trying to preserve tree structure of items, in other
+    words, avoid pointing at one item from two different nodes of the tree.
+    Return a new basic constant item if parameter value is a basic
+    constant, assert otherwise. This method is called only if
+    basic_const_item returned TRUE.
+  */
+  Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs) override;
+  Item *clone_item(THD *thd) override;
+  void set_param_type_and_swap_value(Item_param *from);
+
+  Rewritable_query_parameter *get_rewritable_query_parameter() override
+  { return this; }
+  Settable_routine_parameter *get_settable_routine_parameter() override
+  { return m_is_settable_routine_parameter ?
+           this : nullptr; }
+
+  bool append_for_log(THD *thd, String *str) override;
+  bool check_vcol_func_processor(void *) override { return false; }
+  Item *get_copy(THD *) override { return nullptr; }
+  /* CTE clone bookkeeping: see comment on m_clones below. */
+  bool add_as_clone(THD *thd);
+  void sync_clones();
+  bool register_clone(Item_param *i) { return m_clones.push_back(i); }
+
+private:
+  void invalid_default_param() const;
+  bool set_value(THD *thd, sp_rcontext *ctx, Item **it) override;
+  void set_out_param_info(Send_field *info) override;
+
+public:
+  const Send_field *get_out_param_info() const override;
+  Item_param *get_item_param() override { return this; }
+  void make_send_field(THD *thd, Send_field *field) override;
+
+private:
+  Send_field *m_out_param_info;
+  bool m_is_settable_routine_parameter;
+  /*
+    Array of all references of this parameter marker used in a CTE to its clones
+    created for copies of this marker used the CTE's copies. It's used to
+    synchronize the actual value of the parameter with the values of the clones.
+    NOTE(review): template arguments appear stripped in this extraction
+    (presumably Mem_root_array<Item_param*, ...>) — verify against upstream.
+  */
+  Mem_root_array m_clones;
+};
+
+
+/* Integer literal constant (also base for bool/uint/datetime items). */
+class Item_int :public Item_num
+{
+public:
+  longlong value;
+  Item_int(THD *thd, int32 i,size_t length= MY_INT32_NUM_DECIMAL_DIGITS):
+    Item_num(thd), value((longlong) i)
+  { max_length=(uint32)length; }
+  Item_int(THD *thd, longlong i,size_t length= MY_INT64_NUM_DECIMAL_DIGITS):
+    Item_num(thd), value(i)
+  { max_length=(uint32)length; }
+  Item_int(THD *thd, ulonglong i, size_t length= MY_INT64_NUM_DECIMAL_DIGITS):
+    Item_num(thd), value((longlong)i)
+  { max_length=(uint32)length; unsigned_flag= 1; }
+  Item_int(THD *thd, const char *str_arg,longlong i,size_t length):
+    Item_num(thd), value(i)
+  {
+    max_length=(uint32)length;
+    name.str= str_arg; name.length= safe_strlen(name.str);
+  }
+  Item_int(THD *thd, const char *str_arg,longlong i,size_t length, bool flag):
+    Item_num(thd), value(i)
+  {
+    max_length=(uint32)length;
+    name.str= str_arg; name.length= safe_strlen(name.str);
+    unsigned_flag= flag;
+  }
+  /* THD-less variant; note it marks the value unsigned. */
+  Item_int(const char *str_arg,longlong
+           i,size_t length):
+    Item_num(), value(i)
+  {
+    max_length=(uint32)length;
+    name.str= str_arg; name.length= safe_strlen(name.str);
+    unsigned_flag= 1;
+  }
+  Item_int(THD *thd, const char *str_arg, size_t length=64);
+  const Type_handler *type_handler() const override
+  { return type_handler_long_or_longlong(); }
+  Field *create_field_for_create_select(MEM_ROOT *root, TABLE *table) override
+  { return tmp_table_field_from_field_type(root, table); }
+  const longlong *const_ptr_longlong() const override { return &value; }
+  longlong val_int() override { return value; }
+  longlong val_int_min() const override { return value; }
+  double val_real() override { return (double) value; }
+  my_decimal *val_decimal(my_decimal *) override;
+  String *val_str(String*) override;
+  int save_in_field(Field *field, bool no_conversions) override;
+  bool is_order_clause_position() const override { return true; }
+  Item *clone_item(THD *thd) override;
+  void print(String *str, enum_query_type query_type) override;
+  Item *neg(THD *thd) override;
+  /* Precision excludes the sign position for negative values. */
+  decimal_digits_t decimal_precision() const override
+  { return (decimal_digits_t) (max_length - MY_TEST(value < 0)); }
+  Item *get_copy(THD *thd) override
+  { return get_item_copy(thd, this); }
+};
+
+
+/*
+  We sometimes need to distinguish a number from a boolean:
+  a[1] and a[true] are different things in XPath.
+  Also in JSON boolean values should be treated differently.
+*/
+class Item_bool :public Item_int
+{
+public:
+  Item_bool(THD *thd, const char *str_arg, longlong i):
+    Item_int(thd, str_arg, i, 1) {}
+  Item_bool(THD *thd, bool i) :Item_int(thd, (longlong) i, 1) { }
+  Item_bool(const char *str_arg, longlong i):
+    Item_int(str_arg, i, 1) {}
+  bool is_bool_literal() const override { return true; }
+  Item *neg_transformer(THD *thd) override;
+  const Type_handler *type_handler() const override
+  { return &type_handler_bool; }
+  const Type_handler *fixed_type_handler() const override
+  { return &type_handler_bool; }
+  void quick_fix_field() override
+  {
+    /*
+      We can get here when Item_bool is created instead of a constant
+      predicate at various condition optimization stages in sql_select.
+    */
+  }
+};
+
+
+/*
+  Shared, immutable boolean constant (see Item_false/Item_true below);
+  lifecycle hooks are therefore no-ops.
+*/
+class Item_bool_static :public Item_bool
+{
+public:
+  Item_bool_static(const char *str_arg, longlong i):
+    Item_bool(str_arg, i) {};
+
+  /* Don't mark static items as top level item */
+  virtual void top_level_item() override {}
+  void set_join_tab_idx(uint8 join_tab_idx_arg) override
+  { DBUG_ASSERT(0); }
+
+  void cleanup() override {}
+};
+
+/* The following variables are stored in a read only segment */
+extern Item_bool_static *Item_false, *Item_true;
+
+/* Unsigned integer literal constant. */
+class Item_uint :public Item_int
+{
+public:
+  Item_uint(THD *thd, const char *str_arg, size_t length);
+  Item_uint(THD *thd, ulonglong i): Item_int(thd, i, 10) {}
+  Item_uint(THD *thd, const char *str_arg, longlong i, uint length);
+  double val_real() override { return ulonglong2double((ulonglong)value); }
+  Item *clone_item(THD *thd) override;
+  Item *neg(THD *thd) override;
+  decimal_digits_t decimal_precision() const override
+  { return decimal_digits_t(max_length); }
+  Item *get_copy(THD *thd) override
+  { return get_item_copy(thd, this); }
+};
+
+
+/* Datetime constant carried as a packed longlong plus MYSQL_TIME. */
+class Item_datetime :public Item_int
+{
+protected:
+  MYSQL_TIME ltime;
+public:
+  Item_datetime(THD *thd): Item_int(thd, 0) { unsigned_flag=0; }
+  int save_in_field(Field *field, bool no_conversions) override;
+
+  longlong val_int() override;
+  double val_real() override { return (double)val_int(); }
+  void set(longlong packed, enum_mysql_timestamp_type ts_type);
+  bool get_date(THD *thd, MYSQL_TIME *to, date_mode_t fuzzydate) override
+  {
+    *to= ltime;
+    return false;
+  }
+};
+
+
+/* decimal (fixed point) constant */
+class Item_decimal :public Item_num
+{
+protected:
+  my_decimal decimal_value;
+public:
+  Item_decimal(THD *thd, const char *str_arg, size_t length,
+               CHARSET_INFO *charset);
+  Item_decimal(THD *thd, const char *str, const my_decimal *val_arg,
+               uint decimal_par, uint length);
+  Item_decimal(THD *thd, const my_decimal *value_par);
+  Item_decimal(THD *thd, longlong val, bool unsig);
+  Item_decimal(THD *thd, double val, int precision, int scale);
+  Item_decimal(THD *thd, const uchar *bin, int precision, int scale);
+
+  const Type_handler *type_handler() const override
+  { return &type_handler_newdecimal; }
+  /* All val_*() delegate to the stored my_decimal. */
+  longlong val_int() override
+  { return decimal_value.to_longlong(unsigned_flag); }
+  double val_real() override
+  { return decimal_value.to_double(); }
+  String *val_str(String *to) override
+  { return decimal_value.to_string(to); }
+  my_decimal *val_decimal(my_decimal *val) override
+  { return &decimal_value; }
+  const my_decimal *const_ptr_my_decimal() const override
+  { return &decimal_value; }
+  int save_in_field(Field *field, bool no_conversions) override;
+  Item *clone_item(THD *thd) override;
+  void print(String *str, enum_query_type query_type) override
+  {
+    decimal_value.to_string(&str_value);
+    str->append(str_value);
+  }
+  Item *neg(THD *thd) override;
+  decimal_digits_t decimal_precision() const override
+  { return decimal_value.precision(); }
+  void set_decimal_value(my_decimal *value_par);
+  Item *get_copy(THD *thd) override
+  { return get_item_copy(thd, this); }
+};
+
+
+/* Floating point (double) constant. */
+class Item_float :public Item_num
+{
+  /* Original text form of the literal, used for printing; may be NULL. */
+  const char *presentation;
+public:
+  double value;
+  Item_float(THD *thd, const char *str_arg, size_t length);
+  Item_float(THD *thd,
const char *str, double val_arg, uint decimal_par, + uint length): Item_num(thd), value(val_arg) + { + presentation= name.str= str; + name.length= safe_strlen(str); + decimals=(uint8) decimal_par; + max_length= length; + } + Item_float(THD *thd, double value_par, uint decimal_par): + Item_num(thd), presentation(0), value(value_par) + { + decimals= (uint8) decimal_par; + } + int save_in_field(Field *field, bool no_conversions) override; + const Type_handler *type_handler() const override + { return &type_handler_double; } + const double *const_ptr_double() const override { return &value; } + double val_real() override { return value; } + longlong val_int() override + { + if (value <= (double) LONGLONG_MIN) + { + return LONGLONG_MIN; + } + else if (value >= (double) (ulonglong) LONGLONG_MAX) + { + return LONGLONG_MAX; + } + return (longlong) rint(value); + } + String *val_str(String*) override; + my_decimal *val_decimal(my_decimal *) override; + Item *clone_item(THD *thd) override; + Item *neg(THD *thd) override; + void print(String *str, enum_query_type query_type) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_static_float_func :public Item_float +{ + const char *func_name; +public: + Item_static_float_func(THD *thd, const char *str, double val_arg, + uint decimal_par, uint length): + Item_float(thd, NullS, val_arg, decimal_par, length), func_name(str) + {} + void print(String *str, enum_query_type) override + { + str->append(func_name, strlen(func_name)); + } + Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs) override + { + return const_charset_converter(thd, tocs, true, func_name); + } +}; + + +class Item_string :public Item_literal +{ +protected: + void fix_from_value(Derivation dv, const Metadata metadata) + { + fix_charset_and_length(str_value.charset(), dv, metadata); + } + void fix_and_set_name_from_value(THD *thd, Derivation dv, + const Metadata metadata) + { + fix_from_value(dv, metadata); 
+ set_name(thd, &str_value); + } +protected: + /* Just create an item and do not fill string representation */ + Item_string(THD *thd, CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE): + Item_literal(thd) + { + collation.set(cs, dv); + max_length= 0; + set_name(thd, NULL, 0, system_charset_info); + decimals= NOT_FIXED_DEC; + } +public: + Item_string(THD *thd, CHARSET_INFO *csi, const char *str_arg, uint length_arg) + :Item_literal(thd) + { + collation.set(csi, DERIVATION_COERCIBLE); + set_name(thd, NULL, 0, system_charset_info); + decimals= NOT_FIXED_DEC; + str_value.copy(str_arg, length_arg, csi); + max_length= str_value.numchars() * csi->mbmaxlen; + } + // Constructors with the item name set from its value + Item_string(THD *thd, const char *str, uint length, CHARSET_INFO *cs, + Derivation dv, my_repertoire_t repertoire) + :Item_literal(thd) + { + str_value.set_or_copy_aligned(str, length, cs); + fix_and_set_name_from_value(thd, dv, Metadata(&str_value, repertoire)); + } + Item_string(THD *thd, const char *str, size_t length, + CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE) + :Item_literal(thd) + { + str_value.set_or_copy_aligned(str, length, cs); + fix_and_set_name_from_value(thd, dv, Metadata(&str_value)); + } + Item_string(THD *thd, const String *str, CHARSET_INFO *tocs, uint *conv_errors, + Derivation dv, my_repertoire_t repertoire) + :Item_literal(thd) + { + if (str_value.copy(str, tocs, conv_errors)) + str_value.set("", 0, tocs); // EOM ? 
+ str_value.mark_as_const(); + fix_and_set_name_from_value(thd, dv, Metadata(&str_value, repertoire)); + } + // Constructors with an externally provided item name + Item_string(THD *thd, const LEX_CSTRING &name_par, const LEX_CSTRING &str, + CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE) + :Item_literal(thd) + { + str_value.set_or_copy_aligned(str.str, str.length, cs); + fix_from_value(dv, Metadata(&str_value)); + set_name(thd, name_par); + } + Item_string(THD *thd, const LEX_CSTRING &name_par, const LEX_CSTRING &str, + CHARSET_INFO *cs, Derivation dv, my_repertoire_t repertoire) + :Item_literal(thd) + { + str_value.set_or_copy_aligned(str.str, str.length, cs); + fix_from_value(dv, Metadata(&str_value, repertoire)); + set_name(thd, name_par); + } + void print_value(String *to) const + { + str_value.print(to); + } + double val_real() override; + longlong val_int() override; + const String *const_ptr_string() const override { return &str_value; } + String *val_str(String*) override + { + return (String*) &str_value; + } + my_decimal *val_decimal(my_decimal *) override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + return get_date_from_string(thd, ltime, fuzzydate); + } + int save_in_field(Field *field, bool no_conversions) override; + const Type_handler *type_handler() const override + { return &type_handler_varchar; } + Item *clone_item(THD *thd) override; + Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs) override + { + return const_charset_converter(thd, tocs, true); + } + inline void append(const char *str, uint length) + { + str_value.append(str, length); + max_length= str_value.numchars() * collation.collation->mbmaxlen; + } + void print(String *str, enum_query_type query_type) override; + + /** + Return TRUE if character-set-introducer was explicitly specified in the + original query for this item (text literal). + + This operation is to be called from Item_string::print(). 
The idea is + that when a query is generated (re-constructed) from the Item-tree, + character-set-introducers should appear only for those literals, where + they were explicitly specified by the user. Otherwise, that may lead to + loss collation information (character set introducers implies default + collation for the literal). + + Basically, that makes sense only for views and hopefully will be gone + one day when we start using original query as a view definition. + + @return This operation returns the value of m_cs_specified attribute. + @retval TRUE if character set introducer was explicitly specified in + the original query. + @retval FALSE otherwise. + */ + virtual bool is_cs_specified() const + { + return false; + } + + String *check_well_formed_result(bool send_error) + { return Item::check_well_formed_result(&str_value, send_error); } + + Item_basic_constant *make_string_literal_concat(THD *thd, + const LEX_CSTRING *) override; + Item *make_odbc_literal(THD *thd, const LEX_CSTRING *typestr) override; + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + +}; + + +class Item_string_with_introducer :public Item_string +{ +public: + Item_string_with_introducer(THD *thd, const LEX_CSTRING &str, + CHARSET_INFO *cs): + Item_string(thd, str.str, str.length, cs) + { } + Item_string_with_introducer(THD *thd, const LEX_CSTRING &name_arg, + const LEX_CSTRING &str, CHARSET_INFO *tocs): + Item_string(thd, name_arg, str, tocs) + { } + virtual bool is_cs_specified() const + { + return true; + } +}; + + +class Item_string_sys :public Item_string +{ +public: + Item_string_sys(THD *thd, const char *str, uint length): + Item_string(thd, str, length, system_charset_info) + { } + Item_string_sys(THD *thd, const char *str): + Item_string(thd, str, (uint) strlen(str), system_charset_info) + { } +}; + + +class Item_string_ascii :public Item_string +{ +public: + Item_string_ascii(THD *thd, const char *str, uint length): + Item_string(thd, str, length, 
&my_charset_latin1, + DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII) + { } + Item_string_ascii(THD *thd, const char *str): + Item_string(thd, str, (uint) strlen(str), &my_charset_latin1, + DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII) + { } +}; + + +class Item_static_string_func :public Item_string +{ + const LEX_CSTRING func_name; +public: + Item_static_string_func(THD *thd, const LEX_CSTRING &name_par, + const LEX_CSTRING &str, CHARSET_INFO *cs, + Derivation dv= DERIVATION_COERCIBLE): + Item_string(thd, LEX_CSTRING({NullS,0}), str, cs, dv), func_name(name_par) + {} + Item_static_string_func(THD *thd, const LEX_CSTRING &name_par, + const String *str, + CHARSET_INFO *tocs, uint *conv_errors, + Derivation dv, my_repertoire_t repertoire): + Item_string(thd, str, tocs, conv_errors, dv, repertoire), + func_name(name_par) + {} + Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs) override + { + return const_charset_converter(thd, tocs, true, func_name.str); + } + + void print(String *str, enum_query_type) override + { + str->append(func_name); + } + + bool check_partition_func_processor(void *) override { return true; } + + bool check_vcol_func_processor(void *arg) override + { // VCOL_TIME_FUNC because the value is not constant, but does not + // require fix_fields() to be re-run for every statement. + return mark_unsupported_function(func_name.str, arg, VCOL_TIME_FUNC); + } +}; + + +/* for show tables */ +class Item_partition_func_safe_string: public Item_string +{ +public: + Item_partition_func_safe_string(THD *thd, const LEX_CSTRING &name_arg, + uint length, CHARSET_INFO *cs): + Item_string(thd, name_arg, LEX_CSTRING({0,0}), cs) + { + max_length= length; + } + bool check_vcol_func_processor(void *arg) + { + return mark_unsupported_function("safe_string", arg, VCOL_IMPOSSIBLE); + } +}; + + +/** + Item_empty_string -- is a utility class to put an item into List + which is then used in protocol.send_result_set_metadata() when sending SHOW output to + the client. 
+*/ + +class Item_empty_string :public Item_partition_func_safe_string +{ +public: + Item_empty_string(THD *thd, const LEX_CSTRING &header, uint length, + CHARSET_INFO *cs= &my_charset_utf8mb3_general_ci) + :Item_partition_func_safe_string(thd, header, length * cs->mbmaxlen, cs) + { } + Item_empty_string(THD *thd, const char *header, uint length, + CHARSET_INFO *cs= &my_charset_utf8mb3_general_ci) + :Item_partition_func_safe_string(thd, LEX_CSTRING({header, strlen(header)}), + length * cs->mbmaxlen, cs) + { } + void make_send_field(THD *thd, Send_field *field); +}; + + +class Item_return_int :public Item_int +{ + enum_field_types int_field_type; +public: + Item_return_int(THD *thd, const char *name_arg, uint length, + enum_field_types field_type_arg, longlong value_arg= 0): + Item_int(thd, name_arg, value_arg, length), int_field_type(field_type_arg) + { + unsigned_flag=1; + } + const Type_handler *type_handler() const override + { + const Type_handler *h= + Type_handler::get_handler_by_field_type(int_field_type); + return unsigned_flag ? 
h->type_handler_unsigned() : h; + } +}; + + +/** + Item_hex_constant -- a common class for hex literals: X'HHHH' and 0xHHHH +*/ +class Item_hex_constant: public Item_literal +{ +private: + void hex_string_init(THD *thd, const char *str, size_t str_length); +public: + Item_hex_constant(THD *thd): Item_literal(thd) + { + hex_string_init(thd, "", 0); + } + Item_hex_constant(THD *thd, const char *str, size_t str_length): + Item_literal(thd) + { + hex_string_init(thd, str, str_length); + } + const Type_handler *type_handler() const override + { return &type_handler_varchar; } + Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs) override + { + return const_charset_converter(thd, tocs, true); + } + const String *const_ptr_string() const override { return &str_value; } + String *val_str(String*) override { return &str_value; } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + return type_handler()->Item_get_date_with_warn(thd, this, ltime, fuzzydate); + } +}; + + +/** + Item_hex_hybrid -- is a class implementing 0xHHHH literals, e.g.: + SELECT 0x3132; + They can behave as numbers and as strings depending on context. 
+*/ +class Item_hex_hybrid: public Item_hex_constant +{ +public: + Item_hex_hybrid(THD *thd): Item_hex_constant(thd) {} + Item_hex_hybrid(THD *thd, const char *str, size_t str_length): + Item_hex_constant(thd, str, str_length) {} + const Type_handler *type_handler() const override + { return &type_handler_hex_hybrid; } + decimal_digits_t decimal_precision() const override; + double val_real() override + { + return (double) (ulonglong) Item_hex_hybrid::val_int(); + } + longlong val_int() override + { + return longlong_from_hex_hybrid(str_value.ptr(), str_value.length()); + } + my_decimal *val_decimal(my_decimal *decimal_value) override + { + longlong value= Item_hex_hybrid::val_int(); + int2my_decimal(E_DEC_FATAL_ERROR, value, TRUE, decimal_value); + return decimal_value; + } + int save_in_field(Field *field, bool) override + { + field->set_notnull(); + return field->store_hex_hybrid(str_value.ptr(), str_value.length()); + } + void print(String *str, enum_query_type query_type) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/** + Item_hex_string -- is a class implementing X'HHHH' literals, e.g.: + SELECT X'3132'; + Unlike Item_hex_hybrid, X'HHHH' literals behave as strings in all contexts. + X'HHHH' are also used in replication of string constants in case of + "dangerous" charsets (sjis, cp932, big5, gbk) who can have backslash (0x5C) + as the second byte of a multi-byte character, so using '\' escaping for + these charsets is not desirable. 
+*/ +class Item_hex_string: public Item_hex_constant +{ +public: + Item_hex_string(THD *thd): Item_hex_constant(thd) {} + Item_hex_string(THD *thd, const char *str, size_t str_length): + Item_hex_constant(thd, str, str_length) {} + longlong val_int() override + { + return longlong_from_string_with_check(&str_value); + } + double val_real() override + { + return double_from_string_with_check(&str_value); + } + my_decimal *val_decimal(my_decimal *decimal_value) override + { + return val_decimal_from_string(decimal_value); + } + int save_in_field(Field *field, bool) override + { + field->set_notnull(); + return field->store(str_value.ptr(), str_value.length(), + collation.collation); + } + void print(String *str, enum_query_type query_type) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_bin_string: public Item_hex_hybrid +{ +public: + Item_bin_string(THD *thd, const char *str, size_t str_length); + void print(String *str, enum_query_type query_type) override; +}; + + +class Item_timestamp_literal: public Item_literal +{ + Timestamp_or_zero_datetime m_value; +public: + Item_timestamp_literal(THD *thd) + :Item_literal(thd) + { } + const Type_handler *type_handler() const override + { return &type_handler_timestamp2; } + int save_in_field(Field *field, bool) override + { + Timestamp_or_zero_datetime_native native(m_value, decimals); + return native.save_in_field(field, decimals); + } + longlong val_int() override + { + return m_value.to_datetime(current_thd).to_longlong(); + } + double val_real() override + { + return m_value.to_datetime(current_thd).to_double(); + } + String *val_str(String *to) override + { + return m_value.to_datetime(current_thd).to_string(to, decimals); + } + my_decimal *val_decimal(my_decimal *to) override + { + return m_value.to_datetime(current_thd).to_decimal(to); + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + bool res= m_value.to_TIME(thd, ltime, 
fuzzydate); + DBUG_ASSERT(!res); + return res; + } + bool val_native(THD *thd, Native *to) override + { + return m_value.to_native(to, decimals); + } + void set_value(const Timestamp_or_zero_datetime &value) + { + m_value= value; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_temporal_literal :public Item_literal +{ +public: + Item_temporal_literal(THD *thd) + :Item_literal(thd) + { + collation= DTCollation_numeric(); + decimals= 0; + } + Item_temporal_literal(THD *thd, decimal_digits_t dec_arg): + Item_literal(thd) + { + collation= DTCollation_numeric(); + decimals= dec_arg; + } + + int save_in_field(Field *field, bool no_conversions) override + { return save_date_in_field(field, no_conversions); } +}; + + +/** + DATE'2010-01-01' +*/ +class Item_date_literal: public Item_temporal_literal +{ +protected: + Date cached_time; + bool update_null() + { + return (maybe_null() && + (null_value= cached_time.check_date_with_warn(current_thd))); + } +public: + Item_date_literal(THD *thd, const Date *ltime) + :Item_temporal_literal(thd), + cached_time(*ltime) + { + DBUG_ASSERT(cached_time.is_valid_date()); + max_length= MAX_DATE_WIDTH; + /* + If date has zero month or day, it can return NULL in case of + NO_ZERO_DATE or NO_ZERO_IN_DATE. + If date is `February 30`, it can return NULL in case if + no ALLOW_INVALID_DATES is set. + We can't set null_value using the current sql_mode here in constructor, + because sql_mode can change in case of prepared statements + between PREPARE and EXECUTE. + Here we only set maybe_null to true if the value has such anomalies. + Later (during execution time), if maybe_null is true, then the value + will be checked per row, according to the execution time sql_mode. + The check_date() below call should cover all cases mentioned. 
+ */ + set_maybe_null(cached_time.check_date(TIME_NO_ZERO_DATE | + TIME_NO_ZERO_IN_DATE)); + } + const Type_handler *type_handler() const override + { return &type_handler_newdate; } + void print(String *str, enum_query_type query_type) override; + const MYSQL_TIME *const_ptr_mysql_time() const override + { + return cached_time.get_mysql_time(); + } + Item *clone_item(THD *thd) override; + longlong val_int() override + { + return update_null() ? 0 : cached_time.to_longlong(); + } + double val_real() override + { + return update_null() ? 0 : cached_time.to_double(); + } + String *val_str(String *to) override + { + return update_null() ? 0 : cached_time.to_string(to); + } + my_decimal *val_decimal(my_decimal *to) override + { + return update_null() ? 0 : cached_time.to_decimal(to); + } + longlong val_datetime_packed(THD *thd) override + { + return update_null() ? 0 : cached_time.valid_date_to_packed(); + } + bool get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/** + TIME'10:10:10' +*/ +class Item_time_literal final: public Item_temporal_literal +{ +protected: + Time cached_time; +public: + Item_time_literal(THD *thd, const Time *ltime, decimal_digits_t dec_arg): + Item_temporal_literal(thd, dec_arg), + cached_time(*ltime) + { + DBUG_ASSERT(cached_time.is_valid_time()); + max_length= MIN_TIME_WIDTH + (decimals ? 
decimals + 1 : 0); + } + const Type_handler *type_handler() const override + { return &type_handler_time2; } + void print(String *str, enum_query_type query_type) override; + const MYSQL_TIME *const_ptr_mysql_time() const override + { + return cached_time.get_mysql_time(); + } + Item *clone_item(THD *thd) override; + longlong val_int() override { return cached_time.to_longlong(); } + double val_real() override { return cached_time.to_double(); } + String *val_str(String *to) override + { return cached_time.to_string(to, decimals); } + my_decimal *val_decimal(my_decimal *to) override + { return cached_time.to_decimal(to); } + longlong val_time_packed(THD *thd) override + { + return cached_time.valid_time_to_packed(); + } + bool get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate) override; + bool val_native(THD *thd, Native *to) override + { + return Time(thd, this).to_native(to, decimals); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/** + TIMESTAMP'2001-01-01 10:20:30' +*/ + +class Item_datetime_literal: public Item_temporal_literal +{ +protected: + Datetime cached_time; + bool update_null() + { + return (maybe_null() && + (null_value= cached_time.check_date_with_warn(current_thd))); + } +public: + Item_datetime_literal(THD *thd, const Datetime *ltime, + decimal_digits_t dec_arg): + Item_temporal_literal(thd, dec_arg), + cached_time(*ltime) + { + DBUG_ASSERT(cached_time.is_valid_datetime()); + max_length= MAX_DATETIME_WIDTH + (decimals ? 
decimals + 1 : 0); + // See the comment on maybe_null in Item_date_literal + set_maybe_null(cached_time.check_date(TIME_NO_ZERO_DATE | + TIME_NO_ZERO_IN_DATE)); + } + const Type_handler *type_handler() const override + { return &type_handler_datetime2; } + void print(String *str, enum_query_type query_type) override; + const MYSQL_TIME *const_ptr_mysql_time() const override + { + return cached_time.get_mysql_time(); + } + Item *clone_item(THD *thd) override; + longlong val_int() override + { + return update_null() ? 0 : cached_time.to_longlong(); + } + double val_real() override + { + return update_null() ? 0 : cached_time.to_double(); + } + String *val_str(String *to) override + { + return update_null() ? NULL : cached_time.to_string(to, decimals); + } + my_decimal *val_decimal(my_decimal *to) override + { + return update_null() ? NULL : cached_time.to_decimal(to); + } + longlong val_datetime_packed(THD *thd) override + { + return update_null() ? 0 : cached_time.valid_datetime_to_packed(); + } + bool get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/** + An error-safe counterpart for Item_date_literal +*/ +class Item_date_literal_for_invalid_dates: public Item_date_literal +{ + /** + During equal field propagation we can replace non-temporal constants + found in equalities to their native temporal equivalents: + WHERE date_column='2001-01-01' ... -> + WHERE date_column=DATE'2001-01-01' ... + + This is done to make the equal field propagation code handle mixtures of + different temporal types in the same expressions easier (MDEV-8706), e.g. + WHERE LENGTH(date_column)=10 AND date_column=TIME'00:00:00' + + Item_date_literal_for_invalid_dates::get_date() + (unlike the regular Item_date_literal::get_date()) + does not check the result for NO_ZERO_IN_DATE and NO_ZERO_DATE, + always returns success (false), and does not produce error/warning messages. 
+ + We need these _for_invalid_dates classes to be able to rewrite: + SELECT * FROM t1 WHERE date_column='0000-00-00' ... + to: + SELECT * FROM t1 WHERE date_column=DATE'0000-00-00' ... + + to avoid returning NULL value instead of '0000-00-00' even + in sql_mode=TRADITIONAL. + */ +public: + Item_date_literal_for_invalid_dates(THD *thd, const Date *ltime) + :Item_date_literal(thd, ltime) + { + base_flags&= ~item_base_t::MAYBE_NULL; + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) + { + cached_time.copy_to_mysql_time(ltime); + return (null_value= false); + } +}; + + +/** + An error-safe counterpart for Item_datetime_literal + (see Item_date_literal_for_invalid_dates for comments) +*/ +class Item_datetime_literal_for_invalid_dates final: public Item_datetime_literal +{ +public: + Item_datetime_literal_for_invalid_dates(THD *thd, + const Datetime *ltime, + decimal_digits_t dec_arg) + :Item_datetime_literal(thd, ltime, dec_arg) + { + base_flags&= ~item_base_t::MAYBE_NULL; + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) + { + cached_time.copy_to_mysql_time(ltime); + return (null_value= false); + } +}; + + +class Used_tables_and_const_cache +{ +public: + /* + In some cases used_tables_cache is not what used_tables() return + so the method should be used where one need used tables bit map + (even internally in Item_func_* code). 
+ */ + table_map used_tables_cache; + bool const_item_cache; + + Used_tables_and_const_cache() + :used_tables_cache(0), + const_item_cache(true) + { } + Used_tables_and_const_cache(const Used_tables_and_const_cache *other) + :used_tables_cache(other->used_tables_cache), + const_item_cache(other->const_item_cache) + { } + inline void used_tables_and_const_cache_init() + { + used_tables_cache= 0; + const_item_cache= true; + } + inline void used_tables_and_const_cache_join(const Item *item) + { + used_tables_cache|= item->used_tables(); + const_item_cache&= item->const_item(); + } + inline void used_tables_and_const_cache_update_and_join(Item *item) + { + item->update_used_tables(); + used_tables_and_const_cache_join(item); + } + /* + Call update_used_tables() for all "argc" items in the array "argv" + and join with the current cache. + "this" must be initialized with a constructor or + re-initialized with used_tables_and_const_cache_init(). + */ + void used_tables_and_const_cache_update_and_join(uint argc, Item **argv) + { + for (uint i=0 ; i < argc ; i++) + used_tables_and_const_cache_update_and_join(argv[i]); + } + /* + Call update_used_tables() for all items in the list + and join with the current cache. + "this" must be initialized with a constructor or + re-initialized with used_tables_and_const_cache_init(). + */ + void used_tables_and_const_cache_update_and_join(List &list) + { + List_iterator_fast li(list); + Item *item; + while ((item=li++)) + used_tables_and_const_cache_update_and_join(item); + } +}; + + +/** + An abstract class representing common features of + regular functions and aggregate functions. 
+*/ +class Item_func_or_sum: public Item_result_field, + public Item_args, + public Used_tables_and_const_cache +{ +protected: + bool agg_arg_charsets(DTCollation &c, Item **items, uint nitems, + uint flags, int item_sep) + { + return Type_std_attributes::agg_arg_charsets(c, func_name_cstring(), + items, nitems, + flags, item_sep); + } + bool agg_arg_charsets_for_string_result(DTCollation &c, + Item **items, uint nitems, + int item_sep= 1) + { + return Type_std_attributes:: + agg_arg_charsets_for_string_result(c, func_name_cstring(), + items, nitems, item_sep); + } + bool agg_arg_charsets_for_string_result_with_comparison(DTCollation &c, + Item **items, + uint nitems, + int item_sep= 1) + { + return Type_std_attributes:: + agg_arg_charsets_for_string_result_with_comparison(c, func_name_cstring(), + items, nitems, + item_sep); + } + + /* + Aggregate arguments for comparison, e.g: a=b, a LIKE b, a RLIKE b + - don't convert to @@character_set_connection if all arguments are numbers + - don't allow DERIVATION_NONE + */ + bool agg_arg_charsets_for_comparison(DTCollation &c, + Item **items, uint nitems, + int item_sep= 1) + { + return Type_std_attributes:: + agg_arg_charsets_for_comparison(c, func_name_cstring(), items, nitems, item_sep); + } + +public: + // This method is used by Arg_comparator + bool agg_arg_charsets_for_comparison(CHARSET_INFO **cs, Item **a, Item **b, + bool allow_narrowing) + { + THD *thd= current_thd; + DTCollation tmp; + if (tmp.set((*a)->collation, (*b)->collation, MY_COLL_CMP_CONV) || + tmp.derivation == DERIVATION_NONE) + { + my_error(ER_CANT_AGGREGATE_2COLLATIONS,MYF(0), + (*a)->collation.collation->coll_name.str, + (*a)->collation.derivation_name(), + (*b)->collation.collation->coll_name.str, + (*b)->collation.derivation_name(), + func_name()); + return true; + } + + if (allow_narrowing && + (*a)->collation.derivation == (*b)->collation.derivation) + { + // allow_narrowing==true only for = and <=> comparisons. 
+ if (Utf8_narrow::should_do_narrowing(thd, (*a)->collation.collation, + (*b)->collation.collation)) + { + // a is a subset, b is a superset (e.g. utf8mb3 vs utf8mb4) + *cs= (*b)->collation.collation; // Compare using the wider cset + return false; + } + else + if (Utf8_narrow::should_do_narrowing(thd, (*b)->collation.collation, + (*a)->collation.collation)) + { + // a is a superset, b is a subset (e.g. utf8mb4 vs utf8mb3) + *cs= (*a)->collation.collation; // Compare using the wider cset + return false; + } + } + /* + If necessary, convert both *a and *b to the collation in tmp: + */ + Single_coll_err error_for_a= {(*b)->collation, true}; + Single_coll_err error_for_b= {(*a)->collation, false}; + + if (agg_item_set_converter(tmp, func_name_cstring(), + a, 1, MY_COLL_CMP_CONV, 1, + /*just for error message*/ &error_for_a) || + agg_item_set_converter(tmp, func_name_cstring(), + b, 1, MY_COLL_CMP_CONV, 1, + /*just for error message*/ &error_for_b)) + return true; + *cs= tmp.collation; + return false; + } + +public: + Item_func_or_sum(THD *thd): Item_result_field(thd), Item_args() {} + Item_func_or_sum(THD *thd, Item *a): Item_result_field(thd), Item_args(a) { } + Item_func_or_sum(THD *thd, Item *a, Item *b): + Item_result_field(thd), Item_args(a, b) { } + Item_func_or_sum(THD *thd, Item *a, Item *b, Item *c): + Item_result_field(thd), Item_args(thd, a, b, c) { } + Item_func_or_sum(THD *thd, Item *a, Item *b, Item *c, Item *d): + Item_result_field(thd), Item_args(thd, a, b, c, d) { } + Item_func_or_sum(THD *thd, Item *a, Item *b, Item *c, Item *d, Item *e): + Item_result_field(thd), Item_args(thd, a, b, c, d, e) { } + Item_func_or_sum(THD *thd, Item_func_or_sum *item): + Item_result_field(thd, item), Item_args(thd, item), + Used_tables_and_const_cache(item) { } + Item_func_or_sum(THD *thd, List &list): + Item_result_field(thd), Item_args(thd, list) { } + bool walk(Item_processor processor, bool walk_subquery, void *arg) override + { + if (walk_args(processor, 
walk_subquery, arg)) + return true; + return (this->*processor)(arg); + } + /* + This method is used for debug purposes to print the name of an + item to the debug log. The second use of this method is as + a helper function of print() and error messages, where it is + applicable. To suit both goals it should return a meaningful, + distinguishable and sintactically correct string. This method + should not be used for runtime type identification, use enum + {Sum}Functype and Item_func::functype()/Item_sum::sum_func() + instead. + Added here, to the parent class of both Item_func and Item_sum. + + NOTE: for Items inherited from Item_sum, func_name() and + func_name_cstring() returns part of function name till first + argument (including '(') to make difference in names for functions + with 'distinct' clause and without 'distinct' and also to make + printing of items inherited from Item_sum uniform. + */ + inline const char *func_name() const + { return (char*) func_name_cstring().str; } + virtual LEX_CSTRING func_name_cstring() const= 0; + virtual bool fix_length_and_dec(THD *thd)= 0; + bool const_item() const override { return const_item_cache; } + table_map used_tables() const override { return used_tables_cache; } + Item* build_clone(THD *thd) override; + Sql_mode_dependency value_depends_on_sql_mode() const override + { + return Item_args::value_depends_on_sql_mode_bit_or().soft_to_hard(); + } +}; + +class sp_head; +class sp_name; +struct st_sp_security_context; + +class Item_sp +{ +protected: + // Can be NULL in some non-SELECT queries + Name_resolution_context *context; +public: + sp_name *m_name; + sp_head *m_sp; + TABLE *dummy_table; + uchar result_buf[64]; + sp_rcontext *func_ctx; + MEM_ROOT sp_mem_root; + Query_arena *sp_query_arena; + + /* + The result field of the stored function. 
+ */ + Field *sp_result_field; + Item_sp(THD *thd, Name_resolution_context *context_arg, sp_name *name_arg); + Item_sp(THD *thd, Item_sp *item); + LEX_CSTRING func_name_cstring(THD *thd, bool is_package_function) const; + void cleanup(); + bool sp_check_access(THD *thd); + bool execute(THD *thd, bool *null_value, Item **args, uint arg_count); + bool execute_impl(THD *thd, Item **args, uint arg_count); + bool init_result_field(THD *thd, uint max_length, uint maybe_null, + bool *null_value, LEX_CSTRING *name); + void process_error(THD *thd) + { + if (context) + context->process_error(thd); + } +}; + +class Item_ref :public Item_ident +{ +protected: + void set_properties(); + bool set_properties_only; // the item doesn't need full fix_fields +public: + enum Ref_Type { REF, DIRECT_REF, VIEW_REF, OUTER_REF, AGGREGATE_REF }; + Item **ref; + bool reference_trough_name; + Item_ref(THD *thd, Name_resolution_context *context_arg, + const LEX_CSTRING &db_arg, const LEX_CSTRING &table_name_arg, + const LEX_CSTRING &field_name_arg): + Item_ident(thd, context_arg, db_arg, table_name_arg, field_name_arg), + set_properties_only(0), ref(0), reference_trough_name(1) {} + Item_ref(THD *thd, Name_resolution_context *context_arg, + const LEX_CSTRING &field_name_arg) + :Item_ref(thd, context_arg, null_clex_str, null_clex_str, field_name_arg) + { } + /* + This constructor is used in two scenarios: + A) *item = NULL + No initialization is performed, fix_fields() call will be necessary. + + B) *item points to an Item this Item_ref will refer to. This is + used for GROUP BY. fix_fields() will not be called in this case, + so we call set_properties to make this item "fixed". set_properties + performs a subset of action Item_ref::fix_fields does, and this subset + is enough for Item_ref's used in GROUP BY. + + TODO we probably fix a superset of problems like in BUG#6658. Check this + with Bar, and if we have a more broader set of problems like this. 
+ */ + Item_ref(THD *thd, Name_resolution_context *context_arg, Item **item, + const LEX_CSTRING &table_name_arg, const LEX_CSTRING &field_name_arg, + bool alias_name_used_arg= FALSE); + Item_ref(THD *thd, TABLE_LIST *view_arg, Item **item, + const LEX_CSTRING &field_name_arg, bool alias_name_used_arg= FALSE); + + /* Constructor needed to process subselects with temporary tables (see Item) */ + Item_ref(THD *thd, Item_ref *item) + :Item_ident(thd, item), set_properties_only(0), ref(item->ref) {} + enum Type type() const override { return REF_ITEM; } + enum Type real_type() const override + { return ref ? (*ref)->type() : REF_ITEM; } + bool eq(const Item *item, bool binary_cmp) const override + { + const Item *it= item->real_item(); + return ref && (*ref)->eq(it, binary_cmp); + } + void save_val(Field *to) override; + void save_result(Field *to) override; + double val_real() override; + longlong val_int() override; + my_decimal *val_decimal(my_decimal *) override; + bool val_bool() override; + String *val_str(String* tmp) override; + bool val_native(THD *thd, Native *to) override; + bool is_null() override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + longlong val_datetime_packed(THD *) override; + longlong val_time_packed(THD *) override; + double val_result() override; + longlong val_int_result() override; + String *str_result(String* tmp) override; + bool val_native_result(THD *thd, Native *to) override; + my_decimal *val_decimal_result(my_decimal *) override; + bool val_bool_result() override; + bool is_null_result() override; + bool send(Protocol *prot, st_value *buffer) override; + void make_send_field(THD *thd, Send_field *field) override; + bool fix_fields(THD *, Item **) override; + void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge) + override; + int save_in_field(Field *field, bool no_conversions) override; + void save_org_in_field(Field *field, fast_field_copier optimizer_data) + override; + 
fast_field_copier setup_fast_field_copier(Field *field) override + { return (*ref)->setup_fast_field_copier(field); } + const Type_handler *type_handler() const override + { return (*ref)->type_handler(); } + const Type_handler *real_type_handler() const override + { return (*ref)->real_type_handler(); } + Field *get_tmp_table_field() override + { return result_field ? result_field : (*ref)->get_tmp_table_field(); } + Item *get_tmp_table_item(THD *thd) override; + Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src *src, + const Tmp_field_param *param) override; + Item* propagate_equal_fields(THD *, const Context &, COND_EQUAL *) override; + table_map used_tables() const override; + void update_used_tables() override; + COND *build_equal_items(THD *thd, COND_EQUAL *inherited, + bool link_item_fields, + COND_EQUAL **cond_equal_ref) override + { + /* + normalize_cond() replaced all conditions of type + WHERE/HAVING field + to: + WHERE/HAVING field<>0 + By the time of a build_equal_items() call, all such conditions should + already be replaced. No Item_ref referencing an Item_field is possible. + */ + DBUG_ASSERT(real_type() != FIELD_ITEM); + return Item_ident::build_equal_items(thd, inherited, link_item_fields, + cond_equal_ref); + } + bool const_item() const override + { + return (*ref)->const_item(); + } + table_map not_null_tables() const override + { + return depended_from ? 0 : (*ref)->not_null_tables(); + } + bool find_not_null_fields(table_map allowed) override + { + return depended_from ? false : (*ref)->find_not_null_fields(allowed); + } + void save_in_result_field(bool no_conversions) override + { + (*ref)->save_in_field(result_field, no_conversions); + } + Item *real_item() override + { + return ref ? (*ref)->real_item() : this; + } + const TYPELIB *get_typelib() const override + { + return ref ? 
(*ref)->get_typelib() : NULL; + } + + bool walk(Item_processor processor, bool walk_subquery, void *arg) override + { + if (ref && *ref) + return (*ref)->walk(processor, walk_subquery, arg) || + (this->*processor)(arg); + else + return FALSE; + } + Item* transform(THD *thd, Item_transformer, uchar *arg) override; + Item* compile(THD *thd, Item_analyzer analyzer, uchar **arg_p, + Item_transformer transformer, uchar *arg_t) override; + bool enumerate_field_refs_processor(void *arg) override + { return (*ref)->enumerate_field_refs_processor(arg); } + void no_rows_in_result() override + { + (*ref)->no_rows_in_result(); + } + void restore_to_before_no_rows_in_result() override + { + (*ref)->restore_to_before_no_rows_in_result(); + } + void print(String *str, enum_query_type query_type) override; + enum precedence precedence() const override + { + return ref ? (*ref)->precedence() : DEFAULT_PRECEDENCE; + } + void cleanup() override; + Item_field *field_for_view_update() override + { return (*ref)->field_for_view_update(); } + Load_data_outvar *get_load_data_outvar() override + { + return (*ref)->get_load_data_outvar(); + } + virtual Ref_Type ref_type() { return REF; } + + // Row emulation: forwarding of ROW-related calls to ref + uint cols() const override + { + return ref && result_type() == ROW_RESULT ? (*ref)->cols() : 1; + } + Item* element_index(uint i) override + { + return ref && result_type() == ROW_RESULT ? (*ref)->element_index(i) : this; + } + Item** addr(uint i) override + { + return ref && result_type() == ROW_RESULT ? (*ref)->addr(i) : 0; + } + bool check_cols(uint c) override + { + return ref && result_type() == ROW_RESULT ? (*ref)->check_cols(c) + : Item::check_cols(c); + } + bool null_inside() override + { + return ref && result_type() == ROW_RESULT ? 
(*ref)->null_inside() : 0; + } + void bring_value() override + { + if (ref && result_type() == ROW_RESULT) + (*ref)->bring_value(); + } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function("ref", arg, VCOL_IMPOSSIBLE); + } + bool basic_const_item() const override + { return ref && (*ref)->basic_const_item(); } + bool is_outer_field() const override + { + DBUG_ASSERT(fixed()); + DBUG_ASSERT(ref); + return (*ref)->is_outer_field(); + } + Item* build_clone(THD *thd) override; + /** + Checks if the item tree that ref points to contains a subquery. + NOTE(review): this comment appears stale -- it now precedes get_copy(), + which only clones the Item; confirm against revision history. + */ + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + bool excl_dep_on_table(table_map tab_map) override + { + table_map used= used_tables(); + if (used & OUTER_REF_TABLE_BIT) + return false; + return (used == tab_map) || (*ref)->excl_dep_on_table(tab_map); + } + bool excl_dep_on_grouping_fields(st_select_lex *sel) override + { return (*ref)->excl_dep_on_grouping_fields(sel); } + bool excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred) override + { return (*ref)->excl_dep_on_in_subq_left_part(subq_pred); } + bool cleanup_excluding_fields_processor(void *arg) override + { + Item *item= real_item(); + if (item && item->type() == FIELD_ITEM && + ((Item_field *)item)->field) + return 0; + return cleanup_processor(arg); + } + bool cleanup_excluding_const_fields_processor(void *arg) override + { + Item *item= real_item(); + if (item && item->type() == FIELD_ITEM && + ((Item_field *) item)->field && item->const_item()) + return 0; + return cleanup_processor(arg); + } + Item *field_transformer_for_having_pushdown(THD *thd, uchar *arg) override + { return (*ref)->field_transformer_for_having_pushdown(thd, arg); } + Item *remove_item_direct_ref() override + { + *ref= (*ref)->remove_item_direct_ref(); + return this; + } +}; + + +/* + The same as Item_ref, but get value from val_* family of method to get + value of item on which it referred instead of result* 
family. +*/ +class Item_direct_ref :public Item_ref +{ +public: + Item_direct_ref(THD *thd, Name_resolution_context *context_arg, Item **item, + const LEX_CSTRING &table_name_arg, + const LEX_CSTRING &field_name_arg, + bool alias_name_used_arg= FALSE): + Item_ref(thd, context_arg, item, table_name_arg, + field_name_arg, alias_name_used_arg) + {} + /* Constructor needed to process subselects with temporary tables (see Item) */ + Item_direct_ref(THD *thd, Item_direct_ref *item) : Item_ref(thd, item) {} + Item_direct_ref(THD *thd, TABLE_LIST *view_arg, Item **item, + const LEX_CSTRING &field_name_arg, + bool alias_name_used_arg= FALSE): + Item_ref(thd, view_arg, item, field_name_arg, + alias_name_used_arg) + {} + + bool fix_fields(THD *thd, Item **it) override + { + if ((*ref)->fix_fields_if_needed_for_scalar(thd, ref)) + return TRUE; + return Item_ref::fix_fields(thd, it); + } + void save_val(Field *to) override; + /* Below we should have all val() methods as in Item_ref */ + double val_real() override; + longlong val_int() override; + my_decimal *val_decimal(my_decimal *) override; + bool val_bool() override; + String *val_str(String* tmp) override; + bool val_native(THD *thd, Native *to) override; + bool is_null() override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + longlong val_datetime_packed(THD *) override; + longlong val_time_packed(THD *) override; + Ref_Type ref_type() override { return DIRECT_REF; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + Item *remove_item_direct_ref() override + { return (*ref)->remove_item_direct_ref(); } + + /* Must be called whenever ref is changed, so cached properties are + recomputed via set_properties() */ + inline void ref_changed() + { + set_properties(); + } +}; + + +/** + This class is the same as Item_direct_ref but created to wrap Item_ident + before fix_fields() call +*/ + +class Item_direct_ref_to_ident :public Item_direct_ref +{ + Item_ident *ident; +public: + Item_direct_ref_to_ident(THD *thd, Item_ident *item): + 
Item_direct_ref(thd, item->context, (Item**)&item, item->table_name, + item->field_name, FALSE) + { + ident= item; + ref= (Item**)&ident; + } + + bool fix_fields(THD *thd, Item **it) override + { + DBUG_ASSERT(ident->type() == FIELD_ITEM || ident->type() == REF_ITEM); + if (ident->fix_fields_if_needed_for_scalar(thd, ref)) + return TRUE; + set_properties(); + return FALSE; + } + + void print(String *str, enum_query_type query_type) override + { ident->print(str, query_type); } +}; + + +class Item_cache; +class Expression_cache; +class Expression_cache_tracker; + +/** + The objects of this class can store their values in an expression cache. +*/ + +class Item_cache_wrapper :public Item_result_field +{ +private: + /* Pointer to the cached expression */ + Item *orig_item; + Expression_cache *expr_cache; + /* + In order to put the expression into the expression cache and return + value of val_*() method, we will need to get the expression value twice + (probably in different types). In order to avoid making two + (potentially costly) orig_item->val_*() calls, we store expression value + in this Item_cache object. 
+ */ + Item_cache *expr_value; + + List parameters; /* NOTE(review): template argument appears lost in text + extraction here; verify the declaration against upstream */ + + Item *check_cache(); + void cache(); + void init_on_demand(); + +public: + Item_cache_wrapper(THD *thd, Item *item_arg); + ~Item_cache_wrapper(); + + Type type() const override { return EXPR_CACHE_ITEM; } + Type real_type() const override { return orig_item->type(); } + bool set_cache(THD *thd); + Expression_cache_tracker* init_tracker(MEM_ROOT *mem_root); + bool fix_fields(THD *thd, Item **it) override; + void cleanup() override; + Item *get_orig_item() const { return orig_item; } + + /* Methods of getting value which should be cached in the cache */ + void save_val(Field *to) override; + double val_real() override; + longlong val_int() override; + String *val_str(String* tmp) override; + bool val_native(THD *thd, Native *to) override; + my_decimal *val_decimal(my_decimal *) override; + bool val_bool() override; + bool is_null() override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + bool send(Protocol *protocol, st_value *buffer) override; + void save_org_in_field(Field *field, fast_field_copier) override + { + save_val(field); + } + void save_in_result_field(bool) override { save_val(result_field); } + Item* get_tmp_table_item(THD *thd_arg) override; + + /* Following methods make this item transparent as much as possible */ + + void print(String *str, enum_query_type query_type) override; + LEX_CSTRING full_name_cstring() const override + { return orig_item->full_name_cstring(); } + void make_send_field(THD *thd, Send_field *field) override + { orig_item->make_send_field(thd, field); } + bool eq(const Item *item, bool binary_cmp) const override + { + const Item *it= item->real_item(); + return orig_item->eq(it, binary_cmp); + } + void fix_after_pullout(st_select_lex *new_parent, Item **refptr, bool merge) + override + { + orig_item->fix_after_pullout(new_parent, &orig_item, merge); + } + int save_in_field(Field *to, bool no_conversions) override; + const Type_handler 
*type_handler() const override + { return orig_item->type_handler(); } + table_map used_tables() const override + { return orig_item->used_tables(); } + void update_used_tables() override + { + orig_item->update_used_tables(); + } + bool const_item() const override { return orig_item->const_item(); } + table_map not_null_tables() const override + { return orig_item->not_null_tables(); } + bool walk(Item_processor processor, bool walk_subquery, void *arg) override + { + return orig_item->walk(processor, walk_subquery, arg) || + (this->*processor)(arg); + } + bool enumerate_field_refs_processor(void *arg) override + { return orig_item->enumerate_field_refs_processor(arg); } + Item_field *field_for_view_update() override + { return orig_item->field_for_view_update(); } + + /* Row emulation: forwarding of ROW-related calls to orig_item */ + uint cols() const override + { return result_type() == ROW_RESULT ? orig_item->cols() : 1; } + Item* element_index(uint i) override + { return result_type() == ROW_RESULT ? orig_item->element_index(i) : this; } + Item** addr(uint i) override + { return result_type() == ROW_RESULT ? orig_item->addr(i) : 0; } + bool check_cols(uint c) override + { + return (result_type() == ROW_RESULT ? + orig_item->check_cols(c) : + Item::check_cols(c)); + } + bool null_inside() override + { return result_type() == ROW_RESULT ? 
orig_item->null_inside() : 0; } + void bring_value() override + { + if (result_type() == ROW_RESULT) + orig_item->bring_value(); + } + bool is_expensive() override { return orig_item->is_expensive(); } + bool is_expensive_processor(void *arg) override + { return orig_item->is_expensive_processor(arg); } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function("cache", arg, VCOL_IMPOSSIBLE); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + Item *build_clone(THD *) override { return nullptr; } +}; + + +/* + Class for view fields, the same as Item_direct_ref, but call fix_fields + of reference if it is not called yet +*/ +class Item_direct_view_ref :public Item_direct_ref +{ + Item_equal *item_equal; + TABLE_LIST *view; + TABLE *null_ref_table; + +#define NO_NULL_TABLE (reinterpret_cast(0x1)) + + void set_null_ref_table() + { + if (!view->is_inner_table_of_outer_join() || + !(null_ref_table= view->get_real_join_table())) + null_ref_table= NO_NULL_TABLE; + } + + bool check_null_ref() + { + DBUG_ASSERT(null_ref_table); + if (null_ref_table != NO_NULL_TABLE && null_ref_table->null_row) + { + null_value= 1; + return TRUE; + } + return FALSE; + } + +public: + Item_direct_view_ref(THD *thd, Name_resolution_context *context_arg, + Item **item, + LEX_CSTRING &table_name_arg, + LEX_CSTRING &field_name_arg, + TABLE_LIST *view_arg): + Item_direct_ref(thd, context_arg, item, table_name_arg, field_name_arg), + item_equal(0), view(view_arg), + null_ref_table(NULL) + { + if (fixed()) + set_null_ref_table(); + } + + bool fix_fields(THD *, Item **) override; + bool eq(const Item *item, bool binary_cmp) const override; + Item *get_tmp_table_item(THD *thd) override + { + if (const_item()) + return copy_or_same(thd); + Item *item= Item_ref::get_tmp_table_item(thd); + item->name= name; + return item; + } + Ref_Type ref_type() override { return VIEW_REF; } + Item_equal *get_item_equal() override { return item_equal; } + 
void set_item_equal(Item_equal *item_eq) override { item_equal= item_eq; } + Item_equal *find_item_equal(COND_EQUAL *cond_equal) override; + Item* propagate_equal_fields(THD *, const Context &, COND_EQUAL *) override; + Item *replace_equal_field(THD *thd, uchar *arg) override; + table_map used_tables() const override; + void update_used_tables() override; + table_map not_null_tables() const override; + bool const_item() const override + { + return (*ref)->const_item() && (null_ref_table == NO_NULL_TABLE); + } + TABLE *get_null_ref_table() const { return null_ref_table; } + bool walk(Item_processor processor, bool walk_subquery, void *arg) override + { + return (*ref)->walk(processor, walk_subquery, arg) || + (this->*processor)(arg); + } + bool view_used_tables_processor(void *arg) override + { + TABLE_LIST *view_arg= (TABLE_LIST *) arg; + if (view_arg == view) + view_arg->view_used_tables|= (*ref)->used_tables(); + return 0; + } + bool excl_dep_on_table(table_map tab_map) override; + bool excl_dep_on_grouping_fields(st_select_lex *sel) override; + bool excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred) override; + Item *derived_field_transformer_for_having(THD *thd, uchar *arg) override; + Item *derived_field_transformer_for_where(THD *thd, uchar *arg) override; + Item *grouping_field_transformer_for_where(THD *thd, uchar *arg) override; + Item *in_subq_field_transformer_for_where(THD *thd, uchar *arg) override; + Item *in_subq_field_transformer_for_having(THD *thd, uchar *arg) override; + + void save_val(Field *to) override + { + if (check_null_ref()) + to->set_null(); + else + Item_direct_ref::save_val(to); + } + double val_real() override + { + if (check_null_ref()) + return 0; + else + return Item_direct_ref::val_real(); + } + longlong val_int() override + { + if (check_null_ref()) + return 0; + else + return Item_direct_ref::val_int(); + } + String *val_str(String* tmp) override + { + if (check_null_ref()) + return NULL; + else + return 
Item_direct_ref::val_str(tmp); + } + bool val_native(THD *thd, Native *to) override + { + if (check_null_ref()) + return true; + return Item_direct_ref::val_native(thd, to); + } + my_decimal *val_decimal(my_decimal *tmp) override + { + if (check_null_ref()) + return NULL; + else + return Item_direct_ref::val_decimal(tmp); + } + bool val_bool() override + { + if (check_null_ref()) + return 0; + else + return Item_direct_ref::val_bool(); + } + bool is_null() override + { + if (check_null_ref()) + return 1; + else + return Item_direct_ref::is_null(); + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + if (check_null_ref()) + { + bzero((char*) ltime,sizeof(*ltime)); + return 1; + } + return Item_direct_ref::get_date(thd, ltime, fuzzydate); + } + longlong val_time_packed(THD *thd) override + { + if (check_null_ref()) + return 0; + else + return Item_direct_ref::val_time_packed(thd); + } + longlong val_datetime_packed(THD *thd) override + { + if (check_null_ref()) + return 0; + else + return Item_direct_ref::val_datetime_packed(thd); + } + bool send(Protocol *protocol, st_value *buffer) override; + void save_org_in_field(Field *field, fast_field_copier) override + { + if (check_null_ref()) + field->set_null(); + else + Item_direct_ref::save_val(field); + } + void save_in_result_field(bool no_conversions) override + { + if (check_null_ref()) + result_field->set_null(); + else + Item_direct_ref::save_in_result_field(no_conversions); + } + + int save_in_field(Field *field, bool no_conversions) override + { + if (check_null_ref()) + return set_field_to_null_with_conversions(field, no_conversions); + + return Item_direct_ref::save_in_field(field, no_conversions); + } + + void cleanup() override + { + null_ref_table= NULL; + item_equal= NULL; + Item_direct_ref::cleanup(); + } + /* + TODO move these val_*_result function to Item_direct_ref (maybe) + */ + double val_result() override; + longlong val_int_result() override; + String 
*str_result(String* tmp) override; + my_decimal *val_decimal_result(my_decimal *val) override; + bool val_bool_result() override; + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + Item *field_transformer_for_having_pushdown(THD *, uchar *) override + { return this; } + Item *remove_item_direct_ref() override { return this; } +}; + + +/* + Class for outer fields. + An object of this class is created when the select where the outer field was + resolved is a grouping one. After it has been fixed the ref field will point + to either an Item_ref or an Item_direct_ref object which will be used to + access the field. + See also comments for the fix_inner_refs() and the + Item_field::fix_outer_field() functions. +*/ + +class Item_sum; +class Item_outer_ref :public Item_direct_ref +{ +public: + Item *outer_ref; + /* The aggregate function under which this outer ref is used, if any. */ + Item_sum *in_sum_func; + /* + TRUE <=> that the outer_ref is already present in the select list + of the outer select. 
+ */ + bool found_in_select_list; + bool found_in_group_by; + Item_outer_ref(THD *thd, Name_resolution_context *context_arg, + Item_field *outer_field_arg): + Item_direct_ref(thd, context_arg, 0, outer_field_arg->table_name, + outer_field_arg->field_name), + outer_ref(outer_field_arg), in_sum_func(0), + found_in_select_list(0), found_in_group_by(0) + { + ref= &outer_ref; + set_properties(); + /* reset flag set in set_properties() */ + base_flags&= ~item_base_t::FIXED; + } + Item_outer_ref(THD *thd, Name_resolution_context *context_arg, Item **item, + const LEX_CSTRING &table_name_arg, LEX_CSTRING &field_name_arg, + bool alias_name_used_arg): + Item_direct_ref(thd, context_arg, item, table_name_arg, field_name_arg, + alias_name_used_arg), + outer_ref(0), in_sum_func(0), found_in_select_list(1), found_in_group_by(0) + {} + void save_in_result_field(bool no_conversions) override + { + outer_ref->save_org_in_field(result_field, NULL); + } + bool fix_fields(THD *, Item **) override; + void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge) + override; + table_map used_tables() const override + { + return (*ref)->const_item() ? 0 : OUTER_REF_TABLE_BIT; + } + table_map not_null_tables() const override { return 0; } + Ref_Type ref_type() override { return OUTER_REF; } + bool check_inner_refs_processor(void * arg) override; +}; + + +class Item_in_subselect; + + +/* + An object of this class: + - Converts val_XXX() calls to ref->val_XXX_result() calls, like Item_ref. + - Sets owner->was_null=TRUE if it has returned a NULL value from any + val_XXX() function. This allows to inject an Item_ref_null_helper + object into subquery and then check if the subquery has produced a row + with NULL value. 
+*/ + +class Item_ref_null_helper: public Item_ref +{ +protected: + Item_in_subselect* owner; +public: + Item_ref_null_helper(THD *thd, Name_resolution_context *context_arg, + Item_in_subselect* master, Item **item, + const LEX_CSTRING &table_name_arg, + const LEX_CSTRING &field_name_arg): + Item_ref(thd, context_arg, item, table_name_arg, field_name_arg), + owner(master) {} + void save_val(Field *to) override; + double val_real() override; + longlong val_int() override; + String* val_str(String* s) override; + my_decimal *val_decimal(my_decimal *) override; + bool val_bool() override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + bool val_native(THD *thd, Native *to) override; + void print(String *str, enum_query_type query_type) override; + table_map used_tables() const override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +/* + The following class is used to optimize comparing of date and bigint columns + We need to save the original item ('ref') to be able to call + ref->save_in_field(). This is used to create index search keys. + + An instance of Item_int_with_ref may have signed or unsigned integer value. 
+ +*/ + +class Item_int_with_ref :public Item_int +{ + Item *ref; +public: + Item_int_with_ref(THD *thd, longlong i, Item *ref_arg, bool unsigned_arg): + Item_int(thd, i), ref(ref_arg) + { + unsigned_flag= unsigned_arg; + } + int save_in_field(Field *field, bool no_conversions) override + { + return ref->save_in_field(field, no_conversions); + } + Item *clone_item(THD *thd) override; + Item *real_item() override { return ref; } +}; + +#ifdef MYSQL_SERVER +#include "item_sum.h" +#include "item_func.h" +#include "item_row.h" +#include "item_cmpfunc.h" +#include "item_strfunc.h" +#include "item_timefunc.h" +#include "item_subselect.h" +#include "item_xmlfunc.h" +#include "item_jsonfunc.h" +#include "item_create.h" +#include "item_vers.h" +#endif + +/** + Base class to implement typed value caching Item classes + + Item_copy_ classes are very similar to the corresponding Item_ + classes (e.g. Item_copy_string is similar to Item_string) but they add + the following additional functionality to Item_ : + 1. Nullability + 2. Possibility to store the value not only on instantiation time, + but also later. + Item_copy_ classes are a functionality subset of Item_cache_ + classes, as e.g. they don't support comparisons with the original Item + as Item_cache_ classes do. + Item_copy_ classes are used in GROUP BY calculation. + TODO: Item_copy should be made an abstract interface and Item_copy_ + classes should inherit both the respective Item_ class and the interface. + Ideally we should drop Item_copy_ classes altogether and merge + their functionality to Item_cache_ (and these should be made to inherit + from Item_). +*/ + +class Item_copy :public Item, + public Type_handler_hybrid_field_type +{ +protected: + + /** + Type_handler_hybrid_field_type is used to + store the type of the resulting field that would be used to store the data + in the cache. This is to avoid calls to the original item. 
+ */ + + /** The original item that is copied */ + Item *item; + + /** + Constructor of the Item_copy class + + stores metadata information about the original class as well as a + pointer to it. + */ + Item_copy(THD *thd, Item *org): Item(thd) + { + DBUG_ASSERT(org->fixed()); + item= org; + null_value= item->maybe_null(); + copy_flags(item, item_base_t::MAYBE_NULL); + Type_std_attributes::set(item); + name= item->name; + set_handler(item->type_handler()); + } + +public: + + /** + Update the cache with the value of the original item + + This is the method that updates the cached value. + It must be explicitly called by the user of this class to store the value + of the original item in the cache. + */ + virtual void copy() = 0; + + Item *get_item() { return item; } + /** All of the subclasses should have the same type tag */ + Type type() const override { return COPY_STR_ITEM; } + + const Type_handler *type_handler() const override + { return Type_handler_hybrid_field_type::type_handler(); } + + Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src *src, + const Tmp_field_param *param) override + { + DBUG_ASSERT(0); + return NULL; + } + void make_send_field(THD *thd, Send_field *field) override + { item->make_send_field(thd, field); } + table_map used_tables() const override { return (table_map) 1L; } + bool const_item() const override { return false; } + bool is_null() override { return null_value; } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function("copy", arg, VCOL_IMPOSSIBLE); + } + + /* + Override the methods below as pure virtual to make sure all the + sub-classes implement them. 
+ */ + String *val_str(String*) override = 0; + my_decimal *val_decimal(my_decimal *) override = 0; + double val_real() override = 0; + longlong val_int() override = 0; + int save_in_field(Field *field, bool no_conversions) override = 0; + bool walk(Item_processor processor, bool walk_subquery, void *args) override + { + return (item->walk(processor, walk_subquery, args)) || + (this->*processor)(args); + } +}; + +/** + Implementation of a string cache. + + Uses Item::str_value for storage +*/ +class Item_copy_string : public Item_copy +{ +public: + Item_copy_string(THD *thd, Item *item_arg): Item_copy(thd, item_arg) {} + + String *val_str(String*) override; + my_decimal *val_decimal(my_decimal *) override; + double val_real() override; + longlong val_int() override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { return get_date_from_string(thd, ltime, fuzzydate); } + void copy() override; + int save_in_field(Field *field, bool no_conversions) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/** + We need a separate class Item_copy_timestamp because + TIMESTAMP->string->TIMESTAMP conversion is not round trip safe + near the DST change, e.g. '2010-10-31 02:25:26' can mean: + - my_time_t(1288477526) - summer time in Moscow + - my_time_t(1288481126) - winter time in Moscow, one hour later +*/ +class Item_copy_timestamp: public Item_copy +{ + Timestamp_or_zero_datetime m_value; + bool sane() const { return !null_value || m_value.is_zero_datetime(); } +public: + Item_copy_timestamp(THD *thd, Item *arg): Item_copy(thd, arg) { } + const Type_handler *type_handler() const override + { return &type_handler_timestamp2; } + void copy() override + { + Timestamp_or_zero_datetime_native_null tmp(current_thd, item, false); + null_value= tmp.is_null(); + m_value= tmp.is_null() ? 
Timestamp_or_zero_datetime() : + Timestamp_or_zero_datetime(tmp); + } + int save_in_field(Field *field, bool) override + { + DBUG_ASSERT(sane()); + if (null_value) + return set_field_to_null(field); + Timestamp_or_zero_datetime_native native(m_value, decimals); + return native.save_in_field(field, decimals); + } + longlong val_int() override + { + DBUG_ASSERT(sane()); + return null_value ? 0 : + m_value.to_datetime(current_thd).to_longlong(); + } + double val_real() override + { + DBUG_ASSERT(sane()); + return null_value ? 0e0 : + m_value.to_datetime(current_thd).to_double(); + } + String *val_str(String *to) override + { + DBUG_ASSERT(sane()); + return null_value ? NULL : + m_value.to_datetime(current_thd).to_string(to, decimals); + } + my_decimal *val_decimal(my_decimal *to) override + { + DBUG_ASSERT(sane()); + return null_value ? NULL : + m_value.to_datetime(current_thd).to_decimal(to); + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + DBUG_ASSERT(sane()); + bool res= m_value.to_TIME(thd, ltime, fuzzydate); + DBUG_ASSERT(!res); + return null_value || res; + } + bool val_native(THD *thd, Native *to) override + { + DBUG_ASSERT(sane()); + return null_value || m_value.to_native(to, decimals); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + Cached_item_XXX objects are not exactly caches. They do the following: + + Each Cached_item_XXX object has + - its source item + - saved value of the source item + - cmp() method that compares the saved value with the current value of the + source item, and if they were not equal saves item's value into the saved + value. + + TODO: add here: + - a way to save the new value w/o comparison + - a way to do less/equal/greater comparison +*/ + +class Cached_item :public Sql_alloc +{ +public: + bool null_value; + Cached_item() :null_value(0) {} + /* + Compare the cached value with the source value. If not equal, copy + the source value to the cache. 
+ @return + true - Not equal + false - Equal + */ + virtual bool cmp(void)=0; + + /* Compare the cached value with the source value, without copying */ + virtual int cmp_read_only()=0; + + virtual ~Cached_item(); /*line -e1509 */ +}; + +class Cached_item_item : public Cached_item +{ +protected: + Item *item; + + Cached_item_item(Item *arg) : item(arg) {} +public: + void fetch_value_from(Item *new_item) + { + Item *save= item; + item= new_item; + cmp(); + item= save; + } +}; + +class Cached_item_str :public Cached_item_item +{ + uint32 value_max_length; + String value,tmp_value; +public: + Cached_item_str(THD *thd, Item *arg); + bool cmp() override; + int cmp_read_only() override; + ~Cached_item_str(); // Deallocate String:s +}; + + +class Cached_item_real :public Cached_item_item +{ + double value; +public: + Cached_item_real(Item *item_par) :Cached_item_item(item_par),value(0.0) {} + bool cmp() override; + int cmp_read_only() override; +}; + +class Cached_item_int :public Cached_item_item +{ + longlong value; +public: + Cached_item_int(Item *item_par) :Cached_item_item(item_par),value(0) {} + bool cmp() override; + int cmp_read_only() override; +}; + + +class Cached_item_decimal :public Cached_item_item +{ + my_decimal value; +public: + Cached_item_decimal(Item *item_par); + bool cmp() override; + int cmp_read_only() override; +}; + +class Cached_item_field :public Cached_item +{ + uchar *buff; + Field *field; + uint length; + +public: + Cached_item_field(THD *thd, Field *arg_field): field(arg_field) + { + field= arg_field; + /* TODO: take the memory allocation below out of the constructor. 
*/ + buff= (uchar*) thd_calloc(thd, length= field->pack_length()); + } + bool cmp() override; + int cmp_read_only() override; +}; + +class Item_default_value : public Item_field +{ + bool vcol_assignment_ok; + void calculate(); +public: + Item *arg= nullptr; + Item_default_value(THD *thd, Name_resolution_context *context_arg, Item *a, + bool vcol_assignment_arg) + : Item_field(thd, context_arg), + vcol_assignment_ok(vcol_assignment_arg), arg(a) {} + Type type() const override { return DEFAULT_VALUE_ITEM; } + bool eq(const Item *item, bool binary_cmp) const override; + bool fix_fields(THD *, Item **) override; + void cleanup() override; + void print(String *str, enum_query_type query_type) override; + String *val_str(String *str) override; + double val_real() override; + longlong val_int() override; + my_decimal *val_decimal(my_decimal *decimal_value) override; + bool get_date(THD *thd, MYSQL_TIME *ltime,date_mode_t fuzzydate) override; + bool val_native(THD *thd, Native *to) override; + bool val_native_result(THD *thd, Native *to) override; + longlong val_datetime_packed(THD *thd) override + { return Item::val_datetime_packed(thd); } + longlong val_time_packed(THD *thd) override + { return Item::val_time_packed(thd); } + + /* Result variants */ + double val_result() override; + longlong val_int_result() override; + String *str_result(String* tmp) override; + my_decimal *val_decimal_result(my_decimal *val) override; + bool val_bool_result() override; + bool is_null_result() override; + bool get_date_result(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) + override; + + bool send(Protocol *protocol, st_value *buffer) override; + int save_in_field(Field *field_arg, bool no_conversions) override; + void save_in_result_field(bool no_conversions) override; + bool save_in_param(THD *, Item_param *param) override + { + // It should not be possible to have "EXECUTE .. 
USING DEFAULT(a)" + DBUG_ASSERT(0); + param->set_default(); + return false; + } + table_map used_tables() const override; + void update_used_tables() override + { + if (field && field->default_value) + field->default_value->expr->update_used_tables(); + } + bool vcol_assignment_allowed_value() const override + { return vcol_assignment_ok; } + Item *get_tmp_table_item(THD *) override { return this; } + Item_field *field_for_view_update() override { return nullptr; } + bool update_vcol_processor(void *) override { return false; } + bool check_field_expression_processor(void *arg) override; + bool check_func_default_processor(void *) override { return true; } + bool update_func_default_processor(void *arg) override; + bool register_field_in_read_map(void *arg) override; + bool walk(Item_processor processor, bool walk_subquery, void *args) override + { + return (arg && arg->walk(processor, walk_subquery, args)) || + (this->*processor)(args); + } + Item *transform(THD *thd, Item_transformer transformer, uchar *args) + override; + Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src *src, + const Tmp_field_param *param) override; +}; + + +class Item_contextually_typed_value_specification: public Item +{ +public: + Item_contextually_typed_value_specification(THD *thd) :Item(thd) + { } + Type type() const override { return CONTEXTUALLY_TYPED_VALUE_ITEM; } + bool vcol_assignment_allowed_value() const override { return true; } + bool eq(const Item *item, bool binary_cmp) const override { return false; } + bool is_evaluable_expression() const override { return false; } + Field *create_tmp_field_ex(MEM_ROOT *, + TABLE *, Tmp_field_src *, + const Tmp_field_param *) override + { + DBUG_ASSERT(0); + return NULL; + } + String *val_str(String *str) override + { + DBUG_ASSERT(0); // never should be called + null_value= true; + return 0; + } + double val_real() override + { + DBUG_ASSERT(0); // never should be called + null_value= true; + return 0.0; + } + longlong 
val_int() override + { + DBUG_ASSERT(0); // never should be called + null_value= true; + return 0; + } + my_decimal *val_decimal(my_decimal *) override + { + DBUG_ASSERT(0); // never should be called + null_value= true; + return 0; + } + bool get_date(THD *, MYSQL_TIME *, date_mode_t) override + { + DBUG_ASSERT(0); // never should be called + return (null_value= true); + } + bool send(Protocol *, st_value *) override + { + DBUG_ASSERT(0); + return true; + } + const Type_handler *type_handler() const override + { + DBUG_ASSERT(0); + return &type_handler_null; + } +}; + + +/* + ::= DEFAULT +*/ +class Item_default_specification: + public Item_contextually_typed_value_specification +{ +public: + Item_default_specification(THD *thd) + :Item_contextually_typed_value_specification(thd) + { } + void print(String *str, enum_query_type) override + { + str->append(STRING_WITH_LEN("default")); + } + bool check_assignability_to(const Field *to, bool ignore) const override + { + return false; + } + int save_in_field(Field *field_arg, bool) override + { + return field_arg->save_in_field_default_value(false); + } + bool save_in_param(THD *, Item_param *param) override + { + param->set_default(); + return false; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/** + This class is used as bulk parameter INGNORE representation. + + It just do nothing when assigned to a field + + This is a non-standard MariaDB extension. 
+*/ + +class Item_ignore_specification: + public Item_contextually_typed_value_specification +{ +public: + Item_ignore_specification(THD *thd) + :Item_contextually_typed_value_specification(thd) + { } + void print(String *str, enum_query_type) override + { + str->append(STRING_WITH_LEN("ignore")); + } + bool check_assignability_to(const Field *to, bool ignore) const override + { + return false; + } + int save_in_field(Field *field_arg, bool) override + { + return field_arg->save_in_field_ignore_value(false); + } + bool save_in_param(THD *, Item_param *param) override + { + param->set_ignore(); + return false; + } + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + Item_insert_value -- an implementation of VALUES() function. + You can use the VALUES(col_name) function in the UPDATE clause + to refer to column values from the INSERT portion of the INSERT + ... UPDATE statement. In other words, VALUES(col_name) in the + UPDATE clause refers to the value of col_name that would be + inserted, had no duplicate-key conflict occurred. + In all other places this function returns NULL. 
+*/ + +class Item_insert_value : public Item_field +{ +public: + Item *arg; + Item_insert_value(THD *thd, Name_resolution_context *context_arg, Item *a) + :Item_field(thd, context_arg), + arg(a) {} + bool eq(const Item *item, bool binary_cmp) const override; + bool fix_fields(THD *, Item **) override; + void print(String *str, enum_query_type query_type) override; + int save_in_field(Field *field_arg, bool no_conversions) override + { + return Item_field::save_in_field(field_arg, no_conversions); + } + Type type() const override { return INSERT_VALUE_ITEM; } + /* + We use RAND_TABLE_BIT to prevent Item_insert_value from + being treated as a constant and precalculated before execution + */ + table_map used_tables() const override { return RAND_TABLE_BIT; } + + Item_field *field_for_view_update() override { return nullptr; } + + bool walk(Item_processor processor, bool walk_subquery, void *args) override + { + return arg->walk(processor, walk_subquery, args) || + (this->*processor)(args); + } + bool check_partition_func_processor(void *) override { return true; } + bool update_vcol_processor(void *) override { return false; } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function("value()", arg, VCOL_IMPOSSIBLE); + } +}; + + +class Table_triggers_list; + +/* + Represents NEW/OLD version of field of row which is + changed/read in trigger. + + Note: For this item main part of actual binding to Field object happens + not during fix_fields() call (like for Item_field) but right after + parsing of trigger definition, when table is opened, with special + setup_field() call. On fix_fields() stage we simply choose one of + two Field instances representing either OLD or NEW version of this + field. 
+*/ +class Item_trigger_field : public Item_field, + private Settable_routine_parameter +{ +private: + GRANT_INFO *table_grants; +public: + /* Next in list of all Item_trigger_field's in trigger */ + Item_trigger_field *next_trg_field; + /* Pointer to Table_trigger_list object for table of this trigger */ + Table_triggers_list *triggers; + /* Is this item represents row from NEW or OLD row ? */ + enum __attribute__((packed)) row_version_type {OLD_ROW, NEW_ROW}; + row_version_type row_version; + /* Index of the field in the TABLE::field array */ + field_index_t field_idx; + +private: + /* + Trigger field is read-only unless it belongs to the NEW row in a + BEFORE INSERT of BEFORE UPDATE trigger. + */ + bool read_only; + + /* + 'want_privilege' holds privileges required to perform operation on + this trigger field (SELECT_ACL if we are going to read it and + UPDATE_ACL if we are going to update it). It is initialized at + parse time but can be updated later if this trigger field is used + as OUT or INOUT parameter of stored routine (in this case + set_required_privilege() is called to appropriately update + want_privilege and cleanup() is responsible for restoring of + original want_privilege once parameter's value is updated). 
+ */ + privilege_t original_privilege; + privilege_t want_privilege; +public: + +Item_trigger_field(THD *thd, Name_resolution_context *context_arg, + row_version_type row_ver_arg, + const LEX_CSTRING &field_name_arg, + privilege_t priv, const bool ro) + :Item_field(thd, context_arg, field_name_arg), + table_grants(NULL), next_trg_field(NULL), triggers(NULL), + row_version(row_ver_arg), field_idx(NO_CACHED_FIELD_INDEX), + read_only (ro), original_privilege(priv), want_privilege(priv) + { + } + void setup_field(THD *thd, TABLE *table, GRANT_INFO *table_grant_info); + Type type() const override { return TRIGGER_FIELD_ITEM; } + bool eq(const Item *item, bool binary_cmp) const override; + bool fix_fields(THD *, Item **) override; + void print(String *str, enum_query_type query_type) override; + table_map used_tables() const override { return (table_map)0L; } + Field *get_tmp_table_field() override { return nullptr; } + Item *copy_or_same(THD *) override { return this; } + Item *get_tmp_table_item(THD *thd) override { return copy_or_same(thd); } + void cleanup() override; + +private: + void set_required_privilege(bool rw) override; + bool set_value(THD *thd, sp_rcontext *ctx, Item **it) override; + +public: + Settable_routine_parameter *get_settable_routine_parameter() override + { + return read_only ? nullptr : this; + } + + bool set_value(THD *thd, Item **it) + { + return set_value(thd, NULL, it); + } + +public: + bool unknown_splocal_processor(void *) override { return false; } + bool check_vcol_func_processor(void *arg) override; +}; + + +/** + @todo + Implement the is_null() method for this class. Currently calling is_null() + on any Item_cache object resolves to Item::is_null(), which returns FALSE + for any value. +*/ + +class Item_cache: public Item_fixed_hybrid, + public Type_handler_hybrid_field_type +{ +protected: + Item *example; + /** + Field that this object will get value from. 
This is used by + index-based subquery engines to detect and remove the equality injected + by IN->EXISTS transformation. + */ + Field *cached_field; + /* + TRUE <=> cache holds value of the last stored item (i.e actual value). + store() stores item to be cached and sets this flag to FALSE. + On the first call of val_xxx function if this flag is set to FALSE the + cache_value() will be called to actually cache value of saved item. + cache_value() will set this flag to TRUE. + */ + bool value_cached; + + table_map used_table_map; +public: + /* + This is set if at least one of the values of a sub query is NULL + Item_cache_row returns this with null_inside(). + For not row items, it's set to the value of null_value + It is set after cache_value() is called. + */ + bool null_value_inside; + + Item_cache(THD *thd): + Item_fixed_hybrid(thd), + Type_handler_hybrid_field_type(&type_handler_string), + example(0), cached_field(0), + value_cached(0), + used_table_map(0) + { + set_maybe_null(); + null_value= 1; + null_value_inside= true; + quick_fix_field(); + } +protected: + Item_cache(THD *thd, const Type_handler *handler): + Item_fixed_hybrid(thd), + Type_handler_hybrid_field_type(handler), + example(0), cached_field(0), + value_cached(0), + used_table_map(0) + { + set_maybe_null(); + null_value= 1; + null_value_inside= true; + quick_fix_field(); + } + +public: + virtual bool allocate(THD *thd, uint i) { return 0; } + virtual bool setup(THD *thd, Item *item) + { + example= item; + Type_std_attributes::set(item); + if (item->type() == FIELD_ITEM) + cached_field= ((Item_field *)item)->field; + return 0; + }; + + void set_used_tables(table_map map) { used_table_map= map; } + table_map used_tables() const override { return used_table_map; } + Type type() const override { return CACHE_ITEM; } + + const Type_handler *type_handler() const override + { return Type_handler_hybrid_field_type::type_handler(); } + Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src 
*src, + const Tmp_field_param *param) override + { + return create_tmp_field_ex_simple(root, table, src, param); + } + + virtual void keep_array() {} +#ifndef DBUG_OFF + bool is_array_kept() { return TRUE; } +#endif + void print(String *str, enum_query_type query_type) override; + bool eq_def(const Field *field) + { + return cached_field ? cached_field->eq_def (field) : FALSE; + } + bool eq(const Item *item, bool binary_cmp) const override + { + return this == item; + } + bool check_vcol_func_processor(void *arg) override + { + if (example) + { + Item::vcol_func_processor_result *res= + (Item::vcol_func_processor_result*) arg; + example->check_vcol_func_processor(arg); + /* + Item_cache of a non-deterministic function requires re-fixing + even if the function itself doesn't (e.g. CURRENT_TIMESTAMP) + */ + if (res->errors & VCOL_NOT_STRICTLY_DETERMINISTIC) + res->errors|= VCOL_SESSION_FUNC; + return false; + } + return mark_unsupported_function("cache", arg, VCOL_IMPOSSIBLE); + } + bool fix_fields(THD *thd, Item **ref) override + { + quick_fix_field(); + if (example && !example->fixed()) + return example->fix_fields(thd, ref); + return 0; + } + void cleanup() override + { + clear(); + Item_fixed_hybrid::cleanup(); + } + /** + Check if saved item has a non-NULL value. + Will cache value of saved item if not already done. + @return TRUE if cached value is non-NULL. 
+ */ + bool has_value() + { + return (value_cached || cache_value()) && !null_value; + } + + virtual void store(Item *item); + virtual Item *get_item() { return example; } + virtual bool cache_value()= 0; + bool basic_const_item() const override + { return example && example->basic_const_item(); } + virtual void clear() { null_value= TRUE; value_cached= FALSE; } + bool is_null() override { return !has_value(); } + bool is_expensive() override + { + if (value_cached) + return false; + return example->is_expensive(); + } + bool is_expensive_processor(void *arg) override + { + DBUG_ASSERT(example); + if (value_cached) + return false; + return example->is_expensive_processor(arg); + } + virtual void set_null(); + bool walk(Item_processor processor, bool walk_subquery, void *arg) override + { + if (arg == STOP_PTR) + return FALSE; + if (example && example->walk(processor, walk_subquery, arg)) + return TRUE; + return (this->*processor)(arg); + } + Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs) override; + void split_sum_func2_example(THD *thd, Ref_ptr_array ref_pointer_array, + List &fields, uint flags) + { + example->split_sum_func2(thd, ref_pointer_array, fields, &example, flags); + } + Item *get_example() const { return example; } + + virtual Item *convert_to_basic_const_item(THD *thd) { return 0; }; + Item *derived_field_transformer_for_having(THD *thd, uchar *) override + { return convert_to_basic_const_item(thd); } + Item *derived_field_transformer_for_where(THD *thd, uchar *) override + { return convert_to_basic_const_item(thd); } + Item *grouping_field_transformer_for_where(THD *thd, uchar *) override + { return convert_to_basic_const_item(thd); } + Item *in_subq_field_transformer_for_where(THD *thd, uchar *) override + { return convert_to_basic_const_item(thd); } + Item *in_subq_field_transformer_for_having(THD *thd, uchar *) override + { return convert_to_basic_const_item(thd); } +}; + + +class Item_cache_int: public Item_cache +{ +protected: + 
longlong value; +public: + Item_cache_int(THD *thd, const Type_handler *handler): + Item_cache(thd, handler), value(0) {} + + double val_real() override; + longlong val_int() override; + String* val_str(String *str) override; + my_decimal *val_decimal(my_decimal *) override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { return get_date_from_int(thd, ltime, fuzzydate); } + bool cache_value() override; + int save_in_field(Field *field, bool no_conversions) override; + Item *convert_to_basic_const_item(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_cache_year: public Item_cache_int +{ +public: + Item_cache_year(THD *thd, const Type_handler *handler) + :Item_cache_int(thd, handler) { } + bool get_date(THD *thd, MYSQL_TIME *to, date_mode_t mode) + { + return type_handler_year.Item_get_date_with_warn(thd, this, to, mode); + } +}; + + +class Item_cache_temporal: public Item_cache_int +{ +protected: + Item_cache_temporal(THD *thd, const Type_handler *handler); +public: + bool cache_value() override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + int save_in_field(Field *field, bool no_conversions) override; + bool setup(THD *thd, Item *item) override + { + if (Item_cache_int::setup(thd, item)) + return true; + set_if_smaller(decimals, TIME_SECOND_PART_DIGITS); + return false; + } + void store_packed(longlong val_arg, Item *example); + /* + Having a clone_item method tells optimizer that this object + is a constant and need not be optimized further. + Important when storing packed datetime values. 
+ */ + Item *clone_item(THD *thd) override; + Item *convert_to_basic_const_item(THD *thd) override; + virtual Item *make_literal(THD *) =0; +}; + + +class Item_cache_time: public Item_cache_temporal +{ +public: + Item_cache_time(THD *thd) + :Item_cache_temporal(thd, &type_handler_time2) { } + bool cache_value() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + Item *make_literal(THD *) override; + longlong val_datetime_packed(THD *thd) override + { + Datetime::Options_cmp opt(thd); + return has_value() ? Datetime(thd, this, opt).to_packed() : 0; + } + longlong val_time_packed(THD *) override + { + return has_value() ? value : 0; + } + longlong val_int() override + { + return has_value() ? Time(this).to_longlong() : 0; + } + double val_real() override + { + return has_value() ? Time(this).to_double() : 0; + } + String *val_str(String *to) override + { + return has_value() ? Time(this).to_string(to, decimals) : NULL; + } + my_decimal *val_decimal(my_decimal *to) override + { + return has_value() ? Time(this).to_decimal(to) : NULL; + } + bool val_native(THD *thd, Native *to) override + { + return has_value() ? Time(thd, this).to_native(to, decimals) : true; + } +}; + + +class Item_cache_datetime: public Item_cache_temporal +{ +public: + Item_cache_datetime(THD *thd) + :Item_cache_temporal(thd, &type_handler_datetime2) { } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + Item *make_literal(THD *) override; + longlong val_datetime_packed(THD *) override + { + return has_value() ? value : 0; + } + longlong val_time_packed(THD *thd) override + { + return Time(thd, this, Time::Options_cmp(thd)).to_packed(); + } + longlong val_int() override + { + return has_value() ? Datetime(this).to_longlong() : 0; + } + double val_real() override + { + return has_value() ? Datetime(this).to_double() : 0; + } + String *val_str(String *to) override + { + return has_value() ? 
Datetime(this).to_string(to, decimals) : NULL; + } + my_decimal *val_decimal(my_decimal *to) override + { + return has_value() ? Datetime(this).to_decimal(to) : NULL; + } +}; + + +class Item_cache_date: public Item_cache_temporal +{ +public: + Item_cache_date(THD *thd) + :Item_cache_temporal(thd, &type_handler_newdate) { } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + Item *make_literal(THD *) override; + longlong val_datetime_packed(THD *) override + { + return has_value() ? value : 0; + } + longlong val_time_packed(THD *thd) override + { + return Time(thd, this, Time::Options_cmp(thd)).to_packed(); + } + longlong val_int() override + { return has_value() ? Date(this).to_longlong() : 0; } + double val_real() override + { return has_value() ? Date(this).to_double() : 0; } + String *val_str(String *to) override + { + return has_value() ? Date(this).to_string(to) : NULL; + } + my_decimal *val_decimal(my_decimal *to) override + { + return has_value() ? Date(this).to_decimal(to) : NULL; + } +}; + + +class Item_cache_timestamp: public Item_cache +{ + Timestamp_or_zero_datetime_native m_native; + Datetime to_datetime(THD *thd); +public: + Item_cache_timestamp(THD *thd) + :Item_cache(thd, &type_handler_timestamp2) { } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + bool cache_value() override; + String* val_str(String *to) override + { + return to_datetime(current_thd).to_string(to, decimals); + } + my_decimal *val_decimal(my_decimal *to) override + { + return to_datetime(current_thd).to_decimal(to); + } + longlong val_int() override + { + return to_datetime(current_thd).to_longlong(); + } + double val_real() override + { + return to_datetime(current_thd).to_double(); + } + longlong val_datetime_packed(THD *thd) override + { + return to_datetime(thd).to_packed(); + } + longlong val_time_packed(THD *) override + { + DBUG_ASSERT(0); + return 0; + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) 
override; + int save_in_field(Field *field, bool no_conversions) override; + bool val_native(THD *thd, Native *to) override; +}; + + +class Item_cache_real: public Item_cache +{ +protected: + double value; +public: + Item_cache_real(THD *thd, const Type_handler *h) + :Item_cache(thd, h), + value(0) + {} + double val_real() override; + longlong val_int() override; + my_decimal *val_decimal(my_decimal *) override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { return get_date_from_real(thd, ltime, fuzzydate); } + bool cache_value() override; + Item *convert_to_basic_const_item(THD *thd) override; +}; + + +class Item_cache_double: public Item_cache_real +{ +public: + Item_cache_double(THD *thd) + :Item_cache_real(thd, &type_handler_double) + { } + String *val_str(String *str) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_cache_float: public Item_cache_real +{ +public: + Item_cache_float(THD *thd) + :Item_cache_real(thd, &type_handler_float) + { } + String *val_str(String *str) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_cache_decimal: public Item_cache +{ +protected: + my_decimal decimal_value; +public: + Item_cache_decimal(THD *thd): Item_cache(thd, &type_handler_newdecimal) {} + + double val_real() override; + longlong val_int() override; + String* val_str(String *str) override; + my_decimal *val_decimal(my_decimal *) override; + bool get_date(THD *thd, MYSQL_TIME *to, date_mode_t mode) override + { + return decimal_to_datetime_with_warn(thd, VDec(this).ptr(), to, mode, + NULL, NULL); + } + bool cache_value() override; + Item *convert_to_basic_const_item(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_cache_str: public Item_cache +{ + char buffer[STRING_BUFFER_USUAL_SIZE]; + String *value, value_buff; + bool is_varbinary; + +public: + 
Item_cache_str(THD *thd, const Item *item): + Item_cache(thd, item->type_handler()), value(0), + is_varbinary(item->type() == FIELD_ITEM && + Item_cache_str::field_type() == MYSQL_TYPE_VARCHAR && + !((const Item_field *) item)->field->has_charset()) + { + collation.set(const_cast(item->collation)); + } + double val_real() override; + longlong val_int() override; + String* val_str(String *) override; + my_decimal *val_decimal(my_decimal *) override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { return get_date_from_string(thd, ltime, fuzzydate); } + CHARSET_INFO *charset() const { return value->charset(); }; + int save_in_field(Field *field, bool no_conversions) override; + bool cache_value() override; + Item *convert_to_basic_const_item(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_cache_str_for_nullif: public Item_cache_str +{ +public: + Item_cache_str_for_nullif(THD *thd, const Item *item) + :Item_cache_str(thd, item) + { } + Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs) override + { + /** + Item_cache_str::safe_charset_converter() returns a new Item_cache + with Item_func_conv_charset installed on "example". The original + Item_cache is not referenced (neither directly nor recursively) + from the result of Item_cache_str::safe_charset_converter(). + + For NULLIF() purposes we need a different behavior: + we need a new instance of Item_func_conv_charset, + with the original Item_cache referenced in args[0]. See MDEV-9181. 
+ */ + return Item::safe_charset_converter(thd, tocs); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_cache_row: public Item_cache +{ + Item_cache **values; + uint item_count; + bool save_array; +public: + Item_cache_row(THD *thd): + Item_cache(thd, &type_handler_row), values(0), item_count(2), + save_array(0) {} + + /* + 'allocate' used only in row transformer, to preallocate space for row + cache. + */ + bool allocate(THD *thd, uint num) override; + /* + 'setup' is needed only by row => it not called by simple row subselect + (only by IN subselect (in subselect optimizer)) + */ + bool setup(THD *thd, Item *item) override; + void store(Item *item) override; + void illegal_method_call(const char *); + void make_send_field(THD *, Send_field *) override + { + illegal_method_call("make_send_field"); + }; + double val_real() override + { + illegal_method_call("val"); + return 0; + }; + longlong val_int() override + { + illegal_method_call("val_int"); + return 0; + }; + String *val_str(String *) override + { + illegal_method_call("val_str"); + return nullptr; + }; + my_decimal *val_decimal(my_decimal *) override + { + illegal_method_call("val_decimal"); + return nullptr; + }; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + illegal_method_call("val_decimal"); + return true; + } + + uint cols() const override { return item_count; } + Item *element_index(uint i) override { return values[i]; } + Item **addr(uint i) override { return (Item **) (values + i); } + bool check_cols(uint c) override; + bool null_inside() override; + void bring_value() override; + void keep_array() override { save_array= 1; } +#ifndef DBUG_OFF + bool is_array_kept() { return save_array; } +#endif + void cleanup() override + { + DBUG_ENTER("Item_cache_row::cleanup"); + Item_cache::cleanup(); + if (!save_array) + values= 0; + DBUG_VOID_RETURN; + } + bool cache_value() override; + void set_null() override; + Item 
*get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + Item_type_holder used to store type. name, length of Item for UNIONS & + derived tables. + + Item_type_holder do not need cleanup() because its time of live limited by + single SP/PS execution. +*/ +class Item_type_holder: public Item, public Type_handler_hybrid_field_type +{ +protected: + const TYPELIB *enum_set_typelib; +public: + Item_type_holder(THD *thd, Item *item, const Type_handler *handler, + const Type_all_attributes *attr, bool maybe_null_arg) + :Item(thd), Type_handler_hybrid_field_type(handler), + enum_set_typelib(attr->get_typelib()) + { + name= item->name; + Type_std_attributes::set(*attr); + set_maybe_null(maybe_null_arg); + copy_flags(item, item_base_t::IS_EXPLICIT_NAME | + item_base_t::IS_IN_WITH_CYCLE); + } + + const Type_handler *type_handler() const override + { + return Type_handler_hybrid_field_type::type_handler()-> + type_handler_for_item_field(); + } + const Type_handler *real_type_handler() const override + { + return Type_handler_hybrid_field_type::type_handler(); + } + + Type type() const override { return TYPE_HOLDER; } + const TYPELIB *get_typelib() const override { return enum_set_typelib; } + /* + When handling a query like this: + VALUES ('') UNION VALUES( _utf16 0x0020 COLLATE utf16_bin); + Item_type_holder can be passed to + Type_handler_xxx::Item_hybrid_func_fix_attributes() + We don't want the latter to perform character set conversion of a + Item_type_holder by calling its val_str(), which calls DBUG_ASSERT(0). + Let's override const_item() and is_expensive() to avoid this. + Note, Item_hybrid_func_fix_attributes() could probably + have a new argument to distinguish what we need: + - (a) aggregate data type attributes only + - (b) install converters after attribute aggregation + So st_select_lex_unit::join_union_type_attributes() could + ask it to do (a) only, without (b). 
+ */ + bool const_item() const override { return false; } + bool is_expensive() override { return true; } + double val_real() override; + longlong val_int() override; + my_decimal *val_decimal(my_decimal *) override; + String *val_str(String*) override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src *src, + const Tmp_field_param *param) override + { + return Item_type_holder::real_type_handler()-> + make_and_init_table_field(root, &name, Record_addr(maybe_null()), + *this, table); + } + Item* get_copy(THD *) override { return nullptr; } +}; + + +class st_select_lex; +void mark_select_range_as_dependent(THD *thd, + st_select_lex *last_select, + st_select_lex *current_sel, + Field *found_field, Item *found_item, + Item_ident *resolved_item, + bool suppress_warning_output); + +extern Cached_item *new_Cached_item(THD *thd, Item *item, + bool pass_through_ref); +extern Item_result item_cmp_type(Item_result a,Item_result b); +extern void resolve_const_item(THD *thd, Item **ref, Item *cmp_item); +extern int stored_field_cmp_to_item(THD *thd, Field *field, Item *item); + +extern const String my_null_string; + +/** + Interface for Item iterator +*/ + +class Item_iterator +{ +public: + /** + Shall set this iterator to the position before the first item + + @note + This method also may perform some other initialization actions like + allocation of certain resources. + */ + virtual void open()= 0; + /** + Shall return the next Item (or NULL if there is no next item) and + move pointer to position after it. 
+ */ + virtual Item *next()= 0; + /** + Shall force iterator to free resources (if it holds them) + + @note + One should not use the iterator without open() call after close() + */ + virtual void close()= 0; + + virtual ~Item_iterator() = default; +}; + + +/** + Item iterator over List_iterator_fast for Item references +*/ + +class Item_iterator_ref_list: public Item_iterator +{ + List_iterator list; +public: + Item_iterator_ref_list(List_iterator &arg_list): + list(arg_list) {} + void open() { list.rewind(); } + Item *next() { return *(list++); } + void close() {} +}; + + +/** + Item iterator over List_iterator_fast for Items +*/ + +class Item_iterator_list: public Item_iterator +{ + List_iterator list; +public: + Item_iterator_list(List_iterator &arg_list): + list(arg_list) {} + void open() { list.rewind(); } + Item *next() { return (list++); } + void close() {} +}; + + +/** + Item iterator over Item interface for rows +*/ + +class Item_iterator_row: public Item_iterator +{ + Item *base_item; + uint current; +public: + Item_iterator_row(Item *base) : base_item(base), current(0) {} + void open() { current= 0; } + Item *next() + { + if (current >= base_item->cols()) + return NULL; + return base_item->element_index(current++); + } + void close() {} +}; + + +/* + fix_escape_item() sets the out "escape" parameter to: + - native code in case of an 8bit character set + - Unicode code point in case of a multi-byte character set + + The value meaning a not-initialized ESCAPE character must not be equal to + any valid value, so must be outside of these ranges: + - -128..+127, not to conflict with a valid 8bit charcter + - 0..0x10FFFF, not to conflict with a valid Unicode code point + The exact value does not matter. +*/ +#define ESCAPE_NOT_INITIALIZED -1000 + +/* + It's used in ::fix_fields() methods of LIKE and JSON_SEARCH + functions to handle the ESCAPE parameter. + This parameter is quite non-standard so the specific function. 
+*/ +bool fix_escape_item(THD *thd, Item *escape_item, String *tmp_str, + bool escape_used_in_parsing, CHARSET_INFO *cmp_cs, + int *escape); + +inline bool Virtual_column_info::is_equal(const Virtual_column_info* vcol) const +{ + return type_handler() == vcol->type_handler() + && stored_in_db == vcol->is_stored() + && expr->eq(vcol->expr, true); +} + +inline void Virtual_column_info::print(String* str) +{ + expr->print_for_table_def(str); +} + +class Item_direct_ref_to_item : public Item_direct_ref +{ + Item *m_item; +public: + Item_direct_ref_to_item(THD *thd, Item *item); + + void change_item(THD *thd, Item *); + + bool fix_fields(THD *thd, Item **it); + + void print(String *str, enum_query_type query_type); + + Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs); + Item *get_tmp_table_item(THD *thd) + { return m_item->get_tmp_table_item(thd); } + Item *get_copy(THD *thd) + { return get_item_copy(thd, this); } + COND *build_equal_items(THD *thd, COND_EQUAL *inherited, + bool link_item_fields, + COND_EQUAL **cond_equal_ref) + { + return m_item->build_equal_items(thd, inherited, link_item_fields, + cond_equal_ref); + } + const char *full_name() const { return m_item->full_name(); } + void make_send_field(THD *thd, Send_field *field) + { m_item->make_send_field(thd, field); } + bool eq(const Item *item, bool binary_cmp) const + { + const Item *it= item->real_item(); + return m_item->eq(it, binary_cmp); + } + void fix_after_pullout(st_select_lex *new_parent, Item **refptr, bool merge) + { m_item->fix_after_pullout(new_parent, &m_item, merge); } + void save_val(Field *to) + { return m_item->save_val(to); } + void save_result(Field *to) + { return m_item->save_result(to); } + int save_in_field(Field *to, bool no_conversions) + { return m_item->save_in_field(to, no_conversions); } + const Type_handler *type_handler() const { return m_item->type_handler(); } + table_map used_tables() const { return m_item->used_tables(); } + void update_used_tables() + { 
m_item->update_used_tables(); } + bool const_item() const { return m_item->const_item(); } + table_map not_null_tables() const { return m_item->not_null_tables(); } + bool walk(Item_processor processor, bool walk_subquery, void *arg) + { + return m_item->walk(processor, walk_subquery, arg) || + (this->*processor)(arg); + } + bool enumerate_field_refs_processor(void *arg) + { return m_item->enumerate_field_refs_processor(arg); } + Item_field *field_for_view_update() + { return m_item->field_for_view_update(); } + + /* Row emulation: forwarding of ROW-related calls to orig_item */ + uint cols() const + { return m_item->cols(); } + Item* element_index(uint i) + { return this; } + Item** addr(uint i) + { return &m_item; } + bool check_cols(uint c) + { return Item::check_cols(c); } + bool null_inside() + { return m_item->null_inside(); } + void bring_value() + {} + + Item_equal *get_item_equal() { return m_item->get_item_equal(); } + void set_item_equal(Item_equal *item_eq) { m_item->set_item_equal(item_eq); } + Item_equal *find_item_equal(COND_EQUAL *cond_equal) + { return m_item->find_item_equal(cond_equal); } + Item *propagate_equal_fields(THD *thd, const Context &ctx, COND_EQUAL *cond) + { return m_item->propagate_equal_fields(thd, ctx, cond); } + Item *replace_equal_field(THD *thd, uchar *arg) + { return m_item->replace_equal_field(thd, arg); } + + bool excl_dep_on_table(table_map tab_map) + { return m_item->excl_dep_on_table(tab_map); } + bool excl_dep_on_grouping_fields(st_select_lex *sel) + { return m_item->excl_dep_on_grouping_fields(sel); } + bool is_expensive() { return m_item->is_expensive(); } + void set_item(Item *item) { m_item= item; } + Item *build_clone(THD *thd) + { + Item *clone_item= m_item->build_clone(thd); + if (clone_item) + { + Item_direct_ref_to_item *copy= (Item_direct_ref_to_item *) get_copy(thd); + if (!copy) + return 0; + copy->set_item(clone_item); + return copy; + } + return 0; + } + + void split_sum_func(THD *thd, Ref_ptr_array 
ref_pointer_array, + List &fields, uint flags) + { + m_item->split_sum_func(thd, ref_pointer_array, fields, flags); + } + /* + This processor states that this is safe for virtual columns + (because this Item transparency) + */ + bool check_vcol_func_processor(void *arg) { return FALSE;} +}; + +inline bool TABLE::mark_column_with_deps(Field *field) +{ + bool res; + if (!(res= bitmap_fast_test_and_set(read_set, field->field_index))) + { + if (field->vcol_info) + mark_virtual_column_deps(field); + } + return res; +} + +inline bool TABLE::mark_virtual_column_with_deps(Field *field) +{ + bool res; + DBUG_ASSERT(field->vcol_info); + if (!(res= bitmap_fast_test_and_set(read_set, field->field_index))) + mark_virtual_column_deps(field); + return res; +} + +inline void TABLE::mark_virtual_column_deps(Field *field) +{ + DBUG_ASSERT(field->vcol_info); + DBUG_ASSERT(field->vcol_info->expr); + field->vcol_info->expr->walk(&Item::register_field_in_read_map, 1, 0); +} + +inline void TABLE::use_all_stored_columns() +{ + bitmap_set_all(read_set); + if (Field **vf= vfield) + for (; *vf; vf++) + bitmap_clear_bit(read_set, (*vf)->field_index); +} + +#endif /* SQL_ITEM_INCLUDED */ diff --git a/sql/item_buff.cc b/sql/item_buff.cc new file mode 100644 index 00000000..1079394e --- /dev/null +++ b/sql/item_buff.cc @@ -0,0 +1,252 @@ +/* + Copyright (c) 2000, 2010, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/** + @file + + @brief + Buffers to save and compare item values +*/ + +#include "mariadb.h" +#include "sql_priv.h" +/* + It is necessary to include set_var.h instead of item.h because there + are dependencies on include order for set_var.h and item.h. This + will be resolved later. +*/ +#include "sql_class.h" // THD +#include "set_var.h" // Cached_item, Cached_item_field, ... + +/** + Create right type of Cached_item for an item. +*/ + +Cached_item *new_Cached_item(THD *thd, Item *item, bool pass_through_ref) +{ + if (pass_through_ref && item->real_item()->type() == Item::FIELD_ITEM && + !(((Item_field *) (item->real_item()))->field->flags & BLOB_FLAG)) + { + Item_field *real_item= (Item_field *) item->real_item(); + Field *cached_field= real_item->field; + return new (thd->mem_root) Cached_item_field(thd, cached_field); + } + switch (item->result_type()) { + case STRING_RESULT: + return new Cached_item_str(thd, item); + case INT_RESULT: + return new Cached_item_int(item); + case REAL_RESULT: + return new Cached_item_real(item); + case DECIMAL_RESULT: + return new Cached_item_decimal(item); + case ROW_RESULT: + default: + DBUG_ASSERT(0); + return 0; + } +} + +Cached_item::~Cached_item() = default; + +/** + Compare with old value and replace value with new value. 
+ + @return + Return true if values have changed +*/ + +Cached_item_str::Cached_item_str(THD *thd, Item *arg) + :Cached_item_item(arg), + value_max_length(MY_MIN(arg->max_length, thd->variables.max_sort_length)), + value(value_max_length) +{} + +bool Cached_item_str::cmp(void) +{ + String *res; + bool tmp; + + if ((res=item->val_str(&tmp_value))) + res->length(MY_MIN(res->length(), value_max_length)); + if (null_value != item->null_value) + { + if ((null_value= item->null_value)) + return TRUE; // New value was null + tmp=TRUE; + } + else if (null_value) + return 0; // new and old value was null + else + tmp= sortcmp(&value,res,item->collation.collation) != 0; + if (tmp) + value.copy(*res); // Remember for next cmp + return tmp; +} + + +int Cached_item_str::cmp_read_only() +{ + String *res= item->val_str(&tmp_value); + + if (null_value) + { + if (item->null_value) + return 0; + else + return -1; + } + if (item->null_value) + return 1; + + return sortcmp(&value, res, item->collation.collation); +} + + +Cached_item_str::~Cached_item_str() +{ + item=0; // Safety +} + +bool Cached_item_real::cmp(void) +{ + double nr= item->val_real(); + if (null_value != item->null_value || nr != value) + { + null_value= item->null_value; + value=nr; + return TRUE; + } + return FALSE; +} + + +int Cached_item_real::cmp_read_only() +{ + double nr= item->val_real(); + if (null_value) + { + if (item->null_value) + return 0; + else + return -1; + } + if (item->null_value) + return 1; + return (nr == value)? 0 : ((nr < value)? 1: -1); +} + + +bool Cached_item_int::cmp(void) +{ + longlong nr=item->val_int(); + if (null_value != item->null_value || nr != value) + { + null_value= item->null_value; + value=nr; + return TRUE; + } + return FALSE; +} + + +int Cached_item_int::cmp_read_only() +{ + longlong nr= item->val_int(); + if (null_value) + { + if (item->null_value) + return 0; + else + return -1; + } + if (item->null_value) + return 1; + return (nr == value)? 0 : ((nr < value)? 
1: -1); +} + + +bool Cached_item_field::cmp(void) +{ + bool tmp= FALSE; // Value is identical + /* Note that field can't be a blob here ! */ + if (null_value != field->is_null()) + { + null_value= !null_value; + tmp= TRUE; // Value has changed + } + + /* + If value is not null and value changed (from null to not null or + because of value change), then copy the new value to buffer. + */ + if (! null_value && (tmp || (tmp= (field->cmp(buff) != 0)))) + field->get_image(buff,length,field->charset()); + return tmp; +} + + +int Cached_item_field::cmp_read_only() +{ + if (null_value) + { + if (field->is_null()) + return 0; + else + return -1; + } + if (field->is_null()) + return 1; + + return field->cmp(buff); +} + + +Cached_item_decimal::Cached_item_decimal(Item *it) + :Cached_item_item(it) +{ + my_decimal_set_zero(&value); +} + + +bool Cached_item_decimal::cmp() +{ + VDec tmp(item); + if (null_value != tmp.is_null() || + (!tmp.is_null() && tmp.cmp(&value))) + { + null_value= tmp.is_null(); + /* Save only not null values */ + if (!null_value) + { + my_decimal2decimal(tmp.ptr(), &value); + return TRUE; + } + return FALSE; + } + return FALSE; +} + + +int Cached_item_decimal::cmp_read_only() +{ + VDec tmp(item); + if (null_value) + return tmp.is_null() ? 0 : -1; + return tmp.is_null() ? 1 : value.cmp(tmp.ptr()); +} + diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc new file mode 100644 index 00000000..bfe03e1c --- /dev/null +++ b/sql/item_cmpfunc.cc @@ -0,0 +1,7946 @@ +/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. + Copyright (c) 2009, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/** + @file + + @brief + This file defines all compare functions +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +#include +#include "sql_select.h" +#include "sql_parse.h" // check_stack_overrun +#include "sql_base.h" // dynamic_column_error_message + +#define PCRE2_STATIC 1 /* Important on Windows */ +#include "pcre2.h" /* pcre2 header file */ +#include "my_json_writer.h" + +/* + Compare row signature of two expressions + + SYNOPSIS: + cmp_row_type() + item1 the first expression + item2 the second expression + + DESCRIPTION + The function checks that two expressions have compatible row signatures + i.e. that the number of columns they return are the same and that if they + are both row expressions then each component from the first expression has + a row signature compatible with the signature of the corresponding component + of the second expression. + + RETURN VALUES + 1 type incompatibility has been detected + 0 otherwise +*/ + +static int cmp_row_type(Item* item1, Item* item2) +{ + uint n= item1->cols(); + if (item2->check_cols(n)) + return 1; + for (uint i=0; ielement_index(i)->check_cols(item1->element_index(i)->cols()) || + (item1->element_index(i)->result_type() == ROW_RESULT && + cmp_row_type(item1->element_index(i), item2->element_index(i)))) + return 1; + } + return 0; +} + + +/** + Aggregates result types from the array of items. + + This method aggregates comparison handler from the array of items. + The result handler is used later for comparison of values of these items. 
+ + aggregate_for_comparison() + funcname the function or operator name, + for error reporting + items array of items to aggregate the type from + nitems number of items in the array + int_uint_as_dec what to do when comparing INT to UINT: + set the comparison handler to decimal or int. + + @retval true type incompatibility has been detected + @retval false otherwise +*/ + +bool Type_handler_hybrid_field_type:: +aggregate_for_comparison(const LEX_CSTRING &funcname, + Item **items, + uint nitems, + bool int_uint_as_dec) +{ + uint unsigned_count= items[0]->unsigned_flag; + /* + Convert sub-type to super-type (e.g. DATE to DATETIME, INT to BIGINT, etc). + Otherwise Predicant_to_list_comparator will treat sub-types of the same + super-type as different data types and won't be able to use bisection in + many cases. + */ + set_handler(items[0]->type_handler()->type_handler_for_comparison()); + for (uint i= 1 ; i < nitems ; i++) + { + unsigned_count+= items[i]->unsigned_flag; + if (aggregate_for_comparison(items[i]->type_handler()-> + type_handler_for_comparison())) + { + /* + For more precise error messages if aggregation failed on the first pair + {items[0],items[1]}, use the name of items[0]->data_handler(). + Otherwise use the name of this->type_handler(), which is already a + result of aggregation for items[0]..items[i-1]. + */ + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPES2_FOR_OPERATION, MYF(0), + i == 1 ? items[0]->type_handler()->name().ptr() : + type_handler()->name().ptr(), + items[i]->type_handler()->name().ptr(), + funcname.str); + return true; + } + /* + When aggregating types of two row expressions we have to check + that they have the same cardinality and that each component + of the first row expression has a compatible row signature with + the signature of the corresponding component of the second row + expression. 
+ */ + if (cmp_type() == ROW_RESULT && cmp_row_type(items[0], items[i])) + return true; // error found: invalid usage of rows + } + /** + If all arguments are of INT type but have different unsigned_flag values, + switch to DECIMAL_RESULT. + */ + if (int_uint_as_dec && + cmp_type() == INT_RESULT && + unsigned_count != nitems && unsigned_count != 0) + set_handler(&type_handler_newdecimal); + return 0; +} + + +/* + Collects different types for comparison of first item with each other items + + SYNOPSIS + collect_cmp_types() + items Array of items to collect types from + nitems Number of items in the array + skip_nulls Don't collect types of NULL items if TRUE + + DESCRIPTION + This function collects different result types for comparison of the first + item in the list with each of the remaining items in the 'items' array. + + RETURN + 0 - if row type incompatibility has been detected (see cmp_row_type) + Bitmap of collected types - otherwise +*/ + +static uint collect_cmp_types(Item **items, uint nitems, bool skip_nulls= FALSE) +{ + uint i; + uint found_types; + Item_result left_cmp_type= items[0]->cmp_type(); + DBUG_ASSERT(nitems > 1); + found_types= 0; + for (i= 1; i < nitems ; i++) + { + if (skip_nulls && items[i]->type() == Item::NULL_ITEM) + continue; // Skip NULL constant items + if ((left_cmp_type == ROW_RESULT || + items[i]->cmp_type() == ROW_RESULT) && + cmp_row_type(items[0], items[i])) + return 0; + found_types|= 1U << (uint) item_cmp_type(left_cmp_type, items[i]); + } + /* + Even if all right-hand items are NULLs and we are skipping them all, we need + at least one type bit in the found_type bitmask. + */ + if (skip_nulls && !found_types) + found_types= 1U << (uint) left_cmp_type; + return found_types; +} + + +/* + Test functions + Most of these returns 0LL if false and 1LL if true and + NULL if some arg is NULL. 
+*/ + +longlong Item_func_not::val_int() +{ + DBUG_ASSERT(fixed()); + bool value= args[0]->val_bool(); + null_value=args[0]->null_value; + return ((!null_value && value == 0) ? 1 : 0); +} + +void Item_func_not::print(String *str, enum_query_type query_type) +{ + str->append('!'); + args[0]->print_parenthesised(str, query_type, precedence()); +} + +/** + special NOT for ALL subquery. +*/ + + +longlong Item_func_not_all::val_int() +{ + DBUG_ASSERT(fixed()); + bool value= args[0]->val_bool(); + + /* + return TRUE if there was records in underlying select in max/min + optimization (ALL subquery) + */ + if (empty_underlying_subquery()) + return 1; + + null_value= args[0]->null_value; + return ((!null_value && value == 0) ? 1 : 0); +} + + +bool Item_func_not_all::empty_underlying_subquery() +{ + return ((test_sum_item && !test_sum_item->any_value()) || + (test_sub_item && !test_sub_item->any_value())); +} + +void Item_func_not_all::print(String *str, enum_query_type query_type) +{ + if (show) + Item_func::print(str, query_type); + else + args[0]->print(str, query_type); +} + + +/** + Special NOP (No OPeration) for ALL subquery. It is like + Item_func_not_all. + + @return + (return TRUE if underlying subquery do not return rows) but if subquery + returns some rows it return same value as argument (TRUE/FALSE). +*/ + +longlong Item_func_nop_all::val_int() +{ + DBUG_ASSERT(fixed()); + longlong value= args[0]->val_int(); + + /* + return FALSE if there was records in underlying select in max/min + optimization (SAME/ANY subquery) + */ + if (empty_underlying_subquery()) + return 0; + + null_value= args[0]->null_value; + return (null_value || value == 0) ? 0 : 1; +} + + +/** + Convert a constant item to an int and replace the original item. + + The function converts a constant expression or string to an integer. + On successful conversion the original item is substituted for the + result of the item evaluation. 
+ This is done when comparing DATE/TIME of different formats and + also when comparing bigint to strings (in which case strings + are converted to bigints). + + @param thd thread handle + @param field item will be converted using the type of this field + @param[in,out] item reference to the item to convert + + @note + This function is called only at prepare stage. + As all derived tables are filled only after all derived tables + are prepared we do not evaluate items with subselects here because + they can contain derived tables and thus we may attempt to use a + table that has not been populated yet. + + @retval + 0 Can't convert item + @retval + 1 Item was replaced with an integer version of the item +*/ + +static bool convert_const_to_int(THD *thd, Item_field *field_item, + Item **item) +{ + Field *field= field_item->field; + int result= 0; + + /* + We don't need to convert an integer to an integer, + pretend it's already converted. + + But we still convert it if it is compared with a Field_year, + as YEAR(2) may change the value of an integer when converting it + to an integer (say, 0 to 70). + */ + if ((*item)->cmp_type() == INT_RESULT && + field_item->field_type() != MYSQL_TYPE_YEAR) + return 1; + + /* + Replace (*item) with its value if the item can be computed. + + Do not replace items that contain aggregate functions: + There can be such items that are constants, e.g. COLLATION(AVG(123)), + but this function is called at Name Resolution phase. + Removing aggregate functions may confuse query plan generation code, e.g. + the optimizer might conclude that the query doesn't need to do grouping + at all. 
+ */ + if ((*item)->can_eval_in_optimize() && + !(*item)->with_sum_func()) + { + TABLE *table= field->table; + Use_relaxed_field_copy urfc(thd); + MY_BITMAP *old_maps[2] = { NULL, NULL }; + ulonglong UNINIT_VAR(orig_field_val); /* original field value if valid */ + bool save_field_value; + + /* table->read_set may not be set if we come here from a CREATE TABLE */ + if (table && table->read_set) + dbug_tmp_use_all_columns(table, old_maps, + &table->read_set, &table->write_set); + + /* + Store the value of the field/constant because the call to save_in_field + below overrides that value. Don't save field value if no data has been + read yet. + */ + save_field_value= (field_item->const_item() || + !(field->table->status & STATUS_NO_RECORD)); + if (save_field_value) + orig_field_val= field->val_int(); + if (!(*item)->save_in_field(field, 1) && !field->is_null()) + { + int field_cmp= 0; + // If item is a decimal value, we must reject it if it was truncated. + if (field->type() == MYSQL_TYPE_LONGLONG) + { + field_cmp= stored_field_cmp_to_item(thd, field, *item); + DBUG_PRINT("info", ("convert_const_to_int %d", field_cmp)); + } + + if (0 == field_cmp) + { + Item *tmp= (new (thd->mem_root) + Item_int_with_ref(thd, field->val_int(), *item, + MY_TEST(field->flags & UNSIGNED_FLAG))); + if (tmp) + thd->change_item_tree(item, tmp); + result= 1; // Item was replaced + } + } + /* Restore the original field value. */ + if (save_field_value) + { + result= field->store(orig_field_val, TRUE); + /* orig_field_val must be a valid value that can be restored back. */ + DBUG_ASSERT(!result); + } + if (table && table->read_set) + dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_maps); + } + return result; +} + + +/* + Make a special case of compare with fields to get nicer comparisons + of bigint numbers with constant string. 
+ This directly contradicts the manual (number and a string should + be compared as doubles), but seems to provide more + "intuitive" behavior in some cases (but less intuitive in others). +*/ +void Item_func::convert_const_compared_to_int_field(THD *thd) +{ + DBUG_ASSERT(arg_count >= 2); // Item_func_nullif has arg_count == 3 + if (!thd->lex->is_ps_or_view_context_analysis()) + { + int field; + if (args[field= 0]->real_item()->type() == FIELD_ITEM || + args[field= 1]->real_item()->type() == FIELD_ITEM) + { + Item_field *field_item= (Item_field*) (args[field]->real_item()); + if (((field_item->field_type() == MYSQL_TYPE_LONGLONG && + field_item->type_handler() != &type_handler_vers_trx_id) || + field_item->field_type() == MYSQL_TYPE_YEAR)) + convert_const_to_int(thd, field_item, &args[!field]); + } + } +} + + +/* + Iterate through arguments and compare them to the original arguments + in "old_args". If some argument was replaced: + - from Item_field pointing to an indexed Field + - to something else (for example, Item_func_conv_charset) + then we cannot use Field's indexes for range access any more. + Raise a note in this case. + + Note, the number of arguments in "old_args" can be smaller than arg_count. + For example, for LIKE, BETWEEN, IN we pass only args[0] in old_args. 
+ + For a comparison predicate we pass both args[0] and args[1] to cover both: + - WHERE field=expr + - WHERE expr=field +*/ + +void Item_bool_func::raise_note_if_key_become_unused(THD *thd, const Item_args &old_args) +{ + if (!(thd->variables.note_verbosity & NOTE_VERBOSITY_UNUSABLE_KEYS)) + return; + + DBUG_ASSERT(old_args.argument_count() <= arg_count); + for (uint i= 0; i < old_args.argument_count(); i++) + { + if (args[i] != old_args.arguments()[i]) + { + DBUG_ASSERT(old_args.arguments()[i]->fixed()); + Item *real_item= old_args.arguments()[i]->real_item(); + if (real_item->type() == Item::FIELD_ITEM) + { + Field *field= static_cast(real_item)->field; + if (field->flags & PART_KEY_FLAG) + { + /* + It used to be Item_field (with indexes!) before the condition + rewrite. Now it's something else. Cannot use indexes any more. + */ + field->raise_note_key_become_unused(thd, Print(this, QT_EXPLAIN)); + } + } + } + } +} + + +bool Item_func::setup_args_and_comparator(THD *thd, Arg_comparator *cmp) +{ + DBUG_ASSERT(arg_count >= 2); // Item_func_nullif has arg_count == 3 + + if (args[0]->cmp_type() == STRING_RESULT && + args[1]->cmp_type() == STRING_RESULT) + { + CHARSET_INFO *tmp; + /* + Use charset narrowing only for equalities, as that would allow + to construct ref access. + Non-equality comparisons with constants work without charset narrowing, + the constant gets converted. + Non-equality comparisons with non-constants would need narrowing to + enable range optimizer to handle e.g. + t1.mb3key_col <= const_table.mb4_col + But this doesn't look important. 
+ */ + bool allow_narrowing= MY_TEST(functype()==Item_func::EQ_FUNC || + functype()==Item_func::EQUAL_FUNC); + + if (agg_arg_charsets_for_comparison(&tmp, &args[0], &args[1], + allow_narrowing)) + return true; + cmp->m_compare_collation= tmp; + } + // Convert constants when compared to int/year field + DBUG_ASSERT(functype() != LIKE_FUNC); + convert_const_compared_to_int_field(thd); + + return cmp->set_cmp_func(thd, this, &args[0], &args[1], true); +} + + +/* + Comparison operators remove arguments' dependency on PAD_CHAR_TO_FULL_LENGTH + in case of PAD SPACE comparison collations: trailing spaces do not affect + the comparison result for such collations. +*/ +Sql_mode_dependency +Item_bool_rowready_func2::value_depends_on_sql_mode() const +{ + if (compare_collation()->state & MY_CS_NOPAD) + return Item_func::value_depends_on_sql_mode(); + return ((args[0]->value_depends_on_sql_mode() | + args[1]->value_depends_on_sql_mode()) & + Sql_mode_dependency(~0, ~MODE_PAD_CHAR_TO_FULL_LENGTH)). + soft_to_hard(); +} + + +bool Item_bool_rowready_func2::fix_length_and_dec(THD *thd) +{ + max_length= 1; // Function returns 0 or 1 + + /* + As some compare functions are generated after sql_yacc, + we have to check for out of memory conditions here + */ + if (!args[0] || !args[1]) + return FALSE; + Item_args old_args(args[0], args[1]); + if (setup_args_and_comparator(thd, &cmp)) + return true; + raise_note_if_key_become_unused(thd, old_args); + return false; +} + + +/** + Prepare the comparator (set the comparison function) for comparing + items *a1 and *a2 in the context of 'type'. + + @param[in] owner_arg Item, peforming the comparison (e.g. Item_func_eq) + @param[in,out] a1 first argument to compare + @param[in,out] a2 second argument to compare + @param[in] type type context to compare in + + Both *a1 and *a2 can be replaced by this method - typically by constant + items, holding the cached converted value of the original (constant) item. 
+*/ + +int Arg_comparator::set_cmp_func(THD *thd, Item_func_or_sum *owner_arg, + Item **a1, Item **a2) +{ + owner= owner_arg; + set_null= set_null && owner_arg; + a= a1; + b= a2; + Item *tmp_args[2]= {*a1, *a2}; + Type_handler_hybrid_field_type tmp; + if (tmp.aggregate_for_comparison(owner_arg->func_name_cstring(), tmp_args, 2, + false)) + { + DBUG_ASSERT(thd->is_error()); + return 1; + } + m_compare_handler= tmp.type_handler(); + return m_compare_handler->set_comparator_func(thd, this); +} + + +bool Arg_comparator::set_cmp_func_for_row_arguments(THD *thd) +{ + uint n= (*a)->cols(); + if (n != (*b)->cols()) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), n); + comparators= 0; + return true; + } + if (!(comparators= new (thd->mem_root) Arg_comparator[n])) + return true; + for (uint i=0; i < n; i++) + { + if ((*a)->element_index(i)->cols() != (*b)->element_index(i)->cols()) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), (*a)->element_index(i)->cols()); + return true; + } + if (comparators[i].set_cmp_func(thd, owner, (*a)->addr(i), + (*b)->addr(i), set_null)) + return true; + } + return false; +} + + +bool Arg_comparator::set_cmp_func_row(THD *thd) +{ + func= is_owner_equal_func() ? &Arg_comparator::compare_e_row : + &Arg_comparator::compare_row; + return set_cmp_func_for_row_arguments(thd); +} + + +bool Arg_comparator::set_cmp_func_string(THD *thd) +{ + func= is_owner_equal_func() ? &Arg_comparator::compare_e_string : + &Arg_comparator::compare_string; + if (compare_type() == STRING_RESULT && + (*a)->result_type() == STRING_RESULT && + (*b)->result_type() == STRING_RESULT) + { + /* + We must set cmp_collation here as we may be called from for an automatic + generated item, like in natural join. + Allow reinterpted superset as subset. 
+ */ + bool allow_narrowing= false; + if (owner->type() == Item::FUNC_ITEM) + { + Item_func::Functype ftype= ((Item_func*)owner)->functype(); + if (ftype == Item_func::EQUAL_FUNC || ftype==Item_func::EQ_FUNC) + allow_narrowing= true; + } + + if (owner->agg_arg_charsets_for_comparison(&m_compare_collation, a, b, + allow_narrowing)) + return true; + + if ((*a)->type() == Item::FUNC_ITEM && + ((Item_func *) (*a))->functype() == Item_func::JSON_EXTRACT_FUNC) + { + func= is_owner_equal_func() ? &Arg_comparator::compare_e_json_str: + &Arg_comparator::compare_json_str; + return 0; + } + else if ((*b)->type() == Item::FUNC_ITEM && + ((Item_func *) (*b))->functype() == Item_func::JSON_EXTRACT_FUNC) + { + func= is_owner_equal_func() ? &Arg_comparator::compare_e_json_str: + &Arg_comparator::compare_str_json; + return 0; + } + } + + a= cache_converted_constant(thd, a, &a_cache, compare_type_handler()); + b= cache_converted_constant(thd, b, &b_cache, compare_type_handler()); + return false; +} + + +bool Arg_comparator::set_cmp_func_time(THD *thd) +{ + m_compare_collation= &my_charset_numeric; + func= is_owner_equal_func() ? &Arg_comparator::compare_e_time : + &Arg_comparator::compare_time; + a= cache_converted_constant(thd, a, &a_cache, compare_type_handler()); + b= cache_converted_constant(thd, b, &b_cache, compare_type_handler()); + return false; +} + + +bool Arg_comparator::set_cmp_func_datetime(THD *thd) +{ + m_compare_collation= &my_charset_numeric; + func= is_owner_equal_func() ? &Arg_comparator::compare_e_datetime : + &Arg_comparator::compare_datetime; + a= cache_converted_constant(thd, a, &a_cache, compare_type_handler()); + b= cache_converted_constant(thd, b, &b_cache, compare_type_handler()); + return false; +} + + +bool Arg_comparator::set_cmp_func_native(THD *thd) +{ + m_compare_collation= &my_charset_numeric; + func= is_owner_equal_func() ? 
&Arg_comparator::compare_e_native : + &Arg_comparator::compare_native; + a= cache_converted_constant(thd, a, &a_cache, compare_type_handler()); + b= cache_converted_constant(thd, b, &b_cache, compare_type_handler()); + return false; +} + + +bool Arg_comparator::set_cmp_func_int(THD *thd) +{ + func= is_owner_equal_func() ? &Arg_comparator::compare_e_int : + &Arg_comparator::compare_int_signed; + if ((*a)->field_type() == MYSQL_TYPE_YEAR && + (*b)->field_type() == MYSQL_TYPE_YEAR) + { + func= is_owner_equal_func() ? &Arg_comparator::compare_e_datetime : + &Arg_comparator::compare_datetime; + } + else if (func == &Arg_comparator::compare_int_signed) + { + if ((*a)->unsigned_flag) + func= (((*b)->unsigned_flag)? + &Arg_comparator::compare_int_unsigned : + &Arg_comparator::compare_int_unsigned_signed); + else if ((*b)->unsigned_flag) + func= &Arg_comparator::compare_int_signed_unsigned; + } + else if (func== &Arg_comparator::compare_e_int) + { + if ((*a)->unsigned_flag ^ (*b)->unsigned_flag) + func= &Arg_comparator::compare_e_int_diff_signedness; + } + a= cache_converted_constant(thd, a, &a_cache, compare_type_handler()); + b= cache_converted_constant(thd, b, &b_cache, compare_type_handler()); + return false; +} + + +bool Arg_comparator::set_cmp_func_real(THD *thd) +{ + if ((((*a)->result_type() == DECIMAL_RESULT && !(*a)->const_item() && + (*b)->result_type() == STRING_RESULT && (*b)->const_item()) || + ((*b)->result_type() == DECIMAL_RESULT && !(*b)->const_item() && + (*a)->result_type() == STRING_RESULT && (*a)->const_item()))) + { + /* + + or + + + Do comparison as decimal rather than float, in order not to lose precision. + */ + m_compare_handler= &type_handler_newdecimal; + return set_cmp_func_decimal(thd); + } + + func= is_owner_equal_func() ? 
&Arg_comparator::compare_e_real : + &Arg_comparator::compare_real; + if ((*a)->decimals < NOT_FIXED_DEC && (*b)->decimals < NOT_FIXED_DEC) + { + precision= 5 / log_10[MY_MAX((*a)->decimals, (*b)->decimals) + 1]; + if (func == &Arg_comparator::compare_real) + func= &Arg_comparator::compare_real_fixed; + else if (func == &Arg_comparator::compare_e_real) + func= &Arg_comparator::compare_e_real_fixed; + } + a= cache_converted_constant(thd, a, &a_cache, compare_type_handler()); + b= cache_converted_constant(thd, b, &b_cache, compare_type_handler()); + return false; +} + +bool Arg_comparator::set_cmp_func_decimal(THD *thd) +{ + func= is_owner_equal_func() ? &Arg_comparator::compare_e_decimal : + &Arg_comparator::compare_decimal; + a= cache_converted_constant(thd, a, &a_cache, compare_type_handler()); + b= cache_converted_constant(thd, b, &b_cache, compare_type_handler()); + return false; +} + + +/** + Convert and cache a constant. + + @param value [in] An item to cache + @param cache_item [out] Placeholder for the cache item + @param type [in] Comparison type + + @details + When given item is a constant and its type differs from comparison type + then cache its value to avoid type conversion of this constant on each + evaluation. In this case the value is cached and the reference to the cache + is returned. + Original value is returned otherwise. + + @return cache item or original value. +*/ + +Item** Arg_comparator::cache_converted_constant(THD *thd_arg, Item **value, + Item **cache_item, + const Type_handler *handler) +{ + /* + Don't need cache if doing context analysis only. 
+ */ + if (!thd_arg->lex->is_ps_or_view_context_analysis() && + (*value)->const_item() && + handler->type_handler_for_comparison() != + (*value)->type_handler_for_comparison()) + { + Item_cache *cache= handler->Item_get_cache(thd_arg, *value); + cache->setup(thd_arg, *value); + *cache_item= cache; + return cache_item; + } + return value; +} + + +int Arg_comparator::compare_time() +{ + THD *thd= current_thd; + longlong val1= (*a)->val_time_packed(thd); + if (!(*a)->null_value) + { + longlong val2= (*b)->val_time_packed(thd); + if (!(*b)->null_value) + return compare_not_null_values(val1, val2); + } + if (set_null) + owner->null_value= true; + return -1; +} + + +int Arg_comparator::compare_e_time() +{ + THD *thd= current_thd; + longlong val1= (*a)->val_time_packed(thd); + longlong val2= (*b)->val_time_packed(thd); + if ((*a)->null_value || (*b)->null_value) + return MY_TEST((*a)->null_value && (*b)->null_value); + return MY_TEST(val1 == val2); +} + + + +int Arg_comparator::compare_datetime() +{ + THD *thd= current_thd; + longlong val1= (*a)->val_datetime_packed(thd); + if (!(*a)->null_value) + { + longlong val2= (*b)->val_datetime_packed(thd); + if (!(*b)->null_value) + return compare_not_null_values(val1, val2); + } + if (set_null) + owner->null_value= true; + return -1; +} + + +int Arg_comparator::compare_e_datetime() +{ + THD *thd= current_thd; + longlong val1= (*a)->val_datetime_packed(thd); + longlong val2= (*b)->val_datetime_packed(thd); + if ((*a)->null_value || (*b)->null_value) + return MY_TEST((*a)->null_value && (*b)->null_value); + return MY_TEST(val1 == val2); +} + + +int Arg_comparator::compare_string() +{ + String *res1,*res2; + if ((res1= (*a)->val_str(&value1))) + { + if ((res2= (*b)->val_str(&value2))) + { + if (set_null) + owner->null_value= 0; + return sortcmp(res1, res2, compare_collation()); + } + } + if (set_null) + owner->null_value= 1; + return -1; +} + + +/** + Compare strings, but take into account that NULL == NULL. 
+*/ + + +int Arg_comparator::compare_e_string() +{ + String *res1,*res2; + res1= (*a)->val_str(&value1); + DBUG_ASSERT((res1 == NULL) == (*a)->null_value); + res2= (*b)->val_str(&value2); + DBUG_ASSERT((res2 == NULL) == (*b)->null_value); + if (!res1 || !res2) + return MY_TEST(res1 == res2); + return MY_TEST(sortcmp(res1, res2, compare_collation()) == 0); +} + + +int Arg_comparator::compare_native() +{ + THD *thd= current_thd; + if (!(*a)->val_native_with_conversion(thd, &m_native1, + compare_type_handler())) + { + if (!(*b)->val_native_with_conversion(thd, &m_native2, + compare_type_handler())) + { + if (set_null) + owner->null_value= 0; + return compare_type_handler()->cmp_native(m_native1, m_native2); + } + } + if (set_null) + owner->null_value= 1; + return -1; +} + + +int Arg_comparator::compare_e_native() +{ + THD *thd= current_thd; + bool res1= (*a)->val_native_with_conversion(thd, &m_native1, + compare_type_handler()); + bool res2= (*b)->val_native_with_conversion(thd, &m_native2, + compare_type_handler()); + if (res1 || res2) + return MY_TEST(res1 == res2); + return MY_TEST(compare_type_handler()->cmp_native(m_native1, m_native2) == 0); +} + + +int Arg_comparator::compare_real() +{ + /* + Fix yet another manifestation of Bug#2338. 'Volatile' will instruct + gcc to flush double values out of 80-bit Intel FPU registers before + performing the comparison. 
+ */ + volatile double val1, val2; + val1= (*a)->val_real(); + if (!(*a)->null_value) + { + val2= (*b)->val_real(); + if (!(*b)->null_value) + { + if (set_null) + owner->null_value= 0; + if (val1 < val2) return -1; + if (val1 == val2) return 0; + return 1; + } + } + if (set_null) + owner->null_value= 1; + return -1; +} + +int Arg_comparator::compare_decimal() +{ + VDec val1(*a); + if (!val1.is_null()) + { + VDec val2(*b); + if (!val2.is_null()) + { + if (set_null) + owner->null_value= 0; + val1.round_self_if_needed((*a)->decimals, HALF_UP); + val2.round_self_if_needed((*b)->decimals, HALF_UP); + return val1.cmp(val2); + } + } + if (set_null) + owner->null_value= 1; + return -1; +} + +int Arg_comparator::compare_e_real() +{ + double val1= (*a)->val_real(); + double val2= (*b)->val_real(); + if ((*a)->null_value || (*b)->null_value) + return MY_TEST((*a)->null_value && (*b)->null_value); + return MY_TEST(val1 == val2); +} + +int Arg_comparator::compare_e_decimal() +{ + VDec val1(*a), val2(*b); + if (val1.is_null() || val2.is_null()) + return MY_TEST(val1.is_null() && val2.is_null()); + val1.round_self_if_needed((*a)->decimals, HALF_UP); + val2.round_self_if_needed((*b)->decimals, HALF_UP); + return MY_TEST(val1.cmp(val2) == 0); +} + + +int Arg_comparator::compare_real_fixed() +{ + /* + Fix yet another manifestation of Bug#2338. 'Volatile' will instruct + gcc to flush double values out of 80-bit Intel FPU registers before + performing the comparison. 
+ */ + volatile double val1, val2; + val1= (*a)->val_real(); + if (!(*a)->null_value) + { + val2= (*b)->val_real(); + if (!(*b)->null_value) + { + if (set_null) + owner->null_value= 0; + if (val1 == val2 || fabs(val1 - val2) < precision) + return 0; + if (val1 < val2) + return -1; + return 1; + } + } + if (set_null) + owner->null_value= 1; + return -1; +} + + +int Arg_comparator::compare_e_real_fixed() +{ + double val1= (*a)->val_real(); + double val2= (*b)->val_real(); + if ((*a)->null_value || (*b)->null_value) + return MY_TEST((*a)->null_value && (*b)->null_value); + return MY_TEST(val1 == val2 || fabs(val1 - val2) < precision); +} + + +int Arg_comparator::compare_int_signed() +{ + longlong val1= (*a)->val_int(); + if (!(*a)->null_value) + { + longlong val2= (*b)->val_int(); + if (!(*b)->null_value) + return compare_not_null_values(val1, val2); + } + if (set_null) + owner->null_value= 1; + return -1; +} + + +/** + Compare values as BIGINT UNSIGNED. +*/ + +int Arg_comparator::compare_int_unsigned() +{ + ulonglong val1= (*a)->val_int(); + if (!(*a)->null_value) + { + ulonglong val2= (*b)->val_int(); + if (!(*b)->null_value) + { + if (set_null) + owner->null_value= 0; + if (val1 < val2) return -1; + if (val1 == val2) return 0; + return 1; + } + } + if (set_null) + owner->null_value= 1; + return -1; +} + + +/** + Compare signed (*a) with unsigned (*B) +*/ + +int Arg_comparator::compare_int_signed_unsigned() +{ + longlong sval1= (*a)->val_int(); + if (!(*a)->null_value) + { + ulonglong uval2= (ulonglong)(*b)->val_int(); + if (!(*b)->null_value) + { + if (set_null) + owner->null_value= 0; + if (sval1 < 0 || (ulonglong)sval1 < uval2) + return -1; + if ((ulonglong)sval1 == uval2) + return 0; + return 1; + } + } + if (set_null) + owner->null_value= 1; + return -1; +} + + +/** + Compare unsigned (*a) with signed (*B) +*/ + +int Arg_comparator::compare_int_unsigned_signed() +{ + ulonglong uval1= (ulonglong)(*a)->val_int(); + if (!(*a)->null_value) + { + longlong sval2= 
(*b)->val_int(); + if (!(*b)->null_value) + { + if (set_null) + owner->null_value= 0; + if (sval2 < 0) + return 1; + if (uval1 < (ulonglong)sval2) + return -1; + if (uval1 == (ulonglong)sval2) + return 0; + return 1; + } + } + if (set_null) + owner->null_value= 1; + return -1; +} + + +int Arg_comparator::compare_e_int() +{ + longlong val1= (*a)->val_int(); + longlong val2= (*b)->val_int(); + if ((*a)->null_value || (*b)->null_value) + return MY_TEST((*a)->null_value && (*b)->null_value); + return MY_TEST(val1 == val2); +} + +/** + Compare unsigned *a with signed *b or signed *a with unsigned *b. +*/ +int Arg_comparator::compare_e_int_diff_signedness() +{ + longlong val1= (*a)->val_int(); + longlong val2= (*b)->val_int(); + if ((*a)->null_value || (*b)->null_value) + return MY_TEST((*a)->null_value && (*b)->null_value); + return (val1 >= 0) && MY_TEST(val1 == val2); +} + +int Arg_comparator::compare_row() +{ + int res= 0; + bool was_null= 0; + (*a)->bring_value(); + (*b)->bring_value(); + + if ((*a)->null_value || (*b)->null_value) + { + owner->null_value= 1; + return -1; + } + + uint n= (*a)->cols(); + for (uint i= 0; inull_value && owner->type() == Item::FUNC_ITEM) + { + // NULL was compared + switch (((Item_func*)owner)->functype()) { + case Item_func::NE_FUNC: + break; // NE never aborts on NULL + case Item_func::LT_FUNC: + case Item_func::LE_FUNC: + case Item_func::GT_FUNC: + case Item_func::GE_FUNC: + return -1; // <, <=, > and >= always fail on NULL + case Item_func::EQ_FUNC: + if (owner->is_top_level_item()) + return -1; // We do not need correct NULL returning + break; + default: + DBUG_ASSERT(0); + break; + } + was_null= 1; + owner->null_value= 0; + res= 0; // continue comparison (maybe we will meet explicit difference) + } + else if (res) + return res; + } + if (was_null) + { + /* + There was NULL(s) in comparison in some parts, but there was no + explicit difference in other parts, so we have to return NULL. 
+ */ + owner->null_value= 1; + return -1; + } + return 0; +} + + +int Arg_comparator::compare_e_row() +{ + (*a)->bring_value(); + (*b)->bring_value(); + uint n= (*a)->cols(); + for (uint i= 0; iprint_parenthesised(str, query_type, precedence()); + str->append(STRING_WITH_LEN(" is ")); + if (! affirmative) + str->append(STRING_WITH_LEN("not ")); + if (value) + str->append(STRING_WITH_LEN("true")); + else + str->append(STRING_WITH_LEN("false")); +} + + +bool Item_func_truth::val_bool() +{ + bool val= args[0]->val_bool(); + if (args[0]->null_value) + { + /* + NULL val IS {TRUE, FALSE} --> FALSE + NULL val IS NOT {TRUE, FALSE} --> TRUE + */ + return (! affirmative); + } + + if (affirmative) + { + /* {TRUE, FALSE} val IS {TRUE, FALSE} value */ + return (val == value); + } + + /* {TRUE, FALSE} val IS NOT {TRUE, FALSE} value */ + return (val != value); +} + + +longlong Item_func_truth::val_int() +{ + return (val_bool() ? 1 : 0); +} + + +void Item_in_optimizer::fix_after_pullout(st_select_lex *new_parent, + Item **ref, bool merge) +{ + DBUG_ASSERT(fixed()); + /* This will re-calculate attributes of our Item_in_subselect: */ + Item_bool_func::fix_after_pullout(new_parent, ref, merge); + + /* Then, re-calculate not_null_tables_cache: */ + eval_not_null_tables(NULL); +} + + +bool Item_in_optimizer::eval_not_null_tables(void *opt_arg) +{ + not_null_tables_cache= 0; + if (is_top_level_item()) + { + /* + It is possible to determine NULL-rejectedness of the left arguments + of IN only if it is a top-level predicate. 
+ */ + not_null_tables_cache= args[0]->not_null_tables(); + } + return FALSE; +} + + +bool Item_in_optimizer::find_not_null_fields(table_map allowed) +{ + if (!(~allowed & used_tables()) && is_top_level_item()) + { + return args[0]->find_not_null_fields(allowed); + } + return false; +} + +void Item_in_optimizer::print(String *str, enum_query_type query_type) +{ + if (query_type & QT_PARSABLE) + args[1]->print(str, query_type); + else + { + restore_first_argument(); + Item_func::print(str, query_type); + } +} + + +/** + "Restore" first argument before fix_fields() call (after it is harmless). + + @Note: Main pointer to left part of IN/ALL/ANY subselect is subselect's + lest_expr (see Item_in_optimizer::fix_left) so changes made during + fix_fields will be rolled back there which can make + Item_in_optimizer::args[0] unusable on second execution before fix_left() + call. This call fix the pointer. +*/ + +void Item_in_optimizer::restore_first_argument() +{ + Item_in_subselect *in_subs= args[1]->get_IN_subquery(); + if (in_subs) + args[0]= in_subs->left_exp(); +} + + +bool Item_in_optimizer::fix_left(THD *thd) +{ + DBUG_ENTER("Item_in_optimizer::fix_left"); + /* + Here we will store pointer on place of main storage of left expression. + For usual IN (ALL/ANY) it is subquery left_expr. + For other cases (MAX/MIN optimization, non-transformed EXISTS (10.0)) + it is args[0]. + */ + Item **ref0= args; + if (!invisible_mode()) + { + /* + left_expr->fix_fields() may cause left_expr to be substituted for + another item. (e.g. an Item_field may be changed into Item_ref). This + transformation is undone at the end of statement execution (e.g. the + Item_ref is deleted). However, Item_in_optimizer::args[0] may keep + the pointer to the post-transformation item. Because of that, on the + next execution we need to copy args[1]->left_expr again. 
+ */ + ref0= args[1]->get_IN_subquery()->left_exp_ptr(); + args[0]= (*ref0); + } + if ((*ref0)->fix_fields_if_needed(thd, ref0)) + DBUG_RETURN(1); + if (!cache) + { + Query_arena *arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); + + bool rc= !(cache= (*ref0)->get_cache(thd)); + + if (arena) + thd->restore_active_arena(arena, &backup); + + if (rc) + DBUG_RETURN(1); + cache->keep_array(); + } + /* + During fix_field() expression could be substituted. + So we copy changes before use + */ + if (args[0] != (*ref0)) + args[0]= (*ref0); + DBUG_PRINT("info", ("actual fix fields")); + + cache->setup(thd, args[0]); + if (cache->cols() == 1) + { + DBUG_ASSERT(args[0]->type() != ROW_ITEM); + /* + Note: there can be cases when used_tables()==0 && !const_item(). See + Item_sum::update_used_tables for details. + */ + if ((used_tables_cache= args[0]->used_tables()) || !args[0]->const_item()) + cache->set_used_tables(OUTER_REF_TABLE_BIT); + else + cache->set_used_tables(0); + } + else + { + uint n= cache->cols(); + for (uint i= 0; i < n; i++) + { + /* Check that the expression (part of row) do not contain a subquery */ + if (args[0]->element_index(i)->walk(&Item::is_subquery_processor, 0, 0)) + { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "SUBQUERY in ROW in left expression of IN/ALL/ANY"); + DBUG_RETURN(1); + } + Item *element=args[0]->element_index(i); + if (element->used_tables() || !element->const_item()) + { + ((Item_cache *)cache->element_index(i))-> + set_used_tables(OUTER_REF_TABLE_BIT); + cache->set_used_tables(OUTER_REF_TABLE_BIT); + } + else + ((Item_cache *)cache->element_index(i))->set_used_tables(0); + } + used_tables_cache= args[0]->used_tables(); + } + eval_not_null_tables(NULL); + with_flags|= (args[0]->with_flags | + (args[1]->with_flags & item_with_t::SP_VAR)); + if ((const_item_cache= args[0]->const_item())) + { + cache->store(args[0]); + cache->cache_value(); + } + if (args[1]->fixed()) + { + /* to avoid overriding is called to update left 
expression */ + used_tables_and_const_cache_join(args[1]); + with_flags|= args[1]->with_flags & item_with_t::SUM_FUNC; + } + DBUG_RETURN(0); +} + + +bool Item_in_optimizer::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed() == 0); + Item_subselect *sub= 0; + uint col; + + /* + MAX/MIN optimization can convert the subquery into + expr + Item_singlerow_subselect + */ + if (args[1]->type() == Item::SUBSELECT_ITEM) + sub= (Item_subselect *)args[1]; + + if (fix_left(thd)) + return TRUE; + if (args[0]->maybe_null()) + set_maybe_null(); + + if (args[1]->fix_fields_if_needed(thd, args + 1)) + return TRUE; + if (!invisible_mode() && + ((sub && ((col= args[0]->cols()) != sub->engine->cols())) || + (!sub && (args[1]->cols() != (col= 1))))) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), col); + return TRUE; + } + + base_flags|= (item_base_t::FIXED | + (args[1]->base_flags & (item_base_t::MAYBE_NULL | + item_base_t::AT_TOP_LEVEL))); + with_flags|= (item_with_t::SUBQUERY | + args[1]->with_flags | + (args[0]->with_flags & + (item_with_t::SP_VAR | item_with_t::WINDOW_FUNC))); + // The subquery cannot have window functions aggregated in this select + DBUG_ASSERT(!args[1]->with_window_func()); + used_tables_and_const_cache_join(args[1]); + return FALSE; +} + +/** + Check if Item_in_optimizer should work as a pass-through item for its + arguments. + + @note + Item_in_optimizer should work as pass-through for + - subqueries that were processed by ALL/ANY->MIN/MAX rewrite + - subqueries that were originally EXISTS subqueries (and were coinverted by + the EXISTS->IN rewrite) + + When Item_in_optimizer is not not working as a pass-through, it + - caches its "left argument", args[0]. 
+ - makes adjustments to subquery item's return value for proper NULL + value handling +*/ + +bool Item_in_optimizer::invisible_mode() +{ + /* MAX/MIN transformed or EXISTS->IN prepared => do nothing */ + return (args[1]->get_IN_subquery() == NULL); +} + + +/** + Add an expression cache for this subquery if it is needed + + @param thd_arg Thread handle + + @details + The function checks whether an expression cache is needed for this item + and if if so wraps the item into an item of the class + Item_cache_wrapper with an appropriate expression cache set up there. + + @note + used from Item::transform() + + @return + new wrapper item if an expression cache is needed, + this item - otherwise +*/ + +Item *Item_in_optimizer::expr_cache_insert_transformer(THD *thd, uchar *unused) +{ + DBUG_ENTER("Item_in_optimizer::expr_cache_insert_transformer"); + DBUG_ASSERT(fixed()); + + if (invisible_mode()) + DBUG_RETURN(this); + + if (expr_cache) + DBUG_RETURN(expr_cache); + + if (args[1]->expr_cache_is_needed(thd) && + (expr_cache= set_expr_cache(thd))) + DBUG_RETURN(expr_cache); + + DBUG_RETURN(this); +} + + + +/** + Collect and add to the list cache parameters for this Item. + + @param parameters The list where to add parameters +*/ + +void Item_in_optimizer::get_cache_parameters(List ¶meters) +{ + DBUG_ASSERT(fixed()); + /* Add left expression to the list of the parameters of the subquery */ + if (!invisible_mode()) + { + if (args[0]->cols() == 1) + parameters.add_unique(args[0], &cmp_items); + else + { + for (uint i= 0; i < args[0]->cols(); i++) + { + parameters.add_unique(args[0]->element_index(i), &cmp_items); + } + } + } + args[1]->get_cache_parameters(parameters); +} + +/** + The implementation of optimized \ [NOT] IN \ + predicates. The implementation works as follows. 
+ + For the current value of the outer expression + + - If it contains only NULL values, the original (before rewrite by the + Item_in_subselect rewrite methods) inner subquery is non-correlated and + was previously executed, there is no need to re-execute it, and the + previous return value is returned. + + - If it contains NULL values, check if there is a partial match for the + inner query block by evaluating it. For clarity we repeat here the + transformation previously performed on the sub-query. The expression + + + ( oc_1, ..., oc_n ) + \ + ( SELECT ic_1, ..., ic_n + FROM \ + WHERE \ + ) + + + was transformed into + + + ( oc_1, ..., oc_n ) + \ + ( SELECT ic_1, ..., ic_n + FROM \ + WHERE \ AND ... ( ic_k = oc_k OR ic_k IS NULL ) + HAVING ... NOT ic_k IS NULL + ) + + + The evaluation will now proceed according to special rules set up + elsewhere. These rules include: + + - The HAVING NOT \ IS NULL conditions added by the + aforementioned rewrite methods will detect whether they evaluated (and + rejected) a NULL value and if so, will cause the subquery to evaluate + to NULL. + + - The added WHERE and HAVING conditions are present only for those inner + columns that correspond to outer column that are not NULL at the moment. + + - If there is an eligible index for executing the subquery, the special + access method "Full scan on NULL key" is employed which ensures that + the inner query will detect if there are NULL values resulting from the + inner query. This access method will quietly resort to table scan if it + needs to find NULL values as well. + + - Under these conditions, the sub-query need only be evaluated in order to + find out whether it produced any rows. + + - If it did, we know that there was a partial match since there are + NULL values in the outer row expression. + + - If it did not, the result is FALSE or UNKNOWN. 
If at least one of the + HAVING sub-predicates rejected a NULL value corresponding to an outer + non-NULL, and hence the inner query block returns UNKNOWN upon + evaluation, there was a partial match and the result is UNKNOWN. + + - If it contains no NULL values, the call is forwarded to the inner query + block. + + @see Item_in_subselect::val_bool() + @see Item_is_not_null_test::val_int() +*/ + +longlong Item_in_optimizer::val_int() +{ + bool tmp; + DBUG_ASSERT(fixed()); + cache->store(args[0]); + cache->cache_value(); + DBUG_ENTER(" Item_in_optimizer::val_int"); + + if (invisible_mode()) + { + longlong res= args[1]->val_int(); + null_value= args[1]->null_value; + DBUG_PRINT("info", ("pass trough")); + DBUG_RETURN(res); + } + + if (cache->null_value_inside) + { + DBUG_PRINT("info", ("Left NULL...")); + /* + We're evaluating + " [NOT] IN (SELECT ...)" + where one or more of the outer values is NULL. + */ + if (args[1]->is_top_level_item()) + { + /* + We're evaluating a top level item, e.g. + " IN (SELECT ...)", + and in this case a NULL value in the outer_value_list means + that the result shall be NULL/FALSE (makes no difference for + top level items). The cached value is NULL, so just return + NULL. + */ + null_value= 1; + } + else + { + /* + We're evaluating an item where a NULL value in either the + outer or inner value list does not automatically mean that we + can return NULL/FALSE. 
An example of such a query is + " NOT IN (SELECT ...)" + The result when there is at least one NULL value is: NULL if the + SELECT evaluated over the non-NULL values produces at least + one row, FALSE otherwise + */ + Item_in_subselect *item_subs= args[1]->get_IN_subquery(); + bool all_left_cols_null= true; + const uint ncols= cache->cols(); + + /* + Turn off the predicates that are based on column compares for + which the left part is currently NULL + */ + for (uint i= 0; i < ncols; i++) + { + if (cache->element_index(i)->null_value) + item_subs->set_cond_guard_var(i, FALSE); + else + all_left_cols_null= false; + } + + if (!item_subs->is_correlated && + all_left_cols_null && result_for_null_param != UNKNOWN) + { + /* + This is a non-correlated subquery, all values in the outer + value list are NULL, and we have already evaluated the + subquery for all NULL values: Return the same result we + did last time without evaluating the subquery. + */ + null_value= result_for_null_param; + } + else + { + /* The subquery has to be evaluated */ + (void) item_subs->val_bool_result(); + if (item_subs->engine->no_rows()) + null_value= item_subs->null_value; + else + null_value= TRUE; + if (all_left_cols_null) + result_for_null_param= null_value; + } + + /* Turn all predicates back on */ + for (uint i= 0; i < ncols; i++) + item_subs->set_cond_guard_var(i, TRUE); + } + DBUG_RETURN(0); + } + tmp= args[1]->val_bool_result(); + null_value= args[1]->null_value; + DBUG_RETURN(tmp); +} + + +void Item_in_optimizer::cleanup() +{ + DBUG_ENTER("Item_in_optimizer::cleanup"); + Item_bool_func::cleanup(); + expr_cache= 0; + DBUG_VOID_RETURN; +} + + +bool Item_in_optimizer::is_null() +{ + val_int(); + return null_value; +} + + +/** + Transform an Item_in_optimizer and its arguments with a callback function. 
+ + @param transformer the transformer callback function to be applied to the + nodes of the tree of the object + @param parameter to be passed to the transformer + + @detail + Recursively transform the left and the right operand of this Item. The + Right operand is an Item_in_subselect or its subclass. To avoid the + creation of new Items, we use the fact the the left operand of the + Item_in_subselect is the same as the one of 'this', so instead of + transforming its operand, we just assign the left operand of the + Item_in_subselect to be equal to the left operand of 'this'. + The transformation is not applied further to the subquery operand + if the IN predicate. + + @returns + @retval pointer to the transformed item + @retval NULL if an error occurred +*/ + +Item *Item_in_optimizer::transform(THD *thd, Item_transformer transformer, + uchar *argument) +{ + Item *new_item; + + DBUG_ASSERT(fixed()); + DBUG_ASSERT(!thd->stmt_arena->is_stmt_prepare()); + DBUG_ASSERT(arg_count == 2); + + /* Transform the left IN operand. */ + new_item= (*args)->transform(thd, transformer, argument); + if (!new_item) + return 0; + /* + THD::change_item_tree() should be called only if the tree was + really transformed, i.e. when a new item has been created. + Otherwise we'll be allocating a lot of unnecessary memory for + change records at each execution. + */ + if ((*args) != new_item) + thd->change_item_tree(args, new_item); + + if (invisible_mode()) + { + /* MAX/MIN transformed => pass through */ + new_item= args[1]->transform(thd, transformer, argument); + if (!new_item) + return 0; + if (args[1] != new_item) + thd->change_item_tree(args + 1, new_item); + } + else + { + /* + Transform the right IN operand which should be an Item_in_subselect or a + subclass of it. The left operand of the IN must be the same as the left + operand of this Item_in_optimizer, so in this case there is no further + transformation, we only make both operands the same. + TODO: is it the way it should be? 
+ */ + DBUG_ASSERT((args[1])->type() == Item::SUBSELECT_ITEM && + (((Item_subselect*)(args[1]))->substype() == + Item_subselect::IN_SUBS || + ((Item_subselect*)(args[1]))->substype() == + Item_subselect::ALL_SUBS || + ((Item_subselect*)(args[1]))->substype() == + Item_subselect::ANY_SUBS)); + + thd->change_item_tree(args[1]->get_IN_subquery()->left_exp_ptr(), args[0]); + } + return (this->*transformer)(thd, argument); +} + + +bool Item_in_optimizer::is_expensive_processor(void *arg) +{ + DBUG_ASSERT(fixed()); + return args[0]->is_expensive_processor(arg) || + args[1]->is_expensive_processor(arg); +} + + +bool Item_in_optimizer::is_expensive() +{ + DBUG_ASSERT(fixed()); + return args[0]->is_expensive() || args[1]->is_expensive(); +} + + +longlong Item_func_eq::val_int() +{ + DBUG_ASSERT(fixed()); + int value= cmp.compare(); + return value == 0 ? 1 : 0; +} + + +/** Same as Item_func_eq, but NULL = NULL. */ + +bool Item_func_equal::fix_length_and_dec(THD *thd) +{ + bool rc= Item_bool_rowready_func2::fix_length_and_dec(thd); + base_flags&= ~item_base_t::MAYBE_NULL; + null_value=0; + return rc; +} + +longlong Item_func_equal::val_int() +{ + DBUG_ASSERT(fixed()); + return cmp.compare(); +} + +longlong Item_func_ne::val_int() +{ + DBUG_ASSERT(fixed()); + int value= cmp.compare(); + return value != 0 && !null_value ? 1 : 0; +} + + +longlong Item_func_ge::val_int() +{ + DBUG_ASSERT(fixed()); + int value= cmp.compare(); + return value >= 0 ? 1 : 0; +} + + +longlong Item_func_gt::val_int() +{ + DBUG_ASSERT(fixed()); + int value= cmp.compare(); + return value > 0 ? 1 : 0; +} + +longlong Item_func_le::val_int() +{ + DBUG_ASSERT(fixed()); + int value= cmp.compare(); + return value <= 0 && !null_value ? 1 : 0; +} + + +longlong Item_func_lt::val_int() +{ + DBUG_ASSERT(fixed()); + int value= cmp.compare(); + return value < 0 && !null_value ? 
1 : 0; +} + + +longlong Item_func_strcmp::val_int() +{ + DBUG_ASSERT(fixed()); + String *a= args[0]->val_str(&value1); + String *b= args[1]->val_str(&value2); + if (!a || !b) + { + null_value=1; + return 0; + } + int value= cmp_collation.sortcmp(a, b); + null_value=0; + return !value ? 0 : (value < 0 ? (longlong) -1 : (longlong) 1); +} + + +bool Item_func_opt_neg::eq(const Item *item, bool binary_cmp) const +{ + /* Assume we don't have rtti */ + if (this == item) + return 1; + if (item->type() != FUNC_ITEM) + return 0; + Item_func *item_func=(Item_func*) item; + if (arg_count != item_func->argument_count() || + functype() != item_func->functype()) + return 0; + if (negated != ((Item_func_opt_neg *) item_func)->negated) + return 0; + return Item_args::eq(item_func, binary_cmp); +} + + +bool Item_func_interval::fix_fields(THD *thd, Item **ref) +{ + if (Item_long_func::fix_fields(thd, ref)) + return true; + for (uint i= 0 ; i < row->cols(); i++) + { + if (row->element_index(i)->check_cols(1)) + return true; + } + return false; +} + + +bool Item_func_interval::fix_length_and_dec(THD *thd) +{ + uint rows= row->cols(); + + use_decimal_comparison= ((row->element_index(0)->result_type() == + DECIMAL_RESULT) || + (row->element_index(0)->result_type() == + INT_RESULT)); + if (rows > 8) + { + bool not_null_consts= TRUE; + + for (uint i= 1; not_null_consts && i < rows; i++) + { + Item *el= row->element_index(i); + not_null_consts&= el->const_item() && !el->is_null(); + } + + if (not_null_consts) + { + intervals= (interval_range*) current_thd->alloc(sizeof(interval_range) * + (rows - 1)); + if (!intervals) + return TRUE; + + if (use_decimal_comparison) + { + for (uint i= 1; i < rows; i++) + { + Item *el= row->element_index(i); + interval_range *range= intervals + (i-1); + if ((el->result_type() == DECIMAL_RESULT) || + (el->result_type() == INT_RESULT)) + { + range->type= DECIMAL_RESULT; + range->dec.init(); + my_decimal *dec= el->val_decimal(&range->dec); + if (dec != 
&range->dec) + { + range->dec= *dec; + } + } + else + { + range->type= REAL_RESULT; + range->dbl= el->val_real(); + } + } + } + else + { + for (uint i= 1; i < rows; i++) + { + intervals[i-1].dbl= row->element_index(i)->val_real(); + } + } + } + } + base_flags&= ~item_base_t::MAYBE_NULL; + max_length= 2; + used_tables_and_const_cache_join(row); + not_null_tables_cache= row->not_null_tables(); + with_flags|= row->with_flags; + return FALSE; +} + + +/** + Execute Item_func_interval(). + + @note + If we are doing a decimal comparison, we are evaluating the first + item twice. + + @return + - -1 if null value, + - 0 if lower than lowest + - 1 - arg_count-1 if between args[n] and args[n+1] + - arg_count if higher than biggest argument +*/ + +longlong Item_func_interval::val_int() +{ + DBUG_ASSERT(fixed()); + double value; + my_decimal dec_buf, *dec= NULL; + uint i; + + if (use_decimal_comparison) + { + dec= row->element_index(0)->val_decimal(&dec_buf); + if (row->element_index(0)->null_value) + return -1; + my_decimal2double(E_DEC_FATAL_ERROR, dec, &value); + } + else + { + value= row->element_index(0)->val_real(); + if (row->element_index(0)->null_value) + return -1; + } + + if (intervals) + { // Use binary search to find interval + uint start,end; + start= 0; + end= row->cols()-2; + while (start != end) + { + uint mid= (start + end + 1) / 2; + interval_range *range= intervals + mid; + my_bool cmp_result; + /* + The values in the range interval may have different types, + Only do a decimal comparison if the first argument is a decimal + and we are comparing against a decimal + */ + if (dec && range->type == DECIMAL_RESULT) + cmp_result= my_decimal_cmp(&range->dec, dec) <= 0; + else + cmp_result= (range->dbl <= value); + if (cmp_result) + start= mid; + else + end= mid - 1; + } + interval_range *range= intervals+start; + return ((dec && range->type == DECIMAL_RESULT) ? + my_decimal_cmp(dec, &range->dec) < 0 : + value < range->dbl) ? 
0 : start + 1; + } + + for (i=1 ; i < row->cols() ; i++) + { + Item *el= row->element_index(i); + if (use_decimal_comparison && + ((el->result_type() == DECIMAL_RESULT) || + (el->result_type() == INT_RESULT))) + { + VDec e_dec(el); + /* Skip NULL ranges. */ + if (e_dec.is_null()) + continue; + if (e_dec.cmp(dec) > 0) + return i - 1; + } + else + { + double val= el->val_real(); + /* Skip NULL ranges. */ + if (el->null_value) + continue; + if (val > value) + return i - 1; + } + } + return i-1; +} + + +/** + Perform context analysis of a BETWEEN item tree. + + This function performs context analysis (name resolution) and calculates + various attributes of the item tree with Item_func_between as its root. + The function saves in ref the pointer to the item or to a newly created + item that is considered as a replacement for the original one. + + @param thd reference to the global context of the query thread + @param ref pointer to Item* variable where pointer to resulting "fixed" + item is to be assigned + + @note + Let T0(e)/T1(e) be the value of not_null_tables(e) when e is used on + a predicate/function level. 
Then it's easy to show that: + @verbatim + T0(e BETWEEN e1 AND e2) = union(T1(e),T1(e1),T1(e2)) + T1(e BETWEEN e1 AND e2) = union(T1(e),intersection(T1(e1),T1(e2))) + T0(e NOT BETWEEN e1 AND e2) = union(T1(e),intersection(T1(e1),T1(e2))) + T1(e NOT BETWEEN e1 AND e2) = union(T1(e),intersection(T1(e1),T1(e2))) + @endverbatim + + @retval + 0 ok + @retval + 1 got error +*/ + + +bool Item_func_between::eval_not_null_tables(void *opt_arg) +{ + if (Item_func_opt_neg::eval_not_null_tables(NULL)) + return 1; + + /* not_null_tables_cache == union(T1(e),T1(e1),T1(e2)) */ + if (is_top_level_item() && !negated) + return 0; + + /* not_null_tables_cache == union(T1(e), intersection(T1(e1),T1(e2))) */ + not_null_tables_cache= (args[0]->not_null_tables() | + (args[1]->not_null_tables() & + args[2]->not_null_tables())); + return 0; +} + + +bool Item_func_between::find_not_null_fields(table_map allowed) +{ + if (negated || !is_top_level_item() || (~allowed & used_tables())) + return false; + return args[0]->find_not_null_fields(allowed) || + args[1]->find_not_null_fields(allowed) || + args[2]->find_not_null_fields(allowed); +} + + +bool Item_func_between::count_sargable_conds(void *arg) +{ + SELECT_LEX *sel= (SELECT_LEX *) arg; + sel->cond_count++; + sel->between_count++; + return 0; +} + + +void Item_func_between::fix_after_pullout(st_select_lex *new_parent, + Item **ref, bool merge) +{ + /* This will re-calculate attributes of the arguments */ + Item_func_opt_neg::fix_after_pullout(new_parent, ref, merge); + /* Then, re-calculate not_null_tables_cache according to our special rules */ + eval_not_null_tables(NULL); +} + +bool Item_func_between::fix_length_and_dec(THD *thd) +{ + max_length= 1; + + /* + As some compare functions are generated after sql_yacc, + we have to check for out of memory conditions here + */ + if (!args[0] || !args[1] || !args[2]) + return TRUE; + Item_args old_predicant(args[0]); + if (m_comparator.aggregate_for_comparison(Item_func_between:: + 
func_name_cstring(), + args, 3, false)) + { + DBUG_ASSERT(thd->is_error()); + return TRUE; + } + if (m_comparator.type_handler()->Item_func_between_fix_length_and_dec(this)) + return true; + raise_note_if_key_become_unused(thd, old_predicant); + return false; +} + + +bool Item_func_between::fix_length_and_dec_numeric(THD *thd) +{ + /* See the comment about the similar block in Item_bool_func2 */ + if (args[0]->real_item()->type() == FIELD_ITEM && + !thd->lex->is_ps_or_view_context_analysis()) + { + Item_field *field_item= (Item_field*) (args[0]->real_item()); + if (field_item->field_type() == MYSQL_TYPE_LONGLONG || + field_item->field_type() == MYSQL_TYPE_YEAR) + { + const bool cvt_arg1= convert_const_to_int(thd, field_item, &args[1]); + const bool cvt_arg2= convert_const_to_int(thd, field_item, &args[2]); + if (cvt_arg1 && cvt_arg2) + { + // Works for all types + m_comparator.set_handler(&type_handler_slonglong); + } + } + } + return FALSE; +} + + +bool Item_func_between::fix_length_and_dec_temporal(THD *thd) +{ + if (!thd->lex->is_ps_or_view_context_analysis()) + { + for (uint i= 0; i < 3; i ++) + { + if (args[i]->const_item() && + args[i]->type_handler_for_comparison() != m_comparator.type_handler()) + { + Item_cache *cache= m_comparator.type_handler()->Item_get_cache(thd, args[i]); + if (!cache || cache->setup(thd, args[i])) + return true; + thd->change_item_tree(&args[i], cache); + } + } + } + return false; +} + + +longlong Item_func_between::val_int_cmp_datetime() +{ + THD *thd= current_thd; + longlong value= args[0]->val_datetime_packed(thd), a, b; + if ((null_value= args[0]->null_value)) + return 0; + a= args[1]->val_datetime_packed(thd); + b= args[2]->val_datetime_packed(thd); + return val_int_cmp_int_finalize(value, a, b); +} + + +longlong Item_func_between::val_int_cmp_time() +{ + THD *thd= current_thd; + longlong value= args[0]->val_time_packed(thd), a, b; + if ((null_value= args[0]->null_value)) + return 0; + a= args[1]->val_time_packed(thd); + b= 
args[2]->val_time_packed(thd); + return val_int_cmp_int_finalize(value, a, b); +} + + +longlong Item_func_between::val_int_cmp_native() +{ + THD *thd= current_thd; + const Type_handler *h= m_comparator.type_handler(); + NativeBuffer value, a, b; + if (val_native_with_conversion_from_item(thd, args[0], &value, h)) + return 0; + bool ra= args[1]->val_native_with_conversion(thd, &a, h); + bool rb= args[2]->val_native_with_conversion(thd, &b, h); + if (!ra && !rb) + return (longlong) + ((h->cmp_native(value, a) >= 0 && + h->cmp_native(value, b) <= 0) != negated); + if (ra && rb) + null_value= true; + else if (ra) + null_value= h->cmp_native(value, b) <= 0; + else + null_value= h->cmp_native(value, a) >= 0; + return (longlong) (!null_value && negated); +} + + +longlong Item_func_between::val_int_cmp_string() +{ + String *value,*a,*b; + value=args[0]->val_str(&value0); + if ((null_value=args[0]->null_value)) + return 0; + a= args[1]->val_str(&value1); + b= args[2]->val_str(&value2); + if (!args[1]->null_value && !args[2]->null_value) + return (longlong) ((sortcmp(value,a,cmp_collation.collation) >= 0 && + sortcmp(value,b,cmp_collation.collation) <= 0) != + negated); + if (args[1]->null_value && args[2]->null_value) + null_value= true; + else if (args[1]->null_value) + { + // Set to not null if false range. + null_value= sortcmp(value,b,cmp_collation.collation) <= 0; + } + else + { + // Set to not null if false range. 
+ null_value= sortcmp(value,a,cmp_collation.collation) >= 0; + } + return (longlong) (!null_value && negated); +} + + +longlong Item_func_between::val_int_cmp_int() +{ + Longlong_hybrid value= args[0]->to_longlong_hybrid(); + if ((null_value= args[0]->null_value)) + return 0; /* purecov: inspected */ + Longlong_hybrid a= args[1]->to_longlong_hybrid(); + Longlong_hybrid b= args[2]->to_longlong_hybrid(); + if (!args[1]->null_value && !args[2]->null_value) + return (longlong) ((value.cmp(a) >= 0 && value.cmp(b) <= 0) != negated); + if (args[1]->null_value && args[2]->null_value) + null_value= true; + else if (args[1]->null_value) + null_value= value.cmp(b) <= 0; // not null if false range. + else + null_value= value.cmp(a) >= 0; + return (longlong) (!null_value && negated); +} + + +bool Item_func_between::val_int_cmp_int_finalize(longlong value, + longlong a, + longlong b) +{ + if (!args[1]->null_value && !args[2]->null_value) + return (longlong) ((value >= a && value <= b) != negated); + if (args[1]->null_value && args[2]->null_value) + null_value= true; + else if (args[1]->null_value) + null_value= value <= b; // not null if false range. 
+ else + null_value= value >= a; + return (longlong) (!null_value && negated); +} + + +longlong Item_func_between::val_int_cmp_decimal() +{ + VDec dec(args[0]); + if ((null_value= dec.is_null())) + return 0; /* purecov: inspected */ + VDec a_dec(args[1]), b_dec(args[2]); + if (!a_dec.is_null() && !b_dec.is_null()) + return (longlong) ((dec.cmp(a_dec) >= 0 && + dec.cmp(b_dec) <= 0) != negated); + if (a_dec.is_null() && b_dec.is_null()) + null_value= true; + else if (a_dec.is_null()) + null_value= (dec.cmp(b_dec) <= 0); + else + null_value= (dec.cmp(a_dec) >= 0); + return (longlong) (!null_value && negated); +} + + +longlong Item_func_between::val_int_cmp_real() +{ + double value= args[0]->val_real(),a,b; + if ((null_value=args[0]->null_value)) + return 0; /* purecov: inspected */ + a= args[1]->val_real(); + b= args[2]->val_real(); + if (!args[1]->null_value && !args[2]->null_value) + return (longlong) ((value >= a && value <= b) != negated); + if (args[1]->null_value && args[2]->null_value) + null_value= true; + else if (args[1]->null_value) + { + null_value= value <= b; // not null if false range. 
+ } + else + { + null_value= value >= a; + } + return (longlong) (!null_value && negated); +} + + +void Item_func_between::print(String *str, enum_query_type query_type) +{ + args[0]->print_parenthesised(str, query_type, higher_precedence()); + if (negated) + str->append(STRING_WITH_LEN(" not")); + str->append(STRING_WITH_LEN(" between ")); + args[1]->print_parenthesised(str, query_type, precedence()); + str->append(STRING_WITH_LEN(" and ")); + args[2]->print_parenthesised(str, query_type, precedence()); +} + + +double +Item_func_ifnull::real_op() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + if (!args[0]->null_value) + { + null_value=0; + return value; + } + value= args[1]->val_real(); + if ((null_value=args[1]->null_value)) + return 0.0; + return value; +} + +longlong +Item_func_ifnull::int_op() +{ + DBUG_ASSERT(fixed()); + longlong value=args[0]->val_int(); + if (!args[0]->null_value) + { + null_value=0; + return value; + } + value=args[1]->val_int(); + if ((null_value=args[1]->null_value)) + return 0; + return value; +} + + +my_decimal *Item_func_ifnull::decimal_op(my_decimal *decimal_value) +{ + DBUG_ASSERT(fixed()); + my_decimal *value= args[0]->val_decimal(decimal_value); + if (!args[0]->null_value) + { + null_value= 0; + return value; + } + value= args[1]->val_decimal(decimal_value); + if ((null_value= args[1]->null_value)) + return 0; + return value; +} + + +String * +Item_func_ifnull::str_op(String *str) +{ + DBUG_ASSERT(fixed()); + String *res =args[0]->val_str(str); + if (!args[0]->null_value) + { + null_value=0; + res->set_charset(collation.collation); + return res; + } + res=args[1]->val_str(str); + if ((null_value=args[1]->null_value)) + return 0; + res->set_charset(collation.collation); + return res; +} + + +bool Item_func_ifnull::native_op(THD *thd, Native *to) +{ + DBUG_ASSERT(fixed()); + if (!val_native_with_conversion_from_item(thd, args[0], to, type_handler())) + return false; + return 
val_native_with_conversion_from_item(thd, args[1], to, type_handler()); +} + + +bool Item_func_ifnull::date_op(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + DBUG_ASSERT(fixed()); + for (uint i= 0; i < 2; i++) + { + Datetime_truncation_not_needed dt(thd, args[i], + fuzzydate & ~TIME_FUZZY_DATES); + if (!(dt.copy_to_mysql_time(ltime, mysql_timestamp_type()))) + return (null_value= false); + } + return (null_value= true); +} + + +bool Item_func_ifnull::time_op(THD *thd, MYSQL_TIME *ltime) +{ + DBUG_ASSERT(fixed()); + for (uint i= 0; i < 2; i++) + { + if (!Time(thd, args[i]).copy_to_mysql_time(ltime)) + return (null_value= false); + } + return (null_value= true); +} + + +/** + Perform context analysis of an IF item tree. + + This function performs context analysis (name resolution) and calculates + various attributes of the item tree with Item_func_if as its root. + The function saves in ref the pointer to the item or to a newly created + item that is considered as a replacement for the original one. + + @param thd reference to the global context of the query thread + @param ref pointer to Item* variable where pointer to resulting "fixed" + item is to be assigned + + @note + Let T0(e)/T1(e) be the value of not_null_tables(e) when e is used on + a predicate/function level. Then it's easy to show that: + @verbatim + T0(IF(e,e1,e2) = T1(IF(e,e1,e2)) + T1(IF(e,e1,e2)) = intersection(T1(e1),T1(e2)) + @endverbatim + + @retval + 0 ok + @retval + 1 got error +*/ + +bool +Item_func_if::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed() == 0); + /* + Mark that we don't care if args[0] is NULL or FALSE, we regard both cases as + false. 
+ */ + args[0]->top_level_item(); + + if (Item_func::fix_fields(thd, ref)) + return 1; + + return 0; +} + + +bool +Item_func_if::eval_not_null_tables(void *opt_arg) +{ + if (Item_func::eval_not_null_tables(NULL)) + return 1; + + not_null_tables_cache= (args[1]->not_null_tables() & + args[2]->not_null_tables()); + + return 0; +} + + +void Item_func_if::fix_after_pullout(st_select_lex *new_parent, + Item **ref, bool merge) +{ + /* This will re-calculate attributes of the arguments */ + Item_func::fix_after_pullout(new_parent, ref, merge); + /* Then, re-calculate not_null_tables_cache according to our special rules */ + eval_not_null_tables(NULL); +} + + +void Item_func_nullif::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List &fields, uint flags) +{ + if (m_cache) + { + flags|= SPLIT_SUM_SKIP_REGISTERED; // See Item_func::split_sum_func + m_cache->split_sum_func2_example(thd, ref_pointer_array, fields, flags); + args[1]->split_sum_func2(thd, ref_pointer_array, fields, &args[1], flags); + } + else + { + Item_func::split_sum_func(thd, ref_pointer_array, fields, flags); + } +} + + +bool Item_func_nullif::walk(Item_processor processor, + bool walk_subquery, void *arg) +{ + /* + No needs to iterate through args[2] when it's just a copy of args[0]. + See MDEV-9712 Performance degradation of nested NULLIF + */ + uint tmp_count= arg_count == 2 || args[0] == args[2] ? 2 : 3; + for (uint i= 0; i < tmp_count; i++) + { + if (args[i]->walk(processor, walk_subquery, arg)) + return true; + } + return (this->*processor)(arg); +} + + +void Item_func_nullif::update_used_tables() +{ + if (m_cache) + { + used_tables_and_const_cache_init(); + used_tables_and_const_cache_update_and_join(m_cache->get_example()); + used_tables_and_const_cache_update_and_join(arg_count, args); + } + else + { + /* + MDEV-9712 Performance degradation of nested NULLIF + No needs to iterate through args[2] when it's just a copy of args[0]. 
+ */ + DBUG_ASSERT(arg_count == 3); + used_tables_and_const_cache_init(); + used_tables_and_const_cache_update_and_join(args[0] == args[2] ? 2 : 3, + args); + } +} + + + +bool +Item_func_nullif::fix_length_and_dec(THD *thd) +{ + /* + If this is the first invocation of fix_length_and_dec(), create the + third argument as a copy of the first. This cannot be done before + fix_fields(), because fix_fields() might replace items, + for exampe NOT x --> x==0, or (SELECT 1) --> 1. + See also class Item_func_nullif declaration. + */ + if (arg_count == 2) + args[arg_count++]= m_arg0 ? m_arg0 : args[0]; + + /* + At prepared statement EXECUTE time, args[0] can already + point to a different Item, created during PREPARE time fix_length_and_dec(). + For example, if character set conversion was needed, arguments can look + like this: + + args[0]= > Item_func_conv_charset \ + l_expr + args[2]= >------------------------/ + + Otherwise (during PREPARE or convensional execution), + args[0] and args[2] should still point to the same original l_expr. + */ + DBUG_ASSERT(args[0] == args[2] || thd->stmt_arena->is_stmt_execute()); + if (args[0]->type() == SUM_FUNC_ITEM && + !thd->lex->is_ps_or_view_context_analysis()) + { + /* + NULLIF(l_expr, r_expr) + + is calculated in the way to return a result equal to: + + CASE WHEN l_expr = r_expr THEN NULL ELSE r_expr END. + + There's nothing special with r_expr, because it's referenced + only by args[1] and nothing else. + + l_expr needs a special treatment, as it's referenced by both + args[0] and args[2] initially. + + args[2] is used to return the value. Afrer all transformations + (e.g. in fix_length_and_dec(), equal field propagation, etc) + args[2] points to a an Item which preserves the exact data type and + attributes (e.g. collation) of the original l_expr. + It can point: + - to the original l_expr + - to an Item_cache pointing to l_expr + - to a constant of the same data type with l_expr. + + args[0] is used for comparison. 
It can be replaced: + + - to Item_func_conv_charset by character set aggregation routines + - to a constant Item by equal field propagation routines + (in case of Item_field) + + The data type and/or the attributes of args[0] can differ from + the data type and the attributes of the original l_expr, to make + it comparable to args[1] (which points to r_expr or its replacement). + + For aggregate functions we have to wrap the original args[0]/args[2] + into Item_cache (see MDEV-9181). In this case the Item_cache + instance becomes the subject to character set conversion instead of + the original args[0]/args[2], while the original args[0]/args[2] get + hidden inside the cache. + + Some examples of what NULLIF can end up with after argument + substitution (we don't mention args[1] in some cases for simplicity): + + 1. l_expr is not an aggregate function: + + a. No conversion happened. + args[0] and args[2] were not replaced to something else + (i.e. neither by character set conversion, nor by propagation): + + args[1] > r_expr + args[0] \ + l_expr + args[2] / + + b. Conversion of args[0] happened: + + CREATE OR REPLACE TABLE t1 ( + a CHAR(10) CHARACTER SET latin1, + b CHAR(10) CHARACTER SET utf8); + SELECT * FROM t1 WHERE NULLIF(a,b); + + args[1] > r_expr (Item_field for t1.b) + args[0] > Item_func_conv_charset\ + l_expr (Item_field for t1.a) + args[2] > ----------------------/ + + c. Conversion of args[1] happened: + + CREATE OR REPLACE TABLE t1 ( + a CHAR(10) CHARACTER SET utf8, + b CHAR(10) CHARACTER SET latin1); + SELECT * FROM t1 WHERE NULLIF(a,b); + + args[1] > Item_func_conv_charset -> r_expr (Item_field for t1.b) + args[0] \ + l_expr (Item_field for t1.a) + args[2] / + + d. 
Conversion of only args[0] happened (by equal field proparation): + + CREATE OR REPLACE TABLE t1 ( + a CHAR(10), + b CHAR(10)); + SELECT * FROM t1 WHERE NULLIF(a,b) AND a='a'; + + args[1] > r_expr (Item_field for t1.b) + args[0] > Item_string('a') (constant replacement for t1.a) + args[2] > l_expr (Item_field for t1.a) + + e. Conversion of both args[0] and args[2] happened + (by equal field propagation): + + CREATE OR REPLACE TABLE t1 (a INT,b INT); + SELECT * FROM t1 WHERE NULLIF(a,b) AND a=5; + + args[1] > r_expr (Item_field for "b") + args[0] \ + Item_int (5) (constant replacement for "a") + args[2] / + + 2. In case if l_expr is an aggregate function: + + a. No conversion happened: + + args[0] \ + Item_cache > l_expr + args[2] / + + b. Conversion of args[0] happened: + + args[0] > Item_func_conv_charset \ + Item_cache > l_expr + args[2] >------------------------/ + + c. Conversion of both args[0] and args[2] happened. + (e.g. by equal expression propagation) + TODO: check if it's possible (and add an example query if so). + */ + m_cache= args[0]->cmp_type() == STRING_RESULT ? + new (thd->mem_root) Item_cache_str_for_nullif(thd, args[0]) : + args[0]->get_cache(thd); + if (!m_cache) + return TRUE; + m_cache->setup(thd, args[0]); + m_cache->store(args[0]); + m_cache->set_used_tables(args[0]->used_tables()); + thd->change_item_tree(&args[0], m_cache); + thd->change_item_tree(&args[2], m_cache); + } + set_handler(args[2]->type_handler()); + collation.set(args[2]->collation); + decimals= args[2]->decimals; + unsigned_flag= args[2]->unsigned_flag; + fix_char_length(args[2]->max_char_length()); + set_maybe_null(); + m_arg0= args[0]; + if (setup_args_and_comparator(thd, &cmp)) + return TRUE; + /* + A special code for EXECUTE..PREPARE. + + If args[0] did not change, then we don't remember it, as it can point + to a temporary Item object which will be destroyed between PREPARE + and EXECUTE. EXECUTE time fix_length_and_dec() will correctly set args[2] + from args[0] again. 
+ + If args[0] changed, then it can be Item_func_conv_charset() for the + original args[0], which was permanently installed during PREPARE time + into the item tree as a wrapper for args[0], using change_item_tree(), i.e. + + NULLIF(latin1_field, 'a' COLLATE utf8_bin) + + was "rewritten" to: + + CASE WHEN CONVERT(latin1_field USING utf8) = 'a' COLLATE utf8_bin + THEN NULL + ELSE latin1_field + + - m_args0 points to Item_field corresponding to latin1_field + - args[0] points to Item_func_conv_charset + - args[0]->args[0] is equal to m_args0 + - args[1] points to Item_func_set_collation + - args[2] points is eqial to m_args0 + + In this case we remember and reuse m_arg0 during EXECUTE time as args[2]. + + QQ: How to make sure that m_args0 does not point + to something temporary which will be destroyed between PREPARE and EXECUTE. + The condition below should probably be more strict and somehow check that: + - change_item_tree() was called for the new args[0] + - m_args0 is referenced from inside args[0], e.g. as a function argument, + and therefore it is also something that won't be destroyed between + PREPARE and EXECUTE. + Any ideas? + */ + if (args[0] == m_arg0) + m_arg0= NULL; + return FALSE; +} + + +void Item_func_nullif::print(String *str, enum_query_type query_type) +{ + /* + NULLIF(a,b) is implemented according to the SQL standard as a short for + CASE WHEN a=b THEN NULL ELSE a END + + The constructor of Item_func_nullif sets args[0] and args[2] to the + same item "a", and sets args[1] to "b". + + If "this" is a part of a WHERE or ON condition, then: + - the left "a" is a subject to equal field propagation with ANY_SUBST. + - the right "a" is a subject to equal field propagation with IDENTITY_SUBST. + Therefore, after equal field propagation args[0] and args[2] can point + to different items. 
+ */ + if ((query_type & QT_ITEM_ORIGINAL_FUNC_NULLIF) || + (arg_count == 2) || + (args[0] == args[2])) + { + /* + If QT_ITEM_ORIGINAL_FUNC_NULLIF is requested, + that means we want the original NULLIF() representation, + e.g. when we are in: + SHOW CREATE {VIEW|FUNCTION|PROCEDURE} + + The original representation is possible only if + args[0] and args[2] still point to the same Item. + + The caller must never pass call print() with QT_ITEM_ORIGINAL_FUNC_NULLIF + if an expression has undergone some optimization + (e.g. equal field propagation done in optimize_cond()) already and + NULLIF() potentially has two different representations of "a": + - one "a" for comparison + - another "a" for the returned value! + */ + DBUG_ASSERT(arg_count == 2 || + args[0] == args[2] || current_thd->lex->context_analysis_only); + str->append(func_name_cstring()); + str->append('('); + if (arg_count == 2) + args[0]->print(str, query_type); + else + args[2]->print(str, query_type); + str->append(','); + args[1]->print(str, query_type); + str->append(')'); + } + else + { + /* + args[0] and args[2] are different items. + This is possible after WHERE optimization (equal fields propagation etc), + e.g. in EXPLAIN EXTENDED or EXPLAIN FORMAT=JSON. + As it's not possible to print as a function with 2 arguments any more, + do it in the CASE style. 
+ */ + str->append(STRING_WITH_LEN("(case when ")); + args[0]->print(str, query_type); + str->append(STRING_WITH_LEN(" = ")); + args[1]->print(str, query_type); + str->append(STRING_WITH_LEN(" then NULL else ")); + args[2]->print(str, query_type); + str->append(STRING_WITH_LEN(" end)")); + } +} + + +int Item_func_nullif::compare() +{ + if (m_cache) + m_cache->cache_value(); + return cmp.compare(); +} + +/** + @note + Note that we have to evaluate the first argument twice as the compare + may have been done with a different type than return value + @return + NULL if arguments are equal + @return + the first argument if not equal +*/ + +double +Item_func_nullif::real_op() +{ + DBUG_ASSERT(fixed()); + double value; + if (!compare()) + { + null_value=1; + return 0.0; + } + value= args[2]->val_real(); + null_value= args[2]->null_value; + return value; +} + +longlong +Item_func_nullif::int_op() +{ + DBUG_ASSERT(fixed()); + longlong value; + if (!compare()) + { + null_value=1; + return 0; + } + value= args[2]->val_int(); + null_value= args[2]->null_value; + return value; +} + +String * +Item_func_nullif::str_op(String *str) +{ + DBUG_ASSERT(fixed()); + String *res; + if (!compare()) + { + null_value=1; + return 0; + } + res= args[2]->val_str(str); + null_value= args[2]->null_value; + return res; +} + + +my_decimal * +Item_func_nullif::decimal_op(my_decimal * decimal_value) +{ + DBUG_ASSERT(fixed()); + my_decimal *res; + if (!compare()) + { + null_value=1; + return 0; + } + res= args[2]->val_decimal(decimal_value); + null_value= args[2]->null_value; + return res; +} + + +bool +Item_func_nullif::date_op(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + DBUG_ASSERT(fixed()); + if (!compare()) + return (null_value= true); + Datetime_truncation_not_needed dt(thd, args[2], fuzzydate); + return (null_value= dt.copy_to_mysql_time(ltime, mysql_timestamp_type())); +} + + +bool +Item_func_nullif::time_op(THD *thd, MYSQL_TIME *ltime) +{ + DBUG_ASSERT(fixed()); + if 
(!compare()) + return (null_value= true); + return (null_value= Time(thd, args[2]).copy_to_mysql_time(ltime)); + +} + + +bool +Item_func_nullif::native_op(THD *thd, Native *to) +{ + DBUG_ASSERT(fixed()); + if (!compare()) + return (null_value= true); + return val_native_with_conversion_from_item(thd, args[2], to, type_handler()); +} + + +bool +Item_func_nullif::is_null() +{ + return (null_value= (!compare() ? 1 : args[2]->is_null())); +} + +void Item_func_case::reorder_args(uint start) +{ + /* + Reorder args, to have at first the optional CASE expression, then all WHEN + expressions, then all THEN expressions. And the optional ELSE expression + at the end. + + We reorder an even number of arguments, starting from start. + */ + uint count = (arg_count - start) / 2; + const size_t size= sizeof(Item*) * count * 2; + Item **arg_buffer= (Item **)my_safe_alloca(size); + memcpy(arg_buffer, &args[start], size); + for (uint i= 0; i < count; i++) + { + args[start + i]= arg_buffer[i*2]; + args[start + i + count]= arg_buffer[i*2 + 1]; + } + my_safe_afree(arg_buffer, size); +} + + + +/** + Find and return matching items for CASE or ELSE item if all compares + are failed or NULL if ELSE item isn't defined. + + IMPLEMENTATION + In order to do correct comparisons of the CASE expression (the expression + between CASE and the first WHEN) with each WHEN expression several + comparators are used. One for each result type. CASE expression can be + evaluated up to # of different result types are used. To check whether + the CASE expression already was evaluated for a particular result type + a bit mapped variable value_added_map is used. Result types are mapped + to it according to their int values i.e. STRING_RESULT is mapped to bit + 0, REAL_RESULT to bit 1, so on. 
+ + @retval + NULL Nothing found and there is no ELSE expression defined + @retval + item Found item or ELSE item if defined and all comparisons are + failed +*/ + +Item *Item_func_case_searched::find_item() +{ + uint count= when_count(); + for (uint i= 0 ; i < count ; i++) + { + if (args[i]->val_bool()) + return args[i + count]; + } + Item **pos= Item_func_case_searched::else_expr_addr(); + return pos ? pos[0] : 0; +} + + +Item *Item_func_case_simple::find_item() +{ + /* Compare every WHEN argument with it and return the first match */ + uint idx; + if (!Predicant_to_list_comparator::cmp(this, &idx, NULL)) + return args[idx + when_count()]; + Item **pos= Item_func_case_simple::else_expr_addr(); + return pos ? pos[0] : 0; +} + + +Item *Item_func_decode_oracle::find_item() +{ + uint idx; + if (!Predicant_to_list_comparator::cmp_nulls_equal(current_thd, this, &idx)) + return args[idx + when_count()]; + Item **pos= Item_func_decode_oracle::else_expr_addr(); + return pos ? pos[0] : 0; +} + + +String *Item_func_case::str_op(String *str) +{ + DBUG_ASSERT(fixed()); + String *res; + Item *item= find_item(); + + if (!item) + { + null_value=1; + return 0; + } + null_value= 0; + if (!(res=item->val_str(str))) + null_value= 1; + return res; +} + + +longlong Item_func_case::int_op() +{ + DBUG_ASSERT(fixed()); + Item *item= find_item(); + longlong res; + + if (!item) + { + null_value=1; + return 0; + } + res=item->val_int(); + null_value=item->null_value; + return res; +} + +double Item_func_case::real_op() +{ + DBUG_ASSERT(fixed()); + Item *item= find_item(); + double res; + + if (!item) + { + null_value=1; + return 0; + } + res= item->val_real(); + null_value=item->null_value; + return res; +} + + +my_decimal *Item_func_case::decimal_op(my_decimal *decimal_value) +{ + DBUG_ASSERT(fixed()); + Item *item= find_item(); + my_decimal *res; + + if (!item) + { + null_value=1; + return 0; + } + + res= item->val_decimal(decimal_value); + null_value= item->null_value; + return res; +} + 
+ +bool Item_func_case::date_op(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + DBUG_ASSERT(fixed()); + Item *item= find_item(); + if (!item) + return (null_value= true); + Datetime_truncation_not_needed dt(thd, item, fuzzydate); + return (null_value= dt.copy_to_mysql_time(ltime, mysql_timestamp_type())); +} + + +bool Item_func_case::time_op(THD *thd, MYSQL_TIME *ltime) +{ + DBUG_ASSERT(fixed()); + Item *item= find_item(); + if (!item) + return (null_value= true); + return (null_value= Time(thd, item).copy_to_mysql_time(ltime)); +} + + +bool Item_func_case::native_op(THD *thd, Native *to) +{ + DBUG_ASSERT(fixed()); + Item *item= find_item(); + if (!item) + return (null_value= true); + return val_native_with_conversion_from_item(thd, item, to, type_handler()); +} + + +bool Item_func_case::fix_fields(THD *thd, Item **ref) +{ + bool res= Item_func::fix_fields(thd, ref); + + Item **pos= else_expr_addr(); + if (!pos || pos[0]->maybe_null()) + set_maybe_null(); + return res; +} + + +/** + Check if (*place) and new_value points to different Items and call + THD::change_item_tree() if needed. +*/ + +static void propagate_and_change_item_tree(THD *thd, Item **place, + COND_EQUAL *cond, + const Item::Context &ctx) +{ + Item *new_value= (*place)->propagate_equal_fields(thd, ctx, cond); + if (new_value && *place != new_value) + thd->change_item_tree(place, new_value); +} + + +bool Item_func_case_simple::prepare_predicant_and_values(THD *thd, + uint *found_types, + bool nulls_equal) +{ + bool have_null= false; + uint type_cnt; + Type_handler_hybrid_field_type tmp; + uint ncases= when_count(); + add_predicant(this, 0); + for (uint i= 0 ; i < ncases; i++) + { + static LEX_CSTRING case_when= { STRING_WITH_LEN("case..when") }; + if (nulls_equal ? 
+ add_value(case_when, this, i + 1) : + add_value_skip_null(case_when, this, i + 1, &have_null)) + return true; + } + all_values_added(&tmp, &type_cnt, &m_found_types); +#ifndef DBUG_OFF + Predicant_to_list_comparator::debug_print(thd); +#endif + return false; +} + + +bool Item_func_case_searched::fix_length_and_dec(THD *thd) +{ + return aggregate_then_and_else_arguments(thd, when_count()); +} + + +bool Item_func_case_simple::fix_length_and_dec(THD *thd) +{ + return (aggregate_then_and_else_arguments(thd, when_count() + 1) || + aggregate_switch_and_when_arguments(thd, false)); +} + + +bool Item_func_decode_oracle::fix_length_and_dec(THD *thd) +{ + return (aggregate_then_and_else_arguments(thd, when_count() + 1) || + aggregate_switch_and_when_arguments(thd, true)); +} + + +/* + Aggregate all THEN and ELSE expression types + and collations when string result + + @param THD - current thd + @param start - an element in args to start aggregating from +*/ +bool Item_func_case::aggregate_then_and_else_arguments(THD *thd, uint start) +{ + if (aggregate_for_result(func_name_cstring(), args + start, + arg_count - start, true)) + return true; + + if (fix_attributes(args + start, arg_count - start)) + return true; + + return false; +} + + +/* + Aggregate the predicant expression and all WHEN expression types + and collations when string comparison +*/ +bool Item_func_case_simple::aggregate_switch_and_when_arguments(THD *thd, + bool nulls_eq) +{ + uint ncases= when_count(); + m_found_types= 0; + if (prepare_predicant_and_values(thd, &m_found_types, nulls_eq)) + { + /* + If Predicant_to_list_comparator() fails to prepare components, + it must put an error into the diagnostics area. This is needed + to make fix_fields() catches such errors. 
+ */ + DBUG_ASSERT(thd->is_error()); + return true; + } + + if (!(m_found_types= collect_cmp_types(args, ncases + 1))) + return true; + + if (m_found_types & (1U << STRING_RESULT)) + { + /* + If we'll do string comparison, we also need to aggregate + character set and collation for first/WHEN items and + install converters for some of them to cmp_collation when necessary. + This is done because cmp_item compatators cannot compare + strings in two different character sets. + Some examples when we install converters: + + 1. Converter installed for the first expression: + + CASE latin1_item WHEN utf16_item THEN ... END + + is replaced to: + + CASE CONVERT(latin1_item USING utf16) WHEN utf16_item THEN ... END + + 2. Converter installed for the left WHEN item: + + CASE utf16_item WHEN latin1_item THEN ... END + + is replaced to: + + CASE utf16_item WHEN CONVERT(latin1_item USING utf16) THEN ... END + */ + if (agg_arg_charsets_for_comparison(cmp_collation, args, ncases + 1)) + return true; + } + + if (make_unique_cmp_items(thd, cmp_collation.collation)) + return true; + + return false; +} + + +Item* Item_func_case_simple::propagate_equal_fields(THD *thd, + const Context &ctx, + COND_EQUAL *cond) +{ + const Type_handler *first_expr_cmp_handler; + + first_expr_cmp_handler= args[0]->type_handler_for_comparison(); + /* + Cannot replace the CASE (the switch) argument if + there are multiple comparison types were found, or found a single + comparison type that is not equal to args[0]->cmp_type(). 
+ + - Example: multiple comparison types, can't propagate: + WHERE CASE str_column + WHEN 'string' THEN TRUE + WHEN 1 THEN TRUE + ELSE FALSE END; + + - Example: a single incompatible comparison type, can't propagate: + WHERE CASE str_column + WHEN DATE'2001-01-01' THEN TRUE + ELSE FALSE END; + + - Example: a single incompatible comparison type, can't propagate: + WHERE CASE str_column + WHEN 1 THEN TRUE + ELSE FALSE END; + + - Example: a single compatible comparison type, ok to propagate: + WHERE CASE str_column + WHEN 'str1' THEN TRUE + WHEN 'str2' THEN TRUE + ELSE FALSE END; + */ + if (m_found_types == (1UL << first_expr_cmp_handler->cmp_type())) + propagate_and_change_item_tree(thd, &args[0], cond, + Context(ANY_SUBST, first_expr_cmp_handler, cmp_collation.collation)); + + /* + These arguments are in comparison. + Allow invariants of the same value during propagation. + Note, as we pass ANY_SUBST, none of the WHEN arguments will be + replaced to zero-filled constants (only IDENTITY_SUBST allows this). + Such a change for WHEN arguments would require rebuilding cmp_items. 
+ */ + uint i, count= when_count(); + for (i= 1; i <= count; i++) + { + Type_handler_hybrid_field_type tmp(first_expr_cmp_handler); + if (!tmp.aggregate_for_comparison(args[i]->type_handler_for_comparison())) + propagate_and_change_item_tree(thd, &args[i], cond, + Context(ANY_SUBST, tmp.type_handler(), cmp_collation.collation)); + } + + // THEN and ELSE arguments (they are not in comparison) + for (; i < arg_count; i++) + propagate_and_change_item_tree(thd, &args[i], cond, Context_identity()); + + return this; +} + + +inline void Item_func_case::print_when_then_arguments(String *str, + enum_query_type + query_type, + Item **items, uint count) +{ + for (uint i= 0; i < count; i++) + { + str->append(STRING_WITH_LEN("when ")); + items[i]->print(str, query_type); + str->append(STRING_WITH_LEN(" then ")); + items[i + count]->print(str, query_type); + str->append(' '); + } +} + + +inline void Item_func_case::print_else_argument(String *str, + enum_query_type query_type, + Item *item) +{ + str->append(STRING_WITH_LEN("else ")); + item->print(str, query_type); + str->append(' '); +} + + +void Item_func_case_searched::print(String *str, enum_query_type query_type) +{ + Item **pos; + str->append(STRING_WITH_LEN("case ")); + print_when_then_arguments(str, query_type, &args[0], when_count()); + if ((pos= Item_func_case_searched::else_expr_addr())) + print_else_argument(str, query_type, pos[0]); + str->append(STRING_WITH_LEN("end")); +} + + +void Item_func_case_simple::print(String *str, enum_query_type query_type) +{ + Item **pos; + str->append(STRING_WITH_LEN("case ")); + args[0]->print_parenthesised(str, query_type, precedence()); + str->append(' '); + print_when_then_arguments(str, query_type, &args[1], when_count()); + if ((pos= Item_func_case_simple::else_expr_addr())) + print_else_argument(str, query_type, pos[0]); + str->append(STRING_WITH_LEN("end")); +} + + +void Item_func_decode_oracle::print(String *str, enum_query_type query_type) +{ + 
str->append(func_name_cstring()); + str->append('('); + args[0]->print(str, query_type); + for (uint i= 1, count= when_count() ; i <= count; i++) + { + str->append(','); + args[i]->print(str, query_type); + str->append(','); + args[i+count]->print(str, query_type); + } + Item **else_expr= Item_func_case_simple::else_expr_addr(); + if (else_expr) + { + str->append(','); + (*else_expr)->print(str, query_type); + } + str->append(')'); +} + + +/** + Coalesce - return first not NULL argument. +*/ + +String *Item_func_coalesce::str_op(String *str) +{ + DBUG_ASSERT(fixed()); + null_value=0; + for (uint i=0 ; i < arg_count ; i++) + { + String *res; + if ((res=args[i]->val_str(str))) + return res; + } + null_value=1; + return 0; +} + +longlong Item_func_coalesce::int_op() +{ + DBUG_ASSERT(fixed()); + null_value=0; + for (uint i=0 ; i < arg_count ; i++) + { + longlong res=args[i]->val_int(); + if (!args[i]->null_value) + return res; + } + null_value=1; + return 0; +} + +double Item_func_coalesce::real_op() +{ + DBUG_ASSERT(fixed()); + null_value=0; + for (uint i=0 ; i < arg_count ; i++) + { + double res= args[i]->val_real(); + if (!args[i]->null_value) + return res; + } + null_value=1; + return 0; +} + + +bool Item_func_coalesce::date_op(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + DBUG_ASSERT(fixed()); + for (uint i= 0; i < arg_count; i++) + { + Datetime_truncation_not_needed dt(thd, args[i], + fuzzydate & ~TIME_FUZZY_DATES); + if (!dt.copy_to_mysql_time(ltime, mysql_timestamp_type())) + return (null_value= false); + } + return (null_value= true); +} + + +bool Item_func_coalesce::time_op(THD *thd, MYSQL_TIME *ltime) +{ + DBUG_ASSERT(fixed()); + for (uint i= 0; i < arg_count; i++) + { + if (!Time(thd, args[i]).copy_to_mysql_time(ltime)) + return (null_value= false); + } + return (null_value= true); +} + + +bool Item_func_coalesce::native_op(THD *thd, Native *to) +{ + DBUG_ASSERT(fixed()); + for (uint i= 0; i < arg_count; i++) + { + if 
(!val_native_with_conversion_from_item(thd, args[i], to, type_handler())) + return false; + } + return (null_value= true); +} + + +my_decimal *Item_func_coalesce::decimal_op(my_decimal *decimal_value) +{ + DBUG_ASSERT(fixed()); + null_value= 0; + for (uint i= 0; i < arg_count; i++) + { + my_decimal *res= args[i]->val_decimal(decimal_value); + if (!args[i]->null_value) + return res; + } + null_value=1; + return 0; +} + + +/**************************************************************************** + Classes and function for the IN operator +****************************************************************************/ + +/* + Determine which of the signed longlong arguments is bigger + + SYNOPSIS + cmp_longs() + a_val left argument + b_val right argument + + DESCRIPTION + This function will compare two signed longlong arguments + and will return -1, 0, or 1 if left argument is smaller than, + equal to or greater than the right argument. + + RETURN VALUE + -1 left argument is smaller than the right argument. + 0 left argument is equal to the right argument. + 1 left argument is greater than the right argument. +*/ +static inline int cmp_longs (longlong a_val, longlong b_val) +{ + return a_val < b_val ? -1 : a_val == b_val ? 0 : 1; +} + + +/* + Determine which of the unsigned longlong arguments is bigger + + SYNOPSIS + cmp_ulongs() + a_val left argument + b_val right argument + + DESCRIPTION + This function will compare two unsigned longlong arguments + and will return -1, 0, or 1 if left argument is smaller than, + equal to or greater than the right argument. + + RETURN VALUE + -1 left argument is smaller than the right argument. + 0 left argument is equal to the right argument. + 1 left argument is greater than the right argument. +*/ +static inline int cmp_ulongs (ulonglong a_val, ulonglong b_val) +{ + return a_val < b_val ? -1 : a_val == b_val ? 
0 : 1; +} + + +/* + Compare two integers in IN value list format (packed_longlong) + + SYNOPSIS + cmp_longlong() + cmp_arg an argument passed to the calling function (my_qsort2) + a left argument + b right argument + + DESCRIPTION + This function will compare two integer arguments in the IN value list + format and will return -1, 0, or 1 if left argument is smaller than, + equal to or greater than the right argument. + It's used in sorting the IN values list and finding an element in it. + Depending on the signedness of the arguments cmp_longlong() will + compare them as either signed (using cmp_longs()) or unsigned (using + cmp_ulongs()). + + RETURN VALUE + -1 left argument is smaller than the right argument. + 0 left argument is equal to the right argument. + 1 left argument is greater than the right argument. +*/ +int cmp_longlong(void *cmp_arg, + in_longlong::packed_longlong *a, + in_longlong::packed_longlong *b) +{ + if (a->unsigned_flag != b->unsigned_flag) + { + /* + One of the args is unsigned and is too big to fit into the + positive signed range. Report no match. + */ + if ((a->unsigned_flag && ((ulonglong) a->val) > (ulonglong) LONGLONG_MAX) + || + (b->unsigned_flag && ((ulonglong) b->val) > (ulonglong) LONGLONG_MAX)) + return a->unsigned_flag ? 1 : -1; + /* + Although the signedness differs both args can fit into the signed + positive range. Make them signed and compare as usual. + */ + return cmp_longs(a->val, b->val); + } + if (a->unsigned_flag) + return cmp_ulongs((ulonglong) a->val, (ulonglong) b->val); + return cmp_longs(a->val, b->val); +} + +static int cmp_double(void *cmp_arg, double *a,double *b) +{ + return *a < *b ? -1 : *a == *b ? 
0 : 1; +} + +static int cmp_row(void *cmp_arg, cmp_item_row *a, cmp_item_row *b) +{ + return a->compare(b); +} + + +static int cmp_decimal(void *cmp_arg, my_decimal *a, my_decimal *b) +{ + /* + We need call of fixing buffer pointer, because fast sort just copy + decimal buffers in memory and pointers left pointing on old buffer place + */ + a->fix_buffer_pointer(); + b->fix_buffer_pointer(); + return my_decimal_cmp(a, b); +} + + +bool in_vector::find(Item *item) +{ + uchar *result=get_value(item); + if (!result || !used_count) + return false; // Null value + + uint start,end; + start=0; end=used_count-1; + while (start != end) + { + uint mid=(start+end+1)/2; + int res; + if ((res=(*compare)(collation, base+mid*size, result)) == 0) + return true; + if (res < 0) + start=mid; + else + end=mid-1; + } + return ((*compare)(collation, base+start*size, result) == 0); +} + +in_string::in_string(THD *thd, uint elements, qsort2_cmp cmp_func, + CHARSET_INFO *cs) + :in_vector(thd, elements, sizeof(String), cmp_func, cs), + tmp(buff, sizeof(buff), &my_charset_bin) +{} + +in_string::~in_string() +{ + if (base) + { + // base was allocated on THD::mem_root => following is OK + for (uint i=0 ; i < count ; i++) + ((String*) base)[i].free(); + } +} + +bool in_string::set(uint pos, Item *item) +{ + String *str=((String*) base)+pos; + String *res=item->val_str(str); + if (res && res != str) + { + if (res->uses_buffer_owned_by(str)) + res->copy(); + if (item->type() == Item::FUNC_ITEM) + str->copy(*res); + else + *str= *res; + } + if (!str->charset()) + { + CHARSET_INFO *cs; + if (!(cs= item->collation.collation)) + cs= &my_charset_bin; // Should never happen for STR items + str->set_charset(cs); + } + return res == NULL; +} + + +uchar *in_string::get_value(Item *item) +{ + return (uchar*) item->val_str(&tmp); +} + +Item *in_string::create_item(THD *thd) +{ + return new (thd->mem_root) Item_string_for_in_vector(thd, collation); +} + + +in_row::in_row(THD *thd, uint elements, Item * item) 
+{ + base= (char*) new (thd->mem_root) cmp_item_row[count= elements]; + size= sizeof(cmp_item_row); + compare= (qsort2_cmp) cmp_row; + /* + We need to reset these as otherwise we will call sort() with + uninitialized (even if not used) elements + */ + used_count= elements; + collation= 0; +} + +in_row::~in_row() +{ + if (base) + delete [] (cmp_item_row*) base; +} + +uchar *in_row::get_value(Item *item) +{ + tmp.store_value(item); + if (item->is_null()) + return 0; + return (uchar *)&tmp; +} + +bool in_row::set(uint pos, Item *item) +{ + DBUG_ENTER("in_row::set"); + DBUG_PRINT("enter", ("pos: %u item: %p", pos,item)); + DBUG_RETURN(((cmp_item_row*) base)[pos].store_value_by_template(current_thd, + &tmp, item)); +} + +in_longlong::in_longlong(THD *thd, uint elements) + :in_vector(thd, elements, sizeof(packed_longlong), + (qsort2_cmp) cmp_longlong, 0) +{} + +bool in_longlong::set(uint pos, Item *item) +{ + struct packed_longlong *buff= &((packed_longlong*) base)[pos]; + + buff->val= item->val_int(); + buff->unsigned_flag= item->unsigned_flag; + return item->null_value; +} + +uchar *in_longlong::get_value(Item *item) +{ + tmp.val= item->val_int(); + if (item->null_value) + return 0; + tmp.unsigned_flag= item->unsigned_flag; + return (uchar*) &tmp; +} + +Item *in_longlong::create_item(THD *thd) +{ + /* + We're created a signed INT, this may not be correct in + general case (see BUG#19342). 
+ */ + return new (thd->mem_root) Item_int(thd, (longlong)0); +} + + +static int cmp_timestamp(void *cmp_arg, + Timestamp_or_zero_datetime *a, + Timestamp_or_zero_datetime *b) +{ + return a->cmp(*b); +} + + +in_timestamp::in_timestamp(THD *thd, uint elements) + :in_vector(thd, elements, sizeof(Value), (qsort2_cmp) cmp_timestamp, 0) +{} + + +bool in_timestamp::set(uint pos, Item *item) +{ + Timestamp_or_zero_datetime *buff= &((Timestamp_or_zero_datetime *) base)[pos]; + Timestamp_or_zero_datetime_native_null native(current_thd, item, true); + if (native.is_null()) + { + *buff= Timestamp_or_zero_datetime(); + return true; + } + *buff= Timestamp_or_zero_datetime(native); + return false; +} + + +uchar *in_timestamp::get_value(Item *item) +{ + Timestamp_or_zero_datetime_native_null native(current_thd, item, true); + if (native.is_null()) + return 0; + tmp= Timestamp_or_zero_datetime(native); + return (uchar*) &tmp; +} + + +Item *in_timestamp::create_item(THD *thd) +{ + return new (thd->mem_root) Item_timestamp_literal(thd); +} + + +void in_timestamp::value_to_item(uint pos, Item *item) +{ + const Timestamp_or_zero_datetime &buff= (((Timestamp_or_zero_datetime*) base)[pos]); + static_cast(item)->set_value(buff); +} + + +bool in_datetime::set(uint pos, Item *item) +{ + struct packed_longlong *buff= &((packed_longlong*) base)[pos]; + + buff->val= item->val_datetime_packed(current_thd); + buff->unsigned_flag= 1L; + return item->null_value; +} + +bool in_time::set(uint pos, Item *item) +{ + struct packed_longlong *buff= &((packed_longlong*) base)[pos]; + + buff->val= item->val_time_packed(current_thd); + buff->unsigned_flag= 1L; + return item->null_value; +} + +uchar *in_datetime::get_value(Item *item) +{ + tmp.val= item->val_datetime_packed(current_thd); + if (item->null_value) + return 0; + tmp.unsigned_flag= 1L; + return (uchar*) &tmp; +} + +uchar *in_time::get_value(Item *item) +{ + tmp.val= item->val_time_packed(current_thd); + if (item->null_value) + return 0; + 
tmp.unsigned_flag= 1L; + return (uchar*) &tmp; +} + +Item *in_temporal::create_item(THD *thd) +{ + return new (thd->mem_root) Item_datetime(thd); +} + + +in_double::in_double(THD *thd, uint elements) + :in_vector(thd, elements, sizeof(double), (qsort2_cmp) cmp_double, 0) +{} + +bool in_double::set(uint pos, Item *item) +{ + ((double*) base)[pos]= item->val_real(); + return item->null_value; +} + +uchar *in_double::get_value(Item *item) +{ + tmp= item->val_real(); + if (item->null_value) + return 0; /* purecov: inspected */ + return (uchar*) &tmp; +} + +Item *in_double::create_item(THD *thd) +{ + return new (thd->mem_root) Item_float(thd, 0.0, 0); +} + + +in_decimal::in_decimal(THD *thd, uint elements) + :in_vector(thd, elements, sizeof(my_decimal), (qsort2_cmp) cmp_decimal, 0) +{} + + +bool in_decimal::set(uint pos, Item *item) +{ + /* as far as 'item' is constant, we can store reference on my_decimal */ + my_decimal *dec= ((my_decimal *)base) + pos; + dec->len= DECIMAL_BUFF_LENGTH; + dec->fix_buffer_pointer(); + my_decimal *res= item->val_decimal(dec); + /* if item->val_decimal() is evaluated to NULL then res == 0 */ + if (!item->null_value && res != dec) + my_decimal2decimal(res, dec); + return item->null_value; +} + + +uchar *in_decimal::get_value(Item *item) +{ + my_decimal *result= item->val_decimal(&val); + if (item->null_value) + return 0; + return (uchar *)result; +} + +Item *in_decimal::create_item(THD *thd) +{ + return new (thd->mem_root) Item_decimal(thd, 0, FALSE); +} + + +bool Predicant_to_list_comparator::alloc_comparators(THD *thd, uint nargs) +{ + size_t nbytes= sizeof(Predicant_to_value_comparator) * nargs; + if (!(m_comparators= (Predicant_to_value_comparator *) thd->alloc(nbytes))) + return true; + memset(m_comparators, 0, nbytes); + return false; +} + + +bool Predicant_to_list_comparator::add_value(const LEX_CSTRING &funcname, + Item_args *args, + uint value_index) +{ + DBUG_ASSERT(m_predicant_index < args->argument_count()); + 
DBUG_ASSERT(value_index < args->argument_count()); + Type_handler_hybrid_field_type tmp; + Item *tmpargs[2]; + tmpargs[0]= args->arguments()[m_predicant_index]; + tmpargs[1]= args->arguments()[value_index]; + if (tmp.aggregate_for_comparison(funcname, tmpargs, 2, true)) + { + DBUG_ASSERT(current_thd->is_error()); + return true; + } + m_comparators[m_comparator_count].m_handler= tmp.type_handler(); + m_comparators[m_comparator_count].m_arg_index= value_index; + m_comparator_count++; + return false; +} + + +bool Predicant_to_list_comparator:: +add_value_skip_null(const LEX_CSTRING &funcname, + Item_args *args, + uint value_index, + bool *nulls_found) +{ + /* + Skip explicit NULL constant items. + Using real_item() to correctly detect references to explicit NULLs + in HAVING clause, e.g. in this example "b" is skipped: + SELECT a,NULL AS b FROM t1 GROUP BY a HAVING 'A' IN (b,'A'); + */ + if (args->arguments()[value_index]->real_item()->type() == Item::NULL_ITEM) + { + *nulls_found= true; + return false; + } + return add_value(funcname, args, value_index); +} + + +void Predicant_to_list_comparator:: + detect_unique_handlers(Type_handler_hybrid_field_type *compatible, + uint *unique_count, + uint *found_types) +{ + *unique_count= 0; + *found_types= 0; + for (uint i= 0; i < m_comparator_count; i++) + { + uint idx; + if (find_handler(&idx, m_comparators[i].m_handler, i)) + { + m_comparators[i].m_handler_index= i; // New unique handler + (*unique_count)++; + (*found_types)|= 1U << m_comparators[i].m_handler->cmp_type(); + compatible->set_handler(m_comparators[i].m_handler); + } + else + { + m_comparators[i].m_handler_index= idx; // Non-unique handler + } + } +} + + +bool Predicant_to_list_comparator::make_unique_cmp_items(THD *thd, + CHARSET_INFO *cs) +{ + for (uint i= 0; i < m_comparator_count; i++) + { + if (m_comparators[i].m_handler && // Skip implicit NULLs + m_comparators[i].m_handler_index == i && // Skip non-unuque + !(m_comparators[i].m_cmp_item= + 
m_comparators[i].m_handler->make_cmp_item(thd, cs))) + return true; + } + return false; +} + + +cmp_item* cmp_item_sort_string::make_same(THD *thd) +{ + return new (thd->mem_root) cmp_item_sort_string_in_static(cmp_charset); +} + +cmp_item* cmp_item_int::make_same(THD *thd) +{ + return new (thd->mem_root) cmp_item_int(); +} + +cmp_item* cmp_item_real::make_same(THD *thd) +{ + return new (thd->mem_root) cmp_item_real(); +} + +cmp_item* cmp_item_row::make_same(THD *thd) +{ + return new (thd->mem_root) cmp_item_row(); +} + + +cmp_item_row::~cmp_item_row() +{ + DBUG_ENTER("~cmp_item_row"); + DBUG_PRINT("enter",("this: %p", this)); + if (comparators) + { + for (uint i= 0; i < n; i++) + { + if (comparators[i]) + delete comparators[i]; + } + } + DBUG_VOID_RETURN; +} + + +bool cmp_item_row::alloc_comparators(THD *thd, uint cols) +{ + if (comparators) + { + DBUG_ASSERT(cols == n); + return false; + } + return + !(comparators= (cmp_item **) thd->calloc(sizeof(cmp_item *) * (n= cols))); +} + + +void cmp_item_row::store_value(Item *item) +{ + DBUG_ENTER("cmp_item_row::store_value"); + DBUG_ASSERT(comparators); + DBUG_ASSERT(n == item->cols()); + item->bring_value(); + item->null_value= 0; + for (uint i=0; i < n; i++) + { + DBUG_ASSERT(comparators[i]); + comparators[i]->store_value(item->element_index(i)); + item->null_value|= item->element_index(i)->null_value; + } + DBUG_VOID_RETURN; +} + + +bool cmp_item_row::store_value_by_template(THD *thd, cmp_item *t, Item *item) +{ + cmp_item_row *tmpl= (cmp_item_row*) t; + if (tmpl->n != item->cols()) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), tmpl->n); + return 1; + } + n= tmpl->n; + bool rc= false; + if ((comparators= (cmp_item **) thd->alloc(sizeof(cmp_item *)*n))) + { + item->bring_value(); + item->null_value= 0; + for (uint i=0; i < n; i++) + { + if (!(comparators[i]= tmpl->comparators[i]->make_same(thd))) + break; // new failed + rc|= comparators[i]->store_value_by_template(thd, tmpl->comparators[i], + item->element_index(i)); + 
} + } + return rc; +} + + +int cmp_item_row::cmp(Item *arg) +{ + arg->null_value= 0; + if (arg->cols() != n) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), n); + return 1; + } + bool was_null= 0; + arg->bring_value(); + for (uint i=0; i < n; i++) + { + const int rc= comparators[i]->cmp(arg->element_index(i)); + switch (rc) + { + case UNKNOWN: + was_null= true; + break; + case TRUE: + return TRUE; + case FALSE: + break; // elements #i are equal + } + arg->null_value|= arg->element_index(i)->null_value; + } + return was_null ? UNKNOWN : FALSE; +} + + +int cmp_item_row::compare(cmp_item *c) +{ + cmp_item_row *l_cmp= (cmp_item_row *) c; + for (uint i=0; i < n; i++) + { + int res; + if ((res= comparators[i]->compare(l_cmp->comparators[i]))) + return res; + } + return 0; +} + + +void cmp_item_decimal::store_value(Item *item) +{ + my_decimal *val= item->val_decimal(&value); + /* val may be zero if item is nnull */ + if (val && val != &value) + my_decimal2decimal(val, &value); + m_null_value= item->null_value; +} + + +int cmp_item_decimal::cmp_not_null(const Value *val) +{ + DBUG_ASSERT(!val->is_null()); + DBUG_ASSERT(val->is_decimal()); + return my_decimal_cmp(&value, &val->m_decimal); +} + + +int cmp_item_decimal::cmp(Item *arg) +{ + VDec tmp(arg); + return m_null_value || tmp.is_null() ? UNKNOWN : (tmp.cmp(&value) != 0); +} + + +int cmp_item_decimal::compare(cmp_item *arg) +{ + cmp_item_decimal *l_cmp= (cmp_item_decimal*) arg; + return my_decimal_cmp(&value, &l_cmp->value); +} + + +cmp_item* cmp_item_decimal::make_same(THD *thd) +{ + return new (thd->mem_root) cmp_item_decimal(); +} + + +int cmp_item_datetime::cmp_not_null(const Value *val) +{ + DBUG_ASSERT(!val->is_null()); + DBUG_ASSERT(val->is_temporal()); + return value != pack_time(&val->value.m_time); +} + + +int cmp_item_datetime::cmp(Item *arg) +{ + const bool rc= value != arg->val_datetime_packed(current_thd); + return (m_null_value || arg->null_value) ? 
UNKNOWN : rc; +} + + +int cmp_item_time::cmp_not_null(const Value *val) +{ + DBUG_ASSERT(!val->is_null()); + DBUG_ASSERT(val->is_temporal()); + return value != pack_time(&val->value.m_time); +} + + +int cmp_item_time::cmp(Item *arg) +{ + const bool rc= value != arg->val_time_packed(current_thd); + return (m_null_value || arg->null_value) ? UNKNOWN : rc; +} + + +int cmp_item_temporal::compare(cmp_item *ci) +{ + cmp_item_temporal *l_cmp= (cmp_item_temporal *)ci; + return (value < l_cmp->value) ? -1 : ((value == l_cmp->value) ? 0 : 1); +} + + +cmp_item *cmp_item_datetime::make_same(THD *thd) +{ + return new (thd->mem_root) cmp_item_datetime(); +} + + +cmp_item *cmp_item_time::make_same(THD *thd) +{ + return new (thd->mem_root) cmp_item_time(); +} + + +void cmp_item_timestamp::store_value(Item *item) +{ + item->val_native_with_conversion(current_thd, &m_native, + &type_handler_timestamp2); + m_null_value= item->null_value; +} + + +int cmp_item_timestamp::cmp_not_null(const Value *val) +{ + /* + This method will be implemented when we add this syntax: + SELECT TIMESTAMP WITH LOCAL TIME ZONE '2001-01-01 10:20:30' + For now TIMESTAMP is compared to non-TIMESTAMP using DATETIME. + */ + DBUG_ASSERT(0); + return 0; +} + + +int cmp_item_timestamp::cmp(Item *arg) +{ + THD *thd= current_thd; + Timestamp_or_zero_datetime_native_null tmp(thd, arg, true); + return m_null_value || tmp.is_null() ? 
UNKNOWN : + type_handler_timestamp2.cmp_native(m_native, tmp) != 0; +} + + +int cmp_item_timestamp::compare(cmp_item *arg) +{ + cmp_item_timestamp *tmp= static_cast(arg); + return type_handler_timestamp2.cmp_native(m_native, tmp->m_native); +} + + +cmp_item* cmp_item_timestamp::make_same(THD *thd) +{ + return new (thd->mem_root) cmp_item_timestamp(); +} + + + +bool Item_func_in::count_sargable_conds(void *arg) +{ + ((SELECT_LEX*) arg)->cond_count++; + return 0; +} + + +bool Item_func_in::list_contains_null() +{ + Item **arg,**arg_end; + for (arg= args + 1, arg_end= args+arg_count; arg != arg_end ; arg++) + { + if ((*arg)->null_inside()) + return 1; + } + return 0; +} + + +/** + Perform context analysis of an IN item tree. + + This function performs context analysis (name resolution) and calculates + various attributes of the item tree with Item_func_in as its root. + The function saves in ref the pointer to the item or to a newly created + item that is considered as a replacement for the original one. + + @param thd reference to the global context of the query thread + @param ref pointer to Item* variable where pointer to resulting "fixed" + item is to be assigned + + @note + Let T0(e)/T1(e) be the value of not_null_tables(e) when e is used on + a predicate/function level. 
Then it's easy to show that: + @verbatim + T0(e IN(e1,...,en)) = union(T1(e),intersection(T1(ei))) + T1(e IN(e1,...,en)) = union(T1(e),intersection(T1(ei))) + T0(e NOT IN(e1,...,en)) = union(T1(e),union(T1(ei))) + T1(e NOT IN(e1,...,en)) = union(T1(e),intersection(T1(ei))) + @endverbatim + + @retval + 0 ok + @retval + 1 got error +*/ + +bool +Item_func_in::fix_fields(THD *thd, Item **ref) +{ + + if (Item_func_opt_neg::fix_fields(thd, ref)) + return 1; + + return 0; +} + + +Item *Item_func_in::in_predicate_to_equality_transformer(THD *thd, uchar *arg) +{ + if (!array || have_null || !all_items_are_consts(args + 1, arg_count - 1)) + return this; /* Transformation is not applicable */ + + /* + If all elements in the array of constant values are equal and there are + no NULLs in the list then clause + - "a IN (e1,..,en)" can be converted to "a = e1" + - "a NOT IN (e1,..,en)" can be converted to "a != e1". + This means an object of Item_func_in can be replaced with an object of + Item_func_eq for IN (e1,..,en) clause or Item_func_ne for + NOT IN (e1,...,en). 
+ */ + + /* + Since the array is sorted it's enough to compare the first and the last + elements to tell whether all elements are equal + */ + if (array->compare_elems(0, array->used_count - 1)) + { + /* Not all elements are equal, transformation is not possible */ + return this; + } + + Json_writer_object trace_wrapper(thd); + trace_wrapper.add("transformation", "in_predicate_to_equality") + .add("before", this); + + Item *new_item= nullptr; + if (negated) + new_item= new (thd->mem_root) Item_func_ne(thd, args[0], args[1]); + else + new_item= new (thd->mem_root) Item_func_eq(thd, args[0], args[1]); + if (new_item) + { + new_item->set_name(thd, name); + if (new_item->fix_fields(thd, &new_item)) + { + /* + If there are any problems during fixing fields, there is no need to + return an error, just discard the transformation + */ + new_item= this; + } + } + trace_wrapper.add("after", new_item); + return new_item; +} + +bool +Item_func_in::eval_not_null_tables(void *opt_arg) +{ + Item **arg, **arg_end; + + if (Item_func_opt_neg::eval_not_null_tables(NULL)) + return 1; + + /* not_null_tables_cache == union(T1(e),union(T1(ei))) */ + if (is_top_level_item() && negated) + return 0; + + /* not_null_tables_cache = union(T1(e),intersection(T1(ei))) */ + not_null_tables_cache= ~(table_map) 0; + for (arg= args + 1, arg_end= args + arg_count; arg != arg_end; arg++) + not_null_tables_cache&= (*arg)->not_null_tables(); + not_null_tables_cache|= (*args)->not_null_tables(); + return 0; +} + + +bool +Item_func_in::find_not_null_fields(table_map allowed) +{ + if (negated || !is_top_level_item() || (~allowed & used_tables())) + return 0; + return args[0]->find_not_null_fields(allowed); +} + + +void Item_func_in::fix_after_pullout(st_select_lex *new_parent, Item **ref, + bool merge) +{ + /* This will re-calculate attributes of the arguments */ + Item_func_opt_neg::fix_after_pullout(new_parent, ref, merge); + /* Then, re-calculate not_null_tables_cache according to our special rules */ + 
eval_not_null_tables(NULL); +} + + +bool Item_func_in::prepare_predicant_and_values(THD *thd, uint *found_types) +{ + uint type_cnt; + have_null= false; + + add_predicant(this, 0); + for (uint i= 1 ; i < arg_count; i++) + { + if (add_value_skip_null(Item_func_in::func_name_cstring(), this, i, + &have_null)) + return true; + } + all_values_added(&m_comparator, &type_cnt, found_types); + arg_types_compatible= type_cnt < 2; + +#ifndef DBUG_OFF + Predicant_to_list_comparator::debug_print(thd); +#endif + return false; +} + + +bool Item_func_in::fix_length_and_dec(THD *thd) +{ + Item_args old_predicant(args[0]); + uint found_types; + m_comparator.set_handler(type_handler_varchar.type_handler_for_comparison()); + max_length= 1; + + if (prepare_predicant_and_values(thd, &found_types)) + { + DBUG_ASSERT(thd->is_error()); // Must set error + return TRUE; + } + + if (!arg_types_compatible && comparator_count() == 2) + { + /* + Catch a special case: a mixture of signed and unsigned integer types. + in_longlong can handle such cases. + + Note, prepare_predicant_and_values() aggregates this mixture as follows: + - signed+unsigned produce &type_handler_newdecimal. + - signed+signed or unsigned+unsigned produce &type_handler_slonglong + So we have extactly two distinct handlers. + + The code below assumes that unsigned longlong is handled + by &type_handler_slonglong in comparison context, + which may change in the future to &type_handler_ulonglong. + The DBUG_ASSERT is needed to address this change here properly. + */ + DBUG_ASSERT(type_handler_ulonglong.type_handler_for_comparison() == + &type_handler_slonglong); + // Let's check if all arguments are of integer types + uint found_int_args= 0; + for (uint i= 0; i < arg_count; i++, found_int_args++) + { + if (args[i]->type_handler_for_comparison() != &type_handler_slonglong) + break; + } + if (found_int_args == arg_count) + { + // All arguments are integers. Switch to integer comparison. 
+ arg_types_compatible= true; + DBUG_EXECUTE_IF("Item_func_in", + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_UNKNOWN_ERROR, "DBUG: found a mix of UINT and SINT");); + m_comparator.set_handler(&type_handler_slonglong); + } + } + + if (arg_types_compatible) // Bisection condition #1 + { + if (m_comparator.type_handler()-> + Item_func_in_fix_comparator_compatible_types(thd, this)) + return TRUE; + } + else + { + DBUG_ASSERT(m_comparator.cmp_type() != ROW_RESULT); + if (fix_for_scalar_comparison_using_cmp_items(thd, found_types)) + return TRUE; + } + + DBUG_EXECUTE_IF("Item_func_in", + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_UNKNOWN_ERROR, "DBUG: types_compatible=%s bisect=%s", + arg_types_compatible ? "yes" : "no", + array != NULL ? "yes" : "no");); + raise_note_if_key_become_unused(thd, old_predicant); + return FALSE; +} + + +/** + Populate Item_func_in::array with constant not-NULL arguments and sort them. + + Sets "have_null" to true if some of the values appeared to be NULL. + Note, explicit NULLs were found during prepare_predicant_and_values(). + So "have_null" can already be true before the fix_in_vector() call. + Here we additionally catch implicit NULLs. +*/ +void Item_func_in::fix_in_vector() +{ + DBUG_ASSERT(array); + uint j=0; + for (uint i=1 ; i < arg_count ; i++) + { + if (!array->set(j,args[i])) + j++; // include this cell in the array. + else + { + /* + We don't put NULL values in array, to avoid erronous matches in + bisection. + */ + have_null= 1; + } + } + if ((array->used_count= j)) + array->sort(); +} + + +/** + Convert all items in to INT. + + IN must compare INT columns and constants as int values (the same + way as equality does). + So we must check here if the column on the left and all the constant + values on the right can be compared as integers and adjust the + comparison type accordingly. 
+ + See the comment about the similar block in Item_bool_func2 +*/ +bool Item_func_in::value_list_convert_const_to_int(THD *thd) +{ + if (args[0]->real_item()->type() == FIELD_ITEM && + !thd->lex->is_view_context_analysis()) + { + Item_field *field_item= (Item_field*) (args[0]->real_item()); + if (field_item->field_type() == MYSQL_TYPE_LONGLONG || + field_item->field_type() == MYSQL_TYPE_YEAR) + { + bool all_converted= true; + Item **arg, **arg_end; + for (arg=args+1, arg_end=args+arg_count; arg != arg_end ; arg++) + { + /* + Explicit NULLs should not affect data cmp_type resolution: + - we ignore NULLs when calling collect_cmp_type() + - we ignore NULLs here + So this expression: + year_column IN (DATE'2001-01-01', NULL) + switches from TIME_RESULT to INT_RESULT. + */ + if (arg[0]->type() != Item::NULL_ITEM && + !convert_const_to_int(thd, field_item, &arg[0])) + all_converted= false; + } + if (all_converted) + m_comparator.set_handler(&type_handler_slonglong); + } + } + return thd->is_fatal_error; // Catch errrors in convert_const_to_int +} + + +bool cmp_item_row:: + aggregate_row_elements_for_comparison(THD *thd, + Type_handler_hybrid_field_type *cmp, + Item_args *tmp, + const LEX_CSTRING &funcname, + uint col, + uint level) +{ + DBUG_EXECUTE_IF("cmp_item", + { + for (uint i= 0 ; i < tmp->argument_count(); i++) + { + Item *arg= tmp->arguments()[i]; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_UNKNOWN_ERROR, "DBUG: %*s[%d,%d] handler=%s", + level, "", col, i, + arg->type_handler()->name().ptr()); + } + } + ); + bool err= cmp->aggregate_for_comparison(funcname, tmp->arguments(), + tmp->argument_count(), true); + DBUG_EXECUTE_IF("cmp_item", + { + if (!err) + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_UNKNOWN_ERROR, "DBUG: %*s=> handler=%s", + level,"", + cmp->type_handler()->name().ptr()); + } + ); + return err; +} + + +bool cmp_item_row::prepare_comparators(THD *thd, const LEX_CSTRING &funcname, + const Item_args *args, uint 
level) +{ + DBUG_EXECUTE_IF("cmp_item", + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_UNKNOWN_ERROR, "DBUG: %*sROW(%d args) level=%d", + level,"", + args->argument_count(), level);); + DBUG_ASSERT(args->argument_count() > 0); + if (alloc_comparators(thd, args->arguments()[0]->cols())) + return true; + DBUG_ASSERT(n == args->arguments()[0]->cols()); + for (uint col= 0; col < n; col++) + { + Item_args tmp; + Type_handler_hybrid_field_type cmp; + + if (tmp.alloc_and_extract_row_elements(thd, args, col) || + aggregate_row_elements_for_comparison(thd, &cmp, &tmp, + funcname, col, level + 1)) + return true; + + /* + There is a legacy bug (MDEV-11511) in the code below, + which should be fixed eventually. + When performing: + (predicant0,predicant1) IN ((value00,value01),(value10,value11)) + It uses only the data type and the collation of the predicant + elements only. It should be fixed to take into account the data type and + the collation for all elements at the N-th positions of the + predicate and all values: + - predicate0, value00, value01 + - predicate1, value10, value11 + */ + Item *item0= args->arguments()[0]->element_index(col); + CHARSET_INFO *collation= item0->collation.collation; + if (!(comparators[col]= cmp.type_handler()->make_cmp_item(thd, collation))) + return true; + if (cmp.type_handler() == &type_handler_row) + { + // Prepare comparators for ROW elements recursively + cmp_item_row *row= static_cast(comparators[col]); + if (row->prepare_comparators(thd, funcname, &tmp, level + 1)) + return true; + } + } + return false; +} + + +bool Item_func_in::fix_for_row_comparison_using_bisection(THD *thd) +{ + if (unlikely(!(array= new (thd->mem_root) in_row(thd, arg_count-1, 0)))) + return true; + cmp_item_row *cmp= &((in_row*)array)->tmp; + if (cmp->prepare_comparators(thd, func_name_cstring(), this, 0)) + return true; + fix_in_vector(); + return false; +} + + +/** + This method is called for scalar data types when bisection is not possible, + 
  for example:
  - Some of args[1..arg_count] are not constants.
  - args[1..arg_count] are constants, but pairs {args[0],args[1..arg_count]}
    are compared by different data types, e.g.:
      WHERE decimal_expr IN (1, 1e0)
    The pair {args[0],args[1]} is compared by type_handler_decimal.
    The pair {args[0],args[2]} is compared by type_handler_double.
*/
bool Item_func_in::fix_for_scalar_comparison_using_cmp_items(THD *thd,
                                                             uint found_types)
{
  /*
    If any of the values is compared as a string, all compared-as-string
    pairs must first agree on one collation.
  */
  if (found_types & (1U << STRING_RESULT) &&
      agg_arg_charsets_for_comparison(cmp_collation, args, arg_count))
    return true;
  if (make_unique_cmp_items(thd, cmp_collation.collation))
    return true;
  return false;
}


/**
  This method is called for the ROW data type when bisection is not possible.
*/
bool Item_func_in::fix_for_row_comparison_using_cmp_items(THD *thd)
{
  if (make_unique_cmp_items(thd, cmp_collation.collation))
    return true;
  DBUG_ASSERT(get_comparator_type_handler(0) == &type_handler_row);
  DBUG_ASSERT(get_comparator_cmp_item(0));
  /* The single comparator for ROW is itself a cmp_item_row; prepare its columns. */
  cmp_item_row *cmp_row= (cmp_item_row*) get_comparator_cmp_item(0);
  return cmp_row->prepare_comparators(thd, func_name_cstring(), this, 0);
}


/* Print the predicate back as SQL: "expr [not] in (v1,...,vn)". */
void Item_func_in::print(String *str, enum_query_type query_type)
{
  args[0]->print_parenthesised(str, query_type, precedence());
  if (negated)
    str->append(STRING_WITH_LEN(" not"));
  str->append(STRING_WITH_LEN(" in ("));
  print_args(str, 1, query_type);
  str->append(STRING_WITH_LEN(")"));
}


/*
  Evaluate the function and return its value.

  SYNOPSIS
    val_int()

  DESCRIPTION
    Evaluate the function and return its value.

  IMPLEMENTATION
    If the array object is defined then the value of the function is
    calculated by means of this array.
    Otherwise several cmp_item objects are used in order to do correct
    comparison of left expression and an expression from the values list.
    One cmp_item object corresponds to one used comparison type.
Left + expression can be evaluated up to number of different used comparison + types. A bit mapped variable value_added_map is used to check whether + the left expression already was evaluated for a particular result type. + Result types are mapped to it according to their integer values i.e. + STRING_RESULT is mapped to bit 0, REAL_RESULT to bit 1, so on. + + RETURN + Value of the function +*/ + +longlong Item_func_in::val_int() +{ + DBUG_ASSERT(fixed()); + if (array) + { + bool tmp=array->find(args[0]); + /* + NULL on left -> UNKNOWN. + Found no match, and NULL on right -> UNKNOWN. + NULL on right can never give a match, as it is not stored in + array. + See also the 'bisection_possible' variable in fix_length_and_dec(). + */ + null_value=args[0]->null_value || (!tmp && have_null); + return (longlong) (!null_value && tmp != negated); + } + + if ((null_value= args[0]->real_item()->type() == NULL_ITEM)) + return 0; + + null_value= have_null; + uint idx; + if (!Predicant_to_list_comparator::cmp(this, &idx, &null_value)) + { + null_value= false; + return (longlong) (!negated); + } + return (longlong) (!null_value && negated); +} + + +void Item_func_in::mark_as_condition_AND_part(TABLE_LIST *embedding) +{ + THD *thd= current_thd; + + Query_arena *arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); + + if (!transform_into_subq_checked) + { + if ((transform_into_subq= to_be_transformed_into_in_subq(thd))) + thd->lex->current_select->in_funcs.push_back(this, thd->mem_root); + transform_into_subq_checked= true; + } + + if (arena) + thd->restore_active_arena(arena, &backup); + + emb_on_expr_nest= embedding; +} + + +class Func_handler_bit_or_int_to_ulonglong: + public Item_handled_func::Handler_ulonglong +{ +public: + Longlong_null to_longlong_null(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + Longlong_null a= item->arguments()[0]->to_longlong_null(); + return a.is_null() ? 
a : a | item->arguments()[1]->to_longlong_null(); + } +}; + + +class Func_handler_bit_or_dec_to_ulonglong: + public Item_handled_func::Handler_ulonglong +{ +public: + Longlong_null to_longlong_null(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + VDec a(item->arguments()[0]); + return a.is_null() ? Longlong_null() : + a.to_xlonglong_null() | VDec(item->arguments()[1]).to_xlonglong_null(); + } +}; + + +bool Item_func_bit_or::fix_length_and_dec(THD *thd) +{ + static Func_handler_bit_or_int_to_ulonglong ha_int_to_ull; + static Func_handler_bit_or_dec_to_ulonglong ha_dec_to_ull; + return fix_length_and_dec_op2_std(&ha_int_to_ull, &ha_dec_to_ull); +} + + +class Func_handler_bit_and_int_to_ulonglong: + public Item_handled_func::Handler_ulonglong +{ +public: + Longlong_null to_longlong_null(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + Longlong_null a= item->arguments()[0]->to_longlong_null(); + return a.is_null() ? a : a & item->arguments()[1]->to_longlong_null(); + } +}; + + +class Func_handler_bit_and_dec_to_ulonglong: + public Item_handled_func::Handler_ulonglong +{ +public: + Longlong_null to_longlong_null(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + VDec a(item->arguments()[0]); + return a.is_null() ? 
Longlong_null() : + a.to_xlonglong_null() & VDec(item->arguments()[1]).to_xlonglong_null(); + } +}; + + +bool Item_func_bit_and::fix_length_and_dec(THD *thd) +{ + static Func_handler_bit_and_int_to_ulonglong ha_int_to_ull; + static Func_handler_bit_and_dec_to_ulonglong ha_dec_to_ull; + return fix_length_and_dec_op2_std(&ha_int_to_ull, &ha_dec_to_ull); +} + +Item_cond::Item_cond(THD *thd, Item_cond *item) + :Item_bool_func(thd, item), + and_tables_cache(item->and_tables_cache) +{ + base_flags|= (item->base_flags & item_base_t::AT_TOP_LEVEL); + + /* + item->list will be copied by copy_andor_arguments() call + */ +} + + +Item_cond::Item_cond(THD *thd, Item *i1, Item *i2): + Item_bool_func(thd) +{ + list.push_back(i1, thd->mem_root); + list.push_back(i2, thd->mem_root); +} + + +Item *Item_cond_and::copy_andor_structure(THD *thd) +{ + Item_cond_and *item; + if ((item= new (thd->mem_root) Item_cond_and(thd, this))) + item->copy_andor_arguments(thd, this); + return item; +} + + +void Item_cond::copy_andor_arguments(THD *thd, Item_cond *item) +{ + List_iterator_fast li(item->list); + while (Item *it= li++) + list.push_back(it->copy_andor_structure(thd), thd->mem_root); +} + + +bool +Item_cond::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed() == 0); + List_iterator li(list); + Item *item; + uchar buff[sizeof(char*)]; // Max local vars in function + + not_null_tables_cache= 0; + used_tables_and_const_cache_init(); + + /* + and_table_cache is the value that Item_cond_or() returns for + not_null_tables() + */ + and_tables_cache= ~(table_map) 0; + + if (check_stack_overrun(thd, STACK_MIN_SIZE, buff)) + return TRUE; // Fatal error flag is set! 
+ + while (li++) + { + merge_sub_condition(li); + item= *li.ref(); + if (is_top_level_item()) + item->top_level_item(); + + /* + replace degraded condition: + was: + become: != 0 + */ + Item::Type type= item->type(); + if (type == Item::FIELD_ITEM || type == Item::REF_ITEM) + { + Query_arena backup, *arena; + Item *new_item; + arena= thd->activate_stmt_arena_if_needed(&backup); + if ((new_item= new (thd->mem_root) Item_func_ne(thd, item, new (thd->mem_root) Item_int(thd, 0, 1)))) + li.replace(item= new_item); + if (arena) + thd->restore_active_arena(arena, &backup); + } + + if (item->fix_fields_if_needed_for_bool(thd, li.ref())) + return TRUE; /* purecov: inspected */ + merge_sub_condition(li); + item= *li.ref(); // may be substituted in fix_fields/merge_item_if_possible + + used_tables_and_const_cache_join(item); + base_flags|= item->base_flags & item_base_t::MAYBE_NULL; + with_flags|= item->with_flags; + } + (void) eval_not_null_tables((void*) 0); + + /* + We have to set fixed as some other items will check it and fail if we + do not. This can be changed when we properly check if fix_fields() + fails in call cases. + */ + base_flags|= item_base_t::FIXED; + if (fix_length_and_dec(thd) || thd->is_error()) + return TRUE; + return FALSE; +} + + +/** + @brief + Merge a lower-level condition pointed by the iterator into this Item_cond + if possible + + @param li list iterator pointing to condition that must be + examined and merged if possible. + + @details + If an item pointed by the iterator is an instance of Item_cond with the + same functype() as this Item_cond (i.e. both are Item_cond_and or both are + Item_cond_or) then the arguments of that lower-level item can be merged + into the list of arguments of this upper-level Item_cond. + + This optimization reduces the depth of an AND-OR tree. + E.g. a WHERE clause like + F1 AND (F2 AND (F2 AND F4)) + is parsed into a tree with the same nested structure as defined + by braces. 
This optimization will transform such tree into + AND (F1, F2, F3, F4). + Trees of OR items are flattened as well: + ((F1 OR F2) OR (F3 OR F4)) => OR (F1, F2, F3, F4) + Items for removed AND/OR levels will dangle until the death of the + entire statement. + + The optimization is currently prepared statements and stored procedures + friendly as it doesn't allocate any memory and its effects are durable + (i.e. do not depend on PS/SP arguments). +*/ +void Item_cond::merge_sub_condition(List_iterator& li) +{ + Item *item= *li.ref(); + + /* + The check for list.is_empty() is to catch empty Item_cond_and() items. + We may encounter Item_cond_and with an empty list, because optimizer code + strips multiple equalities, combines items, then adds multiple equalities + back + */ + while (item->type() == Item::COND_ITEM && + ((Item_cond*) item)->functype() == functype() && + !((Item_cond*) item)->list.is_empty()) + { + li.replace(((Item_cond*) item)->list); + ((Item_cond*) item)->list.empty(); + item= *li.ref(); + } +} + +/* + Calculate not_null_tables_cache and and_tables_cache. +*/ + +bool +Item_cond::eval_not_null_tables(void *opt_arg) +{ + Item *item; + bool is_and_cond= functype() == Item_func::COND_AND_FUNC; + List_iterator li(list); + bool found= 0; + + not_null_tables_cache= (table_map) 0; + and_tables_cache= ~(table_map) 0; + while ((item=li++)) + { + if (item->can_eval_in_optimize() && + !item->with_sp_var() && !item->with_param() && + !cond_has_datetime_is_null(item) && is_top_level_item()) + { + if (item->eval_const_cond() == is_and_cond) + { + /* + a. This is "... AND true_cond AND ..." + In this case, true_cond has no effect on cond_and->not_null_tables() + b. This is "... OR false_cond/null cond OR ..." + In this case, false_cond has no effect on cond_or->not_null_tables() + */ + } + else + { + /* + a. This is "... AND false_cond/null_cond AND ..." + The whole condition is FALSE/UNKNOWN. + b. This is "... OR const_cond OR ..." 
+ In this case, cond_or->not_null_tables()=0, because the condition + const_cond might evaluate to true (regardless of whether some tables + were NULL-complemented). + */ + found= 1; + not_null_tables_cache= (table_map) 0; + and_tables_cache= (table_map) 0; + } + } + else + { + table_map tmp_table_map= item->not_null_tables(); + if (!found) + { + /* We should not depend on the order of items */ + not_null_tables_cache|= tmp_table_map; + } + and_tables_cache&= tmp_table_map; + } + } + return 0; +} + + +/** + @note + This implementation of the virtual function find_not_null_fields() + infers null-rejectedness if fields from tables marked in 'allowed' from + this condition. + Currently only top level AND conjuncts that are not disjunctions are used + for the inference. Usage of any top level and-or formula with l OR levels + would require a stack of bitmaps for fields of the height h=2*l+1 So we + would have to allocate h-1 additional field bitmaps for each table marked + in 'allowed'. +*/ + +bool +Item_cond::find_not_null_fields(table_map allowed) +{ + Item *item; + bool is_and_cond= functype() == Item_func::COND_AND_FUNC; + if (!is_and_cond) + { + /* Now only fields of top AND level conjuncts are taken into account */ + return false; + } + uint isnull_func_cnt= 0; + List_iterator li(list); + while ((item=li++)) + { + bool is_mult_eq= item->type() == Item::FUNC_ITEM && + ((Item_func *) item)->functype() == Item_func::MULT_EQUAL_FUNC; + if (is_mult_eq) + { + if (!item->find_not_null_fields(allowed)) + continue; + } + + if (~allowed & item->used_tables()) + continue; + + /* It is assumed that all constant conjuncts are already eliminated */ + + /* + First infer null-rejectedness of fields from all conjuncts but + IS NULL predicates + */ + bool isnull_func= item->type() == Item::FUNC_ITEM && + ((Item_func *) item)->functype() == Item_func::ISNULL_FUNC; + if (isnull_func) + { + isnull_func_cnt++; + continue; + } + if (!item->find_not_null_fields(allowed)) + continue; + } 
+ + /* Now try no get contradictions using IS NULL conjuncts */ + if (isnull_func_cnt) + { + li.rewind(); + while ((item=li++) && isnull_func_cnt) + { + if (~allowed & item->used_tables()) + continue; + + bool isnull_func= item->type() == Item::FUNC_ITEM && + ((Item_func *) item)->functype() == Item_func::ISNULL_FUNC; + if (isnull_func) + { + if (item->find_not_null_fields(allowed)) + return true; + isnull_func_cnt--; + } + } + } + return false; +} + +void Item_cond::fix_after_pullout(st_select_lex *new_parent, Item **ref, + bool merge) +{ + List_iterator li(list); + Item *item; + + used_tables_and_const_cache_init(); + + and_tables_cache= ~(table_map) 0; // Here and below we do as fix_fields does + not_null_tables_cache= 0; + + while ((item=li++)) + { + table_map tmp_table_map; + item->fix_after_pullout(new_parent, li.ref(), merge); + item= *li.ref(); + used_tables_and_const_cache_join(item); + + if (item->const_item()) + and_tables_cache= (table_map) 0; + else + { + tmp_table_map= item->not_null_tables(); + not_null_tables_cache|= tmp_table_map; + and_tables_cache&= tmp_table_map; + const_item_cache= FALSE; + } + } +} + + +bool Item_cond::walk(Item_processor processor, bool walk_subquery, void *arg) +{ + List_iterator_fast li(list); + Item *item; + while ((item= li++)) + if (item->walk(processor, walk_subquery, arg)) + return 1; + return Item_func::walk(processor, walk_subquery, arg); +} + +/** + Transform an Item_cond object with a transformer callback function. + + The function recursively applies the transform method to each + member item of the condition list. + If the call of the method for a member item returns a new item + the old item is substituted for a new one. + After this the transformer is applied to the root node + of the Item_cond object. 
+ + @param transformer the transformer callback function to be applied to + the nodes of the tree of the object + @param arg parameter to be passed to the transformer + + @return + Item returned as the result of transformation of the root node +*/ + +Item *Item_cond::do_transform(THD *thd, Item_transformer transformer, uchar *arg, + bool toplevel) +{ + DBUG_ASSERT(!thd->stmt_arena->is_stmt_prepare()); + + List_iterator li(list); + Item *item; + while ((item= li++)) + { + Item *new_item= toplevel ? item->top_level_transform(thd, transformer, arg) + : item->transform(thd, transformer, arg); + if (!new_item) + return 0; + + /* + THD::change_item_tree() should be called only if the tree was + really transformed, i.e. when a new item has been created. + Otherwise we'll be allocating a lot of unnecessary memory for + change records at each execution. + */ + if (toplevel) + *li.ref()= new_item; + else if (new_item != item) + thd->change_item_tree(li.ref(), new_item); + } + return Item_func::transform(thd, transformer, arg); +} + + +/** + Compile Item_cond object with a processor and a transformer + callback functions. + + First the function applies the analyzer to the root node of + the Item_func object. Then if the analyzer succeeeds (returns TRUE) + the function recursively applies the compile method to member + item of the condition list. + If the call of the method for a member item returns a new item + the old item is substituted for a new one. + After this the transformer is applied to the root node + of the Item_cond object. 
+ + @param analyzer the analyzer callback function to be applied to the + nodes of the tree of the object + @param[in,out] arg_p parameter to be passed to the analyzer + @param transformer the transformer callback function to be applied to the + nodes of the tree of the object + @param arg_t parameter to be passed to the transformer + + @return + Item returned as the result of transformation of the root node +*/ + +Item *Item_cond::do_compile(THD *thd, Item_analyzer analyzer, uchar **arg_p, + Item_transformer transformer, uchar *arg_t, bool toplevel) +{ + if (!(this->*analyzer)(arg_p)) + return 0; + + List_iterator li(list); + Item *item; + while ((item= li++)) + { + /* + The same parameter value of arg_p must be passed + to analyze any argument of the condition formula. + */ + uchar *arg_v= *arg_p; + Item *new_item= item->compile(thd, analyzer, &arg_v, transformer, arg_t); + if (!new_item || new_item == item) + continue; + if (toplevel) + *li.ref()= new_item; + else + thd->change_item_tree(li.ref(), new_item); + } + return Item_func::transform(thd, transformer, arg_t); +} + + +Item *Item_cond::propagate_equal_fields(THD *thd, + const Context &ctx, + COND_EQUAL *cond) +{ + DBUG_ASSERT(!thd->stmt_arena->is_stmt_prepare()); + DBUG_ASSERT(arg_count == 0); + List_iterator li(list); + while (li++) + { + /* + The exact value of the last parameter to propagate_and_change_item_tree() + is not important at this point. Item_func derivants will create and + pass their own context to the arguments. 
+ */ + propagate_and_change_item_tree(thd, li.ref(), cond, Context_boolean()); + } + return this; +} + +void Item_cond::traverse_cond(Cond_traverser traverser, + void *arg, traverse_order order) +{ + List_iterator li(list); + Item *item; + + switch(order) { + case(PREFIX): + (*traverser)(this, arg); + while ((item= li++)) + { + item->traverse_cond(traverser, arg, order); + } + (*traverser)(NULL, arg); + break; + case(POSTFIX): + while ((item= li++)) + { + item->traverse_cond(traverser, arg, order); + } + (*traverser)(this, arg); + } +} + +/** + Move SUM items out from item tree and replace with reference. + + The split is done to get an unique item for each SUM function + so that we can easily find and calculate them. + (Calculation done by update_sum_func() and copy_sum_funcs() in + sql_select.cc) + + @param thd Thread handler + @param ref_pointer_array Pointer to array of reference fields + @param fields All fields in select + + @note + This function is run on all expression (SELECT list, WHERE, HAVING etc) + that have or refer (HAVING) to a SUM expression. 
+*/ + +void Item_cond::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List &fields, uint flags) +{ + List_iterator li(list); + Item *item; + while ((item= li++)) + item->split_sum_func2(thd, ref_pointer_array, fields, li.ref(), + flags | SPLIT_SUM_SKIP_REGISTERED); +} + + +table_map +Item_cond::used_tables() const +{ // This caches used_tables + return used_tables_cache; +} + + +void Item_cond::print(String *str, enum_query_type query_type) +{ + List_iterator_fast li(list); + Item *item; + if ((item=li++)) + item->print_parenthesised(str, query_type, precedence()); + while ((item=li++)) + { + str->append(' '); + str->append(func_name_cstring()); + str->append(' '); + item->print_parenthesised(str, query_type, precedence()); + } +} + + +void Item_cond::neg_arguments(THD *thd) +{ + List_iterator li(list); + Item *item; + while ((item= li++)) /* Apply not transformation to the arguments */ + { + Item *new_item= item->neg_transformer(thd); + if (!new_item) + { + if (!(new_item= new (thd->mem_root) Item_func_not(thd, item))) + return; // Fatal OEM error + } + (void) li.replace(new_item); + } +} + + +/** + @brief + Building clone for Item_cond + + @param thd thread handle + @param mem_root part of the memory for the clone + + @details + This method gets copy of the current item and also + build clones for its elements. For this elements + build_copy is called again. 
+ + @retval + clone of the item + 0 if an error occurred +*/ + +Item *Item_cond::build_clone(THD *thd) +{ + List_iterator_fast li(list); + Item *item; + Item_cond *copy= (Item_cond *) get_copy(thd); + if (!copy) + return 0; + copy->list.empty(); + while ((item= li++)) + { + Item *arg_clone= item->build_clone(thd); + if (!arg_clone) + return 0; + if (copy->list.push_back(arg_clone, thd->mem_root)) + return 0; + } + return copy; +} + + +bool Item_cond::excl_dep_on_table(table_map tab_map) +{ + if (used_tables() & (OUTER_REF_TABLE_BIT | RAND_TABLE_BIT)) + return false; + if (!(used_tables() & ~tab_map)) + return true; + List_iterator_fast li(list); + Item *item; + while ((item= li++)) + { + if (!item->excl_dep_on_table(tab_map)) + return false; + } + return true; +} + + +bool Item_cond::excl_dep_on_grouping_fields(st_select_lex *sel) +{ + if (has_rand_bit()) + return false; + List_iterator_fast li(list); + Item *item; + while ((item= li++)) + { + if (!item->excl_dep_on_grouping_fields(sel)) + return false; + } + return true; +} + + +void Item_cond_and::mark_as_condition_AND_part(TABLE_LIST *embedding) +{ + List_iterator li(list); + Item *item; + while ((item=li++)) + { + item->mark_as_condition_AND_part(embedding); + } +} + +/** + Evaluation of AND(expr, expr, expr ...). + + @note + There are AND expressions for which we don't care if the + result is NULL or 0. This is the case for: + - WHERE clause + - HAVING clause + - IF(expression) + For these we mark them as "top_level_items" + + @retval + 1 If all expressions are true + @retval + 0 If any of the expressions are false or if we find a NULL expression and + this is a top_level_item. 
+ @retval + NULL if all expression are either 1 or NULL +*/ + + +longlong Item_cond_and::val_int() +{ + DBUG_ASSERT(fixed()); + List_iterator_fast li(list); + Item *item; + null_value= 0; + while ((item=li++)) + { + if (!item->val_bool()) + { + if (is_top_level_item() || !(null_value= item->null_value)) + return 0; + } + } + return null_value ? 0 : 1; +} + + +longlong Item_cond_or::val_int() +{ + DBUG_ASSERT(fixed()); + List_iterator_fast li(list); + Item *item; + null_value=0; + while ((item=li++)) + { + if (item->val_bool()) + { + null_value=0; + return 1; + } + if (item->null_value) + null_value=1; + } + return 0; +} + +Item *Item_cond_or::copy_andor_structure(THD *thd) +{ + Item_cond_or *item; + if ((item= new (thd->mem_root) Item_cond_or(thd, this))) + item->copy_andor_arguments(thd, this); + return item; +} + + +/** + Create an AND expression from two expressions. + + @param a expression or NULL + @param b expression. + @param org_item Don't modify a if a == *org_item. + If a == NULL, org_item is set to point at b, + to ensure that future calls will not modify b. + + @note + This will not modify item pointed to by org_item or b + The idea is that one can call this in a loop and create and + 'and' over all items without modifying any of the original items. 
+ + @retval + NULL Error + @retval + Item +*/ + +Item *and_expressions(THD *thd, Item *a, Item *b, Item **org_item) +{ + if (!a) + return (*org_item= (Item*) b); + if (a == *org_item) + { + Item_cond *res; + if ((res= new (thd->mem_root) Item_cond_and(thd, a, (Item*) b))) + { + res->used_tables_cache= a->used_tables() | b->used_tables(); + res->not_null_tables_cache= a->not_null_tables() | b->not_null_tables(); + } + return res; + } + if (((Item_cond_and*) a)->add((Item*) b, thd->mem_root)) + return 0; + ((Item_cond_and*) a)->used_tables_cache|= b->used_tables(); + ((Item_cond_and*) a)->not_null_tables_cache|= b->not_null_tables(); + return a; +} + + +bool Item_func_null_predicate::count_sargable_conds(void *arg) +{ + ((SELECT_LEX*) arg)->cond_count++; + return 0; +} + + +longlong Item_func_isnull::val_int() +{ + DBUG_ASSERT(fixed()); + if (const_item() && !args[0]->maybe_null()) + return 0; + return args[0]->is_null() ? 1: 0; +} + + +bool Item_func_isnull::find_not_null_fields(table_map allowed) +{ + if (!(~allowed & used_tables()) && + args[0]->real_item()->type() == Item::FIELD_ITEM) + { + Field *field= ((Item_field *)(args[0]->real_item()))->field; + if (bitmap_is_set(&field->table->tmp_set, field->field_index)) + return true; + } + return false; +} + + +void Item_func_isnull::print(String *str, enum_query_type query_type) +{ + if (const_item() && !args[0]->maybe_null() && + !(query_type & (QT_NO_DATA_EXPANSION | QT_VIEW_INTERNAL))) + str->append(STRING_WITH_LEN("/*always not null*/ 1")); + else + args[0]->print_parenthesised(str, query_type, precedence()); + str->append(STRING_WITH_LEN(" is null")); +} + + +longlong Item_is_not_null_test::val_int() +{ + DBUG_ASSERT(fixed()); + DBUG_ENTER("Item_is_not_null_test::val_int"); + if (const_item() && !args[0]->maybe_null()) + DBUG_RETURN(1); + if (args[0]->is_null()) + { + DBUG_PRINT("info", ("null")); + owner->was_null|= 1; + DBUG_RETURN(0); + } + else + DBUG_RETURN(1); +} + +/** + Optimize case of not_null_column 
IS NULL. +*/ +void Item_is_not_null_test::update_used_tables() +{ + if (!args[0]->maybe_null()) + used_tables_cache= 0; /* is always true */ + else + args[0]->update_used_tables(); +} + + +longlong Item_func_isnotnull::val_int() +{ + DBUG_ASSERT(fixed()); + return args[0]->is_null() ? 0 : 1; +} + + +void Item_func_isnotnull::print(String *str, enum_query_type query_type) +{ + args[0]->print_parenthesised(str, query_type, precedence()); + str->append(STRING_WITH_LEN(" is not null")); +} + + +bool Item_bool_func2::count_sargable_conds(void *arg) +{ + ((SELECT_LEX*) arg)->cond_count++; + return 0; +} + +void Item_func_like::print(String *str, enum_query_type query_type) +{ + args[0]->print_parenthesised(str, query_type, precedence()); + str->append(' '); + if (negated) + str->append(STRING_WITH_LEN(" not ")); + str->append(func_name_cstring()); + str->append(' '); + if (escape_used_in_parsing) + { + args[1]->print_parenthesised(str, query_type, precedence()); + str->append(STRING_WITH_LEN(" escape ")); + escape_item->print_parenthesised(str, query_type, higher_precedence()); + } + else + args[1]->print_parenthesised(str, query_type, higher_precedence()); +} + + +longlong Item_func_like::val_int() +{ + DBUG_ASSERT(fixed()); + DBUG_ASSERT(escape != ESCAPE_NOT_INITIALIZED); + String* res= args[0]->val_str(&cmp_value1); + if (args[0]->null_value) + { + null_value=1; + return 0; + } + String* res2= args[1]->val_str(&cmp_value2); + if (args[1]->null_value) + { + null_value=1; + return 0; + } + null_value=0; + if (canDoTurboBM) + return turboBM_matches(res->ptr(), res->length()) ? !negated : negated; + return cmp_collation.collation->wildcmp( + res->ptr(),res->ptr()+res->length(), + res2->ptr(),res2->ptr()+res2->length(), + escape,wild_one,wild_many) ? 
negated : !negated; +} + + +/** + We can optimize a where if first character isn't a wildcard +*/ + +bool Item_func_like::with_sargable_pattern() const +{ + if (negated) + return false; + + if (!args[1]->can_eval_in_optimize()) + return false; + + String* res2= args[1]->val_str((String *) &cmp_value2); + if (!res2) + return false; + + if (!res2->length()) // Can optimize empty wildcard: column LIKE '' + return true; + + DBUG_ASSERT(res2->ptr()); + char first= res2->ptr()[0]; + return first != wild_many && first != wild_one; +} + + +/* + subject LIKE pattern + removes subject's dependency on PAD_CHAR_TO_FULL_LENGTH + if pattern ends with the '%' wildcard. +*/ +Sql_mode_dependency Item_func_like::value_depends_on_sql_mode() const +{ + if (!args[1]->value_depends_on_sql_mode_const_item()) + return Item_func::value_depends_on_sql_mode(); + StringBuffer<64> patternbuf; + String *pattern= args[1]->val_str_ascii(&patternbuf); + if (!pattern || !pattern->length()) + return Sql_mode_dependency(); // Will return NULL or 0 + DBUG_ASSERT(pattern->charset()->mbminlen == 1); + if (pattern->ptr()[pattern->length() - 1] != '%') + return Item_func::value_depends_on_sql_mode(); + return ((args[0]->value_depends_on_sql_mode() | + args[1]->value_depends_on_sql_mode()) & + Sql_mode_dependency(~0, ~MODE_PAD_CHAR_TO_FULL_LENGTH)). + soft_to_hard(); +} + + +SEL_TREE *Item_func_like::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) +{ + MEM_ROOT *tmp_root= param->mem_root; + param->thd->mem_root= param->old_root; + bool sargable_pattern= with_sargable_pattern(); + param->thd->mem_root= tmp_root; + return sargable_pattern ? + Item_bool_func2::get_mm_tree(param, cond_ptr) : + Item_func::get_mm_tree(param, cond_ptr); +} + + +bool fix_escape_item(THD *thd, Item *escape_item, String *tmp_str, + bool escape_used_in_parsing, CHARSET_INFO *cmp_cs, + int *escape) +{ + /* + ESCAPE clause accepts only constant arguments and Item_param. 
+ + Subqueries during context_analysis_only might decide they're + const_during_execution, but not quite const yet, not evaluate-able. + This is fine, as most of context_analysis_only modes will never + reach val_int(), so we won't need the value. + CONTEXT_ANALYSIS_ONLY_DERIVED being a notable exception here. + */ + if (!escape_item->const_during_execution() || + (!escape_item->const_item() && + !(thd->lex->context_analysis_only & ~CONTEXT_ANALYSIS_ONLY_DERIVED))) + { + my_error(ER_WRONG_ARGUMENTS,MYF(0),"ESCAPE"); + return TRUE; + } + + IF_DBUG(*escape= ESCAPE_NOT_INITIALIZED,); + + if (escape_item->const_item()) + { + /* If we are on execution stage */ + /* XXX is it safe to evaluate is_expensive() items here? */ + String *escape_str= escape_item->val_str(tmp_str); + if (escape_str) + { + const char *escape_str_ptr= escape_str->ptr(); + if (escape_used_in_parsing && ( + (((thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES) && + escape_str->numchars() != 1) || + escape_str->numchars() > 1))) + { + my_error(ER_WRONG_ARGUMENTS,MYF(0),"ESCAPE"); + return TRUE; + } + + if (cmp_cs->use_mb()) + { + CHARSET_INFO *cs= escape_str->charset(); + my_wc_t wc; + int rc= cs->mb_wc(&wc, + (const uchar*) escape_str_ptr, + (const uchar*) escape_str_ptr + + escape_str->length()); + *escape= (int) (rc > 0 ? wc : '\\'); + } + else + { + /* + In the case of 8bit character set, we pass native + code instead of Unicode code as "escape" argument. + Convert to "cs" if charset of escape differs. + */ + uint32 unused; + if (escape_str->needs_conversion(escape_str->length(), + escape_str->charset(),cmp_cs,&unused)) + { + char ch; + uint errors; + uint32 cnvlen= copy_and_convert(&ch, 1, cmp_cs, escape_str_ptr, + escape_str->length(), + escape_str->charset(), &errors); + *escape= cnvlen ? ch : '\\'; + } + else + *escape= escape_str_ptr ? 
*escape_str_ptr : '\\'; + } + } + else + *escape= '\\'; + } + + return FALSE; +} + +bool Item_func_like::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed() == 0); + if (Item_bool_func2::fix_fields(thd, ref) || + escape_item->fix_fields_if_needed_for_scalar(thd, &escape_item) || + fix_escape_item(thd, escape_item, &cmp_value1, escape_used_in_parsing, + cmp_collation.collation, &escape)) + return TRUE; + + if (escape_item->const_item()) + { + /* + We could also do boyer-more for non-const items, but as we would have to + recompute the tables for each row it's not worth it. + */ + if (args[1]->can_eval_in_optimize() && !use_strnxfrm(collation.collation)) + { + String* res2= args[1]->val_str(&cmp_value2); + if (!res2) + return FALSE; // Null argument + + const size_t len= res2->length(); + + /* + len must be > 2 ('%pattern%') + heuristic: only do TurboBM for pattern_len > 2 + */ + if (len <= 2) + return FALSE; + + const char* first= res2->ptr(); + const char* last= first + len - 1; + + if (len > MIN_TURBOBM_PATTERN_LEN + 2 && + *first == wild_many && + *last == wild_many) + { + const char* tmp = first + 1; + for (; *tmp != wild_many && *tmp != wild_one && *tmp != escape; tmp++) ; + canDoTurboBM = (tmp == last) && !args[0]->collation.collation->use_mb(); + } + if (canDoTurboBM) + { + pattern_len = (int) len - 2; + pattern = thd->strmake(first + 1, pattern_len); + DBUG_PRINT("info", ("Initializing pattern: '%s'", first)); + int *suff = (int*) thd->alloc((int) (sizeof(int)* + ((pattern_len + 1)*2+ + alphabet_size))); + bmGs = suff + pattern_len + 1; + bmBc = bmGs + pattern_len + 1; + turboBM_compute_good_suffix_shifts(suff); + turboBM_compute_bad_character_shifts(); + DBUG_PRINT("info",("done")); + } + use_sampling= (len > 2 && (*first == wild_many || *first == wild_one)); + } + } + return FALSE; +} + + +void Item_func_like::cleanup() +{ + canDoTurboBM= FALSE; + Item_bool_func2::cleanup(); +} + + +bool Item_func_like::find_selective_predicates_list_processor(void 
*arg) +{ + find_selective_predicates_list_processor_data *data= + (find_selective_predicates_list_processor_data *) arg; + if (use_sampling && used_tables() == data->table->map) + { + THD *thd= data->table->in_use; + COND_STATISTIC *stat; + Item *arg0; + if (!(stat= (COND_STATISTIC *) thd->alloc(sizeof(COND_STATISTIC)))) + return TRUE; + stat->cond= this; + arg0= args[0]->real_item(); + if (args[1]->const_item() && arg0->type() == FIELD_ITEM) + stat->field_arg= ((Item_field *)arg0)->field; + else + stat->field_arg= NULL; + data->list.push_back(stat, thd->mem_root); + } + return FALSE; +} + + +int Regexp_processor_pcre::default_regex_flags() +{ + return default_regex_flags_pcre(current_thd); +} + +void Regexp_processor_pcre::cleanup() +{ + pcre2_match_data_free(m_pcre_match_data); + pcre2_code_free(m_pcre); + reset(); +} + +void Regexp_processor_pcre::init(CHARSET_INFO *data_charset, int extra_flags) +{ + m_library_flags= default_regex_flags() | extra_flags | + (data_charset != &my_charset_bin ? + (PCRE2_UTF | PCRE2_UCP) : 0) | + ((data_charset->state & + (MY_CS_BINSORT | MY_CS_CSSORT)) ? 0 : PCRE2_CASELESS); + + // Convert text data to utf-8. + m_library_charset= data_charset == &my_charset_bin ? + &my_charset_bin : &my_charset_utf8mb3_general_ci; + + m_conversion_is_needed= (data_charset != &my_charset_bin) && + !my_charset_same(data_charset, m_library_charset); +} + +/** + Convert string to lib_charset, if needed. +*/ +String *Regexp_processor_pcre::convert_if_needed(String *str, String *converter) +{ + if (m_conversion_is_needed) + { + uint dummy_errors; + if (converter->copy(str->ptr(), str->length(), str->charset(), + m_library_charset, &dummy_errors)) + return NULL; + str= converter; + } + return str; +} + + +/** + @brief Compile regular expression. + + @param[in] pattern the pattern to compile from. + @param[in] send_error send error message if any. + + @details Make necessary character set conversion then + compile regular expression passed in the args[1]. 
+ + @retval false success. + @retval true error occurred. + */ + +bool Regexp_processor_pcre::compile(String *pattern, bool send_error) +{ + int pcreErrorNumber; + PCRE2_SIZE pcreErrorOffset; + + if (is_compiled()) + { + if (!stringcmp(pattern, &m_prev_pattern)) + return false; + cleanup(); + m_prev_pattern.copy(*pattern); + } + + if (!(pattern= convert_if_needed(pattern, &pattern_converter))) + return true; + + pcre2_compile_context *cctx= NULL; +#ifndef pcre2_set_depth_limit + // old pcre2 uses stack - put a limit on that (new pcre2 prefers heap) + cctx= pcre2_compile_context_create(NULL); + pcre2_set_compile_recursion_guard(cctx, [](uint32_t cur, void *end) -> int + { return available_stack_size(&cur, end) < STACK_MIN_SIZE; }, + current_thd->mysys_var->stack_ends_here); +#endif + m_pcre= pcre2_compile((PCRE2_SPTR8) pattern->ptr(), pattern->length(), + m_library_flags, + &pcreErrorNumber, &pcreErrorOffset, cctx); + pcre2_compile_context_free(cctx); // NULL is ok here + + if (unlikely(m_pcre == NULL)) + { + if (send_error) + { + char buff[MAX_FIELD_WIDTH]; + int lmsg= pcre2_get_error_message(pcreErrorNumber, + (PCRE2_UCHAR8 *)buff, sizeof(buff)); + if (lmsg >= 0) + my_snprintf(buff+lmsg, sizeof(buff)-lmsg, + " at offset %d", pcreErrorOffset); + my_error(ER_REGEXP_ERROR, MYF(0), buff); + } + return true; + } + m_pcre_match_data= pcre2_match_data_create_from_pattern(m_pcre, NULL); + if (m_pcre_match_data == NULL) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return true; + } + return false; +} + + +bool Regexp_processor_pcre::compile(Item *item, bool send_error) +{ + char buff[MAX_FIELD_WIDTH]; + String tmp(buff, sizeof(buff), &my_charset_bin); + String *pattern= item->val_str(&tmp); + if (unlikely(item->null_value) || (unlikely(compile(pattern, send_error)))) + return true; + return false; +} + + +/** + Send a warning explaining an error code returned by pcre_exec(). 
+*/ +void Regexp_processor_pcre::pcre_exec_warn(int rc) const +{ + PCRE2_UCHAR8 buf[128]; + THD *thd= current_thd; + + int errlen= pcre2_get_error_message(rc, buf, sizeof(buf)); + if (errlen <= 0) + { + my_snprintf((char *)buf, sizeof(buf), "pcre_exec: Internal error (%d)", rc); + } + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_REGEXP_ERROR, ER_THD(thd, ER_REGEXP_ERROR), buf); +} + + +/** + Call pcre_exec() and send a warning if pcre_exec() returned with an error. +*/ +int Regexp_processor_pcre::pcre_exec_with_warn(const pcre2_code *code, + pcre2_match_data *data, + const char *subject, + int length, int startoffset, + int options) +{ + pcre2_match_context *mctx= NULL; +#ifndef pcre2_set_depth_limit + // old pcre2 uses stack - put a limit on that (new pcre2 prefers heap) + mctx= pcre2_match_context_create(NULL); + pcre2_set_recursion_limit(mctx, + available_stack_size(&mctx, current_thd->mysys_var->stack_ends_here)/544); +#endif + int rc= pcre2_match(code, (PCRE2_SPTR8) subject, (PCRE2_SIZE) length, + (PCRE2_SIZE) startoffset, options, data, mctx); + pcre2_match_context_free(mctx); // NULL is ok here + DBUG_EXECUTE_IF("pcre_exec_error_123", rc= -123;); + if (unlikely(rc < PCRE2_ERROR_NOMATCH)) + { + m_SubStrVec= NULL; + pcre_exec_warn(rc); + } + else + m_SubStrVec= pcre2_get_ovector_pointer(data); + return rc; +} + + +bool Regexp_processor_pcre::exec(const char *str, size_t length, size_t offset) +{ + m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, m_pcre_match_data, + str, (int)length, (int)offset, 0); + return false; +} + + +bool Regexp_processor_pcre::exec(String *str, int offset, + uint n_result_offsets_to_convert) +{ + if (!(str= convert_if_needed(str, &subject_converter))) + return true; + m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, m_pcre_match_data, + str->ptr(), str->length(), offset, 0); + if (m_pcre_exec_rc > 0) + { + uint i; + for (i= 0; i < n_result_offsets_to_convert; i++) + { + /* + Convert byte offset into character offset. 
+ */ + m_SubStrVec[i]= (int) str->charset()->numchars(str->ptr(), + str->ptr() + + m_SubStrVec[i]); + } + } + return false; +} + + +bool Regexp_processor_pcre::exec(Item *item, int offset, + uint n_result_offsets_to_convert) +{ + char buff[MAX_FIELD_WIDTH]; + String tmp(buff,sizeof(buff),&my_charset_bin); + String *res= item->val_str(&tmp); + if (item->null_value) + return true; + return exec(res, offset, n_result_offsets_to_convert); +} + + +void Regexp_processor_pcre::fix_owner(Item_func *owner, + Item *subject_arg, + Item *pattern_arg) +{ + if (!is_compiled() && + pattern_arg->const_item() && + !pattern_arg->is_expensive()) + { + if (compile(pattern_arg, true)) + { + owner->set_maybe_null(); // Will always return NULL + return; + } + set_const(true); + owner->base_flags|= subject_arg->base_flags & item_base_t::MAYBE_NULL; + } + else + owner->set_maybe_null(); +} + + +bool +Item_func_regex::fix_length_and_dec(THD *thd) +{ + if (Item_bool_func::fix_length_and_dec(thd) || + agg_arg_charsets_for_comparison(cmp_collation, args, 2)) + return TRUE; + + re.init(cmp_collation.collation, 0); + re.fix_owner(this, args[0], args[1]); + return FALSE; +} + + +longlong Item_func_regex::val_int() +{ + DBUG_ASSERT(fixed()); + if ((null_value= re.recompile(args[1]))) + return 0; + + if ((null_value= re.exec(args[0], 0, 0))) + return 0; + + return re.match(); +} + + +bool +Item_func_regexp_instr::fix_length_and_dec(THD *thd) +{ + if (agg_arg_charsets_for_comparison(cmp_collation, args, 2)) + return TRUE; + + re.init(cmp_collation.collation, 0); + re.fix_owner(this, args[0], args[1]); + max_length= MY_INT32_NUM_DECIMAL_DIGITS; // See also Item_func_locate + return FALSE; +} + + +longlong Item_func_regexp_instr::val_int() +{ + DBUG_ASSERT(fixed()); + if ((null_value= re.recompile(args[1]))) + return 0; + + if ((null_value= re.exec(args[0], 0, 1))) + return 0; + + return re.match() ? 
(longlong) (re.subpattern_start(0) + 1) : 0; +} + + +#ifdef LIKE_CMP_TOUPPER +#define likeconv(cs,A) (uchar) (cs)->toupper(A) +#else +#define likeconv(cs,A) (uchar) (cs)->sort_order[(uchar) (A)] +#endif + + +/** + Precomputation dependent only on pattern_len. +*/ + +void Item_func_like::turboBM_compute_suffixes(int *suff) +{ + const int plm1 = pattern_len - 1; + int f = 0; + int g = plm1; + int *const splm1 = suff + plm1; + CHARSET_INFO *cs= cmp_collation.collation; + + *splm1 = pattern_len; + + if (!cs->sort_order) + { + int i; + for (i = pattern_len - 2; i >= 0; i--) + { + int tmp = *(splm1 + i - f); + if (g < i && tmp < i - g) + suff[i] = tmp; + else + { + if (i < g) + g = i; // g = MY_MIN(i, g) + f = i; + while (g >= 0 && pattern[g] == pattern[g + plm1 - f]) + g--; + suff[i] = f - g; + } + } + } + else + { + int i; + for (i = pattern_len - 2; 0 <= i; --i) + { + int tmp = *(splm1 + i - f); + if (g < i && tmp < i - g) + suff[i] = tmp; + else + { + if (i < g) + g = i; // g = MY_MIN(i, g) + f = i; + while (g >= 0 && + likeconv(cs, pattern[g]) == likeconv(cs, pattern[g + plm1 - f])) + g--; + suff[i] = f - g; + } + } + } +} + + +/** + Precomputation dependent only on pattern_len. +*/ + +void Item_func_like::turboBM_compute_good_suffix_shifts(int *suff) +{ + turboBM_compute_suffixes(suff); + + int *end = bmGs + pattern_len; + int *k; + for (k = bmGs; k < end; k++) + *k = pattern_len; + + int tmp; + int i; + int j = 0; + const int plm1 = pattern_len - 1; + for (i = plm1; i > -1; i--) + { + if (suff[i] == i + 1) + { + for (tmp = plm1 - i; j < tmp; j++) + { + int *tmp2 = bmGs + j; + if (*tmp2 == pattern_len) + *tmp2 = tmp; + } + } + } + + int *tmp2; + for (tmp = plm1 - i; j < tmp; j++) + { + tmp2 = bmGs + j; + if (*tmp2 == pattern_len) + *tmp2 = tmp; + } + + tmp2 = bmGs + plm1; + for (i = 0; i <= pattern_len - 2; i++) + *(tmp2 - suff[i]) = plm1 - i; +} + + +/** + Precomputation dependent on pattern_len. 
+*/ + +void Item_func_like::turboBM_compute_bad_character_shifts() +{ + int *i; + int *end = bmBc + alphabet_size; + int j; + const int plm1 = pattern_len - 1; + CHARSET_INFO *cs= cmp_collation.collation; + + for (i = bmBc; i < end; i++) + *i = pattern_len; + + if (!cs->sort_order) + { + for (j = 0; j < plm1; j++) + bmBc[(uint) (uchar) pattern[j]] = plm1 - j; + } + else + { + for (j = 0; j < plm1; j++) + bmBc[(uint) likeconv(cs,pattern[j])] = plm1 - j; + } +} + + +/** + Search for pattern in text. + + @return + returns true/false for match/no match +*/ + +bool Item_func_like::turboBM_matches(const char* text, int text_len) const +{ + int bcShift; + int turboShift; + int shift = pattern_len; + int j = 0; + int u = 0; + CHARSET_INFO *cs= cmp_collation.collation; + + const int plm1= pattern_len - 1; + const int tlmpl= text_len - pattern_len; + + /* Searching */ + if (!cs->sort_order) + { + while (j <= tlmpl) + { + int i= plm1; + while (i >= 0 && pattern[i] == text[i + j]) + { + i--; + if (i == plm1 - shift) + i-= u; + } + if (i < 0) + return 1; + + const int v= plm1 - i; + turboShift = u - v; + bcShift = bmBc[(uint) (uchar) text[i + j]] - plm1 + i; + shift = MY_MAX(turboShift, bcShift); + shift = MY_MAX(shift, bmGs[i]); + if (shift == bmGs[i]) + u = MY_MIN(pattern_len - shift, v); + else + { + if (turboShift < bcShift) + shift = MY_MAX(shift, u + 1); + u = 0; + } + j+= shift; + } + return 0; + } + else + { + while (j <= tlmpl) + { + int i= plm1; + while (i >= 0 && likeconv(cs,pattern[i]) == likeconv(cs,text[i + j])) + { + i--; + if (i == plm1 - shift) + i-= u; + } + if (i < 0) + return 1; + + const int v= plm1 - i; + turboShift = u - v; + bcShift = bmBc[(uint) likeconv(cs, text[i + j])] - plm1 + i; + shift = MY_MAX(turboShift, bcShift); + shift = MY_MAX(shift, bmGs[i]); + if (shift == bmGs[i]) + u = MY_MIN(pattern_len - shift, v); + else + { + if (turboShift < bcShift) + shift = MY_MAX(shift, u + 1); + u = 0; + } + j+= shift; + } + return 0; + } +} + + +/** + Make a 
logical XOR of the arguments. + + If either operator is NULL, return NULL. + + @todo + (low priority) Change this to be optimized as: @n + A XOR B -> (A) == 1 AND (B) <> 1) OR (A <> 1 AND (B) == 1) @n + To be able to do this, we would however first have to extend the MySQL + range optimizer to handle OR better. + + @note + As we don't do any index optimization on XOR this is not going to be + very fast to use. +*/ + +longlong Item_func_xor::val_int() +{ + DBUG_ASSERT(fixed()); + int result= 0; + null_value= false; + for (uint i= 0; i < arg_count; i++) + { + result^= (args[i]->val_int() != 0); + if (args[i]->null_value) + { + null_value= true; + return 0; + } + } + return result; +} + +/** + Apply NOT transformation to the item and return a new one. + + + Transform the item using next rules: + @verbatim + a AND b AND ... -> NOT(a) OR NOT(b) OR ... + a OR b OR ... -> NOT(a) AND NOT(b) AND ... + NOT(a) -> a + a = b -> a != b + a != b -> a = b + a < b -> a >= b + a >= b -> a < b + a > b -> a <= b + a <= b -> a > b + IS NULL(a) -> IS NOT NULL(a) + IS NOT NULL(a) -> IS NULL(a) + @endverbatim + + @param thd thread handler + + @return + New item or + NULL if we cannot apply NOT transformation (see Item::neg_transformer()). 
+*/ + +Item *Item_func_not::neg_transformer(THD *thd) /* NOT(x) -> x */ +{ + return args[0]; +} + + +bool Item_func_not::fix_fields(THD *thd, Item **ref) +{ + args[0]->under_not(this); + if (args[0]->type() == FIELD_ITEM) + { + /* replace "NOT " with " == 0" */ + Query_arena backup, *arena; + Item *new_item; + bool rc= TRUE; + arena= thd->activate_stmt_arena_if_needed(&backup); + if ((new_item= new (thd->mem_root) Item_func_eq(thd, args[0], new (thd->mem_root) Item_int(thd, 0, 1)))) + { + new_item->name= name; + rc= (*ref= new_item)->fix_fields(thd, ref); + } + if (arena) + thd->restore_active_arena(arena, &backup); + return rc; + } + return Item_func::fix_fields(thd, ref); +} + + +Item *Item_bool_rowready_func2::neg_transformer(THD *thd) +{ + Item *item= negated_item(thd); + return item; +} + +/** + XOR can be negated by negating one of the operands: + + NOT (a XOR b) => (NOT a) XOR b + => a XOR (NOT b) + + @param thd Thread handle + @return New negated item +*/ +Item *Item_func_xor::neg_transformer(THD *thd) +{ + Item *neg_operand; + Item_func_xor *new_item; + if ((neg_operand= args[0]->neg_transformer(thd))) + // args[0] has neg_tranformer + new_item= new(thd->mem_root) Item_func_xor(thd, neg_operand, args[1]); + else if ((neg_operand= args[1]->neg_transformer(thd))) + // args[1] has neg_tranformer + new_item= new(thd->mem_root) Item_func_xor(thd, args[0], neg_operand); + else + { + neg_operand= new(thd->mem_root) Item_func_not(thd, args[0]); + new_item= new(thd->mem_root) Item_func_xor(thd, neg_operand, args[1]); + } + return new_item; +} + + +/** + a IS NULL -> a IS NOT NULL. +*/ +Item *Item_func_isnull::neg_transformer(THD *thd) +{ + Item *item= new (thd->mem_root) Item_func_isnotnull(thd, args[0]); + return item; +} + + +/** + a IS NOT NULL -> a IS NULL. 
+*/ +Item *Item_func_isnotnull::neg_transformer(THD *thd) +{ + Item *item= new (thd->mem_root) Item_func_isnull(thd, args[0]); + return item; +} + + +Item *Item_cond_and::neg_transformer(THD *thd) /* NOT(a AND b AND ...) -> */ + /* NOT a OR NOT b OR ... */ +{ + neg_arguments(thd); + Item *item= new (thd->mem_root) Item_cond_or(thd, list); + return item; +} + + +bool +Item_cond_and::set_format_by_check_constraint( + Send_field_extended_metadata *to) const +{ + List_iterator_fast li(const_cast&>(list)); + Item *item; + while ((item= li++)) + { + if (item->set_format_by_check_constraint(to)) + return true; + } + return false; +} + + +Item *Item_cond_or::neg_transformer(THD *thd) /* NOT(a OR b OR ...) -> */ + /* NOT a AND NOT b AND ... */ +{ + neg_arguments(thd); + Item *item= new (thd->mem_root) Item_cond_and(thd, list); + return item; +} + + +Item *Item_func_nop_all::neg_transformer(THD *thd) +{ + /* "NOT (e $cmp$ ANY (SELECT ...)) -> e $rev_cmp$" ALL (SELECT ...) */ + Item_func_not_all *new_item= new (thd->mem_root) Item_func_not_all(thd, args[0]); + Item_allany_subselect *allany= (Item_allany_subselect*)args[0]; + allany->create_comp_func(FALSE); + allany->all= !allany->all; + allany->upper_item= new_item; + return new_item; +} + +Item *Item_func_not_all::neg_transformer(THD *thd) +{ + /* "NOT (e $cmp$ ALL (SELECT ...)) -> e $rev_cmp$" ANY (SELECT ...) 
*/ + Item_func_nop_all *new_item= new (thd->mem_root) Item_func_nop_all(thd, args[0]); + Item_allany_subselect *allany= (Item_allany_subselect*)args[0]; + allany->all= !allany->all; + allany->create_comp_func(TRUE); + allany->upper_item= new_item; + return new_item; +} + +Item *Item_func_eq::negated_item(THD *thd) /* a = b -> a != b */ +{ + return new (thd->mem_root) Item_func_ne(thd, args[0], args[1]); +} + + +Item *Item_func_ne::negated_item(THD *thd) /* a != b -> a = b */ +{ + return new (thd->mem_root) Item_func_eq(thd, args[0], args[1]); +} + + +Item *Item_func_lt::negated_item(THD *thd) /* a < b -> a >= b */ +{ + return new (thd->mem_root) Item_func_ge(thd, args[0], args[1]); +} + + +Item *Item_func_ge::negated_item(THD *thd) /* a >= b -> a < b */ +{ + return new (thd->mem_root) Item_func_lt(thd, args[0], args[1]); +} + + +Item *Item_func_gt::negated_item(THD *thd) /* a > b -> a <= b */ +{ + return new (thd->mem_root) Item_func_le(thd, args[0], args[1]); +} + + +Item *Item_func_le::negated_item(THD *thd) /* a <= b -> a > b */ +{ + return new (thd->mem_root) Item_func_gt(thd, args[0], args[1]); +} + +/** + just fake method, should never be called. +*/ +Item *Item_bool_rowready_func2::negated_item(THD *thd) +{ + DBUG_ASSERT(0); + return 0; +} + + +/** + Construct a minimal multiple equality item + + @param f1 the first equal item + @param f2 the second equal item + @param with_const_item TRUE if the first item is constant + + @details + The constructor builds a new item equal object for the equality f1=f2. + One of the equal items can be constant. If this is the case it is passed + always as the first parameter and the parameter with_const_item serves + as an indicator of this case. + Currently any non-constant parameter items must point to an item of the + of the type Item_field or Item_direct_view_ref(Item_field). 
+*/ + +Item_equal::Item_equal(THD *thd, const Type_handler *handler, + Item *f1, Item *f2, bool with_const_item): + Item_bool_func(thd), eval_item(0), cond_false(0), cond_true(0), + context_field(NULL), link_equal_fields(FALSE), + m_compare_handler(handler), + m_compare_collation(f2->collation.collation) +{ + const_item_cache= 0; + with_const= with_const_item; + equal_items.push_back(f1, thd->mem_root); + equal_items.push_back(f2, thd->mem_root); + upper_levels= NULL; +} + + +/** + Copy constructor for a multiple equality + + @param item_equal source item for the constructor + + @details + The function creates a copy of an Item_equal object. + This constructor is used when an item belongs to a multiple equality + of an upper level (an upper AND/OR level or an upper level of a nested + outer join). +*/ + +Item_equal::Item_equal(THD *thd, Item_equal *item_equal): + Item_bool_func(thd), eval_item(0), cond_false(0), cond_true(0), + context_field(NULL), link_equal_fields(FALSE), + m_compare_handler(item_equal->m_compare_handler), + m_compare_collation(item_equal->m_compare_collation) +{ + const_item_cache= 0; + List_iterator_fast li(item_equal->equal_items); + Item *item; + while ((item= li++)) + { + equal_items.push_back(item, thd->mem_root); + } + with_const= item_equal->with_const; + cond_false= item_equal->cond_false; + upper_levels= item_equal->upper_levels; +} + + +/** + @brief + Add a constant item to the Item_equal object + + @param[in] c the constant to add + @param[in] f item from the list equal_items the item c is equal to + (this parameter is optional) + + @details + The method adds the constant item c to the equal_items list. If the list + doesn't have any constant item yet the item c is just put in the front + the list. Otherwise the value of c is compared with the value of the + constant item from equal_items. If they are not equal cond_false is set + to TRUE. This serves as an indicator that this Item_equal is always FALSE. 
+*/ + +void Item_equal::add_const(THD *thd, Item *c) +{ + if (cond_false) + return; + if (!with_const) + { + with_const= TRUE; + equal_items.push_front(c, thd->mem_root); + return; + } + + /* + Suppose we have an expression (with a string type field) like this: + WHERE field=const1 AND field=const2 ... + + For all pairs field=constXXX we know that: + + - Item_func_eq::fix_length_and_dec() performed collation and character + set aggregation and added character set converters when needed. + Note, the case like: + WHERE field=const1 COLLATE latin1_bin AND field=const2 + is not handled here, because the field would be replaced to + Item_func_set_collation, which cannot get into Item_equal. + So all constXXX that are handled by Item_equal + already have compatible character sets with "field". + + - Also, Field_str::test_if_equality_guarantees_uniqueness() guarantees + that the comparison collation of all equalities handled by Item_equal + match the the collation of the field. + + Therefore, at Item_equal::add_const() time all constants constXXX + should be directly comparable to each other without an additional + character set conversion. + It's safe to do val_str() for "const_item" and "c" and compare + them according to the collation of the *field*. + + So in a script like this: + CREATE TABLE t1 (a VARCHAR(10) COLLATE xxx); + INSERT INTO t1 VALUES ('a'),('A'); + SELECT * FROM t1 WHERE a='a' AND a='A'; + Item_equal::add_const() effectively rewrites the condition to: + SELECT * FROM t1 WHERE a='a' AND 'a' COLLATE xxx='A'; + and then to: + SELECT * FROM t1 WHERE a='a'; // if the two constants were equal + // e.g. in case of latin1_swedish_ci + or to: + SELECT * FROM t1 WHERE FALSE; // if the two constants were not equal + // e.g. 
in case of latin1_bin + + Note, both "const_item" and "c" can return NULL, e.g.: + SELECT * FROM t1 WHERE a=NULL AND a='const'; + SELECT * FROM t1 WHERE a='const' AND a=NULL; + SELECT * FROM t1 WHERE a='const' AND a=(SELECT MAX(a) FROM t2) + */ + + cond_false= !Item_equal::compare_type_handler()->Item_eq_value(thd, this, c, + get_const()); + if (with_const && equal_items.elements == 1) + cond_true= TRUE; + if (cond_false || cond_true) + const_item_cache= 1; +} + + +/** + @brief + Check whether a field is referred to in the multiple equality + + @param field field whose occurrence is to be checked + + @details + The function checks whether field is referred to by one of the + items from the equal_items list. + + @retval + 1 if multiple equality contains a reference to field + @retval + 0 otherwise +*/ + +bool Item_equal::contains(Field *field) +{ + Item_equal_fields_iterator it(*this); + while (it++) + { + if (field->eq(it.get_curr_field())) + return 1; + } + return 0; +} + + +/** + @brief + Join members of another Item_equal object + + @param item multiple equality whose members are to be joined + + @details + The function actually merges two multiple equalities. After this operation + the Item_equal object additionally contains the field items of another item of + the type Item_equal. + If the optional constant items are not equal the cond_false flag is set to TRUE. + + @notes + The function is called for any equality f1=f2 such that f1 and f2 are items + of the type Item_field or Item_direct_view_ref(Item_field), and, f1->field is + referred to in the list this->equal_items, while the list item->equal_items + contains a reference to f2->field. 
+*/ + +void Item_equal::merge(THD *thd, Item_equal *item) +{ + Item *c= item->get_const(); + if (c) + item->equal_items.pop(); + equal_items.append(&item->equal_items); + if (c) + { + /* + The flag cond_false will be set to TRUE after this if + the multiple equality already contains a constant and its + value is not equal to the value of c. + */ + add_const(thd, c); + } + cond_false|= item->cond_false; +} + + +/** + @brief + Merge members of another Item_equal object into this one + + @param item multiple equality whose members are to be merged + @param save_merged keep the list of equalities in 'item' intact + (e.g. for other merges) + + @details + If the Item_equal 'item' happens to have some elements of the list + of equal items belonging to 'this' object then the function merges + the equal items from 'item' into this list. + If both lists contains constants and they are different then + the value of the cond_false flag is set to TRUE. + + @retval + 1 the lists of equal items in 'item' and 'this' contain common elements + @retval + 0 otherwise + + @notes + The method 'merge' just joins the list of equal items belonging to 'item' + to the list of equal items belonging to this object assuming that the lists + are disjoint. It would be more correct to call the method 'join'. + The method 'merge_into_with_check' really merges two lists of equal items if + they have common members. 
+*/ + +bool Item_equal::merge_with_check(THD *thd, Item_equal *item, bool save_merged) +{ + bool intersected= FALSE; + Item_equal_fields_iterator_slow fi(*item); + + while (fi++) + { + if (contains(fi.get_curr_field())) + { + intersected= TRUE; + if (!save_merged) + fi.remove(); + } + } + if (intersected) + { + if (!save_merged) + merge(thd, item); + else + { + Item *c= item->get_const(); + if (c) + add_const(thd, c); + if (!cond_false) + { + Item *item; + fi.rewind(); + while ((item= fi++)) + { + if (!contains(fi.get_curr_field())) + add(item, thd->mem_root); + } + } + } + } + return intersected; +} + + +/** + @brief + Merge this object into a list of Item_equal objects + + @param list the list of Item_equal objects to merge into + @param save_merged keep the list of equalities in 'this' intact + (e.g. for other merges) + @param only_intersected do not merge if there are no common members + in any of Item_equal objects from the list + and this Item_equal + + @details + If the list of equal items from 'this' object contains common members + with the lists of equal items belonging to Item_equal objects from 'list' + then all involved Item_equal objects e1,...,ek are merged into one + Item equal that replaces e1,...,ek in the 'list'. Otherwise, in the case + when the value of the parameter only_if_intersected is false, this + Item_equal is joined to the 'list'. 
+*/ + +void Item_equal::merge_into_list(THD *thd, List *list, + bool save_merged, + bool only_intersected) +{ + Item_equal *item; + List_iterator it(*list); + Item_equal *merge_into= NULL; + while((item= it++)) + { + if (!merge_into) + { + if (item->merge_with_check(thd, this, save_merged)) + merge_into= item; + } + else + { + if (merge_into->merge_with_check(thd, item, false)) + it.remove(); + } + } + if (!only_intersected && !merge_into) + list->push_back(this, thd->mem_root); +} + + +/** + @brief + Order equal items of the multiple equality according to a sorting criteria + + @param compare function to compare items from the equal_items list + @param arg context extra parameter for the cmp function + + @details + The function performs ordering of the items from the equal_items list + according to the criteria determined by the cmp callback parameter. + If cmp(item1,item2,arg)<0 than item1 must be placed after item2. + + @notes + The function sorts equal items by the bubble sort algorithm. + The list of field items is looked through and whenever two neighboring + members follow in a wrong order they are swapped. This is performed + again and again until we get all members in a right order. +*/ + +void Item_equal::sort(Item_field_cmpfunc compare, void *arg) +{ + bubble_sort(&equal_items, compare, arg); +} + + +/** + @brief + Check appearance of new constant items in the multiple equality object + + @details + The function checks appearance of new constant items among the members + of the equal_items list. Each new constant item is compared with + the constant item from the list if there is any. If there is none the first + new constant item is placed at the very beginning of the list and + with_const is set to TRUE. If it happens that the compared constant items + are unequal then the flag cond_false is set to TRUE. + + @notes + Currently this function is called only after substitution of constant tables. 
+*/ + +void Item_equal::update_const(THD *thd) +{ + List_iterator it(equal_items); + if (with_const) + it++; + Item *item; + while ((item= it++)) + { + if (item->can_eval_in_optimize() && + /* + Don't propagate constant status of outer-joined column. + Such a constant status here is a result of: + a) empty outer-joined table: in this case such a column has a + value of NULL; but at the same time other arguments of + Item_equal don't have to be NULLs and the value of the whole + multiple equivalence expression doesn't have to be NULL or FALSE + because of the outer join nature; + or + b) outer-joined table contains only 1 row: the result of + this column is equal to a row field value *or* NULL. + Both values are inacceptable as Item_equal constants. + */ + !item->is_outer_field()) + { + if (item == equal_items.head()) + with_const= TRUE; + else + { + it.remove(); + add_const(thd, item); + } + } + } +} + + +/** + @brief + Fix fields in a completely built multiple equality + + @param thd currently not used thread handle + @param ref not used + + @details + This function is called once the multiple equality has been built out of + the WHERE/ON condition and no new members are expected to be added to the + equal_items list anymore. + As any implementation of the virtual fix_fields method the function + calculates the cached values of not_null_tables_cache, used_tables_cache, + const_item_cache and calls fix_length_and_dec(). + Additionally the function sets a reference to the Item_equal object in + the non-constant items of the equal_items list unless such a reference has + been already set. + + @notes + Currently this function is called only in the function + build_equal_items_for_cond. 
+ + @retval + FALSE always +*/ + +bool Item_equal::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed() == 0); + Item_equal_fields_iterator it(*this); + Item *item; + Field *first_equal_field= NULL; + Field *last_equal_field= NULL; + Field *prev_equal_field= NULL; + not_null_tables_cache= used_tables_cache= 0; + const_item_cache= 0; + while ((item= it++)) + { + table_map tmp_table_map; + used_tables_cache|= item->used_tables(); + tmp_table_map= item->not_null_tables(); + not_null_tables_cache|= tmp_table_map; + DBUG_ASSERT(!item->with_sum_func() && !item->with_subquery()); + if (item->maybe_null()) + set_maybe_null(); + if (!item->get_item_equal()) + item->set_item_equal(this); + if (link_equal_fields && item->real_item()->type() == FIELD_ITEM) + { + last_equal_field= ((Item_field *) (item->real_item()))->field; + if (!prev_equal_field) + first_equal_field= last_equal_field; + else + prev_equal_field->next_equal_field= last_equal_field; + prev_equal_field= last_equal_field; + } + } + if (prev_equal_field && last_equal_field != first_equal_field) + last_equal_field->next_equal_field= first_equal_field; + if (fix_length_and_dec(thd)) + return TRUE; + base_flags|= item_base_t::FIXED; + return FALSE; +} + + +/** + Update the value of the used table attribute and other attributes + */ + +void Item_equal::update_used_tables() +{ + not_null_tables_cache= used_tables_cache= 0; + if ((const_item_cache= cond_false || cond_true)) + return; + Item_equal_fields_iterator it(*this); + Item *item; + const_item_cache= 1; + while ((item= it++)) + { + item->update_used_tables(); + used_tables_cache|= item->used_tables(); + /* see commentary at Item_equal::update_const() */ + const_item_cache&= item->const_item() && !item->is_outer_field(); + } +} + + +/** + @note + This multiple equality can contains elements belonging not to tables {T} + marked in 'allowed' . 
So we can ascertain null-rejectedness of field f + belonging to table t from {T} only if one of the following equality + predicate can be extracted from this multiple equality: + - f=const + - f=f' where f' is a field of some table from {T} +*/ + +bool Item_equal::find_not_null_fields(table_map allowed) +{ + if (!(allowed & used_tables())) + return false; + bool checked= false; + Item_equal_fields_iterator it(*this); + Item *item; + while ((item= it++)) + { + if (~allowed & item->used_tables()) + continue; + if ((with_const || checked) && !item->find_not_null_fields(allowed)) + continue; + Item_equal_fields_iterator it1(*this); + Item *item1; + while ((item1= it1++) && item1 != item) + { + if (~allowed & item1->used_tables()) + continue; + if (!item->find_not_null_fields(allowed) && + !item1->find_not_null_fields(allowed)) + { + checked= true; + break; + } + } + } + return false; +} + + + +bool Item_equal::count_sargable_conds(void *arg) +{ + SELECT_LEX *sel= (SELECT_LEX *) arg; + uint m= equal_items.elements; + sel->cond_count+= m*(m-1); + return 0; +} + + +/** + @brief + Evaluate multiple equality + + @details + The function evaluate multiple equality to a boolean value. + The function ignores non-constant items from the equal_items list. + The function returns 1 if all constant items from the list are equal. + It returns 0 if there are unequal constant items in the list or + one of the constant items is evaluated to NULL. + + @notes + Currently this function can be called only at the optimization + stage after the constant table substitution, since all Item_equals + are eliminated before the execution stage. 
+ + @retval + 0 multiple equality is always FALSE or NULL + 1 otherwise +*/ + +longlong Item_equal::val_int() +{ + if (cond_false) + return 0; + if (cond_true) + return 1; + Item *item= get_const(); + Item_equal_fields_iterator it(*this); + if (!item) + item= it++; + eval_item->store_value(item); + if ((null_value= item->null_value)) + return 0; + while ((item= it++)) + { + Field *field= it.get_curr_field(); + /* Skip fields of tables that has not been read yet */ + if (!field->table->status || (field->table->status & STATUS_NULL_ROW)) + { + const int rc= eval_item->cmp(item); + if ((rc == TRUE) || (null_value= (rc == UNKNOWN))) + return 0; + } + } + return 1; +} + + +bool Item_equal::fix_length_and_dec(THD *thd) +{ + Item *item= get_first(NO_PARTICULAR_TAB, NULL); + const Type_handler *handler= item->type_handler(); + eval_item= handler->make_cmp_item(thd, item->collation.collation); + return eval_item == NULL; +} + + +bool Item_equal::walk(Item_processor processor, bool walk_subquery, void *arg) +{ + Item *item; + Item_equal_fields_iterator it(*this); + while ((item= it++)) + { + if (item->walk(processor, walk_subquery, arg)) + return 1; + } + return Item_func::walk(processor, walk_subquery, arg); +} + + +Item *Item_equal::transform(THD *thd, Item_transformer transformer, uchar *arg) +{ + DBUG_ASSERT(!thd->stmt_arena->is_stmt_prepare()); + + Item *item; + Item_equal_fields_iterator it(*this); + while ((item= it++)) + { + Item *new_item= item->transform(thd, transformer, arg); + if (!new_item) + return 0; + + /* + THD::change_item_tree() should be called only if the tree was + really transformed, i.e. when a new item has been created. + Otherwise we'll be allocating a lot of unnecessary memory for + change records at each execution. 
+ */ + if (new_item != item) + thd->change_item_tree((Item **) it.ref(), new_item); + } + return Item_func::transform(thd, transformer, arg); +} + + +void Item_equal::print(String *str, enum_query_type query_type) +{ + if (cond_false) + { + str->append('0'); + return; + } + str->append(func_name_cstring()); + str->append('('); + List_iterator_fast it(equal_items); + Item *item; + item= it++; + item->print(str, query_type); + while ((item= it++)) + { + str->append(','); + str->append(' '); + item->print(str, query_type); + } + str->append(')'); +} + + +/* + @brief Get the first equal field of multiple equality. + @param[in] field the field to get equal field to + + @details Get the first field of multiple equality that is equal to the + given field. In order to make semi-join materialization strategy work + correctly we can't propagate equal fields from upper select to a + materialized semi-join. + Thus the fields is returned according to following rules: + + 1) If the given field belongs to a semi-join then the first field in + multiple equality which belong to the same semi-join is returned. + Otherwise NULL is returned. + 2) If the given field doesn't belong to a semi-join then + the first field in the multiple equality that doesn't belong to any + semi-join is returned. + If all fields in the equality are belong to semi-join(s) then NULL + is returned. + 3) If no field is given then the first field in the multiple equality + is returned without regarding whether it belongs to a semi-join or not. + + @retval Found first field in the multiple equality. + @retval 0 if no field found. +*/ + +Item* Item_equal::get_first(JOIN_TAB *context, Item *field_item) +{ + Item_equal_fields_iterator it(*this); + Item *item; + if (!field_item) + return (it++); + Field *field= ((Item_field *) (field_item->real_item()))->field; + + /* + Of all equal fields, return the first one we can use. Normally, this is the + field which belongs to the table that is the first in the join order. 
+ + There is one exception to this: When semi-join materialization strategy is + used, and the given field belongs to a table within the semi-join nest, we + must pick the first field in the semi-join nest. + + Example: suppose we have a join order: + + ot1 ot2 SJ-Mat(it1 it2 it3) ot3 + + and equality ot2.col = it1.col = it2.col + If we're looking for best substitute for 'it2.col', we should pick it1.col + and not ot2.col. + + eliminate_item_equal() also has code that deals with equality substitution + in presence of SJM nests. + */ + + TABLE_LIST *emb_nest; + if (context != NO_PARTICULAR_TAB) + emb_nest= context->emb_sj_nest; + else + emb_nest= field->table->pos_in_table_list->embedding; + + if (emb_nest && emb_nest->sj_mat_info && emb_nest->sj_mat_info->is_used) + { + /* + It's a field from an materialized semi-join. We can substitute it for + - a constant item + - a field from the same semi-join + Find the first of such items: + */ + while ((item= it++)) + { + if (item->const_item() || + it.get_curr_field()->table->pos_in_table_list->embedding == emb_nest) + { + /* + If we found given field then return NULL to avoid unnecessary + substitution. + */ + return (item != field_item) ? item : NULL; + } + } + } + else + { + /* + The field is not in SJ-Materialization nest. We must return the first + field in the join order. The field may be inside a semi-join nest, i.e + a join order may look like this: + + SJ-Mat(it1 it2) ot1 ot2 + + where we're looking what to substitute ot2.col for. In this case we must + still return it1.col, here's a proof why: + + First let's note that either it1.col or it2.col participates in + subquery's IN-equality. It can't be otherwise, because materialization is + only applicable to uncorrelated subqueries, so the only way we could + infer "it1.col=ot1.col" is from the IN-equality. Ok, so IN-eqality has + it1.col or it2.col on its inner side. 
it1.col is first such item in the + join order, so it's not possible for SJ-Mat to be + SJ-Materialization-lookup, it is SJ-Materialization-Scan. The scan part + of this strategy will unpack value of it1.col=it2.col into it1.col + (that's the first equal item inside the subquery), and we'll be able to + get it from there. qed. + */ + + return equal_items.head(); + } + // Shouldn't get here. + DBUG_ASSERT(0); + return NULL; +} + + +longlong Item_func_dyncol_check::val_int() +{ + char buff[STRING_BUFFER_USUAL_SIZE]; + String tmp(buff, sizeof(buff), &my_charset_bin); + DYNAMIC_COLUMN col; + String *str; + enum enum_dyncol_func_result rc; + + str= args[0]->val_str(&tmp); + if (args[0]->null_value) + goto null; + col.length= str->length(); + /* We do not change the string, so could do this trick */ + col.str= (char *)str->ptr(); + rc= mariadb_dyncol_check(&col); + if (rc < 0 && rc != ER_DYNCOL_FORMAT) + { + dynamic_column_error_message(rc); + goto null; + } + null_value= FALSE; + return rc == ER_DYNCOL_OK; + +null: + null_value= TRUE; + return 0; +} + +longlong Item_func_dyncol_exists::val_int() +{ + char buff[STRING_BUFFER_USUAL_SIZE], nmstrbuf[11]; + String tmp(buff, sizeof(buff), &my_charset_bin), + nmbuf(nmstrbuf, sizeof(nmstrbuf), system_charset_info); + DYNAMIC_COLUMN col; + String *str; + LEX_STRING buf, *name= NULL; + ulonglong num= 0; + enum enum_dyncol_func_result rc; + + if (args[1]->result_type() == INT_RESULT) + num= args[1]->val_int(); + else + { + String *nm= args[1]->val_str(&nmbuf); + if (!nm || args[1]->null_value) + { + null_value= 1; + return 1; + } + if (my_charset_same(nm->charset(), DYNCOL_UTF)) + { + buf.str= (char *) nm->ptr(); + buf.length= nm->length(); + } + else + { + uint strlen= nm->length() * DYNCOL_UTF->mbmaxlen + 1; + uint dummy_errors; + buf.str= (char *) current_thd->alloc(strlen); + if (buf.str) + { + buf.length= + copy_and_convert(buf.str, strlen, DYNCOL_UTF, + nm->ptr(), nm->length(), nm->charset(), + &dummy_errors); + } + else + 
buf.length= 0; + } + name= &buf; + } + str= args[0]->val_str(&tmp); + if (args[0]->null_value || args[1]->null_value || num > UINT_MAX16) + goto null; + col.length= str->length(); + /* We do not change the string, so could do this trick */ + col.str= (char *)str->ptr(); + rc= ((name == NULL) ? + mariadb_dyncol_exists_num(&col, (uint) num) : + mariadb_dyncol_exists_named(&col, name)); + if (rc < 0) + { + dynamic_column_error_message(rc); + goto null; + } + null_value= FALSE; + return rc == ER_DYNCOL_YES; + +null: + null_value= TRUE; + return 0; +} + + +Item_bool_rowready_func2 *Eq_creator::create(THD *thd, Item *a, Item *b) const +{ + return new(thd->mem_root) Item_func_eq(thd, a, b); +} + + +Item_bool_rowready_func2* Eq_creator::create_swap(THD *thd, Item *a, Item *b) const +{ + return new(thd->mem_root) Item_func_eq(thd, b, a); +} + + +Item_bool_rowready_func2* Ne_creator::create(THD *thd, Item *a, Item *b) const +{ + return new(thd->mem_root) Item_func_ne(thd, a, b); +} + + +Item_bool_rowready_func2* Ne_creator::create_swap(THD *thd, Item *a, Item *b) const +{ + return new(thd->mem_root) Item_func_ne(thd, b, a); +} + + +Item_bool_rowready_func2* Gt_creator::create(THD *thd, Item *a, Item *b) const +{ + return new(thd->mem_root) Item_func_gt(thd, a, b); +} + + +Item_bool_rowready_func2* Gt_creator::create_swap(THD *thd, Item *a, Item *b) const +{ + return new(thd->mem_root) Item_func_lt(thd, b, a); +} + + +Item_bool_rowready_func2* Lt_creator::create(THD *thd, Item *a, Item *b) const +{ + return new(thd->mem_root) Item_func_lt(thd, a, b); +} + + +Item_bool_rowready_func2* Lt_creator::create_swap(THD *thd, Item *a, Item *b) const +{ + return new(thd->mem_root) Item_func_gt(thd, b, a); +} + + +Item_bool_rowready_func2* Ge_creator::create(THD *thd, Item *a, Item *b) const +{ + return new(thd->mem_root) Item_func_ge(thd, a, b); +} + + +Item_bool_rowready_func2* Ge_creator::create_swap(THD *thd, Item *a, Item *b) const +{ + return new(thd->mem_root) Item_func_le(thd, 
b, a); +} + + +Item_bool_rowready_func2* Le_creator::create(THD *thd, Item *a, Item *b) const +{ + return new(thd->mem_root) Item_func_le(thd, a, b); +} + + +Item_bool_rowready_func2* Le_creator::create_swap(THD *thd, Item *a, Item *b) const +{ + return new(thd->mem_root) Item_func_ge(thd, b, a); +} + + +bool +Item_equal::excl_dep_on_grouping_fields(st_select_lex *sel) +{ + Item_equal_fields_iterator it(*this); + Item *item; + + while ((item=it++)) + { + if (item->excl_dep_on_grouping_fields(sel)) + { + set_extraction_flag(MARKER_FULL_EXTRACTION); + return true; + } + } + return false; +} + + +/** + @brief + Transform multiple equality into list of equalities + + @param thd the thread handle + @param equalities the list where created equalities are stored + @param checker the checker callback function to be applied to the nodes + of the tree of the object to check if multiple equality + elements can be used to create equalities + @param arg parameter to be passed to the checker + @param clone_const true <=> clone the constant member if there is any + + @details + How the method works on examples: + + Example 1: + It takes MULT_EQ(x,a,b) and tries to create from its elements a set of + equalities {(x=a),(x=b)}. + + Example 2: + It takes MULT_EQ(1,a,b) and tries to create from its elements a set of + equalities {(a=1),(a=b)}. + + How it is done: + + 1. If there is a constant member c the first non-constant member x for + which the function checker returns true is taken and an item for + the equality x=c is created. When constructing the equality item + the left part of the equality is always taken as a clone of x while + the right part is taken as a clone of c only if clone_const == true. + + 2. After this all equalities of the form x=a (where x designates the first + non-constant member for which checker returns true and a is some other + such member of the multiplle equality) are created. 
When constructing + an equality item both its parts are taken as clones of x and a. + + Suppose in the examples above that for 'x', 'a', and 'b' the function + checker returns true. + + Example 1: + the equality (x=a) is built + the equality (x=b) is built + + Example 2: + the equality (a=1) is built + the equality (a=b) is built + + 3. As a result we get a set of equalities built with the elements of + this multiple equality. They are saved in the equality list. + + Example 1: + {(x=a),(x=b)} + + Example 2: + {(a=1),(a=b)} + + @note + This method is called for condition pushdown into materialized + derived table/view, and IN subquery, and pushdown from HAVING into WHERE. + When it is called for pushdown from HAVING the empty checker is passed. + This is because in this case the elements of the multiple equality don't + need to be checked if they can be used to build equalities: either all + equalities can be pushed or none of them can be pushed. + When the function is called for pushdown from HAVING the value of the + parameter clone_const is always false. In other cases it's always true. + + @retval true if an error occurs + @retval false otherwise +*/ + +bool Item_equal::create_pushable_equalities(THD *thd, + List *equalities, + Pushdown_checker checker, + uchar *arg, + bool clone_const) +{ + Item *item; + Item *left_item= NULL; + Item *right_item = get_const(); + Item_equal_fields_iterator it(*this); + + while ((item=it++)) + { + left_item= item; + if (checker && !((item->*checker) (arg))) + continue; + break; + } + + if (!left_item) + return false; + + if (right_item) + { + Item_func_eq *eq= 0; + Item *left_item_clone= left_item->build_clone(thd); + Item *right_item_clone= !clone_const ? 
+ right_item : right_item->build_clone(thd); + if (!left_item_clone || !right_item_clone) + return true; + eq= new (thd->mem_root) Item_func_eq(thd, + left_item_clone, + right_item_clone); + if (!eq || equalities->push_back(eq, thd->mem_root)) + return true; + if (!clone_const) + { + /* + Also set IMMUTABLE_FL for any sub-items of the right_item. + This is needed to prevent Item::cleanup_excluding_immutables_processor + from peforming cleanup of the sub-items and so creating an item tree + where a fixed item has non-fixed items inside it. + */ + int16 new_flag= MARKER_IMMUTABLE; + right_item->walk(&Item::set_extraction_flag_processor, false, + (void*)&new_flag); + } + } + + while ((item=it++)) + { + if (checker && !((item->*checker) (arg))) + continue; + Item_func_eq *eq= 0; + Item *left_item_clone= left_item->build_clone(thd); + Item *right_item_clone= item->build_clone(thd); + if (!(left_item_clone && right_item_clone)) + return true; + left_item_clone->set_item_equal(NULL); + right_item_clone->set_item_equal(NULL); + eq= new (thd->mem_root) Item_func_eq(thd, + right_item_clone, + left_item_clone); + if (!eq || equalities->push_back(eq, thd->mem_root)) + return true; + } + return false; +} + + +/** + Transform multiple equality into the AND condition of equalities. + + Example: + MULT_EQ(x,a,b) + => + (x=a) AND (x=b) + + Equalities are built in Item_equal::create_pushable_equalities() method + using elements of this multiple equality. The result of this method is + saved in an equality list. + This method returns the condition where the elements of the equality list + are anded. 
+*/ + +Item *Item_equal::multiple_equality_transformer(THD *thd, uchar *arg) +{ + List equalities; + if (create_pushable_equalities(thd, &equalities, 0, 0, false)) + return 0; + + switch (equalities.elements) + { + case 0: + return 0; + case 1: + return equalities.head(); + break; + default: + return new (thd->mem_root) Item_cond_and(thd, equalities); + break; + } +} diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h new file mode 100644 index 00000000..a67dfb5d --- /dev/null +++ b/sql/item_cmpfunc.h @@ -0,0 +1,3840 @@ +#ifndef ITEM_CMPFUNC_INCLUDED +#define ITEM_CMPFUNC_INCLUDED +/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. + Copyright (c) 2009, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* compare and test functions */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include "item_func.h" /* Item_int_func, Item_bool_func */ +#include "item.h" + +extern Item_result item_cmp_type(Item_result a,Item_result b); +inline Item_result item_cmp_type(const Item *a, const Item *b) +{ + return item_cmp_type(a->cmp_type(), b->cmp_type()); +} +inline Item_result item_cmp_type(Item_result a, const Item *b) +{ + return item_cmp_type(a, b->cmp_type()); +} +class Item_bool_func2; +class Arg_comparator; + +typedef int (Arg_comparator::*arg_cmp_func)(); + +typedef int (*Item_field_cmpfunc)(Item *f1, Item *f2, void *arg); + +class Arg_comparator: public Sql_alloc +{ + Item **a, **b; + const Type_handler *m_compare_handler; + CHARSET_INFO *m_compare_collation; + arg_cmp_func func; + Item_func_or_sum *owner; + bool set_null; // TRUE <=> set owner->null_value + Arg_comparator *comparators; // used only for compare_row() + double precision; + /* Fields used in DATE/DATETIME comparison. */ + Item *a_cache, *b_cache; // Cached values of a and b items + // when one of arguments is NULL. + + int set_cmp_func(THD *thd, Item_func_or_sum *owner_arg, + Item **a1, Item **a2); + + int compare_not_null_values(longlong val1, longlong val2) + { + if (set_null) + owner->null_value= false; + if (val1 < val2) return -1; + if (val1 == val2) return 0; + return 1; + } + NativeBuffer m_native1, m_native2; +public: + /* Allow owner function to use string buffers. 
*/ + String value1, value2; + + Arg_comparator(): + m_compare_handler(&type_handler_null), + m_compare_collation(&my_charset_bin), + set_null(TRUE), comparators(0), + a_cache(0), b_cache(0) {}; + Arg_comparator(Item **a1, Item **a2): a(a1), b(a2), + m_compare_handler(&type_handler_null), + m_compare_collation(&my_charset_bin), + set_null(TRUE), comparators(0), + a_cache(0), b_cache(0) {}; + +public: + bool set_cmp_func_for_row_arguments(THD *thd); + bool set_cmp_func_row(THD *thd); + bool set_cmp_func_string(THD *thd); + bool set_cmp_func_time(THD *thd); + bool set_cmp_func_datetime(THD *thd); + bool set_cmp_func_native(THD *thd); + bool set_cmp_func_int(THD *thd); + bool set_cmp_func_real(THD *thd); + bool set_cmp_func_decimal(THD *thd); + + inline int set_cmp_func(THD *thd, Item_func_or_sum *owner_arg, + Item **a1, Item **a2, bool set_null_arg) + { + set_null= set_null_arg; + return set_cmp_func(thd, owner_arg, a1, a2); + } + inline int compare() { return (this->*func)(); } + + int compare_string(); // compare args[0] & args[1] + int compare_real(); // compare args[0] & args[1] + int compare_decimal(); // compare args[0] & args[1] + int compare_int_signed(); // compare args[0] & args[1] + int compare_int_signed_unsigned(); + int compare_int_unsigned_signed(); + int compare_int_unsigned(); + int compare_row(); // compare args[0] & args[1] + int compare_e_string(); // compare args[0] & args[1] + int compare_e_real(); // compare args[0] & args[1] + int compare_e_decimal(); // compare args[0] & args[1] + int compare_e_int(); // compare args[0] & args[1] + int compare_e_int_diff_signedness(); + int compare_e_row(); // compare args[0] & args[1] + int compare_real_fixed(); + int compare_e_real_fixed(); + int compare_datetime(); + int compare_e_datetime(); + int compare_time(); + int compare_e_time(); + int compare_native(); + int compare_e_native(); + int compare_json_str_basic(Item *j, Item *s); + int compare_json_str(); + int compare_str_json(); + int 
compare_e_json_str_basic(Item *j, Item *s); + int compare_e_json_str(); + int compare_e_str_json(); + + void min_max_update_field_native(THD *thd, Field *field, Item *item, + int cmp_sign); + + Item** cache_converted_constant(THD *thd, Item **value, Item **cache, + const Type_handler *type); + inline bool is_owner_equal_func() + { + return (owner->type() == Item::FUNC_ITEM && + ((Item_func*)owner)->functype() == Item_func::EQUAL_FUNC); + } + const Type_handler *compare_type_handler() const { return m_compare_handler; } + Item_result compare_type() const { return m_compare_handler->cmp_type(); } + CHARSET_INFO *compare_collation() const { return m_compare_collation; } + Arg_comparator *subcomparators() const { return comparators; } + void cleanup() + { + delete [] comparators; + comparators= 0; + } + friend class Item_func; + friend class Item_bool_rowready_func2; +}; + + +class SEL_ARG; +struct KEY_PART; + +class Item_bool_func :public Item_int_func, + public Type_cmp_attributes +{ +protected: + /* + Build a SEL_TREE for a simple predicate + @param param PARAM from SQL_SELECT::test_quick_select + @param field field in the predicate + @param value constant in the predicate + @return Pointer to the tree built tree + */ + virtual SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, + Field *field, Item *value) + { + DBUG_ENTER("Item_bool_func::get_func_mm_tree"); + DBUG_ASSERT(0); + DBUG_RETURN(0); + } + /* + Return the full select tree for "field_item" and "value": + - a single SEL_TREE if the field is not in a multiple equality, or + - a conjunction of all SEL_TREEs for all fields from + the same multiple equality with "field_item". + */ + SEL_TREE *get_full_func_mm_tree(RANGE_OPT_PARAM *param, + Item_field *field_item, Item *value); + /** + Test if "item" and "value" are suitable for the range optimization + and get their full select tree. + + "Suitable" means: + - "item" is a field or a field reference + - "value" is NULL (e.g. 
WHERE field IS NULL), or + "value" is an unexpensive item (e.g. WHERE field OP value) + + @param item - the argument that is checked to be a field + @param value - the other argument + @returns - NULL if the arguments are not suitable for the range optimizer. + @returns - the full select tree if the arguments are suitable. + */ + SEL_TREE *get_full_func_mm_tree_for_args(RANGE_OPT_PARAM *param, + Item *item, Item *value) + { + DBUG_ENTER("Item_bool_func::get_full_func_mm_tree_for_args"); + Item *field= item->real_item(); + if (field->type() == Item::FIELD_ITEM && !field->const_item() && + (!value || !value->is_expensive())) + DBUG_RETURN(get_full_func_mm_tree(param, (Item_field *) field, value)); + DBUG_RETURN(NULL); + } + SEL_TREE *get_mm_parts(RANGE_OPT_PARAM *param, Field *field, + Item_func::Functype type, Item *value); + SEL_TREE *get_ne_mm_tree(RANGE_OPT_PARAM *param, + Field *field, Item *lt_value, Item *gt_value); + virtual SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param, Field *field, + KEY_PART *key_part, + Item_func::Functype type, Item *value); + void raise_note_if_key_become_unused(THD *thd, const Item_args &old_args); +public: + Item_bool_func(THD *thd): Item_int_func(thd) {} + Item_bool_func(THD *thd, Item *a): Item_int_func(thd, a) {} + Item_bool_func(THD *thd, Item *a, Item *b): Item_int_func(thd, a, b) {} + Item_bool_func(THD *thd, Item *a, Item *b, Item *c): Item_int_func(thd, a, b, c) {} + Item_bool_func(THD *thd, List &list): Item_int_func(thd, list) { } + Item_bool_func(THD *thd, Item_bool_func *item) :Item_int_func(thd, item) {} + const Type_handler *type_handler() const override + { return &type_handler_bool; } + const Type_handler *fixed_type_handler() const override + { return &type_handler_bool; } + CHARSET_INFO *compare_collation() const override { return NULL; } + bool fix_length_and_dec(THD *thd) override { decimals=0; max_length=1; return FALSE; } + decimal_digits_t decimal_precision() const override { return 1; } + bool 
need_parentheses_in_default() override { return true; } +}; + + +/** + Abstract Item class, to represent X IS [NOT] (TRUE | FALSE) + boolean predicates. +*/ + +class Item_func_truth : public Item_bool_func +{ +public: + bool val_bool() override; + longlong val_int() override; + bool fix_length_and_dec(THD *thd) override; + void print(String *str, enum_query_type query_type) override; + enum precedence precedence() const override { return CMP_PRECEDENCE; } + +protected: + Item_func_truth(THD *thd, Item *a, bool a_value, bool a_affirmative): + Item_bool_func(thd, a), value(a_value), affirmative(a_affirmative) + {} + + ~Item_func_truth() = default; +private: + /** + True for X IS [NOT] TRUE, + false for X IS [NOT] FALSE predicates. + */ + const bool value; + /** + True for X IS Y, false for X IS NOT Y predicates. + */ + const bool affirmative; +}; + + +/** + This Item represents a X IS TRUE boolean predicate. +*/ + +class Item_func_istrue : public Item_func_truth +{ +public: + Item_func_istrue(THD *thd, Item *a): Item_func_truth(thd, a, true, true) {} + ~Item_func_istrue() = default; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("istrue") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/** + This Item represents a X IS NOT TRUE boolean predicate. 
+*/ + +class Item_func_isnottrue : public Item_func_truth +{ +public: + Item_func_isnottrue(THD *thd, Item *a): + Item_func_truth(thd, a, true, false) {} + ~Item_func_isnottrue() = default; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("isnottrue") }; + return name; + } + bool find_not_null_fields(table_map allowed) override { return false; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + bool eval_not_null_tables(void *) override + { not_null_tables_cache= 0; return false; } +}; + + +/** + This Item represents a X IS FALSE boolean predicate. +*/ + +class Item_func_isfalse : public Item_func_truth +{ +public: + Item_func_isfalse(THD *thd, Item *a): Item_func_truth(thd, a, false, true) {} + ~Item_func_isfalse() = default; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("isfalse") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/** + This Item represents a X IS NOT FALSE boolean predicate. +*/ + +class Item_func_isnotfalse : public Item_func_truth +{ +public: + Item_func_isnotfalse(THD *thd, Item *a): + Item_func_truth(thd, a, false, false) {} + ~Item_func_isnotfalse() = default; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("isnotfalse") }; + return name; + } + bool find_not_null_fields(table_map allowed) override { return false; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + bool eval_not_null_tables(void *) override + { not_null_tables_cache= 0; return false; } +}; + + +class Item_cache; +#define UNKNOWN (-1) + + +/* + Item_in_optimizer(left_expr, Item_in_subselect(...)) + + Item_in_optimizer is used to wrap an instance of Item_in_subselect. 
This + class does the following: + - Evaluate the left expression and store it in Item_cache_* object (to + avoid re-evaluating it many times during subquery execution) + - Shortcut the evaluation of "NULL IN (...)" to NULL in the cases where we + don't care if the result is NULL or FALSE. + + NOTE + It is not quite clear why the above listed functionality should be + placed into a separate class called 'Item_in_optimizer'. +*/ + +class Item_in_optimizer: public Item_bool_func +{ +protected: + Item_cache *cache; + Item *expr_cache; + /* + Stores the value of "NULL IN (SELECT ...)" for uncorrelated subqueries: + UNKNOWN - "NULL in (SELECT ...)" has not yet been evaluated + FALSE - result is FALSE + TRUE - result is NULL + */ + int result_for_null_param; +public: + Item_in_optimizer(THD *thd, Item *a, Item *b): + Item_bool_func(thd, a, b), cache(0), expr_cache(0), + result_for_null_param(UNKNOWN) + { + with_flags|= item_with_t::SUBQUERY; + } + bool fix_fields(THD *, Item **) override; + bool fix_left(THD *thd); + table_map not_null_tables() const override { return 0; } + bool is_null() override; + longlong val_int() override; + void cleanup() override; + enum Functype functype() const override { return IN_OPTIMIZER_FUNC; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("") }; + return name; + } + Item_cache **get_cache() { return &cache; } + Item *transform(THD *thd, Item_transformer transformer, uchar *arg) override; + Item *expr_cache_insert_transformer(THD *thd, uchar *unused) override; + bool is_expensive_processor(void *arg) override; + bool is_expensive() override; + void set_join_tab_idx(uint8 join_tab_idx_arg) override + { args[1]->set_join_tab_idx(join_tab_idx_arg); } + void get_cache_parameters(List ¶meters) override; + bool eval_not_null_tables(void *opt_arg) override; + bool find_not_null_fields(table_map allowed) override; + void fix_after_pullout(st_select_lex *new_parent, Item **ref, + bool merge) 
override; + bool invisible_mode(); + void reset_cache() { cache= NULL; } + void print(String *str, enum_query_type query_type) override; + void restore_first_argument(); + Item* get_wrapped_in_subselect_item() + { return args[1]; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + enum precedence precedence() const override { return args[1]->precedence(); } +}; + + +/* + Functions and operators with two arguments that can use range optimizer. +*/ +class Item_bool_func2 :public Item_bool_func +{ /* Bool with 2 string args */ +protected: + void add_key_fields_optimize_op(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables, bool equal_func); +public: + Item_bool_func2(THD *thd, Item *a, Item *b): + Item_bool_func(thd, a, b) { } + + bool is_null() { return MY_TEST(args[0]->is_null() || args[1]->is_null()); } + COND *remove_eq_conds(THD *thd, Item::cond_result *cond_value, + bool top_level); + bool count_sargable_conds(void *arg); + /* + Specifies which result type the function uses to compare its arguments. + This method is used in equal field propagation. + */ + virtual const Type_handler *compare_type_handler() const + { + /* + Have STRING_RESULT by default, which means the function compares + val_str() results of the arguments. This is suitable for Item_func_like + and for Item_func_spatial_rel. + Note, Item_bool_rowready_func2 overrides this default behaviour. 
    */
    return &type_handler_varchar;
  }
  /* Range-optimizer entry point for two-argument predicates */
  SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
  {
    DBUG_ENTER("Item_bool_func2::get_mm_tree");
    DBUG_ASSERT(arg_count == 2);
    SEL_TREE *ftree= get_full_func_mm_tree_for_args(param, args[0], args[1]);
    if (!ftree)
      ftree= Item_func::get_mm_tree(param, cond_ptr);
    DBUG_RETURN(ftree);
  }
};


/**
  A class for functions and operators that can use the range optimizer and
  have a reverse function/operator that can also use the range optimizer,
  so this condition:
    WHERE value OP field
  can be optimized as equivalent to:
    WHERE field REV_OP value

  This class covers:
  - scalar comparison predicates:  <, <=, =, <=>, >=, >
  - MBR and precise spatial relation predicates (e.g. SP_TOUCHES(x,y))

  For example:
    WHERE 10 > field
  can be optimized as:
    WHERE field < 10
*/
class Item_bool_func2_with_rev :public Item_bool_func2
{
protected:
  /* Use the reverse operator's type when 'value' is the left argument */
  SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param,
                             Field *field, Item *value)
  {
    DBUG_ENTER("Item_bool_func2_with_rev::get_func_mm_tree");
    Item_func::Functype func_type=
      (value != arguments()[0]) ? functype() : rev_functype();
    DBUG_RETURN(get_mm_parts(param, field, func_type, value));
  }
public:
  Item_bool_func2_with_rev(THD *thd, Item *a, Item *b):
    Item_bool_func2(thd, a, b) { }
  virtual enum Functype rev_functype() const= 0;
  SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
  {
    DBUG_ENTER("Item_bool_func2_with_rev::get_mm_tree");
    DBUG_ASSERT(arg_count == 2);
    SEL_TREE *ftree;
    /*
      Even if get_full_func_mm_tree_for_args(param, args[0], args[1]) will not
      return a range predicate it may still be possible to create one
      by reversing the order of the operands. Note that this only
      applies to predicates where both operands are fields. Example: A
      query of the form

         WHERE t1.a OP t2.b

      In this case, args[0] == t1.a and args[1] == t2.b.
      When creating range predicates for t2,
      get_full_func_mm_tree_for_args(param, args[0], args[1])
      will return NULL because 'field' belongs to t1 and only
      predicates that applies to t2 are of interest. In this case a
      call to get_full_func_mm_tree_for_args() with reversed operands
      may succeed.
    */
    if (!(ftree= get_full_func_mm_tree_for_args(param, args[0], args[1])) &&
        !(ftree= get_full_func_mm_tree_for_args(param, args[1], args[0])))
      ftree= Item_func::get_mm_tree(param, cond_ptr);
    DBUG_RETURN(ftree);
  }
};


/*
  Base for comparison operators whose arguments may be rows; owns the
  Arg_comparator that performs the typed comparison.
*/
class Item_bool_rowready_func2 :public Item_bool_func2_with_rev
{
protected:
  Arg_comparator cmp;
  bool check_arguments() const override
  {
    return check_argument_types_like_args0();
  }
public:
  Item_bool_rowready_func2(THD *thd, Item *a, Item *b):
    Item_bool_func2_with_rev(thd, a, b), cmp(tmp_arg, tmp_arg + 1)
  { }
  Sql_mode_dependency value_depends_on_sql_mode() const override;
  void print(String *str, enum_query_type query_type) override
  {
    Item_func::print_op(str, query_type);
  }
  enum precedence precedence() const override { return CMP_PRECEDENCE; }
  Item *neg_transformer(THD *thd) override;
  virtual Item *negated_item(THD *thd);
  Item *propagate_equal_fields(THD *thd, const Context &ctx, COND_EQUAL *cond)
    override
  {
    Item_args::propagate_equal_fields(thd,
                                      Context(ANY_SUBST,
                                              cmp.compare_type_handler(),
                                              compare_collation()),
                                      cond);
    return this;
  }
  bool fix_length_and_dec(THD *thd) override;
  int set_cmp_func(THD *thd)
  {
    return cmp.set_cmp_func(thd, this, tmp_arg, tmp_arg + 1, true);
  }
  CHARSET_INFO *compare_collation() const override
  { return cmp.compare_collation(); }
  const Type_handler *compare_type_handler() const override
  {
    return cmp.compare_type_handler();
  }
  Arg_comparator *get_comparator() { return &cmp; }
  void cleanup() override
  {
    Item_bool_func2::cleanup();
    cmp.cleanup();
  }
  void add_key_fields(JOIN *join, KEY_FIELD **key_fields,
                      uint *and_level, table_map
usable_tables, + SARGABLE_PARAM **sargables) override + { + return add_key_fields_optimize_op(join, key_fields, and_level, + usable_tables, sargables, false); + } + Item *build_clone(THD *thd) override + { + Item_bool_rowready_func2 *clone= + (Item_bool_rowready_func2 *) Item_func::build_clone(thd); + if (clone) + { + clone->cmp.comparators= 0; + } + return clone; + } +}; + +/** + XOR inherits from Item_bool_func because it is not optimized yet. + Later, when XOR is optimized, it needs to inherit from + Item_cond instead. See WL#5800. +*/ +class Item_func_xor :public Item_bool_func +{ +public: + Item_func_xor(THD *thd, Item *i1, Item *i2): Item_bool_func(thd, i1, i2) {} + enum Functype functype() const override { return XOR_FUNC; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("xor") }; + return name; + } + enum precedence precedence() const override { return XOR_PRECEDENCE; } + void print(String *str, enum_query_type query_type) override + { Item_func::print_op(str, query_type); } + longlong val_int() override; + bool find_not_null_fields(table_map allowed) override { return false; } + Item *neg_transformer(THD *thd) override; + Item* propagate_equal_fields(THD *thd, const Context &ctx, COND_EQUAL *cond) override + { + Item_args::propagate_equal_fields(thd, Context_boolean(), cond); + return this; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_not :public Item_bool_func +{ +public: + Item_func_not(THD *thd, Item *a): Item_bool_func(thd, a) {} + longlong val_int() override; + enum Functype functype() const override { return NOT_FUNC; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("not") }; + return name; + } + bool find_not_null_fields(table_map allowed) override { return false; } + enum precedence precedence() const override { return NEG_PRECEDENCE; } + Item *neg_transformer(THD *thd) override; + bool 
fix_fields(THD *, Item **) override; + void print(String *str, enum_query_type query_type) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_maxmin_subselect; + +/* + trigcond(arg) ::= param? arg : TRUE + + The class Item_func_trig_cond is used for guarded predicates + which are employed only for internal purposes. + A guarded predicate is an object consisting of an a regular or + a guarded predicate P and a pointer to a boolean guard variable g. + A guarded predicate P/g is evaluated to true if the value of the + guard g is false, otherwise it is evaluated to the same value that + the predicate P: val(P/g)= g ? val(P):true. + Guarded predicates allow us to include predicates into a conjunction + conditionally. Currently they are utilized for pushed down predicates + in queries with outer join operations. + + In the future, probably, it makes sense to extend this class to + the objects consisting of three elements: a predicate P, a pointer + to a variable g and a firing value s with following evaluation + rule: val(P/g,s)= g==s? val(P) : true. It will allow us to build only + one item for the objects of the form P/g1/g2... + + Objects of this class are built only for query execution after + the execution plan has been already selected. That's why this + class needs only val_int out of generic methods. + + Current uses of Item_func_trig_cond objects: + - To wrap selection conditions when executing outer joins + - To wrap condition that is pushed down into subquery +*/ + +class Item_func_trig_cond: public Item_bool_func +{ + bool *trig_var; +public: + Item_func_trig_cond(THD *thd, Item *a, bool *f): Item_bool_func(thd, a) + { trig_var= f; } + longlong val_int() override { return *trig_var ? 
args[0]->val_int() : 1; } + enum Functype functype() const override { return TRIG_COND_FUNC; }; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("trigcond") }; + return name; + } + bool const_item() const override { return FALSE; } + bool *get_trig_var() { return trig_var; } + void add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_not_all :public Item_func_not +{ + /* allow to check presence of values in max/min optimization */ + Item_sum_min_max *test_sum_item; + Item_maxmin_subselect *test_sub_item; + +public: + bool show; + + Item_func_not_all(THD *thd, Item *a): + Item_func_not(thd, a), test_sum_item(0), test_sub_item(0), show(0) + {} + table_map not_null_tables() const override { return 0; } + longlong val_int() override; + enum Functype functype() const override { return NOT_ALL_FUNC; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("") }; + return name; + } + enum precedence precedence() const override + { return show ? 
Item_func::precedence() : args[0]->precedence(); } + bool fix_fields(THD *thd, Item **ref) override + { return Item_func::fix_fields(thd, ref);} + void print(String *str, enum_query_type query_type) override; + void set_sum_test(Item_sum_min_max *item) { test_sum_item= item; test_sub_item= 0; }; + void set_sub_test(Item_maxmin_subselect *item) { test_sub_item= item; test_sum_item= 0;}; + bool empty_underlying_subquery(); + Item *neg_transformer(THD *thd) override; +}; + + +class Item_func_nop_all :public Item_func_not_all +{ +public: + + Item_func_nop_all(THD *thd, Item *a): Item_func_not_all(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("") }; + return name; + } + Item *neg_transformer(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_eq :public Item_bool_rowready_func2 +{ +public: + Item_func_eq(THD *thd, Item *a, Item *b): + Item_bool_rowready_func2(thd, a, b), + in_equality_no(UINT_MAX) + {} + longlong val_int() override; + enum Functype functype() const override { return EQ_FUNC; } + enum Functype rev_functype() const override { return EQ_FUNC; } + cond_result eq_cmp_result() const override { return COND_TRUE; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("=") }; + return name; + } + Item *negated_item(THD *thd) override; + COND *build_equal_items(THD *thd, COND_EQUAL *inherited, + bool link_item_fields, + COND_EQUAL **cond_equal_ref) override; + void add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables) override + { + return add_key_fields_optimize_op(join, key_fields, and_level, + usable_tables, sargables, true); + } + bool check_equality(THD *thd, COND_EQUAL *cond, List *eq_list) override; + /* + - If this equality is created from the subquery's IN-equality: + number of the 
item it was created from, e.g. for + (a,b) IN (SELECT c,d ...) a=c will have in_equality_no=0, + and b=d will have in_equality_no=1. + - Otherwise, UINT_MAX + */ + uint in_equality_no; + uint exists2in_reserved_items() override { return 1; }; + friend class Arg_comparator; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_equal final :public Item_bool_rowready_func2 +{ +public: + Item_func_equal(THD *thd, Item *a, Item *b): + Item_bool_rowready_func2(thd, a, b) {} + longlong val_int() override; + bool fix_length_and_dec(THD *thd) override; + table_map not_null_tables() const override { return 0; } + bool find_not_null_fields(table_map allowed) override { return false; } + enum Functype functype() const override { return EQUAL_FUNC; } + enum Functype rev_functype() const override { return EQUAL_FUNC; } + cond_result eq_cmp_result() const override { return COND_TRUE; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("<=>") }; + return name; + } + Item *neg_transformer(THD *thd) override { return 0; } + void add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables) override + { + return add_key_fields_optimize_op(join, key_fields, and_level, + usable_tables, sargables, true); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_ge :public Item_bool_rowready_func2 +{ +public: + Item_func_ge(THD *thd, Item *a, Item *b): + Item_bool_rowready_func2(thd, a, b) {}; + longlong val_int() override; + enum Functype functype() const override { return GE_FUNC; } + enum Functype rev_functype() const override { return LE_FUNC; } + cond_result eq_cmp_result() const override { return COND_TRUE; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN(">=") }; + return name; + } + Item *negated_item(THD *thd) override; + Item 
*get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_gt :public Item_bool_rowready_func2 +{ +public: + Item_func_gt(THD *thd, Item *a, Item *b): + Item_bool_rowready_func2(thd, a, b) {}; + longlong val_int() override; + enum Functype functype() const override { return GT_FUNC; } + enum Functype rev_functype() const override { return LT_FUNC; } + cond_result eq_cmp_result() const override { return COND_FALSE; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN(">") }; + return name; + } + Item *negated_item(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_le :public Item_bool_rowready_func2 +{ +public: + Item_func_le(THD *thd, Item *a, Item *b): + Item_bool_rowready_func2(thd, a, b) {}; + longlong val_int() override; + enum Functype functype() const override { return LE_FUNC; } + enum Functype rev_functype() const override { return GE_FUNC; } + cond_result eq_cmp_result() const override { return COND_TRUE; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("<=") }; + return name; + } + Item *negated_item(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_lt :public Item_bool_rowready_func2 +{ +public: + Item_func_lt(THD *thd, Item *a, Item *b): + Item_bool_rowready_func2(thd, a, b) {} + longlong val_int() override; + enum Functype functype() const override { return LT_FUNC; } + enum Functype rev_functype() const override { return GT_FUNC; } + cond_result eq_cmp_result() const override { return COND_FALSE; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("<") }; + return name; + } + Item *negated_item(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_ne :public 
Item_bool_rowready_func2 +{ +protected: + SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, + Field *field, Item *value) override; +public: + Item_func_ne(THD *thd, Item *a, Item *b): + Item_bool_rowready_func2(thd, a, b) {} + longlong val_int() override; + enum Functype functype() const override { return NE_FUNC; } + enum Functype rev_functype() const override { return NE_FUNC; } + cond_result eq_cmp_result() const override { return COND_FALSE; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("<>") }; + return name; + } + Item *negated_item(THD *thd) override; + void add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level, + table_map usable_tables, SARGABLE_PARAM **sargables) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + The class Item_func_opt_neg is defined to factor out the functionality + common for the classes Item_func_between and Item_func_in. The objects + of these classes can express predicates or there negations. + The alternative approach would be to create pairs Item_func_between, + Item_func_notbetween and Item_func_in, Item_func_notin. + +*/ + +class Item_func_opt_neg :public Item_bool_func +{ +protected: + /* + The data type handler that will be used for comparison. + Data type handlers of all arguments are mixed to here. + */ + Type_handler_hybrid_field_type m_comparator; + /* + The collation that will be used for comparison in case + when m_compare_type is STRING_RESULT. 
+ */ + DTCollation cmp_collation; +public: + bool negated; /* <=> the item represents NOT */ +public: + Item_func_opt_neg(THD *thd, Item *a, Item *b, Item *c): + Item_bool_func(thd, a, b, c), negated(0) {} + Item_func_opt_neg(THD *thd, List &list): + Item_bool_func(thd, list), negated(0) {} +public: + Item *neg_transformer(THD *thd) override + { + negated= !negated; + return this; + } + bool eq(const Item *item, bool binary_cmp) const override; + CHARSET_INFO *compare_collation() const override + { + return cmp_collation.collation; + } + Item *propagate_equal_fields(THD *, const Context &, + COND_EQUAL *) override= 0; +}; + +class Item_func_between :public Item_func_opt_neg +{ +protected: + SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, + Field *field, Item *value) override; + bool val_int_cmp_int_finalize(longlong value, longlong a, longlong b); +public: + String value0,value1,value2; + Item_func_between(THD *thd, Item *a, Item *b, Item *c): + Item_func_opt_neg(thd, a, b, c) { } + longlong val_int() override + { + DBUG_ASSERT(fixed()); + return m_comparator.type_handler()->Item_func_between_val_int(this); + } + enum Functype functype() const override { return BETWEEN; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("between") }; + return name; + } + enum precedence precedence() const override { return BETWEEN_PRECEDENCE; } + bool fix_length_and_dec(THD *thd) override; + bool fix_length_and_dec_string(THD *) + { + return agg_arg_charsets_for_comparison(cmp_collation, args, 3); + } + bool fix_length_and_dec_temporal(THD *); + bool fix_length_and_dec_numeric(THD *); + void print(String *str, enum_query_type query_type) override; + bool eval_not_null_tables(void *opt_arg) override; + bool find_not_null_fields(table_map allowed) override; + void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge) + override; + bool count_sargable_conds(void *arg) override; + void add_key_fields(JOIN *join, 
KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables) override; + SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) override; + Item* propagate_equal_fields(THD *thd, const Context &ctx, COND_EQUAL *cond) + override + { + Item_args::propagate_equal_fields(thd, + Context(ANY_SUBST, + m_comparator.type_handler(), + compare_collation()), + cond); + return this; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + + longlong val_int_cmp_string(); + longlong val_int_cmp_datetime(); + longlong val_int_cmp_time(); + longlong val_int_cmp_native(); + longlong val_int_cmp_int(); + longlong val_int_cmp_real(); + longlong val_int_cmp_decimal(); +}; + + +class Item_func_strcmp :public Item_long_func +{ + bool check_arguments() const override + { return check_argument_types_can_return_str(0, 2); } + String value1, value2; + DTCollation cmp_collation; +public: + Item_func_strcmp(THD *thd, Item *a, Item *b): + Item_long_func(thd, a, b) {} + longlong val_int() override; + decimal_digits_t decimal_precision() const override { return 1; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("strcmp") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + if (agg_arg_charsets_for_comparison(cmp_collation, args, 2)) + return TRUE; + fix_char_length(2); // returns "1" or "0" or "-1" + return FALSE; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +struct interval_range +{ + Item_result type; + double dbl; + my_decimal dec; +}; + +class Item_func_interval :public Item_long_func +{ + Item_row *row; + bool use_decimal_comparison; + interval_range *intervals; + bool check_arguments() const override + { + return check_argument_types_like_args0(); + } +public: + Item_func_interval(THD *thd, Item_row *a): + Item_long_func(thd, a), row(a), intervals(0) + { } + bool fix_fields(THD *, Item **) override; + longlong 
val_int() override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("interval") }; + return name; + } + decimal_digits_t decimal_precision() const override { return 2; } + void print(String *str, enum_query_type query_type) override + { + str->append(func_name_cstring()); + print_args(str, 0, query_type); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_coalesce :public Item_func_case_expression +{ +public: + Item_func_coalesce(THD *thd, Item *a, Item *b): + Item_func_case_expression(thd, a, b) {} + Item_func_coalesce(THD *thd, List &list): + Item_func_case_expression(thd, list) {} + double real_op() override; + longlong int_op() override; + String *str_op(String *) override; + my_decimal *decimal_op(my_decimal *) override; + bool date_op(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + bool time_op(THD *thd, MYSQL_TIME *ltime) override; + bool native_op(THD *thd, Native *to) override; + bool fix_length_and_dec(THD *thd) override + { + if (aggregate_for_result(func_name_cstring(), args, arg_count, true)) + return TRUE; + fix_attributes(args, arg_count); + return FALSE; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("coalesce") }; + return name; + } + table_map not_null_tables() const override { return 0; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + Case abbreviations that aggregate its result field type by two arguments: + IFNULL(arg1, arg2) + IF(switch, arg1, arg2) + NVL2(switch, arg1, arg2) +*/ +class Item_func_case_abbreviation2 :public Item_func_case_expression +{ +protected: + bool fix_length_and_dec2(Item **items) + { + if (aggregate_for_result(func_name_cstring(), items, 2, true)) + return TRUE; + fix_attributes(items, 2); + return FALSE; + } + + void cache_type_info(const Item *source, bool 
maybe_null_arg) + { + Type_std_attributes::set(source); + set_handler(source->type_handler()); + set_maybe_null(maybe_null_arg); + } + + bool fix_length_and_dec2_eliminate_null(Item **items) + { + // Let IF(cond, expr, NULL) and IF(cond, NULL, expr) inherit type from expr. + if (items[0]->type() == NULL_ITEM) + { + cache_type_info(items[1], true); + // If both arguments are NULL, make resulting type BINARY(0). + if (items[1]->type() == NULL_ITEM) + set_handler(&type_handler_string); + } + else if (items[1]->type() == NULL_ITEM) + { + cache_type_info(items[0], true); + } + else + { + if (fix_length_and_dec2(items)) + return TRUE; + } + return FALSE; + } + +public: + Item_func_case_abbreviation2(THD *thd, Item *a, Item *b): + Item_func_case_expression(thd, a, b) { } + Item_func_case_abbreviation2(THD *thd, Item *a, Item *b, Item *c): + Item_func_case_expression(thd, a, b, c) { } +}; + + +class Item_func_ifnull :public Item_func_case_abbreviation2 +{ +public: + Item_func_ifnull(THD *thd, Item *a, Item *b): + Item_func_case_abbreviation2(thd, a, b) {} + double real_op() override; + longlong int_op() override; + String *str_op(String *str) override; + my_decimal *decimal_op(my_decimal *) override; + bool date_op(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + bool time_op(THD *thd, MYSQL_TIME *ltime) override; + bool native_op(THD *thd, Native *to) override; + bool fix_length_and_dec(THD *thd) override + { + /* + Set nullability from args[1] by default. + Note, some type handlers may reset maybe_null + in Item_hybrid_func_fix_attributes() if args[1] + is NOT NULL but cannot always be converted to + the data type of "this" safely. + E.g. 
Type_handler_inet6 does: + IFNULL(inet6_not_null_expr, 'foo') -> INET6 NULL + IFNULL(inet6_not_null_expr, '::1') -> INET6 NOT NULL + */ + copy_flags(args[1], item_base_t::MAYBE_NULL); + if (Item_func_case_abbreviation2::fix_length_and_dec2(args)) + return TRUE; + return FALSE; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("ifnull") }; + return name; + } + + table_map not_null_tables() const override { return 0; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/** + Case abbreviations that have a switch argument and + two return arguments to choose from. Returns the value + of either of the two return arguments depending on the switch argument value. + + IF(switch, arg1, arg2) + NVL(switch, arg1, arg2) +*/ +class Item_func_case_abbreviation2_switch: public Item_func_case_abbreviation2 +{ +protected: + virtual Item *find_item() const= 0; + +public: + Item_func_case_abbreviation2_switch(THD *thd, Item *a, Item *b, Item *c) + :Item_func_case_abbreviation2(thd, a, b, c) + { } + + bool date_op(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) + { + Datetime_truncation_not_needed dt(thd, find_item(), fuzzydate); + return (null_value= dt.copy_to_mysql_time(ltime, mysql_timestamp_type())); + } + bool time_op(THD *thd, MYSQL_TIME *ltime) + { + return (null_value= Time(find_item()).copy_to_mysql_time(ltime)); + } + longlong int_op() + { + return val_int_from_item(find_item()); + } + double real_op() + { + return val_real_from_item(find_item()); + } + my_decimal *decimal_op(my_decimal *decimal_value) + { + return val_decimal_from_item(find_item(), decimal_value); + } + String *str_op(String *str) + { + return val_str_from_item(find_item(), str); + } + bool native_op(THD *thd, Native *to) + { + return val_native_with_conversion_from_item(thd, find_item(), to, + type_handler()); + } +}; + + +class Item_func_if :public Item_func_case_abbreviation2_switch +{ +protected: + Item 
*find_item() const override + { return args[0]->val_bool() ? args[1] : args[2]; } + +public: + Item_func_if(THD *thd, Item *a, Item *b, Item *c): + Item_func_case_abbreviation2_switch(thd, a, b, c) + {} + bool fix_fields(THD *, Item **) override; + bool fix_length_and_dec(THD *thd) override + { + return fix_length_and_dec2_eliminate_null(args + 1); + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("if") }; + return name; + } + bool eval_not_null_tables(void *opt_arg) override; + void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge) + override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +private: + void cache_type_info(Item *source); +}; + + +class Item_func_nvl2 :public Item_func_case_abbreviation2_switch +{ +protected: + Item *find_item() const override + { return args[0]->is_null() ? args[2] : args[1]; } + +public: + Item_func_nvl2(THD *thd, Item *a, Item *b, Item *c): + Item_func_case_abbreviation2_switch(thd, a, b, c) + {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("nvl2") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + return fix_length_and_dec2_eliminate_null(args + 1); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_nullif :public Item_func_case_expression +{ + Arg_comparator cmp; + /* + NULLIF(a,b) is a short for: + CASE WHEN a=b THEN NULL ELSE a END + + The left "a" is for comparison purposes. + The right "a" is for return value purposes. + These are two different "a" and they can be replaced to different items. 
+ + The left "a" is in a comparison and can be replaced by: + - Item_func::convert_const_compared_to_int_field() + - agg_item_set_converter() in set_cmp_func() + - cache_converted_constant() in set_cmp_func() + + Both "a"s are subject to equal fields propagation and can be replaced by: + - Item_field::propagate_equal_fields(ANY_SUBST) for the left "a" + - Item_field::propagate_equal_fields(IDENTITY_SUBST) for the right "a" + */ + Item_cache *m_cache; + int compare(); + void reset_first_arg_if_needed() + { + if (arg_count == 3 && args[0] != args[2]) + args[0]= args[2]; + } + Item *m_arg0; +public: + /* + Here we pass three arguments to the parent constructor, as NULLIF + is a three-argument function, it needs two copies of the first argument + (see above). But fix_fields() will be confused if we try to prepare the + same Item twice (if args[0]==args[2]), so we hide the third argument + (decrementing arg_count) and copy args[2]=args[0] again after fix_fields(). + See also Item_func_nullif::fix_length_and_dec(). 
+ */ + Item_func_nullif(THD *thd, Item *a, Item *b): + Item_func_case_expression(thd, a, b, a), + m_cache(NULL), + m_arg0(NULL) + { arg_count--; } + void cleanup() override + { + Item_func_hybrid_field_type::cleanup(); + arg_count= 2; // See the comment to the constructor + } + bool date_op(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + bool time_op(THD *thd, MYSQL_TIME *ltime) override; + double real_op() override; + longlong int_op() override; + String *str_op(String *str) override; + my_decimal *decimal_op(my_decimal *) override; + bool native_op(THD *thd, Native *to) override; + bool fix_length_and_dec(THD *thd) override; + bool walk(Item_processor processor, bool walk_subquery, void *arg) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("nullif") }; + return name; + } + void print(String *str, enum_query_type query_type) override; + void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List &fields, uint flags) override; + void update_used_tables() override; + table_map not_null_tables() const override { return 0; } + bool is_null() override; + Item* propagate_equal_fields(THD *thd, const Context &ctx, COND_EQUAL *cond) + override + { + Context cmpctx(ANY_SUBST, cmp.compare_type_handler(), + cmp.compare_collation()); + const Item *old0= args[0]; + args[0]->propagate_equal_fields_and_change_item_tree(thd, cmpctx, + cond, &args[0]); + args[1]->propagate_equal_fields_and_change_item_tree(thd, cmpctx, + cond, &args[1]); + /* + MDEV-9712 Performance degradation of nested NULLIF + ANY_SUBST is more relaxed than IDENTITY_SUBST. + If ANY_SUBST did not change args[0], + then we can skip propagation for args[2]. 
+ */ + if (old0 != args[0]) + args[2]->propagate_equal_fields_and_change_item_tree(thd, + Context_identity(), + cond, &args[2]); + return this; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + Item *derived_field_transformer_for_having(THD *thd, uchar *arg) override + { reset_first_arg_if_needed(); return this; } + Item *derived_field_transformer_for_where(THD *thd, uchar *arg) override + { reset_first_arg_if_needed(); return this; } + Item *grouping_field_transformer_for_where(THD *thd, uchar *arg) override + { reset_first_arg_if_needed(); return this; } + Item *in_subq_field_transformer_for_where(THD *thd, uchar *arg) override + { reset_first_arg_if_needed(); return this; } + Item *in_subq_field_transformer_for_having(THD *thd, uchar *arg) override + { reset_first_arg_if_needed(); return this; } +}; + + +/* Functions to handle the optimized IN */ + + +/* A vector of values of some type */ + +class in_vector :public Sql_alloc +{ +public: + char *base; + uint size; + qsort2_cmp compare; + CHARSET_INFO *collation; + uint count; + uint used_count; + in_vector() = default; + in_vector(THD *thd, uint elements, uint element_length, qsort2_cmp cmp_func, + CHARSET_INFO *cmp_coll) + :base((char*) thd_calloc(thd, elements * element_length)), + size(element_length), compare(cmp_func), collation(cmp_coll), + count(elements), used_count(elements) {} + virtual ~in_vector() = default; + /* + Store an Item value at the given position. + @returns false - the Item was not NULL, and the conversion from the + Item data type to the cmp_item data type went without + errors + @returns true - the Item was NULL, or data type conversion returned NULL + */ + virtual bool set(uint pos, Item *item)=0; + virtual uchar *get_value(Item *item)=0; + void sort() + { + my_qsort2(base,used_count,size,compare,(void*)collation); + } + bool find(Item *item); + + /* + Create an instance of Item_{type} (e.g. 
Item_decimal) constant object + which type allows it to hold an element of this vector without any + conversions. + The purpose of this function is to be able to get elements of this + vector in form of Item_xxx constants without creating Item_xxx object + for every array element you get (i.e. this implements "FlyWeight" pattern) + */ + virtual Item* create_item(THD *thd) { return NULL; } + + /* + Store the value at position #pos into provided item object + SYNOPSIS + value_to_item() + pos Index of value to store + item Constant item to store value into. The item must be of the same + type that create_item() returns. + */ + virtual void value_to_item(uint pos, Item *item) { } + + /* Compare values number pos1 and pos2 for equality */ + bool compare_elems(uint pos1, uint pos2) + { + return MY_TEST(compare(collation, base + pos1 * size, base + pos2 * size)); + } + virtual const Type_handler *type_handler() const= 0; +}; + +class in_string :public in_vector +{ + char buff[STRING_BUFFER_USUAL_SIZE]; + String tmp; + class Item_string_for_in_vector: public Item_string + { + public: + Item_string_for_in_vector(THD *thd, CHARSET_INFO *cs): + Item_string(thd, cs) + { } + void set_value(const String *str) + { + str_value= *str; + collation.set(str->charset()); + } + }; +public: + in_string(THD *thd, uint elements, qsort2_cmp cmp_func, CHARSET_INFO *cs); + ~in_string(); + bool set(uint pos, Item *item) override; + uchar *get_value(Item *item) override; + Item* create_item(THD *thd) override; + void value_to_item(uint pos, Item *item) override + { + String *str=((String*) base)+pos; + Item_string_for_in_vector *to= (Item_string_for_in_vector*) item; + to->set_value(str); + } + const Type_handler *type_handler() const override + { return &type_handler_varchar; } +}; + +class in_longlong :public in_vector +{ +protected: + /* + Here we declare a temporary variable (tmp) of the same type as the + elements of this vector. tmp is used in finding if a given value is in + the list. 
+ */ + struct packed_longlong + { + longlong val; + longlong unsigned_flag; // Use longlong, not bool, to preserve alignment + } tmp; +public: + in_longlong(THD *thd, uint elements); + bool set(uint pos, Item *item) override; + uchar *get_value(Item *item) override; + Item* create_item(THD *thd) override; + void value_to_item(uint pos, Item *item) override + { + ((Item_int*) item)->value= ((packed_longlong*) base)[pos].val; + ((Item_int*) item)->unsigned_flag= (bool) + ((packed_longlong*) base)[pos].unsigned_flag; + } + const Type_handler *type_handler() const override + { return &type_handler_slonglong; } + + friend int cmp_longlong(void *cmp_arg, packed_longlong *a,packed_longlong *b); +}; + + +class in_timestamp :public in_vector +{ + Timestamp_or_zero_datetime tmp; +public: + in_timestamp(THD *thd, uint elements); + bool set(uint pos, Item *item) override; + uchar *get_value(Item *item) override; + Item* create_item(THD *thd) override; + void value_to_item(uint pos, Item *item) override; + const Type_handler *type_handler() const override + { return &type_handler_timestamp2; } +}; + + +/* + Class to represent a vector of constant DATE/DATETIME values. +*/ +class in_temporal :public in_longlong +{ +public: + /* Cache for the left item. 
*/ + + in_temporal(THD *thd, uint elements) + :in_longlong(thd, elements) {}; + Item *create_item(THD *thd); + void value_to_item(uint pos, Item *item) + { + packed_longlong *val= reinterpret_cast(base)+pos; + Item_datetime *dt= static_cast(item); + dt->set(val->val, type_handler()->mysql_timestamp_type()); + } + friend int cmp_longlong(void *cmp_arg, packed_longlong *a,packed_longlong *b); +}; + + +class in_datetime :public in_temporal +{ +public: + in_datetime(THD *thd, uint elements) + :in_temporal(thd, elements) + {} + bool set(uint pos, Item *item) override; + uchar *get_value(Item *item) override; + const Type_handler *type_handler() const override + { return &type_handler_datetime2; } +}; + + +class in_time :public in_temporal +{ +public: + in_time(THD *thd, uint elements) + :in_temporal(thd, elements) + {} + bool set(uint pos, Item *item) override; + uchar *get_value(Item *item) override; + const Type_handler *type_handler() const override + { return &type_handler_time2; } +}; + + +class in_double :public in_vector +{ + double tmp; +public: + in_double(THD *thd, uint elements); + bool set(uint pos, Item *item) override; + uchar *get_value(Item *item) override; + Item *create_item(THD *thd) override; + void value_to_item(uint pos, Item *item) override + { + ((Item_float*)item)->value= ((double*) base)[pos]; + } + const Type_handler *type_handler() const override + { return &type_handler_double; } +}; + + +class in_decimal :public in_vector +{ + my_decimal val; +public: + in_decimal(THD *thd, uint elements); + bool set(uint pos, Item *item) override; + uchar *get_value(Item *item) override; + Item *create_item(THD *thd) override; + void value_to_item(uint pos, Item *item) override + { + my_decimal *dec= ((my_decimal *)base) + pos; + Item_decimal *item_dec= (Item_decimal*)item; + item_dec->set_decimal_value(dec); + } + const Type_handler *type_handler() const override + { return &type_handler_newdecimal; } +}; + + +/* +** Classes for easy comparing of non 
const items +*/ + +class cmp_item :public Sql_alloc +{ +public: + CHARSET_INFO *cmp_charset; + cmp_item() { cmp_charset= &my_charset_bin; } + virtual ~cmp_item() = default; + virtual void store_value(Item *item)= 0; + /** + @returns result (TRUE, FALSE or UNKNOWN) of + "stored argument's value <> item's value" + */ + virtual int cmp(Item *item)= 0; + virtual int cmp_not_null(const Value *value)= 0; + // for optimized IN with row + virtual int compare(cmp_item *item)= 0; + virtual cmp_item *make_same(THD *thd)= 0; + /* + Store a scalar or a ROW value into "this". + @returns false - the value (or every component in case of ROW) was + not NULL and the data type conversion went without errors. + @returns true - the value (or some of its components) was NULL, or the + data type conversion of a not-NULL value returned NULL. + */ + virtual bool store_value_by_template(THD *thd, cmp_item *tmpl, Item *item)=0; +}; + +/// cmp_item which stores a scalar (i.e. non-ROW). +class cmp_item_scalar : public cmp_item +{ +protected: + bool m_null_value; ///< If stored value is NULL + bool store_value_by_template(THD *thd, cmp_item *tmpl, Item *item) override + { + store_value(item); + return m_null_value; + } +}; + +class cmp_item_string : public cmp_item_scalar +{ +protected: + String *value_res; +public: + cmp_item_string () = default; + cmp_item_string (CHARSET_INFO *cs) { cmp_charset= cs; } + void set_charset(CHARSET_INFO *cs) { cmp_charset= cs; } + friend class cmp_item_sort_string; + friend class cmp_item_sort_string_in_static; +}; + +class cmp_item_sort_string :public cmp_item_string +{ +protected: + char value_buff[STRING_BUFFER_USUAL_SIZE]; + String value; +public: + cmp_item_sort_string(): + cmp_item_string() {} + cmp_item_sort_string(CHARSET_INFO *cs): + cmp_item_string(cs), + value(value_buff, sizeof(value_buff), cs) {} + void store_value(Item *item) + { + value_res= item->val_str(&value); + m_null_value= item->null_value; + // Make sure to cache the result String inside 
"value" + if (value_res && value_res != &value) + { + if (value.copy(*value_res)) + value.set("", 0, item->collation.collation); + value_res= &value; + } + } + int cmp_not_null(const Value *val) + { + DBUG_ASSERT(!val->is_null()); + DBUG_ASSERT(val->is_string()); + return sortcmp(value_res, &val->m_string, cmp_charset) != 0; + } + int cmp(Item *arg) + { + char buff[STRING_BUFFER_USUAL_SIZE]; + String tmp(buff, sizeof(buff), cmp_charset), *res= arg->val_str(&tmp); + if (m_null_value || arg->null_value) + return UNKNOWN; + if (value_res && res) + return sortcmp(value_res, res, cmp_charset) != 0; + else if (!value_res && !res) + return FALSE; + else + return TRUE; + } + int compare(cmp_item *ci) + { + cmp_item_string *l_cmp= (cmp_item_string *) ci; + return sortcmp(value_res, l_cmp->value_res, cmp_charset); + } + cmp_item *make_same(THD *thd); + void set_charset(CHARSET_INFO *cs) + { + cmp_charset= cs; + value.set_buffer_if_not_allocated(value_buff, sizeof(value_buff), cs); + } +}; + +class cmp_item_int : public cmp_item_scalar +{ + longlong value; +public: + cmp_item_int() = default; /* Remove gcc warning */ + void store_value(Item *item) + { + value= item->val_int(); + m_null_value= item->null_value; + } + int cmp_not_null(const Value *val) + { + DBUG_ASSERT(!val->is_null()); + DBUG_ASSERT(val->is_longlong()); + return value != val->value.m_longlong; + } + int cmp(Item *arg) + { + const bool rc= value != arg->val_int(); + return (m_null_value || arg->null_value) ? UNKNOWN : rc; + } + int compare(cmp_item *ci) + { + cmp_item_int *l_cmp= (cmp_item_int *)ci; + return (value < l_cmp->value) ? -1 : ((value == l_cmp->value) ? 0 : 1); + } + cmp_item *make_same(THD *thd); +}; + +/* + Compare items in the DATETIME context. 
+*/ +class cmp_item_temporal: public cmp_item_scalar +{ +protected: + longlong value; +public: + cmp_item_temporal() = default; + int compare(cmp_item *ci); +}; + + +class cmp_item_datetime: public cmp_item_temporal +{ +public: + cmp_item_datetime() + :cmp_item_temporal() + { } + void store_value(Item *item) + { + value= item->val_datetime_packed(current_thd); + m_null_value= item->null_value; + } + int cmp_not_null(const Value *val); + int cmp(Item *arg); + cmp_item *make_same(THD *thd); +}; + + +class cmp_item_time: public cmp_item_temporal +{ +public: + cmp_item_time() + :cmp_item_temporal() + { } + void store_value(Item *item) + { + value= item->val_time_packed(current_thd); + m_null_value= item->null_value; + } + int cmp_not_null(const Value *val); + int cmp(Item *arg); + cmp_item *make_same(THD *thd); +}; + + +class cmp_item_timestamp: public cmp_item_scalar +{ + Timestamp_or_zero_datetime_native m_native; +public: + cmp_item_timestamp() :cmp_item_scalar() { } + void store_value(Item *item); + int cmp_not_null(const Value *val); + int cmp(Item *arg); + int compare(cmp_item *ci); + cmp_item *make_same(THD *thd); +}; + + +class cmp_item_real : public cmp_item_scalar +{ + double value; +public: + cmp_item_real() = default; /* Remove gcc warning */ + void store_value(Item *item) + { + value= item->val_real(); + m_null_value= item->null_value; + } + int cmp_not_null(const Value *val) + { + DBUG_ASSERT(!val->is_null()); + DBUG_ASSERT(val->is_double()); + return value != val->value.m_double; + } + int cmp(Item *arg) + { + const bool rc= value != arg->val_real(); + return (m_null_value || arg->null_value) ? UNKNOWN : rc; + } + int compare(cmp_item *ci) + { + cmp_item_real *l_cmp= (cmp_item_real *) ci; + return (value < l_cmp->value)? -1 : ((value == l_cmp->value) ? 
0 : 1); + } + cmp_item *make_same(THD *thd); +}; + + +class cmp_item_decimal : public cmp_item_scalar +{ + my_decimal value; +public: + cmp_item_decimal() = default; /* Remove gcc warning */ + void store_value(Item *item); + int cmp(Item *arg); + int cmp_not_null(const Value *val); + int compare(cmp_item *c); + cmp_item *make_same(THD *thd); +}; + + +/* + cmp_item for optimized IN with row (right part string, which never + be changed) +*/ + +class cmp_item_sort_string_in_static :public cmp_item_string +{ + protected: + String value; +public: + cmp_item_sort_string_in_static(CHARSET_INFO *cs): + cmp_item_string(cs) {} + void store_value(Item *item) + { + value_res= item->val_str(&value); + m_null_value= item->null_value; + } + int cmp_not_null(const Value *val) + { + DBUG_ASSERT(false); + return TRUE; + } + int cmp(Item *item) + { + // Should never be called + DBUG_ASSERT(false); + return TRUE; + } + int compare(cmp_item *ci) + { + cmp_item_string *l_cmp= (cmp_item_string *) ci; + return sortcmp(value_res, l_cmp->value_res, cmp_charset); + } + cmp_item *make_same(THD *thd) + { + return new cmp_item_sort_string_in_static(cmp_charset); + } +}; + + +/** + A helper class to handle situations when some item "pred" (the predicant) + is consequently compared to a list of other items value0..valueN (the values). + Currently used to handle: + - + pred IN (value0, value1, value2) + - + CASE pred WHEN value0 .. WHEN value1 .. WHEN value2 .. END + + Every pair {pred,valueN} can be compared by its own Type_handler. + Some pairs can use the same Type_handler. + In cases when all pairs use exactly the same Type_handler, + we say "all types are compatible". + + For example, for an expression + 1 IN (1, 1e0, 1.0, 2) + - pred is 1 + - value0 is 1 + - value1 is 1e0 + - value2 is 1.1 + - value3 is 2 + + Pairs (pred,valueN) are compared as follows: + N expr1 Type + - ----- ---- + 0 1 INT + 1 1e0 DOUBLE + 2 1.0 DECIMAL + 3 2 INT + + Types are not compatible in this example. 
+ + During add_value() calls, each pair {pred,valueN} is analysed: + - If valueN is an explicit NULL, it can be ignored in the caller asks to do so + - If valueN is not an explicit NULL (or if the caller didn't ask to skip + NULLs), then the value add an element in the array m_comparators[]. + + Every element m_comparators[] stores the following information: + 1. m_arg_index - the position of the value expression in the original + argument array, e.g. in Item_func_in::args[] or Item_func_case::args[]. + + 2. m_handler - the pointer to the data type handler that the owner + will use to compare the pair {args[m_predicate_index],args[m_arg_index]}. + + 3. m_handler_index - the index of an m_comparators[] element corresponding + to the leftmost pair that uses exactly the same Type_handler for + comparison. m_handler_index helps to maintain unique data type handlers. + - m_comparators[i].m_handler_index==i means that this is the + leftmost pair that uses the Type_handler m_handler for comparision. + - If m_comparators[i].m_handlex_index!=i, it means that some earlier + element m_comparators[jm_cmp_item; + DBUG_ASSERT(in_item); + /* + If this is the leftmost pair that uses the data type handler + pointed by m_comparators[i].m_handler, then we need to cache + the predicant value representation used by this handler. + */ + if (m_comparators[i].m_handler_index == i) + in_item->store_value(args->arguments()[m_predicant_index]); + /* + If the predicant item has null_value==true then: + - In case of scalar expression we can returns UNKNOWN immediately. + No needs to check the result of the value item. + - In case of ROW, null_value==true means that *some* row elements + returned NULL, but *some* elements can still be non-NULL! + We need to get the result of the value item and test + if non-NULL elements in the predicant and the value produce + TRUE (not equal), or UNKNOWN. 
+ */ + if (args->arguments()[m_predicant_index]->null_value && + m_comparators[i].m_handler != &type_handler_row) + return UNKNOWN; + return in_item->cmp(args->arguments()[m_comparators[i].m_arg_index]); + } + int cmp_args_nulls_equal(THD *thd, Item_args *args, uint i) + { + Predicant_to_value_comparator *cmp= + &m_comparators[m_comparators[i].m_handler_index]; + cmp_item *in_item= cmp->m_cmp_item; + DBUG_ASSERT(in_item); + Item *predicant= args->arguments()[m_predicant_index]; + Item *arg= args->arguments()[m_comparators[i].m_arg_index]; + ValueBuffer val; + if (m_comparators[i].m_handler_index == i) + in_item->store_value(predicant); + m_comparators[i].m_handler->Item_save_in_value(thd, arg, &val); + if (predicant->null_value && val.is_null()) + return FALSE; // Two nulls are equal + if (predicant->null_value || val.is_null()) + return UNKNOWN; + return in_item->cmp_not_null(&val); + } + /** + Predicant_to_value_comparator - a comparator for one pair (pred,valueN). + See comments above. + */ + struct Predicant_to_value_comparator + { + const Type_handler *m_handler; + cmp_item *m_cmp_item; + uint m_arg_index; + uint m_handler_index; + void cleanup() + { + if (m_cmp_item) + delete m_cmp_item; + memset(this, 0, sizeof(*this)); + } + }; + + Predicant_to_value_comparator *m_comparators; // The comparator array + uint m_comparator_count;// The number of elements in m_comparators[] + uint m_predicant_index; // The position of the predicant in its argument list, + // e.g. for Item_func_in m_predicant_index is 0, + // as predicant is stored in Item_func_in::args[0]. + // For Item_func_case m_predicant_index is + // set to Item_func_case::first_expr_num. 
+ +public: + Predicant_to_list_comparator(THD *thd, uint nvalues) + :m_comparator_count(0), + m_predicant_index(0) + { + alloc_comparators(thd, nvalues); + } + + uint comparator_count() const { return m_comparator_count; } + const Type_handler *get_comparator_type_handler(uint i) const + { + DBUG_ASSERT(i < m_comparator_count); + return m_comparators[i].m_handler; + } + uint get_comparator_arg_index(uint i) const + { + DBUG_ASSERT(i < m_comparator_count); + return m_comparators[i].m_arg_index; + } + cmp_item *get_comparator_cmp_item(uint i) const + { + DBUG_ASSERT(i < m_comparator_count); + return m_comparators[i].m_cmp_item; + } + +#ifndef DBUG_OFF + void debug_print(THD *thd) + { + for (uint i= 0; i < m_comparator_count; i++) + { + DBUG_EXECUTE_IF("Predicant_to_list_comparator", + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_UNKNOWN_ERROR, "DBUG: [%d] arg=%d handler=%d (%s)", i, + m_comparators[i].m_arg_index, + m_comparators[i].m_handler_index, + m_comparators[m_comparators[i].m_handler_index]. + m_handler->name().ptr());); + } + } +#endif + + void add_predicant(Item_args *args, uint predicant_index) + { + DBUG_ASSERT(m_comparator_count == 0); // Set in constructor + DBUG_ASSERT(m_predicant_index == 0); // Set in constructor + DBUG_ASSERT(predicant_index < args->argument_count()); + m_predicant_index= predicant_index; + } + /** + Add a new element into m_comparators[], using a {pred,valueN} pair. + + @param funcname - the name of the operation, for error reporting + @param args - the owner function's argument list + @param value_index - the value position in args + @retval true - could not add an element because of non-comparable + arguments (e.g. ROWs with size) + @retval false - a new element was successfully added. + */ + bool add_value(const LEX_CSTRING &funcname, Item_args *args, + uint value_index); + + /** + Add a new element into m_comparators[], ignoring explicit NULL values. 
+ If the value appeared to be an explicit NULL, nulls_found[0] is set to true. + */ + bool add_value_skip_null(const LEX_CSTRING &funcname, + Item_args *args, uint value_index, + bool *nulls_found); + + /** + Signal "this" that there will be no new add_value*() calls, + so it can prepare its internal structures for comparison. + + @param [OUT] compatible - If all comparators are compatible, + their data type handler is returned here. + @param [OUT] unuque_cnt - The number of unique data type handlers found. + If the value returned in *unique_cnt is 0, + it means all values were explicit NULLs: + expr0 IN (NULL,NULL,..,NULL) + @param [OUT] found_type - The bit mask for all found cmp_type()'s. + */ + void all_values_added(Type_handler_hybrid_field_type *compatible, + uint *unique_cnt, uint *found_types) + { + detect_unique_handlers(compatible, unique_cnt, found_types); + } + /** + Creates cmp_item instances for all unique handlers and stores + them into m_comparators[].m_cmp_item, using the information previously + populated by add_predicant(), add_value() and detect_unque_handlers(). + */ + bool make_unique_cmp_items(THD *thd, CHARSET_INFO *cs); + void cleanup() + { + DBUG_ASSERT(m_comparators); + for (uint i= 0; i < m_comparator_count; i++) + m_comparators[i].cleanup(); + memset(m_comparators, 0, sizeof(m_comparators[0]) * m_comparator_count); + m_comparator_count= 0; + m_predicant_index= 0; + } + bool init_clone(THD *thd, uint nvalues) + { + m_comparator_count= 0; + m_predicant_index= 0; + return alloc_comparators(thd, nvalues); + } + /** + @param [IN] args - The argument list that was previously used with + add_predicant() and add_value(). + @param [OUT] idx - In case if a value that is equal to the predicant + was found, the index of the matching value is returned + here. Otherwise, *idx is not changed. + @param [IN/OUT] found_unknown_values - how to handle UNKNOWN results. + If found_unknown_values is NULL (e.g. 
Item_func_case), + cmp() returns immediately when the first UNKNOWN + result is found. + If found_unknown_values is non-NULL (Item_func_in), + cmp() does not return when an UNKNOWN result is found, + sets *found_unknown_values to true, and continues + to compare the remaining pairs to find FALSE + (i.e. the value that is equal to the predicant). + + @retval false - Found a value that is equal to the predicant + @retval true - Didn't find an equal value + */ + bool cmp(Item_args *args, uint *idx, bool *found_unknown_values) + { + for (uint i= 0 ; i < m_comparator_count ; i++) + { + DBUG_ASSERT(m_comparators[i].m_handler != NULL); + const int rc= cmp_arg(args, i); + if (rc == FALSE) + { + *idx= m_comparators[i].m_arg_index; + return false; // Found a matching value + } + if (rc == UNKNOWN) + { + if (!found_unknown_values) + return true; + *found_unknown_values= true; + } + } + return true; // Not found + } + /* + Same as above, but treats two NULLs as equal, e.g. as in DECODE_ORACLE(). + */ + bool cmp_nulls_equal(THD *thd, Item_args *args, uint *idx) + { + for (uint i= 0 ; i < m_comparator_count ; i++) + { + DBUG_ASSERT(m_comparators[i].m_handler != NULL); + if (cmp_args_nulls_equal(thd, args, i) == FALSE) + { + *idx= m_comparators[i].m_arg_index; + return false; // Found a matching value + } + } + return true; // Not found + } +}; + + +/* + The class Item_func_case is the CASE ... WHEN ... THEN ... END function + implementation. 
+*/ + +class Item_func_case :public Item_func_case_expression +{ +protected: + String tmp_value; + DTCollation cmp_collation; + bool aggregate_then_and_else_arguments(THD *thd, uint count); + virtual Item **else_expr_addr() const= 0; + virtual Item *find_item()= 0; + inline void print_when_then_arguments(String *str, + enum_query_type query_type, + Item **items, uint count); + inline void print_else_argument(String *str, enum_query_type query_type, + Item *item); + void reorder_args(uint start); +public: + Item_func_case(THD *thd, List &list) + :Item_func_case_expression(thd, list) + { } + double real_op() override; + longlong int_op() override; + String *str_op(String *) override; + my_decimal *decimal_op(my_decimal *) override; + bool date_op(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + bool time_op(THD *thd, MYSQL_TIME *ltime) override; + bool native_op(THD *thd, Native *to) override; + bool fix_fields(THD *thd, Item **ref) override; + table_map not_null_tables() const override { return 0; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("case") }; + return name; + } + CHARSET_INFO *compare_collation() const { return cmp_collation.collation; } + bool need_parentheses_in_default() override { return true; } +}; + + +/* + CASE WHEN cond THEN res [WHEN cond THEN res...] [ELSE res] END + + Searched CASE checks all WHEN expressions one after another. + When some WHEN expression evaluated to TRUE then the + value of the corresponding THEN expression is returned. +*/ +class Item_func_case_searched: public Item_func_case +{ + uint when_count() const { return arg_count / 2; } + bool with_else() const { return arg_count % 2; } + Item **else_expr_addr() const override + { return with_else() ? 
&args[arg_count - 1] : 0; } +public: + Item_func_case_searched(THD *thd, List &list) + :Item_func_case(thd, list) + { + DBUG_ASSERT(arg_count >= 2); + reorder_args(0); + } + enum Functype functype() const override { return CASE_SEARCHED_FUNC; } + void print(String *str, enum_query_type query_type) override; + bool fix_length_and_dec(THD *thd) override; + Item *propagate_equal_fields(THD *thd, const Context &ctx, COND_EQUAL *cond) + override + { + // None of the arguments are in a comparison context + Item_args::propagate_equal_fields(thd, Context_identity(), cond); + return this; + } + Item *find_item() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + CASE pred WHEN value THEN res [WHEN value THEN res...] [ELSE res] END + + When the predicant expression is specified then it is compared to each WHEN + expression individually. When an equal WHEN expression is found + the corresponding THEN expression is returned. + In order to do correct comparisons several comparators are used. One for + each result type. Different result types that are used in particular + CASE ... END expression are collected in the fix_length_and_dec() member + function and only comparators for there result types are used. +*/ +class Item_func_case_simple: public Item_func_case, + public Predicant_to_list_comparator +{ +protected: + uint m_found_types; + uint when_count() const { return (arg_count - 1) / 2; } + bool with_else() const { return arg_count % 2 == 0; } + Item **else_expr_addr() const override + { return with_else() ? 
&args[arg_count - 1] : 0; } + bool aggregate_switch_and_when_arguments(THD *thd, bool nulls_equal); + bool prepare_predicant_and_values(THD *thd, uint *found_types, + bool nulls_equal); +public: + Item_func_case_simple(THD *thd, List &list) + :Item_func_case(thd, list), + Predicant_to_list_comparator(thd, arg_count), + m_found_types(0) + { + DBUG_ASSERT(arg_count >= 3); + reorder_args(1); + } + void cleanup() override + { + DBUG_ENTER("Item_func_case_simple::cleanup"); + Item_func::cleanup(); + Predicant_to_list_comparator::cleanup(); + DBUG_VOID_RETURN; + } + enum Functype functype() const override { return CASE_SIMPLE_FUNC; } + void print(String *str, enum_query_type query_type) override; + bool fix_length_and_dec(THD *thd) override; + Item *propagate_equal_fields(THD *thd, const Context &ctx, COND_EQUAL *cond) + override; + Item *find_item() override; + Item *build_clone(THD *thd) override + { + Item_func_case_simple *clone= (Item_func_case_simple *) + Item_func_case::build_clone(thd); + uint ncases= when_count(); + if (clone && clone->Predicant_to_list_comparator::init_clone(thd, ncases)) + return NULL; + return clone; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_decode_oracle: public Item_func_case_simple +{ +public: + Item_func_decode_oracle(THD *thd, List &list) + :Item_func_case_simple(thd, list) + { } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("decode_oracle") }; + return name; + } + void print(String *str, enum_query_type query_type) override; + bool fix_length_and_dec(THD *thd) override; + Item *find_item() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + The Item_func_in class implements + in_expr IN () + and + in_expr NOT IN () + + The current implementation distinguishes 2 cases: + 1) all items in are constants and have the same + result type. 
This case is handled by in_vector class, + implementing fast bisection search. + 2) otherwise Item_func_in employs several cmp_item objects to perform + comparisons of in_expr and an item from . One cmp_item + object for each result type. Different result types are collected in the + fix_length_and_dec() member function by means of collect_cmp_types() + function. + + Bisection is possible when: + 1. All types are similar + 2. All expressions in are const + In the presence of NULLs, the correct result of evaluating this item + must be UNKNOWN or FALSE. To achieve that: + - If type is scalar, we can use bisection and the "have_null" boolean. + - If type is ROW, we will need to scan all of when + searching, so bisection is impossible. Unless: + 3. UNKNOWN and FALSE are equivalent results + 4. Neither left expression nor contain any NULL value +*/ +class Item_func_in :public Item_func_opt_neg, + public Predicant_to_list_comparator +{ + /** + Usable if is made only of constants. Returns true if one + of these constants contains a NULL. Example: + IN ( (-5, (12,NULL)), ... ). + */ + bool list_contains_null(); + bool all_items_are_consts(Item **items, uint nitems) const + { + for (uint i= 0; i < nitems; i++) + { + if (!items[i]->can_eval_in_optimize()) + return false; + } + return true; + } + bool prepare_predicant_and_values(THD *thd, uint *found_types); + bool check_arguments() const override + { + return check_argument_types_like_args0(); + } +protected: + SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, + Field *field, Item *value) override; + bool transform_into_subq; + bool transform_into_subq_checked; +public: + /// An array of values, created when the bisection lookup method is used + in_vector *array; + /** + If there is some NULL among , during a val_int() call; for + example + IN ( (1,(3,'col')), ... ), where 'col' is a column which evaluates to + NULL. 
+ */ + bool have_null; + /** + true when all arguments of the IN list are of compatible types + and can be used safely as comparisons for key conditions + */ + bool arg_types_compatible; + + TABLE_LIST *emb_on_expr_nest; + + Item_func_in(THD *thd, List &list): + Item_func_opt_neg(thd, list), + Predicant_to_list_comparator(thd, arg_count - 1), + transform_into_subq(false), + transform_into_subq_checked(false), + array(0), have_null(0), + arg_types_compatible(FALSE), emb_on_expr_nest(0) + { } + longlong val_int() override; + bool fix_fields(THD *, Item **) override; + bool fix_length_and_dec(THD *thd) override; + bool compatible_types_scalar_bisection_possible() + { + DBUG_ASSERT(m_comparator.cmp_type() != ROW_RESULT); + return all_items_are_consts(args + 1, arg_count - 1); // Bisection #2 + } + bool compatible_types_row_bisection_possible() + { + DBUG_ASSERT(m_comparator.cmp_type() == ROW_RESULT); + return all_items_are_consts(args + 1, arg_count - 1) && // Bisection #2 + ((is_top_level_item() && !negated) || // Bisection #3 + (!list_contains_null() && !args[0]->maybe_null())); // Bisection #4 + } + bool agg_all_arg_charsets_for_comparison() + { + return agg_arg_charsets_for_comparison(cmp_collation, args, arg_count); + } + void fix_in_vector(); + bool value_list_convert_const_to_int(THD *thd); + bool fix_for_scalar_comparison_using_bisection(THD *thd) + { + array= m_comparator.type_handler()->make_in_vector(thd, this, arg_count - 1); + if (!array) // OOM + return true; + fix_in_vector(); + return false; + } + bool fix_for_scalar_comparison_using_cmp_items(THD *thd, uint found_types); + + bool fix_for_row_comparison_using_cmp_items(THD *thd); + bool fix_for_row_comparison_using_bisection(THD *thd); + + void cleanup() override + { + DBUG_ENTER("Item_func_in::cleanup"); + Item_int_func::cleanup(); + delete array; + array= 0; + Predicant_to_list_comparator::cleanup(); + DBUG_VOID_RETURN; + } + void add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level, + 
table_map usable_tables, SARGABLE_PARAM **sargables) + override; + SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) override; + SEL_TREE *get_func_row_mm_tree(RANGE_OPT_PARAM *param, Item_row *key_row); + Item* propagate_equal_fields(THD *thd, const Context &ctx, COND_EQUAL *cond) + override + { + /* + Note, we pass ANY_SUBST, this makes sure that non of the args + will be replaced to a zero-filled Item_string. + Such a change would require rebuilding of cmp_items. + */ + if (arg_types_compatible) + { + Context cmpctx(ANY_SUBST, m_comparator.type_handler(), + Item_func_in::compare_collation()); + args[0]->propagate_equal_fields_and_change_item_tree(thd, cmpctx, + cond, &args[0]); + } + for (uint i= 0; i < comparator_count(); i++) + { + Context cmpctx(ANY_SUBST, get_comparator_type_handler(i), + Item_func_in::compare_collation()); + uint idx= get_comparator_arg_index(i); + args[idx]->propagate_equal_fields_and_change_item_tree(thd, cmpctx, + cond, &args[idx]); + } + return this; + } + void print(String *str, enum_query_type query_type) override; + enum Functype functype() const override { return IN_FUNC; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("in") }; + return name; + } + enum precedence precedence() const override { return IN_PRECEDENCE; } + bool eval_not_null_tables(void *opt_arg) override; + bool find_not_null_fields(table_map allowed) override; + void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge) + override; + bool count_sargable_conds(void *arg) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + Item *build_clone(THD *thd) override + { + Item_func_in *clone= (Item_func_in *) Item_func::build_clone(thd); + if (clone) + { + clone->array= 0; + if (clone->Predicant_to_list_comparator::init_clone(thd, arg_count - 1)) + return NULL; + } + return clone; + } + void mark_as_condition_AND_part(TABLE_LIST *embedding) override; + bool 
to_be_transformed_into_in_subq(THD *thd); + bool create_value_list_for_tvc(THD *thd, List< List > *values); + Item *in_predicate_to_in_subs_transformer(THD *thd, uchar *arg) override; + Item *in_predicate_to_equality_transformer(THD *thd, uchar *arg) override; + uint32 max_length_of_left_expr(); +}; + +class cmp_item_row :public cmp_item +{ + cmp_item **comparators; + uint n; + bool alloc_comparators(THD *thd, uint n); + bool aggregate_row_elements_for_comparison(THD *thd, + Type_handler_hybrid_field_type *cmp, + Item_args *tmp, + const LEX_CSTRING &funcname, + uint col, + uint level); +public: + cmp_item_row(): comparators(0), n(0) {} + ~cmp_item_row(); + void store_value(Item *item) override; + bool prepare_comparators(THD *, const LEX_CSTRING &funcname, + const Item_args *args, uint level); + int cmp(Item *arg) override; + int cmp_not_null(const Value *val) override + { + DBUG_ASSERT(false); + return TRUE; + } + int compare(cmp_item *arg) override; + cmp_item *make_same(THD *thd) override; + bool store_value_by_template(THD *thd, cmp_item *tmpl, Item *) override; + friend class Item_func_in; + cmp_item *get_comparator(uint i) { return comparators[i]; } +}; + + +class in_row :public in_vector +{ + cmp_item_row tmp; +public: + in_row(THD *thd, uint elements, Item *); + ~in_row(); + bool set(uint pos, Item *item) override; + uchar *get_value(Item *item) override; + friend class Item_func_in; + const Type_handler *type_handler() const override { return &type_handler_row; } + cmp_item *get_cmp_item() { return &tmp; } +}; + +/* Functions used by where clause */ +class Item_func_null_predicate :public Item_bool_func +{ +protected: + SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, + Field *field, Item *value) override + { + DBUG_ENTER("Item_func_null_predicate::get_func_mm_tree"); + DBUG_RETURN(get_mm_parts(param, field, functype(), value)); + } + SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param, Field *field, + KEY_PART *key_part, + Item_func::Functype type, Item *value) 
override; +public: + Item_func_null_predicate(THD *thd, Item *a): Item_bool_func(thd, a) { } + void add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level, + table_map usable_tables, SARGABLE_PARAM **sargables) + override; + SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) override + { + DBUG_ENTER("Item_func_null_predicate::get_mm_tree"); + SEL_TREE *ftree= get_full_func_mm_tree_for_args(param, args[0], NULL); + if (!ftree) + ftree= Item_func::get_mm_tree(param, cond_ptr); + DBUG_RETURN(ftree); + } + CHARSET_INFO *compare_collation() const override + { return args[0]->collation.collation; } + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + max_length=1; + base_flags&= ~item_base_t::MAYBE_NULL; + return FALSE; + } + bool count_sargable_conds(void *arg) override; +}; + + +class Item_func_isnull :public Item_func_null_predicate +{ +public: + Item_func_isnull(THD *thd, Item *a): Item_func_null_predicate(thd, a) {} + longlong val_int() override; + enum Functype functype() const override { return ISNULL_FUNC; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("isnull") }; + return name; + } + void print(String *str, enum_query_type query_type) override; + enum precedence precedence() const override { return CMP_PRECEDENCE; } + + bool arg_is_datetime_notnull_field() + { + Item **args= arguments(); + if (args[0]->real_item()->type() == Item::FIELD_ITEM) + { + Field *field=((Item_field*) args[0]->real_item())->field; + + if ((field->flags & NOT_NULL_FLAG) && + field->type_handler()->cond_notnull_field_isnull_to_field_eq_zero()) + return true; + } + return false; + } + + /* Optimize case of not_null_column IS NULL */ + void update_used_tables() override + { + if (!args[0]->maybe_null() && !arg_is_datetime_notnull_field()) + { + used_tables_cache= 0; /* is always false */ + const_item_cache= 1; + } + else + { + args[0]->update_used_tables(); + used_tables_cache= args[0]->used_tables(); + 
const_item_cache= args[0]->const_item(); + } + } + COND *remove_eq_conds(THD *thd, Item::cond_result *cond_value, + bool top_level) override; + table_map not_null_tables() const override { return 0; } + bool find_not_null_fields(table_map allowed) override; + Item *neg_transformer(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +/* Functions used by HAVING for rewriting IN subquery */ + +class Item_in_subselect; + +/* + This is like IS NOT NULL but it also remembers if it ever has + encountered a NULL. +*/ +class Item_is_not_null_test :public Item_func_isnull +{ + Item_in_subselect* owner; +public: + Item_is_not_null_test(THD *thd, Item_in_subselect* ow, Item *a): + Item_func_isnull(thd, a), owner(ow) + {} + enum Functype functype() const override { return ISNOTNULLTEST_FUNC; } + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("") }; + return name; + } + void update_used_tables() override; + /* + we add RAND_TABLE_BIT to prevent moving this item from HAVING to WHERE + */ + table_map used_tables() const override + { return used_tables_cache | RAND_TABLE_BIT; } + bool const_item() const override { return FALSE; } +}; + + +class Item_func_isnotnull :public Item_func_null_predicate +{ +public: + Item_func_isnotnull(THD *thd, Item *a): + Item_func_null_predicate(thd, a) {} + longlong val_int() override; + enum Functype functype() const override { return ISNOTNULL_FUNC; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("isnotnull") }; + return name; + } + enum precedence precedence() const override { return CMP_PRECEDENCE; } + table_map not_null_tables() const override + { return is_top_level_item() ? 
not_null_tables_cache : 0; } + Item *neg_transformer(THD *thd) override; + void print(String *str, enum_query_type query_type) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_like :public Item_bool_func2 +{ + // Turbo Boyer-Moore data + bool canDoTurboBM; // pattern is '%abcd%' case + const char* pattern; + int pattern_len; + + // TurboBM buffers, *this is owner + int* bmGs; // good suffix shift table, size is pattern_len + 1 + int* bmBc; // bad character shift table, size is alphabet_size + + void turboBM_compute_suffixes(int* suff); + void turboBM_compute_good_suffix_shifts(int* suff); + void turboBM_compute_bad_character_shifts(); + bool turboBM_matches(const char* text, int text_len) const; + enum { alphabet_size = 256 }; + + Item *escape_item; + + bool escape_used_in_parsing; + bool use_sampling; + + DTCollation cmp_collation; + String cmp_value1, cmp_value2; + bool with_sargable_pattern() const; +protected: + SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, + Field *field, Item *value) override + { + DBUG_ENTER("Item_func_like::get_func_mm_tree"); + DBUG_RETURN(get_mm_parts(param, field, LIKE_FUNC, value)); + } + SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param, Field *field, + KEY_PART *key_part, + Item_func::Functype type, Item *value) override; +public: + int escape; + bool negated; + + Item_func_like(THD *thd, Item *a, Item *b, Item *escape_arg, bool escape_used): + Item_bool_func2(thd, a, b), canDoTurboBM(FALSE), pattern(0), pattern_len(0), + bmGs(0), bmBc(0), escape_item(escape_arg), + escape_used_in_parsing(escape_used), use_sampling(0), negated(0) {} + + bool get_negated() const { return negated; } // Used by ColumnStore + + Sql_mode_dependency value_depends_on_sql_mode() const override; + longlong val_int() override; + enum Functype functype() const override { return LIKE_FUNC; } + void print(String *str, enum_query_type query_type) override; + CHARSET_INFO *compare_collation() const override 
+ { return cmp_collation.collation; } + cond_result eq_cmp_result() const override + { + /** + We cannot always rewrite conditions as follows: + from: WHERE expr1=const AND expr1 LIKE expr2 + to: WHERE expr1=const AND const LIKE expr2 + or + from: WHERE expr1=const AND expr2 LIKE expr1 + to: WHERE expr1=const AND expr2 LIKE const + + because LIKE works differently comparing to the regular "=" operator: + + 1. LIKE performs a stricter one-character-to-one-character comparison + and does not recognize contractions and expansions. + Replacing "expr1" to "const in LIKE would make the condition + stricter in case of a complex collation. + + 2. LIKE does not ignore trailing spaces and thus works differently + from the "=" operator in case of "PAD SPACE" collations + (which are the majority in MariaDB). So, for "PAD SPACE" collations: + + - expr1=const - ignores trailing spaces + - const LIKE expr2 - does not ignore trailing spaces + - expr2 LIKE const - does not ignore trailing spaces + + Allow only "binary" for now. + It neither ignores trailing spaces nor has contractions/expansions. + + TODO: + We could still replace "expr1" to "const" in "expr1 LIKE expr2" + in case of a "PAD SPACE" collation, but only if "expr2" has '%' + at the end. + */ + return compare_collation() == &my_charset_bin ? COND_TRUE : COND_OK; + } + void add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level, + table_map usable_tables, SARGABLE_PARAM **sargables) + override; + SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) override; + Item* propagate_equal_fields(THD *thd, const Context &ctx, COND_EQUAL *cond) + override + { + /* + LIKE differs from the regular comparison operator ('=') in the following: + - LIKE never ignores trailing spaces (even for PAD SPACE collations) + Propagation of equal fields with a PAD SPACE collation into LIKE + is not safe. 
+ Example: + WHERE a='a ' AND a LIKE 'a' - returns true for 'a' + cannot be rewritten to: + WHERE a='a ' AND 'a ' LIKE 'a' - returns false for 'a' + Note, binary collations in MySQL/MariaDB, e.g. latin1_bin, + still have the PAD SPACE attribute and ignore trailing spaces! + - LIKE does not take into account contractions, expansions, + and ignorable characters. + Propagation of equal fields with contractions/expansions/ignorables + is also not safe. + + It's safe to propagate my_charset_bin (BINARY/VARBINARY/BLOB) values, + because they do not ignore trailing spaces and have one-to-one mapping + between a string and its weights. + The below condition should be true only for my_charset_bin + (as of version 10.1.7). + */ + uint flags= Item_func_like::compare_collation()->state; + if ((flags & MY_CS_NOPAD) && !(flags & MY_CS_NON1TO1)) + Item_args::propagate_equal_fields(thd, + Context(ANY_SUBST, + &type_handler_long_blob, + compare_collation()), + cond); + return this; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("like") }; + return name; + } + enum precedence precedence() const override { return IN_PRECEDENCE; } + bool fix_fields(THD *thd, Item **ref) override; + bool fix_length_and_dec(THD *thd) override + { + max_length= 1; + Item_args old_predicant(args[0]); + if (agg_arg_charsets_for_comparison(cmp_collation, args, 2)) + return true; + raise_note_if_key_become_unused(current_thd, old_predicant); + return false; + } + void cleanup() override; + + Item *neg_transformer(THD *thd) override + { + negated= !negated; + return this; + } + + bool walk(Item_processor processor, bool walk_subquery, void *arg) override + { + return (walk_args(processor, walk_subquery, arg) || + escape_item->walk(processor, walk_subquery, arg) || + (this->*processor)(arg)); + } + + bool find_selective_predicates_list_processor(void *arg) override; + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + 
+typedef struct pcre2_real_code_8 pcre2_code; +typedef struct pcre2_real_match_data_8 pcre2_match_data; +#define PCRE2_SIZE size_t +class Regexp_processor_pcre +{ + pcre2_code *m_pcre; + pcre2_match_data *m_pcre_match_data; + bool m_conversion_is_needed; + bool m_is_const; + int m_library_flags; + CHARSET_INFO *m_library_charset; + String m_prev_pattern; + int m_pcre_exec_rc; + PCRE2_SIZE *m_SubStrVec; + void pcre_exec_warn(int rc) const; + int pcre_exec_with_warn(const pcre2_code *code, + pcre2_match_data *data, + const char *subject, int length, int startoffset, + int options); +public: + String *convert_if_needed(String *src, String *converter); + String subject_converter; + String pattern_converter; + String replace_converter; + Regexp_processor_pcre() : + m_pcre(NULL), m_pcre_match_data(NULL), + m_conversion_is_needed(true), m_is_const(0), + m_library_flags(0), + m_library_charset(&my_charset_utf8mb3_general_ci) + {} + int default_regex_flags(); + void init(CHARSET_INFO *data_charset, int extra_flags); + void fix_owner(Item_func *owner, Item *subject_arg, Item *pattern_arg); + bool compile(String *pattern, bool send_error); + bool compile(Item *item, bool send_error); + bool recompile(Item *item) + { + return !m_is_const && compile(item, false); + } + bool exec(const char *str, size_t length, size_t offset); + bool exec(String *str, int offset, uint n_result_offsets_to_convert); + bool exec(Item *item, int offset, uint n_result_offsets_to_convert); + bool match() const { return m_pcre_exec_rc < 0 ? 0 : 1; } + int nsubpatterns() const { return m_pcre_exec_rc <= 0 ? 0 : m_pcre_exec_rc; } + size_t subpattern_start(int n) const + { + return m_pcre_exec_rc <= 0 ? 0 : m_SubStrVec[n * 2]; + } + size_t subpattern_end(int n) const + { + return m_pcre_exec_rc <= 0 ? 
0 : m_SubStrVec[n * 2 + 1]; + } + size_t subpattern_length(int n) const + { + return subpattern_end(n) - subpattern_start(n); + } + void reset() + { + m_pcre= NULL; + m_pcre_match_data= NULL; + m_prev_pattern.length(0); + } + void cleanup(); + bool is_compiled() const { return m_pcre != NULL; } + bool is_const() const { return m_is_const; } + void set_const(bool arg) { m_is_const= arg; } + CHARSET_INFO * library_charset() const { return m_library_charset; } +}; + + +class Item_func_regex :public Item_bool_func +{ + Regexp_processor_pcre re; + DTCollation cmp_collation; +public: + Item_func_regex(THD *thd, Item *a, Item *b): Item_bool_func(thd, a, b) + {} + void cleanup() override + { + DBUG_ENTER("Item_func_regex::cleanup"); + Item_bool_func::cleanup(); + re.cleanup(); + DBUG_VOID_RETURN; + } + longlong val_int() override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("regexp") }; + return name; + } + enum precedence precedence() const override { return IN_PRECEDENCE; } + Item *get_copy(THD *) override { return 0; } + void print(String *str, enum_query_type query_type) override + { + print_op(str, query_type); + } + + CHARSET_INFO *compare_collation() const override + { return cmp_collation.collation; } +}; + + +/* + In the corner case REGEXP_INSTR could return (2^32 + 1), + which would not fit into Item_long_func range. + But string lengths are limited with max_allowed_packet, + which cannot be bigger than 1024*1024*1024. 
+*/ +class Item_func_regexp_instr :public Item_long_func +{ + bool check_arguments() const override + { + return (args[0]->check_type_can_return_str(func_name_cstring()) || + args[1]->check_type_can_return_text(func_name_cstring())); + } + Regexp_processor_pcre re; + DTCollation cmp_collation; +public: + Item_func_regexp_instr(THD *thd, Item *a, Item *b) + :Item_long_func(thd, a, b) + {} + void cleanup() override + { + DBUG_ENTER("Item_func_regexp_instr::cleanup"); + Item_int_func::cleanup(); + re.cleanup(); + DBUG_VOID_RETURN; + } + longlong val_int() override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("regexp_instr") }; + return name; + } + Item *get_copy(THD *thd) override { return 0; } +}; + + +typedef class Item COND; + +class Item_cond :public Item_bool_func +{ +protected: + List list; + table_map and_tables_cache; + +public: + Item_cond(THD *thd): Item_bool_func(thd) + { + /* Item_cond() is only used to create top level items */ + top_level_item(); + const_item_cache=0; + } + Item_cond(THD *thd, Item *i1, Item *i2); + Item_cond(THD *thd, Item_cond *item); + Item_cond(THD *thd, List &nlist): + Item_bool_func(thd), list(nlist) {} + bool add(Item *item, MEM_ROOT *root) + { + DBUG_ASSERT(item); + return list.push_back(item, root); + } + bool add_at_head(Item *item, MEM_ROOT *root) + { + DBUG_ASSERT(item); + return list.push_front(item, root); + } + void add_at_head(List *nlist) + { + DBUG_ASSERT(nlist->elements); + list.prepend(nlist); + } + void add_at_end(List *nlist) + { + DBUG_ASSERT(nlist->elements); + list.append(nlist); + } + bool fix_fields(THD *, Item **ref) override; + void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge) + override; + + enum Type type() const override { return COND_ITEM; } + List* argument_list() { return &list; } + table_map used_tables() const override; + void update_used_tables() override + { + 
used_tables_and_const_cache_init(); + used_tables_and_const_cache_update_and_join(list); + } + COND *build_equal_items(THD *thd, COND_EQUAL *inherited, + bool link_item_fields, + COND_EQUAL **cond_equal_ref) override; + COND *remove_eq_conds(THD *thd, Item::cond_result *cond_value, + bool top_level) override; + void add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables) override; + SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) override; + void print(String *str, enum_query_type query_type) override; + void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List &fields, uint flags) override; + friend int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves, + COND **conds); + void copy_andor_arguments(THD *thd, Item_cond *item); + bool walk(Item_processor processor, bool walk_subquery, void *arg) override; + Item *do_transform(THD *thd, Item_transformer transformer, uchar *arg, + bool toplevel); + Item *transform(THD *thd, Item_transformer transformer, uchar *arg) override + { + return do_transform(thd, transformer, arg, 0); + } + Item *top_level_transform(THD *thd, Item_transformer transformer, uchar *arg) + override + { + return do_transform(thd, transformer, arg, 1); + } + void traverse_cond(Cond_traverser, void *arg, traverse_order order) override; + void neg_arguments(THD *thd); + Item* propagate_equal_fields(THD *, const Context &, COND_EQUAL *) override; + Item *do_compile(THD *thd, Item_analyzer analyzer, uchar **arg_p, + Item_transformer transformer, uchar *arg_t, bool toplevel); + Item *compile(THD *thd, Item_analyzer analyzer, uchar **arg_p, + Item_transformer transformer, uchar *arg_t) override + { + return do_compile(thd, analyzer, arg_p, transformer, arg_t, 0); + } + Item* top_level_compile(THD *thd, Item_analyzer analyzer, uchar **arg_p, + Item_transformer transformer, uchar *arg_t) override + { + return do_compile(thd, analyzer, arg_p, 
transformer, arg_t, 1); + } + bool eval_not_null_tables(void *opt_arg) override; + bool find_not_null_fields(table_map allowed) override; + Item *build_clone(THD *thd) override; + bool excl_dep_on_table(table_map tab_map) override; + bool excl_dep_on_grouping_fields(st_select_lex *sel) override; + +private: + void merge_sub_condition(List_iterator& li); +}; + +template class LI, class T> class Item_equal_iterator; + +/* + The class Item_equal is used to represent conjunctions of equality + predicates of the form field1 = field2, and field=const in where + conditions and on expressions. + + All equality predicates of the form field1=field2 contained in a + conjunction are substituted for a sequence of items of this class. + An item of this class Item_equal(f1,f2,...fk) represents a + multiple equality f1=f2=...=fk.l + + If a conjunction contains predicates f1=f2 and f2=f3, a new item of + this class is created Item_equal(f1,f2,f3) representing the multiple + equality f1=f2=f3 that substitutes the above equality predicates in + the conjunction. + A conjunction of the predicates f2=f1 and f3=f1 and f3=f2 will be + substituted for the item representing the same multiple equality + f1=f2=f3. + An item Item_equal(f1,f2) can appear instead of a conjunction of + f2=f1 and f1=f2, or instead of just the predicate f1=f2. + + An item of the class Item_equal inherits equalities from outer + conjunctive levels. + + Suppose we have a where condition of the following form: + WHERE f1=f2 AND f3=f4 AND f3=f5 AND ... AND (...OR (f1=f3 AND ...)). + In this case: + f1=f2 will be substituted for Item_equal(f1,f2); + f3=f4 and f3=f5 will be substituted for Item_equal(f3,f4,f5); + f1=f3 will be substituted for Item_equal(f1,f2,f3,f4,f5); + + An object of the class Item_equal can contain an optional constant + item c. Then it represents a multiple equality of the form + c=f1=...=fk. + + Objects of the class Item_equal are used for the following: + + 1. 
An object Item_equal(t1.f1,...,tk.fk) allows us to consider any + pair of tables ti and tj as joined by an equi-condition. + Thus it provide us with additional access paths from table to table. + + 2. An object Item_equal(t1.f1,...,tk.fk) is applied to deduce new + SARGable predicates: + f1=...=fk AND P(fi) => f1=...=fk AND P(fi) AND P(fj). + It also can give us additional index scans and can allow us to + improve selectivity estimates. + + 3. An object Item_equal(t1.f1,...,tk.fk) is used to optimize the + selected execution plan for the query: if table ti is accessed + before the table tj then in any predicate P in the where condition + the occurrence of tj.fj is substituted for ti.fi. This can allow + an evaluation of the predicate at an earlier step. + + When feature 1 is supported they say that join transitive closure + is employed. + When feature 2 is supported they say that search argument transitive + closure is employed. + Both features are usually supported by preprocessing original query and + adding additional predicates. + We do not just add predicates, we rather dynamically replace some + predicates that can not be used to access tables in the investigated + plan for those, obtained by substitution of some fields for equal fields, + that can be used. + + Prepared Statements/Stored Procedures note: instances of class + Item_equal are created only at the time a PS/SP is executed and + are deleted in the end of execution. All changes made to these + objects need not be registered in the list of changes of the parse + tree and do not harm PS/SP re-execution. + + Item equal objects are employed only at the optimize phase. Usually they are + not supposed to be evaluated. Yet in some cases we call the method val_int() + for them. We have to take care of restricting the predicate such an + object represents f1=f2= ...=fn to the projection of known fields fi1=...=fik. +*/ + +class Item_equal: public Item_bool_func +{ + /* + The list of equal items. 
Currently the list can contain: + - Item_fields items for references to table columns + - Item_direct_view_ref items for references to view columns + - one const item + + If the list contains a constant item this item is always first in the list. + The list contains at least two elements. + Currently all Item_fields/Item_direct_view_ref items in the list should + refer to table columns with equavalent type definitions. In particular + if these are string columns they should have the same charset/collation. + + Use objects of the companion class Item_equal_fields_iterator to iterate + over all items from the list of the Item_field/Item_direct_view_ref classes. + */ + List equal_items; + /* + TRUE <-> one of the items is a const item. + Such item is always first in in the equal_items list + */ + bool with_const; + /* + The field eval_item is used when this item is evaluated + with the method val_int() + */ + cmp_item *eval_item; + /* + This initially is set to FALSE. It becomes TRUE when this item is evaluated + as being always false. If the flag is TRUE the contents of the list + the equal_items should be ignored. + */ + bool cond_false; + /* + This initially is set to FALSE. It becomes TRUE when this item is evaluated + as being always true. If the flag is TRUE the contents of the list + the equal_items should be ignored. + */ + bool cond_true; + /* + For Item_equal objects inside an OR clause: one of the fields that were + used in the original equality. + */ + Item_field *context_field; + + bool link_equal_fields; + + const Type_handler *m_compare_handler; + CHARSET_INFO *m_compare_collation; +public: + + COND_EQUAL *upper_levels; /* multiple equalities of upper and levels */ + + Item_equal(THD *thd, const Type_handler *handler, + Item *f1, Item *f2, bool with_const_item); + Item_equal(THD *thd, Item_equal *item_equal); + /* Currently the const item is always the first in the list of equal items */ + inline Item* get_const() { return with_const ? 
equal_items.head() : NULL; } + void add_const(THD *thd, Item *c); + /** Add a non-constant item to the multiple equality */ + void add(Item *f, MEM_ROOT *root) { equal_items.push_back(f, root); } + bool contains(Field *field); + Item* get_first(struct st_join_table *context, Item *field); + /** Get number of field items / references to field items in this object */ + uint n_field_items() { return equal_items.elements - MY_TEST(with_const); } + void merge(THD *thd, Item_equal *item); + bool merge_with_check(THD *thd, Item_equal *equal_item, bool save_merged); + void merge_into_list(THD *thd, List *list, bool save_merged, + bool only_intersected); + void update_const(THD *thd); + enum Functype functype() const override { return MULT_EQUAL_FUNC; } + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("multiple equal") }; + return name; + } + void sort(Item_field_cmpfunc compare, void *arg); + bool fix_length_and_dec(THD *thd) override; + bool fix_fields(THD *thd, Item **ref) override; + void cleanup() override + { + delete eval_item; + eval_item= NULL; + } + void update_used_tables() override; + bool find_not_null_fields(table_map allowed) override; + COND *build_equal_items(THD *thd, COND_EQUAL *inherited, + bool link_item_fields, + COND_EQUAL **cond_equal_ref) override; + void add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables) override; + SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) override; + bool walk(Item_processor processor, bool walk_subquery, void *arg) override; + Item *transform(THD *thd, Item_transformer transformer, uchar *arg) override; + void print(String *str, enum_query_type query_type) override; + const Type_handler *compare_type_handler() const { return m_compare_handler; } + CHARSET_INFO *compare_collation() const override + { return m_compare_collation; } + + void 
set_context_field(Item_field *ctx_field) { context_field= ctx_field; } + void set_link_equal_fields(bool flag) { link_equal_fields= flag; } + Item* get_copy(THD *thd) override { return 0; } + /* + This does not comply with the specification of the virtual method, + but Item_equal items are processed distinguishly anyway + */ + bool excl_dep_on_table(table_map tab_map) override + { + return used_tables() & tab_map; + } + bool excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred) override; + bool excl_dep_on_grouping_fields(st_select_lex *sel) override; + bool create_pushable_equalities(THD *thd, List *equalities, + Pushdown_checker checker, uchar *arg, + bool clone_const); + /* Return the number of elements in this multiple equality */ + uint elements_count() { return equal_items.elements; } + friend class Item_equal_fields_iterator; + bool count_sargable_conds(void *arg) override; + Item *multiple_equality_transformer(THD *thd, uchar *arg) override; + friend class Item_equal_iterator; + friend class Item_equal_iterator; + friend Item *eliminate_item_equal(THD *thd, COND *cond, + COND_EQUAL *upper_levels, + Item_equal *item_equal); + friend bool setup_sj_materialization_part1(struct st_join_table *tab); + friend bool setup_sj_materialization_part2(struct st_join_table *tab); +}; + +class COND_EQUAL: public Sql_alloc +{ +public: + uint max_members; /* max number of members the current level + list and all lower level lists */ + COND_EQUAL *upper_levels; /* multiple equalities of upper and levels */ + List current_level; /* list of multiple equalities of + the current and level */ + COND_EQUAL() + { + upper_levels= 0; + } + COND_EQUAL(Item_equal *item, MEM_ROOT *mem_root) + :upper_levels(0) + { + current_level.push_back(item, mem_root); + } + void copy(COND_EQUAL &cond_equal) + { + max_members= cond_equal.max_members; + upper_levels= cond_equal.upper_levels; + if (cond_equal.current_level.is_empty()) + current_level.empty(); + else + current_level= 
cond_equal.current_level; + } + bool is_empty() + { + return (current_level.elements == 0); + } +}; + + +/* + The template Item_equal_iterator is used to define classes + Item_equal_fields_iterator and Item_equal_fields_iterator_slow. + These are helper classes for the class Item equal + Both classes are used to iterate over references to table/view columns + from the list of equal items that included in an Item_equal object. + The second class supports the operation of removal of the current member + from the list when performing an iteration. +*/ + +template class LI, typename T> class Item_equal_iterator + : public LI +{ +protected: + Item_equal *item_equal; + Item *curr_item; +public: + Item_equal_iterator(Item_equal &item_eq) + :LI (item_eq.equal_items) + { + curr_item= NULL; + item_equal= &item_eq; + if (item_eq.with_const) + { + LI *list_it= this; + curr_item= (*list_it)++; + } + } + Item* operator++(int) + { + LI *list_it= this; + curr_item= (*list_it)++; + return curr_item; + } + void rewind(void) + { + LI *list_it= this; + list_it->rewind(); + if (item_equal->with_const) + curr_item= (*list_it)++; + } + Field *get_curr_field() + { + Item_field *item= (Item_field *) (curr_item->real_item()); + return item->field; + } +}; + +typedef Item_equal_iterator Item_equal_iterator_fast; + +class Item_equal_fields_iterator + :public Item_equal_iterator_fast +{ +public: + Item_equal_fields_iterator(Item_equal &item_eq) + :Item_equal_iterator_fast(item_eq) + { } + Item ** ref() + { + return List_iterator_fast::ref(); + } +}; + +typedef Item_equal_iterator Item_equal_iterator_iterator_slow; + +class Item_equal_fields_iterator_slow + :public Item_equal_iterator_iterator_slow +{ +public: + Item_equal_fields_iterator_slow(Item_equal &item_eq) + :Item_equal_iterator_iterator_slow(item_eq) + { } + void remove() + { + List_iterator::remove(); + } +}; + + +class Item_cond_and final :public Item_cond +{ +public: + COND_EQUAL m_cond_equal; /* contains list of Item_equal objects 
for + the current and level and reference + to multiple equalities of upper and levels */ + Item_cond_and(THD *thd): Item_cond(thd) {} + Item_cond_and(THD *thd, Item *i1,Item *i2): Item_cond(thd, i1, i2) {} + Item_cond_and(THD *thd, Item_cond_and *item): Item_cond(thd, item) {} + Item_cond_and(THD *thd, List &list_arg): Item_cond(thd, list_arg) {} + enum Functype functype() const override { return COND_AND_FUNC; } + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("and") }; + return name; + } + enum precedence precedence() const override { return AND_PRECEDENCE; } + table_map not_null_tables() const override + { return is_top_level_item() ? not_null_tables_cache: and_tables_cache; } + Item *copy_andor_structure(THD *thd) override; + Item *neg_transformer(THD *thd) override; + void mark_as_condition_AND_part(TABLE_LIST *embedding) override; + uint exists2in_reserved_items() override { return list.elements; }; + COND *build_equal_items(THD *thd, COND_EQUAL *inherited, + bool link_item_fields, + COND_EQUAL **cond_equal_ref) override; + bool set_format_by_check_constraint(Send_field_extended_metadata *to) const + override; + void add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level, + table_map usable_tables, SARGABLE_PARAM **sargables) + override; + SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +inline bool is_cond_and(Item *item) +{ + Item_func *func_item= item->get_item_func(); + return func_item && func_item->functype() == Item_func::COND_AND_FUNC; +} + +class Item_cond_or final :public Item_cond +{ +public: + Item_cond_or(THD *thd): Item_cond(thd) {} + Item_cond_or(THD *thd, Item *i1,Item *i2): Item_cond(thd, i1, i2) {} + Item_cond_or(THD *thd, Item_cond_or *item): Item_cond(thd, item) {} + Item_cond_or(THD *thd, List &list_arg): Item_cond(thd, list_arg) {} + enum 
Functype functype() const override { return COND_OR_FUNC; } + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("or") }; + return name; + } + enum precedence precedence() const override { return OR_PRECEDENCE; } + table_map not_null_tables() const override { return and_tables_cache; } + Item *copy_andor_structure(THD *thd) override; + Item *neg_transformer(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_dyncol_check :public Item_bool_func +{ +public: + Item_func_dyncol_check(THD *thd, Item *str): Item_bool_func(thd, str) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("column_check") }; + return name; + } + bool need_parentheses_in_default() override { return false; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_dyncol_exists :public Item_bool_func +{ +public: + Item_func_dyncol_exists(THD *thd, Item *str, Item *num): + Item_bool_func(thd, str, num) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("column_exists") }; + return name; + } + bool need_parentheses_in_default() override { return false; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_cursor_bool_attr: public Item_bool_func, public Cursor_ref +{ +public: + Item_func_cursor_bool_attr(THD *thd, const LEX_CSTRING *name, uint offset) + :Item_bool_func(thd), Cursor_ref(name, offset) + { } + bool check_vcol_func_processor(void *arg) + { + return mark_unsupported_function(func_name(), arg, VCOL_SESSION_FUNC); + } + void print(String *str, enum_query_type query_type) + { + Cursor_ref::print_func(str, func_name_cstring()); + } +}; + + +class Item_func_cursor_isopen: public Item_func_cursor_bool_attr +{ 
+public: + Item_func_cursor_isopen(THD *thd, const LEX_CSTRING *name, uint offset) + :Item_func_cursor_bool_attr(thd, name, offset) { } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("%ISOPEN") }; + return name; + } + longlong val_int() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_cursor_found: public Item_func_cursor_bool_attr +{ +public: + Item_func_cursor_found(THD *thd, const LEX_CSTRING *name, uint offset) + :Item_func_cursor_bool_attr(thd, name, offset) + { + set_maybe_null(); + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("%FOUND") }; + return name; + } + longlong val_int() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_cursor_notfound: public Item_func_cursor_bool_attr +{ +public: + Item_func_cursor_notfound(THD *thd, const LEX_CSTRING *name, uint offset) + :Item_func_cursor_bool_attr(thd, name, offset) + { + set_maybe_null(); + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("%NOTFOUND") }; + return name; + } + longlong val_int() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + + +inline bool is_cond_or(Item *item) +{ + Item_func *func_item= item->get_item_func(); + return func_item && func_item->functype() == Item_func::COND_OR_FUNC; +} + +Item *and_expressions(Item *a, Item *b, Item **org_item); + +class Comp_creator +{ +public: + Comp_creator() = default; /* Remove gcc warning */ + virtual ~Comp_creator() = default; /* Remove gcc warning */ + /** + Create operation with given arguments. + */ + virtual Item_bool_rowready_func2* create(THD *thd, Item *a, Item *b) + const = 0; + /** + Create operation with given arguments in swap order. 
+ */ + virtual Item_bool_rowready_func2* create_swap(THD *thd, Item *a, Item *b) + const = 0; + virtual const char* symbol(bool invert) const = 0; + virtual bool eqne_op() const = 0; + virtual bool l_op() const = 0; +}; + +class Eq_creator :public Comp_creator +{ +public: + Eq_creator() = default; /* Remove gcc warning */ + virtual ~Eq_creator() = default; /* Remove gcc warning */ + Item_bool_rowready_func2* create(THD *thd, Item *a, Item *b) const; + Item_bool_rowready_func2* create_swap(THD *thd, Item *a, Item *b) const; + const char* symbol(bool invert) const { return invert? "<>" : "="; } + bool eqne_op() const { return 1; } + bool l_op() const { return 0; } +}; + +class Ne_creator :public Comp_creator +{ +public: + Ne_creator() = default; /* Remove gcc warning */ + virtual ~Ne_creator() = default; /* Remove gcc warning */ + Item_bool_rowready_func2* create(THD *thd, Item *a, Item *b) const; + Item_bool_rowready_func2* create_swap(THD *thd, Item *a, Item *b) const; + const char* symbol(bool invert) const { return invert? "=" : "<>"; } + bool eqne_op() const { return 1; } + bool l_op() const { return 0; } +}; + +class Gt_creator :public Comp_creator +{ +public: + Gt_creator() = default; /* Remove gcc warning */ + virtual ~Gt_creator() = default; /* Remove gcc warning */ + Item_bool_rowready_func2* create(THD *thd, Item *a, Item *b) const; + Item_bool_rowready_func2* create_swap(THD *thd, Item *a, Item *b) const; + const char* symbol(bool invert) const { return invert? "<=" : ">"; } + bool eqne_op() const { return 0; } + bool l_op() const { return 0; } +}; + +class Lt_creator :public Comp_creator +{ +public: + Lt_creator() = default; /* Remove gcc warning */ + virtual ~Lt_creator() = default; /* Remove gcc warning */ + Item_bool_rowready_func2* create(THD *thd, Item *a, Item *b) const; + Item_bool_rowready_func2* create_swap(THD *thd, Item *a, Item *b) const; + const char* symbol(bool invert) const { return invert? 
">=" : "<"; } + bool eqne_op() const { return 0; } + bool l_op() const { return 1; } +}; + +class Ge_creator :public Comp_creator +{ +public: + Ge_creator() = default; /* Remove gcc warning */ + virtual ~Ge_creator() = default; /* Remove gcc warning */ + Item_bool_rowready_func2* create(THD *thd, Item *a, Item *b) const; + Item_bool_rowready_func2* create_swap(THD *thd, Item *a, Item *b) const; + const char* symbol(bool invert) const { return invert? "<" : ">="; } + bool eqne_op() const { return 0; } + bool l_op() const { return 0; } +}; + +class Le_creator :public Comp_creator +{ +public: + Le_creator() = default; /* Remove gcc warning */ + virtual ~Le_creator() = default; /* Remove gcc warning */ + Item_bool_rowready_func2* create(THD *thd, Item *a, Item *b) const; + Item_bool_rowready_func2* create_swap(THD *thd, Item *a, Item *b) const; + const char* symbol(bool invert) const { return invert? ">" : "<="; } + bool eqne_op() const { return 0; } + bool l_op() const { return 1; } +}; + +/* + These need definitions from this file but the variables are defined + in mysqld.h. The variables really belong in this component, but for + the time being we leave them in mysqld.cc to avoid merge problems. +*/ +extern Eq_creator eq_creator; +extern Ne_creator ne_creator; +extern Gt_creator gt_creator; +extern Lt_creator lt_creator; +extern Ge_creator ge_creator; +extern Le_creator le_creator; + +#endif /* ITEM_CMPFUNC_INCLUDED */ diff --git a/sql/item_create.cc b/sql/item_create.cc new file mode 100644 index 00000000..b26610a5 --- /dev/null +++ b/sql/item_create.cc @@ -0,0 +1,6143 @@ +/* + Copyright (c) 2000, 2011, Oracle and/or its affiliates. + Copyright (c) 2008, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + @brief + Functions to create an item. Used by sql_yac.yy +*/ + +#include "mariadb.h" +#include "sql_priv.h" +/* + It is necessary to include set_var.h instead of item.h because there + are dependencies on include order for set_var.h and item.h. This + will be resolved later. +*/ +#include "sql_class.h" // set_var.h: THD +#include "sql_parse.h" // sql_command_flags +#include "set_var.h" +#include "sp_head.h" +#include "sp.h" +#include "sql_time.h" +#include "sql_type_geom.h" +#include + + +extern "C" uchar* +get_native_fct_hash_key(const uchar *buff, size_t *length, + my_bool /* unused */) +{ + Native_func_registry *func= (Native_func_registry*) buff; + *length= func->name.length; + return (uchar*) func->name.str; +} + + +#ifdef HAVE_SPATIAL +extern Native_func_registry_array native_func_registry_array_geom; +#endif + + +/* +============================================================================= + LOCAL DECLARATIONS +============================================================================= +*/ + +/** + Function builder for Stored Functions. +*/ + +class Create_sp_func : public Create_qfunc +{ +public: + virtual Item *create_with_db(THD *thd, + const LEX_CSTRING *db, + const LEX_CSTRING *name, + bool use_explicit_name, List *item_list); + + static Create_sp_func s_singleton; + +protected: + /** Constructor. */ + Create_sp_func() = default; + /** Destructor. */ + virtual ~Create_sp_func() = default; +}; + + +/* + Concrete functions builders (native functions). 
+ Please keep this list sorted in alphabetical order, + it helps to compare code between versions, and helps with merges conflicts. +*/ + +class Create_func_abs : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_abs s_singleton; + +protected: + Create_func_abs() = default; + virtual ~Create_func_abs() = default; +}; + + +class Create_func_acos : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_acos s_singleton; + +protected: + Create_func_acos() = default; + virtual ~Create_func_acos() = default; +}; + + +class Create_func_addtime : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_addtime s_singleton; + +protected: + Create_func_addtime() = default; + virtual ~Create_func_addtime() = default; +}; + + +class Create_func_aes_encrypt : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_aes_encrypt s_singleton; + +protected: + Create_func_aes_encrypt() = default; + virtual ~Create_func_aes_encrypt() = default; +}; + + +class Create_func_aes_decrypt : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_aes_decrypt s_singleton; + +protected: + Create_func_aes_decrypt() = default; + virtual ~Create_func_aes_decrypt() = default; +}; + + +class Create_func_asin : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_asin s_singleton; + +protected: + Create_func_asin() = default; + virtual ~Create_func_asin() = default; +}; + + +class Create_func_atan : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_atan s_singleton; + +protected: + Create_func_atan() = default; + virtual 
~Create_func_atan() = default; +}; + + +class Create_func_benchmark : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_benchmark s_singleton; + +protected: + Create_func_benchmark() = default; + virtual ~Create_func_benchmark() = default; +}; + + +class Create_func_bin : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_bin s_singleton; + +protected: + Create_func_bin() = default; + virtual ~Create_func_bin() = default; +}; + + +class Create_func_binlog_gtid_pos : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_binlog_gtid_pos s_singleton; + +protected: + Create_func_binlog_gtid_pos() = default; + virtual ~Create_func_binlog_gtid_pos() = default; +}; + + +class Create_func_bit_count : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_bit_count s_singleton; + +protected: + Create_func_bit_count() = default; + virtual ~Create_func_bit_count() = default; +}; + + +class Create_func_bit_length : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_bit_length s_singleton; + +protected: + Create_func_bit_length() = default; + virtual ~Create_func_bit_length() = default; +}; + + +class Create_func_ceiling : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_ceiling s_singleton; + +protected: + Create_func_ceiling() = default; + virtual ~Create_func_ceiling() = default; +}; + + +class Create_func_chr : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_chr s_singleton; + +protected: + Create_func_chr() = default; + virtual ~Create_func_chr() = default; +}; + + +class Create_func_char_length : public Create_func_arg1 +{ +public: + 
virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_char_length s_singleton; + +protected: + Create_func_char_length() = default; + virtual ~Create_func_char_length() = default; +}; + + +class Create_func_coercibility : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_coercibility s_singleton; + +protected: + Create_func_coercibility() = default; + virtual ~Create_func_coercibility() = default; +}; + +class Create_func_dyncol_check : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_dyncol_check s_singleton; + +protected: + Create_func_dyncol_check() = default; + virtual ~Create_func_dyncol_check() = default; +}; + +class Create_func_dyncol_exists : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_dyncol_exists s_singleton; + +protected: + Create_func_dyncol_exists() = default; + virtual ~Create_func_dyncol_exists() = default; +}; + +class Create_func_dyncol_list : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_dyncol_list s_singleton; + +protected: + Create_func_dyncol_list() = default; + virtual ~Create_func_dyncol_list() = default; +}; + +class Create_func_dyncol_json : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_dyncol_json s_singleton; + +protected: + Create_func_dyncol_json() = default; + virtual ~Create_func_dyncol_json() = default; +}; + + +class Create_func_compress : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_compress s_singleton; + +protected: + Create_func_compress() = default; + virtual ~Create_func_compress() = default; +}; + + +class Create_func_concat : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const 
LEX_CSTRING *name, + List *item_list); + + static Create_func_concat s_singleton; + +protected: + Create_func_concat() = default; + virtual ~Create_func_concat() = default; +}; + + +class Create_func_concat_operator_oracle : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_concat_operator_oracle s_singleton; + +protected: + Create_func_concat_operator_oracle() = default; + virtual ~Create_func_concat_operator_oracle() = default; +}; + + +class Create_func_decode_histogram : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_decode_histogram s_singleton; + +protected: + Create_func_decode_histogram() = default; + virtual ~Create_func_decode_histogram() = default; +}; + + +class Create_func_decode_oracle : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) + { + if (unlikely(!item_list || item_list->elements < 3)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + return new (thd->mem_root) Item_func_decode_oracle(thd, *item_list); + } + + static Create_func_decode_oracle s_singleton; + +protected: + Create_func_decode_oracle() = default; + virtual ~Create_func_decode_oracle() = default; +}; + + +class Create_func_decode : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) + { + if (thd->variables.sql_mode & MODE_ORACLE) + return Create_func_decode_oracle::s_singleton.create_native(thd, name, + item_list); + if (unlikely(!item_list || item_list->elements != 2)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + Item_args args(thd, *item_list); + return new (thd->mem_root) Item_func_decode(thd, args.arguments()[0], + args.arguments()[1]); + } + + static Create_func_decode s_singleton; + 
+protected: + Create_func_decode() {} + virtual ~Create_func_decode() {} +}; + + +class Create_func_concat_ws : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_concat_ws s_singleton; + +protected: + Create_func_concat_ws() = default; + virtual ~Create_func_concat_ws() = default; +}; + + +class Create_func_connection_id : public Create_func_arg0 +{ +public: + virtual Item *create_builder(THD *thd); + + static Create_func_connection_id s_singleton; + +protected: + Create_func_connection_id() = default; + virtual ~Create_func_connection_id() = default; +}; + + +class Create_func_nvl2 : public Create_func_arg3 +{ +public: + virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3); + + static Create_func_nvl2 s_singleton; + +protected: + Create_func_nvl2() = default; + virtual ~Create_func_nvl2() = default; +}; + + +class Create_func_conv : public Create_func_arg3 +{ +public: + virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3); + + static Create_func_conv s_singleton; + +protected: + Create_func_conv() = default; + virtual ~Create_func_conv() = default; +}; + + +class Create_func_convert_tz : public Create_func_arg3 +{ +public: + virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3); + + static Create_func_convert_tz s_singleton; + +protected: + Create_func_convert_tz() = default; + virtual ~Create_func_convert_tz() = default; +}; + + +class Create_func_cos : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_cos s_singleton; + +protected: + Create_func_cos() = default; + virtual ~Create_func_cos() = default; +}; + + +class Create_func_cot : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_cot s_singleton; + +protected: + Create_func_cot() = default; + virtual ~Create_func_cot() = default; +}; + + 
+class Create_func_crc32 : public Create_native_func +{ +public: + Item *create_native(THD *thd, const LEX_CSTRING *, List *item_list) + override; + + static Create_func_crc32 s_singleton; + +protected: + Create_func_crc32() = default; + virtual ~Create_func_crc32() = default; +}; + + +class Create_func_crc32c : public Create_native_func +{ +public: + Item *create_native(THD *thd, const LEX_CSTRING *, List *item_list) + override; + + static Create_func_crc32c s_singleton; + +protected: + Create_func_crc32c() = default; + virtual ~Create_func_crc32c() = default; +}; + + +class Create_func_datediff : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_datediff s_singleton; + +protected: + Create_func_datediff() = default; + virtual ~Create_func_datediff() = default; +}; + + +class Create_func_dayname : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_dayname s_singleton; + +protected: + Create_func_dayname() = default; + virtual ~Create_func_dayname() = default; +}; + + +class Create_func_dayofmonth : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_dayofmonth s_singleton; + +protected: + Create_func_dayofmonth() = default; + virtual ~Create_func_dayofmonth() = default; +}; + + +class Create_func_dayofweek : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_dayofweek s_singleton; + +protected: + Create_func_dayofweek() = default; + virtual ~Create_func_dayofweek() = default; +}; + + +class Create_func_dayofyear : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_dayofyear s_singleton; + +protected: + Create_func_dayofyear() = default; + virtual ~Create_func_dayofyear() = default; +}; + + +class Create_func_degrees : public Create_func_arg1 +{ +public: + 
virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_degrees s_singleton; + +protected: + Create_func_degrees() = default; + virtual ~Create_func_degrees() = default; +}; + + +class Create_func_des_decrypt : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_des_decrypt s_singleton; + +protected: + Create_func_des_decrypt() = default; + virtual ~Create_func_des_decrypt() = default; +}; + + +class Create_func_des_encrypt : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_des_encrypt s_singleton; + +protected: + Create_func_des_encrypt() = default; + virtual ~Create_func_des_encrypt() = default; +}; + + +class Create_func_elt : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_elt s_singleton; + +protected: + Create_func_elt() = default; + virtual ~Create_func_elt() = default; +}; + + +class Create_func_encode : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_encode s_singleton; + +protected: + Create_func_encode() = default; + virtual ~Create_func_encode() = default; +}; + + +class Create_func_encrypt : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_encrypt s_singleton; + +protected: + Create_func_encrypt() = default; + virtual ~Create_func_encrypt() = default; +}; + + +class Create_func_exp : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_exp s_singleton; + +protected: + Create_func_exp() = default; + virtual ~Create_func_exp() = default; +}; + + +class Create_func_export_set : public Create_native_func +{ +public: + virtual Item 
*create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_export_set s_singleton; + +protected: + Create_func_export_set() = default; + virtual ~Create_func_export_set() = default; +}; + + +class Create_func_field : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_field s_singleton; + +protected: + Create_func_field() = default; + virtual ~Create_func_field() = default; +}; + + +class Create_func_find_in_set : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_find_in_set s_singleton; + +protected: + Create_func_find_in_set() = default; + virtual ~Create_func_find_in_set() = default; +}; + + +class Create_func_floor : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_floor s_singleton; + +protected: + Create_func_floor() = default; + virtual ~Create_func_floor() = default; +}; + + +class Create_func_format : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_format s_singleton; + +protected: + Create_func_format() = default; + virtual ~Create_func_format() = default; +}; + + +class Create_func_found_rows : public Create_func_arg0 +{ +public: + virtual Item *create_builder(THD *thd); + + static Create_func_found_rows s_singleton; + +protected: + Create_func_found_rows() = default; + virtual ~Create_func_found_rows() = default; +}; + + +class Create_func_from_base64 : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_from_base64 s_singleton; + +protected: + Create_func_from_base64() = default; + virtual ~Create_func_from_base64() = default; +}; + + +class Create_func_from_days : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, 
Item *arg1); + + static Create_func_from_days s_singleton; + +protected: + Create_func_from_days() = default; + virtual ~Create_func_from_days() = default; +}; + + +class Create_func_from_unixtime : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_from_unixtime s_singleton; + +protected: + Create_func_from_unixtime() = default; + virtual ~Create_func_from_unixtime() = default; +}; + + +class Create_func_get_lock : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_get_lock s_singleton; + +protected: + Create_func_get_lock() = default; + virtual ~Create_func_get_lock() = default; +}; + + +class Create_func_greatest : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_greatest s_singleton; + +protected: + Create_func_greatest() = default; + virtual ~Create_func_greatest() = default; +}; + + +class Create_func_hex : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_hex s_singleton; + +protected: + Create_func_hex() = default; + virtual ~Create_func_hex() = default; +}; + + +class Create_func_ifnull : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_ifnull s_singleton; + +protected: + Create_func_ifnull() = default; + virtual ~Create_func_ifnull() = default; +}; + + +class Create_func_instr : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_instr s_singleton; + +protected: + Create_func_instr() = default; + virtual ~Create_func_instr() = default; +}; + + +class Create_func_is_free_lock : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static 
Create_func_is_free_lock s_singleton; + +protected: + Create_func_is_free_lock() = default; + virtual ~Create_func_is_free_lock() = default; +}; + + +class Create_func_is_used_lock : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_is_used_lock s_singleton; + +protected: + Create_func_is_used_lock() = default; + virtual ~Create_func_is_used_lock() = default; +}; + + +class Create_func_isnull : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_isnull s_singleton; + +protected: + Create_func_isnull() = default; + virtual ~Create_func_isnull() = default; +}; + + +class Create_func_json_normalize : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_json_normalize s_singleton; + +protected: + Create_func_json_normalize() = default; + virtual ~Create_func_json_normalize() = default; +}; + + +class Create_func_json_equals : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_json_equals s_singleton; + +protected: + Create_func_json_equals() = default; + virtual ~Create_func_json_equals() = default; +}; + + +class Create_func_json_exists : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_json_exists s_singleton; + +protected: + Create_func_json_exists() = default; + virtual ~Create_func_json_exists() = default; +}; + + +class Create_func_json_valid : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_json_valid s_singleton; + +protected: + Create_func_json_valid() = default; + virtual ~Create_func_json_valid() = default; +}; + + +class Create_func_json_compact : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_json_compact 
s_singleton; + +protected: + Create_func_json_compact() = default; + virtual ~Create_func_json_compact() = default; +}; + + +class Create_func_json_loose : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_json_loose s_singleton; + +protected: + Create_func_json_loose() = default; + virtual ~Create_func_json_loose() = default; +}; + + +class Create_func_json_detailed: public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_detailed s_singleton; + +protected: + Create_func_json_detailed() = default; + virtual ~Create_func_json_detailed() = default; +}; + + +class Create_func_json_type : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_json_type s_singleton; + +protected: + Create_func_json_type() = default; + virtual ~Create_func_json_type() = default; +}; + + +class Create_func_json_depth : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_json_depth s_singleton; + +protected: + Create_func_json_depth() = default; + virtual ~Create_func_json_depth() = default; +}; + + +class Create_func_json_value : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_json_value s_singleton; + +protected: + Create_func_json_value() = default; + virtual ~Create_func_json_value() = default; +}; + + +class Create_func_json_query : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_json_query s_singleton; + +protected: + Create_func_json_query() = default; + virtual ~Create_func_json_query() = default; +}; + + +class Create_func_json_keys: public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static 
Create_func_json_keys s_singleton; + +protected: + Create_func_json_keys() = default; + virtual ~Create_func_json_keys() = default; +}; + + +class Create_func_json_contains: public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_contains s_singleton; + +protected: + Create_func_json_contains() = default; + virtual ~Create_func_json_contains() = default; +}; + + +class Create_func_json_contains_path : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_contains_path s_singleton; + +protected: + Create_func_json_contains_path() = default; + virtual ~Create_func_json_contains_path() = default; +}; + + +class Create_func_json_extract : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_extract s_singleton; + +protected: + Create_func_json_extract() = default; + virtual ~Create_func_json_extract() = default; +}; + + +class Create_func_json_search : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_search s_singleton; + +protected: + Create_func_json_search() = default; + virtual ~Create_func_json_search() = default; +}; + + +class Create_func_json_array : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_array s_singleton; + +protected: + Create_func_json_array() = default; + virtual ~Create_func_json_array() = default; +}; + + +class Create_func_json_array_append : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_array_append s_singleton; + +protected: + 
Create_func_json_array_append() = default; + virtual ~Create_func_json_array_append() = default; +}; + + +class Create_func_json_array_insert : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_array_insert s_singleton; + +protected: + Create_func_json_array_insert() = default; + virtual ~Create_func_json_array_insert() = default; +}; + + +class Create_func_json_insert : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_insert s_singleton; + +protected: + Create_func_json_insert() = default; + virtual ~Create_func_json_insert() = default; +}; + + +class Create_func_json_set : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_set s_singleton; + +protected: + Create_func_json_set() = default; + virtual ~Create_func_json_set() = default; +}; + + +class Create_func_json_replace : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_replace s_singleton; + +protected: + Create_func_json_replace() = default; + virtual ~Create_func_json_replace() = default; +}; + + +class Create_func_json_remove : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_remove s_singleton; + +protected: + Create_func_json_remove() = default; + virtual ~Create_func_json_remove() = default; +}; + + +class Create_func_json_object : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_object s_singleton; + +protected: + Create_func_json_object() = default; + virtual ~Create_func_json_object() = default; +}; + + 
+class Create_func_json_length : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_length s_singleton; + +protected: + Create_func_json_length() = default; + virtual ~Create_func_json_length() = default; +}; + + +class Create_func_json_merge : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_merge s_singleton; + +protected: + Create_func_json_merge() = default; + virtual ~Create_func_json_merge() = default; +}; + + +class Create_func_json_merge_patch : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_json_merge_patch s_singleton; + +protected: + Create_func_json_merge_patch() = default; + virtual ~Create_func_json_merge_patch() = default; +}; + + +class Create_func_json_quote : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_json_quote s_singleton; + +protected: + Create_func_json_quote() = default; + virtual ~Create_func_json_quote() = default; +}; + + +class Create_func_json_unquote : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_json_unquote s_singleton; + +protected: + Create_func_json_unquote() = default; + virtual ~Create_func_json_unquote() = default; +}; + + +class Create_func_json_overlaps: public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_json_overlaps s_singleton; + +protected: + Create_func_json_overlaps() {} + virtual ~Create_func_json_overlaps() {} +}; + + +class Create_func_last_day : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_last_day s_singleton; + +protected: + Create_func_last_day() = 
default; + virtual ~Create_func_last_day() = default; +}; + + +class Create_func_last_insert_id : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_last_insert_id s_singleton; + +protected: + Create_func_last_insert_id() = default; + virtual ~Create_func_last_insert_id() = default; +}; + + +class Create_func_lcase : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_lcase s_singleton; + +protected: + Create_func_lcase() = default; + virtual ~Create_func_lcase() = default; +}; + + +class Create_func_least : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_least s_singleton; + +protected: + Create_func_least() = default; + virtual ~Create_func_least() = default; +}; + + +class Create_func_length : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_length s_singleton; + +protected: + Create_func_length() = default; + virtual ~Create_func_length() = default; +}; + +class Create_func_octet_length : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_octet_length s_singleton; + +protected: + Create_func_octet_length() = default; + virtual ~Create_func_octet_length() = default; +}; + + +#ifndef DBUG_OFF +class Create_func_like_range_min : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_like_range_min s_singleton; + +protected: + Create_func_like_range_min() = default; + virtual ~Create_func_like_range_min() = default; +}; + + +class Create_func_like_range_max : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_like_range_max s_singleton; + +protected: + 
Create_func_like_range_max() = default; + virtual ~Create_func_like_range_max() = default; +}; +#endif + + +class Create_func_ln : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_ln s_singleton; + +protected: + Create_func_ln() = default; + virtual ~Create_func_ln() = default; +}; + + +class Create_func_load_file : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_load_file s_singleton; + +protected: + Create_func_load_file() = default; + virtual ~Create_func_load_file() = default; +}; + + +class Create_func_locate : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_locate s_singleton; + +protected: + Create_func_locate() = default; + virtual ~Create_func_locate() = default; +}; + + +class Create_func_log : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_log s_singleton; + +protected: + Create_func_log() = default; + virtual ~Create_func_log() = default; +}; + + +class Create_func_log10 : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_log10 s_singleton; + +protected: + Create_func_log10() = default; + virtual ~Create_func_log10() = default; +}; + + +class Create_func_log2 : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_log2 s_singleton; + +protected: + Create_func_log2() = default; + virtual ~Create_func_log2() = default; +}; + + +class Create_func_lpad : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) + { + return thd->variables.sql_mode & MODE_ORACLE ? 
+ create_native_oracle(thd, name, item_list) : + create_native_std(thd, name, item_list); + } + static Create_func_lpad s_singleton; + +protected: + Create_func_lpad() = default; + virtual ~Create_func_lpad() = default; + Item *create_native_std(THD *thd, const LEX_CSTRING *name, + List *items); + Item *create_native_oracle(THD *thd, const LEX_CSTRING *name, + List *items); +}; + + +class Create_func_lpad_oracle : public Create_func_lpad +{ +public: + Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) + { + return create_native_oracle(thd, name, item_list); + } + static Create_func_lpad_oracle s_singleton; +}; + + +class Create_func_ltrim : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_ltrim s_singleton; + +protected: + Create_func_ltrim() = default; + virtual ~Create_func_ltrim() = default; +}; + + +class Create_func_ltrim_oracle : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_ltrim_oracle s_singleton; + +protected: + Create_func_ltrim_oracle() = default; + virtual ~Create_func_ltrim_oracle() = default; +}; + + +class Create_func_makedate : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_makedate s_singleton; + +protected: + Create_func_makedate() = default; + virtual ~Create_func_makedate() = default; +}; + + +class Create_func_maketime : public Create_func_arg3 +{ +public: + virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3); + + static Create_func_maketime s_singleton; + +protected: + Create_func_maketime() = default; + virtual ~Create_func_maketime() = default; +}; + + +class Create_func_make_set : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_make_set s_singleton; + +protected: + Create_func_make_set() = default; + 
virtual ~Create_func_make_set() = default; +}; + + +class Create_func_master_pos_wait : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_master_pos_wait s_singleton; + +protected: + Create_func_master_pos_wait() = default; + virtual ~Create_func_master_pos_wait() = default; +}; + + +class Create_func_master_gtid_wait : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_master_gtid_wait s_singleton; + +protected: + Create_func_master_gtid_wait() = default; + virtual ~Create_func_master_gtid_wait() = default; +}; + + +class Create_func_md5 : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_md5 s_singleton; + +protected: + Create_func_md5() = default; + virtual ~Create_func_md5() = default; +}; + + +class Create_func_monthname : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_monthname s_singleton; + +protected: + Create_func_monthname() = default; + virtual ~Create_func_monthname() = default; +}; + + +class Create_func_name_const : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_name_const s_singleton; + +protected: + Create_func_name_const() = default; + virtual ~Create_func_name_const() = default; +}; + +class Create_func_natural_sort_key : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1) override; + static Create_func_natural_sort_key s_singleton; +protected: + Create_func_natural_sort_key() = default; + virtual ~Create_func_natural_sort_key() = default; +}; + +class Create_func_nullif : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_nullif s_singleton; + +protected: 
+ Create_func_nullif() = default; + virtual ~Create_func_nullif() = default; +}; + + +class Create_func_oct : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_oct s_singleton; + +protected: + Create_func_oct() = default; + virtual ~Create_func_oct() = default; +}; + + +class Create_func_ord : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_ord s_singleton; + +protected: + Create_func_ord() = default; + virtual ~Create_func_ord() = default; +}; + + +class Create_func_period_add : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_period_add s_singleton; + +protected: + Create_func_period_add() = default; + virtual ~Create_func_period_add() = default; +}; + + +class Create_func_period_diff : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_period_diff s_singleton; + +protected: + Create_func_period_diff() = default; + virtual ~Create_func_period_diff() = default; +}; + + +class Create_func_pi : public Create_func_arg0 +{ +public: + virtual Item *create_builder(THD *thd); + + static Create_func_pi s_singleton; + +protected: + Create_func_pi() = default; + virtual ~Create_func_pi() = default; +}; + + +class Create_func_pow : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_pow s_singleton; + +protected: + Create_func_pow() = default; + virtual ~Create_func_pow() = default; +}; + + +class Create_func_quote : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_quote s_singleton; + +protected: + Create_func_quote() = default; + virtual ~Create_func_quote() = default; +}; + + +class Create_func_regexp_instr : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD 
*thd, Item *arg1, Item *arg2); + + static Create_func_regexp_instr s_singleton; + +protected: + Create_func_regexp_instr() = default; + virtual ~Create_func_regexp_instr() = default; +}; + + +class Create_func_regexp_replace : public Create_func_arg3 +{ +public: + virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3); + + static Create_func_regexp_replace s_singleton; + +protected: + Create_func_regexp_replace() = default; + virtual ~Create_func_regexp_replace() = default; +}; + + +class Create_func_regexp_substr : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_regexp_substr s_singleton; + +protected: + Create_func_regexp_substr() = default; + virtual ~Create_func_regexp_substr() = default; +}; + + +class Create_func_radians : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_radians s_singleton; + +protected: + Create_func_radians() = default; + virtual ~Create_func_radians() = default; +}; + + +class Create_func_rand : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_rand s_singleton; + +protected: + Create_func_rand() = default; + virtual ~Create_func_rand() = default; +}; + + +class Create_func_random_bytes : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_random_bytes s_singleton; + +protected: + Create_func_random_bytes() {} + virtual ~Create_func_random_bytes() {} +}; + + +class Create_func_release_all_locks : public Create_func_arg0 +{ +public: + virtual Item *create_builder(THD *thd); + + static Create_func_release_all_locks s_singleton; +}; + + +class Create_func_release_lock : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_release_lock s_singleton; + +protected: + 
Create_func_release_lock() = default; + virtual ~Create_func_release_lock() = default; +}; + + +class Create_func_replace_oracle : public Create_func_arg3 +{ +public: + virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3); + + static Create_func_replace_oracle s_singleton; + +protected: + Create_func_replace_oracle() = default; + virtual ~Create_func_replace_oracle() = default; +}; + + +class Create_func_reverse : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_reverse s_singleton; + +protected: + Create_func_reverse() = default; + virtual ~Create_func_reverse() = default; +}; + + +class Create_func_round : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_round s_singleton; + +protected: + Create_func_round() = default; + virtual ~Create_func_round() = default; +}; + + +class Create_func_rpad : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) + { + return thd->variables.sql_mode & MODE_ORACLE ? 
+ create_native_oracle(thd, name, item_list) : + create_native_std(thd, name, item_list); + } + static Create_func_rpad s_singleton; + +protected: + Create_func_rpad() = default; + virtual ~Create_func_rpad() = default; + Item *create_native_std(THD *thd, const LEX_CSTRING *name, + List *items); + Item *create_native_oracle(THD *thd, const LEX_CSTRING *name, + List *items); +}; + + +class Create_func_rpad_oracle : public Create_func_rpad +{ +public: + Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) + { + return create_native_oracle(thd, name, item_list); + } + static Create_func_rpad_oracle s_singleton; +}; + + +class Create_func_rtrim : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_rtrim s_singleton; + +protected: + Create_func_rtrim() = default; + virtual ~Create_func_rtrim() = default; +}; + + +class Create_func_rtrim_oracle : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_rtrim_oracle s_singleton; + +protected: + Create_func_rtrim_oracle() = default; + virtual ~Create_func_rtrim_oracle() = default; +}; + + +class Create_func_sec_to_time : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_sec_to_time s_singleton; + +protected: + Create_func_sec_to_time() = default; + virtual ~Create_func_sec_to_time() = default; +}; + +class Create_func_sformat : public Create_native_func +{ +public: + Item *create_native(THD *thd, const LEX_CSTRING *name, List *item_list) + override; + static Create_func_sformat s_singleton; +protected: + Create_func_sformat() = default; + virtual ~Create_func_sformat() = default; +}; + +class Create_func_sha : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_sha s_singleton; + +protected: + Create_func_sha() = default; + virtual ~Create_func_sha() = default; +}; + + 
+class Create_func_sha2 : public Create_func_arg2 +{ +public: + virtual Item* create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_sha2 s_singleton; + +protected: + Create_func_sha2() = default; + virtual ~Create_func_sha2() = default; +}; + + +class Create_func_sign : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_sign s_singleton; + +protected: + Create_func_sign() = default; + virtual ~Create_func_sign() = default; +}; + + +class Create_func_sin : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_sin s_singleton; + +protected: + Create_func_sin() = default; + virtual ~Create_func_sin() = default; +}; + + +class Create_func_sleep : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_sleep s_singleton; + +protected: + Create_func_sleep() = default; + virtual ~Create_func_sleep() = default; +}; + + +class Create_func_soundex : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_soundex s_singleton; + +protected: + Create_func_soundex() = default; + virtual ~Create_func_soundex() = default; +}; + + +class Create_func_space : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_space s_singleton; + +protected: + Create_func_space() = default; + virtual ~Create_func_space() = default; +}; + + +class Create_func_sqrt : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_sqrt s_singleton; + +protected: + Create_func_sqrt() = default; + virtual ~Create_func_sqrt() = default; +}; + + +class Create_func_str_to_date : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_str_to_date s_singleton; + +protected: + 
Create_func_str_to_date() = default; + virtual ~Create_func_str_to_date() = default; +}; + + +class Create_func_strcmp : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_strcmp s_singleton; + +protected: + Create_func_strcmp() = default; + virtual ~Create_func_strcmp() = default; +}; + + +class Create_func_substr_index : public Create_func_arg3 +{ +public: + virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3); + + static Create_func_substr_index s_singleton; + +protected: + Create_func_substr_index() = default; + virtual ~Create_func_substr_index() = default; +}; + + +class Create_func_substr_oracle : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_substr_oracle s_singleton; + +protected: + Create_func_substr_oracle() = default; + virtual ~Create_func_substr_oracle() = default; +}; + + +class Create_func_subtime : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_subtime s_singleton; + +protected: + Create_func_subtime() = default; + virtual ~Create_func_subtime() = default; +}; + + +class Create_func_tan : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_tan s_singleton; + +protected: + Create_func_tan() = default; + virtual ~Create_func_tan() = default; +}; + + +class Create_func_time_format : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_time_format s_singleton; + +protected: + Create_func_time_format() = default; + virtual ~Create_func_time_format() = default; +}; + + +class Create_func_time_to_sec : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_time_to_sec s_singleton; + +protected: + 
Create_func_time_to_sec() = default; + virtual ~Create_func_time_to_sec() = default; +}; + + +class Create_func_timediff : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_timediff s_singleton; + +protected: + Create_func_timediff() = default; + virtual ~Create_func_timediff() = default; +}; + + +class Create_func_to_base64 : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_to_base64 s_singleton; + +protected: + Create_func_to_base64() = default; + virtual ~Create_func_to_base64() = default; +}; + + +class Create_func_to_char : public Create_native_func +{ +public: + Item *create_native(THD *thd, const LEX_CSTRING *name, List *item_list) + override; + + static Create_func_to_char s_singleton; + +protected: + Create_func_to_char() = default; + virtual ~Create_func_to_char() = default; +}; + + +class Create_func_to_days : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_to_days s_singleton; + +protected: + Create_func_to_days() = default; + virtual ~Create_func_to_days() = default; +}; + +class Create_func_to_seconds : public Create_func_arg1 +{ +public: + virtual Item* create_1_arg(THD *thd, Item *arg1); + + static Create_func_to_seconds s_singleton; + +protected: + Create_func_to_seconds() = default; + virtual ~Create_func_to_seconds() = default; +}; + + +class Create_func_ucase : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_ucase s_singleton; + +protected: + Create_func_ucase() = default; + virtual ~Create_func_ucase() = default; +}; + + +class Create_func_uncompress : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_uncompress s_singleton; + +protected: + Create_func_uncompress() = default; + virtual ~Create_func_uncompress() = default; +}; + 
+ +class Create_func_uncompressed_length : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_uncompressed_length s_singleton; + +protected: + Create_func_uncompressed_length() = default; + virtual ~Create_func_uncompressed_length() = default; +}; + + +class Create_func_unhex : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_unhex s_singleton; + +protected: + Create_func_unhex() = default; + virtual ~Create_func_unhex() = default; +}; + + +class Create_func_unix_timestamp : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_unix_timestamp s_singleton; + +protected: + Create_func_unix_timestamp() = default; + virtual ~Create_func_unix_timestamp() = default; +}; + + +class Create_func_uuid_short : public Create_func_arg0 +{ +public: + virtual Item *create_builder(THD *thd); + + static Create_func_uuid_short s_singleton; + +protected: + Create_func_uuid_short() = default; + virtual ~Create_func_uuid_short() = default; +}; + + +class Create_func_version : public Create_func_arg0 +{ +public: + virtual Item *create_builder(THD *thd); + + static Create_func_version s_singleton; + +protected: + Create_func_version() = default; + virtual ~Create_func_version() = default; +}; + + +class Create_func_weekday : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_weekday s_singleton; + +protected: + Create_func_weekday() = default; + virtual ~Create_func_weekday() = default; +}; + + +class Create_func_weekofyear : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_weekofyear s_singleton; + +protected: + Create_func_weekofyear() = default; + virtual ~Create_func_weekofyear() = default; +}; + + +#ifdef WITH_WSREP +class 
Create_func_wsrep_last_written_gtid : public Create_func_arg0 +{ +public: + virtual Item *create_builder(THD *thd); + + static Create_func_wsrep_last_written_gtid s_singleton; + +protected: + Create_func_wsrep_last_written_gtid() = default; + virtual ~Create_func_wsrep_last_written_gtid() = default; +}; + + +class Create_func_wsrep_last_seen_gtid : public Create_func_arg0 +{ +public: + virtual Item *create_builder(THD *thd); + + static Create_func_wsrep_last_seen_gtid s_singleton; + +protected: + Create_func_wsrep_last_seen_gtid() = default; + virtual ~Create_func_wsrep_last_seen_gtid() = default; +}; + + +class Create_func_wsrep_sync_wait_upto : public Create_native_func +{ +public: + Item *create_native(THD *thd, const LEX_CSTRING *name, List *item_list) + override; + + static Create_func_wsrep_sync_wait_upto s_singleton; + +protected: + Create_func_wsrep_sync_wait_upto() = default; + virtual ~Create_func_wsrep_sync_wait_upto() = default; +}; +#endif /* WITH_WSREP */ + + +class Create_func_xml_extractvalue : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_xml_extractvalue s_singleton; + +protected: + Create_func_xml_extractvalue() = default; + virtual ~Create_func_xml_extractvalue() = default; +}; + + +class Create_func_xml_update : public Create_func_arg3 +{ +public: + virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3); + + static Create_func_xml_update s_singleton; + +protected: + Create_func_xml_update() = default; + virtual ~Create_func_xml_update() = default; +}; + + +class Create_func_year_week : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list); + + static Create_func_year_week s_singleton; + +protected: + Create_func_year_week() = default; + virtual ~Create_func_year_week() = default; +}; + + +/* +============================================================================= + 
IMPLEMENTATION +============================================================================= +*/ + +/** + Checks if there are named parameters in a parameter list. + The syntax to name parameters in a function call is as follow: + foo(expr AS named, expr named, expr AS "named", expr "named") + @param params The parameter list, can be null + @return true if one or more parameter is named +*/ +static bool has_named_parameters(List *params) +{ + if (params) + { + Item *param; + List_iterator it(*params); + while ((param= it++)) + { + if (param->is_explicit_name()) + return true; + } + } + + return false; +} + + +Item* +Create_qfunc::create_func(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + LEX_CSTRING db; + + if (unlikely(! thd->db.str && ! thd->lex->sphead)) + { + /* + The proper error message should be in the lines of: + Can't resolve () to a function call, + because this function: + - is not a native function, + - is not a user defined function, + - can not match a qualified (read: stored) function + since no database is selected. + Reusing ER_SP_DOES_NOT_EXIST have a message consistent with + the case when a default database exist, see Create_sp_func::create(). 
+ */ + my_error(ER_SP_DOES_NOT_EXIST, MYF(0), + "FUNCTION", name->str); + return NULL; + } + + if (thd->lex->copy_db_to(&db)) + return NULL; + + return create_with_db(thd, &db, name, false, item_list); +} + + +#ifdef HAVE_DLOPEN +Create_udf_func Create_udf_func::s_singleton; + +Item* +Create_udf_func::create_func(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + udf_func *udf= find_udf(name->str, name->length); + DBUG_ASSERT(udf); + return create(thd, udf, item_list); +} + + +Item* +Create_udf_func::create(THD *thd, udf_func *udf, List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + DBUG_ENTER("Create_udf_func::create"); + if (item_list != NULL) + arg_count= item_list->elements; + + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_UDF); + + DBUG_ASSERT( (udf->type == UDFTYPE_FUNCTION) + || (udf->type == UDFTYPE_AGGREGATE)); + + switch(udf->returns) { + case STRING_RESULT: + { + if (udf->type == UDFTYPE_FUNCTION) + { + if (arg_count) + func= new (thd->mem_root) Item_func_udf_str(thd, udf, *item_list); + else + func= new (thd->mem_root) Item_func_udf_str(thd, udf); + } + else + { + if (arg_count) + func= new (thd->mem_root) Item_sum_udf_str(thd, udf, *item_list); + else + func= new (thd->mem_root) Item_sum_udf_str(thd, udf); + } + break; + } + case REAL_RESULT: + { + if (udf->type == UDFTYPE_FUNCTION) + { + if (arg_count) + func= new (thd->mem_root) Item_func_udf_float(thd, udf, *item_list); + else + func= new (thd->mem_root) Item_func_udf_float(thd, udf); + } + else + { + if (arg_count) + func= new (thd->mem_root) Item_sum_udf_float(thd, udf, *item_list); + else + func= new (thd->mem_root) Item_sum_udf_float(thd, udf); + } + break; + } + case INT_RESULT: + { + if (udf->type == UDFTYPE_FUNCTION) + { + if (arg_count) + func= new (thd->mem_root) Item_func_udf_int(thd, udf, *item_list); + else + func= new (thd->mem_root) Item_func_udf_int(thd, udf); + } + else + { + if (arg_count) + func= new (thd->mem_root) Item_sum_udf_int(thd, udf, *item_list); + 
else + func= new (thd->mem_root) Item_sum_udf_int(thd, udf); + } + break; + } + case DECIMAL_RESULT: + { + if (udf->type == UDFTYPE_FUNCTION) + { + if (arg_count) + func= new (thd->mem_root) Item_func_udf_decimal(thd, udf, *item_list); + else + func= new (thd->mem_root) Item_func_udf_decimal(thd, udf); + } + else + { + if (arg_count) + func= new (thd->mem_root) Item_sum_udf_decimal(thd, udf, *item_list); + else + func= new (thd->mem_root) Item_sum_udf_decimal(thd, udf); + } + break; + } + default: + { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "UDF return type"); + } + } + thd->lex->safe_to_cache_query= 0; + DBUG_RETURN(func); +} +#endif + + +Create_sp_func Create_sp_func::s_singleton; + +Item* +Create_sp_func::create_with_db(THD *thd, + const LEX_CSTRING *db, + const LEX_CSTRING *name, + bool use_explicit_name, List *item_list) +{ + int arg_count= 0; + Item *func= NULL; + LEX *lex= thd->lex; + sp_name *qname; + const Sp_handler *sph= &sp_handler_function; + Database_qualified_name pkgname(&null_clex_str, &null_clex_str); + + if (unlikely(has_named_parameters(item_list))) + { + /* + The syntax "db.foo(expr AS p1, expr AS p2, ...) is invalid, + and has been rejected during syntactic parsing already, + because a stored function call may not have named parameters. + + The syntax "foo(expr AS p1, expr AS p2, ...)" is correct, + because it can refer to a User Defined Function call. + For a Stored Function however, this has no semantic. 
+ */ + my_error(ER_WRONG_PARAMETERS_TO_STORED_FCT, MYF(0), name->str); + return NULL; + } + + if (item_list != NULL) + arg_count= item_list->elements; + + qname= new (thd->mem_root) sp_name(db, name, use_explicit_name); + if (unlikely(sph->sp_resolve_package_routine(thd, thd->lex->sphead, + qname, &sph, &pkgname))) + return NULL; + sph->add_used_routine(lex, thd, qname); + if (pkgname.m_name.length) + sp_handler_package_body.add_used_routine(lex, thd, &pkgname); + Name_resolution_context *ctx= lex->current_context(); + if (arg_count > 0) + func= new (thd->mem_root) Item_func_sp(thd, ctx, qname, sph, *item_list); + else + func= new (thd->mem_root) Item_func_sp(thd, ctx, qname, sph); + + lex->safe_to_cache_query= 0; + return func; +} + + +Item* +Create_native_func::create_func(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + if (unlikely(has_named_parameters(item_list))) + { + my_error(ER_WRONG_PARAMETERS_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + return create_native(thd, name, item_list); +} + + +Item* +Create_func_arg0::create_func(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count != 0)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + return create_builder(thd); +} + + +Item* +Create_func_arg1::create_func(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= 0; + + if (item_list) + arg_count= item_list->elements; + + if (unlikely(arg_count != 1)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + Item *param_1= item_list->pop(); + + if (unlikely(param_1->is_explicit_name())) + { + my_error(ER_WRONG_PARAMETERS_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + return create_1_arg(thd, param_1); +} + + +Item* +Create_func_arg2::create_func(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= 0; 
+ + if (item_list) + arg_count= item_list->elements; + + if (unlikely(arg_count != 2)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + + if (unlikely(param_1->is_explicit_name() || + param_2->is_explicit_name())) + { + my_error(ER_WRONG_PARAMETERS_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + return create_2_arg(thd, param_1, param_2); +} + + +Item* +Create_func_arg3::create_func(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= 0; + + if (item_list) + arg_count= item_list->elements; + + if (unlikely(arg_count != 3)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + Item *param_3= item_list->pop(); + + if (unlikely(param_1->is_explicit_name() || + param_2->is_explicit_name() || + param_3->is_explicit_name())) + { + my_error(ER_WRONG_PARAMETERS_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + return create_3_arg(thd, param_1, param_2, param_3); +} + + +Create_func_abs Create_func_abs::s_singleton; + +Item* +Create_func_abs::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_abs(thd, arg1); +} + + +Create_func_acos Create_func_acos::s_singleton; + +Item* +Create_func_acos::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_acos(thd, arg1); +} + + +Create_func_addtime Create_func_addtime::s_singleton; + +Item* +Create_func_addtime::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_add_time(thd, arg1, arg2, false); +} + + +Create_func_aes_encrypt Create_func_aes_encrypt::s_singleton; + +Item* +Create_func_aes_encrypt::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_aes_encrypt(thd, arg1, arg2); +} + + +Create_func_aes_decrypt Create_func_aes_decrypt::s_singleton; + +Item* 
+Create_func_aes_decrypt::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_aes_decrypt(thd, arg1, arg2); +} + + +Create_func_asin Create_func_asin::s_singleton; + +Item* +Create_func_asin::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_asin(thd, arg1); +} + + +Create_func_atan Create_func_atan::s_singleton; + +Item* +Create_func_atan::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item* func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 1: + { + Item *param_1= item_list->pop(); + func= new (thd->mem_root) Item_func_atan(thd, param_1); + break; + } + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_atan(thd, param_1, param_2); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +Create_func_benchmark Create_func_benchmark::s_singleton; + +Item* +Create_func_benchmark::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT); + return new (thd->mem_root) Item_func_benchmark(thd, arg1, arg2); +} + + +Create_func_bin Create_func_bin::s_singleton; + +Item* +Create_func_bin::create_1_arg(THD *thd, Item *arg1) +{ + Item *i10= new (thd->mem_root) Item_int(thd, (int32) 10,2); + Item *i2= new (thd->mem_root) Item_int(thd, (int32) 2,1); + return new (thd->mem_root) Item_func_conv(thd, arg1, i10, i2); +} + + +Create_func_binlog_gtid_pos Create_func_binlog_gtid_pos::s_singleton; + +Item* +Create_func_binlog_gtid_pos::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ +#ifdef HAVE_REPLICATION + if (unlikely(!mysql_bin_log.is_open())) +#endif + { + my_error(ER_NO_BINARY_LOGGING, MYF(0)); + return NULL; + } + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + return new (thd->mem_root) 
Item_func_binlog_gtid_pos(thd, arg1, arg2); +} + + +Create_func_bit_count Create_func_bit_count::s_singleton; + +Item* +Create_func_bit_count::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_bit_count(thd, arg1); +} + + +Create_func_bit_length Create_func_bit_length::s_singleton; + +Item* +Create_func_bit_length::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_bit_length(thd, arg1); +} + + +Create_func_ceiling Create_func_ceiling::s_singleton; + +Item* +Create_func_ceiling::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_ceiling(thd, arg1); +} + + +Create_func_chr Create_func_chr::s_singleton; + +Item* +Create_func_chr::create_1_arg(THD *thd, Item *arg1) +{ + CHARSET_INFO *cs_db= thd->variables.collation_database; + return new (thd->mem_root) Item_func_chr(thd, arg1, cs_db); +} + + +Create_func_char_length Create_func_char_length::s_singleton; + +Item* +Create_func_char_length::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_char_length(thd, arg1); +} + + +Create_func_coercibility Create_func_coercibility::s_singleton; + +Item* +Create_func_coercibility::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_coercibility(thd, arg1); +} + + +Create_func_dyncol_check Create_func_dyncol_check::s_singleton; + +Item* +Create_func_dyncol_check::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_dyncol_check(thd, arg1); +} + +Create_func_dyncol_exists Create_func_dyncol_exists::s_singleton; + +Item* +Create_func_dyncol_exists::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_dyncol_exists(thd, arg1, arg2); +} + +Create_func_dyncol_list Create_func_dyncol_list::s_singleton; + +Item* +Create_func_dyncol_list::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_dyncol_list(thd, arg1); +} + +Create_func_dyncol_json 
Create_func_dyncol_json::s_singleton; + +Item* +Create_func_dyncol_json::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_dyncol_json(thd, arg1); +} + +Create_func_concat Create_func_concat::s_singleton; + +Item* +Create_func_concat::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 1)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + return thd->variables.sql_mode & MODE_ORACLE ? + new (thd->mem_root) Item_func_concat_operator_oracle(thd, *item_list) : + new (thd->mem_root) Item_func_concat(thd, *item_list); +} + +Create_func_concat_operator_oracle + Create_func_concat_operator_oracle::s_singleton; + +Item* +Create_func_concat_operator_oracle::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 1)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + return new (thd->mem_root) Item_func_concat_operator_oracle(thd, *item_list); +} + +Create_func_decode_histogram Create_func_decode_histogram::s_singleton; + +Item * +Create_func_decode_histogram::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_decode_histogram(thd, arg1, arg2); +} + +Create_func_decode Create_func_decode::s_singleton; + +Create_func_decode_oracle Create_func_decode_oracle::s_singleton; + +Create_func_concat_ws Create_func_concat_ws::s_singleton; + +Item* +Create_func_concat_ws::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + /* "WS" stands for "With Separator": this function takes 2+ arguments */ + if (unlikely(arg_count < 2)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), 
name->str); + return NULL; + } + + return new (thd->mem_root) Item_func_concat_ws(thd, *item_list); +} + + +Create_func_compress Create_func_compress::s_singleton; + +Item* +Create_func_compress::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_compress(thd, arg1); +} + + +Create_func_connection_id Create_func_connection_id::s_singleton; + +Item* +Create_func_connection_id::create_builder(THD *thd) +{ + thd->lex->safe_to_cache_query= 0; + return new (thd->mem_root) Item_func_connection_id(thd); +} + + +Create_func_nvl2 Create_func_nvl2::s_singleton; + +Item* +Create_func_nvl2::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3) +{ + return new (thd->mem_root) Item_func_nvl2(thd, arg1, arg2, arg3); +} + + +Create_func_conv Create_func_conv::s_singleton; + +Item* +Create_func_conv::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3) +{ + return new (thd->mem_root) Item_func_conv(thd, arg1, arg2, arg3); +} + + +Create_func_convert_tz Create_func_convert_tz::s_singleton; + +Item* +Create_func_convert_tz::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3) +{ + return new (thd->mem_root) Item_func_convert_tz(thd, arg1, arg2, arg3); +} + + +Create_func_cos Create_func_cos::s_singleton; + +Item* +Create_func_cos::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_cos(thd, arg1); +} + + +Create_func_cot Create_func_cot::s_singleton; + +Item* +Create_func_cot::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_cot(thd, arg1); +} + + +Create_func_crc32 Create_func_crc32::s_singleton; + +Item* +Create_func_crc32::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int argc= item_list ? item_list->elements : 0; + + if (unlikely(argc != 1 && argc != 2)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return nullptr; + } + + Item *arg1= item_list->pop(), *arg2= argc < 2 ? 
nullptr : item_list->pop(); + + /* This was checked in Create_native_func::create_func() */ + DBUG_ASSERT(!arg1->is_explicit_name()); + DBUG_ASSERT(!arg2 || !arg2->is_explicit_name()); + + return arg2 + ? new (thd->mem_root) Item_func_crc32(thd, false, arg1, arg2) + : new (thd->mem_root) Item_func_crc32(thd, false, arg1); +} + + +Create_func_crc32c Create_func_crc32c::s_singleton; + +Item* +Create_func_crc32c::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int argc= item_list ? item_list->elements : 0; + + if (unlikely(argc != 1 && argc != 2)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return nullptr; + } + + Item *arg1= item_list->pop(), *arg2= argc < 2 ? nullptr : item_list->pop(); + + /* This was checked in Create_native_func::create_func() */ + DBUG_ASSERT(!arg1->is_explicit_name()); + DBUG_ASSERT(!arg2 || !arg2->is_explicit_name()); + + return arg2 + ? new (thd->mem_root) Item_func_crc32(thd, true, arg1, arg2) + : new (thd->mem_root) Item_func_crc32(thd, true, arg1); +} + + +Create_func_datediff Create_func_datediff::s_singleton; + +Item* +Create_func_datediff::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + Item *i1= new (thd->mem_root) Item_func_to_days(thd, arg1); + Item *i2= new (thd->mem_root) Item_func_to_days(thd, arg2); + + return new (thd->mem_root) Item_func_minus(thd, i1, i2); +} + + +Create_func_dayname Create_func_dayname::s_singleton; + +Item* +Create_func_dayname::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_dayname(thd, arg1); +} + + +Create_func_dayofmonth Create_func_dayofmonth::s_singleton; + +Item* +Create_func_dayofmonth::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_dayofmonth(thd, arg1); +} + + +Create_func_dayofweek Create_func_dayofweek::s_singleton; + +Item* +Create_func_dayofweek::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_weekday(thd, arg1, 1); +} + + +Create_func_dayofyear 
Create_func_dayofyear::s_singleton; + +Item* +Create_func_dayofyear::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_dayofyear(thd, arg1); +} + + +Create_func_degrees Create_func_degrees::s_singleton; + +Item* +Create_func_degrees::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_units(thd, (char*) "degrees", arg1, + 180/M_PI, 0.0); +} + + +Create_func_des_decrypt Create_func_des_decrypt::s_singleton; + +Item* +Create_func_des_decrypt::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 1: + { + Item *param_1= item_list->pop(); + func= new (thd->mem_root) Item_func_des_decrypt(thd, param_1); + break; + } + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_des_decrypt(thd, param_1, param_2); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +Create_func_des_encrypt Create_func_des_encrypt::s_singleton; + +Item* +Create_func_des_encrypt::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 1: + { + Item *param_1= item_list->pop(); + func= new (thd->mem_root) Item_func_des_encrypt(thd, param_1); + break; + } + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_des_encrypt(thd, param_1, param_2); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +Create_func_elt Create_func_elt::s_singleton; + +Item* +Create_func_elt::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= 0; + + 
if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 2)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + return new (thd->mem_root) Item_func_elt(thd, *item_list); +} + + +Create_func_encode Create_func_encode::s_singleton; + +Item* +Create_func_encode::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_encode(thd, arg1, arg2); +} + + +Create_func_encrypt Create_func_encrypt::s_singleton; + +Item* +Create_func_encrypt::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 1: + { + Item *param_1= item_list->pop(); + func= new (thd->mem_root) Item_func_encrypt(thd, param_1); + thd->lex->uncacheable(UNCACHEABLE_RAND); + break; + } + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_encrypt(thd, param_1, param_2); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +Create_func_exp Create_func_exp::s_singleton; + +Item* +Create_func_exp::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_exp(thd, arg1); +} + + +Create_func_export_set Create_func_export_set::s_singleton; + +Item* +Create_func_export_set::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 3: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + Item *param_3= item_list->pop(); + func= new (thd->mem_root) Item_func_export_set(thd, param_1, param_2, param_3); + break; + } + case 4: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + Item *param_3= item_list->pop(); + Item 
*param_4= item_list->pop(); + func= new (thd->mem_root) Item_func_export_set(thd, param_1, param_2, param_3, + param_4); + break; + } + case 5: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + Item *param_3= item_list->pop(); + Item *param_4= item_list->pop(); + Item *param_5= item_list->pop(); + func= new (thd->mem_root) Item_func_export_set(thd, param_1, param_2, param_3, + param_4, param_5); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +Create_func_field Create_func_field::s_singleton; + +Item* +Create_func_field::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 2)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + return new (thd->mem_root) Item_func_field(thd, *item_list); +} + + +Create_func_find_in_set Create_func_find_in_set::s_singleton; + +Item* +Create_func_find_in_set::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_find_in_set(thd, arg1, arg2); +} + + +Create_func_floor Create_func_floor::s_singleton; + +Item* +Create_func_floor::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_floor(thd, arg1); +} + + +Create_func_format Create_func_format::s_singleton; + +Item* +Create_func_format::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= item_list ? 
item_list->elements : 0; + + switch (arg_count) { + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_format(thd, param_1, param_2); + break; + } + case 3: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + Item *param_3= item_list->pop(); + func= new (thd->mem_root) Item_func_format(thd, param_1, param_2, param_3); + break; + } + default: + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + + return func; +} + + +Create_func_from_base64 Create_func_from_base64::s_singleton; + + +Item * +Create_func_from_base64::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_from_base64(thd, arg1); +} + + +Create_func_found_rows Create_func_found_rows::s_singleton; + +Item* +Create_func_found_rows::create_builder(THD *thd) +{ + DBUG_ENTER("Create_func_found_rows::create"); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + thd->lex->safe_to_cache_query= 0; + DBUG_RETURN(new (thd->mem_root) Item_func_found_rows(thd)); +} + + +Create_func_from_days Create_func_from_days::s_singleton; + +Item* +Create_func_from_days::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_from_days(thd, arg1); +} + + +Create_func_from_unixtime Create_func_from_unixtime::s_singleton; + +Item* +Create_func_from_unixtime::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 1: + { + Item *param_1= item_list->pop(); + func= new (thd->mem_root) Item_func_from_unixtime(thd, param_1); + break; + } + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + Item *ut= new (thd->mem_root) Item_func_from_unixtime(thd, param_1); + func= new (thd->mem_root) Item_func_date_format(thd, ut, param_2); + break; + } + default: + { + 
my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + + +Create_func_get_lock Create_func_get_lock::s_singleton; + +Item* +Create_func_get_lock::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT); + return new (thd->mem_root) Item_func_get_lock(thd, arg1, arg2); +} + + +Create_func_greatest Create_func_greatest::s_singleton; + +Item* +Create_func_greatest::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 2)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + return new (thd->mem_root) Item_func_max(thd, *item_list); +} + + +Create_func_hex Create_func_hex::s_singleton; + +Item* +Create_func_hex::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_hex(thd, arg1); +} + + +Create_func_ifnull Create_func_ifnull::s_singleton; + +Item* +Create_func_ifnull::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_ifnull(thd, arg1, arg2); +} + + +Create_func_instr Create_func_instr::s_singleton; + +Item* +Create_func_instr::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_locate(thd, arg1, arg2); +} + + +Create_func_is_free_lock Create_func_is_free_lock::s_singleton; + +Item* +Create_func_is_free_lock::create_1_arg(THD *thd, Item *arg1) +{ + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT); + return new (thd->mem_root) Item_func_is_free_lock(thd, arg1); +} + + +Create_func_is_used_lock Create_func_is_used_lock::s_singleton; + +Item* +Create_func_is_used_lock::create_1_arg(THD *thd, Item *arg1) +{ + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + 
thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT); + return new (thd->mem_root) Item_func_is_used_lock(thd, arg1); +} + + +Create_func_isnull Create_func_isnull::s_singleton; + +Item* +Create_func_isnull::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_isnull(thd, arg1); +} + +Create_func_json_normalize Create_func_json_normalize::s_singleton; + +Item* +Create_func_json_normalize::create_1_arg(THD *thd, Item *arg1) +{ + status_var_increment(thd->status_var.feature_json); + return new (thd->mem_root) Item_func_json_normalize(thd, arg1); +} + + +Create_func_json_equals Create_func_json_equals::s_singleton; + +Item* +Create_func_json_equals::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + status_var_increment(thd->status_var.feature_json); + return new (thd->mem_root) Item_func_json_equals(thd, arg1, arg2); +} + + +Create_func_json_exists Create_func_json_exists::s_singleton; + +Item* +Create_func_json_exists::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + status_var_increment(thd->status_var.feature_json); + return new (thd->mem_root) Item_func_json_exists(thd, arg1, arg2); +} + + +Create_func_json_detailed Create_func_json_detailed::s_singleton; + +Item* +Create_func_json_detailed::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 1 || arg_count > 2 /* json_doc, [path]...*/)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + } + else + { + func= new (thd->mem_root) Item_func_json_format(thd, *item_list); + } + + status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_loose Create_func_json_loose::s_singleton; + +Item* +Create_func_json_loose::create_1_arg(THD *thd, Item *arg1) +{ + status_var_increment(thd->status_var.feature_json); + return new (thd->mem_root) Item_func_json_format(thd, arg1, + Item_func_json_format::LOOSE); 
+} + + +Create_func_json_compact Create_func_json_compact::s_singleton; + +Item* +Create_func_json_compact::create_1_arg(THD *thd, Item *arg1) +{ + status_var_increment(thd->status_var.feature_json); + return new (thd->mem_root) Item_func_json_format(thd, arg1, + Item_func_json_format::COMPACT); +} + + +Create_func_json_valid Create_func_json_valid::s_singleton; + +Item* +Create_func_json_valid::create_1_arg(THD *thd, Item *arg1) +{ + status_var_increment(thd->status_var.feature_json); + return new (thd->mem_root) Item_func_json_valid(thd, arg1); +} + + +Create_func_json_type Create_func_json_type::s_singleton; + +Item* +Create_func_json_type::create_1_arg(THD *thd, Item *arg1) +{ + status_var_increment(thd->status_var.feature_json); + return new (thd->mem_root) Item_func_json_type(thd, arg1); +} + + +Create_func_json_depth Create_func_json_depth::s_singleton; + +Item* +Create_func_json_depth::create_1_arg(THD *thd, Item *arg1) +{ + status_var_increment(thd->status_var.feature_json); + return new (thd->mem_root) Item_func_json_depth(thd, arg1); +} + + +Create_func_json_value Create_func_json_value::s_singleton; + +Item* +Create_func_json_value::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + status_var_increment(thd->status_var.feature_json); + return new (thd->mem_root) Item_func_json_value(thd, arg1, arg2); +} + + +Create_func_json_query Create_func_json_query::s_singleton; + +Item* +Create_func_json_query::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + status_var_increment(thd->status_var.feature_json); + return new (thd->mem_root) Item_func_json_query(thd, arg1, arg2); +} + + +Create_func_json_quote Create_func_json_quote::s_singleton; + +Item* +Create_func_json_quote::create_1_arg(THD *thd, Item *arg1) +{ + status_var_increment(thd->status_var.feature_json); + return new (thd->mem_root) Item_func_json_quote(thd, arg1); +} + + +Create_func_json_unquote Create_func_json_unquote::s_singleton; + +Item* +Create_func_json_unquote::create_1_arg(THD *thd, 
Item *arg1) +{ + status_var_increment(thd->status_var.feature_json); + return new (thd->mem_root) Item_func_json_unquote(thd, arg1); +} + + +Create_func_last_day Create_func_last_day::s_singleton; + +Item* +Create_func_last_day::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_last_day(thd, arg1); +} + + +Create_func_json_array Create_func_json_array::s_singleton; + +Item* +Create_func_json_array::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func; + + if (item_list != NULL) + { + func= new (thd->mem_root) Item_func_json_array(thd, *item_list); + } + else + { + func= new (thd->mem_root) Item_func_json_array(thd); + } + + status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_array_append Create_func_json_array_append::s_singleton; + +Item* +Create_func_json_array_append::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 3 || (arg_count & 1) == 0 /*is even*/)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + } + else + { + func= new (thd->mem_root) Item_func_json_array_append(thd, *item_list); + } + + status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_array_insert Create_func_json_array_insert::s_singleton; + +Item* +Create_func_json_array_insert::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 3 || (arg_count & 1) == 0 /*is even*/)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + } + else + { + func= new (thd->mem_root) Item_func_json_array_insert(thd, *item_list); + } + + status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_insert 
Create_func_json_insert::s_singleton; + +Item* +Create_func_json_insert::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 3 || (arg_count & 1) == 0 /*is even*/)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + } + else + { + func= new (thd->mem_root) Item_func_json_insert(true, false, + thd, *item_list); + } + + status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_set Create_func_json_set::s_singleton; + +Item* +Create_func_json_set::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 3 || (arg_count & 1) == 0 /*is even*/)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + } + else + { + func= new (thd->mem_root) Item_func_json_insert(true, true, + thd, *item_list); + } + + status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_replace Create_func_json_replace::s_singleton; + +Item* +Create_func_json_replace::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 3 || (arg_count & 1) == 0 /*is even*/)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + } + else + { + func= new (thd->mem_root) Item_func_json_insert(false, true, + thd, *item_list); + } + + status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_remove Create_func_json_remove::s_singleton; + +Item* +Create_func_json_remove::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= 
item_list->elements; + + if (unlikely(arg_count < 2 /*json_doc, path [,path]*/)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + } + else + { + func= new (thd->mem_root) Item_func_json_remove(thd, *item_list); + } + + status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_object Create_func_json_object::s_singleton; + +Item* +Create_func_json_object::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func; + int arg_count; + + if (item_list != NULL) + { + arg_count= item_list->elements; + if (unlikely((arg_count & 1) != 0 /*is odd*/)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + func= NULL; + } + else + { + func= new (thd->mem_root) Item_func_json_object(thd, *item_list); + } + } + else + { + arg_count= 0; + func= new (thd->mem_root) Item_func_json_object(thd); + } + + status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_length Create_func_json_length::s_singleton; + +Item* +Create_func_json_length::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func; + int arg_count; + + if (unlikely(item_list == NULL || + (arg_count= item_list->elements) == 0)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + func= NULL; + } + else + { + func= new (thd->mem_root) Item_func_json_length(thd, *item_list); + } + + status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_merge Create_func_json_merge::s_singleton; + +Item* +Create_func_json_merge::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func; + int arg_count; + + if (unlikely(item_list == NULL || + (arg_count= item_list->elements) < 2)) // json, json + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + func= NULL; + } + else + { + func= new (thd->mem_root) Item_func_json_merge(thd, *item_list); + } + + 
status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_merge_patch Create_func_json_merge_patch::s_singleton; + +Item* +Create_func_json_merge_patch::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func; + int arg_count; + + if (item_list == NULL || + (arg_count= item_list->elements) < 2) // json, json + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + func= NULL; + } + else + { + func= new (thd->mem_root) Item_func_json_merge_patch(thd, *item_list); + } + + return func; +} + + +Create_func_json_contains Create_func_json_contains::s_singleton; + +Item* +Create_func_json_contains::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count == 2 || arg_count == 3/* json_doc, val, [path] */)) + { + func= new (thd->mem_root) Item_func_json_contains(thd, *item_list); + } + else + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + } + + status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_keys Create_func_json_keys::s_singleton; + +Item* +Create_func_json_keys::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 1 || arg_count > 2 /* json_doc, [path]...*/)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + } + else + { + func= new (thd->mem_root) Item_func_json_keys(thd, *item_list); + } + + status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_contains_path Create_func_json_contains_path::s_singleton; + +Item* +Create_func_json_contains_path::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + 
arg_count= item_list->elements; + + if (unlikely(arg_count < 3 /* json_doc, one_or_all, path, [path]...*/)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + } + else + { + func= new (thd->mem_root) Item_func_json_contains_path(thd, *item_list); + } + + status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_extract Create_func_json_extract::s_singleton; + +Item* +Create_func_json_extract::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 2 /* json_doc, path, [path]...*/)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + } + else + { + func= new (thd->mem_root) Item_func_json_extract(thd, *item_list); + } + + status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_search Create_func_json_search::s_singleton; + +Item* +Create_func_json_search::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 3 /* json_doc, one_or_all, search_str, [escape_char[, path]...*/)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + } + else + { + func= new (thd->mem_root) Item_func_json_search(thd, *item_list); + } + + status_var_increment(thd->status_var.feature_json); + return func; +} + + +Create_func_json_overlaps Create_func_json_overlaps::s_singleton; + +Item* +Create_func_json_overlaps::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + status_var_increment(thd->status_var.feature_json); + return new (thd->mem_root) Item_func_json_overlaps(thd, arg1, arg2); +} + + +Create_func_last_insert_id Create_func_last_insert_id::s_singleton; + +Item* +Create_func_last_insert_id::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item 
*func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 0: + { + func= new (thd->mem_root) Item_func_last_insert_id(thd); + thd->lex->safe_to_cache_query= 0; + break; + } + case 1: + { + Item *param_1= item_list->pop(); + func= new (thd->mem_root) Item_func_last_insert_id(thd, param_1); + thd->lex->safe_to_cache_query= 0; + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +Create_func_lcase Create_func_lcase::s_singleton; + +Item* +Create_func_lcase::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_lcase(thd, arg1); +} + + +Create_func_least Create_func_least::s_singleton; + +Item* +Create_func_least::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 2)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + return new (thd->mem_root) Item_func_min(thd, *item_list); +} + + +Create_func_length Create_func_length::s_singleton; + +Item* +Create_func_length::create_1_arg(THD *thd, Item *arg1) +{ + if (thd->variables.sql_mode & MODE_ORACLE) + return new (thd->mem_root) Item_func_char_length(thd, arg1); + else + return new (thd->mem_root) Item_func_octet_length(thd, arg1); +} + +Create_func_octet_length Create_func_octet_length::s_singleton; + +Item* +Create_func_octet_length::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_octet_length(thd, arg1); +} + + +#ifndef DBUG_OFF +Create_func_like_range_min Create_func_like_range_min::s_singleton; + +Item* +Create_func_like_range_min::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_like_range_min(thd, arg1, arg2); +} + + +Create_func_like_range_max Create_func_like_range_max::s_singleton; + +Item* 
+Create_func_like_range_max::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_like_range_max(thd, arg1, arg2); +} +#endif + + +Create_func_ln Create_func_ln::s_singleton; + +Item* +Create_func_ln::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_ln(thd, arg1); +} + + +Create_func_load_file Create_func_load_file::s_singleton; + +Item* +Create_func_load_file::create_1_arg(THD *thd, Item *arg1) +{ + DBUG_ENTER("Create_func_load_file::create"); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT); + DBUG_RETURN(new (thd->mem_root) Item_load_file(thd, arg1)); +} + + +Create_func_locate Create_func_locate::s_singleton; + +Item* +Create_func_locate::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + /* Yes, parameters in that order : 2, 1 */ + func= new (thd->mem_root) Item_func_locate(thd, param_2, param_1); + break; + } + case 3: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + Item *param_3= item_list->pop(); + /* Yes, parameters in that order : 2, 1, 3 */ + func= new (thd->mem_root) Item_func_locate(thd, param_2, param_1, param_3); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +Create_func_log Create_func_log::s_singleton; + +Item* +Create_func_log::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 1: + { + Item *param_1= item_list->pop(); + func= new (thd->mem_root) Item_func_log(thd, param_1); + break; + } + case 2: + { + Item *param_1= 
item_list->pop(); + Item *param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_log(thd, param_1, param_2); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +Create_func_log10 Create_func_log10::s_singleton; + +Item* +Create_func_log10::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_log10(thd, arg1); +} + + +Create_func_log2 Create_func_log2::s_singleton; + +Item* +Create_func_log2::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_log2(thd, arg1); +} + + +Create_func_lpad Create_func_lpad::s_singleton; + +Create_func_lpad_oracle Create_func_lpad_oracle::s_singleton; + +Item* +Create_func_lpad::create_native_std(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= item_list ? item_list->elements : 0; + + switch (arg_count) { + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_lpad(thd, param_1, param_2); + break; + } + case 3: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + Item *param_3= item_list->pop(); + func= new (thd->mem_root) Item_func_lpad(thd, param_1, param_2, param_3); + break; + } + default: + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + + return func; +} + + +Item* +Create_func_lpad::create_native_oracle(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= item_list ? 
item_list->elements : 0; + switch (arg_count) { + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + return new (thd->mem_root) Item_func_lpad_oracle(thd, param_1, param_2); + } + case 3: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + Item *param_3= item_list->pop(); + return new (thd->mem_root) Item_func_lpad_oracle(thd, param_1, + param_2, param_3); + } + default: + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + return NULL; +} + + +Create_func_ltrim Create_func_ltrim::s_singleton; + +Item* +Create_func_ltrim::create_1_arg(THD *thd, Item *arg1) +{ + return Lex_trim(TRIM_LEADING, arg1).make_item_func_trim(thd); +} + + +Create_func_ltrim_oracle Create_func_ltrim_oracle::s_singleton; + +Item* +Create_func_ltrim_oracle::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_ltrim_oracle(thd, arg1); +} + + +Create_func_makedate Create_func_makedate::s_singleton; + +Item* +Create_func_makedate::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_makedate(thd, arg1, arg2); +} + + +Create_func_maketime Create_func_maketime::s_singleton; + +Item* +Create_func_maketime::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3) +{ + return new (thd->mem_root) Item_func_maketime(thd, arg1, arg2, arg3); +} + + +Create_func_make_set Create_func_make_set::s_singleton; + +Item* +Create_func_make_set::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 2)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + return new (thd->mem_root) Item_func_make_set(thd, *item_list); +} + + +Create_func_master_pos_wait Create_func_master_pos_wait::s_singleton; + +Item* +Create_func_master_pos_wait::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) + +{ + 
Item *func= NULL; + int arg_count= 0; + + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 2 || arg_count > 4)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return func; + } + + thd->lex->safe_to_cache_query= 0; + + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + switch (arg_count) { + case 2: + { + func= new (thd->mem_root) Item_master_pos_wait(thd, param_1, param_2); + break; + } + case 3: + { + Item *param_3= item_list->pop(); + func= new (thd->mem_root) Item_master_pos_wait(thd, param_1, param_2, param_3); + break; + } + case 4: + { + Item *param_3= item_list->pop(); + Item *param_4= item_list->pop(); + func= new (thd->mem_root) Item_master_pos_wait(thd, param_1, param_2, param_3, + param_4); + break; + } + } + + return func; +} + + +Create_func_master_gtid_wait Create_func_master_gtid_wait::s_singleton; + +Item* +Create_func_master_gtid_wait::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + + if (item_list != NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 1 || arg_count > 2)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return func; + } + + thd->lex->safe_to_cache_query= 0; + + Item *param_1= item_list->pop(); + switch (arg_count) { + case 1: + { + func= new (thd->mem_root) Item_master_gtid_wait(thd, param_1); + break; + } + case 2: + { + Item *param_2= item_list->pop(); + func= new (thd->mem_root) Item_master_gtid_wait(thd, param_1, param_2); + break; + } + } + + return func; +} + + +Create_func_md5 Create_func_md5::s_singleton; + +Item* +Create_func_md5::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_md5(thd, arg1); +} + +Create_func_natural_sort_key Create_func_natural_sort_key::s_singleton; 
+ +Item *Create_func_natural_sort_key::create_1_arg(THD *thd, Item* arg1) +{ + return new (thd->mem_root) Item_func_natural_sort_key(thd, arg1); +} + +Create_func_monthname Create_func_monthname::s_singleton; + +Item* +Create_func_monthname::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_monthname(thd, arg1); +} + + +Create_func_name_const Create_func_name_const::s_singleton; + +Item* +Create_func_name_const::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + if (!arg1->basic_const_item()) + goto err; + + if (arg2->basic_const_item()) + return new (thd->mem_root) Item_name_const(thd, arg1, arg2); + + if (arg2->type() == Item::FUNC_ITEM) + { + Item_func *value_func= (Item_func *) arg2; + if (value_func->functype() != Item_func::COLLATE_FUNC && + value_func->functype() != Item_func::NEG_FUNC) + goto err; + + if (!value_func->key_item()->basic_const_item()) + goto err; + return new (thd->mem_root) Item_name_const(thd, arg1, arg2); + } +err: + my_error(ER_WRONG_ARGUMENTS, MYF(0), "NAME_CONST"); + return NULL; +} + + +Create_func_nullif Create_func_nullif::s_singleton; + +Item* +Create_func_nullif::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_nullif(thd, arg1, arg2); +} + + +Create_func_oct Create_func_oct::s_singleton; + +Item* +Create_func_oct::create_1_arg(THD *thd, Item *arg1) +{ + Item *i10= new (thd->mem_root) Item_int(thd, (int32) 10,2); + Item *i8= new (thd->mem_root) Item_int(thd, (int32) 8,1); + return new (thd->mem_root) Item_func_conv(thd, arg1, i10, i8); +} + + +Create_func_ord Create_func_ord::s_singleton; + +Item* +Create_func_ord::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_ord(thd, arg1); +} + + +Create_func_period_add Create_func_period_add::s_singleton; + +Item* +Create_func_period_add::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_period_add(thd, arg1, arg2); +} + + +Create_func_period_diff 
Create_func_period_diff::s_singleton; + +Item* +Create_func_period_diff::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_period_diff(thd, arg1, arg2); +} + + +Create_func_pi Create_func_pi::s_singleton; + +Item* +Create_func_pi::create_builder(THD *thd) +{ + return new (thd->mem_root) Item_static_float_func(thd, "pi()", M_PI, 6, 8); +} + + +Create_func_pow Create_func_pow::s_singleton; + +Item* +Create_func_pow::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_pow(thd, arg1, arg2); +} + + +Create_func_quote Create_func_quote::s_singleton; + +Item* +Create_func_quote::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_quote(thd, arg1); +} + + +Create_func_regexp_instr Create_func_regexp_instr::s_singleton; + +Item* +Create_func_regexp_instr::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_regexp_instr(thd, arg1, arg2); +} + + +Create_func_regexp_replace Create_func_regexp_replace::s_singleton; + +Item* +Create_func_regexp_replace::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3) +{ + return new (thd->mem_root) Item_func_regexp_replace(thd, arg1, arg2, arg3); +} + + +Create_func_regexp_substr Create_func_regexp_substr::s_singleton; + +Item* +Create_func_regexp_substr::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_regexp_substr(thd, arg1, arg2); +} + + +Create_func_radians Create_func_radians::s_singleton; + +Item* +Create_func_radians::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_units(thd, (char*) "radians", arg1, + M_PI/180, 0.0); +} + + +Create_func_rand Create_func_rand::s_singleton; + +Item* +Create_func_rand::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + /* + When RAND() is binlogged, the seed is 
binlogged too. So the + sequence of random numbers is the same on a replication slave as + on the master. However, if several RAND() values are inserted + into a table, the order in which the rows are modified may differ + between master and slave, because the order is undefined. Hence, + the statement is unsafe to log in statement format. + + For normal INSERT's this is howevever safe + */ + if (thd->lex->sql_command != SQLCOM_INSERT) + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + + switch (arg_count) { + case 0: + { + func= new (thd->mem_root) Item_func_rand(thd); + thd->lex->uncacheable(UNCACHEABLE_RAND); + break; + } + case 1: + { + Item *param_1= item_list->pop(); + func= new (thd->mem_root) Item_func_rand(thd, param_1); + thd->lex->uncacheable(UNCACHEABLE_RAND); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +Create_func_random_bytes Create_func_random_bytes::s_singleton; + +Item *Create_func_random_bytes::create_1_arg(THD *thd, Item *arg1) +{ + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + thd->lex->uncacheable(UNCACHEABLE_RAND); + return new (thd->mem_root) Item_func_random_bytes(thd, arg1); +} + + +Create_func_release_all_locks Create_func_release_all_locks::s_singleton; + +Item* +Create_func_release_all_locks::create_builder(THD *thd) +{ + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT); + return new (thd->mem_root) Item_func_release_all_locks(thd); +} + + +Create_func_release_lock Create_func_release_lock::s_singleton; + +Item* +Create_func_release_lock::create_1_arg(THD *thd, Item *arg1) +{ + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT); + return new (thd->mem_root) Item_func_release_lock(thd, arg1); +} + + +Create_func_replace_oracle Create_func_replace_oracle::s_singleton; + 
+Item* +Create_func_replace_oracle::create_3_arg(THD *thd, Item *arg1, Item *arg2, + Item *arg3) +{ + return new (thd->mem_root) Item_func_replace_oracle(thd, arg1, arg2, arg3); +} + + +Create_func_reverse Create_func_reverse::s_singleton; + +Item* +Create_func_reverse::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_reverse(thd, arg1); +} + + +Create_func_round Create_func_round::s_singleton; + +Item* +Create_func_round::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 1: + { + Item *param_1= item_list->pop(); + Item *i0= new (thd->mem_root) Item_int(thd, (char*)"0", 0, 1); + func= new (thd->mem_root) Item_func_round(thd, param_1, i0, 0); + break; + } + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_round(thd, param_1, param_2, 0); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +Create_func_rpad Create_func_rpad::s_singleton; + +Create_func_rpad_oracle Create_func_rpad_oracle::s_singleton; + +Item* +Create_func_rpad::create_native_std(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= item_list ? 
item_list->elements : 0; + + switch (arg_count) { + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_rpad(thd, param_1, param_2); + break; + } + case 3: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + Item *param_3= item_list->pop(); + func= new (thd->mem_root) Item_func_rpad(thd, param_1, param_2, param_3); + break; + } + default: + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + + return func; +} + + +Item* +Create_func_rpad::create_native_oracle(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= item_list ? item_list->elements : 0; + switch (arg_count) { + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + return new (thd->mem_root) Item_func_rpad_oracle(thd, param_1, param_2); + } + case 3: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + Item *param_3= item_list->pop(); + return new (thd->mem_root) Item_func_rpad_oracle(thd, param_1, + param_2, param_3); + } + default: + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + return NULL; +} + + +Create_func_rtrim Create_func_rtrim::s_singleton; + +Item* +Create_func_rtrim::create_1_arg(THD *thd, Item *arg1) +{ + return Lex_trim(TRIM_TRAILING, arg1).make_item_func_trim(thd); +} + + +Create_func_rtrim_oracle Create_func_rtrim_oracle::s_singleton; + +Item* +Create_func_rtrim_oracle::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_rtrim_oracle(thd, arg1); +} + + +Create_func_sec_to_time Create_func_sec_to_time::s_singleton; + +Item* +Create_func_sec_to_time::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_sec_to_time(thd, arg1); +} + +Create_func_sformat Create_func_sformat::s_singleton; + +Item* +Create_func_sformat::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= 0; + + if (item_list 
!= NULL) + arg_count= item_list->elements; + + if (unlikely(arg_count < 1)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + + return new (thd->mem_root) Item_func_sformat(thd, *item_list); +} + + +Create_func_sha Create_func_sha::s_singleton; + +Item* +Create_func_sha::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_sha(thd, arg1); +} + + +Create_func_sha2 Create_func_sha2::s_singleton; + +Item* +Create_func_sha2::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_sha2(thd, arg1, arg2); +} + + +Create_func_sign Create_func_sign::s_singleton; + +Item* +Create_func_sign::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_sign(thd, arg1); +} + + +Create_func_sin Create_func_sin::s_singleton; + +Item* +Create_func_sin::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_sin(thd, arg1); +} + + +Create_func_sleep Create_func_sleep::s_singleton; + +Item* +Create_func_sleep::create_1_arg(THD *thd, Item *arg1) +{ + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT); + return new (thd->mem_root) Item_func_sleep(thd, arg1); +} + + +Create_func_soundex Create_func_soundex::s_singleton; + +Item* +Create_func_soundex::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_soundex(thd, arg1); +} + + +Create_func_space Create_func_space::s_singleton; + +Item* +Create_func_space::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_space(thd, arg1); +} + + +Create_func_sqrt Create_func_sqrt::s_singleton; + +Item* +Create_func_sqrt::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_sqrt(thd, arg1); +} + + +Create_func_str_to_date Create_func_str_to_date::s_singleton; + +Item* +Create_func_str_to_date::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) 
Item_func_str_to_date(thd, arg1, arg2); +} + + +Create_func_strcmp Create_func_strcmp::s_singleton; + +Item* +Create_func_strcmp::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_strcmp(thd, arg1, arg2); +} + + +Create_func_substr_index Create_func_substr_index::s_singleton; + +Item* +Create_func_substr_index::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3) +{ + return new (thd->mem_root) Item_func_substr_index(thd, arg1, arg2, arg3); +} + + +Create_func_substr_oracle Create_func_substr_oracle::s_singleton; + +Item* +Create_func_substr_oracle::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= item_list ? item_list->elements : 0; + + switch (arg_count) { + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_substr_oracle(thd, param_1, param_2); + break; + } + case 3: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + Item *param_3= item_list->pop(); + func= new (thd->mem_root) Item_func_substr_oracle(thd, param_1, param_2, param_3); + break; + } + default: + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + + return func; +} + + +Create_func_subtime Create_func_subtime::s_singleton; + +Item* +Create_func_subtime::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_add_time(thd, arg1, arg2, true); +} + + +Create_func_tan Create_func_tan::s_singleton; + +Item* +Create_func_tan::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_tan(thd, arg1); +} + + +Create_func_time_format Create_func_time_format::s_singleton; + +Item* +Create_func_time_format::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_time_format(thd, arg1, arg2); +} + + +Create_func_time_to_sec Create_func_time_to_sec::s_singleton; + +Item* 
+Create_func_time_to_sec::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_time_to_sec(thd, arg1); +} + + +Create_func_timediff Create_func_timediff::s_singleton; + +Item* +Create_func_timediff::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_timediff(thd, arg1, arg2); +} + + +Create_func_to_base64 Create_func_to_base64::s_singleton; + +Item* +Create_func_to_base64::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_to_base64(thd, arg1); +} + + +Create_func_to_char Create_func_to_char::s_singleton; + +Item* +Create_func_to_char::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 1: + { + Item *param_1= item_list->pop(); + Item *i0= new (thd->mem_root) Item_string_sys(thd, "YYYY-MM-DD HH24:MI:SS", 21); + func= new (thd->mem_root) Item_func_tochar(thd, param_1, i0); + break; + } + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_tochar(thd, param_1, param_2); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +Create_func_to_days Create_func_to_days::s_singleton; + +Item* +Create_func_to_days::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_to_days(thd, arg1); +} + + +Create_func_to_seconds Create_func_to_seconds::s_singleton; + +Item* +Create_func_to_seconds::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_to_seconds(thd, arg1); +} + + +Create_func_ucase Create_func_ucase::s_singleton; + +Item* +Create_func_ucase::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_ucase(thd, arg1); +} + + +Create_func_uncompress Create_func_uncompress::s_singleton; + +Item* 
+Create_func_uncompress::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_uncompress(thd, arg1); +} + + +Create_func_uncompressed_length Create_func_uncompressed_length::s_singleton; + +Item* +Create_func_uncompressed_length::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_uncompressed_length(thd, arg1); +} + + +Create_func_unhex Create_func_unhex::s_singleton; + +Item* +Create_func_unhex::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_unhex(thd, arg1); +} + + +Create_func_unix_timestamp Create_func_unix_timestamp::s_singleton; + +Item* +Create_func_unix_timestamp::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 0: + { + func= new (thd->mem_root) Item_func_unix_timestamp(thd); + thd->lex->safe_to_cache_query= 0; + break; + } + case 1: + { + Item *param_1= item_list->pop(); + func= new (thd->mem_root) Item_func_unix_timestamp(thd, param_1); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +Create_func_uuid_short Create_func_uuid_short::s_singleton; + +Item* +Create_func_uuid_short::create_builder(THD *thd) +{ + DBUG_ENTER("Create_func_uuid_short::create"); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + thd->lex->safe_to_cache_query= 0; + DBUG_RETURN(new (thd->mem_root) Item_func_uuid_short(thd)); +} + + +Create_func_version Create_func_version::s_singleton; + +Item* +Create_func_version::create_builder(THD *thd) +{ + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + static Lex_cstring name(STRING_WITH_LEN("version()")); + return new (thd->mem_root) Item_static_string_func(thd, name, + Lex_cstring_strlen(server_version), + system_charset_info, + DERIVATION_SYSCONST); +} + + +Create_func_weekday 
Create_func_weekday::s_singleton; + +Item* +Create_func_weekday::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_weekday(thd, arg1, 0); +} + + +Create_func_weekofyear Create_func_weekofyear::s_singleton; + +Item* +Create_func_weekofyear::create_1_arg(THD *thd, Item *arg1) +{ + Item *i1= new (thd->mem_root) Item_int(thd, (char*) "3", 3, 1); + return new (thd->mem_root) Item_func_week(thd, arg1, i1); +} + + +#ifdef WITH_WSREP +Create_func_wsrep_last_written_gtid +Create_func_wsrep_last_written_gtid::s_singleton; + +Item* +Create_func_wsrep_last_written_gtid::create_builder(THD *thd) +{ + thd->lex->safe_to_cache_query= 0; + return new (thd->mem_root) Item_func_wsrep_last_written_gtid(thd); +} + + +Create_func_wsrep_last_seen_gtid +Create_func_wsrep_last_seen_gtid::s_singleton; + +Item* +Create_func_wsrep_last_seen_gtid::create_builder(THD *thd) +{ + thd->lex->safe_to_cache_query= 0; + return new (thd->mem_root) Item_func_wsrep_last_seen_gtid(thd); +} + + +Create_func_wsrep_sync_wait_upto +Create_func_wsrep_sync_wait_upto::s_singleton; + +Item* +Create_func_wsrep_sync_wait_upto::create_native(THD *thd, + const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + Item *param_1, *param_2; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) + { + case 1: + param_1= item_list->pop(); + func= new (thd->mem_root) Item_func_wsrep_sync_wait_upto(thd, param_1); + break; + case 2: + param_1= item_list->pop(); + param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_wsrep_sync_wait_upto(thd, param_1, param_2); + break; + default: + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + thd->lex->safe_to_cache_query= 0; + return func; +} +#endif /* WITH_WSREP */ + +Create_func_xml_extractvalue Create_func_xml_extractvalue::s_singleton; + +Item* +Create_func_xml_extractvalue::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) 
Item_func_xml_extractvalue(thd, arg1, arg2); +} + + +Create_func_xml_update Create_func_xml_update::s_singleton; + +Item* +Create_func_xml_update::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3) +{ + return new (thd->mem_root) Item_func_xml_update(thd, arg1, arg2, arg3); +} + + +Create_func_year_week Create_func_year_week::s_singleton; + +Item* +Create_func_year_week::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 1: + { + Item *param_1= item_list->pop(); + Item *i0= new (thd->mem_root) Item_int(thd, (char*) "0", 0, 1); + func= new (thd->mem_root) Item_func_yearweek(thd, param_1, i0); + break; + } + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_yearweek(thd, param_1, param_2); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + +#define BUILDER(F) & F::s_singleton + +/* + MySQL native functions. + MAINTAINER: + - Keep sorted for human lookup. At runtime, a hash table is used. + - do **NOT** conditionally (#ifdef, #ifndef) define a function *NAME*: + doing so will cause user code that works against a --without-XYZ binary + to fail with name collisions against a --with-XYZ binary. + Use something similar to GEOM_BUILDER instead. 
  - keep 1 line per entry, it makes grep | sort easier
*/

/*
  Registry of native SQL functions resolvable by name at parse time.
  Sorted alphabetically; several names are aliases sharing one builder
  (e.g. CEIL/CEILING, SHA/SHA1, POW/POWER, LOWER/LCASE).
*/
const Native_func_registry func_array[] =
{
  { { STRING_WITH_LEN("ABS") }, BUILDER(Create_func_abs)},
  { { STRING_WITH_LEN("ACOS") }, BUILDER(Create_func_acos)},
  { { STRING_WITH_LEN("ADDTIME") }, BUILDER(Create_func_addtime)},
  { { STRING_WITH_LEN("AES_DECRYPT") }, BUILDER(Create_func_aes_decrypt)},
  { { STRING_WITH_LEN("AES_ENCRYPT") }, BUILDER(Create_func_aes_encrypt)},
  { { STRING_WITH_LEN("ASIN") }, BUILDER(Create_func_asin)},
  { { STRING_WITH_LEN("ATAN") }, BUILDER(Create_func_atan)},
  /* ATAN2 deliberately shares the ATAN builder: ATAN accepts 1 or 2 args */
  { { STRING_WITH_LEN("ATAN2") }, BUILDER(Create_func_atan)},
  { { STRING_WITH_LEN("BENCHMARK") }, BUILDER(Create_func_benchmark)},
  { { STRING_WITH_LEN("BIN") }, BUILDER(Create_func_bin)},
  { { STRING_WITH_LEN("BINLOG_GTID_POS") }, BUILDER(Create_func_binlog_gtid_pos)},
  { { STRING_WITH_LEN("BIT_COUNT") }, BUILDER(Create_func_bit_count)},
  { { STRING_WITH_LEN("BIT_LENGTH") }, BUILDER(Create_func_bit_length)},
  { { STRING_WITH_LEN("CEIL") }, BUILDER(Create_func_ceiling)},
  { { STRING_WITH_LEN("CEILING") }, BUILDER(Create_func_ceiling)},
  { { STRING_WITH_LEN("CHARACTER_LENGTH") }, BUILDER(Create_func_char_length)},
  { { STRING_WITH_LEN("CHAR_LENGTH") }, BUILDER(Create_func_char_length)},
  { { STRING_WITH_LEN("CHR") }, BUILDER(Create_func_chr)},
  { { STRING_WITH_LEN("COERCIBILITY") }, BUILDER(Create_func_coercibility)},
  { { STRING_WITH_LEN("COLUMN_CHECK") }, BUILDER(Create_func_dyncol_check)},
  { { STRING_WITH_LEN("COLUMN_EXISTS") }, BUILDER(Create_func_dyncol_exists)},
  { { STRING_WITH_LEN("COLUMN_LIST") }, BUILDER(Create_func_dyncol_list)},
  { { STRING_WITH_LEN("COLUMN_JSON") }, BUILDER(Create_func_dyncol_json)},
  { { STRING_WITH_LEN("COMPRESS") }, BUILDER(Create_func_compress)},
  { { STRING_WITH_LEN("CONCAT") }, BUILDER(Create_func_concat)},
  { { STRING_WITH_LEN("CONCAT_OPERATOR_ORACLE") }, BUILDER(Create_func_concat_operator_oracle)},
  { { STRING_WITH_LEN("CONCAT_WS") }, BUILDER(Create_func_concat_ws)},
  { { STRING_WITH_LEN("CONNECTION_ID") }, BUILDER(Create_func_connection_id)},
  { { STRING_WITH_LEN("CONV") }, BUILDER(Create_func_conv)},
  { { STRING_WITH_LEN("CONVERT_TZ") }, BUILDER(Create_func_convert_tz)},
  { { STRING_WITH_LEN("COS") }, BUILDER(Create_func_cos)},
  { { STRING_WITH_LEN("COT") }, BUILDER(Create_func_cot)},
  { { STRING_WITH_LEN("CRC32") }, BUILDER(Create_func_crc32)},
  { { STRING_WITH_LEN("CRC32C") }, BUILDER(Create_func_crc32c)},
  { { STRING_WITH_LEN("DATEDIFF") }, BUILDER(Create_func_datediff)},
  { { STRING_WITH_LEN("DAYNAME") }, BUILDER(Create_func_dayname)},
  { { STRING_WITH_LEN("DAYOFMONTH") }, BUILDER(Create_func_dayofmonth)},
  { { STRING_WITH_LEN("DAYOFWEEK") }, BUILDER(Create_func_dayofweek)},
  { { STRING_WITH_LEN("DAYOFYEAR") }, BUILDER(Create_func_dayofyear)},
  { { STRING_WITH_LEN("DECODE") }, BUILDER(Create_func_decode)},
  { { STRING_WITH_LEN("DEGREES") }, BUILDER(Create_func_degrees)},
  { { STRING_WITH_LEN("DECODE_HISTOGRAM") }, BUILDER(Create_func_decode_histogram)},
  { { STRING_WITH_LEN("DECODE_ORACLE") }, BUILDER(Create_func_decode_oracle)},
  { { STRING_WITH_LEN("DES_DECRYPT") }, BUILDER(Create_func_des_decrypt)},
  { { STRING_WITH_LEN("DES_ENCRYPT") }, BUILDER(Create_func_des_encrypt)},
  { { STRING_WITH_LEN("ELT") }, BUILDER(Create_func_elt)},
  { { STRING_WITH_LEN("ENCODE") }, BUILDER(Create_func_encode)},
  { { STRING_WITH_LEN("ENCRYPT") }, BUILDER(Create_func_encrypt)},
  { { STRING_WITH_LEN("EXP") }, BUILDER(Create_func_exp)},
  { { STRING_WITH_LEN("EXPORT_SET") }, BUILDER(Create_func_export_set)},
  { { STRING_WITH_LEN("EXTRACTVALUE") }, BUILDER(Create_func_xml_extractvalue)},
  { { STRING_WITH_LEN("FIELD") }, BUILDER(Create_func_field)},
  { { STRING_WITH_LEN("FIND_IN_SET") }, BUILDER(Create_func_find_in_set)},
  { { STRING_WITH_LEN("FLOOR") }, BUILDER(Create_func_floor)},
  { { STRING_WITH_LEN("FORMAT") }, BUILDER(Create_func_format)},
  { { STRING_WITH_LEN("FOUND_ROWS") }, BUILDER(Create_func_found_rows)},
  { { STRING_WITH_LEN("FROM_BASE64") }, BUILDER(Create_func_from_base64)},
  { { STRING_WITH_LEN("FROM_DAYS") }, BUILDER(Create_func_from_days)},
  { { STRING_WITH_LEN("FROM_UNIXTIME") }, BUILDER(Create_func_from_unixtime)},
  { { STRING_WITH_LEN("GET_LOCK") }, BUILDER(Create_func_get_lock)},
  { { STRING_WITH_LEN("GREATEST") }, BUILDER(Create_func_greatest)},
  { { STRING_WITH_LEN("HEX") }, BUILDER(Create_func_hex)},
  { { STRING_WITH_LEN("IFNULL") }, BUILDER(Create_func_ifnull)},
  { { STRING_WITH_LEN("INSTR") }, BUILDER(Create_func_instr)},
  { { STRING_WITH_LEN("ISNULL") }, BUILDER(Create_func_isnull)},
  { { STRING_WITH_LEN("IS_FREE_LOCK") }, BUILDER(Create_func_is_free_lock)},
  { { STRING_WITH_LEN("IS_USED_LOCK") }, BUILDER(Create_func_is_used_lock)},
  { { STRING_WITH_LEN("JSON_ARRAY") }, BUILDER(Create_func_json_array)},
  { { STRING_WITH_LEN("JSON_ARRAY_APPEND") }, BUILDER(Create_func_json_array_append)},
  { { STRING_WITH_LEN("JSON_ARRAY_INSERT") }, BUILDER(Create_func_json_array_insert)},
  { { STRING_WITH_LEN("JSON_COMPACT") }, BUILDER(Create_func_json_compact)},
  { { STRING_WITH_LEN("JSON_CONTAINS") }, BUILDER(Create_func_json_contains)},
  { { STRING_WITH_LEN("JSON_CONTAINS_PATH") }, BUILDER(Create_func_json_contains_path)},
  { { STRING_WITH_LEN("JSON_DEPTH") }, BUILDER(Create_func_json_depth)},
  { { STRING_WITH_LEN("JSON_DETAILED") }, BUILDER(Create_func_json_detailed)},
  { { STRING_WITH_LEN("JSON_PRETTY") }, BUILDER(Create_func_json_detailed)},
  { { STRING_WITH_LEN("JSON_EQUALS") }, BUILDER(Create_func_json_equals)},
  { { STRING_WITH_LEN("JSON_EXISTS") }, BUILDER(Create_func_json_exists)},
  { { STRING_WITH_LEN("JSON_EXTRACT") }, BUILDER(Create_func_json_extract)},
  { { STRING_WITH_LEN("JSON_INSERT") }, BUILDER(Create_func_json_insert)},
  { { STRING_WITH_LEN("JSON_KEYS") }, BUILDER(Create_func_json_keys)},
  { { STRING_WITH_LEN("JSON_LENGTH") }, BUILDER(Create_func_json_length)},
  { { STRING_WITH_LEN("JSON_LOOSE") }, BUILDER(Create_func_json_loose)},
  { { STRING_WITH_LEN("JSON_MERGE") }, BUILDER(Create_func_json_merge)},
  { { STRING_WITH_LEN("JSON_MERGE_PATCH") }, BUILDER(Create_func_json_merge_patch)},
  { { STRING_WITH_LEN("JSON_MERGE_PRESERVE") }, BUILDER(Create_func_json_merge)},
  { { STRING_WITH_LEN("JSON_NORMALIZE") }, BUILDER(Create_func_json_normalize)},
  { { STRING_WITH_LEN("JSON_QUERY") }, BUILDER(Create_func_json_query)},
  { { STRING_WITH_LEN("JSON_QUOTE") }, BUILDER(Create_func_json_quote)},
  { { STRING_WITH_LEN("JSON_OBJECT") }, BUILDER(Create_func_json_object)},
  { { STRING_WITH_LEN("JSON_OVERLAPS") }, BUILDER(Create_func_json_overlaps)},
  { { STRING_WITH_LEN("JSON_REMOVE") }, BUILDER(Create_func_json_remove)},
  { { STRING_WITH_LEN("JSON_REPLACE") }, BUILDER(Create_func_json_replace)},
  { { STRING_WITH_LEN("JSON_SET") }, BUILDER(Create_func_json_set)},
  { { STRING_WITH_LEN("JSON_SEARCH") }, BUILDER(Create_func_json_search)},
  { { STRING_WITH_LEN("JSON_TYPE") }, BUILDER(Create_func_json_type)},
  { { STRING_WITH_LEN("JSON_UNQUOTE") }, BUILDER(Create_func_json_unquote)},
  { { STRING_WITH_LEN("JSON_VALID") }, BUILDER(Create_func_json_valid)},
  { { STRING_WITH_LEN("JSON_VALUE") }, BUILDER(Create_func_json_value)},
  { { STRING_WITH_LEN("LAST_DAY") }, BUILDER(Create_func_last_day)},
  { { STRING_WITH_LEN("LAST_INSERT_ID") }, BUILDER(Create_func_last_insert_id)},
  { { STRING_WITH_LEN("LCASE") }, BUILDER(Create_func_lcase)},
  { { STRING_WITH_LEN("LEAST") }, BUILDER(Create_func_least)},
  { { STRING_WITH_LEN("LENGTH") }, BUILDER(Create_func_length)},
  { { STRING_WITH_LEN("LENGTHB") }, BUILDER(Create_func_octet_length)},
#ifndef DBUG_OFF
  /* Debug-only helpers, not exposed in release builds */
  { { STRING_WITH_LEN("LIKE_RANGE_MIN") }, BUILDER(Create_func_like_range_min)},
  { { STRING_WITH_LEN("LIKE_RANGE_MAX") }, BUILDER(Create_func_like_range_max)},
#endif
  { { STRING_WITH_LEN("LN") }, BUILDER(Create_func_ln)},
  { { STRING_WITH_LEN("LOAD_FILE") }, BUILDER(Create_func_load_file)},
  { { STRING_WITH_LEN("LOCATE") }, BUILDER(Create_func_locate)},
  { { STRING_WITH_LEN("LOG") }, BUILDER(Create_func_log)},
  { { STRING_WITH_LEN("LOG10") }, BUILDER(Create_func_log10)},
  { { STRING_WITH_LEN("LOG2") }, BUILDER(Create_func_log2)},
  { { STRING_WITH_LEN("LOWER") }, BUILDER(Create_func_lcase)},
  { { STRING_WITH_LEN("LPAD") }, BUILDER(Create_func_lpad)},
  { { STRING_WITH_LEN("LPAD_ORACLE") }, BUILDER(Create_func_lpad_oracle)},
  { { STRING_WITH_LEN("LTRIM") }, BUILDER(Create_func_ltrim)},
  { { STRING_WITH_LEN("LTRIM_ORACLE") }, BUILDER(Create_func_ltrim_oracle)},
  { { STRING_WITH_LEN("MAKEDATE") }, BUILDER(Create_func_makedate)},
  { { STRING_WITH_LEN("MAKETIME") }, BUILDER(Create_func_maketime)},
  { { STRING_WITH_LEN("MAKE_SET") }, BUILDER(Create_func_make_set)},
  { { STRING_WITH_LEN("MASTER_GTID_WAIT") }, BUILDER(Create_func_master_gtid_wait)},
  { { STRING_WITH_LEN("MASTER_POS_WAIT") }, BUILDER(Create_func_master_pos_wait)},
  { { STRING_WITH_LEN("MD5") }, BUILDER(Create_func_md5)},
  { { STRING_WITH_LEN("MONTHNAME") }, BUILDER(Create_func_monthname)},
  { { STRING_WITH_LEN("NAME_CONST") }, BUILDER(Create_func_name_const)},
  { {STRING_WITH_LEN("NATURAL_SORT_KEY")}, BUILDER(Create_func_natural_sort_key)},
  { { STRING_WITH_LEN("NVL") }, BUILDER(Create_func_ifnull)},
  { { STRING_WITH_LEN("NVL2") }, BUILDER(Create_func_nvl2)},
  { { STRING_WITH_LEN("NULLIF") }, BUILDER(Create_func_nullif)},
  { { STRING_WITH_LEN("OCT") }, BUILDER(Create_func_oct)},
  { { STRING_WITH_LEN("OCTET_LENGTH") }, BUILDER(Create_func_octet_length)},
  { { STRING_WITH_LEN("ORD") }, BUILDER(Create_func_ord)},
  { { STRING_WITH_LEN("PERIOD_ADD") }, BUILDER(Create_func_period_add)},
  { { STRING_WITH_LEN("PERIOD_DIFF") }, BUILDER(Create_func_period_diff)},
  { { STRING_WITH_LEN("PI") }, BUILDER(Create_func_pi)},
  { { STRING_WITH_LEN("POW") }, BUILDER(Create_func_pow)},
  { { STRING_WITH_LEN("POWER") }, BUILDER(Create_func_pow)},
  { { STRING_WITH_LEN("QUOTE") }, BUILDER(Create_func_quote)},
  { { STRING_WITH_LEN("RANDOM_BYTES")}, BUILDER(Create_func_random_bytes)},
  { { STRING_WITH_LEN("REGEXP_INSTR") }, BUILDER(Create_func_regexp_instr)},
  { { STRING_WITH_LEN("REGEXP_REPLACE") }, BUILDER(Create_func_regexp_replace)},
  { { STRING_WITH_LEN("REGEXP_SUBSTR") }, BUILDER(Create_func_regexp_substr)},
  { { STRING_WITH_LEN("RADIANS") }, BUILDER(Create_func_radians)},
  { { STRING_WITH_LEN("RAND") }, BUILDER(Create_func_rand)},
  { { STRING_WITH_LEN("RELEASE_ALL_LOCKS") },
    BUILDER(Create_func_release_all_locks)},
  { { STRING_WITH_LEN("RELEASE_LOCK") }, BUILDER(Create_func_release_lock)},
  { { STRING_WITH_LEN("REPLACE_ORACLE") },
    BUILDER(Create_func_replace_oracle)},
  { { STRING_WITH_LEN("REVERSE") }, BUILDER(Create_func_reverse)},
  { { STRING_WITH_LEN("ROUND") }, BUILDER(Create_func_round)},
  { { STRING_WITH_LEN("RPAD") }, BUILDER(Create_func_rpad)},
  { { STRING_WITH_LEN("RPAD_ORACLE") }, BUILDER(Create_func_rpad_oracle)},
  { { STRING_WITH_LEN("RTRIM") }, BUILDER(Create_func_rtrim)},
  { { STRING_WITH_LEN("RTRIM_ORACLE") }, BUILDER(Create_func_rtrim_oracle)},
  { { STRING_WITH_LEN("SEC_TO_TIME") }, BUILDER(Create_func_sec_to_time)},
  { { STRING_WITH_LEN("SFORMAT") }, BUILDER(Create_func_sformat)},
  { { STRING_WITH_LEN("SHA") }, BUILDER(Create_func_sha)},
  { { STRING_WITH_LEN("SHA1") }, BUILDER(Create_func_sha)},
  { { STRING_WITH_LEN("SHA2") }, BUILDER(Create_func_sha2)},
  { { STRING_WITH_LEN("SIGN") }, BUILDER(Create_func_sign)},
  { { STRING_WITH_LEN("SIN") }, BUILDER(Create_func_sin)},
  { { STRING_WITH_LEN("SLEEP") }, BUILDER(Create_func_sleep)},
  { { STRING_WITH_LEN("SOUNDEX") }, BUILDER(Create_func_soundex)},
  { { STRING_WITH_LEN("SPACE") }, BUILDER(Create_func_space)},
  { { STRING_WITH_LEN("SQRT") }, BUILDER(Create_func_sqrt)},
  { { STRING_WITH_LEN("STRCMP") }, BUILDER(Create_func_strcmp)},
  { { STRING_WITH_LEN("STR_TO_DATE") }, BUILDER(Create_func_str_to_date)},
  { { STRING_WITH_LEN("SUBSTR_ORACLE") },
    BUILDER(Create_func_substr_oracle)},
  { { STRING_WITH_LEN("SUBSTRING_INDEX") }, BUILDER(Create_func_substr_index)},
  { { STRING_WITH_LEN("SUBTIME") }, BUILDER(Create_func_subtime)},
  { { STRING_WITH_LEN("TAN") }, BUILDER(Create_func_tan)},
  { { STRING_WITH_LEN("TIMEDIFF") }, BUILDER(Create_func_timediff)},
  { { STRING_WITH_LEN("TIME_FORMAT") }, BUILDER(Create_func_time_format)},
  { { STRING_WITH_LEN("TIME_TO_SEC") }, BUILDER(Create_func_time_to_sec)},
  { { STRING_WITH_LEN("TO_BASE64") }, BUILDER(Create_func_to_base64)},
  { { STRING_WITH_LEN("TO_CHAR") }, BUILDER(Create_func_to_char)},
  { { STRING_WITH_LEN("TO_DAYS") }, BUILDER(Create_func_to_days)},
  { { STRING_WITH_LEN("TO_SECONDS") }, BUILDER(Create_func_to_seconds)},
  { { STRING_WITH_LEN("UCASE") }, BUILDER(Create_func_ucase)},
  { { STRING_WITH_LEN("UNCOMPRESS") }, BUILDER(Create_func_uncompress)},
  { { STRING_WITH_LEN("UNCOMPRESSED_LENGTH") }, BUILDER(Create_func_uncompressed_length)},
  { { STRING_WITH_LEN("UNHEX") }, BUILDER(Create_func_unhex)},
  { { STRING_WITH_LEN("UNIX_TIMESTAMP") }, BUILDER(Create_func_unix_timestamp)},
  { { STRING_WITH_LEN("UPDATEXML") }, BUILDER(Create_func_xml_update)},
  { { STRING_WITH_LEN("UPPER") }, BUILDER(Create_func_ucase)},
  { { STRING_WITH_LEN("UUID_SHORT") }, BUILDER(Create_func_uuid_short)},
  { { STRING_WITH_LEN("VERSION") }, BUILDER(Create_func_version)},
  { { STRING_WITH_LEN("WEEKDAY") }, BUILDER(Create_func_weekday)},
  { { STRING_WITH_LEN("WEEKOFYEAR") }, BUILDER(Create_func_weekofyear)},
#ifdef WITH_WSREP
  /* Galera/wsrep GTID helpers, compiled in only with wsrep support */
  { { STRING_WITH_LEN("WSREP_LAST_WRITTEN_GTID") }, BUILDER(Create_func_wsrep_last_written_gtid)},
  { { STRING_WITH_LEN("WSREP_LAST_SEEN_GTID") }, BUILDER(Create_func_wsrep_last_seen_gtid)},
  { { STRING_WITH_LEN("WSREP_SYNC_WAIT_UPTO_GTID") }, BUILDER(Create_func_wsrep_sync_wait_upto)},
#endif /* WITH_WSREP */
  { { STRING_WITH_LEN("YEARWEEK") }, BUILDER(Create_func_year_week)}
};
+Native_func_registry_array + native_func_registry_array(func_array, array_elements(func_array)); + +const size_t func_array_length= sizeof(func_array) / sizeof(Native_func_registry) - 1; + +Native_functions_hash native_functions_hash; + +/* + Load the hash table for native functions. + Note: this code is not thread safe, and is intended to be used at server + startup only (before going multi-threaded) +*/ + +bool Native_functions_hash::init(size_t count) +{ + DBUG_ENTER("Native_functions_hash::init"); + + if (my_hash_init(key_memory_native_functions, this, + system_charset_info, (ulong) count, 0, 0, (my_hash_get_key) + get_native_fct_hash_key, NULL, MYF(0))) + DBUG_RETURN(true); + + DBUG_RETURN(false); +} + + +bool Native_functions_hash::append(const Native_func_registry array[], + size_t count) +{ + DBUG_ENTER("Native_functions_hash::append"); + + for (size_t i= 0; i < count; i++) + { + if (my_hash_insert(this, (uchar*) &array[i])) + DBUG_RETURN(true); + } + +#if ! defined (DBUG_OFF) && defined (DBUG_TRACE) + for (uint i=0 ; i < records ; i++) + { + const Native_func_registry *func= + (Native_func_registry*) my_hash_element(this, i); + DBUG_PRINT("info", ("native function: %s length: %u", + func->name.str, (uint) func->name.length)); + } +#endif + DBUG_RETURN(false); +} + + +bool Native_functions_hash::remove(const Native_func_registry array[], + size_t count) +{ + DBUG_ENTER("Native_functions_hash::remove"); + + for (size_t i= 0; i < count; i++) + { + if (my_hash_delete(this, (uchar*) &array[i])) + DBUG_RETURN(true); + } + + DBUG_RETURN(false); +} + + +/* + Empty the hash table for native functions. + Note: this code is not thread safe, and is intended to be used at server + shutdown only (after thread requests have been executed). 
+*/ + +void Native_functions_hash::cleanup() +{ + DBUG_ENTER("Native_functions_hash::cleanup"); + my_hash_free(this); + DBUG_VOID_RETURN; +} + + +static Create_func * +function_plugin_find_native_function_builder(THD *thd, const LEX_CSTRING &name) +{ + plugin_ref plugin; + if ((plugin= my_plugin_lock_by_name(thd, &name, MariaDB_FUNCTION_PLUGIN))) + { + Create_func *builder= + reinterpret_cast(plugin_decl(plugin)->info)-> + create_func(); + // TODO: MDEV-20846 Add proper unlocking for MariaDB_FUNCTION_PLUGIN + plugin_unlock(thd, plugin); + return builder; + } + return NULL; +} + + +Create_func * +Native_functions_hash::find(THD *thd, const LEX_CSTRING &name) const +{ + Native_func_registry *func; + Create_func *builder= NULL; + + /* Thread safe */ + func= (Native_func_registry*) my_hash_search(this, + (uchar*) name.str, + name.length); + + if (func && (builder= func->builder)) + return builder; + + if ((builder= function_plugin_find_native_function_builder(thd, name))) + return builder; + + return NULL; +} + + +int item_create_init() +{ + size_t count= native_func_registry_array.count(); +#ifdef HAVE_SPATIAL + count+= native_func_registry_array_geom.count(); +#endif + + if (native_functions_hash.init(count) || + native_functions_hash.append(native_func_registry_array.elements(), + native_func_registry_array.count())) + return true; + +#ifdef HAVE_SPATIAL + if (native_functions_hash.append(native_func_registry_array_geom.elements(), + native_func_registry_array_geom.count())) + return true; +#endif + return false; +} + + +void item_create_cleanup() +{ + native_functions_hash.cleanup(); +} + + +Create_qfunc * +find_qualified_function_builder(THD *thd) +{ + return & Create_sp_func::s_singleton; +} + + +static List *create_func_dyncol_prepare(THD *thd, + DYNCALL_CREATE_DEF **dfs, + List &list) +{ + DYNCALL_CREATE_DEF *def; + List_iterator_fast li(list); + List *args= new (thd->mem_root) List; + + *dfs= (DYNCALL_CREATE_DEF *)alloc_root(thd->mem_root, + 
sizeof(DYNCALL_CREATE_DEF) * + list.elements); + + if (!args || !*dfs) + return NULL; + + for (uint i= 0; (def= li++) ;) + { + dfs[0][i++]= *def; + args->push_back(def->key, thd->mem_root); + args->push_back(def->value, thd->mem_root); + } + return args; +} + +Item *create_func_dyncol_create(THD *thd, List &list) +{ + List *args; + DYNCALL_CREATE_DEF *dfs; + if (!(args= create_func_dyncol_prepare(thd, &dfs, list))) + return NULL; + + return new (thd->mem_root) Item_func_dyncol_create(thd, *args, dfs); +} + +Item *create_func_dyncol_add(THD *thd, Item *str, + List &list) +{ + List *args; + DYNCALL_CREATE_DEF *dfs; + + if (!(args= create_func_dyncol_prepare(thd, &dfs, list))) + return NULL; + + args->push_back(str, thd->mem_root); + + return new (thd->mem_root) Item_func_dyncol_add(thd, *args, dfs); +} + + + +Item *create_func_dyncol_delete(THD *thd, Item *str, List &nums) +{ + DYNCALL_CREATE_DEF *dfs; + Item *key; + List_iterator_fast it(nums); + List *args= new (thd->mem_root) List; + + dfs= (DYNCALL_CREATE_DEF *)alloc_root(thd->mem_root, + sizeof(DYNCALL_CREATE_DEF) * + nums.elements); + if (!args || !dfs) + return NULL; + + for (uint i= 0; (key= it++); i++) + { + dfs[i].key= key; + dfs[i].value= new (thd->mem_root) Item_null(thd); + dfs[i].type= DYN_COL_INT; + args->push_back(dfs[i].key, thd->mem_root); + args->push_back(dfs[i].value, thd->mem_root); + } + + args->push_back(str, thd->mem_root); + + return new (thd->mem_root) Item_func_dyncol_add(thd, *args, dfs); +} + + +Item *create_func_dyncol_get(THD *thd, Item *str, Item *num, + const Type_handler *handler, + const Lex_length_and_dec_st &length_dec, + CHARSET_INFO *cs) +{ + Item *res; + + if (likely(!(res= new (thd->mem_root) Item_dyncol_get(thd, str, num)))) + return res; // Return NULL + return handler->create_typecast_item(thd, res, + Type_cast_attributes(length_dec, cs)); +} diff --git a/sql/item_create.h b/sql/item_create.h new file mode 100644 index 00000000..80395960 --- /dev/null +++ 
b/sql/item_create.h @@ -0,0 +1,380 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. + Copyright (c) 2008, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* Functions to create an item. Used by sql/sql_yacc.yy */ + +#ifndef ITEM_CREATE_H +#define ITEM_CREATE_H + +#include "item_func.h" // Cast_target + +typedef struct st_udf_func udf_func; + +/** + Public function builder interface. + The parser (sql/sql_yacc.yy) uses a factory / builder pattern to + construct an Item object for each function call. + All the concrete function builders implements this interface, + either directly or indirectly with some adapter helpers. + Keeping the function creation separated from the bison grammar allows + to simplify the parser, and avoid the need to introduce a new token + for each function, which has undesirable side effects in the grammar. +*/ + +class Create_func +{ +public: + /** + The builder create method. + Given the function name and list or arguments, this method creates + an Item that represents the function call. + In case or errors, a NULL item is returned, and an error is reported. + Note that the thd object may be modified by the builder. + In particular, the following members/methods can be set/called, + depending on the function called and the function possible side effects. +
    +
  • thd->lex->binlog_row_based_if_mixed
  • +
  • thd->lex->current_context()
  • +
  • thd->lex->safe_to_cache_query
  • +
  • thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT)
  • +
  • thd->lex->uncacheable(UNCACHEABLE_RAND)
  • +
  • thd->lex->add_time_zone_tables_to_query_tables(thd)
  • +
+ @param thd The current thread + @param name The function name + @param item_list The list of arguments to the function, can be NULL + @return An item representing the parsed function call, or NULL + */ + virtual Item *create_func(THD *thd, const LEX_CSTRING *name, + List *item_list) = 0; + +protected: + /** Constructor */ + Create_func() = default; + /** Destructor */ + virtual ~Create_func() = default; +}; + + +/** + Adapter for functions that takes exactly zero arguments. +*/ + +class Create_func_arg0 : public Create_func +{ +public: + Item *create_func(THD *thd, const LEX_CSTRING *name, List *item_list) + override; + + /** + Builder method, with no arguments. + @param thd The current thread + @return An item representing the function call + */ + virtual Item *create_builder(THD *thd) = 0; + +protected: + /** Constructor. */ + Create_func_arg0() = default; + /** Destructor. */ + virtual ~Create_func_arg0() = default; +}; + + +/** + Adapter for functions that takes exactly one argument. +*/ + +class Create_func_arg1 : public Create_func +{ +public: + Item *create_func(THD *thd, const LEX_CSTRING *name, List *item_list) + override; + + /** + Builder method, with one argument. + @param thd The current thread + @param arg1 The first argument of the function + @return An item representing the function call + */ + virtual Item *create_1_arg(THD *thd, Item *arg1) = 0; + +protected: + /** Constructor. */ + Create_func_arg1() = default; + /** Destructor. */ + virtual ~Create_func_arg1() = default; +}; + + +/** + Adapter for functions that takes exactly two arguments. +*/ + +class Create_func_arg2 : public Create_func +{ +public: + Item *create_func(THD *thd, const LEX_CSTRING *name, List *item_list) + override; + + /** + Builder method, with two arguments. 
+ @param thd The current thread + @param arg1 The first argument of the function + @param arg2 The second argument of the function + @return An item representing the function call + */ + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) = 0; + +protected: + /** Constructor. */ + Create_func_arg2() = default; + /** Destructor. */ + virtual ~Create_func_arg2() = default; +}; + + +/** + Adapter for functions that takes exactly three arguments. +*/ + +class Create_func_arg3 : public Create_func +{ +public: + Item *create_func(THD *thd, const LEX_CSTRING *name, List *item_list) + override; + + /** + Builder method, with three arguments. + @param thd The current thread + @param arg1 The first argument of the function + @param arg2 The second argument of the function + @param arg3 The third argument of the function + @return An item representing the function call + */ + virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3) = 0; + +protected: + /** Constructor. */ + Create_func_arg3() = default; + /** Destructor. */ + virtual ~Create_func_arg3() = default; +}; + + + + +/** + Adapter for native functions with a variable number of arguments. + The main use of this class is to discard the following calls: + foo(expr1 AS name1, expr2 AS name2, ...) + which are syntactically correct (the syntax can refer to a UDF), + but semantically invalid for native functions. +*/ + +class Create_native_func : public Create_func +{ +public: + Item *create_func(THD *thd, const LEX_CSTRING *name, List *item_list) + override; + + /** + Builder method, with no arguments. + @param thd The current thread + @param name The native function name + @param item_list The function parameters, none of which are named + @return An item representing the function call + */ + virtual Item *create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) = 0; + +protected: + /** Constructor. */ + Create_native_func() = default; + /** Destructor. 
*/ + virtual ~Create_native_func() = default; +}; + + +/** + Function builder for qualified functions. + This builder is used with functions call using a qualified function name + syntax, as in db.func(expr, expr, ...). +*/ + +class Create_qfunc : public Create_func +{ +public: + /** + The builder create method, for unqualified functions. + This builder will use the current database for the database name. + @param thd The current thread + @param name The function name + @param item_list The list of arguments to the function, can be NULL + @return An item representing the parsed function call + */ + Item *create_func(THD *thd, const LEX_CSTRING *name, List *item_list) + override; + + /** + The builder create method, for qualified functions. + @param thd The current thread + @param db The database name + @param name The function name + @param use_explicit_name Should the function be represented as 'db.name'? + @param item_list The list of arguments to the function, can be NULL + @return An item representing the parsed function call + */ + virtual Item *create_with_db(THD *thd, + const LEX_CSTRING *db, + const LEX_CSTRING *name, + bool use_explicit_name, + List *item_list) = 0; + +protected: + /** Constructor. */ + Create_qfunc() = default; + /** Destructor. */ + virtual ~Create_qfunc() = default; +}; + + +/** + Find the function builder for qualified functions. + @param thd The current thread + @return A function builder for qualified functions +*/ +extern Create_qfunc * find_qualified_function_builder(THD *thd); + + +#ifdef HAVE_DLOPEN +/** + Function builder for User Defined Functions. +*/ + +class Create_udf_func : public Create_func +{ +public: + Item *create_func(THD *thd, const LEX_CSTRING *name, List *item_list) + override; + + /** + The builder create method, for User Defined Functions. 
+ @param thd The current thread + @param fct The User Defined Function metadata + @param item_list The list of arguments to the function, can be NULL + @return An item representing the parsed function call + */ + Item *create(THD *thd, udf_func *fct, List *item_list); + + /** Singleton. */ + static Create_udf_func s_singleton; + +protected: + /** Constructor. */ + Create_udf_func() = default; + /** Destructor. */ + virtual ~Create_udf_func() = default; +}; +#endif + + +struct Native_func_registry +{ + LEX_CSTRING name; + Create_func *builder; +}; + + +class Native_functions_hash: public HASH +{ +public: + Native_functions_hash() + { + bzero(this, sizeof(*this)); + } + ~Native_functions_hash() + { + /* + No automatic free because objects of this type + are expected to be declared statically. + The code in cleanup() calls my_hash_free() which may not work correctly + at the very end of mariadbd shutdown. + The the upper level code should call cleanup() explicitly. + + Unfortunatelly, it's not possible to use DBUG_ASSERT(!records) here, + because the server terminates using exit() in some cases, + e.g. in the test main.named_pipe with the "Create named pipe failed" + error. + */ + } + bool init(size_t count); + bool append(const Native_func_registry array[], size_t count); + bool remove(const Native_func_registry array[], size_t count); + void cleanup(); + /** + Find the native function builder associated with a given function name. 
+ @param thd The current thread + @param name The native function name + @return The native function builder associated with the name, or NULL + */ + Create_func *find(THD *thd, const LEX_CSTRING &name) const; +}; + +extern MYSQL_PLUGIN_IMPORT Native_functions_hash native_functions_hash; + +extern const Native_func_registry func_array[]; +extern const size_t func_array_length; + +int item_create_init(); +void item_create_cleanup(); + +Item *create_func_dyncol_create(THD *thd, List &list); +Item *create_func_dyncol_add(THD *thd, Item *str, + List &list); +Item *create_func_dyncol_delete(THD *thd, Item *str, List &nums); +Item *create_func_dyncol_get(THD *thd, Item *num, Item *str, + const Type_handler *handler, + const Lex_length_and_dec_st &length_and_dec, + CHARSET_INFO *cs); +Item *create_func_dyncol_json(THD *thd, Item *str); + + +class Native_func_registry_array +{ + const Native_func_registry *m_elements; + size_t m_count; +public: + Native_func_registry_array() + :m_elements(NULL), + m_count(0) + { } + Native_func_registry_array(const Native_func_registry *elements, size_t count) + :m_elements(elements), + m_count(count) + { } + const Native_func_registry& element(size_t i) const + { + DBUG_ASSERT(i < m_count); + return m_elements[i]; + } + const Native_func_registry *elements() const { return m_elements; } + size_t count() const { return m_count; } +}; + + +#endif + diff --git a/sql/item_func.cc b/sql/item_func.cc new file mode 100644 index 00000000..a2bc4752 --- /dev/null +++ b/sql/item_func.cc @@ -0,0 +1,7309 @@ +/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. + Copyright (c) 2009, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + @brief + This file defines all numerical functions +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "sql_plugin.h" +#include "sql_priv.h" +/* + It is necessary to include set_var.h instead of item.h because there + are dependencies on include order for set_var.h and item.h. This + will be resolved later. +*/ +#include "sql_class.h" // set_var.h: THD +#include "set_var.h" +#include "slave.h" // for wait_for_master_pos +#include "sql_show.h" // append_identifier +#include "strfunc.h" // find_type +#include "sql_parse.h" // is_update_query +#include "sql_acl.h" // EXECUTE_ACL +#include "mysqld.h" // LOCK_short_uuid_generator +#include "rpl_mi.h" +#include "sql_time.h" +#include +#include +#include +#include +#include + +#include "sp_head.h" +#include "sp_rcontext.h" +#include "sp.h" +#include "set_var.h" +#include "debug_sync.h" +#include "sql_base.h" +#include "sql_cte.h" +#ifdef WITH_WSREP +#include "mysql/service_wsrep.h" +#endif /* WITH_WSREP */ + +#ifdef NO_EMBEDDED_ACCESS_CHECKS +#define sp_restore_security_context(A,B) while (0) {} +#endif + +bool check_reserved_words(const LEX_CSTRING *name) +{ + if (lex_string_eq(name, STRING_WITH_LEN("GLOBAL")) || + lex_string_eq(name, STRING_WITH_LEN("LOCAL")) || + lex_string_eq(name, STRING_WITH_LEN("SESSION"))) + return TRUE; + return FALSE; +} + + +/** + Test if the sum of arguments overflows the ulonglong range. 
+*/ +static inline bool test_if_sum_overflows_ull(ulonglong arg1, ulonglong arg2) +{ + return ULonglong::test_if_sum_overflows_ull(arg1, arg2); +} + + +/** + Allocate memory for arguments using tmp_args or thd->alloc(). + @retval false - success + @retval true - error (arg_count is set to 0 for conveniece) +*/ +bool Item_args::alloc_arguments(THD *thd, uint count) +{ + if (count <= 2) + { + args= tmp_arg; + return false; + } + if ((args= (Item**) thd->alloc(sizeof(Item*) * count)) == NULL) + { + arg_count= 0; + return true; + } + return false; +} + + +void Item_args::set_arguments(THD *thd, List &list) +{ + if (alloc_arguments(thd, list.elements)) + return; + List_iterator_fast li(list); + Item *item; + for (arg_count= 0; (item= li++); ) + args[arg_count++]= item; +} + + +Item_args::Item_args(THD *thd, const Item_args *other) + :arg_count(other->arg_count) +{ + if (arg_count <= 2) + { + args= tmp_arg; + } + else if (!(args= (Item**) thd->alloc(sizeof(Item*) * arg_count))) + { + arg_count= 0; + return; + } + if (arg_count) + memcpy(args, other->args, sizeof(Item*) * arg_count); +} + + +void Item_func::sync_with_sum_func_and_with_field(List &list) +{ + List_iterator_fast li(list); + Item *item; + while ((item= li++)) + with_flags|= item->with_flags; +} + + +bool Item_func::check_argument_types_like_args0() const +{ + if (arg_count < 2) + return false; + uint cols= args[0]->cols(); + bool is_scalar= args[0]->type_handler()->is_scalar_type(); + for (uint i= 1; i < arg_count; i++) + { + if (is_scalar != args[i]->type_handler()->is_scalar_type()) + { + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPES2_FOR_OPERATION, MYF(0), + args[0]->type_handler()->name().ptr(), + args[i]->type_handler()->name().ptr(), func_name()); + return true; + } + if (args[i]->check_cols(cols)) + return true; + } + return false; +} + + +bool Item_func::check_argument_types_or_binary(const Type_handler *handler, + uint start, uint end) const +{ + for (uint i= start; i < end ; i++) + { + DBUG_ASSERT(i < 
arg_count); + if (args[i]->check_type_or_binary(func_name_cstring(), handler)) + return true; + } + return false; +} + + +bool Item_func::check_argument_types_traditional_scalar(uint start, + uint end) const +{ + for (uint i= start; i < end ; i++) + { + DBUG_ASSERT(i < arg_count); + if (args[i]->check_type_traditional_scalar(func_name_cstring())) + return true; + } + return false; +} + + +bool Item_func::check_argument_types_can_return_int(uint start, + uint end) const +{ + for (uint i= start; i < end ; i++) + { + DBUG_ASSERT(i < arg_count); + if (args[i]->check_type_can_return_int(func_name_cstring())) + return true; + } + return false; +} + + +bool Item_func::check_argument_types_can_return_real(uint start, + uint end) const +{ + for (uint i= start; i < end ; i++) + { + DBUG_ASSERT(i < arg_count); + if (args[i]->check_type_can_return_real(func_name_cstring())) + return true; + } + return false; +} + + +bool Item_func::check_argument_types_can_return_text(uint start, + uint end) const +{ + for (uint i= start; i < end ; i++) + { + DBUG_ASSERT(i < arg_count); + if (args[i]->check_type_can_return_text(func_name_cstring())) + return true; + } + return false; +} + + +bool Item_func::check_argument_types_can_return_str(uint start, + uint end) const +{ + for (uint i= start; i < end ; i++) + { + DBUG_ASSERT(i < arg_count); + if (args[i]->check_type_can_return_str(func_name_cstring())) + return true; + } + return false; +} + + +bool Item_func::check_argument_types_can_return_date(uint start, + uint end) const +{ + for (uint i= start; i < end ; i++) + { + DBUG_ASSERT(i < arg_count); + if (args[i]->check_type_can_return_date(func_name_cstring())) + return true; + } + return false; +} + + +bool Item_func::check_argument_types_can_return_time(uint start, + uint end) const +{ + for (uint i= start; i < end ; i++) + { + DBUG_ASSERT(i < arg_count); + if (args[i]->check_type_can_return_time(func_name_cstring())) + return true; + } + return false; +} + + +bool 
Item_func::check_argument_types_scalar(uint start, uint end) const +{ + for (uint i= start; i < end; i++) + { + DBUG_ASSERT(i < arg_count); + if (args[i]->check_type_scalar(func_name_cstring())) + return true; + } + return false; +} + + +/* + Resolve references to table column for a function and its argument + + SYNOPSIS: + fix_fields() + thd Thread object + ref Pointer to where this object is used. This reference + is used if we want to replace this object with another + one (for example in the summary functions). + + DESCRIPTION + Call fix_fields() for all arguments to the function. The main intention + is to allow all Item_field() objects to setup pointers to the table fields. + + Sets as a side effect the following class variables: + maybe_null Set if any argument may return NULL + with_sum_func Set if any of the arguments contains a sum function + with_window_func() Set if any of the arguments contain a window function + with_field Set if any of the arguments contains or is a field + used_tables_cache Set to union of the tables used by arguments + + str_value.charset If this is a string function, set this to the + character set for the first argument. + If any argument is binary, this is set to binary + + If for any item any of the defaults are wrong, then this can + be fixed in the fix_length_and_dec() function that is called + after this one or by writing a specialized fix_fields() for the + item. + + RETURN VALUES + FALSE ok + TRUE Got error. Stored with my_error(). +*/ + +bool +Item_func::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed() == 0); + Item **arg,**arg_end; + uchar buff[STACK_BUFF_ALLOC]; // Max argument in function + + /* + The Used_tables_and_const_cache of "this" was initialized by + the constructor, or by Item_func::cleanup(). 
+ */ + DBUG_ASSERT(used_tables_cache == 0); + DBUG_ASSERT(const_item_cache == true); + + not_null_tables_cache= 0; + + /* + Use stack limit of STACK_MIN_SIZE * 2 since + on some platforms a recursive call to fix_fields + requires more than STACK_MIN_SIZE bytes (e.g. for + MIPS, it takes about 22kB to make one recursive + call to Item_func::fix_fields()) + */ + if (check_stack_overrun(thd, STACK_MIN_SIZE * 2, buff)) + return TRUE; // Fatal error if flag is set! + if (arg_count) + { // Print purify happy + for (arg=args, arg_end=args+arg_count; arg != arg_end ; arg++) + { + Item *item; + /* + We can't yet set item to *arg as fix_fields may change *arg + We shouldn't call fix_fields() twice, so check 'fixed' field first + */ + if ((*arg)->fix_fields_if_needed(thd, arg)) + { + cleanup(); + return TRUE; /* purecov: inspected */ + } + item= *arg; + + base_flags|= item->base_flags & item_base_t::MAYBE_NULL; + with_flags|= item->with_flags; + used_tables_and_const_cache_join(item); + not_null_tables_cache|= item->not_null_tables(); + } + } + if (check_arguments()) + { + cleanup(); + return true; + } + if (fix_length_and_dec(thd)) + { + cleanup(); + return TRUE; + } + base_flags|= item_base_t::FIXED; + return FALSE; +} + +void +Item_func::quick_fix_field() +{ + Item **arg,**arg_end; + if (arg_count) + { + for (arg=args, arg_end=args+arg_count; arg != arg_end ; arg++) + { + if (!(*arg)->fixed()) + (*arg)->quick_fix_field(); + } + } + base_flags|= item_base_t::FIXED; +} + + +bool +Item_func::eval_not_null_tables(void *opt_arg) +{ + Item **arg,**arg_end; + not_null_tables_cache= 0; + if (arg_count) + { + for (arg=args, arg_end=args+arg_count; arg != arg_end ; arg++) + { + not_null_tables_cache|= (*arg)->not_null_tables(); + } + } + return FALSE; +} + + +bool +Item_func::find_not_null_fields(table_map allowed) +{ + if (~allowed & used_tables()) + return false; + + Item **arg,**arg_end; + if (arg_count) + { + for (arg=args, arg_end=args+arg_count; arg != arg_end ; arg++) + { + 
if (!(*arg)->find_not_null_fields(allowed)) + continue; + } + } + return false; +} + + +void Item_func::fix_after_pullout(st_select_lex *new_parent, Item **ref, + bool merge) +{ + Item **arg,**arg_end; + + used_tables_and_const_cache_init(); + not_null_tables_cache= 0; + + if (arg_count) + { + for (arg=args, arg_end=args+arg_count; arg != arg_end ; arg++) + { + (*arg)->fix_after_pullout(new_parent, arg, merge); + Item *item= *arg; + + used_tables_and_const_cache_join(item); + not_null_tables_cache|= item->not_null_tables(); + } + } +} + + +void Item_func::traverse_cond(Cond_traverser traverser, + void *argument, traverse_order order) +{ + if (arg_count) + { + Item **arg,**arg_end; + + switch (order) { + case(PREFIX): + (*traverser)(this, argument); + for (arg= args, arg_end= args+arg_count; arg != arg_end; arg++) + { + (*arg)->traverse_cond(traverser, argument, order); + } + break; + case (POSTFIX): + for (arg= args, arg_end= args+arg_count; arg != arg_end; arg++) + { + (*arg)->traverse_cond(traverser, argument, order); + } + (*traverser)(this, argument); + } + } + else + (*traverser)(this, argument); +} + + +bool Item_args::transform_args(THD *thd, Item_transformer transformer, uchar *arg) +{ + for (uint i= 0; i < arg_count; i++) + { + Item *new_item= args[i]->transform(thd, transformer, arg); + if (!new_item) + return true; + /* + THD::change_item_tree() should be called only if the tree was + really transformed, i.e. when a new item has been created. + Otherwise we'll be allocating a lot of unnecessary memory for + change records at each execution. + */ + if (args[i] != new_item) + thd->change_item_tree(&args[i], new_item); + } + return false; +} + + +/** + Transform an Item_func object with a transformer callback function. + + The function recursively applies the transform method to each + argument of the Item_func node. + If the call of the method for an argument item returns a new item + the old item is substituted for a new one. 
+ After this the transformer is applied to the root node + of the Item_func object. + @param transformer the transformer callback function to be applied to + the nodes of the tree of the object + @param argument parameter to be passed to the transformer + + @return + Item returned as the result of transformation of the root node +*/ + +Item *Item_func::transform(THD *thd, Item_transformer transformer, uchar *argument) +{ + DBUG_ASSERT(!thd->stmt_arena->is_stmt_prepare()); + if (transform_args(thd, transformer, argument)) + return 0; + return (this->*transformer)(thd, argument); +} + + +/** + Compile Item_func object with a processor and a transformer + callback functions. + + First the function applies the analyzer to the root node of + the Item_func object. Then if the analyzer succeeds (returns TRUE) + the function recursively applies the compile method to each argument + of the Item_func node. + If the call of the method for an argument item returns a new item + the old item is substituted for a new one. + After this the transformer is applied to the root node + of the Item_func object. + The compile function is not called if the analyzer returns NULL + in the parameter arg_p. 
+ + @param analyzer the analyzer callback function to be applied to the + nodes of the tree of the object + @param[in,out] arg_p parameter to be passed to the processor + @param transformer the transformer callback function to be applied to the + nodes of the tree of the object + @param arg_t parameter to be passed to the transformer + + @return + Item returned as the result of transformation of the root node +*/ + +Item *Item_func::compile(THD *thd, Item_analyzer analyzer, uchar **arg_p, + Item_transformer transformer, uchar *arg_t) +{ + if (!(this->*analyzer)(arg_p)) + return 0; + if (*arg_p && arg_count) + { + Item **arg,**arg_end; + for (arg= args, arg_end= args+arg_count; arg != arg_end; arg++) + { + /* + The same parameter value of arg_p must be passed + to analyze any argument of the condition formula. + */ + uchar *arg_v= *arg_p; + Item *new_item= (*arg)->compile(thd, analyzer, &arg_v, transformer, + arg_t); + if (new_item && *arg != new_item) + thd->change_item_tree(arg, new_item); + } + } + return (this->*transformer)(thd, arg_t); +} + + +void Item_args::propagate_equal_fields(THD *thd, + const Item::Context &ctx, + COND_EQUAL *cond) +{ + uint i; + for (i= 0; i < arg_count; i++) + args[i]->propagate_equal_fields_and_change_item_tree(thd, ctx, cond, + &args[i]); +} + + +Sql_mode_dependency Item_args::value_depends_on_sql_mode_bit_or() const +{ + Sql_mode_dependency res; + for (uint i= 0; i < arg_count; i++) + res|= args[i]->value_depends_on_sql_mode(); + return res; +} + + +/** + See comments in Item_cond::split_sum_func() +*/ + +void Item_func::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List &fields, uint flags) +{ + Item **arg, **arg_end; + DBUG_ENTER("Item_func::split_sum_func"); + + for (arg= args, arg_end= args+arg_count; arg != arg_end ; arg++) + (*arg)->split_sum_func2(thd, ref_pointer_array, fields, arg, + flags | SPLIT_SUM_SKIP_REGISTERED); + DBUG_VOID_RETURN; +} + + +table_map Item_func::not_null_tables() const +{ + return 
not_null_tables_cache; +} + + +void Item_func::print(String *str, enum_query_type query_type) +{ + str->append(func_name_cstring()); + str->append('('); + print_args(str, 0, query_type); + str->append(')'); +} + + +void Item_func::print_args(String *str, uint from, enum_query_type query_type) +{ + for (uint i=from ; i < arg_count ; i++) + { + if (i != from) + str->append(','); + args[i]->print(str, query_type); + } +} + + +void Item_func::print_op(String *str, enum_query_type query_type) +{ + for (uint i=0 ; i < arg_count-1 ; i++) + { + args[i]->print_parenthesised(str, query_type, precedence()); + str->append(' '); + str->append(func_name_cstring()); + str->append(' '); + } + args[arg_count-1]->print_parenthesised(str, query_type, higher_precedence()); +} + + +bool Item_func::eq(const Item *item, bool binary_cmp) const +{ + /* Assume we don't have rtti */ + if (this == item) + return 1; + /* + Ensure that we are comparing two functions and that the function + is deterministic. + */ + if (item->type() != FUNC_ITEM || (used_tables() & RAND_TABLE_BIT)) + return 0; + Item_func *item_func=(Item_func*) item; + Item_func::Functype func_type; + if ((func_type= functype()) != item_func->functype() || + arg_count != item_func->arg_count || + (func_type != Item_func::FUNC_SP && + func_name() != item_func->func_name()) || + (func_type == Item_func::FUNC_SP && + my_strcasecmp(system_charset_info, func_name(), item_func->func_name()))) + return 0; + return Item_args::eq(item_func, binary_cmp); +} + + +/* +bool Item_func::is_expensive_processor(uchar *arg) +{ + return is_expensive(); +} +*/ + + +bool Item_hybrid_func::fix_attributes(Item **items, uint nitems) +{ + bool rc= Item_hybrid_func::type_handler()-> + Item_hybrid_func_fix_attributes(current_thd, + func_name_cstring(), this, this, + items, nitems); + DBUG_ASSERT(!rc || current_thd->is_error()); + return rc; +} + + +String *Item_real_func::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + double nr= val_real(); + if 
(null_value) + return 0; /* purecov: inspected */ + str->set_real(nr, decimals, collation.collation); + return str; +} + + +my_decimal *Item_real_func::val_decimal(my_decimal *decimal_value) +{ + DBUG_ASSERT(fixed()); + double nr= val_real(); + if (null_value) + return 0; /* purecov: inspected */ + double2my_decimal(E_DEC_FATAL_ERROR, nr, decimal_value); + return decimal_value; +} + + +#ifdef HAVE_DLOPEN +void Item_udf_func::fix_num_length_and_dec() +{ + uint fl_length= 0; + decimals=0; + for (uint i=0 ; i < arg_count ; i++) + { + set_if_bigger(decimals,args[i]->decimals); + set_if_bigger(fl_length, args[i]->max_length); + } + max_length=float_length(decimals); + if (fl_length > max_length) + { + decimals= NOT_FIXED_DEC; + max_length= float_length(NOT_FIXED_DEC); + } +} +#endif + + +void Item_func::signal_divide_by_null() +{ + THD *thd= current_thd; + if (thd->variables.sql_mode & MODE_ERROR_FOR_DIVISION_BY_ZERO) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_DIVISION_BY_ZERO, + ER_THD(thd, ER_DIVISION_BY_ZERO)); + null_value= 1; +} + + +Item *Item_func::get_tmp_table_item(THD *thd) +{ + if (!with_sum_func() && !const_item()) + { + auto item_field= new (thd->mem_root) Item_field(thd, result_field); + if (item_field) + item_field->set_refers_to_temp_table(); + return item_field; + } + return copy_or_same(thd); +} + +double Item_int_func::val_real() +{ + DBUG_ASSERT(fixed()); + + return unsigned_flag ? 
(double) ((ulonglong) val_int()) : (double) val_int(); +} + + +String *Item_int_func::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + longlong nr=val_int(); + if (null_value) + return 0; + str->set_int(nr, unsigned_flag, collation.collation); + return str; +} + + +bool Item_func_connection_id::fix_length_and_dec(THD *thd) +{ + if (Item_long_func::fix_length_and_dec(thd)) + return TRUE; + max_length= 10; + return FALSE; +} + + +bool Item_func_connection_id::fix_fields(THD *thd, Item **ref) +{ + if (Item_int_func::fix_fields(thd, ref)) + return TRUE; + thd->used|= THD::THREAD_SPECIFIC_USED; + value= thd->variables.pseudo_thread_id; + return FALSE; +} + + +bool Item_num_op::fix_type_handler(const Type_aggregator *aggregator) +{ + DBUG_ASSERT(arg_count == 2); + const Type_handler *h0= args[0]->cast_to_int_type_handler(); + const Type_handler *h1= args[1]->cast_to_int_type_handler(); + if (!aggregate_for_num_op(aggregator, h0, h1)) + return false; + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPES2_FOR_OPERATION, MYF(0), + h0->name().ptr(), h1->name().ptr(), func_name()); + return true; +} + + +bool Item_func_plus::fix_length_and_dec(THD *thd) +{ + DBUG_ENTER("Item_func_plus::fix_length_and_dec"); + DBUG_PRINT("info", ("name %s", func_name())); + const Type_aggregator *aggregator= &type_handler_data->m_type_aggregator_for_plus; + DBUG_EXECUTE_IF("num_op", aggregator= &type_handler_data->m_type_aggregator_for_result;); + DBUG_ASSERT(aggregator->is_commutative()); + if (fix_type_handler(aggregator)) + DBUG_RETURN(TRUE); + if (Item_func_plus::type_handler()->Item_func_plus_fix_length_and_dec(this)) + DBUG_RETURN(TRUE); + DBUG_PRINT("info", ("Type: %s", type_handler()->name().ptr())); + DBUG_RETURN(FALSE); +} + + +String *Item_func_hybrid_field_type::val_str_from_int_op(String *str) +{ + longlong nr= int_op(); + if (null_value) + return 0; /* purecov: inspected */ + str->set_int(nr, unsigned_flag, collation.collation); + return str; +} + +double 
Item_func_hybrid_field_type::val_real_from_int_op() +{ + longlong result= int_op(); + return unsigned_flag ? (double) ((ulonglong) result) : (double) result; +} + +my_decimal * +Item_func_hybrid_field_type::val_decimal_from_int_op(my_decimal *dec) +{ + longlong result= int_op(); + if (null_value) + return NULL; + int2my_decimal(E_DEC_FATAL_ERROR, result, unsigned_flag, dec); + return dec; +} + + +String *Item_func_hybrid_field_type::val_str_from_real_op(String *str) +{ + double nr= real_op(); + if (null_value) + return 0; /* purecov: inspected */ + str->set_real(nr, decimals, collation.collation); + return str; +} + +longlong Item_func_hybrid_field_type::val_int_from_real_op() +{ + return Converter_double_to_longlong(real_op(), unsigned_flag).result(); +} + +my_decimal * +Item_func_hybrid_field_type::val_decimal_from_real_op(my_decimal *dec) +{ + double result= (double) real_op(); + if (null_value) + return NULL; + double2my_decimal(E_DEC_FATAL_ERROR, result, dec); + return dec; +} + + +String *Item_func_hybrid_field_type::val_str_from_date_op(String *str) +{ + MYSQL_TIME ltime; + if (date_op_with_null_check(current_thd, <ime) || + (null_value= str->alloc(MAX_DATE_STRING_REP_LENGTH))) + return (String *) 0; + str->length(my_TIME_to_str(<ime, const_cast(str->ptr()), decimals)); + str->set_charset(&my_charset_bin); + DBUG_ASSERT(!null_value); + return str; +} + +double Item_func_hybrid_field_type::val_real_from_date_op() +{ + MYSQL_TIME ltime; + if (date_op_with_null_check(current_thd, <ime)) + return 0; + return TIME_to_double(<ime); +} + +longlong Item_func_hybrid_field_type::val_int_from_date_op() +{ + MYSQL_TIME ltime; + if (date_op_with_null_check(current_thd, <ime)) + return 0; + return TIME_to_ulonglong(<ime); +} + +my_decimal * +Item_func_hybrid_field_type::val_decimal_from_date_op(my_decimal *dec) +{ + MYSQL_TIME ltime; + if (date_op_with_null_check(current_thd, <ime)) + { + my_decimal_set_zero(dec); + return 0; + } + return date2my_decimal(<ime, dec); +} + 
+ +String *Item_func_hybrid_field_type::val_str_from_time_op(String *str) +{ + MYSQL_TIME ltime; + if (time_op_with_null_check(current_thd, <ime) || + (null_value= my_TIME_to_str(<ime, str, decimals))) + return NULL; + return str; +} + +double Item_func_hybrid_field_type::val_real_from_time_op() +{ + MYSQL_TIME ltime; + return time_op_with_null_check(current_thd, <ime) ? 0 : + TIME_to_double(<ime); +} + +longlong Item_func_hybrid_field_type::val_int_from_time_op() +{ + MYSQL_TIME ltime; + return time_op_with_null_check(current_thd, <ime) ? 0 : + TIME_to_ulonglong(<ime); +} + +my_decimal * +Item_func_hybrid_field_type::val_decimal_from_time_op(my_decimal *dec) +{ + MYSQL_TIME ltime; + if (time_op_with_null_check(current_thd, <ime)) + { + my_decimal_set_zero(dec); + return 0; + } + return date2my_decimal(<ime, dec); +} + + +double Item_func_hybrid_field_type::val_real_from_str_op() +{ + String *res= str_op_with_null_check(&str_value); + return res ? double_from_string_with_check(res) : 0.0; +} + +longlong Item_func_hybrid_field_type::val_int_from_str_op() +{ + String *res= str_op_with_null_check(&str_value); + return res ? longlong_from_string_with_check(res) : 0; +} + +my_decimal * +Item_func_hybrid_field_type::val_decimal_from_str_op(my_decimal *decimal_value) +{ + String *res= str_op_with_null_check(&str_value); + return res ? 
decimal_from_string_with_check(decimal_value, res) : 0; +} + + +void Item_func_signed::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("cast(")); + args[0]->print(str, query_type); + str->append(STRING_WITH_LEN(" as signed)")); + +} + + +void Item_func_unsigned::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("cast(")); + args[0]->print(str, query_type); + str->append(STRING_WITH_LEN(" as unsigned)")); + +} + + +my_decimal *Item_decimal_typecast::val_decimal(my_decimal *dec) +{ + VDec tmp(args[0]); + bool sign; + uint precision; + + if ((null_value= tmp.is_null())) + return NULL; + tmp.round_to(dec, decimals, HALF_UP); + sign= dec->sign(); + if (unsigned_flag) + { + if (sign) + { + my_decimal_set_zero(dec); + goto err; + } + } + precision= my_decimal_length_to_precision(max_length, + decimals, unsigned_flag); + if (precision - decimals < (uint) my_decimal_intg(dec)) + { + max_my_decimal(dec, precision, decimals); + dec->sign(sign); + goto err; + } + return dec; + +err: + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_DATA_OUT_OF_RANGE, + ER_THD(thd, ER_WARN_DATA_OUT_OF_RANGE), + name.str, + thd->get_stmt_da()->current_row_for_warning()); + return dec; +} + + +void Item_decimal_typecast::print(String *str, enum_query_type query_type) +{ + char len_buf[20*3 + 1]; + char *end; + + uint precision= my_decimal_length_to_precision(max_length, decimals, + unsigned_flag); + str->append(STRING_WITH_LEN("cast(")); + args[0]->print(str, query_type); + str->append(STRING_WITH_LEN(" as decimal(")); + + end=int10_to_str(precision, len_buf,10); + str->append(len_buf, (uint32) (end - len_buf)); + + str->append(','); + + end=int10_to_str(decimals, len_buf,10); + str->append(len_buf, (uint32) (end - len_buf)); + + str->append(')'); + str->append(')'); +} + + +double Item_real_typecast::val_real_with_truncate(double max_value) +{ + int error; + double tmp= args[0]->val_real(); 
+ if ((null_value= args[0]->null_value)) + return 0.0; + + if (unlikely((error= truncate_double(&tmp, max_length, decimals, + false/*unsigned_flag*/, max_value)))) + { + /* + We don't want automatic escalation from a warning to an error + in this scenario: + INSERT INTO t1 (float_field) VALUES (CAST(1e100 AS FLOAT)); + The above statement should work even in the strict mode. + So let's use a note rather than a warning. + */ + THD *thd= current_thd; + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_NOTE, + ER_WARN_DATA_OUT_OF_RANGE, + ER_THD(thd, ER_WARN_DATA_OUT_OF_RANGE), + name.str, (ulong) 1); + if (error < 0) + { + null_value= 1; // Illegal value + tmp= 0.0; + } + } + return tmp; +} + + +void Item_real_typecast::print(String *str, enum_query_type query_type) +{ + char len_buf[20*3 + 1]; + char *end; + Name name= type_handler()->name(); + + str->append(STRING_WITH_LEN("cast(")); + args[0]->print(str, query_type); + str->append(STRING_WITH_LEN(" as ")); + str->append(name.ptr(), name.length()); + if (decimals != NOT_FIXED_DEC) + { + str->append('('); + end= int10_to_str(max_length, len_buf,10); + str->append(len_buf, (uint32) (end - len_buf)); + str->append(','); + end= int10_to_str(decimals, len_buf,10); + str->append(len_buf, (uint32) (end - len_buf)); + str->append(')'); + } + str->append(')'); +} + +double Item_func_plus::real_op() +{ + double value= args[0]->val_real() + args[1]->val_real(); + if ((null_value=args[0]->null_value || args[1]->null_value)) + return 0.0; + return check_float_overflow(value); +} + +#if defined(__powerpc64__) && GCC_VERSION >= 6003 && GCC_VERSION <= 10002 +#pragma GCC push_options +#pragma GCC optimize ("no-expensive-optimizations") +#endif + +longlong Item_func_plus::int_op() +{ + longlong val0= args[0]->val_int(); + longlong val1= args[1]->val_int(); + bool res_unsigned= FALSE; + longlong res; + + if ((null_value= args[0]->null_value || args[1]->null_value)) + return 0; + /* + First check whether the result can be 
represented as a + (bool unsigned_flag, longlong value) pair, then check if it is compatible + with this Item's unsigned_flag by calling check_integer_overflow(). + */ + if (args[0]->unsigned_flag) + { + if (args[1]->unsigned_flag || val1 >= 0) + { + if (test_if_sum_overflows_ull((ulonglong) val0, (ulonglong) val1)) + goto err; + res_unsigned= TRUE; + } + else + { + /* val1 is negative */ + if ((ulonglong) val0 > (ulonglong) LONGLONG_MAX) + res_unsigned= TRUE; + } + } + else + { + if (args[1]->unsigned_flag) + { + if (val0 >= 0) + { + if (test_if_sum_overflows_ull((ulonglong) val0, (ulonglong) val1)) + goto err; + res_unsigned= TRUE; + } + else + { + if ((ulonglong) val1 > (ulonglong) LONGLONG_MAX) + res_unsigned= TRUE; + } + } + else + { + if (val0 >=0 && val1 >= 0) + res_unsigned= TRUE; + else if (val0 < 0 && val1 < 0 && val0 < (LONGLONG_MIN - val1)) + goto err; + } + } + + if (res_unsigned) + res= (longlong) ((ulonglong) val0 + (ulonglong) val1); + else + res= val0 + val1; + + return check_integer_overflow(res, res_unsigned); + +err: + return raise_integer_overflow(); +} + +#if defined(__powerpc64__) && GCC_VERSION >= 6003 && GCC_VERSION <= 10002 +#pragma GCC pop_options +#endif + +/** + Calculate plus of two decimals. 
+ + @param decimal_value Buffer that can be used to store result + + @retval + 0 Value was NULL; In this case null_value is set + @retval + \# Value of operation as a decimal +*/ + +my_decimal *Item_func_plus::decimal_op(my_decimal *decimal_value) +{ + VDec2_lazy val(args[0], args[1]); + if (!(null_value= (val.has_null() || + check_decimal_overflow(my_decimal_add(E_DEC_FATAL_ERROR & + ~E_DEC_OVERFLOW, + decimal_value, + val.m_a.ptr(), + val.m_b.ptr())) > 3))) + return decimal_value; + return 0; +} + +/** + Set precision of results for additive operations (+ and -) +*/ +void Item_func_additive_op::result_precision() +{ + decimals= MY_MAX(args[0]->decimal_scale(), args[1]->decimal_scale()); + int arg1_int= args[0]->decimal_precision() - args[0]->decimal_scale(); + int arg2_int= args[1]->decimal_precision() - args[1]->decimal_scale(); + int precision= MY_MAX(arg1_int, arg2_int) + 1 + decimals; + + DBUG_ASSERT(arg1_int >= 0); + DBUG_ASSERT(arg2_int >= 0); + + max_length= my_decimal_precision_to_length_no_truncation(precision, decimals, + unsigned_flag); +} + + +/** + The following function is here to allow the user to force + subtraction of UNSIGNED BIGINT to return negative values. 
+*/ +void Item_func_minus::fix_unsigned_flag() +{ + if (unsigned_flag && + (current_thd->variables.sql_mode & MODE_NO_UNSIGNED_SUBTRACTION)) + { + unsigned_flag=0; + set_handler(Item_func_minus::type_handler()->type_handler_signed()); + } +} + + +bool Item_func_minus::fix_length_and_dec(THD *thd) +{ + DBUG_ENTER("Item_func_minus::fix_length_and_dec"); + DBUG_PRINT("info", ("name %s", func_name())); + const Type_aggregator *aggregator= &type_handler_data->m_type_aggregator_for_minus; + DBUG_EXECUTE_IF("num_op", aggregator= &type_handler_data->m_type_aggregator_non_commutative_test;); + DBUG_ASSERT(!aggregator->is_commutative()); + if (fix_type_handler(aggregator)) + DBUG_RETURN(TRUE); + if (Item_func_minus::type_handler()->Item_func_minus_fix_length_and_dec(this)) + DBUG_RETURN(TRUE); + DBUG_PRINT("info", ("Type: %s", type_handler()->name().ptr())); + m_depends_on_sql_mode_no_unsigned_subtraction= unsigned_flag; + fix_unsigned_flag(); + DBUG_RETURN(FALSE); +} + + +Sql_mode_dependency Item_func_minus::value_depends_on_sql_mode() const +{ + Sql_mode_dependency dep= Item_func_additive_op::value_depends_on_sql_mode(); + if (m_depends_on_sql_mode_no_unsigned_subtraction) + dep|= Sql_mode_dependency(0, MODE_NO_UNSIGNED_SUBTRACTION); + return dep; +} + + +double Item_func_minus::real_op() +{ + double value= args[0]->val_real() - args[1]->val_real(); + if ((null_value=args[0]->null_value || args[1]->null_value)) + return 0.0; + return check_float_overflow(value); +} + + +#if defined(__powerpc64__) && GCC_VERSION >= 6003 && GCC_VERSION <= 10002 +#pragma GCC push_options +#pragma GCC optimize ("no-expensive-optimizations") +#endif + +longlong Item_func_minus::int_op() +{ + longlong val0= args[0]->val_int(); + longlong val1= args[1]->val_int(); + bool res_unsigned= FALSE; + longlong res; + + if ((null_value= args[0]->null_value || args[1]->null_value)) + return 0; + + /* + First check whether the result can be represented as a + (bool unsigned_flag, longlong value) pair, then 
check if it is compatible + with this Item's unsigned_flag by calling check_integer_overflow(). + */ + if (args[0]->unsigned_flag) + { + if (args[1]->unsigned_flag) + { + if ((ulonglong) val0 < (ulonglong) val1) + goto err; + res_unsigned= TRUE; + } + else + { + if (val1 >= 0) + { + if ((ulonglong) val0 > (ulonglong) val1) + res_unsigned= TRUE; + } + else + { + if (test_if_sum_overflows_ull((ulonglong) val0, (ulonglong) -val1)) + goto err; + res_unsigned= TRUE; + } + } + } + else + { + if (args[1]->unsigned_flag) + { + if (((ulonglong) val0 - (ulonglong) LONGLONG_MIN) < (ulonglong) val1) + goto err; + } + else + { + if (val0 > 0 && val1 < 0) + res_unsigned= TRUE; + else if (val0 < 0 && val1 > 0 && val0 < (LONGLONG_MIN + val1)) + goto err; + } + } + if (res_unsigned) + res= (longlong) ((ulonglong) val0 - (ulonglong) val1); + else + res= val0 - val1; + + return check_integer_overflow(res, res_unsigned); + +err: + return raise_integer_overflow(); +} + +#if defined(__powerpc64__) && GCC_VERSION >= 6003 && GCC_VERSION <= 10002 +#pragma GCC pop_options +#endif + +/** + See Item_func_plus::decimal_op for comments. +*/ + +my_decimal *Item_func_minus::decimal_op(my_decimal *decimal_value) +{ + VDec2_lazy val(args[0], args[1]); + if (!(null_value= (val.has_null() || + check_decimal_overflow(my_decimal_sub(E_DEC_FATAL_ERROR & + ~E_DEC_OVERFLOW, + decimal_value, + val.m_a.ptr(), + val.m_b.ptr())) > 3))) + return decimal_value; + return 0; +} + + +double Item_func_mul::real_op() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real() * args[1]->val_real(); + if ((null_value=args[0]->null_value || args[1]->null_value)) + return 0.0; + return check_float_overflow(value); +} + + +longlong Item_func_mul::int_op() +{ + DBUG_ASSERT(fixed()); + /* + Since we also have to take the unsigned_flag for a and b into account, + it is easier to first work with absolute values and set the + correct sign later. 
+ */ + Longlong_hybrid_null ha= args[0]->to_longlong_hybrid_null(); + Longlong_hybrid_null hb= args[1]->to_longlong_hybrid_null(); + + if ((null_value= ha.is_null() || hb.is_null())) + return 0; + + ULonglong_null ures= ULonglong_null::ullmul(ha.abs(), hb.abs()); + if (ures.is_null()) + return raise_integer_overflow(); + + return check_integer_overflow(ULonglong_hybrid(ures.value(), + ha.neg() != hb.neg())); +} + + +/** See Item_func_plus::decimal_op for comments. */ + +my_decimal *Item_func_mul::decimal_op(my_decimal *decimal_value) +{ + VDec2_lazy val(args[0], args[1]); + if (!(null_value= (val.has_null() || + check_decimal_overflow(my_decimal_mul(E_DEC_FATAL_ERROR & + ~E_DEC_OVERFLOW, + decimal_value, + val.m_a.ptr(), + val.m_b.ptr())) > 3))) + return decimal_value; + return 0; +} + + +void Item_func_mul::result_precision() +{ + decimals= MY_MIN(args[0]->decimal_scale() + args[1]->decimal_scale(), + DECIMAL_MAX_SCALE); + uint est_prec = args[0]->decimal_precision() + args[1]->decimal_precision(); + uint precision= MY_MIN(est_prec, DECIMAL_MAX_PRECISION); + max_length= my_decimal_precision_to_length_no_truncation(precision, decimals, + unsigned_flag); +} + + +bool Item_func_mul::fix_length_and_dec(THD *thd) +{ + DBUG_ENTER("Item_func_mul::fix_length_and_dec"); + DBUG_PRINT("info", ("name %s", func_name())); + const Type_aggregator *aggregator= &type_handler_data->m_type_aggregator_for_mul; + DBUG_EXECUTE_IF("num_op", aggregator= &type_handler_data->m_type_aggregator_for_result;); + DBUG_ASSERT(aggregator->is_commutative()); + if (fix_type_handler(aggregator)) + DBUG_RETURN(TRUE); + if (Item_func_mul::type_handler()->Item_func_mul_fix_length_and_dec(this)) + DBUG_RETURN(TRUE); + DBUG_PRINT("info", ("Type: %s", type_handler()->name().ptr())); + DBUG_RETURN(FALSE); +} + + +double Item_func_div::real_op() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + double val2= args[1]->val_real(); + if ((null_value= args[0]->null_value || args[1]->null_value)) 
+ return 0.0; + if (val2 == 0.0) + { + signal_divide_by_null(); + return 0.0; + } + return check_float_overflow(value/val2); +} + + +my_decimal *Item_func_div::decimal_op(my_decimal *decimal_value) +{ + int err; + VDec2_lazy val(args[0], args[1]); + if ((null_value= val.has_null())) + return 0; + if ((err= check_decimal_overflow(my_decimal_div(E_DEC_FATAL_ERROR & + ~E_DEC_OVERFLOW & + ~E_DEC_DIV_ZERO, + decimal_value, + val.m_a.ptr(), val.m_b.ptr(), + prec_increment))) > 3) + { + if (err == E_DEC_DIV_ZERO) + signal_divide_by_null(); + null_value= 1; + return 0; + } + return decimal_value; +} + + +void Item_func_div::result_precision() +{ + /* + We need to add args[1]->divisor_precision_increment(), + to properly handle the cases like this: + SELECT 5.05 / 0.014; -> 360.714286 + i.e. when the divisor has a zero integer part + and non-zero digits appear only after the decimal point. + Precision in this example is calculated as + args[0]->decimal_precision() + // 3 + args[1]->divisor_precision_increment() + // 3 + prec_increment // 4 + which gives 10 decimals digits. 
+ */ + uint precision=MY_MIN(args[0]->decimal_precision() + + args[1]->divisor_precision_increment() + prec_increment, + DECIMAL_MAX_PRECISION); + decimals= MY_MIN(args[0]->decimal_scale() + prec_increment, DECIMAL_MAX_SCALE); + max_length= my_decimal_precision_to_length_no_truncation(precision, decimals, + unsigned_flag); +} + + +void Item_func_div::fix_length_and_dec_double(void) +{ + Item_num_op::fix_length_and_dec_double(); + decimals= MY_MAX(args[0]->decimals, args[1]->decimals) + prec_increment; + set_if_smaller(decimals, NOT_FIXED_DEC); + uint tmp= float_length(decimals); + if (decimals == NOT_FIXED_DEC) + max_length= tmp; + else + { + max_length=args[0]->max_length - args[0]->decimals + decimals; + set_if_smaller(max_length, tmp); + } +} + + +void Item_func_div::fix_length_and_dec_int(void) +{ + set_handler(&type_handler_newdecimal); + DBUG_PRINT("info", ("Type changed: %s", type_handler()->name().ptr())); + Item_num_op::fix_length_and_dec_decimal(); +} + + +bool Item_func_div::fix_length_and_dec(THD *thd) +{ + DBUG_ENTER("Item_func_div::fix_length_and_dec"); + DBUG_PRINT("info", ("name %s", func_name())); + prec_increment= thd->variables.div_precincrement; + set_maybe_null(); // division by zero + + const Type_aggregator *aggregator= &type_handler_data->m_type_aggregator_for_div; + DBUG_EXECUTE_IF("num_op", aggregator= &type_handler_data->m_type_aggregator_non_commutative_test;); + DBUG_ASSERT(!aggregator->is_commutative()); + if (fix_type_handler(aggregator)) + DBUG_RETURN(TRUE); + if (Item_func_div::type_handler()->Item_func_div_fix_length_and_dec(this)) + DBUG_RETURN(TRUE); + DBUG_PRINT("info", ("Type: %s", type_handler()->name().ptr())); + DBUG_RETURN(FALSE); +} + + +/* Integer division */ +longlong Item_func_int_div::val_int() +{ + DBUG_ASSERT(fixed()); + + /* + Perform division using DECIMAL math if either of the operands has a + non-integer type + */ + if (args[0]->result_type() != INT_RESULT || + args[1]->result_type() != INT_RESULT) + { + 
VDec2_lazy val(args[0], args[1]); + if ((null_value= val.has_null())) + return 0; + + int err; + my_decimal tmp; + if ((err= my_decimal_div(E_DEC_FATAL_ERROR & ~E_DEC_DIV_ZERO, &tmp, + val.m_a.ptr(), val.m_b.ptr(), 0)) > 3) + { + if (err == E_DEC_DIV_ZERO) + signal_divide_by_null(); + return 0; + } + + my_decimal truncated; + if (tmp.round_to(&truncated, 0, TRUNCATE)) + DBUG_ASSERT(false); + + longlong res; + if (my_decimal2int(E_DEC_FATAL_ERROR, &truncated, unsigned_flag, &res) & + E_DEC_OVERFLOW) + raise_integer_overflow(); + return res; + } + + Longlong_hybrid val0= args[0]->to_longlong_hybrid(); + Longlong_hybrid val1= args[1]->to_longlong_hybrid(); + if ((null_value= (args[0]->null_value || args[1]->null_value))) + return 0; + if (val1 == 0) + { + signal_divide_by_null(); + return 0; + } + + return check_integer_overflow(ULonglong_hybrid(val0.abs() / val1.abs(), + val0.neg() != val1.neg())); +} + + +bool Item_func_int_div::fix_length_and_dec(THD *thd) +{ + uint32 prec= args[0]->decimal_int_part(); + set_if_smaller(prec, MY_INT64_NUM_DECIMAL_DIGITS); + fix_char_length(prec); + set_maybe_null(); + unsigned_flag=args[0]->unsigned_flag | args[1]->unsigned_flag; + return false; +} + + +longlong Item_func_mod::int_op() +{ + DBUG_ASSERT(fixed()); + Longlong_hybrid val0= args[0]->to_longlong_hybrid(); + Longlong_hybrid val1= args[1]->to_longlong_hybrid(); + + if ((null_value= args[0]->null_value || args[1]->null_value)) + return 0; /* purecov: inspected */ + if (val1 == 0) + { + signal_divide_by_null(); + return 0; + } + + /* + '%' is calculated by integer division internally. Since dividing + LONGLONG_MIN by -1 generates SIGFPE, we calculate using unsigned values and + then adjust the sign appropriately. 
+ */ + return check_integer_overflow(ULonglong_hybrid(val0.abs() % val1.abs(), + val0.neg())); +} + +double Item_func_mod::real_op() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + double val2= args[1]->val_real(); + if ((null_value= args[0]->null_value || args[1]->null_value)) + return 0.0; /* purecov: inspected */ + if (val2 == 0.0) + { + signal_divide_by_null(); + return 0.0; + } + return fmod(value,val2); +} + + +my_decimal *Item_func_mod::decimal_op(my_decimal *decimal_value) +{ + VDec2_lazy val(args[0], args[1]); + if ((null_value= val.has_null())) + return 0; + switch (my_decimal_mod(E_DEC_FATAL_ERROR & ~E_DEC_DIV_ZERO, decimal_value, + val.m_a.ptr(), val.m_b.ptr())) { + case E_DEC_TRUNCATED: + case E_DEC_OK: + return decimal_value; + case E_DEC_DIV_ZERO: + signal_divide_by_null(); + /* fall through */ + default: + null_value= 1; + return 0; + } +} + + +void Item_func_mod::result_precision() +{ + unsigned_flag= args[0]->unsigned_flag; + decimals= MY_MAX(args[0]->decimal_scale(), args[1]->decimal_scale()); + uint prec= MY_MAX(args[0]->decimal_precision(), args[1]->decimal_precision()); + fix_char_length(my_decimal_precision_to_length_no_truncation(prec, decimals, + unsigned_flag)); +} + + +bool Item_func_mod::fix_length_and_dec(THD *thd) +{ + DBUG_ENTER("Item_func_mod::fix_length_and_dec"); + DBUG_PRINT("info", ("name %s", func_name())); + set_maybe_null(); // division by zero + const Type_aggregator *aggregator= &type_handler_data->m_type_aggregator_for_mod; + DBUG_EXECUTE_IF("num_op", aggregator= &type_handler_data->m_type_aggregator_non_commutative_test;); + DBUG_ASSERT(!aggregator->is_commutative()); + if (fix_type_handler(aggregator)) + DBUG_RETURN(TRUE); + if (Item_func_mod::type_handler()->Item_func_mod_fix_length_and_dec(this)) + DBUG_RETURN(TRUE); + DBUG_PRINT("info", ("Type: %s", type_handler()->name().ptr())); + DBUG_RETURN(FALSE); +} + +static void calc_hash_for_unique(ulong &nr1, ulong &nr2, String *str) +{ + CHARSET_INFO *cs; + 
uchar l[4]; + int4store(l, str->length()); + cs= str->charset(); + cs->hash_sort(l, sizeof(l), &nr1, &nr2); + cs= str->charset(); + cs->hash_sort((uchar *)str->ptr(), str->length(), &nr1, &nr2); +} + +longlong Item_func_hash_mariadb_100403::val_int() +{ + DBUG_EXECUTE_IF("same_long_unique_hash", return 9;); + unsigned_flag= true; + ulong nr1= 1,nr2= 4; + String * str; + for(uint i= 0;ival_str(); + if(args[i]->null_value) + { + null_value= 1; + return 0; + } + calc_hash_for_unique(nr1, nr2, str); + } + null_value= 0; + return (longlong)nr1; +} + + +longlong Item_func_hash::val_int() +{ + DBUG_EXECUTE_IF("same_long_unique_hash", return 9;); + unsigned_flag= true; + Hasher hasher; + for(uint i= 0;ihash_not_null(&hasher)) + { + null_value= 1; + return 0; + } + } + null_value= 0; + return (longlong) hasher.finalize(); +} + + +bool Item_func_hash::fix_length_and_dec(THD *thd) +{ + decimals= 0; + max_length= 8; + return false; +} + + + +double Item_func_neg::real_op() +{ + double value= args[0]->val_real(); + null_value= args[0]->null_value; + return -value; +} + + +longlong Item_func_neg::int_op() +{ + longlong value= args[0]->val_int(); + if ((null_value= args[0]->null_value)) + return 0; + if (args[0]->unsigned_flag && + (ulonglong) value > (ulonglong) LONGLONG_MAX + 1) + return raise_integer_overflow(); + + if (value == LONGLONG_MIN) + { + if (args[0]->unsigned_flag != unsigned_flag) + /* negation of LONGLONG_MIN is LONGLONG_MIN. 
*/ + return LONGLONG_MIN; + else + return raise_integer_overflow(); + } + + return check_integer_overflow(-value, !args[0]->unsigned_flag && value < 0); +} + + +my_decimal *Item_func_neg::decimal_op(my_decimal *decimal_value) +{ + VDec value(args[0]); + if (!(null_value= value.is_null())) + { + my_decimal2decimal(value.ptr(), decimal_value); + my_decimal_neg(decimal_value); + return decimal_value; + } + return 0; +} + + +void Item_func_neg::fix_length_and_dec_int() +{ + max_length= args[0]->max_length + 1; + set_handler(type_handler_long_or_longlong()); + + /* + If this is in integer context keep the context as integer if possible + (This is how multiplication and other integer functions works) + Use val() to get value as arg_type doesn't mean that item is + Item_int or Item_float due to existence of Item_param. + */ + if (args[0]->const_item()) + { + longlong val= args[0]->val_int(); + if ((ulonglong) val >= (ulonglong) LONGLONG_MIN && + ((ulonglong) val != (ulonglong) LONGLONG_MIN || + !args[0]->is_of_type(CONST_ITEM, INT_RESULT))) + { + /* + Ensure that result is converted to DECIMAL, as longlong can't hold + the negated number + */ + set_handler(&type_handler_newdecimal); + DBUG_PRINT("info", ("Type changed: DECIMAL_RESULT")); + } + } + unsigned_flag= false; +} + + +void Item_func_neg::fix_length_and_dec_double() +{ + set_handler(&type_handler_double); + decimals= args[0]->decimals; // Preserve NOT_FIXED_DEC + max_length= args[0]->max_length + 1; + // Limit length with something reasonable + uint32 mlen= type_handler()->max_display_length(this); + set_if_smaller(max_length, mlen); + unsigned_flag= false; +} + + +void Item_func_neg::fix_length_and_dec_decimal() +{ + set_handler(&type_handler_newdecimal); + decimals= args[0]->decimal_scale(); // Do not preserve NOT_FIXED_DEC + max_length= args[0]->max_length + 1; + unsigned_flag= false; +} + + +bool Item_func_neg::fix_length_and_dec(THD *thd) +{ + DBUG_ENTER("Item_func_neg::fix_length_and_dec"); + 
DBUG_PRINT("info", ("name %s", func_name())); + if (args[0]->cast_to_int_type_handler()-> + Item_func_neg_fix_length_and_dec(this)) + DBUG_RETURN(TRUE); + DBUG_PRINT("info", ("Type: %s", type_handler()->name().ptr())); + DBUG_RETURN(FALSE); +} + + +double Item_func_abs::real_op() +{ + double value= args[0]->val_real(); + null_value= args[0]->null_value; + return fabs(value); +} + + +longlong Item_func_abs::int_op() +{ + longlong value= args[0]->val_int(); + if ((null_value= args[0]->null_value)) + return 0; + if (unsigned_flag) + return value; + /* -LONGLONG_MIN = LONGLONG_MAX + 1 => outside of signed longlong range */ + if (value == LONGLONG_MIN) + return raise_integer_overflow(); + return (value >= 0) ? value : -value; +} + + +my_decimal *Item_func_abs::decimal_op(my_decimal *decimal_value) +{ + VDec value(args[0]); + if (!(null_value= value.is_null())) + { + my_decimal2decimal(value.ptr(), decimal_value); + if (decimal_value->sign()) + my_decimal_neg(decimal_value); + return decimal_value; + } + return 0; +} + +void Item_func_abs::fix_length_and_dec_int() +{ + max_length= args[0]->max_length; + unsigned_flag= args[0]->unsigned_flag; + set_handler(type_handler_long_or_longlong()); +} + + +void Item_func_abs::fix_length_and_dec_double() +{ + set_handler(&type_handler_double); + decimals= args[0]->decimals; // Preserve NOT_FIXED_DEC + max_length= float_length(decimals); + unsigned_flag= args[0]->unsigned_flag; +} + + +void Item_func_abs::fix_length_and_dec_decimal() +{ + set_handler(&type_handler_newdecimal); + decimals= args[0]->decimal_scale(); // Do not preserve NOT_FIXED_DEC + max_length= args[0]->max_length; + unsigned_flag= args[0]->unsigned_flag; +} + + +bool Item_func_abs::fix_length_and_dec(THD *thd) +{ + DBUG_ENTER("Item_func_abs::fix_length_and_dec"); + DBUG_PRINT("info", ("name %s", func_name())); + if (args[0]->cast_to_int_type_handler()-> + Item_func_abs_fix_length_and_dec(this)) + DBUG_RETURN(TRUE); + DBUG_PRINT("info", ("Type: %s", 
type_handler()->name().ptr())); + DBUG_RETURN(FALSE); +} + + +/** Gateway to natural LOG function. */ +double Item_func_ln::val_real() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + if ((null_value= args[0]->null_value)) + return 0.0; + if (value <= 0.0) + { + signal_divide_by_null(); + return 0.0; + } + return log(value); +} + +/** + Extended but so slower LOG function. + + We have to check if all values are > zero and first one is not one + as these are the cases then result is not a number. +*/ +double Item_func_log::val_real() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + if ((null_value= args[0]->null_value)) + return 0.0; + if (value <= 0.0) + { + signal_divide_by_null(); + return 0.0; + } + if (arg_count == 2) + { + double value2= args[1]->val_real(); + if ((null_value= args[1]->null_value)) + return 0.0; + if (value2 <= 0.0 || value == 1.0) + { + signal_divide_by_null(); + return 0.0; + } + return log(value2) / log(value); + } + return log(value); +} + +double Item_func_log2::val_real() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + + if ((null_value=args[0]->null_value)) + return 0.0; + if (value <= 0.0) + { + signal_divide_by_null(); + return 0.0; + } + return log(value) / M_LN2; +} + +double Item_func_log10::val_real() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + if ((null_value= args[0]->null_value)) + return 0.0; + if (value <= 0.0) + { + signal_divide_by_null(); + return 0.0; + } + return log10(value); +} + +double Item_func_exp::val_real() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + if ((null_value=args[0]->null_value)) + return 0.0; /* purecov: inspected */ + return check_float_overflow(exp(value)); +} + +double Item_func_sqrt::val_real() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + if ((null_value=(args[0]->null_value || value < 0))) + return 0.0; /* purecov: inspected */ + return sqrt(value); +} + +double 
Item_func_pow::val_real() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + double val2= args[1]->val_real(); + if ((null_value=(args[0]->null_value || args[1]->null_value))) + return 0.0; /* purecov: inspected */ + return check_float_overflow(pow(value,val2)); +} + +// Trigonometric functions + +double Item_func_acos::val_real() +{ + DBUG_ASSERT(fixed()); + /* One can use this to defer SELECT processing. */ + DEBUG_SYNC(current_thd, "before_acos_function"); + // the volatile's for BUG #2338 to calm optimizer down (because of gcc's bug) + volatile double value= args[0]->val_real(); + if ((null_value=(args[0]->null_value || (value < -1.0 || value > 1.0)))) + return 0.0; + return acos(value); +} + +double Item_func_asin::val_real() +{ + DBUG_ASSERT(fixed()); + // the volatile's for BUG #2338 to calm optimizer down (because of gcc's bug) + volatile double value= args[0]->val_real(); + if ((null_value=(args[0]->null_value || (value < -1.0 || value > 1.0)))) + return 0.0; + return asin(value); +} + +double Item_func_atan::val_real() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + if ((null_value=args[0]->null_value)) + return 0.0; + if (arg_count == 2) + { + double val2= args[1]->val_real(); + if ((null_value=args[1]->null_value)) + return 0.0; + return check_float_overflow(atan2(value,val2)); + } + return atan(value); +} + +double Item_func_cos::val_real() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + if ((null_value=args[0]->null_value)) + return 0.0; + return cos(value); +} + +double Item_func_sin::val_real() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + if ((null_value=args[0]->null_value)) + return 0.0; + return sin(value); +} + +double Item_func_tan::val_real() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + if ((null_value=args[0]->null_value)) + return 0.0; + return check_float_overflow(tan(value)); +} + + +double Item_func_cot::val_real() +{ + DBUG_ASSERT(fixed()); + 
double value= args[0]->val_real(); + if ((null_value=args[0]->null_value)) + return 0.0; + return check_float_overflow(1.0 / tan(value)); +} + + +// Shift-functions, same as << and >> in C/C++ + + +class Func_handler_shift_left_int_to_ulonglong: + public Item_handled_func::Handler_ulonglong +{ +public: + Longlong_null to_longlong_null(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + return item->arguments()[0]->to_longlong_null() << + item->arguments()[1]->to_longlong_null(); + } +}; + + +class Func_handler_shift_left_decimal_to_ulonglong: + public Item_handled_func::Handler_ulonglong +{ +public: + Longlong_null to_longlong_null(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + return VDec(item->arguments()[0]).to_xlonglong_null() << + item->arguments()[1]->to_longlong_null(); + } +}; + + +bool Item_func_shift_left::fix_length_and_dec(THD *thd) +{ + static Func_handler_shift_left_int_to_ulonglong ha_int_to_ull; + static Func_handler_shift_left_decimal_to_ulonglong ha_dec_to_ull; + return fix_length_and_dec_op1_std(&ha_int_to_ull, &ha_dec_to_ull); +} + + +class Func_handler_shift_right_int_to_ulonglong: + public Item_handled_func::Handler_ulonglong +{ +public: + Longlong_null to_longlong_null(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + return item->arguments()[0]->to_longlong_null() >> + item->arguments()[1]->to_longlong_null(); + } +}; + + +class Func_handler_shift_right_decimal_to_ulonglong: + public Item_handled_func::Handler_ulonglong +{ +public: + Longlong_null to_longlong_null(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + return VDec(item->arguments()[0]).to_xlonglong_null() >> + item->arguments()[1]->to_longlong_null(); + } +}; + + +bool Item_func_shift_right::fix_length_and_dec(THD *thd) +{ + static Func_handler_shift_right_int_to_ulonglong ha_int_to_ull; + static Func_handler_shift_right_decimal_to_ulonglong ha_dec_to_ull; + return fix_length_and_dec_op1_std(&ha_int_to_ull, 
&ha_dec_to_ull); +} + + +class Func_handler_bit_neg_int_to_ulonglong: + public Item_handled_func::Handler_ulonglong +{ +public: + Longlong_null to_longlong_null(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + return ~ item->arguments()[0]->to_longlong_null(); + } +}; + + +class Func_handler_bit_neg_decimal_to_ulonglong: + public Item_handled_func::Handler_ulonglong +{ +public: + Longlong_null to_longlong_null(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + return ~ VDec(item->arguments()[0]).to_xlonglong_null(); + } +}; + + +bool Item_func_bit_neg::fix_length_and_dec(THD *thd) +{ + static Func_handler_bit_neg_int_to_ulonglong ha_int_to_ull; + static Func_handler_bit_neg_decimal_to_ulonglong ha_dec_to_ull; + return fix_length_and_dec_op1_std(&ha_int_to_ull, &ha_dec_to_ull); +} + + +// Conversion functions + +void Item_func_int_val::fix_length_and_dec_int_or_decimal() +{ + DBUG_ASSERT(args[0]->cmp_type() == DECIMAL_RESULT); + DBUG_ASSERT(args[0]->max_length <= DECIMAL_MAX_STR_LENGTH); + /* + FLOOR() for negative numbers can increase length: floor(-9.9) -> -10 + CEILING() for positive numbers can increase length: ceil(9.9) -> 10 + */ + decimal_round_mode mode= round_mode(); + uint length_increase= args[0]->decimals > 0 && + (mode == CEILING || + (mode == FLOOR && !args[0]->unsigned_flag)) ? 
1 : 0; + uint precision= args[0]->decimal_int_part() + length_increase; + set_if_bigger(precision, 1); + + /* + The BIGINT data type can store: + UNSIGNED BIGINT: 0..18446744073709551615 - up to 19 digits + SIGNED BIGINT: -9223372036854775808..9223372036854775807 - up to 18 digits + + The INT data type can store: + UNSIGNED INT: 0..4294967295 - up to 9 digits + SIGNED INT: -2147483648..2147483647 - up to 9 digits + */ + if (precision > 18) + { + unsigned_flag= args[0]->unsigned_flag; + fix_char_length( + my_decimal_precision_to_length_no_truncation(precision, 0, + unsigned_flag)); + set_handler(&type_handler_newdecimal); + } + else + { + uint sign_length= (unsigned_flag= args[0]->unsigned_flag) ? 0 : 1; + fix_char_length(precision + sign_length); + if (precision > 9) + { + if (unsigned_flag) + set_handler(&type_handler_ulonglong); + else + set_handler(&type_handler_slonglong); + } + else + { + if (unsigned_flag) + set_handler(&type_handler_ulong); + else + set_handler(&type_handler_slong); + } + } +} + + +void Item_func_int_val::fix_length_and_dec_double() +{ + set_handler(&type_handler_double); + max_length= float_length(0); + decimals= 0; +} + + +bool Item_func_int_val::fix_length_and_dec(THD *thd) +{ + DBUG_ENTER("Item_func_int_val::fix_length_and_dec"); + DBUG_PRINT("info", ("name %s", func_name())); + /* + We don't want to translate ENUM/SET to CHAR here. + So let's call real_type_handler(), not type_handler(). + */ + if (args[0]->real_type_handler()->Item_func_int_val_fix_length_and_dec(this)) + DBUG_RETURN(TRUE); + DBUG_PRINT("info", ("Type: %s", real_type_handler()->name().ptr())); + DBUG_RETURN(FALSE); +} + + +bool Item_func_int_val::native_op(THD *thd, Native *to) +{ + // TODO: turn Item_func_int_val into Item_handled_func eventually. 
+ if (type_handler()->mysql_timestamp_type() == MYSQL_TIMESTAMP_TIME) + return Time(thd, this).to_native(to, decimals); + DBUG_ASSERT(0); + return true; +} + + +longlong Item_func_ceiling::int_op() +{ + switch (args[0]->result_type()) { + case STRING_RESULT: // hex hybrid + case INT_RESULT: + return val_int_from_item(args[0]); + case DECIMAL_RESULT: + return VDec_op(this).to_longlong(unsigned_flag); + default: + break; + } + return (longlong) Item_func_ceiling::real_op(); +} + + +double Item_func_ceiling::real_op() +{ + /* + the volatile's for BUG #3051 to calm optimizer down (because of gcc's + bug) + */ + volatile double value= args[0]->val_real(); + null_value= args[0]->null_value; + return ceil(value); +} + + +my_decimal *Item_func_ceiling::decimal_op(my_decimal *decimal_value) +{ + VDec value(args[0]); + if (!(null_value= (value.is_null() || + value.round_to(decimal_value, 0, CEILING) > 1))) + return decimal_value; + return 0; +} + + +bool Item_func_ceiling::date_op(THD *thd, MYSQL_TIME *to, + date_mode_t fuzzydate) +{ + Datetime::Options opt(thd, TIME_FRAC_TRUNCATE); + Datetime *tm= new (to) Datetime(thd, args[0], opt); + tm->ceiling(thd); + null_value= !tm->is_valid_datetime(); + DBUG_ASSERT(maybe_null() || !null_value); + return null_value; +} + + +bool Item_func_ceiling::time_op(THD *thd, MYSQL_TIME *to) +{ + static const Time::Options_for_round opt; + Time *tm= new (to) Time(thd, args[0], opt); + tm->ceiling(); + null_value= !tm->is_valid_time(); + DBUG_ASSERT(maybe_null() || !null_value); + return null_value; +} + + +longlong Item_func_floor::int_op() +{ + switch (args[0]->result_type()) { + case STRING_RESULT: // hex hybrid + case INT_RESULT: + return val_int_from_item(args[0]); + case DECIMAL_RESULT: + { + my_decimal dec_buf, *dec; + return (!(dec= Item_func_floor::decimal_op(&dec_buf))) ? 
0 : + dec->to_longlong(unsigned_flag); + } + default: + break; + } + return (longlong) Item_func_floor::real_op(); +} + + +double Item_func_floor::real_op() +{ + /* + the volatile's for BUG #3051 to calm optimizer down (because of gcc's + bug) + */ + volatile double value= args[0]->val_real(); + null_value= args[0]->null_value; + return floor(value); +} + + +my_decimal *Item_func_floor::decimal_op(my_decimal *decimal_value) +{ + VDec value(args[0]); + if (!(null_value= (value.is_null() || + value.round_to(decimal_value, 0, FLOOR) > 1))) + return decimal_value; + return 0; +} + + +bool Item_func_floor::date_op(THD *thd, MYSQL_TIME *to, date_mode_t fuzzydate) +{ + // DATETIME is not negative, so FLOOR means just truncation + Datetime::Options opt(thd, TIME_FRAC_TRUNCATE); + Datetime *tm= new (to) Datetime(thd, args[0], opt, 0); + null_value= !tm->is_valid_datetime(); + DBUG_ASSERT(maybe_null() || !null_value); + return null_value; +} + + +bool Item_func_floor::time_op(THD *thd, MYSQL_TIME *to) +{ + static const Time::Options_for_round opt; + Time *tm= new (to) Time(thd, args[0], opt); + tm->floor(); + null_value= !tm->is_valid_time(); + DBUG_ASSERT(maybe_null() || !null_value); + return null_value; +} + + +void Item_func_round::fix_length_and_dec_decimal(uint decimals_to_set) +{ + int decimals_delta= args[0]->decimals - decimals_to_set; + int length_increase= (decimals_delta <= 0 || truncate) ? 
0 : 1; + int precision= args[0]->decimal_precision() + length_increase - + decimals_delta; + DBUG_ASSERT(decimals_to_set <= DECIMAL_MAX_SCALE); + set_handler(&type_handler_newdecimal); + unsigned_flag= args[0]->unsigned_flag; + decimals= decimals_to_set; + if (!precision) + precision= 1; // DECIMAL(0,0) -> DECIMAL(1,0) + max_length= my_decimal_precision_to_length_no_truncation(precision, + decimals, + unsigned_flag); +} + +void Item_func_round::fix_length_and_dec_double(uint decimals_to_set) +{ + set_handler(&type_handler_double); + unsigned_flag= args[0]->unsigned_flag; + decimals= decimals_to_set; + max_length= float_length(decimals_to_set); +} + + +void Item_func_round::fix_arg_decimal() +{ + if (args[1]->const_item()) + { + Longlong_hybrid dec= args[1]->to_longlong_hybrid(); + if (args[1]->null_value) + fix_length_and_dec_double(NOT_FIXED_DEC); + else + fix_length_and_dec_decimal(dec.to_uint(DECIMAL_MAX_SCALE)); + } + else + { + set_handler(&type_handler_newdecimal); + unsigned_flag= args[0]->unsigned_flag; + decimals= args[0]->decimals; + max_length= args[0]->max_length; + } +} + + +void Item_func_round::fix_arg_double() +{ + if (args[1]->const_item()) + { + Longlong_hybrid dec= args[1]->to_longlong_hybrid(); + fix_length_and_dec_double(args[1]->null_value ? NOT_FIXED_DEC : + dec.to_uint(NOT_FIXED_DEC)); + } + else + fix_length_and_dec_double(args[0]->decimals); +} + + +void Item_func_round::fix_arg_temporal(const Type_handler *h, + uint int_part_length) +{ + set_handler(h); + if (args[1]->can_eval_in_optimize()) + { + Longlong_hybrid_null dec= args[1]->to_longlong_hybrid_null(); + fix_attributes_temporal(int_part_length, + dec.is_null() ? 
args[0]->decimals : + dec.to_uint(TIME_SECOND_PART_DIGITS)); + } + else + fix_attributes_temporal(int_part_length, args[0]->decimals); +} + + +void Item_func_round::fix_arg_time() +{ + fix_arg_temporal(&type_handler_time2, MIN_TIME_WIDTH); +} + + +void Item_func_round::fix_arg_datetime() +{ + /* + Day increment operations are not supported for '0000-00-00', + see get_date_from_daynr() for details. Therefore, expressions like + ROUND('0000-00-00 23:59:59.999999') + return NULL. + */ + if (!truncate) + set_maybe_null(); + fix_arg_temporal(&type_handler_datetime2, MAX_DATETIME_WIDTH); +} + + +bool Item_func_round::test_if_length_can_increase() +{ + if (truncate) + return false; + if (args[1]->can_eval_in_optimize()) + { + // Length can increase in some cases: e.g. ROUND(9,-1) -> 10. + Longlong_hybrid val1= args[1]->to_longlong_hybrid(); + return !args[1]->null_value && val1.neg(); + } + return true; // ROUND(x,n), where n is not a constant. +} + + +/** + Calculate data type and attributes for INT-alike input. + + @param [IN] preferred - The preferred data type handler for simple cases + such as ROUND(x) and TRUNCATE(x,0), when the input + is short enough to fit into an integer type + (without extending to DECIMAL). + - If `preferred` is not NULL, then the code tries + to preserve the given data type handler and + the data type attributes `preferred_attrs`. + - If `preferred` is NULL, then the code fully + calculates attributes using + args[0]->decimal_precision() and chooses between + INT and BIGINT, depending on attributes. + @param [IN] preferred_attrs - The preferred data type attributes for + simple cases. 
+*/ +void Item_func_round::fix_arg_int(const Type_handler *preferred, + const Type_std_attributes *preferred_attrs, + bool use_decimal_on_length_increase) +{ + DBUG_ASSERT(args[0]->decimals == 0); + + Type_std_attributes::set(preferred_attrs); + if (!test_if_length_can_increase()) + { + // Preserve the exact data type and attributes + set_handler(preferred); + } + else + { + max_length++; + if (use_decimal_on_length_increase) + set_handler(&type_handler_newdecimal); + else + set_handler(type_handler_long_or_longlong()); + } +} + + +void Item_func_round::fix_arg_hex_hybrid() +{ + DBUG_ASSERT(args[0]->decimals == 0); + DBUG_ASSERT(args[0]->decimal_precision() < DECIMAL_LONGLONG_DIGITS); + DBUG_ASSERT(args[0]->unsigned_flag); // no needs to add sign length + bool length_can_increase= test_if_length_can_increase(); + max_length= args[0]->decimal_precision() + MY_TEST(length_can_increase); + unsigned_flag= true; + decimals= 0; + if (length_can_increase && args[0]->max_length >= 8) + set_handler(&type_handler_newdecimal); + else + set_handler(type_handler_long_or_longlong()); +} + + +double my_double_round(double value, longlong dec, bool dec_unsigned, + bool truncate) +{ + double tmp; + bool dec_negative= (dec < 0) && !dec_unsigned; + ulonglong abs_dec= dec_negative ? -dec : dec; + /* + tmp2 is here to avoid return the value with 80 bit precision + This will fix that the test round(0.1,1) = round(0.1,1) is true + Tagging with volatile is no guarantee, it may still be optimized away... + */ + volatile double tmp2; + + tmp=(abs_dec < array_elements(log_10) ? + log_10[abs_dec] : pow(10.0,(double) abs_dec)); + + // Pre-compute these, to avoid optimizing away e.g. 'floor(v/tmp) * tmp'. 
+ volatile double value_div_tmp= value / tmp; + volatile double value_mul_tmp= value * tmp; + + if (!dec_negative && std::isinf(tmp)) // "dec" is too large positive number + return value; + + if (dec_negative && std::isinf(tmp)) + tmp2= 0.0; + else if (!dec_negative && std::isinf(value_mul_tmp)) + tmp2= value; + else if (truncate) + { + if (value >= 0.0) + tmp2= dec < 0 ? floor(value_div_tmp) * tmp : floor(value_mul_tmp) / tmp; + else + tmp2= dec < 0 ? ceil(value_div_tmp) * tmp : ceil(value_mul_tmp) / tmp; + } + else + tmp2=dec < 0 ? rint(value_div_tmp) * tmp : rint(value_mul_tmp) / tmp; + + return tmp2; +} + + +double Item_func_round::real_op() +{ + double value= args[0]->val_real(); + + if (!(null_value= args[0]->null_value)) + { + longlong dec= args[1]->val_int(); + if (!(null_value= args[1]->null_value)) + return my_double_round(value, dec, args[1]->unsigned_flag, truncate); + } + return 0.0; +} + +/* + Rounds a given value to a power of 10 specified as the 'to' argument, + avoiding overflows when the value is close to the ulonglong range boundary. +*/ + +static inline ulonglong my_unsigned_round(ulonglong value, ulonglong to) +{ + ulonglong tmp= value / to * to; + return (value - tmp < (to >> 1)) ? tmp : tmp + to; +} + + +longlong Item_func_round::int_op() +{ + longlong value= args[0]->val_int(); + longlong dec= args[1]->val_int(); + decimals= 0; + ulonglong abs_dec; + if ((null_value= args[0]->null_value || args[1]->null_value)) + return 0; + if ((dec >= 0) || args[1]->unsigned_flag) + return value; // integer have not digits after point + + abs_dec= Longlong(dec).abs(); // Avoid undefined behavior + longlong tmp; + + if(abs_dec >= array_elements(log_10_int)) + return 0; + + tmp= log_10_int[abs_dec]; + + if (truncate) + value= (unsigned_flag) ? + ((ulonglong) value / tmp) * tmp : (value / tmp) * tmp; + else + value= (unsigned_flag || value >= 0) ? 
+ my_unsigned_round((ulonglong) value, tmp) : + -(longlong) my_unsigned_round((ulonglong) -value, tmp); + return value; +} + + +my_decimal *Item_func_round::decimal_op(my_decimal *decimal_value) +{ + VDec value(args[0]); + longlong dec= args[1]->val_int(); + if (dec >= 0 || args[1]->unsigned_flag) + dec= MY_MIN((ulonglong) dec, decimals); + else if (dec < INT_MIN) + dec= INT_MIN; + + if (!(null_value= (value.is_null() || args[1]->null_value || + value.round_to(decimal_value, (int) dec, + truncate ? TRUNCATE : HALF_UP) > 1))) + return decimal_value; + return 0; +} + + +bool Item_func_round::time_op(THD *thd, MYSQL_TIME *to) +{ + DBUG_ASSERT(args[0]->type_handler()->mysql_timestamp_type() == + MYSQL_TIMESTAMP_TIME); + Time::Options_for_round opt(truncate ? TIME_FRAC_TRUNCATE : TIME_FRAC_ROUND); + Longlong_hybrid_null dec= args[1]->to_longlong_hybrid_null(); + Time *tm= new (to) Time(thd, args[0], opt, + dec.to_uint(TIME_SECOND_PART_DIGITS)); + null_value= !tm->is_valid_time() || dec.is_null(); + DBUG_ASSERT(maybe_null() || !null_value); + return null_value; +} + + +bool Item_func_round::date_op(THD *thd, MYSQL_TIME *to, date_mode_t fuzzydate) +{ + DBUG_ASSERT(args[0]->type_handler()->mysql_timestamp_type() == + MYSQL_TIMESTAMP_DATETIME); + Datetime::Options opt(thd, truncate ? TIME_FRAC_TRUNCATE : TIME_FRAC_ROUND); + Longlong_hybrid_null dec= args[1]->to_longlong_hybrid_null(); + Datetime *tm= new (to) Datetime(thd, args[0], opt, + dec.to_uint(TIME_SECOND_PART_DIGITS)); + null_value= !tm->is_valid_datetime() || dec.is_null(); + DBUG_ASSERT(maybe_null() || !null_value); + return null_value; +} + + +bool Item_func_round::native_op(THD *thd, Native *to) +{ + // TODO: turn Item_func_round into Item_handled_func eventually. 
+ if (type_handler()->mysql_timestamp_type() == MYSQL_TIMESTAMP_TIME) + return Time(thd, this).to_native(to, decimals); + DBUG_ASSERT(0); + return true; +} + + +void Item_func_rand::seed_random(Item *arg) +{ + /* + TODO: do not do reinit 'rand' for every execute of PS/SP if + args[0] is a constant. + */ + uint32 tmp= (uint32) arg->val_int(); +#ifdef WITH_WSREP + if (WSREP_ON) + { + THD *thd= current_thd; + if (WSREP(thd)) + { + if (wsrep_thd_is_applying(thd)) + tmp= thd->wsrep_rand; + else + thd->wsrep_rand= tmp; + } + } +#endif /* WITH_WSREP */ + + my_rnd_init(rand, (uint32) (tmp*0x10001L+55555555L), + (uint32) (tmp*0x10000001L)); +} + + +bool Item_func_rand::fix_fields(THD *thd,Item **ref) +{ + if (Item_real_func::fix_fields(thd, ref)) + return TRUE; + used_tables_cache|= RAND_TABLE_BIT; + if (arg_count) + { // Only use argument once in query + /* + Allocate rand structure once: we must use thd->stmt_arena + to create rand in proper mem_root if it's a prepared statement or + stored procedure. + + No need to send a Rand log event if seed was given eg: RAND(seed), + as it will be replicated in the query as such. 
+ */ + if (!rand && !(rand= (struct my_rnd_struct*) + thd->stmt_arena->alloc(sizeof(*rand)))) + return TRUE; + } + else + { + /* + Save the seed only the first time RAND() is used in the query + Once events are forwarded rather than recreated, + the following can be skipped if inside the slave thread + */ + if (!(thd->used & THD::RAND_USED)) + { + thd->used|= THD::RAND_USED; + thd->rand_saved_seed1= thd->rand.seed1; + thd->rand_saved_seed2= thd->rand.seed2; + } + rand= &thd->rand; + } + return FALSE; +} + +void Item_func_rand::update_used_tables() +{ + Item_real_func::update_used_tables(); + used_tables_cache|= RAND_TABLE_BIT; +} + + +double Item_func_rand::val_real() +{ + DBUG_ASSERT(fixed()); + if (arg_count) + { + if (!args[0]->const_item()) + seed_random(args[0]); + else if (first_eval) + { + /* + Constantness of args[0] may be set during JOIN::optimize(), if arg[0] + is a field item of "constant" table. Thus, we have to evaluate + seed_random() for constant arg there but not at the fix_fields method. + */ + first_eval= FALSE; + seed_random(args[0]); + } + } + return my_rnd(rand); +} + +longlong Item_func_sign::val_int() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + null_value=args[0]->null_value; + return value < 0.0 ? -1 : (value > 0 ? 1 : 0); +} + + +double Item_func_units::val_real() +{ + DBUG_ASSERT(fixed()); + double value= args[0]->val_real(); + if ((null_value=args[0]->null_value)) + return 0; + return check_float_overflow(value * mul + add); +} + + +bool Item_func_min_max::fix_attributes(Item **items, uint nitems) +{ + bool rc= Item_func_min_max::type_handler()-> + Item_func_min_max_fix_attributes(current_thd, this, items, nitems); + DBUG_ASSERT(!rc || current_thd->is_error()); + return rc; +} + + +/* + Compare item arguments using DATETIME/DATE/TIME representation. + + DESCRIPTION + Compare item arguments as DATETIME values and return the index of the + least/greatest argument in the arguments array. 
+ The correct DATE/DATETIME value of the found argument is + stored to the value pointer, if latter is provided. + + RETURN + 1 If one of arguments is NULL or there was a execution error + 0 Otherwise +*/ + +bool Item_func_min_max::get_date_native(THD *thd, MYSQL_TIME *ltime, + date_mode_t fuzzydate) +{ + longlong UNINIT_VAR(min_max); + DBUG_ASSERT(fixed()); + + for (uint i=0; i < arg_count ; i++) + { + longlong res= args[i]->val_datetime_packed(thd); + + /* Check if we need to stop (because of error or KILL) and stop the loop */ + if (unlikely(args[i]->null_value)) + return (null_value= 1); + + if (i == 0 || (res < min_max ? cmp_sign : -cmp_sign) > 0) + min_max= res; + } + unpack_time(min_max, ltime, mysql_timestamp_type()); + + if (!(fuzzydate & TIME_TIME_ONLY) && + unlikely((null_value= check_date_with_warn(thd, ltime, fuzzydate, + MYSQL_TIMESTAMP_ERROR)))) + return true; + + return (null_value= 0); +} + + +bool Item_func_min_max::get_time_native(THD *thd, MYSQL_TIME *ltime) +{ + DBUG_ASSERT(fixed()); + + Time value(thd, args[0], Time::Options(thd), decimals); + if (!value.is_valid_time()) + return (null_value= true); + + for (uint i= 1; i < arg_count ; i++) + { + Time tmp(thd, args[i], Time::Options(thd), decimals); + if (!tmp.is_valid_time()) + return (null_value= true); + + int cmp= value.cmp(&tmp); + if ((cmp_sign < 0 ? cmp : -cmp) < 0) + value= tmp; + } + value.copy_to_mysql_time(ltime); + return (null_value= 0); +} + + +String *Item_func_min_max::val_str_native(String *str) +{ + String *UNINIT_VAR(res); + for (uint i=0; i < arg_count ; i++) + { + if (i == 0) + res=args[i]->val_str(str); + else + { + String *res2; + res2= args[i]->val_str(res == str ? &tmp_value : str); + if (res2) + { + int cmp= sortcmp(res,res2,collation.collation); + if ((cmp_sign < 0 ? 
cmp : -cmp) < 0) + res=res2; + } + } + if ((null_value= args[i]->null_value)) + return 0; + } + res->set_charset(collation.collation); + return res; +} + + +double Item_func_min_max::val_real_native() +{ + double value=0.0; + for (uint i=0; i < arg_count ; i++) + { + if (i == 0) + value= args[i]->val_real(); + else + { + double tmp= args[i]->val_real(); + if (!args[i]->null_value && (tmp < value ? cmp_sign : -cmp_sign) > 0) + value=tmp; + } + if ((null_value= args[i]->null_value)) + break; + } + return value; +} + + +longlong Item_func_min_max::val_int_native() +{ + DBUG_ASSERT(fixed()); + longlong value=0; + for (uint i=0; i < arg_count ; i++) + { + if (i == 0) + value=args[i]->val_int(); + else + { + longlong tmp=args[i]->val_int(); + if (!args[i]->null_value && (tmp < value ? cmp_sign : -cmp_sign) > 0) + value=tmp; + } + if ((null_value= args[i]->null_value)) + break; + } + return value; +} + + +my_decimal *Item_func_min_max::val_decimal_native(my_decimal *dec) +{ + DBUG_ASSERT(fixed()); + my_decimal tmp_buf, *tmp, *UNINIT_VAR(res); + + for (uint i=0; i < arg_count ; i++) + { + if (i == 0) + res= args[i]->val_decimal(dec); + else + { + tmp= args[i]->val_decimal(&tmp_buf); // Zero if NULL + if (tmp && (my_decimal_cmp(tmp, res) * cmp_sign) < 0) + { + if (tmp == &tmp_buf) + { + /* Move value out of tmp_buf as this will be reused on next loop */ + my_decimal2decimal(tmp, dec); + res= dec; + } + else + res= tmp; + } + } + if ((null_value= args[i]->null_value)) + { + res= 0; + break; + } + } + return res; +} + + +bool Item_func_min_max::val_native(THD *thd, Native *native) +{ + DBUG_ASSERT(fixed()); + const Type_handler *handler= Item_hybrid_func::type_handler(); + NativeBuffer cur; + for (uint i= 0; i < arg_count; i++) + { + if (val_native_with_conversion_from_item(thd, args[i], + i == 0 ? native : &cur, + handler)) + return true; + if (i > 0) + { + int cmp= handler->cmp_native(*native, cur); + if ((cmp_sign < 0 ? 
cmp : -cmp) < 0 && native->copy(cur)) + return null_value= true; + } + } + return null_value= false; +} + + +longlong Item_func_bit_length::val_int() +{ + DBUG_ASSERT(fixed()); + String *res= args[0]->val_str(&value); + return (null_value= !res) ? 0 : (longlong) res->length() * 8; +} + + +longlong Item_func_octet_length::val_int() +{ + DBUG_ASSERT(fixed()); + String *res=args[0]->val_str(&value); + if (!res) + { + null_value=1; + return 0; /* purecov: inspected */ + } + null_value=0; + return (longlong) res->length(); +} + + +longlong Item_func_char_length::val_int() +{ + DBUG_ASSERT(fixed()); + String *res=args[0]->val_str(&value); + if (!res) + { + null_value=1; + return 0; /* purecov: inspected */ + } + null_value=0; + return (longlong) res->numchars(); +} + + +longlong Item_func_coercibility::val_int() +{ + DBUG_ASSERT(fixed()); + null_value= 0; + return (longlong) args[0]->collation.derivation; +} + + +longlong Item_func_locate::val_int() +{ + DBUG_ASSERT(fixed()); + String *a=args[0]->val_str(&value1); + String *b=args[1]->val_str(&value2); + if (!a || !b) + { + null_value=1; + return 0; /* purecov: inspected */ + } + null_value=0; + /* must be longlong to avoid truncation */ + longlong start= 0; + longlong start0= 0; + my_match_t match; + + if (arg_count == 3) + { + start0= start= args[2]->val_int(); + + if ((start <= 0) || (start > a->length())) + return 0; + start0--; start--; + + /* start is now sufficiently valid to pass to charpos function */ + start= a->charpos((int) start); + + if (start + b->length() > a->length()) + return 0; + } + + if (!b->length()) // Found empty string at start + return start + 1; + + if (!cmp_collation.collation->instr(a->ptr() + start, + (uint) (a->length() - start), + b->ptr(), b->length(), + &match, 1)) + return 0; + return (longlong) match.mb_len + start0 + 1; +} + + +void Item_func_locate::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("locate(")); + args[1]->print(str, query_type); + 
str->append(','); + args[0]->print(str, query_type); + if (arg_count == 3) + { + str->append(','); + args[2]->print(str, query_type); + } + str->append(')'); +} + + +longlong Item_func_field::val_int() +{ + DBUG_ASSERT(fixed()); + + if (cmp_type == STRING_RESULT) + { + String *field; + if (!(field= args[0]->val_str(&value))) + return 0; + for (uint i=1 ; i < arg_count ; i++) + { + String *tmp_value=args[i]->val_str(&tmp); + if (tmp_value && !sortcmp(field,tmp_value,cmp_collation.collation)) + return (longlong) (i); + } + } + else if (cmp_type == INT_RESULT) + { + longlong val= args[0]->val_int(); + if (args[0]->null_value) + return 0; + for (uint i=1; i < arg_count ; i++) + { + if (val == args[i]->val_int() && !args[i]->null_value) + return (longlong) (i); + } + } + else if (cmp_type == DECIMAL_RESULT) + { + VDec dec(args[0]); + if (dec.is_null()) + return 0; + my_decimal dec_arg_buf; + for (uint i=1; i < arg_count; i++) + { + my_decimal *dec_arg= args[i]->val_decimal(&dec_arg_buf); + if (!args[i]->null_value && !dec.cmp(dec_arg)) + return (longlong) (i); + } + } + else + { + double val= args[0]->val_real(); + if (args[0]->null_value) + return 0; + for (uint i=1; i < arg_count ; i++) + { + if (val == args[i]->val_real() && !args[i]->null_value) + return (longlong) (i); + } + } + return 0; +} + + +bool Item_func_field::fix_length_and_dec(THD *thd) +{ + base_flags&= ~item_base_t::MAYBE_NULL; + max_length=3; + cmp_type= args[0]->result_type(); + for (uint i=1; i < arg_count ; i++) + cmp_type= item_cmp_type(cmp_type, args[i]->result_type()); + if (cmp_type == STRING_RESULT) + return agg_arg_charsets_for_comparison(cmp_collation, args, arg_count); + return FALSE; +} + + +longlong Item_func_ascii::val_int() +{ + DBUG_ASSERT(fixed()); + String *res=args[0]->val_str(&value); + if (!res) + { + null_value=1; + return 0; + } + null_value=0; + return (longlong) (res->length() ? 
(uchar) (*res)[0] : (uchar) 0); +} + +longlong Item_func_ord::val_int() +{ + DBUG_ASSERT(fixed()); + String *res=args[0]->val_str(&value); + if (!res) + { + null_value=1; + return 0; + } + null_value=0; + if (!res->length()) return 0; +#ifdef USE_MB + if (res->use_mb()) + { + const char *str=res->ptr(); + uint32 n=0, l=my_ismbchar(res->charset(),str,str+res->length()); + if (!l) + return (longlong)((uchar) *str); + while (l--) + n=(n<<8)|(uint32)((uchar) *str++); + return (longlong) n; + } +#endif + return (longlong) ((uchar) (*res)[0]); +} + + /* Search after a string in a string of strings separated by ',' */ + /* Returns number of found type >= 1 or 0 if not found */ + /* This optimizes searching in enums to bit testing! */ + +bool Item_func_find_in_set::fix_length_and_dec(THD *thd) +{ + decimals=0; + max_length=3; // 1-999 + if (args[0]->const_item() && args[1]->type() == FIELD_ITEM) + { + Field *field= ((Item_field*) args[1])->field; + if (field->real_type() == MYSQL_TYPE_SET) + { + String *find=args[0]->val_str(&value); + if (find) + { + // find is not NULL pointer so args[0] is not a null-value + DBUG_ASSERT(!args[0]->null_value); + enum_value= find_type(((Field_enum*) field)->typelib,find->ptr(), + find->length(), 0); + enum_bit=0; + if (enum_value) + enum_bit= 1ULL << (enum_value-1); + } + } + } + return agg_arg_charsets_for_comparison(cmp_collation, args, 2); +} + +static const char separator=','; + +longlong Item_func_find_in_set::val_int() +{ + DBUG_ASSERT(fixed()); + if (enum_value) + { + // enum_value is set iff args[0]->const_item() in fix_length_and_dec(). + DBUG_ASSERT(args[0]->const_item()); + + ulonglong tmp= (ulonglong) args[1]->val_int(); + null_value= args[1]->null_value; + /* + No need to check args[0]->null_value since enum_value is set iff + args[0] is a non-null const item. Note: no DBUG_ASSERT on + args[0]->null_value here because args[0] may have been replaced + by an Item_cache on which val_int() has not been called. 
See + BUG#11766317 + */ + if (!null_value) + { + if (tmp & enum_bit) + return enum_value; + } + return 0L; + } + + String *find=args[0]->val_str(&value); + String *buffer=args[1]->val_str(&value2); + if (!find || !buffer) + { + null_value=1; + return 0; /* purecov: inspected */ + } + null_value=0; + + if ((int) (buffer->length() - find->length()) >= 0) + { + my_wc_t wc= 0; + CHARSET_INFO *cs= cmp_collation.collation; + const char *str_begin= buffer->ptr(); + const char *str_end= buffer->ptr(); + const char *real_end= str_end+buffer->length(); + const char *find_str= find->ptr(); + uint find_str_len= find->length(); + int position= 0; + while (1) + { + int symbol_len; + if ((symbol_len= cs->mb_wc(&wc, (uchar*) str_end, + (uchar*) real_end)) > 0) + { + const char *substr_end= str_end + symbol_len; + bool is_last_item= (substr_end == real_end); + bool is_separator= (wc == (my_wc_t) separator); + if (is_separator || is_last_item) + { + position++; + if (is_last_item && !is_separator) + str_end= substr_end; + if (!cs->strnncoll(str_begin, (uint) (str_end - str_begin), + find_str, find_str_len)) + return (longlong) position; + else + str_begin= substr_end; + } + str_end= substr_end; + } + else if (str_end - str_begin == 0 && + find_str_len == 0 && + wc == (my_wc_t) separator) + return (longlong) ++position; + else + return 0; + } + } + return 0; +} + + +class Func_handler_bit_count_int_to_slong: + public Item_handled_func::Handler_slong2 +{ +public: + Longlong_null to_longlong_null(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + return item->arguments()[0]->to_longlong_null().bit_count(); + } +}; + + +class Func_handler_bit_count_decimal_to_slong: + public Item_handled_func::Handler_slong2 +{ +public: + Longlong_null to_longlong_null(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + return VDec(item->arguments()[0]).to_xlonglong_null().bit_count(); + } +}; + + +bool Item_func_bit_count::fix_length_and_dec(THD *thd) +{ + static 
Func_handler_bit_count_int_to_slong ha_int_to_slong;
  static Func_handler_bit_count_decimal_to_slong ha_dec_to_slong;
  set_func_handler(args[0]->cmp_type() == INT_RESULT ?
                   (const Handler *) &ha_int_to_slong :
                   (const Handler *) &ha_dec_to_slong);
  return m_func_handler->fix_length_and_dec(this);
}


/****************************************************************************
** Functions to handle dynamic loadable functions
** Original source by: Alexis Mikhailov
** Rewritten by monty.
****************************************************************************/

#ifdef HAVE_DLOPEN

/*
  Run the UDF's deinit callback (if any), drop our reference to the
  loaded udf_func and free the per-call String buffers.  A handler that
  is a runtime clone (not_original) owns nothing and must not free.
*/
void udf_handler::cleanup()
{
  if (!not_original)
  {
    if (initialized)
    {
      if (u_d->func_deinit != NULL)
      {
        Udf_func_deinit deinit= u_d->func_deinit;
        (*deinit)(&initid);
      }
      free_udf(u_d);
      initialized= FALSE;
    }
    if (buffers)				// Because of bug in ecc
      delete [] buffers;
    buffers= 0;
  }
}


/*
  Resolve the UDF, fix all arguments, allocate the UDF_ARGS arrays and
  call the UDF's init function.  Returns TRUE (and raises an error) on
  any failure.
*/
bool
udf_handler::fix_fields(THD *thd, Item_func_or_sum *func,
			uint arg_count, Item **arguments)
{
  uchar buff[STACK_BUFF_ALLOC];			// Max argument in function
  DBUG_ENTER("Item_udf_func::fix_fields");

  if (check_stack_overrun(thd, STACK_MIN_SIZE, buff))
    DBUG_RETURN(TRUE);				// Fatal error flag is set!

  udf_func *tmp_udf=find_udf(u_d->name.str,u_d->name.length,1);

  if (!tmp_udf)
  {
    my_error(ER_CANT_FIND_UDF, MYF(0), u_d->name.str);
    DBUG_RETURN(TRUE);
  }
  u_d=tmp_udf;
  args=arguments;

  /* Fix all arguments */
  func->base_flags&= ~item_base_t::MAYBE_NULL;
  func->used_tables_and_const_cache_init();

  if ((f_args.arg_count=arg_count))
  {
    if (!(f_args.arg_type= (Item_result*)
	  thd->alloc(f_args.arg_count*sizeof(Item_result))))

    {
    err_exit:
      free_udf(u_d);
      DBUG_RETURN(TRUE);
    }
    uint i;
    Item **arg,**arg_end;
    for (i=0, arg=arguments, arg_end=arguments+arg_count;
	 arg != arg_end ;
	 arg++,i++)
    {
      /*
        NOTE(review): this return path skips the free_udf() done at
        err_exit — presumably the udf reference is reclaimed elsewhere;
        confirm before relying on it.
      */
      if ((*arg)->fix_fields_if_needed_for_scalar(thd, arg))
        DBUG_RETURN(true);
      // we can't assign 'item' before, because fix_fields() can change arg
      Item *item= *arg;
      /*
        TODO: We should think about this. It is not always
        right way just to set an UDF result to return my_charset_bin
        if one argument has binary sorting order.
        The result collation should be calculated according to arguments
        derivations in some cases and should not in other cases.
        Moreover, some arguments can represent a numeric input
        which doesn't effect the result character set and collation.
        There is no a general rule for UDF. Everything depends on
        the particular user defined function.
      */
      if (item->collation.collation->state & MY_CS_BINSORT)
	func->collation.set(&my_charset_bin);
      func->base_flags|= item->base_flags & item_base_t::MAYBE_NULL;
      func->with_flags|= item->with_flags;
      func->used_tables_and_const_cache_join(item);
      f_args.arg_type[i]=item->result_type();
    }
    buffers=new (thd->mem_root) String[arg_count];
    if (!buffers ||
        !multi_alloc_root(thd->mem_root,
                          &f_args.args, arg_count * sizeof(char *),
                          &f_args.lengths, arg_count * sizeof(long),
                          &f_args.maybe_null, arg_count * sizeof(char),
                          &num_buffer, arg_count * sizeof(double),
                          &f_args.attributes, arg_count * sizeof(char *),
                          &f_args.attribute_lengths,
                          arg_count * sizeof(long),
                          NullS))
      goto err_exit;
  }
  if (func->fix_length_and_dec(thd))
    DBUG_RETURN(TRUE);
  initid.max_length=func->max_length;
  initid.maybe_null=func->maybe_null();
  initid.const_item=func->const_item_cache;
  initid.decimals=func->decimals;
  initid.ptr=0;
  for (uint i1= 0 ; i1 < arg_count ; i1++)
    buffers[i1].set_thread_specific();

  if (u_d->func_init)
  {
    char init_msg_buff[MYSQL_ERRMSG_SIZE];
    char *to=num_buffer;
    for (uint i=0; i < arg_count; i++)
    {
      /*
       For a constant argument i, args->args[i] points to the argument value.
       For non-constant, args->args[i] is NULL.
      */
      f_args.args[i]= NULL;         /* Non-const unless updated below. */

      f_args.lengths[i]= arguments[i]->max_length;
      f_args.maybe_null[i]= (char) arguments[i]->maybe_null();
      f_args.attributes[i]= arguments[i]->name.str;
      f_args.attribute_lengths[i]= (ulong)arguments[i]->name.length;

      if (arguments[i]->const_item())
      {
        switch (arguments[i]->result_type()) {
        case STRING_RESULT:
        case DECIMAL_RESULT:
        {
          String *res= arguments[i]->val_str(&buffers[i]);
          if (arguments[i]->null_value)
            continue;
          f_args.args[i]= (char*) res->c_ptr_safe();
          f_args.lengths[i]= res->length();
          break;
        }
        case INT_RESULT:
          *((longlong*) to)= arguments[i]->val_int();
          if (arguments[i]->null_value)
            continue;
          f_args.args[i]= to;
          to+= ALIGN_SIZE(sizeof(longlong));
          break;
        case REAL_RESULT:
          *((double*) to)= arguments[i]->val_real();
          if (arguments[i]->null_value)
            continue;
          f_args.args[i]= to;
          to+= ALIGN_SIZE(sizeof(double));
          break;
        case ROW_RESULT:
        case TIME_RESULT:
          DBUG_ASSERT(0);          // This case should never be chosen
          break;
        }
      }
    }
    Udf_func_init init= u_d->func_init;
    if (unlikely((error=(uchar) init(&initid, &f_args, init_msg_buff))))
    {
      my_error(ER_CANT_INITIALIZE_UDF, MYF(0),
               u_d->name.str, init_msg_buff);
      goto err_exit;
    }
    func->max_length=MY_MIN(initid.max_length,MAX_BLOB_WIDTH);
    func->set_maybe_null(initid.maybe_null);
    /*
      The above call for init() can reset initid.const_item to "false",
      e.g. when the UDF function wants to be non-deterministic.
      See sequence_init() in udf_example.cc.
    */
    func->const_item_cache= initid.const_item;
    func->decimals=MY_MIN(initid.decimals,NOT_FIXED_DEC);
  }
  initialized=1;
  if (unlikely(error))
  {
    my_error(ER_CANT_INITIALIZE_UDF, MYF(0),
             u_d->name.str, ER_THD(thd, ER_UNKNOWN_ERROR));
    DBUG_RETURN(TRUE);
  }
  DBUG_RETURN(FALSE);
}


/*
  Evaluate all arguments into the UDF_ARGS arrays before calling the
  UDF.  A NULL argument leaves f_args.args[i] == 0.
  Returns 1 if an earlier error was recorded.
*/
bool udf_handler::get_arguments()
{
  if (unlikely(error))
    return 1;					// Got an error earlier
  char *to= num_buffer;
  uint str_count=0;
  for (uint i=0; i < f_args.arg_count; i++)
  {
    f_args.args[i]=0;
    switch (f_args.arg_type[i]) {
    case STRING_RESULT:
    case DECIMAL_RESULT:
      {
	String *res=args[i]->val_str(&buffers[str_count++]);
	if (!(args[i]->null_value))
	{
	  f_args.args[i]=    (char*) res->ptr();
	  f_args.lengths[i]= res->length();
	}
	else
	{
	  f_args.lengths[i]= 0;
	}
	break;
      }
    case INT_RESULT:
      *((longlong*) to) = args[i]->val_int();
      if (!args[i]->null_value)
      {
	f_args.args[i]=to;
	to+= ALIGN_SIZE(sizeof(longlong));
      }
      break;
    case REAL_RESULT:
      *((double*) to)= args[i]->val_real();
      if (!args[i]->null_value)
      {
	f_args.args[i]=to;
	to+= ALIGN_SIZE(sizeof(double));
      }
      break;
    case ROW_RESULT:
    case TIME_RESULT:
      DBUG_ASSERT(0);              // This case should never be chosen
      break;
    }
  }
  return 0;
}

/**
  @return
    (String*)NULL in case of NULL values
*/
String *udf_handler::val_str(String *str,String *save_str)
{
  uchar is_null_tmp=0;
  ulong res_length;
  DBUG_ENTER("udf_handler::val_str");

  if (get_arguments())
    DBUG_RETURN(0);
  char * (*func)(UDF_INIT *, UDF_ARGS *, char *, ulong *, uchar *, uchar *)=
    (char* (*)(UDF_INIT *, UDF_ARGS *, char *, ulong *, uchar *, uchar *))
    u_d->func;

  if ((res_length=str->alloced_length()) < MAX_FIELD_WIDTH)
  {						// This happens VERY seldom
    if (str->alloc(MAX_FIELD_WIDTH))
    {
      error=1;
      DBUG_RETURN(0);
    }
  }
  char *res=func(&initid, &f_args, (char*) str->ptr(), &res_length,
		 &is_null_tmp, &error);
  DBUG_PRINT("info", ("udf func returned, res_length: %lu",
res_length)); + if (is_null_tmp || !res || unlikely(error)) // The !res is for safety + { + DBUG_PRINT("info", ("Null or error")); + DBUG_RETURN(0); + } + if (res == str->ptr()) + { + str->length(res_length); + DBUG_PRINT("exit", ("str: %*.s", (int) str->length(), str->ptr())); + DBUG_RETURN(str); + } + save_str->set(res, res_length, str->charset()); + DBUG_PRINT("exit", ("save_str: %s", save_str->ptr())); + DBUG_RETURN(save_str); +} + + +/* + For the moment, UDF functions are returning DECIMAL values as strings +*/ + +my_decimal *udf_handler::val_decimal(my_bool *null_value, my_decimal *dec_buf) +{ + char buf[DECIMAL_MAX_STR_LENGTH+1], *end; + ulong res_length= DECIMAL_MAX_STR_LENGTH; + + if (get_arguments()) + { + *null_value=1; + return 0; + } + char *(*func)(UDF_INIT *, UDF_ARGS *, char *, ulong *, uchar *, uchar *)= + (char* (*)(UDF_INIT *, UDF_ARGS *, char *, ulong *, uchar *, uchar *)) + u_d->func; + + char *res= func(&initid, &f_args, buf, &res_length, &is_null, &error); + if (is_null || unlikely(error)) + { + *null_value= 1; + return 0; + } + end= res+ res_length; + str2my_decimal(E_DEC_FATAL_ERROR, res, dec_buf, &end); + return dec_buf; +} + + +void Item_udf_func::cleanup() +{ + udf.cleanup(); + Item_func::cleanup(); +} + + +void Item_udf_func::print(String *str, enum_query_type query_type) +{ + str->append(func_name_cstring()); + str->append('('); + for (uint i=0 ; i < arg_count ; i++) + { + if (i != 0) + str->append(','); + args[i]->print_item_w_name(str, query_type); + } + str->append(')'); +} + + +double Item_func_udf_float::val_real() +{ + double res; + my_bool tmp_null_value; + DBUG_ASSERT(fixed()); + DBUG_ENTER("Item_func_udf_float::val"); + DBUG_PRINT("info",("result_type: %d arg_count: %d", + args[0]->result_type(), arg_count)); + res= udf.val(&tmp_null_value); + null_value= tmp_null_value; + DBUG_RETURN(res); +} + + +String *Item_func_udf_float::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + double nr= val_real(); + if (null_value) + return 
0; /* purecov: inspected */ + str->set_real(nr,decimals,&my_charset_bin); + return str; +} + + +longlong Item_func_udf_int::val_int() +{ + longlong res; + my_bool tmp_null_value; + DBUG_ASSERT(fixed()); + DBUG_ENTER("Item_func_udf_int::val_int"); + res= udf.val_int(&tmp_null_value); + null_value= tmp_null_value; + DBUG_RETURN(res); +} + + +String *Item_func_udf_int::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + longlong nr=val_int(); + if (null_value) + return 0; + str->set_int(nr, unsigned_flag, &my_charset_bin); + return str; +} + + +my_decimal *Item_func_udf_decimal::val_decimal(my_decimal *dec_buf) +{ + my_decimal *res; + my_bool tmp_null_value; + DBUG_ASSERT(fixed()); + DBUG_ENTER("Item_func_udf_decimal::val_decimal"); + DBUG_PRINT("info",("result_type: %d arg_count: %d", + args[0]->result_type(), arg_count)); + + res= udf.val_decimal(&tmp_null_value, dec_buf); + null_value= tmp_null_value; + DBUG_RETURN(res); +} + + +/* Default max_length is max argument length */ + +bool Item_func_udf_str::fix_length_and_dec(THD *thd) +{ + DBUG_ENTER("Item_func_udf_str::fix_length_and_dec"); + max_length=0; + for (uint i = 0; i < arg_count; i++) + set_if_bigger(max_length,args[i]->max_length); + DBUG_RETURN(FALSE); +} + +String *Item_func_udf_str::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String *res=udf.val_str(str,&str_value); + null_value = !res; + return res; +} + + +/** + @note + This has to come last in the udf_handler methods, or C for AIX + version 6.0.0.0 fails to compile with debugging enabled. (Yes, really.) +*/ + +udf_handler::~udf_handler() +{ + /* Everything should be properly cleaned up by this moment. 
*/ + DBUG_ASSERT(not_original || !(initialized || buffers)); +} + +#else +bool udf_handler::get_arguments() { return 0; } +#endif /* HAVE_DLOPEN */ + + +longlong Item_master_pos_wait::val_int() +{ + DBUG_ASSERT(fixed()); + THD* thd = current_thd; + String *log_name = args[0]->val_str(&value); + int event_count= 0; + DBUG_ENTER("Item_master_pos_wait::val_int"); + + null_value=0; + if (thd->slave_thread || !log_name || !log_name->length()) + { + null_value = 1; + DBUG_RETURN(0); + } +#ifdef HAVE_REPLICATION + longlong pos = (ulong)args[1]->val_int(); + longlong timeout = (arg_count>=3) ? args[2]->val_int() : 0 ; + String connection_name_buff; + LEX_CSTRING connection_name; + Master_info *mi= NULL; + if (arg_count >= 4) + { + String *con; + if (!(con= args[3]->val_str(&connection_name_buff))) + goto err; + + connection_name.str= con->ptr(); + connection_name.length= con->length(); + if (check_master_connection_name(&connection_name)) + { + my_error(ER_WRONG_ARGUMENTS, MYF(ME_WARNING), + "MASTER_CONNECTION_NAME"); + goto err; + } + } + else + connection_name= thd->variables.default_master_connection; + + if (!(mi= get_master_info(&connection_name, Sql_condition::WARN_LEVEL_WARN))) + goto err; + + if ((event_count = mi->rli.wait_for_pos(thd, log_name, pos, timeout)) == -2) + { + null_value = 1; + event_count=0; + } + mi->release(); +#endif + DBUG_PRINT("exit", ("event_count: %d null_value: %d", event_count, + (int) null_value)); + DBUG_RETURN(event_count); + +#ifdef HAVE_REPLICATION +err: + { + null_value = 1; + DBUG_RETURN(0); + } +#endif +} + + +longlong Item_master_gtid_wait::val_int() +{ + DBUG_ASSERT(fixed()); + longlong result= 0; + String *gtid_pos __attribute__((unused)) = args[0]->val_str(&value); + DBUG_ENTER("Item_master_gtid_wait::val_int"); + + if (args[0]->null_value) + { + null_value= 1; + DBUG_RETURN(0); + } + + null_value=0; +#ifdef HAVE_REPLICATION + THD* thd= current_thd; + longlong timeout_us; + + if (arg_count==2 && !args[1]->null_value) + 
timeout_us= (longlong)(1e6*args[1]->val_real());
  else
    timeout_us= (longlong)-1;

  result= rpl_global_gtid_waiting.wait_for_pos(thd, gtid_pos, timeout_us);
#else
  null_value= 0;
#endif /* REPLICATION */
  DBUG_RETURN(result);
}


/**
  Enables a session to wait on a condition until a timeout or a network
  disconnect occurs.

  @remark The connection is polled every m_interrupt_interval nanoseconds.
*/

class Interruptible_wait
{
  THD *m_thd;
  struct timespec m_abs_timeout;
  static const ulonglong m_interrupt_interval;

  public:
  Interruptible_wait(THD *thd)
  : m_thd(thd) {}

  ~Interruptible_wait() = default;

 public:
  /**
    Set the absolute timeout.

    @param timeout The amount of time in nanoseconds to wait
  */
  void set_timeout(ulonglong timeout)
  {
    /*
      Calculate the absolute system time at the start so it can
      be controlled in slices. It relies on the fact that once
      the absolute time passes, the timed wait call will fail
      automatically with a timeout error.
    */
    set_timespec_nsec(m_abs_timeout, timeout);
  }

  /** The timed wait. */
  int wait(mysql_cond_t *, mysql_mutex_t *);
};


/** Time to wait before polling the connection status. */
const ulonglong Interruptible_wait::m_interrupt_interval= 5 * 1000000000ULL;


/**
  Wait for a given condition to be signaled.

  @param cond   The condition variable to wait on.
  @param mutex  The associated mutex.

  @remark The absolute timeout is preserved across calls.

  @retval return value from mysql_cond_timedwait
*/

int Interruptible_wait::wait(mysql_cond_t *cond, mysql_mutex_t *mutex)
{
  int error;
  struct timespec timeout;

  while (1)
  {
    /* Wait for a fixed interval. */
    set_timespec_nsec(timeout, m_interrupt_interval);

    /* But only if not past the absolute timeout. */
    if (cmp_timespec(timeout, m_abs_timeout) > 0)
      timeout= m_abs_timeout;

    error= mysql_cond_timedwait(cond, mutex, &timeout);
    if (m_thd->check_killed())
      break;
    if (error == ETIMEDOUT || error == ETIME)
    {
      /* Return error if timed out or connection is broken. */
      if (!cmp_timespec(timeout, m_abs_timeout) || !m_thd->is_connected())
        break;
    }
    /* Otherwise, propagate status to the caller. */
    else
      break;
  }

  return error;
}


/**
  For locks with EXPLICIT duration, MDL returns a new ticket
  every time a lock is granted. This allows to implement recursive
  locks without extra allocation or additional data structures, such
  as below. However, if there are too many tickets in the same
  MDL_context, MDL_context::find_ticket() is getting too slow,
  since it's using a linear search.
  This is why a separate structure is allocated for a user
  level lock, and before requesting a new lock from MDL,
  GET_LOCK() checks thd->ull_hash if such lock is already granted,
  and if so, simply increments a reference counter.
*/

class User_level_lock
{
public:
  MDL_ticket *lock;
  int refs;
};


/** Extract a hash key from User_level_lock. */

uchar *ull_get_key(const uchar *ptr, size_t *length,
                   my_bool not_used __attribute__((unused)))
{
  User_level_lock *ull = (User_level_lock*) ptr;
  MDL_key *key = ull->lock->get_key();
  *length= key->length();
  return (uchar*) key->ptr();
}


/**
  Release all user level locks for this THD.
*/

void mysql_ull_cleanup(THD *thd)
{
  User_level_lock *ull;
  DBUG_ENTER("mysql_ull_cleanup");

  for (uint i= 0; i < thd->ull_hash.records; i++)
  {
    ull = (User_level_lock*) my_hash_element(&thd->ull_hash, i);
    thd->mdl_context.release_lock(ull->lock);
    my_free(ull);
  }

  my_hash_free(&thd->ull_hash);

  DBUG_VOID_RETURN;
}


/**
  Set explicit duration for metadata locks corresponding to
  user level locks to protect them from being released at the end
  of transaction.
*/

void mysql_ull_set_explicit_lock_duration(THD *thd)
{
  User_level_lock *ull;
  DBUG_ENTER("mysql_ull_set_explicit_lock_duration");

  for (uint i= 0; i < thd->ull_hash.records; i++)
  {
    ull= (User_level_lock*) my_hash_element(&thd->ull_hash, i);
    thd->mdl_context.set_lock_duration(ull->lock, MDL_EXPLICIT);
  }
  DBUG_VOID_RETURN;
}


/**
  When MDL detects a lock wait timeout, it pushes
  an error into the statement diagnostics area.
  For GET_LOCK(), lock wait timeout is not an error,
  but a special return value (0).
  Similarly, killing get_lock wait is not an error either,
  but a return value NULL.
  Capture and suppress lock wait timeouts and kills.
*/

class Lock_wait_timeout_handler: public Internal_error_handler
{
public:
  Lock_wait_timeout_handler() :m_lock_wait_timeout(false) {}

  bool m_lock_wait_timeout;

  bool handle_condition(THD * /* thd */, uint sql_errno,
                        const char * /* sqlstate */,
                        Sql_condition::enum_warning_level* /* level */,
                        const char *message,
                        Sql_condition ** /* cond_hdl */);
};

bool
Lock_wait_timeout_handler::
handle_condition(THD *thd, uint sql_errno,
                 const char * /* sqlstate */,
                 Sql_condition::enum_warning_level* /* level */,
                 const char *message,
                 Sql_condition ** /* cond_hdl */)
{
  if (sql_errno == ER_LOCK_WAIT_TIMEOUT)
  {
    m_lock_wait_timeout= true;
    return true;                                /* condition handled */
  }
  if (thd->is_killed())
    return true;

  return false;
}


/* Validate a user-level lock name: non-empty and at most NAME_LEN bytes. */
static int ull_name_ok(String *name)
{
  if (!name || !name->length())
    return 0;

  if (name->length() > NAME_LEN)
  {
    my_error(ER_TOO_LONG_IDENT, MYF(0), name->c_ptr_safe());
    return 0;
  }
  return 1;
}


/**
  Get a user level lock.
@retval
    1    : Got lock
  @retval
    0    : Timeout
  @retval
    NULL : Error
*/

longlong Item_func_get_lock::val_int()
{
  DBUG_ASSERT(fixed());
  String *res= args[0]->val_str(&value);
  double timeout= args[1]->val_real();
  THD *thd= current_thd;
  User_level_lock *ull;
  DBUG_ENTER("Item_func_get_lock::val_int");

  null_value= 1;
  /*
    In slave thread no need to get locks, everything is serialized. Anyway
    there is no way to make GET_LOCK() work on slave like it did on master
    (i.e. make it return exactly the same value) because we don't have the
    same other concurrent threads environment. No matter what we return here,
    it's not guaranteed to be same as on master.
  */
  if (thd->slave_thread)
  {
    null_value= 0;
    DBUG_RETURN(1);
  }

  /* A NULL or negative timeout is rejected with a warning, returns 0. */
  if (args[1]->null_value ||
      (!args[1]->unsigned_flag && ((longlong) timeout < 0)))
  {
    char buf[22];
    if (args[1]->null_value)
      strmov(buf, "NULL");
    else
      llstr(((longlong) timeout), buf);
    push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                        ER_WRONG_VALUE_FOR_TYPE, ER(ER_WRONG_VALUE_FOR_TYPE),
                        "timeout", buf, "get_lock");
    null_value= 1;
    DBUG_RETURN(0);
  }

  if (!ull_name_ok(res))
    DBUG_RETURN(0);
  DBUG_PRINT("enter", ("lock: %.*s", res->length(), res->ptr()));
  /* HASH entries are of type User_level_lock. */
  if (! my_hash_inited(&thd->ull_hash) &&
      my_hash_init(key_memory_User_level_lock, &thd->ull_hash,
                   &my_charset_bin, 16 /* small hash */, 0, 0, ull_get_key,
                   NULL, 0))
  {
    DBUG_RETURN(0);
  }

  MDL_request ull_request;
  MDL_REQUEST_INIT(&ull_request, MDL_key::USER_LOCK, res->c_ptr_safe(), "",
                   MDL_SHARED_NO_WRITE, MDL_EXPLICIT);
  MDL_key *ull_key= &ull_request.key;


  if ((ull= (User_level_lock*)
       my_hash_search(&thd->ull_hash, ull_key->ptr(), ull_key->length())))
  {
    /* Recursive lock */
    ull->refs++;
    null_value= 0;
    DBUG_PRINT("info", ("recursive lock, ref-count: %d", (int) ull->refs));
    DBUG_RETURN(1);
  }

  /* Timeouts and kills are captured by the handler, not raised as errors. */
  Lock_wait_timeout_handler lock_wait_timeout_handler;
  thd->push_internal_handler(&lock_wait_timeout_handler);
  bool error= thd->mdl_context.acquire_lock(&ull_request, timeout);
  (void) thd->pop_internal_handler();
  if (unlikely(error))
  {
    if (lock_wait_timeout_handler.m_lock_wait_timeout)
      null_value= 0;
    DBUG_RETURN(0);
  }

  ull= (User_level_lock*) my_malloc(key_memory_User_level_lock,
                                    sizeof(User_level_lock),
                                    MYF(MY_WME|MY_THREAD_SPECIFIC));
  if (ull == NULL)
  {
    thd->mdl_context.release_lock(ull_request.ticket);
    DBUG_RETURN(0);
  }

  ull->lock= ull_request.ticket;
  ull->refs= 1;

  if (my_hash_insert(&thd->ull_hash, (uchar*) ull))
  {
    thd->mdl_context.release_lock(ull->lock);
    my_free(ull);
    DBUG_RETURN(0);
  }
  null_value= 0;

  DBUG_RETURN(1);
}


/**
  Release all user level locks.
  @return
  - N if N-lock released
  - 0 if lock wasn't held
*/
longlong Item_func_release_all_locks::val_int()
{
  DBUG_ASSERT(fixed());
  THD *thd= current_thd;
  ulong num_unlocked= 0;
  DBUG_ENTER("Item_func_release_all_locks::val_int");
  for (size_t i= 0; i < thd->ull_hash.records; i++)
  {
    auto ull= (User_level_lock *) my_hash_element(&thd->ull_hash, i);
    thd->mdl_context.release_lock(ull->lock);
    num_unlocked+= ull->refs;
    my_free(ull);
  }
  my_hash_free(&thd->ull_hash);
  DBUG_RETURN(num_unlocked);
}


/**
  Release a user level lock.
  @return
    - 1 if lock released
    - 0 if lock wasn't held
    - (SQL) NULL if no such lock
*/

longlong Item_func_release_lock::val_int()
{
  DBUG_ASSERT(fixed());
  String *res= args[0]->val_str(&value);
  THD *thd= current_thd;
  DBUG_ENTER("Item_func_release_lock::val_int");
  null_value= 1;

  if (!ull_name_ok(res))
    DBUG_RETURN(0);

  DBUG_PRINT("enter", ("lock: %.*s", res->length(), res->ptr()));

  MDL_key ull_key;
  ull_key.mdl_key_init(MDL_key::USER_LOCK, res->c_ptr_safe(), "");

  User_level_lock *ull;

  if (!my_hash_inited(&thd->ull_hash) ||
      !(ull=
        (User_level_lock*) my_hash_search(&thd->ull_hash,
                                          ull_key.ptr(), ull_key.length())))
  {
    /* Not ours: 0 if someone else holds it, SQL NULL if nobody does. */
    null_value= thd->mdl_context.get_lock_owner(&ull_key) == 0;
    DBUG_RETURN(0);
  }
  DBUG_PRINT("info", ("ref count: %d", (int) ull->refs));
  null_value= 0;
  if (--ull->refs == 0)
  {
    my_hash_delete(&thd->ull_hash, (uchar*) ull);
    thd->mdl_context.release_lock(ull->lock);
    my_free(ull);
  }
  DBUG_RETURN(1);
}


/**
  Check a user level lock.

  Sets null_value=TRUE on error.

  @retval
    1		Available
  @retval
    0		Already taken, or error
*/

longlong Item_func_is_free_lock::val_int()
{
  DBUG_ASSERT(fixed());
  String *res= args[0]->val_str(&value);
  THD *thd= current_thd;
  null_value= 1;

  if (!ull_name_ok(res))
    return 0;

  MDL_key ull_key;
  ull_key.mdl_key_init(MDL_key::USER_LOCK, res->c_ptr_safe(), "");

  null_value= 0;
  return thd->mdl_context.get_lock_owner(&ull_key) == 0;
}


/* IS_USED_LOCK(): thread id of the lock holder, or SQL NULL if free. */
longlong Item_func_is_used_lock::val_int()
{
  DBUG_ASSERT(fixed());
  String *res= args[0]->val_str(&value);
  THD *thd= current_thd;
  null_value= 1;

  if (!ull_name_ok(res))
    return 0;

  MDL_key ull_key;
  ull_key.mdl_key_init(MDL_key::USER_LOCK, res->c_ptr_safe(), "");
  ulong thread_id = thd->mdl_context.get_lock_owner(&ull_key);
  if (thread_id == 0)
    return 0;

  null_value= 0;
  return thread_id;
}


longlong Item_func_last_insert_id::val_int()
{
  THD *thd= current_thd;
  DBUG_ASSERT(fixed());
  if (arg_count)
  {
    longlong value= args[0]->val_int();
    null_value= args[0]->null_value;
    /*
      LAST_INSERT_ID(X) must affect the client's mysql_insert_id() as
      documented in the manual. We don't want to touch
      first_successful_insert_id_in_cur_stmt because it would make
      LAST_INSERT_ID(X) take precedence over an generated auto_increment
      value for this row.
*/
    thd->arg_of_last_insert_id_function= TRUE;
    thd->first_successful_insert_id_in_prev_stmt= value;
    return value;
  }
  return
    static_cast(thd->read_first_successful_insert_id_in_prev_stmt());
}


bool Item_func_last_insert_id::fix_fields(THD *thd, Item **ref)
{
  /* LAST_INSERT_ID() has a side effect, so the statement is uncacheable. */
  thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT);
  return Item_int_func::fix_fields(thd, ref);
}


/* This function is just used to test speed of different functions */

longlong Item_func_benchmark::val_int()
{
  DBUG_ASSERT(fixed());
  char buff[MAX_FIELD_WIDTH];
  String tmp(buff,sizeof(buff), &my_charset_bin);
  my_decimal tmp_decimal;
  THD *thd= current_thd;
  ulonglong loop_count;

  loop_count= (ulonglong) args[0]->val_int();

  /* NULL or negative iteration counts yield a warning and SQL NULL. */
  if (args[0]->null_value ||
      (!args[0]->unsigned_flag && (((longlong) loop_count) < 0)))
  {
    if (!args[0]->null_value)
    {
      char buff[22];
      llstr(((longlong) loop_count), buff);
      push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                          ER_WRONG_VALUE_FOR_TYPE,
                          ER_THD(thd, ER_WRONG_VALUE_FOR_TYPE),
                          "count", buff, "benchmark");
    }

    null_value= 1;
    return 0;
  }

  null_value=0;
  /* Evaluate the second argument loop_count times; stop if killed. */
  for (ulonglong loop=0 ; loop < loop_count && !thd->killed; loop++)
  {
    switch (args[1]->result_type()) {
    case REAL_RESULT:
      (void) args[1]->val_real();
      break;
    case INT_RESULT:
      (void) args[1]->val_int();
      break;
    case STRING_RESULT:
      (void) args[1]->val_str(&tmp);
      break;
    case DECIMAL_RESULT:
      (void) args[1]->val_decimal(&tmp_decimal);
      break;
    case ROW_RESULT:
    case TIME_RESULT:
      DBUG_ASSERT(0);              // This case should never be chosen
      return 0;
    }
  }
  return 0;
}


void Item_func_benchmark::print(String *str, enum_query_type query_type)
{
  str->append(STRING_WITH_LEN("benchmark("));
  args[0]->print(str, query_type);
  str->append(',');
  args[1]->print(str, query_type);
  str->append(')');
}


mysql_mutex_t LOCK_item_func_sleep;

#ifdef HAVE_PSI_INTERFACE
static PSI_mutex_key key_LOCK_item_func_sleep;

static PSI_mutex_info item_func_sleep_mutexes[]=
{
  { &key_LOCK_item_func_sleep, "LOCK_item_func_sleep", PSI_FLAG_GLOBAL}
};


static void init_item_func_sleep_psi_keys(void)
{
  const char* category= "sql";
  int count;

  if (PSI_server == NULL)
    return;

  count= array_elements(item_func_sleep_mutexes);
  PSI_server->register_mutex(category, item_func_sleep_mutexes, count);
}
#endif

static bool item_func_sleep_inited= 0;


void item_func_sleep_init(void)
{
#ifdef HAVE_PSI_INTERFACE
  init_item_func_sleep_psi_keys();
#endif

  mysql_mutex_init(key_LOCK_item_func_sleep, &LOCK_item_func_sleep, MY_MUTEX_INIT_SLOW);
  item_func_sleep_inited= 1;
}


void item_func_sleep_free(void)
{
  if (item_func_sleep_inited)
  {
    item_func_sleep_inited= 0;
    mysql_mutex_destroy(&LOCK_item_func_sleep);
  }
}


/** This function is just used to create tests with time gaps. */

longlong Item_func_sleep::val_int()
{
  THD *thd= current_thd;
  Interruptible_wait timed_cond(thd);
  mysql_cond_t cond;
  double timeout;
  int error;

  DBUG_ASSERT(fixed());

  timeout= args[0]->val_real();
  /*
    On 64-bit OSX mysql_cond_timedwait() waits forever
    if passed abstime time has already been exceeded by
    the system time.
    When given a very short timeout (< 10 mcs) just return
    immediately.
    We assume that the lines between this test and the call
    to mysql_cond_timedwait() will be executed in less than 0.00001 sec.
  */
  if (timeout < 0.00001)
    return 0;

  timed_cond.set_timeout((ulonglong) (timeout * 1000000000.0));

  mysql_cond_init(key_item_func_sleep_cond, &cond, NULL);
  mysql_mutex_lock(&LOCK_item_func_sleep);

  THD_STAGE_INFO(thd, stage_user_sleep);
  thd->mysys_var->current_mutex= &LOCK_item_func_sleep;
  thd->mysys_var->current_cond=  &cond;

  error= 0;
  thd_wait_begin(thd, THD_WAIT_SLEEP);
  /* Wait in slices so a kill is noticed promptly (see Interruptible_wait). */
  while (!thd->killed)
  {
    error= timed_cond.wait(&cond, &LOCK_item_func_sleep);
    if (error == ETIMEDOUT || error == ETIME)
      break;
    error= 0;
  }
  thd_wait_end(thd);
  mysql_mutex_unlock(&LOCK_item_func_sleep);
  mysql_mutex_lock(&thd->mysys_var->mutex);
  thd->mysys_var->current_mutex= 0;
  thd->mysys_var->current_cond=  0;
  mysql_mutex_unlock(&thd->mysys_var->mutex);

  mysql_cond_destroy(&cond);

#ifdef ENABLED_DEBUG_SYNC
  DBUG_EXECUTE_IF("sleep_inject_query_done_debug_sync", {
      debug_sync_set_action
        (thd, STRING_WITH_LEN("dispatch_command_end SIGNAL query_done"));
    };);
#endif

  return MY_TEST(!error);                  // Return 1 killed
}


bool Item_func_user_var::check_vcol_func_processor(void *arg)
{
  return mark_unsupported_function("@", name.str, arg, VCOL_NON_DETERMINISTIC);
}

#define extra_size sizeof(double)

/*
  Look up a user variable in 'hash' by name; optionally create an empty
  STRING_RESULT entry (name and a small value area are allocated in one
  chunk) when it does not exist yet.  Returns 0 on failure.
*/
user_var_entry *get_variable(HASH *hash, LEX_CSTRING *name,
			     bool create_if_not_exists)
{
  user_var_entry *entry;

  if (!(entry = (user_var_entry*) my_hash_search(hash, (uchar*) name->str,
                                                 name->length)) &&
      create_if_not_exists)
  {
    size_t size=ALIGN_SIZE(sizeof(user_var_entry))+name->length+1+extra_size;
    if (!my_hash_inited(hash))
      return 0;
    if (!(entry = (user_var_entry*) my_malloc(key_memory_user_var_entry, size,
                                              MYF(MY_WME | ME_FATAL |
                                                  MY_THREAD_SPECIFIC))))
      return 0;
    entry->name.str=(char*) entry+ ALIGN_SIZE(sizeof(user_var_entry))+
      extra_size;
    entry->name.length=name->length;
    entry->value=0;
    entry->length=0;
    entry->update_query_id=0;
    entry->set_charset(NULL);
    entry->unsigned_flag= 0;
    /*
      If we
are here, we were called from a SET or a query which sets a + variable. Imagine it is this: + INSERT INTO t SELECT @a:=10, @a:=@a+1. + Then when we have a Item_func_get_user_var (because of the @a+1) so we + think we have to write the value of @a to the binlog. But before that, + we have a Item_func_set_user_var to create @a (@a:=10), in this we mark + the variable as "already logged" (line below) so that it won't be logged + by Item_func_get_user_var (because that's not necessary). + */ + entry->used_query_id=current_thd->query_id; + entry->type=STRING_RESULT; + memcpy((char*) entry->name.str, name->str, name->length+1); + if (my_hash_insert(hash,(uchar*) entry)) + { + my_free(entry); + return 0; + } + } + return entry; +} + + +void Item_func_set_user_var::cleanup() +{ + Item_func::cleanup(); + m_var_entry= NULL; +} + + +bool Item_func_set_user_var::set_entry(THD *thd, bool create_if_not_exists) +{ + if (m_var_entry && thd->thread_id == entry_thread_id) + goto end; // update entry->update_query_id for PS + if (!(m_var_entry= get_variable(&thd->user_vars, &name, create_if_not_exists))) + { + entry_thread_id= 0; + return TRUE; + } + entry_thread_id= thd->thread_id; + /* + Remember the last query which updated it, this way a query can later know + if this variable is a constant item in the query (it is if update_query_id + is different from query_id). + */ +end: + m_var_entry->update_query_id= thd->query_id; + return FALSE; +} + + +/* + When a user variable is updated (in a SET command or a query like + SELECT @a:= ). +*/ + +bool Item_func_set_user_var::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed() == 0); + /* fix_fields will call Item_func_set_user_var::fix_length_and_dec */ + if (Item_func::fix_fields(thd, ref) || set_entry(thd, TRUE)) + return TRUE; + /* + As it is wrong and confusing to associate any + character set with NULL, @a should be latin2 + after this query sequence: + + SET @a=_latin2'string'; + SET @a=NULL; + + I.e. 
the second query should not change the charset + to the current default value, but should keep the + original value assigned during the first query. + In order to do it, we don't copy charset + from the argument if the argument is NULL + and the variable has previously been initialized. + */ + null_item= (args[0]->type() == NULL_ITEM); + if (!m_var_entry->charset() || !null_item) + m_var_entry->set_charset(args[0]->collation.derivation == DERIVATION_NUMERIC ? + &my_charset_numeric : args[0]->collation.collation); + collation.set(m_var_entry->charset(), + args[0]->collation.derivation == DERIVATION_NUMERIC ? + DERIVATION_NUMERIC : DERIVATION_IMPLICIT); + switch (args[0]->result_type()) { + case STRING_RESULT: + case TIME_RESULT: + set_handler(type_handler_long_blob. + type_handler_adjusted_to_max_octet_length(max_length, + collation.collation)); + break; + case REAL_RESULT: + set_handler(&type_handler_double); + break; + case INT_RESULT: + set_handler(Type_handler::type_handler_long_or_longlong(max_char_length(), + unsigned_flag)); + break; + case DECIMAL_RESULT: + set_handler(&type_handler_newdecimal); + break; + case ROW_RESULT: + DBUG_ASSERT(0); + set_handler(&type_handler_row); + break; + } + if (thd->lex->current_select) + { + /* + When this function is used in a derived table/view force the derived + table to be materialized to preserve possible side-effect of setting a + user variable. 
+ */ + SELECT_LEX_UNIT *unit= thd->lex->current_select->master_unit(); + TABLE_LIST *derived; + for (derived= unit->derived; + derived; + derived= unit->derived) + { + derived->set_materialized_derived(); + derived->prohibit_cond_pushdown= true; + if (unit->with_element && unit->with_element->is_recursive) + break; + unit= derived->select_lex->master_unit(); + } + } + + return FALSE; +} + + +bool +Item_func_set_user_var::fix_length_and_dec(THD *thd) +{ + base_flags|= (args[0]->base_flags & item_base_t::MAYBE_NULL); + decimals=args[0]->decimals; + if (args[0]->collation.derivation == DERIVATION_NUMERIC) + { + collation.set(DERIVATION_NUMERIC); + fix_length_and_charset(args[0]->max_char_length(), &my_charset_numeric); + } + else + { + collation.set(DERIVATION_IMPLICIT); + fix_length_and_charset(args[0]->max_char_length(), + args[0]->collation.collation); + } + unsigned_flag= args[0]->unsigned_flag; + return FALSE; +} + + +/* + Mark field in read_map + + NOTES + This is used by filesort to register used fields in a a temporary + column read set or to register used fields in a view +*/ + +bool Item_func_set_user_var::register_field_in_read_map(void *arg) +{ + if (result_field) + { + TABLE *table= (TABLE *) arg; + if (result_field->table == table || !table) + bitmap_set_bit(result_field->table->read_set, result_field->field_index); + if (result_field->vcol_info) + return result_field->vcol_info-> + expr->walk(&Item::register_field_in_read_map, 1, arg); + } + return 0; +} + +/* + Mark field in bitmap supplied as *arg + +*/ + +bool Item_func_set_user_var::register_field_in_bitmap(void *arg) +{ + MY_BITMAP *bitmap = (MY_BITMAP *) arg; + DBUG_ASSERT(bitmap); + if (result_field) + { + if (!bitmap) + return 1; + bitmap_set_bit(bitmap, result_field->field_index); + } + return 0; +} + +/** + Set value to user variable. + + @param entry pointer to structure representing variable + @param set_null should we set NULL value ? 
+ @param ptr pointer to buffer with new value + @param length length of new value + @param type type of new value + @param cs charset info for new value + @param unsigned_arg indicates if a value of type INT_RESULT is unsigned + + @note Sets error and fatal error if allocation fails. + + @retval + false success + @retval + true failure +*/ + +bool +update_hash(user_var_entry *entry, bool set_null, void *ptr, size_t length, + Item_result type, CHARSET_INFO *cs, + bool unsigned_arg) +{ + if (set_null) + { + char *pos= (char*) entry+ ALIGN_SIZE(sizeof(user_var_entry)); + if (entry->value && entry->value != pos) + my_free(entry->value); + entry->value= 0; + entry->length= 0; + } + else + { + if (type == STRING_RESULT) + length++; // Store strings with end \0 + if (length <= extra_size) + { + /* Save value in value struct */ + char *pos= (char*) entry+ ALIGN_SIZE(sizeof(user_var_entry)); + if (entry->value != pos) + { + if (entry->value) + my_free(entry->value); + entry->value=pos; + } + } + else + { + /* Allocate variable */ + if (entry->length != length) + { + char *pos= (char*) entry+ ALIGN_SIZE(sizeof(user_var_entry)); + if (entry->value == pos) + entry->value=0; + entry->value= (char*) my_realloc(key_memory_user_var_entry_value, + entry->value, length, + MYF(MY_ALLOW_ZERO_PTR | MY_WME | + ME_FATAL | MY_THREAD_SPECIFIC)); + if (!entry->value) + return 1; + } + } + if (type == STRING_RESULT) + { + length--; // Fix length change above + entry->value[length]= 0; // Store end \0 + } + if (length) + memmove(entry->value, ptr, length); + if (type == DECIMAL_RESULT) + ((my_decimal*)entry->value)->fix_buffer_pointer(); + entry->length= length; + entry->set_charset(cs); + entry->unsigned_flag= unsigned_arg; + } + entry->type=type; +#ifdef USER_VAR_TRACKING +#ifndef EMBEDDED_LIBRARY + THD *thd= current_thd; + thd->session_tracker.user_variables.mark_as_changed(thd, entry); +#endif +#endif // USER_VAR_TRACKING + return 0; +} + + +bool 
+Item_func_set_user_var::update_hash(void *ptr, size_t length, + Item_result res_type, + CHARSET_INFO *cs, + bool unsigned_arg) +{ + /* + If we set a variable explicitly to NULL then keep the old + result type of the variable + */ + if (args[0]->type() == Item::FIELD_ITEM) + { + /* args[0]->null_value may be outdated */ + null_value= ((Item_field*)args[0])->field->is_null(); + } + else + null_value= args[0]->null_value; + if (null_value && null_item) + res_type= m_var_entry->type; // Don't change type of item + if (::update_hash(m_var_entry, null_value, + ptr, length, res_type, cs, unsigned_arg)) + { + null_value= 1; + return 1; + } + return 0; +} + + +/** Get the value of a variable as a double. */ + +double user_var_entry::val_real(bool *null_value) +{ + if ((*null_value= (value == 0))) + return 0.0; + + switch (type) { + case REAL_RESULT: + return *(double*) value; + case INT_RESULT: + return (double) *(longlong*) value; + case DECIMAL_RESULT: + return ((my_decimal *)value)->to_double(); + case STRING_RESULT: + return my_atof(value); // This is null terminated + case ROW_RESULT: + case TIME_RESULT: + DBUG_ASSERT(0); // Impossible + break; + } + return 0.0; // Impossible +} + + +/** Get the value of a variable as an integer. */ + +longlong user_var_entry::val_int(bool *null_value) const +{ + if ((*null_value= (value == 0))) + return 0; + + switch (type) { + case REAL_RESULT: + return (longlong) *(double*) value; + case INT_RESULT: + return *(longlong*) value; + case DECIMAL_RESULT: + return ((my_decimal *)value)->to_longlong(false); + case STRING_RESULT: + { + int error; + return my_strtoll10(value, (char**) 0, &error);// String is null terminated + } + case ROW_RESULT: + case TIME_RESULT: + DBUG_ASSERT(0); // Impossible + break; + } + return 0; // Impossible +} + + +/** Get the value of a variable as a string. 
*/ + +String *user_var_entry::val_str(bool *null_value, String *str, + uint decimals) const +{ + if ((*null_value= (value == 0))) + return (String*) 0; + + switch (type) { + case REAL_RESULT: + str->set_real(*(double*) value, decimals, charset()); + break; + case INT_RESULT: + if (!unsigned_flag) + str->set(*(longlong*) value, charset()); + else + str->set(*(ulonglong*) value, charset()); + break; + case DECIMAL_RESULT: + str_set_decimal((my_decimal *) value, str, charset()); + break; + case STRING_RESULT: + if (str->copy(value, length, charset())) + str= 0; // EOM error + break; + case ROW_RESULT: + case TIME_RESULT: + DBUG_ASSERT(0); // Impossible + break; + } + return(str); +} + +/** Get the value of a variable as a decimal. */ + +my_decimal *user_var_entry::val_decimal(bool *null_value, my_decimal *val) +{ + if ((*null_value= (value == 0))) + return 0; + + switch (type) { + case REAL_RESULT: + double2my_decimal(E_DEC_FATAL_ERROR, *(double*) value, val); + break; + case INT_RESULT: + int2my_decimal(E_DEC_FATAL_ERROR, *(longlong*) value, 0, val); + break; + case DECIMAL_RESULT: + my_decimal2decimal((my_decimal *) value, val); + break; + case STRING_RESULT: + str2my_decimal(E_DEC_FATAL_ERROR, value, length, charset(), val); + break; + case ROW_RESULT: + case TIME_RESULT: + DBUG_ASSERT(0); // Impossible + break; + } + return(val); +} + +/** + This function is invoked on SET \@variable or + \@variable:= expression. + + Evaluate (and check expression), store results. + + @note + For now it always returns OK. All problems with value evaluation + will be caught by thd->is_error() check in sql_set_variables(). + + @retval + FALSE OK. +*/ + +bool +Item_func_set_user_var::check(bool use_result_field) +{ + DBUG_ENTER("Item_func_set_user_var::check"); + if (use_result_field && !result_field) + use_result_field= FALSE; + + switch (result_type()) { + case REAL_RESULT: + { + save_result.vreal= use_result_field ? 
result_field->val_real() : + args[0]->val_real(); + break; + } + case INT_RESULT: + { + save_result.vint= use_result_field ? result_field->val_int() : + args[0]->val_int(); + unsigned_flag= (use_result_field ? + ((Field_num*)result_field)->unsigned_flag: + args[0]->unsigned_flag); + break; + } + case STRING_RESULT: + { + save_result.vstr= use_result_field ? result_field->val_str(&value) : + args[0]->val_str(&value); + break; + } + case DECIMAL_RESULT: + { + save_result.vdec= use_result_field ? + result_field->val_decimal(&decimal_buff) : + args[0]->val_decimal(&decimal_buff); + break; + } + case ROW_RESULT: + case TIME_RESULT: + DBUG_ASSERT(0); // This case should never be chosen + break; + } + DBUG_RETURN(FALSE); +} + + +/** + @brief Evaluate and store item's result. + This function is invoked on "SELECT ... INTO @var ...". + + @param item An item to get value from. +*/ + +void Item_func_set_user_var::save_item_result(Item *item) +{ + DBUG_ENTER("Item_func_set_user_var::save_item_result"); + + switch (args[0]->result_type()) { + case REAL_RESULT: + save_result.vreal= item->val_result(); + break; + case INT_RESULT: + save_result.vint= item->val_int_result(); + unsigned_flag= item->unsigned_flag; + break; + case STRING_RESULT: + save_result.vstr= item->str_result(&value); + break; + case DECIMAL_RESULT: + save_result.vdec= item->val_decimal_result(&decimal_buff); + break; + case ROW_RESULT: + case TIME_RESULT: + DBUG_ASSERT(0); // This case should never be chosen + break; + } + DBUG_VOID_RETURN; +} + + +/** + This functions is invoked on + SET \@variable or \@variable:= expression. 
+ + @note + We have to store the expression as such in the variable, independent of + the value method used by the user + + @retval + 0 OK + @retval + 1 EOM Error + +*/ + +bool +Item_func_set_user_var::update() +{ + bool res= 0; + DBUG_ENTER("Item_func_set_user_var::update"); + + switch (result_type()) { + case REAL_RESULT: + { + res= update_hash((void*) &save_result.vreal,sizeof(save_result.vreal), + REAL_RESULT, &my_charset_numeric, 0); + break; + } + case INT_RESULT: + { + res= update_hash((void*) &save_result.vint, sizeof(save_result.vint), + INT_RESULT, &my_charset_numeric, unsigned_flag); + break; + } + case STRING_RESULT: + { + if (!save_result.vstr) // Null value + res= update_hash((void*) 0, 0, STRING_RESULT, &my_charset_bin, 0); + else + res= update_hash((void*) save_result.vstr->ptr(), + save_result.vstr->length(), STRING_RESULT, + save_result.vstr->charset(), 0); + break; + } + case DECIMAL_RESULT: + { + if (!save_result.vdec) // Null value + res= update_hash((void*) 0, 0, DECIMAL_RESULT, &my_charset_bin, 0); + else + res= update_hash((void*) save_result.vdec, + sizeof(my_decimal), DECIMAL_RESULT, + &my_charset_numeric, 0); + break; + } + case ROW_RESULT: + case TIME_RESULT: + DBUG_ASSERT(0); // This case should never be chosen + break; + } + DBUG_RETURN(res); +} + + +double Item_func_set_user_var::val_real() +{ + DBUG_ASSERT(fixed()); + check(0); + update(); // Store expression + return m_var_entry->val_real(&null_value); +} + +longlong Item_func_set_user_var::val_int() +{ + DBUG_ASSERT(fixed()); + check(0); + update(); // Store expression + return m_var_entry->val_int(&null_value); +} + +String *Item_func_set_user_var::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + check(0); + update(); // Store expression + return m_var_entry->val_str(&null_value, str, decimals); +} + + +my_decimal *Item_func_set_user_var::val_decimal(my_decimal *val) +{ + DBUG_ASSERT(fixed()); + check(0); + update(); // Store expression + return 
m_var_entry->val_decimal(&null_value, val); +} + + +double Item_func_set_user_var::val_result() +{ + DBUG_ASSERT(fixed()); + check(TRUE); + update(); // Store expression + return m_var_entry->val_real(&null_value); +} + +longlong Item_func_set_user_var::val_int_result() +{ + DBUG_ASSERT(fixed()); + check(TRUE); + update(); // Store expression + return m_var_entry->val_int(&null_value); +} + +bool Item_func_set_user_var::val_bool_result() +{ + DBUG_ASSERT(fixed()); + check(TRUE); + update(); // Store expression + return m_var_entry->val_int(&null_value) != 0; +} + +String *Item_func_set_user_var::str_result(String *str) +{ + DBUG_ASSERT(fixed()); + check(TRUE); + update(); // Store expression + return m_var_entry->val_str(&null_value, str, decimals); +} + + +my_decimal *Item_func_set_user_var::val_decimal_result(my_decimal *val) +{ + DBUG_ASSERT(fixed()); + check(TRUE); + update(); // Store expression + return m_var_entry->val_decimal(&null_value, val); +} + + +bool Item_func_set_user_var::is_null_result() +{ + DBUG_ASSERT(fixed()); + check(TRUE); + update(); // Store expression + return is_null(); +} + + +void Item_func_set_user_var::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("@")); + str->append(&name); + str->append(STRING_WITH_LEN(":=")); + args[0]->print_parenthesised(str, query_type, precedence()); +} + + +void Item_func_set_user_var::print_as_stmt(String *str, + enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("set @")); + str->append(&name); + str->append(STRING_WITH_LEN(":=")); + args[0]->print_parenthesised(str, query_type, precedence()); +} + +bool Item_func_set_user_var::send(Protocol *protocol, st_value *buffer) +{ + if (result_field) + { + check(1); + update(); + return protocol->store(result_field); + } + return Item::send(protocol, buffer); +} + +void Item_func_set_user_var::make_send_field(THD *thd, Send_field *tmp_field) +{ + if (result_field) + { + result_field->make_send_field(tmp_field); + 
DBUG_ASSERT(tmp_field->table_name.str != 0); + if (Item::name.str) + tmp_field->col_name= Item::name; // Use user supplied name + } + else + Item::make_send_field(thd, tmp_field); +} + + +/* + Save the value of a user variable into a field + + SYNOPSIS + save_in_field() + field target field to save the value to + no_conversion flag indicating whether conversions are allowed + + DESCRIPTION + Save the function value into a field and update the user variable + accordingly. If a result field is defined and the target field doesn't + coincide with it then the value from the result field will be used as + the new value of the user variable. + + The reason to have this method rather than simply using the result + field in the val_xxx() methods is that the value from the result field + not always can be used when the result field is defined. + Let's consider the following cases: + 1) when filling a tmp table the result field is defined but the value of it + is undefined because it has to be produced yet. Thus we can't use it. + 2) on execution of an INSERT ... SELECT statement the save_in_field() + function will be called to fill the data in the new record. If the SELECT + part uses a tmp table then the result field is defined and should be + used in order to get the correct result. + + The difference between the SET_USER_VAR function and regular functions + like CONCAT is that the Item_func objects for the regular functions are + replaced by Item_field objects after the values of these functions have + been stored in a tmp table. Yet an object of the Item_field class cannot + be used to update a user variable. + Due to this we have to handle the result field in a special way here and + in the Item_func_set_user_var::send() function. + + RETURN VALUES + FALSE Ok + TRUE Error +*/ + +int Item_func_set_user_var::save_in_field(Field *field, bool no_conversions, + bool can_use_result_field) +{ + bool use_result_field= (!can_use_result_field ? 
0 : + (result_field && result_field != field)); + int error; + + /* Update the value of the user variable */ + check(use_result_field); + update(); + + if (result_type() == STRING_RESULT || + (result_type() == REAL_RESULT && + field->result_type() == STRING_RESULT)) + { + String *result; + CHARSET_INFO *cs= collation.collation; + char buff[MAX_FIELD_WIDTH]; // Alloc buffer for small columns + str_value.set_buffer_if_not_allocated(buff, sizeof(buff), cs); + result= m_var_entry->val_str(&null_value, &str_value, decimals); + + if (null_value) + { + str_value.set_buffer_if_not_allocated(0, 0, cs); + return set_field_to_null_with_conversions(field, no_conversions); + } + + /* NOTE: If null_value == FALSE, "result" must be not NULL. */ + + field->set_notnull(); + error=field->store(result->ptr(),result->length(),cs); + str_value.set_buffer_if_not_allocated(0, 0, cs); + } + else if (result_type() == REAL_RESULT) + { + double nr= m_var_entry->val_real(&null_value); + if (null_value) + return set_field_to_null(field); + field->set_notnull(); + error=field->store(nr); + } + else if (result_type() == DECIMAL_RESULT) + { + my_decimal decimal_value; + my_decimal *val= m_var_entry->val_decimal(&null_value, &decimal_value); + if (null_value) + return set_field_to_null(field); + field->set_notnull(); + error=field->store_decimal(val); + } + else + { + longlong nr= m_var_entry->val_int(&null_value); + if (null_value) + return set_field_to_null_with_conversions(field, no_conversions); + field->set_notnull(); + error=field->store(nr, unsigned_flag); + } + return error; +} + + +String * +Item_func_get_user_var::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + DBUG_ENTER("Item_func_get_user_var::val_str"); + if (!m_var_entry) + DBUG_RETURN((String*) 0); // No such variable + DBUG_RETURN(m_var_entry->val_str(&null_value, str, decimals)); +} + + +double Item_func_get_user_var::val_real() +{ + DBUG_ASSERT(fixed()); + if (!m_var_entry) + return 0.0; // No such variable + return 
(m_var_entry->val_real(&null_value)); +} + + +my_decimal *Item_func_get_user_var::val_decimal(my_decimal *dec) +{ + DBUG_ASSERT(fixed()); + if (!m_var_entry) + return 0; + return m_var_entry->val_decimal(&null_value, dec); +} + + +longlong Item_func_get_user_var::val_int() +{ + DBUG_ASSERT(fixed()); + if (!m_var_entry) + return 0; // No such variable + return (m_var_entry->val_int(&null_value)); +} + + +/** + Get variable by name and, if necessary, put the record of variable + use into the binary log. + + When a user variable is invoked from an update query (INSERT, UPDATE etc), + stores this variable and its value in thd->user_var_events, so that it can be + written to the binlog (will be written just before the query is written, see + log.cc). + + @param thd Current thread + @param name Variable name + @param[out] out_entry variable structure or NULL. The pointer is set + regardless of whether function succeeded or not. + + @retval + 0 OK + @retval + 1 Failed to put appropriate record into binary log + +*/ + +static int +get_var_with_binlog(THD *thd, enum_sql_command sql_command, + LEX_CSTRING *name, user_var_entry **out_entry) +{ + BINLOG_USER_VAR_EVENT *user_var_event; + user_var_entry *var_entry; + var_entry= get_variable(&thd->user_vars, name, 0); + + /* + Any reference to user-defined variable which is done from stored + function or trigger affects their execution and the execution of the + calling statement. We must log all such variables even if they are + not involved in table-updating statements. + */ + if (!(opt_bin_log && + (is_update_query(sql_command) || thd->in_sub_stmt))) + { + *out_entry= var_entry; + return 0; + } + + if (!var_entry) + { + /* + If the variable does not exist, it's NULL, but we want to create it so + that it gets into the binlog (if it didn't, the slave could be + influenced by a variable of the same name previously set by another + thread). + We create it like if it had been explicitly set with SET before. 
+ The 'new' mimics what sql_yacc.yy does when 'SET @a=10;'. + sql_set_variables() is what is called from 'case SQLCOM_SET_OPTION' + in dispatch_command()). Instead of building a one-element list to pass to + sql_set_variables(), we could instead manually call check() and update(); + this would save memory and time; but calling sql_set_variables() makes + one unique place to maintain (sql_set_variables()). + + Manipulation with lex is necessary since free_underlaid_joins + is going to release memory belonging to the main query. + */ + + List<set_var_base> tmp_var_list; + LEX *sav_lex= thd->lex, lex_tmp; + thd->lex= &lex_tmp; + lex_start(thd); + tmp_var_list.push_back(new (thd->mem_root) + set_var_user(new (thd->mem_root) + Item_func_set_user_var(thd, name, + new (thd->mem_root) Item_null(thd))), + thd->mem_root); + /* Create the variable if the above allocations succeeded */ + if (unlikely(thd->is_fatal_error) || + unlikely(sql_set_variables(thd, &tmp_var_list, false))) + { + thd->lex= sav_lex; + goto err; + } + thd->lex= sav_lex; + if (unlikely(!(var_entry= get_variable(&thd->user_vars, name, 0)))) + goto err; + } + else if (var_entry->used_query_id == thd->query_id || + mysql_bin_log.is_query_in_union(thd, var_entry->used_query_id)) + { + /* + If this variable was already stored in user_var_events by this query + (because it's used in more than one place in the query), don't store + it. + */ + *out_entry= var_entry; + return 0; + } + + size_t size; + /* + First we need to store value of var_entry, when the next situation + appears: + > set @a:=1; + > insert into t1 values (@a), (@a:=@a+1), (@a:=@a+1); + We have to write to binlog value @a= 1. + + We allocate the user_var_event on user_var_events_alloc pool, not on + the this-statement-execution pool because in SPs user_var_event objects + may need to be valid after current [SP] statement execution pool is + destroyed. 
+ */ + size= ALIGN_SIZE(sizeof(BINLOG_USER_VAR_EVENT)) + var_entry->length; + if (unlikely(!(user_var_event= (BINLOG_USER_VAR_EVENT *) + alloc_root(thd->user_var_events_alloc, size)))) + goto err; + + user_var_event->value= (char*) user_var_event + + ALIGN_SIZE(sizeof(BINLOG_USER_VAR_EVENT)); + user_var_event->user_var_event= var_entry; + user_var_event->type= var_entry->type; + user_var_event->charset_number= var_entry->charset()->number; + user_var_event->unsigned_flag= var_entry->unsigned_flag; + if (!var_entry->value) + { + /* NULL value*/ + user_var_event->length= 0; + user_var_event->value= 0; + } + else + { + user_var_event->length= var_entry->length; + memcpy(user_var_event->value, var_entry->value, + var_entry->length); + } + /* Mark that this variable has been used by this query */ + var_entry->used_query_id= thd->query_id; + if (insert_dynamic(&thd->user_var_events, (uchar*) &user_var_event)) + goto err; + + *out_entry= var_entry; + return 0; + +err: + *out_entry= var_entry; + return 1; +} + +bool Item_func_get_user_var::fix_length_and_dec(THD *thd) +{ + int error; + set_maybe_null(); + decimals=NOT_FIXED_DEC; + max_length=MAX_BLOB_WIDTH; + + error= get_var_with_binlog(thd, thd->lex->sql_command, &name, &m_var_entry); + + /* + If the variable didn't exist it has been created as a STRING-type. + 'm_var_entry' is NULL only if there occurred an error during the call to + get_var_with_binlog. 
+ */ + if (likely(!error && m_var_entry)) + { + unsigned_flag= m_var_entry->unsigned_flag; + max_length= (uint32)m_var_entry->length; + switch (m_var_entry->type) { + case REAL_RESULT: + collation.set(&my_charset_numeric, DERIVATION_NUMERIC); + fix_char_length(DBL_DIG + 8); + set_handler(&type_handler_double); + break; + case INT_RESULT: + collation.set(&my_charset_numeric, DERIVATION_NUMERIC); + fix_char_length(MAX_BIGINT_WIDTH); + decimals=0; + if (unsigned_flag) + set_handler(&type_handler_ulonglong); + else + set_handler(&type_handler_slonglong); + break; + case STRING_RESULT: + collation.set(m_var_entry->charset(), DERIVATION_IMPLICIT); + max_length= MAX_BLOB_WIDTH - 1; + set_handler(&type_handler_long_blob); + break; + case DECIMAL_RESULT: + collation.set(&my_charset_numeric, DERIVATION_NUMERIC); + fix_char_length(DECIMAL_MAX_STR_LENGTH); + decimals= DECIMAL_MAX_SCALE; + set_handler(&type_handler_newdecimal); + break; + case ROW_RESULT: // Keep compiler happy + case TIME_RESULT: + DBUG_ASSERT(0); // This case should never be chosen + break; + } + } + else + { + collation.set(&my_charset_bin, DERIVATION_IMPLICIT); + null_value= 1; + set_handler(&type_handler_long_blob); + max_length= MAX_BLOB_WIDTH; + } + return false; +} + + +bool Item_func_get_user_var::const_item() const +{ + return (!m_var_entry || + current_thd->query_id != m_var_entry->update_query_id); +} + + +void Item_func_get_user_var::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("@")); + append_identifier(current_thd, str, &name); +} + + +bool Item_func_get_user_var::eq(const Item *item, bool binary_cmp) const +{ + /* Assume we don't have rtti */ + if (this == item) + return 1; // Same item is same. 
+ /* Check if other type is also a get_user_var() object */ + if (item->type() != FUNC_ITEM || + ((Item_func*) item)->functype() != functype()) + return 0; + Item_func_get_user_var *other=(Item_func_get_user_var*) item; + return (name.length == other->name.length && + !memcmp(name.str, other->name.str, name.length)); +} + + +bool Item_func_get_user_var::set_value(THD *thd, + sp_rcontext * /*ctx*/, Item **it) +{ + LEX_CSTRING tmp_name= get_name(); + Item_func_set_user_var *suv= new (thd->mem_root) Item_func_set_user_var(thd, &tmp_name, *it); + /* + Item_func_set_user_var is not fixed after construction, call + fix_fields(). + */ + return (!suv || suv->fix_fields(thd, it) || suv->check(0) || suv->update()); +} + + +bool Item_user_var_as_out_param::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(!fixed()); + DBUG_ASSERT(thd->lex->exchange); + if (!(entry= get_variable(&thd->user_vars, &org_name, 1))) + return TRUE; + entry->type= STRING_RESULT; + /* + Let us set the same collation which is used for loading + of fields in LOAD DATA INFILE. + (Since Item_user_var_as_out_param is used only there). + */ + entry->set_charset(thd->lex->exchange->cs ? 
+ thd->lex->exchange->cs : + thd->variables.collation_database); + entry->update_query_id= thd->query_id; + return FALSE; +} + + +void Item_user_var_as_out_param::set_null_value(CHARSET_INFO* cs) +{ + ::update_hash(entry, TRUE, 0, 0, STRING_RESULT, cs, 0 /* unsigned_arg */); +} + + +void Item_user_var_as_out_param::set_value(const char *str, uint length, + CHARSET_INFO* cs) +{ + ::update_hash(entry, FALSE, (void*)str, length, STRING_RESULT, cs, + 0 /* unsigned_arg */); +} + + +double Item_user_var_as_out_param::val_real() +{ + DBUG_ASSERT(0); + return 0.0; +} + + +longlong Item_user_var_as_out_param::val_int() +{ + DBUG_ASSERT(0); + return 0; +} + + +String* Item_user_var_as_out_param::val_str(String *str) +{ + DBUG_ASSERT(0); + return 0; +} + + +my_decimal* Item_user_var_as_out_param::val_decimal(my_decimal *decimal_buffer) +{ + DBUG_ASSERT(0); + return 0; +} + + +bool Item_user_var_as_out_param::get_date(THD *thd, MYSQL_TIME *ltime, + date_mode_t fuzzydate) +{ + DBUG_ASSERT(0); + return true; +} + + +void Item_user_var_as_out_param::load_data_print_for_log_event(THD *thd, + String *str) + const +{ + str->append('@'); + append_identifier(thd, str, &org_name); +} + + +Item_func_get_system_var:: +Item_func_get_system_var(THD *thd, sys_var *var_arg, enum_var_type var_type_arg, + LEX_CSTRING *component_arg, const char *name_arg, + size_t name_len_arg): + Item_func(thd), var(var_arg), var_type(var_type_arg), + orig_var_type(var_type_arg), component(*component_arg), cache_present(0) +{ + /* set_name() will allocate the name */ + set_name(thd, name_arg, (uint) name_len_arg, system_charset_info); +} + + +bool Item_func_get_system_var::is_written_to_binlog() +{ + return var->is_written_to_binlog(var_type); +} + + +void Item_func_get_system_var::update_null_value() +{ + THD *thd= current_thd; + int save_no_errors= thd->no_errors; + thd->no_errors= TRUE; + type_handler()->Item_update_null_value(this); + thd->no_errors= save_no_errors; +} + + +bool 
Item_func_get_system_var::fix_length_and_dec(THD *thd) +{ + const char *cptr; + set_maybe_null(); + max_length= 0; + + if (var->check_type(var_type)) + { + if (var_type != OPT_DEFAULT) + { + my_error(ER_INCORRECT_GLOBAL_LOCAL_VAR, MYF(0), + var->name.str, var_type == OPT_GLOBAL ? "SESSION" : "GLOBAL"); + return TRUE; + } + /* As there was no local variable, return the global value */ + var_type= OPT_GLOBAL; + } + + switch (var->show_type()) + { + case SHOW_HA_ROWS: + case SHOW_UINT: + case SHOW_ULONG: + case SHOW_ULONGLONG: + unsigned_flag= TRUE; + /* fall through */ + case SHOW_SINT: + case SHOW_SLONG: + case SHOW_SLONGLONG: + collation= DTCollation_numeric(); + fix_char_length(MY_INT64_NUM_DECIMAL_DIGITS); + decimals=0; + break; + case SHOW_CHAR: + case SHOW_CHAR_PTR: + mysql_mutex_lock(&LOCK_global_system_variables); + cptr= var->show_type() == SHOW_CHAR ? + reinterpret_cast<const char*>(var->value_ptr(thd, var_type, + &component)) : + *reinterpret_cast<const char* const*>(var->value_ptr(thd, + var_type, + &component)); + if (cptr) + max_length= (uint32) system_charset_info->numchars(cptr, + cptr + strlen(cptr)); + mysql_mutex_unlock(&LOCK_global_system_variables); + collation.set(system_charset_info, DERIVATION_SYSCONST); + max_length*= system_charset_info->mbmaxlen; + decimals=NOT_FIXED_DEC; + break; + case SHOW_LEX_STRING: + { + mysql_mutex_lock(&LOCK_global_system_variables); + const LEX_STRING *ls= + reinterpret_cast<const LEX_STRING*>(var->value_ptr(current_thd, + var_type, + &component)); + max_length= (uint32) system_charset_info->numchars(ls->str, + ls->str + ls->length); + mysql_mutex_unlock(&LOCK_global_system_variables); + collation.set(system_charset_info, DERIVATION_SYSCONST); + max_length*= system_charset_info->mbmaxlen; + decimals=NOT_FIXED_DEC; + } + break; + case SHOW_BOOL: + case SHOW_MY_BOOL: + collation= DTCollation_numeric(); + fix_char_length(1); + decimals=0; + break; + case SHOW_DOUBLE: + decimals= 6; + collation= DTCollation_numeric(); + fix_char_length(DBL_DIG + 6); + break; + default: 
+ my_error(ER_VAR_CANT_BE_READ, MYF(0), var->name.str); + break; + } + return FALSE; +} + + +void Item_func_get_system_var::print(String *str, enum_query_type query_type) +{ + if (name.length) + str->append(&name); + else + { + str->append(STRING_WITH_LEN("@@")); + if (component.length) + { + str->append(&component); + str->append('.'); + } + else if (var_type == SHOW_OPT_GLOBAL && var->scope() != sys_var::GLOBAL) + { + str->append(STRING_WITH_LEN("global.")); + } + str->append(&var->name); + } +} + +bool Item_func_get_system_var::check_vcol_func_processor(void *arg) +{ + return mark_unsupported_function("@@", var->name.str, arg, VCOL_SESSION_FUNC); +} + + +const Type_handler *Item_func_get_system_var::type_handler() const +{ + switch (var->show_type()) + { + case SHOW_BOOL: + case SHOW_MY_BOOL: + case SHOW_SINT: + case SHOW_SLONG: + case SHOW_SLONGLONG: + return &type_handler_slonglong; + case SHOW_UINT: + case SHOW_ULONG: + case SHOW_ULONGLONG: + case SHOW_HA_ROWS: + return &type_handler_ulonglong; + case SHOW_CHAR: + case SHOW_CHAR_PTR: + case SHOW_LEX_STRING: + return &type_handler_varchar; + case SHOW_DOUBLE: + return &type_handler_double; + default: + my_error(ER_VAR_CANT_BE_READ, MYF(0), var->name.str); + return &type_handler_varchar; // keep the compiler happy + } +} + + +longlong Item_func_get_system_var::val_int() +{ + THD *thd= current_thd; + + DBUG_EXECUTE_IF("simulate_non_gtid_aware_master", + { + if (0 == strcmp("gtid_domain_id", var->name.str)) + { + my_error(ER_VAR_CANT_BE_READ, MYF(0), var->name.str); + return 0; + } + }); + if (cache_present && thd->query_id == used_query_id) + { + if (cache_present & GET_SYS_VAR_CACHE_LONG) + { + null_value= cached_null_value; + return cached_llval; + } + else if (cache_present & GET_SYS_VAR_CACHE_DOUBLE) + { + null_value= cached_null_value; + cached_llval= (longlong) cached_dval; + cache_present|= GET_SYS_VAR_CACHE_LONG; + return cached_llval; + } + else if (cache_present & GET_SYS_VAR_CACHE_STRING) + { + 
null_value= cached_null_value; + if (!null_value) + cached_llval= longlong_from_string_with_check(&cached_strval); + else + cached_llval= 0; + cache_present|= GET_SYS_VAR_CACHE_LONG; + return cached_llval; + } + } + + cached_llval= var->val_int(&null_value, thd, var_type, &component); + cache_present |= GET_SYS_VAR_CACHE_LONG; + used_query_id= thd->query_id; + cached_null_value= null_value; + return cached_llval; +} + + +String* Item_func_get_system_var::val_str(String* str) +{ + THD *thd= current_thd; + + if (cache_present && thd->query_id == used_query_id) + { + if (cache_present & GET_SYS_VAR_CACHE_STRING) + { + null_value= cached_null_value; + return null_value ? NULL : &cached_strval; + } + else if (cache_present & GET_SYS_VAR_CACHE_LONG) + { + null_value= cached_null_value; + if (!null_value) + cached_strval.set (cached_llval, collation.collation); + cache_present|= GET_SYS_VAR_CACHE_STRING; + return null_value ? NULL : &cached_strval; + } + else if (cache_present & GET_SYS_VAR_CACHE_DOUBLE) + { + null_value= cached_null_value; + if (!null_value) + cached_strval.set_real (cached_dval, decimals, collation.collation); + cache_present|= GET_SYS_VAR_CACHE_STRING; + return null_value ? 
NULL : &cached_strval; + } + } + + str= var->val_str(&cached_strval, thd, var_type, &component); + cache_present|= GET_SYS_VAR_CACHE_STRING; + used_query_id= thd->query_id; + cached_null_value= null_value= !str; + return str; +} + + +double Item_func_get_system_var::val_real() +{ + THD *thd= current_thd; + + if (cache_present && thd->query_id == used_query_id) + { + if (cache_present & GET_SYS_VAR_CACHE_DOUBLE) + { + null_value= cached_null_value; + return cached_dval; + } + else if (cache_present & GET_SYS_VAR_CACHE_LONG) + { + null_value= cached_null_value; + cached_dval= (double)cached_llval; + cache_present|= GET_SYS_VAR_CACHE_DOUBLE; + return cached_dval; + } + else if (cache_present & GET_SYS_VAR_CACHE_STRING) + { + null_value= cached_null_value; + if (!null_value) + cached_dval= double_from_string_with_check(&cached_strval); + else + cached_dval= 0; + cache_present|= GET_SYS_VAR_CACHE_DOUBLE; + return cached_dval; + } + } + + cached_dval= var->val_real(&null_value, thd, var_type, &component); + cache_present |= GET_SYS_VAR_CACHE_DOUBLE; + used_query_id= thd->query_id; + cached_null_value= null_value; + return cached_dval; +} + + +bool Item_func_get_system_var::eq(const Item *item, bool binary_cmp) const +{ + /* Assume we don't have rtti */ + if (this == item) + return 1; // Same item is same. 
+ /* Check if other type is also a get_system_var() object */ + if (item->type() != FUNC_ITEM || + ((Item_func*) item)->functype() != functype()) + return 0; + Item_func_get_system_var *other=(Item_func_get_system_var*) item; + return (var == other->var && var_type == other->var_type); +} + + +void Item_func_get_system_var::cleanup() +{ + Item_func::cleanup(); + cache_present= 0; + var_type= orig_var_type; + cached_strval.free(); +} + +/** + @retval + 0 ok + 1 OOM error +*/ + +bool Item_func_match::init_search(THD *thd, bool no_order) +{ + DBUG_ENTER("Item_func_match::init_search"); + + if (!table->file->is_open()) + DBUG_RETURN(0); + + /* Check if init_search() has been called before */ + if (ft_handler) + { + if (join_key) + table->file->ft_handler= ft_handler; + DBUG_RETURN(0); + } + + if (key == NO_SUCH_KEY) + { + List<Item> fields; + fields.push_back(new (thd->mem_root) + Item_string(thd, " ", 1, cmp_collation.collation), + thd->mem_root); + for (uint i= 1; i < arg_count; i++) + fields.push_back(args[i]); + concat_ws= new (thd->mem_root) Item_func_concat_ws(thd, fields); + if (unlikely(thd->is_fatal_error)) + DBUG_RETURN(1); // OOM in new or push_back + /* + Above function used only to get value and do not need fix_fields for it: + Item_string - basic constant + fields - fix_fields() was already called for these arguments + Item_func_concat_ws - do not need fix_fields() to produce value + */ + concat_ws->quick_fix_field(); + } + + if (master) + { + join_key= master->join_key= join_key | master->join_key; + if (master->init_search(thd, no_order)) + DBUG_RETURN(1); + ft_handler= master->ft_handler; + join_key= master->join_key; + DBUG_RETURN(0); + } + + String *ft_tmp= 0; + + // MATCH ... 
AGAINST (NULL) is meaningless, but possible + if (!(ft_tmp=key_item()->val_str(&value))) + { + ft_tmp= &value; + value.set("", 0, cmp_collation.collation); + } + + if (ft_tmp->charset() != cmp_collation.collation) + { + uint dummy_errors; + if (search_value.copy(ft_tmp->ptr(), ft_tmp->length(), ft_tmp->charset(), + cmp_collation.collation, &dummy_errors)) + DBUG_RETURN(1); + ft_tmp= &search_value; + } + + if (join_key && !no_order) + match_flags|=FT_SORTED; + + if (key != NO_SUCH_KEY) + THD_STAGE_INFO(table->in_use, stage_fulltext_initialization); + + ft_handler= table->file->ft_init_ext(match_flags, key, ft_tmp); + + if (!ft_handler) + DBUG_RETURN(1); + if (join_key) + table->file->ft_handler=ft_handler; + + DBUG_RETURN(0); +} + + +bool Item_func_match::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed() == 0); + Item *UNINIT_VAR(item); // Safe as arg_count is > 1 + + status_var_increment(thd->status_var.feature_fulltext); + + set_maybe_null(); + join_key=0; + + /* + const_item is assumed in quite a bit of places, so it would be difficult + to remove; If it would ever to be removed, this should include + modifications to find_best and auto_close as complement to auto_init code + above. + */ + if (Item_func::fix_fields(thd, ref) || + !args[0]->const_during_execution()) + { + my_error(ER_WRONG_ARGUMENTS,MYF(0),"AGAINST"); + return TRUE; + } + + bool allows_multi_table_search= true; + const_item_cache=0; + table= 0; + for (uint i=1 ; i < arg_count ; i++) + { + + item= args[i]->real_item(); + /* + When running in PS mode, some Item_field's can already be replaced + to Item_func_conv_charset during PREPARE time. This is possible + in case of "MATCH (f1,..,fN) AGAINST (... IN BOOLEAN MODE)" + when running without any fulltext indexes and when fields f1..fN + have different character sets. + So we check for FIELD_ITEM only during prepare time and in non-PS mode, + and do not check in PS execute time. 
+ */ + if (!thd->stmt_arena->is_stmt_execute() && + item->type() != Item::FIELD_ITEM) + { + my_error(ER_WRONG_ARGUMENTS, MYF(0), "MATCH"); + return TRUE; + } + /* + During the prepare-time execution of fix_fields() of a PS query some + Item_fields's could have been already replaced to Item_func_conv_charset + (by the call for agg_arg_charsets_for_comparison below()). + But agg_arg_charsets_for_comparison() is written in a way that + at least *one* of the Item_field's is not replaced. + This makes sure that "table" gets initialized during PS execution time. + */ + if (item->type() == Item::FIELD_ITEM) + table= ((Item_field *)item)->field->table; + + allows_multi_table_search &= allows_search_on_non_indexed_columns(table); + } + + /* + Check that all columns come from the same table. + We've already checked that columns in MATCH are fields so + PARAM_TABLE_BIT can only appear from AGAINST argument. + */ + if ((used_tables_cache & ~PARAM_TABLE_BIT) != item->used_tables()) + key=NO_SUCH_KEY; + + if (key == NO_SUCH_KEY && !allows_multi_table_search) + { + my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH"); + return TRUE; + } + if (!(table->file->ha_table_flags() & HA_CAN_FULLTEXT)) + { + my_error(ER_TABLE_CANT_HANDLE_FT, MYF(0), table->file->table_type()); + return 1; + } + table->fulltext_searched=1; + return agg_arg_charsets_for_comparison(cmp_collation, args+1, arg_count-1); +} + +bool Item_func_match::fix_index() +{ + Item_field *item; + uint ft_to_key[MAX_KEY], ft_cnt[MAX_KEY], fts=0, keynr; + uint max_cnt=0, mkeys=0, i; + + /* + We will skip execution if the item is not fixed + with fix_field + */ + if (!fixed()) + return false; + + if (key == NO_SUCH_KEY) + return 0; + + if (!table) + goto err; + + for (keynr=0 ; keynr < table->s->keys ; keynr++) + { + if ((table->key_info[keynr].flags & HA_FULLTEXT) && + (match_flags & FT_BOOL ? 
+ table->keys_in_use_for_query.is_set(keynr) : + table->s->usable_indexes(table->in_use).is_set(keynr))) + { + ft_to_key[fts]=keynr; + ft_cnt[fts]=0; + fts++; + } + } + + if (!fts) + goto err; + + for (i=1; i < arg_count; i++) + { + Item *real_item= args[i]->real_item(); + if (real_item->type() != FIELD_ITEM) + goto err; + item=(Item_field*)real_item; + for (keynr=0 ; keynr < fts ; keynr++) + { + KEY *ft_key=&table->key_info[ft_to_key[keynr]]; + uint key_parts=ft_key->user_defined_key_parts; + + for (uint part=0 ; part < key_parts ; part++) + { + if (item->field->eq(ft_key->key_part[part].field)) + ft_cnt[keynr]++; + } + } + } + + for (keynr=0 ; keynr < fts ; keynr++) + { + if (ft_cnt[keynr] > max_cnt) + { + mkeys=0; + max_cnt=ft_cnt[mkeys]=ft_cnt[keynr]; + ft_to_key[mkeys]=ft_to_key[keynr]; + continue; + } + if (max_cnt && ft_cnt[keynr] == max_cnt) + { + mkeys++; + ft_cnt[mkeys]=ft_cnt[keynr]; + ft_to_key[mkeys]=ft_to_key[keynr]; + continue; + } + } + + for (keynr=0 ; keynr <= mkeys ; keynr++) + { + // partial keys doesn't work + if (max_cnt < arg_count-1 || + max_cnt < table->key_info[ft_to_key[keynr]].user_defined_key_parts) + continue; + + key=ft_to_key[keynr]; + + return 0; + } + +err: + if (allows_search_on_non_indexed_columns(table)) + { + key=NO_SUCH_KEY; + return 0; + } + my_message(ER_FT_MATCHING_KEY_NOT_FOUND, + ER(ER_FT_MATCHING_KEY_NOT_FOUND), MYF(0)); + return 1; +} + + +bool Item_func_match::eq(const Item *item, bool binary_cmp) const +{ + if (item->type() != FUNC_ITEM || + ((Item_func*)item)->functype() != FT_FUNC || + match_flags != ((Item_func_match*)item)->match_flags) + return 0; + + Item_func_match *ifm=(Item_func_match*) item; + + if (key == ifm->key && table == ifm->table && + key_item()->eq(ifm->key_item(), binary_cmp)) + return 1; + + return 0; +} + + +double Item_func_match::val_real() +{ + DBUG_ASSERT(fixed()); + DBUG_ENTER("Item_func_match::val"); + if (ft_handler == NULL) + DBUG_RETURN(-1.0); + + if (key != NO_SUCH_KEY && 
table->null_row) /* NULL row from an outer join */ + DBUG_RETURN(0.0); + + if (join_key) + { + if (table->file->ft_handler) + DBUG_RETURN(ft_handler->please->get_relevance(ft_handler)); + join_key=0; + } + + if (key == NO_SUCH_KEY) + { + String *a= concat_ws->val_str(&value); + if ((null_value= (a == 0)) || !a->length()) + DBUG_RETURN(0); + DBUG_RETURN(ft_handler->please->find_relevance(ft_handler, + (uchar *)a->ptr(), a->length())); + } + DBUG_RETURN(ft_handler->please->find_relevance(ft_handler, + table->record[0], 0)); +} + +void Item_func_match::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("(match ")); + print_args(str, 1, query_type); + str->append(STRING_WITH_LEN(" against (")); + args[0]->print(str, query_type); + if (match_flags & FT_BOOL) + str->append(STRING_WITH_LEN(" in boolean mode")); + else if (match_flags & FT_EXPAND) + str->append(STRING_WITH_LEN(" with query expansion")); + str->append(STRING_WITH_LEN("))")); +} + + +class Func_handler_bit_xor_int_to_ulonglong: + public Item_handled_func::Handler_ulonglong +{ +public: + Longlong_null to_longlong_null(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + return item->arguments()[0]->to_longlong_null() ^ + item->arguments()[1]->to_longlong_null(); + } +}; + + +class Func_handler_bit_xor_dec_to_ulonglong: + public Item_handled_func::Handler_ulonglong +{ +public: + Longlong_null to_longlong_null(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + return VDec(item->arguments()[0]).to_xlonglong_null() ^ + VDec(item->arguments()[1]).to_xlonglong_null(); + } +}; + + +bool Item_func_bit_xor::fix_length_and_dec(THD *thd) +{ + static const Func_handler_bit_xor_int_to_ulonglong ha_int_to_ull; + static const Func_handler_bit_xor_dec_to_ulonglong ha_dec_to_ull; + return fix_length_and_dec_op2_std(&ha_int_to_ull, &ha_dec_to_ull); +} + + +/*************************************************************************** + System variables 
+****************************************************************************/ + +/** + Return value of an system variable base[.name] as a constant item. + + @param thd Thread handler + @param var_type global / session + @param name Name of base or system variable + @param component Component. + + @note + If component.str = 0 then the variable name is in 'name' + + @return + - 0 : error + - # : constant item +*/ + + +Item *get_system_var(THD *thd, enum_var_type var_type, + const LEX_CSTRING *name, + const LEX_CSTRING *component) +{ + sys_var *var; + LEX_CSTRING base_name, component_name; + + if (component->str) + { + base_name= *component; + component_name= *name; + } + else + { + base_name= *name; + component_name= *component; // Empty string + } + + if (!(var= find_sys_var(thd, base_name.str, base_name.length))) + return 0; + if (component->str) + { + if (!var->is_struct()) + { + my_error(ER_VARIABLE_IS_NOT_STRUCT, MYF(0), base_name.str); + return 0; + } + } + thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT); + + set_if_smaller(component_name.length, MAX_SYS_VAR_LENGTH); + + return new (thd->mem_root) Item_func_get_system_var(thd, var, var_type, + &component_name, + NULL, 0); +} + + +longlong Item_func_row_count::val_int() +{ + DBUG_ASSERT(fixed()); + THD *thd= current_thd; + + return thd->get_row_count_func(); +} + + + + +Item_func_sp::Item_func_sp(THD *thd, Name_resolution_context *context_arg, + sp_name *name, const Sp_handler *sph): + Item_func(thd), Item_sp(thd, context_arg, name), m_handler(sph) +{ + set_maybe_null(); +} + + +Item_func_sp::Item_func_sp(THD *thd, Name_resolution_context *context_arg, + sp_name *name_arg, const Sp_handler *sph, + List &list): + Item_func(thd, list), Item_sp(thd, context_arg, name_arg), m_handler(sph) +{ + set_maybe_null(); +} + + +void +Item_func_sp::cleanup() +{ + Item_sp::cleanup(); + Item_func::cleanup(); +} + +LEX_CSTRING +Item_func_sp::func_name_cstring() const +{ + return Item_sp::func_name_cstring(current_thd, + 
m_handler == &sp_handler_package_function); +} + + +void my_missing_function_error(const LEX_CSTRING &token, const char *func_name) +{ + if (token.length && is_lex_native_function (&token)) + my_error(ER_FUNC_INEXISTENT_NAME_COLLISION, MYF(0), func_name); + else + my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "FUNCTION", func_name); +} + + +/** + @note + Deterministic stored procedures are considered inexpensive. + Consequently such procedures may be evaluated during optimization, + if they are constant (checked by the optimizer). +*/ + +bool Item_func_sp::is_expensive() +{ + return !m_sp->detistic() || + current_thd->locked_tables_mode < LTM_LOCK_TABLES; +} + + +/** + @brief Initialize local members with values from the Field interface. + + @note called from Item::fix_fields. +*/ + +bool Item_func_sp::fix_length_and_dec(THD *thd) +{ + DBUG_ENTER("Item_func_sp::fix_length_and_dec"); + + DBUG_ASSERT(sp_result_field); + Type_std_attributes::set(sp_result_field->type_std_attributes()); + // There is a bug in the line below. See MDEV-11292 for details. + collation.derivation= DERIVATION_COERCIBLE; + set_maybe_null(); + + DBUG_RETURN(FALSE); +} + + +bool +Item_func_sp::execute() +{ + /* Execute function and store the return value in the field. 
*/ + return Item_sp::execute(current_thd, &null_value, args, arg_count); +} + + +void +Item_func_sp::make_send_field(THD *thd, Send_field *tmp_field) +{ + DBUG_ENTER("Item_func_sp::make_send_field"); + DBUG_ASSERT(sp_result_field); + sp_result_field->make_send_field(tmp_field); + if (name.str) + { + DBUG_ASSERT(name.length == strlen(name.str)); + tmp_field->col_name= name; + } + DBUG_VOID_RETURN; +} + + +const Type_handler *Item_func_sp::type_handler() const +{ + DBUG_ENTER("Item_func_sp::type_handler"); + DBUG_PRINT("info", ("m_sp = %p", (void *) m_sp)); + DBUG_ASSERT(sp_result_field); + // This converts ENUM/SET to STRING + const Type_handler *handler= sp_result_field->type_handler(); + DBUG_RETURN(handler->type_handler_for_item_field()); +} + + +longlong Item_func_found_rows::val_int() +{ + DBUG_ASSERT(fixed()); + return current_thd->found_rows(); +} + + +longlong Item_func_oracle_sql_rowcount::val_int() +{ + DBUG_ASSERT(fixed()); + THD *thd= current_thd; + /* + In case when a query like this: + INSERT a INTO @va FROM t1; + returns multiple rows, SQL%ROWCOUNT should report 1 rather than -1. + */ + longlong rows= thd->get_row_count_func(); + return rows != -1 ? rows : // ROW_COUNT() + thd->found_rows(); // FOUND_ROWS() +} + + +longlong Item_func_sqlcode::val_int() +{ + DBUG_ASSERT(fixed()); + DBUG_ASSERT(!null_value); + Diagnostics_area::Sql_condition_iterator it= + current_thd->get_stmt_da()->sql_conditions(); + const Sql_condition *err; + if ((err= it++)) + return err->get_sql_errno(); + return 0; +} + + +bool +Item_func_sp::fix_fields(THD *thd, Item **ref) +{ + bool res; + DBUG_ENTER("Item_func_sp::fix_fields"); + DBUG_ASSERT(fixed() == 0); + sp_head *sp= m_handler->sp_find_routine(thd, m_name, true); + + /* + Checking privileges to execute the function while creating view and + executing the function of select. 
+ */ + if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) || + (thd->lex->sql_command == SQLCOM_CREATE_VIEW)) + { + Security_context *save_security_ctx= thd->security_ctx; + if (context && context->security_ctx) + thd->security_ctx= context->security_ctx; + + /* + If the routine is not found, let's still check EXECUTE_ACL to decide + whether to return "Access denied" or "Routine does not exist". + */ + res= sp ? sp->check_execute_access(thd) : + check_routine_access(thd, EXECUTE_ACL, &m_name->m_db, + &m_name->m_name, + &sp_handler_function, false); + thd->security_ctx= save_security_ctx; + + if (res) + { + process_error(thd); + DBUG_RETURN(res); + } + } + + + /* Custom aggregates are transformed into an Item_sum_sp. We can not do this + earlier as we have no way of knowing what kind of Item we should create + when parsing the query. + + TODO(cvicentiu): See if this limitation can be lifted. + */ + + DBUG_ASSERT(m_sp == NULL); + if (!(m_sp= sp)) + { + my_missing_function_error(m_name->m_name, ErrConvDQName(m_name).ptr()); + process_error(thd); + DBUG_RETURN(TRUE); + } + + /* + We must call init_result_field before Item_func::fix_fields() + to make m_sp and result_field members available to fix_length_and_dec(), + which is called from Item_func::fix_fields(). 
+ */ + res= init_result_field(thd, max_length, maybe_null(), &null_value, &name); + + if (res) + DBUG_RETURN(TRUE); + + if (m_sp->agg_type() == GROUP_AGGREGATE) + { + Item_sum_sp *item_sp; + Query_arena *arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); + + if (arg_count) + { + List list; + for (uint i= 0; i < arg_count; i++) + list.push_back(args[i]); + item_sp= new (thd->mem_root) Item_sum_sp(thd, context, m_name, sp, list); + } + else + item_sp= new (thd->mem_root) Item_sum_sp(thd, context, m_name, sp); + + if (arena) + thd->restore_active_arena(arena, &backup); + if (!item_sp) + DBUG_RETURN(TRUE); + *ref= item_sp; + item_sp->name= name; + bool err= item_sp->fix_fields(thd, ref); + if (err) + DBUG_RETURN(TRUE); + + DBUG_RETURN(FALSE); + } + + res= Item_func::fix_fields(thd, ref); + + if (res) + DBUG_RETURN(TRUE); + + if (thd->lex->is_view_context_analysis()) + { + /* + Here we check privileges of the stored routine only during view + creation, in order to validate the view. A runtime check is + performed in Item_func_sp::execute(), and this method is not + called during context analysis. Notice, that during view + creation we do not infer into stored routine bodies and do not + check privileges of its statements, which would probably be a + good idea especially if the view has SQL SECURITY DEFINER and + the used stored procedure has SQL SECURITY DEFINER. + */ + res= sp_check_access(thd); +#ifndef NO_EMBEDDED_ACCESS_CHECKS + /* + Try to set and restore the security context to see whether it's valid + */ + Security_context *save_secutiry_ctx; + res= set_routine_security_ctx(thd, m_sp, &save_secutiry_ctx); + if (!res) + m_sp->m_security_ctx.restore_security_context(thd, save_secutiry_ctx); + +#endif /* ! 
NO_EMBEDDED_ACCESS_CHECKS */ + } + + if (!m_sp->detistic()) + { + used_tables_cache |= RAND_TABLE_BIT; + const_item_cache= FALSE; + } + + DBUG_RETURN(res); +} + + +void Item_func_sp::update_used_tables() +{ + Item_func::update_used_tables(); + + if (!m_sp->detistic()) + { + used_tables_cache |= RAND_TABLE_BIT; + const_item_cache= FALSE; + } +} + +bool Item_func_sp::check_vcol_func_processor(void *arg) +{ + return mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE); +} + +/* + uuid_short handling. + + The short uuid is defined as a longlong that contains the following bytes: + + Bytes Comment + 1 Server_id & 255 + 4 Startup time of server in seconds + 3 Incrementor + + This means that an uuid is guaranteed to be unique + even in a replication environment if the following holds: + + - The last byte of the server id is unique + - If you between two shutdown of the server don't get more than + an average of 2^24 = 16M calls to uuid_short() per second. +*/ + +ulonglong uuid_value; + +void uuid_short_init() +{ + uuid_value= ((((ulonglong) global_system_variables.server_id) << 56) + + (((ulonglong) server_start_time) << 24)); +} + +ulonglong server_uuid_value() +{ + ulonglong val; + mysql_mutex_lock(&LOCK_short_uuid_generator); + val= uuid_value++; + mysql_mutex_unlock(&LOCK_short_uuid_generator); + return val; +} + +longlong Item_func_uuid_short::val_int() +{ + return (longlong) server_uuid_value(); +} + + +/** + Last_value - return last argument. 
+*/ + +void Item_func_last_value::evaluate_sideeffects() +{ + DBUG_ASSERT(fixed() && arg_count > 0); + for (uint i= 0; i < arg_count-1 ; i++) + args[i]->val_int(); +} + +String *Item_func_last_value::val_str(String *str) +{ + String *tmp; + evaluate_sideeffects(); + tmp= last_value->val_str(str); + null_value= last_value->null_value; + return tmp; +} + + +bool Item_func_last_value::val_native(THD *thd, Native *to) +{ + evaluate_sideeffects(); + return val_native_from_item(thd, last_value, to); +} + + +longlong Item_func_last_value::val_int() +{ + longlong tmp; + evaluate_sideeffects(); + tmp= last_value->val_int(); + null_value= last_value->null_value; + return tmp; +} + +double Item_func_last_value::val_real() +{ + double tmp; + evaluate_sideeffects(); + tmp= last_value->val_real(); + null_value= last_value->null_value; + return tmp; +} + +my_decimal *Item_func_last_value::val_decimal(my_decimal *decimal_value) +{ + my_decimal *tmp; + evaluate_sideeffects(); + tmp= last_value->val_decimal(decimal_value); + null_value= last_value->null_value; + return tmp; +} + + +bool Item_func_last_value::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + evaluate_sideeffects(); + bool tmp= last_value->get_date(thd, ltime, fuzzydate); + null_value= last_value->null_value; + return tmp; +} + + +bool Item_func_last_value::fix_length_and_dec(THD *thd) +{ + last_value= args[arg_count -1]; + Type_std_attributes::set(last_value); + set_maybe_null(last_value->maybe_null()); + return FALSE; +} + + +void Cursor_ref::print_func(String *str, const LEX_CSTRING &func_name) +{ + append_identifier(current_thd, str, &m_cursor_name); + str->append(func_name); +} + + +sp_cursor *Cursor_ref::get_open_cursor_or_error() +{ + THD *thd= current_thd; + sp_cursor *c= thd->spcont->get_cursor(m_cursor_offset); + DBUG_ASSERT(c); + if (!c/*safety*/ || !c->is_open()) + { + my_message(ER_SP_CURSOR_NOT_OPEN, ER_THD(thd, ER_SP_CURSOR_NOT_OPEN), + MYF(0)); + return NULL; + } + return c; +} + + 
+longlong Item_func_cursor_isopen::val_int() +{ + sp_cursor *c= current_thd->spcont->get_cursor(m_cursor_offset); + DBUG_ASSERT(c != NULL); + return c ? c->is_open() : 0; +} + + +longlong Item_func_cursor_found::val_int() +{ + sp_cursor *c= get_open_cursor_or_error(); + return !(null_value= (!c || c->fetch_count() == 0)) && c->found(); +} + + +longlong Item_func_cursor_notfound::val_int() +{ + sp_cursor *c= get_open_cursor_or_error(); + return !(null_value= (!c || c->fetch_count() == 0)) && !c->found(); +} + + +longlong Item_func_cursor_rowcount::val_int() +{ + sp_cursor *c= get_open_cursor_or_error(); + return !(null_value= !c) ? c->row_count() : 0; +} + +/***************************************************************************** + SEQUENCE functions +*****************************************************************************/ + +longlong Item_func_nextval::val_int() +{ + longlong value; + int error; + const char *key; + uint length= get_table_def_key(table_list, &key); + THD *thd; + SEQUENCE_LAST_VALUE *entry; + char buff[80]; + String key_buff(buff,sizeof(buff), &my_charset_bin); + DBUG_ENTER("Item_func_nextval::val_int"); + update_table(); + DBUG_ASSERT(table && table->s->sequence); + thd= table->in_use; + + if (thd->count_cuted_fields == CHECK_FIELD_EXPRESSION) + { + /* Alter table checking if function works */ + null_value= 0; + DBUG_RETURN(0); + } + + if (table->s->tmp_table != NO_TMP_TABLE) + { + /* + Temporary tables has an extra \0 at end to distinguish it from + normal tables + */ + key_buff.copy(key, length, &my_charset_bin); + key_buff.append((char) 0); + key= key_buff.ptr(); + length++; + } + + if (!(entry= ((SEQUENCE_LAST_VALUE*) + my_hash_search(&thd->sequences, (uchar*) key, length)))) + { + if (!(key= (char*) my_memdup(PSI_INSTRUMENT_ME, key, length, MYF(MY_WME))) || + !(entry= new SEQUENCE_LAST_VALUE((uchar*) key, length))) + { + /* EOM, error given */ + my_free((char*) key); + delete entry; + null_value= 1; + DBUG_RETURN(0); + } + if 
(my_hash_insert(&thd->sequences, (uchar*) entry)) + { + /* EOM, error given */ + delete entry; + null_value= 1; + DBUG_RETURN(0); + } + } + entry->null_value= null_value= 0; + value= table->s->sequence->next_value(table, 0, &error); + entry->value= value; + entry->set_version(table); + + if (unlikely(error)) // Warning already printed + entry->null_value= null_value= 1; // For not strict mode + DBUG_RETURN(value); +} + + +/* Print for nextval and lastval */ + +void Item_func_nextval::print(String *str, enum_query_type query_type) +{ + char d_name_buff[MAX_ALIAS_NAME], t_name_buff[MAX_ALIAS_NAME]; + LEX_CSTRING d_name= table_list->db; + LEX_CSTRING t_name= table_list->table_name; + bool use_db_name= d_name.str && d_name.str[0]; + THD *thd= current_thd; // Don't trust 'table' + + str->append(func_name_cstring()); + str->append('('); + + /* + for next_val we assume that table_list has been updated to contain + the current db. + */ + + if (lower_case_table_names > 0) + { + strmake(t_name_buff, t_name.str, MAX_ALIAS_NAME-1); + t_name.length= my_casedn_str(files_charset_info, t_name_buff); + t_name.str= t_name_buff; + if (use_db_name) + { + strmake(d_name_buff, d_name.str, MAX_ALIAS_NAME-1); + d_name.length= my_casedn_str(files_charset_info, d_name_buff); + d_name.str= d_name_buff; + } + } + + if (use_db_name) + { + append_identifier(thd, str, &d_name); + str->append('.'); + } + append_identifier(thd, str, &t_name); + str->append(')'); +} + + +/* Return last used value for sequence or NULL if sequence hasn't been used */ + +longlong Item_func_lastval::val_int() +{ + const char *key; + SEQUENCE_LAST_VALUE *entry; + uint length= get_table_def_key(table_list, &key); + THD *thd; + char buff[80]; + String key_buff(buff,sizeof(buff), &my_charset_bin); + DBUG_ENTER("Item_func_lastval::val_int"); + update_table(); + thd= table->in_use; + + if (table->s->tmp_table != NO_TMP_TABLE) + { + /* + Temporary tables has an extra \0 at end to distinguish it from + normal tables + */ + 
key_buff.copy(key, length, &my_charset_bin); + key_buff.append((char) 0); + key= key_buff.ptr(); + length++; + } + + if (!(entry= ((SEQUENCE_LAST_VALUE*) + my_hash_search(&thd->sequences, (uchar*) key, length)))) + { + /* Sequence not used */ + null_value= 1; + DBUG_RETURN(0); + } + if (entry->check_version(table)) + { + /* Table droped and re-created, remove current version */ + my_hash_delete(&thd->sequences, (uchar*) entry); + null_value= 1; + DBUG_RETURN(0); + } + + null_value= entry->null_value; + DBUG_RETURN(entry->value); +} + + +/* + Sets next value to be returned from sequences + + SELECT setval(foo, 42, 0); Next nextval will return 43 + SELECT setval(foo, 42, 0, true); Same as above + SELECT setval(foo, 42, 0, false); Next nextval will return 42 +*/ + +longlong Item_func_setval::val_int() +{ + longlong value; + int error; + THD *thd; + DBUG_ENTER("Item_func_setval::val_int"); + + update_table(); + DBUG_ASSERT(table && table->s->sequence); + thd= table->in_use; + + if (unlikely(thd->count_cuted_fields == CHECK_FIELD_EXPRESSION)) + { + /* Alter table checking if function works */ + null_value= 0; + DBUG_RETURN(0); + } + + value= nextval; + error= table->s->sequence->set_value(table, nextval, round, is_used); + if (unlikely(error)) + { + null_value= 1; + value= 0; + } + DBUG_RETURN(value); +} + + +/* Print for setval */ + +void Item_func_setval::print(String *str, enum_query_type query_type) +{ + char d_name_buff[MAX_ALIAS_NAME], t_name_buff[MAX_ALIAS_NAME]; + LEX_CSTRING d_name= table_list->db; + LEX_CSTRING t_name= table_list->table_name; + bool use_db_name= d_name.str && d_name.str[0]; + THD *thd= current_thd; // Don't trust 'table' + + str->append(func_name_cstring()); + str->append('('); + + /* + for next_val we assume that table_list has been updated to contain + the current db. 
+ */ + + if (lower_case_table_names > 0) + { + strmake(t_name_buff, t_name.str, MAX_ALIAS_NAME-1); + t_name.length= my_casedn_str(files_charset_info, t_name_buff); + t_name.str= t_name_buff; + if (use_db_name) + { + strmake(d_name_buff, d_name.str, MAX_ALIAS_NAME-1); + d_name.length= my_casedn_str(files_charset_info, d_name_buff); + d_name.str= d_name_buff; + } + } + + if (use_db_name) + { + append_identifier(thd, str, &d_name); + str->append('.'); + } + append_identifier(thd, str, &t_name); + str->append(','); + str->append_longlong(nextval); + str->append(','); + str->append_longlong(is_used); + str->append(','); + str->append_ulonglong(round); + str->append(')'); +} + + +/* + Return how many row combinations has accepted so far + 1 + + The + 1 is to ensure that, for example, 'WHERE ROWNUM <=1' returns one row +*/ + +longlong Item_func_rownum::val_int() +{ + if (!accepted_rows) + { + /* + Rownum is not properly set up. Probably used in wrong context when + it should not be used. In this case returning 0 is probably the best + solution. + */ + return 0; + } + return (longlong) *accepted_rows+1; +} + + +Item_func_rownum::Item_func_rownum(THD *thd): + Item_longlong_func(thd),accepted_rows(0) +{ + /* + Remember the select context. + Add the function to the list fix_after_optimize in the select context + so that we can easily initializef all rownum functions with the pointers + to the row counters. 
+ */ + select= thd->lex->current_select; + select->fix_after_optimize.push_back(this, thd->mem_root); + + /* + Mark that query is using rownum() and ensure that this select is + not merged with other selects + */ + select->with_rownum= 1; + thd->lex->with_rownum= 1; + thd->lex->uncacheable(UNCACHEABLE_RAND); + with_flags= with_flags | item_with_t::ROWNUM_FUNC; + + /* If this command changes data, mark it as unsafe for statement logging */ + if (sql_command_flags[thd->lex->sql_command] & + (CF_UPDATES_DATA | CF_DELETES_DATA)) + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); +} + + +/* + Store a reference to the variable that contains number of accepted rows +*/ + +void Item_func_rownum::fix_after_optimize(THD *thd) +{ + accepted_rows= &select->join->accepted_rows; +} + +/* + Inform all ROWNUM() function where the number of rows are stored +*/ + +void fix_rownum_pointers(THD *thd, SELECT_LEX *select_lex, ha_rows *ptr) +{ + List_iterator li(select_lex->fix_after_optimize); + while (Item *item= li++) + { + if (item->type() == Item::FUNC_ITEM && + ((Item_func*) item)->functype() == Item_func::ROWNUM_FUNC) + ((Item_func_rownum*) item)->store_pointer_to_row_counter(ptr); + } +} diff --git a/sql/item_func.h b/sql/item_func.h new file mode 100644 index 00000000..435875bd --- /dev/null +++ b/sql/item_func.h @@ -0,0 +1,4240 @@ +#ifndef ITEM_FUNC_INCLUDED +#define ITEM_FUNC_INCLUDED +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. + Copyright (c) 2009, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* Function items used by mysql */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#ifdef HAVE_IEEEFP_H +extern "C" /* Bug in BSDI include file */ +{ +#include +} +#endif + +#include "sql_udf.h" // udf_handler +#include "my_decimal.h" // string2my_decimal +#include + + +class Item_func :public Item_func_or_sum +{ + void sync_with_sum_func_and_with_field(List &list); +protected: + virtual bool check_arguments() const + { + return check_argument_types_scalar(0, arg_count); + } + bool check_argument_types_like_args0() const; + bool check_argument_types_scalar(uint start, uint end) const; + bool check_argument_types_traditional_scalar(uint start, uint end) const; + bool check_argument_types_or_binary(const Type_handler *handler, + uint start, uint end) const; + bool check_argument_types_can_return_int(uint start, uint end) const; + bool check_argument_types_can_return_real(uint start, uint end) const; + bool check_argument_types_can_return_str(uint start, uint end) const; + bool check_argument_types_can_return_text(uint start, uint end) const; + bool check_argument_types_can_return_date(uint start, uint end) const; + bool check_argument_types_can_return_time(uint start, uint end) const; + void print_cast_temporal(String *str, enum_query_type query_type); +public: + + table_map not_null_tables_cache; + + enum Functype { UNKNOWN_FUNC,EQ_FUNC,EQUAL_FUNC,NE_FUNC,LT_FUNC,LE_FUNC, + GE_FUNC,GT_FUNC,FT_FUNC, + LIKE_FUNC,ISNULL_FUNC,ISNOTNULL_FUNC, + COND_AND_FUNC, COND_OR_FUNC, XOR_FUNC, + BETWEEN, IN_FUNC, MULT_EQUAL_FUNC, + INTERVAL_FUNC, ISNOTNULLTEST_FUNC, + SP_EQUALS_FUNC, SP_DISJOINT_FUNC,SP_INTERSECTS_FUNC, + SP_TOUCHES_FUNC,SP_CROSSES_FUNC,SP_WITHIN_FUNC, + SP_CONTAINS_FUNC,SP_OVERLAPS_FUNC, + 
SP_STARTPOINT,SP_ENDPOINT,SP_EXTERIORRING, + SP_POINTN,SP_GEOMETRYN,SP_INTERIORRINGN, SP_RELATE_FUNC, + NOT_FUNC, NOT_ALL_FUNC, TEMPTABLE_ROWID, + NOW_FUNC, NOW_UTC_FUNC, SYSDATE_FUNC, TRIG_COND_FUNC, + SUSERVAR_FUNC, GUSERVAR_FUNC, COLLATE_FUNC, + EXTRACT_FUNC, CHAR_TYPECAST_FUNC, FUNC_SP, UDF_FUNC, + NEG_FUNC, GSYSVAR_FUNC, IN_OPTIMIZER_FUNC, DYNCOL_FUNC, + JSON_EXTRACT_FUNC, JSON_VALID_FUNC, ROWNUM_FUNC, + CASE_SEARCHED_FUNC, // Used by ColumnStore/Spider + CASE_SIMPLE_FUNC, // Used by ColumnStore/spider, + }; + static scalar_comparison_op functype_to_scalar_comparison_op(Functype type) + { + switch (type) { + case EQ_FUNC: return SCALAR_CMP_EQ; + case EQUAL_FUNC: return SCALAR_CMP_EQUAL; + case LT_FUNC: return SCALAR_CMP_LT; + case LE_FUNC: return SCALAR_CMP_LE; + case GE_FUNC: return SCALAR_CMP_GE; + case GT_FUNC: return SCALAR_CMP_GT; + default: break; + } + DBUG_ASSERT(0); + return SCALAR_CMP_EQ; + } + enum Type type() const override { return FUNC_ITEM; } + virtual enum Functype functype() const { return UNKNOWN_FUNC; } + Item_func(THD *thd): Item_func_or_sum(thd) + { + DBUG_ASSERT(with_flags == item_with_t::NONE); + with_flags= item_with_t::NONE; + } + Item_func(THD *thd, Item *a): Item_func_or_sum(thd, a) + { + with_flags= a->with_flags; + } + Item_func(THD *thd, Item *a, Item *b): + Item_func_or_sum(thd, a, b) + { + with_flags= a->with_flags | b->with_flags; + } + Item_func(THD *thd, Item *a, Item *b, Item *c): + Item_func_or_sum(thd, a, b, c) + { + with_flags|= a->with_flags | b->with_flags | c->with_flags; + } + Item_func(THD *thd, Item *a, Item *b, Item *c, Item *d): + Item_func_or_sum(thd, a, b, c, d) + { + with_flags= a->with_flags | b->with_flags | c->with_flags | d->with_flags; + } + Item_func(THD *thd, Item *a, Item *b, Item *c, Item *d, Item* e): + Item_func_or_sum(thd, a, b, c, d, e) + { + with_flags= (a->with_flags | b->with_flags | c->with_flags | d->with_flags | + e->with_flags); + } + Item_func(THD *thd, List &list): + Item_func_or_sum(thd, 
list) + { + set_arguments(thd, list); + } + // Constructor used for Item_cond_and/or (see Item comment) + Item_func(THD *thd, Item_func *item): + Item_func_or_sum(thd, item), + not_null_tables_cache(item->not_null_tables_cache) + { } + bool fix_fields(THD *, Item **ref) override; + void cleanup() override + { + Item_func_or_sum::cleanup(); + used_tables_and_const_cache_init(); + } + void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge) + override; + void quick_fix_field() override; + table_map not_null_tables() const override; + void update_used_tables() override + { + used_tables_and_const_cache_init(); + used_tables_and_const_cache_update_and_join(arg_count, args); + } + COND *build_equal_items(THD *thd, COND_EQUAL *inherited, + bool link_item_fields, + COND_EQUAL **cond_equal_ref) override; + SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) override + { + DBUG_ENTER("Item_func::get_mm_tree"); + DBUG_RETURN(const_item() ? get_mm_tree_for_const(param) : NULL); + } + bool eq(const Item *item, bool binary_cmp) const override; + virtual Item *key_item() const { return args[0]; } + void set_arguments(THD *thd, List &list) + { + Item_args::set_arguments(thd, list); + sync_with_sum_func_and_with_field(list); + list.empty(); // Fields are used + } + void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List &fields, uint flags) override; + void print(String *str, enum_query_type query_type) override; + void print_op(String *str, enum_query_type query_type); + void print_args(String *str, uint from, enum_query_type query_type); + bool is_null() override + { + update_null_value(); + return null_value; + } + String *val_str_from_val_str_ascii(String *str, String *str2); + + void signal_divide_by_null(); + friend class udf_handler; + Field *create_field_for_create_select(MEM_ROOT *root, TABLE *table) override + { return tmp_table_field_from_field_type(root, table); } + Item *get_tmp_table_item(THD *thd) override; + + void 
fix_char_length_ulonglong(ulonglong max_char_length_arg) + { + ulonglong max_result_length= max_char_length_arg * + collation.collation->mbmaxlen; + if (max_result_length >= MAX_BLOB_WIDTH) + { + max_length= MAX_BLOB_WIDTH; + set_maybe_null(); + } + else + max_length= (uint32) max_result_length; + } + Item *transform(THD *thd, Item_transformer transformer, uchar *arg) override; + Item* compile(THD *thd, Item_analyzer analyzer, uchar **arg_p, + Item_transformer transformer, uchar *arg_t) override; + void traverse_cond(Cond_traverser traverser, + void * arg, traverse_order order) override; + bool eval_not_null_tables(void *opt_arg) override; + bool find_not_null_fields(table_map allowed) override; + // bool is_expensive_processor(void *arg); + // virtual bool is_expensive() { return 0; } + inline void raise_numeric_overflow(const char *type_name) + { + char buf[256]; + String str(buf, sizeof(buf), system_charset_info); + str.length(0); + print(&str, QT_NO_DATA_EXPANSION); + my_error(ER_DATA_OUT_OF_RANGE, MYF(0), type_name, str.c_ptr_safe()); + } + inline double raise_float_overflow() + { + raise_numeric_overflow("DOUBLE"); + return 0.0; + } + inline longlong raise_integer_overflow() + { + raise_numeric_overflow(unsigned_flag ? "BIGINT UNSIGNED": "BIGINT"); + return 0; + } + inline int raise_decimal_overflow() + { + raise_numeric_overflow("DECIMAL"); + return E_DEC_OVERFLOW; + } + /** + Throw an error if the input double number is not finite, i.e. is either + +/-INF or NAN. + */ + inline double check_float_overflow(double value) + { + return std::isfinite(value) ? value : raise_float_overflow(); + } + /** + Throw an error if the input BIGINT value represented by the + (longlong value, bool unsigned flag) pair cannot be returned by the + function, i.e. is not compatible with this Item's unsigned_flag. 
+ */ + inline longlong check_integer_overflow(longlong value, bool val_unsigned) + { + return check_integer_overflow(Longlong_hybrid(value, val_unsigned)); + } + + // Check if the value is compatible with Item::unsigned_flag. + inline longlong check_integer_overflow(const Longlong_hybrid &sval) + { + Longlong_null res= sval.val_int(unsigned_flag); + return res.is_null() ? raise_integer_overflow() : res.value(); + } + + // Check if the value is compatible with Item::unsigned_flag. + longlong check_integer_overflow(const ULonglong_hybrid &uval) + { + Longlong_null res= uval.val_int(unsigned_flag); + return res.is_null() ? raise_integer_overflow() : res.value(); + } + + /** + Throw an error if the error code of a DECIMAL operation is E_DEC_OVERFLOW. + */ + inline int check_decimal_overflow(int error) + { + return (error == E_DEC_OVERFLOW) ? raise_decimal_overflow() : error; + } + + bool has_timestamp_args() + { + DBUG_ASSERT(fixed()); + for (uint i= 0; i < arg_count; i++) + { + if (args[i]->type() == Item::FIELD_ITEM && + args[i]->field_type() == MYSQL_TYPE_TIMESTAMP) + return TRUE; + } + return FALSE; + } + + bool has_date_args() + { + DBUG_ASSERT(fixed()); + for (uint i= 0; i < arg_count; i++) + { + if (args[i]->type() == Item::FIELD_ITEM && + (args[i]->field_type() == MYSQL_TYPE_DATE || + args[i]->field_type() == MYSQL_TYPE_DATETIME)) + return TRUE; + } + return FALSE; + } + + bool has_time_args() + { + DBUG_ASSERT(fixed()); + for (uint i= 0; i < arg_count; i++) + { + if (args[i]->type() == Item::FIELD_ITEM && + (args[i]->field_type() == MYSQL_TYPE_TIME || + args[i]->field_type() == MYSQL_TYPE_DATETIME)) + return TRUE; + } + return FALSE; + } + + bool has_datetime_args() + { + DBUG_ASSERT(fixed()); + for (uint i= 0; i < arg_count; i++) + { + if (args[i]->type() == Item::FIELD_ITEM && + args[i]->field_type() == MYSQL_TYPE_DATETIME) + return TRUE; + } + return FALSE; + } + + Item* propagate_equal_fields(THD *thd, const Context &ctx, COND_EQUAL *cond) + override + { + 
/* + By default only substitution for a field whose two different values + are never equal is allowed in the arguments of a function. + This is overruled for the direct arguments of comparison functions. + */ + Item_args::propagate_equal_fields(thd, Context_identity(), cond); + return this; + } + + bool has_rand_bit() + { + return used_tables() & RAND_TABLE_BIT; + } + + bool excl_dep_on_table(table_map tab_map) override + { + if (used_tables() & (OUTER_REF_TABLE_BIT | RAND_TABLE_BIT)) + return false; + return !(used_tables() & ~tab_map) || + Item_args::excl_dep_on_table(tab_map); + } + + bool excl_dep_on_grouping_fields(st_select_lex *sel) override + { + if (has_rand_bit() || with_subquery()) + return false; + return Item_args::excl_dep_on_grouping_fields(sel); + } + + bool excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred) override + { + return Item_args::excl_dep_on_in_subq_left_part(subq_pred); + } + + /* + We assume the result of any function that has a TIMESTAMP argument to be + timezone-dependent, since a TIMESTAMP value in both numeric and string + contexts is interpreted according to the current timezone. + The only exception is UNIX_TIMESTAMP() which returns the internal + representation of a TIMESTAMP argument verbatim, and thus does not depend on + the timezone. + */ + bool check_valid_arguments_processor(void *bool_arg) override + { + return has_timestamp_args(); + } + + bool find_function_processor (void *arg) override + { + return functype() == *(Functype *) arg; + } + + void no_rows_in_result() override + { + for (uint i= 0; i < arg_count; i++) + { + args[i]->no_rows_in_result(); + } + } + void restore_to_before_no_rows_in_result() override + { + for (uint i= 0; i < arg_count; i++) + { + args[i]->restore_to_before_no_rows_in_result(); + } + } + void convert_const_compared_to_int_field(THD *thd); + /** + Prepare arguments and setup a comparator. + Used in Item_func_xxx with two arguments and a comparator, + e.g. 
Item_bool_func2 and Item_func_nullif. + args[0] or args[1] can be modified: + - converted to character set and collation of the operation + - or replaced to an Item_int_with_ref + */ + bool setup_args_and_comparator(THD *thd, Arg_comparator *cmp); + Item_func *get_item_func() override { return this; } + bool is_simplified_cond_processor(void *arg) override + { return const_item() && !val_int(); } +}; + + +class Item_real_func :public Item_func +{ +public: + Item_real_func(THD *thd): Item_func(thd) { collation= DTCollation_numeric(); } + Item_real_func(THD *thd, Item *a): Item_func(thd, a) + { collation= DTCollation_numeric(); } + Item_real_func(THD *thd, Item *a, Item *b): Item_func(thd, a, b) + { collation= DTCollation_numeric(); } + Item_real_func(THD *thd, List &list): Item_func(thd, list) + { collation= DTCollation_numeric(); } + String *val_str(String*str) override; + my_decimal *val_decimal(my_decimal *decimal_value) override; + longlong val_int() override + { + DBUG_ASSERT(fixed()); + return Converter_double_to_longlong(val_real(), unsigned_flag).result(); + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { return get_date_from_real(thd, ltime, fuzzydate); } + const Type_handler *type_handler() const override + { return &type_handler_double; } + bool fix_length_and_dec(THD *thd) override + { + decimals= NOT_FIXED_DEC; + max_length= float_length(decimals); + return FALSE; + } +}; + + +/** + Functions whose returned field type is determined at fix_fields() time. 
+*/ +class Item_hybrid_func: public Item_func, + public Type_handler_hybrid_field_type +{ +protected: + bool fix_attributes(Item **item, uint nitems); +public: + Item_hybrid_func(THD *thd): Item_func(thd) { } + Item_hybrid_func(THD *thd, Item *a): Item_func(thd, a) { } + Item_hybrid_func(THD *thd, Item *a, Item *b): Item_func(thd, a, b) { } + Item_hybrid_func(THD *thd, Item *a, Item *b, Item *c): + Item_func(thd, a, b, c) { } + Item_hybrid_func(THD *thd, List &list): Item_func(thd, list) { } + Item_hybrid_func(THD *thd, Item_hybrid_func *item) + :Item_func(thd, item), Type_handler_hybrid_field_type(item) { } + const Type_handler *type_handler() const override + { return Type_handler_hybrid_field_type::type_handler(); } + void fix_length_and_dec_long_or_longlong(uint char_length, bool unsigned_arg) + { + collation= DTCollation_numeric(); + unsigned_flag= unsigned_arg; + max_length= char_length; + set_handler(Type_handler::type_handler_long_or_longlong(char_length, + unsigned_arg)); + } + void fix_length_and_dec_ulong_or_ulonglong_by_nbits(uint nbits) + { + uint digits= Type_handler_bit::Bit_decimal_notation_int_digits_by_nbits(nbits); + collation= DTCollation_numeric(); + unsigned_flag= true; + max_length= digits; + if (nbits > 32) + set_handler(&type_handler_ulonglong); + else + set_handler(&type_handler_ulong); + } +}; + + +class Item_handled_func: public Item_func +{ +public: + class Handler + { + public: + virtual ~Handler() = default; + virtual String *val_str(Item_handled_func *, String *) const= 0; + virtual String *val_str_ascii(Item_handled_func *, String *) const= 0; + virtual double val_real(Item_handled_func *) const= 0; + virtual longlong val_int(Item_handled_func *) const= 0; + virtual my_decimal *val_decimal(Item_handled_func *, my_decimal *) const= 0; + virtual bool get_date(THD *thd, Item_handled_func *, MYSQL_TIME *, date_mode_t fuzzydate) const= 0; + virtual bool val_native(THD *thd, Item_handled_func *, Native *to) const + { + DBUG_ASSERT(0); + 
to->length(0); + return true; + } + virtual const Type_handler * + return_type_handler(const Item_handled_func *item) const= 0; + virtual const Type_handler * + type_handler_for_create_select(const Item_handled_func *item) const + { + return return_type_handler(item); + } + virtual bool fix_length_and_dec(Item_handled_func *) const= 0; + }; + + class Handler_str: public Handler + { + public: + String *val_str_ascii(Item_handled_func *item, String *str) const + { + return item->Item::val_str_ascii(str); + } + double val_real(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + StringBuffer<64> tmp; + String *res= item->val_str(&tmp); + return res ? item->double_from_string_with_check(res) : 0.0; + } + longlong val_int(Item_handled_func *item) const + { + DBUG_ASSERT(item->fixed()); + StringBuffer<22> tmp; + String *res= item->val_str(&tmp); + return res ? item->longlong_from_string_with_check(res) : 0; + } + my_decimal *val_decimal(Item_handled_func *item, my_decimal *to) const + { + return item->val_decimal_from_string(to); + } + bool get_date(THD *thd, Item_handled_func *item, MYSQL_TIME *to, + date_mode_t fuzzydate) const + { + return item->get_date_from_string(thd, to, fuzzydate); + } + }; + + /** + Abstract class for functions returning TIME, DATE, DATETIME or string values, + whose data type depends on parameters and is set at fix_fields time. + */ + class Handler_temporal: public Handler + { + public: + String *val_str(Item_handled_func *item, String *to) const + { + StringBuffer ascii_buf; + return item->val_str_from_val_str_ascii(to, &ascii_buf); + } + }; + + /** + Abstract class for functions returning strings, + which are generated from get_date() results, + when get_date() can return different MYSQL_TIMESTAMP_XXX per row. 
+ */ + class Handler_temporal_string: public Handler_temporal + { + public: + const Type_handler *return_type_handler(const Item_handled_func *) const + { + return &type_handler_string; + } + const Type_handler * + type_handler_for_create_select(const Item_handled_func *item) const + { + return return_type_handler(item)->type_handler_for_tmp_table(item); + } + double val_real(Item_handled_func *item) const + { + return Temporal_hybrid(item).to_double(); + } + longlong val_int(Item_handled_func *item) const + { + return Temporal_hybrid(item).to_longlong(); + } + my_decimal *val_decimal(Item_handled_func *item, my_decimal *to) const + { + return Temporal_hybrid(item).to_decimal(to); + } + String *val_str_ascii(Item_handled_func *item, String *to) const + { + return Temporal_hybrid(item).to_string(to, item->decimals); + } + }; + + + class Handler_date: public Handler_temporal + { + public: + const Type_handler *return_type_handler(const Item_handled_func *) const + { + return &type_handler_newdate; + } + bool fix_length_and_dec(Item_handled_func *item) const + { + item->fix_attributes_date(); + return false; + } + double val_real(Item_handled_func *item) const + { + return Date(item).to_double(); + } + longlong val_int(Item_handled_func *item) const + { + return Date(item).to_longlong(); + } + my_decimal *val_decimal(Item_handled_func *item, my_decimal *to) const + { + return Date(item).to_decimal(to); + } + String *val_str_ascii(Item_handled_func *item, String *to) const + { + return Date(item).to_string(to); + } + }; + + + class Handler_time: public Handler_temporal + { + public: + const Type_handler *return_type_handler(const Item_handled_func *) const + { + return &type_handler_time2; + } + double val_real(Item_handled_func *item) const + { + return Time(item).to_double(); + } + longlong val_int(Item_handled_func *item) const + { + return Time(item).to_longlong(); + } + my_decimal *val_decimal(Item_handled_func *item, my_decimal *to) const + { + return 
Time(item).to_decimal(to); + } + String *val_str_ascii(Item_handled_func *item, String *to) const + { + return Time(item).to_string(to, item->decimals); + } + bool val_native(THD *thd, Item_handled_func *item, Native *to) const + { + return Time(thd, item).to_native(to, item->decimals); + } + }; + + + class Handler_datetime: public Handler_temporal + { + public: + const Type_handler *return_type_handler(const Item_handled_func *) const + { + return &type_handler_datetime2; + } + double val_real(Item_handled_func *item) const + { + return Datetime(item).to_double(); + } + longlong val_int(Item_handled_func *item) const + { + return Datetime(item).to_longlong(); + } + my_decimal *val_decimal(Item_handled_func *item, my_decimal *to) const + { + return Datetime(item).to_decimal(to); + } + String *val_str_ascii(Item_handled_func *item, String *to) const + { + return Datetime(item).to_string(to, item->decimals); + } + }; + + + class Handler_int: public Handler + { + public: + String *val_str(Item_handled_func *item, String *to) const + { + longlong nr= val_int(item); + if (item->null_value) + return 0; + to->set_int(nr, item->unsigned_flag, item->collation.collation); + return to; + } + String *val_str_ascii(Item_handled_func *item, String *to) const + { + return item->Item::val_str_ascii(to); + } + double val_real(Item_handled_func *item) const + { + return item->unsigned_flag ? 
(double) ((ulonglong) val_int(item)) : + (double) val_int(item); + } + my_decimal *val_decimal(Item_handled_func *item, my_decimal *to) const + { + return item->val_decimal_from_int(to); + } + bool get_date(THD *thd, Item_handled_func *item, + MYSQL_TIME *to, date_mode_t fuzzydate) const + { + return item->get_date_from_int(thd, to, fuzzydate); + } + longlong val_int(Item_handled_func *item) const + { + Longlong_null tmp= to_longlong_null(item); + item->null_value= tmp.is_null(); + return tmp.value(); + } + virtual Longlong_null to_longlong_null(Item_handled_func *item) const= 0; + }; + + class Handler_slong: public Handler_int + { + public: + const Type_handler *return_type_handler(const Item_handled_func *item) const + { + return &type_handler_slong; + } + bool fix_length_and_dec(Item_handled_func *item) const + { + item->unsigned_flag= false; + item->collation= DTCollation_numeric(); + item->fix_char_length(11); + return false; + } + }; + + class Handler_slong2: public Handler_slong + { + public: + bool fix_length_and_dec(Item_handled_func *func) const + { + bool rc= Handler_slong::fix_length_and_dec(func); + func->max_length= 2; + return rc; + } + }; + + class Handler_ulonglong: public Handler_int + { + public: + const Type_handler *return_type_handler(const Item_handled_func *item) const + { + return &type_handler_ulonglong; + } + bool fix_length_and_dec(Item_handled_func *item) const + { + item->unsigned_flag= true; + item->collation= DTCollation_numeric(); + item->fix_char_length(21); + return false; + } + }; + +protected: + const Handler *m_func_handler; +public: + Item_handled_func(THD *thd, Item *a) + :Item_func(thd, a), m_func_handler(NULL) { } + Item_handled_func(THD *thd, Item *a, Item *b) + :Item_func(thd, a, b), m_func_handler(NULL) { } + void set_func_handler(const Handler *handler) + { + m_func_handler= handler; + } + const Type_handler *type_handler() const override + { + return m_func_handler->return_type_handler(this); + } + Field 
*create_field_for_create_select(MEM_ROOT *root, TABLE *table) override + { + DBUG_ASSERT(fixed()); + const Type_handler *h= m_func_handler->type_handler_for_create_select(this); + return h->make_and_init_table_field(root, &name, + Record_addr(maybe_null()), + *this, table); + } + String *val_str(String *to) override + { + return m_func_handler->val_str(this, to); + } + String *val_str_ascii(String *to) override + { + return m_func_handler->val_str_ascii(this, to); + } + double val_real() override + { + return m_func_handler->val_real(this); + } + longlong val_int() override + { + return m_func_handler->val_int(this); + } + my_decimal *val_decimal(my_decimal *to) override + { + return m_func_handler->val_decimal(this, to); + } + bool get_date(THD *thd, MYSQL_TIME *to, date_mode_t fuzzydate) override + { + return m_func_handler->get_date(thd, this, to, fuzzydate); + } + bool val_native(THD *thd, Native *to) override + { + return m_func_handler->val_native(thd, this, to); + } +}; + + +/** + Functions that at fix_fields() time determine the returned field type, + trying to preserve the exact data type of the arguments. + + The descendants have to implement "native" value methods, + i.e. str_op(), date_op(), int_op(), real_op(), decimal_op(). + fix_fields() chooses which of the above value methods will be + used during execution time, according to the returned field type. + + For example, if fix_fields() determines that the returned value type + is MYSQL_TYPE_LONG, then: + - int_op() is chosen as the execution time native method. + - val_int() returns the result of int_op() as is. + - all other methods, i.e. val_real(), val_decimal(), val_str(), get_date(), + call int_op() first, then convert the result to the requested data type. +*/ +class Item_func_hybrid_field_type: public Item_hybrid_func +{ + /* + Helper methods to make sure that the result of + decimal_op(), str_op() and date_op() is properly synched with null_value. 
+ */ + bool date_op_with_null_check(THD *thd, MYSQL_TIME *ltime) + { + bool rc= date_op(thd, ltime, date_mode_t(0)); + DBUG_ASSERT(!rc ^ null_value); + return rc; + } + bool time_op_with_null_check(THD *thd, MYSQL_TIME *ltime) + { + bool rc= time_op(thd, ltime); + DBUG_ASSERT(!rc ^ null_value); + DBUG_ASSERT(rc || ltime->time_type == MYSQL_TIMESTAMP_TIME); + return rc; + } + String *str_op_with_null_check(String *str) + { + String *res= str_op(str); + DBUG_ASSERT((res != NULL) ^ null_value); + return res; + } + +public: + // Value methods that involve no conversion + String *val_str_from_str_op(String *str) + { + return str_op_with_null_check(&str_value); + } + longlong val_int_from_int_op() + { + return int_op(); + } + double val_real_from_real_op() + { + return real_op(); + } + + // Value methods that involve conversion + String *val_str_from_real_op(String *str); + String *val_str_from_int_op(String *str); + String *val_str_from_date_op(String *str); + String *val_str_from_time_op(String *str); + + my_decimal *val_decimal_from_str_op(my_decimal *dec); + my_decimal *val_decimal_from_real_op(my_decimal *dec); + my_decimal *val_decimal_from_int_op(my_decimal *dec); + my_decimal *val_decimal_from_date_op(my_decimal *dec); + my_decimal *val_decimal_from_time_op(my_decimal *dec); + + longlong val_int_from_str_op(); + longlong val_int_from_real_op(); + longlong val_int_from_date_op(); + longlong val_int_from_time_op(); + + double val_real_from_str_op(); + double val_real_from_date_op(); + double val_real_from_time_op(); + double val_real_from_int_op(); + +public: + Item_func_hybrid_field_type(THD *thd): + Item_hybrid_func(thd) + { collation= DTCollation_numeric(); } + Item_func_hybrid_field_type(THD *thd, Item *a): + Item_hybrid_func(thd, a) + { collation= DTCollation_numeric(); } + Item_func_hybrid_field_type(THD *thd, Item *a, Item *b): + Item_hybrid_func(thd, a, b) + { collation= DTCollation_numeric(); } + Item_func_hybrid_field_type(THD *thd, Item *a, Item *b, Item 
*c): + Item_hybrid_func(thd, a, b, c) + { collation= DTCollation_numeric(); } + Item_func_hybrid_field_type(THD *thd, List &list): + Item_hybrid_func(thd, list) + { collation= DTCollation_numeric(); } + + double val_real() override + { + DBUG_ASSERT(fixed()); + return Item_func_hybrid_field_type::type_handler()-> + Item_func_hybrid_field_type_val_real(this); + } + longlong val_int() override + { + DBUG_ASSERT(fixed()); + return Item_func_hybrid_field_type::type_handler()-> + Item_func_hybrid_field_type_val_int(this); + } + my_decimal *val_decimal(my_decimal *dec) override + { + DBUG_ASSERT(fixed()); + return Item_func_hybrid_field_type::type_handler()-> + Item_func_hybrid_field_type_val_decimal(this, dec); + } + String *val_str(String*str) override + { + DBUG_ASSERT(fixed()); + String *res= Item_func_hybrid_field_type::type_handler()-> + Item_func_hybrid_field_type_val_str(this, str); + DBUG_ASSERT(null_value == (res == NULL)); + return res; + } + bool get_date(THD *thd, MYSQL_TIME *to, date_mode_t mode) override + { + DBUG_ASSERT(fixed()); + return Item_func_hybrid_field_type::type_handler()-> + Item_func_hybrid_field_type_get_date_with_warn(thd, this, to, mode); + } + + bool val_native(THD *thd, Native *to) override + { + DBUG_ASSERT(fixed()); + return native_op(thd, to); + } + + /** + @brief Performs the operation that this functions implements when the + result type is INT. + + @return The result of the operation. + */ + virtual longlong int_op()= 0; + Longlong_null to_longlong_null_op() + { + longlong nr= int_op(); + /* + C++ does not guarantee the order of parameter evaluation, + so to make sure "null_value" is passed to the constructor + after the int_op() call, int_op() is caled on a separate line. 
+ */ + return Longlong_null(nr, null_value); + } + Longlong_hybrid_null to_longlong_hybrid_null_op() + { + return Longlong_hybrid_null(to_longlong_null_op(), unsigned_flag); + } + + /** + @brief Performs the operation that this functions implements when the + result type is REAL. + + @return The result of the operation. + */ + virtual double real_op()= 0; + Double_null to_double_null_op() + { + // val_real() must be caleed on a separate line. See to_longlong_null() + double nr= real_op(); + return Double_null(nr, null_value); + } + + /** + @brief Performs the operation that this functions implements when the + result type is DECIMAL. + + @param A pointer where the DECIMAL value will be allocated. + @return + - 0 If the result is NULL + - The same pointer it was given, with the area initialized to the + result of the operation. + */ + virtual my_decimal *decimal_op(my_decimal *)= 0; + + /** + @brief Performs the operation that this functions implements when the + result type is a string type. + + @return The result of the operation. + */ + virtual String *str_op(String *)= 0; + + /** + @brief Performs the operation that this functions implements when + field type is DATETIME or DATE. + @return The result of the operation. + */ + virtual bool date_op(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate)= 0; + + /** + @brief Performs the operation that this functions implements when + field type is TIME. + @return The result of the operation. + */ + virtual bool time_op(THD *thd, MYSQL_TIME *res)= 0; + + virtual bool native_op(THD *thd, Native *native)= 0; +}; + + +/* + This class resembles SQL standard CASE-alike expressions: + CASE and its abbreviations COALESCE, NULLIF, IFNULL, IF. 
+ + ::= + | +*/ +class Item_func_case_expression: public Item_func_hybrid_field_type +{ +public: + Item_func_case_expression(THD *thd) + :Item_func_hybrid_field_type(thd) + { } + Item_func_case_expression(THD *thd, Item *a) + :Item_func_hybrid_field_type(thd, a) + { } + Item_func_case_expression(THD *thd, Item *a, Item *b) + :Item_func_hybrid_field_type(thd, a, b) + { } + Item_func_case_expression(THD *thd, Item *a, Item *b, Item *c) + :Item_func_hybrid_field_type(thd, a, b, c) + { } + Item_func_case_expression(THD *thd, List &list): + Item_func_hybrid_field_type(thd, list) + { } + bool find_not_null_fields(table_map allowed) { return false; } +}; + + +class Item_func_numhybrid: public Item_func_hybrid_field_type +{ +protected: + + inline void fix_decimals() + { + DBUG_ASSERT(result_type() == DECIMAL_RESULT); + if (decimals == NOT_FIXED_DEC) + set_if_smaller(decimals, max_length - 1); + } + +public: + Item_func_numhybrid(THD *thd): Item_func_hybrid_field_type(thd) + { } + Item_func_numhybrid(THD *thd, Item *a): Item_func_hybrid_field_type(thd, a) + { } + Item_func_numhybrid(THD *thd, Item *a, Item *b): + Item_func_hybrid_field_type(thd, a, b) + { } + Item_func_numhybrid(THD *thd, Item *a, Item *b, Item *c): + Item_func_hybrid_field_type(thd, a, b, c) + { } + Item_func_numhybrid(THD *thd, List &list): + Item_func_hybrid_field_type(thd, list) + { } + String *str_op(String *str) { DBUG_ASSERT(0); return 0; } + bool date_op(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) + { + DBUG_ASSERT(0); + return true; + } + bool time_op(THD *thd, MYSQL_TIME *ltime) + { + DBUG_ASSERT(0); + return true; + } + bool native_op(THD *thd, Native *to) + { + DBUG_ASSERT(0); + return true; + } +}; + + +/* function where type of result detected by first argument */ +class Item_func_num1: public Item_func_numhybrid +{ +public: + Item_func_num1(THD *thd, Item *a): Item_func_numhybrid(thd, a) {} + Item_func_num1(THD *thd, Item *a, Item *b): Item_func_numhybrid(thd, a, b) {} + bool 
check_partition_func_processor(void *int_arg) { return FALSE; } + bool check_vcol_func_processor(void *arg) { return FALSE; } +}; + + +/* Base class for operations like '+', '-', '*' */ +class Item_num_op :public Item_func_numhybrid +{ +protected: + bool check_arguments() const override + { + return false; // Checked by aggregate_for_num_op() + } +public: + Item_num_op(THD *thd, Item *a, Item *b): Item_func_numhybrid(thd, a, b) {} + virtual void result_precision()= 0; + + void print(String *str, enum_query_type query_type) override + { + print_op(str, query_type); + } + bool fix_type_handler(const Type_aggregator *aggregator); + void fix_length_and_dec_double() + { + aggregate_numeric_attributes_real(args, arg_count); + max_length= float_length(decimals); + } + void fix_length_and_dec_decimal() + { + unsigned_flag= args[0]->unsigned_flag & args[1]->unsigned_flag; + result_precision(); + fix_decimals(); + } + void fix_length_and_dec_int() + { + unsigned_flag= args[0]->unsigned_flag | args[1]->unsigned_flag; + result_precision(); + decimals= 0; + set_handler(type_handler_long_or_longlong()); + } + void fix_length_and_dec_temporal(bool downcast_decimal_to_int) + { + set_handler(&type_handler_newdecimal); + fix_length_and_dec_decimal(); + if (decimals == 0 && downcast_decimal_to_int) + set_handler(type_handler_long_or_longlong()); + } + bool need_parentheses_in_default() override { return true; } +}; + + +class Item_int_func :public Item_func +{ +public: + /* + QQ: shouldn't 20 characters be enough: + Max unsigned = 18,446,744,073,709,551,615 = 20 digits, 20 characters + Max signed = 9,223,372,036,854,775,807 = 19 digits, 19 characters + Min signed = -9,223,372,036,854,775,808 = 19 digits, 20 characters + */ + Item_int_func(THD *thd): Item_func(thd) + { collation= DTCollation_numeric(); fix_char_length(21); } + Item_int_func(THD *thd, Item *a): Item_func(thd, a) + { collation= DTCollation_numeric(); fix_char_length(21); } + Item_int_func(THD *thd, Item *a, Item *b): 
Item_func(thd, a, b) + { collation= DTCollation_numeric(); fix_char_length(21); } + Item_int_func(THD *thd, Item *a, Item *b, Item *c): Item_func(thd, a, b, c) + { collation= DTCollation_numeric(); fix_char_length(21); } + Item_int_func(THD *thd, Item *a, Item *b, Item *c, Item *d): + Item_func(thd, a, b, c, d) + { collation= DTCollation_numeric(); fix_char_length(21); } + Item_int_func(THD *thd, List &list): Item_func(thd, list) + { collation= DTCollation_numeric(); fix_char_length(21); } + Item_int_func(THD *thd, Item_int_func *item) :Item_func(thd, item) + { collation= DTCollation_numeric(); } + double val_real() override; + String *val_str(String*str) override; + my_decimal *val_decimal(my_decimal *decimal_value) override + { + return val_decimal_from_int(decimal_value); + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { return get_date_from_int(thd, ltime, fuzzydate); } + const Type_handler *type_handler() const override= 0; + bool fix_length_and_dec(THD *thd) override { return FALSE; } +}; + + +class Item_long_func: public Item_int_func +{ +public: + Item_long_func(THD *thd): Item_int_func(thd) { } + Item_long_func(THD *thd, Item *a): Item_int_func(thd, a) {} + Item_long_func(THD *thd, Item *a, Item *b): Item_int_func(thd, a, b) {} + Item_long_func(THD *thd, Item *a, Item *b, Item *c): Item_int_func(thd, a, b, c) {} + Item_long_func(THD *thd, List &list): Item_int_func(thd, list) { } + Item_long_func(THD *thd, Item_long_func *item) :Item_int_func(thd, item) {} + const Type_handler *type_handler() const override + { + if (unsigned_flag) + return &type_handler_ulong; + return &type_handler_slong; + } + bool fix_length_and_dec(THD *thd) override { max_length= 11; return FALSE; } +}; + + +class Item_func_hash: public Item_int_func +{ +public: + Item_func_hash(THD *thd, List &item): Item_int_func(thd, item) + {} + longlong val_int() override; + bool fix_length_and_dec(THD *thd) override; + const Type_handler *type_handler() const 
override + { return &type_handler_slong; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("") }; + return name; + } +}; + +class Item_func_hash_mariadb_100403: public Item_func_hash +{ +public: + Item_func_hash_mariadb_100403(THD *thd, List &item) + :Item_func_hash(thd, item) + {} + longlong val_int(); + Item *get_copy(THD *thd) + { return get_item_copy(thd, this); } + const char *func_name() const { return ""; } +}; + +class Item_longlong_func: public Item_int_func +{ +public: + Item_longlong_func(THD *thd): Item_int_func(thd) { } + Item_longlong_func(THD *thd, Item *a): Item_int_func(thd, a) {} + Item_longlong_func(THD *thd, Item *a, Item *b): Item_int_func(thd, a, b) {} + Item_longlong_func(THD *thd, Item *a, Item *b, Item *c): Item_int_func(thd, a, b, c) {} + Item_longlong_func(THD *thd, Item *a, Item *b, Item *c, Item *d): + Item_int_func(thd, a, b, c, d) {} + Item_longlong_func(THD *thd, List &list): Item_int_func(thd, list) { } + Item_longlong_func(THD *thd, Item_longlong_func *item) :Item_int_func(thd, item) {} + const Type_handler *type_handler() const override + { + if (unsigned_flag) + return &type_handler_ulonglong; + return &type_handler_slonglong; + } +}; + + +class Cursor_ref +{ +protected: + LEX_CSTRING m_cursor_name; + uint m_cursor_offset; + class sp_cursor *get_open_cursor_or_error(); + Cursor_ref(const LEX_CSTRING *name, uint offset) + :m_cursor_name(*name), m_cursor_offset(offset) + { } + void print_func(String *str, const LEX_CSTRING &func_name); +}; + + + +class Item_func_cursor_rowcount: public Item_longlong_func, + public Cursor_ref +{ +public: + Item_func_cursor_rowcount(THD *thd, const LEX_CSTRING *name, uint offset) + :Item_longlong_func(thd), Cursor_ref(name, offset) + { + set_maybe_null(); + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("%ROWCOUNT") }; + return 
name; + } + longlong val_int() override; + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), arg, VCOL_SESSION_FUNC); + } + void print(String *str, enum_query_type query_type) override + { + return Cursor_ref::print_func(str, func_name_cstring()); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + + +class Item_func_connection_id :public Item_long_func +{ + longlong value; + +public: + Item_func_connection_id(THD *thd): Item_long_func(thd) { unsigned_flag=1; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("connection_id") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + bool fix_fields(THD *thd, Item **ref) override; + longlong val_int() override { DBUG_ASSERT(fixed()); return value; } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, + VCOL_SESSION_FUNC); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_signed :public Item_int_func +{ +public: + Item_func_signed(THD *thd, Item *a): Item_int_func(thd, a) + { + unsigned_flag= 0; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("cast_as_signed") }; + return name; + } + const Type_handler *type_handler() const override + { + return Type_handler::type_handler_long_or_longlong(max_char_length(), + false); + } + longlong val_int() override + { + longlong value= args[0]->val_int_signed_typecast(); + null_value= args[0]->null_value; + return value; + } + void fix_length_and_dec_double() + { + fix_char_length(MAX_BIGINT_WIDTH); + } + void fix_length_and_dec_generic() + { + uint32 char_length= MY_MIN(args[0]->max_char_length(), + MY_INT64_NUM_DECIMAL_DIGITS); + /* + args[0]->max_char_length() can return 0. 
+ Reserve max_length to fit at least one character for one digit, + plus one character for the sign (if signed). + */ + set_if_bigger(char_length, 1U + (unsigned_flag ? 0 : 1)); + fix_char_length(char_length); + } + void fix_length_and_dec_string() + { + /* + For strings, use decimal_int_part() instead of max_char_length(). + This is important for Item_hex_hybrid: + SELECT CAST(0x1FFFFFFFF AS SIGNED); + Length is 5, decimal_int_part() is 13. + */ + uint32 char_length= MY_MIN(args[0]->decimal_int_part(), + MY_INT64_NUM_DECIMAL_DIGITS); + set_if_bigger(char_length, 1U + (unsigned_flag ? 0 : 1)); + fix_char_length(char_length); + } + bool fix_length_and_dec(THD *thd) override + { + return args[0]->type_handler()->Item_func_signed_fix_length_and_dec(this); + } + void print(String *str, enum_query_type query_type) override; + decimal_digits_t decimal_precision() const override + { return args[0]->decimal_precision(); } + bool need_parentheses_in_default() override { return true; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_unsigned :public Item_func_signed +{ +public: + Item_func_unsigned(THD *thd, Item *a): Item_func_signed(thd, a) + { + unsigned_flag= 1; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("cast_as_unsigned") }; + return name; + } + const Type_handler *type_handler() const override + { + if (max_char_length() <= MY_INT32_NUM_DECIMAL_DIGITS - 1) + return &type_handler_ulong; + return &type_handler_ulonglong; + } + longlong val_int() override + { + longlong value= args[0]->val_int_unsigned_typecast(); + null_value= args[0]->null_value; + return value; + } + bool fix_length_and_dec(THD *thd) override + { + return args[0]->type_handler()->Item_func_unsigned_fix_length_and_dec(this); + } + decimal_digits_t decimal_precision() const override { return max_length; } + void print(String *str, enum_query_type query_type) override; + Item *get_copy(THD *thd) 
override + { return get_item_copy(thd, this); } +}; + + +class Item_decimal_typecast :public Item_func +{ + my_decimal decimal_value; +public: + Item_decimal_typecast(THD *thd, Item *a, uint len, decimal_digits_t dec) + :Item_func(thd, a) + { + decimals= dec; + collation= DTCollation_numeric(); + fix_char_length(my_decimal_precision_to_length_no_truncation(len, dec, + unsigned_flag)); + } + String *val_str(String *str) override { return VDec(this).to_string(str); } + double val_real() override { return VDec(this).to_double(); } + longlong val_int() override { return VDec(this).to_longlong(unsigned_flag); } + my_decimal *val_decimal(my_decimal*) override; + bool get_date(THD *thd, MYSQL_TIME *to, date_mode_t mode) override + { + return decimal_to_datetime_with_warn(thd, VDec(this).ptr(), to, mode, + NULL, NULL); + } + const Type_handler *type_handler() const override + { return &type_handler_newdecimal; } + void fix_length_and_dec_generic() {} + bool fix_length_and_dec(THD *thd) override + { + return + args[0]->type_handler()->Item_decimal_typecast_fix_length_and_dec(this); + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("decimal_typecast") }; + return name; + } + void print(String *str, enum_query_type query_type) override; + bool need_parentheses_in_default() override { return true; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_real_typecast: public Item_real_func +{ +protected: + double val_real_with_truncate(double max_value); +public: + Item_real_typecast(THD *thd, Item *a, uint len, uint dec) + :Item_real_func(thd, a) + { + decimals= (uint8) dec; + max_length= (uint32) len; + } + bool need_parentheses_in_default() { return true; } + void print(String *str, enum_query_type query_type); + void fix_length_and_dec_generic() + { + set_maybe_null(); + } +}; + + +class Item_float_typecast :public Item_real_typecast +{ +public: + Item_float_typecast(THD *thd, Item 
*a) + :Item_real_typecast(thd, a, MAX_FLOAT_STR_LENGTH, NOT_FIXED_DEC) + { } + const Type_handler *type_handler() const override + { return &type_handler_float; } + bool fix_length_and_dec(THD *thd) override + { + return + args[0]->type_handler()->Item_float_typecast_fix_length_and_dec(this); + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("float_typecast") }; + return name; + } + double val_real() override + { + return (double) (float) val_real_with_truncate(FLT_MAX); + } + String *val_str(String*str) override + { + Float nr(Item_float_typecast::val_real()); + if (null_value) + return 0; + nr.to_string(str, decimals); + return str; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_double_typecast :public Item_real_typecast +{ +public: + Item_double_typecast(THD *thd, Item *a, uint len, uint dec): + Item_real_typecast(thd, a, len, dec) + { } + bool fix_length_and_dec(THD *thd) override + { + return + args[0]->type_handler()->Item_double_typecast_fix_length_and_dec(this); + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("double_typecast") }; + return name; + } + double val_real() override { return val_real_with_truncate(DBL_MAX); } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_additive_op :public Item_num_op +{ +public: + Item_func_additive_op(THD *thd, Item *a, Item *b): Item_num_op(thd, a, b) {} + void result_precision(); + bool check_partition_func_processor(void *int_arg) {return FALSE;} + bool check_vcol_func_processor(void *arg) { return FALSE;} +}; + + +class Item_func_plus :public Item_func_additive_op +{ +public: + Item_func_plus(THD *thd, Item *a, Item *b): + Item_func_additive_op(thd, a, b) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("+") }; + return name; + } + enum precedence precedence() const 
override { return ADD_PRECEDENCE; } + bool fix_length_and_dec(THD *thd) override; + longlong int_op() override; + double real_op() override; + my_decimal *decimal_op(my_decimal *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_minus :public Item_func_additive_op +{ + bool m_depends_on_sql_mode_no_unsigned_subtraction; +public: + Item_func_minus(THD *thd, Item *a, Item *b): + Item_func_additive_op(thd, a, b), + m_depends_on_sql_mode_no_unsigned_subtraction(false) + { } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("-") }; + return name; + } + enum precedence precedence() const override { return ADD_PRECEDENCE; } + Sql_mode_dependency value_depends_on_sql_mode() const override; + longlong int_op() override; + double real_op() override; + my_decimal *decimal_op(my_decimal *) override; + bool fix_length_and_dec(THD *thd) override; + void fix_unsigned_flag(); + void fix_length_and_dec_double() + { + Item_func_additive_op::fix_length_and_dec_double(); + fix_unsigned_flag(); + } + void fix_length_and_dec_decimal() + { + Item_func_additive_op::fix_length_and_dec_decimal(); + fix_unsigned_flag(); + } + void fix_length_and_dec_int() + { + Item_func_additive_op::fix_length_and_dec_int(); + fix_unsigned_flag(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_mul :public Item_num_op +{ +public: + Item_func_mul(THD *thd, Item *a, Item *b): + Item_num_op(thd, a, b) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("*") }; + return name; + } + enum precedence precedence() const override { return MUL_PRECEDENCE; } + longlong int_op() override; + double real_op() override; + my_decimal *decimal_op(my_decimal *) override; + void result_precision() override; + bool fix_length_and_dec(THD *thd) override; + bool check_partition_func_processor(void *int_arg) override 
{return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_div :public Item_num_op +{ +public: + uint prec_increment; + Item_func_div(THD *thd, Item *a, Item *b): Item_num_op(thd, a, b) {} + longlong int_op() override { DBUG_ASSERT(0); return 0; } + double real_op() override; + my_decimal *decimal_op(my_decimal *) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("/") }; + return name; + } + enum precedence precedence() const override { return MUL_PRECEDENCE; } + bool fix_length_and_dec(THD *thd) override; + void fix_length_and_dec_double(); + void fix_length_and_dec_int(); + void result_precision() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_int_div :public Item_int_func +{ +public: + Item_func_int_div(THD *thd, Item *a, Item *b): Item_int_func(thd, a, b) + {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("DIV") }; + return name; + } + enum precedence precedence() const override { return MUL_PRECEDENCE; } + const Type_handler *type_handler() const override + { return type_handler_long_or_longlong(); } + bool fix_length_and_dec(THD *thd) override; + void print(String *str, enum_query_type query_type) override + { + print_op(str, query_type); + } + + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + bool need_parentheses_in_default() override { return true; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_mod :public Item_num_op +{ +public: + Item_func_mod(THD *thd, Item *a, Item *b): Item_num_op(thd, a, b) {} + longlong int_op() override; + double real_op() override; + my_decimal 
*decimal_op(my_decimal *) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("MOD") }; + return name; + } + enum precedence precedence() const override { return MUL_PRECEDENCE; } + void result_precision() override; + bool fix_length_and_dec(THD *thd) override; + void fix_length_and_dec_double() + { + Item_num_op::fix_length_and_dec_double(); + unsigned_flag= args[0]->unsigned_flag; + } + void fix_length_and_dec_decimal() + { + result_precision(); + fix_decimals(); + } + void fix_length_and_dec_int() + { + result_precision(); + DBUG_ASSERT(decimals == 0); + set_handler(type_handler_long_or_longlong()); + } + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_neg :public Item_func_num1 +{ +public: + Item_func_neg(THD *thd, Item *a): Item_func_num1(thd, a) {} + double real_op() override; + longlong int_op() override; + my_decimal *decimal_op(my_decimal *) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("-") }; + return name; + } + enum Functype functype() const override { return NEG_FUNC; } + enum precedence precedence() const override { return NEG_PRECEDENCE; } + void print(String *str, enum_query_type query_type) override + { + str->append(func_name_cstring()); + args[0]->print_parenthesised(str, query_type, precedence()); + } + void fix_length_and_dec_int(); + void fix_length_and_dec_double(); + void fix_length_and_dec_decimal(); + bool fix_length_and_dec(THD *thd) override; + decimal_digits_t decimal_precision() const override + { return args[0]->decimal_precision(); } + bool need_parentheses_in_default() override { return true; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_abs :public Item_func_num1 +{ 
+public: + Item_func_abs(THD *thd, Item *a): Item_func_num1(thd, a) {} + double real_op() override; + longlong int_op() override; + my_decimal *decimal_op(my_decimal *) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("abs") }; + return name; + } + void fix_length_and_dec_int(); + void fix_length_and_dec_double(); + void fix_length_and_dec_decimal(); + bool fix_length_and_dec(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +// A class to handle logarithmic and trigonometric functions + +class Item_dec_func :public Item_real_func +{ + bool check_arguments() const override + { return check_argument_types_can_return_real(0, arg_count); } + public: + Item_dec_func(THD *thd, Item *a): Item_real_func(thd, a) {} + Item_dec_func(THD *thd, Item *a, Item *b): Item_real_func(thd, a, b) {} + bool fix_length_and_dec(THD *thd) override + { + decimals= NOT_FIXED_DEC; + max_length= float_length(decimals); + set_maybe_null(); + return FALSE; + } +}; + +class Item_func_exp :public Item_dec_func +{ +public: + Item_func_exp(THD *thd, Item *a): Item_dec_func(thd, a) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("exp") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_ln :public Item_dec_func +{ +public: + Item_func_ln(THD *thd, Item *a): Item_dec_func(thd, a) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("ln") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_log :public Item_dec_func +{ +public: + Item_func_log(THD *thd, Item *a): Item_dec_func(thd, a) {} + Item_func_log(THD *thd, Item *a, Item *b): Item_dec_func(thd, a, b) {} + double val_real() override; + LEX_CSTRING 
func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("log") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_log2 :public Item_dec_func +{ +public: + Item_func_log2(THD *thd, Item *a): Item_dec_func(thd, a) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("log2") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_log10 :public Item_dec_func +{ +public: + Item_func_log10(THD *thd, Item *a): Item_dec_func(thd, a) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("log10") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_sqrt :public Item_dec_func +{ +public: + Item_func_sqrt(THD *thd, Item *a): Item_dec_func(thd, a) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("sqrt") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_pow :public Item_dec_func +{ +public: + Item_func_pow(THD *thd, Item *a, Item *b): Item_dec_func(thd, a, b) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("pow") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_acos :public Item_dec_func +{ +public: + Item_func_acos(THD *thd, Item *a): Item_dec_func(thd, a) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("acos") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + 
+class Item_func_asin :public Item_dec_func +{ +public: + Item_func_asin(THD *thd, Item *a): Item_dec_func(thd, a) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("asin") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_atan :public Item_dec_func +{ +public: + Item_func_atan(THD *thd, Item *a): Item_dec_func(thd, a) {} + Item_func_atan(THD *thd, Item *a, Item *b): Item_dec_func(thd, a, b) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("atan") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_cos :public Item_dec_func +{ +public: + Item_func_cos(THD *thd, Item *a): Item_dec_func(thd, a) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("cos") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_sin :public Item_dec_func +{ +public: + Item_func_sin(THD *thd, Item *a): Item_dec_func(thd, a) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("sin") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_tan :public Item_dec_func +{ +public: + Item_func_tan(THD *thd, Item *a): Item_dec_func(thd, a) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("tan") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_cot :public Item_dec_func +{ +public: + Item_func_cot(THD *thd, Item *a): Item_dec_func(thd, a) {} + double val_real() override; + 
LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("cot") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_int_val :public Item_func_hybrid_field_type +{ +public: + Item_func_int_val(THD *thd, Item *a): Item_func_hybrid_field_type(thd, a) {} + bool check_partition_func_processor(void *int_arg) override { return FALSE; } + bool check_vcol_func_processor(void *arg) override { return FALSE; } + virtual decimal_round_mode round_mode() const= 0; + void fix_length_and_dec_double(); + void fix_length_and_dec_int_or_decimal(); + void fix_length_and_dec_time() + { + fix_attributes_time(0); + set_handler(&type_handler_time2); + } + void fix_length_and_dec_datetime() + { + fix_attributes_datetime(0); + set_handler(&type_handler_datetime2); + // Thinks like CEILING(TIMESTAMP'0000-01-01 23:59:59.9') returns NULL + set_maybe_null(); + } + bool fix_length_and_dec(THD *thd) override; + String *str_op(String *str) override { DBUG_ASSERT(0); return 0; } + bool native_op(THD *thd, Native *to) override; +}; + + +class Item_func_ceiling :public Item_func_int_val +{ +public: + Item_func_ceiling(THD *thd, Item *a): Item_func_int_val(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("ceiling") }; + return name; + } + decimal_round_mode round_mode() const override { return CEILING; } + longlong int_op() override; + double real_op() override; + my_decimal *decimal_op(my_decimal *) override; + bool date_op(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + bool time_op(THD *thd, MYSQL_TIME *ltime) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_floor :public Item_func_int_val +{ +public: + Item_func_floor(THD *thd, Item *a): Item_func_int_val(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= 
{STRING_WITH_LEN("floor") }; + return name; + } + decimal_round_mode round_mode() const override { return FLOOR; } + longlong int_op() override; + double real_op() override; + my_decimal *decimal_op(my_decimal *) override; + bool date_op(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + bool time_op(THD *thd, MYSQL_TIME *ltime) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +/* This handles round and truncate */ + +class Item_func_round :public Item_func_hybrid_field_type +{ + bool truncate; + void fix_length_and_dec_decimal(uint decimals_to_set); + void fix_length_and_dec_double(uint decimals_to_set); + bool test_if_length_can_increase(); +public: + Item_func_round(THD *thd, Item *a, Item *b, bool trunc_arg) + :Item_func_hybrid_field_type(thd, a, b), truncate(trunc_arg) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING truncate_name= {STRING_WITH_LEN("truncate") }; + static LEX_CSTRING round_name= {STRING_WITH_LEN("round") }; + return truncate ? truncate_name : round_name; + } + double real_op() override; + longlong int_op() override; + my_decimal *decimal_op(my_decimal *) override; + bool date_op(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + bool time_op(THD *thd, MYSQL_TIME *ltime) override; + bool native_op(THD *thd, Native *to) override; + String *str_op(String *str) override + { + DBUG_ASSERT(0); + return NULL; + } + void fix_arg_decimal(); + void fix_arg_int(const Type_handler *preferred, + const Type_std_attributes *preferred_attributes, + bool use_decimal_on_length_increase); + void fix_arg_hex_hybrid(); + void fix_arg_double(); + void fix_arg_time(); + void fix_arg_datetime(); + void fix_arg_temporal(const Type_handler *h, uint int_part_length); + bool fix_length_and_dec(THD *thd) override + { + /* + We don't want to translate ENUM/SET to CHAR here. + So let's real_type_handler(), not type_handler(). 
+ */ + return args[0]->real_type_handler()->Item_func_round_fix_length_and_dec(this); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_rand :public Item_real_func +{ + struct my_rnd_struct *rand; + bool first_eval; // TRUE if val_real() is called 1st time + bool check_arguments() const override + { return check_argument_types_can_return_int(0, arg_count); } + void seed_random (Item * val); +public: + Item_func_rand(THD *thd, Item *a): + Item_real_func(thd, a), rand(0), first_eval(TRUE) {} + Item_func_rand(THD *thd): Item_real_func(thd) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("rand") }; + return name; + } + bool const_item() const override { return 0; } + void update_used_tables() override; + bool fix_fields(THD *thd, Item **ref) override; + void cleanup() override { first_eval= TRUE; Item_real_func::cleanup(); } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_SESSION_FUNC); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_rownum final :public Item_longlong_func +{ + /* + This points to a variable that contains the number of rows + accpted so far in the result set + */ + ha_rows *accepted_rows; + SELECT_LEX *select; +public: + Item_func_rownum(THD *thd); + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("rownum") }; + return name; + } + enum Functype functype() const override { return ROWNUM_FUNC; } + void update_used_tables() override {} + bool const_item() const override { return 0; } + void fix_after_optimize(THD *thd) override; + bool fix_length_and_dec(THD *thd) override + { + unsigned_flag= 1; + used_tables_cache= RAND_TABLE_BIT; + const_item_cache=0; + set_maybe_null(); + return FALSE; + } + void cleanup() override 
+ { + Item_longlong_func::cleanup(); + /* Ensure we don't point to freed memory */ + accepted_rows= 0; + } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, + VCOL_IMPOSSIBLE); + } + bool check_handler_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, + VCOL_IMPOSSIBLE); + } + Item *get_copy(THD *thd) override { return 0; } + /* This function is used in insert, update and delete */ + void store_pointer_to_row_counter(ha_rows *row_counter) + { + accepted_rows= row_counter; + } +}; + +void fix_rownum_pointers(THD *thd, SELECT_LEX *select_lex, ha_rows *ptr); + + +class Item_func_sign :public Item_long_func +{ + bool check_arguments() const override + { return args[0]->check_type_can_return_real(func_name_cstring()); } +public: + Item_func_sign(THD *thd, Item *a): Item_long_func(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("sign") }; + return name; + } + decimal_digits_t decimal_precision() const override { return 1; } + bool fix_length_and_dec(THD *thd) override { fix_char_length(2); return FALSE; } + longlong val_int() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_units :public Item_real_func +{ + LEX_CSTRING name; + double mul,add; + bool check_arguments() const override + { return check_argument_types_can_return_real(0, arg_count); } +public: + Item_func_units(THD *thd, char *name_arg, Item *a, double mul_arg, + double add_arg): + Item_real_func(thd, a), mul(mul_arg), add(add_arg) + { + name.str= name_arg; + name.length= strlen(name_arg); + } + double val_real() override; + LEX_CSTRING func_name_cstring() const override { return name; } + bool fix_length_and_dec(THD *thd) override + { + decimals= NOT_FIXED_DEC; + max_length= float_length(decimals); + return FALSE; + } + Item *get_copy(THD *thd) override + { return 
get_item_copy(thd, this); } +}; + + +/** + Item_func_min_max does not derive from Item_func_hybrid_field_type + because the way how its methods val_xxx() and get_date() work depend + not only by its arguments, but also on the context in which + LEAST() and GREATEST() appear. + For example, using Item_func_min_max in a CAST like this: + CAST(LEAST('11','2') AS SIGNED) + forces Item_func_min_max to compare the arguments as numbers rather + than strings. + Perhaps this should be changed eventually (see MDEV-5893). +*/ +class Item_func_min_max :public Item_hybrid_func +{ + String tmp_value; + int cmp_sign; +protected: + bool check_arguments() const override + { + return false; // Checked by aggregate_for_min_max() + } + bool fix_attributes(Item **item, uint nitems); +public: + Item_func_min_max(THD *thd, List &list, int cmp_sign_arg): + Item_hybrid_func(thd, list), cmp_sign(cmp_sign_arg) + {} + String *val_str_native(String *str); + double val_real_native(); + longlong val_int_native(); + my_decimal *val_decimal_native(my_decimal *); + bool get_date_native(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate); + bool get_time_native(THD *thd, MYSQL_TIME *res); + + double val_real() override + { + DBUG_ASSERT(fixed()); + return Item_func_min_max::type_handler()-> + Item_func_min_max_val_real(this); + } + longlong val_int() override + { + DBUG_ASSERT(fixed()); + return Item_func_min_max::type_handler()-> + Item_func_min_max_val_int(this); + } + String *val_str(String *str) override + { + DBUG_ASSERT(fixed()); + return Item_func_min_max::type_handler()-> + Item_func_min_max_val_str(this, str); + } + my_decimal *val_decimal(my_decimal *dec) override + { + DBUG_ASSERT(fixed()); + return Item_func_min_max::type_handler()-> + Item_func_min_max_val_decimal(this, dec); + } + bool get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate) override + { + DBUG_ASSERT(fixed()); + return Item_func_min_max::type_handler()-> + Item_func_min_max_get_date(thd, this, res, fuzzydate); + } + 
bool val_native(THD *thd, Native *to) override; + void aggregate_attributes_real(Item **items, uint nitems) + { + /* + Aggregating attributes for the double data type for LEAST/GREATEST + is almost the same with aggregating for CASE-alike hybrid functions, + (CASE..THEN, COALESCE, IF, etc). + There is one notable difference though, when a numeric argument is mixed + with a string argument: + - CASE-alike functions return a string data type in such cases + COALESCE(10,'x') -> VARCHAR(2) = '10' + - LEAST/GREATEST returns double: + GREATEST(10,'10e4') -> DOUBLE = 100000 + As the string argument can represent a number in the scientific notation, + like in the example above, max_length of the result can be longer than + max_length of the arguments. To handle this properly, max_length is + additionally assigned to the result of float_length(decimals). + */ + Item_func::aggregate_attributes_real(items, nitems); + max_length= float_length(decimals); + } + bool fix_length_and_dec(THD *thd) override + { + if (aggregate_for_min_max(func_name_cstring(), args, arg_count)) + return true; + fix_attributes(args, arg_count); + return false; + } +}; + +class Item_func_min :public Item_func_min_max +{ +public: + Item_func_min(THD *thd, List &list): Item_func_min_max(thd, list, 1) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("least") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_max :public Item_func_min_max +{ +public: + Item_func_max(THD *thd, List &list): Item_func_min_max(thd, list, -1) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("greatest") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + Objects of this class are used for ROLLUP queries to wrap up + each constant item referred to in GROUP BY list. 
+*/ + +class Item_func_rollup_const :public Item_func +{ +public: + Item_func_rollup_const(THD *thd, Item *a): Item_func(thd, a) + { + name= a->name; + } + double val_real() override { return val_real_from_item(args[0]); } + longlong val_int() override { return val_int_from_item(args[0]); } + String *val_str(String *str) override + { return val_str_from_item(args[0], str); } + bool val_native(THD *thd, Native *to) override + { return val_native_from_item(thd, args[0], to); } + my_decimal *val_decimal(my_decimal *dec) override + { return val_decimal_from_item(args[0], dec); } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { return get_date_from_item(thd, args[0], ltime, fuzzydate); } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("rollup_const") }; + return name; + } + bool const_item() const override { return 0; } + const Type_handler *type_handler() const override + { return args[0]->type_handler(); } + bool fix_length_and_dec(THD *thd) override + { + Type_std_attributes::set(*args[0]); + return FALSE; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_long_func_length: public Item_long_func +{ + bool check_arguments() const override + { return args[0]->check_type_can_return_str(func_name_cstring()); } +public: + Item_long_func_length(THD *thd, Item *a): Item_long_func(thd, a) {} + bool fix_length_and_dec(THD *thd) override { max_length=10; return FALSE; } +}; + + +class Item_func_octet_length :public Item_long_func_length +{ + String value; +public: + Item_func_octet_length(THD *thd, Item *a): Item_long_func_length(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("octet_length") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_bit_length :public Item_longlong_func +{ + String 
value; +public: + Item_func_bit_length(THD *thd, Item *a): Item_longlong_func(thd, a) {} + bool fix_length_and_dec(THD *thd) override + { + max_length= 11; // 0x100000000*8 = 34,359,738,368 + return FALSE; + } + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("bit_length") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_char_length :public Item_long_func_length +{ + String value; +public: + Item_func_char_length(THD *thd, Item *a): Item_long_func_length(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("char_length") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_coercibility :public Item_long_func +{ + bool check_arguments() const override + { return args[0]->check_type_can_return_str(func_name_cstring()); } +public: + Item_func_coercibility(THD *thd, Item *a): Item_long_func(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("coercibility") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + max_length=10; + base_flags&= ~item_base_t::MAYBE_NULL; + return FALSE; + } + bool eval_not_null_tables(void *) override + { + not_null_tables_cache= 0; + return false; + } + bool find_not_null_fields(table_map allowed) override + { + return false; + } + Item* propagate_equal_fields(THD *thd, const Context &ctx, COND_EQUAL *cond) + override + { return this; } + bool const_item() const override { return true; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + In the corner case LOCATE could return (4,294,967,296 + 1), + which would not fit into Item_long_func range. 
+ But string lengths are limited with max_allowed_packet, + which cannot be bigger than 1024*1024*1024. +*/ +class Item_func_locate :public Item_long_func +{ + bool check_arguments() const override + { + return check_argument_types_can_return_str(0, 2) || + (arg_count > 2 && args[2]->check_type_can_return_int(func_name_cstring())); + } + String value1,value2; + DTCollation cmp_collation; +public: + Item_func_locate(THD *thd, Item *a, Item *b) + :Item_long_func(thd, a, b) {} + Item_func_locate(THD *thd, Item *a, Item *b, Item *c) + :Item_long_func(thd, a, b, c) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("locate") }; + return name; + } + longlong val_int() override; + bool fix_length_and_dec(THD *thd) override + { + max_length= MY_INT32_NUM_DECIMAL_DIGITS; + return agg_arg_charsets_for_comparison(cmp_collation, args, 2); + } + void print(String *str, enum_query_type query_type) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_field :public Item_long_func +{ + String value,tmp; + Item_result cmp_type; + DTCollation cmp_collation; +public: + Item_func_field(THD *thd, List &list): Item_long_func(thd, list) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("field") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_ascii :public Item_long_func +{ + bool check_arguments() const override + { return check_argument_types_can_return_str(0, arg_count); } + String value; +public: + Item_func_ascii(THD *thd, Item *a): Item_long_func(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("ascii") }; + return name; + } + bool fix_length_and_dec(THD *thd) override { max_length=3; return 
FALSE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_ord :public Item_long_func +{ + bool check_arguments() const override + { return args[0]->check_type_can_return_str(func_name_cstring()); } + String value; +public: + Item_func_ord(THD *thd, Item *a): Item_long_func(thd, a) {} + bool fix_length_and_dec(THD *thd) override { fix_char_length(7); return FALSE; } + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("ord") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_find_in_set :public Item_long_func +{ + bool check_arguments() const override + { return check_argument_types_can_return_str(0, 2); } + String value,value2; + uint enum_value; + ulonglong enum_bit; + DTCollation cmp_collation; +public: + Item_func_find_in_set(THD *thd, Item *a, Item *b): + Item_long_func(thd, a, b), enum_value(0) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("find_in_set") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +/* Base class for all bit functions: '~', '|', '^', '&', '>>', '<<' */ + +class Item_func_bit_operator: public Item_handled_func +{ + bool check_arguments() const override + { return check_argument_types_can_return_int(0, arg_count); } +protected: + bool fix_length_and_dec_op1_std(const Handler *ha_int, const Handler *ha_dec) + { + set_func_handler(args[0]->cmp_type() == INT_RESULT ? ha_int : ha_dec); + return m_func_handler->fix_length_and_dec(this); + } + bool fix_length_and_dec_op2_std(const Handler *ha_int, const Handler *ha_dec) + { + set_func_handler(args[0]->cmp_type() == INT_RESULT && + args[1]->cmp_type() == INT_RESULT ? 
ha_int : ha_dec); + return m_func_handler->fix_length_and_dec(this); + } +public: + Item_func_bit_operator(THD *thd, Item *a) + :Item_handled_func(thd, a) {} + Item_func_bit_operator(THD *thd, Item *a, Item *b) + :Item_handled_func(thd, a, b) {} + void print(String *str, enum_query_type query_type) override + { + print_op(str, query_type); + } + bool need_parentheses_in_default() override { return true; } +}; + +class Item_func_bit_or :public Item_func_bit_operator +{ +public: + Item_func_bit_or(THD *thd, Item *a, Item *b) + :Item_func_bit_operator(thd, a, b) {} + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("|") }; + return name; + } + enum precedence precedence() const override { return BITOR_PRECEDENCE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_bit_and :public Item_func_bit_operator +{ +public: + Item_func_bit_and(THD *thd, Item *a, Item *b) + :Item_func_bit_operator(thd, a, b) {} + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("&") }; + return name; + } + enum precedence precedence() const override { return BITAND_PRECEDENCE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_bit_count :public Item_handled_func +{ + bool check_arguments() const override + { return args[0]->check_type_can_return_int(func_name_cstring()); } +public: + Item_func_bit_count(THD *thd, Item *a): Item_handled_func(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("bit_count") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_shift_left :public Item_func_bit_operator +{ +public: + Item_func_shift_left(THD *thd, Item *a, 
Item *b) + :Item_func_bit_operator(thd, a, b) {} + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("<<") }; + return name; + } + enum precedence precedence() const override { return SHIFT_PRECEDENCE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_shift_right :public Item_func_bit_operator +{ +public: + Item_func_shift_right(THD *thd, Item *a, Item *b) + :Item_func_bit_operator(thd, a, b) {} + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN(">>") }; + return name; + } + enum precedence precedence() const override { return SHIFT_PRECEDENCE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_bit_neg :public Item_func_bit_operator +{ +public: + Item_func_bit_neg(THD *thd, Item *a): Item_func_bit_operator(thd, a) {} + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("~") }; + return name; + } + enum precedence precedence() const override { return NEG_PRECEDENCE; } + void print(String *str, enum_query_type query_type) override + { + str->append(func_name_cstring()); + args[0]->print_parenthesised(str, query_type, precedence()); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_last_insert_id :public Item_longlong_func +{ + bool check_arguments() const override + { return check_argument_types_can_return_int(0, arg_count); } +public: + Item_func_last_insert_id(THD *thd): Item_longlong_func(thd) {} + Item_func_last_insert_id(THD *thd, Item *a): Item_longlong_func(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("last_insert_id") }; + return name; + } + bool 
fix_length_and_dec(THD *thd) override + { + unsigned_flag= true; + if (arg_count) + max_length= args[0]->max_length; + return FALSE; + } + bool fix_fields(THD *thd, Item **ref) override; + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_benchmark :public Item_long_func +{ + bool check_arguments() const override + { + return args[0]->check_type_can_return_int(func_name_cstring()) || + args[1]->check_type_scalar(func_name_cstring()); + } +public: + Item_func_benchmark(THD *thd, Item *count_expr, Item *expr): + Item_long_func(thd, count_expr, expr) + {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("benchmark") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + max_length=1; + base_flags&= ~item_base_t::MAYBE_NULL; + return FALSE; + } + void print(String *str, enum_query_type query_type) override; + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +void item_func_sleep_init(void); +void item_func_sleep_free(void); + +class Item_func_sleep :public Item_long_func +{ + bool check_arguments() const override + { return args[0]->check_type_can_return_real(func_name_cstring()); } +public: + Item_func_sleep(THD *thd, Item *a): Item_long_func(thd, a) {} + bool fix_length_and_dec(THD *thd) override { fix_char_length(1); return FALSE; } + bool const_item() const override { return 0; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("sleep") }; + return name; + } + table_map used_tables() const override + { + return used_tables_cache | RAND_TABLE_BIT; + } + bool is_expensive() 
override { return 1; } + longlong val_int() override; + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + + +#ifdef HAVE_DLOPEN + +class Item_udf_func :public Item_func +{ + /** + Mark "this" as non-deterministic if it uses no tables + and is not a constant at the same time. + */ + void set_non_deterministic_if_needed() + { + if (!const_item_cache && !used_tables_cache) + used_tables_cache= RAND_TABLE_BIT; + } +protected: + udf_handler udf; + bool is_expensive_processor(void *arg) override { return TRUE; } + + class VDec_udf: public Dec_ptr_and_buffer + { + public: + VDec_udf(Item_udf_func *func, udf_handler *udf) + { + my_bool tmp_null_value; + m_ptr= udf->val_decimal(&tmp_null_value, &m_buffer); + DBUG_ASSERT(is_null() == (tmp_null_value != 0)); + func->null_value= is_null(); + } + }; + +public: + Item_udf_func(THD *thd, udf_func *udf_arg): + Item_func(thd), udf(udf_arg) {} + Item_udf_func(THD *thd, udf_func *udf_arg, List &list): + Item_func(thd, list), udf(udf_arg) {} + LEX_CSTRING func_name_cstring() const override + { + const char *tmp= udf.name(); + return { tmp, strlen(tmp) }; + } + enum Functype functype() const override { return UDF_FUNC; } + bool fix_fields(THD *thd, Item **ref) override + { + DBUG_ASSERT(fixed() == 0); + bool res= udf.fix_fields(thd, this, arg_count, args); + set_non_deterministic_if_needed(); + base_flags|= item_base_t::FIXED; + return res; + } + void fix_num_length_and_dec(); + void update_used_tables() override + { + /* + TODO: Make a member in UDF_INIT and return if a UDF is deterministic or + not. + Currently UDF_INIT has a member (const_item) that is an in/out + parameter to the init() call. + The code in udf_handler::fix_fields also duplicates the arguments + handling code in Item_func::fix_fields(). 
+ + The lack of information if a UDF is deterministic makes writing + a correct update_used_tables() for UDFs impossible. + One solution to this would be : + - Add a is_deterministic member of UDF_INIT + - (optionally) deprecate the const_item member of UDF_INIT + - Take away the duplicate code from udf_handler::fix_fields() and + make Item_udf_func call Item_func::fix_fields() to process its + arguments as for any other function. + - Store the deterministic flag returned by _init into the + udf_handler. + - Don't implement Item_udf_func::fix_fields, implement + Item_udf_func::fix_length_and_dec() instead (similar to non-UDF + functions). + - Override Item_func::update_used_tables to call + Item_func::update_used_tables() and add a RAND_TABLE_BIT to the + result of Item_func::update_used_tables() if the UDF is + non-deterministic. + - (optionally) rename RAND_TABLE_BIT to NONDETERMINISTIC_BIT to + better describe its usage. + + The above would require a change of the UDF API. + Until that change is done here's how the current code works: + We call Item_func::update_used_tables() only when we know that + the function depends on real non-const tables and is deterministic. + This can be done only because we know that the optimizer will + call update_used_tables() only when there's possibly a new const + table. So update_used_tables() can only make a Item_func more + constant than it is currently. + That's why we don't need to do anything if a function is guaranteed + to return non-constant (it's non-deterministic) or is already a + const. 
+ */ + if ((used_tables_cache & ~PSEUDO_TABLE_BITS) && + !(used_tables_cache & RAND_TABLE_BIT)) + { + Item_func::update_used_tables(); + set_non_deterministic_if_needed(); + } + } + void cleanup() override; + bool eval_not_null_tables(void *opt_arg) override + { + not_null_tables_cache= 0; + return 0; + } + bool find_not_null_fields(table_map allowed) override + { + return false; + } + bool is_expensive() override { return 1; } + void print(String *str, enum_query_type query_type) override; + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, + VCOL_NON_DETERMINISTIC); + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + return type_handler()->Item_get_date_with_warn(thd, this, ltime, fuzzydate); + } + bool excl_dep_on_grouping_fields(st_select_lex *sel) override + { return false; } +}; + + +class Item_func_udf_float :public Item_udf_func +{ + public: + Item_func_udf_float(THD *thd, udf_func *udf_arg): + Item_udf_func(thd, udf_arg) {} + Item_func_udf_float(THD *thd, udf_func *udf_arg, + List &list): + Item_udf_func(thd, udf_arg, list) {} + longlong val_int() override + { + DBUG_ASSERT(fixed()); + return Converter_double_to_longlong(Item_func_udf_float::val_real(), + unsigned_flag).result(); + } + my_decimal *val_decimal(my_decimal *dec_buf) override + { + double res=val_real(); + if (null_value) + return NULL; + double2my_decimal(E_DEC_FATAL_ERROR, res, dec_buf); + return dec_buf; + } + double val_real() override; + String *val_str(String *str) override; + const Type_handler *type_handler() const override + { return &type_handler_double; } + bool fix_length_and_dec(THD *thd) override { fix_num_length_and_dec(); return FALSE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_udf_int :public Item_udf_func +{ +public: + Item_func_udf_int(THD *thd, udf_func *udf_arg): + Item_udf_func(thd, udf_arg) {} + 
Item_func_udf_int(THD *thd, udf_func *udf_arg, + List &list): + Item_udf_func(thd, udf_arg, list) {} + longlong val_int() override; + double val_real() override { return (double) Item_func_udf_int::val_int(); } + my_decimal *val_decimal(my_decimal *decimal_value) override + { + return val_decimal_from_int(decimal_value); + } + String *val_str(String *str) override; + const Type_handler *type_handler() const override + { + if (unsigned_flag) + return &type_handler_ulonglong; + return &type_handler_slonglong; + } + bool fix_length_and_dec(THD *thd) override { decimals= 0; max_length= 21; return FALSE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_udf_decimal :public Item_udf_func +{ +public: + Item_func_udf_decimal(THD *thd, udf_func *udf_arg): + Item_udf_func(thd, udf_arg) {} + Item_func_udf_decimal(THD *thd, udf_func *udf_arg, List &list): + Item_udf_func(thd, udf_arg, list) {} + longlong val_int() override + { + return VDec_udf(this, &udf).to_longlong(unsigned_flag); + } + double val_real() override + { + return VDec_udf(this, &udf).to_double(); + } + my_decimal *val_decimal(my_decimal *) override; + String *val_str(String *str) override + { + return VDec_udf(this, &udf).to_string_round(str, decimals); + } + const Type_handler *type_handler() const override + { return &type_handler_newdecimal; } + bool fix_length_and_dec(THD *thd) override { fix_num_length_and_dec(); return FALSE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_udf_str :public Item_udf_func +{ +public: + Item_func_udf_str(THD *thd, udf_func *udf_arg): + Item_udf_func(thd, udf_arg) {} + Item_func_udf_str(THD *thd, udf_func *udf_arg, List &list): + Item_udf_func(thd, udf_arg, list) {} + String *val_str(String *) override; + double val_real() override + { + int err_not_used; + char *end_not_used; + String *res; + res= val_str(&str_value); + return res ? 
res->charset()->strntod((char*) res->ptr(), res->length(), + &end_not_used, &err_not_used) : 0.0; + } + longlong val_int() override + { + int err_not_used; + String *res; res=val_str(&str_value); + return res ? res->charset()->strntoll(res->ptr(),res->length(),10, + (char**) 0, &err_not_used) : (longlong) 0; + } + my_decimal *val_decimal(my_decimal *dec_buf) override + { + String *res=val_str(&str_value); + if (!res) + return NULL; + string2my_decimal(E_DEC_FATAL_ERROR, res, dec_buf); + return dec_buf; + } + const Type_handler *type_handler() const override + { return string_type_handler(); } + bool fix_length_and_dec(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +#else /* Dummy functions to get yy_*.cc files compiled */ + +class Item_func_udf_float :public Item_real_func +{ + public: + Item_func_udf_float(THD *thd, udf_func *udf_arg): + Item_real_func(thd) {} + Item_func_udf_float(THD *thd, udf_func *udf_arg, List &list): + Item_real_func(thd, list) {} + double val_real() { DBUG_ASSERT(fixed()); return 0.0; } +}; + + +class Item_func_udf_int :public Item_int_func +{ +public: + Item_func_udf_int(THD *thd, udf_func *udf_arg): + Item_int_func(thd) {} + Item_func_udf_int(THD *thd, udf_func *udf_arg, List &list): + Item_int_func(thd, list) {} + const Type_handler *type_handler() const override + { return &type_handler_slonglong; } + longlong val_int() { DBUG_ASSERT(fixed()); return 0; } +}; + + +class Item_func_udf_decimal :public Item_int_func +{ +public: + Item_func_udf_decimal(THD *thd, udf_func *udf_arg): + Item_int_func(thd) {} + Item_func_udf_decimal(THD *thd, udf_func *udf_arg, List &list): + Item_int_func(thd, list) {} + const Type_handler *type_handler() const override + { return &type_handler_slonglong; } + my_decimal *val_decimal(my_decimal *) { DBUG_ASSERT(fixed()); return 0; } +}; + + +class Item_func_udf_str :public Item_func +{ +public: + Item_func_udf_str(THD *thd, udf_func *udf_arg): + 
Item_func(thd) {} + Item_func_udf_str(THD *thd, udf_func *udf_arg, List &list): + Item_func(thd, list) {} + String *val_str(String *) + { DBUG_ASSERT(fixed()); null_value=1; return 0; } + double val_real() { DBUG_ASSERT(fixed()); null_value= 1; return 0.0; } + longlong val_int() { DBUG_ASSERT(fixed()); null_value=1; return 0; } + bool fix_length_and_dec(THD *thd) override + { base_flags|= item_base_t::MAYBE_NULL; max_length=0; return FALSE; } +}; + +#endif /* HAVE_DLOPEN */ + +void mysql_ull_cleanup(THD *thd); +void mysql_ull_set_explicit_lock_duration(THD *thd); + + +class Item_func_lock :public Item_long_func +{ + public: + Item_func_lock(THD *thd): Item_long_func(thd) { } + Item_func_lock(THD *thd, Item *a): Item_long_func(thd, a) {} + Item_func_lock(THD *thd, Item *a, Item *b): Item_long_func(thd, a, b) {} + table_map used_tables() const override + { + return used_tables_cache | RAND_TABLE_BIT; + } + bool const_item() const override { return 0; } + bool is_expensive() override { return 1; } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE); + } +}; + + +class Item_func_get_lock final :public Item_func_lock +{ + bool check_arguments() const override + { + return args[0]->check_type_general_purpose_string(func_name_cstring()) || + args[1]->check_type_can_return_real(func_name_cstring()); + } + String value; + public: + Item_func_get_lock(THD *thd, Item *a, Item *b) :Item_func_lock(thd, a, b) {} + longlong val_int() final; + LEX_CSTRING func_name_cstring() const override final + { + static LEX_CSTRING name= {STRING_WITH_LEN("get_lock") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + max_length= 1; + set_maybe_null(); + return FALSE; + } + Item *get_copy(THD *thd) final + { return get_item_copy(thd, this); } +}; + + +class Item_func_release_all_locks final :public Item_func_lock +{ +public: + Item_func_release_all_locks(THD *thd): Item_func_lock(thd) + { 
unsigned_flag= 1; } + longlong val_int() final; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("release_all_locks") }; + return name; + } + Item *get_copy(THD *thd) final + { return get_item_copy(thd, this); } +}; + + +class Item_func_release_lock final :public Item_func_lock +{ + bool check_arguments() const override + { return args[0]->check_type_general_purpose_string(func_name_cstring()); } + String value; +public: + Item_func_release_lock(THD *thd, Item *a): Item_func_lock(thd, a) {} + longlong val_int() final; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("release_lock") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + max_length= 1; + set_maybe_null(); + return FALSE; + } + Item *get_copy(THD *thd) final + { return get_item_copy(thd, this); } +}; + + +/* replication functions */ + +class Item_master_pos_wait :public Item_longlong_func +{ + bool check_arguments() const override + { + return + args[0]->check_type_general_purpose_string(func_name_cstring()) || + args[1]->check_type_can_return_int(func_name_cstring()) || + (arg_count > 2 && args[2]->check_type_can_return_int(func_name_cstring())) || + (arg_count > 3 && args[3]->check_type_general_purpose_string(func_name_cstring())); + } + String value; +public: + Item_master_pos_wait(THD *thd, Item *a, Item *b) + :Item_longlong_func(thd, a, b) {} + Item_master_pos_wait(THD *thd, Item *a, Item *b, Item *c): + Item_longlong_func(thd, a, b, c) {} + Item_master_pos_wait(THD *thd, Item *a, Item *b, Item *c, Item *d): + Item_longlong_func(thd, a, b, c, d) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("master_pos_wait") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + max_length=21; + set_maybe_null(); + return FALSE; + } + bool check_vcol_func_processor(void *arg) override + { + return 
mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_master_gtid_wait :public Item_long_func +{ + bool check_arguments() const override + { + return args[0]->check_type_general_purpose_string(func_name_cstring()) || + (arg_count > 1 && args[1]->check_type_can_return_real(func_name_cstring())); + } + String value; +public: + Item_master_gtid_wait(THD *thd, Item *a) + :Item_long_func(thd, a) {} + Item_master_gtid_wait(THD *thd, Item *a, Item *b) + :Item_long_func(thd, a, b) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("master_gtid_wait") }; + return name; + } + bool fix_length_and_dec(THD *thd) override { max_length=2; return FALSE; } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* Handling of user definable variables */ + +class user_var_entry; + + +/** + A class to set and get user variables +*/ +class Item_func_user_var :public Item_hybrid_func +{ +protected: + user_var_entry *m_var_entry; +public: + LEX_CSTRING name; // keep it public + Item_func_user_var(THD *thd, const LEX_CSTRING *a) + :Item_hybrid_func(thd), m_var_entry(NULL), name(*a) { } + Item_func_user_var(THD *thd, const LEX_CSTRING *a, Item *b) + :Item_hybrid_func(thd, b), m_var_entry(NULL), name(*a) { } + Item_func_user_var(THD *thd, Item_func_user_var *item) + :Item_hybrid_func(thd, item), + m_var_entry(item->m_var_entry), name(item->name) { } + Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src *src, + const Tmp_field_param *param) + { + DBUG_ASSERT(fixed()); + return create_tmp_field_ex_from_handler(root, table, src, param, + type_handler()); + } + Field *create_field_for_create_select(MEM_ROOT *root, TABLE 
*table) + { return create_table_field_from_handler(root, table); } + bool check_vcol_func_processor(void *arg); + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) + { + return type_handler()->Item_get_date_with_warn(thd, this, ltime, fuzzydate); + } +}; + + +class Item_func_set_user_var :public Item_func_user_var +{ + /* + The entry_thread_id variable is used: + 1) to skip unnecessary updates of the entry field (see above); + 2) to reset the entry field that was initialized in the other thread + (for example, an item tree of a trigger that updates user variables + may be shared between several connections, and the entry_thread_id field + prevents updates of one connection user variables from a concurrent + connection calling the same trigger that initially updated some + user variable it the first connection context). + */ + my_thread_id entry_thread_id; + String value; + my_decimal decimal_buff; + bool null_item; + union + { + longlong vint; + double vreal; + String *vstr; + my_decimal *vdec; + } save_result; + +public: + Item_func_set_user_var(THD *thd, const LEX_CSTRING *a, Item *b): + Item_func_user_var(thd, a, b), + entry_thread_id(0) + {} + Item_func_set_user_var(THD *thd, Item_func_set_user_var *item) + :Item_func_user_var(thd, item), + entry_thread_id(item->entry_thread_id), + value(item->value), decimal_buff(item->decimal_buff), + null_item(item->null_item), save_result(item->save_result) + {} + + enum Functype functype() const override { return SUSERVAR_FUNC; } + double val_real() override; + longlong val_int() override; + String *val_str(String *str) override; + my_decimal *val_decimal(my_decimal *) override; + double val_result() override; + longlong val_int_result() override; + bool val_bool_result() override; + String *str_result(String *str) override; + my_decimal *val_decimal_result(my_decimal *) override; + bool is_null_result() override; + bool update_hash(void *ptr, size_t length, enum Item_result type, + CHARSET_INFO *cs, bool 
unsigned_arg); + bool send(Protocol *protocol, st_value *buffer) override; + void make_send_field(THD *thd, Send_field *tmp_field) override; + bool check(bool use_result_field); + void save_item_result(Item *item); + bool update(); + bool fix_fields(THD *thd, Item **ref) override; + bool fix_length_and_dec(THD *thd) override; + void print(String *str, enum_query_type query_type) override; + enum precedence precedence() const override { return ASSIGN_PRECEDENCE; } + void print_as_stmt(String *str, enum_query_type query_type); + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("set_user_var") }; + return name; + } + int save_in_field(Field *field, bool no_conversions, + bool can_use_result_field); + int save_in_field(Field *field, bool no_conversions) override + { + return save_in_field(field, no_conversions, 1); + } + void save_org_in_field(Field *field, + fast_field_copier data __attribute__ ((__unused__))) + override + { (void) save_in_field(field, 1, 0); } + bool register_field_in_read_map(void *arg) override; + bool register_field_in_bitmap(void *arg) override; + bool set_entry(THD *thd, bool create_if_not_exists); + void cleanup() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + bool excl_dep_on_table(table_map tab_map) override { return false; } +}; + + +class Item_func_get_user_var :public Item_func_user_var, + private Settable_routine_parameter +{ +public: + Item_func_get_user_var(THD *thd, const LEX_CSTRING *a): + Item_func_user_var(thd, a) {} + enum Functype functype() const override { return GUSERVAR_FUNC; } + LEX_CSTRING get_name() { return name; } + double val_real() override; + longlong val_int() override; + my_decimal *val_decimal(my_decimal*) override; + String *val_str(String* str) override; + bool fix_length_and_dec(THD *thd) override; + void print(String *str, enum_query_type query_type) override; + /* + We must always return variables as strings to guard against 
selects of type + select @t1:=1,@t1,@t:="hello",@t from foo where (@t1:= t2.b) + */ + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("get_user_var") }; + return name; + } + bool const_item() const override; + table_map used_tables() const override + { return const_item() ? 0 : RAND_TABLE_BIT; } + bool eq(const Item *item, bool binary_cmp) const override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +private: + bool set_value(THD *thd, sp_rcontext *ctx, Item **it) override; + +public: + Settable_routine_parameter *get_settable_routine_parameter() override + { + return this; + } +}; + + +/* + This item represents user variable used as out parameter (e.g in LOAD DATA), + and it is supposed to be used only for this purprose. So it is simplified + a lot. Actually you should never obtain its value. + + The only two reasons for this thing being an Item is possibility to store it + in List and desire to place this code somewhere near other functions + working with user variables. 
+*/ +class Item_user_var_as_out_param :public Item_fixed_hybrid, + public Load_data_outvar +{ + LEX_CSTRING org_name; + user_var_entry *entry; +public: + Item_user_var_as_out_param(THD *thd, const LEX_CSTRING *a) + :Item_fixed_hybrid(thd) + { + DBUG_ASSERT(a->length < UINT_MAX32); + org_name= *a; + set_name(thd, a->str, a->length, system_charset_info); + } + Load_data_outvar *get_load_data_outvar() override + { + return this; + } + bool load_data_set_null(THD *thd, const Load_data_param *param) override + { + set_null_value(param->charset()); + return false; + } + bool load_data_set_no_data(THD *thd, const Load_data_param *param) override + { + set_null_value(param->charset()); + return false; + } + bool load_data_set_value(THD *thd, const char *pos, uint length, + const Load_data_param *param) override + { + set_value(pos, length, param->charset()); + return false; + } + void load_data_print_for_log_event(THD *thd, String *to) const override; + bool load_data_add_outvar(THD *thd, Load_data_param *param) const override + { + return param->add_outvar_user_var(thd); + } + uint load_data_fixed_length() const override + { + return 0; + } + Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src *src, + const Tmp_field_param *param) override + { + DBUG_ASSERT(0); + return NULL; + } + /* We should return something different from FIELD_ITEM here */ + enum Type type() const override { return CONST_ITEM;} + double val_real() override; + longlong val_int() override; + String *val_str(String *str) override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + my_decimal *val_decimal(my_decimal *decimal_buffer) override; + /* fix_fields() binds variable name with its entry structure */ + bool fix_fields(THD *thd, Item **ref) override; + void set_null_value(CHARSET_INFO* cs); + void set_value(const char *str, uint length, CHARSET_INFO* cs); + const Type_handler *type_handler() const override + { return &type_handler_double; } + Item 
*get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* A system variable */ + +#define GET_SYS_VAR_CACHE_LONG 1 +#define GET_SYS_VAR_CACHE_DOUBLE 2 +#define GET_SYS_VAR_CACHE_STRING 4 + +class Item_func_get_system_var :public Item_func +{ + sys_var *var; + enum_var_type var_type, orig_var_type; + LEX_CSTRING component; + longlong cached_llval; + double cached_dval; + String cached_strval; + bool cached_null_value; + query_id_t used_query_id; + uchar cache_present; + +public: + Item_func_get_system_var(THD *thd, sys_var *var_arg, + enum_var_type var_type_arg, + LEX_CSTRING *component_arg, const char *name_arg, + size_t name_len_arg); + enum Functype functype() const override { return GSYSVAR_FUNC; } + void update_null_value() override; + bool fix_length_and_dec(THD *thd) override; + void print(String *str, enum_query_type query_type) override; + bool const_item() const override { return true; } + table_map used_tables() const override { return 0; } + const Type_handler *type_handler() const override; + double val_real() override; + longlong val_int() override; + String* val_str(String*) override; + my_decimal *val_decimal(my_decimal *dec_buf) override + { return val_decimal_from_real(dec_buf); } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + return type_handler()->Item_get_date_with_warn(thd, this, ltime, fuzzydate); + } + /* TODO: fix to support views */ + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("get_system_var") }; + return name; + } + /** + Indicates whether this system variable is written to the binlog or not. + + Variables are written to the binlog as part of "status_vars" in + Query_log_event, as an Intvar_log_event, or a Rand_log_event. + + @return true if the variable is written to the binlog, false otherwise. 
+ */ + bool is_written_to_binlog(); + bool eq(const Item *item, bool binary_cmp) const override; + + void cleanup() override; + bool check_vcol_func_processor(void *arg) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* for fulltext search */ + +class Item_func_match :public Item_real_func +{ +public: + uint key, match_flags; + bool join_key; + DTCollation cmp_collation; + FT_INFO *ft_handler; + TABLE *table; + Item_func_match *master; // for master-slave optimization + Item *concat_ws; // Item_func_concat_ws + String value; // value of concat_ws + String search_value; // key_item()'s value converted to cmp_collation + + Item_func_match(THD *thd, List &a, uint b): + Item_real_func(thd, a), key(0), match_flags(b), join_key(0), ft_handler(0), + table(0), master(0), concat_ws(0) { } + void cleanup() override + { + DBUG_ENTER("Item_func_match::cleanup"); + Item_real_func::cleanup(); + if (!master && ft_handler) + ft_handler->please->close_search(ft_handler); + ft_handler= 0; + concat_ws= 0; + table= 0; // required by Item_func_match::eq() + DBUG_VOID_RETURN; + } + bool is_expensive_processor(void *arg) override { return TRUE; } + enum Functype functype() const override { return FT_FUNC; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("match") }; + return name; + } + bool eval_not_null_tables(void *opt_arg) override + { + not_null_tables_cache= 0; + return 0; + } + bool find_not_null_fields(table_map allowed) override + { + return false; + } + bool fix_fields(THD *thd, Item **ref) override; + bool eq(const Item *, bool binary_cmp) const override; + /* The following should be safe, even if we compare doubles */ + longlong val_int() override { DBUG_ASSERT(fixed()); return val_real() != 0.0; } + double val_real() override; + void print(String *str, enum_query_type query_type) override; + + bool fix_index(); + bool init_search(THD *thd, bool no_order); + bool 
check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function("match ... against()", arg, VCOL_IMPOSSIBLE); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + Item *build_clone(THD *thd) override { return 0; } +private: + /** + Check whether storage engine for given table, + allows FTS Boolean search on non-indexed columns. + + @todo A flag should be added to the extended fulltext API so that + it may be checked whether search on non-indexed columns are + supported. Currently, it is not possible to check for such a + flag since @c this->ft_handler is not yet set when this function is + called. The current hack is to assume that search on non-indexed + columns are supported for engines that does not support the extended + fulltext API (e.g., MyISAM), while it is not supported for other + engines (e.g., InnoDB) + + @param table_arg Table for which storage engine to check + + @retval true if BOOLEAN search on non-indexed columns is supported + @retval false otherwise + */ + bool allows_search_on_non_indexed_columns(TABLE* table_arg) + { + // Only Boolean search may support non_indexed columns + if (!(match_flags & FT_BOOL)) + return false; + + DBUG_ASSERT(table_arg && table_arg->file); + + // Assume that if extended fulltext API is not supported, + // non-indexed columns are allowed. This will be true for MyISAM. 
+ if ((table_arg->file->ha_table_flags() & HA_CAN_FULLTEXT_EXT) == 0) + return true; + + return false; + } +}; + + +class Item_func_bit_xor : public Item_func_bit_operator +{ +public: + Item_func_bit_xor(THD *thd, Item *a, Item *b) + :Item_func_bit_operator(thd, a, b) {} + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("^") }; + return name; + } + enum precedence precedence() const override { return BITXOR_PRECEDENCE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_is_free_lock :public Item_long_func +{ + bool check_arguments() const override + { return args[0]->check_type_general_purpose_string(func_name_cstring()); } + String value; +public: + Item_func_is_free_lock(THD *thd, Item *a): Item_long_func(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("is_free_lock") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + max_length=1; + set_maybe_null(); + return FALSE; + } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_is_used_lock :public Item_long_func +{ + bool check_arguments() const override + { return args[0]->check_type_general_purpose_string(func_name_cstring()); } + String value; +public: + Item_func_is_used_lock(THD *thd, Item *a): Item_long_func(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("is_used_lock") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals=0; max_length=10; + set_maybe_null(); + return FALSE; + } + bool check_vcol_func_processor(void *arg) override + { + return 
mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +struct Lex_cast_type_st: public Lex_length_and_dec_st +{ +private: + const Type_handler *m_type_handler; + CHARSET_INFO *m_charset; +public: + void set(const Type_handler *handler, + Lex_length_and_dec_st length_and_dec, + CHARSET_INFO *cs= NULL) + { + m_type_handler= handler; + m_charset= cs; + Lex_length_and_dec_st::operator=(length_and_dec); + } + bool set(const Type_handler *handler, + const Lex_length_and_dec_st & length_and_dec, + const Lex_column_charset_collation_attrs_st &cscl, + CHARSET_INFO *defcs) + { + CHARSET_INFO *tmp= cscl.resolved_to_character_set(defcs); + if (!tmp) + return true; + set(handler, length_and_dec, tmp); + return false; + } + void set(const Type_handler *handler) + { + m_type_handler= handler; + m_charset= NULL; + Lex_length_and_dec_st::reset(); + } + const Type_handler *type_handler() const { return m_type_handler; } + CHARSET_INFO *charset() const { return m_charset; } + Item *create_typecast_item(THD *thd, Item *item) const + { + return m_type_handler-> + create_typecast_item(thd, item, Type_cast_attributes(*this, m_charset)); + } + Item *create_typecast_item_or_error(THD *thd, Item *item) const; +}; + + +class Item_func_row_count :public Item_longlong_func +{ +public: + Item_func_row_count(THD *thd): Item_longlong_func(thd) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("row_count") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals= 0; + base_flags&= ~item_base_t::MAYBE_NULL; + return FALSE; + } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + * + * Stored FUNCTIONs + * + */ + +class 
Item_func_sp :public Item_func, + public Item_sp +{ +private: + const Sp_handler *m_handler; + + bool execute(); + +protected: + bool is_expensive_processor(void *arg) override + { return is_expensive(); } + + bool check_arguments() const override + { + // sp_prepare_func_item() checks that the number of columns is correct + return false; + } +public: + + Item_func_sp(THD *thd, Name_resolution_context *context_arg, + sp_name *name, const Sp_handler *sph); + + Item_func_sp(THD *thd, Name_resolution_context *context_arg, + sp_name *name, const Sp_handler *sph, List &list); + + virtual ~Item_func_sp() = default; + + void update_used_tables() override; + + void cleanup() override; + + LEX_CSTRING func_name_cstring() const override; + + const Type_handler *type_handler() const override; + + Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src *src, + const Tmp_field_param *param) override; + Field *create_field_for_create_select(MEM_ROOT *root, TABLE *table) override + { + return result_type() != STRING_RESULT ? + sp_result_field : + create_table_field_from_handler(root, table); + } + void make_send_field(THD *thd, Send_field *tmp_field) override; + + longlong val_int() override + { + if (execute()) + return (longlong) 0; + return sp_result_field->val_int(); + } + + double val_real() override + { + if (execute()) + return 0.0; + return sp_result_field->val_real(); + } + + my_decimal *val_decimal(my_decimal *dec_buf) override + { + if (execute()) + return NULL; + return sp_result_field->val_decimal(dec_buf); + } + + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + if (execute()) + return true; + return sp_result_field->get_date(ltime, fuzzydate); + } + + String *val_str(String *str) override + { + String buf; + char buff[20]; + buf.set(buff, 20, str->charset()); + buf.length(0); + if (execute()) + return NULL; + /* + result_field will set buf pointing to internal buffer + of the resul_field. 
Due to this it will change any time + when SP is executed. In order to prevent occasional + corruption of returned value, we make here a copy. + */ + sp_result_field->val_str(&buf); + str->copy(buf); + return str; + } + + bool val_native(THD *thd, Native *to) override + { + if (execute()) + return true; + return (null_value= sp_result_field->val_native(to)); + } + + void update_null_value() override + { + execute(); + } + + bool change_context_processor(void *cntx) override + { context= (Name_resolution_context *)cntx; return FALSE; } + + enum Functype functype() const override { return FUNC_SP; } + + bool fix_fields(THD *thd, Item **ref) override; + bool fix_length_and_dec(THD *thd) override; + bool is_expensive() override; + + inline Field *get_sp_result_field() + { + return sp_result_field; + } + const sp_name *get_sp_name() const + { + return m_name; + } + + bool check_vcol_func_processor(void *arg) override; + bool limit_index_condition_pushdown_processor(void *opt_arg) override + { + return TRUE; + } + Item *get_copy(THD *) override { return 0; } + bool eval_not_null_tables(void *opt_arg) override + { + not_null_tables_cache= 0; + return 0; + } + bool excl_dep_on_grouping_fields(st_select_lex *sel) override + { return false; } + bool find_not_null_fields(table_map allowed) override + { + return false; + } +}; + + +class Item_func_found_rows :public Item_longlong_func +{ +public: + Item_func_found_rows(THD *thd): Item_longlong_func(thd) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("found_rows") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals= 0; + base_flags&= ~item_base_t::MAYBE_NULL; + return FALSE; + } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class 
Item_func_oracle_sql_rowcount :public Item_longlong_func +{ +public: + Item_func_oracle_sql_rowcount(THD *thd): Item_longlong_func(thd) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("SQL%ROWCOUNT") }; + return name; + } + void print(String *str, enum_query_type query_type) override + { + str->append(func_name_cstring()); + } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_sqlcode: public Item_long_func +{ +public: + Item_func_sqlcode(THD *thd): Item_long_func(thd) { } + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("SQLCODE") }; + return name; + } + void print(String *str, enum_query_type query_type) override + { + str->append(func_name_cstring()); + } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE); + } + bool fix_length_and_dec(THD *thd) override + { + base_flags&= ~item_base_t::MAYBE_NULL; + null_value= false; + max_length= 11; + return FALSE; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +void uuid_short_init(); +ulonglong server_uuid_value(); + +class Item_func_uuid_short :public Item_longlong_func +{ +public: + Item_func_uuid_short(THD *thd): Item_longlong_func(thd) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("uuid_short") }; + return name; + } + longlong val_int() override; + bool const_item() const override { return false; } + bool fix_length_and_dec(THD *thd) override + { max_length= 21; unsigned_flag=1; return FALSE; } + table_map used_tables() const override { return RAND_TABLE_BIT; } + bool check_vcol_func_processor(void *arg) 
override + { + return mark_unsupported_function(func_name(), "()", arg, + VCOL_NON_DETERMINISTIC); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_last_value :public Item_func +{ +protected: + Item *last_value; +public: + Item_func_last_value(THD *thd, List &list): Item_func(thd, list) {} + double val_real() override; + longlong val_int() override; + String *val_str(String *) override; + my_decimal *val_decimal(my_decimal *) override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + bool val_native(THD *thd, Native *) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("last_value") }; + return name; + } + const Type_handler *type_handler() const override + { return last_value->type_handler(); } + bool eval_not_null_tables(void *) override + { + not_null_tables_cache= 0; + return 0; + } + bool find_not_null_fields(table_map allowed) override + { + return false; + } + bool const_item() const override { return 0; } + void evaluate_sideeffects(); + void update_used_tables() override + { + Item_func::update_used_tables(); + copy_flags(last_value, item_base_t::MAYBE_NULL); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* Implementation for sequences: NEXT VALUE FOR sequence and NEXTVAL() */ + +class Item_func_nextval :public Item_longlong_func +{ +protected: + TABLE_LIST *table_list; + TABLE *table; +public: + Item_func_nextval(THD *thd, TABLE_LIST *table_list_arg): + Item_longlong_func(thd), table_list(table_list_arg) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("nextval") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + unsigned_flag= 0; + max_length= MAX_BIGINT_WIDTH; + set_maybe_null(); /* In case of errors */ + return FALSE; + } + 
/* + update_table() function must be called during the value function + as in case of DEFAULT the sequence table may not yet be open + while fix_fields() are called + */ + void update_table() + { + if (!(table= table_list->table)) + { + /* + If nextval was used in DEFAULT then next_local points to + the table_list used by to open the sequence table + */ + table= table_list->next_local->table; + } + } + bool const_item() const override { return 0; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + void print(String *str, enum_query_type query_type) override; + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_NEXTVAL); + } +}; + + +/* Implementation for sequences: LASTVAL(sequence), PostgreSQL style */ + +class Item_func_lastval :public Item_func_nextval +{ +public: + Item_func_lastval(THD *thd, TABLE_LIST *table_list_arg): + Item_func_nextval(thd, table_list_arg) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("lastval") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* Implementation for sequences: SETVAL(sequence), PostgreSQL style */ + +class Item_func_setval :public Item_func_nextval +{ + longlong nextval; + ulonglong round; + bool is_used; +public: + Item_func_setval(THD *thd, TABLE_LIST *table_list_arg, longlong nextval_arg, + ulonglong round_arg, bool is_used_arg) + : Item_func_nextval(thd, table_list_arg), + nextval(nextval_arg), round(round_arg), is_used(is_used_arg) + {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("setval") }; + return name; + } + void print(String *str, enum_query_type query_type) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +Item *get_system_var(THD *thd, enum_var_type 
var_type, + const LEX_CSTRING *name, const LEX_CSTRING *component); +extern bool check_reserved_words(const LEX_CSTRING *name); +double my_double_round(double value, longlong dec, bool dec_unsigned, + bool truncate); + +extern bool volatile mqh_used; + +bool update_hash(user_var_entry *entry, bool set_null, void *ptr, size_t length, + Item_result type, CHARSET_INFO *cs, + bool unsigned_arg); + +#endif /* ITEM_FUNC_INCLUDED */ diff --git a/sql/item_geofunc.cc b/sql/item_geofunc.cc new file mode 100644 index 00000000..8714417a --- /dev/null +++ b/sql/item_geofunc.cc @@ -0,0 +1,4082 @@ +/* Copyright (c) 2003, 2016, Oracle and/or its affiliates. + Copyright (c) 2011, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/** + @file + + @brief + This file defines all spatial functions +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +/* + It is necessary to include set_var.h instead of item.h because there + are dependencies on include order for set_var.h and item.h. This + will be resolved later. 
+*/ +#include "sql_class.h" // THD, set_var.h: THD +#include "set_var.h" +#ifdef HAVE_SPATIAL +#include +#include "opt_range.h" +#include "item_geofunc.h" +#include "item_create.h" + + +bool Item_geometry_func::fix_length_and_dec(THD *thd) +{ + collation.set(&my_charset_bin); + decimals=0; + max_length= (uint32) UINT_MAX32; + set_maybe_null(); + return FALSE; +} + + +String *Item_func_geometry_from_text::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + Geometry_buffer buffer; + String arg_val; + String *wkt= args[0]->val_str_ascii(&arg_val); + + if ((null_value= args[0]->null_value)) + return 0; + + Gis_read_stream trs(wkt->charset(), wkt->ptr(), wkt->length()); + uint32 srid= 0; + + if ((arg_count == 2) && !args[1]->null_value) + srid= (uint32)args[1]->val_int(); + + str->set_charset(&my_charset_bin); + str->length(0); + if (str->reserve(SRID_SIZE, 512)) + return 0; + str->q_append(srid); + if ((null_value= !Geometry::create_from_wkt(&buffer, &trs, str, 0))) + return 0; + return str; +} + + +String *Item_func_geometry_from_wkb::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String arg_val; + String *wkb; + Geometry_buffer buffer; + uint32 srid= 0; + + if (args[0]->field_type() == MYSQL_TYPE_GEOMETRY) + { + String *str_ret= args[0]->val_str(str); + null_value= args[0]->null_value; + return str_ret; + } + + wkb= args[0]->val_str(&arg_val); + + if ((arg_count == 2) && !args[1]->null_value) + srid= (uint32)args[1]->val_int(); + + str->set_charset(&my_charset_bin); + str->length(0); + if (str->reserve(SRID_SIZE, 512)) + { + null_value= TRUE; /* purecov: inspected */ + return 0; /* purecov: inspected */ + } + str->q_append(srid); + if ((null_value= + (args[0]->null_value || + !Geometry::create_from_wkb(&buffer, wkb->ptr(), wkb->length(), str)))) + return 0; + return str; +} + + +String *Item_func_geometry_from_json::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + Geometry_buffer buffer; + String *js= args[0]->val_str_ascii(&tmp_js); + uint32 srid= 0; + longlong 
options= 0; + json_engine_t je; + + if ((null_value= args[0]->null_value)) + return 0; + + if (arg_count > 1 && !args[1]->null_value) + { + options= args[1]->val_int(); + if (options > 4 || options < 1) + { + String *sv= args[1]->val_str(&tmp_js); + my_error(ER_WRONG_VALUE_FOR_TYPE, MYF(0), + "option", sv->c_ptr_safe(), "ST_GeomFromGeoJSON"); + null_value= 1; + return 0; + } + } + + if ((arg_count == 3) && !args[2]->null_value) + srid= (uint32)args[2]->val_int(); + + str->set_charset(&my_charset_bin); + str->length(0); + if (str->reserve(SRID_SIZE, 512)) + return 0; + str->q_append(srid); + + json_scan_start(&je, js->charset(), (const uchar *) js->ptr(), + (const uchar *) js->end()); + + if ((null_value= !Geometry::create_from_json(&buffer, &je, options==1, str))) + { + int code= 0; + + switch (je.s.error) + { + case Geometry::GEOJ_INCORRECT_GEOJSON: + code= ER_GEOJSON_INCORRECT; + break; + case Geometry::GEOJ_TOO_FEW_POINTS: + code= ER_GEOJSON_TOO_FEW_POINTS; + break; + case Geometry::GEOJ_EMPTY_COORDINATES: + code= ER_GEOJSON_EMPTY_COORDINATES; + break; + case Geometry::GEOJ_POLYGON_NOT_CLOSED: + code= ER_GEOJSON_NOT_CLOSED; + break; + case Geometry::GEOJ_DIMENSION_NOT_SUPPORTED: + my_error(ER_GIS_INVALID_DATA, MYF(0), "ST_GeomFromGeoJSON"); + break; + default: + report_json_error_ex(js->ptr(), &je, func_name(), 0, + Sql_condition::WARN_LEVEL_WARN); + return NULL; + } + + if (code) + { + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, code, + ER_THD(thd, code)); + } + return 0; + } + return str; +} + + +String *Item_func_as_wkt::val_str_ascii(String *str) +{ + DBUG_ASSERT(fixed()); + String arg_val; + String *swkb= args[0]->val_str(&arg_val); + Geometry_buffer buffer; + Geometry *geom= NULL; + const char *dummy; + + if ((null_value= + (args[0]->null_value || + !(geom= Geometry::construct(&buffer, swkb->ptr(), swkb->length()))))) + return 0; + + str->length(0); + str->set_charset(&my_charset_latin1); + if ((null_value= 
geom->as_wkt(str, &dummy))) + return 0; + + return str; +} + + +bool Item_func_as_wkt::fix_length_and_dec(THD *thd) +{ + collation.set(default_charset(), DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII); + max_length= (uint32) UINT_MAX32; + set_maybe_null(); + return FALSE; +} + + +String *Item_func_as_wkb::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String arg_val; + String *swkb= args[0]->val_str(&arg_val); + Geometry_buffer buffer; + + if ((null_value= + (args[0]->null_value || + !(Geometry::construct(&buffer, swkb->ptr(), swkb->length()))))) + return 0; + + str->copy(swkb->ptr() + SRID_SIZE, swkb->length() - SRID_SIZE, + &my_charset_bin); + return str; +} + + +bool Item_func_as_geojson::fix_length_and_dec(THD *thd) +{ + collation.set(default_charset(), DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII); + max_length=MAX_BLOB_WIDTH; + set_maybe_null(); + return FALSE; +} + + +String *Item_func_as_geojson::val_str_ascii(String *str) +{ + DBUG_ASSERT(fixed()); + String arg_val; + String *swkb= args[0]->val_str(&arg_val); + uint max_dec= FLOATING_POINT_DECIMALS; + longlong options= 0; + Geometry_buffer buffer; + Geometry *geom= NULL; + const char *dummy; + + if ((null_value= + (args[0]->null_value || + !(geom= Geometry::construct(&buffer, swkb->ptr(), swkb->length()))))) + return 0; + + if (arg_count > 1) + { + max_dec= (uint) args[1]->val_int(); + if (args[1]->null_value) + max_dec= FLOATING_POINT_DECIMALS; + if (arg_count > 2) + { + options= args[2]->val_int(); + if (args[2]->null_value) + options= 0; + } + } + + str->length(0); + str->set_charset(&my_charset_latin1); + + if (str->reserve(1, 512)) + return 0; + + str->qs_append('{'); + + if (options & 1) + { + if (geom->bbox_as_json(str) || str->append(", ", 2)) + goto error; + } + + if ((geom->as_json(str, max_dec, &dummy) || str->append('}'))) + goto error; + + return str; + +error: + null_value= 1; + return 0; +} + + +String *Item_func_geometry_type::val_str_ascii(String *str) +{ + DBUG_ASSERT(fixed()); + String 
*swkb= args[0]->val_str(str); + Geometry_buffer buffer; + Geometry *geom= NULL; + + if ((null_value= + (args[0]->null_value || + !(geom= Geometry::construct(&buffer, swkb->ptr(), swkb->length()))))) + return 0; + /* String will not move */ + str->copy(geom->get_class_info()->m_name.str, + geom->get_class_info()->m_name.length, + &my_charset_latin1); + return str; +} + + +String *Item_func_envelope::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String arg_val; + String *swkb= args[0]->val_str(&arg_val); + Geometry_buffer buffer; + Geometry *geom= NULL; + uint32 srid; + + if ((null_value= + args[0]->null_value || + !(geom= Geometry::construct(&buffer, swkb->ptr(), swkb->length())))) + return 0; + + srid= uint4korr(swkb->ptr()); + str->set_charset(&my_charset_bin); + str->length(0); + if (str->reserve(SRID_SIZE, 512)) + return 0; + str->q_append(srid); + return (null_value= geom->envelope(str)) ? 0 : str; +} + + +int Item_func_boundary::Transporter::single_point(double x, double y) +{ + return 0; +} + + +int Item_func_boundary::Transporter::start_line() +{ + n_points= 0; + current_type= Gcalc_function::shape_line; + return 0; +} + + +int Item_func_boundary::Transporter::complete_line() +{ + current_type= (Gcalc_function::shape_type) 0; + if (n_points > 1) + return m_receiver->single_point(last_x, last_y); + return 0; +} + + +int Item_func_boundary::Transporter::start_poly() +{ + current_type= Gcalc_function::shape_polygon; + return 0; +} + + +int Item_func_boundary::Transporter::complete_poly() +{ + current_type= (Gcalc_function::shape_type) 0; + return 0; +} + + +int Item_func_boundary::Transporter::start_ring() +{ + n_points= 0; + return m_receiver->start_shape(Gcalc_function::shape_line); +} + + +int Item_func_boundary::Transporter::complete_ring() +{ + if (n_points > 1) + { + m_receiver->add_point(last_x, last_y); + } + m_receiver->complete_shape(); + return 0; +} + + +int Item_func_boundary::Transporter::add_point(double x, double y) +{ + ++n_points; + if 
(current_type== Gcalc_function::shape_polygon) + { + /* Polygon's ring case */ + if (n_points == 1) + { + last_x= x; + last_y= y; + } + return m_receiver->add_point(x, y); + } + + if (current_type== Gcalc_function::shape_line) + { + /* Line's case */ + last_x= x; + last_y= y; + if (n_points == 1) + return m_receiver->single_point(x, y); + } + return 0; +} + + +int Item_func_boundary::Transporter::start_collection(int n_objects) +{ + return 0; +} + + +String *Item_func_boundary::val_str(String *str_value) +{ + DBUG_ENTER("Item_func_boundary::val_str"); + DBUG_ASSERT(fixed()); + String arg_val; + String *swkb= args[0]->val_str(&arg_val); + + if ((null_value= args[0]->null_value)) + DBUG_RETURN(0); + + Geometry_buffer buffer; + uint32 srid= 0; + Transporter trn(&res_receiver); + + Geometry *g= Geometry::construct(&buffer, swkb->ptr(), swkb->length()); + if (!g) + DBUG_RETURN(0); + + if (g->store_shapes(&trn)) + goto mem_error; + + str_value->set_charset(&my_charset_bin); + str_value->length(0); + if (str_value->reserve(SRID_SIZE, 512)) + goto mem_error; + str_value->q_append(srid); + + if (!Geometry::create_from_opresult(&buffer, str_value, res_receiver)) + goto mem_error; + + res_receiver.reset(); + DBUG_RETURN(str_value); + +mem_error: + null_value= 1; + DBUG_RETURN(0); +} + + +String *Item_func_centroid::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String arg_val; + String *swkb= args[0]->val_str(&arg_val); + Geometry_buffer buffer; + Geometry *geom= NULL; + uint32 srid; + + if ((null_value= args[0]->null_value || + !(geom= Geometry::construct(&buffer, swkb->ptr(), swkb->length())))) + return 0; + + str->set_charset(&my_charset_bin); + str->length(0); + if (str->reserve(SRID_SIZE, 512)) + return 0; + srid= uint4korr(swkb->ptr()); + str->q_append(srid); + + return (null_value= MY_TEST(geom->centroid(str))) ? 
0 : str; +} + + +int Item_func_convexhull::add_node_to_line(ch_node **p_cur, int dir, + const Gcalc_heap::Info *pi) +{ + ch_node *new_node; + ch_node *cur= *p_cur; + + while (cur->prev) + { + int v_sign= Gcalc_scan_iterator::point::cmp_dx_dy( + cur->prev->pi, cur->pi, cur->pi, pi); + if (v_sign*dir <0) + break; + new_node= cur; + cur= cur->prev; + res_heap.free_item(new_node); + } + if (!(new_node= new_ch_node())) + return 1; + cur->next= new_node; + new_node->prev= cur; + new_node->pi= pi; + *p_cur= new_node; + return 0; +} + + +#ifndef HEAVY_CONVEX_HULL +String *Item_func_convexhull::val_str(String *str_value) +{ + Geometry_buffer buffer; + Geometry *geom= NULL; + MBR mbr; + const char *c_end; + Gcalc_operation_transporter trn(&func, &collector); + uint32 srid= 0; + ch_node *left_first, *left_cur, *right_first, *right_cur; + Gcalc_heap::Info *cur_pi; + + DBUG_ENTER("Item_func_convexhull::val_str"); + DBUG_ASSERT(fixed()); + String *swkb= args[0]->val_str(&tmp_value); + + if ((null_value= + args[0]->null_value || + !(geom= Geometry::construct(&buffer, swkb->ptr(), swkb->length())))) + DBUG_RETURN(0); + + geom->get_mbr(&mbr, &c_end); + collector.set_extent(mbr.xmin, mbr.xmax, mbr.ymin, mbr.ymax); + if ((null_value= geom->store_shapes(&trn))) + { + str_value= 0; + goto mem_error; + } + + collector.prepare_operation(); + if (!(cur_pi= collector.get_first())) + goto build_result; /* An EMPTY GEOMETRY */ + + if (!cur_pi->get_next()) + { + /* Single point. */ + if (res_receiver.single_point(cur_pi->node.shape.x, cur_pi->node.shape.y)) + goto mem_error; + goto build_result; + } + + left_cur= left_first= new_ch_node(); + right_cur= right_first= new_ch_node(); + right_first->prev= left_first->prev= 0; + right_first->pi= left_first->pi= cur_pi; + + while ((cur_pi= cur_pi->get_next())) + { + /* Handle left part of the hull, then the right part. 
*/ + if (add_node_to_line(&left_cur, 1, cur_pi)) + goto mem_error; + if (add_node_to_line(&right_cur, -1, cur_pi)) + goto mem_error; + } + + left_cur->next= 0; + if (left_first->get_next()->get_next() == NULL && + right_cur->prev->prev == NULL) + { + /* We only have 2 nodes in the result, so we create a polyline. */ + if (res_receiver.start_shape(Gcalc_function::shape_line) || + res_receiver.add_point(left_first->pi->node.shape.x, left_first->pi->node.shape.y) || + res_receiver.add_point(left_cur->pi->node.shape.x, left_cur->pi->node.shape.y) || + res_receiver.complete_shape()) + + goto mem_error; + + goto build_result; + } + + if (res_receiver.start_shape(Gcalc_function::shape_polygon)) + goto mem_error; + + while (left_first) + { + if (res_receiver.add_point(left_first->pi->node.shape.x, left_first->pi->node.shape.y)) + goto mem_error; + left_first= left_first->get_next(); + } + + /* Skip last point in the right part as it coincides */ + /* with the last one in the left. */ + right_cur= right_cur->prev; + while (right_cur->prev) + { + if (res_receiver.add_point(right_cur->pi->node.shape.x, right_cur->pi->node.shape.y)) + goto mem_error; + right_cur= right_cur->prev; + } + res_receiver.complete_shape(); + +build_result: + str_value->set_charset(&my_charset_bin); + str_value->length(0); + if (str_value->reserve(SRID_SIZE, 512)) + goto mem_error; + str_value->q_append(srid); + + if (!Geometry::create_from_opresult(&buffer, str_value, res_receiver)) + goto mem_error; + +mem_error: + collector.reset(); + func.reset(); + res_receiver.reset(); + res_heap.reset(); + DBUG_RETURN(str_value); +} + +#else /*HEAVY_CONVEX_HULL*/ +String *Item_func_convexhull::val_str(String *str_value) +{ + Geometry_buffer buffer; + Geometry *geom= NULL; + MBR mbr; + const char *c_end; + Gcalc_operation_transporter trn(&func, &collector); + const Gcalc_scan_iterator::event_point *ev; + uint32 srid= 0; + ch_node *left_first, *left_cur, *right_first, *right_cur; + + 
DBUG_ENTER("Item_func_convexhull::val_str"); + DBUG_ASSERT(fixed()); + String *swkb= args[0]->val_str(&tmp_value); + + if ((null_value= + args[0]->null_value || + !(geom= Geometry::construct(&buffer, swkb->ptr(), swkb->length())))) + DBUG_RETURN(0); + + geom->get_mbr(&mbr, &c_end); + collector.set_extent(mbr.xmin, mbr.xmax, mbr.ymin, mbr.ymax); + if ((null_value= geom->store_shapes(&trn))) + { + str_value= 0; + goto mem_error; + } + + collector.prepare_operation(); + scan_it.init(&collector); + scan_it.killed= (int *) &(current_thd->killed); + + if (!scan_it.more_points()) + goto build_result; /* An EMPTY GEOMETRY */ + + if (scan_it.step()) + goto mem_error; + + if (!scan_it.more_points()) + { + /* Single point. */ + if (res_receiver.single_point(scan_it.get_events()->pi->x, + scan_it.get_events()->pi->y)) + goto mem_error; + goto build_result; + } + + left_cur= left_first= new_ch_node(); + right_cur= right_first= new_ch_node(); + right_first->prev= left_first->prev= 0; + right_first->pi= left_first->pi= scan_it.get_events()->pi; + + while (scan_it.more_points()) + { + if (scan_it.step()) + goto mem_error; + ev= scan_it.get_events(); + + /* Skip the intersections-only events. */ + while (ev->event == scev_intersection) + { + ev= ev->get_next(); + if (!ev) + goto skip_point; + } + + { + Gcalc_point_iterator pit(&scan_it); + if (!pit.point() || scan_it.get_event_position() == pit.point()) + { + /* Handle left part of the hull. */ + if (add_node_to_line(&left_cur, 1, ev->pi)) + goto mem_error; + } + if (pit.point()) + { + /* Check the rightmost point */ + for(; pit.point()->c_get_next(); ++pit) + ; + } + if (!pit.point() || pit.point()->event || + scan_it.get_event_position() == pit.point()->c_get_next()) + { + /* Handle right part of the hull. 
*/ + if (add_node_to_line(&right_cur, -1, ev->pi)) + goto mem_error; + } + } +skip_point:; + } + + left_cur->next= 0; + if (left_first->get_next()->get_next() == NULL && + right_cur->prev->prev == NULL) + { + /* We only have 2 nodes in the result, so we create a polyline. */ + if (res_receiver.start_shape(Gcalc_function::shape_line) || + res_receiver.add_point(left_first->pi->x, left_first->pi->y) || + res_receiver.add_point(left_cur->pi->x, left_cur->pi->y) || + res_receiver.complete_shape()) + + goto mem_error; + + goto build_result; + } + + if (res_receiver.start_shape(Gcalc_function::shape_polygon)) + goto mem_error; + + while (left_first) + { + if (res_receiver.add_point(left_first->pi->x, left_first->pi->y)) + goto mem_error; + left_first= left_first->get_next(); + } + + /* Skip last point in the right part as it coincides */ + /* with the last one in the left. */ + right_cur= right_cur->prev; + while (right_cur->prev) + { + if (res_receiver.add_point(right_cur->pi->x, right_cur->pi->y)) + goto mem_error; + right_cur= right_cur->prev; + } + res_receiver.complete_shape(); + +build_result: + str_value->set_charset(&my_charset_bin); + str_value->length(0); + if (str_value->reserve(SRID_SIZE, 512)) + goto mem_error; + str_value->q_append(srid); + + if (!Geometry::create_from_opresult(&buffer, str_value, res_receiver)) + goto mem_error; + +mem_error: + collector.reset(); + func.reset(); + res_receiver.reset(); + res_heap.reset(); + DBUG_RETURN(str_value); +} +#endif /*HEAVY_CONVEX_HULL*/ + + +/* + Spatial decomposition functions +*/ + +String *Item_func_spatial_decomp::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String arg_val; + String *swkb= args[0]->val_str(&arg_val); + Geometry_buffer buffer; + Geometry *geom= NULL; + uint32 srid; + + if ((null_value= + (args[0]->null_value || + !(geom= Geometry::construct(&buffer, swkb->ptr(), swkb->length()))))) + return 0; + + srid= uint4korr(swkb->ptr()); + str->set_charset(&my_charset_bin); + str->length(0); + if 
(str->reserve(SRID_SIZE, 512)) + goto err; + str->q_append(srid); + switch (decomp_func) { + case SP_STARTPOINT: + if (geom->start_point(str)) + goto err; + break; + + case SP_ENDPOINT: + if (geom->end_point(str)) + goto err; + break; + + case SP_EXTERIORRING: + if (geom->exterior_ring(str)) + goto err; + break; + + default: + goto err; + } + return str; + +err: + null_value= 1; + return 0; +} + + +String *Item_func_spatial_decomp_n::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String arg_val; + String *swkb= args[0]->val_str(&arg_val); + long n= (long) args[1]->val_int(); + Geometry_buffer buffer; + Geometry *geom= NULL; + uint32 srid; + + if ((null_value= + (args[0]->null_value || args[1]->null_value || + !(geom= Geometry::construct(&buffer, swkb->ptr(), swkb->length()))))) + return 0; + + str->set_charset(&my_charset_bin); + str->length(0); + if (str->reserve(SRID_SIZE, 512)) + goto err; + srid= uint4korr(swkb->ptr()); + str->q_append(srid); + switch (decomp_func_n) + { + case SP_POINTN: + if (geom->point_n(n,str)) + goto err; + break; + + case SP_GEOMETRYN: + if (geom->geometry_n(n,str)) + goto err; + break; + + case SP_INTERIORRINGN: + if (geom->interior_ring_n(n,str)) + goto err; + break; + + default: + goto err; + } + return str; + +err: + null_value=1; + return 0; +} + + +/* + Functions to concatenate various spatial objects +*/ + + +/* +* Concatenate doubles into Point +*/ + + +String *Item_func_point::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + double x= args[0]->val_real(); + double y= args[1]->val_real(); + uint32 srid= 0; + + if ((null_value= (args[0]->null_value || + args[1]->null_value || + str->alloc(4/*SRID*/ + 1 + 4 + SIZEOF_STORED_DOUBLE * 2)))) + return 0; + + str->set_charset(&my_charset_bin); + str->length(0); + str->q_append(srid); + str->q_append((char)Geometry::wkb_ndr); + str->q_append((uint32)Geometry::wkb_point); + str->q_append(x); + str->q_append(y); + return str; +} + + +/** + Concatenates various items into various 
collections + with checkings for valid wkb type of items. + For example, MultiPoint can be a collection of Points only. + coll_type contains wkb type of target collection. + item_type contains a valid wkb type of items. + In the case when coll_type is wkbGeometryCollection, + we do not check wkb type of items, any is valid. +*/ + +String *Item_func_spatial_collection::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String arg_value; + uint i; + uint32 srid= 0; + + str->set_charset(&my_charset_bin); + str->length(0); + if (str->reserve(4/*SRID*/ + 1 + 4 + 4, 512)) + goto err; + + str->q_append(srid); + str->q_append((char) Geometry::wkb_ndr); + str->q_append((uint32) coll_type); + str->q_append((uint32) arg_count); + + for (i= 0; i < arg_count; ++i) + { + String *res= args[i]->val_str(&arg_value); + uint32 len; + if (args[i]->null_value || ((len= res->length()) < WKB_HEADER_SIZE)) + goto err; + + if (coll_type == Geometry::wkb_geometrycollection) + { + /* + In the case of GeometryCollection we don't need any checkings + for item types, so just copy them into target collection + */ + if (str->append(res->ptr() + 4/*SRID*/, len - 4/*SRID*/, (uint32) 512)) + goto err; + } + else + { + enum Geometry::wkbType wkb_type; + const uint data_offset= 4/*SRID*/ + 1; + if (res->length() < data_offset + sizeof(uint32)) + goto err; + const char *data= res->ptr() + data_offset; + + /* + In the case of named collection we must check that items + are of specific type, let's do this checking now + */ + + wkb_type= (Geometry::wkbType) uint4korr(data); + data+= 4; + len-= 5 + 4/*SRID*/; + if (wkb_type != item_type) + goto err; + + switch (coll_type) { + case Geometry::wkb_multipoint: + case Geometry::wkb_multilinestring: + case Geometry::wkb_multipolygon: + if (len < WKB_HEADER_SIZE || + str->append(data-WKB_HEADER_SIZE, len+WKB_HEADER_SIZE, 512)) + goto err; + break; + + case Geometry::wkb_linestring: + if (len < POINT_DATA_SIZE || str->append(data, POINT_DATA_SIZE, 512)) + goto err; 
+ break; + case Geometry::wkb_polygon: + { + uint32 n_points; + double x1, y1, x2, y2; + const char *org_data= data; + + if (len < 4) + goto err; + + n_points= uint4korr(data); + data+= 4; + + if (n_points < 2 || len < 4 + n_points * POINT_DATA_SIZE) + goto err; + + float8get(x1, data); + data+= SIZEOF_STORED_DOUBLE; + float8get(y1, data); + data+= SIZEOF_STORED_DOUBLE; + + data+= (n_points - 2) * POINT_DATA_SIZE; + + float8get(x2, data); + float8get(y2, data + SIZEOF_STORED_DOUBLE); + + if ((x1 != x2) || (y1 != y2) || + str->append(org_data, len, 512)) + goto err; + } + break; + + default: + goto err; + } + } + } + if (str->length() > current_thd->variables.max_allowed_packet) + { + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), thd->variables.max_allowed_packet); + goto err; + } + + null_value = 0; + return str; + +err: + null_value= 1; + return 0; +} + + +/* + Functions for spatial relations +*/ + +static SEL_ARG sel_arg_impossible(SEL_ARG::IMPOSSIBLE); + +SEL_ARG * +Item_func_spatial_rel::get_mm_leaf(RANGE_OPT_PARAM *param, + Field *field, KEY_PART *key_part, + Item_func::Functype type, Item *value) +{ + DBUG_ENTER("Item_func_spatial_rel::get_mm_leaf"); + if (key_part->image_type != Field::itMBR) + DBUG_RETURN(0); + if (value->cmp_type() != STRING_RESULT) + DBUG_RETURN(&sel_arg_impossible); + + if (param->using_real_indexes && + !field->optimize_range(param->real_keynr[key_part->key], + key_part->part)) + DBUG_RETURN(0); + + Field_geom *field_geom= dynamic_cast(field); + DBUG_ASSERT(field_geom); + const Type_handler_geometry *sav_geom_type= field_geom->type_handler_geom(); + // We have to be able to store all sorts of spatial features here + field_geom->set_type_handler(&type_handler_geometry); + bool rc= value->save_in_field_no_warnings(field, 1); + field_geom->set_type_handler(sav_geom_type); + + if (rc) + 
DBUG_RETURN(&sel_arg_impossible); // Bad GEOMETRY value + + DBUG_ASSERT(!field->real_maybe_null()); // SPATIAL keys do not support NULL + + uchar *str= (uchar*) alloc_root(param->mem_root, key_part->store_length + 1); + if (!str) + DBUG_RETURN(0); // out of memory + field->get_key_image(str, key_part->length, key_part->image_type); + SEL_ARG *tree; + + if (!(tree= new (param->mem_root) SEL_ARG(field, str, str))) + DBUG_RETURN(0); // out of memory + + switch (type) { + case SP_EQUALS_FUNC: + tree->min_flag= GEOM_FLAG | HA_READ_MBR_EQUAL;// NEAR_MIN;//512; + tree->max_flag= NO_MAX_RANGE; + break; + case SP_DISJOINT_FUNC: + tree->min_flag= GEOM_FLAG | HA_READ_MBR_DISJOINT;// NEAR_MIN;//512; + tree->max_flag= NO_MAX_RANGE; + break; + case SP_INTERSECTS_FUNC: + tree->min_flag= GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512; + tree->max_flag= NO_MAX_RANGE; + break; + case SP_TOUCHES_FUNC: + tree->min_flag= GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512; + tree->max_flag= NO_MAX_RANGE; + break; + case SP_CROSSES_FUNC: + tree->min_flag= GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512; + tree->max_flag= NO_MAX_RANGE; + break; + case SP_WITHIN_FUNC: + tree->min_flag= GEOM_FLAG | HA_READ_MBR_CONTAIN;// NEAR_MIN;//512; + tree->max_flag= NO_MAX_RANGE; + break; + case SP_CONTAINS_FUNC: + tree->min_flag= GEOM_FLAG | HA_READ_MBR_WITHIN;// NEAR_MIN;//512; + tree->max_flag= NO_MAX_RANGE; + break; + case SP_OVERLAPS_FUNC: + tree->min_flag= GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512; + tree->max_flag= NO_MAX_RANGE; + break; + default: + DBUG_ASSERT(0); + break; + } + DBUG_RETURN(tree); +} + + +LEX_CSTRING Item_func_spatial_mbr_rel::func_name_cstring() const +{ + switch (spatial_rel) { + case SP_CONTAINS_FUNC: + return { STRING_WITH_LEN("mbrcontains") }; + case SP_WITHIN_FUNC: + return { STRING_WITH_LEN("mbrwithin") } ; + case SP_EQUALS_FUNC: + return { STRING_WITH_LEN("mbrequals") }; + case SP_DISJOINT_FUNC: + return { STRING_WITH_LEN("mbrdisjoint") }; + case 
SP_INTERSECTS_FUNC: + return { STRING_WITH_LEN("mbrintersects") }; + case SP_TOUCHES_FUNC: + return { STRING_WITH_LEN("mbrtouches") }; + case SP_CROSSES_FUNC: + return { STRING_WITH_LEN("mbrcrosses") }; + case SP_OVERLAPS_FUNC: + return { STRING_WITH_LEN("mbroverlaps") }; + default: + DBUG_ASSERT(0); // Should never happened + return { STRING_WITH_LEN("mbrsp_unknown") }; + } +} + + +longlong Item_func_spatial_mbr_rel::val_int() +{ + DBUG_ASSERT(fixed()); + String *res1= args[0]->val_str(&tmp_value1); + String *res2= args[1]->val_str(&tmp_value2); + Geometry_buffer buffer1, buffer2; + Geometry *g1, *g2; + MBR mbr1, mbr2; + const char *dummy; + + if ((null_value= + (args[0]->null_value || + args[1]->null_value || + !(g1= Geometry::construct(&buffer1, res1->ptr(), res1->length())) || + !(g2= Geometry::construct(&buffer2, res2->ptr(), res2->length())) || + g1->get_mbr(&mbr1, &dummy) || !mbr1.valid() || + g2->get_mbr(&mbr2, &dummy) || !mbr2.valid()))) + return 0; + + switch (spatial_rel) { + case SP_CONTAINS_FUNC: + return mbr1.contains(&mbr2); + case SP_WITHIN_FUNC: + return mbr1.within(&mbr2); + case SP_EQUALS_FUNC: + return mbr1.equals(&mbr2); + case SP_DISJOINT_FUNC: + return mbr1.disjoint(&mbr2); + case SP_INTERSECTS_FUNC: + return mbr1.intersects(&mbr2); + case SP_TOUCHES_FUNC: + return mbr1.touches(&mbr2); + case SP_OVERLAPS_FUNC: + return mbr1.overlaps(&mbr2); + case SP_CROSSES_FUNC: + return 0; + default: + break; + } + + null_value=1; + return 0; +} + + +LEX_CSTRING Item_func_spatial_precise_rel::func_name_cstring() const +{ + switch (spatial_rel) { + case SP_CONTAINS_FUNC: + return { STRING_WITH_LEN("st_contains") }; + case SP_WITHIN_FUNC: + return { STRING_WITH_LEN("st_within") }; + case SP_EQUALS_FUNC: + return { STRING_WITH_LEN("st_equals") }; + case SP_DISJOINT_FUNC: + return { STRING_WITH_LEN("st_disjoint") }; + case SP_INTERSECTS_FUNC: + return { STRING_WITH_LEN("st_intersects") }; + case SP_TOUCHES_FUNC: + return { STRING_WITH_LEN("st_touches") }; + 
case SP_CROSSES_FUNC: + return { STRING_WITH_LEN("st_crosses") }; + case SP_OVERLAPS_FUNC: + return { STRING_WITH_LEN("st_overlaps") } ; + default: + DBUG_ASSERT(0); // Should never happened + return { STRING_WITH_LEN("sp_unknown") }; + } +} + + +static double count_edge_t(const Gcalc_heap::Info *ea, + const Gcalc_heap::Info *eb, + const Gcalc_heap::Info *v, + double &ex, double &ey, double &vx, double &vy, + double &e_sqrlen) +{ + ex= eb->node.shape.x - ea->node.shape.x; + ey= eb->node.shape.y - ea->node.shape.y; + vx= v->node.shape.x - ea->node.shape.x; + vy= v->node.shape.y - ea->node.shape.y; + e_sqrlen= ex * ex + ey * ey; + return (ex * vx + ey * vy) / e_sqrlen; +} + + +static double distance_to_line(double ex, double ey, double vx, double vy, + double e_sqrlen) +{ + return fabs(vx * ey - vy * ex) / sqrt(e_sqrlen); +} + + +static double distance_points(const Gcalc_heap::Info *a, + const Gcalc_heap::Info *b) +{ + double x= a->node.shape.x - b->node.shape.x; + double y= a->node.shape.y - b->node.shape.y; + return sqrt(x * x + y * y); +} + + +static Gcalc_function::op_type op_matrix(int n) +{ + switch (n) + { + case 0: + return Gcalc_function::op_internals; + case 1: + return Gcalc_function::op_border; + case 2: + return (Gcalc_function::op_type) + ((int) Gcalc_function::op_not | (int) Gcalc_function::op_union); + }; + GCALC_DBUG_ASSERT(FALSE); + return Gcalc_function::op_any; +} + + +static int setup_relate_func(Geometry *g1, Geometry *g2, + Gcalc_operation_transporter *trn, Gcalc_function *func, + const char *mask) +{ + int do_store_shapes=1; + uint UNINIT_VAR(shape_a), UNINIT_VAR(shape_b); + uint n_operands= 0; + int last_shape_pos; + + last_shape_pos= func->get_next_expression_pos(); + if (func->reserve_op_buffer(1)) + return 1; + func->add_operation(Gcalc_function::op_intersection, 0); + for (int nc=0; nc<9; nc++) + { + uint cur_op; + + cur_op= Gcalc_function::op_intersection; + switch (mask[nc]) + { + case '*': + continue; + case 'T': + case '0': + case 
'1': + case '2': + cur_op|= Gcalc_function::v_find_t; + break; + case 'F': + cur_op|= (Gcalc_function::op_not | Gcalc_function::v_find_f); + break; + default: + return 1; + }; + ++n_operands; + if (func->reserve_op_buffer(3)) + return 1; + func->add_operation(cur_op, 2); + + func->add_operation(op_matrix(nc/3), 1); + if (do_store_shapes) + { + shape_a= func->get_next_expression_pos(); + if (g1->store_shapes(trn)) + return 1; + } + else + func->repeat_expression(shape_a); + if (func->reserve_op_buffer(1)) + return 1; + func->add_operation(op_matrix(nc%3), 1); + if (do_store_shapes) + { + shape_b= func->get_next_expression_pos(); + if (g2->store_shapes(trn)) + return 1; + do_store_shapes= 0; + } + else + func->repeat_expression(shape_b); + } + + func->add_operands_to_op(last_shape_pos, n_operands); + return 0; +} + + +#define GIS_ZERO 0.00000000001 + +class Geometry_ptr_with_buffer_and_mbr +{ +public: + Geometry *geom; + Geometry_buffer buffer; + MBR mbr; + bool construct(Item *item, String *tmp_value) + { + const char *c_end; + String *res= item->val_str(tmp_value); + return + item->null_value || + !(geom= Geometry::construct(&buffer, res->ptr(), res->length())) || + geom->get_mbr(&mbr, &c_end) || !mbr.valid(); + } + int store_shapes(Gcalc_shape_transporter *trn) const + { return geom->store_shapes(trn); } +}; + + +longlong Item_func_spatial_relate::val_int() +{ + DBUG_ENTER("Item_func_spatial_relate::val_int"); + DBUG_ASSERT(fixed()); + Geometry_ptr_with_buffer_and_mbr g1, g2; + int result= 0; + + if ((null_value= (g1.construct(args[0], &tmp_value1) || + g2.construct(args[1], &tmp_value2) || + func.reserve_op_buffer(1)))) + DBUG_RETURN(0); + + MBR umbr(g1.mbr, g2.mbr); + collector.set_extent(umbr.xmin, umbr.xmax, umbr.ymin, umbr.ymax); + g1.mbr.buffer(1e-5); + Gcalc_operation_transporter trn(&func, &collector); + + String *matrix= args[2]->val_str(&tmp_matrix); + if ((null_value= args[2]->null_value || matrix->length() != 9 || + setup_relate_func(g1.geom, g2.geom, 
+ &trn, &func, matrix->ptr()))) + goto exit; + + collector.prepare_operation(); + scan_it.init(&collector); + scan_it.killed= (int *) &(current_thd->killed); + if (!func.alloc_states()) + result= func.check_function(scan_it); + +exit: + collector.reset(); + func.reset(); + scan_it.reset(); + DBUG_RETURN(result); +} + + +longlong Item_func_spatial_precise_rel::val_int() +{ + DBUG_ENTER("Item_func_spatial_precise_rel::val_int"); + DBUG_ASSERT(fixed()); + Geometry_ptr_with_buffer_and_mbr g1, g2; + int result= 0; + uint shape_a, shape_b; + + if ((null_value= (g1.construct(args[0], &tmp_value1) || + g2.construct(args[1], &tmp_value2) || + func.reserve_op_buffer(1)))) + DBUG_RETURN(0); + + Gcalc_operation_transporter trn(&func, &collector); + + MBR umbr(g1.mbr, g2.mbr); + collector.set_extent(umbr.xmin, umbr.xmax, umbr.ymin, umbr.ymax); + + g1.mbr.buffer(1e-5); + + switch (spatial_rel) { + case SP_CONTAINS_FUNC: + if (!g1.mbr.contains(&g2.mbr)) + goto exit; + func.add_operation(Gcalc_function::v_find_f | + Gcalc_function::op_not | + Gcalc_function::op_difference, 2); + /* Mind the g2 goes first. 
*/ + null_value= g2.store_shapes(&trn) || g1.store_shapes(&trn); + break; + case SP_WITHIN_FUNC: + g2.mbr.buffer(2e-5); + if (!g1.mbr.within(&g2.mbr)) + goto exit; + func.add_operation(Gcalc_function::v_find_f | + Gcalc_function::op_not | + Gcalc_function::op_difference, 2); + null_value= g1.store_shapes(&trn) || g2.store_shapes(&trn); + break; + case SP_EQUALS_FUNC: + if (!g1.mbr.contains(&g2.mbr)) + goto exit; + func.add_operation(Gcalc_function::v_find_f | + Gcalc_function::op_not | + Gcalc_function::op_symdifference, 2); + null_value= g1.store_shapes(&trn) || g2.store_shapes(&trn); + break; + case SP_DISJOINT_FUNC: + func.add_operation(Gcalc_function::v_find_f | + Gcalc_function::op_not | + Gcalc_function::op_intersection, 2); + null_value= g1.store_shapes(&trn) || g2.store_shapes(&trn); + break; + case SP_INTERSECTS_FUNC: + if (!g1.mbr.intersects(&g2.mbr)) + goto exit; + func.add_operation(Gcalc_function::v_find_t | + Gcalc_function::op_intersection, 2); + null_value= g1.store_shapes(&trn) || g2.store_shapes(&trn); + break; + case SP_OVERLAPS_FUNC: + case SP_CROSSES_FUNC: + func.add_operation(Gcalc_function::op_intersection, 2); + if (func.reserve_op_buffer(3)) + break; + func.add_operation(Gcalc_function::v_find_t | + Gcalc_function::op_intersection, 2); + shape_a= func.get_next_expression_pos(); + if ((null_value= g1.store_shapes(&trn))) + break; + shape_b= func.get_next_expression_pos(); + if ((null_value= g2.store_shapes(&trn))) + break; + if (func.reserve_op_buffer(7)) + break; + func.add_operation(Gcalc_function::op_intersection, 2); + func.add_operation(Gcalc_function::v_find_t | + Gcalc_function::op_difference, 2); + func.repeat_expression(shape_a); + func.repeat_expression(shape_b); + func.add_operation(Gcalc_function::v_find_t | + Gcalc_function::op_difference, 2); + func.repeat_expression(shape_b); + func.repeat_expression(shape_a); + break; + case SP_TOUCHES_FUNC: + if (func.reserve_op_buffer(5)) + break; + 
func.add_operation(Gcalc_function::op_intersection, 2); + func.add_operation(Gcalc_function::v_find_f | + Gcalc_function::op_not | + Gcalc_function::op_intersection, 2); + func.add_operation(Gcalc_function::op_internals, 1); + shape_a= func.get_next_expression_pos(); + if ((null_value= g1.store_shapes(&trn)) || + func.reserve_op_buffer(1)) + break; + func.add_operation(Gcalc_function::op_internals, 1); + shape_b= func.get_next_expression_pos(); + if ((null_value= g2.store_shapes(&trn)) || + func.reserve_op_buffer(1)) + break; + func.add_operation(Gcalc_function::v_find_t | + Gcalc_function::op_intersection, 2); + func.repeat_expression(shape_a); + func.repeat_expression(shape_b); + break; + default: + DBUG_ASSERT(FALSE); + break; + } + + if (null_value) + goto exit; + + collector.prepare_operation(); + scan_it.init(&collector); + scan_it.killed= (int *) &(current_thd->killed); + + if (func.alloc_states()) + goto exit; + + result= func.check_function(scan_it); + +exit: + collector.reset(); + func.reset(); + scan_it.reset(); + DBUG_RETURN(result); +} + + +Item_func_spatial_operation::~Item_func_spatial_operation() = default; + + +String *Item_func_spatial_operation::val_str(String *str_value) +{ + DBUG_ENTER("Item_func_spatial_operation::val_str"); + DBUG_ASSERT(fixed()); + Geometry_ptr_with_buffer_and_mbr g1, g2; + uint32 srid= 0; + Gcalc_operation_transporter trn(&func, &collector); + + if (func.reserve_op_buffer(1)) + DBUG_RETURN(0); + func.add_operation(spatial_op, 2); + + if ((null_value= (g1.construct(args[0], &tmp_value1) || + g2.construct(args[1], &tmp_value2)))) + { + str_value= 0; + goto exit; + } + + g1.mbr.add_mbr(&g2.mbr); + collector.set_extent(g1.mbr.xmin, g1.mbr.xmax, g1.mbr.ymin, g1.mbr.ymax); + + if ((null_value= g1.store_shapes(&trn) || g2.store_shapes(&trn))) + { + str_value= 0; + goto exit; + } + + collector.prepare_operation(); + if (func.alloc_states()) + goto exit; + + operation.init(&func); + + if (operation.count_all(&collector) || + 
operation.get_result(&res_receiver)) + goto exit; + + + str_value->set_charset(&my_charset_bin); + str_value->length(0); + if (str_value->reserve(SRID_SIZE, 512)) + goto exit; + str_value->q_append(srid); + + if (!Geometry::create_from_opresult(&g1.buffer, str_value, res_receiver)) + goto exit; + +exit: + collector.reset(); + func.reset(); + res_receiver.reset(); + DBUG_RETURN(str_value); +} + + +LEX_CSTRING Item_func_spatial_operation::func_name_cstring() const +{ + switch (spatial_op) { + case Gcalc_function::op_intersection: + return { STRING_WITH_LEN("st_intersection") }; + case Gcalc_function::op_difference: + return { STRING_WITH_LEN("st_difference") }; + case Gcalc_function::op_union: + return { STRING_WITH_LEN("st_union") }; + case Gcalc_function::op_symdifference: + return { STRING_WITH_LEN("st_symdifference") }; + default: + DBUG_ASSERT(0); // Should never happen + return { STRING_WITH_LEN("sp_unknown") }; + } +} + + +static const int SINUSES_CALCULATED= 32; +static double n_sinus[SINUSES_CALCULATED+1]= +{ + 0, + 0.04906767432741802, + 0.0980171403295606, + 0.1467304744553618, + 0.1950903220161283, + 0.2429801799032639, + 0.2902846772544623, + 0.3368898533922201, + 0.3826834323650898, + 0.4275550934302821, + 0.4713967368259976, + 0.5141027441932217, + 0.5555702330196022, + 0.5956993044924334, + 0.6343932841636455, + 0.6715589548470183, + 0.7071067811865475, + 0.7409511253549591, + 0.773010453362737, + 0.8032075314806448, + 0.8314696123025452, + 0.8577286100002721, + 0.8819212643483549, + 0.9039892931234433, + 0.9238795325112867, + 0.9415440651830208, + 0.9569403357322089, + 0.970031253194544, + 0.9807852804032304, + 0.989176509964781, + 0.9951847266721968, + 0.9987954562051724, + 1 +}; + + +static void get_n_sincos(int n, double *sinus, double *cosinus) +{ + DBUG_ASSERT(n > 0 && n < SINUSES_CALCULATED*2+1); + if (n < (SINUSES_CALCULATED + 1)) + { + *sinus= n_sinus[n]; + *cosinus= n_sinus[SINUSES_CALCULATED - n]; + } + else + { + n-= SINUSES_CALCULATED; + 
*sinus= n_sinus[SINUSES_CALCULATED - n]; + *cosinus= -n_sinus[n]; + } +} + + +static int fill_half_circle(Gcalc_shape_transporter *trn, double x, double y, + double ax, double ay) +{ + double n_sin, n_cos; + double x_n, y_n; + for (int n = 1; n < (SINUSES_CALCULATED * 2 - 1); n++) + { + get_n_sincos(n, &n_sin, &n_cos); + x_n= ax * n_cos - ay * n_sin; + y_n= ax * n_sin + ay * n_cos; + if (trn->add_point(x_n + x, y_n + y)) + return 1; + } + return 0; +} + + +static int fill_gap(Gcalc_shape_transporter *trn, + double x, double y, + double ax, double ay, double bx, double by, double d, + bool *empty_gap) +{ + double ab= ax * bx + ay * by; + double cosab= ab / (d * d) + GIS_ZERO; + double n_sin, n_cos; + double x_n, y_n; + int n=1; + + *empty_gap= true; + for (;;) + { + get_n_sincos(n++, &n_sin, &n_cos); + if (n_cos <= cosab) + break; + *empty_gap= false; + x_n= ax * n_cos - ay * n_sin; + y_n= ax * n_sin + ay * n_cos; + if (trn->add_point(x_n + x, y_n + y)) + return 1; + } + return 0; +} + + +/* + Calculates the vector (p2,p1) and + negatively orthogonal to it with the length of d. + The result is (ex,ey) - the vector, (px,py) - the orthogonal. 
+*/ + +static void calculate_perpendicular( + double x1, double y1, double x2, double y2, double d, + double *ex, double *ey, + double *px, double *py) +{ + double q; + *ex= x1 - x2; + *ey= y1 - y2; + q= d / sqrt((*ex) * (*ex) + (*ey) * (*ey)); + *px= (*ey) * q; + *py= -(*ex) * q; +} + + +int Item_func_buffer::Transporter::single_point(double x, double y) +{ + if (buffer_op == Gcalc_function::op_difference) + { + if (m_fn->reserve_op_buffer(1)) + return 1; + m_fn->add_operation(Gcalc_function::op_false, 0); + return 0; + } + + m_nshapes= 0; + return add_point_buffer(x, y); +} + + +int Item_func_buffer::Transporter::add_edge_buffer( + double x3, double y3, bool round_p1, bool round_p2) +{ + Gcalc_operation_transporter trn(m_fn, m_heap); + double e1_x, e1_y, e2_x, e2_y, p1_x, p1_y, p2_x, p2_y; + double e1e2; + double sin1, cos1; + double x_n, y_n; + bool empty_gap1, empty_gap2; + + ++m_nshapes; + if (trn.start_simple_poly()) + return 1; + + calculate_perpendicular(x1, y1, x2, y2, m_d, &e1_x, &e1_y, &p1_x, &p1_y); + calculate_perpendicular(x3, y3, x2, y2, m_d, &e2_x, &e2_y, &p2_x, &p2_y); + + e1e2= e1_x * e2_y - e2_x * e1_y; + sin1= n_sinus[1]; + cos1= n_sinus[31]; + if (e1e2 < 0) + { + empty_gap2= false; + x_n= x2 + p2_x * cos1 - p2_y * sin1; + y_n= y2 + p2_y * cos1 + p2_x * sin1; + if (fill_gap(&trn, x2, y2, -p1_x,-p1_y, p2_x,p2_y, m_d, &empty_gap1) || + trn.add_point(x2 + p2_x, y2 + p2_y) || + trn.add_point(x_n, y_n)) + return 1; + } + else + { + x_n= x2 - p2_x * cos1 - p2_y * sin1; + y_n= y2 - p2_y * cos1 + p2_x * sin1; + if (trn.add_point(x_n, y_n) || + trn.add_point(x2 - p2_x, y2 - p2_y) || + fill_gap(&trn, x2, y2, -p2_x, -p2_y, p1_x, p1_y, m_d, &empty_gap2)) + return 1; + empty_gap1= false; + } + if ((!empty_gap2 && trn.add_point(x2 + p1_x, y2 + p1_y)) || + trn.add_point(x1 + p1_x, y1 + p1_y)) + return 1; + + if (round_p1 && fill_half_circle(&trn, x1, y1, p1_x, p1_y)) + return 1; + + if (trn.add_point(x1 - p1_x, y1 - p1_y) || + (!empty_gap1 && trn.add_point(x2 
- p1_x, y2 - p1_y))) + return 1; + return trn.complete_simple_poly(); +} + + +int Item_func_buffer::Transporter::add_last_edge_buffer() +{ + Gcalc_operation_transporter trn(m_fn, m_heap); + double e1_x, e1_y, p1_x, p1_y; + + ++m_nshapes; + if (trn.start_simple_poly()) + return 1; + + calculate_perpendicular(x1, y1, x2, y2, m_d, &e1_x, &e1_y, &p1_x, &p1_y); + + if (trn.add_point(x1 + p1_x, y1 + p1_y) || + trn.add_point(x1 - p1_x, y1 - p1_y) || + trn.add_point(x2 - p1_x, y2 - p1_y) || + fill_half_circle(&trn, x2, y2, -p1_x, -p1_y) || + trn.add_point(x2 + p1_x, y2 + p1_y)) + return 1; + return trn.complete_simple_poly(); +} + + +int Item_func_buffer::Transporter::add_point_buffer(double x, double y) +{ + Gcalc_operation_transporter trn(m_fn, m_heap); + + m_nshapes++; + if (trn.start_simple_poly()) + return 1; + if (trn.add_point(x - m_d, y) || + fill_half_circle(&trn, x, y, -m_d, 0.0) || + trn.add_point(x + m_d, y) || + fill_half_circle(&trn, x, y, m_d, 0.0)) + return 1; + return trn.complete_simple_poly(); +} + + +int Item_func_buffer::Transporter::start_line() +{ + if (buffer_op == Gcalc_function::op_difference) + { + if (m_fn->reserve_op_buffer(1)) + return 1; + m_fn->add_operation(Gcalc_function::op_false, 0); + skip_line= TRUE; + return 0; + } + + m_nshapes= 0; + + if (m_fn->reserve_op_buffer(2)) + return 1; + last_shape_pos= m_fn->get_next_expression_pos(); + m_fn->add_operation(buffer_op, 0); + m_npoints= 0; + int_start_line(); + return 0; +} + + +int Item_func_buffer::Transporter::start_poly() +{ + m_nshapes= 1; + + if (m_fn->reserve_op_buffer(2)) + return 1; + last_shape_pos= m_fn->get_next_expression_pos(); + m_fn->add_operation(buffer_op, 0); + return Gcalc_operation_transporter::start_poly(); +} + + +int Item_func_buffer::Transporter::complete_poly() +{ + if (Gcalc_operation_transporter::complete_poly()) + return 1; + m_fn->add_operands_to_op(last_shape_pos, m_nshapes); + return 0; +} + + +int Item_func_buffer::Transporter::start_ring() +{ + m_npoints= 0; 
+ return Gcalc_operation_transporter::start_ring(); +} + + +int Item_func_buffer::Transporter::start_collection(int n_objects) +{ + if (m_fn->reserve_op_buffer(1)) + return 1; + m_fn->add_operation(Gcalc_function::op_union, n_objects); + return 0; +} + + +int Item_func_buffer::Transporter::add_point(double x, double y) +{ + if (skip_line) + return 0; + + if (m_npoints && x == x2 && y == y2) + return 0; + + ++m_npoints; + + if (m_npoints == 1) + { + x00= x; + y00= y; + } + else if (m_npoints == 2) + { + x01= x; + y01= y; + } + else if (add_edge_buffer(x, y, (m_npoints == 3) && line_started(), false)) + return 1; + + x1= x2; + y1= y2; + x2= x; + y2= y; + + return line_started() ? 0 : Gcalc_operation_transporter::add_point(x, y); +} + + +int Item_func_buffer::Transporter::complete() +{ + if (m_npoints) + { + if (m_npoints == 1) + { + if (add_point_buffer(x2, y2)) + return 1; + } + else if (m_npoints == 2) + { + if (add_edge_buffer(x1, y1, true, true)) + return 1; + } + else if (line_started()) + { + if (add_last_edge_buffer()) + return 1; + } + else + { + if (x2 != x00 || y2 != y00) + { + if (add_edge_buffer(x00, y00, false, false)) + return 1; + x1= x2; + y1= y2; + x2= x00; + y2= y00; + } + if (add_edge_buffer(x01, y01, false, false)) + return 1; + } + } + + return 0; +} + + +int Item_func_buffer::Transporter::complete_line() +{ + if (!skip_line) + { + if (complete()) + return 1; + int_complete_line(); + m_fn->add_operands_to_op(last_shape_pos, m_nshapes); + } + skip_line= FALSE; + return 0; +} + + +int Item_func_buffer::Transporter::complete_ring() +{ + return complete() || + Gcalc_operation_transporter::complete_ring(); +} + + +String *Item_func_buffer::val_str(String *str_value) +{ + DBUG_ENTER("Item_func_buffer::val_str"); + DBUG_ASSERT(fixed()); + String *obj= args[0]->val_str(str_value); + double dist= args[1]->val_real(); + Geometry_buffer buffer; + Geometry *g; + uint32 srid= 0; + String *str_result= NULL; + Transporter trn(&func, &collector, dist); + MBR 
mbr; + const char *c_end; + + null_value= 1; + if (args[0]->null_value || args[1]->null_value || + !(g= Geometry::construct(&buffer, obj->ptr(), obj->length())) || + g->get_mbr(&mbr, &c_end)) + goto mem_error; + + if (dist > 0.0) + mbr.buffer(dist); + else + { + /* This happens when dist is too far negative. */ + if (mbr.xmax + dist < mbr.xmin || mbr.ymax + dist < mbr.ymin) + goto return_empty_result; + } + + collector.set_extent(mbr.xmin, mbr.xmax, mbr.ymin, mbr.ymax); + /* + If the distance given is 0, the Buffer function is in fact NOOP, + so it's natural just to return the argument1. + Besides, internal calculations here can't handle zero distance anyway. + */ + if (fabs(dist) < GIS_ZERO) + { + null_value= 0; + str_result= obj; + goto mem_error; + } + + if (g->store_shapes(&trn)) + goto mem_error; + + collector.prepare_operation(); + if (func.alloc_states()) + goto mem_error; + operation.init(&func); + operation.killed= (int *) &(current_thd->killed); + + if (operation.count_all(&collector) || + operation.get_result(&res_receiver)) + goto mem_error; + + +return_empty_result: + str_value->set_charset(&my_charset_bin); + str_value->length(0); + if (str_value->reserve(SRID_SIZE, 512)) + goto mem_error; + str_value->q_append(srid); + + if (!Geometry::create_from_opresult(&buffer, str_value, res_receiver)) + goto mem_error; + + null_value= 0; + str_result= str_value; +mem_error: + collector.reset(); + func.reset(); + res_receiver.reset(); + DBUG_RETURN(str_result); +} + + +longlong Item_func_isempty::val_int() +{ + DBUG_ASSERT(fixed()); + String tmp; + String *swkb= args[0]->val_str(&tmp); + Geometry_buffer buffer; + + null_value= args[0]->null_value || + !(Geometry::construct(&buffer, swkb->ptr(), swkb->length())); + return null_value ? 
1 : 0; +} + + +longlong Item_func_issimple::val_int() +{ + String *swkb= args[0]->val_str(&tmp); + Geometry_buffer buffer; + Gcalc_operation_transporter trn(&func, &collector); + Geometry *g; + int result= 1; + MBR mbr; + const char *c_end; + + DBUG_ENTER("Item_func_issimple::val_int"); + DBUG_ASSERT(fixed()); + + null_value= 0; + if ((args[0]->null_value || + !(g= Geometry::construct(&buffer, swkb->ptr(), swkb->length())) || + g->get_mbr(&mbr, &c_end))) + { + /* We got NULL as an argument. Have to return -1 */ + DBUG_RETURN(-1); + } + + collector.set_extent(mbr.xmin, mbr.xmax, mbr.ymin, mbr.ymax); + + if (g->get_class_info()->m_type_id == Geometry::wkb_point) + DBUG_RETURN(1); + + if (g->store_shapes(&trn)) + goto mem_error; + + collector.prepare_operation(); + scan_it.init(&collector); + + while (scan_it.more_points()) + { + const Gcalc_scan_iterator::event_point *ev, *next_ev; + + if (scan_it.step()) + goto mem_error; + + ev= scan_it.get_events(); + if (ev->simple_event()) + continue; + + next_ev= ev->get_next(); + if ((ev->event & (scev_thread | scev_single_point)) && !next_ev) + continue; + + if ((ev->event == scev_two_threads) && !next_ev->get_next()) + continue; + + /* If the first and last points of a curve coincide - that is */ + /* an exception to the rule and the line is considered as simple. 
*/ + if ((next_ev && !next_ev->get_next()) && + (ev->event & (scev_thread | scev_end)) && + (next_ev->event & (scev_thread | scev_end))) + continue; + + result= 0; + break; + } + + collector.reset(); + func.reset(); + scan_it.reset(); + DBUG_RETURN(result); +mem_error: + null_value= 1; + DBUG_RETURN(0); +} + + +longlong Item_func_isclosed::val_int() +{ + DBUG_ASSERT(fixed()); + String tmp; + String *swkb= args[0]->val_str(&tmp); + Geometry_buffer buffer; + Geometry *geom; + int isclosed= 0; // In case of error + + null_value= 0; + if (!swkb || + args[0]->null_value || + !(geom= Geometry::construct(&buffer, swkb->ptr(), swkb->length())) || + geom->is_closed(&isclosed)) + { + /* IsClosed(NULL) should return -1 */ + return -1; + } + + return (longlong) isclosed; +} + + +longlong Item_func_isring::val_int() +{ + /* It's actually a combination of two functions - IsClosed and IsSimple */ + DBUG_ASSERT(fixed()); + String tmp; + String *swkb= args[0]->val_str(&tmp); + Geometry_buffer buffer; + Geometry *geom; + int isclosed= 0; // In case of error + + null_value= 0; + if (!swkb || + args[0]->null_value || + !(geom= Geometry::construct(&buffer, swkb->ptr(), swkb->length())) || + geom->is_closed(&isclosed)) + { + /* IsRing(NULL) should return -1 */ + return -1; + } + + if (!isclosed) + return 0; + + return Item_func_issimple::val_int(); +} + + +/* + Numerical functions +*/ + + +longlong Item_func_dimension::val_int() +{ + DBUG_ASSERT(fixed()); + uint32 dim= 0; // In case of error + String *swkb= args[0]->val_str(&value); + Geometry_buffer buffer; + Geometry *geom; + const char *dummy; + + null_value= (!swkb || + args[0]->null_value || + !(geom= Geometry::construct(&buffer, swkb->ptr(), swkb->length())) || + geom->dimension(&dim, &dummy)); + return (longlong) dim; +} + + +longlong Item_func_numinteriorring::val_int() +{ + DBUG_ASSERT(fixed()); + uint32 num= 0; // In case of error + String *swkb= args[0]->val_str(&value); + Geometry_buffer buffer; + Geometry *geom; + + 
null_value= (!swkb || + !(geom= Geometry::construct(&buffer, + swkb->ptr(), swkb->length())) || + geom->num_interior_ring(&num)); + return (longlong) num; +} + + +longlong Item_func_numgeometries::val_int() +{ + DBUG_ASSERT(fixed()); + uint32 num= 0; // In case of errors + String *swkb= args[0]->val_str(&value); + Geometry_buffer buffer; + Geometry *geom; + + null_value= (!swkb || + !(geom= Geometry::construct(&buffer, + swkb->ptr(), swkb->length())) || + geom->num_geometries(&num)); + return (longlong) num; +} + + +longlong Item_func_numpoints::val_int() +{ + DBUG_ASSERT(fixed()); + uint32 num= 0; // In case of errors + String *swkb= args[0]->val_str(&value); + Geometry_buffer buffer; + Geometry *geom; + + null_value= (!swkb || + args[0]->null_value || + !(geom= Geometry::construct(&buffer, + swkb->ptr(), swkb->length())) || + geom->num_points(&num)); + return (longlong) num; +} + + +double Item_func_x::val_real() +{ + DBUG_ASSERT(fixed()); + double res= 0.0; // In case of errors + String *swkb= args[0]->val_str(&value); + Geometry_buffer buffer; + Geometry *geom; + + null_value= (!swkb || + !(geom= Geometry::construct(&buffer, + swkb->ptr(), swkb->length())) || + geom->get_x(&res)); + return res; +} + + +double Item_func_y::val_real() +{ + DBUG_ASSERT(fixed()); + double res= 0; // In case of errors + String *swkb= args[0]->val_str(&value); + Geometry_buffer buffer; + Geometry *geom; + + null_value= (!swkb || + !(geom= Geometry::construct(&buffer, + swkb->ptr(), swkb->length())) || + geom->get_y(&res)); + return res; +} + + +double Item_func_area::val_real() +{ + DBUG_ASSERT(fixed()); + double res= 0; // In case of errors + String *swkb= args[0]->val_str(&value); + Geometry_buffer buffer; + Geometry *geom; + const char *dummy; + + null_value= (!swkb || + !(geom= Geometry::construct(&buffer, + swkb->ptr(), swkb->length())) || + geom->area(&res, &dummy)); + return res; +} + +double Item_func_glength::val_real() +{ + DBUG_ASSERT(fixed()); + double res= 0; // In case 
of errors + String *swkb= args[0]->val_str(&value); + Geometry_buffer buffer; + Geometry *geom; + const char *end; + + null_value= (!swkb || + !(geom= Geometry::construct(&buffer, + swkb->ptr(), + swkb->length())) || + geom->geom_length(&res, &end)); + return res; +} + +longlong Item_func_srid::val_int() +{ + DBUG_ASSERT(fixed()); + String *swkb= args[0]->val_str(&value); + Geometry_buffer buffer; + + null_value= (!swkb || + !Geometry::construct(&buffer, + swkb->ptr(), swkb->length())); + if (null_value) + return 0; + + return (longlong) (uint4korr(swkb->ptr())); +} + + +double Item_func_distance::val_real() +{ + bool cur_point_edge; + const Gcalc_scan_iterator::point *evpos; + const Gcalc_heap::Info *cur_point, *dist_point; + const Gcalc_scan_iterator::event_point *ev; + double t, distance, cur_distance; + double x1, x2, y1, y2; + double ex, ey, vx, vy, e_sqrlen; + uint obj2_si; + Gcalc_operation_transporter trn(&func, &collector); + + DBUG_ENTER("Item_func_distance::val_real"); + DBUG_ASSERT(fixed()); + String *res1= args[0]->val_str(&tmp_value1); + String *res2= args[1]->val_str(&tmp_value2); + Geometry_buffer buffer1, buffer2; + Geometry *g1, *g2; + MBR mbr1, mbr2; + const char *c_end; + + if (args[0]->null_value || args[1]->null_value) + goto mem_error; + g1= Geometry::construct(&buffer1, res1->ptr(), res1->length()); + if (!g1) + goto mem_error; + g2= Geometry::construct(&buffer2, res2->ptr(), res2->length()); + if (!g2) + goto mem_error; + if (g1->get_mbr(&mbr1, &c_end) || g2->get_mbr(&mbr2, &c_end)) + goto mem_error; + + mbr1.add_mbr(&mbr2); + collector.set_extent(mbr1.xmin, mbr1.xmax, mbr1.ymin, mbr1.ymax); + + if ((g1->get_class_info()->m_type_id == Geometry::wkb_point) && + (g2->get_class_info()->m_type_id == Geometry::wkb_point)) + { + if (((Gis_point *) g1)->get_xy(&x1, &y1) || + ((Gis_point *) g2)->get_xy(&x2, &y2)) + goto mem_error; + ex= x2 - x1; + ey= y2 - y1; + DBUG_RETURN(sqrt(ex * ex + ey * ey)); + } + + if (func.reserve_op_buffer(1)) + goto 
mem_error; + func.add_operation(Gcalc_function::op_intersection, 2); + + if (g1->store_shapes(&trn)) + goto mem_error; + obj2_si= func.get_nshapes(); + if (g2->store_shapes(&trn) || func.alloc_states()) + goto mem_error; + + if (obj2_si == 0 || func.get_nshapes() == obj2_si) + { + distance= 0.0; + null_value= 1; + goto exit; + } + + + collector.prepare_operation(); + scan_it.init(&collector); + + distance= DBL_MAX; + while (scan_it.more_points()) + { + if (scan_it.step()) + goto mem_error; + evpos= scan_it.get_event_position(); + ev= scan_it.get_events(); + + if (ev->simple_event()) + { + cur_point= ev->pi; + goto count_distance; + } + /* + handling intersection we only need to check if it's the intersecion + of objects 1 and 2. In this case distance is 0 + */ + cur_point= NULL; + + /* + having these events we need to check for possible intersection + of objects + scev_thread | scev_two_threads | scev_single_point + */ + func.clear_i_states(); + for (Gcalc_point_iterator pit(&scan_it); pit.point() != evpos; ++pit) + { + gcalc_shape_info si= pit.point()->get_shape(); + if ((func.get_shape_kind(si) == Gcalc_function::shape_polygon)) + func.invert_i_state(si); + } + + func.clear_b_states(); + for (; ev; ev= ev->get_next()) + { + if (ev->event != scev_intersection) + cur_point= ev->pi; + func.set_b_state(ev->get_shape()); + if (func.count()) + { + /* Point of one object is inside the other - intersection found */ + distance= 0; + goto exit; + } + } + + if (!cur_point) + continue; + +count_distance: + if (cur_point->node.shape.shape >= obj2_si) + continue; + cur_point_edge= !cur_point->is_bottom(); + + for (dist_point= collector.get_first(); dist_point; dist_point= dist_point->get_next()) + { + /* We only check vertices of object 2 */ + if (dist_point->type != Gcalc_heap::nt_shape_node || + dist_point->node.shape.shape < obj2_si) + continue; + + /* if we have an edge to check */ + if (dist_point->node.shape.left) + { + t= count_edge_t(dist_point, 
dist_point->node.shape.left, cur_point, + ex, ey, vx, vy, e_sqrlen); + if ((t>0.0) && (t<1.0)) + { + cur_distance= distance_to_line(ex, ey, vx, vy, e_sqrlen); + if (distance > cur_distance) + distance= cur_distance; + } + } + if (cur_point_edge) + { + t= count_edge_t(cur_point, cur_point->node.shape.left, dist_point, + ex, ey, vx, vy, e_sqrlen); + if ((t>0.0) && (t<1.0)) + { + cur_distance= distance_to_line(ex, ey, vx, vy, e_sqrlen); + if (distance > cur_distance) + distance= cur_distance; + } + } + cur_distance= distance_points(cur_point, dist_point); + if (distance > cur_distance) + distance= cur_distance; + } + } +exit: + collector.reset(); + func.reset(); + scan_it.reset(); + DBUG_RETURN(distance); +mem_error: + null_value= 1; + DBUG_RETURN(0); +} + + +double Item_func_sphere_distance::val_real() +{ + /* To test null_value of item, first get well-known bytes as a backups */ + String bak1, bak2; + String *arg1= args[0]->val_str(&bak1); + String *arg2= args[1]->val_str(&bak2); + double distance= 0.0; + double sphere_radius= 6370986.0; // Default radius equals Earth radius + + null_value= (args[0]->null_value || args[1]->null_value); + if (null_value) + { + return 0; + } + + if (arg_count == 3) + { + sphere_radius= args[2]->val_real(); + // Radius cannot be Null + if (args[2]->null_value) + { + null_value= true; + return 0; + } + if (sphere_radius <= 0) + { + my_error(ER_INTERNAL_ERROR, MYF(0), "Radius must be greater than zero."); + return 1; + } + } + Geometry_buffer buffer1, buffer2; + Geometry *g1, *g2; + if (!(g1= Geometry::construct(&buffer1, arg1->ptr(), arg1->length())) || + !(g2= Geometry::construct(&buffer2, arg2->ptr(), arg2->length()))) + { + my_error(ER_GIS_INVALID_DATA, MYF(0), "ST_Distance_Sphere"); + goto handle_errors; + } +// Method allowed for points and multipoints + if (!(g1->get_class_info()->m_type_id == Geometry::wkb_point || + g1->get_class_info()->m_type_id == Geometry::wkb_multipoint) || + !(g2->get_class_info()->m_type_id == 
Geometry::wkb_point || + g2->get_class_info()->m_type_id == Geometry::wkb_multipoint)) + { + // Generate error message in case different geometry is used? + my_error(ER_INTERNAL_ERROR, MYF(0), func_name()); + return 0; + } + distance= spherical_distance_points(g1, g2, sphere_radius); + if (distance < 0) + { + my_error(ER_INTERNAL_ERROR, MYF(0), "Returned distance cannot be negative."); + return 1; + } + return distance; + + handle_errors: + return 0; +} + + +double Item_func_sphere_distance::spherical_distance_points(Geometry *g1, + Geometry *g2, + const double r) +{ + double res= 0.0; + // Length for the single point (25 Bytes) + uint32 len= SRID_SIZE + POINT_DATA_SIZE + WKB_HEADER_SIZE; + int err_hv= 0, err_sph= 0; + + switch (g2->get_class_info()->m_type_id) + { + case Geometry::wkb_point: + { + Gis_point *g2p= static_cast(g2); + // Optimization for point-point case + if (g1->get_class_info()->m_type_id == Geometry::wkb_point) + { + res= g2p->calculate_haversine(g1, r, &err_hv); + } + else + { + // Optimization for single point in Multipoint + if (g1->get_data_size() == len) + { + res= g2p->calculate_haversine(g1, r, &err_hv); + } + else + { + // There are multipoints in g1 + // g1 is MultiPoint and calculate MP.sphericaldistance from g2 Point + if (g1->get_data_size() != GET_SIZE_ERROR) + err_sph= g2p->spherical_distance_multipoints(g1, r, &res, &err_hv); + } + } + break; + } + + case Geometry::wkb_multipoint: + // Optimization for point-point case + if (g1->get_class_info()->m_type_id == Geometry::wkb_point) + { + Gis_point *g1p= static_cast(g1); + // Optimization for single point in Multipoint g2 + if (g2->get_data_size() == len) + { + res= g1p->calculate_haversine(g2, r, &err_hv); + } + else + { + if (g2->get_data_size() != GET_SIZE_ERROR) + // g1 is a point (casted to multi_point) and g2 multipoint + err_sph= g1p->spherical_distance_multipoints(g2, r, &res, &err_hv); + } + } + else + { + Gis_multi_point *g1mp= static_cast(g1); + // Multipoints in g1 and g2 
- no optimization + err_sph= g1mp->spherical_distance_multipoints(g2, r, &res, &err_hv); + } + break; + + default: + DBUG_ASSERT(0); + break; + } + + if (err_hv == 1) + my_error(ER_STD_OUT_OF_RANGE_ERROR, MYF(0), + "Longitude should be [-180,180]", "ST_Distance_Sphere"); + else if(err_hv < 0) + my_error(ER_STD_OUT_OF_RANGE_ERROR, MYF(0), + "Latitude should be [-90,90]", "ST_Distance_Sphere"); + else if (err_sph || err_hv == 2) + my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, MYF(0)); + return res; +} + + +String *Item_func_pointonsurface::val_str(String *str) +{ + Gcalc_operation_transporter trn(&func, &collector); + String *res= args[0]->val_str(&tmp_value); + Geometry_buffer buffer; + Geometry *g; + MBR mbr; + const char *c_end; + double UNINIT_VAR(px), UNINIT_VAR(py), x0, UNINIT_VAR(y0); + String *result= 0; + const Gcalc_scan_iterator::point *pprev= NULL; + uint32 srid; + DBUG_ENTER("Item_func_pointonsurface::val_str"); + DBUG_ASSERT(fixed()); + + null_value= 1; + if ((args[0]->null_value || + !(g= Geometry::construct(&buffer, res->ptr(), res->length())) || + g->get_mbr(&mbr, &c_end))) + goto mem_error; + + collector.set_extent(mbr.xmin, mbr.xmax, mbr.ymin, mbr.ymax); + + if (g->store_shapes(&trn)) + goto mem_error; + + collector.prepare_operation(); + scan_it.init(&collector); + + while (scan_it.more_points()) + { + if (scan_it.step()) + goto mem_error; + + if (scan_it.get_h() > GIS_ZERO) + { + y0= scan_it.get_y(); + break; + } + } + + if (!scan_it.more_points()) + { + goto exit; + } + + if (scan_it.step()) + goto mem_error; + + for (Gcalc_point_iterator pit(&scan_it); pit.point(); ++pit) + { + if (pprev == NULL) + { + pprev= pit.point(); + continue; + } + x0= scan_it.get_sp_x(pprev); + px= scan_it.get_sp_x(pit.point()); + if (px - x0 > GIS_ZERO) + { + if (scan_it.get_h() > GIS_ZERO) + { + px= (px + x0) / 2.0; + py= scan_it.get_y(); + } + else + { + px= (px + x0) / 2.0; + py= (y0 + scan_it.get_y()) / 2.0; + } + null_value= 0; + break; + } + pprev= NULL; + } + + if 
(null_value) + goto exit; + + str->set_charset(&my_charset_bin); + str->length(0); + if (str->reserve(SRID_SIZE, 512)) + goto mem_error; + + srid= uint4korr(res->ptr()); + str->q_append(srid); + + if (Geometry::create_point(str, px, py)) + goto mem_error; + + result= str; + +exit: + collector.reset(); + func.reset(); + scan_it.reset(); + DBUG_RETURN(result); + +mem_error: + collector.reset(); + func.reset(); + scan_it.reset(); + null_value= 1; + DBUG_RETURN(0); +} + + +#ifndef DBUG_OFF +longlong Item_func_gis_debug::val_int() +{ + /* For now this is just a stub. TODO: implement the internal GIS debuggign */ + return 0; +} +#endif + + +/**********************************************************************/ + + +class Create_func_area : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_area(thd, arg1); + } + + static Create_func_area s_singleton; + +protected: + Create_func_area() = default; + virtual ~Create_func_area() = default; +}; + + +class Create_func_as_wkb : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_as_wkb(thd, arg1); + } + + static Create_func_as_wkb s_singleton; + +protected: + Create_func_as_wkb() = default; + virtual ~Create_func_as_wkb() = default; +}; + + +class Create_func_as_wkt : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_as_wkt(thd, arg1); + } + + static Create_func_as_wkt s_singleton; + +protected: + Create_func_as_wkt() = default; + virtual ~Create_func_as_wkt() = default; +}; + + + +class Create_func_centroid : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_centroid(thd, arg1); + } + + static Create_func_centroid s_singleton; + +protected: + Create_func_centroid() = default; + virtual ~Create_func_centroid() 
= default; +}; + + +class Create_func_convexhull : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_convexhull(thd, arg1); + } + + static Create_func_convexhull s_singleton; + +protected: + Create_func_convexhull() = default; + virtual ~Create_func_convexhull() = default; +}; + + +class Create_func_pointonsurface : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_pointonsurface(thd, arg1); + } + + static Create_func_pointonsurface s_singleton; + +protected: + Create_func_pointonsurface() = default; + virtual ~Create_func_pointonsurface() = default; +}; + + +class Create_func_mbr_contains : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_mbr_rel(thd, arg1, arg2, + Item_func::SP_CONTAINS_FUNC); + } + + static Create_func_mbr_contains s_singleton; + +protected: + Create_func_mbr_contains() = default; + virtual ~Create_func_mbr_contains() = default; +}; + + +class Create_func_contains : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_precise_rel(thd, arg1, arg2, + Item_func::SP_CONTAINS_FUNC); + } + static Create_func_contains s_singleton; + +protected: + Create_func_contains() = default; + virtual ~Create_func_contains() = default; +}; + + +class Create_func_crosses : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_precise_rel(thd, arg1, arg2, + Item_func::SP_CROSSES_FUNC); + } + static Create_func_crosses s_singleton; + +protected: + Create_func_crosses() = default; + virtual ~Create_func_crosses() = default; +}; + + +class Create_func_dimension : public Create_func_arg1 +{ +public: + Item 
*create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_dimension(thd, arg1); + } + + static Create_func_dimension s_singleton; + +protected: + Create_func_dimension() = default; + virtual ~Create_func_dimension() = default; +}; + + +class Create_func_mbr_disjoint : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_mbr_rel(thd, arg1, arg2, + Item_func::SP_DISJOINT_FUNC); + } + + static Create_func_mbr_disjoint s_singleton; + +protected: + Create_func_mbr_disjoint() = default; + virtual ~Create_func_mbr_disjoint() = default; +}; + + +class Create_func_disjoint : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_precise_rel(thd, arg1, arg2, + Item_func::SP_DISJOINT_FUNC); + } + static Create_func_disjoint s_singleton; + +protected: + Create_func_disjoint() = default; + virtual ~Create_func_disjoint() = default; +}; + + +class Create_func_distance : public Create_func_arg2 +{ +public: + Item* create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_distance(thd, arg1, arg2); + } + + static Create_func_distance s_singleton; + +protected: + Create_func_distance() = default; + virtual ~Create_func_distance() = default; +}; + + +class Create_func_distance_sphere: public Create_native_func +{ +public: + Item *create_native(THD *thd, const LEX_CSTRING *name, List *item_list) + override; + static Create_func_distance_sphere s_singleton; + +protected: + Create_func_distance_sphere() = default; + virtual ~Create_func_distance_sphere() = default; +}; + + +Item* +Create_func_distance_sphere::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (arg_count < 2) + { + 
my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + return new (thd->mem_root) Item_func_sphere_distance(thd, *item_list); +} + + +class Create_func_endpoint : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_spatial_decomp(thd, arg1, + Item_func::SP_ENDPOINT); + } + + static Create_func_endpoint s_singleton; + +protected: + Create_func_endpoint() = default; + virtual ~Create_func_endpoint() = default; +}; + + +class Create_func_envelope : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_envelope(thd, arg1); + } + + static Create_func_envelope s_singleton; + +protected: + Create_func_envelope() = default; + virtual ~Create_func_envelope() = default; +}; + +class Create_func_boundary : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_boundary(thd, arg1); + } + + static Create_func_boundary s_singleton; + +protected: + Create_func_boundary() = default; + virtual ~Create_func_boundary() = default; +}; + + +class Create_func_mbr_equals : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_mbr_rel(thd, arg1, arg2, + Item_func::SP_EQUALS_FUNC); + } + + static Create_func_mbr_equals s_singleton; + +protected: + Create_func_mbr_equals() = default; + virtual ~Create_func_mbr_equals() = default; +}; + + +class Create_func_equals : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_precise_rel(thd, arg1, arg2, + Item_func::SP_EQUALS_FUNC); + } + + static Create_func_equals s_singleton; + +protected: + Create_func_equals() = default; + virtual ~Create_func_equals() = default; +}; + + +class 
Create_func_exteriorring : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_spatial_decomp(thd, arg1, + Item_func::SP_EXTERIORRING); + } + + static Create_func_exteriorring s_singleton; + +protected: + Create_func_exteriorring() = default; + virtual ~Create_func_exteriorring() = default; +}; + + + +class Create_func_geometry_from_text : public Create_native_func +{ +public: + Item *create_native(THD *thd, const LEX_CSTRING *name, List *item_list) + override; + + static Create_func_geometry_from_text s_singleton; + +protected: + Create_func_geometry_from_text() = default; + virtual ~Create_func_geometry_from_text() = default; +}; + + +Item* +Create_func_geometry_from_text::create_native(THD *thd, + const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 1: + { + Item *param_1= item_list->pop(); + func= new (thd->mem_root) Item_func_geometry_from_text(thd, param_1); + thd->lex->uncacheable(UNCACHEABLE_RAND); + break; + } + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_geometry_from_text(thd, param_1, param_2); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +class Create_func_geometry_from_wkb : public Create_native_func +{ +public: + Item *create_native(THD *thd, const LEX_CSTRING *name, List *item_list) + override; + + static Create_func_geometry_from_wkb s_singleton; + +protected: + Create_func_geometry_from_wkb() = default; + virtual ~Create_func_geometry_from_wkb() = default; +}; + + +Item* +Create_func_geometry_from_wkb::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch 
(arg_count) { + case 1: + { + Item *param_1= item_list->pop(); + func= new (thd->mem_root) Item_func_geometry_from_wkb(thd, param_1); + thd->lex->uncacheable(UNCACHEABLE_RAND); + break; + } + case 2: + { + Item *param_1= item_list->pop(); + Item *param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_geometry_from_wkb(thd, param_1, param_2); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +class Create_func_geometry_from_json : public Create_native_func +{ +public: + Item *create_native(THD *thd, const LEX_CSTRING *name, List *item_list) + override; + + static Create_func_geometry_from_json s_singleton; + +protected: + Create_func_geometry_from_json() = default; + virtual ~Create_func_geometry_from_json() = default; +}; + + +Item* +Create_func_geometry_from_json::create_native(THD *thd, + const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 1: + { + Item *json= item_list->pop(); + func= new (thd->mem_root) Item_func_geometry_from_json(thd, json); + thd->lex->uncacheable(UNCACHEABLE_RAND); + break; + } + case 2: + { + Item *json= item_list->pop(); + Item *options= item_list->pop(); + func= new (thd->mem_root) Item_func_geometry_from_json(thd, json, options); + break; + } + case 3: + { + Item *json= item_list->pop(); + Item *options= item_list->pop(); + Item *srid= item_list->pop(); + func= new (thd->mem_root) Item_func_geometry_from_json(thd, json, options, + srid); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +class Create_func_as_geojson : public Create_native_func +{ +public: + Item *create_native(THD *thd, const LEX_CSTRING *name, List *item_list) + override; + + static Create_func_as_geojson s_singleton; + +protected: + Create_func_as_geojson() = 
default; + virtual ~Create_func_as_geojson() = default; +}; + + +Item* +Create_func_as_geojson::create_native(THD *thd, const LEX_CSTRING *name, + List *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) { + case 1: + { + Item *geom= item_list->pop(); + func= new (thd->mem_root) Item_func_as_geojson(thd, geom); + thd->lex->uncacheable(UNCACHEABLE_RAND); + break; + } + case 2: + { + Item *geom= item_list->pop(); + Item *max_dec= item_list->pop(); + func= new (thd->mem_root) Item_func_as_geojson(thd, geom, max_dec); + break; + } + case 3: + { + Item *geom= item_list->pop(); + Item *max_dec= item_list->pop(); + Item *options= item_list->pop(); + func= new (thd->mem_root) Item_func_as_geojson(thd, geom, max_dec, options); + break; + } + default: + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + } + + return func; +} + + +class Create_func_geometry_type : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_geometry_type(thd, arg1); + } + + static Create_func_geometry_type s_singleton; + +protected: + Create_func_geometry_type() = default; + virtual ~Create_func_geometry_type() = default; +}; + + +class Create_func_geometryn : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_decomp_n(thd, arg1, arg2, + Item_func::SP_GEOMETRYN); + } + + static Create_func_geometryn s_singleton; + +protected: + Create_func_geometryn() = default; + virtual ~Create_func_geometryn() = default; +}; + + +#if !defined(DBUG_OFF) +class Create_func_gis_debug : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_gis_debug(thd, arg1); + } + + static Create_func_gis_debug s_singleton; + +protected: + Create_func_gis_debug() = 
default; + virtual ~Create_func_gis_debug() = default; +}; +#endif + + +class Create_func_glength : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_glength(thd, arg1); + } + + static Create_func_glength s_singleton; + +protected: + Create_func_glength() = default; + virtual ~Create_func_glength() = default; +}; + + +class Create_func_interiorringn : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_decomp_n(thd, arg1, arg2, + Item_func::SP_INTERIORRINGN); + } + + static Create_func_interiorringn s_singleton; + +protected: + Create_func_interiorringn() = default; + virtual ~Create_func_interiorringn() = default; +}; + + +class Create_func_relate : public Create_func_arg3 +{ +public: + Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3) override + { + return new (thd->mem_root) Item_func_spatial_relate(thd, arg1, arg2, arg3); + } + + static Create_func_relate s_singleton; + +protected: + Create_func_relate() = default; + virtual ~Create_func_relate() = default; +}; + + +class Create_func_mbr_intersects : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_mbr_rel(thd, arg1, arg2, + Item_func::SP_INTERSECTS_FUNC); + } + + static Create_func_mbr_intersects s_singleton; + +protected: + Create_func_mbr_intersects() = default; + virtual ~Create_func_mbr_intersects() = default; +}; + + +class Create_func_intersects : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_precise_rel(thd, arg1, arg2, + Item_func::SP_INTERSECTS_FUNC); + } + + static Create_func_intersects s_singleton; + +protected: + Create_func_intersects() = default; + virtual ~Create_func_intersects() = default; +}; + + 
+class Create_func_intersection : public Create_func_arg2 +{ +public: + Item* create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_operation(thd, arg1, arg2, + Gcalc_function::op_intersection); + } + + static Create_func_intersection s_singleton; + +protected: + Create_func_intersection() = default; + virtual ~Create_func_intersection() = default; +}; + + +class Create_func_difference : public Create_func_arg2 +{ +public: + Item* create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_operation(thd, arg1, arg2, + Gcalc_function::op_difference); + } + + static Create_func_difference s_singleton; + +protected: + Create_func_difference() = default; + virtual ~Create_func_difference() = default; +}; + + +class Create_func_union : public Create_func_arg2 +{ +public: + Item* create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_operation(thd, arg1, arg2, + Gcalc_function::op_union); + } + + static Create_func_union s_singleton; + +protected: + Create_func_union() = default; + virtual ~Create_func_union() = default; +}; + + +class Create_func_symdifference : public Create_func_arg2 +{ +public: + Item* create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_operation(thd, arg1, arg2, + Gcalc_function::op_symdifference); + } + + static Create_func_symdifference s_singleton; + +protected: + Create_func_symdifference() = default; + virtual ~Create_func_symdifference() = default; +}; + + +class Create_func_buffer : public Create_func_arg2 +{ +public: + Item* create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_buffer(thd, arg1, arg2); + } + + static Create_func_buffer s_singleton; + +protected: + Create_func_buffer() = default; + virtual ~Create_func_buffer() = default; +}; + + +class Create_func_isclosed : public 
Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_isclosed(thd, arg1); + } + + static Create_func_isclosed s_singleton; + +protected: + Create_func_isclosed() = default; + virtual ~Create_func_isclosed() = default; +}; + + +class Create_func_isring : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_isring(thd, arg1); + } + + static Create_func_isring s_singleton; + +protected: + Create_func_isring() = default; + virtual ~Create_func_isring() = default; +}; + + +class Create_func_isempty : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_isempty(thd, arg1); + } + + static Create_func_isempty s_singleton; + +protected: + Create_func_isempty() = default; + virtual ~Create_func_isempty() = default; +}; + + +class Create_func_issimple : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_issimple(thd, arg1); + } + + static Create_func_issimple s_singleton; + +protected: + Create_func_issimple() = default; + virtual ~Create_func_issimple() = default; +}; + + + +class Create_func_numgeometries : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_numgeometries(thd, arg1); + } + + static Create_func_numgeometries s_singleton; + +protected: + Create_func_numgeometries() = default; + virtual ~Create_func_numgeometries() = default; +}; + + +class Create_func_numinteriorring : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_numinteriorring(thd, arg1); + } + + static Create_func_numinteriorring s_singleton; + +protected: + Create_func_numinteriorring() = default; + virtual ~Create_func_numinteriorring() = 
default; +}; + + +class Create_func_numpoints : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_numpoints(thd, arg1); + } + + static Create_func_numpoints s_singleton; + +protected: + Create_func_numpoints() = default; + virtual ~Create_func_numpoints() = default; +}; + + +class Create_func_mbr_overlaps : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_mbr_rel(thd, arg1, arg2, + Item_func::SP_OVERLAPS_FUNC); + } + + static Create_func_mbr_overlaps s_singleton; + +protected: + Create_func_mbr_overlaps() = default; + virtual ~Create_func_mbr_overlaps() = default; +}; + + +class Create_func_overlaps : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_precise_rel(thd, arg1, arg2, + Item_func::SP_OVERLAPS_FUNC); + } + + static Create_func_overlaps s_singleton; + +protected: + Create_func_overlaps() = default; + virtual ~Create_func_overlaps() = default; +}; + + + + + +class Create_func_pointn : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_decomp_n(thd, arg1, arg2, + Item_func::SP_POINTN); + } + static Create_func_pointn s_singleton; + +protected: + Create_func_pointn() = default; + virtual ~Create_func_pointn() = default; +}; + + + + +class Create_func_srid : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_srid(thd, arg1); + } + + static Create_func_srid s_singleton; + +protected: + Create_func_srid() = default; + virtual ~Create_func_srid() = default; +}; + + +class Create_func_startpoint : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new 
(thd->mem_root) Item_func_spatial_decomp(thd, arg1, + Item_func::SP_STARTPOINT); + } + + static Create_func_startpoint s_singleton; + +protected: + Create_func_startpoint() = default; + virtual ~Create_func_startpoint() = default; +}; + + + +class Create_func_touches : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_precise_rel(thd, arg1, arg2, + Item_func::SP_TOUCHES_FUNC); + } + + static Create_func_touches s_singleton; + +protected: + Create_func_touches() = default; + virtual ~Create_func_touches() = default; +}; + + +class Create_func_mbr_within : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_mbr_rel(thd, arg1, arg2, + Item_func::SP_WITHIN_FUNC); + } + + static Create_func_mbr_within s_singleton; + +protected: + Create_func_mbr_within() = default; + virtual ~Create_func_mbr_within() = default; +}; + + +class Create_func_within : public Create_func_arg2 +{ +public: + Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override + { + return new (thd->mem_root) Item_func_spatial_precise_rel(thd, arg1, arg2, + Item_func::SP_WITHIN_FUNC); + } + + static Create_func_within s_singleton; + +protected: + Create_func_within() = default; + virtual ~Create_func_within() = default; +}; + + +class Create_func_x : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_x(thd, arg1); + } + + static Create_func_x s_singleton; + +protected: + Create_func_x() = default; + virtual ~Create_func_x() = default; +}; + + +class Create_func_y : public Create_func_arg1 +{ +public: + Item *create_1_arg(THD *thd, Item *arg1) override + { + return new (thd->mem_root) Item_func_y(thd, arg1); + } + + static Create_func_y s_singleton; + +protected: + Create_func_y() = default; + virtual ~Create_func_y() = default; 
+}; + + +/*****************************************************************/ + + + + + + + +/*************************************************************************/ + +#if !defined(DBUG_OFF) +Create_func_gis_debug Create_func_gis_debug::s_singleton; +#endif + +Create_func_area Create_func_area::s_singleton; +Create_func_as_geojson Create_func_as_geojson::s_singleton; +Create_func_as_wkb Create_func_as_wkb::s_singleton; +Create_func_as_wkt Create_func_as_wkt::s_singleton; +Create_func_boundary Create_func_boundary::s_singleton; +Create_func_buffer Create_func_buffer::s_singleton; +Create_func_centroid Create_func_centroid::s_singleton; +Create_func_contains Create_func_contains::s_singleton; +Create_func_convexhull Create_func_convexhull::s_singleton; +Create_func_crosses Create_func_crosses::s_singleton; +Create_func_difference Create_func_difference::s_singleton; +Create_func_dimension Create_func_dimension::s_singleton; +Create_func_disjoint Create_func_disjoint::s_singleton; +Create_func_distance Create_func_distance::s_singleton; +Create_func_distance_sphere Create_func_distance_sphere::s_singleton; +Create_func_endpoint Create_func_endpoint::s_singleton; +Create_func_envelope Create_func_envelope::s_singleton; +Create_func_equals Create_func_equals::s_singleton; +Create_func_exteriorring Create_func_exteriorring::s_singleton; +Create_func_geometry_from_json Create_func_geometry_from_json::s_singleton; +Create_func_geometry_from_text Create_func_geometry_from_text::s_singleton; +Create_func_geometry_from_wkb Create_func_geometry_from_wkb::s_singleton; +Create_func_geometryn Create_func_geometryn::s_singleton; +Create_func_geometry_type Create_func_geometry_type::s_singleton; +Create_func_glength Create_func_glength::s_singleton; +Create_func_interiorringn Create_func_interiorringn::s_singleton; +Create_func_intersection Create_func_intersection::s_singleton; +Create_func_intersects Create_func_intersects::s_singleton; +Create_func_isclosed 
Create_func_isclosed::s_singleton; +Create_func_isempty Create_func_isempty::s_singleton; +Create_func_isring Create_func_isring::s_singleton; +Create_func_issimple Create_func_issimple::s_singleton; +Create_func_mbr_contains Create_func_mbr_contains::s_singleton; +Create_func_mbr_disjoint Create_func_mbr_disjoint::s_singleton; +Create_func_mbr_equals Create_func_mbr_equals::s_singleton; +Create_func_mbr_intersects Create_func_mbr_intersects::s_singleton; +Create_func_mbr_overlaps Create_func_mbr_overlaps::s_singleton; +Create_func_mbr_within Create_func_mbr_within::s_singleton; +Create_func_numgeometries Create_func_numgeometries::s_singleton; +Create_func_numinteriorring Create_func_numinteriorring::s_singleton; +Create_func_numpoints Create_func_numpoints::s_singleton; +Create_func_overlaps Create_func_overlaps::s_singleton; +Create_func_pointn Create_func_pointn::s_singleton; +Create_func_pointonsurface Create_func_pointonsurface::s_singleton; +Create_func_relate Create_func_relate::s_singleton; +Create_func_srid Create_func_srid::s_singleton; +Create_func_startpoint Create_func_startpoint::s_singleton; +Create_func_symdifference Create_func_symdifference::s_singleton; +Create_func_touches Create_func_touches::s_singleton; +Create_func_union Create_func_union::s_singleton; +Create_func_within Create_func_within::s_singleton; +Create_func_x Create_func_x::s_singleton; +Create_func_y Create_func_y::s_singleton; + +/*************************************************************************/ + + +#define GEOM_BUILDER(F) & F::s_singleton + + +static Native_func_registry func_array_geom[] = +{ +#ifndef DBUG_OFF + { { STRING_WITH_LEN("ST_GIS_DEBUG") }, GEOM_BUILDER(Create_func_gis_debug)}, +#endif + { { STRING_WITH_LEN("AREA") }, GEOM_BUILDER(Create_func_area)}, + { { STRING_WITH_LEN("ASBINARY") }, GEOM_BUILDER(Create_func_as_wkb)}, + { { STRING_WITH_LEN("ASTEXT") }, GEOM_BUILDER(Create_func_as_wkt)}, + { { STRING_WITH_LEN("ASWKB") }, GEOM_BUILDER(Create_func_as_wkb)}, 
+ { { STRING_WITH_LEN("ASWKT") }, GEOM_BUILDER(Create_func_as_wkt)}, + { { STRING_WITH_LEN("BOUNDARY") }, GEOM_BUILDER(Create_func_boundary)}, + { { STRING_WITH_LEN("BUFFER") }, GEOM_BUILDER(Create_func_buffer)}, + { { STRING_WITH_LEN("CENTROID") }, GEOM_BUILDER(Create_func_centroid)}, + { { STRING_WITH_LEN("CONTAINS") }, GEOM_BUILDER(Create_func_contains)}, + { { STRING_WITH_LEN("CONVEXHULL") }, GEOM_BUILDER(Create_func_convexhull)}, + { { STRING_WITH_LEN("CROSSES") }, GEOM_BUILDER(Create_func_crosses)}, + { { STRING_WITH_LEN("DIMENSION") }, GEOM_BUILDER(Create_func_dimension)}, + { { STRING_WITH_LEN("DISJOINT") }, GEOM_BUILDER(Create_func_mbr_disjoint)}, + { { STRING_WITH_LEN("ENDPOINT") }, GEOM_BUILDER(Create_func_endpoint)}, + { { STRING_WITH_LEN("ENVELOPE") }, GEOM_BUILDER(Create_func_envelope)}, + { { STRING_WITH_LEN("EQUALS") }, GEOM_BUILDER(Create_func_equals)}, + { { STRING_WITH_LEN("EXTERIORRING") }, GEOM_BUILDER(Create_func_exteriorring)}, + { { STRING_WITH_LEN("GEOMCOLLFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("GEOMCOLLFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("GEOMETRYCOLLECTIONFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("GEOMETRYCOLLECTIONFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("GEOMETRYFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("GEOMETRYFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("GEOMETRYN") }, GEOM_BUILDER(Create_func_geometryn)}, + { { STRING_WITH_LEN("GEOMETRYTYPE") }, GEOM_BUILDER(Create_func_geometry_type)}, + { { STRING_WITH_LEN("GEOMFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("GEOMFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("GLENGTH") }, GEOM_BUILDER(Create_func_glength)}, + { { STRING_WITH_LEN("INTERIORRINGN") }, 
GEOM_BUILDER(Create_func_interiorringn)}, + { { STRING_WITH_LEN("INTERSECTS") }, GEOM_BUILDER(Create_func_mbr_intersects)}, + { { STRING_WITH_LEN("ISCLOSED") }, GEOM_BUILDER(Create_func_isclosed)}, + { { STRING_WITH_LEN("ISEMPTY") }, GEOM_BUILDER(Create_func_isempty)}, + { { STRING_WITH_LEN("ISRING") }, GEOM_BUILDER(Create_func_isring)}, + { { STRING_WITH_LEN("ISSIMPLE") }, GEOM_BUILDER(Create_func_issimple)}, + { { STRING_WITH_LEN("LINEFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("LINEFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("LINESTRINGFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("LINESTRINGFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("MBRCONTAINS") }, GEOM_BUILDER(Create_func_mbr_contains)}, + { { STRING_WITH_LEN("MBRDISJOINT") }, GEOM_BUILDER(Create_func_mbr_disjoint)}, + { { STRING_WITH_LEN("MBREQUAL") }, GEOM_BUILDER(Create_func_mbr_equals)}, + { { STRING_WITH_LEN("MBREQUALS") }, GEOM_BUILDER(Create_func_mbr_equals)}, + { { STRING_WITH_LEN("MBRINTERSECTS") }, GEOM_BUILDER(Create_func_mbr_intersects)}, + { { STRING_WITH_LEN("MBROVERLAPS") }, GEOM_BUILDER(Create_func_mbr_overlaps)}, + { { STRING_WITH_LEN("MBRTOUCHES") }, GEOM_BUILDER(Create_func_touches)}, + { { STRING_WITH_LEN("MBRWITHIN") }, GEOM_BUILDER(Create_func_mbr_within)}, + { { STRING_WITH_LEN("MLINEFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("MLINEFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("MPOINTFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("MPOINTFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("MPOLYFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("MPOLYFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("MULTILINESTRINGFROMTEXT") }, 
GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("MULTILINESTRINGFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("MULTIPOINTFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("MULTIPOINTFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("MULTIPOLYGONFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("MULTIPOLYGONFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("NUMGEOMETRIES") }, GEOM_BUILDER(Create_func_numgeometries)}, + { { STRING_WITH_LEN("NUMINTERIORRINGS") }, GEOM_BUILDER(Create_func_numinteriorring)}, + { { STRING_WITH_LEN("NUMPOINTS") }, GEOM_BUILDER(Create_func_numpoints)}, + { { STRING_WITH_LEN("OVERLAPS") }, GEOM_BUILDER(Create_func_mbr_overlaps)}, + { { STRING_WITH_LEN("POINTFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("POINTFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("POINTN") }, GEOM_BUILDER(Create_func_pointn)}, + { { STRING_WITH_LEN("POINTONSURFACE") }, GEOM_BUILDER(Create_func_pointonsurface)}, + { { STRING_WITH_LEN("POLYFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("POLYFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("POLYGONFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("POLYGONFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("SRID") }, GEOM_BUILDER(Create_func_srid)}, + { { STRING_WITH_LEN("ST_AREA") }, GEOM_BUILDER(Create_func_area)}, + { { STRING_WITH_LEN("STARTPOINT") }, GEOM_BUILDER(Create_func_startpoint)}, + { { STRING_WITH_LEN("ST_ASBINARY") }, GEOM_BUILDER(Create_func_as_wkb)}, + { { STRING_WITH_LEN("ST_ASGEOJSON") }, GEOM_BUILDER(Create_func_as_geojson)}, + { { STRING_WITH_LEN("ST_ASTEXT") }, GEOM_BUILDER(Create_func_as_wkt)}, + { { 
STRING_WITH_LEN("ST_ASWKB") }, GEOM_BUILDER(Create_func_as_wkb)}, + { { STRING_WITH_LEN("ST_ASWKT") }, GEOM_BUILDER(Create_func_as_wkt)}, + { { STRING_WITH_LEN("ST_BOUNDARY") }, GEOM_BUILDER(Create_func_boundary)}, + { { STRING_WITH_LEN("ST_BUFFER") }, GEOM_BUILDER(Create_func_buffer)}, + { { STRING_WITH_LEN("ST_CENTROID") }, GEOM_BUILDER(Create_func_centroid)}, + { { STRING_WITH_LEN("ST_CONTAINS") }, GEOM_BUILDER(Create_func_contains)}, + { { STRING_WITH_LEN("ST_CONVEXHULL") }, GEOM_BUILDER(Create_func_convexhull)}, + { { STRING_WITH_LEN("ST_CROSSES") }, GEOM_BUILDER(Create_func_crosses)}, + { { STRING_WITH_LEN("ST_DIFFERENCE") }, GEOM_BUILDER(Create_func_difference)}, + { { STRING_WITH_LEN("ST_DIMENSION") }, GEOM_BUILDER(Create_func_dimension)}, + { { STRING_WITH_LEN("ST_DISJOINT") }, GEOM_BUILDER(Create_func_disjoint)}, + { { STRING_WITH_LEN("ST_DISTANCE") }, GEOM_BUILDER(Create_func_distance)}, + { { STRING_WITH_LEN("ST_ENDPOINT") }, GEOM_BUILDER(Create_func_endpoint)}, + { { STRING_WITH_LEN("ST_ENVELOPE") }, GEOM_BUILDER(Create_func_envelope)}, + { { STRING_WITH_LEN("ST_EQUALS") }, GEOM_BUILDER(Create_func_equals)}, + { { STRING_WITH_LEN("ST_EQUALS") }, GEOM_BUILDER(Create_func_equals)}, + { { STRING_WITH_LEN("ST_EXTERIORRING") }, GEOM_BUILDER(Create_func_exteriorring)}, + { { STRING_WITH_LEN("ST_GEOMCOLLFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("ST_GEOMCOLLFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("ST_GEOMETRYCOLLECTIONFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("ST_GEOMETRYCOLLECTIONFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("ST_GEOMETRYFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("ST_GEOMETRYFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("ST_GEOMETRYN") }, GEOM_BUILDER(Create_func_geometryn)}, + { { 
STRING_WITH_LEN("ST_GEOMETRYTYPE") }, GEOM_BUILDER(Create_func_geometry_type)}, + { { STRING_WITH_LEN("ST_GEOMFROMGEOJSON") }, GEOM_BUILDER(Create_func_geometry_from_json)}, + { { STRING_WITH_LEN("ST_GEOMFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("ST_GEOMFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("ST_INTERIORRINGN") }, GEOM_BUILDER(Create_func_interiorringn)}, + { { STRING_WITH_LEN("ST_INTERSECTION") }, GEOM_BUILDER(Create_func_intersection)}, + { { STRING_WITH_LEN("ST_INTERSECTS") }, GEOM_BUILDER(Create_func_intersects)}, + { { STRING_WITH_LEN("ST_ISCLOSED") }, GEOM_BUILDER(Create_func_isclosed)}, + { { STRING_WITH_LEN("ST_ISEMPTY") }, GEOM_BUILDER(Create_func_isempty)}, + { { STRING_WITH_LEN("ST_ISRING") }, GEOM_BUILDER(Create_func_isring)}, + { { STRING_WITH_LEN("ST_ISSIMPLE") }, GEOM_BUILDER(Create_func_issimple)}, + { { STRING_WITH_LEN("ST_LENGTH") }, GEOM_BUILDER(Create_func_glength)}, + { { STRING_WITH_LEN("ST_LINEFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("ST_LINEFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("ST_LINESTRINGFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("ST_LINESTRINGFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("ST_MLINEFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("ST_MLINEFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("ST_MPOINTFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("ST_MPOINTFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("ST_MPOLYFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("ST_MPOLYFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("ST_MULTILINESTRINGFROMTEXT") }, 
GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("ST_MULTILINESTRINGFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("ST_MULTIPOINTFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("ST_MULTIPOINTFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("ST_MULTIPOLYGONFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("ST_MULTIPOLYGONFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("ST_NUMGEOMETRIES") }, GEOM_BUILDER(Create_func_numgeometries)}, + { { STRING_WITH_LEN("ST_NUMINTERIORRINGS") }, GEOM_BUILDER(Create_func_numinteriorring)}, + { { STRING_WITH_LEN("ST_NUMPOINTS") }, GEOM_BUILDER(Create_func_numpoints)}, + { { STRING_WITH_LEN("ST_OVERLAPS") }, GEOM_BUILDER(Create_func_overlaps)}, + { { STRING_WITH_LEN("ST_POINTFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("ST_POINTFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("ST_POINTN") }, GEOM_BUILDER(Create_func_pointn)}, + { { STRING_WITH_LEN("ST_POINTONSURFACE") }, GEOM_BUILDER(Create_func_pointonsurface)}, + { { STRING_WITH_LEN("ST_POLYFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("ST_POLYFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("ST_POLYGONFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)}, + { { STRING_WITH_LEN("ST_POLYGONFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)}, + { { STRING_WITH_LEN("ST_RELATE") }, GEOM_BUILDER(Create_func_relate)}, + { { STRING_WITH_LEN("ST_SRID") }, GEOM_BUILDER(Create_func_srid)}, + { { STRING_WITH_LEN("ST_STARTPOINT") }, GEOM_BUILDER(Create_func_startpoint)}, + { { STRING_WITH_LEN("ST_SYMDIFFERENCE") }, GEOM_BUILDER(Create_func_symdifference)}, + { { STRING_WITH_LEN("ST_TOUCHES") }, GEOM_BUILDER(Create_func_touches)}, + { { STRING_WITH_LEN("ST_UNION") 
}, GEOM_BUILDER(Create_func_union)}, + { { STRING_WITH_LEN("ST_WITHIN") }, GEOM_BUILDER(Create_func_within)}, + { { STRING_WITH_LEN("ST_X") }, GEOM_BUILDER(Create_func_x)}, + { { STRING_WITH_LEN("ST_Y") }, GEOM_BUILDER(Create_func_y)}, + { { STRING_WITH_LEN("ST_DISTANCE_SPHERE") }, GEOM_BUILDER(Create_func_distance_sphere)}, + { { STRING_WITH_LEN("TOUCHES") }, GEOM_BUILDER(Create_func_touches)}, + { { STRING_WITH_LEN("WITHIN") }, GEOM_BUILDER(Create_func_within)}, + { { STRING_WITH_LEN("X") }, GEOM_BUILDER(Create_func_x)}, + { { STRING_WITH_LEN("Y") }, GEOM_BUILDER(Create_func_y)}, +}; + + +Native_func_registry_array + native_func_registry_array_geom(func_array_geom, + array_elements(func_array_geom)); + +#endif /*HAVE_SPATIAL*/ diff --git a/sql/item_geofunc.h b/sql/item_geofunc.h new file mode 100644 index 00000000..40f4b924 --- /dev/null +++ b/sql/item_geofunc.h @@ -0,0 +1,1288 @@ +#ifndef ITEM_GEOFUNC_INCLUDED +#define ITEM_GEOFUNC_INCLUDED + +/* Copyright (c) 2000, 2016 Oracle and/or its affiliates. + Copyright (C) 2011, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* This file defines all spatial functions */ + +#ifdef HAVE_SPATIAL + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include "sql_type_geom.h" +#include "item.h" +#include "gstream.h" +#include "spatial.h" +#include "gcalc_slicescan.h" +#include "gcalc_tools.h" + +class Item_geometry_func: public Item_str_func +{ +public: + Item_geometry_func(THD *thd): Item_str_func(thd) {} + Item_geometry_func(THD *thd, Item *a): Item_str_func(thd, a) {} + Item_geometry_func(THD *thd, Item *a, Item *b): Item_str_func(thd, a, b) {} + Item_geometry_func(THD *thd, Item *a, Item *b, Item *c): + Item_str_func(thd, a, b, c) {} + Item_geometry_func(THD *thd, List &list): Item_str_func(thd, list) {} + bool fix_length_and_dec(THD *thd) override; + const Type_handler *type_handler() const override + { return &type_handler_geometry; } +}; + + +/* + Functions returning REAL measurements of a single GEOMETRY argument +*/ +class Item_real_func_args_geometry: public Item_real_func +{ +protected: + String value; + bool check_arguments() const override + { + DBUG_ASSERT(arg_count == 1); + return Type_handler_geometry::check_type_geom_or_binary(func_name_cstring(), + args[0]); + } +public: + Item_real_func_args_geometry(THD *thd, Item *a) + :Item_real_func(thd, a) {} +}; + + +/* + Functions returning INT measurements of a single GEOMETRY argument +*/ +class Item_long_func_args_geometry: public Item_long_func +{ + bool check_arguments() const override + { + DBUG_ASSERT(arg_count == 1); + return Type_handler_geometry::check_type_geom_or_binary(func_name_cstring(), + args[0]); + } +protected: + String value; +public: + Item_long_func_args_geometry(THD *thd, Item *a) + :Item_long_func(thd, a) {} +}; + + +/* + Functions returning BOOL 
measurements of a single GEOMETRY argument +*/ +class Item_bool_func_args_geometry: public Item_bool_func +{ +protected: + String value; + bool check_arguments() const override + { + DBUG_ASSERT(arg_count == 1); + return Type_handler_geometry::check_type_geom_or_binary(func_name_cstring(), + args[0]); + } +public: + Item_bool_func_args_geometry(THD *thd, Item *a) + :Item_bool_func(thd, a) {} +}; + + +/* + Functions returning ASCII string measurements of a single GEOMETRY argument +*/ +class Item_str_ascii_func_args_geometry: public Item_str_ascii_func +{ +protected: + bool check_arguments() const override + { + DBUG_ASSERT(arg_count >= 1); + return Type_handler_geometry::check_type_geom_or_binary(func_name_cstring(), + args[0]); + } +public: + Item_str_ascii_func_args_geometry(THD *thd, Item *a) + :Item_str_ascii_func(thd, a) {} + Item_str_ascii_func_args_geometry(THD *thd, Item *a, Item *b) + :Item_str_ascii_func(thd, a, b) {} + Item_str_ascii_func_args_geometry(THD *thd, Item *a, Item *b, Item *c) + :Item_str_ascii_func(thd, a, b, c) {} +}; + + +/* + Functions returning binary string measurements of a single GEOMETRY argument +*/ +class Item_binary_func_args_geometry: public Item_str_func +{ +protected: + bool check_arguments() const override + { + DBUG_ASSERT(arg_count >= 1); + return Type_handler_geometry::check_type_geom_or_binary(func_name_cstring(), + args[0]); + } +public: + Item_binary_func_args_geometry(THD *thd, Item *a) + :Item_str_func(thd, a) {} +}; + + +/* + Functions returning GEOMETRY measurements of a single GEOEMETRY argument +*/ +class Item_geometry_func_args_geometry: public Item_geometry_func +{ +protected: + bool check_arguments() const override + { + DBUG_ASSERT(arg_count >= 1); + return Type_handler_geometry::check_type_geom_or_binary(func_name_cstring(), + args[0]); + } +public: + Item_geometry_func_args_geometry(THD *thd, Item *a) + :Item_geometry_func(thd, a) {} + Item_geometry_func_args_geometry(THD *thd, Item *a, Item *b) + 
:Item_geometry_func(thd, a, b) {} +}; + + +/* + Functions returning REAL result relationships between two GEOMETRY arguments +*/ +class Item_real_func_args_geometry_geometry: public Item_real_func +{ +protected: + bool check_arguments() const override + { + DBUG_ASSERT(arg_count >= 2); + return Type_handler_geometry::check_types_geom_or_binary(func_name_cstring(), + args, 0, 2); + } +public: + Item_real_func_args_geometry_geometry(THD *thd, Item *a, Item *b) + :Item_real_func(thd, a, b) {} +}; + + +/* + Functions returning BOOL result relationships between two GEOMETRY arguments +*/ +class Item_bool_func_args_geometry_geometry: public Item_bool_func +{ +protected: + String value; + bool check_arguments() const override + { + DBUG_ASSERT(arg_count >= 2); + return Type_handler_geometry::check_types_geom_or_binary(func_name_cstring(), + args, 0, 2); + } +public: + Item_bool_func_args_geometry_geometry(THD *thd, Item *a, Item *b, Item *c) + :Item_bool_func(thd, a, b, c) {} +}; + + +class Item_func_geometry_from_text: public Item_geometry_func +{ + bool check_arguments() const override + { + return args[0]->check_type_general_purpose_string(func_name_cstring()) || + check_argument_types_can_return_int(1, MY_MIN(2, arg_count)); + } +public: + Item_func_geometry_from_text(THD *thd, Item *a): Item_geometry_func(thd, a) {} + Item_func_geometry_from_text(THD *thd, Item *a, Item *srid): + Item_geometry_func(thd, a, srid) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_geometryfromtext") }; + return name; + } + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_geometry_from_wkb: public Item_geometry_func +{ + bool check_arguments() const override + { + return + Type_handler_geometry::check_type_geom_or_binary(func_name_cstring(), args[0]) || + check_argument_types_can_return_int(1, MY_MIN(2, arg_count)); + } +public: + 
Item_func_geometry_from_wkb(THD *thd, Item *a): Item_geometry_func(thd, a) {} + Item_func_geometry_from_wkb(THD *thd, Item *a, Item *srid): + Item_geometry_func(thd, a, srid) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_geometryfromwkb") }; + return name; + } + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_geometry_from_json: public Item_geometry_func +{ + String tmp_js; + bool check_arguments() const override + { + // TODO: check with Alexey, for better args[1] and args[2] type control + return args[0]->check_type_general_purpose_string(func_name_cstring()) || + check_argument_types_traditional_scalar(1, MY_MIN(3, arg_count)); + } +public: + Item_func_geometry_from_json(THD *thd, Item *js): Item_geometry_func(thd, js) {} + Item_func_geometry_from_json(THD *thd, Item *js, Item *opt): + Item_geometry_func(thd, js, opt) {} + Item_func_geometry_from_json(THD *thd, Item *js, Item *opt, Item *srid): + Item_geometry_func(thd, js, opt, srid) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_geomfromgeojson") }; + return name; + } + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_as_wkt: public Item_str_ascii_func_args_geometry +{ +public: + Item_func_as_wkt(THD *thd, Item *a) + :Item_str_ascii_func_args_geometry(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_astext") }; + return name; + } + String *val_str_ascii(String *) override; + bool fix_length_and_dec(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_as_wkb: public Item_binary_func_args_geometry +{ +public: + Item_func_as_wkb(THD *thd, Item *a) + :Item_binary_func_args_geometry(thd, a) {} + 
LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_aswkb") }; + return name; + } + String *val_str(String *) override; + const Type_handler *type_handler() const override + { return &type_handler_long_blob; } + bool fix_length_and_dec(THD *thd) override + { + collation.set(&my_charset_bin); + decimals=0; + max_length= (uint32) UINT_MAX32; + set_maybe_null(); + return FALSE; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_as_geojson: public Item_str_ascii_func_args_geometry +{ + bool check_arguments() const override + { + // TODO: check with Alexey, for better args[1] and args[2] type control + return Item_str_ascii_func_args_geometry::check_arguments() || + check_argument_types_traditional_scalar(1, MY_MIN(3, arg_count)); + } +public: + Item_func_as_geojson(THD *thd, Item *js) + :Item_str_ascii_func_args_geometry(thd, js) {} + Item_func_as_geojson(THD *thd, Item *js, Item *max_dec_digits) + :Item_str_ascii_func_args_geometry(thd, js, max_dec_digits) {} + Item_func_as_geojson(THD *thd, Item *js, Item *max_dec_digits, Item *opt) + :Item_str_ascii_func_args_geometry(thd, js, max_dec_digits, opt) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_asgeojson") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + String *val_str_ascii(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_geometry_type: public Item_str_ascii_func_args_geometry +{ +public: + Item_func_geometry_type(THD *thd, Item *a) + :Item_str_ascii_func_args_geometry(thd, a) {} + String *val_str_ascii(String *) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_geometrytype") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + // "GeometryCollection" is the longest + 
fix_length_and_charset(20, default_charset()); + set_maybe_null(); + return FALSE; + }; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +// #define HEAVY_CONVEX_HULL +class Item_func_convexhull: public Item_geometry_func_args_geometry +{ + class ch_node: public Gcalc_dyn_list::Item + { + public: + const Gcalc_heap::Info *pi; + ch_node *prev; + Gcalc_dyn_list::Item *next; + ch_node *get_next() { return (ch_node *) next; } + }; + + Gcalc_heap collector; + Gcalc_function func; + Gcalc_dyn_list res_heap; + + Gcalc_result_receiver res_receiver; + String tmp_value; +#ifdef HEAVY_CONVEX_HULL + Gcalc_scan_iterator scan_it; +#endif /*HEAVY_CONVEX_HULL*/ + ch_node *new_ch_node() { return (ch_node *) res_heap.new_item(); } + int add_node_to_line(ch_node **p_cur, int dir, const Gcalc_heap::Info *pi); +public: + Item_func_convexhull(THD *thd, Item *a) + :Item_geometry_func_args_geometry(thd, a), + res_heap(8192, sizeof(ch_node)) + {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_convexhull") }; + return name; + } + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_centroid: public Item_geometry_func_args_geometry +{ +public: + Item_func_centroid(THD *thd, Item *a) + :Item_geometry_func_args_geometry(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_centroid") }; + return name; + } + String *val_str(String *) override; + const Type_handler *type_handler() const override + { + return &type_handler_point; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_envelope: public Item_geometry_func_args_geometry +{ +public: + Item_func_envelope(THD *thd, Item *a) + :Item_geometry_func_args_geometry(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= 
{STRING_WITH_LEN("st_envelope") }; + return name; + } + String *val_str(String *) override; + const Type_handler *type_handler() const override + { + return &type_handler_polygon; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_boundary: public Item_geometry_func_args_geometry +{ + class Transporter : public Gcalc_shape_transporter + { + Gcalc_result_receiver *m_receiver; + uint n_points; + Gcalc_function::shape_type current_type; + double last_x, last_y; + public: + Transporter(Gcalc_result_receiver *receiver) : + Gcalc_shape_transporter(NULL), m_receiver(receiver) + {} + int single_point(double x, double y); + int start_line(); + int complete_line(); + int start_poly(); + int complete_poly(); + int start_ring(); + int complete_ring(); + int add_point(double x, double y); + + int start_collection(int n_objects); + }; + Gcalc_result_receiver res_receiver; +public: + Item_func_boundary(THD *thd, Item *a) + :Item_geometry_func_args_geometry(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_boundary") }; + return name; + } + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_point: public Item_geometry_func +{ + bool check_arguments() const override + { return check_argument_types_can_return_real(0, 2); } +public: + Item_func_point(THD *thd, Item *a, Item *b): Item_geometry_func(thd, a, b) {} + Item_func_point(THD *thd, Item *a, Item *b, Item *srid): + Item_geometry_func(thd, a, b, srid) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("point") }; + return name; + } + String *val_str(String *) override; + const Type_handler *type_handler() const override + { + return &type_handler_point; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_spatial_decomp: public 
Item_geometry_func_args_geometry +{ + enum Functype decomp_func; +public: + Item_func_spatial_decomp(THD *thd, Item *a, Item_func::Functype ft): + Item_geometry_func_args_geometry(thd, a) { decomp_func = ft; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING startpoint= {STRING_WITH_LEN("st_startpoint") }; + static LEX_CSTRING endpoint= {STRING_WITH_LEN("st_endpoint") }; + static LEX_CSTRING exteriorring= {STRING_WITH_LEN("st_exteriorring") }; + static LEX_CSTRING unknown= {STRING_WITH_LEN("spatial_decomp_unknown") }; + switch (decomp_func) { + case SP_STARTPOINT: + return startpoint; + case SP_ENDPOINT: + return endpoint; + case SP_EXTERIORRING: + return exteriorring; + default: + DBUG_ASSERT(0); // Should never happened + return unknown; + } + } + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_spatial_decomp_n: public Item_geometry_func_args_geometry +{ + enum Functype decomp_func_n; + bool check_arguments() const override + { + return Item_geometry_func_args_geometry::check_arguments() || + args[1]->check_type_can_return_int(func_name_cstring()); + } +public: + Item_func_spatial_decomp_n(THD *thd, Item *a, Item *b, Item_func::Functype ft) + :Item_geometry_func_args_geometry(thd, a, b), + decomp_func_n(ft) + { } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING pointn= {STRING_WITH_LEN("st_pointn") }; + static LEX_CSTRING geometryn= {STRING_WITH_LEN("st_geometryn") }; + static LEX_CSTRING interiorringn= {STRING_WITH_LEN("st_interiorringn") }; + static LEX_CSTRING unknown= {STRING_WITH_LEN("spatial_decomp_unknown") }; + + switch (decomp_func_n) { + case SP_POINTN: + return pointn; + case SP_GEOMETRYN: + return geometryn; + case SP_INTERIORRINGN: + return interiorringn; + default: + DBUG_ASSERT(0); // Should never happened + return unknown; + } + } + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return 
get_item_copy(thd, this); } +}; + +class Item_func_spatial_collection: public Item_geometry_func +{ + bool check_arguments() const override + { + return Type_handler_geometry::check_types_geom_or_binary(func_name_cstring(), args, + 0, arg_count); + } + enum Geometry::wkbType coll_type; + enum Geometry::wkbType item_type; +public: + Item_func_spatial_collection(THD *thd, + List &list, enum Geometry::wkbType ct, enum Geometry::wkbType it): + Item_geometry_func(thd, list) + { + coll_type=ct; + item_type=it; + } + String *val_str(String *) override; + bool fix_length_and_dec(THD *thd) override + { + if (Item_geometry_func::fix_length_and_dec(thd)) + return TRUE; + for (unsigned int i= 0; i < arg_count; ++i) + { + if (args[i]->fixed() && args[i]->field_type() != MYSQL_TYPE_GEOMETRY) + { + String str; + args[i]->print(&str, QT_NO_DATA_EXPANSION); + str.append('\0'); + my_error(ER_ILLEGAL_VALUE_FOR_TYPE, MYF(0), "non geometric", + str.ptr()); + return TRUE; + } + } + return FALSE; + } +}; + + +class Item_func_geometrycollection: public Item_func_spatial_collection +{ +public: + Item_func_geometrycollection(THD *thd, List &list) + :Item_func_spatial_collection(thd, list, + Geometry::wkb_geometrycollection, + Geometry::wkb_point) + { } + const Type_handler *type_handler() const override + { + return &type_handler_geometrycollection; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("geometrycollection") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_linestring: public Item_func_spatial_collection +{ +public: + Item_func_linestring(THD *thd, List &list) + :Item_func_spatial_collection(thd, list, + Geometry::wkb_linestring, + Geometry::wkb_point) + { } + const Type_handler *type_handler() const override + { return &type_handler_linestring; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("linestring") 
}; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_polygon: public Item_func_spatial_collection +{ +public: + Item_func_polygon(THD *thd, List &list) + :Item_func_spatial_collection(thd, list, + Geometry::wkb_polygon, + Geometry::wkb_linestring) + { } + const Type_handler *type_handler() const override + { return &type_handler_polygon; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("polygon") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_multilinestring: public Item_func_spatial_collection +{ +public: + Item_func_multilinestring(THD *thd, List &list) + :Item_func_spatial_collection(thd, list, + Geometry::wkb_multilinestring, + Geometry::wkb_linestring) + { } + const Type_handler *type_handler() const override + { + return &type_handler_multilinestring; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("multilinestring") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_multipoint: public Item_func_spatial_collection +{ +public: + Item_func_multipoint(THD *thd, List &list) + :Item_func_spatial_collection(thd, list, + Geometry::wkb_multipoint, + Geometry::wkb_point) + { } + const Type_handler *type_handler() const override + { + return &type_handler_multipoint; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("multipoint") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_multipolygon: public Item_func_spatial_collection +{ +public: + Item_func_multipolygon(THD *thd, List &list) + :Item_func_spatial_collection(thd, list, + Geometry::wkb_multipolygon, + Geometry::wkb_polygon) + { } + const Type_handler *type_handler() const 
override + { + return &type_handler_multipolygon; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("multipolygon") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + + +/* + Spatial relations +*/ + +class Item_func_spatial_rel: public Item_bool_func2_with_rev +{ +protected: + enum Functype spatial_rel; + String tmp_value1, tmp_value2; + SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param, Field *field, + KEY_PART *key_part, + Item_func::Functype type, Item *value) override; + bool check_arguments() const override + { + DBUG_ASSERT(arg_count >= 2); + return Type_handler_geometry::check_types_geom_or_binary(func_name_cstring(), + args, 0, 2); + } +public: + Item_func_spatial_rel(THD *thd, Item *a, Item *b, enum Functype sp_rel): + Item_bool_func2_with_rev(thd, a, b), spatial_rel(sp_rel) + { + set_maybe_null(); + } + enum Functype functype() const override { return spatial_rel; } + enum Functype rev_functype() const override + { + switch (spatial_rel) + { + case SP_CONTAINS_FUNC: + return SP_WITHIN_FUNC; + case SP_WITHIN_FUNC: + return SP_CONTAINS_FUNC; + default: + return spatial_rel; + } + } + bool is_null() override { (void) val_int(); return null_value; } + void add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables) override + { + return add_key_fields_optimize_op(join, key_fields, and_level, + usable_tables, sargables, false); + } + bool need_parentheses_in_default() override { return false; } + Item *build_clone(THD *thd) override { return 0; } +}; + + +class Item_func_spatial_mbr_rel: public Item_func_spatial_rel +{ +public: + Item_func_spatial_mbr_rel(THD *thd, Item *a, Item *b, enum Functype sp_rel): + Item_func_spatial_rel(thd, a, b, sp_rel) + { } + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, 
this); } +}; + + +class Item_func_spatial_precise_rel: public Item_func_spatial_rel +{ + Gcalc_heap collector; + Gcalc_scan_iterator scan_it; + Gcalc_function func; +public: + Item_func_spatial_precise_rel(THD *thd, Item *a, Item *b, enum Functype sp_rel): + Item_func_spatial_rel(thd, a, b, sp_rel), collector() + { } + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_spatial_relate: public Item_bool_func_args_geometry_geometry +{ + Gcalc_heap collector; + Gcalc_scan_iterator scan_it; + Gcalc_function func; + String tmp_value1, tmp_value2, tmp_matrix; + bool check_arguments() const override + { + return Item_bool_func_args_geometry_geometry::check_arguments() || + args[2]->check_type_general_purpose_string(func_name_cstring()); + } +public: + Item_func_spatial_relate(THD *thd, Item *a, Item *b, Item *matrix): + Item_bool_func_args_geometry_geometry(thd, a, b, matrix) + { } + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_relate") }; + return name; + } + bool need_parentheses_in_default() override { return false; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + Spatial operations +*/ + +class Item_func_spatial_operation final: public Item_geometry_func +{ + bool check_arguments() const override + { + DBUG_ASSERT(arg_count >= 2); + return Type_handler_geometry::check_types_geom_or_binary(func_name_cstring(), + args, 0, 2); + } +public: + Gcalc_function::op_type spatial_op; + Gcalc_heap collector; + Gcalc_function func; + + Gcalc_result_receiver res_receiver; + Gcalc_operation_reducer operation; + String tmp_value1,tmp_value2; +public: + Item_func_spatial_operation(THD *thd, Item *a,Item *b, + Gcalc_function::op_type sp_op): + Item_geometry_func(thd, a, b), spatial_op(sp_op) + {} + virtual 
~Item_func_spatial_operation(); + String *val_str(String *) override; + LEX_CSTRING func_name_cstring() const override; + void print(String *str, enum_query_type query_type) override + { + Item_func::print(str, query_type); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_buffer final : public Item_geometry_func_args_geometry +{ + bool check_arguments() const override + { + return Item_geometry_func_args_geometry::check_arguments() || + args[1]->check_type_can_return_real(func_name_cstring()); + } +protected: + class Transporter : public Gcalc_operation_transporter + { + int m_npoints; + double m_d; + double x1,y1,x2,y2; + double x00,y00,x01,y01; + int add_edge_buffer(double x3, double y3, bool round_p1, bool round_p2); + int add_last_edge_buffer(); + int add_point_buffer(double x, double y); + int complete(); + int m_nshapes; + Gcalc_function::op_type buffer_op; + int last_shape_pos; + bool skip_line; + + public: + Transporter(Gcalc_function *fn, Gcalc_heap *heap, double d) : + Gcalc_operation_transporter(fn, heap), m_npoints(0), m_d(d), + m_nshapes(0), buffer_op((d > 0.0) ? 
Gcalc_function::op_union : + Gcalc_function::op_difference), + skip_line(FALSE) + {} + int single_point(double x, double y); + int start_line(); + int complete_line(); + int start_poly(); + int complete_poly(); + int start_ring(); + int complete_ring(); + int add_point(double x, double y); + + int start_collection(int n_objects); + }; + Gcalc_heap collector; + Gcalc_function func; + + Gcalc_result_receiver res_receiver; + Gcalc_operation_reducer operation; + +public: + Item_func_buffer(THD *thd, Item *obj, Item *distance) + :Item_geometry_func_args_geometry(thd, obj, distance) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_buffer") }; + return name; + } + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_isempty: public Item_bool_func_args_geometry +{ +public: + Item_func_isempty(THD *thd, Item *a) + :Item_bool_func_args_geometry(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_isempty") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { set_maybe_null(); return FALSE; } + bool need_parentheses_in_default() override { return false; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_issimple: public Item_long_func_args_geometry +{ + Gcalc_heap collector; + Gcalc_function func; + Gcalc_scan_iterator scan_it; + String tmp; +public: + Item_func_issimple(THD *thd, Item *a) + :Item_long_func_args_geometry(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_issimple") }; + return name; + } + bool fix_length_and_dec(THD *thd) override { decimals=0; max_length=2; return FALSE; } + decimal_digits_t decimal_precision() const override { return 1; } + Item *get_copy(THD *thd) override + { 
return get_item_copy(thd, this); } +}; + +class Item_func_isclosed: public Item_long_func_args_geometry +{ +public: + Item_func_isclosed(THD *thd, Item *a) + :Item_long_func_args_geometry(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_isclosed") }; + return name; + } + bool fix_length_and_dec(THD *thd) override { decimals=0; max_length=2; return FALSE; } + decimal_digits_t decimal_precision() const override { return 1; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_isring: public Item_func_issimple +{ +public: + Item_func_isring(THD *thd, Item *a): Item_func_issimple(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_isring") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_dimension: public Item_long_func_args_geometry +{ +public: + Item_func_dimension(THD *thd, Item *a) + :Item_long_func_args_geometry(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_dimension") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { max_length= 10; set_maybe_null(); return FALSE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_x: public Item_real_func_args_geometry +{ +public: + Item_func_x(THD *thd, Item *a): Item_real_func_args_geometry(thd, a) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_x") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + if (Item_real_func::fix_length_and_dec(thd)) + return TRUE; + set_maybe_null(); + return FALSE; + } + Item *get_copy(THD *thd) override + { return 
get_item_copy(thd, this); } +}; + + +class Item_func_y: public Item_real_func_args_geometry +{ +public: + Item_func_y(THD *thd, Item *a): Item_real_func_args_geometry(thd, a) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_y") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + if (Item_real_func::fix_length_and_dec(thd)) + return TRUE; + set_maybe_null(); + return FALSE; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_numgeometries: public Item_long_func_args_geometry +{ +public: + Item_func_numgeometries(THD *thd, Item *a) + :Item_long_func_args_geometry(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_numgeometries") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { max_length= 10; set_maybe_null(); return FALSE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_numinteriorring: public Item_long_func_args_geometry +{ +public: + Item_func_numinteriorring(THD *thd, Item *a) + :Item_long_func_args_geometry(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_numinteriorrings") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { max_length= 10; set_maybe_null(); return FALSE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_numpoints: public Item_long_func_args_geometry +{ +public: + Item_func_numpoints(THD *thd, Item *a) + :Item_long_func_args_geometry(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_numpoints") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { 
max_length= 10; set_maybe_null(); return FALSE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_area: public Item_real_func_args_geometry +{ +public: + Item_func_area(THD *thd, Item *a): Item_real_func_args_geometry(thd, a) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_area") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + if (Item_real_func::fix_length_and_dec(thd)) + return TRUE; + set_maybe_null(); + return FALSE; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_glength: public Item_real_func_args_geometry +{ + String value; +public: + Item_func_glength(THD *thd, Item *a) + :Item_real_func_args_geometry(thd, a) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_length") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + if (Item_real_func::fix_length_and_dec(thd)) + return TRUE; + set_maybe_null(); + return FALSE; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_srid: public Item_long_func_args_geometry +{ +public: + Item_func_srid(THD *thd, Item *a) + :Item_long_func_args_geometry(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("srid") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { max_length= 10; set_maybe_null(); return FALSE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_distance: public Item_real_func_args_geometry_geometry +{ + String tmp_value1; + String tmp_value2; + Gcalc_heap collector; + Gcalc_function func; + Gcalc_scan_iterator scan_it; +public: + Item_func_distance(THD *thd, Item *a, Item *b) + 
:Item_real_func_args_geometry_geometry(thd, a, b) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_distance") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_sphere_distance: public Item_real_func +{ + double spherical_distance_points(Geometry *g1, Geometry *g2, + const double sphere_r); +public: + Item_func_sphere_distance(THD *thd, List &list): + Item_real_func(thd, list) {} + double val_real() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_distance_sphere") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_pointonsurface: public Item_geometry_func_args_geometry +{ + String tmp_value; + Gcalc_heap collector; + Gcalc_function func; + Gcalc_scan_iterator scan_it; +public: + Item_func_pointonsurface(THD *thd, Item *a) + :Item_geometry_func_args_geometry(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_pointonsurface") }; + return name; + } + String *val_str(String *) override; + const Type_handler *type_handler() const override + { + return &type_handler_point; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +#ifndef DBUG_OFF +class Item_func_gis_debug: public Item_long_func +{ + public: + Item_func_gis_debug(THD *thd, Item *a): Item_long_func(thd, a) + { null_value= false; } + bool fix_length_and_dec(THD *thd) override { fix_char_length(10); return FALSE; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("st_gis_debug") }; + return name; + } + longlong val_int() override; + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE); + } + Item *get_copy(THD 
*thd) override + { return get_item_copy(thd, this); } +}; +#endif + + +#define GEOM_NEW(thd, obj_constructor) new (thd->mem_root) obj_constructor +#define GEOM_TYPE(x) (x) + +#else /*HAVE_SPATIAL*/ + +#define GEOM_NEW(thd, obj_constructor) NULL +#define GEOM_TYPE(x) NULL + +#endif /*HAVE_SPATIAL*/ +#endif /* ITEM_GEOFUNC_INCLUDED */ diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc new file mode 100644 index 00000000..590fde88 --- /dev/null +++ b/sql/item_jsonfunc.cc @@ -0,0 +1,4738 @@ +/* Copyright (c) 2016, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_class.h" +#include "item.h" +#include "sql_parse.h" // For check_stack_overrun + +/* + Allocating memory and *also* using it (reading and + writing from it) because some build instructions cause + compiler to optimize out stack_used_up. Since alloca() + here depends on stack_used_up, it doesnt get executed + correctly and causes json_debug_nonembedded to fail + ( --error ER_STACK_OVERRUN_NEED_MORE does not occur). +*/ +#define ALLOCATE_MEM_ON_STACK(A) do \ + { \ + uchar *array= (uchar*)alloca(A); \ + bzero(array, A); \ + my_checksum(0, array, A); \ + } while(0) + +/* + Compare ASCII string against the string with the specified + character set. + Only compares the equality, case insensitive. 
/*
  Compare an ASCII pattern against a string in the given character set.

  Only tests equality, case-insensitively.  The pattern 'ascii' is
  expected to be a plain lowercase ASCII literal (callers pass "one",
  "all", etc.); 's'/'s_len' is decoded one character at a time with the
  charset's mb_wc so multi-byte charsets compare correctly.

  Returns true when the whole pattern and the whole string match.
*/
static bool eq_ascii_string(const CHARSET_INFO *cs,
                            const char *ascii,
                            const char *s, uint32 s_len)
{
  const char *s_end= s + s_len;

  while (*ascii && s < s_end)
  {
    my_wc_t wc;
    int wc_len;

    /* Decode the next character of 's'; wc_len <= 0 means bad sequence. */
    wc_len= cs->mb_wc(&wc, (uchar *) s, (uchar *) s_end);
    /*
      (wc | 0x20) lowercases ASCII letters cheaply; this assumes the
      pattern consists of lowercase letters only (true for current
      callers) — non-letter pattern characters would not compare as
      intended.
    */
    if (wc_len <= 0 || (wc | 0x20) != (my_wc_t) *ascii)
      return 0;

    ascii++;
    s+= wc_len;
  }

  /* Match only if both the pattern and the input were fully consumed. */
  return *ascii == 0 && s >= s_end;
}
+ */ + int str_len= a->length() * 12 * s->charset()->mbmaxlen / + a->charset()->mbminlen; + if (!s->reserve(str_len, 1024) && + (str_len= + json_escape(a->charset(), (uchar *) a->ptr(), (uchar *)a->end(), + s->charset(), + (uchar *) s->end(), (uchar *)s->end() + str_len)) > 0) + { + s->length(s->length() + str_len); + return 0; + } + + return a->length(); +} + + +static const int TAB_SIZE_LIMIT= 8; +static const char tab_arr[TAB_SIZE_LIMIT+1]= " "; + +static int append_tab(String *js, int depth, int tab_size) +{ + if (js->append('\n')) + return 1; + for (int i=0; iappend(tab_arr, tab_size)) + return 1; + } + return 0; +} + +int json_path_parts_compare( + const json_path_step_t *a, const json_path_step_t *a_end, + const json_path_step_t *b, const json_path_step_t *b_end, + enum json_value_types vt, const int *array_sizes) +{ + int res, res2; + const json_path_step_t *temp_b= b; + + DBUG_EXECUTE_IF("json_check_min_stack_requirement", + { + long arbitrary_var; + long stack_used_up= (available_stack_size(current_thd->thread_stack, &arbitrary_var)); + ALLOCATE_MEM_ON_STACK(my_thread_stack_size-stack_used_up-STACK_MIN_SIZE); + }); + if (check_stack_overrun(current_thd, STACK_MIN_SIZE , NULL)) + return 1; + + while (a <= a_end) + { + if (b > b_end) + { + while (vt != JSON_VALUE_ARRAY && + (a->type & JSON_PATH_ARRAY_WILD) == JSON_PATH_ARRAY && + a->n_item == 0) + { + if (++a > a_end) + return 0; + } + return -2; + } + + DBUG_ASSERT((b->type & (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD)) == 0); + + if (a->type & JSON_PATH_ARRAY) + { + if (b->type & JSON_PATH_ARRAY) + { + int res= 0, corrected_n_item_a= 0; + if (array_sizes) + corrected_n_item_a= a->n_item < 0 ? + array_sizes[b-temp_b] + a->n_item : a->n_item; + if (a->type & JSON_PATH_ARRAY_RANGE) + { + int corrected_n_item_end_a= 0; + if (array_sizes) + corrected_n_item_end_a= a->n_item_end < 0 ? 
+ array_sizes[b-temp_b] + a->n_item_end : + a->n_item_end; + res= b->n_item >= corrected_n_item_a && + b->n_item <= corrected_n_item_end_a; + } + else + res= corrected_n_item_a == b->n_item; + + if ((a->type & JSON_PATH_WILD) || res) + goto step_fits; + goto step_failed; + } + if ((a->type & JSON_PATH_WILD) == 0 && a->n_item == 0) + goto step_fits_autowrap; + goto step_failed; + } + else /* JSON_PATH_KEY */ + { + if (!(b->type & JSON_PATH_KEY)) + goto step_failed; + + if (!(a->type & JSON_PATH_WILD) && + (a->key_end - a->key != b->key_end - b->key || + memcmp(a->key, b->key, a->key_end - a->key) != 0)) + goto step_failed; + + goto step_fits; + } +step_failed: + if (!(a->type & JSON_PATH_DOUBLE_WILD)) + return -1; + b++; + continue; + +step_fits: + b++; + if (!(a->type & JSON_PATH_DOUBLE_WILD)) + { + a++; + continue; + } + + /* Double wild handling needs recursions. */ + res= json_path_parts_compare(a+1, a_end, b, b_end, vt, + array_sizes ? array_sizes + (b - temp_b) : + NULL); + if (res == 0) + return 0; + + res2= json_path_parts_compare(a, a_end, b, b_end, vt, + array_sizes ? array_sizes + (b - temp_b) : + NULL); + + return (res2 >= 0) ? res2 : res; + +step_fits_autowrap: + if (!(a->type & JSON_PATH_DOUBLE_WILD)) + { + a++; + continue; + } + + /* Double wild handling needs recursions. */ + res= json_path_parts_compare(a+1, a_end, b+1, b_end, vt, + array_sizes ? array_sizes + (b - temp_b) : + NULL); + if (res == 0) + return 0; + + res2= json_path_parts_compare(a, a_end, b+1, b_end, vt, + array_sizes ? array_sizes + (b - temp_b) : + NULL); + + return (res2 >= 0) ? 
/*
  Compare two JSON paths for containment/equality.

  Thin wrapper over json_path_parts_compare() that skips the leading '$'
  step of both paths (steps+1).  'vt' is the type of the value found at
  path 'b' (array auto-wrapping depends on it) and 'array_size' carries
  per-level array sizes so negative (from-the-end) indexes in 'a' can be
  resolved; it may be NULL when no negative indexes are in use.

  Return value follows json_path_parts_compare(): 0 for a match,
  negative when 'a' does not cover 'b'.
*/
int json_path_compare(const json_path_t *a, const json_path_t *b,
                      enum json_value_types vt, const int *array_size)
{
  return json_path_parts_compare(a->steps+1, a->last_step,
                                 b->steps+1, b->last_step, vt, array_size);
}
*/ + DBUG_ASSERT(je->state == JST_VALUE); + goto handle_value; + + case JST_VALUE: + if (!first_value) + nice_js->append(comma, comma_len); + + if (mode == Item_func_json_format::DETAILED && + depth > 0 && + append_tab(nice_js, depth, tab_size)) + goto error; + +handle_value: + if (json_read_value(je)) + goto error; + if (json_value_scalar(je)) + { + if (append_simple(nice_js, je->value_begin, + je->value_end - je->value_begin)) + goto error; + + curr_str.copy((const char *)je->value_begin, + je->value_end - je->value_begin, je->s.cs); + value_len= je->value_end - je->value_begin; + first_value= 0; + if (value_size != -1) + value_size++; + } + else + { + if (mode == Item_func_json_format::DETAILED && + depth > 0 && !(curr_state != JST_KEY) && + append_tab(nice_js, depth, tab_size)) + goto error; + nice_js->append((je->value_type == JSON_VALUE_OBJECT) ? "{" : "[", 1); + first_value= 1; + value_size= (je->value_type == JSON_VALUE_OBJECT) ? -1: 0; + depth++; + } + + break; + + case JST_OBJ_END: + case JST_ARRAY_END: + depth--; + if (mode == Item_func_json_format::DETAILED && (value_size > 1 || value_size == -1) && + append_tab(nice_js, depth, tab_size)) + goto error; + + if (mode == Item_func_json_format::DETAILED && + value_size == 1 && je->state != JST_OBJ_END) + { + for (auto i = 0; i < value_len; i++) + { + nice_js->chop(); + } + for (auto i = 0; i < (depth + 1) * tab_size + 1; i++) + { + nice_js->chop(); + } + nice_js->append(curr_str); + } + + nice_js->append((je->state == JST_OBJ_END) ? 
"}": "]", 1); + first_value= 0; + value_size= -1; + break; + + default: + break; + }; + } while (json_scan_next(je) == 0); + + return je->s.error || *je->killed_ptr; + +error: + return 1; +} + + +#define report_json_error(js, je, n_param) \ + report_json_error_ex(js->ptr(), je, func_name(), n_param, \ + Sql_condition::WARN_LEVEL_WARN) + +void report_json_error_ex(const char *js, json_engine_t *je, + const char *fname, int n_param, + Sql_condition::enum_warning_level lv) +{ + THD *thd= current_thd; + int position= (int)((const char *) je->s.c_str - js); + uint code; + + n_param++; + + switch (je->s.error) + { + case JE_BAD_CHR: + code= ER_JSON_BAD_CHR; + break; + + case JE_NOT_JSON_CHR: + code= ER_JSON_NOT_JSON_CHR; + break; + + case JE_EOS: + code= ER_JSON_EOS; + break; + + case JE_SYN: + case JE_STRING_CONST: + code= ER_JSON_SYNTAX; + break; + + case JE_ESCAPING: + code= ER_JSON_ESCAPING; + break; + + case JE_DEPTH: + code= ER_JSON_DEPTH; + if (lv == Sql_condition::WARN_LEVEL_ERROR) + my_error(code, MYF(0), JSON_DEPTH_LIMIT, n_param, fname, position); + else + push_warning_printf(thd, lv, code, ER_THD(thd, code), JSON_DEPTH_LIMIT, + n_param, fname, position); + return; + + default: + return; + } + + if (lv == Sql_condition::WARN_LEVEL_ERROR) + my_error(code, MYF(0), n_param, fname, position); + else + push_warning_printf(thd, lv, code, ER_THD(thd, code), + n_param, fname, position); +} + + + +#define NO_WILDCARD_ALLOWED 1 +#define SHOULD_END_WITH_ARRAY 2 +#define TRIVIAL_PATH_NOT_ALLOWED 3 + +#define report_path_error(js, je, n_param) \ + report_path_error_ex(js->ptr(), je, func_name(), n_param,\ + Sql_condition::WARN_LEVEL_WARN) + +void report_path_error_ex(const char *ps, json_path_t *p, + const char *fname, int n_param, + Sql_condition::enum_warning_level lv) +{ + THD *thd= current_thd; + int position= (int)((const char *) p->s.c_str - ps + 1); + uint code; + + n_param++; + + switch (p->s.error) + { + case JE_BAD_CHR: + case JE_NOT_JSON_CHR: + case JE_SYN: + 
code= ER_JSON_PATH_SYNTAX; + break; + + case JE_EOS: + code= ER_JSON_PATH_EOS; + break; + + case JE_DEPTH: + code= ER_JSON_PATH_DEPTH; + if (lv == Sql_condition::WARN_LEVEL_ERROR) + my_error(code, MYF(0), JSON_DEPTH_LIMIT, n_param, fname, position); + else + push_warning_printf(thd, lv, code, ER_THD(thd, code), + JSON_DEPTH_LIMIT, n_param, fname, position); + return; + + case NO_WILDCARD_ALLOWED: + code= ER_JSON_PATH_NO_WILDCARD; + break; + + case TRIVIAL_PATH_NOT_ALLOWED: + code= ER_JSON_PATH_EMPTY; + break; + + + default: + return; + } + if (lv == Sql_condition::WARN_LEVEL_ERROR) + my_error(code, MYF(0), n_param, fname, position); + else + push_warning_printf(thd, lv, code, ER_THD(thd, code), + n_param, fname, position); +} + + +/* + Checks if the path has '.*' '[*]' or '**' constructions + and sets the NO_WILDCARD_ALLOWED error if the case. +*/ +static int path_setup_nwc(json_path_t *p, CHARSET_INFO *i_cs, + const uchar *str, const uchar *end) +{ + if (!json_path_setup(p, i_cs, str, end)) + { + if ((p->types_used & (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD | + JSON_PATH_ARRAY_RANGE)) == 0) + return 0; + p->s.error= NO_WILDCARD_ALLOWED; + } + + return 1; +} + + +longlong Item_func_json_valid::val_int() +{ + String *js= args[0]->val_json(&tmp_value); + + if ((null_value= args[0]->null_value)) + return 0; + + return json_valid(js->ptr(), js->length(), js->charset()); +} + + +bool Item_func_json_equals::fix_length_and_dec(THD *thd) +{ + if (Item_bool_func::fix_length_and_dec(thd)) + return TRUE; + set_maybe_null(); + return FALSE; +} + + +longlong Item_func_json_equals::val_int() +{ + longlong result= 0; + + String a_tmp, b_tmp; + + String *a= args[0]->val_json(&a_tmp); + String *b= args[1]->val_json(&b_tmp); + + DYNAMIC_STRING a_res; + if (init_dynamic_string(&a_res, NULL, 0, 0)) + { + null_value= 1; + return 1; + } + + DYNAMIC_STRING b_res; + if (init_dynamic_string(&b_res, NULL, 0, 0)) + { + dynstr_free(&a_res); + null_value= 1; + return 1; + } + + if ((null_value= 
args[0]->null_value || args[1]->null_value)) + { + null_value= 1; + goto end; + } + + if (json_normalize(&a_res, a->ptr(), a->length(), a->charset())) + { + null_value= 1; + goto end; + } + + if (json_normalize(&b_res, b->ptr(), b->length(), b->charset())) + { + null_value= 1; + goto end; + } + + result= strcmp(a_res.str, b_res.str) ? 0 : 1; + +end: + dynstr_free(&b_res); + dynstr_free(&a_res); + return result; +} + + +bool Item_func_json_exists::fix_length_and_dec(THD *thd) +{ + if (Item_bool_func::fix_length_and_dec(thd)) + return TRUE; + set_maybe_null(); + path.set_constant_flag(args[1]->const_item()); + return FALSE; +} + + +longlong Item_func_json_exists::val_int() +{ + json_engine_t je; + int array_counters[JSON_DEPTH_LIMIT]; + + String *js= args[0]->val_json(&tmp_js); + + if (!path.parsed) + { + String *s_p= args[1]->val_str(&tmp_path); + if (s_p && + json_path_setup(&path.p, s_p->charset(), (const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + goto err_return; + path.parsed= path.constant; + } + + if ((null_value= args[0]->null_value || args[1]->null_value)) + { + null_value= 1; + return 0; + } + + null_value= 0; + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + path.cur_step= path.p.steps; + if (json_find_path(&je, &path.p, &path.cur_step, array_counters)) + { + if (je.s.error) + goto err_return; + return 0; + } + + return 1; + +err_return: + null_value= 1; + return 0; +} + + +bool Item_func_json_value::fix_length_and_dec(THD *thd) +{ + collation.set(args[0]->collation); + max_length= args[0]->max_length; + set_constant_flag(args[1]->const_item()); + set_maybe_null(); + return FALSE; +} + + +bool Item_func_json_query::fix_length_and_dec(THD *thd) +{ + collation.set(args[0]->collation); + max_length= args[0]->max_length; + set_constant_flag(args[1]->const_item()); + set_maybe_null(); + return FALSE; +} + + +bool Json_path_extractor::extract(String *str, Item *item_js, 
/*
  Extract the current scalar value of the scan into 'res'.

  Used by JSON_VALUE()-style extraction, which only accepts scalar
  results.  If the current value is an object or array, the whole level
  is skipped (so the scan can continue with the next match) and true is
  returned; a scan failure while skipping sets *error.

  JSON true/false are rendered as "1"/"0" in utf8mb4_bin; any other
  scalar is unescaped/appended with its own charset via st_append_json().

  Returns false on success, true when the value was not a scalar or
  appending failed.
*/
bool Json_engine_scan::check_and_get_value_scalar(String *res, int *error)
{
  CHARSET_INFO *json_cs;
  const uchar *js;
  uint js_len;

  if (!json_value_scalar(this))
  {
    /* We only look for scalar values! */
    if (json_skip_level(this) || json_scan_next(this))
      *error= 1;
    return true;
  }

  if (value_type == JSON_VALUE_TRUE ||
      value_type == JSON_VALUE_FALSE)
  {
    /* Booleans are returned as numeric 1/0 text. */
    json_cs= &my_charset_utf8mb4_bin;
    js= (const uchar *) ((value_type == JSON_VALUE_TRUE) ? "1" : "0");
    js_len= 1;
  }
  else
  {
    json_cs= s.cs;
    js= value;
    js_len= value_len;
  }


  /* st_append_json() returns 0 on success; non-zero maps to true here. */
  return st_append_json(res, json_cs, js, js_len);
}
*/ + if (json_scan_next(this)) + *error= 1; + return true; + } + + const uchar *tmp_value= value; + if (json_skip_level(this)) + { + *error= 1; + return true; + } + + res->set((const char *) value, (uint32)(s.c_str - tmp_value), s.cs); + return false; +} + + +bool Item_func_json_quote::fix_length_and_dec(THD *thd) +{ + collation.set(&my_charset_utf8mb4_bin); + /* + Odd but realistic worst case is when all characters + of the argument turn into '\uXXXX\uXXXX', which is 12. + */ + fix_char_length_ulonglong((ulonglong) args[0]->max_char_length() * 12 + 2); + return FALSE; +} + + +String *Item_func_json_quote::val_str(String *str) +{ + String *s= args[0]->val_str(&tmp_s); + + if ((null_value= (args[0]->null_value || + args[0]->result_type() != STRING_RESULT))) + return NULL; + + str->length(0); + str->set_charset(&my_charset_utf8mb4_bin); + + if (str->append('"') || + st_append_escaped(str, s) || + str->append('"')) + { + /* Report an error. */ + null_value= 1; + return 0; + } + + return str; +} + + +bool Item_func_json_unquote::fix_length_and_dec(THD *thd) +{ + collation.set(&my_charset_utf8mb3_general_ci, + DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII); + max_length= args[0]->max_length; + set_maybe_null(); + return FALSE; +} + + +String *Item_func_json_unquote::read_json(json_engine_t *je) +{ + String *js= args[0]->val_json(&tmp_s); + + if ((null_value= args[0]->null_value)) + return 0; + + json_scan_start(je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + if (json_read_value(je)) + goto error; + + return js; + +error: + if (je->value_type == JSON_VALUE_STRING) + report_json_error(js, je, 0); + return js; +} + + +String *Item_func_json_unquote::val_str(String *str) +{ + json_engine_t je; + int c_len; + String *js; + + if (!(js= read_json(&je))) + return NULL; + + if (unlikely(je.s.error) || je.value_type != JSON_VALUE_STRING) + return js; + + str->length(0); + str->set_charset(&my_charset_utf8mb3_general_ci); + + if 
(str->realloc_with_extra_if_needed(je.value_len) || + (c_len= json_unescape(js->charset(), + je.value, je.value + je.value_len, + &my_charset_utf8mb3_general_ci, + (uchar *) str->ptr(), (uchar *) (str->ptr() + je.value_len))) < 0) + goto error; + + str->length(c_len); + return str; + +error: + report_json_error(js, &je, 0); + return js; +} + + +static int alloc_tmp_paths(THD *thd, uint n_paths, + json_path_with_flags **paths, String **tmp_paths) +{ + if (n_paths > 0) + { + if (*tmp_paths == 0) + { + MEM_ROOT *root= thd->stmt_arena->mem_root; + + *paths= (json_path_with_flags *) alloc_root(root, + sizeof(json_path_with_flags) * n_paths); + + *tmp_paths= new (root) String[n_paths]; + if (*paths == 0 || *tmp_paths == 0) + return 1; + + for (uint c_path=0; c_path < n_paths; c_path++) + (*tmp_paths)[c_path].set_charset(&my_charset_utf8mb3_general_ci); + } + + return 0; + } + + /* n_paths == 0 */ + *paths= 0; + *tmp_paths= 0; + return 0; +} + + +static void mark_constant_paths(json_path_with_flags *p, + Item** args, uint n_args) +{ + uint n; + for (n= 0; n < n_args; n++) + p[n].set_constant_flag(args[n]->const_item()); +} + + +bool Item_json_str_multipath::fix_fields(THD *thd, Item **ref) +{ + return alloc_tmp_paths(thd, get_n_paths(), &paths, &tmp_paths) || + Item_str_func::fix_fields(thd, ref); +} + + +void Item_json_str_multipath::cleanup() +{ + if (tmp_paths) + { + for (uint i= get_n_paths(); i>0; i--) + tmp_paths[i-1].free(); + } + Item_str_func::cleanup(); +} + + +bool Item_func_json_extract::fix_length_and_dec(THD *thd) +{ + collation.set(args[0]->collation); + max_length= args[0]->max_length * (arg_count - 1); + + mark_constant_paths(paths, args+1, arg_count-1); + set_maybe_null(); + return FALSE; +} + + +static bool path_exact(const json_path_with_flags *paths_list, int n_paths, + const json_path_t *p, json_value_types vt, + const int *array_size_counter) +{ + for (; n_paths > 0; n_paths--, paths_list++) + { + if (json_path_compare(&paths_list->p, p, vt, 
/*
  Check whether any of the n_paths search paths "covers" path 'p'.

  Unlike path_exact() (which requires json_path_compare() == 0, an exact
  match), this accepts >= 0, i.e. 'p' may also lie underneath one of the
  listed paths.  'vt' and 'array_size_counter' are forwarded to
  json_path_compare() for array auto-wrap and negative-index handling.
*/
static bool path_ok(const json_path_with_flags *paths_list, int n_paths,
                    const json_path_t *p, json_value_types vt,
                    const int *array_size_counter)
{
  for (; n_paths > 0; n_paths--, paths_list++)
  {
    if (json_path_compare(&paths_list->p, p, vt, array_size_counter) >= 0)
      return TRUE;
  }
  return FALSE;
}
JSON_VALUE_ARRAY : JSON_VALUE_NULL; + + if (str) + { + str->set_charset(js->charset()); + str->length(0); + + if (possible_multiple_values && str->append('[')) + goto error; + } + + json_get_path_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length(), &p); + + while (json_get_path_next(&je, &p) == 0) + { + if (has_negative_path && je.value_type == JSON_VALUE_ARRAY && + json_skip_array_and_count(&je, + array_size_counter + (p.last_step - p.steps))) + goto error; + + if (!path_exact(paths, arg_count-1, &p, je.value_type, array_size_counter)) + continue; + + value= je.value_begin; + + if (*type == JSON_VALUE_NULL) + { + *type= je.value_type; + *out_val= (char *) je.value; + *value_len= je.value_len; + } + if (!str) + { + /* If str is NULL, we only care about the first found value. */ + goto return_ok; + } + + if (json_value_scalar(&je)) + v_len= je.value_end - value; + else + { + if (possible_multiple_values) + sav_je= je; + if (json_skip_level(&je)) + goto error; + v_len= je.s.c_str - value; + if (possible_multiple_values) + je= sav_je; + } + + if ((not_first_value && str->append(", ", 2)) || + str->append((const char *) value, v_len)) + goto error; /* Out of memory. */ + + not_first_value= 1; + + if (!possible_multiple_values) + { + /* Loop to the end of the JSON just to make sure it's valid. */ + while (json_scan_next(&je) == 0) {} + break; + } + } + + if (unlikely(je.s.error)) + goto error; + + if (!not_first_value) + { + /* Nothing was found. */ + goto return_null; + } + + if (possible_multiple_values && str->append(']')) + goto error; /* Out of memory. 
*/ + + js= str; + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + if (json_nice(&je, &tmp_js, Item_func_json_format::LOOSE)) + goto error; + +return_ok: + return &tmp_js; + +error: + report_json_error(js, &je, 0); +return_null: + null_value= 1; + return 0; +} + + +String *Item_func_json_extract::val_str(String *str) +{ + json_value_types type; + char *value; + int value_len; + return read_json(str, &type, &value, &value_len); +} + + +longlong Item_func_json_extract::val_int() +{ + json_value_types type; + char *value; + int value_len; + longlong i= 0; + + if (read_json(NULL, &type, &value, &value_len) != NULL) + { + switch (type) + { + case JSON_VALUE_NUMBER: + case JSON_VALUE_STRING: + { + char *end; + int err; + i= collation.collation->strntoll(value, value_len, 10, &end, &err); + break; + } + case JSON_VALUE_TRUE: + i= 1; + break; + default: + i= 0; + break; + }; + } + return i; +} + + +double Item_func_json_extract::val_real() +{ + json_value_types type; + char *value; + int value_len; + double d= 0.0; + + if (read_json(NULL, &type, &value, &value_len) != NULL) + { + switch (type) + { + case JSON_VALUE_STRING: + case JSON_VALUE_NUMBER: + { + char *end; + int err; + d= collation.collation->strntod(value, value_len, &end, &err); + break; + } + case JSON_VALUE_TRUE: + d= 1.0; + break; + default: + break; + }; + } + + return d; +} + + +my_decimal *Item_func_json_extract::val_decimal(my_decimal *to) +{ + json_value_types type; + char *value; + int value_len; + + if (read_json(NULL, &type, &value, &value_len) != NULL) + { + switch (type) + { + case JSON_VALUE_STRING: + case JSON_VALUE_NUMBER: + { + my_decimal *res= decimal_from_string_with_check(to, collation.collation, + value, + value + value_len); + null_value= res == NULL; + return res; + } + case JSON_VALUE_TRUE: + int2my_decimal(E_DEC_FATAL_ERROR, 1, false/*unsigned_flag*/, to); + return to; + case JSON_VALUE_OBJECT: + case JSON_VALUE_ARRAY: + case 
JSON_VALUE_FALSE: + case JSON_VALUE_UNINITIALIZED: + case JSON_VALUE_NULL: + int2my_decimal(E_DEC_FATAL_ERROR, 0, false/*unsigned_flag*/, to); + return to; + }; + } + DBUG_ASSERT(null_value); + return 0; +} + + + +bool Item_func_json_contains::fix_length_and_dec(THD *thd) +{ + a2_constant= args[1]->const_item(); + a2_parsed= FALSE; + set_maybe_null(); + if (arg_count > 2) + path.set_constant_flag(args[2]->const_item()); + return Item_bool_func::fix_length_and_dec(thd); +} + + +static int find_key_in_object(json_engine_t *j, json_string_t *key) +{ + const uchar *c_str= key->c_str; + + while (json_scan_next(j) == 0 && j->state != JST_OBJ_END) + { + DBUG_ASSERT(j->state == JST_KEY); + if (json_key_matches(j, key)) + return TRUE; + if (json_skip_key(j)) + return FALSE; + key->c_str= c_str; + } + + return FALSE; +} + + +static int check_contains(json_engine_t *js, json_engine_t *value) +{ + json_engine_t loc_js; + bool set_js; + DBUG_EXECUTE_IF("json_check_min_stack_requirement", + { + long arbitrary_var; + long stack_used_up= (available_stack_size(current_thd->thread_stack, &arbitrary_var)); + ALLOCATE_MEM_ON_STACK(my_thread_stack_size-stack_used_up-STACK_MIN_SIZE); + }); + if (check_stack_overrun(current_thd, STACK_MIN_SIZE , NULL)) + return 1; + + switch (js->value_type) + { + case JSON_VALUE_OBJECT: + { + json_string_t key_name; + + if (value->value_type != JSON_VALUE_OBJECT) + return FALSE; + + loc_js= *js; + set_js= FALSE; + json_string_set_cs(&key_name, value->s.cs); + while (json_scan_next(value) == 0 && value->state != JST_OBJ_END) + { + const uchar *k_start, *k_end; + + DBUG_ASSERT(value->state == JST_KEY); + k_start= value->s.c_str; + do + { + k_end= value->s.c_str; + } while (json_read_keyname_chr(value) == 0); + + if (unlikely(value->s.error) || json_read_value(value)) + return FALSE; + + if (set_js) + *js= loc_js; + else + set_js= TRUE; + + json_string_set_str(&key_name, k_start, k_end); + if (!find_key_in_object(js, &key_name) || + json_read_value(js) || 
+ !check_contains(js, value)) + return FALSE; + } + + return value->state == JST_OBJ_END && !json_skip_level(js); + } + case JSON_VALUE_ARRAY: + if (value->value_type != JSON_VALUE_ARRAY) + { + loc_js= *value; + set_js= FALSE; + while (json_scan_next(js) == 0 && js->state != JST_ARRAY_END) + { + int c_level, v_scalar; + DBUG_ASSERT(js->state == JST_VALUE); + if (json_read_value(js)) + return FALSE; + + if (!(v_scalar= json_value_scalar(js))) + c_level= json_get_level(js); + + if (set_js) + *value= loc_js; + else + set_js= TRUE; + + if (check_contains(js, value)) + { + if (json_skip_level(js)) + return FALSE; + return TRUE; + } + if (unlikely(value->s.error) || unlikely(js->s.error) || + (!v_scalar && json_skip_to_level(js, c_level))) + return FALSE; + } + return FALSE; + } + /* else */ + loc_js= *js; + set_js= FALSE; + while (json_scan_next(value) == 0 && value->state != JST_ARRAY_END) + { + DBUG_ASSERT(value->state == JST_VALUE); + if (json_read_value(value)) + return FALSE; + + if (set_js) + *js= loc_js; + else + set_js= TRUE; + if (!check_contains(js, value)) + return FALSE; + } + + return value->state == JST_ARRAY_END; + + case JSON_VALUE_STRING: + if (value->value_type != JSON_VALUE_STRING) + return FALSE; + /* + TODO: make proper json-json comparison here that takes excipient + into account. 
+ */ + return value->value_len == js->value_len && + memcmp(value->value, js->value, value->value_len) == 0; + case JSON_VALUE_NUMBER: + if (value->value_type == JSON_VALUE_NUMBER) + { + double d_j, d_v; + char *end; + int err; + + d_j= js->s.cs->strntod((char *) js->value, js->value_len, &end, &err);; + d_v= value->s.cs->strntod((char *) value->value, value->value_len, &end, &err);; + + return (fabs(d_j - d_v) < 1e-12); + } + else + return FALSE; + + default: + break; + } + + /* + We have these not mentioned in the 'switch' above: + + case JSON_VALUE_TRUE: + case JSON_VALUE_FALSE: + case JSON_VALUE_NULL: + */ + return value->value_type == js->value_type; +} + + +longlong Item_func_json_contains::val_int() +{ + String *js= args[0]->val_json(&tmp_js); + json_engine_t je, ve; + int result; + + if ((null_value= args[0]->null_value)) + return 0; + + if (!a2_parsed) + { + val= args[1]->val_json(&tmp_val); + a2_parsed= a2_constant; + } + + if (val == 0) + { + null_value= 1; + return 0; + } + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + if (arg_count>2) /* Path specified. 
*/ + { + int array_counters[JSON_DEPTH_LIMIT]; + if (!path.parsed) + { + String *s_p= args[2]->val_str(&tmp_path); + if (s_p && + path_setup_nwc(&path.p,s_p->charset(),(const uchar *) s_p->ptr(), + (const uchar *) s_p->end())) + { + report_path_error(s_p, &path.p, 2); + goto return_null; + } + path.parsed= path.constant; + } + if (args[2]->null_value) + goto return_null; + + path.cur_step= path.p.steps; + if (json_find_path(&je, &path.p, &path.cur_step, array_counters)) + { + if (je.s.error) + { + ve.s.error= 0; + goto error; + } + + return FALSE; + } + } + + json_scan_start(&ve, val->charset(),(const uchar *) val->ptr(), + (const uchar *) val->end()); + + if (json_read_value(&je) || json_read_value(&ve)) + goto error; + + result= check_contains(&je, &ve); + if (unlikely(je.s.error || ve.s.error)) + goto error; + + return result; + +error: + if (je.s.error) + report_json_error(js, &je, 0); + if (ve.s.error) + report_json_error(val, &ve, 1); +return_null: + null_value= 1; + return 0; +} + + +bool Item_func_json_contains_path::fix_fields(THD *thd, Item **ref) +{ + return alloc_tmp_paths(thd, arg_count-2, &paths, &tmp_paths) || + (p_found= (bool *) alloc_root(thd->mem_root, + (arg_count-2)*sizeof(bool))) == NULL || + Item_int_func::fix_fields(thd, ref); +} + + +bool Item_func_json_contains_path::fix_length_and_dec(THD *thd) +{ + ooa_constant= args[1]->const_item(); + ooa_parsed= FALSE; + set_maybe_null(); + mark_constant_paths(paths, args+2, arg_count-2); + return Item_bool_func::fix_length_and_dec(thd); +} + + +void Item_func_json_contains_path::cleanup() +{ + if (tmp_paths) + { + for (uint i= arg_count-2; i>0; i--) + tmp_paths[i-1].free(); + tmp_paths= 0; + } + Item_int_func::cleanup(); +} + + +static int parse_one_or_all(const Item_func *f, Item *ooa_arg, + bool *ooa_parsed, bool ooa_constant, bool *mode_one) +{ + if (!*ooa_parsed) + { + char buff[20]; + String *res, tmp(buff, sizeof(buff), &my_charset_bin); + if ((res= ooa_arg->val_str(&tmp)) == NULL) + return 
TRUE; + + *mode_one=eq_ascii_string(res->charset(), "one", + res->ptr(), res->length()); + if (!*mode_one) + { + if (!eq_ascii_string(res->charset(), "all", res->ptr(), res->length())) + { + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_JSON_ONE_OR_ALL, ER_THD(thd, ER_JSON_ONE_OR_ALL), + f->func_name()); + *mode_one= TRUE; + return TRUE; + } + } + *ooa_parsed= ooa_constant; + } + return FALSE; +} + + +#ifdef DUMMY +longlong Item_func_json_contains_path::val_int() +{ + String *js= args[0]->val_json(&tmp_js); + json_engine_t je; + uint n_arg; + longlong result; + + if ((null_value= args[0]->null_value)) + return 0; + + if (parse_one_or_all(this, args[1], &ooa_parsed, ooa_constant, &mode_one)) + goto return_null; + + result= !mode_one; + for (n_arg=2; n_arg < arg_count; n_arg++) + { + int array_counters[JSON_DEPTH_LIMIT]; + json_path_with_flags *c_path= paths + n_arg - 2; + if (!c_path->parsed) + { + String *s_p= args[n_arg]->val_str(tmp_paths + (n_arg-2)); + if (s_p) + { + if (json_path_setup(&c_path->p,s_p->charset(),(const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + { + report_path_error(s_p, &c_path->p, n_arg); + goto null_return; + } + c_path->parsed= c_path->constant; + has_negative_path|= c_path->p.types_used & JSON_PATH_NEGATIVE_INDEX; + } + } + + if (args[n_arg]->null_value) + goto return_null; + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + c_path->cur_step= c_path->p.steps; + if (json_find_path(&je, &c_path->p, &c_path->cur_step, array_counters)) + { + /* Path wasn't found. 
*/ + if (je.s.error) + goto js_error; + + if (!mode_one) + { + result= 0; + break; + } + } + else if (mode_one) + { + result= 1; + break; + } + } + + + return result; + +js_error: + report_json_error(js, &je, 0); +return_null: + null_value= 1; + return 0; +} +#endif /*DUMMY*/ + +longlong Item_func_json_contains_path::val_int() +{ + String *js= args[0]->val_json(&tmp_js); + json_engine_t je; + uint n_arg; + longlong result; + json_path_t p; + int n_found; + LINT_INIT(n_found); + int array_sizes[JSON_DEPTH_LIMIT]; + uint has_negative_path= 0; + + if ((null_value= args[0]->null_value)) + return 0; + + if (parse_one_or_all(this, args[1], &ooa_parsed, ooa_constant, &mode_one)) + goto null_return;; + + for (n_arg=2; n_arg < arg_count; n_arg++) + { + json_path_with_flags *c_path= paths + n_arg - 2; + c_path->p.types_used= JSON_PATH_KEY_NULL; + if (!c_path->parsed) + { + String *s_p= args[n_arg]->val_str(tmp_paths + (n_arg-2)); + if (s_p) + { + if (json_path_setup(&c_path->p,s_p->charset(),(const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + { + report_path_error(s_p, &c_path->p, n_arg); + goto null_return; + } + c_path->parsed= c_path->constant; + has_negative_path|= c_path->p.types_used & JSON_PATH_NEGATIVE_INDEX; + } + } + if (args[n_arg]->null_value) + goto null_return; + } + + json_get_path_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length(), &p); + + + if (!mode_one) + { + bzero(p_found, (arg_count-2) * sizeof(bool)); + n_found= arg_count - 2; + } + else + n_found= 0; /* Just to prevent 'uninitialized value' warnings */ + + result= 0; + while (json_get_path_next(&je, &p) == 0) + { + int n_path= arg_count - 2; + if (has_negative_path && je.value_type == JSON_VALUE_ARRAY && + json_skip_array_and_count(&je, array_sizes + (p.last_step - p.steps))) + { + result= 1; + break; + } + + json_path_with_flags *c_path= paths; + for (; n_path > 0; n_path--, c_path++) + { + if (json_path_compare(&c_path->p, &p, 
je.value_type, array_sizes) >= 0) + { + if (mode_one) + { + result= 1; + break; + } + /* mode_all */ + if (p_found[n_path-1]) + continue; /* already found */ + if (--n_found == 0) + { + result= 1; + break; + } + p_found[n_path-1]= TRUE; + } + } + } + + if (likely(je.s.error == 0)) + return result; + + report_json_error(js, &je, 0); +null_return: + null_value= 1; + return 0; +} + + +/* + This reproduces behavior according to the former + Item_func_conv_charset::is_json_type() which returned args[0]->is_json_type(). + JSON functions with multiple string input with different character sets + wrap some arguments into Item_func_conv_charset. So the former + Item_func_conv_charset::is_json_type() took the JSON propery from args[0], + i.e. from the original argument before the conversion. + This is probably not always correct because an *explicit* + `CONVERT(arg USING charset)` is actually a general purpose string + expression, not a JSON expression. +*/ +bool is_json_type(const Item *item) +{ + for ( ; ; ) + { + if (Type_handler_json_common::is_json_type_handler(item->type_handler())) + return true; + const Item_func_conv_charset *func; + if (!(func= dynamic_cast(item->real_item()))) + return false; + item= func->arguments()[0]; + } + return false; +} + + +static int append_json_value(String *str, Item *item, String *tmp_val) +{ + if (item->type_handler()->is_bool_type()) + { + longlong v_int= item->val_int(); + const char *t_f; + int t_f_len; + + if (item->null_value) + goto append_null; + + if (v_int) + { + t_f= "true"; + t_f_len= 4; + } + else + { + t_f= "false"; + t_f_len= 5; + } + + return str->append(t_f, t_f_len); + } + { + String *sv= item->val_json(tmp_val); + if (item->null_value) + goto append_null; + if (is_json_type(item)) + return str->append(sv->ptr(), sv->length()); + + if (item->result_type() == STRING_RESULT) + { + return str->append('"') || + st_append_escaped(str, sv) || + str->append('"'); + } + return st_append_escaped(str, sv); + } + +append_null: 
+ return str->append(STRING_WITH_LEN("null")); +} + + +static int append_json_value_from_field(String *str, + Item *i, Field *f, const uchar *key, size_t offset, String *tmp_val) +{ + if (i->type_handler()->is_bool_type()) + { + longlong v_int= f->val_int(key + offset); + const char *t_f; + int t_f_len; + + if (f->is_null_in_record(key)) + goto append_null; + + if (v_int) + { + t_f= "true"; + t_f_len= 4; + } + else + { + t_f= "false"; + t_f_len= 5; + } + + return str->append(t_f, t_f_len); + } + { + String *sv= f->val_str(tmp_val, key + offset); + if (f->is_null_in_record(key)) + goto append_null; + if (is_json_type(i)) + return str->append(sv->ptr(), sv->length()); + + if (i->result_type() == STRING_RESULT) + { + return str->append('"') || + st_append_escaped(str, sv) || + str->append('"'); + } + return st_append_escaped(str, sv); + } + +append_null: + return str->append(STRING_WITH_LEN("null")); +} + + +static int append_json_keyname(String *str, Item *item, String *tmp_val) +{ + String *sv= item->val_str(tmp_val); + if (item->null_value) + goto append_null; + + return str->append('"') || + st_append_escaped(str, sv) || + str->append("\": ", 3); + +append_null: + return str->append("\"\": ", 4); +} + + +bool Item_func_json_array::fix_length_and_dec(THD *thd) +{ + ulonglong char_length= 2; + uint n_arg; + + result_limit= 0; + + if (arg_count == 0) + { + THD* thd= current_thd; + collation.set(thd->variables.collation_connection, + DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII); + tmp_val.set_charset(thd->variables.collation_connection); + max_length= 2; + return FALSE; + } + + if (agg_arg_charsets_for_string_result(collation, args, arg_count)) + return TRUE; + + for (n_arg=0 ; n_arg < arg_count ; n_arg++) + char_length+= static_cast(args[n_arg]->max_char_length()) + 4; + + fix_char_length_ulonglong(char_length); + tmp_val.set_charset(collation.collation); + return FALSE; +} + + +String *Item_func_json_array::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + uint n_arg; 
+ + str->length(0); + str->set_charset(collation.collation); + + if (str->append('[') || + ((arg_count > 0) && append_json_value(str, args[0], &tmp_val))) + goto err_return; + + for (n_arg=1; n_arg < arg_count; n_arg++) + { + if (str->append(", ", 2) || + append_json_value(str, args[n_arg], &tmp_val)) + goto err_return; + } + + if (str->append(']')) + goto err_return; + + if (result_limit == 0) + result_limit= current_thd->variables.max_allowed_packet; + + if (str->length() <= result_limit) + return str; + + push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(current_thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), result_limit); + +err_return: + /*TODO: Launch out of memory error. */ + null_value= 1; + return NULL; +} + + +bool Item_func_json_array_append::fix_length_and_dec(THD *thd) +{ + uint n_arg; + ulonglong char_length; + + collation.set(args[0]->collation); + char_length= args[0]->max_char_length(); + + for (n_arg= 1; n_arg < arg_count; n_arg+= 2) + { + paths[n_arg/2].set_constant_flag(args[n_arg]->const_item()); + char_length+= + static_cast(args[n_arg+1]->max_char_length()) + 4; + } + + fix_char_length_ulonglong(char_length); + set_maybe_null(); + return FALSE; +} + + +String *Item_func_json_array_append::val_str(String *str) +{ + json_engine_t je; + String *js= args[0]->val_json(&tmp_js); + uint n_arg, n_path; + size_t str_rest_len; + const uchar *ar_end; + THD *thd= current_thd; + + DBUG_ASSERT(fixed()); + + if ((null_value= args[0]->null_value)) + return 0; + + for (n_arg=1, n_path=0; n_arg < arg_count; n_arg+=2, n_path++) + { + int array_counters[JSON_DEPTH_LIMIT]; + json_path_with_flags *c_path= paths + n_path; + if (!c_path->parsed) + { + String *s_p= args[n_arg]->val_str(tmp_paths+n_path); + if (s_p && + path_setup_nwc(&c_path->p,s_p->charset(),(const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + { + report_path_error(s_p, &c_path->p, n_arg); + goto 
return_null; + } + c_path->parsed= c_path->constant; + } + if (args[n_arg]->null_value) + goto return_null; + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + je.killed_ptr= (uchar*)&thd->killed; + + c_path->cur_step= c_path->p.steps; + + if (json_find_path(&je, &c_path->p, &c_path->cur_step, array_counters)) + { + if (je.s.error) + goto js_error; + + goto return_null; + } + + if (json_read_value(&je)) + goto js_error; + + str->length(0); + str->set_charset(js->charset()); + if (str->reserve(js->length() + 8, 1024)) + goto return_null; /* Out of memory. */ + + if (je.value_type == JSON_VALUE_ARRAY) + { + int n_items; + if (json_skip_level_and_count(&je, &n_items)) + goto js_error; + + ar_end= je.s.c_str - je.sav_c_len; + str_rest_len= js->length() - (ar_end - (const uchar *) js->ptr()); + str->q_append(js->ptr(), ar_end-(const uchar *) js->ptr()); + if (n_items) + str->append(", ", 2); + if (append_json_value(str, args[n_arg+1], &tmp_val)) + goto return_null; /* Out of memory. */ + + if (str->reserve(str_rest_len, 1024)) + goto return_null; /* Out of memory. */ + str->q_append((const char *) ar_end, str_rest_len); + } + else + { + const uchar *c_from, *c_to; + + /* Wrap as an array. */ + str->q_append(js->ptr(), (const char *) je.value_begin - js->ptr()); + c_from= je.value_begin; + + if (je.value_type == JSON_VALUE_OBJECT) + { + if (json_skip_level(&je)) + goto js_error; + c_to= je.s.c_str; + } + else + c_to= je.value_end; + + if (str->append('[') || + str->append((const char *) c_from, c_to - c_from) || + str->append(", ", 2) || + append_json_value(str, args[n_arg+1], &tmp_val) || + str->append(']') || + str->append((const char *) je.s.c_str, + js->end() - (const char *) je.s.c_str)) + goto return_null; /* Out of memory. */ + } + { + /* Swap str and js. 
*/ + if (str == &tmp_js) + { + str= js; + js= &tmp_js; + } + else + { + js= str; + str= &tmp_js; + } + } + } + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + je.killed_ptr= (uchar*)&thd->killed; + if (json_nice(&je, str, Item_func_json_format::LOOSE)) + goto js_error; + + return str; + +js_error: + report_json_error(js, &je, 0); + +return_null: + thd->check_killed(); // to get the error message right + null_value= 1; + return 0; +} + + +String *Item_func_json_array_insert::val_str(String *str) +{ + json_engine_t je; + String *js= args[0]->val_json(&tmp_js); + uint n_arg, n_path; + THD *thd= current_thd; + + DBUG_ASSERT(fixed()); + + if ((null_value= args[0]->null_value)) + return 0; + + for (n_arg=1, n_path=0; n_arg < arg_count; n_arg+=2, n_path++) + { + int array_counters[JSON_DEPTH_LIMIT]; + json_path_with_flags *c_path= paths + n_path; + const char *item_pos; + int n_item, corrected_n_item; + + if (!c_path->parsed) + { + String *s_p= args[n_arg]->val_str(tmp_paths+n_path); + if (s_p && + (path_setup_nwc(&c_path->p,s_p->charset(),(const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length()) || + c_path->p.last_step - 1 < c_path->p.steps || + c_path->p.last_step->type != JSON_PATH_ARRAY)) + { + if (c_path->p.s.error == 0) + c_path->p.s.error= SHOULD_END_WITH_ARRAY; + + report_path_error(s_p, &c_path->p, n_arg); + + goto return_null; + } + c_path->parsed= c_path->constant; + c_path->p.last_step--; + } + if (args[n_arg]->null_value) + goto return_null; + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + je.killed_ptr= (uchar*)&thd->killed; + + c_path->cur_step= c_path->p.steps; + + if (json_find_path(&je, &c_path->p, &c_path->cur_step, array_counters)) + { + if (je.s.error) + goto js_error; + + /* Can't find the array to insert. 
*/ + continue; + } + + if (json_read_value(&je)) + goto js_error; + + if (je.value_type != JSON_VALUE_ARRAY) + { + /* Must be an array. */ + continue; + } + + item_pos= 0; + n_item= 0; + corrected_n_item= c_path->p.last_step[1].n_item; + if (corrected_n_item < 0) + { + int array_size; + if (json_skip_array_and_count(&je, &array_size)) + goto js_error; + corrected_n_item+= array_size + 1; + } + + while (json_scan_next(&je) == 0 && je.state != JST_ARRAY_END) + { + DBUG_ASSERT(je.state == JST_VALUE); + + if (n_item == corrected_n_item) + { + item_pos= (const char *) je.s.c_str; + break; + } + n_item++; + + if (json_read_value(&je) || + (!json_value_scalar(&je) && json_skip_level(&je))) + goto js_error; + } + + if (unlikely(je.s.error || *je.killed_ptr)) + goto js_error; + + str->length(0); + str->set_charset(js->charset()); + if (item_pos) + { + if (append_simple(str, js->ptr(), item_pos - js->ptr()) || + (n_item > 0 && str->append(" ", 1)) || + append_json_value(str, args[n_arg+1], &tmp_val) || + str->append(",", 1) || + (n_item == 0 && str->append(" ", 1)) || + append_simple(str, item_pos, js->end() - item_pos)) + goto return_null; /* Out of memory. */ + } + else + { + /* Insert position wasn't found - append to the array. */ + DBUG_ASSERT(je.state == JST_ARRAY_END); + item_pos= (const char *) (je.s.c_str - je.sav_c_len); + if (append_simple(str, js->ptr(), item_pos - js->ptr()) || + (n_item > 0 && str->append(", ", 2)) || + append_json_value(str, args[n_arg+1], &tmp_val) || + append_simple(str, item_pos, js->end() - item_pos)) + goto return_null; /* Out of memory. */ + } + + { + /* Swap str and js. 
*/ + if (str == &tmp_js) + { + str= js; + js= &tmp_js; + } + else + { + js= str; + str= &tmp_js; + } + } + } + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + je.killed_ptr= (uchar*)&thd->killed; + if (json_nice(&je, str, Item_func_json_format::LOOSE)) + goto js_error; + + return str; + +js_error: + report_json_error(js, &je, 0); +return_null: + thd->check_killed(); // to get the error message right + null_value= 1; + return 0; +} + + +String *Item_func_json_object::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + uint n_arg; + + str->length(0); + str->set_charset(collation.collation); + + if (str->append('{') || + (arg_count > 0 && + (append_json_keyname(str, args[0], &tmp_val) || + append_json_value(str, args[1], &tmp_val)))) + goto err_return; + + for (n_arg=2; n_arg < arg_count; n_arg+=2) + { + if (str->append(", ", 2) || + append_json_keyname(str, args[n_arg], &tmp_val) || + append_json_value(str, args[n_arg+1], &tmp_val)) + goto err_return; + } + + if (str->append('}')) + goto err_return; + + if (result_limit == 0) + result_limit= current_thd->variables.max_allowed_packet; + + if (str->length() <= result_limit) + return str; + + push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(current_thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), result_limit); + +err_return: + /*TODO: Launch out of memory error. 
*/ + null_value= 1; + return NULL; +} + + +static int do_merge(String *str, json_engine_t *je1, json_engine_t *je2) +{ + + DBUG_EXECUTE_IF("json_check_min_stack_requirement", + { + long arbitrary_var; + long stack_used_up= (available_stack_size(current_thd->thread_stack, &arbitrary_var)); + ALLOCATE_MEM_ON_STACK(my_thread_stack_size-stack_used_up-STACK_MIN_SIZE); + }); + if (check_stack_overrun(current_thd, STACK_MIN_SIZE , NULL)) + return 1; + + if (json_read_value(je1) || json_read_value(je2)) + return 1; + + if (je1->value_type == JSON_VALUE_OBJECT && + je2->value_type == JSON_VALUE_OBJECT) + { + json_engine_t sav_je1= *je1; + json_engine_t sav_je2= *je2; + + int first_key= 1; + json_string_t key_name; + + json_string_set_cs(&key_name, je1->s.cs); + + if (str->append('{')) + return 3; + while (json_scan_next(je1) == 0 && + je1->state != JST_OBJ_END) + { + const uchar *key_start, *key_end; + /* Loop through the Json_1 keys and compare with the Json_2 keys. */ + DBUG_ASSERT(je1->state == JST_KEY); + key_start= je1->s.c_str; + do + { + key_end= je1->s.c_str; + } while (json_read_keyname_chr(je1) == 0); + + if (unlikely(je1->s.error)) + return 1; + + if (first_key) + first_key= 0; + else + { + if (str->append(", ", 2)) + return 3; + *je2= sav_je2; + } + + if (str->append('"') || + append_simple(str, key_start, key_end - key_start) || + str->append("\":", 2)) + return 3; + + while (json_scan_next(je2) == 0 && + je2->state != JST_OBJ_END) + { + int ires; + DBUG_ASSERT(je2->state == JST_KEY); + json_string_set_str(&key_name, key_start, key_end); + if (!json_key_matches(je2, &key_name)) + { + if (je2->s.error || json_skip_key(je2)) + return 2; + continue; + } + + /* Json_2 has same key as Json_1. Merge them. */ + if ((ires= do_merge(str, je1, je2))) + return ires; + goto merged_j1; + } + if (unlikely(je2->s.error)) + return 2; + + key_start= je1->s.c_str; + /* Just append the Json_1 key value. 
*/ + if (json_skip_key(je1)) + return 1; + if (append_simple(str, key_start, je1->s.c_str - key_start)) + return 3; + +merged_j1: + continue; + } + + *je2= sav_je2; + /* + Now loop through the Json_2 keys. + Skip if there is same key in Json_1 + */ + while (json_scan_next(je2) == 0 && + je2->state != JST_OBJ_END) + { + const uchar *key_start, *key_end; + DBUG_ASSERT(je2->state == JST_KEY); + key_start= je2->s.c_str; + do + { + key_end= je2->s.c_str; + } while (json_read_keyname_chr(je2) == 0); + + if (unlikely(je2->s.error)) + return 1; + + *je1= sav_je1; + while (json_scan_next(je1) == 0 && + je1->state != JST_OBJ_END) + { + DBUG_ASSERT(je1->state == JST_KEY); + json_string_set_str(&key_name, key_start, key_end); + if (!json_key_matches(je1, &key_name)) + { + if (unlikely(je1->s.error || json_skip_key(je1))) + return 2; + continue; + } + if (json_skip_key(je2) || json_skip_level(je1)) + return 1; + goto continue_j2; + } + + if (unlikely(je1->s.error)) + return 2; + + if (first_key) + first_key= 0; + else if (str->append(", ", 2)) + return 3; + + if (json_skip_key(je2)) + return 1; + + if (str->append('"') || + append_simple(str, key_start, je2->s.c_str - key_start)) + return 3; + +continue_j2: + continue; + } + + if (str->append('}')) + return 3; + } + else + { + const uchar *end1, *beg1, *end2, *beg2; + int n_items1=1, n_items2= 1; + + beg1= je1->value_begin; + + /* Merge as a single array. 
*/ + if (je1->value_type == JSON_VALUE_ARRAY) + { + if (json_skip_level_and_count(je1, &n_items1)) + return 1; + + end1= je1->s.c_str - je1->sav_c_len; + } + else + { + if (str->append('[')) + return 3; + if (je1->value_type == JSON_VALUE_OBJECT) + { + if (json_skip_level(je1)) + return 1; + end1= je1->s.c_str; + } + else + end1= je1->value_end; + } + + if (str->append((const char*) beg1, end1 - beg1)) + return 3; + + if (json_value_scalar(je2)) + { + beg2= je2->value_begin; + end2= je2->value_end; + } + else + { + if (je2->value_type == JSON_VALUE_OBJECT) + { + beg2= je2->value_begin; + if (json_skip_level(je2)) + return 2; + } + else + { + beg2= je2->s.c_str; + if (json_skip_level_and_count(je2, &n_items2)) + return 2; + } + end2= je2->s.c_str; + } + + if ((n_items1 && n_items2 && str->append(", ", 2)) || + str->append((const char*) beg2, end2 - beg2)) + return 3; + + if (je2->value_type != JSON_VALUE_ARRAY && + str->append(']')) + return 3; + } + + return 0; +} + + +String *Item_func_json_merge::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + json_engine_t je1, je2; + String *js1= args[0]->val_json(&tmp_js1), *js2=NULL; + uint n_arg; + THD *thd= current_thd; + LINT_INIT(js2); + + if (args[0]->null_value) + goto null_return; + + for (n_arg=1; n_arg < arg_count; n_arg++) + { + str->set_charset(js1->charset()); + str->length(0); + + js2= args[n_arg]->val_json(&tmp_js2); + if (args[n_arg]->null_value) + goto null_return; + + json_scan_start(&je1, js1->charset(),(const uchar *) js1->ptr(), + (const uchar *) js1->ptr() + js1->length()); + je1.killed_ptr= (uchar*)&thd->killed; + + json_scan_start(&je2, js2->charset(),(const uchar *) js2->ptr(), + (const uchar *) js2->ptr() + js2->length()); + je2.killed_ptr= (uchar*)&thd->killed; + + if (do_merge(str, &je1, &je2)) + goto error_return; + + { + /* Swap str and js1. 
*/ + if (str == &tmp_js1) + { + str= js1; + js1= &tmp_js1; + } + else + { + js1= str; + str= &tmp_js1; + } + } + } + + json_scan_start(&je1, js1->charset(),(const uchar *) js1->ptr(), + (const uchar *) js1->ptr() + js1->length()); + je1.killed_ptr= (uchar*)&thd->killed; + if (json_nice(&je1, str, Item_func_json_format::LOOSE)) + goto error_return; + + null_value= 0; + return str; + +error_return: + if (je1.s.error) + report_json_error(js1, &je1, 0); + if (je2.s.error) + report_json_error(js2, &je2, n_arg); + thd->check_killed(); // to get the error message right +null_return: + null_value= 1; + return NULL; +} + + +static int copy_value_patch(String *str, json_engine_t *je) +{ + int first_key= 1; + + if (je->value_type != JSON_VALUE_OBJECT) + { + const uchar *beg, *end; + + beg= je->value_begin; + + if (!json_value_scalar(je)) + { + if (json_skip_level(je)) + return 1; + end= je->s.c_str; + } + else + end= je->value_end; + + if (append_simple(str, beg, end-beg)) + return 1; + + return 0; + } + /* JSON_VALUE_OBJECT */ + + if (str->append('{')) + return 1; + while (json_scan_next(je) == 0 && je->state != JST_OBJ_END) + { + const uchar *key_start; + /* Loop through the Json_1 keys and compare with the Json_2 keys. 
*/ + DBUG_ASSERT(je->state == JST_KEY); + key_start= je->s.c_str; + + if (json_read_value(je)) + return 1; + + if (je->value_type == JSON_VALUE_NULL) + continue; + + if (!first_key) + { + if (str->append(", ", 2)) + return 3; + } + else + first_key= 0; + + if (str->append('"') || + append_simple(str, key_start, je->value_begin - key_start) || + copy_value_patch(str, je)) + return 1; + } + if (str->append('}')) + return 1; + + return 0; +} + + +static int do_merge_patch(String *str, json_engine_t *je1, json_engine_t *je2, + bool *empty_result) +{ + DBUG_EXECUTE_IF("json_check_min_stack_requirement", + { + long arbitrary_var; + long stack_used_up= (available_stack_size(current_thd->thread_stack, &arbitrary_var)); + ALLOCATE_MEM_ON_STACK(my_thread_stack_size-stack_used_up-STACK_MIN_SIZE); + }); + if (check_stack_overrun(current_thd, STACK_MIN_SIZE , NULL)) + return 1; + + if (json_read_value(je1) || json_read_value(je2)) + return 1; + + if (je1->value_type == JSON_VALUE_OBJECT && + je2->value_type == JSON_VALUE_OBJECT) + { + json_engine_t sav_je1= *je1; + json_engine_t sav_je2= *je2; + + int first_key= 1; + json_string_t key_name; + size_t sav_len; + bool mrg_empty; + + *empty_result= FALSE; + json_string_set_cs(&key_name, je1->s.cs); + + if (str->append('{')) + return 3; + while (json_scan_next(je1) == 0 && + je1->state != JST_OBJ_END) + { + const uchar *key_start, *key_end; + /* Loop through the Json_1 keys and compare with the Json_2 keys. 
*/ + DBUG_ASSERT(je1->state == JST_KEY); + key_start= je1->s.c_str; + do + { + key_end= je1->s.c_str; + } while (json_read_keyname_chr(je1) == 0); + + if (je1->s.error) + return 1; + + sav_len= str->length(); + + if (!first_key) + { + if (str->append(", ", 2)) + return 3; + *je2= sav_je2; + } + + if (str->append('"') || + append_simple(str, key_start, key_end - key_start) || + str->append("\":", 2)) + return 3; + + while (json_scan_next(je2) == 0 && + je2->state != JST_OBJ_END) + { + int ires; + DBUG_ASSERT(je2->state == JST_KEY); + json_string_set_str(&key_name, key_start, key_end); + if (!json_key_matches(je2, &key_name)) + { + if (je2->s.error || json_skip_key(je2)) + return 2; + continue; + } + + /* Json_2 has same key as Json_1. Merge them. */ + if ((ires= do_merge_patch(str, je1, je2, &mrg_empty))) + return ires; + + if (mrg_empty) + str->length(sav_len); + else + first_key= 0; + + goto merged_j1; + } + + if (je2->s.error) + return 2; + + key_start= je1->s.c_str; + /* Just append the Json_1 key value. */ + if (json_skip_key(je1)) + return 1; + if (append_simple(str, key_start, je1->s.c_str - key_start)) + return 3; + first_key= 0; + +merged_j1: + continue; + } + + *je2= sav_je2; + /* + Now loop through the Json_2 keys. 
+ Skip if there is same key in Json_1 + */ + while (json_scan_next(je2) == 0 && + je2->state != JST_OBJ_END) + { + const uchar *key_start, *key_end; + DBUG_ASSERT(je2->state == JST_KEY); + key_start= je2->s.c_str; + do + { + key_end= je2->s.c_str; + } while (json_read_keyname_chr(je2) == 0); + + if (je2->s.error) + return 1; + + *je1= sav_je1; + while (json_scan_next(je1) == 0 && + je1->state != JST_OBJ_END) + { + DBUG_ASSERT(je1->state == JST_KEY); + json_string_set_str(&key_name, key_start, key_end); + if (!json_key_matches(je1, &key_name)) + { + if (je1->s.error || json_skip_key(je1)) + return 2; + continue; + } + if (json_skip_key(je2) || + json_skip_level(je1)) + return 1; + goto continue_j2; + } + + if (je1->s.error) + return 2; + + + sav_len= str->length(); + + if (!first_key && str->append(", ", 2)) + return 3; + + if (str->append('"') || + append_simple(str, key_start, key_end - key_start) || + str->append("\":", 2)) + return 3; + + if (json_read_value(je2)) + return 1; + + if (je2->value_type == JSON_VALUE_NULL) + str->length(sav_len); + else + { + if (copy_value_patch(str, je2)) + return 1; + first_key= 0; + } + +continue_j2: + continue; + } + + if (str->append('}')) + return 3; + } + else + { + if (!json_value_scalar(je1) && json_skip_level(je1)) + return 1; + + *empty_result= je2->value_type == JSON_VALUE_NULL; + if (!(*empty_result) && copy_value_patch(str, je2)) + return 1; + } + + return 0; +} + + +String *Item_func_json_merge_patch::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + json_engine_t je1, je2; + String *js1= args[0]->val_json(&tmp_js1), *js2=NULL; + uint n_arg; + bool empty_result, merge_to_null; + THD *thd= current_thd; + + /* To report errors properly if some JSON is invalid. 
*/ + je1.s.error= je2.s.error= 0; + merge_to_null= args[0]->null_value; + + for (n_arg=1; n_arg < arg_count; n_arg++) + { + js2= args[n_arg]->val_json(&tmp_js2); + if (args[n_arg]->null_value) + { + merge_to_null= true; + goto cont_point; + } + + json_scan_start(&je2, js2->charset(),(const uchar *) js2->ptr(), + (const uchar *) js2->ptr() + js2->length()); + je2.killed_ptr= (uchar*)&thd->killed; + + if (merge_to_null) + { + if (json_read_value(&je2)) + goto error_return; + if (je2.value_type == JSON_VALUE_OBJECT) + goto cont_point; + + merge_to_null= false; + str->set(js2->ptr(), js2->length(), js2->charset()); + goto cont_point; + } + + str->set_charset(js1->charset()); + str->length(0); + + + json_scan_start(&je1, js1->charset(),(const uchar *) js1->ptr(), + (const uchar *) js1->ptr() + js1->length()); + je1.killed_ptr= (uchar*)&thd->killed; + + if (do_merge_patch(str, &je1, &je2, &empty_result)) + goto error_return; + + if (empty_result) + str->append(STRING_WITH_LEN("null")); + +cont_point: + { + /* Swap str and js1. 
*/ + if (str == &tmp_js1) + { + str= js1; + js1= &tmp_js1; + } + else + { + js1= str; + str= &tmp_js1; + } + } + } + + if (merge_to_null) + goto null_return; + + json_scan_start(&je1, js1->charset(),(const uchar *) js1->ptr(), + (const uchar *) js1->ptr() + js1->length()); + je1.killed_ptr= (uchar*)&thd->killed; + if (json_nice(&je1, str, Item_func_json_format::LOOSE)) + goto error_return; + + null_value= 0; + return str; + +error_return: + if (je1.s.error) + report_json_error(js1, &je1, 0); + if (je2.s.error) + report_json_error(js2, &je2, n_arg); + thd->check_killed(); // to get the error message right +null_return: + null_value= 1; + return NULL; +} + + +bool Item_func_json_length::fix_length_and_dec(THD *thd) +{ + if (arg_count > 1) + path.set_constant_flag(args[1]->const_item()); + set_maybe_null(); + max_length= 10; + return FALSE; +} + + +longlong Item_func_json_length::val_int() +{ + String *js= args[0]->val_json(&tmp_js); + json_engine_t je; + uint length= 0; + int array_counters[JSON_DEPTH_LIMIT]; + int err; + + if ((null_value= args[0]->null_value)) + return 0; + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + if (arg_count > 1) + { + /* Path specified - let's apply it. 
*/ + if (!path.parsed) + { + String *s_p= args[1]->val_str(&tmp_path); + if (s_p && + path_setup_nwc(&path.p, s_p->charset(), (const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + { + report_path_error(s_p, &path.p, 1); + goto null_return; + } + path.parsed= path.constant; + } + if (args[1]->null_value) + goto null_return; + + path.cur_step= path.p.steps; + if (json_find_path(&je, &path.p, &path.cur_step, array_counters)) + { + if (je.s.error) + goto err_return; + goto null_return; + } + } + + + if (json_read_value(&je)) + goto err_return; + + if (json_value_scalar(&je)) + return 1; + + while (!(err= json_scan_next(&je)) && + je.state != JST_OBJ_END && je.state != JST_ARRAY_END) + { + switch (je.state) + { + case JST_VALUE: + case JST_KEY: + length++; + break; + case JST_OBJ_START: + case JST_ARRAY_START: + if (json_skip_level(&je)) + goto err_return; + break; + default: + break; + }; + } + + if (!err) + { + /* Parse to the end of the JSON just to check it's valid. 
*/
    while (json_scan_next(&je) == 0) {}
  }

  if (likely(!je.s.error))
    return length;

err_return:
  report_json_error(js, &je, 0);
null_return:
  null_value= 1;
  return 0;
}


/*
  JSON_DEPTH(json_doc) - maximum nesting depth of the document.

  Tracks the current depth while scanning: the first value or key after
  an object/array start (or at document start) increments the current
  depth, and a matching end decrements it.  Returns NULL on NULL input
  or invalid JSON.
*/
longlong Item_func_json_depth::val_int()
{
  String *js= args[0]->val_json(&tmp_js);
  json_engine_t je;
  uint depth= 0, c_depth= 0;  /* maximum seen so far / current depth */
  bool inc_depth= TRUE;       /* next value/key opens a new level */

  if ((null_value= args[0]->null_value))
    return 0;


  json_scan_start(&je, js->charset(),(const uchar *) js->ptr(),
                  (const uchar *) js->ptr() + js->length());

  do
  {
    switch (je.state)
    {
    case JST_VALUE:
    case JST_KEY:
      if (inc_depth)
      {
        c_depth++;
        inc_depth= FALSE;
        if (c_depth > depth)
          depth= c_depth;
      }
      break;
    case JST_OBJ_START:
    case JST_ARRAY_START:
      inc_depth= TRUE;
      break;
    case JST_OBJ_END:
    case JST_ARRAY_END:
      /* An empty container never incremented c_depth - don't decrement. */
      if (!inc_depth)
        c_depth--;
      inc_depth= FALSE;
      break;
    default:
      break;
    }
  } while (json_scan_next(&je) == 0);

  if (likely(!je.s.error))
    return depth;

  report_json_error(js, &je, 0);
  null_value= 1;
  return 0;
}


/*
  JSON_TYPE() always returns an ASCII type name in utf8mb3;
  12 characters is enough for the longest name returned by val_str().
  Nullable: NULL input or invalid JSON produces NULL.
*/
bool Item_func_json_type::fix_length_and_dec(THD *thd)
{
  collation.set(&my_charset_utf8mb3_general_ci);
  max_length= 12 * collation.collation->mbmaxlen;
  set_maybe_null();
  return FALSE;
}


/*
  JSON_TYPE(json_doc) - name of the top-level value's type:
  OBJECT, ARRAY, STRING, DOUBLE/INTEGER (depending on whether the
  number has a fractional part), BOOLEAN, or NULL.
*/
String *Item_func_json_type::val_str(String *str)
{
  String *js= args[0]->val_json(&tmp_js);
  json_engine_t je;
  const char *type;

  if ((null_value= args[0]->null_value))
    return 0;


  json_scan_start(&je, js->charset(),(const uchar *) js->ptr(),
                  (const uchar *) js->ptr() + js->length());

  if (json_read_value(&je))
    goto error;

  switch (je.value_type)
  {
    case JSON_VALUE_OBJECT:
      type= "OBJECT";
      break;
    case JSON_VALUE_ARRAY:
      type= "ARRAY";
      break;
    case JSON_VALUE_STRING:
      type= "STRING";
      break;
    case JSON_VALUE_NUMBER:
      type= (je.num_flags & JSON_NUM_FRAC_PART) ?
"DOUBLE" : "INTEGER"; + break; + case JSON_VALUE_TRUE: + case JSON_VALUE_FALSE: + type= "BOOLEAN"; + break; + default: + type= "NULL"; + break; + } + + str->set(type, strlen(type), &my_charset_utf8mb3_general_ci); + return str; + +error: + report_json_error(js, &je, 0); + null_value= 1; + return 0; +} + + +bool Item_func_json_insert::fix_length_and_dec(THD *thd) +{ + uint n_arg; + ulonglong char_length; + + collation.set(args[0]->collation); + char_length= args[0]->max_char_length(); + + for (n_arg= 1; n_arg < arg_count; n_arg+= 2) + { + paths[n_arg/2].set_constant_flag(args[n_arg]->const_item()); + char_length+= + static_cast(args[n_arg+1]->max_char_length()) + 4; + } + + fix_char_length_ulonglong(char_length); + set_maybe_null(); + return FALSE; +} + + +String *Item_func_json_insert::val_str(String *str) +{ + json_engine_t je; + String *js= args[0]->val_json(&tmp_js); + uint n_arg, n_path; + json_string_t key_name; + THD *thd= current_thd; + + DBUG_ASSERT(fixed()); + + if ((null_value= args[0]->null_value)) + return 0; + + str->set_charset(collation.collation); + tmp_js.set_charset(collation.collation); + json_string_set_cs(&key_name, collation.collation); + + for (n_arg=1, n_path=0; n_arg < arg_count; n_arg+=2, n_path++) + { + int array_counters[JSON_DEPTH_LIMIT]; + json_path_with_flags *c_path= paths + n_path; + const char *v_to; + json_path_step_t *lp; + int corrected_n_item; + + if (!c_path->parsed) + { + String *s_p= args[n_arg]->val_str(tmp_paths+n_path); + if (s_p) + { + if (path_setup_nwc(&c_path->p,s_p->charset(), + (const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + { + report_path_error(s_p, &c_path->p, n_arg); + goto return_null; + } + + /* We search to the last step. 
*/ + c_path->p.last_step--; + } + c_path->parsed= c_path->constant; + } + if (args[n_arg]->null_value) + goto return_null; + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + je.killed_ptr= (uchar*)&thd->killed; + + if (c_path->p.last_step < c_path->p.steps) + goto v_found; + + c_path->cur_step= c_path->p.steps; + + if (c_path->p.last_step >= c_path->p.steps && + json_find_path(&je, &c_path->p, &c_path->cur_step, array_counters)) + { + if (je.s.error) + goto js_error; + continue; + } + + if (json_read_value(&je)) + goto js_error; + + lp= c_path->p.last_step+1; + if (lp->type & JSON_PATH_ARRAY) + { + int n_item= 0; + + if (je.value_type != JSON_VALUE_ARRAY) + { + const uchar *v_from= je.value_begin; + int do_array_autowrap; + + if (mode_insert) + { + if (mode_replace) + do_array_autowrap= lp->n_item > 0; + else + { + if (lp->n_item == 0) + continue; + do_array_autowrap= 1; + } + } + else + { + if (lp->n_item) + continue; + do_array_autowrap= 0; + } + + + str->length(0); + /* Wrap the value as an array. */ + if (append_simple(str, js->ptr(), (const char *) v_from - js->ptr()) || + (do_array_autowrap && str->append('['))) + goto js_error; /* Out of memory. */ + + if (je.value_type == JSON_VALUE_OBJECT) + { + if (json_skip_level(&je)) + goto js_error; + } + + if ((do_array_autowrap && + (append_simple(str, v_from, je.s.c_str - v_from) || + str->append(", ", 2))) || + append_json_value(str, args[n_arg+1], &tmp_val) || + (do_array_autowrap && str->append(']')) || + append_simple(str, je.s.c_str, js->end()-(const char *) je.s.c_str)) + goto js_error; /* Out of memory. 
*/ + + goto continue_point; + } + corrected_n_item= lp->n_item; + if (corrected_n_item < 0) + { + int array_size; + if (json_skip_array_and_count(&je, &array_size)) + goto js_error; + corrected_n_item+= array_size; + } + + while (json_scan_next(&je) == 0 && je.state != JST_ARRAY_END) + { + switch (je.state) + { + case JST_VALUE: + if (n_item == corrected_n_item) + goto v_found; + n_item++; + if (json_skip_array_item(&je)) + goto js_error; + break; + default: + break; + } + } + + if (unlikely(je.s.error)) + goto js_error; + + if (!mode_insert) + continue; + + v_to= (const char *) (je.s.c_str - je.sav_c_len); + str->length(0); + if (append_simple(str, js->ptr(), v_to - js->ptr()) || + (n_item > 0 && str->append(", ", 2)) || + append_json_value(str, args[n_arg+1], &tmp_val) || + append_simple(str, v_to, js->end() - v_to)) + goto js_error; /* Out of memory. */ + } + else /*JSON_PATH_KEY*/ + { + uint n_key= 0; + + if (je.value_type != JSON_VALUE_OBJECT) + continue; + + while (json_scan_next(&je) == 0 && je.state != JST_OBJ_END) + { + switch (je.state) + { + case JST_KEY: + json_string_set_str(&key_name, lp->key, lp->key_end); + if (json_key_matches(&je, &key_name)) + goto v_found; + n_key++; + if (json_skip_key(&je)) + goto js_error; + break; + default: + break; + } + } + + if (unlikely(je.s.error)) + goto js_error; + + if (!mode_insert) + continue; + + v_to= (const char *) (je.s.c_str - je.sav_c_len); + str->length(0); + if (append_simple(str, js->ptr(), v_to - js->ptr()) || + (n_key > 0 && str->append(", ", 2)) || + str->append('"') || + append_simple(str, lp->key, lp->key_end - lp->key) || + str->append("\":", 2) || + append_json_value(str, args[n_arg+1], &tmp_val) || + append_simple(str, v_to, js->end() - v_to)) + goto js_error; /* Out of memory. 
*/ + } + + goto continue_point; + +v_found: + + if (!mode_replace) + continue; + + if (json_read_value(&je)) + goto js_error; + + v_to= (const char *) je.value_begin; + str->length(0); + if (!json_value_scalar(&je)) + { + if (json_skip_level(&je)) + goto js_error; + } + + if (append_simple(str, js->ptr(), v_to - js->ptr()) || + append_json_value(str, args[n_arg+1], &tmp_val) || + append_simple(str, je.s.c_str, js->end()-(const char *) je.s.c_str)) + goto js_error; /* Out of memory. */ +continue_point: + { + /* Swap str and js. */ + if (str == &tmp_js) + { + str= js; + js= &tmp_js; + } + else + { + js= str; + str= &tmp_js; + } + } + } + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + je.killed_ptr= (uchar*)&thd->killed; + if (json_nice(&je, str, Item_func_json_format::LOOSE)) + goto js_error; + + return str; + +js_error: + report_json_error(js, &je, 0); + thd->check_killed(); // to get the error message right +return_null: + null_value= 1; + return 0; +} + + +bool Item_func_json_remove::fix_length_and_dec(THD *thd) +{ + collation.set(args[0]->collation); + max_length= args[0]->max_length; + + mark_constant_paths(paths, args+1, arg_count-1); + set_maybe_null(); + return FALSE; +} + + +String *Item_func_json_remove::val_str(String *str) +{ + json_engine_t je; + String *js= args[0]->val_json(&tmp_js); + uint n_arg, n_path; + json_string_t key_name; + THD *thd= current_thd; + + DBUG_ASSERT(fixed()); + + if (args[0]->null_value) + goto null_return; + + str->set_charset(js->charset()); + json_string_set_cs(&key_name, js->charset()); + + for (n_arg=1, n_path=0; n_arg < arg_count; n_arg++, n_path++) + { + int array_counters[JSON_DEPTH_LIMIT]; + json_path_with_flags *c_path= paths + n_path; + const char *rem_start= 0, *rem_end; + json_path_step_t *lp; + int n_item= 0; + + if (!c_path->parsed) + { + String *s_p= args[n_arg]->val_str(tmp_paths+n_path); + if (s_p) + { + if (path_setup_nwc(&c_path->p,s_p->charset(), 
+ (const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + { + report_path_error(s_p, &c_path->p, n_arg); + goto null_return; + } + + /* We search to the last step. */ + c_path->p.last_step--; + if (c_path->p.last_step < c_path->p.steps) + { + c_path->p.s.error= TRIVIAL_PATH_NOT_ALLOWED; + report_path_error(s_p, &c_path->p, n_arg); + goto null_return; + } + } + c_path->parsed= c_path->constant; + } + if (args[n_arg]->null_value) + goto null_return; + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + je.killed_ptr= (uchar*)&thd->killed; + + c_path->cur_step= c_path->p.steps; + + if (json_find_path(&je, &c_path->p, &c_path->cur_step, array_counters)) + { + if (je.s.error) + goto js_error; + } + + if (json_read_value(&je)) + goto js_error; + + lp= c_path->p.last_step+1; + + if (lp->type & JSON_PATH_ARRAY) + { + int corrected_n_item; + if (je.value_type != JSON_VALUE_ARRAY) + continue; + + corrected_n_item= lp->n_item; + if (corrected_n_item < 0) + { + int array_size; + if (json_skip_array_and_count(&je, &array_size)) + goto js_error; + corrected_n_item+= array_size; + } + + while (json_scan_next(&je) == 0 && je.state != JST_ARRAY_END) + { + switch (je.state) + { + case JST_VALUE: + if (n_item == corrected_n_item) + { + rem_start= (const char *) (je.s.c_str - + (n_item ? 
je.sav_c_len : 0)); + goto v_found; + } + n_item++; + if (json_skip_array_item(&je)) + goto js_error; + break; + default: + break; + } + } + + if (unlikely(je.s.error)) + goto js_error; + + continue; + } + else /*JSON_PATH_KEY*/ + { + if (je.value_type != JSON_VALUE_OBJECT) + continue; + + while (json_scan_next(&je) == 0 && je.state != JST_OBJ_END) + { + switch (je.state) + { + case JST_KEY: + if (n_item == 0) + rem_start= (const char *) (je.s.c_str - je.sav_c_len); + json_string_set_str(&key_name, lp->key, lp->key_end); + if (json_key_matches(&je, &key_name)) + goto v_found; + + if (json_skip_key(&je)) + goto js_error; + + rem_start= (const char *) je.s.c_str; + n_item++; + break; + default: + break; + } + } + + if (unlikely(je.s.error)) + goto js_error; + + continue; + } + +v_found: + + if (json_skip_key(&je) || json_scan_next(&je)) + goto js_error; + + rem_end= (je.state == JST_VALUE && n_item == 0) ? + (const char *) je.s.c_str : (const char *) (je.s.c_str - je.sav_c_len); + + str->length(0); + + if (append_simple(str, js->ptr(), rem_start - js->ptr()) || + (je.state == JST_KEY && n_item > 0 && str->append(",", 1)) || + append_simple(str, rem_end, js->end() - rem_end)) + goto js_error; /* Out of memory. */ + + { + /* Swap str and js. 
*/ + if (str == &tmp_js) + { + str= js; + js= &tmp_js; + } + else + { + js= str; + str= &tmp_js; + } + } + } + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + je.killed_ptr= (uchar*)&thd->killed; + if (json_nice(&je, str, Item_func_json_format::LOOSE)) + goto js_error; + + null_value= 0; + return str; + +js_error: + thd->check_killed(); // to get the error message right + report_json_error(js, &je, 0); +null_return: + null_value= 1; + return 0; +} + + +bool Item_func_json_keys::fix_length_and_dec(THD *thd) +{ + collation.set(args[0]->collation); + max_length= args[0]->max_length; + set_maybe_null(); + if (arg_count > 1) + path.set_constant_flag(args[1]->const_item()); + return FALSE; +} + + +/* + That function is for Item_func_json_keys::val_str exclusively. + It utilizes the fact the resulting string is in specific format: + ["key1", "key2"...] +*/ +static int check_key_in_list(String *res, + const uchar *key, int key_len) +{ + const uchar *c= (const uchar *) res->ptr() + 2; /* beginning '["' */ + const uchar *end= (const uchar *) res->end() - 1; /* ending '"' */ + + while (c < end) + { + int n_char; + for (n_char=0; c[n_char] != '"' && n_char < key_len; n_char++) + { + if (c[n_char] != key[n_char]) + break; + } + if (c[n_char] == '"') + { + if (n_char == key_len) + return 1; + } + else + { + while (c[n_char] != '"') + n_char++; + } + c+= n_char + 4; /* skip ', "' */ + } + return 0; +} + + +String *Item_func_json_keys::val_str(String *str) +{ + json_engine_t je; + String *js= args[0]->val_json(&tmp_js); + uint n_keys= 0; + int array_counters[JSON_DEPTH_LIMIT]; + + if ((args[0]->null_value)) + goto null_return; + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + if (arg_count < 2) + goto skip_search; + + if (!path.parsed) + { + String *s_p= args[1]->val_str(&tmp_path); + if (s_p && + path_setup_nwc(&path.p, s_p->charset(), (const uchar *) 
s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + { + report_path_error(s_p, &path.p, 1); + goto null_return; + } + path.parsed= path.constant; + } + + if (args[1]->null_value) + goto null_return; + + path.cur_step= path.p.steps; + + if (json_find_path(&je, &path.p, &path.cur_step, array_counters)) + { + if (je.s.error) + goto err_return; + + goto null_return; + } + +skip_search: + if (json_read_value(&je)) + goto err_return; + + if (je.value_type != JSON_VALUE_OBJECT) + goto null_return; + + str->length(0); + if (str->append('[')) + goto err_return; /* Out of memory. */ + /* Parse the OBJECT collecting the keys. */ + while (json_scan_next(&je) == 0 && je.state != JST_OBJ_END) + { + const uchar *key_start, *key_end; + int key_len; + + switch (je.state) + { + case JST_KEY: + key_start= je.s.c_str; + do + { + key_end= je.s.c_str; + } while (json_read_keyname_chr(&je) == 0); + if (unlikely(je.s.error)) + goto err_return; + key_len= (int)(key_end - key_start); + + if (!check_key_in_list(str, key_start, key_len)) + { + if ((n_keys > 0 && str->append(", ", 2)) || + str->append('"') || + append_simple(str, key_start, key_len) || + str->append('"')) + goto err_return; + n_keys++; + } + break; + case JST_OBJ_START: + case JST_ARRAY_START: + if (json_skip_level(&je)) + break; + break; + default: + break; + } + } + + if (unlikely(je.s.error || str->append(']'))) + goto err_return; + + null_value= 0; + return str; + +err_return: + report_json_error(js, &je, 0); +null_return: + null_value= 1; + return 0; +} + + +bool Item_func_json_search::fix_fields(THD *thd, Item **ref) +{ + if (Item_json_str_multipath::fix_fields(thd, ref)) + return TRUE; + + if (arg_count < 4) + { + escape= '\\'; + return FALSE; + } + + return fix_escape_item(thd, args[3], &tmp_js, true, + args[0]->collation.collation, &escape); +} + + +static const uint SQR_MAX_BLOB_WIDTH= (uint) sqrt(MAX_BLOB_WIDTH); + +bool Item_func_json_search::fix_length_and_dec(THD *thd) +{ + 
collation.set(args[0]->collation); + + /* + It's rather difficult to estimate the length of the result. + I believe arglen^2 is the reasonable upper limit. + */ + if (args[0]->max_length > SQR_MAX_BLOB_WIDTH) + max_length= MAX_BLOB_WIDTH; + else + { + max_length= args[0]->max_length; + max_length*= max_length; + } + + ooa_constant= args[1]->const_item(); + ooa_parsed= FALSE; + + if (arg_count > 4) + mark_constant_paths(paths, args+4, arg_count-4); + set_maybe_null(); + return FALSE; +} + + +int Item_func_json_search::compare_json_value_wild(json_engine_t *je, + const String *cmp_str) +{ + if (je->value_type != JSON_VALUE_STRING || !je->value_escaped) + return collation.collation->wildcmp( + (const char *) je->value, (const char *) (je->value + je->value_len), + cmp_str->ptr(), cmp_str->end(), escape, wild_one, wild_many) ? 0 : 1; + + { + int esc_len; + if (esc_value.alloced_length() < (uint) je->value_len && + esc_value.alloc((je->value_len / 1024 + 1) * 1024)) + return 0; + + esc_len= json_unescape(je->s.cs, je->value, je->value + je->value_len, + je->s.cs, (uchar *) esc_value.ptr(), + (uchar *) (esc_value.ptr() + + esc_value.alloced_length())); + if (esc_len <= 0) + return 0; + + return collation.collation->wildcmp( + esc_value.ptr(), esc_value.ptr() + esc_len, + cmp_str->ptr(), cmp_str->end(), escape, wild_one, wild_many) ? 
0 : 1; + } +} + + +static int append_json_path(String *str, const json_path_t *p) +{ + const json_path_step_t *c; + + if (str->append("\"$", 2)) + return TRUE; + + for (c= p->steps+1; c <= p->last_step; c++) + { + if (c->type & JSON_PATH_KEY) + { + if (str->append(".", 1) || + append_simple(str, c->key, c->key_end-c->key)) + return TRUE; + } + else /*JSON_PATH_ARRAY*/ + { + + if (str->append('[') || + str->append_ulonglong(c->n_item) || + str->append(']')) + return TRUE; + } + } + + return str->append('"'); +} + + +String *Item_func_json_search::val_str(String *str) +{ + String *js= args[0]->val_json(&tmp_js); + String *s_str= args[2]->val_str(&tmp_path); + json_engine_t je; + json_path_t p, sav_path; + uint n_arg; + int array_sizes[JSON_DEPTH_LIMIT]; + uint has_negative_path= 0; + + if (args[0]->null_value || args[2]->null_value) + goto null_return; + + if (parse_one_or_all(this, args[1], &ooa_parsed, ooa_constant, &mode_one)) + goto null_return; + + n_path_found= 0; + str->set_charset(js->charset()); + str->length(0); + + for (n_arg=4; n_arg < arg_count; n_arg++) + { + json_path_with_flags *c_path= paths + n_arg - 4; + c_path->p.types_used= JSON_PATH_KEY_NULL; + if (!c_path->parsed) + { + String *s_p= args[n_arg]->val_str(tmp_paths + (n_arg-4)); + if (s_p) + { + if (json_path_setup(&c_path->p,s_p->charset(),(const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + { + report_path_error(s_p, &c_path->p, n_arg); + goto null_return; + } + c_path->parsed= c_path->constant; + has_negative_path|= c_path->p.types_used & JSON_PATH_NEGATIVE_INDEX; + } + } + if (args[n_arg]->null_value) + goto null_return; + } + + json_get_path_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length(), &p); + + while (json_get_path_next(&je, &p) == 0) + { + if (has_negative_path && je.value_type == JSON_VALUE_ARRAY && + json_skip_array_and_count(&je, array_sizes + (p.last_step - p.steps))) + goto js_error; + + if 
(json_value_scalar(&je)) + { + if ((arg_count < 5 || + path_ok(paths, arg_count - 4, &p, je.value_type, array_sizes)) && + compare_json_value_wild(&je, s_str) != 0) + { + ++n_path_found; + if (n_path_found == 1) + { + sav_path= p; + sav_path.last_step= sav_path.steps + (p.last_step - p.steps); + } + else + { + if (n_path_found == 2) + { + if (str->append('[') || + append_json_path(str, &sav_path)) + goto js_error; + } + if (str->append(", ", 2) || append_json_path(str, &p)) + goto js_error; + } + if (mode_one) + goto end; + } + } + } + + if (unlikely(je.s.error)) + goto js_error; + +end: + if (n_path_found == 0) + goto null_return; + if (n_path_found == 1) + { + if (append_json_path(str, &sav_path)) + goto js_error; + } + else + { + if (str->append(']')) + goto js_error; + } + + null_value= 0; + return str; + + +js_error: + report_json_error(js, &je, 0); +null_return: + null_value= 1; + return 0; +} + + +LEX_CSTRING Item_func_json_format::func_name_cstring() const +{ + switch (fmt) + { + case COMPACT: + return { STRING_WITH_LEN("json_compact") }; + case LOOSE: + return { STRING_WITH_LEN("json_loose") }; + case DETAILED: + return { STRING_WITH_LEN("json_detailed") }; + default: + DBUG_ASSERT(0); + }; + + return NULL_clex_str; +} + + +bool Item_func_json_format::fix_length_and_dec(THD *thd) +{ + decimals= 0; + collation.set(args[0]->collation); + max_length= args[0]->max_length; + set_maybe_null(); + return FALSE; +} + + +String *Item_func_json_format::val_str(String *str) +{ + String *js= args[0]->val_json(&tmp_js); + json_engine_t je; + int tab_size= 4; + THD *thd= current_thd; + + if ((null_value= args[0]->null_value)) + return 0; + + if (fmt == DETAILED) + { + if (arg_count > 1) + { + tab_size= (int)args[1]->val_int(); + if (args[1]->null_value) + { + null_value= 1; + return 0; + } + } + if (tab_size < 0) + tab_size= 0; + else if (tab_size > TAB_SIZE_LIMIT) + tab_size= TAB_SIZE_LIMIT; + } + + json_scan_start(&je, js->charset(), (const uchar *) js->ptr(), + (const 
uchar *) js->ptr()+js->length()); + je.killed_ptr= (uchar*)&thd->killed; + + if (json_nice(&je, str, fmt, tab_size)) + { + null_value= 1; + report_json_error(js, &je, 0); + thd->check_killed(); // to get the error message right + return 0; + } + + return str; +} + + +String *Item_func_json_format::val_json(String *str) +{ + String *js= args[0]->val_json(&tmp_js); + if ((null_value= args[0]->null_value)) + return 0; + return js; +} + +int Arg_comparator::compare_json_str_basic(Item *j, Item *s) +{ + String *js,*str; + int c_len; + json_engine_t je; + + if ((js= j->val_str(&value1))) + { + json_scan_start(&je, js->charset(), (const uchar *) js->ptr(), + (const uchar *) js->ptr()+js->length()); + if (json_read_value(&je)) + goto error; + if (je.value_type == JSON_VALUE_STRING) + { + if (value2.realloc_with_extra_if_needed(je.value_len) || + (c_len= json_unescape(js->charset(), je.value, + je.value + je.value_len, + &my_charset_utf8mb3_general_ci, + (uchar *) value2.ptr(), + (uchar *) (value2.ptr() + je.value_len))) < 0) + goto error; + + value2.length(c_len); + js= &value2; + str= &value1; + } + else + { + str= &value2; + } + + + if ((str= s->val_str(str))) + { + if (set_null) + owner->null_value= 0; + return sortcmp(js, str, compare_collation()); + } + } + +error: + if (set_null) + owner->null_value= 1; + return -1; +} + + +int Arg_comparator::compare_e_json_str_basic(Item *j, Item *s) +{ + String *res1,*res2; + json_value_types type; + char *value; + int value_len, c_len; + Item_func_json_extract *e= (Item_func_json_extract *) j; + + res1= e->read_json(&value1, &type, &value, &value_len); + res2= s->val_str(&value2); + + if (!res1 || !res2) + return MY_TEST(res1 == res2); + + if (type == JSON_VALUE_STRING) + { + if (value1.realloc_with_extra_if_needed(value_len) || + (c_len= json_unescape(value1.charset(), (uchar *) value, + (uchar *) value+value_len, + &my_charset_utf8mb3_general_ci, + (uchar *) value1.ptr(), + (uchar *) (value1.ptr() + value_len))) < 0) + return 
1; + value1.length(c_len); + res1= &value1; + } + + return MY_TEST(sortcmp(res1, res2, compare_collation()) == 0); +} + + +String *Item_func_json_arrayagg::get_str_from_item(Item *i, String *tmp) +{ + m_tmp_json.length(0); + if (append_json_value(&m_tmp_json, i, tmp)) + return NULL; + return &m_tmp_json; +} + + +String *Item_func_json_arrayagg::get_str_from_field(Item *i,Field *f, + String *tmp, const uchar *key, size_t offset) +{ + m_tmp_json.length(0); + + if (append_json_value_from_field(&m_tmp_json, i, f, key, offset, tmp)) + return NULL; + + return &m_tmp_json; + +} + + +void Item_func_json_arrayagg::cut_max_length(String *result, + uint old_length, uint max_length) const +{ + if (result->length() == 0) + return; + + if (result->ptr()[result->length() - 1] != '"' || + max_length == 0) + { + Item_func_group_concat::cut_max_length(result, old_length, max_length); + return; + } + + Item_func_group_concat::cut_max_length(result, old_length, max_length-1); + result->append('"'); +} + + +Item *Item_func_json_arrayagg::copy_or_same(THD* thd) +{ + return new (thd->mem_root) Item_func_json_arrayagg(thd, this); +} + + +String* Item_func_json_arrayagg::val_str(String *str) +{ + if ((str= Item_func_group_concat::val_str(str))) + { + String s; + s.append('['); + s.swap(*str); + str->append(s); + str->append(']'); + } + return str; +} + + +Item_func_json_objectagg:: +Item_func_json_objectagg(THD *thd, Item_func_json_objectagg *item) + :Item_sum(thd, item) +{ + quick_group= FALSE; + result.set_charset(collation.collation); + result.append('{'); +} + + +bool +Item_func_json_objectagg::fix_fields(THD *thd, Item **ref) +{ + uint i; /* for loop variable */ + DBUG_ASSERT(fixed() == 0); + + memcpy(orig_args, args, sizeof(Item*) * arg_count); + + if (init_sum_func_check(thd)) + return TRUE; + + set_maybe_null(); + + /* + Fix fields for select list and ORDER clause + */ + + for (i=0 ; i < arg_count ; i++) + { + if (args[i]->fix_fields_if_needed_for_scalar(thd, &args[i])) + return 
TRUE; + with_flags|= args[i]->with_flags; + } + + /* skip charset aggregation for order columns */ + if (agg_arg_charsets_for_string_result(collation, args, arg_count)) + return 1; + + result.set_charset(collation.collation); + result_field= 0; + null_value= 1; + max_length= (uint32)(thd->variables.group_concat_max_len + / collation.collation->mbminlen + * collation.collation->mbmaxlen); + + if (check_sum_func(thd, ref)) + return TRUE; + + base_flags|= item_base_t::FIXED; + return FALSE; +} + + +void Item_func_json_objectagg::cleanup() +{ + DBUG_ENTER("Item_func_json_objectagg::cleanup"); + Item_sum::cleanup(); + + result.length(1); + DBUG_VOID_RETURN; +} + + +Item *Item_func_json_objectagg::copy_or_same(THD* thd) +{ + return new (thd->mem_root) Item_func_json_objectagg(thd, this); +} + + +void Item_func_json_objectagg::clear() +{ + result.length(1); + null_value= 1; +} + + +bool Item_func_json_objectagg::add() +{ + StringBuffer buf; + String *key; + + key= args[0]->val_str(&buf); + if (args[0]->is_null()) + return 0; + + null_value= 0; + if (result.length() > 1) + result.append(STRING_WITH_LEN(", ")); + + result.append('"'); + st_append_escaped(&result,key); + result.append(STRING_WITH_LEN("\":")); + + buf.length(0); + append_json_value(&result, args[1], &buf); + + return 0; +} + + +String* Item_func_json_objectagg::val_str(String* str) +{ + DBUG_ASSERT(fixed()); + if (null_value) + return 0; + + result.append('}'); + return &result; +} + + +String *Item_func_json_normalize::val_str(String *buf) +{ + String tmp; + String *raw_json= args[0]->val_str(&tmp); + + DYNAMIC_STRING normalized_json; + if (init_dynamic_string(&normalized_json, NULL, 0, 0)) + { + null_value= 1; + return NULL; + } + + null_value= args[0]->null_value; + if (null_value) + goto end; + + if (json_normalize(&normalized_json, + raw_json->ptr(), raw_json->length(), + raw_json->charset())) + { + null_value= 1; + goto end; + } + + buf->length(0); + if (buf->append(normalized_json.str, 
normalized_json.length)) + { + null_value= 1; + goto end; + } + +end: + dynstr_free(&normalized_json); + return null_value ? NULL : buf; +} + + +bool Item_func_json_normalize::fix_length_and_dec(THD *thd) +{ + collation.set(&my_charset_utf8mb4_bin); + /* 0 becomes 0.0E0, thus one character becomes 5 chars */ + fix_char_length_ulonglong((ulonglong) args[0]->max_char_length() * 5); + set_maybe_null(); + return FALSE; +} + + +/* + When the two values match or don't match we need to return true or false. + But we can have some more elements in the array left or some more keys + left in the object that we no longer want to compare. In this case, + we want to skip the current item. +*/ +void json_skip_current_level(json_engine_t *js, json_engine_t *value) +{ + json_skip_level(js); + json_skip_level(value); +} + + +/* At least one of the two arguments is a scalar. */ +bool json_find_overlap_with_scalar(json_engine_t *js, json_engine_t *value) +{ + if (json_value_scalar(value)) + { + if (js->value_type == value->value_type) + { + if (js->value_type == JSON_VALUE_NUMBER) + { + double d_j, d_v; + char *end; + int err; + + d_j= js->s.cs->strntod((char *) js->value, js->value_len, &end, &err); + d_v= value->s.cs->strntod((char *) value->value, value->value_len, + &end, &err); + + return (fabs(d_j - d_v) < 1e-12); + } + else if (js->value_type == JSON_VALUE_STRING) + { + return value->value_len == js->value_len && + memcmp(value->value, js->value, value->value_len) == 0; + } + } + return value->value_type == js->value_type; + } + else if (value->value_type == JSON_VALUE_ARRAY) + { + while (json_scan_next(value) == 0 && value->state == JST_VALUE) + { + if (json_read_value(value)) + return FALSE; + if (js->value_type == value->value_type) + { + int res1= json_find_overlap_with_scalar(js, value); + if (res1) + return TRUE; + } + if (!json_value_scalar(value)) + json_skip_level(value); + } + } + return FALSE; +} + + +/* + Compare when one is object and other is array. 
This means we are looking + for the object in the array. Hence, when value type of an element of the + array is object, then compare the two objects entirely. If they are + equal return true else return false. +*/ +bool json_compare_arr_and_obj(json_engine_t *js, json_engine_t *value) +{ + st_json_engine_t loc_val= *value; + while (json_scan_next(js) == 0 && js->state == JST_VALUE) + { + if (json_read_value(js)) + return FALSE; + if (js->value_type == JSON_VALUE_OBJECT) + { + int res1= json_find_overlap_with_object(js, value, true); + if (res1) + return TRUE; + *value= loc_val; + } + if (js->value_type == JSON_VALUE_ARRAY) + json_skip_level(js); + } + return FALSE; +} + + +bool json_compare_arrays_in_order(json_engine_t *js, json_engine_t *value) +{ + bool res= false; + while (json_scan_next(js) == 0 && json_scan_next(value) == 0 && + js->state == JST_VALUE && value->state == JST_VALUE) + { + if (json_read_value(js) || json_read_value(value)) + return FALSE; + if (js->value_type != value->value_type) + { + json_skip_current_level(js, value); + return FALSE; + } + res= check_overlaps(js, value, true); + if (!res) + { + json_skip_current_level(js, value); + return FALSE; + } + } + res= (value->state == JST_ARRAY_END || value->state == JST_OBJ_END ? 
+ TRUE : FALSE); + json_skip_current_level(js, value); + return res; +} + + +int json_find_overlap_with_array(json_engine_t *js, json_engine_t *value, + bool compare_whole) +{ + if (value->value_type == JSON_VALUE_ARRAY) + { + if (compare_whole) + return json_compare_arrays_in_order(js, value); + + json_engine_t loc_value= *value, current_js= *js; + + while (json_scan_next(js) == 0 && js->state == JST_VALUE) + { + if (json_read_value(js)) + return FALSE; + current_js= *js; + while (json_scan_next(value) == 0 && value->state == JST_VALUE) + { + if (json_read_value(value)) + return FALSE; + if (js->value_type == value->value_type) + { + int res1= check_overlaps(js, value, true); + if (res1) + return TRUE; + } + else + { + if (!json_value_scalar(value)) + json_skip_level(value); + } + *js= current_js; + } + *value= loc_value; + if (!json_value_scalar(js)) + json_skip_level(js); + } + return FALSE; + } + else if (value->value_type == JSON_VALUE_OBJECT) + { + if (compare_whole) + { + json_skip_current_level(js, value); + return FALSE; + } + return json_compare_arr_and_obj(js, value); + } + else + return json_find_overlap_with_scalar(value, js); +} + + +int compare_nested_object(json_engine_t *js, json_engine_t *value) +{ + int result= 0; + const char *value_begin= (const char*)value->s.c_str-1; + const char *js_begin= (const char*)js->s.c_str-1; + json_skip_level(value); + json_skip_level(js); + const char *value_end= (const char*)value->s.c_str; + const char *js_end= (const char*)js->s.c_str; + + String a(value_begin, value_end-value_begin,value->s.cs); + String b(js_begin, js_end-js_begin, js->s.cs); + + DYNAMIC_STRING a_res, b_res; + if (init_dynamic_string(&a_res, NULL, 4096, 1024) || + init_dynamic_string(&b_res, NULL, 4096, 1024)) + { + goto error; + } + if (json_normalize(&a_res, a.ptr(), a.length(), value->s.cs) || + json_normalize(&b_res, b.ptr(), b.length(), value->s.cs)) + { + goto error; + } + + result= strcmp(a_res.str, b_res.str) ? 
0 : 1; + + error: + dynstr_free(&a_res); + dynstr_free(&b_res); + + return MY_TEST(result); +} +int json_find_overlap_with_object(json_engine_t *js, json_engine_t *value, + bool compare_whole) +{ + if (value->value_type == JSON_VALUE_OBJECT) + { + if (compare_whole) + { + return compare_nested_object(js, value); + } + else + { + /* Find at least one common key-value pair */ + json_string_t key_name; + bool found_key= false, found_value= false; + json_engine_t loc_js= *js; + const uchar *k_start, *k_end; + + json_string_set_cs(&key_name, value->s.cs); + + while (json_scan_next(value) == 0 && value->state == JST_KEY) + { + k_start= value->s.c_str; + do + { + k_end= value->s.c_str; + } while (json_read_keyname_chr(value) == 0); + + if (unlikely(value->s.error)) + return FALSE; + + json_string_set_str(&key_name, k_start, k_end); + found_key= find_key_in_object(js, &key_name); + found_value= 0; + + if (found_key) + { + if (json_read_value(js) || json_read_value(value)) + return FALSE; + + /* + The value of key-value pair can be an be anything. If it is an object + then we need to compare the whole value and if it is an array then + we need to compare the elements in that order. So set compare_whole + to true. + */ + if (js->value_type == value->value_type) + found_value= check_overlaps(js, value, true); + if (found_value) + { + /* + We have found at least one common key-value pair now. + No need to check for more key-value pairs. So skip remaining + jsons and return TRUE. + */ + json_skip_current_level(js, value); + return TRUE; + } + else + { + /* + Key is found but value is not found. We have already + exhausted both values for current key. Hence "reset" + only js (first argument i.e json document) and + continue. + */ + *js= loc_js; + continue; + } + } + else + { + /* + key is not found. So no need to check for value for that key. + Read the value anyway so we get the "type" of json value. 
+ If is is non-scalar then skip the entire value + (scalar values get exhausted while reading so no need to skip them). + Then reset the json doc again. + */ + if (json_read_value(value)) + return FALSE; + if (!json_value_scalar(value)) + json_skip_level(value); + *js= loc_js; + } + } + /* + At this point we have already returned true if any intersection exists. + So skip jsons if not exhausted and return false. + */ + json_skip_current_level(js, value); + return FALSE; + } + } + else if (value->value_type == JSON_VALUE_ARRAY) + { + if (compare_whole) + { + json_skip_current_level(js, value); + return FALSE; + } + return json_compare_arr_and_obj(value, js); + } + return FALSE; +} + + +/* + Find if two json documents overlap + + SYNOPSIS + check_overlaps() + js - json document + value - value + compare_whole - If true then find full overlap with the document in case of + object and comparing in-order in case of array. + Else find at least one match between two objects or array. + + IMPLEMENTATION + We can compare two json datatypes if they are of same type to check if + they are equal. When comparing between a json document and json value, + there can be following cases: + 1) When at least one of the two json documents is of scalar type: + 1.a) If value and json document both are scalar, then return true + if they have same type and value. + 1.b) If json document is scalar but other is array (or vice versa), + then return true if array has at least one element of same type + and value as scalar. + 1.c) If one is scalar and other is object, then return false because + it can't be compared. + + 2) When both arguments are of non-scalar type: + 2.a) If both arguments are arrays: + Iterate over the value and json document. If there exists at least + one element in other array of same type and value as that of + element in value, then return true else return false. 
+ 2.b) If both arguments are objects: + Iterate over value and json document and if there exists at least + one key-value pair common between two objects, then return true, + else return false. + 2.c) If either of json document or value is array and other is object: + Iterate over the array, if an element of type object is found, + then compare it with the object (which is the other arguemnt). + If the entire object matches i.e all they key value pairs match, + then return true else return false. + + When we are comparing an object which is nested in other object or nested + in an array, we need to compare all the key-value pairs, irrespective of + what order they are in as opposed to non-nested where we return true if + at least one match is found. However, if we have an array nested in another + array, then we compare two arrays in that order i.e we compare + i-th element of array 1 with i-th element of array 2. + + RETURN + FALSE - If two json documents do not overlap + TRUE - if two json documents overlap +*/ +int check_overlaps(json_engine_t *js, json_engine_t *value, bool compare_whole) +{ + DBUG_EXECUTE_IF("json_check_min_stack_requirement", + { + long arbitrary_var; + long stack_used_up= (available_stack_size(current_thd->thread_stack, &arbitrary_var)); + ALLOCATE_MEM_ON_STACK(my_thread_stack_size-stack_used_up-STACK_MIN_SIZE); + }); + if (check_stack_overrun(current_thd, STACK_MIN_SIZE , NULL)) + return 1; + + switch (js->value_type) + { + case JSON_VALUE_OBJECT: + return json_find_overlap_with_object(js, value, compare_whole); + case JSON_VALUE_ARRAY: + return json_find_overlap_with_array(js, value, compare_whole); + default: + return json_find_overlap_with_scalar(js, value); + } +} + +longlong Item_func_json_overlaps::val_int() +{ + String *js= args[0]->val_json(&tmp_js); + json_engine_t je, ve; + int result; + + if ((null_value= args[0]->null_value)) + return 0; + + if (!a2_parsed) + { + val= args[1]->val_json(&tmp_val); + a2_parsed= a2_constant; + } + 
+ if (val == 0) + { + null_value= 1; + return 0; + } + + json_scan_start(&je, js->charset(), (const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + json_scan_start(&ve, val->charset(), (const uchar *) val->ptr(), + (const uchar *) val->end()); + + if (json_read_value(&je) || json_read_value(&ve)) + goto error; + + result= check_overlaps(&je, &ve, false); + if (unlikely(je.s.error || ve.s.error)) + goto error; + + return result; + +error: + if (je.s.error) + report_json_error(js, &je, 0); + if (ve.s.error) + report_json_error(val, &ve, 1); + return 0; +} + +bool Item_func_json_overlaps::fix_length_and_dec(THD *thd) +{ + a2_constant= args[1]->const_item(); + a2_parsed= FALSE; + set_maybe_null(); + + return Item_bool_func::fix_length_and_dec(thd); +} diff --git a/sql/item_jsonfunc.h b/sql/item_jsonfunc.h new file mode 100644 index 00000000..6f6b6a7a --- /dev/null +++ b/sql/item_jsonfunc.h @@ -0,0 +1,799 @@ +#ifndef ITEM_JSONFUNC_INCLUDED +#define ITEM_JSONFUNC_INCLUDED + +/* Copyright (c) 2016, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + + +/* This file defines all JSON functions */ + + +#include +#include "item_cmpfunc.h" // Item_bool_func +#include "item_strfunc.h" // Item_str_func +#include "item_sum.h" +#include "sql_type_json.h" + +class json_path_with_flags +{ +public: + json_path_t p; + bool constant; + bool parsed; + json_path_step_t *cur_step; + void set_constant_flag(bool s_constant) + { + constant= s_constant; + parsed= FALSE; + } +}; + + +void report_path_error_ex(const char *ps, json_path_t *p, + const char *fname, int n_param, + Sql_condition::enum_warning_level lv); +void report_json_error_ex(const char *js, json_engine_t *je, + const char *fname, int n_param, + Sql_condition::enum_warning_level lv); +int check_overlaps(json_engine_t *js, json_engine_t *value, bool compare_whole); +int json_find_overlap_with_object(json_engine_t *js, + json_engine_t *value, + bool compare_whole); +void json_skip_current_level(json_engine_t *js, json_engine_t *value); +bool json_find_overlap_with_scalar(json_engine_t *js, json_engine_t *value); +bool json_compare_arrays_in_order_in_order(json_engine_t *js, json_engine_t *value); +bool json_compare_arr_and_obj(json_engine_t *js, json_engine_t* value); +int json_find_overlap_with_array(json_engine_t *js, + json_engine_t *value, + bool compare_whole); + + + +class Json_engine_scan: public json_engine_t +{ +public: + Json_engine_scan(CHARSET_INFO *i_cs, const uchar *str, const uchar *end) + { + json_scan_start(this, i_cs, str, end); + } + Json_engine_scan(const String &str) + :Json_engine_scan(str.charset(), (const uchar *) str.ptr(), + (const uchar *) str.end()) + { } + bool check_and_get_value_scalar(String *res, int *error); + bool check_and_get_value_complex(String *res, int *error); +}; + + +class Json_path_extractor: public 
json_path_with_flags +{ +protected: + String tmp_js, tmp_path; + virtual ~Json_path_extractor() { } + virtual bool check_and_get_value(Json_engine_scan *je, + String *to, int *error)=0; + bool extract(String *to, Item *js, Item *jp, CHARSET_INFO *cs); +}; + + +class Item_func_json_valid: public Item_bool_func +{ +protected: + String tmp_value; + +public: + Item_func_json_valid(THD *thd, Item *json) : Item_bool_func(thd, json) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_valid") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + if (Item_bool_func::fix_length_and_dec(thd)) + return TRUE; + set_maybe_null(); + return FALSE; + } + bool set_format_by_check_constraint(Send_field_extended_metadata *to) const + override + { + static const Lex_cstring fmt(STRING_WITH_LEN("json")); + return to->set_format_name(fmt); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + enum Functype functype() const override { return JSON_VALID_FUNC; } +}; + + +class Item_func_json_equals: public Item_bool_func +{ +public: + Item_func_json_equals(THD *thd, Item *a, Item *b): + Item_bool_func(thd, a, b) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_equals") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + longlong val_int() override; +}; + + +class Item_func_json_exists: public Item_bool_func +{ +protected: + json_path_with_flags path; + String tmp_js, tmp_path; + +public: + Item_func_json_exists(THD *thd, Item *js, Item *i_path): + Item_bool_func(thd, js, i_path) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_exists") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + Item *get_copy(THD *thd) override + { return 
get_item_copy(thd, this); } + longlong val_int() override; +}; + + +class Item_json_func: public Item_str_func +{ +public: + Item_json_func(THD *thd) + :Item_str_func(thd) { } + Item_json_func(THD *thd, Item *a) + :Item_str_func(thd, a) { } + Item_json_func(THD *thd, Item *a, Item *b) + :Item_str_func(thd, a, b) { } + Item_json_func(THD *thd, List &list) + :Item_str_func(thd, list) { } + const Type_handler *type_handler() const override + { + return Type_handler_json_common::json_type_handler(max_length); + } +}; + + +class Item_func_json_value: public Item_str_func, + public Json_path_extractor +{ + +public: + Item_func_json_value(THD *thd, Item *js, Item *i_path): + Item_str_func(thd, js, i_path) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_value") }; + return name; + } + bool fix_length_and_dec(THD *thd) override ; + String *val_str(String *to) override + { + null_value= Json_path_extractor::extract(to, args[0], args[1], + collation.collation); + return null_value ? NULL : to; + } + bool check_and_get_value(Json_engine_scan *je, + String *res, int *error) override + { + return je->check_and_get_value_scalar(res, error); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_query: public Item_json_func, + public Json_path_extractor +{ +public: + Item_func_json_query(THD *thd, Item *js, Item *i_path): + Item_json_func(thd, js, i_path) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_query") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + String *val_str(String *to) override + { + null_value= Json_path_extractor::extract(to, args[0], args[1], + collation.collation); + return null_value ? 
NULL : to; + } + bool check_and_get_value(Json_engine_scan *je, + String *res, int *error) override + { + return je->check_and_get_value_complex(res, error); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_quote: public Item_str_func +{ +protected: + String tmp_s; + +public: + Item_func_json_quote(THD *thd, Item *s): Item_str_func(thd, s) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_quote") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_unquote: public Item_str_func +{ +protected: + String tmp_s; + String *read_json(json_engine_t *je); +public: + Item_func_json_unquote(THD *thd, Item *s): Item_str_func(thd, s) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_unquote") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_json_str_multipath: public Item_json_func +{ +protected: + json_path_with_flags *paths; + String *tmp_paths; +public: + Item_json_str_multipath(THD *thd, List &list): + Item_json_func(thd, list), tmp_paths(0) {} + bool fix_fields(THD *thd, Item **ref); + void cleanup(); + virtual uint get_n_paths() const = 0; +}; + + +class Item_func_json_extract: public Item_json_str_multipath +{ +protected: + String tmp_js; +public: + String *read_json(String *str, json_value_types *type, + char **out_val, int *value_len); + Item_func_json_extract(THD *thd, List &list): + Item_json_str_multipath(thd, list) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_extract") }; + return name; + } + enum Functype functype() const override 
{ return JSON_EXTRACT_FUNC; } + bool fix_length_and_dec(THD *thd) override; + String *val_str(String *) override; + longlong val_int() override; + double val_real() override; + my_decimal *val_decimal(my_decimal *) override; + uint get_n_paths() const override { return arg_count - 1; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_contains: public Item_bool_func +{ +protected: + String tmp_js; + json_path_with_flags path; + String tmp_path; + bool a2_constant, a2_parsed; + String tmp_val, *val; +public: + Item_func_json_contains(THD *thd, List &list): + Item_bool_func(thd, list) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_contains") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + longlong val_int() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_contains_path: public Item_bool_func +{ +protected: + String tmp_js; + json_path_with_flags *paths; + String *tmp_paths; + bool mode_one; + bool ooa_constant, ooa_parsed; + bool *p_found; + +public: + Item_func_json_contains_path(THD *thd, List &list): + Item_bool_func(thd, list), tmp_paths(0) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_contains_path") }; + return name; + } + bool fix_fields(THD *thd, Item **ref) override; + bool fix_length_and_dec(THD *thd) override; + void cleanup() override; + longlong val_int() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_array: public Item_json_func +{ +protected: + String tmp_val; + ulong result_limit; +public: + Item_func_json_array(THD *thd): + Item_json_func(thd) {} + Item_func_json_array(THD *thd, List &list): + Item_json_func(thd, list) {} + String *val_str(String *) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING 
func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_array") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_array_append: public Item_json_str_multipath +{ +protected: + String tmp_js; + String tmp_val; +public: + Item_func_json_array_append(THD *thd, List &list): + Item_json_str_multipath(thd, list) {} + bool fix_length_and_dec(THD *thd) override; + String *val_str(String *) override; + uint get_n_paths() const override { return arg_count/2; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_array_append") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_array_insert: public Item_func_json_array_append +{ +public: + Item_func_json_array_insert(THD *thd, List &list): + Item_func_json_array_append(thd, list) {} + String *val_str(String *) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_array_insert") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_object: public Item_func_json_array +{ +public: + Item_func_json_object(THD *thd): + Item_func_json_array(thd) {} + Item_func_json_object(THD *thd, List &list): + Item_func_json_array(thd, list) {} + String *val_str(String *) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_object") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_merge: public Item_func_json_array +{ +protected: + String tmp_js1, tmp_js2; +public: + Item_func_json_merge(THD *thd, List &list): + Item_func_json_array(thd, list) {} + String *val_str(String *) override; + LEX_CSTRING func_name_cstring() const override + { + static 
LEX_CSTRING name= {STRING_WITH_LEN("json_merge_preserve") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_json_merge_patch: public Item_func_json_merge +{ +public: + Item_func_json_merge_patch(THD *thd, List &list): + Item_func_json_merge(thd, list) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_merge_patch") }; + return name; + } + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_normalize: public Item_json_func +{ +public: + Item_func_json_normalize(THD *thd, Item *a): + Item_json_func(thd, a) {} + String *val_str(String *) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_normalize") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_length: public Item_long_func +{ + bool check_arguments() const override + { + const LEX_CSTRING name= func_name_cstring(); + if (arg_count == 0 || arg_count > 2) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name.str); + return true; + } + return args[0]->check_type_can_return_text(name) || + (arg_count > 1 && args[1]->check_type_general_purpose_string(name)); + } +protected: + json_path_with_flags path; + String tmp_js; + String tmp_path; +public: + Item_func_json_length(THD *thd, List &list): + Item_long_func(thd, list) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_length") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + longlong val_int() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_depth: public Item_long_func +{ + bool check_arguments() const override + { return 
args[0]->check_type_can_return_text(func_name_cstring()); } +protected: + String tmp_js; +public: + Item_func_json_depth(THD *thd, Item *js): Item_long_func(thd, js) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_depth") }; + return name; + } + bool fix_length_and_dec(THD *thd) override { max_length= 10; return FALSE; } + longlong val_int() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_type: public Item_str_func +{ +protected: + String tmp_js; +public: + Item_func_json_type(THD *thd, Item *js): Item_str_func(thd, js) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_type") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_insert: public Item_json_str_multipath +{ +protected: + String tmp_js; + String tmp_val; + bool mode_insert, mode_replace; +public: + Item_func_json_insert(bool i_mode, bool r_mode, THD *thd, List &list): + Item_json_str_multipath(thd, list), + mode_insert(i_mode), mode_replace(r_mode) {} + bool fix_length_and_dec(THD *thd) override; + String *val_str(String *) override; + uint get_n_paths() const override { return arg_count/2; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING json_set= {STRING_WITH_LEN("json_set") }; + static LEX_CSTRING json_insert= {STRING_WITH_LEN("json_insert") }; + static LEX_CSTRING json_replace= {STRING_WITH_LEN("json_replace") }; + return (mode_insert ? + (mode_replace ? 
json_set : json_insert) : json_replace); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_remove: public Item_json_str_multipath +{ +protected: + String tmp_js; +public: + Item_func_json_remove(THD *thd, List &list): + Item_json_str_multipath(thd, list) {} + bool fix_length_and_dec(THD *thd) override; + String *val_str(String *) override; + uint get_n_paths() const override { return arg_count - 1; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_remove") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_keys: public Item_str_func +{ +protected: + json_path_with_flags path; + String tmp_js, tmp_path; + +public: + Item_func_json_keys(THD *thd, List &list): + Item_str_func(thd, list) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_keys") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_search: public Item_json_str_multipath +{ +protected: + String tmp_js, tmp_path, esc_value; + bool mode_one; + bool ooa_constant, ooa_parsed; + int escape; + int n_path_found; + json_path_t sav_path; + + int compare_json_value_wild(json_engine_t *je, const String *cmp_str); + +public: + Item_func_json_search(THD *thd, List &list): + Item_json_str_multipath(thd, list) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_search") }; + return name; + } + bool fix_fields(THD *thd, Item **ref) override; + bool fix_length_and_dec(THD *thd) override; + String *val_str(String *) override; + uint get_n_paths() const override { return arg_count > 4 ? 
arg_count - 4 : 0; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_format: public Item_json_func +{ +public: + enum formats + { + NONE, + COMPACT, + LOOSE, + DETAILED + }; +protected: + formats fmt; + String tmp_js; +public: + Item_func_json_format(THD *thd, Item *js, formats format): + Item_json_func(thd, js), fmt(format) {} + Item_func_json_format(THD *thd, List &list): + Item_json_func(thd, list), fmt(DETAILED) {} + + LEX_CSTRING func_name_cstring() const override; + bool fix_length_and_dec(THD *thd) override; + String *val_str(String *str) override; + String *val_json(String *str) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_arrayagg : public Item_func_group_concat +{ +protected: + /* + Overrides Item_func_group_concat::skip_nulls() + NULL-s should be added to the result as JSON null value. + */ + bool skip_nulls() const override { return false; } + String *get_str_from_item(Item *i, String *tmp) override; + String *get_str_from_field(Item *i, Field *f, String *tmp, + const uchar *key, size_t offset) override; + void cut_max_length(String *result, + uint old_length, uint max_length) const override; +public: + String m_tmp_json; /* Used in get_str_from_*.. 
*/ + Item_func_json_arrayagg(THD *thd, Name_resolution_context *context_arg, + bool is_distinct, List *is_select, + const SQL_I_List &is_order, String *is_separator, + bool limit_clause, Item *row_limit, Item *offset_limit): + Item_func_group_concat(thd, context_arg, is_distinct, is_select, is_order, + is_separator, limit_clause, row_limit, offset_limit) + { + } + Item_func_json_arrayagg(THD *thd, Item_func_json_arrayagg *item) : + Item_func_group_concat(thd, item) {} + const Type_handler *type_handler() const override + { + return Type_handler_json_common::json_type_handler_sum(this); + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_arrayagg(") }; + return name; + } + enum Sumfunctype sum_func() const override { return JSON_ARRAYAGG_FUNC; } + + String* val_str(String *str) override; + + Item *copy_or_same(THD* thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_json_objectagg : public Item_sum +{ + String result; +public: + Item_func_json_objectagg(THD *thd, Item *key, Item *value) : + Item_sum(thd, key, value) + { + quick_group= FALSE; + result.append('{'); + } + + Item_func_json_objectagg(THD *thd, Item_func_json_objectagg *item); + void cleanup() override; + + enum Sumfunctype sum_func () const override { return JSON_OBJECTAGG_FUNC;} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_objectagg") }; + return name; + } + const Type_handler *type_handler() const override + { + return Type_handler_json_common::json_type_handler_sum(this); + } + void clear() override; + bool add() override; + void reset_field() override { DBUG_ASSERT(0); } // not used + void update_field() override { DBUG_ASSERT(0); } // not used + bool fix_fields(THD *,Item **) override; + + double val_real() override { return 0.0; } + longlong val_int() override { return 0; } + my_decimal *val_decimal(my_decimal 
*decimal_value) override + { + my_decimal_set_zero(decimal_value); + return decimal_value; + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + return get_date_from_string(thd, ltime, fuzzydate); + } + String* val_str(String* str) override; + Item *copy_or_same(THD* thd) override; + void no_rows_in_result() override {} + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +extern bool is_json_type(const Item *item); + +class Item_func_json_overlaps: public Item_bool_func +{ + String tmp_js; + bool a2_constant, a2_parsed; + String tmp_val, *val; +public: + Item_func_json_overlaps(THD *thd, Item *a, Item *b): + Item_bool_func(thd, a, b) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("json_overlaps") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + longlong val_int() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +#endif /* ITEM_JSONFUNC_INCLUDED */ diff --git a/sql/item_row.cc b/sql/item_row.cc new file mode 100644 index 00000000..3981392b --- /dev/null +++ b/sql/item_row.cc @@ -0,0 +1,201 @@ +/* + Copyright (c) 2002, 2011, Oracle and/or its affiliates. + Copyright (c) 2011, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +/* + It is necessary to include set_var.h instead of item.h because there + are dependencies on include order for set_var.h and item.h. This + will be resolved later. +*/ +#include "sql_class.h" // THD, set_var.h: THD +#include "set_var.h" + +void Item_row::illegal_method_call(const char *method) +{ + DBUG_ENTER("Item_row::illegal_method_call"); + DBUG_PRINT("error", ("!!! %s method was called for row item", method)); + DBUG_ASSERT(0); + my_error(ER_OPERAND_COLUMNS, MYF(0), 1); + DBUG_VOID_RETURN; +} + +bool Item_row::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed() == 0); + null_value= 0; + base_flags&= ~item_base_t::MAYBE_NULL; + + Item **arg, **arg_end; + for (arg= args, arg_end= args + arg_count; arg != arg_end ; arg++) + { + if ((*arg)->fix_fields_if_needed(thd, arg)) + return TRUE; + // we can't assign 'item' before, because fix_fields() can change arg + Item *item= *arg; + used_tables_cache |= item->used_tables(); + const_item_cache&= item->const_item() && !with_null; + not_null_tables_cache|= item->not_null_tables(); + + if (const_item_cache) + { + if (item->cols() > 1) + with_null|= item->null_inside(); + else + { + if (item->is_null()) + with_null|= 1; + } + } + base_flags|= (item->base_flags & item_base_t::MAYBE_NULL); + with_flags|= item->with_flags; + } + base_flags|= item_base_t::FIXED; + return FALSE; +} + + +bool +Item_row::eval_not_null_tables(void *opt_arg) +{ + Item **arg,**arg_end; + not_null_tables_cache= 0; + if (arg_count) + { + for (arg= args, arg_end= args + arg_count; arg != arg_end ; arg++) + { + not_null_tables_cache|= (*arg)->not_null_tables(); + } + } + return FALSE; +} + + +bool +Item_row::find_not_null_fields(table_map allowed) +{ + if (~allowed & used_tables()) + return 
false; + + Item **arg,**arg_end; + if (arg_count) + { + for (arg= args, arg_end= args + arg_count; arg != arg_end ; arg++) + { + if (!(*arg)->find_not_null_fields(allowed)) + continue; + } + } + return false; +} + + +void Item_row::cleanup() +{ + DBUG_ENTER("Item_row::cleanup"); + + Item_fixed_hybrid::cleanup(); + /* Reset to the original values */ + used_tables_and_const_cache_init(); + with_null= 0; + + DBUG_VOID_RETURN; +} + + +void Item_row::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List &fields, uint flags) +{ + Item **arg, **arg_end; + for (arg= args, arg_end= args + arg_count; arg != arg_end ; arg++) + (*arg)->split_sum_func2(thd, ref_pointer_array, fields, arg, + flags | SPLIT_SUM_SKIP_REGISTERED); +} + + +void Item_row::fix_after_pullout(st_select_lex *new_parent, Item **ref, + bool merge) +{ + used_tables_and_const_cache_init(); + not_null_tables_cache= 0; + for (uint i= 0; i < arg_count; i++) + { + args[i]->fix_after_pullout(new_parent, &args[i], merge); + used_tables_and_const_cache_join(args[i]); + not_null_tables_cache|= args[i]->not_null_tables(); + } +} + + +bool Item_row::check_cols(uint c) +{ + if (c != arg_count) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), c); + return 1; + } + return 0; +} + +void Item_row::print(String *str, enum_query_type query_type) +{ + str->append('('); + for (uint i= 0; i < arg_count; i++) + { + if (i) + str->append(','); + args[i]->print(str, query_type); + } + str->append(')'); +} + + +Item *Item_row::transform(THD *thd, Item_transformer transformer, uchar *arg) +{ + DBUG_ASSERT(!thd->stmt_arena->is_stmt_prepare()); + + if (transform_args(thd, transformer, arg)) + return 0; + return (this->*transformer)(thd, arg); +} + +void Item_row::bring_value() +{ + for (uint i= 0; i < arg_count; i++) + args[i]->bring_value(); +} + + +Item* Item_row::build_clone(THD *thd) +{ + Item **copy_args= static_cast + (alloc_root(thd->mem_root, sizeof(Item*) * arg_count)); + if (unlikely(!copy_args)) + return 0; + for (uint 
i= 0; i < arg_count; i++) + { + Item *arg_clone= args[i]->build_clone(thd); + if (!arg_clone) + return 0; + copy_args[i]= arg_clone; + } + Item_row *copy= (Item_row *) get_copy(thd); + if (unlikely(!copy)) + return 0; + copy->args= copy_args; + return copy; +} diff --git a/sql/item_row.h b/sql/item_row.h new file mode 100644 index 00000000..fbf632ba --- /dev/null +++ b/sql/item_row.h @@ -0,0 +1,156 @@ +#ifndef ITEM_ROW_INCLUDED +#define ITEM_ROW_INCLUDED + +/* + Copyright (c) 2002, 2013, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + Row items used for comparing rows and IN operations on rows: + + @verbatim + (a, b, c) > (10, 10, 30) + (a, b, c) = (select c, d, e, from t1 where x=12) + (a, b, c) IN ((1,2,2), (3,4,5), (6,7,8) + (a, b, c) IN (select c, d, e, from t1) + @endverbatim +*/ + + +/** + Item which stores (x,y,...) and ROW(x,y,...). + Note that this can be recursive: ((x,y),(z,t)) is a ROW of ROWs. +*/ +class Item_row: public Item_fixed_hybrid, + private Item_args, + private Used_tables_and_const_cache +{ + table_map not_null_tables_cache; + /** + If elements are made only of constants, of which one or more are + NULL. For example, this item is (1,2,NULL), or ( (1,NULL), (2,3) ). 
+ */ + bool with_null; +public: + Item_row(THD *thd, List &list) + :Item_fixed_hybrid(thd), Item_args(thd, list), + not_null_tables_cache(0), with_null(0) + { } + Item_row(THD *thd, Item_row *row) + :Item_fixed_hybrid(thd), Item_args(thd, static_cast(row)), + Used_tables_and_const_cache(), + not_null_tables_cache(0), with_null(0) + { } + + enum Type type() const override { return ROW_ITEM; }; + const Type_handler *type_handler() const override { return &type_handler_row; } + Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src *src, + const Tmp_field_param *param) override + { + return NULL; // Check with Vicentiu why it's called for Item_row + } + void illegal_method_call(const char *); + bool is_null() override { return null_value; } + void make_send_field(THD *thd, Send_field *) override + { + illegal_method_call((const char*)"make_send_field"); + }; + double val_real() override + { + illegal_method_call((const char*)"val"); + return 0; + }; + longlong val_int() override + { + illegal_method_call((const char*)"val_int"); + return 0; + }; + String *val_str(String *) override + { + illegal_method_call((const char*)"val_str"); + return 0; + }; + my_decimal *val_decimal(my_decimal *) override + { + illegal_method_call((const char*)"val_decimal"); + return 0; + }; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + illegal_method_call((const char*)"get_date"); + return true; + } + bool fix_fields(THD *thd, Item **ref) override; + void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge) + override; + void cleanup() override; + void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List &fields, uint flags) override; + table_map used_tables() const override { return used_tables_cache; }; + bool const_item() const override { return const_item_cache; }; + void update_used_tables() override + { + used_tables_and_const_cache_init(); + used_tables_and_const_cache_update_and_join(arg_count, args); + 
} + table_map not_null_tables() const override { return not_null_tables_cache; } + void print(String *str, enum_query_type query_type) override; + + bool walk(Item_processor processor, bool walk_subquery, void *arg) override + { + if (walk_args(processor, walk_subquery, arg)) + return true; + return (this->*processor)(arg); + } + Item *transform(THD *thd, Item_transformer transformer, uchar *arg) override; + bool eval_not_null_tables(void *opt_arg) override; + bool find_not_null_fields(table_map allowed) override; + + uint cols() const override { return arg_count; } + Item* element_index(uint i) override { return args[i]; } + Item** addr(uint i) override { return args + i; } + bool check_cols(uint c) override; + bool null_inside() override { return with_null; }; + void bring_value() override; + + Item* propagate_equal_fields(THD *thd, const Context &ctx, COND_EQUAL *cond) + override + { + Item_args::propagate_equal_fields(thd, Context_identity(), cond); + return this; + } + + bool excl_dep_on_table(table_map tab_map) override + { + return Item_args::excl_dep_on_table(tab_map); + } + + bool excl_dep_on_grouping_fields(st_select_lex *sel) override + { + return Item_args::excl_dep_on_grouping_fields(sel); + } + + bool excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred) override + { + return Item_args::excl_dep_on_in_subq_left_part(subq_pred); + } + + bool check_vcol_func_processor(void *arg) override {return FALSE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + Item *build_clone(THD *thd) override; +}; + +#endif /* ITEM_ROW_INCLUDED */ diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc new file mode 100644 index 00000000..4bbf36ec --- /dev/null +++ b/sql/item_strfunc.cc @@ -0,0 +1,5974 @@ +/* + Copyright (c) 2000, 2017, Oracle and/or its affiliates. + Copyright (c) 2009, 2022, MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/** + @file + + @brief + This file defines all string functions + + @warning + Some string functions don't always put and end-null on a String. + (This shouldn't be needed) +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" // HAVE_* + +#include "sql_priv.h" +/* + It is necessary to include set_var.h instead of item.h because there + are dependencies on include order for set_var.h and item.h. This + will be resolved later. +*/ +#include "sql_class.h" // set_var.h: THD +#include "set_var.h" +#include "sql_base.h" +#include "sql_time.h" +#include "des_key_file.h" // st_des_keyschedule, st_des_keyblock +#include "password.h" // my_make_scrambled_password, + // my_make_scrambled_password_323 +#include +#include +C_MODE_START +#include "../mysys/my_static.h" // For soundex_map +C_MODE_END +#include "sql_show.h" // append_identifier +#include +#include "sql_statistics.h" + +/* fmtlib include (https://fmt.dev/). 
*/ +#define FMT_STATIC_THOUSANDS_SEPARATOR ',' +#define FMT_HEADER_ONLY 1 +#include "fmt/format-inl.h" + +size_t username_char_length= USERNAME_CHAR_LENGTH; + +/* + Calculate max length of string from length argument to LEFT and RIGHT +*/ + +static uint32 max_length_for_string(Item *item) +{ + ulonglong length= item->val_int(); + /* Note that if value is NULL, val_int() returned 0 */ + if (length > (ulonglong) INT_MAX32) + { + /* Limit string length to maxium string length in MariaDB (2G) */ + length= item->unsigned_flag ? (ulonglong) INT_MAX32 : 0; + } + return (uint32) length; +} + + +/* + For the Items which have only val_str_ascii() method + and don't have their own "native" val_str(), + we provide a "wrapper" method to convert from ASCII + to Item character set when it's necessary. + Conversion happens only in case of "tricky" Item character set (e.g. UCS2). + Normally conversion does not happen, and val_str_ascii() is immediately + returned instead. + + No matter if conversion is needed or not needed, + the result is always returned in "str" (see MDEV-10306 why). + + @param [OUT] str - Store the result here + @param [IN] ascii_buffer - Use this temporary buffer to call val_str_ascii() +*/ +String *Item_func::val_str_from_val_str_ascii(String *str, String *ascii_buffer) +{ + DBUG_ASSERT(fixed()); + + if (!(collation.collation->state & MY_CS_NONASCII)) + { + String *res= val_str_ascii(str); + if (res) + res->set_charset(collation.collation); + return res; + } + + DBUG_ASSERT(str != ascii_buffer); + + uint errors; + String *res= val_str_ascii(ascii_buffer); + if (!res) + return 0; + + if ((null_value= str->copy(res->ptr(), res->length(), + &my_charset_latin1, collation.collation, + &errors))) + return 0; + + return str; +} + + +bool Item_str_func::fix_fields(THD *thd, Item **ref) +{ + bool res= Item_func::fix_fields(thd, ref); + /* + In Item_str_func::check_well_formed_result() we may set null_value + flag on the same condition as in test() below. 
+ */ + if (thd->is_strict_mode()) + set_maybe_null(); + return res; +} + + +my_decimal *Item_str_func::val_decimal(my_decimal *decimal_value) +{ + DBUG_ASSERT(fixed()); + StringBuffer<64> tmp; + String *res= val_str(&tmp); + return res ? decimal_from_string_with_check(decimal_value, res) : 0; +} + + +double Item_str_func::val_real() +{ + DBUG_ASSERT(fixed()); + StringBuffer<64> tmp; + String *res= val_str(&tmp); + return res ? double_from_string_with_check(res) : 0.0; +} + + +longlong Item_str_func::val_int() +{ + DBUG_ASSERT(fixed()); + StringBuffer<22> tmp; + String *res= val_str(&tmp); + return res ? longlong_from_string_with_check(res) : 0; +} + + +String *Item_func_md5::val_str_ascii(String *str) +{ + DBUG_ASSERT(fixed()); + String * sptr= args[0]->val_str(str); + if (sptr) + { + uchar digest[16]; + + null_value=0; + compute_md5_hash(digest, (const char *) sptr->ptr(), sptr->length()); + if (str->alloc(32)) // Ensure that memory is free + { + null_value=1; + return 0; + } + array_to_hex((char *) str->ptr(), digest, 16); + str->set_charset(&my_charset_numeric); + str->length((uint) 32); + return str; + } + null_value=1; + return 0; +} + + +String *Item_func_sha::val_str_ascii(String *str) +{ + DBUG_ASSERT(fixed()); + String * sptr= args[0]->val_str(str); + if (sptr) /* If we got value different from NULL */ + { + /* Temporary buffer to store 160bit digest */ + uint8 digest[MY_SHA1_HASH_SIZE]; + my_sha1(digest, (const char *) sptr->ptr(), sptr->length()); + /* Ensure that memory is free and we got result */ + if (!str->alloc(MY_SHA1_HASH_SIZE*2)) + { + array_to_hex((char *) str->ptr(), digest, MY_SHA1_HASH_SIZE); + str->set_charset(&my_charset_numeric); + str->length((uint) MY_SHA1_HASH_SIZE*2); + null_value=0; + return str; + } + } + null_value=1; + return 0; +} + +bool Item_func_sha::fix_length_and_dec(THD *thd) +{ + // size of hex representation of hash + fix_length_and_charset(MY_SHA1_HASH_SIZE * 2, default_charset()); + return FALSE; +} + +String 
*Item_func_sha2::val_str_ascii(String *str) +{ + DBUG_ASSERT(fixed()); + unsigned char digest_buf[512/8]; // enough for SHA512 + String *input_string; + const char *input_ptr; + size_t input_len; + + input_string= args[0]->val_str(str); + str->set_charset(&my_charset_bin); + + if (input_string == NULL) + { + null_value= TRUE; + return (String *) NULL; + } + + null_value= args[0]->null_value; + if (null_value) + return (String *) NULL; + + input_ptr= input_string->ptr(); + input_len= input_string->length(); + + longlong digest_length= args[1]->val_int(); + switch (digest_length) { + case 512: + my_sha512(digest_buf, input_ptr, input_len); + break; + case 384: + my_sha384(digest_buf, input_ptr, input_len); + break; + case 224: + my_sha224(digest_buf, input_ptr, input_len); + break; + case 0: // SHA-256 is the default + digest_length= 256; + /* fall through */ + case 256: + my_sha256(digest_buf, input_ptr, input_len); + break; + default: + if (!args[1]->const_item()) + { + THD *thd= current_thd; + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_PARAMETERS_TO_NATIVE_FCT, + ER_THD(thd, ER_WRONG_PARAMETERS_TO_NATIVE_FCT), + "sha2"); + } + null_value= TRUE; + return NULL; + } + digest_length/= 8; /* bits to bytes */ + + /* + Since we're subverting the usual String methods, we must make sure that + the destination has space for the bytes we're about to write. + */ + str->alloc((uint) digest_length*2 + 1); /* Each byte as two nybbles */ + + /* Convert the large number to a string-hex representation. */ + array_to_hex((char *) str->ptr(), digest_buf, (uint)digest_length); + + /* We poked raw bytes in. We must inform the the String of its length. */ + str->length((uint) digest_length*2); /* Each byte as two nybbles */ + + null_value= FALSE; + return str; +} + + +bool Item_func_sha2::fix_length_and_dec(THD *thd) +{ + set_maybe_null(); + max_length = 0; + + int sha_variant= (int)(args[1]->const_item() ? 
args[1]->val_int() : 512); + + switch (sha_variant) { + case 0: // SHA-256 is the default + sha_variant= 256; + /* fall through */ + case 512: + case 384: + case 256: + case 224: + fix_length_and_charset(sha_variant/8 * 2, default_charset()); + break; + default: + THD *thd= current_thd; + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_PARAMETERS_TO_NATIVE_FCT, + ER_THD(thd, ER_WRONG_PARAMETERS_TO_NATIVE_FCT), + "sha2"); + } + return FALSE; +} + +/* Implementation of AES encryption routines */ +void Item_aes_crypt::create_key(String *user_key, uchar *real_key) +{ + uchar *real_key_end= real_key + AES_KEY_LENGTH / 8; + uchar *ptr; + const char *sptr= user_key->ptr(); + const char *key_end= sptr + user_key->length(); + + bzero(real_key, AES_KEY_LENGTH / 8); + + for (ptr= real_key; sptr < key_end; ptr++, sptr++) + { + if (ptr == real_key_end) + ptr= real_key; + *ptr ^= (uchar) *sptr; + } +} + + +String *Item_aes_crypt::val_str(String *str2) +{ + DBUG_ASSERT(fixed()); + StringBuffer<80> user_key_buf; + String *sptr= args[0]->val_str(&tmp_value); + String *user_key= args[1]->val_str(&user_key_buf); + uint32 aes_length; + + if (sptr && user_key) // we need both arguments to be not NULL + { + null_value=0; + aes_length=my_aes_get_size(MY_AES_ECB, sptr->length()); + + if (!str2->alloc(aes_length)) // Ensure that memory is free + { + uchar rkey[AES_KEY_LENGTH / 8]; + create_key(user_key, rkey); + + if (!my_aes_crypt(MY_AES_ECB, what, (uchar*)sptr->ptr(), sptr->length(), + (uchar*)str2->ptr(), &aes_length, + rkey, AES_KEY_LENGTH / 8, 0, 0)) + { + str2->length((uint) aes_length); + DBUG_ASSERT(collation.collation == &my_charset_bin); + str2->set_charset(&my_charset_bin); + return str2; + } + } + } + null_value=1; + return 0; +} + +bool Item_func_aes_encrypt::fix_length_and_dec(THD *thd) +{ + max_length=my_aes_get_size(MY_AES_ECB, args[0]->max_length); + what= ENCRYPTION_FLAG_ENCRYPT; + return FALSE; +} + + + +bool 
Item_func_aes_decrypt::fix_length_and_dec(THD *thd) +{ + max_length=args[0]->max_length; + set_maybe_null(); + what= ENCRYPTION_FLAG_DECRYPT; + return FALSE; +} + + +bool Item_func_to_base64::fix_length_and_dec(THD *thd) +{ + base_flags|= args[0]->base_flags & item_base_t::MAYBE_NULL; + collation.set(default_charset(), DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII); + if (args[0]->max_length > (uint) my_base64_encode_max_arg_length()) + { + set_maybe_null(); + fix_char_length_ulonglong((ulonglong) my_base64_encode_max_arg_length()); + } + else + { + int length= my_base64_needed_encoded_length((int) args[0]->max_length); + DBUG_ASSERT(length > 0); + fix_char_length_ulonglong((ulonglong) length - 1); + } + return FALSE; +} + + +String *Item_func_to_base64::val_str_ascii(String *str) +{ + String *res= args[0]->val_str(&tmp_value); + bool too_long= false; + int length; + if (!res || + res->length() > (uint) my_base64_encode_max_arg_length() || + (too_long= + ((uint) (length= my_base64_needed_encoded_length((int) res->length())) > + current_thd->variables.max_allowed_packet)) || + str->alloc((uint) length)) + { + null_value= 1; // NULL input, too long input, or OOM. 
+ if (too_long) + { + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), + thd->variables.max_allowed_packet); + } + return 0; + } + my_base64_encode(res->ptr(), (int) res->length(), (char*) str->ptr()); + DBUG_ASSERT(length > 0); + str->length((uint) length - 1); // Without trailing '\0' + null_value= 0; + return str; +} + + +bool Item_func_from_base64::fix_length_and_dec(THD *thd) +{ + if (args[0]->max_length > (uint) my_base64_decode_max_arg_length()) + { + fix_char_length_ulonglong((ulonglong) my_base64_decode_max_arg_length()); + } + else + { + int length= my_base64_needed_decoded_length((int) args[0]->max_length); + fix_char_length_ulonglong((ulonglong) length); + } + // Can be NULL, e.g. in case of badly formed input string + set_maybe_null(); + return FALSE; +} + + +String *Item_func_from_base64::val_str(String *str) +{ + String *res= args[0]->val_str_ascii(&tmp_value); + int length; + const char *end_ptr; + + if (!res) + goto err; + + if (res->length() > (uint) my_base64_decode_max_arg_length() || + ((uint) (length= my_base64_needed_decoded_length((int) res->length())) > + current_thd->variables.max_allowed_packet)) + { + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), + thd->variables.max_allowed_packet); + goto err; + } + + if (str->alloc((uint) length)) + goto err; + + if ((length= my_base64_decode(res->ptr(), (int) res->length(), + (char *) str->ptr(), &end_ptr, 0)) < 0 || + end_ptr < res->ptr() + res->length()) + { + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_BAD_BASE64_DATA, ER_THD(thd, ER_BAD_BASE64_DATA), + (int) (end_ptr - res->ptr())); + goto err; + } + + str->length((uint) length); + null_value= 0; + return str; +err: + null_value= 1; // 
NULL input, too long input, OOM, or badly formed input + return 0; +} +/////////////////////////////////////////////////////////////////////////////// + + +const char *histogram_types[] = + {"SINGLE_PREC_HB", "DOUBLE_PREC_HB", "JSON_HB", 0}; +static TYPELIB histogram_types_typelib= + { array_elements(histogram_types), + "histogram_types", + histogram_types, NULL}; +const char *representation_by_type[]= {"%.3f", "%.5f"}; + +String *Item_func_decode_histogram::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + char buff[STRING_BUFFER_USUAL_SIZE]; + String *res, tmp(buff, sizeof(buff), &my_charset_bin); + int type; + + tmp.length(0); + if (!(res= args[0]->val_str(&tmp)) || + (type= find_type(res->c_ptr_safe(), + &histogram_types_typelib, MYF(0))) <= 0) + { + null_value= 1; + return 0; + } + type--; + + tmp.length(0); + if (!(res= args[1]->val_str(&tmp))) + { + null_value= 1; + return 0; + } + + if (type == JSON_HB) + { + // It's a JSON histogram. Return it as-is. + null_value= 0; + return res; + } + + if (type == DOUBLE_PREC_HB && res->length() % 2 != 0) + res->length(res->length() - 1); // one byte is unused + + double prev= 0.0; + uint i; + str->length(0); + char numbuf[32]; + const uchar *p= (uchar*)res->c_ptr_safe(); + for (i= 0; i < res->length(); i++) + { + double val; + switch (type) + { + case SINGLE_PREC_HB: + val= p[i] / ((double)((1 << 8) - 1)); + break; + case DOUBLE_PREC_HB: + val= uint2korr(p + i) / ((double)((1 << 16) - 1)); + i++; + break; + default: + val= 0; + DBUG_ASSERT(0); + } + /* show delta with previous value */ + size_t size= my_snprintf(numbuf, sizeof(numbuf), + representation_by_type[type], val - prev); + str->append(numbuf, size); + str->append(','); + prev= val; + } + /* show delta with max */ + size_t size= my_snprintf(numbuf, sizeof(numbuf), + representation_by_type[type], 1.0 - prev); + str->append(numbuf, size); + + null_value=0; + return str; +} + + +/////////////////////////////////////////////////////////////////////////////// + +/* 
+ Realloc the result buffer. + NOTE: We should be prudent in the initial allocation unit -- the + size of the arguments is a function of data distribution, which + can be any. Instead of overcommitting at the first row, we grow + the allocated amount by the factor of 2. This ensures that no + more than 25% of memory will be overcommitted on average. + + @param IN/OUT str - the result string + @param IN length - new total space required in "str" + @retval false - on success + @retval true - on error +*/ + +bool Item_func_concat::realloc_result(String *str, uint length) const +{ + if (str->alloced_length() >= length) + return false; // Alloced space is big enough, nothing to do. + + if (str->alloced_length() == 0) + return str->alloc(length); + + /* + Item_func_concat::val_str() makes sure the result length does not grow + higher than max_allowed_packet. So "length" is limited to 1G here. + We can't say anything about the current value of str->alloced_length(), + as str was initially set by args[0]->val_str(str). + So multiplication by 2 can overflow, if args[0] for some reasons + did not limit the result to max_alloced_packet. But it's not harmful, + "str" will be reallocated exactly to "length" bytes in case of overflow. 
+ */ + uint new_length= MY_MAX(str->alloced_length() * 2, length); + return str->realloc(new_length); +} + + +/** + Concatenate args with the following premises: + If only one arg (which is ok), return value of arg; +*/ + +String *Item_func_concat::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + THD *thd= current_thd; + String *res; + + null_value=0; + if (!(res= args[0]->val_str(str))) + goto null; + + if (res != str) + str->copy_or_move(res->ptr(), res->length(), res->charset()); + + for (uint i= 1 ; i < arg_count ; i++) + { + if (!(res= args[i]->val_str(&tmp_value)) || + append_value(thd, str, res)) + goto null; + } + + str->set_charset(collation.collation); + return str; + +null: + null_value= true; + return 0; +} + + +String *Item_func_concat_operator_oracle::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + THD *thd= current_thd; + String *res= NULL; + uint i; + + null_value=0; + // Search first non null argument + for (i= 0; i < arg_count; i++) + { + if ((res= args[i]->val_str(str))) + break; + } + if (!res) + goto null; + + if (res != str) + str->copy(res->ptr(), res->length(), res->charset()); + + for (i++ ; i < arg_count ; i++) + { + if (!(res= args[i]->val_str(&tmp_value)) || res->length() == 0) + continue; + if (append_value(thd, str, res)) + goto null; + } + + str->set_charset(collation.collation); + return str; + +null: + null_value= true; + return 0; +} + + +bool Item_func_concat::append_value(THD *thd, String *res, const String *app) +{ + uint concat_len; + if ((concat_len= res->length() + app->length()) > + thd->variables.max_allowed_packet) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER(ER_WARN_ALLOWED_PACKET_OVERFLOWED), func_name(), + thd->variables.max_allowed_packet); + return true; + } + DBUG_ASSERT(!res->uses_buffer_owned_by(app)); + DBUG_ASSERT(!app->uses_buffer_owned_by(res)); + return realloc_result(res, concat_len) || res->append(*app); +} + + +bool 
Item_func_concat::fix_length_and_dec(THD *thd) +{ + ulonglong char_length= 0; + + if (agg_arg_charsets_for_string_result(collation, args, arg_count)) + return TRUE; + + for (uint i=0 ; i < arg_count ; i++) + char_length+= args[i]->max_char_length(); + + fix_char_length_ulonglong(char_length); + return FALSE; +} + +/** + @details + Function des_encrypt() by tonu@spam.ee & monty + Works only if compiled with OpenSSL library support. + @return + A binary string where first character is CHAR(128 | key-number). + If one uses a string key key_number is 127. + Encryption result is longer than original by formula: + @code new_length= org_length + (8-(org_length % 8))+1 @endcode +*/ +bool Item_func_des_encrypt::fix_length_and_dec(THD *thd) +{ + set_maybe_null(); + /* 9 = MAX ((8- (arg_len % 8)) + 1) */ + max_length = args[0]->max_length + 9; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, ER_WARN_DEPRECATED_SYNTAX, + ER_THD(thd, ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT), + func_name_cstring().str); + return FALSE; +} + + +String *Item_func_des_encrypt::val_str(String *str) +{ + DBUG_ASSERT(fixed()); +#if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY) + uint code= ER_WRONG_PARAMETERS_TO_PROCEDURE; + DES_cblock ivec; + struct st_des_keyblock keyblock; + struct st_des_keyschedule keyschedule; + const char *append_str="********"; + uint key_number, res_length, tail; + String *res= args[0]->val_str(&tmp_value); + + if ((null_value= args[0]->null_value)) + return 0; // ENCRYPT(NULL) == NULL + if ((res_length=res->length()) == 0) + return make_empty_result(str); + if (arg_count == 1) + { + /* Protect against someone doing FLUSH DES_KEY_FILE */ + mysql_mutex_lock(&LOCK_des_key_file); + keyschedule= des_keyschedule[key_number=des_default_key]; + mysql_mutex_unlock(&LOCK_des_key_file); + } + else if (args[1]->result_type() == INT_RESULT) + { + key_number= (uint) args[1]->val_int(); + if (key_number > 9) + goto error; + mysql_mutex_lock(&LOCK_des_key_file); + 
keyschedule= des_keyschedule[key_number]; + mysql_mutex_unlock(&LOCK_des_key_file); + } + else + { + String *keystr= args[1]->val_str(str); + if (!keystr) + goto error; + key_number=127; // User key string + + /* We make good 24-byte (168 bit) key from given plaintext key with MD5 */ + bzero((char*) &ivec,sizeof(ivec)); + if (!EVP_BytesToKey(EVP_des_ede3_cbc(),EVP_md5(),NULL, + (uchar*) keystr->ptr(), (int) keystr->length(), + 1, (uchar*) &keyblock,ivec)) + goto error; + DES_set_key_unchecked(&keyblock.key1,&keyschedule.ks1); + DES_set_key_unchecked(&keyblock.key2,&keyschedule.ks2); + DES_set_key_unchecked(&keyblock.key3,&keyschedule.ks3); + } + + /* + The problem: DES algorithm requires original data to be in 8-bytes + chunks. Missing bytes get filled with '*'s and result of encryption + can be up to 8 bytes longer than original string. When decrypted, + we do not know the size of original string :( + We add one byte with value 0x1..0x8 as the last byte of the padded + string marking change of string length. 
+ */ + + tail= 8 - (res_length % 8); // 1..8 marking extra length + res_length+=tail; + if (tmp_arg.alloc(res_length)) + goto error; + tmp_arg.length(0); + tmp_arg.append(res->ptr(), res->length()); + code= ER_OUT_OF_RESOURCES; + if (tmp_arg.append(append_str, tail) || str->alloc(res_length+1)) + goto error; + tmp_arg[res_length-1]=tail; // save extra length + str->length(res_length+1); + str->set_charset(&my_charset_bin); + (*str)[0]=(char) (128 | key_number); + // Real encryption + bzero((char*) &ivec,sizeof(ivec)); + DES_ede3_cbc_encrypt((const uchar*) (tmp_arg.ptr()), + (uchar*) (str->ptr()+1), + res_length, + &keyschedule.ks1, + &keyschedule.ks2, + &keyschedule.ks3, + &ivec, TRUE); + return str; + +error: + THD *thd= current_thd; + push_warning_printf(thd,Sql_condition::WARN_LEVEL_WARN, + code, ER_THD(thd, code), + "des_encrypt"); +#else + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_FEATURE_DISABLED, ER_THD(thd, ER_FEATURE_DISABLED), + "des_encrypt", "--with-ssl"); +#endif /* defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY) */ + null_value=1; + return 0; +} + + +bool Item_func_des_decrypt::fix_length_and_dec(THD *thd) +{ + set_maybe_null(); + /* 9 = MAX ((8- (arg_len % 8)) + 1) */ + max_length= args[0]->max_length; + if (max_length >= 9U) + max_length-= 9U; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, ER_WARN_DEPRECATED_SYNTAX, + ER_THD(thd, ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT), + func_name_cstring().str); + return FALSE; +} + + +String *Item_func_des_decrypt::val_str(String *str) +{ + DBUG_ASSERT(fixed()); +#if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY) + uint code= ER_WRONG_PARAMETERS_TO_PROCEDURE; + DES_cblock ivec; + struct st_des_keyblock keyblock; + struct st_des_keyschedule keyschedule; + String *res= args[0]->val_str(&tmp_value); + uint length,tail; + + if ((null_value= args[0]->null_value)) + return 0; + length= res->length(); + if (length < 9 || (length % 8) != 1 || !((*res)[0] 
& 128)) + return res; // Skip decryption if not encrypted + + if (arg_count == 1) // If automatic uncompression + { + uint key_number=(uint) (*res)[0] & 127; + // Check if automatic key and that we have privilege to uncompress using it + if (!(current_thd->security_ctx->master_access & PRIV_DES_DECRYPT_ONE_ARG) || + key_number > 9) + goto error; + + mysql_mutex_lock(&LOCK_des_key_file); + keyschedule= des_keyschedule[key_number]; + mysql_mutex_unlock(&LOCK_des_key_file); + } + else + { + // We make good 24-byte (168 bit) key from given plaintext key with MD5 + String *keystr= args[1]->val_str(str); + if (!keystr) + goto error; + + bzero((char*) &ivec,sizeof(ivec)); + if (!EVP_BytesToKey(EVP_des_ede3_cbc(),EVP_md5(),NULL, + (uchar*) keystr->ptr(),(int) keystr->length(), + 1,(uchar*) &keyblock,ivec)) + goto error; + // Here we set all 64-bit keys (56 effective) one by one + DES_set_key_unchecked(&keyblock.key1,&keyschedule.ks1); + DES_set_key_unchecked(&keyblock.key2,&keyschedule.ks2); + DES_set_key_unchecked(&keyblock.key3,&keyschedule.ks3); + } + code= ER_OUT_OF_RESOURCES; + if (str->alloc(length-1)) + goto error; + + bzero((char*) &ivec,sizeof(ivec)); + DES_ede3_cbc_encrypt((const uchar*) res->ptr()+1, + (uchar*) (str->ptr()), + length-1, + &keyschedule.ks1, + &keyschedule.ks2, + &keyschedule.ks3, + &ivec, FALSE); + /* Restore old length of key */ + if ((tail=(uint) (uchar) (*str)[length-2]) > 8) + goto wrong_key; // Wrong key + str->length(length-1-tail); + str->set_charset(&my_charset_bin); + return str; + +error: + { + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + code, ER_THD(thd, code), + "des_decrypt"); + } +wrong_key: +#else + { + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_FEATURE_DISABLED, ER_THD(thd, ER_FEATURE_DISABLED), + "des_decrypt", "--with-ssl"); + } +#endif /* defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY) */ + null_value=1; + return 0; +} + + +/** + concat with 
separator. First arg is the separator + concat_ws takes at least two arguments. +*/ + +String *Item_func_concat_ws::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + char tmp_str_buff[10]; + String tmp_sep_str(tmp_str_buff, sizeof(tmp_str_buff),default_charset_info), + *sep_str, *res, *res2,*use_as_buff; + uint i; + bool is_const= 0; + THD *thd= 0; + + null_value=0; + if (!(sep_str= args[0]->val_str(&tmp_sep_str))) + goto null; + + use_as_buff= &tmp_value; + str->length(0); // QQ; Should be removed + res=str; // If 0 arg_count + + // Skip until non-null argument is found. + // If not, return the empty string + for (i=1; i < arg_count; i++) + if ((res= args[i]->val_str(str))) + { + is_const= args[i]->const_item(); + break; + } + + if (i == arg_count) + return make_empty_result(str); + + for (i++; i < arg_count ; i++) + { + if (!(res2= args[i]->val_str(use_as_buff))) + continue; // Skip NULL + + if (!thd) + thd= current_thd; + if (res->length() + sep_str->length() + res2->length() > + thd->variables.max_allowed_packet) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), + thd->variables.max_allowed_packet); + goto null; + } + if (!is_const && res->alloced_length() >= + res->length() + sep_str->length() + res2->length()) + { // Use old buffer + res->append(*sep_str); // res->length() > 0 always + res->append(*res2); + } + else if (str->alloced_length() >= + res->length() + sep_str->length() + res2->length()) + { + /* We have room in str; We can't get any errors here */ + if (str->ptr() == res2->ptr()) + { // This is quite uncommon! 
+ str->replace(0,0,*sep_str); + str->replace(0,0,*res); + } + else + { + str->copy(*res); + str->append(*sep_str); + str->append(*res2); + } + res=str; + use_as_buff= &tmp_value; + } + else if (res == &tmp_value) + { + if (res->append(*sep_str) || res->append(*res2)) + goto null; // Must be a blob + } + else if (res2 == &tmp_value) + { // This can happen only 1 time + if (tmp_value.replace(0,0,*sep_str) || tmp_value.replace(0,0,*res)) + goto null; + res= &tmp_value; + use_as_buff=str; // Put next arg here + } + else if (tmp_value.is_alloced() && res2->ptr() >= tmp_value.ptr() && + res2->ptr() < tmp_value.ptr() + tmp_value.alloced_length()) + { + /* + This happens really seldom: + In this case res2 is sub string of tmp_value. We will + now work in place in tmp_value to set it to res | sep_str | res2 + */ + /* Chop the last characters in tmp_value that isn't in res2 */ + tmp_value.length((uint32) (res2->ptr() - tmp_value.ptr()) + + res2->length()); + /* Place res2 at start of tmp_value, remove chars before res2 */ + if (tmp_value.replace(0,(uint32) (res2->ptr() - tmp_value.ptr()), + *res) || + tmp_value.replace(res->length(),0, *sep_str)) + goto null; + res= &tmp_value; + use_as_buff=str; // Put next arg here + } + else + { // Two big const strings + /* + NOTE: We should be prudent in the initial allocation unit -- the + size of the arguments is a function of data distribution, which can + be any. Instead of overcommitting at the first row, we grow the + allocated amount by the factor of 2. This ensures that no more than + 25% of memory will be overcommitted on average. 
+ */ + + uint concat_len= res->length() + sep_str->length() + res2->length(); + + if (tmp_value.alloced_length() < concat_len) + { + if (tmp_value.alloced_length() == 0) + { + if (tmp_value.alloc(concat_len)) + goto null; + } + else + { + uint new_len = MY_MAX(tmp_value.alloced_length() * 2, concat_len); + + if (tmp_value.alloc(new_len)) + goto null; + } + } + + if (tmp_value.copy(*res) || + tmp_value.append(*sep_str) || + tmp_value.append(*res2)) + goto null; + res= &tmp_value; + use_as_buff=str; + } + } + res->set_charset(collation.collation); + return res; + +null: + null_value=1; + return 0; +} + + +bool Item_func_concat_ws::fix_length_and_dec(THD *thd) +{ + ulonglong char_length; + + if (agg_arg_charsets_for_string_result(collation, args, arg_count)) + return TRUE; + + /* + arg_count cannot be less than 2, + it is done on parser level in sql_yacc.yy + so, (arg_count - 2) is safe here. + */ + char_length= (ulonglong) args[0]->max_char_length() * (arg_count - 2); + for (uint i=1 ; i < arg_count ; i++) + char_length+= args[i]->max_char_length(); + + fix_char_length_ulonglong(char_length); + return FALSE; +} + + +String *Item_func_reverse::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String *res= args[0]->val_str(&tmp_value); + const char *ptr, *end; + char *tmp; + + if ((null_value=args[0]->null_value)) + return 0; + /* An empty string is a special case as the string pointer may be null */ + if (!res->length()) + return make_empty_result(str); + if (str->alloc(res->length())) + { + null_value= 1; + return 0; + } + str->length(res->length()); + str->set_charset(res->charset()); + ptr= res->ptr(); + end= res->end(); + tmp= (char *) str->end(); +#ifdef USE_MB + if (res->use_mb()) + { + uint32 l; + while (ptr < end) + { + if ((l= my_ismbchar(res->charset(),ptr,end))) + { + tmp-= l; + DBUG_ASSERT(tmp >= str->ptr()); + memcpy(tmp,ptr,l); + ptr+= l; + } + else + *--tmp= *ptr++; + } + } + else +#endif /* USE_MB */ + { + while (ptr < end) + *--tmp= *ptr++; + } + 
return str; +} + + +bool Item_func_reverse::fix_length_and_dec(THD *thd) +{ + if (agg_arg_charsets_for_string_result(collation, args, 1)) + return TRUE; + DBUG_ASSERT(collation.collation != NULL); + fix_char_length(args[0]->max_char_length()); + return FALSE; +} + +/** + Replace all occurrences of string2 in string1 with string3. + + Don't reallocate val_str() if not needed. + + @todo + Fix that this works with binary strings when using USE_MB +*/ + +String *Item_func_replace::val_str_internal(String *str, + String *empty_string_for_null) +{ + DBUG_ASSERT(fixed()); + String *res,*res2,*res3; + int offset; + uint from_length,to_length; + bool alloced=0; +#ifdef USE_MB + const char *ptr,*end,*strend,*search,*search_end; + uint32 l; + bool binary_cmp; +#endif + THD *thd= 0; + + null_value=0; + res=args[0]->val_str(str); + if (args[0]->null_value) + goto null; + res2=args[1]->val_str(&tmp_value); + if (args[1]->null_value) + { + if (!empty_string_for_null) + goto null; + res2= empty_string_for_null; + } + res->set_charset(collation.collation); + +#ifdef USE_MB + binary_cmp = ((res->charset()->state & MY_CS_BINSORT) || !res->use_mb()); +#endif + + if (res2->length() == 0) + return res; +#ifndef USE_MB + if ((offset=res->strstr(*res2)) < 0) + return res; +#else + offset=0; + if (binary_cmp && (offset=res->strstr(*res2)) < 0) + return res; +#endif + if (!(res3=args[2]->val_str(&tmp_value2))) + { + if (!empty_string_for_null) + goto null; + res3= empty_string_for_null; + } + from_length= res2->length(); + to_length= res3->length(); + +#ifdef USE_MB + if (!binary_cmp) + { + search=res2->ptr(); + search_end=search+from_length; +redo: + DBUG_ASSERT(res->ptr() || !offset); + ptr=res->ptr()+offset; + strend=res->ptr()+res->length(); + /* + In some cases val_str() can return empty string + with ptr() == NULL and length() == 0. + Let's check strend to avoid overflow. + */ + end= strend ? 
strend - from_length + 1 : NULL; + while (ptr < end) + { + if (*ptr == *search) + { + char *i,*j; + i=(char*) ptr+1; j=(char*) search+1; + while (j != search_end) + if (*i++ != *j++) goto skip; + offset= (int) (ptr-res->ptr()); + + if (!thd) + thd= current_thd; + + if (res->length()-from_length + to_length > + thd->variables.max_allowed_packet) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), + thd->variables.max_allowed_packet); + + goto null; + } + if (!alloced) + { + alloced=1; + res=copy_if_not_alloced(str,res,res->length()+to_length); + } + res->replace((uint) offset,from_length,*res3); + offset+=(int) to_length; + goto redo; + } + skip: + if ((l=my_ismbchar(res->charset(), ptr,strend))) ptr+=l; + else ++ptr; + } + } + else +#endif /* USE_MB */ + { + thd= current_thd; + do + { + if (res->length()-from_length + to_length > + thd->variables.max_allowed_packet) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), + thd->variables.max_allowed_packet); + goto null; + } + if (!alloced) + { + alloced=1; + res=copy_if_not_alloced(str,res,res->length()+to_length); + } + res->replace((uint) offset,from_length,*res3); + offset+=(int) to_length; + } + while ((offset=res->strstr(*res2,(uint) offset)) >= 0); + } + if (empty_string_for_null && !res->length()) + goto null; + + return res; + +null: + null_value=1; + return 0; +} + + +bool Item_func_replace::fix_length_and_dec(THD *thd) +{ + ulonglong char_length= (ulonglong) args[0]->max_char_length(); + int diff=(int) (args[2]->max_char_length() - 1); + if (diff > 0) + { // Calculate of maxreplaces + ulonglong max_substrs= char_length; + char_length+= max_substrs * (uint) diff; + } + + if (agg_arg_charsets_for_string_result_with_comparison(collation, args, 3)) + return TRUE; + 
fix_char_length_ulonglong(char_length); + return FALSE; +} + +/* + this is done in the constructor to be in the same memroot as + the item itself +*/ +Item_func_sformat::Item_func_sformat(THD *thd, List &list) + : Item_str_func(thd, list) +{ + val_arg= new (thd->mem_root) String[arg_count]; +} + + +bool Item_func_sformat::fix_length_and_dec(THD *thd) +{ + if (!val_arg) + return TRUE; + + ulonglong char_length= 0; + + uint flags= MY_COLL_ALLOW_SUPERSET_CONV | + MY_COLL_ALLOW_COERCIBLE_CONV | + MY_COLL_ALLOW_NUMERIC_CONV; + + if (Type_std_attributes::agg_item_collations(collation, func_name_cstring(), + args, arg_count, flags, 1)) + return TRUE; + + DTCollation c= collation; + if (c.collation->mbminlen > 1) + c.collation= &my_charset_utf8mb4_bin; + + for (uint i=0 ; i < arg_count ; i++) + { + if (args[i]->result_type() == STRING_RESULT && + Type_std_attributes::agg_item_set_converter(c, func_name_cstring(), + args+i, 1, flags, 1)) + return TRUE; + } + + char_length= MAX_BLOB_WIDTH; + fix_char_length_ulonglong(char_length); + return FALSE; +} + +/* + allow fmt to take String arguments directly. + Inherit from string_view, so all string formatting works. + but {:p} doesn't, because it's not char*, not a pointer. +*/ +namespace fmt { + template <> struct formatter: formatter { + template + auto format(String c, FormatContext& ctx) -> decltype(ctx.out()) { + string_view name = { c.ptr(), c.length() }; + return formatter::format(name, ctx); + }; + }; +}; + +/* + SFORMAT(format_string, ...) + This function receives a formatting specification string and N parameters + (N >= 0), and it returns string formatted using the rules the user passed + in the specification. It uses fmtlib (https://fmt.dev/). +*/ +String *Item_func_sformat::val_str(String *res) +{ + /* + A union that stores a numeric format arg value. + fmt::detail::make_arg does not accept temporaries, so all of its numeric + args are temporarily stored in the fmt_args array. 
+ See: https://github.com/fmtlib/fmt/issues/3596 + */ + union Format_arg_store { + longlong val_int; + float val_float; + double val_double; + }; + + DBUG_ASSERT(fixed()); + using ctx= fmt::format_context; + String *fmt_arg= NULL; + String *parg= NULL; + fmt::format_args::format_arg *vargs= NULL; + Format_arg_store *fmt_args= NULL; + + null_value= true; + if (!(fmt_arg= args[0]->val_str(res))) + return NULL; + + if (!(vargs= new fmt::format_args::format_arg[arg_count - 1])) + return NULL; + + if (!(fmt_args= new Format_arg_store[arg_count - 1])) + { + delete [] vargs; + return NULL; + } + + /* Creates the array of arguments for vformat */ + for (uint carg= 1; carg < arg_count; carg++) + { + switch (args[carg]->result_type()) + { + case INT_RESULT: + fmt_args[carg-1].val_int= args[carg]->val_int(); + vargs[carg-1]= fmt::detail::make_arg(fmt_args[carg-1].val_int); + break; + case DECIMAL_RESULT: // TODO + case REAL_RESULT: + if (args[carg]->field_type() == MYSQL_TYPE_FLOAT) + { + fmt_args[carg-1].val_float= (float)args[carg]->val_real(); + vargs[carg-1]= fmt::detail::make_arg(fmt_args[carg-1].val_float); + } + else + { + fmt_args[carg-1].val_double= args[carg]->val_real(); + vargs[carg-1]= fmt::detail::make_arg(fmt_args[carg-1].val_double); + } + break; + case STRING_RESULT: + if (!(parg= args[carg]->val_str(&val_arg[carg-1]))) + { + delete [] vargs; + delete [] fmt_args; + return NULL; + } + vargs[carg-1]= fmt::detail::make_arg(*parg); + break; + case TIME_RESULT: // TODO + case ROW_RESULT: // TODO + default: + DBUG_ASSERT(0); + delete [] vargs; + delete [] fmt_args; + return NULL; + } + } + + null_value= false; + /* Create the string output */ + try + { + auto text = fmt::vformat(fmt_arg->c_ptr_safe(), + fmt::format_args(vargs, arg_count-1)); + res->length(0); + res->set_charset(collation.collation); + res->append(text.c_str(), text.size(), fmt_arg->charset()); + } + catch (const fmt::format_error &ex) + { + THD *thd= current_thd; + push_warning_printf(thd, 
Sql_condition::WARN_LEVEL_WARN, + WARN_SFORMAT_ERROR, + ER_THD(thd, WARN_SFORMAT_ERROR), ex.what()); + null_value= true; + } + delete [] vargs; + delete [] fmt_args; + return null_value ? NULL : res; +} + +#include"my_global.h" +#include +#include + +bool Item_func_random_bytes::fix_length_and_dec(THD *thd) +{ + used_tables_cache|= RAND_TABLE_BIT; + if (args[0]->can_eval_in_optimize()) + { + int32 v= (int32) args[0]->val_int(); + max_length= MY_MAX(0, MY_MIN(v, MAX_RANDOM_BYTES)); + return false; + } + max_length= MAX_RANDOM_BYTES; + return false; +} + + +void Item_func_random_bytes::update_used_tables() +{ + Item_str_func::update_used_tables(); + used_tables_cache|= RAND_TABLE_BIT; +} + + +String *Item_func_random_bytes::val_str(String *str) +{ + longlong count= args[0]->val_int(); + + if (args[0]->null_value) + goto err; + null_value= 0; + + if (count < 0 || count > MAX_RANDOM_BYTES) + goto err; + + if (count == 0) + return make_empty_result(str); + + if (str->alloc((uint) count)) + goto err; + + str->length(count); + str->set_charset(&my_charset_bin); + if (my_random_bytes((unsigned char *) str->ptr(), (int32) count)) + { + ulong ssl_err; + while ((ssl_err= ERR_get_error())) + { + char buf[256]; + ERR_error_string_n(ssl_err, buf, sizeof(buf)); + sql_print_warning("SSL error: %s", buf); + } + goto err; + } + + return str; + +err: + null_value= 1; + return 0; +} + + +/*********************************************************************/ +bool Item_func_regexp_replace::fix_length_and_dec(THD *thd) +{ + if (agg_arg_charsets_for_string_result_with_comparison(collation, args, 3)) + return TRUE; + max_length= MAX_BLOB_WIDTH; + re.init(collation.collation, 0); + re.fix_owner(this, args[0], args[1]); + return FALSE; +} + + +/* + Traverse through the replacement string and append to "str". + Sub-pattern references \0 .. \9 are recognized, which are replaced + to the chunks of the source string. 
+*/
+bool Item_func_regexp_replace::append_replacement(String *str,
+ const LEX_CSTRING *source,
+ const LEX_CSTRING *replace)
+{
+ const char *beg= replace->str;
+ const char *end= beg + replace->length;
+ CHARSET_INFO *cs= re.library_charset();
+
+ for ( ; ; )
+ {
+ my_wc_t wc;
+ int cnv, n;
+
+ if ((cnv= cs->mb_wc(&wc, (const uchar *) beg,
+ (const uchar *) end)) < 1)
+ break; /* End of line */
+ beg+= cnv;
+
+ if (wc != '\\')
+ {
+ if (str->append(beg - cnv, cnv, cs))
+ return true;
+ continue;
+ }
+
+ if ((cnv= cs->mb_wc(&wc, (const uchar *) beg,
+ (const uchar *) end)) < 1)
+ break; /* End of line */
+ beg+= cnv;
+
+ if ((n= ((int) wc) - '0') >= 0 && n <= 9)
+ {
+ if (n < re.nsubpatterns())
+ {
+ /* A valid sub-pattern reference found */
+ size_t pbeg= re.subpattern_start(n), plength= re.subpattern_end(n) - pbeg;
+ if (str->append(source->str + pbeg, plength, cs))
+ return true;
+ }
+ }
+ else
+ {
+ /*
+ A non-digit character following after '\'.
+ Just add the character itself.
+ */
+ /* Fixed: report failure (true) on append error, consistent with the
+ other append-failure paths above; previously returned false, which
+ signalled success to the caller despite a failed (OOM) append. */
+ if (str->append(beg - cnv, cnv, cs))
+ return true;
+ }
+ }
+ return false;
+}
+
+
+String *Item_func_regexp_replace::val_str(String *str)
+{
+ DBUG_ASSERT(fixed());
+ char buff0[MAX_FIELD_WIDTH];
+ char buff2[MAX_FIELD_WIDTH];
+ String tmp0(buff0,sizeof(buff0),&my_charset_bin);
+ String tmp2(buff2,sizeof(buff2),&my_charset_bin);
+ String *source= args[0]->val_str(&tmp0);
+ String *replace= args[2]->val_str(&tmp2);
+ LEX_CSTRING src, rpl;
+ size_t startoffset= 0;
+
+ if ((null_value= (args[0]->null_value || args[2]->null_value ||
+ re.recompile(args[1]))))
+ return (String *) 0;
+
+ if (!(source= re.convert_if_needed(source, &re.subject_converter)) ||
+ !(replace= re.convert_if_needed(replace, &re.replace_converter)))
+ goto err;
+
+ source->get_value(&src);
+ replace->get_value(&rpl);
+
+ str->length(0);
+ str->set_charset(collation.collation);
+
+ for ( ; ; ) // Iterate through all matches
+ {
+
+ if (re.exec(src.str, src.length, startoffset))
+ goto err;
+
+ if (!re.match()
|| re.subpattern_length(0) == 0) + { + /* + No match or an empty match. + Append the rest of the source string + starting from startoffset until the end of the source. + */ + if (str->append(src.str + startoffset, src.length - startoffset, + re.library_charset())) + goto err; + return str; + } + + /* + Append prefix, the part before the matching pattern. + starting from startoffset until the next match + */ + if (str->append(src.str + startoffset, + re.subpattern_start(0) - startoffset, re.library_charset())) + goto err; + + // Append replacement + if (append_replacement(str, &src, &rpl)) + goto err; + + // Set the new start point as the end of previous match + startoffset= re.subpattern_end(0); + } + return str; + +err: + null_value= true; + return (String *) 0; +} + + +bool Item_func_regexp_substr::fix_length_and_dec(THD *thd) +{ + if (agg_arg_charsets_for_string_result_with_comparison(collation, args, 2)) + return TRUE; + fix_char_length(args[0]->max_char_length()); + re.init(collation.collation, 0); + re.fix_owner(this, args[0], args[1]); + return FALSE; +} + + +String *Item_func_regexp_substr::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + char buff0[MAX_FIELD_WIDTH]; + String tmp0(buff0,sizeof(buff0),&my_charset_bin); + String *source= args[0]->val_str(&tmp0); + + if ((null_value= (args[0]->null_value || re.recompile(args[1])))) + return (String *) 0; + + if (!(source= re.convert_if_needed(source, &re.subject_converter))) + goto err; + + str->length(0); + str->set_charset(collation.collation); + + if (re.exec(source->ptr(), source->length(), 0)) + goto err; + + if (!re.match()) + return str; + + if (str->append(source->ptr() + re.subpattern_start(0), + re.subpattern_length(0), re.library_charset())) + goto err; + + return str; + +err: + null_value= true; + return (String *) 0; +} + + +/************************************************************************/ + + +String *Item_func_insert::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String *res,*res2; 
+ longlong start, length; /* must be longlong to avoid truncation */ + + null_value=0; + res=args[0]->val_str(str); + res2=args[3]->val_str(&tmp_value); + start= args[1]->val_int(); + length= args[2]->val_int(); + + if (args[0]->null_value || args[1]->null_value || args[2]->null_value || + args[3]->null_value) + goto null; /* purecov: inspected */ + + if ((start <= 0) || (start > res->length())) + return res; // Wrong param; skip insert + if ((length < 0) || (length > res->length())) + length= res->length(); + start--; + + /* + There is one exception not handled (intentionally) by the character set + aggregation code. If one string is strong side and is binary, and + another one is weak side and is a multi-byte character string, + then we need to operate on the second string in terms on bytes when + calling ::numchars() and ::charpos(), rather than in terms of characters. + Lets substitute its character set to binary. + */ + if (collation.collation == &my_charset_bin) + { + res->set_charset(&my_charset_bin); + res2->set_charset(&my_charset_bin); + } + + /* start and length are now sufficiently valid to pass to charpos function */ + start= res->charpos((int) start); + length= res->charpos((int) length, (uint32) start); + + /* Re-testing with corrected params */ + if (start + 1 > res->length()) // remember, start = args[1].val_int() - 1 + return res; /* purecov: inspected */ // Wrong param; skip insert + if (length > res->length() - start) + length= res->length() - start; + + { + THD *thd= current_thd; + if ((ulonglong) (res->length() - length + res2->length()) > + (ulonglong) thd->variables.max_allowed_packet) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), thd->variables.max_allowed_packet); + goto null; + } + } + res=copy_if_not_alloced(str,res,res->length()); + res->replace((uint32) start,(uint32) length,*res2); + return res; +null: + null_value=1; 
+ return 0;
+}
+
+
+bool Item_func_insert::fix_length_and_dec(THD *thd)
+{
+ ulonglong char_length;
+
+ // Handle character set for args[0] and args[3].
+ if (agg_arg_charsets_for_string_result(collation, args, 2, 3))
+ return TRUE;
+ char_length= ((ulonglong) args[0]->max_char_length() +
+ (ulonglong) args[3]->max_char_length());
+ fix_char_length_ulonglong(char_length);
+ return FALSE;
+}
+
+
+String *Item_str_conv::val_str(String *str)
+{
+ DBUG_ASSERT(fixed());
+ String *res;
+ size_t alloced_length, len;
+
+ if ((null_value= (!(res= args[0]->val_str(&tmp_value)) ||
+ str->alloc((alloced_length= res->length() * multiply)))))
+ return 0;
+
+ len= converter(collation.collation, (char*) res->ptr(), res->length(),
+ (char*) str->ptr(), alloced_length);
+ DBUG_ASSERT(len <= alloced_length);
+ str->set_charset(collation.collation);
+ str->length(len);
+ return str;
+}
+
+
+bool Item_func_lcase::fix_length_and_dec(THD *thd)
+{
+ if (agg_arg_charsets_for_string_result(collation, args, 1))
+ return TRUE;
+ DBUG_ASSERT(collation.collation != NULL);
+ multiply= collation.collation->casedn_multiply();
+ converter= collation.collation->cset->casedn;
+ fix_char_length_ulonglong((ulonglong) args[0]->max_char_length() * multiply);
+ return FALSE;
+}
+
+bool Item_func_ucase::fix_length_and_dec(THD *thd)
+{
+ if (agg_arg_charsets_for_string_result(collation, args, 1))
+ return TRUE;
+ DBUG_ASSERT(collation.collation != NULL);
+ multiply= collation.collation->caseup_multiply();
+ converter= collation.collation->cset->caseup;
+ fix_char_length_ulonglong((ulonglong) args[0]->max_char_length() * multiply);
+ return FALSE;
+}
+
+
+bool Item_func_left::hash_not_null(Hasher *hasher)
+{
+ /* Restored stripped template argument: StringBuffer requires a size. */
+ StringBuffer<STRING_BUFFER_USUAL_SIZE> buf;
+ String *str= val_str(&buf);
+ DBUG_ASSERT((str == NULL) == null_value);
+ if (!str)
+ return true;
+ hasher->add(collation.collation, str->ptr(), str->length());
+ return false;
+}
+
+
+String *Item_func_left::val_str(String *str)
+{
+ DBUG_ASSERT(fixed());
+ String *res=
args[0]->val_str(str); + + /* must be longlong to avoid truncation */ + longlong length= args[1]->val_int(); + uint char_pos; + + if ((null_value=(args[0]->null_value || args[1]->null_value))) + return 0; + + /* if "unsigned_flag" is set, we have a *huge* positive number. */ + if ((length <= 0) && (!args[1]->unsigned_flag)) + return make_empty_result(str); + if ((res->length() <= (ulonglong) length) || + (res->length() <= (char_pos= res->charpos((int) length)))) + return res; + + tmp_value.set(*res, 0, char_pos); + return &tmp_value; +} + + +void Item_str_func::left_right_max_length() +{ + uint32 char_length= args[0]->max_char_length(); + if (args[1]->can_eval_in_optimize()) + { + uint32 length= max_length_for_string(args[1]); + set_if_smaller(char_length, length); + } + fix_char_length(char_length); +} + + +bool Item_func_left::fix_length_and_dec(THD *thd) +{ + if (agg_arg_charsets_for_string_result(collation, args, 1)) + return TRUE; + DBUG_ASSERT(collation.collation != NULL); + left_right_max_length(); + return FALSE; +} + + +String *Item_func_right::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String *res= args[0]->val_str(str); + /* must be longlong to avoid truncation */ + longlong length= args[1]->val_int(); + + if ((null_value=(args[0]->null_value || args[1]->null_value))) + return 0; /* purecov: inspected */ + + /* if "unsigned_flag" is set, we have a *huge* positive number. 
*/ + if ((length <= 0) && (!args[1]->unsigned_flag)) + return make_empty_result(str); /* purecov: inspected */ + + if (res->length() <= (ulonglong) length) + return res; /* purecov: inspected */ + + uint start=res->numchars(); + if (start <= (uint) length) + return res; + start=res->charpos(start - (uint) length); + tmp_value.set(*res,start,res->length()-start); + return &tmp_value; +} + + +bool Item_func_right::fix_length_and_dec(THD *thd) +{ + if (agg_arg_charsets_for_string_result(collation, args, 1)) + return TRUE; + DBUG_ASSERT(collation.collation != NULL); + left_right_max_length(); + return FALSE; +} + + +String *Item_func_substr::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String *res = args[0]->val_str(str); + /* must be longlong to avoid truncation */ + longlong start= get_position(); + /* Assumes that the maximum length of a String is < INT_MAX32. */ + /* Limit so that code sees out-of-bound value properly. */ + longlong length= arg_count == 3 ? args[2]->val_int() : INT_MAX32; + longlong tmp_length; + + if ((null_value=(args[0]->null_value || args[1]->null_value || + (arg_count == 3 && args[2]->null_value)))) + return 0; /* purecov: inspected */ + + /* Negative or zero length, will return empty string. */ + if ((arg_count == 3) && (length <= 0) && + (length == 0 || !args[2]->unsigned_flag)) + return make_empty_result(str); + + /* Assumes that the maximum length of a String is < INT_MAX32. */ + /* Set here so that rest of code sees out-of-bound value as such. */ + if ((length <= 0) || (length > INT_MAX32)) + length= INT_MAX32; + + /* if "unsigned_flag" is set, we have a *huge* positive number. */ + /* Assumes that the maximum length of a String is < INT_MAX32. */ + if ((!args[1]->unsigned_flag && (start < INT_MIN32 || start > INT_MAX32)) || + (args[1]->unsigned_flag && ((ulonglong) start > INT_MAX32))) + return make_empty_result(str); + + start= ((start < 0) ? 
res->numchars() + start : start - 1); + start= res->charpos((int) start); + if ((start < 0) || ((uint) start + 1 > res->length())) + return make_empty_result(str); + + length= res->charpos((int) length, (uint32) start); + tmp_length= res->length() - start; + length= MY_MIN(length, tmp_length); + + if (!start && (longlong) res->length() == length) + return res; + tmp_value.set(*res, (uint32) start, (uint32) length); + return &tmp_value; +} + + +bool Item_func_substr::fix_length_and_dec(THD *thd) +{ + max_length=args[0]->max_length; + + if (agg_arg_charsets_for_string_result(collation, args, 1)) + return TRUE; + DBUG_ASSERT(collation.collation != NULL); + if (args[1]->const_item()) + { + int32 start= (int32) get_position(); + if (args[1]->null_value) + max_length= 0; + else if (start < 0) + max_length= ((uint)(-start) > max_length) ? 0 : (uint)(-start); + else + max_length-= MY_MIN((uint)(start - 1), max_length); + } + if (arg_count == 3 && args[2]->const_item()) + { + int32 length= (int32) args[2]->val_int(); + if (args[2]->null_value || length <= 0) + max_length=0; /* purecov: inspected */ + else + set_if_smaller(max_length,(uint) length); + } + max_length*= collation.collation->mbmaxlen; + return FALSE; +} + + +bool Item_func_substr_index::fix_length_and_dec(THD *thd) +{ + if (agg_arg_charsets_for_string_result_with_comparison(collation, args, 2)) + return TRUE; + fix_char_length(args[0]->max_char_length()); + return FALSE; +} + + +String *Item_func_substr_index::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + char buff[MAX_FIELD_WIDTH]; + String tmp(buff,sizeof(buff),system_charset_info); + String *res= args[0]->val_str(&tmp_value); + String *delimiter= args[1]->val_str(&tmp); + int32 count= (int32) args[2]->val_int(); + uint offset; + + if (args[0]->null_value || args[1]->null_value || args[2]->null_value) + { // string and/or delim are null + null_value=1; + return 0; + } + null_value=0; + uint delimiter_length= delimiter->length(); + if (!res->length() || 
!delimiter_length || !count) + return make_empty_result(str); // Wrong parameters + + res->set_charset(collation.collation); + +#ifdef USE_MB + if (res->use_mb()) + { + const char *ptr= res->ptr(); + const char *strend= ptr+res->length(); + const char *end= strend-delimiter_length+1; + const char *search= delimiter->ptr(); + const char *search_end= search+delimiter_length; + int32 n=0,c=count,pass; + uint32 l; + for (pass=(count>0);pass<2;++pass) + { + while (ptr < end) + { + if (*ptr == *search) + { + char *i,*j; + i=(char*) ptr+1; j=(char*) search+1; + while (j != search_end) + if (*i++ != *j++) goto skip; + if (pass==0) ++n; + else if (!--c) break; + ptr+= delimiter_length; + continue; + } + skip: + if ((l=my_ismbchar(res->charset(), ptr,strend))) ptr+=l; + else ++ptr; + } /* either not found or got total number when count<0 */ + if (pass == 0) /* count<0 */ + { + c+=n+1; + if (c<=0) + { + str->copy(res->ptr(), res->length(), collation.collation); + return str; // not found, return the original string + } + ptr=res->ptr(); + } + else + { + if (c) + { + str->copy(res->ptr(), res->length(), collation.collation); + return str; // not found, return the original string + } + if (count>0) /* return left part */ + { + str->copy(res->ptr(), (uint32) (ptr-res->ptr()), collation.collation); + return str; + } + else /* return right part */ + { + ptr+= delimiter_length; + str->copy(res->ptr() + (ptr-res->ptr()), (uint32) (strend - ptr), + collation.collation); + return str; + } + } + } + } + else +#endif /* USE_MB */ + { + if (count > 0) + { // start counting from the beginning + for (offset=0; ; offset+= delimiter_length) + { + if ((int) (offset= res->strstr(*delimiter, offset)) < 0) + { + str->copy(res->ptr(), res->length(), collation.collation); + return str; // not found, return the original string + } + if (!--count) + { + str->copy(res->ptr(), offset, collation.collation); + return str; + } + } + } + else + { + /* + Negative index, start counting at the end + */ + for 
(offset=res->length(); offset ;) + { + /* + this call will result in finding the position pointing to one + address space less than where the found substring is located + in res + */ + if ((int) (offset= res->strrstr(*delimiter, offset)) < 0) + { + str->copy(res->ptr(), res->length(), collation.collation); + return str; // not found, return the original string + } + /* + At this point, we've searched for the substring + the number of times as supplied by the index value + */ + if (!++count) + { + offset+= delimiter_length; + str->copy(res->ptr() + offset, res->length() - offset, + collation.collation); + return str; + } + } + if (count) + { + str->copy(res->ptr(), res->length(), collation.collation); + return str; // not found, return the original string + } + } + } + DBUG_ASSERT(0); + return NULL; +} + +/* +** The trim functions are extension to ANSI SQL because they trim substrings +** They ltrim() and rtrim() functions are optimized for 1 byte strings +** They also return the original string if possible, else they return +** a substring that points at the original string. +*/ + + +String *Item_func_ltrim::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + char buff[MAX_FIELD_WIDTH], *ptr, *end; + String tmp(buff,sizeof(buff),system_charset_info); + String *res, *remove_str; + uint UNINIT_VAR(remove_length); + + res= args[0]->val_str(str); + if ((null_value=args[0]->null_value)) + return 0; + remove_str= &remove; /* Default value. 
*/ + if (arg_count == 2) + { + remove_str= args[1]->val_str(&tmp); + if ((null_value= args[1]->null_value)) + return 0; + } + + if ((remove_length= remove_str->length()) == 0 || + remove_length > res->length()) + return non_trimmed_value(res); + + ptr= (char*) res->ptr(); + end= ptr+res->length(); + if (remove_length == 1) + { + char chr=(*remove_str)[0]; + while (ptr != end && *ptr == chr) + ptr++; + } + else + { + const char *r_ptr=remove_str->ptr(); + end-=remove_length; + while (ptr <= end && !memcmp(ptr, r_ptr, remove_length)) + ptr+=remove_length; + end+=remove_length; + } + if (ptr == res->ptr()) + return non_trimmed_value(res); + return trimmed_value(res, (uint32) (ptr - res->ptr()), (uint32) (end - ptr)); +} + + +String *Item_func_rtrim::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + char buff[MAX_FIELD_WIDTH], *ptr, *end; + String tmp(buff, sizeof(buff), system_charset_info); + String *res, *remove_str; + uint UNINIT_VAR(remove_length); + + res= args[0]->val_str(str); + if ((null_value=args[0]->null_value)) + return 0; + remove_str= &remove; /* Default value. 
*/ + if (arg_count == 2) + { + remove_str= args[1]->val_str(&tmp); + if ((null_value= args[1]->null_value)) + return 0; + } + + if ((remove_length= remove_str->length()) == 0 || + remove_length > res->length()) + return non_trimmed_value(res); + + ptr= (char*) res->ptr(); + end= ptr+res->length(); +#ifdef USE_MB + char *p=ptr; + uint32 l; +#endif + if (remove_length == 1) + { + char chr=(*remove_str)[0]; +#ifdef USE_MB + if (collation.collation->use_mb()) + { + while (ptr < end) + { + if ((l= my_ismbchar(collation.collation, ptr, end))) ptr+= l, p=ptr; + else ++ptr; + } + ptr=p; + } +#endif + while (ptr != end && end[-1] == chr) + end--; + } + else + { + const char *r_ptr=remove_str->ptr(); +#ifdef USE_MB + if (collation.collation->use_mb()) + { + loop: + while (ptr + remove_length < end) + { + if ((l= my_ismbchar(collation.collation, ptr, end))) ptr+= l; + else ++ptr; + } + if (ptr + remove_length == end && !memcmp(ptr,r_ptr,remove_length)) + { + end-=remove_length; + ptr=p; + goto loop; + } + } + else +#endif /* USE_MB */ + { + while (ptr + remove_length <= end && + !memcmp(end-remove_length, r_ptr, remove_length)) + end-=remove_length; + } + } + if (end == res->ptr()+res->length()) + return non_trimmed_value(res); + return trimmed_value(res, 0, (uint32) (end - res->ptr())); +} + + +String *Item_func_trim::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + char buff[MAX_FIELD_WIDTH], *ptr, *end; + const char *r_ptr; + String tmp(buff, sizeof(buff), system_charset_info); + String *res, *remove_str; + uint UNINIT_VAR(remove_length); + + res= args[0]->val_str(str); + if ((null_value=args[0]->null_value)) + return 0; + remove_str= &remove; /* Default value. 
*/ + if (arg_count == 2) + { + remove_str= args[1]->val_str(&tmp); + if ((null_value= args[1]->null_value)) + return 0; + } + + if ((remove_length= remove_str->length()) == 0 || + remove_length > res->length()) + return non_trimmed_value(res); + + ptr= (char*) res->ptr(); + end= ptr+res->length(); + r_ptr= remove_str->ptr(); + while (ptr+remove_length <= end && !memcmp(ptr,r_ptr,remove_length)) + ptr+=remove_length; +#ifdef USE_MB + if (collation.collation->use_mb()) + { + char *p=ptr; + uint32 l; + loop: + while (ptr + remove_length < end) + { + if ((l= my_ismbchar(collation.collation, ptr, end))) + ptr+= l; + else + ++ptr; + } + if (ptr + remove_length == end && !memcmp(ptr,r_ptr,remove_length)) + { + end-=remove_length; + ptr=p; + goto loop; + } + ptr=p; + } + else +#endif /* USE_MB */ + { + while (ptr + remove_length <= end && + !memcmp(end-remove_length,r_ptr,remove_length)) + end-=remove_length; + } + if (ptr == res->ptr() && end == ptr+res->length()) + return non_trimmed_value(res); + return trimmed_value(res, (uint32) (ptr - res->ptr()), (uint32) (end - ptr)); +} + +bool Item_func_trim::fix_length_and_dec(THD *thd) +{ + if (arg_count == 1) + { + if (agg_arg_charsets_for_string_result(collation, args, 1)) + return TRUE; + DBUG_ASSERT(collation.collation != NULL); + remove.set_charset(collation.collation); + remove.set_ascii(" ",1); + } + else + { + // Handle character set for args[1] and args[0]. + // Note that we pass args[1] as the first item, and args[0] as the second. 
+ if (agg_arg_charsets_for_string_result_with_comparison(collation, + &args[1], 2, -1)) + return TRUE; + } + fix_char_length(args[0]->max_char_length()); + return FALSE; +} + +void Item_func_trim::print(String *str, enum_query_type query_type) +{ + if (arg_count == 1) + { + Item_func::print(str, query_type); + return; + } + str->append(Item_func_trim::func_name_cstring()); + str->append(func_name_ext()); + str->append('('); + str->append(mode_name()); + str->append(' '); + args[1]->print(str, query_type); + str->append(STRING_WITH_LEN(" from ")); + args[0]->print(str, query_type); + str->append(')'); +} + + +/* + RTRIM(expr) + TRIM(TRAILING ' ' FROM expr) + remove argument's soft dependency on PAD_CHAR_TO_FULL_LENGTH: +*/ +Sql_mode_dependency Item_func_trim::value_depends_on_sql_mode() const +{ + DBUG_ASSERT(fixed()); + if (arg_count == 1) // RTRIM(expr) + return (args[0]->value_depends_on_sql_mode() & + Sql_mode_dependency(~0, ~MODE_PAD_CHAR_TO_FULL_LENGTH)). + soft_to_hard(); + // TRIM(... FROM expr) + DBUG_ASSERT(arg_count == 2); + if (!args[1]->value_depends_on_sql_mode_const_item()) + return Item_func::value_depends_on_sql_mode(); + StringBuffer<64> trimstrbuf; + String *trimstr= args[1]->val_str(&trimstrbuf); + if (!trimstr) + return Sql_mode_dependency(); // will return NULL + if (trimstr->length() == 0) + return Item_func::value_depends_on_sql_mode(); // will trim nothing + if (trimstr->lengthsp() != 0) + return Item_func::value_depends_on_sql_mode(); // will trim not only spaces + if (trimstr->length() > trimstr->charset()->mbminlen || + trimstr->numchars() > 1) + return Item_func::value_depends_on_sql_mode(); // more than one space + // TRIM(TRAILING ' ' FROM expr) + return ((args[0]->value_depends_on_sql_mode() | + args[1]->value_depends_on_sql_mode()) & + Sql_mode_dependency(~0, ~MODE_PAD_CHAR_TO_FULL_LENGTH)). 
+ soft_to_hard(); +} + + +/* Item_func_password */ + +bool Item_func_password::fix_fields(THD *thd, Item **ref) +{ + if (deflt) + alg= (thd->variables.old_passwords ? OLD : NEW); + return Item_str_ascii_func::fix_fields(thd, ref); +} + +String *Item_func_password::val_str_ascii(String *str) +{ + DBUG_ASSERT(fixed()); + String *res= args[0]->val_str(str); + switch (alg){ + case NEW: + if (args[0]->null_value || res->length() == 0) + return make_empty_result(str); + my_make_scrambled_password(tmp_value, res->ptr(), res->length()); + str->set(tmp_value, SCRAMBLED_PASSWORD_CHAR_LENGTH, &my_charset_latin1); + break; + case OLD: + if ((null_value=args[0]->null_value)) + return 0; + if (res->length() == 0) + return make_empty_result(str); + my_make_scrambled_password_323(tmp_value, res->ptr(), res->length()); + str->set(tmp_value, SCRAMBLED_PASSWORD_CHAR_LENGTH_323, &my_charset_latin1); + break; + default: + DBUG_ASSERT(0); + } + return str; +} + +char *Item_func_password::alloc(THD *thd, const char *password, + size_t pass_len, enum PW_Alg al) +{ + char *buff= (char *) thd->alloc((al==NEW)? 
+ SCRAMBLED_PASSWORD_CHAR_LENGTH + 1: + SCRAMBLED_PASSWORD_CHAR_LENGTH_323 + 1); + if (!buff) + return NULL; + + switch (al) { + case NEW: + my_make_scrambled_password(buff, password, pass_len); + break; + case OLD: + my_make_scrambled_password_323(buff, password, pass_len); + break; + default: + DBUG_ASSERT(0); + } + return buff; +} + + + +#define bin_to_ascii(c) ((c)>=38?((c)-38+'a'):(c)>=12?((c)-12+'A'):(c)+'.') + +String *Item_func_encrypt::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + +#ifdef HAVE_CRYPT + String *res =args[0]->val_str(str); + + char salt[3],*salt_ptr; + if ((null_value=args[0]->null_value)) + return 0; + if (res->length() == 0) + return make_empty_result(str); + if (arg_count == 1) + { // generate random salt + time_t timestamp=current_thd->query_start(); + salt[0] = bin_to_ascii( (ulong) timestamp & 0x3f); + salt[1] = bin_to_ascii(( (ulong) timestamp >> 5) & 0x3f); + salt[2] = 0; + salt_ptr=salt; + } + else + { // obtain salt from the first two bytes + String *salt_str=args[1]->val_str(&tmp_value); + if ((null_value= (args[1]->null_value || salt_str->length() < 2))) + return 0; + salt_ptr= salt_str->c_ptr_safe(); + } + mysql_mutex_lock(&LOCK_crypt); + char *tmp= crypt(res->c_ptr_safe(),salt_ptr); + if (!tmp) + { + mysql_mutex_unlock(&LOCK_crypt); + null_value= 1; + return 0; + } + str->set(tmp, (uint) strlen(tmp), &my_charset_bin); + str->copy(); + mysql_mutex_unlock(&LOCK_crypt); + return str; +#else + null_value=1; + return 0; +#endif /* HAVE_CRYPT */ +} + +bool Item_func_encode::seed() +{ + char buf[80]; + ulong rand_nr[2]; + String *key, tmp(buf, sizeof(buf), system_charset_info); + + if (!(key= args[1]->val_str(&tmp))) + return TRUE; + + hash_password(rand_nr, key->ptr(), key->length()); + sql_crypt.init(rand_nr); + + return FALSE; +} + +bool Item_func_encode::fix_length_and_dec(THD *thd) +{ + max_length=args[0]->max_length; + base_flags|= ((args[0]->base_flags | args[1]->base_flags) & + item_base_t::MAYBE_NULL); + 
collation.set(&my_charset_bin); + /* Precompute the seed state if the item is constant. */ + seeded= args[1]->const_item() && + (args[1]->result_type() == STRING_RESULT) && !seed(); + return FALSE; +} + +String *Item_func_encode::val_str(String *str) +{ + String *res; + DBUG_ASSERT(fixed()); + + if (!(res=args[0]->val_str(str))) + { + null_value= 1; + return NULL; + } + + if (!seeded && seed()) + { + null_value= 1; + return NULL; + } + + null_value= 0; + res= copy_if_not_alloced(str, res, res->length()); + crypto_transform(res); + sql_crypt.reinit(); + + return res; +} + +void Item_func_encode::crypto_transform(String *res) +{ + sql_crypt.encode((char*) res->ptr(),res->length()); + res->set_charset(&my_charset_bin); +} + +void Item_func_decode::crypto_transform(String *res) +{ + sql_crypt.decode((char*) res->ptr(),res->length()); +} + + +String *Item_func_database::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + THD *thd= current_thd; + if (thd->db.str == NULL) + { + null_value= 1; + return 0; + } + else + str->copy(thd->db.str, thd->db.length, system_charset_info); + null_value= 0; + return str; +} + + +String *Item_func_sqlerrm::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + DBUG_ASSERT(!null_value); + Diagnostics_area::Sql_condition_iterator it= + current_thd->get_stmt_da()->sql_conditions(); + const Sql_condition *err; + if ((err= it++)) + { + str->copy(err->get_message_text(), err->get_message_octet_length(), + system_charset_info); + return str; + } + str->copy(STRING_WITH_LEN("normal, successful completion"), + system_charset_info); + return str; +} + + +/** + @note USER() is replicated correctly if binlog_format=ROW or (as of + BUG#28086) binlog_format=MIXED, but is incorrectly replicated to '' + if binlog_format=STATEMENT. +*/ +bool Item_func_user::init(const char *user, const char *host) +{ + DBUG_ASSERT(fixed()); + + // For system threads (e.g. 
replication SQL thread) user may be empty + if (user) + { + CHARSET_INFO *cs= str_value.charset(); + size_t res_length= (strlen(user)+strlen(host)+2) * cs->mbmaxlen; + + if (str_value.alloc((uint) res_length)) + { + null_value=1; + return TRUE; + } + + res_length=cs->cset->snprintf(cs, (char*)str_value.ptr(), (uint) res_length, + "%s@%s", user, host); + str_value.length((uint) res_length); + str_value.mark_as_const(); + } + return FALSE; +} + + +Item *Item_func_sysconst::safe_charset_converter(THD *thd, CHARSET_INFO *tocs) +{ + /* + During view or prepared statement creation, the item should not + make use of const_charset_converter as it would imply substitution + with constant items which is not correct. Functions can have different + values during view creation and view execution based on context. + + Return the identical item during view creation and prepare. + */ + if (thd->lex->is_ps_or_view_context_analysis()) + return this; + return const_charset_converter(thd, tocs, true, fully_qualified_func_name()); +} + +bool Item_func_sysconst::const_item() const +{ + if (current_thd->lex->is_ps_or_view_context_analysis()) + return false; + return true; +} + +bool Item_func_user::fix_fields(THD *thd, Item **ref) +{ + return (Item_func_sysconst::fix_fields(thd, ref) || + init(thd->main_security_ctx.user, + thd->main_security_ctx.host_or_ip)); +} + + +bool Item_func_current_user::fix_fields(THD *thd, Item **ref) +{ + if (Item_func_sysconst::fix_fields(thd, ref)) + return TRUE; + + Security_context *ctx= context && context->security_ctx + ? context->security_ctx : thd->security_ctx; + return init(ctx->priv_user, ctx->priv_host); +} + +bool Item_func_current_role::fix_fields(THD *thd, Item **ref) +{ + if (Item_func_sysconst::fix_fields(thd, ref)) + return 1; + + Security_context *ctx= context && context->security_ctx + ? 
context->security_ctx : thd->security_ctx; + if (ctx->priv_role[0]) + { /* An active role is set: return it as a non-NULL const string */ + if (str_value.copy(ctx->priv_role, strlen(ctx->priv_role), + system_charset_info)) + return 1; + str_value.mark_as_const(); + null_value= 0; + base_flags&= ~item_base_t::MAYBE_NULL; + return 0; + } + null_value= 1; /* No role set: CURRENT_ROLE() returns NULL */ + set_maybe_null(); + return 0; +} + +bool Item_func_soundex::fix_length_and_dec(THD *thd) +{ + uint32 char_length= args[0]->max_char_length(); + if (agg_arg_charsets_for_string_result(collation, args, 1)) + return TRUE; + DBUG_ASSERT(collation.collation != NULL); + set_if_bigger(char_length, 4); /* SOUNDEX result is at least 4 characters */ + fix_char_length(char_length); + return FALSE; +} + + +/** + Helpers for SOUNDEX(): map an input letter to its soundex group code. + Letters outside A..Z (after upcasing) yield '0', which the caller + treats like a vowel and skips. +*/ + +static int soundex_toupper(int ch) +{ + return (ch >= 'a' && ch <= 'z') ? ch - 'a' + 'A' : ch; +} + + +static char get_scode(int wc) +{ + int ch= soundex_toupper(wc); + if (ch < 'A' || ch > 'Z') + { + // Treat extended alphabet (country specific) letters + return '0'; // as a vowel + } + return(soundex_map[ch-'A']); +} + + +static bool my_uni_isalpha(int wc) +{ + /* + Return true for all Basic Latin letters: a..z A..Z. + Return true for all Unicode characters with code higher than U+00C0: + - characters between 'z' and U+00C0 are controls and punctuations. + - "U+00C0 LATIN CAPITAL LETTER A WITH GRAVE" is the first letter after 'z'.
+ */ + return (wc >= 'a' && wc <= 'z') || + (wc >= 'A' && wc <= 'Z') || + (wc >= 0xC0); +} + + +String *Item_func_soundex::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String *res= args[0]->val_str(&tmp_value); + char last_ch,ch; + CHARSET_INFO *cs= collation.collation; + my_wc_t wc; + uint nchars; + int rc; + + if ((null_value= args[0]->null_value)) + return 0; /* purecov: inspected */ + + if (str->alloc(MY_MAX(res->length(), 4 * cs->mbminlen))) + return &tmp_value; /* purecov: inspected */ + str->set_charset(collation.collation); + char *to= (char *) str->ptr(); + char *to_end= to + str->alloced_length(); + char *from= (char *) res->ptr(), *end= from + res->length(); + + for ( ; ; ) /* Skip pre-space */ + { + if ((rc= cs->mb_wc(&wc, (uchar*) from, (uchar*) end)) <= 0) + return make_empty_result(str); /* EOL or invalid byte sequence */ + + if (rc == 1 && cs->m_ctype) + { + /* Single byte letter found */ + if (my_isalpha(cs, *from)) + { + last_ch= get_scode(*from); // Code of the first letter + *to++= soundex_toupper(*from++); // Copy first letter + break; + } + from++; + } + else + { + from+= rc; + if (my_uni_isalpha(wc)) + { + /* Multibyte letter found */ + wc= soundex_toupper(wc); + last_ch= get_scode(wc); // Code of the first letter + if ((rc= cs->wc_mb(wc, (uchar*) to, (uchar*) to_end)) <= 0) + { + /* Extra safety - should not really happen */ + DBUG_ASSERT(false); + return make_empty_result(str); + } + to+= rc; + break; + } + } + } + + /* + last_ch is now set to the first 'double-letter' check. 
+ loop on input letters until end of input + */ + for (nchars= 1 ; ; ) + { + if ((rc= cs->mb_wc(&wc, (uchar*) from, (uchar*) end)) <= 0) + break; /* EOL or invalid byte sequence */ + + if (rc == 1 && cs->m_ctype) + { + if (!my_isalpha(cs, *from++)) + continue; + } + else + { + from+= rc; + if (!my_uni_isalpha(wc)) + continue; + } + + ch= get_scode(wc); + if ((ch != '0') && (ch != last_ch)) // if not skipped or double + { + // letter, copy to output + if ((rc= cs->wc_mb((my_wc_t) ch, (uchar*) to, (uchar*) to_end)) <= 0) + { + // Extra safety - should not really happen + DBUG_ASSERT(false); + break; + } + to+= rc; + nchars++; + last_ch= ch; // save code of last input letter + } // for next double-letter check + } + + /* Pad up to 4 characters with DIGIT ZERO, if the string is shorter */ + if (nchars < 4) + { + uint nbytes= (4 - nchars) * cs->mbminlen; + cs->fill(to, nbytes, '0'); + to+= nbytes; + } + + str->length((uint) (to - str->ptr())); + return str; +} + + +/** + Change a number to format '3,333,333,333.000'. + + This should be 'internationalized' sometimes. +*/ + +/* + The maximum supported decimal scale: + 38 - starting from 10.2.1 + 30 - before 10.2.1 +*/ +const int FORMAT_MAX_DECIMALS= 38; + + +bool Item_func_format::fix_length_and_dec(THD *thd) +{ + uint32 char_length= args[0]->type_handler()->Item_decimal_notation_int_digits(args[0]); + uint dec= FORMAT_MAX_DECIMALS; + /* + Format can require one more integer digit if rounding happens: + FORMAT(9.9,0) -> '10' + Set need_extra_digit_for_rounding to true by default + if args[0] has some decimals: if args[1] is not + a constant, then format can potentially reduce + the number of decimals and round to the next integer. 
+ */ + bool need_extra_digit_for_rounding= args[0]->decimals > 0; + if (args[1]->can_eval_in_optimize()) + { + Longlong_hybrid tmp= args[1]->to_longlong_hybrid(); + if (!args[1]->null_value) + { + dec= tmp.to_uint(FORMAT_MAX_DECIMALS); + need_extra_digit_for_rounding= (dec < args[0]->decimals); + } + } + /* + In case of a data type with zero integer digits, e.g. DECIMAL(4,4), + we'll print at least one integer digit. + */ + if (need_extra_digit_for_rounding || !char_length) + char_length++; + uint32 max_sep_count= (char_length / 3) + (dec ? 1 : 0) + /*sign*/1; + collation.set(default_charset()); + fix_char_length(char_length + max_sep_count + dec); + if (arg_count == 3) + locale= args[2]->basic_const_item() ? args[2]->locale_from_val_str() : NULL; + else + locale= &my_locale_en_US; /* Two arguments */ + return FALSE; +} + + +/** + @todo + This needs to be fixed for multi-byte character set where numbers + are stored in more than one byte +*/ + +String *Item_func_format::val_str_ascii(String *str) +{ + uint32 str_length; + /* Number of decimal digits */ + int dec; + /* Number of characters used to represent the decimals, including '.' */ + uint32 dec_length; + const MY_LOCALE *lc; + DBUG_ASSERT(fixed()); + + dec= (int) args[1]->val_int(); + if (args[1]->null_value) + { + null_value=1; + return NULL; + } + + lc= locale ? locale : args[2]->locale_from_val_str(); + + dec= set_zone(dec, 0, FORMAT_MAX_DECIMALS); + dec_length= dec ? 
dec+1 : 0; + null_value=0; + + if (args[0]->result_type() == DECIMAL_RESULT || + args[0]->result_type() == INT_RESULT) + { + VDec res(args[0]); + if ((null_value= res.is_null())) + return 0; /* purecov: inspected */ + res.to_string_round(str, dec); + str_length= str->length(); + } + else + { + double nr= args[0]->val_real(); + if ((null_value=args[0]->null_value)) + return 0; /* purecov: inspected */ + nr= my_double_round(nr, (longlong) dec, FALSE, FALSE); + str->set_fcvt(nr, dec); + if (!std::isfinite(nr)) + return str; + str_length=str->length(); + } + /* We need this test to handle 'nan' and short values */ + if (lc->grouping[0] > 0 && + str_length >= dec_length + 1 + lc->grouping[0]) + { + /* We need space for ',' between each group of digits as well. */ + char buf[2 * FLOATING_POINT_BUFFER]; + int count; + const char *grouping= lc->grouping; + char sign_length= *str->ptr() == '-' ? 1 : 0; + const char *src= str->ptr() + str_length - dec_length - 1; + const char *src_begin= str->ptr() + sign_length; + char *dst= buf + sizeof(buf); + + /* Put the fractional part */ + if (dec) + { + dst-= (dec + 1); + *dst= lc->decimal_point; + memcpy(dst + 1, src + 2, dec); + } + + /* Put the integer part with grouping */ + for (count= *grouping; src >= src_begin; count--) + { + /* + When *grouping==0x80 (which means "end of grouping") + count will be initialized to -1 and + we'll never get into this "if" anymore. + */ + if (count == 0) + { + *--dst= lc->thousand_sep; + if (grouping[1]) + grouping++; + count= *grouping; + } + DBUG_ASSERT(dst > buf); + *--dst= *src--; + } + + if (sign_length) /* Put '-' */ + *--dst= *str->ptr(); + + /* Put the rest of the integer part without grouping */ + str->copy(dst, buf + sizeof(buf) - dst, &my_charset_latin1); + } + else if (dec_length && lc->decimal_point != '.') + { + /* + For short values without thousands (<1000) + replace decimal point to localized value. 
+ */ + DBUG_ASSERT(dec_length <= str_length); + ((char*) str->ptr())[str_length - dec_length]= lc->decimal_point; + } + return str; +} + + +bool Item_func_elt::fix_length_and_dec(THD *thd) +{ + uint32 char_length= 0; + decimals=0; + + if (agg_arg_charsets_for_string_result(collation, args + 1, arg_count - 1)) + return TRUE; + + for (uint i= 1 ; i < arg_count ; i++) + { + set_if_bigger(char_length, args[i]->max_char_length()); + set_if_bigger(decimals,args[i]->decimals); + } + fix_char_length(char_length); + set_maybe_null(); // NULL if wrong first arg + return FALSE; +} + + +double Item_func_elt::val_real() +{ + DBUG_ASSERT(fixed()); + uint tmp; + null_value=1; + if ((tmp=(uint) args[0]->val_int()) == 0 || tmp >= arg_count) + return 0.0; + double result= args[tmp]->val_real(); + null_value= args[tmp]->null_value; + return result; +} + + +longlong Item_func_elt::val_int() +{ + DBUG_ASSERT(fixed()); + uint tmp; + null_value=1; + if ((tmp=(uint) args[0]->val_int()) == 0 || tmp >= arg_count) + return 0; + + longlong result= args[tmp]->val_int(); + null_value= args[tmp]->null_value; + return result; +} + + +String *Item_func_elt::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + uint tmp; + null_value=1; + if ((tmp=(uint) args[0]->val_int()) == 0 || tmp >= arg_count) + return NULL; + + String *result= args[tmp]->val_str(str); + if (result) + result->set_charset(collation.collation); + null_value= args[tmp]->null_value; + return result; +} + + +bool Item_func_make_set::fix_length_and_dec(THD *thd) +{ + uint32 char_length= arg_count - 2; /* Separators */ + + if (agg_arg_charsets_for_string_result(collation, args + 1, arg_count - 1)) + return TRUE; + + for (uint i=1 ; i < arg_count ; i++) + char_length+= args[i]->max_char_length(); + fix_char_length(char_length); + return FALSE; +} + + +String *Item_func_make_set::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + ulonglong bits; + bool first_found=0; + Item **ptr=args+1; + String *result= make_empty_result(str); + + 
bits=args[0]->val_int(); + if ((null_value=args[0]->null_value)) + return NULL; + + if (arg_count < 65) + bits &= ((ulonglong) 1 << (arg_count-1))-1; + + for (; bits; bits >>= 1, ptr++) + { + if (bits & 1) + { + String *res= (*ptr)->val_str(str); + if (res) // Skip nulls + { + if (!first_found) + { // First argument + first_found=1; + if (res != str) + result=res; // Use original string + else + { + if (tmp_str.copy(*res)) // Don't use 'str' + return make_empty_result(str); + result= &tmp_str; + } + } + else + { + if (result != &tmp_str) + { // Copy data to tmp_str + if (tmp_str.alloc(result->length()+res->length()+1) || + tmp_str.copy(*result)) + return make_empty_result(str); + result= &tmp_str; + } + if (tmp_str.append(STRING_WITH_LEN(","), &my_charset_bin) || tmp_str.append(*res)) + return make_empty_result(str); + } + } + } + } + return result; +} + + +void Item_func_char::print(String *str, enum_query_type query_type) +{ + str->append(Item_func_char::func_name_cstring()); + str->append('('); + print_args(str, 0, query_type); + if (collation.collation != &my_charset_bin) + { + str->append(STRING_WITH_LEN(" using ")); + str->append(collation.collation->cs_name); + } + str->append(')'); +} + + +String *Item_func_char::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + str->length(0); + str->set_charset(collation.collation); + for (uint i=0 ; i < arg_count ; i++) + { + int32 num=(int32) args[i]->val_int(); + if (!args[i]->null_value) + append_char(str, num); + } + str->realloc(str->length()); // Add end 0 (for Purify) + return check_well_formed_result(str); +} + + +void Item_func_char::append_char(String *str, int32 num) +{ + char tmp[4]; + if (num & 0xFF000000L) + { + mi_int4store(tmp, num); + str->append(tmp, 4, &my_charset_bin); + } + else if (num & 0xFF0000L) + { + mi_int3store(tmp, num); + str->append(tmp, 3, &my_charset_bin); + } + else if (num & 0xFF00L) + { + mi_int2store(tmp, num); + str->append(tmp, 2, &my_charset_bin); + } + else + { + tmp[0]= (char) 
num; + str->append(tmp, 1, &my_charset_bin); + } +} + + +String *Item_func_chr::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + str->length(0); + str->set_charset(collation.collation); + int32 num=(int32) args[0]->val_int(); + if (!args[0]->null_value) + append_char(str, num); + else + { + null_value= 1; + return 0; + } + str->realloc(str->length()); // Add end 0 (for Purify) + return check_well_formed_result(str); +} + + +inline String* alloc_buffer(String *res,String *str,String *tmp_value, + ulong length) +{ + if (res->alloced_length() < length) + { + if (str->alloced_length() >= length) + { + (void) str->copy(*res); + str->length(length); + return str; + } + if (tmp_value->alloc(length)) + return 0; + (void) tmp_value->copy(*res); + tmp_value->length(length); + return tmp_value; + } + res->length(length); + return res; +} + + +bool Item_func_repeat::fix_length_and_dec(THD *thd) +{ + if (agg_arg_charsets_for_string_result(collation, args, 1)) + return TRUE; + DBUG_ASSERT(collation.collation != NULL); + if (args[1]->can_eval_in_optimize()) + { + uint32 length= max_length_for_string(args[1]); + ulonglong char_length= (ulonglong) args[0]->max_char_length() * length; + fix_char_length_ulonglong(char_length); + return false; + } + max_length= MAX_BLOB_WIDTH; + set_maybe_null(); + return false; +} + +/** + Item_func_repeat::str is carefully written to avoid reallocs + as much as possible at the cost of a local buffer +*/ + +String *Item_func_repeat::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + uint length,tot_length; + char *to; + /* must be longlong to avoid truncation */ + longlong count= args[1]->val_int(); + String *res= args[0]->val_str(str); + + if (args[0]->null_value || args[1]->null_value) + goto err; // string and/or delim are null + null_value= 0; + + if (count <= 0 && (count == 0 || !args[1]->unsigned_flag)) + return make_empty_result(str); + + /* Assumes that the maximum length of a String is < INT_MAX32. 
*/ + /* Bounds check on count: If this is triggered, we will error. */ + if ((ulonglong) count > INT_MAX32) + count= INT_MAX32; + if (count == 1) // To avoid reallocs + return res; + length=res->length(); + + // Safe length check + { + THD *thd= current_thd; + if (length > thd->variables.max_allowed_packet / (uint) count) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), thd->variables.max_allowed_packet); + goto err; + } + } + tot_length= length*(uint) count; + if (!(res= alloc_buffer(res,str,&tmp_value,tot_length))) + goto err; + + to=(char*) res->ptr()+length; + while (--count) + { + memcpy(to,res->ptr(),length); + to+=length; + } + return (res); + +err: + null_value=1; + return 0; +} + + +bool Item_func_space::fix_length_and_dec(THD *thd) +{ + collation.set(default_charset(), DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII); + if (args[0]->can_eval_in_optimize()) + { + fix_char_length_ulonglong(max_length_for_string(args[0])); + return false; + } + max_length= MAX_BLOB_WIDTH; + set_maybe_null(); + return false; +} + + +String *Item_func_space::val_str(String *str) +{ + uint tot_length; + longlong count= args[0]->val_int(); + CHARSET_INFO *cs= collation.collation; + + if (args[0]->null_value) + goto err; // string and/or delim are null + null_value= 0; + + if (count <= 0 && (count == 0 || !args[0]->unsigned_flag)) + return make_empty_result(str); + /* + Assumes that the maximum length of a String is < INT_MAX32. + Bounds check on count: If this is triggered, we will error. 
+ */ + if ((ulonglong) count > INT_MAX32) + count= INT_MAX32; + + // Safe length check + tot_length= (uint) count * cs->mbminlen; + { + THD *thd= current_thd; + if (tot_length > thd->variables.max_allowed_packet) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), + thd->variables.max_allowed_packet); + goto err; + } + } + if (str->alloc(tot_length)) + goto err; + str->length(tot_length); + str->set_charset(cs); + cs->fill((char*) str->ptr(), tot_length, ' '); + return str; + +err: + null_value= 1; + return 0; +} + + +bool Item_func_binlog_gtid_pos::fix_length_and_dec(THD *thd) +{ + collation.set(system_charset_info); + max_length= MAX_BLOB_WIDTH; + set_maybe_null(); + return FALSE; +} + + +String *Item_func_binlog_gtid_pos::val_str(String *str) +{ + DBUG_ASSERT(fixed()); +#ifndef HAVE_REPLICATION + null_value= 0; + str->copy("", 0, system_charset_info); + return str; +#else + String name_str, *name; + longlong pos; + + if (args[0]->null_value || args[1]->null_value) + goto err; + + name= args[0]->val_str(&name_str); + pos= args[1]->val_int(); + + if (pos < 0 || pos > UINT_MAX32) + goto err; + + if (gtid_state_from_binlog_pos(name->c_ptr_safe(), (uint32)pos, str)) + goto err; + null_value= 0; + return str; + +err: + null_value= 1; + return NULL; +#endif /* !HAVE_REPLICATION */ +} + + +static String *default_pad_str(String *pad_str, CHARSET_INFO *collation) +{ + pad_str->set_charset(collation); + pad_str->length(0); + pad_str->append(" ", 1); + return pad_str; +} + +bool Item_func_pad::fix_length_and_dec(THD *thd) +{ + if (arg_count == 3) + { + String *str; + if (!args[2]->basic_const_item() || !(str= args[2]->val_str(&pad_str)) || + !str->length()) + set_maybe_null(); + // Handle character set for args[0] and args[2]. 
+ if (agg_arg_charsets_for_string_result(collation, &args[0], 2, 2)) + return TRUE; + } + else + { + if (agg_arg_charsets_for_string_result(collation, &args[0], 1, 1)) + return TRUE; + default_pad_str(&pad_str, collation.collation); /* 2-arg form pads with a single space */ + } + + DBUG_ASSERT(collation.collation->mbmaxlen > 0); + if (args[1]->can_eval_in_optimize()) + { + fix_char_length_ulonglong(max_length_for_string(args[1])); + return false; + } + max_length= MAX_BLOB_WIDTH; /* Target length unknown until execution */ + set_maybe_null(); + return false; +} + + +/* + PAD(expr,length,' ') + removes argument's soft dependency on PAD_CHAR_TO_FULL_LENGTH if the result + is longer than the argument's maximum possible length. +*/ +Sql_mode_dependency Item_func_rpad::value_depends_on_sql_mode() const +{ + DBUG_ASSERT(fixed()); + DBUG_ASSERT(arg_count >= 2); + if (!args[1]->value_depends_on_sql_mode_const_item() || + (arg_count == 3 && !args[2]->value_depends_on_sql_mode_const_item())) + return Item_func::value_depends_on_sql_mode(); + Longlong_hybrid len= args[1]->to_longlong_hybrid(); + if (args[1]->null_value || len.neg()) + return Sql_mode_dependency(); // will return NULL + if (len.abs() > 0 && len.abs() < args[0]->max_char_length()) + return Item_func::value_depends_on_sql_mode(); + StringBuffer<64> padstrbuf; + String *padstr= arg_count == 3 ? args[2]->val_str(&padstrbuf) : + default_pad_str(&padstrbuf, collation.collation); + if (!padstr || !padstr->length()) + return Sql_mode_dependency(); // will return NULL + if (padstr->lengthsp() != 0) + return Item_func::value_depends_on_sql_mode(); // will pad not only spaces + // RPAD(expr, length, ' ') -- with a long enough length + return ((args[0]->value_depends_on_sql_mode() | + args[1]->value_depends_on_sql_mode()) & + Sql_mode_dependency(~0, ~MODE_PAD_CHAR_TO_FULL_LENGTH)).
+ soft_to_hard(); +} + + + +String *Item_func_rpad::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + uint32 res_byte_length,res_char_length,pad_char_length,pad_byte_length; + char *to; + const char *ptr_pad; + /* must be longlong to avoid truncation */ + longlong count= args[1]->val_int(); + longlong byte_count; + String *res= args[0]->val_str(str); + String *rpad= arg_count == 2 ? &pad_str : args[2]->val_str(&pad_str); + + if (!res || args[1]->null_value || !rpad || + ((count < 0) && !args[1]->unsigned_flag)) + goto err; + + null_value=0; + + if (count == 0) + return make_empty_result(str); + + /* Assumes that the maximum length of a String is < INT_MAX32. */ + /* Set here so that rest of code sees out-of-bound value as such. */ + if ((ulonglong) count > INT_MAX32) + count= INT_MAX32; + /* + There is one exception not handled (intentionally) by the character set + aggregation code. If one string is strong side and is binary, and + another one is weak side and is a multi-byte character string, + then we need to operate on the second string in terms on bytes when + calling ::numchars() and ::charpos(), rather than in terms of characters. + Lets substitute its character set to binary. 
+ */ + if (collation.collation == &my_charset_bin) + { + res->set_charset(&my_charset_bin); + rpad->set_charset(&my_charset_bin); + } + + if (count <= (res_char_length= res->numchars())) + { // String to pad is big enough + res->length(res->charpos((int) count)); // Shorten result if longer + return (res); + } + + byte_count= count * collation.collation->mbmaxlen; + { + THD *thd= current_thd; + if ((ulonglong) byte_count > thd->variables.max_allowed_packet) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), thd->variables.max_allowed_packet); + goto err; + } + } + + if (arg_count == 3) + { + if (args[2]->null_value || !(pad_char_length= rpad->numchars())) + goto err; + } + else + pad_char_length= 1; // Implicit space + + res_byte_length= res->length(); /* Must be done before alloc_buffer */ + if (!(res= alloc_buffer(res,str,&tmp_value, (ulong) byte_count))) + goto err; + + to= (char*) res->ptr()+res_byte_length; + ptr_pad=rpad->ptr(); + pad_byte_length= rpad->length(); + count-= res_char_length; + for ( ; (uint32) count > pad_char_length; count-= pad_char_length) + { + memcpy(to,ptr_pad,pad_byte_length); + to+= pad_byte_length; + } + if (count) + { + pad_byte_length= rpad->charpos((int) count); + memcpy(to,ptr_pad,(size_t) pad_byte_length); + to+= pad_byte_length; + } + res->length((uint) (to- (char*) res->ptr())); + return (res); + + err: + null_value=1; + return 0; +} + + +String *Item_func_lpad::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + uint32 res_char_length,pad_char_length; + /* must be longlong to avoid truncation */ + longlong count= args[1]->val_int(); + longlong byte_count; + String *res= args[0]->val_str(&tmp_value); + String *pad= arg_count == 2 ? 
&pad_str : args[2]->val_str(&pad_str); + + if (!res || args[1]->null_value || !pad || + ((count < 0) && !args[1]->unsigned_flag)) + goto err; + + null_value=0; + + if (count == 0) + return make_empty_result(str); + + /* Assumes that the maximum length of a String is < INT_MAX32. */ + /* Set here so that rest of code sees out-of-bound value as such. */ + if ((ulonglong) count > INT_MAX32) + count= INT_MAX32; + + /* + There is one exception not handled (intentionally) by the character set + aggregation code. If one string is strong side and is binary, and + another one is weak side and is a multi-byte character string, + then we need to operate on the second string in terms on bytes when + calling ::numchars() and ::charpos(), rather than in terms of characters. + Lets substitute its character set to binary. + */ + if (collation.collation == &my_charset_bin) + { + res->set_charset(&my_charset_bin); + pad->set_charset(&my_charset_bin); + } + + res_char_length= res->numchars(); + + if (count <= res_char_length) + { + res->length(res->charpos((int) count)); + return res; + } + + byte_count= count * collation.collation->mbmaxlen; + + { + THD *thd= current_thd; + if ((ulonglong) byte_count > thd->variables.max_allowed_packet) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), thd->variables.max_allowed_packet); + goto err; + } + } + + if (str->alloc((uint32) byte_count)) + goto err; + + if (arg_count == 3) + { + if (args[2]->null_value || !(pad_char_length= pad->numchars())) + goto err; + } + else + pad_char_length= 1; // Implicit space + + str->length(0); + str->set_charset(collation.collation); + count-= res_char_length; + while (count >= pad_char_length) + { + str->append(*pad); + count-= pad_char_length; + } + if (count > 0) + str->append(pad->ptr(), pad->charpos((int) count), collation.collation); + + str->append(*res); + null_value= 0; + return str; + 
+err: + null_value= 1; + return 0; +} + + +String *Item_func_conv::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String *res= args[0]->val_str(str); + char *endptr,ans[65],*ptr; + longlong dec; + int from_base= (int) args[1]->val_int(); + int to_base= (int) args[2]->val_int(); + int err; + + // Note that abs(INT_MIN) is undefined. + if (args[0]->null_value || args[1]->null_value || args[2]->null_value || + from_base == INT_MIN || to_base == INT_MIN || + abs(to_base) > 36 || abs(to_base) < 2 || + abs(from_base) > 36 || abs(from_base) < 2 || !(res->length())) + { + null_value= 1; + return NULL; + } + null_value= 0; + unsigned_flag= !(from_base < 0); + + if (args[0]->field_type() == MYSQL_TYPE_BIT) + { + /* + Special case: The string representation of BIT doesn't resemble the + decimal representation, so we shouldn't change it to string and then to + decimal. + */ + dec= args[0]->val_int(); + } + else + { + if (from_base < 0) + dec= res->charset()->strntoll(res->ptr(), res->length(), + -from_base, &endptr, &err); + else + dec= (longlong) res->charset()->strntoull(res->ptr(), res->length(), + from_base, &endptr, &err); + } + + uint dummy_errors; /* NOTE(review): `||` binds tighter than `?:`, so the NULL check on longlong2str() is folded into the ternary condition; harmless since to_base was validated to 2..36 so longlong2str() cannot fail here -- confirm */ + if (!(ptr= longlong2str(dec, ans, to_base)) || + (collation.collation->state & MY_CS_NONASCII) ? + str->copy(ans, (uint32) (ptr - ans), &my_charset_latin1, + collation.collation, &dummy_errors) : + str->copy(ans, (uint32) (ptr - ans), collation.collation)) + { + null_value= 1; + return NULL; + } + return str; +} + + +/* + This function is needed as Item_func_conv_charset stores cached values + in str_value. +*/ + +int Item_func_conv_charset::save_in_field(Field *field, bool no_conversions) +{ + String *result; + CHARSET_INFO *cs= collation.collation; + + result= val_str(&str_value); + if (null_value) + return set_field_to_null_with_conversions(field, no_conversions); + + /* NOTE: If null_value == FALSE, "result" must be not NULL.
*/ + field->set_notnull(); + int error= field->store(result->ptr(),result->length(),cs); + return error; +} + + +String *Item_func_conv_charset::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + if (use_cached_value) + return null_value ? 0 : &str_value; + String *arg= args[0]->val_str(&tmp_value); + String_copier_for_item copier(current_thd); + return ((null_value= args[0]->null_value || + copier.copy_with_warn(collation.collation, str, + arg->charset(), arg->ptr(), + arg->length(), arg->length()))) ? + 0 : str; +} + +bool Item_func_conv_charset::fix_length_and_dec(THD *thd) +{ + DBUG_ASSERT(collation.derivation == DERIVATION_IMPLICIT); + fix_char_length(args[0]->max_char_length()); + return FALSE; +} + +void Item_func_conv_charset::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("convert(")); + args[0]->print(str, query_type); + str->append(STRING_WITH_LEN(" using ")); + str->append(collation.collation->cs_name); + str->append(')'); +} + +String *Item_func_set_collation::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + str=args[0]->val_str(str); + if ((null_value=args[0]->null_value)) + return 0; + str->set_charset(collation.collation); + return str; +} + +bool Item_func_set_collation::fix_length_and_dec(THD *thd) +{ + if (agg_arg_charsets_for_string_result(collation, args, 1)) + return true; + Lex_exact_charset_opt_extended_collate cl(collation.collation, true); + if (cl.merge_collation_override(m_set_collation)) + return true; + collation.set(cl.collation().charset_info(), DERIVATION_EXPLICIT, + args[0]->collation.repertoire); + max_length= args[0]->max_length; + return FALSE; +} + + +bool Item_func_set_collation::eq(const Item *item, bool binary_cmp) const +{ + return Item_func::eq(item, binary_cmp) && + collation.collation == item->collation.collation; +} + + +void Item_func_set_collation::print(String *str, enum_query_type query_type) +{ + args[0]->print_parenthesised(str, query_type, precedence()); + 
str->append(STRING_WITH_LEN(" collate ")); + str->append(m_set_collation.collation_name_for_show()); +} + +String *Item_func_charset::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + uint dummy_errors; + + CHARSET_INFO *cs= args[0]->charset_for_protocol(); + null_value= 0; + str->copy(cs->cs_name.str, cs->cs_name.length, + &my_charset_latin1, collation.collation, &dummy_errors); + return str; +} + +String *Item_func_collation::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + uint dummy_errors; + CHARSET_INFO *cs= args[0]->charset_for_protocol(); + + null_value= 0; + str->copy(cs->coll_name.str, cs->coll_name.length, &my_charset_latin1, + collation.collation, &dummy_errors); + return str; +} + + +bool Item_func_weight_string::fix_length_and_dec(THD *thd) +{ + CHARSET_INFO *cs= args[0]->collation.collation; + collation.set(&my_charset_bin, args[0]->collation.derivation); + weigth_flags= my_strxfrm_flag_normalize(cs, weigth_flags); + /* + Use result_length if it was given explicitly in constructor, + otherwise calculate max_length using argument's max_length + and "nweights". + */ + if (!(max_length= result_length)) + { + size_t char_length; + char_length= ((cs->state & MY_CS_STRNXFRM_BAD_NWEIGHTS) || !nweights) ? + args[0]->max_char_length() : nweights * + my_count_bits_uint32(cs->levels_for_order); + max_length= (uint32) cs->strnxfrmlen(char_length * cs->mbmaxlen); + } + set_maybe_null(); + return FALSE; +} + + +/* Return a weight_string according to collation */ +String *Item_func_weight_string::val_str(String *str) +{ + String *res; + CHARSET_INFO *cs= args[0]->collation.collation; + size_t tmp_length, frm_length; + DBUG_ASSERT(fixed()); + + if (args[0]->result_type() != STRING_RESULT || + !(res= args[0]->val_str(&tmp_value))) + goto nl; + + /* + Use result_length if it was given in constructor + explicitly, otherwise calculate result length + from argument and "nweights". 
+ */ + if (!(tmp_length= result_length)) + { + size_t char_length; + if (cs->state & MY_CS_STRNXFRM_BAD_NWEIGHTS) + { + /* + latin2_czech_cs and cp1250_czech_cs do not support + the "nweights" limit in strnxfrm(). Use the full length. + */ + char_length= res->length(); + } + else + { + /* + If we don't need to pad the result with spaces, then it should be + OK to calculate character length of the argument approximately: + "res->length() / cs->mbminlen" can return a number that is + bigger than the real number of characters in the string, so + we'll allocate a little bit more memory but avoid calling + the slow res->numchars(). + In case if we do need to pad with spaces, we call res->numchars() + to know the true number of characters. + */ + if (!(char_length= nweights)) + char_length= (weigth_flags & MY_STRXFRM_PAD_WITH_SPACE) ? + res->numchars() : (res->length() / cs->mbminlen); + } + tmp_length= cs->strnxfrmlen(char_length * cs->mbmaxlen); + } + + { + THD *thd= current_thd; + if (tmp_length > current_thd->variables.max_allowed_packet) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), + thd->variables.max_allowed_packet); + goto nl; + } + } + + if (str->alloc(tmp_length)) + goto nl; + + frm_length= cs->strnxfrm((char*) str->ptr(), tmp_length, + nweights ? 
nweights : (uint) tmp_length, + res->ptr(), res->length(), + weigth_flags); + DBUG_ASSERT(frm_length <= tmp_length); + + str->set_charset(&my_charset_bin); + str->length(frm_length); + null_value= 0; + return str; + +nl: + null_value= 1; + return 0; +} + + +void Item_func_weight_string::print(String *str, enum_query_type query_type) +{ + str->append(func_name_cstring()); + str->append('('); + args[0]->print(str, query_type); + str->append(','); + str->append_ulonglong(result_length); + str->append(','); + str->append_ulonglong(nweights); + str->append(','); + str->append_ulonglong(weigth_flags); + str->append(')'); +} + + +String *Item_func_hex::val_str_ascii_from_val_real(String *str) +{ + ulonglong dec; + double val= args[0]->val_real(); + if ((null_value= args[0]->null_value)) + return 0; + if ((val <= (double) LONGLONG_MIN) || + (val >= (double) (ulonglong) ULONGLONG_MAX)) + dec= ~(longlong) 0; + else + dec= (ulonglong) (val + (val > 0 ? 0.5 : -0.5)); + return str->set_hex(dec) ? make_empty_result(str) : str; +} + + +String *Item_func_hex::val_str_ascii_from_val_str(String *str) +{ + DBUG_ASSERT(&tmp_value != str); + String *res= args[0]->val_str(&tmp_value); + DBUG_ASSERT(res != str); + if ((null_value= (res == NULL))) + return NULL; + return str->set_hex(res->ptr(), res->length()) ? make_empty_result(str) : str; +} + + +String *Item_func_hex::val_str_ascii_from_val_int(String *str) +{ + ulonglong dec= (ulonglong) args[0]->val_int(); + if ((null_value= args[0]->null_value)) + return 0; + return str->set_hex(dec) ? make_empty_result(str) : str; +} + + + /** Convert given hex string to a binary string. 
*/ + +String *Item_func_unhex::val_str(String *str) +{ + const char *from, *end; + char *to; + String *res; + uint length; + DBUG_ASSERT(fixed()); + + res= args[0]->val_str(&tmp_value); + if (!res || str->alloc(length= (1+res->length())/2)) + { + null_value=1; + return 0; + } + + from= res->ptr(); + null_value= 0; + str->set_charset(&my_charset_bin); + str->length(length); + to= (char*) str->ptr(); + if (res->length() % 2) + { + int hex_char; + *to++= hex_char= hexchar_to_int(*from++); + if ((null_value= (hex_char == -1))) + return 0; + } + for (end=res->ptr()+res->length(); from < end ; from+=2, to++) + { + int hex_char1, hex_char2; + hex_char1= hexchar_to_int(from[0]); + hex_char2= hexchar_to_int(from[1]); + if ((null_value= (hex_char1 == -1 || hex_char2 == -1))) + return 0; + *to= (char) ((hex_char1 << 4) | hex_char2); + } + return str; +} + + +#ifndef DBUG_OFF +String *Item_func_like_range::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + longlong nbytes= args[1]->val_int(); + String *res= args[0]->val_str(str); + size_t min_len, max_len; + CHARSET_INFO *cs= collation.collation; + + if (!res || args[0]->null_value || args[1]->null_value || + nbytes < 0 || nbytes > MAX_BLOB_WIDTH || + min_str.alloc((size_t)nbytes) || max_str.alloc((size_t)nbytes)) + goto err; + null_value=0; + + if (cs->like_range(res->ptr(), res->length(), + '\\', '_', '%', (size_t)nbytes, + (char*) min_str.ptr(), (char*) max_str.ptr(), + &min_len, &max_len)) + goto err; + + min_str.set_charset(collation.collation); + max_str.set_charset(collation.collation); + min_str.length(min_len); + max_str.length(max_len); + + return is_min ? 
&min_str : &max_str; + +err: + null_value= 1; + return 0; +} +#endif + + +void Item_func_binary::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("cast(")); + args[0]->print(str, query_type); + str->append(STRING_WITH_LEN(" as binary)")); +} + + +#include // For my_stat + +String *Item_load_file::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String *file_name; + File file; + MY_STAT stat_info; + char path[FN_REFLEN]; + ulonglong file_size; + DBUG_ENTER("load_file"); + + if (!(file_name= args[0]->val_str(str)) +#ifndef NO_EMBEDDED_ACCESS_CHECKS + || !(current_thd->security_ctx->master_access & FILE_ACL) +#endif + ) + goto err; + + (void) fn_format(path, file_name->c_ptr_safe(), mysql_real_data_home, "", + MY_RELATIVE_PATH | MY_UNPACK_FILENAME); + + /* Read only allowed from within dir specified by secure_file_priv */ + if (!is_secure_file_path(path)) + goto err; + + if (!mysql_file_stat(key_file_loadfile, path, &stat_info, MYF(0))) + goto err; + + if (!(stat_info.st_mode & S_IROTH)) + { + /* my_error(ER_TEXTFILE_NOT_READABLE, MYF(0), file_name->c_ptr()); */ + goto err; + } + file_size= stat_info.st_size; + + { + THD *thd= current_thd; + if (file_size >= thd->variables.max_allowed_packet) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), thd->variables.max_allowed_packet); + goto err; + } + } + if (tmp_value.alloc((ulong)file_size)) + goto err; + if ((file= mysql_file_open(key_file_loadfile, + file_name->ptr(), O_RDONLY, MYF(0))) < 0) + goto err; + if (mysql_file_read(file, (uchar*) tmp_value.ptr(), (size_t)stat_info.st_size, + MYF(MY_NABP))) + { + mysql_file_close(file, MYF(0)); + goto err; + } + tmp_value.length((uint32)stat_info.st_size); + mysql_file_close(file, MYF(0)); + null_value = 0; + DBUG_RETURN(&tmp_value); + +err: + null_value = 1; + DBUG_RETURN(0); +} + + +String* Item_func_export_set::val_str(String* str) 
+{ + DBUG_ASSERT(fixed()); + String yes_buf, no_buf, sep_buf; + const ulonglong the_set = (ulonglong) args[0]->val_int(); + const String *yes= args[1]->val_str(&yes_buf); + const String *no= args[2]->val_str(&no_buf); + const String *sep= NULL; + + uint num_set_values = 64; + str->length(0); + str->set_charset(collation.collation); + + /* Check if some argument is a NULL value */ + if (args[0]->null_value || args[1]->null_value || args[2]->null_value) + { + null_value= true; + return NULL; + } + /* + Arg count can only be 3, 4 or 5 here. This is guaranteed from the + grammar for EXPORT_SET() + */ + switch(arg_count) { + case 5: + num_set_values = (uint) args[4]->val_int(); + if (num_set_values > 64) + num_set_values=64; + if (args[4]->null_value) + { + null_value= true; + return NULL; + } + /* Fall through */ + case 4: + if (!(sep = args[3]->val_str(&sep_buf))) // Only true if NULL + { + null_value= true; + return NULL; + } + break; + case 3: + { + /* errors is not checked - assume "," can always be converted */ + uint errors; + sep_buf.copy(STRING_WITH_LEN(","), &my_charset_bin, + collation.collation, &errors); + sep = &sep_buf; + } + break; + default: + DBUG_ASSERT(0); // cannot happen + } + null_value= false; + + THD *thd= current_thd; + const ulong max_allowed_packet= thd->variables.max_allowed_packet; + const uint num_separators= num_set_values > 0 ? 
num_set_values - 1 : 0; + const ulonglong max_total_length= + num_set_values * MY_MAX(yes->length(), no->length()) + + num_separators * sep->length(); + + if (unlikely(max_total_length > max_allowed_packet)) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), max_allowed_packet); + null_value= true; + return NULL; + } + + uint ix; + ulonglong mask; + for (ix= 0, mask=0x1; ix < num_set_values; ++ix, mask = (mask << 1)) + { + if (the_set & mask) + str->append(*yes); + else + str->append(*no); + if (ix != num_separators) + str->append(*sep); + } + return str; +} + +bool Item_func_export_set::fix_length_and_dec(THD *thd) +{ + uint32 length= MY_MAX(args[1]->max_char_length(), args[2]->max_char_length()); + uint32 sep_length= (arg_count > 3 ? args[3]->max_char_length() : 1); + + if (agg_arg_charsets_for_string_result(collation, + args + 1, MY_MIN(4, arg_count) - 1)) + return TRUE; + fix_char_length(length * 64 + sep_length * 63); + return FALSE; +} + + +#define get_esc_bit(mask, num) (1 & (*((mask) + ((num) >> 3))) >> ((num) & 7)) + +/** + QUOTE() function returns argument string in single quotes suitable for + using in a SQL statement. + + Adds a \\ before all characters that needs to be escaped in a SQL string. + We also escape '^Z' (END-OF-FILE in windows) to avoid problems when + running commands from a file in windows. + + This function is very useful when you want to generate SQL statements. + + @note + QUOTE(NULL) returns the string 'NULL' (4 letters, without quotes). + + @retval + str Quoted string + @retval + NULL Out of memory. 
+*/ + +String *Item_func_quote::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + /* + Bit mask that has 1 for set for the position of the following characters: + 0, \, ' and ^Z + */ + + static uchar escmask[32]= + { + 0x01, 0x00, 0x00, 0x04, 0x80, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + }; + + ulong max_allowed_packet= current_thd->variables.max_allowed_packet; + char *from, *to, *end, *start; + String *arg= args[0]->val_str(&tmp_value); + uint arg_length, new_length; + if (!arg) // Null argument + { + /* Return the string 'NULL' */ + str->copy(STRING_WITH_LEN("NULL"), collation.collation); + null_value= 0; + return str; + } + + arg_length= arg->length(); + + if (collation.collation->mbmaxlen == 1) + { + new_length= arg_length + 2; /* for beginning and ending ' signs */ + for (from= (char*) arg->ptr(), end= from + arg_length; from < end; from++) + new_length+= get_esc_bit(escmask, (uchar) *from); + if (new_length > max_allowed_packet) + goto toolong; + } + else + { + new_length= (arg_length * 2) + /* For string characters */ + (2 * collation.collation->mbmaxlen); /* For quotes */ + set_if_smaller(new_length, max_allowed_packet); + } + + if (str->alloc(new_length)) + goto null; + + if (collation.collation->mbmaxlen > 1) + { + CHARSET_INFO *cs= collation.collation; + int mblen; + uchar *to_end; + to= (char*) str->ptr(); + to_end= (uchar*) to + new_length; + + /* Put leading quote */ + if ((mblen= cs->wc_mb('\'', (uchar *) to, to_end)) <= 0) + goto toolong; + to+= mblen; + + for (start= (char*) arg->ptr(), end= start + arg_length; start < end; ) + { + my_wc_t wc; + bool escape; + if ((mblen= cs->mb_wc(&wc, (uchar*) start, (uchar*) end)) <= 0) + goto null; + start+= mblen; + switch (wc) { + case 0: escape= 1; wc= '0'; break; + case '\032': escape= 1; wc= 'Z'; break; + case '\'': escape= 1; break; + case '\\': escape= 1; break; + default: 
escape= 0; break; + } + if (escape) + { + if ((mblen= cs->wc_mb('\\', (uchar*) to, to_end)) <= 0) + goto toolong; + to+= mblen; + } + if ((mblen= cs->wc_mb(wc, (uchar*) to, to_end)) <= 0) + goto toolong; + to+= mblen; + } + + /* Put trailing quote */ + if ((mblen= cs->wc_mb('\'', (uchar *) to, to_end)) <= 0) + goto toolong; + to+= mblen; + new_length= (uint)(to - str->ptr()); + goto ret; + } + + /* + We replace characters from the end to the beginning + */ + to= (char*) str->ptr() + new_length - 1; + *to--= '\''; + for (start= (char*) arg->ptr(),end= start + arg_length; end-- != start; to--) + { + /* + We can't use the bitmask here as we want to replace \O and ^Z with 0 + and Z + */ + switch (*end) { + case 0: + *to--= '0'; + *to= '\\'; + break; + case '\032': + *to--= 'Z'; + *to= '\\'; + break; + case '\'': + case '\\': + *to--= *end; + *to= '\\'; + break; + default: + *to= *end; + break; + } + } + *to= '\''; + +ret: + str->length(new_length); + str->set_charset(collation.collation); + null_value= 0; + return str; + +toolong: + push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(current_thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + func_name(), max_allowed_packet); +null: + null_value= 1; + return 0; +} + +longlong Item_func_uncompressed_length::val_int() +{ + DBUG_ASSERT(fixed()); + String *res= args[0]->val_str(&value); + if (!res) + { + null_value=1; + return 0; /* purecov: inspected */ + } + null_value=0; + if (res->is_empty()) return 0; + + /* + If length is <= 4 bytes, data is corrupt. This is the best we can do + to detect garbage input without decompressing it. + */ + if (res->length() <= 4) + { + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_ZLIB_Z_DATA_ERROR, + ER_THD(thd, ER_ZLIB_Z_DATA_ERROR)); + null_value= 1; + return 0; + } + + /* + res->ptr() using is safe because we have tested that string is at least + 5 bytes long. 
+ res->c_ptr() is not used because: + - we do not need \0 terminated string to get first 4 bytes + - c_ptr() tests simbol after string end (uninitialized memory) which + confuse valgrind + */ + return uint4korr(res->ptr()) & 0x3FFFFFFF; +} + +longlong Item_func_crc32::val_int() +{ + DBUG_ASSERT(fixed()); + DBUG_ASSERT(arg_count == 1 || arg_count == 2); + String *res; + longlong crc; + if (arg_count > 1) + { + crc= args[0]->val_int(); + null_value= args[0]->null_value; + if (null_value) + return 0; + res= args[1]->val_str(&value); + } + else + { + crc= 0; + null_value= 0; + res= args[0]->val_str(&value); + } + + if (!res) + { + null_value=1; + return 0; /* purecov: inspected */ + } + + return static_cast + (ulonglong{crc_func(uint32_t(crc), res->ptr(), res->length())}); +} + +#ifdef HAVE_COMPRESS +#include "zlib.h" + +String *Item_func_compress::val_str(String *str) +{ + int err= Z_OK, code; + size_t new_size; + String *res; + Byte *body; + char *tmp, *last_char; + DBUG_ASSERT(fixed()); + + if (!(res= args[0]->val_str(&tmp_value))) + { + null_value= 1; + return 0; + } + null_value= 0; + if (res->is_empty()) return res; + + /* + Citation from zlib.h (comment for compress function): + + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be at least 0.1% larger than + sourceLen plus 12 bytes. + We assume here that the buffer can't grow more than .25 %. 
+ */ + new_size= res->length() + res->length() / 5 + 12; + + // Check new_size overflow: new_size <= res->length() + if (((uint32) (new_size+5) <= res->length()) || + str->alloc((uint32) new_size + 4 + 1)) + { + null_value= 1; + return 0; + } + + body= ((Byte*)str->ptr()) + 4; + + // As far as we have checked res->is_empty() we can use ptr() + if ((err= my_compress_buffer(body, &new_size, (const uchar *)res->ptr(), + res->length())) != Z_OK) + { + THD *thd= current_thd; + code= err==Z_MEM_ERROR ? ER_ZLIB_Z_MEM_ERROR : ER_ZLIB_Z_BUF_ERROR; + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, code, + ER_THD(thd, code)); + null_value= 1; + return 0; + } + + tmp= (char*) str->ptr(); // int4store is a macro; avoid side effects + int4store(tmp, res->length() & 0x3FFFFFFF); + + /* This is to ensure that things works for CHAR fields, which trim ' ': */ + last_char= ((char*)body)+new_size-1; + if (*last_char == ' ') + { + *++last_char= '.'; + new_size++; + } + + str->length((uint32)new_size + 4); + str->set_charset(&my_charset_bin); + return str; +} + + +String *Item_func_uncompress::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + String *res= args[0]->val_str(&tmp_value); + ulong new_size; + int err; + uint code; + + if (!res) + goto err; + null_value= 0; + if (res->is_empty()) + return res; + + /* If length is less than 4 bytes, data is corrupt */ + if (res->length() <= 4) + { + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_ZLIB_Z_DATA_ERROR, + ER_THD(thd, ER_ZLIB_Z_DATA_ERROR)); + goto err; + } + + /* Size of uncompressed data is stored as first 4 bytes of field */ + new_size= uint4korr(res->ptr()) & 0x3FFFFFFF; + if (new_size > current_thd->variables.max_allowed_packet) + { + THD *thd= current_thd; + push_warning_printf(thd,Sql_condition::WARN_LEVEL_WARN, + ER_TOO_BIG_FOR_UNCOMPRESS, + ER_THD(thd, ER_TOO_BIG_FOR_UNCOMPRESS), + static_cast(thd->variables. 
+ max_allowed_packet)); + goto err; + } + if (str->alloc((uint32)new_size)) + goto err; + + if ((err= uncompress((Byte*)str->ptr(), &new_size, + ((const Bytef*)res->ptr())+4,res->length()-4)) == Z_OK) + { + str->length((uint32) new_size); + return str; + } + + code= ((err == Z_BUF_ERROR) ? ER_ZLIB_Z_BUF_ERROR : + ((err == Z_MEM_ERROR) ? ER_ZLIB_Z_MEM_ERROR : ER_ZLIB_Z_DATA_ERROR)); + { + THD *thd= current_thd; + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, code, ER_THD(thd, code)); + } + +err: + null_value= 1; + return 0; +} +#endif + + +Item_func_dyncol_create::Item_func_dyncol_create(THD *thd, List &args, + DYNCALL_CREATE_DEF *dfs): + Item_str_func(thd, args), defs(dfs), vals(0), keys_num(NULL), keys_str(NULL), + names(FALSE), force_names(FALSE) +{ + DBUG_ASSERT((args.elements & 0x1) == 0); // even number of arguments +} + + +bool Item_func_dyncol_create::fix_fields(THD *thd, Item **ref) +{ + uint i; + bool res= Item_func::fix_fields(thd, ref); // no need Item_str_func here + if (!res) + { + vals= (DYNAMIC_COLUMN_VALUE *) alloc_root(thd->mem_root, + sizeof(DYNAMIC_COLUMN_VALUE) * + (arg_count / 2)); + for (i= 0; + i + 1 < arg_count && args[i]->result_type() == INT_RESULT; + i+= 2) + ; + if (i + 1 < arg_count) + { + names= TRUE; + } + + keys_num= (uint *) alloc_root(thd->mem_root, + (sizeof(LEX_STRING) > sizeof(uint) ? 
+ sizeof(LEX_STRING) : + sizeof(uint)) * + (arg_count / 2)); + keys_str= (LEX_STRING *) keys_num; + status_var_increment(thd->status_var.feature_dynamic_columns); + } + return res || vals == 0 || keys_num == 0; +} + + +bool Item_func_dyncol_create::fix_length_and_dec(THD *thd) +{ + max_length= MAX_BLOB_WIDTH; + set_maybe_null(); + collation.set(&my_charset_bin); + decimals= 0; + return FALSE; +} + +bool Item_func_dyncol_create::prepare_arguments(THD *thd, bool force_names_arg) +{ + char buff[STRING_BUFFER_USUAL_SIZE]; + String *res, tmp(buff, sizeof(buff), &my_charset_bin); + uint column_count= (arg_count / 2); + uint i; + my_decimal dtmp, *dres; + force_names= force_names_arg; + + if (!(names || force_names)) + { + for (i= 0; i < column_count; i++) + { + uint valpos= i * 2 + 1; + DYNAMIC_COLUMN_TYPE type= defs[i].type; + if (type == DYN_COL_NULL) + type= args[valpos]->type_handler()->dyncol_type(args[valpos]); + if (type == DYN_COL_STRING && + args[valpos]->type() == Item::FUNC_ITEM && + ((Item_func *)args[valpos])->functype() == DYNCOL_FUNC) + { + force_names= 1; + break; + } + } + } + + /* get values */ + for (i= 0; i < column_count; i++) + { + uint valpos= i * 2 + 1; + DYNAMIC_COLUMN_TYPE type= defs[i].type; + if (type == DYN_COL_NULL) // auto detect + type= args[valpos]->type_handler()->dyncol_type(args[valpos]); + if (type == DYN_COL_STRING && + args[valpos]->type() == Item::FUNC_ITEM && + ((Item_func *)args[valpos])->functype() == DYNCOL_FUNC) + { + DBUG_ASSERT(names || force_names); + type= DYN_COL_DYNCOL; + } + if (names || force_names) + { + res= args[i * 2]->val_str(&tmp); + if (res) + { + // guaranty UTF-8 string for names + if (my_charset_same(res->charset(), DYNCOL_UTF)) + { + keys_str[i].length= res->length(); + keys_str[i].str= thd->strmake(res->ptr(), res->length()); + } + else + { + uint strlen= res->length() * DYNCOL_UTF->mbmaxlen + 1; + uint dummy_errors; + if (char *str= (char *) thd->alloc(strlen)) + { + keys_str[i].length= + 
copy_and_convert(str, strlen, DYNCOL_UTF, + res->ptr(), res->length(), res->charset(), + &dummy_errors); + keys_str[i].str= str; + } + else + keys_str[i].length= 0; + + } + } + else + { + keys_str[i].length= 0; + keys_str[i].str= NULL; + } + } + else + keys_num[i]= (uint) args[i * 2]->val_int(); + if (args[i * 2]->null_value) + { + /* to make cleanup possible */ + for (; i < column_count; i++) + vals[i].type= DYN_COL_NULL; + return 1; + } + vals[i].type= type; + switch (type) { + case DYN_COL_NULL: + DBUG_ASSERT(args[valpos]->field_type() == MYSQL_TYPE_NULL); + break; + case DYN_COL_INT: + vals[i].x.long_value= args[valpos]->val_int(); + break; + case DYN_COL_UINT: + vals[i].x.ulong_value= args[valpos]->val_int(); + break; + case DYN_COL_DOUBLE: + vals[i].x.double_value= args[valpos]->val_real(); + break; + case DYN_COL_DYNCOL: + case DYN_COL_STRING: + res= args[valpos]->val_str(&tmp); + if (res && defs[i].cs) + res->set_charset(defs[i].cs); + if (res && + (vals[i].x.string.value.str= thd->strmake(res->ptr(), res->length()))) + { + vals[i].x.string.value.length= res->length(); + vals[i].x.string.charset= res->charset(); + } + else + { + args[valpos]->null_value= 1; // In case of out of memory + vals[i].x.string.value.str= NULL; + vals[i].x.string.value.length= 0; // just to be safe + } + break; + case DYN_COL_DECIMAL: + if ((dres= args[valpos]->val_decimal(&dtmp))) + { + mariadb_dyncol_prepare_decimal(&vals[i]); + DBUG_ASSERT(vals[i].x.decimal.value.len == dres->len); + vals[i].x.decimal.value.intg= dres->intg; + vals[i].x.decimal.value.frac= dres->frac; + vals[i].x.decimal.value.sign= dres->sign(); + memcpy(vals[i].x.decimal.buffer, dres->buf, + sizeof(vals[i].x.decimal.buffer)); + } + else + { + mariadb_dyncol_prepare_decimal(&vals[i]); // just to be safe + DBUG_ASSERT(args[valpos]->null_value); + } + break; + case DYN_COL_DATETIME: + case DYN_COL_DATE: + args[valpos]->get_date(thd, &vals[i].x.time_value, + Datetime::Options(thd)); + break; + case DYN_COL_TIME: + 
args[valpos]->get_time(thd, &vals[i].x.time_value); + break; + default: + DBUG_ASSERT(0); + vals[i].type= DYN_COL_NULL; + } + if (vals[i].type != DYN_COL_NULL && args[valpos]->null_value) + { + vals[i].type= DYN_COL_NULL; + } + } + return FALSE; +} + + +String *Item_func_dyncol_create::val_str(String *str) +{ + DYNAMIC_COLUMN col; + String *res; + uint column_count= (arg_count / 2); + enum enum_dyncol_func_result rc; + DBUG_ASSERT((arg_count & 0x1) == 0); // even number of arguments + + /* FIXME: add thd argument to Item::val_str() */ + if (prepare_arguments(current_thd, FALSE)) + { + res= NULL; + null_value= 1; + } + else + { + if ((rc= ((names || force_names) ? + mariadb_dyncol_create_many_named(&col, column_count, keys_str, + vals, TRUE) : + mariadb_dyncol_create_many_num(&col, column_count, keys_num, + vals, TRUE)))) + { + dynamic_column_error_message(rc); + mariadb_dyncol_free(&col); + res= NULL; + null_value= TRUE; + } + else + { + /* Move result from DYNAMIC_COLUMN to str_value */ + char *ptr; + size_t length, alloc_length; + dynstr_reassociate(&col, &ptr, &length, &alloc_length); + str_value.reset(ptr, length, alloc_length, &my_charset_bin); + res= &str_value; + null_value= FALSE; + } + } + + return res; +} + +void Item_func_dyncol_create::print_arguments(String *str, + enum_query_type query_type) +{ + uint i; + uint column_count= (arg_count / 2); + for (i= 0; i < column_count; i++) + { + args[i*2]->print(str, query_type); + str->append(','); + args[i*2 + 1]->print(str, query_type); + switch (defs[i].type) { + case DYN_COL_NULL: // automatic type => write nothing + break; + case DYN_COL_INT: + str->append(STRING_WITH_LEN(" AS int")); + break; + case DYN_COL_UINT: + str->append(STRING_WITH_LEN(" AS unsigned int")); + break; + case DYN_COL_DOUBLE: + str->append(STRING_WITH_LEN(" AS double")); + break; + case DYN_COL_DYNCOL: + case DYN_COL_STRING: + str->append(STRING_WITH_LEN(" AS char")); + if (defs[i].cs) + { + str->append(STRING_WITH_LEN(" charset ")); + 
str->append(defs[i].cs->cs_name); + str->append(' '); + } + break; + case DYN_COL_DECIMAL: + str->append(STRING_WITH_LEN(" AS decimal")); + break; + case DYN_COL_DATETIME: + str->append(STRING_WITH_LEN(" AS datetime")); + break; + case DYN_COL_DATE: + str->append(STRING_WITH_LEN(" AS date")); + break; + case DYN_COL_TIME: + str->append(STRING_WITH_LEN(" AS time")); + break; + } + if (i < column_count - 1) + str->append(','); + } +} + + +void Item_func_dyncol_create::print(String *str, + enum_query_type query_type) +{ + DBUG_ASSERT((arg_count & 0x1) == 0); // even number of arguments + str->append(STRING_WITH_LEN("column_create(")); + print_arguments(str, query_type); + str->append(')'); +} + +String *Item_func_dyncol_json::val_str(String *str) +{ + DYNAMIC_STRING json, col; + String *res; + enum enum_dyncol_func_result rc; + + res= args[0]->val_str(str); + if (args[0]->null_value) + goto null; + + col.str= (char *)res->ptr(); + col.length= res->length(); + if ((rc= mariadb_dyncol_json(&col, &json))) + { + dynamic_column_error_message(rc); + goto null; + } + bzero(&col, sizeof(col)); + { + /* Move result from DYNAMIC_COLUMN to str */ + char *ptr; + size_t length, alloc_length; + dynstr_reassociate(&json, &ptr, &length, &alloc_length); + str->reset(ptr, length, alloc_length, DYNCOL_UTF); + null_value= FALSE; + } + str->set_charset(DYNCOL_UTF); + return str; + +null: + bzero(&col, sizeof(col)); + null_value= TRUE; + return NULL; +} + +String *Item_func_dyncol_add::val_str(String *str) +{ + DYNAMIC_COLUMN col; + String *res; + uint column_count= (arg_count / 2); + enum enum_dyncol_func_result rc; + DBUG_ASSERT((arg_count & 0x1) == 1); // odd number of arguments + + /* We store the packed data last */ + res= args[arg_count - 1]->val_str(str); + if (args[arg_count - 1]->null_value || + init_dynamic_string(&col, NULL, res->length() + STRING_BUFFER_USUAL_SIZE, + STRING_BUFFER_USUAL_SIZE)) + goto null; + + col.length= res->length(); + memcpy(col.str, res->ptr(), 
col.length); + + /* FIXME: add thd argument to Item::val_str() */ + if (prepare_arguments(current_thd, mariadb_dyncol_has_names(&col))) + goto null; + + if ((rc= ((names || force_names) ? + mariadb_dyncol_update_many_named(&col, column_count, + keys_str, vals) : + mariadb_dyncol_update_many_num(&col, column_count, + keys_num, vals)))) + { + dynamic_column_error_message(rc); + mariadb_dyncol_free(&col); + goto null; + } + + { + /* Move result from DYNAMIC_COLUMN to str */ + char *ptr; + size_t length, alloc_length; + dynstr_reassociate(&col, &ptr, &length, &alloc_length); + str->reset(ptr, length, alloc_length, &my_charset_bin); + null_value= FALSE; + } + + return str; + +null: + null_value= TRUE; + return NULL; +} + + +void Item_func_dyncol_add::print(String *str, + enum_query_type query_type) +{ + DBUG_ASSERT((arg_count & 0x1) == 1); // odd number of arguments + str->append(STRING_WITH_LEN("column_add(")); + args[arg_count - 1]->print(str, query_type); + str->append(','); + print_arguments(str, query_type); + str->append(')'); +} + + +/** + Get value for a column stored in a dynamic column + + @notes + This function ensures that null_value is set correctly +*/ + +bool Item_dyncol_get::get_dyn_value(THD *thd, DYNAMIC_COLUMN_VALUE *val, + String *tmp) +{ + DYNAMIC_COLUMN dyn_str; + String *res; + longlong num= 0; + LEX_STRING buf, *name= NULL; + char nmstrbuf[11]; + String nmbuf(nmstrbuf, sizeof(nmstrbuf), system_charset_info); + enum enum_dyncol_func_result rc; + + if (args[1]->result_type() == INT_RESULT) + num= args[1]->val_int(); + else + { + String *nm= args[1]->val_str(&nmbuf); + if (!nm || args[1]->null_value) + { + null_value= 1; + return 1; + } + + if (my_charset_same(nm->charset(), DYNCOL_UTF)) + { + buf.str= (char *) nm->ptr(); + buf.length= nm->length(); + } + else + { + uint strlen= nm->length() * DYNCOL_UTF->mbmaxlen + 1; + uint dummy_errors; + buf.str= (char *) thd->alloc(strlen); + if (buf.str) + { + buf.length= + copy_and_convert(buf.str, strlen, 
DYNCOL_UTF, + nm->ptr(), nm->length(), nm->charset(), + &dummy_errors); + } + else + buf.length= 0; + } + name= &buf; + } + + + if (args[1]->null_value || num < 0 || num > INT_MAX) + { + null_value= 1; + return 1; + } + + res= args[0]->val_str(tmp); + if (args[0]->null_value) + { + null_value= 1; + return 1; + } + + dyn_str.str= (char*) res->ptr(); + dyn_str.length= res->length(); + if ((rc= ((name == NULL) ? + mariadb_dyncol_get_num(&dyn_str, (uint) num, val) : + mariadb_dyncol_get_named(&dyn_str, name, val)))) + { + dynamic_column_error_message(rc); + null_value= 1; + return 1; + } + + null_value= 0; + return 0; // ok +} + + +String *Item_dyncol_get::val_str(String *str_result) +{ + DYNAMIC_COLUMN_VALUE val; + char buff[STRING_BUFFER_USUAL_SIZE]; + String tmp(buff, sizeof(buff), &my_charset_bin); + + if (get_dyn_value(current_thd, &val, &tmp)) + return NULL; + + switch (val.type) { + case DYN_COL_NULL: + goto null; + case DYN_COL_INT: + case DYN_COL_UINT: + str_result->set_int(val.x.long_value, MY_TEST(val.type == DYN_COL_UINT), + &my_charset_latin1); + break; + case DYN_COL_DOUBLE: + str_result->set_real(val.x.double_value, NOT_FIXED_DEC, &my_charset_latin1); + break; + case DYN_COL_DYNCOL: + case DYN_COL_STRING: + if ((char*) tmp.ptr() <= val.x.string.value.str && + (char*) tmp.ptr() + tmp.length() >= val.x.string.value.str) + { + /* value is allocated in tmp buffer; We have to make a copy */ + str_result->copy(val.x.string.value.str, val.x.string.value.length, + val.x.string.charset); + } + else + { + /* + It's safe to use the current value because it's either pointing + into a field or in a buffer for another item and this buffer + is not going to be deleted during expression evaluation + */ + str_result->set(val.x.string.value.str, val.x.string.value.length, + val.x.string.charset); + } + break; + case DYN_COL_DECIMAL: + { + int res; + int length= decimal_string_size(&val.x.decimal.value); + if (str_result->alloc(length)) + goto null; + if ((res= 
decimal2string(&val.x.decimal.value, (char*) str_result->ptr(), + &length, 0, 0, ' ')) != E_DEC_OK) + { + char buff[40]; + int len= sizeof(buff); + DBUG_ASSERT(length < (int)sizeof(buff)); + decimal2string(&val.x.decimal.value, buff, &len, 0, 0, ' '); + decimal_operation_results(res, buff, "CHAR"); + } + str_result->set_charset(&my_charset_latin1); + str_result->length(length); + break; + } + case DYN_COL_DATETIME: + case DYN_COL_DATE: + case DYN_COL_TIME: + { + int length; + /* + We use AUTO_SEC_PART_DIGITS here to ensure that we do not loose + any microseconds from the data. This is safe to do as we are + asked to return the time argument as a string. + */ + if (str_result->alloc(MAX_DATE_STRING_REP_LENGTH) || + !(length= my_TIME_to_str(&val.x.time_value, (char*) str_result->ptr(), + AUTO_SEC_PART_DIGITS))) + goto null; + str_result->set_charset(&my_charset_latin1); + str_result->length(length); + break; + } + } + return str_result; + +null: + null_value= TRUE; + return 0; +} + + +longlong Item_dyncol_get::val_int() +{ + THD *thd= current_thd; + DYNAMIC_COLUMN_VALUE val; + char buff[STRING_BUFFER_USUAL_SIZE]; + String tmp(buff, sizeof(buff), &my_charset_bin); + + if (get_dyn_value(thd, &val, &tmp)) + return 0; + + switch (val.type) { + case DYN_COL_DYNCOL: + case DYN_COL_NULL: + goto null; + case DYN_COL_UINT: + unsigned_flag= 1; // Make it possible for caller to detect sign + return val.x.long_value; + case DYN_COL_INT: + unsigned_flag= 0; // Make it possible for caller to detect sign + return val.x.long_value; + case DYN_COL_DOUBLE: + return Converter_double_to_longlong_with_warn(thd, val.x.double_value, + unsigned_flag).result(); + case DYN_COL_STRING: + { + int error; + longlong num; + char *end= val.x.string.value.str + val.x.string.value.length, *org_end= end; + + num= my_strtoll10(val.x.string.value.str, &end, &error); + if (unlikely(end != org_end || error > 0)) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_BAD_DATA, + ER_THD(thd, 
ER_BAD_DATA), + ErrConvString(val.x.string.value.str, + val.x.string.value.length, + val.x.string.charset).ptr(), + unsigned_flag ? "UNSIGNED INT" : "INT"); + } + unsigned_flag= error >= 0; + return num; + } + case DYN_COL_DECIMAL: + { + longlong num; + my_decimal2int(E_DEC_FATAL_ERROR, &val.x.decimal.value, unsigned_flag, + &num); + return num; + } + case DYN_COL_DATETIME: + case DYN_COL_DATE: + case DYN_COL_TIME: + unsigned_flag= !val.x.time_value.neg; + if (unsigned_flag) + return TIME_to_ulonglong(&val.x.time_value); + else + return -(longlong)TIME_to_ulonglong(&val.x.time_value); + } + +null: + null_value= TRUE; + return 0; +} + + +double Item_dyncol_get::val_real() +{ + THD *thd= current_thd; + DYNAMIC_COLUMN_VALUE val; + char buff[STRING_BUFFER_USUAL_SIZE]; + String tmp(buff, sizeof(buff), &my_charset_bin); + + if (get_dyn_value(thd, &val, &tmp)) + return 0.0; + + switch (val.type) { + case DYN_COL_DYNCOL: + case DYN_COL_NULL: + goto null; + case DYN_COL_UINT: + return ulonglong2double(val.x.ulong_value); + case DYN_COL_INT: + return (double) val.x.long_value; + case DYN_COL_DOUBLE: + return (double) val.x.double_value; + case DYN_COL_STRING: + { + int error; + char *end; + double res= val.x.string.charset->strntod((char*) val.x.string.value.str, + val.x.string.value.length, &end, &error); + + if (end != (char*) val.x.string.value.str + val.x.string.value.length || + error) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_BAD_DATA, + ER_THD(thd, ER_BAD_DATA), + ErrConvString(val.x.string.value.str, + val.x.string.value.length, + val.x.string.charset).ptr(), + "DOUBLE"); + } + return res; + } + case DYN_COL_DECIMAL: + { + double res; + /* This will always succeed */ + decimal2double(&val.x.decimal.value, &res); + return res; + } + case DYN_COL_DATETIME: + case DYN_COL_DATE: + case DYN_COL_TIME: + return TIME_to_double(&val.x.time_value); + } + +null: + null_value= TRUE; + return 0.0; +} + + +my_decimal 
*Item_dyncol_get::val_decimal(my_decimal *decimal_value) +{ + THD *thd= current_thd; + DYNAMIC_COLUMN_VALUE val; + char buff[STRING_BUFFER_USUAL_SIZE]; + String tmp(buff, sizeof(buff), &my_charset_bin); + + if (get_dyn_value(thd, &val, &tmp)) + return NULL; + + switch (val.type) { + case DYN_COL_DYNCOL: + case DYN_COL_NULL: + goto null; + case DYN_COL_UINT: + int2my_decimal(E_DEC_FATAL_ERROR, val.x.long_value, TRUE, decimal_value); + break; + case DYN_COL_INT: + int2my_decimal(E_DEC_FATAL_ERROR, val.x.long_value, FALSE, decimal_value); + break; + case DYN_COL_DOUBLE: + double2my_decimal(E_DEC_FATAL_ERROR, val.x.double_value, decimal_value); + break; + case DYN_COL_STRING: + { + const char *end; + int rc; + rc= str2my_decimal(0, val.x.string.value.str, val.x.string.value.length, + val.x.string.charset, decimal_value, &end); + if (rc != E_DEC_OK || + end != val.x.string.value.str + val.x.string.value.length) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_BAD_DATA, + ER_THD(thd, ER_BAD_DATA), + ErrConvString(val.x.string.value.str, + val.x.string.value.length, + val.x.string.charset).ptr(), + "DECIMAL"); + } + break; + } + case DYN_COL_DECIMAL: + decimal2my_decimal(&val.x.decimal.value, decimal_value); + break; + case DYN_COL_DATETIME: + case DYN_COL_DATE: + case DYN_COL_TIME: + decimal_value= TIME_to_my_decimal(&val.x.time_value, decimal_value); + break; + } + return decimal_value; + +null: + null_value= TRUE; + return 0; +} + + +bool Item_dyncol_get::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + DYNAMIC_COLUMN_VALUE val; + char buff[STRING_BUFFER_USUAL_SIZE]; + String tmp(buff, sizeof(buff), &my_charset_bin); + bool signed_value= 0; + + if (get_dyn_value(current_thd, &val, &tmp)) + return 1; // Error + + switch (val.type) { + case DYN_COL_DYNCOL: + case DYN_COL_NULL: + goto null; + case DYN_COL_INT: + signed_value= 1; // For error message + /* fall through */ + case DYN_COL_UINT: + if (signed_value || val.x.ulong_value <= 
LONGLONG_MAX) + { + longlong llval = (longlong)val.x.ulong_value; + if (int_to_datetime_with_warn(thd, Longlong_hybrid(llval, !signed_value), + ltime, fuzzydate, 0, 0 /* TODO */)) + goto null; + return 0; + } + /* let double_to_datetime_with_warn() issue the warning message */ + val.x.double_value= static_cast(ULONGLONG_MAX); + /* fall through */ + case DYN_COL_DOUBLE: + if (double_to_datetime_with_warn(thd, val.x.double_value, ltime, fuzzydate, + 0, 0 /* TODO */)) + goto null; + return 0; + case DYN_COL_DECIMAL: + if (decimal_to_datetime_with_warn(thd, (my_decimal*)&val.x.decimal.value, + ltime, fuzzydate, 0, 0 /* TODO */)) + goto null; + return 0; + case DYN_COL_STRING: + if (str_to_datetime_with_warn(thd, &my_charset_numeric, + val.x.string.value.str, + val.x.string.value.length, + ltime, fuzzydate)) + goto null; + return 0; + case DYN_COL_DATETIME: + case DYN_COL_DATE: + case DYN_COL_TIME: + *ltime= val.x.time_value; + return 0; + } + +null: + null_value= TRUE; + return 1; +} + +void Item_dyncol_get::print(String *str, enum_query_type query_type) +{ + /* + Parent cast doesn't exist yet, only print dynamic column name. This happens + when called from create_func_cast() / wrong_precision_error(). 
+ */ + if (!str->length()) + { + args[1]->print(str, query_type); + return; + } + + /* see create_func_dyncol_get */ + DBUG_ASSERT(str->length() >= 5); + DBUG_ASSERT(strncmp(str->ptr() + str->length() - 5, "cast(", 5) == 0); + + str->length(str->length() - 5); // removing "cast(" + str->append(STRING_WITH_LEN("column_get(")); + args[0]->print(str, query_type); + str->append(','); + args[1]->print(str, query_type); + /* let the parent cast item add " as )" */ +} + + +String *Item_func_dyncol_list::val_str(String *str) +{ + uint i; + enum enum_dyncol_func_result rc; + LEX_STRING *names= 0; + uint count; + DYNAMIC_COLUMN col; + String *res= args[0]->val_str(str); + + if (args[0]->null_value) + goto null; + col.length= res->length(); + /* We do not change the string, so could do this trick */ + col.str= (char *)res->ptr(); + if ((rc= mariadb_dyncol_list_named(&col, &count, &names))) + { + bzero(&col, sizeof(col)); + dynamic_column_error_message(rc); + goto null; + } + bzero(&col, sizeof(col)); + + /* + We estimate average name length as 10 + */ + if (str->alloc(count * 13)) + goto null; + + str->length(0); + for (i= 0; i < count; i++) + { + append_identifier(current_thd, str, names[i].str, names[i].length); + if (i < count - 1) + str->qs_append(','); + } + null_value= FALSE; + if (names) + my_free(names); + str->set_charset(DYNCOL_UTF); + return str; + +null: + null_value= TRUE; + if (names) + my_free(names); + return NULL; +} + +Item_temptable_rowid::Item_temptable_rowid(TABLE *table_arg) + : Item_str_func(table_arg->in_use), table(table_arg) +{ + max_length= table->file->ref_length; +} + +bool Item_temptable_rowid::fix_length_and_dec(THD *thd) +{ + used_tables_cache= table->map; + const_item_cache= false; + return FALSE; +} + +String *Item_temptable_rowid::val_str(String *str) +{ + if (!((null_value= table->null_row))) + table->file->position(table->record[0]); + str_value.set((char*)(table->file->ref), max_length, &my_charset_bin); + return &str_value; +} + +/** + 
Helper routine to encode length prefix + in natsort_encode_numeric_string(). + + The idea is so that bigger input numbers correspond + lexicographically bigger output strings. + + Note, that in real use the number would typically + small, as it only computes variable *length prefixes*. + + @param[in] n - the number + @param[in] s - output string + + @return - length of encoding + + Here is how encoding works + + - n is from 0 to 8 + Output string calculated as '0'+n (range '0' - '8') + + - n is from 9 to 17 + Output calculated as concat('9', '0' + n -9)' + Output range: '90'-'98' + + -n is from 18 to 26 + Output calculated as concat('99', '0' + n -18)' + Output range '990'-'998' + + - n is from 27 to SIZE_T_MAX + Output starts with '999', + then log10(n) is encoded as 2-digit decimal number + then the number itself is added. + Example : for 28 key is concat('999', '01' , '28') + i.e '9990128' + + Key length is 5 + ceil(log10(n)) + + Output range is + (64bit)'9990128' - '9991918446744073709551615' + (32bit)'9990128' - '999094294967295' +*/ + +/* Largest length of encoded string.*/ +static size_t natsort_encode_length_max(size_t n) +{ + return (n < 27) ? n/9+1 : 26; +} + +static void natsort_encode_length(size_t n, String* out) +{ + if (n < 27) + { + if (n >= 9) + out->fill(out->length() + n/9,'9'); + out->append(char(n % 9 + '0')); + return; + } + + size_t log10n= 0; + for (size_t tmp= n / 10; tmp; tmp/= 10) + log10n++; + out->fill(out->length() + 3, '9'); + out->append('0' + (char) (log10n / 10)); + out->append('0' + (char) (log10n % 10)); + out->append_ulonglong(n); +} + +enum class NATSORT_ERR +{ + SUCCESS= 0, + KEY_TOO_LARGE= 1, + ALLOC_ERROR= 2 +}; + +/* + Encode numeric string for natural sorting. + + @param[in] in - start of the numeric string + skipping leading zeros + + @param[in] n_digits - length of the string, + in characters, not counting leading zeros. + + @param[out] out - String to write to. 
The string should + have enough preallocated space to fit the encoded key. + + @return + NATSORT_ERR::SUCCESS - success + NATSORT_ERR::KEY_TOO_LARGE - out string does not have enough + space left to accomodate the key. + + + The resulting encoding of the numeric string is then + + CONCAT(natsort_encode_length(n_digits), in) +*/ +static NATSORT_ERR natsort_encode_numeric_string(const char *in, + size_t n_digits, + String *out) +{ + DBUG_ASSERT(in); + DBUG_ASSERT(n_digits); + + if (out->length() + natsort_encode_length_max(n_digits - 1) + n_digits > + out->alloced_length()) + return NATSORT_ERR::KEY_TOO_LARGE; + + natsort_encode_length(n_digits - 1, out); + out->append(in, n_digits); + return NATSORT_ERR::SUCCESS; +} + +/* + Calculate max size of the natsort key. + + A digit in string expands to 2 chars length_prefix , and the digit + + With even length L=2N, the largest key corresponds to input string + in form REPEAT(,N) and the length of a key is + 2N + N = 3N + + With odd input length L=2N+1, largest key is built by appending + a digit at the end, with key length 3N+2 + +*/ +static size_t natsort_max_key_size(size_t input_size) +{ + return input_size + (input_size + 1)/2 ; +} + +/** + Convert a string to natural sort key. + @param[in] in - input string + @param[out] out - output string + @param[in] max_key_size - the maximum size of the output + key, in bytes. 
+ @return NATSORT_ERR::SUCCESS - successful completion + NATSORT_ERR::ALLOC_ERROR - memory allocation error + NATSORT_ERR::KEY_TOO_LARGE - resulting key would exceed max_key_size +*/ +static NATSORT_ERR to_natsort_key(const String *in, String *out, + size_t max_key_size) +{ + size_t n_digits= 0; + size_t n_lead_zeros= 0; + size_t num_start; + size_t reserve_length= std::min( + natsort_max_key_size(in->length()) + MAX_BIGINT_WIDTH + 2, max_key_size); + + out->length(0); + out->set_charset(in->charset()); + + if (out->alloc((uint32) reserve_length)) + return NATSORT_ERR::ALLOC_ERROR; + + for (size_t pos= 0;; pos++) + { + char c= pos < in->length() ? (*in)[pos] : 0; + bool is_digit= (c >= '0' && c <= '9'); + if (!is_digit && (n_digits || n_lead_zeros)) + { + /* Handle end of digits run.*/ + if (!n_digits) + { + /*We only have zeros.*/ + n_lead_zeros--; + num_start= pos - 1; + n_digits= 1; + } + NATSORT_ERR err= natsort_encode_numeric_string( + in->ptr() + num_start, n_digits, out); + if (err != NATSORT_ERR::SUCCESS) + return err; + + /* Reset state.*/ + n_digits= 0; + num_start= size_t(-1); + n_lead_zeros= 0; + } + + if (pos == in->length()) + break; + + if (!is_digit) + { + if (out->length() == max_key_size) + return NATSORT_ERR::KEY_TOO_LARGE; + out->append(c); + } + else if (c == '0' && !n_digits) + n_lead_zeros++; + else if (!n_digits++) + num_start= pos; + } + return NATSORT_ERR::SUCCESS; +} + +String *Item_func_natural_sort_key::val_str(String *out) +{ + String *in= args[0]->val_str(); + if (args[0]->null_value || !in) + { + null_value= true; + return nullptr; + } + NATSORT_ERR err= NATSORT_ERR::SUCCESS; + CHARSET_INFO *cs= in->charset(); + ulong max_allowed_packet= current_thd->variables.max_allowed_packet; + uint errs; + String tmp; + /* + to_natsort_key() only support charsets where digits are represented by + a single byte in range 0x30-0x39. Almost everything is OK, just utf16/32 + won't do. 
Full ASCII compatibility is not required, so that SJIS and SWE7 + are fine. + */ + if (cs->mbminlen != 1) + { + if (tmp.copy(in, &my_charset_utf8mb4_bin, &errs)) + goto error_exit; + in= &tmp; + } + + err= to_natsort_key(in, out, max_allowed_packet / cs->mbminlen); + + if (err != NATSORT_ERR::SUCCESS) + { + if (err == NATSORT_ERR::KEY_TOO_LARGE) + { + push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER(ER_WARN_ALLOWED_PACKET_OVERFLOWED), func_name(), + max_allowed_packet); + } + goto error_exit; + } + + if (cs->mbminlen != 1) + { + /* output string is now utf8, convert to input charset.*/ + if (tmp.copy(out, cs, &errs) || out->copy(tmp)) + goto error_exit; + } + null_value= false; + return out; + +error_exit: + null_value= true; + return nullptr; +} + +bool Item_func_natural_sort_key::fix_length_and_dec(THD *thd) +{ + if (agg_arg_charsets_for_string_result(collation, args, 1)) + return true; + DBUG_ASSERT(collation.collation != NULL); + uint32 max_char_len= + (uint32) natsort_max_key_size(args[0]->max_char_length()); + fix_char_length(max_char_len); + + set_maybe_null(args[0]->maybe_null() || + max_char_len * collation.collation->mbmaxlen > + current_thd->variables.max_allowed_packet); + return false; +} + +/** + Disable use in stored virtual functions. Temporarily(?), until + the encoding is stable. 
+*/ +bool Item_func_natural_sort_key::check_vcol_func_processor(void *arg) +{ + return mark_unsupported_function(func_name(), "()", arg, + VCOL_NON_DETERMINISTIC); +} + +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#include "wsrep_server_state.h" +/* Format is %d-%d-%llu */ +#define WSREP_MAX_WSREP_SERVER_GTID_STR_LEN 10+1+10+1+20 + +String *Item_func_wsrep_last_written_gtid::val_str_ascii(String *str) +{ + if (gtid_str.alloc(WSREP_MAX_WSREP_SERVER_GTID_STR_LEN+1)) + { + my_error(ER_OUTOFMEMORY, WSREP_MAX_WSREP_SERVER_GTID_STR_LEN); + null_value= TRUE; + return 0; + } + + ssize_t gtid_len= my_snprintf((char*)gtid_str.ptr(), + WSREP_MAX_WSREP_SERVER_GTID_STR_LEN+1, + "%u-%u-%llu", wsrep_gtid_server.domain_id, + wsrep_gtid_server.server_id, + current_thd->wsrep_last_written_gtid_seqno); + if (gtid_len < 0) + { + my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), func_name(), + "wsrep_gtid_print failed"); + null_value= TRUE; + return 0; + } + gtid_str.length(gtid_len); + return >id_str; +} + +String *Item_func_wsrep_last_seen_gtid::val_str_ascii(String *str) +{ + if (gtid_str.alloc(WSREP_MAX_WSREP_SERVER_GTID_STR_LEN+1)) + { + my_error(ER_OUTOFMEMORY, WSREP_MAX_WSREP_SERVER_GTID_STR_LEN); + null_value= TRUE; + return 0; + } + ssize_t gtid_len= my_snprintf((char*)gtid_str.ptr(), + WSREP_MAX_WSREP_SERVER_GTID_STR_LEN+1, + "%u-%u-%llu", wsrep_gtid_server.domain_id, + wsrep_gtid_server.server_id, + wsrep_gtid_server.seqno()); + if (gtid_len < 0) + { + my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), func_name(), + "wsrep_gtid_print failed"); + null_value= TRUE; + return 0; + } + gtid_str.length(gtid_len); + return >id_str; +} + +longlong Item_func_wsrep_sync_wait_upto::val_int() +{ + String *gtid_str __attribute__((unused)) = args[0]->val_str(&value); + null_value=0; + uint timeout; + rpl_gtid *gtid_list; + uint32 count; + int wait_gtid_ret= 0; + int ret= 1; + + if (args[0]->null_value) + { + my_error(ER_WRONG_ARGUMENTS, MYF(0), func_name()); + null_value= TRUE; + return 
0; + } + + if (arg_count==2 && !args[1]->null_value) + timeout= (uint)(args[1]->val_real()); + else + timeout= (uint)-1; + + if (!(gtid_list= gtid_parse_string_to_list(gtid_str->ptr(), gtid_str->length(), + &count))) + { + my_error(ER_INCORRECT_GTID_STATE, MYF(0), func_name()); + null_value= TRUE; + return 0; + } + if (count == 1) + { + if (wsrep_check_gtid_seqno(gtid_list[0].domain_id, gtid_list[0].server_id, + gtid_list[0].seq_no)) + { + wait_gtid_ret= wsrep_gtid_server.wait_gtid_upto(gtid_list[0].seq_no, timeout); + if ((wait_gtid_ret == ETIMEDOUT) || (wait_gtid_ret == ETIME)) + { + my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0), func_name()); + ret= 0; + } + else if (wait_gtid_ret == ENOMEM) + { + my_error(ER_OUTOFMEMORY, MYF(0), func_name()); + ret= 0; + } + } + } + else + { + my_error(ER_WRONG_ARGUMENTS, MYF(0), func_name()); + null_value= TRUE; + ret= 0; + } + my_free(gtid_list); + return ret; +} + +#endif /* WITH_WSREP */ diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h new file mode 100644 index 00000000..3a3c5338 --- /dev/null +++ b/sql/item_strfunc.h @@ -0,0 +1,2288 @@ +#ifndef ITEM_STRFUNC_INCLUDED +#define ITEM_STRFUNC_INCLUDED + +/* + Copyright (c) 2000, 2011, Oracle and/or its affiliates. + Copyright (c) 2009, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* This file defines all string functions */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +extern size_t username_char_length; + +class Item_str_func :public Item_func +{ +protected: + /** + Sets the result value of the function an empty string, using the current + character set. No memory is allocated. + @retval A pointer to the str_value member. + */ + virtual String *make_empty_result(String *str) + { + /* + Reset string length to an empty string. We don't use str_value.set() as + we don't want to free and potentially have to reallocate the buffer + for each call. + */ + if (!str->is_alloced()) + str->set("", 0, collation.collation); /* Avoid null ptrs */ + else + { + str->length(0); /* Reuse allocated area */ + str->set_charset(collation.collation); + } + return str; + } +public: + Item_str_func(THD *thd): Item_func(thd) { decimals=NOT_FIXED_DEC; } + Item_str_func(THD *thd, Item *a): Item_func(thd, a) {decimals=NOT_FIXED_DEC; } + Item_str_func(THD *thd, Item *a, Item *b): + Item_func(thd, a, b) { decimals=NOT_FIXED_DEC; } + Item_str_func(THD *thd, Item *a, Item *b, Item *c): + Item_func(thd, a, b, c) { decimals=NOT_FIXED_DEC; } + Item_str_func(THD *thd, Item *a, Item *b, Item *c, Item *d): + Item_func(thd, a, b, c, d) { decimals=NOT_FIXED_DEC; } + Item_str_func(THD *thd, Item *a, Item *b, Item *c, Item *d, Item* e): + Item_func(thd, a, b, c, d, e) { decimals=NOT_FIXED_DEC; } + Item_str_func(THD *thd, List &list): + Item_func(thd, list) { decimals=NOT_FIXED_DEC; } + longlong val_int() override; + double val_real() override; + my_decimal *val_decimal(my_decimal *) override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { return get_date_from_string(thd, ltime, 
fuzzydate); } + const Type_handler *type_handler() const override + { return string_type_handler(); } + void left_right_max_length(); + bool fix_fields(THD *thd, Item **ref) override; +}; + + + +/* + Functions that return values with ASCII repertoire +*/ +class Item_str_ascii_func :public Item_str_func +{ + String ascii_buf; +public: + Item_str_ascii_func(THD *thd): Item_str_func(thd) {} + Item_str_ascii_func(THD *thd, Item *a): Item_str_func(thd, a) {} + Item_str_ascii_func(THD *thd, Item *a, Item *b): Item_str_func(thd, a, b) {} + Item_str_ascii_func(THD *thd, Item *a, Item *b, Item *c): + Item_str_func(thd, a, b, c) {} + String *val_str(String *str) override + { + return val_str_from_val_str_ascii(str, &ascii_buf); + } + String *val_str_ascii(String *) override= 0; +}; + + +/** + Functions that return a checksum or a hash of the argument, + or somehow else encode or decode the argument, + returning an ASCII-repertoire string. +*/ +class Item_str_ascii_checksum_func: public Item_str_ascii_func +{ +public: + Item_str_ascii_checksum_func(THD *thd, Item *a) + :Item_str_ascii_func(thd, a) { } + Item_str_ascii_checksum_func(THD *thd, Item *a, Item *b) + :Item_str_ascii_func(thd, a, b) { } + bool eq(const Item *item, bool binary_cmp) const + { + // Always use binary argument comparison: MD5('x') != MD5('X') + return Item_func::eq(item, true); + } +}; + + +/** + Functions that return a checksum or a hash of the argument, + or somehow else encode or decode the argument, + returning a binary string. 
+*/ +class Item_str_binary_checksum_func: public Item_str_func +{ +public: + Item_str_binary_checksum_func(THD *thd, Item *a) + :Item_str_func(thd, a) { } + Item_str_binary_checksum_func(THD *thd, Item *a, Item *b) + :Item_str_func(thd, a, b) { } + bool eq(const Item *item, bool binary_cmp) const + { + /* + Always use binary argument comparison: + FROM_BASE64('test') != FROM_BASE64('TEST') + */ + return Item_func::eq(item, true); + } +}; + + +class Item_func_md5 :public Item_str_ascii_checksum_func +{ +public: + Item_func_md5(THD *thd, Item *a): Item_str_ascii_checksum_func(thd, a) {} + String *val_str_ascii(String *) override; + bool fix_length_and_dec(THD *thd) override + { + fix_length_and_charset(32, default_charset()); + return FALSE; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("md5") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_sha :public Item_str_ascii_checksum_func +{ +public: + Item_func_sha(THD *thd, Item *a): Item_str_ascii_checksum_func(thd, a) {} + String *val_str_ascii(String *) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("sha") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_sha2 :public Item_str_ascii_checksum_func +{ +public: + Item_func_sha2(THD *thd, Item *a, Item *b) + :Item_str_ascii_checksum_func(thd, a, b) {} + String *val_str_ascii(String *) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("sha2") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_to_base64 :public Item_str_ascii_checksum_func +{ + String tmp_value; +public: + Item_func_to_base64(THD *thd, Item 
*a) + :Item_str_ascii_checksum_func(thd, a) {} + String *val_str_ascii(String *) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("to_base64") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_from_base64 :public Item_str_binary_checksum_func +{ + String tmp_value; +public: + Item_func_from_base64(THD *thd, Item *a) + :Item_str_binary_checksum_func(thd, a) { } + String *val_str(String *) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("from_base64") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +#include + +class Item_aes_crypt :public Item_str_binary_checksum_func +{ + enum { AES_KEY_LENGTH = 128 }; + void create_key(String *user_key, uchar* key); + +protected: + int what; + String tmp_value; +public: + Item_aes_crypt(THD *thd, Item *a, Item *b) + :Item_str_binary_checksum_func(thd, a, b) {} + String *val_str(String *); +}; + +class Item_func_aes_encrypt :public Item_aes_crypt +{ +public: + Item_func_aes_encrypt(THD *thd, Item *a, Item *b) + :Item_aes_crypt(thd, a, b) {} + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("aes_encrypt") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_aes_decrypt :public Item_aes_crypt +{ +public: + Item_func_aes_decrypt(THD *thd, Item *a, Item *b): + Item_aes_crypt(thd, a, b) {} + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("aes_decrypt") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class 
Item_func_natural_sort_key : public Item_str_func +{ +public: + Item_func_natural_sort_key(THD *thd, Item *a) + : Item_str_func(thd, a){}; + String *val_str(String *) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("natural_sort_key")}; + return name; + } + bool fix_length_and_dec(THD *thd) override; + Item *get_copy(THD *thd) override + { + return get_item_copy(thd, this); + } + + bool check_vcol_func_processor(void *arg) override; +}; + +class Item_func_concat :public Item_str_func +{ +protected: + String tmp_value; + /* + Append a non-NULL value to the result. + @param [IN] thd - The current thread. + @param [IN/OUT] res - The current val_str() return value. + @param [IN] app - The value to be appended. + @retval - false on success, true on error + */ + bool append_value(THD *thd, String *res, const String *app); + bool realloc_result(String *str, uint length) const; +public: + Item_func_concat(THD *thd, List &list): Item_str_func(thd, list) {} + Item_func_concat(THD *thd, Item *a, Item *b): Item_str_func(thd, a, b) {} + String *val_str(String *) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("concat") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + This class handles the || operator in sql_mode=ORACLE. + Unlike the traditional MariaDB concat(), it treats NULL arguments as ''. 
+*/ +class Item_func_concat_operator_oracle :public Item_func_concat +{ +public: + Item_func_concat_operator_oracle(THD *thd, List &list) + :Item_func_concat(thd, list) + { } + Item_func_concat_operator_oracle(THD *thd, Item *a, Item *b) + :Item_func_concat(thd, a, b) + { } + String *val_str(String *) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("concat_operator_oracle") }; + return name; + } + Item *get_copy(THD *thd) override + { + return get_item_copy(thd, this); + } +}; + + +class Item_func_decode_histogram :public Item_str_func +{ +public: + Item_func_decode_histogram(THD *thd, Item *a, Item *b): + Item_str_func(thd, a, b) {} + String *val_str(String *) override; + bool fix_length_and_dec(THD *thd) override + { + collation.set(system_charset_info); + max_length= MAX_BLOB_WIDTH; + set_maybe_null(); + return FALSE; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("decode_histogram") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_concat_ws :public Item_str_func +{ + String tmp_value; +public: + Item_func_concat_ws(THD *thd, List &list): Item_str_func(thd, list) {} + String *val_str(String *) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("concat_ws") }; + return name; + } + table_map not_null_tables() const override { return 0; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_random_bytes : public Item_str_func +{ +public: + Item_func_random_bytes(THD *thd, Item *arg1) : Item_str_func(thd, arg1) {} + bool fix_length_and_dec(THD *thd) override; + void update_used_tables() override; + String *val_str(String *) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= 
{STRING_WITH_LEN("random_bytes")}; + return name; + } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, + VCOL_NON_DETERMINISTIC | VCOL_NEXTVAL); + } + Item *get_copy(THD *thd) override + { + return get_item_copy(thd, this); + } + static const int MAX_RANDOM_BYTES= 1024; +}; + + +class Item_func_reverse :public Item_str_func +{ + String tmp_value; +public: + Item_func_reverse(THD *thd, Item *a): Item_str_func(thd, a) {} + String *val_str(String *) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("reverse") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_replace :public Item_str_func +{ + String tmp_value,tmp_value2; +public: + Item_func_replace(THD *thd, Item *org, Item *find, Item *replace): + Item_str_func(thd, org, find, replace) {} + String *val_str(String *to) override { return val_str_internal(to, NULL); }; + bool fix_length_and_dec(THD *thd) override; + String *val_str_internal(String *str, String *empty_string_for_null); + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("replace") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_replace_oracle :public Item_func_replace +{ + String tmp_emtpystr; +public: + Item_func_replace_oracle(THD *thd, Item *org, Item *find, Item *replace): + Item_func_replace(thd, org, find, replace) {} + String *val_str(String *to) override + { return val_str_internal(to, &tmp_emtpystr); }; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("replace_oracle") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_regexp_replace :public Item_str_func +{ + 
Regexp_processor_pcre re; + bool append_replacement(String *str, + const LEX_CSTRING *source, + const LEX_CSTRING *replace); +public: + Item_func_regexp_replace(THD *thd, Item *a, Item *b, Item *c): + Item_str_func(thd, a, b, c) + {} + void cleanup() override + { + DBUG_ENTER("Item_func_regexp_replace::cleanup"); + Item_str_func::cleanup(); + re.cleanup(); + DBUG_VOID_RETURN; + } + String *val_str(String *str) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("regexp_replace") }; + return name; + } + Item *get_copy(THD *thd) override { return 0;} +}; + + +class Item_func_regexp_substr :public Item_str_func +{ + Regexp_processor_pcre re; +public: + Item_func_regexp_substr(THD *thd, Item *a, Item *b): + Item_str_func(thd, a, b) + {} + void cleanup() override + { + DBUG_ENTER("Item_func_regexp_substr::cleanup"); + Item_str_func::cleanup(); + re.cleanup(); + DBUG_VOID_RETURN; + } + String *val_str(String *str) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("regexp_substr") }; + return name; + } + Item *get_copy(THD *thd) override { return 0; } +}; + + +class Item_func_insert :public Item_str_func +{ + String tmp_value; +public: + Item_func_insert(THD *thd, Item *org, Item *start, Item *length, + Item *new_str): + Item_str_func(thd, org, start, length, new_str) {} + String *val_str(String *) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("insert") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_str_conv :public Item_str_func +{ +protected: + uint multiply; + my_charset_conv_case converter; + String tmp_value; +public: + Item_str_conv(THD *thd, Item *item): Item_str_func(thd, item) {} + String 
*val_str(String *) override; +}; + + +class Item_func_lcase :public Item_str_conv +{ +public: + Item_func_lcase(THD *thd, Item *item): Item_str_conv(thd, item) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("lcase") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_ucase :public Item_str_conv +{ +public: + Item_func_ucase(THD *thd, Item *item): Item_str_conv(thd, item) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("ucase") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_left :public Item_str_func +{ + String tmp_value; +public: + Item_func_left(THD *thd, Item *a, Item *b): Item_str_func(thd, a, b) {} + bool hash_not_null(Hasher *hasher) override; + String *val_str(String *) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("left") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_right :public Item_str_func +{ + String tmp_value; +public: + Item_func_right(THD *thd, Item *a, Item *b): Item_str_func(thd, a, b) {} + String *val_str(String *) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("right") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_substr :public Item_str_func +{ + String tmp_value; +protected: + virtual longlong get_position() { return args[1]->val_int(); } +public: + Item_func_substr(THD *thd, Item *a, Item *b): Item_str_func(thd, a, b) {} + Item_func_substr(THD *thd, 
/*
  NOTE(review): reconstructed from a collapsed diff chunk.  Template
  argument lists appear stripped by the extraction (e.g. "List &list" was
  presumably "List<Item> &list") -- verify against upstream item_strfunc.h.
*/
/* Tail of Item_func_substr; its class head lies before this chunk. */
Item *a, Item *b, Item *c):
    Item_str_func(thd, a, b, c) {}
  String *val_str(String *) override;
  bool fix_length_and_dec(THD *thd) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("substr") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/*
  SFORMAT(format, arg, ...): printf-like string formatting.
  Owns an array of String argument buffers (val_arg), released in the dtor.
*/
class Item_func_sformat :public Item_str_func
{
  String *val_arg;          // argument buffers, delete[]-ed in the destructor
public:
  Item_func_sformat(THD *thd, List &list);
  ~Item_func_sformat() { delete [] val_arg; }
  String *val_str(String*) override;
  bool fix_length_and_dec(THD *thd) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("sformat") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/*
  Oracle-compatible SUBSTR: position 0 is treated as position 1, and an
  empty result is returned as SQL NULL (hence the forced maybe_null).
*/
class Item_func_substr_oracle :public Item_func_substr
{
protected:
  // Oracle semantics: SUBSTR(str, 0, ...) behaves like SUBSTR(str, 1, ...)
  longlong get_position() override
  { longlong pos= args[1]->val_int(); return pos == 0 ? 1 : pos; }
  // Oracle semantics: empty string result becomes SQL NULL
  String *make_empty_result(String *str) override
  { null_value= 1; return NULL; }
public:
  Item_func_substr_oracle(THD *thd, Item *a, Item *b):
    Item_func_substr(thd, a, b) {}
  Item_func_substr_oracle(THD *thd, Item *a, Item *b, Item *c):
    Item_func_substr(thd, a, b, c) {}
  bool fix_length_and_dec(THD *thd) override
  {
    bool res= Item_func_substr::fix_length_and_dec(thd);
    set_maybe_null();        // may return NULL even for NOT NULL input
    return res;
  }
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("substr_oracle") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* SUBSTRING_INDEX(str, delim, count). */
class Item_func_substr_index :public Item_str_func
{
  String tmp_value;
public:
  Item_func_substr_index(THD *thd, Item *a,Item *b,Item *c):
    Item_str_func(thd, a, b, c) {}
  String *val_str(String *) override;
  bool fix_length_and_dec(THD *thd) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name=
    {STRING_WITH_LEN("substring_index") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }

};


/* TRIM(... FROM str): base class for all TRIM/LTRIM/RTRIM variants. */
class Item_func_trim :public Item_str_func
{
protected:
  String tmp_value;
  String remove;
  // Return the [offset, offset+length) slice of res, with the function's
  // result collation attached.
  String *trimmed_value(String *res, uint32 offset, uint32 length)
  {
    if (length == 0)
      return make_empty_result(&tmp_value);

    tmp_value.set(*res, offset, length);
    /*
      Make sure to return correct charset and collation:
      TRIM(0x000000 FROM _ucs2 0x0061)
      should set charset to "binary" rather than to "ucs2".
    */
    tmp_value.set_charset(collation.collation);
    return &tmp_value;
  }
  String *non_trimmed_value(String *res)
  {
    return trimmed_value(res, 0, res->length());
  }
  // Suffix appended to the SQL name when printing ("" or "_oracle").
  virtual LEX_CSTRING func_name_ext() const
  {
    static LEX_CSTRING name_ext= {STRING_WITH_LEN("") };
    return name_ext;
  }
public:
  Item_func_trim(THD *thd, Item *a, Item *b): Item_str_func(thd, a, b) {}
  Item_func_trim(THD *thd, Item *a): Item_str_func(thd, a) {}
  Sql_mode_dependency value_depends_on_sql_mode() const override;
  String *val_str(String *) override;
  bool fix_length_and_dec(THD *thd) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("trim") };
    return name;
  }
  void print(String *str, enum_query_type query_type) override;
  // TRIM mode keyword used by print(): BOTH / LEADING / TRAILING.
  virtual LEX_CSTRING mode_name() const { return { "both", 4}; }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* Oracle-compatible TRIM: empty result becomes SQL NULL. */
class Item_func_trim_oracle :public Item_func_trim
{
protected:
  String *make_empty_result(String *str) override
  { null_value= 1; return NULL; }
  LEX_CSTRING func_name_ext() const override
  {
    static LEX_CSTRING name_ext= {STRING_WITH_LEN("_oracle") };
    return name_ext;
  }
public:
  Item_func_trim_oracle(THD *thd, Item *a, Item *b):
    Item_func_trim(thd, a, b) {}
  Item_func_trim_oracle(THD *thd, Item *a): Item_func_trim(thd, a) {}
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("trim_oracle") };
    return name;
  }
  bool fix_length_and_dec(THD *thd) override
  {
    bool res= Item_func_trim::fix_length_and_dec(thd);
    set_maybe_null();        // empty result is returned as NULL
    return res;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* LTRIM(str) / TRIM(LEADING ...). */
class Item_func_ltrim :public Item_func_trim
{
public:
  Item_func_ltrim(THD *thd, Item *a, Item *b): Item_func_trim(thd, a, b) {}
  Item_func_ltrim(THD *thd, Item *a): Item_func_trim(thd, a) {}
  Sql_mode_dependency value_depends_on_sql_mode() const override
  {
    return Item_func::value_depends_on_sql_mode();
  }
  String *val_str(String *) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("ltrim") };
    return name;
  }
  LEX_CSTRING mode_name() const override
  { return { STRING_WITH_LEN("leading") }; }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* Oracle-compatible LTRIM: empty result becomes SQL NULL. */
class Item_func_ltrim_oracle :public Item_func_ltrim
{
protected:
  String *make_empty_result(String *str) override
  { null_value= 1; return NULL; }
  LEX_CSTRING func_name_ext() const override
  {
    static LEX_CSTRING name_ext= {STRING_WITH_LEN("_oracle") };
    return name_ext;
  }
public:
  Item_func_ltrim_oracle(THD *thd, Item *a, Item *b):
    Item_func_ltrim(thd, a, b) {}
  Item_func_ltrim_oracle(THD *thd, Item *a): Item_func_ltrim(thd, a) {}
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("ltrim_oracle") };
    return name;
  }
  bool fix_length_and_dec(THD *thd) override
  {
    bool res= Item_func_ltrim::fix_length_and_dec(thd);
    set_maybe_null();
    return res;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* RTRIM(str) / TRIM(TRAILING ...). */
class Item_func_rtrim :public Item_func_trim
{
public:
  Item_func_rtrim(THD *thd, Item *a, Item *b): Item_func_trim(thd, a, b) {}
  Item_func_rtrim(THD *thd, Item *a): Item_func_trim(thd, a) {}
  String *val_str(String *) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("rtrim") };
    return name;
  }
  LEX_CSTRING mode_name() const override
  { return { STRING_WITH_LEN("trailing") }; }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* Oracle-compatible RTRIM: empty result becomes SQL NULL. */
class Item_func_rtrim_oracle :public Item_func_rtrim
{
protected:
  String *make_empty_result(String *str) override
  { null_value= 1; return NULL; }
  LEX_CSTRING func_name_ext() const override
  {
    static LEX_CSTRING name_ext= {STRING_WITH_LEN("_oracle") };
    return name_ext;
  }
public:
  Item_func_rtrim_oracle(THD *thd, Item *a, Item *b):
    Item_func_rtrim(thd, a, b) {}
  Item_func_rtrim_oracle(THD *thd, Item *a): Item_func_rtrim(thd, a) {}
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("rtrim_oracle") };
    return name;
  }
  bool fix_length_and_dec(THD *thd) override
  {
    bool res= Item_func_rtrim::fix_length_and_dec(thd);
    set_maybe_null();
    return res;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};

/*
  Item_func_password -- new (4.1.1) PASSWORD() function implementation.
  Returns strcat('*', octet2hex(sha1(sha1(password)))). '*' stands for new
  password format, sha1(sha1(password) is so-called hash_stage2 value.
  Length of returned string is always 41 byte. To find out how entire
  authentication procedure works, see comments in password.c.
*/

/*
  PASSWORD() / OLD_PASSWORD().  alg selects the hash flavour (NEW = the
  41-byte "*hex(sha1(sha1(...)))" format, OLD = the pre-4.1 format); deflt
  records whether the function was spelled as plain PASSWORD(), which
  affects the name reported by func_name_cstring().
*/
class Item_func_password :public Item_str_ascii_checksum_func
{
public:
  enum PW_Alg {OLD, NEW};
private:
  char tmp_value[SCRAMBLED_PASSWORD_CHAR_LENGTH+1];
  enum PW_Alg alg;       // which hash algorithm to use
  bool deflt;            // true when created as plain PASSWORD()
public:
  Item_func_password(THD *thd, Item *a):
    Item_str_ascii_checksum_func(thd, a), alg(NEW), deflt(1) {}
  Item_func_password(THD *thd, Item *a, PW_Alg al):
    Item_str_ascii_checksum_func(thd, a), alg(al), deflt(0) {}
  String *val_str_ascii(String *str) override;
  bool fix_fields(THD *thd, Item **ref) override;
  bool fix_length_and_dec(THD *thd) override
  {
    /* NOTE: "alg == 1" relies on enumerator NEW having value 1 */
    fix_length_and_charset((alg == 1 ?
                            SCRAMBLED_PASSWORD_CHAR_LENGTH :
                            SCRAMBLED_PASSWORD_CHAR_LENGTH_323),
                           default_charset());
    return FALSE;
  }
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING password_normal= {STRING_WITH_LEN("password") };
    static LEX_CSTRING password_old= {STRING_WITH_LEN("old_password") };
    return (deflt || alg == 1) ? password_normal : password_old;
  }
  // Hash a plain-text password; result allocated on thd (see item_strfunc.cc).
  static char *alloc(THD *thd, const char *password, size_t pass_len,
                     enum PW_Alg al);
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};



/* DES_ENCRYPT(str [, key]). */
class Item_func_des_encrypt :public Item_str_binary_checksum_func
{
  String tmp_value,tmp_arg;
public:
  Item_func_des_encrypt(THD *thd, Item *a)
   :Item_str_binary_checksum_func(thd, a) {}
  Item_func_des_encrypt(THD *thd, Item *a, Item *b)
   :Item_str_binary_checksum_func(thd, a, b) {}
  String *val_str(String *) override;
  bool fix_length_and_dec(THD *thd) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("des_encrypt") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};

/* DES_DECRYPT(str [, key]). */
class Item_func_des_decrypt :public Item_str_binary_checksum_func
{
  String tmp_value;
public:
  Item_func_des_decrypt(THD *thd, Item *a)
   :Item_str_binary_checksum_func(thd, a) {}
  Item_func_des_decrypt(THD *thd, Item *a, Item *b)
   :Item_str_binary_checksum_func(thd, a, b) {}
  String *val_str(String *) override;
  bool fix_length_and_dec(THD *thd) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("des_decrypt") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/**
  QQ: Item_func_encrypt should derive from Item_str_ascii_checksum_func.
  However, it should be fixed to handle UCS2, UTF16, UTF32 properly first,
  as the underlying crypt() call expects a null-terminated input string.
*/
class Item_func_encrypt :public Item_str_binary_checksum_func
{
  String tmp_value;

  /* Encapsulate common constructor actions */
  void constructor_helper()
  {
    collation.set(&my_charset_bin);
  }
public:
  Item_func_encrypt(THD *thd, Item *a): Item_str_binary_checksum_func(thd, a)
  {
    constructor_helper();
  }
  Item_func_encrypt(THD *thd, Item *a, Item *b)
   :Item_str_binary_checksum_func(thd, a, b)
  {
    constructor_helper();
  }
  String *val_str(String *) override;
  bool fix_length_and_dec(THD *thd) override
  {
    set_maybe_null();
    max_length = 13;    // fixed length of a crypt(3) DES result
    return FALSE;
  }
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("encrypt") };
    return name;
  }
  // Returns FALSE: no restriction applied for virtual columns here.
  bool check_vcol_func_processor(void *arg) override
  {
    return FALSE;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};

#include "sql_crypt.h"


/* ENCODE(str, pass): obfuscate a string via SQL_CRYPT. */
class Item_func_encode :public Item_str_binary_checksum_func
{
private:
  /** Whether the PRNG has already been seeded.
*/ + bool seeded; +protected: + SQL_CRYPT sql_crypt; +public: + Item_func_encode(THD *thd, Item *a, Item *seed_arg): + Item_str_binary_checksum_func(thd, a, seed_arg) {} + String *val_str(String *) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("encode") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +protected: + virtual void crypto_transform(String *); +private: + /** Provide a seed for the PRNG sequence. */ + bool seed(); +}; + + +class Item_func_decode :public Item_func_encode +{ +public: + Item_func_decode(THD *thd, Item *a, Item *seed_arg): Item_func_encode(thd, a, seed_arg) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("decode") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +protected: + void crypto_transform(String *) override; +}; + + +class Item_func_sysconst :public Item_str_func +{ +public: + Item_func_sysconst(THD *thd): Item_str_func(thd) + { collation.set(system_charset_info,DERIVATION_SYSCONST); } + Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs); + /* + Used to create correct Item name in new converted item in + safe_charset_converter, return string representation of this function + call + */ + virtual const char *fully_qualified_func_name() const = 0; + bool check_vcol_func_processor(void *arg) + { + return mark_unsupported_function(fully_qualified_func_name(), arg, + VCOL_SESSION_FUNC); + } + bool const_item() const; +}; + + +class Item_func_database :public Item_func_sysconst +{ +public: + Item_func_database(THD *thd): Item_func_sysconst(thd) {} + String *val_str(String *) override; + bool fix_length_and_dec(THD *thd) override + { + max_length= NAME_CHAR_LEN * system_charset_info->mbmaxlen; + set_maybe_null(); + return FALSE; + } + LEX_CSTRING func_name_cstring() const override 
+ { + static LEX_CSTRING name= {STRING_WITH_LEN("database") }; + return name; + } + const char *fully_qualified_func_name() const override + { return "database()"; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_sqlerrm :public Item_func_sysconst +{ +public: + Item_func_sqlerrm(THD *thd): Item_func_sysconst(thd) {} + String *val_str(String *) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("SQLERRM") }; + return name; + } + const char *fully_qualified_func_name() const override + { return "SQLERRM"; } + void print(String *str, enum_query_type query_type) override + { + str->append(func_name_cstring()); + } + bool fix_length_and_dec(THD *thd) override + { + max_length= 512 * system_charset_info->mbmaxlen; + null_value= false; + base_flags&= ~item_base_t::MAYBE_NULL; + return FALSE; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_user :public Item_func_sysconst +{ +protected: + bool init (const char *user, const char *host); + +public: + Item_func_user(THD *thd): Item_func_sysconst(thd) + { + str_value.set("", 0, system_charset_info); + } + String *val_str(String *) override + { + DBUG_ASSERT(fixed()); + return (null_value ? 
0 : &str_value); + } + bool fix_fields(THD *thd, Item **ref) override; + bool fix_length_and_dec(THD *thd) override + { + max_length= (uint32) (username_char_length + + HOSTNAME_LENGTH + 1) * SYSTEM_CHARSET_MBMAXLEN; + return FALSE; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("user") }; + return name; + } + const char *fully_qualified_func_name() const override + { return "user()"; } + int save_in_field(Field *field, bool no_conversions) override + { + return save_str_value_in_field(field, &str_value); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_current_user :public Item_func_user +{ + Name_resolution_context *context; + +public: + Item_func_current_user(THD *thd, Name_resolution_context *context_arg): + Item_func_user(thd), context(context_arg) {} + bool fix_fields(THD *thd, Item **ref) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("current_user") }; + return name; + } + const char *fully_qualified_func_name() const override + { return "current_user()"; } + bool check_vcol_func_processor(void *arg) override + { + context= 0; + return mark_unsupported_function(fully_qualified_func_name(), arg, + VCOL_SESSION_FUNC); + } +}; + + +class Item_func_current_role :public Item_func_sysconst +{ + Name_resolution_context *context; + +public: + Item_func_current_role(THD *thd, Name_resolution_context *context_arg): + Item_func_sysconst(thd), context(context_arg) {} + bool fix_fields(THD *thd, Item **ref) override; + bool fix_length_and_dec(THD *thd) override + { + max_length= (uint32) username_char_length * SYSTEM_CHARSET_MBMAXLEN; + return FALSE; + } + int save_in_field(Field *field, bool no_conversions) override + { return save_str_value_in_field(field, &str_value); } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("current_role") }; + return name; + 
} + const char *fully_qualified_func_name() const override + { return "current_role()"; } + String *val_str(String *) override + { + DBUG_ASSERT(fixed()); + return null_value ? NULL : &str_value; + } + bool check_vcol_func_processor(void *arg) override + { + context= 0; + return mark_unsupported_function(fully_qualified_func_name(), arg, + VCOL_SESSION_FUNC); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_soundex :public Item_str_func +{ + String tmp_value; +public: + Item_func_soundex(THD *thd, Item *a): Item_str_func(thd, a) {} + String *val_str(String *) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("soundex") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_elt :public Item_str_func +{ +public: + Item_func_elt(THD *thd, List &list): Item_str_func(thd, list) {} + double val_real() override; + longlong val_int() override; + String *val_str(String *str) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("elt") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_make_set :public Item_str_func +{ + String tmp_str; + +public: + Item_func_make_set(THD *thd, List &list): Item_str_func(thd, list) {} + String *val_str(String *str) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("make_set") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_format :public Item_str_ascii_func +{ + const MY_LOCALE *locale; +public: + Item_func_format(THD *thd, Item *org, Item *dec): + Item_str_ascii_func(thd, org, 
dec) {}
  Item_func_format(THD *thd, Item *org, Item *dec, Item *lang):
    Item_str_ascii_func(thd, org, dec, lang) {}

  String *val_str_ascii(String *) override;
  bool fix_length_and_dec(THD *thd) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("format") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* CHAR(n1, n2, ... [USING cs]): build a string from numeric code points. */
class Item_func_char :public Item_str_func
{
public:
  Item_func_char(THD *thd, List &list): Item_str_func(thd, list)
  { collation.set(&my_charset_bin); }
  Item_func_char(THD *thd, List &list, CHARSET_INFO *cs):
    Item_str_func(thd, list)
  { collation.set(cs); }
  Item_func_char(THD *thd, Item *arg1, CHARSET_INFO *cs):
    Item_str_func(thd, arg1)
  { collation.set(cs); }
  String *val_str(String *) override;
  // Append one numeric code point to str.
  void append_char(String * str, int32 num);
  bool fix_length_and_dec(THD *thd) override
  {
    max_length= arg_count * 4;   // up to 4 bytes per code point
    return FALSE;
  }
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("char") };
    return name;
  }
  void print(String *str, enum_query_type query_type) override;
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};

/* CHR(n) (sql_mode=ORACLE): single-character variant of CHAR(). */
class Item_func_chr :public Item_func_char
{
public:
  Item_func_chr(THD *thd, Item *arg1, CHARSET_INFO *cs):
    Item_func_char(thd, arg1, cs) {}
  String *val_str(String *) override;
  bool fix_length_and_dec(THD *thd) override
  {
    max_length= 4;
    return FALSE;
  }
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("chr") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};

/* REPEAT(str, count). */
class Item_func_repeat :public Item_str_func
{
  String tmp_value;
public:
  Item_func_repeat(THD *thd, Item *arg1, Item *arg2):
    Item_str_func(thd, arg1, arg2) {}
  String *val_str(String *) override;
  bool fix_length_and_dec(THD *thd) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("repeat") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* SPACE(n): string of n spaces. */
class Item_func_space :public Item_str_func
{
public:
  Item_func_space(THD *thd, Item *arg1): Item_str_func(thd, arg1) {}
  String *val_str(String *) override;
  bool fix_length_and_dec(THD *thd) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("space") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* BINLOG_GTID_POS(file, offset): GTID position for a binlog location. */
class Item_func_binlog_gtid_pos :public Item_str_func
{
public:
  Item_func_binlog_gtid_pos(THD *thd, Item *arg1, Item *arg2):
    Item_str_func(thd, arg1, arg2) {}
  String *val_str(String *) override;
  bool fix_length_and_dec(THD *thd) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("binlog_gtid_pos") };
    return name;
  }
  // Reads server state, so never allowed in virtual columns.
  bool check_vcol_func_processor(void *arg) override
  {
    return mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE);
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* Common base for LPAD/RPAD. */
class Item_func_pad: public Item_str_func
{
protected:
  String tmp_value, pad_str;
public:
  Item_func_pad(THD *thd, Item *arg1, Item *arg2, Item *arg3):
    Item_str_func(thd, arg1, arg2, arg3) {}
  Item_func_pad(THD *thd, Item *arg1, Item *arg2):
    Item_str_func(thd, arg1, arg2) {}
  bool fix_length_and_dec(THD *thd) override;
};


/* RPAD(str, len [, padstr]). */
class Item_func_rpad :public Item_func_pad
{
public:
  Item_func_rpad(THD *thd, Item *arg1, Item *arg2, Item *arg3):
    Item_func_pad(thd, arg1, arg2, arg3) {}
  Item_func_rpad(THD *thd, Item *arg1, Item *arg2):
    Item_func_pad(thd, arg1, arg2) {}
  String *val_str(String *) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("rpad") };
    return name;
  }
  Sql_mode_dependency value_depends_on_sql_mode() const override;
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* Oracle-compatible RPAD: empty result becomes SQL NULL. */
class Item_func_rpad_oracle :public Item_func_rpad
{
  String *make_empty_result(String *str) override
  { null_value= 1; return NULL; }
public:
  Item_func_rpad_oracle(THD *thd, Item *arg1, Item *arg2, Item *arg3):
    Item_func_rpad(thd, arg1, arg2, arg3) {}
  Item_func_rpad_oracle(THD *thd, Item *arg1, Item *arg2):
    Item_func_rpad(thd, arg1, arg2) {}
  bool fix_length_and_dec(THD *thd) override
  {
    bool res= Item_func_rpad::fix_length_and_dec(thd);
    set_maybe_null();
    return res;
  }
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("rpad_oracle") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* LPAD(str, len [, padstr]). */
class Item_func_lpad :public Item_func_pad
{
public:
  Item_func_lpad(THD *thd, Item *arg1, Item *arg2, Item *arg3):
    Item_func_pad(thd, arg1, arg2, arg3) {}
  Item_func_lpad(THD *thd, Item *arg1, Item *arg2):
    Item_func_pad(thd, arg1, arg2) {}
  String *val_str(String *) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("lpad") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* Oracle-compatible LPAD: empty result becomes SQL NULL. */
class Item_func_lpad_oracle :public Item_func_lpad
{
  String *make_empty_result(String *str) override
  { null_value= 1; return NULL; }
public:
  Item_func_lpad_oracle(THD *thd, Item *arg1, Item *arg2, Item *arg3):
    Item_func_lpad(thd, arg1, arg2, arg3) {}
  Item_func_lpad_oracle(THD *thd, Item *arg1, Item *arg2):
    Item_func_lpad(thd, arg1, arg2) {}
  bool fix_length_and_dec(THD *thd) override
  {
    bool res= Item_func_lpad::fix_length_and_dec(thd);
    set_maybe_null();
    return res;
  }
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("lpad_oracle") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* CONV(num, from_base, to_base). */
class Item_func_conv :public Item_str_func
{
public:
  Item_func_conv(THD *thd, Item *a, Item *b, Item *c):
    Item_str_func(thd, a, b, c) {}
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("conv") };
    return name;
  }
  String *val_str(String *) override;
  bool fix_length_and_dec(THD *thd) override
  {
    collation.set(default_charset());
    fix_char_length(64);        // 64 binary digits is the widest result
    set_maybe_null();
    return FALSE;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* HEX(arg). */
class Item_func_hex :public Item_str_ascii_checksum_func
{
protected:
  String tmp_value;
  /*
    Calling arg[0]->type_handler() can be expensive on every row.
    It's a virtual method, and in case if args[0] is a complex Item,
    its type_handler() can call more virtual methods.
    So let's cache it during fix_length_and_dec().
  */
  const Type_handler *m_arg0_type_handler;
public:
  Item_func_hex(THD *thd, Item *a):
    Item_str_ascii_checksum_func(thd, a), m_arg0_type_handler(NULL) {}
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("hex") };
    return name;
  }
  String *val_str_ascii_from_val_int(String *str);
  String *val_str_ascii_from_val_real(String *str);
  String *val_str_ascii_from_val_str(String *str);
  String *val_str_ascii(String *str) override
  {
    DBUG_ASSERT(fixed());
    return m_arg0_type_handler->Item_func_hex_val_str_ascii(this, str);
  }
  bool fix_length_and_dec(THD *thd) override
  {
    collation.set(default_charset(), DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII);
    decimals=0;
    fix_char_length(args[0]->max_length * 2);   // two hex digits per byte
    m_arg0_type_handler= args[0]->type_handler();
    return FALSE;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};

/* UNHEX(str). */
class Item_func_unhex :public Item_str_func
{
  String tmp_value;
public:
  Item_func_unhex(THD *thd, Item *a):
Item_str_func(thd, a)
  {
    /* there can be bad hex strings */
    set_maybe_null();
  }
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("unhex") };
    return name;
  }
  String *val_str(String *) override;
  bool fix_length_and_dec(THD *thd) override
  {
    collation.set(&my_charset_bin);
    decimals=0;
    max_length=(1+args[0]->max_length)/2;   // two hex digits give one byte
    return FALSE;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


#ifndef DBUG_OFF
/* Debug-only helper exposing LIKE range computation results. */
class Item_func_like_range :public Item_str_func
{
protected:
  String min_str;
  String max_str;
  const bool is_min;   // true = return the range minimum, false = maximum
public:
  Item_func_like_range(THD *thd, Item *a, Item *b, bool is_min_arg):
    Item_str_func(thd, a, b), is_min(is_min_arg)
  {
    set_maybe_null();
  }
  String *val_str(String *) override;
  bool fix_length_and_dec(THD *thd) override
  {
    collation.set(args[0]->collation);
    decimals=0;
    max_length= MAX_BLOB_WIDTH;
    return FALSE;
  }
};


class Item_func_like_range_min :public Item_func_like_range
{
public:
  Item_func_like_range_min(THD *thd, Item *a, Item *b):
    Item_func_like_range(thd, a, b, true) { }
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("like_range_min") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


class Item_func_like_range_max :public Item_func_like_range
{
public:
  Item_func_like_range_max(THD *thd, Item *a, Item *b):
    Item_func_like_range(thd, a, b, false) { }
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("like_range_max") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};
#endif


/* BINARY(expr): re-tag the value with the binary charset; no copying. */
class Item_func_binary :public Item_str_func
{
public:
  Item_func_binary(THD *thd, Item *a): Item_str_func(thd, a) {}
  String *val_str(String *a) override
  {
    DBUG_ASSERT(fixed());
    String *tmp=args[0]->val_str(a);
    null_value=args[0]->null_value;
    if (tmp)
      tmp->set_charset(&my_charset_bin);
    return tmp;
  }
  bool fix_length_and_dec(THD *thd) override
  {
    collation.set(&my_charset_bin);
    max_length=args[0]->max_length;
    return FALSE;
  }
  void print(String *str, enum_query_type query_type) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("cast_as_binary") };
    return name;
  }
  bool need_parentheses_in_default() override { return true; }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* LOAD_FILE(path): server-side file contents as a binary blob. */
class Item_load_file :public Item_str_func
{
  String tmp_value;
public:
  Item_load_file(THD *thd, Item *a): Item_str_func(thd, a) {}
  String *val_str(String *) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("load_file") };
    return name;
  }
  bool fix_length_and_dec(THD *thd) override
  {
    collation.set(&my_charset_bin, DERIVATION_COERCIBLE);
    set_maybe_null();        // NULL on missing/unreadable file
    max_length=MAX_BLOB_WIDTH;
    return FALSE;
  }
  // Does file I/O, so never allowed in virtual columns.
  bool check_vcol_func_processor(void *arg) override
  {
    return mark_unsupported_function(func_name(), "()", arg, VCOL_IMPOSSIBLE);
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* EXPORT_SET(bits, on, off [, separator [, number_of_bits]]). */
class Item_func_export_set: public Item_str_func
{
 public:
  Item_func_export_set(THD *thd, Item *a, Item *b, Item* c):
    Item_str_func(thd, a, b, c) {}
  Item_func_export_set(THD *thd, Item *a, Item *b, Item* c, Item* d):
    Item_str_func(thd, a, b, c, d) {}
  Item_func_export_set(THD *thd, Item *a, Item *b, Item* c, Item* d, Item* e):
    Item_str_func(thd, a, b, c, d, e) {}
  String *val_str(String *str) override;
  bool fix_length_and_dec(THD *thd) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("export_set") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* QUOTE(str): produce an SQL-quoted, escaped string literal. */
class Item_func_quote
:public Item_str_func
{
  String tmp_value;
public:
  Item_func_quote(THD *thd, Item *a): Item_str_func(thd, a) {}
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("quote") };
    return name;
  }
  String *val_str(String *) override;
  bool fix_length_and_dec(THD *thd) override
  {
    collation.set(args[0]->collation);
    // worst case: every byte escaped, plus the two surrounding quotes
    ulonglong max_result_length= (ulonglong) args[0]->max_length * 2 +
                                 2 * collation.collation->mbmaxlen;
    max_length= (uint32) MY_MIN(max_result_length, MAX_BLOB_WIDTH);
    return FALSE;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};

/*
  CONVERT(expr USING cs).  A constant argument may be converted once at
  construction and cached; "safe" records whether the conversion is
  provably lossless.
*/
class Item_func_conv_charset :public Item_str_func
{
  bool use_cached_value;
  String tmp_value;
public:
  bool safe;
  Item_func_conv_charset(THD *thd, Item *a, CHARSET_INFO *cs):
    Item_str_func(thd, a)
  {
    collation.set(cs, DERIVATION_IMPLICIT);
    use_cached_value= 0; safe= 0;
  }
  Item_func_conv_charset(THD *thd, Item *a, CHARSET_INFO *cs, bool cache_if_const):
    Item_str_func(thd, a)
  {
    collation.set(cs, DERIVATION_IMPLICIT);
    if (cache_if_const && args[0]->can_eval_in_optimize())
    {
      // Constant input: convert once now and reuse the cached value.
      uint errors= 0;
      String tmp, *str= args[0]->val_str(&tmp);
      if (!str || str_value.copy(str->ptr(), str->length(),
                                 str->charset(), cs, &errors))
        null_value= 1;
      use_cached_value= 1;
      str_value.mark_as_const();
      safe= (errors == 0);
    }
    else
    {
      use_cached_value= 0;
      /*
        Conversion from and to "binary" is safe.
        Conversion to Unicode is safe.
        Conversion from an expression with the ASCII repertoire
        to any character set that can store characters U+0000..U+007F
        is safe:
        - All supported multibyte character sets can store U+0000..U+007F
        - All supported 7bit character sets can store U+0000..U+007F
        except those marked with MY_CS_NONASCII (e.g. swe7).
        Other kind of conversions are potentially lossy.
      */
      safe= (args[0]->collation.collation == &my_charset_bin ||
             cs == &my_charset_bin ||
             (cs->state & MY_CS_UNICODE) ||
             (args[0]->collation.repertoire == MY_REPERTOIRE_ASCII &&
              (cs->mbmaxlen > 1 || !(cs->state & MY_CS_NONASCII))));
    }
  }
  String *val_str(String *) override;
  // For non-string args the charset conversion is a no-op: delegate.
  longlong val_int() override
  {
    if (args[0]->result_type() == STRING_RESULT)
      return Item_str_func::val_int();
    longlong res= args[0]->val_int();
    if ((null_value= args[0]->null_value))
      return 0;
    return res;
  }
  double val_real() override
  {
    if (args[0]->result_type() == STRING_RESULT)
      return Item_str_func::val_real();
    double res= args[0]->val_real();
    if ((null_value= args[0]->null_value))
      return 0;
    return res;
  }
  my_decimal *val_decimal(my_decimal *d) override
  {
    if (args[0]->result_type() == STRING_RESULT)
      return Item_str_func::val_decimal(d);
    my_decimal *res= args[0]->val_decimal(d);
    if ((null_value= args[0]->null_value))
      return NULL;
    return res;
  }
  bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override
  {
    if (args[0]->result_type() == STRING_RESULT)
      return Item_str_func::get_date(thd, ltime, fuzzydate);
    bool res= args[0]->get_date(thd, ltime, fuzzydate);
    if ((null_value= args[0]->null_value))
      return 1;
    return res;
  }
  bool fix_length_and_dec(THD *thd) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("convert") };
    return name;
  }
  void print(String *str, enum_query_type query_type) override;
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
  int save_in_field(Field*, bool) override;
};

/* expr COLLATE collation_name. */
class Item_func_set_collation :public Item_str_func
{
  Lex_extended_collation_st m_set_collation;
public:
  Item_func_set_collation(THD *thd, Item *a,
                          const Lex_extended_collation_st &set_collation):
    Item_str_func(thd, a), m_set_collation(set_collation) {}
  String *val_str(String *) override;
  bool fix_length_and_dec(THD *thd) override;
  bool eq(const Item *item, bool binary_cmp) const override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("collate") };
    return name;
  }
  enum precedence precedence() const override { return COLLATE_PRECEDENCE; }
  enum Functype functype() const override { return COLLATE_FUNC; }
  void print(String *str, enum_query_type query_type) override;
  Item_field *field_for_view_update() override
  {
    /* this function is transparent for view updating */
    return args[0]->field_for_view_update();
  }
  bool need_parentheses_in_default() override { return true; }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/*
  Base for CHARSET()/COLLATION(): metadata-only functions whose result
  depends on the argument's type, never on its value.
*/
class Item_func_expr_str_metadata :public Item_str_func
{
public:
  Item_func_expr_str_metadata(THD *thd, Item *a): Item_str_func(thd, a) { }
  bool fix_length_and_dec(THD *thd) override
  {
    collation.set(system_charset_info);
    max_length= 64 * collation.collation->mbmaxlen; // should be enough
    base_flags&= ~item_base_t::MAYBE_NULL;
    return FALSE;
  };
  table_map not_null_tables() const override { return 0; }
  Item* propagate_equal_fields(THD *thd, const Context &ctx, COND_EQUAL *cond)
    override
  { return this; }
  bool const_item() const override { return true; }
};


/* CHARSET(expr). */
class Item_func_charset :public Item_func_expr_str_metadata
{
public:
  Item_func_charset(THD *thd, Item *a)
   :Item_func_expr_str_metadata(thd, a) { }
  String *val_str(String *) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name= {STRING_WITH_LEN("charset") };
    return name;
  }
  Item *get_copy(THD *thd) override
  { return get_item_copy(thd, this); }
};


/* COLLATION(expr). */
class Item_func_collation :public Item_func_expr_str_metadata
{
public:
  Item_func_collation(THD *thd, Item *a)
   :Item_func_expr_str_metadata(thd, a) {}
  String *val_str(String *) override;
  LEX_CSTRING func_name_cstring() const override
  {
    static LEX_CSTRING name=
{STRING_WITH_LEN("collation") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_weight_string :public Item_str_func +{ + String tmp_value; + uint weigth_flags; + uint nweights; + uint result_length; +public: + Item_func_weight_string(THD *thd, Item *a, uint result_length_arg, + uint nweights_arg, uint flags_arg): + Item_str_func(thd, a) + { + nweights= nweights_arg; + weigth_flags= flags_arg; + result_length= result_length_arg; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("weight_string") }; + return name; + } + String *val_str(String *) override; + bool fix_length_and_dec(THD *thd) override; + bool eq(const Item *item, bool binary_cmp) const override + { + if (!Item_str_func::eq(item, binary_cmp)) + return false; + Item_func_weight_string *that= (Item_func_weight_string *)item; + return this->weigth_flags == that->weigth_flags && + this->nweights == that->nweights && + this->result_length == that->result_length; + } + Item* propagate_equal_fields(THD *thd, const Context &ctx, COND_EQUAL *cond) + override + { return this; } + void print(String *str, enum_query_type query_type) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_crc32 :public Item_long_func +{ + bool check_arguments() const override + { + return args[0]->check_type_can_return_str(func_name_cstring()) && + (arg_count == 1 || + args[1]->check_type_can_return_int(func_name_cstring())); + } + String value; + uint32 (*const crc_func)(uint32, const void*, size_t); +public: + Item_func_crc32(THD *thd, bool Castagnoli, Item *a) : + Item_long_func(thd, a), + crc_func(Castagnoli ? my_crc32c : my_checksum) + { unsigned_flag= 1; } + Item_func_crc32(THD *thd, bool Castagnoli, Item *a, Item *b) : + Item_long_func(thd, a, b), + crc_func(Castagnoli ? 
my_crc32c : my_checksum) + { unsigned_flag= 1; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING crc32_name= {STRING_WITH_LEN("crc32") }; + static LEX_CSTRING crc32c_name= {STRING_WITH_LEN("crc32c") }; + return crc_func == my_crc32c ? crc32c_name : crc32_name; + } + bool fix_length_and_dec(THD *thd) override { max_length=10; return FALSE; } + longlong val_int() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_uncompressed_length : public Item_long_func_length +{ + String value; +public: + Item_func_uncompressed_length(THD *thd, Item *a) + :Item_long_func_length(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("uncompressed_length") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + max_length=10; + set_maybe_null(); + return FALSE; } + longlong val_int() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +#ifdef HAVE_COMPRESS +#define ZLIB_DEPENDED_FUNCTION ; +#else +#define ZLIB_DEPENDED_FUNCTION { null_value=1; return 0; } +#endif + +class Item_func_compress: public Item_str_binary_checksum_func +{ + String tmp_value; +public: + Item_func_compress(THD *thd, Item *a) + :Item_str_binary_checksum_func(thd, a) {} + bool fix_length_and_dec(THD *thd) override + { + max_length= (args[0]->max_length * 120) / 100 + 12; + return FALSE; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("compress") }; + return name; + } + String *val_str(String *) override ZLIB_DEPENDED_FUNCTION + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_uncompress: public Item_str_binary_checksum_func +{ + String tmp_value; +public: + Item_func_uncompress(THD *thd, Item *a) + :Item_str_binary_checksum_func(thd, a) {} + bool fix_length_and_dec(THD *thd) override + { + set_maybe_null(); + max_length= 
MAX_BLOB_WIDTH; + return FALSE; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("uncompress") }; + return name; + } + String *val_str(String *) override ZLIB_DEPENDED_FUNCTION + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_dyncol_create: public Item_str_func +{ +protected: + DYNCALL_CREATE_DEF *defs; + DYNAMIC_COLUMN_VALUE *vals; + uint *keys_num; + LEX_STRING *keys_str; + bool names, force_names; + bool prepare_arguments(THD *thd, bool force_names); + void print_arguments(String *str, enum_query_type query_type); +public: + Item_func_dyncol_create(THD *thd, List &args, DYNCALL_CREATE_DEF *dfs); + bool fix_fields(THD *thd, Item **ref) override; + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("column_create") }; + return name; + } + String *val_str(String *) override; + void print(String *str, enum_query_type query_type) override; + enum Functype functype() const override { return DYNCOL_FUNC; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_dyncol_add: public Item_func_dyncol_create +{ +public: + Item_func_dyncol_add(THD *thd, List &args_arg, DYNCALL_CREATE_DEF *dfs): + Item_func_dyncol_create(thd, args_arg, dfs) + {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("column_add") }; + return name; + } + String *val_str(String *) override; + void print(String *str, enum_query_type query_type) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_dyncol_json: public Item_str_func +{ +public: + Item_func_dyncol_json(THD *thd, Item *str): Item_str_func(thd, str) + {collation.set(DYNCOL_UTF);} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("column_json") }; + return name; 
+ } + String *val_str(String *) override; + bool fix_length_and_dec(THD *thd) override + { + max_length= MAX_BLOB_WIDTH; + set_maybe_null(); + decimals= 0; + return FALSE; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +/* + The following functions is always called from an Item_cast function +*/ + +class Item_dyncol_get: public Item_str_func +{ +public: + Item_dyncol_get(THD *thd, Item *str, Item *num): Item_str_func(thd, str, num) + {} + bool fix_length_and_dec(THD *thd) override + { + set_maybe_null(); + max_length= MAX_BLOB_WIDTH; + return FALSE; + } + /* Mark that collation can change between calls */ + bool dynamic_result() override { return 1; } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("column_get") }; + return name; + } + String *val_str(String *) override; + longlong val_int() override; + longlong val_int_signed_typecast() override + { + unsigned_flag= false; // Mark that we want to have a signed value + longlong value= val_int(); // val_int() can change unsigned_flag + if (!null_value && unsigned_flag && value < 0) + push_note_converted_to_negative_complement(current_thd); + return value; + } + longlong val_int_unsigned_typecast() override + { + unsigned_flag= true; // Mark that we want to have an unsigned value + longlong value= val_int(); // val_int() can change unsigned_flag + if (!null_value && unsigned_flag == 0 && value < 0) + push_note_converted_to_positive_complement(current_thd); + return value; + } + double val_real() override; + my_decimal *val_decimal(my_decimal *) override; + bool get_dyn_value(THD *thd, DYNAMIC_COLUMN_VALUE *val, String *tmp); + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + void print(String *str, enum_query_type query_type) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_dyncol_list: public Item_str_func +{ +public: + 
Item_func_dyncol_list(THD *thd, Item *str): Item_str_func(thd, str) + {collation.set(DYNCOL_UTF);} + bool fix_length_and_dec(THD *thd) override + { + set_maybe_null(); + max_length= MAX_BLOB_WIDTH; + return FALSE; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("column_list") }; + return name; + } + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +/* + this is used by JOIN_TAB::keep_current_rowid + and stores handler::position(). + It has nothing to do with _rowid pseudo-column, that the parser supports. +*/ +class Item_temptable_rowid :public Item_str_func +{ +public: + TABLE *table; + Item_temptable_rowid(TABLE *table_arg); + const Type_handler *type_handler() const override + { return &type_handler_string; } + Field *create_tmp_field(MEM_ROOT *root, bool group, TABLE *table) + { return create_table_field_from_handler(root, table); } + String *val_str(String *str) override; + enum Functype functype() const override { return TEMPTABLE_ROWID; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; +#ifdef WITH_WSREP + +#include "wsrep_api.h" + +class Item_func_wsrep_last_written_gtid: public Item_str_ascii_func +{ + String gtid_str; +public: + Item_func_wsrep_last_written_gtid(THD *thd): Item_str_ascii_func(thd) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("wsrep_last_written_gtid") }; + return name; + } + String *val_str_ascii(String *) override; + bool fix_length_and_dec(THD *thd) override + { + max_length= WSREP_GTID_STR_LEN; + set_maybe_null(); + return FALSE; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_wsrep_last_seen_gtid: public 
Item_str_ascii_func +{ + String gtid_str; +public: + Item_func_wsrep_last_seen_gtid(THD *thd): Item_str_ascii_func(thd) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("wsrep_last_seen_gtid") }; + return name; + } + String *val_str_ascii(String *) override; + bool fix_length_and_dec(THD *thd) override + { + max_length= WSREP_GTID_STR_LEN; + set_maybe_null(); + return FALSE; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_wsrep_sync_wait_upto: public Item_int_func +{ + String value; +public: + Item_func_wsrep_sync_wait_upto(THD *thd, Item *a): Item_int_func(thd, a) {} + Item_func_wsrep_sync_wait_upto(THD *thd, Item *a, Item* b): Item_int_func(thd, a, b) {} + const Type_handler *type_handler() const override + { return &type_handler_string; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("wsrep_sync_wait_upto_gtid") }; + return name; + } + longlong val_int() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; +#endif /* WITH_WSREP */ + +#endif /* ITEM_STRFUNC_INCLUDED */ diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc new file mode 100644 index 00000000..8b473278 --- /dev/null +++ b/sql/item_subselect.cc @@ -0,0 +1,7028 @@ +/* Copyright (c) 2002, 2016, Oracle and/or its affiliates. + Copyright (c) 2010, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + @brief + subselect Item + + @todo + - add function from mysql_select that use JOIN* as parameter to JOIN + methods (sql_select.h/sql_select.cc) +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +/* + It is necessary to include set_var.h instead of item.h because there + are dependencies on include order for set_var.h and item.h. This + will be resolved later. +*/ +#include "sql_class.h" // set_var.h: THD +#include "set_var.h" +#include "sql_select.h" +#include "sql_parse.h" // check_stack_overrun +#include "sql_cte.h" +#include "sql_test.h" + +double get_post_group_estimate(JOIN* join, double join_op_rows); + +LEX_CSTRING exists_outer_expr_name= { STRING_WITH_LEN("") }; + +LEX_CSTRING no_matter_name= {STRING_WITH_LEN("") }; + +int check_and_do_in_subquery_rewrites(JOIN *join); + +Item_subselect::Item_subselect(THD *thd_arg): + Item_result_field(thd_arg), Used_tables_and_const_cache(), + value_assigned(0), own_engine(0), thd(0), old_engine(0), + have_to_be_excluded(0), + inside_first_fix_fields(0), done_first_fix_fields(FALSE), + expr_cache(0), forced_const(FALSE), expensive_fl(FALSE), + substitution(0), engine(0), eliminated(FALSE), + changed(0), is_correlated(FALSE), with_recursive_reference(0) +{ + DBUG_ENTER("Item_subselect::Item_subselect"); + DBUG_PRINT("enter", ("this: %p", this)); + sortbuffer.str= 0; + +#ifndef DBUG_OFF + exec_counter= 0; +#endif + with_flags|= item_with_t::SUBQUERY; + reset(); + /* + Item value is NULL if select_result_interceptor didn't change this value + (i.e. 
some rows will be found returned) + */ + null_value= TRUE; + DBUG_VOID_RETURN; +} + + +void Item_subselect::init(st_select_lex *select_lex, + select_result_interceptor *result) +{ + /* + Please see Item_singlerow_subselect::invalidate_and_restore_select_lex(), + which depends on alterations to the parse tree implemented here. + */ + + DBUG_ENTER("Item_subselect::init"); + DBUG_PRINT("enter", ("select_lex: %p this: %p", + select_lex, this)); + + select_lex->parent_lex->relink_hack(select_lex); + + unit= select_lex->master_unit(); + + if (unit->item) + { + engine= unit->item->engine; + parsing_place= unit->item->parsing_place; + if (unit->item->substype() == EXISTS_SUBS && + ((Item_exists_subselect *)unit->item)->exists_transformed) + { + /* it is permanent transformation of EXISTS to IN */ + unit->item= this; + engine->change_result(this, result, FALSE); + } + else + { + /* + Item can be changed in JOIN::prepare while engine in JOIN::optimize + => we do not copy old_engine here + */ + unit->thd->change_item_tree((Item**)&unit->item, this); + engine->change_result(this, result, TRUE); + } + } + else + { + SELECT_LEX *outer_select= unit->outer_select(); + THD *thd= unit->thd; + /* + do not take into account expression inside aggregate functions because + they can access original table fields + */ + parsing_place= (outer_select->in_sum_expr ? 
+ NO_MATTER : + outer_select->parsing_place); + if (unit->is_unit_op() && + (unit->first_select()->next_select() || unit->fake_select_lex)) + engine= new (thd->mem_root) + subselect_union_engine(unit, result, this); + else + engine= new (thd->mem_root) + subselect_single_select_engine(select_lex, result, this); + } + DBUG_PRINT("info", ("engine: %p", engine)); + DBUG_VOID_RETURN; +} + +st_select_lex * +Item_subselect::get_select_lex() +{ + return unit->first_select(); +} + +void Item_subselect::cleanup() +{ + DBUG_ENTER("Item_subselect::cleanup"); + Item_result_field::cleanup(); + if (old_engine) + { + if (engine) + engine->cleanup(); + engine= old_engine; + old_engine= 0; + } + if (engine) + engine->cleanup(); + reset(); + filesort_buffer.free_sort_buffer(); + my_free(sortbuffer.str); + sortbuffer.str= 0; + + value_assigned= 0; + expr_cache= 0; + forced_const= FALSE; + DBUG_PRINT("info", ("exec_counter: %d", exec_counter)); +#ifndef DBUG_OFF + exec_counter= 0; +#endif + DBUG_VOID_RETURN; +} + + +void Item_singlerow_subselect::cleanup() +{ + DBUG_ENTER("Item_singlerow_subselect::cleanup"); + value= 0; row= 0; + Item_subselect::cleanup(); + DBUG_VOID_RETURN; +} + + +void Item_in_subselect::cleanup() +{ + DBUG_ENTER("Item_in_subselect::cleanup"); + if (left_expr_cache) + { + left_expr_cache->delete_elements(); + delete left_expr_cache; + left_expr_cache= NULL; + } + /* + TODO: This breaks the commented assert in add_strategy(). + in_strategy&= ~SUBS_STRATEGY_CHOSEN; + */ + first_execution= TRUE; + pushed_cond_guards= NULL; + Item_subselect::cleanup(); + DBUG_VOID_RETURN; +} + + +void Item_allany_subselect::cleanup() +{ + /* + The MAX/MIN transformation through injection is reverted through the + change_item_tree() mechanism. Revert the select_lex object of the + query to its initial state. 
+ */ + for (SELECT_LEX *sl= unit->first_select(); + sl; sl= sl->next_select()) + if (test_set_strategy(SUBS_MAXMIN_INJECTED)) + sl->with_sum_func= false; + Item_in_subselect::cleanup(); +} + + +Item_subselect::~Item_subselect() +{ + DBUG_ENTER("Item_subselect::~Item_subselect"); + DBUG_PRINT("enter", ("this: %p", this)); + if (own_engine) + delete engine; + else + if (engine) // can be empty in case of EOM + engine->cleanup(); + engine= NULL; + DBUG_VOID_RETURN; +} + +bool +Item_subselect::select_transformer(JOIN *join) +{ + DBUG_ENTER("Item_subselect::select_transformer"); + DBUG_ASSERT(thd == join->thd); + DBUG_RETURN(false); +} + + +bool Item_subselect::fix_fields(THD *thd_param, Item **ref) +{ + char const *save_where= thd_param->where; + uint8 uncacheable; + bool res; + + thd= thd_param; + + DBUG_ASSERT(unit->thd == thd); + + { + SELECT_LEX *upper= unit->outer_select(); + if (upper->parsing_place == IN_HAVING) + upper->subquery_in_having= 1; + /* The subquery is an expression cache candidate */ + upper->expr_cache_may_be_used[upper->parsing_place]= TRUE; + } + + status_var_increment(thd_param->status_var.feature_subquery); + + DBUG_ASSERT(fixed() == 0); + engine->set_thd((thd= thd_param)); + if (!done_first_fix_fields) + { + done_first_fix_fields= TRUE; + inside_first_fix_fields= TRUE; + upper_refs.empty(); + /* + psergey-todo: remove _first_fix_fields calls, we need changes on every + execution + */ + } + + eliminated= FALSE; + parent_select= thd_param->lex->current_select; + + if (check_stack_overrun(thd, STACK_MIN_SIZE, (uchar*)&res)) + return TRUE; + + for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select()) + { + if (sl->tvc) + { + if (!(sl= wrap_tvc_into_select(thd, sl))) + { + res= TRUE; + goto end; + } + if (sl == unit->first_select() && !sl->next_select()) + unit->fake_select_lex= 0; + } + } + + if (!(res= engine->prepare(thd))) + { + // all transformation is done (used by prepared statements) + changed= 1; + inside_first_fix_fields= 
FALSE; + + /* + Substitute the current item with an Item_in_optimizer that was + created by Item_in_subselect::select_in_like_transformer and + call fix_fields for the substituted item which in turn calls + engine->prepare for the subquery predicate. + */ + if (substitution) + { + /* + If the top item of the WHERE/HAVING condition changed, + set correct WHERE/HAVING for PS. + */ + if (unit->outer_select()->where == (*ref)) + unit->outer_select()->where= substitution; + else if (unit->outer_select()->having == (*ref)) + unit->outer_select()->having= substitution; + + (*ref)= substitution; + substitution->name= name; + if (have_to_be_excluded) + engine->exclude(); + substitution= 0; + thd->where= "checking transformed subquery"; + res= (*ref)->fix_fields_if_needed(thd, ref); + goto end; + + } + // Is it one field subselect? + if (engine->cols() > max_columns) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), 1); + res= TRUE; + goto end; + } + if (fix_length_and_dec()) + { + res= TRUE; + goto end; + } + } + else + goto end; + + if ((uncacheable= engine->uncacheable() & ~UNCACHEABLE_EXPLAIN) || + with_recursive_reference) + { + const_item_cache= 0; + if (uncacheable & UNCACHEABLE_RAND) + used_tables_cache|= RAND_TABLE_BIT; + } + base_flags|= item_base_t::FIXED; + +end: + done_first_fix_fields= FALSE; + inside_first_fix_fields= FALSE; + thd->where= save_where; + return res; +} + + +bool Item_subselect::enumerate_field_refs_processor(void *arg) +{ + List_iterator it(upper_refs); + Ref_to_outside *upper; + + while ((upper= it++)) + { + if (upper->item && + upper->item->walk(&Item::enumerate_field_refs_processor, FALSE, arg)) + return TRUE; + } + return FALSE; +} + +bool Item_subselect::mark_as_eliminated_processor(void *arg) +{ + eliminated= TRUE; + return FALSE; +} + + +/** + Remove a subselect item from its unit so that the unit no longer + represents a subquery. 
+ + @param arg unused parameter + + @return + FALSE to force the evaluation of the processor for the subsequent items. +*/ + +bool Item_subselect::eliminate_subselect_processor(void *arg) +{ + unit->item= NULL; + if (!unit->is_excluded()) + unit->exclude(); + eliminated= TRUE; + return FALSE; +} + + +bool Item_subselect::mark_as_dependent(THD *thd, st_select_lex *select, + Item *item) +{ + if (inside_first_fix_fields) + { + is_correlated= TRUE; + Ref_to_outside *upper; + if (!(upper= new (thd->mem_root) Ref_to_outside())) + return TRUE; + upper->select= select; + upper->item= item; + if (upper_refs.push_back(upper, thd->mem_root)) + return TRUE; + } + return FALSE; +} + + +/* + @brief + Update the table bitmaps for the outer references used within a subquery +*/ + +bool Item_subselect::update_table_bitmaps_processor(void *arg) +{ + List_iterator it(upper_refs); + Ref_to_outside *upper; + + while ((upper= it++)) + { + if (upper->item && + upper->item->walk(&Item::update_table_bitmaps_processor, FALSE, arg)) + return TRUE; + } + return FALSE; +} + + +/* + Adjust attributes after our parent select has been merged into grandparent + + DESCRIPTION + Subquery is a composite object which may be correlated, that is, it may + have + 1. references to tables of the parent select (i.e. one that has the clause + with the subquery predicate) + 2. references to tables of the grandparent select + 3. references to tables of further ancestors. + + Before the pullout, this item indicates: + - #1 with table bits in used_tables() + - #2 and #3 with OUTER_REF_TABLE_BIT. + + After parent has been merged with grandparent: + - references to parent and grandparent tables should be indicated with + table bits. + - references to greatgrandparent and further ancestors - with + OUTER_REF_TABLE_BIT. 
+*/ + +void Item_subselect::fix_after_pullout(st_select_lex *new_parent, + Item **ref, bool merge) +{ + recalc_used_tables(new_parent, TRUE); + parent_select= new_parent; +} + + +class Field_fixer: public Field_enumerator +{ +public: + table_map used_tables; /* Collect used_tables here */ + st_select_lex *new_parent; /* Select we're in */ + virtual void visit_field(Item_field *item) + { + //for (TABLE_LIST *tbl= new_parent->leaf_tables; tbl; tbl= tbl->next_local) + //{ + // if (tbl->table == field->table) + // { + used_tables|= item->field->table->map; + // return; + // } + //} + //used_tables |= OUTER_REF_TABLE_BIT; + } +}; + + +/* + Recalculate used_tables_cache +*/ + +void Item_subselect::recalc_used_tables(st_select_lex *new_parent, + bool after_pullout) +{ + List_iterator_fast it(upper_refs); + Ref_to_outside *upper; + DBUG_ENTER("recalc_used_tables"); + + used_tables_cache= 0; + while ((upper= it++)) + { + bool found= FALSE; + /* + Check if + 1. the upper reference refers to the new immediate parent select, or + 2. one of the further ancestors. + + We rely on the fact that the tree of selects is modified by some kind of + 'flattening', i.e. a process where child selects are merged into their + parents. + The merged selects are removed from the select tree but keep pointers to + their parents. + */ + for (st_select_lex *sel= upper->select; sel; sel= sel->outer_select()) + { + /* + If we've reached the new parent select by walking upwards from + reference's original select, this means that the reference is now + referring to the direct parent: + */ + if (sel == new_parent) + { + found= TRUE; + /* + upper->item may be NULL when we've referred to a grouping function, + in which case we don't care about what it's table_map really is, + because item->with_sum_func==1 will ensure correct placement of the + item. 
+ */ + if (upper->item) + { + // Now, iterate over fields and collect used_tables() attribute: + Field_fixer fixer; + fixer.used_tables= 0; + fixer.new_parent= new_parent; + upper->item->walk(&Item::enumerate_field_refs_processor, 0, &fixer); + used_tables_cache |= fixer.used_tables; + upper->item->walk(&Item::update_table_bitmaps_processor, FALSE, NULL); +/* + if (after_pullout) + upper->item->fix_after_pullout(new_parent, &(upper->item)); + upper->item->update_used_tables(); +*/ + } + } + } + if (!found) + used_tables_cache|= OUTER_REF_TABLE_BIT; + } + /* + Don't update const_tables_cache yet as we don't yet know which of the + parent's tables are constant. Parent will call update_used_tables() after + he has done const table detection, and that will be our chance to update + const_tables_cache. + */ + DBUG_PRINT("exit", ("used_tables_cache: %llx", used_tables_cache)); + DBUG_VOID_RETURN; +} + + +/** + Determine if a subquery is expensive to execute during query optimization. + + @details The cost of execution of a subquery is estimated based on an + estimate of the number of rows the subquery will access during execution. + This measure is used instead of JOIN::read_time, because it is considered + to be much more reliable than the cost estimate. 
+ + @return true if the subquery is expensive + @return false otherwise +*/ +bool Item_subselect::is_expensive() +{ + double examined_rows= 0; + bool all_are_simple= true; + + if (!expensive_fl && is_evaluated()) + return false; + + /* check extremely simple select */ + if (!unit->first_select()->next_select()) // no union + { + /* + such single selects works even without optimization because + can not makes loops + */ + SELECT_LEX *sl= unit->first_select(); + JOIN *join = sl->join; + if (join && !join->tables_list && !sl->first_inner_unit()) + return (expensive_fl= false); + } + + + for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select()) + { + JOIN *cur_join= sl->join; + + /* not optimized subquery */ + if (!cur_join) + return (expensive_fl= true); + + /* + If the subquery is not optimised or in the process of optimization + it supposed to be expensive + */ + if (cur_join->optimization_state != JOIN::OPTIMIZATION_DONE) + return (expensive_fl= true); + + if (!cur_join->tables_list && !sl->first_inner_unit()) + continue; + + /* + Subqueries whose result is known after optimization are not expensive. + Such subqueries have all tables optimized away, thus have no join plan. + */ + if ((cur_join->zero_result_cause || !cur_join->tables_list)) + continue; + + /* + This is not simple SELECT in union so we can not go by simple condition + */ + all_are_simple= false; + + /* + If a subquery is not optimized we cannot estimate its cost. A subquery is + considered optimized if it has a join plan. + */ + if (!cur_join->join_tab) + return (expensive_fl= true); + + if (sl->first_inner_unit()) + { + /* + Subqueries that contain subqueries are considered expensive. + @todo: accumulate the cost of subqueries. 
+ */ + return (expensive_fl= true); + } + + examined_rows+= cur_join->get_examined_rows(); + } + + // here we are sure that subquery is optimized so thd is set + return (expensive_fl= !all_are_simple && + (examined_rows > thd->variables.expensive_subquery_limit)); +} + + +/* + @brief + Apply item processor for all scalar (i.e. Item*) expressions that + occur in the nested join. +*/ + +static +int walk_items_for_table_list(Item_processor processor, + bool walk_subquery, void *argument, + List& join_list) +{ + List_iterator li(join_list); + int res; + while (TABLE_LIST *table= li++) + { + if (table->on_expr) + { + if ((res= table->on_expr->walk(processor, walk_subquery, argument))) + return res; + } + if (Table_function_json_table *tf= table->table_function) + { + if ((res= tf->walk_items(processor, walk_subquery, argument))) + { + return res; + } + } + + if (table->nested_join) + { + if ((res= walk_items_for_table_list(processor, walk_subquery, argument, + table->nested_join->join_list))) + return res; + } + } + return 0; +} + + +bool Item_subselect::unknown_splocal_processor(void *argument) +{ + SELECT_LEX *sl= unit->first_select(); + if (sl->top_join_list.elements) + return 0; + if (sl->tvc && sl->tvc->walk_values(&Item::unknown_splocal_processor, + false, argument)) + return true; + for (SELECT_LEX *lex= unit->first_select(); lex; lex= lex->next_select()) + { + /* + TODO: walk through GROUP BY and ORDER yet eventually. 
+ This will require checking aliases in SELECT list: + SELECT 1 AS a GROUP BY a; + SELECT 1 AS a ORDER BY a; + */ + List_iterator li(lex->item_list); + Item *item; + if (lex->where && (lex->where)->walk(&Item::unknown_splocal_processor, + false, argument)) + return true; + if (lex->having && (lex->having)->walk(&Item::unknown_splocal_processor, + false, argument)) + return true; + while ((item=li++)) + { + if (item->walk(&Item::unknown_splocal_processor, false, argument)) + return true; + } + } + return false; +} + + +bool Item_subselect::walk(Item_processor processor, bool walk_subquery, + void *argument) +{ + if (!(unit->uncacheable & ~UNCACHEABLE_DEPENDENT) && engine->is_executed() && + !unit->describe) + { + /* + The subquery has already been executed (for real, it wasn't EXPLAIN's + fake execution) so it should not matter what it has inside. + + The actual reason for not walking inside is that parts of the subquery + (e.g. JTBM join nests and their IN-equality conditions may have been + invalidated by irreversible cleanups (those happen after an uncorrelated + subquery has been executed). 
+ */ + return (this->*processor)(argument); + } + + if (walk_subquery) + { + for (SELECT_LEX *lex= unit->first_select(); lex; lex= lex->next_select()) + { + List_iterator li(lex->item_list); + ORDER *order; + + if (lex->where && (lex->where)->walk(processor, walk_subquery, argument)) + return 1; + if (lex->having && (lex->having)->walk(processor, walk_subquery, + argument)) + return 1; + + if (walk_items_for_table_list(processor, walk_subquery, argument, + *lex->join_list)) + return 1; + + while (Item *item= li++) + { + if (item->walk(processor, walk_subquery, argument)) + return 1; + } + for (order= lex->order_list.first ; order; order= order->next) + { + if ((*order->item)->walk(processor, walk_subquery, argument)) + return 1; + } + for (order= lex->group_list.first ; order; order= order->next) + { + if ((*order->item)->walk(processor, walk_subquery, argument)) + return 1; + } + } + } + return (this->*processor)(argument); +} + + +bool Item_subselect::exec() +{ + subselect_engine *org_engine= engine; + DBUG_ENTER("Item_subselect::exec"); + DBUG_ASSERT(fixed()); + DBUG_ASSERT(thd); + DBUG_ASSERT(!eliminated); + + DBUG_EXECUTE_IF("Item_subselect", + Item::Print print(this, + enum_query_type(QT_TO_SYSTEM_CHARSET | + QT_WITHOUT_INTRODUCERS)); + + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_UNKNOWN_ERROR, "DBUG: Item_subselect::exec %.*b", + print.length(),print.ptr()); + ); + /* + Do not execute subselect in case of a fatal error + or if the query has been killed. + */ + if (unlikely(thd->is_error() || thd->killed)) + DBUG_RETURN(true); + + DBUG_ASSERT(!thd->lex->context_analysis_only); + /* + Simulate a failure in sub-query execution. Used to test e.g. + out of memory or query being killed conditions. 
+ */ + DBUG_EXECUTE_IF("subselect_exec_fail", DBUG_RETURN(true);); + + bool res= engine->exec(); + +#ifndef DBUG_OFF + ++exec_counter; +#endif + if (engine != org_engine) + { + /* + If the subquery engine changed during execution due to lazy subquery + optimization, or because the original engine found a more efficient other + engine, re-execute the subquery with the new engine. + */ + DBUG_RETURN(exec()); + } + DBUG_RETURN(res); +} + + +void Item_subselect::get_cache_parameters(List ¶meters) +{ + Collect_deps_prm prm= {¶meters, // parameters + unit->first_select()->nest_level_base, // nest_level_base + 0, // count + unit->first_select()->nest_level, // nest_level + TRUE // collect + }; + walk(&Item::collect_outer_ref_processor, TRUE, &prm); +} + +int Item_in_subselect::optimize(double *out_rows, double *cost) +{ + int res; + DBUG_ENTER("Item_in_subselect::optimize"); + DBUG_ASSERT(fixed()); + SELECT_LEX *save_select= thd->lex->current_select; + JOIN *join= unit->first_select()->join; + + thd->lex->current_select= join->select_lex; + if ((res= join->optimize())) + DBUG_RETURN(res); + + /* Calculate #rows and cost of join execution */ + join->get_partial_cost_and_fanout(join->table_count - join->const_tables, + table_map(-1), + cost, out_rows); + + /* + Adjust join output cardinality. There can be these cases: + - Have no GROUP BY and no aggregate funcs: we won't get into this + function because such join will be processed as a merged semi-join + (TODO: does it really mean we don't need to handle such cases here at + all? put ASSERT) + - Have no GROUP BY but have aggregate funcs: output is 1 record. + - Have GROUP BY and have (or not) aggregate funcs: need to adjust output + cardinality. 
+ */ + thd->lex->current_select= save_select; + if (!join->group_list && !join->group_optimized_away && + join->tmp_table_param.sum_func_count) + { + DBUG_PRINT("info",("Materialized join will have only 1 row (it has " + "aggregates but no GROUP BY")); + *out_rows= 1; + } + + /* Now with grouping */ + if (join->group_list_for_estimates) + { + DBUG_PRINT("info",("Materialized join has grouping, trying to estimate it")); + double output_rows= get_post_group_estimate(join, *out_rows); + DBUG_PRINT("info",("Got value of %g", output_rows)); + *out_rows= output_rows; + } + + DBUG_RETURN(res); + +} + + +/** + Check if an expression cache is needed for this subquery + + @param thd Thread handle + + @details + The function checks whether a cache is needed for a subquery and whether + the result of the subquery can be put in cache. + + @retval TRUE cache is needed + @retval FALSE otherwise +*/ + +bool Item_subselect::expr_cache_is_needed(THD *thd) +{ + return ((engine->uncacheable() & UNCACHEABLE_DEPENDENT) && + engine->cols() == 1 && + optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE) && + !(engine->uncacheable() & (UNCACHEABLE_RAND | + UNCACHEABLE_SIDEEFFECT)) && + !with_recursive_reference); +} + + +/** + Check if the left IN argument contains NULL values. + + @retval TRUE there are NULLs + @retval FALSE otherwise +*/ + +inline bool Item_in_subselect::left_expr_has_null() +{ + return (*(optimizer->get_cache()))->null_value_inside; +} + + +/** + Check if an expression cache is needed for this subquery + + @param thd Thread handle + + @details + The function checks whether a cache is needed for a subquery and whether + the result of the subquery can be put in cache. + + @note + This method allows many columns in the subquery because it is supported by + Item_in_optimizer and result of the IN subquery will be scalar in this + case. 
+ + @retval TRUE cache is needed + @retval FALSE otherwise +*/ + +bool Item_in_subselect::expr_cache_is_needed(THD *thd) +{ + return (optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE) && + !(engine->uncacheable() & (UNCACHEABLE_RAND | + UNCACHEABLE_SIDEEFFECT)) && + !with_recursive_reference); +} + + +/* + Compute the IN predicate if the left operand's cache changed. +*/ + +bool Item_in_subselect::exec() +{ + DBUG_ENTER("Item_in_subselect::exec"); + DBUG_ASSERT(fixed()); + DBUG_ASSERT(thd); + + /* + Initialize the cache of the left predicate operand. This has to be done as + late as now, because Cached_item directly contains a resolved field (not + an item, and in some cases (when temp tables are created), these fields + end up pointing to the wrong field. One solution is to change Cached_item + to not resolve its field upon creation, but to resolve it dynamically + from a given Item_ref object. + TODO: the cache should be applied conditionally based on: + - rules - e.g. only if the left operand is known to be ordered, and/or + - on a cost-based basis, that takes into account the cost of a cache + lookup, the cache hit rate, and the savings per cache hit. + */ + if (!left_expr_cache && (test_strategy(SUBS_MATERIALIZATION))) + init_left_expr_cache(); + + /* + If the new left operand is already in the cache, reuse the old result. + Use the cached result only if this is not the first execution of IN + because the cache is not valid for the first execution. + */ + if (!first_execution && left_expr_cache && + test_if_item_cache_changed(*left_expr_cache) < 0) + DBUG_RETURN(FALSE); + + /* + The exec() method below updates item::value, and item::null_value, thus if + we don't call it, the next call to item::val_int() will return whatever + result was computed by its previous call. 
+ */ + DBUG_RETURN(Item_subselect::exec()); +} + + +Item::Type Item_subselect::type() const +{ + return SUBSELECT_ITEM; +} + + +bool Item_subselect::fix_length_and_dec() +{ + if (engine->fix_length_and_dec(0)) + return TRUE; + return FALSE; +} + + +table_map Item_subselect::used_tables() const +{ + return (table_map) ((engine->uncacheable() & ~UNCACHEABLE_EXPLAIN)? + used_tables_cache : 0L); +} + + +bool Item_subselect::const_item() const +{ + DBUG_ASSERT(thd); + return (thd->lex->context_analysis_only || with_recursive_reference ? + FALSE : + forced_const || const_item_cache); +} + +Item *Item_subselect::get_tmp_table_item(THD *thd_arg) +{ + if (!with_sum_func() && !const_item()) + { + auto item_field= + new (thd->mem_root) Item_field(thd_arg, result_field); + if (item_field) + item_field->set_refers_to_temp_table(); + return item_field; + } + return copy_or_same(thd_arg); +} + +void Item_subselect::update_used_tables() +{ + if (!forced_const) + { + recalc_used_tables(parent_select, FALSE); + if (!(engine->uncacheable() & ~UNCACHEABLE_EXPLAIN)) + { + // did all used tables become static? + if (!(used_tables_cache & ~engine->upper_select_const_tables()) && + ! with_recursive_reference) + const_item_cache= 1; + } + } +} + + +void Item_subselect::print(String *str, enum_query_type query_type) +{ + if (query_type & QT_ITEM_SUBSELECT_ID_ONLY) + { + str->append(STRING_WITH_LEN("(subquery#")); + if (unit && unit->first_select()) + { + char buf[64]; + size_t length= (size_t) + (longlong10_to_str(unit->first_select()->select_number, buf, 10) - + buf); + str->append(buf, length); + } + else + { + // TODO: Explain what exactly does this mean? 
+ str->append(NULL_clex_str); + } + + str->append(')'); + return; + } + if (engine) + { + str->append('('); + engine->print(str, query_type); + str->append(')'); + } + else + str->append(STRING_WITH_LEN("(...)")); +} + + +Item_singlerow_subselect::Item_singlerow_subselect(THD *thd, st_select_lex *select_lex): + Item_subselect(thd), value(0) +{ + DBUG_ENTER("Item_singlerow_subselect::Item_singlerow_subselect"); + init(select_lex, new (thd->mem_root) select_singlerow_subselect(thd, this)); + set_maybe_null(); + max_columns= UINT_MAX; + DBUG_VOID_RETURN; +} + +st_select_lex * +Item_singlerow_subselect::invalidate_and_restore_select_lex() +{ + DBUG_ENTER("Item_singlerow_subselect::invalidate_and_restore_select_lex"); + st_select_lex *result= get_select_lex(); + + DBUG_ASSERT(result); + + /* + This code restore the parse tree in it's state before the execution of + Item_singlerow_subselect::Item_singlerow_subselect(), + and in particular decouples this object from the SELECT_LEX, + so that the SELECT_LEX can be used with a different flavor + or Item_subselect instead, as part of query rewriting. 
+ */ + unit->item= NULL; + + DBUG_RETURN(result); +} + +Item_maxmin_subselect::Item_maxmin_subselect(THD *thd, + Item_subselect *parent, + st_select_lex *select_lex, + bool max_arg): + Item_singlerow_subselect(thd), was_values(TRUE) +{ + DBUG_ENTER("Item_maxmin_subselect::Item_maxmin_subselect"); + max= max_arg; + init(select_lex, + new (thd->mem_root) select_max_min_finder_subselect(thd, + this, max_arg, parent->substype() == Item_subselect::ALL_SUBS)); + max_columns= 1; + set_maybe_null(); + max_columns= 1; + + /* + Following information was collected during performing fix_fields() + of Items belonged to subquery, which will be not repeated + */ + used_tables_cache= parent->get_used_tables_cache(); + const_item_cache= parent->const_item(); + + DBUG_VOID_RETURN; +} + +void Item_maxmin_subselect::cleanup() +{ + DBUG_ENTER("Item_maxmin_subselect::cleanup"); + Item_singlerow_subselect::cleanup(); + + /* + By default it is TRUE to avoid TRUE reporting by + Item_func_not_all/Item_func_nop_all if this item was never called. + + Engine exec() set it to FALSE by reset_value_registration() call. + select_max_min_finder_subselect::send_data() set it back to TRUE if some + value will be found. + */ + was_values= TRUE; + DBUG_VOID_RETURN; +} + + +void Item_maxmin_subselect::print(String *str, enum_query_type query_type) +{ + str->append(max?"":"", 5); + Item_singlerow_subselect::print(str, query_type); +} + + +void Item_maxmin_subselect::no_rows_in_result() +{ + /* + Subquery predicates outside of the SELECT list must be evaluated in order + to possibly filter the special result row generated for implicit grouping + if the subquery is in the HAVING clause. + If the predicate is constant, we need its actual value in the only result + row for queries with implicit grouping. 
+ */ + if (parsing_place != SELECT_LIST || const_item()) + return; + value= get_cache(thd); + null_value= 0; + was_values= 0; + make_const(); +} + + +void Item_singlerow_subselect::no_rows_in_result() +{ + /* + Subquery predicates outside of the SELECT list must be evaluated in order + to possibly filter the special result row generated for implicit grouping + if the subquery is in the HAVING clause. + If the predicate is constant, we need its actual value in the only result + row for queries with implicit grouping. + */ + if (parsing_place != SELECT_LIST || const_item()) + return; + value= get_cache(thd); + reset(); + make_const(); +} + + +void Item_singlerow_subselect::reset() +{ + Item_subselect::reset(); + if (value) + { + for(uint i= 0; i < engine->cols(); i++) + row[i]->set_null(); + } +} + + +/** + @todo + - We can't change name of Item_field or Item_ref, because it will + prevent its correct resolving, but we should save name of + removed item => we do not make optimization if top item of + list is field or reference. + - switch off this optimization for prepare statement, + because we do not rollback these changes. + Make rollback for it, or special name resolving mode in 5.0. + + @param join Join object of the subquery (i.e. 'child' join). 
+ + @retval false The subquery was transformed +*/ +bool +Item_singlerow_subselect::select_transformer(JOIN *join) +{ + DBUG_ENTER("Item_singlerow_subselect::select_transformer"); + if (changed) + DBUG_RETURN(false); + DBUG_ASSERT(join->thd == thd); + + SELECT_LEX *select_lex= join->select_lex; + Query_arena *arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); + + auto need_to_pull_out_item = [](enum_parsing_place context_analysis_place, + Item *item) { + return + !item->with_sum_func() && + /* + We can't change name of Item_field or Item_ref, because it will + prevent its correct resolving, but we should save name of + removed item => we do not make optimization if top item of + list is field or reference. + TODO: solve above problem + */ + item->type() != FIELD_ITEM && item->type() != REF_ITEM && + /* + The item can be pulled out to upper level in case it doesn't represent + the constant in the clause 'ORDER/GROUP BY (constant)'. + */ + !((item->is_order_clause_position() || + item->is_stored_routine_parameter()) && + (context_analysis_place == IN_ORDER_BY || + context_analysis_place == IN_GROUP_BY) + ); + }; + + if (!select_lex->master_unit()->is_unit_op() && + !select_lex->table_list.elements && + select_lex->item_list.elements == 1 && + !join->conds && !join->having && + need_to_pull_out_item( + join->select_lex->outer_select()->context_analysis_place, + select_lex->item_list.head()) && + thd->stmt_arena->state != Query_arena::STMT_INITIALIZED_FOR_SP) + { + have_to_be_excluded= 1; + if (thd->lex->describe) + { + char warn_buff[MYSQL_ERRMSG_SIZE]; + sprintf(warn_buff, ER_THD(thd, ER_SELECT_REDUCED), + select_lex->select_number); + push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_SELECT_REDUCED, warn_buff); + } + substitution= select_lex->item_list.head(); + /* + as far as we moved content to upper level we have to fix dependences & Co + */ + substitution->fix_after_pullout(select_lex->outer_select(), + &substitution, TRUE); + } + if 
(arena) + thd->restore_active_arena(arena, &backup); + DBUG_RETURN(false); +} + + +void Item_singlerow_subselect::store(uint i, Item *item) +{ + row[i]->store(item); + row[i]->cache_value(); +} + +const Type_handler *Item_singlerow_subselect::type_handler() const +{ + return engine->type_handler(); +} + +bool Item_singlerow_subselect::fix_length_and_dec() +{ + if ((max_columns= engine->cols()) == 1) + { + if (engine->fix_length_and_dec(row= &value)) + return TRUE; + } + else + { + if (!(row= (Item_cache**) current_thd->alloc(sizeof(Item_cache*) * + max_columns)) || + engine->fix_length_and_dec(row)) + return TRUE; + value= *row; + } + unsigned_flag= value->unsigned_flag; + /* + If the subquery always returns a row, like "(SELECT subq_value)" + then its NULLability is the same as subq_value's NULLability. + */ + if (engine->always_returns_one_row()) + set_maybe_null(engine->may_be_null()); + else + { + for (uint i= 0; i < max_columns; i++) + row[i]->set_maybe_null(); + } + return FALSE; +} + + + +/* + @brief + Check if we can guarantee that this engine will always produce exactly one + row. + + @detail + Check if the subquery is just + + (SELECT value) + + Then we can guarantee we always return one row. + Selecting from tables may produce more than one row. + HAVING, WHERE or ORDER BY/LIMIT clauses may cause no rows to be produced. +*/ + +bool subselect_single_select_engine::always_returns_one_row() const +{ + st_select_lex *params= select_lex->master_unit()->global_parameters(); + return no_tables() && + !params->limit_params.select_limit && + !params->limit_params.offset_limit && + !select_lex->where && + !select_lex->having; +} + +/** + Add an expression cache for this subquery if it is needed + + @param thd_arg Thread handle + + @details + The function checks whether an expression cache is needed for this item + and if if so wraps the item into an item of the class + Item_cache_wrapper with an appropriate expression cache set up there. 
+ + @note + used from Item::transform() + + @return + new wrapper item if an expression cache is needed, + this item - otherwise +*/ + +Item* Item_singlerow_subselect::expr_cache_insert_transformer(THD *tmp_thd, + uchar *unused) +{ + DBUG_ENTER("Item_singlerow_subselect::expr_cache_insert_transformer"); + + DBUG_ASSERT(thd == tmp_thd); + + /* + Do not create subquery cache if the subquery was eliminated. + The optimizer may eliminate subquery items (see + eliminate_subselect_processor). However it does not update + all query's data structures, so the eliminated item may be + still reachable. + */ + if (eliminated) + DBUG_RETURN(this); + + if (expr_cache) + DBUG_RETURN(expr_cache); + + if (expr_cache_is_needed(tmp_thd) && + (expr_cache= set_expr_cache(tmp_thd))) + { + init_expr_cache_tracker(tmp_thd); + DBUG_RETURN(expr_cache); + } + DBUG_RETURN(this); +} + + +uint Item_singlerow_subselect::cols() const +{ + return engine->cols(); +} + +bool Item_singlerow_subselect::check_cols(uint c) +{ + if (c != engine->cols()) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), c); + return 1; + } + return 0; +} + +bool Item_singlerow_subselect::null_inside() +{ + for (uint i= 0; i < max_columns ; i++) + { + if (row[i]->null_value) + return 1; + } + return 0; +} + +void Item_singlerow_subselect::bring_value() +{ + if (!exec() && assigned()) + { + null_value= true; + for (uint i= 0; i < max_columns ; i++) + { + if (!row[i]->null_value) + { + null_value= false; + return; + } + } + } + else + reset(); +} + +double Item_singlerow_subselect::val_real() +{ + DBUG_ASSERT(fixed()); + if (forced_const) + return value->val_real(); + if (!exec() && !value->null_value) + { + null_value= FALSE; + return value->val_real(); + } + else + { + reset(); + return 0; + } +} + +longlong Item_singlerow_subselect::val_int() +{ + DBUG_ASSERT(fixed()); + if (forced_const) + { + longlong val= value->val_int(); + null_value= value->null_value; + return val; + } + if (!exec() && !value->null_value) + { + 
null_value= FALSE; + return value->val_int(); + } + else + { + reset(); + DBUG_ASSERT(null_value); + return 0; + } +} + +String *Item_singlerow_subselect::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + if (forced_const) + { + String *res= value->val_str(str); + null_value= value->null_value; + return res; + } + if (!exec() && !value->null_value) + { + null_value= FALSE; + return value->val_str(str); + } + else + { + reset(); + DBUG_ASSERT(null_value); + return 0; + } +} + + +bool Item_singlerow_subselect::val_native(THD *thd, Native *to) +{ + DBUG_ASSERT(fixed()); + if (forced_const) + return value->val_native(thd, to); + if (!exec() && !value->null_value) + { + null_value= false; + return value->val_native(thd, to); + } + else + { + reset(); + return true; + } +} + + +my_decimal *Item_singlerow_subselect::val_decimal(my_decimal *decimal_value) +{ + DBUG_ASSERT(fixed()); + if (forced_const) + { + my_decimal *val= value->val_decimal(decimal_value); + null_value= value->null_value; + return val; + } + if (!exec() && !value->null_value) + { + null_value= FALSE; + return value->val_decimal(decimal_value); + } + else + { + reset(); + DBUG_ASSERT(null_value); + return 0; + } +} + + +bool Item_singlerow_subselect::val_bool() +{ + DBUG_ASSERT(fixed()); + if (forced_const) + { + bool val= value->val_bool(); + null_value= value->null_value; + return val; + } + if (!exec() && !value->null_value) + { + null_value= FALSE; + return value->val_bool(); + } + else + { + reset(); + DBUG_ASSERT(null_value); + return 0; + } +} + + +bool Item_singlerow_subselect::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + DBUG_ASSERT(fixed()); + if (forced_const) + { + bool val= value->get_date(thd, ltime, fuzzydate); + null_value= value->null_value; + return val; + } + if (!exec() && !value->null_value) + { + null_value= FALSE; + return value->get_date(thd, ltime, fuzzydate); + } + else + { + reset(); + DBUG_ASSERT(null_value); + return 1; + } +} + + 
+Item_exists_subselect::Item_exists_subselect(THD *thd, + st_select_lex *select_lex): + Item_subselect(thd), upper_not(NULL), + emb_on_expr_nest(NULL), optimizer(0), exists_transformed(0) +{ + DBUG_ENTER("Item_exists_subselect::Item_exists_subselect"); + + + init(select_lex, new (thd->mem_root) select_exists_subselect(thd, this)); + max_columns= UINT_MAX; + null_value= FALSE; //can't be NULL + base_flags&= ~item_base_t::MAYBE_NULL; //can't be NULL + value= 0; + DBUG_VOID_RETURN; +} + + +void Item_exists_subselect::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("exists")); + Item_subselect::print(str, query_type); +} + + +bool Item_in_subselect::test_limit(st_select_lex_unit *unit_arg) +{ + if (unlikely(unit_arg->fake_select_lex && + unit_arg->fake_select_lex->test_limit())) + return(1); + + SELECT_LEX *sl= unit_arg->first_select(); + for (; sl; sl= sl->next_select()) + { + if (unlikely(sl->test_limit())) + return(1); + } + return(0); +} + +Item_in_subselect::Item_in_subselect(THD *thd, Item * left_exp, + st_select_lex *select_lex): + Item_exists_subselect(thd), left_expr_cache(0), first_execution(TRUE), + in_strategy(SUBS_NOT_TRANSFORMED), + pushed_cond_guards(NULL), do_not_convert_to_sj(FALSE), is_jtbm_merged(FALSE), + is_jtbm_const_tab(FALSE), is_flattenable_semijoin(FALSE), + is_registered_semijoin(FALSE), + upper_item(0), + converted_from_in_predicate(FALSE) +{ + DBUG_ENTER("Item_in_subselect::Item_in_subselect"); + DBUG_PRINT("info", ("in_strategy: %u", (uint)in_strategy)); + + left_expr_orig= left_expr= left_exp; + /* prepare to possible disassembling the item in convert_subq_to_sj() */ + if (left_exp->type() == Item::ROW_ITEM) + left_expr_orig= new (thd->mem_root) + Item_row(thd, static_cast(left_exp)); + func= &eq_creator; + init(select_lex, new (thd->mem_root) select_exists_subselect(thd, this)); + max_columns= UINT_MAX; + set_maybe_null(); + reset(); + //if test_limit will fail then error will be reported to client + 
test_limit(select_lex->master_unit()); + DBUG_VOID_RETURN; +} + +int Item_in_subselect::get_identifier() +{ + return engine->get_identifier(); +} + +Item_allany_subselect::Item_allany_subselect(THD *thd, Item * left_exp, + chooser_compare_func_creator fc, + st_select_lex *select_lex, + bool all_arg): + Item_in_subselect(thd), func_creator(fc), all(all_arg) +{ + DBUG_ENTER("Item_allany_subselect::Item_allany_subselect"); + left_expr_orig= left_expr= left_exp; + /* prepare to possible disassembling the item in convert_subq_to_sj() */ + if (left_exp->type() == Item::ROW_ITEM) + left_expr_orig= new (thd->mem_root) + Item_row(thd, static_cast(left_exp)); + func= func_creator(all_arg); + init(select_lex, new (thd->mem_root) select_exists_subselect(thd, this)); + max_columns= 1; + reset(); + //if test_limit will fail then error will be reported to client + test_limit(select_lex->master_unit()); + DBUG_VOID_RETURN; +} + + +/** + Initialize length and decimals for EXISTS and inherited (IN/ALL/ANY) + subqueries +*/ + +void Item_exists_subselect::init_length_and_dec() +{ + decimals= 0; + max_length= 1; + max_columns= engine->cols(); +} + + +bool Item_exists_subselect::fix_length_and_dec() +{ + DBUG_ENTER("Item_exists_subselect::fix_length_and_dec"); + init_length_and_dec(); + // If limit is not set or it is constant more than 1 + if (!unit->global_parameters()->limit_params.select_limit || + (unit->global_parameters()->limit_params.select_limit->basic_const_item() && + unit->global_parameters()->limit_params.select_limit->val_int() > 1)) + { + /* + We need only 1 row to determine existence (i.e. 
any EXISTS that is not + an IN always requires LIMIT 1) + */ + Item *item= new (thd->mem_root) Item_int(thd, (int32) 1); + if (!item) + DBUG_RETURN(TRUE); + thd->change_item_tree(&unit->global_parameters()->limit_params.select_limit, + item); + unit->global_parameters()->limit_params.explicit_limit= 1; // we set the limit + DBUG_PRINT("info", ("Set limit to 1")); + } + DBUG_RETURN(FALSE); +} + + +bool Item_in_subselect::fix_length_and_dec() +{ + DBUG_ENTER("Item_in_subselect::fix_length_and_dec"); + init_length_and_dec(); + /* + Unlike Item_exists_subselect, LIMIT 1 is set later for + Item_in_subselect, depending on the chosen strategy. + */ + DBUG_RETURN(FALSE); +} + + +/** + Add an expression cache for this subquery if it is needed + + @param thd_arg Thread handle + + @details + The function checks whether an expression cache is needed for this item + and if if so wraps the item into an item of the class + Item_cache_wrapper with an appropriate expression cache set up there. + + @note + used from Item::transform() + + @return + new wrapper item if an expression cache is needed, + this item - otherwise +*/ + +Item* Item_exists_subselect::expr_cache_insert_transformer(THD *tmp_thd, + uchar *unused) +{ + DBUG_ENTER("Item_exists_subselect::expr_cache_insert_transformer"); + DBUG_ASSERT(thd == tmp_thd); + + if (expr_cache) + DBUG_RETURN(expr_cache); + + if (substype() == EXISTS_SUBS && expr_cache_is_needed(tmp_thd) && + (expr_cache= set_expr_cache(tmp_thd))) + { + init_expr_cache_tracker(tmp_thd); + DBUG_RETURN(expr_cache); + } + DBUG_RETURN(this); +} + + +void Item_exists_subselect::no_rows_in_result() +{ + /* + Subquery predicates outside of the SELECT list must be evaluated in order + to possibly filter the special result row generated for implicit grouping + if the subquery is in the HAVING clause. + If the predicate is constant, we need its actual value in the only result + row for queries with implicit grouping. 
+ */ + if (parsing_place != SELECT_LIST || const_item()) + return; + value= 0; + null_value= 0; + make_const(); +} + +double Item_exists_subselect::val_real() +{ + DBUG_ASSERT(fixed()); + if (!forced_const && exec()) + { + reset(); + return 0; + } + return (double) value; +} + +longlong Item_exists_subselect::val_int() +{ + DBUG_ASSERT(fixed()); + if (!forced_const && exec()) + { + reset(); + return 0; + } + return value; +} + + +/** + Return the result of EXISTS as a string value + + Converts the true/false result into a string value. + Note that currently this cannot be NULL, so if the query execution fails + it will return 0. + + @param decimal_value[out] buffer to hold the resulting string value + @retval Pointer to the converted string. + Can't be a NULL pointer, as currently + EXISTS cannot return NULL. +*/ + +String *Item_exists_subselect::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + if (!forced_const && exec()) + reset(); + str->set((ulonglong)value,&my_charset_bin); + return str; +} + + +/** + Return the result of EXISTS as a decimal value + + Converts the true/false result into a decimal value. + Note that currently this cannot be NULL, so if the query execution fails + it will return 0. + + @param decimal_value[out] Buffer to hold the resulting decimal value + @retval Pointer to the converted decimal. + Can't be a NULL pointer, as currently + EXISTS cannot return NULL. 
+*/ + +my_decimal *Item_exists_subselect::val_decimal(my_decimal *decimal_value) +{ + DBUG_ASSERT(fixed()); + if (!forced_const && exec()) + reset(); + int2my_decimal(E_DEC_FATAL_ERROR, value, 0, decimal_value); + return decimal_value; +} + + +bool Item_exists_subselect::val_bool() +{ + DBUG_ASSERT(fixed()); + if (!forced_const && exec()) + { + reset(); + return 0; + } + return value != 0; +} + + +double Item_in_subselect::val_real() +{ + /* + As far as Item_in_subselect called only from Item_in_optimizer this + method should not be used + */ + DBUG_ASSERT(fixed()); + if (forced_const) + return value; + DBUG_ASSERT((engine->uncacheable() & ~UNCACHEABLE_EXPLAIN) || + ! engine->is_executed()); + null_value= was_null= FALSE; + if (exec()) + { + reset(); + return 0; + } + if (was_null && !value) + null_value= TRUE; + return (double) value; +} + + +longlong Item_in_subselect::val_int() +{ + /* + As far as Item_in_subselect called only from Item_in_optimizer this + method should not be used + */ + DBUG_ASSERT(0); + DBUG_ASSERT(fixed()); + if (forced_const) + return value; + DBUG_ASSERT((engine->uncacheable() & ~UNCACHEABLE_EXPLAIN) || + ! engine->is_executed()); + null_value= was_null= FALSE; + if (exec()) + { + reset(); + return 0; + } + if (was_null && !value) + null_value= TRUE; + return value; +} + + +String *Item_in_subselect::val_str(String *str) +{ + /* + As far as Item_in_subselect called only from Item_in_optimizer this + method should not be used + */ + DBUG_ASSERT(0); + DBUG_ASSERT(fixed()); + if (forced_const) + goto value_is_ready; + DBUG_ASSERT((engine->uncacheable() & ~UNCACHEABLE_EXPLAIN) || + ! 
engine->is_executed()); + null_value= was_null= FALSE; + if (exec()) + { + reset(); + return 0; + } + if (was_null && !value) + { + null_value= TRUE; + return 0; + } +value_is_ready: + str->set((ulonglong)value, &my_charset_bin); + return str; +} + + +bool Item_in_subselect::val_bool() +{ + DBUG_ASSERT(fixed()); + if (forced_const) + return value; + DBUG_ASSERT((engine->uncacheable() & ~UNCACHEABLE_EXPLAIN) || + ! engine->is_executed() || with_recursive_reference); + null_value= was_null= FALSE; + if (exec()) + { + reset(); + return 0; + } + if (was_null && !value) + null_value= TRUE; + return value; +} + +my_decimal *Item_in_subselect::val_decimal(my_decimal *decimal_value) +{ + /* + As far as Item_in_subselect called only from Item_in_optimizer this + method should not be used + */ + DBUG_ASSERT(0); + if (forced_const) + goto value_is_ready; + DBUG_ASSERT((engine->uncacheable() & ~UNCACHEABLE_EXPLAIN) || + ! engine->is_executed()); + null_value= was_null= FALSE; + DBUG_ASSERT(fixed()); + if (exec()) + { + reset(); + return 0; + } + if (was_null && !value) + null_value= TRUE; +value_is_ready: + int2my_decimal(E_DEC_FATAL_ERROR, value, 0, decimal_value); + return decimal_value; +} + + +/** + Prepare a single-column IN/ALL/ANY subselect for rewriting. + + @param join Join object of the subquery (i.e. 'child' join). + + @details + + Prepare a single-column subquery to be rewritten. Given the subquery. + + If the subquery has no tables it will be turned to an expression between + left part and SELECT list. + + In other cases the subquery will be wrapped with Item_in_optimizer which + allow later to turn it to EXISTS or MAX/MIN. 
+ + @retval false The subquery was transformed + @retval true Error +*/ + +bool +Item_in_subselect::single_value_transformer(JOIN *join) +{ + SELECT_LEX *select_lex= join->select_lex; + DBUG_ENTER("Item_in_subselect::single_value_transformer"); + DBUG_ASSERT(thd == join->thd); + + /* + Check that the right part of the subselect contains no more than one + column. E.g. in SELECT 1 IN (SELECT * ..) the right part is (SELECT * ...) + */ + // psergey: duplicated_subselect_card_check + if (select_lex->item_list.elements > 1) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), 1); + DBUG_RETURN(true); + } + + Item* join_having= join->having ? join->having : join->tmp_having; + if (!(join_having || select_lex->with_sum_func || + select_lex->group_list.elements) && + select_lex->table_list.elements == 0 && !join->conds && + !select_lex->master_unit()->is_unit_op()) + { + Item *where_item= (Item*) select_lex->item_list.head(); + /* + it is single select without tables => possible optimization + remove the dependence mark since the item is moved to upper + select and is not outer anymore. + */ + where_item->walk(&Item::remove_dependence_processor, 0, + select_lex->outer_select()); + /* + fix_field of substitution item will be done in time of + substituting. + Note that real_item() should be used instead of + original left expression because left_expr can be + runtime created Ref item which is deleted at the end + of the statement. Thus one of 'substitution' arguments + can be broken in case of PS. + */ + substitution= func->create(thd, left_expr, where_item); + have_to_be_excluded= 1; + if (thd->lex->describe) + { + char warn_buff[MYSQL_ERRMSG_SIZE]; + sprintf(warn_buff, ER_THD(thd, ER_SELECT_REDUCED), + select_lex->select_number); + push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_SELECT_REDUCED, warn_buff); + } + DBUG_RETURN(false); + } + + /* + Wrap the current IN predicate in an Item_in_optimizer. 
The actual + substitution in the Item tree takes place in Item_subselect::fix_fields. + */ + if (!substitution) + { + /* We're invoked for the 1st (or the only) SELECT in the subquery UNION */ + substitution= optimizer; + + SELECT_LEX *current= thd->lex->current_select; + + thd->lex->current_select= current->return_after_parsing(); + if (!optimizer || optimizer->fix_left(thd)) + { + thd->lex->current_select= current; + DBUG_RETURN(true); + } + thd->lex->current_select= current; + + /* We will refer to upper level cache array => we have to save it for SP */ + DBUG_ASSERT(optimizer->get_cache()[0]->is_array_kept()); + + /* + As far as Item_in_optimizer does not substitute itself on fix_fields + we can use same item for all selects. + */ + expr= new (thd->mem_root) Item_direct_ref(thd, &select_lex->context, + (Item**)optimizer->get_cache(), + no_matter_name, + in_left_expr_name); + } + + DBUG_RETURN(false); +} + + +/** + Apply transformation max/min transwormation to ALL/ANY subquery if it is + possible. + + @param join Join object of the subquery (i.e. 'child' join). + + @details + + If this is an ALL/ANY single-value subselect, try to rewrite it with + a MIN/MAX subselect. We can do that if a possible NULL result of the + subselect can be ignored. + E.g. SELECT * FROM t1 WHERE b > ANY (SELECT a FROM t2) can be rewritten + with SELECT * FROM t1 WHERE b > (SELECT MAX(a) FROM t2). + We can't check that this optimization is safe if it's not a top-level + item of the WHERE clause (e.g. because the WHERE clause can contain IS + NULL/IS NOT NULL functions). If so, we rewrite ALL/ANY with NOT EXISTS + later in this method. 
+ + @retval false The subquery was transformed + @retval true Error +*/ + +bool Item_allany_subselect::transform_into_max_min(JOIN *join) +{ + DBUG_ENTER("Item_allany_subselect::transform_into_max_min"); + if (!test_strategy(SUBS_MAXMIN_INJECTED | SUBS_MAXMIN_ENGINE)) + DBUG_RETURN(false); + Item **place= optimizer->arguments() + 1; + SELECT_LEX *select_lex= join->select_lex; + Item *subs; + DBUG_ASSERT(thd == join->thd); + + /* + */ + DBUG_ASSERT(!substitution); + + /* + Check if optimization with aggregate min/max possible + 1 There is no aggregate in the subquery + 2 It is not UNION + 3 There is tables + 4 It is not ALL subquery with possible NULLs in the SELECT list + */ + if (!select_lex->group_list.elements && /*1*/ + !select_lex->having && /*1*/ + !select_lex->with_sum_func && /*1*/ + !(select_lex->next_select()) && /*2*/ + select_lex->table_list.elements && /*3*/ + (!select_lex->ref_pointer_array[0]->maybe_null() || /*4*/ + substype() != Item_subselect::ALL_SUBS)) /*4*/ + { + Item_sum_min_max *item; + nesting_map save_allow_sum_func; + if (func->l_op()) + { + /* + (ALL && (> || =>)) || (ANY && (< || =<)) + for ALL condition is inverted + */ + item= new (thd->mem_root) Item_sum_max(thd, + select_lex->ref_pointer_array[0]); + } + else + { + /* + (ALL && (< || =<)) || (ANY && (> || =>)) + for ALL condition is inverted + */ + item= new (thd->mem_root) Item_sum_min(thd, + select_lex->ref_pointer_array[0]); + } + if (upper_item) + upper_item->set_sum_test(item); + thd->change_item_tree(&select_lex->ref_pointer_array[0], item); + { + List_iterator it(select_lex->item_list); + it++; + thd->change_item_tree(it.ref(), item); + } + + DBUG_EXECUTE("where", + print_where(item, "rewrite with MIN/MAX", QT_ORDINARY);); + + save_allow_sum_func= thd->lex->allow_sum_func; + thd->lex->allow_sum_func.set_bit(thd->lex->current_select->nest_level); + /* + Item_sum_(max|min) can't substitute other item => we can use 0 as + reference, also Item_sum_(max|min) can't be fixed after 
creation, so + we do not check item->fixed + */ + if (item->fix_fields(thd, 0)) + DBUG_RETURN(true); + thd->lex->allow_sum_func= save_allow_sum_func; + /* we added aggregate function => we have to change statistic */ + count_field_types(select_lex, &join->tmp_table_param, join->all_fields, + 0); + if (join->prepare_stage2()) + DBUG_RETURN(true); + subs= new (thd->mem_root) Item_singlerow_subselect(thd, select_lex); + + /* + Remove other strategies if any (we already changed the query and + can't apply other strategy). + */ + set_strategy(SUBS_MAXMIN_INJECTED); + } + else + { + Item_maxmin_subselect *item; + subs= item= new (thd->mem_root) Item_maxmin_subselect(thd, this, select_lex, func->l_op()); + if (upper_item) + upper_item->set_sub_test(item); + /* + Remove other strategies if any (we already changed the query and + can't apply other strategy). + */ + set_strategy(SUBS_MAXMIN_ENGINE); + } + /* + The swap is needed for expressions of type 'f1 < ALL ( SELECT ....)' + where we want to evaluate the sub query even if f1 would be null. + */ + subs= func->create_swap(thd, expr, subs); + thd->change_item_tree(place, subs); + if (subs->fix_fields(thd, &subs)) + DBUG_RETURN(true); + DBUG_ASSERT(subs == (*place)); // There was no substitutions + + select_lex->master_unit()->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED; + select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED; + + DBUG_RETURN(false); +} + + +bool Item_in_subselect::fix_having(Item *having, SELECT_LEX *select_lex) +{ + bool fix_res= 0; + DBUG_ASSERT(thd); + if (!having->fixed()) + { + select_lex->having_fix_field= 1; + fix_res= having->fix_fields(thd, 0); + select_lex->having_fix_field= 0; + } + return fix_res; +} + +bool Item_allany_subselect::is_maxmin_applicable(JOIN *join) +{ + /* + Check if max/min optimization applicable: It is top item of + WHERE condition. 
+ */ + return ((is_top_level_item() || + (upper_item && upper_item->is_top_level_item())) && + !(join->select_lex->master_unit()->uncacheable & + ~UNCACHEABLE_EXPLAIN) && + !func->eqne_op()); +} + + +/** + Create the predicates needed to transform a single-column IN/ALL/ANY + subselect into a correlated EXISTS via predicate injection. + + @param join[in] Join object of the subquery (i.e. 'child' join). + @param where_item[out] the in-to-exists addition to the where clause + @param having_item[out] the in-to-exists addition to the having clause + + @details + The correlated predicates are created as follows: + + - If the subquery has aggregates, GROUP BY, or HAVING, convert to + + SELECT ie FROM ... HAVING subq_having AND + trigcond(oe $cmp$ ref_or_null_helper) + + the addition is wrapped into trigger only when we want to distinguish + between NULL and FALSE results. + + - Otherwise (no aggregates/GROUP BY/HAVING) convert it to one of the + following: + + = If we don't need to distinguish between NULL and FALSE subquery: + + SELECT ie FROM ... WHERE subq_where AND (oe $cmp$ ie) + + = If we need to distinguish between those: + + SELECT ie FROM ... + WHERE subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL)) + HAVING trigcond((ie)) + + @retval false If the new conditions were created successfully + @retval true Error +*/ + +bool +Item_in_subselect::create_single_in_to_exists_cond(JOIN *join, + Item **where_item, + Item **having_item) +{ + SELECT_LEX *select_lex= join->select_lex; + DBUG_ASSERT(thd == join->thd); + /* + The non-transformed HAVING clause of 'join' may be stored in two ways + during JOIN::optimize: this->tmp_having= this->having; this->having= 0; + */ + Item* join_having= join->having ? 
join->having : join->tmp_having; + DBUG_ENTER("Item_in_subselect::create_single_in_to_exists_cond"); + + *where_item= NULL; + *having_item= NULL; + + if (join_having || select_lex->with_sum_func || + select_lex->group_list.elements) + { + LEX_CSTRING field_name= this->full_name_cstring(); + Item *item= func->create(thd, expr, + new (thd->mem_root) Item_ref_null_helper( + thd, + &select_lex->context, + this, + &select_lex-> + ref_pointer_array[0], + {STRING_WITH_LEN("")}, + field_name)); + if (!is_top_level_item() && left_expr->maybe_null()) + { + /* + We can encounter "NULL IN (SELECT ...)". Wrap the added condition + within a trig_cond. + */ + disable_cond_guard_for_const_null_left_expr(0); + item= new (thd->mem_root) Item_func_trig_cond(thd, item, get_cond_guard(0)); + } + + if (!join_having) + item->name= in_having_cond; + if (fix_having(item, select_lex)) + DBUG_RETURN(true); + *having_item= item; + } + else + { + /* + No need to use real_item for the item, as the ref items that are possible + in the subquery either belong to views or to the parent select. + For such case we need to refer to the reference and not to the original + item. 
+ */ + Item *item= (Item*) select_lex->item_list.head(); + + if (select_lex->table_list.elements || + !(select_lex->master_unit()->is_unit_op())) + { + Item *having= item; + Item *orig_item= item; + + item= func->create(thd, expr, item); + if (!is_top_level_item() && orig_item->maybe_null()) + { + having= new (thd->mem_root) Item_is_not_null_test(thd, this, having); + if (left_expr->maybe_null()) + { + disable_cond_guard_for_const_null_left_expr(0); + if (!(having= new (thd->mem_root) Item_func_trig_cond(thd, having, + get_cond_guard(0)))) + DBUG_RETURN(true); + } + having->name= in_having_cond; + if (fix_having(having, select_lex)) + DBUG_RETURN(true); + *having_item= having; + + item= new (thd->mem_root) Item_cond_or(thd, item, + new (thd->mem_root) Item_func_isnull(thd, orig_item)); + } + /* + If we may encounter NULL IN (SELECT ...) and care whether subquery + result is NULL or FALSE, wrap condition in a trig_cond. + */ + if (!is_top_level_item() && left_expr->maybe_null()) + { + disable_cond_guard_for_const_null_left_expr(0); + if (!(item= new (thd->mem_root) Item_func_trig_cond(thd, item, + get_cond_guard(0)))) + DBUG_RETURN(true); + } + + /* + TODO: figure out why the following is done here in + single_value_transformer but there is no corresponding action in + row_value_transformer? 
+ */ + item->name= in_additional_cond; + if (item->fix_fields_if_needed(thd, 0)) + DBUG_RETURN(true); + *where_item= item; + } + else + { + DBUG_ASSERT(select_lex->master_unit()->is_unit_op()); + LEX_CSTRING field_name= {STRING_WITH_LEN("") }; + Item *new_having= + func->create(thd, expr, + new (thd->mem_root) Item_ref_null_helper(thd, + &select_lex->context, + this, + &select_lex->ref_pointer_array[0], + no_matter_name, + field_name)); + if (!is_top_level_item() && left_expr->maybe_null()) + { + disable_cond_guard_for_const_null_left_expr(0); + if (!(new_having= new (thd->mem_root) + Item_func_trig_cond(thd, new_having, get_cond_guard(0)))) + DBUG_RETURN(true); + } + + new_having->name= in_having_cond; + if (fix_having(new_having, select_lex)) + DBUG_RETURN(true); + + *having_item= new_having; + } + } + + DBUG_RETURN(false); +} + + +/** + Wrap a multi-column IN/ALL/ANY subselect into an Item_in_optimizer. + + @param join Join object of the subquery (i.e. 'child' join). + + @details + The subquery predicate is wrapped into an Item_in_optimizer. Later the query + optimization phase chooses whether the subquery under the Item_in_optimizer + will be further transformed into an equivalent correlated EXISTS by injecting + additional predicates, or will be executed via subquery materialization in its + unmodified form. + + @retval false The subquery was transformed + @retval true Error +*/ + +bool +Item_in_subselect::row_value_transformer(JOIN *join) +{ + SELECT_LEX *select_lex= join->select_lex; + uint cols_num= left_expr->cols(); + + DBUG_ENTER("Item_in_subselect::row_value_transformer"); + DBUG_ASSERT(thd == join->thd); + + // psergey: duplicated_subselect_card_check + if (select_lex->item_list.elements != cols_num) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), cols_num); + DBUG_RETURN(true); + } + + /* + Wrap the current IN predicate in an Item_in_optimizer. The actual + substitution in the Item tree takes place in Item_subselect::fix_fields. 
+ */ + if (!substitution) + { + //first call for this unit + SELECT_LEX_UNIT *master_unit= select_lex->master_unit(); + substitution= optimizer; + + SELECT_LEX *current= thd->lex->current_select; + thd->lex->current_select= current->return_after_parsing(); + if (!optimizer || optimizer->fix_left(thd)) + { + thd->lex->current_select= current; + DBUG_RETURN(true); + } + + // we will refer to upper level cache array => we have to save it in PS + DBUG_ASSERT(optimizer->get_cache()[0]->is_array_kept()); + + thd->lex->current_select= current; + /* + The uncacheable property controls a number of actions, e.g. whether to + save/restore (via init_save_join_tab/restore_tmp) the original JOIN for + plans with a temp table where the original JOIN was overridden by + make_simple_join. The UNCACHEABLE_EXPLAIN is ignored by EXPLAIN, thus + non-correlated subqueries will not appear as such to EXPLAIN. + */ + master_unit->uncacheable|= UNCACHEABLE_EXPLAIN; + select_lex->uncacheable|= UNCACHEABLE_EXPLAIN; + } + + DBUG_RETURN(false); +} + + +/** + Create the predicates needed to transform a multi-column IN/ALL/ANY + subselect into a correlated EXISTS via predicate injection. + + @details + The correlated predicates are created as follows: + + - If the subquery has aggregates, GROUP BY, or HAVING, convert to + + (l1, l2, l3) IN (SELECT v1, v2, v3 ... HAVING having) + => + EXISTS (SELECT ... HAVING having and + (l1 = v1 or is null v1) and + (l2 = v2 or is null v2) and + (l3 = v3 or is null v3) and + is_not_null_test(v1) and + is_not_null_test(v2) and + is_not_null_test(v3)) + + where is_not_null_test used to register nulls in case if we have + not found matching to return correct NULL value. + + - Otherwise (no aggregates/GROUP BY/HAVING) convert the subquery as follows: + + (l1, l2, l3) IN (SELECT v1, v2, v3 ... WHERE where) + => + EXISTS (SELECT ... 
WHERE where and + (l1 = v1 or is null v1) and + (l2 = v2 or is null v2) and + (l3 = v3 or is null v3) + HAVING is_not_null_test(v1) and + is_not_null_test(v2) and + is_not_null_test(v3)) + where is_not_null_test registers NULLs values but reject rows. + + in case when we do not need correct NULL, we have simpler construction: + EXISTS (SELECT ... WHERE where and + (l1 = v1) and + (l2 = v2) and + (l3 = v3) + + @param join[in] Join object of the subquery (i.e. 'child' join). + @param where_item[out] the in-to-exists addition to the where clause + @param having_item[out] the in-to-exists addition to the having clause + + @retval false If the new conditions were created successfully + @retval true Error +*/ + +bool +Item_in_subselect::create_row_in_to_exists_cond(JOIN * join, + Item **where_item, + Item **having_item) +{ + SELECT_LEX *select_lex= join->select_lex; + uint cols_num= left_expr->cols(); + /* + The non-transformed HAVING clause of 'join' may be stored in two ways + during JOIN::optimize: this->tmp_having= this->having; this->having= 0; + */ + Item* join_having= join->having ? join->having : join->tmp_having; + bool is_having_used= (join_having || select_lex->with_sum_func || + select_lex->group_list.first || + !select_lex->table_list.elements); + LEX_CSTRING list_ref= { STRING_WITH_LEN("")}; + DBUG_ENTER("Item_in_subselect::create_row_in_to_exists_cond"); + DBUG_ASSERT(thd == join->thd); + + *where_item= NULL; + *having_item= NULL; + + if (is_having_used) + { + /* TODO: say here explicitly if the order of AND parts matters or not. 
*/ + Item *item_having_part2= 0; + for (uint i= 0; i < cols_num; i++) + { + DBUG_ASSERT((left_expr->fixed() && + + select_lex->ref_pointer_array[i]->fixed()) || + (select_lex->ref_pointer_array[i]->type() == REF_ITEM && + ((Item_ref*)(select_lex->ref_pointer_array[i]))->ref_type() == + Item_ref::OUTER_REF)); + Item *item_eq= + new (thd->mem_root) + Item_func_eq(thd, new (thd->mem_root) + Item_direct_ref(thd, &select_lex->context, + (*optimizer->get_cache())-> + addr(i), + no_matter_name, + in_left_expr_name), + new (thd->mem_root) + Item_ref(thd, &select_lex->context, + &select_lex->ref_pointer_array[i], + no_matter_name, + list_ref)); + Item *item_isnull= + new (thd->mem_root) + Item_func_isnull(thd, + new (thd->mem_root) + Item_ref(thd, &select_lex->context, + &select_lex->ref_pointer_array[i], + no_matter_name, + list_ref)); + Item *col_item= new (thd->mem_root) + Item_cond_or(thd, item_eq, item_isnull); + if (!is_top_level_item() && left_expr->element_index(i)->maybe_null() && + get_cond_guard(i)) + { + disable_cond_guard_for_const_null_left_expr(i); + if (!(col_item= new (thd->mem_root) + Item_func_trig_cond(thd, col_item, get_cond_guard(i)))) + DBUG_RETURN(true); + } + *having_item= and_items(thd, *having_item, col_item); + + Item *item_nnull_test= + new (thd->mem_root) + Item_is_not_null_test(thd, this, + new (thd->mem_root) + Item_ref(thd, &select_lex->context, + &select_lex-> + ref_pointer_array[i], + no_matter_name, + list_ref)); + if (!is_top_level_item() && left_expr->element_index(i)->maybe_null() && + get_cond_guard(i) ) + { + disable_cond_guard_for_const_null_left_expr(i); + if (!(item_nnull_test= + new (thd->mem_root) + Item_func_trig_cond(thd, item_nnull_test, get_cond_guard(i)))) + DBUG_RETURN(true); + } + item_having_part2= and_items(thd, item_having_part2, item_nnull_test); + item_having_part2->top_level_item(); + } + *having_item= and_items(thd, *having_item, item_having_part2); + } + else + { + for (uint i= 0; i < cols_num; i++) + { + Item 
*item, *item_isnull; + DBUG_ASSERT((left_expr->fixed() && + select_lex->ref_pointer_array[i]->fixed()) || + (select_lex->ref_pointer_array[i]->type() == REF_ITEM && + ((Item_ref*)(select_lex->ref_pointer_array[i]))->ref_type() == + Item_ref::OUTER_REF)); + item= new (thd->mem_root) + Item_func_eq(thd, + new (thd->mem_root) + Item_direct_ref(thd, &select_lex->context, + (*optimizer->get_cache())-> + addr(i), + no_matter_name, + in_left_expr_name), + new (thd->mem_root) + Item_direct_ref(thd, &select_lex->context, + &select_lex-> + ref_pointer_array[i], + no_matter_name, + list_ref)); + if (!is_top_level_item() && select_lex->ref_pointer_array[i]->maybe_null()) + { + Item *having_col_item= + new (thd->mem_root) + Item_is_not_null_test(thd, this, + new (thd->mem_root) + Item_ref(thd, &select_lex->context, + &select_lex->ref_pointer_array[i], + no_matter_name, + list_ref)); + + item_isnull= new (thd->mem_root) + Item_func_isnull(thd, + new (thd->mem_root) + Item_direct_ref(thd, &select_lex->context, + &select_lex-> + ref_pointer_array[i], + no_matter_name, + list_ref)); + item= new (thd->mem_root) Item_cond_or(thd, item, item_isnull); + if (left_expr->element_index(i)->maybe_null() && get_cond_guard(i)) + { + disable_cond_guard_for_const_null_left_expr(i); + if (!(item= new (thd->mem_root) + Item_func_trig_cond(thd, item, get_cond_guard(i)))) + DBUG_RETURN(true); + if (!(having_col_item= new (thd->mem_root) + Item_func_trig_cond(thd, having_col_item, get_cond_guard(i)))) + DBUG_RETURN(true); + } + *having_item= and_items(thd, *having_item, having_col_item); + } + if (!is_top_level_item() && left_expr->element_index(i)->maybe_null() && + get_cond_guard(i)) + { + if (!(item= new (thd->mem_root) + Item_func_trig_cond(thd, item, get_cond_guard(i)))) + DBUG_RETURN(true); + } + *where_item= and_items(thd, *where_item, item); + } + } + + if (*where_item) + { + if ((*where_item)->fix_fields_if_needed(thd, 0)) + DBUG_RETURN(true); + (*where_item)->top_level_item(); + } + + if 
(*having_item) + { + if (!join_having) + (*having_item)->name= in_having_cond; + if (fix_having(*having_item, select_lex)) + DBUG_RETURN(true); + (*having_item)->top_level_item(); + } + + DBUG_RETURN(false); +} + + +bool +Item_in_subselect::select_transformer(JOIN *join) +{ + return select_in_like_transformer(join); +} + +bool +Item_exists_subselect::select_transformer(JOIN *join) +{ + return select_prepare_to_be_in(); +} + + +/** + Create the predicates needed to transform an IN/ALL/ANY subselect into a + correlated EXISTS via predicate injection. + + @param join_arg Join object of the subquery. + + @retval FALSE ok + @retval TRUE error +*/ + +bool Item_in_subselect::create_in_to_exists_cond(JOIN *join_arg) +{ + bool res; + + DBUG_ASSERT(engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE || + engine->engine_type() == subselect_engine::UNION_ENGINE); + /* + TODO: the call to init_cond_guards allocates and initializes an + array of booleans that may not be used later because we may choose + materialization. + The two calls below to create_XYZ_cond depend on this boolean array. + If the dependency is removed, the call can be moved to a later phase. + */ + init_cond_guards(); + if (left_expr->cols() == 1) + res= create_single_in_to_exists_cond(join_arg, + &(join_arg->in_to_exists_where), + &(join_arg->in_to_exists_having)); + else + res= create_row_in_to_exists_cond(join_arg, + &(join_arg->in_to_exists_where), + &(join_arg->in_to_exists_having)); + + /* + The IN=>EXISTS transformation makes non-correlated subqueries correlated. + */ + if (!left_expr->can_eval_in_optimize()) + { + join_arg->select_lex->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED; + join_arg->select_lex->master_unit()->uncacheable|= + UNCACHEABLE_DEPENDENT_INJECTED; + } + /* + The uncacheable property controls a number of actions, e.g. 
whether to + save/restore (via init_save_join_tab/restore_tmp) the original JOIN for + plans with a temp table where the original JOIN was overridden by + make_simple_join. The UNCACHEABLE_EXPLAIN is ignored by EXPLAIN, thus + non-correlated subqueries will not appear as such to EXPLAIN. + */ + join_arg->select_lex->master_unit()->uncacheable|= UNCACHEABLE_EXPLAIN; + join_arg->select_lex->uncacheable|= UNCACHEABLE_EXPLAIN; + return (res); +} + + +/** + Transform an IN/ALL/ANY subselect into a correlated EXISTS via injecting + correlated in-to-exists predicates. + + @param join_arg Join object of the subquery. + + @retval FALSE ok + @retval TRUE error +*/ + +bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg) +{ + SELECT_LEX *select_lex= join_arg->select_lex; + Item *where_item= join_arg->in_to_exists_where; + Item *having_item= join_arg->in_to_exists_having; + + DBUG_ENTER("Item_in_subselect::inject_in_to_exists_cond"); + DBUG_ASSERT(thd == join_arg->thd); + + if (select_lex->min_max_opt_list.elements) + { + /* + MIN/MAX optimizations have been applied to Item_sum objects + of the subquery this subquery predicate in opt_sum_query(). + Injection of new condition invalidates this optimizations. + Thus those optimizations must be rolled back. + */ + List_iterator_fast it(select_lex->min_max_opt_list); + Item_sum *item; + while ((item= it++)) + { + item->clear(); + item->reset_forced_const(); + } + if (where_item) + where_item->update_used_tables(); + if (having_item) + having_item->update_used_tables(); + } + + if (where_item) + { + List *and_args= NULL; + /* + If the top-level Item of the WHERE clause is an AND, detach the multiple + equality list that was attached to the end of the AND argument list by + build_equal_items_for_cond(). The multiple equalities must be detached + because fix_fields merges lower level AND arguments into the upper AND. + As a result, the arguments from lower-level ANDs are concatenated after + the multiple equalities. 
When the multiple equality list is treated as + such, it turns out that it contains non-Item_equal object which is wrong. + */ + if (join_arg->conds && join_arg->conds->type() == Item::COND_ITEM && + ((Item_cond*) join_arg->conds)->functype() == Item_func::COND_AND_FUNC) + { + and_args= ((Item_cond*) join_arg->conds)->argument_list(); + if (join_arg->cond_equal) + and_args->disjoin((List *) &join_arg->cond_equal->current_level); + } + + where_item= and_items(thd, join_arg->conds, where_item); + + /* This is the fix_fields() call mentioned in the comment above */ + if (where_item->fix_fields_if_needed(thd, 0)) + DBUG_RETURN(true); + // TIMOUR TODO: call optimize_cond() for the new where clause + thd->change_item_tree(&select_lex->where, where_item); + select_lex->where->top_level_item(); + join_arg->conds= select_lex->where; + + /* Attach back the list of multiple equalities to the new top-level AND. */ + if (and_args && join_arg->cond_equal) + { + /* + The fix_fields() call above may have changed the argument list, so + fetch it again: + */ + and_args= ((Item_cond*) join_arg->conds)->argument_list(); + ((Item_cond_and *) (join_arg->conds))->m_cond_equal= + *join_arg->cond_equal; + and_args->append((List *)&join_arg->cond_equal->current_level); + } + } + + if (having_item) + { + Item* join_having= join_arg->having ? 
join_arg->having:join_arg->tmp_having; + having_item= and_items(thd, join_having, having_item); + if (fix_having(having_item, select_lex)) + DBUG_RETURN(true); + // TIMOUR TODO: call optimize_cond() for the new having clause + thd->change_item_tree(&select_lex->having, having_item); + select_lex->having->top_level_item(); + join_arg->having= select_lex->having; + } + SELECT_LEX *global_parameters= unit->global_parameters(); + join_arg->thd->change_item_tree(&global_parameters->limit_params.select_limit, + new (thd->mem_root) Item_int(thd, (int32) 1)); + unit->lim.set_single_row(); + + DBUG_RETURN(false); +} + + +/* + If this select can potentially be converted by EXISTS->IN conversion, wrap it + in an Item_in_optimizer object. Final decision whether to do the conversion + is done at a later phase. +*/ + +bool Item_exists_subselect::select_prepare_to_be_in() +{ + bool trans_res= FALSE; + DBUG_ENTER("Item_exists_subselect::select_prepare_to_be_in"); + if (!optimizer && + (thd->lex->sql_command == SQLCOM_SELECT || + thd->lex->sql_command == SQLCOM_UPDATE_MULTI || + thd->lex->sql_command == SQLCOM_DELETE_MULTI) && + !unit->first_select()->is_part_of_union() && + optimizer_flag(thd, OPTIMIZER_SWITCH_EXISTS_TO_IN) && + (is_top_level_item() || + (upper_not && upper_not->is_top_level_item()))) + { + Query_arena *arena, backup; + bool result; + arena= thd->activate_stmt_arena_if_needed(&backup); + result= (!(optimizer= new (thd->mem_root) Item_in_optimizer(thd, new (thd->mem_root) Item_int(thd, 1), this))); + if (arena) + thd->restore_active_arena(arena, &backup); + if (result) + trans_res= TRUE; + else + substitution= optimizer; + } + DBUG_RETURN(trans_res); +} + +/** + Check if 'func' is an equality in form "inner_table.column = outer_expr" + + @param func Expression to check + @param allow_subselect If true, the outer_expr part can have a subquery + If false, it cannot. 
+ @param local_field OUT Return "inner_table.column" here + @param outer_expr OUT Return outer_expr here + + @return true - 'func' is an Equality. +*/ + +static bool check_equality_for_exist2in(Item_func *func, + bool allow_subselect, + Item_ident **local_field, + Item **outer_exp) +{ + Item **args; + if (func->functype() != Item_func::EQ_FUNC) + return FALSE; + DBUG_ASSERT(func->argument_count() == 2); + args= func->arguments(); + if (args[0]->real_type() == Item::FIELD_ITEM && + args[0]->all_used_tables() != OUTER_REF_TABLE_BIT && + args[1]->all_used_tables() == OUTER_REF_TABLE_BIT && + (allow_subselect || !args[1]->with_subquery())) + { + /* It is Item_field or Item_direct_view_ref) */ + DBUG_ASSERT(args[0]->type() == Item::FIELD_ITEM || + args[0]->type() == Item::REF_ITEM); + *local_field= (Item_ident *)args[0]; + *outer_exp= args[1]; + return TRUE; + } + else if (args[1]->real_type() == Item::FIELD_ITEM && + args[1]->all_used_tables() != OUTER_REF_TABLE_BIT && + args[0]->all_used_tables() == OUTER_REF_TABLE_BIT && + (allow_subselect || !args[0]->with_subquery())) + { + /* It is Item_field or Item_direct_view_ref) */ + DBUG_ASSERT(args[1]->type() == Item::FIELD_ITEM || + args[1]->type() == Item::REF_ITEM); + *local_field= (Item_ident *)args[1]; + *outer_exp= args[0]; + return TRUE; + } + + return FALSE; +} + +typedef struct st_eq_field_outer +{ + Item **eq_ref; + Item_ident *local_field; + Item *outer_exp; +} EQ_FIELD_OUTER; + + +/** + Check if 'conds' is a set of AND-ed outer_expr=inner_table.col equalities + + @detail + Check if 'conds' has form + + outer1=inner_tbl1.col1 AND ... AND outer2=inner_tbl1.col2 AND remainder_cond + + if there is just one outer_expr=inner_expr pair, then outer_expr can have a + subselect in it. If there are many such pairs, then none of outer_expr can + have a subselect in it. 
If we allow this, the query will fail with an error: + + This version of MariaDB doesn't yet support 'SUBQUERY in ROW in left + expression of IN/ALL/ANY' + + @param conds Condition to be checked + @parm result Array to collect EQ_FIELD_OUTER elements describing + inner-vs-outer equalities the function has found. + @return + false - some inner-vs-outer equalities were found + true - otherwise. +*/ + +static bool find_inner_outer_equalities(Item **conds, + Dynamic_array &result) +{ + bool found= FALSE; + EQ_FIELD_OUTER element; + if (is_cond_and(*conds)) + { + List_iterator li(*((Item_cond*)*conds)->argument_list()); + Item *item; + bool allow_subselect= true; + while ((item= li++)) + { + if (item->type() == Item::FUNC_ITEM && + check_equality_for_exist2in((Item_func *)item, + allow_subselect, + &element.local_field, + &element.outer_exp)) + { + found= TRUE; + allow_subselect= false; + element.eq_ref= li.ref(); + if (result.append(element)) + goto alloc_err; + } + } + } + else if ((*conds)->type() == Item::FUNC_ITEM && + check_equality_for_exist2in((Item_func *)*conds, + true, + &element.local_field, + &element.outer_exp)) + { + found= TRUE; + element.eq_ref= conds; + if (result.append(element)) + goto alloc_err; + } + + return !found; +alloc_err: + return TRUE; +} + +/** + Converts EXISTS subquery to IN subquery if it is possible and has sense + + @param opt_arg Pointer on THD + + @return TRUE in case of error and FALSE otherwise. 
+*/ + +bool Item_exists_subselect::exists2in_processor(void *opt_arg) +{ + THD *thd= (THD *)opt_arg; + SELECT_LEX *first_select=unit->first_select(), *save_select; + JOIN *join= first_select->join; + Item **eq_ref= NULL; + Item_ident *local_field= NULL; + Item *outer_exp= NULL; + Item *left_exp= NULL; Item_in_subselect *in_subs; + Query_arena *arena= NULL, backup; + int res= FALSE; + List outer; + Dynamic_array eqs(PSI_INSTRUMENT_MEM, 5, 5); + bool will_be_correlated; + DBUG_ENTER("Item_exists_subselect::exists2in_processor"); + + if (!optimizer || + !optimizer_flag(thd, OPTIMIZER_SWITCH_EXISTS_TO_IN) || + (!is_top_level_item() && (!upper_not || + !upper_not->is_top_level_item())) || + first_select->is_part_of_union() || + first_select->group_list.elements || + join->having || + first_select->with_sum_func || + !first_select->leaf_tables.elements|| + !join->conds || + with_recursive_reference) + DBUG_RETURN(FALSE); + + /* + EXISTS-to-IN coversion and ORDER BY ... LIMIT clause: + + - "[ORDER BY ...] LIMIT n" clause with a non-zero n does not affect + the result of the EXISTS(...) predicate, and so we can discard + it during the conversion. + - "[ORDER BY ...] LIMIT m, n" can turn a non-empty resultset into empty + one, so it affects tthe EXISTS(...) result and cannot be discarded. + + Disallow exists-to-in conversion if + (1). three is a LIMIT which is not a basic constant + (1a) or is a "LIMIT 0" (see MDEV-19429) + (2). 
there is an OFFSET clause + */ + if ((first_select->limit_params.select_limit && // (1) + (!first_select->limit_params.select_limit->basic_const_item() || // (1) + first_select->limit_params.select_limit->val_uint() == 0)) || // (1a) + first_select->limit_params.offset_limit) // (2) + { + DBUG_RETURN(FALSE); + } + + /* Disallow the conversion if offset + limit exists */ + + DBUG_ASSERT(first_select->group_list.elements == 0 && + first_select->having == NULL); + + if (find_inner_outer_equalities(&join->conds, eqs)) + DBUG_RETURN(FALSE); + + DBUG_ASSERT(eqs.elements() != 0); + + save_select= thd->lex->current_select; + thd->lex->current_select= first_select; + + /* check that the subquery has only dependencies we are going pull out */ + { + List unused; + Collect_deps_prm prm= {&unused, // parameters + unit->first_select()->nest_level_base, // nest_level_base + 0, // count + unit->first_select()->nest_level, // nest_level + FALSE // collect + }; + walk(&Item::collect_outer_ref_processor, TRUE, &prm); + DBUG_ASSERT(prm.count > 0); + DBUG_ASSERT(prm.count >= (uint)eqs.elements()); + will_be_correlated= prm.count > (uint)eqs.elements(); + if (upper_not && will_be_correlated) + goto out; + } + + if ((uint)eqs.elements() > (first_select->item_list.elements + + first_select->select_n_reserved)) + goto out; + + arena= thd->activate_stmt_arena_if_needed(&backup); + + while (first_select->item_list.elements > (uint)eqs.elements()) + { + first_select->item_list.pop(); + first_select->join->all_fields.elements--; + } + { + List_iterator it(first_select->item_list); + + for (uint i= 0; i < (uint)eqs.elements(); i++) + { + Item *item= it++; + eq_ref= eqs.at(i).eq_ref; + local_field= eqs.at(i).local_field; + outer_exp= eqs.at(i).outer_exp; + /* Add the field to the SELECT_LIST */ + if (item) + it.replace(local_field); + else + { + first_select->item_list.push_back(local_field, thd->mem_root); + first_select->join->all_fields.elements++; + } + first_select->ref_pointer_array[i]= 
(Item *)local_field; + + /* remove the parts from condition */ + if (!upper_not || !local_field->maybe_null()) + *eq_ref= new (thd->mem_root) Item_int(thd, 1); + else + { + *eq_ref= new (thd->mem_root) + Item_func_isnotnull(thd, + new (thd->mem_root) + Item_field(thd, + ((Item_field*)(local_field-> + real_item()))->context, + ((Item_field*)(local_field-> + real_item()))->field)); + if((*eq_ref)->fix_fields(thd, (Item **)eq_ref)) + { + res= TRUE; + goto out; + } + } + outer_exp->fix_after_pullout(unit->outer_select(), &outer_exp, FALSE); + outer_exp->update_used_tables(); + outer.push_back(outer_exp, thd->mem_root); + } + } + + join->conds->update_used_tables(); + + /* make IN SUBQUERY and put outer_exp as left part */ + if (eqs.elements() == 1) + left_exp= outer_exp; + else + { + if (!(left_exp= new (thd->mem_root) Item_row(thd, outer))) + { + res= TRUE; + goto out; + } + } + + /* make EXISTS->IN permanet (see Item_subselect::init()) */ + set_exists_transformed(); + + first_select->limit_params.select_limit= NULL; + if (!(in_subs= new (thd->mem_root) Item_in_subselect(thd, left_exp, + first_select))) + { + res= TRUE; + goto out; + } + in_subs->set_exists_transformed(); + optimizer->arguments()[0]= left_exp; + optimizer->arguments()[1]= in_subs; + in_subs->optimizer= optimizer; + DBUG_ASSERT(is_top_level_item() || + (upper_not && upper_not->is_top_level_item())); + in_subs->top_level_item(); + { + SELECT_LEX *current= thd->lex->current_select; + optimizer->reset_cache(); // renew cache, and we will not keep it + thd->lex->current_select= unit->outer_select(); + DBUG_ASSERT(optimizer); + if (optimizer->fix_left(thd)) + { + res= TRUE; + /* + We should not restore thd->lex->current_select because it will be + reset on exit from this procedure + */ + goto out; + } + /* + As far as Item_ref_in_optimizer do not substitute itself on fix_fields + we can use same item for all selects. 
+ */ + in_subs->expr= new (thd->mem_root) + Item_direct_ref(thd, &first_select->context, + (Item**)optimizer->get_cache(), + no_matter_name, + in_left_expr_name); + if (in_subs->fix_fields(thd, optimizer->arguments() + 1)) + { + res= TRUE; + /* + We should not restore thd->lex->current_select because it will be + reset on exit from this procedure + */ + goto out; + } + { + /* Move dependence list */ + List_iterator_fast it(upper_refs); + Ref_to_outside *upper; + while ((upper= it++)) + { + uint i; + for (i= 0; i < (uint)eqs.elements(); i++) + if (eqs.at(i).outer_exp-> + walk(&Item::find_item_processor, TRUE, upper->item)) + break; + if (i == (uint)eqs.elements() && + (in_subs->upper_refs.push_back(upper, thd->stmt_arena->mem_root))) + goto out; + } + } + in_subs->update_used_tables(); + /* + The engine of the subquery is fixed so above fix_fields() is not + complete and should be fixed + */ + in_subs->upper_refs= upper_refs; + upper_refs.empty(); + thd->lex->current_select= current; + } + + DBUG_ASSERT(unit->item == in_subs); + DBUG_ASSERT(join == first_select->join); + /* + Fix dependency info + */ + in_subs->is_correlated= will_be_correlated; + if (!will_be_correlated) + { + first_select->uncacheable&= ~UNCACHEABLE_DEPENDENT_GENERATED; + unit->uncacheable&= ~UNCACHEABLE_DEPENDENT_GENERATED; + } + /* + set possible optimization strategies + */ + in_subs->emb_on_expr_nest= emb_on_expr_nest; + res= check_and_do_in_subquery_rewrites(join); + first_select->join->prepare_stage2(); + + first_select->fix_prepare_information(thd, &join->conds, &join->having); + + if (upper_not) + { + Item *exp; + if (eqs.elements() == 1) + { + exp= (optimizer->arguments()[0]->maybe_null() ? 
+ (Item*) new (thd->mem_root) + Item_cond_and(thd, + new (thd->mem_root) + Item_func_isnotnull(thd, + new (thd->mem_root) + Item_direct_ref(thd, + &unit->outer_select()->context, + optimizer->arguments(), + no_matter_name, + exists_outer_expr_name)), + optimizer) : + (Item *)optimizer); + } + else + { + List *and_list= new (thd->mem_root) List; + if (!and_list) + { + res= TRUE; + goto out; + } + for (size_t i= 0; i < eqs.elements(); i++) + { + if (optimizer->arguments()[0]->maybe_null()) + { + and_list-> + push_front(new (thd->mem_root) + Item_func_isnotnull(thd, + new (thd->mem_root) + Item_direct_ref(thd, + &unit->outer_select()->context, + optimizer->arguments()[0]->addr((int)i), + no_matter_name, + exists_outer_expr_name)), + thd->mem_root); + } + } + if (and_list->elements > 0) + { + and_list->push_front(optimizer, thd->mem_root); + exp= new (thd->mem_root) Item_cond_and(thd, *and_list); + } + else + exp= optimizer; + } + upper_not->arguments()[0]= exp; + if (exp->fix_fields_if_needed(thd, upper_not->arguments())) + { + res= TRUE; + goto out; + } + } + +out: + thd->lex->current_select= save_select; + if (arena) + thd->restore_active_arena(arena, &backup); + DBUG_RETURN(res); +} + + +/** + Prepare IN/ALL/ANY/SOME subquery transformation and call the appropriate + transformation function. + + @param join JOIN object of transforming subquery + + @notes + To decide which transformation procedure (scalar or row) applicable here + we have to call fix_fields() for the left expression to be able to call + cols() method on it. Also this method makes arena management for + underlying transformation methods. 
+ + @retval false OK + @retval true Error +*/ + +bool +Item_in_subselect::select_in_like_transformer(JOIN *join) +{ + Query_arena *arena= 0, backup; + SELECT_LEX *current= thd->lex->current_select; + const char *save_where= thd->where; + bool trans_res= true; + bool result; + + DBUG_ENTER("Item_in_subselect::select_in_like_transformer"); + DBUG_ASSERT(thd == join->thd); + thd->where= "IN/ALL/ANY subquery"; + + /* + In some optimisation cases we will not need this Item_in_optimizer + object, but we can't know it here, but here we need address correct + reference on left expression. + + note: we won't need Item_in_optimizer when handling degenerate cases + like "... IN (SELECT 1)" + */ + arena= thd->activate_stmt_arena_if_needed(&backup); + if (!optimizer) + { + optimizer= new (thd->mem_root) Item_in_optimizer(thd, left_expr_orig, this); + if ((result= !optimizer)) + goto out; + } + + thd->lex->current_select= current->return_after_parsing(); + result= optimizer->fix_left(thd); + thd->lex->current_select= current; + + if (changed) + { + trans_res= false; + goto out; + } + + + if (result) + goto out; + + /* + Both transformers call fix_fields() only for Items created inside them, + and all that items do not make permanent changes in current item arena + which allow to us call them with changed arena (if we do not know nature + of Item, we have to call fix_fields() for it only with original arena to + avoid memory leak) + */ + if (left_expr->cols() == 1) + trans_res= single_value_transformer(join); + else + { + /* we do not support row operation for ALL/ANY/SOME */ + if (func != &eq_creator) + { + if (arena) + thd->restore_active_arena(arena, &backup); + my_error(ER_OPERAND_COLUMNS, MYF(0), 1); + DBUG_RETURN(true); + } + trans_res= row_value_transformer(join); + } +out: + if (arena) + thd->restore_active_arena(arena, &backup); + thd->where= save_where; + DBUG_RETURN(trans_res); +} + + +void Item_in_subselect::print(String *str, enum_query_type query_type) +{ + if 
(test_strategy(SUBS_IN_TO_EXISTS) && + !(query_type & QT_PARSABLE)) + str->append(STRING_WITH_LEN("")); + else + { + left_expr->print_parenthesised(str, query_type, precedence()); + str->append(STRING_WITH_LEN(" in ")); + } + Item_subselect::print(str, query_type); +} + +bool Item_exists_subselect::fix_fields(THD *thd, Item **ref) +{ + DBUG_ENTER("Item_exists_subselect::fix_fields"); + if (exists_transformed) + DBUG_RETURN( !( (*ref)= new (thd->mem_root) Item_int(thd, 1))); + DBUG_RETURN(Item_subselect::fix_fields(thd, ref)); +} + + +bool Item_in_subselect::fix_fields(THD *thd_arg, Item **ref) +{ + uint outer_cols_num; + List *inner_cols; + char const *save_where= thd_arg->where; + DBUG_ENTER("Item_in_subselect::fix_fields"); + + thd= thd_arg; + DBUG_ASSERT(unit->thd == thd); + + if (test_strategy(SUBS_SEMI_JOIN)) + DBUG_RETURN( !( (*ref)= new (thd->mem_root) Item_int(thd, 1)) ); + + thd->where= "IN/ALL/ANY subquery"; + /* + Check if the outer and inner IN operands match in those cases when we + will not perform IN=>EXISTS transformation. Currently this is when we + use subquery materialization. + + The condition below is true when this method was called recursively from + inside JOIN::prepare for the JOIN object created by the call chain + Item_subselect::fix_fields -> subselect_single_select_engine::prepare, + which creates a JOIN object for the subquery and calls JOIN::prepare for + the JOIN of the subquery. + Notice that in some cases, this doesn't happen, and the check_cols() + test for each Item happens later in + Item_in_subselect::row_value_in_to_exists_transformer. + The reason for this mess is that our JOIN::prepare phase works top-down + instead of bottom-up, so we first do name resoluton and semantic checks + for the outer selects, then for the inner. 
+ */ + if (engine && + engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE && + ((subselect_single_select_engine*)engine)->join) + { + outer_cols_num= left_expr->cols(); + + if (unit->is_unit_op()) + inner_cols= &(unit->types); + else + inner_cols= &(unit->first_select()->item_list); + if (outer_cols_num != inner_cols->elements) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), outer_cols_num); + goto err; + } + if (outer_cols_num > 1) + { + List_iterator inner_col_it(*inner_cols); + Item *inner_col; + for (uint i= 0; i < outer_cols_num; i++) + { + inner_col= inner_col_it++; + if (inner_col->check_cols(left_expr->element_index(i)->cols())) + goto err; + } + } + } + + if (left_expr && left_expr->fix_fields_if_needed(thd_arg, &left_expr)) + goto err; + else + if (Item_subselect::fix_fields(thd_arg, ref)) + goto err; + base_flags|= item_base_t::FIXED; + thd->where= save_where; + DBUG_RETURN(FALSE); + +err: + thd->where= save_where; + DBUG_RETURN(TRUE); +} + + +void Item_in_subselect::fix_after_pullout(st_select_lex *new_parent, + Item **ref, bool merge) +{ + left_expr->fix_after_pullout(new_parent, &left_expr, merge); + Item_subselect::fix_after_pullout(new_parent, ref, merge); + used_tables_cache |= left_expr->used_tables(); +} + +void Item_in_subselect::update_used_tables() +{ + Item_subselect::update_used_tables(); + left_expr->update_used_tables(); + //used_tables_cache |= left_expr->used_tables(); + used_tables_cache= Item_subselect::used_tables() | left_expr->used_tables(); +} + + +/** + Try to create and initialize an engine to compute a subselect via + materialization. + + @details + The method creates a new engine for materialized execution, and initializes + the engine. 
The initialization may fail + - either because it wasn't possible to create the needed temporary table + and its index, + - or because of a memory allocation error, + + @returns + @retval TRUE memory allocation error occurred + @retval FALSE an execution method was chosen successfully +*/ + +bool Item_in_subselect::setup_mat_engine() +{ + subselect_hash_sj_engine *mat_engine= NULL; + subselect_single_select_engine *select_engine; + + DBUG_ENTER("Item_in_subselect::setup_mat_engine"); + DBUG_ASSERT(thd); + + /* + The select_engine (that executes transformed IN=>EXISTS subselects) is + pre-created at parse time, and is stored in statement memory (preserved + across PS executions). + */ + DBUG_ASSERT(engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE); + select_engine= (subselect_single_select_engine*) engine; + + /* Create/initialize execution objects. */ + if (!(mat_engine= new (thd->mem_root) + subselect_hash_sj_engine(thd, this, select_engine))) + DBUG_RETURN(TRUE); + + if (mat_engine->prepare(thd) || + mat_engine->init(&select_engine->join->fields_list, + engine->get_identifier())) + DBUG_RETURN(TRUE); + + engine= mat_engine; + DBUG_RETURN(FALSE); +} + + +/** + Initialize the cache of the left operand of the IN predicate. + + @note This method has the same purpose as alloc_group_fields(), + but it takes a different kind of collection of items, and the + list we push to is dynamically allocated. + + @retval TRUE if a memory allocation error occurred or the cache is + not applicable to the current query + @retval FALSE if success +*/ + +bool Item_in_subselect::init_left_expr_cache() +{ + JOIN *outer_join; + DBUG_ASSERT(thd); + + outer_join= unit->outer_select()->join; + /* + An IN predicate might be evaluated in a query for which all tables have + been optimzied away. 
+ */ + if (!outer_join || !outer_join->table_count || !outer_join->tables_list) + return TRUE; + + if (!(left_expr_cache= new (thd->mem_root) List)) + return TRUE; + + for (uint i= 0; i < left_expr->cols(); i++) + { + Cached_item *cur_item_cache= new_Cached_item(thd, + left_expr->element_index(i), + FALSE); + if (!cur_item_cache || left_expr_cache->push_front(cur_item_cache, + thd->mem_root)) + return TRUE; + } + return FALSE; +} + + +bool Item_in_subselect::init_cond_guards() +{ + DBUG_ASSERT(thd); + uint cols_num= left_expr->cols(); + if (!is_top_level_item() && !pushed_cond_guards && + (left_expr->maybe_null() || cols_num > 1)) + { + if (!(pushed_cond_guards= (bool*)thd->alloc(sizeof(bool) * cols_num))) + return TRUE; + for (uint i= 0; i < cols_num; i++) + pushed_cond_guards[i]= TRUE; + } + return FALSE; +} + + +bool +Item_allany_subselect::select_transformer(JOIN *join) +{ + DBUG_ENTER("Item_allany_subselect::select_transformer"); + DBUG_ASSERT((in_strategy & ~(SUBS_MAXMIN_INJECTED | SUBS_MAXMIN_ENGINE | + SUBS_IN_TO_EXISTS | SUBS_STRATEGY_CHOSEN)) == 0); + if (upper_item) + upper_item->show= 1; + DBUG_RETURN(select_in_like_transformer(join)); +} + + +void Item_allany_subselect::print(String *str, enum_query_type query_type) +{ + if (test_strategy(SUBS_IN_TO_EXISTS) && + !(query_type & QT_PARSABLE)) + str->append(STRING_WITH_LEN("")); + else + { + left_expr->print(str, query_type); + str->append(' '); + const char *name= func->symbol(all); + str->append(name, strlen(name)); + str->append(all ? " all " : " any ", 5); + } + Item_subselect::print(str, query_type); +} + + +void Item_allany_subselect::no_rows_in_result() +{ + /* + Subquery predicates outside of the SELECT list must be evaluated in order + to possibly filter the special result row generated for implicit grouping + if the subquery is in the HAVING clause. + If the predicate is constant, we need its actual value in the only result + row for queries with implicit grouping. 
+ */ + if (parsing_place != SELECT_LIST || const_item()) + return; + value= 0; + null_value= 0; + was_null= 0; + make_const(); +} + + +void subselect_engine::set_thd(THD *thd_arg) +{ + thd= thd_arg; + if (result) + result->set_thd(thd_arg); +} + + +subselect_single_select_engine:: +subselect_single_select_engine(st_select_lex *select, + select_result_interceptor *result_arg, + Item_subselect *item_arg) + :subselect_engine(item_arg, result_arg), + prepared(0), executed(0), + select_lex(select), join(0) +{ + select_lex->master_unit()->item= item_arg; +} + +int subselect_single_select_engine::get_identifier() +{ + return select_lex->select_number; +} + +void subselect_single_select_engine::force_reexecution() +{ + executed= false; +} + +void subselect_single_select_engine::cleanup() +{ + DBUG_ENTER("subselect_single_select_engine::cleanup"); + prepared= executed= 0; + join= 0; + result->cleanup(); + select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED; + DBUG_VOID_RETURN; +} + + +void subselect_union_engine::cleanup() +{ + DBUG_ENTER("subselect_union_engine::cleanup"); + unit->reinit_exec_mechanism(); + result->cleanup(); + unit->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED; + for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select()) + sl->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED; + DBUG_VOID_RETURN; +} + + +bool subselect_union_engine::is_executed() const +{ + return unit->executed; +} + +void subselect_union_engine::force_reexecution() +{ + unit->executed= false; +} + + +/* + Check if last execution of the subquery engine produced any rows + + SYNOPSIS + subselect_union_engine::no_rows() + + DESCRIPTION + Check if last execution of the subquery engine produced any rows. The + return value is undefined if last execution ended in an error. + + RETURN + TRUE - Last subselect execution has produced no rows + FALSE - Otherwise +*/ + +bool subselect_union_engine::no_rows() +{ + /* Check if we got any rows when reading UNION result from temp. 
table: */ + if (unit->fake_select_lex) + { + JOIN *join= unit->fake_select_lex->join; + if (join) + return MY_TEST(!join->send_records); + return false; + } + return MY_TEST(!(((select_union_direct *)(unit->get_union_result())) + ->send_records)); +} + + +void subselect_uniquesubquery_engine::cleanup() +{ + DBUG_ENTER("subselect_uniquesubquery_engine::cleanup"); + /* + Note for mergers: we don't have to, and actually must not de-initialize + tab->table->file here. + - We don't have to, because free_tmp_table() will call ha_index_or_rnd_end + - We must not do it, because tab->table may be a derived table which + has been already dropped by close_thread_tables(), while we here are + called from cleanup_items() + */ + DBUG_VOID_RETURN; +} + + +subselect_union_engine::subselect_union_engine(st_select_lex_unit *u, + select_result_interceptor *result_arg, + Item_subselect *item_arg) + :subselect_engine(item_arg, result_arg) +{ + unit= u; + unit->item= item_arg; +} + + +/** + Create and prepare the JOIN object that represents the query execution + plan for the subquery. + + @details + This method is called from Item_subselect::fix_fields. For prepared + statements it is called both during the PREPARE and EXECUTE phases in the + following ways: + - During PREPARE the optimizer needs some properties + (join->fields_list.elements) of the JOIN to proceed with preparation of + the remaining query (namely to complete ::fix_fields for the subselect + related classes. In the end of PREPARE the JOIN is deleted. + - When we EXECUTE the query, Item_subselect::fix_fields is called again, and + the JOIN object is re-created again, prepared and executed. In the end of + execution it is deleted. + In all cases the JOIN is created in runtime memory (not in the permanent + memory root). + + @todo + Re-check what properties of 'join' are needed during prepare, and see if + we can avoid creating a JOIN during JOIN::prepare of the outer join. 
+ + @retval 0 if success + @retval 1 if error +*/ + +int subselect_single_select_engine::prepare(THD *thd) +{ + if (prepared) + return 0; + set_thd(thd); + if (select_lex->join) + { + select_lex->cleanup(); + } + join= (new (thd->mem_root) + JOIN(thd, select_lex->item_list, + select_lex->options | SELECT_NO_UNLOCK, result)); + if (!join || !result) + return 1; /* Fatal error is set already. */ + prepared= 1; + SELECT_LEX *save_select= thd->lex->current_select; + thd->lex->current_select= select_lex; + if (join->prepare(select_lex->table_list.first, + select_lex->where, + select_lex->order_list.elements + + select_lex->group_list.elements, + select_lex->order_list.first, + false, + select_lex->group_list.first, + select_lex->having, + NULL, select_lex, + select_lex->master_unit())) + return 1; + thd->lex->current_select= save_select; + return 0; +} + +int subselect_union_engine::prepare(THD *thd_arg) +{ + set_thd(thd_arg); + return unit->prepare(unit->derived, result, SELECT_NO_UNLOCK); +} + +int subselect_uniquesubquery_engine::prepare(THD *) +{ + /* Should never be called. */ + DBUG_ASSERT(FALSE); + return 1; +} + + +/* + Check if last execution of the subquery engine produced any rows + + SYNOPSIS + subselect_single_select_engine::no_rows() + + DESCRIPTION + Check if last execution of the subquery engine produced any rows. The + return value is undefined if last execution ended in an error. + + RETURN + TRUE - Last subselect execution has produced no rows + FALSE - Otherwise +*/ + +bool subselect_single_select_engine::no_rows() +{ + return !item->assigned(); +} + + +/** + Makes storage for the output values for the subquery and calcuates + their data and column types and their nullability. 
+*/
+bool subselect_engine::set_row(List<Item> &item_list, Item_cache **row)
+{
+  Item *sel_item;
+  List_iterator_fast<Item> li(item_list);
+  /* Default type handler; overwritten below from the SELECT list items. */
+  set_handler(&type_handler_varchar);
+  for (uint i= 0; (sel_item= li++); i++)
+  {
+    /*
+      Copy result metadata from each SELECT list element. For a
+      multi-column subquery the scalar fields of 'item' end up holding the
+      last column's metadata; the handler is reset to ROW below.
+    */
+    item->max_length= sel_item->max_length;
+    set_handler(sel_item->type_handler());
+    item->decimals= sel_item->decimals;
+    item->unsigned_flag= sel_item->unsigned_flag;
+    maybe_null= sel_item->maybe_null();
+    if (!(row[i]= sel_item->get_cache(thd)))
+      return TRUE;
+    row[i]->setup(thd, sel_item);
+    //psergey-backport-timours: row[i]->store(sel_item);
+  }
+  if (item_list.elements > 1)
+    set_handler(&type_handler_row);
+  return FALSE;
+}
+
+bool subselect_single_select_engine::fix_length_and_dec(Item_cache **row)
+{
+  DBUG_ASSERT(row || select_lex->item_list.elements==1);
+  if (set_row(select_lex->item_list, row))
+    return TRUE;
+  item->collation.set(row[0]->collation);
+  /*
+    NOTE(review): for multi-column (row) results NULL-ability appears to be
+    tracked per column cache, hence maybe_null is cleared on the engine
+    here -- confirm against Item_cache handling in callers.
+  */
+  if (cols() != 1)
+    maybe_null= 0;
+  return FALSE;
+}
+
+bool subselect_union_engine::fix_length_and_dec(Item_cache **row)
+{
+  DBUG_ASSERT(row || unit->first_select()->item_list.elements==1);
+
+  if (unit->first_select()->item_list.elements == 1)
+  {
+    if (set_row(unit->types, row))
+      return TRUE;
+    item->collation.set(row[0]->collation);
+  }
+  else
+  {
+    /*
+      set_row() overwrites maybe_null with each column's value in turn;
+      preserve the previously computed value for multi-column results.
+    */
+    bool maybe_null_saved= maybe_null;
+    if (set_row(unit->types, row))
+      return TRUE;
+    maybe_null= maybe_null_saved;
+  }
+  return FALSE;
+}
+
+bool subselect_uniquesubquery_engine::fix_length_and_dec(Item_cache **row)
+{
+  //this never should be called
+  DBUG_ASSERT(0);
+  return FALSE;
+}
+
+/* Forward declarations of row-retrieval helpers used by the engines. */
+int read_first_record_seq(JOIN_TAB *tab);
+int rr_sequential(READ_RECORD *info);
+int join_read_always_key_or_null(JOIN_TAB *tab);
+int join_read_next_same_or_null(READ_RECORD *info);
+
+int subselect_single_select_engine::exec()
+{
+  char const *save_where= thd->where;
+  SELECT_LEX *save_select= thd->lex->current_select;
+  thd->lex->current_select= select_lex;
+  DBUG_ENTER("subselect_single_select_engine::exec");
+
+  /* First execution: optimize the subquery's JOIN and latch EXPLAIN flags. */
+  if (join->optimization_state == JOIN::NOT_OPTIMIZED)
+  {
+    SELECT_LEX_UNIT *unit= select_lex->master_unit();
+
+    unit->set_limit(unit->global_parameters());
+    if (join->optimize())
+    {
+      thd->where= save_where;
+      executed= 1;
+      thd->lex->current_select= save_select;
+      DBUG_RETURN(join->error ? join->error : 1);
+    }
+    if (!select_lex->uncacheable && thd->lex->describe &&
+        !(join->select_options & SELECT_DESCRIBE))
+    {
+      item->update_used_tables();
+      if (item->const_item())
+      {
+        /*
+          It's necessary to keep original JOIN table because
+          create_sort_index() function may overwrite original
+          JOIN_TAB::type and wrong optimization method can be
+          selected on re-execution.
+        */
+        select_lex->uncacheable|= UNCACHEABLE_EXPLAIN;
+        select_lex->master_unit()->uncacheable|= UNCACHEABLE_EXPLAIN;
+      }
+    }
+    if (item->engine_changed(this))
+    {
+      thd->lex->current_select= save_select;
+      DBUG_RETURN(1);
+    }
+  }
+  /* Re-execution of a dependent subquery: reinit the JOIN and reset state. */
+  if (select_lex->uncacheable &&
+      select_lex->uncacheable != UNCACHEABLE_EXPLAIN
+      && executed)
+  {
+    if (join->reinit())
+    {
+      thd->where= save_where;
+      thd->lex->current_select= save_select;
+      DBUG_RETURN(1);
+    }
+    item->reset();
+    item->assigned((executed= 0));
+  }
+  if (!executed)
+  {
+    item->reset_value_registration();
+    JOIN_TAB *changed_tabs[MAX_TABLES];
+    JOIN_TAB **last_changed_tab= changed_tabs;
+    if (item->have_guarded_conds())
+    {
+      /*
+        For at least one of the pushed predicates the following is true:
+        We should not apply optimizations based on the condition that was
+        pushed down into the subquery. Those optimizations are ref[_or_null]
+        accesses. Change them to be full table scans.
+      */
+      JOIN_TAB *tab;
+      for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
+           tab; tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
+      {
+        if (tab && tab->keyuse)
+        {
+          for (uint i= 0; i < tab->ref.key_parts; i++)
+          {
+            bool *cond_guard= tab->ref.cond_guards[i];
+            if (cond_guard && !*cond_guard)
+            {
+              /* Change the access method to full table scan */
+              tab->save_read_first_record= tab->read_first_record;
+              tab->save_read_record= tab->read_record.read_record_func;
+              tab->read_record.read_record_func= rr_sequential;
+              tab->read_first_record= read_first_record_seq;
+              if (tab->rowid_filter)
+                tab->table->file->disable_pushed_rowid_filter();
+              tab->read_record.thd= join->thd;
+              tab->read_record.ref_length= tab->table->file->ref_length;
+              tab->read_record.unlock_row= rr_unlock_row;
+              *(last_changed_tab++)= tab;
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    join->exec();
+
+    /* Enable the optimizations back */
+    for (JOIN_TAB **ptab= changed_tabs; ptab != last_changed_tab; ptab++)
+    {
+      JOIN_TAB *tab= *ptab;
+      tab->read_record.ref_length= 0;
+      tab->read_first_record= tab->save_read_first_record;
+      tab->read_record.read_record_func= tab->save_read_record;
+      if (tab->rowid_filter)
+        tab->table->file->enable_pushed_rowid_filter();
+    }
+    executed= 1;
+    /* Result cannot change between executions: freeze the item's value. */
+    if (!(uncacheable() & ~UNCACHEABLE_EXPLAIN) &&
+        !item->with_recursive_reference)
+      item->make_const();
+    thd->where= save_where;
+    thd->lex->current_select= save_select;
+    DBUG_RETURN(join->error || thd->is_fatal_error || thd->is_error());
+  }
+  thd->where= save_where;
+  thd->lex->current_select= save_select;
+  DBUG_RETURN(0);
+}
+
+int subselect_union_engine::exec()
+{
+  char const *save_where= thd->where;
+  int res= unit->exec();
+  thd->where= save_where;
+  return res;
+}
+
+
+/*
+  Search for at least one row satisfying select condition
+
+  SYNOPSIS
+    subselect_uniquesubquery_engine::scan_table()
+
+  DESCRIPTION
+    Scan the table using sequential access until we find at least one row
+    satisfying select condition.
+
+    The caller must set this->empty_result_set=TRUE before calling this
+    function. This function will set it to FALSE if it finds a matching row.
+
+  RETURN
+    FALSE - OK
+    TRUE  - Error
+*/
+
+int subselect_uniquesubquery_engine::scan_table()
+{
+  int error;
+  TABLE *table= tab->table;
+  DBUG_ENTER("subselect_uniquesubquery_engine::scan_table");
+
+  /* Switch from index access to a sequential scan, ending any index read. */
+  if ((table->file->inited &&
+       (error= table->file->ha_index_end())) ||
+      (error= table->file->ha_rnd_init(1)))
+  {
+    (void) report_error(table, error);
+    DBUG_RETURN(true);
+  }
+
+  table->file->extra_opt(HA_EXTRA_CACHE,
+                         get_thd()->variables.read_buff_size);
+  table->null_row= 0;
+  for (;;)
+  {
+    error=table->file->ha_rnd_next(table->record[0]);
+    if (unlikely(error))
+    {
+      if (error == HA_ERR_END_OF_FILE)
+      {
+        error= 0;
+        break;
+      }
+      else
+      {
+        error= report_error(table, error);
+        break;
+      }
+    }
+
+    /* Found a row satisfying the pushed-down condition (if any). */
+    if (!cond || cond->val_int())
+    {
+      empty_result_set= FALSE;
+      break;
+    }
+  }
+
+  table->file->ha_rnd_end();
+  DBUG_RETURN(error != 0);
+}
+
+
+/**
+  Copy ref key for index access into the only subquery table.
+
+  @details
+  Copy ref key and check for conversion problems.
+  If there is an error converting the left IN operand to the column type of
+  the right IN operand count it as no match. In this case IN has the value of
+  FALSE. We mark the subquery table cursor as having no more rows (to ensure
+  that the processing that follows will not find a match) and return FALSE,
+  so IN is not treated as returning NULL.
+
+  @returns
+  @retval FALSE The outer ref was copied into an index lookup key.
+  @retval TRUE  The outer ref cannot possibly match any row, IN is FALSE.
+*/
+
+bool subselect_uniquesubquery_engine::copy_ref_key(bool skip_constants)
+{
+  DBUG_ENTER("subselect_uniquesubquery_engine::copy_ref_key");
+
+  for (store_key **copy= tab->ref.key_copy ; *copy ; copy++)
+  {
+    enum store_key::store_key_result store_res;
+    /* Constant key parts were copied once already; skip them if asked. */
+    if (skip_constants && (*copy)->store_key_is_const())
+      continue;
+    store_res= (*copy)->copy(thd);
+    tab->ref.key_err= store_res;
+
+    if (store_res == store_key::STORE_KEY_FATAL)
+    {
+      /*
+        Error converting the left IN operand to the column type of the right
+        IN operand.
+      */
+      DBUG_RETURN(true);
+    }
+  }
+  DBUG_RETURN(false);
+}
+
+
+/**
+  Execute subselect via unique index lookup
+
+  @details
+  Find rows corresponding to the ref key using index access.
+  If some part of the lookup key is NULL, then we're evaluating
+    NULL IN (SELECT ... )
+  This is a special case, we don't need to search for NULL in the table,
+  instead, the result value is
+    - NULL  if select produces empty row set
+    - FALSE otherwise.
+
+  In some cases (IN subselect is a top level item, i.e.
+  is_top_level_item() == TRUE, the caller doesn't distinguish between NULL and
+  FALSE result and we just return FALSE.
+  Otherwise we make a full table scan to see if there is at least one
+  matching row.
+
+  The result of this function (info about whether a row was found) is
+  stored in this->empty_result_set.
+
+  @returns
+  @retval 0  OK
+  @retval 1  notify caller to call Item_subselect::reset(),
+             in most cases reset() sets the result to NULL
+*/
+
+int subselect_uniquesubquery_engine::exec()
+{
+  DBUG_ENTER("subselect_uniquesubquery_engine::exec");
+  int error;
+  TABLE *table= tab->table;
+  empty_result_set= TRUE;
+  table->status= 0;
+  Item_in_subselect *in_subs= item->get_IN_subquery();
+  DBUG_ASSERT(in_subs);
+  DBUG_ASSERT(thd);
+
+  if (!tab->preread_init_done && tab->preread_init())
+    DBUG_RETURN(1);
+
+  if (in_subs->left_expr_has_null())
+  {
+    /*
+      The case when all values in left_expr are NULL is handled by
+      Item_in_optimizer::val_int().
+    */
+    if (in_subs->is_top_level_item())
+      DBUG_RETURN(1); /* notify caller to call reset() and set NULL value. */
+    else
+      DBUG_RETURN(scan_table());
+  }
+
+  if (copy_ref_key(true))
+  {
+    /* We know that there will be no rows even if we scan. */
+    in_subs->value= 0;
+    DBUG_RETURN(0);
+  }
+
+  if (!table->file->inited &&
+      (error= table->file->ha_index_init(tab->ref.key, 0)))
+  {
+    (void) report_error(table, error);
+    DBUG_RETURN(true);
+  }
+
+  error= table->file->ha_index_read_map(table->record[0],
+                                        tab->ref.key_buff,
+                                        make_prev_keypart_map(tab->
+                                                              ref.key_parts),
+                                        HA_READ_KEY_EXACT);
+  if (unlikely(error &&
+               error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE))
+    error= report_error(table, error);
+  else
+  {
+    /* "No row" is not an error: the predicate is simply FALSE. */
+    error= 0;
+    table->null_row= 0;
+    if (!table->status && (!cond || cond->val_int()))
+    {
+      in_subs->value= 1;
+      empty_result_set= FALSE;
+    }
+    else
+      in_subs->value= 0;
+  }
+
+  DBUG_RETURN(error != 0);
+}
+
+
+/*
+  Probe the index once with the current ref key and set the IN predicate's
+  value to 1 if a row satisfying 'cond' is found, 0 otherwise.
+  Returns 0, or the (reported) storage engine error on a hard failure.
+  (was: "TIMOUR: write comment")
+*/
+
+int subselect_uniquesubquery_engine::index_lookup()
+{
+  DBUG_ENTER("subselect_uniquesubquery_engine::index_lookup");
+  int error;
+  TABLE *table= tab->table;
+
+  if (!table->file->inited)
+    table->file->ha_index_init(tab->ref.key, 0);
+  error= table->file->ha_index_read_map(table->record[0],
+                                        tab->ref.key_buff,
+                                        make_prev_keypart_map(tab->
+                                                              ref.key_parts),
+                                        HA_READ_KEY_EXACT);
+  DBUG_PRINT("info", ("lookup result: %i", error));
+
+  if (unlikely(error && error != HA_ERR_KEY_NOT_FOUND &&
+               error != HA_ERR_END_OF_FILE))
+  {
+    /*
+      TIMOUR: I don't understand at all when do we need to call report_error.
+      In most places where we access an index, we don't do this. Why here?
+    */
+    error= report_error(table, error);
+    DBUG_RETURN(error);
+  }
+
+  table->null_row= 0;
+  if (!error && (!cond || cond->val_int()))
+    item->get_IN_subquery()->value= 1;
+  else
+    item->get_IN_subquery()->value= 0;
+
+  DBUG_RETURN(0);
+}
+
+
+
+subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine()
+{
+  /* Tell handler we don't need the index anymore */
+  //psergey-merge-todo: the following was gone in 6.0:
+  //psergey-merge: don't need this after all: tab->table->file->ha_index_end();
+}
+
+
+/**
+  Execute subselect via unique index lookup
+
+  @details
+  The engine is used to resolve subqueries in form
+
+    oe IN (SELECT key FROM tbl WHERE subq_where)
+
+  The value of the predicate is calculated as follows:
+  1. If oe IS NULL, this is a special case, do a full table scan on
+     table tbl and search for row that satisfies subq_where. If such
+     row is found, return NULL, otherwise return FALSE.
+  2. Make an index lookup via key=oe, search for a row that satisfies
+     subq_where. If found, return TRUE.
+  3. If check_null==TRUE, make another lookup via key=NULL, search for a
+     row that satisfies subq_where. If found, return NULL, otherwise
+     return FALSE.
+
+  @todo
+  The step #1 can be optimized further when the index has several key
+  parts. Consider a subquery:
+
+    (oe1, oe2) IN (SELECT keypart1, keypart2 FROM tbl WHERE subq_where)
+
+  and suppose we need to evaluate it for {oe1, oe2}=={const1, NULL}.
+  Current code will do a full table scan and obtain correct result. There
+  is a better option: instead of evaluating
+
+    SELECT keypart1, keypart2 FROM tbl WHERE subq_where            (1)
+
+  and checking if it has produced any matching rows, evaluate
+
+    SELECT keypart2 FROM tbl WHERE subq_where AND keypart1=const1  (2)
+
+  If this query produces a row, the result is NULL (as we're evaluating
+  "(const1, NULL) IN { (const1, X), ... }", which has a value of UNKNOWN,
+  i.e. NULL). If the query produces no rows, the result is FALSE.
+ + We currently evaluate (1) by doing a full table scan. (2) can be + evaluated by doing a "ref" scan on "keypart1=const1", which can be much + cheaper. We can use index statistics to quickly check whether "ref" scan + will be cheaper than full table scan. + + @returns + @retval 0 OK + @retval 1 notify caller to call Item_subselect::reset(), + in most cases reset() sets the result to NULL +*/ + +int subselect_indexsubquery_engine::exec() +{ + DBUG_ENTER("subselect_indexsubquery_engine"); + int error; + bool null_finding= 0; + TABLE *table= tab->table; + Item_in_subselect *in_subs= item->get_IN_subquery(); + DBUG_ASSERT(thd); + + in_subs->value= 0; + empty_result_set= TRUE; + table->status= 0; + + if (check_null) + { + /* We need to check for NULL if there wasn't a matching value */ + *tab->ref.null_ref_key= 0; // Search first for not null + in_subs->was_null= 0; + } + + if (!tab->preread_init_done && tab->preread_init()) + DBUG_RETURN(1); + + if (in_subs->left_expr_has_null()) + { + /* + The case when all values in left_expr are NULL is handled by + Item_in_optimizer::val_int(). + */ + if (in_subs->is_top_level_item()) + DBUG_RETURN(1); /* notify caller to call reset() and set NULL value. */ + else + DBUG_RETURN(scan_table()); + } + + if (copy_ref_key(true)) + { + /* We know that there will be no rows even if we scan. 
*/ + in_subs->value= 0; + DBUG_RETURN(0); + } + + if (!table->file->inited && + (error= table->file->ha_index_init(tab->ref.key, 1))) + { + (void) report_error(table, error); + DBUG_RETURN(true); + } + + error= table->file->ha_index_read_map(table->record[0], + tab->ref.key_buff, + make_prev_keypart_map(tab-> + ref.key_parts), + HA_READ_KEY_EXACT); + if (unlikely(error && + error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)) + error= report_error(table, error); + else + { + for (;;) + { + error= 0; + table->null_row= 0; + if (!table->status) + { + if ((!cond || cond->val_int()) && (!having || having->val_int())) + { + empty_result_set= FALSE; + if (null_finding) + in_subs->was_null= 1; + else + in_subs->value= 1; + break; + } + error= table->file->ha_index_next_same(table->record[0], + tab->ref.key_buff, + tab->ref.key_length); + if (unlikely(error && error != HA_ERR_END_OF_FILE)) + { + error= report_error(table, error); + break; + } + } + else + { + if (!check_null || null_finding) + break; /* We don't need to check nulls */ + *tab->ref.null_ref_key= 1; + null_finding= 1; + /* Check if there exists a row with a null value in the index */ + if (unlikely((error= (safe_index_read(tab) == 1)))) + break; + } + } + } + DBUG_RETURN(error != 0); +} + + +uint subselect_single_select_engine::cols() const +{ + //psergey-sj-backport: the following assert was gone in 6.0: + //DBUG_ASSERT(select_lex->join != 0); // should be called after fix_fields() + //return select_lex->join->fields_list.elements; + return select_lex->item_list.elements; +} + + +uint subselect_union_engine::cols() const +{ + DBUG_ASSERT(unit->is_prepared()); // should be called after fix_fields() + return unit->types.elements; +} + + +uint8 subselect_single_select_engine::uncacheable() +{ + return select_lex->uncacheable; +} + + +uint8 subselect_union_engine::uncacheable() +{ + return unit->uncacheable; +} + + +void subselect_single_select_engine::exclude() +{ + 
select_lex->master_unit()->exclude_level(); +} + +void subselect_union_engine::exclude() +{ + unit->exclude_level(); +} + + +void subselect_uniquesubquery_engine::exclude() +{ + //this never should be called + DBUG_ASSERT(0); +} + + +table_map subselect_engine::calc_const_tables(List &list) +{ + table_map map= 0; + List_iterator ti(list); + TABLE_LIST *table; + //for (; table; table= table->next_leaf) + while ((table= ti++)) + { + TABLE *tbl= table->table; + if (tbl && tbl->const_table) + map|= tbl->map; + } + return map; +} + + +table_map subselect_single_select_engine::upper_select_const_tables() +{ + return calc_const_tables(select_lex->outer_select()->leaf_tables); +} + + +table_map subselect_union_engine::upper_select_const_tables() +{ + return calc_const_tables(unit->outer_select()->leaf_tables); +} + + +void subselect_single_select_engine::print(String *str, + enum_query_type query_type) +{ + With_clause* with_clause= select_lex->get_with_clause(); + THD *thd= get_thd(); + if (with_clause) + with_clause->print(thd, str, query_type); + select_lex->print(thd, str, query_type); +} + + +void subselect_union_engine::print(String *str, enum_query_type query_type) +{ + unit->print(str, query_type); +} + + +void subselect_uniquesubquery_engine::print(String *str, + enum_query_type query_type) +{ + TABLE *table= tab->tab_list ? tab->tab_list->table : tab->table; + str->append(STRING_WITH_LEN("(")); + tab->ref.items[0]->print(str, query_type); + str->append(STRING_WITH_LEN(" in ")); + if (table->s->table_category == TABLE_CATEGORY_TEMPORARY) + { + /* + Temporary tables' names change across runs, so they can't be used for + EXPLAIN EXTENDED. 
+ */ + str->append(STRING_WITH_LEN("")); + } + else + str->append(&table->s->table_name); + KEY *key_info= table->key_info+ tab->ref.key; + str->append(STRING_WITH_LEN(" on ")); + str->append(&key_info->name); + if (cond) + { + str->append(STRING_WITH_LEN(" where ")); + cond->print(str, query_type); + } + str->append(')'); +} + +/* +TODO: +The above ::print method should be changed as below. Do it after +all other tests pass. + +void subselect_uniquesubquery_engine::print(String *str) +{ + TABLE *table= tab->tab_list ? tab->tab_list->table : tab->table; + KEY *key_info= table->key_info + tab->ref.key; + str->append(STRING_WITH_LEN("(")); + for (uint i= 0; i < key_info->user_defined_key_parts; i++) + tab->ref.items[i]->print(str); + str->append(STRING_WITH_LEN(" in ")); + str->append(&table->s->table_name); + str->append(STRING_WITH_LEN(" on ")); + str->append(&key_info->name); + if (cond) + { + str->append(STRING_WITH_LEN(" where ")); + cond->print(str); + } + str->append(')'); +} +*/ + +void subselect_indexsubquery_engine::print(String *str, + enum_query_type query_type) +{ + TABLE *table= tab->tab_list ? tab->tab_list->table : tab->table; + str->append(STRING_WITH_LEN("(")); + tab->ref.items[0]->print(str, query_type); + str->append(STRING_WITH_LEN(" in ")); + str->append(&table->s->table_name); + KEY *key_info= table->key_info+ tab->ref.key; + str->append(STRING_WITH_LEN(" on ")); + str->append(&key_info->name); + if (check_null) + str->append(STRING_WITH_LEN(" checking NULL")); + if (cond) + { + str->append(STRING_WITH_LEN(" where ")); + cond->print(str, query_type); + } + if (having) + { + str->append(STRING_WITH_LEN(" having ")); + having->print(str, query_type); + } + str->append(')'); +} + +/** + change select_result object of engine. 
+ + @param si new subselect Item + @param res new select_result object + @param temp temporary assignment + + @retval + FALSE OK + @retval + TRUE error +*/ + +bool +subselect_single_select_engine::change_result(Item_subselect *si, + select_result_interceptor *res, + bool temp) +{ + DBUG_ENTER("subselect_single_select_engine::change_result"); + item= si; + if (temp) + { + /* + Here we reuse change_item_tree to roll back assignment. It has + nothing special about Item* pointer so it is safe conversion. We do + not change the interface to be compatible with MySQL. + */ + thd->change_item_tree((Item**) &result, (Item*)res); + } + else + result= res; + + /* + We can't use 'result' below as gcc 4.2.4's alias optimization + assumes that result was not changed by thd->change_item_tree(). + I tried to find a solution to make gcc happy, but could not find anything + that would not require a lot of extra code that would be harder to manage + than the current code. + */ + DBUG_RETURN(select_lex->join->change_result(res, NULL)); +} + + +/** + change select_result object of engine. + + @param si new subselect Item + @param res new select_result object + + @retval + FALSE OK + @retval + TRUE error +*/ + +bool subselect_union_engine::change_result(Item_subselect *si, + select_result_interceptor *res, + bool temp) +{ + item= si; + int rc= unit->change_result(res, result); + if (temp) + thd->change_item_tree((Item**) &result, (Item*)res); + else + result= res; + return rc; +} + + +/** + change select_result emulation, never should be called. + + @param si new subselect Item + @param res new select_result object + + @retval + FALSE OK + @retval + TRUE error +*/ + +bool +subselect_uniquesubquery_engine::change_result(Item_subselect *si, + select_result_interceptor *res, + bool temp + __attribute__((unused))) +{ + DBUG_ASSERT(0); + return TRUE; +} + + +/** + Report about presence of tables in subquery. 
  @retval
    TRUE  there are no tables used in subquery
  @retval
    FALSE there are some tables in subquery
*/
bool subselect_single_select_engine::no_tables() const
{
  return(select_lex->table_list.elements == 0);
}


/*
  Check statically whether the subquery can return NULL

  SYNOPSIS
    subselect_single_select_engine::may_be_null()

  DESCRIPTION
    A table-free subquery with neither a WHERE (join->conds) nor a HAVING
    clause cannot filter rows away, so its nullability is exactly that of
    the selected expression (maybe_null). In every other case we
    conservatively report that NULL is possible.

  RETURN
    FALSE  can guarantee that the subquery never return NULL
    TRUE   otherwise
*/
bool subselect_single_select_engine::may_be_null()
{
  return ((no_tables() && !join->conds && !join->having) ? maybe_null : 1);
}


/**
  Report about presence of tables in subquery.

  @retval
    TRUE  there are no tables used in subquery
  @retval
    FALSE there are some tables in subquery
*/
bool subselect_union_engine::no_tables() const
{
  /* The union has tables iff at least one of its SELECTs has tables. */
  for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select())
  {
    if (sl->table_list.elements)
      return FALSE;
  }
  return TRUE;
}


/**
  Report about presence of tables in subquery.

  @retval
    TRUE  there are no tables used in subquery
  @retval
    FALSE there are some tables in subquery
*/

bool subselect_uniquesubquery_engine::no_tables() const
{
  /* returning value is correct, but this method should never be called */
  DBUG_ASSERT(FALSE);
  return 0;
}


/******************************************************************************
  WL#1110 - Implementation of class subselect_hash_sj_engine
******************************************************************************/


/**
  Check if an IN predicate should be executed via partial matching using
  only schema information.

  @details
  This test essentially has three results:
  - partial matching is applicable, but cannot be executed due to a
    limitation in the total number of indexes, as a result we can't
    use subquery materialization at all.
  - partial matching is either applicable or not, and this can be
    determined by looking at 'this->max_keys'.
+ If max_keys > 1, then we need partial matching because there are + more indexes than just the one we use during materialization to + remove duplicates. + + @note + TIMOUR: The schema-based analysis for partial matching can be done once for + prepared statement and remembered. It is done here to remove the need to + save/restore all related variables between each re-execution, thus making + the code simpler. + + @retval PARTIAL_MATCH if a partial match should be used + @retval COMPLETE_MATCH if a complete match (index lookup) should be used +*/ + +subselect_hash_sj_engine::exec_strategy +subselect_hash_sj_engine::get_strategy_using_schema() +{ + Item_in_subselect *item_in= item->get_IN_subquery(); + + if (item_in->is_top_level_item()) + return COMPLETE_MATCH; + else + { + List_iterator inner_col_it(*item_in->unit->get_column_types(false)); + Item *outer_col, *inner_col; + + for (uint i= 0; i < item_in->left_expr->cols(); i++) + { + outer_col= item_in->left_expr->element_index(i); + inner_col= inner_col_it++; + + if (!inner_col->maybe_null() && !outer_col->maybe_null()) + bitmap_set_bit(&non_null_key_parts, i); + else + { + bitmap_set_bit(&partial_match_key_parts, i); + ++count_partial_match_columns; + } + } + } + + /* If no column contains NULLs use regular hash index lookups. */ + if (count_partial_match_columns) + return PARTIAL_MATCH; + return COMPLETE_MATCH; +} + + +/** + Test whether an IN predicate must be computed via partial matching + based on the NULL statistics for each column of a materialized subquery. + + @details The procedure analyzes column NULL statistics, updates the + matching type of columns that cannot be NULL or that contain only NULLs. + Based on this, the procedure determines the final execution strategy for + the [NOT] IN predicate. 
+ + @retval PARTIAL_MATCH if a partial match should be used + @retval COMPLETE_MATCH if a complete match (index lookup) should be used +*/ + +subselect_hash_sj_engine::exec_strategy +subselect_hash_sj_engine::get_strategy_using_data() +{ + Item_in_subselect *item_in= item->get_IN_subquery(); + select_materialize_with_stats *result_sink= + (select_materialize_with_stats *) result; + Item *outer_col; + + /* + If we already determined that a complete match is enough based on schema + information, nothing can be better. + */ + if (strategy == COMPLETE_MATCH) + return COMPLETE_MATCH; + + for (uint i= 0; i < item_in->left_expr->cols(); i++) + { + if (!bitmap_is_set(&partial_match_key_parts, i)) + continue; + outer_col= item_in->left_expr->element_index(i); + /* + If column 'i' doesn't contain NULLs, and the corresponding outer reference + cannot have a NULL value, then 'i' is a non-nullable column. + */ + if (result_sink->get_null_count_of_col(i) == 0 && !outer_col->maybe_null()) + { + bitmap_clear_bit(&partial_match_key_parts, i); + bitmap_set_bit(&non_null_key_parts, i); + --count_partial_match_columns; + } + if (result_sink->get_null_count_of_col(i) == tmp_table->file->stats.records) + ++count_null_only_columns; + if (result_sink->get_null_count_of_col(i)) + ++count_columns_with_nulls; + } + + /* If no column contains NULLs use regular hash index lookups. */ + if (!count_partial_match_columns) + return COMPLETE_MATCH; + return PARTIAL_MATCH; +} + + +void +subselect_hash_sj_engine::choose_partial_match_strategy( + bool has_non_null_key, bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts_arg) +{ + ulonglong pm_buff_size; + + DBUG_ASSERT(strategy == PARTIAL_MATCH); + /* + Choose according to global optimizer switch. If only one of the switches is + 'ON', then the remaining strategy is the only possible one. 
The only cases + when this will be overridden is when the total size of all buffers for the + merge strategy is bigger than the 'rowid_merge_buff_size' system variable, + or if there isn't enough physical memory to allocate the buffers. + */ + if (!optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) && + optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) + strategy= PARTIAL_MATCH_SCAN; + else if + ( optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) && + !optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) + strategy= PARTIAL_MATCH_MERGE; + + /* + If both switches are ON, or both are OFF, we interpret that as "let the + optimizer decide". Perform a cost based choice between the two partial + matching strategies. + */ + /* + TIMOUR: the above interpretation of the switch values could be changed to: + - if both are ON - let the optimizer decide, + - if both are OFF - do not use partial matching, therefore do not use + materialization in non-top-level predicates. + The problem with this is that we know for sure if we need partial matching + only after the subquery is materialized, and this is too late to revert to + the IN=>EXISTS strategy. + */ + if (strategy == PARTIAL_MATCH) + { + /* + TIMOUR: Currently we use a super simplistic measure. This will be + addressed in a separate task. + */ + if (tmp_table->file->stats.records < 100) + strategy= PARTIAL_MATCH_SCAN; + else + strategy= PARTIAL_MATCH_MERGE; + } + + /* Check if there is enough memory for the rowid merge strategy. */ + if (strategy == PARTIAL_MATCH_MERGE) + { + pm_buff_size= rowid_merge_buff_size(has_non_null_key, + has_covering_null_row, + partial_match_key_parts_arg); + if (pm_buff_size > thd->variables.rowid_merge_buff_size) + strategy= PARTIAL_MATCH_SCAN; + } +} + + +/* + Compute the memory size of all buffers proportional to the number of rows + in tmp_table. 
  @details
  If the result is bigger than thd->variables.rowid_merge_buff_size, partial
  matching via merging is not applicable.

  @param has_non_null_key         TRUE if there is a key over the non-NULL
                                  columns (adds one Ordered_key::key_buff)
  @param has_covering_null_row    TRUE if some row consists of only NULLs;
                                  in that case no per-column Ordered keys
                                  are constructed
  @param partial_match_key_parts  bitmap of the columns that need partial
                                  matching

  @return total buffer size in bytes, or ULONGLONG_MAX to signal that the
          rowid merge strategy must be disabled (NULL bitmap would need
          more than UINT_MAX bits)
*/

ulonglong subselect_hash_sj_engine::rowid_merge_buff_size(
  bool has_non_null_key, bool has_covering_null_row,
  MY_BITMAP *partial_match_key_parts)
{
  /* Total size of all buffers used by partial matching. */
  ulonglong buff_size;
  ha_rows row_count= tmp_table->file->stats.records;
  uint rowid_length= tmp_table->file->ref_length;
  select_materialize_with_stats *result_sink=
    (select_materialize_with_stats *) result;
  ha_rows max_null_row;

  /* Size of the subselect_rowid_merge_engine::row_num_to_rowid buffer. */
  buff_size= row_count * rowid_length * sizeof(uchar);

  if (has_non_null_key)
  {
    /* Add the size of Ordered_key::key_buff of the only non-NULL key. */
    buff_size+= row_count * sizeof(rownum_t);
  }

  if (!has_covering_null_row)
  {
    for (uint i= 0; i < partial_match_key_parts->n_bits; i++)
    {
      if (!bitmap_is_set(partial_match_key_parts, i) ||
          result_sink->get_null_count_of_col(i) == row_count)
        continue; /* In these cases we wouldn't construct Ordered keys. */

      /*
        Add the size of Ordered_key::key_buff: one rownum_t per row whose
        value in column 'i' is not NULL.
      */
      buff_size+= (row_count - result_sink->get_null_count_of_col(i)) *
                  sizeof(rownum_t);
      /* Add the size of Ordered_key::null_key */
      max_null_row= result_sink->get_max_null_of_col(i);
      if (max_null_row >= UINT_MAX)
      {
        /*
          There can be at most UINT_MAX bits in a MY_BITMAP that is used to
          store NULLs in an Ordered_key. Return a number of bytes bigger than
          the maximum allowed memory buffer for partial matching to disable
          the rowid merge strategy.
        */
        return ULONGLONG_MAX;
      }
      buff_size+= bitmap_buffer_size(max_null_row);
    }
  }

  return buff_size;
}


/*
  Initialize a MY_BITMAP with a buffer allocated on the current
  memory root.
  TIMOUR: move to bitmap C file?
+*/ + +static my_bool +my_bitmap_init_memroot(MY_BITMAP *map, uint n_bits, MEM_ROOT *mem_root) +{ + my_bitmap_map *bitmap_buf; + + if (!(bitmap_buf= (my_bitmap_map*) alloc_root(mem_root, + bitmap_buffer_size(n_bits))) || + my_bitmap_init(map, bitmap_buf, n_bits)) + return TRUE; + bitmap_clear_all(map); + return FALSE; +} + + +/** + Create all structures needed for IN execution that can live between PS + reexecution. + + @param tmp_columns the items that produce the data for the temp table + @param subquery_id subquery's identifier (to make "" name for + EXPLAIN) + + @details + - Create a temporary table to store the result of the IN subquery. The + temporary table has one hash index on all its columns. + - Create a new result sink that sends the result stream of the subquery to + the temporary table, + + @notice: + Currently Item_subselect::init() already chooses and creates at parse + time an engine with a corresponding JOIN to execute the subquery. + + @retval TRUE if error + @retval FALSE otherwise +*/ + +bool subselect_hash_sj_engine::init(List *tmp_columns, uint subquery_id) +{ + THD *thd= get_thd(); + select_unit *result_sink; + /* Options to create_tmp_table. */ + ulonglong tmp_create_options= thd->variables.option_bits | TMP_TABLE_ALL_COLUMNS; + /* | TMP_TABLE_FORCE_MYISAM; TIMOUR: force MYISAM */ + + DBUG_ENTER("subselect_hash_sj_engine::init"); + + if (my_bitmap_init_memroot(&non_null_key_parts, tmp_columns->elements, + thd->mem_root) || + my_bitmap_init_memroot(&partial_match_key_parts, tmp_columns->elements, + thd->mem_root)) + DBUG_RETURN(TRUE); + + /* + Create and initialize a select result interceptor that stores the + result stream in a temporary table. The temporary table itself is + managed (created/filled/etc) internally by the interceptor. + */ +/* + TIMOUR: + Select a more efficient result sink when we know there is no need to collect + data statistics. 
+ + if (strategy == COMPLETE_MATCH) + { + if (!(result= new select_union)) + DBUG_RETURN(TRUE); + } + else if (strategy == PARTIAL_MATCH) + { + if (!(result= new select_materialize_with_stats)) + DBUG_RETURN(TRUE); + } +*/ + if (!(result_sink= new (thd->mem_root) select_materialize_with_stats(thd))) + DBUG_RETURN(TRUE); + + char buf[32]; + LEX_CSTRING name; + name.length= my_snprintf(buf, sizeof(buf), "", subquery_id); + if (!(name.str= (char*) thd->memdup(buf, name.length + 1))) + DBUG_RETURN(TRUE); + + result_sink->get_tmp_table_param()->materialized_subquery= true; + + if (item->substype() == Item_subselect::IN_SUBS && + (item->get_IN_subquery()->is_jtbm_merged)) + { + result_sink->get_tmp_table_param()->force_not_null_cols= true; + } + if (result_sink->create_result_table(thd, tmp_columns, TRUE, + tmp_create_options, + &name, TRUE, TRUE, FALSE, 0)) + DBUG_RETURN(TRUE); + + tmp_table= result_sink->table; + result= result_sink; + + /* + If the subquery has blobs, or the total key length is bigger than + some length, or the total number of key parts is more than the + allowed maximum (currently MAX_REF_PARTS == 32), then the created + index cannot be used for lookups and we can't use hash semi + join. If this is the case, delete the temporary table since it + will not be used, and tell the caller we failed to initialize the + engine. + */ + if (tmp_table->s->keys == 0) + { + //fprintf(stderr, "Q: %s\n", current_thd->query()); + DBUG_ASSERT(0); + DBUG_ASSERT( + tmp_table->s->uniques || + tmp_table->key_info->key_length >= tmp_table->file->max_key_length() || + tmp_table->key_info->user_defined_key_parts > + tmp_table->file->max_key_parts()); + free_tmp_table(thd, tmp_table); + tmp_table= NULL; + delete result; + result= NULL; + DBUG_RETURN(TRUE); + } + + /* + Make sure there is only one index on the temp table, and it doesn't have + the extra key part created when s->uniques > 0. 
+ + NOTE: item have to be Item_in_subselect, because class constructor + accept Item_in_subselect as the parmeter. + */ + DBUG_ASSERT(tmp_table->s->keys == 1 && + item->get_IN_subquery()->left_expr->cols() == + tmp_table->key_info->user_defined_key_parts); + + if (make_semi_join_conds() || + /* A unique_engine is used both for complete and partial matching. */ + !(lookup_engine= make_unique_engine())) + DBUG_RETURN(TRUE); + + /* + Repeat name resolution for 'cond' since cond is not part of any + clause of the query, and it is not 'fixed' during JOIN::prepare. + */ + if (semi_join_conds && + semi_join_conds->fix_fields_if_needed(thd, (Item**)&semi_join_conds)) + DBUG_RETURN(TRUE); + /* Let our engine reuse this query plan for materialization. */ + materialize_join= materialize_engine->join; + materialize_join->change_result(result, NULL); + + DBUG_RETURN(FALSE); +} + + +/* + Create an artificial condition to post-filter those rows matched by index + lookups that cannot be distinguished by the index lookup procedure. + + @notes + The need for post-filtering may occur e.g. because of + truncation. Prepared statements execution requires that fix_fields is + called for every execution. In order to call fix_fields we need to + create a Name_resolution_context and a corresponding TABLE_LIST for + the temporary table for the subquery, so that all column references + to the materialized subquery table can be resolved correctly. + + @returns + @retval TRUE memory allocation error occurred + @retval FALSE the conditions were created and resolved (fixed) +*/ + +bool subselect_hash_sj_engine::make_semi_join_conds() +{ + /* + Table reference for tmp_table that is used to resolve column references + (Item_fields) to columns in tmp_table. + */ + TABLE_LIST *tmp_table_ref; + /* Name resolution context for all tmp_table columns created below. 
*/ + Name_resolution_context *context; + Item_in_subselect *item_in= item->get_IN_subquery(); + LEX_CSTRING table_name; + DBUG_ENTER("subselect_hash_sj_engine::make_semi_join_conds"); + DBUG_ASSERT(semi_join_conds == NULL); + + if (!(semi_join_conds= new (thd->mem_root) Item_cond_and(thd))) + DBUG_RETURN(TRUE); + + if (!(tmp_table_ref= (TABLE_LIST*) thd->alloc(sizeof(TABLE_LIST)))) + DBUG_RETURN(TRUE); + + table_name.str= tmp_table->alias.c_ptr(); + table_name.length= tmp_table->alias.length(), + tmp_table_ref->init_one_table(&empty_clex_str, &table_name, NULL, TL_READ); + tmp_table_ref->table= tmp_table; + + context= new Name_resolution_context(tmp_table_ref); + semi_join_conds_context= context; + + for (uint i= 0; i < item_in->left_expr->cols(); i++) + { + /* New equi-join condition for the current column. */ + Item_func_eq *eq_cond; + /* Item for the corresponding field from the materialized temp table. */ + Item_field *right_col_item= new (thd->mem_root) + Item_field(thd, context, tmp_table->field[i]); + if (right_col_item) + right_col_item->set_refers_to_temp_table(); + + if (!right_col_item || + !(eq_cond= new (thd->mem_root) + Item_func_eq(thd, item_in->left_expr->element_index(i), + right_col_item)) || + (((Item_cond_and*)semi_join_conds)->add(eq_cond, thd->mem_root))) + { + delete semi_join_conds; + semi_join_conds= NULL; + DBUG_RETURN(TRUE); + } + } + if (semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds)) + DBUG_RETURN(TRUE); + + DBUG_RETURN(FALSE); +} + + +/** + Create a new uniquesubquery engine for the execution of an IN predicate. + + @details + Create and initialize a new JOIN_TAB, and Table_ref objects to perform + lookups into the indexed temporary table. 
  @retval A new subselect_uniquesubquery_engine object
  @retval NULL if a memory allocation error occurs
*/

subselect_uniquesubquery_engine*
subselect_hash_sj_engine::make_unique_engine()
{
  Item_in_subselect *item_in= item->get_IN_subquery();
  Item_iterator_row it(item_in->left_expr);
  /* The only index on the temporary table. */
  KEY *tmp_key= tmp_table->key_info;
  JOIN_TAB *tab;

  DBUG_ENTER("subselect_hash_sj_engine::make_unique_engine");

  /*
    Create and initialize the JOIN_TAB that represents an index lookup
    plan operator into the materialized subquery result. Notice that:
    - this JOIN_TAB has no corresponding JOIN (and doesn't need one), and
    - here we initialize only those members that are used by
      subselect_uniquesubquery_engine, so these objects are incomplete.
    The JOIN_TAB is allocated on thd's memory root, so it needs no
    explicit cleanup here.
  */
  if (!(tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB))))
    DBUG_RETURN(NULL);

  tab->table= tmp_table;
  tab->tab_list= 0;
  tab->preread_init_done= FALSE;
  tab->ref.tmp_table_index_lookup_init(thd, tmp_key, it, FALSE);

  DBUG_RETURN(new (thd->mem_root)
              subselect_uniquesubquery_engine(thd, tab, item_in,
                                              semi_join_conds));
}


/*
  Free the lookup engine and the result sink, and drop the materialized
  temporary table if one was created.
*/
subselect_hash_sj_engine::~subselect_hash_sj_engine()
{
  delete lookup_engine;
  delete result;
  if (tmp_table)
    free_tmp_table(thd, tmp_table);
}


int subselect_hash_sj_engine::prepare(THD *thd_arg)
{
  /*
    Create and optimize the JOIN that will be used to materialize
    the subquery if not yet created. Store thd_arg first (set_thd) so
    that the engine uses the current execution's THD.
  */
  set_thd(thd_arg);
  return materialize_engine->prepare(thd);
}


/**
  Cleanup performed after each PS execution.

  @details
  Called in the end of JOIN::prepare for PS from Item_subselect::cleanup.
+*/ + +void subselect_hash_sj_engine::cleanup() +{ + enum_engine_type lookup_engine_type= lookup_engine->engine_type(); + is_materialized= FALSE; + bitmap_clear_all(&non_null_key_parts); + bitmap_clear_all(&partial_match_key_parts); + count_partial_match_columns= 0; + count_null_only_columns= 0; + strategy= UNDEFINED; + materialize_engine->cleanup(); + /* + Restore the original Item_in_subselect engine. This engine is created once + at parse time and stored across executions, while all other materialization + related engines are created and chosen for each execution. + */ + item->get_IN_subquery()->engine= materialize_engine; + if (lookup_engine_type == TABLE_SCAN_ENGINE || + lookup_engine_type == ROWID_MERGE_ENGINE) + { + subselect_engine *inner_lookup_engine; + inner_lookup_engine= + ((subselect_partial_match_engine*) lookup_engine)->lookup_engine; + /* + Partial match engines are recreated for each PS execution inside + subselect_hash_sj_engine::exec(). + */ + delete lookup_engine; + lookup_engine= inner_lookup_engine; + } + DBUG_ASSERT(lookup_engine->engine_type() == UNIQUESUBQUERY_ENGINE); + lookup_engine->cleanup(); + result->cleanup(); /* Resets the temp table as well. 
*/ + DBUG_ASSERT(tmp_table); + free_tmp_table(thd, tmp_table); + tmp_table= NULL; +} + + +/* + Get fanout produced by tables specified in the table_map +*/ + +double get_fanout_with_deps(JOIN *join, table_map tset) +{ + /* Handle the case of "Impossible WHERE" */ + if (join->table_count == 0) + return 0.0; + + /* First, recursively get all tables we depend on */ + table_map deps_to_check= tset; + table_map checked_deps= 0; + table_map further_deps; + do + { + further_deps= 0; + Table_map_iterator tm_it(deps_to_check); + int tableno; + while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END) + { + /* get tableno's dependency tables that are not in needed_set */ + further_deps |= join->map2table[tableno]->ref.depend_map & ~checked_deps; + } + + checked_deps |= deps_to_check; + deps_to_check= further_deps; + } while (further_deps != 0); + + + /* Now, walk the join order and calculate the fanout */ + double fanout= 1; + for (JOIN_TAB *tab= first_top_level_tab(join, WITHOUT_CONST_TABLES); tab; + tab= next_top_level_tab(join, tab)) + { + /* + Ignore SJM nests. They have tab->table==NULL. There is no point to walk + inside them, because GROUP BY clause cannot refer to tables from within + subquery. + */ + if (!tab->is_sjm_nest() && (tab->table->map & checked_deps) && + !tab->emb_sj_nest && + tab->records_read != 0) + { + fanout *= tab->records_read; + } + } + return fanout; +} + + +#if 0 +void check_out_index_stats(JOIN *join) +{ + ORDER *order; + uint n_order_items; + + /* + First, collect the keys that we can use in each table. + We can use a key if + - all tables refer to it. + */ + key_map key_start_use[MAX_TABLES]; + key_map key_infix_use[MAX_TABLES]; + table_map key_used=0; + table_map non_key_used= 0; + + bzero(&key_start_use, sizeof(key_start_use)); //psergey-todo: safe initialization! 
+ bzero(&key_infix_use, sizeof(key_infix_use)); + + for (order= join->group_list; order; order= order->next) + { + Item *item= order->item[0]; + + if (item->real_type() == Item::FIELD_ITEM) + { + if (item->used_tables() & OUTER_REF_TABLE_BIT) + continue; /* outside references are like constants for us */ + + Field *field= ((Item_field*)item->real_item())->field; + uint table_no= field->table->tablenr; + if (!(non_key_used && table_map(1) << table_no) && + !field->part_of_key.is_clear_all()) + { + key_map infix_map= field->part_of_key; + infix_map.subtract(field->key_start); + key_start_use[table_no].merge(field->key_start); + key_infix_use[table_no].merge(infix_map); + key_used |= table_no; + } + continue; + } + /* + Note: the below will cause clauses like GROUP BY YEAR(date) not to be + handled. + */ + non_key_used |= item->used_tables(); + } + + Table_map_iterator tm_it(key_used & ~non_key_used); + int tableno; + while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END) + { + key_map::iterator key_it(key_start_use); + int keyno; + while ((keyno = tm_it.next_bit()) != key_map::iterator::BITMAP_END) + { + for (order= join->group_list; order; order= order->next) + { + Item *item= order->item[0]; + if (item->used_tables() & (table_map(1) << tableno)) + { + DBUG_ASSERT(item->real_type() == Item::FIELD_ITEM); + } + } + /* + if (continuation) + { + walk through list and find which key parts are occupied; + // note that the above can't be made any faster. + } + else + use rec_per_key[0]; + + find out the cardinality. + check if cardinality decreases if we use it; + */ + } + } +} +#endif + + +/* + Get an estimate of how many records will be produced after the GROUP BY + operation. + + @param join Join we're operating on + @param join_op_rows How many records will be produced by the join + operations (this is what join optimizer produces) + + @seealso + See also optimize_semijoin_nests(), grep for "Adjust output cardinality + estimates". 
Very similar code there that is not joined with this one + because we operate on different data structs and too much effort is + needed to abstract them out. + + @return + Number of records we expect to get after the GROUP BY operation +*/ + +double get_post_group_estimate(JOIN* join, double join_op_rows) +{ + table_map tables_in_group_list= table_map(0); + + /* Find out which tables are used in GROUP BY list */ + for (ORDER *order= join->group_list_for_estimates; order; order= order->next) + { + Item *item= order->item[0]; + table_map item_used_tables= item->used_tables(); + if (item_used_tables & RAND_TABLE_BIT) + { + /* Each join output record will be in its own group */ + return join_op_rows; + } + tables_in_group_list|= item_used_tables; + } + tables_in_group_list &= ~PSEUDO_TABLE_BITS; + + /* + Use join fanouts to calculate the max. number of records in the group-list + */ + double fanout_rows[MAX_KEY]; + bzero(&fanout_rows, sizeof(fanout_rows)); + double out_rows; + + out_rows= get_fanout_with_deps(join, tables_in_group_list); + +#if 0 + /* The following will be needed when making use of index stats: */ + /* + Also generate max. number of records for each of the tables mentioned + in the group-list. We'll use that a baseline number that we'll try to + reduce by using + - #table-records + - index statistics. + */ + Table_map_iterator tm_it(tables_in_group_list); + int tableno; + while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END) + { + fanout_rows[tableno]= get_fanout_with_deps(join, table_map(1) << tableno); + } + + /* + Try to bring down estimates using index statistics. + */ + //check_out_index_stats(join); +#endif + + return out_rows; +} + + +/** + Execute a subquery IN predicate via materialization. + + @details + If needed materialize the subquery into a temporary table, then + copmpute the predicate via a lookup into this table. 
+ + @retval TRUE if error + @retval FALSE otherwise +*/ + +int subselect_hash_sj_engine::exec() +{ + Item_in_subselect *item_in= item->get_IN_subquery(); + SELECT_LEX *save_select= thd->lex->current_select; + subselect_partial_match_engine *pm_engine= NULL; + int res= 0; + DBUG_ENTER("subselect_hash_sj_engine::exec"); + + /* + Optimize and materialize the subquery during the first execution of + the subquery predicate. + */ + thd->lex->current_select= materialize_engine->select_lex; + /* The subquery should be optimized, and materialized only once. */ + DBUG_ASSERT(materialize_join->optimization_state == JOIN::OPTIMIZATION_DONE && + !is_materialized); + materialize_join->exec(); + if (unlikely((res= MY_TEST(materialize_join->error || thd->is_fatal_error || + thd->is_error())))) + goto err; + + /* + TODO: + - Unlock all subquery tables as we don't need them. To implement this + we need to add new functionality to JOIN::join_free that can unlock + all tables in a subquery (and all its subqueries). + - The temp table used for grouping in the subquery can be freed + immediately after materialization (yet it's done together with + unlocking). + */ + is_materialized= TRUE; + /* + If the subquery returned no rows, the temporary table is empty, so we know + directly that the result of IN is FALSE. We first update the table + statistics, then we test if the temporary table for the query result is + empty. + */ + tmp_table->file->info(HA_STATUS_VARIABLE); + if (!tmp_table->file->stats.records) + { + /* The value of IN will not change during this execution. */ + item_in->reset(); + item_in->make_const(); + item_in->set_first_execution(); + thd->lex->current_select= save_select; + DBUG_RETURN(FALSE); + } + + /* + TIMOUR: The schema-based analysis for partial matching can be done once for + prepared statement and remembered. It is done here to remove the need to + save/restore all related variables between each re-execution, thus making + the code simpler. 
+ */ + strategy= get_strategy_using_schema(); + /* This call may discover that we don't need partial matching at all. */ + strategy= get_strategy_using_data(); + if (strategy == PARTIAL_MATCH) + { + uint count_pm_keys; /* Total number of keys needed for partial matching. */ + MY_BITMAP *nn_key_parts= NULL; /* Key parts of the only non-NULL index. */ + uint count_non_null_columns= 0; /* Number of columns in nn_key_parts. */ + bool has_covering_null_row; + bool has_covering_null_columns; + select_materialize_with_stats *result_sink= + (select_materialize_with_stats *) result; + uint field_count= tmp_table->s->fields; + + if (count_partial_match_columns < field_count) + { + nn_key_parts= &non_null_key_parts; + count_non_null_columns= bitmap_bits_set(nn_key_parts); + } + has_covering_null_row= (result_sink->get_max_nulls_in_row() == field_count); + has_covering_null_columns= (count_non_null_columns + + count_null_only_columns == field_count); + + if (has_covering_null_row && has_covering_null_columns) + { + /* + The whole table consist of only NULL values. The result of IN is + a constant UNKNOWN. + */ + DBUG_ASSERT(tmp_table->file->stats.records == 1); + item_in->value= 0; + item_in->null_value= 1; + item_in->make_const(); + item_in->set_first_execution(); + thd->lex->current_select= save_select; + DBUG_RETURN(FALSE); + } + + if (has_covering_null_row) + { + DBUG_ASSERT(count_partial_match_columns == field_count); + count_pm_keys= 0; + } + else if (has_covering_null_columns) + count_pm_keys= 1; + else + count_pm_keys= count_partial_match_columns - count_null_only_columns + + (nn_key_parts ? 
1 : 0); + + choose_partial_match_strategy(MY_TEST(nn_key_parts), + has_covering_null_row, + &partial_match_key_parts); + DBUG_ASSERT(strategy == PARTIAL_MATCH_MERGE || + strategy == PARTIAL_MATCH_SCAN); + if (strategy == PARTIAL_MATCH_MERGE) + { + pm_engine= + (new (thd->mem_root) + subselect_rowid_merge_engine(thd, + (subselect_uniquesubquery_engine*) + lookup_engine, tmp_table, + count_pm_keys, + has_covering_null_row, + has_covering_null_columns, + count_columns_with_nulls, + item, result, + semi_join_conds->argument_list())); + if (!pm_engine || + pm_engine->prepare(thd) || + ((subselect_rowid_merge_engine*) pm_engine)-> + init(nn_key_parts, &partial_match_key_parts)) + { + /* + The call to init() would fail if there was not enough memory + to allocate all buffers for the rowid merge strategy. In + this case revert to table scanning which doesn't need any + big buffers. + */ + delete pm_engine; + pm_engine= NULL; + strategy= PARTIAL_MATCH_SCAN; + } + } + + if (strategy == PARTIAL_MATCH_SCAN) + { + if (!(pm_engine= + (new (thd->mem_root) + subselect_table_scan_engine(thd, + (subselect_uniquesubquery_engine*) + lookup_engine, tmp_table, + item, result, + semi_join_conds->argument_list(), + has_covering_null_row, + has_covering_null_columns, + count_columns_with_nulls))) || + pm_engine->prepare(thd)) + { + /* This is an irrecoverable error. */ + res= 1; + goto err; + } + } + } + + if (pm_engine) + lookup_engine= pm_engine; + item_in->change_engine(lookup_engine); + +err: + thd->lex->current_select= save_select; + DBUG_RETURN(res); +} + + +/** + Print the state of this engine into a string for debugging and views. 
+*/ + +void subselect_hash_sj_engine::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN(" (")); + materialize_engine->print(str, query_type); + str->append(STRING_WITH_LEN(" ), ")); + + if (lookup_engine) + lookup_engine->print(str, query_type); + else + str->append(STRING_WITH_LEN( + "" + )); +} + +bool subselect_hash_sj_engine::fix_length_and_dec(Item_cache** row) +{ + DBUG_ASSERT(FALSE); + return FALSE; +} + +void subselect_hash_sj_engine::exclude() +{ + DBUG_ASSERT(FALSE); +} + +bool subselect_hash_sj_engine::no_tables() const +{ + DBUG_ASSERT(FALSE); + return FALSE; +} + +bool subselect_hash_sj_engine::change_result(Item_subselect *si, + select_result_interceptor *res, + bool temp __attribute__((unused))) +{ + DBUG_ASSERT(FALSE); + return TRUE; +} + + +Ordered_key::Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg, + ha_rows null_count_arg, ha_rows min_null_row_arg, + ha_rows max_null_row_arg, uchar *row_num_to_rowid_arg) + : keyid(keyid_arg), tbl(tbl_arg), search_key(search_key_arg), + row_num_to_rowid(row_num_to_rowid_arg), null_count(null_count_arg) +{ + DBUG_ASSERT(tbl->file->stats.records > null_count); + key_buff_elements= tbl->file->stats.records - null_count; + cur_key_idx= HA_POS_ERROR; + + DBUG_ASSERT((null_count && min_null_row_arg && max_null_row_arg) || + (!null_count && !min_null_row_arg && !max_null_row_arg)); + if (null_count) + { + /* The counters are 1-based, for key access we need 0-based indexes. */ + min_null_row= min_null_row_arg - 1; + max_null_row= max_null_row_arg - 1; + } + else + min_null_row= max_null_row= 0; +} + + +Ordered_key::~Ordered_key() +{ + my_free(key_buff); + my_bitmap_free(&null_key); +} + + +/* + Cleanup that needs to be done for each PS (re)execution. +*/ + +void Ordered_key::cleanup() +{ + /* + Currently these keys are recreated for each PS re-execution, thus + there is nothing to cleanup, the whole object goes away after execution + is over. 
All handler related initialization/deinitialization is done by + the parent subselect_rowid_merge_engine object. + */ +} + + +/* + Initialize a multi-column index. +*/ + +bool Ordered_key::init(MY_BITMAP *columns_to_index) +{ + THD *thd= tbl->in_use; + uint cur_key_col= 0; + Item_field *cur_tmp_field; + Item_func_lt *fn_less_than; + + key_column_count= bitmap_bits_set(columns_to_index); + key_columns= (Item_field**) thd->alloc(key_column_count * + sizeof(Item_field*)); + compare_pred= (Item_func_lt**) thd->alloc(key_column_count * + sizeof(Item_func_lt*)); + + if (!key_columns || !compare_pred) + return TRUE; /* Revert to table scan partial match. */ + + for (uint i= 0; i < columns_to_index->n_bits; i++) + { + if (!bitmap_is_set(columns_to_index, i)) + continue; + cur_tmp_field= new (thd->mem_root) Item_field(thd, tbl->field[i]); + /* Create the predicate (tmp_column[i] < outer_ref[i]). */ + fn_less_than= new (thd->mem_root) Item_func_lt(thd, cur_tmp_field, + search_key->element_index(i)); + fn_less_than->fix_fields(thd, (Item**) &fn_less_than); + key_columns[cur_key_col]= cur_tmp_field; + compare_pred[cur_key_col]= fn_less_than; + ++cur_key_col; + } + + if (alloc_keys_buffers()) + { + /* TIMOUR revert to partial match via table scan. */ + return TRUE; + } + return FALSE; +} + + +/* + Initialize a single-column index. +*/ + +bool Ordered_key::init(int col_idx) +{ + THD *thd= tbl->in_use; + + key_column_count= 1; + + // TIMOUR: check for mem allocation err, revert to scan + + key_columns= (Item_field**) thd->alloc(sizeof(Item_field*)); + compare_pred= (Item_func_lt**) thd->alloc(sizeof(Item_func_lt*)); + + key_columns[0]= new (thd->mem_root) Item_field(thd, tbl->field[col_idx]); + /* Create the predicate (tmp_column[i] < outer_ref[i]). 
*/ + compare_pred[0]= new (thd->mem_root) Item_func_lt(thd, key_columns[0], + search_key->element_index(col_idx)); + compare_pred[0]->fix_fields(thd, (Item**)&compare_pred[0]); + + if (alloc_keys_buffers()) + { + /* TIMOUR revert to partial match via table scan. */ + return TRUE; + } + return FALSE; +} + + +/* + Allocate the buffers for both the row number, and the NULL-bitmap indexes. +*/ + +bool Ordered_key::alloc_keys_buffers() +{ + DBUG_ASSERT(key_buff_elements > 0); + + if (!(key_buff= (rownum_t*) my_malloc(PSI_INSTRUMENT_ME, + static_cast(key_buff_elements * sizeof(rownum_t)), + MYF(MY_WME | MY_THREAD_SPECIFIC)))) + return TRUE; + + /* + TIMOUR: it is enough to create bitmaps with size + (max_null_row - min_null_row), and then use min_null_row as + lookup offset. + */ + /* Notice that max_null_row is max array index, we need count, so +1. */ + if (my_bitmap_init(&null_key, NULL, (uint)(max_null_row + 1))) + return TRUE; + + cur_key_idx= HA_POS_ERROR; + + return FALSE; +} + + +/* + Quick sort comparison function that compares two rows of the same table + indentfied with their row numbers. + + @retval -1 + @retval 0 + @retval +1 +*/ + +int +Ordered_key::cmp_keys_by_row_data(ha_rows a, ha_rows b) +{ + uchar *rowid_a, *rowid_b; + int error; + int cmp_res; + /* The length in bytes of the rowids (positions) of tmp_table. */ + uint rowid_length= tbl->file->ref_length; + + if (a == b) + return 0; + /* Get the corresponding rowids. */ + rowid_a= row_num_to_rowid + a * rowid_length; + rowid_b= row_num_to_rowid + b * rowid_length; + /* Fetch the rows for comparison. 
*/ + if (unlikely((error= tbl->file->ha_rnd_pos(tbl->record[0], rowid_a)))) + { + /* purecov: begin inspected */ + tbl->file->print_error(error, MYF(ME_FATAL)); // Sets fatal_error + return 0; + /* purecov: end */ + } + if (unlikely((error= tbl->file->ha_rnd_pos(tbl->record[1], rowid_b)))) + { + /* purecov: begin inspected */ + tbl->file->print_error(error, MYF(ME_FATAL)); // Sets fatal_error + return 0; + /* purecov: end */ + } + /* + Compare the two rows by the corresponding values of the indexed + columns. + */ + for (uint i= 0; i < key_column_count; i++) + { + Field *cur_field= key_columns[i]->field; + if ((cmp_res= cur_field->cmp_offset(tbl->s->rec_buff_length))) + return (cmp_res > 0 ? 1 : -1); + } + return 0; +} + + +int +Ordered_key::cmp_keys_by_row_data_and_rownum(Ordered_key *key, + rownum_t* a, rownum_t* b) +{ + /* The result of comparing the two keys according to their row data. */ + int cmp_row_res= key->cmp_keys_by_row_data(*a, *b); + if (cmp_row_res) + return cmp_row_res; + return (*a < *b) ? -1 : (*a > *b) ? 1 : 0; +} + + +bool Ordered_key::sort_keys() +{ + if (tbl->file->ha_rnd_init_with_error(0)) + return TRUE; + my_qsort2(key_buff, (size_t) key_buff_elements, sizeof(rownum_t), + (qsort2_cmp) &cmp_keys_by_row_data_and_rownum, (void*) this); + /* Invalidate the current row position. */ + cur_key_idx= HA_POS_ERROR; + tbl->file->ha_rnd_end(); + return FALSE; +} + + +/* + The fraction of rows that do not contain NULL in the columns indexed by + this key. + + @retval 1 if there are no NULLs + @retval 0 if only NULLs +*/ + +double Ordered_key::null_selectivity() +{ + /* We should not be processing empty tables. */ + DBUG_ASSERT(tbl->file->stats.records); + return (1 - (double) null_count / (double) tbl->file->stats.records); +} + + +/* + Compare the value(s) of the current key in 'search_key' with the + data of the current table record. + + @notes The comparison result follows from the way compare_pred + is created in Ordered_key::init. 
Currently compare_pred compares + a field in of the current row with the corresponding Item that + contains the search key. + + @param row_num Number of the row (not index in the key_buff array) + + @retval -1 if (current row < search_key) + @retval 0 if (current row == search_key) + @retval +1 if (current row > search_key) +*/ + +int Ordered_key::cmp_key_with_search_key(rownum_t row_num) +{ + /* The length in bytes of the rowids (positions) of tmp_table. */ + uint rowid_length= tbl->file->ref_length; + uchar *cur_rowid= row_num_to_rowid + row_num * rowid_length; + int error; + int cmp_res; + + if (unlikely((error= tbl->file->ha_rnd_pos(tbl->record[0], cur_rowid)))) + { + /* purecov: begin inspected */ + tbl->file->print_error(error, MYF(ME_FATAL)); // Sets fatal_error + return 0; + /* purecov: end */ + } + + for (uint i= 0; i < key_column_count; i++) + { + cmp_res= compare_pred[i]->get_comparator()->compare(); + /* Unlike Arg_comparator::compare_row() here there should be no NULLs. */ + DBUG_ASSERT(!compare_pred[i]->null_value); + if (cmp_res) + return (cmp_res > 0 ? 1 : -1); + } + return 0; +} + + +/* + Find a key in a sorted array of keys via binary search. + + see create_subq_in_equalities() +*/ + +bool Ordered_key::lookup() +{ + DBUG_ASSERT(key_buff_elements); + + ha_rows lo= 0; + ha_rows hi= key_buff_elements - 1; + ha_rows mid; + int cmp_res; + + while (lo <= hi) + { + mid= lo + (hi - lo) / 2; + cmp_res= cmp_key_with_search_key(key_buff[mid]); + /* + In order to find the minimum match, check if the pevious element is + equal or smaller than the found one. If equal, we need to search further + to the left. + */ + if (!cmp_res && mid > 0) + cmp_res= !cmp_key_with_search_key(key_buff[mid - 1]) ? 
1 : 0; + + if (cmp_res == -1) + { + /* row[mid] < search_key */ + lo= mid + 1; + } + else if (cmp_res == 1) + { + /* row[mid] > search_key */ + if (!mid) + goto not_found; + hi= mid - 1; + } + else + { + /* row[mid] == search_key */ + cur_key_idx= mid; + return TRUE; + } + } +not_found: + cur_key_idx= HA_POS_ERROR; + return FALSE; +} + + +/* + Move the current index pointer to the next key with the same column + values as the current key. Since the index is sorted, all such keys + are contiguous. +*/ + +bool Ordered_key::next_same() +{ + DBUG_ASSERT(key_buff_elements); + + if (cur_key_idx < key_buff_elements - 1) + { + /* + TIMOUR: + The below is quite inefficient, since as a result we will fetch every + row (except the last one) twice. There must be a more efficient way, + e.g. swapping record[0] and record[1], and reading only the new record. + */ + if (!cmp_keys_by_row_data(key_buff[cur_key_idx], key_buff[cur_key_idx + 1])) + { + ++cur_key_idx; + return TRUE; + } + } + return FALSE; +} + + +void Ordered_key::print(String *str) +{ + uint i; + + /* We have to pre-allocate string as we are using qs_append() */ + if (str->alloc(str->length() + + 5+10+4+ (NAME_LEN+2)*key_column_count+ + 20+11+21+10+FLOATING_POINT_BUFFER*3+50 + )) + return; + str->append(STRING_WITH_LEN("{idx=")); + str->qs_append(keyid); + str->append(STRING_WITH_LEN(", (")); + for (i= 0; i < key_column_count ; i++) + { + str->append(&key_columns[i]->field->field_name); + str->append(STRING_WITH_LEN(", ")); + } + if (key_column_count) + str->length(str->length() - 2); + str->append(STRING_WITH_LEN("), ")); + + str->append(STRING_WITH_LEN("null_bitmap: (bits=")); + str->qs_append(null_key.n_bits); + str->append(STRING_WITH_LEN(", nulls= ")); + str->qs_append((double)null_count); + str->append(STRING_WITH_LEN(", min_null= ")); + str->qs_append((double)min_null_row); + str->append(STRING_WITH_LEN(", max_null= ")); + str->qs_append((double)max_null_row); + str->append(STRING_WITH_LEN("), ")); + + 
str->append('}'); +} + + +subselect_partial_match_engine::subselect_partial_match_engine( + THD *thd_arg, + subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, Item_subselect *item_arg, + select_result_interceptor *result_arg, + List *equi_join_conds_arg, + bool has_covering_null_row_arg, + bool has_covering_null_columns_arg, + uint count_columns_with_nulls_arg) + :subselect_engine(item_arg, result_arg), + tmp_table(tmp_table_arg), lookup_engine(engine_arg), + equi_join_conds(equi_join_conds_arg), + has_covering_null_row(has_covering_null_row_arg), + has_covering_null_columns(has_covering_null_columns_arg), + count_columns_with_nulls(count_columns_with_nulls_arg) +{ + thd= thd_arg; +} + + +int subselect_partial_match_engine::exec() +{ + Item_in_subselect *item_in= item->get_IN_subquery(); + int lookup_res; + DBUG_ASSERT(thd); + + DBUG_ASSERT(!(item_in->left_expr_has_null() && + item_in->is_top_level_item())); + + if (!item_in->left_expr_has_null()) + { + /* Try to find a matching row by index lookup. */ + if (lookup_engine->copy_ref_key(false)) + { + /* The result is FALSE based on the outer reference. */ + item_in->value= 0; + item_in->null_value= 0; + return 0; + } + else + { + /* Search for a complete match. */ + if ((lookup_res= lookup_engine->index_lookup())) + { + /* An error occurred during lookup(). */ + item_in->value= 0; + item_in->null_value= 0; + return lookup_res; + } + else if (item_in->value || !count_columns_with_nulls) + { + /* + A complete match was found, the result of IN is TRUE. + If no match was found, and there are no NULLs in the materialized + subquery, then the result is guaranteed to be false because this + branch is executed when the outer reference has no NULLs as well. + Notice: (this->item == lookup_engine->item) + */ + return 0; + } + } + } + + if (has_covering_null_row) + { + /* + If there is a NULL-only row that covers all columns the result of IN + is UNKNOWN. 
+ */ + item_in->value= 0; + /* + TIMOUR: which one is the right way to propagate an UNKNOWN result? + Should we also set empty_result_set= FALSE; ??? + */ + //item_in->was_null= 1; + item_in->null_value= 1; + return 0; + } + + /* + There is no complete match. Look for a partial match (UNKNOWN result), or + no match (FALSE). + */ + if (tmp_table->file->inited) + tmp_table->file->ha_index_end(); + + if (partial_match()) + { + /* The result of IN is UNKNOWN. */ + item_in->value= 0; + /* + TIMOUR: which one is the right way to propagate an UNKNOWN result? + Should we also set empty_result_set= FALSE; ??? + */ + //item_in->was_null= 1; + item_in->null_value= 1; + } + else + { + /* The result of IN is FALSE. */ + item_in->value= 0; + /* + TIMOUR: which one is the right way to propagate an UNKNOWN result? + Should we also set empty_result_set= FALSE; ??? + */ + //item_in->was_null= 0; + item_in->null_value= 0; + } + + return 0; +} + + +void subselect_partial_match_engine::print(String *str, + enum_query_type query_type) +{ + /* + Should never be called as the actual engine cannot be known at query + optimization time. + DBUG_ASSERT(FALSE); + */ +} + + +/* + @param non_null_key_parts + @param partial_match_key_parts A union of all single-column NULL key parts. + + @retval FALSE the engine was initialized successfully + @retval TRUE there was some (memory allocation) error during initialization, + such errors should be interpreted as revert to other strategy +*/ + +bool +subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, + MY_BITMAP *partial_match_key_parts) +{ + THD *thd= get_thd(); + /* The length in bytes of the rowids (positions) of tmp_table. 
*/ + uint rowid_length= tmp_table->file->ref_length; + ha_rows row_count= tmp_table->file->stats.records; + rownum_t cur_rownum= 0; + select_materialize_with_stats *result_sink= + (select_materialize_with_stats *) result; + uint cur_keyid= 0; + Item *left= item->get_IN_subquery()->left_exp(); + int error; + + if (merge_keys_count == 0) + { + DBUG_ASSERT(bitmap_bits_set(partial_match_key_parts) == 0 || + has_covering_null_row); + /* There is nothing to initialize, we will only do regular lookups. */ + return FALSE; + } + + /* + If all nullable columns contain only NULLs, there must be one index + over all non-null columns. + */ + DBUG_ASSERT(!has_covering_null_columns || + (has_covering_null_columns && + merge_keys_count == 1 && non_null_key_parts)); + /* + Allocate buffers to hold the merged keys and the mapping between rowids and + row numbers. All small buffers are allocated in the runtime memroot. Big + buffers are allocated from the OS via malloc. + */ + if (!(merge_keys= (Ordered_key**) thd->alloc(merge_keys_count * + sizeof(Ordered_key*))) || + !(null_bitmaps= (MY_BITMAP**) thd->alloc(merge_keys_count * + sizeof(MY_BITMAP*))) || + !(row_num_to_rowid= (uchar*) my_malloc(PSI_INSTRUMENT_ME, + static_cast(row_count * rowid_length), + MYF(MY_WME | MY_THREAD_SPECIFIC)))) + return TRUE; + + /* Create the only non-NULL key if there is any. */ + if (non_null_key_parts) + { + non_null_key= (new (thd->mem_root) + Ordered_key(cur_keyid, tmp_table, left, + 0, 0, 0, row_num_to_rowid)); + if (non_null_key->init(non_null_key_parts)) + return TRUE; + merge_keys[cur_keyid]= non_null_key; + merge_keys[cur_keyid]->first(); + ++cur_keyid; + } + + /* + If all nullable columns contain NULLs, the only key that is needed is the + only non-NULL key that is already created above. 
+ */ + if (!has_covering_null_columns) + { + if (my_bitmap_init_memroot(&matching_keys, merge_keys_count, thd->mem_root) || + my_bitmap_init_memroot(&matching_outer_cols, merge_keys_count, thd->mem_root)) + return TRUE; + + /* + Create one single-column NULL-key for each column in + partial_match_key_parts. + */ + for (uint i= 0; i < partial_match_key_parts->n_bits; i++) + { + /* Skip columns that have no NULLs, or contain only NULLs. */ + if (!bitmap_is_set(partial_match_key_parts, i) || + result_sink->get_null_count_of_col(i) == row_count) + continue; + + merge_keys[cur_keyid]= new (thd->mem_root) + Ordered_key(cur_keyid, tmp_table, + left->element_index(i), + result_sink->get_null_count_of_col(i), + result_sink->get_min_null_of_col(i), + result_sink->get_max_null_of_col(i), + row_num_to_rowid); + if (merge_keys[cur_keyid]->init(i)) + return TRUE; + merge_keys[cur_keyid]->first(); + ++cur_keyid; + } + } + DBUG_ASSERT(cur_keyid == merge_keys_count); + + /* Populate the indexes with data from the temporary table. */ + if (unlikely(tmp_table->file->ha_rnd_init_with_error(1))) + return TRUE; + tmp_table->file->extra_opt(HA_EXTRA_CACHE, + current_thd->variables.read_buff_size); + tmp_table->null_row= 0; + while (TRUE) + { + error= tmp_table->file->ha_rnd_next(tmp_table->record[0]); + + if (error == HA_ERR_ABORTED_BY_USER) + break; + /* + This is a temp table that we fully own, there should be no other + cause to stop the iteration than EOF. + */ + DBUG_ASSERT(!error || error == HA_ERR_END_OF_FILE); + if (unlikely(error == HA_ERR_END_OF_FILE)) + { + DBUG_ASSERT(cur_rownum == tmp_table->file->stats.records); + break; + } + + /* + Save the position of this record in the row_num -> rowid mapping. + */ + tmp_table->file->position(tmp_table->record[0]); + memcpy(row_num_to_rowid + cur_rownum * rowid_length, + tmp_table->file->ref, rowid_length); + + /* Add the current row number to the corresponding keys. 
*/ + if (non_null_key) + { + /* By definition there are no NULLs in the non-NULL key. */ + non_null_key->add_key(cur_rownum); + } + + for (uint i= (non_null_key ? 1 : 0); i < merge_keys_count; i++) + { + /* + Check if the first and only indexed column contains NULL in the current + row, and add the row number to the corresponding key. + */ + if (merge_keys[i]->get_field(0)->is_null()) + merge_keys[i]->set_null(cur_rownum); + else + merge_keys[i]->add_key(cur_rownum); + } + ++cur_rownum; + } + + tmp_table->file->ha_rnd_end(); + + /* Sort all the keys by their NULL selectivity. */ + my_qsort(merge_keys, merge_keys_count, sizeof(Ordered_key*), + (qsort_cmp) cmp_keys_by_null_selectivity); + + /* Sort the keys in each of the indexes. */ + for (uint i= 0; i < merge_keys_count; i++) + if (merge_keys[i]->sort_keys()) + return TRUE; + + if (init_queue(&pq, merge_keys_count, 0, FALSE, + subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL, + 0, 0)) + return TRUE; + + return FALSE; +} + + +subselect_rowid_merge_engine::~subselect_rowid_merge_engine() +{ + /* None of the resources below is allocated if there are no ordered keys. */ + if (merge_keys_count) + { + my_free(row_num_to_rowid); + for (uint i= 0; i < merge_keys_count; i++) + delete merge_keys[i]; + delete_queue(&pq); + if (tmp_table->file->inited == handler::RND) + tmp_table->file->ha_rnd_end(); + } +} + + +void subselect_rowid_merge_engine::cleanup() +{ +} + + +/* + Quick sort comparison function to compare keys in order of decreasing bitmap + selectivity, so that the most selective keys come first. 
+ + @param k1 first key to compare + @param k2 second key to compare + + @retval 1 if k1 is less selective than k2 + @retval 0 if k1 is equally selective as k2 + @retval -1 if k1 is more selective than k2 +*/ + +int +subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key **k1, + Ordered_key **k2) +{ + double k1_sel= (*k1)->null_selectivity(); + double k2_sel= (*k2)->null_selectivity(); + if (k1_sel < k2_sel) + return 1; + if (k1_sel > k2_sel) + return -1; + return 0; +} + + +/* +*/ + +int +subselect_rowid_merge_engine::cmp_keys_by_cur_rownum(void *arg, + uchar *k1, uchar *k2) +{ + rownum_t r1= ((Ordered_key*) k1)->current(); + rownum_t r2= ((Ordered_key*) k2)->current(); + + return (r1 < r2) ? -1 : (r1 > r2) ? 1 : 0; +} + + +/* + Check if certain table row contains a NULL in all columns for which there is + no match in the corresponding value index. + + @note + There is no need to check the columns that contain only NULLs, because + those are guaranteed to match. + + @retval TRUE if a NULL row exists + @retval FALSE otherwise +*/ + +bool subselect_rowid_merge_engine::test_null_row(rownum_t row_num) +{ + Ordered_key *cur_key; + for (uint i = 0; i < merge_keys_count; i++) + { + cur_key= merge_keys[i]; + if (bitmap_is_set(&matching_keys, cur_key->get_keyid())) + { + /* + The key 'i' (with id 'cur_keyid') already matches a value in row + 'row_num', thus we skip it as it can't possibly match a NULL. + */ + continue; + } + if (!cur_key->is_null(row_num)) + return FALSE; + } + return TRUE; +} + + +/** + Test if a subset of NULL-able columns contains a row of NULLs. 
+ @retval TRUE if such a row exists + @retval FALSE no complementing null row +*/ + +bool subselect_rowid_merge_engine:: +exists_complementing_null_row(MY_BITMAP *keys_to_complement) +{ + rownum_t highest_min_row= 0; + rownum_t lowest_max_row= UINT_MAX; + uint count_null_keys, i; + Ordered_key *cur_key; + + if (!count_columns_with_nulls) + { + /* + If there are both NULLs and non-NUll values in the outer reference, and + the subquery contains no NULLs, a complementing NULL row cannot exist. + */ + return FALSE; + } + + for (i= (non_null_key ? 1 : 0), count_null_keys= 0; i < merge_keys_count; i++) + { + cur_key= merge_keys[i]; + if (bitmap_is_set(keys_to_complement, cur_key->get_keyid())) + continue; + if (!cur_key->get_null_count()) + { + /* If there is column without NULLs, there cannot be a partial match. */ + return FALSE; + } + if (cur_key->get_min_null_row() > highest_min_row) + highest_min_row= cur_key->get_min_null_row(); + if (cur_key->get_max_null_row() < lowest_max_row) + lowest_max_row= cur_key->get_max_null_row(); + null_bitmaps[count_null_keys++]= cur_key->get_null_key(); + } + + if (lowest_max_row < highest_min_row) + { + /* The intersection of NULL rows is empty. */ + return FALSE; + } + + return bitmap_exists_intersection((const MY_BITMAP**) null_bitmaps, + count_null_keys, + (uint)highest_min_row, (uint)lowest_max_row); +} + + +/* + @retval TRUE there is a partial match (UNKNOWN) + @retval FALSE there is no match at all (FALSE) +*/ + +bool subselect_rowid_merge_engine::partial_match() +{ + Ordered_key *min_key; /* Key that contains the current minimum position. */ + rownum_t min_row_num; /* Current row number of min_key. */ + Ordered_key *cur_key; + rownum_t cur_row_num; + uint count_nulls_in_search_key= 0; + uint max_null_in_any_row= + ((select_materialize_with_stats *) result)->get_max_nulls_in_row(); + bool res= FALSE; + + /* If there is a non-NULL key, it must be the first key in the keys array. 
*/ + DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key)); + /* The prioryty queue for keys must be empty. */ + DBUG_ASSERT(!pq.elements); + + /* All data accesses during execution are via handler::ha_rnd_pos() */ + if (unlikely(tmp_table->file->ha_rnd_init_with_error(0))) + { + res= FALSE; + goto end; + } + + /* Check if there is a match for the columns of the only non-NULL key. */ + if (non_null_key && !non_null_key->lookup()) + { + res= FALSE; + goto end; + } + + /* + If all nullable columns contain only NULLs, then there is a guaranteed + partial match, and we don't need to search for a matching row. + */ + if (has_covering_null_columns) + { + res= TRUE; + goto end; + } + + if (non_null_key) + queue_insert(&pq, (uchar *) non_null_key); + /* + Do not add the non_null_key, since it was already processed above. + */ + bitmap_clear_all(&matching_outer_cols); + for (uint i= MY_TEST(non_null_key); i < merge_keys_count; i++) + { + DBUG_ASSERT(merge_keys[i]->get_column_count() == 1); + if (merge_keys[i]->get_search_key(0)->null_value) + { + ++count_nulls_in_search_key; + bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_keyid()); + } + else if (merge_keys[i]->lookup()) + queue_insert(&pq, (uchar *) merge_keys[i]); + } + + /* + If the outer reference consists of only NULLs, or if it has NULLs in all + nullable columns (above we guarantee there is a match for the non-null + coumns), the result is UNKNOWN. + */ + if (count_nulls_in_search_key == merge_keys_count - MY_TEST(non_null_key)) + { + res= TRUE; + goto end; + } + + /* + If the outer row has NULLs in some columns, and + there is no match for any of the remaining columns, and + there is a subquery row with NULLs in all unmatched columns, + then there is a partial match, otherwise the result is FALSE. 
+ */ + if (count_nulls_in_search_key && !pq.elements) + { + DBUG_ASSERT(!non_null_key); + /* + Check if the intersection of all NULL bitmaps of all keys that + are not in matching_outer_cols is non-empty. + */ + res= exists_complementing_null_row(&matching_outer_cols); + goto end; + } + + /* + If there is no NULL (sub)row that covers all NULL columns, and there is no + match for any of the NULL columns, the result is FALSE. Notice that if there + is a non-null key, and there is only one matching key, the non-null key is + the matching key. This is so, because this method returns FALSE if the + non-null key doesn't have a match. + */ + if (!count_nulls_in_search_key && + (!pq.elements || + (pq.elements == 1 && non_null_key && + max_null_in_any_row < merge_keys_count-1))) + { + if (!pq.elements) + { + DBUG_ASSERT(!non_null_key); + /* + The case of a covering null row is handled by + subselect_partial_match_engine::exec() + */ + DBUG_ASSERT(max_null_in_any_row != tmp_table->s->fields); + } + res= FALSE; + goto end; + } + + DBUG_ASSERT(pq.elements); + + min_key= (Ordered_key*) queue_remove_top(&pq); + min_row_num= min_key->current(); + bitmap_set_bit(&matching_keys, min_key->get_keyid()); + bitmap_union(&matching_keys, &matching_outer_cols); + if (min_key->next_same()) + queue_insert(&pq, (uchar *) min_key); + + if (pq.elements == 0) + { + /* + Check the only matching row of the only key min_key for NULL matches + in the other columns. + */ + res= test_null_row(min_row_num); + goto end; + } + + while (TRUE) + { + cur_key= (Ordered_key*) queue_remove_top(&pq); + cur_row_num= cur_key->current(); + + if (cur_row_num == min_row_num) + bitmap_set_bit(&matching_keys, cur_key->get_keyid()); + else + { + /* Follows from the correct use of priority queue. 
*/ + DBUG_ASSERT(cur_row_num > min_row_num); + if (test_null_row(min_row_num)) + { + res= TRUE; + goto end; + } + else + { + min_key= cur_key; + min_row_num= cur_row_num; + bitmap_clear_all(&matching_keys); + bitmap_set_bit(&matching_keys, min_key->get_keyid()); + bitmap_union(&matching_keys, &matching_outer_cols); + } + } + + if (cur_key->next_same()) + queue_insert(&pq, (uchar *) cur_key); + + if (pq.elements == 0) + { + /* Check the last row of the last column in PQ for NULL matches. */ + res= test_null_row(min_row_num); + goto end; + } + } + + /* We should never get here - all branches must be handled explicitly above. */ + DBUG_ASSERT(FALSE); + +end: + if (!has_covering_null_columns) + bitmap_clear_all(&matching_keys); + queue_remove_all(&pq); + tmp_table->file->ha_rnd_end(); + return res; +} + + +subselect_table_scan_engine::subselect_table_scan_engine( + THD *thd, + subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, + Item_subselect *item_arg, + select_result_interceptor *result_arg, + List *equi_join_conds_arg, + bool has_covering_null_row_arg, + bool has_covering_null_columns_arg, + uint count_columns_with_nulls_arg) + :subselect_partial_match_engine(thd, engine_arg, tmp_table_arg, item_arg, + result_arg, equi_join_conds_arg, + has_covering_null_row_arg, + has_covering_null_columns_arg, + count_columns_with_nulls_arg) +{} + + +/* + TIMOUR: + This method is based on subselect_uniquesubquery_engine::scan_table(). + Consider refactoring somehow, 80% of the code is the same. 
+ + for each row_i in tmp_table + { + count_matches= 0; + for each row element row_i[j] + { + if (outer_ref[j] is NULL || row_i[j] is NULL || outer_ref[j] == row_i[j]) + ++count_matches; + } + if (count_matches == outer_ref.elements) + return TRUE + } + return FALSE +*/ + +bool subselect_table_scan_engine::partial_match() +{ + List_iterator_fast equality_it(*equi_join_conds); + Item *cur_eq; + uint count_matches; + int error; + bool res; + + if (unlikely(tmp_table->file->ha_rnd_init_with_error(1))) + { + res= FALSE; + goto end; + } + + tmp_table->file->extra_opt(HA_EXTRA_CACHE, + get_thd()->variables.read_buff_size); + for (;;) + { + error= tmp_table->file->ha_rnd_next(tmp_table->record[0]); + if (unlikely(error)) + { + if (error == HA_ERR_END_OF_FILE) + { + error= 0; + break; + } + else + { + error= report_error(tmp_table, error); + break; + } + } + + equality_it.rewind(); + count_matches= 0; + while ((cur_eq= equality_it++)) + { + DBUG_ASSERT(cur_eq->type() == Item::FUNC_ITEM && + ((Item_func*)cur_eq)->functype() == Item_func::EQ_FUNC); + if (!cur_eq->val_int() && !cur_eq->null_value) + break; + ++count_matches; + } + if (count_matches == tmp_table->s->fields) + { + res= TRUE; /* Found a matching row. */ + goto end; + } + } + + res= FALSE; +end: + tmp_table->file->ha_rnd_end(); + return res; +} + + +void subselect_table_scan_engine::cleanup() +{ +} + + +void Item_subselect::register_as_with_rec_ref(With_element *with_elem) +{ + with_elem->sq_with_rec_ref.link_in_list(this, &this->next_with_rec_ref); + with_recursive_reference= true; +} + + +/* + Create an execution tracker for the expression cache we're using for this + subselect; add the tracker to the query plan. 
+*/ + +void Item_subselect::init_expr_cache_tracker(THD *thd) +{ + if(!expr_cache) + return; + + Explain_query *qw= thd->lex->explain; + DBUG_ASSERT(qw); + Explain_node *node= qw->get_node(unit->first_select()->select_number); + if (!node) + return; + DBUG_ASSERT(expr_cache->type() == Item::EXPR_CACHE_ITEM); + node->cache_tracker= ((Item_cache_wrapper *)expr_cache)->init_tracker(qw->mem_root); +} diff --git a/sql/item_subselect.h b/sql/item_subselect.h new file mode 100644 index 00000000..f838c0d4 --- /dev/null +++ b/sql/item_subselect.h @@ -0,0 +1,1551 @@ +#ifndef ITEM_SUBSELECT_INCLUDED +#define ITEM_SUBSELECT_INCLUDED + +/* Copyright (c) 2002, 2011, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* subselect Item */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include + +class st_select_lex; +class st_select_lex_unit; +class JOIN; +class select_result_interceptor; +class subselect_engine; +class subselect_hash_sj_engine; +class Item_bool_func2; +class Comp_creator; +class With_element; +class Field_pair; + +typedef class st_select_lex SELECT_LEX; + +/** + Convenience typedef used in this file, and further used by any files + including this file. 
+*/ +typedef Comp_creator* (*chooser_compare_func_creator)(bool invert); +class Cached_item; + +/* base class for subselects */ + +class Item_subselect :public Item_result_field, + protected Used_tables_and_const_cache +{ + /* + Set to TRUE if the value is assigned for the subselect + FALSE: subquery not executed or the subquery returns an empty result + */ + bool value_assigned; + bool own_engine; /* the engine was not taken from other Item_subselect */ +protected: + /* thread handler, will be assigned in fix_fields only */ + THD *thd; + /* old engine if engine was changed */ + subselect_engine *old_engine; + /* allowed number of columns (1 for single value subqueries) */ + uint max_columns; + /* where subquery is placed */ + enum_parsing_place parsing_place; + /* work with 'substitution' */ + bool have_to_be_excluded; + + bool inside_first_fix_fields; + bool done_first_fix_fields; + Item *expr_cache; + /* + Set to TRUE if at optimization or execution time we determine that this + item's value is a constant. We need this member because it is not possible + to substitute 'this' with a constant item. + */ + bool forced_const; + /* Set to the result of the last call of is_expensive() */ + bool expensive_fl; +#ifndef DBUG_OFF + /* Count the number of times this subquery predicate has been executed. */ + uint exec_counter; +#endif +public: + /* + Used inside Item_subselect::fix_fields() according to this scenario: + > Item_subselect::fix_fields + > engine->prepare + > child_join->prepare + (Here we realize we need to do the rewrite and set + substitution= some new Item, eg. Item_in_optimizer ) + < child_join->prepare + < engine->prepare + *ref= substitution; + substitution= NULL; + < Item_subselect::fix_fields + */ + /* TODO make this protected member again. */ + Item *substitution; + /* engine that perform execution of subselect (single select or union) */ + /* TODO make this protected member again. 
*/ + subselect_engine *engine; + /* unit of subquery */ + st_select_lex_unit *unit; + /* Cached buffers used when calling filesort in sub queries */ + Filesort_buffer filesort_buffer; + LEX_STRING sortbuffer; + /* A reference from inside subquery predicate to somewhere outside of it */ + class Ref_to_outside : public Sql_alloc + { + public: + st_select_lex *select; /* Select where the reference is pointing to */ + /* + What is being referred. This may be NULL when we're referring to an + aggregate function. + */ + Item *item; + }; + /* + References from within this subquery to somewhere outside of it (i.e. to + parent select, grandparent select, etc) + */ + List upper_refs; + st_select_lex *parent_select; + + /* + TRUE<=>Table Elimination has made it redundant to evaluate this select + (and so it is not part of QEP, etc) + */ + bool eliminated; + + /* subquery is transformed */ + bool changed; + + /* TRUE <=> The underlying SELECT is correlated w.r.t some ancestor select */ + bool is_correlated; + + /* + TRUE <=> the subquery contains a recursive reference in the FROM list + of one of its selects. In this case some of subquery optimization + strategies cannot be applied for the subquery; + */ + bool with_recursive_reference; + + /* To link Item_subselects containing references to the same recursive CTE */ + Item_subselect *next_with_rec_ref; + + enum subs_type {UNKNOWN_SUBS, SINGLEROW_SUBS, + EXISTS_SUBS, IN_SUBS, ALL_SUBS, ANY_SUBS}; + + Item_subselect(THD *thd); + + virtual subs_type substype() { return UNKNOWN_SUBS; } + bool is_exists_predicate() + { + return substype() == Item_subselect::EXISTS_SUBS; + } + bool is_in_predicate() + { + return get_IN_subquery() != NULL; + } + + /* + We need this method, because some compilers do not allow 'this' + pointer in constructor initialization list, but we need to pass a pointer + to subselect Item class to select_result_interceptor's constructor. 
+ */ + virtual void init (st_select_lex *select_lex, + select_result_interceptor *result); + + ~Item_subselect(); + void cleanup() override; + virtual void reset() + { + eliminated= FALSE; + null_value= 1; + } + /** + Set the subquery result to a default value consistent with the semantics of + the result row produced for queries with implicit grouping. + */ + void no_rows_in_result() override= 0; + virtual bool select_transformer(JOIN *join); + bool assigned() { return value_assigned; } + void assigned(bool a) { value_assigned= a; } + enum Type type() const override; + bool is_null() override + { + update_null_value(); + return null_value; + } + bool fix_fields(THD *thd, Item **ref) override; + bool mark_as_dependent(THD *thd, st_select_lex *select, Item *item); + void fix_after_pullout(st_select_lex *new_parent, Item **ref, + bool merge) override; + void recalc_used_tables(st_select_lex *new_parent, bool after_pullout); + virtual bool exec(); + /* + If subquery optimization or execution determines that the subquery has + an empty result, mark the subquery predicate as a constant value. + */ + void make_const() + { + used_tables_cache= 0; + const_item_cache= 0; + forced_const= TRUE; + } + virtual bool fix_length_and_dec(); + table_map used_tables() const override; + table_map not_null_tables() const override { return 0; } + bool const_item() const override; + inline table_map get_used_tables_cache() { return used_tables_cache; } + Item *get_tmp_table_item(THD *thd) override; + void update_used_tables() override; + void print(String *str, enum_query_type query_type) override; + virtual bool have_guarded_conds() { return FALSE; } + bool change_engine(subselect_engine *eng) + { + old_engine= engine; + engine= eng; + return eng == 0; + } + bool engine_changed(subselect_engine *eng) { return engine != eng; } + /* + True if this subquery has been already evaluated. Implemented only for + single select and union subqueries only. 
+ */ + bool is_evaluated() const; + bool is_uncacheable() const; + bool is_expensive() override; + + /* + Used by max/min subquery to initialize value presence registration + mechanism. Engine call this method before rexecution query. + */ + virtual void reset_value_registration() {} + enum_parsing_place place() { return parsing_place; } + bool walk(Item_processor processor, bool walk_subquery, void *arg) override; + bool unknown_splocal_processor(void *arg) override; + bool mark_as_eliminated_processor(void *arg) override; + bool eliminate_subselect_processor(void *arg) override; + bool enumerate_field_refs_processor(void *arg) override; + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function("select ...", arg, VCOL_IMPOSSIBLE); + } + /** + Callback to test if an IN predicate is expensive. + + @notes + The return value affects the behavior of make_cond_for_table(). + + @retval TRUE if the predicate is expensive + @retval FALSE otherwise + */ + bool is_expensive_processor(void *arg) override { return is_expensive(); } + bool update_table_bitmaps_processor(void *arg) override; + + /** + Get the SELECT_LEX structure associated with this Item. 
+ @return the SELECT_LEX structure associated with this Item + */ + st_select_lex* get_select_lex(); + bool expr_cache_is_needed(THD *) override; + void get_cache_parameters(List ¶meters) override; + bool is_subquery_processor (void *opt_arg) override { return 1; } + bool exists2in_processor(void *opt_arg) override { return 0; } + bool limit_index_condition_pushdown_processor(void *opt_arg) override + { + return TRUE; + } + + void register_as_with_rec_ref(With_element *with_elem); + void init_expr_cache_tracker(THD *thd); + + Item* build_clone(THD *thd) override { return 0; } + Item* get_copy(THD *thd) override { return 0; } + + st_select_lex *wrap_tvc_into_select(THD *thd, st_select_lex *tvc_sl); + + friend class select_result_interceptor; + friend class Item_in_optimizer; + friend bool Item_field::fix_fields(THD *, Item **); + friend int Item_field::fix_outer_field(THD *, Field **, Item **); + friend bool Item_ref::fix_fields(THD *, Item **); + friend void mark_select_range_as_dependent(THD*, + st_select_lex*, st_select_lex*, + Field*, Item*, Item_ident*, + bool); + friend bool convert_join_subqueries_to_semijoins(JOIN *join); +}; + +/* single value subselect */ + +class Item_cache; +class Item_singlerow_subselect :public Item_subselect +{ +protected: + Item_cache *value, **row; +public: + Item_singlerow_subselect(THD *thd_arg, st_select_lex *select_lex); + Item_singlerow_subselect(THD *thd_arg): Item_subselect(thd_arg), value(0), row (0) + {} + + void cleanup() override; + subs_type substype() override { return SINGLEROW_SUBS; } + + void reset() override; + void no_rows_in_result() override; + bool select_transformer(JOIN *join) override; + void store(uint i, Item* item); + double val_real() override; + longlong val_int() override; + String *val_str(String *) override; + bool val_native(THD *thd, Native *) override; + my_decimal *val_decimal(my_decimal *) override; + bool val_bool() override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) 
override; + const Type_handler *type_handler() const override; + bool fix_length_and_dec() override; + + uint cols() const override; + Item* element_index(uint i) override + { return reinterpret_cast(row[i]); } + Item** addr(uint i) override { return (Item**)row + i; } + bool check_cols(uint c) override; + bool null_inside() override; + void bring_value() override; + + /** + This method is used to implement a special case of semantic tree + rewriting, mandated by a SQL:2003 exception in the specification. + The only caller of this method is handle_sql2003_note184_exception(), + see the code there for more details. + Note that this method breaks the object internal integrity, by + removing it's association with the corresponding SELECT_LEX, + making this object orphan from the parse tree. + No other method, beside the destructor, should be called on this + object, as it is now invalid. + @return the SELECT_LEX structure that was given in the constructor. + */ + st_select_lex* invalidate_and_restore_select_lex(); + + Item* expr_cache_insert_transformer(THD *thd, uchar *unused) override; + + friend class select_singlerow_subselect; +}; + +/* used in static ALL/ANY optimization */ +class select_max_min_finder_subselect; +class Item_maxmin_subselect :public Item_singlerow_subselect +{ +protected: + bool max; + bool was_values; // Set if we have found at least one row +public: + Item_maxmin_subselect(THD *thd, Item_subselect *parent, + st_select_lex *select_lex, bool max); + void print(String *str, enum_query_type query_type) override; + void cleanup() override; + bool any_value() { return was_values; } + void register_value() { was_values= TRUE; } + void reset_value_registration() override { was_values= FALSE; } + void no_rows_in_result() override; +}; + +/* exists subselect */ + +class Item_exists_subselect :public Item_subselect +{ +protected: + Item_func_not *upper_not; + bool value; /* value of this item (boolean: exists/not-exists) */ + + void 
init_length_and_dec(); + bool select_prepare_to_be_in(); + +public: + /* + Used by subquery optimizations to keep track about in which clause this + subquery predicate is located: + NO_JOIN_NEST - the predicate is an AND-part of the WHERE + join nest pointer - the predicate is an AND-part of ON expression + of a join nest + NULL - for all other locations + */ + TABLE_LIST *emb_on_expr_nest; + /** + Reference on the Item_in_optimizer wrapper of this subquery + */ + Item_in_optimizer *optimizer; + /* true if we got this from EXISTS or to IN */ + bool exists_transformed; + + Item_exists_subselect(THD *thd_arg, st_select_lex *select_lex); + Item_exists_subselect(THD *thd_arg): + Item_subselect(thd_arg), upper_not(NULL), + emb_on_expr_nest(NULL), optimizer(0), exists_transformed(0) + {} + + subs_type substype() override { return EXISTS_SUBS; } + void reset() override + { + eliminated= FALSE; + value= 0; + } + void no_rows_in_result() override; + + const Type_handler *type_handler() const override + { + return &type_handler_bool; + } + longlong val_int() override; + double val_real() override; + String *val_str(String*) override; + my_decimal *val_decimal(my_decimal *) override; + bool val_bool() override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { return get_date_from_int(thd, ltime, fuzzydate); } + bool fix_fields(THD *thd, Item **ref) override; + bool fix_length_and_dec() override; + void print(String *str, enum_query_type query_type) override; + bool select_transformer(JOIN *join) override; + bool exists2in_processor(void *opt_arg) override; + + Item* expr_cache_insert_transformer(THD *thd, uchar *unused) override; + + void mark_as_condition_AND_part(TABLE_LIST *embedding) override + { + emb_on_expr_nest= embedding; + } + void under_not(Item_func_not *upper) override { upper_not= upper; }; + + void set_exists_transformed() { exists_transformed= TRUE; } + + friend class select_exists_subselect; + friend class 
subselect_uniquesubquery_engine; + friend class subselect_indexsubquery_engine; +}; + + +TABLE_LIST * const NO_JOIN_NEST=(TABLE_LIST*)0x1; + +/* + Possible methods to execute an IN predicate. These are set by the optimizer + based on user-set optimizer switches, semantic analysis and cost comparison. +*/ +#define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */ +/* The Final decision about the strategy is made. */ +#define SUBS_STRATEGY_CHOSEN 1 +#define SUBS_SEMI_JOIN 2 /* IN was converted to semi-join. */ +#define SUBS_IN_TO_EXISTS 4 /* IN was converted to correlated EXISTS. */ +#define SUBS_MATERIALIZATION 8 /* Execute IN via subquery materialization. */ +/* Partial matching substrategies of MATERIALIZATION. */ +#define SUBS_PARTIAL_MATCH_ROWID_MERGE 16 +#define SUBS_PARTIAL_MATCH_TABLE_SCAN 32 +/* ALL/ANY will be transformed with max/min optimization */ +/* The subquery has not aggregates, transform it into a MAX/MIN query. */ +#define SUBS_MAXMIN_INJECTED 64 +/* The subquery has aggregates, use a special max/min subselect engine. */ +#define SUBS_MAXMIN_ENGINE 128 + + +/** + Representation of IN subquery predicates of the form + "left_expr IN (SELECT ...)". + + @details + This class has: + - A "subquery execution engine" (as a subclass of Item_subselect) that allows + it to evaluate subqueries. (and this class participates in execution by + having was_null variable where part of execution result is stored. + - Transformation methods (todo: more on this). + + This class is not used directly, it is "wrapped" into Item_in_optimizer + which provides some small bits of subquery evaluation. +*/ + +class Item_in_subselect :public Item_exists_subselect +{ +protected: + /* + Cache of the left operand of the subquery predicate. Allocated in the + runtime memory root, for each execution, thus need not be freed. 
+ */ + List *left_expr_cache; + bool first_execution; + + /* + expr & optimizer used in subselect rewriting to store Item for + all JOIN in UNION + */ + Item *expr; + bool was_null; + /* A bitmap of possible execution strategies for an IN predicate. */ + uchar in_strategy; +protected: + /* Used to trigger on/off conditions that were pushed down to subselect */ + bool *pushed_cond_guards; + Comp_creator *func; + +protected: + bool init_cond_guards(); + bool select_in_like_transformer(JOIN *join); + bool single_value_transformer(JOIN *join); + bool row_value_transformer(JOIN * join); + bool fix_having(Item *having, st_select_lex *select_lex); + bool create_single_in_to_exists_cond(JOIN * join, + Item **where_item, + Item **having_item); + bool create_row_in_to_exists_cond(JOIN * join, + Item **where_item, + Item **having_item); + Item *left_expr; + /* + Important for PS/SP: left_expr_orig is the item that left_expr originally + pointed at. That item is allocated on the statement arena, while + left_expr could later be changed to something on the execution arena. + */ + Item *left_expr_orig; +public: + /* Priority of this predicate in the convert-to-semi-join-nest process. */ + int sj_convert_priority; + /* May be TRUE only for the candidates to semi-join conversion */ + bool do_not_convert_to_sj; + /* + Types of left_expr and subquery's select list allow to perform subquery + materialization. Currently, we set this to FALSE when it as well could + be TRUE. This is to be properly addressed with fix for BUG#36752. + */ + bool types_allow_materialization; + + /* + Same as above, but they also allow to scan the materialized table. + */ + bool sjm_scan_allowed; + + /* + JoinTaB Materialization (JTBM) members + */ + + /* + TRUE <=> This subselect has been converted into non-mergeable semi-join + table. 
+ */ + bool is_jtbm_merged; + + /* (Applicable if is_jtbm_merged==TRUE) Time required to run the materialized join */ + double jtbm_read_time; + + /* (Applicable if is_jtbm_merged==TRUE) Number of output rows in materialized join */ + double jtbm_record_count; + + /* + (Applicable if is_jtbm_merged==TRUE) TRUE <=> The materialized subselect is + a degenerate subselect which produces 0 or 1 rows, which we know at + optimization phase. + Examples: + 1. subquery has "Impossible WHERE": + + SELECT * FROM ot WHERE ot.column IN (SELECT it.col FROM it WHERE 2 > 3) + + 2. Subquery produces one row which opt_sum.cc is able to get with one lookup: + + SELECT * FROM ot WHERE ot.column IN (SELECT MAX(it.key_col) FROM it) + */ + bool is_jtbm_const_tab; + + /* + (Applicable if is_jtbm_const_tab==TRUE) Whether the subquery has produced + the row (or not) + */ + bool jtbm_const_row_found; + + /* + TRUE<=>this is a flattenable semi-join, false otherwise. + */ + bool is_flattenable_semijoin; + + /* + TRUE<=>registered in the list of semijoins in outer select + */ + bool is_registered_semijoin; + + List corresponding_fields; + + /* + Used to determine how this subselect item is represented in the item tree, + in case there is a need to locate it there and replace with something else. + Two options are possible: + 1. This item is there 'as-is'. + 1. This item is wrapped within Item_in_optimizer. + */ + Item *original_item() + { + return (is_flattenable_semijoin && !exists_transformed ? + (Item*)this : + (Item*)optimizer); + } + + bool *get_cond_guard(int i) + { + return pushed_cond_guards ? 
pushed_cond_guards + i : NULL; + } + void set_cond_guard_var(int i, bool v) + { + if ( pushed_cond_guards) + pushed_cond_guards[i]= v; + } + bool have_guarded_conds() override { return MY_TEST(pushed_cond_guards); } + + Item_func_not_all *upper_item; // point on NOT/NOP before ALL/SOME subquery + + /* + SET to TRUE if IN subquery is converted from an IN predicate + */ + bool converted_from_in_predicate; + + Item_in_subselect(THD *thd_arg, Item * left_expr, st_select_lex *select_lex); + Item_in_subselect(THD *thd_arg): + Item_exists_subselect(thd_arg), left_expr_cache(0), first_execution(TRUE), + in_strategy(SUBS_NOT_TRANSFORMED), + pushed_cond_guards(NULL), func(NULL), do_not_convert_to_sj(FALSE), + is_jtbm_merged(FALSE), is_jtbm_const_tab(FALSE), upper_item(0), + converted_from_in_predicate(FALSE) {} + void cleanup() override; + subs_type substype() override { return IN_SUBS; } + void reset() override + { + eliminated= FALSE; + value= 0; + null_value= 0; + was_null= 0; + } + bool select_transformer(JOIN *join) override; + bool create_in_to_exists_cond(JOIN *join_arg); + bool inject_in_to_exists_cond(JOIN *join_arg); + + bool exec() override; + longlong val_int() override; + double val_real() override; + String *val_str(String*) override; + my_decimal *val_decimal(my_decimal *) override; + bool val_bool() override; + bool test_limit(st_select_lex_unit *unit); + void print(String *str, enum_query_type query_type) override; + enum precedence precedence() const override { return IN_PRECEDENCE; } + bool fix_fields(THD *thd, Item **ref) override; + bool fix_length_and_dec() override; + void fix_after_pullout(st_select_lex *new_parent, Item **ref, + bool merge) override; + bool const_item() const override + { + return Item_subselect::const_item() && left_expr->const_item(); + } + void update_used_tables() override; + bool setup_mat_engine(); + bool init_left_expr_cache(); + /* Inform 'this' that it was computed, and contains a valid result. 
*/ + void set_first_execution() { if (first_execution) first_execution= FALSE; } + bool expr_cache_is_needed(THD *thd) override; + inline bool left_expr_has_null(); + + void disable_cond_guard_for_const_null_left_expr(int i) + { + if (left_expr->can_eval_in_optimize()) + { + if (left_expr->element_index(i)->is_null()) + set_cond_guard_var(i,FALSE); + } + } + + int optimize(double *out_rows, double *cost); + /* + Return the identifier that we could use to identify the subquery for the + user. + */ + int get_identifier(); + + void block_conversion_to_sj () { do_not_convert_to_sj= TRUE; } + + bool test_strategy(uchar strategy) + { return MY_TEST(in_strategy & strategy); } + + /** + Test that the IN strategy was chosen for execution. This is so + when the CHOSEN flag is ON, and there is no other strategy. + */ + bool test_set_strategy(uchar strategy) + { + DBUG_ASSERT(strategy == SUBS_SEMI_JOIN || + strategy == SUBS_IN_TO_EXISTS || + strategy == SUBS_MATERIALIZATION || + strategy == SUBS_PARTIAL_MATCH_ROWID_MERGE || + strategy == SUBS_PARTIAL_MATCH_TABLE_SCAN || + strategy == SUBS_MAXMIN_INJECTED || + strategy == SUBS_MAXMIN_ENGINE); + return ((in_strategy & SUBS_STRATEGY_CHOSEN) && + (in_strategy & ~SUBS_STRATEGY_CHOSEN) == strategy); + } + + bool is_set_strategy() + { return MY_TEST(in_strategy & SUBS_STRATEGY_CHOSEN); } + + bool has_strategy() + { return in_strategy != SUBS_NOT_TRANSFORMED; } + + void add_strategy (uchar strategy) + { + DBUG_ENTER("Item_in_subselect::add_strategy"); + DBUG_PRINT("enter", ("current: %u add: %u", + (uint) in_strategy, (uint) strategy)); + DBUG_ASSERT(strategy != SUBS_NOT_TRANSFORMED); + DBUG_ASSERT(!(strategy & SUBS_STRATEGY_CHOSEN)); + /* + TODO: PS re-execution breaks this condition, because + check_and_do_in_subquery_rewrites() is called for each reexecution + and re-adds the same strategies. 
+ DBUG_ASSERT(!(in_strategy & SUBS_STRATEGY_CHOSEN)); + */ + in_strategy|= strategy; + DBUG_VOID_RETURN; + } + + void reset_strategy(uchar strategy) + { + DBUG_ENTER("Item_in_subselect::reset_strategy"); + DBUG_PRINT("enter", ("current: %u new: %u", + (uint) in_strategy, (uint) strategy)); + DBUG_ASSERT(strategy != SUBS_NOT_TRANSFORMED); + in_strategy= strategy; + DBUG_VOID_RETURN; + } + + void set_strategy(uchar strategy) + { + DBUG_ENTER("Item_in_subselect::set_strategy"); + DBUG_PRINT("enter", ("current: %u set: %u", + (uint) in_strategy, + (uint) (SUBS_STRATEGY_CHOSEN | strategy))); + /* Check that only one strategy is set for execution. */ + DBUG_ASSERT(strategy == SUBS_SEMI_JOIN || + strategy == SUBS_IN_TO_EXISTS || + strategy == SUBS_MATERIALIZATION || + strategy == SUBS_PARTIAL_MATCH_ROWID_MERGE || + strategy == SUBS_PARTIAL_MATCH_TABLE_SCAN || + strategy == SUBS_MAXMIN_INJECTED || + strategy == SUBS_MAXMIN_ENGINE); + in_strategy= (SUBS_STRATEGY_CHOSEN | strategy); + DBUG_VOID_RETURN; + } + + bool walk(Item_processor processor, bool walk_subquery, void *arg) override + { + return left_expr->walk(processor, walk_subquery, arg) || + Item_subselect::walk(processor, walk_subquery, arg); + } + + bool exists2in_processor(void *opt_arg __attribute__((unused))) override + { + return 0; + }; + + bool pushdown_cond_for_in_subquery(THD *thd, Item *cond); + + Item_in_subselect *get_IN_subquery() override + { return this; } + inline Item** left_exp_ptr() + { return &left_expr; } + inline Item* left_exp() const + { return left_expr; } + inline Item* left_exp_orig() const + { return left_expr_orig; } + + friend class Item_ref_null_helper; + friend class Item_is_not_null_test; + friend class Item_in_optimizer; + friend class subselect_indexsubquery_engine; + friend class subselect_hash_sj_engine; + friend class subselect_partial_match_engine; + friend class Item_exists_subselect; +}; + + +/* ALL/ANY/SOME subselect */ +class Item_allany_subselect :public Item_in_subselect 
+{ +public: + chooser_compare_func_creator func_creator; + bool all; + + Item_allany_subselect(THD *thd_arg, Item * left_expr, + chooser_compare_func_creator fc, + st_select_lex *select_lex, bool all); + + void cleanup(); + // only ALL subquery has upper not + subs_type substype() { return all?ALL_SUBS:ANY_SUBS; } + bool select_transformer(JOIN *join); + void create_comp_func(bool invert) { func= func_creator(invert); } + void print(String *str, enum_query_type query_type); + enum precedence precedence() const { return CMP_PRECEDENCE; } + bool is_maxmin_applicable(JOIN *join); + bool transform_into_max_min(JOIN *join); + void no_rows_in_result(); +}; + + +class subselect_engine: public Sql_alloc, + public Type_handler_hybrid_field_type +{ +protected: + select_result_interceptor *result; /* results storage class */ + THD *thd; /* pointer to current THD */ + Item_subselect *item; /* item, that use this engine */ + bool maybe_null; /* may be null (first item in select) */ +public: + + enum enum_engine_type {ABSTRACT_ENGINE, SINGLE_SELECT_ENGINE, + UNION_ENGINE, UNIQUESUBQUERY_ENGINE, + INDEXSUBQUERY_ENGINE, HASH_SJ_ENGINE, + ROWID_MERGE_ENGINE, TABLE_SCAN_ENGINE}; + + subselect_engine(Item_subselect *si, + select_result_interceptor *res): + Type_handler_hybrid_field_type(&type_handler_varchar), + thd(NULL) + { + result= res; + item= si; + maybe_null= 0; + } + virtual ~subselect_engine() = default;; // to satisfy compiler + virtual void cleanup()= 0; + + /* + Also sets "thd" for subselect_engine::result. + Should be called before prepare(). + */ + void set_thd(THD *thd_arg); + THD * get_thd() { return thd ? thd : current_thd; } + virtual int prepare(THD *)= 0; + virtual bool fix_length_and_dec(Item_cache** row)= 0; + /* + Execute the engine + + SYNOPSIS + exec() + + DESCRIPTION + Execute the engine. The result of execution is subquery value that is + either captured by previously set up select_result-based 'sink' or + stored somewhere by the exec() method itself. 
+ + A required side effect: If at least one pushed-down predicate is + disabled, subselect_engine->no_rows() must return correct result after + the exec() call. + + RETURN + 0 - OK + 1 - Either an execution error, or the engine was "changed", and the + caller should call exec() again for the new engine. + */ + virtual int exec()= 0; + virtual uint cols() const= 0; /* return number of columns in select */ + virtual uint8 uncacheable()= 0; /* query is uncacheable */ + virtual void exclude()= 0; + virtual bool may_be_null() { return maybe_null; }; + virtual table_map upper_select_const_tables()= 0; + static table_map calc_const_tables(TABLE_LIST *); + static table_map calc_const_tables(List &list); + virtual void print(String *str, enum_query_type query_type)= 0; + virtual bool change_result(Item_subselect *si, + select_result_interceptor *result, + bool temp= FALSE)= 0; + virtual bool no_tables() const = 0; + /* + Return true we can guarantee that the subquery will always return one row. + */ + virtual bool always_returns_one_row() const { return false; } + virtual bool is_executed() const { return FALSE; } + /* Check if subquery produced any rows during last query execution */ + virtual bool no_rows() = 0; + virtual enum_engine_type engine_type() { return ABSTRACT_ENGINE; } + virtual int get_identifier() { DBUG_ASSERT(0); return 0; } + virtual void force_reexecution() {} +protected: + bool set_row(List &item_list, Item_cache **row); +}; + +class subselect_single_select_engine: public subselect_engine +{ + bool prepared; /* simple subselect is prepared */ + bool executed; /* simple subselect is executed */ + st_select_lex *select_lex; /* corresponding select_lex */ + JOIN * join; /* corresponding JOIN structure */ +public: + subselect_single_select_engine(st_select_lex *select, + select_result_interceptor *result, + Item_subselect *item); + void cleanup() override; + int prepare(THD *thd) override; + bool fix_length_and_dec(Item_cache** row) override; + int exec() 
override; + uint cols() const override; + uint8 uncacheable() override; + void exclude() override; + table_map upper_select_const_tables() override; + void print(String *str, enum_query_type query_type) override; + bool change_result(Item_subselect *si, + select_result_interceptor *result, + bool temp) override; + bool no_tables() const override; + bool always_returns_one_row() const override; + bool may_be_null() override; + bool is_executed() const override { return executed; } + bool no_rows() override; + enum_engine_type engine_type() override { return SINGLE_SELECT_ENGINE; } + int get_identifier() override; + void force_reexecution() override; + void change_select(st_select_lex *new_select) { select_lex= new_select; } + + friend class subselect_hash_sj_engine; + friend class Item_in_subselect; + friend bool execute_degenerate_jtbm_semi_join(THD *thd, + TABLE_LIST *tbl, + Item_in_subselect *subq_pred, + List &eq_list); +}; + + +class subselect_union_engine: public subselect_engine +{ + st_select_lex_unit *unit; /* corresponding unit structure */ +public: + subselect_union_engine(st_select_lex_unit *u, + select_result_interceptor *result, + Item_subselect *item); + void cleanup() override; + int prepare(THD *) override; + bool fix_length_and_dec(Item_cache** row) override; + int exec() override; + uint cols() const override; + uint8 uncacheable() override; + void exclude() override; + table_map upper_select_const_tables() override; + void print(String *str, enum_query_type query_type) override; + bool change_result(Item_subselect *si, + select_result_interceptor *result, + bool temp= FALSE) override; + bool no_tables() const override; + bool is_executed() const override; + void force_reexecution() override; + bool no_rows() override; + enum_engine_type engine_type() override { return UNION_ENGINE; } +}; + + +struct st_join_table; + + +/* + A subquery execution engine that evaluates the subquery by doing one index + lookup in a unique index. 
+ + This engine is used to resolve subqueries in forms + + outer_expr IN (SELECT tbl.unique_key FROM tbl WHERE subq_where) + + or, tuple-based: + + (oe1, .. oeN) IN (SELECT uniq_key_part1, ... uniq_key_partK + FROM tbl WHERE subqwhere) + + i.e. the subquery is a single table SELECT without GROUP BY, aggregate + functions, etc. +*/ + +class subselect_uniquesubquery_engine: public subselect_engine +{ +protected: + st_join_table *tab; + Item *cond; /* The WHERE condition of subselect */ + /* + TRUE<=> last execution produced empty set. Valid only when left + expression is NULL. + */ + bool empty_result_set; +public: + + // constructor can assign THD because it will be called after JOIN::prepare + subselect_uniquesubquery_engine(THD *thd_arg, st_join_table *tab_arg, + Item_in_subselect *subs, Item *where) + :subselect_engine(subs, 0), tab(tab_arg), cond(where) + { + thd= thd_arg; + DBUG_ASSERT(subs); + } + ~subselect_uniquesubquery_engine(); + void cleanup() override; + int prepare(THD *) override; + bool fix_length_and_dec(Item_cache** row) override; + int exec() override; + uint cols() const override { return 1; } + uint8 uncacheable() override { return UNCACHEABLE_DEPENDENT_INJECTED; } + void exclude() override; + table_map upper_select_const_tables() override { return 0; } + void print(String *str, enum_query_type query_type) override; + bool change_result(Item_subselect *si, + select_result_interceptor *result, + bool temp= FALSE) override; + bool no_tables() const override; + int index_lookup(); /* TIMOUR: this method needs refactoring. */ + int scan_table(); + bool copy_ref_key(bool skip_constants); + bool no_rows() override { return empty_result_set; } + enum_engine_type engine_type() override { return UNIQUESUBQUERY_ENGINE; } +}; + + +class subselect_indexsubquery_engine: public subselect_uniquesubquery_engine +{ + /* FALSE for 'ref', TRUE for 'ref-or-null'. */ + bool check_null; + /* + The "having" clause. 
This clause (further referred to as "artificial + having") was inserted by subquery transformation code. It contains + Item(s) that have a side-effect: they record whether the subquery has + produced a row with NULL certain components. We need to use it for cases + like + (oe1, oe2) IN (SELECT t.key, t.no_key FROM t1) + where we do index lookup on t.key=oe1 but need also to check if there + was a row such that t.no_key IS NULL. + + NOTE: This is currently here and not in the uniquesubquery_engine. Ideally + it should have been in uniquesubquery_engine in order to allow execution of + subqueries like + + (oe1, oe2) IN (SELECT primary_key, non_key_maybe_null_field FROM tbl) + + We could use uniquesubquery_engine for the first component and let + Item_is_not_null_test( non_key_maybe_null_field) to handle the second. + + However, subqueries like the above are currently not handled by index + lookup-based subquery engines, the engine applicability check misses + them: it doesn't switch the engine for case of artificial having and + [eq_]ref access (only for artificial having + ref_or_null or no having). + The above example subquery is handled as a full-blown SELECT with eq_ref + access to one table. + + Due to this limitation, the "artificial having" currently needs to be + checked by only in indexsubquery_engine. 
+ */ + Item *having; +public: + + // constructor can assign THD because it will be called after JOIN::prepare + subselect_indexsubquery_engine(THD *thd_arg, st_join_table *tab_arg, + Item_in_subselect *subs, Item *where, + Item *having_arg, bool chk_null) + :subselect_uniquesubquery_engine(thd_arg, tab_arg, subs, where), + check_null(chk_null), + having(having_arg) + { DBUG_ASSERT(subs); } + int exec(); + void print (String *str, enum_query_type query_type); + virtual enum_engine_type engine_type() { return INDEXSUBQUERY_ENGINE; } +}; + +/* + This function is actually defined in sql_parse.cc, but it depends on + chooser_compare_func_creator defined in this file. + */ +Item * all_any_subquery_creator(THD *thd, Item *left_expr, + chooser_compare_func_creator cmp, + bool all, + SELECT_LEX *select_lex); + + +inline bool Item_subselect::is_evaluated() const +{ + return engine->is_executed(); +} + + +inline bool Item_subselect::is_uncacheable() const +{ + return engine->uncacheable(); +} + +/** + Compute an IN predicate via a hash semi-join. This class is responsible for + the materialization of the subquery, and the selection of the correct and + optimal execution method (e.g. direct index lookup, or partial matching) for + the IN predicate. +*/ + +class subselect_hash_sj_engine : public subselect_engine +{ +public: + /* The table into which the subquery is materialized. */ + TABLE *tmp_table; + /* TRUE if the subquery was materialized into a temp table. */ + bool is_materialized; + /* + The old engine already chosen at parse time and stored in permanent memory. + Through this member we can re-create and re-prepare materialize_join for + each execution of a prepared statement. We also reuse the functionality + of subselect_single_select_engine::[prepare | cols]. + */ + subselect_single_select_engine *materialize_engine; + /* + QEP to execute the subquery and materialize its result into a + temporary table. Created during the first call to exec(). 
+ */ + JOIN *materialize_join; + /* + A conjunction of all the equality conditions between all pairs of expressions + that are arguments of an IN predicate. We need these to post-filter some + IN results because index lookups sometimes match values that are actually + not equal to the search key in SQL terms. + */ + Item_cond_and *semi_join_conds; + Name_resolution_context *semi_join_conds_context; + + + subselect_hash_sj_engine(THD *thd_arg, Item_in_subselect *in_predicate, + subselect_single_select_engine *old_engine) + : subselect_engine(in_predicate, NULL), + tmp_table(NULL), is_materialized(FALSE), materialize_engine(old_engine), + materialize_join(NULL), semi_join_conds(NULL), lookup_engine(NULL), + count_partial_match_columns(0), count_null_only_columns(0), + count_columns_with_nulls(0), strategy(UNDEFINED) + { DBUG_ASSERT(in_predicate); } + ~subselect_hash_sj_engine(); + + bool init(List *tmp_columns, uint subquery_id); + void cleanup() override; + int prepare(THD *) override; + int exec() override; + void print(String *str, enum_query_type query_type) override; + uint cols() const override { return materialize_engine->cols(); } + uint8 uncacheable() override { return materialize_engine->uncacheable(); } + table_map upper_select_const_tables() override { return 0; } + bool no_rows() override { return !tmp_table->file->stats.records; } + enum_engine_type engine_type() override { return HASH_SJ_ENGINE; } + /* + TODO: factor out all these methods in a base subselect_index_engine class + because all of them have dummy implementations and should never be called. + */ + bool fix_length_and_dec(Item_cache** row) override;//=>base class + void exclude() override; //=>base class + //=>base class + bool change_result(Item_subselect *si, + select_result_interceptor *result, + bool temp= FALSE) override; + bool no_tables() const override;//=>base class + +protected: + /* The engine used to compute the IN predicate. 
*/ + subselect_engine *lookup_engine; + /* Keyparts of the only non-NULL composite index in a rowid merge. */ + MY_BITMAP non_null_key_parts; + /* Keyparts of the single column indexes with NULL, one keypart per index. */ + MY_BITMAP partial_match_key_parts; + uint count_partial_match_columns; + uint count_null_only_columns; + uint count_columns_with_nulls; + /* Possible execution strategies that can be used to compute hash semi-join.*/ + enum exec_strategy { + UNDEFINED, + COMPLETE_MATCH, /* Use regular index lookups. */ + PARTIAL_MATCH, /* Use some partial matching strategy. */ + PARTIAL_MATCH_MERGE, /* Use partial matching through index merging. */ + PARTIAL_MATCH_SCAN, /* Use partial matching through table scan. */ + IMPOSSIBLE /* Subquery materialization is not applicable. */ + }; + /* The chosen execution strategy. Computed after materialization. */ + exec_strategy strategy; + exec_strategy get_strategy_using_schema(); + exec_strategy get_strategy_using_data(); + ulonglong rowid_merge_buff_size(bool has_non_null_key, + bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts); + void choose_partial_match_strategy(bool has_non_null_key, + bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts); + bool make_semi_join_conds(); + subselect_uniquesubquery_engine* make_unique_engine(); + +}; + + +/* + Distinguish the type of (0-based) row numbers from the type of the index into + an array of row numbers. +*/ +typedef ha_rows rownum_t; + + +/* + An Ordered_key is an in-memory table index that allows O(log(N)) time + lookups of a multi-part key. + + If the index is over a single column, then this column may contain NULLs, and + the NULLs are stored and tested separately for NULL in O(1) via is_null(). + Multi-part indexes assume that the indexed columns do not contain NULLs. + + TODO: + = Due to the unnatural assymetry between single and multi-part indexes, it + makes sense to somehow refactor or extend the class. 
+ + = This class can be refactored into a base abstract interface, and two + subclasses: + - one to represent single-column indexes, and + - another to represent multi-column indexes. + Such separation would allow slightly more efficient implementation of + the single-column indexes. + = The current design requires such indexes to be fully recreated for each + PS (re)execution, however most of the comprising objects can be reused. +*/ + +class Ordered_key : public Sql_alloc +{ +protected: + /* + Index of the key in an array of keys. This index allows to + construct (sub)sets of keys represented by bitmaps. + */ + uint keyid; + /* The table being indexed. */ + TABLE *tbl; + /* The columns being indexed. */ + Item_field **key_columns; + /* Number of elements in 'key_columns' (number of key parts). */ + uint key_column_count; + /* + An expression, or sequence of expressions that forms the search key. + The search key is a sequence when it is Item_row. Each element of the + sequence is accessible via Item::element_index(int i). + */ + Item *search_key; + +/* Value index related members. */ + /* + The actual value index, consists of a sorted sequence of row numbers. + */ + rownum_t *key_buff; + /* Number of elements in key_buff. */ + ha_rows key_buff_elements; + /* Current element in 'key_buff'. */ + ha_rows cur_key_idx; + /* + Mapping from row numbers to row ids. The element row_num_to_rowid[i] + contains a buffer with the rowid for the row numbered 'i'. + The memory for this member is not maintanined by this class because + all Ordered_key indexes of the same table share the same mapping. + */ + uchar *row_num_to_rowid; + /* + A sequence of predicates to compare the search key with the corresponding + columns of a table row from the index. + */ + Item_func_lt **compare_pred; + +/* Null index related members. */ + MY_BITMAP null_key; + /* Count of NULLs per column. */ + ha_rows null_count; + /* The row number that contains the first NULL in a column. 
*/ + rownum_t min_null_row; + /* The row number that contains the last NULL in a column. */ + rownum_t max_null_row; + +protected: + bool alloc_keys_buffers(); + /* + Quick sort comparison function that compares two rows of the same table + indentfied with their row numbers. + */ + int cmp_keys_by_row_data(rownum_t a, rownum_t b); + static int cmp_keys_by_row_data_and_rownum(Ordered_key *key, + rownum_t* a, rownum_t* b); + + int cmp_key_with_search_key(rownum_t row_num); + +public: + Ordered_key(uint keyid_arg, TABLE *tbl_arg, + Item *search_key_arg, ha_rows null_count_arg, + ha_rows min_null_row_arg, ha_rows max_null_row_arg, + uchar *row_num_to_rowid_arg); + ~Ordered_key(); + void cleanup(); + /* Initialize a multi-column index. */ + bool init(MY_BITMAP *columns_to_index); + /* Initialize a single-column index. */ + bool init(int col_idx); + + uint get_column_count() { return key_column_count; } + uint get_keyid() { return keyid; } + Field *get_field(uint i) + { + DBUG_ASSERT(i < key_column_count); + return key_columns[i]->field; + } + rownum_t get_min_null_row() { return min_null_row; } + rownum_t get_max_null_row() { return max_null_row; } + MY_BITMAP * get_null_key() { return &null_key; } + ha_rows get_null_count() { return null_count; } + /* + Get the search key element that corresponds to the i-th key part of this + index. + */ + Item *get_search_key(uint i) + { + return search_key->element_index(key_columns[i]->field->field_index); + } + void add_key(rownum_t row_num) + { + /* The caller must know how many elements to add. */ + DBUG_ASSERT(key_buff_elements && cur_key_idx < key_buff_elements); + key_buff[cur_key_idx]= row_num; + ++cur_key_idx; + } + + bool sort_keys(); + double null_selectivity(); + + /* + Position the current element at the first row that matches the key. + The key itself is propagated by evaluating the current value(s) of + this->search_key. + */ + bool lookup(); + /* Move the current index cursor to the first key. 
*/ + void first() + { + DBUG_ASSERT(key_buff_elements); + cur_key_idx= 0; + } + /* TODO */ + bool next_same(); + /* Move the current index cursor to the next key. */ + bool next() + { + DBUG_ASSERT(key_buff_elements); + if (cur_key_idx < key_buff_elements - 1) + { + ++cur_key_idx; + return TRUE; + } + return FALSE; + }; + /* Return the current index element. */ + rownum_t current() + { + DBUG_ASSERT(key_buff_elements && cur_key_idx < key_buff_elements); + return key_buff[cur_key_idx]; + } + + void set_null(rownum_t row_num) + { + bitmap_set_bit(&null_key, (uint)row_num); + } + bool is_null(rownum_t row_num) + { + /* + Indexes consisting of only NULLs do not have a bitmap buffer at all. + Their only initialized member is 'n_bits', which is equal to the number + of temp table rows. + */ + if (null_count == tbl->file->stats.records) + { + DBUG_ASSERT(tbl->file->stats.records == null_key.n_bits); + return TRUE; + } + if (row_num > max_null_row || row_num < min_null_row) + return FALSE; + return bitmap_is_set(&null_key, (uint)row_num); + } + void print(String *str); +}; + + +class subselect_partial_match_engine : public subselect_engine +{ +protected: + /* The temporary table that contains a materialized subquery. */ + TABLE *tmp_table; + /* + The engine used to check whether an IN predicate is TRUE or not. If not + TRUE, then subselect_rowid_merge_engine further distinguishes between + FALSE and UNKNOWN. + */ + subselect_uniquesubquery_engine *lookup_engine; + /* A list of equalities between each pair of IN operands. */ + List *equi_join_conds; + /* + True if there is an all NULL row in tmp_table. If so, then if there is + no complete match, there is a guaranteed partial match. + */ + bool has_covering_null_row; + + /* + True if all nullable columns of tmp_table consist of only NULL values. + If so, then if there is a match in the non-null columns, there is a + guaranteed partial match. 
+ */ + bool has_covering_null_columns; + uint count_columns_with_nulls; + +protected: + virtual bool partial_match()= 0; +public: + subselect_partial_match_engine(THD *thd, + subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, Item_subselect *item_arg, + select_result_interceptor *result_arg, + List *equi_join_conds_arg, + bool has_covering_null_row_arg, + bool has_covering_null_columns_arg, + uint count_columns_with_nulls_arg); + int prepare(THD *thd_arg) override { set_thd(thd_arg); return 0; } + int exec() override; + bool fix_length_and_dec(Item_cache**) override { return FALSE; } + uint cols() const override + { /* TODO: what is the correct value? */ return 1; } + uint8 uncacheable() override { return UNCACHEABLE_DEPENDENT; } + void exclude() override {} + table_map upper_select_const_tables() override { return 0; } + bool change_result(Item_subselect*, + select_result_interceptor*, + bool temp= FALSE) override + { DBUG_ASSERT(FALSE); return false; } + bool no_tables() const override { return false; } + bool no_rows() override + { + /* + TODO: It is completely unclear what is the semantics of this + method. The current result is computed so that the call to no_rows() + from Item_in_optimizer::val_int() sets Item_in_optimizer::null_value + correctly. + */ + return !(item->get_IN_subquery()->null_value); + } + void print(String*, enum_query_type) override; + + friend void subselect_hash_sj_engine::cleanup(); +}; + + +class subselect_rowid_merge_engine: public subselect_partial_match_engine +{ +protected: + /* + Mapping from row numbers to row ids. The rowids are stored sequentially + in the array - rowid[i] is located in row_num_to_rowid + i * rowid_length. + */ + uchar *row_num_to_rowid; + /* + A subset of all the keys for which there is a match for the same row. + Used during execution. Computed for each outer reference + */ + MY_BITMAP matching_keys; + /* + The columns of the outer reference that are NULL. Computed for each + outer reference. 
+ */ + MY_BITMAP matching_outer_cols; + /* + Indexes of row numbers, sorted by . If an + index may contain NULLs, the NULLs are stored efficiently in a bitmap. + + The indexes are sorted by the selectivity of their NULL sub-indexes, the + one with the fewer NULLs is first. Thus, if there is any index on + non-NULL columns, it is contained in keys[0]. + */ + Ordered_key **merge_keys; + /* The number of elements in merge_keys. */ + uint merge_keys_count; + /* The NULL bitmaps of merge keys.*/ + MY_BITMAP **null_bitmaps; + /* + An index on all non-NULL columns of 'tmp_table'. The index has the + logical form: <[v_i1 | ... | v_ik], rownum>. It allows to find the row + number where the columns c_i1,...,c1_k contain the values v_i1,...,v_ik. + If such an index exists, it is always the first element of 'merge_keys'. + */ + Ordered_key *non_null_key; + /* + Priority queue of Ordered_key indexes, one per NULLable column. + This queue is used by the partial match algorithm in method exec(). + */ + QUEUE pq; +protected: + /* + Comparison function to compare keys in order of decreasing bitmap + selectivity. + */ + static int cmp_keys_by_null_selectivity(Ordered_key **k1, Ordered_key **k2); + /* + Comparison function used by the priority queue pq, the 'smaller' key + is the one with the smaller current row number. 
+ */ + static int cmp_keys_by_cur_rownum(void *arg, uchar *k1, uchar *k2); + + bool test_null_row(rownum_t row_num); + bool exists_complementing_null_row(MY_BITMAP *keys_to_complement); + bool partial_match(); +public: + subselect_rowid_merge_engine(THD *thd, + subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, uint merge_keys_count_arg, + bool has_covering_null_row_arg, + bool has_covering_null_columns_arg, + uint count_columns_with_nulls_arg, + Item_subselect *item_arg, + select_result_interceptor *result_arg, + List *equi_join_conds_arg) + :subselect_partial_match_engine(thd, engine_arg, tmp_table_arg, + item_arg, result_arg, equi_join_conds_arg, + has_covering_null_row_arg, + has_covering_null_columns_arg, + count_columns_with_nulls_arg), + merge_keys_count(merge_keys_count_arg), non_null_key(NULL) + {} + ~subselect_rowid_merge_engine(); + bool init(MY_BITMAP *non_null_key_parts, MY_BITMAP *partial_match_key_parts); + void cleanup(); + virtual enum_engine_type engine_type() { return ROWID_MERGE_ENGINE; } +}; + + +class subselect_table_scan_engine: public subselect_partial_match_engine +{ +protected: + bool partial_match(); +public: + subselect_table_scan_engine(THD *thd, + subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, Item_subselect *item_arg, + select_result_interceptor *result_arg, + List *equi_join_conds_arg, + bool has_covering_null_row_arg, + bool has_covering_null_columns_arg, + uint count_columns_with_nulls_arg); + void cleanup(); + virtual enum_engine_type engine_type() { return TABLE_SCAN_ENGINE; } +}; +#endif /* ITEM_SUBSELECT_INCLUDED */ diff --git a/sql/item_sum.cc b/sql/item_sum.cc new file mode 100644 index 00000000..bbd09a59 --- /dev/null +++ b/sql/item_sum.cc @@ -0,0 +1,4600 @@ +/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. 
+ Copyright (c) 2008, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/** + @file + + @brief + Sum functions (COUNT, MIN...) +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_select.h" +#include "uniques.h" +#include "sp_rcontext.h" +#include "sp.h" +#include "sql_parse.h" +#include "sp_head.h" + +/** + Calculate the affordable RAM limit for structures like TREE or Unique + used in Item_sum_* +*/ + +size_t Item_sum::ram_limitation(THD *thd) +{ + return MY_MAX(1024, + (size_t)MY_MIN(thd->variables.tmp_memory_table_size, + thd->variables.max_heap_table_size)); +} + + +/* + Force create_tmp_table() to convert BIT columns to BIGINT. + This is needed because BIT fields store parts of their data in table's + null bits, and we don't have methods to compare two table records with + bit fields. +*/ + +static void store_bit_fields_as_bigint_in_tempory_table(List *list) +{ + List_iterator_fast li(*list); + Item *item; + while ((item= li++)) + { + if (item->type() == Item::FIELD_ITEM && + ((Item_field*) item)->field->type() == FIELD_TYPE_BIT) + item->marker= MARKER_NULL_KEY; + } +} + +/** + Prepare an aggregate function item for checking context conditions. 
+ + The function initializes the members of the Item_sum object created + for a set function that are used to check validity of the set function + occurrence. + If the set function is not allowed in any subquery where it occurs + an error is reported immediately. + + @param thd reference to the thread context info + + @note + This function is to be called for any item created for a set function + object when the traversal of trees built for expressions used in the query + is performed at the phase of context analysis. This function is to + be invoked at the descent of this traversal. + @retval + TRUE if an error is reported + @retval + FALSE otherwise +*/ + +bool Item_sum::init_sum_func_check(THD *thd) +{ + SELECT_LEX *curr_sel= thd->lex->current_select; + if (curr_sel && curr_sel->name_visibility_map.is_clear_all()) + { + for (SELECT_LEX *sl= curr_sel; sl; sl= sl->context.outer_select()) + { + curr_sel->name_visibility_map.set_bit(sl->nest_level); + } + } + if (!curr_sel || + !(thd->lex->allow_sum_func.is_overlapping(curr_sel->name_visibility_map))) + { + my_message(ER_INVALID_GROUP_FUNC_USE, ER_THD(thd, ER_INVALID_GROUP_FUNC_USE), + MYF(0)); + return TRUE; + } + /* Set a reference to the nesting set function if there is any */ + in_sum_func= thd->lex->in_sum_func; + /* Save a pointer to object to be used in items for nested set functions */ + thd->lex->in_sum_func= this; + nest_level= thd->lex->current_select->nest_level; + ref_by= 0; + aggr_level= -1; + aggr_sel= NULL; + max_arg_level= -1; + max_sum_func_level= -1; + outer_fields.empty(); + return FALSE; +} + +/** + Check constraints imposed on a usage of a set function. + + The method verifies whether context conditions imposed on a usage + of any set function are met for this occurrence. + + The function first checks if we are using any window functions as + arguments to the set function. In that case it returns an error. 
+ + Afterwards, it checks whether the set function occurs in the position where it + can be aggregated and, when it happens to occur in argument of another + set function, the method checks that these two functions are aggregated in + different subqueries. + If the context conditions are not met the method reports an error. + If the set function is aggregated in some outer subquery the method + adds it to the chain of items for such set functions that is attached + to the the st_select_lex structure for this subquery. + + A number of designated members of the object are used to check the + conditions. They are specified in the comment before the Item_sum + class declaration. + Additionally a bitmap variable called allow_sum_func is employed. + It is included into the thd->lex structure. + The bitmap contains 1 at n-th position if the set function happens + to occur under a construct of the n-th level subquery where usage + of set functions are allowed (i.e either in the SELECT list or + in the HAVING clause of the corresponding subquery) + Consider the query: + @code + SELECT SUM(t1.b) FROM t1 GROUP BY t1.a + HAVING t1.a IN (SELECT t2.c FROM t2 WHERE AVG(t1.b) > 20) AND + t1.a > (SELECT MIN(t2.d) FROM t2); + @endcode + allow_sum_func will contain: + - for SUM(t1.b) - 1 at the first position + - for AVG(t1.b) - 1 at the first position, 0 at the second position + - for MIN(t2.d) - 1 at the first position, 1 at the second position. + + @param thd reference to the thread context info + @param ref location of the pointer to this item in the embedding expression + + @note + This function is to be called for any item created for a set function + object when the traversal of trees built for expressions used in the query + is performed at the phase of context analysis. This function is to + be invoked at the ascent of this traversal. 
+ + @retval + TRUE if an error is reported + @retval + FALSE otherwise +*/ + +bool Item_sum::check_sum_func(THD *thd, Item **ref) +{ + SELECT_LEX *curr_sel= thd->lex->current_select; + nesting_map allow_sum_func(thd->lex->allow_sum_func); + allow_sum_func.intersect(curr_sel->name_visibility_map); + bool invalid= FALSE; + // should be set already + DBUG_ASSERT(!curr_sel->name_visibility_map.is_clear_all()); + + /* + Window functions can not be used as arguments to sum functions. + Aggregation happes before window function computation, so there + are no values to aggregate over. + */ + if (with_window_func()) + { + my_message(ER_SUM_FUNC_WITH_WINDOW_FUNC_AS_ARG, + ER_THD(thd, ER_SUM_FUNC_WITH_WINDOW_FUNC_AS_ARG), + MYF(0)); + return TRUE; + } + + if (window_func_sum_expr_flag) + return false; + /* + The value of max_arg_level is updated if an argument of the set function + contains a column reference resolved against a subquery whose level is + greater than the current value of max_arg_level. + max_arg_level cannot be greater than nest level. + nest level is always >= 0 + */ + if (nest_level == max_arg_level) + { + /* + The function must be aggregated in the current subquery, + If it is there under a construct where it is not allowed + we report an error. + */ + invalid= !(allow_sum_func.is_set(max_arg_level)); + } + else if (max_arg_level >= 0 || + !(allow_sum_func.is_set(nest_level))) + { + /* + The set function can be aggregated only in outer subqueries. + Try to find a subquery where it can be aggregated; + If we fail to find such a subquery report an error. 
+ */ + if (register_sum_func(thd, ref)) + return TRUE; + invalid= aggr_level < 0 && + !(allow_sum_func.is_set(nest_level)); + if (!invalid && thd->variables.sql_mode & MODE_ANSI) + invalid= aggr_level < 0 && max_arg_level < nest_level; + } + if (!invalid && aggr_level < 0) + { + aggr_level= nest_level; + aggr_sel= curr_sel; + } + /* + By this moment we either found a subquery where the set function is + to be aggregated and assigned a value that is >= 0 to aggr_level, + or set the value of 'invalid' to TRUE to report later an error. + */ + /* + Additionally we have to check whether possible nested set functions + are acceptable here: they are not, if the level of aggregation of + some of them is less than aggr_level. + */ + if (!invalid) + invalid= aggr_level <= max_sum_func_level; + if (invalid) + { + my_message(ER_INVALID_GROUP_FUNC_USE, + ER_THD(thd, ER_INVALID_GROUP_FUNC_USE), + MYF(0)); + return TRUE; + } + + if (in_sum_func) + { + /* + If the set function is nested adjust the value of + max_sum_func_level for the nesting set function. + We take into account only enclosed set functions that are to be + aggregated on the same level or above of the nest level of + the enclosing set function. + But we must always pass up the max_sum_func_level because it is + the maximum nested level of all directly and indirectly enclosed + set functions. We must do that even for set functions that are + aggregated inside of their enclosing set function's nest level + because the enclosing function may contain another enclosing + function that is to be aggregated outside or on the same level + as its parent's nest level. + */ + if (in_sum_func->nest_level >= aggr_level) + set_if_bigger(in_sum_func->max_sum_func_level, aggr_level); + set_if_bigger(in_sum_func->max_sum_func_level, max_sum_func_level); + } + + /* + Check that non-aggregated fields and sum functions aren't mixed in the + same select in the ONLY_FULL_GROUP_BY mode. 
+ */ + if (outer_fields.elements) + { + Item_field *field; + /* + Here we compare the nesting level of the select to which an outer field + belongs to with the aggregation level of the sum function. All fields in + the outer_fields list are checked. + + If the nesting level is equal to the aggregation level then the field is + aggregated by this sum function. + If the nesting level is less than the aggregation level then the field + belongs to an outer select. In this case if there is an embedding sum + function add current field to functions outer_fields list. If there is + no embedding function then the current field treated as non aggregated + and the select it belongs to is marked accordingly. + If the nesting level is greater than the aggregation level then it means + that this field was added by an inner sum function. + Consider an example: + + select avg ( <-- we are here, checking outer.f1 + select ( + select sum(outer.f1 + inner.f1) from inner + ) from outer) + from most_outer; + + In this case we check that no aggregate functions are used in the + select the field belongs to. If there are some then an error is + raised. + */ + List_iterator of(outer_fields); + while ((field= of++)) + { + SELECT_LEX *sel= field->field->table->pos_in_table_list->select_lex; + if (sel->nest_level < aggr_level) + { + if (in_sum_func) + { + /* + Let upper function decide whether this field is a non + aggregated one. 
+ */ + in_sum_func->outer_fields.push_back(field, thd->mem_root); + } + else + sel->set_non_agg_field_used(true); + } + if (sel->nest_level > aggr_level && + (sel->agg_func_used()) && + !sel->group_list.elements) + { + my_message(ER_MIX_OF_GROUP_FUNC_AND_FIELDS, + ER_THD(thd, ER_MIX_OF_GROUP_FUNC_AND_FIELDS), MYF(0)); + return TRUE; + } + } + } + aggr_sel->set_agg_func_used(true); + if (sum_func() == SP_AGGREGATE_FUNC) + aggr_sel->set_custom_agg_func_used(true); + update_used_tables(); + thd->lex->in_sum_func= in_sum_func; + return FALSE; +} + +/** + Attach a set function to the subquery where it must be aggregated. + + The function looks for an outer subquery where the set function must be + aggregated. If it finds such a subquery then aggr_level is set to + the nest level of this subquery and the item for the set function + is added to the list of set functions used in nested subqueries + inner_sum_func_list defined for each subquery. When the item is placed + there the field 'ref_by' is set to ref. + + @note + Now we 'register' only set functions that are aggregated in outer + subqueries. Actually it makes sense to link all set function for + a subquery in one chain. It would simplify the process of 'splitting' + for set functions. 
+ + @param thd reference to the thread context info + @param ref location of the pointer to this item in the embedding expression + + @retval + FALSE if the executes without failures (currently always) + @retval + TRUE otherwise +*/ + +bool Item_sum::register_sum_func(THD *thd, Item **ref) +{ + SELECT_LEX *sl; + nesting_map allow_sum_func= thd->lex->allow_sum_func; + for (sl= thd->lex->current_select->context.outer_select() ; + sl && sl->nest_level > max_arg_level; + sl= sl->context.outer_select()) + { + if (aggr_level < 0 && + (allow_sum_func.is_set(sl->nest_level))) + { + /* Found the most nested subquery where the function can be aggregated */ + aggr_level= sl->nest_level; + aggr_sel= sl; + } + } + if (sl && (allow_sum_func.is_set(sl->nest_level))) + { + /* + We reached the subquery of level max_arg_level and checked + that the function can be aggregated here. + The set function will be aggregated in this subquery. + */ + aggr_level= sl->nest_level; + aggr_sel= sl; + + } + if (aggr_level >= 0) + { + ref_by= ref; + /* Add the object to the list of registered objects assigned to aggr_sel */ + if (!aggr_sel->inner_sum_func_list) + next= this; + else + { + next= aggr_sel->inner_sum_func_list->next; + aggr_sel->inner_sum_func_list->next= this; + } + aggr_sel->inner_sum_func_list= this; + aggr_sel->with_sum_func= 1; + + /* + Mark Item_subselect(s) as containing aggregate function all the way up + to aggregate function's calculation context. + Note that we must not mark the Item of calculation context itself + because with_sum_func on the calculation context st_select_lex is + already set above. + + with_sum_func being set for an Item means that this Item refers + (somewhere in it, e.g. one of its arguments if it's a function) directly + or through intermediate items to an aggregate function that is calculated + in a context "outside" of the Item (e.g. in the current or outer select). 
+ + with_sum_func being set for an st_select_lex means that this st_select_lex + has aggregate functions directly referenced (i.e. not through a sub-select). + */ + for (sl= thd->lex->current_select; + sl && sl != aggr_sel && sl->master_unit()->item; + sl= sl->master_unit()->outer_select() ) + sl->master_unit()->item->with_flags|= item_with_t::SUM_FUNC; + } + if (aggr_sel) + thd->lex->current_select->mark_as_dependent(thd, aggr_sel, NULL); + + if ((thd->lex->describe & DESCRIBE_EXTENDED) && aggr_sel) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_WARN_AGGFUNC_DEPENDENCE, + ER_THD(thd, ER_WARN_AGGFUNC_DEPENDENCE), + func_name(), + thd->lex->current_select->select_number, + aggr_sel->select_number); + } + return FALSE; +} + + +bool Item_sum::collect_outer_ref_processor(void *param) +{ + Collect_deps_prm *prm= (Collect_deps_prm *)param; + SELECT_LEX *ds; + if ((ds= depended_from()) && + ds->nest_level_base == prm->nest_level_base && + ds->nest_level < prm->nest_level) + { + if (prm->collect) + prm->parameters->add_unique(this, &cmp_items); + else + prm->count++; + } + return FALSE; +} + + +Item_sum::Item_sum(THD *thd, List &list): Item_func_or_sum(thd, list) +{ + if (!(orig_args= (Item **) thd->alloc(sizeof(Item *) * arg_count))) + { + args= NULL; + } + mark_as_sum_func(); + init_aggregator(); + list.empty(); // Fields are used +} + + +/** + Constructor used in processing select with temporary tebles. 
+*/ + +Item_sum::Item_sum(THD *thd, Item_sum *item): + Item_func_or_sum(thd, item), + aggr_sel(item->aggr_sel), + nest_level(item->nest_level), aggr_level(item->aggr_level), + quick_group(item->quick_group), + orig_args(NULL) +{ + if (arg_count <= 2) + { + orig_args=tmp_orig_args; + } + else + { + if (!(orig_args= (Item**) thd->alloc(sizeof(Item*)*arg_count))) + return; + } + if (arg_count) + memcpy(orig_args, item->orig_args, sizeof(Item*)*arg_count); + init_aggregator(); + with_distinct= item->with_distinct; + if (item->aggr) + set_aggregator(thd, item->aggr->Aggrtype()); +} + + +void Item_sum::mark_as_sum_func() +{ + SELECT_LEX *cur_select= current_thd->lex->current_select; + cur_select->n_sum_items++; + cur_select->with_sum_func= 1; + const_item_cache= false; + with_flags= (with_flags | item_with_t::SUM_FUNC) & ~item_with_t::FIELD; + window_func_sum_expr_flag= false; +} + + +void Item_sum::print(String *str, enum_query_type query_type) +{ + /* orig_args is not filled with valid values until fix_fields() */ + Item **pargs= fixed() ? orig_args : args; + str->append(func_name_cstring()); + /* + TODO: + The fact that func_name() may return a name with an extra '(' + is really annoying. This shoud be fixed. 
+ */ + if (!is_aggr_sum_func()) + str->append('('); + for (uint i=0 ; i < arg_count ; i++) + { + if (i) + str->append(','); + pargs[i]->print(str, query_type); + } + str->append(')'); +} + +void Item_sum::fix_num_length_and_dec() +{ + decimals=0; + for (uint i=0 ; i < arg_count ; i++) + set_if_bigger(decimals,args[i]->decimals); + max_length=float_length(decimals); +} + +Item *Item_sum::get_tmp_table_item(THD *thd) +{ + Item_sum* sum_item= (Item_sum *) copy_or_same(thd); + if (sum_item && sum_item->result_field) // If not a const sum func + { + Field *result_field_tmp= sum_item->result_field; + for (uint i=0 ; i < sum_item->arg_count ; i++) + { + Item *arg= sum_item->args[i]; + if (!arg->const_item()) + { + if (arg->type() == Item::FIELD_ITEM) + { + ((Item_field*) arg)->field= result_field_tmp++; + } + else + { + auto item_field= + new (thd->mem_root) Item_field(thd, result_field_tmp++); + if (item_field) + item_field->set_refers_to_temp_table(); + sum_item->args[i]= item_field; + } + } + } + } + return sum_item; +} + + +void Item_sum::update_used_tables () +{ + if (!Item_sum::const_item()) + { + used_tables_cache= 0; + for (uint i=0 ; i < arg_count ; i++) + { + args[i]->update_used_tables(); + used_tables_cache|= args[i]->used_tables(); + } + /* + MariaDB: don't run the following { + + used_tables_cache&= PSEUDO_TABLE_BITS; + + // the aggregate function is aggregated into its local context + used_tables_cache|= ((table_map)1 << aggr_sel->join->tables) - 1; + + } because if we do it, table elimination will assume that + - constructs like "COUNT(*)" use columns from all tables + - so, it is not possible to eliminate any table + our solution for COUNT(*) is that it has + item->used_tables() == 0 && !item->const_item() + */ + } +} + + +Item *Item_sum::set_arg(uint i, THD *thd, Item *new_val) +{ + thd->change_item_tree(args + i, new_val); + return new_val; +} + + +int Item_sum::set_aggregator(THD *thd, Aggregator::Aggregator_type aggregator) +{ + /* + Dependent 
subselects may be executed multiple times, making + set_aggregator to be called multiple times. The aggregator type + will be the same, but it needs to be reset so that it is + reevaluated with the new dependent data. + This function may also be called multiple times during query optimization. + In this case, the type may change, so we delete the old aggregator, + and create a new one. + */ + if (aggr && aggregator == aggr->Aggrtype()) + { + aggr->clear(); + return FALSE; + } + + delete aggr; + switch (aggregator) + { + case Aggregator::DISTINCT_AGGREGATOR: + aggr= new (thd->mem_root) Aggregator_distinct(this); + break; + case Aggregator::SIMPLE_AGGREGATOR: + aggr= new (thd->mem_root) Aggregator_simple(this); + break; + }; + return aggr ? FALSE : TRUE; +} + + +void Item_sum::cleanup() +{ + if (aggr) + { + delete aggr; + aggr= NULL; + } + Item_result_field::cleanup(); + const_item_cache= false; +} + +Item *Item_sum::result_item(THD *thd, Field *field) +{ + return new (thd->mem_root) Item_field(thd, field); +} + +bool Item_sum::check_vcol_func_processor(void *arg) +{ + return mark_unsupported_function(func_name(), + is_aggr_sum_func() ? ")" : "()", + arg, VCOL_IMPOSSIBLE); +} + + +/** + Compare keys consisting of single field that cannot be compared as binary. + + Used by the Unique class to compare keys. Will do correct comparisons + for all field types. + + @param arg Pointer to the relevant Field class instance + @param key1 left key image + @param key2 right key image + @return comparison result + @retval < 0 if key1 < key2 + @retval = 0 if key1 = key2 + @retval > 0 if key1 > key2 +*/ + +int simple_str_key_cmp(void* arg, uchar* key1, uchar* key2) +{ + Field *f= (Field*) arg; + return f->cmp(key1, key2); +} + + +C_MODE_START + +int count_distinct_walk(void *elem, element_count count, void *arg) +{ + (*((ulonglong*)arg))++; + return 0; +} + +C_MODE_END + + +/** + Correctly compare composite keys. + + Used by the Unique class to compare keys. 
Will do correct comparisons + for composite keys with various field types. + + @param arg Pointer to the relevant Aggregator_distinct instance + @param key1 left key image + @param key2 right key image + @return comparison result + @retval <0 if key1 < key2 + @retval =0 if key1 = key2 + @retval >0 if key1 > key2 +*/ + +int Aggregator_distinct::composite_key_cmp(void* arg, uchar* key1, uchar* key2) +{ + Aggregator_distinct *aggr= (Aggregator_distinct *) arg; + Field **field = aggr->table->field; + Field **field_end= field + aggr->table->s->fields; + uint32 *lengths=aggr->field_lengths; + for (; field < field_end; ++field) + { + Field* f = *field; + int len = *lengths++; + int res = f->cmp(key1, key2); + if (res) + return res; + key1 += len; + key2 += len; + } + return 0; +} + + +/***************************************************************************/ + +C_MODE_START + +/* Declarations for auxiliary C-callbacks */ + +int simple_raw_key_cmp(void* arg, const void* key1, const void* key2) +{ + return memcmp(key1, key2, *(uint *) arg); +} + + +static int item_sum_distinct_walk_for_count(void *element, + element_count num_of_dups, + void *item) +{ + return ((Aggregator_distinct*) (item))->unique_walk_function_for_count(element); +} + + +static int item_sum_distinct_walk(void *element, element_count num_of_dups, + void *item) +{ + return ((Aggregator_distinct*) (item))->unique_walk_function(element); +} + +C_MODE_END + +/***************************************************************************/ +/** + Called before feeding the first row. Used to allocate/setup + the internal structures used for aggregation. + + @param thd Thread descriptor + @return status + @retval FALSE success + @retval TRUE failure + + Prepares Aggregator_distinct to process the incoming stream. + Creates the temporary table and the Unique class if needed. 
+ Called by Item_sum::aggregator_setup() +*/ + +bool Aggregator_distinct::setup(THD *thd) +{ + endup_done= FALSE; + /* + Setup can be called twice for ROLLUP items. This is a bug. + Please add DBUG_ASSERT(tree == 0) here when it's fixed. + */ + if (tree || table || tmp_table_param) + return FALSE; + + if (item_sum->setup(thd)) + return TRUE; + if (item_sum->sum_func() == Item_sum::COUNT_FUNC || + item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + { + List list; + SELECT_LEX *select_lex= thd->lex->current_select; + + if (!(tmp_table_param= new (thd->mem_root) TMP_TABLE_PARAM)) + return TRUE; + + /* Create a table with an unique key over all parameters */ + for (uint i=0; i < item_sum->get_arg_count() ; i++) + { + Item *item=item_sum->get_arg(i); + if (list.push_back(item, thd->mem_root)) + return TRUE; // End of memory + if (item->const_item() && item->is_null()) + always_null= true; + } + if (always_null) + return FALSE; + count_field_types(select_lex, tmp_table_param, list, 0); + tmp_table_param->force_copy_fields= item_sum->has_force_copy_fields(); + DBUG_ASSERT(table == 0); + /* + Convert bit fields to bigint's in temporary table. + Needed by Unique which is used when HEAP table is used. + */ + store_bit_fields_as_bigint_in_tempory_table(&list); + + if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*) 0, 1, + 0, + (select_lex->options | + thd->variables.option_bits), + HA_POS_ERROR, &empty_clex_str))) + return TRUE; + table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows + table->no_rows=1; + + if (table->s->db_type() == heap_hton) + { + /* + No blobs, otherwise it would have been MyISAM: set up a compare + function and its arguments to use with Unique. 
+ */ + qsort_cmp2 compare_key; + void* cmp_arg; + Field **field= table->field; + Field **field_end= field + table->s->fields; + bool all_binary= TRUE; + + for (tree_key_length= 0; field < field_end; ++field) + { + Field *f= *field; + enum enum_field_types type= f->type(); + tree_key_length+= f->pack_length(); + if ((type == MYSQL_TYPE_VARCHAR) || + (!f->binary() && (type == MYSQL_TYPE_STRING || + type == MYSQL_TYPE_VAR_STRING))) + { + all_binary= FALSE; + break; + } + } + if (all_binary) + { + cmp_arg= (void*) &tree_key_length; + compare_key= (qsort_cmp2) simple_raw_key_cmp; + } + else + { + if (table->s->fields == 1) + { + /* + If we have only one field, which is the most common use of + count(distinct), it is much faster to use a simpler key + compare method that can take advantage of not having to worry + about other fields. + */ + compare_key= (qsort_cmp2) simple_str_key_cmp; + cmp_arg= (void*) table->field[0]; + /* tree_key_length has been set already */ + } + else + { + uint32 *length; + compare_key= (qsort_cmp2) composite_key_cmp; + cmp_arg= (void*) this; + field_lengths= (uint32*) thd->alloc(table->s->fields * sizeof(uint32)); + for (tree_key_length= 0, length= field_lengths, field= table->field; + field < field_end; ++field, ++length) + { + *length= (*field)->pack_length(); + tree_key_length+= *length; + } + } + } + DBUG_ASSERT(tree == 0); + tree= (new (thd->mem_root) + Unique(compare_key, cmp_arg, tree_key_length, + item_sum->ram_limitation(thd))); + /* + The only time tree_key_length could be 0 is if someone does + count(distinct) on a char(0) field - stupid thing to do, + but this has to be handled - otherwise someone can crash + the server with a DoS attack + */ + if (! 
tree) + return TRUE; + } + return FALSE; + } + else + { + Item *arg; + DBUG_ENTER("Aggregator_distinct::setup"); + /* It's legal to call setup() more than once when in a subquery */ + if (tree) + DBUG_RETURN(FALSE); + + /* + Virtual table and the tree are created anew on each re-execution of + PS/SP. Hence all further allocations are performed in the runtime + mem_root. + */ + + item_sum->null_value= 1; + item_sum->set_maybe_null(); + item_sum->quick_group= 0; + + DBUG_ASSERT(item_sum->get_arg(0)->fixed()); + + arg= item_sum->get_arg(0); + if (arg->const_item()) + { + (void) arg->is_null(); + if (arg->null_value) + always_null= true; + } + + if (always_null) + DBUG_RETURN(FALSE); + + Field *field= arg->type_handler()-> + make_num_distinct_aggregator_field(thd->mem_root, arg); + if (!field || !(table= create_virtual_tmp_table(thd, field))) + DBUG_RETURN(TRUE); + + /* XXX: check that the case of CHAR(0) works OK */ + tree_key_length= table->s->reclength - table->s->null_bytes; + + /* + Unique handles all unique elements in a tree until they can't fit + in. Then the tree is dumped to the temporary file. We can use + simple_raw_key_cmp because the table contains numbers only; decimals + are converted to binary representation as well. + */ + tree= (new (thd->mem_root) + Unique(simple_raw_key_cmp, &tree_key_length, tree_key_length, + item_sum->ram_limitation(thd))); + + DBUG_RETURN(tree == 0); + } +} + + +/** + Invalidate calculated value and clear the distinct rows. + + Frees space used by the internal data structures. + Removes the accumulated distinct rows. Invalidates the calculated result. 
+*/ + +void Aggregator_distinct::clear() +{ + endup_done= FALSE; + item_sum->clear(); + if (tree) + tree->reset(); + /* tree and table can be both null only if always_null */ + if (item_sum->sum_func() == Item_sum::COUNT_FUNC || + item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + { + if (!tree && table) + { + table->file->extra(HA_EXTRA_NO_CACHE); + table->file->ha_delete_all_rows(); + table->file->extra(HA_EXTRA_WRITE_CACHE); + } + } + else + { + item_sum->null_value= 1; + } +} + + +/** + Process incoming row. + + Add it to Unique/temp hash table if it's unique. Skip the row if + not unique. + Prepare Aggregator_distinct to process the incoming stream. + Create the temporary table and the Unique class if needed. + Called by Item_sum::aggregator_add(). + To actually get the result value in item_sum's buffers + Aggregator_distinct::endup() must be called. + + @return status + @retval FALSE success + @retval TRUE failure +*/ + +bool Aggregator_distinct::add() +{ + if (always_null) + return 0; + + if (item_sum->sum_func() == Item_sum::COUNT_FUNC || + item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + { + int error; + copy_fields(tmp_table_param); + if (copy_funcs(tmp_table_param->items_to_copy, table->in_use)) + return TRUE; + + for (Field **field=table->field ; *field ; field++) + if ((*field)->is_real_null(0)) + return 0; // Don't count NULL + + if (tree) + { + /* + The first few bytes of record (at least one) are just markers + for deleted and NULLs. We want to skip them since they will + bloat the tree without providing any valuable info. Besides, + key_length used to initialize the tree didn't include space for them. 
+ */ + return tree->unique_add(table->record[0] + table->s->null_bytes); + } + if (unlikely((error= table->file->ha_write_tmp_row(table->record[0]))) && + table->file->is_fatal_error(error, HA_CHECK_DUP)) + return TRUE; + return FALSE; + } + else + { + item_sum->get_arg(0)->save_in_field(table->field[0], FALSE); + if (table->field[0]->is_null()) + return 0; + DBUG_ASSERT(tree); + item_sum->null_value= 0; + /* + '0' values are also stored in the tree. This doesn't matter + for SUM(DISTINCT), but is important for AVG(DISTINCT) + */ + return tree->unique_add(table->field[0]->ptr); + } +} + + +/** + Calculate the aggregate function value. + + Since Distinct_aggregator::add() just collects the distinct rows, + we must go over the distinct rows and feed them to the aggregation + function before returning its value. + This is what endup () does. It also sets the result validity flag + endup_done to TRUE so it will not recalculate the aggregate value + again if the Item_sum hasn't been reset. +*/ + +void Aggregator_distinct::endup() +{ + /* prevent consecutive recalculations */ + if (endup_done) + return; + + /* we are going to calculate the aggregate value afresh */ + item_sum->clear(); + + /* The result will definitely be null : no more calculations needed */ + if (always_null) + return; + + if (item_sum->sum_func() == Item_sum::COUNT_FUNC || + item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + { + DBUG_ASSERT(item_sum->fixed()); + Item_sum_count *sum= (Item_sum_count *)item_sum; + if (tree && tree->elements == 0) + { + /* everything fits in memory */ + sum->count= (longlong) tree->elements_in_tree(); + endup_done= TRUE; + } + if (!tree) + { + /* there were blobs */ + table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); + sum->count= table->file->stats.records; + endup_done= TRUE; + } + } + + /* + We don't have a tree only if 'setup()' hasn't been called; + this is the case of sql_executor.cc:return_zero_rows. 
+ */ + if (tree && !endup_done) + { + /* + All tree's values are not NULL. + Note that value of field is changed as we walk the tree, in + Aggregator_distinct::unique_walk_function, but it's always not NULL. + */ + table->field[0]->set_notnull(); + /* go over the tree of distinct keys and calculate the aggregate value */ + use_distinct_values= TRUE; + tree_walk_action func; + if (item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + func= item_sum_distinct_walk_for_count; + else + func= item_sum_distinct_walk; + tree->walk(table, func, (void*) this); + use_distinct_values= FALSE; + } + /* prevent consecutive recalculations */ + endup_done= TRUE; +} + + +String * +Item_sum_int::val_str(String *str) +{ + return val_string_from_int(str); +} + + +my_decimal *Item_sum_int::val_decimal(my_decimal *decimal_value) +{ + return val_decimal_from_int(decimal_value); +} + + +bool +Item_sum_num::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed() == 0); + + if (init_sum_func_check(thd)) + return TRUE; + + decimals=0; + set_maybe_null(sum_func() != COUNT_FUNC); + for (uint i=0 ; i < arg_count ; i++) + { + if (args[i]->fix_fields_if_needed_for_scalar(thd, &args[i])) + return TRUE; + set_if_bigger(decimals, args[i]->decimals); + /* We should ignore FIELD's in arguments to sum functions */ + with_flags|= (args[i]->with_flags & ~item_with_t::FIELD); + } + result_field=0; + max_length=float_length(decimals); + null_value=1; + if (fix_length_and_dec(thd) || + check_sum_func(thd, ref)) + return TRUE; + + if (arg_count) + memcpy (orig_args, args, sizeof (Item *) * arg_count); + base_flags|= item_base_t::FIXED; + return FALSE; +} + + +bool +Item_sum_min_max::fix_fields(THD *thd, Item **ref) +{ + DBUG_ENTER("Item_sum_min_max::fix_fields"); + DBUG_ASSERT(fixed() == 0); + + if (init_sum_func_check(thd)) + DBUG_RETURN(TRUE); + + // 'item' can be changed during fix_fields + if (args[0]->fix_fields_if_needed_for_scalar(thd, &args[0])) + DBUG_RETURN(TRUE); + + /* We should ignore 
FIELD's in arguments to sum functions */ + with_flags|= (args[0]->with_flags & ~item_with_t::FIELD); + if (fix_length_and_dec(thd)) + DBUG_RETURN(TRUE); + + if (!is_window_func_sum_expr()) + setup_hybrid(thd, args[0], NULL); + result_field=0; + + if (check_sum_func(thd, ref)) + DBUG_RETURN(TRUE); + + orig_args[0]= args[0]; + base_flags|= item_base_t::FIXED; + DBUG_RETURN(FALSE); +} + + +bool Item_sum_hybrid::fix_length_and_dec_generic() +{ + Item *item= arguments()[0]; + Type_std_attributes::set(item); + set_handler(item->type_handler()); + return false; +} + + +/** + MAX/MIN for the traditional numeric types preserve the exact data type + from Fields, but do not preserve the exact type from Items: + MAX(float_field) -> FLOAT + MAX(smallint_field) -> LONGLONG + MAX(COALESCE(float_field)) -> DOUBLE + MAX(COALESCE(smallint_field)) -> LONGLONG + QQ: Items should probably be fixed to preserve the exact type. +*/ +bool Item_sum_hybrid::fix_length_and_dec_numeric(const Type_handler *handler) +{ + Item *item= arguments()[0]; + Item *item2= item->real_item(); + Type_std_attributes::set(item); + if (item2->type() == Item::FIELD_ITEM) + set_handler(item2->type_handler()); + else + set_handler(handler); + return false; +} + + +/** + MAX(str_field) converts ENUM/SET to CHAR, and preserve all other types + for Fields. + QQ: This works differently from UNION, which preserve the exact data + type for ENUM/SET if the joined ENUM/SET fields are equally defined. + Perhaps should be fixed. + MAX(str_item) chooses the best suitable string type. +*/ +bool Item_sum_hybrid::fix_length_and_dec_string() +{ + Item *item= arguments()[0]; + Item *item2= item->real_item(); + Type_std_attributes::set(item); + if (item2->type() == Item::FIELD_ITEM) + { + // Fields: convert ENUM/SET to CHAR, preserve the type otherwise. + set_handler(item->type_handler()); + } + else + { + // Items: choose VARCHAR/BLOB/MEDIUMBLOB/LONGBLOB, depending on length. + set_handler(type_handler_varchar. 
+ type_handler_adjusted_to_max_octet_length(max_length, + collation.collation)); + } + return false; +} + + +bool Item_sum_min_max::fix_length_and_dec(THD *thd) +{ + DBUG_ASSERT(args[0]->field_type() == args[0]->real_item()->field_type()); + DBUG_ASSERT(args[0]->result_type() == args[0]->real_item()->result_type()); + /* MIN/MAX can return NULL for empty set indepedent of the used column */ + set_maybe_null(); + null_value= true; + return args[0]->type_handler()->Item_sum_hybrid_fix_length_and_dec(this); +} + + +/** + MIN/MAX function setup. + + @param item argument of MIN/MAX function + @param value_arg calculated value of MIN/MAX function + + @details + Setup cache/comparator of MIN/MAX functions. When called by the + copy_or_same function value_arg parameter contains calculated value + of the original MIN/MAX object and it is saved in this object's cache. + + We mark the value and arg_cache with 'RAND_TABLE_BIT' to ensure + that Arg_comparator::compare_datetime() doesn't allocate new + item inside of Arg_comparator. This would cause compare_datetime() + and Item_sum_min::add() to use different values! 
+*/ + +void Item_sum_min_max::setup_hybrid(THD *thd, Item *item, Item *value_arg) +{ + DBUG_ENTER("Item_sum_min_max::setup_hybrid"); + if (!(value= item->get_cache(thd))) + DBUG_VOID_RETURN; + value->setup(thd, item); + value->store(value_arg); + /* Don't cache value, as it will change */ + if (!item->const_item()) + value->set_used_tables(RAND_TABLE_BIT); + if (!(arg_cache= item->get_cache(thd))) + DBUG_VOID_RETURN; + arg_cache->setup(thd, item); + /* Don't cache value, as it will change */ + if (!item->const_item()) + arg_cache->set_used_tables(RAND_TABLE_BIT); + cmp= new (thd->mem_root) Arg_comparator(); + if (cmp) + cmp->set_cmp_func(thd, this, (Item**)&arg_cache, (Item**)&value, FALSE); + DBUG_VOID_RETURN; +} + + +Field *Item_sum_min_max::create_tmp_field(MEM_ROOT *root, + bool group, TABLE *table) +{ + DBUG_ENTER("Item_sum_min_max::create_tmp_field"); + + if (args[0]->type() == Item::FIELD_ITEM) + { + Field *field= ((Item_field*) args[0])->field; + if ((field= field->create_tmp_field(root, table, true))) + { + DBUG_ASSERT((field->flags & NOT_NULL_FLAG) == 0); + field->field_name= name; + } + DBUG_RETURN(field); + } + DBUG_RETURN(tmp_table_field_from_field_type(root, table)); +} + +/*********************************************************************** +** Item_sum_sp class +***********************************************************************/ + +Item_sum_sp::Item_sum_sp(THD *thd, Name_resolution_context *context_arg, + sp_name *name_arg, sp_head *sp, List &list) + :Item_sum(thd, list), Item_sp(thd, context_arg, name_arg) +{ + set_maybe_null(); + quick_group= 0; + m_sp= sp; +} + +Item_sum_sp::Item_sum_sp(THD *thd, Name_resolution_context *context_arg, + sp_name *name_arg, sp_head *sp) + :Item_sum(thd), Item_sp(thd, context_arg, name_arg) +{ + set_maybe_null(); + quick_group= 0; + m_sp= sp; +} + +Item_sum_sp::Item_sum_sp(THD *thd, Item_sum_sp *item): + Item_sum(thd, item), Item_sp(thd, item) +{ + base_flags|= (item->base_flags & item_base_t::MAYBE_NULL); + 
quick_group= item->quick_group; +} + +bool +Item_sum_sp::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed() == 0); + if (init_sum_func_check(thd)) + return TRUE; + decimals= 0; + + m_sp= m_sp ? m_sp : sp_handler_function.sp_find_routine(thd, m_name, true); + + if (!m_sp) + { + my_missing_function_error(m_name->m_name, ErrConvDQName(m_name).ptr()); + process_error(thd); + return TRUE; + } + + if (init_result_field(thd, max_length, maybe_null(), &null_value, &name)) + return TRUE; + + for (uint i= 0 ; i < arg_count ; i++) + { + if (args[i]->fix_fields_if_needed_for_scalar(thd, &args[i])) + return TRUE; + set_if_bigger(decimals, args[i]->decimals); + /* We should ignore FIELD's in arguments to sum functions */ + with_flags|= (args[i]->with_flags & ~item_with_t::FIELD); + } + result_field= NULL; + max_length= float_length(decimals); + null_value= 1; + if (fix_length_and_dec(thd)) + return TRUE; + + if (check_sum_func(thd, ref)) + return TRUE; + + if (arg_count) + memcpy(orig_args, args, sizeof(Item *) * arg_count); + base_flags|= item_base_t::FIXED; + return FALSE; +} + +/** + Execute function to store value in result field. + This is called when we need the value to be returned for the function. + Here we send a signal in form of the server status that all rows have been + fetched and now we have to exit from the function with the return value. + @return Function returns error status. + @retval FALSE on success. + @retval TRUE if an error occurred. +*/ + +bool +Item_sum_sp::execute() +{ + THD *thd= current_thd; + bool res; + uint old_server_status= thd->server_status; + + /* + We set server status so we can send a signal to exit from the + function with the return value. + */ + + thd->server_status|= SERVER_STATUS_LAST_ROW_SENT; + res= Item_sp::execute(thd, &null_value, args, arg_count); + thd->server_status= old_server_status; + return res; +} + +/** + Handles the aggregation of the values. 
+ @note: See class description for more details on how and why this is done. + @return The error state. + @retval FALSE on success. + @retval TRUE if an error occurred. +*/ + +bool +Item_sum_sp::add() +{ + return execute_impl(current_thd, args, arg_count); +} + + +void +Item_sum_sp::clear() +{ + delete func_ctx; + func_ctx= NULL; + sp_query_arena->free_items(); + free_root(&sp_mem_root, MYF(0)); +} + +const Type_handler *Item_sum_sp::type_handler() const +{ + DBUG_ENTER("Item_sum_sp::type_handler"); + DBUG_PRINT("info", ("m_sp = %p", (void *) m_sp)); + DBUG_ASSERT(sp_result_field); + // This converts ENUM/SET to STRING + const Type_handler *handler= sp_result_field->type_handler(); + DBUG_RETURN(handler->type_handler_for_item_field()); +} + +void +Item_sum_sp::cleanup() +{ + Item_sp::cleanup(); + Item_sum::cleanup(); +} + +/** + Initialize local members with values from the Field interface. + @note called from Item::fix_fields. +*/ + +bool +Item_sum_sp::fix_length_and_dec(THD *thd) +{ + DBUG_ENTER("Item_sum_sp::fix_length_and_dec"); + DBUG_ASSERT(sp_result_field); + Type_std_attributes::set(sp_result_field->type_std_attributes()); + bool res= Item_sum::fix_length_and_dec(thd); + DBUG_RETURN(res); +} + +LEX_CSTRING Item_sum_sp::func_name_cstring() const +{ + return Item_sp::func_name_cstring(current_thd, false); +} + +Item* Item_sum_sp::copy_or_same(THD *thd) +{ + Item_sum_sp *copy_item= new (thd->mem_root) Item_sum_sp(thd, this); + copy_item->init_result_field(thd, max_length, maybe_null(), + ©_item->null_value, ©_item->name); + return copy_item; +} + +/*********************************************************************** +** reset and add of sum_func +***********************************************************************/ + +/** + @todo + check if the following assignments are really needed +*/ +Item_sum_sum::Item_sum_sum(THD *thd, Item_sum_sum *item) + :Item_sum_num(thd, item), + Type_handler_hybrid_field_type(item), + direct_added(FALSE), 
direct_reseted_field(FALSE), + curr_dec_buff(item->curr_dec_buff), + count(item->count) +{ + /* TODO: check if the following assignments are really needed */ + if (result_type() == DECIMAL_RESULT) + { + my_decimal2decimal(item->dec_buffs, dec_buffs); + my_decimal2decimal(item->dec_buffs + 1, dec_buffs + 1); + } + else + sum= item->sum; +} + +Item *Item_sum_sum::copy_or_same(THD* thd) +{ + return new (thd->mem_root) Item_sum_sum(thd, this); +} + + +void Item_sum_sum::cleanup() +{ + DBUG_ENTER("Item_sum_sum::cleanup"); + direct_added= direct_reseted_field= FALSE; + Item_sum_num::cleanup(); + DBUG_VOID_RETURN; +} + + +void Item_sum_sum::clear() +{ + DBUG_ENTER("Item_sum_sum::clear"); + null_value=1; + count= 0; + if (result_type() == DECIMAL_RESULT) + { + curr_dec_buff= 0; + my_decimal_set_zero(dec_buffs); + } + else + sum= 0.0; + DBUG_VOID_RETURN; +} + + +void Item_sum_sum::fix_length_and_dec_double() +{ + set_handler(&type_handler_double); // Change FLOAT to DOUBLE + decimals= args[0]->decimals; + sum= 0.0; +} + + +void Item_sum_sum::fix_length_and_dec_decimal() +{ + set_handler(&type_handler_newdecimal); // Change temporal to new DECIMAL + decimals= args[0]->decimals; + /* SUM result can't be longer than length(arg) + length(MAX_ROWS) */ + int precision= args[0]->decimal_precision() + DECIMAL_LONGLONG_DIGITS; + decimals= MY_MIN(decimals, DECIMAL_MAX_SCALE); + precision= MY_MIN(precision, DECIMAL_MAX_PRECISION); + max_length= my_decimal_precision_to_length_no_truncation(precision, + decimals, + unsigned_flag); + curr_dec_buff= 0; + my_decimal_set_zero(dec_buffs); +} + + +bool Item_sum_sum::fix_length_and_dec(THD *thd) +{ + DBUG_ENTER("Item_sum_sum::fix_length_and_dec"); + set_maybe_null(); + null_value=1; + if (args[0]->cast_to_int_type_handler()-> + Item_sum_sum_fix_length_and_dec(this)) + DBUG_RETURN(TRUE); + DBUG_PRINT("info", ("Type: %s (%d, %d)", type_handler()->name().ptr(), + max_length, (int) decimals)); + DBUG_RETURN(FALSE); +} + + +void 
Item_sum_sum::direct_add(my_decimal *add_sum_decimal) +{ + DBUG_ENTER("Item_sum_sum::direct_add"); + DBUG_PRINT("info", ("add_sum_decimal: %p", add_sum_decimal)); + direct_added= TRUE; + direct_reseted_field= FALSE; + if (add_sum_decimal) + { + direct_sum_is_null= FALSE; + direct_sum_decimal= *add_sum_decimal; + } + else + { + direct_sum_is_null= TRUE; + direct_sum_decimal= decimal_zero; + } + DBUG_VOID_RETURN; +} + + +void Item_sum_sum::direct_add(double add_sum_real, bool add_sum_is_null) +{ + DBUG_ENTER("Item_sum_sum::direct_add"); + DBUG_PRINT("info", ("add_sum_real: %f", add_sum_real)); + direct_added= TRUE; + direct_reseted_field= FALSE; + direct_sum_is_null= add_sum_is_null; + direct_sum_real= add_sum_real; + DBUG_VOID_RETURN; +} + + +bool Item_sum_sum::add() +{ + DBUG_ENTER("Item_sum_sum::add"); + add_helper(false); + DBUG_RETURN(0); +} + +void Item_sum_sum::add_helper(bool perform_removal) +{ + DBUG_ENTER("Item_sum_sum::add_helper"); + + if (result_type() == DECIMAL_RESULT) + { + if (unlikely(direct_added)) + { + /* Add value stored by Item_sum_sum::direct_add */ + DBUG_ASSERT(!perform_removal); + + direct_added= FALSE; + if (likely(!direct_sum_is_null)) + { + my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs + (curr_dec_buff^1), + &direct_sum_decimal, dec_buffs + curr_dec_buff); + curr_dec_buff^= 1; + null_value= 0; + } + } + else + { + direct_reseted_field= FALSE; + my_decimal value; + const my_decimal *val= aggr->arg_val_decimal(&value); + if (!aggr->arg_is_null(true)) + { + if (perform_removal) + { + if (count > 0) + { + my_decimal_sub(E_DEC_FATAL_ERROR, dec_buffs + (curr_dec_buff ^ 1), + dec_buffs + curr_dec_buff, val); + count--; + } + else + DBUG_VOID_RETURN; + } + else + { + count++; + my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs + (curr_dec_buff ^ 1), + val, dec_buffs + curr_dec_buff); + } + curr_dec_buff^= 1; + null_value= (count > 0) ? 
0 : 1; + } + } + } + else + { + if (unlikely(direct_added)) + { + /* Add value stored by Item_sum_sum::direct_add */ + DBUG_ASSERT(!perform_removal); + + direct_added= FALSE; + if (!direct_sum_is_null) + { + sum+= direct_sum_real; + null_value= 0; + } + } + else + { + direct_reseted_field= FALSE; + if (perform_removal && count > 0) + sum-= aggr->arg_val_real(); + else + sum+= aggr->arg_val_real(); + if (!aggr->arg_is_null(true)) + { + if (perform_removal) + { + if (count > 0) + { + count--; + } + } + else + count++; + + null_value= (count > 0) ? 0 : 1; + } + } + } + DBUG_VOID_RETURN; +} + + +longlong Item_sum_sum::val_int() +{ + DBUG_ASSERT(fixed()); + if (aggr) + aggr->endup(); + if (result_type() == DECIMAL_RESULT) + return dec_buffs[curr_dec_buff].to_longlong(unsigned_flag); + return val_int_from_real(); +} + + +double Item_sum_sum::val_real() +{ + DBUG_ASSERT(fixed()); + if (aggr) + aggr->endup(); + if (result_type() == DECIMAL_RESULT) + sum= dec_buffs[curr_dec_buff].to_double(); + return sum; +} + + +String *Item_sum_sum::val_str(String *str) +{ + if (aggr) + aggr->endup(); + if (result_type() == DECIMAL_RESULT) + return VDec(this).to_string_round(str, decimals); + return val_string_from_real(str); +} + + +my_decimal *Item_sum_sum::val_decimal(my_decimal *val) +{ + if (aggr) + aggr->endup(); + if (result_type() == DECIMAL_RESULT) + return null_value ? NULL : (dec_buffs + curr_dec_buff); + return val_decimal_from_real(val); +} + +void Item_sum_sum::remove() +{ + DBUG_ENTER("Item_sum_sum::remove"); + add_helper(true); + DBUG_VOID_RETURN; +} + +/** + Aggregate a distinct row from the distinct hash table. + + Called for each row into the hash table 'Aggregator_distinct::table'. + Includes the current distinct row into the calculation of the + aggregate value. Uses the Field classes to get the value from the row. + This function is used for AVG/SUM(DISTINCT). 
For COUNT(DISTINCT) + it's called only when there are no blob arguments and the data don't + fit into memory (so Unique makes persisted trees on disk). + + @param element pointer to the row data. + + @return status + @retval FALSE success + @retval TRUE failure +*/ + +bool Aggregator_distinct::unique_walk_function(void *element) +{ + memcpy(table->field[0]->ptr, element, tree_key_length); + item_sum->add(); + return 0; +} + + +/* + A variant of unique_walk_function() that is to be used with Item_sum_count. + + COUNT is a special aggregate function: it doesn't need the values, it only + needs to count them. COUNT needs to know the values are not NULLs, but NULL + values are not put into the Unique, so we don't need to check for NULLs here. +*/ + +bool Aggregator_distinct::unique_walk_function_for_count(void *element) +{ + Item_sum_count *sum= (Item_sum_count *)item_sum; + sum->count++; + return 0; +} + + +Aggregator_distinct::~Aggregator_distinct() +{ + if (tree) + { + delete tree; + tree= NULL; + } + if (table) + { + free_tmp_table(table->in_use, table); + table=NULL; + } + if (tmp_table_param) + { + delete tmp_table_param; + tmp_table_param= NULL; + } +} + + +my_decimal *Aggregator_simple::arg_val_decimal(my_decimal *value) +{ + return item_sum->args[0]->val_decimal(value); +} + + +double Aggregator_simple::arg_val_real() +{ + return item_sum->args[0]->val_real(); +} + + +bool Aggregator_simple::arg_is_null(bool use_null_value) +{ + Item **item= item_sum->args; + const uint item_count= item_sum->arg_count; + if (use_null_value) + { + for (uint i= 0; i < item_count; i++) + { + if (item[i]->null_value) + return true; + } + } + else + { + for (uint i= 0; i < item_count; i++) + { + if (item[i]->maybe_null() && item[i]->is_null()) + return true; + } + } + return false; +} + + +my_decimal *Aggregator_distinct::arg_val_decimal(my_decimal * value) +{ + return use_distinct_values ? 
table->field[0]->val_decimal(value) : + item_sum->args[0]->val_decimal(value); +} + + +double Aggregator_distinct::arg_val_real() +{ + return use_distinct_values ? table->field[0]->val_real() : + item_sum->args[0]->val_real(); +} + + +bool Aggregator_distinct::arg_is_null(bool use_null_value) +{ + if (use_distinct_values) + { + const bool rc= table->field[0]->is_null(); + DBUG_ASSERT(!rc); // NULLs are never stored in 'tree' + return rc; + } + return use_null_value ? + item_sum->args[0]->null_value : + (item_sum->args[0]->maybe_null() && item_sum->args[0]->is_null()); +} + + +Item *Item_sum_count::copy_or_same(THD* thd) +{ + DBUG_ENTER("Item_sum_count::copy_or_same"); + DBUG_RETURN(new (thd->mem_root) Item_sum_count(thd, this)); +} + + +void Item_sum_count::direct_add(longlong add_count) +{ + DBUG_ENTER("Item_sum_count::direct_add"); + DBUG_PRINT("info", ("add_count: %lld", add_count)); + direct_counted= TRUE; + direct_reseted_field= FALSE; + direct_count= add_count; + DBUG_VOID_RETURN; +} + + +void Item_sum_count::clear() +{ + DBUG_ENTER("Item_sum_count::clear"); + count= 0; + DBUG_VOID_RETURN; +} + + +bool Item_sum_count::add() +{ + DBUG_ENTER("Item_sum_count::add"); + if (direct_counted) + { + direct_counted= FALSE; + count+= direct_count; + } + else + { + direct_reseted_field= FALSE; + if (aggr->arg_is_null(false)) + DBUG_RETURN(0); + count++; + } + DBUG_RETURN(0); +} + + +/* + Remove a row. This is used by window functions. 
+*/ + +void Item_sum_count::remove() +{ + DBUG_ASSERT(aggr->Aggrtype() == Aggregator::SIMPLE_AGGREGATOR); + if (aggr->arg_is_null(false)) + return; + if (count > 0) + count--; +} + +longlong Item_sum_count::val_int() +{ + DBUG_ENTER("Item_sum_count::val_int"); + DBUG_ASSERT(fixed()); + if (aggr) + aggr->endup(); + DBUG_RETURN((longlong)count); +} + + +void Item_sum_count::cleanup() +{ + DBUG_ENTER("Item_sum_count::cleanup"); + count= 0; + direct_counted= FALSE; + direct_reseted_field= FALSE; + Item_sum_int::cleanup(); + DBUG_VOID_RETURN; +} + + +/* + Average +*/ + +void Item_sum_avg::fix_length_and_dec_decimal() +{ + Item_sum_sum::fix_length_and_dec_decimal(); + int precision= args[0]->decimal_precision() + prec_increment; + decimals= MY_MIN(args[0]->decimal_scale() + prec_increment, DECIMAL_MAX_SCALE); + max_length= my_decimal_precision_to_length_no_truncation(precision, + decimals, + unsigned_flag); + f_precision= MY_MIN(precision+DECIMAL_LONGLONG_DIGITS, DECIMAL_MAX_PRECISION); + f_scale= args[0]->decimal_scale(); + dec_bin_size= my_decimal_get_binary_size(f_precision, f_scale); +} + + +void Item_sum_avg::fix_length_and_dec_double() +{ + Item_sum_sum::fix_length_and_dec_double(); + decimals= MY_MIN(args[0]->decimals + prec_increment, + FLOATING_POINT_DECIMALS); + max_length= MY_MIN(args[0]->max_length + prec_increment, float_length(decimals)); +} + + +bool Item_sum_avg::fix_length_and_dec(THD *thd) +{ + DBUG_ENTER("Item_sum_avg::fix_length_and_dec"); + prec_increment= current_thd->variables.div_precincrement; + set_maybe_null(); + null_value=1; + if (args[0]->cast_to_int_type_handler()-> + Item_sum_avg_fix_length_and_dec(this)) + DBUG_RETURN(TRUE); + DBUG_PRINT("info", ("Type: %s (%d, %d)", type_handler()->name().ptr(), + max_length, (int) decimals)); + DBUG_RETURN(FALSE); +} + + +Item *Item_sum_avg::copy_or_same(THD* thd) +{ + return new (thd->mem_root) Item_sum_avg(thd, this); +} + + +Field *Item_sum_avg::create_tmp_field(MEM_ROOT *root, bool group, TABLE 
*table) +{ + + if (group) + { + /* + We must store both value and counter in the temporary table in one field. + The easiest way is to do this is to store both value in a string + and unpack on access. + */ + Field *field= new (root) + Field_string(((result_type() == DECIMAL_RESULT) ? + dec_bin_size : sizeof(double)) + sizeof(longlong), + 0, &name, &my_charset_bin); + if (field) + field->init(table); + return field; + } + return tmp_table_field_from_field_type(root, table); +} + + +void Item_sum_avg::clear() +{ + Item_sum_sum::clear(); + count=0; +} + + +bool Item_sum_avg::add() +{ + if (Item_sum_sum::add()) + return TRUE; + if (!aggr->arg_is_null(true)) + count++; + return FALSE; +} + +void Item_sum_avg::remove() +{ + Item_sum_sum::remove(); + if (!aggr->arg_is_null(true)) + { + if (count > 0) + count--; + } +} + +double Item_sum_avg::val_real() +{ + DBUG_ASSERT(fixed()); + if (aggr) + aggr->endup(); + if (!count) + { + null_value=1; + return 0.0; + } + return Item_sum_sum::val_real() / ulonglong2double(count); +} + + +my_decimal *Item_sum_avg::val_decimal(my_decimal *val) +{ + my_decimal cnt; + const my_decimal *sum_dec; + DBUG_ASSERT(fixed()); + if (aggr) + aggr->endup(); + if (!count) + { + null_value=1; + return NULL; + } + + /* + For non-DECIMAL result_type() the division will be done in + Item_sum_avg::val_real(). 
+ */ + if (result_type() != DECIMAL_RESULT) + return val_decimal_from_real(val); + + sum_dec= dec_buffs + curr_dec_buff; + int2my_decimal(E_DEC_FATAL_ERROR, count, 0, &cnt); + my_decimal_div(E_DEC_FATAL_ERROR, val, sum_dec, &cnt, prec_increment); + return val; +} + + +String *Item_sum_avg::val_str(String *str) +{ + if (aggr) + aggr->endup(); + if (result_type() == DECIMAL_RESULT) + return VDec(this).to_string_round(str, decimals); + return val_string_from_real(str); +} + + +/* + Standard deviation +*/ + +double Item_sum_std::val_real() +{ + DBUG_ASSERT(fixed()); + double nr= Item_sum_variance::val_real(); + if (std::isnan(nr)) + { + /* + variance_fp_recurrence_next() can overflow in some cases and return "nan": + + CREATE OR REPLACE TABLE t1 (a DOUBLE); + INSERT INTO t1 VALUES (1.7e+308), (-1.7e+308), (0); + SELECT STDDEV_SAMP(a) FROM t1; + */ + null_value= true; // Convert "nan" to NULL + return 0; + } + if (std::isinf(nr)) + return DBL_MAX; + DBUG_ASSERT(nr >= 0.0); + return sqrt(nr); +} + +Item *Item_sum_std::copy_or_same(THD* thd) +{ + return new (thd->mem_root) Item_sum_std(thd, this); +} + + +Item *Item_sum_std::result_item(THD *thd, Field *field) +{ + return new (thd->mem_root) Item_std_field(thd, this); +} + + +/* + Variance +*/ + + +/** + Variance implementation for floating-point implementations, without + catastrophic cancellation, from Knuth's _TAoCP_, 3rd ed, volume 2, pg232. + This alters the value at m, s, and increments count. +*/ + +/* + These two functions are used by the Item_sum_variance and the + Item_variance_field classes, which are unrelated, and each need to calculate + variance. The difference between the two classes is that the first is used + for a mundane SELECT, while the latter is used in a GROUPing SELECT. 
+*/ +void Stddev::recurrence_next(double nr) +{ + if (!m_count++) + { + DBUG_ASSERT(m_m == 0); + DBUG_ASSERT(m_s == 0); + m_m= nr; + } + else + { + double m_kminusone= m_m; + volatile double diff= nr - m_kminusone; + m_m= m_kminusone + diff / (double) m_count; + m_s= m_s + diff * (nr - m_m); + } +} + + +double Stddev::result(bool is_sample_variance) +{ + if (m_count == 1) + return 0.0; + + if (is_sample_variance) + return m_s / (m_count - 1); + + /* else, is a population variance */ + return m_s / m_count; +} + + +Item_sum_variance::Item_sum_variance(THD *thd, Item_sum_variance *item): + Item_sum_double(thd, item), + m_stddev(item->m_stddev), sample(item->sample), + prec_increment(item->prec_increment) +{ } + + +void Item_sum_variance::fix_length_and_dec_double() +{ + DBUG_ASSERT(Item_sum_variance::type_handler() == &type_handler_double); + decimals= MY_MIN(args[0]->decimals + 4, FLOATING_POINT_DECIMALS); +} + + +void Item_sum_variance::fix_length_and_dec_decimal() +{ + DBUG_ASSERT(Item_sum_variance::type_handler() == &type_handler_double); + int precision= args[0]->decimal_precision() * 2 + prec_increment; + decimals= MY_MIN(args[0]->decimals + prec_increment, + FLOATING_POINT_DECIMALS - 1); + max_length= my_decimal_precision_to_length_no_truncation(precision, + decimals, + unsigned_flag); +} + + +bool Item_sum_variance::fix_length_and_dec(THD *thd) +{ + DBUG_ENTER("Item_sum_variance::fix_length_and_dec"); + set_maybe_null(); + null_value= 1; + prec_increment= current_thd->variables.div_precincrement; + + /* + According to the SQL2003 standard (Part 2, Foundations; sec 10.9, + aggregate function; paragraph 7h of Syntax Rules), "the declared + type of the result is an implementation-defined approximate numeric + type. 
+ */ + if (args[0]->type_handler()->Item_sum_variance_fix_length_and_dec(this)) + DBUG_RETURN(TRUE); + DBUG_PRINT("info", ("Type: %s (%d, %d)", type_handler()->name().ptr(), + max_length, (int)decimals)); + DBUG_RETURN(FALSE); +} + + +Item *Item_sum_variance::copy_or_same(THD* thd) +{ + return new (thd->mem_root) Item_sum_variance(thd, this); +} + + +/** + Create a new field to match the type of value we're expected to yield. + If we're grouping, then we need some space to serialize variables into, to + pass around. +*/ +Field *Item_sum_variance::create_tmp_field(MEM_ROOT *root, + bool group, TABLE *table) +{ + Field *field; + if (group) + { + /* + We must store both value and counter in the temporary table in one field. + The easiest way is to do this is to store both value in a string + and unpack on access. + */ + field= new (root) Field_string(Stddev::binary_size(), 0, + &name, &my_charset_bin); + } + else + field= new (root) Field_double(max_length, maybe_null(), &name, decimals, + TRUE); + + if (field != NULL) + field->init(table); + + return field; +} + + +void Item_sum_variance::clear() +{ + m_stddev= Stddev(); +} + +bool Item_sum_variance::add() +{ + /* + Why use a temporary variable? We don't know if it is null until we + evaluate it, which has the side-effect of setting null_value . + */ + double nr= args[0]->val_real(); + + if (!args[0]->null_value) + m_stddev.recurrence_next(nr); + return 0; +} + +double Item_sum_variance::val_real() +{ + DBUG_ASSERT(fixed()); + + /* + 'sample' is a 1/0 boolean value. If it is 1/true, id est this is a sample + variance call, then we should set nullness when the count of the items + is one or zero. If it's zero, i.e. a population variance, then we only + set nullness when the count is zero. + + Another way to read it is that 'sample' is the numerical threshold, at and + below which a 'count' number of items is called NULL. 
+ */ + DBUG_ASSERT((sample == 0) || (sample == 1)); + if (m_stddev.count() <= sample) + { + null_value=1; + return 0.0; + } + + null_value=0; + return m_stddev.result(sample); +} + + +void Item_sum_variance::reset_field() +{ + double nr; + uchar *res= result_field->ptr; + + nr= args[0]->val_real(); /* sets null_value as side-effect */ + + if (args[0]->null_value) + bzero(res,Stddev::binary_size()); + else + Stddev(nr).to_binary(res); +} + + +Stddev::Stddev(const uchar *ptr) +{ + float8get(m_m, ptr); + float8get(m_s, ptr + sizeof(double)); + m_count= sint8korr(ptr + sizeof(double) * 2); +} + + +void Stddev::to_binary(uchar *ptr) const +{ + /* Serialize format is (double)m, (double)s, (longlong)count */ + float8store(ptr, m_m); + float8store(ptr + sizeof(double), m_s); + ptr+= sizeof(double)*2; + int8store(ptr, m_count); +} + + +void Item_sum_variance::update_field() +{ + uchar *res=result_field->ptr; + + double nr= args[0]->val_real(); /* sets null_value as side-effect */ + + if (args[0]->null_value) + return; + + /* Serialize format is (double)m, (double)s, (longlong)count */ + Stddev field_stddev(res); + field_stddev.recurrence_next(nr); + field_stddev.to_binary(res); +} + + +Item *Item_sum_variance::result_item(THD *thd, Field *field) +{ + return new (thd->mem_root) Item_variance_field(thd, this); +} + +/* min & max */ + +void Item_sum_min_max::clear() +{ + DBUG_ENTER("Item_sum_min_max::clear"); + /* + We should not clear const items (from SELECT MIN(key) from t1) as then we would loose the + value cached in opt_sum_query() where we replace MIN/MAX/COUNT with constants. 
+ */ + if (!const_item()) + { + value->clear(); + null_value= 1; + } + DBUG_VOID_RETURN; +} + + +bool +Item_sum_min_max::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + DBUG_ASSERT(fixed()); + if (null_value) + return true; + bool retval= value->get_date(thd, ltime, fuzzydate); + if ((null_value= value->null_value)) + DBUG_ASSERT(retval == true); + return retval; +} + + +void Item_sum_min_max::direct_add(Item *item) +{ + DBUG_ENTER("Item_sum_min_max::direct_add"); + DBUG_PRINT("info", ("item: %p", item)); + direct_added= TRUE; + direct_item= item; + DBUG_VOID_RETURN; +} + + +double Item_sum_min_max::val_real() +{ + DBUG_ENTER("Item_sum_min_max::val_real"); + DBUG_ASSERT(fixed()); + if (null_value) + DBUG_RETURN(0.0); + double retval= value->val_real(); + if ((null_value= value->null_value)) + DBUG_ASSERT(retval == 0.0); + DBUG_RETURN(retval); +} + +longlong Item_sum_min_max::val_int() +{ + DBUG_ENTER("Item_sum_min_max::val_int"); + DBUG_ASSERT(fixed()); + if (null_value) + DBUG_RETURN(0); + longlong retval= value->val_int(); + if ((null_value= value->null_value)) + DBUG_ASSERT(retval == 0); + DBUG_RETURN(retval); +} + + +my_decimal *Item_sum_min_max::val_decimal(my_decimal *val) +{ + DBUG_ENTER("Item_sum_min_max::val_decimal"); + DBUG_ASSERT(fixed()); + if (null_value) + DBUG_RETURN(0); + my_decimal *retval= value->val_decimal(val); + if ((null_value= value->null_value)) + DBUG_ASSERT(retval == NULL); + DBUG_RETURN(retval); +} + + +String * +Item_sum_min_max::val_str(String *str) +{ + DBUG_ENTER("Item_sum_min_max::val_str"); + DBUG_ASSERT(fixed()); + if (null_value) + DBUG_RETURN(0); + String *retval= value->val_str(str); + if ((null_value= value->null_value)) + DBUG_ASSERT(retval == NULL); + DBUG_RETURN(retval); +} + + +bool Item_sum_min_max::val_native(THD *thd, Native *to) +{ + DBUG_ASSERT(fixed()); + if (null_value) + return true; + return val_native_from_item(thd, value, to); +} + + +void Item_sum_min_max::cleanup() +{ + 
DBUG_ENTER("Item_sum_min_max::cleanup"); + Item_sum::cleanup(); + if (cmp) + delete cmp; + cmp= 0; + /* + by default it is TRUE to avoid TRUE reporting by + Item_func_not_all/Item_func_nop_all if this item was never called. + + no_rows_in_result() set it to FALSE if was not results found. + If some results found it will be left unchanged. + */ + was_values= TRUE; + DBUG_VOID_RETURN; +} + +void Item_sum_min_max::no_rows_in_result() +{ + DBUG_ENTER("Item_sum_min_max::no_rows_in_result"); + /* We may be called here twice in case of ref field in function */ + if (was_values) + { + bool org_const_item_cache= const_item_cache; + was_values= FALSE; + was_null_value= value->null_value; + const_item_cache= 0; // Ensure that clear works on const items + clear(); + const_item_cache= org_const_item_cache; + } + DBUG_VOID_RETURN; +} + +void Item_sum_min_max::restore_to_before_no_rows_in_result() +{ + if (!was_values) + { + was_values= TRUE; + null_value= value->null_value= was_null_value; + } +} + + +Item *Item_sum_min::copy_or_same(THD* thd) +{ + DBUG_ENTER("Item_sum_min::copy_or_same"); + Item_sum_min *item= new (thd->mem_root) Item_sum_min(thd, this); + item->setup_hybrid(thd, args[0], value); + DBUG_RETURN(item); +} + + +bool Item_sum_min::add() +{ + Item *UNINIT_VAR(tmp_item); + DBUG_ENTER("Item_sum_min::add"); + DBUG_PRINT("enter", ("this: %p", this)); + + if (unlikely(direct_added)) + { + /* Change to use direct_item */ + tmp_item= arg_cache->get_item(); + arg_cache->store(direct_item); + } + DBUG_PRINT("info", ("null_value: %s", null_value ? 
"TRUE" : "FALSE")); + /* args[0] < value */ + arg_cache->cache_value(); + if (!arg_cache->null_value && + (null_value || cmp->compare() < 0)) + { + value->store(arg_cache); + value->cache_value(); + null_value= 0; + } + if (unlikely(direct_added)) + { + /* Restore original item */ + direct_added= FALSE; + arg_cache->store(tmp_item); + } + DBUG_RETURN(0); +} + + +Item *Item_sum_max::copy_or_same(THD* thd) +{ + Item_sum_max *item= new (thd->mem_root) Item_sum_max(thd, this); + item->setup_hybrid(thd, args[0], value); + return item; +} + + +bool Item_sum_max::add() +{ + Item * UNINIT_VAR(tmp_item); + DBUG_ENTER("Item_sum_max::add"); + DBUG_PRINT("enter", ("this: %p", this)); + + if (unlikely(direct_added)) + { + /* Change to use direct_item */ + tmp_item= arg_cache->get_item(); + arg_cache->store(direct_item); + } + /* args[0] > value */ + arg_cache->cache_value(); + DBUG_PRINT("info", ("null_value: %s", null_value ? "TRUE" : "FALSE")); + if (!arg_cache->null_value && + (null_value || cmp->compare() > 0)) + { + value->store(arg_cache); + value->cache_value(); + null_value= 0; + } + if (unlikely(direct_added)) + { + /* Restore original item */ + direct_added= FALSE; + arg_cache->store(tmp_item); + } + DBUG_RETURN(0); +} + + +/* bit_or and bit_and */ + +longlong Item_sum_bit::val_int() +{ + DBUG_ASSERT(fixed()); + return (longlong) bits; +} + + +void Item_sum_bit::clear() +{ + bits= reset_bits; + if (as_window_function) + clear_as_window(); +} + +Item *Item_sum_or::copy_or_same(THD* thd) +{ + return new (thd->mem_root) Item_sum_or(thd, this); +} + +bool Item_sum_bit::clear_as_window() +{ + memset(bit_counters, 0, sizeof(bit_counters)); + num_values_added= 0; + set_bits_from_counters(); + return 0; +} + +bool Item_sum_bit::remove_as_window(ulonglong value) +{ + DBUG_ASSERT(as_window_function); + if (num_values_added == 0) + return 0; // Nothing to remove. 
+ + for (int i= 0; i < NUM_BIT_COUNTERS; i++) + { + if (!bit_counters[i]) + { + // Don't attempt to remove values that were never added. + DBUG_ASSERT((value & (1ULL << i)) == 0); + continue; + } + bit_counters[i]-= (value & (1ULL << i)) ? 1 : 0; + } + + // Prevent overflow; + num_values_added = MY_MIN(num_values_added, num_values_added - 1); + set_bits_from_counters(); + return 0; +} + +bool Item_sum_bit::add_as_window(ulonglong value) +{ + DBUG_ASSERT(as_window_function); + for (int i= 0; i < NUM_BIT_COUNTERS; i++) + { + bit_counters[i]+= (value & (1ULL << i)) ? 1 : 0; + } + // Prevent overflow; + num_values_added = MY_MAX(num_values_added, num_values_added + 1); + set_bits_from_counters(); + return 0; +} + +void Item_sum_or::set_bits_from_counters() +{ + ulonglong value= 0; + for (uint i= 0; i < NUM_BIT_COUNTERS; i++) + { + value|= bit_counters[i] > 0 ? (1ULL << i) : 0ULL; + } + bits= value | reset_bits; +} + +bool Item_sum_or::add() +{ + ulonglong value= (ulonglong) args[0]->val_int(); + if (!args[0]->null_value) + { + if (as_window_function) + return add_as_window(value); + bits|=value; + } + return 0; +} + +void Item_sum_xor::set_bits_from_counters() +{ + ulonglong value= 0; + for (int i= 0; i < NUM_BIT_COUNTERS; i++) + { + value|= (bit_counters[i] % 2) ? (1 << i) : 0; + } + bits= value ^ reset_bits; +} + +Item *Item_sum_xor::copy_or_same(THD* thd) +{ + return new (thd->mem_root) Item_sum_xor(thd, this); +} + + +bool Item_sum_xor::add() +{ + ulonglong value= (ulonglong) args[0]->val_int(); + if (!args[0]->null_value) + { + if (as_window_function) + return add_as_window(value); + bits^=value; + } + return 0; +} + +void Item_sum_and::set_bits_from_counters() +{ + ulonglong value= 0; + if (!num_values_added) + { + bits= reset_bits; + return; + } + + for (int i= 0; i < NUM_BIT_COUNTERS; i++) + { + // We've only added values of 1 for this bit. 
+ if (bit_counters[i] == num_values_added) + value|= (1ULL << i); + } + bits= value & reset_bits; +} +Item *Item_sum_and::copy_or_same(THD* thd) +{ + return new (thd->mem_root) Item_sum_and(thd, this); +} + + +bool Item_sum_and::add() +{ + ulonglong value= (ulonglong) args[0]->val_int(); + if (!args[0]->null_value) + { + if (as_window_function) + return add_as_window(value); + bits&=value; + } + return 0; +} + +/************************************************************************ +** reset result of a Item_sum with is saved in a tmp_table +*************************************************************************/ + +void Item_sum_min_max::reset_field() +{ + Item *UNINIT_VAR(tmp_item), *arg0; + DBUG_ENTER("Item_sum_min_max::reset_field"); + + arg0= args[0]; + if (unlikely(direct_added)) + { + /* Switch to use direct item */ + tmp_item= value->get_item(); + value->store(direct_item); + arg0= direct_item; + } + + switch(result_type()) { + case STRING_RESULT: + { + char buff[MAX_FIELD_WIDTH]; + String tmp(buff,sizeof(buff),result_field->charset()),*res; + + res= arg0->val_str(&tmp); + if (arg0->null_value) + { + result_field->set_null(); + result_field->reset(); + } + else + { + result_field->set_notnull(); + result_field->store(res->ptr(),res->length(),tmp.charset()); + } + break; + } + case INT_RESULT: + { + longlong nr= arg0->val_int(); + + if (maybe_null()) + { + if (arg0->null_value) + { + nr=0; + result_field->set_null(); + } + else + result_field->set_notnull(); + } + DBUG_PRINT("info", ("nr: %lld", nr)); + result_field->store(nr, unsigned_flag); + break; + } + case REAL_RESULT: + { + double nr= arg0->val_real(); + + if (maybe_null()) + { + if (arg0->null_value) + { + nr=0.0; + result_field->set_null(); + } + else + result_field->set_notnull(); + } + result_field->store(nr); + break; + } + case DECIMAL_RESULT: + { + VDec arg_dec(arg0); + + if (maybe_null()) + { + if (arg_dec.is_null()) + result_field->set_null(); + else + result_field->set_notnull(); + } + 
/* + We must store zero in the field as we will use the field value in + add() + */ + result_field->store_decimal(arg_dec.ptr_or(&decimal_zero)); + break; + } + case ROW_RESULT: + case TIME_RESULT: + DBUG_ASSERT(0); + } + + if (unlikely(direct_added)) + { + direct_added= FALSE; + value->store(tmp_item); + } + DBUG_VOID_RETURN; +} + + +void Item_sum_sum::reset_field() +{ + my_bool null_flag; + DBUG_ASSERT (aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR); + if (result_type() == DECIMAL_RESULT) + { + if (unlikely(direct_added)) + result_field->store_decimal(&direct_sum_decimal); + else + result_field->store_decimal(VDec(args[0]).ptr_or(&decimal_zero)); + } + else + { + DBUG_ASSERT(result_type() == REAL_RESULT); + double nr= likely(!direct_added) ? args[0]->val_real() : direct_sum_real; + float8store(result_field->ptr, nr); + } + + if (unlikely(direct_added)) + { + direct_added= FALSE; + direct_reseted_field= TRUE; + null_flag= direct_sum_is_null; + } + else + null_flag= args[0]->null_value; + + if (null_flag) + result_field->set_null(); + else + result_field->set_notnull(); +} + + +void Item_sum_count::reset_field() +{ + DBUG_ENTER("Item_sum_count::reset_field"); + uchar *res=result_field->ptr; + longlong nr=0; + DBUG_ASSERT (aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR); + + if (unlikely(direct_counted)) + { + nr= direct_count; + direct_counted= FALSE; + direct_reseted_field= TRUE; + } + else if (!args[0]->maybe_null() || !args[0]->is_null()) + nr= 1; + DBUG_PRINT("info", ("nr: %lld", nr)); + int8store(res,nr); + DBUG_VOID_RETURN; +} + + +void Item_sum_avg::reset_field() +{ + uchar *res=result_field->ptr; + DBUG_ASSERT (aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR); + if (result_type() == DECIMAL_RESULT) + { + longlong tmp; + VDec value(args[0]); + tmp= value.is_null() ? 
0 : 1; + value.to_binary(res, f_precision, f_scale); + res+= dec_bin_size; + int8store(res, tmp); + } + else + { + double nr= args[0]->val_real(); + + if (args[0]->null_value) + bzero(res,sizeof(double)+sizeof(longlong)); + else + { + longlong tmp= 1; + float8store(res,nr); + res+=sizeof(double); + int8store(res,tmp); + } + } +} + + +void Item_sum_bit::reset_field() +{ + reset_and_add(); + int8store(result_field->ptr, bits); +} + +void Item_sum_bit::update_field() +{ + // We never call update_field when computing the function as a window + // function. Setting bits to a random value invalidates the bits counters and + // the result of the bit function becomes erroneous. + DBUG_ASSERT(!as_window_function); + uchar *res=result_field->ptr; + bits= uint8korr(res); + add(); + int8store(res, bits); +} + + +/** + calc next value and merge it with field_value. +*/ + +void Item_sum_sum::update_field() +{ + DBUG_ASSERT (aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR); + if (result_type() == DECIMAL_RESULT) + { + my_decimal value, *arg_val; + my_bool null_flag; + if (unlikely(direct_added || direct_reseted_field)) + { + direct_added= direct_reseted_field= FALSE; + arg_val= &direct_sum_decimal; + null_flag= direct_sum_is_null; + } + else + { + arg_val= args[0]->val_decimal(&value); + null_flag= args[0]->null_value; + } + + if (!null_flag) + { + if (!result_field->is_null()) + { + my_decimal field_value(result_field); + my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs, arg_val, &field_value); + result_field->store_decimal(dec_buffs); + } + else + { + result_field->store_decimal(arg_val); + result_field->set_notnull(); + } + } + } + else + { + double old_nr,nr; + uchar *res= result_field->ptr; + my_bool null_flag; + + float8get(old_nr,res); + if (unlikely(direct_added || direct_reseted_field)) + { + direct_added= direct_reseted_field= FALSE; + null_flag= direct_sum_is_null; + nr= direct_sum_real; + } + else + { + nr= args[0]->val_real(); + null_flag= args[0]->null_value; + } + 
if (!null_flag) + { + old_nr+=nr; + result_field->set_notnull(); + } + float8store(res,old_nr); + } +} + + +void Item_sum_count::update_field() +{ + DBUG_ENTER("Item_sum_count::update_field"); + longlong nr; + uchar *res=result_field->ptr; + + nr=sint8korr(res); + if (unlikely(direct_counted || direct_reseted_field)) + { + direct_counted= direct_reseted_field= FALSE; + nr+= direct_count; + } + else if (!args[0]->maybe_null() || !args[0]->is_null()) + nr++; + DBUG_PRINT("info", ("nr: %lld", nr)); + int8store(res,nr); + DBUG_VOID_RETURN; +} + + +void Item_sum_avg::update_field() +{ + longlong field_count; + uchar *res=result_field->ptr; + + DBUG_ASSERT (aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR); + + if (result_type() == DECIMAL_RESULT) + { + VDec tmp(args[0]); + if (!tmp.is_null()) + { + binary2my_decimal(E_DEC_FATAL_ERROR, res, + dec_buffs + 1, f_precision, f_scale); + field_count= sint8korr(res + dec_bin_size); + my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs, tmp.ptr(), dec_buffs + 1); + dec_buffs->to_binary(res, f_precision, f_scale); + res+= dec_bin_size; + field_count++; + int8store(res, field_count); + } + } + else + { + double nr; + + nr= args[0]->val_real(); + if (!args[0]->null_value) + { + double old_nr; + float8get(old_nr, res); + field_count= sint8korr(res + sizeof(double)); + old_nr+= nr; + float8store(res,old_nr); + res+= sizeof(double); + field_count++; + int8store(res, field_count); + } + } +} + + +Item *Item_sum_avg::result_item(THD *thd, Field *field) +{ + return + result_type() == DECIMAL_RESULT ? 
+ (Item_avg_field*) new (thd->mem_root) Item_avg_field_decimal(thd, this) : + (Item_avg_field*) new (thd->mem_root) Item_avg_field_double(thd, this); +} + + +void Item_sum_min_max::update_field() +{ + DBUG_ENTER("Item_sum_min_max::update_field"); + Item *UNINIT_VAR(tmp_item); + if (unlikely(direct_added)) + { + tmp_item= args[0]; + args[0]= direct_item; + } + if (Item_sum_min_max::type_handler()->is_val_native_ready()) + { + /* + TODO-10.5: change Item_sum_min_max to use val_native() for all data types + - make all type handlers val_native() ready + - use min_max_update_native_field() for all data types + - remove Item_sum_min_max::min_max_update_{str|real|int|decimal}_field() + */ + min_max_update_native_field(); + } + else + { + switch (Item_sum_min_max::type_handler()->cmp_type()) { + case STRING_RESULT: + case TIME_RESULT: + min_max_update_str_field(); + break; + case INT_RESULT: + min_max_update_int_field(); + break; + case DECIMAL_RESULT: + min_max_update_decimal_field(); + break; + default: + min_max_update_real_field(); + } + } + if (unlikely(direct_added)) + { + direct_added= FALSE; + args[0]= tmp_item; + } + DBUG_VOID_RETURN; +} + + +void Arg_comparator::min_max_update_field_native(THD *thd, + Field *field, + Item *item, + int cmp_sign) +{ + DBUG_ENTER("Arg_comparator::min_max_update_field_native"); + if (!item->val_native(current_thd, &m_native2)) + { + if (field->is_null()) + field->store_native(m_native2); // The first non-null value + else + { + field->val_native(&m_native1); + if ((cmp_sign * m_compare_handler->cmp_native(m_native2, m_native1)) < 0) + field->store_native(m_native2); + } + field->set_notnull(); + } + DBUG_VOID_RETURN; +} + + +void +Item_sum_min_max::min_max_update_native_field() +{ + DBUG_ENTER("Item_sum_min_max::min_max_update_native_field"); + DBUG_ASSERT(cmp); + DBUG_ASSERT(type_handler_for_comparison() == cmp->compare_type_handler()); + THD *thd= current_thd; + cmp->min_max_update_field_native(thd, result_field, args[0], 
cmp_sign); + DBUG_VOID_RETURN; +} + + +void +Item_sum_min_max::min_max_update_str_field() +{ + DBUG_ENTER("Item_sum_min_max::min_max_update_str_field"); + DBUG_ASSERT(cmp); + String *res_str=args[0]->val_str(&cmp->value1); + + if (!args[0]->null_value) + { + if (result_field->is_null()) + result_field->store(res_str->ptr(),res_str->length(),res_str->charset()); + else + { + result_field->val_str(&cmp->value2); + if ((cmp_sign * sortcmp(res_str,&cmp->value2,collation.collation)) < 0) + result_field->store(res_str->ptr(),res_str->length(),res_str->charset()); + } + result_field->set_notnull(); + } + DBUG_VOID_RETURN; +} + + +void +Item_sum_min_max::min_max_update_real_field() +{ + double nr,old_nr; + + DBUG_ENTER("Item_sum_min_max::min_max_update_real_field"); + old_nr=result_field->val_real(); + nr= args[0]->val_real(); + if (!args[0]->null_value) + { + if (result_field->is_null(0) || + (cmp_sign > 0 ? old_nr > nr : old_nr < nr)) + old_nr=nr; + result_field->set_notnull(); + } + else if (result_field->is_null(0)) + result_field->set_null(); + result_field->store(old_nr); + DBUG_VOID_RETURN; +} + + +void +Item_sum_min_max::min_max_update_int_field() +{ + longlong nr,old_nr; + + DBUG_ENTER("Item_sum_min_max::min_max_update_int_field"); + old_nr=result_field->val_int(); + nr=args[0]->val_int(); + if (!args[0]->null_value) + { + if (result_field->is_null(0)) + old_nr=nr; + else + { + bool res=(unsigned_flag ? 
+ (ulonglong) old_nr > (ulonglong) nr : + old_nr > nr); + /* (cmp_sign > 0 && res) || (!(cmp_sign > 0) && !res) */ + if ((cmp_sign > 0) ^ (!res)) + old_nr=nr; + } + result_field->set_notnull(); + } + else if (result_field->is_null(0)) + result_field->set_null(); + DBUG_PRINT("info", ("nr: %lld", old_nr)); + result_field->store(old_nr, unsigned_flag); + DBUG_VOID_RETURN; +} + + +/** + @todo + optimize: do not get result_field in case of args[0] is NULL +*/ +void +Item_sum_min_max::min_max_update_decimal_field() +{ + DBUG_ENTER("Item_sum_min_max::min_max_update_decimal_field"); + my_decimal old_val, nr_val; + const my_decimal *old_nr; + const my_decimal *nr= args[0]->val_decimal(&nr_val); + if (!args[0]->null_value) + { + if (result_field->is_null(0)) + old_nr=nr; + else + { + old_nr= result_field->val_decimal(&old_val); + bool res= my_decimal_cmp(old_nr, nr) > 0; + /* (cmp_sign > 0 && res) || (!(cmp_sign > 0) && !res) */ + if ((cmp_sign > 0) ^ (!res)) + old_nr=nr; + } + result_field->set_notnull(); + result_field->store_decimal(old_nr); + } + else if (result_field->is_null(0)) + result_field->set_null(); + DBUG_VOID_RETURN; +} + + +double Item_avg_field_double::val_real() +{ + // fix_fields() never calls for this Item + double nr; + longlong count; + uchar *res; + + float8get(nr,field->ptr); + res= (field->ptr+sizeof(double)); + count= sint8korr(res); + + if ((null_value= !count)) + return 0.0; + return nr/(double) count; +} + + +my_decimal *Item_avg_field_decimal::val_decimal(my_decimal *dec_buf) +{ + // fix_fields() never calls for this Item + longlong count= sint8korr(field->ptr + dec_bin_size); + if ((null_value= !count)) + return 0; + + my_decimal dec_count, dec_field(field->ptr, f_precision, f_scale); + int2my_decimal(E_DEC_FATAL_ERROR, count, 0, &dec_count); + my_decimal_div(E_DEC_FATAL_ERROR, dec_buf, + &dec_field, &dec_count, prec_increment); + return dec_buf; +} + + +double Item_std_field::val_real() +{ + double nr; + // fix_fields() never calls for this 
Item + nr= Item_variance_field::val_real(); + DBUG_ASSERT(nr >= 0.0); + return sqrt(nr); +} + + +double Item_variance_field::val_real() +{ + // fix_fields() never calls for this Item + Stddev tmp(field->ptr); + if ((null_value= (tmp.count() <= sample))) + return 0.0; + + return tmp.result(sample); +} + + +/**************************************************************************** +** Functions to handle dynamic loadable aggregates +** Original source by: Alexis Mikhailov +** Adapted for UDAs by: Andreas F. Bobak . +** Rewritten by: Monty. +****************************************************************************/ + +#ifdef HAVE_DLOPEN + +void Item_udf_sum::clear() +{ + DBUG_ENTER("Item_udf_sum::clear"); + udf.clear(); + DBUG_VOID_RETURN; +} + +bool Item_udf_sum::add() +{ + my_bool tmp_null_value; + DBUG_ENTER("Item_udf_sum::add"); + udf.add(&tmp_null_value); + null_value= tmp_null_value; + DBUG_RETURN(0); +} + + +bool Item_udf_sum::supports_removal() const +{ + DBUG_ENTER("Item_udf_sum::supports_remove"); + DBUG_PRINT("info", ("support: %d", udf.supports_removal())); + DBUG_RETURN(udf.supports_removal()); +} + + +void Item_udf_sum::remove() +{ + my_bool tmp_null_value; + DBUG_ENTER("Item_udf_sum::remove"); + udf.remove(&tmp_null_value); + null_value= tmp_null_value; + DBUG_VOID_RETURN; +} + + +void Item_udf_sum::cleanup() +{ + /* + udf_handler::cleanup() nicely handles case when we have not + original item but one created by copy_or_same() method. 
+ */ + udf.cleanup(); + Item_sum::cleanup(); +} + + +void Item_udf_sum::print(String *str, enum_query_type query_type) +{ + str->append(func_name_cstring()); + str->append('('); + for (uint i=0 ; i < arg_count ; i++) + { + if (i) + str->append(','); + args[i]->print(str, query_type); + } + str->append(')'); +} + + +Item *Item_sum_udf_float::copy_or_same(THD* thd) +{ + return new (thd->mem_root) Item_sum_udf_float(thd, this); +} + +double Item_sum_udf_float::val_real() +{ + my_bool tmp_null_value; + double res; + DBUG_ASSERT(fixed()); + DBUG_ENTER("Item_sum_udf_float::val"); + DBUG_PRINT("enter",("result_type: %d arg_count: %d", + args[0]->result_type(), arg_count)); + res= udf.val(&tmp_null_value); + null_value= tmp_null_value; + DBUG_RETURN(res); +} + + +String *Item_sum_udf_float::val_str(String *str) +{ + return val_string_from_real(str); +} + + +my_decimal *Item_sum_udf_float::val_decimal(my_decimal *dec) +{ + return val_decimal_from_real(dec); +} + + +my_decimal *Item_sum_udf_decimal::val_decimal(my_decimal *dec_buf) +{ + my_decimal *res; + my_bool tmp_null_value; + DBUG_ASSERT(fixed()); + DBUG_ENTER("Item_func_udf_decimal::val_decimal"); + DBUG_PRINT("enter",("result_type: %d arg_count: %d", + args[0]->result_type(), arg_count)); + + res= udf.val_decimal(&tmp_null_value, dec_buf); + null_value= tmp_null_value; + DBUG_RETURN(res); +} + + +Item *Item_sum_udf_decimal::copy_or_same(THD* thd) +{ + return new (thd->mem_root) Item_sum_udf_decimal(thd, this); +} + + +Item *Item_sum_udf_int::copy_or_same(THD* thd) +{ + return new (thd->mem_root) Item_sum_udf_int(thd, this); +} + +longlong Item_sum_udf_int::val_int() +{ + my_bool tmp_null_value; + longlong res; + DBUG_ASSERT(fixed()); + DBUG_ENTER("Item_sum_udf_int::val_int"); + DBUG_PRINT("enter",("result_type: %d arg_count: %d", + args[0]->result_type(), arg_count)); + res= udf.val_int(&tmp_null_value); + null_value= tmp_null_value; + DBUG_RETURN(res); +} + + +String *Item_sum_udf_int::val_str(String *str) +{ + 
return val_string_from_int(str); +} + +my_decimal *Item_sum_udf_int::val_decimal(my_decimal *dec) +{ + return val_decimal_from_int(dec); +} + + +/** Default max_length is max argument length. */ + +bool Item_sum_udf_str::fix_length_and_dec(THD *thd) +{ + DBUG_ENTER("Item_sum_udf_str::fix_length_and_dec"); + max_length=0; + for (uint i = 0; i < arg_count; i++) + set_if_bigger(max_length,args[i]->max_length); + DBUG_RETURN(FALSE); +} + + +Item *Item_sum_udf_str::copy_or_same(THD* thd) +{ + return new (thd->mem_root) Item_sum_udf_str(thd, this); +} + + +my_decimal *Item_sum_udf_str::val_decimal(my_decimal *dec) +{ + return val_decimal_from_string(dec); +} + +String *Item_sum_udf_str::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + DBUG_ENTER("Item_sum_udf_str::str"); + String *res=udf.val_str(str,&str_value); + null_value = !res; + DBUG_RETURN(res); +} + +#endif /* HAVE_DLOPEN */ + + +/***************************************************************************** + GROUP_CONCAT function + + SQL SYNTAX: + GROUP_CONCAT([DISTINCT] expr,... [ORDER BY col [ASC|DESC],...] + [SEPARATOR str_const]) + + concat of values from "group by" operation + + BUGS + Blobs doesn't work with DISTINCT or ORDER BY +*****************************************************************************/ + + + +/** + Compares the values for fields in expr list of GROUP_CONCAT. + @note + + GROUP_CONCAT([DISTINCT] expr [,expr ...] + [ORDER BY {unsigned_integer | col_name | expr} + [ASC | DESC] [,col_name ...]] + [SEPARATOR str_val]) + + @return + @retval -1 : key1 < key2 + @retval 0 : key1 = key2 + @retval 1 : key1 > key2 +*/ + +extern "C" +int group_concat_key_cmp_with_distinct(void* arg, const void* key1, + const void* key2) +{ + Item_func_group_concat *item_func= (Item_func_group_concat*)arg; + + for (uint i= 0; i < item_func->arg_count_field; i++) + { + Item *item= item_func->args[i]; + /* + If item is a const item then either get_tmp_table_field returns 0 + or it is an item over a const table. 
+ */ + if (item->const_item()) + continue; + /* + We have to use get_tmp_table_field() instead of + real_item()->get_tmp_table_field() because we want the field in + the temporary table, not the original field + */ + Field *field= item->get_tmp_table_field(); + + if (!field) + continue; + + uint offset= (field->offset(field->table->record[0]) - + field->table->s->null_bytes); + int res= field->cmp((uchar*)key1 + offset, (uchar*)key2 + offset); + if (res) + return res; + } + return 0; +} + + +/* + @brief + Comparator function for DISTINCT clause taking into account NULL values. + + @note + Used for JSON_ARRAYAGG function +*/ + +int group_concat_key_cmp_with_distinct_with_nulls(void* arg, + const void* key1_arg, + const void* key2_arg) +{ + Item_func_group_concat *item_func= (Item_func_group_concat*)arg; + + uchar *key1= (uchar*)key1_arg + item_func->table->s->null_bytes; + uchar *key2= (uchar*)key2_arg + item_func->table->s->null_bytes; + + /* + JSON_ARRAYAGG function only accepts one argument. + */ + + Item *item= item_func->args[0]; + /* + If item is a const item then either get_tmp_table_field returns 0 + or it is an item over a const table. + */ + if (item->const_item()) + return 0; + /* + We have to use get_tmp_table_field() instead of + real_item()->get_tmp_table_field() because we want the field in + the temporary table, not the original field + */ + Field *field= item->get_tmp_table_field(); + + if (!field) + return 0; + + if (field->is_null_in_record((uchar*)key1_arg) && + field->is_null_in_record((uchar*)key2_arg)) + return 0; + + if (field->is_null_in_record((uchar*)key1_arg)) + return -1; + + if (field->is_null_in_record((uchar*)key2_arg)) + return 1; + + uint offset= (field->offset(field->table->record[0]) - + field->table->s->null_bytes); + int res= field->cmp(key1 + offset, key2 + offset); + if (res) + return res; + return 0; +} + + +/** + function of sort for syntax: GROUP_CONCAT(expr,... ORDER BY col,... 
) +*/ + +extern "C" +int group_concat_key_cmp_with_order(void* arg, const void* key1, + const void* key2) +{ + Item_func_group_concat* grp_item= (Item_func_group_concat*) arg; + ORDER **order_item, **end; + + for (order_item= grp_item->order, end=order_item+ grp_item->arg_count_order; + order_item < end; + order_item++) + { + Item *item= *(*order_item)->item; + /* + If field_item is a const item then either get_tmp_table_field returns 0 + or it is an item over a const table. + */ + if (item->const_item()) + continue; + /* + If item is a const item then either get_tmp_table_field returns 0 + or it is an item over a const table. + */ + if (item->const_item()) + continue; + /* + We have to use get_tmp_table_field() instead of + real_item()->get_tmp_table_field() because we want the field in + the temporary table, not the original field + + Note that for the case of ROLLUP, field may point to another table + tham grp_item->table. This is however ok as the table definitions are + the same. + */ + Field *field= item->get_tmp_table_field(); + if (!field) + continue; + + uint offset= (field->offset(field->table->record[0]) - + field->table->s->null_bytes); + int res= field->cmp((uchar*)key1 + offset, (uchar*)key2 + offset); + if (res) + return ((*order_item)->direction == ORDER::ORDER_ASC) ? res : -res; + } + /* + We can't return 0 because in that case the tree class would remove this + item as double value. This would cause problems for case-changes and + if the returned values are not the same we do the sort on. + */ + return 1; +} + + +/* + @brief + Comparator function for ORDER BY clause taking into account NULL values. 
+
+  @note
+    Used for JSON_ARRAYAGG function
+*/
+
+int group_concat_key_cmp_with_order_with_nulls(void *arg, const void *key1_arg,
+                                               const void *key2_arg)
+{
+  Item_func_group_concat* grp_item= (Item_func_group_concat*) arg;
+  ORDER **order_item, **end;
+
+  uchar *key1= (uchar*)key1_arg + grp_item->table->s->null_bytes;
+  uchar *key2= (uchar*)key2_arg + grp_item->table->s->null_bytes;
+
+  for (order_item= grp_item->order, end=order_item+ grp_item->arg_count_order;
+       order_item < end;
+       order_item++)
+  {
+    Item *item= *(*order_item)->item;
+    /*
+      If field_item is a const item then either get_tmp_table_field returns 0
+      or it is an item over a const table.
+    */
+    if (item->const_item())
+      continue;
+    /*
+      We have to use get_tmp_table_field() instead of
+      real_item()->get_tmp_table_field() because we want the field in
+      the temporary table, not the original field
+
+      Note that for the case of ROLLUP, field may point to another table
+      than grp_item->table. This is however ok as the table definitions are
+      the same.
+    */
+    Field *field= item->get_tmp_table_field();
+    if (!field)
+      continue;
+
+    if (field->is_null_in_record((uchar*)key1_arg) &&
+        field->is_null_in_record((uchar*)key2_arg))
+      continue;
+
+    if (field->is_null_in_record((uchar*)key1_arg))
+      return ((*order_item)->direction == ORDER::ORDER_ASC) ? -1 : 1;
+
+    if (field->is_null_in_record((uchar*)key2_arg))
+      return ((*order_item)->direction == ORDER::ORDER_ASC) ? 1 : -1;
+
+    uint offset= (field->offset(field->table->record[0]) -
+                  field->table->s->null_bytes);
+    int res= field->cmp((uchar*)key1 + offset, (uchar*)key2 + offset);
+    if (res)
+      return ((*order_item)->direction == ORDER::ORDER_ASC) ? res : -res;
+  }
+  /*
+    We can't return 0 because in that case the tree class would remove this
+    item as double value. This would cause problems for case-changes and
+    if the returned values are not the same we do the sort on.
+ */ + return 1; +} + + +static void report_cut_value_error(THD *thd, uint row_count, const char *fname) +{ + size_t fn_len= strlen(fname); + char *fname_upper= (char *) my_alloca(fn_len + 1); + if (!fname_upper) + fname_upper= (char*) fname; // Out of memory + else + memcpy(fname_upper, fname, fn_len+1); + my_caseup_str(&my_charset_latin1, fname_upper); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_CUT_VALUE_GROUP_CONCAT, + ER_THD(thd, ER_CUT_VALUE_GROUP_CONCAT), + row_count, fname_upper); + my_afree(fname_upper); +} + + +void Item_func_group_concat::cut_max_length(String *result, + uint old_length, uint max_length) const +{ + const char *ptr= result->ptr(); + /* + It's ok to use item->result.length() as the fourth argument + as this is never used to limit the length of the data. + Cut is done with the third argument. + */ + size_t add_length= Well_formed_prefix(collation.collation, + ptr + old_length, + ptr + max_length, + result->length()).length(); + result->length(old_length + add_length); +} + + +/** + Append data from current leaf to item->result. 
+*/ + +extern "C" +int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), + void* item_arg) +{ + Item_func_group_concat *item= (Item_func_group_concat *) item_arg; + TABLE *table= item->table; + uint max_length= table->in_use->variables.group_concat_max_len; + String tmp((char *)table->record[1], table->s->reclength, + default_charset_info); + String tmp2; + uchar *key= (uchar *) key_arg; + String *result= &item->result; + Item **arg= item->args, **arg_end= item->args + item->arg_count_field; + uint old_length= result->length(); + + ulonglong *offset_limit= &item->copy_offset_limit; + ulonglong *row_limit = &item->copy_row_limit; + if (item->limit_clause && !(*row_limit)) + { + item->result_finalized= true; + return 1; + } + + tmp.length(0); + + if (item->limit_clause && (*offset_limit)) + { + item->row_count++; + (*offset_limit)--; + return 0; + } + + if (!item->result_finalized) + item->result_finalized= true; + else + result->append(*item->separator); + + for (; arg < arg_end; arg++) + { + String *res; + /* + We have to use get_tmp_table_field() instead of + real_item()->get_tmp_table_field() because we want the field in + the temporary table, not the original field + We also can't use table->field array to access the fields + because it contains both order and arg list fields. 
+ */ + if ((*arg)->const_item()) + res= item->get_str_from_item(*arg, &tmp); + else + { + Field *field= (*arg)->get_tmp_table_field(); + if (field) + { + uint offset= (field->offset(field->table->record[0]) - + table->s->null_bytes); + DBUG_ASSERT(offset < table->s->reclength); + res= item->get_str_from_field(*arg, field, &tmp, key, + offset + item->get_null_bytes()); + } + else + res= item->get_str_from_item(*arg, &tmp); + } + + if (res) + result->append(*res); + } + + if (item->limit_clause) + (*row_limit)--; + item->row_count++; + + /* stop if length of result more than max_length */ + if (result->length() > max_length) + { + THD *thd= current_thd; + item->cut_max_length(result, old_length, max_length); + item->warning_for_row= TRUE; + report_cut_value_error(thd, item->row_count, item->func_name()); + + /** + To avoid duplicated warnings in Item_func_group_concat::val_str() + */ + if (table && table->blob_storage) + table->blob_storage->set_truncated_value(false); + return 1; + } + return 0; +} + + +/** + Constructor of Item_func_group_concat. + + @param distinct_arg distinct + @param select_list list of expression for show values + @param order_list list of sort columns + @param separator_arg string value of separator. 
+*/ + +Item_func_group_concat:: +Item_func_group_concat(THD *thd, Name_resolution_context *context_arg, + bool distinct_arg, List *select_list, + const SQL_I_List &order_list, + String *separator_arg, bool limit_clause, + Item *row_limit_arg, Item *offset_limit_arg) + :Item_sum(thd), tmp_table_param(0), separator(separator_arg), tree(0), + unique_filter(NULL), table(0), + order(0), context(context_arg), + arg_count_order(order_list.elements), + arg_count_field(select_list->elements), + row_count(0), + distinct(distinct_arg), + warning_for_row(FALSE), always_null(FALSE), + force_copy_fields(0), row_limit(NULL), + offset_limit(NULL), limit_clause(limit_clause), + copy_offset_limit(0), copy_row_limit(0), original(0) +{ + Item *item_select; + Item **arg_ptr; + + quick_group= FALSE; + arg_count= arg_count_field + arg_count_order; + + /* + We need to allocate: + args - arg_count_field+arg_count_order + (for possible order items in temporary tables) + order - arg_count_order + */ + if (!(args= (Item**) thd->alloc(sizeof(Item*) * arg_count * 2 + + sizeof(ORDER*)*arg_count_order))) + return; + + order= (ORDER**)(args + arg_count); + + /* fill args items of show and sort */ + List_iterator_fast li(*select_list); + + for (arg_ptr=args ; (item_select= li++) ; arg_ptr++) + *arg_ptr= item_select; + + if (arg_count_order) + { + ORDER **order_ptr= order; + for (ORDER *order_item= order_list.first; + order_item != NULL; + order_item= order_item->next) + { + (*order_ptr++)= order_item; + *arg_ptr= *order_item->item; + order_item->item= arg_ptr++; + } + } + + /* orig_args is only used for print() */ + orig_args= (Item**) (order + arg_count_order); + if (arg_count) + memcpy(orig_args, args, sizeof(Item*) * arg_count); + if (limit_clause) + { + row_limit= row_limit_arg; + offset_limit= offset_limit_arg; + } +} + + +Item_func_group_concat::Item_func_group_concat(THD *thd, + Item_func_group_concat *item) + :Item_sum(thd, item), + tmp_table_param(item->tmp_table_param), + 
   separator(item->separator),
+  tree(item->tree),
+  tree_len(item->tree_len),
+  unique_filter(item->unique_filter),
+  table(item->table),
+  context(item->context),
+  arg_count_order(item->arg_count_order),
+  arg_count_field(item->arg_count_field),
+  row_count(item->row_count),
+  distinct(item->distinct),
+  warning_for_row(item->warning_for_row),
+  always_null(item->always_null),
+  force_copy_fields(item->force_copy_fields),
+  row_limit(item->row_limit), offset_limit(item->offset_limit),
+  limit_clause(item->limit_clause),copy_offset_limit(item->copy_offset_limit),
+  copy_row_limit(item->copy_row_limit), original(item)
+{
+  quick_group= item->quick_group;
+  result.set_charset(collation.collation);
+
+  /*
+    Since the ORDER structures pointed to by the elements of the 'order' array
+    may be modified in find_order_in_list() called from
+    Item_func_group_concat::setup(), create a copy of those structures so that
+    such modifications done in this object would not have any effect on the
+    object being copied.
+  */
+  ORDER *tmp;
+  if (!(tmp= (ORDER *) thd->alloc(sizeof(ORDER *) * arg_count_order +
+                                  sizeof(ORDER) * arg_count_order)))
+    return;
+  order= (ORDER **)(tmp + arg_count_order);
+  for (uint i= 0; i < arg_count_order; i++, tmp++)
+  {
+    /*
+      Compiler generated copy constructor is used to
+      copy all the members of ORDER struct.
+      It's also necessary to update ORDER::next pointer
+      so that it points to new ORDER element.
+    */
+    new (tmp) st_order(*(item->order[i]));
+    tmp->next= (i + 1 == arg_count_order) ?
NULL : (tmp + 1); + order[i]= tmp; + } +} + + +void Item_func_group_concat::cleanup() +{ + DBUG_ENTER("Item_func_group_concat::cleanup"); + Item_sum::cleanup(); + + /* + Free table and tree if they belong to this item (if item have not pointer + to original item from which was made copy => it own its objects ) + */ + if (!original) + { + delete tmp_table_param; + tmp_table_param= 0; + if (table) + { + THD *thd= table->in_use; + if (table->blob_storage) + delete table->blob_storage; + free_tmp_table(thd, table); + table= 0; + if (tree) + { + delete_tree(tree, 0); + tree= 0; + } + if (unique_filter) + { + delete unique_filter; + unique_filter= NULL; + } + } + DBUG_ASSERT(tree == 0); + } + /* + As the ORDER structures pointed to by the elements of the + 'order' array may be modified in find_order_in_list() called + from Item_func_group_concat::setup() to point to runtime + created objects, we need to reset them back to the original + arguments of the function. + */ + ORDER **order_ptr= order; + for (uint i= 0; i < arg_count_order; i++) + { + (*order_ptr)->item= &args[arg_count_field + i]; + order_ptr++; + } + DBUG_VOID_RETURN; +} + + +Item *Item_func_group_concat::copy_or_same(THD* thd) +{ + return new (thd->mem_root) Item_func_group_concat(thd, this); +} + + +void Item_func_group_concat::clear() +{ + result.length(0); + result.copy(); + null_value= TRUE; + warning_for_row= FALSE; + result_finalized= false; + if (offset_limit) + copy_offset_limit= offset_limit->val_int(); + if (row_limit) + copy_row_limit= row_limit->val_int(); + if (tree) + { + reset_tree(tree); + tree_len= 0; + } + if (unique_filter) + unique_filter->reset(); + if (table && table->blob_storage) + table->blob_storage->reset(); + /* No need to reset the table as we never call write_row */ +} + +struct st_repack_tree { + TREE tree; + TABLE *table; + size_t len, maxlen; +}; + +extern "C" +int copy_to_tree(void* key, element_count count __attribute__((unused)), + void* arg) +{ + struct st_repack_tree 
*st= (struct st_repack_tree*)arg; + TABLE *table= st->table; + Field* field= table->field[0]; + const uchar *ptr= field->ptr_in_record((uchar*)key - table->s->null_bytes); + size_t len= (size_t)field->val_int(ptr); + + DBUG_ASSERT(count == 1); + if (!tree_insert(&st->tree, key, 0, st->tree.custom_arg)) + return 1; + + st->len += len; + return st->len > st->maxlen; +} + +bool Item_func_group_concat::repack_tree(THD *thd) +{ + struct st_repack_tree st; + int size= tree->size_of_element; + if (!tree->offset_to_key) + size-= sizeof(void*); + + init_tree(&st.tree, (size_t) MY_MIN(thd->variables.max_heap_table_size, + thd->variables.sortbuff_size/16), 0, + size, get_comparator_function_for_order_by(), NULL, + (void*) this, MYF(MY_THREAD_SPECIFIC)); + DBUG_ASSERT(tree->size_of_element == st.tree.size_of_element); + st.table= table; + st.len= 0; + st.maxlen= thd->variables.group_concat_max_len; + tree_walk(tree, ©_to_tree, &st, left_root_right); + if (st.len <= st.maxlen) // Copying aborted. Must be OOM + { + delete_tree(&st.tree, 0); + return 1; + } + delete_tree(tree, 0); + *tree= st.tree; + tree_len= st.len; + return 0; +} + + +/* + Repacking the tree is expensive. But it keeps the tree small, and + inserting into an unnecessary large tree is also waste of time. + + The following number is best-by-test. Test execution time slowly + decreases up to N=10 (that is, factor=1024) and then starts to increase, + again, very slowly. 
+*/ +#define GCONCAT_REPACK_FACTOR 10 + +bool Item_func_group_concat::add(bool exclude_nulls) +{ + if (always_null && exclude_nulls) + return 0; + copy_fields(tmp_table_param); + if (copy_funcs(tmp_table_param->items_to_copy, table->in_use)) + return TRUE; + + size_t row_str_len= 0; + StringBuffer buf; + String *res; + for (uint i= 0; i < arg_count_field; i++) + { + Item *show_item= args[i]; + if (show_item->const_item()) + continue; + + Field *field= show_item->get_tmp_table_field(); + if (field) + { + if (field->is_null_in_record((const uchar*) table->record[0]) && + exclude_nulls) + return 0; // Skip row if it contains null + + buf.set_buffer_if_not_allocated(&my_charset_bin); + if (tree && (res= field->val_str(&buf))) + row_str_len+= res->length(); + } + else + { + /* + should not reach here, we create temp table for all the arguments of + the group_concat function + */ + DBUG_ASSERT(0); + } + } + + null_value= FALSE; + bool row_eligible= TRUE; + + if (distinct) + { + /* Filter out duplicate rows. */ + uint count= unique_filter->elements_in_tree(); + unique_filter->unique_add(get_record_pointer()); + if (count == unique_filter->elements_in_tree()) + row_eligible= FALSE; + } + + TREE_ELEMENT *el= 0; // Only for safety + if (row_eligible && tree) + { + THD *thd= table->in_use; + table->field[0]->store(row_str_len, FALSE); + if ((tree_len >> GCONCAT_REPACK_FACTOR) > thd->variables.group_concat_max_len + && tree->elements_in_tree > 1) + if (repack_tree(thd)) + return 1; + el= tree_insert(tree, get_record_pointer(), 0, tree->custom_arg); + /* check if there was enough memory to insert the row */ + if (!el) + return 1; + tree_len+= row_str_len; + } + + /* + In case of GROUP_CONCAT with DISTINCT or ORDER BY (or both) don't dump the + row to the output buffer here. That will be done in val_str. 
+ */ + if (row_eligible && !warning_for_row && (!tree && !distinct)) + dump_leaf_key(get_record_pointer(), 1, this); + + return 0; +} + + +bool +Item_func_group_concat::fix_fields(THD *thd, Item **ref) +{ + uint i; /* for loop variable */ + DBUG_ASSERT(fixed() == 0); + + if (init_sum_func_check(thd)) + return TRUE; + + set_maybe_null(); + + /* + Fix fields for select list and ORDER clause + */ + + for (i=0 ; i < arg_count ; i++) + { + if (args[i]->fix_fields_if_needed_for_scalar(thd, &args[i])) + return TRUE; + /* We should ignore FIELD's in arguments to sum functions */ + with_flags|= (args[i]->with_flags & ~item_with_t::FIELD); + } + + /* skip charset aggregation for order columns */ + if (agg_arg_charsets_for_string_result(collation, + args, arg_count - arg_count_order)) + return 1; + + result.set_charset(collation.collation); + result_field= 0; + null_value= 1; + max_length= (uint32) MY_MIN((ulonglong) thd->variables.group_concat_max_len + / collation.collation->mbminlen + * collation.collation->mbmaxlen, UINT_MAX32); + + uint32 offset; + if (separator->needs_conversion(separator->length(), separator->charset(), + collation.collation, &offset)) + { + uint32 buflen= collation.collation->mbmaxlen * separator->length(); + uint errors, conv_length; + char *buf; + String *new_separator; + + if (!(buf= (char*) thd->stmt_arena->alloc(buflen)) || + !(new_separator= new(thd->stmt_arena->mem_root) + String(buf, buflen, collation.collation))) + return TRUE; + + conv_length= copy_and_convert(buf, buflen, collation.collation, + separator->ptr(), separator->length(), + separator->charset(), &errors); + new_separator->length(conv_length); + separator= new_separator; + } + + if (check_sum_func(thd, ref)) + return TRUE; + + base_flags|= item_base_t::FIXED; + return FALSE; +} + + +bool Item_func_group_concat::setup(THD *thd) +{ + List list; + SELECT_LEX *select_lex= thd->lex->current_select; + const bool order_or_distinct= MY_TEST(arg_count_order > 0 || distinct); + 
DBUG_ENTER("Item_func_group_concat::setup"); + + /* + Currently setup() can be called twice. Please add + assertion here when this is fixed. + */ + if (table || tree) + DBUG_RETURN(FALSE); + + if (!(tmp_table_param= new (thd->mem_root) TMP_TABLE_PARAM)) + DBUG_RETURN(TRUE); + + /* Push all not constant fields to the list and create a temp table */ + always_null= 0; + for (uint i= 0; i < arg_count_field; i++) + { + Item *item= args[i]; + if (list.push_back(item, thd->mem_root)) + DBUG_RETURN(TRUE); + if (item->const_item() && item->is_null() && skip_nulls()) + { + always_null= 1; + DBUG_RETURN(FALSE); + } + } + + List all_fields(list); + /* + Try to find every ORDER expression in the list of GROUP_CONCAT + arguments. If an expression is not found, prepend it to + "all_fields". The resulting field list is used as input to create + tmp table columns. + */ + if (arg_count_order) + { + uint n_elems= arg_count_order + all_fields.elements; + ref_pointer_array= static_cast(thd->alloc(sizeof(Item*) * n_elems)); + if (!ref_pointer_array) + DBUG_RETURN(TRUE); + memcpy(ref_pointer_array, args, arg_count * sizeof(Item*)); + DBUG_ASSERT(context); + if (setup_order(thd, Ref_ptr_array(ref_pointer_array, n_elems), + context->table_list, list, all_fields, *order)) + DBUG_RETURN(TRUE); + /* + Prepend the field to store the length of the string representation + of this row. Used to detect when the tree goes over group_concat_max_len + */ + Item *item= new (thd->mem_root) + Item_uint(thd, thd->variables.group_concat_max_len); + if (!item || all_fields.push_front(item, thd->mem_root)) + DBUG_RETURN(TRUE); + } + + count_field_types(select_lex, tmp_table_param, all_fields, 0); + tmp_table_param->force_copy_fields= force_copy_fields; + tmp_table_param->hidden_field_count= (arg_count_order > 0); + DBUG_ASSERT(table == 0); + if (order_or_distinct) + { + /* + Convert bit fields to bigint's in the temporary table. 
+      Needed as we cannot compare two table records containing BIT fields
+      stored in the tree used for distinct/order by.
+      Moreover we don't even save in the tree record null bits
+      where BIT fields store parts of their data.
+    */
+    store_bit_fields_as_bigint_in_tempory_table(&all_fields);
+  }
+
+  /*
+    We have to create a temporary table to get descriptions of fields
+    (types, sizes and so on).
+
+    Note that in the table, we first have the ORDER BY fields, then the
+    field list.
+  */
+  if (!(table= create_tmp_table(thd, tmp_table_param, all_fields,
+                                (ORDER*) 0, 0, TRUE,
+                                (select_lex->options |
+                                 thd->variables.option_bits),
+                                HA_POS_ERROR, &empty_clex_str)))
+    DBUG_RETURN(TRUE);
+  table->file->extra(HA_EXTRA_NO_ROWS);
+  table->no_rows= 1;
+
+  /**
+    Initialize blob_storage if GROUP_CONCAT is used
+    with ORDER BY | DISTINCT and BLOB field count > 0.
+  */
+  if (order_or_distinct && table->s->blob_fields)
+    table->blob_storage= new (thd->mem_root) Blob_mem_storage();
+
+  /*
+    Need sorting or uniqueness: init tree and choose a function to sort.
+    Don't reserve space for NULLs: if any of gconcat arguments is NULL,
+    the row is not added to the result.
+  */
+  uint tree_key_length= table->s->reclength - table->s->null_bytes;
+
+  if (arg_count_order)
+  {
+    tree= &tree_base;
+    /*
+      Create a tree for sorting. The tree is used to sort (according to the
+      syntax of this function). If there is no ORDER BY clause, we don't
+      create this tree.
+ */ + init_tree(tree, (size_t)MY_MIN(thd->variables.max_heap_table_size, + thd->variables.sortbuff_size/16), 0, + tree_key_length + get_null_bytes(), + get_comparator_function_for_order_by(), NULL, (void*) this, + MYF(MY_THREAD_SPECIFIC)); + tree_len= 0; + } + + if (distinct) + unique_filter= (new (thd->mem_root) + Unique(get_comparator_function_for_distinct(), + (void*)this, + tree_key_length + get_null_bytes(), + ram_limitation(thd))); + if ((row_limit && row_limit->cmp_type() != INT_RESULT) || + (offset_limit && offset_limit->cmp_type() != INT_RESULT)) + { + my_error(ER_INVALID_VALUE_TO_LIMIT, MYF(0)); + DBUG_RETURN(TRUE); + } + + DBUG_RETURN(FALSE); +} + + +/* This is used by rollup to create a separate usable copy of the function */ + +void Item_func_group_concat::make_unique() +{ + tmp_table_param= 0; + table=0; + original= 0; + force_copy_fields= 1; + tree= 0; +} + + +String* Item_func_group_concat::val_str(String* str) +{ + DBUG_ASSERT(fixed()); + if (null_value) + return 0; + + if (!result_finalized) // Result yet to be written. + { + if (tree != NULL) // order by + tree_walk(tree, &dump_leaf_key, this, left_root_right); + else if (distinct) // distinct (and no order by). + unique_filter->walk(table, &dump_leaf_key, this); + else if (row_limit && copy_row_limit == (ulonglong)row_limit->val_int()) + return &result; + else + DBUG_ASSERT(false); // Can't happen + } + + if (table && table->blob_storage && + table->blob_storage->is_truncated_value()) + { + warning_for_row= true; + report_cut_value_error(current_thd, row_count, func_name()); + } + + return &result; +} + + +/* + @brief + Get the comparator function for DISTINT clause +*/ + +qsort_cmp2 Item_func_group_concat::get_comparator_function_for_distinct() +{ + return skip_nulls() ? 
+ group_concat_key_cmp_with_distinct : + group_concat_key_cmp_with_distinct_with_nulls; +} + + +/* + @brief + Get the comparator function for ORDER BY clause +*/ + +qsort_cmp2 Item_func_group_concat::get_comparator_function_for_order_by() +{ + return skip_nulls() ? + group_concat_key_cmp_with_order : + group_concat_key_cmp_with_order_with_nulls; +} + + +/* + + @brief + Get the record pointer of the current row of the table + + @details + look at the comments for Item_func_group_concat::get_null_bytes +*/ + +uchar* Item_func_group_concat::get_record_pointer() +{ + return skip_nulls() ? + table->record[0] + table->s->null_bytes : + table->record[0]; +} + + +/* + @brief + Get the null bytes for the table if required. + + @details + This function is used for GROUP_CONCAT (or JSON_ARRAYAGG) implementation + where the Unique tree or the ORDER BY tree may store the null values, + in such case we also store the null bytes inside each node of the tree. + +*/ + +uint Item_func_group_concat::get_null_bytes() +{ + return skip_nulls() ? 
0 : table->s->null_bytes; +} + + +void Item_func_group_concat::print(String *str, enum_query_type query_type) +{ + str->append(func_name_cstring()); + if (distinct) + str->append(STRING_WITH_LEN("distinct ")); + for (uint i= 0; i < arg_count_field; i++) + { + if (i) + str->append(','); + orig_args[i]->print(str, query_type); + } + if (arg_count_order) + { + str->append(STRING_WITH_LEN(" order by ")); + for (uint i= 0 ; i < arg_count_order ; i++) + { + if (i) + str->append(','); + orig_args[i + arg_count_field]->print(str, query_type); + if (order[i]->direction == ORDER::ORDER_ASC) + str->append(STRING_WITH_LEN(" ASC")); + else + str->append(STRING_WITH_LEN(" DESC")); + } + } + + if (sum_func() == GROUP_CONCAT_FUNC) + { + str->append(STRING_WITH_LEN(" separator \'")); + str->append_for_single_quote(separator->ptr(), separator->length()); + str->append(STRING_WITH_LEN("\'")); + } + + if (limit_clause) + { + str->append(STRING_WITH_LEN(" limit ")); + if (offset_limit) + { + offset_limit->print(str, query_type); + str->append(','); + } + row_limit->print(str, query_type); + } + str->append(STRING_WITH_LEN(")")); +} + + +Item_func_group_concat::~Item_func_group_concat() +{ + if (!original && unique_filter) + delete unique_filter; +} diff --git a/sql/item_sum.h b/sql/item_sum.h new file mode 100644 index 00000000..73087340 --- /dev/null +++ b/sql/item_sum.h @@ -0,0 +1,2101 @@ +#ifndef ITEM_SUM_INCLUDED +#define ITEM_SUM_INCLUDED +/* Copyright (c) 2000, 2013 Oracle and/or its affiliates. + Copyright (c) 2008, 2023, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* classes for sum functions */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include +#include "sql_udf.h" /* udf_handler */ + +class Item_sum; +class Aggregator_distinct; +class Aggregator_simple; + +/** + The abstract base class for the Aggregator_* classes. + It implements the data collection functions (setup/add/clear) + as either pass-through to the real functionality or + as collectors into an Unique (for distinct) structure. + + Note that update_field/reset_field are not in that + class, because they're simply not called when + GROUP BY/DISTINCT can be handled with help of index on grouped + fields (quick_group = 0); +*/ + +class Aggregator : public Sql_alloc +{ + friend class Item_sum; + friend class Item_sum_sum; + friend class Item_sum_count; + friend class Item_sum_avg; + + /* + All members are protected as this class is not usable outside of an + Item_sum descendant. + */ +protected: + /* the aggregate function class to act on */ + Item_sum *item_sum; + +public: + Aggregator (Item_sum *arg): item_sum(arg) {} + virtual ~Aggregator () = default; /* Keep gcc happy */ + + enum Aggregator_type { SIMPLE_AGGREGATOR, DISTINCT_AGGREGATOR }; + virtual Aggregator_type Aggrtype() = 0; + + /** + Called before adding the first row. + Allocates and sets up the internal aggregation structures used, + e.g. the Unique instance used to calculate distinct. + */ + virtual bool setup(THD *) = 0; + + /** + Called when we need to wipe out all the data from the aggregator : + all the values acumulated and all the state. + Cleans up the internal structures and resets them to their initial state. + */ + virtual void clear() = 0; + + /** + Called when there's a new value to be aggregated. 
+ Updates the internal state of the aggregator to reflect the new value. + */ + virtual bool add() = 0; + + /** + Called when there are no more data and the final value is to be retrieved. + Finalises the state of the aggregator, so the final result can be retrieved. + */ + virtual void endup() = 0; + + /** Decimal value of being-aggregated argument */ + virtual my_decimal *arg_val_decimal(my_decimal * value) = 0; + /** Floating point value of being-aggregated argument */ + virtual double arg_val_real() = 0; + /** + NULLness of being-aggregated argument. + + @param use_null_value Optimization: to determine if the argument is NULL + we must, in the general case, call is_null() on it, which itself might + call val_*() on it, which might be costly. If you just have called + arg_val*(), you can pass use_null_value=true; this way, arg_is_null() + might avoid is_null() and instead do a cheap read of the Item's null_value + (updated by arg_val*()). + */ + virtual bool arg_is_null(bool use_null_value) = 0; +}; + + +class st_select_lex; +class Window_spec; + +/** + Class Item_sum is the base class used for special expressions that SQL calls + 'set functions'. These expressions are formed with the help of aggregate + functions such as SUM, MAX, GROUP_CONCAT etc. + + GENERAL NOTES + + A set function cannot be used in certain positions where expressions are + accepted. There are some quite explicable restrictions for the usage of + set functions. + + In the query: + SELECT AVG(b) FROM t1 WHERE SUM(b) > 20 GROUP by a + the usage of the set function AVG(b) is legal, while the usage of SUM(b) + is illegal. A WHERE condition must contain expressions that can be + evaluated for each row of the table. Yet the expression SUM(b) can be + evaluated only for each group of rows with the same value of column a. + In the query: + SELECT AVG(b) FROM t1 WHERE c > 30 GROUP BY a HAVING SUM(b) > 20 + both set function expressions AVG(b) and SUM(b) are legal. 
+ + We can say that in a query without nested selects an occurrence of a + set function in an expression of the SELECT list or/and in the HAVING + clause is legal, while in the WHERE clause it's illegal. + + The general rule to detect whether a set function is legal in a query with + nested subqueries is much more complicated. + + Consider the the following query: + SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a > ALL (SELECT t2.c FROM t2 WHERE SUM(t1.b) < t2.c). + The set function SUM(b) is used here in the WHERE clause of the subquery. + Nevertheless it is legal since it is under the HAVING clause of the query + to which this function relates. The expression SUM(t1.b) is evaluated + for each group defined in the main query, not for groups of the subquery. + + The problem of finding the query where to aggregate a particular + set function is not so simple as it seems to be. + + In the query: + SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a > ALL(SELECT t2.c FROM t2 GROUP BY t2.c + HAVING SUM(t1.a) < t2.c) + the set function can be evaluated for both outer and inner selects. + If we evaluate SUM(t1.a) for the outer query then we get the value of t1.a + multiplied by the cardinality of a group in table t1. In this case + in each correlated subquery SUM(t1.a) is used as a constant. But we also + can evaluate SUM(t1.a) for the inner query. In this case t1.a will be a + constant for each correlated subquery and summation is performed + for each group of table t2. + (Here it makes sense to remind that the query + SELECT c FROM t GROUP BY a HAVING SUM(1) < a + is quite legal in our SQL). + + So depending on what query we assign the set function to we + can get different result sets. + + The general rule to detect the query where a set function is to be + evaluated can be formulated as follows. + Consider a set function S(E) where E is an expression with occurrences + of column references C1, ..., CN. 
Resolve these column references against + subqueries that contain the set function S(E). Let Q be the innermost + subquery of those subqueries. (It should be noted here that S(E) + in no way can be evaluated in the subquery embedding the subquery Q, + otherwise S(E) would refer to at least one unbound column reference) + If S(E) is used in a construct of Q where set functions are allowed then + we evaluate S(E) in Q. + Otherwise we look for a innermost subquery containing S(E) of those where + usage of S(E) is allowed. + + Let's demonstrate how this rule is applied to the following queries. + + 1. SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a > ALL(SELECT t2.b FROM t2 GROUP BY t2.b + HAVING t2.b > ALL(SELECT t3.c FROM t3 GROUP BY t3.c + HAVING SUM(t1.a+t2.b) < t3.c)) + For this query the set function SUM(t1.a+t2.b) depends on t1.a and t2.b + with t1.a defined in the outermost query, and t2.b defined for its + subquery. The set function is in the HAVING clause of the subquery and can + be evaluated in this subquery. + + 2. SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a > ALL(SELECT t2.b FROM t2 + WHERE t2.b > ALL (SELECT t3.c FROM t3 GROUP BY t3.c + HAVING SUM(t1.a+t2.b) < t3.c)) + Here the set function SUM(t1.a+t2.b)is in the WHERE clause of the second + subquery - the most upper subquery where t1.a and t2.b are defined. + If we evaluate the function in this subquery we violate the context rules. + So we evaluate the function in the third subquery (over table t3) where it + is used under the HAVING clause. + + 3. SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a > ALL(SELECT t2.b FROM t2 + WHERE t2.b > ALL (SELECT t3.c FROM t3 + WHERE SUM(t1.a+t2.b) < t3.c)) + In this query evaluation of SUM(t1.a+t2.b) is not legal neither in the second + nor in the third subqueries. So this query is invalid. + + Mostly set functions cannot be nested. 
In the query + SELECT t1.a from t1 GROUP BY t1.a HAVING AVG(SUM(t1.b)) > 20 + the expression SUM(b) is not acceptable, though it is under a HAVING clause. + Yet it is acceptable in the query: + SELECT t.1 FROM t1 GROUP BY t1.a HAVING SUM(t1.b) > 20. + + An argument of a set function does not have to be a reference to a table + column as we saw it in examples above. This can be a more complex expression + SELECT t1.a FROM t1 GROUP BY t1.a HAVING SUM(t1.b+1) > 20. + The expression SUM(t1.b+1) has a very clear semantics in this context: + we sum up the values of t1.b+1 where t1.b varies for all values within a + group of rows that contain the same t1.a value. + + A set function for an outer query yields a constant within a subquery. So + the semantics of the query + SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c + HAVING AVG(t2.c+SUM(t1.b)) > 20) + is still clear. For a group of the rows with the same t1.a values we + calculate the value of SUM(t1.b). This value 's' is substituted in the + the subquery: + SELECT t2.c FROM t2 GROUP BY t2.c HAVING AVG(t2.c+s) + than returns some result set. + + By the same reason the following query with a subquery + SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c + HAVING AVG(SUM(t1.b)) > 20) + is also acceptable. + + IMPLEMENTATION NOTES + + Three methods were added to the class to check the constraints specified + in the previous section. These methods utilize several new members. + + The field 'nest_level' contains the number of the level for the subquery + containing the set function. The main SELECT is of level 0, its subqueries + are of levels 1, the subqueries of the latter are of level 2 and so on. + + The field 'aggr_level' is to contain the nest level of the subquery + where the set function is aggregated. + + The field 'max_arg_level' is for the maximum of the nest levels of the + unbound column references occurred in the set function. 
A column reference + is unbound within a set function if it is not bound by any subquery + used as a subexpression in this function. A column reference is bound by + a subquery if it is a reference to the column by which the aggregation + of some set function that is used in the subquery is calculated. + For the set function used in the query + SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a > ALL(SELECT t2.b FROM t2 GROUP BY t2.b + HAVING t2.b > ALL(SELECT t3.c FROM t3 GROUP BY t3.c + HAVING SUM(t1.a+t2.b) < t3.c)) + the value of max_arg_level is equal to 1 since t1.a is bound in the main + query, and t2.b is bound by the first subquery whose nest level is 1. + Obviously a set function cannot be aggregated in the subquery whose + nest level is less than max_arg_level. (Yet it can be aggregated in the + subqueries whose nest level is greater than max_arg_level.) + In the query + SELECT t.a FROM t1 HAVING AVG(t1.a+(SELECT MIN(t2.c) FROM t2)) + the value of the max_arg_level for the AVG set function is 0 since + the reference t2.c is bound in the subquery. + + The field 'max_sum_func_level' is to contain the maximum of the + nest levels of the set functions that are used as subexpressions of + the arguments of the given set function, but not aggregated in any + subquery within this set function. A nested set function s1 can be + used within set function s0 only if s1.max_sum_func_level < + s0.max_sum_func_level. Set function s1 is considered as nested + for set function s0 if s1 is not calculated in any subquery + within s0. + + A set function that is used as a subexpression in an argument of another + set function refers to the latter via the field 'in_sum_func'. + + The condition imposed on the usage of set functions are checked when + we traverse query subexpressions with the help of the recursive method + fix_fields. 
When we apply this method to an object of the class + Item_sum, first, on the descent, we call the method init_sum_func_check + that initialize members used at checking. Then, on the ascent, we + call the method check_sum_func that validates the set function usage + and reports an error if it is illegal. + The method register_sum_func serves to link the items for the set functions + that are aggregated in the embedding (sub)queries. Circular chains of such + functions are attached to the corresponding st_select_lex structures + through the field inner_sum_func_list. + + Exploiting the fact that the members mentioned above are used in one + recursive function we could have allocated them on the thread stack. + Yet we don't do it now. + + We assume that the nesting level of subquries does not exceed 127. + TODO: to catch queries where the limit is exceeded to make the + code clean here. + + @note + The implementation takes into account the used strategy: + - Items resolved at optimization phase return 0 from Item_sum::used_tables(). + - Items that depend on the number of join output records, but not columns of + any particular table (like COUNT(*)), returm 0 from Item_sum::used_tables(), + but still return false from Item_sum::const_item(). +*/ + +class Item_sum :public Item_func_or_sum +{ + friend class Aggregator_distinct; + friend class Aggregator_simple; + +protected: + /** + Aggregator class instance. Not set initially. Allocated only after + it is determined if the incoming data are already distinct. + */ + Aggregator *aggr; + +private: + /** + Used in making ROLLUP. Set for the ROLLUP copies of the original + Item_sum and passed to create_tmp_field() to cause it to work + over the temp table buffer that is referenced by + Item_result_field::result_field. 
+ */ + bool force_copy_fields; + + /** + Indicates how the aggregate function was specified by the parser : + 1 if it was written as AGGREGATE(DISTINCT), + 0 if it was AGGREGATE() + */ + bool with_distinct; + + /* TRUE if this is aggregate function of a window function */ + bool window_func_sum_expr_flag; + +public: + + bool has_force_copy_fields() const { return force_copy_fields; } + bool has_with_distinct() const { return with_distinct; } + + enum Sumfunctype + { COUNT_FUNC, COUNT_DISTINCT_FUNC, SUM_FUNC, SUM_DISTINCT_FUNC, AVG_FUNC, + AVG_DISTINCT_FUNC, MIN_FUNC, MAX_FUNC, STD_FUNC, + VARIANCE_FUNC, SUM_BIT_FUNC, UDF_SUM_FUNC, GROUP_CONCAT_FUNC, + ROW_NUMBER_FUNC, RANK_FUNC, DENSE_RANK_FUNC, PERCENT_RANK_FUNC, + CUME_DIST_FUNC, NTILE_FUNC, FIRST_VALUE_FUNC, LAST_VALUE_FUNC, + NTH_VALUE_FUNC, LEAD_FUNC, LAG_FUNC, PERCENTILE_CONT_FUNC, + PERCENTILE_DISC_FUNC, SP_AGGREGATE_FUNC, JSON_ARRAYAGG_FUNC, + JSON_OBJECTAGG_FUNC + }; + + Item **ref_by; /* pointer to a ref to the object used to register it */ + Item_sum *next; /* next in the circular chain of registered objects */ + Item_sum *in_sum_func; /* embedding set function if any */ + st_select_lex * aggr_sel; /* select where the function is aggregated */ + int8 nest_level; /* number of the nesting level of the set function */ + int8 aggr_level; /* nesting level of the aggregating subquery */ + int8 max_arg_level; /* max level of unbound column references */ + int8 max_sum_func_level;/* max level of aggregation for embedded functions */ + + /* + true (the default value) means this aggregate function can be computed + with TemporaryTableWithPartialSums algorithm (see end_update()). + false means this aggregate function needs OrderedGroupBy algorithm (see + end_write_group()). + */ + bool quick_group; + /* + This list is used by the check for mixing non aggregated fields and + sum functions in the ONLY_FULL_GROUP_BY_MODE. 
We save all outer fields + directly or indirectly used under this function it as it's unclear + at the moment of fixing outer field whether it's aggregated or not. + */ + List outer_fields; + +protected: + /* + Copy of the arguments list to hold the original set of arguments. + Used in EXPLAIN EXTENDED instead of the current argument list because + the current argument list can be altered by usage of temporary tables. + */ + Item **orig_args, *tmp_orig_args[2]; + + static size_t ram_limitation(THD *thd); +public: + // Methods used by ColumnStore + Item **get_orig_args() const { return orig_args; } +public: + + void mark_as_sum_func(); + Item_sum(THD *thd): Item_func_or_sum(thd), quick_group(1) + { + mark_as_sum_func(); + init_aggregator(); + } + Item_sum(THD *thd, Item *a): Item_func_or_sum(thd, a), quick_group(1), + orig_args(tmp_orig_args) + { + mark_as_sum_func(); + init_aggregator(); + } + Item_sum(THD *thd, Item *a, Item *b): Item_func_or_sum(thd, a, b), + quick_group(1), orig_args(tmp_orig_args) + { + mark_as_sum_func(); + init_aggregator(); + } + Item_sum(THD *thd, List &list); + //Copy constructor, need to perform subselects with temporary tables + Item_sum(THD *thd, Item_sum *item); + enum Type type() const override { return SUM_FUNC_ITEM; } + virtual enum Sumfunctype sum_func () const=0; + bool is_aggr_sum_func() + { + switch (sum_func()) { + case COUNT_FUNC: + case COUNT_DISTINCT_FUNC: + case SUM_FUNC: + case SUM_DISTINCT_FUNC: + case AVG_FUNC: + case AVG_DISTINCT_FUNC: + case MIN_FUNC: + case MAX_FUNC: + case STD_FUNC: + case VARIANCE_FUNC: + case SUM_BIT_FUNC: + case UDF_SUM_FUNC: + case GROUP_CONCAT_FUNC: + case JSON_ARRAYAGG_FUNC: + return true; + default: + return false; + } + } + /** + Resets the aggregate value to its default and aggregates the current + value of its attribute(s). 
+ */ + inline bool reset_and_add() + { + aggregator_clear(); + return aggregator_add(); + }; + + /* + Called when new group is started and results are being saved in + a temporary table. Similarly to reset_and_add() it resets the + value to its default and aggregates the value of its + attribute(s), but must also store it in result_field. + This set of methods (result_item(), reset_field, update_field()) of + Item_sum is used only if quick_group is not null. Otherwise + copy_or_same() is used to obtain a copy of this item. + */ + virtual void reset_field()=0; + /* + Called for each new value in the group, when temporary table is in use. + Similar to add(), but uses temporary table field to obtain current value, + Updated value is then saved in the field. + */ + virtual void update_field()=0; + bool fix_length_and_dec(THD *thd) override + { + set_maybe_null(); + null_value=1; + return FALSE; + } + virtual Item *result_item(THD *thd, Field *field); + + void update_used_tables() override; + COND *build_equal_items(THD *thd, COND_EQUAL *inherited, + bool link_item_fields, + COND_EQUAL **cond_equal_ref) override + { + /* + Item_sum (and derivants) of the original WHERE/HAVING clauses + should already be replaced to Item_aggregate_ref by the time when + build_equal_items() is called. See Item::split_sum_func2(). + */ + DBUG_ASSERT(0); + return Item::build_equal_items(thd, inherited, link_item_fields, + cond_equal_ref); + } + bool is_null() override { return null_value; } + /** + make_const() + Called if we've managed to calculate the value of this Item in + opt_sum_query(), hence it can be considered constant at all subsequent + steps. 
+ */ + void make_const () + { + used_tables_cache= 0; + const_item_cache= true; + } + void reset_forced_const() { const_item_cache= false; } + bool const_during_execution() const override { return false; } + void print(String *str, enum_query_type query_type) override; + void fix_num_length_and_dec(); + + /** + Mark an aggregate as having no rows. + + This function is called by the execution engine to assign 'NO ROWS + FOUND' value to an aggregate item, when the underlying result set + has no rows. Such value, in a general case, may be different from + the default value of the item after 'clear()': e.g. a numeric item + may be initialized to 0 by clear() and to NULL by + no_rows_in_result(). + */ + void no_rows_in_result() override + { + set_aggregator(current_thd, with_distinct ? + Aggregator::DISTINCT_AGGREGATOR : + Aggregator::SIMPLE_AGGREGATOR); + aggregator_clear(); + } + virtual void make_unique() { force_copy_fields= TRUE; } + Item *get_tmp_table_item(THD *thd) override; + virtual Field *create_tmp_field(MEM_ROOT *root, bool group, TABLE *table); + Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src *src, + const Tmp_field_param *param) override + { + return create_tmp_field(root, param->group(), table); + } + bool collect_outer_ref_processor(void *param) override; + bool init_sum_func_check(THD *thd); + bool check_sum_func(THD *thd, Item **ref); + bool register_sum_func(THD *thd, Item **ref); + st_select_lex *depended_from() + { return (nest_level == aggr_level ? 0 : aggr_sel); } + + Item *get_arg(uint i) const { return args[i]; } + Item *set_arg(uint i, THD *thd, Item *new_val); + uint get_arg_count() const { return arg_count; } + virtual Item **get_args() { return fixed() ? orig_args : args; } + + /* Initialization of distinct related members */ + void init_aggregator() + { + aggr= NULL; + with_distinct= FALSE; + force_copy_fields= FALSE; + } + + /** + Called to initialize the aggregator. 
+ */ + + inline bool aggregator_setup(THD *thd) { return aggr->setup(thd); }; + + /** + Called to cleanup the aggregator. + */ + + inline void aggregator_clear() { aggr->clear(); } + + /** + Called to add value to the aggregator. + */ + + inline bool aggregator_add() { return aggr->add(); }; + + /* stores the declared DISTINCT flag (from the parser) */ + void set_distinct(bool distinct) + { + with_distinct= distinct; + quick_group= with_distinct ? 0 : 1; + } + + /* + Set the type of aggregation : DISTINCT or not. + + May be called multiple times. + */ + + int set_aggregator(THD *thd, Aggregator::Aggregator_type aggregator); + + virtual void clear()= 0; + virtual bool add()= 0; + virtual bool setup(THD *thd) { return false; } + + virtual bool supports_removal() const { return false; } + virtual void remove() { DBUG_ASSERT(0); } + + void cleanup() override; + bool check_vcol_func_processor(void *arg) override; + virtual void setup_window_func(THD *thd, Window_spec *window_spec) {} + void mark_as_window_func_sum_expr() { window_func_sum_expr_flag= true; } + bool is_window_func_sum_expr() { return window_func_sum_expr_flag; } + virtual void setup_caches(THD *thd) {}; + virtual void set_partition_row_count(ulonglong count) { DBUG_ASSERT(0); } +}; + + +class Unique; + + +/** + The distinct aggregator. + Implements AGGFN (DISTINCT ..) + Collects all the data into an Unique (similarly to what Item_sum + does currently when with_distinct=true) and then (if applicable) iterates over + the list of unique values and pumps them back into its object +*/ + +class Aggregator_distinct : public Aggregator +{ + friend class Item_sum_sum; + + /* + flag to prevent consecutive runs of endup(). Normally in endup there are + expensive calculations (like walking the distinct tree for example) + which we must do only once if there are no data changes. + We can re-use the data for the second and subsequent val_xxx() calls. 
+ endup_done set to TRUE also means that the calculated values for + the aggregate functions are correct and don't need recalculation. + */ + bool endup_done; + + /* + Used depending on the type of the aggregate function and the presence of + blob columns in it: + - For COUNT(DISTINCT) and no blob fields this points to a real temporary + table. It's used as a hash table. + - For AVG/SUM(DISTINCT) or COUNT(DISTINCT) with blob fields only the + in-memory data structure of a temporary table is constructed. + It's used by the Field classes to transform data into row format. + */ + TABLE *table; + + /* + An array of field lengths on row allocated and used only for + COUNT(DISTINCT) with multiple columns and no blobs. Used in + Aggregator_distinct::composite_key_cmp (called from Unique to compare + nodes + */ + uint32 *field_lengths; + + /* + Used in conjunction with 'table' to support the access to Field classes + for COUNT(DISTINCT). Needed by copy_fields()/copy_funcs(). + */ + TMP_TABLE_PARAM *tmp_table_param; + + /* + If there are no blobs in the COUNT(DISTINCT) arguments, we can use a tree, + which is faster than heap table. In that case, we still use the table + to help get things set up, but we insert nothing in it. + For AVG/SUM(DISTINCT) we always use this tree (as it takes a single + argument) to get the distinct rows. + */ + Unique *tree; + + /* + The length of the temp table row. Must be a member of the class as it + gets passed down to simple_raw_key_cmp () as a compare function argument + to Unique. simple_raw_key_cmp () is used as a fast comparison function + when the entire row can be binary compared. + */ + uint tree_key_length; + + /* + Set to true if the result is known to be always NULL. + If set deactivates creation and usage of the temporary table (in the + 'table' member) and the Unique instance (in the 'tree' member) as well as + the calculation of the final value on the first call to + Item_[sum|avg|count]::val_xxx(). 
+ */ + bool always_null; + + /** + When feeding back the data in endup() from Unique/temp table back to + Item_sum::add() methods we must read the data from Unique (and not + recalculate the functions that are given as arguments to the aggregate + function. + This flag is to tell the arg_*() methods to take the data from the Unique + instead of calling the relevant val_..() method. + */ + bool use_distinct_values; + +public: + Aggregator_distinct (Item_sum *sum) : + Aggregator(sum), table(NULL), tmp_table_param(NULL), tree(NULL), + always_null(false), use_distinct_values(false) {} + virtual ~Aggregator_distinct (); + Aggregator_type Aggrtype() { return DISTINCT_AGGREGATOR; } + + bool setup(THD *); + void clear(); + bool add(); + void endup(); + virtual my_decimal *arg_val_decimal(my_decimal * value); + virtual double arg_val_real(); + virtual bool arg_is_null(bool use_null_value); + + bool unique_walk_function(void *element); + bool unique_walk_function_for_count(void *element); + static int composite_key_cmp(void* arg, uchar* key1, uchar* key2); +}; + + +/** + The pass-through aggregator. + Implements AGGFN (DISTINCT ..) by knowing it gets distinct data on input. + So it just pumps them back to the Item_sum descendant class. 
+*/ +class Aggregator_simple : public Aggregator +{ +public: + + Aggregator_simple (Item_sum *sum) : + Aggregator(sum) {} + Aggregator_type Aggrtype() { return Aggregator::SIMPLE_AGGREGATOR; } + + bool setup(THD * thd) { return item_sum->setup(thd); } + void clear() { item_sum->clear(); } + bool add() { return item_sum->add(); } + void endup() {}; + virtual my_decimal *arg_val_decimal(my_decimal * value); + virtual double arg_val_real(); + virtual bool arg_is_null(bool use_null_value); +}; + + +class Item_sum_num :public Item_sum +{ +public: + Item_sum_num(THD *thd): Item_sum(thd) {} + Item_sum_num(THD *thd, Item *item_par): + Item_sum(thd, item_par) {} + Item_sum_num(THD *thd, Item *a, Item* b): + Item_sum(thd, a, b) {} + Item_sum_num(THD *thd, List &list): + Item_sum(thd, list) {} + Item_sum_num(THD *thd, Item_sum_num *item): + Item_sum(thd, item) {} + bool fix_fields(THD *, Item **); +}; + + +class Item_sum_double :public Item_sum_num +{ +public: + Item_sum_double(THD *thd): Item_sum_num(thd) {} + Item_sum_double(THD *thd, Item *item_par): Item_sum_num(thd, item_par) {} + Item_sum_double(THD *thd, List &list): Item_sum_num(thd, list) {} + Item_sum_double(THD *thd, Item_sum_double *item) :Item_sum_num(thd, item) {} + longlong val_int() override + { + return val_int_from_real(); + } + String *val_str(String*str) override + { + return val_string_from_real(str); + } + my_decimal *val_decimal(my_decimal *to) override + { + return val_decimal_from_real(to); + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + return get_date_from_real(thd, ltime, fuzzydate); + } + const Type_handler *type_handler() const override + { return &type_handler_double; } +}; + + +class Item_sum_int :public Item_sum_num +{ +public: + Item_sum_int(THD *thd): Item_sum_num(thd) {} + Item_sum_int(THD *thd, Item *item_par): Item_sum_num(thd, item_par) {} + Item_sum_int(THD *thd, List &list): Item_sum_num(thd, list) {} + Item_sum_int(THD *thd, Item_sum_int *item) 
:Item_sum_num(thd, item) {} + double val_real() override { DBUG_ASSERT(fixed()); return (double) val_int(); } + String *val_str(String*str) override; + my_decimal *val_decimal(my_decimal *) override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + return get_date_from_int(thd, ltime, fuzzydate); + } + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + max_length=21; + base_flags&= ~item_base_t::MAYBE_NULL; + null_value=0; + return FALSE; } +}; + + +class Item_sum_sum :public Item_sum_num, + public Type_handler_hybrid_field_type +{ +protected: + bool direct_added; + bool direct_reseted_field; + bool direct_sum_is_null; + double direct_sum_real; + double sum; + my_decimal direct_sum_decimal; + my_decimal dec_buffs[2]; + uint curr_dec_buff; + bool fix_length_and_dec(THD *thd) override; + +public: + Item_sum_sum(THD *thd, Item *item_par, bool distinct): + Item_sum_num(thd, item_par), direct_added(FALSE), + direct_reseted_field(FALSE) + { + set_distinct(distinct); + } + Item_sum_sum(THD *thd, Item_sum_sum *item); + enum Sumfunctype sum_func() const override + { + return has_with_distinct() ? 
SUM_DISTINCT_FUNC : SUM_FUNC; + } + void cleanup() override; + void direct_add(my_decimal *add_sum_decimal); + void direct_add(double add_sum_real, bool add_sum_is_null); + void clear() override; + bool add() override; + double val_real() override; + longlong val_int() override; + String *val_str(String*str) override; + my_decimal *val_decimal(my_decimal *) override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + return type_handler()->Item_get_date_with_warn(thd, this, ltime, fuzzydate); + } + const Type_handler *type_handler() const override + { return Type_handler_hybrid_field_type::type_handler(); } + void fix_length_and_dec_double(); + void fix_length_and_dec_decimal(); + void reset_field() override; + void update_field() override; + void no_rows_in_result() override {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name_distinct= { STRING_WITH_LEN("sum(distinct ")}; + static LEX_CSTRING name_normal= { STRING_WITH_LEN("sum(") }; + return has_with_distinct() ? name_distinct : name_normal; + } + Item *copy_or_same(THD* thd) override; + void remove() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + + bool supports_removal() const override + { + return true; + } + +private: + void add_helper(bool perform_removal); + ulonglong count; +}; + + +class Item_sum_count :public Item_sum_int +{ + bool direct_counted; + bool direct_reseted_field; + longlong direct_count; + longlong count; + + friend class Aggregator_distinct; + + void clear() override; + bool add() override; + void cleanup() override; + void remove() override; + +public: + Item_sum_count(THD *thd, Item *item_par): + Item_sum_int(thd, item_par), direct_counted(FALSE), + direct_reseted_field(FALSE), count(0) + {} + + /** + Constructs an instance for COUNT(DISTINCT) + + @param list a list of the arguments to the aggregate function + + This constructor is called by the parser only for COUNT (DISTINCT). 
+ */ + + Item_sum_count(THD *thd, List &list): + Item_sum_int(thd, list), direct_counted(FALSE), + direct_reseted_field(FALSE), count(0) + { + set_distinct(TRUE); + } + Item_sum_count(THD *thd, Item_sum_count *item): + Item_sum_int(thd, item), direct_counted(FALSE), + direct_reseted_field(FALSE), count(item->count) + {} + enum Sumfunctype sum_func () const override + { + return has_with_distinct() ? COUNT_DISTINCT_FUNC : COUNT_FUNC; + } + void no_rows_in_result() override { count=0; } + void make_const(longlong count_arg) + { + count=count_arg; + Item_sum::make_const(); + } + const Type_handler *type_handler() const override + { return &type_handler_slonglong; } + longlong val_int() override; + void reset_field() override; + void update_field() override; + void direct_add(longlong add_count); + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name_distinct= { STRING_WITH_LEN("count(distinct ")}; + static LEX_CSTRING name_normal= { STRING_WITH_LEN("count(") }; + return has_with_distinct() ? name_distinct : name_normal; + } + Item *copy_or_same(THD* thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + + bool supports_removal() const override + { + return true; + } +}; + + +class Item_sum_avg :public Item_sum_sum +{ +public: + // TODO-cvicentiu given that Item_sum_sum now uses a counter of its own, in + // order to implement remove(), it is possible to remove this member. + ulonglong count; + uint prec_increment; + uint f_precision, f_scale, dec_bin_size; + + Item_sum_avg(THD *thd, Item *item_par, bool distinct): + Item_sum_sum(thd, item_par, distinct), count(0) + {} + Item_sum_avg(THD *thd, Item_sum_avg *item) + :Item_sum_sum(thd, item), count(item->count), + prec_increment(item->prec_increment) {} + + void fix_length_and_dec_double(); + void fix_length_and_dec_decimal(); + bool fix_length_and_dec(THD *thd) override; + enum Sumfunctype sum_func () const override + { + return has_with_distinct() ? 
AVG_DISTINCT_FUNC : AVG_FUNC; + } + void clear() override; + bool add() override; + void remove() override; + double val_real() override; + // In SPs we might force the "wrong" type with select into a declare variable + longlong val_int() override { return val_int_from_real(); } + my_decimal *val_decimal(my_decimal *) override; + String *val_str(String *str) override; + void reset_field() override; + void update_field() override; + Item *result_item(THD *thd, Field *field) override; + void no_rows_in_result() override {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name_distinct= { STRING_WITH_LEN("avg(distinct ")}; + static LEX_CSTRING name_normal= { STRING_WITH_LEN("avg(") }; + return has_with_distinct() ? name_distinct : name_normal; + } + Item *copy_or_same(THD* thd) override; + Field *create_tmp_field(MEM_ROOT *root, bool group, TABLE *table) override; + void cleanup() override + { + count= 0; + Item_sum_sum::cleanup(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + + bool supports_removal() const override + { + return true; + } +}; + + +/* + variance(a) = + + = sum (ai - avg(a))^2 / count(a) ) + = sum (ai^2 - 2*ai*avg(a) + avg(a)^2) / count(a) + = (sum(ai^2) - sum(2*ai*avg(a)) + sum(avg(a)^2))/count(a) = + = (sum(ai^2) - 2*avg(a)*sum(a) + count(a)*avg(a)^2)/count(a) = + = (sum(ai^2) - 2*sum(a)*sum(a)/count(a) + count(a)*sum(a)^2/count(a)^2 )/count(a) = + = (sum(ai^2) - 2*sum(a)^2/count(a) + sum(a)^2/count(a) )/count(a) = + = (sum(ai^2) - sum(a)^2/count(a))/count(a) + +But, this falls prey to catastrophic cancellation. 
Instead, use the recurrence formulas + + M_{1} = x_{1}, ~ M_{k} = M_{k-1} + (x_{k} - M_{k-1}) / k newline + S_{1} = 0, ~ S_{k} = S_{k-1} + (x_{k} - M_{k-1}) times (x_{k} - M_{k}) newline + for 2 <= k <= n newline + ital variance = S_{n} / (n-1) + +*/ + +class Stddev +{ + double m_m; + double m_s; + ulonglong m_count; +public: + Stddev() :m_m(0), m_s(0), m_count(0) { } + Stddev(double nr) :m_m(nr), m_s(0.0), m_count(1) { } + Stddev(const uchar *); + void to_binary(uchar *) const; + void recurrence_next(double nr); + double result(bool is_simple_variance); + ulonglong count() const { return m_count; } + static uint32 binary_size() + { + return (uint32) (sizeof(double) * 2 + sizeof(ulonglong)); + }; +}; + + + +class Item_sum_variance :public Item_sum_double +{ + Stddev m_stddev; + bool fix_length_and_dec(THD *thd) override; + +public: + uint sample; + uint prec_increment; + + Item_sum_variance(THD *thd, Item *item_par, uint sample_arg): + Item_sum_double(thd, item_par), + sample(sample_arg) + {} + Item_sum_variance(THD *thd, Item_sum_variance *item); + Sumfunctype sum_func () const override { return VARIANCE_FUNC; } + void fix_length_and_dec_double(); + void fix_length_and_dec_decimal(); + void clear() override final; + bool add() override final; + double val_real() override; + void reset_field() override final; + void update_field() override final; + Item *result_item(THD *thd, Field *field) override; + void no_rows_in_result() override final {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name_sample= { STRING_WITH_LEN("var_samp(")}; + static LEX_CSTRING name_normal= { STRING_WITH_LEN("variance(") }; + return sample ? 
name_sample : name_normal; + } + Item *copy_or_same(THD* thd) override; + Field *create_tmp_field(MEM_ROOT *root, bool group, TABLE *table) override + final; + void cleanup() override final + { + m_stddev= Stddev(); + Item_sum_double::cleanup(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +/* + standard_deviation(a) = sqrt(variance(a)) +*/ + +class Item_sum_std final :public Item_sum_variance +{ + public: + Item_sum_std(THD *thd, Item *item_par, uint sample_arg): + Item_sum_variance(thd, item_par, sample_arg) {} + Item_sum_std(THD *thd, Item_sum_std *item) + :Item_sum_variance(thd, item) + {} + enum Sumfunctype sum_func () const override final { return STD_FUNC; } + double val_real() override final; + Item *result_item(THD *thd, Field *field) override final; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING std_name= {STRING_WITH_LEN("std(") }; + static LEX_CSTRING stddev_samp_name= {STRING_WITH_LEN("stddev_samp(") }; + return sample ? 
stddev_samp_name : std_name; + } + Item *copy_or_same(THD* thd) override final; + Item *get_copy(THD *thd) override final + { return get_item_copy(thd, this); } +}; + + +class Item_sum_hybrid : public Item_sum, + public Type_handler_hybrid_field_type +{ +public: + Item_sum_hybrid(THD *thd, Item *item_par): + Item_sum(thd, item_par), + Type_handler_hybrid_field_type(&type_handler_slonglong) + { collation.set(&my_charset_bin); } + Item_sum_hybrid(THD *thd, Item *a, Item *b): + Item_sum(thd, a, b), + Type_handler_hybrid_field_type(&type_handler_slonglong) + { collation.set(&my_charset_bin); } + Item_sum_hybrid(THD *thd, Item_sum_hybrid *item) + :Item_sum(thd, item), + Type_handler_hybrid_field_type(item) + { } + const Type_handler *type_handler() const override + { return Type_handler_hybrid_field_type::type_handler(); } + bool fix_length_and_dec_generic(); + bool fix_length_and_dec_numeric(const Type_handler *h); + bool fix_length_and_dec_string(); +}; + + +// This class is a string or number function depending on num_func +class Arg_comparator; +class Item_cache; +class Item_sum_min_max :public Item_sum_hybrid +{ +protected: + bool direct_added; + Item *direct_item; + Item_cache *value, *arg_cache; + Arg_comparator *cmp; + int cmp_sign; + bool was_values; // Set if we have found at least one row (for max/min only) + bool was_null_value; + +public: + Item_sum_min_max(THD *thd, Item *item_par,int sign): + Item_sum_hybrid(thd, item_par), + direct_added(FALSE), value(0), arg_cache(0), cmp(0), + cmp_sign(sign), was_values(TRUE) + { collation.set(&my_charset_bin); } + Item_sum_min_max(THD *thd, Item_sum_min_max *item) + :Item_sum_hybrid(thd, item), + direct_added(FALSE), value(item->value), arg_cache(0), + cmp_sign(item->cmp_sign), was_values(item->was_values) + { } + bool fix_fields(THD *, Item **) override; + bool fix_length_and_dec(THD *thd) override; + void setup_hybrid(THD *thd, Item *item, Item *value_arg); + void clear() override; + void direct_add(Item *item); + 
double val_real() override; + longlong val_int() override; + my_decimal *val_decimal(my_decimal *) override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + void reset_field() override; + String *val_str(String *) override; + bool val_native(THD *thd, Native *) override; + const Type_handler *real_type_handler() const override + { + return get_arg(0)->real_type_handler(); + } + const TYPELIB *get_typelib() const override { return args[0]->get_typelib(); } + void update_field() override; + void min_max_update_str_field(); + void min_max_update_real_field(); + void min_max_update_int_field(); + void min_max_update_decimal_field(); + void min_max_update_native_field(); + void cleanup() override; + bool any_value() { return was_values; } + void no_rows_in_result() override; + void restore_to_before_no_rows_in_result() override; + Field *create_tmp_field(MEM_ROOT *root, bool group, TABLE *table) override; + void setup_caches(THD *thd) override + { setup_hybrid(thd, arguments()[0], NULL); } +}; + + +class Item_sum_min final :public Item_sum_min_max +{ +public: + Item_sum_min(THD *thd, Item *item_par): Item_sum_min_max(thd, item_par, 1) {} + Item_sum_min(THD *thd, Item_sum_min *item) :Item_sum_min_max(thd, item) {} + enum Sumfunctype sum_func () const override {return MIN_FUNC;} + + bool add() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING sum_name= {STRING_WITH_LEN("min(") }; + return sum_name; + } + Item *copy_or_same(THD* thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_sum_max final :public Item_sum_min_max +{ +public: + Item_sum_max(THD *thd, Item *item_par): Item_sum_min_max(thd, item_par, -1) {} + Item_sum_max(THD *thd, Item_sum_max *item) :Item_sum_min_max(thd, item) {} + enum Sumfunctype sum_func () const override {return MAX_FUNC;} + + bool add() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING 
sum_name= {STRING_WITH_LEN("max(") }; + return sum_name; + } + Item *copy_or_same(THD* thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_sum_bit :public Item_sum_int +{ +public: + Item_sum_bit(THD *thd, Item *item_par, ulonglong reset_arg): + Item_sum_int(thd, item_par), reset_bits(reset_arg), bits(reset_arg), + as_window_function(FALSE), num_values_added(0) {} + Item_sum_bit(THD *thd, Item_sum_bit *item): + Item_sum_int(thd, item), reset_bits(item->reset_bits), bits(item->bits), + as_window_function(item->as_window_function), + num_values_added(item->num_values_added) + { + if (as_window_function) + memcpy(bit_counters, item->bit_counters, sizeof(bit_counters)); + } + enum Sumfunctype sum_func () const override { return SUM_BIT_FUNC;} + void clear() override; + longlong val_int() override; + void reset_field() override; + void update_field() override; + const Type_handler *type_handler() const override + { return &type_handler_ulonglong; } + bool fix_length_and_dec(THD *thd) override + { + if (args[0]->check_type_can_return_int(func_name_cstring())) + return true; + decimals= 0; max_length=21; unsigned_flag= 1; + base_flags&= ~item_base_t::MAYBE_NULL; + null_value= 0; + return FALSE; + } + void cleanup() override + { + bits= reset_bits; + if (as_window_function) + clear_as_window(); + Item_sum_int::cleanup(); + } + void setup_window_func(THD *thd __attribute__((unused)), + Window_spec *window_spec __attribute__((unused))) + override + { + as_window_function= TRUE; + clear_as_window(); + } + void remove() override + { + if (as_window_function) + { + remove_as_window(args[0]->val_int()); + return; + } + // Unless we're counting bits, we can not remove anything. + DBUG_ASSERT(0); + } + + bool supports_removal() const override + { + return true; + } + +protected: + enum bit_counters { NUM_BIT_COUNTERS= 64 }; + ulonglong reset_bits,bits; + /* + Marks whether the function is to be computed as a window function. 
+ */ + bool as_window_function; + // When used as an aggregate window function, we need to store + // this additional information. + ulonglong num_values_added; + ulonglong bit_counters[NUM_BIT_COUNTERS]; + bool add_as_window(ulonglong value); + bool remove_as_window(ulonglong value); + bool clear_as_window(); + virtual void set_bits_from_counters()= 0; +}; + + +class Item_sum_or final :public Item_sum_bit +{ +public: + Item_sum_or(THD *thd, Item *item_par): Item_sum_bit(thd, item_par, 0) {} + Item_sum_or(THD *thd, Item_sum_or *item) :Item_sum_bit(thd, item) {} + bool add() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING sum_name= {STRING_WITH_LEN("bit_or(") }; + return sum_name; + } + Item *copy_or_same(THD* thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + +private: + void set_bits_from_counters() override; +}; + + +class Item_sum_and final :public Item_sum_bit +{ +public: + Item_sum_and(THD *thd, Item *item_par): + Item_sum_bit(thd, item_par, ULONGLONG_MAX) {} + Item_sum_and(THD *thd, Item_sum_and *item) :Item_sum_bit(thd, item) {} + bool add() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING sum_min_name= {STRING_WITH_LEN("bit_and(") }; + return sum_min_name; + } + Item *copy_or_same(THD* thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + +private: + void set_bits_from_counters() override; +}; + +class Item_sum_xor final :public Item_sum_bit +{ +public: + Item_sum_xor(THD *thd, Item *item_par): Item_sum_bit(thd, item_par, 0) {} + Item_sum_xor(THD *thd, Item_sum_xor *item) :Item_sum_bit(thd, item) {} + bool add() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING sum_min_name= {STRING_WITH_LEN("bit_xor(") }; + return sum_min_name; + } + Item *copy_or_same(THD* thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + +private: + void 
set_bits_from_counters() override; +}; + +class sp_head; +class sp_name; +class Query_arena; +struct st_sp_security_context; + +/* + Item_sum_sp handles STORED AGGREGATE FUNCTIONS + + Each Item_sum_sp represents a custom aggregate function. Inside the + function's body, we require at least one occurrence of FETCH GROUP NEXT ROW + instruction. This cursor is what makes custom stored aggregates possible. + + During computation the function's add method is called. This in turn performs + an execution of the function. The function will execute from the current + function context (and instruction), if one exists, or from the start if not. + See Item_sp for more details. + + Upon encounter of FETCH GROUP NEXT ROW instruction, the function will pause + execution. We assume that the user has performed the necessary additions for + a row, between two encounters of FETCH GROUP NEXT ROW. + + Example: + create aggregate function f1(x INT) returns int + begin + declare continue handler for not found return s; + declare s int default 0 + loop + fetch group next row; + set s = s + x; + end loop; + end + + The function will always stop after an encounter of FETCH GROUP NEXT ROW, + except (!) on first encounter, as the value for the first row in the + group is already set in the argument x. This behaviour is done so when + a user writes a function, he should "logically" include FETCH GROUP NEXT ROW + before any "add" instructions in the stored function. This means however that + internally, the first occurrence doesn't stop the function. See the + implementation of FETCH GROUP NEXT ROW for details as to how it happens. + + Either way, one should assume that after calling "Item_sum_sp::add()" that + the values for that particular row have been added to the aggregation. + + To produce values for val_xxx methods we need an extra syntactic construct. + We require a continue handler when "no more rows are available". 
val_xxx + methods force a function return by executing the function again, while + setting a server flag that no more rows have been found. This implies + that val_xxx methods should only be called once per group however. + + Example: + DECLARE CONTINUE HANDLER FOR NOT FOUND RETURN ret_val; +*/ +class Item_sum_sp :public Item_sum, + public Item_sp +{ + private: + bool execute(); + +public: + Item_sum_sp(THD *thd, Name_resolution_context *context_arg, sp_name *name, + sp_head *sp); + + Item_sum_sp(THD *thd, Name_resolution_context *context_arg, sp_name *name, + sp_head *sp, List &list); + Item_sum_sp(THD *thd, Item_sum_sp *item); + + enum Sumfunctype sum_func () const override + { + return SP_AGGREGATE_FUNC; + } + Field *create_field_for_create_select(MEM_ROOT *root, TABLE *table) override + { + return create_table_field_from_handler(root, table); + } + bool fix_length_and_dec(THD *thd) override; + bool fix_fields(THD *thd, Item **ref) override; + LEX_CSTRING func_name_cstring() const override; + const Type_handler *type_handler() const override; + bool add() override; + + /* val_xx functions */ + longlong val_int() override + { + if(execute()) + return 0; + return sp_result_field->val_int(); + } + + double val_real() override + { + if(execute()) + return 0.0; + return sp_result_field->val_real(); + } + + my_decimal *val_decimal(my_decimal *dec_buf) override + { + if(execute()) + return NULL; + return sp_result_field->val_decimal(dec_buf); + } + + bool val_native(THD *thd, Native *to) override + { + return (null_value= execute()) || sp_result_field->val_native(to); + } + + String *val_str(String *str) override + { + String buf; + char buff[20]; + buf.set(buff, 20, str->charset()); + buf.length(0); + if (execute()) + return NULL; + /* + result_field will set buf pointing to internal buffer + of the resul_field. Due to this it will change any time + when SP is executed. In order to prevent occasional + corruption of returned value, we make here a copy. 
+ */ + sp_result_field->val_str(&buf); + str->copy(buf); + return str; + } + void reset_field() override{DBUG_ASSERT(0);} + void update_field() override{DBUG_ASSERT(0);} + void clear() override; + void cleanup() override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + return execute() || sp_result_field->get_date(ltime, fuzzydate); + } + inline Field *get_sp_result_field() + { + return sp_result_field; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + Item *copy_or_same(THD *thd) override; +}; + +/* Items to get the value of a stored sum function */ + +class Item_sum_field :public Item +{ +protected: + Field *field; +public: + Item_sum_field(THD *thd, Item_sum *item) + :Item(thd), field(item->result_field) + { + name= item->name; + set_maybe_null(); + decimals= item->decimals; + max_length= item->max_length; + unsigned_flag= item->unsigned_flag; + } + table_map used_tables() const override { return (table_map) 1L; } + Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src *src, + const Tmp_field_param *param) override + { + return create_tmp_field_ex_simple(root, table, src, param); + } + void save_in_result_field(bool no_conversions) override { DBUG_ASSERT(0); } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(name.str, arg, VCOL_IMPOSSIBLE); + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + return type_handler()->Item_get_date_with_warn(thd, this, ltime, fuzzydate); + } +}; + + +class Item_avg_field :public Item_sum_field +{ +protected: + uint prec_increment; +public: + Item_avg_field(THD *thd, Item_sum_avg *item) + :Item_sum_field(thd, item), prec_increment(item->prec_increment) + { } + enum Type type() const override { return FIELD_AVG_ITEM; } + bool is_null() override { update_null_value(); return null_value; } +}; + + +class Item_avg_field_double :public Item_avg_field +{ +public: + 
Item_avg_field_double(THD *thd, Item_sum_avg *item) + :Item_avg_field(thd, item) + { } + const Type_handler *type_handler() const override + { return &type_handler_double; } + longlong val_int() override { return val_int_from_real(); } + my_decimal *val_decimal(my_decimal *dec) override + { return val_decimal_from_real(dec); } + String *val_str(String *str) override + { return val_string_from_real(str); } + double val_real() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_avg_field_decimal :public Item_avg_field +{ + uint f_precision, f_scale, dec_bin_size; +public: + Item_avg_field_decimal(THD *thd, Item_sum_avg *item) + :Item_avg_field(thd, item), + f_precision(item->f_precision), + f_scale(item->f_scale), + dec_bin_size(item->dec_bin_size) + { } + const Type_handler *type_handler() const override + { return &type_handler_newdecimal; } + double val_real() override + { + return VDec(this).to_double(); + } + longlong val_int() override + { + return VDec(this).to_longlong(unsigned_flag); + } + String *val_str(String *str) override + { + return VDec(this).to_string_round(str, decimals); + } + my_decimal *val_decimal(my_decimal *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_variance_field :public Item_sum_field +{ + uint sample; +public: + Item_variance_field(THD *thd, Item_sum_variance *item) + :Item_sum_field(thd, item), sample(item->sample) + { } + enum Type type() const override {return FIELD_VARIANCE_ITEM; } + double val_real() override; + longlong val_int() override { return val_int_from_real(); } + String *val_str(String *str) override + { return val_string_from_real(str); } + my_decimal *val_decimal(my_decimal *dec_buf) override + { return val_decimal_from_real(dec_buf); } + bool is_null() override { update_null_value(); return null_value; } + const Type_handler *type_handler() const override + { return &type_handler_double; } + Item 
*get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_std_field :public Item_variance_field +{ +public: + Item_std_field(THD *thd, Item_sum_std *item) + :Item_variance_field(thd, item) + { } + enum Type type() const override { return FIELD_STD_ITEM; } + double val_real() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + User defined aggregates +*/ + +#ifdef HAVE_DLOPEN + +class Item_udf_sum : public Item_sum +{ +protected: + udf_handler udf; + +public: + Item_udf_sum(THD *thd, udf_func *udf_arg): + Item_sum(thd), udf(udf_arg) + { quick_group=0; } + Item_udf_sum(THD *thd, udf_func *udf_arg, List &list): + Item_sum(thd, list), udf(udf_arg) + { quick_group=0;} + Item_udf_sum(THD *thd, Item_udf_sum *item) + :Item_sum(thd, item), udf(item->udf) + { udf.not_original= TRUE; } + LEX_CSTRING func_name_cstring() const override + { + const char *tmp= udf.name(); + return {tmp, strlen(tmp) }; + } + bool fix_fields(THD *thd, Item **ref) override + { + DBUG_ASSERT(fixed() == 0); + + if (init_sum_func_check(thd)) + return TRUE; + + base_flags|= item_base_t::FIXED; + /* + We set const_item_cache to false in constructors. + It can be later changed to "true", in a Item_sum::make_const() call. + No make_const() calls should have happened so far. + */ + DBUG_ASSERT(!const_item_cache); + if (udf.fix_fields(thd, this, this->arg_count, this->args)) + return TRUE; + /** + The above call for udf.fix_fields() updates + the Used_tables_and_const_cache part of "this" as if it was a regular + non-aggregate UDF function and can change both const_item_cache and + used_tables_cache members. + - The used_tables_cache will be re-calculated in update_used_tables() + which is called from check_sum_func() below. So we don't care about + its current value. + - The const_item_cache must stay "false" until a Item_sum::make_const() + call happens, if ever. So we need to reset const_item_cache back to + "false" here. 
+ */ + const_item_cache= false; + memcpy (orig_args, args, sizeof (Item *) * arg_count); + return check_sum_func(thd, ref); + } + enum Sumfunctype sum_func () const override { return UDF_SUM_FUNC; } + virtual bool have_field_update(void) const { return 0; } + + void clear() override; + bool add() override; + bool supports_removal() const override; + void remove() override; + void reset_field() override {}; + void update_field() override {} + void cleanup() override; + void print(String *str, enum_query_type query_type) override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + return type_handler()->Item_get_date_with_warn(thd, this, ltime, fuzzydate); + } +}; + + +class Item_sum_udf_float :public Item_udf_sum +{ + public: + Item_sum_udf_float(THD *thd, udf_func *udf_arg): + Item_udf_sum(thd, udf_arg) {} + Item_sum_udf_float(THD *thd, udf_func *udf_arg, List &list): + Item_udf_sum(thd, udf_arg, list) {} + Item_sum_udf_float(THD *thd, Item_sum_udf_float *item) + :Item_udf_sum(thd, item) {} + longlong val_int() override { return val_int_from_real(); } + double val_real() override; + String *val_str(String*str) override; + my_decimal *val_decimal(my_decimal *) override; + const Type_handler *type_handler() const override + { return &type_handler_double; } + bool fix_length_and_dec(THD *thd) override + { fix_num_length_and_dec(); return FALSE; } + Item *copy_or_same(THD* thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_sum_udf_int :public Item_udf_sum +{ +public: + Item_sum_udf_int(THD *thd, udf_func *udf_arg): + Item_udf_sum(thd, udf_arg) {} + Item_sum_udf_int(THD *thd, udf_func *udf_arg, List &list): + Item_udf_sum(thd, udf_arg, list) {} + Item_sum_udf_int(THD *thd, Item_sum_udf_int *item) + :Item_udf_sum(thd, item) {} + longlong val_int() override; + double val_real() override + { DBUG_ASSERT(fixed()); return (double) Item_sum_udf_int::val_int(); } + String 
*val_str(String*str) override; + my_decimal *val_decimal(my_decimal *) override; + const Type_handler *type_handler() const override + { + if (unsigned_flag) + return &type_handler_ulonglong; + return &type_handler_slonglong; + } + bool fix_length_and_dec(THD *thd) override { decimals=0; max_length=21; return FALSE; } + Item *copy_or_same(THD* thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_sum_udf_str :public Item_udf_sum +{ +public: + Item_sum_udf_str(THD *thd, udf_func *udf_arg): + Item_udf_sum(thd, udf_arg) {} + Item_sum_udf_str(THD *thd, udf_func *udf_arg, List &list): + Item_udf_sum(thd, udf_arg, list) {} + Item_sum_udf_str(THD *thd, Item_sum_udf_str *item) + :Item_udf_sum(thd, item) {} + String *val_str(String *) override; + double val_real() override + { + int err_not_used; + char *end_not_used; + String *res; + res=val_str(&str_value); + return res ? res->charset()->strntod((char*) res->ptr(),res->length(), + &end_not_used, &err_not_used) : 0.0; + } + longlong val_int() override + { + int err_not_used; + char *end; + String *res; + CHARSET_INFO *cs; + + if (!(res= val_str(&str_value))) + return 0; /* Null value */ + cs= res->charset(); + end= (char*) res->ptr()+res->length(); + return cs->strtoll10(res->ptr(), &end, &err_not_used); + } + my_decimal *val_decimal(my_decimal *dec) override; + const Type_handler *type_handler() const override + { return string_type_handler(); } + bool fix_length_and_dec(THD *thd) override; + Item *copy_or_same(THD* thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_sum_udf_decimal :public Item_udf_sum +{ +public: + Item_sum_udf_decimal(THD *thd, udf_func *udf_arg): + Item_udf_sum(thd, udf_arg) {} + Item_sum_udf_decimal(THD *thd, udf_func *udf_arg, List &list): + Item_udf_sum(thd, udf_arg, list) {} + Item_sum_udf_decimal(THD *thd, Item_sum_udf_decimal *item) + :Item_udf_sum(thd, item) {} + String 
*val_str(String *str) override + { + return VDec(this).to_string_round(str, decimals); + } + double val_real() override + { + return VDec(this).to_double(); + } + longlong val_int() override + { + return VDec(this).to_longlong(unsigned_flag); + } + my_decimal *val_decimal(my_decimal *) override; + const Type_handler *type_handler() const override + { return &type_handler_newdecimal; } + bool fix_length_and_dec(THD *thd) override + { fix_num_length_and_dec(); return FALSE; } + Item *copy_or_same(THD* thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +#else /* Dummy functions to get yy_*.cc files compiled */ + +class Item_sum_udf_float :public Item_sum_double +{ + public: + Item_sum_udf_float(THD *thd, udf_func *udf_arg): + Item_sum_double(thd) {} + Item_sum_udf_float(THD *thd, udf_func *udf_arg, List &list): + Item_sum_double(thd) {} + Item_sum_udf_float(THD *thd, Item_sum_udf_float *item) + :Item_sum_double(thd, item) {} + enum Sumfunctype sum_func () const { return UDF_SUM_FUNC; } + double val_real() { DBUG_ASSERT(fixed()); return 0.0; } + void clear() {} + bool add() { return 0; } + void reset_field() { DBUG_ASSERT(0); }; + void update_field() {} +}; + + +class Item_sum_udf_int :public Item_sum_double +{ +public: + Item_sum_udf_int(THD *thd, udf_func *udf_arg): + Item_sum_double(thd) {} + Item_sum_udf_int(THD *thd, udf_func *udf_arg, List &list): + Item_sum_double(thd) {} + Item_sum_udf_int(THD *thd, Item_sum_udf_int *item) + :Item_sum_double(thd, item) {} + enum Sumfunctype sum_func () const { return UDF_SUM_FUNC; } + longlong val_int() { DBUG_ASSERT(fixed()); return 0; } + double val_real() { DBUG_ASSERT(fixed()); return 0; } + void clear() {} + bool add() { return 0; } + void reset_field() { DBUG_ASSERT(0); }; + void update_field() {} +}; + + +class Item_sum_udf_decimal :public Item_sum_double +{ + public: + Item_sum_udf_decimal(THD *thd, udf_func *udf_arg): + Item_sum_double(thd) {} + Item_sum_udf_decimal(THD 
*thd, udf_func *udf_arg, List &list): + Item_sum_double(thd) {} + Item_sum_udf_decimal(THD *thd, Item_sum_udf_float *item) + :Item_sum_double(thd, item) {} + enum Sumfunctype sum_func () const { return UDF_SUM_FUNC; } + double val_real() { DBUG_ASSERT(fixed()); return 0.0; } + my_decimal *val_decimal(my_decimal *) { DBUG_ASSERT(fixed()); return 0; } + void clear() {} + bool add() { return 0; } + void reset_field() { DBUG_ASSERT(0); }; + void update_field() {} +}; + + +class Item_sum_udf_str :public Item_sum_double +{ +public: + Item_sum_udf_str(THD *thd, udf_func *udf_arg): + Item_sum_double(thd) {} + Item_sum_udf_str(THD *thd, udf_func *udf_arg, List &list): + Item_sum_double(thd) {} + Item_sum_udf_str(THD *thd, Item_sum_udf_str *item) + :Item_sum_double(thd, item) {} + String *val_str(String *) + { DBUG_ASSERT(fixed()); null_value=1; return 0; } + double val_real() { DBUG_ASSERT(fixed()); null_value=1; return 0.0; } + longlong val_int() { DBUG_ASSERT(fixed()); null_value=1; return 0; } + bool fix_length_and_dec(THD *thd) override + { base_flags|= item_base_t::MAYBE_NULL; max_length=0; return FALSE; } + enum Sumfunctype sum_func () const { return UDF_SUM_FUNC; } + void clear() {} + bool add() { return 0; } + void reset_field() { DBUG_ASSERT(0); }; + void update_field() {} +}; + +#endif /* HAVE_DLOPEN */ + +C_MODE_START +int group_concat_key_cmp_with_distinct(void* arg, const void* key1, + const void* key2); +int group_concat_key_cmp_with_distinct_with_nulls(void* arg, const void* key1, + const void* key2); +int group_concat_key_cmp_with_order(void* arg, const void* key1, + const void* key2); +int group_concat_key_cmp_with_order_with_nulls(void *arg, const void *key1, + const void *key2); +int dump_leaf_key(void* key_arg, + element_count count __attribute__((unused)), + void* item_arg); +C_MODE_END + +class Item_func_group_concat : public Item_sum +{ +protected: + TMP_TABLE_PARAM *tmp_table_param; + String result; + String *separator; + TREE tree_base; + TREE 
*tree; + size_t tree_len; + Item **ref_pointer_array; + + /** + If DISTINCT is used with this GROUP_CONCAT, this member is used to filter + out duplicates. + @see Item_func_group_concat::setup + @see Item_func_group_concat::add + @see Item_func_group_concat::clear + */ + Unique *unique_filter; + TABLE *table; + ORDER **order; + Name_resolution_context *context; + /** The number of ORDER BY items. */ + uint arg_count_order; + /** The number of selected items, aka the expr list. */ + uint arg_count_field; + uint row_count; + bool distinct; + bool warning_for_row; + bool always_null; + bool force_copy_fields; + /** True if entire result of GROUP_CONCAT has been written to output buffer. */ + bool result_finalized; + /** Limits the rows in the result */ + Item *row_limit; + /** Skips a particular number of rows in from the result*/ + Item *offset_limit; + bool limit_clause; + /* copy of the offset limit */ + ulonglong copy_offset_limit; + /*copy of the row limit */ + ulonglong copy_row_limit; + + /* + Following is 0 normal object and pointer to original one for copy + (to correctly free resources) + */ + Item_func_group_concat *original; + + /* + Used by Item_func_group_concat and Item_func_json_arrayagg. The latter + needs null values but the former doesn't. + */ + bool add(bool exclude_nulls); + + friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1, + const void* key2); + friend int group_concat_key_cmp_with_distinct_with_nulls(void* arg, + const void* key1, + const void* key2); + friend int group_concat_key_cmp_with_order(void* arg, const void* key1, + const void* key2); + friend int group_concat_key_cmp_with_order_with_nulls(void *arg, + const void *key1, const void *key2); + friend int dump_leaf_key(void* key_arg, + element_count count __attribute__((unused)), + void* item_arg); + + bool repack_tree(THD *thd); + + /* + Says whether the function should skip NULL arguments + or add them to the result. + Redefined in JSON_ARRAYAGG. 
+ */ + virtual bool skip_nulls() const { return true; } + virtual String *get_str_from_item(Item *i, String *tmp) + { return i->val_str(tmp); } + virtual String *get_str_from_field(Item *i, Field *f, String *tmp, + const uchar *key, size_t offset) + { return f->val_str(tmp, key + offset); } + virtual void cut_max_length(String *result, + uint old_length, uint max_length) const; +public: + // Methods used by ColumnStore + bool get_distinct() const { return distinct; } + uint get_count_field() const { return arg_count_field; } + uint get_order_field() const { return arg_count_order; } + const String* get_separator() const { return separator; } + ORDER** get_order() const { return order; } + +public: + Item_func_group_concat(THD *thd, Name_resolution_context *context_arg, + bool is_distinct, List *is_select, + const SQL_I_List &is_order, String *is_separator, + bool limit_clause, Item *row_limit, Item *offset_limit); + + Item_func_group_concat(THD *thd, Item_func_group_concat *item); + ~Item_func_group_concat(); + void cleanup() override; + + enum Sumfunctype sum_func () const override {return GROUP_CONCAT_FUNC;} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING sum_name= {STRING_WITH_LEN("group_concat(") }; + return sum_name; + } + const Type_handler *type_handler() const override + { + if (too_big_for_varchar()) + return &type_handler_blob; + return &type_handler_varchar; + } + void clear() override; + bool add() override + { + return add(skip_nulls()); + } + void reset_field() override { DBUG_ASSERT(0); } // not used + void update_field() override { DBUG_ASSERT(0); } // not used + bool fix_fields(THD *,Item **) override; + bool setup(THD *thd) override; + void make_unique() override; + double val_real() override + { + int error; + const char *end; + String *res; + if (!(res= val_str(&str_value))) + return 0.0; + end= res->ptr() + res->length(); + return (my_strtod(res->ptr(), (char**) &end, &error)); + } + longlong val_int() override + { + 
String *res; + char *end_ptr; + int error; + if (!(res= val_str(&str_value))) + return (longlong) 0; + end_ptr= (char*) res->ptr()+ res->length(); + return my_strtoll10(res->ptr(), &end_ptr, &error); + } + my_decimal *val_decimal(my_decimal *decimal_value) override + { + return val_decimal_from_string(decimal_value); + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + return get_date_from_string(thd, ltime, fuzzydate); + } + String *val_str(String *str) override; + Item *copy_or_same(THD* thd) override; + void no_rows_in_result() override {} + void print(String *str, enum_query_type query_type) override; + bool change_context_processor(void *cntx) override + { context= (Name_resolution_context *)cntx; return FALSE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + qsort_cmp2 get_comparator_function_for_distinct(); + qsort_cmp2 get_comparator_function_for_order_by(); + uchar* get_record_pointer(); + uint get_null_bytes(); + +}; + +#endif /* ITEM_SUM_INCLUDED */ diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc new file mode 100644 index 00000000..b624a381 --- /dev/null +++ b/sql/item_timefunc.cc @@ -0,0 +1,3911 @@ +/* + Copyright (c) 2000, 2012, Oracle and/or its affiliates. + Copyright (c) 2009, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/** + @file + + @brief + This file defines all time functions + + @todo + Move month and days to language files +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +/* + It is necessary to include set_var.h instead of item.h because there + are dependencies on include order for set_var.h and item.h. This + will be resolved later. +*/ +#include "sql_class.h" // set_var.h: THD +#include "set_var.h" +#include "sql_locale.h" // MY_LOCALE my_locale_en_US +#include "strfunc.h" // check_word +#include "sql_type_int.h" // Longlong_hybrid +#include "sql_time.h" // make_truncated_value_warning, + // get_date_from_daynr, + // calc_weekday, calc_week, + // convert_month_to_period, + // convert_period_to_month, + // TIME_to_timestamp, + // calc_time_diff, + // calc_time_from_sec, + // get_date_time_format_str +#include "tztime.h" // struct Time_zone +#include "sql_class.h" // THD +#include +#include + +/** Day number for Dec 31st, 9999. 
*/ +#define MAX_DAY_NUMBER 3652424L + +Func_handler_date_add_interval_datetime_arg0_time + func_handler_date_add_interval_datetime_arg0_time; + +Func_handler_date_add_interval_datetime func_handler_date_add_interval_datetime; +Func_handler_date_add_interval_date func_handler_date_add_interval_date; +Func_handler_date_add_interval_time func_handler_date_add_interval_time; +Func_handler_date_add_interval_string func_handler_date_add_interval_string; + +Func_handler_add_time_datetime func_handler_add_time_datetime_add(1); +Func_handler_add_time_datetime func_handler_add_time_datetime_sub(-1); +Func_handler_add_time_time func_handler_add_time_time_add(1); +Func_handler_add_time_time func_handler_add_time_time_sub(-1); +Func_handler_add_time_string func_handler_add_time_string_add(1); +Func_handler_add_time_string func_handler_add_time_string_sub(-1); + +Func_handler_str_to_date_datetime_sec func_handler_str_to_date_datetime_sec; +Func_handler_str_to_date_datetime_usec func_handler_str_to_date_datetime_usec; +Func_handler_str_to_date_date func_handler_str_to_date_date; +Func_handler_str_to_date_time_sec func_handler_str_to_date_time_sec; +Func_handler_str_to_date_time_usec func_handler_str_to_date_time_usec; + + +/* + Date formats corresponding to compound %r and %T conversion specifiers + + Note: We should init at least first element of "positions" array + (first member) or hpux11 compiler will die horribly. +*/ +static DATE_TIME_FORMAT time_ampm_format= {{0}, '\0', 0, + {(char *)"%I:%i:%S %p", 11}}; +static DATE_TIME_FORMAT time_24hrs_format= {{0}, '\0', 0, + {(char *)"%H:%i:%S", 8}}; + +/** + Extract datetime value to MYSQL_TIME struct from string value + according to format string. + + @param format date/time format specification + @param val String to decode + @param length Length of string + @param l_time Store result here + @param cached_timestamp_type It uses to get an appropriate warning + in the case when the value is truncated. 
  @param sub_pattern_end    if non-zero then we are parsing string which
                            should correspond to a compound specifier (like %T
                            or %r) and this parameter is pointer to place where
                            pointer to end of string matching this specifier
                            should be stored.

  @note
    Possibility to parse strings matching to patterns equivalent to compound
    specifiers is mainly intended for use from inside of this function in
    order to understand %T and %r conversion specifiers, so number of
    conversion specifiers that can be used in such sub-patterns is limited.
    Also most of checks are skipped in this case.

  @note
    If one adds new format specifiers to this function he should also
    consider adding them to get_date_time_result_type() function.

  @retval
    0 ok
  @retval
    1 error
*/

static bool extract_date_time(THD *thd, DATE_TIME_FORMAT *format,
                              const char *val, uint length, MYSQL_TIME *l_time,
                              timestamp_type cached_timestamp_type,
                              const char **sub_pattern_end,
                              const char *date_time_type,
                              date_conv_mode_t fuzzydate)
{
  int weekday= 0, yearday= 0, daypart= 0;
  int week_number= -1;
  int error= 0;
  int strict_week_number_year= -1;
  int frac_part;
  bool usa_time= 0;
  bool UNINIT_VAR(sunday_first_n_first_week_non_iso);
  bool UNINIT_VAR(strict_week_number);
  bool UNINIT_VAR(strict_week_number_year_type);
  const char *val_begin= val;
  const char *val_end= val + length;
  const char *ptr= format->format.str;
  const char *end= ptr + format->format.length;
  CHARSET_INFO *cs= &my_charset_bin;
  DBUG_ENTER("extract_date_time");

  /* When called recursively for %r/%T, l_time is already partly filled */
  if (!sub_pattern_end)
    bzero((char*) l_time, sizeof(*l_time));

  l_time->time_type= cached_timestamp_type;

  for (; ptr != end && val != val_end; ptr++)
  {
    /* Skip pre-space between each argument */
    if ((val+= cs->scan(val, val_end, MY_SEQ_SPACES)) >= val_end)
      break;

    if (*ptr == '%' && ptr+1 != end)
    {
      int val_len;
      char *tmp;

      error= 0;

      val_len= (uint) (val_end - val);
      switch (*++ptr) {
        /* Year */
      case 'Y':
        tmp= (char*) val + MY_MIN(4, val_len);
        l_time->year= (int) my_strtoll10(val, &tmp, &error);
        /* Two-digit years are mapped through the Y2K window */
        if ((int) (tmp-val) <= 2)
          l_time->year= year_2000_handling(l_time->year);
        val= tmp;
        break;
      case 'y':
        tmp= (char*) val + MY_MIN(2, val_len);
        l_time->year= (int) my_strtoll10(val, &tmp, &error);
        val= tmp;
        l_time->year= year_2000_handling(l_time->year);
        break;

        /* Month */
      case 'm':
      case 'c':
        tmp= (char*) val + MY_MIN(2, val_len);
        l_time->month= (int) my_strtoll10(val, &tmp, &error);
        val= tmp;
        break;
      case 'M':
        if ((l_time->month= check_word(my_locale_en_US.month_names,
                                       val, val_end, &val)) <= 0)
          goto err;
        break;
      case 'b':
        if ((l_time->month= check_word(my_locale_en_US.ab_month_names,
                                       val, val_end, &val)) <= 0)
          goto err;
        break;
        /* Day */
      case 'd':
      case 'e':
        tmp= (char*) val + MY_MIN(2, val_len);
        l_time->day= (int) my_strtoll10(val, &tmp, &error);
        val= tmp;
        break;
      case 'D':
        tmp= (char*) val + MY_MIN(2, val_len);
        l_time->day= (int) my_strtoll10(val, &tmp, &error);
        /* Skip 'st, 'nd, 'th .. */
        val= tmp + MY_MIN((int) (val_end-tmp), 2);
        break;

        /* Hour */
      case 'h':
      case 'I':
      case 'l':
        usa_time= 1;
        /* fall through */
      case 'k':
      case 'H':
        tmp= (char*) val + MY_MIN(2, val_len);
        l_time->hour= (int) my_strtoll10(val, &tmp, &error);
        val= tmp;
        break;

        /* Minute */
      case 'i':
        tmp= (char*) val + MY_MIN(2, val_len);
        l_time->minute= (int) my_strtoll10(val, &tmp, &error);
        val= tmp;
        break;

        /* Second */
      case 's':
      case 'S':
        tmp= (char*) val + MY_MIN(2, val_len);
        l_time->second= (int) my_strtoll10(val, &tmp, &error);
        val= tmp;
        break;

        /* Second part */
      case 'f':
        tmp= (char*) val_end;
        if (tmp - val > 6)
          tmp= (char*) val + 6;
        l_time->second_part= (int) my_strtoll10(val, &tmp, &error);
        /* Scale up e.g. ".1" to 100000 microseconds */
        frac_part= 6 - (int) (tmp - val);
        if (frac_part > 0)
          l_time->second_part*= (ulong) log_10_int[frac_part];
        val= tmp;
        break;

        /* AM / PM */
      case 'p':
        if (val_len < 2 || ! usa_time)
          goto err;
        if (!my_charset_latin1.strnncoll(val, 2, "PM", 2))
          daypart= 12;
        else if (my_charset_latin1.strnncoll(val, 2, "AM", 2))
          goto err;
        val+= 2;
        break;

        /* Exotic things */
      case 'W':
        if ((weekday= check_word(my_locale_en_US.day_names, val, val_end, &val)) <= 0)
          goto err;
        break;
      case 'a':
        if ((weekday= check_word(my_locale_en_US.ab_day_names, val, val_end, &val)) <= 0)
          goto err;
        break;
      case 'w':
        tmp= (char*) val + 1;
        if (unlikely((weekday= (int) my_strtoll10(val, &tmp, &error)) < 0 ||
                     weekday >= 7))
          goto err;
        /* We should use the same 1 - 7 scale for %w as for %W */
        if (!weekday)
          weekday= 7;
        val= tmp;
        break;
      case 'j':
        tmp= (char*) val + MY_MIN(val_len, 3);
        yearday= (int) my_strtoll10(val, &tmp, &error);
        val= tmp;
        break;

        /* Week numbers */
      case 'V':
      case 'U':
      case 'v':
      case 'u':
        sunday_first_n_first_week_non_iso= (*ptr=='U' || *ptr== 'V');
        strict_week_number= (*ptr=='V' || *ptr=='v');
        tmp= (char*) val + MY_MIN(val_len, 2);
        if (unlikely((week_number=
                      (int) my_strtoll10(val, &tmp, &error)) < 0 ||
                     (strict_week_number && !week_number) ||
                     week_number > 53))
          goto err;
        val= tmp;
        break;

        /* Year used with 'strict' %V and %v week numbers */
      case 'X':
      case 'x':
        strict_week_number_year_type= (*ptr=='X');
        tmp= (char*) val + MY_MIN(4, val_len);
        strict_week_number_year= (int) my_strtoll10(val, &tmp, &error);
        val= tmp;
        break;

        /* Time in AM/PM notation */
      case 'r':
        /*
          We can't just set error here, as we don't want to generate two
          warnings in case of errors
        */
        if (extract_date_time(thd, &time_ampm_format, val,
                              (uint)(val_end - val), l_time,
                              cached_timestamp_type, &val, "time", fuzzydate))
          DBUG_RETURN(1);
        break;

        /* Time in 24-hour notation */
      case 'T':
        if (extract_date_time(thd, &time_24hrs_format, val,
                              (uint)(val_end - val), l_time,
                              cached_timestamp_type, &val, "time", fuzzydate))
          DBUG_RETURN(1);
        break;

        /* Conversion specifiers that match classes of characters */
      case '.':
        while (my_ispunct(cs, *val) && val != val_end)
          val++;
        break;
      case '@':
        while (my_isalpha(cs, *val) && val != val_end)
          val++;
        break;
      case '#':
        while (my_isdigit(cs, *val) && val != val_end)
          val++;
        break;
      default:
        goto err;
      }
      if (unlikely(error))                      // Error from my_strtoll10
        goto err;
    }
    else if (!my_isspace(cs, *ptr))
    {
      /* Literal character in the format must match the input exactly */
      if (*val != *ptr)
        goto err;
      val++;
    }
  }
  if (usa_time)
  {
    if (l_time->hour > 12 || l_time->hour < 1)
      goto err;
    l_time->hour= l_time->hour%12+daypart;
  }

  /*
    If we are recursively called for parsing string matching compound
    specifiers we are already done.
  */
  if (sub_pattern_end)
  {
    *sub_pattern_end= val;
    DBUG_RETURN(0);
  }

  if (yearday > 0)
  {
    uint days;
    days= calc_daynr(l_time->year,1,1) + yearday - 1;
    if (get_date_from_daynr(days,&l_time->year,&l_time->month,&l_time->day))
      goto err;
  }

  if (week_number >= 0 && weekday)
  {
    int days;
    uint weekday_b;

    /*
      %V,%v require %X,%x respectively,
      %U,%u should be used with %Y and not %X or %x
    */
    if ((strict_week_number &&
         (strict_week_number_year < 0 ||
          strict_week_number_year_type !=
          sunday_first_n_first_week_non_iso)) ||
        (!strict_week_number && strict_week_number_year >= 0))
      goto err;

    /* Number of days since year 0 till 1st Jan of this year */
    days= calc_daynr((strict_week_number ? strict_week_number_year :
                      l_time->year),
                     1, 1);
    /* Which day of week is 1st Jan of this year */
    weekday_b= calc_weekday(days, sunday_first_n_first_week_non_iso);

    /*
      Below we are going to sum:
      1) number of days since year 0 till 1st day of 1st week of this year
      2) number of days between 1st week and our week
      3) and position of our day in the week
    */
    if (sunday_first_n_first_week_non_iso)
    {
      days+= ((weekday_b == 0) ? 0 : 7) - weekday_b +
             (week_number - 1) * 7 +
             weekday % 7;
    }
    else
    {
      days+= ((weekday_b <= 3) ? 0 : 7) - weekday_b +
             (week_number - 1) * 7 +
             (weekday - 1);
    }

    if (get_date_from_daynr(days,&l_time->year,&l_time->month,&l_time->day))
      goto err;
  }

  if (l_time->month > 12 || l_time->day > 31 || l_time->hour > 23 ||
      l_time->minute > 59 || l_time->second > 59)
    goto err;

  int was_cut;
  if (check_date(l_time, fuzzydate | TIME_INVALID_DATES, &was_cut))
    goto err;

  /* Warn about trailing garbage after the matched part of the input */
  if (val != val_end)
  {
    do
    {
      if (!my_isspace(&my_charset_latin1,*val))
      {
        ErrConvString err(val_begin, length, &my_charset_bin);
        make_truncated_value_warning(thd, Sql_condition::WARN_LEVEL_WARN,
                                     &err, cached_timestamp_type,
                                     nullptr, nullptr, nullptr);
        break;
      }
    } while (++val != val_end);
  }
  DBUG_RETURN(0);

err:
  {
    char buff[128];
    strmake(buff, val_begin, MY_MIN(length, sizeof(buff)-1));
    push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                        ER_WRONG_VALUE_FOR_TYPE,
                        ER_THD(thd, ER_WRONG_VALUE_FOR_TYPE),
                        date_time_type, buff, "str_to_date");
  }
  DBUG_RETURN(1);
}


/**
  Create a formatted date/time value in a string.
*/

static bool make_date_time(const String *format, const MYSQL_TIME *l_time,
                           timestamp_type type, const MY_LOCALE *locale,
                           String *str)
{
  char intbuff[15];
  uint hours_i;
  uint weekday;
  ulong length;
  const uchar *ptr, *end;

  str->length(0);

  if (l_time->neg)
    str->append_wc('-');

  end= (ptr= (const uchar *) format->ptr()) + format->length();

  /* Iterate the format string character by character (charset-aware) */
  for ( ; ; )
  {
    my_wc_t wc;
    int mblen= format->charset()->cset->mb_wc(format->charset(), &wc, ptr, end);
    if (mblen < 1)
      return false;                 // End of format string
    ptr+= mblen;

    if (wc != '%' || ptr >= end)
      str->append_wc(wc);           // Literal character, copy as-is
    else
    {
      /* Read the character following '%': the conversion specifier */
      mblen= format->charset()->cset->mb_wc(format->charset(), &wc, ptr, end);
      if (mblen < 1)
        return false;
      ptr+= mblen;

      switch (wc) {
      case 'M':
        if (type == MYSQL_TIMESTAMP_TIME || !l_time->month)
          return 1;
        str->append(locale->month_names->type_names[l_time->month-1],
                    (uint) strlen(locale->month_names->type_names[l_time->month-1]),
                    system_charset_info);
        break;
      case 'b':
        if (type == MYSQL_TIMESTAMP_TIME || !l_time->month)
          return 1;
        str->append(locale->ab_month_names->type_names[l_time->month-1],
                    (uint) strlen(locale->ab_month_names->type_names[l_time->month-1]),
                    system_charset_info);
        break;
      case 'W':
        if (type == MYSQL_TIMESTAMP_TIME || !(l_time->month || l_time->year))
          return 1;
        weekday= calc_weekday(calc_daynr(l_time->year,l_time->month,
                                         l_time->day),0);
        str->append(locale->day_names->type_names[weekday],
                    (uint) strlen(locale->day_names->type_names[weekday]),
                    system_charset_info);
        break;
      case 'a':
        if (type == MYSQL_TIMESTAMP_TIME || !(l_time->month || l_time->year))
          return 1;
        weekday=calc_weekday(calc_daynr(l_time->year,l_time->month,
                                        l_time->day),0);
        str->append(locale->ab_day_names->type_names[weekday],
                    (uint) strlen(locale->ab_day_names->type_names[weekday]),
                    system_charset_info);
        break;
      case 'D':
        /* Day of the month with English ordinal suffix (1st, 2nd, ...) */
        if (type == MYSQL_TIMESTAMP_TIME)
          return 1;
        str->append_zerofill(l_time->day, 1);
        if (l_time->day >= 10 && l_time->day <= 19)
          str->append(STRING_WITH_LEN("th"));
        else
        {
          switch (l_time->day %10) {
          case 1:
            str->append(STRING_WITH_LEN("st"));
            break;
          case 2:
            str->append(STRING_WITH_LEN("nd"));
            break;
          case 3:
            str->append(STRING_WITH_LEN("rd"));
            break;
          default:
            str->append(STRING_WITH_LEN("th"));
            break;
          }
        }
        break;
      case 'Y':
        if (type == MYSQL_TIMESTAMP_TIME)
          return 1;
        str->append_zerofill(l_time->year, 4);
        break;
      case 'y':
        if (type == MYSQL_TIMESTAMP_TIME)
          return 1;
        str->append_zerofill(l_time->year % 100, 2);
        break;
      case 'm':
        if (type == MYSQL_TIMESTAMP_TIME)
          return 1;
        str->append_zerofill(l_time->month, 2);
        break;
      case 'c':
        if (type == MYSQL_TIMESTAMP_TIME)
          return 1;
        str->append_zerofill(l_time->month, 1);
        break;
      case 'd':
        if (type == MYSQL_TIMESTAMP_TIME)
          return 1;
        str->append_zerofill(l_time->day, 2);
        break;
      case 'e':
        if (type == MYSQL_TIMESTAMP_TIME)
          return 1;
        str->append_zerofill(l_time->day, 1);
        break;
      case 'f':
        str->append_zerofill((uint) l_time->second_part, 6);
        break;
      case 'H':
        str->append_zerofill(l_time->hour, 2);
        break;
      case 'h':
      case 'I':
        hours_i= (l_time->hour%24 + 11)%12+1;
        str->append_zerofill(hours_i, 2);
        break;
      case 'i':                                 /* minutes */
        str->append_zerofill(l_time->minute, 2);
        break;
      case 'j':
      {
        if (type == MYSQL_TIMESTAMP_TIME || !l_time->month || !l_time->year)
          return 1;
        long value= calc_daynr(l_time->year,l_time->month, l_time->day) -
                    calc_daynr(l_time->year,1,1) + 1;
        str->append_zerofill((uint) value, 3);
        break;
      }
      case 'k':
        str->append_zerofill(l_time->hour, 1);
        break;
      case 'l':
        hours_i= (l_time->hour%24 + 11)%12+1;
        str->append_zerofill(hours_i, 1);
        break;
      case 'p':
        hours_i= l_time->hour%24;
        str->append(hours_i < 12 ? "AM" : "PM",2);
        break;
      case 'r':
        length= sprintf(intbuff, ((l_time->hour % 24) < 12) ?
                        "%02d:%02d:%02d AM" : "%02d:%02d:%02d PM",
                        (l_time->hour+11)%12+1,
                        l_time->minute,
                        l_time->second);
        str->append(intbuff, length);
        break;
      case 'S':
      case 's':
        str->append_zerofill(l_time->second, 2);
        break;
      case 'T':
        length= sprintf(intbuff, "%02d:%02d:%02d",
                        l_time->hour, l_time->minute, l_time->second);
        str->append(intbuff, length);
        break;
      case 'U':
      case 'u':
      {
        uint year;
        if (type == MYSQL_TIMESTAMP_TIME)
          return 1;

        uint value= calc_week(l_time,
                              wc == 'U' ? WEEK_FIRST_WEEKDAY :
                                          WEEK_MONDAY_FIRST,
                              &year);
        str->append_zerofill(value, 2);
      }
      break;
      case 'v':
      case 'V':
      {
        uint year;
        if (type == MYSQL_TIMESTAMP_TIME)
          return 1;
        uint value= calc_week(l_time, wc == 'V' ?
                              (WEEK_YEAR | WEEK_FIRST_WEEKDAY) :
                              (WEEK_YEAR | WEEK_MONDAY_FIRST),
                              &year);
        str->append_zerofill(value, 2);
      }
      break;
      case 'x':
      case 'X':
      {
        uint year;
        if (type == MYSQL_TIMESTAMP_TIME)
          return 1;
        (void) calc_week(l_time,
                         (wc == 'X' ?
                          WEEK_YEAR | WEEK_FIRST_WEEKDAY :
                          WEEK_YEAR | WEEK_MONDAY_FIRST),
                         &year);
        str->append_zerofill(year, 4);
      }
      break;
      case 'w':
        if (type == MYSQL_TIMESTAMP_TIME || !(l_time->month || l_time->year))
          return 1;
        weekday=calc_weekday(calc_daynr(l_time->year,l_time->month,
                                        l_time->day),1);
        str->append_zerofill(weekday, 1);
        break;

      default:
        /* Unknown specifier: output the character itself (e.g. "%%" -> '%') */
        str->append_wc(wc);
        break;
      }
    }
  }
  return 0;
}


/**
  @details
  Get an array of positive numbers from a string object.
  Each number is separated by 1 non digit character
  Return error if there are too many numbers.
  If there are too few numbers, assume that the numbers are left out
  from the high end. This allows one to give:
  DAY_TO_SECOND as "D MM:HH:SS", "MM:HH:SS" "HH:SS" or as seconds.

  @param length:         length of str
  @param cs:             charset of str
  @param values:         array of results
  @param count:          count of elements in result array
  @param transform_msec: if value is true we suppose
                         that the last part of string value is microseconds
                         and we should transform value to six digit value.
                         For example, '1.1' -> '1.100000'
*/

#define MAX_DIGITS_IN_TIME_SPEC 20

static bool get_interval_info(const char *str, size_t length,CHARSET_INFO *cs,
                              size_t count, ulonglong *values,
                              bool transform_msec)
{
  const char *end=str+length;
  uint i;
  size_t field_length= 0;

  /* Skip any leading non-digit separator characters */
  while (str != end && !my_isdigit(cs,*str))
    str++;

  for (i=0 ; i < count ; i++)
  {
    ulonglong value;
    const char *start= str;
    const char *local_end= end;

    /*
      We limit things to 19 digits to not get an overflow. This is ok as
      this function is meant to read up to microseconds
    */
    if ((local_end-str) > MAX_DIGITS_IN_TIME_SPEC)
      local_end= str+ MAX_DIGITS_IN_TIME_SPEC;

    for (value= 0; str != local_end && my_isdigit(cs, *str) ; str++)
      value= value*10 + *str - '0';

    if ((field_length= (size_t)(str - start)) >= MAX_DIGITS_IN_TIME_SPEC)
      return true;
    values[i]= value;
    while (str != end && !my_isdigit(cs,*str))
      str++;
    if (str == end && i != count-1)
    {
      i++;
      /* Change values[0...i-1] -> values[0...count-1] */
      bmove_upp((uchar*) (values+count), (uchar*) (values+i),
                sizeof(*values)*i);
      bzero((uchar*) values, sizeof(*values)*(count-i));
      break;
    }
  }

  /* Scale the last (microsecond) field to exactly six digits */
  if (transform_msec && field_length > 0)
  {
    if (field_length < 6)
      values[count - 1] *= log_10_int[6 - field_length];
    else if (field_length > 6)
      values[count - 1] /= log_10_int[field_length - 6];
  }

  return (str != end);
}


/* PERIOD_ADD(P,N): add N months to period P given in YYYYMM format */
longlong Item_func_period_add::val_int()
{
  DBUG_ASSERT(fixed());
  ulong period=(ulong) args[0]->val_int();
  int months=(int) args[1]->val_int();

  if ((null_value=args[0]->null_value || args[1]->null_value) ||
      period == 0L)
    return 0; /* purecov:
inspected */
  return (longlong)
    convert_month_to_period((uint) ((int) convert_period_to_month(period)+
                                    months));
}


/* PERIOD_DIFF(P1,P2): number of months between two YYYYMM periods */
longlong Item_func_period_diff::val_int()
{
  DBUG_ASSERT(fixed());
  ulong period1=(ulong) args[0]->val_int();
  ulong period2=(ulong) args[1]->val_int();

  if ((null_value=args[0]->null_value || args[1]->null_value))
    return 0; /* purecov: inspected */
  return (longlong) ((long) convert_period_to_month(period1)-
                     (long) convert_period_to_month(period2));
}



longlong Item_func_to_days::val_int()
{
  DBUG_ASSERT(fixed());
  THD *thd= current_thd;
  Datetime d(thd, args[0], Datetime::Options(TIME_NO_ZEROS, thd));
  return (null_value= !d.is_valid_datetime()) ? 0 : d.daynr();
}


longlong Item_func_to_seconds::val_int_endpoint(bool left_endp,
                                                bool *incl_endp)
{
  DBUG_ASSERT(fixed());
  // val_int_endpoint() is called only if args[0] is a temporal Item_field
  Datetime_from_temporal dt(current_thd, args[0], TIME_FUZZY_DATES);
  if ((null_value= !dt.is_valid_datetime()))
  {
    /* got NULL, leave the incl_endp intact */
    return LONGLONG_MIN;
  }
  /* Set to NULL if invalid date, but keep the value */
  null_value= dt.check_date(TIME_NO_ZEROS);
  /*
    Even if the evaluation return NULL, seconds is useful for pruning
  */
  return dt.to_seconds();
}

longlong Item_func_to_seconds::val_int()
{
  DBUG_ASSERT(fixed());
  THD *thd= current_thd;
  /*
    Unlike val_int_endpoint(), we cannot use Datetime_from_temporal here.
    The argument can be of a non-temporal data type.
  */
  Datetime dt(thd, args[0], Datetime::Options(TIME_NO_ZEROS, thd));
  return (null_value= !dt.is_valid_datetime()) ? 0 : dt.to_seconds();
}

/*
  Get information about this Item tree monotonicity

  SYNOPSIS
    Item_func_to_days::get_monotonicity_info()

  DESCRIPTION
  Get information about monotonicity of the function represented by this item
  tree.

  RETURN
    See enum_monotonicity_info.
+*/ + +enum_monotonicity_info Item_func_to_days::get_monotonicity_info() const +{ + if (args[0]->type() == Item::FIELD_ITEM) + { + if (args[0]->field_type() == MYSQL_TYPE_DATE) + return MONOTONIC_STRICT_INCREASING_NOT_NULL; + if (args[0]->field_type() == MYSQL_TYPE_DATETIME) + return MONOTONIC_INCREASING_NOT_NULL; + } + return NON_MONOTONIC; +} + +enum_monotonicity_info Item_func_to_seconds::get_monotonicity_info() const +{ + if (args[0]->type() == Item::FIELD_ITEM) + { + if (args[0]->field_type() == MYSQL_TYPE_DATE || + args[0]->field_type() == MYSQL_TYPE_DATETIME) + return MONOTONIC_STRICT_INCREASING_NOT_NULL; + } + return NON_MONOTONIC; +} + + +longlong Item_func_to_days::val_int_endpoint(bool left_endp, bool *incl_endp) +{ + DBUG_ASSERT(fixed()); + // val_int_endpoint() is only called if args[0] is a temporal Item_field + Datetime_from_temporal dt(current_thd, args[0], TIME_CONV_NONE); + longlong res; + if ((null_value= !dt.is_valid_datetime())) + { + /* got NULL, leave the incl_endp intact */ + return LONGLONG_MIN; + } + res= (longlong) dt.daynr(); + /* Set to NULL if invalid date, but keep the value */ + null_value= dt.check_date(TIME_NO_ZEROS); + if (null_value) + { + /* + Even if the evaluation return NULL, the calc_daynr is useful for pruning + */ + if (args[0]->field_type() != MYSQL_TYPE_DATE) + *incl_endp= TRUE; + return res; + } + + if (args[0]->field_type() == MYSQL_TYPE_DATE) + { + // TO_DAYS() is strictly monotonic for dates, leave incl_endp intact + return res; + } + + /* + Handle the special but practically useful case of datetime values that + point to day bound ("strictly less" comparison stays intact): + + col < '2007-09-15 00:00:00' -> TO_DAYS(col) < TO_DAYS('2007-09-15') + col > '2007-09-15 23:59:59' -> TO_DAYS(col) > TO_DAYS('2007-09-15') + + which is different from the general case ("strictly less" changes to + "less or equal"): + + col < '2007-09-15 12:34:56' -> TO_DAYS(col) <= TO_DAYS('2007-09-15') + */ + const MYSQL_TIME <ime= 
dt.get_mysql_time()[0];
  if ((!left_endp && dt.hhmmssff_is_zero()) ||
      (left_endp && ltime.hour == 23 && ltime.minute == 59 &&
       ltime.second == 59))
    /* do nothing */
    ;
  else
    *incl_endp= TRUE;
  return res;
}


longlong Item_func_dayofyear::val_int()
{
  DBUG_ASSERT(fixed());
  THD *thd= current_thd;
  Datetime d(thd, args[0], Datetime::Options(TIME_NO_ZEROS, thd));
  return (null_value= !d.is_valid_datetime()) ? 0 : d.dayofyear();
}

longlong Item_func_dayofmonth::val_int()
{
  DBUG_ASSERT(fixed());
  THD *thd= current_thd;
  Datetime d(thd, args[0], Datetime::Options(TIME_CONV_NONE, thd));
  return (null_value= !d.is_valid_datetime()) ? 0 : d.get_mysql_time()->day;
}

longlong Item_func_month::val_int()
{
  DBUG_ASSERT(fixed());
  THD *thd= current_thd;
  Datetime d(thd, args[0], Datetime::Options(TIME_CONV_NONE, thd));
  return (null_value= !d.is_valid_datetime()) ? 0 : d.get_mysql_time()->month;
}


bool Item_func_monthname::fix_length_and_dec(THD *thd)
{
  CHARSET_INFO *cs= thd->variables.collation_connection;
  locale= thd->variables.lc_time_names;
  collation.set(cs, DERIVATION_COERCIBLE, locale->repertoire());
  decimals=0;
  /* Worst case: longest month name in this locale, in connection charset */
  max_length= locale->max_month_name_length * collation.collation->mbmaxlen;
  set_maybe_null();
  return FALSE;
}


String* Item_func_monthname::val_str(String* str)
{
  DBUG_ASSERT(fixed());
  const char *month_name;
  uint err;
  THD *thd= current_thd;
  Datetime d(thd, args[0], Datetime::Options(TIME_CONV_NONE, thd));
  if ((null_value= (!d.is_valid_datetime() || !d.get_mysql_time()->month)))
    return (String *) 0;

  month_name= locale->month_names->type_names[d.get_mysql_time()->month - 1];
  str->copy(month_name, (uint) strlen(month_name), &my_charset_utf8mb3_bin,
            collation.collation, &err);
  return str;
}


/**
  Returns the quarter of the year.
*/

longlong Item_func_quarter::val_int()
{
  DBUG_ASSERT(fixed());
  THD *thd= current_thd;
  Datetime d(thd, args[0], Datetime::Options(TIME_CONV_NONE, thd));
  return (null_value= !d.is_valid_datetime()) ? 0 : d.quarter();
}

longlong Item_func_hour::val_int()
{
  DBUG_ASSERT(fixed());
  THD *thd= current_thd;
  Time tm(thd, args[0], Time::Options_for_cast(thd));
  return (null_value= !tm.is_valid_time()) ? 0 : tm.get_mysql_time()->hour;
}

longlong Item_func_minute::val_int()
{
  DBUG_ASSERT(fixed());
  THD *thd= current_thd;
  Time tm(thd, args[0], Time::Options_for_cast(thd));
  return (null_value= !tm.is_valid_time()) ? 0 : tm.get_mysql_time()->minute;
}

/**
  Returns the second in time_exp in the range of 0 - 59.
*/
longlong Item_func_second::val_int()
{
  DBUG_ASSERT(fixed());
  THD *thd= current_thd;
  Time tm(thd, args[0], Time::Options_for_cast(thd));
  return (null_value= !tm.is_valid_time()) ? 0 : tm.get_mysql_time()->second;
}


/* Normalize a WEEK() mode argument into calc_week() flag bits */
uint week_mode(uint mode)
{
  uint week_format= (mode & 7);
  if (!(week_format & WEEK_MONDAY_FIRST))
    week_format^= WEEK_FIRST_WEEKDAY;
  return week_format;
}

/**
 @verbatim
  The bits in week_format(for calc_week() function) has the following meaning:
   WEEK_MONDAY_FIRST (0)  If not set  Sunday is first day of week
                          If set      Monday is first day of week
   WEEK_YEAR (1)          If not set  Week is in range 0-53

        Week 0 is returned for the last week of the previous year (for
        a date at start of january) In this case one can get 53 for the
        first week of next year.  This flag ensures that the week is
        relevant for the given year. Note that this flag is only
        relevant if WEEK_JANUARY is not set.

                          If set      Week is in range 1-53.

        In this case one may get week 53 for a date in January (when
        the week is that last week of previous year) and week 1 for a
        date in December.

   WEEK_FIRST_WEEKDAY (2) If not set  Weeks are numbered according
                                      to ISO 8601:1988
                          If set      The week that contains the first
                                      'first-day-of-week' is week 1.

        ISO 8601:1988 means that if the week containing January 1 has
        four or more days in the new year, then it is week 1;
        Otherwise it is the last week of the previous year, and the
        next week is week 1.
 @endverbatim
*/

longlong Item_func_week::val_int()
{
  DBUG_ASSERT(fixed());
  uint week_format;
  THD *thd= current_thd;
  Datetime d(thd, args[0], Datetime::Options(TIME_NO_ZEROS, thd));
  if ((null_value= !d.is_valid_datetime()))
    return 0;
  /* The optional second argument overrides @@default_week_format */
  if (arg_count > 1)
    week_format= (uint)args[1]->val_int();
  else
    week_format= thd->variables.default_week_format;
  return d.week(week_mode(week_format));
}


longlong Item_func_yearweek::val_int()
{
  DBUG_ASSERT(fixed());
  THD *thd= current_thd;
  Datetime d(thd, args[0], Datetime::Options(TIME_NO_ZEROS, thd));
  return (null_value= !d.is_valid_datetime()) ? 0 :
    d.yearweek((week_mode((uint) args[1]->val_int()) | WEEK_YEAR));
}


longlong Item_func_weekday::val_int()
{
  DBUG_ASSERT(fixed());
  THD *thd= current_thd;
  Datetime dt(thd, args[0], Datetime::Options(TIME_NO_ZEROS, thd));
  if ((null_value= !dt.is_valid_datetime()))
    return 0;
  /* ODBC DAYOFWEEK() is 1-based, WEEKDAY() is 0-based */
  return dt.weekday(odbc_type) + MY_TEST(odbc_type);
}

bool Item_func_dayname::fix_length_and_dec(THD *thd)
{
  CHARSET_INFO *cs= thd->variables.collation_connection;
  locale= thd->variables.lc_time_names;
  collation.set(cs, DERIVATION_COERCIBLE, locale->repertoire());
  decimals=0;
  /* Worst case: longest day name in this locale, in connection charset */
  max_length= locale->max_day_name_length * collation.collation->mbmaxlen;
  set_maybe_null();
  return FALSE;
}


String* Item_func_dayname::val_str(String* str)
{
  DBUG_ASSERT(fixed());
  const char *day_name;
  uint err;
  THD *thd= current_thd;
  Datetime dt(thd, args[0], Datetime::Options(TIME_NO_ZEROS, thd));

  if ((null_value= !dt.is_valid_datetime()))
    return (String*) 0;

  day_name=
locale->day_names->type_names[dt.weekday(false)]; + str->copy(day_name, (uint) strlen(day_name), &my_charset_utf8mb3_bin, + collation.collation, &err); + return str; +} + + +longlong Item_func_year::val_int() +{ + DBUG_ASSERT(fixed()); + THD *thd= current_thd; + Datetime d(thd, args[0], Datetime::Options(TIME_CONV_NONE, thd)); + return (null_value= !d.is_valid_datetime()) ? 0 : d.get_mysql_time()->year; +} + + +/* + Get information about this Item tree monotonicity + + SYNOPSIS + Item_func_year::get_monotonicity_info() + + DESCRIPTION + Get information about monotonicity of the function represented by this item + tree. + + RETURN + See enum_monotonicity_info. +*/ + +enum_monotonicity_info Item_func_year::get_monotonicity_info() const +{ + if (args[0]->type() == Item::FIELD_ITEM && + (args[0]->field_type() == MYSQL_TYPE_DATE || + args[0]->field_type() == MYSQL_TYPE_DATETIME)) + return MONOTONIC_INCREASING; + return NON_MONOTONIC; +} + + +longlong Item_func_year::val_int_endpoint(bool left_endp, bool *incl_endp) +{ + DBUG_ASSERT(fixed()); + // val_int_endpoint() is cally only if args[0] is a temporal Item_field + Datetime_from_temporal dt(current_thd, args[0], TIME_CONV_NONE); + if ((null_value= !dt.is_valid_datetime())) + { + /* got NULL, leave the incl_endp intact */ + return LONGLONG_MIN; + } + + /* + Handle the special but practically useful case of datetime values that + point to year bound ("strictly less" comparison stays intact) : + + col < '2007-01-01 00:00:00' -> YEAR(col) < 2007 + + which is different from the general case ("strictly less" changes to + "less or equal"): + + col < '2007-09-15 23:00:00' -> YEAR(col) <= 2007 + */ + const MYSQL_TIME <ime= dt.get_mysql_time()[0]; + if (!left_endp && ltime.day == 1 && ltime.month == 1 && + dt.hhmmssff_is_zero()) + ; /* do nothing */ + else + *incl_endp= TRUE; + return ltime.year; +} + + +bool Item_func_unix_timestamp::get_timestamp_value(my_time_t *seconds, + ulong *second_part) +{ + DBUG_ASSERT(fixed()); + if 
(args[0]->type() == FIELD_ITEM)
  {                                             // Optimize timestamp field
    Field *field=((Item_field*) args[0])->field;
    if (field->type() == MYSQL_TYPE_TIMESTAMP)
    {
      if ((null_value= field->is_null()))
        return 1;
      /* Read the stored epoch value directly, no conversion needed */
      *seconds= field->get_timestamp(second_part);
      return 0;
    }
  }

  Timestamp_or_zero_datetime_native_null native(current_thd, args[0], true);
  if ((null_value= native.is_null() || native.is_zero_datetime()))
    return true;
  Timestamp tm(native);
  *seconds= tm.tv().tv_sec;
  *second_part= tm.tv().tv_usec;
  return false;
}


longlong Item_func_unix_timestamp::int_op()
{
  /* UNIX_TIMESTAMP() with no argument: statement start time */
  if (arg_count == 0)
    return (longlong) current_thd->query_start();

  ulong second_part;
  my_time_t seconds;
  if (get_timestamp_value(&seconds, &second_part))
    return 0;

  return seconds;
}


my_decimal *Item_func_unix_timestamp::decimal_op(my_decimal* buf)
{
  ulong second_part;
  my_time_t seconds;
  if (get_timestamp_value(&seconds, &second_part))
    return 0;

  return seconds2my_decimal(seconds < 0, seconds < 0 ? -seconds : seconds,
                            second_part, buf);
}


enum_monotonicity_info Item_func_unix_timestamp::get_monotonicity_info() const
{
  if (args[0]->type() == Item::FIELD_ITEM &&
      (args[0]->field_type() == MYSQL_TYPE_TIMESTAMP))
    return MONOTONIC_INCREASING;
  return NON_MONOTONIC;
}


longlong Item_func_unix_timestamp::val_int_endpoint(bool left_endp, bool *incl_endp)
{
  DBUG_ASSERT(fixed());
  DBUG_ASSERT(arg_count == 1 &&
              args[0]->type() == Item::FIELD_ITEM &&
              args[0]->field_type() == MYSQL_TYPE_TIMESTAMP);
  Field *field= ((Item_field*)args[0])->field;
  /* Leave the incl_endp intact */
  ulong unused;
  my_time_t ts= field->get_timestamp(&unused);
  null_value= field->is_null();
  return ts;
}


longlong Item_func_time_to_sec::int_op()
{
  DBUG_ASSERT(fixed());
  THD *thd= current_thd;
  Time tm(thd, args[0], Time::Options_for_cast(thd));
  return ((null_value= !tm.is_valid_time())) ?
0 : tm.to_seconds(); +} + + +my_decimal *Item_func_time_to_sec::decimal_op(my_decimal* buf) +{ + DBUG_ASSERT(fixed()); + THD *thd= current_thd; + Time tm(thd, args[0], Time::Options_for_cast(thd)); + if ((null_value= !tm.is_valid_time())) + return 0; + const MYSQL_TIME *ltime= tm.get_mysql_time(); + longlong seconds= tm.to_seconds_abs(); + return seconds2my_decimal(ltime->neg, seconds, ltime->second_part, buf); +} + + +/** + Convert a string to a interval value. + + To make code easy, allow interval objects without separators. +*/ + +bool get_interval_value(THD *thd, Item *args, + interval_type int_type, INTERVAL *interval) +{ + ulonglong array[5]; + longlong UNINIT_VAR(value); + const char *UNINIT_VAR(str); + size_t UNINIT_VAR(length); + CHARSET_INFO *UNINIT_VAR(cs); + char buf[100]; + String str_value(buf, sizeof(buf), &my_charset_bin); + + bzero((char*) interval,sizeof(*interval)); + if (int_type == INTERVAL_SECOND && args->decimals) + { + VDec val(args); + if (val.is_null()) + return true; + Sec6 d(val.ptr()); + interval->neg= d.neg(); + if (d.sec() >= LONGLONG_MAX) + { + ErrConvDecimal err(val.ptr()); + thd->push_warning_truncated_wrong_value("seconds", err.ptr()); + return true; + } + interval->second= d.sec(); + interval->second_part= d.usec(); + return false; + } + else if ((int) int_type <= INTERVAL_MICROSECOND) + { + value= args->val_int(); + if (args->null_value) + return 1; + if (value < 0) + { + interval->neg=1; + value= -value; + } + } + else + { + String *res; + if (!(res= args->val_str_ascii(&str_value))) + return (1); + + /* record negative intervals in interval->neg */ + str=res->ptr(); + cs= res->charset(); + const char *end=str+res->length(); + while (str != end && my_isspace(cs,*str)) + str++; + if (str != end && *str == '-') + { + interval->neg=1; + str++; + } + length= (size_t) (end-str); // Set up pointers to new str + } + + switch (int_type) { + case INTERVAL_YEAR: + interval->year= (ulong) value; + break; + case INTERVAL_QUARTER: + 
interval->month= (ulong)(value*3); + break; + case INTERVAL_MONTH: + interval->month= (ulong) value; + break; + case INTERVAL_WEEK: + interval->day= (ulong)(value*7); + break; + case INTERVAL_DAY: + interval->day= (ulong) value; + break; + case INTERVAL_HOUR: + interval->hour= (ulong) value; + break; + case INTERVAL_MICROSECOND: + interval->second_part=value; + break; + case INTERVAL_MINUTE: + interval->minute=value; + break; + case INTERVAL_SECOND: + interval->second=value; + break; + case INTERVAL_YEAR_MONTH: // Allow YEAR-MONTH YYYYYMM + if (get_interval_info(str,length,cs,2,array,0)) + return (1); + interval->year= (ulong) array[0]; + interval->month= (ulong) array[1]; + break; + case INTERVAL_DAY_HOUR: + if (get_interval_info(str,length,cs,2,array,0)) + return (1); + interval->day= (ulong) array[0]; + interval->hour= (ulong) array[1]; + break; + case INTERVAL_DAY_MICROSECOND: + if (get_interval_info(str,length,cs,5,array,1)) + return (1); + interval->day= (ulong) array[0]; + interval->hour= (ulong) array[1]; + interval->minute= array[2]; + interval->second= array[3]; + interval->second_part= array[4]; + break; + case INTERVAL_DAY_MINUTE: + if (get_interval_info(str,length,cs,3,array,0)) + return (1); + interval->day= (ulong) array[0]; + interval->hour= (ulong) array[1]; + interval->minute= array[2]; + break; + case INTERVAL_DAY_SECOND: + if (get_interval_info(str,length,cs,4,array,0)) + return (1); + interval->day= (ulong) array[0]; + interval->hour= (ulong) array[1]; + interval->minute= array[2]; + interval->second= array[3]; + break; + case INTERVAL_HOUR_MICROSECOND: + if (get_interval_info(str,length,cs,4,array,1)) + return (1); + interval->hour= (ulong) array[0]; + interval->minute= array[1]; + interval->second= array[2]; + interval->second_part= array[3]; + break; + case INTERVAL_HOUR_MINUTE: + if (get_interval_info(str,length,cs,2,array,0)) + return (1); + interval->hour= (ulong) array[0]; + interval->minute= array[1]; + break; + case 
INTERVAL_HOUR_SECOND: + if (get_interval_info(str,length,cs,3,array,0)) + return (1); + interval->hour= (ulong) array[0]; + interval->minute= array[1]; + interval->second= array[2]; + break; + case INTERVAL_MINUTE_MICROSECOND: + if (get_interval_info(str,length,cs,3,array,1)) + return (1); + interval->minute= array[0]; + interval->second= array[1]; + interval->second_part= array[2]; + break; + case INTERVAL_MINUTE_SECOND: + if (get_interval_info(str,length,cs,2,array,0)) + return (1); + interval->minute= array[0]; + interval->second= array[1]; + break; + case INTERVAL_SECOND_MICROSECOND: + if (get_interval_info(str,length,cs,2,array,1)) + return (1); + interval->second= array[0]; + interval->second_part= array[1]; + break; + case INTERVAL_LAST: /* purecov: begin deadcode */ + DBUG_ASSERT(0); + break; /* purecov: end */ + } + return 0; +} + + +bool Item_func_from_days::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + longlong value=args[0]->val_int(); + if ((null_value= (args[0]->null_value || + ((fuzzydate & TIME_NO_ZERO_DATE) && value == 0)))) + return true; + bzero(ltime, sizeof(MYSQL_TIME)); + if (get_date_from_daynr((long) value, <ime->year, <ime->month, + <ime->day)) + return 0; + + ltime->time_type= MYSQL_TIMESTAMP_DATE; + return 0; +} + + +/** + Converts current time in my_time_t to MYSQL_TIME representation for local + time zone. Defines time zone (local) used for whole CURDATE function. +*/ +void Item_func_curdate_local::store_now_in_TIME(THD *thd, MYSQL_TIME *now_time) +{ + thd->variables.time_zone->gmt_sec_to_TIME(now_time, thd->query_start()); + thd->used |= THD::TIME_ZONE_USED; +} + + +/** + Converts current time in my_time_t to MYSQL_TIME representation for UTC + time zone. Defines time zone (UTC) used for whole UTC_DATE function. 
+*/ +void Item_func_curdate_utc::store_now_in_TIME(THD *thd, MYSQL_TIME *now_time) +{ + my_tz_UTC->gmt_sec_to_TIME(now_time, thd->query_start()); + /* + We are not flagging this query as using time zone, since it uses fixed + UTC-SYSTEM time-zone. + */ +} + + +bool Item_func_curdate::get_date(THD *thd, MYSQL_TIME *res, + date_mode_t fuzzydate __attribute__((unused))) +{ + query_id_t query_id= thd->query_id; + /* Cache value for this query */ + if (last_query_id != query_id) + { + last_query_id= query_id; + store_now_in_TIME(thd, <ime); + /* We don't need to set second_part and neg because they already 0 */ + ltime.hour= ltime.minute= ltime.second= 0; + ltime.time_type= MYSQL_TIMESTAMP_DATE; + } + *res=ltime; + return 0; +} + + +bool Item_func_curtime::fix_fields(THD *thd, Item **items) +{ + if (decimals > TIME_SECOND_PART_DIGITS) + { + my_error(ER_TOO_BIG_PRECISION, MYF(0), + func_name(), TIME_SECOND_PART_DIGITS); + return 1; + } + return Item_timefunc::fix_fields(thd, items); +} + +bool Item_func_curtime::get_date(THD *thd, MYSQL_TIME *res, + date_mode_t fuzzydate __attribute__((unused))) +{ + query_id_t query_id= thd->query_id; + /* Cache value for this query */ + if (last_query_id != query_id) + { + last_query_id= query_id; + store_now_in_TIME(thd, <ime); + } + *res= ltime; + return 0; +} + +void Item_func_curtime::print(String *str, enum_query_type query_type) +{ + str->append(func_name_cstring()); + str->append('('); + if (decimals) + str->append_ulonglong(decimals); + str->append(')'); +} + +static void set_sec_part(ulong sec_part, MYSQL_TIME *ltime, Item *item) +{ + DBUG_ASSERT(item->decimals == AUTO_SEC_PART_DIGITS || + item->decimals <= TIME_SECOND_PART_DIGITS); + if (item->decimals) + { + ltime->second_part= sec_part; + if (item->decimals < TIME_SECOND_PART_DIGITS) + my_datetime_trunc(ltime, item->decimals); + } +} + +/** + Converts current time in my_time_t to MYSQL_TIME representation for local + time zone. 
Defines time zone (local) used for whole CURTIME function. +*/ +void Item_func_curtime_local::store_now_in_TIME(THD *thd, MYSQL_TIME *now_time) +{ + thd->variables.time_zone->gmt_sec_to_TIME(now_time, thd->query_start()); + now_time->year= now_time->month= now_time->day= 0; + now_time->time_type= MYSQL_TIMESTAMP_TIME; + set_sec_part(thd->query_start_sec_part(), now_time, this); + thd->used|= THD::TIME_ZONE_USED; +} + + +/** + Converts current time in my_time_t to MYSQL_TIME representation for UTC + time zone. Defines time zone (UTC) used for whole UTC_TIME function. +*/ +void Item_func_curtime_utc::store_now_in_TIME(THD *thd, MYSQL_TIME *now_time) +{ + my_tz_UTC->gmt_sec_to_TIME(now_time, thd->query_start()); + now_time->year= now_time->month= now_time->day= 0; + now_time->time_type= MYSQL_TIMESTAMP_TIME; + set_sec_part(thd->query_start_sec_part(), now_time, this); + /* + We are not flagging this query as using time zone, since it uses fixed + UTC-SYSTEM time-zone. + */ +} + +bool Item_func_now::fix_fields(THD *thd, Item **items) +{ + if (decimals > TIME_SECOND_PART_DIGITS) + { + my_error(ER_TOO_BIG_PRECISION, MYF(0), + func_name(), TIME_SECOND_PART_DIGITS); + return 1; + } + return Item_datetimefunc::fix_fields(thd, items); +} + +void Item_func_now::print(String *str, enum_query_type query_type) +{ + str->append(func_name_cstring()); + str->append('('); + if (decimals) + str->append_ulonglong(decimals); + str->append(')'); +} + + +int Item_func_now_local::save_in_field(Field *field, bool no_conversions) +{ + if (field->type() == MYSQL_TYPE_TIMESTAMP) + { + THD *thd= field->get_thd(); + my_time_t ts= thd->query_start(); + ulong sec_part= decimals ? 
thd->query_start_sec_part() : 0; + sec_part-= my_time_fraction_remainder(sec_part, decimals); + field->set_notnull(); + field->store_timestamp(ts, sec_part); + return 0; + } + else + return Item_datetimefunc::save_in_field(field, no_conversions); +} + + +/** + Converts current time in my_time_t to MYSQL_TIME representation for local + time zone. Defines time zone (local) used for whole NOW function. +*/ +void Item_func_now_local::store_now_in_TIME(THD *thd, MYSQL_TIME *now_time) +{ + thd->variables.time_zone->gmt_sec_to_TIME(now_time, thd->query_start()); + set_sec_part(thd->query_start_sec_part(), now_time, this); + thd->used|= THD::TIME_ZONE_USED; +} + + +/** + Converts current time in my_time_t to MYSQL_TIME representation for UTC + time zone. Defines time zone (UTC) used for whole UTC_TIMESTAMP function. +*/ +void Item_func_now_utc::store_now_in_TIME(THD *thd, MYSQL_TIME *now_time) +{ + my_tz_UTC->gmt_sec_to_TIME(now_time, thd->query_start()); + set_sec_part(thd->query_start_sec_part(), now_time, this); + /* + We are not flagging this query as using time zone, since it uses fixed + UTC-SYSTEM time-zone. + */ +} + + +bool Item_func_now::get_date(THD *thd, MYSQL_TIME *res, + date_mode_t fuzzydate __attribute__((unused))) +{ + query_id_t query_id= thd->query_id; + /* Cache value for this query */ + if (last_query_id != query_id) + { + last_query_id= query_id; + store_now_in_TIME(thd, <ime); + } + *res= ltime; + return 0; +} + + +/** + Converts current time in my_time_t to MYSQL_TIME representation for local + time zone. Defines time zone (local) used for whole SYSDATE function. 
+*/ +void Item_func_sysdate_local::store_now_in_TIME(THD *thd, MYSQL_TIME *now_time) +{ + my_hrtime_t now= my_hrtime(); + thd->variables.time_zone->gmt_sec_to_TIME(now_time, hrtime_to_my_time(now)); + set_sec_part(hrtime_sec_part(now), now_time, this); + thd->used|= THD::TIME_ZONE_USED; +} + + +bool Item_func_sysdate_local::get_date(THD *thd, MYSQL_TIME *res, + date_mode_t fuzzydate __attribute__((unused))) +{ + store_now_in_TIME(thd, res); + return 0; +} + +bool Item_func_sec_to_time::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + DBUG_ASSERT(fixed()); + VSec9 sec(thd, args[0], "seconds", LONGLONG_MAX); + if ((null_value= sec.is_null())) + return true; + sec.round(decimals, thd->temporal_round_mode()); + if (sec.sec_to_time(ltime, decimals) && !sec.truncated()) + sec.make_truncated_warning(thd, "seconds"); + return false; +} + +bool Item_func_date_format::fix_length_and_dec(THD *thd) +{ + if (!is_time_format) + { + if (arg_count < 3) + locale= thd->variables.lc_time_names; + else + if (args[2]->basic_const_item()) + locale= args[2]->locale_from_val_str(); + } + + /* + Must use this_item() in case it's a local SP variable + (for ->max_length and ->str_value) + */ + Item *arg1= args[1]->this_item(); + + decimals=0; + CHARSET_INFO *cs= thd->variables.collation_connection; + my_repertoire_t repertoire= arg1->collation.repertoire; + if (!thd->variables.lc_time_names->is_ascii) + repertoire|= MY_REPERTOIRE_EXTENDED; + collation.set(cs, arg1->collation.derivation, repertoire); + StringBuffer buffer; + String *str; + if (args[1]->basic_const_item() && (str= args[1]->val_str(&buffer))) + { // Optimize the normal case + fixed_length=1; + max_length= format_length(str) * collation.collation->mbmaxlen; + } + else + { + fixed_length=0; + max_length=MY_MIN(arg1->max_length, MAX_BLOB_WIDTH) * 10 * + collation.collation->mbmaxlen; + set_if_smaller(max_length,MAX_BLOB_WIDTH); + } + set_maybe_null(); // If wrong date + return FALSE; +} + + +bool 
Item_func_date_format::eq(const Item *item, bool binary_cmp) const +{ + Item_func_date_format *item_func; + + if (item->type() != FUNC_ITEM) + return 0; + if (func_name() != ((Item_func*) item)->func_name()) + return 0; + if (this == item) + return 1; + item_func= (Item_func_date_format*) item; + if (arg_count != item_func->arg_count) + return 0; + if (!args[0]->eq(item_func->args[0], binary_cmp)) + return 0; + /* + We must compare format string case sensitive. + This needed because format modifiers with different case, + for example %m and %M, have different meaning. + */ + if (!args[1]->eq(item_func->args[1], 1)) + return 0; + if (arg_count > 2 && !args[2]->eq(item_func->args[2], 1)) + return 0; + return 1; +} + + + +uint Item_func_date_format::format_length(const String *format) +{ + uint size=0; + const char *ptr=format->ptr(); + const char *end=ptr+format->length(); + + for (; ptr != end ; ptr++) + { + if (*ptr != '%' || ptr == end-1) + size++; + else + { + switch(*++ptr) { + case 'M': /* month, textual */ + case 'W': /* day (of the week), textual */ + size += 64; /* large for UTF8 locale data */ + break; + case 'D': /* day (of the month), numeric plus english suffix */ + case 'Y': /* year, numeric, 4 digits */ + case 'x': /* Year, used with 'v' */ + case 'X': /* Year, used with 'v, where week starts with Monday' */ + size += 4; + break; + case 'a': /* locale's abbreviated weekday name (Sun..Sat) */ + case 'b': /* locale's abbreviated month name (Jan.Dec) */ + size += 32; /* large for UTF8 locale data */ + break; + case 'j': /* day of year (001..366) */ + size += 3; + break; + case 'U': /* week (00..52) */ + case 'u': /* week (00..52), where week starts with Monday */ + case 'V': /* week 1..53 used with 'x' */ + case 'v': /* week 1..53 used with 'x', where week starts with Monday */ + case 'y': /* year, numeric, 2 digits */ + case 'm': /* month, numeric */ + case 'd': /* day (of the month), numeric */ + case 'h': /* hour (01..12) */ + case 'I': /* --||-- */ + 
case 'i': /* minutes, numeric */ + case 'l': /* hour ( 1..12) */ + case 'p': /* locale's AM or PM */ + case 'S': /* second (00..61) */ + case 's': /* seconds, numeric */ + case 'c': /* month (0..12) */ + case 'e': /* day (0..31) */ + size += 2; + break; + case 'k': /* hour ( 0..23) */ + case 'H': /* hour (00..23; value > 23 OK, padding always 2-digit) */ + size += 7; /* docs allow > 23, range depends on sizeof(unsigned int) */ + break; + case 'r': /* time, 12-hour (hh:mm:ss [AP]M) */ + size += 11; + break; + case 'T': /* time, 24-hour (hh:mm:ss) */ + size += 8; + break; + case 'f': /* microseconds */ + size += 6; + break; + case 'w': /* day (of the week), numeric */ + case '%': + default: + size++; + break; + } + } + } + return size; +} + + +String *Item_func_date_format::val_str(String *str) +{ + StringBuffer<64> format_buffer; + String *format; + MYSQL_TIME l_time; + uint size; + const MY_LOCALE *lc= 0; + DBUG_ASSERT(fixed()); + date_conv_mode_t mode= is_time_format ? TIME_TIME_ONLY : TIME_CONV_NONE; + THD *thd= current_thd; + + if ((null_value= args[0]->get_date(thd, &l_time, + Temporal::Options(mode, thd)))) + return 0; + + if (!(format= args[1]->val_str(&format_buffer)) || !format->length()) + goto null_date; + + if (!is_time_format && !(lc= locale) && !(lc= args[2]->locale_from_val_str())) + goto null_date; // invalid locale + + if (fixed_length) + size=max_length; + else + size=format_length(format); + + if (size < MAX_DATE_STRING_REP_LENGTH) + size= MAX_DATE_STRING_REP_LENGTH; + + DBUG_ASSERT(format != str); + if (str->alloc(size)) + goto null_date; + + /* Create the result string */ + str->set_charset(collation.collation); + if (!make_date_time(format, &l_time, + is_time_format ? 
MYSQL_TIMESTAMP_TIME : + MYSQL_TIMESTAMP_DATE, + lc, str)) + return str; + +null_date: + null_value=1; + return 0; +} + +/* + Oracle has many formatting models, we list all but only part of them + are implemented, because some models depend on oracle functions + which mariadb is not supported. + + Models for datetime, used by TO_CHAR/TO_DATE. Normal format characters are + stored as short integer < 128, while format characters are stored as a + integer > 128 +*/ + +enum enum_tochar_formats +{ + FMT_BASE= 128, + FMT_AD, + FMT_AD_DOT, + FMT_AM, + FMT_AM_DOT, + FMT_BC, + FMT_BC_DOT, + FMT_CC, + FMT_SCC, + FMT_D, + FMT_DAY, + FMT_DD, + FMT_DDD, + FMT_DL, + FMT_DS, + FMT_DY, + FMT_E, + FMT_EE, + FMT_FF, + FMT_FM, + FMT_FX, + FMT_HH, + FMT_HH12, + FMT_HH24, + FMT_IW, + FMT_I, + FMT_IY, + FMT_IYY, + FMT_IYYY, + FMT_J, + FMT_MI, + FMT_MM, + FMT_MON, + FMT_MONTH, + FMT_PM, + FMT_PM_DOT, + FMT_RM, + FMT_RR, + FMT_RRRR, + FMT_SS, + FMT_SSSSSS, + FMT_TS, + FMT_TZD, + FMT_TZH, + FMT_TZM, + FMT_TZR, + FMT_W, + FMT_WW, + FMT_X, + FMT_Y, + FMT_YY, + FMT_YYY, + FMT_YYYY, + FMT_YYYY_COMMA, + FMT_YEAR, + FMT_SYYYY, + FMT_SYEAR +}; + +/** + Flip 'quotation_flag' if we found a quote (") character. + + @param cftm Character or FMT... 
format descriptor + @param quotation_flag Points to 'true' if we are inside a quoted string + + @return true If we are inside a quoted string or if we found a '"' character + @return false Otherwise +*/ + +static inline bool check_quotation(uint16 cfmt, bool *quotation_flag) +{ + if (cfmt == '"') + { + *quotation_flag= !*quotation_flag; + return true; + } + return *quotation_flag; +} + +#define INVALID_CHARACTER(x) (((x) >= 'A' && (x) <= 'Z') ||((x) >= '0' && (x) <= '9') || (x) >= 127 || ((x) < 32)) + + +/** + Special characters are directly output in the result + + @return 0 If found not acceptable character + @return # Number of copied characters +*/ + +static uint parse_special(char cfmt, const char *ptr, const char *end, + uint16 *array) +{ + int offset= 0; + char tmp1; + + /* Non-printable character and Multibyte encoded characters */ + if (INVALID_CHARACTER(cfmt)) + return 0; + + /* + * '&' with text is used for variable input, but '&' with other + * special charaters like '|'. '*' is used as separator + */ + if (cfmt == '&' && ptr + 1 < end) + { + tmp1= my_toupper(system_charset_info, *(ptr+1)); + if (tmp1 >= 'A' && tmp1 <= 'Z') + return 0; + } + + do { + /* + Continuously store the special characters in fmt_array until non-special + characters appear + */ + *array++= (uint16) (uchar) *ptr++; + offset++; + if (ptr == end) + break; + tmp1= my_toupper(system_charset_info, *ptr); + } while (!INVALID_CHARACTER(tmp1) && tmp1 != '"'); + return offset; +} + + +/** + Parse the format string, convert it to an compact array and calculate the + length of output string + + @param format Format string + @param fmt_len Function will store max length of formated date string here + + @return 0 ok. fmt_len is updated + @return 1 error. 
In this case 'warning_string' is set to error message +*/ + +bool Item_func_tochar::parse_format_string(const String *format, uint *fmt_len) +{ + const char *ptr, *end; + uint16 *tmp_fmt= fmt_array; + uint tmp_len= 0; + int offset= 0; + bool quotation_flag= false; + + ptr= format->ptr(); + end= ptr + format->length(); + + if (format->length() > MAX_DATETIME_FORMAT_MODEL_LEN) + { + warning_message.append(STRING_WITH_LEN("datetime format string is too " + "long")); + return 1; + } + + for (; ptr < end; ptr++, tmp_fmt++) + { + uint ulen; + char cfmt, next_char; + + cfmt= my_toupper(system_charset_info, *ptr); + + /* + Oracle datetime format support text in double quotation marks like + 'YYYY"abc"MM"xyz"DD', When this happens, store the text and quotation + marks, and use the text as a separator in make_date_time_oracle. + + NOTE: the quotation mark is not print in return value. for example: + select TO_CHAR(sysdate, 'YYYY"abc"MM"xyzDD"') will return 2021abc01xyz11 + */ + if (check_quotation(cfmt, "ation_flag)) + { + *tmp_fmt= *ptr; + tmp_len+= 1; + continue; + } + + switch (cfmt) { + case 'A': // AD/A.D./AM/A.M. + if (ptr+1 >= end) + goto error; + next_char= my_toupper(system_charset_info, *(ptr+1)); + if (next_char == 'D') + { + *tmp_fmt= FMT_AD; + ptr+= 1; + tmp_len+= 2; + } + else if (next_char == 'M') + { + *tmp_fmt= FMT_AM; + ptr+= 1; + tmp_len+= 2; + } + else if (next_char == '.' && ptr+3 < end && *(ptr+3) == '.') + { + if (my_toupper(system_charset_info, *(ptr+2)) == 'D') + { + *tmp_fmt= FMT_AD_DOT; + ptr+= 3; + tmp_len+= 4; + } + else if (my_toupper(system_charset_info, *(ptr+2)) == 'M') + { + *tmp_fmt= FMT_AM_DOT; + ptr+= 3; + tmp_len+= 4; + } + else + goto error; + } + else + goto error; + break; + case 'B': // BC and B.C + if (ptr+1 >= end) + goto error; + next_char= my_toupper(system_charset_info, *(ptr+1)); + if (next_char == 'C') + { + *tmp_fmt= FMT_BC; + ptr+= 1; + tmp_len+= 2; + } + else if (next_char == '.' 
&& ptr+3 < end && + my_toupper(system_charset_info, *(ptr+2)) == 'C' && + *(ptr+3) == '.') + { + *tmp_fmt= FMT_BC_DOT; + ptr+= 3; + tmp_len+= 4; + } + else + goto error; + break; + case 'P': // PM or P.M. + next_char= my_toupper(system_charset_info, *(ptr+1)); + if (next_char == 'M') + { + *tmp_fmt= FMT_PM; + ptr+= 1; + tmp_len+= 2; + } + else if (next_char == '.' && + my_toupper(system_charset_info, *(ptr+2)) == 'M' && + my_toupper(system_charset_info, *(ptr+3)) == '.') + { + *tmp_fmt= FMT_PM_DOT; + ptr+= 3; + tmp_len+= 4; + } + else + goto error; + break; + case 'Y': // Y, YY, YYY o YYYYY + if (ptr + 1 == end || my_toupper(system_charset_info, *(ptr+1)) != 'Y') + { + *tmp_fmt= FMT_Y; + tmp_len+= 1; + break; + } + if (ptr + 2 == end || + my_toupper(system_charset_info, *(ptr+2)) != 'Y') /* YY */ + { + *tmp_fmt= FMT_YY; + ulen= 2; + } + else + { + if (ptr + 3 < end && my_toupper(system_charset_info, *(ptr+3)) == 'Y') + { + *tmp_fmt= FMT_YYYY; + ulen= 4; + } + else + { + *tmp_fmt= FMT_YYY; + ulen= 3; + } + } + ptr+= ulen-1; + tmp_len+= ulen; + break; + + case 'R': // RR or RRRR + if (ptr + 1 == end || my_toupper(system_charset_info, *(ptr+1)) != 'R') + goto error; + + if (ptr + 2 == end || my_toupper(system_charset_info, *(ptr+2)) != 'R') + { + *tmp_fmt= FMT_RR; + ulen= 2; + } + else + { + if (ptr + 3 >= end || my_toupper(system_charset_info, *(ptr+3)) != 'R') + goto error; + *tmp_fmt= FMT_RRRR; + ulen= 4; + } + ptr+= ulen-1; + tmp_len+= ulen; + break; + case 'M': + { + char tmp1; + if (ptr + 1 >= end) + goto error; + + tmp1= my_toupper(system_charset_info, *(ptr+1)); + if (tmp1 == 'M') + { + *tmp_fmt= FMT_MM; + tmp_len+= 2; + ptr+= 1; + } + else if (tmp1 == 'I') + { + *tmp_fmt= FMT_MI; + tmp_len+= 2; + ptr+= 1; + } + else if (tmp1 == 'O') + { + if (ptr + 2 >= end) + goto error; + char tmp2= my_toupper(system_charset_info, *(ptr+2)); + if (tmp2 != 'N') + goto error; + + if (ptr + 4 >= end || + my_toupper(system_charset_info, *(ptr+3)) != 'T' || + 
my_toupper(system_charset_info, *(ptr+4)) != 'H') + { + *tmp_fmt= FMT_MON; + tmp_len+= 3; + ptr+= 2; + } + else + { + *tmp_fmt= FMT_MONTH; + tmp_len+= (locale->max_month_name_length * + my_charset_utf8mb3_bin.mbmaxlen); + ptr+= 4; + } + } + else + goto error; + } + break; + case 'D': // DD, DY, or DAY + { + if (ptr + 1 >= end) + goto error; + char tmp1= my_toupper(system_charset_info, *(ptr+1)); + + if (tmp1 == 'D') + { + *tmp_fmt= FMT_DD; + tmp_len+= 2; + } + else if (tmp1 == 'Y') + { + *tmp_fmt= FMT_DY; + tmp_len+= 3; + } + else if (tmp1 == 'A') // DAY + { + if (ptr + 2 == end || my_toupper(system_charset_info, *(ptr+2)) != 'Y') + goto error; + *tmp_fmt= FMT_DAY; + tmp_len+= locale->max_day_name_length * my_charset_utf8mb3_bin.mbmaxlen; + ptr+= 1; + } + else + goto error; + ptr+= 1; + } + break; + case 'H': // HH, HH12 or HH23 + { + char tmp1, tmp2, tmp3; + if (ptr + 1 >= end) + goto error; + tmp1= my_toupper(system_charset_info, *(ptr+1)); + + if (tmp1 != 'H') + goto error; + + if (ptr+3 >= end) + { + *tmp_fmt= FMT_HH; + ptr+= 1; + } + else + { + tmp2= *(ptr+2); + tmp3= *(ptr+3); + + if (tmp2 == '1' && tmp3 == '2') + { + *tmp_fmt= FMT_HH12; + ptr+= 3; + } + else if (tmp2 == '2' && tmp3 == '4') + { + *tmp_fmt= FMT_HH24; + ptr+= 3; + } + else + { + *tmp_fmt= FMT_HH; + ptr+= 1; + } + } + tmp_len+= 2; + break; + } + case 'S': // SS + if (ptr + 1 == end || my_toupper(system_charset_info, *(ptr+1)) != 'S') + goto error; + + *tmp_fmt= FMT_SS; + tmp_len+= 2; + ptr+= 1; + break; + case '|': + /* + If only one '|' just ignore it, else append others, for example: + TO_CHAR('2000-11-05', 'YYYY|MM||||DD') --> 200011|||05 + */ + if (ptr + 1 == end || *(ptr+1) != '|') + { + tmp_fmt--; + break; + } + ptr++; // Skip first '|' + do + { + *tmp_fmt++= *ptr++; + tmp_len++; + } while ((ptr < end) && *ptr == '|'); + ptr--; // Fix ptr for above for loop + tmp_fmt--; + break; + + default: + offset= parse_special(cfmt, ptr, end, tmp_fmt); + if (!offset) + goto error; + /* ptr++ is in the 
for loop, so we must move ptr to offset-1 */ + ptr+= (offset-1); + tmp_fmt+= (offset-1); + tmp_len+= offset; + break; + } + } + *fmt_len= tmp_len; + *tmp_fmt= 0; + return 0; + +error: + warning_message.append(STRING_WITH_LEN("date format not recognized at ")); + warning_message.append(ptr, MY_MIN(8, end- ptr)); + return 1; +} + + +static inline bool append_val(int val, int size, String *str) +{ + return str->append_zerofill(val, size); +} + + +static bool make_date_time_oracle(const uint16 *fmt_array, + const MYSQL_TIME *l_time, + const MY_LOCALE *locale, + String *str) +{ + bool quotation_flag= false; + const uint16 *ptr= fmt_array; + uint hours_i; + uint weekday; + + str->length(0); + + while (*ptr) + { + if (check_quotation(*ptr, "ation_flag)) + { + /* don't display '"' in the result, so if it is '"', skip it */ + if (*ptr != '"') + { + DBUG_ASSERT(*ptr <= 255); + str->append((char) *ptr); + } + ptr++; + continue; + } + + switch (*ptr) { + + case FMT_AM: + case FMT_PM: + if (l_time->hour > 11) + str->append("PM", 2); + else + str->append("AM", 2); + break; + + case FMT_AM_DOT: + case FMT_PM_DOT: + if (l_time->hour > 11) + str->append(STRING_WITH_LEN("P.M.")); + else + str->append(STRING_WITH_LEN("A.M.")); + break; + + case FMT_AD: + case FMT_BC: + if (l_time->year > 0) + str->append(STRING_WITH_LEN("AD")); + else + str->append(STRING_WITH_LEN("BC")); + break; + + case FMT_AD_DOT: + case FMT_BC_DOT: + if (l_time->year > 0) + str->append(STRING_WITH_LEN("A.D.")); + else + str->append(STRING_WITH_LEN("B.C.")); + break; + + case FMT_Y: + if (append_val(l_time->year%10, 1, str)) + goto err_exit; + break; + + case FMT_YY: + case FMT_RR: + if (append_val(l_time->year%100, 2, str)) + goto err_exit; + break; + + case FMT_YYY: + if (append_val(l_time->year%1000, 3, str)) + goto err_exit; + break; + + case FMT_YYYY: + case FMT_RRRR: + if (append_val(l_time->year, 4, str)) + goto err_exit; + break; + + case FMT_MM: + if (append_val(l_time->month, 2, str)) + goto err_exit; + 
break; + + case FMT_MON: + { + if (l_time->month == 0) + { + str->append("00", 2); + } + else + { + const char *month_name= (locale->ab_month_names-> + type_names[l_time->month-1]); + size_t m_len= strlen(month_name); + str->append(month_name, m_len, system_charset_info); + } + } + break; + + case FMT_MONTH: + { + if (l_time->month == 0) + { + str->append("00", 2); + } + else + { + const char *month_name= (locale->month_names-> + type_names[l_time->month-1]); + size_t month_byte_len= strlen(month_name); + size_t month_char_len; + str->append(month_name, month_byte_len, system_charset_info); + month_char_len= my_numchars_mb(&my_charset_utf8mb3_general_ci, + month_name, month_name + + month_byte_len); + if (str->fill(str->length() + locale->max_month_name_length - + month_char_len, ' ')) + goto err_exit; + } + } + break; + + case FMT_DD: + if (append_val(l_time->day, 2, str)) + goto err_exit; + break; + + case FMT_DY: + { + if (l_time->day == 0) + str->append("00", 2); + else + { + weekday= calc_weekday(calc_daynr(l_time->year,l_time->month, + l_time->day), 0); + const char *day_name= locale->ab_day_names->type_names[weekday]; + str->append(day_name, strlen(day_name), system_charset_info); + } + } + break; + + case FMT_DAY: + { + if (l_time->day == 0) + str->append("00", 2, system_charset_info); + else + { + const char *day_name; + size_t day_byte_len, day_char_len; + weekday=calc_weekday(calc_daynr(l_time->year,l_time->month, + l_time->day), 0); + day_name= locale->day_names->type_names[weekday]; + day_byte_len= strlen(day_name); + str->append(day_name, day_byte_len, system_charset_info); + day_char_len= my_numchars_mb(&my_charset_utf8mb3_general_ci, + day_name, day_name + day_byte_len); + if (str->fill(str->length() + locale->max_day_name_length - + day_char_len, ' ')) + goto err_exit; + } + } + break; + + case FMT_HH12: + case FMT_HH: + hours_i= (l_time->hour%24 + 11)%12+1; + if (append_val(hours_i, 2, str)) + goto err_exit; + break; + + case FMT_HH24: + if 
(append_val(l_time->hour, 2, str)) + goto err_exit; + break; + + case FMT_MI: + if (append_val(l_time->minute, 2, str)) + goto err_exit; + break; + + case FMT_SS: + if (append_val(l_time->second, 2, str)) + goto err_exit; + break; + + default: + str->append((char) *ptr); + } + + ptr++; + }; + return false; + +err_exit: + return true; +} + + +bool Item_func_tochar::fix_length_and_dec(THD *thd) +{ + CHARSET_INFO *cs= thd->variables.collation_connection; + Item *arg1= args[1]->this_item(); + my_repertoire_t repertoire= arg1->collation.repertoire; + StringBuffer buffer; + String *str; + + locale= thd->variables.lc_time_names; + if (!thd->variables.lc_time_names->is_ascii) + repertoire|= MY_REPERTOIRE_EXTENDED; + collation.set(cs, arg1->collation.derivation, repertoire); + + /* first argument must be datetime or string */ + enum_field_types arg0_mysql_type= args[0]->field_type(); + + max_length= 0; + switch (arg0_mysql_type) { + case MYSQL_TYPE_TIME: + case MYSQL_TYPE_DATE: + case MYSQL_TYPE_DATETIME: + case MYSQL_TYPE_TIMESTAMP: + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_STRING: + break; + default: + { + my_printf_error(ER_STD_INVALID_ARGUMENT, + ER(ER_STD_INVALID_ARGUMENT), + MYF(0), + "data type of first argument must be type " + "date/datetime/time or string", + func_name()); + return TRUE; + } + } + if (args[1]->basic_const_item() && (str= args[1]->val_str(&buffer))) + { + uint ulen; + fixed_length= 1; + if (parse_format_string(str, &ulen)) + { + my_printf_error(ER_STD_INVALID_ARGUMENT, + ER(ER_STD_INVALID_ARGUMENT), + MYF(0), + warning_message.c_ptr(), + func_name()); + return TRUE; + } + max_length= (uint32) (ulen * collation.collation->mbmaxlen); + } + else + { + fixed_length= 0; + max_length= (uint32) MY_MIN(arg1->max_length * 10 * + collation.collation->mbmaxlen, + MAX_BLOB_WIDTH); + } + set_maybe_null(); + return FALSE; +} + + +String *Item_func_tochar::val_str(String* str) + { + THD *thd= current_thd; + StringBuffer<64> format_buffer; + String *format; + 
MYSQL_TIME l_time; + const MY_LOCALE *lc= locale; + date_conv_mode_t mode= TIME_CONV_NONE; + size_t max_result_length= max_length; + + if (warning_message.length()) + goto null_date; + + if ((null_value= args[0]->get_date(thd, &l_time, + Temporal::Options(mode, thd)))) + return 0; + + if (!fixed_length) + { + uint ulen; + if (!(format= args[1]->val_str(&format_buffer)) || !format->length() || + parse_format_string(format, &ulen)) + goto null_date; + max_result_length= ((size_t) ulen) * collation.collation->mbmaxlen; + } + + if (str->alloc(max_result_length)) + goto null_date; + + /* Create the result string */ + str->set_charset(collation.collation); + if (!make_date_time_oracle(fmt_array, &l_time, lc, str)) + return str; + +null_date: + + if (warning_message.length()) + { + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + ER_STD_INVALID_ARGUMENT, + ER_THD(thd, ER_STD_INVALID_ARGUMENT), + warning_message.c_ptr(), + func_name()); + if (!fixed_length) + warning_message.length(0); + } + + null_value= 1; + return 0; +} + + +bool Item_func_from_unixtime::fix_length_and_dec(THD *thd) +{ + thd->used|= THD::TIME_ZONE_USED; + tz= thd->variables.time_zone; + Type_std_attributes::set( + Type_temporal_attributes_not_fixed_dec(MAX_DATETIME_WIDTH, + args[0]->decimals, false), + DTCollation_numeric()); + set_maybe_null(); + return FALSE; +} + + +bool Item_func_from_unixtime::get_date(THD *thd, MYSQL_TIME *ltime, + date_mode_t fuzzydate __attribute__((unused))) +{ + bzero((char *)ltime, sizeof(*ltime)); + ltime->time_type= MYSQL_TIMESTAMP_TIME; + + VSec9 sec(thd, args[0], "unixtime", TIMESTAMP_MAX_VALUE); + DBUG_ASSERT(sec.is_null() || sec.sec() <= TIMESTAMP_MAX_VALUE); + + if (sec.is_null() || sec.truncated() || sec.neg()) + return (null_value= 1); + + sec.round(MY_MIN(decimals, TIME_SECOND_PART_DIGITS), thd->temporal_round_mode()); + if (sec.sec() > TIMESTAMP_MAX_VALUE) + return (null_value= true); // Went out of range after rounding + + tz->gmt_sec_to_TIME(ltime, 
(my_time_t) sec.sec()); + ltime->second_part= sec.usec(); + + return (null_value= 0); +} + + +bool Item_func_convert_tz::get_date(THD *thd, MYSQL_TIME *ltime, + date_mode_t fuzzydate __attribute__((unused))) +{ + my_time_t my_time_tmp; + String str; + + if (!from_tz_cached) + { + from_tz= my_tz_find(thd, args[1]->val_str_ascii(&str)); + from_tz_cached= args[1]->const_item(); + } + + if (!to_tz_cached) + { + to_tz= my_tz_find(thd, args[2]->val_str_ascii(&str)); + to_tz_cached= args[2]->const_item(); + } + + if ((null_value= (from_tz == 0 || to_tz == 0))) + return true; + + Datetime::Options opt(TIME_NO_ZEROS, thd); + Datetime *dt= new(ltime) Datetime(thd, args[0], opt); + if ((null_value= !dt->is_valid_datetime())) + return true; + + { + uint not_used; + my_time_tmp= from_tz->TIME_to_gmt_sec(ltime, ¬_used); + ulong sec_part= ltime->second_part; + /* my_time_tmp is guaranteed to be in the allowed range */ + if (my_time_tmp) + to_tz->gmt_sec_to_TIME(ltime, my_time_tmp); + /* we rely on the fact that no timezone conversion can change sec_part */ + ltime->second_part= sec_part; + } + + return (null_value= 0); +} + + +void Item_func_convert_tz::cleanup() +{ + from_tz_cached= to_tz_cached= 0; + Item_datetimefunc::cleanup(); +} + + +bool Item_date_add_interval::fix_length_and_dec(THD *thd) +{ + enum_field_types arg0_field_type; + + if (!args[0]->type_handler()->is_traditional_scalar_type()) + { + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPES2_FOR_OPERATION, MYF(0), + args[0]->type_handler()->name().ptr(), + "interval", func_name()); + return TRUE; + } + /* + The field type for the result of an Item_datefunc is defined as + follows: + + - If first arg is a MYSQL_TYPE_DATETIME result is MYSQL_TYPE_DATETIME + - If first arg is a MYSQL_TYPE_DATE and the interval type uses hours, + minutes or seconds then type is MYSQL_TYPE_DATETIME + otherwise it's MYSQL_TYPE_DATE + - if first arg is a MYSQL_TYPE_TIME and the interval type isn't using + anything larger than days, then the result is 
MYSQL_TYPE_TIME, + otherwise - MYSQL_TYPE_DATETIME. + - Otherwise the result is MYSQL_TYPE_STRING + (This is because you can't know if the string contains a DATE, + MYSQL_TIME or DATETIME argument) + */ + arg0_field_type= args[0]->field_type(); + + if (arg0_field_type == MYSQL_TYPE_DATETIME || + arg0_field_type == MYSQL_TYPE_TIMESTAMP) + { + set_func_handler(&func_handler_date_add_interval_datetime); + } + else if (arg0_field_type == MYSQL_TYPE_DATE) + { + if (int_type <= INTERVAL_DAY || int_type == INTERVAL_YEAR_MONTH) + set_func_handler(&func_handler_date_add_interval_date); + else + set_func_handler(&func_handler_date_add_interval_datetime); + } + else if (arg0_field_type == MYSQL_TYPE_TIME) + { + if (int_type >= INTERVAL_DAY && int_type != INTERVAL_YEAR_MONTH) + set_func_handler(&func_handler_date_add_interval_time); + else + set_func_handler(&func_handler_date_add_interval_datetime_arg0_time); + } + else + { + set_func_handler(&func_handler_date_add_interval_string); + } + set_maybe_null(); + return m_func_handler->fix_length_and_dec(this); +} + + +bool Func_handler_date_add_interval_datetime_arg0_time:: + get_date(THD *thd, Item_handled_func *item, + MYSQL_TIME *to, date_mode_t fuzzy) const +{ + // time_expr + INTERVAL {YEAR|QUARTER|MONTH|WEEK|YEAR_MONTH} + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_DATETIME_FUNCTION_OVERFLOW, + ER_THD(thd, ER_DATETIME_FUNCTION_OVERFLOW), "time"); + return (item->null_value= true); +} + + +bool Item_date_add_interval::eq(const Item *item, bool binary_cmp) const +{ + if (!Item_func::eq(item, binary_cmp)) + return 0; + Item_date_add_interval *other= (Item_date_add_interval*) item; + return ((int_type == other->int_type) && + (date_sub_interval == other->date_sub_interval)); +} + +/* + 'interval_names' reflects the order of the enumeration interval_type. 
+ See item_timefunc.h + */ + +static const char *interval_names[]= +{ + "year", "quarter", "month", "week", "day", + "hour", "minute", "second", "microsecond", + "year_month", "day_hour", "day_minute", + "day_second", "hour_minute", "hour_second", + "minute_second", "day_microsecond", + "hour_microsecond", "minute_microsecond", + "second_microsecond" +}; + +void Item_date_add_interval::print(String *str, enum_query_type query_type) +{ + args[0]->print_parenthesised(str, query_type, INTERVAL_PRECEDENCE); + static LEX_CSTRING minus_interval= { STRING_WITH_LEN(" - interval ") }; + static LEX_CSTRING plus_interval= { STRING_WITH_LEN(" + interval ") }; + LEX_CSTRING *tmp= date_sub_interval ? &minus_interval : &plus_interval; + str->append(tmp); + args[1]->print(str, query_type); + str->append(' '); + str->append(interval_names[int_type], strlen(interval_names[int_type])); +} + +void Item_extract::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("extract(")); + str->append(interval_names[int_type], strlen(interval_names[int_type])); + str->append(STRING_WITH_LEN(" from ")); + args[0]->print(str, query_type); + str->append(')'); +} + + +bool Item_extract::check_arguments() const +{ + if (!args[0]->type_handler()->can_return_extract_source(int_type)) + { + char tmp[64]; + my_snprintf(tmp, sizeof(tmp), "extract(%s)", interval_names[int_type]); + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0), + args[0]->type_handler()->name().ptr(), tmp); + return true; + } + return false; +} + + +bool Item_extract::fix_length_and_dec(THD *thd) +{ + set_maybe_null(); // If wrong date + uint32 daylen= args[0]->cmp_type() == TIME_RESULT ? 
2 : + TIME_MAX_INTERVAL_DAY_CHAR_LENGTH; + switch (int_type) { + case INTERVAL_YEAR: set_date_length(4); break; // YYYY + case INTERVAL_YEAR_MONTH: set_date_length(6); break; // YYYYMM + case INTERVAL_QUARTER: set_date_length(2); break; // 1..4 + case INTERVAL_MONTH: set_date_length(2); break; // MM + case INTERVAL_WEEK: set_date_length(2); break; // 0..52 + case INTERVAL_DAY: set_day_length(daylen); break; // DD + case INTERVAL_DAY_HOUR: set_day_length(daylen+2); break; // DDhh + case INTERVAL_DAY_MINUTE: set_day_length(daylen+4); break; // DDhhmm + case INTERVAL_DAY_SECOND: set_day_length(daylen+6); break; // DDhhmmss + case INTERVAL_HOUR: set_time_length(2); break; // hh + case INTERVAL_HOUR_MINUTE: set_time_length(4); break; // hhmm + case INTERVAL_HOUR_SECOND: set_time_length(6); break; // hhmmss + case INTERVAL_MINUTE: set_time_length(2); break; // mm + case INTERVAL_MINUTE_SECOND: set_time_length(4); break; // mmss + case INTERVAL_SECOND: set_time_length(2); break; // ss + case INTERVAL_MICROSECOND: set_time_length(6); break; // ffffff + case INTERVAL_DAY_MICROSECOND: set_time_length(daylen+12); break; // DDhhmmssffffff + case INTERVAL_HOUR_MICROSECOND: set_time_length(12); break; // hhmmssffffff + case INTERVAL_MINUTE_MICROSECOND: set_time_length(10); break; // mmssffffff + case INTERVAL_SECOND_MICROSECOND: set_time_length(8); break; // ssffffff + case INTERVAL_LAST: DBUG_ASSERT(0); break; /* purecov: deadcode */ + } + return FALSE; +} + + +uint Extract_source::week(THD *thd) const +{ + DBUG_ASSERT(is_valid_extract_source()); + uint year; + ulong week_format= current_thd->variables.default_week_format; + return calc_week(this, week_mode(week_format), &year); +} + + +longlong Item_extract::val_int() +{ + DBUG_ASSERT(fixed()); + THD *thd= current_thd; + Extract_source dt(thd, args[0], m_date_mode); + if ((null_value= !dt.is_valid_extract_source())) + return 0; + switch (int_type) { + case INTERVAL_YEAR: return dt.year(); + case INTERVAL_YEAR_MONTH: return 
dt.year_month(); + case INTERVAL_QUARTER: return dt.quarter(); + case INTERVAL_MONTH: return dt.month(); + case INTERVAL_WEEK: return dt.week(thd); + case INTERVAL_DAY: return dt.day(); + case INTERVAL_DAY_HOUR: return dt.day_hour(); + case INTERVAL_DAY_MINUTE: return dt.day_minute(); + case INTERVAL_DAY_SECOND: return dt.day_second(); + case INTERVAL_HOUR: return dt.hour(); + case INTERVAL_HOUR_MINUTE: return dt.hour_minute(); + case INTERVAL_HOUR_SECOND: return dt.hour_second(); + case INTERVAL_MINUTE: return dt.minute(); + case INTERVAL_MINUTE_SECOND: return dt.minute_second(); + case INTERVAL_SECOND: return dt.second(); + case INTERVAL_MICROSECOND: return dt.microsecond(); + case INTERVAL_DAY_MICROSECOND: return dt.day_microsecond(); + case INTERVAL_HOUR_MICROSECOND: return dt.hour_microsecond(); + case INTERVAL_MINUTE_MICROSECOND: return dt.minute_microsecond(); + case INTERVAL_SECOND_MICROSECOND: return dt.second_microsecond(); + case INTERVAL_LAST: DBUG_ASSERT(0); break; /* purecov: deadcode */ + } + return 0; // Impossible +} + +bool Item_extract::eq(const Item *item, bool binary_cmp) const +{ + if (this == item) + return 1; + if (item->type() != FUNC_ITEM || + functype() != ((Item_func*)item)->functype()) + return 0; + + Item_extract* ie= (Item_extract*)item; + if (ie->int_type != int_type) + return 0; + + if (!args[0]->eq(ie->args[0], binary_cmp)) + return 0; + return 1; +} + + +bool Item_char_typecast::eq(const Item *item, bool binary_cmp) const +{ + if (this == item) + return 1; + if (item->type() != FUNC_ITEM || + functype() != ((Item_func*)item)->functype()) + return 0; + + Item_char_typecast *cast= (Item_char_typecast*)item; + if (cast_length != cast->cast_length || + cast_cs != cast->cast_cs) + return 0; + + if (!args[0]->eq(cast->args[0], binary_cmp)) + return 0; + return 1; +} + +void Item_func::print_cast_temporal(String *str, enum_query_type query_type) +{ + char buf[32]; + str->append(STRING_WITH_LEN("cast(")); + args[0]->print(str, 
query_type); + str->append(STRING_WITH_LEN(" as ")); + const Name name= type_handler()->name(); + str->append(name.ptr(), name.length()); + if (decimals && decimals != NOT_FIXED_DEC) + { + str->append('('); + size_t length= (size_t) (longlong10_to_str(decimals, buf, -10) - buf); + str->append(buf, length); + str->append(')'); + } + str->append(')'); +} + + +void Item_char_typecast::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("cast(")); + args[0]->print(str, query_type); + str->append(STRING_WITH_LEN(" as char")); + if (cast_length != ~0U) + { + char buf[20]; + size_t length= (size_t) (longlong10_to_str(cast_length, buf, 10) - buf); + str->append('('); + str->append(buf, length); + str->append(')'); + } + if (cast_cs) + { + str->append(STRING_WITH_LEN(" charset ")); + str->append(cast_cs->cs_name); + } + str->append(')'); +} + + +void Item_char_typecast::check_truncation_with_warn(String *src, size_t dstlen) +{ + if (dstlen < src->length()) + { + THD *thd= current_thd; + char char_type[40]; + ErrConvString err(src); + bool save_abort_on_warning= thd->abort_on_warning; + thd->abort_on_warning&= !m_suppress_warning_to_error_escalation; + my_snprintf(char_type, sizeof(char_type), "%s(%lu)", + cast_cs == &my_charset_bin ? "BINARY" : "CHAR", + (ulong) cast_length); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_TRUNCATED_WRONG_VALUE, + ER_THD(thd, ER_TRUNCATED_WRONG_VALUE), char_type, + err.ptr()); + thd->abort_on_warning= save_abort_on_warning; + } +} + + +String *Item_char_typecast::reuse(String *src, size_t length) +{ + DBUG_ASSERT(length <= src->length()); + check_truncation_with_warn(src, length); + tmp_value.set(src->ptr(), length, cast_cs); + return &tmp_value; +} + + +/* + Make a copy, to handle conversion or fix bad bytes. 
+*/ +String *Item_char_typecast::copy(String *str, CHARSET_INFO *strcs) +{ + String_copier_for_item copier(current_thd); + if (copier.copy_with_warn(cast_cs, &tmp_value, strcs, + str->ptr(), str->length(), cast_length)) + { + null_value= 1; // EOM + return 0; + } + check_truncation_with_warn(str, (uint)(copier.source_end_pos() - str->ptr())); + return &tmp_value; +} + + +uint Item_char_typecast::adjusted_length_with_warn(uint length) +{ + if (length <= current_thd->variables.max_allowed_packet) + return length; + + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER_THD(thd, ER_WARN_ALLOWED_PACKET_OVERFLOWED), + cast_cs == &my_charset_bin ? + "cast_as_binary" : func_name(), + thd->variables.max_allowed_packet); + return thd->variables.max_allowed_packet; +} + + +String *Item_char_typecast::val_str_generic(String *str) +{ + DBUG_ASSERT(fixed()); + String *res; + + if (has_explicit_length()) + cast_length= adjusted_length_with_warn(cast_length); + + if (!(res= args[0]->val_str(str))) + { + null_value= 1; + return 0; + } + + if (cast_cs == &my_charset_bin && + has_explicit_length() && + cast_length > res->length()) + { + // Special case: pad binary value with trailing 0x00 + DBUG_ASSERT(cast_length <= current_thd->variables.max_allowed_packet); + if (res->alloced_length() < cast_length) + { + str_value.alloc(cast_length); + str_value.copy(*res); + res= &str_value; + } + bzero((char*) res->ptr() + res->length(), cast_length - res->length()); + res->length(cast_length); + res->set_charset(&my_charset_bin); + } + else + { + /* + from_cs is 0 in the case where the result set may vary between calls, + for example with dynamic columns. + */ + CHARSET_INFO *cs= from_cs ? from_cs : res->charset(); + if (!charset_conversion) + { + // Try to reuse the original string (if well formed). 
+ Well_formed_prefix prefix(cs, res->ptr(), res->end(), cast_length); + if (!prefix.well_formed_error_pos()) + res= reuse(res, prefix.length()); + goto end; + } + // Character set conversion, or bad bytes were found. + if (!(res= copy(res, cs))) + return 0; + } + +end: + return ((null_value= (res->length() > + adjusted_length_with_warn(res->length())))) ? 0 : res; +} + + +String *Item_char_typecast::val_str_binary_from_native(String *str) +{ + DBUG_ASSERT(fixed()); + DBUG_ASSERT(cast_cs == &my_charset_bin); + NativeBuffer native; + + if (args[0]->val_native(current_thd, &native)) + { + null_value= 1; + return 0; + } + + if (has_explicit_length()) + { + cast_length= adjusted_length_with_warn(cast_length); + if (cast_length > native.length()) + { + // add trailing 0x00s + DBUG_ASSERT(cast_length <= current_thd->variables.max_allowed_packet); + str->alloc(cast_length); + str->copy(native.ptr(), native.length(), &my_charset_bin); + bzero((char*) str->end(), cast_length - str->length()); + str->length(cast_length); + } + else + str->copy(native.ptr(), cast_length, &my_charset_bin); + } + else + str->copy(native.ptr(), native.length(), &my_charset_bin); + + return ((null_value= (str->length() > + adjusted_length_with_warn(str->length())))) ? 
0 : str; +} + + +class Item_char_typecast_func_handler: public Item_handled_func::Handler_str +{ +public: + const Type_handler *return_type_handler(const Item_handled_func *item) const + { + return Type_handler::string_type_handler(item->max_length); + } + const Type_handler * + type_handler_for_create_select(const Item_handled_func *item) const + { + return return_type_handler(item)->type_handler_for_tmp_table(item); + } + + bool fix_length_and_dec(Item_handled_func *item) const + { + return false; + } + String *val_str(Item_handled_func *item, String *to) const + { + DBUG_ASSERT(dynamic_cast(item)); + return static_cast(item)->val_str_generic(to); + } +}; + + +static Item_char_typecast_func_handler item_char_typecast_func_handler; + + +void Item_char_typecast::fix_length_and_dec_numeric() +{ + fix_length_and_dec_internal(from_cs= cast_cs->mbminlen == 1 ? + cast_cs : + &my_charset_latin1); + set_func_handler(&item_char_typecast_func_handler); +} + + +void Item_char_typecast::fix_length_and_dec_generic() +{ + fix_length_and_dec_internal(from_cs= args[0]->dynamic_result() ? + 0 : + args[0]->collation.collation); + set_func_handler(&item_char_typecast_func_handler); +} + + +void Item_char_typecast::fix_length_and_dec_str() +{ + fix_length_and_dec_generic(); + m_suppress_warning_to_error_escalation= true; + set_func_handler(&item_char_typecast_func_handler); +} + + +void +Item_char_typecast::fix_length_and_dec_native_to_binary(uint32 octet_length) +{ + collation.set(&my_charset_bin, DERIVATION_IMPLICIT); + max_length= has_explicit_length() ? (uint32) cast_length : octet_length; + if (current_thd->is_strict_mode()) + set_maybe_null(); +} + + +void Item_char_typecast::fix_length_and_dec_internal(CHARSET_INFO *from_cs) +{ + uint32 char_length; + /* + We always force character set conversion if cast_cs + is a multi-byte character set. It guarantees that the + result of CAST is a well-formed string. 
+ For single-byte character sets we allow just to copy + from the argument. A single-byte character sets string + is always well-formed. + + There is a special trick to convert form a number to ucs2. + As numbers have my_charset_bin as their character set, + it wouldn't do conversion to ucs2 without an additional action. + To force conversion, we should pretend to be non-binary. + Let's choose from_cs this way: + - If the argument in a number and cast_cs is ucs2 (i.e. mbminlen > 1), + then from_cs is set to latin1, to perform latin1 -> ucs2 conversion. + - If the argument is a number and cast_cs is ASCII-compatible + (i.e. mbminlen == 1), then from_cs is set to cast_cs, + which allows just to take over the args[0]->val_str() result + and thus avoid unnecessary character set conversion. + - If the argument is not a number, then from_cs is set to + the argument's charset. + - If argument has a dynamic collation (can change from call to call) + we set from_cs to 0 as a marker that we have to take the collation + from the result string. + + Note (TODO): we could use repertoire technique here. + */ + charset_conversion= !from_cs || (cast_cs->mbmaxlen > 1) || + (!my_charset_same(from_cs, cast_cs) && + from_cs != &my_charset_bin && + cast_cs != &my_charset_bin); + collation.set(cast_cs, DERIVATION_IMPLICIT); + char_length= ((cast_length != ~0U) ? cast_length : + args[0]->max_length / + (cast_cs == &my_charset_bin ? 1 : + args[0]->collation.collation->mbmaxlen)); + max_length= char_length * cast_cs->mbmaxlen; + // Add NULL-ability in strict mode. 
See Item_str_func::fix_fields() + if (current_thd->is_strict_mode()) + set_maybe_null(); +} + + +bool Item_time_typecast::get_date(THD *thd, MYSQL_TIME *to, date_mode_t mode) +{ + Time *tm= new(to) Time(thd, args[0], Time::Options_for_cast(mode, thd), + MY_MIN(decimals, TIME_SECOND_PART_DIGITS)); + return (null_value= !tm->is_valid_time()); +} + + +Sql_mode_dependency Item_time_typecast::value_depends_on_sql_mode() const +{ + return Item_timefunc::value_depends_on_sql_mode() | + Sql_mode_dependency(decimals < args[0]->decimals ? + MODE_TIME_ROUND_FRACTIONAL : 0, 0); +} + + +bool Item_date_typecast::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + date_mode_t tmp= (fuzzydate | sql_mode_for_dates(thd)) & ~TIME_TIME_ONLY; + // Force truncation + Date *d= new(ltime) Date(thd, args[0], Date::Options(date_conv_mode_t(tmp))); + return (null_value= !d->is_valid_date()); +} + + +bool Item_datetime_typecast::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + date_mode_t tmp= (fuzzydate | sql_mode_for_dates(thd)) & ~TIME_TIME_ONLY; + // Force rounding if the current sql_mode says so + Datetime::Options opt(date_conv_mode_t(tmp), thd); + Datetime *dt= new(ltime) Datetime(thd, args[0], opt, + MY_MIN(decimals, TIME_SECOND_PART_DIGITS)); + return (null_value= !dt->is_valid_datetime()); +} + + +Sql_mode_dependency Item_datetime_typecast::value_depends_on_sql_mode() const +{ + return Item_datetimefunc::value_depends_on_sql_mode() | + Sql_mode_dependency(decimals < args[0]->decimals ? + MODE_TIME_ROUND_FRACTIONAL : 0, 0); +} + + +/** + MAKEDATE(a,b) is a date function that creates a date value + from a year and day value. + + NOTES: + As arguments are integers, we can't know if the year is a 2 digit + or 4 digit year. In this case we treat all years < 100 as 2 digit + years. 
Ie, this is not safe for dates between 0000-01-01 and + 0099-12-31 +*/ + +bool Item_func_makedate::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + DBUG_ASSERT(fixed()); + long year, days, daynr= (long) args[1]->val_int(); + + VYear vyear(args[0]); + if (vyear.is_null() || args[1]->null_value || vyear.truncated() || daynr <= 0) + goto err; + + if ((year= (long) vyear.year()) < 100) + year= year_2000_handling(year); + days= calc_daynr(year,1,1) + daynr - 1; + if (get_date_from_daynr(days, <ime->year, <ime->month, <ime->day)) + goto err; + ltime->time_type= MYSQL_TIMESTAMP_DATE; + ltime->neg= 0; + ltime->hour= ltime->minute= ltime->second= ltime->second_part= 0; + return (null_value= 0); + +err: + return (null_value= 1); +} + + +bool Item_func_add_time::fix_length_and_dec(THD *thd) +{ + enum_field_types arg0_field_type; + + if (!args[0]->type_handler()->is_traditional_scalar_type() || + !args[1]->type_handler()->is_traditional_scalar_type()) + { + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPES2_FOR_OPERATION, MYF(0), + args[0]->type_handler()->name().ptr(), + args[1]->type_handler()->name().ptr(), func_name()); + return TRUE; + } + /* + The field type for the result of an Item_func_add_time function is defined + as follows: + + - If first arg is a MYSQL_TYPE_DATETIME or MYSQL_TYPE_TIMESTAMP + result is MYSQL_TYPE_DATETIME + - If first arg is a MYSQL_TYPE_TIME result is MYSQL_TYPE_TIME + - Otherwise the result is MYSQL_TYPE_STRING + */ + + arg0_field_type= args[0]->field_type(); + if (arg0_field_type == MYSQL_TYPE_DATE || + arg0_field_type == MYSQL_TYPE_DATETIME || + arg0_field_type == MYSQL_TYPE_TIMESTAMP) + { + set_func_handler(sign > 0 ? &func_handler_add_time_datetime_add : + &func_handler_add_time_datetime_sub); + } + else if (arg0_field_type == MYSQL_TYPE_TIME) + { + set_func_handler(sign > 0 ? &func_handler_add_time_time_add : + &func_handler_add_time_time_sub); + } + else + { + set_func_handler(sign > 0 ? 
&func_handler_add_time_string_add : + &func_handler_add_time_string_sub); + } + + set_maybe_null(); + return m_func_handler->fix_length_and_dec(this); +} + + +/** + TIMEDIFF(t,s) is a time function that calculates the + time value between a start and end time. + + t and s: time_or_datetime_expression + Result: Time value +*/ + +bool Item_func_timediff::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + DBUG_ASSERT(fixed()); + int l_sign= 1; + MYSQL_TIME l_time1,l_time2,l_time3; + + /* the following may be true in, for example, date_add(timediff(...), ... */ + if (fuzzydate & TIME_NO_ZERO_IN_DATE) + return (null_value= 1); + + if (args[0]->get_time(thd, &l_time1) || + args[1]->get_time(thd, &l_time2) || + l_time1.time_type != l_time2.time_type) + return (null_value= 1); + + if (l_time1.neg != l_time2.neg) + l_sign= -l_sign; + + if (calc_time_diff(&l_time1, &l_time2, l_sign, &l_time3, fuzzydate)) + return (null_value= 1); + + *ltime= l_time3; + return (null_value= adjust_time_range_with_warn(thd, ltime, decimals)); +} + + +/** + MAKETIME(h,m,s) is a time function that calculates a time value + from the total number of hours, minutes, and seconds. 
+ Result: Time value +*/ + +bool Item_func_maketime::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + DBUG_ASSERT(fixed()); + Longlong_hybrid hour(args[0]->val_int(), args[0]->unsigned_flag); + longlong minute= args[1]->val_int(); + VSec9 sec(thd, args[2], "seconds", 59); + + DBUG_ASSERT(sec.is_null() || sec.sec() <= 59); + if (args[0]->null_value || args[1]->null_value || sec.is_null() || + minute < 0 || minute > 59 || sec.neg() || sec.truncated()) + return (null_value= 1); + + int warn; + new(ltime) Time(&warn, hour.neg(), hour.abs(), (uint) minute, + sec.to_const_sec9(), thd->temporal_round_mode(), decimals); + if (warn) + { + // use check_time_range() to set ltime to the max value depending on dec + int unused; + ltime->hour= TIME_MAX_HOUR + 1; + check_time_range(ltime, decimals, &unused); + char buf[28]; + char *ptr= longlong10_to_str(hour.value(), buf, hour.is_unsigned() ? 10 : -10); + int len = (int)(ptr - buf) + sprintf(ptr, ":%02u:%02u", + (uint) minute, (uint) sec.sec()); + ErrConvString err(buf, len, &my_charset_bin); + thd->push_warning_truncated_wrong_value("time", err.ptr()); + } + + return (null_value= 0); +} + + +/** + MICROSECOND(a) is a function ( extraction) that extracts the microseconds + from a. + + a: Datetime or time value + Result: int value +*/ + +longlong Item_func_microsecond::val_int() +{ + DBUG_ASSERT(fixed()); + THD *thd= current_thd; + Time tm(thd, args[0], Time::Options_for_cast(thd)); + return ((null_value= !tm.is_valid_time())) ? 
+ 0 : tm.get_mysql_time()->second_part; +} + + +longlong Item_func_timestamp_diff::val_int() +{ + MYSQL_TIME ltime1, ltime2; + ulonglong seconds; + ulong microseconds; + long months= 0; + int neg= 1; + THD *thd= current_thd; + Datetime::Options opt(TIME_NO_ZEROS, thd); + + null_value= 0; + + if (Datetime(thd, args[0], opt).copy_to_mysql_time(<ime1) || + Datetime(thd, args[1], opt).copy_to_mysql_time(<ime2)) + goto null_date; + + if (calc_time_diff(<ime2,<ime1, 1, + &seconds, µseconds)) + neg= -1; + + if (int_type == INTERVAL_YEAR || + int_type == INTERVAL_QUARTER || + int_type == INTERVAL_MONTH) + { + uint year_beg, year_end, month_beg, month_end, day_beg, day_end; + uint years= 0; + uint second_beg, second_end, microsecond_beg, microsecond_end; + + if (neg == -1) + { + year_beg= ltime2.year; + year_end= ltime1.year; + month_beg= ltime2.month; + month_end= ltime1.month; + day_beg= ltime2.day; + day_end= ltime1.day; + second_beg= ltime2.hour * 3600 + ltime2.minute * 60 + ltime2.second; + second_end= ltime1.hour * 3600 + ltime1.minute * 60 + ltime1.second; + microsecond_beg= ltime2.second_part; + microsecond_end= ltime1.second_part; + } + else + { + year_beg= ltime1.year; + year_end= ltime2.year; + month_beg= ltime1.month; + month_end= ltime2.month; + day_beg= ltime1.day; + day_end= ltime2.day; + second_beg= ltime1.hour * 3600 + ltime1.minute * 60 + ltime1.second; + second_end= ltime2.hour * 3600 + ltime2.minute * 60 + ltime2.second; + microsecond_beg= ltime1.second_part; + microsecond_end= ltime2.second_part; + } + + /* calc years */ + years= year_end - year_beg; + if (month_end < month_beg || (month_end == month_beg && day_end < day_beg)) + years-= 1; + + /* calc months */ + months= 12*years; + if (month_end < month_beg || (month_end == month_beg && day_end < day_beg)) + months+= 12 - (month_beg - month_end); + else + months+= (month_end - month_beg); + + if (day_end < day_beg) + months-= 1; + else if ((day_end == day_beg) && + ((second_end < second_beg) || + 
(second_end == second_beg && microsecond_end < microsecond_beg))) + months-= 1; + } + + switch (int_type) { + case INTERVAL_YEAR: + return months/12*neg; + case INTERVAL_QUARTER: + return months/3*neg; + case INTERVAL_MONTH: + return months*neg; + case INTERVAL_WEEK: + return ((longlong) (seconds / SECONDS_IN_24H / 7L)) * neg; + case INTERVAL_DAY: + return ((longlong) (seconds / SECONDS_IN_24H)) * neg; + case INTERVAL_HOUR: + return ((longlong) (seconds / 3600L)) * neg; + case INTERVAL_MINUTE: + return ((longlong) (seconds / 60L)) * neg; + case INTERVAL_SECOND: + return ((longlong) seconds) * neg; + case INTERVAL_MICROSECOND: + /* + In MySQL difference between any two valid datetime values + in microseconds fits into longlong. + */ + return ((longlong) ((ulonglong) seconds * 1000000L + microseconds)) * neg; + default: + break; + } + +null_date: + null_value=1; + return 0; +} + + +void Item_func_timestamp_diff::print(String *str, enum_query_type query_type) +{ + str->append(func_name_cstring()); + str->append('('); + + switch (int_type) { + case INTERVAL_YEAR: + str->append(STRING_WITH_LEN("YEAR")); + break; + case INTERVAL_QUARTER: + str->append(STRING_WITH_LEN("QUARTER")); + break; + case INTERVAL_MONTH: + str->append(STRING_WITH_LEN("MONTH")); + break; + case INTERVAL_WEEK: + str->append(STRING_WITH_LEN("WEEK")); + break; + case INTERVAL_DAY: + str->append(STRING_WITH_LEN("DAY")); + break; + case INTERVAL_HOUR: + str->append(STRING_WITH_LEN("HOUR")); + break; + case INTERVAL_MINUTE: + str->append(STRING_WITH_LEN("MINUTE")); + break; + case INTERVAL_SECOND: + str->append(STRING_WITH_LEN("SECOND")); + break; + case INTERVAL_MICROSECOND: + str->append(STRING_WITH_LEN("MICROSECOND")); + break; + default: + break; + } + + for (uint i=0 ; i < 2 ; i++) + { + str->append(','); + args[i]->print(str, query_type); + } + str->append(')'); +} + + +String *Item_func_get_format::val_str_ascii(String *str) +{ + DBUG_ASSERT(fixed()); + const char *format_name; + 
KNOWN_DATE_TIME_FORMAT *format; + String *val= args[0]->val_str_ascii(str); + ulong val_len; + + if ((null_value= args[0]->null_value)) + return 0; + + val_len= val->length(); + for (format= &known_date_time_formats[0]; + (format_name= format->format_name); + format++) + { + uint format_name_len; + format_name_len= (uint) strlen(format_name); + if (val_len == format_name_len && + !my_charset_latin1.strnncoll(val->ptr(), val_len, + format_name, val_len)) + { + const char *format_str= get_date_time_format_str(format, type); + str->set(format_str, (uint) strlen(format_str), &my_charset_numeric); + return str; + } + } + + null_value= 1; + return 0; +} + + +void Item_func_get_format::print(String *str, enum_query_type query_type) +{ + str->append(func_name_cstring()); + str->append('('); + + switch (type) { + case MYSQL_TIMESTAMP_DATE: + str->append(STRING_WITH_LEN("DATE, ")); + break; + case MYSQL_TIMESTAMP_DATETIME: + str->append(STRING_WITH_LEN("DATETIME, ")); + break; + case MYSQL_TIMESTAMP_TIME: + str->append(STRING_WITH_LEN("TIME, ")); + break; + default: + DBUG_ASSERT(0); + } + args[0]->print(str, query_type); + str->append(')'); +} + + +/** + Get type of datetime value (DATE/TIME/...) which will be produced + according to format string. + + @param format format string + @param length length of format string + + @note + We don't process day format's characters('D', 'd', 'e') because day + may be a member of all date/time types. + + @note + Format specifiers supported by this function should be in sync with + specifiers supported by extract_date_time() function. 
+ + @return + A function handler corresponding the given format +*/ + +static const Item_handled_func::Handler * +get_date_time_result_type(const char *format, uint length) +{ + const char *time_part_frms= "HISThiklrs"; + const char *date_part_frms= "MVUXYWabcjmvuxyw"; + bool date_part_used= 0, time_part_used= 0, frac_second_used= 0; + + const char *val= format; + const char *end= format + length; + + for (; val != end; val++) + { + if (*val == '%' && val+1 != end) + { + val++; + if (*val == 'f') + frac_second_used= time_part_used= 1; + else if (!time_part_used && strchr(time_part_frms, *val)) + time_part_used= 1; + else if (!date_part_used && strchr(date_part_frms, *val)) + date_part_used= 1; + if (date_part_used && frac_second_used) + { + /* + frac_second_used implies time_part_used, and thus we already + have all types of date-time components and can end our search. + */ + return &func_handler_str_to_date_datetime_usec; + } + } + } + + /* We don't have all three types of date-time components */ + if (frac_second_used) + return &func_handler_str_to_date_time_usec; + if (time_part_used) + { + if (date_part_used) + return &func_handler_str_to_date_datetime_sec; + return &func_handler_str_to_date_time_sec; + } + return &func_handler_str_to_date_date; +} + + +bool Item_func_str_to_date::fix_length_and_dec(THD *thd) +{ + if (!args[0]->type_handler()->is_traditional_scalar_type() || + !args[1]->type_handler()->is_traditional_scalar_type()) + { + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPES2_FOR_OPERATION, MYF(0), + args[0]->type_handler()->name().ptr(), + args[1]->type_handler()->name().ptr(), func_name()); + return TRUE; + } + if (agg_arg_charsets(collation, args, 2, MY_COLL_ALLOW_CONV, 1)) + return TRUE; + if (collation.collation->mbminlen > 1) + internal_charset= &my_charset_utf8mb4_general_ci; + + set_maybe_null(); + set_func_handler(&func_handler_str_to_date_datetime_usec); + + if ((const_item= args[1]->const_item())) + { + StringBuffer<64> format_str; + String 
*format= args[1]->val_str(&format_str, &format_converter, + internal_charset); + if (!args[1]->null_value) + set_func_handler(get_date_time_result_type(format->ptr(), format->length())); + } + return m_func_handler->fix_length_and_dec(this); +} + + +bool Item_func_str_to_date::get_date_common(THD *thd, MYSQL_TIME *ltime, + date_mode_t fuzzydate, + timestamp_type tstype) +{ + DATE_TIME_FORMAT date_time_format; + StringBuffer<64> val_string, format_str; + String *val, *format; + + val= args[0]->val_str(&val_string, &subject_converter, internal_charset); + format= args[1]->val_str(&format_str, &format_converter, internal_charset); + if (args[0]->null_value || args[1]->null_value) + return (null_value=1); + + date_time_format.format.str= (char*) format->ptr(); + date_time_format.format.length= format->length(); + if (extract_date_time(thd, &date_time_format, val->ptr(), val->length(), + ltime, tstype, 0, "datetime", + date_conv_mode_t(fuzzydate) | + sql_mode_for_dates(thd))) + return (null_value=1); + return (null_value= 0); +} + + +bool Item_func_last_day::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + Datetime::Options opt(date_conv_mode_t(fuzzydate & ~TIME_TIME_ONLY), + time_round_mode_t(fuzzydate)); + Datetime *d= new(ltime) Datetime(thd, args[0], opt); + if ((null_value= (!d->is_valid_datetime() || ltime->month == 0))) + return true; + uint month_idx= ltime->month-1; + ltime->day= days_in_month[month_idx]; + if ( month_idx == 1 && calc_days_in_year(ltime->year) == 366) + ltime->day= 29; + ltime->hour= ltime->minute= ltime->second= 0; + ltime->second_part= 0; + ltime->time_type= MYSQL_TIMESTAMP_DATE; + return (null_value= 0); +} diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h new file mode 100644 index 00000000..083eb7ba --- /dev/null +++ b/sql/item_timefunc.h @@ -0,0 +1,2033 @@ +#ifndef ITEM_TIMEFUNC_INCLUDED +#define ITEM_TIMEFUNC_INCLUDED +/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. 
+ Copyright (c) 2009-2011, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* Function items used by mysql */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +class MY_LOCALE; + + +bool get_interval_value(THD *thd, Item *args, + interval_type int_type, INTERVAL *interval); + + +class Item_long_func_date_field: public Item_long_func +{ + bool check_arguments() const override + { return args[0]->check_type_can_return_date(func_name_cstring()); } +public: + Item_long_func_date_field(THD *thd, Item *a) + :Item_long_func(thd, a) { } +}; + + +class Item_long_func_time_field: public Item_long_func +{ + bool check_arguments() const override + { return args[0]->check_type_can_return_time(func_name_cstring()); } +public: + Item_long_func_time_field(THD *thd, Item *a) + :Item_long_func(thd, a) { } +}; + + +class Item_func_period_add :public Item_long_func +{ + bool check_arguments() const override + { return check_argument_types_can_return_int(0, 2); } +public: + Item_func_period_add(THD *thd, Item *a, Item *b): Item_long_func(thd, a, b) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("period_add") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + max_length=6*MY_CHARSET_BIN_MB_MAXLEN; + return FALSE; + } + 
Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_period_diff :public Item_long_func +{ + bool check_arguments() const override + { return check_argument_types_can_return_int(0, 2); } +public: + Item_func_period_diff(THD *thd, Item *a, Item *b): Item_long_func(thd, a, b) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("period_diff") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + max_length=6*MY_CHARSET_BIN_MB_MAXLEN; + return FALSE; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_to_days :public Item_long_func_date_field +{ +public: + Item_func_to_days(THD *thd, Item *a): Item_long_func_date_field(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("to_days") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + max_length=6*MY_CHARSET_BIN_MB_MAXLEN; + set_maybe_null(); + return FALSE; + } + enum_monotonicity_info get_monotonicity_info() const override; + longlong val_int_endpoint(bool left_endp, bool *incl_endp) override; + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_date_args(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_to_seconds :public Item_longlong_func +{ + bool check_arguments() const override + { return check_argument_types_can_return_date(0, arg_count); } +public: + Item_func_to_seconds(THD *thd, Item *a): Item_longlong_func(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("to_seconds") 
}; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + fix_char_length(12); + set_maybe_null(); + return FALSE; + } + enum_monotonicity_info get_monotonicity_info() const override; + longlong val_int_endpoint(bool left_endp, bool *incl_endp) override; + bool check_partition_func_processor(void *bool_arg) override { return FALSE;} + + /* Only meaningful with date part and optional time part */ + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_date_args(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_dayofmonth :public Item_long_func_date_field +{ +public: + Item_func_dayofmonth(THD *thd, Item *a): Item_long_func_date_field(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("dayofmonth") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + max_length=2*MY_CHARSET_BIN_MB_MAXLEN; + set_maybe_null(); + return FALSE; + } + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_date_args(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_month :public Item_long_func +{ +public: + Item_func_month(THD *thd, Item *a): Item_long_func(thd, a) + { } + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("month") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals= 0; + fix_char_length(2); + set_maybe_null(); + return FALSE; + } + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + bool 
check_valid_arguments_processor(void *int_arg) override + { + return !has_date_args(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_monthname :public Item_str_func +{ + MY_LOCALE *locale; +public: + Item_func_monthname(THD *thd, Item *a): Item_str_func(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("monthname") }; + return name; + } + String *val_str(String *str) override; + bool fix_length_and_dec(THD *thd) override; + bool check_partition_func_processor(void *int_arg) override {return TRUE;} + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_date_args(); + } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_SESSION_FUNC); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_dayofyear :public Item_long_func_date_field +{ +public: + Item_func_dayofyear(THD *thd, Item *a): Item_long_func_date_field(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("dayofyear") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals= 0; + fix_char_length(3); + set_maybe_null(); + return FALSE; + } + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_date_args(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_hour :public Item_long_func_time_field +{ +public: + Item_func_hour(THD *thd, Item *a): Item_long_func_time_field(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("hour") }; + return 
name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + max_length=2*MY_CHARSET_BIN_MB_MAXLEN; + set_maybe_null(); + return FALSE; + } + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_time_args(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_minute :public Item_long_func_time_field +{ +public: + Item_func_minute(THD *thd, Item *a): Item_long_func_time_field(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("minute") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + max_length=2*MY_CHARSET_BIN_MB_MAXLEN; + set_maybe_null(); + return FALSE; + } + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_time_args(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_quarter :public Item_long_func_date_field +{ +public: + Item_func_quarter(THD *thd, Item *a): Item_long_func_date_field(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("quarter") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + max_length=1*MY_CHARSET_BIN_MB_MAXLEN; + set_maybe_null(); + return FALSE; + } + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_date_args(); + } + Item *get_copy(THD *thd) override + 
{ return get_item_copy(thd, this); } +}; + + +class Item_func_second :public Item_long_func_time_field +{ +public: + Item_func_second(THD *thd, Item *a): Item_long_func_time_field(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("second") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + max_length=2*MY_CHARSET_BIN_MB_MAXLEN; + set_maybe_null(); + return FALSE; + } + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_time_args(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_week :public Item_long_func +{ + bool check_arguments() const override + { + return args[0]->check_type_can_return_date(func_name_cstring()) || + (arg_count > 1 && args[1]->check_type_can_return_int(func_name_cstring())); + } +public: + Item_func_week(THD *thd, Item *a): Item_long_func(thd, a) {} + Item_func_week(THD *thd, Item *a, Item *b): Item_long_func(thd, a, b) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("week") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + max_length=2*MY_CHARSET_BIN_MB_MAXLEN; + set_maybe_null(); + return FALSE; + } + bool check_vcol_func_processor(void *arg) override + { + if (arg_count == 2) + return FALSE; + return mark_unsupported_function(func_name(), "()", arg, VCOL_SESSION_FUNC); + } + bool check_valid_arguments_processor(void *int_arg) override + { + return arg_count == 2; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_yearweek :public Item_long_func +{ + bool check_arguments() const override + { + return 
args[0]->check_type_can_return_date(func_name_cstring()) || + args[1]->check_type_can_return_int(func_name_cstring()); + } +public: + Item_func_yearweek(THD *thd, Item *a, Item *b) + :Item_long_func(thd, a, b) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("yearweek") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + max_length=6*MY_CHARSET_BIN_MB_MAXLEN; + set_maybe_null(); + return FALSE; + } + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_date_args(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_year :public Item_long_func_date_field +{ +public: + Item_func_year(THD *thd, Item *a): Item_long_func_date_field(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("year") }; + return name; + } + enum_monotonicity_info get_monotonicity_info() const override; + longlong val_int_endpoint(bool left_endp, bool *incl_endp) override; + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + max_length=4*MY_CHARSET_BIN_MB_MAXLEN; + set_maybe_null(); + return FALSE; + } + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_date_args(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_weekday :public Item_long_func +{ + bool odbc_type; +public: + Item_func_weekday(THD *thd, Item *a, bool type_arg): + Item_long_func(thd, a), odbc_type(type_arg) { } + longlong val_int() override; + LEX_CSTRING 
func_name_cstring() const override + { + static LEX_CSTRING dayofweek= {STRING_WITH_LEN("dayofweek") }; + static LEX_CSTRING weekday= {STRING_WITH_LEN("weekday") }; + return (odbc_type ? dayofweek : weekday); + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + return type_handler()->Item_get_date_with_warn(thd, this, ltime, fuzzydate); + } + bool fix_length_and_dec(THD *thd) override + { + decimals= 0; + fix_char_length(1); + set_maybe_null(); + return FALSE; + } + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_date_args(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_dayname :public Item_str_func +{ + MY_LOCALE *locale; + public: + Item_func_dayname(THD *thd, Item *a): Item_str_func(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("dayname") }; + return name; + } + String *val_str(String *str) override; + const Type_handler *type_handler() const override + { return &type_handler_varchar; } + bool fix_length_and_dec(THD *thd) override; + bool check_partition_func_processor(void *int_arg) override {return TRUE;} + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_SESSION_FUNC); + } + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_date_args(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_seconds_hybrid: public Item_func_numhybrid +{ +public: + Item_func_seconds_hybrid(THD *thd): Item_func_numhybrid(thd) {} + Item_func_seconds_hybrid(THD *thd, Item *a): Item_func_numhybrid(thd, a) {} + void fix_length_and_dec_generic(uint dec) + { + DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS); + 
decimals= dec; + max_length=17 + (decimals ? decimals + 1 : 0); + set_maybe_null(); + if (decimals) + set_handler(&type_handler_newdecimal); + else + set_handler(type_handler_long_or_longlong()); + } + double real_op() { DBUG_ASSERT(0); return 0; } + String *str_op(String *str) { DBUG_ASSERT(0); return 0; } + bool date_op(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) + { + DBUG_ASSERT(0); + return true; + } +}; + + +class Item_func_unix_timestamp :public Item_func_seconds_hybrid +{ + bool get_timestamp_value(my_time_t *seconds, ulong *second_part); +public: + Item_func_unix_timestamp(THD *thd): Item_func_seconds_hybrid(thd) {} + Item_func_unix_timestamp(THD *thd, Item *a): + Item_func_seconds_hybrid(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("unix_timestamp") }; + return name; + } + enum_monotonicity_info get_monotonicity_info() const override; + longlong val_int_endpoint(bool left_endp, bool *incl_endp) override; + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + /* + UNIX_TIMESTAMP() depends on the current timezone + (and thus may not be used as a partitioning function) + when its argument is NOT of the TIMESTAMP type. + */ + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_timestamp_args(); + } + bool check_vcol_func_processor(void *arg) override + { + if (arg_count) + return FALSE; + return mark_unsupported_function(func_name(), "()", arg, VCOL_TIME_FUNC); + } + bool fix_length_and_dec(THD *thd) override + { + fix_length_and_dec_generic(arg_count ? 
+ args[0]->datetime_precision(thd) : 0); + return FALSE; + } + longlong int_op() override; + my_decimal *decimal_op(my_decimal* buf) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_time_to_sec :public Item_func_seconds_hybrid +{ +public: + Item_func_time_to_sec(THD *thd, Item *item): + Item_func_seconds_hybrid(thd, item) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("time_to_sec") }; + return name; + } + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_time_args(); + } + bool fix_length_and_dec(THD *thd) override + { + fix_length_and_dec_generic(args[0]->time_precision(thd)); + return FALSE; + } + longlong int_op() override; + my_decimal *decimal_op(my_decimal* buf) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_datefunc :public Item_func +{ +public: + Item_datefunc(THD *thd): Item_func(thd) { } + Item_datefunc(THD *thd, Item *a): Item_func(thd, a) { } + Item_datefunc(THD *thd, Item *a, Item *b): Item_func(thd, a, b) { } + const Type_handler *type_handler() const override + { return &type_handler_newdate; } + longlong val_int() override + { return Date(this).to_longlong(); } + double val_real() override + { return Date(this).to_double(); } + String *val_str(String *to) override + { return Date(this).to_string(to); } + my_decimal *val_decimal(my_decimal *to) override + { return Date(this).to_decimal(to); } + bool fix_length_and_dec(THD *thd) override + { + fix_attributes_date(); + set_maybe_null(arg_count > 0); + return FALSE; + } +}; + + +class Item_timefunc :public Item_func +{ +public: + Item_timefunc(THD *thd): Item_func(thd) {} + Item_timefunc(THD *thd, Item *a): Item_func(thd, a) {} + Item_timefunc(THD *thd, Item 
*a, Item *b): Item_func(thd, a, b) {} + Item_timefunc(THD *thd, Item *a, Item *b, Item *c): Item_func(thd, a, b ,c) {} + const Type_handler *type_handler() const override + { return &type_handler_time2; } + longlong val_int() override + { return Time(this).to_longlong(); } + double val_real() override + { return Time(this).to_double(); } + String *val_str(String *to) override + { return Time(this).to_string(to, decimals); } + my_decimal *val_decimal(my_decimal *to) override + { return Time(this).to_decimal(to); } + bool val_native(THD *thd, Native *to) override + { return Time(thd, this).to_native(to, decimals); } +}; + + +class Item_datetimefunc :public Item_func +{ +public: + Item_datetimefunc(THD *thd): Item_func(thd) {} + Item_datetimefunc(THD *thd, Item *a): Item_func(thd, a) {} + Item_datetimefunc(THD *thd, Item *a, Item *b): Item_func(thd, a, b) {} + Item_datetimefunc(THD *thd, Item *a, Item *b, Item *c): + Item_func(thd, a, b ,c) {} + const Type_handler *type_handler() const override + { return &type_handler_datetime2; } + longlong val_int() override { return Datetime(this).to_longlong(); } + double val_real() override { return Datetime(this).to_double(); } + String *val_str(String *to) override + { return Datetime(this).to_string(to, decimals); } + my_decimal *val_decimal(my_decimal *to) override + { return Datetime(this).to_decimal(to); } +}; + + +/* Abstract CURTIME function. Children should define what time zone is used */ + +class Item_func_curtime :public Item_timefunc +{ + MYSQL_TIME ltime; + query_id_t last_query_id; +public: + Item_func_curtime(THD *thd, uint dec): Item_timefunc(thd), last_query_id(0) + { decimals= dec; } + bool fix_fields(THD *, Item **) override; + bool fix_length_and_dec(THD *thd) override + { fix_attributes_time(decimals); return FALSE; } + bool get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate) override; + /* + Abstract method that defines which time zone is used for conversion. 
+ Converts the current time in my_time_t representation to broken-down + MYSQL_TIME representation using UTC-SYSTEM or per-thread time zone. + */ + virtual void store_now_in_TIME(THD *thd, MYSQL_TIME *now_time)=0; + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_TIME_FUNC); + } + void print(String *str, enum_query_type query_type) override; +}; + + +class Item_func_curtime_local :public Item_func_curtime +{ +public: + Item_func_curtime_local(THD *thd, uint dec): Item_func_curtime(thd, dec) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("curtime") }; + return name; + } + void store_now_in_TIME(THD *thd, MYSQL_TIME *now_time) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_curtime_utc :public Item_func_curtime +{ +public: + Item_func_curtime_utc(THD *thd, uint dec): Item_func_curtime(thd, dec) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("utc_time") }; + return name; + } + void store_now_in_TIME(THD *thd, MYSQL_TIME *now_time) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* Abstract CURDATE function. See also Item_func_curtime. 
*/ + +class Item_func_curdate :public Item_datefunc +{ + query_id_t last_query_id; + MYSQL_TIME ltime; +public: + Item_func_curdate(THD *thd): Item_datefunc(thd), last_query_id(0) {} + bool get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate) override; + virtual void store_now_in_TIME(THD *thd, MYSQL_TIME *now_time)=0; + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_TIME_FUNC); + } +}; + + +class Item_func_curdate_local :public Item_func_curdate +{ +public: + Item_func_curdate_local(THD *thd): Item_func_curdate(thd) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("curdate") }; + return name; + } + void store_now_in_TIME(THD *thd, MYSQL_TIME *now_time) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_curdate_utc :public Item_func_curdate +{ +public: + Item_func_curdate_utc(THD *thd): Item_func_curdate(thd) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("utc_date") }; + return name; + } + void store_now_in_TIME(THD* thd, MYSQL_TIME *now_time) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* Abstract CURRENT_TIMESTAMP function. 
See also Item_func_curtime */ + +class Item_func_now :public Item_datetimefunc +{ + MYSQL_TIME ltime; + query_id_t last_query_id; +public: + Item_func_now(THD *thd, uint dec): Item_datetimefunc(thd), last_query_id(0) + { decimals= dec; } + bool fix_fields(THD *, Item **) override; + bool fix_length_and_dec(THD *thd) override + { fix_attributes_datetime(decimals); return FALSE;} + bool get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate) override; + virtual void store_now_in_TIME(THD *thd, MYSQL_TIME *now_time)=0; + bool check_vcol_func_processor(void *arg) override + { + /* + NOW is safe for replication as slaves will run with same time as + master + */ + return mark_unsupported_function(func_name(), "()", arg, VCOL_TIME_FUNC); + } + void print(String *str, enum_query_type query_type) override; +}; + + +class Item_func_now_local :public Item_func_now +{ +public: + Item_func_now_local(THD *thd, uint dec): Item_func_now(thd, dec) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("current_timestamp") }; + return name; + } + int save_in_field(Field *field, bool no_conversions) override; + void store_now_in_TIME(THD *thd, MYSQL_TIME *now_time) override; + enum Functype functype() const override { return NOW_FUNC; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_now_utc :public Item_func_now +{ +public: + Item_func_now_utc(THD *thd, uint dec): Item_func_now(thd, dec) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("utc_timestamp") }; + return name; + } + void store_now_in_TIME(THD *thd, MYSQL_TIME *now_time) override; + enum Functype functype() const override { return NOW_UTC_FUNC; } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, + VCOL_TIME_FUNC | VCOL_NON_DETERMINISTIC); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, 
this); } +}; + + +/* + This is like NOW(), but always uses the real current time, not the + query_start(). This matches the Oracle behavior. +*/ +class Item_func_sysdate_local :public Item_func_now +{ +public: + Item_func_sysdate_local(THD *thd, uint dec): Item_func_now(thd, dec) {} + bool const_item() const override { return 0; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("sysdate") }; + return name; + } + void store_now_in_TIME(THD *thd, MYSQL_TIME *now_time) override; + bool get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate) override; + table_map used_tables() const override { return RAND_TABLE_BIT; } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, + VCOL_TIME_FUNC | VCOL_NON_DETERMINISTIC); + } + enum Functype functype() const override { return SYSDATE_FUNC; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_from_days :public Item_datefunc +{ + bool check_arguments() const override + { return args[0]->check_type_can_return_int(func_name_cstring()); } +public: + Item_func_from_days(THD *thd, Item *a): Item_datefunc(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("from_days") }; + return name; + } + bool get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate) override; + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + bool check_valid_arguments_processor(void *int_arg) override + { + return has_date_args() || has_time_args(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_date_format :public Item_str_func +{ + bool check_arguments() const override + { + return args[0]->check_type_can_return_date(func_name_cstring()) || + check_argument_types_can_return_text(1, 
arg_count); + } + const MY_LOCALE *locale; + int fixed_length; + String value; +protected: + bool is_time_format; +public: + Item_func_date_format(THD *thd, Item *a, Item *b): + Item_str_func(thd, a, b), locale(0), is_time_format(false) {} + Item_func_date_format(THD *thd, Item *a, Item *b, Item *c): + Item_str_func(thd, a, b, c), locale(0), is_time_format(false) {} + String *val_str(String *str) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("date_format") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + uint format_length(const String *format); + bool eq(const Item *item, bool binary_cmp) const override; + bool check_vcol_func_processor(void *arg) override + { + if (arg_count > 2) + return false; + return mark_unsupported_function(func_name(), "()", arg, VCOL_SESSION_FUNC); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_time_format: public Item_func_date_format +{ +public: + Item_func_time_format(THD *thd, Item *a, Item *b): + Item_func_date_format(thd, a, b) { is_time_format= true; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("time_format") }; + return name; + } + bool check_vcol_func_processor(void *arg) override { return false; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* the max length of datetime format models string in Oracle is 144 */ +#define MAX_DATETIME_FORMAT_MODEL_LEN 144 + +class Item_func_tochar :public Item_str_func +{ + const MY_LOCALE *locale; + THD *thd; + String warning_message; + bool fixed_length; + + /* + When datetime format models is parsed, use uint16 integers to + represent the format models and store in fmt_array. 
+ */ + uint16 fmt_array[MAX_DATETIME_FORMAT_MODEL_LEN+1]; + + bool check_arguments() const override + { + return + (args[0]->check_type_can_return_date(func_name_cstring()) && + args[0]->check_type_can_return_time(func_name_cstring())) || + check_argument_types_can_return_text(1, arg_count); + } + +public: + Item_func_tochar(THD *thd, Item *a, Item *b): + Item_str_func(thd, a, b), locale(0) + { + /* NOTE: max length of warning message is 64 */ + warning_message.alloc(64); + warning_message.length(0); + } + ~Item_func_tochar() { warning_message.free(); } + String *val_str(String *str) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("to_char") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + bool parse_format_string(const String *format, uint *fmt_len); + + bool check_vcol_func_processor(void *arg) override + { + if (arg_count > 2) + return false; + return mark_unsupported_function(func_name(), "()", arg, VCOL_SESSION_FUNC); + } + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_from_unixtime :public Item_datetimefunc +{ + bool check_arguments() const override + { return args[0]->check_type_can_return_decimal(func_name_cstring()); } + Time_zone *tz; + public: + Item_func_from_unixtime(THD *thd, Item *a): Item_datetimefunc(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("from_unixtime") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + bool get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate) override; + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), "()", arg, VCOL_SESSION_FUNC); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + We need Time_zone class declaration for storing pointers in + Item_func_convert_tz. 
+*/ +class Time_zone; + +/* + This class represents CONVERT_TZ() function. + The important fact about this function that it is handled in special way. + When such function is met in expression time_zone system tables are added + to global list of tables to open, so later those already opened and locked + tables can be used during this function calculation for loading time zone + descriptions. +*/ +class Item_func_convert_tz :public Item_datetimefunc +{ + bool check_arguments() const override + { + return args[0]->check_type_can_return_date(func_name_cstring()) || + check_argument_types_can_return_text(1, arg_count); + } + /* + If time zone parameters are constants we are caching objects that + represent them (we use separate from_tz_cached/to_tz_cached members + to indicate this fact, since NULL is legal value for from_tz/to_tz + members. + */ + bool from_tz_cached, to_tz_cached; + Time_zone *from_tz, *to_tz; + public: + Item_func_convert_tz(THD *thd, Item *a, Item *b, Item *c): + Item_datetimefunc(thd, a, b, c), from_tz_cached(0), to_tz_cached(0) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("convert_tz") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + fix_attributes_datetime(args[0]->datetime_precision(thd)); + set_maybe_null(); + return FALSE; + } + bool get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate) override; + void cleanup() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_sec_to_time :public Item_timefunc +{ + bool check_arguments() const override + { return args[0]->check_type_can_return_decimal(func_name_cstring()); } +public: + Item_func_sec_to_time(THD *thd, Item *item): Item_timefunc(thd, item) {} + bool get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate) override; + bool fix_length_and_dec(THD *thd) override + { + fix_attributes_time(args[0]->decimals); + set_maybe_null(); + return FALSE; + } + 
LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("sec_to_time") }; + return name; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_date_add_interval :public Item_handled_func +{ +public: + const interval_type int_type; // keep it public + const bool date_sub_interval; // keep it public + Item_date_add_interval(THD *thd, Item *a, Item *b, interval_type type_arg, + bool neg_arg): + Item_handled_func(thd, a, b), int_type(type_arg), + date_sub_interval(neg_arg) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("date_add_interval") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + bool eq(const Item *item, bool binary_cmp) const override; + void print(String *str, enum_query_type query_type) override; + enum precedence precedence() const override { return INTERVAL_PRECEDENCE; } + bool need_parentheses_in_default() override { return true; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_extract :public Item_int_func, + public Type_handler_hybrid_field_type +{ + date_mode_t m_date_mode; + const Type_handler_int_result *handler_by_length(uint32 length, + uint32 threashold) + { + if (length >= threashold) + return &type_handler_slonglong; + return &type_handler_slong; + } + void set_date_length(uint32 length) + { + /* + Although DATE components (e.g. YEAR, YEAR_MONTH, QUARTER, MONTH, WEEK) + cannot have a sign, we should probably still add +1, + because all around the code we assume that max_length is sign inclusive. + Another option is to set unsigned_flag to "true". 
+ */ + set_handler(handler_by_length(max_length= length, 10)); // QQ: see above + m_date_mode= date_mode_t(0); + } + void set_day_length(uint32 length) + { + /* + Units starting with DAY can be negative: + EXTRACT(DAY FROM '-24:00:00') -> -1 + */ + set_handler(handler_by_length(max_length= length + 1/*sign*/, 11)); + m_date_mode= Temporal::Options(TIME_INTERVAL_DAY, current_thd); + } + void set_time_length(uint32 length) + { + set_handler(handler_by_length(max_length= length + 1/*sign*/, 11)); + m_date_mode= Temporal::Options(TIME_INTERVAL_hhmmssff, current_thd); + } + public: + const interval_type int_type; // keep it public + Item_extract(THD *thd, interval_type type_arg, Item *a): + Item_int_func(thd, a), + Type_handler_hybrid_field_type(&type_handler_slonglong), + m_date_mode(date_mode_t(0)), + int_type(type_arg) + { } + const Type_handler *type_handler() const override + { + return Type_handler_hybrid_field_type::type_handler(); + } + longlong val_int() override; + enum Functype functype() const override { return EXTRACT_FUNC; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("extract") }; + return name; + } + bool check_arguments() const override; + bool fix_length_and_dec(THD *thd) override; + bool eq(const Item *item, bool binary_cmp) const override; + void print(String *str, enum_query_type query_type) override; + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override + { + if (int_type != INTERVAL_WEEK) + return FALSE; + return mark_unsupported_function(func_name(), "()", arg, VCOL_SESSION_FUNC); + } + bool check_valid_arguments_processor(void *int_arg) override + { + switch (int_type) { + case INTERVAL_YEAR: + case INTERVAL_YEAR_MONTH: + case INTERVAL_QUARTER: + case INTERVAL_MONTH: + /* case INTERVAL_WEEK: Not allowed as partitioning function, bug#57071 */ + case INTERVAL_DAY: + return !has_date_args(); + case INTERVAL_DAY_HOUR: + 
case INTERVAL_DAY_MINUTE: + case INTERVAL_DAY_SECOND: + case INTERVAL_DAY_MICROSECOND: + return !has_datetime_args(); + case INTERVAL_HOUR: + case INTERVAL_HOUR_MINUTE: + case INTERVAL_HOUR_SECOND: + case INTERVAL_MINUTE: + case INTERVAL_MINUTE_SECOND: + case INTERVAL_SECOND: + case INTERVAL_MICROSECOND: + case INTERVAL_HOUR_MICROSECOND: + case INTERVAL_MINUTE_MICROSECOND: + case INTERVAL_SECOND_MICROSECOND: + return !has_time_args(); + default: + /* + INTERVAL_LAST is only an end marker, + INTERVAL_WEEK depends on default_week_format which is a session + variable and cannot be used for partitioning. See bug#57071. + */ + break; + } + return true; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_char_typecast :public Item_handled_func +{ + uint cast_length; + CHARSET_INFO *cast_cs, *from_cs; + bool charset_conversion; + String tmp_value; + bool m_suppress_warning_to_error_escalation; +public: + bool has_explicit_length() const { return cast_length != ~0U; } +private: + String *reuse(String *src, size_t length); + String *copy(String *src, CHARSET_INFO *cs); + uint adjusted_length_with_warn(uint length); + void check_truncation_with_warn(String *src, size_t dstlen); + void fix_length_and_dec_internal(CHARSET_INFO *fromcs); +public: + // Methods used by ColumnStore + uint get_cast_length() const { return cast_length; } +public: + Item_char_typecast(THD *thd, Item *a, uint length_arg, CHARSET_INFO *cs_arg): + Item_handled_func(thd, a), cast_length(length_arg), cast_cs(cs_arg), + m_suppress_warning_to_error_escalation(false) {} + enum Functype functype() const override { return CHAR_TYPECAST_FUNC; } + bool eq(const Item *item, bool binary_cmp) const override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("cast_as_char") }; + return name; + } + CHARSET_INFO *cast_charset() const { return cast_cs; } + String *val_str_generic(String *a); + String 
*val_str_binary_from_native(String *a); + void fix_length_and_dec_generic(); + void fix_length_and_dec_numeric(); + void fix_length_and_dec_str(); + void fix_length_and_dec_native_to_binary(uint32 octet_length); + bool fix_length_and_dec(THD *thd) override + { + return args[0]->type_handler()->Item_char_typecast_fix_length_and_dec(this); + } + void print(String *str, enum_query_type query_type) override; + bool need_parentheses_in_default() override { return true; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_interval_DDhhmmssff_typecast :public Item_char_typecast +{ + uint m_fsp; +public: + Item_interval_DDhhmmssff_typecast(THD *thd, Item *a, uint fsp) + :Item_char_typecast(thd, a,Interval_DDhhmmssff::max_char_length(fsp), + &my_charset_latin1), + m_fsp(fsp) + { } + String *val_str(String *to) + { + Interval_DDhhmmssff it(current_thd, args[0], m_fsp); + null_value= !it.is_valid_interval_DDhhmmssff(); + return it.to_string(to, m_fsp); + } +}; + + +class Item_date_typecast :public Item_datefunc +{ +public: + Item_date_typecast(THD *thd, Item *a): Item_datefunc(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("cast_as_date") }; + return name; + } + void print(String *str, enum_query_type query_type) override + { + print_cast_temporal(str, query_type); + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + bool fix_length_and_dec(THD *thd) override + { + return args[0]->type_handler()->Item_date_typecast_fix_length_and_dec(this); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_time_typecast :public Item_timefunc +{ +public: + Item_time_typecast(THD *thd, Item *a, uint dec_arg): + Item_timefunc(thd, a) { decimals= dec_arg; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("cast_as_time") }; + return name; + } + void print(String *str, 
enum_query_type query_type) override + { + print_cast_temporal(str, query_type); + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + bool fix_length_and_dec(THD *thd) override + { + return args[0]->type_handler()-> + Item_time_typecast_fix_length_and_dec(this); + } + Sql_mode_dependency value_depends_on_sql_mode() const override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_datetime_typecast :public Item_datetimefunc +{ +public: + Item_datetime_typecast(THD *thd, Item *a, uint dec_arg): + Item_datetimefunc(thd, a) { decimals= dec_arg; } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("cast_as_datetime") }; + return name; + } + void print(String *str, enum_query_type query_type) override + { + print_cast_temporal(str, query_type); + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + bool fix_length_and_dec(THD *thd) override + { + return args[0]->type_handler()-> + Item_datetime_typecast_fix_length_and_dec(this); + } + Sql_mode_dependency value_depends_on_sql_mode() const override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_makedate :public Item_datefunc +{ + bool check_arguments() const override + { return check_argument_types_can_return_int(0, arg_count); } +public: + Item_func_makedate(THD *thd, Item *a, Item *b): + Item_datefunc(thd, a, b) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("makedate") }; + return name; + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_timestamp :public Item_datetimefunc +{ + bool check_arguments() const override + { + return args[0]->check_type_can_return_date(func_name_cstring()) || + 
args[1]->check_type_can_return_time(func_name_cstring()); + } +public: + Item_func_timestamp(THD *thd, Item *a, Item *b) + :Item_datetimefunc(thd, a, b) + { } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("timestamp") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + uint dec0= args[0]->datetime_precision(thd); + uint dec1= Interval_DDhhmmssff::fsp(thd, args[1]); + fix_attributes_datetime(MY_MAX(dec0, dec1)); + set_maybe_null(); + return false; + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + Datetime dt(thd, args[0], Datetime::Options(TIME_CONV_NONE, thd)); + if (!dt.is_valid_datetime()) + return (null_value= 1); + + Interval_DDhhmmssff it(thd, args[1]); + if (!it.is_valid_interval_DDhhmmssff()) + return (null_value= true); + return (null_value= Sec6_add(dt.get_mysql_time(), it.get_mysql_time(), 1). + to_datetime(ltime)); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/** + ADDTIME(t,a) and SUBTIME(t,a) are time functions that calculate a + time/datetime value + + t: time_or_datetime_expression + a: time_expression + + Result: Time value or datetime value +*/ + +class Item_func_add_time :public Item_handled_func +{ + int sign; +public: + // Methods used by ColumnStore + int get_sign() const { return sign; } +public: + Item_func_add_time(THD *thd, Item *a, Item *b, bool neg_arg) + :Item_handled_func(thd, a, b), sign(neg_arg ? -1 : 1) + { } + bool fix_length_and_dec(THD *thd) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING addtime= { STRING_WITH_LEN("addtime") }; + static LEX_CSTRING subtime= { STRING_WITH_LEN("subtime") }; + return sign > 0 ? 
addtime : subtime; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_timediff :public Item_timefunc +{ + bool check_arguments() const override + { return check_argument_types_can_return_time(0, arg_count); } +public: + Item_func_timediff(THD *thd, Item *a, Item *b): Item_timefunc(thd, a, b) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("timediff") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + uint dec= MY_MAX(args[0]->time_precision(thd), + args[1]->time_precision(thd)); + fix_attributes_time(dec); + set_maybe_null(); + return FALSE; + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_maketime :public Item_timefunc +{ + bool check_arguments() const override + { + return check_argument_types_can_return_int(0, 2) || + args[2]->check_type_can_return_decimal(func_name_cstring()); + } +public: + Item_func_maketime(THD *thd, Item *a, Item *b, Item *c): + Item_timefunc(thd, a, b, c) + {} + bool fix_length_and_dec(THD *thd) override + { + fix_attributes_time(args[2]->decimals); + set_maybe_null(); + return FALSE; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("maketime") }; + return name; + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_microsecond :public Item_long_func_time_field +{ +public: + Item_func_microsecond(THD *thd, Item *a): Item_long_func_time_field(thd, a) {} + longlong val_int() override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("microsecond") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + set_maybe_null(); + 
fix_char_length(6); + return FALSE; + } + bool check_partition_func_processor(void *int_arg) override {return FALSE;} + bool check_vcol_func_processor(void *arg) override { return FALSE;} + bool check_valid_arguments_processor(void *int_arg) override + { + return !has_time_args(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_timestamp_diff :public Item_longlong_func +{ + bool check_arguments() const override + { return check_argument_types_can_return_date(0, arg_count); } + const interval_type int_type; +public: + // Methods used by ColumnStore + interval_type get_int_type() const { return int_type; }; +public: + Item_func_timestamp_diff(THD *thd, Item *a, Item *b, interval_type type_arg): + Item_longlong_func(thd, a, b), int_type(type_arg) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("timestampdiff") }; + return name; + } + longlong val_int() override; + bool fix_length_and_dec(THD *thd) override + { + decimals=0; + set_maybe_null(); + return FALSE; + } + void print(String *str, enum_query_type query_type) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +enum date_time_format +{ + USA_FORMAT, JIS_FORMAT, ISO_FORMAT, EUR_FORMAT, INTERNAL_FORMAT +}; + +class Item_func_get_format :public Item_str_ascii_func +{ +public: + const timestamp_type type; // keep it public + Item_func_get_format(THD *thd, timestamp_type type_arg, Item *a): + Item_str_ascii_func(thd, a), type(type_arg) + {} + String *val_str_ascii(String *str) override; + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("get_format") }; + return name; + } + bool fix_length_and_dec(THD *thd) override + { + set_maybe_null(); + decimals=0; + fix_length_and_charset(17, default_charset()); + return FALSE; + } + void print(String *str, enum_query_type query_type) override; + Item *get_copy(THD *thd) override + { 
return get_item_copy(thd, this); } +}; + + +class Item_func_str_to_date :public Item_handled_func +{ + bool const_item; + String subject_converter; + String format_converter; + CHARSET_INFO *internal_charset; +public: + Item_func_str_to_date(THD *thd, Item *a, Item *b): + Item_handled_func(thd, a, b), const_item(false), + internal_charset(NULL) + {} + bool get_date_common(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate, + timestamp_type); + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("str_to_date") }; + return name; + } + bool fix_length_and_dec(THD *thd) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_last_day :public Item_datefunc +{ + bool check_arguments() const override + { return args[0]->check_type_can_return_date(func_name_cstring()); } +public: + Item_func_last_day(THD *thd, Item *a): Item_datefunc(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("last_day") }; + return name; + } + bool get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/*****************************************************************************/ + +class Func_handler_date_add_interval +{ +protected: + static uint interval_dec(const Item *item, interval_type int_type) + { + if (int_type == INTERVAL_MICROSECOND || + (int_type >= INTERVAL_DAY_MICROSECOND && + int_type <= INTERVAL_SECOND_MICROSECOND)) + return TIME_SECOND_PART_DIGITS; + if (int_type == INTERVAL_SECOND && item->decimals > 0) + return MY_MIN(item->decimals, TIME_SECOND_PART_DIGITS); + return 0; + } + interval_type int_type(const Item_handled_func *item) const + { + return static_cast(item)->int_type; + } + bool sub(const Item_handled_func *item) const + { + return static_cast(item)->date_sub_interval; + } + bool add(THD *thd, Item *item, interval_type 
type, bool sub, MYSQL_TIME *to) const + { + INTERVAL interval; + if (get_interval_value(thd, item, type, &interval)) + return true; + if (sub) + interval.neg = !interval.neg; + return date_add_interval(thd, to, type, interval); + } +}; + + +class Func_handler_date_add_interval_datetime: + public Item_handled_func::Handler_datetime, + public Func_handler_date_add_interval +{ +public: + bool fix_length_and_dec(Item_handled_func *item) const + { + uint dec= MY_MAX(item->arguments()[0]->datetime_precision(current_thd), + interval_dec(item->arguments()[1], int_type(item))); + item->fix_attributes_datetime(dec); + return false; + } + bool get_date(THD *thd, Item_handled_func *item, + MYSQL_TIME *to, date_mode_t fuzzy) const + { + Datetime::Options opt(TIME_CONV_NONE, thd); + Datetime dt(thd, item->arguments()[0], opt); + if (!dt.is_valid_datetime() || + dt.check_date_with_warn(thd, TIME_NO_ZERO_DATE | TIME_NO_ZERO_IN_DATE)) + return (item->null_value= true); + dt.copy_to_mysql_time(to); + return (item->null_value= add(thd, item->arguments()[1], + int_type(item), sub(item), to)); + } +}; + + +class Func_handler_date_add_interval_datetime_arg0_time: + public Func_handler_date_add_interval_datetime +{ +public: + bool get_date(THD *thd, Item_handled_func *item, + MYSQL_TIME *to, date_mode_t fuzzy) const; +}; + + +class Func_handler_date_add_interval_date: + public Item_handled_func::Handler_date, + public Func_handler_date_add_interval +{ +public: + bool get_date(THD *thd, Item_handled_func *item, + MYSQL_TIME *to, date_mode_t fuzzy) const + { + /* + The first argument is known to be of the DATE data type (not DATETIME). + We don't need rounding here. 
+ */ + Date d(thd, item->arguments()[0], TIME_CONV_NONE); + if (!d.is_valid_date() || + d.check_date_with_warn(thd, TIME_NO_ZERO_DATE | TIME_NO_ZERO_IN_DATE)) + return (item->null_value= true); + d.copy_to_mysql_time(to); + return (item->null_value= add(thd, item->arguments()[1], + int_type(item), sub(item), to)); + } +}; + + +class Func_handler_date_add_interval_time: + public Item_handled_func::Handler_time, + public Func_handler_date_add_interval +{ +public: + bool fix_length_and_dec(Item_handled_func *item) const + { + uint dec= MY_MAX(item->arguments()[0]->time_precision(current_thd), + interval_dec(item->arguments()[1], int_type(item))); + item->fix_attributes_time(dec); + return false; + } + bool get_date(THD *thd, Item_handled_func *item, + MYSQL_TIME *to, date_mode_t fuzzy) const + { + Time t(thd, item->arguments()[0]); + if (!t.is_valid_time()) + return (item->null_value= true); + t.copy_to_mysql_time(to); + return (item->null_value= add(thd, item->arguments()[1], + int_type(item), sub(item), to)); + } +}; + + +class Func_handler_date_add_interval_string: + public Item_handled_func::Handler_temporal_string, + public Func_handler_date_add_interval +{ +public: + bool fix_length_and_dec(Item_handled_func *item) const + { + uint dec= MY_MAX(item->arguments()[0]->datetime_precision(current_thd), + interval_dec(item->arguments()[1], int_type(item))); + item->Type_std_attributes::set( + Type_temporal_attributes_not_fixed_dec(MAX_DATETIME_WIDTH, dec, false), + DTCollation(item->default_charset(), + DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII)); + item->fix_char_length(item->max_length); + return false; + } + bool get_date(THD *thd, Item_handled_func *item, + MYSQL_TIME *to, date_mode_t fuzzy) const + { + if (item->arguments()[0]-> + get_date(thd, to, Datetime::Options(TIME_CONV_NONE, thd)) || + (to->time_type != MYSQL_TIMESTAMP_TIME && + check_date_with_warn(thd, to, TIME_NO_ZEROS, MYSQL_TIMESTAMP_ERROR))) + return (item->null_value= true); + return 
(item->null_value= add(thd, item->arguments()[1], + int_type(item), sub(item), to)); + } +}; + + +class Func_handler_sign +{ +protected: + int m_sign; + Func_handler_sign(int sign) :m_sign(sign) { } +}; + + +class Func_handler_add_time_datetime: + public Item_handled_func::Handler_datetime, + public Func_handler_sign +{ +public: + Func_handler_add_time_datetime(int sign) + :Func_handler_sign(sign) + { } + bool fix_length_and_dec(Item_handled_func *item) const + { + THD *thd= current_thd; + uint dec0= item->arguments()[0]->datetime_precision(thd); + uint dec1= Interval_DDhhmmssff::fsp(thd, item->arguments()[1]); + item->fix_attributes_datetime(MY_MAX(dec0, dec1)); + return false; + } + bool get_date(THD *thd, Item_handled_func *item, + MYSQL_TIME *to, date_mode_t fuzzy) const + { + DBUG_ASSERT(item->fixed()); + Datetime::Options opt(TIME_CONV_NONE, thd); + Datetime dt(thd, item->arguments()[0], opt); + if (!dt.is_valid_datetime()) + return (item->null_value= true); + Interval_DDhhmmssff it(thd, item->arguments()[1]); + if (!it.is_valid_interval_DDhhmmssff()) + return (item->null_value= true); + return (item->null_value= (Sec6_add(dt.get_mysql_time(), + it.get_mysql_time(), m_sign). 
+ to_datetime(to))); + } +}; + + +class Func_handler_add_time_time: + public Item_handled_func::Handler_time, + public Func_handler_sign +{ +public: + Func_handler_add_time_time(int sign) + :Func_handler_sign(sign) + { } + bool fix_length_and_dec(Item_handled_func *item) const + { + THD *thd= current_thd; + uint dec0= item->arguments()[0]->time_precision(thd); + uint dec1= Interval_DDhhmmssff::fsp(thd, item->arguments()[1]); + item->fix_attributes_time(MY_MAX(dec0, dec1)); + return false; + } + bool get_date(THD *thd, Item_handled_func *item, + MYSQL_TIME *to, date_mode_t fuzzy) const + { + DBUG_ASSERT(item->fixed()); + Time t(thd, item->arguments()[0]); + if (!t.is_valid_time()) + return (item->null_value= true); + Interval_DDhhmmssff i(thd, item->arguments()[1]); + if (!i.is_valid_interval_DDhhmmssff()) + return (item->null_value= true); + return (item->null_value= (Sec6_add(t.get_mysql_time(), + i.get_mysql_time(), m_sign). + to_time(thd, to, item->decimals))); + } +}; + + +class Func_handler_add_time_string: + public Item_handled_func::Handler_temporal_string, + public Func_handler_sign +{ +public: + Func_handler_add_time_string(int sign) + :Func_handler_sign(sign) + { } + bool fix_length_and_dec(Item_handled_func *item) const + { + uint dec0= item->arguments()[0]->decimals; + uint dec1= Interval_DDhhmmssff::fsp(current_thd, item->arguments()[1]); + uint dec= MY_MAX(dec0, dec1); + item->Type_std_attributes::set( + Type_temporal_attributes_not_fixed_dec(MAX_DATETIME_WIDTH, dec, false), + DTCollation(item->default_charset(), + DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII)); + item->fix_char_length(item->max_length); + return false; + } + bool get_date(THD *thd, Item_handled_func *item, + MYSQL_TIME *to, date_mode_t fuzzy) const + { + DBUG_ASSERT(item->fixed()); + // Detect a proper timestamp type based on the argument values + Temporal_hybrid l_time1(thd, item->arguments()[0], + Temporal::Options(TIME_TIME_ONLY, thd)); + if (!l_time1.is_valid_temporal()) + return 
(item->null_value= true); + Interval_DDhhmmssff l_time2(thd, item->arguments()[1]); + if (!l_time2.is_valid_interval_DDhhmmssff()) + return (item->null_value= true); + Sec6_add add(l_time1.get_mysql_time(), l_time2.get_mysql_time(), m_sign); + return (item->null_value= (l_time1.get_mysql_time()->time_type == + MYSQL_TIMESTAMP_TIME ? + add.to_time(thd, to, item->decimals) : + add.to_datetime(to))); + } +}; + + +class Func_handler_str_to_date_datetime_sec: + public Item_handled_func::Handler_datetime +{ +public: + bool fix_length_and_dec(Item_handled_func *item) const + { + item->fix_attributes_datetime(0); + return false; + } + bool get_date(THD *thd, Item_handled_func *item, + MYSQL_TIME *to, date_mode_t fuzzy) const + { + return static_cast(item)-> + get_date_common(thd, to, fuzzy, MYSQL_TIMESTAMP_DATETIME); + } +}; + + +class Func_handler_str_to_date_datetime_usec: + public Item_handled_func::Handler_datetime +{ +public: + bool fix_length_and_dec(Item_handled_func *item) const + { + item->fix_attributes_datetime(TIME_SECOND_PART_DIGITS); + return false; + } + bool get_date(THD *thd, Item_handled_func *item, + MYSQL_TIME *to, date_mode_t fuzzy) const + { + return static_cast(item)-> + get_date_common(thd, to, fuzzy, MYSQL_TIMESTAMP_DATETIME); + } +}; + + +class Func_handler_str_to_date_date: public Item_handled_func::Handler_date +{ +public: + bool get_date(THD *thd, Item_handled_func *item, + MYSQL_TIME *to, date_mode_t fuzzy) const + { + return static_cast(item)-> + get_date_common(thd, to, fuzzy, MYSQL_TIMESTAMP_DATE); + } +}; + + +class Func_handler_str_to_date_time: public Item_handled_func::Handler_time +{ +public: + bool get_date(THD *thd, Item_handled_func *item, + MYSQL_TIME *to, date_mode_t fuzzy) const + { + if (static_cast(item)-> + get_date_common(thd, to, fuzzy, MYSQL_TIMESTAMP_TIME)) + return true; + if (to->day) + { + /* + Day part for time type can be nonzero value and so + we should add hours from day part to hour part to + keep valid time value. 
+ */ + to->hour+= to->day * 24; + to->day= 0; + } + return false; + } +}; + + +class Func_handler_str_to_date_time_sec: public Func_handler_str_to_date_time +{ +public: + bool fix_length_and_dec(Item_handled_func *item) const + { + item->fix_attributes_time(0); + return false; + } +}; + + +class Func_handler_str_to_date_time_usec: public Func_handler_str_to_date_time +{ +public: + bool fix_length_and_dec(Item_handled_func *item) const + { + item->fix_attributes_time(TIME_SECOND_PART_DIGITS); + return false; + } +}; + + +#endif /* ITEM_TIMEFUNC_INCLUDED */ diff --git a/sql/item_vers.cc b/sql/item_vers.cc new file mode 100644 index 00000000..3f648cde --- /dev/null +++ b/sql/item_vers.cc @@ -0,0 +1,196 @@ +/* Copyright (c) 2017, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ + + +/** + @brief + System Versioning items +*/ + +#include "mariadb.h" +#include "sql_priv.h" + +#include "sql_class.h" +#include "tztime.h" +#include "item.h" + +bool Item_func_history::val_bool() +{ + Item_field *f= static_cast(args[0]); + DBUG_ASSERT(f->fixed()); + DBUG_ASSERT(f->field->flags & VERS_ROW_END); + return !f->field->is_max(); +} + +void Item_func_history::print(String *str, enum_query_type query_type) +{ + str->append(func_name_cstring()); + str->append('('); + args[0]->print(str, query_type); + str->append(')'); +} + +Item_func_trt_ts::Item_func_trt_ts(THD *thd, Item* a, TR_table::field_id_t _trt_field) : + Item_datetimefunc(thd, a), + trt_field(_trt_field) +{ + decimals= 6; + null_value= true; + DBUG_ASSERT(arg_count == 1 && args[0]); +} + + +bool +Item_func_trt_ts::get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate) +{ + DBUG_ASSERT(thd); + DBUG_ASSERT(args[0]); + if (args[0]->result_type() != INT_RESULT) + { + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0), + args[0]->type_handler()->name().ptr(), + func_name()); + return true; + } + ulonglong trx_id= args[0]->val_uint(); + if (trx_id == ULONGLONG_MAX) + { + null_value= false; + thd->variables.time_zone->gmt_sec_to_TIME(res, TIMESTAMP_MAX_VALUE); + res->second_part= TIME_MAX_SECOND_PART; + return false; + } + + TR_table trt(thd); + + null_value= !trt.query(trx_id); + if (null_value) + return true; + + return trt[trt_field]->get_date(res, fuzzydate); +} + + +Item_func_trt_id::Item_func_trt_id(THD *thd, Item* a, TR_table::field_id_t _trt_field, + bool _backwards) : + Item_longlong_func(thd, a), + trt_field(_trt_field), + backwards(_backwards) +{ + decimals= 0; + unsigned_flag= 1; + null_value= true; + DBUG_ASSERT(arg_count == 1 && args[0]); +} + 
+Item_func_trt_id::Item_func_trt_id(THD *thd, Item* a, Item* b, TR_table::field_id_t _trt_field) : + Item_longlong_func(thd, a, b), + trt_field(_trt_field), + backwards(false) +{ + decimals= 0; + unsigned_flag= 1; + null_value= true; + DBUG_ASSERT(arg_count == 2 && args[0] && args[1]); +} + +longlong +Item_func_trt_id::get_by_trx_id(ulonglong trx_id) +{ + THD *thd= current_thd; + DBUG_ASSERT(thd); + + if (trx_id == ULONGLONG_MAX) + { + null_value= true; + return 0; + } + + TR_table trt(thd); + null_value= !trt.query(trx_id); + if (null_value) + return 0; + + return trt[trt_field]->val_int(); +} + +longlong +Item_func_trt_id::get_by_commit_ts(MYSQL_TIME &commit_ts, bool backwards) +{ + THD *thd= current_thd; + DBUG_ASSERT(thd); + + TR_table trt(thd); + null_value= !trt.query(commit_ts, backwards); + if (null_value) + return backwards ? ULONGLONG_MAX : 0; + + return trt[trt_field]->val_int(); +} + +longlong +Item_func_trt_id::val_int() +{ + if (args[0]->is_null()) + { + if (arg_count < 2 || trt_field == TR_table::FLD_TRX_ID) + { + null_value= true; + return 0; + } + return get_by_trx_id(args[1]->val_uint()); + } + else + { + MYSQL_TIME commit_ts; + THD *thd= current_thd; + Datetime::Options opt(TIME_CONV_NONE, thd); + if (args[0]->get_date(thd, &commit_ts, opt)) + { + null_value= true; + return 0; + } + if (arg_count > 1) + { + backwards= args[1]->val_bool(); + DBUG_ASSERT(arg_count == 2); + } + return get_by_commit_ts(commit_ts, backwards); + } +} + +Item_func_trt_trx_sees::Item_func_trt_trx_sees(THD *thd, Item* a, Item* b) : + Item_bool_func(thd, a, b), + accept_eq(false) +{ + null_value= true; + DBUG_ASSERT(arg_count == 2 && args[0] && args[1]); +} + +longlong +Item_func_trt_trx_sees::val_int() +{ + THD *thd= current_thd; + DBUG_ASSERT(thd); + + DBUG_ASSERT(arg_count > 1); + ulonglong trx_id1= args[0]->val_uint(); + ulonglong trx_id0= args[1]->val_uint(); + bool result= accept_eq; + + TR_table trt(thd); + null_value= trt.query_sees(result, trx_id1, trx_id0); + 
return result; +} diff --git a/sql/item_vers.h b/sql/item_vers.h new file mode 100644 index 00000000..88c8cea8 --- /dev/null +++ b/sql/item_vers.h @@ -0,0 +1,150 @@ +#ifndef ITEM_VERS_INCLUDED +#define ITEM_VERS_INCLUDED +/* Copyright (c) 2017, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ + + +/* System Versioning items */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +class Item_func_history: public Item_bool_func +{ +public: + /* + @param a Item_field for row_end system field + */ + Item_func_history(THD *thd, Item *a): Item_bool_func(thd, a) + { + DBUG_ASSERT(a->type() == Item::FIELD_ITEM); + } + + bool val_bool() override; + longlong val_int() override { return val_bool(); } + bool fix_length_and_dec(THD *thd) override + { + set_maybe_null(); + null_value= 0; + decimals= 0; + max_length= 1; + return FALSE; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("is_history") }; + return name; + } + void print(String *str, enum_query_type query_type) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_trt_ts: public Item_datetimefunc +{ + TR_table::field_id_t trt_field; +public: + Item_func_trt_ts(THD *thd, Item* a, TR_table::field_id_t _trt_field); + LEX_CSTRING func_name_cstring() const 
override + { + static LEX_CSTRING begin_name= {STRING_WITH_LEN("trt_begin_ts") }; + static LEX_CSTRING commit_name= {STRING_WITH_LEN("trt_commit_ts") }; + return (trt_field == TR_table::FLD_BEGIN_TS) ? begin_name : commit_name; + } + bool get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + bool fix_length_and_dec(THD *thd) override + { fix_attributes_datetime(decimals); return FALSE; } +}; + +class Item_func_trt_id : public Item_longlong_func +{ + TR_table::field_id_t trt_field; + bool backwards; + + longlong get_by_trx_id(ulonglong trx_id); + longlong get_by_commit_ts(MYSQL_TIME &commit_ts, bool backwards); + +public: + Item_func_trt_id(THD *thd, Item* a, TR_table::field_id_t _trt_field, bool _backwards= false); + Item_func_trt_id(THD *thd, Item* a, Item* b, TR_table::field_id_t _trt_field); + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING trx_name= {STRING_WITH_LEN("trt_trx_id") }; + static LEX_CSTRING commit_name= {STRING_WITH_LEN("trt_commit_id") }; + static LEX_CSTRING iso_name= {STRING_WITH_LEN("trt_iso_level") }; + + switch (trt_field) { + case TR_table::FLD_TRX_ID: + return trx_name; + case TR_table::FLD_COMMIT_ID: + return commit_name; + case TR_table::FLD_ISO_LEVEL: + return iso_name; + default: + DBUG_ASSERT(0); + } + return NULL_clex_str; + } + + bool fix_length_and_dec(THD *thd) override + { + bool res= Item_int_func::fix_length_and_dec(thd); + max_length= 20; + return res; + } + + longlong val_int() override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_func_trt_trx_sees : public Item_bool_func +{ +protected: + bool accept_eq; + +public: + Item_func_trt_trx_sees(THD *thd, Item* a, Item* b); + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("trt_trx_sees") }; + return name; + } + longlong val_int() override; + Item *get_copy(THD *thd) 
override + { return get_item_copy(thd, this); } +}; + +class Item_func_trt_trx_sees_eq : + public Item_func_trt_trx_sees +{ +public: + Item_func_trt_trx_sees_eq(THD *thd, Item* a, Item* b) : + Item_func_trt_trx_sees(thd, a, b) + { + accept_eq= true; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("trt_trx_sees_eq") }; + return name; + } +}; + +#endif /* ITEM_VERS_INCLUDED */ diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc new file mode 100644 index 00000000..5438cade --- /dev/null +++ b/sql/item_windowfunc.cc @@ -0,0 +1,578 @@ +/* + Copyright (c) 2016, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "mariadb.h" +#include "item_windowfunc.h" +#include "sql_select.h" // test if group changed + + +bool +Item_window_func::resolve_window_name(THD *thd) +{ + if (window_spec) + { + /* The window name has been already resolved */ + return false; + } + DBUG_ASSERT(window_name != NULL && window_spec == NULL); + const char *ref_name= window_name->str; + + /* !TODO: Add the code to resolve ref_name in outer queries */ + /* + First look for the deinition of the window with 'window_name' + in the current select + */ + List curr_window_specs= + List (thd->lex->current_select->window_specs); + List_iterator_fast it(curr_window_specs); + Window_spec *win_spec; + while((win_spec= it++)) + { + const char *win_spec_name= win_spec->name(); + if (win_spec_name && + my_strcasecmp(system_charset_info, ref_name, win_spec_name) == 0) + { + window_spec= win_spec; + break; + } + } + + if (!window_spec) + { + my_error(ER_WRONG_WINDOW_SPEC_NAME, MYF(0), ref_name); + return true; + } + + return false; +} + + +void +Item_window_func::update_used_tables() +{ + used_tables_cache= 0; + window_func()->update_used_tables(); + used_tables_cache|= window_func()->used_tables(); + for (ORDER *ord= window_spec->partition_list->first; ord; ord=ord->next) + { + Item *item= *ord->item; + item->update_used_tables(); + used_tables_cache|= item->used_tables(); + } + for (ORDER *ord= window_spec->order_list->first; ord; ord=ord->next) + { + Item *item= *ord->item; + item->update_used_tables(); + used_tables_cache|= item->used_tables(); + } +} + + +bool +Item_window_func::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed() == 0); + + if (!thd->lex->current_select || + (thd->lex->current_select->context_analysis_place != SELECT_LIST && + thd->lex->current_select->context_analysis_place 
!= IN_ORDER_BY)) + { + my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0)); + return true; + } + + if (window_name && resolve_window_name(thd)) + return true; + + if (window_spec->window_frame && is_frame_prohibited()) + { + my_error(ER_NOT_ALLOWED_WINDOW_FRAME, MYF(0), + window_func()->func_name()); + return true; + } + + if (window_spec->order_list->elements == 0 && is_order_list_mandatory()) + { + my_error(ER_NO_ORDER_LIST_IN_WINDOW_SPEC, MYF(0), + window_func()->func_name()); + return true; + } + + window_func()->mark_as_window_func_sum_expr(); + + /* + TODO: why the last parameter is 'ref' in this call? What if window_func + decides to substitute itself for something else and does *ref=.... ? + This will substitute *this (an Item_window_func object) with Item_sum + object. Is this the intent? + */ + if (window_func()->fix_fields(thd, ref)) + return true; + + const_item_cache= false; + + with_flags= (with_flags & ~item_with_t::SUM_FUNC) | item_with_t::WINDOW_FUNC; + + if (fix_length_and_dec(thd)) + return TRUE; + + max_length= window_func()->max_length; + set_maybe_null(window_func()->maybe_null()); + + base_flags|= item_base_t::FIXED; + set_phase_to_initial(); + return false; +} + + +/* + @detail + Window function evaluates its arguments when it is scanning the temporary + table in partition/order-by order. That is, arguments should be read from + the temporary table, not from the original base columns. + + In order for this to work, we need to call "split_sum_func" for each + argument. The effect of the call is: + 1. the argument is added into ref_pointer_array. This will cause the + argument to be saved in the temp.table + 2. argument item is replaced with an Item_ref object. this object refers + the argument through the ref_pointer_array. + + then, change_to_use_tmp_fields() will replace ref_pointer_array with an + array that points to the temp.table fields. 
+ This way, when window_func attempts to evaluate its arguments, it will use + Item_ref objects which will read data from the temp.table. + + Note: Before window functions, aggregate functions never needed to do such + transformations on their arguments. This is because grouping operation + does not need to read from the temp.table. + (Q: what happens when we first sort and then do grouping in a + group-after-group mode? dont group by items read from temp.table, then?) +*/ + +void Item_window_func::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List &fields, uint flags) +{ + for (uint i=0; i < window_func()->argument_count(); i++) + { + Item **p_item= &window_func()->arguments()[i]; + (*p_item)->split_sum_func2(thd, ref_pointer_array, fields, p_item, flags); + } + window_func()->setup_caches(thd); +} + +bool Item_window_func::check_result_type_of_order_item() +{ + switch (window_func()->sum_func()) { + case Item_sum::PERCENTILE_CONT_FUNC: + { + Item_result rtype= window_spec->order_list->first->item[0]->cmp_type(); + // TODO (varun) : support date type in percentile_cont function + if (rtype != REAL_RESULT && rtype != INT_RESULT && + rtype != DECIMAL_RESULT && rtype != TIME_RESULT) + { + my_error(ER_WRONG_TYPE_FOR_PERCENTILE_FUNC, MYF(0), + window_func()->func_name()); + return true; + } + return false; + } + case Item_sum::PERCENTILE_DISC_FUNC: + { + Item *src_item= window_spec->order_list->first->item[0]; + Item_sum_percentile_disc *func= + static_cast(window_func()); + func->set_handler(src_item->type_handler()); + func->Type_std_attributes::set(src_item); + Type_std_attributes::set(src_item); + return false; + } + default: + break; + } + return FALSE; +} + +/* + This must be called before attempting to compute the window function values. + @detail + If we attempt to do it in fix_fields(), partition_fields will refer + to the original window function arguments. + We need it to refer to temp.table columns. 
+*/ + +void Item_sum_rank::setup_window_func(THD *thd, Window_spec *window_spec) +{ + /* TODO: move this into Item_window_func? */ + peer_tracker = new Group_bound_tracker(thd, window_spec->order_list); + peer_tracker->init(); + clear(); +} + +void Item_sum_dense_rank::setup_window_func(THD *thd, Window_spec *window_spec) +{ + /* TODO: consider moving this && Item_sum_rank's implementation */ + peer_tracker = new Group_bound_tracker(thd, window_spec->order_list); + peer_tracker->init(); + clear(); +} + +void Item_sum_percentile_disc::setup_window_func(THD *thd, Window_spec *window_spec) +{ + order_item= window_spec->order_list->first->item[0]; + if (!(value= order_item->get_cache(thd))) + return; + value->setup(thd, order_item); + value->store(order_item); +} + +void Item_sum_percentile_cont::setup_window_func(THD *thd, Window_spec *window_spec) +{ + order_item= window_spec->order_list->first->item[0]; + /* TODO(varun): need to discuss and finalise what type should we + return for percentile cont functions + */ + if (!(ceil_value= order_item->get_cache(thd))) + return; + ceil_value->setup(thd, order_item); + ceil_value->store(order_item); + + if (!(floor_value= order_item->get_cache(thd))) + return; + floor_value->setup(thd, order_item); + floor_value->store(order_item); +} +bool Item_sum_percentile_cont::fix_fields(THD *thd, Item **ref) +{ + bool res; + res= Item_sum_num::fix_fields(thd, ref); + if (res) + return res; + + switch(args[0]->cmp_type()) + { + case DECIMAL_RESULT: + case REAL_RESULT: + case INT_RESULT: + break; + default: + my_error(ER_WRONG_TYPE_OF_ARGUMENT, MYF(0), func_name()); + return TRUE; + } + return res; +} +bool Item_sum_percentile_disc::fix_fields(THD *thd, Item **ref) +{ + bool res; + res= Item_sum_num::fix_fields(thd, ref); + if (res) + return res; + + switch(args[0]->cmp_type()) + { + case DECIMAL_RESULT: + case REAL_RESULT: + case INT_RESULT: + break; + default: + my_error(ER_WRONG_TYPE_OF_ARGUMENT, MYF(0), func_name()); + return TRUE; + 
} + return res; + +} + +bool Item_sum_dense_rank::add() +{ + if (peer_tracker->check_if_next_group() || first_add) + { + first_add= false; + dense_rank++; + } + + return false; +} + + +bool Item_sum_rank::add() +{ + row_number++; + if (peer_tracker->check_if_next_group()) + { + /* Row value changed */ + cur_rank= row_number; + } + return false; +} + +bool Item_sum_percent_rank::add() +{ + row_number++; + if (peer_tracker->check_if_next_group()) + { + /* Row value changed. */ + cur_rank= row_number; + } + return false; +} + +void Item_sum_percent_rank::setup_window_func(THD *thd, Window_spec *window_spec) +{ + /* TODO: move this into Item_window_func? */ + peer_tracker = new Group_bound_tracker(thd, window_spec->order_list); + peer_tracker->init(); + clear(); +} + + +bool Item_sum_hybrid_simple::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed() == 0); + + if (init_sum_func_check(thd)) + return TRUE; + + for (uint i= 0; i < arg_count; i++) + { + if (args[i]->fix_fields_if_needed_for_scalar(thd, &args[i])) + return TRUE; + with_flags|= args[i]->with_flags; + } + + if (fix_length_and_dec(thd)) + return TRUE; + + setup_hybrid(thd, args[0]); + result_field=0; + + if (check_sum_func(thd, ref)) + return TRUE; + for (uint i= 0; i < arg_count; i++) + orig_args[i]= args[i]; + + base_flags|= item_base_t::FIXED; + return FALSE; +} + + +bool Item_sum_hybrid_simple::fix_length_and_dec(THD *thd) +{ + set_maybe_null(); + null_value= true; + return args[0]->type_handler()->Item_sum_hybrid_fix_length_and_dec(this); +} + + +bool Item_sum_hybrid_simple::add() +{ + value->store(args[0]); + value->cache_value(); + null_value= value->null_value; + return false; +} + +void Item_sum_hybrid_simple::setup_hybrid(THD *thd, Item *item) +{ + if (!(value= item->get_cache(thd))) + return; + value->setup(thd, item); + value->store(item); + if (!item->const_item()) + value->set_used_tables(RAND_TABLE_BIT); + collation.set(item->collation); +} + +double Item_sum_hybrid_simple::val_real() +{ + 
DBUG_ASSERT(fixed()); + if (null_value) + return 0.0; + double retval= value->val_real(); + if ((null_value= value->null_value)) + DBUG_ASSERT(retval == 0.0); + return retval; +} + +longlong Item_sum_hybrid_simple::val_int() +{ + DBUG_ASSERT(fixed()); + if (null_value) + return 0; + longlong retval= value->val_int(); + if ((null_value= value->null_value)) + DBUG_ASSERT(retval == 0); + return retval; +} + +my_decimal *Item_sum_hybrid_simple::val_decimal(my_decimal *val) +{ + DBUG_ASSERT(fixed()); + if (null_value) + return 0; + my_decimal *retval= value->val_decimal(val); + if ((null_value= value->null_value)) + DBUG_ASSERT(retval == NULL); + return retval; +} + +String * +Item_sum_hybrid_simple::val_str(String *str) +{ + DBUG_ASSERT(fixed()); + if (null_value) + return 0; + String *retval= value->val_str(str); + if ((null_value= value->null_value)) + DBUG_ASSERT(retval == NULL); + return retval; +} + +bool Item_sum_hybrid_simple::val_native(THD *thd, Native *to) +{ + DBUG_ASSERT(fixed()); + if (null_value) + return true; + return val_native_from_item(thd, value, to); +} + +bool Item_sum_hybrid_simple::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) +{ + DBUG_ASSERT(fixed()); + if (null_value) + return true; + bool retval= value->get_date(thd, ltime, fuzzydate); + if ((null_value= value->null_value)) + DBUG_ASSERT(retval == true); + return retval; +} + +Field *Item_sum_hybrid_simple::create_tmp_field(MEM_ROOT *root, + bool group, TABLE *table) +{ + DBUG_ASSERT(0); + return NULL; +} + +void Item_sum_hybrid_simple::reset_field() +{ + switch(result_type()) { + case STRING_RESULT: + { + char buff[MAX_FIELD_WIDTH]; + String tmp(buff,sizeof(buff),result_field->charset()),*res; + + res=args[0]->val_str(&tmp); + if (args[0]->null_value) + { + result_field->set_null(); + result_field->reset(); + } + else + { + result_field->set_notnull(); + result_field->store(res->ptr(),res->length(),tmp.charset()); + } + break; + } + case INT_RESULT: + { + longlong 
nr=args[0]->val_int(); + + if (maybe_null()) + { + if (args[0]->null_value) + { + nr=0; + result_field->set_null(); + } + else + result_field->set_notnull(); + } + result_field->store(nr, unsigned_flag); + break; + } + case REAL_RESULT: + { + double nr= args[0]->val_real(); + + if (maybe_null()) + { + if (args[0]->null_value) + { + nr=0.0; + result_field->set_null(); + } + else + result_field->set_notnull(); + } + result_field->store(nr); + break; + } + case DECIMAL_RESULT: + { + VDec arg_dec(args[0]); + + if (maybe_null()) + { + if (arg_dec.is_null()) + result_field->set_null(); + else + result_field->set_notnull(); + } + /* + We must store zero in the field as we will use the field value in + add() + */ + result_field->store_decimal(arg_dec.ptr_or(&decimal_zero)); + break; + } + case ROW_RESULT: + case TIME_RESULT: + DBUG_ASSERT(0); + } +} + +void Item_sum_hybrid_simple::update_field() +{ + DBUG_ASSERT(0); +} + +void Item_window_func::print(String *str, enum_query_type query_type) +{ + if (only_single_element_order_list()) + { + print_for_percentile_functions(str, query_type); + return; + } + window_func()->print(str, query_type); + str->append(STRING_WITH_LEN(" over ")); + if (!window_spec) + str->append(window_name); + else + window_spec->print(str, query_type); +} +void Item_window_func::print_for_percentile_functions(String *str, enum_query_type query_type) +{ + window_func()->print(str, query_type); + str->append(STRING_WITH_LEN(" within group ")); + str->append('('); + window_spec->print_order(str,query_type); + str->append(')'); + str->append(STRING_WITH_LEN(" over ")); + str->append('('); + window_spec->print_partition(str,query_type); + str->append(')'); +} diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h new file mode 100644 index 00000000..0dfc683c --- /dev/null +++ b/sql/item_windowfunc.h @@ -0,0 +1,1398 @@ +/* + Copyright (c) 2016, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of 
the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef ITEM_WINDOWFUNC_INCLUDED +#define ITEM_WINDOWFUNC_INCLUDED + +#include "item.h" + +class Window_spec; + + +int test_if_group_changed(List &list); + + +/* A wrapper around test_if_group_changed */ +class Group_bound_tracker +{ +public: + + Group_bound_tracker(THD *thd, SQL_I_List *list) + { + for (ORDER *curr = list->first; curr; curr=curr->next) + { + Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE); + group_fields.push_back(tmp); + } + } + + void init() + { + first_check= true; + } + + /* + Check if the current row is in a different group than the previous row + this function was called for. + XXX: Side-effect: The new row's group becomes the current row's group. + + Returns true if there is a change between the current_group and the cached + value, or if it is the first check after a call to init. + */ + bool check_if_next_group() + { + if (test_if_group_changed(group_fields) > -1 || first_check) + { + first_check= false; + return true; + } + return false; + } + + /* + Check if the current row is in a different group than the previous row + check_if_next_group was called for. + + Compares the groups without the additional side effect of updating the + current cached values. 
+ */ + int compare_with_cache() + { + List_iterator li(group_fields); + Cached_item *ptr; + int res; + while ((ptr= li++)) + { + if ((res= ptr->cmp_read_only())) + return res; + } + return 0; + } + ~Group_bound_tracker() + { + group_fields.delete_elements(); + } + +private: + List group_fields; + /* + During the first check_if_next_group, the list of cached_items is not + initialized. The compare function will return that the items match if + the field's value is the same as the Cached_item's default value (0). + This flag makes sure that we always return true during the first check. + + XXX This is better to be implemented within test_if_group_changed, but + since it is used in other parts of the codebase, we keep it here for now. + */ + bool first_check; +}; + +/* + ROW_NUMBER() OVER (...) + + @detail + - This is a Window function (not just an aggregate) + - It can be computed by doing one pass over select output, provided + the output is sorted according to the window definition. +*/ + +class Item_sum_row_number: public Item_sum_int +{ + longlong count; + +public: + + Item_sum_row_number(THD *thd) + : Item_sum_int(thd), count(0) {} + + const Type_handler *type_handler() const override + { return &type_handler_slonglong; } + + void clear() override + { + count= 0; + } + + bool add() override + { + count++; + return false; + } + + void reset_field() override { DBUG_ASSERT(0); } + void update_field() override {} + + enum Sumfunctype sum_func() const override + { + return ROW_NUMBER_FUNC; + } + + longlong val_int() override + { + return count; + } + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("row_number") }; + return name; + } + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + RANK() OVER (...) 
Windowing function + + @detail + - This is a Window function (not just an aggregate) + - It can be computed by doing one pass over select output, provided + the output is sorted according to the window definition. + + The function is defined as: + + "The rank of row R is defined as 1 (one) plus the number of rows that + precede R and are not peers of R" + + "This implies that if two or more rows are not distinct with respect to + the window ordering, then there will be one or more" +*/ + +class Item_sum_rank: public Item_sum_int +{ +protected: + longlong row_number; // just ROW_NUMBER() + longlong cur_rank; // current value + + Group_bound_tracker *peer_tracker; +public: + + Item_sum_rank(THD *thd) : Item_sum_int(thd), peer_tracker(NULL) {} + + const Type_handler *type_handler() const override + { return &type_handler_slonglong; } + + void clear() override + { + /* This is called on partition start */ + cur_rank= 1; + row_number= 0; + } + + bool add() override; + + longlong val_int() override + { + return cur_rank; + } + + void reset_field() override { DBUG_ASSERT(0); } + void update_field() override {} + + enum Sumfunctype sum_func () const override + { + return RANK_FUNC; + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("rank") }; + return name; + } + + void setup_window_func(THD *thd, Window_spec *window_spec) override; + + void cleanup() override + { + if (peer_tracker) + { + delete peer_tracker; + peer_tracker= NULL; + } + Item_sum_int::cleanup(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + DENSE_RANK() OVER (...) Windowing function + + @detail + - This is a Window function (not just an aggregate) + - It can be computed by doing one pass over select output, provided + the output is sorted according to the window definition. 
+ + The function is defined as: + + "If DENSE_RANK is specified, then the rank of row R is defined as the + number of rows preceding and including R that are distinct with respect + to the window ordering" + + "This implies that there are no gaps in the sequential rank numbering of + rows in each window partition." +*/ + + +class Item_sum_dense_rank: public Item_sum_int +{ + longlong dense_rank; + bool first_add; + Group_bound_tracker *peer_tracker; + public: + /* + XXX(cvicentiu) This class could potentially be implemented in the rank + class, with a switch for the DENSE case. + */ + void clear() override + { + dense_rank= 0; + first_add= true; + } + bool add() override; + void reset_field() override { DBUG_ASSERT(0); } + void update_field() override {} + longlong val_int() override + { + return dense_rank; + } + + Item_sum_dense_rank(THD *thd) + : Item_sum_int(thd), dense_rank(0), first_add(true), peer_tracker(NULL) {} + const Type_handler *type_handler() const override + { return &type_handler_slonglong; } + enum Sumfunctype sum_func () const override + { + return DENSE_RANK_FUNC; + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("dense_rank") }; + return name; + } + + void setup_window_func(THD *thd, Window_spec *window_spec) override; + + void cleanup() override + { + if (peer_tracker) + { + delete peer_tracker; + peer_tracker= NULL; + } + Item_sum_int::cleanup(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +class Item_sum_hybrid_simple : public Item_sum_hybrid +{ + public: + Item_sum_hybrid_simple(THD *thd, Item *arg): + Item_sum_hybrid(thd, arg), + value(NULL) + { } + + Item_sum_hybrid_simple(THD *thd, Item *arg1, Item *arg2): + Item_sum_hybrid(thd, arg1, arg2), + value(NULL) + { } + + bool add() override; + bool fix_fields(THD *, Item **) override; + bool fix_length_and_dec(THD *thd) override; + void setup_hybrid(THD *thd, Item *item); + double val_real() override; 
+ longlong val_int() override; + my_decimal *val_decimal(my_decimal *) override; + void reset_field() override; + String *val_str(String *) override; + bool val_native(THD *thd, Native *to) override; + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; + const Type_handler *type_handler() const override + { return Type_handler_hybrid_field_type::type_handler(); } + void update_field() override; + Field *create_tmp_field(MEM_ROOT *root, bool group, TABLE *table) override; + void clear() override + { + value->clear(); + null_value= 1; + } + + private: + Item_cache *value; +}; + +/* + This item will remember the first value added to it. It will not update + the value unless it is cleared. +*/ +class Item_sum_first_value : public Item_sum_hybrid_simple +{ + public: + Item_sum_first_value(THD* thd, Item* arg_expr) : + Item_sum_hybrid_simple(thd, arg_expr) {} + + + enum Sumfunctype sum_func () const override + { + return FIRST_VALUE_FUNC; + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("first_value") }; + return name; + } + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +/* + This item will remember the last value added to it. + + This item does not support removal, and can be cleared only by calling + clear(). 
+*/ +class Item_sum_last_value : public Item_sum_hybrid_simple +{ + public: + Item_sum_last_value(THD* thd, Item* arg_expr) : + Item_sum_hybrid_simple(thd, arg_expr) {} + + enum Sumfunctype sum_func() const override + { + return LAST_VALUE_FUNC; + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("last_value") }; + return name; + } + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_sum_nth_value : public Item_sum_hybrid_simple +{ + public: + Item_sum_nth_value(THD *thd, Item *arg_expr, Item* offset_expr) : + Item_sum_hybrid_simple(thd, arg_expr, offset_expr) {} + + enum Sumfunctype sum_func() const override + { + return NTH_VALUE_FUNC; + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("nth_value") }; + return name; + } + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_sum_lead : public Item_sum_hybrid_simple +{ + public: + Item_sum_lead(THD *thd, Item *arg_expr, Item* offset_expr) : + Item_sum_hybrid_simple(thd, arg_expr, offset_expr) {} + + enum Sumfunctype sum_func() const override + { + return LEAD_FUNC; + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("lead") }; + return name; + } + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_sum_lag : public Item_sum_hybrid_simple +{ + public: + Item_sum_lag(THD *thd, Item *arg_expr, Item* offset_expr) : + Item_sum_hybrid_simple(thd, arg_expr, offset_expr) {} + + enum Sumfunctype sum_func() const override + { + return LAG_FUNC; + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("lag") }; + return name; + } + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Partition_row_count +{ +public: + Partition_row_count() :partition_row_count_(0) { } 
+ void set_partition_row_count(ulonglong count) + { + partition_row_count_ = count; + } + double calc_val_real(bool *null_value, + ulonglong current_row_count) + { + if ((*null_value= (partition_row_count_ == 0))) + return 0; + return static_cast(current_row_count) / partition_row_count_; + } +protected: + longlong get_row_count() { return partition_row_count_; } + ulonglong partition_row_count_; +}; + + +class Current_row_count +{ +public: + Current_row_count() :current_row_count_(0) { } +protected: + ulonglong get_row_number() { return current_row_count_ ; } + ulonglong current_row_count_; +}; + + +/* + @detail + "The relative rank of a row R is defined as (RK-1)/(NR-1), where RK is + defined to be the RANK of R and NR is defined to be the number of rows in + the window partition of R." + + Computation of this function requires two passes: + - First pass to find #rows in the partition + This is held within the row_count context. + - Second pass to compute rank of current row and the value of the function +*/ +class Item_sum_percent_rank: public Item_sum_double, + public Partition_row_count +{ + public: + Item_sum_percent_rank(THD *thd) + : Item_sum_double(thd), cur_rank(1), peer_tracker(NULL) {} + + longlong val_int() override + { + /* + Percent rank is a real value so calling the integer value should never + happen. It makes no sense as it gets truncated to either 0 or 1. + */ + DBUG_ASSERT(0); + return 0; + } + + double val_real() override + { + /* + We can not get the real value without knowing the number of rows + in the partition. Don't divide by 0. + */ + ulonglong partition_rows = get_row_count(); + null_value= partition_rows > 0 ? false : true; + + return partition_rows > 1 ? 
+ static_cast(cur_rank - 1) / (partition_rows - 1) : 0; + } + + enum Sumfunctype sum_func () const override + { + return PERCENT_RANK_FUNC; + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("percent_rank") }; + return name; + } + + void update_field() override {} + + void clear() override + { + cur_rank= 1; + row_number= 0; + } + bool add() override; + const Type_handler *type_handler() const override + { return &type_handler_double; } + + bool fix_length_and_dec(THD *thd) override + { + decimals = 10; // TODO-cvicentiu find out how many decimals the standard + // requires. + return FALSE; + } + + void setup_window_func(THD *thd, Window_spec *window_spec) override; + + void reset_field() override { DBUG_ASSERT(0); } + + void set_partition_row_count(ulonglong count) override + { + Partition_row_count::set_partition_row_count(count); + } + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + + private: + longlong cur_rank; // Current rank of the current row. + longlong row_number; // Value if this were ROW_NUMBER() function. + + Group_bound_tracker *peer_tracker; + + void cleanup() override + { + if (peer_tracker) + { + delete peer_tracker; + peer_tracker= NULL; + } + Item_sum_num::cleanup(); + } +}; + + + + +/* + @detail + "The relative rank of a row R is defined as NP/NR, where + - NP is defined to be the number of rows preceding or peer with R in the + window ordering of the window partition of R + - NR is defined to be the number of rows in the window partition of R. + + Just like with Item_sum_percent_rank, computation of this function requires + two passes. 
+*/ + +class Item_sum_cume_dist: public Item_sum_double, + public Partition_row_count, + public Current_row_count +{ + public: + Item_sum_cume_dist(THD *thd) :Item_sum_double(thd) { } + Item_sum_cume_dist(THD *thd, Item *arg) :Item_sum_double(thd, arg) { } + + double val_real() override + { + return calc_val_real(&null_value, current_row_count_); + } + + bool add() override + { + current_row_count_++; + return false; + } + + enum Sumfunctype sum_func() const override + { + return CUME_DIST_FUNC; + } + + void clear() override + { + current_row_count_= 0; + partition_row_count_= 0; + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("cume_dist") }; + return name; + } + + void update_field() override {} + const Type_handler *type_handler() const override + { return &type_handler_double; } + + bool fix_length_and_dec(THD *thd) override + { + decimals = 10; // TODO-cvicentiu find out how many decimals the standard + // requires. + return FALSE; + } + + void reset_field() override { DBUG_ASSERT(0); } + + void set_partition_row_count(ulonglong count) override + { + Partition_row_count::set_partition_row_count(count); + } + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + +}; + +class Item_sum_ntile : public Item_sum_int, + public Partition_row_count, + public Current_row_count +{ + public: + Item_sum_ntile(THD* thd, Item* num_quantiles_expr) : + Item_sum_int(thd, num_quantiles_expr), n_old_val_(0) + { } + + longlong val_int() override + { + if (get_row_count() == 0) + { + null_value= true; + return 0; + } + + longlong num_quantiles= get_num_quantiles(); + + if (num_quantiles <= 0 || + (static_cast(num_quantiles) != n_old_val_ && n_old_val_ > 0)) + { + my_error(ER_INVALID_NTILE_ARGUMENT, MYF(0)); + return true; + } + n_old_val_= static_cast(num_quantiles); + null_value= false; + ulonglong quantile_size = get_row_count() / num_quantiles; + ulonglong extra_rows = get_row_count() - quantile_size * 
num_quantiles; + + if (current_row_count_ <= extra_rows * (quantile_size + 1)) + return (current_row_count_ - 1) / (quantile_size + 1) + 1; + + return (current_row_count_ - 1 - extra_rows) / quantile_size + 1; + } + + bool add() override + { + current_row_count_++; + return false; + } + + enum Sumfunctype sum_func() const override + { + return NTILE_FUNC; + } + + void clear() override + { + current_row_count_= 0; + partition_row_count_= 0; + n_old_val_= 0; + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("ntile") }; + return name; + } + + void update_field() override {} + + const Type_handler *type_handler() const override + { return &type_handler_slonglong; } + + void reset_field() override { DBUG_ASSERT(0); } + + void set_partition_row_count(ulonglong count) override + { + Partition_row_count::set_partition_row_count(count); + } + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + + private: + longlong get_num_quantiles() { return args[0]->val_int(); } + ulonglong n_old_val_; +}; + +class Item_sum_percentile_disc : public Item_sum_num, + public Type_handler_hybrid_field_type, + public Partition_row_count, + public Current_row_count +{ +public: + Item_sum_percentile_disc(THD *thd, Item* arg) : Item_sum_num(thd, arg), + Type_handler_hybrid_field_type(&type_handler_slonglong), + value(NULL), val_calculated(FALSE), first_call(TRUE), + prev_value(0), order_item(NULL){} + + double val_real() override + { + if (get_row_count() == 0 || get_arg(0)->is_null()) + { + null_value= true; + return 0; + } + null_value= false; + return value->val_real(); + } + + longlong val_int() override + { + if (get_row_count() == 0 || get_arg(0)->is_null()) + { + null_value= true; + return 0; + } + null_value= false; + return value->val_int(); + } + + my_decimal* val_decimal(my_decimal* dec) override + { + if (get_row_count() == 0 || get_arg(0)->is_null()) + { + null_value= true; + return 0; + } + null_value= 
false; + return value->val_decimal(dec); + } + + String* val_str(String *str) override + { + if (get_row_count() == 0 || get_arg(0)->is_null()) + { + null_value= true; + return 0; + } + null_value= false; + return value->val_str(str); + } + + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + if (get_row_count() == 0 || get_arg(0)->is_null()) + { + null_value= true; + return true; + } + null_value= false; + return value->get_date(thd, ltime, fuzzydate); + } + + bool val_native(THD *thd, Native *to) override + { + if (get_row_count() == 0 || get_arg(0)->is_null()) + { + null_value= true; + return true; + } + null_value= false; + return value->val_native(thd, to); + } + + bool add() override + { + Item *arg= get_arg(0); + if (arg->is_null()) + return false; + + if (first_call) + { + prev_value= arg->val_real(); + if (prev_value > 1 || prev_value < 0) + { + my_error(ER_ARGUMENT_OUT_OF_RANGE, MYF(0), func_name()); + return true; + } + first_call= false; + } + + double arg_val= arg->val_real(); + + if (prev_value != arg_val) + { + my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0), func_name()); + return true; + } + + if (val_calculated) + return false; + + value->store(order_item); + value->cache_value(); + if (value->null_value) + return false; + + current_row_count_++; + double val= calc_val_real(&null_value, current_row_count_); + + if (val >= prev_value && !val_calculated) + val_calculated= true; + return false; + } + + enum Sumfunctype sum_func() const override + { + return PERCENTILE_DISC_FUNC; + } + + void clear() override + { + val_calculated= false; + first_call= true; + value->clear(); + partition_row_count_= 0; + current_row_count_= 0; + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("percentile_disc") }; + return name; + } + + void update_field() override {} + const Type_handler *type_handler() const override + {return Type_handler_hybrid_field_type::type_handler();} + + bool 
fix_length_and_dec(THD *thd) override + { + decimals = 10; // TODO-cvicentiu find out how many decimals the standard + // requires. + return FALSE; + } + + void reset_field() override { DBUG_ASSERT(0); } + + void set_partition_row_count(ulonglong count) override + { + Partition_row_count::set_partition_row_count(count); + } + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + void setup_window_func(THD *thd, Window_spec *window_spec) override; + void setup_hybrid(THD *thd, Item *item); + bool fix_fields(THD *thd, Item **ref) override; + +private: + Item_cache *value; + bool val_calculated; + bool first_call; + double prev_value; + Item *order_item; +}; + +class Item_sum_percentile_cont : public Item_sum_double, + public Partition_row_count, + public Current_row_count +{ +public: + Item_sum_percentile_cont(THD *thd, Item* arg) : Item_sum_double(thd, arg), + floor_value(NULL), ceil_value(NULL), first_call(TRUE),prev_value(0), + ceil_val_calculated(FALSE), floor_val_calculated(FALSE), order_item(NULL){} + + double val_real() override + { + if (get_row_count() == 0 || get_arg(0)->is_null()) + { + null_value= true; + return 0; + } + null_value= false; + double val= 1 + prev_value * (get_row_count()-1); + + /* + Applying the formula to get the value + If (CRN = FRN = RN) then the result is (value of expression from row at RN) + Otherwise the result is + (CRN - RN) * (value of expression for row at FRN) + + (RN - FRN) * (value of expression for row at CRN) + */ + + if(ceil(val) == floor(val)) + return floor_value->val_real(); + + double ret_val= ((val - floor(val)) * ceil_value->val_real()) + + ((ceil(val) - val) * floor_value->val_real()); + + return ret_val; + } + + bool add() override + { + Item *arg= get_arg(0); + if (arg->is_null()) + return false; + + if (first_call) + { + first_call= false; + prev_value= arg->val_real(); + if (prev_value > 1 || prev_value < 0) + { + my_error(ER_ARGUMENT_OUT_OF_RANGE, MYF(0), func_name()); + return true; + 
} + } + + double arg_val= arg->val_real(); + if (prev_value != arg_val) + { + my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0), func_name()); + return true; + } + + if (!floor_val_calculated) + { + floor_value->store(order_item); + floor_value->cache_value(); + if (floor_value->null_value) + return false; + } + if (floor_val_calculated && !ceil_val_calculated) + { + ceil_value->store(order_item); + ceil_value->cache_value(); + if (ceil_value->null_value) + return false; + } + + current_row_count_++; + double val= 1 + prev_value * (get_row_count()-1); + + if (!floor_val_calculated && get_row_number() == floor(val)) + floor_val_calculated= true; + + if (!ceil_val_calculated && get_row_number() == ceil(val)) + ceil_val_calculated= true; + return false; + } + + enum Sumfunctype sum_func() const override + { + return PERCENTILE_CONT_FUNC; + } + + void clear() override + { + first_call= true; + floor_value->clear(); + ceil_value->clear(); + floor_val_calculated= false; + ceil_val_calculated= false; + partition_row_count_= 0; + current_row_count_= 0; + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("percentile_cont") }; + return name; + } + void update_field() override {} + + bool fix_length_and_dec(THD *thd) override + { + decimals = 10; // TODO-cvicentiu find out how many decimals the standard + // requires. 
+ return FALSE; + } + + void reset_field() override { DBUG_ASSERT(0); } + + void set_partition_row_count(ulonglong count) override + { + Partition_row_count::set_partition_row_count(count); + } + + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + void setup_window_func(THD *thd, Window_spec *window_spec) override; + void setup_hybrid(THD *thd, Item *item); + bool fix_fields(THD *thd, Item **ref) override; + +private: + Item_cache *floor_value; + Item_cache *ceil_value; + bool first_call; + double prev_value; + bool ceil_val_calculated; + bool floor_val_calculated; + Item *order_item; +}; + + + + +class Item_window_func : public Item_func_or_sum +{ + /* Window function parameters as we've got them from the parser */ +public: + LEX_CSTRING *window_name; +public: + Window_spec *window_spec; + +public: + Item_window_func(THD *thd, Item_sum *win_func, LEX_CSTRING *win_name) + : Item_func_or_sum(thd, (Item *) win_func), + window_name(win_name), window_spec(NULL), + force_return_blank(true), + read_value_from_result_field(false) {} + + Item_window_func(THD *thd, Item_sum *win_func, Window_spec *win_spec) + : Item_func_or_sum(thd, (Item *) win_func), + window_name(NULL), window_spec(win_spec), + force_return_blank(true), + read_value_from_result_field(false) {} + + Item_sum *window_func() const { return (Item_sum *) args[0]; } + + void update_used_tables() override; + + /* + This is used by filesort to mark the columns it needs to read (because they + participate in the sort criteria and/or row retrieval. Window functions can + only be used in sort criteria). + + Sorting by window function value is only done after the window functions + have been computed. In that case, window function will need to read its + temp.table field. In order to allow that, mark that field in the read_set. 
+ */ + bool register_field_in_read_map(void *arg) override + { + TABLE *table= (TABLE*) arg; + if (result_field && (result_field->table == table || !table)) + { + bitmap_set_bit(result_field->table->read_set, result_field->field_index); + } + return 0; + } + + bool is_frame_prohibited() const + { + switch (window_func()->sum_func()) { + case Item_sum::ROW_NUMBER_FUNC: + case Item_sum::RANK_FUNC: + case Item_sum::DENSE_RANK_FUNC: + case Item_sum::PERCENT_RANK_FUNC: + case Item_sum::CUME_DIST_FUNC: + case Item_sum::NTILE_FUNC: + case Item_sum::PERCENTILE_CONT_FUNC: + case Item_sum::PERCENTILE_DISC_FUNC: + return true; + default: + return false; + } + } + + bool requires_special_cursors() const + { + switch (window_func()->sum_func()) { + case Item_sum::FIRST_VALUE_FUNC: + case Item_sum::LAST_VALUE_FUNC: + case Item_sum::NTH_VALUE_FUNC: + case Item_sum::LAG_FUNC: + case Item_sum::LEAD_FUNC: + return true; + default: + return false; + } + } + + bool requires_partition_size() const + { + switch (window_func()->sum_func()) { + case Item_sum::PERCENT_RANK_FUNC: + case Item_sum::CUME_DIST_FUNC: + case Item_sum::NTILE_FUNC: + case Item_sum::PERCENTILE_CONT_FUNC: + case Item_sum::PERCENTILE_DISC_FUNC: + return true; + default: + return false; + } + } + + bool requires_peer_size() const + { + switch (window_func()->sum_func()) { + case Item_sum::CUME_DIST_FUNC: + return true; + default: + return false; + } + } + + bool is_order_list_mandatory() const + { + switch (window_func()->sum_func()) { + case Item_sum::RANK_FUNC: + case Item_sum::DENSE_RANK_FUNC: + case Item_sum::PERCENT_RANK_FUNC: + case Item_sum::CUME_DIST_FUNC: + case Item_sum::LAG_FUNC: + case Item_sum::LEAD_FUNC: + case Item_sum::PERCENTILE_CONT_FUNC: + case Item_sum::PERCENTILE_DISC_FUNC: + return true; + default: + return false; + } + } + + bool only_single_element_order_list() const + { + switch (window_func()->sum_func()){ + case Item_sum::PERCENTILE_CONT_FUNC: + case Item_sum::PERCENTILE_DISC_FUNC: + return 
true; + default: + return false; + } + } + + bool check_result_type_of_order_item(); + + + + /* + Computation functions. + TODO: consoder merging these with class Group_bound_tracker. + */ + void setup_partition_border_check(THD *thd); + + const Type_handler *type_handler() const override + { + return ((Item_sum *) args[0])->type_handler(); + } + enum Item::Type type() const override { return Item::WINDOW_FUNC_ITEM; } + +private: + /* + Window functions are very special functions, so val_() methods have + special meaning for them: + + - Phase#1, "Initial" we run the join and put its result into temporary + table. For window functions, we write the default value (NULL?) as + a placeholder. + + - Phase#2: "Computation": executor does the scan in {PARTITION, ORDER BY} + order of this window function. It calls appropriate methods to inform + the window function about rows entering/leaving the window. + It calls window_func()->val_int() so that current window function value + can be saved and stored in the temp.table. + + - Phase#3: "Retrieval" the temporary table is read and passed to query + output. However, Item_window_func still remains in the select list, + so item_windowfunc->val_int() will be called. + During Phase#3, read_value_from_result_field= true. 
+ */ + bool force_return_blank; + bool read_value_from_result_field; + void print_for_percentile_functions(String *str, enum_query_type query_type); + +public: + void set_phase_to_initial() + { + force_return_blank= true; + read_value_from_result_field= false; + } + void set_phase_to_computation() + { + force_return_blank= false; + read_value_from_result_field= false; + } + void set_phase_to_retrieval() + { + force_return_blank= false; + read_value_from_result_field= true; + } + + bool is_null() override + { + if (force_return_blank) + return true; + + if (read_value_from_result_field) + return result_field->is_null(); + + return window_func()->is_null(); + } + + double val_real() override + { + double res; + if (force_return_blank) + { + res= 0.0; + null_value= true; + } + else if (read_value_from_result_field) + { + res= result_field->val_real(); + null_value= result_field->is_null(); + } + else + { + res= window_func()->val_real(); + null_value= window_func()->null_value; + } + return res; + } + + longlong val_int() override + { + longlong res; + if (force_return_blank) + { + res= 0; + null_value= true; + } + else if (read_value_from_result_field) + { + res= result_field->val_int(); + null_value= result_field->is_null(); + } + else + { + res= window_func()->val_int(); + null_value= window_func()->null_value; + } + return res; + } + + String* val_str(String* str) override + { + String *res; + if (force_return_blank) + { + null_value= true; + res= NULL; + } + else if (read_value_from_result_field) + { + if ((null_value= result_field->is_null())) + res= NULL; + else + res= result_field->val_str(str); + } + else + { + res= window_func()->val_str(str); + null_value= window_func()->null_value; + } + return res; + } + + bool val_native(THD *thd, Native *to) override + { + if (force_return_blank) + return null_value= true; + if (read_value_from_result_field) + return val_native_from_field(result_field, to); + return val_native_from_item(thd, window_func(), to); + } + + 
my_decimal* val_decimal(my_decimal* dec) override + { + my_decimal *res; + if (force_return_blank) + { + null_value= true; + res= NULL; + } + else if (read_value_from_result_field) + { + if ((null_value= result_field->is_null())) + res= NULL; + else + res= result_field->val_decimal(dec); + } + else + { + res= window_func()->val_decimal(dec); + null_value= window_func()->null_value; + } + return res; + } + + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + bool res; + if (force_return_blank) + { + null_value= true; + res= true; + } + else if (read_value_from_result_field) + { + if ((null_value= result_field->is_null())) + res= true; + else + res= result_field->get_date(ltime, fuzzydate); + } + else + { + res= window_func()->get_date(thd, ltime, fuzzydate); + null_value= window_func()->null_value; + } + return res; + } + + void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List &fields, uint flags) override; + + bool fix_length_and_dec(THD *thd) override + { + Type_std_attributes::set(window_func()); + return FALSE; + } + + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("WF") }; + return name; + } + + bool fix_fields(THD *thd, Item **ref) override; + + bool resolve_window_name(THD *thd); + + void print(String *str, enum_query_type query_type) override; + + Item *get_copy(THD *thd) override { return 0; } + +}; + +#endif /* ITEM_WINDOWFUNC_INCLUDED */ diff --git a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc new file mode 100644 index 00000000..85ec0516 --- /dev/null +++ b/sql/item_xmlfunc.cc @@ -0,0 +1,3130 @@ +/* Copyright (c) 2005, 2019, Oracle and/or its affiliates. + Copyright (c) 2009, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +/* + It is necessary to include set_var.h instead of item.h because there + are dependencies on include order for set_var.h and item.h. This + will be resolved later. +*/ +#include "sql_class.h" // set_var.h: THD +#include "set_var.h" +#include "my_xml.h" +#include "sp_pcontext.h" +#include "sql_class.h" // THD + +/* + TODO: future development directions: + 1. add nodeset_to_nodeset_comparator + 2. add lacking functions: + - name() + - lang() + - string() + - id() + - translate() + - local-name() + - starts-with() + - namespace-uri() + - substring-after() + - normalize-space() + - substring-before() + 3. 
add lacking axis: + - following-sibling + - following, + - preceding-sibling + - preceding +*/ + + +/* Structure to store a parsed XML tree */ +typedef struct my_xml_node_st +{ + uint level; /* level in XML tree, 0 means root node */ + enum my_xml_node_type type; /* node type: node, or attribute, or text */ + uint parent; /* link to the parent */ + const char *beg; /* beginning of the name or text */ + const char *end; /* end of the name or text */ + const char *tagend; /* where this tag ends */ +} MY_XML_NODE; + + +/* Lexical analyzer token */ +typedef struct my_xpath_lex_st +{ + int term; /* token type, see MY_XPATH_LEX_XXXXX below */ + const char *beg; /* beginnign of the token */ + const char *end; /* end of the token */ +} MY_XPATH_LEX; + + +/* XPath function creator */ +typedef struct my_xpath_function_names_st +{ + const char *name; /* function name */ + size_t length; /* function name length */ + size_t minargs; /* min number of arguments */ + size_t maxargs; /* max number of arguments */ + Item *(*create)(struct my_xpath_st *xpath, Item **args, uint nargs); +} MY_XPATH_FUNC; + + +/* XPath query parser */ +typedef struct my_xpath_st +{ + THD *thd; + int debug; + MY_XPATH_LEX query; /* Whole query */ + MY_XPATH_LEX lasttok; /* last scanned token */ + MY_XPATH_LEX prevtok; /* previous scanned token */ + int axis; /* last scanned axis */ + int extra; /* last scanned "extra", context dependent */ + MY_XPATH_FUNC *func; /* last scanned function creator */ + Item *item; /* current expression */ + Item *context; /* last scanned context */ + Item *rootelement; /* The root element */ + Native *context_cache; /* last context provider */ + String *pxml; /* Parsed XML, an array of MY_XML_NODE */ + CHARSET_INFO *cs; /* character set/collation string comparison */ + int error; +} MY_XPATH; + + +static Type_handler_long_blob type_handler_xpath_nodeset; + + +/* + Common features of the functions returning a node set. 
+*/ +class Item_nodeset_func :public Item_str_func +{ +protected: + NativeNodesetBuffer tmp_native_value, tmp2_native_value; + MY_XPATH_FLT *fltbeg, *fltend; + MY_XML_NODE *nodebeg, *nodeend; + uint numnodes; +public: + String *pxml; + NativeNodesetBuffer context_cache; + Item_nodeset_func(THD *thd, String *pxml_arg): + Item_str_func(thd), pxml(pxml_arg) {} + Item_nodeset_func(THD *thd, Item *a, String *pxml_arg): + Item_str_func(thd, a), pxml(pxml_arg) {} + Item_nodeset_func(THD *thd, Item *a, Item *b, String *pxml_arg): + Item_str_func(thd, a, b), pxml(pxml_arg) {} + Item_nodeset_func(THD *thd, Item *a, Item *b, Item *c, String *pxml_arg): + Item_str_func(thd, a, b, c), pxml(pxml_arg) {} + void prepare_nodes() + { + nodebeg= (MY_XML_NODE*) pxml->ptr(); + nodeend= (MY_XML_NODE*) (pxml->ptr() + pxml->length()); + numnodes= (uint)(nodeend - nodebeg); + } + void prepare(THD *thd, Native *nodeset) + { + prepare_nodes(); + args[0]->val_native(thd, &tmp_native_value); + fltbeg= (MY_XPATH_FLT*) tmp_native_value.ptr(); + fltend= (MY_XPATH_FLT*) tmp_native_value.end(); + nodeset->length(0); + } + const Type_handler *type_handler() const override + { + return &type_handler_xpath_nodeset; + } + const Type_handler *fixed_type_handler() const override + { + return &type_handler_xpath_nodeset; + } + Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src *src, + const Tmp_field_param *param) override + { + DBUG_ASSERT(0); + return NULL; + } + String *val_str(String *str) override + { + prepare_nodes(); + val_native(current_thd, &tmp2_native_value); + fltbeg= (MY_XPATH_FLT*) tmp2_native_value.ptr(); + fltend= (MY_XPATH_FLT*) tmp2_native_value.end(); + String active; + active.alloc(numnodes); + bzero((char*) active.ptr(), numnodes); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *node; + uint j; + for (j=0, node= nodebeg ; j < numnodes; j++, node++) + { + if (node->type == MY_XML_NODE_TEXT && + node->parent == flt->num) + active[j]= 1; + } 
+ } + + str->length(0); + str->set_charset(collation.collation); + for (uint i=0 ; i < numnodes; i++) + { + if(active[i]) + { + if (str->length()) + str->append(" ", 1, &my_charset_latin1); + str->append(nodebeg[i].beg, nodebeg[i].end - nodebeg[i].beg); + } + } + return str; + } + bool fix_length_and_dec(THD *thd) override + { + max_length= MAX_BLOB_WIDTH; + collation.collation= pxml->charset(); + // To avoid premature evaluation, mark all nodeset functions as non-const. + used_tables_cache= RAND_TABLE_BIT; + const_item_cache= false; + return FALSE; + } + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("nodeset") }; + } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), arg, VCOL_IMPOSSIBLE); + } + +}; + + +/* Returns an XML root */ +class Item_nodeset_func_rootelement :public Item_nodeset_func +{ +public: + Item_nodeset_func_rootelement(THD *thd, String *pxml): + Item_nodeset_func(thd, pxml) {} + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_rootelement") }; + } + bool val_native(THD *thd, Native *nodeset) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* Returns a Union of two node sets */ +class Item_nodeset_func_union :public Item_nodeset_func +{ +public: + Item_nodeset_func_union(THD *thd, Item *a, Item *b, String *pxml): + Item_nodeset_func(thd, a, b, pxml) {} + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_union") }; + } + bool val_native(THD *thd, Native *nodeset) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* Makes one step towards the given axis */ +class Item_nodeset_func_axisbyname :public Item_nodeset_func +{ + const char *node_name; + uint node_namelen; +public: + Item_nodeset_func_axisbyname(THD *thd, Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func(thd, a, pxml), 
node_name(n_arg), node_namelen(l_arg) { } + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_axisbyname") }; + } + bool validname(MY_XML_NODE *n) + { + if (node_name[0] == '*') + return 1; + return (node_namelen == (uint) (n->end - n->beg)) && + !memcmp(node_name, n->beg, node_namelen); + } +}; + + +/* Returns self */ +class Item_nodeset_func_selfbyname: public Item_nodeset_func_axisbyname +{ +public: + Item_nodeset_func_selfbyname(THD *thd, Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func_axisbyname(thd, a, n_arg, l_arg, pxml) {} + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_selfbyname") }; + } + bool val_native(THD *thd, Native *nodeset) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* Returns children */ +class Item_nodeset_func_childbyname: public Item_nodeset_func_axisbyname +{ +public: + Item_nodeset_func_childbyname(THD *thd, Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func_axisbyname(thd, a, n_arg, l_arg, pxml) {} + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_childbyname") }; + } + bool val_native(THD *thd, Native *nodeset) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* Returns descendants */ +class Item_nodeset_func_descendantbyname: public Item_nodeset_func_axisbyname +{ + bool need_self; +public: + Item_nodeset_func_descendantbyname(THD *thd, Item *a, const char *n_arg, uint l_arg, + String *pxml, bool need_self_arg): + Item_nodeset_func_axisbyname(thd, a, n_arg, l_arg, pxml), + need_self(need_self_arg) {} + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_descendantbyname") }; + } + bool val_native(THD *thd, Native *nodeset) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* Returns ancestors */ +class 
Item_nodeset_func_ancestorbyname: public Item_nodeset_func_axisbyname +{ + bool need_self; +public: + Item_nodeset_func_ancestorbyname(THD *thd, Item *a, const char *n_arg, uint l_arg, + String *pxml, bool need_self_arg): + Item_nodeset_func_axisbyname(thd, a, n_arg, l_arg, pxml), + need_self(need_self_arg) {} + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_ancestorbyname") }; + } + bool val_native(THD *thd, Native *nodeset) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* Returns parents */ +class Item_nodeset_func_parentbyname: public Item_nodeset_func_axisbyname +{ +public: + Item_nodeset_func_parentbyname(THD *thd, Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func_axisbyname(thd, a, n_arg, l_arg, pxml) {} + + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_parentbyname") }; + } + bool val_native(THD *thd, Native *nodeset) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* Returns attributes */ +class Item_nodeset_func_attributebyname: public Item_nodeset_func_axisbyname +{ +public: + Item_nodeset_func_attributebyname(THD *thd, Item *a, const char *n_arg, + uint l_arg, String *pxml): + Item_nodeset_func_axisbyname(thd, a, n_arg, l_arg, pxml) {} + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_attributebyname") }; + } + bool val_native(THD *thd, Native *nodeset) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + Condition iterator: goes through all nodes in the current + context and checks a condition, returning those nodes + giving TRUE condition result. 
+*/ +class Item_nodeset_func_predicate :public Item_nodeset_func +{ +public: + Item_nodeset_func_predicate(THD *thd, Item *a, Item *b, String *pxml): + Item_nodeset_func(thd, a, b, pxml) {} + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_predicate") }; + } + bool val_native(THD *thd, Native *nodeset) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* Selects nodes with a given position in context */ +class Item_nodeset_func_elementbyindex :public Item_nodeset_func +{ +public: + Item_nodeset_func_elementbyindex(THD *thd, Item *a, Item *b, String *pxml): + Item_nodeset_func(thd, a, b, pxml) { } + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_elementbyindex") }; + } + bool val_native(THD *thd, Native *nodeset) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + Converts its argument into a boolean value. + * a number is true if it is non-zero + * a node-set is true if and only if it is non-empty + * a string is true if and only if its length is non-zero +*/ +class Item_xpath_cast_bool :public Item_bool_func +{ + String *pxml; + NativeNodesetBuffer tmp_native_value; +public: + Item_xpath_cast_bool(THD *thd, Item *a, String *pxml_arg): + Item_bool_func(thd, a), pxml(pxml_arg) {} + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_cast_bool") }; + } + longlong val_int() override + { + if (args[0]->fixed_type_handler() == &type_handler_xpath_nodeset) + { + args[0]->val_native(current_thd, &tmp_native_value); + return tmp_native_value.elements() == 1 ? 1 : 0; + } + return args[0]->val_real() ? 
1 : 0; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + Converts its argument into a number +*/ +class Item_xpath_cast_number :public Item_real_func +{ +public: + Item_xpath_cast_number(THD *thd, Item *a): Item_real_func(thd, a) {} + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_cast_number") }; + } + double val_real() override { return args[0]->val_real(); } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/* + Context cache, for predicate +*/ +class Item_nodeset_context_cache :public Item_nodeset_func +{ +public: + Native *native_cache; + Item_nodeset_context_cache(THD *thd, Native *native_arg, String *pxml): + Item_nodeset_func(thd, pxml), native_cache(native_arg) { } + bool val_native(THD *, Native *nodeset) override + { + return nodeset->copy(*native_cache); + } + bool fix_length_and_dec(THD *thd) override + { max_length= MAX_BLOB_WIDTH; return FALSE; } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_xpath_position :public Item_long_func +{ + String *pxml; + NativeNodesetBuffer tmp_native_value; +public: + Item_func_xpath_position(THD *thd, Item *a, String *p): + Item_long_func(thd, a), pxml(p) {} + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_position") }; + } + bool fix_length_and_dec(THD *thd) override { max_length=10; return FALSE; } + longlong val_int() override + { + args[0]->val_native(current_thd, &tmp_native_value); + if (tmp_native_value.elements() == 1) + return tmp_native_value.element(0).pos + 1; + return 0; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_xpath_count :public Item_long_func +{ + String *pxml; + NativeNodesetBuffer tmp_native_value; +public: + Item_func_xpath_count(THD *thd, Item *a, String *p): + Item_long_func(thd, a), pxml(p) {} + LEX_CSTRING func_name_cstring() const 
override + { + return { STRING_WITH_LEN("xpath_count") }; + } + bool fix_length_and_dec(THD *thd) override { max_length=10; return FALSE; } + longlong val_int() override + { + uint predicate_supplied_context_size; + args[0]->val_native(current_thd, &tmp_native_value); + if (tmp_native_value.elements() == 1 && + (predicate_supplied_context_size= tmp_native_value.element(0).size)) + return predicate_supplied_context_size; + return tmp_native_value.elements(); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_xpath_sum :public Item_real_func +{ + String *pxml; + NativeNodesetBuffer tmp_native_value; +public: + Item_func_xpath_sum(THD *thd, Item *a, String *p): + Item_real_func(thd, a), pxml(p) {} + + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_sum") }; + } + double val_real() override + { + double sum= 0; + args[0]->val_native(current_thd, &tmp_native_value); + MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) tmp_native_value.ptr(); + MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) tmp_native_value.end(); + uint numnodes= pxml->length() / sizeof(MY_XML_NODE); + MY_XML_NODE *nodebeg= (MY_XML_NODE*) pxml->ptr(); + + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + for (uint j= flt->num + 1; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->parent == flt->num) && + (node->type == MY_XML_NODE_TEXT)) + { + char *end; + int err; + double add= collation.collation->strntod((char*) node->beg, + node->end - node->beg, &end, &err); + if (!err) + sum+= add; + } + } + } + return sum; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +/** + A string whose value may be changed during execution. 
+*/ +class Item_string_xml_non_const: public Item_string +{ +public: + Item_string_xml_non_const(THD *thd, const char *str, uint length, + CHARSET_INFO *cs): + Item_string(thd, str, length, cs) + { } + bool const_item() const { return false ; } + bool basic_const_item() const { return false; } + void set_value(const char *str, uint length, CHARSET_INFO *cs) + { + str_value.set(str, length, cs); + } + Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs) + { + /* + Item_string::safe_charset_converter() does not accept non-constants. + Note, conversion is not really needed here anyway. + */ + return this; + } +}; + + +class Item_nodeset_to_const_comparator :public Item_bool_func +{ + String *pxml; + NativeNodesetBuffer tmp_nodeset; +public: + Item_nodeset_to_const_comparator(THD *thd, Item *nodeset, Item *cmpfunc, + String *p): + Item_bool_func(thd, nodeset, cmpfunc), pxml(p) {} + LEX_CSTRING func_name_cstring() const override + { + return { STRING_WITH_LEN("xpath_nodeset_to_const_comparator") }; + } + bool check_vcol_func_processor(void *arg) override + { + return mark_unsupported_function(func_name(), arg, VCOL_IMPOSSIBLE); + } + Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src *src, + const Tmp_field_param *param) override + { + DBUG_ASSERT(0); + return NULL; + } + longlong val_int() override + { + Item_func *comp= (Item_func*)args[1]; + Item_string_xml_non_const *fake= + (Item_string_xml_non_const*)(comp->arguments()[0]); + args[0]->val_native(current_thd, &tmp_nodeset); + MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) tmp_nodeset.ptr(); + MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) tmp_nodeset.end(); + MY_XML_NODE *nodebeg= (MY_XML_NODE*) pxml->ptr(); + uint numnodes= pxml->length() / sizeof(MY_XML_NODE); + + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + for (uint j= flt->num + 1; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if 
((node->parent == flt->num) && + (node->type == MY_XML_NODE_TEXT)) + { + fake->set_value(node->beg, (uint)(node->end - node->beg), + collation.collation); + if (args[1]->val_int()) + return 1; + } + } + } + return 0; + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +bool Item_nodeset_func_rootelement::val_native(THD *thd, Native *nodeset) +{ + nodeset->length(0); + return MY_XPATH_FLT(0, 0).append_to(nodeset); +} + + +bool Item_nodeset_func_union::val_native(THD *thd, Native *nodeset) +{ + uint num_nodes= pxml->length() / sizeof(MY_XML_NODE); + NativeNodesetBuffer set0, set1; + args[0]->val_native(thd, &set0); + args[1]->val_native(thd, &set1); + String both_str; + both_str.alloc(num_nodes); + char *both= (char*) both_str.ptr(); + bzero((void*)both, num_nodes); + MY_XPATH_FLT *flt; + + fltbeg= (MY_XPATH_FLT*) set0.ptr(); + fltend= (MY_XPATH_FLT*) set0.end(); + for (flt= fltbeg; flt < fltend; flt++) + both[flt->num]= 1; + + fltbeg= (MY_XPATH_FLT*) set1.ptr(); + fltend= (MY_XPATH_FLT*) set1.end(); + for (flt= fltbeg; flt < fltend; flt++) + both[flt->num]= 1; + + nodeset->length(0); + for (uint i= 0, pos= 0; i < num_nodes; i++) + { + if (both[i]) + MY_XPATH_FLT(i, pos++).append_to(nodeset); + } + return false; +} + + +bool Item_nodeset_func_selfbyname::val_native(THD *thd, Native *nodeset) +{ + prepare(thd, nodeset); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + uint pos= 0; + MY_XML_NODE *self= &nodebeg[flt->num]; + if (validname(self)) + MY_XPATH_FLT(flt->num, pos++).append_to(nodeset); + } + return false; +} + + +bool Item_nodeset_func_childbyname::val_native(THD *thd, Native *nodeset) +{ + prepare(thd, nodeset); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + for (uint pos= 0, j= flt->num + 1 ; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->parent == flt->num) && + (node->type == 
MY_XML_NODE_TAG) && + validname(node)) + MY_XPATH_FLT(j, pos++).append_to(nodeset); + } + } + return false; +} + + +bool Item_nodeset_func_descendantbyname::val_native(THD *thd, Native *nodeset) +{ + prepare(thd, nodeset); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + uint pos= 0; + MY_XML_NODE *self= &nodebeg[flt->num]; + if (need_self && validname(self)) + MY_XPATH_FLT(flt->num, pos++).append_to(nodeset); + for (uint j= flt->num + 1 ; j < numnodes ; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->type == MY_XML_NODE_TAG) && validname(node)) + MY_XPATH_FLT(j, pos++).append_to(nodeset); + } + } + return false; +} + + +bool Item_nodeset_func_ancestorbyname::val_native(THD *thd, Native *nodeset) +{ + char *active; + String active_str; + prepare(thd, nodeset); + active_str.alloc(numnodes); + active= (char*) active_str.ptr(); + bzero((void*)active, numnodes); + uint pos= 0; + + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + /* + Go to the root and add all nodes on the way. 
+ Don't add the root if context is the root itelf + */ + MY_XML_NODE *self= &nodebeg[flt->num]; + if (need_self && validname(self)) + { + active[flt->num]= 1; + pos++; + } + + for (uint j= self->parent; nodebeg[j].parent != j; j= nodebeg[j].parent) + { + if (flt->num && validname(&nodebeg[j])) + { + active[j]= 1; + pos++; + } + } + } + + for (uint j= 0; j < numnodes ; j++) + { + if (active[j]) + MY_XPATH_FLT(j, --pos).append_to(nodeset); + } + return false; +} + + +bool Item_nodeset_func_parentbyname::val_native(THD *thd, Native *nodeset) +{ + char *active; + String active_str; + prepare(thd, nodeset); + active_str.alloc(numnodes); + active= (char*) active_str.ptr(); + bzero((void*)active, numnodes); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + uint j= nodebeg[flt->num].parent; + if (flt->num && validname(&nodebeg[j])) + active[j]= 1; + } + for (uint j= 0, pos= 0; j < numnodes ; j++) + { + if (active[j]) + MY_XPATH_FLT(j, pos++).append_to(nodeset); + } + return false; +} + + +bool Item_nodeset_func_attributebyname::val_native(THD *thd, Native *nodeset) +{ + prepare(thd, nodeset); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + for (uint pos=0, j= flt->num + 1 ; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->parent == flt->num) && + (node->type == MY_XML_NODE_ATTR) && + validname(node)) + MY_XPATH_FLT(j, pos++).append_to(nodeset); + } + } + return false; +} + + +bool Item_nodeset_func_predicate::val_native(THD *thd, Native *str) +{ + Item_nodeset_func *nodeset_func= (Item_nodeset_func*) args[0]; + uint pos= 0, size; + prepare(thd, str); + size= (uint)(fltend - fltbeg); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + nodeset_func->context_cache.length(0); + MY_XPATH_FLT(flt->num, flt->pos, size). 
+ append_to(&nodeset_func->context_cache); + if (args[1]->val_int()) + MY_XPATH_FLT(flt->num, pos++).append_to(str); + } + return false; +} + + +bool Item_nodeset_func_elementbyindex::val_native(THD *thd, Native *nodeset) +{ + Item_nodeset_func *nodeset_func= (Item_nodeset_func*) args[0]; + prepare(thd, nodeset); + MY_XPATH_FLT *flt; + uint pos, size= (uint)(fltend - fltbeg); + for (pos= 0, flt= fltbeg; flt < fltend; flt++) + { + nodeset_func->context_cache.length(0); + MY_XPATH_FLT(flt->num, flt->pos, size). + append_to(&nodeset_func->context_cache); + int index= (int) (args[1]->val_int()) - 1; + if (index >= 0 && + (flt->pos == (uint) index || + (args[1]->type_handler()->is_bool_type()))) + MY_XPATH_FLT(flt->num, pos++).append_to(nodeset); + } + return false; +} + + +/* + If item is a node set, then casts it to boolean, + otherwise returns the item itself. +*/ +static Item* nodeset2bool(MY_XPATH *xpath, Item *item) +{ + if (item->fixed_type_handler() == &type_handler_xpath_nodeset) + return new (xpath->thd->mem_root) + Item_xpath_cast_bool(xpath->thd, item, xpath->pxml); + return item; +} + + +/* + XPath lexical tokens +*/ +#define MY_XPATH_LEX_DIGITS 'd' +#define MY_XPATH_LEX_IDENT 'i' +#define MY_XPATH_LEX_STRING 's' +#define MY_XPATH_LEX_SLASH '/' +#define MY_XPATH_LEX_LB '[' +#define MY_XPATH_LEX_RB ']' +#define MY_XPATH_LEX_LP '(' +#define MY_XPATH_LEX_RP ')' +#define MY_XPATH_LEX_EQ '=' +#define MY_XPATH_LEX_LESS '<' +#define MY_XPATH_LEX_GREATER '>' +#define MY_XPATH_LEX_AT '@' +#define MY_XPATH_LEX_COLON ':' +#define MY_XPATH_LEX_ASTERISK '*' +#define MY_XPATH_LEX_DOT '.' +#define MY_XPATH_LEX_VLINE '|' +#define MY_XPATH_LEX_MINUS '-' +#define MY_XPATH_LEX_PLUS '+' +#define MY_XPATH_LEX_EXCL '!' 
+#define MY_XPATH_LEX_COMMA ',' +#define MY_XPATH_LEX_DOLLAR '$' +#define MY_XPATH_LEX_ERROR 'A' +#define MY_XPATH_LEX_EOF 'B' +#define MY_XPATH_LEX_AND 'C' +#define MY_XPATH_LEX_OR 'D' +#define MY_XPATH_LEX_DIV 'E' +#define MY_XPATH_LEX_MOD 'F' +#define MY_XPATH_LEX_FUNC 'G' +#define MY_XPATH_LEX_NODETYPE 'H' +#define MY_XPATH_LEX_AXIS 'I' +#define MY_XPATH_LEX_LE 'J' +#define MY_XPATH_LEX_GE 'K' + + +/* + XPath axis type +*/ +#define MY_XPATH_AXIS_ANCESTOR 0 +#define MY_XPATH_AXIS_ANCESTOR_OR_SELF 1 +#define MY_XPATH_AXIS_ATTRIBUTE 2 +#define MY_XPATH_AXIS_CHILD 3 +#define MY_XPATH_AXIS_DESCENDANT 4 +#define MY_XPATH_AXIS_DESCENDANT_OR_SELF 5 +#define MY_XPATH_AXIS_FOLLOWING 6 +#define MY_XPATH_AXIS_FOLLOWING_SIBLING 7 +#define MY_XPATH_AXIS_NAMESPACE 8 +#define MY_XPATH_AXIS_PARENT 9 +#define MY_XPATH_AXIS_PRECEDING 10 +#define MY_XPATH_AXIS_PRECEDING_SIBLING 11 +#define MY_XPATH_AXIS_SELF 12 + + +/* + Create scalar comparator + + SYNOPSYS + Create a comparator function for scalar arguments, + for the given arguments and operation. + + RETURN + The newly created item. +*/ +static Item *eq_func(THD *thd, int oper, Item *a, Item *b) +{ + MEM_ROOT *mem_root= thd->mem_root; + switch (oper) + { + case '=': return new (mem_root) Item_func_eq(thd, a, b); + case '!': return new (mem_root) Item_func_ne(thd, a, b); + case MY_XPATH_LEX_GE: return new (mem_root) Item_func_ge(thd, a, b); + case MY_XPATH_LEX_LE: return new (mem_root) Item_func_le(thd, a, b); + case MY_XPATH_LEX_GREATER: return new (mem_root) Item_func_gt(thd, a, b); + case MY_XPATH_LEX_LESS: return new (mem_root) Item_func_lt(thd, a, b); + } + return 0; +} + + +/* + Create scalar comparator + + SYNOPSYS + Create a comparator function for scalar arguments, + for the given arguments and reverse operation, e.g. + + A > B is converted into B < A + + RETURN + The newly created item. 
+*/ +static Item *eq_func_reverse(THD *thd, int oper, Item *a, Item *b) +{ + MEM_ROOT *mem_root= thd->mem_root; + switch (oper) + { + case '=': return new (mem_root) Item_func_eq(thd, a, b); + case '!': return new (mem_root) Item_func_ne(thd, a, b); + case MY_XPATH_LEX_GE: return new (mem_root) Item_func_le(thd, a, b); + case MY_XPATH_LEX_LE: return new (mem_root) Item_func_ge(thd, a, b); + case MY_XPATH_LEX_GREATER: return new (mem_root) Item_func_lt(thd, a, b); + case MY_XPATH_LEX_LESS: return new (mem_root) Item_func_gt(thd, a, b); + } + return 0; +} + + +/* + Create a comparator + + SYNOPSYS + Create a comparator for scalar or non-scalar arguments, + for the given arguments and operation. + + RETURN + The newly created item. +*/ +static Item *create_comparator(MY_XPATH *xpath, + int oper, MY_XPATH_LEX *context, + Item *a, Item *b) +{ + if (a->fixed_type_handler() != &type_handler_xpath_nodeset && + b->fixed_type_handler() != &type_handler_xpath_nodeset) + { + return eq_func(xpath->thd, oper, a, b); // two scalar arguments + } + else if (a->fixed_type_handler() == &type_handler_xpath_nodeset && + b->fixed_type_handler() == &type_handler_xpath_nodeset) + { + uint len= (uint)(xpath->query.end - context->beg); + if (len <= 32) + my_printf_error(ER_UNKNOWN_ERROR, + "XPATH error: " + "comparison of two nodesets is not supported: '%.*s'", + MYF(0), len, context->beg); + else + my_printf_error(ER_UNKNOWN_ERROR, + "XPATH error: " + "comparison of two nodesets is not supported: '%.32T'", + MYF(0), context->beg); + + return 0; // TODO: Comparison of two nodesets + } + else + { + /* + Compare a node set to a scalar value. + We just create a fake Item_string_xml_non_const() argument, + which will be filled to the partular value + in a loop through all of the nodes in the node set. 
+ */ + + THD *thd= xpath->thd; + Item_string *fake= (new (thd->mem_root) + Item_string_xml_non_const(thd, "", 0, xpath->cs)); + Item_nodeset_func *nodeset; + Item *scalar, *comp; + if (a->fixed_type_handler() == &type_handler_xpath_nodeset) + { + nodeset= (Item_nodeset_func*) a; + scalar= b; + comp= eq_func(thd, oper, (Item*)fake, scalar); + } + else + { + nodeset= (Item_nodeset_func*) b; + scalar= a; + comp= eq_func_reverse(thd, oper, fake, scalar); + } + return (new (thd->mem_root) + Item_nodeset_to_const_comparator(thd, nodeset, comp, xpath->pxml)); + } +} + + +/* + Create a step + + SYNOPSYS + Create a step function for the given argument and axis. + + RETURN + The newly created item. +*/ +static Item* nametestfunc(MY_XPATH *xpath, + int type, Item *arg, const char *beg, uint len) +{ + THD *thd= xpath->thd; + MEM_ROOT *mem_root= thd->mem_root; + + DBUG_ASSERT(arg != 0); + DBUG_ASSERT(arg->fixed_type_handler() == &type_handler_xpath_nodeset); + DBUG_ASSERT(beg != 0); + DBUG_ASSERT(len > 0); + + Item *res; + switch (type) + { + case MY_XPATH_AXIS_ANCESTOR: + res= new (mem_root) Item_nodeset_func_ancestorbyname(thd, arg, beg, len, + xpath->pxml, 0); + break; + case MY_XPATH_AXIS_ANCESTOR_OR_SELF: + res= new (mem_root) Item_nodeset_func_ancestorbyname(thd, arg, beg, len, + xpath->pxml, 1); + break; + case MY_XPATH_AXIS_PARENT: + res= new (mem_root) Item_nodeset_func_parentbyname(thd, arg, beg, len, + xpath->pxml); + break; + case MY_XPATH_AXIS_DESCENDANT: + res= new (mem_root) Item_nodeset_func_descendantbyname(thd, arg, beg, len, + xpath->pxml, 0); + break; + case MY_XPATH_AXIS_DESCENDANT_OR_SELF: + res= new (mem_root) Item_nodeset_func_descendantbyname(thd, arg, beg, len, + xpath->pxml, 1); + break; + case MY_XPATH_AXIS_ATTRIBUTE: + res= new (mem_root) Item_nodeset_func_attributebyname(thd, arg, beg, len, + xpath->pxml); + break; + case MY_XPATH_AXIS_SELF: + res= new (mem_root) Item_nodeset_func_selfbyname(thd, arg, beg, len, + xpath->pxml); + break; + default: + 
res= new (mem_root) Item_nodeset_func_childbyname(thd, arg, beg, len, + xpath->pxml); + } + return res; +} + + +/* + Tokens consisting of one character, for faster lexical analyzer. +*/ +static char simpletok[128]= +{ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +/* + ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \200 +*/ + 0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0 +}; + + +/* + XPath keywords +*/ +struct my_xpath_keyword_names_st +{ + int tok; + const char *name; + size_t length; + int extra; +}; + + +static struct my_xpath_keyword_names_st my_keyword_names[] = +{ + {MY_XPATH_LEX_AND , "and" , 3, 0 }, + {MY_XPATH_LEX_OR , "or" , 2, 0 }, + {MY_XPATH_LEX_DIV , "div" , 3, 0 }, + {MY_XPATH_LEX_MOD , "mod" , 3, 0 }, + {0,NULL,0,0} +}; + + +static struct my_xpath_keyword_names_st my_axis_names[]= +{ + {MY_XPATH_LEX_AXIS,"ancestor" , 8,MY_XPATH_AXIS_ANCESTOR }, + {MY_XPATH_LEX_AXIS,"ancestor-or-self" ,16,MY_XPATH_AXIS_ANCESTOR_OR_SELF }, + {MY_XPATH_LEX_AXIS,"attribute" , 9,MY_XPATH_AXIS_ATTRIBUTE }, + {MY_XPATH_LEX_AXIS,"child" , 5,MY_XPATH_AXIS_CHILD }, + {MY_XPATH_LEX_AXIS,"descendant" ,10,MY_XPATH_AXIS_DESCENDANT }, + {MY_XPATH_LEX_AXIS,"descendant-or-self",18,MY_XPATH_AXIS_DESCENDANT_OR_SELF}, + {MY_XPATH_LEX_AXIS,"following" , 9,MY_XPATH_AXIS_FOLLOWING }, + {MY_XPATH_LEX_AXIS,"following-sibling" ,17,MY_XPATH_AXIS_FOLLOWING_SIBLING }, + {MY_XPATH_LEX_AXIS,"namespace" , 9,MY_XPATH_AXIS_NAMESPACE }, + {MY_XPATH_LEX_AXIS,"parent" , 6,MY_XPATH_AXIS_PARENT }, + {MY_XPATH_LEX_AXIS,"preceding" , 9,MY_XPATH_AXIS_PRECEDING }, + {MY_XPATH_LEX_AXIS,"preceding-sibling" ,17,MY_XPATH_AXIS_PRECEDING_SIBLING }, + {MY_XPATH_LEX_AXIS,"self" , 4,MY_XPATH_AXIS_SELF }, 
+ {0,NULL,0,0} +}; + + +static struct my_xpath_keyword_names_st my_nodetype_names[]= +{ + {MY_XPATH_LEX_NODETYPE, "comment" , 7, 0 }, + {MY_XPATH_LEX_NODETYPE, "text" , 4, 0 }, + {MY_XPATH_LEX_NODETYPE, "processing-instruction" , 22,0 }, + {MY_XPATH_LEX_NODETYPE, "node" , 4, 0 }, + {0,NULL,0,0} +}; + + +/* + Lookup a keyword + + SYNOPSYS + Check that the last scanned identifier is a keyword. + + RETURN + - Token type, on lookup success. + - MY_XPATH_LEX_IDENT, on lookup failure. +*/ +static int +my_xpath_keyword(MY_XPATH *x, + struct my_xpath_keyword_names_st *keyword_names, + const char *beg, const char *end) +{ + struct my_xpath_keyword_names_st *k; + size_t length= end-beg; + for (k= keyword_names; k->name; k++) + { + if (length == k->length && !strncasecmp(beg, k->name, length)) + { + x->extra= k->extra; + return k->tok; + } + } + return MY_XPATH_LEX_IDENT; +} + + +/* + Functions to create an item, a-la those in item_create.cc +*/ + +static Item *create_func_true(MY_XPATH *xpath, Item **args, uint nargs) +{ + return (Item*) Item_true; +} + + +static Item *create_func_false(MY_XPATH *xpath, Item **args, uint nargs) +{ + return (Item*) Item_false; +} + + +static Item *create_func_not(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new (xpath->thd->mem_root) + Item_func_not(xpath->thd, nodeset2bool(xpath, args[0])); +} + + +static Item *create_func_ceiling(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new (xpath->thd->mem_root) Item_func_ceiling(xpath->thd, args[0]); +} + + +static Item *create_func_floor(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new (xpath->thd->mem_root) Item_func_floor(xpath->thd, args[0]); +} + + +static Item *create_func_bool(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new (xpath->thd->mem_root) + Item_xpath_cast_bool(xpath->thd, args[0], xpath->pxml); +} + + +static Item *create_func_number(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new (xpath->thd->mem_root) + Item_xpath_cast_number(xpath->thd, 
args[0]); +} + + +static Item *create_func_string_length(MY_XPATH *xpath, Item **args, + uint nargs) +{ + Item *arg= nargs ? args[0] : xpath->context; + return arg ? new (xpath->thd->mem_root) + Item_func_char_length(xpath->thd, arg) : 0; +} + + +static Item *create_func_round(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new (xpath->thd->mem_root) + Item_func_round(xpath->thd, args[0], + new (xpath->thd->mem_root) + Item_int(xpath->thd, (char *) "0", 0, 1), 0); +} + + +static Item *create_func_last(MY_XPATH *xpath, Item **args, uint nargs) +{ + return (xpath->context ? + new (xpath->thd->mem_root) + Item_func_xpath_count(xpath->thd, xpath->context, xpath->pxml) : + NULL); +} + + +static Item *create_func_position(MY_XPATH *xpath, Item **args, uint nargs) +{ + return (xpath->context ? + new (xpath->thd->mem_root) + Item_func_xpath_position(xpath->thd, xpath->context, xpath->pxml) : + NULL); +} + + +static Item *create_func_contains(MY_XPATH *xpath, Item **args, uint nargs) +{ + return (new (xpath->thd->mem_root) + Item_xpath_cast_bool(xpath->thd, + new (xpath->thd->mem_root) + Item_func_locate(xpath->thd, args[0], args[1]), + xpath->pxml)); +} + + +static Item *create_func_concat(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new (xpath->thd->mem_root) + Item_func_concat(xpath->thd, args[0], args[1]); +} + + +static Item *create_func_substr(MY_XPATH *xpath, Item **args, uint nargs) +{ + THD *thd= xpath->thd; + if (nargs == 2) + return new (thd->mem_root) Item_func_substr(thd, args[0], args[1]); + return new (thd->mem_root) Item_func_substr(thd, args[0], args[1], args[2]); +} + + +static Item *create_func_count(MY_XPATH *xpath, Item **args, uint nargs) +{ + if (args[0]->fixed_type_handler() != &type_handler_xpath_nodeset) + return 0; + return new (xpath->thd->mem_root) Item_func_xpath_count(xpath->thd, args[0], xpath->pxml); +} + + +static Item *create_func_sum(MY_XPATH *xpath, Item **args, uint nargs) +{ + if (args[0]->fixed_type_handler() != 
&type_handler_xpath_nodeset) + return 0; + return new (xpath->thd->mem_root) + Item_func_xpath_sum(xpath->thd, args[0], xpath->pxml); +} + + +/* + Functions names. Separate lists for names with + lengths 3,4,5 and 6 for faster lookups. +*/ +static MY_XPATH_FUNC my_func_names3[]= +{ + {"sum", 3, 1 , 1 , create_func_sum}, + {"not", 3, 1 , 1 , create_func_not}, + {0 , 0, 0 , 0, 0} +}; + + +static MY_XPATH_FUNC my_func_names4[]= +{ + {"last", 4, 0, 0, create_func_last}, + {"true", 4, 0, 0, create_func_true}, + {"name", 4, 0, 1, 0}, + {"lang", 4, 1, 1, 0}, + {0 , 0, 0, 0, 0} +}; + + +static MY_XPATH_FUNC my_func_names5[]= +{ + {"count", 5, 1, 1, create_func_count}, + {"false", 5, 0, 0, create_func_false}, + {"floor", 5, 1, 1, create_func_floor}, + {"round", 5, 1, 1, create_func_round}, + {0 , 0, 0, 0, 0} +}; + + +static MY_XPATH_FUNC my_func_names6[]= +{ + {"concat", 6, 2, 255, create_func_concat}, + {"number", 6, 0, 1 , create_func_number}, + {"string", 6, 0, 1 , 0}, + {0 , 0, 0, 0 , 0} +}; + + +/* Other functions, with name longer than 6, all together */ +static MY_XPATH_FUNC my_func_names[] = +{ + {"id" , 2 , 1 , 1 , 0}, + {"boolean" , 7 , 1 , 1 , create_func_bool}, + {"ceiling" , 7 , 1 , 1 , create_func_ceiling}, + {"position" , 8 , 0 , 0 , create_func_position}, + {"contains" , 8 , 2 , 2 , create_func_contains}, + {"substring" , 9 , 2 , 3 , create_func_substr}, + {"translate" , 9 , 3 , 3 , 0}, + + {"local-name" , 10 , 0 , 1 , 0}, + {"starts-with" , 11 , 2 , 2 , 0}, + {"namespace-uri" , 13 , 0 , 1 , 0}, + {"string-length" , 13 , 0 , 1 , create_func_string_length}, + {"substring-after" , 15 , 2 , 2 , 0}, + {"normalize-space" , 15 , 0 , 1 , 0}, + {"substring-before" , 16 , 2 , 2 , 0}, + + {NULL,0,0,0,0} +}; + + +/* + Lookup a function by name + + SYNOPSYS + Lookup a function by its name. + + RETURN + Pointer to a MY_XPATH_FUNC variable on success. + 0 - on failure. 
+ +*/ +MY_XPATH_FUNC * +my_xpath_function(const char *beg, const char *end) +{ + MY_XPATH_FUNC *k, *function_names; + uint length= (uint)(end-beg); + switch (length) + { + case 1: return 0; + case 3: function_names= my_func_names3; break; + case 4: function_names= my_func_names4; break; + case 5: function_names= my_func_names5; break; + case 6: function_names= my_func_names6; break; + default: function_names= my_func_names; + } + for (k= function_names; k->name; k++) + if (k->create && length == k->length && !strncasecmp(beg, k->name, length)) + return k; + return NULL; +} + + +/* Initialize a lex analyzer token */ +static void +my_xpath_lex_init(MY_XPATH_LEX *lex, + const char *str, const char *strend) +{ + lex->beg= str; + lex->end= strend; +} + + +/* Initialize an XPath query parser */ +static void +my_xpath_init(MY_XPATH *xpath) +{ + bzero((void*)xpath, sizeof(xpath[0])); +} + + +static int +my_xdigit(int c) +{ + return ((c) >= '0' && (c) <= '9'); +} + + +/* + Scan the next token + + SYNOPSYS + Scan the next token from the input. + lex->term is set to the scanned token type. + lex->beg and lex->end are set to the beginning + and to the end of the token. 
+ RETURN + N/A +*/ +static void +my_xpath_lex_scan(MY_XPATH *xpath, + MY_XPATH_LEX *lex, const char *beg, const char *end) +{ + int ch, ctype, length; + for ( ; beg < end && *beg == ' ' ; beg++) ; // skip leading spaces + lex->beg= beg; + + if (beg >= end) + { + lex->end= beg; + lex->term= MY_XPATH_LEX_EOF; // end of line reached + return; + } + + // Check ident, or a function call, or a keyword + if ((length= xpath->cs->ctype(&ctype, + (const uchar*) beg, + (const uchar*) end)) > 0 && + ((ctype & (_MY_L | _MY_U)) || *beg == '_')) + { + // scan until the end of the identifier + for (beg+= length; + (length= xpath->cs->ctype(&ctype, + (const uchar*) beg, + (const uchar*) end)) > 0 && + ((ctype & (_MY_L | _MY_U | _MY_NMR)) || + *beg == '_' || *beg == '-' || *beg == '.') ; + beg+= length) /* no op */; + lex->end= beg; + + if (beg < end) + { + if (*beg == '(') + { + /* + check if a function call, e.g.: count(/a/b) + or a nodetype test, e.g.: /a/b/text() + */ + if ((xpath->func= my_xpath_function(lex->beg, beg))) + lex->term= MY_XPATH_LEX_FUNC; + else + lex->term= my_xpath_keyword(xpath, my_nodetype_names, + lex->beg, beg); + return; + } + // check if an axis specifier, e.g.: /a/b/child::* + else if (*beg == ':' && beg + 1 < end && beg[1] == ':') + { + lex->term= my_xpath_keyword(xpath, my_axis_names, + lex->beg, beg); + return; + } + } + // check if a keyword + lex->term= my_xpath_keyword(xpath, my_keyword_names, + lex->beg, beg); + return; + } + + + ch= *beg++; + + if (ch > 0 && ch < 128 && simpletok[ch]) + { + // a token consisting of one character found + lex->end= beg; + lex->term= ch; + return; + } + + + if (my_xdigit(ch)) // a sequence of digits + { + for ( ; beg < end && my_xdigit(*beg) ; beg++) ; + lex->end= beg; + lex->term= MY_XPATH_LEX_DIGITS; + return; + } + + if (ch == '"' || ch == '\'') // a string: either '...' or "..." 
+ { + for ( ; beg < end && *beg != ch ; beg++) ; + if (beg < end) + { + lex->end= beg+1; + lex->term= MY_XPATH_LEX_STRING; + return; + } + else + { + // unexpected end-of-line, without closing quot sign + lex->end= end; + lex->term= MY_XPATH_LEX_ERROR; + return; + } + } + + lex->end= beg; + lex->term= MY_XPATH_LEX_ERROR; // unknown character + return; +} + + +/* + Scan the given token + + SYNOPSYS + Scan the given token and rotate lasttok to prevtok on success. + + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_term(MY_XPATH *xpath, int term) +{ + if (xpath->lasttok.term == term && !xpath->error) + { + xpath->prevtok= xpath->lasttok; + my_xpath_lex_scan(xpath, &xpath->lasttok, + xpath->lasttok.end, xpath->query.end); + return 1; + } + return 0; +} + + +/* + Scan AxisName + + SYNOPSYS + Scan an axis name and store the scanned axis type into xpath->axis. + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AxisName(MY_XPATH *xpath) +{ + int rc= my_xpath_parse_term(xpath, MY_XPATH_LEX_AXIS); + xpath->axis= xpath->extra; + return rc; +} + + +/********************************************* +** Grammar rules, according to http://www.w3.org/TR/xpath +** Implemented using recursive descendant method. +** All the following grammar processing functions accept +** a single "xpath" argument and return 1 on success and 0 on error. +** They also modify "xpath" argument by creating new items. 
+*/ + +/* [9] PredicateExpr ::= Expr */ +#define my_xpath_parse_PredicateExpr(x) my_xpath_parse_Expr((x)) + +/* [14] Expr ::= OrExpr */ +#define my_xpath_parse_Expr(x) my_xpath_parse_OrExpr((x)) + +static int my_xpath_parse_LocationPath(MY_XPATH *xpath); +static int my_xpath_parse_AbsoluteLocationPath(MY_XPATH *xpath); +static int my_xpath_parse_RelativeLocationPath(MY_XPATH *xpath); +static int my_xpath_parse_AbbreviatedStep(MY_XPATH *xpath); +static int my_xpath_parse_Step(MY_XPATH *xpath); +static int my_xpath_parse_AxisSpecifier(MY_XPATH *xpath); +static int my_xpath_parse_NodeTest(MY_XPATH *xpath); +static int my_xpath_parse_AbbreviatedAxisSpecifier(MY_XPATH *xpath); +static int my_xpath_parse_NameTest(MY_XPATH *xpath); +static int my_xpath_parse_FunctionCall(MY_XPATH *xpath); +static int my_xpath_parse_Number(MY_XPATH *xpath); +static int my_xpath_parse_FilterExpr(MY_XPATH *xpath); +static int my_xpath_parse_PathExpr(MY_XPATH *xpath); +static int my_xpath_parse_OrExpr(MY_XPATH *xpath); +static int my_xpath_parse_UnaryExpr(MY_XPATH *xpath); +static int my_xpath_parse_MultiplicativeExpr(MY_XPATH *xpath); +static int my_xpath_parse_AdditiveExpr(MY_XPATH *xpath); +static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath); +static int my_xpath_parse_AndExpr(MY_XPATH *xpath); +static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath); +static int my_xpath_parse_VariableReference(MY_XPATH *xpath); + + +/* + Scan LocationPath + + SYNOPSYS + + [1] LocationPath ::= RelativeLocationPath + | AbsoluteLocationPath + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_LocationPath(MY_XPATH *xpath) +{ + Item *context= xpath->context; + + if (!xpath->context) + xpath->context= xpath->rootelement; + int rc= my_xpath_parse_RelativeLocationPath(xpath) || + my_xpath_parse_AbsoluteLocationPath(xpath); + + xpath->item= xpath->context; + xpath->context= context; + return rc; +} + + +/* + Scan Absolute Location Path + + SYNOPSYS + + [2] AbsoluteLocationPath ::= '/' 
RelativeLocationPath? + | AbbreviatedAbsoluteLocationPath + [10] AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath + + We combine these two rules into one rule for better performance: + + [2,10] AbsoluteLocationPath ::= '/' RelativeLocationPath? + | '//' RelativeLocationPath + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AbsoluteLocationPath(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + return 0; + + xpath->context= xpath->rootelement; + + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + { + xpath->context= new (xpath->thd->mem_root) + Item_nodeset_func_descendantbyname(xpath->thd, + xpath->context, + "*", 1, + xpath->pxml, 1); + return my_xpath_parse_RelativeLocationPath(xpath); + } + + my_xpath_parse_RelativeLocationPath(xpath); + + return (xpath->error == 0); +} + + +/* + Scan Relative Location Path + + SYNOPSYS + + For better performance we combine these two rules + + [3] RelativeLocationPath ::= Step + | RelativeLocationPath '/' Step + | AbbreviatedRelativeLocationPath + [11] AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step + + + Into this one: + + [3-11] RelativeLocationPath ::= Step + | RelativeLocationPath '/' Step + | RelativeLocationPath '//' Step + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_RelativeLocationPath(MY_XPATH *xpath) +{ + if (!my_xpath_parse_Step(xpath)) + return 0; + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + { + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + xpath->context= new (xpath->thd->mem_root) + Item_nodeset_func_descendantbyname(xpath->thd, + xpath->context, + "*", 1, + xpath->pxml, 1); + if (!my_xpath_parse_Step(xpath)) + { + xpath->error= 1; + return 0; + } + } + return 1; +} + + +/* + Scan non-abbreviated or abbreviated Step + + SYNOPSYS + + [4] Step ::= AxisSpecifier NodeTest Predicate* + | AbbreviatedStep + [8] Predicate ::= '[' PredicateExpr ']' + + RETURN + 1 - success + 0 - failure +*/ +static 
int +my_xpath_parse_AxisSpecifier_NodeTest_opt_Predicate_list(MY_XPATH *xpath) +{ + if (!my_xpath_parse_AxisSpecifier(xpath)) + return 0; + + if (!my_xpath_parse_NodeTest(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_LB)) + { + Item *prev_context= xpath->context; + Native *context_cache; + context_cache= &((Item_nodeset_func*)xpath->context)->context_cache; + xpath->context= new (xpath->thd->mem_root) + Item_nodeset_context_cache(xpath->thd, context_cache, xpath->pxml); + xpath->context_cache= context_cache; + + if(!my_xpath_parse_PredicateExpr(xpath)) + { + xpath->error= 1; + return 0; + } + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RB)) + { + xpath->error= 1; + return 0; + } + + xpath->item= nodeset2bool(xpath, xpath->item); + + const Type_handler *fh; + if ((fh= xpath->item->fixed_type_handler()) && fh->is_bool_type()) + { + xpath->context= new (xpath->thd->mem_root) + Item_nodeset_func_predicate(xpath->thd, prev_context, + xpath->item, + xpath->pxml); + } + else + { + xpath->context= new (xpath->thd->mem_root) + Item_nodeset_func_elementbyindex(xpath->thd, + prev_context, + xpath->item, + xpath->pxml); + } + } + return 1; +} + + +static int my_xpath_parse_Step(MY_XPATH *xpath) +{ + return + my_xpath_parse_AxisSpecifier_NodeTest_opt_Predicate_list(xpath) || + my_xpath_parse_AbbreviatedStep(xpath); +} + + +/* + Scan Abbreviated Axis Specifier + + SYNOPSYS + [5] AxisSpecifier ::= AxisName '::' + | AbbreviatedAxisSpecifier + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AbbreviatedAxisSpecifier(MY_XPATH *xpath) +{ + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_AT)) + xpath->axis= MY_XPATH_AXIS_ATTRIBUTE; + else + xpath->axis= MY_XPATH_AXIS_CHILD; + return 1; +} + + +/* + Scan non-abbreviated axis specifier + + SYNOPSYS + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AxisName_colon_colon(MY_XPATH *xpath) +{ + return my_xpath_parse_AxisName(xpath) && + my_xpath_parse_term(xpath, 
MY_XPATH_LEX_COLON) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_COLON); +} + + +/* + Scan Abbreviated AxisSpecifier + + SYNOPSYS + [13] AbbreviatedAxisSpecifier ::= '@'? + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AxisSpecifier(MY_XPATH *xpath) +{ + return my_xpath_parse_AxisName_colon_colon(xpath) || + my_xpath_parse_AbbreviatedAxisSpecifier(xpath); +} + + +/* + Scan NodeType followed by parens + + SYNOPSYS + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_NodeTest_lp_rp(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_NODETYPE) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_LP) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_RP); +} + + +/* + Scan NodeTest + + SYNOPSYS + + [7] NodeTest ::= NameTest + | NodeType '(' ')' + | 'processing-instruction' '(' Literal ')' + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_NodeTest(MY_XPATH *xpath) +{ + return my_xpath_parse_NameTest(xpath) || + my_xpath_parse_NodeTest_lp_rp(xpath); +} + + +/* + Scan Abbreviated Step + + SYNOPSYS + + [12] AbbreviatedStep ::= '.' | '..' 
+ + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AbbreviatedStep(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_DOT)) + return 0; + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_DOT)) + xpath->context= new (xpath->thd->mem_root) + Item_nodeset_func_parentbyname(xpath->thd, + xpath->context, "*", + 1, xpath->pxml); + return 1; +} + + +/* + Scan Primary Expression + + SYNOPSYS + + [15] PrimaryExpr ::= VariableReference + | '(' Expr ')' + | Literal + | Number + | FunctionCall + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_lp_Expr_rp(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_LP) && + my_xpath_parse_Expr(xpath) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_RP); +} +static int my_xpath_parse_PrimaryExpr_literal(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_STRING)) + return 0; + xpath->item= new (xpath->thd->mem_root) + Item_string(xpath->thd, xpath->prevtok.beg + 1, + (uint)(xpath->prevtok.end - xpath->prevtok.beg - 2), + xpath->cs); + return 1; +} +static int my_xpath_parse_PrimaryExpr(MY_XPATH *xpath) +{ + return + my_xpath_parse_lp_Expr_rp(xpath) || + my_xpath_parse_VariableReference(xpath) || + my_xpath_parse_PrimaryExpr_literal(xpath) || + my_xpath_parse_Number(xpath) || + my_xpath_parse_FunctionCall(xpath); +} + + +/* + Scan Function Call + + SYNOPSYS + [16] FunctionCall ::= FunctionName '(' ( Argument ( ',' Argument )* )? 
')' + [17] Argument ::= Expr + + RETURN + 1 - success + 0 - failure + +*/ +static int my_xpath_parse_FunctionCall(MY_XPATH *xpath) +{ + Item *args[256]; + uint nargs; + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_FUNC)) + return 0; + + MY_XPATH_FUNC *func= xpath->func; + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_LP)) + return 0; + + for (nargs= 0 ; nargs < func->maxargs; ) + { + if (!my_xpath_parse_Expr(xpath)) + { + if (nargs < func->minargs) + return 0; + goto right_paren; + } + args[nargs++]= xpath->item; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_COMMA)) + { + if (nargs < func->minargs) + return 0; + else + break; + } + } + +right_paren: + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RP)) + return 0; + + return ((xpath->item= func->create(xpath, args, nargs))) ? 1 : 0; +} + + +/* + Scan Union Expression + + SYNOPSYS + [18] UnionExpr ::= PathExpr + | UnionExpr '|' PathExpr + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_UnionExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_PathExpr(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_VLINE)) + { + Item *prev= xpath->item; + if (prev->fixed_type_handler() != &type_handler_xpath_nodeset) + return 0; + + if (!my_xpath_parse_PathExpr(xpath) + || xpath->item->fixed_type_handler() != &type_handler_xpath_nodeset) + { + xpath->error= 1; + return 0; + } + xpath->item= new (xpath->thd->mem_root) + Item_nodeset_func_union(xpath->thd, prev, xpath->item, + xpath->pxml); + } + return 1; +} + + +/* + Scan Path Expression + + SYNOPSYS + + [19] PathExpr ::= LocationPath + | FilterExpr + | FilterExpr '/' RelativeLocationPath + | FilterExpr '//' RelativeLocationPath + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(MY_XPATH *xpath) +{ + Item *context= xpath->context; + int rc; + + if (!my_xpath_parse_FilterExpr(xpath)) + return 0; + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + return 1; + + if 
(xpath->item->fixed_type_handler() != &type_handler_xpath_nodeset) + { + xpath->lasttok= xpath->prevtok; + xpath->error= 1; + return 0; + } + + /* + The context for the next relative path is the nodeset + returned by FilterExpr + */ + xpath->context= xpath->item; + + /* treat double slash (//) as /descendant-or-self::node()/ */ + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + xpath->context= new (xpath->thd->mem_root) + Item_nodeset_func_descendantbyname(xpath->thd, + xpath->context, + "*", 1, + xpath->pxml, 1); + rc= my_xpath_parse_RelativeLocationPath(xpath); + + /* push back the context and restore the item */ + xpath->item= xpath->context; + xpath->context= context; + return rc; +} +static int my_xpath_parse_PathExpr(MY_XPATH *xpath) +{ + return my_xpath_parse_LocationPath(xpath) || + my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(xpath); +} + + + +/* + Scan Filter Expression + + SYNOPSYS + [20] FilterExpr ::= PrimaryExpr + | FilterExpr Predicate + + or in other words: + + [20] FilterExpr ::= PrimaryExpr Predicate* + + RETURN + 1 - success + 0 - failure + +*/ +static int my_xpath_parse_FilterExpr(MY_XPATH *xpath) +{ + return my_xpath_parse_PrimaryExpr(xpath); +} + + +/* + Scan Or Expression + + SYNOPSYS + [21] OrExpr ::= AndExpr + | OrExpr 'or' AndExpr + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_OrExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_AndExpr(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_OR)) + { + Item *prev= xpath->item; + if (!my_xpath_parse_AndExpr(xpath)) + { + xpath->error= 1; + return 0; + } + xpath->item= new (xpath->thd->mem_root) + Item_cond_or(xpath->thd, nodeset2bool(xpath, prev), + nodeset2bool(xpath, xpath->item)); + } + return 1; +} + + +/* + Scan And Expression + + SYNOPSYS + [22] AndExpr ::= EqualityExpr + | AndExpr 'and' EqualityExpr + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AndExpr(MY_XPATH *xpath) +{ + if 
(!my_xpath_parse_EqualityExpr(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_AND)) + { + Item *prev= xpath->item; + if (!my_xpath_parse_EqualityExpr(xpath)) + { + xpath->error= 1; + return 0; + } + + xpath->item= new (xpath->thd->mem_root) + Item_cond_and(xpath->thd, nodeset2bool(xpath, prev), + nodeset2bool(xpath, xpath->item)); + } + return 1; +} + + +/* + Scan Equality Expression + + SYNOPSYS + [23] EqualityExpr ::= RelationalExpr + | EqualityExpr '=' RelationalExpr + | EqualityExpr '!=' RelationalExpr + or in other words: + + [23] EqualityExpr ::= RelationalExpr ( EqualityOperator EqualityExpr )* + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_ne(MY_XPATH *xpath) +{ + MY_XPATH_LEX prevtok= xpath->prevtok; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_EXCL)) + return 0; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ)) + { + /* Unget the exclamation mark */ + xpath->lasttok= xpath->prevtok; + xpath->prevtok= prevtok; + return 0; + } + return 1; +} +static int my_xpath_parse_EqualityOperator(MY_XPATH *xpath) +{ + if (my_xpath_parse_ne(xpath)) + { + xpath->extra= '!'; + return 1; + } + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ)) + { + xpath->extra= '='; + return 1; + } + return 0; +} +static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath) +{ + MY_XPATH_LEX operator_context; + if (!my_xpath_parse_RelationalExpr(xpath)) + return 0; + + operator_context= xpath->lasttok; + while (my_xpath_parse_EqualityOperator(xpath)) + { + Item *prev= xpath->item; + int oper= xpath->extra; + if (!my_xpath_parse_RelationalExpr(xpath)) + { + xpath->error= 1; + return 0; + } + + if (!(xpath->item= create_comparator(xpath, oper, &operator_context, + prev, xpath->item))) + return 0; + + operator_context= xpath->lasttok; + } + return 1; +} + + +/* + Scan Relational Expression + + SYNOPSYS + + [24] RelationalExpr ::= AdditiveExpr + | RelationalExpr '<' AdditiveExpr + | RelationalExpr '>' AdditiveExpr + | RelationalExpr '<=' 
AdditiveExpr + | RelationalExpr '>=' AdditiveExpr + or in other words: + + [24] RelationalExpr ::= AdditiveExpr (RelationalOperator RelationalExpr)* + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_RelationalOperator(MY_XPATH *xpath) +{ + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_LESS)) + { + xpath->extra= my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ) ? + MY_XPATH_LEX_LE : MY_XPATH_LEX_LESS; + return 1; + } + else if (my_xpath_parse_term(xpath, MY_XPATH_LEX_GREATER)) + { + xpath->extra= my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ) ? + MY_XPATH_LEX_GE : MY_XPATH_LEX_GREATER; + return 1; + } + return 0; +} +static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath) +{ + MY_XPATH_LEX operator_context; + if (!my_xpath_parse_AdditiveExpr(xpath)) + return 0; + operator_context= xpath->lasttok; + while (my_xpath_parse_RelationalOperator(xpath)) + { + Item *prev= xpath->item; + int oper= xpath->extra; + + if (!my_xpath_parse_AdditiveExpr(xpath)) + { + xpath->error= 1; + return 0; + } + + if (!(xpath->item= create_comparator(xpath, oper, &operator_context, + prev, xpath->item))) + return 0; + operator_context= xpath->lasttok; + } + return 1; +} + + +/* + Scan Additive Expression + + SYNOPSYS + + [25] AdditiveExpr ::= MultiplicativeExpr + | AdditiveExpr '+' MultiplicativeExpr + | AdditiveExpr '-' MultiplicativeExpr + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AdditiveOperator(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_PLUS) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_MINUS); +} +static int my_xpath_parse_AdditiveExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_MultiplicativeExpr(xpath)) + return 0; + + while (my_xpath_parse_AdditiveOperator(xpath)) + { + int oper= xpath->prevtok.term; + Item *prev= xpath->item; + THD *thd= xpath->thd; + + if (!my_xpath_parse_MultiplicativeExpr(xpath)) + { + xpath->error= 1; + return 0; + } + + if (oper == MY_XPATH_LEX_PLUS) + xpath->item= new (thd->mem_root) + 
Item_func_plus(thd, prev, xpath->item); + else + xpath->item= new (thd->mem_root) + Item_func_minus(thd, prev, xpath->item); + }; + return 1; +} + + +/* + Scan Multiplicative Expression + + SYNOPSYS + + [26] MultiplicativeExpr ::= UnaryExpr + | MultiplicativeExpr MultiplyOperator UnaryExpr + | MultiplicativeExpr 'div' UnaryExpr + | MultiplicativeExpr 'mod' UnaryExpr + or in other words: + + [26] MultiplicativeExpr ::= UnaryExpr (MulOper MultiplicativeExpr)* + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_MultiplicativeOperator(MY_XPATH *xpath) +{ + return + my_xpath_parse_term(xpath, MY_XPATH_LEX_ASTERISK) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_DIV) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_MOD); +} +static int my_xpath_parse_MultiplicativeExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_UnaryExpr(xpath)) + return 0; + + THD *thd= xpath->thd; + while (my_xpath_parse_MultiplicativeOperator(xpath)) + { + int oper= xpath->prevtok.term; + Item *prev= xpath->item; + if (!my_xpath_parse_UnaryExpr(xpath)) + { + xpath->error= 1; + return 0; + } + switch (oper) + { + case MY_XPATH_LEX_ASTERISK: + xpath->item= new (thd->mem_root) Item_func_mul(thd, prev, xpath->item); + break; + case MY_XPATH_LEX_DIV: + xpath->item= new (thd->mem_root) Item_func_int_div(thd, prev, xpath->item); + break; + case MY_XPATH_LEX_MOD: + xpath->item= new (thd->mem_root) Item_func_mod(thd, prev, xpath->item); + break; + } + } + return 1; +} + + +/* + Scan Unary Expression + + SYNOPSYS + + [27] UnaryExpr ::= UnionExpr + | '-' UnaryExpr + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_UnaryExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_MINUS)) + return my_xpath_parse_UnionExpr(xpath); + if (!my_xpath_parse_UnaryExpr(xpath)) + return 0; + xpath->item= new (xpath->thd->mem_root) + Item_func_neg(xpath->thd, xpath->item); + return 1; +} + + +/** + A helper class to make a null-terminated string from XPath fragments. 
+ The string is allocated on the THD memory root. +*/ +class XPath_cstring_null_terminated: public LEX_CSTRING +{ +public: + XPath_cstring_null_terminated(THD *thd, const char *str, size_t length) + { + if (thd->make_lex_string(this, str, length)) + static_cast(*this)= empty_clex_str; + } +}; + + +/* + Scan Number + + SYNOPSYS + + [30] Number ::= Digits ('.' Digits?)? | '.' Digits) + + or in other words: + + [30] Number ::= Digits + | Digits '.' + | Digits '.' Digits + | '.' Digits + + Note: the last rule is not supported yet, + as it is in conflict with abbreviated step. + 1 + .123 does not work, + 1 + 0.123 does. + Perhaps it is better to move this code into lex analyzer. + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_Number(MY_XPATH *xpath) +{ + const char *beg; + THD *thd; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_DIGITS)) + return 0; + beg= xpath->prevtok.beg; + thd= xpath->thd; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_DOT)) + { + XPath_cstring_null_terminated nr(thd, beg, xpath->prevtok.end - beg); + xpath->item= new (thd->mem_root) Item_int(thd, nr.str, (uint) nr.length); + } + else + { + my_xpath_parse_term(xpath, MY_XPATH_LEX_DIGITS); + XPath_cstring_null_terminated nr(thd, beg, xpath->prevtok.end - beg); + xpath->item= new (thd->mem_root) Item_float(thd, nr.str, (uint) nr.length); + } + return 1; +} + + +/* + Scan NCName. + + SYNOPSYS + + The keywords AND, OR, MOD, DIV are valid identitiers + when they are in identifier context: + + SELECT + ExtractValue('
VALUE
', + '/and/or/mod/div') + -> VALUE + + RETURN + 1 - success + 0 - failure +*/ + +static int +my_xpath_parse_NCName(MY_XPATH *xpath) +{ + return + my_xpath_parse_term(xpath, MY_XPATH_LEX_IDENT) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_AND) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_OR) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_MOD) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_DIV) ? 1 : 0; +} + + +/* + QName grammar can be found in a separate document + http://www.w3.org/TR/REC-xml-names/#NT-QName + + [6] QName ::= (Prefix ':')? LocalPart + [7] Prefix ::= NCName + [8] LocalPart ::= NCName +*/ + +static int +my_xpath_parse_QName(MY_XPATH *xpath) +{ + const char *beg; + if (!my_xpath_parse_NCName(xpath)) + return 0; + beg= xpath->prevtok.beg; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_COLON)) + return 1; /* Non qualified name */ + if (!my_xpath_parse_NCName(xpath)) + return 0; + xpath->prevtok.beg= beg; + return 1; +} + + +/** + Scan Variable reference + + @details Implements parsing of two syntax structures: + + 1. Standard XPath syntax [36], for SP variables: + + VariableReference ::= '$' QName + + Finds a SP variable with the given name. + If outside of a SP context, or variable with + the given name doesn't exists, then error is returned. + + 2. Non-standard syntax - MySQL extension for user variables: + + VariableReference ::= '$' '@' QName + + Item, corresponding to the variable, is returned + in xpath->item in both cases. 
+ + @param xpath pointer to XPath structure + + @return Operation status + @retval 1 Success + @retval 0 Failure +*/ + +static int +my_xpath_parse_VariableReference(MY_XPATH *xpath) +{ + LEX_CSTRING name; + THD *thd= xpath->thd; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_DOLLAR)) + return 0; + const char *dollar_pos= xpath->prevtok.beg; + if (!dollar_pos) + return 0; + int user_var= my_xpath_parse_term(xpath, MY_XPATH_LEX_AT); + if (!((user_var && + my_xpath_parse_term(xpath, MY_XPATH_LEX_IDENT))) && + !my_xpath_parse_term(xpath, MY_XPATH_LEX_IDENT)) + return 0; + + name.length= xpath->prevtok.end - xpath->prevtok.beg; + name.str= (char*) xpath->prevtok.beg; + + if (user_var) + xpath->item= new (thd->mem_root) Item_func_get_user_var(thd, &name); + else + { + sp_variable *spv; + const Sp_rcontext_handler *rh; + LEX *lex; + /* + We call lex->find_variable() rather than thd->lex->spcont->find_variable() + to make sure package body variables are properly supported. + */ + if ((lex= thd->lex) && + (spv= lex->find_variable(&name, &rh))) + { + Item_splocal *splocal= new (thd->mem_root) + Item_splocal(thd, rh, &name, spv->offset, spv->type_handler(), 0); +#ifdef DBUG_ASSERT_EXISTS + if (splocal) + splocal->m_sp= lex->sphead; +#endif + xpath->item= (Item*) splocal; + } + else + { + xpath->item= NULL; + DBUG_ASSERT(xpath->query.end > dollar_pos); + uint len= (uint)(xpath->query.end - dollar_pos); + if (len <= 32) + my_printf_error(ER_UNKNOWN_ERROR, "Unknown XPATH variable at: '%.*s'", + MYF(0), len, dollar_pos); + else + my_printf_error(ER_UNKNOWN_ERROR, "Unknown XPATH variable at: '%.32T'", + MYF(0), dollar_pos); + } + } + return xpath->item ? 
1 : 0; +} + + +/* + Scan Name Test + + SYNOPSYS + + [37] NameTest ::= '*' + | NCName ':' '*' + | QName + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_NodeTest_QName(MY_XPATH *xpath) +{ + if (!my_xpath_parse_QName(xpath)) + return 0; + DBUG_ASSERT(xpath->context); + uint len= (uint)(xpath->prevtok.end - xpath->prevtok.beg); + xpath->context= nametestfunc(xpath, xpath->axis, xpath->context, + xpath->prevtok.beg, len); + return 1; +} +static int +my_xpath_parse_NodeTest_asterisk(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_ASTERISK)) + return 0; + DBUG_ASSERT(xpath->context); + xpath->context= nametestfunc(xpath, xpath->axis, xpath->context, "*", 1); + return 1; +} +static int +my_xpath_parse_NameTest(MY_XPATH *xpath) +{ + return my_xpath_parse_NodeTest_asterisk(xpath) || + my_xpath_parse_NodeTest_QName(xpath); +} + + +/* + Scan an XPath expression + + SYNOPSYS + Scan xpath expression. + The expression is returned in xpath->expr. + + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse(MY_XPATH *xpath, const char *str, const char *strend) +{ + my_xpath_lex_init(&xpath->query, str, strend); + my_xpath_lex_init(&xpath->prevtok, str, strend); + my_xpath_lex_scan(xpath, &xpath->lasttok, str, strend); + + xpath->rootelement= new (xpath->thd->mem_root) + Item_nodeset_func_rootelement(xpath->thd, + xpath->pxml); + + return (my_xpath_parse_Expr(xpath) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_EOF)); +} + + +bool Item_xml_str_func::fix_length_and_dec(THD *thd) +{ + max_length= MAX_BLOB_WIDTH; + return agg_arg_charsets_for_comparison(collation, args, arg_count); +} + + +bool Item_xml_str_func::fix_fields(THD *thd, Item **ref) +{ + String *xp; + MY_XPATH xpath; + int rc; + + if (Item_str_func::fix_fields(thd, ref)) + return true; + + status_var_increment(current_thd->status_var.feature_xml); + + nodeset_func= 0; + + + if (collation.collation->mbminlen > 1) + { + /* UCS2 is not supported */ + 
my_printf_error(ER_UNKNOWN_ERROR, + "Character set '%s' is not supported by XPATH", + MYF(0), collation.collation->cs_name.str); + return true; + } + + if (!args[1]->const_item()) + { + my_printf_error(ER_UNKNOWN_ERROR, + "Only constant XPATH queries are supported", MYF(0)); + return true; + } + + /* + Get the XPath query text from args[1] and cache it in m_xpath_query. + Its fragments will be referenced by items created during my_xpath_parse(), + e.g. by Item_nodeset_func_axisbyname::node_name. + */ + if (!(xp= args[1]->val_str(&m_xpath_query)) || + (xp != &m_xpath_query && m_xpath_query.copy(*xp))) + return false; // Will return NULL + my_xpath_init(&xpath); + xpath.thd= thd; + xpath.cs= collation.collation; + xpath.debug= 0; + xpath.pxml= xml.parsed(); + xml.set_charset(collation.collation); + + rc= my_xpath_parse(&xpath, xp->ptr(), xp->ptr() + xp->length()); + + if (!rc) + { + uint clen= (uint)(xpath.query.end - xpath.lasttok.beg); + if (clen <= 32) + my_printf_error(ER_UNKNOWN_ERROR, "XPATH syntax error: '%.*s'", + MYF(0), clen, xpath.lasttok.beg); + else + my_printf_error(ER_UNKNOWN_ERROR, "XPATH syntax error: '%.32T'", + MYF(0), xpath.lasttok.beg); + + return true; + } + + /* + Parsing XML is a heavy operation, so if the first argument is constant, + then parse XML only one time and cache the parsed representation + together with raw text representation. + + Note, we cannot cache the entire function result even if + the first and the second arguments are constants, because + the XPath expression may have user and SP variable references, + so the function result can vary between executions. 
+ */ + if ((args[0]->const_item() && get_xml(&xml, true)) || + !(nodeset_func= xpath.item)) + return false; // Will return NULL + + return nodeset_func->fix_fields(thd, &nodeset_func); +} + + +#define MAX_LEVEL 256 +typedef struct +{ + uint level; + String *pxml; // parsed XML + uint pos[MAX_LEVEL]; // Tag position stack + uint parent; // Offset of the parent of the current node +} MY_XML_USER_DATA; + + +static bool +append_node(String *str, MY_XML_NODE *node) +{ + /* + If "str" doesn't have space for a new node, + it will allocate two times more space that it has had so far. + (2*len+512) is a heuristic value, + which gave the best performance during tests. + The ideas behind this formula are: + - It allows to have a very small number of reallocs: + about 10 reallocs on a 1Mb-long XML value. + - At the same time, it avoids excessive memory use. + */ + if (str->reserve(sizeof(MY_XML_NODE), 2 * str->length() + 512)) + return TRUE; + str->q_append((const char*) node, sizeof(MY_XML_NODE)); + return FALSE; +} + + +/* + Process tag beginning + + SYNOPSYS + + A call-back function executed when XML parser + is entering a tag or an attribute. + Appends the new node into data->pxml. + Increments data->level. + + RETURN + Currently only MY_XML_OK +*/ +extern "C" int xml_enter(MY_XML_PARSER *st,const char *attr, size_t len); + +int xml_enter(MY_XML_PARSER *st,const char *attr, size_t len) +{ + MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data; + uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE); + MY_XML_NODE node; + + node.parent= data->parent; // Set parent for the new node to old parent + data->parent= numnodes; // Remember current node as new parent + DBUG_ASSERT(data->level < MAX_LEVEL); + data->pos[data->level]= numnodes; + if (data->level < MAX_LEVEL - 1) + node.level= data->level++; + else + return MY_XML_ERROR; + node.type= st->current_node_type; // TAG or ATTR + node.beg= attr; + node.end= attr + len; + return append_node(data->pxml, &node) ? 
MY_XML_ERROR : MY_XML_OK; +} + + +/* + Process text node + + SYNOPSYS + + A call-back function executed when XML parser + is entering into a tag or an attribute textual value. + The value is appended into data->pxml. + + RETURN + Currently only MY_XML_OK +*/ +extern "C" int xml_value(MY_XML_PARSER *st,const char *attr, size_t len); + +int xml_value(MY_XML_PARSER *st,const char *attr, size_t len) +{ + MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data; + MY_XML_NODE node; + + node.parent= data->parent; // Set parent for the new text node to old parent + node.level= data->level; + node.type= MY_XML_NODE_TEXT; + node.beg= attr; + node.end= attr + len; + return append_node(data->pxml, &node) ? MY_XML_ERROR : MY_XML_OK; +} + + +/* + Leave a tag or an attribute + + SYNOPSYS + + A call-back function executed when XML parser + is leaving a tag or an attribute. + Decrements data->level. + + RETURN + Currently only MY_XML_OK +*/ +extern "C" int xml_leave(MY_XML_PARSER *st,const char *attr, size_t len); + +int xml_leave(MY_XML_PARSER *st,const char *attr, size_t len) +{ + MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data; + DBUG_ASSERT(data->level > 0); + data->level--; + + MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr(); + data->parent= nodes[data->parent].parent; + nodes+= data->pos[data->level]; + nodes->tagend= st->cur; + + return MY_XML_OK; +} + + +/* + Parse raw XML + + SYNOPSYS + + RETURN + false on success + true on error +*/ +bool Item_xml_str_func::XML::parse() +{ + MY_XML_PARSER p; + MY_XML_USER_DATA user_data; + int rc; + + m_parsed_buf.length(0); + + /* Prepare XML parser */ + my_xml_parser_create(&p); + p.flags= MY_XML_FLAG_RELATIVE_NAMES | MY_XML_FLAG_SKIP_TEXT_NORMALIZATION; + user_data.level= 0; + user_data.pxml= &m_parsed_buf; + user_data.parent= 0; + my_xml_set_enter_handler(&p, xml_enter); + my_xml_set_value_handler(&p, xml_value); + my_xml_set_leave_handler(&p, xml_leave); + my_xml_set_user_data(&p, (void*) &user_data); + + /* Add root 
node */ + p.current_node_type= MY_XML_NODE_TAG; + xml_enter(&p, m_raw_ptr->ptr(), 0); + + /* Execute XML parser */ + if ((rc= my_xml_parse(&p, m_raw_ptr->ptr(), m_raw_ptr->length())) != MY_XML_OK) + { + THD *thd= current_thd; + char buf[128]; + my_snprintf(buf, sizeof(buf)-1, "parse error at line %d pos %lu: %s", + my_xml_error_lineno(&p) + 1, + (ulong) my_xml_error_pos(&p) + 1, + my_xml_error_string(&p)); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE, + ER_THD(thd, ER_WRONG_VALUE), "XML", buf); + m_raw_ptr= (String *) 0; + } + my_xml_parser_free(&p); + + return rc != MY_XML_OK; +} + + +/* + Parse the raw XML from the given source, + optionally cache the raw XML, + remember the pointer to the raw XML. +*/ +bool Item_xml_str_func::XML::parse(String *raw_xml, bool cache) +{ + m_raw_ptr= raw_xml; + if (cache) + { + m_cached= true; + if (m_raw_ptr != &m_raw_buf && m_raw_buf.copy(*m_raw_ptr)) + { + m_raw_ptr= (String *) 0; + return true; + } + m_raw_ptr= &m_raw_buf; + } + return parse(); +} + + +const MY_XML_NODE *Item_xml_str_func::XML::node(uint idx) +{ + const MY_XML_NODE *nodebeg= (MY_XML_NODE*) m_parsed_buf.ptr(); + DBUG_ASSERT(idx < m_parsed_buf.length() / sizeof (MY_XML_NODE)); + return nodebeg + idx; +} + + +String *Item_func_xml_extractvalue::val_str(String *str) +{ + String *res; + null_value= 0; + if (!nodeset_func || get_xml(&xml) || + !(res= nodeset_func->val_str(str))) + { + null_value= 1; + return 0; + } + return res; +} + + +bool Item_func_xml_update::collect_result(String *str, + const MY_XML_NODE *cut, + const String *replace) +{ + uint offs= cut->type == MY_XML_NODE_TAG ? 
1 : 0; + const char *end= cut->tagend + offs; + str->length(0); + str->set_charset(collation.collation); + return + /* Put the XML part preceding the replaced piece */ + str->append(xml.raw()->ptr(), cut->beg - xml.raw()->ptr() - offs) || + /* Put the replacement */ + str->append(replace->ptr(), replace->length()) || + /* Put the XML part following the replaced piece */ + str->append(end, xml.raw()->ptr() + xml.raw()->length() - end); +} + + +String *Item_func_xml_update::val_str(String *str) +{ + String *rep; + + null_value= 0; + if (!nodeset_func || get_xml(&xml) || + !(rep= args[2]->val_str(&tmp_value3)) || + nodeset_func->type_handler() != &type_handler_xpath_nodeset || + nodeset_func->val_native(current_thd, &tmp_native_value2)) + { + null_value= 1; + return 0; + } + + MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) tmp_native_value2.ptr(); + MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) tmp_native_value2.end(); + + /* Allow replacing of one tag only */ + if (fltend - fltbeg != 1) + { + /* TODO: perhaps add a warning that more than one tag selected */ + return xml.raw(); + } + + const MY_XML_NODE *nodebeg= xml.node(fltbeg->num); + + if (!nodebeg->level) + { + /* + Root element, without NameTest: + UpdateXML(xml, '/', 'replacement'); + Just return the replacement string. + */ + return rep; + } + + return collect_result(str, nodebeg, rep) ? (String *) NULL : str; +} diff --git a/sql/item_xmlfunc.h b/sql/item_xmlfunc.h new file mode 100644 index 00000000..f6e153c9 --- /dev/null +++ b/sql/item_xmlfunc.h @@ -0,0 +1,165 @@ +#ifndef ITEM_XMLFUNC_INCLUDED +#define ITEM_XMLFUNC_INCLUDED + +/* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2009, 2019, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* This file defines all XML functions */ + + +typedef struct my_xml_node_st MY_XML_NODE; + + +/* Structure to store nodeset elements */ +class MY_XPATH_FLT +{ +public: + uint num; // Absolute position in MY_XML_NODE array + uint pos; // Relative position in context + uint size; // Context size +public: + MY_XPATH_FLT(uint32 num_arg, uint32 pos_arg) + :num(num_arg), pos(pos_arg), size(0) + { } + MY_XPATH_FLT(uint32 num_arg, uint32 pos_arg, uint32 size_arg) + :num(num_arg), pos(pos_arg), size(size_arg) + { } + bool append_to(Native *to) const + { + return to->append((const char*) this, (uint32) sizeof(*this)); + } +}; + + +class NativeNodesetBuffer: public NativeBuffer<16*sizeof(MY_XPATH_FLT)> +{ +public: + const MY_XPATH_FLT &element(uint i) const + { + const MY_XPATH_FLT *p= (MY_XPATH_FLT*) (ptr() + i * sizeof(MY_XPATH_FLT)); + return *p; + } + uint32 elements() const + { + return length() / sizeof(MY_XPATH_FLT); + } +}; + + +class Item_xml_str_func: public Item_str_func +{ +protected: + /* + A helper class to store raw and parsed XML. 
+ */ + class XML + { + bool m_cached; + String *m_raw_ptr; // Pointer to text representation + String m_raw_buf; // Cached text representation + String m_parsed_buf; // Array of MY_XML_NODEs, pointing to raw_buffer + bool parse(); + void reset() + { + m_cached= false; + m_raw_ptr= (String *) 0; + } + public: + XML() { reset(); } + void set_charset(CHARSET_INFO *cs) { m_parsed_buf.set_charset(cs); } + String *raw() { return m_raw_ptr; } + String *parsed() { return &m_parsed_buf; } + const MY_XML_NODE *node(uint idx); + bool cached() { return m_cached; } + bool parse(String *raw, bool cache); + bool parse(Item *item, bool cache) + { + String *res; + if (!(res= item->val_str(&m_raw_buf))) + { + m_raw_ptr= (String *) 0; + m_cached= cache; + return true; + } + return parse(res, cache); + } + }; + String m_xpath_query; // XPath query text + Item *nodeset_func; + XML xml; + bool get_xml(XML *xml_arg, bool cache= false) + { + if (!cache && xml_arg->cached()) + return xml_arg->raw() == 0; + return xml_arg->parse(args[0], cache); + } +public: + Item_xml_str_func(THD *thd, Item *a, Item *b): Item_str_func(thd, a, b) + { + set_maybe_null(); + } + Item_xml_str_func(THD *thd, Item *a, Item *b, Item *c): + Item_str_func(thd, a, b, c) + { + set_maybe_null(); + } + bool fix_fields(THD *thd, Item **ref) override; + bool fix_length_and_dec(THD *thd) override; + bool const_item() const override + { + return const_item_cache && (!nodeset_func || nodeset_func->const_item()); + } +}; + + +class Item_func_xml_extractvalue: public Item_xml_str_func +{ +public: + Item_func_xml_extractvalue(THD *thd, Item *a, Item *b): + Item_xml_str_func(thd, a, b) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("extractvalue") }; + return name; + } + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + + +class Item_func_xml_update: public Item_xml_str_func +{ + NativeNodesetBuffer 
tmp_native_value2; + String tmp_value3; + bool collect_result(String *str, + const MY_XML_NODE *cut, + const String *replace); +public: + Item_func_xml_update(THD *thd, Item *a, Item *b, Item *c): + Item_xml_str_func(thd, a, b, c) {} + LEX_CSTRING func_name_cstring() const override + { + static LEX_CSTRING name= {STRING_WITH_LEN("updatexml") }; + return name; + } + String *val_str(String *) override; + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } +}; + +#endif /* ITEM_XMLFUNC_INCLUDED */ diff --git a/sql/json_table.cc b/sql/json_table.cc new file mode 100644 index 00000000..4f3cfb6b --- /dev/null +++ b/sql/json_table.cc @@ -0,0 +1,1478 @@ +/* + Copyright (c) 2020, MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_class.h" /* TMP_TABLE_PARAM */ +#include "table.h" +#include "sql_type_json.h" +#include "item_jsonfunc.h" +#include "json_table.h" +#include "sql_show.h" +#include "sql_select.h" +#include "create_tmp_table.h" +#include "sql_parse.h" + +#define HA_ERR_JSON_TABLE (HA_ERR_LAST+1) + +/* + Allocating memory and *also* using it (reading and + writing from it) because some build instructions cause + compiler to optimize out stack_used_up. 
Since alloca() + here depends on stack_used_up, it doesnt get executed + correctly and causes json_debug_nonembedded to fail + ( --error ER_STACK_OVERRUN_NEED_MORE does not occur). +*/ +#define ALLOCATE_MEM_ON_STACK(A) do \ + { \ + uchar *array= (uchar*)alloca(A); \ + array[0]= 1; \ + array[0]++; \ + array[0] ? array[0]++ : array[0]--; \ + } while(0) + +class table_function_handlerton +{ +public: + handlerton m_hton; + table_function_handlerton() + { + bzero(&m_hton, sizeof(m_hton)); + m_hton.tablefile_extensions= hton_no_exts; + m_hton.slot= HA_SLOT_UNDEF; + } +}; + + +static table_function_handlerton table_function_hton; + +/* + @brief + Collect a set of tables that a given table function cannot have + references to. + + @param + table_func The table function we are connecting info for + join_list The nested join to be processed + disallowed_tables Collect the tables here. + + @detail + According to the SQL standard, a table function can refer to any table + that's "preceding" it in the FROM clause. + + The other limitation we would like to enforce is that the inner side of + an outer join cannot refer to the outer side. An example: + + SELECT * from JSON_TABLE(t1.col, ...) left join t1 on ... + + This function implements both of the above restrictions. + + Basic idea: the "join_list" contains the tables in the order that's a + reverse of the order they were specified in the query. + If we walk the join_list, we will encounter: + 1. First, the tables that table function cannot refer to (collect them in a + bitmap) + 2. Then, the table function itself (put it in the bitmap, too, as self- + references are not allowed, and stop the walk) + 3. Tables that the table function CAN refer to (we don't walk these as + we've stopped on step #2). + + The above can be applied recursively for nested joins (this covers NATURAL + JOIN, and JOIN ... USING constructs). 
+ + Enforcing the "refer to only preceding tables" rule means that outer side + of LEFT JOIN cannot refer to the inner side. + + Handing RIGHT JOINs: There are no RIGHT JOINs in the join_list data + structures. They were converted to LEFT JOINs (see calls to st_select_lex:: + convert_right_join). This conversion changes the order of tables, but + we are ok with operating on the tables "in the left join order". + + @return + 0 - Continue + 1 - Finish the process, success + -1 - Finish the process, failure +*/ + +static +int get_disallowed_table_deps_for_list(MEM_ROOT *mem_root, + TABLE_LIST *table_func, + List *join_list, + List *disallowed_tables) +{ + TABLE_LIST *table; + NESTED_JOIN *nested_join; + List_iterator li(*join_list); + + DBUG_EXECUTE_IF("json_check_min_stack_requirement", + { + long arbitrary_var; + long stack_used_up= (available_stack_size(current_thd->thread_stack, &arbitrary_var)); + ALLOCATE_MEM_ON_STACK(my_thread_stack_size-stack_used_up-STACK_MIN_SIZE); + }); + if (check_stack_overrun(current_thd, STACK_MIN_SIZE , NULL)) + return 1; + + while ((table= li++)) + { + if ((nested_join= table->nested_join)) + { + int res; + if ((res= get_disallowed_table_deps_for_list(mem_root, table_func, + &nested_join->join_list, + disallowed_tables))) + return res; + } + else + { + if (disallowed_tables->push_back(table, mem_root)) + return -1; + if (table == table_func) + { + // This is the JSON_TABLE(...) that are we're computing dependencies + // for. + return 1; // Finish the processing + } + } + } + return 0; // Continue +} + + +/* + @brief + Given a join and a table function in it (specified by its table_func_bit), + produce a bitmap of tables that the table function can NOT have references + to. + + @detail + See get_disallowed_table_deps_for_list + + @return + NULL - Out of memory + Other - A list of tables that the function cannot have references to. May + be empty. 
+*/ + +static +List* get_disallowed_table_deps(MEM_ROOT *mem_root, + SELECT_LEX *select, + TABLE_LIST *table_func) +{ + List *disallowed_tables; + + if (!(disallowed_tables = new (mem_root) List)) + return NULL; + + int res= get_disallowed_table_deps_for_list(mem_root, table_func, + select->join_list, + disallowed_tables); + + // The collection process must have finished + DBUG_ASSERT(res != 0); + + if (res == -1) + return NULL; // Out of memory + + return disallowed_tables; +} + + +/* + A table that produces output rows for JSON_TABLE(). +*/ + +class ha_json_table: public handler +{ + Table_function_json_table *m_jt; + + String *m_js; // The JSON document we're reading + String m_tmps; // Buffer for the above + + int fill_column_values(THD *thd, uchar * buf, uchar *pos); + +public: + ha_json_table(TABLE_SHARE *share_arg, Table_function_json_table *jt): + handler(&table_function_hton.m_hton, share_arg), m_jt(jt) + { + /* + set the mark_trx_read_write_done to avoid the + handler::mark_trx_read_write_internal() call. + It relies on &ha_thd()->ha_data[ht->slot].ha_info[0] to be set. + But we don't set the ha_data for the ha_json_table, and + that call makes no sence for ha_json_table. 
+ */ + mark_trx_read_write_done= 1; + + /* See ha_json_table::position for format definition */ + ref_length= m_jt->m_columns.elements * 4; + } + ~ha_json_table() {} + handler *clone(const char *name, MEM_ROOT *mem_root) override { return NULL; } + /* Rows also use a fixed-size format */ + enum row_type get_row_type() const override { return ROW_TYPE_FIXED; } + const char *table_type() const override + { + return "JSON_TABLE function"; + } + ulonglong table_flags() const override + { + return (HA_FAST_KEY_READ | /*HA_NO_BLOBS |*/ HA_NULL_IN_KEY | + HA_CAN_SQL_HANDLER | + HA_REC_NOT_IN_SEQ | HA_NO_TRANSACTIONS | + HA_HAS_RECORDS); + } + ulong index_flags(uint inx, uint part, bool all_parts) const override + { + return HA_ONLY_WHOLE_INDEX | HA_KEY_SCAN_NOT_ROR; + } + ha_rows records() override { return HA_POS_ERROR; } + + int open(const char *name, int mode, uint test_if_locked) override + { return 0; } + int close(void) override { return 0; } + int rnd_init(bool scan) override; + int rnd_next(uchar *buf) override; + int rnd_pos(uchar * buf, uchar *pos) override; + void position(const uchar *record) override; + int info(uint) override; + int extra(enum ha_extra_function operation) override { return 0; } + THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, + enum thr_lock_type lock_type) override + { return NULL; } + int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info) + override { return 1; } + /* Give no message. */ + bool get_error_message(int error, String *buf) override + { + buf->length(0); + return TRUE; + } +}; + + +/* + Helper class that creates the temporary table that + represents the table function in the query. 
+*/ + +class Create_json_table final: public Create_tmp_table +{ +public: + Create_json_table() : + Create_tmp_table((ORDER*) 0, 0, 0, 0, 0) + {} + virtual ~Create_json_table() {}; + TABLE *start(THD *thd, + TMP_TABLE_PARAM *param, + Table_function_json_table *jt, + const LEX_CSTRING *table_alias); + bool choose_engine(THD *thd, TABLE *table, TMP_TABLE_PARAM *param) override + { + return 0; // Engine already choosen + } + bool add_json_table_fields(THD *thd, TABLE *table, + Table_function_json_table *jt); + bool finalize(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, + Table_function_json_table *jt); +}; + + +/* + @brief + Start scanning the JSON document in [str ... end] + + @detail + Note: non-root nested paths are set to scan one JSON node (that is, a + "subdocument"). +*/ + +void Json_table_nested_path::scan_start(CHARSET_INFO *i_cs, + const uchar *str, const uchar *end) +{ + json_get_path_start(&m_engine, i_cs, str, end, &m_cur_path); + m_cur_nested= NULL; + m_null= false; + m_ordinality_counter= 0; +} + + +/* + @brief + Find the next JSON element that matches the search path. +*/ + +int Json_table_nested_path::scan_next() +{ + bool no_records_found= false; + if (m_cur_nested) + { + for (;;) + { + if (m_cur_nested->scan_next() == 0) + return 0; + if (!(m_cur_nested= m_cur_nested->m_next_nested)) + break; +handle_new_nested: + m_cur_nested->scan_start(m_engine.s.cs, m_engine.value_begin, + m_engine.s.str_end); + } + if (no_records_found) + return 0; + } + + DBUG_ASSERT(!m_cur_nested); + + while (!json_get_path_next(&m_engine, &m_cur_path)) + { + if (json_path_compare(&m_path, &m_cur_path, m_engine.value_type, + NULL)) + continue; + /* path found. 
*/ + ++m_ordinality_counter; + + if (!m_nested) + return 0; + + m_cur_nested= m_nested; + no_records_found= true; + goto handle_new_nested; + } + + m_null= true; + return 1; +} + + +int ha_json_table::rnd_init(bool scan) +{ + Json_table_nested_path &p= m_jt->m_nested_path; + DBUG_ENTER("ha_json_table::rnd_init"); + + if ((m_js= m_jt->m_json->val_str(&m_tmps))) + { + p.scan_start(m_js->charset(), + (const uchar *) m_js->ptr(), (const uchar *) m_js->end()); + } + + DBUG_RETURN(0); +} + + +/* + @brief + Store JSON value in an SQL field, doing necessary special conversions + for JSON's null, true, and false. +*/ + +static void store_json_in_field(Field *f, const json_engine_t *je) +{ + switch (je->value_type) + { + case JSON_VALUE_NULL: + f->set_null(); + return; + + case JSON_VALUE_TRUE: + case JSON_VALUE_FALSE: + { + Item_result rt= f->result_type(); + if (rt == INT_RESULT || rt == DECIMAL_RESULT || rt == REAL_RESULT) + { + f->store(je->value_type == JSON_VALUE_TRUE, false); + return; + } + break; + } + default: + break; + }; + f->store((const char *) je->value, (uint32) je->value_len, je->s.cs); +} + + +static int store_json_in_json(Field *f, json_engine_t *je) +{ + const uchar *from= je->value_begin; + const uchar *to; + + if (json_value_scalar(je)) + to= je->value_end; + else + { + int error; + if ((error= json_skip_level(je))) + return error; + to= je->s.c_str; + } + f->store((const char *) from, (uint32) (to - from), je->s.cs); + return 0; +} + + +bool Json_table_nested_path::check_error(const char *str) +{ + if (m_engine.s.error) + { + report_json_error_ex(str, &m_engine, "JSON_TABLE", 0, + Sql_condition::WARN_LEVEL_ERROR); + return true; // Error + } + return false; // Ok +} + + +int ha_json_table::rnd_next(uchar *buf) +{ + if (!m_js) + return HA_ERR_END_OF_FILE; + + /* + Step 1: Move the root nested path to the next record (this implies moving + its child nested paths accordingly) + */ + if (m_jt->m_nested_path.scan_next()) + { + if 
(m_jt->m_nested_path.check_error(m_js->ptr())) + { + /* + We already reported an error, so returning an + error code that just doesn't produce extra + messages. + */ + return HA_ERR_JSON_TABLE; + } + return HA_ERR_END_OF_FILE; + } + + /* + Step 2: Read values for all columns (the columns refer to nested paths + they are in). + */ + return fill_column_values(table->in_use, buf, NULL) ? HA_ERR_JSON_TABLE : 0; +} + + +/* + @brief + Fill values of table columns, taking data either from Json_nested_path + objects, or from the rowid value + + @param pos NULL means the data should be read from Json_nested_path + objects. + Non-null value is a pointer to previously saved rowid (see + ha_json_table::position() for description) +*/ + +int ha_json_table::fill_column_values(THD *thd, uchar * buf, uchar *pos) +{ + MY_BITMAP *orig_map= dbug_tmp_use_all_columns(table, &table->write_set); + int error= 0; + Counting_error_handler er_handler; + Field **f= table->field; + Json_table_column *jc; + List_iterator_fast jc_i(m_jt->m_columns); + my_ptrdiff_t ptrdiff= buf - table->record[0]; + Abort_on_warning_instant_set ao_set(table->in_use, FALSE); + enum_check_fields cf_orig= table->in_use->count_cuted_fields; + + table->in_use->count_cuted_fields= CHECK_FIELD_ERROR_FOR_NULL; + + thd->push_internal_handler(&er_handler); + + while (!error && (jc= jc_i++)) + { + bool is_null_value; + uint int_pos= 0; /* just to make compilers happy. */ + + if (!bitmap_is_set(table->read_set, (*f)->field_index)) + { + /* + If the RESPONSE_ERROR is set for the column, we have + to unpack it even if it's not in the read_set - to check + for possible errors. + */ + if (jc->m_on_empty.m_response != Json_table_column::RESPONSE_ERROR && + jc->m_on_error.m_response != Json_table_column::RESPONSE_ERROR) + goto cont_loop; + } + + (*f)->move_field_offset(ptrdiff); + + /* + Read the NULL flag: + - if we are reading from a rowid value, 0 means SQL NULL. 
+ - if scanning json document, read it from the nested path + */ + if (pos) + is_null_value= !(int_pos= uint4korr(pos)); + else + is_null_value= jc->m_nest->m_null; + + if (is_null_value) + { + (*f)->set_null(); + } + else + { + (*f)->set_notnull(); + switch (jc->m_column_type) + { + case Json_table_column::FOR_ORDINALITY: + { + /* + Read the cardinality counter: + - read it from nested path when scanning the json document + - or, read it from rowid when in rnd_pos() call + */ + longlong counter= pos? int_pos: jc->m_nest->m_ordinality_counter; + (*f)->store(counter, TRUE); + break; + } + case Json_table_column::PATH: + case Json_table_column::EXISTS_PATH: + { + json_engine_t je; + json_path_step_t *cur_step; + int array_counters[JSON_DEPTH_LIMIT]; + int not_found; + const uchar* node_start; + const uchar* node_end; + + /* + Get the JSON context node that we will need to evaluate PATH or + EXISTS against: + - when scanning the json document, read it from nested path + - when in rnd_pos call, the rowid has the start offset. 
+ */ + if (pos) + { + node_start= (const uchar *) (m_js->ptr() + (int_pos-1)); + node_end= (const uchar *) m_js->end(); + } + else + { + node_start= jc->m_nest->get_value(); + node_end= jc->m_nest->get_value_end(); + } + + json_scan_start(&je, m_js->charset(), node_start, node_end); + + cur_step= jc->m_path.steps; + not_found= json_find_path(&je, &jc->m_path, &cur_step, array_counters) || + json_read_value(&je); + + if (jc->m_column_type == Json_table_column::EXISTS_PATH) + { + (*f)->store(!not_found); + } + else /*PATH*/ + { + if (not_found) + { + error= jc->m_on_empty.respond(jc, *f, ER_JSON_TABLE_ERROR_ON_FIELD); + } + else + { + if (jc->m_format_json) + { + if (!(error= store_json_in_json(*f, &je))) + error= er_handler.errors; + } + else if (!(error= !json_value_scalar(&je))) + { + store_json_in_field(*f, &je); + error= er_handler.errors; + } + + if (error) + { + error= jc->m_on_error.respond(jc, *f, + ER_JSON_TABLE_SCALAR_EXPECTED); + er_handler.errors= 0; + } + else + { + /* + If the path contains wildcards, check if there are + more matches for it in json and report an error if so. + */ + if (jc->m_path.types_used & + (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD | + JSON_PATH_ARRAY_RANGE) && + (json_scan_next(&je) || + !json_find_path(&je, &jc->m_path, &cur_step, + array_counters))) + { + error= jc->m_on_error.respond(jc, *f, + ER_JSON_TABLE_MULTIPLE_MATCHES); + } + } + } + } + break; + } + }; + } + (*f)->move_field_offset(-ptrdiff); + +cont_loop: + f++; + if (pos) + pos+= 4; + } + + dbug_tmp_restore_column_map(&table->write_set, orig_map); + thd->pop_internal_handler(); + thd->count_cuted_fields= cf_orig; + return error; +} + + +int ha_json_table::rnd_pos(uchar * buf, uchar *pos) +{ + return fill_column_values(table->in_use, buf, pos) ? HA_ERR_JSON_TABLE : 0; +} + + +/* + The reference has 4 bytes for every column of the JSON_TABLE. 
+ There it keeps 0 for the NULL values, ordinality index for + the ORDINALITY columns and the offset of the field's data in + the JSON for other column types. +*/ +void ha_json_table::position(const uchar *record) +{ + uchar *c_ref= ref; + Json_table_column *jc; + List_iterator_fast jc_i(m_jt->m_columns); + + while ((jc= jc_i++)) + { + if (jc->m_nest->m_null) + { + int4store(c_ref, 0); + } + else + { + switch (jc->m_column_type) + { + case Json_table_column::FOR_ORDINALITY: + int4store(c_ref, jc->m_nest->m_ordinality_counter); + break; + case Json_table_column::PATH: + case Json_table_column::EXISTS_PATH: + { + size_t pos= jc->m_nest->get_value() - + (const uchar *) m_js->ptr() + 1; + int4store(c_ref, pos); + break; + } + }; + } + c_ref+= 4; + } +} + + +int ha_json_table::info(uint) +{ + /* + We don't want 0 or 1 in stats.records. + Though this value shouldn't matter as the optimizer + supposed to use Table_function_json_table::get_estimates + to obtain this data. + */ + stats.records= 4; + return 0; +} + + +/** + Create a json table according to a field list. 
+ + @param thd thread handle + @param param a description used as input to create the table + @param jt json_table specificaion + @param table_alias alias +*/ + +TABLE *Create_json_table::start(THD *thd, + TMP_TABLE_PARAM *param, + Table_function_json_table *jt, + const LEX_CSTRING *table_alias) +{ + TABLE *table; + TABLE_SHARE *share; + DBUG_ENTER("Create_json_table::start"); + + param->tmp_name= "json"; + if (!(table= Create_tmp_table::start(thd, param, table_alias))) + DBUG_RETURN(0); + share= table->s; + share->not_usable_by_query_cache= FALSE; + share->db_plugin= NULL; + if (!(table->file= new (&table->mem_root) ha_json_table(share, jt))) + DBUG_RETURN(NULL); + table->file->init(); + DBUG_RETURN(table); +} + + +bool Create_json_table::finalize(THD *thd, TABLE *table, + TMP_TABLE_PARAM *param, + Table_function_json_table *jt) +{ + DBUG_ENTER("Create_json_table::finalize"); + DBUG_ASSERT(table); + + if (Create_tmp_table::finalize(thd, table, param, 1, 0)) + DBUG_RETURN(true); + + table->db_stat= HA_OPEN_KEYFILE; + if (unlikely(table->file->ha_open(table, table->s->path.str, O_RDWR, + HA_OPEN_TMP_TABLE | HA_OPEN_INTERNAL_TABLE))) + DBUG_RETURN(true); + + table->set_created(); + table->s->max_rows= ~(ha_rows) 0; + param->end_write_records= HA_POS_ERROR; + DBUG_RETURN(0); +} + + +/* + @brief + Read the JSON_TABLE's field definitions from @jt and add the fields to + table @table. 
+*/ + +bool Create_json_table::add_json_table_fields(THD *thd, TABLE *table, + Table_function_json_table *jt) +{ + TABLE_SHARE *share= table->s; + Json_table_column *jc; + uint fieldnr= 0; + MEM_ROOT *mem_root_save= thd->mem_root; + List_iterator_fast jc_i(jt->m_columns); + Column_derived_attributes da(&my_charset_utf8mb4_general_ci); + DBUG_ENTER("add_json_table_fields"); + + thd->mem_root= &table->mem_root; + current_counter= other; + + while ((jc= jc_i++)) + { + Create_field *sql_f= jc->m_field; + List_iterator_fast it2(jt->m_columns); + Json_table_column *jc2; + /* + Initialize length from its original value (number of characters), + which was set in the parser. This is necessary if we're + executing a prepared statement for the second time. + */ + sql_f->length= sql_f->char_length; + + if (sql_f->prepare_stage1(thd, thd->mem_root, + COLUMN_DEFINITION_TABLE_FIELD, + &da)) + goto err_exit; + + while ((jc2= it2++) != jc) + { + if (lex_string_cmp(system_charset_info, + &sql_f->field_name, &jc2->m_field->field_name) == 0) + { + my_error(ER_DUP_FIELDNAME, MYF(0), sql_f->field_name.str); + goto err_exit; + } + } + it2.rewind(); + } + + jc_i.rewind(); + + while ((jc= jc_i++)) + { + Create_field *sql_f= jc->m_field; + Record_addr addr(!(sql_f->flags & NOT_NULL_FLAG)); + Bit_addr bit(addr.null()); + uint uneven_delta; + + sql_f->prepare_stage2(table->file, table->file->ha_table_flags()); + + if (!sql_f->charset) + sql_f->charset= &my_charset_utf8mb4_bin; + + Field *f= sql_f->type_handler()->make_table_field_from_def(share, + thd->mem_root, &sql_f->field_name, addr, bit, sql_f, sql_f->flags); + if (!f) + goto err_exit; + f->init(table); + uneven_delta= m_uneven_bit_length; + add_field(table, f, fieldnr++, 0); + m_uneven_bit[current_counter]+= (m_uneven_bit_length - uneven_delta); + } + + share->fields= fieldnr; + share->blob_fields= m_blob_count; + table->field[fieldnr]= 0; // End marker + share->blob_field[m_blob_count]= 0; // End marker + share->column_bitmap_size= 
bitmap_buffer_size(share->fields); + + thd->mem_root= mem_root_save; + + DBUG_RETURN(FALSE); +err_exit: + thd->mem_root= mem_root_save; + DBUG_RETURN(TRUE); +} + + +/* + @brief + Given a TABLE_LIST representing JSON_TABLE(...) syntax, create a temporary + table for it. + + @detail + The temporary table will have: + - fields whose names/datatypes are specified in JSON_TABLE(...) syntax + - a ha_json_table as the storage engine. + + The uses of the temporary table are: + - name resolution: the query may have references to the columns of + JSON_TABLE(...). A TABLE object will allow to resolve them. + - query execution: ha_json_table will produce JSON_TABLE's rows. +*/ + +TABLE *create_table_for_function(THD *thd, TABLE_LIST *sql_table) +{ + TMP_TABLE_PARAM tp; + TABLE *table; + uint field_count= sql_table->table_function->m_columns.elements+1; + + DBUG_ENTER("create_table_for_function"); + + tp.init(); + tp.table_charset= system_charset_info; + tp.field_count= field_count; + { + Create_json_table maker; + + if (!(table= maker.start(thd, &tp, + sql_table->table_function, &sql_table->alias)) || + maker.add_json_table_fields(thd, table, sql_table->table_function) || + maker.finalize(thd, table, &tp, sql_table->table_function)) + { + if (table) + free_tmp_table(thd, table); + DBUG_RETURN(NULL); + } + } + sql_table->schema_table_name.length= 0; + + my_bitmap_map* bitmaps= + (my_bitmap_map*) thd->alloc(bitmap_buffer_size(field_count)); + my_bitmap_init(&table->def_read_set, (my_bitmap_map*) bitmaps, field_count); + table->read_set= &table->def_read_set; + bitmap_clear_all(table->read_set); + table->alias_name_used= true; + table->next= thd->derived_tables; + thd->derived_tables= table; + table->s->tmp_table= INTERNAL_TMP_TABLE; + table->grant.privilege= SELECT_ACL; + + sql_table->table= table; + + DBUG_RETURN(table); +} + + +int Json_table_column::set(THD *thd, enum_type ctype, const LEX_CSTRING &path, + CHARSET_INFO *cs) +{ + set(ctype); + m_explicit_cs= cs; + if 
(json_path_setup(&m_path, thd->variables.collation_connection, + (const uchar *) path.str, (const uchar *)(path.str + path.length))) + { + report_path_error_ex(path.str, &m_path, "JSON_TABLE", 1, + Sql_condition::WARN_LEVEL_ERROR); + return 1; + } + + /* + This is done so the ::print function can just print the path string. + Can be removed if we redo that function to print the path using it's + anctual content. Not sure though if we should. + */ + m_path.s.c_str= (const uchar *) path.str; + + if (ctype == PATH) + m_format_json= m_field->type_handler() == &type_handler_long_blob_json; + + return 0; +} + + +int Json_table_column::set(THD *thd, enum_type ctype, const LEX_CSTRING &path, + const Lex_column_charset_collation_attrs_st &cl) +{ + if (cl.is_empty() || cl.is_contextually_typed_collate_default()) + return set(thd, ctype, path, nullptr); + + CHARSET_INFO *tmp; + if (!(tmp= cl.resolved_to_character_set(&my_charset_utf8mb4_general_ci))) + return 1; + return set(thd, ctype, path, tmp); +} + + +static int print_path(String *str, const json_path_t *p) +{ + return str->append('\'') || + str->append_for_single_quote((const char *) p->s.c_str, + p->s.str_end - p->s.c_str) || + str->append('\''); +} + + +/* + Print the string representation of the Json_table_column. 
+ + @param thd - the thread + @param f - the remaining array of Field-s from the table + if the Json_table_column + @param str - the string where to print +*/ +int Json_table_column::print(THD *thd, Field **f, String *str) +{ + StringBuffer column_type(str->charset()); + + if (append_identifier(thd, str, &m_field->field_name) || + str->append(' ')) + return 1; + + switch (m_column_type) + { + case FOR_ORDINALITY: + if (str->append(STRING_WITH_LEN("FOR ORDINALITY"))) + return 1; + break; + case EXISTS_PATH: + case PATH: + { + static const LEX_CSTRING path= { STRING_WITH_LEN(" PATH ") }; + static const LEX_CSTRING exists_path= { STRING_WITH_LEN(" EXISTS PATH ") }; + + (*f)->sql_type(column_type); + + if (str->append(column_type) || + ((*f)->has_charset() && m_explicit_cs && + (str->append(STRING_WITH_LEN(" CHARSET ")) || + str->append(&m_explicit_cs->cs_name) || + (Charset(m_explicit_cs).can_have_collate_clause() && + (str->append(STRING_WITH_LEN(" COLLATE ")) || + str->append(&m_explicit_cs->coll_name))))) || + str->append(m_column_type == PATH ? &path : &exists_path) || + print_path(str, &m_path)) + return 1; + break; + } + }; + + if (m_on_empty.print("EMPTY", str) || + m_on_error.print("ERROR", str)) + return 1; + + return 0; +} + + +int Json_table_nested_path::set_path(THD *thd, const LEX_CSTRING &path) +{ + if (json_path_setup(&m_path, thd->variables.collation_connection, + (const uchar *) path.str, (const uchar *)(path.str + path.length))) + { + report_path_error_ex(path.str, &m_path, "JSON_TABLE", 1, + Sql_condition::WARN_LEVEL_ERROR); + return 1; + } + + /* + This is done so the ::print function can just print the path string. + Can be removed if we redo that function to print the path using its + actual content. Not sure though if we should. + */ + m_path.s.c_str= (const uchar *) path.str; + return 0; +} + + +/* + @brief + Perform the action of this response on field @f (emit an error, or set @f + to NULL, or set it to default value). 
+ error_num supposed to have the error message with field_name and table_name + arguments. +*/ + +int Json_table_column::On_response::respond(Json_table_column *jc, Field *f, + uint error_num) +{ + switch (m_response) + { + case Json_table_column::RESPONSE_NOT_SPECIFIED: + case Json_table_column::RESPONSE_NULL: + f->set_null(); + break; + case Json_table_column::RESPONSE_ERROR: + f->set_null(); + my_error(error_num, MYF(0), f->field_name.str, f->table->alias.ptr()); + return 1; + case Json_table_column::RESPONSE_DEFAULT: + f->set_notnull(); + f->store(m_default.str, + m_default.length, jc->m_defaults_cs); + break; + } + return 0; +} + + +int Json_table_column::On_response::print(const char *name, String *str) const +{ + LEX_CSTRING resp; + const LEX_CSTRING *ds= NULL; + if (m_response == Json_table_column::RESPONSE_NOT_SPECIFIED) + return 0; + + switch (m_response) + { + case Json_table_column::RESPONSE_NULL: + lex_string_set3(&resp, STRING_WITH_LEN("NULL")); + break; + case Json_table_column::RESPONSE_ERROR: + lex_string_set3(&resp, STRING_WITH_LEN("ERROR")); + break; + case Json_table_column::RESPONSE_DEFAULT: + { + lex_string_set3(&resp, STRING_WITH_LEN("DEFAULT")); + ds= &m_default; + break; + } + default: + lex_string_set3(&resp, "", 0); + DBUG_ASSERT(FALSE); /* should never happen. 
*/ + } + + return (str->append(' ') || str->append(resp) || + (ds && (str->append(STRING_WITH_LEN(" '")) || + str->append_for_single_quote(ds->str, ds->length) || + str->append('\''))) || + str->append(STRING_WITH_LEN(" ON ")) || + str->append(name, strlen(name))); +} + + +void Table_function_json_table::start_nested_path(Json_table_nested_path *np) +{ + np->m_parent= cur_parent; + *last_sibling_hook= np; + + // Make the newly added path the parent + cur_parent= np; + last_sibling_hook= &np->m_nested; +} + + +void Table_function_json_table::end_nested_path() +{ + last_sibling_hook= &cur_parent->m_next_nested; + cur_parent= cur_parent->m_parent; +} + + +/* + @brief Create a name resolution context for doing name resolution in table + function argument. + + @seealso + push_new_name_resolution_context +*/ + +bool push_table_function_arg_context(LEX *lex, MEM_ROOT *alloc) +{ + // Walk the context stack until we find a context that is used for resolving + // the SELECT's WHERE clause. + List_iterator it(lex->context_stack); + Name_resolution_context *ctx; + while ((ctx= it++)) + { + if (ctx->select_lex && ctx == &ctx->select_lex->context) + break; + } + DBUG_ASSERT(ctx); + + // Then, create a copy of it and return it. + Name_resolution_context *new_ctx= new (alloc) Name_resolution_context; + + // Note: not all fields of *ctx are initialized yet at this point. + // We will get all of the fields filled in Table_function_json_table::setup + // (search for the "Prepare the name resolution context" comment). + *new_ctx= *ctx; + return lex->push_context(new_ctx); +} + + +/* + @brief + Perform name-resolution phase tasks + + @detail + The only argument that needs name resolution is the first parameter which + has the JSON text: + + JSON_TABLE(json_doc, ... ) + + The argument may refer to other tables and uses special name resolution + rules (see get_disallowed_table_deps_for_list for details). 
This function + sets up Name_resolution_context object appropriately before calling + fix_fields for the argument. + + @return + false OK + true Fatal error +*/ + +bool Table_function_json_table::setup(THD *thd, TABLE_LIST *sql_table, + SELECT_LEX *s_lex) +{ + thd->where= "JSON_TABLE argument"; + + if (!m_context_setup_done) + { + m_context_setup_done= true; + // Prepare the name resolution context. First, copy the context that is + // used for name resolution of the WHERE clause + *m_context= s_lex->context; + + // Then, restrict it to only allow to refer to tables that come before the + // table function reference + if (!(m_context->ignored_tables= + get_disallowed_table_deps(thd->stmt_arena->mem_root, s_lex, + sql_table))) + return TRUE; // Error + } + + bool save_is_item_list_lookup; + save_is_item_list_lookup= s_lex->is_item_list_lookup; + s_lex->is_item_list_lookup= 0; + + // Do the same what setup_without_group() does: do not count the referred + // fields in non_agg_field_used: + const bool saved_non_agg_field_used= s_lex->non_agg_field_used(); + + bool res= m_json->fix_fields_if_needed_for_scalar(thd, &m_json); + + s_lex->is_item_list_lookup= save_is_item_list_lookup; + s_lex->set_non_agg_field_used(saved_non_agg_field_used); + + if (res) + return TRUE; // Error + + return FALSE; +} + +int Table_function_json_table::walk_items(Item_processor processor, + bool walk_subquery, void *argument) +{ + return m_json->walk(processor, walk_subquery, argument); +} + +void Table_function_json_table::get_estimates(ha_rows *out_rows, + double *scan_time, + double *startup_cost) +{ + *out_rows= 40; + *scan_time= 0.0; + *startup_cost= 0.0; +} + + +/* + Check if a column belongs to the nested path + or a path that nested into it. + It only supposed to be used in the Json_table_nested_path::print, and + since the nested path should have at least one field we + don't have to loop through the m_next_nested. 
+*/ +bool Json_table_nested_path::column_in_this_or_nested( + const Json_table_nested_path *p, const Json_table_column *jc) +{ + for (; p; p= p->m_nested) + { + if (jc->m_nest == p) + return TRUE; + } + return FALSE; +} + + +/* + Print the string representation of the Json_nested_path object. + Which is the COLUMNS(...) part of the JSON_TABLE definition. + + @param thd - the thread + @param f - the remaining part of the array of Field* objects + taken from the TABLE. + It's needed as Json_table_column objects + don't have links to the related Field-s. + @param str - the string where to print + @param it - the remaining part of the Json_table_column list + @param last_column - the last column taken from the list. +*/ + +int Json_table_nested_path::print(THD *thd, Field ***f, String *str, + List_iterator_fast &it, + Json_table_column **last_column) +{ + Json_table_nested_path *c_path= this; + Json_table_nested_path *c_nested= m_nested; + Json_table_column *jc= *last_column; + bool first_column= TRUE; + + if (str->append(STRING_WITH_LEN("COLUMNS ("))) + return 1; + + /* loop while jc belongs to the current or nested paths. */ + while(jc && + (jc->m_nest == c_path || column_in_this_or_nested(c_nested, jc))) + { + if (first_column) + first_column= FALSE; + else if (str->append(STRING_WITH_LEN(", "))) + return 1; + + if (jc->m_nest == c_path) + { + if (jc->print(thd, *f, str)) + return 1; + if ((jc= it++)) + ++(*f); + } + else + { + DBUG_ASSERT(column_in_this_or_nested(c_nested, jc)); + if (str->append(STRING_WITH_LEN("NESTED PATH ")) || + print_path(str, &jc->m_nest->m_path) || + str->append(' ') || + c_nested->print(thd, f, str, it, &jc)) + return 1; + c_nested= c_nested->m_next_nested; + } + } + + if (str->append(STRING_WITH_LEN(")"))) + return 1; + + *last_column= jc; + return 0; +} + + +/* + Print the SQL definition of the JSON_TABLE. + Used mostly as a part of the CREATE VIEW statement. 
+ + @param thd - the thread + @param sql_table - the corresponding TABLE_LIST object + @param str - the string where to print + @param query_type - the query type +*/ +int Table_function_json_table::print(THD *thd, TABLE_LIST *sql_table, + String *str, enum_query_type query_type) +{ + List_iterator_fast jc_i(m_columns); + Json_table_column *jc= jc_i++; + Field **f_list= sql_table->table->field; + + DBUG_ENTER("Table_function_json_table::print"); + + if (str->append(STRING_WITH_LEN("JSON_TABLE("))) + DBUG_RETURN(TRUE); + + m_json->print(str, query_type); + + if (str->append(STRING_WITH_LEN(", ")) || + print_path(str, &m_nested_path.m_path) || + str->append(' ') || + m_nested_path.print(thd, &f_list, str, jc_i, &jc) || + str->append(')')) + DBUG_RETURN(TRUE); + + DBUG_RETURN(0); +} + + +void Table_function_json_table::fix_after_pullout(TABLE_LIST *sql_table, + st_select_lex *new_parent, bool merge) +{ + m_json->fix_after_pullout(new_parent, &m_json, merge); + sql_table->dep_tables= used_tables(); +} + + +/* + @brief + Recursively make all tables in the join_list also depend on deps. +*/ + +static void add_extra_deps(List *join_list, table_map deps) +{ + TABLE_LIST *table; + List_iterator li(*join_list); + + DBUG_EXECUTE_IF("json_check_min_stack_requirement", + { + long arbitrary_var; + long stack_used_up= (available_stack_size(current_thd->thread_stack, &arbitrary_var)); + ALLOCATE_MEM_ON_STACK(my_thread_stack_size-stack_used_up-STACK_MIN_SIZE); + }); + if (check_stack_overrun(current_thd, STACK_MIN_SIZE , NULL)) + return; + while ((table= li++)) + { + table->dep_tables |= deps; + NESTED_JOIN *nested_join; + if ((nested_join= table->nested_join)) + { + // set the deps inside, too + add_extra_deps(&nested_join->join_list, deps); + } + } +} + + +/* + @brief + Add table dependencies that are directly caused by table functions, also + add extra dependencies so that the join optimizer does not construct + "dead-end" join prefixes. 
  @detail
    There are two kinds of limitations on join order:
    1A. Outer joins require that inner tables follow outer.
    1B. Tables within a join nest must be present in the join order
        "without interleaving". See check_interleaving_with_nj for details.

    2. A table function argument may refer to *any* table that precedes the
       current table in the query text. The table may be outside of the
       current nested join and/or inside another nested join.

    One may think that adding dependencies according to #2 would be
    sufficient, but this is not the case.

  @example

    select ...
    from
      t20 left join t21 on t20.a=t21.a
      join
      (t31 left join (t32 join
                      JSON_TABLE(t21.js,
                                 '$' COLUMNS (ab INT PATH '$.a')) AS jt
                     ) on t31.a<3
      )

    Here, jt's argument refers to t21.

    Table dependencies are:
      t21 -> t20
      t32 -> t31
      jt  -> t21 t31 (also indirectly depends on t20 through t21)

    This allows to construct a "dead-end" join prefix, like:

      t31, t32

    Here, the "no interleaving" rule requires the next table to be jt, but
    we can't add it, because it depends on t21 which is not in the join
    prefix.

  @end example

  Dead-end join prefixes do not work with join prefix pruning done for
  @@optimizer_prune_level: it is possible that all non-dead-end prefixes are
  pruned away.

  The solution is as follows: if there is an outer join that contains
  (directly or indirectly) a table function JT which has a reference JREF
  outside of the outer join:

    left join ( T_I ... json_table(JREF, ...) as JT ...)

  then make *all* tables T_I also dependent on outside references in JREF.
  This way, the optimizer will put table T_I into the join prefix only when
  JT can be put there as well, and "dead-end" prefixes will not be built.

  @param join_list    List of tables to process. Initial invocation should
                      supply the JOIN's top-level table list.
  @param nest_tables  Bitmap of all tables in the join list.
+ + @return Bitmap of all outside references that tables in join_list have +*/ + +table_map add_table_function_dependencies(List *join_list, + table_map nest_tables) +{ + TABLE_LIST *table; + table_map res= 0; + List_iterator li(*join_list); + + DBUG_EXECUTE_IF("json_check_min_stack_requirement", + { + long arbitrary_var; + long stack_used_up= (available_stack_size(current_thd->thread_stack, &arbitrary_var)); + ALLOCATE_MEM_ON_STACK(my_thread_stack_size-stack_used_up-STACK_MIN_SIZE); + }); + if ((res=check_stack_overrun(current_thd, STACK_MIN_SIZE , NULL))) + return res; + + // Recursively compute extra dependencies + while ((table= li++)) + { + NESTED_JOIN *nested_join; + if ((nested_join= table->nested_join)) + { + res |= add_table_function_dependencies(&nested_join->join_list, + nested_join->used_tables); + } + else if (table->table_function) + { + table->dep_tables |= table->table_function->used_tables(); + res |= table->dep_tables; + } + } + res= res & ~nest_tables & ~PSEUDO_TABLE_BITS; + // Then, make all "peers" have them: + if (res) + add_extra_deps(join_list, res); + + return res; +} + + diff --git a/sql/json_table.h b/sql/json_table.h new file mode 100644 index 00000000..74c2bfc2 --- /dev/null +++ b/sql/json_table.h @@ -0,0 +1,294 @@ +#ifndef JSON_TABLE_INCLUDED +#define JSON_TABLE_INCLUDED + +/* Copyright (c) 2020, MariaDB Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#include + +class Json_table_column; + +/* + The Json_table_nested_path represents the 'current nesting' level + for a set of JSON_TABLE columns. + Each column (Json_table_column instance) is linked with corresponding + 'nested path' object and gets its piece of JSON to parse during the computation + phase. + The root 'nested_path' is always present as a part of Table_function_json_table, + then other 'nested_paths' can be created and linked into a tree structure when new + 'NESTED PATH' is met. The nested 'nested_paths' are linked with 'm_nested', the same-level + 'nested_paths' are linked with 'm_next_nested'. + So for instance + JSON_TABLE( '...', '$[*]' + COLUMNS( a INT PATH '$.a' , + NESTED PATH '$.b[*]' COLUMNS (b INT PATH '$', + NESTED PATH '$.c[*]' COLUMNS(x INT PATH '$')), + NESTED PATH '$.n[*]' COLUMNS (z INT PATH '$')) + results in 4 'nested_path' created: + root nested_b nested_c nested_n + m_path '$[*]' '$.b[*]' '$.c[*]' '$.n[*] + m_nested &nested_b &nested_c NULL NULL + n_next_nested NULL &nested_n NULL NULL + + and 4 columns created: + a b x z + m_nest &root &nested_b &nested_c &nested_n +*/ + +class Json_table_nested_path : public Sql_alloc +{ +public: + json_path_t m_path; /* The JSON Path to get the rows from */ + bool m_null; // TRUE <=> producing a NULL-complemented row. 
+ + /*** Construction interface ***/ + Json_table_nested_path(): + m_null(TRUE), m_nested(NULL), m_next_nested(NULL) + {} + + int set_path(THD *thd, const LEX_CSTRING &path); + + /*** Methods for performing a scan ***/ + void scan_start(CHARSET_INFO *i_cs, const uchar *str, const uchar *end); + int scan_next(); + bool check_error(const char *str); + + /*** Members for getting the values we've scanned to ***/ + const uchar *get_value() { return m_engine.value_begin; } + const uchar *get_value_end() { return m_engine.s.str_end; } + + /* Counts the rows produced. Used by FOR ORDINALITY columns */ + longlong m_ordinality_counter; + + int print(THD *thd, Field ***f, String *str, + List_iterator_fast &it, + Json_table_column **last_column); +private: + /* The head of the list of nested NESTED PATH statements. */ + Json_table_nested_path *m_nested; + + /* in the above list items are linked with the */ + Json_table_nested_path *m_next_nested; + + /*** Members describing NESTED PATH structure ***/ + /* Parent nested path. The "root" path has this NULL */ + Json_table_nested_path *m_parent; + + /*** Members describing current JSON Path scan state ***/ + /* The JSON Parser and JSON Path evaluator */ + json_engine_t m_engine; + + /* The path the parser is currently pointing to */ + json_path_t m_cur_path; + + /* The child NESTED PATH we're currently scanning */ + Json_table_nested_path *m_cur_nested; + + static bool column_in_this_or_nested(const Json_table_nested_path *p, + const Json_table_column *jc); + friend class Table_function_json_table; +}; + + +/* + @brief + Describes the column definition in JSON_TABLE(...) syntax. + + @detail + Has methods for printing/handling errors but otherwise it's a static + object. 
+*/ + +class Json_table_column : public Sql_alloc +{ +public: + enum enum_type + { + FOR_ORDINALITY, + PATH, + EXISTS_PATH + }; + + enum enum_on_type + { + ON_EMPTY, + ON_ERROR + }; + + enum enum_on_response + { + RESPONSE_NOT_SPECIFIED, + RESPONSE_ERROR, + RESPONSE_NULL, + RESPONSE_DEFAULT + }; + + struct On_response + { + public: + Json_table_column::enum_on_response m_response; + LEX_CSTRING m_default; + int respond(Json_table_column *jc, Field *f, uint error_num); + int print(const char *name, String *str) const; + bool specified() const { return m_response != RESPONSE_NOT_SPECIFIED; } + }; + + enum_type m_column_type; + bool m_format_json; + json_path_t m_path; + On_response m_on_error; + On_response m_on_empty; + Create_field *m_field; + Json_table_nested_path *m_nest; + CHARSET_INFO *m_explicit_cs; + CHARSET_INFO *m_defaults_cs; + + void set(enum_type ctype) + { + m_column_type= ctype; + } + int set(THD *thd, enum_type ctype, const LEX_CSTRING &path, CHARSET_INFO *cs); + int set(THD *thd, enum_type ctype, const LEX_CSTRING &path, + const Lex_column_charset_collation_attrs_st &cl); + Json_table_column(Create_field *f, Json_table_nested_path *nest) : + m_field(f), m_nest(nest), m_explicit_cs(NULL) + { + m_on_error.m_response= RESPONSE_NOT_SPECIFIED; + m_on_empty.m_response= RESPONSE_NOT_SPECIFIED; + } + int print(THD *tnd, Field **f, String *str); +}; + + +/* + Class represents the table function, the function + that returns the table as a result so supposed to appear + in the FROM list of the SELECT statement. + At the moment there is only one such function JSON_TABLE, + so the class named after it, but should be refactored + into the hierarchy root if we create more of that functions. + + As the parser finds the table function in the list it + creates an instance of Table_function_json_table storing it + into the TABLE_LIST::table_function. + Then the ha_json_table instance is created based on it in + the create_table_for_function(). 
+ + == Replication: whether JSON_TABLE is deterministic == + + In sql_yacc.yy, we set BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION whenever + JSON_TABLE is used. The reasoning behind this is as follows: + + In the current MariaDB code, evaluation of JSON_TABLE is deterministic, + that is, for a given input string JSON_TABLE will always produce the same + set of rows in the same order. However one can think of JSON documents + that one can consider indentical which will produce different output. + In order to be feature-proof and withstand changes like: + - sorting JSON object members by name (like MySQL does) + - changing the way duplicate object members are handled + we mark the function as SBR-unsafe. + (If there is ever an issue with this, marking the function as SBR-safe + is a non-intrusive change we will always be able to make) +*/ + +class Table_function_json_table : public Sql_alloc +{ +public: + /*** Basic properties of the original JSON_TABLE(...) ***/ + Item *m_json; /* The JSON value to be parsed. */ + + /* The COLUMNS(...) part representation. */ + Json_table_nested_path m_nested_path; + + /* The list of table column definitions. */ + List m_columns; + + /*** Name resolution functions ***/ + bool setup(THD *thd, TABLE_LIST *sql_table, SELECT_LEX *s_lex); + + int walk_items(Item_processor processor, bool walk_subquery, + void *argument); + + /*** Functions for interaction with the Query Optimizer ***/ + void fix_after_pullout(TABLE_LIST *sql_table, + st_select_lex *new_parent, bool merge); + void update_used_tables() { m_json->update_used_tables(); } + + table_map used_tables() const { return m_json->used_tables(); } + bool join_cache_allowed() const + { + /* + Can use join cache when we have an outside reference. + If there's dependency on any other table or randomness, + cannot use it. 
+ */ + return !(used_tables() & ~OUTER_REF_TABLE_BIT); + } + void get_estimates(ha_rows *out_rows, + double *scan_time, double *startup_cost); + + int print(THD *thd, TABLE_LIST *sql_table, + String *str, enum_query_type query_type); + + /*** Construction interface to be used from the parser ***/ + Table_function_json_table(Item *json): + m_json(json), + m_context_setup_done(false) + { + cur_parent= &m_nested_path; + last_sibling_hook= &m_nested_path.m_nested; + } + + void start_nested_path(Json_table_nested_path *np); + void end_nested_path(); + Json_table_nested_path *get_cur_nested_path() { return cur_parent; } + void set_name_resolution_context(Name_resolution_context *arg) + { + m_context= arg; + } + + /* SQL Parser: current column in JSON_TABLE (...) syntax */ + Json_table_column *m_cur_json_table_column; + + /* SQL Parser: charset of the current text literal */ + CHARSET_INFO *m_text_literal_cs; + +private: + /* Context to be used for resolving the first argument. */ + Name_resolution_context *m_context; + + bool m_context_setup_done; + + /* Current NESTED PATH level being parsed */ + Json_table_nested_path *cur_parent; + + /* + Pointer to the list tail where we add the next NESTED PATH. + It points to the cur_parnt->m_nested for the first nested + and prev_nested->m_next_nested for the coesequent ones. + */ + Json_table_nested_path **last_sibling_hook; +}; + +bool push_table_function_arg_context(LEX *lex, MEM_ROOT *alloc); + +TABLE *create_table_for_function(THD *thd, TABLE_LIST *sql_table); + +table_map add_table_function_dependencies(List *join_list, + table_map nest_tables); + +#endif /* JSON_TABLE_INCLUDED */ + diff --git a/sql/key.cc b/sql/key.cc new file mode 100644 index 00000000..4e40a335 --- /dev/null +++ b/sql/key.cc @@ -0,0 +1,903 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 
+ Copyright (c) 2018, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* Functions to handle keys and fields in forms */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "key.h" // key_rec_cmp +#include "field.h" // Field + +/* + Search after a key that starts with 'field' + + SYNOPSIS + find_ref_key() + key First key to check + key_count How many keys to check + record Start of record + field Field to search after + key_length On partial match, contains length of fields before + field + keypart key part # of a field + + NOTES + Used when calculating key for NEXT_NUMBER + + IMPLEMENTATION + If no key starts with field test if field is part of some key. If we find + one, then return first key and set key_length to the number of bytes + preceding 'field'. + + RETURN + -1 field is not part of the key + # Key part for key matching key. + key_length is set to length of key before (not including) field +*/ + +int find_ref_key(KEY *key, uint key_count, uchar *record, Field *field, + uint *key_length, uint *keypart) +{ + int i; + KEY *key_info; + uint fieldpos; + + fieldpos= field->offset(record); + + /* Test if some key starts as fieldpos */ + for (i= 0, key_info= key ; + i < (int) key_count ; + i++, key_info++) + { + if (key_info->key_part[0].offset == fieldpos && + key_info->key_part[0].field->type() != MYSQL_TYPE_BIT) + { /* Found key. 
Calc keylength */ + *key_length= *keypart= 0; + return i; /* Use this key */ + } + } + + /* Test if some key contains fieldpos */ + for (i= 0, key_info= key; + i < (int) key_count ; + i++, key_info++) + { + uint j; + KEY_PART_INFO *key_part; + *key_length=0; + for (j=0, key_part=key_info->key_part ; + j < key_info->user_defined_key_parts ; + j++, key_part++) + { + if (key_part->offset == fieldpos && + key_part->field->type() != MYSQL_TYPE_BIT) + { + *keypart= j; + return i; /* Use this key */ + } + *key_length+= key_part->store_length; + } + } + return(-1); /* No key is ok */ +} + + +/** + Copy part of a record that forms a key or key prefix to a buffer. + + The function takes a complete table record (as e.g. retrieved by + handler::index_read()), and a description of an index on the same table, + and extracts the first key_length bytes of the record which are part of a + key into to_key. If length == 0 then copy all bytes from the record that + form a key. + + @param to_key buffer that will be used as a key + @param from_record full record to be copied from + @param key_info descriptor of the index + @param key_length specifies length of all keyparts that will be copied + @param with_zerofill skipped bytes in the key buffer to be filled with 0 +*/ + +void key_copy(uchar *to_key, const uchar *from_record, const KEY *key_info, + uint key_length, bool with_zerofill) +{ + uint length; + KEY_PART_INFO *key_part; + + if (key_length == 0) + key_length= key_info->key_length; + for (key_part= key_info->key_part; + (int) key_length > 0; + key_part++, to_key+= length, key_length-= length) + { + if (key_part->null_bit) + { + *to_key++= MY_TEST(from_record[key_part->null_offset] & + key_part->null_bit); + key_length--; + if (to_key[-1]) + { + /* + Don't copy data for null values + The -1 below is to subtract the null byte which is already handled + */ + length= MY_MIN(key_length, uint(key_part->store_length)-1); + if (with_zerofill) + bzero((char*) to_key, length); + continue; 
+ } + } + auto *from_ptr= key_part->field->ptr_in_record(from_record); + if (key_part->key_part_flag & HA_BLOB_PART || + key_part->key_part_flag & HA_VAR_LENGTH_PART) + { + key_length-= HA_KEY_BLOB_LENGTH; + length= MY_MIN(key_length, key_part->length); + uint bytes= key_part->field->get_key_image(to_key, length, from_ptr, + key_info->flags & HA_SPATIAL ? Field::itMBR : Field::itRAW); + if (with_zerofill && bytes < length) + bzero((char*) to_key + bytes, length - bytes); + to_key+= HA_KEY_BLOB_LENGTH; + } + else + { + length= MY_MIN(key_length, key_part->length); + Field *field= key_part->field; + CHARSET_INFO *cs= field->charset(); + uint bytes= field->get_key_image(to_key, length, from_ptr, Field::itRAW); + if (bytes < length) + cs->fill((char*) to_key + bytes, length - bytes, ' '); + } + } +} + + +/** + Restore a key from some buffer to record. + + This function converts a key into record format. It can be used in cases + when we want to return a key as a result row. + + @param to_record record buffer where the key will be restored to + @param from_key buffer that contains a key + @param key_info descriptor of the index + @param key_length specifies length of all keyparts that will be restored +*/ + +void key_restore(uchar *to_record, const uchar *from_key, KEY *key_info, + uint key_length) +{ + uint length; + KEY_PART_INFO *key_part; + + if (key_length == 0) + { + key_length= key_info->key_length; + } + for (key_part= key_info->key_part ; + (int) key_length > 0 ; + key_part++, from_key+= length, key_length-= length) + { + uchar used_uneven_bits= 0; + if (key_part->null_bit) + { + bool null_value; + if ((null_value= *from_key++)) + to_record[key_part->null_offset]|= key_part->null_bit; + else + to_record[key_part->null_offset]&= ~key_part->null_bit; + key_length--; + if (null_value) + { + /* + Don't copy data for null bytes + The -1 below is to subtract the null byte which is already handled + */ + length= MY_MIN(key_length, uint(key_part->store_length)-1); + 
continue; + } + } + if (key_part->type == HA_KEYTYPE_BIT) + { + Field_bit *field= (Field_bit *) (key_part->field); + if (field->bit_len) + { + uchar bits= *(from_key + key_part->length - + field->pack_length_in_rec() - 1); + set_rec_bits(bits, to_record + key_part->null_offset + + (key_part->null_bit == 128), + field->bit_ofs, field->bit_len); + /* we have now used the byte with 'uneven' bits */ + used_uneven_bits= 1; + } + } + if (key_part->key_part_flag & HA_BLOB_PART) + { + /* + This in fact never happens, as we have only partial BLOB + keys yet anyway, so it's difficult to find any sense to + restore the part of a record. + Maybe this branch is to be removed, but now we + have to ignore GCov compaining. + */ + uint blob_length= uint2korr(from_key); + Field_blob *field= (Field_blob*) key_part->field; + from_key+= HA_KEY_BLOB_LENGTH; + key_length-= HA_KEY_BLOB_LENGTH; + field->set_ptr_offset(to_record - field->table->record[0], + (ulong) blob_length, from_key); + length= key_part->length; + } + else if (key_part->key_part_flag & HA_VAR_LENGTH_PART) + { + Field *field= key_part->field; + my_ptrdiff_t ptrdiff= to_record - field->table->record[0]; + field->move_field_offset(ptrdiff); + key_length-= HA_KEY_BLOB_LENGTH; + length= MY_MIN(key_length, key_part->length); + MY_BITMAP *old_map= dbug_tmp_use_all_columns(field->table, &field->table->write_set); + field->set_key_image(from_key, length); + dbug_tmp_restore_column_map(&field->table->write_set, old_map); + from_key+= HA_KEY_BLOB_LENGTH; + field->move_field_offset(-ptrdiff); + } + else + { + length= MY_MIN(key_length, key_part->length); + /* skip the byte with 'uneven' bits, if used */ + memcpy(to_record + key_part->offset, from_key + used_uneven_bits + , (size_t) length - used_uneven_bits); + } + } +} + + +/** + Compare if a key has changed. 
+ + @param table TABLE + @param key key to compare to row + @param idx Index used + @param key_length Length of key + + @note + In theory we could just call field->cmp() for all field types, + but as we are only interested if a key has changed (not if the key is + larger or smaller than the previous value) we can do things a bit + faster by using memcmp() instead. + + @retval + 0 If key is equal + @retval + 1 Key has changed +*/ + +bool key_cmp_if_same(TABLE *table,const uchar *key,uint idx,uint key_length) +{ + uint store_length; + KEY_PART_INFO *key_part; + const uchar *key_end= key + key_length;; + + for (key_part=table->key_info[idx].key_part; + key < key_end ; + key_part++, key+= store_length) + { + uint length; + store_length= key_part->store_length; + + if (key_part->null_bit) + { + if (*key != MY_TEST(table->record[0][key_part->null_offset] & + key_part->null_bit)) + return 1; + if (*key) + continue; + key++; + store_length--; + } + if (!(key_part->key_part_flag & HA_CAN_MEMCMP)) + { + if (key_part->field->key_cmp(key, key_part->length)) + return 1; + continue; + } + length= MY_MIN((uint) (key_end-key), store_length); + if (!(key_part->key_type & (FIELDFLAG_NUMBER+FIELDFLAG_BINARY+ + FIELDFLAG_PACK))) + { + CHARSET_INFO *cs= key_part->field->charset(); + size_t char_length= key_part->length / cs->mbmaxlen; + const uchar *pos= table->record[0] + key_part->offset; + if (length > char_length) + { + char_length= cs->charpos(pos, pos + length, char_length); + set_if_smaller(char_length, length); + } + if (cs->strnncollsp(key, length, pos, char_length)) + return 1; + continue; + } + if (memcmp(key,table->record[0]+key_part->offset,length)) + return 1; + } + return 0; +} + + +/** + Unpack a field and append it. + + @param[inout] to String to append the field contents to. + @param field Field to unpack. + @param rec Record which contains the field data. + @param max_length Maximum length of field to unpack + or 0 for unlimited. 
+ @param prefix_key The field is used as a prefix key. +*/ + +void field_unpack(String *to, Field *field, const uchar *rec, uint max_length, + bool prefix_key) +{ + String tmp; + DBUG_ENTER("field_unpack"); + if (!max_length) + max_length= field->pack_length(); + if (field) + { + if (field->is_null()) + { + to->append(NULL_clex_str); + DBUG_VOID_RETURN; + } + CHARSET_INFO *cs= field->charset(); + field->val_str(&tmp); + /* + For BINARY(N) strip trailing zeroes to make + the error message nice-looking + */ + if (field->binary() && field->type() == MYSQL_TYPE_STRING && tmp.length()) + { + const char *tmp_end= tmp.ptr() + tmp.length(); + while (tmp_end > tmp.ptr() && !*--tmp_end) ; + tmp.length((uint32)(tmp_end - tmp.ptr() + 1)); + } + if (cs->mbmaxlen > 1 && prefix_key) + { + /* + Prefix key, multi-byte charset. + For the columns of type CHAR(N), the above val_str() + call will return exactly "key_part->length" bytes, + which can break a multi-byte characters in the middle. + Align, returning not more than "char_length" characters. + */ + size_t charpos, char_length= max_length / cs->mbmaxlen; + if ((charpos= cs->charpos(tmp.ptr(), + tmp.ptr() + tmp.length(), + char_length)) < tmp.length()) + tmp.length(charpos); + } + if (max_length < field->pack_length()) + tmp.length(MY_MIN(tmp.length(),max_length)); + ErrConvString err(&tmp); + to->append(err.lex_cstring()); + } + else + to->append(STRING_WITH_LEN("???")); + DBUG_VOID_RETURN; +} + + +/* + unpack key-fields from record to some buffer. + + This is used mainly to get a good error message. We temporary + change the column bitmap so that all columns are readable. 
+ + @param + to Store value here in an easy to read form + @param + table Table to use + @param + key Key +*/ + +void key_unpack(String *to, TABLE *table, KEY *key) +{ + MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, &table->read_set); + DBUG_ENTER("key_unpack"); + + to->length(0); + KEY_PART_INFO *key_part_end= key->key_part + key->user_defined_key_parts; + for (KEY_PART_INFO *key_part= key->key_part; + key_part < key_part_end; + key_part++) + { + if (key_part->field->invisible > INVISIBLE_USER) + continue; + if (to->length()) + to->append('-'); + if (key_part->null_bit) + { + if (table->record[0][key_part->null_offset] & key_part->null_bit) + { + to->append(NULL_clex_str); + continue; + } + } + field_unpack(to, key_part->field, table->record[0], key_part->length, + MY_TEST(key_part->key_part_flag & HA_PART_KEY_SEG)); + } + dbug_tmp_restore_column_map(&table->read_set, old_map); + DBUG_VOID_RETURN; +} + + +/* + Check if key uses field that is marked in passed field bitmap. + + SYNOPSIS + is_key_used() + table TABLE object with which keys and fields are associated. + idx Key to be checked. + fields Bitmap of fields to be checked. + + NOTE + This function uses TABLE::tmp_set bitmap so the caller should care + about saving/restoring its state if it also uses this bitmap. + + RETURN VALUE + TRUE Key uses field from bitmap + FALSE Otherwise +*/ + +bool is_key_used(TABLE *table, uint idx, const MY_BITMAP *fields) +{ + table->mark_index_columns(idx, &table->tmp_set); + return bitmap_is_overlapping(&table->tmp_set, fields); +} + + +/** + Compare key in row to a given key. 
+ + @param key_part Key part handler + @param key Key to compare to value in table->record[0] + @param key_length length of 'key' + + @return + The return value is SIGN(key_in_row - range_key): + - 0 Key is equal to range or 'range' == 0 (no range) + - -1 Key is less than range + - 1 Key is larger than range +*/ + +int key_cmp(KEY_PART_INFO *key_part, const uchar *key, uint key_length) +{ + uint store_length; + + for (const uchar *end=key + key_length; + key < end; + key+= store_length, key_part++) + { + int cmp; + store_length= key_part->store_length; + int sort_order = (key_part->key_part_flag & HA_REVERSE_SORT) ? -1 : 1; + if (key_part->null_bit) + { + /* This key part allows null values; NULL is lower than everything */ + bool field_is_null= key_part->field->is_null(); + if (*key) // If range key is null + { + /* the range is expecting a null value */ + if (!field_is_null) + return sort_order; // Found key is > range + /* null -- exact match, go to next key part */ + continue; + } + else if (field_is_null) + return -sort_order; // NULL is less than any value + key++; // Skip null byte + store_length--; + } + if ((cmp=key_part->field->key_cmp(key, key_part->length)) < 0) + return -sort_order; + if (cmp > 0) + return sort_order; + } + return 0; // Keys are equal +} + + +/** + Compare two records in index order. + + This method is set-up such that it can be called directly from the + priority queue and it is attempted to be optimised as much as possible + since this will be called O(N * log N) times while performing a merge + sort in various places in the code. + + We retrieve the pointer to table->record[0] using the fact that key_parts + have an offset making it possible to calculate the start of the record. + We need to get the diff to the compared record since none of the records + being compared are stored in table->record[0]. 
+ + We first check for NULL values, if there are no NULL values we use + a compare method that gets two field pointers and a max length + and return the result of the comparison. + + key is a null terminated array, since in some cases (clustered + primary key) it must compare more than one index. + + @param key Null terminated array of index information + @param first_rec Pointer to record compare with + @param second_rec Pointer to record compare against first_rec + + @return Return value is SIGN(first_rec - second_rec) + @retval 0 Keys are equal + @retval -1 second_rec is greater than first_rec + @retval +1 first_rec is greater than second_rec +*/ + +int key_rec_cmp(void *key_p, uchar *first_rec, uchar *second_rec) +{ + KEY **key= (KEY**) key_p; + KEY *key_info= *(key++); // Start with first key + uint key_parts, key_part_num; + KEY_PART_INFO *key_part= key_info->key_part; + uchar *rec0= key_part->field->ptr - key_part->offset; + my_ptrdiff_t first_diff= first_rec - rec0, sec_diff= second_rec - rec0; + int result= 0; + Field *field; + DBUG_ENTER("key_rec_cmp"); + + /* loop over all given keys */ + do + { + key_parts= key_info->user_defined_key_parts; + key_part= key_info->key_part; + key_part_num= 0; + + /* loop over every key part */ + do + { + const int GREATER= key_part->key_part_flag & HA_REVERSE_SORT ? -1 : +1; + const int LESS= -GREATER; + + field= key_part->field; + + if (key_part->null_bit) + { + /* The key_part can contain NULL values */ + bool first_is_null= field->is_real_null(first_diff); + bool sec_is_null= field->is_real_null(sec_diff); + /* + NULL is smaller then everything so if first is NULL and the other + not then we know that we should return -1 and for the opposite + we should return +1. If both are NULL then we call it equality + although it is a strange form of equality, we have equally little + information of the real value. 
+ */ + if (!first_is_null) + { + if (!sec_is_null) + ; /* Fall through, no NULL fields */ + else + { + DBUG_RETURN(GREATER); + } + } + else if (!sec_is_null) + { + DBUG_RETURN(LESS); + } + else + goto next_loop; /* Both were NULL */ + } + /* + No null values in the fields + We use the virtual method cmp_max with a max length parameter. + For most field types this translates into a cmp without + max length. The exceptions are the BLOB and VARCHAR field types + that take the max length into account. + */ + if ((result= field->cmp_prefix(field->ptr+first_diff, field->ptr+sec_diff, + key_part->length / + field->charset()->mbmaxlen))) + DBUG_RETURN(result * GREATER); +next_loop: + key_part++; + key_part_num++; + } while (key_part_num < key_parts); /* this key is done */ + + key_info= *(key++); + } while (key_info); /* no more keys to test */ + DBUG_RETURN(0); +} + + +/* + Compare two key tuples. + + @brief + Compare two key tuples, i.e. two key values in KeyTupleFormat. + + @param part KEY_PART_INFO with key description + @param key1 First key to compare + @param key2 Second key to compare + @param tuple_length Length of key1 (and key2, they are the same) in bytes. 
+ + @return + @retval 0 key1 == key2 + @retval -1 key1 < key2 + @retval +1 key1 > key2 +*/ + +int key_tuple_cmp(KEY_PART_INFO *part, uchar *key1, uchar *key2, + uint tuple_length) +{ + uchar *key1_end= key1 + tuple_length; + int UNINIT_VAR(len); + int res; + for (;key1 < key1_end; key1 += len, key2 += len, part++) + { + len= part->store_length; + if (part->null_bit) + { + if (*key1) // key1 == NULL + { + if (!*key2) // key1(NULL) < key2(notNULL) + return -1; + continue; + } + else if (*key2) // key1(notNULL) > key2 (NULL) + return 1; + /* Step over the NULL bytes for key_cmp() call */ + key1++; + key2++; + len--; + } + if ((res= part->field->key_cmp(key1, key2))) + return res; + } + return 0; +} + + +/** + Get hash value for the key from a key buffer + + @param key_info the key descriptor + @param used_key_part number of key parts used for the key + @param key pointer to the buffer with the key value + + @datails + When hashing we should take special care only of: + 1. NULLs (and keyparts which can be null so one byte reserved for it); + 2. Strings for which we have to take into account their collations + and the values of their lengths in the prefixes. 
+ + @return hash value calculated for the key +*/ + +ulong key_hashnr(KEY *key_info, uint used_key_parts, const uchar *key) +{ + ulong nr=1, nr2=4; + KEY_PART_INFO *key_part= key_info->key_part; + KEY_PART_INFO *end_key_part= key_part + used_key_parts; + + for (; key_part < end_key_part; key_part++) + { + uchar *pos= (uchar*)key; + CHARSET_INFO *UNINIT_VAR(cs); + size_t UNINIT_VAR(length), UNINIT_VAR(pack_length); + bool is_string= TRUE; + + key+= key_part->length; + if (key_part->null_bit) + { + key++; /* Skip null byte */ + if (*pos) /* Found null */ + { + nr^= (nr << 1) | 1; + /* Add key pack length to key for VARCHAR segments */ + switch (key_part->type) { + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARBINARY1: + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY2: + key+= 2; + break; + default: + ; + } + continue; + } + pos++; /* Skip null byte */ + } + /* If it is string set parameters of the string */ + switch (key_part->type) { + case HA_KEYTYPE_TEXT: + cs= key_part->field->charset(); + length= key_part->length; + pack_length= 0; + break; + case HA_KEYTYPE_BINARY : + cs= &my_charset_bin; + length= key_part->length; + pack_length= 0; + break; + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARTEXT2: + cs= key_part->field->charset(); + length= uint2korr(pos); + pack_length= 2; + break; + case HA_KEYTYPE_VARBINARY1: + case HA_KEYTYPE_VARBINARY2: + cs= &my_charset_bin; + length= uint2korr(pos); + pack_length= 2; + break; + default: + is_string= FALSE; + } + + if (is_string) + { + if (cs->mbmaxlen > 1) + { + size_t char_length= cs->charpos(pos + pack_length, + pos + pack_length + length, + length / cs->mbmaxlen); + set_if_smaller(length, char_length); + } + cs->hash_sort(pos+pack_length, length, &nr, &nr2); + key+= pack_length; + } + else + { + for (; pos < (uchar*)key ; pos++) + { + nr^=(ulong) ((((uint) nr & 63)+nr2)*((uint) *pos)) + (nr << 8); + nr2+=3; + } + } + } + DBUG_PRINT("exit", ("hash: %lx", nr)); + return(nr); +} + + +/** + Check whether two keys 
in the key buffers are equal + + @param key_info the key descriptor + @param used_key_part number of key parts used for the keys + @param key1 pointer to the buffer with the first key + @param key2 pointer to the buffer with the second key + + @detail See details of key_hashnr(). + + @retval TRUE keys in the buffers are NOT equal + @retval FALSE keys in the buffers are equal +*/ + +bool key_buf_cmp(KEY *key_info, uint used_key_parts, + const uchar *key1, const uchar *key2) +{ + KEY_PART_INFO *key_part= key_info->key_part; + KEY_PART_INFO *end_key_part= key_part + used_key_parts; + + for (; key_part < end_key_part; key_part++) + { + uchar *pos1= (uchar*)key1; + uchar *pos2= (uchar*)key2; + CHARSET_INFO *UNINIT_VAR(cs); + size_t UNINIT_VAR(length1), UNINIT_VAR(length2), UNINIT_VAR(pack_length); + bool is_string= TRUE; + + key1+= key_part->length; + key2+= key_part->length; + if (key_part->null_bit) + { + key1++; key2++; /* Skip null byte */ + if (*pos1 && *pos2) /* Both are null */ + { + /* Add key pack length to key for VARCHAR segments */ + switch (key_part->type) { + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARBINARY1: + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY2: + key1+= 2; key2+= 2; + break; + default: + ; + } + continue; + } + if (*pos1 != *pos2) + return TRUE; + pos1++; pos2++; + } + + /* If it is string set parameters of the string */ + switch (key_part->type) { + case HA_KEYTYPE_TEXT: + cs= key_part->field->charset(); + length1= length2= key_part->length; + pack_length= 0; + break; + case HA_KEYTYPE_BINARY : + cs= &my_charset_bin; + length1= length2= key_part->length; + pack_length= 0; + break; + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARTEXT2: + cs= key_part->field->charset(); + length1= uint2korr(pos1); + length2= uint2korr(pos2); + pack_length= 2; + break; + case HA_KEYTYPE_VARBINARY1: + case HA_KEYTYPE_VARBINARY2: + cs= &my_charset_bin; + length1= uint2korr(pos1); + length2= uint2korr(pos2); + pack_length= 2; + break; + default: + 
is_string= FALSE; + } + + if (is_string) + { + /* + Compare the strings taking into account length in characters + and collation + */ + size_t byte_len1= length1, byte_len2= length2; + if (cs->mbmaxlen > 1) + { + size_t char_length1= cs->charpos(pos1 + pack_length, + pos1 + pack_length + length1, + length1 / cs->mbmaxlen); + size_t char_length2= cs->charpos(pos2 + pack_length, + pos2 + pack_length + length2, + length2 / cs->mbmaxlen); + set_if_smaller(length1, char_length1); + set_if_smaller(length2, char_length2); + } + if (length1 != length2 || + cs->strnncollsp(pos1 + pack_length, byte_len1, + pos2 + pack_length, byte_len2)) + return TRUE; + key1+= pack_length; key2+= pack_length; + } + else + { + /* it is OK to compare non-string byte per byte */ + for (; pos1 < (uchar*)key1 ; pos1++, pos2++) + { + if (pos1[0] != pos2[0]) + return TRUE; + } + } + } + return FALSE; +} diff --git a/sql/key.h b/sql/key.h new file mode 100644 index 00000000..871373bf --- /dev/null +++ b/sql/key.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef KEY_INCLUDED +#define KEY_INCLUDED + +class Field; +class String; +struct TABLE; +typedef struct st_bitmap MY_BITMAP; +typedef struct st_key KEY; +typedef struct st_key_part_info KEY_PART_INFO; + +int find_ref_key(KEY *key, uint key_count, uchar *record, Field *field, + uint *key_length, uint *keypart); +void key_copy(uchar *to_key, const uchar *from_record, const KEY *key_info, + uint key_length, bool with_zerofill= FALSE); +void key_restore(uchar *to_record, const uchar *from_key, KEY *key_info, + uint key_length); +bool key_cmp_if_same(TABLE *form,const uchar *key,uint index,uint key_length); +void key_unpack(String *to, TABLE *table, KEY *key); +void field_unpack(String *to, Field *field, const uchar *rec, uint max_length, + bool prefix_key); +bool is_key_used(TABLE *table, uint idx, const MY_BITMAP *fields); +int key_cmp(KEY_PART_INFO *key_part, const uchar *key, uint key_length); +ulong key_hashnr(KEY *key_info, uint used_key_parts, const uchar *key); +bool key_buf_cmp(KEY *key_info, uint used_key_parts, + const uchar *key1, const uchar *key2); +extern "C" int key_rec_cmp(void *key_info, uchar *a, uchar *b); +int key_tuple_cmp(KEY_PART_INFO *part, uchar *key1, uchar *key2, uint tuple_length); + +#endif /* KEY_INCLUDED */ diff --git a/sql/keycaches.cc b/sql/keycaches.cc new file mode 100644 index 00000000..10bec7c1 --- /dev/null +++ b/sql/keycaches.cc @@ -0,0 +1,236 @@ +/* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "keycaches.h" + +/**************************************************************************** + Named list handling +****************************************************************************/ + +NAMED_ILIST key_caches; +NAMED_ILIST rpl_filters; + +extern "C" PSI_memory_key key_memory_KEY_CACHE; +extern PSI_memory_key key_memory_NAMED_ILINK_name; + +/** + ilink (intrusive list element) with a name +*/ +class NAMED_ILINK :public ilink +{ +public: + const char *name; + size_t name_length; + uchar* data; + + NAMED_ILINK(I_List *links, const char *name_arg, + size_t name_length_arg, uchar* data_arg) + :name_length(name_length_arg), data(data_arg) + { + name= my_strndup(key_memory_NAMED_ILINK_name, name_arg, name_length, + MYF(MY_WME)); + links->push_back(this); + } + inline bool cmp(const char *name_cmp, size_t length) + { + return length == name_length && !memcmp(name, name_cmp, length); + } + ~NAMED_ILINK() + { + my_free((void *) name); + } +}; + +uchar* find_named(I_List *list, const char *name, size_t length, + NAMED_ILINK **found) +{ + I_List_iterator it(*list); + NAMED_ILINK *element; + while ((element= it++)) + { + if (element->cmp(name, length)) + { + if (found) + *found= element; + return element->data; + } + } + return 0; +} + + +bool NAMED_ILIST::delete_element(const char *name, size_t length, void (*free_element)(const char *name, void*)) +{ + I_List_iterator it(*this); + NAMED_ILINK *element; + DBUG_ENTER("NAMED_ILIST::delete_element"); + while ((element= it++)) + { + if 
(element->cmp(name, length)) + { + (*free_element)(element->name, element->data); + delete element; + DBUG_RETURN(0); + } + } + DBUG_RETURN(1); +} + +void NAMED_ILIST::delete_elements(void (*free_element)(const char *name, void*)) +{ + NAMED_ILINK *element; + DBUG_ENTER("NAMED_ILIST::delete_elements"); + while ((element= get())) + { + (*free_element)(element->name, element->data); + delete element; + } + DBUG_VOID_RETURN; +} + + +/* Key cache functions */ + +LEX_CSTRING default_key_cache_base= {STRING_WITH_LEN("default")}; + +KEY_CACHE zero_key_cache; ///< @@nonexistent_cache.param->value_ptr() points here + +KEY_CACHE *get_key_cache(const LEX_CSTRING *cache_name) +{ + if (!cache_name || ! cache_name->length) + cache_name= &default_key_cache_base; + return ((KEY_CACHE*) find_named(&key_caches, + cache_name->str, cache_name->length, 0)); +} + +KEY_CACHE *create_key_cache(const char *name, size_t length) +{ + KEY_CACHE *key_cache; + DBUG_ENTER("create_key_cache"); + DBUG_PRINT("enter",("name: %.*s", (int)length, name)); + + if ((key_cache= (KEY_CACHE*) my_malloc(key_memory_KEY_CACHE, + sizeof(KEY_CACHE), MYF(MY_ZEROFILL | MY_WME)))) + { + if (!new NAMED_ILINK(&key_caches, name, length, (uchar*) key_cache)) + { + my_free(key_cache); + key_cache= 0; + } + else + { + /* + Set default values for a key cache + The values in dflt_key_cache_var is set by my_getopt() at startup + + We don't set 'buff_size' as this is used to enable the key cache + */ + key_cache->param_block_size= dflt_key_cache_var.param_block_size; + key_cache->param_division_limit= dflt_key_cache_var.param_division_limit; + key_cache->param_age_threshold= dflt_key_cache_var.param_age_threshold; + key_cache->param_partitions= dflt_key_cache_var.param_partitions; + } + } + DBUG_RETURN(key_cache); +} + + +KEY_CACHE *get_or_create_key_cache(const char *name, size_t length) +{ + LEX_CSTRING key_cache_name; + KEY_CACHE *key_cache; + + key_cache_name.str= name; + key_cache_name.length= length; + if (!(key_cache= 
get_key_cache(&key_cache_name))) + key_cache= create_key_cache(name, length); + return key_cache; +} + + +void free_key_cache(const char *name, void *key_cache) +{ + end_key_cache(static_cast(key_cache), 1); // Can never fail + my_free(key_cache); +} + + +bool process_key_caches(process_key_cache_t func, void *param) +{ + I_List_iterator it(key_caches); + NAMED_ILINK *element; + int res= 0; + + while ((element= it++)) + { + KEY_CACHE *key_cache= (KEY_CACHE *) element->data; + res |= func(element->name, key_cache, param); + } + return res != 0; +} + +/* Rpl_filter functions */ + +LEX_CSTRING default_rpl_filter_base= {STRING_WITH_LEN("")}; + +Rpl_filter *get_rpl_filter(LEX_CSTRING *filter_name) +{ + if (!filter_name->length) + filter_name= &default_rpl_filter_base; + return ((Rpl_filter*) find_named(&rpl_filters, + filter_name->str, filter_name->length, 0)); +} + +Rpl_filter *create_rpl_filter(const char *name, size_t length) +{ + Rpl_filter *filter; + DBUG_ENTER("create_rpl_filter"); + DBUG_PRINT("enter",("name: %.*s", (int)length, name)); + + filter= new Rpl_filter; + if (filter) + { + if (!new NAMED_ILINK(&rpl_filters, name, length, (uchar*) filter)) + { + delete filter; + filter= 0; + } + } + DBUG_RETURN(filter); +} + + +Rpl_filter *get_or_create_rpl_filter(const char *name, size_t length) +{ + LEX_CSTRING rpl_filter_name; + Rpl_filter *filter; + + rpl_filter_name.str= (char *) name; + rpl_filter_name.length= length; + if (!(filter= get_rpl_filter(&rpl_filter_name))) + filter= create_rpl_filter(name, length); + return filter; +} + +void free_rpl_filter(const char *name, void *filter) +{ + delete static_cast(filter); +} + +void free_all_rpl_filters() +{ + rpl_filters.delete_elements(free_rpl_filter); +} diff --git a/sql/keycaches.h b/sql/keycaches.h new file mode 100644 index 00000000..68c3dd3a --- /dev/null +++ b/sql/keycaches.h @@ -0,0 +1,57 @@ +#ifndef KEYCACHES_INCLUDED +#define KEYCACHES_INCLUDED + +/* Copyright (c) 2002, 2010, Oracle and/or its affiliates. 
All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "sql_list.h" +#include +#include + +extern "C" +{ + typedef int (*process_key_cache_t) (const char *, KEY_CACHE *, void *); +} + +class NAMED_ILINK; + +class NAMED_ILIST: public I_List +{ + public: + void delete_elements(void (*free_element)(const char*, void*)); + bool delete_element(const char *name, size_t length, void (*free_element)(const char*, void*)); +}; + +/* For key cache */ +extern LEX_CSTRING default_key_cache_base; +extern KEY_CACHE zero_key_cache; +extern NAMED_ILIST key_caches; + +KEY_CACHE *create_key_cache(const char *name, size_t length); +KEY_CACHE *get_key_cache(const LEX_CSTRING *cache_name); +KEY_CACHE *get_or_create_key_cache(const char *name, size_t length); +void free_key_cache(const char *name, void *key_cache); +bool process_key_caches(process_key_cache_t func, void *param); + +/* For Rpl_filter */ +extern LEX_CSTRING default_rpl_filter_base; +extern NAMED_ILIST rpl_filters; + +Rpl_filter *create_rpl_filter(const char *name, size_t length); +Rpl_filter *get_rpl_filter(LEX_CSTRING *filter_name); +Rpl_filter *get_or_create_rpl_filter(const char *name, size_t length); +void free_all_rpl_filters(void); + +#endif /* KEYCACHES_INCLUDED */ diff --git a/sql/lex.h b/sql/lex.h new file mode 100644 index 00000000..89e055a4 --- /dev/null +++ b/sql/lex.h @@ -0,0 +1,807 @@ 
+#ifndef LEX_INCLUDED +#define LEX_INCLUDED + +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. + Copyright (c) 2009, 2015, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* This file includes all reserved words and functions */ + +#include "lex_symbol.h" + +SYM_GROUP sym_group_common= {"", ""}; +SYM_GROUP sym_group_geom= {"Spatial extensions", "HAVE_SPATIAL"}; +SYM_GROUP sym_group_rtree= {"RTree keys", "HAVE_RTREE_KEYS"}; + +/* We don't want to include sql_yacc.h into gen_lex_hash */ +#ifdef NO_YACC_SYMBOLS +#define SYM_OR_NULL(A) 0 +#else +#define SYM_OR_NULL(A) A +#endif + +#define SYM(A) SYM_OR_NULL(A),0,&sym_group_common + +/* + Symbols are broken into separated arrays to allow field names with + same name as functions. + These are kept sorted for human lookup (the symbols are hashed). + + NOTE! The symbol tables should be the same regardless of what features + are compiled into the server. Don't add ifdef'ed symbols to the + lists + NOTE!! + If you add or delete symbols from this file, you must also update results for + the perfschema.start_server_low_digest_sql_length test! 
+*/ + +SYMBOL symbols[] = { + { "&&", SYM(AND_AND_SYM)}, + { "<=", SYM(LE)}, + { "<>", SYM(NE)}, + { "!=", SYM(NE)}, + { ">=", SYM(GE)}, + { "<<", SYM(SHIFT_LEFT)}, + { ">>", SYM(SHIFT_RIGHT)}, + { "<=>", SYM(EQUAL_SYM)}, + { "ACCESSIBLE", SYM(ACCESSIBLE_SYM)}, + { "ACCOUNT", SYM(ACCOUNT_SYM)}, + { "ACTION", SYM(ACTION)}, + { "ADD", SYM(ADD)}, + { "ADMIN", SYM(ADMIN_SYM)}, + { "AFTER", SYM(AFTER_SYM)}, + { "AGAINST", SYM(AGAINST)}, + { "AGGREGATE", SYM(AGGREGATE_SYM)}, + { "ALL", SYM(ALL)}, + { "ALGORITHM", SYM(ALGORITHM_SYM)}, + { "ALTER", SYM(ALTER)}, + { "ALWAYS", SYM(ALWAYS_SYM)}, + { "ANALYZE", SYM(ANALYZE_SYM)}, + { "AND", SYM(AND_SYM)}, + { "ANY", SYM(ANY_SYM)}, + { "AS", SYM(AS)}, + { "ASC", SYM(ASC)}, + { "ASCII", SYM(ASCII_SYM)}, + { "ASENSITIVE", SYM(ASENSITIVE_SYM)}, + { "AT", SYM(AT_SYM)}, + { "ATOMIC", SYM(ATOMIC_SYM)}, + { "AUTHORS", SYM(AUTHORS_SYM)}, + { "AUTO_INCREMENT", SYM(AUTO_INC)}, + { "AUTOEXTEND_SIZE", SYM(AUTOEXTEND_SIZE_SYM)}, + { "AUTO", SYM(AUTO_SYM)}, + { "AVG", SYM(AVG_SYM)}, + { "AVG_ROW_LENGTH", SYM(AVG_ROW_LENGTH)}, + { "BACKUP", SYM(BACKUP_SYM)}, + { "BEFORE", SYM(BEFORE_SYM)}, + { "BEGIN", SYM(BEGIN_MARIADB_SYM)}, + { "BETWEEN", SYM(BETWEEN_SYM)}, + { "BIGINT", SYM(BIGINT)}, + { "BINARY", SYM(BINARY)}, + { "BINLOG", SYM(BINLOG_SYM)}, + { "BIT", SYM(BIT_SYM)}, + { "BLOB", SYM(BLOB_MARIADB_SYM)}, + { "BLOCK", SYM(BLOCK_SYM)}, + { "BODY", SYM(BODY_MARIADB_SYM)}, + { "BOOL", SYM(BOOL_SYM)}, + { "BOOLEAN", SYM(BOOLEAN_SYM)}, + { "BOTH", SYM(BOTH)}, + { "BTREE", SYM(BTREE_SYM)}, + { "BY", SYM(BY)}, + { "BYTE", SYM(BYTE_SYM)}, + { "CACHE", SYM(CACHE_SYM)}, + { "CALL", SYM(CALL_SYM)}, + { "CASCADE", SYM(CASCADE)}, + { "CASCADED", SYM(CASCADED)}, + { "CASE", SYM(CASE_SYM)}, + { "CATALOG_NAME", SYM(CATALOG_NAME_SYM)}, + { "CHAIN", SYM(CHAIN_SYM)}, + { "CHANGE", SYM(CHANGE)}, + { "CHANGED", SYM(CHANGED)}, + { "CHANNEL", SYM(CHANNEL_SYM)}, + { "CHAR", SYM(CHAR_SYM)}, + { "CHARACTER", SYM(CHAR_SYM)}, + { "CHARSET", SYM(CHARSET)}, + { "CHECK", 
SYM(CHECK_SYM)}, + { "CHECKPOINT", SYM(CHECKPOINT_SYM)}, + { "CHECKSUM", SYM(CHECKSUM_SYM)}, + { "CIPHER", SYM(CIPHER_SYM)}, + { "CLASS_ORIGIN", SYM(CLASS_ORIGIN_SYM)}, + { "CLIENT", SYM(CLIENT_SYM)}, + { "CLOB", SYM(CLOB_MARIADB_SYM)}, + { "CLOSE", SYM(CLOSE_SYM)}, + { "COALESCE", SYM(COALESCE)}, + { "CODE", SYM(CODE_SYM)}, + { "COLLATE", SYM(COLLATE_SYM)}, + { "COLLATION", SYM(COLLATION_SYM)}, + { "COLUMN", SYM(COLUMN_SYM)}, + { "COLUMN_NAME", SYM(COLUMN_NAME_SYM)}, + { "COLUMNS", SYM(COLUMNS)}, + { "COLUMN_ADD", SYM(COLUMN_ADD_SYM)}, + { "COLUMN_CHECK", SYM(COLUMN_CHECK_SYM)}, + { "COLUMN_CREATE", SYM(COLUMN_CREATE_SYM)}, + { "COLUMN_DELETE", SYM(COLUMN_DELETE_SYM)}, + { "COLUMN_GET", SYM(COLUMN_GET_SYM)}, + { "COMMENT", SYM(COMMENT_SYM)}, + { "COMMIT", SYM(COMMIT_SYM)}, + { "COMMITTED", SYM(COMMITTED_SYM)}, + { "COMPACT", SYM(COMPACT_SYM)}, + { "COMPLETION", SYM(COMPLETION_SYM)}, + { "COMPRESSED", SYM(COMPRESSED_SYM)}, + { "CONCURRENT", SYM(CONCURRENT)}, + { "CONDITION", SYM(CONDITION_SYM)}, + { "CONNECTION", SYM(CONNECTION_SYM)}, + { "CONSISTENT", SYM(CONSISTENT_SYM)}, + { "CONSTRAINT", SYM(CONSTRAINT)}, + { "CONSTRAINT_CATALOG", SYM(CONSTRAINT_CATALOG_SYM)}, + { "CONSTRAINT_NAME", SYM(CONSTRAINT_NAME_SYM)}, + { "CONSTRAINT_SCHEMA", SYM(CONSTRAINT_SCHEMA_SYM)}, + { "CONTAINS", SYM(CONTAINS_SYM)}, + { "CONTEXT", SYM(CONTEXT_SYM)}, + { "CONTINUE", SYM(CONTINUE_MARIADB_SYM)}, + { "CONTRIBUTORS", SYM(CONTRIBUTORS_SYM)}, + { "CONVERT", SYM(CONVERT_SYM)}, + { "CPU", SYM(CPU_SYM)}, + { "CREATE", SYM(CREATE)}, + { "CROSS", SYM(CROSS)}, + { "CUBE", SYM(CUBE_SYM)}, + { "CURRENT", SYM(CURRENT_SYM)}, + { "CURRENT_DATE", SYM(CURDATE)}, + { "CURRENT_POS", SYM(CURRENT_POS_SYM)}, + { "CURRENT_ROLE", SYM(CURRENT_ROLE)}, + { "CURRENT_TIME", SYM(CURTIME)}, + { "CURRENT_TIMESTAMP", SYM(NOW_SYM)}, + { "CURRENT_USER", SYM(CURRENT_USER)}, + { "CURSOR", SYM(CURSOR_SYM)}, + { "CURSOR_NAME", SYM(CURSOR_NAME_SYM)}, + { "CYCLE", SYM(CYCLE_SYM)}, + { "DATA", SYM(DATA_SYM)}, + { 
"DATABASE", SYM(DATABASE)}, + { "DATABASES", SYM(DATABASES)}, + { "DATAFILE", SYM(DATAFILE_SYM)}, + { "DATE", SYM(DATE_SYM)}, + { "DATETIME", SYM(DATETIME)}, + { "DAY", SYM(DAY_SYM)}, + { "DAY_HOUR", SYM(DAY_HOUR_SYM)}, + { "DAY_MICROSECOND", SYM(DAY_MICROSECOND_SYM)}, + { "DAY_MINUTE", SYM(DAY_MINUTE_SYM)}, + { "DAY_SECOND", SYM(DAY_SECOND_SYM)}, + { "DEALLOCATE", SYM(DEALLOCATE_SYM)}, + { "DEC", SYM(DECIMAL_SYM)}, + { "DECIMAL", SYM(DECIMAL_SYM)}, + { "DECLARE", SYM(DECLARE_MARIADB_SYM)}, + { "DEFAULT", SYM(DEFAULT)}, + { "DEFINER", SYM(DEFINER_SYM)}, + { "DELAYED", SYM(DELAYED_SYM)}, + { "DELAY_KEY_WRITE", SYM(DELAY_KEY_WRITE_SYM)}, + { "DELETE", SYM(DELETE_SYM)}, + { "DELETE_DOMAIN_ID", SYM(DELETE_DOMAIN_ID_SYM)}, + { "DESC", SYM(DESC)}, + { "DESCRIBE", SYM(DESCRIBE)}, + { "DES_KEY_FILE", SYM(DES_KEY_FILE)}, + { "DETERMINISTIC", SYM(DETERMINISTIC_SYM)}, + { "DIAGNOSTICS", SYM(DIAGNOSTICS_SYM)}, + { "DIRECTORY", SYM(DIRECTORY_SYM)}, + { "DISABLE", SYM(DISABLE_SYM)}, + { "DISCARD", SYM(DISCARD)}, + { "DISK", SYM(DISK_SYM)}, + { "DISTINCT", SYM(DISTINCT)}, + { "DISTINCTROW", SYM(DISTINCT)}, /* Access likes this */ + { "DIV", SYM(DIV_SYM)}, + { "DO", SYM(DO_SYM)}, + { "DOUBLE", SYM(DOUBLE_SYM)}, + { "DO_DOMAIN_IDS", SYM(DO_DOMAIN_IDS_SYM)}, + { "DROP", SYM(DROP)}, + { "DUAL", SYM(DUAL_SYM)}, + { "DUMPFILE", SYM(DUMPFILE)}, + { "DUPLICATE", SYM(DUPLICATE_SYM)}, + { "DYNAMIC", SYM(DYNAMIC_SYM)}, + { "EACH", SYM(EACH_SYM)}, + { "ELSE", SYM(ELSE)}, + { "ELSEIF", SYM(ELSEIF_MARIADB_SYM)}, + { "ELSIF", SYM(ELSIF_MARIADB_SYM)}, + { "EMPTY", SYM(EMPTY_SYM)}, + { "ENABLE", SYM(ENABLE_SYM)}, + { "ENCLOSED", SYM(ENCLOSED)}, + { "END", SYM(END)}, + { "ENDS", SYM(ENDS_SYM)}, + { "ENGINE", SYM(ENGINE_SYM)}, + { "ENGINES", SYM(ENGINES_SYM)}, + { "ENUM", SYM(ENUM)}, + { "ERROR", SYM(ERROR_SYM)}, + { "ERRORS", SYM(ERRORS)}, + { "ESCAPE", SYM(ESCAPE_SYM)}, + { "ESCAPED", SYM(ESCAPED)}, + { "EVENT", SYM(EVENT_SYM)}, + { "EVENTS", SYM(EVENTS_SYM)}, + { "EVERY", SYM(EVERY_SYM)}, + { 
"EXAMINED", SYM(EXAMINED_SYM)}, + { "EXCEPT", SYM(EXCEPT_SYM)}, + { "EXCHANGE", SYM(EXCHANGE_SYM)}, + { "EXCLUDE", SYM(EXCLUDE_SYM)}, + { "EXECUTE", SYM(EXECUTE_SYM)}, + { "EXCEPTION", SYM(EXCEPTION_MARIADB_SYM)}, + { "EXISTS", SYM(EXISTS)}, + { "EXIT", SYM(EXIT_MARIADB_SYM)}, + { "EXPANSION", SYM(EXPANSION_SYM)}, + { "EXPIRE", SYM(EXPIRE_SYM)}, + { "EXPORT", SYM(EXPORT_SYM)}, + { "EXPLAIN", SYM(DESCRIBE)}, + { "EXTENDED", SYM(EXTENDED_SYM)}, + { "EXTENT_SIZE", SYM(EXTENT_SIZE_SYM)}, + { "FALSE", SYM(FALSE_SYM)}, + { "FAST", SYM(FAST_SYM)}, + { "FAULTS", SYM(FAULTS_SYM)}, + { "FEDERATED", SYM(FEDERATED_SYM)}, + { "FETCH", SYM(FETCH_SYM)}, + { "FIELDS", SYM(COLUMNS)}, + { "FILE", SYM(FILE_SYM)}, + { "FIRST", SYM(FIRST_SYM)}, + { "FIXED", SYM(FIXED_SYM)}, + { "FLOAT", SYM(FLOAT_SYM)}, + { "FLOAT4", SYM(FLOAT_SYM)}, + { "FLOAT8", SYM(DOUBLE_SYM)}, + { "FLUSH", SYM(FLUSH_SYM)}, + { "FOLLOWING", SYM(FOLLOWING_SYM)}, + { "FOLLOWS", SYM(FOLLOWS_SYM)}, + { "FOR", SYM(FOR_SYM)}, + { "FORCE", SYM(FORCE_SYM)}, + { "FOREIGN", SYM(FOREIGN)}, + { "FORMAT", SYM(FORMAT_SYM)}, + { "FOUND", SYM(FOUND_SYM)}, + { "FROM", SYM(FROM)}, + { "FULL", SYM(FULL)}, + { "FULLTEXT", SYM(FULLTEXT_SYM)}, + { "FUNCTION", SYM(FUNCTION_SYM)}, + { "GENERAL", SYM(GENERAL)}, + { "GENERATED", SYM(GENERATED_SYM)}, + { "GET_FORMAT", SYM(GET_FORMAT)}, + { "GET", SYM(GET_SYM)}, + { "GLOBAL", SYM(GLOBAL_SYM)}, + { "GOTO", SYM(GOTO_MARIADB_SYM)}, + { "GRANT", SYM(GRANT)}, + { "GRANTS", SYM(GRANTS)}, + { "GROUP", SYM(GROUP_SYM)}, + { "HANDLER", SYM(HANDLER_SYM)}, + { "HARD", SYM(HARD_SYM)}, + { "HASH", SYM(HASH_SYM)}, + { "HAVING", SYM(HAVING)}, + { "HELP", SYM(HELP_SYM)}, + { "HIGH_PRIORITY", SYM(HIGH_PRIORITY)}, + { "HISTORY", SYM(HISTORY_SYM)}, + { "HOST", SYM(HOST_SYM)}, + { "HOSTS", SYM(HOSTS_SYM)}, + { "HOUR", SYM(HOUR_SYM)}, + { "HOUR_MICROSECOND", SYM(HOUR_MICROSECOND_SYM)}, + { "HOUR_MINUTE", SYM(HOUR_MINUTE_SYM)}, + { "HOUR_SECOND", SYM(HOUR_SECOND_SYM)}, + { "ID", SYM(ID_SYM)}, + { "IDENTIFIED", 
SYM(IDENTIFIED_SYM)}, + { "IF", SYM(IF_SYM)}, + { "IGNORE", SYM(IGNORE_SYM)}, + { "IGNORED", SYM(IGNORED_SYM)}, + { "IGNORE_DOMAIN_IDS", SYM(IGNORE_DOMAIN_IDS_SYM)}, + { "IGNORE_SERVER_IDS", SYM(IGNORE_SERVER_IDS_SYM)}, + { "IMMEDIATE", SYM(IMMEDIATE_SYM)}, + { "IMPORT", SYM(IMPORT)}, + { "INTERSECT", SYM(INTERSECT_SYM)}, + { "IN", SYM(IN_SYM)}, + { "INCREMENT", SYM(INCREMENT_SYM)}, + { "INDEX", SYM(INDEX_SYM)}, + { "INDEXES", SYM(INDEXES)}, + { "INFILE", SYM(INFILE)}, + { "INITIAL_SIZE", SYM(INITIAL_SIZE_SYM)}, + { "INNER", SYM(INNER_SYM)}, + { "INOUT", SYM(INOUT_SYM)}, + { "INSENSITIVE", SYM(INSENSITIVE_SYM)}, + { "INSERT", SYM(INSERT)}, + { "INSERT_METHOD", SYM(INSERT_METHOD)}, + { "INSTALL", SYM(INSTALL_SYM)}, + { "INT", SYM(INT_SYM)}, + { "INT1", SYM(TINYINT)}, + { "INT2", SYM(SMALLINT)}, + { "INT3", SYM(MEDIUMINT)}, + { "INT4", SYM(INT_SYM)}, + { "INT8", SYM(BIGINT)}, + { "INTEGER", SYM(INT_SYM)}, + { "INTERVAL", SYM(INTERVAL_SYM)}, + { "INVISIBLE", SYM(INVISIBLE_SYM)}, + { "INTO", SYM(INTO)}, + { "IO", SYM(IO_SYM)}, + { "IO_THREAD", SYM(RELAY_THREAD)}, + { "IPC", SYM(IPC_SYM)}, + { "IS", SYM(IS)}, + { "ISOLATION", SYM(ISOLATION)}, + { "ISOPEN", SYM(ISOPEN_SYM)}, + { "ISSUER", SYM(ISSUER_SYM)}, + { "ITERATE", SYM(ITERATE_SYM)}, + { "INVOKER", SYM(INVOKER_SYM)}, + { "JOIN", SYM(JOIN_SYM)}, + { "JSON", SYM(JSON_SYM)}, + { "JSON_TABLE", SYM(JSON_TABLE_SYM)}, + { "KEY", SYM(KEY_SYM)}, + { "KEYS", SYM(KEYS)}, + { "KEY_BLOCK_SIZE", SYM(KEY_BLOCK_SIZE)}, + { "KILL", SYM(KILL_SYM)}, + { "LANGUAGE", SYM(LANGUAGE_SYM)}, + { "LAST", SYM(LAST_SYM)}, + { "LAST_VALUE", SYM(LAST_VALUE)}, + { "LASTVAL", SYM(LASTVAL_SYM)}, + { "LEADING", SYM(LEADING)}, + { "LEAVE", SYM(LEAVE_SYM)}, + { "LEAVES", SYM(LEAVES)}, + { "LEFT", SYM(LEFT)}, + { "LESS", SYM(LESS_SYM)}, + { "LEVEL", SYM(LEVEL_SYM)}, + { "LIKE", SYM(LIKE)}, + { "LIMIT", SYM(LIMIT)}, + { "LINEAR", SYM(LINEAR_SYM)}, + { "LINES", SYM(LINES)}, + { "LIST", SYM(LIST_SYM)}, + { "LOAD", SYM(LOAD)}, + { "LOCAL", SYM(LOCAL_SYM)}, 
+ { "LOCALTIME", SYM(NOW_SYM)}, + { "LOCALTIMESTAMP", SYM(NOW_SYM)}, + { "LOCK", SYM(LOCK_SYM)}, + { "LOCKED", SYM(LOCKED_SYM)}, + { "LOCKS", SYM(LOCKS_SYM)}, + { "LOGFILE", SYM(LOGFILE_SYM)}, + { "LOGS", SYM(LOGS_SYM)}, + { "LONG", SYM(LONG_SYM)}, + { "LONGBLOB", SYM(LONGBLOB)}, + { "LONGTEXT", SYM(LONGTEXT)}, + { "LOOP", SYM(LOOP_SYM)}, + { "LOW_PRIORITY", SYM(LOW_PRIORITY)}, + { "MASTER", SYM(MASTER_SYM)}, + { "MASTER_CONNECT_RETRY", SYM(MASTER_CONNECT_RETRY_SYM)}, + { "MASTER_DELAY", SYM(MASTER_DELAY_SYM)}, + { "MASTER_GTID_POS", SYM(MASTER_GTID_POS_SYM)}, + { "MASTER_HOST", SYM(MASTER_HOST_SYM)}, + { "MASTER_LOG_FILE", SYM(MASTER_LOG_FILE_SYM)}, + { "MASTER_LOG_POS", SYM(MASTER_LOG_POS_SYM)}, + { "MASTER_PASSWORD", SYM(MASTER_PASSWORD_SYM)}, + { "MASTER_PORT", SYM(MASTER_PORT_SYM)}, + { "MASTER_SERVER_ID", SYM(MASTER_SERVER_ID_SYM)}, + { "MASTER_SSL", SYM(MASTER_SSL_SYM)}, + { "MASTER_SSL_CA", SYM(MASTER_SSL_CA_SYM)}, + { "MASTER_SSL_CAPATH",SYM(MASTER_SSL_CAPATH_SYM)}, + { "MASTER_SSL_CERT", SYM(MASTER_SSL_CERT_SYM)}, + { "MASTER_SSL_CIPHER",SYM(MASTER_SSL_CIPHER_SYM)}, + { "MASTER_SSL_CRL", SYM(MASTER_SSL_CRL_SYM)}, + { "MASTER_SSL_CRLPATH",SYM(MASTER_SSL_CRLPATH_SYM)}, + { "MASTER_SSL_KEY", SYM(MASTER_SSL_KEY_SYM)}, + { "MASTER_SSL_VERIFY_SERVER_CERT", SYM(MASTER_SSL_VERIFY_SERVER_CERT_SYM)}, + { "MASTER_USER", SYM(MASTER_USER_SYM)}, + { "MASTER_USE_GTID", SYM(MASTER_USE_GTID_SYM)}, + { "MASTER_DEMOTE_TO_SLAVE", SYM(MASTER_DEMOTE_TO_SLAVE_SYM)}, + { "MASTER_HEARTBEAT_PERIOD", SYM(MASTER_HEARTBEAT_PERIOD_SYM)}, + { "MATCH", SYM(MATCH)}, + { "MAX_CONNECTIONS_PER_HOUR", SYM(MAX_CONNECTIONS_PER_HOUR)}, + { "MAX_QUERIES_PER_HOUR", SYM(MAX_QUERIES_PER_HOUR)}, + { "MAX_ROWS", SYM(MAX_ROWS)}, + { "MAX_SIZE", SYM(MAX_SIZE_SYM)}, + { "MAX_STATEMENT_TIME", SYM(MAX_STATEMENT_TIME_SYM)}, + { "MAX_UPDATES_PER_HOUR", SYM(MAX_UPDATES_PER_HOUR)}, + { "MAX_USER_CONNECTIONS", SYM(MAX_USER_CONNECTIONS_SYM)}, + { "MAXVALUE", SYM(MAXVALUE_SYM)}, + { "MEDIUM", SYM(MEDIUM_SYM)}, + 
{ "MEDIUMBLOB", SYM(MEDIUMBLOB)}, + { "MEDIUMINT", SYM(MEDIUMINT)}, + { "MEDIUMTEXT", SYM(MEDIUMTEXT)}, + { "MEMORY", SYM(MEMORY_SYM)}, + { "MERGE", SYM(MERGE_SYM)}, + { "MESSAGE_TEXT", SYM(MESSAGE_TEXT_SYM)}, + { "MICROSECOND", SYM(MICROSECOND_SYM)}, + { "MIDDLEINT", SYM(MEDIUMINT)}, /* For powerbuilder */ + { "MIGRATE", SYM(MIGRATE_SYM)}, + { "MINUS", SYM(MINUS_ORACLE_SYM)}, + { "MINUTE", SYM(MINUTE_SYM)}, + { "MINUTE_MICROSECOND", SYM(MINUTE_MICROSECOND_SYM)}, + { "MINUTE_SECOND", SYM(MINUTE_SECOND_SYM)}, + { "MINVALUE", SYM(MINVALUE_SYM)}, + { "MIN_ROWS", SYM(MIN_ROWS)}, + { "MOD", SYM(MOD_SYM)}, + { "MODE", SYM(MODE_SYM)}, + { "MODIFIES", SYM(MODIFIES_SYM)}, + { "MODIFY", SYM(MODIFY_SYM)}, + { "MONITOR", SYM(MONITOR_SYM)}, + { "MONTH", SYM(MONTH_SYM)}, + { "MUTEX", SYM(MUTEX_SYM)}, + { "MYSQL", SYM(MYSQL_SYM)}, + { "MYSQL_ERRNO", SYM(MYSQL_ERRNO_SYM)}, + { "NAME", SYM(NAME_SYM)}, + { "NAMES", SYM(NAMES_SYM)}, + { "NATIONAL", SYM(NATIONAL_SYM)}, + { "NATURAL", SYM(NATURAL)}, + { "NCHAR", SYM(NCHAR_SYM)}, + { "NESTED", SYM(NESTED_SYM)}, + { "NEVER", SYM(NEVER_SYM)}, + { "NEW", SYM(NEW_SYM)}, + { "NEXT", SYM(NEXT_SYM)}, + { "NEXTVAL", SYM(NEXTVAL_SYM)}, + { "NO", SYM(NO_SYM)}, + { "NOMAXVALUE", SYM(NOMAXVALUE_SYM)}, + { "NOMINVALUE", SYM(NOMINVALUE_SYM)}, + { "NOCACHE", SYM(NOCACHE_SYM)}, + { "NOCYCLE", SYM(NOCYCLE_SYM)}, + { "NO_WAIT", SYM(NO_WAIT_SYM)}, + { "NOWAIT", SYM(NOWAIT_SYM)}, + { "NODEGROUP", SYM(NODEGROUP_SYM)}, + { "NONE", SYM(NONE_SYM)}, + { "NOT", SYM(NOT_SYM)}, + { "NOTFOUND", SYM(NOTFOUND_SYM)}, + { "NO_WRITE_TO_BINLOG", SYM(NO_WRITE_TO_BINLOG)}, + { "NULL", SYM(NULL_SYM)}, + { "NUMBER", SYM(NUMBER_MARIADB_SYM)}, + { "NUMERIC", SYM(NUMERIC_SYM)}, + { "NVARCHAR", SYM(NVARCHAR_SYM)}, + { "OF", SYM(OF_SYM)}, + { "OFFSET", SYM(OFFSET_SYM)}, + { "OLD_PASSWORD", SYM(OLD_PASSWORD_SYM)}, + { "ON", SYM(ON)}, + { "ONE", SYM(ONE_SYM)}, + { "ONLINE", SYM(ONLINE_SYM)}, + { "ONLY", SYM(ONLY_SYM)}, + { "OPEN", SYM(OPEN_SYM)}, + { "OPTIMIZE", SYM(OPTIMIZE)}, + { 
"OPTIONS", SYM(OPTIONS_SYM)}, + { "OPTION", SYM(OPTION)}, + { "OPTIONALLY", SYM(OPTIONALLY)}, + { "OR", SYM(OR_SYM)}, + { "ORDER", SYM(ORDER_SYM)}, + { "ORDINALITY", SYM(ORDINALITY_SYM)}, + { "OTHERS", SYM(OTHERS_MARIADB_SYM)}, + { "OUT", SYM(OUT_SYM)}, + { "OUTER", SYM(OUTER)}, + { "OUTFILE", SYM(OUTFILE)}, + { "OVER", SYM(OVER_SYM)}, + { "OVERLAPS", SYM(OVERLAPS_SYM)}, + { "OWNER", SYM(OWNER_SYM)}, + { "PACKAGE", SYM(PACKAGE_MARIADB_SYM)}, + { "PACK_KEYS", SYM(PACK_KEYS_SYM)}, + { "PAGE", SYM(PAGE_SYM)}, + { "PAGE_CHECKSUM", SYM(PAGE_CHECKSUM_SYM)}, + { "PARSER", SYM(PARSER_SYM)}, + { "PARSE_VCOL_EXPR", SYM(PARSE_VCOL_EXPR_SYM)}, + { "PATH", SYM(PATH_SYM)}, + { "PERIOD", SYM(PERIOD_SYM)}, + { "PARTIAL", SYM(PARTIAL)}, + { "PARTITION", SYM(PARTITION_SYM)}, + { "PARTITIONING", SYM(PARTITIONING_SYM)}, + { "PARTITIONS", SYM(PARTITIONS_SYM)}, + { "PASSWORD", SYM(PASSWORD_SYM)}, + { "PERSISTENT", SYM(PERSISTENT_SYM)}, + { "PHASE", SYM(PHASE_SYM)}, + { "PLUGIN", SYM(PLUGIN_SYM)}, + { "PLUGINS", SYM(PLUGINS_SYM)}, + { "PORT", SYM(PORT_SYM)}, + { "PORTION", SYM(PORTION_SYM)}, + { "PRECEDES", SYM(PRECEDES_SYM)}, + { "PRECEDING", SYM(PRECEDING_SYM)}, + { "PRECISION", SYM(PRECISION)}, + { "PREPARE", SYM(PREPARE_SYM)}, + { "PRESERVE", SYM(PRESERVE_SYM)}, + { "PREV", SYM(PREV_SYM)}, + { "PREVIOUS", SYM(PREVIOUS_SYM)}, + { "PRIMARY", SYM(PRIMARY_SYM)}, + { "PRIVILEGES", SYM(PRIVILEGES)}, + { "PROCEDURE", SYM(PROCEDURE_SYM)}, + { "PROCESS" , SYM(PROCESS)}, + { "PROCESSLIST", SYM(PROCESSLIST_SYM)}, + { "PROFILE", SYM(PROFILE_SYM)}, + { "PROFILES", SYM(PROFILES_SYM)}, + { "PROXY", SYM(PROXY_SYM)}, + { "PURGE", SYM(PURGE)}, + { "QUARTER", SYM(QUARTER_SYM)}, + { "QUERY", SYM(QUERY_SYM)}, + { "QUICK", SYM(QUICK)}, + { "RAISE", SYM(RAISE_MARIADB_SYM)}, + { "RANGE", SYM(RANGE_SYM)}, + { "RAW", SYM(RAW_MARIADB_SYM)}, + { "READ", SYM(READ_SYM)}, + { "READ_ONLY", SYM(READ_ONLY_SYM)}, + { "READ_WRITE", SYM(READ_WRITE_SYM)}, + { "READS", SYM(READS_SYM)}, + { "REAL", SYM(REAL)}, + { 
"REBUILD", SYM(REBUILD_SYM)}, + { "RECOVER", SYM(RECOVER_SYM)}, + { "RECURSIVE", SYM(RECURSIVE_SYM)}, + { "REDO_BUFFER_SIZE", SYM(REDO_BUFFER_SIZE_SYM)}, + { "REDOFILE", SYM(REDOFILE_SYM)}, + { "REDUNDANT", SYM(REDUNDANT_SYM)}, + { "REFERENCES", SYM(REFERENCES)}, + { "REGEXP", SYM(REGEXP)}, + { "RELAY", SYM(RELAY)}, + { "RELAYLOG", SYM(RELAYLOG_SYM)}, + { "RELAY_LOG_FILE", SYM(RELAY_LOG_FILE_SYM)}, + { "RELAY_LOG_POS", SYM(RELAY_LOG_POS_SYM)}, + { "RELAY_THREAD", SYM(RELAY_THREAD)}, + { "RELEASE", SYM(RELEASE_SYM)}, + { "RELOAD", SYM(RELOAD)}, + { "REMOVE", SYM(REMOVE_SYM)}, + { "RENAME", SYM(RENAME)}, + { "REORGANIZE", SYM(REORGANIZE_SYM)}, + { "REPAIR", SYM(REPAIR)}, + { "REPEATABLE", SYM(REPEATABLE_SYM)}, + { "REPLACE", SYM(REPLACE)}, + { "REPLAY", SYM(REPLAY_SYM)}, + { "REPLICA", SYM(SLAVE)}, + { "REPLICAS", SYM(SLAVES)}, + { "REPLICA_POS", SYM(SLAVE_POS_SYM)}, + { "REPLICATION", SYM(REPLICATION)}, + { "REPEAT", SYM(REPEAT_SYM)}, + { "REQUIRE", SYM(REQUIRE_SYM)}, + { "RESET", SYM(RESET_SYM)}, + { "RESIGNAL", SYM(RESIGNAL_SYM)}, + { "RESTART", SYM(RESTART_SYM)}, + { "RESTORE", SYM(RESTORE_SYM)}, + { "RESTRICT", SYM(RESTRICT)}, + { "RESUME", SYM(RESUME_SYM)}, + { "RETURNED_SQLSTATE",SYM(RETURNED_SQLSTATE_SYM)}, + { "RETURN", SYM(RETURN_MARIADB_SYM)}, + { "RETURNING", SYM(RETURNING_SYM)}, + { "RETURNS", SYM(RETURNS_SYM)}, + { "REUSE", SYM(REUSE_SYM)}, + { "REVERSE", SYM(REVERSE_SYM)}, + { "REVOKE", SYM(REVOKE)}, + { "RIGHT", SYM(RIGHT)}, + { "RLIKE", SYM(REGEXP)}, /* Like in mSQL2 */ + { "ROLE", SYM(ROLE_SYM)}, + { "ROLLBACK", SYM(ROLLBACK_SYM)}, + { "ROLLUP", SYM(ROLLUP_SYM)}, + { "ROUTINE", SYM(ROUTINE_SYM)}, + { "ROW", SYM(ROW_SYM)}, + { "ROWCOUNT", SYM(ROWCOUNT_SYM)}, /* Oracle-N */ + { "ROWNUM", SYM(ROWNUM_SYM)}, /* Oracle-R */ + { "ROWS", SYM(ROWS_SYM)}, + { "ROWTYPE", SYM(ROWTYPE_MARIADB_SYM)}, + { "ROW_COUNT", SYM(ROW_COUNT_SYM)}, + { "ROW_FORMAT", SYM(ROW_FORMAT_SYM)}, + /** sql_function and condition_property_name for GET DIAGNOSTICS */ + { "ROW_NUMBER", 
SYM(ROW_NUMBER_SYM)}, + { "RTREE", SYM(RTREE_SYM)}, + { "SAVEPOINT", SYM(SAVEPOINT_SYM)}, + { "SCHEDULE", SYM(SCHEDULE_SYM)}, + { "SCHEMA", SYM(DATABASE)}, + { "SCHEMA_NAME", SYM(SCHEMA_NAME_SYM)}, + { "SCHEMAS", SYM(DATABASES)}, + { "SECOND", SYM(SECOND_SYM)}, + { "SECOND_MICROSECOND", SYM(SECOND_MICROSECOND_SYM)}, + { "SECURITY", SYM(SECURITY_SYM)}, + { "SELECT", SYM(SELECT_SYM)}, + { "SENSITIVE", SYM(SENSITIVE_SYM)}, + { "SEPARATOR", SYM(SEPARATOR_SYM)}, + { "SEQUENCE", SYM(SEQUENCE_SYM)}, + { "SERIAL", SYM(SERIAL_SYM)}, + { "SERIALIZABLE", SYM(SERIALIZABLE_SYM)}, + { "SESSION", SYM(SESSION_SYM)}, + { "SERVER", SYM(SERVER_SYM)}, + { "SET", SYM(SET)}, + { "SETVAL", SYM(SETVAL_SYM)}, + { "SHARE", SYM(SHARE_SYM)}, + { "SHOW", SYM(SHOW)}, + { "SHUTDOWN", SYM(SHUTDOWN)}, + { "SIGNAL", SYM(SIGNAL_SYM)}, + { "SIGNED", SYM(SIGNED_SYM)}, + { "SIMPLE", SYM(SIMPLE_SYM)}, + { "SKIP", SYM(SKIP_SYM)}, + { "SLAVE", SYM(SLAVE)}, + { "SLAVES", SYM(SLAVES)}, + { "SLAVE_POS", SYM(SLAVE_POS_SYM)}, + { "SLOW", SYM(SLOW)}, + { "SNAPSHOT", SYM(SNAPSHOT_SYM)}, + { "SMALLINT", SYM(SMALLINT)}, + { "SOCKET", SYM(SOCKET_SYM)}, + { "SOFT", SYM(SOFT_SYM)}, + { "SOME", SYM(ANY_SYM)}, + { "SONAME", SYM(SONAME_SYM)}, + { "SOUNDS", SYM(SOUNDS_SYM)}, + { "SOURCE", SYM(SOURCE_SYM)}, + { "STAGE", SYM(STAGE_SYM)}, + { "STORED", SYM(STORED_SYM)}, + { "SPATIAL", SYM(SPATIAL_SYM)}, + { "SPECIFIC", SYM(SPECIFIC_SYM)}, + { "REF_SYSTEM_ID", SYM(REF_SYSTEM_ID_SYM)}, + { "SQL", SYM(SQL_SYM)}, + { "SQLEXCEPTION", SYM(SQLEXCEPTION_SYM)}, + { "SQLSTATE", SYM(SQLSTATE_SYM)}, + { "SQLWARNING", SYM(SQLWARNING_SYM)}, + { "SQL_BIG_RESULT", SYM(SQL_BIG_RESULT)}, + { "SQL_BUFFER_RESULT", SYM(SQL_BUFFER_RESULT)}, + { "SQL_CACHE", SYM(SQL_CACHE_SYM)}, + { "SQL_CALC_FOUND_ROWS", SYM(SQL_CALC_FOUND_ROWS)}, + { "SQL_NO_CACHE", SYM(SQL_NO_CACHE_SYM)}, + { "SQL_SMALL_RESULT", SYM(SQL_SMALL_RESULT)}, + { "SQL_THREAD", SYM(SQL_THREAD)}, + { "SQL_TSI_SECOND", SYM(SECOND_SYM)}, + { "SQL_TSI_MINUTE", SYM(MINUTE_SYM)}, + { 
"SQL_TSI_HOUR", SYM(HOUR_SYM)}, + { "SQL_TSI_DAY", SYM(DAY_SYM)}, + { "SQL_TSI_WEEK", SYM(WEEK_SYM)}, + { "SQL_TSI_MONTH", SYM(MONTH_SYM)}, + { "SQL_TSI_QUARTER", SYM(QUARTER_SYM)}, + { "SQL_TSI_YEAR", SYM(YEAR_SYM)}, + { "SSL", SYM(SSL_SYM)}, + { "START", SYM(START_SYM)}, + { "STARTING", SYM(STARTING)}, + { "STARTS", SYM(STARTS_SYM)}, + { "STATEMENT", SYM(STATEMENT_SYM)}, + { "STATS_AUTO_RECALC",SYM(STATS_AUTO_RECALC_SYM)}, + { "STATS_PERSISTENT", SYM(STATS_PERSISTENT_SYM)}, + { "STATS_SAMPLE_PAGES",SYM(STATS_SAMPLE_PAGES_SYM)}, + { "STATUS", SYM(STATUS_SYM)}, + { "STOP", SYM(STOP_SYM)}, + { "STORAGE", SYM(STORAGE_SYM)}, + { "STRAIGHT_JOIN", SYM(STRAIGHT_JOIN)}, + { "STRING", SYM(STRING_SYM)}, + { "SUBCLASS_ORIGIN", SYM(SUBCLASS_ORIGIN_SYM)}, + { "SUBJECT", SYM(SUBJECT_SYM)}, + { "SUBPARTITION", SYM(SUBPARTITION_SYM)}, + { "SUBPARTITIONS", SYM(SUBPARTITIONS_SYM)}, + { "SUPER", SYM(SUPER_SYM)}, + { "SUSPEND", SYM(SUSPEND_SYM)}, + { "SWAPS", SYM(SWAPS_SYM)}, + { "SWITCHES", SYM(SWITCHES_SYM)}, + { "SYSDATE", SYM(SYSDATE)}, + { "SYSTEM", SYM(SYSTEM)}, + { "SYSTEM_TIME", SYM(SYSTEM_TIME_SYM)}, + { "TABLE", SYM(TABLE_SYM)}, + { "TABLE_NAME", SYM(TABLE_NAME_SYM)}, + { "TABLES", SYM(TABLES)}, + { "TABLESPACE", SYM(TABLESPACE)}, + { "TABLE_CHECKSUM", SYM(TABLE_CHECKSUM_SYM)}, + { "TEMPORARY", SYM(TEMPORARY)}, + { "TEMPTABLE", SYM(TEMPTABLE_SYM)}, + { "TERMINATED", SYM(TERMINATED)}, + { "TEXT", SYM(TEXT_SYM)}, + { "THAN", SYM(THAN_SYM)}, + { "THEN", SYM(THEN_SYM)}, + { "TIES", SYM(TIES_SYM)}, + { "TIME", SYM(TIME_SYM)}, + { "TIMESTAMP", SYM(TIMESTAMP)}, + { "TIMESTAMPADD", SYM(TIMESTAMP_ADD)}, + { "TIMESTAMPDIFF", SYM(TIMESTAMP_DIFF)}, + { "TINYBLOB", SYM(TINYBLOB)}, + { "TINYINT", SYM(TINYINT)}, + { "TINYTEXT", SYM(TINYTEXT)}, + { "TO", SYM(TO_SYM)}, + { "TRAILING", SYM(TRAILING)}, + { "TRANSACTION", SYM(TRANSACTION_SYM)}, + { "TRANSACTIONAL", SYM(TRANSACTIONAL_SYM)}, + { "THREADS", SYM(THREADS_SYM)}, + { "TRIGGER", SYM(TRIGGER_SYM)}, + { "TRIGGERS", SYM(TRIGGERS_SYM)}, + 
{ "TRUE", SYM(TRUE_SYM)}, + { "TRUNCATE", SYM(TRUNCATE_SYM)}, + { "TYPE", SYM(TYPE_SYM)}, + { "TYPES", SYM(TYPES_SYM)}, + { "UNBOUNDED", SYM(UNBOUNDED_SYM)}, + { "UNCOMMITTED", SYM(UNCOMMITTED_SYM)}, + { "UNDEFINED", SYM(UNDEFINED_SYM)}, + { "UNDO_BUFFER_SIZE", SYM(UNDO_BUFFER_SIZE_SYM)}, + { "UNDOFILE", SYM(UNDOFILE_SYM)}, + { "UNDO", SYM(UNDO_SYM)}, + { "UNICODE", SYM(UNICODE_SYM)}, + { "UNION", SYM(UNION_SYM)}, + { "UNIQUE", SYM(UNIQUE_SYM)}, + { "UNKNOWN", SYM(UNKNOWN_SYM)}, + { "UNLOCK", SYM(UNLOCK_SYM)}, + { "UNINSTALL", SYM(UNINSTALL_SYM)}, + { "UNSIGNED", SYM(UNSIGNED)}, + { "UNTIL", SYM(UNTIL_SYM)}, + { "UPDATE", SYM(UPDATE_SYM)}, + { "UPGRADE", SYM(UPGRADE_SYM)}, + { "USAGE", SYM(USAGE)}, + { "USE", SYM(USE_SYM)}, + { "USER", SYM(USER_SYM)}, + { "USER_RESOURCES", SYM(RESOURCES)}, + { "USE_FRM", SYM(USE_FRM)}, + { "USING", SYM(USING)}, + { "UTC_DATE", SYM(UTC_DATE_SYM)}, + { "UTC_TIME", SYM(UTC_TIME_SYM)}, + { "UTC_TIMESTAMP", SYM(UTC_TIMESTAMP_SYM)}, + { "VALUE", SYM(VALUE_SYM)}, + { "VALUES", SYM(VALUES)}, + { "VARBINARY", SYM(VARBINARY)}, + { "VARCHAR", SYM(VARCHAR)}, + { "VARCHARACTER", SYM(VARCHAR)}, + { "VARCHAR2", SYM(VARCHAR2_MARIADB_SYM)}, + { "VARIABLES", SYM(VARIABLES)}, + { "VARYING", SYM(VARYING)}, + { "VIA", SYM(VIA_SYM)}, + { "VIEW", SYM(VIEW_SYM)}, + { "VIRTUAL", SYM(VIRTUAL_SYM)}, + { "VISIBLE", SYM(VISIBLE_SYM)}, + { "VERSIONING", SYM(VERSIONING_SYM)}, + { "WAIT", SYM(WAIT_SYM)}, + { "WARNINGS", SYM(WARNINGS)}, + { "WEEK", SYM(WEEK_SYM)}, + { "WEIGHT_STRING", SYM(WEIGHT_STRING_SYM)}, + { "WHEN", SYM(WHEN_SYM)}, + { "WHERE", SYM(WHERE)}, + { "WHILE", SYM(WHILE_SYM)}, + { "WINDOW", SYM(WINDOW_SYM)}, + { "WITH", SYM(WITH)}, + { "WITHIN", SYM(WITHIN)}, + { "WITHOUT", SYM(WITHOUT)}, + { "WORK", SYM(WORK_SYM)}, + { "WRAPPER", SYM(WRAPPER_SYM)}, + { "WRITE", SYM(WRITE_SYM)}, + { "X509", SYM(X509_SYM)}, + { "XOR", SYM(XOR)}, + { "XA", SYM(XA_SYM)}, + { "XML", SYM(XML_SYM)}, /* LOAD XML Arnold/Erik */ + { "YEAR", SYM(YEAR_SYM)}, + { "YEAR_MONTH", 
SYM(YEAR_MONTH_SYM)}, + { "ZEROFILL", SYM(ZEROFILL)}, + { "||", SYM(OR2_SYM)} +}; + + +SYMBOL sql_functions[] = { + { "ADDDATE", SYM(ADDDATE_SYM)}, + { "ADD_MONTHS", SYM(ADD_MONTHS_SYM)}, + { "BIT_AND", SYM(BIT_AND)}, + { "BIT_OR", SYM(BIT_OR)}, + { "BIT_XOR", SYM(BIT_XOR)}, + { "CAST", SYM(CAST_SYM)}, + { "COUNT", SYM(COUNT_SYM)}, + { "CUME_DIST", SYM(CUME_DIST_SYM)}, + { "CURDATE", SYM(CURDATE)}, + { "CURTIME", SYM(CURTIME)}, + { "DATE_ADD", SYM(DATE_ADD_INTERVAL)}, + { "DATE_SUB", SYM(DATE_SUB_INTERVAL)}, + { "DATE_FORMAT", SYM(DATE_FORMAT_SYM)}, + { "DENSE_RANK", SYM(DENSE_RANK_SYM)}, + { "EXTRACT", SYM(EXTRACT_SYM)}, + { "FIRST_VALUE", SYM(FIRST_VALUE_SYM)}, + { "GROUP_CONCAT", SYM(GROUP_CONCAT_SYM)}, + { "JSON_ARRAYAGG", SYM(JSON_ARRAYAGG_SYM)}, + { "JSON_OBJECTAGG", SYM(JSON_OBJECTAGG_SYM)}, + { "LAG", SYM(LAG_SYM)}, + { "LEAD", SYM(LEAD_SYM)}, + { "MAX", SYM(MAX_SYM)}, + { "MEDIAN", SYM(MEDIAN_SYM)}, + { "MID", SYM(SUBSTRING)}, /* unireg function */ + { "MIN", SYM(MIN_SYM)}, + { "NOW", SYM(NOW_SYM)}, + { "NTH_VALUE", SYM(NTH_VALUE_SYM)}, + { "NTILE", SYM(NTILE_SYM)}, + { "POSITION", SYM(POSITION_SYM)}, + { "PERCENT_RANK", SYM(PERCENT_RANK_SYM)}, + { "PERCENTILE_CONT", SYM(PERCENTILE_CONT_SYM)}, + { "PERCENTILE_DISC", SYM(PERCENTILE_DISC_SYM)}, + { "RANK", SYM(RANK_SYM)}, + { "SESSION_USER", SYM(USER_SYM)}, + { "STD", SYM(STD_SYM)}, + { "STDDEV", SYM(STD_SYM)}, + { "STDDEV_POP", SYM(STD_SYM)}, + { "STDDEV_SAMP", SYM(STDDEV_SAMP_SYM)}, + { "SUBDATE", SYM(SUBDATE_SYM)}, + { "SUBSTR", SYM(SUBSTRING)}, + { "SUBSTRING", SYM(SUBSTRING)}, + { "SUM", SYM(SUM_SYM)}, + { "SYSTEM_USER", SYM(USER_SYM)}, + { "TRIM", SYM(TRIM)}, + { "TRIM_ORACLE", SYM(TRIM_ORACLE)}, + { "VARIANCE", SYM(VARIANCE_SYM)}, + { "VAR_POP", SYM(VARIANCE_SYM)}, + { "VAR_SAMP", SYM(VAR_SAMP_SYM)}, +}; + +size_t symbols_length= sizeof(symbols) / sizeof(SYMBOL); +size_t sql_functions_length= sizeof(sql_functions) / sizeof(SYMBOL); + +#endif /* LEX_INCLUDED */ diff --git a/sql/lex_charset.cc 
b/sql/lex_charset.cc new file mode 100644 index 00000000..cfb74a0b --- /dev/null +++ b/sql/lex_charset.cc @@ -0,0 +1,775 @@ +/* Copyright (c) 2021, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#include "my_global.h" +#include "my_sys.h" +#include "m_ctype.h" +#include "lex_charset.h" +#include "mysqld_error.h" + + +static void +raise_ER_CONFLICTING_DECLARATIONS(const char *clause1, + const char *name1, + const char *clause2, + const char *name2, + bool reverse_order) +{ + if (!reverse_order) + my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), + clause1, name1, clause2, name2); + else + my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), + clause2, name2, clause1, name1); +} + + +static void +raise_ER_CONFLICTING_DECLARATIONS(const char *clause1, + const char *name1, + const char *name1_part2, + const char *clause2, + const char *name2, + bool reverse_order) +{ + char def[MY_CS_CHARACTER_SET_NAME_SIZE * 2]; + my_snprintf(def, sizeof(def), "%s (%s)", name1, name1_part2); + raise_ER_CONFLICTING_DECLARATIONS(clause1, def, + clause2, name2, + reverse_order); +} + + +bool Lex_exact_charset::raise_if_not_equal(const Lex_exact_charset &rhs) const +{ + if (m_ci == rhs.m_ci) + return false; + my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), + "CHARACTER SET ", m_ci->cs_name.str, + "CHARACTER SET ", rhs.m_ci->cs_name.str); + return true; +} + + +bool 
Lex_exact_charset:: + raise_if_not_applicable(const Lex_exact_collation &cl) const +{ + return Lex_exact_charset_opt_extended_collate(m_ci, false). + raise_if_not_applicable(cl); +} + + +bool Lex_exact_charset_opt_extended_collate:: + raise_if_charsets_differ(const Lex_exact_charset &cs) const +{ + if (!my_charset_same(m_ci, cs.charset_info())) + { + my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), + "CHARACTER SET ", m_ci->cs_name.str, + "CHARACTER SET ", cs.charset_info()->cs_name.str); + return true; + } + return false; +} + + +bool Lex_exact_charset_opt_extended_collate:: + raise_if_not_applicable(const Lex_exact_collation &cl) const +{ + if (!my_charset_same(m_ci, cl.charset_info())) + { + my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0), + cl.charset_info()->coll_name.str, m_ci->cs_name.str); + return true; + } + return false; +} + + +bool +Lex_exact_collation::raise_if_not_equal(const Lex_exact_collation &cl) const +{ + if (m_ci != cl.m_ci) + { + my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), + "COLLATE ", m_ci->coll_name.str, + "COLLATE ", cl.m_ci->coll_name.str); + return true; + } + return false; +} + + +/* + Merge an exact collation and a contexual collation. + @param cl - The contextual collation to merge to "this". 
+ @param reverse_order - If the contextual collation is on the left side + + Use reverse_order as follows: + false: COLLATE latin1_swedish_ci COLLATE DEFAULT + true: COLLATE DEFAULT COLLATE latin1_swedish_ci +*/ +bool +Lex_exact_collation:: + raise_if_conflicts_with_context_collation(const Lex_context_collation &cl, + bool reverse_order) const +{ + if (cl.is_contextually_typed_collate_default()) + { + if (!(m_ci->state & MY_CS_PRIMARY)) + { + raise_ER_CONFLICTING_DECLARATIONS("COLLATE ", m_ci->coll_name.str, + "COLLATE ", "DEFAULT", reverse_order); + return true; + } + return false; + } + + if (cl.is_contextually_typed_binary_style()) + { + if (!(m_ci->state & MY_CS_BINSORT)) + { + raise_ER_CONFLICTING_DECLARATIONS("COLLATE ", m_ci->coll_name.str, + "", "BINARY", reverse_order); + return true; + } + return false; + } + + DBUG_ASSERT(!strncmp(cl.charset_info()->coll_name.str, + STRING_WITH_LEN("utf8mb4_uca1400_"))); + + Charset_loader_server loader; + CHARSET_INFO *ci= loader.get_exact_collation_by_context_name( + m_ci, + cl.collation_name_context_suffix().str, + MYF(0)); + if (m_ci != ci) + { + raise_ER_CONFLICTING_DECLARATIONS("COLLATE ", + m_ci->coll_name.str, + "COLLATE ", + cl.collation_name_for_show().str, + reverse_order); + return true; + } + return false; +} + + +bool +Lex_context_collation::raise_if_not_equal(const Lex_context_collation &cl) const +{ + /* + Only equal context collations are possible here so far: + - Column grammar only supports BINARY, but does not support COLLATE DEFAULT + - DB/Table grammar only support COLLATE DEFAULT + */ + if (m_ci != cl.m_ci) + { + my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), + is_contextually_typed_binary_style() ? "" : "COLLATE ", + collation_name_for_show().str, + cl.is_contextually_typed_binary_style() ? 
"" : "COLLATE ", + cl.collation_name_for_show().str); + return true; + } + return false; +} + + +/* + Resolve a context collation to the character set (when the former gets known): + CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET latin1; + CREATE DATABASE db1 COLLATE DEFAULT CHARACTER SET latin1; +*/ +bool Lex_exact_charset_opt_extended_collate:: + merge_context_collation_override(const Lex_context_collation &cl) +{ + DBUG_ASSERT(m_ci); + + // CHAR(10) BINARY + if (cl.is_contextually_typed_binary_style()) + { + CHARSET_INFO *ci= find_bin_collation(); + if (!ci) + return true; + m_ci= ci; + m_with_collate= true; + return false; + } + + // COLLATE DEFAULT + if (cl.is_contextually_typed_collate_default()) + { + CHARSET_INFO *ci= find_default_collation(); + DBUG_ASSERT(ci); + if (!ci) + return true; + m_ci= ci; + m_with_collate= true; + return false; + } + + DBUG_ASSERT(!strncmp(cl.charset_info()->coll_name.str, + STRING_WITH_LEN("utf8mb4_uca1400_"))); + + CHARSET_INFO *ci= Charset_loader_server(). + get_exact_collation_by_context_name_or_error(m_ci, + cl.charset_info()->coll_name.str + 8, MYF(0)); + if (!ci) + return true; + m_ci= ci; + m_with_collate= true; + return false; +} + + +bool Lex_extended_collation_st::merge_exact_charset(const Lex_exact_charset &cs) +{ + switch (m_type) { + case TYPE_EXACT: + { + // COLLATE latin1_swedish_ci .. CHARACTER SET latin1 + return cs.raise_if_not_applicable(Lex_exact_collation(m_ci)); + } + case TYPE_CONTEXTUALLY_TYPED: + { + // COLLATE DEFAULT .. CHARACTER SET latin1 + Lex_exact_charset_opt_extended_collate tmp(cs); + if (tmp.merge_context_collation(Lex_context_collation(m_ci))) + return true; + *this= Lex_extended_collation(tmp.collation()); + return false; + } + } + DBUG_ASSERT(0); + return false; +} + + +bool Lex_extended_collation_st:: + merge_exact_collation(const Lex_exact_collation &rhs) +{ + switch (m_type) { + + case TYPE_EXACT: + /* + EXACT + EXACT + COLLATE latin1_bin .. 
COLLATE latin1_bin + */ + return Lex_exact_collation(m_ci).raise_if_not_equal(rhs); + + case TYPE_CONTEXTUALLY_TYPED: + { + /* + CONTEXT + EXACT + CHAR(10) COLLATE DEFAULT .. COLLATE latin1_swedish_ci + CHAR(10) BINARY .. COLLATE latin1_bin + CHAR(10) COLLATE uca1400_as_ci .. COLLATE latin1_bin + */ + if (rhs.raise_if_conflicts_with_context_collation( + Lex_context_collation(m_ci), true)) + return true; + *this= Lex_extended_collation(rhs); + return false; + } + } + DBUG_ASSERT(0); + return false; +} + + +bool Lex_extended_collation_st:: + raise_if_conflicts_with_context_collation(const Lex_context_collation &rhs) + const +{ + switch (m_type) { + + case TYPE_EXACT: + /* + EXACT + CONTEXT + COLLATE latin1_swedish_ci .. COLLATE DEFAULT + */ + return Lex_exact_collation(m_ci). + raise_if_conflicts_with_context_collation(rhs, false); + + case TYPE_CONTEXTUALLY_TYPED: + { + /* + CONTEXT + CONTEXT: + CHAR(10) BINARY .. COLLATE DEFAULT - not supported by the parser + CREATE DATABASE db1 COLLATE DEFAULT COLLATE DEFAULT; + */ + return Lex_context_collation(m_ci).raise_if_not_equal(rhs); + } + } + DBUG_ASSERT(0); + return false; +} + + +/* + Merge two non-empty COLLATE clauses. +*/ +bool Lex_extended_collation_st::merge(const Lex_extended_collation_st &rhs) +{ + switch (rhs.type()) { + case TYPE_EXACT: + /* + EXACT + EXACT + COLLATE latin1_swedish_ci .. COLLATE latin1_swedish_ci + + CONTEXT + EXACT + COLLATE DEFAULT .. COLLATE latin1_swedish_ci + CHAR(10) BINARY .. COLLATE latin1_bin + */ + return merge_exact_collation(Lex_exact_collation(rhs.m_ci)); + case TYPE_CONTEXTUALLY_TYPED: + /* + EXACT + CONTEXT + COLLATE latin1_swedish_ci .. COLLATE DEFAULT + + CONTEXT + CONTEXT + COLLATE DEFAULT .. COLLATE DEFAULT + CHAR(10) BINARY .. 
COLLATE DEFAULT + */ + return raise_if_conflicts_with_context_collation( + Lex_context_collation(rhs.m_ci)); + } + DBUG_ASSERT(0); + return false; +} + + +LEX_CSTRING Lex_context_collation::collation_name_for_show() const +{ + if (is_contextually_typed_collate_default()) + return LEX_CSTRING({STRING_WITH_LEN("DEFAULT")}); + if (is_contextually_typed_binary_style()) + return LEX_CSTRING({STRING_WITH_LEN("BINARY")}); + return collation_name_context_suffix(); +} + + +bool Lex_extended_collation_st::set_by_name(const char *name, myf my_flags) +{ + Charset_loader_server loader; + CHARSET_INFO *cs; + + if (!strncasecmp(name, STRING_WITH_LEN("uca1400_"))) + { + if (!(cs= loader.get_context_collation_or_error(name, my_flags))) + return true; + + *this= Lex_extended_collation(Lex_context_collation(cs)); + return false; + } + + if (!(cs= loader.get_exact_collation_or_error(name, my_flags))) + return true; + + *this= Lex_extended_collation(Lex_exact_collation(cs)); + return false; +} + + +/** find a collation with binary comparison rules +*/ +CHARSET_INFO *Lex_exact_charset_opt_extended_collate::find_bin_collation() const +{ + /* + We don't need to handle old_mode=UTF8_IS_UTF8MB3 here, + because "m_ci" points to a real character set name. + It can be either "utf8mb3" or "utf8mb4". It cannot be "utf8". + No thd->get_utf8_flag() flag passed to get_charset_by_csname(). + */ + DBUG_ASSERT(m_ci->cs_name.length !=4 || memcmp(m_ci->cs_name.str, "utf8", 4)); + /* + CREATE TABLE t1 (a CHAR(10) BINARY) + CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; + Nothing to do, we have the binary collation already. 
+ */ + if (m_ci->state & MY_CS_BINSORT) + return m_ci; + + // CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET utf8mb4; + CHARSET_INFO *cs; + if (!(cs= get_charset_by_csname(m_ci->cs_name.str, MY_CS_BINSORT, MYF(0)))) + { + char tmp[65]; + strxnmov(tmp, sizeof(tmp)-1, m_ci->cs_name.str, "_bin", NULL); + my_error(ER_UNKNOWN_COLLATION, MYF(0), tmp); + } + return cs; +} + + +CHARSET_INFO * +Lex_exact_charset_opt_extended_collate::find_default_collation() const +{ + // See comments in find_bin_collation() + DBUG_ASSERT(m_ci->cs_name.length !=4 || memcmp(m_ci->cs_name.str, "utf8", 4)); + /* + CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) CHARACTER SET utf8mb4; + Nothing to do, we have the default collation already. + */ + if (m_ci->state & MY_CS_PRIMARY) + return m_ci; + /* + CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) + CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; + + Don't need to handle old_mode=UTF8_IS_UTF8MB3 here. + See comments in find_bin_collation. + */ + CHARSET_INFO *cs= get_charset_by_csname(m_ci->cs_name.str, + MY_CS_PRIMARY, MYF(MY_WME)); + /* + The above should never fail, as we have default collations for + all character sets. + */ + DBUG_ASSERT(cs); + return cs; +} + + +/* + Resolve an empty or a contextually typed collation according to the + upper level default character set (and optionally a collation), e.g.: + CREATE TABLE t1 (a CHAR(10)) CHARACTER SET latin1; + CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET latin1; + CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) + CHARACTER SET latin1 COLLATE latin1_bin; + + "this" is the COLLATE clause (e.g. of a column) + "def" is the upper level CHARACTER SET clause (e.g. 
of a table) +*/ +CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st:: + resolved_to_character_set(CHARSET_INFO *def) const +{ + DBUG_ASSERT(def); + + switch (m_type) { + case TYPE_EMPTY: + return def; + case TYPE_CHARACTER_SET: + case TYPE_CHARACTER_SET_COLLATE_EXACT: + case TYPE_COLLATE_EXACT: + DBUG_ASSERT(m_ci); + return m_ci; + case TYPE_COLLATE_CONTEXTUALLY_TYPED: + { + Lex_exact_charset_opt_extended_collate tmp(def, true); + if (tmp.merge_context_collation_override(Lex_context_collation(m_ci))) + return NULL; + return tmp.collation().charset_info(); + } + } + DBUG_ASSERT(0); + return NULL; +} + + +bool Lex_exact_charset_extended_collation_attrs_st:: + merge_exact_collation(const Lex_exact_collation &cl) +{ + switch (m_type) { + case TYPE_EMPTY: + /* + No CHARACTER SET clause + CHAR(10) NOT NULL COLLATE latin1_bin + */ + *this= Lex_exact_charset_extended_collation_attrs(cl); + return false; + case TYPE_CHARACTER_SET: + { + // CHARACTER SET latin1 .. COLLATE latin1_swedish_ci + Lex_exact_charset_opt_extended_collate tmp(m_ci, false); + if (tmp.merge_exact_collation(cl)) + return true; + *this= Lex_exact_charset_extended_collation_attrs(tmp); + return false; + } + case TYPE_CHARACTER_SET_COLLATE_EXACT: + case TYPE_COLLATE_EXACT: + { + // [CHARACTER SET latin1] COLLATE latin1_bin .. COLLATE latin1_bin + return Lex_exact_collation(m_ci).raise_if_not_equal(cl); + } + case TYPE_COLLATE_CONTEXTUALLY_TYPED: + { + // COLLATE DEFAULT .. COLLATE latin1_swedish_ci + if (cl.raise_if_conflicts_with_context_collation( + Lex_context_collation(m_ci), true)) + return true; + *this= Lex_exact_charset_extended_collation_attrs(cl); + return false; + } + } + DBUG_ASSERT(0); + return false; +} + + +bool Lex_exact_charset_extended_collation_attrs_st:: + merge_context_collation(const Lex_context_collation &cl) +{ + switch (m_type) { + case TYPE_EMPTY: + /* + No CHARACTER SET clause + CHAR(10) NOT NULL .. 
COLLATE DEFAULT + */ + *this= Lex_exact_charset_extended_collation_attrs(cl); + return false; + case TYPE_CHARACTER_SET: + { + // CHARACTER SET latin1 .. COLLATE DEFAULT + Lex_exact_charset_opt_extended_collate tmp(m_ci, false); + if (tmp.merge_context_collation(cl)) + return true; + *this= Lex_exact_charset_extended_collation_attrs(tmp); + return false; + } + case TYPE_CHARACTER_SET_COLLATE_EXACT: + case TYPE_COLLATE_EXACT: + // [CHARACTER SET latin1] COLLATE latin1_swedish_ci .. COLLATE DEFAULT + return Lex_exact_collation(m_ci). + raise_if_conflicts_with_context_collation(cl, false); + case TYPE_COLLATE_CONTEXTUALLY_TYPED: + // COLLATE DEFAULT .. COLLATE DEFAULT + return Lex_context_collation(m_ci).raise_if_not_equal(cl); + } + + DBUG_ASSERT(0); + return false; +} + + +bool Lex_exact_charset_opt_extended_collate:: + merge_exact_collation(const Lex_exact_collation &cl) +{ + // CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE latin1_bin + if (m_with_collate) + return Lex_exact_collation(m_ci).raise_if_not_equal(cl); + return merge_exact_collation_override(cl); +} + + +bool Lex_exact_charset_opt_extended_collate:: + merge_exact_collation_override(const Lex_exact_collation &cl) +{ + // CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE latin1_bin + if (raise_if_not_applicable(cl)) + return true; + *this= Lex_exact_charset_opt_extended_collate(cl); + return false; +} + + +bool Lex_exact_charset_opt_extended_collate:: + merge_context_collation(const Lex_context_collation &cl) +{ + // CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE DEFAULT + if (m_with_collate) + return Lex_exact_collation(m_ci). 
+ raise_if_conflicts_with_context_collation(cl, false); + return merge_context_collation_override(cl); +} + + +bool Lex_exact_charset_extended_collation_attrs_st:: + merge_collation(const Lex_extended_collation_st &cl) +{ + switch (cl.type()) { + case Lex_extended_collation_st::TYPE_EXACT: + return merge_exact_collation(Lex_exact_collation(cl.charset_info())); + case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED: + return merge_context_collation(Lex_context_collation(cl.charset_info())); + } + DBUG_ASSERT(0); + return false; +} + + +/* + Mix an unordered combination of CHARACTER SET and COLLATE clauses + (i.e. COLLATE can come before CHARACTER SET). + Merge a CHARACTER SET clause. + @param cs - The "CHARACTER SET exact_charset_name". +*/ +bool Lex_exact_charset_extended_collation_attrs_st:: + merge_exact_charset(const Lex_exact_charset &cs) +{ + DBUG_ASSERT(cs.charset_info()); + + switch (m_type) { + case TYPE_EMPTY: + // CHARACTER SET cs + *this= Lex_exact_charset_extended_collation_attrs(cs); + return false; + + case TYPE_CHARACTER_SET: + // CHARACTER SET cs1 .. CHARACTER SET cs2 + return Lex_exact_charset(m_ci).raise_if_not_equal(cs); + + case TYPE_COLLATE_EXACT: + // COLLATE latin1_bin .. CHARACTER SET cs + if (cs.raise_if_not_applicable(Lex_exact_collation(m_ci))) + return true; + m_type= TYPE_CHARACTER_SET_COLLATE_EXACT; + return false; + + case TYPE_CHARACTER_SET_COLLATE_EXACT: + // CHARACTER SET cs1 COLLATE cl .. CHARACTER SET cs2 + return Lex_exact_charset_opt_extended_collate(m_ci, true). + raise_if_charsets_differ(cs); + + case TYPE_COLLATE_CONTEXTUALLY_TYPED: + // COLLATE DEFAULT .. 
CHARACTER SET cs + { + Lex_exact_charset_opt_extended_collate tmp(cs); + if (tmp.merge_context_collation(Lex_context_collation(m_ci))) + return true; + *this= Lex_exact_charset_extended_collation_attrs(tmp); + return false; + } + } + DBUG_ASSERT(0); + return false; +} + + +bool Lex_extended_charset_extended_collation_attrs_st::merge_charset_default() +{ + if (m_charset_order == CHARSET_TYPE_EMPTY) + m_charset_order= CHARSET_TYPE_CONTEXT; + Lex_opt_context_charset_st::merge_charset_default(); + return false; +} + + +bool Lex_extended_charset_extended_collation_attrs_st:: + merge_exact_charset(const Lex_exact_charset &cs) +{ + if (m_charset_order == CHARSET_TYPE_EMPTY) + m_charset_order= CHARSET_TYPE_EXACT; + return Lex_exact_charset_extended_collation_attrs_st::merge_exact_charset(cs); +} + + +bool Lex_extended_charset_extended_collation_attrs_st:: + raise_if_charset_conflicts_with_default( + const Lex_exact_charset_opt_extended_collate &def) const +{ + DBUG_ASSERT(m_charset_order != CHARSET_TYPE_EMPTY || is_empty()); + if (!my_charset_same(def.collation().charset_info(), m_ci)) + { + raise_ER_CONFLICTING_DECLARATIONS("CHARACTER SET ", "DEFAULT", + def.collation().charset_info()->cs_name.str, + "CHARACTER SET ", m_ci->cs_name.str, + m_charset_order == CHARSET_TYPE_EXACT); + return true; + } + return false; +} + + +CHARSET_INFO * +Lex_extended_charset_extended_collation_attrs_st:: + resolved_to_context(const Charset_collation_context &ctx) const +{ + if (Lex_opt_context_charset_st::is_empty()) + { + // Without CHARACTER SET DEFAULT + return Lex_exact_charset_extended_collation_attrs_st:: + resolved_to_character_set(ctx.collate_default().charset_info()); + } + + // With CHARACTER SET DEFAULT + switch (type()) { + case TYPE_EMPTY: + // CHARACTER SET DEFAULT; + return ctx.charset_default().charset().charset_info(); + + case TYPE_CHARACTER_SET: + // CHARACTER SET DEFAULT CHARACTER SET cs_exact + if (raise_if_charset_conflicts_with_default(ctx.charset_default())) + { + /* 
+ A possible scenario: + SET character_set_server=utf8mb4; + CREATE DATABASE db1 CHARACTER SET latin1 CHARACTER SET DEFAULT; + */ + return NULL; + } + return m_ci; + + case TYPE_CHARACTER_SET_COLLATE_EXACT: + case TYPE_COLLATE_EXACT: + { + /* + CREATE DATABASE db1 + COLLATE cl_exact + [ CHARACTER SET cs_exact ] + CHARACTER SET DEFAULT; + */ + if (m_type == TYPE_CHARACTER_SET_COLLATE_EXACT && + raise_if_charset_conflicts_with_default(ctx.charset_default())) + { + /* + A possible scenario: + SET character_set_server=utf8mb4; + CREATE DATABASE db1 + COLLATE latin1_bin + CHARACTER SET latin1 + CHARACTER SET DEFAULT; + */ + return NULL; + } + /* + Now check that "COLLATE cl_exact" does not conflict with + CHARACTER SET DEFAULT. + */ + if (ctx.charset_default(). + raise_if_not_applicable(Lex_exact_collation(m_ci))) + { + /* + A possible scenario: + SET character_set_server=utf8mb4; + CREATE DATABASE db1 + COLLATE latin1_bin + CHARACTER SET DEFAULT; + */ + return NULL; + } + return m_ci; + } + + case TYPE_COLLATE_CONTEXTUALLY_TYPED: + /* + Both CHARACTER SET and COLLATE are contextual: + ALTER DATABASE db1 CHARACTER SET DEFAULT COLLATE DEFAULT; + ALTER DATABASE db1 COLLATE DEFAULT CHARACTER SET DEFAULT; + */ + return Lex_exact_charset_extended_collation_attrs_st:: + resolved_to_character_set(ctx.charset_default(). + collation().charset_info()); + } + DBUG_ASSERT(0); + return NULL; +} diff --git a/sql/lex_charset.h b/sql/lex_charset.h new file mode 100644 index 00000000..2bbeff8a --- /dev/null +++ b/sql/lex_charset.h @@ -0,0 +1,802 @@ +/* Copyright (c) 2021, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef LEX_CHARSET_INCLUDED +#define LEX_CHARSET_INCLUDED + + +/* + An extention for Charset_loader_mysys, + with server error and warning support. +*/ +class Charset_loader_server: public Charset_loader_mysys +{ +public: + using Charset_loader_mysys::Charset_loader_mysys; + void raise_unknown_collation_error(const char *name) const; + void raise_not_applicable_error(const char *cs, const char *cl) const; + + /* + Find an exact collation by name. + Raise an error on a faulure. + + @param cs - the character set + @param collation_name - the collation name, e.g. "utf8_bin" + @param my_flags - my flags, e.g. MYF(WME) + @returns - a NULL pointer in case of failure, or + a CHARSET_INFO pointer on success. + */ + + CHARSET_INFO * + get_exact_collation_or_error(const char *name, myf my_flags= MYF(0)) + { + CHARSET_INFO *ci= get_exact_collation(name, my_flags); + if (!ci) + raise_unknown_collation_error(name); + return ci; + } + + /* + Find an exact collation by a character set and a + contextually typed collation name. + Raise an error on in case of a faulure. + + @param cs - the character set + @param context_cl_name - the context name, e.g. "uca1400_cs_ci" + @param my_flags - my flags, e.g. MYF(WME) + @returns - a NULL pointer in case of failure, or + a CHARSET_INFO pointer on success. + */ + CHARSET_INFO * + get_exact_collation_by_context_name_or_error(CHARSET_INFO *cs, + const char *name, + myf my_flags= MYF(0)) + { + CHARSET_INFO *ci= get_exact_collation_by_context_name(cs, name, my_flags); + if (!ci) + raise_not_applicable_error(cs->cs_name.str, name); + return ci; + } + + /* + Find an abstract context collation by name. + Raise an error on a faulure. 
+ The returned pointer needs to be resolved to a character set name. + It should not be passed directly to the character set routines. + + @param cs - the character set + @param context_cl_name - the context name, e.g. "uca1400_cs_ci" + @param my_flags - my flags, e.g. MYF(WME) + @returns - a NULL pointer in case of failure, or + a CHARSET_INFO pointer on success. + */ + + CHARSET_INFO * + get_context_collation_or_error(const char *collation_name, + myf my_flags= MYF(0)) + { + CHARSET_INFO *ci= get_context_collation(collation_name, my_flags); + if (!ci) + raise_unknown_collation_error(collation_name); + return ci; + } + + /* + Find an exact binary collation in the given character set. + Raise an error on a faulure. + + @param cs - the character set + @param my_flags - my flags, e.g. MYF(WME) + @returns - a NULL pointer in case of failure, or + a CHARSET_INFO pointer on success. + */ + + CHARSET_INFO * + get_bin_collation_or_error(CHARSET_INFO *cs, + myf my_flags= MYF(0)) + { + const char *cs_name= cs->cs_name.str; + if (!(cs= get_bin_collation(cs, my_flags))) + { + char tmp[65]; + strxnmov(tmp, sizeof(tmp)-1, cs_name, "_bin", NULL); + raise_unknown_collation_error(tmp); + } + return cs; + } + + /* + Find an exact default collation in the given character set. + This routine does not fail. + Any character set must have a default collation. + + @param cs - the character set + @param my_flags - my flags, e.g. 
MYF(WME) + @returns - a CHARSET_INFO pointer + */ + + CHARSET_INFO *get_default_collation(CHARSET_INFO *cs, + myf my_flags= MYF(0)) + { + return Charset_loader_mysys::get_default_collation(cs, my_flags); + } +}; + + +///////////////////////////////////////////////////////////////////// + +/* + An exact character set, e.g: + CHARACTER SET latin1 +*/ +class Lex_exact_charset +{ + CHARSET_INFO *m_ci; +public: + explicit Lex_exact_charset(CHARSET_INFO *ci) + :m_ci(ci) + { + DBUG_ASSERT(m_ci); + DBUG_ASSERT(m_ci->state & MY_CS_PRIMARY); + } + CHARSET_INFO *charset_info() const { return m_ci; } + bool raise_if_not_equal(const Lex_exact_charset &rhs) const; + bool raise_if_not_applicable(const class Lex_exact_collation &cl) const; +}; + + +/* + An optional contextually typed character set: + [ CHARACTER SET DEFAULT ] +*/ +class Lex_opt_context_charset_st +{ + /* + Currently we support only DEFAULT as a possible value. + So "bool" is enough. + */ + bool m_had_charset_default; +public: + void init() + { + m_had_charset_default= false; + } + void merge_charset_default() + { + /* + Ok to specify CHARACTER SET DEFAULT multiple times. + No error raised here. 
+ */ + m_had_charset_default= true; + } + bool is_empty() const + { + return !m_had_charset_default; + } + bool is_contextually_typed_charset_default() const + { + return m_had_charset_default; + } +}; + + +/* + A contextually typed collation, e.g.: + COLLATE DEFAULT + CHAR(10) BINARY +*/ +class Lex_context_collation +{ + CHARSET_INFO *m_ci; +public: + explicit Lex_context_collation(CHARSET_INFO *ci) + :m_ci(ci) + { + DBUG_ASSERT(ci); + } + CHARSET_INFO *charset_info() const { return m_ci; } + bool is_contextually_typed_collate_default() const + { + return m_ci == &my_collation_contextually_typed_default; + } + bool is_contextually_typed_binary_style() const + { + return m_ci == &my_collation_contextually_typed_binary; + } + bool raise_if_not_equal(const Lex_context_collation &cl) const; + /* + Skip the character set prefix, return the suffix. + utf8mb4_uca1400_as_ci -> uca1400_as_ci + */ + LEX_CSTRING collation_name_context_suffix() const + { + return m_ci->get_collation_name(MY_COLLATION_NAME_MODE_CONTEXT); + } + LEX_CSTRING collation_name_for_show() const; +}; + + +/* + An exact collation, e.g. 
+ COLLATE latin1_swedish_ci +*/ +class Lex_exact_collation +{ + CHARSET_INFO *m_ci; +public: + explicit Lex_exact_collation(CHARSET_INFO *ci) + :m_ci(ci) + { + DBUG_ASSERT(ci); + } + CHARSET_INFO *charset_info() const { return m_ci; } + // EXACT + EXACT + bool raise_if_not_equal(const Lex_exact_collation &cl) const; + // EXACT + CONTEXT + // CONTEXT + EXACT + bool raise_if_conflicts_with_context_collation(const Lex_context_collation &, + bool reverse_order) const; +}; + + +/* + Parse time COLLATE clause: + COLLATE colation_name + The collation can be either exact or contextual: + COLLATE latin1_bin + COLLATE DEFAULT +*/ +class Lex_extended_collation_st +{ +public: + enum Type + { + TYPE_EXACT, + TYPE_CONTEXTUALLY_TYPED + }; +protected: + CHARSET_INFO *m_ci; + Type m_type; +public: + void init(CHARSET_INFO *ci, Type type) + { + m_ci= ci; + m_type= type; + } + CHARSET_INFO *charset_info() const { return m_ci; } + Type type() const { return m_type; } + LEX_CSTRING collation_name_for_show() const + { + switch (m_type) { + case TYPE_CONTEXTUALLY_TYPED: + return Lex_context_collation(m_ci).collation_name_for_show(); + case TYPE_EXACT: + return m_ci->coll_name; + } + DBUG_ASSERT(0); + return m_ci->coll_name; + } + void set_collate_default() + { + m_ci= &my_collation_contextually_typed_default; + m_type= TYPE_CONTEXTUALLY_TYPED; + } + bool set_by_name(const char *name, myf my_flags); // e.g. 
MY_UTF8_IS_UTF8MB3 + bool raise_if_conflicts_with_context_collation(const Lex_context_collation &) + const; + bool merge_exact_charset(const Lex_exact_charset &rhs); + bool merge_exact_collation(const Lex_exact_collation &rhs); + bool merge(const Lex_extended_collation_st &rhs); +}; + + +class Lex_extended_collation: public Lex_extended_collation_st +{ +public: + Lex_extended_collation(CHARSET_INFO *ci, Type type) + { + init(ci, type); + } + Lex_extended_collation(const Lex_exact_collation &rhs) + { + init(rhs.charset_info(), TYPE_EXACT); + } + Lex_extended_collation(const Lex_context_collation &rhs) + { + init(rhs.charset_info(), TYPE_CONTEXTUALLY_TYPED); + } +}; + + +/* + CHARACTER SET cs_exact [COLLATE cl_exact_or_context] +*/ +class Lex_exact_charset_opt_extended_collate +{ + CHARSET_INFO *m_ci; + bool m_with_collate; +public: + Lex_exact_charset_opt_extended_collate(CHARSET_INFO *ci, bool with_collate) + :m_ci(ci), m_with_collate(with_collate) + { + DBUG_ASSERT(m_ci); + DBUG_ASSERT((m_ci->state & MY_CS_PRIMARY) || m_with_collate); + } + Lex_exact_charset_opt_extended_collate(const Lex_exact_charset &cs) + :m_ci(cs.charset_info()), m_with_collate(false) + { + DBUG_ASSERT(m_ci); + DBUG_ASSERT(m_ci->state & MY_CS_PRIMARY); + } + Lex_exact_charset_opt_extended_collate(const Lex_exact_collation &cl) + :m_ci(cl.charset_info()), m_with_collate(true) + { + DBUG_ASSERT(m_ci); + } + bool with_collate() const { return m_with_collate; } + CHARSET_INFO *find_bin_collation() const; + CHARSET_INFO *find_default_collation() const; + bool raise_if_charsets_differ(const Lex_exact_charset &cs) const; + bool raise_if_not_applicable(const Lex_exact_collation &cl) const; + /* + Add another COLLATE clause (exact or context). + So the full syntax looks like: + CHARACTER SET cs [COLLATE cl] ... 
COLLATE cl2 + */ + bool merge_collation(const Lex_extended_collation_st &cl) + { + switch (cl.type()) { + case Lex_extended_collation_st::TYPE_EXACT: + return merge_exact_collation(Lex_exact_collation(cl.charset_info())); + case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED: + return merge_context_collation(Lex_context_collation(cl.charset_info())); + } + DBUG_ASSERT(0); + return false; + } + bool merge_collation_override(const Lex_extended_collation_st &cl) + { + switch (cl.type()) { + case Lex_extended_collation_st::TYPE_EXACT: + return merge_exact_collation_override( + Lex_exact_collation(cl.charset_info())); + case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED: + return merge_context_collation_override( + Lex_context_collation(cl.charset_info())); + } + DBUG_ASSERT(0); + return false; + } + /* + Add a context collation: + CHARACTER SET cs [COLLATE cl] ... COLLATE DEFAULT + */ + bool merge_context_collation(const Lex_context_collation &cl); + bool merge_context_collation_override(const Lex_context_collation &cl); + /* + Add an exact collation: + CHARACTER SET cs [COLLATE cl] ... COLLATE latin1_bin + */ + bool merge_exact_collation(const Lex_exact_collation &cl); + bool merge_exact_collation_override(const Lex_exact_collation &cl); + Lex_exact_collation collation() const + { + return Lex_exact_collation(m_ci); + } + Lex_exact_charset charset() const + { + if ((m_ci->state & MY_CS_PRIMARY)) + return Lex_exact_charset(m_ci); + return Lex_exact_charset(find_default_collation()); + } +}; + + +/* + Parse time character set and collation for: + [CHARACTER SET cs_exact] [COLLATE cl_exact_or_context] + + Can be: + + 1. Empty (not specified on the column level): + CREATE TABLE t1 (a CHAR(10)) CHARACTER SET latin2; -- (1a) + CREATE TABLE t1 (a CHAR(10)); -- (1b) + + 2. Precisely typed: + CREATE TABLE t1 (a CHAR(10) COLLATE latin1_bin); -- (2a) + CREATE TABLE t1 ( + a CHAR(10) CHARACTER SET latin1 COLLATE latin1_bin); -- (2b) + + 3. 
Contextually typed: + CREATE TABLE t2 (a CHAR(10) BINARY) CHARACTER SET latin2; -- (3a) + CREATE TABLE t2 (a CHAR(10) BINARY); -- (3b) + CREATE TABLE t2 (a CHAR(10) COLLATE DEFAULT) + CHARACER SET latin2 COLLATE latin2_bin; -- (3c) + + In case of an empty or a contextually typed collation, + it is a subject to later resolution, when the context + character set becomes known in the end of the CREATE statement: + - either after the explicit table level CHARACTER SET, like in (1a,3a,3c) + - or by the inhereted database level CHARACTER SET, like in (1b,3b) + + Resolution happens in Type_handler::Column_definition_prepare_stage1(). +*/ +struct Lex_exact_charset_extended_collation_attrs_st +{ +public: + enum Type + { + TYPE_EMPTY= 0, + TYPE_CHARACTER_SET= 1, + TYPE_COLLATE_EXACT= 2, + TYPE_CHARACTER_SET_COLLATE_EXACT= 3, + TYPE_COLLATE_CONTEXTUALLY_TYPED= 4 + }; + +// Number of bits required to store enum Type values + +#define LEX_CHARSET_COLLATION_TYPE_BITS 3 +#define LEX_CHARSET_COLLATION_TYPE_MASK ((1<= + TYPE_COLLATE_CONTEXTUALLY_TYPED, + "Lex_exact_charset_extended_collation_attrs_st::Type bits"); + +protected: + CHARSET_INFO *m_ci; + Type m_type; +protected: + static Type type_from_lex_collation_type(Lex_extended_collation_st::Type type) + { + switch (type) { + case Lex_extended_collation_st::TYPE_EXACT: + return TYPE_COLLATE_EXACT; + case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED: + return TYPE_COLLATE_CONTEXTUALLY_TYPED; + } + DBUG_ASSERT(0); + return TYPE_COLLATE_EXACT; + } +public: + void init() + { + m_ci= NULL; + m_type= TYPE_EMPTY; + } + void init(CHARSET_INFO *cs, Type type) + { + DBUG_ASSERT(cs || type == TYPE_EMPTY); + m_ci= cs; + m_type= type; + } + void init(const Lex_exact_charset &cs) + { + m_ci= cs.charset_info(); + m_type= TYPE_CHARACTER_SET; + } + void init(const Lex_exact_collation &cs) + { + m_ci= cs.charset_info(); + m_type= TYPE_COLLATE_EXACT; + } + void init(const Lex_exact_charset_opt_extended_collate &cscl) + { + if 
(cscl.with_collate()) + init(cscl.collation().charset_info(), TYPE_CHARACTER_SET_COLLATE_EXACT); + else + init(cscl.charset()); + } + bool is_empty() const + { + return m_type == TYPE_EMPTY; + } + void set_charset(const Lex_exact_charset &cs) + { + m_ci= cs.charset_info(); + m_type= TYPE_CHARACTER_SET; + } + bool set_charset_collate_default(const Lex_exact_charset &cs) + { + CHARSET_INFO *ci; + if (!(ci= Lex_exact_charset_opt_extended_collate(cs). + find_default_collation())) + return true; + m_ci= ci; + m_type= TYPE_CHARACTER_SET_COLLATE_EXACT; + return false; + } + bool set_charset_collate_binary(const Lex_exact_charset &cs) + { + CHARSET_INFO *ci; + if (!(ci= Lex_exact_charset_opt_extended_collate(cs).find_bin_collation())) + return true; + m_ci= ci; + m_type= TYPE_CHARACTER_SET_COLLATE_EXACT; + return false; + } + void set_collate_default() + { + m_ci= &my_collation_contextually_typed_default; + m_type= TYPE_COLLATE_CONTEXTUALLY_TYPED; + } + void set_contextually_typed_binary_style() + { + m_ci= &my_collation_contextually_typed_binary; + m_type= TYPE_COLLATE_CONTEXTUALLY_TYPED; + } + bool is_contextually_typed_collate_default() const + { + return Lex_context_collation(m_ci).is_contextually_typed_collate_default(); + } + CHARSET_INFO *charset_info() const + { + return m_ci; + } + Type type() const + { + return m_type; + } + bool is_contextually_typed_collation() const + { + return m_type == TYPE_COLLATE_CONTEXTUALLY_TYPED; + } + CHARSET_INFO *resolved_to_character_set(CHARSET_INFO *cs) const; + /* + Merge the column CHARACTER SET clause to: + - an exact collation name + - a contextually typed collation + "this" corresponds to `CHARACTER SET xxx [BINARY]` + "cl" corresponds to the COLLATE clause + */ + bool merge_column_charset_clause_and_collate_clause( + const Lex_exact_charset_extended_collation_attrs_st &cl) + { + switch (cl.type()) { + case TYPE_EMPTY: + return false; + case TYPE_COLLATE_EXACT: + return 
merge_exact_collation(Lex_exact_collation(cl.charset_info())); + case TYPE_COLLATE_CONTEXTUALLY_TYPED: + return merge_context_collation(Lex_context_collation(cl.charset_info())); + case TYPE_CHARACTER_SET: + case TYPE_CHARACTER_SET_COLLATE_EXACT: + break; + } + DBUG_ASSERT(0); + return false; + } + /* + This method is used in the "attribute_list" rule to merge two independent + COLLATE clauses (not belonging to a CHARACTER SET clause). + "BINARY" and "COLLATE DEFAULT" are not possible + in an independent COLLATE clause in a column attribute. + */ + bool merge_column_collate_clause_and_collate_clause( + const Lex_exact_charset_extended_collation_attrs_st &cl) + { + DBUG_ASSERT(m_type != TYPE_CHARACTER_SET); + switch (cl.type()) { + case TYPE_EMPTY: + return false; + case TYPE_COLLATE_EXACT: + return merge_exact_collation(Lex_exact_collation(cl.charset_info())); + case TYPE_COLLATE_CONTEXTUALLY_TYPED: + return merge_context_collation(Lex_context_collation(cl.charset_info())); + case TYPE_CHARACTER_SET: + case TYPE_CHARACTER_SET_COLLATE_EXACT: + break; + } + DBUG_ASSERT(0); + return false; + } + bool merge_exact_charset(const Lex_exact_charset &cs); + bool merge_exact_collation(const Lex_exact_collation &cl); + bool merge_context_collation(const Lex_context_collation &cl); + bool merge_collation(const Lex_extended_collation_st &cl); +}; + + +class Charset_collation_context +{ + /* + Although the goal of m_charset_default is to store the meaning + of CHARACTER SET DEFAULT, it does not necessarily point to a + default collation of CHARACTER SET DEFAULT. It can point to its any + arbitrary collation. + For performance purposes we don't need to find the default + collation at the instantiation time of "this", because: + - m_charset_default may not be even needed during the resolution + - when it's needed, in many cases it's passed to my_charset_same(), + which does not need the default collation again. 
+ + Note, m_charset_default and m_collate_default are not necessarily equal. + + - The default value for CHARACTER SET is taken from the upper level: + CREATE DATABASE db1 CHARACTER SET DEFAULT; <-- @@character_set_server + ALTER DATABASE db1 CHARACTER SET DEFAULT; <-- @@character_set_server + + - The default value for COLLATE is taken from the upper level for CREATE: + CREATE DATABASE db1 COLLATE DEFAULT; <-- @@collation_server + CREATE TABLE db1.t1 COLLATE DEFAULT; <-- character set of "db1" + + - The default value for COLLATE is taken from the same level for ALTER: + ALTER DATABASE db1 COLLATE DEFAULT; <-- the default collation of the + current db1 character set + ALTER TABLE db1.t1 COLLATE DEFAULT; <-- the default collation of the + current db1.t1 character set + */ + + // comes from the upper level + Lex_exact_charset_opt_extended_collate m_charset_default; + + // comes from the upper or the current level + Lex_exact_collation m_collate_default; +public: + Charset_collation_context(CHARSET_INFO *charset_default, + CHARSET_INFO *collate_default) + :m_charset_default(charset_default, + !(charset_default->state & MY_CS_PRIMARY)), + m_collate_default(collate_default) + { } + const Lex_exact_charset_opt_extended_collate charset_default() const + { + return m_charset_default; + } + const Lex_exact_collation collate_default() const + { + return m_collate_default; + } +}; + + +/* + A universal container. It can store at the same time: + - CHARACTER SET DEFAULT + - CHARACTER SET cs_exact + - COLLATE {cl_exact|cl_context} + All three parts can co-exist. + All three parts are optional. 
+ Parts can come in any arbitrary order, e.g: + + CHARACTER SET DEFAULT [CHARACTER SET latin1] COLLATE latin1_bin + CHARACTER SET latin1 CHARACTER SET DEFAULT COLLATE latin1_bin + COLLATE latin1_bin [CHARACTER SET latin1] CHARACTER SET DEFAULT + COLLATE latin1_bin CHARACTER SET DEFAULT [CHARACTER SET latin1] +*/ +class Lex_extended_charset_extended_collation_attrs_st: + public Lex_opt_context_charset_st, + public Lex_exact_charset_extended_collation_attrs_st +{ + enum charset_type_t + { + CHARSET_TYPE_EMPTY, + CHARSET_TYPE_CONTEXT, + CHARSET_TYPE_EXACT + }; + /* + Which part came first: + - CHARACTER SET DEFAULT or + - CHARACTER SET cs_exact + e.g. to produce error messages preserving the user typed + order of CHARACTER SET clauses in case of conflicts. + */ + charset_type_t m_charset_order; +public: + void init() + { + Lex_opt_context_charset_st::init(); + Lex_exact_charset_extended_collation_attrs_st::init(); + m_charset_order= CHARSET_TYPE_EMPTY; + } + void init(const Lex_exact_charset_opt_extended_collate &c) + { + Lex_opt_context_charset_st::init(); + Lex_exact_charset_extended_collation_attrs_st::init(c); + m_charset_order= CHARSET_TYPE_EXACT; + } + bool is_empty() const + { + return Lex_opt_context_charset_st::is_empty() && + Lex_exact_charset_extended_collation_attrs_st::is_empty(); + } + bool raise_if_charset_conflicts_with_default( + const Lex_exact_charset_opt_extended_collate &def) const; + CHARSET_INFO *resolved_to_context(const Charset_collation_context &ctx) const; + bool merge_charset_default(); + bool merge_exact_charset(const Lex_exact_charset &cs); +}; + + +class Lex_exact_charset_extended_collation_attrs: + public Lex_exact_charset_extended_collation_attrs_st +{ +public: + Lex_exact_charset_extended_collation_attrs() + { + init(); + } + Lex_exact_charset_extended_collation_attrs(CHARSET_INFO *collation, Type type) + { + init(collation, type); + } + explicit + Lex_exact_charset_extended_collation_attrs(const Lex_exact_charset &cs) + { + 
init(cs.charset_info(), TYPE_CHARACTER_SET); + } + explicit + Lex_exact_charset_extended_collation_attrs(const Lex_exact_collation &cl) + { + init(cl.charset_info(), TYPE_COLLATE_EXACT); + } + explicit + Lex_exact_charset_extended_collation_attrs(const Lex_context_collation &cl) + { + init(cl.charset_info(), TYPE_COLLATE_CONTEXTUALLY_TYPED); + } + explicit + Lex_exact_charset_extended_collation_attrs( + const Lex_exact_charset_opt_extended_collate &cscl) + { + init(cscl); + } + explicit + Lex_exact_charset_extended_collation_attrs(const Lex_extended_collation_st &cl) + { + init(cl.charset_info(), type_from_lex_collation_type(cl.type())); + } + static Lex_exact_charset_extended_collation_attrs national(bool bin_mod) + { + return bin_mod ? + Lex_exact_charset_extended_collation_attrs(&my_charset_utf8mb3_bin, + TYPE_COLLATE_EXACT) : + Lex_exact_charset_extended_collation_attrs(&my_charset_utf8mb3_general_ci, + TYPE_CHARACTER_SET); + } +}; + + +class Lex_extended_charset_extended_collation_attrs: + public Lex_extended_charset_extended_collation_attrs_st +{ +public: + Lex_extended_charset_extended_collation_attrs() + { + init(); + } + explicit Lex_extended_charset_extended_collation_attrs( + const Lex_exact_charset_opt_extended_collate &c) + { + init(c); + } +}; + + + +using Lex_column_charset_collation_attrs_st = + Lex_exact_charset_extended_collation_attrs_st; + +using Lex_column_charset_collation_attrs = + Lex_exact_charset_extended_collation_attrs; + + +using Lex_table_charset_collation_attrs_st = + Lex_extended_charset_extended_collation_attrs_st; + +using Lex_table_charset_collation_attrs = + Lex_extended_charset_extended_collation_attrs; + + +#endif // LEX_CHARSET_INCLUDED diff --git a/sql/lex_string.h b/sql/lex_string.h new file mode 100644 index 00000000..56f37706 --- /dev/null +++ b/sql/lex_string.h @@ -0,0 +1,165 @@ +/* + Copyright (c) 2018, MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + + +#ifndef LEX_STRING_INCLUDED +#define LEX_STRING_INCLUDED + + +typedef struct st_mysql_const_lex_string LEX_CSTRING; + + +class Lex_cstring : public LEX_CSTRING +{ + public: + Lex_cstring() + { + str= NULL; + length= 0; + } + Lex_cstring(const LEX_CSTRING &str) + { + LEX_CSTRING::operator=(str); + } + Lex_cstring(const char *_str, size_t _len) + { + str= _str; + length= _len; + } + Lex_cstring(const char *start, const char *end) + { + DBUG_ASSERT(start <= end); + str= start; + length= end - start; + } + void set(const char *_str, size_t _len) + { + str= _str; + length= _len; + } + + /* + Trim left white spaces. + Assumes that there are no multi-bytes characters + that can be considered white-space. + */ + Lex_cstring ltrim_whitespace(CHARSET_INFO *cs) const + { + DBUG_ASSERT(cs->mbminlen == 1); + Lex_cstring str= *this; + while (str.length > 0 && my_isspace(cs, str.str[0])) + { + str.length--; + str.str++; + } + return str; + } + + /* + Trim right white spaces. + Assumes that there are no multi-bytes characters + that can be considered white-space. + Also, assumes that the character set supports backward space parsing. 
+ */ + Lex_cstring rtrim_whitespace(CHARSET_INFO *cs) const + { + DBUG_ASSERT(cs->mbminlen == 1); + Lex_cstring str= *this; + while (str.length > 0 && my_isspace(cs, str.str[str.length - 1])) + { + str.length --; + } + return str; + } + + /* + Trim all spaces. + */ + Lex_cstring trim_whitespace(CHARSET_INFO *cs) const + { + return ltrim_whitespace(cs).rtrim_whitespace(cs); + } + + /* + Trim all spaces and return the length of the leading space sequence. + */ + Lex_cstring trim_whitespace(CHARSET_INFO *cs, size_t *prefix_length) const + { + Lex_cstring tmp= Lex_cstring(*this).ltrim_whitespace(cs); + if (prefix_length) + *prefix_length= tmp.str - str; + return tmp.rtrim_whitespace(cs); + } + +}; + + +class Lex_cstring_strlen: public Lex_cstring +{ +public: + Lex_cstring_strlen(const char *from) + :Lex_cstring(from, from ? strlen(from) : 0) + { } +}; + + +/* Functions to compare if two lex strings are equal */ + +static inline bool lex_string_cmp(CHARSET_INFO *charset, const LEX_CSTRING *a, + const LEX_CSTRING *b) +{ + return my_strcasecmp(charset, a->str, b->str); +} + +/* + Compare to LEX_CSTRING's and return 0 if equal +*/ + +static inline bool cmp(const LEX_CSTRING *a, const LEX_CSTRING *b) +{ + return a->length != b->length || + (a->length && memcmp(a->str, b->str, a->length)); +} +static inline bool cmp(const LEX_CSTRING a, const LEX_CSTRING b) +{ + return a.length != b.length || (a.length && memcmp(a.str, b.str, a.length)); +} + +/* + Compare if two LEX_CSTRING are equal. 
Assumption is that + character set is ASCII (like for plugin names) +*/ + +static inline bool lex_string_eq(const LEX_CSTRING *a, const LEX_CSTRING *b) +{ + if (a->length != b->length) + return 0; /* Different */ + return strcasecmp(a->str, b->str) == 0; +} + +/* + To be used when calling lex_string_eq with STRING_WITH_LEN() as second + argument +*/ + +static inline bool lex_string_eq(const LEX_CSTRING *a, const char *b, size_t b_length) +{ + if (a->length != b_length) + return 0; /* Different */ + return strcasecmp(a->str, b) == 0; +} + +#endif /* LEX_STRING_INCLUDED */ diff --git a/sql/lex_symbol.h b/sql/lex_symbol.h new file mode 100644 index 00000000..e7819cd4 --- /dev/null +++ b/sql/lex_symbol.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2000, 2001, 2004, 2006, 2007 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* This struct includes all reserved words and functions */ + +#ifndef _lex_symbol_h +#define _lex_symbol_h + +struct st_sym_group; + +typedef struct st_symbol { + const char *name; + uint tok; + uint length; + struct st_sym_group *group; +} SYMBOL; + +typedef struct st_lex_symbol +{ + SYMBOL *symbol; + char *str; + uint length; +} LEX_SYMBOL; + +typedef struct st_sym_group { + const char *name; + const char *needed_define; +} SYM_GROUP; + +extern SYM_GROUP sym_group_common; +extern SYM_GROUP sym_group_geom; +extern SYM_GROUP sym_group_rtree; + +#endif /* _lex_symbol_h */ diff --git a/sql/lock.cc b/sql/lock.cc new file mode 100644 index 00000000..ef8c2ba3 --- /dev/null +++ b/sql/lock.cc @@ -0,0 +1,1259 @@ +/* + Copyright (c) 2000, 2011, Oracle and/or its affiliates. + Copyright (c) 2020, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/** + @file + + Locking functions for mysql. + + Because of the new concurrent inserts, we must first get external locks + before getting internal locks. If we do it in the other order, the status + information is not up to date when called from the lock handler. 
+ + GENERAL DESCRIPTION OF LOCKING + + When not using LOCK TABLES: + + - For each SQL statement mysql_lock_tables() is called for all involved + tables. + - mysql_lock_tables() will call + table_handler->external_lock(thd,locktype) for each table. + This is followed by a call to thr_multi_lock() for all tables. + + - When statement is done, we call mysql_unlock_tables(). + table_handler->external_lock(thd, F_UNLCK) followed by + thr_multi_unlock() for each table. + + - Note that mysql_unlock_tables() may be called several times as + MySQL in some cases can free some tables earlier than others. + + - The above is true both for normal and temporary tables. + + - Temporary non transactional tables are never passed to thr_multi_lock() + and we never call external_lock(thd, F_UNLOCK) on these. + + When using LOCK TABLES: + + - LOCK TABLE will call mysql_lock_tables() for all tables. + mysql_lock_tables() will call + table_handler->external_lock(thd,locktype) for each table. + This is followed by a call to thr_multi_lock() for all tables. + + - For each statement, we will call table_handler->start_stmt(THD) + to inform the table handler that we are using the table. + + The tables used can only be tables used in LOCK TABLES or a + temporary table. + + - When statement is done, we will call ha_commit_stmt(thd); + + - When calling UNLOCK TABLES we call mysql_unlock_tables() for all + tables used in LOCK TABLES + + If table_handler->external_lock(thd, locktype) fails, we call + table_handler->external_lock(thd, F_UNLCK) for each table that was locked, + excluding one that caused failure. That means handler must cleanup itself + in case external_lock() fails. 
+*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "debug_sync.h" +#include "lock.h" +#include "sql_base.h" // close_tables_for_reopen +#include "sql_parse.h" // is_log_table_write_query +#include "sql_handler.h" +#include +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#include "wsrep_server_state.h" +#endif + +/** + @defgroup Locking Locking + @{ +*/ + +extern HASH open_cache; + +static int lock_external(THD *thd, TABLE **table,uint count); +static int unlock_external(THD *thd, TABLE **table,uint count); + + +/* Map the return value of thr_lock to an error from errmsg.txt */ +static int thr_lock_errno_to_mysql[]= +{ 0, ER_LOCK_ABORTED, ER_LOCK_WAIT_TIMEOUT, ER_LOCK_DEADLOCK }; + +/** + Perform semantic checks for mysql_lock_tables. + @param thd The current thread + @param tables The tables to lock + @param count The number of tables to lock + @param flags Lock flags + @return 0 if all the check passed, non zero if a check failed. +*/ + +static int +lock_tables_check(THD *thd, TABLE **tables, uint count, uint flags) +{ + uint system_count, i; + bool ignore_read_only, log_table_write_query; + + DBUG_ENTER("lock_tables_check"); + + system_count= 0; + ignore_read_only= + (thd->security_ctx->master_access & PRIV_IGNORE_READ_ONLY) != NO_ACL; + log_table_write_query= (is_log_table_write_query(thd->lex->sql_command) + || ((flags & MYSQL_LOCK_LOG_TABLE) != 0)); + + for (i=0 ; is->table_category != TABLE_UNKNOWN_CATEGORY); + + /* + Table I/O to performance schema tables is performed + only internally by the server implementation. + When a user is requesting a lock, the following + constraints are enforced: + */ + if (t->s->require_write_privileges() && + ! log_table_write_query) + { + /* + A user should not be able to prevent writes, + or hold any type of lock in a session, + since this would be a DOS attack. 
+ */ + if ((t->reginfo.lock_type >= TL_FIRST_WRITE) + || (thd->lex->sql_command == SQLCOM_LOCK_TABLES)) + { + my_error(ER_CANT_LOCK_LOG_TABLE, MYF(0)); + DBUG_RETURN(1); + } + } + + if (t->reginfo.lock_type >= TL_FIRST_WRITE) + { + if (t->s->table_category == TABLE_CATEGORY_SYSTEM) + system_count++; + + if (t->db_stat & HA_READ_ONLY) + { + my_error(ER_OPEN_AS_READONLY, MYF(0), t->alias.c_ptr_safe()); + DBUG_RETURN(1); + } + } + + /* + If we are going to lock a non-temporary table we must own metadata + lock of appropriate type on it (I.e. for table to be locked for + write we must own metadata lock of MDL_SHARED_WRITE or stronger + type. For table to be locked for read we must own metadata lock + of MDL_SHARED_READ or stronger type). + */ + DBUG_ASSERT(t->s->tmp_table || + thd->mdl_context.is_lock_owner(MDL_key::TABLE, + t->s->db.str, t->s->table_name.str, + t->reginfo.lock_type >= TL_FIRST_WRITE ? + MDL_SHARED_WRITE : MDL_SHARED_READ)); + + /* + Prevent modifications to base tables if READ_ONLY is activated. + In any case, read only does not apply to temporary tables. + */ + if (!(flags & MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY) && !t->s->tmp_table) + { + if (t->reginfo.lock_type >= TL_FIRST_WRITE && + !ignore_read_only && opt_readonly && !thd->slave_thread) + { + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only"); + DBUG_RETURN(1); + } + } + } + + /* + Locking of system tables is restricted: + locking a mix of system and non-system tables in the same lock + is prohibited, to prevent contention. + */ + if ((system_count > 0) && (system_count < count)) + { + my_error(ER_WRONG_LOCK_OF_SYSTEM_TABLE, MYF(0)); + DBUG_RETURN(1); + } + + DBUG_RETURN(0); +} + +/** + Reset lock type in lock data + + @param mysql_lock Lock structures to reset. + @param unlock If set, then set lock type to TL_UNLOCK, + otherwise set to original lock type from + get_store_lock(). + + @note After a locking error we want to quit the locking of the table(s). 
+ The test case in the bug report for Bug #18544 has the following + cases: 1. Locking error in lock_external() due to InnoDB timeout. + 2. Locking error in get_lock_data() due to missing write permission. + 3. Locking error in wait_if_global_read_lock() due to lock conflict. + + @note In all these cases we have already set the lock type into the lock + data of the open table(s). If the table(s) are in the open table + cache, they could be reused with the non-zero lock type set. This + could lead to ignoring a different lock type with the next lock. + + @note Clear the lock type of all lock data. This ensures that the next + lock request will set its lock type properly. +*/ + + +void reset_lock_data(MYSQL_LOCK *sql_lock, bool unlock) +{ + THR_LOCK_DATA **ldata, **ldata_end; + DBUG_ENTER("reset_lock_data"); + + /* Clear the lock type of all lock data to avoid reusage. */ + for (ldata= sql_lock->locks, ldata_end= ldata + sql_lock->lock_count; + ldata < ldata_end; + ldata++) + (*ldata)->type= unlock ? TL_UNLOCK : (*ldata)->org_type; + DBUG_VOID_RETURN; +} + + +/** + Scan array of tables for access types; update transaction tracker + accordingly. + + @param thd The current thread. + @param tables An array of pointers to the tables to lock. + @param count The number of tables to lock. +*/ + +#ifndef EMBEDDED_LIBRARY +static void track_table_access(THD *thd, TABLE **tables, size_t count) +{ + if (thd->variables.session_track_transaction_info > TX_TRACK_NONE) + { + while (count--) + { + if (TABLE *t= tables[count]) + thd->session_tracker.transaction_info.add_trx_state(thd, + t->reginfo.lock_type, t->file->has_transaction_manager()); + } + } +} +#else +#define track_table_access(A,B,C) +#endif //EMBEDDED_LIBRARY + + + +/** + Lock tables. + + @param thd The current thread. + @param tables An array of pointers to the tables to lock. + @param count The number of tables to lock. 
+ @param flags Options: + MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY Ignore SET GLOBAL READ_ONLY + MYSQL_LOCK_IGNORE_TIMEOUT Use maximum timeout value. + + @retval A lock structure pointer on success. + @retval NULL if an error or if wait on a lock was killed. +*/ + +MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count, uint flags) +{ + MYSQL_LOCK *sql_lock; + uint gld_flags= GET_LOCK_STORE_LOCKS; + DBUG_ENTER("mysql_lock_tables(tables)"); + + if (lock_tables_check(thd, tables, count, flags)) + DBUG_RETURN(NULL); + + if (!(thd->variables.option_bits & OPTION_TABLE_LOCK) && + !(flags & MYSQL_LOCK_USE_MALLOC)) + gld_flags|= GET_LOCK_ON_THD; + + if (! (sql_lock= get_lock_data(thd, tables, count, gld_flags))) + DBUG_RETURN(NULL); + + if (mysql_lock_tables(thd, sql_lock, flags)) + { + /* Clear the lock type of all lock data to avoid reusage. */ + reset_lock_data(sql_lock, 1); + if (!(gld_flags & GET_LOCK_ON_THD)) + my_free(sql_lock); + sql_lock= 0; + } + + track_table_access(thd, tables, count); + + DBUG_RETURN(sql_lock); +} + +/** + Lock tables based on a MYSQL_LOCK structure. + + mysql_lock_tables() + + @param thd The current thread. + @param sql_lock Tables that should be locked + @param flags See mysql_lock_tables() above + + @return 0 ok + @return 1 error +*/ + +bool mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock, uint flags) +{ + int rc= 1; + ulong timeout= (flags & MYSQL_LOCK_IGNORE_TIMEOUT) ? + LONG_TIMEOUT : thd->variables.lock_wait_timeout; + PSI_stage_info org_stage; + DBUG_ENTER("mysql_lock_tables(sql_lock)"); + + thd->backup_stage(&org_stage); + THD_STAGE_INFO(thd, stage_system_lock); + if (sql_lock->table_count && lock_external(thd, sql_lock->table, + sql_lock->table_count)) + goto end; + + THD_STAGE_INFO(thd, stage_table_lock); + + /* Copy the lock data array. thr_multi_lock() reorders its contents. 
*/ + memmove(sql_lock->locks + sql_lock->lock_count, sql_lock->locks, + sql_lock->lock_count * sizeof(*sql_lock->locks)); + + /* Lock on the copied half of the lock data array. */ + rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks + + sql_lock->lock_count, + sql_lock->lock_count, + &thd->lock_info, timeout)]; + if (rc && sql_lock->table_count) + (void) unlock_external(thd, sql_lock->table, sql_lock->table_count); + +end: + THD_STAGE_INFO(thd, org_stage); + + if (thd->killed && !thd->get_stmt_da()->is_ok()) + { + thd->send_kill_message(); + if (!rc) + { + mysql_unlock_tables(thd, sql_lock, 0); + THD_STAGE_INFO(thd, stage_after_table_lock); + } + rc= 1; + } + else if (rc > 1) + my_error(rc, MYF(0)); + + thd->set_time_after_lock(); + DBUG_RETURN(rc); +} + + +static int lock_external(THD *thd, TABLE **tables, uint count) +{ + uint i; + int lock_type,error; + DBUG_ENTER("lock_external"); + + DBUG_PRINT("info", ("count %d", count)); + for (i=1 ; i <= count ; i++, tables++) + { + DBUG_ASSERT((*tables)->reginfo.lock_type >= TL_READ); + lock_type=F_WRLCK; /* Lock exclusive */ + if ((*tables)->db_stat & HA_READ_ONLY || + ((*tables)->reginfo.lock_type >= TL_READ && + (*tables)->reginfo.lock_type < TL_FIRST_WRITE)) + lock_type=F_RDLCK; + + if (unlikely((error=(*tables)->file->ha_external_lock(thd,lock_type)))) + { + (*tables)->file->print_error(error, MYF(0)); + while (--i) + { + tables--; + (*tables)->file->ha_external_unlock(thd); + (*tables)->current_lock=F_UNLCK; + } + DBUG_RETURN(error); + } + else + { + (*tables)->current_lock= lock_type; + } + } + DBUG_RETURN(0); +} + + +int mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock) +{ + return mysql_unlock_tables(thd, sql_lock, + (thd->variables.option_bits & OPTION_TABLE_LOCK) || + !(sql_lock->flags & GET_LOCK_ON_THD)); +} + + +int mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock, bool free_lock) +{ + bool errors= thd->is_error(); + int error= 0; + PSI_stage_info org_stage; + 
DBUG_ENTER("mysql_unlock_tables"); + + thd->backup_stage(&org_stage); + THD_STAGE_INFO(thd, stage_unlocking_tables); + + if (sql_lock->table_count) + error= unlock_external(thd, sql_lock->table, sql_lock->table_count); + if (sql_lock->lock_count) + thr_multi_unlock(sql_lock->locks, sql_lock->lock_count, 0); + if (free_lock) + { + DBUG_ASSERT(!(sql_lock->flags & GET_LOCK_ON_THD)); + my_free(sql_lock); + } + if (likely(!errors && !error)) + thd->clear_error(); + THD_STAGE_INFO(thd, org_stage); + if (error) + DBUG_PRINT("exit", ("error: %d", error)); + DBUG_RETURN(error); +} + +/** + Unlock some of the tables locked by mysql_lock_tables. + + This will work even if get_lock_data fails (next unlock will free all) +*/ + +int mysql_unlock_some_tables(THD *thd, TABLE **table,uint count, uint flag) +{ + int error; + MYSQL_LOCK *sql_lock; + if (!(sql_lock= get_lock_data(thd, table, count, + GET_LOCK_UNLOCK | GET_LOCK_ON_THD | flag))) + error= ER_OUTOFMEMORY; + else + error= mysql_unlock_tables(thd, sql_lock, 0); + return error; +} + + +/** + unlock all tables locked for read. 
+*/
+
+int mysql_unlock_read_tables(THD *thd, MYSQL_LOCK *sql_lock)
+{
+  uint i,found;
+  int error= 0;
+  DBUG_ENTER("mysql_unlock_read_tables");
+
+  /* Call external lock for all tables to be unlocked */
+
+  /*
+    Move all write locked tables first. Swap-based partition: after the
+    loop the first 'found' entries of sql_lock->table are the write-locked
+    tables and 'table' points at the first read-locked entry. Note that the
+    swaps reorder entries, so lock_position/lock_data_start are recomputed
+    in the fixup loop at the end of this function.
+  */
+  TABLE **table=sql_lock->table;
+  for (i=found=0 ; i < sql_lock->table_count ; i++)
+  {
+    DBUG_ASSERT(sql_lock->table[i]->lock_position == i);
+    if ((uint) sql_lock->table[i]->reginfo.lock_type >= TL_FIRST_WRITE)
+    {
+      swap_variables(TABLE *, *table, sql_lock->table[i]);
+      table++;
+      found++;
+    }
+  }
+  /* Unlock all read locked tables; i-found == number of read-locked tables */
+  if (i != found)
+  {
+    error= unlock_external(thd,table,i-found);
+    sql_lock->table_count=found;
+  }
+
+  /* Call thr_unlock() for all tables to be unlocked */
+
+  /* Move all write locks first, same partition scheme as above */
+  THR_LOCK_DATA **lock=sql_lock->locks;
+  for (i=found=0 ; i < sql_lock->lock_count ; i++)
+  {
+    if (sql_lock->locks[i]->type >= TL_FIRST_WRITE)
+    {
+      swap_variables(THR_LOCK_DATA *, *lock, sql_lock->locks[i]);
+      lock++;
+      found++;
+    }
+  }
+  /* unlock the read locked tables */
+  if (i != found)
+  {
+    thr_multi_unlock(lock, i-found, 0);
+    sql_lock->lock_count= found;
+  }
+
+  /*
+    Fix the lock positions in TABLE: lock_position is rebuilt from the new
+    array order and lock_data_start from the running sum of lock_count.
+  */
+  table= sql_lock->table;
+  found= 0;
+  for (i= 0; i < sql_lock->table_count; i++)
+  {
+    TABLE *tbl= *table;
+    tbl->lock_position= (uint) (table - sql_lock->table);
+    tbl->lock_data_start= found;
+    found+= tbl->lock_count;
+    table++;
+  }
+  DBUG_RETURN(error);
+}
+
+
+/**
+  Try to find the table in the list of locked tables.
+  In case of success, unlock the table and remove it from this list.
+  If a table has more than one lock instance, removes them all.
+
+  @param thd    thread context
+  @param locked list of locked tables
+  @param table  the table to unlock
+*/
+
+int mysql_lock_remove(THD *thd, MYSQL_LOCK *locked,TABLE *table)
+{
+  int error= 0;
+  if (locked)
+  {
+    uint i;
+    for (i=0; i < locked->table_count; i++)
+    {
+      if (locked->table[i] == table)
+      {
+        uint  j, removed_locks, old_tables;
+        int tmp_error;
+        TABLE *tbl;
+        uint lock_data_end;
+
+        DBUG_ASSERT(table->lock_position == i);
+
+        /* Unlock the table. */
+        if ((tmp_error= mysql_unlock_some_tables(thd, &table,
+                                                 /* table count */ 1, 0)))
+        {
+          table->file->print_error(tmp_error, MYF(0));
+          /* Keep the first error but continue removing the table */
+          if (!error)
+            error= tmp_error;
+        }
+
+        /* Decrement table_count in advance, making below expressions easier */
+        old_tables= --locked->table_count;
+
+        /* The table has 'removed_locks' lock data elements in locked->locks */
+        removed_locks= table->lock_count;
+
+        /* Move down all table pointers above 'i'. */
+        bmove((char*) (locked->table+i),
+              (char*) (locked->table+i+1),
+              (old_tables - i) * sizeof(TABLE*));
+
+        lock_data_end= table->lock_data_start + table->lock_count;
+        /* Move down all lock data pointers above 'table->lock_data_end-1' */
+        bmove((char*) (locked->locks + table->lock_data_start),
+              (char*) (locked->locks + lock_data_end),
+              (locked->lock_count - lock_data_end) *
+              sizeof(THR_LOCK_DATA*));
+
+        /*
+          Fix moved table elements.
+          lock_position is the index in the 'locked->table' array,
+          it must be fixed by one.
+          table->lock_data_start is pointer to the lock data for this table
+          in the 'locked->locks' array, they must be fixed by 'removed_locks',
+          the lock data count of the removed table.
+        */
+        for (j= i ; j < old_tables; j++)
+        {
+          tbl= locked->table[j];
+          tbl->lock_position--;
+          DBUG_ASSERT(tbl->lock_position == j);
+          tbl->lock_data_start-= removed_locks;
+        }
+
+        /* Finally adjust lock_count. */
+        locked->lock_count-= removed_locks;
+        break;
+      }
+    }
+  }
+  return error;
+}
+
+
+/**
+  Abort one thread / table combination.
+ + @param thd Thread handler + @param table Table that should be removed from lock queue + + @retval + 0 Table was not locked by another thread + @retval + 1 Table was locked by at least one other thread +*/ + +bool mysql_lock_abort_for_thread(THD *thd, TABLE *table) +{ + MYSQL_LOCK *locked; + bool result= FALSE; + DBUG_ENTER("mysql_lock_abort_for_thread"); + + if ((locked= get_lock_data(thd, &table, 1, GET_LOCK_UNLOCK | GET_LOCK_ON_THD))) + { + for (uint i=0; i < locked->lock_count; i++) + { + if (thr_abort_locks_for_thread(locked->locks[i]->lock, + table->in_use->thread_id)) + result= TRUE; + } + } + DBUG_RETURN(result); +} + + +/** + Merge two thr_lock:s + mysql_lock_merge() + + @param a Original locks + @param b New locks + + @retval New lock structure that contains a and b + + @note + a and b are freed with my_free() +*/ + +MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a, MYSQL_LOCK *b, THD *thd) +{ + MYSQL_LOCK *sql_lock; + TABLE **table, **end_table; + DBUG_ENTER("mysql_lock_merge"); + DBUG_PRINT("enter", ("a->lock_count: %u b->lock_count: %u", + a->lock_count, b->lock_count)); + + const size_t lock_size= sizeof(*sql_lock) + + sizeof(THR_LOCK_DATA *) * ((a->lock_count + b->lock_count) * 2) + + sizeof(TABLE *) * (a->table_count + b->table_count); + if (thd) + { + sql_lock= (MYSQL_LOCK *) thd->alloc(lock_size); + if (!sql_lock) + DBUG_RETURN(0); + sql_lock->flags= GET_LOCK_ON_THD; + } + else + { + sql_lock= (MYSQL_LOCK *) + my_malloc(key_memory_MYSQL_LOCK, lock_size, MYF(MY_WME)); + if (!sql_lock) + DBUG_RETURN(0); + sql_lock->flags= 0; + } + sql_lock->lock_count=a->lock_count+b->lock_count; + sql_lock->table_count=a->table_count+b->table_count; + sql_lock->locks=(THR_LOCK_DATA**) (sql_lock+1); + sql_lock->table=(TABLE**) (sql_lock->locks+sql_lock->lock_count*2); + memcpy(sql_lock->locks,a->locks,a->lock_count*sizeof(*a->locks)); + memcpy(sql_lock->locks+a->lock_count,b->locks, + b->lock_count*sizeof(*b->locks)); + 
memcpy(sql_lock->table,a->table,a->table_count*sizeof(*a->table)); + memcpy(sql_lock->table+a->table_count,b->table, + b->table_count*sizeof(*b->table)); + + /* + Now adjust lock_position and lock_data_start for all objects that was + moved in 'b' (as there is now all objects in 'a' before these). + */ + for (table= sql_lock->table + a->table_count, + end_table= table + b->table_count; + table < end_table; + table++) + { + (*table)->lock_position+= a->table_count; + (*table)->lock_data_start+= a->lock_count; + } + + /* + Ensure that locks of the same tables share same data structures if we + reopen a table that is already open. This can happen for example with + MERGE tables. + */ + + /* Copy the lock data array. thr_merge_lock() reorders its content */ + memcpy(sql_lock->locks + sql_lock->lock_count, sql_lock->locks, + sql_lock->lock_count * sizeof(*sql_lock->locks)); + thr_merge_locks(sql_lock->locks + sql_lock->lock_count, + a->lock_count, b->lock_count); + + /* Delete old, not needed locks */ + if (!(a->flags & GET_LOCK_ON_THD)) + my_free(a); + if (!(b->flags & GET_LOCK_ON_THD)) + my_free(b); + DBUG_RETURN(sql_lock); +} + + +/** Unlock a set of external. */ + +static int unlock_external(THD *thd, TABLE **table,uint count) +{ + int error,error_code; + DBUG_ENTER("unlock_external"); + + error_code=0; + do + { + if ((*table)->current_lock != F_UNLCK) + { + (*table)->current_lock = F_UNLCK; + if (unlikely((error=(*table)->file->ha_external_unlock(thd)))) + { + error_code= error; + (*table)->file->print_error(error, MYF(0)); + } + } + table++; + } while (--count); + DBUG_RETURN(error_code); +} + + +/** + Get lock structures from table structs and initialize locks. 
+ + @param thd Thread handler + @param table_ptr Pointer to tables that should be locks + @param flags One of: + - GET_LOCK_UNLOCK : If we should send TL_IGNORE to store lock + - GET_LOCK_STORE_LOCKS : Store lock info in TABLE + - GET_LOCK_SKIP_SEQUENCES : Ignore sequences (for temporary unlock) + - GET_LOCK_ON_THD : Store lock in thd->mem_root + + Temporary tables are not locked (as these are single user), except for + TRANSACTIONAL_TMP_TABLES as locking is needed to handle transactions. +*/ + +MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, uint flags) +{ + uint i,lock_count,table_count; + MYSQL_LOCK *sql_lock; + THR_LOCK_DATA **locks, **locks_buf; + TABLE **to, **table_buf; + DBUG_ENTER("get_lock_data"); + + DBUG_PRINT("info", ("count %d", count)); + + for (i=lock_count=table_count=0 ; i < count ; i++) + { + TABLE *t= table_ptr[i]; + + if ((likely(!t->s->tmp_table) || + (t->s->tmp_table == TRANSACTIONAL_TMP_TABLE)) && + (!(flags & GET_LOCK_SKIP_SEQUENCES) || t->s->sequence == 0)) + { + lock_count+= t->file->lock_count(); + table_count++; + } + } + + /* + Allocating twice the number of pointers for lock data for use in + thr_multi_lock(). This function reorders the lock data, but cannot + update the table values. So the second part of the array is copied + from the first part immediately before calling thr_multi_lock(). + */ + size_t amount= sizeof(*sql_lock) + + sizeof(THR_LOCK_DATA*) * lock_count * 2 + + sizeof(table_ptr) * table_count; + if (!(sql_lock= (MYSQL_LOCK*) (flags & GET_LOCK_ON_THD ? 
+ thd->alloc(amount) : + my_malloc(key_memory_MYSQL_LOCK, amount, + MYF(0))))) + DBUG_RETURN(0); + locks= locks_buf= sql_lock->locks= (THR_LOCK_DATA**) (sql_lock + 1); + to= table_buf= sql_lock->table= (TABLE**) (locks + lock_count * 2); + sql_lock->table_count= table_count; + sql_lock->flags= flags; + + for (i=0 ; i < count ; i++) + { + TABLE *table= table_ptr[i]; + enum thr_lock_type lock_type; + THR_LOCK_DATA **locks_start; + + if (!((likely(!table->s->tmp_table) || + (table->s->tmp_table == TRANSACTIONAL_TMP_TABLE)) && + (!(flags & GET_LOCK_SKIP_SEQUENCES) || table->s->sequence == 0))) + continue; + lock_type= table->reginfo.lock_type; + DBUG_ASSERT(lock_type != TL_WRITE_DEFAULT && lock_type != TL_READ_DEFAULT); + locks_start= locks; + locks= table->file->store_lock(thd, locks, + (flags & GET_LOCK_ACTION_MASK) == GET_LOCK_UNLOCK ? TL_IGNORE : + lock_type); + if ((flags & GET_LOCK_ACTION_MASK) == GET_LOCK_STORE_LOCKS) + { + table->lock_position= (uint) (to - table_buf); + table->lock_data_start= (uint) (locks_start - locks_buf); + table->lock_count= (uint) (locks - locks_start); + } + *to++= table; + if (locks) + { + for ( ; locks_start != locks ; locks_start++) + { + (*locks_start)->debug_print_param= (void *) table; + (*locks_start)->m_psi= table->file->m_psi; + (*locks_start)->lock->name= table->alias.c_ptr(); + (*locks_start)->org_type= (*locks_start)->type; + } + } + } + /* + We do not use 'lock_count', because there are cases where store_lock() + returns less locks than lock_count() claimed. This can happen when + a FLUSH TABLES tries to abort locks from a MERGE table of another + thread. When that thread has just opened the table, but not yet + attached its children, it cannot return the locks. lock_count() + always returns the number of locks that an attached table has. 
+ This is done to avoid the reverse situation: If lock_count() would + return 0 for a non-attached MERGE table, and that table becomes + attached between the calls to lock_count() and store_lock(), then + we would have allocated too little memory for the lock data. Now + we may allocate too much, but better safe than memory overrun. + And in the FLUSH case, the memory is released quickly anyway. + */ + sql_lock->lock_count= (uint)(locks - locks_buf); + DBUG_ASSERT(sql_lock->lock_count <= lock_count); + DBUG_PRINT("info", ("sql_lock->table_count %d sql_lock->lock_count %d", + sql_lock->table_count, sql_lock->lock_count)); + DBUG_RETURN(sql_lock); +} + + +/** + Obtain an exclusive metadata lock on a schema name. + + @param thd Thread handle. + @param db The database name. + + To avoid deadlocks, we do not try to obtain exclusive metadata + locks in LOCK TABLES mode, since in this mode there may be + other metadata locks already taken by the current connection, + and we must not wait for MDL locks while holding locks. + + @retval FALSE Success. + @retval TRUE Failure: we're in LOCK TABLES mode, or out of memory, + or this connection was killed. 
+*/ + +bool lock_schema_name(THD *thd, const char *db) +{ + MDL_request_list mdl_requests; + MDL_request global_request; + MDL_request mdl_request; + + if (thd->locked_tables_mode) + { + my_message(ER_LOCK_OR_ACTIVE_TRANSACTION, + ER_THD(thd, ER_LOCK_OR_ACTIVE_TRANSACTION), MYF(0)); + return TRUE; + } + + if (thd->has_read_only_protection()) + return TRUE; + MDL_REQUEST_INIT(&global_request, MDL_key::BACKUP, "", "", MDL_BACKUP_DDL, + MDL_STATEMENT); + MDL_REQUEST_INIT(&mdl_request, MDL_key::SCHEMA, db, "", MDL_EXCLUSIVE, + MDL_TRANSACTION); + + mdl_requests.push_front(&mdl_request); + mdl_requests.push_front(&global_request); + + if (thd->mdl_context.acquire_locks(&mdl_requests, + thd->variables.lock_wait_timeout)) + return TRUE; + + DEBUG_SYNC(thd, "after_wait_locked_schema_name"); + return FALSE; +} + + +/** + Obtain an exclusive metadata lock on an object name. + + @param thd Thread handle. + @param mdl_type Object type (currently functions, procedures + and events can be name-locked). + @param db The schema the object belongs to. + @param name Object name in the schema. + + This function assumes that no metadata locks were acquired + before calling it. It is enforced by asserts in MDL_context::acquire_locks(). + To avoid deadlocks, we do not try to obtain exclusive metadata + locks in LOCK TABLES mode, since in this mode there may be + other metadata locks already taken by the current connection, + and we must not wait for MDL locks while holding locks. + + @retval FALSE Success. + @retval TRUE Failure: we're in LOCK TABLES mode, or out of memory, + or this connection was killed. 
+*/ + +bool lock_object_name(THD *thd, MDL_key::enum_mdl_namespace mdl_type, + const char *db, const char *name) +{ + MDL_request_list mdl_requests; + MDL_request global_request; + MDL_request schema_request; + MDL_request mdl_request; + + DBUG_SLOW_ASSERT(ok_for_lower_case_names(db)); + + if (thd->locked_tables_mode) + { + my_message(ER_LOCK_OR_ACTIVE_TRANSACTION, + ER_THD(thd, ER_LOCK_OR_ACTIVE_TRANSACTION), MYF(0)); + return TRUE; + } + + DBUG_ASSERT(name); + DEBUG_SYNC(thd, "before_wait_locked_pname"); + + if (thd->has_read_only_protection()) + return TRUE; + MDL_REQUEST_INIT(&global_request, MDL_key::BACKUP, "", "", MDL_BACKUP_DDL, + MDL_STATEMENT); + MDL_REQUEST_INIT(&schema_request, MDL_key::SCHEMA, db, "", + MDL_INTENTION_EXCLUSIVE, MDL_TRANSACTION); + MDL_REQUEST_INIT(&mdl_request, mdl_type, db, name, MDL_EXCLUSIVE, + MDL_TRANSACTION); + + mdl_requests.push_front(&mdl_request); + mdl_requests.push_front(&schema_request); + mdl_requests.push_front(&global_request); + + if (thd->mdl_context.acquire_locks(&mdl_requests, + thd->variables.lock_wait_timeout)) + return TRUE; + + DEBUG_SYNC(thd, "after_wait_locked_pname"); + return FALSE; +} + + +/**************************************************************************** + Handling of global read locks + + Global read lock is implemented using metadata lock infrastructure. + + Taking the global read lock is TWO steps (2nd step is optional; without + it, COMMIT of existing transactions will be allowed): + lock_global_read_lock() THEN make_global_read_lock_block_commit(). + + How blocking of threads by global read lock is achieved: that's + semi-automatic. We assume that any statement which should be blocked + by global read lock will either open and acquires write-lock on tables + or acquires metadata locks on objects it is going to modify. For any + such statement MDL_BACKUP_STMT metadata lock is automatically acquired + for its duration (in case of LOCK TABLES until end of LOCK TABLES mode). 
+ And lock_global_read_lock() simply acquires MDL_BACKUP_FTWRL1 metadata + lock and thus prohibits execution of statements which modify data (unless + they modify only temporary tables). If deadlock happens it is detected + by MDL subsystem and resolved in the standard fashion (by backing-off + metadata locks acquired so far and restarting open tables process + if possible). + + Why does FLUSH TABLES WITH READ LOCK need to block COMMIT: because it's used + to read a non-moving SHOW MASTER STATUS, and a COMMIT writes to the binary + log. + + Why getting the global read lock is two steps and not one. Because FLUSH + TABLES WITH READ LOCK needs to insert one other step between the two: + flushing tables. So the order is + 1) lock_global_read_lock() (prevents any new table write locks, i.e. stalls + all new updates) + 2) close_cached_tables() (the FLUSH TABLES), which will wait for tables + currently opened and being updated to close (so it's possible that there is + a moment where all new updates of server are stalled *and* FLUSH TABLES WITH + READ LOCK is, too). + 3) make_global_read_lock_block_commit(). + If we have merged 1) and 3) into 1), we would have had this deadlock: + imagine thread 1 and 2, in non-autocommit mode, thread 3, and an InnoDB + table t. + thd1: SELECT * FROM t FOR UPDATE; + thd2: UPDATE t SET a=1; # blocked by row-level locks of thd1 + thd3: FLUSH TABLES WITH READ LOCK; # blocked in close_cached_tables() by the + table instance of thd2 + thd1: COMMIT; # blocked by thd3. + thd1 blocks thd2 which blocks thd3 which blocks thd1: deadlock. + + Note that we need to support that one thread does + FLUSH TABLES WITH READ LOCK; and then COMMIT; + (that's what innobackup does, for some good reason). + So in this exceptional case the COMMIT should not be blocked by the FLUSH + TABLES WITH READ LOCK. + +****************************************************************************/ + +/** + Take global read lock, wait if there is protection against lock. 
+ + If the global read lock is already taken by this thread, then nothing is + done. + + Concurrent thread can acquire protection against global read lock either + before or after it got table metadata lock. This may lead to a deadlock if + there is pending global read lock request. E.g. + t1 does DML, holds SHARED table lock, waiting for t3 (GRL protection) + t2 does DDL, holds GRL protection, waiting for t1 (EXCLUSIVE) + t3 does FTWRL, has pending GRL, waiting for t2 (GRL) + + Since this is very seldom deadlock and FTWRL connection must not hold any + other locks, FTWRL connection is made deadlock victim and attempt to acquire + GRL retried. + + See also "Handling of global read locks" above. + + @param thd Reference to thread. + + @retval False Success, global read lock set, commits are NOT blocked. + @retval True Failure, thread was killed. +*/ + +bool Global_read_lock::lock_global_read_lock(THD *thd) +{ + DBUG_ENTER("lock_global_read_lock"); + + if (!m_state) + { + MDL_deadlock_and_lock_abort_error_handler mdl_deadlock_handler; + MDL_request mdl_request; + bool result; + + if (thd->current_backup_stage != BACKUP_FINISHED) + { + my_error(ER_BACKUP_LOCK_IS_ACTIVE, MYF(0)); + DBUG_RETURN(1); + } + + /* + Release HANDLER OPEN by the current THD as they may cause deadlocks + if another thread is trying to simultaneous drop the table + */ + mysql_ha_cleanup_no_free(thd); + DEBUG_SYNC(thd, "ftwrl_before_lock"); + + DBUG_ASSERT(! thd->mdl_context.is_lock_owner(MDL_key::BACKUP, "", "", + MDL_BACKUP_FTWRL1)); + DBUG_ASSERT(! 
thd->mdl_context.is_lock_owner(MDL_key::BACKUP, "", "", + MDL_BACKUP_FTWRL2)); + MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_FTWRL1, + MDL_EXPLICIT); + + do + { + mdl_deadlock_handler.init(); + thd->push_internal_handler(&mdl_deadlock_handler); + result= thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout); + thd->pop_internal_handler(); + } while (mdl_deadlock_handler.need_reopen()); + + if (result) + DBUG_RETURN(true); + + m_mdl_global_read_lock= mdl_request.ticket; + m_state= GRL_ACQUIRED; + } + /* + We DON'T set global_read_lock_blocks_commit now, it will be set after + tables are flushed (as the present function serves for FLUSH TABLES WITH + READ LOCK only). Doing things in this order is necessary to avoid + deadlocks (we must allow COMMIT until all tables are closed; we should not + forbid it before, or we can have a 3-thread deadlock if 2 do SELECT FOR + UPDATE and one does FLUSH TABLES WITH READ LOCK). + */ + DBUG_RETURN(0); +} + + +/** + Unlock global read lock. + + Commits may or may not be blocked when this function is called. + + See also "Handling of global read locks" above. + + @param thd Reference to thread. 
+*/ + +void Global_read_lock::unlock_global_read_lock(THD *thd) +{ + DBUG_ENTER("unlock_global_read_lock"); + + DBUG_ASSERT(m_mdl_global_read_lock && m_state); + + if (thd->global_disable_checkpoint) + { + thd->global_disable_checkpoint= 0; + if (!--global_disable_checkpoint) + { + ha_checkpoint_state(0); // Enable checkpoints + } + } + + thd->mdl_context.release_lock(m_mdl_global_read_lock); + +#ifdef WITH_WSREP + if (m_state == GRL_ACQUIRED_AND_BLOCKS_COMMIT && + thd->wsrep_desynced_backup_stage) + { + Wsrep_server_state& server_state= Wsrep_server_state::instance(); + if (server_state.state() == Wsrep_server_state::s_donor || + (WSREP_NNULL(thd) && + server_state.state() != Wsrep_server_state::s_synced)) + { + server_state.resume(); + wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + } + else if (WSREP_NNULL(thd) && + server_state.state() == Wsrep_server_state::s_synced) + { + THD_STAGE_INFO(thd, stage_waiting_flow); + WSREP_DEBUG("unlock_global_read_lock: waiting for flow control for %s", + wsrep_thd_query(thd)); + server_state.resume_and_resync(); + DEBUG_SYNC(thd, "wsrep_unlock_global_read_lock_after_resume_and_resync"); + wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + } + thd->wsrep_desynced_backup_stage= false; + } +#endif /* WITH_WSREP */ + + m_mdl_global_read_lock= NULL; + m_state= GRL_NONE; + + DBUG_VOID_RETURN; +} + + +/** + Make global read lock also block commits. + + The scenario is: + - This thread has the global read lock. + - Global read lock blocking of commits is not set. + + See also "Handling of global read locks" above. + + @param thd Reference to thread. + + @retval False Success, global read lock set, commits are blocked. + @retval True Failure, thread was killed. +*/ + +bool Global_read_lock::make_global_read_lock_block_commit(THD *thd) +{ + DBUG_ENTER("make_global_read_lock_block_commit"); + /* + If we didn't succeed lock_global_read_lock(), or if we already succeeded + make_global_read_lock_block_commit(), do nothing. 
+ */ + + if (m_state != GRL_ACQUIRED) + DBUG_RETURN(0); + + if (thd->mdl_context.upgrade_shared_lock(m_mdl_global_read_lock, + MDL_BACKUP_FTWRL2, + thd->variables.lock_wait_timeout)) + DBUG_RETURN(TRUE); + + m_state= GRL_ACQUIRED_AND_BLOCKS_COMMIT; + +#ifdef WITH_WSREP + /* Native threads should bail out before wsrep operations to follow. + Donor servicing thread is an exception, it should pause provider + but not desync, as it is already desynced in donor state. + Desync should be called only when we are in synced state. + */ + Wsrep_server_state& server_state= Wsrep_server_state::instance(); + wsrep::seqno paused_seqno; + if (server_state.state() == Wsrep_server_state::s_donor || + (WSREP_NNULL(thd) && + server_state.state() != Wsrep_server_state::s_synced)) + { + paused_seqno= server_state.pause(); + thd->wsrep_desynced_backup_stage= true; + } + else if (WSREP_NNULL(thd) && + server_state.state() == Wsrep_server_state::s_synced) + { + paused_seqno= server_state.desync_and_pause(); + thd->wsrep_desynced_backup_stage= true; + } + else + { + DBUG_RETURN(FALSE); + } + WSREP_INFO("Server paused at: %lld", paused_seqno.get()); + if (paused_seqno.get() >= 0) + { + wsrep_locked_seqno= paused_seqno.get(); + } + DEBUG_SYNC(thd, "wsrep_global_read_lock_block_commit_after_pause"); +#endif /* WITH_WSREP */ + DBUG_RETURN(FALSE); +} + + +/** + Set explicit duration for metadata locks which are used to implement GRL. + + @param thd Reference to thread. +*/ + +void Global_read_lock::set_explicit_lock_duration(THD *thd) +{ + if (m_mdl_global_read_lock) + thd->mdl_context.set_lock_duration(m_mdl_global_read_lock, MDL_EXPLICIT); +} + +/** + @} (end of group Locking) +*/ diff --git a/sql/lock.h b/sql/lock.h new file mode 100644 index 00000000..85a93b9a --- /dev/null +++ b/sql/lock.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef LOCK_INCLUDED +#define LOCK_INCLUDED + +#include "thr_lock.h" /* thr_lock_type */ +#include "mdl.h" + +// Forward declarations +struct TABLE; +struct TABLE_LIST; +class THD; +typedef struct st_mysql_lock MYSQL_LOCK; + + +MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **table, uint count, uint flags); +bool mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock, uint flags); +int mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock, bool free_lock); +int mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock); +int mysql_unlock_read_tables(THD *thd, MYSQL_LOCK *sql_lock); +int mysql_unlock_some_tables(THD *thd, TABLE **table,uint count, uint flag); +int mysql_lock_remove(THD *thd, MYSQL_LOCK *locked,TABLE *table); +bool mysql_lock_abort_for_thread(THD *thd, TABLE *table); +MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a, MYSQL_LOCK *b, THD *thd= NULL); +/* Lock based on name */ +bool lock_schema_name(THD *thd, const char *db); +/* Lock based on stored routine name */ +bool lock_object_name(THD *thd, MDL_key::enum_mdl_namespace mdl_type, + const char *db, const char *name); + +/* flags for get_lock_data */ +#define GET_LOCK_UNLOCK 0 +#define GET_LOCK_STORE_LOCKS 1 +#define GET_LOCK_ACTION_MASK 1 +#define GET_LOCK_ON_THD (1 << 1) +#define GET_LOCK_SKIP_SEQUENCES (1 << 2) + +MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint 
count, uint flags); +void reset_lock_data(MYSQL_LOCK *sql_lock, bool unlock); + +#endif /* LOCK_INCLUDED */ diff --git a/sql/log.cc b/sql/log.cc new file mode 100644 index 00000000..d3879aad --- /dev/null +++ b/sql/log.cc @@ -0,0 +1,12168 @@ +/* Copyright (c) 2000, 2018, Oracle and/or its affiliates. + Copyright (c) 2009, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/** + @file + + @brief + logging of commands + + @todo + Abort logging when we get an error in reading or writing log files +*/ + +#include "mariadb.h" /* NO_EMBEDDED_ACCESS_CHECKS */ +#include "sql_priv.h" +#include "log.h" +#include "sql_base.h" // open_log_table +#include "sql_repl.h" +#include "sql_delete.h" // mysql_truncate +#include "sql_parse.h" // command_name +#include "sql_time.h" // calc_time_from_sec, my_time_compare +#include "tztime.h" // my_tz_OFFSET0, struct Time_zone +#include "log_event.h" // Query_log_event +#include "rpl_filter.h" +#include "rpl_rli.h" +#include "sql_audit.h" +#include "mysqld.h" +#include "ddl_log.h" + +#include +#include // For test_if_number + +#include // for Sys_last_gtid_ptr + +#ifdef _WIN32 +#include "message.h" +#endif + +#include "sql_plugin.h" +#include "debug_sync.h" +#include "sql_show.h" +#include "my_pthread.h" +#include "semisync_master.h" +#include "sp_rcontext.h" +#include "sp_head.h" +#include "sql_table.h" + 
+#include "wsrep_mysqld.h" +#ifdef WITH_WSREP +#include "wsrep_trans_observer.h" +#include "wsrep_status.h" +#endif /* WITH_WSREP */ + +#ifdef HAVE_REPLICATION +#include "semisync_master.h" +#include "semisync_slave.h" +#include // pair +#endif + +/* max size of the log message */ +#define MAX_LOG_BUFFER_SIZE 1024 +#define MAX_TIME_SIZE 32 +#define MY_OFF_T_UNDEF (~(my_off_t)0UL) +/* Truncate cache log files bigger than this */ +#define CACHE_FILE_TRUNC_SIZE 65536 + +#define FLAGSTR(V,F) ((V)&(F)?#F" ":"") + +handlerton *binlog_hton; +LOGGER logger; + +const char *log_bin_index= 0; +const char *log_bin_basename= 0; + +MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period); + +static bool test_if_number(const char *str, + ulong *res, bool allow_wildcards); +static int binlog_init(void *p); +static int binlog_close_connection(handlerton *hton, THD *thd); +static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv); +static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv); +static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton, + THD *thd); +static int binlog_rollback(handlerton *hton, THD *thd, bool all); +static int binlog_prepare(handlerton *hton, THD *thd, bool all); +static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd); +static int binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr, + Log_event *end_ev, bool all, bool using_stmt, + bool using_trx, bool is_ro_1pc); + +static const LEX_CSTRING write_error_msg= + { STRING_WITH_LEN("error writing to the binary log") }; + +static my_bool opt_optimize_thread_scheduling= TRUE; +ulong binlog_checksum_options; +#ifndef DBUG_OFF +ulong opt_binlog_dbug_fsync_sleep= 0; +#endif + +mysql_mutex_t LOCK_prepare_ordered; +mysql_cond_t COND_prepare_ordered; +mysql_mutex_t LOCK_after_binlog_sync; +mysql_mutex_t LOCK_commit_ordered; + +static ulonglong binlog_status_var_num_commits; +static ulonglong binlog_status_var_num_group_commits; +static ulonglong 
binlog_status_group_commit_trigger_count; +static ulonglong binlog_status_group_commit_trigger_lock_wait; +static ulonglong binlog_status_group_commit_trigger_timeout; +static char binlog_snapshot_file[FN_REFLEN]; +static ulonglong binlog_snapshot_position; + +static const char *fatal_log_error= + "Could not use %s for logging (error %d). " + "Turning logging off for the whole duration of the MariaDB server process. " + "To turn it on again: fix the cause, shutdown the MariaDB server and " + "restart it."; + + +static SHOW_VAR binlog_status_vars_detail[]= +{ + {"commits", + (char *)&binlog_status_var_num_commits, SHOW_LONGLONG}, + {"group_commits", + (char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG}, + {"group_commit_trigger_count", + (char *)&binlog_status_group_commit_trigger_count, SHOW_LONGLONG}, + {"group_commit_trigger_lock_wait", + (char *)&binlog_status_group_commit_trigger_lock_wait, SHOW_LONGLONG}, + {"group_commit_trigger_timeout", + (char *)&binlog_status_group_commit_trigger_timeout, SHOW_LONGLONG}, + {"snapshot_file", + (char *)&binlog_snapshot_file, SHOW_CHAR}, + {"snapshot_position", + (char *)&binlog_snapshot_position, SHOW_LONGLONG}, + {NullS, NullS, SHOW_LONG} +}; + +/* + Variables for the binlog background thread. + Protected by the MYSQL_BIN_LOG::LOCK_binlog_background_thread mutex. + */ +static bool binlog_background_thread_started= false; +static bool binlog_background_thread_stop= false; +static MYSQL_BIN_LOG::xid_count_per_binlog * + binlog_background_thread_queue= NULL; + +static bool start_binlog_background_thread(); + +static rpl_binlog_state rpl_global_gtid_binlog_state; + +void setup_log_handling() +{ + rpl_global_gtid_binlog_state.init(); +} + + +/** + purge logs, master and slave sides both, related error code + converter. 
+ Called from @c purge_error_message(), @c MYSQL_BIN_LOG::reset_logs() + + @param res an internal to purging routines error code + + @return the user level error code ER_* +*/ +uint purge_log_get_error_code(int res) +{ + uint errcode= 0; + + switch (res) { + case 0: break; + case LOG_INFO_EOF: errcode= ER_UNKNOWN_TARGET_BINLOG; break; + case LOG_INFO_IO: errcode= ER_IO_ERR_LOG_INDEX_READ; break; + case LOG_INFO_INVALID:errcode= ER_BINLOG_PURGE_PROHIBITED; break; + case LOG_INFO_SEEK: errcode= ER_FSEEK_FAIL; break; + case LOG_INFO_MEM: errcode= ER_OUT_OF_RESOURCES; break; + case LOG_INFO_FATAL: errcode= ER_BINLOG_PURGE_FATAL_ERR; break; + case LOG_INFO_IN_USE: errcode= ER_LOG_IN_USE; break; + case LOG_INFO_EMFILE: errcode= ER_BINLOG_PURGE_EMFILE; break; + default: errcode= ER_LOG_PURGE_UNKNOWN_ERR; break; + } + + return errcode; +} + +/** + Silence all errors and warnings reported when performing a write + to a log table. + Errors and warnings are not reported to the client or SQL exception + handlers, so that the presence of logging does not interfere and affect + the logic of an application. 
+*/ +class Silence_log_table_errors : public Internal_error_handler +{ + char m_message[MYSQL_ERRMSG_SIZE]; +public: + Silence_log_table_errors() + { + m_message[0]= '\0'; + } + + virtual ~Silence_log_table_errors() = default; + + virtual bool handle_condition(THD *thd, + uint sql_errno, + const char* sql_state, + Sql_condition::enum_warning_level *level, + const char* msg, + Sql_condition ** cond_hdl); + const char *message() const { return m_message; } +}; + +bool +Silence_log_table_errors::handle_condition(THD *, + uint, + const char*, + Sql_condition::enum_warning_level*, + const char* msg, + Sql_condition ** cond_hdl) +{ + *cond_hdl= NULL; + strmake_buf(m_message, msg); + return TRUE; +} + +sql_print_message_func sql_print_message_handlers[3] = +{ + sql_print_information, + sql_print_warning, + sql_print_error +}; + + +/** + Create the name of the log file + + @param[OUT] out a pointer to a new allocated name will go there + @param[IN] log_ext The extension for the file (e.g .log) + @param[IN] once whether to use malloc_once or a normal malloc. +*/ +void make_default_log_name(char **out, const char* log_ext, bool once) +{ + char buff[FN_REFLEN+10]; + fn_format(buff, opt_log_basename, "", log_ext, MYF(MY_REPLACE_EXT)); + if (once) + *out= my_once_strdup(buff, MYF(MY_WME)); + else + { + my_free(*out); + *out= my_strdup(PSI_INSTRUMENT_ME, buff, MYF(MY_WME)); + } +} + + +/* + Helper classes to store non-transactional and transactional data + before copying it to the binary log. +*/ +class binlog_cache_data +{ +public: + binlog_cache_data(): m_pending(0), status(0), + before_stmt_pos(MY_OFF_T_UNDEF), + incident(FALSE), + saved_max_binlog_cache_size(0), ptr_binlog_cache_use(0), + ptr_binlog_cache_disk_use(0) + { } + + ~binlog_cache_data() + { + DBUG_ASSERT(empty()); + close_cached_file(&cache_log); + } + + /* + Return 1 if there is no relevant entries in the cache + + This is: + - Cache is empty + - There are row or critical (DDL?) 
events in the cache + + The status test is needed to avoid writing entries with only + a table map entry, which would crash in do_apply_event() on the slave + as it assumes that there is always a row entry after a table map. + */ + bool empty() const + { + return (pending() == NULL && + (my_b_write_tell(&cache_log) == 0 || + ((status & (LOGGED_ROW_EVENT | LOGGED_CRITICAL)) == 0))); + } + + Rows_log_event *pending() const + { + return m_pending; + } + + void set_pending(Rows_log_event *const pending_arg) + { + m_pending= pending_arg; + } + + void set_incident(void) + { + incident= TRUE; + } + + bool has_incident(void) + { + return(incident); + } + + void reset() + { + bool cache_was_empty= empty(); + bool truncate_file= (cache_log.file != -1 && + my_b_write_tell(&cache_log) > CACHE_FILE_TRUNC_SIZE); + truncate(0,1); // Forget what's in cache + if (!cache_was_empty) + compute_statistics(); + if (truncate_file) + my_chsize(cache_log.file, 0, 0, MYF(MY_WME)); + + status= 0; + incident= FALSE; + before_stmt_pos= MY_OFF_T_UNDEF; + DBUG_ASSERT(empty()); + } + + my_off_t get_byte_position() const + { + return my_b_tell(&cache_log); + } + + my_off_t get_prev_position() + { + return(before_stmt_pos); + } + + void set_prev_position(my_off_t pos) + { + before_stmt_pos= pos; + } + + void restore_prev_position() + { + truncate(before_stmt_pos); + } + + void restore_savepoint(my_off_t pos) + { + truncate(pos); + if (pos < before_stmt_pos) + before_stmt_pos= MY_OFF_T_UNDEF; + } + + void set_binlog_cache_info(my_off_t param_max_binlog_cache_size, + ulong *param_ptr_binlog_cache_use, + ulong *param_ptr_binlog_cache_disk_use) + { + /* + The assertions guarantee that the set_binlog_cache_info is + called just once and information passed as parameters are + never zero. + + This is done while calling the constructor binlog_cache_mngr. + We cannot set information in the constructor binlog_cache_data + because the space for binlog_cache_mngr is allocated through + a placement new. 
+ + In the future, we can refactor this and change it to avoid + the set_binlog_info. + */ + DBUG_ASSERT(saved_max_binlog_cache_size == 0); + DBUG_ASSERT(param_max_binlog_cache_size != 0); + DBUG_ASSERT(ptr_binlog_cache_use == 0); + DBUG_ASSERT(param_ptr_binlog_cache_use != 0); + DBUG_ASSERT(ptr_binlog_cache_disk_use == 0); + DBUG_ASSERT(param_ptr_binlog_cache_disk_use != 0); + + saved_max_binlog_cache_size= param_max_binlog_cache_size; + ptr_binlog_cache_use= param_ptr_binlog_cache_use; + ptr_binlog_cache_disk_use= param_ptr_binlog_cache_disk_use; + cache_log.end_of_file= saved_max_binlog_cache_size; + } + + void add_status(enum_logged_status status_arg) + { + status|= status_arg; + } + + /* + Cache to store data before copying it to the binary log. + */ + IO_CACHE cache_log; + +private: + /* + Pending binrows event. This event is the event where the rows are currently + written. + */ + Rows_log_event *m_pending; + + /* + Bit flags for what has been writing to cache. Used to + discard logs without any data changes. + see enum_logged_status; + */ + uint32 status; + + /* + Binlog position before the start of the current statement. + */ + my_off_t before_stmt_pos; + + /* + This indicates that some events did not get into the cache and most likely + it is corrupted. + */ + bool incident; + + /** + This function computes binlog cache and disk usage. + */ + void compute_statistics() + { + statistic_increment(*ptr_binlog_cache_use, &LOCK_status); + if (cache_log.disk_writes != 0) + { +#ifdef REAL_STATISTICS + statistic_add(*ptr_binlog_cache_disk_use, + cache_log.disk_writes, &LOCK_status); +#else + statistic_increment(*ptr_binlog_cache_disk_use, &LOCK_status); +#endif + cache_log.disk_writes= 0; + } + } + + /* + Stores the values of maximum size of the cache allowed when this cache + is configured. This corresponds to either + . max_binlog_cache_size or max_binlog_stmt_cache_size. 
+ */ + my_off_t saved_max_binlog_cache_size; + + /* + Stores a pointer to the status variable that keeps track of the in-memory + cache usage. This corresponds to either + . binlog_cache_use or binlog_stmt_cache_use. + */ + ulong *ptr_binlog_cache_use; + + /* + Stores a pointer to the status variable that keeps track of the disk + cache usage. This corresponds to either + . binlog_cache_disk_use or binlog_stmt_cache_disk_use. + */ + ulong *ptr_binlog_cache_disk_use; + + /* + It truncates the cache to a certain position. This includes deleting the + pending event. + */ + void truncate(my_off_t pos, bool reset_cache=0) + { + DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos)); + cache_log.error=0; + if (pending()) + { + delete pending(); + set_pending(0); + } + reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, reset_cache); + cache_log.end_of_file= saved_max_binlog_cache_size; + } + + binlog_cache_data& operator=(const binlog_cache_data& info); + binlog_cache_data(const binlog_cache_data& info); +}; + + +void Log_event_writer::add_status(enum_logged_status status) +{ + if (likely(cache_data)) + cache_data->add_status(status); +} + +void Log_event_writer::set_incident() +{ + cache_data->set_incident(); +} + + +class binlog_cache_mngr { +public: + binlog_cache_mngr(my_off_t param_max_binlog_stmt_cache_size, + my_off_t param_max_binlog_cache_size, + ulong *param_ptr_binlog_stmt_cache_use, + ulong *param_ptr_binlog_stmt_cache_disk_use, + ulong *param_ptr_binlog_cache_use, + ulong *param_ptr_binlog_cache_disk_use) + : last_commit_pos_offset(0), using_xa(FALSE), xa_xid(0) + { + stmt_cache.set_binlog_cache_info(param_max_binlog_stmt_cache_size, + param_ptr_binlog_stmt_cache_use, + param_ptr_binlog_stmt_cache_disk_use); + trx_cache.set_binlog_cache_info(param_max_binlog_cache_size, + param_ptr_binlog_cache_use, + param_ptr_binlog_cache_disk_use); + last_commit_pos_file[0]= 0; + } + + void reset(bool do_stmt, bool do_trx) + { + if (do_stmt) + stmt_cache.reset(); + 
if (do_trx) + { + trx_cache.reset(); + using_xa= FALSE; + last_commit_pos_file[0]= 0; + last_commit_pos_offset= 0; + } + } + + binlog_cache_data* get_binlog_cache_data(bool is_transactional) + { + return (is_transactional ? &trx_cache : &stmt_cache); + } + + IO_CACHE* get_binlog_cache_log(bool is_transactional) + { + return (is_transactional ? &trx_cache.cache_log : &stmt_cache.cache_log); + } + + binlog_cache_data stmt_cache; + + binlog_cache_data trx_cache; + + /* + Binlog position for current transaction. + For START TRANSACTION WITH CONSISTENT SNAPSHOT, this is the binlog + position corresponding to the snapshot taken. During (and after) commit, + this is set to the binlog position corresponding to just after the + commit (so storage engines can store it in their transaction log). + */ + char last_commit_pos_file[FN_REFLEN]; + my_off_t last_commit_pos_offset; + + /* + Flag set true if this transaction is committed with log_xid() as part of + XA, false if not. + */ + bool using_xa; + my_xid xa_xid; + bool need_unlog; + /* + Id of binlog that transaction was written to; only needed if need_unlog is + true. + */ + ulong binlog_id; + /* Set if we get an error during commit that must be returned from unlog(). */ + bool delayed_error; + //Will be reset when gtid is written into binlog + uchar gtid_flags3; + decltype (rpl_gtid::seq_no) sa_seq_no; +private: + + binlog_cache_mngr& operator=(const binlog_cache_mngr& info); + binlog_cache_mngr(const binlog_cache_mngr& info); +}; + +/** + The function handles the first phase of two-phase binlogged ALTER. + On master binlogs START ALTER when that is configured to do so. + On slave START ALTER gets binlogged and its gtid committed into gtid slave pos + table. + + @param thd Thread handle. + @param start_alter_id Start Alter identifier or zero. + @param[out] + partial_alter Is set to true when Start Alter phase is completed. 
+ @param if_exists True indicates the binary logging of the query + should be done with "if exists" option. + + @return false on success, true on failure + @return @c partial_alter set to @c true when START ALTER phase + has been completed +*/ +bool write_bin_log_start_alter(THD *thd, bool& partial_alter, + uint64 start_alter_id, bool if_exists) +{ +#if defined(HAVE_REPLICATION) + if (thd->variables.option_bits & OPTION_BIN_TMP_LOG_OFF) + return false; + + if (start_alter_id) + { + if (thd->rgi_slave->get_finish_event_group_called()) + return false; // can get here through retrying + + DBUG_EXECUTE_IF("at_write_start_alter", { + debug_sync_set_action(thd, + STRING_WITH_LEN("now wait_for alter_cont")); + }); + + Master_info *mi= thd->rgi_slave->rli->mi; + start_alter_info *info= thd->rgi_slave->sa_info; + bool is_shutdown= false; + + info->sa_seq_no= start_alter_id; + info->domain_id= thd->variables.gtid_domain_id; + mysql_mutex_lock(&mi->start_alter_list_lock); + // possible stop-slave's marking of the whole alter state list is checked + is_shutdown= mi->is_shutdown; + mi->start_alter_list.push_back(info, &mi->mem_root); + mysql_mutex_unlock(&mi->start_alter_list_lock); + info->state= start_alter_state::REGISTERED; + thd->rgi_slave->commit_orderer.wait_for_prior_commit(thd); + thd->rgi_slave->start_alter_ev->update_pos(thd->rgi_slave); + if (mysql_bin_log.is_open()) + { + Write_log_with_flags wlwf (thd, Gtid_log_event::FL_START_ALTER_E1); + if (write_bin_log(thd, true, thd->query(), thd->query_length())) + { + DBUG_ASSERT(thd->is_error()); + return true; + } + } + thd->rgi_slave->mark_start_commit(); + thd->wakeup_subsequent_commits(0); + thd->rgi_slave->finish_start_alter_event_group(); + + if (is_shutdown) + { + /* SA exists abruptly and will notify any CA|RA waiter. */ + mysql_mutex_lock(&mi->start_alter_lock); + /* + If there is (or will be) unlikely any CA it will execute + the whole query before to stop itself. 
+ */ + info->direct_commit_alter= true; + info->state= start_alter_state::ROLLBACK_ALTER; + mysql_mutex_unlock(&mi->start_alter_lock); + + return true; + } + + return false; + } +#endif + +#ifndef WITH_WSREP + rpl_group_info *rgi= thd->rgi_slave ? thd->rgi_slave : thd->rgi_fake; +#else + rpl_group_info *rgi= thd->slave_thread ? thd->rgi_slave : + WSREP(thd) ? (thd->wsrep_rgi ? thd->wsrep_rgi : thd->rgi_fake) : + thd->rgi_fake; +#endif + + if (!rgi && thd->variables.binlog_alter_two_phase) + { + /* slave applier can handle here only regular ALTER */ + DBUG_ASSERT(!rgi || !(rgi->gtid_ev_flags_extra & + (Gtid_log_event::FL_START_ALTER_E1 | + Gtid_log_event::FL_COMMIT_ALTER_E1 | + Gtid_log_event::FL_ROLLBACK_ALTER_E1))); + + /* + After logging binlog state stays flagged with SA flags3 an seq_no. + The state is not reset after write_bin_log() is done which is + deferred for the second logging phase. + */ + thd->set_binlog_flags_for_alter(Gtid_log_event::FL_START_ALTER_E1); + if(write_bin_log_with_if_exists(thd, false, false, if_exists, false)) + { + DBUG_ASSERT(thd->is_error()); + + thd->set_binlog_flags_for_alter(0); + return true; + } + partial_alter= true; + } + else if (rgi && rgi->direct_commit_alter) + { + DBUG_ASSERT(rgi->gtid_ev_flags_extra & + Gtid_log_event::FL_COMMIT_ALTER_E1); + + partial_alter= true; + } + + return false; +} + +bool LOGGER::is_log_table_enabled(uint log_table_type) +{ + switch (log_table_type) { + case QUERY_LOG_SLOW: + return (table_log_handler != NULL) && global_system_variables.sql_log_slow + && (log_output_options & LOG_TABLE); + case QUERY_LOG_GENERAL: + return (table_log_handler != NULL) && opt_log + && (log_output_options & LOG_TABLE); + default: + DBUG_ASSERT(0); + return FALSE; /* make compiler happy */ + } +} + +/** + Check if a given table is opened log table + + @param table Table to check + @param check_if_opened Only fail if it's a log table in use + @param error_msg String to put in error message if not ok. 
+ No error message if 0 + @return 0 ok + @return # Type of log file + */ + +int check_if_log_table(const TABLE_LIST *table, + bool check_if_opened, + const char *error_msg) +{ + int result= 0; + if (table->db.length == 5 && + !my_strcasecmp(table_alias_charset, table->db.str, "mysql")) + { + const char *table_name= table->table_name.str; + + if (table->table_name.length == 11 && + !my_strcasecmp(table_alias_charset, table_name, "general_log")) + { + result= QUERY_LOG_GENERAL; + goto end; + } + + if (table->table_name.length == 8 && + !my_strcasecmp(table_alias_charset, table_name, "slow_log")) + { + result= QUERY_LOG_SLOW; + goto end; + } + } + return 0; + +end: + if (!check_if_opened || logger.is_log_table_enabled(result)) + { + if (error_msg) + my_error(ER_BAD_LOG_STATEMENT, MYF(0), error_msg); + return result; + } + return 0; +} + + +Log_to_csv_event_handler::Log_to_csv_event_handler() = default; + + +Log_to_csv_event_handler::~Log_to_csv_event_handler() = default; + + +void Log_to_csv_event_handler::cleanup() +{ + logger.is_log_tables_initialized= FALSE; +} + +/* log event handlers */ + +/** + Log command to the general log table + + Log given command to the general log table. + + @param event_time command start timestamp + @param user_host the pointer to the string with user@host info + @param user_host_len length of the user_host string. this is computed + once and passed to all general log event handlers + @param thread_id Id of the thread, issued a query + @param command_type the type of the command being logged + @param command_type_len the length of the string above + @param sql_text the very text of the query being executed + @param sql_text_len the length of sql_text string + + + @return This function attempts to never call my_error(). This is + necessary, because general logging happens already after a statement + status has been sent to the client, so the client can not see the + error anyway. 
Besides, the error is not related to the statement + being executed and is internal, and thus should be handled + internally (@todo: how?). + If a write to the table has failed, the function attempts to + write to a short error message to the file. The failure is also + indicated in the return value. + + @retval FALSE OK + @retval TRUE error occurred +*/ + +bool Log_to_csv_event_handler:: + log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg, + const char *command_type, size_t command_type_len, + const char *sql_text, size_t sql_text_len, + CHARSET_INFO *client_cs) +{ + TABLE_LIST table_list; + TABLE *table; + bool result= TRUE; + bool need_close= FALSE; + bool need_pop= FALSE; + bool need_rnd_end= FALSE; + uint field_index; + Silence_log_table_errors error_handler; + Open_tables_backup open_tables_backup; + THD::used_t save_time_zone_used= thd->used & THD::TIME_ZONE_USED; + DBUG_ENTER("log_general"); + + /* + CSV uses TIME_to_timestamp() internally if table needs to be repaired + which will set TIME_ZONE_USED + */ + + table_list.init_one_table(&MYSQL_SCHEMA_NAME, &GENERAL_LOG_NAME, 0, + TL_WRITE_CONCURRENT_INSERT); + + /* + 1) open_log_table generates an error of the + table can not be opened or is corrupted. + 2) "INSERT INTO general_log" can generate warning sometimes. + + Suppress these warnings and errors, they can't be dealt with + properly anyway. + + QQ: this problem needs to be studied in more detail. + Comment this 2 lines and run "cast.test" to see what's happening. 
+ */ + thd->push_internal_handler(& error_handler); + need_pop= TRUE; + + if (!(table= open_log_table(thd, &table_list, &open_tables_backup))) + goto err; + + need_close= TRUE; + + if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) || + table->file->ha_rnd_init_with_error(0)) + goto err; + + need_rnd_end= TRUE; + + /* Honor next number columns if present */ + table->next_number_field= table->found_next_number_field; + + /* + NOTE: we do not call restore_record() here, as all fields are + filled by the Logger (=> no need to load default ones). + */ + + /* + We do not set a value for table->field[0], as it will use + default value (which is CURRENT_TIMESTAMP). + */ + + /* check that all columns exist */ + if (table->s->fields < 6) + goto err; + + DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP); + + table->field[0]->store_timestamp( + hrtime_to_my_time(event_time), hrtime_sec_part(event_time)); + + /* do a write */ + if (table->field[1]->store(user_host, user_host_len, client_cs) || + table->field[2]->store((longlong) thread_id_arg, TRUE) || + table->field[3]->store((longlong) global_system_variables.server_id, + TRUE) || + table->field[4]->store(command_type, command_type_len, client_cs)) + goto err; + + /* + A positive return value in store() means truncation. + Still logging a message in the log in this case. 
+ */ + table->field[5]->flags|= FIELDFLAG_HEX_ESCAPE; + if (table->field[5]->store(sql_text, sql_text_len, client_cs) < 0) + goto err; + + /* mark all fields as not null */ + table->field[1]->set_notnull(); + table->field[2]->set_notnull(); + table->field[3]->set_notnull(); + table->field[4]->set_notnull(); + table->field[5]->set_notnull(); + + /* Set any extra columns to their default values */ + for (field_index= 6 ; field_index < table->s->fields ; field_index++) + { + table->field[field_index]->set_default(); + } + + if (table->file->ha_write_row(table->record[0])) + goto err; + + result= FALSE; + +err: + if (result && !thd->killed) + sql_print_error("Failed to write to mysql.general_log: %s", + error_handler.message()); + + if (need_rnd_end) + { + table->file->ha_rnd_end(); + table->file->ha_release_auto_increment(); + } + if (need_pop) + thd->pop_internal_handler(); + if (need_close) + close_log_table(thd, &open_tables_backup); + + thd->used= (thd->used & ~THD::TIME_ZONE_USED) | save_time_zone_used; + DBUG_RETURN(result); +} + + +/* + Log a query to the slow log table + + SYNOPSIS + log_slow() + thd THD of the query + current_time current timestamp + user_host the pointer to the string with user@host info + user_host_len length of the user_host string. 
this is computed once + and passed to all general log event handlers + query_time Amount of time the query took to execute (in microseconds) + lock_time Amount of time the query was locked (in microseconds) + is_command The flag, which determines, whether the sql_text is a + query or an administrator command (these are treated + differently by the old logging routines) + sql_text the very text of the query or administrator command + processed + sql_text_len the length of sql_text string + + DESCRIPTION + + Log a query to the slow log table + + RETURN + FALSE - OK + TRUE - error occurred +*/ + +bool Log_to_csv_event_handler:: + log_slow(THD *thd, my_hrtime_t current_time, + const char *user_host, size_t user_host_len, + ulonglong query_utime, ulonglong lock_utime, bool is_command, + const char *sql_text, size_t sql_text_len) +{ + TABLE_LIST table_list; + TABLE *table; + bool result= TRUE; + bool need_close= FALSE; + bool need_rnd_end= FALSE; + Silence_log_table_errors error_handler; + Open_tables_backup open_tables_backup; + CHARSET_INFO *client_cs= thd->variables.character_set_client; + THD::used_t save_time_zone_used= thd->used & THD::TIME_ZONE_USED; + ulong query_time= (ulong) MY_MIN(query_utime/1000000, TIME_MAX_VALUE_SECONDS); + ulong lock_time= (ulong) MY_MIN(lock_utime/1000000, TIME_MAX_VALUE_SECONDS); + ulong query_time_micro= (ulong) (query_utime % 1000000); + ulong lock_time_micro= (ulong) (lock_utime % 1000000); + DBUG_ENTER("Log_to_csv_event_handler::log_slow"); + + thd->push_internal_handler(& error_handler); + + table_list.init_one_table(&MYSQL_SCHEMA_NAME, &SLOW_LOG_NAME, 0, + TL_WRITE_CONCURRENT_INSERT); + + if (!(table= open_log_table(thd, &table_list, &open_tables_backup))) + goto err; + + need_close= TRUE; + + if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) || + table->file->ha_rnd_init_with_error(0)) + goto err; + + need_rnd_end= TRUE; + + /* Honor next number columns if present */ + table->next_number_field= table->found_next_number_field; + 
+ restore_record(table, s->default_values); // Get empty record + + /* check that all columns exist */ + if (table->s->fields < 13) + goto err; + + /* store the time and user values */ + DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP); + table->field[0]->store_timestamp( + hrtime_to_my_time(current_time), hrtime_sec_part(current_time)); + if (table->field[1]->store(user_host, user_host_len, client_cs)) + goto err; + + /* + A TIME field can not hold the full longlong range; query_time or + lock_time may be truncated without warning here, if greater than + 839 hours (~35 days) + */ + MYSQL_TIME t; + t.neg= 0; + + /* fill in query_time field */ + calc_time_from_sec(&t, query_time, query_time_micro); + if (table->field[2]->store_time(&t)) + goto err; + /* lock_time */ + calc_time_from_sec(&t, lock_time, lock_time_micro); + if (table->field[3]->store_time(&t)) + goto err; + /* rows_sent */ + if (table->field[4]->store((longlong) thd->get_sent_row_count(), TRUE)) + goto err; + /* rows_examined */ + if (table->field[5]->store((longlong) thd->get_examined_row_count(), TRUE)) + goto err; + + /* fill database field */ + if (thd->db.str) + { + if (table->field[6]->store(thd->db.str, thd->db.length, client_cs)) + goto err; + table->field[6]->set_notnull(); + } + + if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt) + { + if (table-> + field[7]->store((longlong) + thd->first_successful_insert_id_in_prev_stmt_for_binlog, + TRUE)) + goto err; + table->field[7]->set_notnull(); + } + + /* + Set value if we do an insert on autoincrement column. Note that for + some engines (those for which get_auto_increment() does not leave a + table lock until the statement ends), this is just the first value and + the next ones used may not be contiguous to it. 
+ */ + if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0) + { + if (table-> + field[8]->store((longlong) + thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(), TRUE)) + goto err; + table->field[8]->set_notnull(); + } + + if (table->field[9]->store((longlong)global_system_variables.server_id, TRUE)) + goto err; + table->field[9]->set_notnull(); + + /* + Column sql_text. + A positive return value in store() means truncation. + Still logging a message in the log in this case. + */ + if (table->field[10]->store(sql_text, sql_text_len, client_cs) < 0) + goto err; + + if (table->field[11]->store((longlong) thd->thread_id, TRUE)) + goto err; + + /* Rows_affected */ + if (table->field[12]->store(thd->get_stmt_da()->is_ok() ? + (longlong) thd->get_stmt_da()->affected_rows() : + 0, TRUE)) + goto err; + + if (table->file->ha_write_row(table->record[0])) + goto err; + + result= FALSE; + +err: + thd->pop_internal_handler(); + + if (result && !thd->killed) + sql_print_error("Failed to write to mysql.slow_log: %s", + error_handler.message()); + + if (need_rnd_end) + { + table->file->ha_rnd_end(); + table->file->ha_release_auto_increment(); + } + if (need_close) + close_log_table(thd, &open_tables_backup); + thd->used= (thd->used & ~THD::TIME_ZONE_USED) | save_time_zone_used; + DBUG_RETURN(result); +} + +int Log_to_csv_event_handler:: + activate_log(THD *thd, uint log_table_type) +{ + TABLE_LIST table_list; + TABLE *table; + LEX_CSTRING *UNINIT_VAR(log_name); + int result; + Open_tables_backup open_tables_backup; + + DBUG_ENTER("Log_to_csv_event_handler::activate_log"); + + if (log_table_type == QUERY_LOG_GENERAL) + { + log_name= &GENERAL_LOG_NAME; + } + else + { + DBUG_ASSERT(log_table_type == QUERY_LOG_SLOW); + + log_name= &SLOW_LOG_NAME; + } + table_list.init_one_table(&MYSQL_SCHEMA_NAME, log_name, 0, TL_WRITE_CONCURRENT_INSERT); + + table= open_log_table(thd, &table_list, &open_tables_backup); + if (table) + { + result= 0; + close_log_table(thd, 
&open_tables_backup); + } + else + result= 1; + + DBUG_RETURN(result); +} + +bool Log_to_csv_event_handler:: + log_error(enum loglevel level, const char *format, va_list args) +{ + /* No log table is implemented */ + DBUG_ASSERT(0); + return FALSE; +} + +bool Log_to_file_event_handler:: + log_error(enum loglevel level, const char *format, + va_list args) +{ + return vprint_msg_to_log(level, format, args); +} + +void Log_to_file_event_handler::init_pthread_objects() +{ + mysql_log.init_pthread_objects(); + mysql_slow_log.init_pthread_objects(); +} + + +/** Wrapper around MYSQL_LOG::write() for slow log. */ + +bool Log_to_file_event_handler:: + log_slow(THD *thd, my_hrtime_t current_time, + const char *user_host, size_t user_host_len, + ulonglong query_utime, ulonglong lock_utime, bool is_command, + const char *sql_text, size_t sql_text_len) +{ + Silence_log_table_errors error_handler; + thd->push_internal_handler(&error_handler); + bool retval= mysql_slow_log.write(thd, hrtime_to_my_time(current_time), + user_host, user_host_len, + query_utime, lock_utime, is_command, + sql_text, sql_text_len); + thd->pop_internal_handler(); + return retval; +} + + +/** + Wrapper around MYSQL_LOG::write() for general log. We need it since we + want all log event handlers to have the same signature. 
+*/ + +bool Log_to_file_event_handler:: + log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg, + const char *command_type, size_t command_type_len, + const char *sql_text, size_t sql_text_len, + CHARSET_INFO *client_cs) +{ + Silence_log_table_errors error_handler; + thd->push_internal_handler(&error_handler); + bool retval= mysql_log.write(hrtime_to_time(event_time), user_host, + user_host_len, + thread_id_arg, command_type, command_type_len, + sql_text, sql_text_len); + thd->pop_internal_handler(); + return retval; +} + + +bool Log_to_file_event_handler::init() +{ + if (!is_initialized) + { + if (global_system_variables.sql_log_slow) + mysql_slow_log.open_slow_log(opt_slow_logname); + + if (opt_log) + mysql_log.open_query_log(opt_logname); + + is_initialized= TRUE; + } + + return FALSE; +} + + +void Log_to_file_event_handler::cleanup() +{ + mysql_log.cleanup(); + mysql_slow_log.cleanup(); +} + +void Log_to_file_event_handler::flush() +{ + /* reopen log files */ + if (opt_log) + mysql_log.reopen_file(); + if (global_system_variables.sql_log_slow) + mysql_slow_log.reopen_file(); +} + +/* + Log error with all enabled log event handlers + + SYNOPSIS + error_log_print() + + level The level of the error significance: NOTE, + WARNING or ERROR. 
+ format format string for the error message + args list of arguments for the format string + + RETURN + FALSE - OK + TRUE - error occurred +*/ + +bool LOGGER::error_log_print(enum loglevel level, const char *format, + va_list args) +{ + bool error= FALSE; + Log_event_handler **current_handler; + THD *thd= current_thd; + + if (likely(thd)) + thd->error_printed_to_log= 1; + + /* currently we don't need locking here as there is no error_log table */ + for (current_handler= error_log_handler_list ; *current_handler ;) + error= (*current_handler++)->log_error(level, format, args) || error; + + return error; +} + + +void LOGGER::cleanup_base() +{ + DBUG_ASSERT(inited == 1); + mysql_rwlock_destroy(&LOCK_logger); + if (table_log_handler) + { + table_log_handler->cleanup(); + delete table_log_handler; + table_log_handler= NULL; + } + if (file_log_handler) + file_log_handler->cleanup(); +} + + +void LOGGER::cleanup_end() +{ + DBUG_ASSERT(inited == 1); + if (file_log_handler) + { + delete file_log_handler; + file_log_handler=NULL; + } + inited= 0; +} + + +/** + Perform basic log initialization: create file-based log handler and + init error log. +*/ +void LOGGER::init_base() +{ + DBUG_ASSERT(inited == 0); + inited= 1; + + /* + Here we create file log handler. We don't do it for the table log handler + here as it cannot be created so early. The reason is THD initialization, + which depends on the system variables (parsed later). 
+ */ + if (!file_log_handler) + file_log_handler= new Log_to_file_event_handler; + + /* by default we use traditional error log */ + init_error_log(LOG_FILE); + + file_log_handler->init_pthread_objects(); + mysql_rwlock_init(key_rwlock_LOCK_logger, &LOCK_logger); +} + + +void LOGGER::init_log_tables() +{ + if (!table_log_handler) + table_log_handler= new Log_to_csv_event_handler; + + if (!is_log_tables_initialized && + !table_log_handler->init() && !file_log_handler->init()) + is_log_tables_initialized= TRUE; +} + + +/** + Close and reopen the slow log (with locks). + + @returns FALSE. +*/ +bool LOGGER::flush_slow_log() +{ + /* + Now we lock logger, as nobody should be able to use logging routines while + log tables are closed + */ + logger.lock_exclusive(); + + /* Reopen slow log file */ + if (global_system_variables.sql_log_slow) + file_log_handler->get_mysql_slow_log()->reopen_file(); + + /* End of log flush */ + logger.unlock(); + + return 0; +} + + +/** + Close and reopen the general log (with locks). + + @returns FALSE. 
+*/ +bool LOGGER::flush_general_log() +{ + /* + Now we lock logger, as nobody should be able to use logging routines while + log tables are closed + */ + logger.lock_exclusive(); + + /* Reopen general log file */ + if (opt_log) + file_log_handler->get_mysql_log()->reopen_file(); + + /* End of log flush */ + logger.unlock(); + + return 0; +} + + +/* + Log slow query with all enabled log event handlers + + SYNOPSIS + slow_log_print() + + thd THD of the query being logged + query The query being logged + query_length The length of the query string + current_utime Current time in microseconds (from undefined start) + + RETURN + FALSE OK + TRUE error occurred +*/ + +bool LOGGER::slow_log_print(THD *thd, const char *query, size_t query_length, + ulonglong current_utime) + +{ + bool error= FALSE; + Log_event_handler **current_handler; + bool is_command= FALSE; + char user_host_buff[MAX_USER_HOST_SIZE + 1]; + Security_context *sctx= thd->security_ctx; + uint user_host_len= 0; + ulonglong query_utime, lock_utime; + + DBUG_ASSERT(thd->enable_slow_log); + /* + Print the message to the buffer if we have slow log enabled + */ + + if (*slow_log_handler_list) + { + /* do not log slow queries from replication threads */ + if (!thd->variables.sql_log_slow) + return 0; + + lock_shared(); + if (!global_system_variables.sql_log_slow) + { + unlock(); + return 0; + } + + /* fill in user_host value: the format is "%s[%s] @ %s [%s]" */ + user_host_len= (uint)(strxnmov(user_host_buff, MAX_USER_HOST_SIZE, + sctx->priv_user, "[", + sctx->user ? sctx->user : (thd->slave_thread ? "SQL_SLAVE" : ""), "] @ ", + sctx->host ? sctx->host : "", " [", + sctx->ip ? 
sctx->ip : "", "]", NullS) - + user_host_buff); + + DBUG_ASSERT(thd->start_utime); + DBUG_ASSERT(thd->start_time); + query_utime= (current_utime - thd->start_utime); + lock_utime= (thd->utime_after_lock - thd->start_utime); + my_hrtime_t current_time= { hrtime_from_time(thd->start_time) + + thd->start_time_sec_part + query_utime }; + + if (!query || thd->get_command() == COM_STMT_PREPARE) + { + is_command= TRUE; + query= command_name[thd->get_command()].str; + query_length= (uint)command_name[thd->get_command()].length; + } + + for (current_handler= slow_log_handler_list; *current_handler ;) + error= (*current_handler++)->log_slow(thd, current_time, + user_host_buff, user_host_len, + query_utime, lock_utime, is_command, + query, query_length) || error; + + unlock(); + } + return error; +} + +bool LOGGER::general_log_write(THD *thd, enum enum_server_command command, + const char *query, size_t query_length) +{ + bool error= FALSE; + Log_event_handler **current_handler= general_log_handler_list; + char user_host_buff[MAX_USER_HOST_SIZE + 1]; + uint user_host_len= 0; + my_hrtime_t current_time; + + DBUG_ASSERT(thd); + + user_host_len= make_user_name(thd, user_host_buff); + + current_time= my_hrtime(); + + mysql_audit_general_log(thd, hrtime_to_time(current_time), + user_host_buff, user_host_len, + command_name[(uint) command].str, + (uint)command_name[(uint) command].length, + query, (uint)query_length); + + if (opt_log && log_command(thd, command)) + { + lock_shared(); + while (*current_handler) + error|= (*current_handler++)-> + log_general(thd, current_time, user_host_buff, + user_host_len, thd->thread_id, + command_name[(uint) command].str, + command_name[(uint) command].length, + query, query_length, + thd->variables.character_set_client) || error; + unlock(); + } + + return error; +} + +bool LOGGER::general_log_print(THD *thd, enum enum_server_command command, + const char *format, va_list args) +{ + size_t message_buff_len= 0; + char 
message_buff[MAX_LOG_BUFFER_SIZE]; + + /* prepare message */ + if (format) + message_buff_len= my_vsnprintf(message_buff, sizeof(message_buff), + format, args); + else + message_buff[0]= '\0'; + + return general_log_write(thd, command, message_buff, message_buff_len); +} + +void LOGGER::init_error_log(ulonglong error_log_printer) +{ + if (error_log_printer & LOG_NONE) + { + error_log_handler_list[0]= 0; + return; + } + + switch (error_log_printer) { + case LOG_FILE: + error_log_handler_list[0]= file_log_handler; + error_log_handler_list[1]= 0; + break; + /* these two are disabled for now */ + case LOG_TABLE: + DBUG_ASSERT(0); + break; + case LOG_TABLE|LOG_FILE: + DBUG_ASSERT(0); + break; + } +} + +void LOGGER::init_slow_log(ulonglong slow_log_printer) +{ + if (slow_log_printer & LOG_NONE) + { + slow_log_handler_list[0]= 0; + return; + } + + switch (slow_log_printer) { + case LOG_FILE: + slow_log_handler_list[0]= file_log_handler; + slow_log_handler_list[1]= 0; + break; + case LOG_TABLE: + slow_log_handler_list[0]= table_log_handler; + slow_log_handler_list[1]= 0; + break; + case LOG_TABLE|LOG_FILE: + slow_log_handler_list[0]= file_log_handler; + slow_log_handler_list[1]= table_log_handler; + slow_log_handler_list[2]= 0; + break; + } +} + +void LOGGER::init_general_log(ulonglong general_log_printer) +{ + if (general_log_printer & LOG_NONE) + { + general_log_handler_list[0]= 0; + return; + } + + switch (general_log_printer) { + case LOG_FILE: + general_log_handler_list[0]= file_log_handler; + general_log_handler_list[1]= 0; + break; + case LOG_TABLE: + general_log_handler_list[0]= table_log_handler; + general_log_handler_list[1]= 0; + break; + case LOG_TABLE|LOG_FILE: + general_log_handler_list[0]= file_log_handler; + general_log_handler_list[1]= table_log_handler; + general_log_handler_list[2]= 0; + break; + } +} + + +bool LOGGER::activate_log_handler(THD* thd, uint log_type) +{ + MYSQL_QUERY_LOG *file_log; + bool res= FALSE; + lock_exclusive(); + switch (log_type) 
{ + case QUERY_LOG_SLOW: + if (!global_system_variables.sql_log_slow) + { + file_log= file_log_handler->get_mysql_slow_log(); + + file_log->open_slow_log(opt_slow_logname); + if (table_log_handler->activate_log(thd, QUERY_LOG_SLOW)) + { + /* Error printed by open table in activate_log() */ + res= TRUE; + file_log->close(0); + } + else + { + init_slow_log(log_output_options); + global_system_variables.sql_log_slow= TRUE; + } + } + break; + case QUERY_LOG_GENERAL: + if (!opt_log) + { + file_log= file_log_handler->get_mysql_log(); + + file_log->open_query_log(opt_logname); + if (table_log_handler->activate_log(thd, QUERY_LOG_GENERAL)) + { + /* Error printed by open table in activate_log() */ + res= TRUE; + file_log->close(0); + } + else + { + init_general_log(log_output_options); + opt_log= TRUE; + } + } + break; + default: + DBUG_ASSERT(0); + } + unlock(); + return res; +} + + +void LOGGER::deactivate_log_handler(THD *thd, uint log_type) +{ + my_bool *tmp_opt= 0; + MYSQL_LOG *UNINIT_VAR(file_log); + + switch (log_type) { + case QUERY_LOG_SLOW: + tmp_opt= &global_system_variables.sql_log_slow; + file_log= file_log_handler->get_mysql_slow_log(); + break; + case QUERY_LOG_GENERAL: + tmp_opt= &opt_log; + file_log= file_log_handler->get_mysql_log(); + break; + default: + MY_ASSERT_UNREACHABLE(); + } + + if (!(*tmp_opt)) + return; + + lock_exclusive(); + file_log->close(0); + *tmp_opt= FALSE; + unlock(); +} + + +/* the parameters are unused for the log tables */ +bool Log_to_csv_event_handler::init() +{ + return 0; +} + +int LOGGER::set_handlers(ulonglong slow_log_printer, + ulonglong general_log_printer) +{ + lock_exclusive(); + + if ((slow_log_printer & LOG_TABLE || general_log_printer & LOG_TABLE) && + !is_log_tables_initialized) + { + slow_log_printer= (slow_log_printer & ~LOG_TABLE) | LOG_FILE; + general_log_printer= (general_log_printer & ~LOG_TABLE) | LOG_FILE; + + sql_print_error("Failed to initialize log tables. 
" + "Falling back to the old-fashioned logs"); + } + + init_slow_log(slow_log_printer); + init_general_log(general_log_printer); + + unlock(); + + return 0; +} + + /* + Save position of binary log transaction cache. + + SYNPOSIS + binlog_trans_log_savepos() + + thd The thread to take the binlog data from + pos Pointer to variable where the position will be stored + + DESCRIPTION + + Save the current position in the binary log transaction cache into + the variable pointed to by 'pos' + */ + +static void +binlog_trans_log_savepos(THD *thd, my_off_t *pos) +{ + DBUG_ENTER("binlog_trans_log_savepos"); + DBUG_ASSERT(pos != NULL); + binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data(); + DBUG_ASSERT((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()); + *pos= cache_mngr->trx_cache.get_byte_position(); + DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos)); + DBUG_VOID_RETURN; +} + + +/* + Truncate the binary log transaction cache. + + SYNPOSIS + binlog_trans_log_truncate() + + thd The thread to take the binlog data from + pos Position to truncate to + + DESCRIPTION + + Truncate the binary log to the given position. Will not change + anything else. + + */ +static void +binlog_trans_log_truncate(THD *thd, my_off_t pos) +{ + DBUG_ENTER("binlog_trans_log_truncate"); + DBUG_PRINT("enter", ("pos: %lu", (ulong) pos)); + + DBUG_ASSERT(thd_get_ha_data(thd, binlog_hton) != NULL); + /* Only true if binlog_trans_log_savepos() wasn't called before */ + DBUG_ASSERT(pos != ~(my_off_t) 0); + + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + cache_mngr->trx_cache.restore_savepoint(pos); + DBUG_VOID_RETURN; +} + + +/* + this function is mostly a placeholder. + conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open) + should be moved here. 
+*/ + +int binlog_init(void *p) +{ + binlog_hton= (handlerton *)p; + binlog_hton->savepoint_offset= sizeof(my_off_t); + binlog_hton->close_connection= binlog_close_connection; + binlog_hton->savepoint_set= binlog_savepoint_set; + binlog_hton->savepoint_rollback= binlog_savepoint_rollback; + binlog_hton->savepoint_rollback_can_release_mdl= + binlog_savepoint_rollback_can_release_mdl; + binlog_hton->commit= [](handlerton *, THD *thd, bool all) { return 0; }; + binlog_hton->rollback= binlog_rollback; + binlog_hton->drop_table= [](handlerton *, const char*) { return -1; }; + if (WSREP_ON || opt_bin_log) + { + binlog_hton->prepare= binlog_prepare; + binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot; + } + + binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN | HTON_NO_ROLLBACK; + return 0; +} + +#ifdef WITH_WSREP +#include "wsrep_binlog.h" +#endif /* WITH_WSREP */ +static int binlog_close_connection(handlerton *hton, THD *thd) +{ + DBUG_ENTER("binlog_close_connection"); + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); +#ifdef WITH_WSREP + if (WSREP(thd) && cache_mngr && !cache_mngr->trx_cache.empty()) { + IO_CACHE* cache= cache_mngr->get_binlog_cache_log(true); + uchar *buf; + size_t len=0; + wsrep_write_cache_buf(cache, &buf, &len); + WSREP_WARN("binlog trx cache not empty (%zu bytes) @ connection close %lld", + len, (longlong) thd->thread_id); + if (len > 0) wsrep_dump_rbr_buf(thd, buf, len); + + cache = cache_mngr->get_binlog_cache_log(false); + wsrep_write_cache_buf(cache, &buf, &len); + WSREP_WARN("binlog stmt cache not empty (%zu bytes) @ connection close %lld", + len, (longlong) thd->thread_id); + if (len > 0) wsrep_dump_rbr_buf(thd, buf, len); + } +#endif /* WITH_WSREP */ + DBUG_ASSERT(cache_mngr->trx_cache.empty()); + DBUG_ASSERT(cache_mngr->stmt_cache.empty()); + cache_mngr->~binlog_cache_mngr(); + my_free(cache_mngr); + DBUG_RETURN(0); +} + +/* + This function flushes a cache upon 
commit/rollback. + + SYNOPSIS + binlog_flush_cache() + + thd The thread whose transaction should be ended + cache_mngr Pointer to the binlog_cache_mngr to use + all True if the entire transaction should be ended, false if + only the statement transaction should be ended. + end_ev The end event to use (COMMIT, ROLLBACK, or commit XID) + using_stmt True if the statement cache should be flushed + using_trx True if the transaction cache should be flushed + + DESCRIPTION + + End the currently transaction or statement. The transaction can be either + a real transaction or a statement transaction. + + This can be to commit a transaction, with a COMMIT query event or an XA + commit XID event. But it can also be to rollback a transaction with a + ROLLBACK query event, used for rolling back transactions which also + contain updates to non-transactional tables. Or it can be a flush of + a statement cache. + */ + +static int +binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr, + Log_event *end_ev, bool all, bool using_stmt, + bool using_trx, bool is_ro_1pc= false) +{ + int error= 0; + DBUG_ENTER("binlog_flush_cache"); + DBUG_PRINT("enter", ("end_ev: %p", end_ev)); + + if ((using_stmt && !cache_mngr->stmt_cache.empty()) || + (using_trx && !cache_mngr->trx_cache.empty()) || + thd->transaction->xid_state.is_explicit_XA()) + { + if (using_stmt && thd->binlog_flush_pending_rows_event(TRUE, FALSE)) + DBUG_RETURN(1); + if (using_trx && thd->binlog_flush_pending_rows_event(TRUE, TRUE)) + DBUG_RETURN(1); + + /* + Doing a commit or a rollback including non-transactional tables, + i.e., ending a transaction where we might write the transaction + cache to the binary log. + + We can always end the statement when ending a transaction since + transactions are not allowed inside stored functions. If they + were, we would have to ensure that we're not ending a statement + inside a stored function. 
+ */ + error= mysql_bin_log.write_transaction_to_binlog(thd, cache_mngr, + end_ev, all, + using_stmt, using_trx, + is_ro_1pc); + } + else + { + /* + This can happen in row-format binlog with something like + BEGIN; INSERT INTO nontrans_table; INSERT IGNORE INTO trans_table; + The nontrans_table is written directly into the binlog before commit, + and if the trans_table is ignored there will be no rows to write when + we get here. + + So there is no work to do. Therefore, we will not increment any XID + count, so we must not decrement any XID count in unlog(). + */ + cache_mngr->need_unlog= 0; + } + cache_mngr->reset(using_stmt, using_trx); + + DBUG_ASSERT(!using_stmt || cache_mngr->stmt_cache.empty()); + DBUG_ASSERT(!using_trx || cache_mngr->trx_cache.empty()); + DBUG_RETURN(error); +} + + +/** + This function flushes the stmt-cache upon commit. + + @param thd The thread whose transaction should be flushed + @param cache_mngr Pointer to the cache manager + + @return + nonzero if an error pops up when flushing the cache. +*/ +static inline int +binlog_commit_flush_stmt_cache(THD *thd, bool all, + binlog_cache_mngr *cache_mngr) +{ + DBUG_ENTER("binlog_commit_flush_stmt_cache"); +#ifdef WITH_WSREP + if (thd->wsrep_mysql_replicated > 0) + { + DBUG_ASSERT(WSREP(thd)); + WSREP_DEBUG("avoiding binlog_commit_flush_trx_cache: %d", + thd->wsrep_mysql_replicated); + return 0; + } +#endif + + Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"), + FALSE, TRUE, TRUE, 0); + DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, FALSE)); +} + + +inline size_t serialize_with_xid(XID *xid, char *buf, + const char *query, size_t q_len) +{ + memcpy(buf, query, q_len); + + return + q_len + strlen(static_cast(xid)->serialize(buf + q_len)); +} + + +/** + This function flushes the trx-cache upon commit. 
+ + @param thd The thread whose transaction should be flushed + @param cache_mngr Pointer to the cache manager + + @return + nonzero if an error pops up when flushing the cache. +*/ +static inline int +binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr, + bool ro_1pc) +{ + DBUG_ENTER("binlog_commit_flush_trx_cache"); + + const char query[]= "XA COMMIT "; + const size_t q_len= sizeof(query) - 1; // do not count trailing 0 + char buf[q_len + ser_buf_size]= "COMMIT"; + size_t buflen= sizeof("COMMIT") - 1; + + if (thd->lex->sql_command == SQLCOM_XA_COMMIT && + thd->lex->xa_opt != XA_ONE_PHASE) + { + DBUG_ASSERT(thd->transaction->xid_state.is_explicit_XA()); + DBUG_ASSERT(thd->transaction->xid_state.get_state_code() == + XA_PREPARED); + + buflen= serialize_with_xid(thd->transaction->xid_state.get_xid(), + buf, query, q_len); + } + Query_log_event end_evt(thd, buf, buflen, TRUE, TRUE, TRUE, 0); + + DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE, ro_1pc)); +} + + +/** + This function flushes the trx-cache upon rollback. + + @param thd The thread whose transaction should be flushed + @param cache_mngr Pointer to the cache manager + + @return + nonzero if an error pops up when flushing the cache. 
+*/ +static inline int +binlog_rollback_flush_trx_cache(THD *thd, bool all, + binlog_cache_mngr *cache_mngr) +{ + const char query[]= "XA ROLLBACK "; + const size_t q_len= sizeof(query) - 1; // do not count trailing 0 + char buf[q_len + ser_buf_size]= "ROLLBACK"; + size_t buflen= sizeof("ROLLBACK") - 1; + + if (thd->transaction->xid_state.is_explicit_XA()) + { + /* for not prepared use plain ROLLBACK */ + if (thd->transaction->xid_state.get_state_code() == XA_PREPARED) + buflen= serialize_with_xid(thd->transaction->xid_state.get_xid(), + buf, query, q_len); + } + Query_log_event end_evt(thd, buf, buflen, TRUE, TRUE, TRUE, 0); + + return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE)); +} + +/** + This function flushes the trx-cache upon commit. + + @param thd The thread whose transaction should be flushed + @param cache_mngr Pointer to the cache manager + @param xid Transaction Id + + @return + nonzero if an error pops up when flushing the cache. +*/ +static inline int +binlog_commit_flush_xid_caches(THD *thd, binlog_cache_mngr *cache_mngr, + bool all, my_xid xid) +{ + DBUG_ASSERT(xid); // replaced former treatment of ONE-PHASE XA + + Xid_log_event end_evt(thd, xid, TRUE); + return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE)); +} + +/** + This function truncates the transactional cache upon committing or rolling + back either a transaction or a statement. + + @param thd The thread whose transaction should be flushed + @param cache_mngr Pointer to the cache data to be flushed + @param all @c true means truncate the transaction, otherwise the + statement must be truncated. + + @return + nonzero if an error pops up when truncating the transactional cache. +*/ +static int +binlog_truncate_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, bool all) +{ + DBUG_ENTER("binlog_truncate_trx_cache"); + int error=0; + /* + This function handles transactional changes and as such this flag + equals to true. 
+ */ + bool const is_transactional= TRUE; + + DBUG_PRINT("info", ("thd->options={ %s %s}, transaction: %s", + FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT), + FLAGSTR(thd->variables.option_bits, OPTION_BEGIN), + all ? "all" : "stmt")); + + thd->binlog_remove_pending_rows_event(TRUE, is_transactional); + /* + If rolling back an entire transaction or a single statement not + inside a transaction, we reset the transaction cache. + */ + if (ending_trans(thd, all)) + { + if (cache_mngr->trx_cache.has_incident()) + error= mysql_bin_log.write_incident(thd); + + thd->reset_binlog_for_next_statement(); + + cache_mngr->reset(false, true); + } + /* + If rolling back a statement in a transaction, we truncate the + transaction cache to remove the statement. + */ + else + cache_mngr->trx_cache.restore_prev_position(); + + DBUG_ASSERT(thd->binlog_get_pending_rows_event(is_transactional) == NULL); + DBUG_RETURN(error); +} + + +inline bool is_preparing_xa(THD *thd) +{ + return + thd->transaction->xid_state.is_explicit_XA() && + thd->lex->sql_command == SQLCOM_XA_PREPARE; +} + + +static int binlog_prepare(handlerton *hton, THD *thd, bool all) +{ + /* Do nothing unless the transaction is a user XA. */ + return is_preparing_xa(thd) ? binlog_commit(thd, all, FALSE) : 0; +} + + +int binlog_commit_by_xid(handlerton *hton, XID *xid) +{ + int rc= 0; + THD *thd= current_thd; + + if (thd->is_current_stmt_binlog_disabled()) + { + return thd->wait_for_prior_commit(); + } + + /* the asserted state can't be reachable with xa commit */ + DBUG_ASSERT(!thd->get_stmt_da()->is_error() || + thd->get_stmt_da()->sql_errno() != ER_XA_RBROLLBACK); + /* + This is a recovered user xa transaction commit. + Create a "temporary" binlog transaction to write the commit record + into binlog. 
+ */ + THD_TRANS trans; + trans.ha_list= NULL; + + thd->ha_data[hton->slot].ha_info[1].register_ha(&trans, hton); + thd->ha_data[binlog_hton->slot].ha_info[1].set_trx_read_write(); + (void) thd->binlog_setup_trx_data(); + + DBUG_ASSERT(thd->lex->sql_command == SQLCOM_XA_COMMIT); + + rc= binlog_commit(thd, TRUE, FALSE); + thd->ha_data[binlog_hton->slot].ha_info[1].reset(); + + return rc; +} + + +int binlog_rollback_by_xid(handlerton *hton, XID *xid) +{ + int rc= 0; + THD *thd= current_thd; + + if (thd->is_current_stmt_binlog_disabled()) + { + return thd->wait_for_prior_commit(); + } + + if (thd->get_stmt_da()->is_error() && + thd->get_stmt_da()->sql_errno() == ER_XA_RBROLLBACK) + return rc; + + THD_TRANS trans; + trans.ha_list= NULL; + + thd->ha_data[hton->slot].ha_info[1].register_ha(&trans, hton); + thd->ha_data[hton->slot].ha_info[1].set_trx_read_write(); + (void) thd->binlog_setup_trx_data(); + + DBUG_ASSERT(thd->lex->sql_command == SQLCOM_XA_ROLLBACK || + (thd->transaction->xid_state.get_state_code() == XA_ROLLBACK_ONLY)); + + rc= binlog_rollback(hton, thd, TRUE); + thd->ha_data[hton->slot].ha_info[1].reset(); + + return rc; +} + + +inline bool is_prepared_xa(THD *thd) +{ + return thd->transaction->xid_state.is_explicit_XA() && + thd->transaction->xid_state.get_state_code() == XA_PREPARED; +} + + +/* + We flush the cache wrapped in a beging/rollback if: + . aborting a single or multi-statement transaction and; + . the OPTION_BINLOG_THIS_TRX is active or; + . the format is STMT and a non-trans table was updated or; + . the format is MIXED and a temporary non-trans table was + updated or; + . 
the format is MIXED, non-trans table was updated and + aborting a single statement transaction; +*/ +static bool trans_cannot_safely_rollback(THD *thd, bool all) +{ + DBUG_ASSERT(ending_trans(thd, all)); + ulong binlog_format= thd->wsrep_binlog_format(thd->variables.binlog_format); + + return ((thd->variables.option_bits & OPTION_BINLOG_THIS_TRX) || + (trans_has_updated_non_trans_table(thd) && + binlog_format == BINLOG_FORMAT_STMT) || + (thd->transaction->all.has_modified_non_trans_temp_table() && + binlog_format == BINLOG_FORMAT_MIXED) || + (trans_has_updated_non_trans_table(thd) && + ending_single_stmt_trans(thd,all) && + binlog_format == BINLOG_FORMAT_MIXED) || + is_prepared_xa(thd)); +} + + +/** + Specific log flusher invoked through log_xa_prepare(). +*/ +static int binlog_commit_flush_xa_prepare(THD *thd, bool all, + binlog_cache_mngr *cache_mngr) +{ + XID *xid= thd->transaction->xid_state.get_xid(); + { + // todo assert wsrep_simulate || is_open() + + /* + Log the XA END event first. + We don't do that in trans_xa_end() as XA COMMIT ONE PHASE + is logged as simple BEGIN/COMMIT so the XA END should + not get to the log. + */ + const char query[]= "XA END "; + const size_t q_len= sizeof(query) - 1; // do not count trailing 0 + char buf[q_len + ser_buf_size]; + size_t buflen; + binlog_cache_data *cache_data; + IO_CACHE *file; + + memcpy(buf, query, q_len); + buflen= q_len + + strlen(static_cast(xid)->serialize(buf + q_len)); + cache_data= cache_mngr->get_binlog_cache_data(true); + file= &cache_data->cache_log; + thd->lex->sql_command= SQLCOM_XA_END; + Query_log_event xa_end(thd, buf, buflen, true, false, true, 0); + if (mysql_bin_log.write_event(&xa_end, cache_data, file)) + return 1; + thd->lex->sql_command= SQLCOM_XA_PREPARE; + } + + cache_mngr->using_xa= FALSE; + XA_prepare_log_event end_evt(thd, xid, FALSE); + + return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE)); +} + +/** + This function is called once after each statement. 
  It has the responsibility to flush the caches to the binary log on commits.

  @param thd    The client thread that executes the transaction.
  @param all    This is @c true if this is a real transaction commit, and
                @c false otherwise.
  @param ro_1pc read-only one-phase commit transaction
*/
int binlog_commit(THD *thd, bool all, bool ro_1pc)
{
  int error= 0;
  PSI_stage_info org_stage;
  DBUG_ENTER("binlog_commit");

  binlog_cache_mngr *const cache_mngr=
    (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);

  if (!cache_mngr)
  {
    /*
      No cache manager means nothing was ever binlogged in this session;
      this is only expected under wsrep or for statements that never
      went through the binlog (asserted below).
    */
    DBUG_ASSERT(WSREP(thd) ||
                (thd->lex->sql_command != SQLCOM_XA_PREPARE &&
                 !(thd->lex->sql_command == SQLCOM_XA_COMMIT &&
                   thd->lex->xa_opt == XA_ONE_PHASE)));

    DBUG_RETURN(0);
  }
  /*
    This is true if we are doing an alter table that is replicated as
    CREATE TABLE ... SELECT
  */
  if (thd->variables.option_bits & OPTION_BIN_COMMIT_OFF)
    DBUG_RETURN(0);

  DBUG_PRINT("debug",
             ("all: %d, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
              all,
              YESNO(thd->in_multi_stmt_transaction_mode()),
              YESNO(thd->transaction->all.modified_non_trans_table),
              YESNO(thd->transaction->stmt.modified_non_trans_table)));

  /* Switch to the binlog-write stage; restored before every return below */
  thd->backup_stage(&org_stage);
  THD_STAGE_INFO(thd, stage_binlog_write);
#ifdef WITH_WSREP
  // DON'T clear stmt cache in case we are in transaction
  if (!cache_mngr->stmt_cache.empty() &&
      (!wsrep_on(thd) || ending_trans(thd, all)))
#else
  if (!cache_mngr->stmt_cache.empty())
#endif
  {
    error= binlog_commit_flush_stmt_cache(thd, all, cache_mngr);
  }

  if (cache_mngr->trx_cache.empty() &&
      (thd->transaction->xid_state.get_state_code() != XA_PREPARED ||
       !(thd->ha_data[binlog_hton->slot].ha_info[1].is_started() &&
         thd->ha_data[binlog_hton->slot].ha_info[1].is_trx_read_write())))
  {
    /*
      This is an empty transaction commit (both the regular and xa),
      or such transaction xa-prepare or
      either one's statement having no effect on the transactional cache
      as any prior to it.
      The empty xa-prepare sinks in only when binlog is read-only.
    */
    cache_mngr->reset(false, true);
    THD_STAGE_INFO(thd, org_stage);
    DBUG_RETURN(error);
  }

  /*
    We commit the transaction if:
     - We are not in a transaction and committing a statement, or
     - We are in a transaction and a full transaction is committed.
    Otherwise, we accumulate the changes.
  */
  if (likely(!error) && ending_trans(thd, all))
  {
    bool is_xa_prepare= is_preparing_xa(thd);

    error= is_xa_prepare ?
      binlog_commit_flush_xa_prepare(thd, all, cache_mngr) :
      binlog_commit_flush_trx_cache (thd, all, cache_mngr, ro_1pc);
    // the user xa is unlogged on common exec path with the "empty" xa case
    if (cache_mngr->need_unlog && !is_xa_prepare)
    {
      error=
        mysql_bin_log.unlog(BINLOG_COOKIE_MAKE(cache_mngr->binlog_id,
                                               cache_mngr->delayed_error), 1);
      cache_mngr->need_unlog= false;
    }
  }
  /*
    This is part of the stmt rollback.
  */
  if (!all)
    cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);

  THD_STAGE_INFO(thd, org_stage);
  DBUG_RETURN(error);
}

/**
  This function is called when a transaction or a statement is rolled back.

  @param hton  The binlog handlerton.
  @param thd   The client thread that executes the transaction.
  @param all   This is @c true if this is a real transaction rollback, and
               @c false otherwise.
+ + @see handlerton::rollback +*/ +static int binlog_rollback(handlerton *hton, THD *thd, bool all) +{ + DBUG_ENTER("binlog_rollback"); + + int error= 0; + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + + if (!cache_mngr) + { + DBUG_ASSERT(WSREP(thd)); + DBUG_ASSERT(thd->lex->sql_command != SQLCOM_XA_ROLLBACK); + + DBUG_RETURN(0); + } + + DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s", + YESNO(all), + YESNO(thd->transaction->all.modified_non_trans_table), + YESNO(thd->transaction->stmt.modified_non_trans_table))); + + /* + If an incident event is set we do not flush the content of the statement + cache because it may be corrupted. + */ + if (cache_mngr->stmt_cache.has_incident()) + { + error |= static_cast(mysql_bin_log.write_incident(thd)); + cache_mngr->reset(true, false); + } + else if (!cache_mngr->stmt_cache.empty()) + { + error |= binlog_commit_flush_stmt_cache(thd, all, cache_mngr); + } + + if (!cache_mngr->trx_cache.has_incident() && cache_mngr->trx_cache.empty() && + (thd->transaction->xid_state.get_state_code() != XA_PREPARED || + !(thd->ha_data[binlog_hton->slot].ha_info[1].is_started() && + thd->ha_data[binlog_hton->slot].ha_info[1].is_trx_read_write()))) + { + /* + The same comments apply as in the binlog commit method's branch. + */ + cache_mngr->reset(false, true); + thd->reset_binlog_for_next_statement(); + DBUG_RETURN(error); + } + if (!wsrep_emulate_bin_log && mysql_bin_log.check_write_error(thd)) + { + /* + "all == true" means that a "rollback statement" triggered the error and + this function was called. However, this must not happen as a rollback + is written directly to the binary log. And in auto-commit mode, a single + statement that is rolled back has the flag all == false. + */ + DBUG_ASSERT(!all); + /* + We reach this point if the effect of a statement did not properly get into + a cache and need to be rolled back. 
+ */ + error |= binlog_truncate_trx_cache(thd, cache_mngr, all); + } + else if (likely(!error)) + { + ulong binlog_format= thd->wsrep_binlog_format(thd->variables.binlog_format); + if (ending_trans(thd, all) && trans_cannot_safely_rollback(thd, all)) + error= binlog_rollback_flush_trx_cache(thd, all, cache_mngr); + /* + Truncate the cache if: + . aborting a single or multi-statement transaction or; + . the current statement created or dropped a temporary table + while having actual STATEMENT format; + . the format is not STMT or no non-trans table was + updated and; + . the format is not MIXED or no temporary non-trans table + was updated. + */ + else if (ending_trans(thd, all) || + (!(thd->transaction->stmt.has_created_dropped_temp_table() && + !thd->is_current_stmt_binlog_format_row()) && + (!stmt_has_updated_non_trans_table(thd) || + binlog_format != BINLOG_FORMAT_STMT) && + (!thd->transaction->stmt.has_modified_non_trans_temp_table() || + binlog_format != BINLOG_FORMAT_MIXED))) + error= binlog_truncate_trx_cache(thd, cache_mngr, all); + } + + /* + This is part of the stmt rollback. + */ + if (!all) + cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF); + thd->reset_binlog_for_next_statement(); + + DBUG_RETURN(error); +} + + +void binlog_reset_cache(THD *thd) +{ + binlog_cache_mngr *const cache_mngr= opt_bin_log ? 
    (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton) : 0;
  DBUG_ENTER("binlog_reset_cache");
  if (cache_mngr)
  {
    /* Drop any pending row events first, then clear both caches */
    thd->binlog_remove_pending_rows_event(TRUE, TRUE);
    cache_mngr->reset(true, true);
  }
  DBUG_VOID_RETURN;
}


/**
  Record a binlog write failure on this log and raise the matching
  client error (cache-full for EFBIG, generic write error otherwise).
  Does nothing further if an error of this class was already raised
  (see check_write_error()).
*/
void MYSQL_BIN_LOG::set_write_error(THD *thd, bool is_transactional)
{
  DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");

  write_error= 1;

  if (unlikely(check_write_error(thd)))
    DBUG_VOID_RETURN;

  if (my_errno == EFBIG)
  {
    if (is_transactional)
    {
      my_message(ER_TRANS_CACHE_FULL, ER_THD(thd, ER_TRANS_CACHE_FULL), MYF(0));
    }
    else
    {
      my_message(ER_STMT_CACHE_FULL, ER_THD(thd, ER_STMT_CACHE_FULL), MYF(0));
    }
  }
  else
  {
    my_error(ER_ERROR_ON_WRITE, MYF(0), name, errno);
  }
#ifdef WITH_WSREP
  /* If wsrep transaction is active and binlog emulation is on,
     binlog write error may leave transaction without any registered
     htons. This makes wsrep rollback hooks to be skipped and the
     transaction will remain alive in wsrep world after rollback.
     Register binlog hton here to ensure that rollback happens in full. */
  if (WSREP_EMULATE_BINLOG(thd))
  {
    if (is_transactional)
      trans_register_ha(thd, TRUE, binlog_hton, 0);
    trans_register_ha(thd, FALSE, binlog_hton, 0);
  }
#endif /* WITH_WSREP */
  DBUG_VOID_RETURN;
}

/**
  @return TRUE if the error currently set on @c thd is one of the
          binlog-write error classes (cache full, write error, logging
          impossible), FALSE otherwise or when no error is set.
*/
bool MYSQL_BIN_LOG::check_write_error(THD *thd)
{
  DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");

  bool checked= FALSE;

  if (likely(!thd->is_error()))
    DBUG_RETURN(checked);

  switch (thd->get_stmt_da()->sql_errno())
  {
    case ER_TRANS_CACHE_FULL:
    case ER_STMT_CACHE_FULL:
    case ER_ERROR_ON_WRITE:
    case ER_BINLOG_LOGGING_IMPOSSIBLE:
      checked= TRUE;
      break;
  }

  DBUG_RETURN(checked);
}


/**
  @note
  How do we handle this (unlikely but legal) case:
  @verbatim
    [transaction] [update to non-trans table] [rollback to savepoint] ?
  @endverbatim
  The problem occurs when a savepoint is before the update to the
  non-transactional table.
Then when there's a rollback to the savepoint, if we + simply truncate the binlog cache, we lose the part of the binlog cache where + the update is. If we want to not lose it, we need to write the SAVEPOINT + command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter + is easy: it's just write at the end of the binlog cache, but the former + should be *inserted* to the place where the user called SAVEPOINT. The + solution is that when the user calls SAVEPOINT, we write it to the binlog + cache (so no need to later insert it). As transactions are never intermixed + in the binary log (i.e. they are serialized), we won't have conflicts with + savepoint names when using mysqlbinlog or in the slave SQL thread. + Then when ROLLBACK TO SAVEPOINT is called, if we updated some + non-transactional table, we don't truncate the binlog cache but instead write + ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which + will chop the SAVEPOINT command from the binlog cache, which is good as in + that case there is no need to have it in the binlog). +*/ + +static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv) +{ + int error= 1; + DBUG_ENTER("binlog_savepoint_set"); + + char buf[1024]; + + String log_query(buf, sizeof(buf), &my_charset_bin); + if (log_query.copy(STRING_WITH_LEN("SAVEPOINT "), &my_charset_bin) || + append_identifier(thd, &log_query, &thd->lex->ident)) + DBUG_RETURN(1); + int errcode= query_error_code(thd, thd->killed == NOT_KILLED); + Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(), + TRUE, FALSE, TRUE, errcode); + /* + We cannot record the position before writing the statement + because a rollback to a savepoint (.e.g. consider it "S") would + prevent the savepoint statement (i.e. "SAVEPOINT S") from being + written to the binary log despite the fact that the server could + still issue other rollback statements to the same savepoint (i.e. + "S"). 
+ Given that the savepoint is valid until the server releases it, + ie, until the transaction commits or it is released explicitly, + we need to log it anyway so that we don't have "ROLLBACK TO S" + or "RELEASE S" without the preceding "SAVEPOINT S" in the binary + log. + */ + if (likely(!(error= mysql_bin_log.write(&qinfo)))) + binlog_trans_log_savepos(thd, (my_off_t*) sv); + + DBUG_RETURN(error); +} + +static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv) +{ + DBUG_ENTER("binlog_savepoint_rollback"); + + /* + Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some + non-transactional table. Otherwise, truncate the binlog cache starting + from the SAVEPOINT command. + */ +#ifdef WITH_WSREP + /* for streaming replication, we must replicate savepoint rollback so that + slaves can maintain SR transactions + */ + if (unlikely(thd->wsrep_trx().is_streaming() || + (trans_has_updated_non_trans_table(thd)) || + (thd->variables.option_bits & OPTION_BINLOG_THIS_TRX))) +#else + if (unlikely(trans_has_updated_non_trans_table(thd) || + (thd->variables.option_bits & OPTION_BINLOG_THIS_TRX))) +#endif /* WITH_WSREP */ + { + char buf[1024]; + String log_query(buf, sizeof(buf), &my_charset_bin); + if (log_query.copy(STRING_WITH_LEN("ROLLBACK TO "), &my_charset_bin) || + append_identifier(thd, &log_query, &thd->lex->ident)) + DBUG_RETURN(1); + int errcode= query_error_code(thd, thd->killed == NOT_KILLED); + Query_log_event qinfo(thd, log_query.ptr(), log_query.length(), + TRUE, FALSE, TRUE, errcode); + DBUG_RETURN(mysql_bin_log.write(&qinfo)); + } + + binlog_trans_log_truncate(thd, *(my_off_t*)sv); + + /* + When a SAVEPOINT is executed inside a stored function/trigger we force the + pending event to be flushed with a STMT_END_F flag and reset binlog + as well to ensure that following DMLs will have a clean state to start + with. 
ROLLBACK inside a stored routine has to finalize possibly existing + current row-based pending event with cleaning up table maps. That ensures + that following DMLs will have a clean state to start with. + */ + if (thd->in_sub_stmt) + thd->reset_binlog_for_next_statement(); + + DBUG_RETURN(0); +} + + +/** + Check whether binlog state allows to safely release MDL locks after + rollback to savepoint. + + @param hton The binlog handlerton. + @param thd The client thread that executes the transaction. + + @return true - It is safe to release MDL locks. + false - If it is not. +*/ +static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton, + THD *thd) +{ + DBUG_ENTER("binlog_savepoint_rollback_can_release_mdl"); + /* + If we have not updated any non-transactional tables rollback + to savepoint will simply truncate binlog cache starting from + SAVEPOINT command. So it should be safe to release MDL acquired + after SAVEPOINT command in this case. + */ + DBUG_RETURN(!trans_cannot_safely_rollback(thd, true)); +} + + +int check_binlog_magic(IO_CACHE* log, const char** errmsg) +{ + uchar magic[4]; + DBUG_ASSERT(my_b_tell(log) == 0); + + if (my_b_read(log, magic, sizeof(magic))) + { + *errmsg = "I/O error reading the header from the binary log"; + sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno, + log->error); + return 1; + } + if (bcmp(magic, BINLOG_MAGIC, sizeof(magic))) + { + *errmsg = "Binlog has bad magic number; It's not a binary log file that can be used by this version of MariaDB"; + return 1; + } + return 0; +} + + +File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg) +{ + File file; + DBUG_ENTER("open_binlog"); + + if ((file= mysql_file_open(key_file_binlog, + log_file_name, O_RDONLY | O_BINARY | O_SHARE, + MYF(MY_WME))) < 0) + { + sql_print_error("Failed to open log (file '%s', errno %d)", + log_file_name, my_errno); + *errmsg = "Could not open log file"; + goto err; + } + if (init_io_cache_ext(log, file, 
(size_t)binlog_file_cache_size, READ_CACHE, + 0, 0, MYF(MY_WME|MY_DONT_CHECK_FILESIZE), key_file_binlog_cache)) + { + sql_print_error("Failed to create a cache on log (file '%s')", + log_file_name); + *errmsg = "Could not open log file"; + goto err; + } + if (check_binlog_magic(log,errmsg)) + goto err; + DBUG_RETURN(file); + +err: + if (file >= 0) + { + mysql_file_close(file, MYF(0)); + end_io_cache(log); + } + DBUG_RETURN(-1); +} + +#ifdef _WIN32 +static int eventSource = 0; + +static void setup_windows_event_source() +{ + HKEY hRegKey= NULL; + DWORD dwError= 0; + TCHAR szPath[MAX_PATH]; + DWORD dwTypes; + + if (eventSource) // Ensure that we are only called once + return; + eventSource= 1; + + // Create the event source registry key + dwError= RegCreateKey(HKEY_LOCAL_MACHINE, + "SYSTEM\\CurrentControlSet\\Services\\EventLog\\Application\\MariaDB", + &hRegKey); + + /* Name of the PE module that contains the message resource */ + GetModuleFileName(NULL, szPath, MAX_PATH); + + /* Register EventMessageFile */ + dwError = RegSetValueEx(hRegKey, "EventMessageFile", 0, REG_EXPAND_SZ, + (PBYTE) szPath, (DWORD) (strlen(szPath) + 1)); + + /* Register supported event types */ + dwTypes= (EVENTLOG_ERROR_TYPE | EVENTLOG_WARNING_TYPE | + EVENTLOG_INFORMATION_TYPE); + dwError= RegSetValueEx(hRegKey, "TypesSupported", 0, REG_DWORD, + (LPBYTE) &dwTypes, sizeof dwTypes); + + RegCloseKey(hRegKey); +} + +#endif /* _WIN32 */ + + +/** + Find a unique filename for 'filename.#'. + + Set '#' to the number next to the maximum found in the most + recent log file extension. + + This function will return nonzero if: (i) the generated name + exceeds FN_REFLEN; (ii) if the number of extensions is exhausted; + or (iii) some other error happened while examining the filesystem. + + @param name Base name of file + @param min_log_number_to_use minimum log number to choose. Set by + CHANGE MASTER .. TO + @param last_used_log_number If 0, find log number based on files. 
+ If not 0, then use *last_used_log_number +1 + Will be update to new generated number + @return + 0 ok + nonzero if not possible to get unique filename. +*/ + +static int find_uniq_filename(char *name, ulong min_log_number_to_use, + ulong *last_used_log_number) +{ + char buff[FN_REFLEN], ext_buf[FN_REFLEN]; + struct st_my_dir *dir_info; + struct fileinfo *file_info; + ulong max_found= 0, next= 0, number= 0; + size_t i, buf_length, length; + char *start, *end; + int error= 0; + DBUG_ENTER("find_uniq_filename"); + + length= dirname_part(buff, name, &buf_length); + start= name + length; + end= strend(start); + + *end='.'; + length= (size_t) (end - start + 1); + + /* The following matches the code for my_dir () below */ + DBUG_EXECUTE_IF("error_unique_log_filename", + { + strmov(end,".1"); + DBUG_RETURN(1); + }); + + if (*last_used_log_number) + max_found= *last_used_log_number; + else + { + if (unlikely(!(dir_info= my_dir(buff, MYF(MY_DONT_SORT))))) + { // This shouldn't happen + strmov(end,".1"); // use name+1 + DBUG_RETURN(1); + } + file_info= dir_info->dir_entry; + max_found= min_log_number_to_use ? min_log_number_to_use-1 : 0; + for (i= dir_info->number_of_files ; i-- ; file_info++) + { + if (strncmp(file_info->name, start, length) == 0 && + test_if_number(file_info->name+length, &number,0)) + { + set_if_bigger(max_found, number); + } + } + my_dirend(dir_info); + } + + /* check if reached the maximum possible extension number */ + if (max_found >= MAX_LOG_UNIQUE_FN_EXT) + { + sql_print_error("Log filename extension number exhausted: %06lu. \ +Please fix this by archiving old logs and \ +updating the index files.", max_found); + error= 1; + goto end; + } + + next= max_found + 1; + if (sprintf(ext_buf, "%06lu", next)<0) + { + error= 1; + goto end; + } + *end++='.'; + + /* + Check if the generated extension size + the file name exceeds the + buffer size used. If one did not check this, then the filename might be + truncated, resulting in error. 
+ */ + if (((strlen(ext_buf) + (end - name)) >= FN_REFLEN)) + { + sql_print_error("Log filename too large: %s%s (%zu). \ +Please fix this by archiving old logs and updating the \ +index files.", name, ext_buf, (strlen(ext_buf) + (end - name))); + error= 1; + goto end; + } + + if (sprintf(end, "%06lu", next)<0) + { + error= 1; + goto end; + } + *last_used_log_number= next; + + /* print warning if reaching the end of available extensions. */ + if ((next > (MAX_LOG_UNIQUE_FN_EXT - LOG_WARN_UNIQUE_FN_EXT_LEFT))) + sql_print_warning("Next log extension: %lu. \ +Remaining log filename extensions: %lu. \ +Please consider archiving some logs.", next, (MAX_LOG_UNIQUE_FN_EXT - next)); + +end: + DBUG_RETURN(error); +} + + +bool MYSQL_LOG::init_and_set_log_file_name(const char *log_name, + const char *new_name, + ulong next_log_number, + enum_log_type log_type_arg, + enum cache_type io_cache_type_arg) +{ + log_type= log_type_arg; + io_cache_type= io_cache_type_arg; + + if (new_name) + { + strmov(log_file_name, new_name); + } + else if (!new_name && generate_new_name(log_file_name, log_name, + next_log_number)) + return TRUE; + + return FALSE; +} + + +/* + Open a (new) log file. + + SYNOPSIS + open() + + log_name The name of the log to open + log_type_arg The type of the log. E.g. LOG_NORMAL + new_name The new name for the logfile. This is only needed + when the method is used to open the binlog file. + io_cache_type_arg The type of the IO_CACHE to use for this log file + + DESCRIPTION + Open the logfile, init IO_CACHE and write startup messages + (in case of general and slow query logs). 
+ + RETURN VALUES + 0 ok + 1 error +*/ + +bool MYSQL_LOG::open( +#ifdef HAVE_PSI_INTERFACE + PSI_file_key log_file_key, +#endif + const char *log_name, enum_log_type log_type_arg, + const char *new_name, ulong next_log_number, + enum cache_type io_cache_type_arg) +{ + char buff[FN_REFLEN]; + MY_STAT f_stat; + File file= -1; + my_off_t seek_offset; + bool is_fifo = false; + int open_flags= O_CREAT | O_BINARY | O_CLOEXEC; + DBUG_ENTER("MYSQL_LOG::open"); + DBUG_PRINT("enter", ("log_type: %d", (int) log_type_arg)); + + write_error= 0; + + if (!(name= my_strdup(key_memory_MYSQL_LOG_name, log_name, MYF(MY_WME)))) + { + name= (char *)log_name; // for the error message + goto err; + } + + /* + log_type is LOG_UNKNOWN if we should not generate a new name + This is only used when called from MYSQL_BINARY_LOG::open, which + has already updated log_file_name. + */ + if (log_type_arg != LOG_UNKNOWN && + init_and_set_log_file_name(name, new_name, next_log_number, + log_type_arg, io_cache_type_arg)) + goto err; + + is_fifo = my_stat(log_file_name, &f_stat, MYF(0)) && + MY_S_ISFIFO(f_stat.st_mode); + + if (io_cache_type == SEQ_READ_APPEND) + open_flags |= O_RDWR | O_APPEND; + else + open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND); + + if (is_fifo) + open_flags |= O_NONBLOCK; + + db[0]= 0; + +#ifdef HAVE_PSI_INTERFACE + /* Keep the key for reopen */ + m_log_file_key= log_file_key; +#endif + + if ((file= mysql_file_open(log_file_key, log_file_name, open_flags, + MYF(MY_WME))) < 0) + goto err; + + if (is_fifo) + seek_offset= 0; + else if ((seek_offset= mysql_file_tell(file, MYF(MY_WME)))) + goto err; + + if (init_io_cache(&log_file, file, (log_type == LOG_NORMAL ? IO_SIZE : + LOG_BIN_IO_SIZE), + io_cache_type, seek_offset, 0, + MYF(MY_WME | MY_NABP | + ((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0)))) + goto err; + + if (log_type == LOG_NORMAL) + { + char *end; + size_t len=my_snprintf(buff, sizeof(buff), "%s, Version: %s (%s). 
" +#ifdef EMBEDDED_LIBRARY + "embedded library\n", + my_progname, server_version, MYSQL_COMPILATION_COMMENT +#elif defined(_WIN32) + "started with:\nTCP Port: %d, Named Pipe: %s\n", + my_progname, server_version, MYSQL_COMPILATION_COMMENT, + mysqld_port, mysqld_unix_port +#else + "started with:\nTcp port: %d Unix socket: %s\n", + my_progname, server_version, MYSQL_COMPILATION_COMMENT, + mysqld_port, mysqld_unix_port +#endif + ); + end= strnmov(buff + len, "Time\t\t Id Command\tArgument\n", + sizeof(buff) - len); + if (my_b_write(&log_file, (uchar*) buff, (uint) (end-buff)) || + flush_io_cache(&log_file)) + goto err; + } + + log_state= LOG_OPENED; + DBUG_RETURN(0); + +err: + sql_print_error(fatal_log_error, name, errno); + if (file >= 0) + mysql_file_close(file, MYF(0)); + end_io_cache(&log_file); + my_free(name); + name= NULL; + log_state= LOG_CLOSED; + DBUG_RETURN(1); +} + +MYSQL_LOG::MYSQL_LOG() + : name(0), write_error(FALSE), inited(FALSE), log_type(LOG_UNKNOWN), + log_state(LOG_CLOSED) +{ + /* + We don't want to initialize LOCK_Log here as such initialization depends on + safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is + called only in main(). Doing initialization here would make it happen + before main(). + */ + bzero((char*) &log_file, sizeof(log_file)); +} + +void MYSQL_LOG::init_pthread_objects() +{ + DBUG_ASSERT(inited == 0); + inited= 1; + mysql_mutex_init(key_LOG_LOCK_log, &LOCK_log, MY_MUTEX_INIT_SLOW); +} + +/* + Close the log file + + SYNOPSIS + close() + exiting Bitmask. LOG_CLOSE_TO_BE_OPENED is used if we intend to call + open at once after close. LOG_CLOSE_DELAYED_CLOSE is used for + binlog rotation, to delay actual close of the old file until + we have successfully created the new file. + + NOTES + One can do an open on the object at once after doing a close. + The internal structures are not freed until cleanup() is called +*/ + +void MYSQL_LOG::close(uint exiting) +{ // One can't set log_type here! 
+ DBUG_ENTER("MYSQL_LOG::close"); + DBUG_PRINT("enter",("exiting: %d", (int) exiting)); + if (log_state == LOG_OPENED) + { + end_io_cache(&log_file); + + if (log_type == LOG_BIN && mysql_file_sync(log_file.file, MYF(MY_WME)) && ! write_error) + { + write_error= 1; + sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno); + } + + if (!(exiting & LOG_CLOSE_DELAYED_CLOSE) && + mysql_file_close(log_file.file, MYF(MY_WME)) && ! write_error) + { + write_error= 1; + sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno); + } + } + + log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED; + my_free(name); + name= NULL; + DBUG_VOID_RETURN; +} + +/** This is called only once. */ + +void MYSQL_LOG::cleanup() +{ + DBUG_ENTER("cleanup"); + if (inited) + { + inited= 0; + mysql_mutex_destroy(&LOCK_log); + close(0); + } + DBUG_VOID_RETURN; +} + + +int MYSQL_LOG::generate_new_name(char *new_name, const char *log_name, + ulong next_log_number) +{ + fn_format(new_name, log_name, mysql_data_home, "", 4); + return 0; +} + +int MYSQL_BIN_LOG::generate_new_name(char *new_name, const char *log_name, + ulong next_log_number) +{ + fn_format(new_name, log_name, mysql_data_home, "", 4); + if (!fn_ext(log_name)[0]) + { + if (DBUG_IF("binlog_inject_new_name_error") || + unlikely(find_uniq_filename(new_name, next_log_number, + &last_used_log_number))) + { + THD *thd= current_thd; + if (unlikely(thd)) + my_error(ER_NO_UNIQUE_LOGFILE, MYF(ME_FATAL), log_name); + sql_print_error(ER_DEFAULT(ER_NO_UNIQUE_LOGFILE), log_name); + return 1; + } + } + return 0; +} + + +/* + Reopen the log file + + SYNOPSIS + reopen_file() + + DESCRIPTION + Reopen the log file. 
  The method is used during FLUSH LOGS
    and locks LOCK_log mutex
*/


void MYSQL_QUERY_LOG::reopen_file()
{
  char *save_name;
  DBUG_ENTER("MYSQL_LOG::reopen_file");

  mysql_mutex_lock(&LOCK_log);
  if (!is_open())
  {
    DBUG_PRINT("info",("log is closed"));
    mysql_mutex_unlock(&LOCK_log);
    DBUG_VOID_RETURN;
  }

  save_name= name;
  name= 0;				// Don't free name
  close(LOG_CLOSE_TO_BE_OPENED);

  /*
    Note that at this point, log_state != LOG_CLOSED (important for is_open()).
  */

  open(
#ifdef HAVE_PSI_INTERFACE
       m_log_file_key,
#endif
       save_name, log_type, 0, 0, io_cache_type);
  my_free(save_name);

  mysql_mutex_unlock(&LOCK_log);

  DBUG_VOID_RETURN;
}


/*
  Write a command to traditional general log file

  SYNOPSIS
    write()

    event_time        command start timestamp
    user_host         the pointer to the string with user@host info
    user_host_len     length of the user_host string. this is computed once
                      and passed to all general log event handlers
    thread_id         Id of the thread, issued a query
    command_type      the type of the command being logged
    command_type_len  the length of the string above
    sql_text          the very text of the query being executed
    sql_text_len      the length of sql_text string

  DESCRIPTION

    Log given command to normal (not rotatable) log file

  RETURN
    FALSE - OK
    TRUE - error occurred
*/

bool MYSQL_QUERY_LOG::write(time_t event_time, const char *user_host,
                            size_t user_host_len, my_thread_id thread_id_arg,
                            const char *command_type, size_t command_type_len,
                            const char *sql_text, size_t sql_text_len)
{
  char buff[32];
  char local_time_buff[MAX_TIME_SIZE];
  struct tm start;
  size_t time_buff_len= 0;

  mysql_mutex_lock(&LOCK_log);

  /* Test if someone closed between the is_open test and lock */
  if (is_open())
  {
    /* for testing output of timestamp and thread id */
    DBUG_EXECUTE_IF("reset_log_last_time", last_time= 0;);

    /* Note that my_b_write() assumes it knows the length for this */
    if (event_time != last_time)
    {
      last_time= event_time;

      localtime_r(&event_time, &start);

      time_buff_len= my_snprintf(local_time_buff, MAX_TIME_SIZE,
                                 "%02d%02d%02d %2d:%02d:%02d\t",
                                 start.tm_year % 100, start.tm_mon + 1,
                                 start.tm_mday, start.tm_hour,
                                 start.tm_min, start.tm_sec);

      if (my_b_write(&log_file, (uchar*) local_time_buff, time_buff_len))
        goto err;
    }
    else
      /*
        NOTE(review): the "< 0" comparison differs from every other
        my_b_write() check in this function — confirm it is intended.
      */
      if (my_b_write(&log_file, (uchar*) "\t\t" ,2) < 0)
        goto err;

    /* command_type, thread_id */
    size_t length= my_snprintf(buff, 32, "%6llu ", thread_id_arg);

    if (my_b_write(&log_file, (uchar*) buff, length))
      goto err;

    if (my_b_write(&log_file, (uchar*) command_type, command_type_len))
      goto err;

    if (my_b_write(&log_file, (uchar*) "\t", 1))
      goto err;

    /* sql_text */
    if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len))
      goto err;

    if (my_b_write(&log_file, (uchar*) "\n", 1) ||
        flush_io_cache(&log_file))
      goto err;
  }

  mysql_mutex_unlock(&LOCK_log);
  return FALSE;
err:

  if (!write_error)
  {
    write_error= 1;
    sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
  }
  mysql_mutex_unlock(&LOCK_log);
  return TRUE;
}


/*
  Log a query to the traditional slow log file

  SYNOPSIS
    write()

    thd               THD of the query
    current_time      current timestamp
    user_host         the pointer to the string with user@host info
    user_host_len     length of the user_host string. this is computed once
                      and passed to all general log event handlers
    query_utime       Amount of time the query took to execute (in microseconds)
    lock_utime        Amount of time the query was locked (in microseconds)
    is_command        The flag, which determines, whether the sql_text is a
                      query or an administrator command.
    sql_text          the very text of the query or administrator command
                      processed
    sql_text_len      the length of sql_text string

  DESCRIPTION

    Log a query to the slow log file.
+ + RETURN + FALSE - OK + TRUE - error occurred +*/ + +bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time, + const char *user_host, size_t user_host_len, + ulonglong query_utime, + ulonglong lock_utime, bool is_command, + const char *sql_text, size_t sql_text_len) +{ + bool error= 0; + char llbuff[22]; + DBUG_ENTER("MYSQL_QUERY_LOG::write"); + + mysql_mutex_lock(&LOCK_log); + if (is_open()) + { // Safety against reopen + char buff[80], *end; + char query_time_buff[22+7], lock_time_buff[22+7]; + size_t buff_len; + ulonglong log_slow_verbosity= thd->variables.log_slow_verbosity; + if (log_slow_verbosity & LOG_SLOW_VERBOSITY_FULL) + log_slow_verbosity= ~(ulonglong) 0; + + end= buff; + + if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT)) + { + if (current_time != last_time) + { + last_time= current_time; + struct tm start; + localtime_r(¤t_time, &start); + + buff_len= my_snprintf(buff, sizeof buff, + "# Time: %02d%02d%02d %2d:%02d:%02d\n", + start.tm_year % 100, start.tm_mon + 1, + start.tm_mday, start.tm_hour, + start.tm_min, start.tm_sec); + + /* Note that my_b_write() assumes it knows the length for this */ + if (my_b_write(&log_file, (uchar*) buff, buff_len)) + goto err; + } + const uchar uh[]= "# User@Host: "; + if (my_b_write(&log_file, uh, sizeof(uh) - 1) || + my_b_write(&log_file, (uchar*) user_host, user_host_len) || + my_b_write(&log_file, (uchar*) "\n", 1)) + goto err; + + sprintf(query_time_buff, "%.6f", ulonglong2double(query_utime)/1000000.0); + sprintf(lock_time_buff, "%.6f", ulonglong2double(lock_utime)/1000000.0); + if (my_b_printf(&log_file, + "# Thread_id: %lu Schema: %s QC_hit: %s\n" + "# Query_time: %s Lock_time: %s Rows_sent: %lu Rows_examined: %lu\n" + "# Rows_affected: %lu Bytes_sent: %lu\n", + (ulong) thd->thread_id, thd->get_db(), + ((thd->query_plan_flags & QPLAN_QC) ? 
"Yes" : "No"), + query_time_buff, lock_time_buff, + (ulong) thd->get_sent_row_count(), + (ulong) thd->get_examined_row_count(), + (ulong) thd->get_affected_rows(), + (ulong) (thd->status_var.bytes_sent - thd->bytes_sent_old))) + goto err; + + if (unlikely(log_slow_verbosity & + LOG_SLOW_VERBOSITY_ENGINE) && + thd->handler_stats.has_stats()) + { + ha_handler_stats *stats= &thd->handler_stats; + double tracker_frequency= timer_tracker_frequency(); + sprintf(query_time_buff, "%.4f", + 1000.0 * ulonglong2double(stats->pages_read_time)/ + tracker_frequency); + sprintf(lock_time_buff, "%.4f", + 1000.0 * ulonglong2double(stats->engine_time)/ + tracker_frequency); + + if (my_b_printf(&log_file, + "# Pages_accessed: %lu Pages_read: %lu " + "Pages_updated: %lu Old_rows_read: %lu\n" + "# Pages_read_time: %s Engine_time: %s\n", + (ulong) stats->pages_accessed, + (ulong) stats->pages_read_count, + (ulong) stats->pages_updated, + (ulong) stats->undo_records_read, + query_time_buff, lock_time_buff)) + goto err; + } + + if ((log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN) && + thd->tmp_tables_used && + my_b_printf(&log_file, + "# Tmp_tables: %lu Tmp_disk_tables: %lu " + "Tmp_table_sizes: %s\n", + (ulong) thd->tmp_tables_used, + (ulong) thd->tmp_tables_disk_used, + llstr(thd->tmp_tables_size, llbuff))) + goto err; + + if (thd->spcont && + my_b_printf(&log_file, "# Stored_routine: %s\n", + ErrConvDQName(thd->spcont->m_sp).ptr())) + goto err; + + if ((log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN) && + (thd->query_plan_flags & + (QPLAN_FULL_SCAN | QPLAN_FULL_JOIN | QPLAN_TMP_TABLE | + QPLAN_TMP_DISK | QPLAN_FILESORT | QPLAN_FILESORT_DISK | + QPLAN_FILESORT_PRIORITY_QUEUE)) && + my_b_printf(&log_file, + "# Full_scan: %s Full_join: %s " + "Tmp_table: %s Tmp_table_on_disk: %s\n" + "# Filesort: %s Filesort_on_disk: %s Merge_passes: %lu " + "Priority_queue: %s\n", + ((thd->query_plan_flags & QPLAN_FULL_SCAN) ? "Yes" : "No"), + ((thd->query_plan_flags & QPLAN_FULL_JOIN) ? 
"Yes" : "No"), + (thd->tmp_tables_used ? "Yes" : "No"), + (thd->tmp_tables_disk_used ? "Yes" : "No"), + ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"), + ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ? + "Yes" : "No"), + thd->query_plan_fsort_passes, + ((thd->query_plan_flags & QPLAN_FILESORT_PRIORITY_QUEUE) ? + "Yes" : "No") + )) + goto err; + if (log_slow_verbosity & LOG_SLOW_VERBOSITY_EXPLAIN && thd->lex->explain) + { + StringBuffer<128> buf; + DBUG_ASSERT(!thd->free_list); + if (!print_explain_for_slow_log(thd->lex, thd, &buf)) + if (my_b_printf(&log_file, "%s", buf.c_ptr_safe())) + goto err; + thd->free_items(); + } + if ((log_slow_verbosity & LOG_SLOW_VERBOSITY_WARNINGS) && + thd->get_stmt_da()->unsafe_statement_warn_count()) + { + Diagnostics_area::Sql_condition_iterator it= + thd->get_stmt_da()->sql_conditions(); + ulong idx, max_warnings= thd->variables.log_slow_max_warnings; + const Sql_condition *err; + my_b_printf(&log_file, "# Warnings\n"); + for (idx= 0; (err= it++) && idx < max_warnings; idx++) + { + my_b_printf(&log_file, "# %-15s %4u %.*s\n", + warning_level_names[err->get_level()].str, + (uint) err->get_sql_errno(), + (int) err->get_message_octet_length(), + err->get_message_text()); + } + } + if (thd->db.str && strcmp(thd->db.str, db)) + { // Database changed + if (my_b_printf(&log_file,"use %s;\n",thd->db.str)) + goto err; + strmov(db,thd->db.str); + } + if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt) + { + end=strmov(end, ",last_insert_id="); + end=longlong10_to_str((longlong) + thd->first_successful_insert_id_in_prev_stmt_for_binlog, + end, -10); + } + // Save value if we do an insert. 
+ if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0) + { + if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT)) + { + end=strmov(end,",insert_id="); + end=longlong10_to_str((longlong) + thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(), + end, -10); + } + } + /* + This info used to show up randomly, depending on whether the query + checked the query start time or not. now we always write current + timestamp to the slow log + */ + end= strmov(end, ",timestamp="); + end= int10_to_str((long) current_time, end, 10); + + if (end != buff) + { + *end++=';'; + *end='\n'; + if (my_b_write(&log_file, (uchar*) "SET ", 4) || + my_b_write(&log_file, (uchar*) buff + 1, (uint) (end-buff))) + goto err; + } + if (is_command) + { + end= strxmov(buff, "# administrator command: ", NullS); + buff_len= (ulong) (end - buff); + DBUG_EXECUTE_IF("simulate_slow_log_write_error", + {DBUG_SET("+d,simulate_file_write_error");}); + if(my_b_write(&log_file, (uchar*) buff, buff_len)) + goto err; + } + if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len) || + my_b_write(&log_file, (uchar*) ";\n",2) || + flush_io_cache(&log_file)) + goto err; + + } + } +end: + mysql_mutex_unlock(&LOCK_log); + DBUG_RETURN(error); + +err: + error= 1; + if (!write_error) + { + write_error= 1; + sql_print_error(ER_THD(thd, ER_ERROR_ON_WRITE), name, errno); + } + goto end; +} + + +/** + @todo + The following should be using fn_format(); We just need to + first change fn_format() to cut the file name if it's too long. 
*/

/**
  Generate a log file name from @c log_name, or from the pid file name when
  no name was given.

  @param log_name   Requested base name; may be NULL/empty, in which case
                    the server pid file name is used as the base.
  @param suffix     Extension to append when deriving from the pid file name.
  @param strip_ext  If true, strip any extension from @c log_name (used for
                    binary logs, where the extension is the sequence number).
  @param buff       Output buffer of at least FN_REFLEN bytes; only written
                    when a derived name is produced.

  @return Pointer to the generated name: @c buff when a name was derived,
          or @c log_name itself when it was usable as-is.
*/
const char *MYSQL_LOG::generate_name(const char *log_name,
                                     const char *suffix,
                                     bool strip_ext, char *buff)
{
  if (!log_name || !log_name[0])
  {
    /* No name given: derive one from the pid file, replacing its extension. */
    strmake(buff, pidfile_name, FN_REFLEN - strlen(suffix) - 1);
    return (const char *)
      fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
  }
  // get rid of extension if the log is binary to avoid problems
  if (strip_ext)
  {
    char *p= fn_ext(log_name);
    uint length= (uint) (p - log_name);
    strmake(buff, log_name, MY_MIN(length, FN_REFLEN-1));
    return (const char*)buff;
  }
  return log_name;
}


/*
  Print some additional information about addition/removal of
  XID list entries.
  TODO: Remove once MDEV-9510 is fixed.
*/
#ifdef WITH_WSREP
#define WSREP_XID_LIST_ENTRY(X, Y)                      \
  if (wsrep_debug)                                      \
  {                                                     \
    char buf[FN_REFLEN];                                \
    strmake(buf, Y->binlog_name, Y->binlog_name_len);   \
    WSREP_DEBUG(X, buf, Y->binlog_id);                  \
  }
#else
#define WSREP_XID_LIST_ENTRY(X, Y) do { } while(0)
#endif

/**
  Constructor: initializes all counters and state flags to their "not yet
  opened" values.  Deliberately does NOT initialize any mutexes/conditions
  (see comment in the body); init_pthread_objects() must be called after
  MY_INIT() for that.

  @param sync_period  Pointer to the sync-period system variable; stored so
                      that later changes to the variable take effect.
*/
MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
  :reset_master_pending(0), mark_xid_done_waiting(0),
   bytes_written(0), last_used_log_number(0),
   file_id(1), open_count(1),
   group_commit_queue(0), group_commit_queue_busy(FALSE),
   num_commits(0), num_group_commits(0),
   group_commit_trigger_count(0), group_commit_trigger_timeout(0),
   group_commit_trigger_lock_wait(0),
   sync_period_ptr(sync_period), sync_counter(0),
   state_file_deleted(false), binlog_state_recover_done(false),
   is_relay_log(0), relay_signal_cnt(0),
   checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
   relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
   description_event_for_exec(0), description_event_for_queue(0),
   current_binlog_id(0), reset_master_count(0)
{
  /*
    We don't want to initialize locks here as such initialization depends on
    safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
    called only in main(). Doing initialization here would make it happen
    before main().
  */
  index_file_name[0] = 0;
  bzero((char*) &index_file, sizeof(index_file));
  bzero((char*) &purge_index_file, sizeof(purge_index_file));
}

/**
  Ask the binlog background thread to stop and wait until it has done so.

  Signals the stop flag under LOCK_binlog_background_thread and waits for
  the thread to acknowledge (it clears binlog_background_thread_stop before
  signalling COND_binlog_background_thread_end).  Afterwards the flag is
  set back to true to mark that the thread must not be restarted.
*/
void MYSQL_BIN_LOG::stop_background_thread()
{
  if (binlog_background_thread_started)
  {
    mysql_mutex_lock(&LOCK_binlog_background_thread);
    binlog_background_thread_stop= true;
    mysql_cond_signal(&COND_binlog_background_thread);
    /* The background thread resets the flag when it has actually stopped. */
    while (binlog_background_thread_stop)
      mysql_cond_wait(&COND_binlog_background_thread_end,
                      &LOCK_binlog_background_thread);
    mysql_mutex_unlock(&LOCK_binlog_background_thread);
    binlog_background_thread_started= false;
    binlog_background_thread_stop= true;        // mark it's not going to restart
  }
}

/* this is called only once */

void MYSQL_BIN_LOG::cleanup()
{
  DBUG_ENTER("cleanup");
  if (inited)
  {
    xid_count_per_binlog *b;

    /* Wait for the binlog background thread to stop. */
    if (!is_relay_log)
      stop_background_thread();

    inited= 0;
    mysql_mutex_lock(&LOCK_log);
    close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
    mysql_mutex_unlock(&LOCK_log);
    delete description_event_for_queue;
    delete description_event_for_exec;

    /* Drain the XID-count list; see assertions below for the invariant. */
    while ((b= binlog_xid_count_list.get()))
    {
      /*
        There should be no pending XIDs at shutdown, and only one entry (for
        the active binlog file) in the list.
      */
      DBUG_ASSERT(b->xid_count == 0);
      DBUG_ASSERT(!binlog_xid_count_list.head());
      WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::cleanup(): Removing xid_list_entry "
                           "for %s (%lu)", b);
      delete b;
    }

    mysql_mutex_destroy(&LOCK_log);
    mysql_mutex_destroy(&LOCK_index);
    mysql_mutex_destroy(&LOCK_xid_list);
    mysql_mutex_destroy(&LOCK_binlog_background_thread);
    mysql_mutex_destroy(&LOCK_binlog_end_pos);
    mysql_cond_destroy(&COND_relay_log_updated);
    mysql_cond_destroy(&COND_bin_log_updated);
    mysql_cond_destroy(&COND_queue_busy);
    mysql_cond_destroy(&COND_xid_list);
    mysql_cond_destroy(&COND_binlog_background_thread);
    mysql_cond_destroy(&COND_binlog_background_thread_end);
  }

  /*
    Free data for global binlog state.
    We can't do that automatically as we need to do this before
    safemalloc is shut down
  */
  if (!is_relay_log)
    rpl_global_gtid_binlog_state.free();
  DBUG_VOID_RETURN;
}


/* Init binlog-specific vars */
void MYSQL_BIN_LOG::init(ulong max_size_arg)
{
  DBUG_ENTER("MYSQL_BIN_LOG::init");
  max_size= max_size_arg;
  DBUG_PRINT("info",("max_size: %lu", max_size));
  DBUG_VOID_RETURN;
}


/**
  Initialize all mutexes and condition variables used by the binary log.
  Must be called after MY_INIT() (see constructor comment); extends the
  base-class initialization with binlog-specific synchronization objects.
*/
void MYSQL_BIN_LOG::init_pthread_objects()
{
  MYSQL_LOG::init_pthread_objects();
  mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
  mysql_mutex_setflags(&LOCK_index, MYF_NO_DEADLOCK_DETECTION);
  mysql_mutex_init(key_BINLOG_LOCK_xid_list,
                   &LOCK_xid_list, MY_MUTEX_INIT_FAST);
  mysql_cond_init(m_key_relay_log_update, &COND_relay_log_updated, 0);
  mysql_cond_init(m_key_bin_log_update, &COND_bin_log_updated, 0);
  mysql_cond_init(m_key_COND_queue_busy, &COND_queue_busy, 0);
  mysql_cond_init(key_BINLOG_COND_xid_list, &COND_xid_list, 0);

  mysql_mutex_init(key_BINLOG_LOCK_binlog_background_thread,
                   &LOCK_binlog_background_thread, MY_MUTEX_INIT_FAST);
  mysql_cond_init(key_BINLOG_COND_binlog_background_thread,
                  &COND_binlog_background_thread, 0);
  mysql_cond_init(key_BINLOG_COND_binlog_background_thread_end,
                  &COND_binlog_background_thread_end, 0);

  mysql_mutex_init(m_key_LOCK_binlog_end_pos, &LOCK_binlog_end_pos,
                   MY_MUTEX_INIT_SLOW);
}


/**
  Open (creating if needed) the binlog index file and reconcile it with the
  file system via the purge-index mechanism.

  @param index_file_name_arg  Index file name, or NULL to derive it from
                              @c log_name by replacing the extension.
  @param log_name             Log base name used when no index name given.
  @param need_mutex           Passed to purge_index_entry(); true if LOCK_index
                              is not already held by the caller.

  @retval FALSE  ok
  @retval TRUE   error (file could not be opened/synced/recovered)
*/
bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
                                    const char *log_name, bool need_mutex)
{
  File index_file_nr= -1;
  DBUG_ASSERT(!my_b_inited(&index_file));

  /*
    First open of this class instance
    Create an index file that will hold all file names uses for logging.
    Add new entries to the end of it.
  */
  myf opt= MY_UNPACK_FILENAME;
  if (!index_file_name_arg)
  {
    index_file_name_arg= log_name;    // Use same basename for index file
    opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
  }
  fn_format(index_file_name, index_file_name_arg, mysql_data_home,
            ".index", opt);
  /* Open, sync, and attach a WRITE_CACHE positioned at end-of-file. */
  if ((index_file_nr= mysql_file_open(m_key_file_log_index,
                                      index_file_name,
                                      O_RDWR | O_CREAT | O_BINARY | O_CLOEXEC,
                                      MYF(MY_WME))) < 0 ||
      mysql_file_sync(index_file_nr, MYF(MY_WME)) ||
      init_io_cache_ext(&index_file, index_file_nr,
                        IO_SIZE, WRITE_CACHE,
                        mysql_file_seek(index_file_nr, 0L, MY_SEEK_END, MYF(0)),
                        0, MYF(MY_WME | MY_WAIT_IF_FULL),
                        m_key_file_log_index_cache) ||
      DBUG_IF("fault_injection_openning_index"))
  {
    /*
      TODO: all operations creating/deleting the index file or a log, should
      call my_sync_dir() or my_sync_dir_by_file() to be durable.
      TODO: file creation should be done with mysql_file_create()
      not mysql_file_open().
    */
    if (index_file_nr >= 0)
      mysql_file_close(index_file_nr, MYF(0));
    return TRUE;
  }

#ifdef HAVE_REPLICATION
  /*
    Sync the index by purging any binary log file that is not registered.
    In other words, either purge binary log files that were removed from
    the index but not purged from the file system due to a crash or purge
    any binary log file that was created but not register in the index
    due to a crash.
  */

  if (set_purge_index_file_name(index_file_name_arg) ||
      open_purge_index_file(FALSE) ||
      purge_index_entry(NULL, NULL, need_mutex) ||
      close_purge_index_file() ||
      DBUG_IF("fault_injection_recovering_index"))
  {
    sql_print_error("MYSQL_BIN_LOG::open_index_file failed to sync the index "
                    "file.");
    return TRUE;
  }
#endif

  return FALSE;
}


/**
  Open a (new) binlog file.

  - Open the log file and the index file. Register the new
    file name in it
  - When calling this when the file is in use, you must have a locks
    on LOCK_log and LOCK_index.

  @retval
    0	ok
  @retval
    1	error
*/

bool MYSQL_BIN_LOG::open(const char *log_name,
                         const char *new_name,
                         ulong next_log_number,
                         enum cache_type io_cache_type_arg,
                         ulong max_size_arg,
                         bool null_created_arg,
                         bool need_mutex)
{
  File file= -1;
  xid_count_per_binlog *new_xid_list_entry= NULL, *b;
  DBUG_ENTER("MYSQL_BIN_LOG::open");

  mysql_mutex_assert_owner(&LOCK_log);

  if (!is_relay_log)
  {
    /* First open after startup: run binlog crash recovery exactly once. */
    if (!binlog_state_recover_done)
    {
      binlog_state_recover_done= true;
      if (do_binlog_recovery(opt_bin_logname, false))
        DBUG_RETURN(1);
    }

    /* Start the background thread unless it was deliberately stopped. */
    if ((!binlog_background_thread_started &&
         !binlog_background_thread_stop) &&
        start_binlog_background_thread())
      DBUG_RETURN(1);
  }

  /* We need to calculate new log file name for purge to delete old */
  if (init_and_set_log_file_name(log_name, new_name, next_log_number,
                                 LOG_BIN, io_cache_type_arg))
  {
    sql_print_error("MYSQL_BIN_LOG::open failed to generate new file name.");
    if (!is_relay_log)
      goto err;
    DBUG_RETURN(1);
  }

#ifdef HAVE_REPLICATION
  if (open_purge_index_file(TRUE) ||
      register_create_index_entry(log_file_name) ||
      sync_purge_index_file() ||
      DBUG_IF("fault_injection_registering_index"))
  {
    /**
      TODO:
      Although this was introduced to appease valgrind when
      injecting emulated faults using
      fault_injection_registering_index it may be good to consider
      what actually happens when open_purge_index_file succeeds but
      register or sync fails.

      Perhaps we might need the code below in MYSQL_LOG_BIN::cleanup
      for "real life" purposes as well?
    */
    DBUG_EXECUTE_IF("fault_injection_registering_index", {
      if (my_b_inited(&purge_index_file))
      {
        end_io_cache(&purge_index_file);
        my_close(purge_index_file.file, MYF(0));
      }
    });

    sql_print_error("MYSQL_BIN_LOG::open failed to sync the index file.");
    DBUG_RETURN(1);
  }
  DBUG_EXECUTE_IF("crash_create_non_critical_before_update_index", DBUG_SUICIDE(););
#endif

  write_error= 0;

  /* open the main log file */
  if (MYSQL_LOG::open(
#ifdef HAVE_PSI_INTERFACE
                      m_key_file_log,
#endif
                      log_name,
                      LOG_UNKNOWN, /* Don't generate new name */
                      0, 0, io_cache_type_arg))
  {
#ifdef HAVE_REPLICATION
    close_purge_index_file();
#endif
    DBUG_RETURN(1);                            /* all warnings issued */
  }

  init(max_size_arg);

  open_count++;

  DBUG_ASSERT(log_type == LOG_BIN);

  {
    bool write_file_name_to_index_file=0;

    if (!my_b_filelength(&log_file))
    {
      /*
        The binary log file was empty (probably newly created)
        This is the normal case and happens when the user doesn't specify
        an extension for the binary log files.
        In this case we write a standard header to it.
      */
      if (my_b_safe_write(&log_file, BINLOG_MAGIC,
                          BIN_LOG_HEADER_SIZE))
        goto err;
      bytes_written+= BIN_LOG_HEADER_SIZE;
      write_file_name_to_index_file= 1;
    }

    {
      /*
        In 4.x we put Start event only in the first binlog. But from 5.0 we
        want a Start event even if this is not the very first binlog.
      */
      Format_description_log_event s(BINLOG_VERSION);
      /*
        don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
        as we won't be able to reset it later
      */
      if (io_cache_type == WRITE_CACHE)
        s.flags |= LOG_EVENT_BINLOG_IN_USE_F;

      if (is_relay_log)
      {
        if (relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
          relay_log_checksum_alg=
            opt_slave_sql_verify_checksum ?
            (enum_binlog_checksum_alg) binlog_checksum_options
            : BINLOG_CHECKSUM_ALG_OFF;
        s.checksum_alg= relay_log_checksum_alg;
        s.set_relay_log_event();
      }
      else
        s.checksum_alg= (enum_binlog_checksum_alg)binlog_checksum_options;

      crypto.scheme = 0;
      DBUG_ASSERT(s.checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
      if (!s.is_valid())
        goto err;
      s.dont_set_created= null_created_arg;
      if (write_event(&s))
        goto err;
      bytes_written+= s.data_written;

      if (encrypt_binlog)
      {
        uint key_version= encryption_key_get_latest_version(ENCRYPTION_KEY_SYSTEM_DATA);
        if (key_version == ENCRYPTION_KEY_VERSION_INVALID)
        {
          sql_print_error("Failed to enable encryption of binary logs");
          goto err;
        }

        if (key_version != ENCRYPTION_KEY_NOT_ENCRYPTED)
        {
          if (my_random_bytes(crypto.nonce, sizeof(crypto.nonce)))
            goto err;

          Start_encryption_log_event sele(1, key_version, crypto.nonce);
          sele.checksum_alg= s.checksum_alg;
          if (write_event(&sele))
            goto err;

          // Start_encryption_log_event is written, enable the encryption
          if (crypto.init(sele.crypto_scheme, key_version))
            goto err;
        }
      }

      if (!is_relay_log)
      {
        char buf[FN_REFLEN];

        /*
          Output a Gtid_list_log_event at the start of the binlog file.

          This is used to quickly determine which GTIDs are found in binlog
          files earlier than this one, and which are found in this (or later)
          binlogs.

          The list gives a mapping from (domain_id, server_id) -> seq_no (so
          this means that there is at most one entry for every unique pair
          (domain_id, server_id) in the list). It indicates that this seq_no is
          the last one found in an earlier binlog file for this (domain_id,
          server_id) combination - so any higher seq_no should be search for
          from this binlog file, or a later one.

          This allows to locate the binlog file containing a given GTID by
          scanning backwards, reading just the Gtid_list_log_event at the
          start of each file, and scanning only the relevant binlog file when
          found, not all binlog files.

          The existence of a given entry (domain_id, server_id, seq_no)
          guarantees only that this seq_no will not be found in this or any
          later binlog file. It does not guarantee that it can be found it an
          earlier binlog file, for example the file may have been purged.

          If there is no entry for a given (domain_id, server_id) pair, then
          it means that no such GTID exists in any earlier binlog. It is
          permissible to remove such pair from future Gtid_list_log_events
          if all previous binlog files containing such GTIDs have been purged
          (though such optimization is not performed at the time of this
          writing). So if there is no entry for given GTID it means that such
          GTID should be search for in this or later binlog file, same as if
          there had been an entry (domain_id, server_id, 0).
        */

        Gtid_list_log_event gl_ev(&rpl_global_gtid_binlog_state, 0);
        if (write_event(&gl_ev))
          goto err;

        /* Output a binlog checkpoint event at the start of the binlog file. */

        /*
          Construct an entry in the binlog_xid_count_list for the new binlog
          file (we will not link it into the list until we know the new file
          is successfully created; otherwise we would have to remove it again
          if creation failed, which gets tricky since other threads may have
          seen the entry in the meantime - and we do not want to hold
          LOCK_xid_list for long periods of time).

          Write the current binlog checkpoint into the log, so XA recovery will
          know from where to start recovery.
        */
        size_t off= dirname_length(log_file_name);
        uint len= static_cast<uint>(strlen(log_file_name) - off);
        new_xid_list_entry= new xid_count_per_binlog(log_file_name+off, len);
        if (!new_xid_list_entry)
          goto err;

        /*
          Find the name for the Initial binlog checkpoint.

          Normally this will just be the first entry, as we delete entries
          when their count drops to zero. But we scan the list to handle any
          corner case, eg. for the first binlog file opened after startup, the
          list will be empty.
        */
        mysql_mutex_lock(&LOCK_xid_list);
        I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
        while ((b= it++) && b->xid_count == 0)
          ;
        mysql_mutex_unlock(&LOCK_xid_list);
        if (!b)
          b= new_xid_list_entry;
        if (b->binlog_name)
          strmake(buf, b->binlog_name, b->binlog_name_len);
        else
          goto err;
        Binlog_checkpoint_log_event ev(buf, len);
        DBUG_EXECUTE_IF("crash_before_write_checkpoint_event",
                        flush_io_cache(&log_file);
                        mysql_file_sync(log_file.file, MYF(MY_WME));
                        DBUG_SUICIDE(););
        if (write_event(&ev))
          goto err;
        bytes_written+= ev.data_written;
      }
    }
    if (description_event_for_queue &&
        description_event_for_queue->binlog_version>=4)
    {
      /*
        This is a relay log written to by the I/O slave thread.
        Write the event so that others can later know the format of this relay
        log.
        Note that this event is very close to the original event from the
        master (it has binlog version of the master, event types of the
        master), so this is suitable to parse the next relay log's event. It
        has been produced by
        Format_description_log_event::Format_description_log_event(char* buf,).
        Why don't we want to write the description_event_for_queue if this
        event is for format<4 (3.23 or 4.x): this is because in that case, the
        description_event_for_queue describes the data received from the
        master, but not the data written to the relay log (*conversion*),
        which is in format 4 (slave's).
      */
      /*
        Set 'created' to 0, so that in next relay logs this event does not
        trigger cleaning actions on the slave in
        Format_description_log_event::apply_event_impl().
      */
      description_event_for_queue->created= 0;
      /* Don't set log_pos in event header */
      description_event_for_queue->set_artificial_event();

      if (write_event(description_event_for_queue))
        goto err;
      bytes_written+= description_event_for_queue->data_written;
    }
    if (flush_io_cache(&log_file) ||
        mysql_file_sync(log_file.file, MYF(MY_WME)))
      goto err;

    my_off_t offset= my_b_tell(&log_file);

    if (!is_relay_log)
    {
      /* update binlog_end_pos so that it can be read by after sync hook */
      reset_binlog_end_pos(log_file_name, offset);

      mysql_mutex_lock(&LOCK_commit_ordered);
      strmake_buf(last_commit_pos_file, log_file_name);
      last_commit_pos_offset= offset;
      mysql_mutex_unlock(&LOCK_commit_ordered);
    }

    if (write_file_name_to_index_file)
    {
#ifdef HAVE_REPLICATION
#ifdef ENABLED_DEBUG_SYNC
      if (current_thd)
        DEBUG_SYNC(current_thd, "binlog_open_before_update_index");
#endif
      DBUG_EXECUTE_IF("crash_create_critical_before_update_index", DBUG_SUICIDE(););
#endif

      DBUG_ASSERT(my_b_inited(&index_file) != 0);
      reinit_io_cache(&index_file, WRITE_CACHE,
                      my_b_filelength(&index_file), 0, 0);
      /*
        As this is a new log file, we write the file name to the index
        file. As every time we write to the index file, we sync it.
      */
      if (DBUG_IF("fault_injection_updating_index") ||
          my_b_write(&index_file, (uchar*) log_file_name,
                     strlen(log_file_name)) ||
          my_b_write(&index_file, (uchar*) "\n", 1) ||
          flush_io_cache(&index_file) ||
          mysql_file_sync(index_file.file, MYF(MY_WME)))
        goto err;

#ifdef HAVE_REPLICATION
      DBUG_EXECUTE_IF("crash_create_after_update_index", DBUG_SUICIDE(););
#endif
    }
  }

  if (!is_relay_log)
  {
    /*
      Now the file was created successfully, so we can link in the entry for
      the new binlog file in binlog_xid_count_list.
    */
    mysql_mutex_lock(&LOCK_xid_list);
    ++current_binlog_id;
    new_xid_list_entry->binlog_id= current_binlog_id;
    /* Remove any initial entries with no pending XIDs.  */
    while ((b= binlog_xid_count_list.head()) && b->xid_count == 0)
    {
      WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Removing xid_list_entry for "
                           "%s (%lu)", b);
      delete binlog_xid_count_list.get();
    }
    mysql_cond_broadcast(&COND_xid_list);
    WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Adding new xid_list_entry for "
                         "%s (%lu)", new_xid_list_entry);
    binlog_xid_count_list.push_back(new_xid_list_entry);
    mysql_mutex_unlock(&LOCK_xid_list);

    /*
      Now that we have synced a new binlog file with an initial Gtid_list
      event, it is safe to delete the binlog state file. We will write out
      a new, updated file at shutdown, and if we crash before we can recover
      the state from the newly written binlog file.

      Since the state file will contain out-of-date data as soon as the first
      new GTID is binlogged, it is better to remove it, to avoid any risk of
      accidentally reading incorrect data later.
    */
    if (!state_file_deleted)
    {
      char buf[FN_REFLEN];
      fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
                MY_UNPACK_FILENAME);
      my_delete(buf, MY_SYNC_DIR);
      state_file_deleted= true;
    }
  }

  log_state= LOG_OPENED;

#ifdef HAVE_REPLICATION
  close_purge_index_file();
#endif

  /* Notify the io thread that binlog is rotated to a new file */
  if (is_relay_log)
    signal_relay_log_update();
  else
    update_binlog_end_pos();
  DBUG_RETURN(0);

err:
  int tmp_errno= errno;
#ifdef HAVE_REPLICATION
  if (is_inited_purge_index_file())
    purge_index_entry(NULL, NULL, need_mutex);
  close_purge_index_file();
#endif
  sql_print_error(fatal_log_error, (name) ? name : log_name, tmp_errno);
  if (new_xid_list_entry)
    delete new_xid_list_entry;
  if (file >= 0)
    mysql_file_close(file, MYF(0));
  close(LOG_CLOSE_INDEX);
  DBUG_RETURN(1);
}


/**
  Return the current log file name and position, taking LOCK_log.
  Thin locking wrapper around raw_get_current_log().
*/
int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo)
{
  mysql_mutex_lock(&LOCK_log);
  int ret = raw_get_current_log(linfo);
  mysql_mutex_unlock(&LOCK_log);
  return ret;
}

/**
  Fill @c linfo with the current log file name and write position.
  Caller must hold LOCK_log.  Always returns 0.
*/
int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
{
  mysql_mutex_assert_owner(&LOCK_log);
  strmake_buf(linfo->log_file_name, log_file_name);
  linfo->pos = my_b_tell(&log_file);
  return 0;
}

/**
  Move all data up in a file in an filename index file.

  We do the copy outside of the IO_CACHE as the cache buffers would just
  make things slower and more complicated.
  In most cases the copy loop should only do one read.

  @param index_file	File to move
  @param offset	Move everything from here to beginning

  @note
    File will be truncated to be 'offset' shorter or filled up with newlines

  @retval
    0	ok
*/

#ifdef HAVE_REPLICATION

static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
{
  int bytes_read;
  my_off_t init_offset= offset;
  File file= index_file->file;
  uchar io_buf[IO_SIZE*2];
  DBUG_ENTER("copy_up_file_and_fill");

  /* Shift the tail of the file [offset..EOF) down to position 0. */
  for (;; offset+= bytes_read)
  {
    mysql_file_seek(file, offset, MY_SEEK_SET, MYF(0));
    if ((bytes_read= (int) mysql_file_read(file, io_buf, sizeof(io_buf),
                                           MYF(MY_WME)))
        < 0)
      goto err;
    if (!bytes_read)
      break;                                    // end of file
    mysql_file_seek(file, offset-init_offset, MY_SEEK_SET, MYF(0));
    if (mysql_file_write(file, io_buf, bytes_read,
                         MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
      goto err;
  }
  /* The following will either truncate the file or fill the end with \n' */
  if (mysql_file_chsize(file, offset - init_offset, '\n', MYF(MY_WME)) ||
      mysql_file_sync(file, MYF(MY_WME)))
    goto err;

  /* Reset data in old index cache */
  reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 1);
  DBUG_RETURN(0);

err:
  DBUG_RETURN(1);
}

#endif /* HAVE_REPLICATION */

/**
  Find the position in the log-index-file for the given log name.

  @param linfo		Store here the found log file name and position to
                       the NEXT log file name in the index file.
  @param log_name	Filename to find in the index file.
                       Is a null pointer if we want to read the first entry
  @param need_lock	Set this to 1 if the parent doesn't already have a
                       lock on LOCK_index

  @note
    On systems without the truncate function the file will end with one or
    more empty lines.  These will be ignored when reading the file.

  @retval
    0			ok
  @retval
    LOG_INFO_EOF	        End of log-index-file found
  @retval
    LOG_INFO_IO		Got IO error while reading file
*/

int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
                                bool need_lock)
{
  int error= 0;
  char *full_fname= linfo->log_file_name;
  char full_log_name[FN_REFLEN], fname[FN_REFLEN];
  uint log_name_len= 0, fname_len= 0;
  DBUG_ENTER("find_log_pos");
  full_log_name[0]= full_fname[0]= 0;

  /*
    Mutex needed because we need to make sure the file pointer does not
    move from under our feet
  */
  if (need_lock)
    mysql_mutex_lock(&LOCK_index);
  mysql_mutex_assert_owner(&LOCK_index);

  // extend relative paths for log_name to be searched
  if (log_name)
  {
    if(normalize_binlog_name(full_log_name, log_name, is_relay_log))
    {
      error= LOG_INFO_EOF;
      goto end;
    }
  }

  log_name_len= log_name ? (uint) strlen(full_log_name) : 0;
  DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s",
                       log_name ? log_name : "NULL", full_log_name));

  /* As the file is flushed, we can't get an error here */
  (void) reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 0);

  for (;;)
  {
    size_t length;
    my_off_t offset= my_b_tell(&index_file);

    DBUG_EXECUTE_IF("simulate_find_log_pos_error",
                    error=  LOG_INFO_EOF; break;);
    /* If we get 0 or 1 characters, this is the end of the file */
    if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
    {
      /* Did not find the given entry; Return not found or error */
      error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
      break;
    }
    if (fname[length-1] != '\n')
      continue;                                 // Not a log entry
    fname[length-1]= 0;                         // Remove end \n

    // extend relative paths and match against full path
    if (normalize_binlog_name(full_fname, fname, is_relay_log))
    {
      error= LOG_INFO_EOF;
      break;
    }
    fname_len= (uint) strlen(full_fname);

    // if the log entry matches, null string matching anything
    if (!log_name ||
        (log_name_len == fname_len &&
         !strncmp(full_fname, full_log_name, log_name_len)))
    {
      DBUG_PRINT("info", ("Found log file entry"));
      linfo->index_file_start_offset= offset;
      linfo->index_file_offset = my_b_tell(&index_file);
      break;
    }
  }

end:
  if (need_lock)
    mysql_mutex_unlock(&LOCK_index);
  DBUG_RETURN(error);
}


/**
  Find the position in the log-index-file for the given log name.

  @param
    linfo		Store here the next log file name and position to
			the file name after that.
  @param
    need_lock	Set this to 1 if the parent doesn't already have a
			lock on LOCK_index

  @note
    - Before calling this function, one has to call find_log_pos()
    to set up 'linfo'
    - Mutex needed because we need to make sure the file pointer does not move
    from under our feet

  @retval
    0			ok
  @retval
    LOG_INFO_EOF	        End of log-index-file found
  @retval
    LOG_INFO_IO		Got IO error while reading file
*/

int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock)
{
  int error= 0;
  size_t length;
  char fname[FN_REFLEN];
  char *full_fname= linfo->log_file_name;

  if (need_lock)
    mysql_mutex_lock(&LOCK_index);
  mysql_mutex_assert_owner(&LOCK_index);

  /* As the file is flushed, we can't get an error here */
  (void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0,
                         0);

  linfo->index_file_start_offset= linfo->index_file_offset;
  if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
  {
    error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
    goto err;
  }

  if (fname[0] != 0)
  {
    if(normalize_binlog_name(full_fname, fname, is_relay_log))
    {
      error= LOG_INFO_EOF;
      goto err;
    }
    length= strlen(full_fname);
  }

  full_fname[length-1]= 0;			// kill \n
  linfo->index_file_offset= my_b_tell(&index_file);

err:
  if (need_lock)
    mysql_mutex_unlock(&LOCK_index);
  return error;
}


/**
  Delete all logs referred to in the index file.

  The new index file will only contain this file.

  @param thd		  Thread id. This can be zero in case of resetting
			  relay logs
  @param create_new_log  1 if we should start writing to a new log file
  @param next_log_number  min number of next log file to use, if possible.

  @note
    If not called from slave thread, write start event to new log

  @retval
    0	ok
  @retval
    1   error
*/

bool MYSQL_BIN_LOG::reset_logs(THD *thd, bool create_new_log,
                               rpl_gtid *init_state, uint32 init_state_len,
                               ulong next_log_number)
{
  LOG_INFO linfo;
  bool error=0;
  int err;
  const char* save_name;
  DBUG_ENTER("reset_logs");

  if (!is_relay_log)
  {
    if (init_state && !is_empty_state())
    {
      my_error(ER_BINLOG_MUST_BE_EMPTY, MYF(0));
      DBUG_RETURN(1);
    }

    /*
      Mark that a RESET MASTER is in progress.
      This ensures that a binlog checkpoint will not try to write binlog
      checkpoint events, which would be useless (as we are deleting the binlog
      anyway) and could deadlock, as we are holding LOCK_log.

      Wait for any mark_xid_done() calls that might be already running to
      complete (mark_xid_done_waiting counter to drop to zero); we need to
      do this before we take the LOCK_log to not deadlock.
    */
    mysql_mutex_lock(&LOCK_xid_list);
    reset_master_pending++;
    while (mark_xid_done_waiting > 0)
      mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
    mysql_mutex_unlock(&LOCK_xid_list);
  }

  DEBUG_SYNC_C_IF_THD(thd, "reset_logs_after_set_reset_master_pending");
  /*
    We need to get both locks to be sure that no one is trying to
    write to the index log file.
  */
  mysql_mutex_lock(&LOCK_log);
  mysql_mutex_lock(&LOCK_index);

  if (!is_relay_log)
  {
    /*
      We are going to nuke all binary log files.
      Without binlog, we cannot XA recover prepared-but-not-committed
      transactions in engines. So force a commit checkpoint first.

      Note that we take and immediately
      release LOCK_after_binlog_sync/LOCK_commit_ordered. This has
      the effect to ensure that any on-going group commit (in
      trx_group_commit_leader()) has completed before we request the checkpoint,
      due to the chaining of LOCK_log and LOCK_commit_ordered in that function.
      (We are holding LOCK_log, so no new group commit can start).

      Without this, it is possible (though perhaps unlikely) that the RESET
      MASTER could run in-between the write to the binlog and the
      commit_ordered() in the engine of some transaction, and then a crash
      later would leave such transaction not recoverable.
    */

    mysql_mutex_lock(&LOCK_after_binlog_sync);
    mysql_mutex_lock(&LOCK_commit_ordered);
    mysql_mutex_unlock(&LOCK_after_binlog_sync);
    mysql_mutex_unlock(&LOCK_commit_ordered);

    mark_xids_active(current_binlog_id, 1);
    do_checkpoint_request(current_binlog_id);

    /* Now wait for all checkpoint requests and pending unlog() to complete. */
    mysql_mutex_lock(&LOCK_xid_list);
    for (;;)
    {
      if (is_xidlist_idle_nolock())
        break;
      /*
        Wait until signalled that one more binlog dropped to zero, then check
        again.
      */
      mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
    }

    /*
      Now all XIDs are fully flushed to disk, and we are holding LOCK_log so
      no new ones will be written. So we can proceed to delete the logs.
    */
    mysql_mutex_unlock(&LOCK_xid_list);
  }

  /* Save variables so that we can reopen the log */
  save_name=name;
  name=0;					// Protect against free
  close(LOG_CLOSE_TO_BE_OPENED);

  last_used_log_number= 0;                      // Reset log number cache

  /*
    First delete all old log files and then update the index file.
    As we first delete the log files and do not use sort of logging,
    a crash may lead to an inconsistent state where the index has
    references to non-existent files.

    We need to invert the steps and use the purge_index_file methods
    in order to make the operation safe.
  */

  if ((err= find_log_pos(&linfo, NullS, 0)) != 0)
  {
    uint errcode= purge_log_get_error_code(err);
    sql_print_error("Failed to locate old binlog or relay log files");
    my_message(errcode, ER_THD_OR_DEFAULT(thd, errcode), MYF(0));
    error= 1;
    goto err;
  }

  /* Delete every log file listed in the index; missing files only warn. */
  for (;;)
  {
    if (unlikely((error= my_delete(linfo.log_file_name, MYF(0)))))
    {
      if (my_errno == ENOENT)
      {
        if (thd)
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_LOG_PURGE_NO_FILE,
                              ER_THD(thd, ER_LOG_PURGE_NO_FILE),
                              linfo.log_file_name);

        sql_print_information("Failed to delete file '%s'",
                              linfo.log_file_name);
        my_errno= 0;
        error= 0;
      }
      else
      {
        if (thd)
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_BINLOG_PURGE_FATAL_ERR,
                              "a problem with deleting %s; "
                              "consider examining correspondence "
                              "of your binlog index file "
                              "to the actual binlog files",
                              linfo.log_file_name);
        error= 1;
        goto err;
      }
    }
    if (find_next_log(&linfo, 0))
      break;
  }

  if (!is_relay_log)
  {
    /* Reset (or seed) the global GTID binlog state to match the empty log. */
    if (init_state)
      rpl_global_gtid_binlog_state.load(init_state, init_state_len);
    else
      rpl_global_gtid_binlog_state.reset();
  }

  /* Start logging with a new file */
  close(LOG_CLOSE_INDEX | LOG_CLOSE_TO_BE_OPENED);
  // Reset (open will update)
  if (unlikely((error= my_delete(index_file_name, MYF(0)))))
  {
    if (my_errno == ENOENT)
    {
      if (thd)
        push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                            ER_LOG_PURGE_NO_FILE,
                            ER_THD(thd, ER_LOG_PURGE_NO_FILE),
                            index_file_name);
      sql_print_information("Failed to delete file '%s'",
                            index_file_name);
      my_errno= 0;
      error= 0;
    }
    else
    {
      if (thd)
        push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                            ER_BINLOG_PURGE_FATAL_ERR,
                            "a problem with deleting %s; "
                            "consider examining correspondence "
                            "of your binlog index file "
                            "to the actual binlog files",
                            index_file_name);
      error= 1;
      goto err;
    }
  }
  if (create_new_log && !open_index_file(index_file_name, 0, FALSE))
    if (unlikely((error= open(save_name, 0, next_log_number,
                              io_cache_type, max_size, 0, FALSE))))
      goto err;
  my_free((void *) save_name);

err:
  if (error == 1)
    name= const_cast<char*>(save_name);

  if (!is_relay_log)
  {
    xid_count_per_binlog *b;
    /*
      Remove all entries in the xid_count list except the last.
      Normally we will just be deleting all the entries that we waited for to
      drop to zero above. But if we fail during RESET MASTER for some reason
      then we will not have created any new log file, and we may keep the last
      of the old entries.
    */
    mysql_mutex_lock(&LOCK_xid_list);
    for (;;)
    {
      b= binlog_xid_count_list.head();
      DBUG_ASSERT(b /* List can never become empty. */);
      if (b->binlog_id == current_binlog_id)
        break;
      DBUG_ASSERT(b->xid_count == 0);
      WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::reset_logs(): Removing "
                           "xid_list_entry for %s (%lu)", b);
      delete binlog_xid_count_list.get();
    }
    mysql_cond_broadcast(&COND_xid_list);
    reset_master_pending--;
    reset_master_count++;
    mysql_mutex_unlock(&LOCK_xid_list);
  }

  mysql_mutex_unlock(&LOCK_index);
  mysql_mutex_unlock(&LOCK_log);
  DBUG_RETURN(error);
}


/**
  Wait until only the xid_count entry for the current (last) binlog remains,
  i.e. until the final binlog checkpoint event has been requested.  Then
  briefly take and release LOCK_log: because LOCK_xid_list and LOCK_log are
  chained in mark_xid_done(), acquiring LOCK_log here guarantees the last
  checkpoint event has actually been written before we return.
*/
void MYSQL_BIN_LOG::wait_for_last_checkpoint_event()
{
  mysql_mutex_lock(&LOCK_xid_list);
  for (;;)
  {
    if (binlog_xid_count_list.is_last(binlog_xid_count_list.head()))
      break;
    mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
  }
  mysql_mutex_unlock(&LOCK_xid_list);

  /*
    LOCK_xid_list and LOCK_log are chained, so the LOCK_log will only be
    obtained after mark_xid_done() has written the last checkpoint event.
  */
  mysql_mutex_lock(&LOCK_log);
  mysql_mutex_unlock(&LOCK_log);
}


/**
  Delete relay log files prior to rli->group_relay_log_name
  (i.e. all logs which are not involved in a non-finished group
  (transaction)), remove them from the index file and start on next
  relay log.
+ + IMPLEMENTATION + + - You must hold rli->data_lock before calling this function, since + it writes group_relay_log_pos and similar fields of + Relay_log_info. + - Protects index file with LOCK_index + - Delete relevant relay log files + - Copy all file names after these ones to the front of the index file + - If the OS has truncate, truncate the file, else fill it with \n' + - Read the next file name from the index file and store in rli->linfo + + @param rli Relay log information + @param included If false, all relay logs that are strictly before + rli->group_relay_log_name are deleted ; if true, the + latter is deleted too (i.e. all relay logs + read by the SQL slave thread are deleted). + + @note + - This is only called from the slave SQL thread when it has read + all commands from a relay log and want to switch to a new relay log. + - When this happens, we can be in an active transaction as + a transaction can span over two relay logs + (although it is always written as a single block to the master's binary + log, hence cannot span over two master's binary logs). 
+ + @retval + 0 ok + @retval + LOG_INFO_EOF End of log-index-file found + @retval + LOG_INFO_SEEK Could not allocate IO cache + @retval + LOG_INFO_IO Got IO error while reading file +*/ + +#ifdef HAVE_REPLICATION + +int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included) +{ + int error, errcode; + char *to_purge_if_included= NULL; + inuse_relaylog *ir; + ulonglong log_space_reclaimed= 0; + DBUG_ENTER("purge_first_log"); + + DBUG_ASSERT(is_open()); + DBUG_ASSERT(rli->slave_running == MYSQL_SLAVE_RUN_NOT_CONNECT); + DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name)); + + mysql_mutex_assert_owner(&rli->data_lock); + + mysql_mutex_lock(&LOCK_index); + + ir= rli->inuse_relaylog_list; + while (ir) + { + inuse_relaylog *next= ir->next; + if (!ir->completed || ir->dequeued_count < ir->queued_count) + { + included= false; + break; + } + if (!included && !strcmp(ir->name, rli->group_relay_log_name)) + break; + if (!next) + { + rli->last_inuse_relaylog= NULL; + included= 1; + to_purge_if_included= my_strdup(key_memory_Relay_log_info_group_relay_log_name, + ir->name, MYF(0)); + } + rli->free_inuse_relaylog(ir); + ir= next; + } + rli->inuse_relaylog_list= ir; + if (ir) + to_purge_if_included= my_strdup(key_memory_Relay_log_info_group_relay_log_name, + ir->name, MYF(0)); + + /* + Read the next log file name from the index file and pass it back to + the caller. + */ + if (unlikely((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, + 0))) || + unlikely((error=find_next_log(&rli->linfo, 0)))) + { + sql_print_error("next log error: %d offset: %llu log: %s included: %d", + error, rli->linfo.index_file_offset, + rli->event_relay_log_name, included); + goto err; + } + + /* + Reset rli's coordinates to the current log. 
+ */ + rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE; + strmake_buf(rli->event_relay_log_name,rli->linfo.log_file_name); + + /* + If we removed the rli->group_relay_log_name file, + we must update the rli->group* coordinates, otherwise do not touch it as the + group's execution is not finished (e.g. COMMIT not executed) + */ + if (included) + { + rli->group_relay_log_pos = BIN_LOG_HEADER_SIZE; + strmake_buf(rli->group_relay_log_name,rli->linfo.log_file_name); + rli->notify_group_relay_log_name_update(); + } + + /* Store where we are in the new file for the execution thread */ + if (rli->flush()) + error= LOG_INFO_IO; + + DBUG_EXECUTE_IF("crash_before_purge_logs", DBUG_SUICIDE();); + + rli->relay_log.purge_logs(to_purge_if_included, included, + 0, 0, &log_space_reclaimed); + + mysql_mutex_lock(&rli->log_space_lock); + rli->log_space_total-= log_space_reclaimed; + mysql_cond_broadcast(&rli->log_space_cond); + mysql_mutex_unlock(&rli->log_space_lock); + + /* + * Need to update the log pos because purge logs has been called + * after fetching initially the log pos at the beginning of the method. + */ + if ((errcode= find_log_pos(&rli->linfo, rli->event_relay_log_name, 0))) + { + sql_print_error("next log error: %d offset: %llu log: %s included: %d", + errcode, rli->linfo.index_file_offset, + rli->group_relay_log_name, included); + goto err; + } + + /* If included was passed, rli->linfo should be the first entry. */ + DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0); + +err: + my_free(to_purge_if_included); + mysql_mutex_unlock(&LOCK_index); + DBUG_RETURN(error); +} + +/** + Update log index_file. 
+*/ + +int MYSQL_BIN_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads) +{ + if (copy_up_file_and_fill(&index_file, log_info->index_file_start_offset)) + return LOG_INFO_IO; + + // now update offsets in index file for running threads + if (need_update_threads) + adjust_linfo_offsets(log_info->index_file_start_offset); + return 0; +} + +/** + Remove all logs before the given log from disk and from the index file. + + @param to_log Delete all log file name before this file. + @param included If true, to_log is deleted too. + @param need_mutex + @param need_update_threads If we want to update the log coordinates of + all threads. False for relay logs, true otherwise. + @param reclaimeed_log_space If not null, increment this variable to + the amount of log space freed + + @note + If any of the logs before the deleted one is in use, + only purge logs up to this one. + + @retval + 0 ok + @retval + LOG_INFO_EOF to_log not found + LOG_INFO_EMFILE too many files opened + LOG_INFO_FATAL if any other than ENOENT error from + mysql_file_stat() or mysql_file_delete() +*/ + +int MYSQL_BIN_LOG::purge_logs(const char *to_log, + bool included, + bool need_mutex, + bool need_update_threads, + ulonglong *reclaimed_space) +{ + int error= 0; + bool exit_loop= 0; + LOG_INFO log_info; + THD *thd= current_thd; + DBUG_ENTER("purge_logs"); + DBUG_PRINT("info",("to_log= %s",to_log)); + + if (need_mutex) + mysql_mutex_lock(&LOCK_index); + if (unlikely((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/))) ) + { + sql_print_error("MYSQL_BIN_LOG::purge_logs was called with file %s not " + "listed in the index.", to_log); + goto err; + } + + if (unlikely((error= open_purge_index_file(TRUE)))) + { + sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file."); + goto err; + } + + /* + File name exists in index file; delete until we find this file + or a file that is used. 
+ */ + if (unlikely((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))) + goto err; + while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) && + can_purge_log(log_info.log_file_name)) + { + if (unlikely((error= register_purge_index_entry(log_info.log_file_name)))) + { + sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.", + log_info.log_file_name); + goto err; + } + + if (find_next_log(&log_info, 0) || exit_loop) + break; + } + + DBUG_EXECUTE_IF("crash_purge_before_update_index", DBUG_SUICIDE();); + + if (unlikely((error= sync_purge_index_file()))) + { + sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file."); + goto err; + } + + /* We know how many files to delete. Update index file. */ + if (unlikely((error=update_log_index(&log_info, need_update_threads)))) + { + sql_print_error("MYSQL_BIN_LOG::purge_logs failed to update the index file"); + goto err; + } + + DBUG_EXECUTE_IF("crash_purge_critical_after_update_index", DBUG_SUICIDE();); + +err: + /* Read each entry from purge_index_file and delete the file. 
*/ + if (is_inited_purge_index_file() && + (error= purge_index_entry(thd, reclaimed_space, FALSE))) + sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files" + " that would be purged."); + close_purge_index_file(); + + DBUG_EXECUTE_IF("crash_purge_non_critical_after_update_index", DBUG_SUICIDE();); + + if (need_mutex) + mysql_mutex_unlock(&LOCK_index); + DBUG_RETURN(error); +} + +int MYSQL_BIN_LOG::set_purge_index_file_name(const char *base_file_name) +{ + int error= 0; + DBUG_ENTER("MYSQL_BIN_LOG::set_purge_index_file_name"); + if (fn_format(purge_index_file_name, base_file_name, mysql_data_home, + ".~rec~", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH | + MY_REPLACE_EXT)) == NULL) + { + error= 1; + sql_print_error("MYSQL_BIN_LOG::set_purge_index_file_name failed to set " + "file name."); + } + DBUG_RETURN(error); +} + +int MYSQL_BIN_LOG::open_purge_index_file(bool destroy) +{ + int error= 0; + File file= -1; + + DBUG_ENTER("MYSQL_BIN_LOG::open_purge_index_file"); + + if (destroy) + close_purge_index_file(); + + if (!my_b_inited(&purge_index_file)) + { + if ((file= my_open(purge_index_file_name, O_RDWR | O_CREAT | O_BINARY, + MYF(MY_WME))) < 0 || + init_io_cache(&purge_index_file, file, IO_SIZE, + (destroy ? 
WRITE_CACHE : READ_CACHE), + 0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL))) + { + error= 1; + sql_print_error("MYSQL_BIN_LOG::open_purge_index_file failed to open register " + " file."); + } + } + DBUG_RETURN(error); +} + +int MYSQL_BIN_LOG::close_purge_index_file() +{ + int error= 0; + + DBUG_ENTER("MYSQL_BIN_LOG::close_purge_index_file"); + + if (my_b_inited(&purge_index_file)) + { + end_io_cache(&purge_index_file); + error= my_close(purge_index_file.file, MYF(0)); + } + my_delete(purge_index_file_name, MYF(0)); + bzero((char*) &purge_index_file, sizeof(purge_index_file)); + + DBUG_RETURN(error); +} + +bool MYSQL_BIN_LOG::is_inited_purge_index_file() +{ + return my_b_inited(&purge_index_file); +} + +int MYSQL_BIN_LOG::sync_purge_index_file() +{ + int error= 0; + DBUG_ENTER("MYSQL_BIN_LOG::sync_purge_index_file"); + + if (unlikely((error= flush_io_cache(&purge_index_file))) || + unlikely((error= my_sync(purge_index_file.file, + MYF(MY_WME))))) + DBUG_RETURN(error); + + DBUG_RETURN(error); +} + +int MYSQL_BIN_LOG::register_purge_index_entry(const char *entry) +{ + int error= 0; + DBUG_ENTER("MYSQL_BIN_LOG::register_purge_index_entry"); + + if (unlikely((error=my_b_write(&purge_index_file, (const uchar*)entry, + strlen(entry)))) || + unlikely((error=my_b_write(&purge_index_file, (const uchar*)"\n", 1)))) + DBUG_RETURN (error); + + DBUG_RETURN(error); +} + +int MYSQL_BIN_LOG::register_create_index_entry(const char *entry) +{ + DBUG_ENTER("MYSQL_BIN_LOG::register_create_index_entry"); + DBUG_RETURN(register_purge_index_entry(entry)); +} + +int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *reclaimed_space, + bool need_mutex) +{ + DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry"); + MY_STAT s; + int error= 0; + LOG_INFO log_info; + LOG_INFO check_log_info; + + DBUG_ASSERT(my_b_inited(&purge_index_file)); + + if (unlikely((error= reinit_io_cache(&purge_index_file, READ_CACHE, 0, 0, + 0)))) + { + sql_print_error("MYSQL_BIN_LOG::purge_index_entry failed to reinit 
register file " + "for read"); + goto err; + } + + for (;;) + { + size_t length; + + if ((length=my_b_gets(&purge_index_file, log_info.log_file_name, + FN_REFLEN)) <= 1) + { + if (purge_index_file.error) + { + error= purge_index_file.error; + sql_print_error("MYSQL_BIN_LOG::purge_index_entry error %d reading from " + "register file.", error); + goto err; + } + + /* Reached EOF */ + break; + } + + /* Get rid of the trailing '\n' */ + log_info.log_file_name[length-1]= 0; + + if (unlikely(!mysql_file_stat(m_key_file_log, log_info.log_file_name, &s, + MYF(0)))) + { + if (my_errno == ENOENT) + { + /* + It's not fatal if we can't stat a log file that does not exist; + If we could not stat, we won't delete. + */ + if (thd) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_LOG_PURGE_NO_FILE, ER_THD(thd, ER_LOG_PURGE_NO_FILE), + log_info.log_file_name); + } + sql_print_information("Failed to execute mysql_file_stat on file '%s'", + log_info.log_file_name); + my_errno= 0; + } + else + { + /* + Other than ENOENT are fatal + */ + if (thd) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_BINLOG_PURGE_FATAL_ERR, + "a problem with getting info on being purged %s; " + "consider examining correspondence " + "of your binlog index file " + "to the actual binlog files", + log_info.log_file_name); + } + else + { + sql_print_information("Failed to delete log file '%s'; " + "consider examining correspondence " + "of your binlog index file " + "to the actual binlog files", + log_info.log_file_name); + } + error= LOG_INFO_FATAL; + goto err; + } + } + else + { + if (unlikely((error= find_log_pos(&check_log_info, + log_info.log_file_name, need_mutex)))) + { + if (error != LOG_INFO_EOF) + { + if (thd) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_BINLOG_PURGE_FATAL_ERR, + "a problem with deleting %s and " + "reading the binlog index file", + log_info.log_file_name); + } + else + { + sql_print_information("Failed to delete file '%s' 
and " + "read the binlog index file", + log_info.log_file_name); + } + goto err; + } + + error= 0; + + DBUG_PRINT("info",("purging %s",log_info.log_file_name)); + if (!my_delete(log_info.log_file_name, MYF(0))) + { + if (reclaimed_space) + *reclaimed_space+= s.st_size; + } + else + { + if (my_errno == ENOENT) + { + if (thd) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_LOG_PURGE_NO_FILE, ER_THD(thd, ER_LOG_PURGE_NO_FILE), + log_info.log_file_name); + } + sql_print_information("Failed to delete file '%s'", + log_info.log_file_name); + my_errno= 0; + } + else + { + if (thd) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_BINLOG_PURGE_FATAL_ERR, + "a problem with deleting %s; " + "consider examining correspondence " + "of your binlog index file " + "to the actual binlog files", + log_info.log_file_name); + } + else + { + sql_print_information("Failed to delete file '%s'; " + "consider examining correspondence " + "of your binlog index file " + "to the actual binlog files", + log_info.log_file_name); + } + if (my_errno == EMFILE) + { + DBUG_PRINT("info", + ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno)); + error= LOG_INFO_EMFILE; + goto err; + } + error= LOG_INFO_FATAL; + goto err; + } + } + } + } + } + +err: + DBUG_RETURN(error); +} + +/** + Remove all logs before the given file date from disk and from the + index file. + + @param thd Thread pointer + @param purge_time Delete all log files before given date. + + @note + If any of the logs before the deleted one is in use, + only purge logs up to this one. 
+ + @retval + 0 ok + @retval + LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated + LOG_INFO_FATAL if any other than ENOENT error from + mysql_file_stat() or mysql_file_delete() +*/ + +int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time) +{ + int error; + char to_log[FN_REFLEN]; + LOG_INFO log_info; + MY_STAT stat_area; + THD *thd= current_thd; + DBUG_ENTER("purge_logs_before_date"); + + mysql_mutex_lock(&LOCK_index); + to_log[0]= 0; + + if (unlikely((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))) + goto err; + + while (strcmp(log_file_name, log_info.log_file_name) && + can_purge_log(log_info.log_file_name)) + { + if (!mysql_file_stat(m_key_file_log, + log_info.log_file_name, &stat_area, MYF(0))) + { + if (my_errno == ENOENT) + { + /* + It's not fatal if we can't stat a log file that does not exist. + */ + my_errno= 0; + } + else + { + /* + Other than ENOENT are fatal + */ + if (thd) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_BINLOG_PURGE_FATAL_ERR, + "a problem with getting info on being purged %s; " + "consider examining correspondence " + "of your binlog index file " + "to the actual binlog files", + log_info.log_file_name); + } + else + { + sql_print_information("Failed to delete log file '%s'", + log_info.log_file_name); + } + error= LOG_INFO_FATAL; + goto err; + } + } + else + { + if (stat_area.st_mtime < purge_time) + strmake_buf(to_log, log_info.log_file_name); + else + break; + } + if (find_next_log(&log_info, 0)) + break; + } + + error= (to_log[0] ? 
purge_logs(to_log, 1, 0, 1, (ulonglong *) 0) : 0); + +err: + mysql_mutex_unlock(&LOCK_index); + DBUG_RETURN(error); +} + + +bool +MYSQL_BIN_LOG::can_purge_log(const char *log_file_name_arg) +{ + xid_count_per_binlog *b; + + if (is_active(log_file_name_arg)) + return false; + mysql_mutex_lock(&LOCK_xid_list); + { + I_List_iterator it(binlog_xid_count_list); + while ((b= it++) && + 0 != strncmp(log_file_name_arg+dirname_length(log_file_name_arg), + b->binlog_name, b->binlog_name_len)) + ; + } + mysql_mutex_unlock(&LOCK_xid_list); + if (b) + return false; + return !log_in_use(log_file_name_arg); +} +#endif /* HAVE_REPLICATION */ + + +bool +MYSQL_BIN_LOG::is_xidlist_idle() +{ + bool res; + mysql_mutex_lock(&LOCK_xid_list); + res= is_xidlist_idle_nolock(); + mysql_mutex_unlock(&LOCK_xid_list); + return res; +} + + +bool +MYSQL_BIN_LOG::is_xidlist_idle_nolock() +{ + xid_count_per_binlog *b; + + I_List_iterator it(binlog_xid_count_list); + while ((b= it++)) + { + if (b->xid_count > 0) + return false; + } + return true; +} + +/** + Create a new log file name. + + @param buf buf of at least FN_REFLEN where new name is stored + + @note + If file name will be longer then FN_REFLEN it will be truncated +*/ + +void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident) +{ + size_t dir_len = dirname_length(log_file_name); + if (dir_len >= FN_REFLEN) + dir_len=FN_REFLEN-1; + strnmov(buf, log_file_name, dir_len); + strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1); +} + + +/** + Check if we are writing/reading to the given log file. +*/ + +bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg) +{ + /** + * there should/must be mysql_mutex_assert_owner(&LOCK_log) here... + * but code violates this! (scary monsters and super creeps!) 
+ * + * example stacktrace: + * #8 MYSQL_BIN_LOG::is_active + * #9 MYSQL_BIN_LOG::can_purge_log + * #10 MYSQL_BIN_LOG::purge_logs + * #11 MYSQL_BIN_LOG::purge_first_log + * #12 next_event + * #13 exec_relay_log_event + * + * I didn't investigate if this is ligit...(i.e if my comment is wrong) + */ + return !strcmp(log_file_name, log_file_name_arg); +} + + +/* + Wrappers around new_file_impl to avoid using argument + to control locking. The argument 1) less readable 2) breaks + incapsulation 3) allows external access to the class without + a lock (which is not possible with private new_file_without_locking + method). + + @retval + nonzero - error +*/ + +int MYSQL_BIN_LOG::new_file() +{ + int res; + mysql_mutex_lock(&LOCK_log); + res= new_file_impl(); + mysql_mutex_unlock(&LOCK_log); + return res; +} + +/* + @retval + nonzero - error + */ +int MYSQL_BIN_LOG::new_file_without_locking() +{ + return new_file_impl(); +} + + +/** + Start writing to a new log file or reopen the old file. + + @param need_lock Set to 1 if caller has not locked LOCK_log + + @retval + nonzero - error + + @note + The new file name is stored last in the index file +*/ + +int MYSQL_BIN_LOG::new_file_impl() +{ + int error= 0, close_on_error= FALSE; + char new_name[FN_REFLEN], *new_name_ptr, *old_name, *file_to_open; + uint close_flag; + bool delay_close= false; + File UNINIT_VAR(old_file); + DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl"); + + DBUG_ASSERT(log_type == LOG_BIN); + mysql_mutex_assert_owner(&LOCK_log); + + if (!is_open()) + { + DBUG_PRINT("info",("log is closed")); + DBUG_RETURN(error); + } + + mysql_mutex_lock(&LOCK_index); + + /* Reuse old name if not binlog and not update log */ + new_name_ptr= name; + + /* + If user hasn't specified an extension, generate a new log name + We have to do this here and not in open as we want to store the + new file name in the current binary log file. 
+ */ + if (unlikely((error= generate_new_name(new_name, name, 0)))) + { +#ifdef ENABLE_AND_FIX_HANG + close_on_error= TRUE; +#endif + goto end2; + } + new_name_ptr=new_name; + + { + /* + We log the whole file name for log file as the user may decide + to change base names at some point. + */ + Rotate_log_event r(new_name + dirname_length(new_name), 0, LOG_EVENT_OFFSET, + is_relay_log ? Rotate_log_event::RELAY_LOG : 0); + /* + The current relay-log's closing Rotate event must have checksum + value computed with an algorithm of the last relay-logged FD event. + */ + if (is_relay_log) + r.checksum_alg= relay_log_checksum_alg; + DBUG_ASSERT(!is_relay_log || + relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF); + if ((DBUG_IF("fault_injection_new_file_rotate_event") && + (error= close_on_error= TRUE)) || + (error= write_event(&r))) + { + DBUG_EXECUTE_IF("fault_injection_new_file_rotate_event", errno= 2;); + close_on_error= TRUE; + my_printf_error(ER_ERROR_ON_WRITE, + ER_THD_OR_DEFAULT(current_thd, ER_CANT_OPEN_FILE), + MYF(ME_FATAL), name, errno); + goto end; + } + bytes_written+= r.data_written; + } + + /* + Update needs to be signalled even if there is no rotate event + log rotation should give the waiting thread a signal to + discover EOF and move on to the next log. + */ + if (unlikely((error= flush_io_cache(&log_file)))) + { + close_on_error= TRUE; + goto end; + } + update_binlog_end_pos(); + + old_name=name; + name=0; // Don't free name + close_flag= LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX; + if (!is_relay_log) + { + /* + We need to keep the old binlog file open (and marked as in-use) until + the new one is fully created and synced to disk and index. Otherwise we + leave a window where if we crash, there is no binlog file marked as + crashed for server restart to detect the need for recovery. 
+ */ + old_file= log_file.file; + close_flag|= LOG_CLOSE_DELAYED_CLOSE; + delay_close= true; + } + close(close_flag); + if (checksum_alg_reset != BINLOG_CHECKSUM_ALG_UNDEF) + { + DBUG_ASSERT(!is_relay_log); + DBUG_ASSERT(binlog_checksum_options != checksum_alg_reset); + binlog_checksum_options= checksum_alg_reset; + } + /* + Note that at this point, log_state != LOG_CLOSED + (important for is_open()). + */ + + /* + new_file() is only used for rotation (in FLUSH LOGS or because size > + max_binlog_size or max_relay_log_size). + If this is a binary log, the Format_description_log_event at the + beginning of the new file should have created=0 (to distinguish with the + Format_description_log_event written at server startup, which should + trigger temp tables deletion on slaves. + */ + + /* reopen index binlog file, BUG#34582 */ + file_to_open= index_file_name; + error= open_index_file(index_file_name, 0, FALSE); + if (likely(!error)) + { + /* reopen the binary log file. */ + file_to_open= new_name_ptr; + error= open(old_name, new_name_ptr, 0, io_cache_type, max_size, 1, FALSE); + } + + /* handle reopening errors */ + if (unlikely(error)) + { + my_error(ER_CANT_OPEN_FILE, MYF(ME_FATAL), file_to_open, error); + close_on_error= TRUE; + } + + my_free(old_name); + +end: + /* In case of errors, reuse the last generated log file name */ + if (unlikely(error)) + { + DBUG_ASSERT(last_used_log_number > 0); + last_used_log_number--; + } + +end2: + if (delay_close) + { + clear_inuse_flag_when_closing(old_file); + mysql_file_close(old_file, MYF(MY_WME)); + } + + if (unlikely(error && close_on_error)) /* rotate or reopen failed */ + { + /* + Close whatever was left opened. + + We are keeping the behavior as it exists today, ie, + we disable logging and move on (see: BUG#51014). + + TODO: as part of WL#1790 consider other approaches: + - kill mysql (safety); + - try multiple locations for opening a log file; + - switch server to protected/readonly mode + - ... 
+ */ + close(LOG_CLOSE_INDEX); + sql_print_error(fatal_log_error, new_name_ptr, errno); + } + + mysql_mutex_unlock(&LOCK_index); + + DBUG_RETURN(error); +} + +bool MYSQL_BIN_LOG::write_event(Log_event *ev, binlog_cache_data *cache_data, + IO_CACHE *file) +{ + Log_event_writer writer(file, 0, &crypto); + if (crypto.scheme && file == &log_file) + { + writer.ctx= alloca(crypto.ctx_size); + writer.set_encrypted_writer(); + } + if (cache_data) + cache_data->add_status(ev->logged_status()); + return writer.write(ev); +} + +bool MYSQL_BIN_LOG::append(Log_event *ev) +{ + bool res; + mysql_mutex_lock(&LOCK_log); + res= append_no_lock(ev); + mysql_mutex_unlock(&LOCK_log); + return res; +} + + +bool MYSQL_BIN_LOG::append_no_lock(Log_event* ev) +{ + bool error = 0; + DBUG_ENTER("MYSQL_BIN_LOG::append"); + + mysql_mutex_assert_owner(&LOCK_log); + DBUG_ASSERT(log_file.type == SEQ_READ_APPEND); + + if (write_event(ev)) + { + error=1; + goto err; + } + bytes_written+= ev->data_written; + DBUG_PRINT("info",("max_size: %lu",max_size)); + if (flush_and_sync(0)) + goto err; + if (my_b_append_tell(&log_file) > max_size) + error= new_file_without_locking(); +err: + update_binlog_end_pos(); + DBUG_RETURN(error); +} + +bool MYSQL_BIN_LOG::write_event_buffer(uchar* buf, uint len) +{ + bool error= 1; + uchar *ebuf= 0; + DBUG_ENTER("MYSQL_BIN_LOG::write_event_buffer"); + + DBUG_ASSERT(log_file.type == SEQ_READ_APPEND); + + mysql_mutex_assert_owner(&LOCK_log); + + if (crypto.scheme != 0) + { + DBUG_ASSERT(crypto.scheme == 1); + + uint elen; + uchar iv[BINLOG_IV_LENGTH]; + + ebuf= (uchar*)my_safe_alloca(len); + if (!ebuf) + goto err; + + crypto.set_iv(iv, (uint32)my_b_append_tell(&log_file)); + + /* + we want to encrypt everything, excluding the event length: + massage the data before the encryption + */ + memcpy(buf + EVENT_LEN_OFFSET, buf, 4); + + if (encryption_crypt(buf + 4, len - 4, + ebuf + 4, &elen, + crypto.key, crypto.key_length, iv, sizeof(iv), + ENCRYPTION_FLAG_ENCRYPT | 
ENCRYPTION_FLAG_NOPAD, + ENCRYPTION_KEY_SYSTEM_DATA, crypto.key_version)) + goto err; + + DBUG_ASSERT(elen == len - 4); + + /* massage the data after the encryption */ + memcpy(ebuf, ebuf + EVENT_LEN_OFFSET, 4); + int4store(ebuf + EVENT_LEN_OFFSET, len); + + buf= ebuf; + } + if (my_b_append(&log_file, buf, len)) + goto err; + bytes_written+= len; + + error= 0; + DBUG_PRINT("info",("max_size: %lu",max_size)); + if (flush_and_sync(0)) + goto err; + if (my_b_append_tell(&log_file) > max_size) + error= new_file_without_locking(); +err: + my_safe_afree(ebuf, len); + if (likely(!error)) + update_binlog_end_pos(); + DBUG_RETURN(error); +} + +bool MYSQL_BIN_LOG::flush_and_sync(bool *synced) +{ + int err=0, fd=log_file.file; + if (synced) + *synced= 0; + mysql_mutex_assert_owner(&LOCK_log); + if (flush_io_cache(&log_file)) + return 1; + uint sync_period= get_sync_period(); + if (sync_period && ++sync_counter >= sync_period) + { + sync_counter= 0; + err= mysql_file_sync(fd, MYF(MY_WME)); + if (synced) + *synced= 1; +#ifndef DBUG_OFF + if (opt_binlog_dbug_fsync_sleep > 0) + my_sleep(opt_binlog_dbug_fsync_sleep); +#endif + } + return err; +} + +void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param) +{ + DBUG_ASSERT(!thd->binlog_evt_union.do_union); + thd->binlog_evt_union.do_union= TRUE; + thd->binlog_evt_union.unioned_events= FALSE; + thd->binlog_evt_union.unioned_events_trans= FALSE; + thd->binlog_evt_union.first_query_id= query_id_param; +} + +void MYSQL_BIN_LOG::stop_union_events(THD *thd) +{ + DBUG_ASSERT(thd->binlog_evt_union.do_union); + thd->binlog_evt_union.do_union= FALSE; +} + +bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param) +{ + return (thd->binlog_evt_union.do_union && + query_id_param >= thd->binlog_evt_union.first_query_id); +} + +/** + This function checks if a transactional table was updated by the + current transaction. + + @param thd The client thread that executed the current statement. 
+ @return + @c true if a transactional table was updated, @c false otherwise. +*/ +bool +trans_has_updated_trans_table(const THD* thd) +{ + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + + return (cache_mngr ? !cache_mngr->trx_cache.empty() : 0); +} + +/** + This function checks if a transactional table was updated by the + current statement. + + @param thd The client thread that executed the current statement. + @return + @c true if a transactional table with rollback was updated, + @c false otherwise. +*/ +bool +stmt_has_updated_trans_table(const THD *thd) +{ + Ha_trx_info *ha_info; + + for (ha_info= thd->transaction->stmt.ha_list; ha_info; + ha_info= ha_info->next()) + { + if (ha_info->is_trx_read_write() && + !(ha_info->ht()->flags & HTON_NO_ROLLBACK)) + return (TRUE); + } + return (FALSE); +} + +/** + This function checks if either a trx-cache or a non-trx-cache should + be used. If @c bin_log_direct_non_trans_update is active or the format + is either MIXED or ROW, the cache to be used depends on the flag @c + is_transactional. + + On the other hand, if binlog_format is STMT or direct option is + OFF, the trx-cache should be used if and only if the statement is + transactional or the trx-cache is not empty. Otherwise, the + non-trx-cache should be used. + + @param thd The client thread. + @param is_transactional The changes are related to a trx-table. + @return + @c true if a trx-cache should be used, @c false otherwise. +*/ +bool use_trans_cache(const THD* thd, bool is_transactional) +{ + if (is_transactional) + return 1; + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + + return ((thd->is_current_stmt_binlog_format_row() || + thd->variables.binlog_direct_non_trans_update) ? 0 : + !cache_mngr->trx_cache.empty()); +} + +/** + This function checks if a transaction, either a multi-statement + or a single statement transaction is about to commit or not. 
+ + @param thd The client thread that executed the current statement. + @param all Committing a transaction (i.e. TRUE) or a statement + (i.e. FALSE). + @return + @c true if committing a transaction, otherwise @c false. +*/ +bool ending_trans(THD* thd, const bool all) +{ + return (all || ending_single_stmt_trans(thd, all)); +} + +/** + This function checks if a single statement transaction is about + to commit or not. + + @param thd The client thread that executed the current statement. + @param all Committing a transaction (i.e. TRUE) or a statement + (i.e. FALSE). + @return + @c true if committing a single statement transaction, otherwise + @c false. +*/ +bool ending_single_stmt_trans(THD* thd, const bool all) +{ + return (!all && !thd->in_multi_stmt_transaction_mode()); +} + +/** + This function checks if a non-transactional table was updated by + the current transaction. + + @param thd The client thread that executed the current statement. + @return + @c true if a non-transactional table was updated, @c false + otherwise. +*/ +bool trans_has_updated_non_trans_table(const THD* thd) +{ + return (thd->transaction->all.modified_non_trans_table || + thd->transaction->stmt.modified_non_trans_table); +} + +/** + This function checks if a non-transactional table was updated by the + current statement. + + @param thd The client thread that executed the current statement. + @return + @c true if a non-transactional table was updated, @c false otherwise. +*/ +bool stmt_has_updated_non_trans_table(const THD* thd) +{ + return (thd->transaction->stmt.modified_non_trans_table); +} + +/* + These functions are placed in this file since they need access to + binlog_hton, which has internal linkage. 
+*/ + +binlog_cache_mngr *THD::binlog_setup_trx_data() +{ + DBUG_ENTER("THD::binlog_setup_trx_data"); + binlog_cache_mngr *cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton); + + if (cache_mngr) + DBUG_RETURN(cache_mngr); // Already set up + + cache_mngr= (binlog_cache_mngr*) my_malloc(key_memory_binlog_cache_mngr, + sizeof(binlog_cache_mngr), MYF(MY_ZEROFILL)); + if (!cache_mngr || + open_cached_file(&cache_mngr->stmt_cache.cache_log, mysql_tmpdir, + LOG_PREFIX, (size_t)binlog_stmt_cache_size, MYF(MY_WME)) || + open_cached_file(&cache_mngr->trx_cache.cache_log, mysql_tmpdir, + LOG_PREFIX, (size_t)binlog_cache_size, MYF(MY_WME))) + { + my_free(cache_mngr); + DBUG_RETURN(0); // Didn't manage to set it up + } + thd_set_ha_data(this, binlog_hton, cache_mngr); + + cache_mngr= new (cache_mngr) + binlog_cache_mngr(max_binlog_stmt_cache_size, + max_binlog_cache_size, + &binlog_stmt_cache_use, + &binlog_stmt_cache_disk_use, + &binlog_cache_use, + &binlog_cache_disk_use); + DBUG_RETURN(cache_mngr); +} + + +/* + Two phase logged ALTER getter and setter methods. +*/ +uchar THD::get_binlog_flags_for_alter() +{ + return mysql_bin_log.is_open() ? binlog_setup_trx_data()->gtid_flags3 : 0; +} + +void THD::set_binlog_flags_for_alter(uchar flags) +{ + if (mysql_bin_log.is_open()) + { + // SA must find the flag set empty + DBUG_ASSERT(flags != Gtid_log_event::FL_START_ALTER_E1 || + binlog_setup_trx_data()->gtid_flags3 == 0); + + binlog_setup_trx_data()->gtid_flags3= flags; + } +} + +uint64 THD::get_binlog_start_alter_seq_no() +{ + return mysql_bin_log.is_open() ? binlog_setup_trx_data()->sa_seq_no : 0; +} + +void THD::set_binlog_start_alter_seq_no(uint64 s_no) +{ + if (mysql_bin_log.is_open()) + binlog_setup_trx_data()->sa_seq_no= s_no; +} + + +/* + Function to start a statement and optionally a transaction for the + binary log. 
+ + SYNOPSIS + binlog_start_trans_and_stmt() + + DESCRIPTION + + This function does three things: + - Start a transaction if not in autocommit mode or if a BEGIN + statement has been seen. + + - Start a statement transaction to allow us to truncate the cache. + + - Save the current binlog position so that we can roll back the + statement by truncating the cache. + + We only update the saved position if the old one was undefined, + the reason is that there are some cases (e.g., for CREATE-SELECT) + where the position is saved twice (e.g., both in + select_create::prepare() and binlog_write_table_map()) , but + we should use the first. This means that calls to this function + can be used to start the statement before the first table map + event, to include some extra events. + */ + +void +THD::binlog_start_trans_and_stmt() +{ + binlog_cache_mngr *cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton); + DBUG_ENTER("binlog_start_trans_and_stmt"); + DBUG_PRINT("enter", ("cache_mngr: %p cache_mngr->trx_cache.get_prev_position(): %lu", + cache_mngr, + (cache_mngr ? (ulong) cache_mngr->trx_cache.get_prev_position() : + (ulong) 0))); + + if (cache_mngr == NULL || + cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF) + { + this->binlog_set_stmt_begin(); + bool mstmt_mode= in_multi_stmt_transaction_mode(); +#ifdef WITH_WSREP + /* + With wsrep binlog emulation we can skip the rest because the + binlog cache will not be written into binlog. Note however that + because of this the hton callbacks will not get called to clean + up the cache, so this must be done explicitly when the transaction + terminates. + */ + if (WSREP_EMULATE_BINLOG_NNULL(this)) + { + DBUG_VOID_RETURN; + } + /* If this event replicates through a master-slave then we need to + inject manually GTID so it is preserved in the cluster. We are writing + directly to WSREP buffer and not in IO cache because in case of IO cache + GTID event will be duplicated in binlog. 
+ We have to do this only one time in mysql transaction. + Since this function is called multiple times , We will check for + ha_info->is_started(). + */ + Ha_trx_info *ha_info; + ha_info= this->ha_data[binlog_hton->slot].ha_info + (mstmt_mode ? 1 : 0); + + if (!ha_info->is_started() && + (this->variables.gtid_seq_no || this->variables.wsrep_gtid_seq_no) && + wsrep_on(this) && + (this->wsrep_cs().mode() == wsrep::client_state::m_local)) + { + uchar *buf= 0; + size_t len= 0; + IO_CACHE tmp_io_cache; + Log_event_writer writer(&tmp_io_cache, 0); + if(!open_cached_file(&tmp_io_cache, mysql_tmpdir, TEMP_PREFIX, + 128, MYF(MY_WME))) + { + uint64 seqno= this->variables.gtid_seq_no; + uint32 domain_id= this->variables.gtid_domain_id; + uint32 server_id= this->variables.server_id; + if (!this->variables.gtid_seq_no && this->variables.wsrep_gtid_seq_no) + { + seqno= this->variables.wsrep_gtid_seq_no; + domain_id= wsrep_gtid_server.domain_id; + server_id= wsrep_gtid_server.server_id; + } + Gtid_log_event gtid_event(this, seqno, domain_id, true, + LOG_EVENT_SUPPRESS_USE_F, true, 0); + // Replicated events in writeset doesn't have checksum + gtid_event.checksum_alg= BINLOG_CHECKSUM_ALG_OFF; + gtid_event.server_id= server_id; + writer.write(>id_event); + wsrep_write_cache_buf(&tmp_io_cache, &buf, &len); + if (len > 0) this->wsrep_cs().append_data(wsrep::const_buffer(buf, len)); + if (buf) my_free(buf); + close_cached_file(&tmp_io_cache); + } + } +#endif + if (mstmt_mode) + trans_register_ha(this, TRUE, binlog_hton, 0); + trans_register_ha(this, FALSE, binlog_hton, 0); + /* + Mark statement transaction as read/write. We never start + a binary log transaction and keep it read-only, + therefore it's best to mark the transaction read/write just + at the same time we start it. + Not necessary to mark the normal transaction read/write + since the statement-level flag will be propagated automatically + inside ha_commit_trans. 
+ */ + ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write(); + } + DBUG_VOID_RETURN; +} + +void THD::binlog_set_stmt_begin() { + binlog_cache_mngr *cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton); + + /* + The call to binlog_trans_log_savepos() might create the cache_mngr + structure, if it didn't exist before, so we save the position + into an auto variable and then write it into the transaction + data for the binary log (i.e., cache_mngr). + */ + my_off_t pos= 0; + binlog_trans_log_savepos(this, &pos); + cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton); + cache_mngr->trx_cache.set_prev_position(pos); +} + +static int +binlog_start_consistent_snapshot(handlerton *hton, THD *thd) +{ + int err= 0; + DBUG_ENTER("binlog_start_consistent_snapshot"); + + binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data(); + + /* Server layer calls us with LOCK_commit_ordered locked, so this is safe. */ + mysql_mutex_assert_owner(&LOCK_commit_ordered); + strmake_buf(cache_mngr->last_commit_pos_file, mysql_bin_log.last_commit_pos_file); + cache_mngr->last_commit_pos_offset= mysql_bin_log.last_commit_pos_offset; + + trans_register_ha(thd, TRUE, binlog_hton, 0); + + DBUG_RETURN(err); +} + + +/** + Prepare all tables that are updated for row logging + + Annotate events and table maps are written by binlog_write_table_maps() +*/ + +void THD::binlog_prepare_for_row_logging() +{ + DBUG_ENTER("THD::binlog_prepare_for_row_logging"); + for (TABLE *table= open_tables ; table; table= table->next) + { + if (table->query_id == query_id && table->current_lock == F_WRLCK) + table->file->prepare_for_row_logging(); + } + DBUG_VOID_RETURN; +} + +/** + Write annnotated row event (the query) if needed +*/ + +bool THD::binlog_write_annotated_row(Log_event_writer *writer) +{ + DBUG_ENTER("THD::binlog_write_annotated_row"); + + if (!(IF_WSREP(!wsrep_fragments_certified_for_stmt(this), true) && + variables.binlog_annotate_row_events && + 
query_length())) + DBUG_RETURN(0); + + Annotate_rows_log_event anno(this, 0, false); + DBUG_RETURN(writer->write(&anno)); +} + + +/** + Write table map events for all tables that are using row logging. + This includes all tables used by this statement, including tables + used in triggers. + + Also write annotate events and start transactions. + This is using the "tables_with_row_logging" list prepared by + THD::binlog_prepare_for_row_logging +*/ + +bool THD::binlog_write_table_maps() +{ + bool with_annotate; + MYSQL_LOCK *locks[2], **locks_end= locks; + DBUG_ENTER("THD::binlog_write_table_maps"); + + DBUG_ASSERT(!binlog_table_maps); + DBUG_ASSERT(is_current_stmt_binlog_format_row()); + + /* Initialize cache_mngr once per statement */ + binlog_start_trans_and_stmt(); + with_annotate= 1; // Write annotate with first map + + if ((*locks_end= extra_lock)) + locks_end++; + if ((*locks_end= lock)) + locks_end++; + + for (MYSQL_LOCK **cur_lock= locks ; cur_lock < locks_end ; cur_lock++) + { + TABLE **const end_ptr= (*cur_lock)->table + (*cur_lock)->table_count; + for (TABLE **table_ptr= (*cur_lock)->table; + table_ptr != end_ptr ; + ++table_ptr) + { + TABLE *table= *table_ptr; + bool restore= 0; + /* + We have to also write table maps for tables that have not yet been + used, like for tables in after triggers + */ + if (!table->file->row_logging && + table->query_id != query_id && table->current_lock == F_WRLCK) + { + if (table->file->prepare_for_row_logging()) + restore= 1; + } + if (table->file->row_logging) + { + if (binlog_write_table_map(table, with_annotate)) + DBUG_RETURN(1); + with_annotate= 0; + } + if (restore) + { + /* + Restore original setting so that it doesn't cause problem for the + next statement + */ + table->file->row_logging= table->file->row_logging_init= 0; + } + } + } + binlog_table_maps= 1; // Table maps written + DBUG_RETURN(0); +} + + +/** + This function writes a table map to the binary log. 
+ + If an error occurs while writing events and rollback is not possible, e.g. + due to the statement modifying a non-transactional table, an incident event + is logged. + + @param table a pointer to the table. + @param with_annotate @c true to write an annotate event before writing + the table_map event, @c false otherwise. + @return + nonzero if an error pops up when writing the table map event. +*/ + +bool THD::binlog_write_table_map(TABLE *table, bool with_annotate) +{ + int error= 1; + bool is_transactional= table->file->row_logging_has_trans; + DBUG_ENTER("THD::binlog_write_table_map"); + DBUG_PRINT("enter", ("table: %p (%s: #%lu)", + table, table->s->table_name.str, + table->s->table_map_id)); + + /* Pre-conditions */ + DBUG_ASSERT(table->s->table_map_id != ULONG_MAX); + + /* Ensure that all events in a GTID group are in the same cache */ + if (variables.option_bits & OPTION_GTID_BEGIN) + is_transactional= 1; + + Table_map_log_event + the_event(this, table, table->s->table_map_id, is_transactional); + + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton); + binlog_cache_data *cache_data= (cache_mngr-> + get_binlog_cache_data(is_transactional)); + IO_CACHE *file= &cache_data->cache_log; + Log_event_writer writer(file, cache_data); + + if (with_annotate) + if (binlog_write_annotated_row(&writer)) + goto write_err; + + DBUG_EXECUTE_IF("table_map_write_error", + { + if (is_transactional) + { + my_errno= EFBIG; + goto write_err; + } + }); + + if (unlikely((error= writer.write(&the_event)))) + goto write_err; + + DBUG_RETURN(0); + +write_err: + mysql_bin_log.set_write_error(this, is_transactional); + /* + For non-transactional engine or multi statement transaction with mixed + engines, data is written to table but writing to binary log failed. In + these scenarios rollback is not possible. Hence report an incident. 
+ */ + if (mysql_bin_log.check_write_error(this) && cache_data && + lex->stmt_accessed_table(LEX::STMT_WRITES_NON_TRANS_TABLE) && + table->current_lock == F_WRLCK) + cache_data->set_incident(); + DBUG_RETURN(error); +} + + +/** + This function retrieves a pending row event from a cache which is + specified through the parameter @c is_transactional. Respectively, when it + is @c true, the pending event is returned from the transactional cache. + Otherwise from the non-transactional cache. + + @param is_transactional @c true indicates a transactional cache, + otherwise @c false a non-transactional. + @return + The row event if any. +*/ +Rows_log_event* +THD::binlog_get_pending_rows_event(bool is_transactional) const +{ + Rows_log_event* rows= NULL; + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton); + + /* + This is less than ideal, but here's the story: If there is no cache_mngr, + prepare_pending_rows_event() has never been called (since the cache_mngr + is set up there). In that case, we just return NULL. + */ + if (cache_mngr) + { + binlog_cache_data *cache_data= + cache_mngr->get_binlog_cache_data(use_trans_cache(this, is_transactional)); + + rows= cache_data->pending(); + } + return (rows); +} + +/** + This function stores a pending row event into a cache which is specified + through the parameter @c is_transactional. Respectively, when it is @c + true, the pending event is stored into the transactional cache. Otherwise + into the non-transactional cache. + + @param evt a pointer to the row event. + @param is_transactional @c true indicates a transactional cache, + otherwise @c false a non-transactional. 
+*/ +void +THD::binlog_set_pending_rows_event(Rows_log_event* ev, bool is_transactional) +{ + binlog_cache_mngr *const cache_mngr= binlog_setup_trx_data(); + + DBUG_ASSERT(cache_mngr); + + binlog_cache_data *cache_data= + cache_mngr->get_binlog_cache_data(use_trans_cache(this, is_transactional)); + + cache_data->set_pending(ev); +} + + +/** + This function removes the pending rows event, discarding any outstanding + rows. If there is no pending rows event available, this is effectively a + no-op. + + @param thd a pointer to the user thread. + @param is_transactional @c true indicates a transactional cache, + otherwise @c false a non-transactional. +*/ +int +MYSQL_BIN_LOG::remove_pending_rows_event(THD *thd, bool is_transactional) +{ + DBUG_ENTER("MYSQL_BIN_LOG::remove_pending_rows_event"); + + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + + DBUG_ASSERT(cache_mngr); + + binlog_cache_data *cache_data= + cache_mngr->get_binlog_cache_data(use_trans_cache(thd, is_transactional)); + + if (Rows_log_event* pending= cache_data->pending()) + { + delete pending; + cache_data->set_pending(NULL); + } + + DBUG_RETURN(0); +} + +/* + Moves the last bunch of rows from the pending Rows event to a cache (either + transactional cache if is_transaction is @c true, or the non-transactional + cache otherwise. Sets a new pending event. + + @param thd a pointer to the user thread. + @param evt a pointer to the row event. + @param is_transactional @c true indicates a transactional cache, + otherwise @c false a non-transactional. 
+*/ +int +MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd, + Rows_log_event* event, + bool is_transactional) +{ + DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)"); + DBUG_ASSERT(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()); + DBUG_PRINT("enter", ("event: %p", event)); + + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + + DBUG_ASSERT(cache_mngr); + + binlog_cache_data *cache_data= + cache_mngr->get_binlog_cache_data(use_trans_cache(thd, is_transactional)); + + DBUG_PRINT("info", ("cache_mngr->pending(): %p", cache_data->pending())); + + if (Rows_log_event* pending= cache_data->pending()) + { + Log_event_writer writer(&cache_data->cache_log, cache_data); + + /* + Write pending event to the cache. + */ + DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending", + {DBUG_SET("+d,simulate_file_write_error");}); + if (writer.write(pending)) + { + set_write_error(thd, is_transactional); + if (check_write_error(thd) && cache_data && + stmt_has_updated_non_trans_table(thd)) + cache_data->set_incident(); + delete pending; + cache_data->set_pending(NULL); + DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending", + {DBUG_SET("-d,simulate_file_write_error");}); + DBUG_RETURN(1); + } + + delete pending; + } + + thd->binlog_set_pending_rows_event(event, is_transactional); + + DBUG_RETURN(0); +} + + +/* Generate a new global transaction ID, and write it to the binlog */ + +bool +MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone, + bool is_transactional, uint64 commit_id, + bool has_xid, bool is_ro_1pc) +{ + rpl_gtid gtid; + uint32 domain_id; + uint32 local_server_id; + uint64 seq_no; + int err; + DBUG_ENTER("write_gtid_event"); + DBUG_PRINT("enter", ("standalone: %d", standalone)); + + seq_no= thd->variables.gtid_seq_no; + domain_id= thd->variables.gtid_domain_id; + local_server_id= thd->variables.server_id; + + DBUG_ASSERT(local_server_id != 0); + + if (thd->variables.option_bits & 
OPTION_GTID_BEGIN) + { + DBUG_PRINT("error", ("OPTION_GTID_BEGIN is set. " + "Master and slave will have different GTID values")); + /* Reset the flag, as we will write out a GTID anyway */ + thd->variables.option_bits&= ~OPTION_GTID_BEGIN; + } + + /* + Reset the session variable gtid_seq_no, to reduce the risk of accidentally + producing a duplicate GTID. + */ + thd->variables.gtid_seq_no= 0; + if (seq_no != 0) + { + /* Use the specified sequence number. */ + gtid.domain_id= domain_id; + gtid.server_id= local_server_id; + gtid.seq_no= seq_no; + err= rpl_global_gtid_binlog_state.update(>id, opt_gtid_strict_mode); + if (err && thd->get_stmt_da()->sql_errno()==ER_GTID_STRICT_OUT_OF_ORDER) + errno= ER_GTID_STRICT_OUT_OF_ORDER; + } + else + { + /* Allocate the next sequence number for the GTID. */ + err= rpl_global_gtid_binlog_state.update_with_next_gtid(domain_id, + local_server_id, >id); + seq_no= gtid.seq_no; + } + if (err) + DBUG_RETURN(true); + + thd->set_last_commit_gtid(gtid); + if (thd->get_binlog_flags_for_alter() & Gtid_log_event::FL_START_ALTER_E1) + thd->set_binlog_start_alter_seq_no(gtid.seq_no); + + Gtid_log_event gtid_event(thd, seq_no, domain_id, standalone, + LOG_EVENT_SUPPRESS_USE_F, is_transactional, + commit_id, has_xid, is_ro_1pc); + + /* Write the event to the binary log. 
*/ + DBUG_ASSERT(this == &mysql_bin_log); + +#ifdef WITH_WSREP + if (wsrep_gtid_mode) + { + thd->variables.gtid_domain_id= global_system_variables.gtid_domain_id; + thd->variables.server_id= global_system_variables.server_id; + } +#endif + + if (write_event(>id_event)) + DBUG_RETURN(true); + status_var_add(thd->status_var.binlog_bytes_written, gtid_event.data_written); + + DBUG_RETURN(false); +} + + +int +MYSQL_BIN_LOG::write_state_to_file() +{ + File file_no; + IO_CACHE cache; + char buf[FN_REFLEN]; + int err; + bool opened= false; + bool log_inited= false; + + fn_format(buf, opt_bin_logname, mysql_data_home, ".state", + MY_UNPACK_FILENAME); + if ((file_no= mysql_file_open(key_file_binlog_state, buf, + O_RDWR|O_CREAT|O_TRUNC|O_BINARY, + MYF(MY_WME))) < 0) + { + err= 1; + goto err; + } + opened= true; + if ((err= init_io_cache(&cache, file_no, IO_SIZE, WRITE_CACHE, 0, 0, + MYF(MY_WME|MY_WAIT_IF_FULL)))) + goto err; + log_inited= true; + if ((err= rpl_global_gtid_binlog_state.write_to_iocache(&cache))) + goto err; + log_inited= false; + if ((err= end_io_cache(&cache))) + goto err; + if ((err= mysql_file_sync(file_no, MYF(MY_WME)))) + goto err; + goto end; + +err: + sql_print_error("Error writing binlog state to file '%s'.", buf); + if (log_inited) + end_io_cache(&cache); +end: + if (opened) + mysql_file_close(file_no, MYF(0)); + + return err; +} + + +/* + Initialize the binlog state from the master-bin.state file, at server startup. + + Returns: + 0 for success. + 2 for when .state file did not exist. + 1 for other error. 
+*/ +int +MYSQL_BIN_LOG::read_state_from_file() +{ + File file_no; + IO_CACHE cache; + char buf[FN_REFLEN]; + int err; + bool opened= false; + bool log_inited= false; + + fn_format(buf, opt_bin_logname, mysql_data_home, ".state", + MY_UNPACK_FILENAME); + if ((file_no= mysql_file_open(key_file_binlog_state, buf, + O_RDONLY|O_BINARY, MYF(0))) < 0) + { + if (my_errno != ENOENT) + { + err= 1; + goto err; + } + else + { + /* + If the state file does not exist, this is the first server startup + with GTID enabled. So initialize to empty state. + */ + rpl_global_gtid_binlog_state.reset(); + err= 2; + goto end; + } + } + opened= true; + if ((err= init_io_cache(&cache, file_no, IO_SIZE, READ_CACHE, 0, 0, + MYF(MY_WME|MY_WAIT_IF_FULL)))) + goto err; + log_inited= true; + if ((err= rpl_global_gtid_binlog_state.read_from_iocache(&cache))) + goto err; + goto end; + +err: + sql_print_error("Error reading binlog GTID state from file '%s'.", buf); +end: + if (log_inited) + end_io_cache(&cache); + if (opened) + mysql_file_close(file_no, MYF(0)); + + return err; +} + + +int +MYSQL_BIN_LOG::get_most_recent_gtid_list(rpl_gtid **list, uint32 *size) +{ + return rpl_global_gtid_binlog_state.get_most_recent_gtid_list(list, size); +} + + +bool +MYSQL_BIN_LOG::append_state_pos(String *str) +{ + return rpl_global_gtid_binlog_state.append_pos(str); +} + + +bool +MYSQL_BIN_LOG::append_state(String *str) +{ + return rpl_global_gtid_binlog_state.append_state(str); +} + + +bool +MYSQL_BIN_LOG::is_empty_state() +{ + return (rpl_global_gtid_binlog_state.count() == 0); +} + + +bool +MYSQL_BIN_LOG::find_in_binlog_state(uint32 domain_id, uint32 server_id_arg, + rpl_gtid *out_gtid) +{ + rpl_gtid *gtid; + if ((gtid= rpl_global_gtid_binlog_state.find(domain_id, server_id_arg))) + *out_gtid= *gtid; + return gtid != NULL; +} + + +bool +MYSQL_BIN_LOG::lookup_domain_in_binlog_state(uint32 domain_id, + rpl_gtid *out_gtid) +{ + rpl_gtid *found_gtid; + + if ((found_gtid= 
rpl_global_gtid_binlog_state.find_most_recent(domain_id))) + { + *out_gtid= *found_gtid; + return true; + } + + return false; +} + + +int +MYSQL_BIN_LOG::bump_seq_no_counter_if_needed(uint32 domain_id, uint64 seq_no) +{ + return rpl_global_gtid_binlog_state.bump_seq_no_if_needed(domain_id, seq_no); +} + + +bool +MYSQL_BIN_LOG::check_strict_gtid_sequence(uint32 domain_id, + uint32 server_id_arg, + uint64 seq_no, + bool no_error) +{ + return rpl_global_gtid_binlog_state.check_strict_sequence(domain_id, + server_id_arg, + seq_no, + no_error); +} + + +/** + Write an event to the binary log. If with_annotate != NULL and + *with_annotate = TRUE write also Annotate_rows before the event + (this should happen only if the event is a Table_map). +*/ + +bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate) +{ + THD *thd= event_info->thd; + bool error= 1; + binlog_cache_data *cache_data= 0; + bool is_trans_cache= FALSE; + bool using_trans= event_info->use_trans_cache(); + bool direct= event_info->use_direct_logging(); + ulong UNINIT_VAR(prev_binlog_id); + DBUG_ENTER("MYSQL_BIN_LOG::write(Log_event *)"); + + /* + When binary logging is not enabled (--log-bin=0), wsrep-patch partially + enables it without opening the binlog file (MYSQL_BIN_LOG::open(). + So, avoid writing to binlog file. + */ + if (direct && + (wsrep_emulate_bin_log || + (WSREP(thd) && !(thd->variables.option_bits & OPTION_BIN_LOG)))) + DBUG_RETURN(0); + + if (thd->variables.option_bits & + (OPTION_GTID_BEGIN | OPTION_BIN_COMMIT_OFF)) + { + DBUG_PRINT("info", ("OPTION_GTID_BEGIN was set")); + /* Wait for commit from binary log before we commit */ + direct= 0; + using_trans= 1; + /* Set cache_type to ensure we don't get checksums for this event */ + event_info->cache_type= Log_event::EVENT_TRANSACTIONAL_CACHE; + } + + if (thd->binlog_evt_union.do_union) + { + /* + In Stored function; Remember that function call caused an update. 
+ We will log the function call to the binary log on function exit + */ + thd->binlog_evt_union.unioned_events= TRUE; + thd->binlog_evt_union.unioned_events_trans |= using_trans; + DBUG_RETURN(0); + } + + /* + We only end the statement if we are in a top-level statement. If + we are inside a stored function, we do not end the statement since + this will close all tables on the slave. But there can be a special case + where we are inside a stored function/trigger and a SAVEPOINT is being + set in side the stored function/trigger. This SAVEPOINT execution will + force the pending event to be flushed without an STMT_END_F flag. This + will result in a case where following DMLs will be considered as part of + same statement and result in data loss on slave. Hence in this case we + force the end_stmt to be true. + */ + bool const end_stmt= (thd->in_sub_stmt && thd->lex->sql_command == + SQLCOM_SAVEPOINT) ? true : + (thd->locked_tables_mode && thd->lex->requires_prelocking()); + if (thd->binlog_flush_pending_rows_event(end_stmt, using_trans)) + DBUG_RETURN(error); + + /* + In most cases this is only called if 'is_open()' is true; in fact this is + mostly called if is_open() *was* true a few instructions before, but it + could have changed since. + */ + /* applier and replayer can skip writing binlog events */ + if ((WSREP_EMULATE_BINLOG(thd) && + IF_WSREP(thd->wsrep_cs().mode() == wsrep::client_state::m_local, 0)) || is_open()) + { + my_off_t UNINIT_VAR(my_org_b_tell); +#ifdef HAVE_REPLICATION + /* + In the future we need to add to the following if tests like + "do the involved tables match (to be implemented) + binlog_[wild_]{do|ignore}_table?" (WL#1049)" + */ + const char *local_db= event_info->get_db(); + + bool option_bin_log_flag= (thd->variables.option_bits & OPTION_BIN_LOG); + + /* + Log all updates to binlog cache so that they can get replicated to other + nodes. A check has been added to stop them from getting logged into + binary log files. 
+ */ + if (WSREP(thd)) + option_bin_log_flag= true; + + if ((!(option_bin_log_flag)) || + (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT && + thd->lex->sql_command != SQLCOM_SAVEPOINT && + !binlog_filter->db_ok(local_db))) + DBUG_RETURN(0); +#endif /* HAVE_REPLICATION */ + + IO_CACHE *file= NULL; + + if (direct) + { + /* We come here only for incident events */ + int res; + uint64 commit_id= 0; + MDL_request mdl_request; + DBUG_PRINT("info", ("direct is set")); + DBUG_ASSERT(!thd->backup_commit_lock); + + MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, + MDL_EXPLICIT); + if (thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout)) + DBUG_RETURN(1); + thd->backup_commit_lock= &mdl_request; + + if ((res= thd->wait_for_prior_commit())) + { + if (mdl_request.ticket) + thd->mdl_context.release_lock(mdl_request.ticket); + thd->backup_commit_lock= 0; + DBUG_RETURN(res); + } + file= &log_file; + my_org_b_tell= my_b_tell(file); + mysql_mutex_lock(&LOCK_log); + prev_binlog_id= current_binlog_id; + DBUG_EXECUTE_IF("binlog_force_commit_id", + { + const LEX_CSTRING commit_name= { STRING_WITH_LEN("commit_id") }; + bool null_value; + user_var_entry *entry= + (user_var_entry*) my_hash_search(&thd->user_vars, + (uchar*) commit_name.str, + commit_name.length); + commit_id= entry->val_int(&null_value); + }); + res= write_gtid_event(thd, true, using_trans, commit_id); + if (mdl_request.ticket) + thd->mdl_context.release_lock(mdl_request.ticket); + thd->backup_commit_lock= 0; + if (res) + goto err; + } + else + { + binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data(); + if (!cache_mngr) + goto err; + + is_trans_cache= use_trans_cache(thd, using_trans); + cache_data= cache_mngr->get_binlog_cache_data(is_trans_cache); + file= &cache_data->cache_log; + + if (thd->lex->stmt_accessed_non_trans_temp_table() && is_trans_cache) + thd->transaction->stmt.mark_modified_non_trans_temp_table(); + 
thd->binlog_start_trans_and_stmt(); + } + DBUG_PRINT("info",("event type: %d",event_info->get_type_code())); + + /* + No check for auto events flag here - this write method should + never be called if auto-events are enabled. + + Write first log events which describe the 'run environment' + of the SQL command. If row-based binlogging, Insert_id, Rand + and other kind of "setting context" events are not needed. + */ + + if (with_annotate && *with_annotate) + { + DBUG_ASSERT(event_info->get_type_code() == TABLE_MAP_EVENT); + Annotate_rows_log_event anno(thd, using_trans, direct); + /* Annotate event should be written not more than once */ + *with_annotate= 0; + if (write_event(&anno, cache_data, file)) + goto err; + } + + { + if (!thd->is_current_stmt_binlog_format_row()) + { + if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt) + { + Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT, + thd->first_successful_insert_id_in_prev_stmt_for_binlog, + using_trans, direct); + if (write_event(&e, cache_data, file)) + goto err; + } + if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0) + { + DBUG_PRINT("info",("number of auto_inc intervals: %u", + thd->auto_inc_intervals_in_cur_stmt_for_binlog. + nb_elements())); + Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT, + thd->auto_inc_intervals_in_cur_stmt_for_binlog. 
+ minimum(), using_trans, direct); + if (write_event(&e, cache_data, file)) + goto err; + } + if (thd->used & THD::RAND_USED) + { + Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2, + using_trans, direct); + if (write_event(&e, cache_data, file)) + goto err; + } + if (thd->user_var_events.elements) + { + for (uint i= 0; i < thd->user_var_events.elements; i++) + { + BINLOG_USER_VAR_EVENT *user_var_event; + get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i); + + /* setting flags for user var log event */ + uchar flags= User_var_log_event::UNDEF_F; + if (user_var_event->unsigned_flag) + flags|= User_var_log_event::UNSIGNED_F; + + User_var_log_event e(thd, user_var_event->user_var_event->name.str, + user_var_event->user_var_event->name.length, + user_var_event->value, + user_var_event->length, + user_var_event->type, + user_var_event->charset_number, + flags, + using_trans, + direct); + if (write_event(&e, cache_data, file)) + goto err; + } + } + } + } + + /* + Write the event. 
+ */ + if (write_event(event_info, cache_data, file) || + DBUG_IF("injecting_fault_writing")) + goto err; + + error= 0; +err: + if (direct) + { + my_off_t offset= my_b_tell(file); + bool check_purge= false; + DBUG_ASSERT(!is_relay_log); + + if (likely(!error)) + { + bool synced; + + if ((error= flush_and_sync(&synced))) + { + } + else + { + mysql_mutex_assert_not_owner(&LOCK_prepare_ordered); + mysql_mutex_assert_owner(&LOCK_log); + mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync); + mysql_mutex_assert_not_owner(&LOCK_commit_ordered); +#ifdef HAVE_REPLICATION + if (repl_semisync_master.report_binlog_update(thd, log_file_name, + file->pos_in_file)) + { + sql_print_error("Failed to run 'after_flush' hooks"); + error= 1; + } + else +#endif + { + /* + update binlog_end_pos so it can be read by dump thread + note: must be _after_ the RUN_HOOK(after_flush) or else + semi-sync might not have put the transaction into + it's list before dump-thread tries to send it + */ + update_binlog_end_pos(offset); + if (unlikely((error= rotate(false, &check_purge)))) + check_purge= false; + } + } + } + + status_var_add(thd->status_var.binlog_bytes_written, + offset - my_org_b_tell); + + mysql_mutex_lock(&LOCK_after_binlog_sync); + mysql_mutex_unlock(&LOCK_log); + + mysql_mutex_assert_not_owner(&LOCK_prepare_ordered); + mysql_mutex_assert_not_owner(&LOCK_log); + mysql_mutex_assert_owner(&LOCK_after_binlog_sync); + mysql_mutex_assert_not_owner(&LOCK_commit_ordered); +#ifdef HAVE_REPLICATION + if (repl_semisync_master.wait_after_sync(log_file_name, + file->pos_in_file)) + { + error=1; + /* error is already printed inside hook */ + } +#endif + + /* + Take mutex to protect against a reader seeing partial writes of 64-bit + offset on 32-bit CPUs. 
+ */ + mysql_mutex_lock(&LOCK_commit_ordered); + mysql_mutex_unlock(&LOCK_after_binlog_sync); + last_commit_pos_offset= offset; + mysql_mutex_unlock(&LOCK_commit_ordered); + + if (check_purge) + checkpoint_and_purge(prev_binlog_id); + } + + if (unlikely(error)) + { + set_write_error(thd, is_trans_cache); + if (check_write_error(thd) && cache_data && + stmt_has_updated_non_trans_table(thd)) + cache_data->set_incident(); + } + } + + DBUG_RETURN(error); +} + + +int error_log_print(enum loglevel level, const char *format, + va_list args) +{ + return logger.error_log_print(level, format, args); +} + + +bool slow_log_print(THD *thd, const char *query, uint query_length, + ulonglong current_utime) +{ + return logger.slow_log_print(thd, query, query_length, current_utime); +} + + +/** + Decide if we should log the command to general log + + @retval + FALSE No logging + TRUE Ok to log +*/ + +bool LOGGER::log_command(THD *thd, enum enum_server_command command) +{ + /* + Log command if we have at least one log event handler enabled and want + to log this king of commands + */ + if (!(*general_log_handler_list && (what_to_log & (1L << (uint) command)))) + return FALSE; + + /* + If LOG_SLOW_DISABLE_SLAVE is set when slave thread starts, then + OPTION_LOG_OFF is set. + Only the super user can set this bit. + */ + return !(thd->variables.option_bits & OPTION_LOG_OFF); +} + + +bool general_log_print(THD *thd, enum enum_server_command command, + const char *format, ...) +{ + va_list args; + uint error= 0; + + /* Print the message to the buffer if we want to log this kind of commands */ + if (! 
logger.log_command(thd, command)) + return FALSE; + + va_start(args, format); + error= logger.general_log_print(thd, command, format, args); + va_end(args); + + return error; +} + +bool general_log_write(THD *thd, enum enum_server_command command, + const char *query, size_t query_length) +{ + /* Write the message to the log if we want to log this king of commands */ + if (logger.log_command(thd, command) || mysql_audit_general_enabled()) + return logger.general_log_write(thd, command, query, query_length); + + return FALSE; +} + + +static void +binlog_checkpoint_callback(void *cookie) +{ + MYSQL_BIN_LOG::xid_count_per_binlog *entry= + (MYSQL_BIN_LOG::xid_count_per_binlog *)cookie; + /* + For every supporting engine, we increment the xid_count and issue a + commit_checkpoint_request(). Then we can count when all + commit_checkpoint_notify() callbacks have occurred, and then log a new + binlog checkpoint event. + */ + mysql_bin_log.mark_xids_active(entry->binlog_id, 1); +} + + +/* + Request a commit checkpoint from each supporting engine. + This must be called after each binlog rotate, and after LOCK_log has been + released. The xid_count value in the xid_count_per_binlog entry was + incremented by 1 and will be decremented in this function; this ensures + that the entry will not go away early despite LOCK_log not being held. +*/ +void +MYSQL_BIN_LOG::do_checkpoint_request(ulong binlog_id) +{ + xid_count_per_binlog *entry; + + /* + Find the binlog entry, and invoke commit_checkpoint_request() on it in + each supporting storage engine. + */ + mysql_mutex_lock(&LOCK_xid_list); + I_List_iterator it(binlog_xid_count_list); + do { + entry= it++; + DBUG_ASSERT(entry /* binlog_id is always somewhere in the list. 
*/); + } while (entry->binlog_id != binlog_id); + mysql_mutex_unlock(&LOCK_xid_list); + + ha_commit_checkpoint_request(entry, binlog_checkpoint_callback); + /* + When we rotated the binlog, we incremented xid_count to make sure the + entry would not go away until this point, where we have done all necessary + commit_checkpoint_request() calls. + So now we can (and must) decrease the count - when it reaches zero, we + will know that both all pending unlog() and all pending + commit_checkpoint_notify() calls are done, and we can log a new binlog + checkpoint. + */ + mark_xid_done(binlog_id, true); +} + + +/** + The method executes rotation when LOCK_log is already acquired + by the caller. + + @param force_rotate caller can request the log rotation + @param check_purge is set to true if rotation took place + + @note + Caller _must_ check the check_purge variable. If this is set, it means + that the binlog was rotated, and caller _must_ ensure that + do_checkpoint_request() is called later with the binlog_id of the rotated + binlog file. The call to do_checkpoint_request() must happen after + LOCK_log is released (which is why we cannot simply do it here). + Usually, checkpoint_and_purge() is appropriate, as it will both handle + the checkpointing and any needed purging of old logs. + + @note + If rotation fails, for instance the server was unable + to create a new log file, we still try to write an + incident event to the current log. + + @retval + nonzero - error in rotating routine. 
+*/
+int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
+{
+  int error= 0;
+  DBUG_ENTER("MYSQL_BIN_LOG::rotate");
+
+#ifdef WITH_WSREP
+  /* Do not rotate while a wsrep TO-isolated operation is in progress. */
+  if (WSREP_ON && wsrep_to_isolation)
+  {
+    *check_purge= false;
+    WSREP_DEBUG("avoiding binlog rotate due to TO isolation: %d",
+                wsrep_to_isolation);
+    DBUG_RETURN(0);
+  }
+#endif /* WITH_WSREP */
+
+  // TODO: fix the macro def and restore safe_mutex_assert_owner(&LOCK_log);
+  *check_purge= false;
+
+  /* Rotate if explicitly requested, or if the current file grew past max_size. */
+  if (force_rotate || (my_b_tell(&log_file) >= (my_off_t) max_size))
+  {
+    ulong binlog_id= current_binlog_id;
+    /*
+      We rotate the binlog, so we need to start a commit checkpoint in all
+      supporting engines - when it finishes, we can log a new binlog checkpoint
+      event.
+
+      But we cannot start the checkpoint here - there could be a group commit
+      still in progress which needs to be included in the checkpoint, and
+      besides we do not want to do the (possibly expensive) checkpoint while
+      LOCK_log is held.
+
+      On the other hand, we must be sure that the xid_count entry for the
+      previous log does not go away until we start the checkpoint - which it
+      could do as it is no longer the most recent. So we increment xid_count
+      (to count the pending checkpoint request) - this will fix the entry in
+      place until we decrement again in do_checkpoint_request().
+    */
+    mark_xids_active(binlog_id, 1);
+
+    if (unlikely((error= new_file_without_locking())))
+    {
+      /**
+         Be conservative... There are possible lost events (eg,
+         failing to log the Execute_load_query_log_event
+         on a LOAD DATA while using a non-transactional
+         table)!
+
+         We give it a shot and try to write an incident event anyway
+         to the current log.
+      */
+      if (!write_incident_already_locked(current_thd))
+        flush_and_sync(0);
+
+      /*
+        We failed to rotate - so we have to decrement the xid_count back that
+        we incremented before attempting the rotate.
+      */
+      mark_xid_done(binlog_id, false);
+    }
+    else
+      *check_purge= true;
+  }
+  DBUG_RETURN(error);
+}
+
+/**
+  The method executes the logs purging routine.
+
+  Purges binlog files older than binlog_expire_logs_seconds. Returns
+  nothing; any purge errors are handled inside purge_logs_before_date().
+  Must be called without LOCK_log held.
+*/
+void MYSQL_BIN_LOG::purge()
+{
+  mysql_mutex_assert_not_owner(&LOCK_log);
+#ifdef HAVE_REPLICATION
+  if (binlog_expire_logs_seconds)
+  {
+    DEBUG_SYNC(current_thd, "at_purge_logs_before_date");
+    time_t purge_time= my_time(0) - binlog_expire_logs_seconds;
+    DBUG_EXECUTE_IF("expire_logs_always", { purge_time = my_time(0); });
+    if (purge_time >= 0)
+    {
+      purge_logs_before_date(purge_time);
+    }
+    DEBUG_SYNC(current_thd, "after_purge_logs_before_date");
+  }
+#endif
+}
+
+
+/*
+  Issue the pending commit-checkpoint request for the given binlog id,
+  then purge expired log files. Must be called after LOCK_log is released
+  (see the notes on rotate()).
+*/
+void MYSQL_BIN_LOG::checkpoint_and_purge(ulong binlog_id)
+{
+  do_checkpoint_request(binlog_id);
+  purge();
+}
+
+
+/**
+   Searches for the first (oldest) binlog file name in the binlog index.
+
+   @param[in,out]  buf_arg  pointer to a buffer to hold the found
+                            first binary log file name
+   @return NULL on success, otherwise error message
+*/
+static const char* get_first_binlog(char* buf_arg)
+{
+  IO_CACHE *index_file;
+  size_t length;
+  char fname[FN_REFLEN];
+  const char* errmsg= NULL;
+
+  DBUG_ENTER("get_first_binlog");
+
+  DBUG_ASSERT(mysql_bin_log.is_open());
+
+  mysql_bin_log.lock_index();
+
+  index_file=mysql_bin_log.get_index_file();
+  if (reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 0))
+  {
+    errmsg= "failed to create a cache on binlog index";
+    goto end;
+  }
+  /* The file ends with EOF or empty line */
+  if ((length=my_b_gets(index_file, fname, sizeof(fname))) <= 1)
+  {
+    errmsg= "empty binlog index";
+    goto end;
+  }
+  else
+  {
+    fname[length-1]= 0; // Remove end \n
+  }
+  if (normalize_binlog_name(buf_arg, fname, false))
+  {
+    errmsg= "could not normalize the first file name in the binlog index";
+    goto end;
+  }
+end:
+  mysql_bin_log.unlock_index();
+
+  DBUG_RETURN(errmsg);
+}
+
+/**
+  Check whether the gtid binlog state can safely remove gtid
+  domains passed as the
argument. A safety condition is satisfied when
+  there are no events from the being deleted domains in the currently existing
+  binlog files. Upon successful check the supplied domains are removed
+  from @@gtid_binlog_state. The caller is supposed to rotate binlog so that
+  the active latest file won't have the deleted domains in its Gtid_list header.
+
+  @param domain_drop_lex  gtid domain id sequence from lex.
+                          Passed as a pointer to a dynamic array; the array
+                          must be non-empty unless the pointer itself is NULL.
+  @retval zero  on success
+  @retval > 0   ineffective call: nothing from the *non*-empty
+                gtid domain sequence was deleted
+  @retval < 0   on error
+*/
+static int do_delete_gtid_domain(DYNAMIC_ARRAY *domain_drop_lex)
+{
+  int rc= 0;
+  Gtid_list_log_event *glev= NULL;
+  char buf[FN_REFLEN];
+  File file;
+  IO_CACHE cache;
+  const char* errmsg= NULL;
+  char errbuf[MYSQL_ERRMSG_SIZE]= {0};
+
+  if (!domain_drop_lex)
+    return 0; // still "effective" having empty domain sequence to delete
+
+  DBUG_ASSERT(domain_drop_lex->elements > 0);
+  mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
+
+  /*
+    Read the Gtid_list event of the oldest binlog file; it describes which
+    domains still have events in the existing binlogs.
+  */
+  if ((errmsg= get_first_binlog(buf)) != NULL)
+    goto end;
+  bzero((char*) &cache, sizeof(cache));
+  if ((file= open_binlog(&cache, buf, &errmsg)) == (File) -1)
+    goto end;
+  errmsg= get_gtid_list_event(&cache, &glev);
+  end_io_cache(&cache);
+  mysql_file_close(file, MYF(MY_WME));
+
+  DBUG_EXECUTE_IF("inject_binlog_delete_domain_init_error",
+                  errmsg= "injected error";);
+  if (errmsg)
+    goto end;
+  /* Attempt the actual removal from the global gtid binlog state. */
+  errmsg= rpl_global_gtid_binlog_state.drop_domain(domain_drop_lex,
+                                                   glev, errbuf);
+
+end:
+  /*
+    An empty (but non-NULL) error message encodes the "ineffective call"
+    case: nothing was deleted, but it is not a hard error.
+  */
+  if (errmsg)
+  {
+    if (strlen(errmsg) > 0)
+    {
+      my_error(ER_BINLOG_CANT_DELETE_GTID_DOMAIN, MYF(0), errmsg);
+      rc= -1;
+    }
+    else
+    {
+      rc= 1;
+    }
+  }
+  delete glev;
+
+  return rc;
+}
+
+/**
+  The method is a shortcut of @c rotate() and @c purge().
+  LOCK_log is acquired prior to rotate and is released after it.
+
+  @param force_rotate  caller can request the log rotation
+
+  @retval
+    nonzero - error in rotating routine.
+*/
+int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate,
+                                    DYNAMIC_ARRAY *domain_drop_lex)
+{
+  int err_gtid=0, error= 0;
+  ulong prev_binlog_id;
+  DBUG_ENTER("MYSQL_BIN_LOG::rotate_and_purge");
+  bool check_purge= false;
+
+  mysql_mutex_lock(&LOCK_log);
+
+  DEBUG_SYNC(current_thd, "rotate_after_acquire_LOCK_log");
+
+  prev_binlog_id= current_binlog_id;
+
+  if ((err_gtid= do_delete_gtid_domain(domain_drop_lex)))
+  {
+    // An ineffective attempt to delete merely skips rotate and purge
+    if (err_gtid < 0)
+      error= 1; // otherwise the error is propagated to the user
+  }
+  else if (unlikely((error= rotate(force_rotate, &check_purge))))
+    check_purge= false;
+
+  DEBUG_SYNC(current_thd, "rotate_after_rotate");
+
+  /*
+    NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
+          the mutex. Otherwise causes various deadlocks.
+          Explicit binlog rotation must be synchronized with a concurrent
+          binlog ordered commit, in particular not let binlog
+          checkpoint notification request until early binlogged
+          concurrent commits have been completed.
+  */
+  mysql_mutex_lock(&LOCK_after_binlog_sync);
+  mysql_mutex_unlock(&LOCK_log);
+  mysql_mutex_lock(&LOCK_commit_ordered);
+  mysql_mutex_unlock(&LOCK_after_binlog_sync);
+  mysql_mutex_unlock(&LOCK_commit_ordered);
+
+  if (check_purge)
+    checkpoint_and_purge(prev_binlog_id);
+
+  DBUG_RETURN(error);
+}
+
+/* Return a new unique file id, serialized under LOCK_log. */
+uint MYSQL_BIN_LOG::next_file_id()
+{
+  uint res;
+  mysql_mutex_lock(&LOCK_log);
+  res = file_id++;
+  mysql_mutex_unlock(&LOCK_log);
+  return res;
+}
+
+/*
+  Log_event_writer that splits each event into header/data/footer writes,
+  tracking the remaining bytes of the current event in `remains`, and that
+  accounts all bytes written into the THD's binlog_bytes_written status
+  counter on destruction.
+*/
+class CacheWriter: public Log_event_writer
+{
+public:
+  size_t remains;                        // bytes of the current event not yet written
+
+  CacheWriter(THD *thd_arg, IO_CACHE *file_arg, bool do_checksum,
+              Binlog_crypt_data *cr)
+    : Log_event_writer(file_arg, 0, cr), remains(0), thd(thd_arg),
+      first(true)
+  { checksum_len= do_checksum ? BINLOG_CHECKSUM_LEN : 0; }
+
+  ~CacheWriter()
+  { status_var_add(thd->status_var.binlog_bytes_written, bytes_written); }
+
+  int write(uchar* pos, size_t len)
+  {
+    DBUG_ENTER("CacheWriter::write");
+    /* First chunk of an event is its header; the rest is data. */
+    if (first)
+      write_header(pos, len);
+    else
+      write_data(pos, len);
+
+    remains -= len;
+    /* Event complete: emit the footer (checksum) and reset for the next one. */
+    if ((first= !remains))
+      write_footer();
+    DBUG_RETURN(0);
+  }
+private:
+  THD *thd;
+  bool first;                            // true while the next write is an event header
+};
+
+/*
+  Write the contents of a cache to the binary log.
+
+  SYNOPSIS
+    write_cache()
+    thd      Current thread
+    cache    Cache to write to the binary log
+
+  DESCRIPTION
+    Write the contents of the cache to the binary log. The cache will
+    be reset as a READ_CACHE to be able to read the contents from it.
+
+    Reading from the trans cache with possible (per @c binlog_checksum_options)
+    adding checksum value and then fixing the length and the end_log_pos of
+    events prior to filling in the binlog cache.
+*/
+
+int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache)
+{
+  DBUG_ENTER("MYSQL_BIN_LOG::write_cache");
+
+  mysql_mutex_assert_owner(&LOCK_log);
+  if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
+    DBUG_RETURN(ER_ERROR_ON_WRITE);
+  size_t length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
+  size_t val;
+  size_t end_log_pos_inc= 0; // each event processed adds BINLOG_CHECKSUM_LEN to end_log_pos
+  uchar header[LOG_EVENT_HEADER_LEN];
+  CacheWriter writer(thd, &log_file, binlog_checksum_options, &crypto);
+
+  if (crypto.scheme)
+  {
+    writer.ctx= alloca(crypto.ctx_size);
+    writer.set_encrypted_writer();
+  }
+  // while there is just one alg the following must hold:
+  DBUG_ASSERT(binlog_checksum_options == BINLOG_CHECKSUM_ALG_OFF ||
+              binlog_checksum_options == BINLOG_CHECKSUM_ALG_CRC32);
+
+  /*
+    The events in the buffer have incorrect end_log_pos data
+    (relative to beginning of group rather than absolute),
+    so we'll recalculate them in situ so the binlog is always
+    correct, even in the middle of a group. This is possible
+    because we now know the start position of the group (the
+    offset of this cache in the log, if you will); all we need
+    to do is to find all event-headers, and add the position of
+    the group to the end_log_pos of each event.  This is pretty
+    straightforward, except that we read the cache in segments,
+    so an event-header might end up on the cache-border and get
+    split.
+  */
+
+  group= (size_t)my_b_tell(&log_file);
+  hdr_offs= carry= 0;
+
+  do
+  {
+    /*
+      if we only got a partial header in the last iteration,
+      get the other half now and process a full header.
+    */
+    if (unlikely(carry > 0))
+    {
+      DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN);
+      size_t tail= LOG_EVENT_HEADER_LEN - carry;
+
+      /* assemble both halves */
+      memcpy(&header[carry], (char *)cache->read_pos, tail);
+
+      uint32 len= uint4korr(header + EVENT_LEN_OFFSET);
+      writer.remains= len;
+
+      /* fix end_log_pos */
+      end_log_pos_inc += writer.checksum_len;
+      val= uint4korr(header + LOG_POS_OFFSET) + group + end_log_pos_inc;
+      int4store(header + LOG_POS_OFFSET, val);
+
+      /* fix len */
+      len+= writer.checksum_len;
+      int4store(header + EVENT_LEN_OFFSET, len);
+
+      if (writer.write(header, LOG_EVENT_HEADER_LEN))
+        DBUG_RETURN(ER_ERROR_ON_WRITE);
+
+      cache->read_pos+= tail;
+      length-= tail;
+      carry= 0;
+
+      /* next event header at ... */
+      hdr_offs= len - LOG_EVENT_HEADER_LEN - writer.checksum_len;
+    }
+
+    /* if there is anything to write, process it. */
+
+    if (likely(length > 0))
+    {
+      DBUG_EXECUTE_IF("fail_binlog_write_1",
+                      errno= 28; DBUG_RETURN(ER_ERROR_ON_WRITE););
+      /*
+        process all event-headers in this (partial) cache.
+        if next header is beyond current read-buffer,
+        we'll get it later (though not necessarily in the
+        very next iteration, just "eventually").
+      */
+
+      if (hdr_offs >= length)
+      {
+        /* The next header lies beyond this segment: flush it whole. */
+        if (writer.write(cache->read_pos, length))
+          DBUG_RETURN(ER_ERROR_ON_WRITE);
+      }
+
+      while (hdr_offs < length)
+      {
+        /*
+          finish off with remains of the last event that crawls
+          from previous into the current buffer
+        */
+        if (writer.remains != 0)
+        {
+          if (writer.write(cache->read_pos, hdr_offs))
+            DBUG_RETURN(ER_ERROR_ON_WRITE);
+        }
+
+        /*
+          partial header only? save what we can get, process once
+          we get the rest.
+        */
+        if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
+        {
+          carry= length - hdr_offs;
+          memcpy(header, (char *)cache->read_pos + hdr_offs, carry);
+          length= hdr_offs;
+        }
+        else
+        {
+          /* we've got a full event-header, and it came in one piece */
+          uchar *ev= (uchar *)cache->read_pos + hdr_offs;
+          uint ev_len= uint4korr(ev + EVENT_LEN_OFFSET); // netto len
+          uchar *log_pos= ev + LOG_POS_OFFSET;
+
+          end_log_pos_inc += writer.checksum_len;
+          /* fix end_log_pos */
+          val= uint4korr(log_pos) + group + end_log_pos_inc;
+          int4store(log_pos, val);
+
+          /* fix length */
+          int4store(ev + EVENT_LEN_OFFSET, ev_len + writer.checksum_len);
+
+          writer.remains= ev_len;
+          if (writer.write(ev, MY_MIN(ev_len, length - hdr_offs)))
+            DBUG_RETURN(ER_ERROR_ON_WRITE);
+
+          /* next event header at ... */
+          hdr_offs += ev_len; // incr by the netto len
+
+          DBUG_ASSERT(!writer.checksum_len || writer.remains == 0 || hdr_offs >= length);
+        }
+      }
+
+      /*
+        Adjust hdr_offs. Note that it may still point beyond the segment
+        read in the next iteration; if the current event is very long,
+        it may take a couple of read-iterations (and subsequent adjustments
+        of hdr_offs) for it to point into the then-current segment.
+        If we have a split header (carry > 0), hdr_offs will be set at the
+        beginning of the next iteration, overwriting the value we set here:
+      */
+      hdr_offs -= length;
+    }
+  } while ((length= my_b_fill(cache)));
+
+  DBUG_ASSERT(carry == 0);
+  DBUG_ASSERT(!writer.checksum_len || writer.remains == 0);
+
+  DBUG_RETURN(0);                               // All OK
+}
+
+/*
+  Helper function to get the error code of the query to be binlogged.
+ */
+int query_error_code(THD *thd, bool not_killed)
+{
+  int error;
+
+  if (not_killed || (killed_mask_hard(thd->killed) == KILL_BAD_DATA))
+  {
+    error= thd->is_error() ? thd->get_stmt_da()->sql_errno() : 0;
+    if (!error)
+      return error;
+
+    /* thd->get_stmt_da()->sql_errno() might be ER_SERVER_SHUTDOWN or
+       ER_QUERY_INTERRUPTED, so here we need to make sure that error
+       is not set to these errors when not_killed was specified by the
+       caller.
+    */
+    if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED ||
+        error == ER_NEW_ABORTING_CONNECTION || error == ER_CONNECTION_KILLED)
+      error= 0;
+  }
+  else
+  {
+    /* killed status for DELAYED INSERT thread should never be used */
+    DBUG_ASSERT(!(thd->system_thread & SYSTEM_THREAD_DELAYED_INSERT));
+    error= thd->killed_errno();
+  }
+
+  return error;
+}
+
+
+/*
+  Write an INCIDENT_LOST_EVENTS incident event to the binlog.
+  The caller must already hold LOCK_log.
+*/
+bool MYSQL_BIN_LOG::write_incident_already_locked(THD *thd)
+{
+  uint error= 0;
+  DBUG_ENTER("MYSQL_BIN_LOG::write_incident_already_locked");
+  Incident incident= INCIDENT_LOST_EVENTS;
+  Incident_log_event ev(thd, incident, &write_error_msg);
+
+  if (likely(is_open()))
+  {
+    error= write_event(&ev);
+    status_var_add(thd->status_var.binlog_bytes_written, ev.data_written);
+  }
+
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Write an incident event under LOCK_log, flush/sync it, rotate if needed,
+  and report any failures in the error log.
+*/
+bool MYSQL_BIN_LOG::write_incident(THD *thd)
+{
+  uint error= 0;
+  my_off_t offset;
+  bool check_purge= false;
+  ulong prev_binlog_id;
+  DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
+
+  mysql_mutex_lock(&LOCK_log);
+  if (likely(is_open()))
+  {
+    prev_binlog_id= current_binlog_id;
+    if (likely(!(error= DBUG_IF("incident_event_write_error")
+                 ? 1
+                 : write_incident_already_locked(thd))) &&
+        likely(!(error= flush_and_sync(0))))
+    {
+      update_binlog_end_pos();
+      if (unlikely((error= rotate(false, &check_purge))))
+        check_purge= false;
+    }
+
+    offset= my_b_tell(&log_file);
+
+    update_binlog_end_pos(offset);
+
+    /*
+      Take mutex to protect against a reader seeing partial writes of 64-bit
+      offset on 32-bit CPUs.
+    */
+    mysql_mutex_lock(&LOCK_commit_ordered);
+    last_commit_pos_offset= offset;
+    mysql_mutex_unlock(&LOCK_commit_ordered);
+    mysql_mutex_unlock(&LOCK_log);
+
+    if (check_purge)
+      checkpoint_and_purge(prev_binlog_id);
+  }
+  else
+  {
+    mysql_mutex_unlock(&LOCK_log);
+  }
+
+  /*
+    Upon writing incident event, check for thd->is_error() and print the
+    relevant error message in the error log.
+  */
+  if (thd->is_error())
+  {
+    sql_print_error("Write to binary log failed: "
+                    "%s. An incident event is written to binary log "
+                    "and slave will be stopped.\n",
+                    thd->get_stmt_da()->message());
+  }
+  if (error)
+  {
+    sql_print_error("Incident event write to the binary log file failed.");
+  }
+
+  DBUG_RETURN(error);
+}
+
+void
+MYSQL_BIN_LOG::
+write_binlog_checkpoint_event_already_locked(const char *name_arg, uint len)
+{
+  my_off_t offset;
+  Binlog_checkpoint_log_event ev(name_arg, len);
+  /*
+    Note that we must sync the binlog checkpoint to disk.
+    Otherwise a subsequent log purge could delete binlogs that XA recovery
+    thinks are needed (even though they are not really).
+  */
+  if (!write_event(&ev) && !flush_and_sync(0))
+  {
+    update_binlog_end_pos();
+  }
+  else
+  {
+    /*
+      If we fail to write the checkpoint event, something is probably really
+      bad with the binlog. We complain in the error log.
+
+      Note that failure to write binlog checkpoint does not compromise the
+      ability to do crash recovery - crash recovery will just have to scan a
+      bit more of the binlog than strictly necessary.
+    */
+    sql_print_error("Failed to write binlog checkpoint event to binary log");
+  }
+
+  offset= my_b_tell(&log_file);
+
+  update_binlog_end_pos(offset);
+
+  /*
+    Take mutex to protect against a reader seeing partial writes of 64-bit
+    offset on 32-bit CPUs.
+  */
+  mysql_mutex_lock(&LOCK_commit_ordered);
+  last_commit_pos_offset= offset;
+  mysql_mutex_unlock(&LOCK_commit_ordered);
+}
+
+
+/**
+  Write a cached log entry to the binary log.
+  - To support transaction over replication, we wrap the transaction
+    with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log.
+    We want to write a BEGIN/ROLLBACK block when a non-transactional table
+    was updated in a transaction which was rolled back. This is to ensure
+    that the same updates are run on the slave.
+
+  @param thd               Thread handle
+  @param cache_mngr        Cache manager whose cached events are to be
+                           written to the binlog
+  @param end_ev            The end event to write after the contents of
+                           the cache
+  @param all               Whether this applies to the full transaction
+                           (vs. the statement only)
+  @param using_stmt_cache  Recorded in the group commit entry
+  @param using_trx_cache   Recorded in the group commit entry
+  @param is_ro_1pc         Recorded in the group commit entry (ro_1pc);
+                           NOTE(review): presumed read-only one-phase-commit
+                           flag - confirm against callers
+
+  @note
+    We only come here if there is something in the cache.
+  @note
+    The thing in the cache is always a complete transaction.
+  @note
+    'cache' needs to be reinitialized after this function returns.
+*/
+
+bool
+MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
+                                           binlog_cache_mngr *cache_mngr,
+                                           Log_event *end_ev, bool all,
+                                           bool using_stmt_cache,
+                                           bool using_trx_cache,
+                                           bool is_ro_1pc)
+{
+  group_commit_entry entry;
+  Ha_trx_info *ha_info;
+  DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
+
+  /*
+    Control should not be allowed beyond this point in wsrep_emulate_bin_log
+    mode. Also, do not write the cached updates to binlog if binary logging is
+    disabled (log-bin/sql_log_bin).
+  */
+  if (wsrep_emulate_bin_log)
+  {
+    DBUG_RETURN(0);
+  }
+  else if (!(thd->variables.option_bits & OPTION_BIN_LOG))
+  {
+    cache_mngr->need_unlog= false;
+    DBUG_RETURN(0);
+  }
+
+  entry.thd= thd;
+  entry.cache_mngr= cache_mngr;
+  entry.error= 0;
+  entry.all= all;
+  entry.using_stmt_cache= using_stmt_cache;
+  entry.using_trx_cache= using_trx_cache;
+  entry.need_unlog= is_preparing_xa(thd);
+  ha_info= all ? thd->transaction->all.ha_list : thd->transaction->stmt.ha_list;
+  entry.ro_1pc= is_ro_1pc;
+  entry.end_event= end_ev;
+  auto has_xid= entry.end_event->get_type_code() == XID_EVENT;
+
+  /*
+    An XID transaction needs unlog if any participating engine does not
+    support commit_checkpoint_request().
+  */
+  for (; has_xid && !entry.need_unlog && ha_info; ha_info= ha_info->next())
+  {
+    if (ha_info->is_started() && ha_info->ht() != binlog_hton &&
+        !ha_info->ht()->commit_checkpoint_request)
+      entry.need_unlog= true;
+  }
+
+  if (cache_mngr->stmt_cache.has_incident() ||
+      cache_mngr->trx_cache.has_incident())
+  {
+    Incident_log_event inc_ev(thd, INCIDENT_LOST_EVENTS, &write_error_msg);
+    entry.incident_event= &inc_ev;
+    DBUG_RETURN(write_transaction_to_binlog_events(&entry));
+  }
+  else
+  {
+    entry.incident_event= NULL;
+    DBUG_RETURN(write_transaction_to_binlog_events(&entry));
+  }
+}
+
+
+/*
+  Put a transaction that is ready to commit in the group commit queue.
+  The transaction is identified by the ENTRY object passed into this function.
+
+  To facilitate group commit for the binlog, we first queue up ourselves in
+  this function. Then later the first thread to enter the queue waits for
+  the LOCK_log mutex, and commits for everyone in the queue once it gets the
+  lock. Any other threads in the queue just wait for the first one to finish
+  the commit and wake them up. This way, all transactions in the queue get
+  committed in a single disk operation.
+
+  The main work in this function is when the commit in one transaction has
+  been marked to wait for the commit of another transaction to happen
This is used to support in-order parallel replication, where + transactions can execute out-of-order but need to be committed in-order with + how they happened on the master. The waiting of one commit on another needs + to be integrated with the group commit queue, to ensure that the waiting + transaction can participate in the same group commit as the waited-for + transaction. + + So when we put a transaction in the queue, we check if there were other + transactions already prepared to commit but just waiting for the first one + to commit. If so, we add those to the queue as well, transitively for all + waiters. + + And if a transaction is marked to wait for a prior transaction, but that + prior transaction is already queued for group commit, then we can queue the + new transaction directly to participate in the group commit. + + @retval < 0 Error + @retval -2 WSREP error with commit ordering + @retval -3 WSREP return code to mark the leader + @retval > 0 If queued as the first entry in the queue (meaning this + is the leader) + @retval 0 Otherwise (queued as participant, leader handles the commit) +*/ + +int +MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry) +{ + group_commit_entry *entry, *orig_queue, *last; + wait_for_commit *cur; + wait_for_commit *wfc; + bool backup_lock_released= 0; + int result= 0; + THD *thd= orig_entry->thd; + DBUG_ENTER("MYSQL_BIN_LOG::queue_for_group_commit"); + DBUG_ASSERT(thd == current_thd); + + /* + Check if we need to wait for another transaction to commit before us. + + It is safe to do a quick check without lock first in the case where we do + not have to wait. But if the quick check shows we need to wait, we must do + another safe check under lock, to avoid the race where the other + transaction wakes us up between the check and the wait. 
+ */ + wfc= orig_entry->thd->wait_for_commit_ptr; + orig_entry->queued_by_other= false; + if (wfc && wfc->waitee.load(std::memory_order_acquire)) + { + wait_for_commit *loc_waitee; + + mysql_mutex_lock(&wfc->LOCK_wait_commit); + /* + Do an extra check here, this time safely under lock. + + If waitee->commit_started is set, it means that the transaction we need + to wait for has already queued up for group commit. In this case it is + safe for us to queue up immediately as well, increasing the opprtunities + for group commit. Because waitee has taken the LOCK_prepare_ordered + before setting the flag, so there is no risk that we can queue ahead of + it. + */ + if ((loc_waitee= wfc->waitee.load(std::memory_order_relaxed)) && + !loc_waitee->commit_started) + { + PSI_stage_info old_stage; + + /* + Release MDL_BACKUP_COMMIT LOCK while waiting for other threads to + commit. + This is needed to avoid deadlock between the other threads (which not + yet have the MDL_BACKUP_COMMIT_LOCK) and any threads using + BACKUP LOCK BLOCK_COMMIT. + */ + if (thd->backup_commit_lock && thd->backup_commit_lock->ticket && + !backup_lock_released) + { + backup_lock_released= 1; + thd->mdl_context.release_lock(thd->backup_commit_lock->ticket); + thd->backup_commit_lock->ticket= 0; + } + + /* + By setting wfc->opaque_pointer to our own entry, we mark that we are + ready to commit, but waiting for another transaction to commit before + us. + + This other transaction may then take over the commit process for us to + get us included in its own group commit. If this happens, the + queued_by_other flag is set. + + Setting this flag may or may not be seen by the other thread, but we + are safe in any case: The other thread will set queued_by_other under + its LOCK_wait_commit, and we will not check queued_by_other until after + we have been woken up. 
+ */ + wfc->opaque_pointer= orig_entry; + DEBUG_SYNC(orig_entry->thd, "group_commit_waiting_for_prior"); + orig_entry->thd->ENTER_COND(&wfc->COND_wait_commit, + &wfc->LOCK_wait_commit, + &stage_waiting_for_prior_transaction_to_commit, + &old_stage); + while ((loc_waitee= wfc->waitee.load(std::memory_order_relaxed)) && + !orig_entry->thd->check_killed(1)) + mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit); + wfc->opaque_pointer= NULL; + DBUG_PRINT("info", ("After waiting for prior commit, queued_by_other=%d", + orig_entry->queued_by_other)); + + if (loc_waitee) + { + /* Wait terminated due to kill. */ + mysql_mutex_lock(&loc_waitee->LOCK_wait_commit); + if (loc_waitee->wakeup_subsequent_commits_running || + orig_entry->queued_by_other) + { + /* Our waitee is already waking us up, so ignore the kill. */ + mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit); + do + { + mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit); + } while (wfc->waitee.load(std::memory_order_relaxed)); + } + else + { + /* We were killed, so remove us from the list of waitee. */ + wfc->remove_from_list(&loc_waitee->subsequent_commits_list); + mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit); + /* + This is the thread clearing its own status, it is no longer on + the list of waiters. So no memory barriers are needed here. + */ + wfc->waitee.store(NULL, std::memory_order_relaxed); + + orig_entry->thd->EXIT_COND(&old_stage); + /* Interrupted by kill. 
*/ + DEBUG_SYNC(orig_entry->thd, "group_commit_waiting_for_prior_killed"); + wfc->wakeup_error= orig_entry->thd->killed_errno(); + if (!wfc->wakeup_error) + wfc->wakeup_error= ER_QUERY_INTERRUPTED; + my_message(wfc->wakeup_error, + ER_THD(orig_entry->thd, wfc->wakeup_error), MYF(0)); + result= -1; + goto end; + } + } + orig_entry->thd->EXIT_COND(&old_stage); + } + else + mysql_mutex_unlock(&wfc->LOCK_wait_commit); + } + /* + If the transaction we were waiting for has already put us into the group + commit queue (and possibly already done the entire binlog commit for us), + then there is nothing else to do. + */ + if (orig_entry->queued_by_other) + goto end; + + if (wfc && wfc->wakeup_error) + { + my_error(ER_PRIOR_COMMIT_FAILED, MYF(0)); + result= -1; + goto end; + } + + /* Now enqueue ourselves in the group commit queue. */ + DEBUG_SYNC(orig_entry->thd, "commit_before_enqueue"); + orig_entry->thd->clear_wakeup_ready(); + mysql_mutex_lock(&LOCK_prepare_ordered); + orig_queue= group_commit_queue; + + /* + Iteratively process everything added to the queue, looking for waiters, + and their waiters, and so on. If a waiter is ready to commit, we + immediately add it to the queue, and mark it as queued_by_other. + + This would be natural to do with recursion, but we want to avoid + potentially unbounded recursion blowing the C stack, so we use the list + approach instead. + + We keep a list of the group_commit_entry of all the waiters that need to + be processed. Initially this list contains only the entry passed into this + function. + + We process entries in the list one by one. The element currently being + processed is pointed to by `entry`, and the element at the end of the list + is pointed to by `last` (we do not use NULL to terminate the list). + + As we process an entry, any waiters for that entry are added at the end of + the list, to be processed in subsequent iterations. Then the entry is added + to the group_commit_queue. 
This continues until the list is exhausted, + with all entries ever added eventually processed. + + The end result is a breath-first traversal of the tree of waiters, + re-using the `next' pointers of the group_commit_entry objects in place of + extra stack space in a recursive traversal. + + The temporary list linked through these `next' pointers is not used by the + caller or any other function; it only exists while doing the iterative + tree traversal. After, all the processed entries are linked into the + group_commit_queue. + */ + + cur= wfc; + last= orig_entry; + entry= orig_entry; + for (;;) + { + group_commit_entry *next_entry; + + if (entry->cache_mngr->using_xa) + { + DEBUG_SYNC(entry->thd, "commit_before_prepare_ordered"); + run_prepare_ordered(entry->thd, entry->all); + DEBUG_SYNC(entry->thd, "commit_after_prepare_ordered"); + } + + if (cur) + { + /* + Now that we have taken LOCK_prepare_ordered and will queue up in the + group commit queue, it is safe for following transactions to queue + themselves. We will grab here any transaction that is now ready to + queue up, but after that, more transactions may become ready while the + leader is waiting to start the group commit. So set the flag + `commit_started', so that later transactions can still participate in + the group commit.. + */ + cur->commit_started= true; + + /* + Check if this transaction has other transaction waiting for it to + commit. + + If so, process the waiting transactions, and their waiters and so on, + transitively. + */ + if (cur->subsequent_commits_list) + { + wait_for_commit *waiter, **waiter_ptr; + + mysql_mutex_lock(&cur->LOCK_wait_commit); + /* + Grab the list, now safely under lock, and process it if still + non-empty. 
+ */ + waiter= cur->subsequent_commits_list; + waiter_ptr= &cur->subsequent_commits_list; + while (waiter) + { + wait_for_commit *next_waiter= waiter->next_subsequent_commit; + group_commit_entry *entry2= + (group_commit_entry *)waiter->opaque_pointer; + if (entry2) + { + /* + This is another transaction ready to be written to the binary + log. We can put it into the queue directly, without needing a + separate context switch to the other thread. We just set a flag + so that the other thread will know when it wakes up that it was + already processed. + + So remove it from the list of our waiters, and instead put it at + the end of the list to be processed in a subsequent iteration of + the outer loop. + */ + *waiter_ptr= next_waiter; + entry2->queued_by_other= true; + last->next= entry2; + last= entry2; + /* + As a small optimisation, we do not actually need to set + entry2->next to NULL, as we can use the pointer `last' to check + for end-of-list. + */ + } + else + { + /* + This transaction is not ready to participate in the group commit + yet, so leave it in the waiter list. It might join the group + commit later, if it completes soon enough to do so (it will see + our wfc->commit_started flag set), or it might commit later in a + later group commit. + */ + waiter_ptr= &waiter->next_subsequent_commit; + } + waiter= next_waiter; + } + mysql_mutex_unlock(&cur->LOCK_wait_commit); + } + } + + /* + Handle the heuristics that if another transaction is waiting for this + transaction (or if it does so later), then we want to trigger group + commit immediately, without waiting for the binlog_commit_wait_usec + timeout to expire. + */ + entry->thd->waiting_on_group_commit= true; + + /* Add the entry to the group commit queue. */ + next_entry= entry->next; + entry->next= group_commit_queue; + group_commit_queue= entry; + if (entry == last) + break; + /* + Move to the next entry in the flattened list of waiting transactions + that still need to be processed transitively. 
+ */ + entry= next_entry; + DBUG_ASSERT(entry != NULL); + cur= entry->thd->wait_for_commit_ptr; + } + + result= orig_queue == NULL; + +#ifdef WITH_WSREP + if (wsrep_is_active(entry->thd) && + wsrep_run_commit_hook(entry->thd, entry->all)) + { + /* Release commit order here */ + if (wsrep_ordered_commit(entry->thd, entry->all)) + result= -2; + + /* return -3, if this is leader */ + if (orig_queue == NULL) + result= -3; + } +#endif /* WITH_WSREP */ + + if (opt_binlog_commit_wait_count > 0 && orig_queue != NULL) + mysql_cond_signal(&COND_prepare_ordered); + mysql_mutex_unlock(&LOCK_prepare_ordered); + DEBUG_SYNC(orig_entry->thd, "commit_after_release_LOCK_prepare_ordered"); + + DBUG_PRINT("info", ("Queued for group commit as %s", + (orig_queue == NULL) ? "leader" : "participant")); + +end: + if (backup_lock_released) + thd->mdl_context.acquire_lock(thd->backup_commit_lock, + thd->variables.lock_wait_timeout); + DBUG_RETURN(result); +} + +bool +MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry) +{ + int is_leader= queue_for_group_commit(entry); +#ifdef WITH_WSREP + /* commit order was released in queue_for_group_commit() call, + here we check if wsrep_commit_ordered() failed or if we are leader */ + switch (is_leader) + { + case -2: /* wsrep_ordered_commit() has failed */ + DBUG_ASSERT(wsrep_is_active(entry->thd)); + DBUG_ASSERT(wsrep_run_commit_hook(entry->thd, entry->all)); + entry->thd->wakeup_subsequent_commits(1); + return true; + case -3: /* this is leader, wait for prior commit to + complete. 
This establishes total order for group leaders + */ + DBUG_ASSERT(wsrep_is_active(entry->thd)); + DBUG_ASSERT(wsrep_run_commit_hook(entry->thd, entry->all)); + if (entry->thd->wait_for_prior_commit()) + return true; + + /* retain the correct is_leader value */ + is_leader= 1; + break; + + default: /* native MariaDB cases */ + break; + } +#endif /* WITH_WSREP */ + + /* + The first in the queue handles group commit for all; the others just wait + to be signalled when group commit is done. + */ + if (is_leader < 0) + return true; /* Error */ + else if (is_leader) + trx_group_commit_leader(entry); + else if (!entry->queued_by_other) + { + DEBUG_SYNC(entry->thd, "after_semisync_queue"); + + entry->thd->wait_for_wakeup_ready(); + } + else + { + /* + If we were queued by another prior commit, then we are woken up + only when the leader has already completed the commit for us. + So nothing to do here then. + */ + } + + if (!opt_optimize_thread_scheduling) + { + /* For the leader, trx_group_commit_leader() already took the lock. */ + if (!is_leader) + mysql_mutex_lock(&LOCK_commit_ordered); + + DEBUG_SYNC(entry->thd, "commit_loop_entry_commit_ordered"); + ++num_commits; + if (entry->cache_mngr->using_xa && !entry->error) + run_commit_ordered(entry->thd, entry->all); + + group_commit_entry *next= entry->next; + if (!next) + { + group_commit_queue_busy= FALSE; + mysql_cond_signal(&COND_queue_busy); + DEBUG_SYNC(entry->thd, "commit_after_group_run_commit_ordered"); + } + mysql_mutex_unlock(&LOCK_commit_ordered); + entry->thd->wakeup_subsequent_commits(entry->error); + + if (next) + { + /* + Wake up the next thread in the group commit. + + The next thread can be waiting in two different ways, depending on + whether it put itself in the queue, or if it was put in queue by us + because it had to wait for us to commit first. + + So execute the appropriate wakeup, identified by the queued_by_other + field. 
+ */ + if (next->queued_by_other) + next->thd->wait_for_commit_ptr->wakeup(entry->error); + else + next->thd->signal_wakeup_ready(); + } + else + { + /* + If we rotated the binlog, and if we are using the unoptimized thread + scheduling where every thread runs its own commit_ordered(), then we + must do the commit checkpoint and log purge here, after all + commit_ordered() calls have finished, and locks have been released. + */ + if (entry->check_purge) + checkpoint_and_purge(entry->binlog_id); + } + + } + + if (likely(!entry->error)) + return entry->thd->wait_for_prior_commit(); + + switch (entry->error) + { + case ER_ERROR_ON_WRITE: + my_error(ER_ERROR_ON_WRITE, MYF(ME_ERROR_LOG), name, entry->commit_errno); + break; + case ER_ERROR_ON_READ: + my_error(ER_ERROR_ON_READ, MYF(ME_ERROR_LOG), + entry->error_cache->file_name, entry->commit_errno); + break; + default: + /* + There are not (and should not be) any errors thrown not covered above. + But just in case one is added later without updating the above switch + statement, include a catch-all. + */ + my_printf_error(entry->error, + "Error writing transaction to binary log: %d", + MYF(ME_ERROR_LOG), entry->error); + } + + /* + Since we return error, this transaction XID will not be committed, so + we need to mark it as not needed for recovery (unlog() is not called + for a transaction if log_xid() fails). + */ + if (entry->cache_mngr->using_xa && entry->cache_mngr->xa_xid && + entry->cache_mngr->need_unlog) + mark_xid_done(entry->cache_mngr->binlog_id, true); + + return 1; +} + +/* + Do binlog group commit as the lead thread. + + This must be called when this statement/transaction is queued at the start of + the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group + commit all the transactions in the queue (more may have entered while waiting + for LOCK_log). After commit is done, all other threads in the queue will be + signalled. 
+ + */ +void +MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) +{ + uint xid_count= 0; + my_off_t UNINIT_VAR(commit_offset); + group_commit_entry *current, *last_in_queue; + group_commit_entry *queue= NULL; + bool check_purge= false; + ulong UNINIT_VAR(binlog_id); + uint64 commit_id; + DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader"); + + { +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("inject_binlog_commit_before_get_LOCK_log", + DBUG_ASSERT(!debug_sync_set_action(leader->thd, STRING_WITH_LEN + ("commit_before_get_LOCK_log SIGNAL waiting WAIT_FOR cont TIMEOUT 1"))); + ); +#endif + /* + Lock the LOCK_log(), and once we get it, collect any additional writes + that queued up while we were waiting. + */ + DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_log"); + mysql_mutex_lock(&LOCK_log); + DEBUG_SYNC(leader->thd, "commit_after_get_LOCK_log"); + + mysql_mutex_lock(&LOCK_prepare_ordered); + if (opt_binlog_commit_wait_count) + wait_for_sufficient_commits(); + /* + Note that wait_for_sufficient_commits() may have released and + re-acquired the LOCK_log and LOCK_prepare_ordered if it needed to wait. + */ + current= group_commit_queue; + group_commit_queue= NULL; + mysql_mutex_unlock(&LOCK_prepare_ordered); + binlog_id= current_binlog_id; + + /* As the queue is in reverse order of entering, reverse it. */ + last_in_queue= current; + while (current) + { + group_commit_entry *next= current->next; + /* + Now that group commit is started, we can clear the flag; there is no + longer any use in waiters on this commit trying to trigger it early. + */ + current->thd->waiting_on_group_commit= false; + current->next= queue; + queue= current; + current= next; + } + DBUG_ASSERT(leader == queue /* the leader should be first in queue */); + + /* Now we have in queue the list of transactions to be committed in order. */ + } + + DBUG_ASSERT(is_open()); + if (likely(is_open())) // Should always be true + { + commit_id= (last_in_queue == leader ? 
0 : (uint64)leader->thd->query_id); + DBUG_EXECUTE_IF("binlog_force_commit_id", + { + const LEX_CSTRING commit_name= { STRING_WITH_LEN("commit_id") }; + bool null_value; + user_var_entry *entry= + (user_var_entry*) my_hash_search(&leader->thd->user_vars, + (uchar*) commit_name.str, + commit_name.length); + commit_id= entry->val_int(&null_value); + }); + /* + Commit every transaction in the queue. + + Note that we are doing this in a different thread than the one running + the transaction! So we are limited in the operations we can do. In + particular, we cannot call my_error() on behalf of a transaction, as + that obtains the THD from thread local storage. Instead, we must set + current->error and let the thread do the error reporting itself once + we wake it up. + */ + for (current= queue; current != NULL; current= current->next) + { + set_current_thd(current->thd); + binlog_cache_mngr *cache_mngr= current->cache_mngr; + + /* + We already checked before that at least one cache is non-empty; if both + are empty we would have skipped calling into here. + */ + DBUG_ASSERT(!cache_mngr->stmt_cache.empty() || + !cache_mngr->trx_cache.empty() || + current->thd->transaction->xid_state.is_explicit_XA()); + + if (unlikely((current->error= write_transaction_or_stmt(current, + commit_id)))) + current->commit_errno= errno; + + strmake_buf(cache_mngr->last_commit_pos_file, log_file_name); + commit_offset= my_b_write_tell(&log_file); + cache_mngr->last_commit_pos_offset= commit_offset; + if ((cache_mngr->using_xa && cache_mngr->xa_xid) || current->need_unlog) + { + /* + If all storage engines support commit_checkpoint_request(), then we + do not need to keep track of when this XID is durably committed. + Instead we will just ask the storage engine to durably commit all its + XIDs when we rotate a binlog file. 
+ */ + if (current->need_unlog) + { + xid_count++; + cache_mngr->need_unlog= true; + cache_mngr->binlog_id= binlog_id; + } + else + cache_mngr->need_unlog= false; + + cache_mngr->delayed_error= false; + } + } + set_current_thd(leader->thd); + + bool synced= 0; + if (unlikely(flush_and_sync(&synced))) + { + for (current= queue; current != NULL; current= current->next) + { + if (!current->error) + { + current->error= ER_ERROR_ON_WRITE; + current->commit_errno= errno; + current->error_cache= NULL; + } + } + } + else + { + DEBUG_SYNC(leader->thd, "commit_before_update_binlog_end_pos"); + bool any_error= false; + + mysql_mutex_assert_not_owner(&LOCK_prepare_ordered); + mysql_mutex_assert_owner(&LOCK_log); + mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync); + mysql_mutex_assert_not_owner(&LOCK_commit_ordered); + + for (current= queue; current != NULL; current= current->next) + { +#ifdef HAVE_REPLICATION + if (likely(!current->error) && + unlikely(repl_semisync_master. + report_binlog_update(current->thd, + current->cache_mngr-> + last_commit_pos_file, + current->cache_mngr-> + last_commit_pos_offset))) + { + current->error= ER_ERROR_ON_WRITE; + current->commit_errno= -1; + current->error_cache= NULL; + any_error= true; + } +#endif + } + + /* + update binlog_end_pos so it can be read by dump thread + Note: must be _after_ the RUN_HOOK(after_flush) or else + semi-sync might not have put the transaction into + it's list before dump-thread tries to send it + */ + update_binlog_end_pos(commit_offset); + + if (unlikely(any_error)) + sql_print_error("Failed to run 'after_flush' hooks"); + } + + /* + If any commit_events are Xid_log_event, increase the number of pending + XIDs in current binlog (it's decreased in ::unlog()). When the count in + a (not active) binlog file reaches zero, we know that it is no longer + needed in XA recovery, and we can log a new binlog checkpoint event. 
+ */ + if (xid_count > 0) + { + mark_xids_active(binlog_id, xid_count); + } + + if (rotate(false, &check_purge)) + { + /* + If we fail to rotate, which thread should get the error? + We give the error to the leader, as any my_error() thrown inside + rotate() will have been registered for the leader THD. + + However we must not return error from here - that would cause + ha_commit_trans() to abort and rollback the transaction, which would + leave an inconsistent state with the transaction committed in the + binlog but rolled back in the engine. + + Instead set a flag so that we can return error later, from unlog(), + when the transaction has been safely committed in the engine. + */ + leader->cache_mngr->delayed_error= true; + my_error(ER_ERROR_ON_WRITE, MYF(ME_ERROR_LOG), name, errno); + check_purge= false; + } + /* In case of binlog rotate, update the correct current binlog offset. */ + commit_offset= my_b_write_tell(&log_file); + } + + DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_after_binlog_sync"); + mysql_mutex_lock(&LOCK_after_binlog_sync); + /* + We cannot unlock LOCK_log until we have locked LOCK_after_binlog_sync; + otherwise scheduling could allow the next group commit to run ahead of us, + messing up the order of commit_ordered() calls. But as soon as + LOCK_after_binlog_sync is obtained, we can let the next group commit start. 
+ */ + mysql_mutex_unlock(&LOCK_log); + + DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log"); + + /* + Loop through threads and run the binlog_sync hook + */ + { + mysql_mutex_assert_not_owner(&LOCK_prepare_ordered); + mysql_mutex_assert_not_owner(&LOCK_log); + mysql_mutex_assert_owner(&LOCK_after_binlog_sync); + mysql_mutex_assert_not_owner(&LOCK_commit_ordered); + + bool first __attribute__((unused))= true; + bool last __attribute__((unused)); + for (current= queue; current != NULL; current= current->next) + { + last= current->next == NULL; +#ifdef HAVE_REPLICATION + if (likely(!current->error)) + current->error= + repl_semisync_master.wait_after_sync(current->cache_mngr-> + last_commit_pos_file, + current->cache_mngr-> + last_commit_pos_offset); +#endif + first= false; + } + } + + DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered"); + + mysql_mutex_lock(&LOCK_commit_ordered); + DBUG_EXECUTE_IF("crash_before_engine_commit", + { + DBUG_SUICIDE(); + }); + last_commit_pos_offset= commit_offset; + + /* + Unlock LOCK_after_binlog_sync only *after* LOCK_commit_ordered has been + acquired so that groups can not reorder for the different stages of + the group commit procedure. + */ + mysql_mutex_unlock(&LOCK_after_binlog_sync); + DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_after_binlog_sync"); + ++num_group_commits; + + if (!opt_optimize_thread_scheduling) + { + /* + If we want to run commit_ordered() each in the transaction's own thread + context, then we need to mark the queue reserved; we need to finish all + threads in one group commit before the next group commit can be allowed + to proceed, and we cannot unlock a simple pthreads mutex in a different + thread from the one that locked it. + */ + + while (group_commit_queue_busy) + mysql_cond_wait(&COND_queue_busy, &LOCK_commit_ordered); + group_commit_queue_busy= TRUE; + + /* + Set these so parent can run checkpoint_and_purge() in last thread. 
+ (When using optimized thread scheduling, we run checkpoint_and_purge() + in this function, so parent does not need to and we need not set these + values). + */ + last_in_queue->check_purge= check_purge; + last_in_queue->binlog_id= binlog_id; + + /* Note that we return with LOCK_commit_ordered locked! */ + DBUG_VOID_RETURN; + } + + /* + Wakeup each participant waiting for our group commit, first calling the + commit_ordered() methods for any transactions doing 2-phase commit. + */ + current= queue; + while (current != NULL) + { + group_commit_entry *next; + + DEBUG_SYNC(leader->thd, "commit_loop_entry_commit_ordered"); + ++num_commits; + if (current->cache_mngr->using_xa && likely(!current->error) && + !DBUG_IF("skip_commit_ordered")) + { + mysql_mutex_lock(¤t->thd->LOCK_thd_data); + run_commit_ordered(current->thd, current->all); + mysql_mutex_unlock(¤t->thd->LOCK_thd_data); + } + current->thd->wakeup_subsequent_commits(current->error); + + /* + Careful not to access current->next after waking up the other thread! As + it may change immediately after wakeup. 
    */
    next= current->next;
    if (current != leader)                      // Don't wake up ourself
    {
      if (current->queued_by_other)
        current->thd->wait_for_commit_ptr->wakeup(current->error);
      else
        current->thd->signal_wakeup_ready();
    }
    current= next;
  }
  DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered");
  mysql_mutex_unlock(&LOCK_commit_ordered);
  DEBUG_SYNC(leader->thd, "commit_after_group_release_commit_ordered");

  if (check_purge)
    checkpoint_and_purge(binlog_id);

  DBUG_VOID_RETURN;
}


/**
  Write a single transaction's (or statement's) binlog caches to the binary
  log file, on behalf of the group commit leader.

  @param entry      Group commit queue entry of the transaction to write.
  @param commit_id  Group commit id stored in the GTID event (0 when the
                    transaction is alone in its group).

  @return 0 on success, or ER_ERROR_ON_WRITE on failure. On failure,
          entry->error_cache is set to the cache that failed (or NULL when
          the failure was not in a cache).

  NOTE(review): this runs in the leader thread, not necessarily the
  transaction's own thread, so errors are only recorded in `entry` and
  reported later by the owning thread (see trx_group_commit_leader()).
*/
int
MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry,
                                         uint64 commit_id)
{
  binlog_cache_mngr *mngr= entry->cache_mngr;
  bool has_xid= entry->end_event->get_type_code() == XID_EVENT;

  DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_or_stmt");

  /* The GTID event always goes first, before any cached row/statement data. */
  if (write_gtid_event(entry->thd, is_prepared_xa(entry->thd),
                       entry->using_trx_cache, commit_id,
                       has_xid, entry->ro_1pc))
    DBUG_RETURN(ER_ERROR_ON_WRITE);

  /* Non-transactional (statement) cache is written before the trx cache. */
  if (entry->using_stmt_cache && !mngr->stmt_cache.empty() &&
      write_cache(entry->thd, mngr->get_binlog_cache_log(FALSE)))
  {
    entry->error_cache= &mngr->stmt_cache.cache_log;
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  }

  if (entry->using_trx_cache && !mngr->trx_cache.empty())
  {
    DBUG_EXECUTE_IF("crash_before_writing_xid",
                    {
                      if ((write_cache(entry->thd,
                                       mngr->get_binlog_cache_log(TRUE))))
                        DBUG_PRINT("info", ("error writing binlog cache"));
                      else
                        flush_and_sync(0);

                      DBUG_PRINT("info", ("crashing before writing xid"));
                      DBUG_SUICIDE();
                    });

    if (write_cache(entry->thd, mngr->get_binlog_cache_log(TRUE)))
    {
      entry->error_cache= &mngr->trx_cache.cache_log;
      DBUG_RETURN(ER_ERROR_ON_WRITE);
    }
  }

  DBUG_EXECUTE_IF("inject_error_writing_xid",
                  {
                    entry->error_cache= NULL;
                    errno= 28;
                    DBUG_RETURN(ER_ERROR_ON_WRITE);
                  });

  /* Terminating event: XID, COMMIT query event, etc. */
  if (write_event(entry->end_event))
  {
    entry->error_cache= NULL;
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  }
  status_var_add(entry->thd->status_var.binlog_bytes_written,
                 entry->end_event->data_written);

  if (entry->incident_event)
  {
    if (write_event(entry->incident_event))
    {
      entry->error_cache= NULL;
      DBUG_RETURN(ER_ERROR_ON_WRITE);
    }
  }

  /*
    Check for deferred I/O errors recorded inside the caches themselves
    (e.g. a failed read while copying the cache).
  */
  if (unlikely(mngr->get_binlog_cache_log(FALSE)->error))
  {
    entry->error_cache= &mngr->stmt_cache.cache_log;
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  }
  if (unlikely(mngr->get_binlog_cache_log(TRUE)->error))  // Error on read
  {
    entry->error_cache= &mngr->trx_cache.cache_log;
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  }

  DBUG_RETURN(0);
}


/*
  Wait for sufficient commits to queue up for group commit, according to the
  values of binlog_commit_wait_count and binlog_commit_wait_usec.

  Note that this function may release and re-acquire LOCK_log and
  LOCK_prepare_ordered if it needs to wait.
*/

void
MYSQL_BIN_LOG::wait_for_sufficient_commits()
{
  size_t count;
  group_commit_entry *e;
  group_commit_entry *last_head;
  struct timespec wait_until;

  mysql_mutex_assert_owner(&LOCK_log);
  mysql_mutex_assert_owner(&LOCK_prepare_ordered);

  /*
    Fast path: if the queue already has enough entries, or some queued
    transaction has a waiter stalled on it, start the group commit at once.
  */
  for (e= last_head= group_commit_queue, count= 0; e; e= e->next)
  {
    if (++count >= opt_binlog_commit_wait_count)
    {
      group_commit_trigger_count++;
      return;
    }
    if (unlikely(e->thd->has_waiter))
    {
      group_commit_trigger_lock_wait++;
      return;
    }
  }

  /* Release LOCK_log while we wait, so other threads can queue up. */
  mysql_mutex_unlock(&LOCK_log);
  set_timespec_nsec(wait_until, (ulonglong)1000*opt_binlog_commit_wait_usec);

  for (;;)
  {
    int err;
    group_commit_entry *head;

    err= mysql_cond_timedwait(&COND_prepare_ordered, &LOCK_prepare_ordered,
                              &wait_until);
    if (err == ETIMEDOUT)
    {
      group_commit_trigger_timeout++;
      break;
    }
    if (unlikely(last_head->thd->has_waiter))
    {
      group_commit_trigger_lock_wait++;
      break;
    }
    /*
      The queue grows by pushing at the head, so only the entries between the
      current head and the previously-seen head are new; count just those.
    */
    head= group_commit_queue;
    for (e= head; e && e != last_head; e= e->next)
    {
      ++count;
      if (unlikely(e->thd->has_waiter))
      {
        group_commit_trigger_lock_wait++;
        goto after_loop;
      }
    }
    if (count >= opt_binlog_commit_wait_count)
    {
      group_commit_trigger_count++;
      break;
    }
    last_head= head;
  }
after_loop:

  /*
    We must not wait for LOCK_log while holding LOCK_prepare_ordered.
    LOCK_log can be held for long periods (eg. we do I/O under it), while
    LOCK_prepare_ordered must only be held for short periods.

    In addition, waiting for LOCK_log while holding LOCK_prepare_ordered would
    violate locking order of LOCK_log-before-LOCK_prepare_ordered. This could
    cause SAFEMUTEX warnings (even if it cannot actually deadlock with current
    code, as there can be at most one group commit leader thread at a time).

    So release and re-acquire LOCK_prepare_ordered if we need to wait for the
    LOCK_log.
  */
  if (mysql_mutex_trylock(&LOCK_log))
  {
    mysql_mutex_unlock(&LOCK_prepare_ordered);
    mysql_mutex_lock(&LOCK_log);
    mysql_mutex_lock(&LOCK_prepare_ordered);
  }
}


/*
  Signal the queued group commit leader (if any) to start immediately,
  without waiting out the binlog_commit_wait_usec timeout.
  Caller must hold LOCK_prepare_ordered.
*/
void
MYSQL_BIN_LOG::binlog_trigger_immediate_group_commit()
{
  group_commit_entry *head;
  mysql_mutex_assert_owner(&LOCK_prepare_ordered);
  head= group_commit_queue;
  if (head)
  {
    head->thd->has_waiter= true;
    mysql_cond_signal(&COND_prepare_ordered);
  }
}


/*
  This function is called when a transaction T1 goes to wait for another
  transaction T2. It is used to cut short any binlog group commit delay from
  --binlog-commit-wait-count in the case where another transaction is stalled
  on the wait due to conflicting row locks.

  If T2 is already ready to group commit, any waiting group commit will be
  signalled to proceed immediately. Otherwise, a flag will be set in T2, and
  when T2 later becomes ready, immediate group commit will be triggered.
+*/ +void +binlog_report_wait_for(THD *thd1, THD *thd2) +{ + if (opt_binlog_commit_wait_count == 0) + return; + mysql_mutex_lock(&LOCK_prepare_ordered); + thd2->has_waiter= true; + if (thd2->waiting_on_group_commit) + mysql_bin_log.binlog_trigger_immediate_group_commit(); + mysql_mutex_unlock(&LOCK_prepare_ordered); +} + + +/** + Wait until we get a signal that the relay log has been updated. + + @param thd Thread variable + + @note + One must have a lock on LOCK_log before calling this function. + This lock will be released before return! That's required by + THD::enter_cond() (see NOTES in sql_class.h). +*/ + +void MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd) +{ + PSI_stage_info old_stage; + DBUG_ENTER("wait_for_update_relay_log"); + + mysql_mutex_assert_owner(&LOCK_log); + thd->ENTER_COND(&COND_relay_log_updated, &LOCK_log, + &stage_slave_has_read_all_relay_log, + &old_stage); + mysql_cond_wait(&COND_relay_log_updated, &LOCK_log); + thd->EXIT_COND(&old_stage); + DBUG_VOID_RETURN; +} + +/** + Wait until we get a signal that the binary log has been updated. + Applies to master only. + + NOTES + @param[in] thd a THD struct + @param[in] timeout a pointer to a timespec; + NULL means to wait w/o timeout. + @retval 0 if got signalled on update + @retval non-0 if wait timeout elapsed + @note + LOCK_log must be taken before calling this function. + LOCK_log is being released while the thread is waiting. + LOCK_log is released by the caller. +*/ + +int MYSQL_BIN_LOG::wait_for_update_binlog_end_pos(THD* thd, + struct timespec *timeout) +{ + int ret= 0; + DBUG_ENTER("wait_for_update_binlog_end_pos"); + + thd_wait_begin(thd, THD_WAIT_BINLOG); + mysql_mutex_assert_owner(get_binlog_end_pos_lock()); + if (!timeout) + mysql_cond_wait(&COND_bin_log_updated, get_binlog_end_pos_lock()); + else + ret= mysql_cond_timedwait(&COND_bin_log_updated, get_binlog_end_pos_lock(), + timeout); + thd_wait_end(thd); + DBUG_RETURN(ret); +} + + +/** + Close the log file. 
  @param exiting     Bitmask for one or more of the following bits:
          - LOG_CLOSE_INDEX : if we should close the index file
          - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
            at once after close.
          - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
          - LOG_CLOSE_DELAYED_CLOSE : do not yet close the file and clear the
            LOG_EVENT_BINLOG_IN_USE_F flag

  @note
    One can do an open on the object at once after doing a close.
    The internal structures are not freed until cleanup() is called
*/

void MYSQL_BIN_LOG::close(uint exiting)
{					// One can't set log_type here!
  bool failed_to_save_state= false;
  DBUG_ENTER("MYSQL_BIN_LOG::close");
  DBUG_PRINT("enter",("exiting: %d", (int) exiting));

  mysql_mutex_assert_owner(&LOCK_log);

  if (log_state == LOG_OPENED)
  {
    DBUG_ASSERT(log_type == LOG_BIN);
#ifdef HAVE_REPLICATION
    if (exiting & LOG_CLOSE_STOP_EVENT)
    {
      Stop_log_event s;
      // the checksumming rule for relay-log case is similar to Rotate
      s.checksum_alg= is_relay_log ? relay_log_checksum_alg
                      : (enum_binlog_checksum_alg)binlog_checksum_options;
      DBUG_ASSERT(!is_relay_log ||
                  relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
      write_event(&s);
      bytes_written+= s.data_written;
      flush_io_cache(&log_file);
      update_binlog_end_pos();

      /*
        When we shut down server, write out the binlog state to a separate
        file so we do not have to scan an entire binlog file to recover it
        at next server start.

        Note that this must be written and synced to disk before marking the
        last binlog file as "not crashed".
      */
      if (!is_relay_log && write_state_to_file())
      {
        sql_print_error("Failed to save binlog GTID state during shutdown. "
                        "Binlog will be marked as crashed, so that crash "
                        "recovery can recover the state at next server "
                        "startup.");
        /*
          Leave binlog file marked as crashed, so we can recover state by
          scanning it now that we failed to write out the state properly.
        */
        failed_to_save_state= true;
      }
    }
#endif /* HAVE_REPLICATION */

    /* don't pwrite in a file opened with O_APPEND - it doesn't work */
    if (log_file.type == WRITE_CACHE && !(exiting & LOG_CLOSE_DELAYED_CLOSE))
    {
      my_off_t org_position= mysql_file_tell(log_file.file, MYF(0));
      if (!failed_to_save_state)
        clear_inuse_flag_when_closing(log_file.file);
      /*
        Restore position so that anything we have in the IO_cache is written
        to the correct position.
        We need the seek here, as mysql_file_pwrite() is not guaranteed to keep the
        original position on system that doesn't support pwrite().
      */
      mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
    }

    /* this will cleanup IO_CACHE, sync and close the file */
    MYSQL_LOG::close(exiting);
  }

  /*
    The following test is needed even if is_open() is not set, as we may have
    called a not complete close earlier and the index file is still open.
  */

  if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
  {
    end_io_cache(&index_file);
    if (unlikely(mysql_file_close(index_file.file, MYF(0)) < 0) &&
        ! write_error)
    {
      write_error= 1;
      sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), index_file_name, errno);
    }
  }
  log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
  my_free(name);
  name= NULL;
  DBUG_VOID_RETURN;
}


/*
  Clear the LOG_EVENT_BINLOG_IN_USE_F; this marks the binlog file as cleanly
  closed and not needing crash recovery.
*/
void MYSQL_BIN_LOG::clear_inuse_flag_when_closing(File file)
{
  my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
  uchar flags= 0;            // clearing LOG_EVENT_BINLOG_IN_USE_F
  mysql_file_pwrite(file, &flags, 1, offset, MYF(0));
}


void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
{
  /*
    We need to take locks, otherwise this may happen:
    new_file() is called, calls open(old_max_size), then before open() starts,
    set_max_size() sets max_size to max_size_arg, then open() starts and
    uses the old_max_size argument, so max_size_arg has been overwritten and
    it's like if the SET command was never run.
  */
  DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");
  mysql_mutex_lock(&LOCK_log);
  if (is_open())
    max_size= max_size_arg;
  mysql_mutex_unlock(&LOCK_log);
  DBUG_VOID_RETURN;
}


/**
  Check if a string is a valid number.

  @param str			String to test
  @param res			Store value here
  @param allow_wildcards	Set to 1 if we should ignore '%' and '_'

  @note
    For the moment the allow_wildcards argument is not used
    Should be moved to some other file.
  @retval
    1	String is a number
  @retval
    0	String is not a number
*/

static bool test_if_number(const char *str, ulong *res, bool allow_wildcards)
{
  int flag;
  const char *start;
  DBUG_ENTER("test_if_number");

  flag=0; start=str;
  /* Skip leading spaces, then an optional sign. */
  while (*str++ == ' ') ;
  if (*--str == '-' || *str == '+')
    str++;
  while (my_isdigit(files_charset_info,*str) ||
         (allow_wildcards && (*str == wild_many || *str == wild_one)))
  {
    flag=1;
    str++;
  }
  /* Accept an optional fractional part; flag stays set only if digits seen. */
  if (*str == '.')
  {
    for (str++ ;
         my_isdigit(files_charset_info,*str) ||
           (allow_wildcards && (*str == wild_many || *str == wild_one)) ;
         str++, flag=1) ;
  }
  if (*str != 0 || flag == 0)
    DBUG_RETURN(0);
  if (res)
    *res=atol(start);             /* NOTE: fractional part is truncated */
  DBUG_RETURN(1);			/* Number ok */
} /* test_if_number */


/*
  Log `message` together with the system error description (like perror(),
  but to the MariaDB error log; on Windows, based on GetLastError()).
*/
void sql_perror(const char *message)
{
#if defined(_WIN32)
  char* buf;
  DWORD dw= GetLastError();
  if (FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
                    FORMAT_MESSAGE_IGNORE_INSERTS, NULL, dw,
                    MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&buf, 0, NULL ) > 0)
  {
    sql_print_error("%s: %s",message, buf);
    LocalFree((HLOCAL)buf);
  }
  else
  {
    sql_print_error("%s", message);
  }
#elif defined(HAVE_STRERROR)
  sql_print_error("%s: %s",message, strerror(errno));
#else
  perror(message);
#endif
}


/*
  Change the file associated with two output streams. Used to
  redirect stdout and stderr to a file. The streams are reopened
  only for appending (writing at end of file).
*/
bool reopen_fstreams(const char *filename, FILE *outstream, FILE *errstream)
{
  static constexpr const char *mode= "a" IF_WIN("t", );
  if ((outstream && !my_freopen(filename, mode, outstream)) ||
      (errstream && !my_freopen(filename, mode, errstream)))
  {
    my_error(ER_CANT_CREATE_FILE, MYF(0), filename, errno);
    return TRUE;
  }

  /* The error stream must be unbuffered. */
  if (errstream)
    setbuf(errstream, NULL);

  return FALSE;
}


/*
  Unfortunately, there seems to be no good way
  to restore the original streams upon failure.
*/
static bool redirect_std_streams(const char *file)
{
  if (reopen_fstreams(file, stdout, stderr))
    return TRUE;

  setbuf(stderr, NULL);
  return FALSE;
}


/*
  Re-open (flush) the error log under LOCK_error_log.
  Returns 0 on success, 1 on failure.
*/
bool flush_error_log()
{
  bool result= 0;
  if (opt_error_log)
  {
    mysql_mutex_lock(&LOCK_error_log);
    if (redirect_std_streams(log_error_file))
      result= 1;
    mysql_mutex_unlock(&LOCK_error_log);
  }
  return result;
}

#ifdef _WIN32
/* RAII holder for the Windows event-log source handle. */
struct eventlog_source
{
  HANDLE handle;
  eventlog_source()
  {
    setup_windows_event_source();
    handle = RegisterEventSource(NULL, "MariaDB");
  }

  ~eventlog_source()
  {
    if (handle)
      DeregisterEventSource(handle);
  }
};

static eventlog_source eventlog;

static void print_buffer_to_nt_eventlog(enum loglevel level, char *buff,
                                        size_t length, size_t buffLen)
{
  HANDLE event= eventlog.handle;
  char *buffptr= buff;
  DBUG_ENTER("print_buffer_to_nt_eventlog");

  /* Add ending CR/LF's to string, overwrite last chars if necessary */
  strmov(buffptr+MY_MIN(length, buffLen-5), "\r\n\r\n");

  if (event)
  {
    switch (level) {
      case ERROR_LEVEL:
        ReportEvent(event, EVENTLOG_ERROR_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
                    (LPCSTR*)&buffptr, NULL);
        break;
      case WARNING_LEVEL:
        ReportEvent(event, EVENTLOG_WARNING_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
                    (LPCSTR*) &buffptr, NULL);
        break;
      case INFORMATION_LEVEL:
        ReportEvent(event, EVENTLOG_INFORMATION_TYPE, 0, MSG_DEFAULT, NULL, 1,
                    0, (LPCSTR*) &buffptr, NULL);
        break;
    }
  }

  DBUG_VOID_RETURN;
}
#endif /* _WIN32 */


#ifndef EMBEDDED_LIBRARY
/*
  Format a log line (timestamp, thread id, level, optional slave-connection
  tag) and write it to stderr under LOCK_error_log.
*/
static void print_buffer_to_file(enum loglevel level, const char *buffer,
                                 size_t length)
{
  time_t skr;
  struct tm tm_tmp;
  struct tm *start;
  THD *thd= 0;
  size_t tag_length= 0;
  char tag[NAME_LEN];
  DBUG_ENTER("print_buffer_to_file");
  DBUG_PRINT("enter",("buffer: %s", buffer));

  if (mysqld_server_initialized && (thd= current_thd))
  {
    if (thd->connection_name.length)
    {
      /*
        Add tag for slaves so that the user can see from which connection
        the error originates.
      */
      tag_length= my_snprintf(tag, sizeof(tag),
                              ER_THD(thd, ER_MASTER_LOG_PREFIX),
                              (int) thd->connection_name.length,
                              thd->connection_name.str);
    }
  }

  mysql_mutex_lock(&LOCK_error_log);

  skr= my_time(0);
  localtime_r(&skr, &tm_tmp);
  start=&tm_tmp;

  fprintf(stderr, "%d-%02d-%02d %2d:%02d:%02d %lu [%s] %.*s%.*s\n",
          start->tm_year + 1900,
          start->tm_mon+1,
          start->tm_mday,
          start->tm_hour,
          start->tm_min,
          start->tm_sec,
          (unsigned long) (thd ? thd->thread_id : 0),
          (level == ERROR_LEVEL ? "ERROR" : level == WARNING_LEVEL ?
           "Warning" : "Note"),
          (int) tag_length, tag,
          (int) length, buffer);

  fflush(stderr);

#ifdef WITH_WSREP
  if (level <= WARNING_LEVEL)
  {
    wsrep::reporter::log_level const lvl = (level <= ERROR_LEVEL ?
                                            wsrep::reporter::error :
                                            wsrep::reporter::warning);
    Wsrep_status::report_log_msg(lvl, tag, tag_length, buffer, length, skr);
  }
#endif /* WITH_WSREP */

  mysql_mutex_unlock(&LOCK_error_log);
  DBUG_VOID_RETURN;
}

/**
  Prints a printf style message to the error log and, under NT, to the
  Windows event log.

  This function prints the message into a buffer and then sends that buffer
  to other functions to write that message to other logging sources.

  @param level          The level of the msg significance
  @param format         Printf style format of message
  @param args           va_list list of arguments for the message

  @returns
    The function always returns 0. The return value is present in the
    signature to be compatible with other logging routines, which could
    return an error (e.g.
logging to the log tables) +*/ +int vprint_msg_to_log(enum loglevel level, const char *format, va_list args) +{ + char buff[1024]; + size_t length; + DBUG_ENTER("vprint_msg_to_log"); + + length= my_vsnprintf(buff, sizeof(buff), format, args); + print_buffer_to_file(level, buff, length); + +#ifdef _WIN32 + print_buffer_to_nt_eventlog(level, buff, length, sizeof(buff)); +#endif + + DBUG_RETURN(0); +} +#endif /* EMBEDDED_LIBRARY */ + + +void sql_print_error(const char *format, ...) +{ + va_list args; + DBUG_ENTER("sql_print_error"); + + va_start(args, format); + error_log_print(ERROR_LEVEL, format, args); + va_end(args); + + DBUG_VOID_RETURN; +} + + +void sql_print_warning(const char *format, ...) +{ + va_list args; + DBUG_ENTER("sql_print_warning"); + + va_start(args, format); + error_log_print(WARNING_LEVEL, format, args); + va_end(args); + + DBUG_VOID_RETURN; +} + + +void sql_print_information(const char *format, ...) +{ + va_list args; + DBUG_ENTER("sql_print_information"); + + va_start(args, format); + sql_print_information_v(format, args); + va_end(args); + + DBUG_VOID_RETURN; +} + +void sql_print_information_v(const char *format, va_list ap) +{ + if (disable_log_notes) + return; // Skip notes during start/shutdown + + error_log_print(INFORMATION_LEVEL, format, ap); +} + +void +TC_LOG::run_prepare_ordered(THD *thd, bool all) +{ + Ha_trx_info *ha_info= + all ? thd->transaction->all.ha_list : thd->transaction->stmt.ha_list; + + mysql_mutex_assert_owner(&LOCK_prepare_ordered); + for (; ha_info; ha_info= ha_info->next()) + { + handlerton *ht= ha_info->ht(); + if (!ht->prepare_ordered) + continue; + ht->prepare_ordered(ht, thd, all); + } +} + + +void +TC_LOG::run_commit_ordered(THD *thd, bool all) +{ + Ha_trx_info *ha_info= + all ? 
thd->transaction->all.ha_list : thd->transaction->stmt.ha_list; + + mysql_mutex_assert_owner(&LOCK_commit_ordered); + for (; ha_info; ha_info= ha_info->next()) + { + handlerton *ht= ha_info->ht(); + if (!ht->commit_ordered) + continue; + ht->commit_ordered(ht, thd, all); + DBUG_EXECUTE_IF("enable_log_write_upto_crash", + { + DBUG_SET_INITIAL("+d,crash_after_log_write_upto"); + sleep(1000); + }); + DEBUG_SYNC(thd, "commit_after_run_commit_ordered"); + } +} + + +int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all, + bool need_prepare_ordered, + bool need_commit_ordered) +{ + int cookie; + struct commit_entry entry; + bool UNINIT_VAR(is_group_commit_leader); + + if (need_prepare_ordered) + { + mysql_mutex_lock(&LOCK_prepare_ordered); + run_prepare_ordered(thd, all); + if (need_commit_ordered) + { + /* + Must put us in queue so we can run_commit_ordered() in same sequence + as we did run_prepare_ordered(). + */ + thd->clear_wakeup_ready(); + entry.thd= thd; + commit_entry *previous_queue= commit_ordered_queue; + entry.next= previous_queue; + commit_ordered_queue= &entry; + is_group_commit_leader= (previous_queue == NULL); + } + mysql_mutex_unlock(&LOCK_prepare_ordered); + } + + if (thd->wait_for_prior_commit()) + return 0; + + cookie= 0; + if (xid) + cookie= log_one_transaction(xid); + + if (need_commit_ordered) + { + if (need_prepare_ordered) + { + /* + We did the run_prepare_ordered() serialised, then ran the log_xid() in + parallel. Now we have to do run_commit_ordered() serialised in the + same sequence as run_prepare_ordered(). + + We do this starting from the head of the queue, each thread doing + run_commit_ordered() and signalling the next in queue. + */ + if (is_group_commit_leader) + { + /* The first in queue starts the ball rolling. 
*/ + mysql_mutex_lock(&LOCK_prepare_ordered); + while (commit_ordered_queue_busy) + mysql_cond_wait(&COND_queue_busy, &LOCK_prepare_ordered); + commit_entry *queue= commit_ordered_queue; + commit_ordered_queue= NULL; + /* + Mark the queue busy while we bounce it from one thread to the + next. + */ + commit_ordered_queue_busy= true; + mysql_mutex_unlock(&LOCK_prepare_ordered); + + /* Reverse the queue list so we get correct order. */ + commit_entry *prev= NULL; + while (queue) + { + commit_entry *next= queue->next; + queue->next= prev; + prev= queue; + queue= next; + } + DBUG_ASSERT(prev == &entry); + DBUG_ASSERT(prev->thd == thd); + } + else + { + /* Not first in queue; just wait until previous thread wakes us up. */ + thd->wait_for_wakeup_ready(); + } + } + + /* Only run commit_ordered() if log_xid was successful. */ + if (cookie) + { + mysql_mutex_lock(&LOCK_commit_ordered); + run_commit_ordered(thd, all); + mysql_mutex_unlock(&LOCK_commit_ordered); + } + + if (need_prepare_ordered) + { + commit_entry *next= entry.next; + if (next) + { + next->thd->signal_wakeup_ready(); + } + else + { + mysql_mutex_lock(&LOCK_prepare_ordered); + commit_ordered_queue_busy= false; + mysql_cond_signal(&COND_queue_busy); + mysql_mutex_unlock(&LOCK_prepare_ordered); + } + } + } + + return cookie; +} + + +/********* transaction coordinator log for 2pc - mmap() based solution *******/ + +/* + the log consists of a file, mapped to memory. + file is divided into pages of tc_log_page_size size. + (usable size of the first page is smaller because of the log header) + there is a PAGE control structure for each page + each page (or rather its PAGE control structure) can be in one of + the three states - active, syncing, pool. + there could be only one page in the active or syncing state, + but many in pool - pool is a fifo queue. + the usual lifecycle of a page is pool->active->syncing->pool. + the "active" page is a page where new xid's are logged. 
+ the page stays active as long as the syncing slot is taken. + the "syncing" page is being synced to disk. no new xid can be added to it. + when the syncing is done the page is moved to a pool and an active page + becomes "syncing". + + the result of such an architecture is a natural "commit grouping" - + If commits are coming faster than the system can sync, they do not + stall. Instead, all commits that came since the last sync are + logged to the same "active" page, and they all are synced with the next - + one - sync. Thus, thought individual commits are delayed, throughput + is not decreasing. + + when an xid is added to an active page, the thread of this xid waits + for a page's condition until the page is synced. when syncing slot + becomes vacant one of these waiters is awaken to take care of syncing. + it syncs the page and signals all waiters that the page is synced. + PAGE::waiters is used to count these waiters, and a page may never + become active again until waiters==0 (that is all waiters from the + previous sync have noticed that the sync was completed) + + note, that the page becomes "dirty" and has to be synced only when a + new xid is added into it. Removing a xid from a page does not make it + dirty - we don't sync xid removals to disk. 
*/

/* Counter of threads that had to wait for a free page (see overflow()). */
ulong tc_log_page_waits= 0;

#ifdef HAVE_MMAP

/* Header = magic signature plus one byte holding the 2pc engine count. */
#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)

static const uchar tc_log_magic[]={(uchar) 254, 0x23, 0x05, 0x74};

ulong opt_tc_log_size;
ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;

/*
  Open (or create) the memory-mapped transaction-coordinator log.

  If the log file does not exist, it is created with opt_tc_log_size bytes;
  if it exists, the server is assumed to have crashed and recovery is run
  from its contents (unless --tc-heuristic-recover was given, which is
  incompatible with automatic recovery). The file is mmap()ed and split
  into page-size PAGE descriptors; page 0 loses TC_LOG_HEADER_SIZE bytes
  to the header. The `inited` member tracks how far initialization got so
  that close() can unwind exactly the completed steps on error.

  Returns 0 on success, 1 on error (after cleaning up via close()).
*/
int TC_LOG_MMAP::open(const char *opt_name)
{
  uint i;
  bool crashed=FALSE;
  PAGE *pg;

  DBUG_ASSERT(total_ha_2pc > 1);
  DBUG_ASSERT(opt_name);
  DBUG_ASSERT(opt_name[0]);

  tc_log_page_size= my_getpagesize();

  fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME);
  if ((fd= mysql_file_open(key_file_tclog, logname, O_RDWR | O_CLOEXEC, MYF(0))) < 0)
  {
    if (my_errno != ENOENT)
      goto err;
    if (using_heuristic_recover())
      return 1;
    /* No existing log: create a fresh one of the configured size. */
    if ((fd= mysql_file_create(key_file_tclog, logname, CREATE_MODE,
                               O_RDWR | O_CLOEXEC, MYF(MY_WME))) < 0)
      goto err;
    inited=1;
    file_length= opt_tc_log_size;
    if (mysql_file_chsize(fd, file_length, 0, MYF(MY_WME)))
      goto err;
  }
  else
  {
    /* Log exists => previous shutdown was not clean; prepare recovery. */
    inited= 1;
    crashed= TRUE;
    sql_print_information("Recovering after a crash using %s", opt_name);
    if (tc_heuristic_recover)
    {
      sql_print_error("Cannot perform automatic crash recovery when "
                      "--tc-heuristic-recover is used");
      goto err;
    }
    file_length= mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE));
    if (file_length == MY_FILEPOS_ERROR || file_length % tc_log_page_size)
      goto err;
  }

  data= (uchar *)my_mmap(0, (size_t)file_length, PROT_READ|PROT_WRITE,
                         MAP_NOSYNC|MAP_SHARED, fd, 0);
  if (data == MAP_FAILED)
  {
    my_errno=errno;
    goto err;
  }
  inited=2;

  npages=(uint)file_length/tc_log_page_size;
  if (npages < 3)             // to guarantee non-empty pool
    goto err;
  if (!(pages=(PAGE *)my_malloc(key_memory_TC_LOG_MMAP_pages,
                                npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
    goto err;
  inited=3;
  /* Link all pages into a singly-linked pool list, each mapped onto its
     tc_log_page_size slice of the file. */
  for (pg=pages, i=0; i < npages; i++, pg++)
  {
    pg->next=pg+1;
    pg->waiters=0;
    pg->state=PS_POOL;
    mysql_mutex_init(key_PAGE_lock, &pg->lock,
MY_MUTEX_INIT_FAST); + mysql_cond_init(key_PAGE_cond, &pg->cond, 0); + pg->ptr= pg->start=(my_xid *)(data + i*tc_log_page_size); + pg->size=pg->free=tc_log_page_size/sizeof(my_xid); + pg->end=pg->start + pg->size; + } + pages[0].size=pages[0].free= + (tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid); + pages[0].start=pages[0].end-pages[0].size; + pages[npages-1].next=0; + inited=4; + + if (crashed && recover()) + goto err; + + memcpy(data, tc_log_magic, sizeof(tc_log_magic)); + data[sizeof(tc_log_magic)]= (uchar)total_ha_2pc; + my_msync(fd, data, tc_log_page_size, MS_SYNC); + inited=5; + + mysql_mutex_init(key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_active, &LOCK_active, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_pool, &LOCK_pool, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_pending_checkpoint, &LOCK_pending_checkpoint, + MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_active, &COND_active, 0); + mysql_cond_init(key_COND_pool, &COND_pool, 0); + mysql_cond_init(key_TC_LOG_MMAP_COND_queue_busy, &COND_queue_busy, 0); + + inited=6; + + syncing= 0; + active=pages; + DBUG_ASSERT(npages >= 2); + pool=pages+1; + pool_last_ptr= &((pages+npages-1)->next); + commit_ordered_queue= NULL; + commit_ordered_queue_busy= false; + + return 0; + +err: + close(); + return 1; +} + +/** + there is no active page, let's got one from the pool. + + Two strategies here: + -# take the first from the pool + -# if there're waiters - take the one with the most free space. + + @todo + page merging. try to allocate adjacent page first, + so that they can be flushed both in one sync +*/ + +void TC_LOG_MMAP::get_active_from_pool() +{ + PAGE **p, **best_p=0; + int best_free; + + mysql_mutex_lock(&LOCK_pool); + + do + { + best_p= p= &pool; + if ((*p)->waiters == 0 && (*p)->free > 0) // can the first page be used ? + break; // yes - take it. 
+ + best_free=0; // no - trying second strategy + for (p=&(*p)->next; *p; p=&(*p)->next) + { + if ((*p)->waiters == 0 && (*p)->free > best_free) + { + best_free=(*p)->free; + best_p=p; + } + } + } + while ((*best_p == 0 || best_free == 0) && overflow()); + + mysql_mutex_assert_owner(&LOCK_active); + active=*best_p; + + /* Unlink the page from the pool. */ + if (!(*best_p)->next) + pool_last_ptr= best_p; + *best_p=(*best_p)->next; + mysql_mutex_unlock(&LOCK_pool); + + mysql_mutex_lock(&active->lock); + if (active->free == active->size) // we've chosen an empty page + { + tc_log_cur_pages_used++; + set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used); + } +} + +/** + @todo + perhaps, increase log size ? +*/ +int TC_LOG_MMAP::overflow() +{ + /* + simple overflow handling - just wait + TODO perhaps, increase log size ? + let's check the behaviour of tc_log_page_waits first + */ + tc_log_page_waits++; + mysql_cond_wait(&COND_pool, &LOCK_pool); + return 1; // always return 1 +} + +/** + Record that transaction XID is committed on the persistent storage. + + This function is called in the middle of two-phase commit: + First all resources prepare the transaction, then tc_log->log() is called, + then all resources commit the transaction, then tc_log->unlog() is called. + + All access to active page is serialized but it's not a problem, as + we're assuming that fsync() will be a main bottleneck. + That is, parallelizing writes to log pages we'll decrease number of + threads waiting for a page, but then all these threads will be waiting + for a fsync() anyway + + If tc_log == MYSQL_LOG then tc_log writes transaction to binlog and + records XID in a special Xid_log_event. + If tc_log = TC_LOG_MMAP then xid is written in a special memory-mapped + log. + + @retval + 0 - error + @retval + \# - otherwise, "cookie", a number that will be passed as an argument + to unlog() call. tc_log can define it any way it wants, + and use for whatever purposes. 
TC_LOG_MMAP sets it + to the position in memory where xid was logged to. +*/ + +int TC_LOG_MMAP::log_one_transaction(my_xid xid) +{ + int err; + PAGE *p; + ulong cookie; + + mysql_mutex_lock(&LOCK_active); + + /* + if the active page is full - just wait... + frankly speaking, active->free here accessed outside of mutex + protection, but it's safe, because it only means we may miss an + unlog() for the active page, and we're not waiting for it here - + unlog() does not signal COND_active. + */ + while (unlikely(active && active->free == 0)) + mysql_cond_wait(&COND_active, &LOCK_active); + + /* no active page ? take one from the pool */ + if (active == 0) + get_active_from_pool(); + else + mysql_mutex_lock(&active->lock); + + p=active; + + /* + p->free is always > 0 here because to decrease it one needs + to take p->lock and before it one needs to take LOCK_active. + But checked that active->free > 0 under LOCK_active and + haven't release it ever since + */ + + /* searching for an empty slot */ + while (*p->ptr) + { + p->ptr++; + DBUG_ASSERT(p->ptr < p->end); // because p->free > 0 + } + + /* found! store xid there and mark the page dirty */ + cookie= (ulong)((uchar *)p->ptr - data); // can never be zero + *p->ptr++= xid; + p->free--; + p->state= PS_DIRTY; + mysql_mutex_unlock(&p->lock); + + mysql_mutex_lock(&LOCK_sync); + if (syncing) + { // somebody's syncing. 
let's wait + mysql_mutex_unlock(&LOCK_active); + mysql_mutex_lock(&p->lock); + p->waiters++; + while (p->state == PS_DIRTY && syncing) + { + mysql_mutex_unlock(&p->lock); + mysql_cond_wait(&p->cond, &LOCK_sync); + mysql_mutex_lock(&p->lock); + } + p->waiters--; + err= p->state == PS_ERROR; + if (p->state != PS_DIRTY) // page was synced + { + mysql_mutex_unlock(&LOCK_sync); + if (p->waiters == 0) + mysql_cond_signal(&COND_pool); // in case somebody's waiting + mysql_mutex_unlock(&p->lock); + goto done; // we're done + } + DBUG_ASSERT(!syncing); + mysql_mutex_unlock(&p->lock); + syncing = p; + mysql_mutex_unlock(&LOCK_sync); + + mysql_mutex_lock(&LOCK_active); + active=0; // page is not active anymore + mysql_cond_broadcast(&COND_active); + mysql_mutex_unlock(&LOCK_active); + } + else + { + syncing = p; // place is vacant - take it + mysql_mutex_unlock(&LOCK_sync); + active = 0; // page is not active anymore + mysql_cond_broadcast(&COND_active); + mysql_mutex_unlock(&LOCK_active); + } + err= sync(); + +done: + return err ? 0 : cookie; +} + +int TC_LOG_MMAP::sync() +{ + int err; + + DBUG_ASSERT(syncing != active); + + /* + sit down and relax - this can take a while... + note - no locks are held at this point + */ + err= my_msync(fd, syncing->start, syncing->size * sizeof(my_xid), MS_SYNC); + + /* page is synced. let's move it to the pool */ + mysql_mutex_lock(&LOCK_pool); + (*pool_last_ptr)=syncing; + pool_last_ptr=&(syncing->next); + syncing->next=0; + syncing->state= err ? PS_ERROR : PS_POOL; + mysql_cond_signal(&COND_pool); // in case somebody's waiting + mysql_mutex_unlock(&LOCK_pool); + + /* marking 'syncing' slot free */ + mysql_mutex_lock(&LOCK_sync); + mysql_cond_broadcast(&syncing->cond); // signal "sync done" + syncing=0; + /* + we check the "active" pointer without LOCK_active. Still, it's safe - + "active" can change from NULL to not NULL any time, but it + will take LOCK_sync before waiting on active->cond. That is, it can never + miss a signal. 
+ And "active" can change to NULL only by the syncing thread + (the thread that will send a signal below) + */ + if (active) + mysql_cond_signal(&active->cond); // wake up a new syncer + mysql_mutex_unlock(&LOCK_sync); + return err; +} + +static void +mmap_do_checkpoint_callback(void *data) +{ + TC_LOG_MMAP::pending_cookies *pending= + static_cast(data); + ++pending->pending_count; +} + +int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid) +{ + pending_cookies *full_buffer= NULL; + uint32 ncookies= tc_log_page_size / sizeof(my_xid); + DBUG_ASSERT(*(my_xid *)(data+cookie) == xid); + + /* + Do not delete the entry immediately, as there may be participating storage + engines which implement commit_checkpoint_request(), and thus have not yet + flushed the commit durably to disk. + + Instead put it in a queue - and periodically, we will request a checkpoint + from all engines and delete a whole batch at once. + */ + mysql_mutex_lock(&LOCK_pending_checkpoint); + if (pending_checkpoint == NULL) + { + uint32 size= sizeof(*pending_checkpoint) + sizeof(ulong) * (ncookies - 1); + if (!(pending_checkpoint= + (pending_cookies *)my_malloc(PSI_INSTRUMENT_ME, size, + MYF(MY_ZEROFILL)))) + { + my_error(ER_OUTOFMEMORY, MYF(0), size); + mysql_mutex_unlock(&LOCK_pending_checkpoint); + return 1; + } + } + + pending_checkpoint->cookies[pending_checkpoint->count++]= cookie; + if (pending_checkpoint->count == ncookies) + { + full_buffer= pending_checkpoint; + pending_checkpoint= NULL; + } + mysql_mutex_unlock(&LOCK_pending_checkpoint); + + if (full_buffer) + { + /* + We do an extra increment and notify here - this ensures that + things work also if there are no engines at all that support + commit_checkpoint_request. 
+ */ + ++full_buffer->pending_count; + ha_commit_checkpoint_request(full_buffer, mmap_do_checkpoint_callback); + commit_checkpoint_notify(full_buffer); + } + return 0; +} + + +void +TC_LOG_MMAP::commit_checkpoint_notify(void *cookie) +{ + uint count; + pending_cookies *pending= static_cast(cookie); + mysql_mutex_lock(&LOCK_pending_checkpoint); + DBUG_ASSERT(pending->pending_count > 0); + count= --pending->pending_count; + mysql_mutex_unlock(&LOCK_pending_checkpoint); + if (count == 0) + { + uint i; + for (i= 0; i < tc_log_page_size / sizeof(my_xid); ++i) + delete_entry(pending->cookies[i]); + my_free(pending); + } +} + + +/** + erase xid from the page, update page free space counters/pointers. + cookie points directly to the memory where xid was logged. +*/ + +int TC_LOG_MMAP::delete_entry(ulong cookie) +{ + PAGE *p=pages+(cookie/tc_log_page_size); + my_xid *x=(my_xid *)(data+cookie); + + DBUG_ASSERT(x >= p->start); + DBUG_ASSERT(x < p->end); + + mysql_mutex_lock(&p->lock); + *x=0; + p->free++; + DBUG_ASSERT(p->free <= p->size); + set_if_smaller(p->ptr, x); + if (p->free == p->size) // the page is completely empty + statistic_decrement(tc_log_cur_pages_used, &LOCK_status); + if (p->waiters == 0) // the page is in pool and ready to rock + mysql_cond_signal(&COND_pool); // ping ... 
for overflow() + mysql_mutex_unlock(&p->lock); + return 0; +} + +void TC_LOG_MMAP::close() +{ + uint i; + switch (inited) { + case 6: + mysql_mutex_destroy(&LOCK_sync); + mysql_mutex_destroy(&LOCK_active); + mysql_mutex_destroy(&LOCK_pool); + mysql_mutex_destroy(&LOCK_pending_checkpoint); + mysql_cond_destroy(&COND_pool); + mysql_cond_destroy(&COND_active); + mysql_cond_destroy(&COND_queue_busy); + /* fall through */ + case 5: + data[0]='A'; // garble the first (signature) byte, in case mysql_file_delete fails + /* fall through */ + case 4: + for (i=0; i < npages; i++) + { + if (pages[i].ptr == 0) + break; + mysql_mutex_destroy(&pages[i].lock); + mysql_cond_destroy(&pages[i].cond); + } + /* fall through */ + case 3: + my_free(pages); + /* fall through */ + case 2: + my_munmap((char*)data, (size_t)file_length); + /* fall through */ + case 1: + mysql_file_close(fd, MYF(0)); + } + if (inited>=5) // cannot do in the switch because of Windows + mysql_file_delete(key_file_tclog, logname, MYF(MY_WME)); + if (pending_checkpoint) + my_free(pending_checkpoint); + inited=0; +} + + +int TC_LOG_MMAP::recover() +{ + HASH xids; + PAGE *p=pages, *end_p=pages+npages; + + if (bcmp(data, tc_log_magic, sizeof(tc_log_magic))) + { + sql_print_error("Bad magic header in tc log"); + goto err1; + } + + /* + the first byte after magic signature is set to current + number of storage engines on startup + */ + if (data[sizeof(tc_log_magic)] > total_ha_2pc) + { + sql_print_error("Recovery failed! 
You must enable " + "all engines that were enabled at the moment of the crash"); + goto err1; + } + + if (my_hash_init(PSI_INSTRUMENT_ME, &xids, &my_charset_bin, + tc_log_page_size/3, 0, sizeof(my_xid), 0, 0, MYF(0))) + goto err1; + + for ( ; p < end_p ; p++) + { + for (my_xid *x=p->start; x < p->end; x++) + if (*x && my_hash_insert(&xids, (uchar *)x)) + goto err2; // OOM + } + + if (ha_recover(&xids)) + goto err2; + + my_hash_free(&xids); + bzero(data, (size_t)file_length); + return 0; + +err2: + my_hash_free(&xids); +err1: + sql_print_error("Crash recovery failed. Either correct the problem " + "(if it's, for example, out of memory error) and restart, " + "or delete tc log and start server with " + "--tc-heuristic-recover={commit|rollback}"); + return 1; +} +#endif + +TC_LOG *tc_log; +TC_LOG_DUMMY tc_log_dummy; +TC_LOG_MMAP tc_log_mmap; + +/** + Perform heuristic recovery, if --tc-heuristic-recover was used. + + @note + no matter whether heuristic recovery was successful or not + mysqld must exit. So, return value is the same in both cases. + + @retval + 0 no heuristic recovery was requested + @retval + 1 heuristic recovery was performed +*/ + +int TC_LOG::using_heuristic_recover() +{ + if (!tc_heuristic_recover) + return 0; + + sql_print_information("Heuristic crash recovery mode"); + if (ha_recover(0)) + sql_print_error("Heuristic crash recovery failed"); + sql_print_information("Please restart without --tc-heuristic-recover"); + return 1; +} + +/****** transaction coordinator log for 2pc - binlog() based solution ******/ +#define TC_LOG_BINLOG MYSQL_BIN_LOG + +/** + Truncates the current binlog to specified position. Removes the rest of binlogs + which are present after this binlog file. + + @param truncate_file Holds the binlog name to be truncated + @param truncate_pos Position within binlog from where it needs to + truncated. 
@retval true  error (truncation or purge failed)
  @retval false success
Error:%d", error); + goto end; + } + + /* Read each entry from purge_index_file and delete the file. */ + if ((error= purge_index_entry(thd, NULL, TRUE))) + { + sql_print_error("Failed to process registered " + "files that would be purged."); + goto end; + } + } + + DBUG_ASSERT(pos); + + if ((file= mysql_file_open(key_file_binlog, file_name, + O_RDWR | O_BINARY, MYF(MY_WME))) < 0) + { + error= 1; + sql_print_error("Failed to open binlog file:%s for " + "truncation.", file_name); + goto end; + } + my_stat(file_name, &s, MYF(0)); + old_size= s.st_size; + clear_inuse_flag_when_closing(file); + /* Change binlog file size to truncate_pos */ + error= mysql_file_chsize(file, pos, 0, MYF(MY_WME)); + if (!error) + error= mysql_file_sync(file, MYF(MY_WME)); + if (error) + { + sql_print_error("Failed to truncate the " + "binlog file:%s to size:%llu. Error:%d", + file_name, pos, error); + goto end; + } + else + { + char buf[21]; + longlong10_to_str(ptr_gtid->seq_no, buf, 10); + sql_print_information("Successfully truncated binlog file:%s " + "from previous file size %llu " + "to pos:%llu to remove transactions starting from " + "GTID %u-%u-%s", + file_name, old_size, pos, + ptr_gtid->domain_id, ptr_gtid->server_id, buf); + } + +end: + if (file >= 0) + mysql_file_close(file, MYF(MY_WME)); + + error= error || close_purge_index_file(); +#endif + return error > 0; +} +int TC_LOG_BINLOG::open(const char *opt_name) +{ + int error= 1; + DBUG_ENTER("TC_LOG_BINLOG::open"); + + DBUG_ASSERT(total_ha_2pc > 1); + DBUG_ASSERT(opt_name); + DBUG_ASSERT(opt_name[0]); + + if (!my_b_inited(&index_file)) + { + /* There was a failure to open the index file, can't open the binlog */ + cleanup(); + DBUG_RETURN(1); + } + + if (using_heuristic_recover()) + { + mysql_mutex_lock(&LOCK_log); + /* generate a new binlog to mask a corrupted one */ + open(opt_name, 0, 0, WRITE_CACHE, max_binlog_size, 0, TRUE); + mysql_mutex_unlock(&LOCK_log); + cleanup(); + DBUG_RETURN(1); + } + + error= 
do_binlog_recovery(opt_name, true); + binlog_state_recover_done= true; + DBUG_RETURN(error); +} + +/** This is called on shutdown, after ha_panic. */ +void TC_LOG_BINLOG::close() +{ +} + +/* + Do a binlog log_xid() for a group of transactions, linked through + thd->next_commit_ordered. +*/ +int +TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all, + bool need_prepare_ordered __attribute__((unused)), + bool need_commit_ordered __attribute__((unused))) +{ + int err; + DBUG_ENTER("TC_LOG_BINLOG::log_and_order"); + + binlog_cache_mngr *cache_mngr= thd->binlog_setup_trx_data(); + if (!cache_mngr) + { + WSREP_DEBUG("Skipping empty log_xid: %s", thd->query()); + DBUG_RETURN(0); + } + + cache_mngr->using_xa= TRUE; + cache_mngr->xa_xid= xid; + err= binlog_commit_flush_xid_caches(thd, cache_mngr, all, xid); + + DEBUG_SYNC(thd, "binlog_after_log_and_order"); + + if (err) + DBUG_RETURN(0); + + bool need_unlog= cache_mngr->need_unlog; + /* + The transaction won't need the flag anymore. + Todo/fixme: consider to move the statement into cache_mngr->reset() + relocated to the current or later point. + */ + cache_mngr->need_unlog= false; + /* + If using explicit user XA, we will not have XID. We must still return a + non-zero cookie (as zero cookie signals error). + */ + if (!xid || !need_unlog) + DBUG_RETURN(BINLOG_COOKIE_DUMMY(cache_mngr->delayed_error)); + + DBUG_RETURN(BINLOG_COOKIE_MAKE(cache_mngr->binlog_id, + cache_mngr->delayed_error)); +} + +/* + After an XID is logged, we need to hold on to the current binlog file until + it is fully committed in the storage engine. The reason is that crash + recovery only looks at the latest binlog, so we must make sure there are no + outstanding prepared (but not committed) transactions before rotating the + binlog. + + To handle this, we keep a count of outstanding XIDs. This function is used + to increase this count when committing one or more transactions to the + binary log. 
+*/ +void +TC_LOG_BINLOG::mark_xids_active(ulong binlog_id, uint xid_count) +{ + xid_count_per_binlog *b; + + DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active"); + DBUG_PRINT("info", ("binlog_id=%lu xid_count=%u", binlog_id, xid_count)); + + mysql_mutex_lock(&LOCK_xid_list); + I_List_iterator it(binlog_xid_count_list); + while ((b= it++)) + { + if (b->binlog_id == binlog_id) + { + b->xid_count += xid_count; + break; + } + } + /* + As we do not delete elements until count reach zero, elements should always + be found. + */ + DBUG_ASSERT(b); + mysql_mutex_unlock(&LOCK_xid_list); + DBUG_VOID_RETURN; +} + +/* + Once an XID is committed, it can no longer be needed during crash recovery, + as it has been durably recorded on disk as "committed". + + This function is called to mark an XID this way. It needs to decrease the + count of pending XIDs in the corresponding binlog. When the count reaches + zero (for an "old" binlog that is not the active one), that binlog file no + longer need to be scanned during crash recovery, so we can log a new binlog + checkpoint. +*/ +void +TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint) +{ + xid_count_per_binlog *b; + bool first; + ulong current; + + DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done"); + + mysql_mutex_lock(&LOCK_xid_list); + current= current_binlog_id; + I_List_iterator it(binlog_xid_count_list); + first= true; + while ((b= it++)) + { + if (b->binlog_id == binlog_id) + { + --b->xid_count; + + DBUG_ASSERT(b->xid_count >= 0); // catch unmatched (++) decrement + + break; + } + first= false; + } + /* Binlog is always found, as we do not remove until count reaches 0 */ + DBUG_ASSERT(b); + /* + If a RESET MASTER is pending, we are about to remove all log files, and + the RESET MASTER thread is waiting for all pending unlog() calls to + complete while holding LOCK_log. 
In this case we should not log a binlog + checkpoint event (it would be deleted immediately anyway and we would + deadlock on LOCK_log) but just signal the thread. + */ + if (unlikely(reset_master_pending)) + { + mysql_cond_broadcast(&COND_xid_list); + mysql_mutex_unlock(&LOCK_xid_list); + DBUG_VOID_RETURN; + } + + if (likely(binlog_id == current) || b->xid_count != 0 || !first || + !write_checkpoint) + { + /* No new binlog checkpoint reached yet. */ + mysql_mutex_unlock(&LOCK_xid_list); + DBUG_VOID_RETURN; + } + + /* + Now log a binlog checkpoint for the first binlog file with a non-zero count. + + Note that it is possible (though perhaps unlikely) that when count of + binlog (N-2) drops to zero, binlog (N-1) is already at zero. So we may + need to skip several entries before we find the one to log in the binlog + checkpoint event. + + We chain the locking of LOCK_xid_list and LOCK_log, so that we ensure that + Binlog_checkpoint_events are logged in order. This simplifies recovery a + bit, as it can just take the last binlog checkpoint in the log, rather + than compare all found against each other to find the one pointing to the + most recent binlog. + + Note also that we need to first release LOCK_xid_list, then acquire + LOCK_log, then re-aquire LOCK_xid_list. If we were to take LOCK_log while + holding LOCK_xid_list, we might deadlock with other threads that take the + locks in the opposite order. + */ + + ++mark_xid_done_waiting; + mysql_mutex_unlock(&LOCK_xid_list); + mysql_mutex_lock(&LOCK_log); + mysql_mutex_lock(&LOCK_xid_list); + --mark_xid_done_waiting; + mysql_cond_broadcast(&COND_xid_list); + /* We need to reload current_binlog_id due to release/re-take of lock. */ + current= current_binlog_id; + + for (;;) + { + /* Remove initial element(s) with zero count. */ + b= binlog_xid_count_list.head(); + /* + We must not remove all elements in the list - the entry for the current + binlog must be present always. 
+ */ + DBUG_ASSERT(b); + if (b->binlog_id == current || b->xid_count > 0) + break; + WSREP_XID_LIST_ENTRY("TC_LOG_BINLOG::mark_xid_done(): Removing " + "xid_list_entry for %s (%lu)", b); + delete binlog_xid_count_list.get(); + } + + mysql_mutex_unlock(&LOCK_xid_list); + write_binlog_checkpoint_event_already_locked(b->binlog_name, + b->binlog_name_len); + mysql_mutex_unlock(&LOCK_log); + DBUG_VOID_RETURN; +} + +int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid) +{ + DBUG_ENTER("TC_LOG_BINLOG::unlog"); + if (!xid) + DBUG_RETURN(0); + + if (!BINLOG_COOKIE_IS_DUMMY(cookie)) + mark_xid_done(BINLOG_COOKIE_GET_ID(cookie), true); + /* + See comment in trx_group_commit_leader() - if rotate() gave a failure, + we delay the return of error code to here. + */ + DBUG_RETURN(BINLOG_COOKIE_GET_ERROR_FLAG(cookie)); +} + +static bool write_empty_xa_prepare(THD *thd, binlog_cache_mngr *cache_mngr) +{ + return binlog_commit_flush_xa_prepare(thd, true, cache_mngr); +} + +int TC_LOG_BINLOG::unlog_xa_prepare(THD *thd, bool all) +{ + DBUG_ASSERT(is_preparing_xa(thd)); + + binlog_cache_mngr *cache_mngr= thd->binlog_setup_trx_data(); + int cookie= 0; + + if (!cache_mngr->need_unlog) + { + Ha_trx_info *ha_info; + uint rw_count= ha_count_rw_all(thd, &ha_info); + bool rc= false; + + /* + This transaction has not been binlogged as indicated by need_unlog. + Such exceptional cases include transactions with no effect to engines, + e.g REPLACE that does not change the dat but still the Engine + transaction branch claims to be rw, and few more. + In all such cases an empty XA-prepare group of events is bin-logged. 
+ */ + if (rw_count > 0) + { + /* an empty XA-prepare event group is logged */ + rc= write_empty_xa_prepare(thd, cache_mngr); // normally gains need_unlog + trans_register_ha(thd, true, binlog_hton, 0); // do it for future commmit + thd->ha_data[binlog_hton->slot].ha_info[1].set_trx_read_write(); + } + if (rw_count == 0 || !cache_mngr->need_unlog) + return rc; + } + + cookie= BINLOG_COOKIE_MAKE(cache_mngr->binlog_id, cache_mngr->delayed_error); + cache_mngr->need_unlog= false; + + return unlog(cookie, 1); +} + + +void +TC_LOG_BINLOG::commit_checkpoint_notify(void *cookie) +{ + xid_count_per_binlog *entry= static_cast(cookie); + bool found_entry= false; + mysql_mutex_lock(&LOCK_binlog_background_thread); + /* count the same notification kind from different engines */ + for (xid_count_per_binlog *link= binlog_background_thread_queue; + link && !found_entry; link= link->next_in_queue) + { + if ((found_entry= (entry == link))) + entry->notify_count++; + } + if (!found_entry) + { + entry->next_in_queue= binlog_background_thread_queue; + binlog_background_thread_queue= entry; + } + mysql_cond_signal(&COND_binlog_background_thread); + mysql_mutex_unlock(&LOCK_binlog_background_thread); +} + +/* + Binlog background thread. + + This thread is used to log binlog checkpoints in the background, rather than + in the context of random storage engine threads that happen to call + commit_checkpoint_notify_ha() and may not like the delays while syncing + binlog to disk or may not be setup with all my_thread_init() and other + necessary stuff. + + In the future, this thread could also be used to do log rotation in the + background, which could eliminate all stalls around binlog rotations. 
+*/ +pthread_handler_t +binlog_background_thread(void *arg __attribute__((unused))) +{ + bool stop; + MYSQL_BIN_LOG::xid_count_per_binlog *queue, *next; + THD *thd; + my_thread_init(); + DBUG_ENTER("binlog_background_thread"); + + thd= new THD(next_thread_id()); + thd->system_thread= SYSTEM_THREAD_BINLOG_BACKGROUND; + thd->thread_stack= (char*) &thd; /* Set approximate stack start */ + thd->store_globals(); + thd->security_ctx->skip_grants(); + thd->set_command(COM_DAEMON); + THD_count::count--; + + /* + Load the slave replication GTID state from the mysql.gtid_slave_pos + table. + + This is mostly so that we can start our seq_no counter from the highest + seq_no seen by a slave. This way, we have a way to tell if a transaction + logged by ourselves as master is newer or older than a replicated + transaction. + */ +#ifdef HAVE_REPLICATION + if (rpl_load_gtid_slave_state(thd)) + sql_print_warning("Failed to load slave replication state from table " + "%s.%s: %u: %s", "mysql", + rpl_gtid_slave_state_table_name.str, + thd->get_stmt_da()->sql_errno(), + thd->get_stmt_da()->message()); +#endif + + mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread); + binlog_background_thread_started= true; + mysql_cond_signal(&mysql_bin_log.COND_binlog_background_thread_end); + mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread); + + for (;;) + { + /* + Wait until there is something in the queue to process, or we are asked + to shut down. + */ + THD_STAGE_INFO(thd, stage_binlog_waiting_background_tasks); + mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread); + for (;;) + { + stop= binlog_background_thread_stop; + queue= binlog_background_thread_queue; + if (stop && !mysql_bin_log.is_xidlist_idle()) + { + /* + Delay stop until all pending binlog checkpoints have been processed. 
+ */ + stop= false; + } + if (stop || queue) + break; + mysql_cond_wait(&mysql_bin_log.COND_binlog_background_thread, + &mysql_bin_log.LOCK_binlog_background_thread); + } + /* Grab the queue, if any. */ + binlog_background_thread_queue= NULL; + mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread); + + /* Process any incoming commit_checkpoint_notify() calls. */ +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("inject_binlog_background_thread_before_mark_xid_done", + DBUG_ASSERT(!debug_sync_set_action( + thd, + STRING_WITH_LEN("binlog_background_thread_before_mark_xid_done " + "SIGNAL injected_binlog_background_thread " + "WAIT_FOR something_that_will_never_happen " + "TIMEOUT 2"))); + ); +#endif + while (queue) + { + long count= queue->notify_count; + THD_STAGE_INFO(thd, stage_binlog_processing_checkpoint_notify); + DEBUG_SYNC(thd, "binlog_background_thread_before_mark_xid_done"); + /* Set the thread start time */ + thd->set_time(); + /* Grab next pointer first, as mark_xid_done() may free the element. */ + next= queue->next_in_queue; + queue->notify_count= 0; + for (long i= 0; i <= count; i++) + mysql_bin_log.mark_xid_done(queue->binlog_id, true); + queue= next; + +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("binlog_background_checkpoint_processed", + DBUG_ASSERT(!debug_sync_set_action( + thd, + STRING_WITH_LEN("now SIGNAL binlog_background_checkpoint_processed"))); + ); +#endif + } + + if (stop) + break; + } + + THD_STAGE_INFO(thd, stage_binlog_stopping_background_thread); + + /* No need to use mutex as thd is not linked into other threads */ + THD_count::count++; + delete thd; + + my_thread_end(); + + /* Signal that we are (almost) stopped. 
*/ + mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread); + binlog_background_thread_stop= false; + mysql_cond_signal(&mysql_bin_log.COND_binlog_background_thread_end); + mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread); + + DBUG_RETURN(0); +} + +#ifdef HAVE_PSI_INTERFACE +static PSI_thread_key key_thread_binlog; + +static PSI_thread_info all_binlog_threads[]= +{ + { &key_thread_binlog, "binlog_background", PSI_FLAG_GLOBAL}, +}; +#endif /* HAVE_PSI_INTERFACE */ + +static bool +start_binlog_background_thread() +{ + pthread_t th; + +#ifdef HAVE_PSI_INTERFACE + if (PSI_server) + PSI_server->register_thread("sql", all_binlog_threads, + array_elements(all_binlog_threads)); +#endif + + if (mysql_thread_create(key_thread_binlog, &th, &connection_attrib, + binlog_background_thread, NULL)) + return 1; + + /* + Wait for the thread to have started (so we know that the slave replication + state is loaded and we have correct global_gtid_counter). + */ + mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread); + while (!binlog_background_thread_started) + mysql_cond_wait(&mysql_bin_log.COND_binlog_background_thread_end, + &mysql_bin_log.LOCK_binlog_background_thread); + mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread); + + return 0; +} +#ifdef HAVE_REPLICATION +class Recovery_context +{ +public: + my_off_t prev_event_pos; + rpl_gtid last_gtid; + bool last_gtid_standalone; + bool last_gtid_valid; + bool last_gtid_no2pc; // true when the group does not end with Xid event + uint last_gtid_engines; + Binlog_offset last_gtid_coord; // + /* + When true, it's semisync slave recovery mode + rolls back transactions in doubt and wipes them off from binlog. + The rest of declarations deal with this type of recovery. + */ + bool do_truncate; + /* + transaction-in-doubt's gtid:s. `truncate_gtid` is the ultimate value, + if it's non-zero truncation is taking place to start from it. 
+ Its value gets refined throughout binlog scanning conducted with at most + 2 rounds. + When an estimate is done in the 1st round of 2-round recovery its value + gets memorized for possible adoption as the ultimate `truncate_gtid`. + */ + rpl_gtid truncate_gtid, truncate_gtid_1st_round; + /* + the last non-transactional group that is located in binlog + behind truncate_gtid. + */ + rpl_gtid binlog_unsafe_gtid; + char binlog_truncate_file_name[FN_REFLEN] ; + char binlog_unsafe_file_name[FN_REFLEN] ; + /* + When do_truncate is true, the truncate position may not be + found in one round when recovered transactions are multi-engine + or just on different engines. + In the single recoverable engine case `truncate_reset_done` and + therefore `truncate_validated` remains `false` when the last + binlog is the binlog-checkpoint one. + The meaning of `truncate_reset_done` is according to the following example: + Let round = 1, Binlog contains the sequence of replication event groups: + [g1, G2, g3] + where `G` (in capital) stands for committed, `g` for prepared. + g1 is first set as truncation candidate, then G2 reset it to indicate + the actual truncation is behind (to the right of) it. + `truncate_validated` is set to true when `binlog_truncate_pos` (as of `g3`) + won't change. + Observe last_gtid_valid is affected, so in the above example `g1` that + was initially ignored for the gtid binlog state now seeing `G2` + would have to be added to it. See gtid_maybe_to_truncate. + */ + bool truncate_validated; // trued when the truncate position settled + bool truncate_reset_done; // trued when the position is to reevaluate + /* Flags the fact of truncate position estimation is done the 1st round */ + bool truncate_set_in_1st; + /* + Monotonically indexes binlog files in the recovery list. + When the list is "likely" singleton the value is UINT_MAX. 
+ Otherwise enumeration starts with zero for the first file, increments + by one for any next file except for the last file in the list, which + is also the initial binlog file for recovery, + that is enumberated with UINT_MAX. + */ + Binlog_file_id id_binlog; + enum_binlog_checksum_alg checksum_alg; + Binlog_offset binlog_truncate_coord, + binlog_truncate_coord_1st_round; // pair is similar to truncate_gtid + Binlog_offset binlog_unsafe_coord; + /* + Populated at decide_or_assess() with gtid-in-doubt whose + binlog offset greater of equal by that of the current gtid truncate + candidate. + Gets empited by reset_truncate_coord into gtid binlog state. + */ + Dynamic_array *gtid_maybe_to_truncate; + Recovery_context(); + ~Recovery_context() { delete gtid_maybe_to_truncate; } + /* + Completes the recovery procedure. + In the normal case prepared xids gets committed when they also found + in binlog, otherwise they are rolled back. + In the semisync slave case the xids that are located in binlog in + a truncated tail get rolled back, otherwise they are committed. + Both decisions are contingent on safety to truncate. + */ + bool complete(MYSQL_BIN_LOG *log, HASH &xids); + + /* + decides on commit of xid passed through member argument. + In the semisync slave case it assigns binlog coordinate to + any xid that remains in-doubt. Decision on them will be + done after binlog scan rounds. + */ + bool decide_or_assess(xid_recovery_member *member, int round, + Format_description_log_event *fdle, + LOG_INFO *linfo, my_off_t pos); + + /* + Assigns last_gtid and assesses the maximum (in the binlog offset term) + unsafe gtid (group of events). + */ + void process_gtid(int round, Gtid_log_event *gev, LOG_INFO *linfo); + + /* + Compute next action at the end of processing of the current binlog file. + It may increment the round. + When the round turns in the semisync-slave recovery + binlog_id, truncate_validated, truncate_reset_done + gets reset/set for the next round. 
+ Within the 2nd round id_binlog keeps incrementing. + + Passed arguments: + round the current round that *may* be increment here + last_log_name the recovery starting binlog file + binlog_checkpoint_name + binlog checkpoint file + linfo binlog file list struct for next file + log pointer to mysql_bin_log instance + + Returns: 0 when rounds continue, maybe the current one remains + 1 when all rounds are done + */ + int next_binlog_or_round(int& round, + const char *last_log_name, + const char *binlog_checkpoint_name, + LOG_INFO *linfo, MYSQL_BIN_LOG *log); + /* + Relates to the semisync recovery. + Returns true when truncated tail does not contain non-transactional + group of events. + Otherwise returns false. + */ + bool is_safe_to_truncate() + { + return !do_truncate ? true : + (truncate_gtid.seq_no == 0 || // no truncate + binlog_unsafe_coord < binlog_truncate_coord); // or unsafe is earlier + } + + /* + Relates to the semisync recovery. + Is invoked when a standalone or non-2pc group is detected. + Both are unsafe to truncate in the semisync-slave recovery so + the maximum unsafe coordinate may be updated. + In the non-2pc group case though, *exeptionally*, + the no-engine group is considered safe, to be invalidated + to not contribute to binlog state. + */ + void update_binlog_unsafe_coord_if_needed(LOG_INFO *linfo); + + /* + Relates to the semisync recovery. + Is called when a committed or decided to-commit transaction is detected. + Actions: + truncate_gtid then is set to "nil" as indicated by rpl_gtid::seq_no := 0. + truncate_reset_done takes a note of that fact. + binlog_truncate_coord gets reset to the current gtid offset merely to + "suggest" any potential future truncate gtid must have a greater offset. + gtid_maybe_to_truncate gets emptied into gtid binlog state. 
+ + Returns: + false on success, otherwise + true when OOM at rpl_global_gtid_binlog_state insert + */ + bool reset_truncate_coord(my_off_t pos); + + /* + Sets binlog_truncate_pos to the value of the current transaction's gtid. + In multi-engine case that might be just an assessment to be refined + in the current round and confirmed in a next one. + gtid_maybe_to_truncate receives the current gtid as a new element. + Returns + false on success, otherwise + true when OOM at gtid_maybe_to_truncate append + + */ + bool set_truncate_coord(LOG_INFO *linfo, int round, + enum_binlog_checksum_alg fd_checksum_alg); +}; + +bool Recovery_context::complete(MYSQL_BIN_LOG *log, HASH &xids) +{ + if (!do_truncate || is_safe_to_truncate()) + { + uint count_in_prepare= + ha_recover_complete(&xids, + !do_truncate ? NULL : + (truncate_gtid.seq_no > 0 ? + &binlog_truncate_coord : &last_gtid_coord)); + + if (count_in_prepare > 0 && global_system_variables.log_warnings > 2) + { + sql_print_warning("Could not complete %u number of transactions.", + count_in_prepare); + return false; // there's later dry run ha_recover() to error out + } + } + + /* Truncation is not done when there's no transaction to roll back */ + if (do_truncate && truncate_gtid.seq_no > 0) + { + if (is_safe_to_truncate()) + { + if (log->truncate_and_remove_binlogs(binlog_truncate_file_name, + binlog_truncate_coord.second, + &truncate_gtid)) + { + sql_print_error("Failed to truncate the binary log to " + "file:%s pos:%llu.", binlog_truncate_file_name, + binlog_truncate_coord.second); + return true; + } + } + else + { + sql_print_error("Cannot truncate the binary log to file:%s " + "pos:%llu as unsafe statement " + "is found at file:%s pos:%llu which is " + "beyond the truncation position;" + "all transactions in doubt are left intact. 
", + binlog_truncate_file_name, binlog_truncate_coord.second, + binlog_unsafe_file_name, binlog_unsafe_coord.second); + return true; + } + } + + return false; +} + +Recovery_context::Recovery_context() : + prev_event_pos(0), + last_gtid_standalone(false), last_gtid_valid(false), last_gtid_no2pc(false), + last_gtid_engines(0), + do_truncate(rpl_semi_sync_slave_enabled), + truncate_validated(false), truncate_reset_done(false), + truncate_set_in_1st(false), id_binlog(MAX_binlog_id), + checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF), gtid_maybe_to_truncate(NULL) +{ + last_gtid_coord= Binlog_offset(0,0); + binlog_truncate_coord= binlog_truncate_coord_1st_round= Binlog_offset(0,0); + binlog_unsafe_coord= Binlog_offset(0,0); + binlog_truncate_file_name[0]= 0; + binlog_unsafe_file_name [0]= 0; + binlog_unsafe_gtid= truncate_gtid= truncate_gtid_1st_round= rpl_gtid(); + if (do_truncate) + gtid_maybe_to_truncate= new Dynamic_array(16, 16); +} + +bool Recovery_context::reset_truncate_coord(my_off_t pos) +{ + DBUG_ASSERT(binlog_truncate_coord.second == 0 || + last_gtid_coord >= binlog_truncate_coord || + truncate_set_in_1st); + // save as backup to restore at next_binlog_or_round when necessary + if (truncate_set_in_1st && truncate_gtid_1st_round.seq_no == 0) + { + truncate_gtid_1st_round= truncate_gtid; + binlog_truncate_coord_1st_round= binlog_truncate_coord; + } + binlog_truncate_coord= Binlog_offset(id_binlog, pos); + truncate_gtid= rpl_gtid(); + truncate_reset_done= true; + for (uint i= 0; i < gtid_maybe_to_truncate->elements(); i++) + { + rpl_gtid gtid= gtid_maybe_to_truncate->at(i); + if (rpl_global_gtid_binlog_state.update_nolock(>id, false)) + return true; + } + gtid_maybe_to_truncate->clear(); + + return false; +} + +bool Recovery_context::set_truncate_coord(LOG_INFO *linfo, int round, + enum_binlog_checksum_alg fd_checksum) +{ + binlog_truncate_coord= last_gtid_coord; + strmake_buf(binlog_truncate_file_name, linfo->log_file_name); + + truncate_gtid= last_gtid; + 
checksum_alg= fd_checksum; + truncate_set_in_1st= (round == 1); + + return gtid_maybe_to_truncate->append(last_gtid); +} + +bool Recovery_context::decide_or_assess(xid_recovery_member *member, int round, + Format_description_log_event *fdle, + LOG_INFO *linfo, my_off_t pos) +{ + if (member) + { + /* + xid in doubt are resolved as follows: + in_engine_prepare is compared agaist binlogged info to + yield the commit-or-rollback decision in the normal case. + In the semisync-slave recovery the decision is done later + after the binlog scanning has determined the truncation offset. + */ + if (member->in_engine_prepare > last_gtid_engines) + { + char buf[21]; + longlong10_to_str(last_gtid.seq_no, buf, 10); + sql_print_error("Error to recovery multi-engine transaction: " + "the number of engines prepared %u exceeds the " + "respective number %u in its GTID %u-%u-%s " + "located at file:%s pos:%llu", + member->in_engine_prepare, last_gtid_engines, + last_gtid.domain_id, last_gtid.server_id, buf, + linfo->log_file_name, last_gtid_coord.second); + return true; + } + else if (member->in_engine_prepare < last_gtid_engines) + { + DBUG_ASSERT(member->in_engine_prepare > 0); + /* + This is an "unlikely" branch of two or more engines in transaction + that is partially committed, so to complete. + */ + member->decided_to_commit= true; + if (do_truncate) + { + /* Validated truncate at this point can be only in the 2nd round. */ + DBUG_ASSERT(!truncate_validated || + (round == 2 && truncate_set_in_1st && + last_gtid_coord < binlog_truncate_coord)); + /* + Estimated truncate must not be greater than the current one's + offset, unless the turn of the rounds. 
+ */ + DBUG_ASSERT(truncate_validated || + (last_gtid_coord >= binlog_truncate_coord || + (round == 2 && truncate_set_in_1st))); + + if (!truncate_validated && reset_truncate_coord(pos)) + return true; + } + } + else // member->in_engine_prepare == last_gtid_engines + { + if (!do_truncate) // "normal" recovery + { + member->decided_to_commit= true; + } + else + { + member->binlog_coord= last_gtid_coord; + last_gtid_valid= false; + /* + First time truncate position estimate before its validation. + An estimate may change to involve reset_truncate_coord call. + */ + if (!truncate_validated) + { + if (truncate_gtid.seq_no == 0 /* was reset or never set */ || + (truncate_set_in_1st && round == 2 /* reevaluted at round turn */)) + { + if (set_truncate_coord(linfo, round, fdle->checksum_alg)) + return true; + } + else + { + /* Truncate estimate was done ago, this gtid can't improve it. */ + DBUG_ASSERT(last_gtid_coord >= binlog_truncate_coord); + + gtid_maybe_to_truncate->append(last_gtid); + } + + DBUG_ASSERT(member->decided_to_commit == false); // may redecided + } + else + { + /* + binlog truncate was determined, possibly to none, otherwise + its offset greater than that of the current gtid. + */ + DBUG_ASSERT(truncate_gtid.seq_no == 0 || + last_gtid_coord < binlog_truncate_coord); + member->decided_to_commit= true; + } + } + } + } + else if (do_truncate) // "0" < last_gtid_engines + { + /* + Similar to the partial commit branch above. 
+ */ + DBUG_ASSERT(!truncate_validated || last_gtid_coord < binlog_truncate_coord); + DBUG_ASSERT(truncate_validated || + (last_gtid_coord >= binlog_truncate_coord || + (round == 2 && truncate_set_in_1st))); + + if (!truncate_validated && reset_truncate_coord(pos)) + return true; + } + + return false; +} + +void Recovery_context::update_binlog_unsafe_coord_if_needed(LOG_INFO *linfo) +{ + if (!do_truncate) + return; + + if (truncate_gtid.seq_no > 0 && // g1,U2, *not* G1,U2 + last_gtid_coord > binlog_truncate_coord) + { + DBUG_ASSERT(binlog_truncate_coord.second > 0); + /* + Potentially unsafe when the truncate coordinate is not determined, + just detected as unsafe when behind the latter. + */ + if (last_gtid_engines == 0) + { + last_gtid_valid= false; + } + else + { + binlog_unsafe_gtid= last_gtid; + binlog_unsafe_coord= last_gtid_coord; + strmake_buf(binlog_unsafe_file_name, linfo->log_file_name); + } + } +} + +void Recovery_context::process_gtid(int round, Gtid_log_event *gev, + LOG_INFO *linfo) +{ + last_gtid.domain_id= gev->domain_id; + last_gtid.server_id= gev->server_id; + last_gtid.seq_no= gev->seq_no; + last_gtid_engines= gev->extra_engines != UCHAR_MAX ? + gev->extra_engines + 1 : 0; + last_gtid_coord= Binlog_offset(id_binlog, prev_event_pos); + + DBUG_ASSERT(!last_gtid_valid); + DBUG_ASSERT(last_gtid.seq_no != 0); + + if (round == 1 || (do_truncate && !truncate_validated)) + { + DBUG_ASSERT(!last_gtid_valid); + + last_gtid_no2pc= false; + last_gtid_standalone= + (gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false; + if (do_truncate && last_gtid_standalone) + update_binlog_unsafe_coord_if_needed(linfo); + /* Update the binlog state with any 'valid' GTID logged after Gtid_list. 
*/ + last_gtid_valid= true; // may flip at Xid when falls to truncate + } +} + +int Recovery_context::next_binlog_or_round(int& round, + const char *last_log_name, + const char *binlog_checkpoint_name, + LOG_INFO *linfo, + MYSQL_BIN_LOG *log) +{ + if (!strcmp(linfo->log_file_name, last_log_name)) + { + /* Exit the loop now at the end of the current round. */ + DBUG_ASSERT(round <= 2); + + if (do_truncate) + { + truncate_validated= truncate_reset_done; + truncate_reset_done= false; + /* + Restore the 1st round saved estimate if it was not refined in the 2nd. + That can only occur in multiple log files context when the inital file + has a truncation candidate (a `g`) and does not have any commited `G`, + *and* other files (binlog-checkpoint one and so on) do not have any + transaction-in-doubt. + */ + if (truncate_gtid.seq_no == 0 && truncate_set_in_1st) + { + DBUG_ASSERT(truncate_gtid_1st_round.seq_no > 0); + + truncate_gtid= truncate_gtid_1st_round; + binlog_truncate_coord= binlog_truncate_coord_1st_round; + } + } + return 1; + } + else if (round == 1) + { + if (do_truncate) + { + truncate_validated= truncate_reset_done; + if (!truncate_validated) + { + rpl_global_gtid_binlog_state.reset_nolock(); + gtid_maybe_to_truncate->clear(); + } + truncate_reset_done= false; + id_binlog= 0; + } + round++; + } + else if (do_truncate) // binlog looping within round 2 + { + id_binlog++; + + DBUG_ASSERT(id_binlog <= MAX_binlog_id); // the assert is "practical" + } + + DBUG_ASSERT(!do_truncate || id_binlog != MAX_binlog_id || + !strcmp(linfo->log_file_name, binlog_checkpoint_name)); + + return 0; +} +#endif + +/* + Execute recovery of the binary log + + @param do_xa + if true: Collect all Xid events and call ha_recover(). + if false: Collect only Xid events from Query events. This is + used to disable entries in the ddl recovery log that + are found in the binary log (and thus already executed and + logged and thus don't have to be redone). 
+*/ + +int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, + IO_CACHE *first_log, + Format_description_log_event *fdle, bool do_xa) +{ + Log_event *ev= NULL; + HASH xids, ddl_log_ids; + MEM_ROOT mem_root; + char binlog_checkpoint_name[FN_REFLEN]; + bool binlog_checkpoint_found; + IO_CACHE log; + File file= -1; + const char *errmsg; +#ifdef HAVE_REPLICATION + Recovery_context ctx; +#endif + DBUG_ENTER("TC_LOG_BINLOG::recover"); + /* + The for-loop variable is updated by the following rule set: + Initially set to 1. + After the initial binlog file is processed to identify + the Binlog-checkpoint file it is incremented when the latter file + is different from the initial one. Otherwise the only log has been + fully parsed so the for loop exits. + The 2nd round parses all earlier in binlog index order files + starting from the Binlog-checkpoint file. It ends when the initial + binlog file is reached. + */ + int round; + + if (! fdle->is_valid() || + (my_hash_init(key_memory_binlog_recover_exec, &xids, + &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0, + sizeof(my_xid), 0, 0, MYF(0))) || + (my_hash_init(key_memory_binlog_recover_exec, &ddl_log_ids, + &my_charset_bin, 64, 0, + sizeof(my_xid), 0, 0, MYF(0)))) + goto err1; + + init_alloc_root(key_memory_binlog_recover_exec, &mem_root, + TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE, MYF(0)); + + fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error + + /* finds xids when root is not NULL */ + if (do_xa && ha_recover(&xids, &mem_root)) + goto err1; + + /* + Scan the binlog for XIDs that need to be committed if still in the + prepared stage. + + Start with the latest binlog file, then continue with any other binlog + files if the last found binlog checkpoint indicates it is needed. + */ + + binlog_checkpoint_found= false; + for (round= 1;;) + { + while ((ev= Log_event::read_log_event(round == 1 ? 
first_log : &log, + fdle, opt_master_verify_checksum)) + && ev->is_valid()) + { + enum Log_event_type typ= ev->get_type_code(); + switch (typ) + { + case XID_EVENT: + if (do_xa) + { + xid_recovery_member *member= + (xid_recovery_member*) + my_hash_search(&xids, (uchar*) &static_cast(ev)->xid, + sizeof(my_xid)); +#ifndef HAVE_REPLICATION + { + if (member) + member->decided_to_commit= true; + } +#else + if (ctx.decide_or_assess(member, round, fdle, linfo, ev->log_pos)) + goto err2; +#endif + } + break; + case QUERY_EVENT: + { + Query_log_event *query_ev= (Query_log_event*) ev; + if (query_ev->xid) + { + DBUG_PRINT("QQ", ("xid: %llu xid")); + DBUG_ASSERT(sizeof(query_ev->xid) == sizeof(my_xid)); + uchar *x= (uchar *) memdup_root(&mem_root, + (uchar*) &query_ev->xid, + sizeof(query_ev->xid)); + if (!x || my_hash_insert(&ddl_log_ids, x)) + goto err2; + } +#ifdef HAVE_REPLICATION + if (((Query_log_event *)ev)->is_commit() || + ((Query_log_event *)ev)->is_rollback()) + { + ctx.last_gtid_no2pc= true; + ctx.update_binlog_unsafe_coord_if_needed(linfo); + } +#endif + break; + } + case BINLOG_CHECKPOINT_EVENT: + if (round == 1 && do_xa) + { + size_t dir_len; + Binlog_checkpoint_log_event *cev= (Binlog_checkpoint_log_event *)ev; + if (cev->binlog_file_len >= FN_REFLEN) + sql_print_warning("Incorrect binlog checkpoint event with too " + "long file name found."); + else + { + /* + Note that we cannot use make_log_name() here, as we have not yet + initialised MYSQL_BIN_LOG::log_file_name. + */ + dir_len= dirname_length(last_log_name); + strmake(strnmov(binlog_checkpoint_name, last_log_name, dir_len), + cev->binlog_file_name, FN_REFLEN - 1 - dir_len); + binlog_checkpoint_found= true; + } + } + break; +#ifdef HAVE_REPLICATION + case GTID_LIST_EVENT: + if (round == 1 || (ctx.do_truncate && ctx.id_binlog == 0)) + { + Gtid_list_log_event *glev= (Gtid_list_log_event *)ev; + + /* Initialise the binlog state from the Gtid_list event. 
*/ + if (rpl_global_gtid_binlog_state.load(glev->list, glev->count)) + goto err2; + } + break; + + case GTID_EVENT: + ctx.process_gtid(round, (Gtid_log_event *)ev, linfo); + break; + + case XA_PREPARE_LOG_EVENT: + ctx.last_gtid_no2pc= true; // TODO: complete MDEV-21469 that removes this block + ctx.update_binlog_unsafe_coord_if_needed(linfo); + break; +#endif + + case START_ENCRYPTION_EVENT: + { + if (fdle->start_decryption((Start_encryption_log_event*) ev)) + goto err2; + } + break; + + default: + /* Nothing. */ + break; + } // end of switch + +#ifdef HAVE_REPLICATION + if (ctx.last_gtid_valid && + ((ctx.last_gtid_standalone && !ev->is_part_of_group(typ)) || + (!ctx.last_gtid_standalone && + (typ == XID_EVENT || ctx.last_gtid_no2pc)))) + { + DBUG_ASSERT(round == 1 || (ctx.do_truncate && !ctx.truncate_validated)); + DBUG_ASSERT(!ctx.last_gtid_no2pc || + (ctx.last_gtid_standalone || + typ == XA_PREPARE_LOG_EVENT || + (LOG_EVENT_IS_QUERY(typ) && + (((Query_log_event *)ev)->is_commit() || + ((Query_log_event *)ev)->is_rollback())))); + + if (rpl_global_gtid_binlog_state.update_nolock(&ctx.last_gtid, false)) + goto err2; + ctx.last_gtid_valid= false; + } + ctx.prev_event_pos= ev->log_pos; +#endif + delete ev; + ev= NULL; + } // end of while + + /* + If the last binlog checkpoint event points to an older log, we have to + scan all logs from there also, to get all possible XIDs to recover. + + If there was no binlog checkpoint event at all, this means the log was + written by an older version of MariaDB (or MySQL) - these always have an + (implicit) binlog checkpoint event at the start of the last binlog file. 
+ */ + if (round == 1) + { + if (!binlog_checkpoint_found) + break; + DBUG_EXECUTE_IF("xa_recover_expect_master_bin_000004", + if (0 != strcmp("./master-bin.000004", binlog_checkpoint_name) && + 0 != strcmp(".\\master-bin.000004", binlog_checkpoint_name)) + DBUG_SUICIDE(); + ); + if (find_log_pos(linfo, binlog_checkpoint_name, 1)) + { + sql_print_error("Binlog file '%s' not found in binlog index, needed " + "for recovery. Aborting.", binlog_checkpoint_name); + goto err2; + } + } + else + { + end_io_cache(&log); + mysql_file_close(file, MYF(MY_WME)); + file= -1; + /* + NOTE: reading other binlog's FD is necessary for finding out + the checksum status of the respective binlog file. + */ + if (find_next_log(linfo, 1)) + { + sql_print_error("Error reading binlog files during recovery. " + "Aborting."); + goto err2; + } + } + +#ifdef HAVE_REPLICATION + int rc= ctx.next_binlog_or_round(round, last_log_name, + binlog_checkpoint_name, linfo, this); + if (rc == -1) + goto err2; + else if (rc == 1) + break; // all rounds done +#else + if (!strcmp(linfo->log_file_name, last_log_name)) + break; // No more files to do + round++; +#endif + + if ((file= open_binlog(&log, linfo->log_file_name, &errmsg)) < 0) + { + sql_print_error("%s", errmsg); + goto err2; + } + fdle->reset_crypto(); + } // end of for + + if (do_xa) + { + if (binlog_checkpoint_found) + { +#ifndef HAVE_REPLICATION + if (ha_recover_complete(&xids)) +#else + if (ctx.complete(this, xids)) +#endif + goto err2; + } + } + if (ddl_log_close_binlogged_events(&ddl_log_ids)) + goto err2; + free_root(&mem_root, MYF(0)); + my_hash_free(&xids); + my_hash_free(&ddl_log_ids); + DBUG_RETURN(0); + +err2: + delete ev; + if (file >= 0) + { + end_io_cache(&log); + mysql_file_close(file, MYF(MY_WME)); + } + free_root(&mem_root, MYF(0)); + my_hash_free(&xids); + my_hash_free(&ddl_log_ids); + +err1: + sql_print_error("Crash recovery failed. 
Either correct the problem " + "(if it's, for example, out of memory error) and restart, " + "or delete (or rename) binary log and start serverwith " + "--tc-heuristic-recover={commit|rollback}"); + DBUG_RETURN(1); +} + + + +int +MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery) +{ + LOG_INFO log_info; + const char *errmsg; + IO_CACHE log; + File file; + Log_event *ev= 0; + Format_description_log_event fdle(BINLOG_VERSION); + char log_name[FN_REFLEN]; + int error; + + if (unlikely((error= find_log_pos(&log_info, NullS, 1)))) + { + /* + If there are no binlog files (LOG_INFO_EOF), then we still try to read + the .state file to restore the binlog state. This allows to copy a server + to provision a new one without copying the binlog files (except the + master-bin.state file) and still preserve the correct binlog state. + */ + if (error != LOG_INFO_EOF) + sql_print_error("find_log_pos() failed (error: %d)", error); + else + { + error= read_state_from_file(); + if (error == 2) + { + /* + No binlog files and no binlog state is not an error (eg. just initial + server start after fresh installation). + */ + error= 0; + } + } + return error; + } + + if (! 
fdle.is_valid()) + return 1; + + do + { + strmake_buf(log_name, log_info.log_file_name); + } while (!(error= find_next_log(&log_info, 1))); + + if (error != LOG_INFO_EOF) + { + sql_print_error("find_log_pos() failed (error: %d)", error); + return error; + } + + if ((file= open_binlog(&log, log_name, &errmsg)) < 0) + { + sql_print_error("%s", errmsg); + return 1; + } + + if ((ev= Log_event::read_log_event(&log, &fdle, + opt_master_verify_checksum)) && + ev->get_type_code() == FORMAT_DESCRIPTION_EVENT) + { + if (ev->flags & LOG_EVENT_BINLOG_IN_USE_F) + { + sql_print_information("Recovering after a crash using %s", opt_name); + error= recover(&log_info, log_name, &log, + (Format_description_log_event *)ev, do_xa_recovery); + } + else + { + error= read_state_from_file(); + if (unlikely(error == 2)) + { + /* + The binlog exists, but the .state file is missing. This is normal if + this is the first master start after a major upgrade to 10.0 (with + GTID support). + + However, it could also be that the .state file was lost somehow, and + in this case it could be a serious issue, as we would set the wrong + binlog state in the next binlog file to be created, and GTID + processing would be corrupted. A common way would be copying files + from an old server to a new one and forgetting the .state file. + + So in this case, we want to try to recover the binlog state by + scanning the last binlog file (but we do not need any XA recovery). + + ToDo: We could avoid one scan at first start after major upgrade, by + detecting that there is no GTID_LIST event at the start of the + binlog file, and stopping the scan in that case. + */ + error= recover(&log_info, log_name, &log, + (Format_description_log_event *)ev, false); + } + } + } + + delete ev; + end_io_cache(&log); + mysql_file_close(file, MYF(MY_WME)); + + return error; +} + + +#ifdef INNODB_COMPATIBILITY_HOOKS +/* + Get the current position of the MySQL binlog for transaction currently being + committed. 
+ + This is valid to call from within storage engine commit_ordered() and + commit() methods only. + + Since it stores the position inside THD, it is safe to call without any + locking. +*/ +void +mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file) +{ + binlog_cache_mngr *cache_mngr; + if (opt_bin_log && + (cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton))) + { + *out_file= cache_mngr->last_commit_pos_file; + *out_pos= (ulonglong)(cache_mngr->last_commit_pos_offset); + } + else + { + *out_file= NULL; + *out_pos= 0; + } +} +#endif /* INNODB_COMPATIBILITY_HOOKS */ + + +static void +binlog_checksum_update(MYSQL_THD thd, struct st_mysql_sys_var *var, + void *var_ptr, const void *save) +{ + ulong value= *((ulong *)save); + bool check_purge= false; + ulong UNINIT_VAR(prev_binlog_id); + + mysql_mutex_lock(mysql_bin_log.get_log_lock()); + if(mysql_bin_log.is_open()) + { + prev_binlog_id= mysql_bin_log.current_binlog_id; + if (binlog_checksum_options != value) + mysql_bin_log.checksum_alg_reset= (enum_binlog_checksum_alg)value; + if (mysql_bin_log.rotate(true, &check_purge)) + check_purge= false; + } + else + { + binlog_checksum_options= value; + } + DBUG_ASSERT(binlog_checksum_options == value); + mysql_bin_log.checksum_alg_reset= BINLOG_CHECKSUM_ALG_UNDEF; + mysql_mutex_unlock(mysql_bin_log.get_log_lock()); + if (check_purge) + mysql_bin_log.checkpoint_and_purge(prev_binlog_id); +} + + +static int show_binlog_vars(THD *thd, SHOW_VAR *var, void *, + system_status_var *status_var, enum_var_type) +{ + mysql_bin_log.set_status_variables(thd); + var->type= SHOW_ARRAY; + var->value= (char *)&binlog_status_vars_detail; + return 0; +} + +static SHOW_VAR binlog_status_vars_top[]= { + SHOW_FUNC_ENTRY("Binlog", &show_binlog_vars), + {NullS, NullS, SHOW_LONG} +}; + +static MYSQL_SYSVAR_BOOL( + optimize_thread_scheduling, + opt_optimize_thread_scheduling, + PLUGIN_VAR_READONLY, + "Run fast part of group commit in a single thread, to optimize 
kernel " + "thread scheduling. On by default. Disable to run each transaction in group " + "commit in its own thread, which can be slower at very high concurrency. " + "This option is mostly for testing one algorithm versus the other, and it " + "should not normally be necessary to change it.", + NULL, + NULL, + 1); + +static MYSQL_SYSVAR_ENUM( + checksum, + binlog_checksum_options, + PLUGIN_VAR_RQCMDARG, + "Type of BINLOG_CHECKSUM_ALG. Include checksum for " + "log events in the binary log", + NULL, + binlog_checksum_update, + BINLOG_CHECKSUM_ALG_CRC32, + &binlog_checksum_typelib); + +static struct st_mysql_sys_var *binlog_sys_vars[]= +{ + MYSQL_SYSVAR(optimize_thread_scheduling), + MYSQL_SYSVAR(checksum), + NULL +}; + + +/* + Copy out the non-directory part of binlog position filename for the + `binlog_snapshot_file' status variable, same way as it is done for + SHOW BINLOG STATUS. +*/ +static void +set_binlog_snapshot_file(const char *src) +{ + size_t dir_len = dirname_length(src); + strmake_buf(binlog_snapshot_file, src + dir_len); +} + +/* + Copy out current values of status variables, for SHOW STATUS or + information_schema.global_status. + + This is called only under LOCK_all_status_vars, so we can fill in a static array. 
+*/ +void +TC_LOG_BINLOG::set_status_variables(THD *thd) +{ + binlog_cache_mngr *cache_mngr; + + if (thd && opt_bin_log) + cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + else + cache_mngr= 0; + + bool have_snapshot= (cache_mngr && cache_mngr->last_commit_pos_file[0] != 0); + mysql_mutex_lock(&LOCK_commit_ordered); + binlog_status_var_num_commits= this->num_commits; + binlog_status_var_num_group_commits= this->num_group_commits; + if (!have_snapshot) + { + set_binlog_snapshot_file(last_commit_pos_file); + binlog_snapshot_position= last_commit_pos_offset; + } + mysql_mutex_unlock(&LOCK_commit_ordered); + mysql_mutex_lock(&LOCK_prepare_ordered); + binlog_status_group_commit_trigger_count= this->group_commit_trigger_count; + binlog_status_group_commit_trigger_timeout= this->group_commit_trigger_timeout; + binlog_status_group_commit_trigger_lock_wait= this->group_commit_trigger_lock_wait; + mysql_mutex_unlock(&LOCK_prepare_ordered); + + if (have_snapshot) + { + set_binlog_snapshot_file(cache_mngr->last_commit_pos_file); + binlog_snapshot_position= cache_mngr->last_commit_pos_offset; + } +} + + +/* + Find the Gtid_list_log_event at the start of a binlog. + + NULL for ok, non-NULL error message for error. + + If ok, then the event is returned in *out_gtid_list. This can be NULL if we + get back to binlogs written by old server version without GTID support. If + so, it means we have reached the point to start from, as no GTID events can + exist in earlier binlogs. 
+*/ +const char * +get_gtid_list_event(IO_CACHE *cache, Gtid_list_log_event **out_gtid_list) +{ + Format_description_log_event init_fdle(BINLOG_VERSION); + Format_description_log_event *fdle; + Log_event *ev; + const char *errormsg = NULL; + + *out_gtid_list= NULL; + + if (!(ev= Log_event::read_log_event(cache, &init_fdle, + opt_master_verify_checksum)) || + ev->get_type_code() != FORMAT_DESCRIPTION_EVENT) + { + if (ev) + delete ev; + return "Could not read format description log event while looking for " + "GTID position in binlog"; + } + + fdle= static_cast(ev); + + for (;;) + { + Log_event_type typ; + + ev= Log_event::read_log_event(cache, fdle, opt_master_verify_checksum); + if (!ev) + { + errormsg= "Could not read GTID list event while looking for GTID " + "position in binlog"; + break; + } + typ= ev->get_type_code(); + if (typ == GTID_LIST_EVENT) + break; /* Done, found it */ + if (typ == START_ENCRYPTION_EVENT) + { + if (fdle->start_decryption((Start_encryption_log_event*) ev)) + { + errormsg= "Could not set up decryption for binlog."; + typ= UNKNOWN_EVENT; // to cleanup and abort below + } + } + delete ev; + if (typ == ROTATE_EVENT || typ == STOP_EVENT || + typ == FORMAT_DESCRIPTION_EVENT || typ == START_ENCRYPTION_EVENT) + continue; /* Continue looking */ + + /* We did not find any Gtid_list_log_event, must be old binlog. 
*/ + ev= NULL; + break; + } + + delete fdle; + *out_gtid_list= static_cast(ev); + return errormsg; +} + + +struct st_mysql_storage_engine binlog_storage_engine= +{ MYSQL_HANDLERTON_INTERFACE_VERSION }; + +maria_declare_plugin(binlog) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &binlog_storage_engine, + "binlog", + "MySQL AB", + "This is a pseudo storage engine to represent the binlog in a transaction", + PLUGIN_LICENSE_GPL, + binlog_init, /* Plugin Init */ + NULL, /* Plugin Deinit */ + 0x0100 /* 1.0 */, + binlog_status_vars_top, /* status variables */ + binlog_sys_vars, /* system variables */ + "1.0", /* string version */ + MariaDB_PLUGIN_MATURITY_STABLE /* maturity */ +} +maria_declare_plugin_end; + +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" + +IO_CACHE *wsrep_get_cache(THD * thd, bool is_transactional) +{ + DBUG_ASSERT(binlog_hton->slot != HA_SLOT_UNDEF); + binlog_cache_mngr *cache_mngr = (binlog_cache_mngr*) + thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) + return cache_mngr->get_binlog_cache_log(is_transactional); + + WSREP_DEBUG("binlog cache not initialized, conn: %llu", + thd->thread_id); + return NULL; +} + +void wsrep_thd_binlog_trx_reset(THD * thd) +{ + DBUG_ENTER("wsrep_thd_binlog_trx_reset"); + WSREP_DEBUG("wsrep_thd_binlog_reset"); + /* + todo: fix autocommit select to not call the caller + */ + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) + { + cache_mngr->reset(false, true); + if (!cache_mngr->stmt_cache.empty()) + { + WSREP_DEBUG("pending events in stmt cache, sql: %s", thd->query()); + cache_mngr->stmt_cache.reset(); + } + } + thd->reset_binlog_for_next_statement(); + DBUG_VOID_RETURN; +} + +void wsrep_thd_binlog_stmt_rollback(THD * thd) +{ + DBUG_ENTER("wsrep_thd_binlog_stmt_rollback"); + WSREP_DEBUG("wsrep_thd_binlog_stmt_rollback"); + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) + { + 
thd->binlog_remove_pending_rows_event(TRUE, TRUE); + cache_mngr->stmt_cache.reset(); + } + DBUG_VOID_RETURN; +} + +void wsrep_register_binlog_handler(THD *thd, bool trx) +{ + DBUG_ENTER("register_binlog_handler"); + /* + If this is the first call to this function while processing a statement, + the transactional cache does not have a savepoint defined. So, in what + follows: + . an implicit savepoint is defined; + . callbacks are registered; + . binary log is set as read/write. + + The savepoint allows for truncating the trx-cache transactional changes + fail. Callbacks are necessary to flush caches upon committing or rolling + back a statement or a transaction. However, notifications do not happen + if the binary log is set as read/write. + */ + binlog_cache_mngr *cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + /* cache_mngr may be missing e.g. in mtr test ev51914.test */ + if (cache_mngr) + { + /* + Set an implicit savepoint in order to be able to truncate a trx-cache. + */ + if (cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF) + { + my_off_t pos= 0; + binlog_trans_log_savepos(thd, &pos); + cache_mngr->trx_cache.set_prev_position(pos); + } + + /* + Set callbacks in order to be able to call commmit or rollback. + */ + if (trx) + trans_register_ha(thd, TRUE, binlog_hton, 0); + trans_register_ha(thd, FALSE, binlog_hton, 0); + + /* + Set the binary log as read/write otherwise callbacks are not called. + */ + thd->ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write(); + } + DBUG_VOID_RETURN; +} + +#endif /* WITH_WSREP */ diff --git a/sql/log.h b/sql/log.h new file mode 100644 index 00000000..c20f0fe5 --- /dev/null +++ b/sql/log.h @@ -0,0 +1,1271 @@ +/* Copyright (c) 2005, 2016, Oracle and/or its affiliates. + Copyright (c) 2009, 2020, MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef LOG_H +#define LOG_H + +#include "handler.h" /* my_xid */ +#include "rpl_constants.h" + +class Relay_log_info; + +class Format_description_log_event; + +bool reopen_fstreams(const char *filename, FILE *outstream, FILE *errstream); +void setup_log_handling(); +bool trans_has_updated_trans_table(const THD* thd); +bool stmt_has_updated_trans_table(const THD *thd); +bool use_trans_cache(const THD* thd, bool is_transactional); +bool ending_trans(THD* thd, const bool all); +bool ending_single_stmt_trans(THD* thd, const bool all); +bool trans_has_updated_non_trans_table(const THD* thd); +bool stmt_has_updated_non_trans_table(const THD* thd); + +/* + Transaction Coordinator log - a base abstract class + for two different implementations +*/ +class TC_LOG +{ + public: + int using_heuristic_recover(); + TC_LOG() = default; + virtual ~TC_LOG() = default; + + virtual int open(const char *opt_name)=0; + virtual void close()=0; + /* + Transaction coordinator 2-phase commit. + + Must invoke the run_prepare_ordered and run_commit_ordered methods, as + described below for these methods. + + In addition, must invoke THD::wait_for_prior_commit(), or equivalent + wait, to ensure that one commit waits for another if registered to do so. 
+ */ + virtual int log_and_order(THD *thd, my_xid xid, bool all, + bool need_prepare_ordered, + bool need_commit_ordered) = 0; + virtual int unlog(ulong cookie, my_xid xid)=0; + virtual int unlog_xa_prepare(THD *thd, bool all)= 0; + virtual void commit_checkpoint_notify(void *cookie)= 0; + +protected: + /* + These methods are meant to be invoked from log_and_order() implementations + to run any prepare_ordered() respectively commit_ordered() methods in + participating handlers. + + They must be called using suitable thread syncronisation to ensure that + they are each called in the correct commit order among all + transactions. However, it is only necessary to call them if the + corresponding flag passed to log_and_order is set (it is safe, but not + required, to call them when the flag is false). + + The caller must be holding LOCK_prepare_ordered respectively + LOCK_commit_ordered when calling these methods. + */ + void run_prepare_ordered(THD *thd, bool all); + void run_commit_ordered(THD *thd, bool all); +}; + +/* + Locks used to ensure serialised execution of TC_LOG::run_prepare_ordered() + and TC_LOG::run_commit_ordered(), or any other code that calls handler + prepare_ordered() or commit_ordered() methods. +*/ +extern mysql_mutex_t LOCK_prepare_ordered; +extern mysql_cond_t COND_prepare_ordered; +extern mysql_mutex_t LOCK_after_binlog_sync; +extern mysql_mutex_t LOCK_commit_ordered; +#ifdef HAVE_PSI_INTERFACE +extern PSI_mutex_key key_LOCK_prepare_ordered, key_LOCK_commit_ordered; +extern PSI_mutex_key key_LOCK_after_binlog_sync; +extern PSI_cond_key key_COND_prepare_ordered; +#endif + +class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging +{ +public: + TC_LOG_DUMMY() = default; + int open(const char *opt_name) { return 0; } + void close() { } + /* + TC_LOG_DUMMY is only used when there are <= 1 XA-capable engines, and we + only use internal XA during commit when >= 2 XA-capable engines + participate. 
+ */ + int log_and_order(THD *thd, my_xid xid, bool all, + bool need_prepare_ordered, bool need_commit_ordered) + { + DBUG_ASSERT(0); + return 1; + } + int unlog(ulong cookie, my_xid xid) { return 0; } + int unlog_xa_prepare(THD *thd, bool all) + { + return 0; + } + void commit_checkpoint_notify(void *cookie) { DBUG_ASSERT(0); }; +}; + +#define TC_LOG_PAGE_SIZE 8192 + +#ifdef HAVE_MMAP +class TC_LOG_MMAP: public TC_LOG +{ + public: // only to keep Sun Forte on sol9x86 happy + typedef enum { + PS_POOL, // page is in pool + PS_ERROR, // last sync failed + PS_DIRTY // new xids added since last sync + } PAGE_STATE; + + struct pending_cookies { + uint count; + uint pending_count; + ulong cookies[1]; + }; + + private: + typedef struct st_page { + struct st_page *next; // page a linked in a fifo queue + my_xid *start, *end; // usable area of a page + my_xid *ptr; // next xid will be written here + int size, free; // max and current number of free xid slots on the page + int waiters; // number of waiters on condition + PAGE_STATE state; // see above + mysql_mutex_t lock; // to access page data or control structure + mysql_cond_t cond; // to wait for a sync + } PAGE; + + /* List of THDs for which to invoke commit_ordered(), in order. */ + struct commit_entry + { + struct commit_entry *next; + THD *thd; + }; + + char logname[FN_REFLEN]; + File fd; + my_off_t file_length; + uint npages, inited; + uchar *data; + struct st_page *pages, *syncing, *active, *pool, **pool_last_ptr; + /* + note that, e.g. LOCK_active is only used to protect + 'active' pointer, to protect the content of the active page + one has to use active->lock. + Same for LOCK_pool and LOCK_sync + */ + mysql_mutex_t LOCK_active, LOCK_pool, LOCK_sync, LOCK_pending_checkpoint; + mysql_cond_t COND_pool, COND_active; + /* + Queue of threads that need to call commit_ordered(). + Access to this queue must be protected by LOCK_prepare_ordered. 
+ */ + commit_entry *commit_ordered_queue; + /* + This flag and condition is used to reserve the queue while threads in it + each run the commit_ordered() methods one after the other. Only once the + last commit_ordered() in the queue is done can we start on a new queue + run. + + Since we start this process in the first thread in the queue and finish in + the last (and possibly different) thread, we need a condition variable for + this (we cannot unlock a mutex in a different thread than the one who + locked it). + + The condition is used together with the LOCK_prepare_ordered mutex. + */ + mysql_cond_t COND_queue_busy; + my_bool commit_ordered_queue_busy; + pending_cookies* pending_checkpoint; + + public: + TC_LOG_MMAP(): inited(0), pending_checkpoint(0) {} + int open(const char *opt_name); + void close(); + int log_and_order(THD *thd, my_xid xid, bool all, + bool need_prepare_ordered, bool need_commit_ordered); + int unlog(ulong cookie, my_xid xid); + int unlog_xa_prepare(THD *thd, bool all) + { + return 0; + } + void commit_checkpoint_notify(void *cookie); + int recover(); + + private: + int log_one_transaction(my_xid xid); + void get_active_from_pool(); + int sync(); + int overflow(); + int delete_entry(ulong cookie); +}; +#else +#define TC_LOG_MMAP TC_LOG_DUMMY +#endif + +extern TC_LOG *tc_log; +extern TC_LOG_MMAP tc_log_mmap; +extern TC_LOG_DUMMY tc_log_dummy; + +/* log info errors */ +#define LOG_INFO_EOF -1 +#define LOG_INFO_IO -2 +#define LOG_INFO_INVALID -3 +#define LOG_INFO_SEEK -4 +#define LOG_INFO_MEM -6 +#define LOG_INFO_FATAL -7 +#define LOG_INFO_IN_USE -8 +#define LOG_INFO_EMFILE -9 + + +/* bitmap to SQL_LOG::close() */ +#define LOG_CLOSE_INDEX 1 +#define LOG_CLOSE_TO_BE_OPENED 2 +#define LOG_CLOSE_STOP_EVENT 4 +#define LOG_CLOSE_DELAYED_CLOSE 8 + +/* + Maximum unique log filename extension. + Note: setting to 0x7FFFFFFF due to atol windows + overflow/truncate. 
+ */ +#define MAX_LOG_UNIQUE_FN_EXT 0x7FFFFFFF + +/* + Number of warnings that will be printed to error log + before extension number is exhausted. +*/ +#define LOG_WARN_UNIQUE_FN_EXT_LEFT 1000 + +class Relay_log_info; + +/* + Note that we destroy the lock mutex in the desctructor here. + This means that object instances cannot be destroyed/go out of scope, + until we have reset thd->current_linfo to NULL; + */ +typedef struct st_log_info +{ + char log_file_name[FN_REFLEN]; + my_off_t index_file_offset, index_file_start_offset; + my_off_t pos; + bool fatal; // if the purge happens to give us a negative offset + st_log_info() : index_file_offset(0), index_file_start_offset(0), + pos(0), fatal(0) + { + DBUG_ENTER("LOG_INFO"); + log_file_name[0] = '\0'; + DBUG_VOID_RETURN; + } +} LOG_INFO; + +/* + Currently we have only 3 kinds of logging functions: old-fashioned + logs, stdout and csv logging routines. +*/ +#define MAX_LOG_HANDLERS_NUM 3 + +/* log event handler flags */ +#define LOG_NONE 1U +#define LOG_FILE 2U +#define LOG_TABLE 4U + +class Log_event; +class Rows_log_event; + +enum enum_log_type { LOG_UNKNOWN, LOG_NORMAL, LOG_BIN }; +enum enum_log_state { LOG_OPENED, LOG_CLOSED, LOG_TO_BE_OPENED }; + +/* + Use larger buffers when reading from and to binary log + We make it one step smaller than 64K to account for malloc overhead. 
+*/ +#define LOG_BIN_IO_SIZE MY_ALIGN_DOWN(65536-1, IO_SIZE) + +/* + TODO use mmap instead of IO_CACHE for binlog + (mmap+fsync is two times faster than write+fsync) +*/ + +class MYSQL_LOG +{ +public: + MYSQL_LOG(); + virtual ~MYSQL_LOG() = default; + void init_pthread_objects(); + void cleanup(); + bool open( +#ifdef HAVE_PSI_INTERFACE + PSI_file_key log_file_key, +#endif + const char *log_name, + enum_log_type log_type, + const char *new_name, ulong next_file_number, + enum cache_type io_cache_type_arg); + void close(uint exiting); + inline bool is_open() { return log_state != LOG_CLOSED; } + const char *generate_name(const char *log_name, + const char *suffix, + bool strip_ext, char *buff); + virtual int generate_new_name(char *new_name, const char *log_name, + ulong next_log_number); + protected: + /* LOCK_log is inited by init_pthread_objects() */ + mysql_mutex_t LOCK_log; + char *name; + char log_file_name[FN_REFLEN]; + char time_buff[20], db[NAME_LEN + 1]; + bool write_error, inited; + IO_CACHE log_file; + enum_log_type log_type; + volatile enum_log_state log_state; + enum cache_type io_cache_type; + friend class Log_event; +#ifdef HAVE_PSI_INTERFACE + /** Instrumentation key to use for file io in @c log_file */ + PSI_file_key m_log_file_key; +#endif + + bool init_and_set_log_file_name(const char *log_name, + const char *new_name, + ulong next_log_number, + enum_log_type log_type_arg, + enum cache_type io_cache_type_arg); +}; + +/* Tell the io thread if we can delay the master info sync. */ +#define SEMI_SYNC_SLAVE_DELAY_SYNC 1 +/* Tell the io thread if the current event needs a ack. 
*/ +#define SEMI_SYNC_NEED_ACK 2 + +class MYSQL_QUERY_LOG: public MYSQL_LOG +{ +public: + MYSQL_QUERY_LOG() : last_time(0) {} + void reopen_file(); + bool write(time_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id, + const char *command_type, size_t command_type_len, + const char *sql_text, size_t sql_text_len); + bool write(THD *thd, time_t current_time, + const char *user_host, size_t user_host_len, + ulonglong query_utime, ulonglong lock_utime, bool is_command, + const char *sql_text, size_t sql_text_len); + bool open_slow_log(const char *log_name) + { + char buf[FN_REFLEN]; + return open( +#ifdef HAVE_PSI_INTERFACE + key_file_slow_log, +#endif + generate_name(log_name, "-slow.log", 0, buf), + LOG_NORMAL, 0, 0, WRITE_CACHE); + } + bool open_query_log(const char *log_name) + { + char buf[FN_REFLEN]; + return open( +#ifdef HAVE_PSI_INTERFACE + key_file_query_log, +#endif + generate_name(log_name, ".log", 0, buf), + LOG_NORMAL, 0, 0, WRITE_CACHE); + } + +private: + time_t last_time; +}; + +/* + We assign each binlog file an internal ID, used to identify them for unlog(). + The IDs start from 0 and increment for each new binlog created. + + In unlog() we need to know the ID of the binlog file that the corresponding + transaction was written into. We also need a special value for a corner + case where there is no corresponding binlog id (since nothing was logged). + And we need an error flag to mark that unlog() must return failure. + + We use the following macros to pack all of this information into the single + ulong available with log_and_order() / unlog(). + + Note that we cannot use the value 0 for cookie, as that is reserved as error + return value from log_and_order(). 
+ */ +#define BINLOG_COOKIE_ERROR_RETURN 0 +#define BINLOG_COOKIE_DUMMY_ID 1 +#define BINLOG_COOKIE_BASE 2 +#define BINLOG_COOKIE_DUMMY(error_flag) \ + ( (BINLOG_COOKIE_DUMMY_ID<<1) | ((error_flag)&1) ) +#define BINLOG_COOKIE_MAKE(id, error_flag) \ + ( (((id)+BINLOG_COOKIE_BASE)<<1) | ((error_flag)&1) ) +#define BINLOG_COOKIE_GET_ERROR_FLAG(c) ((c) & 1) +#define BINLOG_COOKIE_GET_ID(c) ( ((ulong)(c)>>1) - BINLOG_COOKIE_BASE ) +#define BINLOG_COOKIE_IS_DUMMY(c) \ + ( ((ulong)(c)>>1) == BINLOG_COOKIE_DUMMY_ID ) + +class binlog_cache_mngr; +class binlog_cache_data; +struct rpl_gtid; +struct wait_for_commit; + +class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG +{ + /** The instrumentation key to use for @ LOCK_index. */ + PSI_mutex_key m_key_LOCK_index; + /** The instrumentation key to use for @ COND_relay_log_updated */ + PSI_cond_key m_key_relay_log_update; + /** The instrumentation key to use for @ COND_bin_log_updated */ + PSI_cond_key m_key_bin_log_update; + /** The instrumentation key to use for opening the log file. */ + PSI_file_key m_key_file_log, m_key_file_log_cache; + /** The instrumentation key to use for opening the log index file. */ + PSI_file_key m_key_file_log_index, m_key_file_log_index_cache; + + PSI_cond_key m_key_COND_queue_busy; + /** The instrumentation key to use for LOCK_binlog_end_pos. */ + PSI_mutex_key m_key_LOCK_binlog_end_pos; + + struct group_commit_entry + { + struct group_commit_entry *next; + THD *thd; + binlog_cache_mngr *cache_mngr; + bool using_stmt_cache; + bool using_trx_cache; + /* + Extra events (COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be + written during group commit. The incident_event is only valid if + trx_data->has_incident() is true. + */ + Log_event *end_event; + Log_event *incident_event; + /* Set during group commit to record any per-thread error. */ + int error; + int commit_errno; + IO_CACHE *error_cache; + /* This is the `all' parameter for ha_commit_ordered(). 
*/ + bool all; + /* + True if we need to increment xid_count in trx_group_commit_leader() and + decrement in unlog() (this is needed if there is a participating engine + that does not implement the commit_checkpoint_request() handlerton + method). + */ + bool need_unlog; + /* + Fields used to pass the necessary information to the last thread in a + group commit, only used when opt_optimize_thread_scheduling is not set. + */ + bool check_purge; + /* Flag used to optimise around wait_for_prior_commit. */ + bool queued_by_other; + ulong binlog_id; + bool ro_1pc; // passes the binlog_cache_mngr::ro_1pc value to Gtid ctor + }; + + /* + When this is set, a RESET MASTER is in progress. + + Then we should not write any binlog checkpoints into the binlog (that + could result in deadlock on LOCK_log, and we will delete all binlog files + anyway). Instead we should signal COND_xid_list whenever a new binlog + checkpoint arrives - when all have arrived, RESET MASTER will complete. + */ + uint reset_master_pending; + ulong mark_xid_done_waiting; + + /* LOCK_log and LOCK_index are inited by init_pthread_objects() */ + mysql_mutex_t LOCK_index; + mysql_mutex_t LOCK_binlog_end_pos; + mysql_mutex_t LOCK_xid_list; + mysql_cond_t COND_xid_list; + mysql_cond_t COND_relay_log_updated, COND_bin_log_updated; + ulonglong bytes_written; + IO_CACHE index_file; + char index_file_name[FN_REFLEN]; + /* + purge_file is a temp file used in purge_logs so that the index file + can be updated before deleting files from disk, yielding better crash + recovery. It is created on demand the first time purge_logs is called + and then reused for subsequent calls. It is cleaned up in cleanup(). + */ + IO_CACHE purge_index_file; + char purge_index_file_name[FN_REFLEN]; + /* + The max size before rotation (usable only if log_type == LOG_BIN: binary + logs and relay logs). + For a binlog, max_size should be max_binlog_size. 
+ max_size is set in init(), and dynamically changed (when one does SET + GLOBAL MAX_BINLOG_SIZE|MAX_RELAY_LOG_SIZE) from sys_vars.cc + */ + ulong max_size; + /* + Number generated by last call of find_uniq_filename(). Corresponds + closely with current_binlog_id + */ + ulong last_used_log_number; + // current file sequence number for load data infile binary logging + uint file_id; + uint open_count; // For replication + int readers_count; + /* Queue of transactions queued up to participate in group commit. */ + group_commit_entry *group_commit_queue; + /* + Condition variable to mark that the group commit queue is busy. + Used when each thread does it's own commit_ordered() (when + binlog_optimize_thread_scheduling=1). + Used with the LOCK_commit_ordered mutex. + */ + my_bool group_commit_queue_busy; + mysql_cond_t COND_queue_busy; + /* Total number of committed transactions. */ + ulonglong num_commits; + /* Number of group commits done. */ + ulonglong num_group_commits; + /* The reason why the group commit was grouped */ + ulonglong group_commit_trigger_count, group_commit_trigger_timeout; + ulonglong group_commit_trigger_lock_wait; + + /* binlog encryption data */ + struct Binlog_crypt_data crypto; + + /* pointer to the sync period variable, for binlog this will be + sync_binlog_period, for relay log this will be + sync_relay_log_period + */ + uint *sync_period_ptr; + uint sync_counter; + bool state_file_deleted; + bool binlog_state_recover_done; + + inline uint get_sync_period() + { + return *sync_period_ptr; + } + + int write_to_file(IO_CACHE *cache); + /* + This is used to start writing to a new log file. The difference from + new_file() is locking. new_file_without_locking() does not acquire + LOCK_log. 
+ */ + int new_file_without_locking(); + int new_file_impl(); + void do_checkpoint_request(ulong binlog_id); + void purge(); + int write_transaction_or_stmt(group_commit_entry *entry, uint64 commit_id); + int queue_for_group_commit(group_commit_entry *entry); + bool write_transaction_to_binlog_events(group_commit_entry *entry); + void trx_group_commit_leader(group_commit_entry *leader); + bool is_xidlist_idle_nolock(); +public: + /* + A list of struct xid_count_per_binlog is used to keep track of how many + XIDs are in prepared, but not committed, state in each binlog. And how + many commit_checkpoint_request()'s are pending. + + When count drops to zero in a binlog after rotation, it means that there + are no more XIDs in prepared state, so that binlog is no longer needed + for XA crash recovery, and we can log a new binlog checkpoint event. + + The list is protected against simultaneous access from multiple + threads by LOCK_xid_list. + */ + struct xid_count_per_binlog : public ilink { + char *binlog_name; + uint binlog_name_len; + ulong binlog_id; + /* Total prepared XIDs and pending checkpoint requests in this binlog. */ + long xid_count; + long notify_count; + /* For linking in requests to the binlog background thread. 
*/ + xid_count_per_binlog *next_in_queue; + xid_count_per_binlog(char *log_file_name, uint log_file_name_len) + :binlog_id(0), xid_count(0), notify_count(0) + { + binlog_name_len= log_file_name_len; + binlog_name= (char *) my_malloc(PSI_INSTRUMENT_ME, binlog_name_len, MYF(MY_ZEROFILL)); + if (binlog_name) + memcpy(binlog_name, log_file_name, binlog_name_len); + } + ~xid_count_per_binlog() + { + my_free(binlog_name); + } + }; + I_List binlog_xid_count_list; + mysql_mutex_t LOCK_binlog_background_thread; + mysql_cond_t COND_binlog_background_thread; + mysql_cond_t COND_binlog_background_thread_end; + + void stop_background_thread(); + + using MYSQL_LOG::generate_name; + using MYSQL_LOG::is_open; + + /* This is relay log */ + bool is_relay_log; + ulong relay_signal_cnt; // update of the counter is checked by heartbeat + enum enum_binlog_checksum_alg checksum_alg_reset; // to contain a new value when binlog is rotated + /* + Holds the last seen in Relay-Log FD's checksum alg value. + The initial value comes from the slave's local FD that heads + the very first Relay-Log file. In the following the value may change + with each received master's FD_m. + Besides to be used in verification events that IO thread receives + (except the 1st fake Rotate, see @c Master_info:: checksum_alg_before_fd), + the value specifies if/how to compute checksum for slave's local events + and the first fake Rotate (R_f^1) coming from the master. + R_f^1 needs logging checksum-compatibly with the RL's heading FD_s. 
+ + Legends for the checksum related comments: + + FD - Format-Description event, + R - Rotate event + R_f - the fake Rotate event + E - an arbirary event + + The underscore indexes for any event + `_s' indicates the event is generated by Slave + `_m' - by Master + + Two special underscore indexes of FD: + FD_q - Format Description event for queuing (relay-logging) + FD_e - Format Description event for executing (relay-logging) + + Upper indexes: + E^n - n:th event is a sequence + + RL - Relay Log + (A) - checksum algorithm descriptor value + FD.(A) - the value of (A) in FD + */ + enum enum_binlog_checksum_alg relay_log_checksum_alg; + /* + These describe the log's format. This is used only for relay logs. + _for_exec is used by the SQL thread, _for_queue by the I/O thread. It's + necessary to have 2 distinct objects, because the I/O thread may be reading + events in a different format from what the SQL thread is reading (consider + the case of a master which has been upgraded from 5.0 to 5.1 without doing + RESET MASTER, or from 4.x to 5.0). + */ + Format_description_log_event *description_event_for_exec, + *description_event_for_queue; + /* + Binlog position of last commit (or non-transactional write) to the binlog. + Access to this is protected by LOCK_commit_ordered. + */ + char last_commit_pos_file[FN_REFLEN]; + my_off_t last_commit_pos_offset; + ulong current_binlog_id; + + /* + Tracks the number of times that the master has been reset + */ + Atomic_counter reset_master_count; + + MYSQL_BIN_LOG(uint *sync_period); + /* + note that there's no destructor ~MYSQL_BIN_LOG() ! 
+ The reason is that we don't want it to be automatically called + on exit() - but only during the correct shutdown process + */ + +#ifdef HAVE_PSI_INTERFACE + void set_psi_keys(PSI_mutex_key key_LOCK_index, + PSI_cond_key key_relay_log_update, + PSI_cond_key key_bin_log_update, + PSI_file_key key_file_log, + PSI_file_key key_file_log_cache, + PSI_file_key key_file_log_index, + PSI_file_key key_file_log_index_cache, + PSI_cond_key key_COND_queue_busy, + PSI_mutex_key key_LOCK_binlog_end_pos) + { + m_key_LOCK_index= key_LOCK_index; + m_key_relay_log_update= key_relay_log_update; + m_key_bin_log_update= key_bin_log_update; + m_key_file_log= key_file_log; + m_key_file_log_cache= key_file_log_cache; + m_key_file_log_index= key_file_log_index; + m_key_file_log_index_cache= key_file_log_index_cache; + m_key_COND_queue_busy= key_COND_queue_busy; + m_key_LOCK_binlog_end_pos= key_LOCK_binlog_end_pos; + } +#endif + + int open(const char *opt_name); + void close(); + virtual int generate_new_name(char *new_name, const char *log_name, + ulong next_log_number); + int log_and_order(THD *thd, my_xid xid, bool all, + bool need_prepare_ordered, bool need_commit_ordered); + int unlog(ulong cookie, my_xid xid); + int unlog_xa_prepare(THD *thd, bool all); + void commit_checkpoint_notify(void *cookie); + int recover(LOG_INFO *linfo, const char *last_log_name, IO_CACHE *first_log, + Format_description_log_event *fdle, bool do_xa); + int do_binlog_recovery(const char *opt_name, bool do_xa_recovery); +#if !defined(MYSQL_CLIENT) + + int flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event, + bool is_transactional); + int remove_pending_rows_event(THD *thd, bool is_transactional); + +#endif /* !defined(MYSQL_CLIENT) */ + void reset_bytes_written() + { + bytes_written = 0; + } + void harvest_bytes_written(Atomic_counter *counter) + { +#ifdef DBUG_TRACE + char buf1[22],buf2[22]; +#endif + DBUG_ENTER("harvest_bytes_written"); + (*counter)+=bytes_written; + 
DBUG_PRINT("info",("counter: %s bytes_written: %s", llstr(*counter,buf1), + llstr(bytes_written,buf2))); + bytes_written=0; + DBUG_VOID_RETURN; + } + void set_max_size(ulong max_size_arg); + + /* Handle signaling that relay has been updated */ + void signal_relay_log_update() + { + mysql_mutex_assert_owner(&LOCK_log); + DBUG_ASSERT(is_relay_log); + DBUG_ENTER("MYSQL_BIN_LOG::signal_relay_log_update"); + relay_signal_cnt++; + mysql_cond_broadcast(&COND_relay_log_updated); + DBUG_VOID_RETURN; + } + void signal_bin_log_update() + { + mysql_mutex_assert_owner(&LOCK_binlog_end_pos); + DBUG_ASSERT(!is_relay_log); + DBUG_ENTER("MYSQL_BIN_LOG::signal_bin_log_update"); + mysql_cond_broadcast(&COND_bin_log_updated); + DBUG_VOID_RETURN; + } + void update_binlog_end_pos() + { + if (is_relay_log) + signal_relay_log_update(); + else + { + lock_binlog_end_pos(); + binlog_end_pos= my_b_safe_tell(&log_file); + signal_bin_log_update(); + unlock_binlog_end_pos(); + } + } + void update_binlog_end_pos(my_off_t pos) + { + mysql_mutex_assert_owner(&LOCK_log); + mysql_mutex_assert_not_owner(&LOCK_binlog_end_pos); + lock_binlog_end_pos(); + /* + Note: it would make more sense to assert(pos > binlog_end_pos) + but there are two places triggered by mtr that has pos == binlog_end_pos + i didn't investigate but accepted as it should do no harm + */ + DBUG_ASSERT(pos >= binlog_end_pos); + binlog_end_pos= pos; + signal_bin_log_update(); + unlock_binlog_end_pos(); + } + + void wait_for_sufficient_commits(); + void binlog_trigger_immediate_group_commit(); + void wait_for_update_relay_log(THD* thd); + void init(ulong max_size); + void init_pthread_objects(); + void cleanup(); + bool open(const char *log_name, + const char *new_name, + ulong next_log_number, + enum cache_type io_cache_type_arg, + ulong max_size, + bool null_created, + bool need_mutex); + bool open_index_file(const char *index_file_name_arg, + const char *log_name, bool need_mutex); + /* Use this to start writing a new log file */ + 
int new_file(); + + bool write(Log_event* event_info, + my_bool *with_annotate= 0); // binary log write + bool write_transaction_to_binlog(THD *thd, binlog_cache_mngr *cache_mngr, + Log_event *end_ev, bool all, + bool using_stmt_cache, bool using_trx_cache, + bool is_ro_1pc); + + bool write_incident_already_locked(THD *thd); + bool write_incident(THD *thd); + void write_binlog_checkpoint_event_already_locked(const char *name, uint len); + int write_cache(THD *thd, IO_CACHE *cache); + void set_write_error(THD *thd, bool is_transactional); + bool check_write_error(THD *thd); + + void start_union_events(THD *thd, query_id_t query_id_param); + void stop_union_events(THD *thd); + bool is_query_in_union(THD *thd, query_id_t query_id_param); + + bool write_event(Log_event *ev, binlog_cache_data *data, IO_CACHE *file); + bool write_event(Log_event *ev) { return write_event(ev, 0, &log_file); } + + bool write_event_buffer(uchar* buf,uint len); + bool append(Log_event* ev); + bool append_no_lock(Log_event* ev); + + void mark_xids_active(ulong cookie, uint xid_count); + void mark_xid_done(ulong cookie, bool write_checkpoint); + void make_log_name(char* buf, const char* log_ident); + bool is_active(const char* log_file_name); + bool can_purge_log(const char *log_file_name); + int update_log_index(LOG_INFO* linfo, bool need_update_threads); + int rotate(bool force_rotate, bool* check_purge); + void checkpoint_and_purge(ulong binlog_id); + int rotate_and_purge(bool force_rotate, DYNAMIC_ARRAY* drop_gtid_domain= NULL); + /** + Flush binlog cache and synchronize to disk. + + This function flushes events in binlog cache to binary log file, + it will do synchronizing according to the setting of system + variable 'sync_binlog'. If file is synchronized, @c synced will + be set to 1, otherwise 0. 
+ + @param[out] synced if not NULL, set to 1 if file is synchronized, otherwise 0 + + @retval 0 Success + @retval other Failure + */ + bool flush_and_sync(bool *synced); + int purge_logs(const char *to_log, bool included, + bool need_mutex, bool need_update_threads, + ulonglong *decrease_log_space); + int purge_logs_before_date(time_t purge_time); + int purge_first_log(Relay_log_info* rli, bool included); + int set_purge_index_file_name(const char *base_file_name); + int open_purge_index_file(bool destroy); + bool truncate_and_remove_binlogs(const char *truncate_file, + my_off_t truncate_pos, + rpl_gtid *gtid); + bool is_inited_purge_index_file(); + int close_purge_index_file(); + int clean_purge_index_file(); + int sync_purge_index_file(); + int register_purge_index_entry(const char* entry); + int register_create_index_entry(const char* entry); + int purge_index_entry(THD *thd, ulonglong *decrease_log_space, + bool need_mutex); + bool reset_logs(THD* thd, bool create_new_log, + rpl_gtid *init_state, uint32 init_state_len, + ulong next_log_number); + void wait_for_last_checkpoint_event(); + void close(uint exiting); + void clear_inuse_flag_when_closing(File file); + + // iterating through the log index file + int find_log_pos(LOG_INFO* linfo, const char* log_name, + bool need_mutex); + int find_next_log(LOG_INFO* linfo, bool need_mutex); + int get_current_log(LOG_INFO* linfo); + int raw_get_current_log(LOG_INFO* linfo); + uint next_file_id(); + inline char* get_index_fname() { return index_file_name;} + inline char* get_log_fname() { return log_file_name; } + inline char* get_name() { return name; } + inline mysql_mutex_t* get_log_lock() { return &LOCK_log; } + inline mysql_cond_t* get_bin_log_cond() { return &COND_bin_log_updated; } + inline IO_CACHE* get_log_file() { return &log_file; } + inline uint64 get_reset_master_count() { return reset_master_count; } + + inline void lock_index() { mysql_mutex_lock(&LOCK_index);} + inline void unlock_index() { 
mysql_mutex_unlock(&LOCK_index);} + inline IO_CACHE *get_index_file() { return &index_file;} + inline uint32 get_open_count() { return open_count; } + void set_status_variables(THD *thd); + bool is_xidlist_idle(); + bool write_gtid_event(THD *thd, bool standalone, bool is_transactional, + uint64 commit_id, + bool has_xid= false, bool ro_1pc= false); + int read_state_from_file(); + int write_state_to_file(); + int get_most_recent_gtid_list(rpl_gtid **list, uint32 *size); + bool append_state_pos(String *str); + bool append_state(String *str); + bool is_empty_state(); + bool find_in_binlog_state(uint32 domain_id, uint32 server_id, + rpl_gtid *out_gtid); + bool lookup_domain_in_binlog_state(uint32 domain_id, rpl_gtid *out_gtid); + int bump_seq_no_counter_if_needed(uint32 domain_id, uint64 seq_no); + bool check_strict_gtid_sequence(uint32 domain_id, uint32 server_id, + uint64 seq_no, bool no_error= false); + + /** + * used when opening new file, and binlog_end_pos moves backwards + */ + void reset_binlog_end_pos(const char file_name[FN_REFLEN], my_off_t pos) + { + mysql_mutex_assert_owner(&LOCK_log); + mysql_mutex_assert_not_owner(&LOCK_binlog_end_pos); + lock_binlog_end_pos(); + binlog_end_pos= pos; + strcpy(binlog_end_pos_file, file_name); + signal_bin_log_update(); + unlock_binlog_end_pos(); + } + + /* + It is called by the threads(e.g. dump thread) which want to read + log without LOCK_log protection. + */ + my_off_t get_binlog_end_pos(char file_name_buf[FN_REFLEN]) const + { + mysql_mutex_assert_not_owner(&LOCK_log); + mysql_mutex_assert_owner(&LOCK_binlog_end_pos); + strcpy(file_name_buf, binlog_end_pos_file); + return binlog_end_pos; + } + void lock_binlog_end_pos() { mysql_mutex_lock(&LOCK_binlog_end_pos); } + void unlock_binlog_end_pos() { mysql_mutex_unlock(&LOCK_binlog_end_pos); } + mysql_mutex_t* get_binlog_end_pos_lock() { return &LOCK_binlog_end_pos; } + + /* + Ensures the log's state is either LOG_OPEN or LOG_CLOSED. 
If something + failed along the desired path and left the log in invalid state, i.e. + LOG_TO_BE_OPENED, forces the state to be LOG_CLOSED. + */ + void try_fix_log_state() + { + mysql_mutex_lock(get_log_lock()); + /* Only change the log state if it is LOG_TO_BE_OPENED */ + if (log_state == LOG_TO_BE_OPENED) + log_state= LOG_CLOSED; + mysql_mutex_unlock(get_log_lock()); + } + + int wait_for_update_binlog_end_pos(THD* thd, struct timespec * timeout); + + /* + Binlog position of end of the binlog. + Access to this is protected by LOCK_binlog_end_pos + + The difference between this and last_commit_pos_{file,offset} is that + the commit position is updated later. If semi-sync wait point is set + to WAIT_AFTER_SYNC, the commit pos is update after semi-sync-ack has + been received and the end point is updated after the write as it's needed + for the dump threads to be able to semi-sync the event. + */ + my_off_t binlog_end_pos; + char binlog_end_pos_file[FN_REFLEN]; +}; + +class Log_event_handler +{ +public: + Log_event_handler() = default; + virtual bool init()= 0; + virtual void cleanup()= 0; + + virtual bool log_slow(THD *thd, my_hrtime_t current_time, + const char *user_host, size_t user_host_len, ulonglong query_utime, + ulonglong lock_utime, bool is_command, + const char *sql_text, size_t sql_text_len)= 0; + virtual bool log_error(enum loglevel level, const char *format, + va_list args)= 0; + virtual bool log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id, + const char *command_type, size_t command_type_len, + const char *sql_text, size_t sql_text_len, + CHARSET_INFO *client_cs)= 0; + virtual ~Log_event_handler() = default; +}; + + +int check_if_log_table(const TABLE_LIST *table, bool check_if_opened, + const char *errmsg); + +class Log_to_csv_event_handler: public Log_event_handler +{ + friend class LOGGER; + +public: + Log_to_csv_event_handler(); + ~Log_to_csv_event_handler(); + virtual bool init(); + 
virtual void cleanup(); + + virtual bool log_slow(THD *thd, my_hrtime_t current_time, + const char *user_host, size_t user_host_len, ulonglong query_utime, + ulonglong lock_utime, bool is_command, + const char *sql_text, size_t sql_text_len); + virtual bool log_error(enum loglevel level, const char *format, + va_list args); + virtual bool log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id, + const char *command_type, size_t command_type_len, + const char *sql_text, size_t sql_text_len, + CHARSET_INFO *client_cs); + + int activate_log(THD *thd, uint log_type); +}; + + +/* type of the log table */ +#define QUERY_LOG_SLOW 1 +#define QUERY_LOG_GENERAL 2 + +class Log_to_file_event_handler: public Log_event_handler +{ + MYSQL_QUERY_LOG mysql_log; + MYSQL_QUERY_LOG mysql_slow_log; + bool is_initialized; +public: + Log_to_file_event_handler(): is_initialized(FALSE) + {} + virtual bool init(); + virtual void cleanup(); + + virtual bool log_slow(THD *thd, my_hrtime_t current_time, + const char *user_host, size_t user_host_len, ulonglong query_utime, + ulonglong lock_utime, bool is_command, + const char *sql_text, size_t sql_text_len); + virtual bool log_error(enum loglevel level, const char *format, + va_list args); + virtual bool log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id, + const char *command_type, size_t command_type_len, + const char *sql_text, size_t sql_text_len, + CHARSET_INFO *client_cs); + void flush(); + void init_pthread_objects(); + MYSQL_QUERY_LOG *get_mysql_slow_log() { return &mysql_slow_log; } + MYSQL_QUERY_LOG *get_mysql_log() { return &mysql_log; } +}; + + +/* Class which manages slow, general and error log event handlers */ +class LOGGER +{ + mysql_rwlock_t LOCK_logger; + /* flag to check whether logger mutex is initialized */ + uint inited; + + /* available log handlers */ + Log_to_csv_event_handler *table_log_handler; + 
Log_to_file_event_handler *file_log_handler; + + /* NULL-terminated arrays of log handlers */ + Log_event_handler *error_log_handler_list[MAX_LOG_HANDLERS_NUM + 1]; + Log_event_handler *slow_log_handler_list[MAX_LOG_HANDLERS_NUM + 1]; + Log_event_handler *general_log_handler_list[MAX_LOG_HANDLERS_NUM + 1]; + +public: + + bool is_log_tables_initialized; + + LOGGER() : inited(0), table_log_handler(NULL), + file_log_handler(NULL), is_log_tables_initialized(FALSE) + {} + void lock_shared() { mysql_rwlock_rdlock(&LOCK_logger); } + void lock_exclusive() { mysql_rwlock_wrlock(&LOCK_logger); } + void unlock() { mysql_rwlock_unlock(&LOCK_logger); } + bool is_log_table_enabled(uint log_table_type); + bool log_command(THD *thd, enum enum_server_command command); + + /* + We want to initialize all log mutexes as soon as possible, + but we cannot do it in constructor, as safe_mutex relies on + initialization, performed by MY_INIT(). This why this is done in + this function. + */ + void init_base(); + void init_log_tables(); + bool flush_slow_log(); + bool flush_general_log(); + /* Perform basic logger cleanup. this will leave e.g. error log open. */ + void cleanup_base(); + /* Free memory. 
Nothing could be logged after this function is called */ + void cleanup_end(); + bool error_log_print(enum loglevel level, const char *format, + va_list args); + bool slow_log_print(THD *thd, const char *query, size_t query_length, + ulonglong current_utime); + bool general_log_print(THD *thd,enum enum_server_command command, + const char *format, va_list args); + bool general_log_write(THD *thd, enum enum_server_command command, + const char *query, size_t query_length); + + /* we use this function to setup all enabled log event handlers */ + int set_handlers(ulonglong slow_log_printer, + ulonglong general_log_printer); + void init_error_log(ulonglong error_log_printer); + void init_slow_log(ulonglong slow_log_printer); + void init_general_log(ulonglong general_log_printer); + void deactivate_log_handler(THD* thd, uint log_type); + bool activate_log_handler(THD* thd, uint log_type); + MYSQL_QUERY_LOG *get_slow_log_file_handler() const + { + if (file_log_handler) + return file_log_handler->get_mysql_slow_log(); + return NULL; + } + MYSQL_QUERY_LOG *get_log_file_handler() const + { + if (file_log_handler) + return file_log_handler->get_mysql_log(); + return NULL; + } +}; + +enum enum_binlog_format { + BINLOG_FORMAT_MIXED= 0, ///< statement if safe, otherwise row - autodetected + BINLOG_FORMAT_STMT= 1, ///< statement-based + BINLOG_FORMAT_ROW= 2, ///< row-based + BINLOG_FORMAT_UNSPEC=3 ///< thd_binlog_format() returns it when binlog is closed +}; + +int query_error_code(THD *thd, bool not_killed); +uint purge_log_get_error_code(int res); + +int vprint_msg_to_log(enum loglevel level, const char *format, va_list args); +void sql_print_error(const char *format, ...); +void sql_print_warning(const char *format, ...); +void sql_print_information(const char *format, ...); +void sql_print_information_v(const char *format, va_list ap); +typedef void (*sql_print_message_func)(const char *format, ...); +extern sql_print_message_func sql_print_message_handlers[]; + +int 
error_log_print(enum loglevel level, const char *format, + va_list args); + +bool slow_log_print(THD *thd, const char *query, uint query_length, + ulonglong current_utime); + +bool general_log_print(THD *thd, enum enum_server_command command, + const char *format,...); + +bool general_log_write(THD *thd, enum enum_server_command command, + const char *query, size_t query_length); + +void binlog_report_wait_for(THD *thd, THD *other_thd); +void sql_perror(const char *message); +bool flush_error_log(); + +File open_binlog(IO_CACHE *log, const char *log_file_name, + const char **errmsg); + +void make_default_log_name(char **out, const char* log_ext, bool once); +void binlog_reset_cache(THD *thd); +bool write_annotated_row(THD *thd); + +extern MYSQL_PLUGIN_IMPORT MYSQL_BIN_LOG mysql_bin_log; +extern handlerton *binlog_hton; +extern LOGGER logger; + +extern const char *log_bin_index; +extern const char *log_bin_basename; + +/** + Turns a relative log binary log path into a full path, based on the + opt_bin_logname or opt_relay_logname. + + @param from The log name we want to make into an absolute path. + @param to The buffer where to put the results of the + normalization. + @param is_relay_log Switch that makes is used inside to choose which + option (opt_bin_logname or opt_relay_logname) to + use when calculating the base path. + + @returns true if a problem occurs, false otherwise. + */ + +inline bool normalize_binlog_name(char *to, const char *from, bool is_relay_log) +{ + DBUG_ENTER("normalize_binlog_name"); + bool error= false; + char buff[FN_REFLEN]; + char *ptr= (char*) from; + char *opt_name= is_relay_log ? 
opt_relay_logname : opt_bin_logname; + + DBUG_ASSERT(from); + + /* opt_name is not null and not empty and from is a relative path */ + if (opt_name && opt_name[0] && from && !test_if_hard_path(from)) + { + // take the path from opt_name + // take the filename from from + char log_dirpart[FN_REFLEN], log_dirname[FN_REFLEN]; + size_t log_dirpart_len, log_dirname_len; + dirname_part(log_dirpart, opt_name, &log_dirpart_len); + dirname_part(log_dirname, from, &log_dirname_len); + + /* log may be empty => relay-log or log-bin did not + hold paths, just filename pattern */ + if (log_dirpart_len > 0) + { + /* create the new path name */ + if(fn_format(buff, from+log_dirname_len, log_dirpart, "", + MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH)) == NULL) + { + error= true; + goto end; + } + + ptr= buff; + } + } + + DBUG_ASSERT(ptr); + + if (ptr) + strmake(to, ptr, strlen(ptr)); + +end: + DBUG_RETURN(error); +} + +static inline TC_LOG *get_tc_log_implementation() +{ + if (total_ha_2pc <= 1) + return &tc_log_dummy; + if (opt_bin_log) + return &mysql_bin_log; + return &tc_log_mmap; +} + +#ifdef WITH_WSREP +IO_CACHE* wsrep_get_cache(THD *, bool); +void wsrep_thd_binlog_trx_reset(THD * thd); +void wsrep_thd_binlog_stmt_rollback(THD * thd); +#endif /* WITH_WSREP */ + +class Gtid_list_log_event; +const char * +get_gtid_list_event(IO_CACHE *cache, Gtid_list_log_event **out_gtid_list); + +int binlog_commit(THD *thd, bool all, bool is_ro_1pc= false); +int binlog_commit_by_xid(handlerton *hton, XID *xid); +int binlog_rollback_by_xid(handlerton *hton, XID *xid); +bool write_bin_log_start_alter(THD *thd, bool& partial_alter, + uint64 start_alter_id, bool log_if_exists); + + +#endif /* LOG_H */ diff --git a/sql/log_event.cc b/sql/log_event.cc new file mode 100644 index 00000000..5e255646 --- /dev/null +++ b/sql/log_event.cc @@ -0,0 +1,4182 @@ +/* + Copyright (c) 2000, 2018, Oracle and/or its affiliates. 
+ Copyright (c) 2009, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#include "mariadb.h" +#include "sql_priv.h" +#include "handler.h" +#ifndef MYSQL_CLIENT +#include "unireg.h" +#include "log_event.h" +#include "sql_base.h" // close_thread_tables +#include "sql_cache.h" // QUERY_CACHE_FLAGS_SIZE +#include "sql_locale.h" // MY_LOCALE, my_locale_by_number, my_locale_en_US +#include "key.h" // key_copy +#include "lock.h" // mysql_unlock_tables +#include "sql_parse.h" // mysql_test_parse_for_slave +#include "tztime.h" // struct Time_zone +#include "sql_load.h" // mysql_load +#include "sql_db.h" // load_db_opt_by_name +#include "slave.h" +#include "rpl_rli.h" +#include "rpl_mi.h" +#include "rpl_filter.h" +#include "rpl_record.h" +#include "transaction.h" +#include +#include "sql_show.h" // append_identifier +#include +#include +#include "compat56.h" +#include "sql_insert.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif /* WITH_WSREP */ +#else +#include "mysqld_error.h" +#endif /* MYSQL_CLIENT */ + +#include +#include "rpl_utility.h" +#include "rpl_constants.h" +#include "sql_digest.h" +#include "zlib.h" +#include "myisampack.h" +#include + +#define my_b_write_string(A, B) my_b_write((A), (uchar*)(B), (uint) (sizeof(B) - 1)) + +#ifndef _AIX +PSI_memory_key key_memory_log_event; +#endif +PSI_memory_key key_memory_Incident_log_event_message; 
+PSI_memory_key key_memory_Rows_query_log_event_rows_query; + +/** + BINLOG_CHECKSUM variable. +*/ +const char *binlog_checksum_type_names[]= { + "NONE", + "CRC32", + NullS +}; + +unsigned int binlog_checksum_type_length[]= { + sizeof("NONE") - 1, + sizeof("CRC32") - 1, + 0 +}; + +TYPELIB binlog_checksum_typelib= +{ + array_elements(binlog_checksum_type_names) - 1, "", + binlog_checksum_type_names, + binlog_checksum_type_length +}; + + +#define FLAGSTR(V,F) ((V)&(F)?#F" ":"") + +/* + Size of buffer for printing a double in format %.g + + optional '-' + optional zero + '.' + PREC digits + 'e' + sign + + exponent digits + '\0' +*/ +#define FMT_G_BUFSIZE(PREC) (3 + (PREC) + 5 + 1) + +/* + replication event checksum is introduced in the following "checksum-home" version. + The checksum-aware servers extract FD's version to decide whether the FD event + carries checksum info. + + TODO: correct the constant when it has been determined + (which main tree to push and when) +*/ +const Version checksum_version_split_mysql(5, 6, 1); +const Version checksum_version_split_mariadb(5, 3, 0); + +// First MySQL version with fraction seconds +const Version fsp_version_split_mysql(5, 6, 0); + +/* + Cache that will automatically be written to a dedicated file on + destruction. + + DESCRIPTION + + */ +class Write_on_release_cache +{ +public: + enum flag + { + FLUSH_F + }; + + typedef unsigned short flag_set; + + /* + Constructor. + + SYNOPSIS + Write_on_release_cache + cache Pointer to cache to use + file File to write cache to upon destruction + flags Flags for the cache + + DESCRIPTION + Cache common parameters and ensure common flush_data() code + on successful copy of the cache, the cache will be reinited as a + WRITE_CACHE. + + Currently, a pointer to the cache is provided in the + constructor, but it would be possible to create a subclass + holding the IO_CACHE itself. 
+ */ + Write_on_release_cache(IO_CACHE *cache, FILE *file, flag_set flags= 0, Log_event *ev= NULL) + : m_cache(cache), m_file(file), m_flags(flags), m_ev(ev) + { + reinit_io_cache(m_cache, WRITE_CACHE, 0L, FALSE, TRUE); + } + + ~Write_on_release_cache() = default; + + bool flush_data() + { +#ifdef MYSQL_CLIENT + if (m_ev == NULL) + { + if (copy_event_cache_to_file_and_reinit(m_cache, m_file)) + return 1; + if ((m_flags & FLUSH_F) && fflush(m_file)) + return 1; + } + else // if m_ev<>NULL, then storing the output in output_buf + { + LEX_STRING tmp_str; + bool res; + if (copy_event_cache_to_string_and_reinit(m_cache, &tmp_str)) + return 1; + /* use 2 argument append as tmp_str is not \0 terminated */ + res= m_ev->output_buf.append(tmp_str.str, tmp_str.length); + my_free(tmp_str.str); + return res ? res : 0; + } +#else /* MySQL_SERVER */ + if (copy_event_cache_to_file_and_reinit(m_cache, m_file)) + return 1; + if ((m_flags & FLUSH_F) && fflush(m_file)) + return 1; +#endif + return 0; + } + + /* + Return a pointer to the internal IO_CACHE. + + SYNOPSIS + operator&() + + DESCRIPTION + + Function to return a pointer to the internal cache, so that the + object can be treated as a IO_CACHE and used with the my_b_* + IO_CACHE functions + + RETURN VALUE + A pointer to the internal IO_CACHE. + */ + IO_CACHE *operator&() + { + return m_cache; + } + +private: + // Hidden, to prevent usage. 
+ Write_on_release_cache(Write_on_release_cache const&); + + IO_CACHE *m_cache; + FILE *m_file; + flag_set m_flags; + Log_event *m_ev; // Used for Flashback +}; + +#ifndef DBUG_OFF +#define DBUG_DUMP_EVENT_BUF(B,L) \ + do { \ + const uchar *_buf=(uchar*)(B); \ + size_t _len=(L); \ + if (_len >= LOG_EVENT_MINIMAL_HEADER_LEN) \ + { \ + DBUG_PRINT("data", ("header: timestamp:%u type:%u server_id:%u len:%u log_pos:%u flags:%u", \ + uint4korr(_buf), _buf[EVENT_TYPE_OFFSET], \ + uint4korr(_buf+SERVER_ID_OFFSET), \ + uint4korr(_buf+EVENT_LEN_OFFSET), \ + uint4korr(_buf+LOG_POS_OFFSET), \ + uint4korr(_buf+FLAGS_OFFSET))); \ + DBUG_DUMP("data", _buf+LOG_EVENT_MINIMAL_HEADER_LEN, \ + _len-LOG_EVENT_MINIMAL_HEADER_LEN); \ + } \ + else \ + DBUG_DUMP("data", _buf, _len); \ + } while(0) +#else +#define DBUG_DUMP_EVENT_BUF(B,L) do { } while(0) +#endif + +/* + read_str() +*/ + +static inline bool read_str(const uchar **buf, const uchar *buf_end, + const char **str, uint8 *len) +{ + if (*buf + ((uint) **buf) >= buf_end) + return 1; + *len= (uint8) **buf; + *str= (char*) (*buf)+1; + (*buf)+= (uint) *len+1; + return 0; +} + + +/** + Transforms a string into "" or its expression in X'HHHH' form. +*/ + +char *str_to_hex(char *to, const char *from, size_t len) +{ + if (len) + { + *to++= 'X'; + *to++= '\''; + to= octet2hex(to, from, len); + *to++= '\''; + *to= '\0'; + } + else + to= strmov(to, "\"\""); + return to; // pointer to end 0 of 'to' +} + +#define BINLOG_COMPRESSED_HEADER_LEN 1 +#define BINLOG_COMPRESSED_ORIGINAL_LENGTH_MAX_BYTES 4 +/** + Compressed Record + Record Header: 1 Byte + 7 Bit: Always 1, mean compressed; + 4-6 Bit: Compressed algorithm - Always 0, means zlib + It maybe support other compression algorithm in the future. + 0-3 Bit: Bytes of "Record Original Length" + Record Original Length: 1-4 Bytes + Compressed Buf: +*/ + +/** + Get the length of compress content. 
+*/ + +uint32 binlog_get_compress_len(uint32 len) +{ + /* 5 for the begin content, 1 reserved for a '\0'*/ + return ALIGN_SIZE((BINLOG_COMPRESSED_HEADER_LEN + BINLOG_COMPRESSED_ORIGINAL_LENGTH_MAX_BYTES) + + compressBound(len) + 1); +} + +/** + Compress buf from 'src' to 'dst'. + + Note: 1) Then the caller should guarantee the length of 'dst', which + can be got by binlog_get_uncompress_len, is enough to hold + the content uncompressed. + 2) The 'comlen' should stored the length of 'dst', and it will + be set as the size of compressed content after return. + + return zero if successful, others otherwise. +*/ +int binlog_buf_compress(const uchar *src, uchar *dst, uint32 len, uint32 *comlen) +{ + uchar lenlen; + if (len & 0xFF000000) + { + dst[1]= uchar(len >> 24); + dst[2]= uchar(len >> 16); + dst[3]= uchar(len >> 8); + dst[4]= uchar(len); + lenlen= 4; + } + else if (len & 0x00FF0000) + { + dst[1]= uchar(len >> 16); + dst[2]= uchar(len >> 8); + dst[3]= uchar(len); + lenlen= 3; + } + else if (len & 0x0000FF00) + { + dst[1]= uchar(len >> 8); + dst[2]= uchar(len); + lenlen= 2; + } + else + { + dst[1]= uchar(len); + lenlen= 1; + } + dst[0]= 0x80 | (lenlen & 0x07); + + uLongf tmplen= (uLongf)*comlen - BINLOG_COMPRESSED_HEADER_LEN - lenlen - 1; + if (compress((Bytef *)dst + BINLOG_COMPRESSED_HEADER_LEN + lenlen, &tmplen, + (const Bytef *)src, (uLongf)len) != Z_OK) + { + return 1; + } + *comlen= (uint32)tmplen + BINLOG_COMPRESSED_HEADER_LEN + lenlen; + return 0; +} + +/** + Convert a query_compressed_log_event to query_log_event + from 'src' to 'dst', the size after compression stored in 'newlen'. + + @Note: + 1) The caller should call my_free to release 'dst' if *is_malloc is + returned as true. + 2) If *is_malloc is retuened as false, then 'dst' reuses the passed-in + 'buf'. + + return zero if successful, non-zero otherwise. 
+*/ + +int +query_event_uncompress(const Format_description_log_event *description_event, + bool contain_checksum, const uchar *src, ulong src_len, + uchar* buf, ulong buf_size, bool* is_malloc, uchar **dst, + ulong *newlen) +{ + ulong len= uint4korr(src + EVENT_LEN_OFFSET); + const uchar *tmp= src; + const uchar *end= src + len; + uchar *new_dst; + + // bad event + if (src_len < len) + return 1; + + DBUG_ASSERT((uchar)src[EVENT_TYPE_OFFSET] == QUERY_COMPRESSED_EVENT); + + uint8 common_header_len= description_event->common_header_len; + uint8 post_header_len= + description_event->post_header_len[QUERY_COMPRESSED_EVENT-1]; + + *is_malloc= false; + + tmp+= common_header_len; + // bad event + if (end <= tmp) + return 1; + + uint db_len= (uint)tmp[Q_DB_LEN_OFFSET]; + uint16 status_vars_len= uint2korr(tmp + Q_STATUS_VARS_LEN_OFFSET); + + tmp+= post_header_len + status_vars_len + db_len + 1; + // bad event + if (end <= tmp) + return 1; + + int32 comp_len= (int32)(len - (tmp - src) - + (contain_checksum ? 
BINLOG_CHECKSUM_LEN : 0)); + uint32 un_len= binlog_get_uncompress_len(tmp); + + // bad event + if (comp_len < 0 || un_len == 0) + return 1; + + *newlen= (ulong)(tmp - src) + un_len; + if (contain_checksum) + *newlen+= BINLOG_CHECKSUM_LEN; + + uint32 alloc_size= (uint32)ALIGN_SIZE(*newlen); + + if (alloc_size <= buf_size) + new_dst= buf; + else + { + new_dst= (uchar *) my_malloc(PSI_INSTRUMENT_ME, alloc_size, MYF(MY_WME)); + if (!new_dst) + return 1; + *is_malloc= true; + } + + /* copy the head*/ + memcpy(new_dst, src , tmp - src); + if (binlog_buf_uncompress(tmp, new_dst + (tmp - src), comp_len, &un_len)) + { + if (*is_malloc) + { + *is_malloc= false; + my_free(new_dst); + } + return 1; + } + + new_dst[EVENT_TYPE_OFFSET]= QUERY_EVENT; + int4store(new_dst + EVENT_LEN_OFFSET, *newlen); + if (contain_checksum) + { + ulong clear_len= *newlen - BINLOG_CHECKSUM_LEN; + int4store(new_dst + clear_len, + my_checksum(0L, (uchar *)new_dst, clear_len)); + } + *dst= new_dst; + return 0; +} + +int +row_log_event_uncompress(const Format_description_log_event *description_event, + bool contain_checksum, const uchar *src, ulong src_len, + uchar* buf, ulong buf_size, bool* is_malloc, + uchar **dst, ulong *newlen) +{ + Log_event_type type= (Log_event_type)(uchar)src[EVENT_TYPE_OFFSET]; + ulong len= uint4korr(src + EVENT_LEN_OFFSET); + const uchar *tmp= src; + uchar *new_dst= NULL; + const uchar *end= tmp + len; + + if (src_len < len) + return 1; // bad event + + DBUG_ASSERT(LOG_EVENT_IS_ROW_COMPRESSED(type)); + + uint8 common_header_len= description_event->common_header_len; + uint8 post_header_len= description_event->post_header_len[type-1]; + + tmp+= common_header_len + ROWS_HEADER_LEN_V1; + if (post_header_len == ROWS_HEADER_LEN_V2) + { + /* + Have variable length header, check length, + which includes length bytes + */ + + if (end - tmp <= 2) + return 1; // bad event + + uint16 var_header_len= uint2korr(tmp); + DBUG_ASSERT(var_header_len >= 2); + + /* skip over var-len header, 
extracting 'chunks' */ + tmp+= var_header_len; + + /* get the uncompressed event type */ + type= + (Log_event_type)(type - WRITE_ROWS_COMPRESSED_EVENT + WRITE_ROWS_EVENT); + } + else + { + /* get the uncompressed event type */ + type= (Log_event_type) + (type - WRITE_ROWS_COMPRESSED_EVENT_V1 + WRITE_ROWS_EVENT_V1); + } + + if (end <= tmp) + return 1; //bad event + + ulong m_width= net_field_length((uchar **)&tmp); + tmp+= (m_width + 7) / 8; + + if (type == UPDATE_ROWS_EVENT_V1 || type == UPDATE_ROWS_EVENT) + { + tmp+= (m_width + 7) / 8; + } + + if (end <= tmp) + return 1; //bad event + + uint32 un_len= binlog_get_uncompress_len(tmp); + if (un_len == 0) + return 1; //bad event + + int32 comp_len= (int32)(len - (tmp - src) - + (contain_checksum ? BINLOG_CHECKSUM_LEN : 0)); + if (comp_len <=0) + return 1; //bad event + + *newlen= ulong(tmp - src) + un_len; + if (contain_checksum) + *newlen+= BINLOG_CHECKSUM_LEN; + + size_t alloc_size= ALIGN_SIZE(*newlen); + + *is_malloc= false; + if (alloc_size <= buf_size) + { + new_dst= buf; + } + else + { + new_dst= (uchar*) my_malloc(PSI_INSTRUMENT_ME, alloc_size, MYF(MY_WME)); + if (!new_dst) + return 1; + *is_malloc= true; + } + + /* Copy the head. */ + memcpy(new_dst, src , tmp - src); + /* Uncompress the body. */ + if (binlog_buf_uncompress(tmp, new_dst + (tmp - src), + comp_len, &un_len)) + { + if (*is_malloc) + my_free(new_dst); + return 1; + } + + new_dst[EVENT_TYPE_OFFSET]= type; + int4store(new_dst + EVENT_LEN_OFFSET, *newlen); + if (contain_checksum) + { + ulong clear_len= *newlen - BINLOG_CHECKSUM_LEN; + int4store(new_dst + clear_len, + my_checksum(0L, (uchar *)new_dst, clear_len)); + } + *dst= new_dst; + return 0; +} + +/** + Get the length of uncompress content. + return 0 means error. 
+*/ + +uint32 binlog_get_uncompress_len(const uchar *buf) +{ + uint32 len, lenlen; + + if ((buf == NULL) || ((buf[0] & 0xe0) != 0x80)) + return 0; + + lenlen= buf[0] & 0x07; + + buf++; + /* Length is stored in high byte first order, like myisam keys */ + switch(lenlen) { + case 1: + len= buf[0]; + break; + case 2: + len= mi_uint2korr(buf); + break; + case 3: + len= mi_uint3korr(buf); + break; + case 4: + len= mi_uint4korr(buf); + break; + default: + DBUG_ASSERT(lenlen >= 1 && lenlen <= 4); + len= 0; + break; + } + return len; +} + +/** + Uncompress the content in 'src' with length of 'len' to 'dst'. + + Note: 1) Then the caller should guarantee the length of 'dst' (which + can be got by statement_get_uncompress_len) is enough to hold + the content uncompressed. + 2) The 'newlen' should stored the length of 'dst', and it will + be set as the size of uncompressed content after return. + + return zero if successful, others otherwise. +*/ +int binlog_buf_uncompress(const uchar *src, uchar *dst, uint32 len, + uint32 *newlen) +{ + if ((src[0] & 0x80) == 0) + return 1; + + uint32 lenlen= src[0] & 0x07; + uLongf buflen= *newlen; // zlib type + + uint32 alg= (src[0] & 0x70) >> 4; + switch(alg) { + case 0: + // zlib + if (uncompress((Bytef *)dst, &buflen, + (const Bytef*)src + 1 + lenlen, len - 1 - lenlen) != Z_OK) + return 1; + break; + default: + //TODO + //bad algorithm + return 1; + } + + DBUG_ASSERT(*newlen == (uint32)buflen); + *newlen= (uint32)buflen; + return 0; +} + + +/************************************************************************** + Log_event methods (= the parent class of all events) +**************************************************************************/ + +/** + @return + returns the human readable name of the event's type +*/ + +const char* Log_event::get_type_str(Log_event_type type) +{ + switch(type) { + case START_EVENT_V3: return "Start_v3"; + case STOP_EVENT: return "Stop"; + case QUERY_EVENT: return "Query"; + case ROTATE_EVENT: return 
"Rotate"; + case INTVAR_EVENT: return "Intvar"; + case LOAD_EVENT: return "Load"; + case NEW_LOAD_EVENT: return "New_load"; + case SLAVE_EVENT: return "Slave"; + case CREATE_FILE_EVENT: return "Create_file"; + case APPEND_BLOCK_EVENT: return "Append_block"; + case DELETE_FILE_EVENT: return "Delete_file"; + case EXEC_LOAD_EVENT: return "Exec_load"; + case RAND_EVENT: return "RAND"; + case XID_EVENT: return "Xid"; + case USER_VAR_EVENT: return "User var"; + case FORMAT_DESCRIPTION_EVENT: return "Format_desc"; + case TABLE_MAP_EVENT: return "Table_map"; + case PRE_GA_WRITE_ROWS_EVENT: return "Write_rows_event_old"; + case PRE_GA_UPDATE_ROWS_EVENT: return "Update_rows_event_old"; + case PRE_GA_DELETE_ROWS_EVENT: return "Delete_rows_event_old"; + case WRITE_ROWS_EVENT_V1: return "Write_rows_v1"; + case UPDATE_ROWS_EVENT_V1: return "Update_rows_v1"; + case DELETE_ROWS_EVENT_V1: return "Delete_rows_v1"; + case WRITE_ROWS_EVENT: return "Write_rows"; + case UPDATE_ROWS_EVENT: return "Update_rows"; + case DELETE_ROWS_EVENT: return "Delete_rows"; + case BEGIN_LOAD_QUERY_EVENT: return "Begin_load_query"; + case EXECUTE_LOAD_QUERY_EVENT: return "Execute_load_query"; + case INCIDENT_EVENT: return "Incident"; + case ANNOTATE_ROWS_EVENT: return "Annotate_rows"; + case BINLOG_CHECKPOINT_EVENT: return "Binlog_checkpoint"; + case GTID_EVENT: return "Gtid"; + case GTID_LIST_EVENT: return "Gtid_list"; + case START_ENCRYPTION_EVENT: return "Start_encryption"; + + /* The following is only for mysqlbinlog */ + case IGNORABLE_LOG_EVENT: return "Ignorable log event"; + case ROWS_QUERY_LOG_EVENT: return "MySQL Rows_query"; + case GTID_LOG_EVENT: return "MySQL Gtid"; + case ANONYMOUS_GTID_LOG_EVENT: return "MySQL Anonymous_Gtid"; + case PREVIOUS_GTIDS_LOG_EVENT: return "MySQL Previous_gtids"; + case HEARTBEAT_LOG_EVENT: return "Heartbeat"; + case TRANSACTION_CONTEXT_EVENT: return "Transaction_context"; + case VIEW_CHANGE_EVENT: return "View_change"; + case XA_PREPARE_LOG_EVENT: return 
"XA_prepare"; + case QUERY_COMPRESSED_EVENT: return "Query_compressed"; + case WRITE_ROWS_COMPRESSED_EVENT: return "Write_rows_compressed"; + case UPDATE_ROWS_COMPRESSED_EVENT: return "Update_rows_compressed"; + case DELETE_ROWS_COMPRESSED_EVENT: return "Delete_rows_compressed"; + case WRITE_ROWS_COMPRESSED_EVENT_V1: return "Write_rows_compressed_v1"; + case UPDATE_ROWS_COMPRESSED_EVENT_V1: return "Update_rows_compressed_v1"; + case DELETE_ROWS_COMPRESSED_EVENT_V1: return "Delete_rows_compressed_v1"; + + default: return "Unknown"; /* impossible */ + } +} + +const char* Log_event::get_type_str() +{ + return get_type_str(get_type_code()); +} + + +/* + Log_event::Log_event() +*/ + +Log_event::Log_event(const uchar *buf, + const Format_description_log_event* description_event) + :temp_buf(0), exec_time(0), cache_type(Log_event::EVENT_INVALID_CACHE), + checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF) +{ +#ifndef MYSQL_CLIENT + thd= 0; +#endif + when= uint4korr(buf); + when_sec_part= ~0UL; + server_id= uint4korr(buf + SERVER_ID_OFFSET); + data_written= uint4korr(buf + EVENT_LEN_OFFSET); + if (description_event->binlog_version==1) + { + log_pos= 0; + flags= 0; + return; + } + /* 4.0 or newer */ + log_pos= uint4korr(buf + LOG_POS_OFFSET); + /* + If the log is 4.0 (so here it can only be a 4.0 relay log read by + the SQL thread or a 4.0 master binlog read by the I/O thread), + log_pos is the beginning of the event: we transform it into the end + of the event, which is more useful. + But how do you know that the log is 4.0: you know it if + description_event is version 3 *and* you are not reading a + Format_desc (remember that mysqlbinlog starts by assuming that 5.0 + logs are in 4.0 format, until it finds a Format_desc). + */ + if (description_event->binlog_version==3 && + (uchar)buf[EVENT_TYPE_OFFSET]group_master_log_pos" (see + inc_group_relay_log_pos()). As it is unreal log_pos, adding the + event len's is nonsense. 
For example, a fake Rotate event should + not have its log_pos (which is 0) changed or it will modify + Exec_master_log_pos in SHOW SLAVE STATUS, displaying a nonsense + value of (a non-zero offset which does not exist in the master's + binlog, so which will cause problems if the user uses this value + in CHANGE MASTER). + */ + log_pos+= data_written; /* purecov: inspected */ + } + DBUG_PRINT("info", ("log_pos: %llu", log_pos)); + + flags= uint2korr(buf + FLAGS_OFFSET); + if (((uchar)buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT) || + ((uchar)buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT)) + { + /* + These events always have a header which stops here (i.e. their + header is FROZEN). + */ + /* + Initialization to zero of all other Log_event members as they're + not specified. Currently there are no such members; in the future + there will be an event UID (but Format_description and Rotate + don't need this UID, as they are not propagated through + --log-slave-updates (remember the UID is used to not play a query + twice when you have two masters which are slaves of a 3rd master). + Then we are done. + */ + return; + } + /* otherwise, go on with reading the header from buf (nothing now) */ +} + + +/** + This needn't be format-tolerant, because we only parse the first + LOG_EVENT_MINIMAL_HEADER_LEN bytes (just need the event's length). +*/ + +int Log_event::read_log_event(IO_CACHE* file, String* packet, + const Format_description_log_event *fdle, + enum enum_binlog_checksum_alg checksum_alg_arg) +{ + ulong data_len; + char buf[LOG_EVENT_MINIMAL_HEADER_LEN]; + uchar ev_offset= packet->length(); +#if !defined(MYSQL_CLIENT) + THD *thd=current_thd; + ulong max_allowed_packet= thd ? thd->slave_thread ? 
slave_max_allowed_packet + : thd->variables.max_allowed_packet + : ~(uint)0; +#endif + DBUG_ENTER("Log_event::read_log_event(IO_CACHE*,String*...)"); + + if (my_b_read(file, (uchar*) buf, sizeof(buf))) + { + /* + If the read hits eof, we must report it as eof so the caller + will know it can go into cond_wait to be woken up on the next + update to the log. + */ + DBUG_PRINT("error",("file->error: %d", file->error)); + DBUG_RETURN(file->error == 0 ? LOG_READ_EOF : + file->error > 0 ? LOG_READ_TRUNC : LOG_READ_IO); + } + data_len= uint4korr(buf + EVENT_LEN_OFFSET); + + /* Append the log event header to packet */ + if (packet->append(buf, sizeof(buf))) + DBUG_RETURN(LOG_READ_MEM); + + if (data_len < LOG_EVENT_MINIMAL_HEADER_LEN) + DBUG_RETURN(LOG_READ_BOGUS); + + if (data_len > MY_MAX(max_allowed_packet, + opt_binlog_rows_event_max_size + MAX_LOG_EVENT_HEADER)) + DBUG_RETURN(LOG_READ_TOO_LARGE); + + if (likely(data_len > LOG_EVENT_MINIMAL_HEADER_LEN)) + { + /* Append rest of event, read directly from file into packet */ + if (packet->append(file, data_len - LOG_EVENT_MINIMAL_HEADER_LEN)) + { + /* + Fatal error occurred when appending rest of the event + to packet, possible failures: + 1. EOF occurred when reading from file, it's really an error + as there's supposed to be more bytes available. + file->error will have been set to number of bytes left to read + 2. Read was interrupted, file->error would normally be set to -1 + 3. Failed to allocate memory for packet, my_errno + will be ENOMEM(file->error should be 0, but since the + memory allocation occurs before the call to read it might + be uninitialized) + */ + DBUG_RETURN(my_errno == ENOMEM ? LOG_READ_MEM : + (file->error >= 0 ? LOG_READ_TRUNC: LOG_READ_IO)); + } + } + + if (fdle->crypto_data.scheme) + { + uchar iv[BINLOG_IV_LENGTH]; + fdle->crypto_data.set_iv(iv, (uint32) (my_b_tell(file) - data_len)); + size_t sz= data_len + ev_offset + 1; +#ifdef HAVE_WOLFSSL + /* + Workaround for MDEV-19582. 
+ WolfSSL reads memory out of bounds with decryption/NOPAD) + We allocate a little more memory therefore. + */ + sz+= MY_AES_BLOCK_SIZE; +#endif + char *newpkt= (char*)my_malloc(PSI_INSTRUMENT_ME, sz, MYF(MY_WME)); + if (!newpkt) + DBUG_RETURN(LOG_READ_MEM); + memcpy(newpkt, packet->ptr(), ev_offset); + + uint dstlen; + uchar *src= (uchar*)packet->ptr() + ev_offset; + uchar *dst= (uchar*)newpkt + ev_offset; + memcpy(src + EVENT_LEN_OFFSET, src, 4); + if (encryption_crypt(src + 4, data_len - 4, dst + 4, &dstlen, + fdle->crypto_data.key, fdle->crypto_data.key_length, iv, + sizeof(iv), ENCRYPTION_FLAG_DECRYPT | ENCRYPTION_FLAG_NOPAD, + ENCRYPTION_KEY_SYSTEM_DATA, fdle->crypto_data.key_version)) + { + my_free(newpkt); + DBUG_RETURN(LOG_READ_DECRYPT); + } + DBUG_ASSERT(dstlen == data_len - 4); + memcpy(dst, dst + EVENT_LEN_OFFSET, 4); + int4store(dst + EVENT_LEN_OFFSET, data_len); + packet->reset(newpkt, data_len + ev_offset, data_len + ev_offset + 1, + &my_charset_bin); + } + + /* + CRC verification of the Dump thread + */ + if (data_len > LOG_EVENT_MINIMAL_HEADER_LEN) + { + /* Corrupt the event for Dump thread*/ + DBUG_EXECUTE_IF("corrupt_read_log_event2", + uchar *debug_event_buf_c= (uchar*) packet->ptr() + ev_offset; + if (debug_event_buf_c[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT) + { + int debug_cor_pos= rand() % (data_len - BINLOG_CHECKSUM_LEN); + debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos]; + DBUG_PRINT("info", ("Corrupt the event at Log_event::read_log_event: byte on position %d", debug_cor_pos)); + DBUG_SET("-d,corrupt_read_log_event2"); + } + ); + if (event_checksum_test((uchar*) packet->ptr() + ev_offset, + data_len, checksum_alg_arg)) + DBUG_RETURN(LOG_READ_CHECKSUM_FAILURE); + } + DBUG_RETURN(0); +} + +Log_event* Log_event::read_log_event(IO_CACHE* file, + const Format_description_log_event *fdle, + my_bool crc_check) +{ + DBUG_ENTER("Log_event::read_log_event(IO_CACHE*,Format_description_log_event*...)"); + DBUG_ASSERT(fdle 
!= 0); + String event; + const char *error= 0; + Log_event *res= 0; + + switch (read_log_event(file, &event, fdle, BINLOG_CHECKSUM_ALG_OFF)) + { + case 0: + break; + case LOG_READ_EOF: // no error here; we are at the file's end + goto err; + case LOG_READ_BOGUS: + error= "Event invalid"; + goto err; + case LOG_READ_IO: + error= "read error"; + goto err; + case LOG_READ_MEM: + error= "Out of memory"; + goto err; + case LOG_READ_TRUNC: + error= "Event truncated"; + goto err; + case LOG_READ_TOO_LARGE: + error= "Event too big"; + goto err; + case LOG_READ_DECRYPT: + error= "Event decryption failure"; + goto err; + case LOG_READ_CHECKSUM_FAILURE: + default: + DBUG_ASSERT(0); + error= "internal error"; + goto err; + } + + if ((res= read_log_event((uchar*) event.ptr(), event.length(), + &error, fdle, crc_check))) + res->register_temp_buf((uchar*) event.release(), true); + +err: + if (unlikely(error)) + { + DBUG_ASSERT(!res); +#ifdef MYSQL_CLIENT + if (force_opt) + DBUG_RETURN(new Unknown_log_event()); +#endif + if (event.length() >= OLD_HEADER_LEN) + sql_print_error("Error in Log_event::read_log_event(): '%s'," + " data_len: %lu, event_type: %u", error, + (ulong) uint4korr(&event[EVENT_LEN_OFFSET]), + (uint) (uchar)event[EVENT_TYPE_OFFSET]); + else + sql_print_error("Error in Log_event::read_log_event(): '%s'", error); + /* + The SQL slave thread will check if file->error<0 to know + if there was an I/O error. Even if there is no "low-level" I/O errors + with 'file', any of the high-level above errors is worrying + enough to stop the SQL thread now ; as we are skipping the current event, + going on with reading and successfully executing other events can + only corrupt the slave's databases. So stop. + */ + file->error= -1; + } + DBUG_RETURN(res); +} + + +/** + Binlog format tolerance is in (buf, event_len, fdle) + constructors. 
+*/ + +Log_event* Log_event::read_log_event(const uchar *buf, uint event_len, + const char **error, + const Format_description_log_event *fdle, + my_bool crc_check) +{ + Log_event* ev; + enum enum_binlog_checksum_alg alg; + DBUG_ENTER("Log_event::read_log_event(char*,...)"); + DBUG_ASSERT(fdle != 0); + DBUG_PRINT("info", ("binlog_version: %d", fdle->binlog_version)); + DBUG_DUMP_EVENT_BUF(buf, event_len); + + /* + Check the integrity; This is needed because handle_slave_io() doesn't + check if packet is of proper length. + */ + if (event_len < EVENT_LEN_OFFSET) + { + *error="Sanity check failed"; // Needed to free buffer + DBUG_RETURN(NULL); // general sanity check - will fail on a partial read + } + + uint event_type= buf[EVENT_TYPE_OFFSET]; + // all following START events in the current file are without checksum + if (event_type == START_EVENT_V3) + (const_cast< Format_description_log_event *>(fdle))->checksum_alg= BINLOG_CHECKSUM_ALG_OFF; + /* + CRC verification by SQL and Show-Binlog-Events master side. + The caller has to provide @fdle->checksum_alg to + be the last seen FD's (A) descriptor. + If event is FD the descriptor is in it. + Notice, FD of the binlog can be only in one instance and therefore + Show-Binlog-Events executing master side thread needs just to know + the only FD's (A) value - whereas RL can contain more. + In the RL case, the alg is kept in FD_e (@fdle) which is reset + to the newer read-out event after its execution with possibly new alg descriptor. + Therefore in a typical sequence of RL: + {FD_s^0, FD_m, E_m^1} E_m^1 + will be verified with (A) of FD_m. + + See legends definition on MYSQL_BIN_LOG::relay_log_checksum_alg docs + lines (log.h). + + Notice, a pre-checksum FD version forces alg := BINLOG_CHECKSUM_ALG_UNDEF. + */ + alg= (event_type != FORMAT_DESCRIPTION_EVENT) ? 
+ fdle->checksum_alg : get_checksum_alg(buf, event_len); + // Emulate the corruption during reading an event + DBUG_EXECUTE_IF("corrupt_read_log_event_char", + if (event_type != FORMAT_DESCRIPTION_EVENT) + { + uchar *debug_event_buf_c= const_cast(buf); + int debug_cor_pos= rand() % (event_len - BINLOG_CHECKSUM_LEN); + debug_event_buf_c[debug_cor_pos]=~ debug_event_buf_c[debug_cor_pos]; + DBUG_PRINT("info", ("Corrupt the event at Log_event::read_log_event(char*,...): byte on position %d", debug_cor_pos)); + DBUG_SET("-d,corrupt_read_log_event_char"); + } + ); + if (crc_check && event_checksum_test(const_cast(buf), event_len, alg)) + { +#ifdef MYSQL_CLIENT + *error= "Event crc check failed! Most likely there is event corruption."; + if (force_opt) + { + ev= new Unknown_log_event(buf, fdle); + DBUG_RETURN(ev); + } + else + DBUG_RETURN(NULL); +#else + *error= ER_THD_OR_DEFAULT(current_thd, ER_BINLOG_READ_EVENT_CHECKSUM_FAILURE); + sql_print_error("%s", *error); + DBUG_RETURN(NULL); +#endif + } + + if (event_type > fdle->number_of_event_types && + event_type != FORMAT_DESCRIPTION_EVENT) + { + /* + It is unsafe to use the fdle if its post_header_len + array does not include the event type. + */ + DBUG_PRINT("error", ("event type %d found, but the current " + "Format_description_log_event supports only %d event " + "types", event_type, + fdle->number_of_event_types)); + ev= NULL; + } + else + { + /* + In some previuos versions (see comment in + Format_description_log_event::Format_description_log_event(char*,...)), + event types were assigned different id numbers than in the + present version. In order to replicate from such versions to the + present version, we must map those event type id's to our event + type id's. The mapping is done with the event_type_permutation + array, which was set up when the Format_description_log_event + was read. 
+ */ + if (fdle->event_type_permutation) + { + int new_event_type= fdle->event_type_permutation[event_type]; + DBUG_PRINT("info", ("converting event type %d to %d (%s)", + event_type, new_event_type, + get_type_str((Log_event_type)new_event_type))); + event_type= new_event_type; + } + + if (alg != BINLOG_CHECKSUM_ALG_UNDEF && + (event_type == FORMAT_DESCRIPTION_EVENT || + alg != BINLOG_CHECKSUM_ALG_OFF)) + event_len= event_len - BINLOG_CHECKSUM_LEN; + + /* + Create an object of Ignorable_log_event for unrecognized sub-class. + So that SLAVE SQL THREAD will only update the position and continue. + We should look for this flag first instead of judging by event_type + Any event can be Ignorable_log_event if it has this flag on. + look into @note of Ignorable_log_event + */ + if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F) + { + ev= new Ignorable_log_event(buf, fdle, + get_type_str((Log_event_type) event_type)); + goto exit; + } + switch(event_type) { + case QUERY_EVENT: + ev= new Query_log_event(buf, event_len, fdle, QUERY_EVENT); + break; + case QUERY_COMPRESSED_EVENT: + ev= new Query_compressed_log_event(buf, event_len, fdle, + QUERY_COMPRESSED_EVENT); + break; + case LOAD_EVENT: + ev= new Load_log_event(buf, event_len, fdle); + break; + case NEW_LOAD_EVENT: + ev= new Load_log_event(buf, event_len, fdle); + break; + case ROTATE_EVENT: + ev= new Rotate_log_event(buf, event_len, fdle); + break; + case BINLOG_CHECKPOINT_EVENT: + ev= new Binlog_checkpoint_log_event(buf, event_len, fdle); + break; + case GTID_EVENT: + ev= new Gtid_log_event(buf, event_len, fdle); + break; + case GTID_LIST_EVENT: + ev= new Gtid_list_log_event(buf, event_len, fdle); + break; + case CREATE_FILE_EVENT: + ev= new Create_file_log_event(buf, event_len, fdle); + break; + case APPEND_BLOCK_EVENT: + ev= new Append_block_log_event(buf, event_len, fdle); + break; + case DELETE_FILE_EVENT: + ev= new Delete_file_log_event(buf, event_len, fdle); + break; + case EXEC_LOAD_EVENT: + ev= new 
Execute_load_log_event(buf, event_len, fdle); + break; + case START_EVENT_V3: /* this is sent only by MySQL <=4.x */ + ev= new Start_log_event_v3(buf, event_len, fdle); + break; + case STOP_EVENT: + ev= new Stop_log_event(buf, fdle); + break; + case INTVAR_EVENT: + ev= new Intvar_log_event(buf, fdle); + break; + case XID_EVENT: + ev= new Xid_log_event(buf, fdle); + break; + case XA_PREPARE_LOG_EVENT: + ev= new XA_prepare_log_event(buf, fdle); + break; + case RAND_EVENT: + ev= new Rand_log_event(buf, fdle); + break; + case USER_VAR_EVENT: + ev= new User_var_log_event(buf, event_len, fdle); + break; + case FORMAT_DESCRIPTION_EVENT: + ev= new Format_description_log_event(buf, event_len, fdle); + break; +#if defined(HAVE_REPLICATION) + case PRE_GA_WRITE_ROWS_EVENT: + ev= new Write_rows_log_event_old(buf, event_len, fdle); + break; + case PRE_GA_UPDATE_ROWS_EVENT: + ev= new Update_rows_log_event_old(buf, event_len, fdle); + break; + case PRE_GA_DELETE_ROWS_EVENT: + ev= new Delete_rows_log_event_old(buf, event_len, fdle); + break; + case WRITE_ROWS_EVENT_V1: + case WRITE_ROWS_EVENT: + ev= new Write_rows_log_event(buf, event_len, fdle); + break; + case UPDATE_ROWS_EVENT_V1: + case UPDATE_ROWS_EVENT: + ev= new Update_rows_log_event(buf, event_len, fdle); + break; + case DELETE_ROWS_EVENT_V1: + case DELETE_ROWS_EVENT: + ev= new Delete_rows_log_event(buf, event_len, fdle); + break; + + case WRITE_ROWS_COMPRESSED_EVENT: + case WRITE_ROWS_COMPRESSED_EVENT_V1: + ev= new Write_rows_compressed_log_event(buf, event_len, fdle); + break; + case UPDATE_ROWS_COMPRESSED_EVENT: + case UPDATE_ROWS_COMPRESSED_EVENT_V1: + ev= new Update_rows_compressed_log_event(buf, event_len, fdle); + break; + case DELETE_ROWS_COMPRESSED_EVENT: + case DELETE_ROWS_COMPRESSED_EVENT_V1: + ev= new Delete_rows_compressed_log_event(buf, event_len, fdle); + break; + + /* MySQL GTID events are ignored */ + case GTID_LOG_EVENT: + case ANONYMOUS_GTID_LOG_EVENT: + case PREVIOUS_GTIDS_LOG_EVENT: + case 
TRANSACTION_CONTEXT_EVENT: + case VIEW_CHANGE_EVENT: + ev= new Ignorable_log_event(buf, fdle, + get_type_str((Log_event_type) event_type)); + break; + + case TABLE_MAP_EVENT: + ev= new Table_map_log_event(buf, event_len, fdle); + break; +#endif + case BEGIN_LOAD_QUERY_EVENT: + ev= new Begin_load_query_log_event(buf, event_len, fdle); + break; + case EXECUTE_LOAD_QUERY_EVENT: + ev= new Execute_load_query_log_event(buf, event_len, fdle); + break; + case INCIDENT_EVENT: + ev= new Incident_log_event(buf, event_len, fdle); + break; + case ANNOTATE_ROWS_EVENT: + ev= new Annotate_rows_log_event(buf, event_len, fdle); + break; + case START_ENCRYPTION_EVENT: + ev= new Start_encryption_log_event(buf, event_len, fdle); + break; + default: + DBUG_PRINT("error",("Unknown event code: %d", + (uchar) buf[EVENT_TYPE_OFFSET])); + ev= NULL; + break; + } + } +exit: + + if (ev) + { + ev->checksum_alg= alg; +#ifdef MYSQL_CLIENT + if (ev->checksum_alg != BINLOG_CHECKSUM_ALG_OFF && + ev->checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF) + ev->crc= uint4korr(buf + (event_len)); +#endif + } + + DBUG_PRINT("read_event", ("%s(type_code: %u; event_len: %u)", + ev ? ev->get_type_str() : "", + (uchar)buf[EVENT_TYPE_OFFSET], + event_len)); + /* + is_valid() are small event-specific sanity tests which are + important; for example there are some my_malloc() in constructors + (e.g. Query_log_event::Query_log_event(char*...)); when these + my_malloc() fail we can't return an error out of the constructor + (because constructor is "void") ; so instead we leave the pointer we + wanted to allocate (e.g. 'query') to 0 and we test it in is_valid(). + Same for Format_description_log_event, member 'post_header_len'. + + SLAVE_EVENT is never used, so it should not be read ever. 
+ */ + if (!ev || !ev->is_valid() || (event_type == SLAVE_EVENT)) + { + DBUG_PRINT("error",("Found invalid event in binary log")); + + delete ev; +#ifdef MYSQL_CLIENT + if (!force_opt) /* then mysqlbinlog dies */ + { + *error= "Found invalid event in binary log"; + DBUG_RETURN(0); + } + ev= new Unknown_log_event(buf, fdle); +#else + *error= "Found invalid event in binary log"; + DBUG_RETURN(0); +#endif + } + DBUG_RETURN(ev); +} + + + +/* 2 utility functions for the next method */ + +/** + Read a string with length from memory. + + This function reads the string-with-length stored at + src and extract the length into *len and + a pointer to the start of the string into *dst. The + string can then be copied using memcpy() with the + number of bytes given in *len. + + @param src Pointer to variable holding a pointer to the memory to + read the string from. + @param dst Pointer to variable holding a pointer where the actual + string starts. Starting from this position, the string + can be copied using @c memcpy(). + @param len Pointer to variable where the length will be stored. + @param end One-past-the-end of the memory where the string is + stored. + + @return Zero if the entire string can be copied successfully, + @c UINT_MAX if the length could not be read from memory + (that is, if *src >= end), otherwise the + number of bytes that are missing to read the full + string, which happends *dst + *len >= end. 
+*/ +static int +get_str_len_and_pointer(const Log_event::Byte **src, + const char **dst, + uint *len, + const Log_event::Byte *end) +{ + if (*src >= end) + return -1; // Will be UINT_MAX in two-complement arithmetics + uint length= **src; + if (length > 0) + { + if (*src + length >= end) + return (int)(*src + length - end + 1); // Number of bytes missing + *dst= (char *)*src + 1; // Will be copied later + } + *len= length; + *src+= length + 1; + return 0; +} + +static void copy_str_and_move(const char **src, Log_event::Byte **dst, + size_t len) +{ + memcpy(*dst, *src, len); + *src= (const char *)*dst; + (*dst)+= len; + *(*dst)++= 0; +} + + +#ifdef DBUG_TRACE +static char const * +code_name(int code) +{ + static char buf[255]; + switch (code) { + case Q_FLAGS2_CODE: return "Q_FLAGS2_CODE"; + case Q_SQL_MODE_CODE: return "Q_SQL_MODE_CODE"; + case Q_CATALOG_CODE: return "Q_CATALOG_CODE"; + case Q_AUTO_INCREMENT: return "Q_AUTO_INCREMENT"; + case Q_CHARSET_CODE: return "Q_CHARSET_CODE"; + case Q_TIME_ZONE_CODE: return "Q_TIME_ZONE_CODE"; + case Q_CATALOG_NZ_CODE: return "Q_CATALOG_NZ_CODE"; + case Q_LC_TIME_NAMES_CODE: return "Q_LC_TIME_NAMES_CODE"; + case Q_CHARSET_DATABASE_CODE: return "Q_CHARSET_DATABASE_CODE"; + case Q_TABLE_MAP_FOR_UPDATE_CODE: return "Q_TABLE_MAP_FOR_UPDATE_CODE"; + case Q_MASTER_DATA_WRITTEN_CODE: return "Q_MASTER_DATA_WRITTEN_CODE"; + case Q_HRNOW: return "Q_HRNOW"; + case Q_XID: return "XID"; + case Q_GTID_FLAGS3: return "Q_GTID_FLAGS3"; + } + sprintf(buf, "CODE#%d", code); + return buf; +} +#endif + +#define VALIDATE_BYTES_READ(CUR_POS, START, EVENT_LEN) \ + do { \ + uchar *cur_pos= (uchar *)CUR_POS; \ + uchar *start= (uchar *)START; \ + uint len= EVENT_LEN; \ + uint bytes_read= (uint)(cur_pos - start); \ + DBUG_PRINT("info", ("Bytes read: %u event_len:%u.\n",\ + bytes_read, len)); \ + if (bytes_read >= len) \ + DBUG_VOID_RETURN; \ + } while (0) + +/** + Macro to check that there is enough space to read from memory. 
+ + @param PTR Pointer to memory + @param END End of memory + @param CNT Number of bytes that should be read. + */ +#define CHECK_SPACE(PTR,END,CNT) \ + do { \ + DBUG_PRINT("info", ("Read %s", code_name(pos[-1]))); \ + if ((PTR) + (CNT) > (END)) { \ + DBUG_PRINT("info", ("query= 0")); \ + query= 0; \ + DBUG_VOID_RETURN; \ + } \ + } while (0) + + +/** + This is used by the SQL slave thread to prepare the event before execution. +*/ +Query_log_event::Query_log_event(const uchar *buf, uint event_len, + const Format_description_log_event + *description_event, + Log_event_type event_type) + :Log_event(buf, description_event), data_buf(0), query(NullS), + db(NullS), catalog_len(0), status_vars_len(0), + flags2_inited(0), sql_mode_inited(0), charset_inited(0), flags2(0), + auto_increment_increment(1), auto_increment_offset(1), + time_zone_len(0), lc_time_names_number(0), charset_database_number(0), + table_map_for_update(0), xid(0), master_data_written(0), gtid_flags_extra(0), + sa_seq_no(0) +{ + ulong data_len; + uint32 tmp; + uint8 common_header_len, post_header_len; + Log_event::Byte *start; + const Log_event::Byte *end; + bool catalog_nz= 1; + DBUG_ENTER("Query_log_event::Query_log_event(char*,...)"); + + memset(&user, 0, sizeof(user)); + memset(&host, 0, sizeof(host)); + common_header_len= description_event->common_header_len; + post_header_len= description_event->post_header_len[event_type-1]; + DBUG_PRINT("info",("event_len: %u common_header_len: %d post_header_len: %d", + event_len, common_header_len, post_header_len)); + + /* + We test if the event's length is sensible, and if so we compute data_len. + We cannot rely on QUERY_HEADER_LEN here as it would not be format-tolerant. + We use QUERY_HEADER_MINIMAL_LEN which is the same for 3.23, 4.0 & 5.0. 
+ */ + if (event_len < (uint)(common_header_len + post_header_len)) + DBUG_VOID_RETURN; + data_len= event_len - (common_header_len + post_header_len); + buf+= common_header_len; + + thread_id = slave_proxy_id = uint4korr(buf + Q_THREAD_ID_OFFSET); + exec_time = uint4korr(buf + Q_EXEC_TIME_OFFSET); + db_len = (uchar)buf[Q_DB_LEN_OFFSET]; // TODO: add a check of all *_len vars + error_code = uint2korr(buf + Q_ERR_CODE_OFFSET); + + /* + 5.0 format starts here. + Depending on the format, we may or not have affected/warnings etc + The remnent post-header to be parsed has length: + */ + tmp= post_header_len - QUERY_HEADER_MINIMAL_LEN; + if (tmp) + { + status_vars_len= uint2korr(buf + Q_STATUS_VARS_LEN_OFFSET); + /* + Check if status variable length is corrupt and will lead to very + wrong data. We could be even more strict and require data_len to + be even bigger, but this will suffice to catch most corruption + errors that can lead to a crash. + */ + if (status_vars_len > MY_MIN(data_len, MAX_SIZE_LOG_EVENT_STATUS)) + { + DBUG_PRINT("info", ("status_vars_len (%u) > data_len (%lu); query= 0", + status_vars_len, data_len)); + query= 0; + DBUG_VOID_RETURN; + } + data_len-= status_vars_len; + DBUG_PRINT("info", ("Query_log_event has status_vars_len: %u", + (uint) status_vars_len)); + tmp-= 2; + } + else + { + /* + server version < 5.0 / binlog_version < 4 master's event is + relay-logged with storing the original size of the event in + Q_MASTER_DATA_WRITTEN_CODE status variable. + The size is to be restored at reading Q_MASTER_DATA_WRITTEN_CODE-marked + event from the relay log. + */ + DBUG_ASSERT(description_event->binlog_version < 4); + master_data_written= (uint32)data_written; + } + /* + We have parsed everything we know in the post header for QUERY_EVENT, + the rest of post header is either comes from older version MySQL or + dedicated to derived events (e.g. Execute_load_query...) 
+ */ + + /* variable-part: the status vars; only in MySQL 5.0 */ + + start= (Log_event::Byte*) (buf+post_header_len); + end= (const Log_event::Byte*) (start+status_vars_len); + for (const Log_event::Byte* pos= start; pos < end;) + { + switch (*pos++) { + case Q_FLAGS2_CODE: + CHECK_SPACE(pos, end, 4); + flags2_inited= description_event->options_written_to_bin_log; + flags2= uint4korr(pos); + DBUG_PRINT("info",("In Query_log_event, read flags2: %lu", (ulong) flags2)); + pos+= 4; + break; + case Q_SQL_MODE_CODE: + { + CHECK_SPACE(pos, end, 8); + sql_mode_inited= 1; + sql_mode= (sql_mode_t) uint8korr(pos); + DBUG_PRINT("info",("In Query_log_event, read sql_mode: %llu", sql_mode)); + pos+= 8; + break; + } + case Q_CATALOG_NZ_CODE: + DBUG_PRINT("info", ("case Q_CATALOG_NZ_CODE; pos:%p; end:%p", + pos, end)); + if (get_str_len_and_pointer(&pos, &catalog, &catalog_len, end)) + { + DBUG_PRINT("info", ("query= 0")); + query= 0; + DBUG_VOID_RETURN; + } + break; + case Q_AUTO_INCREMENT: + CHECK_SPACE(pos, end, 4); + auto_increment_increment= uint2korr(pos); + auto_increment_offset= uint2korr(pos+2); + pos+= 4; + break; + case Q_CHARSET_CODE: + { + CHECK_SPACE(pos, end, 6); + charset_inited= 1; + memcpy(charset, pos, 6); + pos+= 6; + break; + } + case Q_TIME_ZONE_CODE: + { + if (get_str_len_and_pointer(&pos, &time_zone_str, &time_zone_len, end)) + { + DBUG_PRINT("info", ("Q_TIME_ZONE_CODE: query= 0")); + query= 0; + DBUG_VOID_RETURN; + } + break; + } + case Q_CATALOG_CODE: /* for 5.0.x where 0<=x<=3 masters */ + CHECK_SPACE(pos, end, 1); + if ((catalog_len= *pos)) + catalog= (char*) pos+1; // Will be copied later + CHECK_SPACE(pos, end, catalog_len + 2); + pos+= catalog_len+2; // leap over end 0 + catalog_nz= 0; // catalog has end 0 in event + break; + case Q_LC_TIME_NAMES_CODE: + CHECK_SPACE(pos, end, 2); + lc_time_names_number= uint2korr(pos); + pos+= 2; + break; + case Q_CHARSET_DATABASE_CODE: + CHECK_SPACE(pos, end, 2); + charset_database_number= uint2korr(pos); + pos+= 2; 
+ break; + case Q_TABLE_MAP_FOR_UPDATE_CODE: + CHECK_SPACE(pos, end, 8); + table_map_for_update= uint8korr(pos); + pos+= 8; + break; + case Q_MASTER_DATA_WRITTEN_CODE: + CHECK_SPACE(pos, end, 4); + data_written= master_data_written= uint4korr(pos); + pos+= 4; + break; + case Q_INVOKER: + { + CHECK_SPACE(pos, end, 1); + user.length= *pos++; + CHECK_SPACE(pos, end, user.length); + user.str= (char *)pos; + pos+= user.length; + + CHECK_SPACE(pos, end, 1); + host.length= *pos++; + CHECK_SPACE(pos, end, host.length); + host.str= (char *)pos; + pos+= host.length; + break; + } + case Q_HRNOW: + { + CHECK_SPACE(pos, end, 3); + when_sec_part= uint3korr(pos); + pos+= 3; + break; + } + case Q_XID: + { + CHECK_SPACE(pos, end, 8); + xid= uint8korr(pos); + pos+= 8; + break; + } + case Q_GTID_FLAGS3: + { + CHECK_SPACE(pos, end, 1); + gtid_flags_extra= *pos++; + if (gtid_flags_extra & (Gtid_log_event::FL_COMMIT_ALTER_E1 | + Gtid_log_event::FL_ROLLBACK_ALTER_E1)) + { + CHECK_SPACE(pos, end, 8); + sa_seq_no = uint8korr(pos); + pos+= 8; + } + break; + } + default: + /* That's why you must write status vars in growing order of code */ + DBUG_PRINT("info",("Query_log_event has unknown status vars (first has\ + code: %u), skipping the rest of them", (uint) *(pos-1))); + pos= (const uchar*) end; // Break loop + } + } + +#if !defined(MYSQL_CLIENT) + if (description_event->server_version_split.kind == + Format_description_log_event::master_version_split::KIND_MYSQL) + { + // Handle MariaDB/MySQL incompatible sql_mode bits + sql_mode_t mysql_sql_mode= sql_mode; + sql_mode&= MODE_MASK_MYSQL_COMPATIBLE; // Unset MySQL specific bits + + /* + sql_mode flags related to fraction second rounding/truncation + have opposite meaning in MySQL vs MariaDB. 
+ MySQL: + - rounds fractional seconds by default + - truncates if TIME_TRUNCATE_FRACTIONAL is set + MariaDB: + - truncates fractional seconds by default + - rounds if TIME_ROUND_FRACTIONAL is set + */ + if (description_event->server_version_split >= fsp_version_split_mysql && + !(mysql_sql_mode & MODE_MYSQL80_TIME_TRUNCATE_FRACTIONAL)) + sql_mode|= MODE_TIME_ROUND_FRACTIONAL; + } +#endif + + /** + Layout for the data buffer is as follows + +--------+-----------+------+------+---------+----+-------+ + | catlog | time_zone | user | host | db name | \0 | Query | + +--------+-----------+------+------+---------+----+-------+ + + To support the query cache we append the following buffer to the above + +-------+----------------------------------------+-------+ + |db len | uninitiatlized space of size of db len | FLAGS | + +-------+----------------------------------------+-------+ + + The area of buffer starting from Query field all the way to the end belongs + to the Query buffer and its structure is described in alloc_query() in + sql_parse.cc + */ + +#if !defined(MYSQL_CLIENT) && defined(HAVE_QUERY_CACHE) + if (!(start= data_buf= (Log_event::Byte*) my_malloc(PSI_INSTRUMENT_ME, + catalog_len + 1 + + time_zone_len + 1 + + user.length + 1 + + host.length + 1 + + data_len + 1 + + sizeof(size_t)//for db_len + + db_len + 1 + + QUERY_CACHE_DB_LENGTH_SIZE + + QUERY_CACHE_FLAGS_SIZE, + MYF(MY_WME)))) +#else + if (!(start= data_buf= (Log_event::Byte*) my_malloc(PSI_INSTRUMENT_ME, + catalog_len + 1 + + time_zone_len + 1 + + user.length + 1 + + host.length + 1 + + data_len + 1, + MYF(MY_WME)))) +#endif + DBUG_VOID_RETURN; + if (catalog_len) // If catalog is given + { + /** + @todo we should clean up and do only copy_str_and_move; it + works for both cases. Then we can remove the catalog_nz + flag. /sven + */ + if (likely(catalog_nz)) // true except if event comes from 5.0.0|1|2|3. 
+ copy_str_and_move(&catalog, &start, catalog_len); + else + { + memcpy(start, catalog, catalog_len+1); // copy end 0 + catalog= (const char *)start; + start+= catalog_len+1; + } + } + if (time_zone_len) + copy_str_and_move(&time_zone_str, &start, time_zone_len); + + if (user.length) + { + copy_str_and_move(&user.str, &start, user.length); + } + else + { + user.str= (char*) start; + *(start++)= 0; + } + + if (host.length) + copy_str_and_move(&host.str, &start, host.length); + else + { + host.str= (char*) start; + *(start++)= 0; + } + + /** + if time_zone_len or catalog_len are 0, then time_zone and catalog + are uninitialized at this point. shouldn't they point to the + zero-length null-terminated strings we allocated space for in the + my_alloc call above? /sven + */ + + /* A 2nd variable part; this is common to all versions */ + memcpy((char*) start, end, data_len); // Copy db and query + start[data_len]= '\0'; // End query with \0 (For safetly) + db= (char *)start; + query= (char *)(start + db_len + 1); + q_len= data_len - db_len -1; + + if (data_len && (data_len < db_len || + data_len < q_len || + data_len != (db_len + q_len + 1))) + { + q_len= 0; + query= NULL; + DBUG_VOID_RETURN; + } + + uint32 max_length= uint32(event_len - ((end + db_len + 1) - + (buf - common_header_len))); + if (q_len != max_length || + (event_len < uint((end + db_len + 1) - (buf - common_header_len)))) + { + q_len= 0; + query= NULL; + DBUG_VOID_RETURN; + } + /** + Append the db length at the end of the buffer. This will be used by + Query_cache::send_result_to_client() in case the query cache is On. 
  */
#if !defined(MYSQL_CLIENT) && defined(HAVE_QUERY_CACHE)
  size_t db_length= (size_t)db_len;
  memcpy(start + data_len + 1, &db_length, sizeof(size_t));
#endif
  DBUG_VOID_RETURN;
}

/**
  Read-constructor for a compressed Query event.

  First parses the event as a regular Query_log_event (the base-class ctor),
  then replaces the still-compressed query string with an uncompressed copy
  owned by this object (query_buf).  On any failure (bad compression header,
  allocation failure, decompression error) 'query' is reset to 0 so that
  is_valid() reports the event as invalid.
*/
Query_compressed_log_event::Query_compressed_log_event(const uchar *buf,
                                                       uint event_len,
                                                       const Format_description_log_event
                                                       *description_event,
                                                       Log_event_type event_type)
  :Query_log_event(buf, event_len, description_event, event_type),
   query_buf(NULL)
{
  if (query)
  {
    /* The compressed payload encodes its own uncompressed length;
       0 signals a malformed compression header. */
    uint32 un_len= binlog_get_uncompress_len((uchar*) query);
    if (!un_len)
    {
      query= 0;
      return;
    }

    /* Reserve one byte for '\0' */
    query_buf= (Log_event::Byte*) my_malloc(PSI_INSTRUMENT_ME,
                                            ALIGN_SIZE(un_len + 1), MYF(MY_WME));
    if (query_buf && !binlog_buf_uncompress((uchar*) query, (uchar *) query_buf,
                                            q_len, &un_len))
    {
      query_buf[un_len]= 0;
      query= (char*) query_buf;
      q_len= un_len;
    }
    else
    {
      /* Allocation or decompression failed: invalidate the event. */
      query= 0;
    }
  }
}


/*
  Replace a binlog event read into a packet with a dummy event. Either a
  Query_log_event that has just a comment, or if that will not fit in the
  space used for the event to be replaced, then a NULL user_var event.

  This is used when sending binlog data to a slave which does not understand
  this particular event and which is too old to support informational events
  or holes in the event stream.

  This allows to write such events into the binlog on the master and still be
  able to replicate against old slaves without them breaking.

  Clears the flag LOG_EVENT_THREAD_SPECIFIC_F and set LOG_EVENT_SUPPRESS_USE_F.
  Overwrites the type with QUERY_EVENT (or USER_VAR_EVENT), and replaces the
  body with a minimal query / NULL user var.

  Returns zero on success, -1 if error due to too little space in original
  event. A minimum of 25 bytes (19 bytes fixed header + 6 bytes in the body)
  is needed in any event to be replaced with a dummy event.
*/
int
Query_log_event::dummy_event(String *packet, ulong ev_offset,
                             enum enum_binlog_checksum_alg checksum_alg)
{
  uchar *p= (uchar *)packet->ptr() + ev_offset;
  size_t data_len= packet->length() - ev_offset;
  uint16 flags;
  static const size_t min_user_var_event_len=
    LOG_EVENT_HEADER_LEN + UV_NAME_LEN_SIZE + 1 + UV_VAL_IS_NULL; // 25
  static const size_t min_query_event_len=
    LOG_EVENT_HEADER_LEN + QUERY_HEADER_LEN + 1 + 1; // 34

  /* Exclude the trailing checksum (if any) from the rewritable area. */
  if (checksum_alg == BINLOG_CHECKSUM_ALG_CRC32)
    data_len-= BINLOG_CHECKSUM_LEN;
  else
    DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF ||
                checksum_alg == BINLOG_CHECKSUM_ALG_OFF);

  if (data_len < min_user_var_event_len)
    /* Cannot replace with dummy, event too short. */
    return -1;

  flags= uint2korr(p + FLAGS_OFFSET);
  flags&= ~LOG_EVENT_THREAD_SPECIFIC_F;
  flags|= LOG_EVENT_SUPPRESS_USE_F;
  int2store(p + FLAGS_OFFSET, flags);

  if (data_len < min_query_event_len)
  {
    /*
      Have to use dummy user_var event for such a short packet.

      This works, but the event will be considered part of an event group with
      the following event. So for example @@global.sql_slave_skip_counter=1
      will skip not only the dummy event, but also the immediately following
      event.

      We write a NULL user var with the name @`!dummyvar` (or as much
      as that as will fit within the size of the original event - so
      possibly just @`!`).
    */
    static const char var_name[]= "!dummyvar";
    size_t name_len= data_len - (min_user_var_event_len - 1);

    p[EVENT_TYPE_OFFSET]= USER_VAR_EVENT;
    int4store(p + LOG_EVENT_HEADER_LEN, name_len);
    memcpy(p + LOG_EVENT_HEADER_LEN + UV_NAME_LEN_SIZE, var_name, name_len);
    p[LOG_EVENT_HEADER_LEN + UV_NAME_LEN_SIZE + name_len]= 1; // indicates NULL
  }
  else
  {
    /*
      Use a dummy query event, just a comment.
    */
    static const char message[]=
      "# Dummy event replacing event type %u that slave cannot handle.";
    char buf[sizeof(message)+1];  /* +1, as %u can expand to 3 digits. */
    uchar old_type= p[EVENT_TYPE_OFFSET];
    uchar *q= p + LOG_EVENT_HEADER_LEN;
    size_t comment_len, len;

    p[EVENT_TYPE_OFFSET]= QUERY_EVENT;
    int4store(q + Q_THREAD_ID_OFFSET, 0);
    int4store(q + Q_EXEC_TIME_OFFSET, 0);
    q[Q_DB_LEN_OFFSET]= 0;
    int2store(q + Q_ERR_CODE_OFFSET, 0);
    int2store(q + Q_STATUS_VARS_LEN_OFFSET, 0);
    q[Q_DATA_OFFSET]= 0;                    /* Zero terminator for empty db */
    q+= Q_DATA_OFFSET + 1;
    len= my_snprintf(buf, sizeof(buf), message, old_type);
    comment_len= data_len - (min_query_event_len - 1);
    /* Pad the comment with spaces so the event keeps its original length. */
    if (comment_len <= len)
      memcpy(q, buf, comment_len);
    else
    {
      memcpy(q, buf, len);
      memset(q+len, ' ', comment_len - len);
    }
  }

  /* Re-checksum the rewritten body so the slave's CRC check still passes. */
  if (checksum_alg == BINLOG_CHECKSUM_ALG_CRC32)
  {
    ha_checksum crc= my_checksum(0, p, data_len);
    int4store(p + data_len, crc);
  }
  return 0;
}

/*
  Replace an event (GTID event) with a BEGIN query event, to be compatible
  with an old slave.
*/
int
Query_log_event::begin_event(String *packet, ulong ev_offset,
                             enum enum_binlog_checksum_alg checksum_alg)
{
  uchar *p= (uchar *)packet->ptr() + ev_offset;
  uchar *q= p + LOG_EVENT_HEADER_LEN;
  size_t data_len= packet->length() - ev_offset;
  uint16 flags;

  if (checksum_alg == BINLOG_CHECKSUM_ALG_CRC32)
    data_len-= BINLOG_CHECKSUM_LEN;
  else
    DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF ||
                checksum_alg == BINLOG_CHECKSUM_ALG_OFF);

  /*
    Currently we only need to replace GTID event.
    The length of GTID differs depending on whether it contains commit id.
  */
  DBUG_ASSERT(data_len == LOG_EVENT_HEADER_LEN + GTID_HEADER_LEN ||
              data_len == LOG_EVENT_HEADER_LEN + GTID_HEADER_LEN + 2);
  if (data_len != LOG_EVENT_HEADER_LEN + GTID_HEADER_LEN &&
      data_len != LOG_EVENT_HEADER_LEN + GTID_HEADER_LEN + 2)
    return 1;

  flags= uint2korr(p + FLAGS_OFFSET);
  flags&= ~LOG_EVENT_THREAD_SPECIFIC_F;
  flags|= LOG_EVENT_SUPPRESS_USE_F;
  int2store(p + FLAGS_OFFSET, flags);

  /* Rewrite the body in place as a minimal Query event running "BEGIN". */
  p[EVENT_TYPE_OFFSET]= QUERY_EVENT;
  int4store(q + Q_THREAD_ID_OFFSET, 0);
  int4store(q + Q_EXEC_TIME_OFFSET, 0);
  q[Q_DB_LEN_OFFSET]= 0;
  int2store(q + Q_ERR_CODE_OFFSET, 0);
  if (data_len == LOG_EVENT_HEADER_LEN + GTID_HEADER_LEN)
  {
    int2store(q + Q_STATUS_VARS_LEN_OFFSET, 0);
    q[Q_DATA_OFFSET]= 0;                    /* Zero terminator for empty db */
    q+= Q_DATA_OFFSET + 1;
  }
  else
  {
    DBUG_ASSERT(data_len == LOG_EVENT_HEADER_LEN + GTID_HEADER_LEN + 2);
    /* Put in an empty time_zone_str to take up the extra 2 bytes. */
    int2store(q + Q_STATUS_VARS_LEN_OFFSET, 2);
    q[Q_DATA_OFFSET]= Q_TIME_ZONE_CODE;
    q[Q_DATA_OFFSET+1]= 0;          /* Zero length for empty time_zone_str */
    q[Q_DATA_OFFSET+2]= 0;             /* Zero terminator for empty db */
    q+= Q_DATA_OFFSET + 3;
  }
  memcpy(q, "BEGIN", 5);

  if (checksum_alg == BINLOG_CHECKSUM_ALG_CRC32)
  {
    ha_checksum crc= my_checksum(0, p, data_len);
    int4store(p + data_len, crc);
  }
  return 0;
}


/**************************************************************************
        Start_log_event_v3 methods
**************************************************************************/


/*
  Read-constructor: decode binlog_version / server_version / created
  from a serialized Start event (v1/v3 format).
*/
Start_log_event_v3::Start_log_event_v3(const uchar *buf, uint event_len,
                                       const Format_description_log_event
                                       *description_event)
  :Log_event(buf, description_event), binlog_version(BINLOG_VERSION)
{
  if (event_len < LOG_EVENT_MINIMAL_HEADER_LEN + ST_COMMON_HEADER_LEN_OFFSET)
  {
    /* Too short: mark invalid via an empty server_version. */
    server_version[0]= 0;
    return;
  }
  buf+= LOG_EVENT_MINIMAL_HEADER_LEN;
  binlog_version= uint2korr(buf+ST_BINLOG_VER_OFFSET);
  memcpy(server_version,
buf+ST_SERVER_VER_OFFSET, + ST_SERVER_VER_LEN); + // prevent overrun if log is corrupted on disk + server_version[ST_SERVER_VER_LEN-1]= 0; + created= uint4korr(buf+ST_CREATED_OFFSET); + dont_set_created= 1; +} + + +/*************************************************************************** + Format_description_log_event methods +****************************************************************************/ + +/** + Format_description_log_event 1st ctor. + + Ctor. Can be used to create the event to write to the binary log (when the + server starts or when FLUSH LOGS), or to create artificial events to parse + binlogs from MySQL 3.23 or 4.x. + When in a client, only the 2nd use is possible. + + @param binlog_version the binlog version for which we want to build + an event. Can be 1 (=MySQL 3.23), 3 (=4.0.x + x>=2 and 4.1) or 4 (MySQL 5.0). Note that the + old 4.0 (binlog version 2) is not supported; + it should not be used for replication with + 5.0. + @param server_ver a string containing the server version. +*/ + +Format_description_log_event:: +Format_description_log_event(uint8 binlog_ver, const char* server_ver) + :Start_log_event_v3(), event_type_permutation(0) +{ + binlog_version= binlog_ver; + switch (binlog_ver) { + case 4: /* MySQL 5.0 */ + memcpy(server_version, ::server_version, ST_SERVER_VER_LEN); + DBUG_EXECUTE_IF("pretend_version_50034_in_binlog", + strmov(server_version, "5.0.34");); + common_header_len= LOG_EVENT_HEADER_LEN; + number_of_event_types= LOG_EVENT_TYPES; + /* we'll catch my_malloc() error in is_valid() */ + post_header_len=(uint8*) my_malloc(PSI_INSTRUMENT_ME, + number_of_event_types*sizeof(uint8) + + BINLOG_CHECKSUM_ALG_DESC_LEN, + MYF(0)); + /* + This long list of assignments is not beautiful, but I see no way to + make it nicer, as the right members are #defines, not array members, so + it's impossible to write a loop. 
+ */ + if (post_header_len) + { +#ifndef DBUG_OFF + // Allows us to sanity-check that all events initialized their + // events (see the end of this 'if' block). + memset(post_header_len, 255, number_of_event_types*sizeof(uint8)); +#endif + + /* Note: all event types must explicitly fill in their lengths here. */ + post_header_len[START_EVENT_V3-1]= START_V3_HEADER_LEN; + post_header_len[QUERY_EVENT-1]= QUERY_HEADER_LEN; + post_header_len[STOP_EVENT-1]= STOP_HEADER_LEN; + post_header_len[ROTATE_EVENT-1]= ROTATE_HEADER_LEN; + post_header_len[INTVAR_EVENT-1]= INTVAR_HEADER_LEN; + post_header_len[LOAD_EVENT-1]= LOAD_HEADER_LEN; + post_header_len[SLAVE_EVENT-1]= SLAVE_HEADER_LEN; + post_header_len[CREATE_FILE_EVENT-1]= CREATE_FILE_HEADER_LEN; + post_header_len[APPEND_BLOCK_EVENT-1]= APPEND_BLOCK_HEADER_LEN; + post_header_len[EXEC_LOAD_EVENT-1]= EXEC_LOAD_HEADER_LEN; + post_header_len[DELETE_FILE_EVENT-1]= DELETE_FILE_HEADER_LEN; + post_header_len[NEW_LOAD_EVENT-1]= NEW_LOAD_HEADER_LEN; + post_header_len[RAND_EVENT-1]= RAND_HEADER_LEN; + post_header_len[USER_VAR_EVENT-1]= USER_VAR_HEADER_LEN; + post_header_len[FORMAT_DESCRIPTION_EVENT-1]= FORMAT_DESCRIPTION_HEADER_LEN; + post_header_len[XID_EVENT-1]= XID_HEADER_LEN; + post_header_len[XA_PREPARE_LOG_EVENT-1]= XA_PREPARE_HEADER_LEN; + post_header_len[BEGIN_LOAD_QUERY_EVENT-1]= BEGIN_LOAD_QUERY_HEADER_LEN; + post_header_len[EXECUTE_LOAD_QUERY_EVENT-1]= EXECUTE_LOAD_QUERY_HEADER_LEN; + /* + The PRE_GA events are never be written to any binlog, but + their lengths are included in Format_description_log_event. + Hence, we need to be assign some value here, to avoid reading + uninitialized memory when the array is written to disk. 
+ */ + post_header_len[PRE_GA_WRITE_ROWS_EVENT-1]= 0; + post_header_len[PRE_GA_UPDATE_ROWS_EVENT-1]= 0; + post_header_len[PRE_GA_DELETE_ROWS_EVENT-1]= 0; + + post_header_len[TABLE_MAP_EVENT-1]= TABLE_MAP_HEADER_LEN; + post_header_len[WRITE_ROWS_EVENT_V1-1]= ROWS_HEADER_LEN_V1; + post_header_len[UPDATE_ROWS_EVENT_V1-1]= ROWS_HEADER_LEN_V1; + post_header_len[DELETE_ROWS_EVENT_V1-1]= ROWS_HEADER_LEN_V1; + /* + We here have the possibility to simulate a master of before we changed + the table map id to be stored in 6 bytes: when it was stored in 4 + bytes (=> post_header_len was 6). This is used to test backward + compatibility. + This code can be removed after a few months (today is Dec 21st 2005), + when we know that the 4-byte masters are not deployed anymore (check + with Tomas Ulin first!), and the accompanying test (rpl_row_4_bytes) + too. + */ + DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", + post_header_len[TABLE_MAP_EVENT-1]= + post_header_len[WRITE_ROWS_EVENT_V1-1]= + post_header_len[UPDATE_ROWS_EVENT_V1-1]= + post_header_len[DELETE_ROWS_EVENT_V1-1]= 6;); + post_header_len[INCIDENT_EVENT-1]= INCIDENT_HEADER_LEN; + post_header_len[HEARTBEAT_LOG_EVENT-1]= 0; + post_header_len[IGNORABLE_LOG_EVENT-1]= 0; + post_header_len[ROWS_QUERY_LOG_EVENT-1]= 0; + post_header_len[GTID_LOG_EVENT-1]= 0; + post_header_len[ANONYMOUS_GTID_LOG_EVENT-1]= 0; + post_header_len[PREVIOUS_GTIDS_LOG_EVENT-1]= 0; + post_header_len[TRANSACTION_CONTEXT_EVENT-1]= 0; + post_header_len[VIEW_CHANGE_EVENT-1]= 0; + post_header_len[XA_PREPARE_LOG_EVENT-1]= 0; + post_header_len[WRITE_ROWS_EVENT-1]= ROWS_HEADER_LEN_V2; + post_header_len[UPDATE_ROWS_EVENT-1]= ROWS_HEADER_LEN_V2; + post_header_len[DELETE_ROWS_EVENT-1]= ROWS_HEADER_LEN_V2; + + // Set header length of the reserved events to 0 + memset(post_header_len + MYSQL_EVENTS_END - 1, 0, + (MARIA_EVENTS_BEGIN - MYSQL_EVENTS_END)*sizeof(uint8)); + + // Set header lengths of Maria events + post_header_len[ANNOTATE_ROWS_EVENT-1]= 
ANNOTATE_ROWS_HEADER_LEN; + post_header_len[BINLOG_CHECKPOINT_EVENT-1]= + BINLOG_CHECKPOINT_HEADER_LEN; + post_header_len[GTID_EVENT-1]= GTID_HEADER_LEN; + post_header_len[GTID_LIST_EVENT-1]= GTID_LIST_HEADER_LEN; + post_header_len[START_ENCRYPTION_EVENT-1]= START_ENCRYPTION_HEADER_LEN; + + //compressed event + post_header_len[QUERY_COMPRESSED_EVENT-1]= QUERY_HEADER_LEN; + post_header_len[WRITE_ROWS_COMPRESSED_EVENT-1]= ROWS_HEADER_LEN_V2; + post_header_len[UPDATE_ROWS_COMPRESSED_EVENT-1]= ROWS_HEADER_LEN_V2; + post_header_len[DELETE_ROWS_COMPRESSED_EVENT-1]= ROWS_HEADER_LEN_V2; + post_header_len[WRITE_ROWS_COMPRESSED_EVENT_V1-1]= ROWS_HEADER_LEN_V1; + post_header_len[UPDATE_ROWS_COMPRESSED_EVENT_V1-1]= ROWS_HEADER_LEN_V1; + post_header_len[DELETE_ROWS_COMPRESSED_EVENT_V1-1]= ROWS_HEADER_LEN_V1; + + // Sanity-check that all post header lengths are initialized. + int i; + for (i=0; i=2 */ + /* + We build an artificial (i.e. not sent by the master) event, which + describes what those old master versions send. + */ + if (binlog_ver==1) + strmov(server_version, server_ver ? server_ver : "3.23"); + else + strmov(server_version, server_ver ? server_ver : "4.0"); + common_header_len= binlog_ver==1 ? OLD_HEADER_LEN : + LOG_EVENT_MINIMAL_HEADER_LEN; + /* + The first new event in binlog version 4 is Format_desc. So any event type + after that does not exist in older versions. We use the events known by + version 3, even if version 1 had only a subset of them (this is not a + problem: it uses a few bytes for nothing but unifies code; it does not + make the slave detect less corruptions). 
+ */ + number_of_event_types= FORMAT_DESCRIPTION_EVENT - 1; + post_header_len=(uint8*) my_malloc(PSI_INSTRUMENT_ME, + number_of_event_types*sizeof(uint8), MYF(0)); + if (post_header_len) + { + post_header_len[START_EVENT_V3-1]= START_V3_HEADER_LEN; + post_header_len[QUERY_EVENT-1]= QUERY_HEADER_MINIMAL_LEN; + post_header_len[STOP_EVENT-1]= 0; + post_header_len[ROTATE_EVENT-1]= (binlog_ver==1) ? 0 : ROTATE_HEADER_LEN; + post_header_len[INTVAR_EVENT-1]= 0; + post_header_len[LOAD_EVENT-1]= LOAD_HEADER_LEN; + post_header_len[SLAVE_EVENT-1]= 0; + post_header_len[CREATE_FILE_EVENT-1]= CREATE_FILE_HEADER_LEN; + post_header_len[APPEND_BLOCK_EVENT-1]= APPEND_BLOCK_HEADER_LEN; + post_header_len[EXEC_LOAD_EVENT-1]= EXEC_LOAD_HEADER_LEN; + post_header_len[DELETE_FILE_EVENT-1]= DELETE_FILE_HEADER_LEN; + post_header_len[NEW_LOAD_EVENT-1]= post_header_len[LOAD_EVENT-1]; + post_header_len[RAND_EVENT-1]= 0; + post_header_len[USER_VAR_EVENT-1]= 0; + } + break; + default: /* Includes binlog version 2 i.e. 4.0.x x<=1 */ + post_header_len= 0; /* will make is_valid() fail */ + break; + } + calc_server_version_split(); + deduct_options_written_to_bin_log(); + checksum_alg= BINLOG_CHECKSUM_ALG_UNDEF; + reset_crypto(); +} + + +/** + The problem with this constructor is that the fixed header may have a + length different from this version, but we don't know this length as we + have not read the Format_description_log_event which says it, yet. This + length is in the post-header of the event, but we don't know where the + post-header starts. + + So this type of event HAS to: + - either have the header's length at the beginning (in the header, at a + fixed position which will never be changed), not in the post-header. That + would make the header be "shifted" compared to other events. + - or have a header of size LOG_EVENT_MINIMAL_HEADER_LEN (19), in all future + versions, so that we know for sure. + + I (Guilhem) chose the 2nd solution. 
  Rotate has the same constraint (because
  it is sent before Format_description_log_event).
*/

/*
  Read-constructor: parse a serialized Format_description event.  The common
  header length and per-event-type post-header length table are taken from
  the event body itself; the trailing byte(s) may describe the checksum
  algorithm, depending on the originating server version.
*/
Format_description_log_event::
Format_description_log_event(const uchar *buf, uint event_len,
                             const Format_description_log_event*
                             description_event)
  :Start_log_event_v3(buf, event_len, description_event),
   common_header_len(0), post_header_len(NULL), event_type_permutation(0)
{
  DBUG_ENTER("Format_description_log_event::Format_description_log_event(char*,...)");
  if (!Start_log_event_v3::is_valid())
    DBUG_VOID_RETURN; /* sanity check */
  buf+= LOG_EVENT_MINIMAL_HEADER_LEN;
  if ((common_header_len=buf[ST_COMMON_HEADER_LEN_OFFSET]) < OLD_HEADER_LEN)
    DBUG_VOID_RETURN; /* sanity check */
  number_of_event_types=
    event_len - (LOG_EVENT_MINIMAL_HEADER_LEN + ST_COMMON_HEADER_LEN_OFFSET + 1);
  DBUG_PRINT("info", ("common_header_len=%d number_of_event_types=%d",
                      common_header_len, number_of_event_types));
  /* If alloc fails, we'll detect it in is_valid() */

  post_header_len= (uint8*) my_memdup(PSI_INSTRUMENT_ME,
                                      buf+ST_COMMON_HEADER_LEN_OFFSET+1,
                                      number_of_event_types*
                                      sizeof(*post_header_len),
                                      MYF(0));
  calc_server_version_split();
  if (!is_version_before_checksum(&server_version_split))
  {
    /* the last bytes are the checksum alg desc and value (or value's room) */
    number_of_event_types -= BINLOG_CHECKSUM_ALG_DESC_LEN;
    checksum_alg= (enum_binlog_checksum_alg)post_header_len[number_of_event_types];
  }
  else
  {
    checksum_alg= BINLOG_CHECKSUM_ALG_UNDEF;
  }
  deduct_options_written_to_bin_log();
  reset_crypto();

  DBUG_VOID_RETURN;
}

/*
  Enable binlog decryption from a Start_encryption event: copies the nonce
  and initializes crypto_data with the event's scheme and key version.
  Returns 0 on success, non-zero on failure (invalid event or init error).
*/
bool Format_description_log_event::start_decryption(Start_encryption_log_event* sele)
{
  DBUG_ASSERT(crypto_data.scheme == 0);

  if (!sele->is_valid())
    return 1;

  memcpy(crypto_data.nonce, sele->nonce, BINLOG_NONCE_LENGTH);
  return crypto_data.init(sele->crypto_scheme, sele->key_version);
}


/*
  Parse up to three dot-separated numeric components ("X.Y.Z") from
  'version'; *endptr is set to the first unconsumed character.
*/
Version::Version(const char *version, const char **endptr)
{
  const char *p= version;
ulong number; + for (uint i= 0; i<=2; i++) + { + char *r; + number= strtoul(p, &r, 10); + /* + It is an invalid version if any version number greater than 255 or + first number is not followed by '.'. + */ + if (number < 256 && (*r == '.' || i != 0)) + m_ver[i]= (uchar) number; + else + { + *this= Version(); + break; + } + + p= r; + if (*r == '.') + p++; // skip the dot + } + endptr[0]= p; +} + + +Format_description_log_event:: + master_version_split::master_version_split(const char *version) +{ + const char *p; + static_cast(this)[0]= Version(version, &p); + if (strstr(p, "MariaDB") != 0 || strstr(p, "-maria-") != 0) + kind= KIND_MARIADB; + else + kind= KIND_MYSQL; +} + + +/** + Splits the event's 'server_version' string into three numeric pieces stored + into 'server_version_split': + X.Y.Zabc (X,Y,Z numbers, a not a digit) -> {X,Y,Z} + X.Yabc -> {X,Y,0} + 'server_version_split' is then used for lookups to find if the server which + created this event has some known bug. +*/ +void Format_description_log_event::calc_server_version_split() +{ + server_version_split= master_version_split(server_version); + + DBUG_PRINT("info",("Format_description_log_event::server_version_split:" + " '%s' %d %d %d", server_version, + server_version_split[0], + server_version_split[1], server_version_split[2])); +} + + +void Format_description_log_event::deduct_options_written_to_bin_log() +{ + options_written_to_bin_log= OPTION_AUTO_IS_NULL | OPTION_NOT_AUTOCOMMIT | + OPTION_NO_FOREIGN_KEY_CHECKS | OPTION_RELAXED_UNIQUE_CHECKS | + OPTION_INSERT_HISTORY; + if (!server_version_split.version_is_valid() || + server_version_split.kind == master_version_split::KIND_MYSQL || + server_version_split < Version(10,5,2)) + return; + options_written_to_bin_log|= OPTION_IF_EXISTS; + if (server_version_split[0] == 10) + { + const static char v[10]={99,99,99,99,99,17,9,5,4,2}; + if (server_version_split[1] < 10 && + server_version_split[2] < v[server_version_split[1]]) + return; + } + 
  options_written_to_bin_log|= OPTION_EXPLICIT_DEF_TIMESTAMP;

  DBUG_ASSERT(options_written_to_bin_log == OPTIONS_WRITTEN_TO_BIN_LOG);
}

/**
  @return TRUE if the event's version is earlier than one that introduced
  the replication event checksum. FALSE otherwise.
*/
bool
Format_description_log_event::is_version_before_checksum(const master_version_split
                                                         *version_split)
{
  return *version_split <
    (version_split->kind == master_version_split::KIND_MARIADB ?
     checksum_version_split_mariadb : checksum_version_split_mysql);
}

/**
  @param buf buffer holding serialized FD event
  @param len net (possible checksum is stripped off) length of the event buf

  @return the version-safe checksum alg descriptor where zero
          designates no checksum, 255 - the originator is
          checksum-unaware (effectively no checksum) and the actual
          [1-254] range alg descriptor.
*/
enum enum_binlog_checksum_alg get_checksum_alg(const uchar *buf, ulong len)
{
  enum enum_binlog_checksum_alg ret;
  char version[ST_SERVER_VER_LEN];

  DBUG_ENTER("get_checksum_alg");
  DBUG_ASSERT(buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT);

  /* Read the originating server version straight out of the FD event body;
     versions predating checksums never carry an alg descriptor byte. */
  memcpy(version,
         buf + LOG_EVENT_MINIMAL_HEADER_LEN + ST_SERVER_VER_OFFSET,
         ST_SERVER_VER_LEN);
  version[ST_SERVER_VER_LEN - 1]= 0;

  Format_description_log_event::master_version_split version_split(version);
  ret= Format_description_log_event::is_version_before_checksum(&version_split)
    ?
      BINLOG_CHECKSUM_ALG_UNDEF
    : (enum_binlog_checksum_alg)buf[len - BINLOG_CHECKSUM_LEN - BINLOG_CHECKSUM_ALG_DESC_LEN];
  DBUG_ASSERT(ret == BINLOG_CHECKSUM_ALG_OFF ||
              ret == BINLOG_CHECKSUM_ALG_UNDEF ||
              ret == BINLOG_CHECKSUM_ALG_CRC32);
  DBUG_RETURN(ret);
}

/*
  Read-constructor: decode scheme, key version and nonce from a serialized
  Start_encryption event.  An unexpected length marks the event invalid
  (crypto_scheme = ~0).
*/
Start_encryption_log_event::
Start_encryption_log_event(const uchar *buf, uint event_len,
                           const Format_description_log_event* description_event)
  :Log_event(buf, description_event)
{
  if ((int)event_len ==
      LOG_EVENT_MINIMAL_HEADER_LEN + Start_encryption_log_event::get_data_size())
  {
    buf+= LOG_EVENT_MINIMAL_HEADER_LEN;
    crypto_scheme= *buf;
    key_version= uint4korr(buf + BINLOG_CRYPTO_SCHEME_LENGTH);
    memcpy(nonce,
           buf + BINLOG_CRYPTO_SCHEME_LENGTH + BINLOG_KEY_VERSION_LENGTH,
           BINLOG_NONCE_LENGTH);
  }
  else
    crypto_scheme= ~0; // invalid
}


/**************************************************************************
        Load_log_event methods
   General note about Load_log_event: the binlogging of LOAD DATA INFILE is
   going to be changed in 5.0 (or maybe in 5.1; not decided yet).
   However, the 5.0 slave could still have to read such events (from a 4.x
   master), convert them (which just means maybe expand the header, when 5.0
   servers have a UID in events) (remember that whatever is after the header
   will be like in 4.x, as this event's format is not modified in 5.0 as we
   will use new types of events to log the new LOAD DATA INFILE features).
   To be able to read/convert, we just need to not assume that the common
   header is of length LOG_EVENT_HEADER_LEN (we must use the description
   event).
   Note that I (Guilhem) manually tested replication of a big LOAD DATA INFILE
   between 3.23 and 5.0, and between 4.0 and 5.0, and it works fine (and the
   positions displayed in SHOW SLAVE STATUS then are fine too).
**************************************************************************/


/**
  @note
  The caller must do buf[event_len]= 0 before he starts using the
  constructed event.
*/

Load_log_event::Load_log_event(const uchar *buf, uint event_len,
                               const Format_description_log_event
                               *description_event)
  :Log_event(buf, description_event), num_fields(0), fields(0),
   field_lens(0),field_block_len(0),
   table_name(0), db(0), fname(0), local_fname(FALSE),
   /*
     Load_log_event which comes from the binary log does not contain
     information about the type of insert which was used on the master.
     Assume that it was an ordinary, non-concurrent LOAD DATA.
   */
   is_concurrent(FALSE)
{
  DBUG_ENTER("Load_log_event");
  /*
    I (Guilhem) manually tested replication of LOAD DATA INFILE for 3.23->5.0,
    4.0->5.0 and 5.0->5.0 and it works.
  */
  if (event_len)
    /* Body offset depends on whether this is an old LOAD_EVENT (variable
       common header) or a NEW_LOAD_EVENT (fixed 19-byte header). */
    copy_log_event(buf, event_len,
                   (((uchar)buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
                    LOAD_HEADER_LEN +
                    description_event->common_header_len :
                    LOAD_HEADER_LEN + LOG_EVENT_HEADER_LEN),
                   description_event);
  /* otherwise it's a derived class, will call copy_log_event() itself */
  DBUG_VOID_RETURN;
}


/*
  Load_log_event::copy_log_event()

  Decodes the post-header and variable part of a serialized LOAD event
  (field lengths, field names, table name, db name, file name), validating
  every length against the actual buffer bounds.  Returns 0 on success,
  1 on a malformed/corrupt event (table_name is reset to 0 so the event
  reads as invalid).
*/

int Load_log_event::copy_log_event(const uchar *buf, ulong event_len,
                                   int body_offset,
                                   const Format_description_log_event
                                   *description_event)
{
  DBUG_ENTER("Load_log_event::copy_log_event");
  uint data_len;
  if ((int) event_len <= body_offset)
    DBUG_RETURN(1);
  const uchar *buf_end= buf + event_len;
  /* this is the beginning of the post-header */
  const uchar *data_head= buf + description_event->common_header_len;
  thread_id= slave_proxy_id= uint4korr(data_head + L_THREAD_ID_OFFSET);
  exec_time= uint4korr(data_head + L_EXEC_TIME_OFFSET);
  skip_lines= uint4korr(data_head + L_SKIP_LINES_OFFSET);
  table_name_len= (uint)data_head[L_TBL_LEN_OFFSET];
  db_len= (uint)data_head[L_DB_LEN_OFFSET];
  num_fields= uint4korr(data_head + L_NUM_FIELDS_OFFSET);

  /*
    Sql_ex.init() on success returns the pointer to the first byte after
    the sql_ex structure, which is the start of field lengths array.
  */
  if (!(field_lens= (uchar*) sql_ex.init(buf + body_offset, buf_end,
                                         buf[EVENT_TYPE_OFFSET] != LOAD_EVENT)))
    DBUG_RETURN(1);

  data_len= event_len - body_offset;
  if (num_fields > data_len) // simple sanity check against corruption
    DBUG_RETURN(1);
  for (uint i= 0; i < num_fields; i++)
    field_block_len+= (uint)field_lens[i] + 1;

  fields= (char*) field_lens + num_fields;
  table_name= fields + field_block_len;
  if (strlen(table_name) > NAME_LEN)
    goto err;

  db= table_name + table_name_len + 1;
  DBUG_EXECUTE_IF("simulate_invalid_address", db_len= data_len;);
  fname= db + db_len + 1;
  /* Bounds checks: reject events whose declared lengths point past the
     end of the received buffer. */
  if ((db_len > data_len) || (fname > (char*) buf_end))
    goto err;
  fname_len= (uint) strlen(fname);
  if ((fname_len > data_len) || (fname + fname_len > (char*) buf_end))
    goto err;
  // null termination is accomplished by the caller doing buf[event_len]=0

  DBUG_RETURN(0);

err:
  // Invalid event.
  table_name= 0;
  DBUG_RETURN(1);
}


/**************************************************************************
        Rotate_log_event methods
**************************************************************************/

/*
  Read-constructor: decode the next-file position and duplicate the new
  binlog file name out of a serialized Rotate event.
*/
Rotate_log_event::Rotate_log_event(const uchar *buf, uint event_len,
                                   const Format_description_log_event*
                                   description_event)
  :Log_event(buf, description_event) ,new_log_ident(0), flags(DUP_NAME)
{
  DBUG_ENTER("Rotate_log_event::Rotate_log_event(char*,...)");
  // The caller will ensure that event_len is what we have at EVENT_LEN_OFFSET
  uint8 post_header_len= description_event->post_header_len[ROTATE_EVENT-1];
  uint ident_offset;
  if (event_len < (uint)(LOG_EVENT_MINIMAL_HEADER_LEN + post_header_len))
    DBUG_VOID_RETURN;
  buf+= LOG_EVENT_MINIMAL_HEADER_LEN;
  pos= post_header_len ?
uint8korr(buf + R_POS_OFFSET) : 4; + ident_len= (uint)(event_len - (LOG_EVENT_MINIMAL_HEADER_LEN + post_header_len)); + ident_offset= post_header_len; + set_if_smaller(ident_len,FN_REFLEN-1); + new_log_ident= my_strndup(PSI_INSTRUMENT_ME, (char*) buf + ident_offset, + (uint) ident_len, MYF(MY_WME)); + DBUG_PRINT("debug", ("new_log_ident: '%s'", new_log_ident)); + DBUG_VOID_RETURN; +} + + +/************************************************************************** + Binlog_checkpoint_log_event methods +**************************************************************************/ + +Binlog_checkpoint_log_event::Binlog_checkpoint_log_event( + const uchar *buf, uint event_len, + const Format_description_log_event *description_event) + :Log_event(buf, description_event), binlog_file_name(0) +{ + uint8 header_size= description_event->common_header_len; + uint8 post_header_len= + description_event->post_header_len[BINLOG_CHECKPOINT_EVENT-1]; + if (event_len < (uint) header_size + (uint) post_header_len || + post_header_len < BINLOG_CHECKPOINT_HEADER_LEN) + return; + buf+= header_size; + /* See uint4korr and int4store below */ + compile_time_assert(BINLOG_CHECKPOINT_HEADER_LEN == 4); + binlog_file_len= uint4korr(buf); + if (event_len - (header_size + post_header_len) < binlog_file_len) + return; + binlog_file_name= my_strndup(PSI_INSTRUMENT_ME, (char*) buf + post_header_len, + binlog_file_len, MYF(MY_WME)); + return; +} + + +/************************************************************************** + Global transaction ID stuff +**************************************************************************/ + +Gtid_log_event::Gtid_log_event(const uchar *buf, uint event_len, + const Format_description_log_event + *description_event) + : Log_event(buf, description_event), seq_no(0), commit_id(0), + flags_extra(0), extra_engines(0) +{ + uint8 header_size= description_event->common_header_len; + uint8 post_header_len= description_event->post_header_len[GTID_EVENT-1]; + const uchar 
*buf_0= buf; + if (event_len < (uint) header_size + (uint) post_header_len || + post_header_len < GTID_HEADER_LEN) + return; + + buf+= header_size; + seq_no= uint8korr(buf); + buf+= 8; + domain_id= uint4korr(buf); + buf+= 4; + flags2= *(buf++); + if (flags2 & FL_GROUP_COMMIT_ID) + { + if (event_len < (uint)header_size + GTID_HEADER_LEN + 2) + { + seq_no= 0; // So is_valid() returns false + return; + } + commit_id= uint8korr(buf); + buf+= 8; + } + if (flags2 & (FL_PREPARED_XA | FL_COMPLETED_XA)) + { + xid.formatID= uint4korr(buf); + buf+= 4; + + xid.gtrid_length= (long) buf[0]; + xid.bqual_length= (long) buf[1]; + buf+= 2; + + long data_length= xid.bqual_length + xid.gtrid_length; + memcpy(xid.data, buf, data_length); + buf+= data_length; + } + + /* the extra flags check and actions */ + if (static_cast(buf - buf_0) < event_len) + { + flags_extra= *buf++; + /* + extra engines flags presence is identifed by non-zero byte value + at this point + */ + if (flags_extra & FL_EXTRA_MULTI_ENGINE_E1) + { + DBUG_ASSERT(static_cast(buf - buf_0) < event_len); + + extra_engines= *buf++; + + DBUG_ASSERT(extra_engines > 0); + } + if (flags_extra & (FL_COMMIT_ALTER_E1 | FL_ROLLBACK_ALTER_E1)) + { + sa_seq_no= uint8korr(buf); + buf+= 8; + } + } + /* + the strict '<' part of the assert corresponds to extra zero-padded + trailing bytes, + */ + DBUG_ASSERT(static_cast(buf - buf_0) <= event_len); + /* and the last of them is tested. */ +#ifdef MYSQL_SERVER +#ifdef WITH_WSREP + if (!WSREP_ON) +#endif +#endif + DBUG_ASSERT(static_cast(buf - buf_0) == event_len || + buf_0[event_len - 1] == 0); +} + +int compare_glle_gtids(const void * _gtid1, const void *_gtid2) +{ + rpl_gtid *gtid1= (rpl_gtid *) _gtid1; + rpl_gtid *gtid2= (rpl_gtid *) _gtid2; + + int ret; + if (*gtid1 < *gtid2) + ret= -1; + else if (*gtid1 > *gtid2) + ret= 1; + else + ret= 0; + return ret; +} + +/* GTID list. 
*/ + +Gtid_list_log_event::Gtid_list_log_event(const uchar *buf, uint event_len, + const Format_description_log_event + *description_event) + : Log_event(buf, description_event), count(0), list(0), sub_id_list(0) +{ + uint32 i; + uint32 val; + uint8 header_size= description_event->common_header_len; + uint8 post_header_len= description_event->post_header_len[GTID_LIST_EVENT-1]; + if (event_len < (uint) header_size + (uint) post_header_len || + post_header_len < GTID_LIST_HEADER_LEN) + return; + + buf+= header_size; + val= uint4korr(buf); + count= val & ((1<<28)-1); + gl_flags= val & ((uint32)0xf << 28); + buf+= 4; + if (event_len - (header_size + post_header_len) < count*element_size || + (!(list= (rpl_gtid *)my_malloc(PSI_INSTRUMENT_ME, + count*sizeof(*list) + (count == 0), MYF(MY_WME))))) + return; + + for (i= 0; i < count; ++i) + { + list[i].domain_id= uint4korr(buf); + buf+= 4; + list[i].server_id= uint4korr(buf); + buf+= 4; + list[i].seq_no= uint8korr(buf); + buf+= 8; + } + +#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) + if ((gl_flags & FLAG_IGN_GTIDS)) + { + uint32 i; + if (!(sub_id_list= (uint64 *)my_malloc(PSI_INSTRUMENT_ME, + count*sizeof(uint64), MYF(MY_WME)))) + { + my_free(list); + list= NULL; + return; + } + for (i= 0; i < count; ++i) + { + if (!(sub_id_list[i]= + rpl_global_gtid_slave_state->next_sub_id(list[i].domain_id))) + { + my_free(list); + my_free(sub_id_list); + list= NULL; + sub_id_list= NULL; + return; + } + } + } +#endif +} + + +/* + Used to record gtid_list event while sending binlog to slave, without having to + fully contruct the event object. 
+*/ +bool +Gtid_list_log_event::peek(const char *event_start, size_t event_len, + enum enum_binlog_checksum_alg checksum_alg, + rpl_gtid **out_gtid_list, uint32 *out_list_len, + const Format_description_log_event *fdev) +{ + const char *p; + uint32 count_field, count; + rpl_gtid *gtid_list; + + if (checksum_alg == BINLOG_CHECKSUM_ALG_CRC32) + { + if (event_len > BINLOG_CHECKSUM_LEN) + event_len-= BINLOG_CHECKSUM_LEN; + else + event_len= 0; + } + else + DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF || + checksum_alg == BINLOG_CHECKSUM_ALG_OFF); + + if (event_len < (uint32)fdev->common_header_len + GTID_LIST_HEADER_LEN) + return true; + p= event_start + fdev->common_header_len; + count_field= uint4korr(p); + p+= 4; + count= count_field & ((1<<28)-1); + if (event_len < (uint32)fdev->common_header_len + GTID_LIST_HEADER_LEN + + element_size * count) + return true; + if (!(gtid_list= (rpl_gtid *)my_malloc(PSI_INSTRUMENT_ME, + sizeof(rpl_gtid)*count + (count == 0), MYF(MY_WME)))) + return true; + *out_gtid_list= gtid_list; + *out_list_len= count; + while (count--) + { + gtid_list->domain_id= uint4korr(p); + p+= 4; + gtid_list->server_id= uint4korr(p); + p+= 4; + gtid_list->seq_no= uint8korr(p); + p+= 8; + ++gtid_list; + } + + return false; +} + + +/************************************************************************** + Intvar_log_event methods +**************************************************************************/ + +/* + Intvar_log_event::Intvar_log_event() +*/ + +Intvar_log_event::Intvar_log_event(const uchar *buf, + const Format_description_log_event* description_event) + :Log_event(buf, description_event) +{ + /* The Post-Header is empty. The Variable Data part begins immediately. 
*/ + buf+= description_event->common_header_len + + description_event->post_header_len[INTVAR_EVENT-1]; + type= buf[I_TYPE_OFFSET]; + val= uint8korr(buf+I_VAL_OFFSET); +} + + +/* + Intvar_log_event::get_var_type_name() +*/ + +const char* Intvar_log_event::get_var_type_name() +{ + switch(type) { + case LAST_INSERT_ID_EVENT: return "LAST_INSERT_ID"; + case INSERT_ID_EVENT: return "INSERT_ID"; + default: /* impossible */ return "UNKNOWN"; + } +} + + +/************************************************************************** + Rand_log_event methods +**************************************************************************/ + +Rand_log_event::Rand_log_event(const uchar *buf, + const Format_description_log_event* description_event) + :Log_event(buf, description_event) +{ + /* The Post-Header is empty. The Variable Data part begins immediately. */ + buf+= description_event->common_header_len + + description_event->post_header_len[RAND_EVENT-1]; + seed1= uint8korr(buf+RAND_SEED1_OFFSET); + seed2= uint8korr(buf+RAND_SEED2_OFFSET); +} + + +/************************************************************************** + Xid_log_event methods +**************************************************************************/ + +/** + @note + It's ok not to use int8store here, + as long as xid_t::set(ulonglong) and + xid_t::get_my_xid doesn't do it either. + We don't care about actual values of xids as long as + identical numbers compare identically +*/ + +Xid_log_event:: +Xid_log_event(const uchar *buf, + const Format_description_log_event *description_event) + :Xid_apply_log_event(buf, description_event) +{ + /* The Post-Header is empty. The Variable Data part begins immediately. 
*/ + buf+= description_event->common_header_len + + description_event->post_header_len[XID_EVENT-1]; + memcpy((char*) &xid, buf, sizeof(xid)); +} + +/************************************************************************** + XA_prepare_log_event methods +**************************************************************************/ +XA_prepare_log_event:: +XA_prepare_log_event(const uchar *buf, + const Format_description_log_event *description_event) + :Xid_apply_log_event(buf, description_event) +{ + buf+= description_event->common_header_len + + description_event->post_header_len[XA_PREPARE_LOG_EVENT-1]; + one_phase= * (bool *) buf; + buf+= 1; + + m_xid.formatID= uint4korr(buf); + buf+= 4; + m_xid.gtrid_length= uint4korr(buf); + buf+= 4; + // Todo: validity here and elsewhere checks to be replaced by MDEV-21839 fixes + if (m_xid.gtrid_length <= 0 || m_xid.gtrid_length > MAXGTRIDSIZE) + { + m_xid.formatID= -1; + return; + } + m_xid.bqual_length= uint4korr(buf); + buf+= 4; + if (m_xid.bqual_length < 0 || m_xid.bqual_length > MAXBQUALSIZE) + { + m_xid.formatID= -1; + return; + } + DBUG_ASSERT(m_xid.gtrid_length + m_xid.bqual_length <= XIDDATASIZE); + + memcpy(m_xid.data, buf, m_xid.gtrid_length + m_xid.bqual_length); + + xid= NULL; +} + + +/************************************************************************** + User_var_log_event methods +**************************************************************************/ + +User_var_log_event:: +User_var_log_event(const uchar *buf, uint event_len, + const Format_description_log_event* description_event) + :Log_event(buf, description_event) +#ifndef MYSQL_CLIENT + , deferred(false), query_id(0) +#endif +{ + bool error= false; + const uchar *buf_start= buf, *buf_end= buf + event_len; + + /* The Post-Header is empty. The Variable Data part begins immediately. 
*/ + buf+= description_event->common_header_len + + description_event->post_header_len[USER_VAR_EVENT-1]; + name_len= uint4korr(buf); + /* Avoid reading out of buffer */ + if ((buf - buf_start) + UV_NAME_LEN_SIZE + name_len > event_len) + { + error= true; + goto err; + } + + name= (char *) buf + UV_NAME_LEN_SIZE; + + /* + We don't know yet is_null value, so we must assume that name_len + may have the bigger value possible, is_null= True and there is no + payload for val, or even that name_len is 0. + */ + if (name + name_len + UV_VAL_IS_NULL > (char*) buf_end) + { + error= true; + goto err; + } + + buf+= UV_NAME_LEN_SIZE + name_len; + is_null= (bool) *buf; + flags= User_var_log_event::UNDEF_F; // defaults to UNDEF_F + if (is_null) + { + type= STRING_RESULT; + charset_number= my_charset_bin.number; + val_len= 0; + val= 0; + } + else + { + val= (char *) (buf + UV_VAL_IS_NULL + UV_VAL_TYPE_SIZE + + UV_CHARSET_NUMBER_SIZE + UV_VAL_LEN_SIZE); + + if (val > (char*) buf_end) + { + error= true; + goto err; + } + + type= (Item_result) buf[UV_VAL_IS_NULL]; + charset_number= uint4korr(buf + UV_VAL_IS_NULL + UV_VAL_TYPE_SIZE); + val_len= uint4korr(buf + UV_VAL_IS_NULL + UV_VAL_TYPE_SIZE + + UV_CHARSET_NUMBER_SIZE); + + /** + We need to check if this is from an old server + that did not pack information for flags. + We do this by checking if there are extra bytes + after the packed value. If there are we take the + extra byte and it's value is assumed to contain + the flags value. + + Old events will not have this extra byte, thence, + we keep the flags set to UNDEF_F. 
+ */ + size_t bytes_read= (val + val_len) - (char*) buf_start; + if (bytes_read > event_len) + { + error= true; + goto err; + } + if ((data_written - bytes_read) > 0) + { + flags= (uint) *(buf + UV_VAL_IS_NULL + UV_VAL_TYPE_SIZE + + UV_CHARSET_NUMBER_SIZE + UV_VAL_LEN_SIZE + + val_len); + } + } + +err: + if (unlikely(error)) + name= 0; +} + + +/************************************************************************** + Create_file_log_event methods +**************************************************************************/ + +/* + Create_file_log_event ctor +*/ + +Create_file_log_event:: +Create_file_log_event(const uchar *buf, uint len, + const Format_description_log_event* description_event) + :Load_log_event(buf,0,description_event),fake_base(0),block(0), + inited_from_old(0) +{ + DBUG_ENTER("Create_file_log_event::Create_file_log_event(char*,...)"); + uint block_offset; + uint header_len= description_event->common_header_len; + uint8 load_header_len= description_event->post_header_len[LOAD_EVENT-1]; + uint8 create_file_header_len= description_event->post_header_len[CREATE_FILE_EVENT-1]; + if (!(event_buf= (uchar*) my_memdup(PSI_INSTRUMENT_ME, buf, len, + MYF(MY_WME))) || + copy_log_event(event_buf,len, + (((uchar)buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ? + load_header_len + header_len : + (fake_base ? (header_len+load_header_len) : + (header_len+load_header_len) + + create_file_header_len)), + description_event)) + DBUG_VOID_RETURN; + if (description_event->binlog_version!=1) + { + file_id= uint4korr(buf + + header_len + + load_header_len + CF_FILE_ID_OFFSET); + /* + Note that it's ok to use get_data_size() below, because it is computed + with values we have already read from this event (because we called + copy_log_event()); we are not using slave's format info to decode + master's format, we are really using master's format info. 
+ Anyway, both formats should be identical (except the common_header_len) + as these Load events are not changed between 4.0 and 5.0 (as logging of + LOAD DATA INFILE does not use Load_log_event in 5.0). + + The + 1 is for \0 terminating fname + */ + block_offset= (description_event->common_header_len + + Load_log_event::get_data_size() + + create_file_header_len + 1); + if (len < block_offset) + DBUG_VOID_RETURN; + block= const_cast(buf) + block_offset; + block_len= len - block_offset; + } + else + { + sql_ex.force_new_format(); + inited_from_old= 1; + } + DBUG_VOID_RETURN; +} + + +/************************************************************************** + Append_block_log_event methods +**************************************************************************/ + +/* + Append_block_log_event ctor +*/ + +Append_block_log_event:: +Append_block_log_event(const uchar *buf, uint len, + const Format_description_log_event* description_event) + :Log_event(buf, description_event),block(0) +{ + DBUG_ENTER("Append_block_log_event::Append_block_log_event(char*,...)"); + uint8 common_header_len= description_event->common_header_len; + uint8 append_block_header_len= + description_event->post_header_len[APPEND_BLOCK_EVENT-1]; + uint total_header_len= common_header_len+append_block_header_len; + if (len < total_header_len) + DBUG_VOID_RETURN; + file_id= uint4korr(buf + common_header_len + AB_FILE_ID_OFFSET); + block= const_cast(buf) + total_header_len; + block_len= len - total_header_len; + DBUG_VOID_RETURN; +} + + +/************************************************************************** + Delete_file_log_event methods +**************************************************************************/ + +/* + Delete_file_log_event ctor +*/ + +Delete_file_log_event:: +Delete_file_log_event(const uchar *buf, uint len, + const Format_description_log_event* description_event) + :Log_event(buf, description_event),file_id(0) +{ + uint8 common_header_len= 
description_event->common_header_len; + uint8 delete_file_header_len= description_event->post_header_len[DELETE_FILE_EVENT-1]; + if (len < (uint)(common_header_len + delete_file_header_len)) + return; + file_id= uint4korr(buf + common_header_len + DF_FILE_ID_OFFSET); +} + + +/************************************************************************** + Execute_load_log_event methods +**************************************************************************/ + +/* + Execute_load_log_event ctor +*/ + +Execute_load_log_event:: +Execute_load_log_event(const uchar *buf, uint len, + const Format_description_log_event* description_event) + :Log_event(buf, description_event), file_id(0) +{ + uint8 common_header_len= description_event->common_header_len; + uint8 exec_load_header_len= description_event->post_header_len[EXEC_LOAD_EVENT-1]; + if (len < (uint)(common_header_len+exec_load_header_len)) + return; + file_id= uint4korr(buf + common_header_len + EL_FILE_ID_OFFSET); +} + + +/************************************************************************** + Begin_load_query_log_event methods +**************************************************************************/ + +Begin_load_query_log_event:: +Begin_load_query_log_event(const uchar *buf, uint len, + const Format_description_log_event* desc_event) + :Append_block_log_event(buf, len, desc_event) +{ +} + + +/************************************************************************** + Execute_load_query_log_event methods +**************************************************************************/ + + +Execute_load_query_log_event:: +Execute_load_query_log_event(const uchar *buf, uint event_len, + const Format_description_log_event* desc_event): + Query_log_event(buf, event_len, desc_event, EXECUTE_LOAD_QUERY_EVENT), + file_id(0), fn_pos_start(0), fn_pos_end(0) +{ + if (!Query_log_event::is_valid()) + return; + + buf+= desc_event->common_header_len; + + fn_pos_start= uint4korr(buf + ELQ_FN_POS_START_OFFSET); + fn_pos_end= 
uint4korr(buf + ELQ_FN_POS_END_OFFSET); + dup_handling= (enum_load_dup_handling)(*(buf + ELQ_DUP_HANDLING_OFFSET)); + + if (fn_pos_start > q_len || fn_pos_end > q_len || + dup_handling > LOAD_DUP_REPLACE) + return; + + file_id= uint4korr(buf + ELQ_FILE_ID_OFFSET); +} + + +ulong Execute_load_query_log_event::get_post_header_size_for_derived() +{ + return EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN; +} + + +/************************************************************************** + sql_ex_info methods +**************************************************************************/ + +/* + sql_ex_info::init() +*/ + +const uchar *sql_ex_info::init(const uchar *buf, const uchar *buf_end, + bool use_new_format) +{ + cached_new_format= use_new_format; + if (use_new_format) + { + empty_flags=0; + /* + The code below assumes that buf will not disappear from + under our feet during the lifetime of the event. This assumption + holds true in the slave thread if the log is in new format, but is not + the case when we have old format because we will be reusing net buffer + to read the actual file before we write out the Create_file event. 
+ */ + if (read_str(&buf, buf_end, &field_term, &field_term_len) || + read_str(&buf, buf_end, &enclosed, &enclosed_len) || + read_str(&buf, buf_end, &line_term, &line_term_len) || + read_str(&buf, buf_end, &line_start, &line_start_len) || + read_str(&buf, buf_end, &escaped, &escaped_len)) + return 0; + opt_flags= *buf++; + } + else + { + if (buf_end - buf < 7) + return 0; // Wrong data + field_term_len= enclosed_len= line_term_len= line_start_len= escaped_len=1; + field_term= (char*) buf++; // Use first byte in string + enclosed= (char*) buf++; + line_term= (char*) buf++; + line_start= (char*) buf++; + escaped= (char*) buf++; + opt_flags= *buf++; + empty_flags= *buf++; + if (empty_flags & FIELD_TERM_EMPTY) + field_term_len=0; + if (empty_flags & ENCLOSED_EMPTY) + enclosed_len=0; + if (empty_flags & LINE_TERM_EMPTY) + line_term_len=0; + if (empty_flags & LINE_START_EMPTY) + line_start_len=0; + if (empty_flags & ESCAPED_EMPTY) + escaped_len=0; + } + return buf; +} + + + +/************************************************************************** + Rows_log_event member functions +**************************************************************************/ + + +Rows_log_event::Rows_log_event(const uchar *buf, uint event_len, + const Format_description_log_event + *description_event) + : Log_event(buf, description_event), + m_row_count(0), +#ifndef MYSQL_CLIENT + m_table(NULL), +#endif + m_table_id(0), m_rows_buf(0), m_rows_cur(0), m_rows_end(0), + m_extra_row_data(0) +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + , m_curr_row(NULL), m_curr_row_end(NULL), + m_key(NULL), m_key_info(NULL), m_key_nr(0), + master_had_triggers(0) +#endif +{ + DBUG_ENTER("Rows_log_event::Rows_log_event(const char*,...)"); + uint8 const common_header_len= description_event->common_header_len; + Log_event_type event_type= (Log_event_type)(uchar)buf[EVENT_TYPE_OFFSET]; + m_type= event_type; + m_cols_ai.bitmap= 0; + + uint8 const post_header_len= 
description_event->post_header_len[event_type-1]; + + if (event_len < (uint)(common_header_len + post_header_len)) + { + m_cols.bitmap= 0; + DBUG_VOID_RETURN; + } + + DBUG_PRINT("enter",("event_len: %u common_header_len: %d " + "post_header_len: %d", + event_len, common_header_len, + post_header_len)); + + const uchar *post_start= buf + common_header_len; + post_start+= RW_MAPID_OFFSET; + if (post_header_len == 6) + { + /* Master is of an intermediate source tree before 5.1.4. Id is 4 bytes */ + m_table_id= uint4korr(post_start); + post_start+= 4; + } + else + { + m_table_id= (ulong) uint6korr(post_start); + post_start+= RW_FLAGS_OFFSET; + } + + m_flags_pos= post_start - buf; + m_flags= uint2korr(post_start); + post_start+= 2; + + uint16 var_header_len= 0; + if (post_header_len == ROWS_HEADER_LEN_V2) + { + /* + Have variable length header, check length, + which includes length bytes + */ + var_header_len= uint2korr(post_start); + /* Check length and also avoid out of buffer read */ + if (var_header_len < 2 || + event_len < static_cast(var_header_len + + (post_start - buf))) + { + m_cols.bitmap= 0; + DBUG_VOID_RETURN; + } + var_header_len-= 2; + + /* Iterate over var-len header, extracting 'chunks' */ + const uchar *start= post_start + 2; + const uchar *end= start + var_header_len; + for (const uchar* pos= start; pos < end;) + { + switch(*pos++) + { + case RW_V_EXTRAINFO_TAG: + { + /* Have an 'extra info' section, read it in */ + assert((end - pos) >= EXTRA_ROW_INFO_HDR_BYTES); + uint8 infoLen= pos[EXTRA_ROW_INFO_LEN_OFFSET]; + assert((end - pos) >= infoLen); + /* Just store/use the first tag of this type, skip others */ + if (likely(!m_extra_row_data)) + { + m_extra_row_data= (uchar*) my_malloc(PSI_INSTRUMENT_ME, infoLen, + MYF(MY_WME)); + if (likely(m_extra_row_data != NULL)) + { + memcpy(m_extra_row_data, pos, infoLen); + } + } + pos+= infoLen; + break; + } + default: + /* Unknown code, we will not understand anything further here */ + pos= end; /* Break loop */ 
+ } + } + } + + uchar const *const var_start= + (const uchar *)buf + common_header_len + post_header_len + var_header_len; + uchar const *const ptr_width= var_start; + uchar *ptr_after_width= (uchar*) ptr_width; + DBUG_PRINT("debug", ("Reading from %p", ptr_after_width)); + m_width= net_field_length(&ptr_after_width); + DBUG_PRINT("debug", ("m_width=%lu", m_width)); + + /* Avoid reading out of buffer */ + if (ptr_after_width + (m_width + 7) / 8 > (uchar*)buf + event_len) + { + m_cols.bitmap= NULL; + DBUG_VOID_RETURN; + } + + /* if my_bitmap_init fails, caught in is_valid() */ + if (likely(!my_bitmap_init(&m_cols, + m_width <= sizeof(m_bitbuf)*8 ? m_bitbuf : NULL, + m_width))) + { + DBUG_PRINT("debug", ("Reading from %p", ptr_after_width)); + memcpy(m_cols.bitmap, ptr_after_width, (m_width + 7) / 8); + create_last_word_mask(&m_cols); + ptr_after_width+= (m_width + 7) / 8; + DBUG_DUMP("m_cols", (uchar*) m_cols.bitmap, no_bytes_in_map(&m_cols)); + } + else + { + // Needed because my_bitmap_init() does not set it to null on failure + m_cols.bitmap= NULL; + DBUG_VOID_RETURN; + } + + m_cols_ai.bitmap= m_cols.bitmap; /* See explanation in is_valid() */ + + if (LOG_EVENT_IS_UPDATE_ROW(event_type)) + { + DBUG_PRINT("debug", ("Reading from %p", ptr_after_width)); + + /* if my_bitmap_init fails, caught in is_valid() */ + if (likely(!my_bitmap_init(&m_cols_ai, + m_width <= sizeof(m_bitbuf_ai)*8 ? 
m_bitbuf_ai : NULL, + m_width))) + { + DBUG_PRINT("debug", ("Reading from %p", ptr_after_width)); + memcpy(m_cols_ai.bitmap, ptr_after_width, (m_width + 7) / 8); + create_last_word_mask(&m_cols_ai); + ptr_after_width+= (m_width + 7) / 8; + DBUG_DUMP("m_cols_ai", (uchar*) m_cols_ai.bitmap, + no_bytes_in_map(&m_cols_ai)); + } + else + { + // Needed because my_bitmap_init() does not set it to null on failure + m_cols_ai.bitmap= 0; + DBUG_VOID_RETURN; + } + } + + const uchar* const ptr_rows_data= (const uchar*) ptr_after_width; + + size_t const read_size= ptr_rows_data - (const unsigned char *) buf; + if (read_size > event_len) + { + DBUG_VOID_RETURN; + } + size_t const data_size= event_len - read_size; + DBUG_PRINT("info",("m_table_id: %llu m_flags: %d m_width: %lu data_size: %lu", + m_table_id, m_flags, m_width, (ulong) data_size)); + + m_rows_buf= (uchar*) my_malloc(PSI_INSTRUMENT_ME, data_size, MYF(MY_WME)); + if (likely((bool)m_rows_buf)) + { +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + m_curr_row= m_rows_buf; +#endif + m_rows_end= m_rows_buf + data_size; + m_rows_cur= m_rows_end; + memcpy(m_rows_buf, ptr_rows_data, data_size); + m_rows_before_size= ptr_rows_data - (const uchar *) buf; // Get the size that before SET part + } + else + m_cols.bitmap= 0; // to not free it + + DBUG_VOID_RETURN; +} + +void Rows_log_event::uncompress_buf() +{ + uint32 un_len= binlog_get_uncompress_len(m_rows_buf); + if (!un_len) + return; + + uchar *new_buf= (uchar*) my_malloc(PSI_INSTRUMENT_ME, ALIGN_SIZE(un_len), + MYF(MY_WME)); + if (new_buf) + { + if (!binlog_buf_uncompress(m_rows_buf, new_buf, + (uint32)(m_rows_cur - m_rows_buf), &un_len)) + { + my_free(m_rows_buf); + m_rows_buf= new_buf; +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + m_curr_row= m_rows_buf; +#endif + m_rows_end= m_rows_buf + un_len; + m_rows_cur= m_rows_end; + return; + } + else + { + my_free(new_buf); + } + } + m_cols.bitmap= 0; // catch it in is_valid +} + 
+Rows_log_event::~Rows_log_event() +{ + if (m_cols.bitmap == m_bitbuf) // no my_malloc happened + m_cols.bitmap= 0; // so no my_free in my_bitmap_free + my_bitmap_free(&m_cols); // To pair with my_bitmap_init(). + my_free(m_rows_buf); + my_free(m_extra_row_data); +} + +int Rows_log_event::get_data_size() +{ + int const general_type_code= get_general_type_code(); + + uchar buf[MAX_INT_WIDTH]; + uchar *end= net_store_length(buf, m_width); + + DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", + return (int)(6 + no_bytes_in_map(&m_cols) + (end - buf) + + (general_type_code == UPDATE_ROWS_EVENT ? no_bytes_in_map(&m_cols_ai) : 0) + + m_rows_cur - m_rows_buf);); + int data_size= 0; + Log_event_type type= get_type_code(); + bool is_v2_event= LOG_EVENT_IS_ROW_V2(type); + if (is_v2_event) + { + data_size= ROWS_HEADER_LEN_V2 + + (m_extra_row_data ? + RW_V_TAG_LEN + m_extra_row_data[EXTRA_ROW_INFO_LEN_OFFSET]: + 0); + } + else + { + data_size= ROWS_HEADER_LEN_V1; + } + data_size+= no_bytes_in_map(&m_cols); + data_size+= (uint) (end - buf); + + if (general_type_code == UPDATE_ROWS_EVENT) + data_size+= no_bytes_in_map(&m_cols_ai); + + data_size+= (uint) (m_rows_cur - m_rows_buf); + return data_size; +} + + +/************************************************************************** + Annotate_rows_log_event member functions +**************************************************************************/ + +Annotate_rows_log_event:: +Annotate_rows_log_event(const uchar *buf, + uint event_len, + const Format_description_log_event *desc) + : Log_event(buf, desc), + m_save_thd_query_txt(0), + m_save_thd_query_len(0), + m_saved_thd_query(false), + m_used_query_txt(0) +{ + m_query_len= event_len - desc->common_header_len; + m_query_txt= (char*) buf + desc->common_header_len; +} + +Annotate_rows_log_event::~Annotate_rows_log_event() +{ + DBUG_ENTER("Annotate_rows_log_event::~Annotate_rows_log_event"); +#ifndef MYSQL_CLIENT + if (m_saved_thd_query) + 
thd->set_query(m_save_thd_query_txt, m_save_thd_query_len); + else if (m_used_query_txt) + thd->reset_query(); +#endif + DBUG_VOID_RETURN; +} + +int Annotate_rows_log_event::get_data_size() +{ + return m_query_len; +} + +Log_event_type Annotate_rows_log_event::get_type_code() +{ + return ANNOTATE_ROWS_EVENT; +} + +bool Annotate_rows_log_event::is_valid() const +{ + return (m_query_txt != NULL && m_query_len != 0); +} + + +/************************************************************************** + Table_map_log_event member functions and support functions +**************************************************************************/ + +/** + @page How replication of field metadata works. + + When a table map is created, the master first calls + Table_map_log_event::save_field_metadata() which calculates how many + values will be in the field metadata. Only those fields that require the + extra data are added. The method also loops through all of the fields in + the table calling the method Field::save_field_metadata() which returns the + values for the field that will be saved in the metadata and replicated to + the slave. Once all fields have been processed, the table map is written to + the binlog adding the size of the field metadata and the field metadata to + the end of the body of the table map. + + When a table map is read on the slave, the field metadata is read from the + table map and passed to the table_def class constructor which saves the + field metadata from the table map into an array based on the type of the + field. Field metadata values not present (those fields that do not use extra + data) in the table map are initialized as zero (0). The array size is the + same as the columns for the table on the slave. + + Additionally, values saved for field metadata on the master are saved as a + string of bytes (uchar) in the binlog. A field may require 1 or more bytes + to store the information. In cases where values require multiple bytes + (e.g. 
values > 255), the endian-safe methods are used to properly encode + the values on the master and decode them on the slave. When the field + metadata values are captured on the slave, they are stored in an array of + type uint16. This allows the least number of casts to prevent casting bugs + when the field metadata is used in comparisons of field attributes. When + the field metadata is used for calculating addresses in pointer math, the + type used is uint32. +*/ + +/* + Constructor used by slave to read the event from the binary log. + */ +#if defined(HAVE_REPLICATION) +Table_map_log_event::Table_map_log_event(const uchar *buf, uint event_len, + const Format_description_log_event + *description_event) + + : Log_event(buf, description_event), +#ifndef MYSQL_CLIENT + m_table(NULL), +#endif + m_dbnam(NULL), m_dblen(0), m_tblnam(NULL), m_tbllen(0), + m_colcnt(0), m_coltype(0), + m_memory(NULL), m_table_id(ULONGLONG_MAX), m_flags(0), + m_data_size(0), m_field_metadata(0), m_field_metadata_size(0), + m_null_bits(0), m_meta_memory(NULL), + m_optional_metadata_len(0), m_optional_metadata(NULL) +{ + unsigned int bytes_read= 0; + DBUG_ENTER("Table_map_log_event::Table_map_log_event(const char*,uint,...)"); + + uint8 common_header_len= description_event->common_header_len; + uint8 post_header_len= description_event->post_header_len[TABLE_MAP_EVENT-1]; + DBUG_PRINT("info",("event_len: %u common_header_len: %d post_header_len: %d", + event_len, common_header_len, post_header_len)); + + /* + Don't print debug messages when running valgrind since they can + trigger false warnings. 
+ */ +#ifndef HAVE_valgrind + DBUG_DUMP("event buffer", (uchar*) buf, event_len); +#endif + + if (event_len < (uint)(common_header_len + post_header_len)) + DBUG_VOID_RETURN; + + /* Read the post-header */ + const uchar *post_start= buf + common_header_len; + + post_start+= TM_MAPID_OFFSET; + VALIDATE_BYTES_READ(post_start, buf, event_len); + if (post_header_len == 6) + { + /* Master is of an intermediate source tree before 5.1.4. Id is 4 bytes */ + m_table_id= uint4korr(post_start); + post_start+= 4; + } + else + { + DBUG_ASSERT(post_header_len == TABLE_MAP_HEADER_LEN); + m_table_id= (ulong) uint6korr(post_start); + post_start+= TM_FLAGS_OFFSET; + } + + DBUG_ASSERT(m_table_id != ~0ULL); + + m_flags= uint2korr(post_start); + + /* Read the variable part of the event */ + const uchar *const vpart= buf + common_header_len + post_header_len; + + /* Extract the length of the various parts from the buffer */ + uchar const *const ptr_dblen= (uchar const*)vpart + 0; + VALIDATE_BYTES_READ(ptr_dblen, buf, event_len); + m_dblen= *(uchar*) ptr_dblen; + + /* Length of database name + counter + terminating null */ + uchar const *const ptr_tbllen= ptr_dblen + m_dblen + 2; + VALIDATE_BYTES_READ(ptr_tbllen, buf, event_len); + m_tbllen= *(uchar*) ptr_tbllen; + + /* Length of table name + counter + terminating null */ + uchar const *const ptr_colcnt= ptr_tbllen + m_tbllen + 2; + uchar *ptr_after_colcnt= (uchar*) ptr_colcnt; + VALIDATE_BYTES_READ(ptr_after_colcnt, buf, event_len); + m_colcnt= net_field_length(&ptr_after_colcnt); + + DBUG_PRINT("info",("m_dblen: %lu off: %ld m_tbllen: %lu off: %ld m_colcnt: %lu off: %ld", + (ulong) m_dblen, (long) (ptr_dblen - vpart), + (ulong) m_tbllen, (long) (ptr_tbllen - vpart), + m_colcnt, (long) (ptr_colcnt - vpart))); + + /* Allocate mem for all fields in one go. 
If fails, caught in is_valid() */ + m_memory= (uchar*) my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME), + &m_dbnam, (uint) m_dblen + 1, + &m_tblnam, (uint) m_tbllen + 1, + &m_coltype, (uint) m_colcnt, + NullS); + + if (m_memory) + { + /* Copy the different parts into their memory */ + strncpy(const_cast(m_dbnam), (const char*)ptr_dblen + 1, m_dblen + 1); + strncpy(const_cast(m_tblnam), (const char*)ptr_tbllen + 1, m_tbllen + 1); + memcpy(m_coltype, ptr_after_colcnt, m_colcnt); + + ptr_after_colcnt= ptr_after_colcnt + m_colcnt; + VALIDATE_BYTES_READ(ptr_after_colcnt, buf, event_len); + m_field_metadata_size= net_field_length(&ptr_after_colcnt); + if (m_field_metadata_size <= (m_colcnt * 2)) + { + uint num_null_bytes= (m_colcnt + 7) / 8; + m_meta_memory= (uchar *)my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME), + &m_null_bits, num_null_bytes, + &m_field_metadata, m_field_metadata_size, + NULL); + memcpy(m_field_metadata, ptr_after_colcnt, m_field_metadata_size); + ptr_after_colcnt= (uchar*)ptr_after_colcnt + m_field_metadata_size; + memcpy(m_null_bits, ptr_after_colcnt, num_null_bytes); + ptr_after_colcnt= (unsigned char*)ptr_after_colcnt + num_null_bytes; + } + else + { + m_coltype= NULL; + my_free(m_memory); + m_memory= NULL; + DBUG_VOID_RETURN; + } + + bytes_read= (uint) (ptr_after_colcnt - (uchar *)buf); + + /* After null_bits field, there are some new fields for extra metadata. 
*/ + if (bytes_read < event_len) + { + m_optional_metadata_len= event_len - bytes_read; + m_optional_metadata= + static_cast(my_malloc(PSI_INSTRUMENT_ME, m_optional_metadata_len, MYF(MY_WME))); + memcpy(m_optional_metadata, ptr_after_colcnt, m_optional_metadata_len); + } + } +#ifdef MYSQL_SERVER + if (!m_table) + DBUG_VOID_RETURN; + binlog_type_info_array= (Binlog_type_info *)thd->alloc(m_table->s->fields * + sizeof(Binlog_type_info)); + for (uint i= 0; i < m_table->s->fields; i++) + binlog_type_info_array[i]= m_table->field[i]->binlog_type_info(); +#endif + + DBUG_VOID_RETURN; +} +#endif + +Table_map_log_event::~Table_map_log_event() +{ + my_free(m_meta_memory); + my_free(m_memory); + my_free(m_optional_metadata); + m_optional_metadata= NULL; +} + +/** + Parses SIGNEDNESS field. + + @param[out] vec stores the signedness flags extracted from field. + @param[in] field SIGNEDNESS field in table_map_event. + @param[in] length length of the field + */ +static void parse_signedness(std::vector &vec, + unsigned char *field, unsigned int length) +{ + for (unsigned int i= 0; i < length; i++) + { + for (unsigned char c= 0x80; c != 0; c>>= 1) + vec.push_back(field[i] & c); + } +} + +/** + Parses DEFAULT_CHARSET field. + + @param[out] default_charset stores collation numbers extracted from field. + @param[in] field DEFAULT_CHARSET field in table_map_event. + @param[in] length length of the field + */ +static void parse_default_charset(Table_map_log_event::Optional_metadata_fields:: + Default_charset &default_charset, + unsigned char *field, unsigned int length) +{ + unsigned char* p= field; + + default_charset.default_charset= net_field_length(&p); + while (p < field + length) + { + unsigned int col_index= net_field_length(&p); + unsigned int col_charset= net_field_length(&p); + + default_charset.charset_pairs.push_back(std::make_pair(col_index, + col_charset)); + } +} + +/** + Parses COLUMN_CHARSET field. + + @param[out] vec stores collation numbers extracted from field. 
+ @param[in] field COLUMN_CHARSET field in table_map_event. + @param[in] length length of the field + */ +static void parse_column_charset(std::vector &vec, + unsigned char *field, unsigned int length) +{ + unsigned char* p= field; + + while (p < field + length) + vec.push_back(net_field_length(&p)); +} + +/** + Parses COLUMN_NAME field. + + @param[out] vec stores column names extracted from field. + @param[in] field COLUMN_NAME field in table_map_event. + @param[in] length length of the field + */ +static void parse_column_name(std::vector &vec, + unsigned char *field, unsigned int length) +{ + unsigned char* p= field; + + while (p < field + length) + { + unsigned len= net_field_length(&p); + vec.push_back(std::string(reinterpret_cast(p), len)); + p+= len; + } +} + +/** + Parses SET_STR_VALUE/ENUM_STR_VALUE field. + + @param[out] vec stores SET/ENUM column's string values extracted from + field. Each SET/ENUM column's string values are stored + into a string separate vector. All of them are stored + in 'vec'. + @param[in] field COLUMN_NAME field in table_map_event. + @param[in] length length of the field + */ +static void parse_set_str_value(std::vector &vec, + unsigned char *field, unsigned int length) +{ + unsigned char* p= field; + + while (p < field + length) + { + unsigned int count= net_field_length(&p); + + vec.push_back(std::vector()); + for (unsigned int i= 0; i < count; i++) + { + unsigned len1= net_field_length(&p); + vec.back().push_back(std::string(reinterpret_cast(p), len1)); + p+= len1; + } + } +} + +/** + Parses GEOMETRY_TYPE field. + + @param[out] vec stores geometry column's types extracted from field. + @param[in] field GEOMETRY_TYPE field in table_map_event. + @param[in] length length of the field + */ +static void parse_geometry_type(std::vector &vec, + unsigned char *field, unsigned int length) +{ + unsigned char* p= field; + + while (p < field + length) + vec.push_back(net_field_length(&p)); +} + +/** + Parses SIMPLE_PRIMARY_KEY field. 
+ + @param[out] vec stores primary key's column information extracted from + field. Each column has an index and a prefix which are + stored as a unit_pair. prefix is always 0 for + SIMPLE_PRIMARY_KEY field. + @param[in] field SIMPLE_PRIMARY_KEY field in table_map_event. + @param[in] length length of the field + */ +static void parse_simple_pk(std::vector &vec, + unsigned char *field, unsigned int length) +{ + unsigned char* p= field; + + while (p < field + length) + vec.push_back(std::make_pair(net_field_length(&p), 0)); +} + +/** + Parses PRIMARY_KEY_WITH_PREFIX field. + + @param[out] vec stores primary key's column information extracted from + field. Each column has an index and a prefix which are + stored as a unit_pair. + @param[in] field PRIMARY_KEY_WITH_PREFIX field in table_map_event. + @param[in] length length of the field + */ + +static void parse_pk_with_prefix(std::vector &vec, + unsigned char *field, unsigned int length) +{ + unsigned char* p= field; + + while (p < field + length) + { + unsigned int col_index= net_field_length(&p); + unsigned int col_prefix= net_field_length(&p); + vec.push_back(std::make_pair(col_index, col_prefix)); + } +} + +Table_map_log_event::Optional_metadata_fields:: +Optional_metadata_fields(unsigned char* optional_metadata, + unsigned int optional_metadata_len) +{ + unsigned char* field= optional_metadata; + + if (optional_metadata == NULL) + return; + + while (field < optional_metadata + optional_metadata_len) + { + unsigned int len; + Optional_metadata_field_type type= + static_cast(field[0]); + + // Get length and move field to the value. 
+ field++; + len= net_field_length(&field); + + switch(type) + { + case SIGNEDNESS: + parse_signedness(m_signedness, field, len); + break; + case DEFAULT_CHARSET: + parse_default_charset(m_default_charset, field, len); + break; + case COLUMN_CHARSET: + parse_column_charset(m_column_charset, field, len); + break; + case COLUMN_NAME: + parse_column_name(m_column_name, field, len); + break; + case SET_STR_VALUE: + parse_set_str_value(m_set_str_value, field, len); + break; + case ENUM_STR_VALUE: + parse_set_str_value(m_enum_str_value, field, len); + break; + case GEOMETRY_TYPE: + parse_geometry_type(m_geometry_type, field, len); + break; + case SIMPLE_PRIMARY_KEY: + parse_simple_pk(m_primary_key, field, len); + break; + case PRIMARY_KEY_WITH_PREFIX: + parse_pk_with_prefix(m_primary_key, field, len); + break; + case ENUM_AND_SET_DEFAULT_CHARSET: + parse_default_charset(m_enum_and_set_default_charset, field, len); + break; + case ENUM_AND_SET_COLUMN_CHARSET: + parse_column_charset(m_enum_and_set_column_charset, field, len); + break; + default: + DBUG_ASSERT(0); + } + // next field + field+= len; + } +} + + +/************************************************************************** + Write_rows_log_event member functions +**************************************************************************/ + + +/* + Constructor used by slave to read the event from the binary log. 
+ */ +#ifdef HAVE_REPLICATION +Write_rows_log_event::Write_rows_log_event(const uchar *buf, uint event_len, + const Format_description_log_event + *description_event) +: Rows_log_event(buf, event_len, description_event) +{ +} + +Write_rows_compressed_log_event::Write_rows_compressed_log_event( + const uchar *buf, uint event_len, + const Format_description_log_event + *description_event) +: Write_rows_log_event(buf, event_len, description_event) +{ + uncompress_buf(); +} +#endif + + +/************************************************************************** + Delete_rows_log_event member functions +**************************************************************************/ + +/* + Constructor used by slave to read the event from the binary log. + */ +#ifdef HAVE_REPLICATION +Delete_rows_log_event::Delete_rows_log_event(const uchar *buf, uint event_len, + const Format_description_log_event + *description_event) + : Rows_log_event(buf, event_len, description_event) +{ +} + +Delete_rows_compressed_log_event::Delete_rows_compressed_log_event( + const uchar *buf, uint event_len, + const Format_description_log_event + *description_event) + : Delete_rows_log_event(buf, event_len, description_event) +{ + uncompress_buf(); +} +#endif + +/************************************************************************** + Update_rows_log_event member functions +**************************************************************************/ + +Update_rows_log_event::~Update_rows_log_event() +{ + if (m_cols_ai.bitmap) + { + if (m_cols_ai.bitmap == m_bitbuf_ai) // no my_malloc happened + m_cols_ai.bitmap= 0; // so no my_free in my_bitmap_free + my_bitmap_free(&m_cols_ai); // To pair with my_bitmap_init(). + } +} + + +/* + Constructor used by slave to read the event from the binary log. 
+ */ +#ifdef HAVE_REPLICATION +Update_rows_log_event::Update_rows_log_event(const uchar *buf, uint event_len, + const + Format_description_log_event + *description_event) + : Rows_log_event(buf, event_len, description_event) +{ +} + +Update_rows_compressed_log_event::Update_rows_compressed_log_event( + const uchar *buf, uint event_len, + const Format_description_log_event + *description_event) + : Update_rows_log_event(buf, event_len, description_event) +{ + uncompress_buf(); +} +#endif + +Incident_log_event::Incident_log_event(const uchar *buf, uint event_len, + const Format_description_log_event *descr_event) + : Log_event(buf, descr_event) +{ + DBUG_ENTER("Incident_log_event::Incident_log_event"); + uint8 const common_header_len= + descr_event->common_header_len; + uint8 const post_header_len= + descr_event->post_header_len[INCIDENT_EVENT-1]; + + DBUG_PRINT("info",("event_len: %u; common_header_len: %d; post_header_len: %d", + event_len, common_header_len, post_header_len)); + + m_message.str= NULL; + m_message.length= 0; + int incident_number= uint2korr(buf + common_header_len); + if (incident_number >= INCIDENT_COUNT || + incident_number <= INCIDENT_NONE) + { + // If the incident is not recognized, this binlog event is + // invalid. If we set incident_number to INCIDENT_NONE, the + // invalidity will be detected by is_valid(). 
+ m_incident= INCIDENT_NONE; + DBUG_VOID_RETURN; + } + m_incident= static_cast(incident_number); + uchar const *ptr= buf + common_header_len + post_header_len; + uchar const *const str_end= buf + event_len; + uint8 len= 0; // Assignment to keep compiler happy + const char *str= NULL; // Assignment to keep compiler happy + if (read_str(&ptr, str_end, &str, &len)) + { + /* Mark this event invalid */ + m_incident= INCIDENT_NONE; + DBUG_VOID_RETURN; + } + if (!(m_message.str= (char*) my_malloc(key_memory_log_event, len+1, MYF(MY_WME)))) + { + /* Mark this event invalid */ + m_incident= INCIDENT_NONE; + DBUG_VOID_RETURN; + } + strmake(m_message.str, str, len); + m_message.length= len; + DBUG_PRINT("info", ("m_incident: %d", m_incident)); + DBUG_VOID_RETURN; +} + + +Incident_log_event::~Incident_log_event() +{ + if (m_message.str) + my_free(m_message.str); +} + + +const char * +Incident_log_event::description() const +{ + static const char *const description[]= { + "NOTHING", // Not used + "LOST_EVENTS" + }; + + DBUG_PRINT("info", ("m_incident: %d", m_incident)); + return description[m_incident]; +} + + +Ignorable_log_event::Ignorable_log_event(const uchar *buf, + const Format_description_log_event + *descr_event, + const char *event_name) + :Log_event(buf, descr_event), number((int) (uchar) buf[EVENT_TYPE_OFFSET]), + description(event_name) +{ + DBUG_ENTER("Ignorable_log_event::Ignorable_log_event"); + DBUG_VOID_RETURN; +} + +Ignorable_log_event::~Ignorable_log_event() = default; + +bool copy_event_cache_to_file_and_reinit(IO_CACHE *cache, FILE *file) +{ + return (my_b_copy_all_to_file(cache, file) || + reinit_io_cache(cache, WRITE_CACHE, 0, FALSE, TRUE)); +} + +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) +int Log_event::apply_event(rpl_group_info* rgi) +{ + int res; + THD_STAGE_INFO(thd, stage_apply_event); + rgi->current_event= this; + res= do_apply_event(rgi); + rgi->current_event= NULL; + THD_STAGE_INFO(thd, stage_after_apply_event); + return res; +} 
+#endif diff --git a/sql/log_event.h b/sql/log_event.h new file mode 100644 index 00000000..67e06d70 --- /dev/null +++ b/sql/log_event.h @@ -0,0 +1,5931 @@ +/* Copyright (c) 2000, 2014, Oracle and/or its affiliates. + Copyright (c) 2009, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @addtogroup Replication + @{ + + @file + + @brief Binary log event definitions. This includes generic code + common to all types of log events, as well as specific code for each + type of log event. +*/ + + +#ifndef _log_event_h +#define _log_event_h + +#if defined(USE_PRAGMA_INTERFACE) && defined(MYSQL_SERVER) +#pragma interface /* gcc class implementation */ +#endif + +#include +#include "rpl_constants.h" +#include +#include +#include +#include +#include + +#ifdef MYSQL_CLIENT +#include "sql_const.h" +#include "rpl_utility.h" +#include "hash.h" +#include "rpl_tblmap.h" +#include "sql_string.h" +#endif + +#ifdef MYSQL_SERVER +#include "rpl_record.h" +#include "rpl_reporting.h" +#include "sql_class.h" /* THD */ +#endif + +#include "rpl_gtid.h" + +/* Forward declarations */ +#ifndef MYSQL_CLIENT +class String; +#endif + +#define PREFIX_SQL_LOAD "SQL_LOAD-" +#define LONG_FIND_ROW_THRESHOLD 60 /* seconds */ + +/** + Either assert or return an error. 
+ + In debug build, the condition will be checked, but in non-debug + builds, the error code given will be returned instead. + + @param COND Condition to check + @param ERRNO Error number to return in non-debug builds +*/ +#ifdef DBUG_OFF +#define ASSERT_OR_RETURN_ERROR(COND, ERRNO) \ + do { if (!(COND)) return ERRNO; } while (0) +#else +#define ASSERT_OR_RETURN_ERROR(COND, ERRNO) \ + DBUG_ASSERT(COND) +#endif + +#define LOG_READ_EOF -1 +#define LOG_READ_BOGUS -2 +#define LOG_READ_IO -3 +#define LOG_READ_MEM -5 +#define LOG_READ_TRUNC -6 +#define LOG_READ_TOO_LARGE -7 +#define LOG_READ_CHECKSUM_FAILURE -8 +#define LOG_READ_DECRYPT -9 + +#define LOG_EVENT_OFFSET 4 + +/* + 3 is MySQL 4.x; 4 is MySQL 5.0.0. + Compared to version 3, version 4 has: + - a different Start_log_event, which includes info about the binary log + (sizes of headers); this info is included for better compatibility if the + master's MySQL version is different from the slave's. + - all events have a unique ID (the triplet (server_id, timestamp at server + start, other) to be sure an event is not executed more than once in a + multimaster setup, example: + M1 + / \ + v v + M2 M3 + \ / + v v + S + if a query is run on M1, it will arrive twice on S, so we need that S + remembers the last unique ID it has processed, to compare and know if the + event should be skipped or not. Example of ID: we already have the server id + (4 bytes), plus: + timestamp_when_the_master_started (4 bytes), a counter (a sequence number + which increments every time we write an event to the binlog) (3 bytes). + Q: how do we handle when the counter is overflowed and restarts from 0 ? + + - Query and Load (Create or Execute) events may have a more precise + timestamp (with microseconds), number of matched/affected/warnings rows + and fields of session variables: SQL_MODE, + FOREIGN_KEY_CHECKS, UNIQUE_CHECKS, SQL_AUTO_IS_NULL, the collations and + charsets, the PASSWORD() version (old/new/...). 
+*/ +#define BINLOG_VERSION 4 + +/* + We could have used SERVER_VERSION_LENGTH, but this introduces an + obscure dependency - if somebody decided to change SERVER_VERSION_LENGTH + this would break the replication protocol +*/ +#define ST_SERVER_VER_LEN 50 + +/* + These are flags and structs to handle all the LOAD DATA INFILE options (LINES + TERMINATED etc). +*/ + +/* + These are flags and structs to handle all the LOAD DATA INFILE options (LINES + TERMINATED etc). + DUMPFILE_FLAG is probably useless (DUMPFILE is a clause of SELECT, not of LOAD + DATA). +*/ +#define DUMPFILE_FLAG 0x1 +#define OPT_ENCLOSED_FLAG 0x2 +#define REPLACE_FLAG 0x4 +#define IGNORE_FLAG 0x8 + +#define FIELD_TERM_EMPTY 0x1 +#define ENCLOSED_EMPTY 0x2 +#define LINE_TERM_EMPTY 0x4 +#define LINE_START_EMPTY 0x8 +#define ESCAPED_EMPTY 0x10 + +#define NUM_LOAD_DELIM_STRS 5 + +/***************************************************************************** + + MySQL Binary Log + + This log consists of events. Each event has a fixed-length header, + possibly followed by a variable length data body. + + The data body consists of an optional fixed length segment (post-header) + and an optional variable length segment. + + See the #defines below for the format specifics. + + The events which really update data are Query_log_event, + Execute_load_query_log_event and old Load_log_event and + Execute_load_log_event events (Execute_load_query is used together with + Begin_load_query and Append_block events to replicate LOAD DATA INFILE. + Create_file/Append_block/Execute_load (which includes Load_log_event) + were used to replicate LOAD DATA before the 5.0.3). + + ****************************************************************************/ + +#define LOG_EVENT_HEADER_LEN 19 /* the fixed header length */ +#define OLD_HEADER_LEN 13 /* the fixed header length in 3.23 */ +/* + Fixed header length, where 4.x and 5.0 agree. 
That is, 5.0 may have a longer + header (it will for sure when we have the unique event's ID), but at least + the first 19 bytes are the same in 4.x and 5.0. So when we have the unique + event's ID, LOG_EVENT_HEADER_LEN will be something like 26, but + LOG_EVENT_MINIMAL_HEADER_LEN will remain 19. +*/ +#define LOG_EVENT_MINIMAL_HEADER_LEN 19 + +/* event-specific post-header sizes */ +// where 3.23, 4.x and 5.0 agree +#define QUERY_HEADER_MINIMAL_LEN (4 + 4 + 1 + 2) +// where 5.0 differs: 2 for len of N-bytes vars. +#define QUERY_HEADER_LEN (QUERY_HEADER_MINIMAL_LEN + 2) +#define STOP_HEADER_LEN 0 +#define LOAD_HEADER_LEN (4 + 4 + 4 + 1 +1 + 4) +#define SLAVE_HEADER_LEN 0 +#define START_V3_HEADER_LEN (2 + ST_SERVER_VER_LEN + 4) +#define ROTATE_HEADER_LEN 8 // this is FROZEN (the Rotate post-header is frozen) +#define INTVAR_HEADER_LEN 0 +#define CREATE_FILE_HEADER_LEN 4 +#define APPEND_BLOCK_HEADER_LEN 4 +#define EXEC_LOAD_HEADER_LEN 4 +#define DELETE_FILE_HEADER_LEN 4 +#define NEW_LOAD_HEADER_LEN LOAD_HEADER_LEN +#define RAND_HEADER_LEN 0 +#define USER_VAR_HEADER_LEN 0 +#define FORMAT_DESCRIPTION_HEADER_LEN (START_V3_HEADER_LEN+1+LOG_EVENT_TYPES) +#define XID_HEADER_LEN 0 +#define BEGIN_LOAD_QUERY_HEADER_LEN APPEND_BLOCK_HEADER_LEN +#define ROWS_HEADER_LEN_V1 8 +#define TABLE_MAP_HEADER_LEN 8 +#define EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN (4 + 4 + 4 + 1) +#define EXECUTE_LOAD_QUERY_HEADER_LEN (QUERY_HEADER_LEN + EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN) +#define INCIDENT_HEADER_LEN 2 +#define HEARTBEAT_HEADER_LEN 0 +#define IGNORABLE_HEADER_LEN 0 +#define ROWS_HEADER_LEN_V2 10 +#define ANNOTATE_ROWS_HEADER_LEN 0 +#define BINLOG_CHECKPOINT_HEADER_LEN 4 +#define GTID_HEADER_LEN 19 +#define GTID_LIST_HEADER_LEN 4 +#define START_ENCRYPTION_HEADER_LEN 0 +#define XA_PREPARE_HEADER_LEN 0 + +/* + Max number of possible extra bytes in a replication event compared to a + packet (i.e. 
a query) sent from client to master; + First, an auxiliary log_event status vars estimation: +*/ +#define MAX_SIZE_LOG_EVENT_STATUS (1 + 4 /* type, flags2 */ + \ + 1 + 8 /* type, sql_mode */ + \ + 1 + 1 + 255 /* type, length, catalog */ + \ + 1 + 4 /* type, auto_increment */ + \ + 1 + 6 /* type, charset */ + \ + 1 + 1 + 255 /* type, length, time_zone */ + \ + 1 + 2 /* type, lc_time_names_number */ + \ + 1 + 2 /* type, charset_database_number */ + \ + 1 + 8 /* type, table_map_for_update */ + \ + 1 + 4 /* type, master_data_written */ + \ + 1 + 3 /* type, sec_part of NOW() */ + \ + 1 + 16 + 1 + 60/* type, user_len, user, host_len, host */ + \ + 1 + 2 + 8 /* type, flags3, seq_no */) +#define MAX_LOG_EVENT_HEADER ( /* in order of Query_log_event::write */ \ + LOG_EVENT_HEADER_LEN + /* write_header */ \ + QUERY_HEADER_LEN + /* write_data */ \ + EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN + /*write_post_header_for_derived */ \ + MAX_SIZE_LOG_EVENT_STATUS + /* status */ \ + NAME_LEN + 1) + +/* + The new option is added to handle large packets that are sent from the master + to the slave. It is used to increase the thd(max_allowed) for both the + DUMP thread on the master and the SQL/IO thread on the slave. +*/ +#define MAX_MAX_ALLOWED_PACKET (1024*1024*1024) + +/* + Event header offsets; + these point to places inside the fixed header. 
+*/ + +#define EVENT_TYPE_OFFSET 4 +#define SERVER_ID_OFFSET 5 +#define EVENT_LEN_OFFSET 9 +#define LOG_POS_OFFSET 13 +#define FLAGS_OFFSET 17 + +/* start event post-header (for v3 and v4) */ + +#define ST_BINLOG_VER_OFFSET 0 +#define ST_SERVER_VER_OFFSET 2 +#define ST_CREATED_OFFSET (ST_SERVER_VER_OFFSET + ST_SERVER_VER_LEN) +#define ST_COMMON_HEADER_LEN_OFFSET (ST_CREATED_OFFSET + 4) + +/* slave event post-header (this event is never written) */ + +#define SL_MASTER_PORT_OFFSET 8 +#define SL_MASTER_POS_OFFSET 0 +#define SL_MASTER_HOST_OFFSET 10 + +/* query event post-header */ + +#define Q_THREAD_ID_OFFSET 0 +#define Q_EXEC_TIME_OFFSET 4 +#define Q_DB_LEN_OFFSET 8 +#define Q_ERR_CODE_OFFSET 9 +#define Q_STATUS_VARS_LEN_OFFSET 11 +#define Q_DATA_OFFSET QUERY_HEADER_LEN +/* these are codes, not offsets; not more than 256 values (1 byte). */ +#define Q_FLAGS2_CODE 0 +#define Q_SQL_MODE_CODE 1 +/* + Q_CATALOG_CODE is catalog with end zero stored; it is used only by MySQL + 5.0.x where 0<=x<=3. We have to keep it to be able to replicate these + old masters. +*/ +#define Q_CATALOG_CODE 2 +#define Q_AUTO_INCREMENT 3 +#define Q_CHARSET_CODE 4 +#define Q_TIME_ZONE_CODE 5 +/* + Q_CATALOG_NZ_CODE is catalog withOUT end zero stored; it is used by MySQL + 5.0.x where x>=4. Saves one byte in every Query_log_event in binlog, + compared to Q_CATALOG_CODE. The reason we didn't simply re-use + Q_CATALOG_CODE is that then a 5.0.3 slave of this 5.0.x (x>=4) master would + crash (segfault etc) because it would expect a 0 when there is none. 
+*/ +#define Q_CATALOG_NZ_CODE 6 + +#define Q_LC_TIME_NAMES_CODE 7 + +#define Q_CHARSET_DATABASE_CODE 8 + +#define Q_TABLE_MAP_FOR_UPDATE_CODE 9 + +#define Q_MASTER_DATA_WRITTEN_CODE 10 + +#define Q_INVOKER 11 + +#define Q_HRNOW 128 +#define Q_XID 129 + +#define Q_GTID_FLAGS3 130 +/* Intvar event post-header */ + +/* Intvar event data */ +#define I_TYPE_OFFSET 0 +#define I_VAL_OFFSET 1 + +/* Rand event data */ +#define RAND_SEED1_OFFSET 0 +#define RAND_SEED2_OFFSET 8 + +/* User_var event data */ +#define UV_VAL_LEN_SIZE 4 +#define UV_VAL_IS_NULL 1 +#define UV_VAL_TYPE_SIZE 1 +#define UV_NAME_LEN_SIZE 4 +#define UV_CHARSET_NUMBER_SIZE 4 + +/* Load event post-header */ +#define L_THREAD_ID_OFFSET 0 +#define L_EXEC_TIME_OFFSET 4 +#define L_SKIP_LINES_OFFSET 8 +#define L_TBL_LEN_OFFSET 12 +#define L_DB_LEN_OFFSET 13 +#define L_NUM_FIELDS_OFFSET 14 +#define L_SQL_EX_OFFSET 18 +#define L_DATA_OFFSET LOAD_HEADER_LEN + +/* Rotate event post-header */ +#define R_POS_OFFSET 0 +#define R_IDENT_OFFSET 8 + +/* CF to DF handle LOAD DATA INFILE */ + +/* CF = "Create File" */ +#define CF_FILE_ID_OFFSET 0 +#define CF_DATA_OFFSET CREATE_FILE_HEADER_LEN + +/* AB = "Append Block" */ +#define AB_FILE_ID_OFFSET 0 +#define AB_DATA_OFFSET APPEND_BLOCK_HEADER_LEN + +/* EL = "Execute Load" */ +#define EL_FILE_ID_OFFSET 0 + +/* DF = "Delete File" */ +#define DF_FILE_ID_OFFSET 0 + +/* TM = "Table Map" */ +#define TM_MAPID_OFFSET 0 +#define TM_FLAGS_OFFSET 6 + +/* RW = "RoWs" */ +#define RW_MAPID_OFFSET 0 +#define RW_FLAGS_OFFSET 6 +#define RW_VHLEN_OFFSET 8 +#define RW_V_TAG_LEN 1 +#define RW_V_EXTRAINFO_TAG 0 + +/* ELQ = "Execute Load Query" */ +#define ELQ_FILE_ID_OFFSET QUERY_HEADER_LEN +#define ELQ_FN_POS_START_OFFSET ELQ_FILE_ID_OFFSET + 4 +#define ELQ_FN_POS_END_OFFSET ELQ_FILE_ID_OFFSET + 8 +#define ELQ_DUP_HANDLING_OFFSET ELQ_FILE_ID_OFFSET + 12 + +/* 4 bytes which all binlogs should begin with */ +#define BINLOG_MAGIC (const uchar*) "\xfe\x62\x69\x6e" + +/* + The 2 flags below were 
useless : + - the first one was never set + - the second one was set in all Rotate events on the master, but not used for + anything useful. + So they are now removed and their place may later be reused for other + flags. Then one must remember that Rotate events in 4.x have + LOG_EVENT_FORCED_ROTATE_F set, so one should not rely on the value of the + replacing flag when reading a Rotate event. + I keep the defines here just to remember what they were. +*/ +#ifdef TO_BE_REMOVED +#define LOG_EVENT_TIME_F 0x1 +#define LOG_EVENT_FORCED_ROTATE_F 0x2 +#endif + +/* + This flag only makes sense for Format_description_log_event. It is set + when the event is written, and *reset* when a binlog file is + closed (yes, it's the only case when MySQL modifies already written + part of binlog). Thus it is a reliable indicator that binlog was + closed correctly. (Stop_log_event is not enough, there's always a + small chance that mysqld crashes in the middle of insert and end of + the binlog would look like a Stop_log_event). + + This flag is used to detect a restart after a crash, and to provide + "unbreakable" binlog. The problem is that on a crash storage engines + rollback automatically, while binlog does not. To solve this we use this + flag and automatically append ROLLBACK to every non-closed binlog (append + virtually, on reading, file itself is not changed). If this flag is found, + mysqlbinlog simply prints "ROLLBACK" Replication master does not abort on + binlog corruption, but takes it as EOF, and replication slave forces a + rollback in this case. + + Note, that old binlogs does not have this flag set, so we get a + a backward-compatible behaviour. +*/ + +#define LOG_EVENT_BINLOG_IN_USE_F 0x1 + +/** + @def LOG_EVENT_THREAD_SPECIFIC_F + + If the query depends on the thread (for example: TEMPORARY TABLE). 
+ Currently this is used by mysqlbinlog to know it must print + SET @@PSEUDO_THREAD_ID=xx; before the query (it would not hurt to print it + for every query but this would be slow). +*/ +#define LOG_EVENT_THREAD_SPECIFIC_F 0x4 + +/** + @def LOG_EVENT_SUPPRESS_USE_F + + Suppress the generation of 'USE' statements before the actual + statement. This flag should be set for any events that does not need + the current database set to function correctly. Most notable cases + are 'CREATE DATABASE' and 'DROP DATABASE'. + + This flags should only be used in exceptional circumstances, since + it introduce a significant change in behaviour regarding the + replication logic together with the flags --binlog-do-db and + --replicated-do-db. + */ +#define LOG_EVENT_SUPPRESS_USE_F 0x8 + +/* + Note: this is a place holder for the flag + LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F (0x10), which is not used any + more, please do not reused this value for other flags. + */ + +/** + @def LOG_EVENT_ARTIFICIAL_F + + Artificial events are created arbitrarily and not written to binary + log + + These events should not update the master log position when slave + SQL thread executes them. +*/ +#define LOG_EVENT_ARTIFICIAL_F 0x20 + +/** + @def LOG_EVENT_RELAY_LOG_F + + Events with this flag set are created by slave IO thread and written + to relay log +*/ +#define LOG_EVENT_RELAY_LOG_F 0x40 + +/** + @def LOG_EVENT_IGNORABLE_F + + For an event, 'e', carrying a type code, that a slave, + 's', does not recognize, 's' will check 'e' for + LOG_EVENT_IGNORABLE_F, and if the flag is set, then 'e' + is ignored. Otherwise, 's' acknowledges that it has + found an unknown event in the relay log. +*/ +#define LOG_EVENT_IGNORABLE_F 0x80 + +/** + @def LOG_EVENT_ACCEPT_OWN_F + + Flag sets by the semisync slave for accepting + the same server_id ("own") events which the slave must not have + in its state. 
Typically such events were never committed by + their originator (this server) and discared at its semisync-slave recovery. +*/ +#define LOG_EVENT_ACCEPT_OWN_F 0x4000 + +/** + @def LOG_EVENT_SKIP_REPLICATION_F + + Flag set by application creating the event (with @@skip_replication); the + slave will skip replication of such events if + --replicate-events-marked-for-skip is not set to REPLICATE. + + This is a MariaDB flag; we allocate it from the end of the available + values to reduce risk of conflict with new MySQL flags. +*/ +#define LOG_EVENT_SKIP_REPLICATION_F 0x8000 + + +/** + @def OPTIONS_WRITTEN_TO_BIN_LOG + + OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must + be written to the binlog. OPTIONS_WRITTEN_TO_BIN_LOG could be + written into the Format_description_log_event, so that if later we + don't want to replicate a variable we did replicate, or the + contrary, it's doable. But it should not be too hard to deduct + the value of OPTIONS_WRITTEN_TO_BIN_LOG from the master's version. + + This is done in deduct_options_written_to_bin_log(). + You *must* update it, when changing the definition below. +*/ +#define OPTIONS_WRITTEN_TO_BIN_LOG (OPTION_EXPLICIT_DEF_TIMESTAMP |\ + OPTION_AUTO_IS_NULL | OPTION_NO_FOREIGN_KEY_CHECKS | \ + OPTION_RELAXED_UNIQUE_CHECKS | OPTION_NOT_AUTOCOMMIT | OPTION_IF_EXISTS |\ + OPTION_INSERT_HISTORY) + +#define CHECKSUM_CRC32_SIGNATURE_LEN 4 +/** + defined statically while there is just one alg implemented +*/ +#define BINLOG_CHECKSUM_LEN CHECKSUM_CRC32_SIGNATURE_LEN +#define BINLOG_CHECKSUM_ALG_DESC_LEN 1 /* 1 byte checksum alg descriptor */ + +/* + These are capability numbers for MariaDB slave servers. + + Newer MariaDB slaves set this to inform the master about their capabilities. + This allows the master to decide which events it can send to the slave + without breaking replication on old slaves that maybe do not understand + all events from newer masters. 
+ + As new releases are backwards compatible, a given capability implies also + all capabilities with smaller number. + + Older MariaDB slaves and other MySQL slave servers do not set this, so they + are recorded with capability 0. +*/ + +/* MySQL or old MariaDB slave with no announced capability. */ +#define MARIA_SLAVE_CAPABILITY_UNKNOWN 0 +/* MariaDB >= 5.3, which understands ANNOTATE_ROWS_EVENT. */ +#define MARIA_SLAVE_CAPABILITY_ANNOTATE 1 +/* + MariaDB >= 5.5. This version has the capability to tolerate events omitted + from the binlog stream without breaking replication (MySQL slaves fail + because they mis-compute the offsets into the master's binlog). +*/ +#define MARIA_SLAVE_CAPABILITY_TOLERATE_HOLES 2 +/* MariaDB >= 10.0, which knows about binlog_checkpoint_log_event. */ +#define MARIA_SLAVE_CAPABILITY_BINLOG_CHECKPOINT 3 +/* MariaDB >= 10.0.1, which knows about global transaction id events. */ +#define MARIA_SLAVE_CAPABILITY_GTID 4 + +/* Our capability. */ +#define MARIA_SLAVE_CAPABILITY_MINE MARIA_SLAVE_CAPABILITY_GTID + + +/* + When the size of 'log_pos' within Heartbeat_log_event exceeds UINT32_MAX it + cannot be accommodated in common_header, as 'log_pos' is of 4 bytes size. In + such cases, sub_header, of size 8 bytes will hold larger 'log_pos' value. +*/ +#define HB_SUB_HEADER_LEN 8 + + +/** + @enum Log_event_type + + Enumeration type for the different types of log events. +*/ +enum Log_event_type +{ + /* + Every time you update this enum (when you add a type), you have to + fix Format_description_log_event::Format_description_log_event(). 
+ */ + UNKNOWN_EVENT= 0, + START_EVENT_V3= 1, + QUERY_EVENT= 2, + STOP_EVENT= 3, + ROTATE_EVENT= 4, + INTVAR_EVENT= 5, + LOAD_EVENT= 6, + SLAVE_EVENT= 7, + CREATE_FILE_EVENT= 8, + APPEND_BLOCK_EVENT= 9, + EXEC_LOAD_EVENT= 10, + DELETE_FILE_EVENT= 11, + /* + NEW_LOAD_EVENT is like LOAD_EVENT except that it has a longer + sql_ex, allowing multibyte TERMINATED BY etc; both types share the + same class (Load_log_event) + */ + NEW_LOAD_EVENT= 12, + RAND_EVENT= 13, + USER_VAR_EVENT= 14, + FORMAT_DESCRIPTION_EVENT= 15, + XID_EVENT= 16, + BEGIN_LOAD_QUERY_EVENT= 17, + EXECUTE_LOAD_QUERY_EVENT= 18, + + TABLE_MAP_EVENT = 19, + + /* + These event numbers were used for 5.1.0 to 5.1.15 and are + therefore obsolete. + */ + PRE_GA_WRITE_ROWS_EVENT = 20, + PRE_GA_UPDATE_ROWS_EVENT = 21, + PRE_GA_DELETE_ROWS_EVENT = 22, + + /* + These event numbers are used from 5.1.16 until mysql-5.6.6, + and in MariaDB + */ + WRITE_ROWS_EVENT_V1 = 23, + UPDATE_ROWS_EVENT_V1 = 24, + DELETE_ROWS_EVENT_V1 = 25, + + /* + Something out of the ordinary happened on the master + */ + INCIDENT_EVENT= 26, + + /* + Heartbeat event to be send by master at its idle time + to ensure master's online status to slave + */ + HEARTBEAT_LOG_EVENT= 27, + + /* + In some situations, it is necessary to send over ignorable + data to the slave: data that a slave can handle in case there + is code for handling it, but which can be ignored if it is not + recognized. 
+ + These mysql-5.6 events are not recognized (and ignored) by MariaDB + */ + IGNORABLE_LOG_EVENT= 28, + ROWS_QUERY_LOG_EVENT= 29, + + /* Version 2 of the Row events, generated only by mysql-5.6.6+ */ + WRITE_ROWS_EVENT = 30, + UPDATE_ROWS_EVENT = 31, + DELETE_ROWS_EVENT = 32, + + /* MySQL 5.6 GTID events, ignored by MariaDB */ + GTID_LOG_EVENT= 33, + ANONYMOUS_GTID_LOG_EVENT= 34, + PREVIOUS_GTIDS_LOG_EVENT= 35, + + /* MySQL 5.7 events, ignored by MariaDB */ + TRANSACTION_CONTEXT_EVENT= 36, + VIEW_CHANGE_EVENT= 37, + /* not ignored */ + XA_PREPARE_LOG_EVENT= 38, + + /* + Add new events here - right above this comment! + Existing events (except ENUM_END_EVENT) should never change their numbers + */ + + /* New MySQL/Sun events are to be added right above this comment */ + MYSQL_EVENTS_END, + + MARIA_EVENTS_BEGIN= 160, + /* New Maria event numbers start from here */ + ANNOTATE_ROWS_EVENT= 160, + /* + Binlog checkpoint event. Used for XA crash recovery on the master, not used + in replication. + A binlog checkpoint event specifies a binlog file such that XA crash + recovery can start from that file - and it is guaranteed to find all XIDs + that are prepared in storage engines but not yet committed. + */ + BINLOG_CHECKPOINT_EVENT= 161, + /* + Gtid event. For global transaction ID, used to start a new event group, + instead of the old BEGIN query event, and also to mark stand-alone + events. + */ + GTID_EVENT= 162, + /* + Gtid list event. Logged at the start of every binlog, to record the + current replication state. This consists of the last GTID seen for + each replication domain. + */ + GTID_LIST_EVENT= 163, + + START_ENCRYPTION_EVENT= 164, + + /* + Compressed binlog event. + + Note that the order between WRITE/UPDATE/DELETE events is significant; + this is so that we can convert from the compressed to the uncompressed + event type with (type-WRITE_ROWS_COMPRESSED_EVENT + WRITE_ROWS_EVENT) + and similar for _V1. 
 */
  QUERY_COMPRESSED_EVENT = 165,
  WRITE_ROWS_COMPRESSED_EVENT_V1 = 166,
  UPDATE_ROWS_COMPRESSED_EVENT_V1 = 167,
  DELETE_ROWS_COMPRESSED_EVENT_V1 = 168,
  WRITE_ROWS_COMPRESSED_EVENT = 169,
  UPDATE_ROWS_COMPRESSED_EVENT = 170,
  DELETE_ROWS_COMPRESSED_EVENT = 171,

  /* Add new MariaDB events here - right above this comment! */

  ENUM_END_EVENT /* end marker */
};


/*
  Bit flags for what has been written to the cache. Used to
  discard logs with table map events but not row events and
  nothing else important. This is stored by the cache.
*/

enum enum_logged_status
{
  LOGGED_TABLE_MAP= 1,
  LOGGED_ROW_EVENT= 2,
  LOGGED_NO_DATA= 4,
  LOGGED_CRITICAL= 8
};

/* True for a Query event, plain or compressed. */
static inline bool LOG_EVENT_IS_QUERY(enum Log_event_type type)
{
  return type == QUERY_EVENT || type == QUERY_COMPRESSED_EVENT;
}


/* True for any Write_rows event: v1 or v2, compressed or not. */
static inline bool LOG_EVENT_IS_WRITE_ROW(enum Log_event_type type)
{
  return type == WRITE_ROWS_EVENT || type == WRITE_ROWS_EVENT_V1 ||
         type == WRITE_ROWS_COMPRESSED_EVENT ||
         type == WRITE_ROWS_COMPRESSED_EVENT_V1;
}


/* True for any Update_rows event: v1 or v2, compressed or not. */
static inline bool LOG_EVENT_IS_UPDATE_ROW(enum Log_event_type type)
{
  return type == UPDATE_ROWS_EVENT || type == UPDATE_ROWS_EVENT_V1 ||
         type == UPDATE_ROWS_COMPRESSED_EVENT ||
         type == UPDATE_ROWS_COMPRESSED_EVENT_V1;
}


/* True for any Delete_rows event: v1 or v2, compressed or not. */
static inline bool LOG_EVENT_IS_DELETE_ROW(enum Log_event_type type)
{
  return type == DELETE_ROWS_EVENT || type == DELETE_ROWS_EVENT_V1 ||
         type == DELETE_ROWS_COMPRESSED_EVENT ||
         type == DELETE_ROWS_COMPRESSED_EVENT_V1;
}


/* True for the compressed form of any row event (v1 or v2). */
static inline bool LOG_EVENT_IS_ROW_COMPRESSED(enum Log_event_type type)
{
  return type == WRITE_ROWS_COMPRESSED_EVENT ||
         type == WRITE_ROWS_COMPRESSED_EVENT_V1 ||
         type == UPDATE_ROWS_COMPRESSED_EVENT ||
         type == UPDATE_ROWS_COMPRESSED_EVENT_V1 ||
         type == DELETE_ROWS_COMPRESSED_EVENT ||
         type == DELETE_ROWS_COMPRESSED_EVENT_V1;
}


/*
  True for a version-2 row event (WRITE/UPDATE/DELETE), plain or
  compressed. Relies on each group occupying a contiguous range of
  event numbers (see the enum above).
*/
static inline bool LOG_EVENT_IS_ROW_V2(enum Log_event_type type)
{
  return (type >= WRITE_ROWS_EVENT && type <=
DELETE_ROWS_EVENT) || + (type >= WRITE_ROWS_COMPRESSED_EVENT && type <= DELETE_ROWS_COMPRESSED_EVENT); +} + + +/* + The number of types we handle in Format_description_log_event (UNKNOWN_EVENT + is not to be handled, it does not exist in binlogs, it does not have a + format). +*/ +#define LOG_EVENT_TYPES (ENUM_END_EVENT-1) + +enum Int_event_type +{ + INVALID_INT_EVENT = 0, LAST_INSERT_ID_EVENT = 1, INSERT_ID_EVENT = 2 +}; + +#ifdef MYSQL_SERVER +class String; +class MYSQL_BIN_LOG; +class THD; +#endif + +class Format_description_log_event; +class Relay_log_info; +class binlog_cache_data; + +bool copy_event_cache_to_file_and_reinit(IO_CACHE *cache, FILE *file); + +#ifdef MYSQL_CLIENT +enum enum_base64_output_mode { + BASE64_OUTPUT_NEVER= 0, + BASE64_OUTPUT_AUTO= 1, + BASE64_OUTPUT_UNSPEC= 2, + BASE64_OUTPUT_DECODE_ROWS= 3, + /* insert new output modes here */ + BASE64_OUTPUT_MODE_COUNT +}; + +bool copy_event_cache_to_string_and_reinit(IO_CACHE *cache, LEX_STRING *to); + +/* + A structure for mysqlbinlog to know how to print events + + This structure is passed to the event's print() methods, + + There are two types of settings stored here: + 1. Last db, flags2, sql_mode etc comes from the last printed event. + They are stored so that only the necessary USE and SET commands + are printed. + 2. Other information on how to print the events, e.g. short_form, + hexdump_from. These are not dependent on the last event. +*/ +typedef struct st_print_event_info +{ + /* + Settings for database, sql_mode etc that comes from the last event + that was printed. We cache these so that we don't have to print + them if they are unchanged. 
+ */ + char db[FN_REFLEN+1]; // TODO: make this a LEX_STRING when thd->db is + char charset[6]; // 3 variables, each of them storable in 2 bytes + char time_zone_str[MAX_TIME_ZONE_NAME_LENGTH]; + char delimiter[16]; + sql_mode_t sql_mode; /* must be same as THD.variables.sql_mode */ + my_thread_id thread_id; + ulonglong row_events; + ulong auto_increment_increment, auto_increment_offset; + uint lc_time_names_number; + uint charset_database_number; + uint verbose; + uint32 flags2; + uint32 server_id; + uint32 domain_id; + uint8 common_header_len; + enum_base64_output_mode base64_output_mode; + my_off_t hexdump_from; + + table_mapping m_table_map; + table_mapping m_table_map_ignored; + bool flags2_inited; + bool sql_mode_inited; + bool charset_inited; + bool thread_id_printed; + bool server_id_printed; + bool domain_id_printed; + bool allow_parallel; + bool allow_parallel_printed; + bool found_row_event; + bool print_row_count; + static const uint max_delimiter_size= 16; + /* Settings on how to print the events */ + bool short_form; + /* + This is set whenever a Format_description_event is printed. + Later, when an event is printed in base64, this flag is tested: if + no Format_description_event has been seen, it is unsafe to print + the base64 event, so an error message is generated. + */ + bool printed_fd_event; + /* + Track when @@skip_replication changes so we need to output a SET + statement for it. + */ + bool skip_replication; + bool print_table_metadata; + + /* + These two caches are used by the row-based replication events to + collect the header information and the main body of the events + making up a statement. 
+ */ + IO_CACHE head_cache; + IO_CACHE body_cache; + IO_CACHE tail_cache; +#ifdef WHEN_FLASHBACK_REVIEW_READY + /* Storing the SQL for reviewing */ + IO_CACHE review_sql_cache; +#endif + FILE *file; + + + + /* + Used to include the events within a GTID start/stop boundary + */ + my_bool m_is_event_group_active; + + /* + Tracks whether or not output events must be explicitly activated in order + to be printed + */ + my_bool m_is_event_group_filtering_enabled; + + st_print_event_info(); + + ~st_print_event_info() { + close_cached_file(&head_cache); + close_cached_file(&body_cache); + close_cached_file(&tail_cache); +#ifdef WHEN_FLASHBACK_REVIEW_READY + close_cached_file(&review_sql_cache); +#endif + } + bool init_ok() /* tells if construction was successful */ + { return my_b_inited(&head_cache) && my_b_inited(&body_cache) +#ifdef WHEN_FLASHBACK_REVIEW_READY + && my_b_inited(&review_sql_cache) +#endif + ; } + void flush_for_error() + { + if (!copy_event_cache_to_file_and_reinit(&head_cache, file)) + copy_event_cache_to_file_and_reinit(&body_cache, file); + fflush(file); + } + + /* + Notify that all events part of the current group should be printed + */ + void activate_current_event_group() + { + m_is_event_group_active= TRUE; + } + void deactivate_current_event_group() + { + m_is_event_group_active= FALSE; + } + + /* + Used for displaying events part of an event group. + Returns TRUE when both event group filtering is enabled and the current + event group should be displayed, OR if event group filtering is + disabled. More specifically, if filtering is disabled, all events + should be shown. + Returns FALSE when event group filtering is enabled and the current event + group is filtered out. + */ + my_bool is_event_group_active() + { + return m_is_event_group_filtering_enabled ? 
m_is_event_group_active : TRUE;
  }

  /*
    Notify that events must be explicitly activated in order to be printed
  */
  void enable_event_group_filtering()
  {
    m_is_event_group_filtering_enabled= TRUE;
  }
} PRINT_EVENT_INFO;
#endif

/**
  This class encapsulates writing of Log_event objects to an IO_CACHE.
  Automatically calculates the checksum and encrypts the data, if necessary.
*/

class Log_event_writer
{
  /*
    Method pointer selecting the plain or the encrypting write path;
    Log_event_writer is updated when ctx is set
  */
  int (Log_event_writer::*encrypt_or_write)(const uchar *pos, size_t len);
public:
  ulonglong bytes_written;
  void *ctx;            ///< Encryption context or 0 if no encryption is needed
  uint checksum_len;
  int write(Log_event *ev);
  int write_header(uchar *pos, size_t len);
  int write_data(const uchar *pos, size_t len);
  int write_footer();
  /* Current write position in the underlying IO_CACHE. */
  my_off_t pos() { return my_b_safe_tell(file); }
  void add_status(enum_logged_status status);
  void set_incident();
  /* Route all subsequent writes through the encrypting code path. */
  void set_encrypted_writer()
  { encrypt_or_write= &Log_event_writer::encrypt_and_write; }

  /* By default writes go through write_internal() (no encryption). */
  Log_event_writer(IO_CACHE *file_arg, binlog_cache_data *cache_data_arg,
                   Binlog_crypt_data *cr= 0)
  :encrypt_or_write(&Log_event_writer::write_internal),
   bytes_written(0), ctx(0),
   file(file_arg), cache_data(cache_data_arg), crypto(cr) { }

private:
  IO_CACHE *file;
  binlog_cache_data *cache_data;
  /**
    Placeholder for event checksum while writing to binlog.
  */
  ha_checksum crc;
  /**
    Encryption data (key, nonce). Only used if ctx != 0.
  */
  Binlog_crypt_data *crypto;
  /**
    Event length to be written into the next encrypted block
  */
  uint event_len;
  int write_internal(const uchar *pos, size_t len);
  int encrypt_and_write(const uchar *pos, size_t len);
  int maybe_write_event_len(uchar *pos, size_t len);
};

/**
  The struct aggregates two parameters that identify an event
  uniquely in scope of communication of a particular master and slave couple.
+ I.e there can not be 2 events from the same staying connected master which + have the same coordinates. + @note + Such identifier is not yet unique generally as the event originating master + is resettable. Also the crashed master can be replaced with some other. +*/ +typedef struct event_coordinates +{ + char * file_name; // binlog file name (directories stripped) + my_off_t pos; // event's position in the binlog file +} LOG_POS_COORD; + +/** + @class Log_event + + This is the abstract base class for binary log events. + + @section Log_event_binary_format Binary Format + + Any @c Log_event saved on disk consists of the following three + components. + + - Common-Header + - Post-Header + - Body + + The Common-Header, documented in the table @ref Table_common_header + "below", always has the same form and length within one version of + MySQL. Each event type specifies a format and length of the + Post-Header. The length of the Common-Header is the same for all + events of the same type. The Body may be of different format and + length even for different events of the same type. The binary + formats of Post-Header and Body are documented separately in each + subclass. The binary format of Common-Header is as follows. + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Common-Header
NameFormatDescription
timestamp4 byte unsigned integerThe time when the query started, in seconds since 1970. +
type1 byte enumerationSee enum #Log_event_type.
server_id4 byte unsigned integerServer ID of the server that created the event.
total_size4 byte unsigned integerThe total size of this event, in bytes. In other words, this + is the sum of the sizes of Common-Header, Post-Header, and Body. +
master_position4 byte unsigned integerThe position of the next event in the master binary log, in + bytes from the beginning of the file. In a binlog that is not a + relay log, this is just the position of the next event, in bytes + from the beginning of the file. In a relay log, this is + the position of the next event in the master's binlog. +
flags2 byte bitfieldSee Log_event::flags.
+ + Summing up the numbers above, we see that the total size of the + common header is 19 bytes. + + @subsection Log_event_format_of_atomic_primitives Format of Atomic Primitives + + - All numbers, whether they are 16-, 24-, 32-, or 64-bit numbers, + are stored in little endian, i.e., the least significant byte first, + unless otherwise specified. + + @anchor packed_integer + - Some events use a special format for efficient representation of + unsigned integers, called Packed Integer. A Packed Integer has the + capacity of storing up to 8-byte integers, while small integers + still can use 1, 3, or 4 bytes. The value of the first byte + determines how to read the number, according to the following table: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Format of Packed Integer
First byteFormat
0-250The first byte is the number (in the range 0-250), and no more + bytes are used.
252Two more bytes are used. The number is in the range + 251-0xffff.
253Three more bytes are used. The number is in the range + 0xffff-0xffffff.
254Eight more bytes are used. The number is in the range + 0xffffff-0xffffffffffffffff.
+ + - Strings are stored in various formats. The format of each string + is documented separately. +*/ +class Log_event +{ +public: + /** + Enumeration of what kinds of skipping (and non-skipping) that can + occur when the slave executes an event. + + @see shall_skip + @see do_shall_skip + */ + enum enum_skip_reason { + /** + Don't skip event. + */ + EVENT_SKIP_NOT, + + /** + Skip event by ignoring it. + + This means that the slave skip counter will not be changed. + */ + EVENT_SKIP_IGNORE, + + /** + Skip event and decrease skip counter. + */ + EVENT_SKIP_COUNT + }; + + enum enum_event_cache_type + { + EVENT_INVALID_CACHE, + /* + If possible the event should use a non-transactional cache before + being flushed to the binary log. This means that it must be flushed + right after its correspondent statement is completed. + */ + EVENT_STMT_CACHE, + /* + The event should use a transactional cache before being flushed to + the binary log. This means that it must be flushed upon commit or + rollback. + */ + EVENT_TRANSACTIONAL_CACHE, + /* + The event must be written directly to the binary log without going + through a cache. + */ + EVENT_NO_CACHE, + /** + If there is a need for different types, introduce them before this. + */ + EVENT_CACHE_COUNT + }; + + /* + The following type definition is to be used whenever data is placed + and manipulated in a common buffer. Use this typedef for buffers + that contain data containing binary and character data. + */ + typedef unsigned char Byte; + + /* + The offset in the log where this event originally appeared (it is + preserved in relay logs, making SHOW SLAVE STATUS able to print + coordinates of the event in the master's binlog). 
Note: when a + transaction is written by the master to its binlog (wrapped in + BEGIN/COMMIT) the log_pos of all the queries it contains is the + one of the BEGIN (this way, when one does SHOW SLAVE STATUS it + sees the offset of the BEGIN, which is logical as rollback may + occur), except the COMMIT query which has its real offset. + */ + my_off_t log_pos; + /* + A temp buffer for read_log_event; it is later analysed according to the + event's type, and its content is distributed in the event-specific fields. + */ + uchar *temp_buf; + + /* + TRUE <=> this event 'owns' temp_buf and should call my_free() when done + with it + */ + bool event_owns_temp_buf; + + /* + Timestamp on the master(for debugging and replication of + NOW()/TIMESTAMP). It is important for queries and LOAD DATA + INFILE. This is set at the event's creation time, except for Query + and Load (et al.) events where this is set at the query's + execution time, which guarantees good replication (otherwise, we + could have a query and its event with different timestamps). + */ + my_time_t when; + ulong when_sec_part; + /* The number of seconds the query took to run on the master. */ + ulong exec_time; + /* Number of bytes written by write() function */ + size_t data_written; + + /* + The master's server id (is preserved in the relay log; used to + prevent from infinite loops in circular replication). + */ + uint32 server_id; + + /** + Some 16 flags. See the definitions above for LOG_EVENT_TIME_F, + LOG_EVENT_FORCED_ROTATE_F, LOG_EVENT_THREAD_SPECIFIC_F, + LOG_EVENT_SUPPRESS_USE_F, and LOG_EVENT_SKIP_REPLICATION_F for notes. + */ + uint16 flags; + + enum_event_cache_type cache_type; + + /** + A storage to cache the global system variable's value. + Handling of a separate event will be governed its member. 
+ */ + ulong slave_exec_mode; + + Log_event_writer *writer; + +#ifdef MYSQL_SERVER + THD* thd; + + Log_event(); + Log_event(THD* thd_arg, uint16 flags_arg, bool is_transactional); + + /* + init_show_field_list() prepares the column names and types for the + output of SHOW BINLOG EVENTS; it is used only by SHOW BINLOG + EVENTS. + */ + static void init_show_field_list(THD *thd, List* field_list); +#ifdef HAVE_REPLICATION + int net_send(Protocol *protocol, const char* log_name, my_off_t pos); + + /* + pack_info() is used by SHOW BINLOG EVENTS; as print() it prepares and sends + a string to display to the user, so it resembles print(). + */ + + virtual void pack_info(Protocol *protocol); + +#endif /* HAVE_REPLICATION */ + virtual const char* get_db() + { + return thd ? thd->db.str : 0; + } +#else + Log_event() : temp_buf(0), when(0), flags(0) {} + ha_checksum crc; + /* print*() functions are used by mysqlbinlog */ + virtual bool print(FILE* file, PRINT_EVENT_INFO* print_event_info) = 0; + bool print_timestamp(IO_CACHE* file, time_t *ts = 0); + bool print_header(IO_CACHE* file, PRINT_EVENT_INFO* print_event_info, + bool is_more); + bool print_base64(IO_CACHE* file, PRINT_EVENT_INFO* print_event_info, + bool do_print_encoded); +#endif /* MYSQL_SERVER */ + + /* The following code used for Flashback */ +#ifdef MYSQL_CLIENT + my_bool is_flashback; + my_bool need_flashback_review; + String output_buf; // Storing the event output +#ifdef WHEN_FLASHBACK_REVIEW_READY + String m_review_dbname; + String m_review_tablename; + + void set_review_dbname(const char *name) + { + if (name) + { + m_review_dbname.free(); + m_review_dbname.append(name); + } + } + void set_review_tablename(const char *name) + { + if (name) + { + m_review_tablename.free(); + m_review_tablename.append(name); + } + } + const char *get_review_dbname() const { return m_review_dbname.ptr(); } + const char *get_review_tablename() const { return m_review_tablename.ptr(); } +#endif +#endif + + /* + read_log_event() 
functions read an event from a binlog or relay + log; used by SHOW BINLOG EVENTS, the binlog_dump thread on the + master (reads master's binlog), the slave IO thread (reads the + event sent by binlog_dump), the slave SQL thread (reads the event + from the relay log). If mutex is 0, the read will proceed without + mutex. We need the description_event to be able to parse the + event (to know the post-header's size); in fact in read_log_event + we detect the event's type, then call the specific event's + constructor and pass description_event as an argument. + */ + static Log_event* read_log_event(IO_CACHE* file, + const Format_description_log_event + *description_event, + my_bool crc_check); + + /** + Reads an event from a binlog or relay log. Used by the dump thread + this method reads the event into a raw buffer without parsing it. + + @Note If mutex is 0, the read will proceed without mutex. + + @Note If a log name is given than the method will check if the + given binlog is still active. + + @param[in] file log file to be read + @param[out] packet packet to hold the event + @param[in] checksum_alg_arg verify the event checksum using this + algorithm (or don't if it's + use BINLOG_CHECKSUM_ALG_OFF) + + @retval 0 success + @retval LOG_READ_EOF end of file, nothing was read + @retval LOG_READ_BOGUS malformed event + @retval LOG_READ_IO io error while reading + @retval LOG_READ_MEM packet memory allocation failed + @retval LOG_READ_TRUNC only a partial event could be read + @retval LOG_READ_TOO_LARGE event too large + */ + static int read_log_event(IO_CACHE* file, String* packet, + const Format_description_log_event *fdle, + enum enum_binlog_checksum_alg checksum_alg_arg); + /* + The value is set by caller of FD constructor and + Log_event::write_header() for the rest. + In the FD case it's propagated into the last byte + of post_header_len[] at FD::write(). + On the slave side the value is assigned from post_header_len[last] + of the last seen FD event. 
+ */ + enum enum_binlog_checksum_alg checksum_alg; + + static void *operator new(size_t size) + { + extern PSI_memory_key key_memory_log_event; + return my_malloc(key_memory_log_event, size, MYF(MY_WME|MY_FAE)); + } + + static void operator delete(void *ptr, size_t) + { + my_free(ptr); + } + + /* Placement version of the above operators */ + static void *operator new(size_t, void* ptr) { return ptr; } + static void operator delete(void*, void*) { } + +#ifdef MYSQL_SERVER + bool write_header(size_t event_data_length); + bool write_data(const uchar *buf, size_t data_length) + { return writer->write_data(buf, data_length); } + bool write_data(const char *buf, size_t data_length) + { return write_data((uchar*)buf, data_length); } + bool write_footer() + { return writer->write_footer(); } + + my_bool need_checksum(); + + virtual bool write() + { + return write_header(get_data_size()) || write_data_header() || + write_data_body() || write_footer(); + } + virtual bool write_data_header() + { return 0; } + virtual bool write_data_body() + { return 0; } + + /* Return start of query time or current time */ + inline my_time_t get_time() + { + THD *tmp_thd; + if (when) + return when; + if (thd) + { + when= thd->start_time; + when_sec_part= thd->start_time_sec_part; + return when; + } + /* thd will only be 0 here at time of log creation */ + if ((tmp_thd= current_thd)) + { + when= tmp_thd->start_time; + when_sec_part= tmp_thd->start_time_sec_part; + return when; + } + my_hrtime_t hrtime= my_hrtime(); + when= hrtime_to_my_time(hrtime); + when_sec_part= hrtime_sec_part(hrtime); + return when; + } +#endif + virtual Log_event_type get_type_code() = 0; + virtual enum_logged_status logged_status() { return LOGGED_CRITICAL; } + virtual bool is_valid() const = 0; + virtual my_off_t get_header_len(my_off_t len) { return len; } + void set_artificial_event() { flags |= LOG_EVENT_ARTIFICIAL_F; } + void set_relay_log_event() { flags |= LOG_EVENT_RELAY_LOG_F; } + bool is_artificial_event() 
const { return flags & LOG_EVENT_ARTIFICIAL_F; } + bool is_relay_log_event() const { return flags & LOG_EVENT_RELAY_LOG_F; } + inline bool use_trans_cache() const + { + return (cache_type == Log_event::EVENT_TRANSACTIONAL_CACHE); + } + inline void set_direct_logging() + { + cache_type = Log_event::EVENT_NO_CACHE; + } + inline bool use_direct_logging() + { + return (cache_type == Log_event::EVENT_NO_CACHE); + } + Log_event(const uchar *buf, const Format_description_log_event + *description_event); + virtual ~Log_event() { free_temp_buf();} + void register_temp_buf(uchar* buf, bool must_free) + { + temp_buf= buf; + event_owns_temp_buf= must_free; + } + void free_temp_buf() + { + if (temp_buf) + { + if (event_owns_temp_buf) + my_free(temp_buf); + temp_buf = 0; + } + } + /* + Get event length for simple events. For complicated events the length + is calculated during write() + */ + virtual int get_data_size() { return 0;} + static Log_event* read_log_event(const uchar *buf, uint event_len, + const char **error, + const Format_description_log_event + *description_event, my_bool crc_check); + /** + Returns the human readable name of the given event type. + */ + static const char* get_type_str(Log_event_type type); + /** + Returns the human readable name of this event's type. + */ + const char* get_type_str(); + +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + + /** + Apply the event to the database. + + This function represents the public interface for applying an + event. + + @see do_apply_event + */ + int apply_event(rpl_group_info *rgi); + + + /** + Update the relay log position. + + This function represents the public interface for "stepping over" + the event and will update the relay log information. + + @see do_update_pos + */ + int update_pos(rpl_group_info *rgi) + { + return do_update_pos(rgi); + } + + /** + Decide if the event shall be skipped, and the reason for skipping + it. 
+ + @see do_shall_skip + */ + enum_skip_reason shall_skip(rpl_group_info *rgi) + { + return do_shall_skip(rgi); + } + + + /* + Check if an event is non-final part of a stand-alone event group, + such as Intvar_log_event (such events should be processed as part + of the following event group, not individually). + See also is_part_of_group() + */ + static bool is_part_of_group(enum Log_event_type ev_type) + { + switch (ev_type) + { + case GTID_EVENT: + case INTVAR_EVENT: + case RAND_EVENT: + case USER_VAR_EVENT: + case TABLE_MAP_EVENT: + case ANNOTATE_ROWS_EVENT: + return true; + case DELETE_ROWS_EVENT: + case UPDATE_ROWS_EVENT: + case WRITE_ROWS_EVENT: + /* + ToDo: also check for non-final Rows_log_event (though such events + are usually in a BEGIN-COMMIT group). + */ + default: + return false; + } + } + /* + Same as above, but works on the object. In addition this is true for all + rows event except the last one. + */ + virtual bool is_part_of_group() { return 0; } + + static bool is_group_event(enum Log_event_type ev_type) + { + switch (ev_type) + { + case START_EVENT_V3: + case STOP_EVENT: + case ROTATE_EVENT: + case SLAVE_EVENT: + case FORMAT_DESCRIPTION_EVENT: + case INCIDENT_EVENT: + case HEARTBEAT_LOG_EVENT: + case BINLOG_CHECKPOINT_EVENT: + case GTID_LIST_EVENT: + case START_ENCRYPTION_EVENT: + return false; + + default: + return true; + } + } + +protected: + + /** + Helper function to ignore an event w.r.t. the slave skip counter. + + This function can be used inside do_shall_skip() for functions + that cannot end a group. If the slave skip counter is 1 when + seeing such an event, the event shall be ignored, the counter + left intact, and processing continue with the next event. + + A typical usage is: + @code + enum_skip_reason do_shall_skip(rpl_group_info *rgi) { + return continue_group(rgi); + } + @endcode + + @return Skip reason + */ + enum_skip_reason continue_group(rpl_group_info *rgi); + + /** + Primitive to apply an event to the database. 
+ + This is where the change to the database is made. + + @note The primitive is protected instead of private, since there + is a hierarchy of actions to be performed in some cases. + + @see Format_description_log_event::do_apply_event() + + @param rli Pointer to relay log info structure + + @retval 0 Event applied successfully + @retval errno Error code if event application failed + */ + virtual int do_apply_event(rpl_group_info *rgi) + { + return 0; /* Default implementation does nothing */ + } + + + /** + Advance relay log coordinates. + + This function is called to advance the relay log coordinates to + just after the event. It is essential that both the relay log + coordinate and the group log position is updated correctly, since + this function is used also for skipping events. + + Normally, each implementation of do_update_pos() shall: + + - Update the event position to refer to the position just after + the event. + + - Update the group log position to refer to the position just + after the event if the event is last in a group + + @param rli Pointer to relay log info structure + + @retval 0 Coordinates changed successfully + @retval errno Error code if advancing failed (usually just + 1). Observe that handler errors are returned by the + do_apply_event() function, and not by this one. + */ + virtual int do_update_pos(rpl_group_info *rgi); + + + /** + Decide if this event shall be skipped or not and the reason for + skipping it. + + The default implementation decide that the event shall be skipped + if either: + + - the server id of the event is the same as the server id of the + server and rli->replicate_same_server_id is true, + or + + - if rli->slave_skip_counter is greater than zero. + + @see do_apply_event + @see do_update_pos + + @retval Log_event::EVENT_SKIP_NOT + The event shall not be skipped and should be applied. 
+ + @retval Log_event::EVENT_SKIP_IGNORE + The event shall be skipped by just ignoring it, i.e., the slave + skip counter shall not be changed. This happends if, for example, + the originating server id of the event is the same as the server + id of the slave. + + @retval Log_event::EVENT_SKIP_COUNT + The event shall be skipped because the slave skip counter was + non-zero. The caller shall decrease the counter by one. + */ + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); +#endif +}; + + +/* + One class for each type of event. + Two constructors for each class: + - one to create the event for logging (when the server acts as a master), + called after an update to the database is done, + which accepts parameters like the query, the database, the options for LOAD + DATA INFILE... + - one to create the event from a packet (when the server acts as a slave), + called before reproducing the update, which accepts parameters (like a + buffer). Used to read from the master, from the relay log, and in + mysqlbinlog. This constructor must be format-tolerant. +*/ + +/** + @class Query_log_event + + A @c Query_log_event is created for each query that modifies the + database, unless the query is logged row-based. + + @section Query_log_event_binary_format Binary format + + See @ref Log_event_binary_format "Binary format for log events" for + a general discussion and introduction to the binary format of binlog + events. + + The Post-Header has five components: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Post-Header for Query_log_event
NameFormatDescription
slave_proxy_id4 byte unsigned integerAn integer identifying the client thread that issued the + query. The id is unique per server. (Note, however, that two + threads on different servers may have the same slave_proxy_id.) + This is used when a client thread creates a temporary table local + to the client. The slave_proxy_id is used to distinguish + temporary tables that belong to different clients. +
exec_time4 byte unsigned integerThe time from when the query started to when it was logged in + the binlog, in seconds.
db_len1 byte integerThe length of the name of the currently selected database.
error_code2 byte unsigned integerError code generated by the master. If the master fails, the + slave will fail with the same error code, except for the error + codes ER_DB_CREATE_EXISTS == 1007 and ER_DB_DROP_EXISTS == 1008. +
status_vars_len2 byte unsigned integerThe length of the status_vars block of the Body, in bytes. See + @ref query_log_event_status_vars "below". +
+ + The Body has the following components: + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Body for Query_log_event
NameFormatDescription
@anchor query_log_event_status_vars status_varsstatus_vars_len bytesZero or more status variables. Each status variable consists + of one byte identifying the variable stored, followed by the value + of the variable. The possible variables are listed separately in + the table @ref Table_query_log_event_status_vars "below". MySQL + always writes events in the order defined below; however, it is + capable of reading them in any order.
dbdb_len+1The currently selected database, as a null-terminated string. + + (The trailing zero is redundant since the length is already known; + it is db_len from Post-Header.) +
queryvariable length string without trailing zero, extending to the + end of the event (determined by the length field of the + Common-Header) + The SQL query.
+ + The following table lists the status variables that may appear in + the status_vars field. + + @anchor Table_query_log_event_status_vars + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Status variables for Query_log_event
Status variable1 byte identifierFormatDescription
flags2Q_FLAGS2_CODE == 04 byte bitfieldThe flags in @c thd->options, binary AND-ed with @c + OPTIONS_WRITTEN_TO_BIN_LOG. The @c thd->options bitfield contains + options for "SELECT". @c OPTIONS_WRITTEN identifies those options + that need to be written to the binlog (not all do). + + These flags correspond to the SQL variables SQL_AUTO_IS_NULL, + FOREIGN_KEY_CHECKS, UNIQUE_CHECKS, and AUTOCOMMIT, documented in + the "SET Syntax" section of the MySQL Manual. + + This field is always written to the binlog in version >= 5.0, and + never written in version < 5.0. +
sql_modeQ_SQL_MODE_CODE == 18 byte bitfieldThe @c sql_mode variable. See the section "SQL Modes" in the + MySQL manual, and see sql_priv.h for a list of the possible + flags. Currently (2007-10-04), the following flags are available: +
+    MODE_REAL_AS_FLOAT==0x1
+    MODE_PIPES_AS_CONCAT==0x2
+    MODE_ANSI_QUOTES==0x4
+    MODE_IGNORE_SPACE==0x8
+    MODE_IGNORE_BAD_TABLE_OPTIONS==0x10
+    MODE_ONLY_FULL_GROUP_BY==0x20
+    MODE_NO_UNSIGNED_SUBTRACTION==0x40
+    MODE_NO_DIR_IN_CREATE==0x80
+    MODE_POSTGRESQL==0x100
+    MODE_ORACLE==0x200
+    MODE_MSSQL==0x400
+    MODE_DB2==0x800
+    MODE_MAXDB==0x1000
+    MODE_NO_KEY_OPTIONS==0x2000
+    MODE_NO_TABLE_OPTIONS==0x4000
+    MODE_NO_FIELD_OPTIONS==0x8000
+    MODE_MYSQL323==0x10000
+    MODE_MYSQL40==0x20000
+    MODE_ANSI==0x40000
+    MODE_NO_AUTO_VALUE_ON_ZERO==0x80000
+    MODE_NO_BACKSLASH_ESCAPES==0x100000
+    MODE_STRICT_TRANS_TABLES==0x200000
+    MODE_STRICT_ALL_TABLES==0x400000
+    MODE_NO_ZERO_IN_DATE==0x800000
+    MODE_NO_ZERO_DATE==0x1000000
+    MODE_INVALID_DATES==0x2000000
+    MODE_ERROR_FOR_DIVISION_BY_ZERO==0x4000000
+    MODE_TRADITIONAL==0x8000000
+    MODE_NO_AUTO_CREATE_USER==0x10000000
+    MODE_HIGH_NOT_PRECEDENCE==0x20000000
+    MODE_NO_ENGINE_SUBSTITUTION==0x40000000
+    MODE_PAD_CHAR_TO_FULL_LENGTH==0x80000000
+    
+ All these flags are replicated from the server. However, all + flags except @c MODE_NO_DIR_IN_CREATE are honored by the slave; + the slave always preserves its old value of @c + MODE_NO_DIR_IN_CREATE. For a rationale, see comment in + @c Query_log_event::do_apply_event in @c log_event.cc. + + This field is always written to the binlog. +
catalogQ_CATALOG_NZ_CODE == 6Variable-length string: the length in bytes (1 byte) followed + by the characters (at most 255 bytes) + Stores the client's current catalog. Every database belongs + to a catalog, the same way that every table belongs to a + database. Currently, there is only one catalog, "std". + + This field is written if the length of the catalog is > 0; + otherwise it is not written. +
auto_incrementQ_AUTO_INCREMENT == 3two 2 byte unsigned integers, totally 2+2=4 bytesThe two variables auto_increment_increment and + auto_increment_offset, in that order. For more information, see + "System variables" in the MySQL manual. + + This field is written if auto_increment > 1. Otherwise, it is not + written. +
charsetQ_CHARSET_CODE == 4three 2 byte unsigned integers, totally 2+2+2=6 bytesThe three variables character_set_client, + collation_connection, and collation_server, in that order. + character_set_client is a code identifying the character set and + collation used by the client to encode the query. + collation_connection identifies the character set and collation + that the master converts the query to when it receives it; this is + useful when comparing literal strings. collation_server is the + default character set and collation used when a new database is + created. + + See also "Connection Character Sets and Collations" in the MySQL + 5.1 manual. + + All three variables are codes identifying a (character set, + collation) pair. To see which codes map to which pairs, run the + query "SELECT id, character_set_name, collation_name FROM + COLLATIONS". + + Cf. Q_CHARSET_DATABASE_CODE below. + + This field is always written. +
time_zoneQ_TIME_ZONE_CODE == 5Variable-length string: the length in bytes (1 byte) followed + by the characters (at most 255 bytes). + The time_zone of the master. + + See also "System Variables" and "MySQL Server Time Zone Support" + in the MySQL manual. + + This field is written if the length of the time zone string is > + 0; otherwise, it is not written. +
lc_time_names_numberQ_LC_TIME_NAMES_CODE == 72 byte integerA code identifying a table of month and day names. The + mapping from codes to languages is defined in @c sql_locale.cc. + + This field is written if it is not 0, i.e., if the locale is not + en_US. +
charset_database_numberQ_CHARSET_DATABASE_CODE == 82 byte integerThe value of the collation_database system variable (in the + source code stored in @c thd->variables.collation_database), which + holds the code for a (character set, collation) pair as described + above (see Q_CHARSET_CODE). + + collation_database was used in old versions (???WHEN). Its value + was loaded when issuing a "use db" query and could be changed by + issuing a "SET collation_database=xxx" query. It used to affect + the "LOAD DATA INFILE" and "CREATE TABLE" commands. + + In newer versions, "CREATE TABLE" has been changed to take the + character set from the database of the created table, rather than + the character set of the current database. This makes a + difference when creating a table in another database than the + current one. "LOAD DATA INFILE" has not yet changed to do this, + but there are plans to eventually do it, and to make + collation_database read-only. + + This field is written if it is not 0. +
table_map_for_updateQ_TABLE_MAP_FOR_UPDATE_CODE == 98 byte integerThe value of the table map that is to be updated by the + multi-table update query statement. Every bit of this variable + represents a table, and is set to 1 if the corresponding table is + to be updated by this statement. + + The value of this variable is set when executing a multi-table update + statement and used by slave to apply filter rules without opening + all the tables on slave. This is required because some tables may + not exist on slave because of the filter rules. +
+ + @subsection Query_log_event_notes_on_previous_versions Notes on Previous Versions + + * Status vars were introduced in version 5.0. To read earlier + versions correctly, check the length of the Post-Header. + + * The status variable Q_CATALOG_CODE == 2 existed in MySQL 5.0.x, + where 0<=x<=3. It was identical to Q_CATALOG_CODE, except that the + string had a trailing '\0'. The '\0' was removed in 5.0.4 since it + was redundant (the string length is stored before the string). The + Q_CATALOG_CODE will never be written by a new master, but can still + be understood by a new slave. + + * See Q_CHARSET_DATABASE_CODE in the table above. + + * When adding new status vars, please don't forget to update the + MAX_SIZE_LOG_EVENT_STATUS, and update function code_name + +*/ +class Query_log_event: public Log_event +{ + LEX_CSTRING user; + LEX_CSTRING host; +protected: + Log_event::Byte* data_buf; +public: + const char* query; + const char* catalog; + const char* db; + /* + If we already know the length of the query string + we pass it with q_len, so we would not have to call strlen() + otherwise, set it to 0, in which case, we compute it with strlen() + */ + uint32 q_len; + uint32 db_len; + uint16 error_code; + my_thread_id thread_id; + /* + For events created by Query_log_event::do_apply_event (and + Load_log_event::do_apply_event()) we need the *original* thread + id, to be able to log the event with the original (=master's) + thread id (fix for BUG#1686). + */ + ulong slave_proxy_id; + + /* + Binlog format 3 and 4 start to differ (as far as class members are + concerned) from here. + */ + + uint catalog_len; // <= 255 char; 0 means uninited + + /* + We want to be able to store a variable number of N-bit status vars: + (generally N=32; but N=64 for SQL_MODE) a user may want to log the number + of affected rows (for debugging) while another does not want to lose 4 + bytes in this. 
+ The storage on disk is the following: + status_vars_len is part of the post-header, + status_vars are in the variable-length part, after the post-header, before + the db & query. + status_vars on disk is a sequence of pairs (code, value) where 'code' means + 'sql_mode', 'affected' etc. Sometimes 'value' must be a short string, so + its first byte is its length. For now the order of status vars is: + flags2 - sql_mode - catalog - autoinc - charset + We should add the same thing to Load_log_event, but in fact + LOAD DATA INFILE is going to be logged with a new type of event (logging of + the plain text query), so Load_log_event would be frozen, so no need. The + new way of logging LOAD DATA INFILE would use a derived class of + Query_log_event, so automatically benefit from the work already done for + status variables in Query_log_event. + */ + uint16 status_vars_len; + + /* + 'flags2' is a second set of flags (on top of those in Log_event), for + session variables. These are thd->options which is & against a mask + (OPTIONS_WRITTEN_TO_BIN_LOG). + flags2_inited helps make a difference between flags2==0 (3.23 or 4.x + master, we don't know flags2, so use the slave server's global options) and + flags2==0 (5.0 master, we know this has a meaning of flags all down which + must influence the query). + */ + uint32 flags2_inited; + bool sql_mode_inited; + bool charset_inited; + + uint32 flags2; + sql_mode_t sql_mode; + ulong auto_increment_increment, auto_increment_offset; + char charset[6]; + uint time_zone_len; /* 0 means uninited */ + const char *time_zone_str; + uint lc_time_names_number; /* 0 means en_US */ + uint charset_database_number; + /* + map for tables that will be updated for a multi-table update query + statement, for other query statements, this will be zero. 
+ */ + ulonglong table_map_for_update; + /* Xid for the event, if such exists */ + ulonglong xid; + /* + Holds the original length of a Query_log_event that comes from a + master of version < 5.0 (i.e., binlog_version < 4). When the IO + thread writes the relay log, it augments the Query_log_event with a + Q_MASTER_DATA_WRITTEN_CODE status_var that holds the original event + length. This field is initialized to non-zero in the SQL thread when + it reads this augmented event. SQL thread does not write + Q_MASTER_DATA_WRITTEN_CODE to the slave's server binlog. + */ + uint32 master_data_written; + /* + A copy of Gtid event's extra flags that is relevant for two-phase + logged ALTER. + */ + uchar gtid_flags_extra; + decltype(rpl_gtid::seq_no) sa_seq_no; /* data part for CA/RA flags */ + +#ifdef MYSQL_SERVER + + Query_log_event(THD* thd_arg, const char* query_arg, size_t query_length, + bool using_trans, bool direct, bool suppress_use, int error); + const char* get_db() { return db; } +#ifdef HAVE_REPLICATION + void pack_info(Protocol* protocol); +#endif /* HAVE_REPLICATION */ +#else + bool print_query_header(IO_CACHE* file, PRINT_EVENT_INFO* print_event_info); + bool print_verbose(IO_CACHE* cache, PRINT_EVENT_INFO* print_event_info); + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); +#endif + + Query_log_event(); + Query_log_event(const uchar *buf, uint event_len, + const Format_description_log_event *description_event, + Log_event_type event_type); + ~Query_log_event() + { + if (data_buf) + my_free(data_buf); + } + Log_event_type get_type_code() { return QUERY_EVENT; } + static int dummy_event(String *packet, ulong ev_offset, + enum enum_binlog_checksum_alg checksum_alg); + static int begin_event(String *packet, ulong ev_offset, + enum enum_binlog_checksum_alg checksum_alg); +#ifdef MYSQL_SERVER + bool write(); + virtual bool write_post_header_for_derived() { return FALSE; } +#endif + bool is_valid() const { return query != 0; } + + /* + Returns number of 
bytes additionally written to post header by derived + events (so far it is only Execute_load_query event). + */ + virtual ulong get_post_header_size_for_derived() { return 0; } + /* Writes derived event-specific part of post header. */ + +public: /* !!! Public in this patch to allow old usage */ +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); + + int do_apply_event(rpl_group_info *rgi, + const char *query_arg, + uint32 q_len_arg); + static bool peek_is_commit_rollback(const uchar *event_start, + size_t event_len, + enum enum_binlog_checksum_alg + checksum_alg); + int handle_split_alter_query_log_event(rpl_group_info *rgi, + bool &skip_error_check); + +#endif /* HAVE_REPLICATION */ + /* + If true, the event always be applied by slave SQL thread or be printed by + mysqlbinlog + */ + bool is_trans_keyword() + { + /* + Before the patch for bug#50407, The 'SAVEPOINT and ROLLBACK TO' + queries input by user was written into log events directly. + So the keywords can be written in both upper case and lower case + together, strncasecmp is used to check both cases. they also could be + binlogged with comments in the front of these keywords. for examples: + / * bla bla * / SAVEPOINT a; + / * bla bla * / ROLLBACK TO a; + but we don't handle these cases and after the patch, both quiries are + binlogged in upper case with no comments. 
+ */ + return !strncmp(query, "BEGIN", q_len) || + !strncmp(query, "COMMIT", q_len) || + !strncasecmp(query, "SAVEPOINT", 9) || + !strncasecmp(query, "ROLLBACK", 8); + } + virtual bool is_begin() { return !strcmp(query, "BEGIN"); } + virtual bool is_commit() { return !strcmp(query, "COMMIT"); } + virtual bool is_rollback() { return !strcmp(query, "ROLLBACK"); } +}; + +class Query_compressed_log_event:public Query_log_event{ +protected: + Log_event::Byte* query_buf; // point to the uncompressed query +public: + Query_compressed_log_event(const uchar *buf, uint event_len, + const Format_description_log_event *description_event, + Log_event_type event_type); + ~Query_compressed_log_event() + { + if (query_buf) + my_free(query_buf); + } + Log_event_type get_type_code() { return QUERY_COMPRESSED_EVENT; } + + /* + the min length of log_bin_compress_min_len is 10, + means that Begin/Commit/Rollback would never be compressed! + */ + virtual bool is_begin() { return false; } + virtual bool is_commit() { return false; } + virtual bool is_rollback() { return false; } +#ifdef MYSQL_SERVER + Query_compressed_log_event(THD* thd_arg, const char* query_arg, + ulong query_length, + bool using_trans, bool direct, bool suppress_use, + int error); + virtual bool write(); +#endif +}; + + +/***************************************************************************** + sql_ex_info struct + ****************************************************************************/ +struct sql_ex_info +{ + const char* field_term; + const char* enclosed; + const char* line_term; + const char* line_start; + const char* escaped; + int cached_new_format= -1; + uint8 field_term_len= 0, enclosed_len= 0, line_term_len= 0, + line_start_len= 0, escaped_len= 0; + char opt_flags; + char empty_flags= 0; + + // store in new format even if old is possible + void force_new_format() { cached_new_format = 1;} + int data_size() + { + return (new_format() ? 
+ field_term_len + enclosed_len + line_term_len + + line_start_len + escaped_len + 6 : 7); + } + bool write_data(Log_event_writer *writer); + const uchar *init(const uchar *buf, const uchar* buf_end, bool use_new_format); + bool new_format() + { + return ((cached_new_format != -1) ? cached_new_format : + (cached_new_format=(field_term_len > 1 || + enclosed_len > 1 || + line_term_len > 1 || line_start_len > 1 || + escaped_len > 1))); + } +}; + +/** + @class Load_log_event + + This log event corresponds to a "LOAD DATA INFILE" SQL query on the + following form: + + @verbatim + (1) USE db; + (2) LOAD DATA [CONCURRENT] [LOCAL] INFILE 'file_name' + (3) [REPLACE | IGNORE] + (4) INTO TABLE 'table_name' + (5) [FIELDS + (6) [TERMINATED BY 'field_term'] + (7) [[OPTIONALLY] ENCLOSED BY 'enclosed'] + (8) [ESCAPED BY 'escaped'] + (9) ] + (10) [LINES + (11) [TERMINATED BY 'line_term'] + (12) [LINES STARTING BY 'line_start'] + (13) ] + (14) [IGNORE skip_lines LINES] + (15) (field_1, field_2, ..., field_n)@endverbatim + + @section Load_log_event_binary_format Binary Format + + The Post-Header consists of the following six components. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Post-Header for Load_log_event
NameFormatDescription
slave_proxy_id4 byte unsigned integerAn integer identifying the client thread that issued the + query. The id is unique per server. (Note, however, that two + threads on different servers may have the same slave_proxy_id.) + This is used when a client thread creates a temporary table local + to the client. The slave_proxy_id is used to distinguish + temporary tables that belong to different clients. +
exec_time4 byte unsigned integerThe time from when the query started to when it was logged in + the binlog, in seconds.
skip_lines4 byte unsigned integerThe number on line (14) above, if present, or 0 if line (14) + is left out. +
table_name_len1 byte unsigned integerThe length of 'table_name' on line (4) above.
db_len1 byte unsigned integerThe length of 'db' on line (1) above.
num_fields4 byte unsigned integerThe number n of fields on line (15) above.
+ + The Body contains the following components. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Body of Load_log_event
NameFormatDescription
sql_exvariable lengthDescribes the part of the query on lines (3) and + (5)–(13) above. More precisely, it stores the five strings + (on lines) field_term (6), enclosed (7), escaped (8), line_term + (11), and line_start (12); as well as a bitfield indicating the + presence of the keywords REPLACE (3), IGNORE (3), and OPTIONALLY + (7). + + The data is stored in one of two formats, called "old" and "new". + The type field of Common-Header determines which of these two + formats is used: type LOAD_EVENT means that the old format is + used, and type NEW_LOAD_EVENT means that the new format is used. + When MySQL writes a Load_log_event, it uses the new format if at + least one of the five strings is two or more bytes long. + Otherwise (i.e., if all strings are 0 or 1 bytes long), the old + format is used. + + The new and old format differ in the way the five strings are + stored. + +
    +
  • In the new format, the strings are stored in the order + field_term, enclosed, escaped, line_term, line_start. Each string + consists of a length (1 byte), followed by a sequence of + characters (0-255 bytes). Finally, a boolean combination of the + following flags is stored in 1 byte: REPLACE_FLAG==0x4, + IGNORE_FLAG==0x8, and OPT_ENCLOSED_FLAG==0x2. If a flag is set, + it indicates the presence of the corresponding keyword in the SQL + query. + +
  • In the old format, we know that each string has length 0 or + 1. Therefore, only the first byte of each string is stored. The + order of the strings is the same as in the new format. These five + bytes are followed by the same 1 byte bitfield as in the new + format. Finally, a 1 byte bitfield called empty_flags is stored. + The low 5 bits of empty_flags indicate which of the five strings + have length 0. For each of the following flags that is set, the + corresponding string has length 0; for the flags that are not set, + the string has length 1: FIELD_TERM_EMPTY==0x1, + ENCLOSED_EMPTY==0x2, LINE_TERM_EMPTY==0x4, LINE_START_EMPTY==0x8, + ESCAPED_EMPTY==0x10. +
+ + Thus, the size of the new format is 6 bytes + the sum of the sizes + of the five strings. The size of the old format is always 7 + bytes. +
field_lensnum_fields 1 byte unsigned integersAn array of num_fields integers representing the length of + each field in the query. (num_fields is from the Post-Header). +
fieldsnum_fields null-terminated stringsAn array of num_fields null-terminated strings, each + representing a field in the query. (The trailing zero is + redundant, since the length are stored in the num_fields array.) + The total length of all strings equals to the sum of all + field_lens, plus num_fields bytes for all the trailing zeros. +
table_namenull-terminated string of length table_len+1 bytesThe 'table_name' from the query, as a null-terminated string. + (The trailing zero is actually redundant since the table_len is + known from Post-Header.) +
dbnull-terminated string of length db_len+1 bytesThe 'db' from the query, as a null-terminated string. + (The trailing zero is actually redundant since the db_len is known + from Post-Header.) +
file_namevariable length string without trailing zero, extending to the + end of the event (determined by the length field of the + Common-Header) + The 'file_name' from the query. +
+ + @subsection Load_log_event_notes_on_previous_versions Notes on Previous Versions + + This event type is understood by current versions, but only + generated by MySQL 3.23 and earlier. +*/ +class Load_log_event: public Log_event +{ +private: +protected: + int copy_log_event(const uchar *buf, ulong event_len, + int body_offset, + const Format_description_log_event* description_event); + +public: + bool print_query(THD *thd, bool need_db, const char *cs, String *buf, + my_off_t *fn_start, my_off_t *fn_end, + const char *qualify_db); + my_thread_id thread_id; + ulong slave_proxy_id; + uint32 table_name_len; + /* + No need to have a catalog, as these events can only come from 4.x. + TODO: this may become false if Dmitri pushes his new LOAD DATA INFILE in + 5.0 only (not in 4.x). + */ + uint32 db_len; + uint32 fname_len; + uint32 num_fields; + const char* fields; + const uchar* field_lens; + uint32 field_block_len; + + const char* table_name; + const char* db; + const char* fname; + uint32 skip_lines; + sql_ex_info sql_ex; + bool local_fname; + /** + Indicates that this event corresponds to LOAD DATA CONCURRENT, + + @note Since Load_log_event event coming from the binary log + lacks information whether LOAD DATA on master was concurrent + or not, this flag is only set to TRUE for an auxiliary + Load_log_event object which is used in mysql_load() to + re-construct LOAD DATA statement from function parameters, + for logging. 
+ */ + bool is_concurrent; + + /* fname doesn't point to memory inside Log_event::temp_buf */ + void set_fname_outside_temp_buf(const char *afname, size_t alen) + { + fname= afname; + fname_len= (uint)alen; + local_fname= TRUE; + } + /* fname doesn't point to memory inside Log_event::temp_buf */ + int check_fname_outside_temp_buf() + { + return local_fname; + } + +#ifdef MYSQL_SERVER + String field_lens_buf; + String fields_buf; + + Load_log_event(THD* thd, const sql_exchange* ex, const char* db_arg, + const char* table_name_arg, + List& fields_arg, + bool is_concurrent_arg, + enum enum_duplicates handle_dup, bool ignore, + bool using_trans); + void set_fields(const char* db, List &fields_arg, + Name_resolution_context *context); + const char* get_db() { return db; } +#ifdef HAVE_REPLICATION + void pack_info(Protocol* protocol); +#endif /* HAVE_REPLICATION */ +#else + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info, bool commented); +#endif + + /* + Note that for all the events related to LOAD DATA (Load_log_event, + Create_file/Append/Exec/Delete, we pass description_event; however as + logging of LOAD DATA is going to be changed in 4.1 or 5.0, this is only used + for the common_header_len (post_header_len will not be changed). + */ + Load_log_event(const uchar *buf, uint event_len, + const Format_description_log_event* description_event); + ~Load_log_event() = default; + Log_event_type get_type_code() + { + return sql_ex.new_format() ? NEW_LOAD_EVENT: LOAD_EVENT; + } +#ifdef MYSQL_SERVER + bool write_data_header(); + bool write_data_body(); +#endif + bool is_valid() const { return table_name != 0; } + int get_data_size() + { + return (table_name_len + db_len + 2 + fname_len + + LOAD_HEADER_LEN + + sql_ex.data_size() + field_block_len + num_fields); + } + +public: /* !!! 
Public in this patch to allow old usage */ +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_apply_event(rpl_group_info *rgi) + { + return do_apply_event(thd->slave_net,rgi,0); + } + + int do_apply_event(NET *net, rpl_group_info *rgi, + bool use_rli_only_for_errors); +#endif +}; + +/** + @class Start_log_event_v3 + + Start_log_event_v3 is the Start_log_event of binlog format 3 (MySQL 3.23 and + 4.x). + + Format_description_log_event derives from Start_log_event_v3; it is + the Start_log_event of binlog format 4 (MySQL 5.0), that is, the + event that describes the other events' Common-Header/Post-Header + lengths. This event is sent by MySQL 5.0 whenever it starts sending + a new binlog if the requested position is >4 (otherwise if ==4 the + event will be sent naturally). + + @section Start_log_event_v3_binary_format Binary Format +*/ +class Start_log_event_v3: public Log_event +{ +public: + /* + If this event is at the start of the first binary log since server + startup 'created' should be the timestamp when the event (and the + binary log) was created. In the other case (i.e. this event is at + the start of a binary log created by FLUSH LOGS or automatic + rotation), 'created' should be 0. This "trick" is used by MySQL + >=4.0.14 slaves to know whether they must drop stale temporary + tables and whether they should abort unfinished transaction. + + Note that when 'created'!=0, it is always equal to the event's + timestamp; indeed Start_log_event is written only in log.cc where + the first constructor below is called, in which 'created' is set + to 'when'. So in fact 'created' is a useless variable. When it is + 0 we can read the actual value from timestamp ('when') and when it + is non-zero we can read the same value from timestamp + ('when'). Conclusion: + - we use timestamp to print when the binlog was created. + - we use 'created' only to know if this is a first binlog or not. 
+ In 3.23.57 we did not pay attention to this identity, so mysqlbinlog in + 3.23.57 does not print 'created the_date' if created was zero. This is now + fixed. + */ + time_t created; + uint16 binlog_version; + char server_version[ST_SERVER_VER_LEN]; + /* + We set this to 1 if we don't want to have the created time in the log, + which is the case when we rollover to a new log. + */ + bool dont_set_created; + +#ifdef MYSQL_SERVER + Start_log_event_v3(); +#ifdef HAVE_REPLICATION + void pack_info(Protocol* protocol); +#endif /* HAVE_REPLICATION */ +#else + Start_log_event_v3() = default; + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); +#endif + + Start_log_event_v3(const uchar *buf, uint event_len, + const Format_description_log_event* description_event); + ~Start_log_event_v3() = default; + Log_event_type get_type_code() { return START_EVENT_V3;} + my_off_t get_header_len(my_off_t l __attribute__((unused))) + { return LOG_EVENT_MINIMAL_HEADER_LEN; } +#ifdef MYSQL_SERVER + bool write(); +#endif + bool is_valid() const { return server_version[0] != 0; } + int get_data_size() + { + return START_V3_HEADER_LEN; //no variable-sized part + } + +protected: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_apply_event(rpl_group_info *rgi); + virtual enum_skip_reason do_shall_skip(rpl_group_info*) + { + /* + Events from ourself should be skipped, but they should not + decrease the slave skip counter. + */ + if (this->server_id == global_system_variables.server_id) + return Log_event::EVENT_SKIP_IGNORE; + else + return Log_event::EVENT_SKIP_NOT; + } +#endif +}; + +/** + @class Start_encryption_log_event + + Start_encryption_log_event marks the beginning of encrypted data (all events + after this event are encrypted). + + It contains the cryptographic scheme used for the encryption as well as any + data required to decrypt (except the actual key). + + For binlog cryptoscheme 1: key version, and nonce for iv generation. 
+*/ +class Start_encryption_log_event : public Log_event +{ +public: +#ifdef MYSQL_SERVER + Start_encryption_log_event(uint crypto_scheme_arg, uint key_version_arg, + const uchar* nonce_arg) + : crypto_scheme(crypto_scheme_arg), key_version(key_version_arg) + { + cache_type = EVENT_NO_CACHE; + DBUG_ASSERT(crypto_scheme == 1); + memcpy(nonce, nonce_arg, BINLOG_NONCE_LENGTH); + } + + bool write_data_body() + { + uchar scheme_buf= crypto_scheme; + uchar key_version_buf[BINLOG_KEY_VERSION_LENGTH]; + int4store(key_version_buf, key_version); + return write_data(&scheme_buf, sizeof(scheme_buf)) || + write_data(key_version_buf, sizeof(key_version_buf)) || + write_data(nonce, BINLOG_NONCE_LENGTH); + } +#else + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); +#endif + + Start_encryption_log_event(const uchar *buf, uint event_len, + const Format_description_log_event + *description_event); + + bool is_valid() const { return crypto_scheme == 1; } + + Log_event_type get_type_code() { return START_ENCRYPTION_EVENT; } + + int get_data_size() + { + return BINLOG_CRYPTO_SCHEME_LENGTH + BINLOG_KEY_VERSION_LENGTH + + BINLOG_NONCE_LENGTH; + } + + uint crypto_scheme; + uint key_version; + uchar nonce[BINLOG_NONCE_LENGTH]; + +protected: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_apply_event(rpl_group_info* rgi); + virtual int do_update_pos(rpl_group_info *rgi); + virtual enum_skip_reason do_shall_skip(rpl_group_info* rgi) + { + return Log_event::EVENT_SKIP_NOT; + } +#endif + +}; + + +class Version +{ +protected: + uchar m_ver[3]; + int cmp(const Version &other) const + { + return memcmp(m_ver, other.m_ver, 3); + } +public: + Version() + { + m_ver[0]= m_ver[1]= m_ver[2]= '\0'; + } + Version(uchar v0, uchar v1, uchar v2) + { + m_ver[0]= v0; + m_ver[1]= v1; + m_ver[2]= v2; + } + Version(const char *version, const char **endptr); + const uchar& operator [] (size_t i) const + { + DBUG_ASSERT(i < 3); + return m_ver[i]; + } + bool operator<(const 
Version &other) const { return cmp(other) < 0; } + bool operator>(const Version &other) const { return cmp(other) > 0; } + bool operator<=(const Version &other) const { return cmp(other) <= 0; } + bool operator>=(const Version &other) const { return cmp(other) >= 0; } +}; + + +/** + @class Format_description_log_event + + For binlog version 4. + This event is saved by threads which read it, as they need it for future + use (to decode the ordinary events). + + @section Format_description_log_event_binary_format Binary Format +*/ + +class Format_description_log_event: public Start_log_event_v3 +{ +public: + /* + The size of the fixed header which _all_ events have + (for binlogs written by this version, this is equal to + LOG_EVENT_HEADER_LEN), except FORMAT_DESCRIPTION_EVENT and ROTATE_EVENT + (those have a header of size LOG_EVENT_MINIMAL_HEADER_LEN). + */ + uint8 common_header_len; + uint8 number_of_event_types; + /* + The list of post-headers' lengths followed + by the checksum alg description byte + */ + uint8 *post_header_len; + class master_version_split: public Version { + public: + enum {KIND_MYSQL, KIND_MARIADB}; + int kind; + master_version_split() :kind(KIND_MARIADB) { } + master_version_split(const char *version); + bool version_is_valid() const + { + /* It is invalid only when all version numbers are 0 */ + return !(m_ver[0] == 0 && m_ver[1] == 0 && m_ver[2] == 0); + } + }; + master_version_split server_version_split; + const uint8 *event_type_permutation; + uint32 options_written_to_bin_log; + + Format_description_log_event(uint8 binlog_ver, const char* server_ver=0); + Format_description_log_event(const uchar *buf, uint event_len, + const Format_description_log_event + *description_event); + ~Format_description_log_event() + { + my_free(post_header_len); + } + Log_event_type get_type_code() { return FORMAT_DESCRIPTION_EVENT;} +#ifdef MYSQL_SERVER + bool write(); +#endif + bool header_is_valid() const + { + return ((common_header_len >= 
((binlog_version==1) ? OLD_HEADER_LEN : + LOG_EVENT_MINIMAL_HEADER_LEN)) && + (post_header_len != NULL)); + } + + bool is_valid() const + { + return header_is_valid() && server_version_split.version_is_valid(); + } + + int get_data_size() + { + /* + The vector of post-header lengths is considered as part of the + post-header, because in a given version it never changes (contrary to the + query in a Query_log_event). + */ + return FORMAT_DESCRIPTION_HEADER_LEN; + } + + Binlog_crypt_data crypto_data; + bool start_decryption(Start_encryption_log_event* sele); + void copy_crypto_data(const Format_description_log_event* o) + { + crypto_data= o->crypto_data; + } + void reset_crypto() + { + crypto_data.scheme= 0; + } + + void calc_server_version_split(); + void deduct_options_written_to_bin_log(); + static bool is_version_before_checksum(const master_version_split *version_split); +protected: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); +#endif +}; + + +/** + @class Intvar_log_event + + An Intvar_log_event will be created just before a Query_log_event, + if the query uses one of the variables LAST_INSERT_ID or INSERT_ID. + Each Intvar_log_event holds the value of one of these variables. + + @section Intvar_log_event_binary_format Binary Format + + The Post-Header for this event type is empty. The Body has two + components: + + + + + + + + + + + + + + + + + + + + + + +
Body for Intvar_log_event
NameFormatDescription
type1 byte enumerationOne byte identifying the type of variable stored. Currently, + two identifiers are supported: LAST_INSERT_ID_EVENT==1 and + INSERT_ID_EVENT==2. +
value8 byte unsigned integerThe value of the variable.
+*/ +class Intvar_log_event: public Log_event +{ +public: + ulonglong val; + uchar type; + +#ifdef MYSQL_SERVER +Intvar_log_event(THD* thd_arg,uchar type_arg, ulonglong val_arg, + bool using_trans, bool direct) + :Log_event(thd_arg,0,using_trans),val(val_arg),type(type_arg) + { + if (direct) + cache_type= Log_event::EVENT_NO_CACHE; + } +#ifdef HAVE_REPLICATION + void pack_info(Protocol* protocol); +#endif /* HAVE_REPLICATION */ +#else + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); +#endif + + Intvar_log_event(const uchar *buf, + const Format_description_log_event *description_event); + ~Intvar_log_event() = default; + Log_event_type get_type_code() { return INTVAR_EVENT;} + const char* get_var_type_name(); + int get_data_size() { return 9; /* sizeof(type) + sizeof(val) */;} +#ifdef MYSQL_SERVER + bool write(); +#endif + bool is_valid() const { return 1; } + bool is_part_of_group() { return 1; } + +private: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); +#endif +}; + + +/** + @class Rand_log_event + + Logs random seed used by the next RAND(), and by PASSWORD() in 4.1.0. + 4.1.1 does not need it (it's repeatable again) so this event needn't be + written in 4.1.1 for PASSWORD() (but the fact that it is written is just a + waste, it does not cause bugs). + + The state of the random number generation consists of 128 bits, + which are stored internally as two 64-bit numbers. + + @section Rand_log_event_binary_format Binary Format + + The Post-Header for this event type is empty. The Body has two + components: + + + + + + + + + + + + + + + + + + + + + +
Body for Rand_log_event
NameFormatDescription
seed18 byte unsigned integer64 bit random seed1.
seed28 byte unsigned integer64 bit random seed2.
+*/ + +class Rand_log_event: public Log_event +{ + public: + ulonglong seed1; + ulonglong seed2; + +#ifdef MYSQL_SERVER + Rand_log_event(THD* thd_arg, ulonglong seed1_arg, ulonglong seed2_arg, + bool using_trans, bool direct) + :Log_event(thd_arg,0,using_trans),seed1(seed1_arg),seed2(seed2_arg) + { + if (direct) + cache_type= Log_event::EVENT_NO_CACHE; + } +#ifdef HAVE_REPLICATION + void pack_info(Protocol* protocol); +#endif /* HAVE_REPLICATION */ +#else + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); +#endif + + Rand_log_event(const uchar *buf, + const Format_description_log_event *description_event); + ~Rand_log_event() = default; + Log_event_type get_type_code() { return RAND_EVENT;} + int get_data_size() { return 16; /* sizeof(ulonglong) * 2*/ } +#ifdef MYSQL_SERVER + bool write(); +#endif + bool is_valid() const { return 1; } + bool is_part_of_group() { return 1; } + +private: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); +#endif +}; + + +class Xid_apply_log_event: public Log_event +{ +public: +#ifdef MYSQL_SERVER + Xid_apply_log_event(THD* thd_arg): + Log_event(thd_arg, 0, TRUE) {} +#endif + Xid_apply_log_event(const uchar *buf, + const Format_description_log_event *description_event): + Log_event(buf, description_event) {} + + ~Xid_apply_log_event() {} + bool is_valid() const { return 1; } +private: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_commit()= 0; + virtual int do_apply_event(rpl_group_info *rgi); + int do_record_gtid(THD *thd, rpl_group_info *rgi, bool in_trans, + void **out_hton, bool force_err= false); + enum_skip_reason do_shall_skip(rpl_group_info *rgi); + virtual const char* get_query()= 0; +#endif +}; + + +/** + @class Xid_log_event + + Logs xid of the transaction-to-be-committed in the 2pc protocol. 
+ Has no meaning in replication, slaves ignore it. + + @section Xid_log_event_binary_format Binary Format +*/ +#ifdef MYSQL_CLIENT +typedef ulonglong my_xid; // this line is the same as in handler.h +#endif + +class Xid_log_event: public Xid_apply_log_event +{ +public: + my_xid xid; + +#ifdef MYSQL_SERVER + Xid_log_event(THD* thd_arg, my_xid x, bool direct): + Xid_apply_log_event(thd_arg), xid(x) + { + if (direct) + cache_type= Log_event::EVENT_NO_CACHE; + } + const char* get_query() + { + return "COMMIT /* implicit, from Xid_log_event */"; + } +#ifdef HAVE_REPLICATION + void pack_info(Protocol* protocol); +#endif /* HAVE_REPLICATION */ +#else + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); +#endif + + Xid_log_event(const uchar *buf, + const Format_description_log_event *description_event); + ~Xid_log_event() = default; + Log_event_type get_type_code() { return XID_EVENT;} + int get_data_size() { return sizeof(xid); } +#ifdef MYSQL_SERVER + bool write(); +#endif + +private: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + int do_commit(); +#endif +}; + + +/** + @class XA_prepare_log_event + + Similar to Xid_log_event except that + - it is specific to XA transaction + - it carries out the prepare logics rather than the final committing + when @c one_phase member is off. The latter option is only for + compatibility with the upstream. + + From the groupping perspective the event finalizes the current + "prepare" group that is started with Gtid_log_event similarly to the + regular replicated transaction. +*/ + +/** + Function serializes XID which is characterized by by four last arguments + of the function. + Serialized XID is presented in valid hex format and is returned to + the caller in a buffer pointed by the first argument. + The buffer size provived by the caller must be not less than + 8 + 2 * XIDDATASIZE + 4 * sizeof(XID::formatID) + 1, see + {MYSQL_,}XID definitions. 
+ + @param buf pointer to a buffer allocated for storing serialized data + @param fmt formatID value + @param gln gtrid_length value + @param bln bqual_length value + @param dat data value + + @return the value of the buffer pointer +*/ + +inline char *serialize_xid(char *buf, long fmt, long gln, long bln, + const char *dat) +{ + int i; + char *c= buf; + /* + Build a string consisting of the hex format representation of XID + as passed through fmt,gln,bln,dat argument: + X'hex11hex12...hex1m',X'hex21hex22...hex2n',11 + and store it into buf. + */ + c[0]= 'X'; + c[1]= '\''; + c+= 2; + for (i= 0; i < gln; i++) + { + c[0]=_dig_vec_lower[((uchar*) dat)[i] >> 4]; + c[1]=_dig_vec_lower[((uchar*) dat)[i] & 0x0f]; + c+= 2; + } + c[0]= '\''; + c[1]= ','; + c[2]= 'X'; + c[3]= '\''; + c+= 4; + + for (; i < gln + bln; i++) + { + c[0]=_dig_vec_lower[((uchar*) dat)[i] >> 4]; + c[1]=_dig_vec_lower[((uchar*) dat)[i] & 0x0f]; + c+= 2; + } + c[0]= '\''; + sprintf(c+1, ",%lu", (unsigned long) fmt); /* cast: %lu requires unsigned long; formatID of a valid XID is non-negative, so output is unchanged */ + + return buf; +} + +/* + The size of the string containing serialized Xid representation + is computed as a sum of + eight as the number of formatting symbols (X'',X'',) + plus 2 x XIDDATASIZE (2 due to hex format), + plus space for decimal digits of XID::formatID, + plus one for 0x0. +*/ +static const uint ser_buf_size= + 8 + 2 * MYSQL_XIDDATASIZE + 4 * sizeof(long) + 1; + +struct event_mysql_xid_t : MYSQL_XID +{ + char buf[ser_buf_size]; + char *serialize() + { + return serialize_xid(buf, formatID, gtrid_length, bqual_length, data); + } +}; + +#ifndef MYSQL_CLIENT +struct event_xid_t : XID +{ + char buf[ser_buf_size]; + + char *serialize(char *buf_arg) + { + return serialize_xid(buf_arg, formatID, gtrid_length, bqual_length, data); + } + char *serialize() + { + return serialize(buf); + } +}; +#endif + +class XA_prepare_log_event: public Xid_apply_log_event +{ +protected: + + /* Constant contributor to subheader in write() by members of XID struct. 
*/ + static const int xid_subheader_no_data= 12; + event_mysql_xid_t m_xid; + void *xid; /* raw XID pointer as passed to the server-side constructor */ + bool one_phase; /* selects "XA COMMIT ... ONE PHASE" in get_query() */ + +public: +#ifdef MYSQL_SERVER + XA_prepare_log_event(THD* thd_arg, XID *xid_arg, bool one_phase_arg): + Xid_apply_log_event(thd_arg), xid(xid_arg), one_phase(one_phase_arg) + { + cache_type= Log_event::EVENT_NO_CACHE; + } +#ifdef HAVE_REPLICATION + void pack_info(Protocol* protocol); +#endif /* HAVE_REPLICATION */ +#else + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); +#endif + XA_prepare_log_event(const uchar *buf, + const Format_description_log_event *description_event); + ~XA_prepare_log_event() {} + Log_event_type get_type_code() { return XA_PREPARE_LOG_EVENT; } + bool is_valid() const { return m_xid.formatID != -1; } + int get_data_size() + { + return xid_subheader_no_data + m_xid.gtrid_length + m_xid.bqual_length; + } + +#ifdef MYSQL_SERVER + bool write(); +#endif + +private: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + char query[sizeof("XA COMMIT ONE PHASE") + 1 + ser_buf_size]; + int do_commit(); + const char* get_query() + { + sprintf(query, + (one_phase ? "XA COMMIT %s ONE PHASE" : "XA PREPARE %s"), + m_xid.serialize()); + return query; + } +#endif +}; + + +/** + @class User_var_log_event + + Every time a query uses the value of a user variable, a User_var_log_event is + written before the Query_log_event, to set the user variable. 
+ + @section User_var_log_event_binary_format Binary Format +*/ + +class User_var_log_event: public Log_event +{ +public: + enum { + UNDEF_F= 0, + UNSIGNED_F= 1 + }; + const char *name; + size_t name_len; + const char *val; + size_t val_len; + Item_result type; + uint charset_number; + bool is_null; + uchar flags; +#ifdef MYSQL_SERVER + bool deferred; + query_id_t query_id; + User_var_log_event(THD* thd_arg, const char *name_arg, size_t name_len_arg, + const char *val_arg, size_t val_len_arg, + Item_result type_arg, + uint charset_number_arg, uchar flags_arg, + bool using_trans, bool direct) + :Log_event(thd_arg, 0, using_trans), + name(name_arg), name_len(name_len_arg), val(val_arg), + val_len(val_len_arg), type(type_arg), charset_number(charset_number_arg), + flags(flags_arg), deferred(false) + { + is_null= !val; + if (direct) + cache_type= Log_event::EVENT_NO_CACHE; + } + void pack_info(Protocol* protocol); +#else + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); +#endif + + User_var_log_event(const uchar *buf, uint event_len, + const Format_description_log_event *description_event); + ~User_var_log_event() = default; + Log_event_type get_type_code() { return USER_VAR_EVENT;} +#ifdef MYSQL_SERVER + bool write(); + /* + Getter and setter for deferred User-event. + Returns true if the event is not applied directly + and which case the applier adjusts execution path. + */ + bool is_deferred() { return deferred; } + /* + In case of the deferred applying the variable instance is flagged + and the parsing time query id is stored to be used at applying time. 
+ */ + void set_deferred(query_id_t qid) { deferred= true; query_id= qid; } +#endif + bool is_valid() const { return name != 0; } + bool is_part_of_group() { return 1; } + +private: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); +#endif +}; + + +/** + @class Stop_log_event + + @section Stop_log_event_binary_format Binary Format + + The Post-Header and Body for this event type are empty; it only has + the Common-Header. +*/ +class Stop_log_event: public Log_event +{ +public: +#ifdef MYSQL_SERVER + Stop_log_event() :Log_event() + {} +#else + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); +#endif + + Stop_log_event(const uchar *buf, + const Format_description_log_event *description_event): + Log_event(buf, description_event) + {} + ~Stop_log_event() = default; + Log_event_type get_type_code() { return STOP_EVENT;} + bool is_valid() const { return 1; } + +private: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_update_pos(rpl_group_info *rgi); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi) + { + /* + Events from ourself should be skipped, but they should not + decrease the slave skip counter. + */ + if (this->server_id == global_system_variables.server_id) + return Log_event::EVENT_SKIP_IGNORE; + else + return Log_event::EVENT_SKIP_NOT; + } +#endif +}; + +/** + @class Rotate_log_event + + This will be deprecated when we move to using sequence ids. + + @section Rotate_log_event_binary_format Binary Format + + The Post-Header has one component: + + + + + + + + + + + + + + + + +
Post-Header for Rotate_log_event
NameFormatDescription
position8 byte integerThe position within the binlog to rotate to.
+ + The Body has one component: + + + + + + + + + + + + + + + + +
Body for Rotate_log_event
NameFormatDescription
new_logvariable length string without trailing zero, extending to the + end of the event (determined by the length field of the + Common-Header) + Name of the binlog to rotate to.
+*/ + +class Rotate_log_event: public Log_event +{ +public: + enum { + DUP_NAME= 2, // if constructor should dup the string argument + RELAY_LOG=4 // rotate event for relay log + }; + const char *new_log_ident; + ulonglong pos; + uint ident_len; + uint flags; +#ifdef MYSQL_SERVER + Rotate_log_event(const char* new_log_ident_arg, + uint ident_len_arg, + ulonglong pos_arg, uint flags); +#ifdef HAVE_REPLICATION + void pack_info(Protocol* protocol); +#endif /* HAVE_REPLICATION */ +#else + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); +#endif + + Rotate_log_event(const uchar *buf, uint event_len, + const Format_description_log_event* description_event); + ~Rotate_log_event() + { + if (flags & DUP_NAME) + my_free((void*) new_log_ident); + } + Log_event_type get_type_code() { return ROTATE_EVENT;} + my_off_t get_header_len(my_off_t l __attribute__((unused))) + { return LOG_EVENT_MINIMAL_HEADER_LEN; } + int get_data_size() { return ident_len + ROTATE_HEADER_LEN;} + bool is_valid() const { return new_log_ident != 0; } +#ifdef MYSQL_SERVER + bool write(); +#endif + +private: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_update_pos(rpl_group_info *rgi); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); +#endif +}; + + +class Binlog_checkpoint_log_event: public Log_event +{ +public: + char *binlog_file_name; + uint binlog_file_len; + +#ifdef MYSQL_SERVER + Binlog_checkpoint_log_event(const char *binlog_file_name_arg, + uint binlog_file_len_arg); +#ifdef HAVE_REPLICATION + void pack_info(Protocol *protocol); +#endif +#else + bool print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + Binlog_checkpoint_log_event(const uchar *buf, uint event_len, + const Format_description_log_event + *description_event); + ~Binlog_checkpoint_log_event() { my_free(binlog_file_name); } + Log_event_type get_type_code() { return BINLOG_CHECKPOINT_EVENT;} + int get_data_size() { return binlog_file_len + BINLOG_CHECKPOINT_HEADER_LEN;} + 
bool is_valid() const { return binlog_file_name != 0; } +#ifdef MYSQL_SERVER + bool write(); + enum_skip_reason do_shall_skip(rpl_group_info *rgi); +#endif +}; + + +/** + @class Gtid_log_event + + This event is logged as part of every event group to give the global + transaction id (GTID) of that group. + + It replaces the BEGIN query event used in earlier versions to begin most + event groups, but is also used for events that used to be stand-alone. + + @section Gtid_log_event_binary_format Binary Format + + The binary format for Gtid_log_event has 6 extra reserved bytes to make the + length a total of 19 byte (+ 19 bytes of header in common with all events). + This is just the minimal size for a BEGIN query event, which makes it easy + to replace this event with such BEGIN event to remain compatible with old + slave servers. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Post-Header
NameFormatDescription
seq_no8 byte unsigned integerincreasing id within one server_id. Starts at 1, holes in the sequence + may occur
domain_id4 byte unsigned integerReplication domain id, identifying independent replication streams>
flags1 byte bitfieldBit 0 set indicates stand-alone event (no terminating COMMIT)Bit 1 set indicates group commit, and that commit id existsBit 2 set indicates a transactional event group (can be safely rolled + back).Bit 3 set indicates that user allowed optimistic parallel apply (the + @@SESSION.replicate_allow_parallel value was true at commit).Bit 4 set indicates that this transaction encountered a row (or other) + lock wait during execution.
Reserved (no group commit) / commit id (group commit) (see flags bit 1)6 bytes / 8 bytesReserved bytes, set to 0. Maybe be used for future expansion (no + group commit). OR commit id, same for all GTIDs in the same group + commit (see flags bit 1).
+ + The Body of Gtid_log_event is empty. The total event size is 19 bytes + + the normal 19 bytes common-header. +*/ + +class Gtid_log_event: public Log_event +{ +public: + uint64 seq_no; + uint64 commit_id; + uint32 domain_id; + uint64 sa_seq_no; // start alter identifier for CA/RA +#ifdef MYSQL_SERVER + event_xid_t xid; +#else + event_mysql_xid_t xid; +#endif + uchar flags2; + /* + More flags area placed after the regular flags2's area. The type + is declared to be in agreement with Query_log_event's member that + may copy the flags_extra value. + */ + decltype(Query_log_event::gtid_flags_extra) flags_extra; + /* + Number of engine participants in transaction minus 1. + When zero the event does not contain that information. + */ + uint8 extra_engines; + + /* Flags2. */ + + /* FL_STANDALONE is set when there is no terminating COMMIT event. */ + static const uchar FL_STANDALONE= 1; + /* + FL_GROUP_COMMIT_ID is set when event group is part of a group commit on the + master. Groups with same commit_id are part of the same group commit. + */ + static const uchar FL_GROUP_COMMIT_ID= 2; + /* + FL_TRANSACTIONAL is set for an event group that can be safely rolled back + (no MyISAM, eg.). + */ + static const uchar FL_TRANSACTIONAL= 4; + /* + FL_ALLOW_PARALLEL reflects the (negation of the) value of + @@SESSION.skip_parallel_replication at the time of commit. + */ + static const uchar FL_ALLOW_PARALLEL= 8; + /* + FL_WAITED is set if a row lock wait (or other wait) is detected during the + execution of the transaction. + */ + static const uchar FL_WAITED= 16; + /* FL_DDL is set for event group containing DDL. */ + static const uchar FL_DDL= 32; + /* FL_PREPARED_XA is set for XA transaction. */ + static const uchar FL_PREPARED_XA= 64; + /* FL_"COMMITTED or ROLLED-BACK"_XA is set for XA transaction. */ + static const uchar FL_COMPLETED_XA= 128; + + /* + flags_extra 's bit values. 
+ _E1 suffix below stands for Extra to infer the extra flags, + their "1st" generation (more *generations* can come when necessary). + + FL_EXTRA_MULTI_ENGINE_E1 is set for event group comprising a transaction + involving multiple storage engines. No flag and extra data are added + to the event when the transaction involves only one engine. + */ + static const uchar FL_EXTRA_MULTI_ENGINE_E1= 1; + static const uchar FL_START_ALTER_E1= 2; + static const uchar FL_COMMIT_ALTER_E1= 4; + static const uchar FL_ROLLBACK_ALTER_E1= 8; + +#ifdef MYSQL_SERVER + Gtid_log_event(THD *thd_arg, uint64 seq_no, uint32 domain_id, bool standalone, + uint16 flags, bool is_transactional, uint64 commit_id, + bool has_xid= false, bool is_ro_1pc= false); +#ifdef HAVE_REPLICATION + void pack_info(Protocol *protocol); + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); +#endif +#else + bool print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + Gtid_log_event(const uchar *buf, uint event_len, + const Format_description_log_event *description_event); + ~Gtid_log_event() = default; + Log_event_type get_type_code() { return GTID_EVENT; } + enum_logged_status logged_status() { return LOGGED_NO_DATA; } + int get_data_size() + { + return GTID_HEADER_LEN + ((flags2 & FL_GROUP_COMMIT_ID) ? 
2 : 0); + } + bool is_valid() const { return seq_no != 0; } +#ifdef MYSQL_SERVER + bool write(); + static int make_compatible_event(String *packet, bool *need_dummy_event, + ulong ev_offset, enum enum_binlog_checksum_alg checksum_alg); + static bool peek(const uchar *event_start, size_t event_len, + enum enum_binlog_checksum_alg checksum_alg, + uint32 *domain_id, uint32 *server_id, uint64 *seq_no, + uchar *flags2, const Format_description_log_event *fdev); +#endif +}; + + +/** + @class Gtid_list_log_event + + This event is logged at the start of every binlog file to record the + current replication state: the last global transaction id (GTID) applied + on the server within each replication domain. + + It consists of a list of GTIDs, one for each replication domain ever seen + on the server. + + @section Gtid_list_log_event_binary_format Binary Format + + + + + + + + + + + + + + + +
Post-Header
NameFormatDescription
count4 byte unsigned integerThe lower 28 bits are the number of GTIDs. The upper 4 bits are + flags bits.
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
Body
NameFormatDescription
domain_id4 byte unsigned integerReplication domain id of one GTID
server_id4 byte unsigned integerServer id of one GTID
seq_no8 byte unsigned integersequence number of one GTID
+ + The three elements in the body repeat COUNT times to form the GTID list. + + At the time of writing, only two flag bit are in use. + + Bit 28 of `count' is used for flag FLAG_UNTIL_REACHED, which is sent in a + Gtid_list event from the master to the slave to indicate that the START + SLAVE UNTIL master_gtid_pos=xxx condition has been reached. (This flag is + only sent in "fake" events generated on the fly, it is not written into + the binlog). +*/ + +class Gtid_list_log_event: public Log_event +{ +public: + uint32 count; + uint32 gl_flags; + struct rpl_gtid *list; + uint64 *sub_id_list; + + static const uint element_size= 4+4+8; + /* Upper bits stored in 'count'. See comment above */ + enum gtid_flags + { + FLAG_UNTIL_REACHED= (1<<28), + FLAG_IGN_GTIDS= (1<<29), + }; +#ifdef MYSQL_SERVER + Gtid_list_log_event(rpl_binlog_state *gtid_set, uint32 gl_flags); + Gtid_list_log_event(slave_connection_state *gtid_set, uint32 gl_flags); +#ifdef HAVE_REPLICATION + void pack_info(Protocol *protocol); +#endif +#else + bool print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + Gtid_list_log_event(const uchar *buf, uint event_len, + const Format_description_log_event *description_event); + ~Gtid_list_log_event() { my_free(list); my_free(sub_id_list); } + Log_event_type get_type_code() { return GTID_LIST_EVENT; } + int get_data_size() { + /* + Replacing with dummy event, needed for older slaves, requires a minimum + of 6 bytes in the body. + */ + return (count==0 ? 
+ GTID_LIST_HEADER_LEN+2 : GTID_LIST_HEADER_LEN+count*element_size); + } + bool is_valid() const { return list != NULL; } +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + bool to_packet(String *packet); + bool write(); + virtual int do_apply_event(rpl_group_info *rgi); + enum_skip_reason do_shall_skip(rpl_group_info *rgi); +#endif + static bool peek(const char *event_start, size_t event_len, + enum enum_binlog_checksum_alg checksum_alg, + rpl_gtid **out_gtid_list, uint32 *out_list_len, + const Format_description_log_event *fdev); +}; + + +/* the classes below are for the new LOAD DATA INFILE logging */ + +/** + @class Create_file_log_event + + @section Create_file_log_event_binary_format Binary Format +*/ + +class Create_file_log_event: public Load_log_event +{ +protected: + /* + Pretend we are Load event, so we can write out just + our Load part - used on the slave when writing event out to + SQL_LOAD-*.info file + */ + bool fake_base; +public: + uchar *block; + const uchar *event_buf; + uint block_len; + uint file_id; + bool inited_from_old; + +#ifdef MYSQL_SERVER + Create_file_log_event(THD* thd, sql_exchange* ex, const char* db_arg, + const char* table_name_arg, + List& fields_arg, + bool is_concurrent_arg, + enum enum_duplicates handle_dup, bool ignore, + uchar* block_arg, uint block_len_arg, + bool using_trans); +#ifdef HAVE_REPLICATION + void pack_info(Protocol* protocol); +#endif /* HAVE_REPLICATION */ +#else + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info, + bool enable_local); +#endif + + Create_file_log_event(const uchar *buf, uint event_len, + const Format_description_log_event* description_event); + ~Create_file_log_event() + { + my_free((void*) event_buf); + } + + Log_event_type get_type_code() + { + return fake_base ? Load_log_event::get_type_code() : CREATE_FILE_EVENT; + } + int get_data_size() + { + return (fake_base ? 
Load_log_event::get_data_size() : + Load_log_event::get_data_size() + + 4 + 1 + block_len); + } + bool is_valid() const { return inited_from_old || block != 0; } +#ifdef MYSQL_SERVER + bool write_data_header(); + bool write_data_body(); + /* + Cut out Create_file extensions and + write it as Load event - used on the slave + */ + bool write_base(); +#endif + +private: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_apply_event(rpl_group_info *rgi); +#endif +}; + + +/** + @class Append_block_log_event + + @section Append_block_log_event_binary_format Binary Format +*/ + +class Append_block_log_event: public Log_event +{ +public: + uchar* block; + uint block_len; + uint file_id; + /* + 'db' is filled when the event is created in mysql_load() (the + event needs to have a 'db' member to be well filtered by + binlog-*-db rules). 'db' is not written to the binlog (it's not + used by Append_block_log_event::write()), so it can't be read in + the Append_block_log_event(const uchar *buf, int event_len) + constructor. In other words, 'db' is used only for filtering by + binlog-*-db rules. Create_file_log_event is different: it's 'db' + (which is inherited from Load_log_event) is written to the binlog + and can be re-read. 
+ */ + const char* db; + +#ifdef MYSQL_SERVER + Append_block_log_event(THD* thd, const char* db_arg, uchar* block_arg, + uint block_len_arg, bool using_trans); +#ifdef HAVE_REPLICATION + void pack_info(Protocol* protocol); + virtual int get_create_or_append() const; +#endif /* HAVE_REPLICATION */ +#else + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); +#endif + + Append_block_log_event(const uchar *buf, uint event_len, + const Format_description_log_event + *description_event); + ~Append_block_log_event() = default; + Log_event_type get_type_code() { return APPEND_BLOCK_EVENT;} + int get_data_size() { return block_len + APPEND_BLOCK_HEADER_LEN ;} + bool is_valid() const { return block != 0; } +#ifdef MYSQL_SERVER + bool write(); + const char* get_db() { return db; } +#endif + +private: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_apply_event(rpl_group_info *rgi); +#endif +}; + + +/** + @class Delete_file_log_event + + @section Delete_file_log_event_binary_format Binary Format +*/ + +class Delete_file_log_event: public Log_event +{ +public: + uint file_id; + const char* db; /* see comment in Append_block_log_event */ + +#ifdef MYSQL_SERVER + Delete_file_log_event(THD* thd, const char* db_arg, bool using_trans); +#ifdef HAVE_REPLICATION + void pack_info(Protocol* protocol); +#endif /* HAVE_REPLICATION */ +#else + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info, + bool enable_local); +#endif + + Delete_file_log_event(const uchar *buf, uint event_len, + const Format_description_log_event* description_event); + ~Delete_file_log_event() = default; + Log_event_type get_type_code() { return DELETE_FILE_EVENT;} + int get_data_size() { return DELETE_FILE_HEADER_LEN ;} + bool is_valid() const { return file_id != 0; } +#ifdef MYSQL_SERVER + bool write(); + const char* get_db() { return db; } +#endif + +private: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) 
+ virtual int do_apply_event(rpl_group_info *rgi); +#endif +}; + + +/** + @class Execute_load_log_event + + @section Delete_file_log_event_binary_format Binary Format +*/ + +class Execute_load_log_event: public Log_event +{ +public: + uint file_id; + const char* db; /* see comment in Append_block_log_event */ + +#ifdef MYSQL_SERVER + Execute_load_log_event(THD* thd, const char* db_arg, bool using_trans); +#ifdef HAVE_REPLICATION + void pack_info(Protocol* protocol); +#endif /* HAVE_REPLICATION */ +#else + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); +#endif + + Execute_load_log_event(const uchar *buf, uint event_len, + const Format_description_log_event + *description_event); + ~Execute_load_log_event() = default; + Log_event_type get_type_code() { return EXEC_LOAD_EVENT;} + int get_data_size() { return EXEC_LOAD_HEADER_LEN ;} + bool is_valid() const { return file_id != 0; } +#ifdef MYSQL_SERVER + bool write(); + const char* get_db() { return db; } +#endif + +private: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_apply_event(rpl_group_info *rgi); +#endif +}; + + +/** + @class Begin_load_query_log_event + + Event for the first block of file to be loaded, its only difference from + Append_block event is that this event creates or truncates existing file + before writing data. 
+ + @section Begin_load_query_log_event_binary_format Binary Format +*/ +class Begin_load_query_log_event: public Append_block_log_event +{ +public: +#ifdef MYSQL_SERVER + Begin_load_query_log_event(THD* thd_arg, const char *db_arg, + uchar* block_arg, uint block_len_arg, + bool using_trans); +#ifdef HAVE_REPLICATION + Begin_load_query_log_event(THD* thd); + int get_create_or_append() const; +#endif /* HAVE_REPLICATION */ +#endif + Begin_load_query_log_event(const uchar *buf, uint event_len, + const Format_description_log_event + *description_event); + ~Begin_load_query_log_event() = default; + Log_event_type get_type_code() { return BEGIN_LOAD_QUERY_EVENT; } +private: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); +#endif +}; + + +/* + Elements of this enum describe how LOAD DATA handles duplicates. +*/ +enum enum_load_dup_handling { LOAD_DUP_ERROR= 0, LOAD_DUP_IGNORE, + LOAD_DUP_REPLACE }; + +/** + @class Execute_load_query_log_event + + Event responsible for LOAD DATA execution, it similar to Query_log_event + but before executing the query it substitutes original filename in LOAD DATA + query with name of temporary file. + + @section Execute_load_query_log_event_binary_format Binary Format +*/ +class Execute_load_query_log_event: public Query_log_event +{ +public: + uint file_id; // file_id of temporary file + uint fn_pos_start; // pointer to the part of the query that should + // be substituted + uint fn_pos_end; // pointer to the end of this part of query + /* + We have to store type of duplicate handling explicitly, because + for LOAD DATA it also depends on LOCAL option. And this part + of query will be rewritten during replication so this information + may be lost... 
+ */ + enum_load_dup_handling dup_handling; + +#ifdef MYSQL_SERVER + Execute_load_query_log_event(THD* thd, const char* query_arg, + ulong query_length, uint fn_pos_start_arg, + uint fn_pos_end_arg, + enum_load_dup_handling dup_handling_arg, + bool using_trans, bool direct, + bool suppress_use, int errcode); +#ifdef HAVE_REPLICATION + void pack_info(Protocol* protocol); +#endif /* HAVE_REPLICATION */ +#else + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); + /* Prints the query as LOAD DATA LOCAL and with rewritten filename */ + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info, + const char *local_fname); +#endif + Execute_load_query_log_event(const uchar *buf, uint event_len, + const Format_description_log_event + *description_event); + ~Execute_load_query_log_event() = default; + + Log_event_type get_type_code() { return EXECUTE_LOAD_QUERY_EVENT; } + bool is_valid() const { return Query_log_event::is_valid() && file_id != 0; } + + ulong get_post_header_size_for_derived(); +#ifdef MYSQL_SERVER + bool write_post_header_for_derived(); +#endif + +private: +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_apply_event(rpl_group_info *rgi); +#endif +}; + + +#ifdef MYSQL_CLIENT +/** + @class Unknown_log_event + + @section Unknown_log_event_binary_format Binary Format +*/ +class Unknown_log_event: public Log_event +{ +public: + enum { UNKNOWN, ENCRYPTED } what; + /* + Even if this is an unknown event, we still pass description_event to + Log_event's ctor, this way we can extract maximum information from the + event's header (the unique ID for example). 
+ */ + Unknown_log_event(const uchar *buf, + const Format_description_log_event *description_event): + Log_event(buf, description_event), what(UNKNOWN) + {} + /* constructor for hopelessly corrupted events */ + Unknown_log_event(): Log_event(), what(ENCRYPTED) {} + ~Unknown_log_event() = default; + bool print(FILE* file, PRINT_EVENT_INFO* print_event_info); + Log_event_type get_type_code() { return UNKNOWN_EVENT;} + bool is_valid() const { return 1; } +}; +#endif +char *str_to_hex(char *to, const char *from, size_t len); + +/** + @class Annotate_rows_log_event + + In row-based mode, if binlog_annotate_row_events = ON, each group of + Table_map_log_events is preceded by an Annotate_rows_log_event which + contains the query which caused the subsequent rows operations. + + The Annotate_rows_log_event has no post-header and its body contains + the corresponding query (without trailing zero). Note. The query length + is to be calculated as a difference between the whole event length and + the common header length. 
+*/ +class Annotate_rows_log_event: public Log_event +{ +public: +#ifndef MYSQL_CLIENT + Annotate_rows_log_event(THD*, bool using_trans, bool direct); +#endif + Annotate_rows_log_event(const uchar *buf, uint event_len, + const Format_description_log_event*); + ~Annotate_rows_log_event(); + + virtual int get_data_size(); + virtual Log_event_type get_type_code(); + enum_logged_status logged_status() { return LOGGED_NO_DATA; } + virtual bool is_valid() const; + virtual bool is_part_of_group() { return 1; } + +#ifndef MYSQL_CLIENT + virtual bool write_data_header(); + virtual bool write_data_body(); +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + virtual void pack_info(Protocol*); +#endif + +#ifdef MYSQL_CLIENT + virtual bool print(FILE*, PRINT_EVENT_INFO*); +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +private: + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); + virtual enum_skip_reason do_shall_skip(rpl_group_info*); +#endif + +private: + char *m_query_txt; + uint m_query_len; + char *m_save_thd_query_txt; + uint m_save_thd_query_len; + bool m_saved_thd_query; + bool m_used_query_txt; +}; + +/** + @class Table_map_log_event + + In row-based mode, every row operation event is preceded by a + Table_map_log_event which maps a table definition to a number. The + table definition consists of database name, table name, and column + definitions. + + @section Table_map_log_event_binary_format Binary Format + + The Post-Header has the following components: + + + + + + + + + + + + + + + + + + + + + + +
Post-Header for Table_map_log_event
NameFormatDescription
table_id6 bytes unsigned integerThe number that identifies the table.
flags2 byte bitfieldReserved for future use; currently always 0.
+ + The Body has the following components: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Body for Table_map_log_event
NameFormatDescription
database_nameone byte string length, followed by null-terminated stringThe name of the database in which the table resides. The name + is represented as a one byte unsigned integer representing the + number of bytes in the name, followed by length bytes containing + the database name, followed by a terminating 0 byte. (Note the + redundancy in the representation of the length.)
table_nameone byte string length, followed by null-terminated stringThe name of the table, encoded the same way as the database + name above.
column_count@ref packed_integer "Packed Integer"The number of columns in the table, represented as a packed + variable-length integer.
column_typeList of column_count 1 byte enumeration valuesThe type of each column in the table, listed from left to + right. Each byte is mapped to a column type according to the + enumeration type enum_field_types defined in mysql_com.h. The + mapping of types to numbers is listed in the table @ref + Table_table_map_log_event_column_types "below" (along with + description of the associated metadata field).
metadata_length@ref packed_integer "Packed Integer"The length of the following metadata block
metadatalist of metadata for each columnFor each column from left to right, a chunk of data who's + length and semantics depends on the type of the column. The + length and semantics for the metadata for each column are listed + in the table @ref Table_table_map_log_event_column_types + "below".
null_bitscolumn_count bits, rounded up to nearest byteFor each column, a bit indicating whether data in the column + can be NULL or not. The number of bytes needed for this is + int((column_count+7)/8). The flag for the first column from the + left is in the least-significant bit of the first byte, the second + is in the second least significant bit of the first byte, the + ninth is in the least significant bit of the second byte, and so + on.
optional metadata fieldsoptional metadata fields are stored in Type, Length, Value(TLV) format. + Type takes 1 byte. Length is a packed integer value. Values takes + Length bytes. + There are some optional metadata defined. They are listed in the table + @ref Table_table_map_event_optional_metadata. Optional metadata fields + follow null_bits. Whether binlogging an optional metadata is decided by the + server. The order is not defined, so they can be binlogged in any order. +
+ + The table below lists all column types, along with the numerical + identifier for it and the size and interpretation of meta-data used + to describe the type. + + @anchor Table_table_map_log_event_column_types + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table_map_log_event column types: numerical identifier and + metadata
NameIdentifierSize of metadata in bytesDescription of metadata
MYSQL_TYPE_DECIMAL00No column metadata.
MYSQL_TYPE_TINY10No column metadata.
MYSQL_TYPE_SHORT20No column metadata.
MYSQL_TYPE_LONG30No column metadata.
MYSQL_TYPE_FLOAT41 byte1 byte unsigned integer, representing the "pack_length", which + is equal to sizeof(float) on the server from which the event + originates.
MYSQL_TYPE_DOUBLE51 byte1 byte unsigned integer, representing the "pack_length", which + is equal to sizeof(double) on the server from which the event + originates.
MYSQL_TYPE_NULL60No column metadata.
MYSQL_TYPE_TIMESTAMP70No column metadata.
MYSQL_TYPE_LONGLONG80No column metadata.
MYSQL_TYPE_INT2490No column metadata.
MYSQL_TYPE_DATE100No column metadata.
MYSQL_TYPE_TIME110No column metadata.
MYSQL_TYPE_DATETIME120No column metadata.
MYSQL_TYPE_YEAR130No column metadata.
MYSQL_TYPE_NEWDATE14This enumeration value is only used internally and cannot + exist in a binlog.
MYSQL_TYPE_VARCHAR152 bytes2 byte unsigned integer representing the maximum length of + the string.
MYSQL_TYPE_BIT162 bytesA 1 byte unsigned int representing the length in bits of the + bitfield (0 to 64), followed by a 1 byte unsigned int + representing the number of bytes occupied by the bitfield. The + number of bytes is either int((length+7)/8) or int(length/8).
MYSQL_TYPE_NEWDECIMAL2462 bytesA 1 byte unsigned int representing the precision, followed + by a 1 byte unsigned int representing the number of decimals.
MYSQL_TYPE_ENUM247This enumeration value is only used internally and cannot + exist in a binlog.
MYSQL_TYPE_SET248This enumeration value is only used internally and cannot + exist in a binlog.
MYSQL_TYPE_TINY_BLOB249This enumeration value is only used internally and cannot + exist in a binlog.
MYSQL_TYPE_MEDIUM_BLOB250This enumeration value is only used internally and cannot + exist in a binlog.
MYSQL_TYPE_LONG_BLOB251This enumeration value is only used internally and cannot + exist in a binlog.
MYSQL_TYPE_BLOB2521 byteThe pack length, i.e., the number of bytes needed to represent + the length of the blob: 1, 2, 3, or 4.
MYSQL_TYPE_VAR_STRING2532 bytesThis is used to store both strings and enumeration values. + The first byte is a enumeration value storing the real + type, which may be either MYSQL_TYPE_VAR_STRING or + MYSQL_TYPE_ENUM. The second byte is a 1 byte unsigned integer + representing the field size, i.e., the number of bytes needed to + store the length of the string.
MYSQL_TYPE_STRING2542 bytesThe first byte is always MYSQL_TYPE_VAR_STRING (i.e., 253). + The second byte is the field size, i.e., the number of bytes in + the representation of size of the string: 3 or 4.
MYSQL_TYPE_GEOMETRY2551 byteThe pack length, i.e., the number of bytes needed to represent + the length of the geometry: 1, 2, 3, or 4.
+ The table below lists all optional metadata types, along with the numerical + identifier for it and the size and interpretation of meta-data used + to describe the type. + + @anchor Table_table_map_event_optional_metadata + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table_map_event optional metadata types: numerical identifier and + metadata. Optional metadata fields are stored in TLV fields. + Format of values are described in this table.
TypeDescriptionFormat
SIGNEDNESSsignedness of numeric colums. This is included for all values of + binlog_row_metadata.For each numeric column, a bit indicates whether the numeric + colunm has unsigned flag. 1 means it is unsigned. The number of + bytes needed for this is int((column_count + 7) / 8). The order is + the same as the order of column_type field.
DEFAULT_CHARSETCharsets of character columns. It has a default charset for + the case that most of character columns have same charset and the + most used charset is binlogged as default charset.Collation + numbers are binlogged for identifying charsets. They are stored in + packed length format. Either DEFAULT_CHARSET or COLUMN_CHARSET is + included for all values of binlog_row_metadata.Default charset's collation is logged first. The charsets which are not + same to default charset are logged following default charset. They are + logged as column index and charset collation number pair sequence. The + column index is counted only in all character columns. The order is same to + the order of column_type + field.
COLUMN_CHARSETCharsets of character columns. For the case that most of columns have + different charsets, this field is logged. It is never logged with + DEFAULT_CHARSET together. Either DEFAULT_CHARSET or COLUMN_CHARSET is + included for all values of binlog_row_metadata.It is a collation number sequence for all character columns.
COLUMN_NAMENames of columns. This is only included if + binlog_row_metadata=FULL.A sequence of column names. For each column name, 1 byte for + the string length in bytes is followed by a string without null + terminator.
SET_STR_VALUEThe string values of SET columns. This is only included if + binlog_row_metadata=FULL.For each SET column, a pack_length representing the value + count is followed by a sequence of length and string pairs. length + is the byte count in pack_length format. The string has no null + terminator.
ENUM_STR_VALUEThe string values is ENUM columns. This is only included + if binlog_row_metadata=FULL.The format is the same as SET_STR_VALUE.
GEOMETRY_TYPEThe real type of geometry columns. This is only included + if binlog_row_metadata=FULL.A sequence of real type of geometry columns are stored in pack_length + format.
SIMPLE_PRIMARY_KEYThe primary key without any prefix. This is only included + if binlog_row_metadata=FULL and there is a primary key where every + key part covers an entire column.A sequence of column indexes. The indexes are stored in pack_length + format.
PRIMARY_KEY_WITH_PREFIXThe primary key with some prefix. It doesn't appear together with + SIMPLE_PRIMARY_KEY. This is only included if + binlog_row_metadata=FULL and there is a primary key where some key + part covers a prefix of the column.A sequence of column index and prefix length pairs. Both + column index and prefix length are in pack_length format. Prefix length + 0 means that the whole column value is used.
ENUM_AND_SET_DEFAULT_CHARSETCharsets of ENUM and SET columns. It has the same layout as + DEFAULT_CHARSET. If there are SET or ENUM columns and + binlog_row_metadata=FULL, exactly one of + ENUM_AND_SET_DEFAULT_CHARSET and ENUM_AND_SET_COLUMN_CHARSET + appears (the encoder chooses the representation that uses the + least amount of space). Otherwise, none of them appears.The same format as for DEFAULT_CHARSET, except it counts ENUM + and SET columns rather than character columns.
ENUM_AND_SET_COLUMN_CHARSETCharsets of ENUM and SET columns. It has the same layout as + COLUMN_CHARSET. If there are SET or ENUM columns and + binlog_row_metadata=FULL, exactly one of + ENUM_AND_SET_DEFAULT_CHARSET and ENUM_AND_SET_COLUMN_CHARSET + appears (the encoder chooses the representation that uses the + least amount of space). Otherwise, none of them appears.The same format as for COLUMN_CHARSET, except it counts ENUM + and SET columns rather than character columns.
+*/ +class Table_map_log_event : public Log_event +{ +public: + /* Constants */ + enum + { + TYPE_CODE = TABLE_MAP_EVENT + }; + + /** + Enumeration of the errors that can be returned. + */ + enum enum_error + { + ERR_OPEN_FAILURE = -1, /**< Failure to open table */ + ERR_OK = 0, /**< No error */ + ERR_TABLE_LIMIT_EXCEEDED = 1, /**< No more room for tables */ + ERR_OUT_OF_MEM = 2, /**< Out of memory */ + ERR_BAD_TABLE_DEF = 3, /**< Table definition does not match */ + ERR_RBR_TO_SBR = 4 /**< daisy-chanining RBR to SBR not allowed */ + }; + + enum enum_flag + { + /* + Nothing here right now, but the flags support is there in + preparation for changes that are coming. Need to add a + constant to make it compile under HP-UX: aCC does not like + empty enumerations. + */ + ENUM_FLAG_COUNT + }; + + typedef uint16 flag_set; + /** + DEFAULT_CHARSET and COLUMN_CHARSET don't appear together, and + ENUM_AND_SET_DEFAULT_CHARSET and ENUM_AND_SET_COLUMN_CHARSET don't + appear together. They are just alternative ways to pack character + set information. When binlogging, it logs character sets in the + way that occupies least storage. + + SIMPLE_PRIMARY_KEY and PRIMARY_KEY_WITH_PREFIX don't appear together. + SIMPLE_PRIMARY_KEY is for the primary keys which only use whole values of + pk columns. PRIMARY_KEY_WITH_PREFIX is + for the primary keys which just use part value of pk columns. + */ + enum Optional_metadata_field_type + { + SIGNEDNESS = 1, // UNSIGNED flag of numeric columns + DEFAULT_CHARSET, /* Character set of string columns, optimized to + minimize space when many columns have the + same charset. */ + COLUMN_CHARSET, /* Character set of string columns, optimized to + minimize space when columns have many + different charsets. 
*/ + COLUMN_NAME, + SET_STR_VALUE, // String value of SET columns + ENUM_STR_VALUE, // String value of ENUM columns + GEOMETRY_TYPE, // Real type of geometry columns + SIMPLE_PRIMARY_KEY, // Primary key without prefix + PRIMARY_KEY_WITH_PREFIX, // Primary key with prefix + ENUM_AND_SET_DEFAULT_CHARSET, /* Character set of enum and set + columns, optimized to minimize + space when many columns have the + same charset. */ + ENUM_AND_SET_COLUMN_CHARSET, /* Character set of enum and set + columns, optimized to minimize + space when many columns have the + same charset. */ + }; + /** + Metadata_fields organizes m_optional_metadata into a structured format which + is easy to access. + */ + // Values for binlog_row_metadata sysvar + enum enum_binlog_row_metadata + { + BINLOG_ROW_METADATA_NO_LOG= 0, + BINLOG_ROW_METADATA_MINIMAL= 1, + BINLOG_ROW_METADATA_FULL= 2 + }; + struct Optional_metadata_fields + { + typedef std::pair uint_pair; + typedef std::vector str_vector; + + struct Default_charset + { + Default_charset() : default_charset(0) {} + bool empty() const { return default_charset == 0; } + + // Default charset for the columns which are not in charset_pairs. + unsigned int default_charset; + + /* The uint_pair means . */ + std::vector charset_pairs; + }; + + // Contents of DEFAULT_CHARSET field is converted into Default_charset. + Default_charset m_default_charset; + // Contents of ENUM_AND_SET_DEFAULT_CHARSET are converted into + // Default_charset. + Default_charset m_enum_and_set_default_charset; + std::vector m_signedness; + // Character set number of every string column + std::vector m_column_charset; + // Character set number of every ENUM or SET column. + std::vector m_enum_and_set_column_charset; + std::vector m_column_name; + // each str_vector stores values of one enum/set column + std::vector m_enum_str_value; + std::vector m_set_str_value; + std::vector m_geometry_type; + /* + The uint_pair means . Prefix length is 0 if + whole column value is used. 
+ */ + std::vector m_primary_key; + + /* + It parses m_optional_metadata and populates into above variables. + + @param[in] optional_metadata points to the beginning of optional metadata + fields in table_map_event. + @param[in] optional_metadata_len length of optional_metadata field. + */ + Optional_metadata_fields(unsigned char* optional_metadata, + unsigned int optional_metadata_len); + }; + + /** + Print column metadata. Its format looks like: + # Columns(column_name type, column_name type, ...) + if column_name field is not logged into table_map_log_event, then + only type is printed. + + @param[out] file the place where column metadata is printed + @param[in] The metadata extracted from optional metadata fields + */ + void print_columns(IO_CACHE *file, + const Optional_metadata_fields &fields); + /** + Print primary information. Its format looks like: + # Primary Key(column_name, column_name(prefix), ...) + if column_name field is not logged into table_map_log_event, then + column index is printed. 
+ + @@param[out] file the place where primary key is printed + @@param[in] The metadata extracted from optional metadata fields + */ + void print_primary_key(IO_CACHE *file, + const Optional_metadata_fields &fields); + + /* Special constants representing sets of flags */ + enum + { + TM_NO_FLAGS = 0U, + TM_BIT_LEN_EXACT_F = (1U << 0), + // MariaDB flags (we starts from the other end) + TM_BIT_HAS_TRIGGERS_F= (1U << 14) + }; + + flag_set get_flags(flag_set flag) const { return m_flags & flag; } + +#ifdef MYSQL_SERVER + Table_map_log_event(THD *thd, TABLE *tbl, ulong tid, bool is_transactional); +#endif +#ifdef HAVE_REPLICATION + Table_map_log_event(const uchar *buf, uint event_len, + const Format_description_log_event *description_event); +#endif + + ~Table_map_log_event(); + +#ifdef MYSQL_CLIENT + table_def *create_table_def() + { + return new table_def(m_coltype, m_colcnt, m_field_metadata, + m_field_metadata_size, m_null_bits, m_flags); + } + int rewrite_db(const char* new_name, size_t new_name_len, + const Format_description_log_event*); +#endif + ulonglong get_table_id() const { return m_table_id; } + const char *get_table_name() const { return m_tblnam; } + const char *get_db_name() const { return m_dbnam; } + + virtual Log_event_type get_type_code() { return TABLE_MAP_EVENT; } + virtual enum_logged_status logged_status() { return LOGGED_TABLE_MAP; } + virtual bool is_valid() const { return m_memory != NULL; /* we check malloc */ } + virtual bool is_part_of_group() { return 1; } + + virtual int get_data_size() { return (uint) m_data_size; } +#ifdef MYSQL_SERVER + virtual int save_field_metadata(); + virtual bool write_data_header(); + virtual bool write_data_body(); + virtual const char *get_db() { return m_dbnam; } +#endif + +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual void pack_info(Protocol *protocol); +#endif + +#ifdef MYSQL_CLIENT + virtual bool print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + + +private: +#if 
defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); +#endif + +#ifdef MYSQL_SERVER + TABLE *m_table; + Binlog_type_info *binlog_type_info_array; + + + // Metadata fields buffer + StringBuffer<1024> m_metadata_buf; + + /** + Capture the optional metadata fields which should be logged into + table_map_log_event and serialize them into m_metadata_buf. + */ + void init_metadata_fields(); + bool init_signedness_field(); + /** + Capture and serialize character sets. Character sets for + character columns (TEXT etc) and character sets for ENUM and SET + columns are stored in different metadata fields. The reason is + that TEXT character sets are included even when + binlog_row_metadata=MINIMAL, whereas ENUM and SET character sets + are included only when binlog_row_metadata=FULL. + + @param include_type Predicate to determine if a given Field object + is to be included in the metadata field. + + @param default_charset_type Type code when storing in "default + charset" format. (See comment above Table_maps_log_event in + libbinlogevents/include/rows_event.h) + + @param column_charset_type Type code when storing in "column + charset" format. 
(See comment above Table_maps_log_event in + libbinlogevents/include/rows_event.h) + */ + bool init_charset_field(bool(* include_type)(Binlog_type_info *, Field *), + Optional_metadata_field_type default_charset_type, + Optional_metadata_field_type column_charset_type); + bool init_column_name_field(); + bool init_set_str_value_field(); + bool init_enum_str_value_field(); + bool init_geometry_type_field(); + bool init_primary_key_field(); +#endif + +#ifdef MYSQL_CLIENT + class Charset_iterator; + class Default_charset_iterator; + class Column_charset_iterator; +#endif + char const *m_dbnam; + size_t m_dblen; + char const *m_tblnam; + size_t m_tbllen; + ulong m_colcnt; + uchar *m_coltype; + + uchar *m_memory; + ulonglong m_table_id; + flag_set m_flags; + + size_t m_data_size; + + uchar *m_field_metadata; // buffer for field metadata + /* + The size of field metadata buffer set by calling save_field_metadata() + */ + ulong m_field_metadata_size; + uchar *m_null_bits; + uchar *m_meta_memory; + unsigned int m_optional_metadata_len; + unsigned char *m_optional_metadata; +}; + + +/** + @class Rows_log_event + + Common base class for all row-containing log events. + + RESPONSIBILITIES + + Encode the common parts of all events containing rows, which are: + - Write data header and data body to an IO_CACHE. + - Provide an interface for adding an individual row to the event. + + @section Rows_log_event_binary_format Binary Format +*/ + + +class Rows_log_event : public Log_event +{ +public: + /** + Enumeration of the errors that can be returned. 
+ */ + enum enum_error + { + ERR_OPEN_FAILURE = -1, /**< Failure to open table */ + ERR_OK = 0, /**< No error */ + ERR_TABLE_LIMIT_EXCEEDED = 1, /**< No more room for tables */ + ERR_OUT_OF_MEM = 2, /**< Out of memory */ + ERR_BAD_TABLE_DEF = 3, /**< Table definition does not match */ + ERR_RBR_TO_SBR = 4 /**< daisy-chanining RBR to SBR not allowed */ + }; + + /* + These definitions allow you to combine the flags into an + appropriate flag set using the normal bitwise operators. The + implicit conversion from an enum-constant to an integer is + accepted by the compiler, which is then used to set the real set + of flags. + */ + enum enum_flag + { + /* Last event of a statement */ + STMT_END_F = (1U << 0), + + /* Value of the OPTION_NO_FOREIGN_KEY_CHECKS flag in thd->options */ + NO_FOREIGN_KEY_CHECKS_F = (1U << 1), + + /* Value of the OPTION_RELAXED_UNIQUE_CHECKS flag in thd->options */ + RELAXED_UNIQUE_CHECKS_F = (1U << 2), + + /** + Indicates that rows in this event are complete, that is contain + values for all columns of the table. 
+ */ + COMPLETE_ROWS_F = (1U << 3), + + /* Value of the OPTION_NO_CHECK_CONSTRAINT_CHECKS flag in thd->options */ + NO_CHECK_CONSTRAINT_CHECKS_F = (1U << 7) + }; + + typedef uint16 flag_set; + + /* Special constants representing sets of flags */ + enum + { + RLE_NO_FLAGS = 0U + }; + + virtual ~Rows_log_event(); + + void set_flags(flag_set flags_arg) { m_flags |= flags_arg; } + void clear_flags(flag_set flags_arg) { m_flags &= ~flags_arg; } + flag_set get_flags(flag_set flags_arg) const { return m_flags & flags_arg; } + void update_flags() { int2store(temp_buf + m_flags_pos, m_flags); } + + Log_event_type get_type_code() { return m_type; } /* Specific type (_V1 etc) */ + enum_logged_status logged_status() { return LOGGED_ROW_EVENT; } + virtual Log_event_type get_general_type_code() = 0; /* General rows op type, no version */ + +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual void pack_info(Protocol *protocol); +#endif + +#ifdef MYSQL_CLIENT + /* not for direct call, each derived has its own ::print() */ + virtual bool print(FILE *file, PRINT_EVENT_INFO *print_event_info)= 0; + void change_to_flashback_event(PRINT_EVENT_INFO *print_event_info, uchar *rows_buff, Log_event_type ev_type); + bool print_verbose(IO_CACHE *file, + PRINT_EVENT_INFO *print_event_info); + size_t print_verbose_one_row(IO_CACHE *file, table_def *td, + PRINT_EVENT_INFO *print_event_info, + MY_BITMAP *cols_bitmap, + const uchar *ptr, const uchar *prefix, + const my_bool no_fill_output= 0); // if no_fill_output=1, then print result is unnecessary + size_t calc_row_event_length(table_def *td, + PRINT_EVENT_INFO *print_event_info, + MY_BITMAP *cols_bitmap, + const uchar *value); + void count_row_events(PRINT_EVENT_INFO *print_event_info); + +#endif + +#ifdef MYSQL_SERVER + int add_row_data(uchar *data, size_t length) + { + return do_add_row_data(data,length); + } +#endif + + /* Member functions to implement superclass interface */ + virtual int get_data_size(); + + MY_BITMAP const 
*get_cols() const { return &m_cols; } + MY_BITMAP const *get_cols_ai() const { return &m_cols_ai; } + size_t get_width() const { return m_width; } + ulonglong get_table_id() const { return m_table_id; } + +#if defined(MYSQL_SERVER) + /* + This member function compares the table's read/write_set + with this event's m_cols and m_cols_ai. Comparison takes + into account what type of rows event is this: Delete, Write or + Update, therefore it uses the correct m_cols[_ai] according + to the event type code. + + Note that this member function should only be called for the + following events: + - Delete_rows_log_event + - Write_rows_log_event + - Update_rows_log_event + + @param[IN] table The table to compare this events bitmaps + against. + + @return TRUE if sets match, FALSE otherwise. (following + bitmap_cmp return logic). + + */ + bool read_write_bitmaps_cmp(TABLE *table) + { + bool res= FALSE; + + switch (get_general_type_code()) + { + case DELETE_ROWS_EVENT: + res= bitmap_cmp(get_cols(), table->read_set); + break; + case UPDATE_ROWS_EVENT: + res= (bitmap_cmp(get_cols(), table->read_set) && + bitmap_cmp(get_cols_ai(), table->rpl_write_set)); + break; + case WRITE_ROWS_EVENT: + res= bitmap_cmp(get_cols(), table->rpl_write_set); + break; + default: + /* + We should just compare bitmaps for Delete, Write + or Update rows events. + */ + DBUG_ASSERT(0); + } + return res; + } +#endif + +#ifdef MYSQL_SERVER + virtual bool write_data_header(); + virtual bool write_data_body(); + virtual bool write_compressed(); + virtual const char *get_db() { return m_table->s->db.str; } +#endif + /* + Check that malloc() succeeded in allocating memory for the rows + buffer and the COLS vector. Checking that an Update_rows_log_event + is valid is done in the Update_rows_log_event::is_valid() + function. 
+ */ + virtual bool is_valid() const + { + return m_rows_buf && m_cols.bitmap; + } + bool is_part_of_group() { return get_flags(STMT_END_F) != 0; } + + uint m_row_count; /* The number of rows added to the event */ + + const uchar* get_extra_row_data() const { return m_extra_row_data; } + +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual uint8 get_trg_event_map()= 0; + + inline bool do_invoke_trigger() + { + return (slave_run_triggers_for_rbr && !master_had_triggers) || + slave_run_triggers_for_rbr == SLAVE_RUN_TRIGGERS_FOR_RBR_ENFORCE; + } +#endif + +protected: + /* + The constructors are protected since you're supposed to inherit + this class, not create instances of this class. + */ +#ifdef MYSQL_SERVER + Rows_log_event(THD*, TABLE*, ulong table_id, + MY_BITMAP const *cols, bool is_transactional, + Log_event_type event_type); +#endif + Rows_log_event(const uchar *row_data, uint event_len, + const Format_description_log_event *description_event); + void uncompress_buf(); + +#ifdef MYSQL_CLIENT + bool print_helper(FILE *, PRINT_EVENT_INFO *, char const *const name); +#endif + +#ifdef MYSQL_SERVER + virtual int do_add_row_data(uchar *data, size_t length); +#endif + +#ifdef MYSQL_SERVER + TABLE *m_table; /* The table the rows belong to */ +#endif + ulonglong m_table_id; /* Table ID */ + MY_BITMAP m_cols; /* Bitmap denoting columns available */ + ulong m_width; /* The width of the columns bitmap */ + /* + Bitmap for columns available in the after image, if present. These + fields are only available for Update_rows events. Observe that the + width of both the before image COLS vector and the after image + COLS vector is the same: the number of columns of the table on the + master. 
+ */ + MY_BITMAP m_cols_ai; + + ulong m_master_reclength; /* Length of record on master side */ + + /* Bit buffers in the same memory as the class */ + uint32 m_bitbuf[128/(sizeof(uint32)*8)]; + uint32 m_bitbuf_ai[128/(sizeof(uint32)*8)]; + + uchar *m_rows_buf; /* The rows in packed format */ + uchar *m_rows_cur; /* One-after the end of the data */ + uchar *m_rows_end; /* One-after the end of the allocated space */ + + size_t m_rows_before_size; /* The length before m_rows_buf */ + size_t m_flags_pos; /* The position of the m_flags */ + + flag_set m_flags; /* Flags for row-level events */ + + Log_event_type m_type; /* Actual event type */ + + uchar *m_extra_row_data; /* Pointer to extra row data if any */ + /* If non null, first byte is length */ + + bool m_vers_from_plain; + + + /* helper functions */ + +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + const uchar *m_curr_row; /* Start of the row being processed */ + const uchar *m_curr_row_end; /* One-after the end of the current row */ + uchar *m_key; /* Buffer to keep key value during searches */ + KEY *m_key_info; /* Pointer to KEY info for m_key_nr */ + uint m_key_nr; /* Key number */ + bool master_had_triggers; /* set after tables opening */ + + /* + RAII helper class to automatically handle the override/restore of thd->db + when applying row events, so it will be visible in SHOW PROCESSLIST. + + If triggers will be invoked, their logic frees the current thread's db, + so we use set_db() to use a copy of the table share's database. + + If not using triggers, the db is never freed, and we can reference the + same memory owned by the table share. 
+ */ + class Db_restore_ctx + { + private: + THD *thd; + LEX_CSTRING restore_db; + bool db_copied; + + Db_restore_ctx(Rows_log_event *rev) + : thd(rev->thd), restore_db(rev->thd->db) + { + TABLE *table= rev->m_table; + + if (table->triggers && rev->do_invoke_trigger()) + { + thd->reset_db(&null_clex_str); + thd->set_db(&table->s->db); + db_copied= true; + } + else + { + thd->reset_db(&table->s->db); + db_copied= false; + } + } + + ~Db_restore_ctx() + { + if (db_copied) + thd->set_db(&null_clex_str); + thd->reset_db(&restore_db); + } + + friend class Rows_log_event; + }; + + int find_key(); // Find a best key to use in find_row() + int find_row(rpl_group_info *); + int write_row(rpl_group_info *, const bool); + int update_sequence(); + + // Unpack the current row into m_table->record[0], but with + // a different columns bitmap. + int unpack_current_row(rpl_group_info *rgi, MY_BITMAP const *cols) + { + DBUG_ASSERT(m_table); + + ASSERT_OR_RETURN_ERROR(m_curr_row <= m_rows_end, HA_ERR_CORRUPT_EVENT); + return ::unpack_row(rgi, m_table, m_width, m_curr_row, cols, + &m_curr_row_end, &m_master_reclength, m_rows_end); + } + + // Unpack the current row into m_table->record[0] + int unpack_current_row(rpl_group_info *rgi) + { + DBUG_ASSERT(m_table); + + ASSERT_OR_RETURN_ERROR(m_curr_row <= m_rows_end, HA_ERR_CORRUPT_EVENT); + return ::unpack_row(rgi, m_table, m_width, m_curr_row, &m_cols, + &m_curr_row_end, &m_master_reclength, m_rows_end); + } + bool process_triggers(trg_event_type event, + trg_action_time_type time_type, + bool old_row_is_record1); + + /** + Helper function to check whether there is an auto increment + column on the table where the event is to be applied. + + @return true if there is an autoincrement field on the extra + columns, false otherwise. 
+ */ + inline bool is_auto_inc_in_extra_columns() + { + DBUG_ASSERT(m_table); + return (m_table->next_number_field && + m_table->next_number_field->field_index >= m_width); + } +#endif + +private: + +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); + + /* + Primitive to prepare for a sequence of row executions. + + DESCRIPTION + + Before doing a sequence of do_prepare_row() and do_exec_row() + calls, this member function should be called to prepare for the + entire sequence. Typically, this member function will allocate + space for any buffers that are needed for the two member + functions mentioned above. + + RETURN VALUE + + The member function will return 0 if all went OK, or a non-zero + error code otherwise. + */ + virtual + int do_before_row_operations(const Slave_reporting_capability *const log) = 0; + + /* + Primitive to clean up after a sequence of row executions. + + DESCRIPTION + + After doing a sequence of do_prepare_row() and do_exec_row(), + this member function should be called to clean up and release + any allocated buffers. + + The error argument, if non-zero, indicates an error which happened during + row processing before this function was called. In this case, even if + function is successful, it should return the error code given in the argument. + */ + virtual + int do_after_row_operations(const Slave_reporting_capability *const log, + int error) = 0; + + /* + Primitive to do the actual execution necessary for a row. + + DESCRIPTION + The member function will do the actual execution needed to handle a row. + The row is located at m_curr_row. When the function returns, + m_curr_row_end should point at the next row (one byte after the end + of the current row). + + RETURN VALUE + 0 if execution succeeded, 1 if execution failed. 
+ + */ + virtual int do_exec_row(rpl_group_info *rli) = 0; +#endif /* defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) */ + + friend class Old_rows_log_event; +}; + +/** + @class Write_rows_log_event + + Log row insertions and updates. The event contain several + insert/update rows for a table. Note that each event contains only + rows for one table. + + @section Write_rows_log_event_binary_format Binary Format +*/ +class Write_rows_log_event : public Rows_log_event +{ +public: + enum + { + /* Support interface to THD::binlog_prepare_pending_rows_event */ + TYPE_CODE = WRITE_ROWS_EVENT + }; + +#if defined(MYSQL_SERVER) + Write_rows_log_event(THD*, TABLE*, ulong table_id, + bool is_transactional); +#endif +#ifdef HAVE_REPLICATION + Write_rows_log_event(const uchar *buf, uint event_len, + const Format_description_log_event *description_event); +#endif +#if defined(MYSQL_SERVER) + static bool binlog_row_logging_function(THD *thd, TABLE *table, + bool is_transactional, + const uchar *before_record + __attribute__((unused)), + const uchar *after_record) + { + DBUG_ASSERT(!table->versioned(VERS_TRX_ID)); + return thd->binlog_write_row(table, is_transactional, after_record); + } +#endif + +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + uint8 get_trg_event_map(); +#endif + +private: + virtual Log_event_type get_general_type_code() { return (Log_event_type)TYPE_CODE; } + +#ifdef MYSQL_CLIENT + bool print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_before_row_operations(const Slave_reporting_capability *const); + virtual int do_after_row_operations(const Slave_reporting_capability *const,int); + virtual int do_exec_row(rpl_group_info *); +#endif +}; + +class Write_rows_compressed_log_event : public Write_rows_log_event +{ +public: +#if defined(MYSQL_SERVER) + Write_rows_compressed_log_event(THD*, TABLE*, ulong table_id, + bool is_transactional); + virtual bool write(); +#endif 
+#ifdef HAVE_REPLICATION + Write_rows_compressed_log_event(const uchar *buf, uint event_len, + const Format_description_log_event *description_event); +#endif +private: +#if defined(MYSQL_CLIENT) + bool print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif +}; + +/** + @class Update_rows_log_event + + Log row updates with a before image. The event contain several + update rows for a table. Note that each event contains only rows for + one table. + + Also note that the row data consists of pairs of row data: one row + for the old data and one row for the new data. + + @section Update_rows_log_event_binary_format Binary Format +*/ +class Update_rows_log_event : public Rows_log_event +{ +public: + enum + { + /* Support interface to THD::binlog_prepare_pending_rows_event */ + TYPE_CODE = UPDATE_ROWS_EVENT + }; + +#ifdef MYSQL_SERVER + Update_rows_log_event(THD*, TABLE*, ulong table_id, + bool is_transactional); + + void init(MY_BITMAP const *cols); +#endif + + virtual ~Update_rows_log_event(); + +#ifdef HAVE_REPLICATION + Update_rows_log_event(const uchar *buf, uint event_len, + const Format_description_log_event *description_event); +#endif + +#ifdef MYSQL_SERVER + static bool binlog_row_logging_function(THD *thd, TABLE *table, + bool is_transactional, + const uchar *before_record, + const uchar *after_record) + { + DBUG_ASSERT(!table->versioned(VERS_TRX_ID)); + return thd->binlog_update_row(table, is_transactional, + before_record, after_record); + } +#endif + + virtual bool is_valid() const + { + return Rows_log_event::is_valid() && m_cols_ai.bitmap; + } + +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + uint8 get_trg_event_map(); +#endif + +protected: + virtual Log_event_type get_general_type_code() { return (Log_event_type)TYPE_CODE; } + +#ifdef MYSQL_CLIENT + bool print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_before_row_operations(const 
Slave_reporting_capability *const); + virtual int do_after_row_operations(const Slave_reporting_capability *const,int); + virtual int do_exec_row(rpl_group_info *); +#endif /* defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) */ +}; + +class Update_rows_compressed_log_event : public Update_rows_log_event +{ +public: +#if defined(MYSQL_SERVER) + Update_rows_compressed_log_event(THD*, TABLE*, ulong table_id, + bool is_transactional); + virtual bool write(); +#endif +#ifdef HAVE_REPLICATION + Update_rows_compressed_log_event(const uchar *buf, uint event_len, + const Format_description_log_event *description_event); +#endif +private: +#if defined(MYSQL_CLIENT) + bool print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif +}; + +/** + @class Delete_rows_log_event + + Log row deletions. The event contain several delete rows for a + table. Note that each event contains only rows for one table. + + RESPONSIBILITIES + + - Act as a container for rows that has been deleted on the master + and should be deleted on the slave. + + COLLABORATION + + Row_writer + Create the event and add rows to the event. + Row_reader + Extract the rows from the event. 
+ + @section Delete_rows_log_event_binary_format Binary Format +*/ +class Delete_rows_log_event : public Rows_log_event +{ +public: + enum + { + /* Support interface to THD::binlog_prepare_pending_rows_event */ + TYPE_CODE = DELETE_ROWS_EVENT + }; + +#ifdef MYSQL_SERVER + Delete_rows_log_event(THD*, TABLE*, ulong, bool is_transactional); +#endif +#ifdef HAVE_REPLICATION + Delete_rows_log_event(const uchar *buf, uint event_len, + const Format_description_log_event *description_event); +#endif +#ifdef MYSQL_SERVER + static bool binlog_row_logging_function(THD *thd, TABLE *table, + bool is_transactional, + const uchar *before_record, + const uchar *after_record + __attribute__((unused))) + { + DBUG_ASSERT(!table->versioned(VERS_TRX_ID)); + return thd->binlog_delete_row(table, is_transactional, + before_record); + } +#endif + +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + uint8 get_trg_event_map(); +#endif + +protected: + virtual Log_event_type get_general_type_code() { return (Log_event_type)TYPE_CODE; } + +#ifdef MYSQL_CLIENT + bool print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_before_row_operations(const Slave_reporting_capability *const); + virtual int do_after_row_operations(const Slave_reporting_capability *const,int); + virtual int do_exec_row(rpl_group_info *); +#endif +}; + +class Delete_rows_compressed_log_event : public Delete_rows_log_event +{ +public: +#if defined(MYSQL_SERVER) + Delete_rows_compressed_log_event(THD*, TABLE*, ulong, bool is_transactional); + virtual bool write(); +#endif +#ifdef HAVE_REPLICATION + Delete_rows_compressed_log_event(const uchar *buf, uint event_len, + const Format_description_log_event *description_event); +#endif +private: +#if defined(MYSQL_CLIENT) + bool print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif +}; + + +#include "log_event_old.h" + +/** + @class Incident_log_event + + Class representing an incident, 
an occurence out of the ordinary, + that happened on the master. + + The event is used to inform the slave that something out of the + ordinary happened on the master that might cause the database to be + in an inconsistent state. + + + + + + + + + + + + + + + + + + + + + + + +
Incident event format
Symbol    Format    Description
INCIDENT    2    Incident number as an unsigned integer
MSGLEN    1    Message length as an unsigned integer
MESSAGE    MSGLEN    The message, if present. Not null terminated.
+ + @section Delete_rows_log_event_binary_format Binary Format +*/ +class Incident_log_event : public Log_event { +public: +#ifdef MYSQL_SERVER + Incident_log_event(THD *thd_arg, Incident incident) + : Log_event(thd_arg, 0, FALSE), m_incident(incident) + { + DBUG_ENTER("Incident_log_event::Incident_log_event"); + DBUG_PRINT("enter", ("m_incident: %d", m_incident)); + m_message.str= NULL; /* Just as a precaution */ + m_message.length= 0; + set_direct_logging(); + /* Replicate the incident regardless of @@skip_replication. */ + flags&= ~LOG_EVENT_SKIP_REPLICATION_F; + DBUG_VOID_RETURN; + } + + Incident_log_event(THD *thd_arg, Incident incident, const LEX_CSTRING *msg) + : Log_event(thd_arg, 0, FALSE), m_incident(incident) + { + extern PSI_memory_key key_memory_Incident_log_event_message; + DBUG_ENTER("Incident_log_event::Incident_log_event"); + DBUG_PRINT("enter", ("m_incident: %d", m_incident)); + m_message.length= 0; + if (!(m_message.str= (char*) my_malloc(key_memory_Incident_log_event_message, + msg->length + 1, MYF(MY_WME)))) + { + /* Mark this event invalid */ + m_incident= INCIDENT_NONE; + DBUG_VOID_RETURN; + } + strmake(m_message.str, msg->str, msg->length); + m_message.length= msg->length; + set_direct_logging(); + /* Replicate the incident regardless of @@skip_replication. 
*/ + flags&= ~LOG_EVENT_SKIP_REPLICATION_F; + DBUG_VOID_RETURN; + } +#endif + +#ifdef MYSQL_SERVER + void pack_info(Protocol*); + + virtual bool write_data_header(); + virtual bool write_data_body(); +#endif + + Incident_log_event(const uchar *buf, uint event_len, + const Format_description_log_event *descr_event); + + virtual ~Incident_log_event(); + +#ifdef MYSQL_CLIENT + virtual bool print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) + virtual int do_apply_event(rpl_group_info *rgi); +#endif + + virtual Log_event_type get_type_code() { return INCIDENT_EVENT; } + + virtual bool is_valid() const + { + return m_incident > INCIDENT_NONE && m_incident < INCIDENT_COUNT; + } + virtual int get_data_size() { + return INCIDENT_HEADER_LEN + 1 + (uint) m_message.length; + } + +private: + const char *description() const; + + Incident m_incident; + LEX_STRING m_message; +}; + +/** + @class Ignorable_log_event + + Base class for ignorable log events. Events deriving from + this class can be safely ignored by slaves that cannot + recognize them. Newer slaves, will be able to read and + handle them. This has been designed to be an open-ended + architecture, so adding new derived events shall not harm + the old slaves that support ignorable log event mechanism + (they will just ignore unrecognized ignorable events). + + @note The only thing that makes an event ignorable is that it has + the LOG_EVENT_IGNORABLE_F flag set. It is not strictly necessary + that ignorable event types derive from Ignorable_log_event; they may + just as well derive from Log_event and pass LOG_EVENT_IGNORABLE_F as + argument to the Log_event constructor. 
+**/ + +class Ignorable_log_event : public Log_event { +public: + int number; + const char *description; + +#ifndef MYSQL_CLIENT + Ignorable_log_event(THD *thd_arg) + :Log_event(thd_arg, LOG_EVENT_IGNORABLE_F, FALSE), + number(0), description("internal") + { + DBUG_ENTER("Ignorable_log_event::Ignorable_log_event"); + DBUG_VOID_RETURN; + } +#endif + + Ignorable_log_event(const uchar *buf, + const Format_description_log_event *descr_event, + const char *event_name); + virtual ~Ignorable_log_event(); + +#ifndef MYSQL_CLIENT + void pack_info(Protocol*); +#endif + +#ifdef MYSQL_CLIENT + virtual bool print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + + virtual Log_event_type get_type_code() { return IGNORABLE_LOG_EVENT; } + + virtual bool is_valid() const { return 1; } + + virtual int get_data_size() { return IGNORABLE_HEADER_LEN; } +}; + +#ifdef MYSQL_CLIENT +bool copy_cache_to_string_wrapped(IO_CACHE *body, + LEX_STRING *to, + bool do_wrap, + const char *delimiter, + bool is_verbose); +bool copy_cache_to_file_wrapped(IO_CACHE *body, + FILE *file, + bool do_wrap, + const char *delimiter, + bool is_verbose); +#endif + +#ifdef MYSQL_SERVER +/***************************************************************************** + + Heartbeat Log Event class + + Replication event to ensure to slave that master is alive. + The event is originated by master's dump thread and sent straight to + slave without being logged. Slave itself does not store it in relay log + but rather uses a data for immediate checks and throws away the event. + + Two members of the class log_ident and Log_event::log_pos comprise + @see the event_coordinates instance. The coordinates that a heartbeat + instance carries correspond to the last event master has sent from + its binlog. 
+ + ****************************************************************************/ +class Heartbeat_log_event: public Log_event +{ +public: + uint8 hb_flags; + Heartbeat_log_event(const uchar *buf, uint event_len, + const Format_description_log_event* description_event); + Log_event_type get_type_code() { return HEARTBEAT_LOG_EVENT; } + bool is_valid() const + { + return (log_ident != NULL && ident_len <= FN_REFLEN-1 && + log_pos >= BIN_LOG_HEADER_SIZE); + } + const uchar * get_log_ident() { return log_ident; } + uint get_ident_len() { return ident_len; } + +private: + uint ident_len; + const uchar *log_ident; +}; + +inline int Log_event_writer::write(Log_event *ev) +{ + ev->writer= this; + int res= ev->write(); + IF_DBUG(ev->writer= 0,); // writer must be set before every Log_event::write + add_status(ev->logged_status()); + return res; +} + +/** + The function is called by slave applier in case there are + active table filtering rules to force gathering events associated + with Query-log-event into an array to execute + them once the fate of the Query is determined for execution. 
+*/ +bool slave_execute_deferred_events(THD *thd); +#endif + +bool event_that_should_be_ignored(const uchar *buf); +bool event_checksum_test(uchar *buf, ulong event_len, + enum_binlog_checksum_alg alg); +enum enum_binlog_checksum_alg get_checksum_alg(const uchar *buf, ulong len); +extern TYPELIB binlog_checksum_typelib; +#ifdef WITH_WSREP +enum Log_event_type wsrep_peak_event(rpl_group_info *rgi, ulonglong* event_size); +#endif /* WITH_WSREP */ + +/** + @} (end of group Replication) +*/ + + +int binlog_buf_compress(const uchar *src, uchar *dst, uint32 len, + uint32 *comlen); +int binlog_buf_uncompress(const uchar *src, uchar *dst, uint32 len, + uint32 *newlen); +uint32 binlog_get_compress_len(uint32 len); +uint32 binlog_get_uncompress_len(const uchar *buf); + +int query_event_uncompress(const Format_description_log_event *description_event, + bool contain_checksum, + const uchar *src, ulong src_len, uchar *buf, + ulong buf_size, bool* is_malloc, + uchar **dst, ulong *newlen); +int row_log_event_uncompress(const Format_description_log_event + *description_event, + bool contain_checksum, + const uchar *src, ulong src_len, + uchar* buf, ulong buf_size, bool *is_malloc, + uchar **dst, ulong *newlen); + +bool is_parallel_retry_error(rpl_group_info *rgi, int err); + +/* + Compares two GTIDs to facilitate sorting a GTID list log event by domain id + (ascending) and sequence number (ascending) +*/ +int compare_glle_gtids(const void * _gtid1, const void *_gtid2); + +#endif /* _log_event_h */ diff --git a/sql/log_event_client.cc b/sql/log_event_client.cc new file mode 100644 index 00000000..4ae8bffc --- /dev/null +++ b/sql/log_event_client.cc @@ -0,0 +1,4011 @@ +/* + Copyright (c) 2000, 2018, Oracle and/or its affiliates. + Copyright (c) 2009, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + + +#include "log_event.h" +#ifndef MYSQL_CLIENT +#error MYSQL_CLIENT must be defined here +#endif + +#ifdef MYSQL_SERVER +#error MYSQL_SERVER must not be defined here +#endif + + +static bool pretty_print_str(IO_CACHE* cache, const char* str, + size_t len, bool identifier) +{ + const char* end = str + len; + if (my_b_write_byte(cache, identifier ? '`' : '\'')) + goto err; + + while (str < end) + { + char c; + int error; + + switch ((c=*str++)) { + case '\n': error= my_b_write(cache, (uchar*)"\\n", 2); break; + case '\r': error= my_b_write(cache, (uchar*)"\\r", 2); break; + case '\\': error= my_b_write(cache, (uchar*)"\\\\", 2); break; + case '\b': error= my_b_write(cache, (uchar*)"\\b", 2); break; + case '\t': error= my_b_write(cache, (uchar*)"\\t", 2); break; + case '\'': error= my_b_write(cache, (uchar*)"\\'", 2); break; + case 0 : error= my_b_write(cache, (uchar*)"\\0", 2); break; + default: + error= my_b_write_byte(cache, c); + break; + } + if (unlikely(error)) + goto err; + } + return my_b_write_byte(cache, identifier ? '`' : '\''); + +err: + return 1; +} + +/** + Print src as an string enclosed with "'" + + @param[out] cache IO_CACHE where the string will be printed. + @param[in] str the string will be printed. + @param[in] len length of the string. 
+*/ +static inline bool pretty_print_str(IO_CACHE* cache, const char* str, + size_t len) +{ + return pretty_print_str(cache, str, len, false); +} + +/** + Print src as an identifier enclosed with "`" + + @param[out] cache IO_CACHE where the identifier will be printed. + @param[in] str the string will be printed. + @param[in] len length of the string. + */ +static inline bool pretty_print_identifier(IO_CACHE* cache, const char* str, + size_t len) +{ + return pretty_print_str(cache, str, len, true); +} + + + +/** + Prints a "session_var=value" string. Used by mysqlbinlog to print some SET + commands just before it prints a query. +*/ + +static bool print_set_option(IO_CACHE* file, uint32 bits_changed, + uint32 option, uint32 flags, const char* name, + bool* need_comma) +{ + if (bits_changed & option) + { + if (*need_comma) + if (my_b_write(file, (uchar*)", ", 2)) + goto err; + if (my_b_printf(file, "%s=%d", name, MY_TEST(flags & option))) + goto err; + *need_comma= 1; + } + return 0; +err: + return 1; +} + + +static bool hexdump_minimal_header_to_io_cache(IO_CACHE *file, + my_off_t offset, + uchar *ptr) +{ + DBUG_ASSERT(LOG_EVENT_MINIMAL_HEADER_LEN == 19); + + /* + Pretty-print the first LOG_EVENT_MINIMAL_HEADER_LEN (19) bytes of the + common header, which contains the basic information about the log event. + Every event will have at least this much header, but events could contain + more headers (which must be printed by other methods, if desired). 
+ */ + char emit_buf[120]; // Enough for storing one line + size_t emit_buf_written; + + if (my_b_printf(file, + "# " + "|Timestamp " + "|Type " + "|Master ID " + "|Size " + "|Master Pos " + "|Flags\n")) + goto err; + emit_buf_written= + my_snprintf(emit_buf, sizeof(emit_buf), + "# %8llx " /* Position */ + "|%02x %02x %02x %02x " /* Timestamp */ + "|%02x " /* Type */ + "|%02x %02x %02x %02x " /* Master ID */ + "|%02x %02x %02x %02x " /* Size */ + "|%02x %02x %02x %02x " /* Master Pos */ + "|%02x %02x\n", /* Flags */ + (ulonglong) offset, /* Position */ + ptr[0], ptr[1], ptr[2], ptr[3], /* Timestamp */ + ptr[4], /* Type */ + ptr[5], ptr[6], ptr[7], ptr[8], /* Master ID */ + ptr[9], ptr[10], ptr[11], ptr[12], /* Size */ + ptr[13], ptr[14], ptr[15], ptr[16], /* Master Pos */ + ptr[17], ptr[18]); /* Flags */ + + DBUG_ASSERT(static_cast(emit_buf_written) < sizeof(emit_buf)); + if (my_b_write(file, reinterpret_cast(emit_buf), emit_buf_written) || + my_b_write(file, (uchar*)"#\n", 2)) + goto err; + + return 0; +err: + return 1; +} + + +/* + The number of bytes to print per line. Should be an even number, + and "hexdump -C" uses 16, so we'll duplicate that here. 
+*/ +#define HEXDUMP_BYTES_PER_LINE 16 + +static void format_hex_line(char *emit_buff) +{ + memset(emit_buff + 1, ' ', + 1 + 8 + 2 + (HEXDUMP_BYTES_PER_LINE * 3 + 1) + 2 + + HEXDUMP_BYTES_PER_LINE); + emit_buff[0]= '#'; + emit_buff[2 + 8 + 2 + (HEXDUMP_BYTES_PER_LINE * 3 + 1) + 1]= '|'; + emit_buff[2 + 8 + 2 + (HEXDUMP_BYTES_PER_LINE * 3 + 1) + 2 + + HEXDUMP_BYTES_PER_LINE]= '|'; + emit_buff[2 + 8 + 2 + (HEXDUMP_BYTES_PER_LINE * 3 + 1) + 2 + + HEXDUMP_BYTES_PER_LINE + 1]= '\n'; + emit_buff[2 + 8 + 2 + (HEXDUMP_BYTES_PER_LINE * 3 + 1) + 2 + + HEXDUMP_BYTES_PER_LINE + 2]= '\0'; +} + +static bool hexdump_data_to_io_cache(IO_CACHE *file, + my_off_t offset, + uchar *ptr, + my_off_t size) +{ + /* + 2 = '# ' + 8 = address + 2 = ' ' + (HEXDUMP_BYTES_PER_LINE * 3 + 1) = Each byte prints as two hex digits, + plus a space + 2 = ' |' + HEXDUMP_BYTES_PER_LINE = text representation + 2 = '|\n' + 1 = '\0' + */ + char emit_buffer[2 + 8 + 2 + (HEXDUMP_BYTES_PER_LINE * 3 + 1) + 2 + + HEXDUMP_BYTES_PER_LINE + 2 + 1 ]; + char *h,*c; + my_off_t i; + + if (size == 0) + return 0; // ok, nothing to do + + format_hex_line(emit_buffer); + /* + Print the rest of the event (without common header) + */ + my_off_t starting_offset = offset; + for (i= 0, + c= emit_buffer + 2 + 8 + 2 + (HEXDUMP_BYTES_PER_LINE * 3 + 1) + 2, + h= emit_buffer + 2 + 8 + 2; + i < size; + i++, ptr++) + { + my_snprintf(h, 4, "%02x ", *ptr); + h+= 3; + + *c++= my_isprint(&my_charset_bin, *ptr) ? *ptr : '.'; + + /* Print in groups of HEXDUMP_BYTES_PER_LINE characters. 
*/ + if ((i % HEXDUMP_BYTES_PER_LINE) == (HEXDUMP_BYTES_PER_LINE - 1)) + { + /* remove \0 left after printing hex byte representation */ + *h= ' '; + /* prepare space to print address */ + memset(emit_buffer + 2, ' ', 8); + /* print address */ + size_t const emit_buf_written= my_snprintf(emit_buffer + 2, 9, "%8llx", + (ulonglong) starting_offset); + /* remove \0 left after printing address */ + emit_buffer[2 + emit_buf_written]= ' '; + if (my_b_write(file, reinterpret_cast(emit_buffer), + sizeof(emit_buffer) - 1)) + goto err; + c= emit_buffer + 2 + 8 + 2 + (HEXDUMP_BYTES_PER_LINE * 3 + 1) + 2; + h= emit_buffer + 2 + 8 + 2; + format_hex_line(emit_buffer); + starting_offset+= HEXDUMP_BYTES_PER_LINE; + } + else if ((i % (HEXDUMP_BYTES_PER_LINE / 2)) + == ((HEXDUMP_BYTES_PER_LINE / 2) - 1)) + { + /* + In the middle of the group of HEXDUMP_BYTES_PER_LINE, emit an extra + space in the hex string, to make two groups. + */ + *h++= ' '; + } + + } + + /* + There is still data left in our buffer, which means that the previous + line was not perfectly HEXDUMP_BYTES_PER_LINE characters, so write an + incomplete line, with spaces to pad out to the same length as a full + line would be, to make things more readable. 
+ */ + if (h != emit_buffer + 2 + 8 + 2) + { + *h= ' '; + *c++= '|'; *c++= '\n'; + memset(emit_buffer + 2, ' ', 8); + size_t const emit_buf_written= my_snprintf(emit_buffer + 2, 9, "%8llx", + (ulonglong) starting_offset); + emit_buffer[2 + emit_buf_written]= ' '; + /* pad unprinted area */ + memset(h, ' ', + (HEXDUMP_BYTES_PER_LINE * 3 + 1) - (h - (emit_buffer + 2 + 8 + 2))); + if (my_b_write(file, reinterpret_cast(emit_buffer), + c - emit_buffer)) + goto err; + } + if (my_b_write(file, (uchar*)"#\n", 2)) + goto err; + + return 0; +err: + return 1; +} + +static inline bool is_numeric_type(uint type) +{ + switch (type) + { + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_INT24: + case MYSQL_TYPE_LONG: + case MYSQL_TYPE_LONGLONG: + case MYSQL_TYPE_NEWDECIMAL: + case MYSQL_TYPE_FLOAT: + case MYSQL_TYPE_DOUBLE: + return true; + default: + return false; + } + return false; +} + +static inline bool is_character_type(uint type) +{ + switch (type) + { + case MYSQL_TYPE_STRING: + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_BLOB: + // Base class is blob for geom type + case MYSQL_TYPE_GEOMETRY: + return true; + default: + return false; + } +} + +static inline bool is_enum_or_set_type(uint type) { + return type == MYSQL_TYPE_ENUM || type == MYSQL_TYPE_SET; +} + + +/* + Log_event::print_header() +*/ + +bool Log_event::print_header(IO_CACHE* file, + PRINT_EVENT_INFO* print_event_info, + bool is_more __attribute__((unused))) +{ + char llbuff[22]; + my_off_t hexdump_from= print_event_info->hexdump_from; + DBUG_ENTER("Log_event::print_header"); + + if (my_b_write_byte(file, '#') || + print_timestamp(file) || + my_b_printf(file, " server id %lu end_log_pos %s ", (ulong) server_id, + llstr(log_pos,llbuff))) + goto err; + + /* print the checksum */ + + if (checksum_alg != BINLOG_CHECKSUM_ALG_OFF && + checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF) + { + char checksum_buf[BINLOG_CHECKSUM_LEN * 2 + 4]; // to fit to "%p " + size_t const 
bytes_written= + my_snprintf(checksum_buf, sizeof(checksum_buf), "0x%08x ", crc); + if (my_b_printf(file, "%s ", get_type(&binlog_checksum_typelib, + checksum_alg)) || + my_b_printf(file, checksum_buf, bytes_written)) + goto err; + } + + /* mysqlbinlog --hexdump */ + if (print_event_info->hexdump_from) + { + my_b_write_byte(file, '\n'); + uchar *ptr= (uchar*)temp_buf; + my_off_t size= uint4korr(ptr + EVENT_LEN_OFFSET); + my_off_t hdr_len= get_header_len(print_event_info->common_header_len); + + size-= hdr_len; + + if (my_b_printf(file, "# Position\n")) + goto err; + + /* Write the header, nicely formatted by field. */ + if (hexdump_minimal_header_to_io_cache(file, hexdump_from, ptr)) + goto err; + + ptr+= hdr_len; + hexdump_from+= hdr_len; + + /* Print the rest of the data, mimicking "hexdump -C" output. */ + if (hexdump_data_to_io_cache(file, hexdump_from, ptr, size)) + goto err; + + /* + Prefix the next line so that the output from print_helper() + will appear as a comment. + */ + if (my_b_write(file, (uchar*)"# Event: ", 9)) + goto err; + } + + DBUG_RETURN(0); + +err: + DBUG_RETURN(1); +} + + +/** + Prints a quoted string to io cache. + Control characters are displayed as hex sequence, e.g. \x00 + Single-quote and backslash characters are escaped with a \ + + @param[in] file IO cache + @param[in] prt Pointer to string + @param[in] length String length +*/ + +static void +my_b_write_quoted(IO_CACHE *file, const uchar *ptr, uint length) +{ + const uchar *s; + my_b_write_byte(file, '\''); + for (s= ptr; length > 0 ; s++, length--) + { + if (*s > 0x1F) + my_b_write_byte(file, *s); + else if (*s == '\'') + my_b_write(file, (uchar*)"\\'", 2); + else if (*s == '\\') + my_b_write(file, (uchar*)"\\\\", 2); + else + { + uchar hex[10]; + size_t len= my_snprintf((char*) hex, sizeof(hex), "%s%02x", "\\x", *s); + my_b_write(file, hex, len); + } + } + my_b_write_byte(file, '\''); +} + + +/** + Prints a bit string to io cache in format b'1010'. 
+ + @param[in] file IO cache + @param[in] ptr Pointer to string + @param[in] nbits Number of bits +*/ +static void +my_b_write_bit(IO_CACHE *file, const uchar *ptr, uint nbits) +{ + uint bitnum, nbits8= ((nbits + 7) / 8) * 8, skip_bits= nbits8 - nbits; + my_b_write(file, (uchar*)"b'", 2); + for (bitnum= skip_bits ; bitnum < nbits8; bitnum++) + { + int is_set= (ptr[(bitnum) / 8] >> (7 - bitnum % 8)) & 0x01; + my_b_write_byte(file, (is_set ? '1' : '0')); + } + my_b_write_byte(file, '\''); +} + + +/** + Prints a packed string to io cache. + The string consists of length packed to 1 or 2 bytes, + followed by string data itself. + + @param[in] file IO cache + @param[in] ptr Pointer to string + @param[in] length String size + + @retval - number of bytes scanned. +*/ +static size_t +my_b_write_quoted_with_length(IO_CACHE *file, const uchar *ptr, uint length) +{ + if (length < 256) + { + length= *ptr; + my_b_write_quoted(file, ptr + 1, length); + return length + 1; + } + else + { + length= uint2korr(ptr); + my_b_write_quoted(file, ptr + 2, length); + return length + 2; + } +} + + +/** + Prints a 32-bit number in both signed and unsigned representation + + @param[in] file IO cache + @param[in] sl Signed number + @param[in] ul Unsigned number +*/ +static bool +my_b_write_sint32_and_uint32(IO_CACHE *file, int32 si, uint32 ui) +{ + bool res= my_b_printf(file, "%d", si); + if (si < 0) + if (my_b_printf(file, " (%u)", ui)) + res= 1; + return res; +} + + +/** + Print a packed value of the given SQL type into IO cache + + @param[in] file IO cache + @param[in] ptr Pointer to string + @param[in] type Column type + @param[in] meta Column meta information + @param[out] typestr SQL type string buffer (for verbose output) + @param[out] typestr_length Size of typestr + + @retval - number of bytes scanned from ptr. 
+ Except in case of NULL, in which case we return 1 to indicate ok +*/ + +static size_t +log_event_print_value(IO_CACHE *file, PRINT_EVENT_INFO *print_event_info, + const uchar *ptr, uint type, uint meta, + char *typestr, size_t typestr_length) +{ + uint32 length= 0; + + if (type == MYSQL_TYPE_STRING) + { + if (meta >= 256) + { + uint byte0= meta >> 8; + uint byte1= meta & 0xFF; + + if ((byte0 & 0x30) != 0x30) + { + /* a long CHAR() field: see #37426 */ + length= byte1 | (((byte0 & 0x30) ^ 0x30) << 4); + type= byte0 | 0x30; + } + else + length = meta & 0xFF; + } + else + length= meta; + } + + switch (type) { + case MYSQL_TYPE_LONG: + { + strmake(typestr, "INT", typestr_length); + if (!ptr) + goto return_null; + + int32 si= sint4korr(ptr); + uint32 ui= uint4korr(ptr); + my_b_write_sint32_and_uint32(file, si, ui); + return 4; + } + + case MYSQL_TYPE_TINY: + { + strmake(typestr, "TINYINT", typestr_length); + if (!ptr) + goto return_null; + + my_b_write_sint32_and_uint32(file, (int) (signed char) *ptr, + (uint) (unsigned char) *ptr); + return 1; + } + + case MYSQL_TYPE_SHORT: + { + strmake(typestr, "SHORTINT", typestr_length); + if (!ptr) + goto return_null; + + int32 si= (int32) sint2korr(ptr); + uint32 ui= (uint32) uint2korr(ptr); + my_b_write_sint32_and_uint32(file, si, ui); + return 2; + } + + case MYSQL_TYPE_INT24: + { + strmake(typestr, "MEDIUMINT", typestr_length); + if (!ptr) + goto return_null; + + int32 si= sint3korr(ptr); + uint32 ui= uint3korr(ptr); + my_b_write_sint32_and_uint32(file, si, ui); + return 3; + } + + case MYSQL_TYPE_LONGLONG: + { + strmake(typestr, "LONGINT", typestr_length); + if (!ptr) + goto return_null; + + char tmp[64]; + size_t length; + longlong si= sint8korr(ptr); + length= (longlong10_to_str(si, tmp, -10) - tmp); + my_b_write(file, (uchar*)tmp, length); + if (si < 0) + { + ulonglong ui= uint8korr(ptr); + longlong10_to_str((longlong) ui, tmp, 10); + my_b_printf(file, " (%s)", tmp); + } + return 8; + } + + case MYSQL_TYPE_NEWDECIMAL: + 
{ + uint precision= meta >> 8; + uint decimals= meta & 0xFF; + my_snprintf(typestr, typestr_length, "DECIMAL(%d,%d)", + precision, decimals); + if (!ptr) + goto return_null; + + uint bin_size= my_decimal_get_binary_size(precision, decimals); + my_decimal dec((const uchar *) ptr, precision, decimals); + int length= DECIMAL_MAX_STR_LENGTH; + char buff[DECIMAL_MAX_STR_LENGTH + 1]; + decimal2string(&dec, buff, &length, 0, 0, 0); + my_b_write(file, (uchar*)buff, length); + return bin_size; + } + + case MYSQL_TYPE_FLOAT: + { + strmake(typestr, "FLOAT", typestr_length); + if (!ptr) + goto return_null; + + float fl; + float4get(fl, ptr); + char tmp[320]; + sprintf(tmp, "%-20g", (double) fl); + my_b_printf(file, "%s", tmp); /* my_snprintf doesn't support %-20g */ + return 4; + } + + case MYSQL_TYPE_DOUBLE: + { + double dbl; + strmake(typestr, "DOUBLE", typestr_length); + if (!ptr) + goto return_null; + + float8get(dbl, ptr); + char tmp[320]; + sprintf(tmp, "%-.20g", dbl); /* my_snprintf doesn't support %-20g */ + my_b_printf(file, "%s", tmp); /* tmp is data, never a format string */ + return 8; + } + + case MYSQL_TYPE_BIT: + { + /* Meta-data: bit_len, bytes_in_rec, 2 bytes */ + uint nbits= ((meta >> 8) * 8) + (meta & 0xFF); + my_snprintf(typestr, typestr_length, "BIT(%d)", nbits); + if (!ptr) + goto return_null; + + length= (nbits + 7) / 8; + my_b_write_bit(file, ptr, nbits); + return length; + } + + case MYSQL_TYPE_TIMESTAMP: + { + strmake(typestr, "TIMESTAMP", typestr_length); + if (!ptr) + goto return_null; + + uint32 i32= uint4korr(ptr); + my_b_printf(file, "%d", i32); + return 4; + } + + case MYSQL_TYPE_TIMESTAMP2: + { + my_snprintf(typestr, typestr_length, "TIMESTAMP(%d)", meta); + if (!ptr) + goto return_null; + + char buf[MAX_DATE_STRING_REP_LENGTH]; + struct timeval tm; + my_timestamp_from_binary(&tm, ptr, meta); + int buflen= my_timeval_to_str(&tm, buf, meta); + my_b_write(file, (uchar*)buf, buflen); + return my_timestamp_binary_length(meta); + } + + case MYSQL_TYPE_DATETIME: + { + strmake(typestr, 
"DATETIME", typestr_length); + if (!ptr) + goto return_null; + + ulong d, t; + uint64 i64= uint8korr(ptr); /* YYYYMMDDhhmmss */ + d= (ulong) (i64 / 1000000); + t= (ulong) (i64 % 1000000); + + my_b_printf(file, "'%04d-%02d-%02d %02d:%02d:%02d'", + (int) (d / 10000), (int) (d % 10000) / 100, (int) (d % 100), + (int) (t / 10000), (int) (t % 10000) / 100, (int) t % 100); + return 8; + } + + case MYSQL_TYPE_DATETIME2: + { + my_snprintf(typestr, typestr_length, "DATETIME(%d)", meta); + if (!ptr) + goto return_null; + + char buf[MAX_DATE_STRING_REP_LENGTH]; + MYSQL_TIME ltime; + longlong packed= my_datetime_packed_from_binary(ptr, meta); + TIME_from_longlong_datetime_packed(&ltime, packed); + int buflen= my_datetime_to_str(&ltime, buf, meta); + my_b_write_quoted(file, (uchar *) buf, buflen); + return my_datetime_binary_length(meta); + } + + case MYSQL_TYPE_TIME: + { + strmake(typestr, "TIME", typestr_length); + if (!ptr) + goto return_null; + + int32 tmp= sint3korr(ptr); + int32 i32= tmp >= 0 ? tmp : - tmp; + const char *sign= tmp < 0 ? 
"-" : ""; + my_b_printf(file, "'%s%02d:%02d:%02d'", + sign, i32 / 10000, (i32 % 10000) / 100, i32 % 100); + return 3; + } + + case MYSQL_TYPE_TIME2: + { + my_snprintf(typestr, typestr_length, "TIME(%d)", meta); + if (!ptr) + goto return_null; + + char buf[MAX_DATE_STRING_REP_LENGTH]; + MYSQL_TIME ltime; + longlong packed= my_time_packed_from_binary(ptr, meta); + TIME_from_longlong_time_packed(&ltime, packed); + int buflen= my_time_to_str(&ltime, buf, meta); + my_b_write_quoted(file, (uchar *) buf, buflen); + return my_time_binary_length(meta); + } + + case MYSQL_TYPE_NEWDATE: + { + strmake(typestr, "DATE", typestr_length); + if (!ptr) + goto return_null; + + uint32 tmp= uint3korr(ptr); + int part; + char buf[11]; + char *pos= &buf[10]; // start from '\0' to the beginning + + /* Copied from field.cc */ + *pos--=0; // End NULL + part=(int) (tmp & 31); + *pos--= (char) ('0'+part%10); + *pos--= (char) ('0'+part/10); + *pos--= ':'; + part=(int) (tmp >> 5 & 15); + *pos--= (char) ('0'+part%10); + *pos--= (char) ('0'+part/10); + *pos--= ':'; + part=(int) (tmp >> 9); + *pos--= (char) ('0'+part%10); part/=10; + *pos--= (char) ('0'+part%10); part/=10; + *pos--= (char) ('0'+part%10); part/=10; + *pos= (char) ('0'+part); + my_b_printf(file , "'%s'", buf); + return 3; + } + + case MYSQL_TYPE_DATE: + { + strmake(typestr, "DATE", typestr_length); + if (!ptr) + goto return_null; + + uint i32= uint3korr(ptr); + my_b_printf(file , "'%04d:%02d:%02d'", + (int)(i32 / (16L * 32L)), (int)(i32 / 32L % 16L), + (int)(i32 % 32L)); + return 3; + } + + case MYSQL_TYPE_YEAR: + { + strmake(typestr, "YEAR", typestr_length); + if (!ptr) + goto return_null; + + uint32 i32= *ptr; + my_b_printf(file, "%04d", i32+ 1900); + return 1; + } + + case MYSQL_TYPE_ENUM: + switch (meta & 0xFF) { + case 1: + strmake(typestr, "ENUM(1 byte)", typestr_length); + if (!ptr) + goto return_null; + + my_b_printf(file, "%d", (int) *ptr); + return 1; + case 2: + { + strmake(typestr, "ENUM(2 bytes)", typestr_length); + if 
(!ptr) + goto return_null; + + int32 i32= uint2korr(ptr); + my_b_printf(file, "%d", i32); + return 2; + } + default: + my_b_printf(file, "!! Unknown ENUM packlen=%d", meta & 0xFF); + return 0; + } + break; + + case MYSQL_TYPE_SET: + my_snprintf(typestr, typestr_length, "SET(%d bytes)", meta & 0xFF); + if (!ptr) + goto return_null; + + my_b_write_bit(file, ptr , (meta & 0xFF) * 8); + return meta & 0xFF; + + case MYSQL_TYPE_BLOB_COMPRESSED: + case MYSQL_TYPE_BLOB: + switch (meta) { + case 1: + my_snprintf(typestr, typestr_length, "TINYBLOB/TINYTEXT%s", + type == MYSQL_TYPE_BLOB_COMPRESSED ? " COMPRESSED" : ""); + if (!ptr) + goto return_null; + + length= *ptr; + my_b_write_quoted(file, ptr + 1, length); + return length + 1; + case 2: + my_snprintf(typestr, typestr_length, "BLOB/TEXT%s", + type == MYSQL_TYPE_BLOB_COMPRESSED ? " COMPRESSED" : ""); + if (!ptr) + goto return_null; + + length= uint2korr(ptr); + my_b_write_quoted(file, ptr + 2, length); + return length + 2; + case 3: + my_snprintf(typestr, typestr_length, "MEDIUMBLOB/MEDIUMTEXT%s", + type == MYSQL_TYPE_BLOB_COMPRESSED ? " COMPRESSED" : ""); + if (!ptr) + goto return_null; + + length= uint3korr(ptr); + my_b_write_quoted(file, ptr + 3, length); + return length + 3; + case 4: + my_snprintf(typestr, typestr_length, "LONGBLOB/LONGTEXT%s", + type == MYSQL_TYPE_BLOB_COMPRESSED ? " COMPRESSED" : ""); + if (!ptr) + goto return_null; + + length= uint4korr(ptr); + my_b_write_quoted(file, ptr + 4, length); + return length + 4; + default: + my_b_printf(file, "!! Unknown BLOB packlen=%d", length); + return 0; + } + + case MYSQL_TYPE_VARCHAR_COMPRESSED: + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_VAR_STRING: + length= meta; + my_snprintf(typestr, typestr_length, "VARSTRING(%d)%s", length, + type == MYSQL_TYPE_VARCHAR_COMPRESSED ? 
" COMPRESSED" : ""); + if (!ptr) + goto return_null; + + return my_b_write_quoted_with_length(file, ptr, length); + + case MYSQL_TYPE_STRING: + my_snprintf(typestr, typestr_length, "STRING(%d)", length); + if (!ptr) + goto return_null; + + return my_b_write_quoted_with_length(file, ptr, length); + + case MYSQL_TYPE_DECIMAL: + print_event_info->flush_for_error(); + fprintf(stderr, "\nError: Found Old DECIMAL (mysql-4.1 or earlier). " + "Not enough metadata to display the value.\n"); + break; + + case MYSQL_TYPE_GEOMETRY: + strmake(typestr, "GEOMETRY", typestr_length); + if (!ptr) + goto return_null; + + length= uint4korr(ptr); + my_b_write_quoted(file, ptr + meta, length); + return length + meta; + + default: + print_event_info->flush_for_error(); + fprintf(stderr, + "\nError: Don't know how to handle column type: %d meta: %d (%04x)\n", + type, meta, meta); + break; + } + *typestr= 0; + return 0; + +return_null: + return my_b_write(file, (uchar*) "NULL", 4) ? 0 : 1; +} + + +/** + Print a packed row into IO cache + + @param[in] file IO cache + @param[in] td Table definition + @param[in] print_event_into Print parameters + @param[in] cols_bitmap Column bitmaps. + @param[in] value Pointer to packed row + @param[in] prefix Row's SQL clause ("SET", "WHERE", etc) + + @retval 0 error + # number of bytes scanned. +*/ + + +size_t +Rows_log_event::print_verbose_one_row(IO_CACHE *file, table_def *td, + PRINT_EVENT_INFO *print_event_info, + MY_BITMAP *cols_bitmap, + const uchar *value, const uchar *prefix, + const my_bool no_fill_output) +{ + const uchar *value0= value; + const uchar *null_bits= value; + uint null_bit_index= 0; + char typestr[64]= ""; + +#ifdef WHEN_FLASHBACK_REVIEW_READY + /* Storing the review SQL */ + IO_CACHE *review_sql= &print_event_info->review_sql_cache; + LEX_STRING review_str; +#endif + + /* + Skip metadata bytes which gives the information about nullabity of master + columns. Master writes one bit for each affected column. 
+ */ + + value+= (bitmap_bits_set(cols_bitmap) + 7) / 8; + + if (!no_fill_output) + if (my_b_printf(file, "%s", prefix)) + goto err; + + for (uint i= 0; i < (uint)td->size(); i ++) + { + size_t size; + int is_null= (null_bits[null_bit_index / 8] + >> (null_bit_index % 8)) & 0x01; + + if (bitmap_is_set(cols_bitmap, i) == 0) + continue; + + if (!no_fill_output) + if (my_b_printf(file, "### @%d=", static_cast(i + 1))) + goto err; + + if (!is_null) + { + size_t fsize= td->calc_field_size((uint)i, (uchar*) value); + if (value + fsize > m_rows_end) + { + if (!no_fill_output) + if (my_b_printf(file, "***Corrupted replication event was detected." + " Not printing the value***\n")) + goto err; + value+= fsize; + return 0; + } + } + + if (!no_fill_output) + { + size= log_event_print_value(file, print_event_info, is_null? NULL: value, + td->type(i), td->field_metadata(i), + typestr, sizeof(typestr)); +#ifdef WHEN_FLASHBACK_REVIEW_READY + if (need_flashback_review) + { + String tmp_str, hex_str; + IO_CACHE tmp_cache; + + // Using a tmp IO_CACHE to get the value output + open_cached_file(&tmp_cache, NULL, NULL, 0, MYF(MY_WME | MY_NABP)); + size= log_event_print_value(&tmp_cache, print_event_info, + is_null ? NULL: value, + td->type(i), td->field_metadata(i), + typestr, sizeof(typestr)); + error= copy_event_cache_to_string_and_reinit(&tmp_cache, &review_str); + close_cached_file(&tmp_cache); + if (unlikely(error)) + return 0; + + switch (td->type(i)) // Converting a string to HEX format + { + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_STRING: + case MYSQL_TYPE_BLOB: + // Avoid write_pos changed to a new area + // tmp_str.free(); + tmp_str.append(review_str.str + 1, review_str.length - 2); // Removing quotation marks + if (hex_str.alloc(tmp_str.length()*2+1)) // If out of memory + { + fprintf(stderr, "\nError: Out of memory. 
" + "Could not print correct binlog event.\n"); + exit(1); + } + octet2hex((char*) hex_str.ptr(), tmp_str.ptr(), tmp_str.length()); + if (my_b_printf(review_sql, ", UNHEX('%s')", hex_str.ptr())) + goto err; + break; + default: + tmp_str.free(); + if (tmp_str.append(review_str.str, review_str.length) || + my_b_printf(review_sql, ", %s", tmp_str.ptr())) + goto err; + break; + } + my_free(review_str.str); + } +#endif + } + else + { + IO_CACHE tmp_cache; + open_cached_file(&tmp_cache, NULL, NULL, 0, MYF(MY_WME | MY_NABP)); + size= log_event_print_value(&tmp_cache, print_event_info, + is_null ? NULL: value, + td->type(i), td->field_metadata(i), + typestr, sizeof(typestr)); + close_cached_file(&tmp_cache); + } + + if (!size) + goto err; + + if (!is_null) + value+= size; + + if (print_event_info->verbose > 1 && !no_fill_output) + { + if (my_b_write(file, (uchar*)" /* ", 4) || + my_b_printf(file, "%s ", typestr) || + my_b_printf(file, "meta=%d nullable=%d is_null=%d ", + td->field_metadata(i), + td->maybe_null(i), is_null) || + my_b_write(file, (uchar*)"*/", 2)) + goto err; + } + + if (!no_fill_output) + if (my_b_write_byte(file, '\n')) + goto err; + + null_bit_index++; + } + return value - value0; + +err: + return 0; +} + + +/** + Exchange the SET part and WHERE part for the Update events. + Revert the operations order for the Write and Delete events. + And then revert the events order from the last one to the first one. 
+ + @param[in] print_event_info PRINT_EVENT_INFO + @param[in] rows_buff Packed event buff +*/ + +void Rows_log_event::change_to_flashback_event(PRINT_EVENT_INFO *print_event_info, + uchar *rows_buff, Log_event_type ev_type) +{ + Table_map_log_event *map; + table_def *td; + DYNAMIC_ARRAY rows_arr; + uchar *swap_buff1; + uchar *rows_pos= rows_buff + m_rows_before_size; + + if (!(map= print_event_info->m_table_map.get_table(m_table_id)) || + !(td= map->create_table_def())) + return; + + /* If the write rows event contained no values for the AI */ + if (((get_general_type_code() == WRITE_ROWS_EVENT) && (m_rows_buf==m_rows_end))) + goto end; + + (void) my_init_dynamic_array(PSI_NOT_INSTRUMENTED, &rows_arr, sizeof(LEX_STRING), 8, 8, MYF(0)); + + for (uchar *value= m_rows_buf; value < m_rows_end; ) + { + uchar *start_pos= value; + size_t length1= 0; + if (!(length1= print_verbose_one_row(NULL, td, print_event_info, + &m_cols, value, + (const uchar*) "", TRUE))) + { + fprintf(stderr, "\nError row length: %zu\n", length1); + exit(1); + } + value+= length1; + + swap_buff1= (uchar *) my_malloc(PSI_NOT_INSTRUMENTED, length1, MYF(0)); + if (!swap_buff1) + { + fprintf(stderr, "\nError: Out of memory. " + "Could not exchange to flashback event.\n"); + exit(1); + } + memcpy(swap_buff1, start_pos, length1); + + // For Update_event, we have the second part + size_t length2= 0; + if (ev_type == UPDATE_ROWS_EVENT || + ev_type == UPDATE_ROWS_EVENT_V1) + { + if (!(length2= print_verbose_one_row(NULL, td, print_event_info, + &m_cols, value, + (const uchar*) "", TRUE))) + { + fprintf(stderr, "\nError row length: %zu\n", length2); + exit(1); + } + value+= length2; + + void *swap_buff2= my_malloc(PSI_NOT_INSTRUMENTED, length2, MYF(0)); + if (!swap_buff2) + { + fprintf(stderr, "\nError: Out of memory. 
" + "Could not exchange to flashback event.\n"); + exit(1); + } + memcpy(swap_buff2, start_pos + length1, length2); // WHERE part + + /* Swap SET and WHERE part */ + memcpy(start_pos, swap_buff2, length2); + memcpy(start_pos + length2, swap_buff1, length1); + my_free(swap_buff2); + } + + my_free(swap_buff1); + + /* Copying one row into a buff, and pushing into the array */ + LEX_STRING one_row; + + one_row.length= length1 + length2; + one_row.str= (char *) my_malloc(PSI_NOT_INSTRUMENTED, one_row.length, MYF(0)); + memcpy(one_row.str, start_pos, one_row.length); + if (one_row.str == NULL || push_dynamic(&rows_arr, (uchar *) &one_row)) + { + fprintf(stderr, "\nError: Out of memory. " + "Could not push flashback event into array.\n"); + exit(1); + } + } + + /* Copying rows from the end to the begining into event */ + for (size_t i= rows_arr.elements; i > 0; --i) + { + LEX_STRING *one_row= dynamic_element(&rows_arr, i - 1, LEX_STRING*); + + memcpy(rows_pos, (uchar *)one_row->str, one_row->length); + rows_pos+= one_row->length; + my_free(one_row->str); + } + delete_dynamic(&rows_arr); + +end: + delete td; +} + +/** + Calc length of a packed value of the given SQL type + + @param[in] ptr Pointer to string + @param[in] type Column type + @param[in] meta Column meta information + + @retval - number of bytes scanned from ptr. 
+ Except in case of NULL, in which case we return 1 to indicate ok +*/ + +static size_t calc_field_event_length(const uchar *ptr, uint type, uint meta) +{ + uint32 length= 0; + + if (type == MYSQL_TYPE_STRING) + { + if (meta >= 256) + { + uint byte0= meta >> 8; + uint byte1= meta & 0xFF; + + if ((byte0 & 0x30) != 0x30) + { + /* a long CHAR() field: see #37426 */ + length= byte1 | (((byte0 & 0x30) ^ 0x30) << 4); + type= byte0 | 0x30; + } + else + length = meta & 0xFF; + } + else + length= meta; + } + + switch (type) { + case MYSQL_TYPE_LONG: + case MYSQL_TYPE_TIMESTAMP: + return 4; + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_YEAR: + return 1; + case MYSQL_TYPE_SHORT: + return 2; + case MYSQL_TYPE_INT24: + case MYSQL_TYPE_TIME: + case MYSQL_TYPE_NEWDATE: + case MYSQL_TYPE_DATE: + return 3; + case MYSQL_TYPE_LONGLONG: + case MYSQL_TYPE_DATETIME: + return 8; + case MYSQL_TYPE_NEWDECIMAL: + { + uint precision= meta >> 8; + uint decimals= meta & 0xFF; + uint bin_size= my_decimal_get_binary_size(precision, decimals); + return bin_size; + } + case MYSQL_TYPE_FLOAT: + return 4; + case MYSQL_TYPE_DOUBLE: + return 8; + case MYSQL_TYPE_BIT: + { + /* Meta-data: bit_len, bytes_in_rec, 2 bytes */ + uint nbits= ((meta >> 8) * 8) + (meta & 0xFF); + length= (nbits + 7) / 8; + return length; + } + case MYSQL_TYPE_TIMESTAMP2: + return my_timestamp_binary_length(meta); + case MYSQL_TYPE_DATETIME2: + return my_datetime_binary_length(meta); + case MYSQL_TYPE_TIME2: + return my_time_binary_length(meta); + case MYSQL_TYPE_ENUM: + switch (meta & 0xFF) { + case 1: + case 2: + return (meta & 0xFF); + default: + /* Unknown ENUM packlen=%d", meta & 0xFF */ + return 0; + } + break; + case MYSQL_TYPE_SET: + return meta & 0xFF; + case MYSQL_TYPE_BLOB: + switch (meta) { + default: + return 0; + case 1: + return *ptr + 1; + case 2: + return uint2korr(ptr) + 2; + case 3: + return uint3korr(ptr) + 3; + case 4: + return uint4korr(ptr) + 4; + } + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_VAR_STRING: + 
length= meta; + /* fall through */ + case MYSQL_TYPE_STRING: + if (length < 256) + return (uint) *ptr + 1; + return uint2korr(ptr) + 2; + case MYSQL_TYPE_DECIMAL: + break; + default: + break; + } + return 0; +} + + +size_t +Rows_log_event::calc_row_event_length(table_def *td, + PRINT_EVENT_INFO *print_event_info, + MY_BITMAP *cols_bitmap, + const uchar *value) +{ + const uchar *value0= value; + const uchar *null_bits= value; + uint null_bit_index= 0; + + /* + Skip metadata bytes which gives the information about nullabity of master + columns. Master writes one bit for each affected column. + */ + + value+= (bitmap_bits_set(cols_bitmap) + 7) / 8; + + for (uint i= 0; i < (uint)td->size(); i ++) + { + int is_null; + is_null= (null_bits[null_bit_index / 8] >> (null_bit_index % 8)) & 0x01; + + if (bitmap_is_set(cols_bitmap, i) == 0) + continue; + + if (!is_null) + { + size_t size; + size_t fsize= td->calc_field_size((uint)i, (uchar*) value); + if (value + fsize > m_rows_end) + { + /* Corrupted replication event was detected, skipping entry */ + return 0; + } + if (!(size= calc_field_event_length(value, td->type(i), + td->field_metadata(i)))) + return 0; + value+= size; + } + null_bit_index++; + } + return value - value0; +} + + +/** + Calculate how many rows there are in the event + + @param[in] file IO cache + @param[in] print_event_into Print parameters +*/ + +void Rows_log_event::count_row_events(PRINT_EVENT_INFO *print_event_info) +{ + Table_map_log_event *map; + table_def *td; + uint row_events; + Log_event_type general_type_code= get_general_type_code(); + + switch (general_type_code) { + case WRITE_ROWS_EVENT: + case DELETE_ROWS_EVENT: + row_events= 1; + break; + case UPDATE_ROWS_EVENT: + row_events= 2; + break; + default: + DBUG_ASSERT(0); /* Not possible */ + return; + } + + if (!(map= print_event_info->m_table_map.get_table(m_table_id)) || + !(td= map->create_table_def())) + { + /* Row event for unknown table */ + return; + } + + for (const uchar *value= 
m_rows_buf; value < m_rows_end; ) + { + size_t length; + print_event_info->row_events++; + + /* Print the first image */ + if (!(length= calc_row_event_length(td, print_event_info, + &m_cols, value))) + break; + value+= length; + DBUG_ASSERT(value <= m_rows_end); + + /* Print the second image (for UPDATE only) */ + if (row_events == 2) + { + if (!(length= calc_row_event_length(td, print_event_info, + &m_cols_ai, value))) + break; + value+= length; + DBUG_ASSERT(value <= m_rows_end); + } + } + delete td; +} + + +/** + Print a row event into IO cache in human readable form (in SQL format) + + @param[in] file IO cache + @param[in] print_event_into Print parameters +*/ + +bool Rows_log_event::print_verbose(IO_CACHE *file, + PRINT_EVENT_INFO *print_event_info) +{ + Table_map_log_event *map; + table_def *td= 0; + const char *sql_command, *sql_clause1, *sql_clause2; + const char *sql_command_short __attribute__((unused)); + Log_event_type general_type_code= get_general_type_code(); +#ifdef WHEN_FLASHBACK_REVIEW_READY + IO_CACHE *review_sql= &print_event_info->review_sql_cache; +#endif + + if (m_extra_row_data) + { + uint8 extra_data_len= m_extra_row_data[EXTRA_ROW_INFO_LEN_OFFSET]; + uint8 extra_payload_len= extra_data_len - EXTRA_ROW_INFO_HDR_BYTES; + assert(extra_data_len >= EXTRA_ROW_INFO_HDR_BYTES); + + if (my_b_printf(file, "### Extra row data format: %u, len: %u :", + m_extra_row_data[EXTRA_ROW_INFO_FORMAT_OFFSET], + extra_payload_len)) + goto err; + if (extra_payload_len) + { + /* + Buffer for hex view of string, including '0x' prefix, + 2 hex chars / byte and trailing 0 + */ + const int buff_len= 2 + (256 * 2) + 1; + char buff[buff_len]; + str_to_hex(buff, (const char*) &m_extra_row_data[EXTRA_ROW_INFO_HDR_BYTES], + extra_payload_len); + if (my_b_printf(file, "%s", buff)) + goto err; + } + if (my_b_printf(file, "\n")) + goto err; + } + + switch (general_type_code) { + case WRITE_ROWS_EVENT: + sql_command= "INSERT INTO"; + sql_clause1= "### SET\n"; + sql_clause2= 
NULL; + sql_command_short= "I"; + break; + case DELETE_ROWS_EVENT: + sql_command= "DELETE FROM"; + sql_clause1= "### WHERE\n"; + sql_clause2= NULL; + sql_command_short= "D"; + break; + case UPDATE_ROWS_EVENT: + sql_command= "UPDATE"; + sql_clause1= "### WHERE\n"; + sql_clause2= "### SET\n"; + sql_command_short= "U"; + break; + default: + sql_command= sql_clause1= sql_clause2= NULL; + sql_command_short= ""; + DBUG_ASSERT(0); /* Not possible */ + } + + if (!(map= print_event_info->m_table_map.get_table(m_table_id)) || + !(td= map->create_table_def())) + { + return (my_b_printf(file, "### Row event for unknown table #%lu", + (ulong) m_table_id)); + } + + /* If the write rows event contained no values for the AI */ + if (((general_type_code == WRITE_ROWS_EVENT) && (m_rows_buf==m_rows_end))) + { + if (my_b_printf(file, "### INSERT INTO %`s.%`s VALUES ()\n", + map->get_db_name(), map->get_table_name())) + goto err; + goto end; + } + + for (const uchar *value= m_rows_buf; value < m_rows_end; ) + { + size_t length; + print_event_info->row_events++; + + if (my_b_printf(file, "### %s %`s.%`s\n", + sql_command, + map->get_db_name(), map->get_table_name())) + goto err; +#ifdef WHEN_FLASHBACK_REVIEW_READY + if (need_flashback_review) + if (my_b_printf(review_sql, "\nINSERT INTO `%s`.`%s` VALUES ('%s'", + map->get_review_dbname(), map->get_review_tablename(), + sql_command_short)) + goto err; +#endif + + /* Print the first image */ + if (!(length= print_verbose_one_row(file, td, print_event_info, + &m_cols, value, + (const uchar*) sql_clause1))) + goto err; + value+= length; + + /* Print the second image (for UPDATE only) */ + if (sql_clause2) + { + if (!(length= print_verbose_one_row(file, td, print_event_info, + &m_cols_ai, value, + (const uchar*) sql_clause2))) + goto err; + value+= length; + } +#ifdef WHEN_FLASHBACK_REVIEW_READY + else + { + if (need_flashback_review) + for (size_t i= 0; i < td->size(); i ++) + if (my_b_printf(review_sql, ", NULL")) + goto err; + } + + if 
(need_flashback_review) + if (my_b_printf(review_sql, ")%s\n", print_event_info->delimiter)) + goto err; +#endif + } + +end: + delete td; + return 0; +err: + delete td; + return 1; +} + +void free_table_map_log_event(Table_map_log_event *event) +{ + delete event; +} + +/** + Encode the event, optionally per 'do_print_encoded' arg store the + result into the argument cache; optionally per event_info's + 'verbose' print into the cache a verbose representation of the event. + Note, no extra wrapping is done to the being io-cached data, like + to producing a BINLOG query. It's left for a routine that extracts from + the cache. + + @param file pointer to IO_CACHE + @param print_event_info pointer to print_event_info specializing + what out of and how to print the event + @param do_print_encoded whether to store base64-encoded event + into @file. +*/ +bool Log_event::print_base64(IO_CACHE* file, + PRINT_EVENT_INFO* print_event_info, + bool do_print_encoded) +{ + uchar *ptr= temp_buf; + uint32 size= uint4korr(ptr + EVENT_LEN_OFFSET); + DBUG_ENTER("Log_event::print_base64"); + + if (is_flashback) + { + uint tmp_size= size; + Rows_log_event *ev= NULL; + Log_event_type ev_type = (enum Log_event_type) ptr[EVENT_TYPE_OFFSET]; + if (checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF && + checksum_alg != BINLOG_CHECKSUM_ALG_OFF) + tmp_size-= BINLOG_CHECKSUM_LEN; // checksum is displayed through the header + switch (ev_type) { + case WRITE_ROWS_EVENT: + ptr[EVENT_TYPE_OFFSET]= DELETE_ROWS_EVENT; + ev= new Delete_rows_log_event(ptr, tmp_size, + glob_description_event); + ev->change_to_flashback_event(print_event_info, ptr, ev_type); + break; + case WRITE_ROWS_EVENT_V1: + ptr[EVENT_TYPE_OFFSET]= DELETE_ROWS_EVENT_V1; + ev= new Delete_rows_log_event(ptr, tmp_size, + glob_description_event); + ev->change_to_flashback_event(print_event_info, ptr, ev_type); + break; + case DELETE_ROWS_EVENT: + ptr[EVENT_TYPE_OFFSET]= WRITE_ROWS_EVENT; + ev= new Write_rows_log_event(ptr, tmp_size, + 
glob_description_event); + ev->change_to_flashback_event(print_event_info, ptr, ev_type); + break; + case DELETE_ROWS_EVENT_V1: + ptr[EVENT_TYPE_OFFSET]= WRITE_ROWS_EVENT_V1; + ev= new Write_rows_log_event(ptr, tmp_size, + glob_description_event); + ev->change_to_flashback_event(print_event_info, ptr, ev_type); + break; + case UPDATE_ROWS_EVENT: + case UPDATE_ROWS_EVENT_V1: + ev= new Update_rows_log_event(ptr, tmp_size, + glob_description_event); + ev->change_to_flashback_event(print_event_info, ptr, ev_type); + break; + default: + break; + } + delete ev; + } + + if (do_print_encoded) + { + size_t const tmp_str_sz= my_base64_needed_encoded_length((int) size); + char *tmp_str; + if (!(tmp_str= (char *) my_malloc(PSI_NOT_INSTRUMENTED, tmp_str_sz, MYF(MY_WME)))) + goto err; + + if (my_base64_encode(ptr, (size_t) size, tmp_str)) + { + DBUG_ASSERT(0); + } + + my_b_printf(file, "%s\n", tmp_str); + my_free(tmp_str); + } + +#ifdef WHEN_FLASHBACK_REVIEW_READY + if (print_event_info->verbose || print_event_info->print_row_count || + need_flashback_review) +#else + // Flashback need the table_map to parse the event + if (print_event_info->verbose || print_event_info->print_row_count || + is_flashback) +#endif + { + Rows_log_event *ev= NULL; + Log_event_type et= (Log_event_type) ptr[EVENT_TYPE_OFFSET]; + + if (checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF && + checksum_alg != BINLOG_CHECKSUM_ALG_OFF) + size-= BINLOG_CHECKSUM_LEN; // checksum is displayed through the header + + switch (et) + { + case TABLE_MAP_EVENT: + { + Table_map_log_event *map; + map= new Table_map_log_event(ptr, size, + glob_description_event); +#ifdef WHEN_FLASHBACK_REVIEW_READY + if (need_flashback_review) + { + map->set_review_dbname(m_review_dbname.ptr()); + map->set_review_tablename(m_review_tablename.ptr()); + } +#endif + print_event_info->m_table_map.set_table(map->get_table_id(), map); + break; + } + case WRITE_ROWS_EVENT: + case WRITE_ROWS_EVENT_V1: + { + ev= new Write_rows_log_event(ptr, size, + 
glob_description_event); + break; + } + case DELETE_ROWS_EVENT: + case DELETE_ROWS_EVENT_V1: + { + ev= new Delete_rows_log_event(ptr, size, + glob_description_event); + break; + } + case UPDATE_ROWS_EVENT: + case UPDATE_ROWS_EVENT_V1: + { + ev= new Update_rows_log_event(ptr, size, + glob_description_event); + break; + } + case WRITE_ROWS_COMPRESSED_EVENT: + case WRITE_ROWS_COMPRESSED_EVENT_V1: + { + ev= new Write_rows_compressed_log_event(ptr, size, + glob_description_event); + break; + } + case UPDATE_ROWS_COMPRESSED_EVENT: + case UPDATE_ROWS_COMPRESSED_EVENT_V1: + { + ev= new Update_rows_compressed_log_event(ptr, size, + glob_description_event); + break; + } + case DELETE_ROWS_COMPRESSED_EVENT: + case DELETE_ROWS_COMPRESSED_EVENT_V1: + { + ev= new Delete_rows_compressed_log_event(ptr, size, + glob_description_event); + break; + } + default: + break; + } + + if (ev) + { + bool error= 0; + +#ifdef WHEN_FLASHBACK_REVIEW_READY + ev->need_flashback_review= need_flashback_review; + if (print_event_info->verbose) + { + if (ev->print_verbose(&print_event_info->tail_cache, print_event_info)) + goto err; + } + else + { + IO_CACHE tmp_cache; + + if (open_cached_file(&tmp_cache, NULL, NULL, 0, + MYF(MY_WME | MY_NABP))) + { + delete ev; + goto err; + } + + error= ev->print_verbose(&tmp_cache, print_event_info); + close_cached_file(&tmp_cache); + if (unlikely(error)) + { + delete ev; + goto err; + } + } +#else + if (print_event_info->verbose) + error= ev->print_verbose(&print_event_info->tail_cache, print_event_info); + else + ev->count_row_events(print_event_info); +#endif + delete ev; + if (unlikely(error)) + goto err; + } + } + DBUG_RETURN(0); + +err: + DBUG_RETURN(1); +} + + +/* + Log_event::print_timestamp() +*/ + +bool Log_event::print_timestamp(IO_CACHE* file, time_t* ts) +{ + struct tm *res; + time_t my_when= when; + DBUG_ENTER("Log_event::print_timestamp"); + if (!ts) + ts = &my_when; + res=localtime(ts); + + DBUG_RETURN(my_b_printf(file,"%02d%02d%02d %2d:%02d:%02d", 
+ res->tm_year % 100, + res->tm_mon+1, + res->tm_mday, + res->tm_hour, + res->tm_min, + res->tm_sec)); +} + + +/** + Query_log_event::print(). + + @todo + print the catalog ?? +*/ +bool Query_log_event::print_query_header(IO_CACHE* file, + PRINT_EVENT_INFO* print_event_info) +{ + // TODO: print the catalog ?? + char buff[64], *end; // Enough for SET TIMESTAMP + bool different_db= 1; + uint32 tmp; + + if (!print_event_info->short_form) + { + if (print_header(file, print_event_info, FALSE) || + my_b_printf(file, + "\t%s\tthread_id=%lu\texec_time=%lu\terror_code=%d" + "\txid=%lu\n", + get_type_str(), (ulong) thread_id, (ulong) exec_time, + error_code, (ulong) xid)) + goto err; + } + + if ((flags & LOG_EVENT_SUPPRESS_USE_F)) + { + if (!is_trans_keyword()) + print_event_info->db[0]= '\0'; + } + else if (db) + { + different_db= memcmp(print_event_info->db, db, db_len + 1); + if (different_db) + memcpy(print_event_info->db, db, db_len + 1); + if (db[0] && different_db) + if (my_b_printf(file, "use %`s%s\n", db, print_event_info->delimiter)) + goto err; + } + + end=int10_to_str((long) when, strmov(buff,"SET TIMESTAMP="),10); + if (when_sec_part && when_sec_part <= TIME_MAX_SECOND_PART) + { + *end++= '.'; + end=int10_to_str(when_sec_part, end, 10); + } + end= strmov(end, print_event_info->delimiter); + *end++='\n'; + if (my_b_write(file, (uchar*) buff, (uint) (end-buff))) + goto err; + if ((!print_event_info->thread_id_printed || + ((flags & LOG_EVENT_THREAD_SPECIFIC_F) && + thread_id != print_event_info->thread_id))) + { + // If --short-form, print deterministic value instead of pseudo_thread_id. + if (my_b_printf(file,"SET @@session.pseudo_thread_id=%lu%s\n", + short_form ? 
999999999 : (ulong)thread_id, + print_event_info->delimiter)) + goto err; + print_event_info->thread_id= thread_id; + print_event_info->thread_id_printed= 1; + } + + /* + If flags2_inited==0, this is an event from 3.23 or 4.0 or a dummy + event from the mtr test suite; nothing to print (remember we don't + produce mixed relay logs so there cannot be 5.0 events before that + one so there is nothing to reset). + */ + if (likely(flags2_inited)) /* likely as this will mainly read 5.0 logs */ + { + /* tmp is a bitmask of bits which have changed. */ + if (likely(print_event_info->flags2_inited)) + /* All bits which have changed */ + tmp= (print_event_info->flags2) ^ flags2; + else /* that's the first Query event we read */ + { + print_event_info->flags2_inited= 1; + tmp= ~((uint32)0); /* all bits have changed */ + } + + if (unlikely(tmp)) /* some bits have changed */ + { + bool need_comma= 0; + ulonglong mask= glob_description_event->options_written_to_bin_log; + if (my_b_write_string(file, "SET ") || + print_set_option(file, tmp, OPTION_NO_FOREIGN_KEY_CHECKS, ~flags2, + "@@session.foreign_key_checks", &need_comma)|| + print_set_option(file, tmp, OPTION_AUTO_IS_NULL, flags2, + "@@session.sql_auto_is_null", &need_comma) || + print_set_option(file, tmp, OPTION_RELAXED_UNIQUE_CHECKS, ~flags2, + "@@session.unique_checks", &need_comma) || + print_set_option(file, tmp, OPTION_NOT_AUTOCOMMIT, ~flags2, + "@@session.autocommit", &need_comma) || + print_set_option(file, tmp, OPTION_NO_CHECK_CONSTRAINT_CHECKS, ~flags2, + "@@session.check_constraint_checks", &need_comma) || + print_set_option(file, tmp, mask & OPTION_IF_EXISTS, flags2, + "@@session.sql_if_exists", &need_comma) || + print_set_option(file, tmp, mask & OPTION_EXPLICIT_DEF_TIMESTAMP, flags2, + "@@session.explicit_defaults_for_timestamp", + &need_comma) || + print_set_option(file, tmp, mask & OPTION_INSERT_HISTORY, flags2, + "@@session.system_versioning_insert_history", + &need_comma) || + my_b_printf(file,"%s\n", 
print_event_info->delimiter)) + goto err; + print_event_info->flags2= flags2; + } + } + + /* + Now the session variables; + it's more efficient to pass SQL_MODE as a number instead of a + comma-separated list. + FOREIGN_KEY_CHECKS, SQL_AUTO_IS_NULL, UNIQUE_CHECKS are session-only + variables (they have no global version; they're not listed in + sql_class.h), The tests below work for pure binlogs or pure relay + logs. Won't work for mixed relay logs but we don't create mixed + relay logs (that is, there is no relay log with a format change + except within the 3 first events, which mysqlbinlog handles + gracefully). So this code should always be good. + */ + + if (likely(sql_mode_inited) && + (unlikely(print_event_info->sql_mode != sql_mode || + !print_event_info->sql_mode_inited))) + { + char llbuff[22]; + if (my_b_printf(file,"SET @@session.sql_mode=%s%s\n", + ullstr(sql_mode, llbuff), print_event_info->delimiter)) + goto err; + print_event_info->sql_mode= sql_mode; + print_event_info->sql_mode_inited= 1; + } + if (print_event_info->auto_increment_increment != auto_increment_increment || + print_event_info->auto_increment_offset != auto_increment_offset) + { + if (my_b_printf(file,"SET @@session.auto_increment_increment=%lu, @@session.auto_increment_offset=%lu%s\n", + auto_increment_increment,auto_increment_offset, + print_event_info->delimiter)) + goto err; + print_event_info->auto_increment_increment= auto_increment_increment; + print_event_info->auto_increment_offset= auto_increment_offset; + } + + /* TODO: print the catalog when we feature SET CATALOG */ + + if (likely(charset_inited) && + (unlikely(!print_event_info->charset_inited || + memcmp(print_event_info->charset, charset, 6)))) + { + CHARSET_INFO *cs_info= get_charset(uint2korr(charset), MYF(MY_WME)); + if (cs_info) + { + /* for mysql client */ + if (my_b_printf(file, "/*!\\C %s */%s\n", + cs_info->cs_name.str, print_event_info->delimiter)) + goto err; + } + if (my_b_printf(file,"SET " + 
"@@session.character_set_client=%s," + "@@session.collation_connection=%d," + "@@session.collation_server=%d" + "%s\n", + cs_info->cs_name.str, + uint2korr(charset+2), + uint2korr(charset+4), + print_event_info->delimiter)) + goto err; + memcpy(print_event_info->charset, charset, 6); + print_event_info->charset_inited= 1; + } + if (time_zone_len) + { + if (memcmp(print_event_info->time_zone_str, + time_zone_str, time_zone_len+1)) + { + if (my_b_printf(file,"SET @@session.time_zone='%s'%s\n", + time_zone_str, print_event_info->delimiter)) + goto err; + memcpy(print_event_info->time_zone_str, time_zone_str, time_zone_len+1); + } + } + if (lc_time_names_number != print_event_info->lc_time_names_number) + { + if (my_b_printf(file, "SET @@session.lc_time_names=%d%s\n", + lc_time_names_number, print_event_info->delimiter)) + goto err; + print_event_info->lc_time_names_number= lc_time_names_number; + } + if (charset_database_number != print_event_info->charset_database_number) + { + if (charset_database_number) + { + if (my_b_printf(file, "SET @@session.collation_database=%d%s\n", + charset_database_number, print_event_info->delimiter)) + goto err; + } + else if (my_b_printf(file, "SET @@session.collation_database=DEFAULT%s\n", + print_event_info->delimiter)) + goto err; + print_event_info->charset_database_number= charset_database_number; + } + return 0; + +err: + return 1; +} + +bool Query_log_event::print_verbose(IO_CACHE* cache, PRINT_EVENT_INFO* print_event_info) +{ + if (my_b_printf(cache, "### ") || + my_b_write(cache, (uchar *) query, q_len) || + my_b_printf(cache, "\n")) + { + goto err; + } + return 0; + +err: + return 1; +} + +bool Query_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) +{ + Write_on_release_cache cache(&print_event_info->head_cache, file, 0, this); + + /** + reduce the size of io cache so that the write function is called + for every call to my_b_write(). 
+ */ + DBUG_EXECUTE_IF ("simulate_file_write_error", + {(&cache)->write_pos= (&cache)->write_end- 500;}); + if (print_query_header(&cache, print_event_info)) + goto err; + if (!is_flashback) + { + if (gtid_flags_extra & (Gtid_log_event::FL_START_ALTER_E1 | + Gtid_log_event::FL_COMMIT_ALTER_E1 | + Gtid_log_event::FL_ROLLBACK_ALTER_E1)) + { + bool do_print_encoded= + print_event_info->base64_output_mode != BASE64_OUTPUT_NEVER && + print_event_info->base64_output_mode != BASE64_OUTPUT_DECODE_ROWS && + !print_event_info->short_form; + bool comment_mode= do_print_encoded && + gtid_flags_extra & (Gtid_log_event::FL_START_ALTER_E1 | + Gtid_log_event::FL_ROLLBACK_ALTER_E1); + + if(comment_mode) + my_b_printf(&cache, "/*!100600 "); + if (do_print_encoded) + my_b_printf(&cache, "BINLOG '\n"); + if (print_base64(&cache, print_event_info, do_print_encoded)) + goto err; + if (do_print_encoded) + { + if(comment_mode) + my_b_printf(&cache, "' */%s\n", print_event_info->delimiter); + else + my_b_printf(&cache, "'%s\n", print_event_info->delimiter); + } + if (print_event_info->verbose && print_verbose(&cache, print_event_info)) + { + goto err; + } + } + else + { + if (my_b_write(&cache, (uchar*) query, q_len) || + my_b_printf(&cache, "\n%s\n", print_event_info->delimiter)) + goto err; + } + } + else // is_flashback == 1 + { + if (strcmp("BEGIN", query) == 0) + { + if (my_b_write(&cache, (uchar*) "COMMIT", 6) || + my_b_printf(&cache, "\n%s\n", print_event_info->delimiter)) + goto err; + } + else if (strcmp("COMMIT", query) == 0) + { + if (my_b_printf(&cache, "START TRANSACTION\n%s\n", print_event_info->delimiter)) + goto err; + } + } + return cache.flush_data(); +err: + return 1; +} + + +bool Start_log_event_v3::print(FILE* file, PRINT_EVENT_INFO* print_event_info) +{ + DBUG_ENTER("Start_log_event_v3::print"); + + Write_on_release_cache cache(&print_event_info->head_cache, file, + Write_on_release_cache::FLUSH_F); + + if (!print_event_info->short_form) + { + if (print_header(&cache, 
print_event_info, FALSE) || + my_b_printf(&cache, "\tStart: binlog v %d, server v %s created ", + binlog_version, server_version) || + print_timestamp(&cache)) + goto err; + if (created) + if (my_b_printf(&cache," at startup")) + goto err; + if (my_b_printf(&cache, "\n")) + goto err; + if (flags & LOG_EVENT_BINLOG_IN_USE_F) + if (my_b_printf(&cache, + "# Warning: this binlog is either in use or was not " + "closed properly.\n")) + goto err; + } + if (!is_artificial_event() && created) + { +#ifdef WHEN_WE_HAVE_THE_RESET_CONNECTION_SQL_COMMAND + /* + This is for mysqlbinlog: like in replication, we want to delete the stale + tmp files left by an unclean shutdown of mysqld (temporary tables) + and rollback unfinished transaction. + Probably this can be done with RESET CONNECTION (syntax to be defined). + */ + if (my_b_printf(&cache,"RESET CONNECTION%s\n", + print_event_info->delimiter)) + goto err; +#else + if (my_b_printf(&cache,"ROLLBACK%s\n", print_event_info->delimiter)) + goto err; +#endif + } + if (temp_buf && + print_event_info->base64_output_mode != BASE64_OUTPUT_NEVER && + !print_event_info->short_form) + { + /* BINLOG is matched with the delimiter below on the same level */ + bool do_print_encoded= + print_event_info->base64_output_mode != BASE64_OUTPUT_DECODE_ROWS; + if (do_print_encoded) + my_b_printf(&cache, "BINLOG '\n"); + + if (print_base64(&cache, print_event_info, do_print_encoded)) + goto err; + + if (do_print_encoded) + my_b_printf(&cache, "'%s\n", print_event_info->delimiter); + + print_event_info->printed_fd_event= TRUE; + } + DBUG_RETURN(cache.flush_data()); +err: + DBUG_RETURN(1); +} + + +bool Start_encryption_log_event::print(FILE* file, + PRINT_EVENT_INFO* print_event_info) +{ + Write_on_release_cache cache(&print_event_info->head_cache, file); + StringBuffer<1024> buf; + buf.append(STRING_WITH_LEN("# Encryption scheme: ")); + buf.append_ulonglong(crypto_scheme); + buf.append(STRING_WITH_LEN(", key_version: ")); + 
buf.append_ulonglong(key_version); + buf.append(STRING_WITH_LEN(", nonce: ")); + buf.append_hex(nonce, BINLOG_NONCE_LENGTH); + buf.append(STRING_WITH_LEN("\n# The rest of the binlog is encrypted!\n")); + if (my_b_write(&cache, (uchar*)buf.ptr(), buf.length())) + return 1; + return (cache.flush_data()); +} + + +bool Load_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) +{ + return print(file, print_event_info, 0); +} + + +bool Load_log_event::print(FILE* file_arg, PRINT_EVENT_INFO* print_event_info, + bool commented) +{ + Write_on_release_cache cache(&print_event_info->head_cache, file_arg); + bool different_db= 1; + DBUG_ENTER("Load_log_event::print"); + + if (!print_event_info->short_form) + { + if (print_header(&cache, print_event_info, FALSE) || + my_b_printf(&cache, "\tQuery\tthread_id=%ld\texec_time=%ld\n", + thread_id, exec_time)) + goto err; + } + + if (db) + { + /* + If the database is different from the one of the previous statement, we + need to print the "use" command, and we update the last_db. + But if commented, the "use" is going to be commented so we should not + update the last_db. + */ + if ((different_db= memcmp(print_event_info->db, db, db_len + 1)) && + !commented) + memcpy(print_event_info->db, db, db_len + 1); + } + + if (db && db[0] && different_db) + if (my_b_printf(&cache, "%suse %`s%s\n", + commented ? "# " : "", + db, print_event_info->delimiter)) + goto err; + + if (flags & LOG_EVENT_THREAD_SPECIFIC_F) + if (my_b_printf(&cache,"%sSET @@session.pseudo_thread_id=%lu%s\n", + commented ? "# " : "", (ulong)thread_id, + print_event_info->delimiter)) + goto err; + if (my_b_printf(&cache, "%sLOAD DATA ", + commented ? 
"# " : "")) + goto err; + if (check_fname_outside_temp_buf()) + if (my_b_write_string(&cache, "LOCAL ")) + goto err; + if (my_b_printf(&cache, "INFILE '%-*s' ", fname_len, fname)) + goto err; + + if (sql_ex.opt_flags & REPLACE_FLAG) + { + if (my_b_write_string(&cache, "REPLACE ")) + goto err; + } + else if (sql_ex.opt_flags & IGNORE_FLAG) + if (my_b_write_string(&cache, "IGNORE ")) + goto err; + + if (my_b_printf(&cache, "INTO TABLE `%s`", table_name) || + my_b_write_string(&cache, " FIELDS TERMINATED BY ") || + pretty_print_str(&cache, sql_ex.field_term, sql_ex.field_term_len)) + goto err; + + if (sql_ex.opt_flags & OPT_ENCLOSED_FLAG) + if (my_b_write_string(&cache, " OPTIONALLY ")) + goto err; + if (my_b_write_string(&cache, " ENCLOSED BY ") || + pretty_print_str(&cache, sql_ex.enclosed, sql_ex.enclosed_len) || + my_b_write_string(&cache, " ESCAPED BY ") || + pretty_print_str(&cache, sql_ex.escaped, sql_ex.escaped_len) || + my_b_write_string(&cache, " LINES TERMINATED BY ") || + pretty_print_str(&cache, sql_ex.line_term, sql_ex.line_term_len)) + goto err; + + if (sql_ex.line_start) + { + if (my_b_write_string(&cache," STARTING BY ") || + pretty_print_str(&cache, sql_ex.line_start, sql_ex.line_start_len)) + goto err; + } + if ((long) skip_lines > 0) + if (my_b_printf(&cache, " IGNORE %ld LINES", (long) skip_lines)) + goto err; + + if (num_fields) + { + uint i; + const char* field = fields; + if (my_b_write_string(&cache, " (")) + goto err; + for (i = 0; i < num_fields; i++) + { + if (i) + if (my_b_write_byte(&cache, ',')) + goto err; + if (my_b_printf(&cache, "%`s", field)) + goto err; + field += field_lens[i] + 1; + } + if (my_b_write_byte(&cache, ')')) + goto err; + } + + if (my_b_printf(&cache, "%s\n", print_event_info->delimiter)) + goto err; + DBUG_RETURN(cache.flush_data()); +err: + DBUG_RETURN(1); +} + + +bool Rotate_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) +{ + if (print_event_info->short_form) + return 0; + + char buf[22]; + 
Write_on_release_cache cache(&print_event_info->head_cache, file, + Write_on_release_cache::FLUSH_F); + if (print_header(&cache, print_event_info, FALSE) || + my_b_write_string(&cache, "\tRotate to ")) + goto err; + if (new_log_ident) + if (my_b_write(&cache, (uchar*) new_log_ident, (uint)ident_len)) + goto err; + if (my_b_printf(&cache, " pos: %s\n", llstr(pos, buf))) + goto err; + return cache.flush_data(); +err: + return 1; +} + + +bool Binlog_checkpoint_log_event::print(FILE *file, + PRINT_EVENT_INFO *print_event_info) +{ + if (print_event_info->short_form) + return 0; + + Write_on_release_cache cache(&print_event_info->head_cache, file, + Write_on_release_cache::FLUSH_F); + + if (print_header(&cache, print_event_info, FALSE) || + my_b_write_string(&cache, "\tBinlog checkpoint ") || + my_b_write(&cache, (uchar*)binlog_file_name, binlog_file_len) || + my_b_write_byte(&cache, '\n')) + return 1; + return cache.flush_data(); +} + + +bool +Gtid_list_log_event::print(FILE *file, PRINT_EVENT_INFO *print_event_info) +{ + if (print_event_info->short_form) + return 0; + + Write_on_release_cache cache(&print_event_info->head_cache, file, + Write_on_release_cache::FLUSH_F); + char buf[21]; + uint32 i; + + qsort(list, count, sizeof(rpl_gtid), compare_glle_gtids); + + if (print_header(&cache, print_event_info, FALSE) || + my_b_printf(&cache, "\tGtid list [")) + goto err; + + for (i= 0; i < count; ++i) + { + longlong10_to_str(list[i].seq_no, buf, 10); + if (my_b_printf(&cache, "%u-%u-%s", list[i].domain_id, + list[i].server_id, buf)) + goto err; + if (i < count-1) + if (my_b_printf(&cache, ",\n# ")) + goto err; + } + if (my_b_printf(&cache, "]\n")) + goto err; + + return cache.flush_data(); +err: + return 1; +} + + +bool Intvar_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) +{ + char llbuff[22]; + const char *UNINIT_VAR(msg); + Write_on_release_cache cache(&print_event_info->head_cache, file, + Write_on_release_cache::FLUSH_F); + + if 
(!print_event_info->short_form) + { + if (print_header(&cache, print_event_info, FALSE) || + my_b_write_string(&cache, "\tIntvar\n")) + goto err; + } + + if (my_b_printf(&cache, "SET ")) + goto err; + switch (type) { + case LAST_INSERT_ID_EVENT: + msg="LAST_INSERT_ID"; + break; + case INSERT_ID_EVENT: + msg="INSERT_ID"; + break; + case INVALID_INT_EVENT: + default: // cannot happen + msg="INVALID_INT"; + break; + } + if (my_b_printf(&cache, "%s=%s%s\n", + msg, llstr(val,llbuff), print_event_info->delimiter)) + goto err; + + return cache.flush_data(); +err: + return 1; +} + + +bool Rand_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) +{ + Write_on_release_cache cache(&print_event_info->head_cache, file, + Write_on_release_cache::FLUSH_F); + + char llbuff[22],llbuff2[22]; + if (!print_event_info->short_form) + { + if (print_header(&cache, print_event_info, FALSE) || + my_b_write_string(&cache, "\tRand\n")) + goto err; + } + if (my_b_printf(&cache, "SET @@RAND_SEED1=%s, @@RAND_SEED2=%s%s\n", + llstr(seed1, llbuff),llstr(seed2, llbuff2), + print_event_info->delimiter)) + goto err; + + return cache.flush_data(); +err: + return 1; +} + + +bool Xid_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) +{ + Write_on_release_cache cache(&print_event_info->head_cache, file, + Write_on_release_cache::FLUSH_F, this); + + if (!print_event_info->short_form) + { + char buf[64]; + longlong10_to_str(xid, buf, 10); + + if (print_header(&cache, print_event_info, FALSE) || + my_b_printf(&cache, "\tXid = %s\n", buf)) + goto err; + } + if (my_b_printf(&cache, is_flashback ? 
"START TRANSACTION%s\n" : "COMMIT%s\n", + print_event_info->delimiter)) + goto err; + + return cache.flush_data(); +err: + return 1; +} + + +bool User_var_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) +{ + Write_on_release_cache cache(&print_event_info->head_cache, file, + Write_on_release_cache::FLUSH_F); + + if (!print_event_info->short_form) + { + if (print_header(&cache, print_event_info, FALSE) || + my_b_write_string(&cache, "\tUser_var\n")) + goto err; + } + + if (my_b_write_string(&cache, "SET @") || + my_b_write_backtick_quote(&cache, name, name_len)) + goto err; + + if (is_null) + { + if (my_b_printf(&cache, ":=NULL%s\n", print_event_info->delimiter)) + goto err; + } + else + { + switch (type) { + case REAL_RESULT: + double real_val; + char real_buf[FMT_G_BUFSIZE(14)]; + float8get(real_val, val); + sprintf(real_buf, "%.14g", real_val); + if (my_b_printf(&cache, ":=%s%s\n", real_buf, + print_event_info->delimiter)) + goto err; + break; + case INT_RESULT: + char int_buf[22]; + longlong10_to_str(uint8korr(val), int_buf, + ((flags & User_var_log_event::UNSIGNED_F) ? 10 : -10)); + if (my_b_printf(&cache, ":=%s%s\n", int_buf, + print_event_info->delimiter)) + goto err; + break; + case DECIMAL_RESULT: + { + char str_buf[200]; + int str_len= sizeof(str_buf) - 1; + int precision= (int)val[0]; + int scale= (int)val[1]; + decimal_digit_t dec_buf[10]; + decimal_t dec; + dec.len= 10; + dec.buf= dec_buf; + + bin2decimal((uchar*) val+2, &dec, precision, scale); + decimal2string(&dec, str_buf, &str_len, 0, 0, 0); + str_buf[str_len]= 0; + if (my_b_printf(&cache, ":=%s%s\n", str_buf, + print_event_info->delimiter)) + goto err; + break; + } + case STRING_RESULT: + { + /* + Let's express the string in hex. That's the most robust way. If we + print it in character form instead, we need to escape it with + character_set_client which we don't know (we will know it in 5.0, but + in 4.1 we don't know it easily when we are printing + User_var_log_event). 
Explanation why we would need to bother with + character_set_client (quoting Bar): + > Note, the parser doesn't switch to another unescaping mode after + > it has met a character set introducer. + > For example, if an SJIS client says something like: + > SET @a= _ucs2 \0a\0b' + > the string constant is still unescaped according to SJIS, not + > according to UCS2. + */ + char *hex_str; + CHARSET_INFO *cs; + bool error; + + // 2 hex digits / byte + hex_str= (char *) my_malloc(PSI_NOT_INSTRUMENTED, 2 * val_len + 1 + 3, MYF(MY_WME)); + if (!hex_str) + goto err; + str_to_hex(hex_str, val, val_len); + /* + For proper behaviour when mysqlbinlog|mysql, we need to explicitly + specify the variable's collation. It will however cause problems when + people want to mysqlbinlog|mysql into another server not supporting the + character set. But there's not much to do about this and it's unlikely. + */ + if (!(cs= get_charset(charset_number, MYF(0)))) + { /* + Generate an unusable command (=> syntax error) is probably the best + thing we can do here. 
+ */ + error= my_b_printf(&cache, ":=???%s\n", print_event_info->delimiter); + } + else + error= my_b_printf(&cache, ":=_%s %s COLLATE `%s`%s\n", + cs->cs_name.str, hex_str, cs->coll_name.str, + print_event_info->delimiter); + my_free(hex_str); + if (unlikely(error)) + goto err; + break; + } + case ROW_RESULT: + default: + DBUG_ASSERT(0); + break; + } + } + + return cache.flush_data(); +err: + return 1; +} + + +#ifdef HAVE_REPLICATION + +bool Unknown_log_event::print(FILE* file_arg, PRINT_EVENT_INFO* print_event_info) +{ + if (print_event_info->short_form) + return 0; + + Write_on_release_cache cache(&print_event_info->head_cache, file_arg); + + if (what != ENCRYPTED) + { + if (print_header(&cache, print_event_info, FALSE) || + my_b_printf(&cache, "\n# Unknown event\n")) + goto err; + } + else if (my_b_printf(&cache, "# Encrypted event\n")) + goto err; + + return cache.flush_data(); +err: + return 1; +} + + +bool Stop_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) +{ + if (print_event_info->short_form) + return 0; + + Write_on_release_cache cache(&print_event_info->head_cache, file, + Write_on_release_cache::FLUSH_F, this); + + if (print_header(&cache, print_event_info, FALSE) || + my_b_write_string(&cache, "\tStop\n")) + return 1; + return cache.flush_data(); +} + +#endif + + +bool Create_file_log_event::print(FILE* file, + PRINT_EVENT_INFO* print_event_info, + bool enable_local) +{ + if (print_event_info->short_form) + { + if (enable_local && check_fname_outside_temp_buf()) + return Load_log_event::print(file, print_event_info); + return 0; + } + + Write_on_release_cache cache(&print_event_info->head_cache, file); + + if (enable_local) + { + if (Load_log_event::print(file, print_event_info, + !check_fname_outside_temp_buf())) + goto err; + + /** + reduce the size of io cache so that the write function is called + for every call to my_b_printf(). 
+ */ + DBUG_EXECUTE_IF ("simulate_create_event_write_error", + {(&cache)->write_pos= (&cache)->write_end; + DBUG_SET("+d,simulate_file_write_error");}); + /* + That one is for "file_id: etc" below: in mysqlbinlog we want the #, in + SHOW BINLOG EVENTS we don't. + */ + if (my_b_write_byte(&cache, '#')) + goto err; + } + + if (my_b_printf(&cache, " file_id: %d block_len: %d\n", file_id, block_len)) + goto err; + + return cache.flush_data(); +err: + return 1; + +} + + +bool Create_file_log_event::print(FILE* file, + PRINT_EVENT_INFO* print_event_info) +{ + return print(file, print_event_info, 0); +} + + +/* + Append_block_log_event::print() +*/ + +bool Append_block_log_event::print(FILE* file, + PRINT_EVENT_INFO* print_event_info) +{ + if (print_event_info->short_form) + return 0; + + Write_on_release_cache cache(&print_event_info->head_cache, file); + + if (print_header(&cache, print_event_info, FALSE) || + my_b_printf(&cache, "\n#%s: file_id: %d block_len: %d\n", + get_type_str(), file_id, block_len)) + goto err; + + return cache.flush_data(); +err: + return 1; +} + + +/* + Delete_file_log_event::print() +*/ + +bool Delete_file_log_event::print(FILE* file, + PRINT_EVENT_INFO* print_event_info) +{ + if (print_event_info->short_form) + return 0; + + Write_on_release_cache cache(&print_event_info->head_cache, file); + + if (print_header(&cache, print_event_info, FALSE) || + my_b_printf(&cache, "\n#Delete_file: file_id=%u\n", file_id)) + return 1; + + return cache.flush_data(); +} + +/* + Execute_load_log_event::print() +*/ + +bool Execute_load_log_event::print(FILE* file, + PRINT_EVENT_INFO* print_event_info) +{ + if (print_event_info->short_form) + return 0; + + Write_on_release_cache cache(&print_event_info->head_cache, file); + + if (print_header(&cache, print_event_info, FALSE) || + my_b_printf(&cache, "\n#Exec_load: file_id=%d\n", + file_id)) + return 1; + + return cache.flush_data(); +} + +bool Execute_load_query_log_event::print(FILE* file, + PRINT_EVENT_INFO* 
print_event_info) +{ + return print(file, print_event_info, 0); +} + +/** + Prints the query as LOAD DATA LOCAL and with rewritten filename. +*/ +bool Execute_load_query_log_event::print(FILE* file, + PRINT_EVENT_INFO* print_event_info, + const char *local_fname) +{ + Write_on_release_cache cache(&print_event_info->head_cache, file); + + if (print_query_header(&cache, print_event_info)) + goto err; + + /** + reduce the size of io cache so that the write function is called + for every call to my_b_printf(). + */ + DBUG_EXECUTE_IF ("simulate_execute_event_write_error", + {(&cache)->write_pos= (&cache)->write_end; + DBUG_SET("+d,simulate_file_write_error");}); + + if (local_fname) + { + if (my_b_write(&cache, (uchar*) query, fn_pos_start) || + my_b_write_string(&cache, " LOCAL INFILE ") || + pretty_print_str(&cache, local_fname, (int)strlen(local_fname))) + goto err; + + if (dup_handling == LOAD_DUP_REPLACE) + if (my_b_write_string(&cache, " REPLACE")) + goto err; + + if (my_b_write_string(&cache, " INTO") || + my_b_write(&cache, (uchar*) query + fn_pos_end, q_len-fn_pos_end) || + my_b_printf(&cache, "\n%s\n", print_event_info->delimiter)) + goto err; + } + else + { + if (my_b_write(&cache, (uchar*) query, q_len) || + my_b_printf(&cache, "\n%s\n", print_event_info->delimiter)) + goto err; + } + + if (!print_event_info->short_form) + my_b_printf(&cache, "# file_id: %d \n", file_id); + + return cache.flush_data(); +err: + return 1; +} + + + +const char str_binlog[]= "\nBINLOG '\n"; +const char fmt_delim[]= "'%s\n"; +const char fmt_n_delim[]= "\n'%s"; +const char fmt_frag[]= "\nSET @binlog_fragment_%d ='\n"; +const char fmt_binlog2[]= "BINLOG @binlog_fragment_0, @binlog_fragment_1%s\n"; + +/** + Print an event "body" cache to @c file possibly in two fragments. + Each fragement is optionally per @c do_wrap to produce an SQL statement. 
+ + @param file a file to print to + @param body the "body" IO_CACHE of event + @param do_wrap whether to wrap base64-encoded strings with + SQL cover. + @param delimiter delimiter string + + @param is_verbose MDEV-10362 workraround parameter to pass + info on presence of verbose printout in cache encoded data + + The function signals on any error through setting @c body->error to -1. +*/ +bool copy_cache_to_file_wrapped(IO_CACHE *body, + FILE *file, + bool do_wrap, + const char *delimiter, + bool is_verbose /*TODO: remove */) +{ + const my_off_t cache_size= my_b_tell(body); + + if (reinit_io_cache(body, READ_CACHE, 0L, FALSE, FALSE)) + goto err; + + if (!do_wrap) + { + my_b_copy_to_file(body, file, SIZE_T_MAX); + } + else if (4 + sizeof(str_binlog) + cache_size + sizeof(fmt_delim) > + opt_binlog_rows_event_max_encoded_size) + { + /* + 2 fragments can always represent near 1GB row-based + base64-encoded event as two strings each of size less than + max(max_allowed_packet). Greater number of fragments does not + save from potential need to tweak (increase) @@max_allowed_packet + before to process the fragments. So 2 is safe and enough. + + Split the big query when its packet size's estimation exceeds a + limit. The estimate includes the maximum packet header + contribution of non-compressed packet. 
+ */ + my_fprintf(file, fmt_frag, 0); + if (my_b_copy_to_file(body, file, (size_t) cache_size/2 + 1)) + goto err; + my_fprintf(file, fmt_n_delim, delimiter); + + my_fprintf(file, fmt_frag, 1); + if (my_b_copy_to_file(body, file, SIZE_T_MAX)) + goto err; + my_fprintf(file, fmt_delim, delimiter); + + my_fprintf(file, fmt_binlog2, delimiter); + } + else + { + my_fprintf(file, str_binlog); + if (my_b_copy_to_file(body, file, SIZE_T_MAX)) + goto err; + my_fprintf(file, fmt_delim, delimiter); + } + reinit_io_cache(body, WRITE_CACHE, 0, FALSE, TRUE); + + return false; + +err: + body->error = -1; + return true; +} + + +/** + Print an event "body" cache to @c file possibly in two fragments. + Each fragement is optionally per @c do_wrap to produce an SQL statement. + + @param file a file to print to + @param body the "body" IO_CACHE of event + @param do_wrap whether to wrap base64-encoded strings with + SQL cover. + @param delimiter delimiter string + + The function signals on any error through setting @c body->error to -1. 
+*/ +bool copy_cache_to_string_wrapped(IO_CACHE *cache, + LEX_STRING *to, + bool do_wrap, + const char *delimiter, + bool is_verbose) +{ + const my_off_t cache_size= my_b_tell(cache); + // contribution to total size estimate of formating + const size_t fmt_size= + sizeof(str_binlog) + 2*(sizeof(fmt_frag) + 2 /* %d */) + + sizeof(fmt_delim) + sizeof(fmt_n_delim) + + sizeof(fmt_binlog2) + + 3*PRINT_EVENT_INFO::max_delimiter_size; + + if (reinit_io_cache(cache, READ_CACHE, 0L, FALSE, FALSE)) + goto err; + + if (!(to->str= (char*) my_malloc(PSI_NOT_INSTRUMENTED, (size_t)cache->end_of_file + fmt_size, + MYF(0)))) + { + perror("Out of memory: can't allocate memory in " + "copy_cache_to_string_wrapped()."); + goto err; + } + + if (!do_wrap) + { + if (my_b_read(cache, (uchar*) to->str, + (to->length= (size_t)cache->end_of_file))) + goto err; + } + else if (4 + sizeof(str_binlog) + cache_size + sizeof(fmt_delim) > + opt_binlog_rows_event_max_encoded_size) + { + /* + 2 fragments can always represent near 1GB row-based + base64-encoded event as two strings each of size less than + max(max_allowed_packet). Greater number of fragments does not + save from potential need to tweak (increase) @@max_allowed_packet + before to process the fragments. So 2 is safe and enough. + + Split the big query when its packet size's estimation exceeds a + limit. The estimate includes the maximum packet header + contribution of non-compressed packet. 
+ */ + char *str= to->str; + size_t add_to_len; + + str += (to->length= sprintf(str, fmt_frag, 0)); + if (my_b_read(cache, (uchar*) str, (uint32) (cache_size/2 + 1))) + goto err; + str += (add_to_len = (uint32) (cache_size/2 + 1)); + to->length += add_to_len; + str += (add_to_len= sprintf(str, fmt_n_delim, delimiter)); + to->length += add_to_len; + + str += (add_to_len= sprintf(str, fmt_frag, 1)); + to->length += add_to_len; + if (my_b_read(cache, (uchar*) str, uint32(cache->end_of_file - (cache_size/2 + 1)))) + goto err; + str += (add_to_len= uint32(cache->end_of_file - (cache_size/2 + 1))); + to->length += add_to_len; + { + str += (add_to_len= sprintf(str , fmt_delim, delimiter)); + to->length += add_to_len; + } + to->length += sprintf(str, fmt_binlog2, delimiter); + } + else + { + char *str= to->str; + + str += (to->length= sprintf(str, str_binlog)); + if (my_b_read(cache, (uchar*) str, (size_t)cache->end_of_file)) + goto err; + str += cache->end_of_file; + to->length += (size_t)cache->end_of_file; + to->length += sprintf(str , fmt_delim, delimiter); + } + + reinit_io_cache(cache, WRITE_CACHE, 0, FALSE, TRUE); + + return false; + +err: + cache->error= -1; + return true; +} + +/** + The function invokes base64 encoder to run on the current + event string and store the result into two caches. + When the event ends the current statement the caches are is copied into + the argument file. + Copying is also concerned how to wrap the event, specifically to produce + a valid SQL syntax. + When the encoded data size is within max(MAX_ALLOWED_PACKET) + a regular BINLOG query is composed. Otherwise it is build as fragmented + + SET @binlog_fragment_0='...'; + SET @binlog_fragment_1='...'; + BINLOG @binlog_fragment_0, @binlog_fragment_1; + + where fragments are represented by a pair of indexed user + "one shot" variables. + + @note + If any changes made don't forget to duplicate them to + Old_rows_log_event as long as it's supported. 
+ + @param file pointer to IO_CACHE + @param print_event_info pointer to print_event_info specializing + what out of and how to print the event + @param name the name of a table that the event operates on + + The function signals on any error of cache access through setting + that cache's @c error to -1. +*/ +bool Rows_log_event::print_helper(FILE *file, + PRINT_EVENT_INFO *print_event_info, + char const *const name) +{ + IO_CACHE *const head= &print_event_info->head_cache; + IO_CACHE *const body= &print_event_info->body_cache; + IO_CACHE *const tail= &print_event_info->tail_cache; +#ifdef WHEN_FLASHBACK_REVIEW_READY + IO_CACHE *const sql= &print_event_info->review_sql_cache; +#endif + bool do_print_encoded= + print_event_info->base64_output_mode != BASE64_OUTPUT_NEVER && + print_event_info->base64_output_mode != BASE64_OUTPUT_DECODE_ROWS && + !print_event_info->short_form; + bool const last_stmt_event= get_flags(STMT_END_F); + + if (!print_event_info->short_form) + { + char llbuff[22]; + + print_header(head, print_event_info, !last_stmt_event); + if (my_b_printf(head, "\t%s: table id %s%s\n", + name, ullstr(m_table_id, llbuff), + last_stmt_event ? 
" flags: STMT_END_F" : "")) + goto err; + } + if (!print_event_info->short_form || print_event_info->print_row_count) + if (print_base64(body, print_event_info, do_print_encoded)) + goto err; + + if (last_stmt_event) + { + if (!is_flashback) + { + if (copy_event_cache_to_file_and_reinit(head, file) || + copy_cache_to_file_wrapped(body, file, do_print_encoded, + print_event_info->delimiter, + print_event_info->verbose) || + copy_event_cache_to_file_and_reinit(tail, file)) + goto err; + } + else + { + LEX_STRING tmp_str; + + if (copy_event_cache_to_string_and_reinit(head, &tmp_str)) + return 1; + output_buf.append(tmp_str.str, tmp_str.length); // Not \0 terminated); + my_free(tmp_str.str); + + if (copy_cache_to_string_wrapped(body, &tmp_str, do_print_encoded, + print_event_info->delimiter, + print_event_info->verbose)) + return 1; + output_buf.append(tmp_str.str, tmp_str.length); + my_free(tmp_str.str); + if (copy_event_cache_to_string_and_reinit(tail, &tmp_str)) + return 1; + output_buf.append(tmp_str.str, tmp_str.length); + my_free(tmp_str.str); + +#ifdef WHEN_FLASHBACK_REVIEW_READY + if (copy_event_cache_to_string_and_reinit(sql, &tmp_str)) + return 1; + output_buf.append(tmp_str.str, tmp_str.length); + my_free(tmp_str.str); +#endif + } + } + + return 0; +err: + return 1; +} + + +bool Annotate_rows_log_event::print(FILE *file, PRINT_EVENT_INFO *pinfo) +{ + char *pbeg; // beginning of the next line + char *pend; // end of the next line + uint cnt= 0; // characters counter + + if (!pinfo->short_form) + { + if (print_header(&pinfo->head_cache, pinfo, TRUE) || + my_b_printf(&pinfo->head_cache, "\tAnnotate_rows:\n")) + goto err; + } + else if (my_b_printf(&pinfo->head_cache, "# Annotate_rows:\n")) + goto err; + + for (pbeg= m_query_txt; ; pbeg= pend) + { + // skip all \r's and \n's at the beginning of the next line + for (;; pbeg++) + { + if (++cnt > m_query_len) + return 0; + + if (*pbeg != '\r' && *pbeg != '\n') + break; + } + + // find end of the next line + for 
(pend= pbeg + 1; + ++cnt <= m_query_len && *pend != '\r' && *pend != '\n'; + pend++) + ; + + // print next line + if (my_b_write(&pinfo->head_cache, (const uchar*) "#Q> ", 4) || + my_b_write(&pinfo->head_cache, (const uchar*) pbeg, pend - pbeg) || + my_b_write(&pinfo->head_cache, (const uchar*) "\n", 1)) + goto err; + } + + return 0; +err: + return 1; +} + + +/* + Rewrite database name for the event to name specified by new_db + SYNOPSIS + new_db Database name to change to + new_len Length + desc Event describing binlog that we're writing to. + + DESCRIPTION + Reset db name. This function assumes that temp_buf member contains event + representation taken from a binary log. It resets m_dbnam and m_dblen and + rewrites temp_buf with new db name. + + RETURN + 0 - Success + other - Error +*/ + +int Table_map_log_event::rewrite_db(const char* new_db, size_t new_len, + const Format_description_log_event* desc) +{ + DBUG_ENTER("Table_map_log_event::rewrite_db"); + DBUG_ASSERT(temp_buf); + + uint header_len= MY_MIN(desc->common_header_len, + LOG_EVENT_MINIMAL_HEADER_LEN) + TABLE_MAP_HEADER_LEN; + int len_diff; + + if (!(len_diff= (int)(new_len - m_dblen))) + { + memcpy((void*) (temp_buf + header_len + 1), new_db, m_dblen + 1); + memcpy((void*) m_dbnam, new_db, m_dblen + 1); + DBUG_RETURN(0); + } + + // Create new temp_buf + ulong event_cur_len= uint4korr(temp_buf + EVENT_LEN_OFFSET); + ulong event_new_len= event_cur_len + len_diff; + uchar* new_temp_buf= (uchar*) my_malloc(PSI_NOT_INSTRUMENTED, event_new_len, + MYF(MY_WME)); + + if (!new_temp_buf) + { + sql_print_error("Table_map_log_event::rewrite_db: " + "failed to allocate new temp_buf (%d bytes required)", + event_new_len); + DBUG_RETURN(-1); + } + + // Rewrite temp_buf + uchar *ptr= new_temp_buf; + size_t cnt= 0; + + // Copy header and change event length + memcpy(ptr, temp_buf, header_len); + int4store(ptr + EVENT_LEN_OFFSET, event_new_len); + ptr += header_len; + cnt += header_len; + + // Write new db name length 
and new name + DBUG_ASSERT(new_len < 0xff); + *ptr++ = (char)new_len; + memcpy(ptr, new_db, new_len + 1); + ptr += new_len + 1; + cnt += m_dblen + 2; + + // Copy rest part + memcpy(ptr, temp_buf + cnt, event_cur_len - cnt); + + // Reregister temp buf + free_temp_buf(); + register_temp_buf(new_temp_buf, TRUE); + + // Reset m_dbnam and m_dblen members + m_dblen= new_len; + + // m_dbnam resides in m_memory together with m_tblnam and m_coltype + uchar* memory= m_memory; + char const* tblnam= m_tblnam; + uchar* coltype= m_coltype; + + m_memory= (uchar*) my_multi_malloc(PSI_NOT_INSTRUMENTED, MYF(MY_WME), + &m_dbnam, (uint) m_dblen + 1, + &m_tblnam, (uint) m_tbllen + 1, + &m_coltype, (uint) m_colcnt, + NullS); + + if (!m_memory) + { + sql_print_error("Table_map_log_event::rewrite_db: " + "failed to allocate new m_memory (%d + %d + %d bytes required)", + m_dblen + 1, m_tbllen + 1, m_colcnt); + DBUG_RETURN(-1); + } + + memcpy((void*)m_dbnam, new_db, m_dblen + 1); + memcpy((void*)m_tblnam, tblnam, m_tbllen + 1); + memcpy(m_coltype, coltype, m_colcnt); + + my_free(memory); + DBUG_RETURN(0); +} + + +bool Table_map_log_event::print(FILE *file, PRINT_EVENT_INFO *print_event_info) +{ + if (!print_event_info->short_form) + { + char llbuff[22]; + + print_header(&print_event_info->head_cache, print_event_info, TRUE); + if (my_b_printf(&print_event_info->head_cache, + "\tTable_map: %`s.%`s mapped to number %s%s\n", + m_dbnam, m_tblnam, ullstr(m_table_id, llbuff), + ((m_flags & TM_BIT_HAS_TRIGGERS_F) ? 
+ " (has triggers)" : ""))) + goto err; + } + if (!print_event_info->short_form || print_event_info->print_row_count) + { + + if (print_event_info->print_table_metadata) + { + Optional_metadata_fields fields(m_optional_metadata, + m_optional_metadata_len); + + print_columns(&print_event_info->head_cache, fields); + print_primary_key(&print_event_info->head_cache, fields); + } + bool do_print_encoded= + print_event_info->base64_output_mode != BASE64_OUTPUT_NEVER && + print_event_info->base64_output_mode != BASE64_OUTPUT_DECODE_ROWS && + !print_event_info->short_form; + + if (print_base64(&print_event_info->body_cache, print_event_info, + do_print_encoded) || + copy_event_cache_to_file_and_reinit(&print_event_info->head_cache, + file)) + goto err; + } + + return 0; +err: + return 1; +} + +/** + Interface for iterator over charset columns. +*/ +class Table_map_log_event::Charset_iterator +{ + public: + typedef Table_map_log_event::Optional_metadata_fields::Default_charset + Default_charset; + virtual const CHARSET_INFO *next()= 0; + virtual ~Charset_iterator(){}; + /** + Factory method to create an instance of the appropriate subclass. + */ + static std::unique_ptr create_charset_iterator( + const Default_charset &default_charset, + const std::vector &column_charset); +}; + +/** + Implementation of charset iterator for the DEFAULT_CHARSET type. 
+*/ +class Table_map_log_event::Default_charset_iterator : public Charset_iterator +{ + public: + Default_charset_iterator(const Default_charset &default_charset) + : m_iterator(default_charset.charset_pairs.begin()), + m_end(default_charset.charset_pairs.end()), + m_column_index(0), + m_default_charset_info( + get_charset(default_charset.default_charset, 0)) {} + + const CHARSET_INFO *next() override { + const CHARSET_INFO *ret; + if (m_iterator != m_end && m_iterator->first == m_column_index) { + ret = get_charset(m_iterator->second, 0); + m_iterator++; + } else + ret = m_default_charset_info; + m_column_index++; + return ret; + } + ~Default_charset_iterator(){}; + + private: + std::vector::const_iterator m_iterator, + m_end; + uint m_column_index; + const CHARSET_INFO *m_default_charset_info; +}; +//Table_map_log_event::Default_charset_iterator::~Default_charset_iterator(){int a=8;a++; a--;}; +/** + Implementation of charset iterator for the COLUMNT_CHARSET type. +*/ +class Table_map_log_event::Column_charset_iterator : public Charset_iterator +{ + public: + Column_charset_iterator(const std::vector &column_charset) + : m_iterator(column_charset.begin()), m_end(column_charset.end()) {} + + const CHARSET_INFO *next() override { + const CHARSET_INFO *ret = nullptr; + if (m_iterator != m_end) { + ret = get_charset(*m_iterator, 0); + m_iterator++; + } + return ret; + } + + ~Column_charset_iterator(){}; + private: + std::vector::const_iterator m_iterator; + std::vector::const_iterator m_end; +}; +//Table_map_log_event::Column_charset_iterator::~Column_charset_iterator(){int a=8;a++; a--;}; + +std::unique_ptr +Table_map_log_event::Charset_iterator::create_charset_iterator( + const Default_charset &default_charset, + const std::vector &column_charset) +{ + if (!default_charset.empty()) + return std::unique_ptr( + new Default_charset_iterator(default_charset)); + else + return std::unique_ptr( + new Column_charset_iterator(column_charset)); +} +/** + return the string 
name of a type. + + @param[in] type type of a column + @param[in|out] meta_ptr the meta_ptr of the column. If the type doesn't have + metadata, it will not change meta_ptr, otherwise + meta_ptr will be moved to the end of the column's + metadat. + @param[in] cs charset of the column if it is a character column. + @param[out] typestr buffer to storing the string name of the type + @param[in] typestr_length length of typestr + @param[in] geometry_type internal geometry_type + */ +static void get_type_name(uint type, unsigned char** meta_ptr, + const CHARSET_INFO *cs, char *typestr, + uint typestr_length, unsigned int geometry_type) +{ + switch (type) { + case MYSQL_TYPE_LONG: + my_snprintf(typestr, typestr_length, "%s", "INT"); + break; + case MYSQL_TYPE_TINY: + my_snprintf(typestr, typestr_length, "TINYINT"); + break; + case MYSQL_TYPE_SHORT: + my_snprintf(typestr, typestr_length, "SMALLINT"); + break; + case MYSQL_TYPE_INT24: + my_snprintf(typestr, typestr_length, "MEDIUMINT"); + break; + case MYSQL_TYPE_LONGLONG: + my_snprintf(typestr, typestr_length, "BIGINT"); + break; + case MYSQL_TYPE_NEWDECIMAL: + my_snprintf(typestr, typestr_length, "DECIMAL(%d,%d)", + (*meta_ptr)[0], (*meta_ptr)[1]); + (*meta_ptr)+= 2; + break; + case MYSQL_TYPE_FLOAT: + my_snprintf(typestr, typestr_length, "FLOAT"); + (*meta_ptr)++; + break; + case MYSQL_TYPE_DOUBLE: + my_snprintf(typestr, typestr_length, "DOUBLE"); + (*meta_ptr)++; + break; + case MYSQL_TYPE_BIT: + my_snprintf(typestr, typestr_length, "BIT(%d)", + (((*meta_ptr)[0])) + (*meta_ptr)[1]*8); + (*meta_ptr)+= 2; + break; + case MYSQL_TYPE_TIMESTAMP2: + if (**meta_ptr != 0) + my_snprintf(typestr, typestr_length, "TIMESTAMP(%d)", **meta_ptr); + else + my_snprintf(typestr, typestr_length, "TIMESTAMP"); + (*meta_ptr)++; + break; + case MYSQL_TYPE_DATETIME2: + if (**meta_ptr != 0) + my_snprintf(typestr, typestr_length, "DATETIME(%d)", **meta_ptr); + else + my_snprintf(typestr, typestr_length, "DATETIME"); + (*meta_ptr)++; + break; + 
case MYSQL_TYPE_TIME2: + if (**meta_ptr != 0) + my_snprintf(typestr, typestr_length, "TIME(%d)", **meta_ptr); + else + my_snprintf(typestr, typestr_length, "TIME"); + (*meta_ptr)++; + break; + case MYSQL_TYPE_NEWDATE: + case MYSQL_TYPE_DATE: + my_snprintf(typestr, typestr_length, "DATE"); + break; + case MYSQL_TYPE_YEAR: + my_snprintf(typestr, typestr_length, "YEAR"); + break; + case MYSQL_TYPE_ENUM: + my_snprintf(typestr, typestr_length, "ENUM"); + (*meta_ptr)+= 2; + break; + case MYSQL_TYPE_SET: + my_snprintf(typestr, typestr_length, "SET"); + (*meta_ptr)+= 2; + break; + case MYSQL_TYPE_BLOB: + { + bool is_text= (cs && cs->number != my_charset_bin.number); + const char *names[5][2] = { + {"INVALID_BLOB(%d)", "INVALID_TEXT(%d)"}, + {"TINYBLOB", "TINYTEXT"}, + {"BLOB", "TEXT"}, + {"MEDIUMBLOB", "MEDIUMTEXT"}, + {"LONGBLOB", "LONGTEXT"} + }; + unsigned char size= **meta_ptr; + + if (size == 0 || size > 4) + my_snprintf(typestr, typestr_length, names[0][is_text], size); + else + my_snprintf(typestr, typestr_length, names[**meta_ptr][is_text]); + + (*meta_ptr)++; + } + break; + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_VAR_STRING: + if (cs && cs->number != my_charset_bin.number) + my_snprintf(typestr, typestr_length, "VARCHAR(%d)", + uint2korr(*meta_ptr)/cs->mbmaxlen); + else + my_snprintf(typestr, typestr_length, "VARBINARY(%d)", + uint2korr(*meta_ptr)); + + (*meta_ptr)+= 2; + break; + case MYSQL_TYPE_STRING: + { + uint byte0= (*meta_ptr)[0]; + uint byte1= (*meta_ptr)[1]; + uint len= (((byte0 & 0x30) ^ 0x30) << 4) | byte1; + + if (cs && cs->number != my_charset_bin.number) + my_snprintf(typestr, typestr_length, "CHAR(%d)", len/cs->mbmaxlen); + else + my_snprintf(typestr, typestr_length, "BINARY(%d)", len); + + (*meta_ptr)+= 2; + } + break; + case MYSQL_TYPE_GEOMETRY: + { + const char* names[8] = { + "GEOMETRY", "POINT", "LINESTRING", "POLYGON", "MULTIPOINT", + "MULTILINESTRING", "MULTIPOLYGON", "GEOMETRYCOLLECTION" + }; + if (geometry_type < 8) + 
my_snprintf(typestr, typestr_length, names[geometry_type]); + else + my_snprintf(typestr, typestr_length, "INVALID_GEOMETRY_TYPE(%u)", + geometry_type); + (*meta_ptr)++; + } + break; + default: + *typestr= 0; + break; + } +} + +void Table_map_log_event::print_columns(IO_CACHE *file, + const Optional_metadata_fields &fields) +{ + unsigned char* field_metadata_ptr= m_field_metadata; + std::vector::const_iterator signedness_it= fields.m_signedness.begin(); + + std::unique_ptr charset_it = + Charset_iterator::create_charset_iterator(fields.m_default_charset, + fields.m_column_charset); + std::unique_ptr enum_and_set_charset_it = + Charset_iterator::create_charset_iterator( + fields.m_enum_and_set_default_charset, + fields.m_enum_and_set_column_charset); + std::vector::const_iterator col_names_it= + fields.m_column_name.begin(); + std::vector::const_iterator + set_str_values_it= fields.m_set_str_value.begin(); + std::vector::const_iterator + enum_str_values_it= fields.m_enum_str_value.begin(); + std::vector::const_iterator geometry_type_it= + fields.m_geometry_type.begin(); + + uint geometry_type= 0; + + my_b_printf(file, "# Columns("); + + for (unsigned long i= 0; i < m_colcnt; i++) + { + uint real_type = m_coltype[i]; + if (real_type == MYSQL_TYPE_STRING && + (*field_metadata_ptr == MYSQL_TYPE_ENUM || + *field_metadata_ptr == MYSQL_TYPE_SET)) + real_type= *field_metadata_ptr; + + // Get current column's collation id if it is a character, enum, + // or set column + const CHARSET_INFO *cs = NULL; + if (is_character_type(real_type)) + cs = charset_it->next(); + else if (is_enum_or_set_type(real_type)) + cs = enum_and_set_charset_it->next(); + + // Print column name + if (col_names_it != fields.m_column_name.end()) + { + pretty_print_identifier(file, col_names_it->c_str(), col_names_it->size()); + my_b_printf(file, " "); + col_names_it++; + } + + + // update geometry_type for geometry columns + if (real_type == MYSQL_TYPE_GEOMETRY) + { + geometry_type= (geometry_type_it 
!= fields.m_geometry_type.end()) ? + *geometry_type_it++ : 0; + } + + // print column type + const uint TYPE_NAME_LEN = 100; + char type_name[TYPE_NAME_LEN]; + get_type_name(real_type, &field_metadata_ptr, cs, type_name, + TYPE_NAME_LEN, geometry_type); + + if (type_name[0] == '\0') + { + my_b_printf(file, "INVALID_TYPE(%d)", real_type); + continue; + } + my_b_printf(file, "%s", type_name); + + // Print UNSIGNED for numeric column + if (is_numeric_type(real_type) && + signedness_it != fields.m_signedness.end()) + { + if (*signedness_it == true) + my_b_printf(file, " UNSIGNED"); + signedness_it++; + } + + // if the column is not marked as 'null', print 'not null' + if (!(m_null_bits[(i / 8)] & (1 << (i % 8)))) + my_b_printf(file, " NOT NULL"); + + // Print string values of SET and ENUM column + const Optional_metadata_fields::str_vector *str_values= NULL; + if (real_type == MYSQL_TYPE_ENUM && + enum_str_values_it != fields.m_enum_str_value.end()) + { + str_values= &(*enum_str_values_it); + enum_str_values_it++; + } + else if (real_type == MYSQL_TYPE_SET && + set_str_values_it != fields.m_set_str_value.end()) + { + str_values= &(*set_str_values_it); + set_str_values_it++; + } + + if (str_values != NULL) + { + const char *separator= "("; + for (Optional_metadata_fields::str_vector::const_iterator it= + str_values->begin(); it != str_values->end(); it++) + { + my_b_printf(file, "%s", separator); + pretty_print_str(file, it->c_str(), it->size()); + separator= ","; + } + my_b_printf(file, ")"); + } + // Print column character set, except in text columns with binary collation + if (cs != NULL && + (is_enum_or_set_type(real_type) || cs->number != my_charset_bin.number)) + my_b_printf(file, " CHARSET %s COLLATE %s", cs->cs_name.str, + cs->coll_name.str); + if (i != m_colcnt - 1) my_b_printf(file, ",\n# "); + } + my_b_printf(file, ")"); + my_b_printf(file, "\n"); +} + +void Table_map_log_event::print_primary_key + (IO_CACHE *file,const Optional_metadata_fields &fields) +{ + 
if (!fields.m_primary_key.empty()) + { + my_b_printf(file, "# Primary Key("); + + std::vector::const_iterator it= + fields.m_primary_key.begin(); + + for (; it != fields.m_primary_key.end(); it++) + { + if (it != fields.m_primary_key.begin()) + my_b_printf(file, ", "); + + // Print column name or column index + if (it->first >= fields.m_column_name.size()) + my_b_printf(file, "%u", it->first); + else + my_b_printf(file, "%s", fields.m_column_name[it->first].c_str()); + + // Print prefix length + if (it->second != 0) + my_b_printf(file, "(%u)", it->second); + } + + my_b_printf(file, ")\n"); + } +} + + +bool Write_rows_log_event::print(FILE *file, PRINT_EVENT_INFO* print_event_info) +{ + DBUG_EXECUTE_IF("simulate_cache_read_error", + {DBUG_SET("+d,simulate_my_b_fill_error");}); + return Rows_log_event::print_helper(file, print_event_info, is_flashback ? "Delete_rows" : "Write_rows"); +} + +bool Write_rows_compressed_log_event::print(FILE *file, + PRINT_EVENT_INFO* print_event_info) +{ + uchar *new_buf; + ulong len; + bool is_malloc = false; + if(!row_log_event_uncompress(glob_description_event, + checksum_alg == BINLOG_CHECKSUM_ALG_CRC32, + temp_buf, UINT_MAX32, NULL, 0, &is_malloc, + &new_buf, &len)) + { + free_temp_buf(); + register_temp_buf(new_buf, true); + if (Rows_log_event::print_helper(file, print_event_info, + "Write_compressed_rows")) + goto err; + } + else + { + if (my_b_printf(&print_event_info->head_cache, + "ERROR: uncompress write_compressed_rows failed\n")) + goto err; + } + + return 0; +err: + return 1; +} + + +bool Delete_rows_log_event::print(FILE *file, + PRINT_EVENT_INFO* print_event_info) +{ + return Rows_log_event::print_helper(file, print_event_info, is_flashback ? 
"Write_rows" : "Delete_rows"); +} + + +bool Delete_rows_compressed_log_event::print(FILE *file, + PRINT_EVENT_INFO* print_event_info) +{ + uchar *new_buf; + ulong len; + bool is_malloc = false; + if(!row_log_event_uncompress(glob_description_event, + checksum_alg == BINLOG_CHECKSUM_ALG_CRC32, + temp_buf, UINT_MAX32, NULL, 0, &is_malloc, + &new_buf, &len)) + { + free_temp_buf(); + register_temp_buf(new_buf, true); + if (Rows_log_event::print_helper(file, print_event_info, + "Delete_compressed_rows")) + goto err; + } + else + { + if (my_b_printf(&print_event_info->head_cache, + "ERROR: uncompress delete_compressed_rows failed\n")) + goto err; + } + + return 0; +err: + return 1; +} + + +bool Update_rows_log_event::print(FILE *file, + PRINT_EVENT_INFO* print_event_info) +{ + return Rows_log_event::print_helper(file, print_event_info, "Update_rows"); +} + +bool +Update_rows_compressed_log_event::print(FILE *file, + PRINT_EVENT_INFO *print_event_info) +{ + uchar *new_buf; + ulong len; + bool is_malloc= false; + if(!row_log_event_uncompress(glob_description_event, + checksum_alg == BINLOG_CHECKSUM_ALG_CRC32, + temp_buf, UINT_MAX32, NULL, 0, &is_malloc, + &new_buf, &len)) + { + free_temp_buf(); + register_temp_buf(new_buf, true); + if (Rows_log_event::print_helper(file, print_event_info, + "Update_compressed_rows")) + goto err; + } + else + { + if (my_b_printf(&print_event_info->head_cache, + "ERROR: uncompress update_compressed_rows failed\n")) + goto err; + } + + return 0; +err: + return 1; +} + + +bool Incident_log_event::print(FILE *file, + PRINT_EVENT_INFO *print_event_info) +{ + if (print_event_info->short_form) + return 0; + + Write_on_release_cache cache(&print_event_info->head_cache, file); + + if (print_header(&cache, print_event_info, FALSE) || + my_b_printf(&cache, "\n# Incident: %s\nRELOAD DATABASE; # Shall generate syntax error\n", description())) + return 1; + return cache.flush_data(); +} + + +/* Print for its unrecognized ignorable event */ +bool 
Ignorable_log_event::print(FILE *file, + PRINT_EVENT_INFO *print_event_info) +{ + if (print_event_info->short_form) + return 0; + + if (print_header(&print_event_info->head_cache, print_event_info, FALSE) || + my_b_printf(&print_event_info->head_cache, "\tIgnorable\n") || + my_b_printf(&print_event_info->head_cache, + "# Ignorable event type %d (%s)\n", number, description) || + copy_event_cache_to_file_and_reinit(&print_event_info->head_cache, + file)) + return 1; + return 0; +} + + +/** + The default values for these variables should be values that are + *incorrect*, i.e., values that cannot occur in an event. This way, + they will always be printed for the first event. +*/ +st_print_event_info::st_print_event_info() +{ + myf const flags = MYF(MY_WME | MY_NABP); + /* + Currently we only use static PRINT_EVENT_INFO objects, so zeroed at + program's startup, but these explicit bzero() is for the day someone + creates dynamic instances. + */ + bzero(db, sizeof(db)); + bzero(charset, sizeof(charset)); + bzero(time_zone_str, sizeof(time_zone_str)); + delimiter[0]= ';'; + delimiter[1]= 0; + flags2_inited= 0; + flags2= 0; + sql_mode_inited= 0; + row_events= 0; + sql_mode= 0; + auto_increment_increment= 0; + auto_increment_offset= 0; + charset_inited= 0; + lc_time_names_number= ~0; + charset_database_number= ILLEGAL_CHARSET_INFO_NUMBER; + thread_id= 0; + server_id= 0; + domain_id= 0; + thread_id_printed= false; + server_id_printed= false; + domain_id_printed= false; + allow_parallel= true; + allow_parallel_printed= false; + found_row_event= false; + print_row_count= false; + short_form= false; + skip_replication= 0; + printed_fd_event=FALSE; + file= 0; + base64_output_mode=BASE64_OUTPUT_UNSPEC; + m_is_event_group_active= TRUE; + m_is_event_group_filtering_enabled= FALSE; + open_cached_file(&head_cache, NULL, NULL, 0, flags); + open_cached_file(&body_cache, NULL, NULL, 0, flags); + open_cached_file(&tail_cache, NULL, NULL, 0, flags); +#ifdef WHEN_FLASHBACK_REVIEW_READY + 
open_cached_file(&review_sql_cache, NULL, NULL, 0, flags); +#endif +} + + +bool copy_event_cache_to_string_and_reinit(IO_CACHE *cache, LEX_STRING *to) +{ + reinit_io_cache(cache, READ_CACHE, 0L, FALSE, FALSE); + if (cache->end_of_file > SIZE_T_MAX || + !(to->str= (char*) my_malloc(PSI_NOT_INSTRUMENTED, (to->length= (size_t)cache->end_of_file), MYF(0)))) + { + perror("Out of memory: can't allocate memory in copy_event_cache_to_string_and_reinit()."); + goto err; + } + if (my_b_read(cache, (uchar*) to->str, to->length)) + { + my_free(to->str); + perror("Can't read data from IO_CACHE"); + return true; + } + reinit_io_cache(cache, WRITE_CACHE, 0, FALSE, TRUE); + return false; + +err: + to->str= 0; + to->length= 0; + return true; +} + + +bool +Gtid_log_event::print(FILE *file, PRINT_EVENT_INFO *print_event_info) +{ + Write_on_release_cache cache(&print_event_info->head_cache, file, + Write_on_release_cache::FLUSH_F, this); + char buf[21]; + char buf2[21]; + + if (!print_event_info->short_form && !is_flashback) + { + print_header(&cache, print_event_info, FALSE); + longlong10_to_str(seq_no, buf, 10); + if (my_b_printf(&cache, "\tGTID %u-%u-%s", domain_id, server_id, buf)) + goto err; + if (flags2 & FL_GROUP_COMMIT_ID) + { + longlong10_to_str(commit_id, buf2, 10); + if (my_b_printf(&cache, " cid=%s", buf2)) + goto err; + } + if (flags2 & FL_DDL) + if (my_b_write_string(&cache, " ddl")) + goto err; + if (flags2 & FL_TRANSACTIONAL) + if (my_b_write_string(&cache, " trans")) + goto err; + if (flags2 & FL_WAITED) + if (my_b_write_string(&cache, " waited")) + goto err; + if (flags_extra & FL_START_ALTER_E1) + if (my_b_write_string(&cache, " START ALTER")) + goto err; + if (flags_extra & FL_COMMIT_ALTER_E1) + if (my_b_printf(&cache, " COMMIT ALTER id= %lu", sa_seq_no)) + goto err; + if (flags_extra & FL_ROLLBACK_ALTER_E1) + if (my_b_printf(&cache, " ROLLBACK ALTER id= %lu", sa_seq_no)) + goto err; + if (my_b_printf(&cache, "\n")) + goto err; + + if 
(!print_event_info->allow_parallel_printed || + print_event_info->allow_parallel != !!(flags2 & FL_ALLOW_PARALLEL)) + { + if (my_b_printf(&cache, + "/*!100101 SET @@session.skip_parallel_replication=%u*/%s\n", + !(flags2 & FL_ALLOW_PARALLEL), + print_event_info->delimiter)) + goto err; + print_event_info->allow_parallel= !!(flags2 & FL_ALLOW_PARALLEL); + print_event_info->allow_parallel_printed= true; + } + + if (!print_event_info->domain_id_printed || + print_event_info->domain_id != domain_id) + { + if (my_b_printf(&cache, + "/*!100001 SET @@session.gtid_domain_id=%u*/%s\n", + domain_id, print_event_info->delimiter)) + goto err; + print_event_info->domain_id= domain_id; + print_event_info->domain_id_printed= true; + } + + if (!print_event_info->server_id_printed || + print_event_info->server_id != server_id) + { + if (my_b_printf(&cache, "/*!100001 SET @@session.server_id=%u*/%s\n", + server_id, print_event_info->delimiter)) + goto err; + print_event_info->server_id= server_id; + print_event_info->server_id_printed= true; + } + + if (!is_flashback) + if (my_b_printf(&cache, "/*!100001 SET @@session.gtid_seq_no=%s*/%s\n", + buf, print_event_info->delimiter)) + goto err; + } + if ((flags2 & FL_PREPARED_XA) && !is_flashback) + { + my_b_write_string(&cache, "XA START "); + xid.serialize(); + my_b_write(&cache, (uchar*) xid.buf, strlen(xid.buf)); + if (my_b_printf(&cache, "%s\n", print_event_info->delimiter)) + goto err; + } + else if (!(flags2 & FL_STANDALONE)) + { + if (my_b_printf(&cache, is_flashback ? 
"COMMIT\n%s\n" : + "START TRANSACTION\n%s\n", print_event_info->delimiter)) + goto err; + } + + return cache.flush_data(); +err: + return 1; +} + +bool XA_prepare_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) +{ + Write_on_release_cache cache(&print_event_info->head_cache, file, + Write_on_release_cache::FLUSH_F, this); + m_xid.serialize(); + + if (!print_event_info->short_form) + { + print_header(&cache, print_event_info, FALSE); + if (my_b_printf(&cache, "\tXID = %s\n", m_xid.buf)) + goto error; + } + + if (my_b_printf(&cache, "XA PREPARE %s\n%s\n", + m_xid.buf, print_event_info->delimiter)) + goto error; + + return cache.flush_data(); +error: + return TRUE; +} diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc new file mode 100644 index 00000000..19901035 --- /dev/null +++ b/sql/log_event_old.cc @@ -0,0 +1,2749 @@ +/* Copyright (c) 2007, 2019, Oracle and/or its affiliates. + Copyright (c) 2009, 2019, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +#ifndef MYSQL_CLIENT +#include "unireg.h" +#endif +#include "log_event.h" +#ifndef MYSQL_CLIENT +#include "sql_cache.h" // QUERY_CACHE_FLAGS_SIZE +#include "sql_base.h" // close_tables_for_reopen +#include "key.h" // key_copy +#include "lock.h" // mysql_unlock_tables +#include "rpl_rli.h" +#include "rpl_utility.h" +#endif +#include "log_event_old.h" +#include "rpl_record_old.h" +#include "transaction.h" + +PSI_memory_key key_memory_log_event_old; + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + +// Old implementation of do_apply_event() +int +Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, rpl_group_info *rgi) +{ + DBUG_ENTER("Old_rows_log_event::do_apply_event(st_relay_log_info*)"); + int error= 0; + THD *ev_thd= ev->thd; + uchar const *row_start= ev->m_rows_buf; + const Relay_log_info *rli= rgi->rli; + + /* + If m_table_id == ~0UL, then we have a dummy event that does not + contain any data. In that case, we just remove all tables in the + tables_to_lock list, close the thread tables, and return with + success. + */ + if (ev->m_table_id == ~0UL) + { + /* + This one is supposed to be set: just an extra check so that + nothing strange has happened. + */ + DBUG_ASSERT(ev->get_flags(Old_rows_log_event::STMT_END_F)); + + rgi->slave_close_thread_tables(ev_thd); + ev_thd->clear_error(); + DBUG_RETURN(0); + } + + /* + 'ev_thd' has been set by exec_relay_log_event(), just before calling + do_apply_event(). We still check here to prevent future coding + errors. + */ + DBUG_ASSERT(rgi->thd == ev_thd); + + /* + If there is no locks taken, this is the first binrow event seen + after the table map events. 
We should then lock all the tables + used in the transaction and proceed with execution of the actual + event. + */ + if (!ev_thd->lock) + { + /* + Lock_tables() reads the contents of ev_thd->lex, so they must be + initialized. + + We also call the THD::reset_for_next_command(), since this + is the logical start of the next "statement". Note that this + call might reset the value of current_stmt_binlog_format, so + we need to do any changes to that value after this function. + */ + delete_explain_query(thd->lex); + lex_start(ev_thd); + ev_thd->reset_for_next_command(); + + /* + This is a row injection, so we flag the "statement" as + such. Note that this code is called both when the slave does row + injections and when the BINLOG statement is used to do row + injections. + */ + ev_thd->lex->set_stmt_row_injection(); + + if (unlikely(open_and_lock_tables(ev_thd, rgi->tables_to_lock, FALSE, 0))) + { + if (ev_thd->is_error()) + { + /* + Error reporting borrowed from Query_log_event with many excessive + simplifications. + We should not honour --slave-skip-errors at this point as we are + having severe errors which should not be skipped. + */ + rli->report(ERROR_LEVEL, ev_thd->get_stmt_da()->sql_errno(), NULL, + "Error '%s' on opening tables", + ev_thd->get_stmt_da()->message()); + ev_thd->is_slave_error= 1; + } + DBUG_RETURN(1); + } + + /* + When the open and locking succeeded, we check all tables to + ensure that they still have the correct type. + */ + + { + TABLE_LIST *table_list_ptr= rgi->tables_to_lock; + for (uint i=0 ; table_list_ptr&& (i< rgi->tables_to_lock_count); + table_list_ptr= table_list_ptr->next_global, i++) + { + /* + Please see comment in log_event.cc-Rows_log_event::do_apply_event() + function for the explanation of the below if condition + */ + if (table_list_ptr->parent_l) + continue; + /* + We can use a down cast here since we know that every table added + to the tables_to_lock is a RPL_TABLE_LIST(or child table which is + skipped above). 
+ */ + RPL_TABLE_LIST *ptr=static_cast(table_list_ptr); + DBUG_ASSERT(ptr->m_tabledef_valid); + TABLE *conv_table; + if (!ptr->m_tabledef.compatible_with(thd, rgi, ptr->table, &conv_table)) + { + ev_thd->is_slave_error= 1; + rgi->slave_close_thread_tables(ev_thd); + DBUG_RETURN(Old_rows_log_event::ERR_BAD_TABLE_DEF); + } + DBUG_PRINT("debug", ("Table: %s.%s is compatible with master" + " - conv_table: %p", + ptr->table->s->db.str, + ptr->table->s->table_name.str, conv_table)); + ptr->m_conv_table= conv_table; + } + } + + /* + ... and then we add all the tables to the table map and remove + them from tables to lock. + + We also invalidate the query cache for all the tables, since + they will now be changed. + + TODO [/Matz]: Maybe the query cache should not be invalidated + here? It might be that a table is not changed, even though it + was locked for the statement. We do know that each + Old_rows_log_event contain at least one row, so after processing one + Old_rows_log_event, we can invalidate the query cache for the + associated table. + */ + TABLE_LIST *ptr= rgi->tables_to_lock; + for (uint i=0; ptr && (i < rgi->tables_to_lock_count); ptr= ptr->next_global, i++) + { + /* + Please see comment in log_event.cc-Rows_log_event::do_apply_event() + function for the explanation of the below if condition + */ + if (ptr->parent_l) + continue; + rgi->m_table_map.set_table(ptr->table_id, ptr->table); + } +#ifdef HAVE_QUERY_CACHE + query_cache.invalidate_locked_for_write(thd, rgi->tables_to_lock); +#endif + } + + TABLE* table= rgi->m_table_map.get_table(ev->m_table_id); + + if (table) + { + /* + table == NULL means that this table should not be replicated + (this was set up by Table_map_log_event::do_apply_event() + which tested replicate-* rules). 
+ */ + + /* + It's not needed to set_time() but + 1) it continues the property that "Time" in SHOW PROCESSLIST shows how + much slave is behind + 2) it will be needed when we allow replication from a table with no + TIMESTAMP column to a table with one. + So we call set_time(), like in SBR. Presently it changes nothing. + */ + ev_thd->set_time(ev->when, ev->when_sec_part); + /* + There are a few flags that are replicated with each row event. + Make sure to set/clear them before executing the main body of + the event. + */ + if (ev->get_flags(Old_rows_log_event::NO_FOREIGN_KEY_CHECKS_F)) + ev_thd->variables.option_bits|= OPTION_NO_FOREIGN_KEY_CHECKS; + else + ev_thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS; + + if (ev->get_flags(Old_rows_log_event::RELAXED_UNIQUE_CHECKS_F)) + ev_thd->variables.option_bits|= OPTION_RELAXED_UNIQUE_CHECKS; + else + ev_thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS; + /* A small test to verify that objects have consistent types */ + DBUG_ASSERT(sizeof(ev_thd->variables.option_bits) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS)); + + table->rpl_write_set= table->write_set; + + error= do_before_row_operations(table); + while (error == 0 && row_start < ev->m_rows_end) + { + uchar const *row_end= NULL; + if (unlikely((error= do_prepare_row(ev_thd, rgi, table, row_start, + &row_end)))) + break; // We should perform the after-row operation even in + // the case of error + + DBUG_ASSERT(row_end != NULL); // cannot happen + DBUG_ASSERT(row_end <= ev->m_rows_end); + + /* in_use can have been set to NULL in close_tables_for_reopen */ + THD* old_thd= table->in_use; + if (!table->in_use) + table->in_use= ev_thd; + error= do_exec_row(table); + table->in_use = old_thd; + switch (error) + { + /* Some recoverable errors */ + case HA_ERR_RECORD_CHANGED: + case HA_ERR_KEY_NOT_FOUND: /* Idempotency support: OK if + tuple does not exist */ + error= 0; + case 0: + break; + + default: + rli->report(ERROR_LEVEL, 
ev_thd->get_stmt_da()->sql_errno(), NULL, + "Error in %s event: row application failed. %s", + ev->get_type_str(), + ev_thd->is_error() ? ev_thd->get_stmt_da()->message() : ""); + thd->is_slave_error= 1; + break; + } + + row_start= row_end; + } + DBUG_EXECUTE_IF("stop_slave_middle_group", + const_cast(rli)->abort_slave= 1;); + error= do_after_row_operations(table, error); + } + + if (unlikely(error)) + { /* error has occurred during the transaction */ + rli->report(ERROR_LEVEL, ev_thd->get_stmt_da()->sql_errno(), NULL, + "Error in %s event: error during transaction execution " + "on table %s.%s. %s", + ev->get_type_str(), table->s->db.str, + table->s->table_name.str, + ev_thd->is_error() ? ev_thd->get_stmt_da()->message() : ""); + + /* + If one day we honour --skip-slave-errors in row-based replication, and + the error should be skipped, then we would clear mappings, rollback, + close tables, but the slave SQL thread would not stop and then may + assume the mapping is still available, the tables are still open... + So then we should clear mappings/rollback/close here only if this is a + STMT_END_F. + For now we code, knowing that error is not skippable and so slave SQL + thread is certainly going to stop. + rollback at the caller along with sbr. + */ + ev_thd->reset_current_stmt_binlog_format_row(); + rgi->cleanup_context(ev_thd, error); + ev_thd->is_slave_error= 1; + DBUG_RETURN(error); + } + + DBUG_RETURN(0); +} +#endif + + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + +/* + Check if there are more UNIQUE keys after the given key. +*/ +static int +last_uniq_key(TABLE *table, uint keyno) +{ + while (++keyno < table->s->keys) + if (table->key_info[keyno].flags & HA_NOSAME) + return 0; + return 1; +} + + +/* + Compares table->record[0] and table->record[1] + + Returns TRUE if different. 
+*/ +static bool record_compare(TABLE *table) +{ + bool result= FALSE; + if (table->s->blob_fields + table->s->varchar_fields == 0) + { + result= cmp_record(table,record[1]); + goto record_compare_exit; + } + + /* Compare null bits */ + if (memcmp(table->null_flags, + table->null_flags+table->s->rec_buff_length, + table->s->null_bytes)) + { + result= TRUE; // Diff in NULL value + goto record_compare_exit; + } + + /* Compare updated fields */ + for (Field **ptr=table->field ; *ptr ; ptr++) + { + if ((*ptr)->cmp_binary_offset(table->s->rec_buff_length)) + { + result= TRUE; + goto record_compare_exit; + } + } + +record_compare_exit: + return result; +} + + +/* + Copy "extra" columns from record[1] to record[0]. + + Copy the extra fields that are not present on the master but are + present on the slave from record[1] to record[0]. This is used + after fetching a record that are to be updated, either inside + replace_record() or as part of executing an update_row(). + */ +static int +copy_extra_record_fields(TABLE *table, + size_t master_reclength, + my_ptrdiff_t master_fields) +{ + DBUG_ENTER("copy_extra_record_fields(table, master_reclen, master_fields)"); + DBUG_PRINT("info", ("Copying to %p " + "from field %lu at offset %lu " + "to field %d at offset %lu", + table->record[0], + (ulong) master_fields, (ulong) master_reclength, + table->s->fields, table->s->reclength)); + /* + Copying the extra fields of the slave that does not exist on + master into record[0] (which are basically the default values). + */ + + if (table->s->fields < (uint) master_fields) + DBUG_RETURN(0); + + DBUG_ASSERT(master_reclength <= table->s->reclength); + if (master_reclength < table->s->reclength) + memcpy(table->record[0] + master_reclength, + table->record[1] + master_reclength, + table->s->reclength - master_reclength); + + /* + Bit columns are special. We iterate over all the remaining + columns and copy the "extra" bits to the new record. 
This is + not a very good solution: it should be refactored on + opportunity. + + REFACTORING SUGGESTION (Matz). Introduce a member function + similar to move_field_offset() called copy_field_offset() to + copy field values and implement it for all Field subclasses. Use + this function to copy data from the found record to the record + that are going to be inserted. + + The copy_field_offset() function need to be a virtual function, + which in this case will prevent copying an entire range of + fields efficiently. + */ + { + Field **field_ptr= table->field + master_fields; + for ( ; *field_ptr ; ++field_ptr) + { + /* + Set the null bit according to the values in record[1] + */ + if ((*field_ptr)->maybe_null() && + (*field_ptr)->is_null_in_record(reinterpret_cast(table->record[1]))) + (*field_ptr)->set_null(); + else + (*field_ptr)->set_notnull(); + + /* + Do the extra work for special columns. + */ + switch ((*field_ptr)->real_type()) + { + default: + /* Nothing to do */ + break; + + case MYSQL_TYPE_BIT: + Field_bit *f= static_cast(*field_ptr); + if (f->bit_len > 0) + { + my_ptrdiff_t const offset= table->record[1] - table->record[0]; + uchar const bits= + get_rec_bits(f->bit_ptr + offset, f->bit_ofs, f->bit_len); + set_rec_bits(bits, f->bit_ptr, f->bit_ofs, f->bit_len); + } + break; + } + } + } + DBUG_RETURN(0); // All OK +} + + +/* + Replace the provided record in the database. + + SYNOPSIS + replace_record() + thd Thread context for writing the record. + table Table to which record should be written. + master_reclength + Offset to first column that is not present on the master, + alternatively the length of the record on the master + side. + + RETURN VALUE + Error code on failure, 0 on success. + + DESCRIPTION + Similar to how it is done in mysql_insert(), we first try to do + a ha_write_row() and of that fails due to duplicated keys (or + indices), we do an ha_update_row() or a ha_delete_row() instead. 
+ */ +static int +replace_record(THD *thd, TABLE *table, + ulong const master_reclength, + uint const master_fields) +{ + DBUG_ENTER("replace_record"); + DBUG_ASSERT(table != NULL && thd != NULL); + + int error; + int keynum; + auto_afree_ptr key(NULL); + +#ifndef DBUG_OFF + DBUG_DUMP("record[0]", table->record[0], table->s->reclength); + DBUG_PRINT_BITSET("debug", "write_set = %s", table->write_set); + DBUG_PRINT_BITSET("debug", "read_set = %s", table->read_set); +#endif + + while (unlikely(error= table->file->ha_write_row(table->record[0]))) + { + if (error == HA_ERR_LOCK_DEADLOCK || error == HA_ERR_LOCK_WAIT_TIMEOUT) + { + table->file->print_error(error, MYF(0)); /* to check at exec_relay_log_event */ + DBUG_RETURN(error); + } + if (unlikely((keynum= table->file->get_dup_key(error)) < 0)) + { + table->file->print_error(error, MYF(0)); + /* + We failed to retrieve the duplicate key + - either because the error was not "duplicate key" error + - or because the information which key is not available + */ + DBUG_RETURN(error); + } + + /* + We need to retrieve the old row into record[1] to be able to + either update or delete the offending record. We either: + + - use rnd_pos() with a row-id (available as dupp_row) to the + offending row, if that is possible (MyISAM and Blackhole), or else + + - use index_read_idx() with the key that is duplicated, to + retrieve the offending row. 
+ */ + if (table->file->ha_table_flags() & HA_DUPLICATE_POS) + { + error= table->file->ha_rnd_pos(table->record[1], table->file->dup_ref); + if (unlikely(error)) + { + DBUG_PRINT("info",("rnd_pos() returns error %d",error)); + table->file->print_error(error, MYF(0)); + DBUG_RETURN(error); + } + } + else + { + if (unlikely(table->file->extra(HA_EXTRA_FLUSH_CACHE))) + { + DBUG_RETURN(my_errno); + } + + if (key.get() == NULL) + { + key.assign(static_cast(my_alloca(table->s->max_unique_length))); + if (unlikely(key.get() == NULL)) + DBUG_RETURN(ENOMEM); + } + + key_copy((uchar*)key.get(), table->record[0], table->key_info + keynum, + 0); + error= table->file->ha_index_read_idx_map(table->record[1], keynum, + (const uchar*)key.get(), + HA_WHOLE_KEY, + HA_READ_KEY_EXACT); + if (unlikely(error)) + { + DBUG_PRINT("info", ("index_read_idx() returns error %d", error)); + table->file->print_error(error, MYF(0)); + DBUG_RETURN(error); + } + } + + /* + Now, table->record[1] should contain the offending row. That + will enable us to update it or, alternatively, delete it (so + that we can insert the new row afterwards). + + First we copy the columns into table->record[0] that are not + present on the master from table->record[1], if there are any. + */ + copy_extra_record_fields(table, master_reclength, master_fields); + + /* + REPLACE is defined as either INSERT or DELETE + INSERT. If + possible, we can replace it with an UPDATE, but that will not + work on InnoDB if FOREIGN KEY checks are necessary. + + I (Matz) am not sure of the reason for the last_uniq_key() + check as, but I'm guessing that it's something along the + following lines. + + Suppose that we got the duplicate key to be a key that is not + the last unique key for the table and we perform an update: + then there might be another key for which the unique check will + fail, so we're better off just deleting the row and inserting + the correct row. 
+ */ + if (last_uniq_key(table, keynum) && + !table->file->referenced_by_foreign_key()) + { + error=table->file->ha_update_row(table->record[1], + table->record[0]); + if (unlikely(error) && error != HA_ERR_RECORD_IS_THE_SAME) + table->file->print_error(error, MYF(0)); + else + error= 0; + DBUG_RETURN(error); + } + else + { + if (unlikely((error= table->file->ha_delete_row(table->record[1])))) + { + table->file->print_error(error, MYF(0)); + DBUG_RETURN(error); + } + /* Will retry ha_write_row() with the offending row removed. */ + } + } + + DBUG_RETURN(error); +} + + +/** + Find the row given by 'key', if the table has keys, or else use a table scan + to find (and fetch) the row. + + If the engine allows random access of the records, a combination of + position() and rnd_pos() will be used. + + @param table Pointer to table to search + @param key Pointer to key to use for search, if table has key + + @pre table->record[0] shall contain the row to locate + and key shall contain a key to use for searching, if + the engine has a key. + + @post If the return value is zero, table->record[1] + will contain the fetched row and the internal "cursor" will refer to + the row. If the return value is non-zero, + table->record[1] is undefined. In either case, + table->record[0] is undefined. + + @return Zero if the row was successfully fetched into + table->record[1], error code otherwise. + */ + +static int find_and_fetch_row(TABLE *table, uchar *key) +{ + DBUG_ENTER("find_and_fetch_row(TABLE *table, uchar *key, uchar *record)"); + DBUG_PRINT("enter", ("table: %p, key: %p record: %p", + table, key, table->record[1])); + + DBUG_ASSERT(table->in_use != NULL); + + DBUG_DUMP("record[0]", table->record[0], table->s->reclength); + + if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION) && + table->s->primary_key < MAX_KEY) + { + /* + Use a more efficient method to fetch the record given by + table->record[0] if the engine allows it. 
We first compute a + row reference using the position() member function (it will be + stored in table->file->ref) and the use rnd_pos() to position + the "cursor" (i.e., record[0] in this case) at the correct row. + + TODO: Add a check that the correct record has been fetched by + comparing with the original record. Take into account that the + record on the master and slave can be of different + length. Something along these lines should work: + + ADD>>> store_record(table,record[1]); + int error= table->file->ha_rnd_pos(table->record[0], table->file->ref); + ADD>>> DBUG_ASSERT(memcmp(table->record[1], table->record[0], + table->s->reclength) == 0); + + */ + table->file->position(table->record[0]); + int error= table->file->ha_rnd_pos(table->record[0], table->file->ref); + /* + rnd_pos() returns the record in table->record[0], so we have to + move it to table->record[1]. + */ + memcpy(table->record[1], table->record[0], table->s->reclength); + DBUG_RETURN(error); + } + + /* We need to retrieve all fields */ + /* TODO: Move this out from this function to main loop */ + table->use_all_columns(); + + if (table->s->keys > 0) + { + int error; + /* We have a key: search the table using the index */ + if (!table->file->inited && + unlikely(error= table->file->ha_index_init(0, FALSE))) + { + table->file->print_error(error, MYF(0)); + DBUG_RETURN(error); + } + + /* + Don't print debug messages when running valgrind since they can + trigger false warnings. + */ +#ifndef HAVE_valgrind + DBUG_DUMP("table->record[0]", table->record[0], table->s->reclength); + DBUG_DUMP("table->record[1]", table->record[1], table->s->reclength); +#endif + + /* + We need to set the null bytes to ensure that the filler bit are + all set when returning. There are storage engines that just set + the necessary bits on the bytes and don't set the filler bits + correctly. + */ + my_ptrdiff_t const pos= + table->s->null_bytes > 0 ? 
table->s->null_bytes - 1 : 0; + table->record[1][pos]= 0xFF; + if (unlikely((error= table->file->ha_index_read_map(table->record[1], key, + HA_WHOLE_KEY, + HA_READ_KEY_EXACT)))) + { + table->file->print_error(error, MYF(0)); + table->file->ha_index_end(); + DBUG_RETURN(error); + } + + /* + Don't print debug messages when running valgrind since they can + trigger false warnings. + */ +#ifndef HAVE_valgrind + DBUG_DUMP("table->record[0]", table->record[0], table->s->reclength); + DBUG_DUMP("table->record[1]", table->record[1], table->s->reclength); +#endif + /* + Below is a minor "optimization". If the key (i.e., key number + 0) has the HA_NOSAME flag set, we know that we have found the + correct record (since there can be no duplicates); otherwise, we + have to compare the record with the one found to see if it is + the correct one. + + CAVEAT! This behaviour is essential for the replication of, + e.g., the mysql.proc table since the correct record *shall* be + found using the primary key *only*. There shall be no + comparison of non-PK columns to decide if the correct record is + found. I can see no scenario where it would be incorrect to + chose the row to change only using a PK or an UNNI. + */ + if (table->key_info->flags & HA_NOSAME) + { + table->file->ha_index_end(); + DBUG_RETURN(0); + } + + while (record_compare(table)) + { + int error; + + while ((error= table->file->ha_index_next(table->record[1]))) + { + table->file->print_error(error, MYF(0)); + table->file->ha_index_end(); + DBUG_RETURN(error); + } + } + + /* + Have to restart the scan to be able to fetch the next row. 
+ */ + table->file->ha_index_end(); + } + else + { + int restart_count= 0; // Number of times scanning has restarted from top + int error; + + /* We don't have a key: search the table using rnd_next() */ + if (unlikely((error= table->file->ha_rnd_init_with_error(1)))) + return error; + + /* Continue until we find the right record or have made a full loop */ + do + { + error= table->file->ha_rnd_next(table->record[1]); + + DBUG_DUMP("record[0]", table->record[0], table->s->reclength); + DBUG_DUMP("record[1]", table->record[1], table->s->reclength); + + switch (error) { + case 0: + break; + + case HA_ERR_END_OF_FILE: + if (++restart_count < 2) + { + int error2; + if (unlikely((error2= table->file->ha_rnd_init_with_error(1)))) + DBUG_RETURN(error2); + } + break; + + default: + table->file->print_error(error, MYF(0)); + DBUG_PRINT("info", ("Record not found")); + (void) table->file->ha_rnd_end(); + DBUG_RETURN(error); + } + } + while (restart_count < 2 && record_compare(table)); + + /* + Have to restart the scan to be able to fetch the next row. + */ + DBUG_PRINT("info", ("Record %sfound", restart_count == 2 ? "not " : "")); + table->file->ha_rnd_end(); + + DBUG_ASSERT(error == HA_ERR_END_OF_FILE || error == 0); + DBUG_RETURN(error); + } + + DBUG_RETURN(0); +} + + +/********************************************************** + Row handling primitives for Write_rows_log_event_old + **********************************************************/ + +int Write_rows_log_event_old::do_before_row_operations(TABLE *table) +{ + int error= 0; + + /* + We are using REPLACE semantics and not INSERT IGNORE semantics + when writing rows, that is: new rows replace old rows. We need to + inform the storage engine that it should use this behaviour. + */ + + /* Tell the storage engine that we are using REPLACE semantics. 
*/ + thd->lex->duplicates= DUP_REPLACE; + + thd->lex->sql_command= SQLCOM_REPLACE; + /* + Do not raise the error flag in case of hitting to an unique attribute + */ + table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE); + table->file->extra(HA_EXTRA_IGNORE_NO_KEY); + table->file->ha_start_bulk_insert(0); + return error; +} + + +int Write_rows_log_event_old::do_after_row_operations(TABLE *table, int error) +{ + int local_error= 0; + table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); + table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); + /* + resetting the extra with + table->file->extra(HA_EXTRA_NO_IGNORE_NO_KEY); + fires bug#27077 + todo: explain or fix + */ + if (unlikely((local_error= table->file->ha_end_bulk_insert()))) + { + table->file->print_error(local_error, MYF(0)); + } + return error? error : local_error; +} + + +int +Write_rows_log_event_old::do_prepare_row(THD *thd_arg, + rpl_group_info *rgi, + TABLE *table, + uchar const *row_start, + uchar const **row_end) +{ + DBUG_ASSERT(table != NULL); + DBUG_ASSERT(row_start && row_end); + + int error; + error= unpack_row_old(rgi, + table, m_width, table->record[0], + row_start, m_rows_end, + &m_cols, row_end, &m_master_reclength, + table->write_set, PRE_GA_WRITE_ROWS_EVENT); + bitmap_copy(table->read_set, table->write_set); + return error; +} + + +int Write_rows_log_event_old::do_exec_row(TABLE *table) +{ + DBUG_ASSERT(table != NULL); + int error= replace_record(thd, table, m_master_reclength, m_width); + return error; +} + + +/********************************************************** + Row handling primitives for Delete_rows_log_event_old + **********************************************************/ + +int Delete_rows_log_event_old::do_before_row_operations(TABLE *table) +{ + DBUG_ASSERT(m_memory == NULL); + + if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION) && + table->s->primary_key < MAX_KEY) + { + /* + We don't need to allocate any memory for 
m_after_image and + m_key since they are not used. + */ + return 0; + } + + int error= 0; + + if (table->s->keys > 0) + { + m_memory= (uchar*) my_multi_malloc(key_memory_log_event_old, MYF(MY_WME), + &m_after_image, + (uint) table->s->reclength, + &m_key, + (uint) table->key_info->key_length, + NullS); + } + else + { + m_after_image= (uchar*) my_malloc(key_memory_log_event_old, table->s->reclength, MYF(MY_WME)); + m_memory= (uchar*)m_after_image; + m_key= NULL; + } + if (!m_memory) + return HA_ERR_OUT_OF_MEM; + + return error; +} + + +int Delete_rows_log_event_old::do_after_row_operations(TABLE *table, int error) +{ + /*error= ToDo:find out what this should really be, this triggers close_scan in nbd, returning error?*/ + table->file->ha_index_or_rnd_end(); + my_free(m_memory); // Free for multi_malloc + m_memory= NULL; + m_after_image= NULL; + m_key= NULL; + + return error; +} + + +int +Delete_rows_log_event_old::do_prepare_row(THD *thd_arg, + rpl_group_info *rgi, + TABLE *table, + uchar const *row_start, + uchar const **row_end) +{ + int error; + DBUG_ASSERT(row_start && row_end); + /* + This assertion actually checks that there is at least as many + columns on the slave as on the master. + */ + DBUG_ASSERT(table->s->fields >= m_width); + + error= unpack_row_old(rgi, + table, m_width, table->record[0], + row_start, m_rows_end, + &m_cols, row_end, &m_master_reclength, + table->read_set, PRE_GA_DELETE_ROWS_EVENT); + /* + If we will access rows using the random access method, m_key will + be set to NULL, so we do not need to make a key copy in that case. + */ + if (m_key) + { + KEY *const key_info= table->key_info; + + key_copy(m_key, table->record[0], key_info, 0); + } + + return error; +} + + +int Delete_rows_log_event_old::do_exec_row(TABLE *table) +{ + int error; + DBUG_ASSERT(table != NULL); + + if (likely(!(error= ::find_and_fetch_row(table, m_key)))) + { + /* + Now we should have the right row to delete. 
We are using + record[0] since it is guaranteed to point to a record with the + correct value. + */ + error= table->file->ha_delete_row(table->record[0]); + } + return error; +} + + +/********************************************************** + Row handling primitives for Update_rows_log_event_old + **********************************************************/ + +int Update_rows_log_event_old::do_before_row_operations(TABLE *table) +{ + DBUG_ASSERT(m_memory == NULL); + + int error= 0; + + if (table->s->keys > 0) + { + m_memory= (uchar*) my_multi_malloc(key_memory_log_event_old, MYF(MY_WME), + &m_after_image, + (uint) table->s->reclength, + &m_key, + (uint) table->key_info->key_length, + NullS); + } + else + { + m_after_image= (uchar*) my_malloc(key_memory_log_event_old, table->s->reclength, MYF(MY_WME)); + m_memory= m_after_image; + m_key= NULL; + } + if (!m_memory) + return HA_ERR_OUT_OF_MEM; + + return error; +} + + +int Update_rows_log_event_old::do_after_row_operations(TABLE *table, int error) +{ + /*error= ToDo:find out what this should really be, this triggers close_scan in nbd, returning error?*/ + table->file->ha_index_or_rnd_end(); + my_free(m_memory); + m_memory= NULL; + m_after_image= NULL; + m_key= NULL; + + return error; +} + + +int Update_rows_log_event_old::do_prepare_row(THD *thd_arg, + rpl_group_info *rgi, + TABLE *table, + uchar const *row_start, + uchar const **row_end) +{ + int error; + DBUG_ASSERT(row_start && row_end); + /* + This assertion actually checks that there is at least as many + columns on the slave as on the master. 
+ */ + DBUG_ASSERT(table->s->fields >= m_width); + + /* record[0] is the before image for the update */ + error= unpack_row_old(rgi, + table, m_width, table->record[0], + row_start, m_rows_end, + &m_cols, row_end, &m_master_reclength, + table->read_set, PRE_GA_UPDATE_ROWS_EVENT); + row_start = *row_end; + /* m_after_image is the after image for the update */ + error= unpack_row_old(rgi, + table, m_width, m_after_image, + row_start, m_rows_end, + &m_cols, row_end, &m_master_reclength, + table->write_set, PRE_GA_UPDATE_ROWS_EVENT); + + DBUG_DUMP("record[0]", table->record[0], table->s->reclength); + DBUG_DUMP("m_after_image", m_after_image, table->s->reclength); + + /* + If we will access rows using the random access method, m_key will + be set to NULL, so we do not need to make a key copy in that case. + */ + if (m_key) + { + KEY *const key_info= table->key_info; + + key_copy(m_key, table->record[0], key_info, 0); + } + + return error; +} + + +int Update_rows_log_event_old::do_exec_row(TABLE *table) +{ + DBUG_ASSERT(table != NULL); + + int error= ::find_and_fetch_row(table, m_key); + if (unlikely(error)) + return error; + + /* + We have to ensure that the new record (i.e., the after image) is + in record[0] and the old record (i.e., the before image) is in + record[1]. This since some storage engines require this (for + example, the partition engine). + + Since find_and_fetch_row() puts the fetched record (i.e., the old + record) in record[1], we can keep it there. We put the new record + (i.e., the after image) into record[0], and copy the fields that + are on the slave (i.e., in record[1]) into record[0], effectively + overwriting the default values that where put there by the + unpack_row() function. + */ + memcpy(table->record[0], m_after_image, table->s->reclength); + copy_extra_record_fields(table, m_master_reclength, m_width); + + /* + Now we have the right row to update. 
The old row (the one we're + looking for) is in record[1] and the new row has is in record[0]. + We also have copied the original values already in the slave's + database into the after image delivered from the master. + */ + error= table->file->ha_update_row(table->record[1], table->record[0]); + if (unlikely(error == HA_ERR_RECORD_IS_THE_SAME)) + error= 0; + + return error; +} + +#endif + + +/************************************************************************** + Rows_log_event member functions +**************************************************************************/ + +#ifndef MYSQL_CLIENT +Old_rows_log_event::Old_rows_log_event(THD *thd_arg, TABLE *tbl_arg, ulong tid, + MY_BITMAP const *cols, + bool is_transactional) + : Log_event(thd_arg, 0, is_transactional), + m_row_count(0), + m_table(tbl_arg), + m_table_id(tid), + m_width(tbl_arg ? tbl_arg->s->fields : 1), + m_rows_buf(0), m_rows_cur(0), m_rows_end(0), m_flags(0) +#ifdef HAVE_REPLICATION + , m_curr_row(NULL), m_curr_row_end(NULL), m_key(NULL) +#endif +{ + + // This constructor should not be reached. + assert(0); + + /* + We allow a special form of dummy event when the table, and cols + are null and the table id is ~0UL. This is a temporary + solution, to be able to terminate a started statement in the + binary log: the extraneous events will be removed in the future. + */ + DBUG_ASSERT((tbl_arg && tbl_arg->s && tid != ~0UL) || + (!tbl_arg && !cols && tid == ~0UL)); + + if (thd_arg->variables.option_bits & OPTION_NO_FOREIGN_KEY_CHECKS) + set_flags(NO_FOREIGN_KEY_CHECKS_F); + if (thd_arg->variables.option_bits & OPTION_RELAXED_UNIQUE_CHECKS) + set_flags(RELAXED_UNIQUE_CHECKS_F); + /* if my_bitmap_init fails, caught in is_valid() */ + if (likely(!my_bitmap_init(&m_cols, + m_width <= sizeof(m_bitbuf)*8 ? 
m_bitbuf : NULL, + m_width))) + { + /* Cols can be zero if this is a dummy binrows event */ + if (likely(cols != NULL)) + { + memcpy(m_cols.bitmap, cols->bitmap, no_bytes_in_map(cols)); + create_last_word_mask(&m_cols); + } + } + else + { + // Needed because my_bitmap_init() does not set it to null on failure + m_cols.bitmap= 0; + } +} +#endif + + +Old_rows_log_event::Old_rows_log_event(const uchar *buf, uint event_len, + Log_event_type event_type, + const Format_description_log_event + *description_event) + : Log_event(buf, description_event), + m_row_count(0), +#ifndef MYSQL_CLIENT + m_table(NULL), +#endif + m_table_id(0), m_rows_buf(0), m_rows_cur(0), m_rows_end(0) +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + , m_curr_row(NULL), m_curr_row_end(NULL), m_key(NULL) +#endif +{ + DBUG_ENTER("Old_rows_log_event::Old_Rows_log_event(const char*,...)"); + uint8 const common_header_len= description_event->common_header_len; + uint8 const post_header_len= description_event->post_header_len[event_type-1]; + + DBUG_PRINT("enter",("event_len: %u common_header_len: %d " + "post_header_len: %d", + event_len, common_header_len, + post_header_len)); + + const uchar *post_start= buf + common_header_len; + DBUG_DUMP("post_header", post_start, post_header_len); + post_start+= RW_MAPID_OFFSET; + if (post_header_len == 6) + { + /* Master is of an intermediate source tree before 5.1.4. 
Id is 4 bytes */ + m_table_id= uint4korr(post_start); + post_start+= 4; + } + else + { + m_table_id= (ulong) uint6korr(post_start); + post_start+= RW_FLAGS_OFFSET; + } + + m_flags= uint2korr(post_start); + + uchar const *const var_start= + (const uchar *)buf + common_header_len + post_header_len; + uchar const *const ptr_width= var_start; + uchar *ptr_after_width= (uchar*) ptr_width; + DBUG_PRINT("debug", ("Reading from %p", ptr_after_width)); + m_width = net_field_length(&ptr_after_width); + DBUG_PRINT("debug", ("m_width=%lu", m_width)); + /* Avoid reading out of buffer */ + if (ptr_after_width + m_width > (uchar *)buf + event_len) + { + m_cols.bitmap= NULL; + DBUG_VOID_RETURN; + } + + /* if my_bitmap_init fails, caught in is_valid() */ + if (likely(!my_bitmap_init(&m_cols, + m_width <= sizeof(m_bitbuf)*8 ? m_bitbuf : NULL, + m_width))) + { + DBUG_PRINT("debug", ("Reading from %p", ptr_after_width)); + memcpy(m_cols.bitmap, ptr_after_width, (m_width + 7) / 8); + create_last_word_mask(&m_cols); + ptr_after_width+= (m_width + 7) / 8; + DBUG_DUMP("m_cols", (uchar*) m_cols.bitmap, no_bytes_in_map(&m_cols)); + } + else + { + // Needed because my_bitmap_init() does not set it to null on failure + m_cols.bitmap= NULL; + DBUG_VOID_RETURN; + } + + const uchar* const ptr_rows_data= (const uchar*) ptr_after_width; + size_t const data_size= event_len - (ptr_rows_data - (const uchar *) buf); + DBUG_PRINT("info",("m_table_id: %lu m_flags: %d m_width: %lu data_size: %zu", + m_table_id, m_flags, m_width, data_size)); + DBUG_DUMP("rows_data", (uchar*) ptr_rows_data, data_size); + + m_rows_buf= (uchar*) my_malloc(key_memory_log_event_old, data_size, MYF(MY_WME)); + if (likely((bool)m_rows_buf)) + { +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + m_curr_row= m_rows_buf; +#endif + m_rows_end= m_rows_buf + data_size; + m_rows_cur= m_rows_end; + memcpy(m_rows_buf, ptr_rows_data, data_size); + } + else + m_cols.bitmap= 0; // to not free it + + DBUG_VOID_RETURN; +} + + 
+Old_rows_log_event::~Old_rows_log_event() +{ + if (m_cols.bitmap == m_bitbuf) // no my_malloc happened + m_cols.bitmap= 0; // so no my_free in my_bitmap_free + my_bitmap_free(&m_cols); // To pair with my_bitmap_init(). + my_free(m_rows_buf); +} + + +int Old_rows_log_event::get_data_size() +{ + uchar buf[MAX_INT_WIDTH]; + uchar *end= net_store_length(buf, (m_width + 7) / 8); + + DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", + return (int)(6 + no_bytes_in_map(&m_cols) + (end - buf) + + m_rows_cur - m_rows_buf);); + int data_size= ROWS_HEADER_LEN; + data_size+= no_bytes_in_map(&m_cols); + data_size+= (uint) (end - buf); + + data_size+= (uint) (m_rows_cur - m_rows_buf); + return data_size; +} + + +#ifndef MYSQL_CLIENT +int Old_rows_log_event::do_add_row_data(uchar *row_data, size_t length) +{ + /* + When the table has a primary key, we would probably want, by default, to + log only the primary key value instead of the entire "before image". This + would save binlog space. TODO + */ + DBUG_ENTER("Old_rows_log_event::do_add_row_data"); + DBUG_PRINT("enter", ("row_data: %p length: %zu",row_data, + length)); + /* + Don't print debug messages when running valgrind since they can + trigger false warnings. 
+ */ +#ifndef HAVE_valgrind + DBUG_DUMP("row_data", row_data, MY_MIN(length, 32)); +#endif + + DBUG_ASSERT(m_rows_buf <= m_rows_cur); + DBUG_ASSERT(!m_rows_buf || (m_rows_end && m_rows_buf < m_rows_end)); + DBUG_ASSERT(m_rows_cur <= m_rows_end); + + /* The cast will always work since m_rows_cur <= m_rows_end */ + if (static_cast(m_rows_end - m_rows_cur) <= length) + { + size_t const block_size= 1024; + my_ptrdiff_t const cur_size= m_rows_cur - m_rows_buf; + my_ptrdiff_t const new_alloc= + block_size * ((cur_size + length + block_size - 1) / block_size); + + uchar* const new_buf= (uchar*)my_realloc(key_memory_log_event_old, (uchar*)m_rows_buf, (uint) new_alloc, + MYF(MY_ALLOW_ZERO_PTR|MY_WME)); + if (unlikely(!new_buf)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + /* If the memory moved, we need to move the pointers */ + if (new_buf != m_rows_buf) + { + m_rows_buf= new_buf; + m_rows_cur= m_rows_buf + cur_size; + } + + /* + The end pointer should always be changed to point to the end of + the allocated memory. + */ + m_rows_end= m_rows_buf + new_alloc; + } + + DBUG_ASSERT(m_rows_cur + length <= m_rows_end); + memcpy(m_rows_cur, row_data, length); + m_rows_cur+= length; + m_row_count++; + DBUG_RETURN(0); +} +#endif + + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +int Old_rows_log_event::do_apply_event(rpl_group_info *rgi) +{ + DBUG_ENTER("Old_rows_log_event::do_apply_event(Relay_log_info*)"); + int error= 0; + Relay_log_info const *rli= rgi->rli; + + /* + If m_table_id == ~0UL, then we have a dummy event that does not + contain any data. In that case, we just remove all tables in the + tables_to_lock list, close the thread tables, and return with + success. + */ + if (m_table_id == ~0UL) + { + /* + This one is supposed to be set: just an extra check so that + nothing strange has happened. 
+ */ + DBUG_ASSERT(get_flags(STMT_END_F)); + + rgi->slave_close_thread_tables(thd); + thd->clear_error(); + DBUG_RETURN(0); + } + + /* + 'thd' has been set by exec_relay_log_event(), just before calling + do_apply_event(). We still check here to prevent future coding + errors. + */ + DBUG_ASSERT(rgi->thd == thd); + + /* + If there is no locks taken, this is the first binrow event seen + after the table map events. We should then lock all the tables + used in the transaction and proceed with execution of the actual + event. + */ + if (!thd->lock) + { + /* + lock_tables() reads the contents of thd->lex, so they must be + initialized. Contrary to in + Table_map_log_event::do_apply_event() we don't call + mysql_init_query() as that may reset the binlog format. + */ + lex_start(thd); + + if (unlikely((error= lock_tables(thd, rgi->tables_to_lock, + rgi->tables_to_lock_count, 0)))) + { + if (thd->is_slave_error || thd->is_fatal_error) + { + /* + Error reporting borrowed from Query_log_event with many excessive + simplifications (we don't honour --slave-skip-errors) + */ + uint actual_error= thd->net.last_errno; + rli->report(ERROR_LEVEL, actual_error, NULL, + "Error '%s' in %s event: when locking tables", + (actual_error ? thd->net.last_error : + "unexpected success or fatal error"), + get_type_str()); + thd->is_fatal_error= 1; + } + else + { + rli->report(ERROR_LEVEL, error, NULL, + "Error in %s event: when locking tables", + get_type_str()); + } + rgi->slave_close_thread_tables(thd); + DBUG_RETURN(error); + } + + /* + When the open and locking succeeded, we check all tables to + ensure that they still have the correct type. 
+ */ + + { + TABLE_LIST *table_list_ptr= rgi->tables_to_lock; + for (uint i=0; table_list_ptr&& (i< rgi->tables_to_lock_count); + table_list_ptr= static_cast(table_list_ptr->next_global), i++) + { + /* + Please see comment in log_event.cc-Rows_log_event::do_apply_event() + function for the explanation of the below if condition + */ + if (table_list_ptr->parent_l) + continue; + /* + We can use a down cast here since we know that every table added + to the tables_to_lock is a RPL_TABLE_LIST (or child table which is + skipped above). + */ + RPL_TABLE_LIST *ptr=static_cast(table_list_ptr); + TABLE *conv_table; + if (ptr->m_tabledef.compatible_with(thd, rgi, ptr->table, &conv_table)) + { + thd->is_slave_error= 1; + rgi->slave_close_thread_tables(thd); + DBUG_RETURN(ERR_BAD_TABLE_DEF); + } + ptr->m_conv_table= conv_table; + } + } + + /* + ... and then we add all the tables to the table map but keep + them in the tables to lock list. + + + We also invalidate the query cache for all the tables, since + they will now be changed. + + TODO [/Matz]: Maybe the query cache should not be invalidated + here? It might be that a table is not changed, even though it + was locked for the statement. We do know that each + Old_rows_log_event contain at least one row, so after processing one + Old_rows_log_event, we can invalidate the query cache for the + associated table. + */ + for (TABLE_LIST *ptr= rgi->tables_to_lock ; ptr ; ptr= ptr->next_global) + { + rgi->m_table_map.set_table(ptr->table_id, ptr->table); + } +#ifdef HAVE_QUERY_CACHE + query_cache.invalidate_locked_for_write(thd, rgi->tables_to_lock); +#endif + } + + TABLE* + table= + m_table= rgi->m_table_map.get_table(m_table_id); + + if (table) + { + /* + table == NULL means that this table should not be replicated + (this was set up by Table_map_log_event::do_apply_event() + which tested replicate-* rules). 
+ */ + + /* + It's not needed to set_time() but + 1) it continues the property that "Time" in SHOW PROCESSLIST shows how + much slave is behind + 2) it will be needed when we allow replication from a table with no + TIMESTAMP column to a table with one. + So we call set_time(), like in SBR. Presently it changes nothing. + */ + thd->set_time(when, when_sec_part); + /* + There are a few flags that are replicated with each row event. + Make sure to set/clear them before executing the main body of + the event. + */ + if (get_flags(NO_FOREIGN_KEY_CHECKS_F)) + thd->variables.option_bits|= OPTION_NO_FOREIGN_KEY_CHECKS; + else + thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS; + + if (get_flags(RELAXED_UNIQUE_CHECKS_F)) + thd->variables.option_bits|= OPTION_RELAXED_UNIQUE_CHECKS; + else + thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS; + /* A small test to verify that objects have consistent types */ + DBUG_ASSERT(sizeof(thd->variables.option_bits) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS)); + + if ( m_width == table->s->fields && bitmap_is_set_all(&m_cols)) + set_flags(COMPLETE_ROWS_F); + + /* + Set tables write and read sets. + + Read_set contains all slave columns (in case we are going to fetch + a complete record from slave) + + Write_set equals the m_cols bitmap sent from master but it can be + longer if slave has extra columns. 
+ */ + + DBUG_PRINT_BITSET("debug", "Setting table's write_set from: %s", &m_cols); + + bitmap_set_all(table->read_set); + bitmap_set_all(table->write_set); + if (!get_flags(COMPLETE_ROWS_F)) + bitmap_intersect(table->write_set,&m_cols); + table->rpl_write_set= table->write_set; + + // Do event specific preparations + + error= do_before_row_operations(rli); + + // row processing loop + + while (error == 0 && m_curr_row < m_rows_end) + { + /* in_use can have been set to NULL in close_tables_for_reopen */ + THD* old_thd= table->in_use; + if (!table->in_use) + table->in_use= thd; + + error= do_exec_row(rgi); + + DBUG_PRINT("info", ("error: %d", error)); + DBUG_ASSERT(error != HA_ERR_RECORD_DELETED); + + table->in_use = old_thd; + switch (error) + { + case 0: + break; + + /* Some recoverable errors */ + case HA_ERR_RECORD_CHANGED: + case HA_ERR_KEY_NOT_FOUND: /* Idempotency support: OK if + tuple does not exist */ + error= 0; + break; + + default: + rli->report(ERROR_LEVEL, thd->net.last_errno, NULL, + "Error in %s event: row application failed. %s", + get_type_str(), thd->net.last_error); + thd->is_slave_error= 1; + break; + } + + /* + If m_curr_row_end was not set during event execution (e.g., because + of errors) we can't proceed to the next row. If the error is transient + (i.e., error==0 at this point) we must call unpack_current_row() to set + m_curr_row_end. 
+ */ + + DBUG_PRINT("info", ("error: %d", error)); + DBUG_PRINT("info", ("curr_row: %p; curr_row_end:%p; rows_end: %p", + m_curr_row, m_curr_row_end, m_rows_end)); + + if (!m_curr_row_end && likely(!error)) + unpack_current_row(rgi); + + // at this moment m_curr_row_end should be set + DBUG_ASSERT(error || m_curr_row_end != NULL); + DBUG_ASSERT(error || m_curr_row < m_curr_row_end); + DBUG_ASSERT(error || m_curr_row_end <= m_rows_end); + + m_curr_row= m_curr_row_end; + + } // row processing loop + + DBUG_EXECUTE_IF("stop_slave_middle_group", + const_cast(rli)->abort_slave= 1;); + error= do_after_row_operations(rli, error); + } // if (table) + + if (unlikely(error)) + { /* error has occurred during the transaction */ + rli->report(ERROR_LEVEL, thd->net.last_errno, NULL, + "Error in %s event: error during transaction execution " + "on table %s.%s. %s", + get_type_str(), table->s->db.str, + table->s->table_name.str, + thd->net.last_error); + + /* + If one day we honour --skip-slave-errors in row-based replication, and + the error should be skipped, then we would clear mappings, rollback, + close tables, but the slave SQL thread would not stop and then may + assume the mapping is still available, the tables are still open... + So then we should clear mappings/rollback/close here only if this is a + STMT_END_F. + For now we code, knowing that error is not skippable and so slave SQL + thread is certainly going to stop. + rollback at the caller along with sbr. + */ + thd->reset_current_stmt_binlog_format_row(); + rgi->cleanup_context(thd, error); + thd->is_slave_error= 1; + DBUG_RETURN(error); + } + + /* + This code would ideally be placed in do_update_pos() instead, but + since we have no access to table there, we do the setting of + last_event_start_time here instead. 
+ */ + if (table && (table->s->primary_key == MAX_KEY) && + !use_trans_cache() && get_flags(STMT_END_F) == RLE_NO_FLAGS) + { + /* + ------------ Temporary fix until WL#2975 is implemented --------- + + This event is not the last one (no STMT_END_F). If we stop now + (in case of terminate_slave_thread()), how will we restart? We + have to restart from Table_map_log_event, but as this table is + not transactional, the rows already inserted will still be + present, and idempotency is not guaranteed (no PK) so we risk + that repeating leads to double insert. So we desperately try to + continue, hope we'll eventually leave this buggy situation (by + executing the final Old_rows_log_event). If we are in a hopeless + wait (reached end of last relay log and nothing gets appended + there), we timeout after one minute, and notify DBA about the + problem. When WL#2975 is implemented, just remove the member + Relay_log_info::last_event_start_time and all its occurrences. + */ + rgi->last_event_start_time= my_time(0); + } + + if (get_flags(STMT_END_F)) + { + /* + This is the end of a statement or transaction, so close (and + unlock) the tables we opened when processing the + Table_map_log_event starting the statement. + + OBSERVER. This will clear *all* mappings, not only those that + are open for the table. There is not good handle for on-close + actions for tables. + + NOTE. Even if we have no table ('table' == 0) we still need to be + here, so that we increase the group relay log position. If we didn't, we + could have a group relay log position which lags behind "forever" + (assume the last master's transaction is ignored by the slave because of + replicate-ignore rules). + */ + int binlog_error= thd->binlog_flush_pending_rows_event(TRUE); + + /* + If this event is not in a transaction, the call below will, if some + transactional storage engines are involved, commit the statement into + them and flush the pending event to binlog. 
+ If this event is in a transaction, the call will do nothing, but a + Xid_log_event will come next which will, if some transactional engines + are involved, commit the transaction and flush the pending event to the + binlog. + If there was a deadlock the transaction should have been rolled back + already. So there should be no need to rollback the transaction. + */ + DBUG_ASSERT(! thd->transaction_rollback_request); + if (unlikely((error= (binlog_error ? + trans_rollback_stmt(thd) : + trans_commit_stmt(thd))))) + rli->report(ERROR_LEVEL, error, NULL, + "Error in %s event: commit of row events failed, " + "table `%s`.`%s`", + get_type_str(), m_table->s->db.str, + m_table->s->table_name.str); + error|= binlog_error; + + /* + Now what if this is not a transactional engine? we still need to + flush the pending event to the binlog; we did it with + thd->binlog_flush_pending_rows_event(). Note that we imitate + what is done for real queries: a call to + ha_autocommit_or_rollback() (sometimes only if involves a + transactional engine), and a call to be sure to have the pending + event flushed. + */ + + thd->reset_current_stmt_binlog_format_row(); + rgi->cleanup_context(thd, 0); + } + + DBUG_RETURN(error); +} + + +Log_event::enum_skip_reason +Old_rows_log_event::do_shall_skip(rpl_group_info *rgi) +{ + /* + If the slave skip counter is 1 and this event does not end a + statement, then we should not start executing on the next event. + Otherwise, we defer the decision to the normal skipping logic. + */ + if (rgi->rli->slave_skip_counter == 1 && !get_flags(STMT_END_F)) + return Log_event::EVENT_SKIP_IGNORE; + else + return Log_event::do_shall_skip(rgi); +} + +int +Old_rows_log_event::do_update_pos(rpl_group_info *rgi) +{ + Relay_log_info *rli= rgi->rli; + int error= 0; + DBUG_ENTER("Old_rows_log_event::do_update_pos"); + + DBUG_PRINT("info", ("flags: %s", + get_flags(STMT_END_F) ? 
"STMT_END_F " : "")); + + if (get_flags(STMT_END_F)) + { + /* + Indicate that a statement is finished. + Step the group log position if we are not in a transaction, + otherwise increase the event log position. + */ + error= rli->stmt_done(log_pos, thd, rgi); + /* + Clear any errors in thd->net.last_err*. It is not known if this is + needed or not. It is believed that any errors that may exist in + thd->net.last_err* are allowed. Examples of errors are "key not + found", which is produced in the test case rpl_row_conflicts.test + */ + thd->clear_error(); + } + else + { + rgi->inc_event_relay_log_pos(); + } + + DBUG_RETURN(error); +} + +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ + + +#ifndef MYSQL_CLIENT +bool Old_rows_log_event::write_data_header() +{ + uchar buf[ROWS_HEADER_LEN]; // No need to init the buffer + + // This method should not be reached. + assert(0); + + DBUG_ASSERT(m_table_id != ~0UL); + DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", + { + int4store(buf + 0, m_table_id); + int2store(buf + 4, m_flags); + return write_data(buf, 6); + }); + int6store(buf + RW_MAPID_OFFSET, (ulonglong)m_table_id); + int2store(buf + RW_FLAGS_OFFSET, m_flags); + return write_data(buf, ROWS_HEADER_LEN); +} + + +bool Old_rows_log_event::write_data_body() +{ + /* + Note that this should be the number of *bits*, not the number of + bytes. + */ + uchar sbuf[MAX_INT_WIDTH]; + my_ptrdiff_t const data_size= m_rows_cur - m_rows_buf; + + // This method should not be reached. 
+ assert(0); + + bool res= false; + uchar *const sbuf_end= net_store_length(sbuf, (size_t) m_width); + DBUG_ASSERT(static_cast(sbuf_end - sbuf) <= sizeof(sbuf)); + + DBUG_DUMP("m_width", sbuf, (size_t) (sbuf_end - sbuf)); + res= res || write_data(sbuf, (size_t) (sbuf_end - sbuf)); + + DBUG_DUMP("m_cols", (uchar*) m_cols.bitmap, no_bytes_in_map(&m_cols)); + res= res || write_data((uchar*)m_cols.bitmap, no_bytes_in_map(&m_cols)); + DBUG_DUMP("rows", m_rows_buf, data_size); + res= res || write_data(m_rows_buf, (size_t) data_size); + + return res; + +} +#endif + + +#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) +void Old_rows_log_event::pack_info(Protocol *protocol) +{ + char buf[256]; + char const *const flagstr= + get_flags(STMT_END_F) ? " flags: STMT_END_F" : ""; + size_t bytes= my_snprintf(buf, sizeof(buf), + "table_id: %lu%s", m_table_id, flagstr); + protocol->store(buf, bytes, &my_charset_bin); +} +#endif + + +#ifdef MYSQL_CLIENT +/* Method duplicates Rows_log_event's one */ +bool Old_rows_log_event::print_helper(FILE *file, + PRINT_EVENT_INFO *print_event_info, + char const *const name) +{ + IO_CACHE *const head= &print_event_info->head_cache; + IO_CACHE *const body= &print_event_info->body_cache; + IO_CACHE *const tail= &print_event_info->tail_cache; + bool do_print_encoded= + print_event_info->base64_output_mode != BASE64_OUTPUT_DECODE_ROWS && + print_event_info->base64_output_mode != BASE64_OUTPUT_NEVER && + !print_event_info->short_form; + + if (!print_event_info->short_form) + { + if (print_header(head, print_event_info, !do_print_encoded) || + my_b_printf(head, "\t%s: table id %lu%s\n", + name, m_table_id, + do_print_encoded ? 
" flags: STMT_END_F" : "") || + print_base64(body, print_event_info, do_print_encoded)) + goto err; + } + + if (get_flags(STMT_END_F)) + { + if (copy_event_cache_to_file_and_reinit(head, file) || + copy_cache_to_file_wrapped(body, file, do_print_encoded, + print_event_info->delimiter, + print_event_info->verbose) || + copy_event_cache_to_file_and_reinit(tail, file)) + goto err; + } + return 0; +err: + return 1; +} +#endif + + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +/** + Write the current row into event's table. + + The row is located in the row buffer, pointed by @c m_curr_row member. + Number of columns of the row is stored in @c m_width member (it can be + different from the number of columns in the table to which we insert). + Bitmap @c m_cols indicates which columns are present in the row. It is assumed + that event's table is already open and pointed by @c m_table. + + If the same record already exists in the table it can be either overwritten + or an error is reported depending on the value of @c overwrite flag + (error reporting not yet implemented). Note that the matching record can be + different from the row we insert if we use primary keys to identify records in + the table. + + The row to be inserted can contain values only for selected columns. The + missing columns are filled with default values using @c prepare_record() + function. If a matching record is found in the table and @c overwritte is + true, the missing columns are taken from it. + + @param rli Relay log info (needed for row unpacking). + @param overwrite + Shall we overwrite if the row already exists or signal + error (currently ignored). + + @returns Error code on failure, 0 on success. + + This method, if successful, sets @c m_curr_row_end pointer to point at the + next row in the rows buffer. This is done when unpacking the row to be + inserted. 
+
+  @note If a matching record is found, it is either updated using
+  @c ha_update_row() or first deleted and then new record written.
+*/
+
+int
+Old_rows_log_event::write_row(rpl_group_info *rgi, const bool overwrite)
+{
+  DBUG_ENTER("write_row");
+  DBUG_ASSERT(m_table != NULL && thd != NULL);
+
+  TABLE *table= m_table;  // pointer to event's table
+  int error;
+  int keynum;
+  auto_afree_ptr<char> key(NULL);
+
+  /* fill table->record[0] with default values */
+
+  if (unlikely((error=
+                prepare_record(table, m_width,
+                               TRUE /* check if columns have def. values */))))
+    DBUG_RETURN(error);
+
+  /* unpack row into table->record[0] */
+  if ((error= unpack_current_row(rgi)))
+    DBUG_RETURN(error);
+
+#ifndef DBUG_OFF
+  DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
+  DBUG_PRINT_BITSET("debug", "write_set = %s", table->write_set);
+  DBUG_PRINT_BITSET("debug", "read_set = %s", table->read_set);
+#endif
+
+  /*
+    Try to write record. If a corresponding record already exists in the table,
+    we try to change it using ha_update_row() if possible. Otherwise we delete
+    it and repeat the whole process again.
+
+    TODO: Add safety measures against infinite looping.
+   */
+
+  while (unlikely(error= table->file->ha_write_row(table->record[0])))
+  {
+    if (error == HA_ERR_LOCK_DEADLOCK || error == HA_ERR_LOCK_WAIT_TIMEOUT)
+    {
+      table->file->print_error(error, MYF(0)); /* to check at exec_relay_log_event */
+      DBUG_RETURN(error);
+    }
+    if (unlikely((keynum= table->file->get_dup_key(error)) < 0))
+    {
+      DBUG_PRINT("info",("Can't locate duplicate key (get_dup_key returns %d)",keynum));
+      table->file->print_error(error, MYF(0));
+      /*
+        We failed to retrieve the duplicate key
+        - either because the error was not "duplicate key" error
+        - or because the information which key is not available
+      */
+      DBUG_RETURN(error);
+    }
+
+    /*
+      We need to retrieve the old row into record[1] to be able to
+      either update or delete the offending record.
We either:
+
+      - use rnd_pos() with a row-id (available as dupp_row) to the
+        offending row, if that is possible (MyISAM and Blackhole), or else
+
+      - use index_read_idx() with the key that is duplicated, to
+        retrieve the offending row.
+     */
+    if (table->file->ha_table_flags() & HA_DUPLICATE_POS)
+    {
+      DBUG_PRINT("info",("Locating offending record using rnd_pos()"));
+      error= table->file->ha_rnd_pos(table->record[1], table->file->dup_ref);
+      if (unlikely(error))
+      {
+        DBUG_PRINT("info",("rnd_pos() returns error %d",error));
+        table->file->print_error(error, MYF(0));
+        DBUG_RETURN(error);
+      }
+    }
+    else
+    {
+      DBUG_PRINT("info",("Locating offending record using index_read_idx()"));
+
+      if (table->file->extra(HA_EXTRA_FLUSH_CACHE))
+      {
+        DBUG_PRINT("info",("Error when setting HA_EXTRA_FLUSH_CACHE"));
+        DBUG_RETURN(my_errno);
+      }
+
+      if (key.get() == NULL)
+      {
+        key.assign(static_cast<char*>(my_alloca(table->s->max_unique_length)));
+        if (unlikely(key.get() == NULL))
+        {
+          DBUG_PRINT("info",("Can't allocate key buffer"));
+          DBUG_RETURN(ENOMEM);
+        }
+      }
+
+      key_copy((uchar*)key.get(), table->record[0], table->key_info + keynum,
+               0);
+      error= table->file->ha_index_read_idx_map(table->record[1], keynum,
+                                                (const uchar*)key.get(),
+                                                HA_WHOLE_KEY,
+                                                HA_READ_KEY_EXACT);
+      if (unlikely(error))
+      {
+        DBUG_PRINT("info",("index_read_idx() returns error %d", error));
+        table->file->print_error(error, MYF(0));
+        DBUG_RETURN(error);
+      }
+    }
+
+    /*
+      Now, record[1] should contain the offending row. That
+      will enable us to update it or, alternatively, delete it (so
+      that we can insert the new row afterwards).
+    */
+
+    /*
+      If row is incomplete we will use the record found to fill
+      missing columns.
+ */ + if (!get_flags(COMPLETE_ROWS_F)) + { + restore_record(table,record[1]); + error= unpack_current_row(rgi); + } + +#ifndef DBUG_OFF + DBUG_PRINT("debug",("preparing for update: before and after image")); + DBUG_DUMP("record[1] (before)", table->record[1], table->s->reclength); + DBUG_DUMP("record[0] (after)", table->record[0], table->s->reclength); +#endif + + /* + REPLACE is defined as either INSERT or DELETE + INSERT. If + possible, we can replace it with an UPDATE, but that will not + work on InnoDB if FOREIGN KEY checks are necessary. + + I (Matz) am not sure of the reason for the last_uniq_key() + check as, but I'm guessing that it's something along the + following lines. + + Suppose that we got the duplicate key to be a key that is not + the last unique key for the table and we perform an update: + then there might be another key for which the unique check will + fail, so we're better off just deleting the row and inserting + the correct row. + */ + if (last_uniq_key(table, keynum) && + !table->file->referenced_by_foreign_key()) + { + DBUG_PRINT("info",("Updating row using ha_update_row()")); + error=table->file->ha_update_row(table->record[1], + table->record[0]); + switch (error) { + + case HA_ERR_RECORD_IS_THE_SAME: + DBUG_PRINT("info",("ignoring HA_ERR_RECORD_IS_THE_SAME error from" + " ha_update_row()")); + error= 0; + + case 0: + break; + + default: + DBUG_PRINT("info",("ha_update_row() returns error %d",error)); + table->file->print_error(error, MYF(0)); + } + + DBUG_RETURN(error); + } + else + { + DBUG_PRINT("info",("Deleting offending row and trying to write new one again")); + if (unlikely((error= table->file->ha_delete_row(table->record[1])))) + { + DBUG_PRINT("info",("ha_delete_row() returns error %d",error)); + table->file->print_error(error, MYF(0)); + DBUG_RETURN(error); + } + /* Will retry ha_write_row() with the offending row removed. */ + } + } + + DBUG_RETURN(error); +} + + +/** + Locate the current row in event's table. 
+ + The current row is pointed by @c m_curr_row. Member @c m_width tells how many + columns are there in the row (this can be differnet from the number of columns + in the table). It is assumed that event's table is already open and pointed + by @c m_table. + + If a corresponding record is found in the table it is stored in + @c m_table->record[0]. Note that when record is located based on a primary + key, it is possible that the record found differs from the row being located. + + If no key is specified or table does not have keys, a table scan is used to + find the row. In that case the row should be complete and contain values for + all columns. However, it can still be shorter than the table, i.e. the table + can contain extra columns not present in the row. It is also possible that + the table has fewer columns than the row being located. + + @returns Error code on failure, 0 on success. + + @post In case of success @c m_table->record[0] contains the record found. + Also, the internal "cursor" of the table is positioned at the record found. + + @note If the engine allows random access of the records, a combination of + @c position() and @c rnd_pos() will be used. + + Note that one MUST call ha_index_or_rnd_end() after this function if + it returns 0 as we must leave the row position in the handler intact + for any following update/delete command. +*/ + +int Old_rows_log_event::find_row(rpl_group_info *rgi) +{ + DBUG_ENTER("find_row"); + + DBUG_ASSERT(m_table && m_table->in_use != NULL); + + TABLE *table= m_table; + int error; + + /* unpack row - missing fields get default values */ + + // TODO: shall we check and report errors here? 
+ prepare_record(table, m_width, FALSE /* don't check errors */); + error= unpack_current_row(rgi); + +#ifndef DBUG_OFF + DBUG_PRINT("info",("looking for the following record")); + DBUG_DUMP("record[0]", table->record[0], table->s->reclength); +#endif + + if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION) && + table->s->primary_key < MAX_KEY) + { + /* + Use a more efficient method to fetch the record given by + table->record[0] if the engine allows it. We first compute a + row reference using the position() member function (it will be + stored in table->file->ref) and the use rnd_pos() to position + the "cursor" (i.e., record[0] in this case) at the correct row. + + TODO: Add a check that the correct record has been fetched by + comparing with the original record. Take into account that the + record on the master and slave can be of different + length. Something along these lines should work: + + ADD>>> store_record(table,record[1]); + int error= table->file->ha_rnd_pos(table->record[0], table->file->ref); + ADD>>> DBUG_ASSERT(memcmp(table->record[1], table->record[0], + table->s->reclength) == 0); + + */ + DBUG_PRINT("info",("locating record using primary key (position)")); + int error= table->file->ha_rnd_pos_by_record(table->record[0]); + if (unlikely(error)) + { + DBUG_PRINT("info",("rnd_pos returns error %d",error)); + table->file->print_error(error, MYF(0)); + } + DBUG_RETURN(error); + } + + // We can't use position() - try other methods. + + /* + We need to retrieve all fields + TODO: Move this out from this function to main loop + */ + table->use_all_columns(); + + /* + Save copy of the record in table->record[1]. It might be needed + later if linear search is used to find exact match. 
+ */ + store_record(table,record[1]); + + if (table->s->keys > 0) + { + DBUG_PRINT("info",("locating record using primary key (index_read)")); + + /* We have a key: search the table using the index */ + if (!table->file->inited && + unlikely(error= table->file->ha_index_init(0, FALSE))) + { + DBUG_PRINT("info",("ha_index_init returns error %d",error)); + table->file->print_error(error, MYF(0)); + DBUG_RETURN(error); + } + + /* Fill key data for the row */ + + DBUG_ASSERT(m_key); + key_copy(m_key, table->record[0], table->key_info, 0); + + /* + Don't print debug messages when running valgrind since they can + trigger false warnings. + */ +#ifndef HAVE_valgrind + DBUG_DUMP("key data", m_key, table->key_info->key_length); +#endif + + /* + We need to set the null bytes to ensure that the filler bit are + all set when returning. There are storage engines that just set + the necessary bits on the bytes and don't set the filler bits + correctly. + */ + my_ptrdiff_t const pos= + table->s->null_bytes > 0 ? table->s->null_bytes - 1 : 0; + table->record[0][pos]= 0xFF; + + if (unlikely((error= table->file->ha_index_read_map(table->record[0], + m_key, + HA_WHOLE_KEY, + HA_READ_KEY_EXACT)))) + { + DBUG_PRINT("info",("no record matching the key found in the table")); + table->file->print_error(error, MYF(0)); + table->file->ha_index_end(); + DBUG_RETURN(error); + } + + /* + Don't print debug messages when running valgrind since they can + trigger false warnings. + */ +#ifndef HAVE_valgrind + DBUG_PRINT("info",("found first matching record")); + DBUG_DUMP("record[0]", table->record[0], table->s->reclength); +#endif + /* + Below is a minor "optimization". If the key (i.e., key number + 0) has the HA_NOSAME flag set, we know that we have found the + correct record (since there can be no duplicates); otherwise, we + have to compare the record with the one found to see if it is + the correct one. + + CAVEAT! 
This behaviour is essential for the replication of, + e.g., the mysql.proc table since the correct record *shall* be + found using the primary key *only*. There shall be no + comparison of non-PK columns to decide if the correct record is + found. I can see no scenario where it would be incorrect to + chose the row to change only using a PK or an UNNI. + */ + if (table->key_info->flags & HA_NOSAME) + { + /* Unique does not have non nullable part */ + if (!(table->key_info->flags & (HA_NULL_PART_KEY))) + { + DBUG_RETURN(0); + } + else + { + KEY *keyinfo= table->key_info; + /* + Unique has nullable part. We need to check if there is any + field in the BI image that is null and part of UNNI. + */ + bool null_found= FALSE; + for (uint i=0; i < keyinfo->user_defined_key_parts && !null_found; i++) + { + uint fieldnr= keyinfo->key_part[i].fieldnr - 1; + Field **f= table->field+fieldnr; + null_found= (*f)->is_null(); + } + + if (!null_found) + { + DBUG_RETURN(0); + } + + /* else fall through to index scan */ + } + } + + /* + In case key is not unique, we still have to iterate over records found + and find the one which is identical to the row given. A copy of the + record we are looking for is stored in record[1]. 
+ */ + DBUG_PRINT("info",("non-unique index, scanning it to find matching record")); + + while (record_compare(table)) + { + while (unlikely(error= table->file->ha_index_next(table->record[0]))) + { + DBUG_PRINT("info",("no record matching the given row found")); + table->file->print_error(error, MYF(0)); + (void) table->file->ha_index_end(); + DBUG_RETURN(error); + } + } + } + else + { + DBUG_PRINT("info",("locating record using table scan (rnd_next)")); + + int restart_count= 0; // Number of times scanning has restarted from top + + /* We don't have a key: search the table using rnd_next() */ + if (unlikely((error= table->file->ha_rnd_init_with_error(1)))) + { + DBUG_PRINT("info",("error initializing table scan" + " (ha_rnd_init returns %d)",error)); + DBUG_RETURN(error); + } + + /* Continue until we find the right record or have made a full loop */ + do + { + restart_rnd_next: + error= table->file->ha_rnd_next(table->record[0]); + + switch (error) { + + case 0: + break; + + case HA_ERR_END_OF_FILE: + if (++restart_count < 2) + { + int error2; + table->file->ha_rnd_end(); + if (unlikely((error2= table->file->ha_rnd_init_with_error(1)))) + DBUG_RETURN(error2); + goto restart_rnd_next; + } + break; + + default: + DBUG_PRINT("info", ("Failed to get next record" + " (rnd_next returns %d)",error)); + table->file->print_error(error, MYF(0)); + table->file->ha_rnd_end(); + DBUG_RETURN(error); + } + } + while (restart_count < 2 && record_compare(table)); + + /* + Note: above record_compare will take into accout all record fields + which might be incorrect in case a partial row was given in the event + */ + + /* + Have to restart the scan to be able to fetch the next row. 
+ */ + if (restart_count == 2) + DBUG_PRINT("info", ("Record not found")); + else + DBUG_DUMP("record found", table->record[0], table->s->reclength); + if (error) + table->file->ha_rnd_end(); + + DBUG_ASSERT(error == HA_ERR_END_OF_FILE || error == 0); + DBUG_RETURN(error); + } + + DBUG_RETURN(0); +} + +#endif + + +/************************************************************************** + Write_rows_log_event member functions +**************************************************************************/ + +/* + Constructor used to build an event for writing to the binary log. + */ +#if !defined(MYSQL_CLIENT) +Write_rows_log_event_old::Write_rows_log_event_old(THD *thd_arg, + TABLE *tbl_arg, + ulong tid_arg, + MY_BITMAP const *cols, + bool is_transactional) + : Old_rows_log_event(thd_arg, tbl_arg, tid_arg, cols, is_transactional) +{ + + // This constructor should not be reached. + assert(0); + +} +#endif + + +/* + Constructor used by slave to read the event from the binary log. + */ +#ifdef HAVE_REPLICATION +Write_rows_log_event_old::Write_rows_log_event_old(const uchar *buf, + uint event_len, + const Format_description_log_event + *description_event) +: Old_rows_log_event(buf, event_len, PRE_GA_WRITE_ROWS_EVENT, + description_event) +{ +} +#endif + + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +int +Write_rows_log_event_old::do_before_row_operations(const Slave_reporting_capability *const) +{ + int error= 0; + + /* + We are using REPLACE semantics and not INSERT IGNORE semantics + when writing rows, that is: new rows replace old rows. We need to + inform the storage engine that it should use this behaviour. + */ + + /* Tell the storage engine that we are using REPLACE semantics. 
*/ + thd->lex->duplicates= DUP_REPLACE; + + thd->lex->sql_command= SQLCOM_REPLACE; + /* + Do not raise the error flag in case of hitting to an unique attribute + */ + m_table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + m_table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE); + m_table->file->extra(HA_EXTRA_IGNORE_NO_KEY); + m_table->file->ha_start_bulk_insert(0); + return error; +} + + +int +Write_rows_log_event_old::do_after_row_operations(const Slave_reporting_capability *const, + int error) +{ + int local_error= 0; + m_table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); + m_table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); + /* + resetting the extra with + table->file->extra(HA_EXTRA_NO_IGNORE_NO_KEY); + fires bug#27077 + todo: explain or fix + */ + if (unlikely((local_error= m_table->file->ha_end_bulk_insert()))) + { + m_table->file->print_error(local_error, MYF(0)); + } + return error? error : local_error; +} + + +int +Write_rows_log_event_old::do_exec_row(rpl_group_info *rgi) +{ + DBUG_ASSERT(m_table != NULL); + int error= write_row(rgi, TRUE /* overwrite */); + + if (unlikely(error) && !thd->net.last_errno) + thd->net.last_errno= error; + + return error; +} + +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ + + +#ifdef MYSQL_CLIENT +bool Write_rows_log_event_old::print(FILE *file, + PRINT_EVENT_INFO* print_event_info) +{ + return Old_rows_log_event::print_helper(file, print_event_info, + "Write_rows_old"); +} +#endif + + +/************************************************************************** + Delete_rows_log_event member functions +**************************************************************************/ + +/* + Constructor used to build an event for writing to the binary log. 
+ */ + +#ifndef MYSQL_CLIENT +Delete_rows_log_event_old::Delete_rows_log_event_old(THD *thd_arg, + TABLE *tbl_arg, + ulong tid, + MY_BITMAP const *cols, + bool is_transactional) + : Old_rows_log_event(thd_arg, tbl_arg, tid, cols, is_transactional), + m_after_image(NULL), m_memory(NULL) +{ + + // This constructor should not be reached. + assert(0); + +} +#endif /* #if !defined(MYSQL_CLIENT) */ + + +/* + Constructor used by slave to read the event from the binary log. + */ +#ifdef HAVE_REPLICATION +Delete_rows_log_event_old:: +Delete_rows_log_event_old(const uchar *buf, + uint event_len, + const Format_description_log_event + *description_event) + :Old_rows_log_event(buf, event_len, PRE_GA_DELETE_ROWS_EVENT, + description_event), + m_after_image(NULL), m_memory(NULL) +{ +} +#endif + + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + +int Delete_rows_log_event_old:: +do_before_row_operations(const Slave_reporting_capability *const) +{ + if ((m_table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION) && + m_table->s->primary_key < MAX_KEY) + { + /* + We don't need to allocate any memory for m_key since it is not used. 
+ */ + return 0; + } + + if (m_table->s->keys > 0) + { + // Allocate buffer for key searches + m_key= (uchar*)my_malloc(key_memory_log_event_old, m_table->key_info->key_length, MYF(MY_WME)); + if (!m_key) + return HA_ERR_OUT_OF_MEM; + } + return 0; +} + + +int +Delete_rows_log_event_old::do_after_row_operations(const Slave_reporting_capability *const, + int error) +{ + /*error= ToDo:find out what this should really be, this triggers close_scan in nbd, returning error?*/ + m_table->file->ha_index_or_rnd_end(); + my_free(m_key); + m_key= NULL; + + return error; +} + + +int Delete_rows_log_event_old::do_exec_row(rpl_group_info *rgi) +{ + int error; + DBUG_ASSERT(m_table != NULL); + + if (likely(!(error= find_row(rgi))) ) + { + /* + Delete the record found, located in record[0] + */ + error= m_table->file->ha_delete_row(m_table->record[0]); + m_table->file->ha_index_or_rnd_end(); + } + return error; +} + +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ + + +#ifdef MYSQL_CLIENT +bool Delete_rows_log_event_old::print(FILE *file, + PRINT_EVENT_INFO* print_event_info) +{ + return Old_rows_log_event::print_helper(file, print_event_info, + "Delete_rows_old"); +} +#endif + + +/************************************************************************** + Update_rows_log_event member functions +**************************************************************************/ + +/* + Constructor used to build an event for writing to the binary log. + */ +#if !defined(MYSQL_CLIENT) +Update_rows_log_event_old::Update_rows_log_event_old(THD *thd_arg, + TABLE *tbl_arg, + ulong tid, + MY_BITMAP const *cols, + bool is_transactional) + : Old_rows_log_event(thd_arg, tbl_arg, tid, cols, is_transactional), + m_after_image(NULL), m_memory(NULL) +{ + + // This constructor should not be reached. + assert(0); +} +#endif /* !defined(MYSQL_CLIENT) */ + + +/* + Constructor used by slave to read the event from the binary log. 
+ */ +#ifdef HAVE_REPLICATION +Update_rows_log_event_old::Update_rows_log_event_old(const uchar *buf, + uint event_len, + const + Format_description_log_event + *description_event) + : Old_rows_log_event(buf, event_len, PRE_GA_UPDATE_ROWS_EVENT, + description_event), + m_after_image(NULL), m_memory(NULL) +{ +} +#endif + + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + +int +Update_rows_log_event_old:: +do_before_row_operations(const Slave_reporting_capability *const) +{ + if (m_table->s->keys > 0) + { + // Allocate buffer for key searches + m_key= (uchar*)my_malloc(key_memory_log_event_old, + m_table->key_info->key_length, MYF(MY_WME)); + if (!m_key) + return HA_ERR_OUT_OF_MEM; + } + + return 0; +} + + +int +Update_rows_log_event_old:: +do_after_row_operations(const Slave_reporting_capability *const, int error) +{ + /*error= ToDo:find out what this should really be, this triggers close_scan in nbd, returning error?*/ + m_table->file->ha_index_or_rnd_end(); + my_free(m_key); // Free for multi_malloc + m_key= NULL; + + return error; +} + + +int +Update_rows_log_event_old::do_exec_row(rpl_group_info *rgi) +{ + DBUG_ASSERT(m_table != NULL); + + int error= find_row(rgi); + if (unlikely(error)) + { + /* + We need to read the second image in the event of error to be + able to skip to the next pair of updates + */ + m_curr_row= m_curr_row_end; + unpack_current_row(rgi); + return error; + } + + /* + This is the situation after locating BI: + + ===|=== before image ====|=== after image ===|=== + ^ ^ + m_curr_row m_curr_row_end + + BI found in the table is stored in record[0]. We copy it to record[1] + and unpack AI to record[0]. + */ + + store_record(m_table,record[1]); + + m_curr_row= m_curr_row_end; + error= unpack_current_row(rgi); // this also updates m_curr_row_end + + /* + Now we have the right row to update. The old row (the one we're + looking for) is in record[1] and the new row is in record[0]. 
+ */ +#ifndef HAVE_valgrind + /* + Don't print debug messages when running valgrind since they can + trigger false warnings. + */ + DBUG_PRINT("info",("Updating row in table")); + DBUG_DUMP("old record", m_table->record[1], m_table->s->reclength); + DBUG_DUMP("new values", m_table->record[0], m_table->s->reclength); +#endif + + error= m_table->file->ha_update_row(m_table->record[1], m_table->record[0]); + m_table->file->ha_index_or_rnd_end(); + + if (unlikely(error == HA_ERR_RECORD_IS_THE_SAME)) + error= 0; + + return error; +} + +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ + + +#ifdef MYSQL_CLIENT +bool Update_rows_log_event_old::print(FILE *file, + PRINT_EVENT_INFO* print_event_info) +{ + return Old_rows_log_event::print_helper(file, print_event_info, + "Update_rows_old"); +} +#endif diff --git a/sql/log_event_old.h b/sql/log_event_old.h new file mode 100644 index 00000000..e5aaacec --- /dev/null +++ b/sql/log_event_old.h @@ -0,0 +1,569 @@ +/* Copyright (c) 2007, 2013, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef LOG_EVENT_OLD_H +#define LOG_EVENT_OLD_H + +/* + Need to include this file at the proper position of log_event.h + */ + + +/** + @file + + @brief This file contains classes handling old formats of row-based + binlog events. 
+*/ +/* + Around 2007-10-31, I made these classes completely separated from + the new classes (before, there was a complex class hierarchy + involving multiple inheritance; see BUG#31581), by simply copying + and pasting the entire contents of Rows_log_event into + Old_rows_log_event and the entire contents of + {Write|Update|Delete}_rows_log_event into + {Write|Update|Delete}_rows_log_event_old. For clarity, I will keep + the comments marking which code was cut-and-pasted for some time. + With the classes collapsed into one, there is probably some + redundancy (maybe some methods can be simplified and/or removed), + but we keep them this way for now. /Sven +*/ + +/* These classes are based on the v1 RowsHeaderLen */ +#undef ROWS_HEADER_LEN +#define ROWS_HEADER_LEN ROWS_HEADER_LEN_V1 + +/** + @class Old_rows_log_event + + Base class for the three types of row-based events + {Write|Update|Delete}_row_log_event_old, with event type codes + PRE_GA_{WRITE|UPDATE|DELETE}_ROWS_EVENT. These events are never + created any more, except when reading a relay log created by an old + server. +*/ +class Old_rows_log_event : public Log_event +{ + /********** BEGIN CUT & PASTE FROM Rows_log_event **********/ +public: + /** + Enumeration of the errors that can be returned. + */ + enum enum_error + { + ERR_OPEN_FAILURE = -1, /**< Failure to open table */ + ERR_OK = 0, /**< No error */ + ERR_TABLE_LIMIT_EXCEEDED = 1, /**< No more room for tables */ + ERR_OUT_OF_MEM = 2, /**< Out of memory */ + ERR_BAD_TABLE_DEF = 3, /**< Table definition does not match */ + ERR_RBR_TO_SBR = 4 /**< daisy-chanining RBR to SBR not allowed */ + }; + + /* + These definitions allow you to combine the flags into an + appropriate flag set using the normal bitwise operators. The + implicit conversion from an enum-constant to an integer is + accepted by the compiler, which is then used to set the real set + of flags. 
+ */ + enum enum_flag + { + /* Last event of a statement */ + STMT_END_F = (1U << 0), + + /* Value of the OPTION_NO_FOREIGN_KEY_CHECKS flag in thd->options */ + NO_FOREIGN_KEY_CHECKS_F = (1U << 1), + + /* Value of the OPTION_RELAXED_UNIQUE_CHECKS flag in thd->options */ + RELAXED_UNIQUE_CHECKS_F = (1U << 2), + + /** + Indicates that rows in this event are complete, that is contain + values for all columns of the table. + */ + COMPLETE_ROWS_F = (1U << 3) + }; + + typedef uint16 flag_set; + + /* Special constants representing sets of flags */ + enum + { + RLE_NO_FLAGS = 0U + }; + + virtual ~Old_rows_log_event(); + + void set_flags(flag_set flags_arg) { m_flags |= flags_arg; } + void clear_flags(flag_set flags_arg) { m_flags &= ~flags_arg; } + flag_set get_flags(flag_set flags_arg) const { return m_flags & flags_arg; } + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + virtual void pack_info(Protocol *protocol); +#endif + +#ifdef MYSQL_CLIENT + /* not for direct call, each derived has its own ::print() */ + virtual bool print(FILE *file, PRINT_EVENT_INFO *print_event_info)= 0; +#endif + +#ifndef MYSQL_CLIENT + int add_row_data(uchar *data, size_t length) + { + return do_add_row_data(data,length); + } +#endif + + /* Member functions to implement superclass interface */ + virtual int get_data_size(); + + MY_BITMAP const *get_cols() const { return &m_cols; } + size_t get_width() const { return m_width; } + ulong get_table_id() const { return m_table_id; } + +#ifndef MYSQL_CLIENT + virtual bool write_data_header(); + virtual bool write_data_body(); + virtual const char *get_db() { return m_table->s->db.str; } +#endif + /* + Check that malloc() succeeded in allocating memory for the rows + buffer and the COLS vector. Checking that an Update_rows_log_event_old + is valid is done in the Update_rows_log_event_old::is_valid() + function. 
+ */ + virtual bool is_valid() const + { + return m_rows_buf && m_cols.bitmap; + } + bool is_part_of_group() { return 1; } + + uint m_row_count; /* The number of rows added to the event */ + +protected: + /* + The constructors are protected since you're supposed to inherit + this class, not create instances of this class. + */ +#ifndef MYSQL_CLIENT + Old_rows_log_event(THD*, TABLE*, ulong table_id, + MY_BITMAP const *cols, bool is_transactional); +#endif + Old_rows_log_event(const uchar *row_data, uint event_len, + Log_event_type event_type, + const Format_description_log_event *description_event); + +#ifdef MYSQL_CLIENT + bool print_helper(FILE *, PRINT_EVENT_INFO *, char const *const name); +#endif + +#ifndef MYSQL_CLIENT + virtual int do_add_row_data(uchar *data, size_t length); +#endif + +#ifndef MYSQL_CLIENT + TABLE *m_table; /* The table the rows belong to */ +#endif + ulong m_table_id; /* Table ID */ + MY_BITMAP m_cols; /* Bitmap denoting columns available */ + ulong m_width; /* The width of the columns bitmap */ + + ulong m_master_reclength; /* Length of record on master side */ + + /* Bit buffers in the same memory as the class */ + uint32 m_bitbuf[128/(sizeof(uint32)*8)]; + uint32 m_bitbuf_ai[128/(sizeof(uint32)*8)]; + + uchar *m_rows_buf; /* The rows in packed format */ + uchar *m_rows_cur; /* One-after the end of the data */ + uchar *m_rows_end; /* One-after the end of the allocated space */ + + flag_set m_flags; /* Flags for row-level events */ + + /* helper functions */ + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + const uchar *m_curr_row; /* Start of the row being processed */ + const uchar *m_curr_row_end; /* One-after the end of the current row */ + uchar *m_key; /* Buffer to keep key value during searches */ + + int find_row(rpl_group_info *); + int write_row(rpl_group_info *, const bool); + + // Unpack the current row into m_table->record[0] + int unpack_current_row(rpl_group_info *rgi) + { + DBUG_ASSERT(m_table); + 
ASSERT_OR_RETURN_ERROR(m_curr_row < m_rows_end, HA_ERR_CORRUPT_EVENT); + return ::unpack_row(rgi, m_table, m_width, m_curr_row, &m_cols, + &m_curr_row_end, &m_master_reclength, m_rows_end); + } +#endif + +private: + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); + + /* + Primitive to prepare for a sequence of row executions. + + DESCRIPTION + + Before doing a sequence of do_prepare_row() and do_exec_row() + calls, this member function should be called to prepare for the + entire sequence. Typically, this member function will allocate + space for any buffers that are needed for the two member + functions mentioned above. + + RETURN VALUE + + The member function will return 0 if all went OK, or a non-zero + error code otherwise. + */ + virtual + int do_before_row_operations(const Slave_reporting_capability *const log) = 0; + + /* + Primitive to clean up after a sequence of row executions. + + DESCRIPTION + + After doing a sequence of do_prepare_row() and do_exec_row(), + this member function should be called to clean up and release + any allocated buffers. + + The error argument, if non-zero, indicates an error which happened during + row processing before this function was called. In this case, even if + function is successful, it should return the error code given in the argument. + */ + virtual + int do_after_row_operations(const Slave_reporting_capability *const log, + int error) = 0; + + /* + Primitive to do the actual execution necessary for a row. + + DESCRIPTION + The member function will do the actual execution needed to handle a row. + The row is located at m_curr_row. When the function returns, + m_curr_row_end should point at the next row (one byte after the end + of the current row). + + RETURN VALUE + 0 if execution succeeded, 1 if execution failed. 
+ + */ + virtual int do_exec_row(rpl_group_info *rgi) = 0; +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ + + /********** END OF CUT & PASTE FROM Rows_log_event **********/ + protected: + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + + int do_apply_event(Old_rows_log_event*, rpl_group_info *rgi); + + /* + Primitive to prepare for a sequence of row executions. + + DESCRIPTION + + Before doing a sequence of do_prepare_row() and do_exec_row() + calls, this member function should be called to prepare for the + entire sequence. Typically, this member function will allocate + space for any buffers that are needed for the two member + functions mentioned above. + + RETURN VALUE + + The member function will return 0 if all went OK, or a non-zero + error code otherwise. + */ + virtual int do_before_row_operations(TABLE *table) = 0; + + /* + Primitive to clean up after a sequence of row executions. + + DESCRIPTION + + After doing a sequence of do_prepare_row() and do_exec_row(), + this member function should be called to clean up and release + any allocated buffers. + */ + virtual int do_after_row_operations(TABLE *table, int error) = 0; + + /* + Primitive to prepare for handling one row in a row-level event. + + DESCRIPTION + + The member function prepares for execution of operations needed for one + row in a row-level event by reading up data from the buffer containing + the row. No specific interpretation of the data is normally done here, + since SQL thread specific data is not available: that data is made + available for the do_exec function. + + A pointer to the start of the next row, or NULL if the preparation + failed. Currently, preparation cannot fail, but don't rely on this + behavior. + + RETURN VALUE + Error code, if something went wrong, 0 otherwise. 
+ */ + virtual int do_prepare_row(THD*, rpl_group_info*, TABLE*, + uchar const *row_start, + uchar const **row_end) = 0; + + /* + Primitive to do the actual execution necessary for a row. + + DESCRIPTION + The member function will do the actual execution needed to handle a row. + + RETURN VALUE + 0 if execution succeeded, 1 if execution failed. + + */ + virtual int do_exec_row(TABLE *table) = 0; + +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ +}; + + +/** + @class Write_rows_log_event_old + + Old class for binlog events that write new rows to a table (event + type code PRE_GA_WRITE_ROWS_EVENT). Such events are never produced + by this version of the server, but they may be read from a relay log + created by an old server. New servers create events of class + Write_rows_log_event (event type code WRITE_ROWS_EVENT) instead. +*/ +class Write_rows_log_event_old : public Old_rows_log_event +{ + /********** BEGIN CUT & PASTE FROM Write_rows_log_event **********/ +public: +#if !defined(MYSQL_CLIENT) + Write_rows_log_event_old(THD*, TABLE*, ulong table_id, + MY_BITMAP const *cols, bool is_transactional); +#endif +#ifdef HAVE_REPLICATION + Write_rows_log_event_old(const uchar *buf, uint event_len, + const Format_description_log_event *description_event); +#endif +#if !defined(MYSQL_CLIENT) + static bool binlog_row_logging_function(THD *thd, TABLE *table, + bool is_transactional, + const uchar *before_record + __attribute__((unused)), + const uchar *after_record) + { + return thd->binlog_write_row(table, is_transactional, after_record); + } +#endif + +private: +#ifdef MYSQL_CLIENT + bool print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + virtual int do_before_row_operations(const Slave_reporting_capability *const); + virtual int do_after_row_operations(const Slave_reporting_capability *const,int); + virtual int do_exec_row(rpl_group_info *); +#endif + /********** END OF CUT & PASTE 
FROM Write_rows_log_event **********/ + +public: + enum + { + /* Support interface to THD::binlog_prepare_pending_rows_event */ + TYPE_CODE = PRE_GA_WRITE_ROWS_EVENT + }; + +private: + virtual Log_event_type get_type_code() { return (Log_event_type)TYPE_CODE; } + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + // use old definition of do_apply_event() + virtual int do_apply_event(rpl_group_info *rgi) + { return Old_rows_log_event::do_apply_event(this, rgi); } + + // primitives for old version of do_apply_event() + virtual int do_before_row_operations(TABLE *table); + virtual int do_after_row_operations(TABLE *table, int error); + virtual int do_prepare_row(THD*, rpl_group_info*, TABLE*, + uchar const *row_start, uchar const **row_end); + virtual int do_exec_row(TABLE *table); + +#endif +}; + + +/** + @class Update_rows_log_event_old + + Old class for binlog events that modify existing rows to a table + (event type code PRE_GA_UPDATE_ROWS_EVENT). Such events are never + produced by this version of the server, but they may be read from a + relay log created by an old server. New servers create events of + class Update_rows_log_event (event type code UPDATE_ROWS_EVENT) + instead. 
+*/ +class Update_rows_log_event_old : public Old_rows_log_event +{ + /********** BEGIN CUT & PASTE FROM Update_rows_log_event **********/ +public: +#ifndef MYSQL_CLIENT + Update_rows_log_event_old(THD*, TABLE*, ulong table_id, + MY_BITMAP const *cols, + bool is_transactional); +#endif + +#ifdef HAVE_REPLICATION + Update_rows_log_event_old(const uchar *buf, uint event_len, + const Format_description_log_event *description_event); +#endif + +#if !defined(MYSQL_CLIENT) + static bool binlog_row_logging_function(THD *thd, TABLE *table, + bool is_transactional, + MY_BITMAP *cols, + uint fields, + const uchar *before_record, + const uchar *after_record) + { + return thd->binlog_update_row(table, is_transactional, + before_record, after_record); + } +#endif + +protected: +#ifdef MYSQL_CLIENT + bool print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + virtual int do_before_row_operations(const Slave_reporting_capability *const); + virtual int do_after_row_operations(const Slave_reporting_capability *const,int); + virtual int do_exec_row(rpl_group_info *); +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ + /********** END OF CUT & PASTE FROM Update_rows_log_event **********/ + + uchar *m_after_image, *m_memory; + +public: + enum + { + /* Support interface to THD::binlog_prepare_pending_rows_event */ + TYPE_CODE = PRE_GA_UPDATE_ROWS_EVENT + }; + +private: + virtual Log_event_type get_type_code() { return (Log_event_type)TYPE_CODE; } + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + // use old definition of do_apply_event() + virtual int do_apply_event(rpl_group_info *rgi) + { return Old_rows_log_event::do_apply_event(this, rgi); } + + // primitives for old version of do_apply_event() + virtual int do_before_row_operations(TABLE *table); + virtual int do_after_row_operations(TABLE *table, int error); + virtual int do_prepare_row(THD*, rpl_group_info*, TABLE*, + uchar const 
*row_start, uchar const **row_end); + virtual int do_exec_row(TABLE *table); +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ +}; + + +/** + @class Delete_rows_log_event_old + + Old class for binlog events that delete existing rows from a table + (event type code PRE_GA_DELETE_ROWS_EVENT). Such events are never + produced by this version of the server, but they may be read from a + relay log created by an old server. New servers create events of + class Delete_rows_log_event (event type code DELETE_ROWS_EVENT) + instead. +*/ +class Delete_rows_log_event_old : public Old_rows_log_event +{ + /********** BEGIN CUT & PASTE FROM Update_rows_log_event **********/ +public: +#ifndef MYSQL_CLIENT + Delete_rows_log_event_old(THD*, TABLE*, ulong, + MY_BITMAP const *cols, bool is_transactional); +#endif +#ifdef HAVE_REPLICATION + Delete_rows_log_event_old(const uchar *buf, uint event_len, + const Format_description_log_event *description_event); +#endif +#if !defined(MYSQL_CLIENT) + static bool binlog_row_logging_function(THD *thd, TABLE *table, + bool is_transactional, + MY_BITMAP *cols, + uint fields, + const uchar *before_record, + const uchar *after_record + __attribute__((unused))) + { + return thd->binlog_delete_row(table, is_transactional, before_record); + } +#endif + +protected: +#ifdef MYSQL_CLIENT + bool print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + virtual int do_before_row_operations(const Slave_reporting_capability *const); + virtual int do_after_row_operations(const Slave_reporting_capability *const,int); + virtual int do_exec_row(rpl_group_info *); +#endif + /********** END CUT & PASTE FROM Delete_rows_log_event **********/ + + uchar *m_after_image, *m_memory; + +public: + enum + { + /* Support interface to THD::binlog_prepare_pending_rows_event */ + TYPE_CODE = PRE_GA_DELETE_ROWS_EVENT + }; + +private: + virtual Log_event_type get_type_code() { return 
(Log_event_type)TYPE_CODE; } + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + // use old definition of do_apply_event() + virtual int do_apply_event(rpl_group_info *rgi) + { return Old_rows_log_event::do_apply_event(this, rgi); } + + // primitives for old version of do_apply_event() + virtual int do_before_row_operations(TABLE *table); + virtual int do_after_row_operations(TABLE *table, int error); + virtual int do_prepare_row(THD*, rpl_group_info*, TABLE*, + uchar const *row_start, uchar const **row_end); + virtual int do_exec_row(TABLE *table); +#endif +}; + + +#endif diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc new file mode 100644 index 00000000..5cb15c1c --- /dev/null +++ b/sql/log_event_server.cc @@ -0,0 +1,9087 @@ +/* + Copyright (c) 2000, 2019, Oracle and/or its affiliates. + Copyright (c) 2009, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#include "mariadb.h" +#include "sql_priv.h" + +#ifdef MYSQL_CLIENT +#error MYSQL_CLIENT must not be defined here +#endif + +#ifndef MYSQL_SERVER +#error MYSQL_SERVER must be defined here +#endif + +#include "unireg.h" +#include "log_event.h" +#include "sql_base.h" // close_thread_tables +#include "sql_cache.h" // QUERY_CACHE_FLAGS_SIZE +#include "sql_locale.h" // MY_LOCALE, my_locale_by_number, my_locale_en_US +#include "key.h" // key_copy +#include "lock.h" // mysql_unlock_tables +#include "sql_parse.h" // mysql_test_parse_for_slave +#include "tztime.h" // struct Time_zone +#include "sql_load.h" // mysql_load +#include "sql_db.h" // load_db_opt_by_name +#include "slave.h" +#include "rpl_rli.h" +#include "rpl_mi.h" +#include "rpl_filter.h" +#include "rpl_record.h" +#include "transaction.h" +#include +#include "sql_show.h" // append_identifier +#include "debug_sync.h" // debug_sync +#include +#include +#include "compat56.h" +#include "wsrep_mysqld.h" +#include "sql_insert.h" +#include "sql_table.h" + +#include +#include "rpl_utility.h" +#include "rpl_constants.h" +#include "sql_digest.h" +#include "zlib.h" + + +#define log_cs &my_charset_latin1 + + +#if defined(HAVE_REPLICATION) +static int rows_event_stmt_cleanup(rpl_group_info *rgi, THD* thd); + +static const char *HA_ERR(int i) +{ + /* + This function should only be called in case of an error + was detected + */ + DBUG_ASSERT(i != 0); + switch (i) { + case HA_ERR_KEY_NOT_FOUND: return "HA_ERR_KEY_NOT_FOUND"; + case HA_ERR_FOUND_DUPP_KEY: return "HA_ERR_FOUND_DUPP_KEY"; + case HA_ERR_RECORD_CHANGED: return "HA_ERR_RECORD_CHANGED"; + case HA_ERR_WRONG_INDEX: return "HA_ERR_WRONG_INDEX"; + case HA_ERR_CRASHED: return "HA_ERR_CRASHED"; + case HA_ERR_WRONG_IN_RECORD: return "HA_ERR_WRONG_IN_RECORD"; + case 
HA_ERR_OUT_OF_MEM: return "HA_ERR_OUT_OF_MEM"; + case HA_ERR_NOT_A_TABLE: return "HA_ERR_NOT_A_TABLE"; + case HA_ERR_WRONG_COMMAND: return "HA_ERR_WRONG_COMMAND"; + case HA_ERR_OLD_FILE: return "HA_ERR_OLD_FILE"; + case HA_ERR_NO_ACTIVE_RECORD: return "HA_ERR_NO_ACTIVE_RECORD"; + case HA_ERR_RECORD_DELETED: return "HA_ERR_RECORD_DELETED"; + case HA_ERR_RECORD_FILE_FULL: return "HA_ERR_RECORD_FILE_FULL"; + case HA_ERR_INDEX_FILE_FULL: return "HA_ERR_INDEX_FILE_FULL"; + case HA_ERR_END_OF_FILE: return "HA_ERR_END_OF_FILE"; + case HA_ERR_UNSUPPORTED: return "HA_ERR_UNSUPPORTED"; + case HA_ERR_TO_BIG_ROW: return "HA_ERR_TO_BIG_ROW"; + case HA_WRONG_CREATE_OPTION: return "HA_WRONG_CREATE_OPTION"; + case HA_ERR_FOUND_DUPP_UNIQUE: return "HA_ERR_FOUND_DUPP_UNIQUE"; + case HA_ERR_UNKNOWN_CHARSET: return "HA_ERR_UNKNOWN_CHARSET"; + case HA_ERR_WRONG_MRG_TABLE_DEF: return "HA_ERR_WRONG_MRG_TABLE_DEF"; + case HA_ERR_CRASHED_ON_REPAIR: return "HA_ERR_CRASHED_ON_REPAIR"; + case HA_ERR_CRASHED_ON_USAGE: return "HA_ERR_CRASHED_ON_USAGE"; + case HA_ERR_LOCK_WAIT_TIMEOUT: return "HA_ERR_LOCK_WAIT_TIMEOUT"; + case HA_ERR_LOCK_TABLE_FULL: return "HA_ERR_LOCK_TABLE_FULL"; + case HA_ERR_READ_ONLY_TRANSACTION: return "HA_ERR_READ_ONLY_TRANSACTION"; + case HA_ERR_LOCK_DEADLOCK: return "HA_ERR_LOCK_DEADLOCK"; + case HA_ERR_CANNOT_ADD_FOREIGN: return "HA_ERR_CANNOT_ADD_FOREIGN"; + case HA_ERR_NO_REFERENCED_ROW: return "HA_ERR_NO_REFERENCED_ROW"; + case HA_ERR_ROW_IS_REFERENCED: return "HA_ERR_ROW_IS_REFERENCED"; + case HA_ERR_NO_SAVEPOINT: return "HA_ERR_NO_SAVEPOINT"; + case HA_ERR_NON_UNIQUE_BLOCK_SIZE: return "HA_ERR_NON_UNIQUE_BLOCK_SIZE"; + case HA_ERR_NO_SUCH_TABLE: return "HA_ERR_NO_SUCH_TABLE"; + case HA_ERR_TABLE_EXIST: return "HA_ERR_TABLE_EXIST"; + case HA_ERR_NO_CONNECTION: return "HA_ERR_NO_CONNECTION"; + case HA_ERR_NULL_IN_SPATIAL: return "HA_ERR_NULL_IN_SPATIAL"; + case HA_ERR_TABLE_DEF_CHANGED: return "HA_ERR_TABLE_DEF_CHANGED"; + case HA_ERR_NO_PARTITION_FOUND: return 
"HA_ERR_NO_PARTITION_FOUND"; + case HA_ERR_RBR_LOGGING_FAILED: return "HA_ERR_RBR_LOGGING_FAILED"; + case HA_ERR_DROP_INDEX_FK: return "HA_ERR_DROP_INDEX_FK"; + case HA_ERR_FOREIGN_DUPLICATE_KEY: return "HA_ERR_FOREIGN_DUPLICATE_KEY"; + case HA_ERR_TABLE_NEEDS_UPGRADE: return "HA_ERR_TABLE_NEEDS_UPGRADE"; + case HA_ERR_TABLE_READONLY: return "HA_ERR_TABLE_READONLY"; + case HA_ERR_AUTOINC_READ_FAILED: return "HA_ERR_AUTOINC_READ_FAILED"; + case HA_ERR_AUTOINC_ERANGE: return "HA_ERR_AUTOINC_ERANGE"; + case HA_ERR_GENERIC: return "HA_ERR_GENERIC"; + case HA_ERR_RECORD_IS_THE_SAME: return "HA_ERR_RECORD_IS_THE_SAME"; + case HA_ERR_LOGGING_IMPOSSIBLE: return "HA_ERR_LOGGING_IMPOSSIBLE"; + case HA_ERR_CORRUPT_EVENT: return "HA_ERR_CORRUPT_EVENT"; + case HA_ERR_ROWS_EVENT_APPLY : return "HA_ERR_ROWS_EVENT_APPLY"; + case HA_ERR_PARTITION_LIST : return "HA_ERR_PARTITION_LIST"; + } + return "No Error!"; +} + + +/* + Return true if an error caught during event execution is a temporary error + that will cause automatic retry of the event group during parallel + replication, false otherwise. + + In parallel replication, conflicting transactions can occasionally cause + deadlocks; such errors are handled automatically by rolling back re-trying + the transactions, so should not pollute the error log. 
+*/ +bool +is_parallel_retry_error(rpl_group_info *rgi, int err) +{ + if (!rgi->is_parallel_exec) + return false; + if (rgi->speculation == rpl_group_info::SPECULATE_OPTIMISTIC) + return true; + if (rgi->killed_for_retry && + (err == ER_QUERY_INTERRUPTED || err == ER_CONNECTION_KILLED)) + return true; + return has_temporary_error(rgi->thd); +} + +/** + Accumulate a Diagnostics_area's errors and warnings into an output buffer + + @param errbuf The output buffer to write error messages + @param errbuf_size The size of the output buffer + @param da The Diagnostics_area to check for errors +*/ +static void inline aggregate_da_errors(char *errbuf, size_t errbuf_size, + Diagnostics_area *da) +{ + const char *errbuf_end= errbuf + errbuf_size; + char *slider; + Diagnostics_area::Sql_condition_iterator it= da->sql_conditions(); + const Sql_condition *err; + size_t len; + for (err= it++, slider= errbuf; err && slider < errbuf_end - 1; + slider += len, err= it++) + { + len= my_snprintf(slider, errbuf_end - slider, + " %s, Error_code: %d;", err->get_message_text(), + err->get_sql_errno()); + } +} + + +/** + Error reporting facility for Rows_log_event::do_apply_event + + @param level error, warning or info + @param ha_error HA_ERR_ code + @param rli pointer to the active Relay_log_info instance + @param thd pointer to the slave thread's thd + @param table pointer to the event's table object + @param type the type of the event + @param log_name the master binlog file name + @param pos the master binlog file pos (the next after the event) + +*/ +static void inline slave_rows_error_report(enum loglevel level, int ha_error, + rpl_group_info *rgi, THD *thd, + TABLE *table, const char * type, + const char *log_name, my_off_t pos) +{ + const char *handler_error= (ha_error ? HA_ERR(ha_error) : NULL); + char buff[MAX_SLAVE_ERRMSG]; + Relay_log_info const *rli= rgi->rli; + buff[0]= 0; + int errcode= thd->is_error() ? 
thd->get_stmt_da()->sql_errno() : 0; + + /* + In parallel replication, deadlocks or other temporary errors can happen + occasionally in normal operation, they will be handled correctly and + automatically by re-trying the transactions. So do not pollute the error + log with messages about them. + */ + if (is_parallel_retry_error(rgi, errcode)) + return; + + aggregate_da_errors(buff, sizeof(buff), thd->get_stmt_da()); + + if (ha_error != 0 && !thd->killed) + rli->report(level, errcode, rgi->gtid_info(), + "Could not execute %s event on table %s.%s;" + "%s handler error %s; " + "the event's master log %s, end_log_pos %llu", + type, table->s->db.str, table->s->table_name.str, + buff, handler_error == NULL ? "" : handler_error, + log_name, pos); + else + rli->report(level, errcode, rgi->gtid_info(), + "Could not execute %s event on table %s.%s;" + "%s the event's master log %s, end_log_pos %llu", + type, table->s->db.str, table->s->table_name.str, + buff, log_name, pos); +} +#endif + +#if defined(HAVE_REPLICATION) +static void set_thd_db(THD *thd, Rpl_filter *rpl_filter, + const char *db, uint32 db_len) +{ + char lcase_db_buf[NAME_LEN +1]; + LEX_CSTRING new_db; + new_db.length= db_len; + if (lower_case_table_names == 1) + { + strmov(lcase_db_buf, db); + my_casedn_str(system_charset_info, lcase_db_buf); + new_db.str= lcase_db_buf; + } + else + new_db.str= db; + /* TODO WARNING this makes rewrite_db respect lower_case_table_names values + * for more info look MDEV-17446 */ + new_db.str= rpl_filter->get_rewrite_db(new_db.str, &new_db.length); + thd->set_db(&new_db); +} +#endif + + +#if defined(HAVE_REPLICATION) + +inline int idempotent_error_code(int err_code) +{ + int ret= 0; + + switch (err_code) + { + case 0: + ret= 1; + break; + /* + The following list of "idempotent" errors + means that an error from the list might happen + because of idempotent (more than once) + applying of a binlog file. 
+ Notice, that binlog has a ddl operation its + second applying may cause + + case HA_ERR_TABLE_DEF_CHANGED: + case HA_ERR_CANNOT_ADD_FOREIGN: + + which are not included into to the list. + + Note that HA_ERR_RECORD_DELETED is not in the list since + do_exec_row() should not return that error code. + */ + case HA_ERR_RECORD_CHANGED: + case HA_ERR_KEY_NOT_FOUND: + case HA_ERR_END_OF_FILE: + case HA_ERR_FOUND_DUPP_KEY: + case HA_ERR_FOUND_DUPP_UNIQUE: + case HA_ERR_FOREIGN_DUPLICATE_KEY: + case HA_ERR_NO_REFERENCED_ROW: + case HA_ERR_ROW_IS_REFERENCED: + ret= 1; + break; + default: + ret= 0; + break; + } + return (ret); +} + +/** + Ignore error code specified on command line. +*/ + +inline int ignored_error_code(int err_code) +{ + if (use_slave_mask && bitmap_is_set(&slave_error_mask, err_code)) + { + statistic_increment(slave_skipped_errors, LOCK_status); + return 1; + } + return err_code == ER_SLAVE_IGNORED_TABLE; +} + +/* + This function converts an engine's error to a server error. + + If the thread does not have an error already reported, it tries to + define it by calling the engine's method print_error. However, if a + mapping is not found, it uses the ER_UNKNOWN_ERROR and prints out a + warning message. +*/ +int convert_handler_error(int error, THD* thd, TABLE *table) +{ + uint actual_error= (thd->is_error() ? thd->get_stmt_da()->sql_errno() : + 0); + + if (actual_error == 0) + { + table->file->print_error(error, MYF(0)); + actual_error= (thd->is_error() ? 
thd->get_stmt_da()->sql_errno() : + ER_UNKNOWN_ERROR); + if (actual_error == ER_UNKNOWN_ERROR) + if (global_system_variables.log_warnings) + sql_print_warning("Unknown error detected %d in handler", error); + } + + return (actual_error); +} + +inline bool concurrency_error_code(int error) +{ + switch (error) + { + case ER_LOCK_WAIT_TIMEOUT: + case ER_LOCK_DEADLOCK: + case ER_XA_RBDEADLOCK: + return TRUE; + default: + return (FALSE); + } +} + +inline bool unexpected_error_code(int unexpected_error) +{ + switch (unexpected_error) + { + case ER_NET_READ_ERROR: + case ER_NET_ERROR_ON_WRITE: + case ER_QUERY_INTERRUPTED: + case ER_STATEMENT_TIMEOUT: + case ER_CONNECTION_KILLED: + case ER_SERVER_SHUTDOWN: + case ER_NEW_ABORTING_CONNECTION: + return(TRUE); + default: + return(FALSE); + } +} + +/* + pretty_print_str() +*/ + +static void +pretty_print_str(String *packet, const char *str, int len) +{ + const char *end= str + len; + packet->append(STRING_WITH_LEN("'")); + while (str < end) + { + char c; + switch ((c=*str++)) { + case '\n': packet->append(STRING_WITH_LEN("\\n")); break; + case '\r': packet->append(STRING_WITH_LEN("\\r")); break; + case '\\': packet->append(STRING_WITH_LEN("\\\\")); break; + case '\b': packet->append(STRING_WITH_LEN("\\b")); break; + case '\t': packet->append(STRING_WITH_LEN("\\t")); break; + case '\'': packet->append(STRING_WITH_LEN("\\'")); break; + case 0 : packet->append(STRING_WITH_LEN("\\0")); break; + default: + packet->append(&c, 1); + break; + } + } + packet->append(STRING_WITH_LEN("'")); +} +#endif /* HAVE_REPLICATION */ + + +#if defined(HAVE_REPLICATION) + +/** + Create a prefix for the temporary files that is to be used for + load data file name for this master + + @param name Store prefix of name here + @param connection_name Connection name + + @return pointer to end of name + + @description + We assume that FN_REFLEN is big enough to hold + MAX_CONNECTION_NAME * MAX_FILENAME_MBWIDTH characters + 2 numbers + + a short extension. 
+ + The resulting file name has the following parts, each separated with a '-' + - PREFIX_SQL_LOAD (SQL_LOAD-) + - If a connection name is given (multi-master setup): + - Add an extra '-' to mark that this is a multi-master file + - connection name in lower case, converted to safe file characters. + (see create_logfile_name_with_suffix()). + - server_id + - A last '-' (after server_id). +*/ + +static char *load_data_tmp_prefix(char *name, + LEX_CSTRING *connection_name) +{ + name= strmov(name, PREFIX_SQL_LOAD); + if (connection_name->length) + { + uint buf_length; + uint errors; + /* Add marker that this is a multi-master-file */ + *name++='-'; + /* Convert connection_name to a safe filename */ + buf_length= strconvert(system_charset_info, connection_name->str, FN_REFLEN, + &my_charset_filename, name, FN_REFLEN, &errors); + name+= buf_length; + *name++= '-'; + } + name= int10_to_str(global_system_variables.server_id, name, 10); + *name++ = '-'; + *name= '\0'; // For testing prefixes + return name; +} + + +/** + Creates a temporary name for LOAD DATA INFILE + + @param buf Store new filename here + @param file_id File_id (part of file name) + @param event_server_id Event_id (part of file name) + @param ext Extension for file name + + @return + Pointer to start of extension +*/ + +static char *slave_load_file_stem(char *buf, uint file_id, + int event_server_id, const char *ext, + LEX_CSTRING *connection_name) +{ + char *res; + res= buf+ unpack_dirname(buf, slave_load_tmpdir); + to_unix_path(buf); + buf= load_data_tmp_prefix(res, connection_name); + buf= int10_to_str(event_server_id, buf, 10); + *buf++ = '-'; + res= int10_to_str(file_id, buf, 10); + strmov(res, ext); // Add extension last + return res; // Pointer to extension +} +#endif + + +#if defined(HAVE_REPLICATION) + +/** + Delete all temporary files used for SQL_LOAD. 
+*/ + +static void cleanup_load_tmpdir(LEX_CSTRING *connection_name) +{ + MY_DIR *dirp; + FILEINFO *file; + size_t i; + char dir[FN_REFLEN], fname[FN_REFLEN]; + char prefbuf[31 + MAX_CONNECTION_NAME* MAX_FILENAME_MBWIDTH + 1]; + DBUG_ENTER("cleanup_load_tmpdir"); + + unpack_dirname(dir, slave_load_tmpdir); + if (!(dirp=my_dir(dir, MYF(MY_WME)))) + return; + + /* + When we are deleting temporary files, we should only remove + the files associated with the server id of our server. + We don't use event_server_id here because since we've disabled + direct binlogging of Create_file/Append_file/Exec_load events + we cannot meet Start_log event in the middle of events from one + LOAD DATA. + */ + + load_data_tmp_prefix(prefbuf, connection_name); + DBUG_PRINT("enter", ("dir: '%s' prefix: '%s'", dir, prefbuf)); + + for (i=0 ; i < dirp->number_of_files; i++) + { + file=dirp->dir_entry+i; + if (is_prefix(file->name, prefbuf)) + { + fn_format(fname,file->name,slave_load_tmpdir,"",MY_UNPACK_FILENAME); + mysql_file_delete(key_file_misc, fname, MYF(0)); + } + } + + my_dirend(dirp); + DBUG_VOID_RETURN; +} +#endif + + +/** + Append a version of the 'str' string suitable for use in a query to + the 'to' string. To generate a correct escaping, the character set + information in 'csinfo' is used. 
+*/ + +int append_query_string(CHARSET_INFO *csinfo, String *to, + const char *str, size_t len, bool no_backslash) +{ + char *beg, *ptr; + my_bool overflow; + uint32 const orig_len= to->length(); + if (to->reserve(orig_len + len * 2 + 4)) + return 1; + + beg= (char*) to->ptr() + to->length(); + ptr= beg; + if (csinfo->escape_with_backslash_is_dangerous) + ptr= str_to_hex(ptr, str, len); + else + { + *ptr++= '\''; + if (!no_backslash) + { + ptr+= escape_string_for_mysql(csinfo, ptr, 0, str, len, &overflow); + } + else + { + const char *frm_str= str; + + for (; frm_str < (str + len); frm_str++) + { + /* Using '' way to represent "'" */ + if (*frm_str == '\'') + *ptr++= *frm_str; + + *ptr++= *frm_str; + } + } + + *ptr++= '\''; + } + to->length((uint32)(orig_len + ptr - beg)); + return 0; +} + + +/************************************************************************** + Log_event methods (= the parent class of all events) +**************************************************************************/ + +Log_event::Log_event(THD* thd_arg, uint16 flags_arg, bool using_trans) + :log_pos(0), temp_buf(0), exec_time(0), thd(thd_arg), + checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF) +{ + server_id= thd->variables.server_id; + when= thd->start_time; + when_sec_part=thd->start_time_sec_part; + + if (using_trans) + cache_type= Log_event::EVENT_TRANSACTIONAL_CACHE; + else + cache_type= Log_event::EVENT_STMT_CACHE; + flags= flags_arg | + (thd->variables.option_bits & OPTION_SKIP_REPLICATION ? + LOG_EVENT_SKIP_REPLICATION_F : 0); +} + +/** + This minimal constructor is for when you are not even sure that there + is a valid THD. For example in the server when we are shutting down or + flushing logs after receiving a SIGHUP (then we must write a Rotate to + the binlog but we have no THD, so we need this minimal constructor). 
+*/ + +Log_event::Log_event() + :temp_buf(0), exec_time(0), flags(0), cache_type(EVENT_INVALID_CACHE), + thd(0), checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF) +{ + server_id= global_system_variables.server_id; + /* + We can't call my_time() here as this would cause a call before + my_init() is called + */ + when= 0; + when_sec_part=0; + log_pos= 0; +} + + + +#ifdef HAVE_REPLICATION + +int Log_event::do_update_pos(rpl_group_info *rgi) +{ + Relay_log_info *rli= rgi->rli; + DBUG_ENTER("Log_event::do_update_pos"); + + DBUG_ASSERT(!rli->belongs_to_client()); + /* + rli is null when (as far as I (Guilhem) know) the caller is + Load_log_event::do_apply_event *and* that one is called from + Execute_load_log_event::do_apply_event. In this case, we don't + do anything here ; Execute_load_log_event::do_apply_event will + call Log_event::do_apply_event again later with the proper rli. + Strictly speaking, if we were sure that rli is null only in the + case discussed above, 'if (rli)' is useless here. But as we are + not 100% sure, keep it for now. + + Matz: I don't think we will need this check with this refactoring. + */ + if (rli) + { + /* + In parallel execution, delay position update for the events that are + not part of event groups (format description, rotate, and such) until + the actual event execution reaches that point. 
+ */ + if (!rgi->is_parallel_exec || is_group_event(get_type_code())) + rli->stmt_done(log_pos, thd, rgi); + } + DBUG_RETURN(0); // Cannot fail currently +} + + +Log_event::enum_skip_reason +Log_event::do_shall_skip(rpl_group_info *rgi) +{ + Relay_log_info *rli= rgi->rli; + DBUG_PRINT("info", ("ev->server_id: %lu, ::server_id: %lu," + " rli->replicate_same_server_id: %d," + " rli->slave_skip_counter: %llu", + (ulong) server_id, + (ulong) global_system_variables.server_id, + rli->replicate_same_server_id, + rli->slave_skip_counter)); + if ((server_id == global_system_variables.server_id && + !(rli->replicate_same_server_id || (flags & LOG_EVENT_ACCEPT_OWN_F))) || + (rli->slave_skip_counter == 1 && rli->is_in_group()) || + (flags & LOG_EVENT_SKIP_REPLICATION_F && + opt_replicate_events_marked_for_skip != RPL_SKIP_REPLICATE)) + return EVENT_SKIP_IGNORE; + if (rli->slave_skip_counter > 0) + return EVENT_SKIP_COUNT; + return EVENT_SKIP_NOT; +} + + +/* + Log_event::pack_info() +*/ + +void Log_event::pack_info(Protocol *protocol) +{ + protocol->store("", 0, &my_charset_bin); +} + + +/** + Only called by SHOW BINLOG EVENTS +*/ +int Log_event::net_send(Protocol *protocol, const char* log_name, my_off_t pos) +{ + const char *p= strrchr(log_name, FN_LIBCHAR); + const char *event_type; + if (p) + log_name = p + 1; + + protocol->prepare_for_resend(); + protocol->store(log_name, strlen(log_name), &my_charset_bin); + protocol->store((ulonglong) pos); + event_type = get_type_str(); + protocol->store(event_type, strlen(event_type), &my_charset_bin); + protocol->store((uint32) server_id); + protocol->store((ulonglong) log_pos); + pack_info(protocol); + return protocol->write(); +} +#endif /* HAVE_REPLICATION */ + + +/** + init_show_field_list() prepares the column names and types for the + output of SHOW BINLOG EVENTS; it is used only by SHOW BINLOG + EVENTS. 
+*/ + +void Log_event::init_show_field_list(THD *thd, List* field_list) +{ + MEM_ROOT *mem_root= thd->mem_root; + field_list->push_back(new (mem_root) + Item_empty_string(thd, "Log_name", 20), + mem_root); + field_list->push_back(new (mem_root) + Item_return_int(thd, "Pos", + MY_INT64_NUM_DECIMAL_DIGITS, + MYSQL_TYPE_LONGLONG), + mem_root); + field_list->push_back(new (mem_root) + Item_empty_string(thd, "Event_type", 20), + mem_root); + field_list->push_back(new (mem_root) + Item_return_int(thd, "Server_id", 10, + MYSQL_TYPE_LONG), + mem_root); + field_list->push_back(new (mem_root) + Item_return_int(thd, "End_log_pos", + MY_INT64_NUM_DECIMAL_DIGITS, + MYSQL_TYPE_LONGLONG), + mem_root); + field_list->push_back(new (mem_root) Item_empty_string(thd, "Info", 20), + mem_root); +} + +/** + A decider of whether to trigger checksum computation or not. + To be invoked in Log_event::write() stack. + The decision is positive + + S,M) if it's been marked for checksumming with @c checksum_alg + + M) otherwise, if @@global.binlog_checksum is not NONE and the event is + directly written to the binlog file. + The to-be-cached event decides at @c write_cache() time. + + Otherwise the decision is negative. + + @note A side effect of the method is altering Log_event::checksum_alg + it the latter was undefined at calling. + + @return true Checksum should be used. Log_event::checksum_alg is set. + @return false No checksum +*/ + +my_bool Log_event::need_checksum() +{ + my_bool ret; + DBUG_ENTER("Log_event::need_checksum"); + + /* + few callers of Log_event::write + (incl FD::write, FD constructing code on the slave side, Rotate relay log + and Stop event) + provides their checksum alg preference through Log_event::checksum_alg. + */ + if (checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF) + ret= checksum_alg != BINLOG_CHECKSUM_ALG_OFF; + else + { + ret= binlog_checksum_options && cache_type == Log_event::EVENT_NO_CACHE; + checksum_alg= ret ? 
(enum_binlog_checksum_alg)binlog_checksum_options + : BINLOG_CHECKSUM_ALG_OFF; + } + /* + FD calls the methods before data_written has been calculated. + The following invariant claims if the current is not the first + call (and therefore data_written is not zero) then `ret' must be + TRUE. It may not be null because FD is always checksummed. + */ + + DBUG_ASSERT(get_type_code() != FORMAT_DESCRIPTION_EVENT || ret || + data_written == 0); + + DBUG_ASSERT(!ret || + ((checksum_alg == binlog_checksum_options || + /* + Stop event closes the relay-log and its checksum alg + preference is set by the caller can be different + from the server's binlog_checksum_options. + */ + get_type_code() == STOP_EVENT || + /* + Rotate:s can be checksummed regardless of the server's + binlog_checksum_options. That applies to both + the local RL's Rotate and the master's Rotate + which IO thread instantiates via queue_binlog_ver_3_event. + */ + get_type_code() == ROTATE_EVENT || + get_type_code() == START_ENCRYPTION_EVENT || + /* FD is always checksummed */ + get_type_code() == FORMAT_DESCRIPTION_EVENT) && + checksum_alg != BINLOG_CHECKSUM_ALG_OFF)); + + DBUG_ASSERT(checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF); + + DBUG_ASSERT(((get_type_code() != ROTATE_EVENT && + get_type_code() != STOP_EVENT) || + get_type_code() != FORMAT_DESCRIPTION_EVENT) || + cache_type == Log_event::EVENT_NO_CACHE); + + DBUG_RETURN(ret); +} + +int Log_event_writer::write_internal(const uchar *pos, size_t len) +{ + DBUG_ASSERT(!ctx || encrypt_or_write == &Log_event_writer::encrypt_and_write); + if (my_b_safe_write(file, pos, len)) + { + DBUG_PRINT("error", ("write to log failed: %d", my_errno)); + return 1; + } + bytes_written+= len; + return 0; +} + +/* + as soon as encryption produces the first output block, write event_len + where it should be in a valid event header +*/ +int Log_event_writer::maybe_write_event_len(uchar *pos, size_t len) +{ + if (len && event_len) + { + DBUG_ASSERT(len >= EVENT_LEN_OFFSET); + if 
(write_internal(pos + EVENT_LEN_OFFSET - 4, 4)) + return 1; + int4store(pos + EVENT_LEN_OFFSET - 4, event_len); + event_len= 0; + } + return 0; +} + +int Log_event_writer::encrypt_and_write(const uchar *pos, size_t len) +{ + uchar *dst; + size_t dstsize; + uint dstlen; + int res; // Safe as res is always set + DBUG_ASSERT(ctx); + + if (!len) + return 0; + + dstsize= encryption_encrypted_length((uint)len, ENCRYPTION_KEY_SYSTEM_DATA, + crypto->key_version); + if (!(dst= (uchar*)my_safe_alloca(dstsize))) + return 1; + + if (encryption_ctx_update(ctx, pos, (uint)len, dst, &dstlen)) + { + res= 1; + goto err; + } + + if (maybe_write_event_len(dst, dstlen)) + { + res= 1; + goto err; + } + + res= write_internal(dst, dstlen); + +err: + my_safe_afree(dst, dstsize); + return res; +} + +int Log_event_writer::write_header(uchar *pos, size_t len) +{ + DBUG_ENTER("Log_event_writer::write_header"); + /* + recording checksum of FD event computed with dropped + possibly active LOG_EVENT_BINLOG_IN_USE_F flag. + Similar step at verication: the active flag is dropped before + checksum computing. 
+ */ + if (checksum_len) + { + uchar save=pos[FLAGS_OFFSET]; + pos[FLAGS_OFFSET]&= ~LOG_EVENT_BINLOG_IN_USE_F; + crc= my_checksum(0, pos, len); + pos[FLAGS_OFFSET]= save; + } + + if (ctx) + { + uchar iv[BINLOG_IV_LENGTH]; + crypto->set_iv(iv, (uint32)my_b_safe_tell(file)); + if (encryption_ctx_init(ctx, crypto->key, crypto->key_length, + iv, sizeof(iv), ENCRYPTION_FLAG_ENCRYPT | ENCRYPTION_FLAG_NOPAD, + ENCRYPTION_KEY_SYSTEM_DATA, crypto->key_version)) + DBUG_RETURN(1); + + DBUG_ASSERT(len >= LOG_EVENT_HEADER_LEN); + event_len= uint4korr(pos + EVENT_LEN_OFFSET); + DBUG_ASSERT(event_len >= len); + memcpy(pos + EVENT_LEN_OFFSET, pos, 4); + pos+= 4; + len-= 4; + } + DBUG_RETURN((this->*encrypt_or_write)(pos, len)); +} + +int Log_event_writer::write_data(const uchar *pos, size_t len) +{ + DBUG_ENTER("Log_event_writer::write_data"); + if (checksum_len) + crc= my_checksum(crc, pos, len); + + DBUG_RETURN((this->*encrypt_or_write)(pos, len)); +} + +int Log_event_writer::write_footer() +{ + DBUG_ENTER("Log_event_writer::write_footer"); + if (checksum_len) + { + uchar checksum_buf[BINLOG_CHECKSUM_LEN]; + int4store(checksum_buf, crc); + if ((this->*encrypt_or_write)(checksum_buf, BINLOG_CHECKSUM_LEN)) + DBUG_RETURN(ER_ERROR_ON_WRITE); + } + if (ctx) + { + uint dstlen; + uchar dst[MY_AES_BLOCK_SIZE*2]; + if (encryption_ctx_finish(ctx, dst, &dstlen)) + DBUG_RETURN(1); + if (maybe_write_event_len(dst, dstlen) || write_internal(dst, dstlen)) + DBUG_RETURN(ER_ERROR_ON_WRITE); + } + DBUG_RETURN(0); +} + +/* + Log_event::write_header() +*/ + +bool Log_event::write_header(size_t event_data_length) +{ + uchar header[LOG_EVENT_HEADER_LEN]; + ulong now; + DBUG_ENTER("Log_event::write_header"); + DBUG_PRINT("enter", ("filepos: %lld length: %zu type: %d", + (longlong) writer->pos(), event_data_length, + (int) get_type_code())); + + writer->checksum_len= need_checksum() ? 
BINLOG_CHECKSUM_LEN : 0; + + /* Store number of bytes that will be written by this event */ + data_written= event_data_length + sizeof(header) + writer->checksum_len; + + /* + log_pos != 0 if this is relay-log event. In this case we should not + change the position + */ + + if (is_artificial_event()) + { + /* + Artificial events are automatically generated and do not exist + in master's binary log, so log_pos should be set to 0. + */ + log_pos= 0; + } + else if (!log_pos) + { + /* + Calculate the position of where the next event will start + (end of this event, that is). + */ + + log_pos= writer->pos() + data_written; + + DBUG_EXECUTE_IF("dbug_master_binlog_over_2GB", log_pos += (1ULL <<31);); + } + + now= get_time(); // Query start time + + /* + Header will be of size LOG_EVENT_HEADER_LEN for all events, except for + FORMAT_DESCRIPTION_EVENT and ROTATE_EVENT, where it will be + LOG_EVENT_MINIMAL_HEADER_LEN (remember these 2 have a frozen header, + because we read them before knowing the format). + */ + + int4store(header, now); // timestamp + header[EVENT_TYPE_OFFSET]= get_type_code(); + int4store(header+ SERVER_ID_OFFSET, server_id); + int4store(header+ EVENT_LEN_OFFSET, data_written); + int4store(header+ LOG_POS_OFFSET, log_pos); + int2store(header + FLAGS_OFFSET, flags); + + bool ret= writer->write_header(header, sizeof(header)); + DBUG_RETURN(ret); +} + + + +#if defined(HAVE_REPLICATION) +inline Log_event::enum_skip_reason +Log_event::continue_group(rpl_group_info *rgi) +{ + if (rgi->rli->slave_skip_counter == 1) + return Log_event::EVENT_SKIP_IGNORE; + return Log_event::do_shall_skip(rgi); +} +#endif + +/************************************************************************** + Query_log_event methods +**************************************************************************/ + +#if defined(HAVE_REPLICATION) + +/** + This (which is used only for SHOW BINLOG EVENTS) could be updated to + print SET @@session_var=. 
But this is not urgent, as SHOW BINLOG EVENTS is + only an information, it does not produce suitable queries to replay (for + example it does not print LOAD DATA INFILE). + @todo + show the catalog ?? +*/ + +void Query_log_event::pack_info(Protocol *protocol) +{ + // TODO: show the catalog ?? + char buf_mem[1024]; + String buf(buf_mem, sizeof(buf_mem), system_charset_info); + buf.real_alloc(9 + db_len + q_len); + if (!(flags & LOG_EVENT_SUPPRESS_USE_F) + && db && db_len) + { + buf.append(STRING_WITH_LEN("use ")); + append_identifier(protocol->thd, &buf, db, db_len); + buf.append(STRING_WITH_LEN("; ")); + } + + DBUG_ASSERT(!flags2 || flags2_inited); + + if (flags2 & (OPTION_NO_FOREIGN_KEY_CHECKS | OPTION_AUTO_IS_NULL | + OPTION_RELAXED_UNIQUE_CHECKS | + OPTION_NO_CHECK_CONSTRAINT_CHECKS | + OPTION_IF_EXISTS | + OPTION_INSERT_HISTORY)) + { + buf.append(STRING_WITH_LEN("set ")); + if (flags2 & OPTION_NO_FOREIGN_KEY_CHECKS) + buf.append(STRING_WITH_LEN("foreign_key_checks=1, ")); + if (flags2 & OPTION_AUTO_IS_NULL) + buf.append(STRING_WITH_LEN("sql_auto_is_null, ")); + if (flags2 & OPTION_RELAXED_UNIQUE_CHECKS) + buf.append(STRING_WITH_LEN("unique_checks=1, ")); + if (flags2 & OPTION_NO_CHECK_CONSTRAINT_CHECKS) + buf.append(STRING_WITH_LEN("check_constraint_checks=1, ")); + if (flags2 & OPTION_IF_EXISTS) + buf.append(STRING_WITH_LEN("@@sql_if_exists=1, ")); + if (flags2 & OPTION_INSERT_HISTORY) + buf.append(STRING_WITH_LEN("@@system_versioning_insert_history=1, ")); + buf[buf.length()-2]=';'; + } + if (query && q_len) + buf.append(query, q_len); + protocol->store(&buf); +} +#endif + + +/** + Utility function for the next method (Query_log_event::write()) . 
+*/ +static void store_str_with_code_and_len(uchar **dst, const char *src, + uint len, uint code) +{ + /* + only 1 byte to store the length of catalog, so it should not + surpass 255 + */ + DBUG_ASSERT(len <= 255); + DBUG_ASSERT(src); + *((*dst)++)= (uchar) code; + *((*dst)++)= (uchar) len; + bmove(*dst, src, len); + (*dst)+= len; +} + + +/** + Query_log_event::write(). + + @note + In this event we have to modify the header to have the correct + EVENT_LEN_OFFSET as we don't yet know how many status variables we + will print! +*/ + +bool Query_log_event::write() +{ + uchar buf[QUERY_HEADER_LEN + MAX_SIZE_LOG_EVENT_STATUS]; + uchar *start, *start_of_status; + ulong event_length; + + if (!query) + return 1; // Something wrong with event + + /* + We want to store the thread id: + (- as an information for the user when he reads the binlog) + - if the query uses temporary table: for the slave SQL thread to know to + which master connection the temp table belongs. + Now imagine we (write()) are called by the slave SQL thread (we are + logging a query executed by this thread; the slave runs with + --log-slave-updates). Then this query will be logged with + thread_id=the_thread_id_of_the_SQL_thread. Imagine that 2 temp tables of + the same name were created simultaneously on the master (in the master + binlog you have + CREATE TEMPORARY TABLE t; (thread 1) + CREATE TEMPORARY TABLE t; (thread 2) + ...) + then in the slave's binlog there will be + CREATE TEMPORARY TABLE t; (thread_id_of_the_slave_SQL_thread) + CREATE TEMPORARY TABLE t; (thread_id_of_the_slave_SQL_thread) + which is bad (same thread id!). + + To avoid this, we log the thread's thread id EXCEPT for the SQL + slave thread for which we log the original (master's) thread id. + Now this moves the bug: what happens if the thread id on the + master was 10 and when the slave replicates the query, a + connection number 10 is opened by a normal client on the slave, + and updates a temp table of the same name? 
We get a problem + again. To avoid this, in the handling of temp tables (sql_base.cc) + we use thread_id AND server_id. TODO when this is merged into + 4.1: in 4.1, slave_proxy_id has been renamed to pseudo_thread_id + and is a session variable: that's to make mysqlbinlog work with + temp tables. We probably need to introduce + + SET PSEUDO_SERVER_ID + for mysqlbinlog in 4.1. mysqlbinlog would print: + SET PSEUDO_SERVER_ID= + SET PSEUDO_THREAD_ID= + for each query using temp tables. + */ + int4store(buf + Q_THREAD_ID_OFFSET, slave_proxy_id); + int4store(buf + Q_EXEC_TIME_OFFSET, exec_time); + buf[Q_DB_LEN_OFFSET] = (char) db_len; + int2store(buf + Q_ERR_CODE_OFFSET, error_code); + + /* + You MUST always write status vars in increasing order of code. This + guarantees that a slightly older slave will be able to parse those he + knows. + */ + start_of_status= start= buf+QUERY_HEADER_LEN; + if (flags2_inited) + { + *start++= Q_FLAGS2_CODE; + int4store(start, flags2); + start+= 4; + } + if (sql_mode_inited) + { + *start++= Q_SQL_MODE_CODE; + int8store(start, (ulonglong)sql_mode); + start+= 8; + } + if (catalog_len) // i.e. this var is inited (false for 4.0 events) + { + store_str_with_code_and_len(&start, + catalog, catalog_len, Q_CATALOG_NZ_CODE); + /* + In 5.0.x where x<4 masters we used to store the end zero here. This was + a waste of one byte so we don't do it in x>=4 masters. We change code to + Q_CATALOG_NZ_CODE, because re-using the old code would make x<4 slaves + of this x>=4 master segfault (expecting a zero when there is + none). Remaining compatibility problems are: the older slave will not + find the catalog; but it is will not crash, and it's not an issue + that it does not find the catalog as catalogs were not used in these + older MySQL versions (we store it in binlog and read it from relay log + but do nothing useful with it). 
What is an issue is that the older slave + will stop processing the Q_* blocks (and jumps to the db/query) as soon + as it sees unknown Q_CATALOG_NZ_CODE; so it will not be able to read + Q_AUTO_INCREMENT*, Q_CHARSET and so replication will fail silently in + various ways. Documented that you should not mix alpha/beta versions if + they are not exactly the same version, with example of 5.0.3->5.0.2 and + 5.0.4->5.0.3. If replication is from older to new, the new will + recognize Q_CATALOG_CODE and have no problem. + */ + } + if (auto_increment_increment != 1 || auto_increment_offset != 1) + { + *start++= Q_AUTO_INCREMENT; + int2store(start, auto_increment_increment); + int2store(start+2, auto_increment_offset); + start+= 4; + } + if (charset_inited) + { + *start++= Q_CHARSET_CODE; + memcpy(start, charset, 6); + start+= 6; + } + if (time_zone_len) + { + /* In the TZ sys table, column Name is of length 64 so this should be ok */ + DBUG_ASSERT(time_zone_len <= MAX_TIME_ZONE_NAME_LENGTH); + store_str_with_code_and_len(&start, + time_zone_str, time_zone_len, Q_TIME_ZONE_CODE); + } + if (lc_time_names_number) + { + DBUG_ASSERT(lc_time_names_number <= 0xFFFF); + *start++= Q_LC_TIME_NAMES_CODE; + int2store(start, lc_time_names_number); + start+= 2; + } + if (charset_database_number) + { + DBUG_ASSERT(charset_database_number <= 0xFFFF); + *start++= Q_CHARSET_DATABASE_CODE; + int2store(start, charset_database_number); + start+= 2; + } + if (table_map_for_update) + { + *start++= Q_TABLE_MAP_FOR_UPDATE_CODE; + int8store(start, table_map_for_update); + start+= 8; + } + if (master_data_written != 0) + { + /* + Q_MASTER_DATA_WRITTEN_CODE only exists in relay logs where the master + has binlog_version<4 and the slave has binlog_version=4. See comment + for master_data_written in log_event.h for details. 
+ */ + *start++= Q_MASTER_DATA_WRITTEN_CODE; + int4store(start, master_data_written); + start+= 4; + } + + if (thd && thd->need_binlog_invoker()) + { + LEX_CSTRING user; + LEX_CSTRING host; + memset(&user, 0, sizeof(user)); + memset(&host, 0, sizeof(host)); + + if (thd->slave_thread && thd->has_invoker()) + { + /* user will be null, if master is older than this patch */ + user= thd->get_invoker_user(); + host= thd->get_invoker_host(); + } + else + { + Security_context *ctx= thd->security_ctx; + + if (thd->need_binlog_invoker() == THD::INVOKER_USER) + { + user.str= ctx->priv_user; + host.str= ctx->priv_host; + host.length= strlen(host.str); + } + else + { + user.str= ctx->priv_role; + host= empty_clex_str; + } + user.length= strlen(user.str); + } + + if (user.length > 0) + { + *start++= Q_INVOKER; + + /* + Store user length and user. The max length of use is 16, so 1 byte is + enough to store the user's length. + */ + *start++= (uchar)user.length; + memcpy(start, user.str, user.length); + start+= user.length; + + /* + Store host length and host. The max length of host is 60, so 1 byte is + enough to store the host's length. + */ + *start++= (uchar)host.length; + memcpy(start, host.str, host.length); + start+= host.length; + } + } + + if (thd && (thd->used & THD::QUERY_START_SEC_PART_USED)) + { + *start++= Q_HRNOW; + get_time(); + int3store(start, when_sec_part); + start+= 3; + } + + /* xid's is used with ddl_log handling */ + if (thd && thd->binlog_xid) + { + *start++= Q_XID; + int8store(start, thd->binlog_xid); + start+= 8; + } + + if (gtid_flags_extra) + { + *start++= Q_GTID_FLAGS3; + *start++= gtid_flags_extra; + if (gtid_flags_extra & + (Gtid_log_event::FL_COMMIT_ALTER_E1 | + Gtid_log_event::FL_ROLLBACK_ALTER_E1)) + { + int8store(start, sa_seq_no); + start+= 8; + } + } + + + /* + NOTE: When adding new status vars, please don't forget to update + the MAX_SIZE_LOG_EVENT_STATUS in log_event.h and update the function + code_name() in this file. 
+ + Here there could be code like + if (command-line-option-which-says-"log_this_variable" && inited) + { + *start++= Q_THIS_VARIABLE_CODE; + int4store(start, this_variable); + start+= 4; + } + */ + + /* Store length of status variables */ + status_vars_len= (uint) (start-start_of_status); + DBUG_ASSERT(status_vars_len <= MAX_SIZE_LOG_EVENT_STATUS); + int2store(buf + Q_STATUS_VARS_LEN_OFFSET, status_vars_len); + + /* + Calculate length of whole event + The "1" below is the \0 in the db's length + */ + event_length= ((uint) (start-buf) + get_post_header_size_for_derived() + + db_len + 1 + q_len); + + return write_header(event_length) || + write_data(buf, QUERY_HEADER_LEN) || + write_post_header_for_derived() || + write_data(start_of_status, (uint) status_vars_len) || + write_data(db, db_len + 1) || + write_data(query, q_len) || + write_footer(); +} + +bool Query_compressed_log_event::write() +{ + uchar *buffer; + uint32 alloc_size, compressed_size; + bool ret= true; + + compressed_size= alloc_size= binlog_get_compress_len(q_len); + buffer= (uchar*) my_safe_alloca(alloc_size); + if (buffer && + !binlog_buf_compress((uchar*) query, buffer, q_len, &compressed_size)) + { + /* + Write the compressed event. We have to temporarily store the event + in query and q_len as Query_log_event::write() uses these. + */ + const char *query_tmp= query; + uint32 q_len_tmp= q_len; + query= (char*) buffer; + q_len= compressed_size; + ret= Query_log_event::write(); + query= query_tmp; + q_len= q_len_tmp; + } + my_safe_afree(buffer, alloc_size); + return ret; +} + + +/** + The simplest constructor that could possibly work. This is used for + creating static objects that have a special meaning and are invisible + to the log. 
+*/ +Query_log_event::Query_log_event() + :Log_event(), data_buf(0) +{ + memset(&user, 0, sizeof(user)); + memset(&host, 0, sizeof(host)); +} + + +/* + SYNOPSIS + Query_log_event::Query_log_event() + thd_arg - thread handle + query_arg - array of char representing the query + query_length - size of the `query_arg' array + using_trans - there is a modified transactional table + direct - Don't cache statement + suppress_use - suppress the generation of 'USE' statements + errcode - the error code of the query + + DESCRIPTION + Creates an event for binlogging + The value for `errcode' should be supplied by caller. +*/ +Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg, + size_t query_length, bool using_trans, + bool direct, bool suppress_use, int errcode) + + :Log_event(thd_arg, + ((thd_arg->used & THD::THREAD_SPECIFIC_USED) + ? LOG_EVENT_THREAD_SPECIFIC_F : 0) | + (suppress_use ? LOG_EVENT_SUPPRESS_USE_F : 0), + using_trans), + data_buf(0), query(query_arg), catalog(thd_arg->catalog), + q_len((uint32) query_length), + thread_id(thd_arg->thread_id), + /* save the original thread id; we already know the server id */ + slave_proxy_id((ulong)thd_arg->variables.pseudo_thread_id), + flags2_inited(1), sql_mode_inited(1), charset_inited(1), flags2(0), + sql_mode(thd_arg->variables.sql_mode), + auto_increment_increment(thd_arg->variables.auto_increment_increment), + auto_increment_offset(thd_arg->variables.auto_increment_offset), + lc_time_names_number(thd_arg->variables.lc_time_names->number), + charset_database_number(0), + table_map_for_update((ulonglong)thd_arg->table_map_for_update), + master_data_written(0), + gtid_flags_extra(thd_arg->get_binlog_flags_for_alter()), + sa_seq_no(0) +{ + /* status_vars_len is set just before writing the event */ + + time_t end_time; + +#ifdef WITH_WSREP + /* + If Query_log_event will contain non trans keyword (not BEGIN, COMMIT, + SAVEPOINT or ROLLBACK) we disable PA for this transaction. 
+ Note that here WSREP(thd) might not be true e.g. when wsrep_shcema + is created we create tables with thd->variables.wsrep_on=false + to avoid replicating wsrep_schema tables to other nodes. + */ + if (WSREP_ON && !is_trans_keyword()) + { + thd->wsrep_PA_safe= false; + } +#endif /* WITH_WSREP */ + + memset(&user, 0, sizeof(user)); + memset(&host, 0, sizeof(host)); + error_code= errcode; + + end_time= my_time(0); + exec_time = (ulong) (end_time - thd_arg->start_time); + /** + @todo this means that if we have no catalog, then it is replicated + as an existing catalog of length zero. is that safe? /sven + */ + catalog_len = (catalog) ? (uint32) strlen(catalog) : 0; + + if (!(db= thd->db.str)) + db= ""; + db_len= (uint32) strlen(db); + if (thd_arg->variables.collation_database != thd_arg->db_charset) + charset_database_number= thd_arg->variables.collation_database->number; + + /* + We only replicate over the bits of flags2 that we need: the rest + are masked out by "& OPTIONS_WRITTEN_TO_BINLOG". + + We also force AUTOCOMMIT=1. Rationale (cf. BUG#29288): After + fixing BUG#26395, we always write BEGIN and COMMIT around all + transactions (even single statements in autocommit mode). This is + so that replication from non-transactional to transactional table + and error recovery from XA to non-XA table should work as + expected. The BEGIN/COMMIT are added in log.cc. However, there is + one exception: MyISAM bypasses log.cc and writes directly to the + binlog. So if autocommit is off, master has MyISAM, and slave has + a transactional engine, then the slave will just see one long + never-ending transaction. The only way to bypass explicit + BEGIN/COMMIT in the binlog is by using a non-transactional table. + So setting AUTOCOMMIT=1 will make this work as expected. + + Note: explicitly replicate AUTOCOMMIT=1 from master. We do not + assume AUTOCOMMIT=1 on slave; the slave still reads the state of + the autocommit flag as written by the master to the binlog. 
This + behavior may change after WL#4162 has been implemented. + */ + flags2= (uint32) (thd_arg->variables.option_bits & + (OPTIONS_WRITTEN_TO_BIN_LOG & ~OPTION_NOT_AUTOCOMMIT)); + DBUG_ASSERT(thd_arg->variables.character_set_client->number < 256*256); + DBUG_ASSERT(thd_arg->variables.collation_connection->number < 256*256); + DBUG_ASSERT(thd_arg->variables.collation_server->number < 256*256); + DBUG_ASSERT(thd_arg->variables.character_set_client->mbminlen == 1); + int2store(charset, thd_arg->variables.character_set_client->number); + int2store(charset+2, thd_arg->variables.collation_connection->number); + int2store(charset+4, thd_arg->variables.collation_server->number); + if (thd_arg->used & THD::TIME_ZONE_USED) + { + /* + Note that our event becomes dependent on the Time_zone object + representing the time zone. Fortunately such objects are never deleted + or changed during mysqld's lifetime. + */ + time_zone_len= thd_arg->variables.time_zone->get_name()->length(); + time_zone_str= thd_arg->variables.time_zone->get_name()->ptr(); + } + else + time_zone_len= 0; + + LEX *lex= thd->lex; + /* + Defines that the statement will be written directly to the binary log + without being wrapped by a BEGIN...COMMIT. Otherwise, the statement + will be written to either the trx-cache or stmt-cache. + + Note that a cache will not be used if the parameter direct is TRUE. + */ + bool use_cache= FALSE; + /* + TRUE defines that the trx-cache must be used and by consequence the + use_cache is TRUE. + + Note that a cache will not be used if the parameter direct is TRUE. + */ + bool trx_cache= FALSE; + cache_type= Log_event::EVENT_INVALID_CACHE; + + if (!direct) + { + switch (lex->sql_command) + { + case SQLCOM_DROP_TABLE: + case SQLCOM_DROP_SEQUENCE: + use_cache= (lex->tmp_table() && thd->in_multi_stmt_transaction_mode()); + break; + + case SQLCOM_CREATE_TABLE: + case SQLCOM_CREATE_SEQUENCE: + /* + If we are using CREATE ... 
SELECT or if we are a slave + executing BEGIN...COMMIT (generated by CREATE...SELECT) we + have to use the transactional cache to ensure we don't + calculate any checksum for the CREATE part. + */ + trx_cache= (lex->first_select_lex()->item_list.elements && + thd->is_current_stmt_binlog_format_row()) || + (thd->variables.option_bits & OPTION_GTID_BEGIN); + use_cache= (lex->tmp_table() && + thd->in_multi_stmt_transaction_mode()) || trx_cache; + break; + case SQLCOM_SET_OPTION: + if (lex->autocommit) + use_cache= trx_cache= FALSE; + else + use_cache= TRUE; + break; + case SQLCOM_RELEASE_SAVEPOINT: + case SQLCOM_ROLLBACK_TO_SAVEPOINT: + case SQLCOM_SAVEPOINT: + case SQLCOM_XA_END: + use_cache= trx_cache= TRUE; + break; + default: + use_cache= (gtid_flags_extra) ? false : sqlcom_can_generate_row_events(thd); + break; + } + } + + if (gtid_flags_extra & (Gtid_log_event::FL_COMMIT_ALTER_E1 | + Gtid_log_event::FL_ROLLBACK_ALTER_E1)) + sa_seq_no= thd_arg->get_binlog_start_alter_seq_no(); + + if (!use_cache || direct) + { + cache_type= Log_event::EVENT_NO_CACHE; + } + else if (using_trans || trx_cache || stmt_has_updated_trans_table(thd) || + thd->lex->is_mixed_stmt_unsafe(thd->in_multi_stmt_transaction_mode(), + thd->variables.binlog_direct_non_trans_update, + trans_has_updated_trans_table(thd), + thd->tx_isolation)) + cache_type= Log_event::EVENT_TRANSACTIONAL_CACHE; + else + cache_type= Log_event::EVENT_STMT_CACHE; + DBUG_ASSERT(cache_type != Log_event::EVENT_INVALID_CACHE); + DBUG_PRINT("info",("Query_log_event has flags2: %lu sql_mode: %llu cache_tye: %d", + (ulong) flags2, sql_mode, cache_type)); +} + +Query_compressed_log_event::Query_compressed_log_event(THD* thd_arg, const char* query_arg, + ulong query_length, bool using_trans, + bool direct, bool suppress_use, int errcode) + :Query_log_event(thd_arg, query_arg, query_length, using_trans, direct, + suppress_use, errcode), + query_buf(0) +{ + +} + + +#if defined(HAVE_REPLICATION) + +int 
Query_log_event::do_apply_event(rpl_group_info *rgi) +{ + return do_apply_event(rgi, query, q_len); +} + +/** + Compare if two errors should be regarded as equal. + This is to handle the case when you can get slightly different errors + on master and slave for the same thing. + @param + expected_error Error we got on master + actual_error Error we got on slave + + @return + 1 Errors are equal + 0 Errors are different +*/ + +bool test_if_equal_repl_errors(int expected_error, int actual_error) +{ + if (expected_error == actual_error) + return 1; + switch (expected_error) { + case ER_DUP_ENTRY: + case ER_DUP_ENTRY_WITH_KEY_NAME: + case ER_DUP_KEY: + case ER_AUTOINC_READ_FAILED: + return (actual_error == ER_DUP_ENTRY || + actual_error == ER_DUP_ENTRY_WITH_KEY_NAME || + actual_error == ER_DUP_KEY || + actual_error == ER_AUTOINC_READ_FAILED || + actual_error == HA_ERR_AUTOINC_ERANGE); + case ER_UNKNOWN_TABLE: + return actual_error == ER_IT_IS_A_VIEW; + default: + break; + } + return 0; +} + + +static start_alter_info *get_new_start_alter_info(THD *thd) +{ + /* + Why on global memory ?- So that process_commit/rollback_alter should not get + error when spawned threads exits too early. + */ + start_alter_info *info; + if (!(info= (start_alter_info *)my_malloc(PSI_INSTRUMENT_ME, + sizeof(start_alter_info), MYF(MY_WME)))) + { + sql_print_error("Failed to allocate memory for ddl log free list"); + return 0; + } + info->sa_seq_no= 0; + info->domain_id= 0; + info->direct_commit_alter= false; + info->state= start_alter_state::INVALID; + mysql_cond_init(0, &info->start_alter_cond, NULL); + info->error= 0; + + return info; +} + + +/* + Perform necessary actions for two-phase-logged ALTER parts, to + return + + 0 when the event's query proceeds normal parsing and execution + 1 when the event skips parsing and execution + -1 as error. 
+*/ +int Query_log_event::handle_split_alter_query_log_event(rpl_group_info *rgi, + bool &skip_error_check) +{ + int rc= 0; + + rgi->gtid_ev_flags_extra= gtid_flags_extra; + if (gtid_flags_extra & Gtid_log_event::FL_START_ALTER_E1) + { + //No Slave, Normal Slave, Start Alter under Worker 1 will simple binlog and exit + if(!rgi->rpt || rgi->reserved_start_alter_thread || WSREP(thd)) + { + rc= 1; + /* + We will just write the binlog and move to next event , because COMMIT + Alter will take care of actual work + */ + rgi->reserved_start_alter_thread= false; + thd->lex->sql_command= SQLCOM_ALTER_TABLE; + Write_log_with_flags wlwf(thd, Gtid_log_event::FL_START_ALTER_E1, + true /* wsrep to isolation end */); +#ifdef WITH_WSREP + if (WSREP(thd) && wsrep_thd_is_local(thd) && + // no need to supply other than db in this case + wsrep_to_isolation_begin(thd, db, NULL,NULL,NULL,NULL,NULL)) + return -1; +#endif + if (write_bin_log(thd, false, thd->query(), thd->query_length())) + return -1; + + my_ok(thd); + return rc; + } + if (!rgi->sa_info) + rgi->sa_info= get_new_start_alter_info(thd); + else + { + /* Not send Start-Alter into query execution when it's to rollback */ + mysql_mutex_lock(&rgi->rli->mi->start_alter_lock); + if (rgi->sa_info->state == start_alter_state::ROLLBACK_ALTER) + mysql_cond_broadcast(&rgi->sa_info->start_alter_cond); + mysql_mutex_unlock(&rgi->rli->mi->start_alter_lock); + } + + return rc; + } + + bool is_CA= (gtid_flags_extra & Gtid_log_event::FL_COMMIT_ALTER_E1) ? 
true : false; + if (is_CA) + { + DBUG_EXECUTE_IF("rpl_slave_stop_CA_before_binlog", + { + // the awake comes from STOP-SLAVE running driver (sql) thread + debug_sync_set_action(thd, + STRING_WITH_LEN("now WAIT_FOR proceed_CA_1")); + }); + } + start_alter_info *info=NULL; + Master_info *mi= NULL; + + rgi->gtid_ev_sa_seq_no= sa_seq_no; + // is set for both the direct execution and the write to binlog + thd->set_binlog_start_alter_seq_no(sa_seq_no); + mi= rgi->rli->mi; + mysql_mutex_lock(&mi->start_alter_list_lock); + { + List_iterator info_iterator(mi->start_alter_list); + while ((info= info_iterator++)) + { + if(info->sa_seq_no == rgi->gtid_ev_sa_seq_no && + info->domain_id == rgi->current_gtid.domain_id) + { + info_iterator.remove(); + break; + } + } + } + mysql_mutex_unlock(&mi->start_alter_list_lock); + + if (!info) + { + if (is_CA) + { + /* + error handeling, direct_commit_alter is turned on, so that we dont + wait for master reply in mysql_alter_table (in wait_for_master) + */ + rgi->direct_commit_alter= true; +#ifdef WITH_WSREP + if (WSREP(thd)) + thd->set_binlog_flags_for_alter(Gtid_log_event::FL_COMMIT_ALTER_E1); +#endif + goto cleanup; + } + else + { + //Just write the binlog because there is nothing to be done + goto write_binlog; + } + } + + mysql_mutex_lock(&mi->start_alter_lock); + if (info->state != start_alter_state::COMPLETED) + { + if (is_CA) + info->state= start_alter_state::COMMIT_ALTER; + else + info->state= start_alter_state::ROLLBACK_ALTER; + mysql_cond_broadcast(&info->start_alter_cond); + mysql_mutex_unlock(&mi->start_alter_lock); + /* + Wait till Start Alter worker has changed the state to ::COMPLETED + when start alter worker reaches the old code write_bin_log(), it will + change state to COMMITTED. + COMMITTED and `direct_commit_alter == true` at the same time indicates + the query needs re-execution by the CA running thread. 
+ */ + mysql_mutex_lock(&mi->start_alter_lock); + + DBUG_ASSERT(info->state == start_alter_state::COMPLETED || + !info->direct_commit_alter); + + while(info->state != start_alter_state::COMPLETED) + mysql_cond_wait(&info->start_alter_cond, &mi->start_alter_lock); + } + else + { + // SA has completed and left being kicked out by deadlock or ftwrl + DBUG_ASSERT(info->direct_commit_alter); + } + mysql_mutex_unlock(&mi->start_alter_lock); + + if (info->direct_commit_alter) + { + rgi->direct_commit_alter= true; // execute the query as if there was no SA + if (is_CA) + goto cleanup; + } + +write_binlog: + rc= 1; + + if(!is_CA) + { + if(((info && info->error) || error_code) && + global_system_variables.log_warnings > 2) + { + sql_print_information("Query '%s' having %d error code on master " + "is rolled back%s", query, error_code, + !(info && info->error) ? "." : ";"); + if (info && info->error) + sql_print_information("its execution on slave %sproduced %d error.", + info->error == error_code ? "re":"", info->error); + } + } + { + thd->lex->sql_command= SQLCOM_ALTER_TABLE; + Write_log_with_flags wlwf(thd, is_CA ? Gtid_log_event::FL_COMMIT_ALTER_E1 : + Gtid_log_event::FL_ROLLBACK_ALTER_E1, + true); +#ifdef WITH_WSREP + if (WSREP(thd) && wsrep_thd_is_local(thd) && + wsrep_to_isolation_begin(thd, db, NULL,NULL,NULL,NULL,NULL)) + rc= -1; +#endif + if (rc != -1 && + write_bin_log(thd, false, thd->query(), thd->query_length())) + rc= -1; + } + + if (!thd->is_error()) + { + skip_error_check= true; + my_ok(thd); + } + +cleanup: + if (info) + { + mysql_cond_destroy(&info->start_alter_cond); + my_free(info); + } + return rc; +} + + +/** + @todo + Compare the values of "affected rows" around here. 
Something + like: + @code + if ((uint32) affected_in_event != (uint32) affected_on_slave) + { + sql_print_error("Slave: did not get the expected number of affected \ + rows running query from master - expected %d, got %d (this numbers \ + should have matched modulo 4294967296).", 0, ...); + thd->query_error = 1; + } + @endcode + We may also want an option to tell the slave to ignore "affected" + mismatch. This mismatch could be implemented with a new ER_ code, and + to ignore it you would use --slave-skip-errors... +*/ +int Query_log_event::do_apply_event(rpl_group_info *rgi, + const char *query_arg, uint32 q_len_arg) +{ + int expected_error,actual_error= 0; + Schema_specification_st db_options; + uint64 sub_id= 0; + void *hton= NULL; + rpl_gtid gtid; + Relay_log_info const *rli= rgi->rli; + Rpl_filter *rpl_filter= rli->mi->rpl_filter; + bool current_stmt_is_commit; + bool skip_error_check= false; + DBUG_ENTER("Query_log_event::do_apply_event"); + + /* + Colleagues: please never free(thd->catalog) in MySQL. This would + lead to bugs as here thd->catalog is a part of an alloced block, + not an entire alloced block (see + Query_log_event::do_apply_event()). Same for thd->db. Thank + you. + */ + thd->catalog= catalog_len ? (char *) catalog : (char *)""; + rgi->start_alter_ev= this; + + size_t valid_len= Well_formed_prefix(system_charset_info, + db, db_len, NAME_LEN).length(); + + if (valid_len != db_len) + { + rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, + ER_THD(thd, ER_SLAVE_FATAL_ERROR), + "Invalid database name in Query event."); + thd->is_slave_error= true; + goto end; + } + + set_thd_db(thd, rpl_filter, db, db_len); + + /* + Setting the character set and collation of the current database thd->db. 
+ */ + load_db_opt_by_name(thd, thd->db.str, &db_options); + if (db_options.default_table_charset) + thd->db_charset= db_options.default_table_charset; + thd->variables.auto_increment_increment= auto_increment_increment; + thd->variables.auto_increment_offset= auto_increment_offset; + + DBUG_PRINT("info", ("log_pos: %lu", (ulong) log_pos)); + + thd->clear_error(1); + current_stmt_is_commit= is_commit(); + + DBUG_ASSERT(!current_stmt_is_commit || !rgi->tables_to_lock); + rgi->slave_close_thread_tables(thd); + + /* + Note: We do not need to execute reset_one_shot_variables() if this + db_ok() test fails. + Reason: The db stored in binlog events is the same for SET and for + its companion query. If the SET is ignored because of + db_ok(), the companion query will also be ignored, and if + the companion query is ignored in the db_ok() test of + ::do_apply_event(), then the companion SET also have so + we don't need to reset_one_shot_variables(). + */ + if (is_trans_keyword() || rpl_filter->db_ok(thd->db.str)) + { + bool is_rb_alter= gtid_flags_extra & Gtid_log_event::FL_ROLLBACK_ALTER_E1; + + thd->set_time(when, when_sec_part); + thd->set_query_and_id((char*)query_arg, q_len_arg, + thd->charset(), next_query_id()); + thd->variables.pseudo_thread_id= thread_id; // for temp tables + DBUG_PRINT("query",("%s", thd->query())); + + if (unlikely(!(expected_error= !is_rb_alter ? error_code : 0)) || + ignored_error_code(expected_error) || + !unexpected_error_code(expected_error)) + { + thd->slave_expected_error= expected_error; + if (flags2_inited) + { + ulonglong mask= flags2_inited; + thd->variables.option_bits= (flags2 & mask) | + (thd->variables.option_bits & ~mask); + } + /* + else, we are in a 3.23/4.0 binlog; we previously received a + Rotate_log_event which reset thd->variables.option_bits and + sql_mode etc, so nothing to do. + */ + /* + We do not replicate MODE_NO_DIR_IN_CREATE. 
That is, if the master is a + slave which runs with SQL_MODE=MODE_NO_DIR_IN_CREATE, this should not + force us to ignore the dir too. Imagine you are a ring of machines, and + one has a disk problem so that you temporarily need + MODE_NO_DIR_IN_CREATE on this machine; you don't want it to propagate + elsewhere (you don't want all slaves to start ignoring the dirs). + */ + if (sql_mode_inited) + thd->variables.sql_mode= + (sql_mode_t) ((thd->variables.sql_mode & MODE_NO_DIR_IN_CREATE) | + (sql_mode & ~(sql_mode_t) MODE_NO_DIR_IN_CREATE)); + if (charset_inited) + { + rpl_sql_thread_info *sql_info= thd->system_thread_info.rpl_sql_info; + if (thd->slave_thread && sql_info->cached_charset_compare(charset)) + { + /* Verify that we support the charsets found in the event. */ + if (!(thd->variables.character_set_client= + get_charset(uint2korr(charset), MYF(MY_WME))) || + !(thd->variables.collation_connection= + get_charset(uint2korr(charset+2), MYF(MY_WME))) || + !(thd->variables.collation_server= + get_charset(uint2korr(charset+4), MYF(MY_WME)))) + { + /* + We updated the thd->variables with nonsensical values (0). Let's + set them to something safe (i.e. which avoids crash), and we'll + stop with EE_UNKNOWN_CHARSET in compare_errors (unless set to + ignore this error). + */ + set_slave_thread_default_charset(thd, rgi); + goto compare_errors; + } + thd->update_charset(); // for the charset change to take effect + /* + Reset thd->query_string.cs to the newly set value. + Note, there is a small flaw here. For a very short time frame + if the new charset is different from the old charset and + if another thread executes "SHOW PROCESSLIST" after + the above thd->set_query_and_id() and before this thd->set_query(), + and if the current query has some non-ASCII characters, + the another thread may see some '?' marks in the PROCESSLIST + result. This should be acceptable now. This is a reminder + to fix this if any refactoring happens here sometime. 
+ */ + thd->set_query((char*) query_arg, q_len_arg, thd->charset()); + } + } + if (time_zone_len) + { + String tmp(time_zone_str, time_zone_len, &my_charset_bin); + if (!(thd->variables.time_zone= my_tz_find(thd, &tmp))) + { + my_error(ER_UNKNOWN_TIME_ZONE, MYF(0), tmp.c_ptr()); + thd->variables.time_zone= global_system_variables.time_zone; + goto compare_errors; + } + } + if (lc_time_names_number) + { + if (!(thd->variables.lc_time_names= + my_locale_by_number(lc_time_names_number))) + { + my_printf_error(ER_UNKNOWN_ERROR, + "Unknown locale: '%d'", MYF(0), lc_time_names_number); + thd->variables.lc_time_names= &my_locale_en_US; + goto compare_errors; + } + } + else + thd->variables.lc_time_names= &my_locale_en_US; + if (charset_database_number) + { + CHARSET_INFO *cs; + if (!(cs= get_charset(charset_database_number, MYF(0)))) + { + char buf[20]; + int10_to_str((int) charset_database_number, buf, -10); + my_error(ER_UNKNOWN_COLLATION, MYF(0), buf); + goto compare_errors; + } + thd->variables.collation_database= cs; + } + else + thd->variables.collation_database= thd->db_charset; + + { + const CHARSET_INFO *cs= thd->charset(); + /* + We cannot ask for parsing a statement using a character set + without state_maps (parser internal data). + */ + if (!cs->state_map) + { + rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, + ER_THD(thd, ER_SLAVE_FATAL_ERROR), + "character_set cannot be parsed"); + thd->is_slave_error= true; + goto end; + } + } + + /* + Record any GTID in the same transaction, so slave state is + transactionally consistent. 
+ */ + if (current_stmt_is_commit) + { + thd->variables.option_bits&= ~OPTION_GTID_BEGIN; + if (rgi->gtid_pending) + { + sub_id= rgi->gtid_sub_id; + rgi->gtid_pending= false; + + gtid= rgi->current_gtid; + if (unlikely(rpl_global_gtid_slave_state->record_gtid(thd, &gtid, + sub_id, + true, false, + &hton))) + { + int errcode= thd->get_stmt_da()->sql_errno(); + if (!is_parallel_retry_error(rgi, errcode)) + rli->report(ERROR_LEVEL, ER_CANNOT_UPDATE_GTID_STATE, + rgi->gtid_info(), + "Error during COMMIT: failed to update GTID state in " + "%s.%s: %d: %s", + "mysql", rpl_gtid_slave_state_table_name.str, + errcode, + thd->get_stmt_da()->message()); + sub_id= 0; + thd->is_slave_error= 1; + goto end; + } + } + } + + thd->table_map_for_update= (table_map)table_map_for_update; + thd->set_invoker(&user, &host); + /* + Flag if we need to rollback the statement transaction on + slave if it by chance succeeds. + If we expected a non-zero error code and get nothing and, + it is a concurrency issue or ignorable issue, effects + of the statement should be rolled back. 
+ */ + if (unlikely(expected_error) && + (ignored_error_code(expected_error) || + concurrency_error_code(expected_error))) + { + thd->variables.option_bits|= OPTION_MASTER_SQL_ERROR; + thd->variables.option_bits&= ~OPTION_GTID_BEGIN; + } + + int sa_result= 0; + bool is_2p_alter= gtid_flags_extra & + (Gtid_log_event::FL_START_ALTER_E1 | + Gtid_log_event::FL_COMMIT_ALTER_E1 | + Gtid_log_event::FL_ROLLBACK_ALTER_E1); + if (is_2p_alter) + sa_result= handle_split_alter_query_log_event(rgi, skip_error_check); + if (sa_result == 0) + { + /* Execute the query (note that we bypass dispatch_command()) */ + Parser_state parser_state; + if (!parser_state.init(thd, thd->query(), thd->query_length())) + { + DBUG_ASSERT(thd->m_digest == NULL); + thd->m_digest= & thd->m_digest_state; + DBUG_ASSERT(thd->m_statement_psi == NULL); + thd->m_statement_psi= MYSQL_START_STATEMENT(&thd->m_statement_state, + stmt_info_rpl.m_key, + thd->db.str, thd->db.length, + thd->charset(), NULL); + THD_STAGE_INFO(thd, stage_starting); + MYSQL_SET_STATEMENT_TEXT(thd->m_statement_psi, thd->query(), thd->query_length()); + if (thd->m_digest != NULL) + thd->m_digest->reset(thd->m_token_array, max_digest_length); + + if (thd->slave_thread) + { + /* + To be compatible with previous releases, the slave thread uses the global + log_slow_disabled_statements value, wich can be changed dynamically, so we + have to set the sql_log_slow respectively. + */ + thd->variables.sql_log_slow= !MY_TEST(global_system_variables.log_slow_disabled_statements & LOG_SLOW_DISABLE_SLAVE); + } + mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); + /* Finalize server status flags after executing a statement. */ + thd->update_server_status(); + log_slow_statement(thd); + thd->lex->restore_set_statement_var(); + + /* + When THD::slave_expected_error gets reset inside execution stack + that is the case of to be ignored event. In this case the expected + error must change to the reset value as well. 
+ */ + expected_error= thd->slave_expected_error; + } + } + else if (sa_result == -1) + { + rli->report(ERROR_LEVEL, expected_error, rgi->gtid_info(), + "TODO start alter error"); + thd->is_slave_error= 1; + goto end; + } + thd->variables.option_bits&= ~OPTION_MASTER_SQL_ERROR; + if (is_2p_alter && !rgi->is_parallel_exec) + { + rgi->gtid_ev_flags_extra= 0; + rgi->direct_commit_alter= 0; + rgi->gtid_ev_sa_seq_no= 0; + } + } + else + { + /* + The query got a really bad error on the master (thread killed etc), + which could be inconsistent. Parse it to test the table names: if the + replicate-*-do|ignore-table rules say "this query must be ignored" then + we exit gracefully; otherwise we warn about the bad error and tell DBA + to check/fix it. + */ + if (mysql_test_parse_for_slave(thd, thd->query(), thd->query_length())) + thd->clear_error(1); + else + { + rli->report(ERROR_LEVEL, expected_error, rgi->gtid_info(), + "\ +Query partially completed on the master (error on master: %d) \ +and was aborted. There is a chance that your master is inconsistent at this \ +point. If you are sure that your master is ok, run this query manually on the \ +slave and then restart the slave with SET GLOBAL SQL_SLAVE_SKIP_COUNTER=1; \ +START SLAVE; . Query: '%s'", expected_error, thd->query()); + thd->is_slave_error= 1; + } + goto end; + } + + /* If the query was not ignored, it is printed to the general log */ + if (likely(!thd->is_error()) || + thd->get_stmt_da()->sql_errno() != ER_SLAVE_IGNORED_TABLE) + general_log_write(thd, COM_QUERY, thd->query(), thd->query_length()); + else + { + /* + Bug#54201: If we skip an INSERT query that uses auto_increment, then we + should reset any @@INSERT_ID set by an Intvar_log_event associated with + the query; otherwise the @@INSERT_ID will linger until the next INSERT + that uses auto_increment and may affect extra triggers on the slave etc. + + We reset INSERT_ID unconditionally; it is probably cheaper than + checking if it is necessary. 
+ */ + thd->auto_inc_intervals_forced.empty(); + } + +compare_errors: + /* + In the slave thread, we may sometimes execute some DROP / * 40005 + TEMPORARY * / TABLE that come from parts of binlogs (likely if we + use RESET SLAVE or CHANGE MASTER TO), while the temporary table + has already been dropped. To ignore such irrelevant "table does + not exist errors", we silently clear the error if TEMPORARY was used. + */ + if ((thd->lex->sql_command == SQLCOM_DROP_TABLE || + thd->lex->sql_command == SQLCOM_DROP_SEQUENCE) && + thd->lex->tmp_table() && + thd->is_error() && thd->get_stmt_da()->sql_errno() == ER_BAD_TABLE_ERROR && + !expected_error) + thd->get_stmt_da()->reset_diagnostics_area(); + /* + If we expected a non-zero error code, and we don't get the same error + code, and it should be ignored or is related to a concurrency issue. + */ + actual_error= thd->is_error() ? thd->get_stmt_da()->sql_errno() : + skip_error_check? expected_error : 0; + DBUG_PRINT("info",("expected_error: %d sql_errno: %d", + expected_error, actual_error)); + + if ((unlikely(expected_error) && + !test_if_equal_repl_errors(expected_error, actual_error) && + !concurrency_error_code(expected_error)) && + !ignored_error_code(actual_error) && + !ignored_error_code(expected_error)) + { + rli->report(ERROR_LEVEL, 0, rgi->gtid_info(), + "Query caused different errors on master and slave. " + "Error on master: message (format)='%s' error code=%d ; " + "Error on slave: actual message='%s', error code=%d. " + "Default database: '%s'. Query: '%s'", + ER_THD(thd, expected_error), + expected_error, + actual_error ? thd->get_stmt_da()->message() : "no error", + actual_error, + print_slave_db_safe(db), query_arg); + thd->is_slave_error= 1; + } + /* + If we get the same error code as expected and it is not a concurrency + issue, or should be ignored. 
+ */ + else if ((test_if_equal_repl_errors(expected_error, actual_error) && + !concurrency_error_code(expected_error)) || + ignored_error_code(actual_error)) + { + DBUG_PRINT("info",("error ignored")); + thd->clear_error(1); + if (actual_error == ER_QUERY_INTERRUPTED || + actual_error == ER_CONNECTION_KILLED) + thd->reset_killed(); + } + /* + Other cases: mostly we expected no error and get one. + */ + else if (unlikely(thd->is_slave_error || thd->is_fatal_error)) + { + if (!is_parallel_retry_error(rgi, actual_error)) + rli->report(ERROR_LEVEL, actual_error, rgi->gtid_info(), + "Error '%s' on query. Default database: '%s'. Query: '%s'", + (actual_error ? thd->get_stmt_da()->message() : + "unexpected success or fatal error"), + thd->get_db(), query_arg); + thd->is_slave_error= 1; +#ifdef WITH_WSREP + if (wsrep_thd_is_toi(thd) && wsrep_must_ignore_error(thd)) + { + thd->clear_error(1); + thd->killed= NOT_KILLED; + thd->wsrep_has_ignored_error= true; + } +#endif /* WITH_WSREP */ + } + + /* + TODO: compare the values of "affected rows" around here. Something + like: + if ((uint32) affected_in_event != (uint32) affected_on_slave) + { + sql_print_error("Slave: did not get the expected number of affected \ + rows running query from master - expected %d, got %d (this numbers \ + should have matched modulo 4294967296).", 0, ...); + thd->is_slave_error = 1; + } + We may also want an option to tell the slave to ignore "affected" + mismatch. This mismatch could be implemented with a new ER_ code, and + to ignore it you would use --slave-skip-errors... + + To do the comparison we need to know the value of "affected" which the + above mysql_parse() computed. And we need to know the value of + "affected" in the master's binlog. Both will be implemented later. The + important thing is that we now have the format ready to log the values + of "affected" in the binlog. So we can release 5.0.0 before effectively + logging "affected" and effectively comparing it. 
+ */ + } /* End of if (db_ok(... */ + + { + /** + The following failure injecion works in cooperation with tests + setting @@global.debug= 'd,stop_slave_middle_group'. + The sql thread receives the killed status and will proceed + to shutdown trying to finish incomplete events group. + */ + DBUG_EXECUTE_IF("stop_slave_middle_group", + if (!current_stmt_is_commit && is_begin() == 0) + { + if (thd->transaction->all.modified_non_trans_table) + const_cast<Relay_log_info*>(rli)->abort_slave= 1; + };); + } + +end: + if (unlikely(sub_id && !thd->is_slave_error)) + rpl_global_gtid_slave_state->update_state_hash(sub_id, &gtid, hton, rgi); + + /* + Probably we have set thd->query, thd->db, thd->catalog to point to places + in the data_buf of this event. Now the event is going to be deleted + probably, so data_buf will be freed, so the thd->... listed above will be + pointers to freed memory. + So we must set them to 0, so that those bad pointers values are not later + used. Note that "cleanup" queries like automatic DROP TEMPORARY TABLE + don't suffer from these assignments to 0 as DROP TEMPORARY + TABLE uses the db.table syntax. + */ + thd->catalog= 0; + thd->set_db(&null_clex_str); /* will free the current database */ + thd->reset_query(); + DBUG_PRINT("info", ("end: query= 0")); + + /* Mark the statement completed. */ + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; + thd->m_digest= NULL; + + /* + As a disk space optimization, future masters will not log an event for + LAST_INSERT_ID() if that function returned 0 (and thus they will be able + to replace the THD::stmt_depends_on_first_successful_insert_id_in_prev_stmt + variable by (THD->first_successful_insert_id_in_prev_stmt > 0) ; with the + resetting below we are ready to support that. 
+ */ + thd->first_successful_insert_id_in_prev_stmt_for_binlog= 0; + thd->first_successful_insert_id_in_prev_stmt= 0; + thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt= 0; + free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); + DBUG_RETURN(thd->is_slave_error); +} + +Log_event::enum_skip_reason +Query_log_event::do_shall_skip(rpl_group_info *rgi) +{ + Relay_log_info *rli= rgi->rli; + DBUG_ENTER("Query_log_event::do_shall_skip"); + DBUG_PRINT("debug", ("query: '%s' q_len: %d", query, q_len)); + DBUG_ASSERT(query && q_len > 0); + DBUG_ASSERT(thd == rgi->thd); + + /* + An event skipped due to @@skip_replication must not be counted towards the + number of events to be skipped due to @@sql_slave_skip_counter. + */ + if (flags & LOG_EVENT_SKIP_REPLICATION_F && + opt_replicate_events_marked_for_skip != RPL_SKIP_REPLICATE) + DBUG_RETURN(Log_event::EVENT_SKIP_IGNORE); + + if (rli->slave_skip_counter > 0) + { + if (is_begin()) + { + thd->variables.option_bits|= OPTION_BEGIN | OPTION_GTID_BEGIN; + DBUG_RETURN(Log_event::continue_group(rgi)); + } + + if (is_commit() || is_rollback()) + { + thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_GTID_BEGIN); + DBUG_RETURN(Log_event::EVENT_SKIP_COUNT); + } + } +#ifdef WITH_WSREP + else if (WSREP(thd) && wsrep_mysql_replication_bundle && + opt_slave_domain_parallel_threads == 0 && + thd->wsrep_mysql_replicated > 0 && + (is_begin() || is_commit())) + { + if (++thd->wsrep_mysql_replicated < (int)wsrep_mysql_replication_bundle) + { + WSREP_DEBUG("skipping wsrep commit %d", thd->wsrep_mysql_replicated); + DBUG_RETURN(Log_event::EVENT_SKIP_IGNORE); + } + else + { + thd->wsrep_mysql_replicated = 0; + } + } +#endif /* WITH_WSREP */ + DBUG_RETURN(Log_event::do_shall_skip(rgi)); +} + + +bool +Query_log_event::peek_is_commit_rollback(const uchar *event_start, + size_t event_len, + enum enum_binlog_checksum_alg + checksum_alg) +{ + if (checksum_alg == BINLOG_CHECKSUM_ALG_CRC32) + { + if (event_len > BINLOG_CHECKSUM_LEN) + event_len-= 
BINLOG_CHECKSUM_LEN; + else + event_len= 0; + } + else + DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF || + checksum_alg == BINLOG_CHECKSUM_ALG_OFF); + + if (event_len < LOG_EVENT_HEADER_LEN + QUERY_HEADER_LEN || event_len < 9) + return false; + return !memcmp(event_start + (event_len-7), "\0COMMIT", 7) || + !memcmp(event_start + (event_len-9), "\0ROLLBACK", 9); +} + +#endif + + +/************************************************************************** + Start_log_event_v3 methods +**************************************************************************/ + +Start_log_event_v3::Start_log_event_v3() + :Log_event(), created(0), binlog_version(BINLOG_VERSION), + dont_set_created(0) +{ + memcpy(server_version, ::server_version, ST_SERVER_VER_LEN); +} + + +#if defined(HAVE_REPLICATION) +void Start_log_event_v3::pack_info(Protocol *protocol) +{ + char buf[12 + ST_SERVER_VER_LEN + 14 + 22], *pos; + pos= strmov(buf, "Server ver: "); + pos= strmov(pos, server_version); + pos= strmov(pos, ", Binlog ver: "); + pos= int10_to_str(binlog_version, pos, 10); + protocol->store(buf, (uint) (pos-buf), &my_charset_bin); +} +#endif + + +bool Start_log_event_v3::write() +{ + char buff[START_V3_HEADER_LEN]; + int2store(buff + ST_BINLOG_VER_OFFSET,binlog_version); + memcpy(buff + ST_SERVER_VER_OFFSET,server_version,ST_SERVER_VER_LEN); + if (!dont_set_created) + created= get_time(); // this sets when and when_sec_part as a side effect + int4store(buff + ST_CREATED_OFFSET,created); + return write_header(sizeof(buff)) || + write_data(buff, sizeof(buff)) || + write_footer(); +} + + +#if defined(HAVE_REPLICATION) + +/** + Start_log_event_v3::do_apply_event() . + The master started + + IMPLEMENTATION + - To handle the case where the master died without having time to write + DROP TEMPORARY TABLE, DO RELEASE_LOCK (prepared statements' deletion is + TODO), we clean up all temporary tables that we got, if we are sure we + can (see below). + + @todo + - Remove all active user locks. 
+ Guilhem 2003-06: this is true but not urgent: the worst it can cause is + the use of a bit of memory for a user lock which will not be used + anymore. If the user lock is later used, the old one will be released. In + other words, no deadlock problem. +*/ + +int Start_log_event_v3::do_apply_event(rpl_group_info *rgi) +{ + DBUG_ENTER("Start_log_event_v3::do_apply_event"); + int error= 0; + Relay_log_info *rli= rgi->rli; + + switch (binlog_version) + { + case 3: + case 4: + /* + This can either be 4.x (then a Start_log_event_v3 is only at master + startup so we are sure the master has restarted and cleared his temp + tables; the event always has 'created'>0) or 5.0 (then we have to test + 'created'). + */ + if (created) + { + rli->close_temporary_tables(); + + /* + The following is only false if we get here with a BINLOG statement + */ + if (rli->mi) + cleanup_load_tmpdir(&rli->mi->cmp_connection_name); + } + break; + + /* + Now the older formats; in that case load_tmpdir is cleaned up by the I/O + thread. + */ + case 1: + if (strncmp(rli->relay_log.description_event_for_exec->server_version, + "3.23.57",7) >= 0 && created) + { + /* + Can distinguish, based on the value of 'created': this event was + generated at master startup. + */ + rli->close_temporary_tables(); + } + /* + Otherwise, can't distinguish a Start_log_event generated at + master startup and one generated by master FLUSH LOGS, so cannot + be sure temp tables have to be dropped. So do nothing. + */ + break; + default: + /* + This case is not expected. It can be either an event corruption or an + unsupported binary log version. 
+ */ + rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, + ER_THD(thd, ER_SLAVE_FATAL_ERROR), + "Binlog version not supported"); + DBUG_RETURN(1); + } + DBUG_RETURN(error); +} +#endif /* defined(HAVE_REPLICATION) */ + +/*************************************************************************** + Format_description_log_event methods +****************************************************************************/ + +bool Format_description_log_event::write() +{ + bool ret; + bool no_checksum; + /* + We don't call Start_log_event_v3::write() because this would make 2 + my_b_safe_write(). + */ + uchar buff[START_V3_HEADER_LEN+1]; + size_t rec_size= sizeof(buff) + BINLOG_CHECKSUM_ALG_DESC_LEN + + number_of_event_types; + int2store(buff + ST_BINLOG_VER_OFFSET,binlog_version); + memcpy((char*) buff + ST_SERVER_VER_OFFSET,server_version,ST_SERVER_VER_LEN); + if (!dont_set_created) + created= get_time(); + int4store(buff + ST_CREATED_OFFSET,created); + buff[ST_COMMON_HEADER_LEN_OFFSET]= common_header_len; + /* + if checksum is requested + record the checksum-algorithm descriptor next to + post_header_len vector which will be followed by the checksum value. + Master is supposed to trigger checksum computing by binlog_checksum_options, + slave does it via marking the event according to + FD_queue checksum_alg value. + */ + compile_time_assert(BINLOG_CHECKSUM_ALG_DESC_LEN == 1); +#ifdef DBUG_ASSERT_EXISTS + data_written= 0; // to prepare for need_checksum assert +#endif + uint8 checksum_byte= (uint8) + (need_checksum() ? checksum_alg : BINLOG_CHECKSUM_ALG_OFF); + /* + FD of checksum-aware server is always checksum-equipped, (V) is in, + regardless of @@global.binlog_checksum policy. + Thereby a combination of (A) == 0, (V) != 0 means + it's the checksum-aware server's FD event that heads checksum-free binlog + file. + Here 0 stands for checksumming OFF to evaluate (V) as 0 is that case. 
+ A combination of (A) != 0, (V) != 0 denotes FD of the checksum-aware server + heading the checksummed binlog. + (A), (V) presence in FD of the checksum-aware server makes the event + 1 + 4 bytes bigger comparing to the former FD. + */ + + if ((no_checksum= (checksum_alg == BINLOG_CHECKSUM_ALG_OFF))) + { + checksum_alg= BINLOG_CHECKSUM_ALG_CRC32; // Forcing (V) room to fill anyway + } + ret= write_header(rec_size) || + write_data(buff, sizeof(buff)) || + write_data(post_header_len, number_of_event_types) || + write_data(&checksum_byte, sizeof(checksum_byte)) || + write_footer(); + if (no_checksum) + checksum_alg= BINLOG_CHECKSUM_ALG_OFF; + return ret; +} + +#if defined(HAVE_REPLICATION) +/* + Auxiliary function to conduct cleanup of unfinished two-phase logged ALTERs. +*/ +static void check_and_remove_stale_alter(Relay_log_info *rli) +{ + Master_info *mi= rli->mi; + start_alter_info *info=NULL; + + mysql_mutex_lock(&mi->start_alter_list_lock); + List_iterator<start_alter_info> info_iterator(mi->start_alter_list); + while ((info= info_iterator++)) + { + DBUG_ASSERT(info->state == start_alter_state::REGISTERED); + + sql_print_warning("ALTER query started at %u-%u-%llu could not " + "be completed because of unexpected master server " + "or its binlog change", info->sa_seq_no, // todo:gtid + 0, 0); + info_iterator.remove(); + mysql_mutex_lock(&mi->start_alter_lock); + info->state= start_alter_state::ROLLBACK_ALTER; + mysql_mutex_unlock(&mi->start_alter_lock); + mysql_cond_broadcast(&info->start_alter_cond); + mysql_mutex_lock(&mi->start_alter_lock); + while(info->state != start_alter_state::COMPLETED) + mysql_cond_wait(&info->start_alter_cond, &mi->start_alter_lock); + mysql_mutex_unlock(&mi->start_alter_lock); + mysql_cond_destroy(&info->start_alter_cond); + my_free(info); + } + mysql_mutex_unlock(&mi->start_alter_list_lock); +} + +int Format_description_log_event::do_apply_event(rpl_group_info *rgi) +{ + int ret= 0; + Relay_log_info *rli= rgi->rli; + 
DBUG_ENTER("Format_description_log_event::do_apply_event"); + + /* + As a transaction NEVER spans on 2 or more binlogs: + if we have an active transaction at this point, the master died + while writing the transaction to the binary log, i.e. while + flushing the binlog cache to the binlog. XA guarantees that master has + rolled back. So we roll back. + Note: this event could be sent by the master to inform us of the + format of its binlog; in other words maybe it is not at its + original place when it comes to us; we'll know this by checking + log_pos ("artificial" events have log_pos == 0). + */ + if (!is_artificial_event() && created && !thd->rli_fake && !thd->rgi_fake) + { + // check_and_remove stale Start Alter:s + if (flags & LOG_EVENT_BINLOG_IN_USE_F) + check_and_remove_stale_alter(rli); + if (thd->transaction->all.ha_list) + { + /* This is not an error (XA is safe), just an information */ + rli->report(INFORMATION_LEVEL, 0, NULL, + "Rolling back unfinished transaction (no COMMIT " + "or ROLLBACK in relay log). A probable cause is that " + "the master died while writing the transaction to " + "its binary log, thus rolled back too."); + rgi->cleanup_context(thd, 1); + } + } + + /* + If this event comes from ourselves, there is no cleaning task to + perform, we don't call Start_log_event_v3::do_apply_event() + (this was just to update the log's description event). + */ + if (server_id != (uint32) global_system_variables.server_id) + { + /* + If the event was not requested by the slave i.e. the master sent + it while the slave asked for a position >4, the event will make + rli->group_master_log_pos advance. Say that the slave asked for + position 1000, and the Format_desc event's end is 96. Then in + the beginning of replication rli->group_master_log_pos will be + 0, then 96, then jump to first really asked event (which is + >96). So this is ok. 
+ */ + ret= Start_log_event_v3::do_apply_event(rgi); + } + + if (!ret) + { + /* Save the information describing this binlog */ + copy_crypto_data(rli->relay_log.description_event_for_exec); + delete rli->relay_log.description_event_for_exec; + rli->relay_log.description_event_for_exec= this; + } + + DBUG_RETURN(ret); +} + +int Format_description_log_event::do_update_pos(rpl_group_info *rgi) +{ + if (server_id == (uint32) global_system_variables.server_id) + { + /* + We only increase the relay log position if we are skipping + events and do not touch any group_* variables, nor flush the + relay log info. If there is a crash, we will have to re-skip + the events again, but that is a minor issue. + + If we do not skip stepping the group log position (and the + server id was changed when restarting the server), it might well + be that we start executing at a position that is invalid, e.g., + at a Rows_log_event or a Query_log_event preceeded by a + Intvar_log_event instead of starting at a Table_map_log_event or + the Intvar_log_event respectively. 
+ */ + rgi->inc_event_relay_log_pos(); + return 0; + } + else + { + return Log_event::do_update_pos(rgi); + } +} + +Log_event::enum_skip_reason +Format_description_log_event::do_shall_skip(rpl_group_info *rgi) +{ + return Log_event::EVENT_SKIP_NOT; +} + +#endif + + +#if defined(HAVE_REPLICATION) +int Start_encryption_log_event::do_apply_event(rpl_group_info* rgi) +{ + return rgi->rli->relay_log.description_event_for_exec->start_decryption(this); +} + +int Start_encryption_log_event::do_update_pos(rpl_group_info *rgi) +{ + /* + master never sends Start_encryption_log_event, any SELE that a slave + might see was created locally in MYSQL_BIN_LOG::open() on the slave + */ + rgi->inc_event_relay_log_pos(); + return 0; +} + +#endif + + +/************************************************************************** + Load_log_event methods +**************************************************************************/ + +#if defined(HAVE_REPLICATION) +bool Load_log_event::print_query(THD *thd, bool need_db, const char *cs, + String *buf, my_off_t *fn_start, + my_off_t *fn_end, const char *qualify_db) +{ + if (need_db && db && db_len) + { + buf->append(STRING_WITH_LEN("use ")); + append_identifier(thd, buf, db, db_len); + buf->append(STRING_WITH_LEN("; ")); + } + + buf->append(STRING_WITH_LEN("LOAD DATA ")); + + if (is_concurrent) + buf->append(STRING_WITH_LEN("CONCURRENT ")); + + if (fn_start) + *fn_start= buf->length(); + + if (check_fname_outside_temp_buf()) + buf->append(STRING_WITH_LEN("LOCAL ")); + buf->append(STRING_WITH_LEN("INFILE '")); + buf->append_for_single_quote(fname, fname_len); + buf->append(STRING_WITH_LEN("' ")); + + if (sql_ex.opt_flags & REPLACE_FLAG) + buf->append(STRING_WITH_LEN("REPLACE ")); + else if (sql_ex.opt_flags & IGNORE_FLAG) + buf->append(STRING_WITH_LEN("IGNORE ")); + + buf->append(STRING_WITH_LEN("INTO")); + + if (fn_end) + *fn_end= buf->length(); + + buf->append(STRING_WITH_LEN(" TABLE ")); + if (qualify_db) + { + append_identifier(thd, buf, 
qualify_db, strlen(qualify_db)); + buf->append(STRING_WITH_LEN(".")); + } + append_identifier(thd, buf, table_name, table_name_len); + + if (cs != NULL) + { + buf->append(STRING_WITH_LEN(" CHARACTER SET ")); + buf->append(cs, strlen(cs)); + } + + /* We have to create all optional fields as the default is not empty */ + buf->append(STRING_WITH_LEN(" FIELDS TERMINATED BY ")); + pretty_print_str(buf, sql_ex.field_term, sql_ex.field_term_len); + if (sql_ex.opt_flags & OPT_ENCLOSED_FLAG) + buf->append(STRING_WITH_LEN(" OPTIONALLY ")); + buf->append(STRING_WITH_LEN(" ENCLOSED BY ")); + pretty_print_str(buf, sql_ex.enclosed, sql_ex.enclosed_len); + + buf->append(STRING_WITH_LEN(" ESCAPED BY ")); + pretty_print_str(buf, sql_ex.escaped, sql_ex.escaped_len); + + buf->append(STRING_WITH_LEN(" LINES TERMINATED BY ")); + pretty_print_str(buf, sql_ex.line_term, sql_ex.line_term_len); + if (sql_ex.line_start_len) + { + buf->append(STRING_WITH_LEN(" STARTING BY ")); + pretty_print_str(buf, sql_ex.line_start, sql_ex.line_start_len); + } + + if ((long) skip_lines > 0) + { + buf->append(STRING_WITH_LEN(" IGNORE ")); + buf->append_ulonglong(skip_lines); + buf->append(STRING_WITH_LEN(" LINES ")); + } + + if (num_fields) + { + uint i; + const char *field= fields; + buf->append(STRING_WITH_LEN(" (")); + for (i = 0; i < num_fields; i++) + { + if (i) + { + /* + Yes, the space and comma is reversed here. But this is mostly dead + code, at most used when reading really old binlogs from old servers, + so better just leave it as is... 
+ */ + buf->append(STRING_WITH_LEN(" ,")); + } + append_identifier(thd, buf, field, field_lens[i]); + field+= field_lens[i] + 1; + } + buf->append(STRING_WITH_LEN(")")); + } + return 0; +} + + +void Load_log_event::pack_info(Protocol *protocol) +{ + char query_buffer[1024]; + String query_str(query_buffer, sizeof(query_buffer), system_charset_info); + + query_str.length(0); + print_query(protocol->thd, TRUE, NULL, &query_str, 0, 0, NULL); + protocol->store(query_str.ptr(), query_str.length(), &my_charset_bin); +} +#endif /* defined(HAVE_REPLICATION) */ + + +bool Load_log_event::write_data_header() +{ + char buf[LOAD_HEADER_LEN]; + int4store(buf + L_THREAD_ID_OFFSET, slave_proxy_id); + int4store(buf + L_EXEC_TIME_OFFSET, exec_time); + int4store(buf + L_SKIP_LINES_OFFSET, skip_lines); + buf[L_TBL_LEN_OFFSET] = (char)table_name_len; + buf[L_DB_LEN_OFFSET] = (char)db_len; + int4store(buf + L_NUM_FIELDS_OFFSET, num_fields); + return write_data(buf, LOAD_HEADER_LEN) != 0; +} + + +bool Load_log_event::write_data_body() +{ + if (sql_ex.write_data(writer)) + return 1; + if (num_fields && fields && field_lens) + { + if (write_data(field_lens, num_fields) || + write_data(fields, field_block_len)) + return 1; + } + return (write_data(table_name, table_name_len + 1) || + write_data(db, db_len + 1) || + write_data(fname, fname_len)); +} + + +Load_log_event::Load_log_event(THD *thd_arg, const sql_exchange *ex, + const char *db_arg, const char *table_name_arg, + List &fields_arg, + bool is_concurrent_arg, + enum enum_duplicates handle_dup, + bool ignore, bool using_trans) + :Log_event(thd_arg, + (thd_arg->used & THD::THREAD_SPECIFIC_USED) + ? LOG_EVENT_THREAD_SPECIFIC_F : 0, + using_trans), + thread_id(thd_arg->thread_id), + slave_proxy_id((ulong)thd_arg->variables.pseudo_thread_id), + num_fields(0),fields(0), + field_lens(0),field_block_len(0), + table_name(table_name_arg ? 
table_name_arg : ""), + db(db_arg), fname(ex->file_name), local_fname(FALSE), + is_concurrent(is_concurrent_arg) +{ + time_t end_time; + time(&end_time); + exec_time = (ulong) (end_time - thd_arg->start_time); + /* db can never be a zero pointer in 4.0 */ + db_len = (uint32) strlen(db); + table_name_len = (uint32) strlen(table_name); + fname_len = (fname) ? (uint) strlen(fname) : 0; + sql_ex.field_term = ex->field_term->ptr(); + sql_ex.field_term_len = (uint8) ex->field_term->length(); + sql_ex.enclosed = ex->enclosed->ptr(); + sql_ex.enclosed_len = (uint8) ex->enclosed->length(); + sql_ex.line_term = ex->line_term->ptr(); + sql_ex.line_term_len = (uint8) ex->line_term->length(); + sql_ex.line_start = ex->line_start->ptr(); + sql_ex.line_start_len = (uint8) ex->line_start->length(); + sql_ex.escaped = ex->escaped->ptr(); + sql_ex.escaped_len = (uint8) ex->escaped->length(); + sql_ex.opt_flags = 0; + sql_ex.cached_new_format = -1; + + if (ex->dumpfile) + sql_ex.opt_flags|= DUMPFILE_FLAG; + if (ex->opt_enclosed) + sql_ex.opt_flags|= OPT_ENCLOSED_FLAG; + + sql_ex.empty_flags= 0; + + switch (handle_dup) { + case DUP_REPLACE: + sql_ex.opt_flags|= REPLACE_FLAG; + break; + case DUP_UPDATE: // Impossible here + case DUP_ERROR: + break; + } + if (ignore) + sql_ex.opt_flags|= IGNORE_FLAG; + + if (!ex->field_term->length()) + sql_ex.empty_flags |= FIELD_TERM_EMPTY; + if (!ex->enclosed->length()) + sql_ex.empty_flags |= ENCLOSED_EMPTY; + if (!ex->line_term->length()) + sql_ex.empty_flags |= LINE_TERM_EMPTY; + if (!ex->line_start->length()) + sql_ex.empty_flags |= LINE_START_EMPTY; + if (!ex->escaped->length()) + sql_ex.empty_flags |= ESCAPED_EMPTY; + + skip_lines = ex->skip_lines; + + List_iterator li(fields_arg); + field_lens_buf.length(0); + fields_buf.length(0); + Item* item; + while ((item = li++)) + { + num_fields++; + uchar len= (uchar) item->name.length; + field_block_len += len + 1; + fields_buf.append(item->name.str, len + 1); + field_lens_buf.append((char*)&len, 1); 
+ } + + field_lens = (const uchar*)field_lens_buf.ptr(); + fields = fields_buf.ptr(); +} + + +/** + Load_log_event::set_fields() + + @note + This function can not use the member variable + for the database, since LOAD DATA INFILE on the slave + can be for a different database than the current one. + This is the reason for the affected_db argument to this method. +*/ + +void Load_log_event::set_fields(const char* affected_db, + List &field_list, + Name_resolution_context *context) +{ + uint i; + const char* field = fields; + for (i= 0; i < num_fields; i++) + { + LEX_CSTRING field_name= {field, field_lens[i] }; + field_list.push_back(new (thd->mem_root) + Item_field(thd, context, + Lex_cstring_strlen(affected_db), + Lex_cstring_strlen(table_name), + field_name), + thd->mem_root); + field+= field_lens[i] + 1; + } +} + + +#if defined(HAVE_REPLICATION) +/** + Does the data loading job when executing a LOAD DATA on the slave. + + @param net + @param rli + @param use_rli_only_for_errors If set to 1, rli is provided to + Load_log_event::exec_event only for this + function to have RPL_LOG_NAME and + rli->last_slave_error, both being used by + error reports. rli's position advancing + is skipped (done by the caller which is + Execute_load_log_event::exec_event). + If set to 0, rli is provided for full use, + i.e. for error reports and position + advancing. + + @todo + fix this; this can be done by testing rules in + Create_file_log_event::exec_event() and then discarding Append_block and + al. 
+ @todo + this is a bug - this needs to be moved to the I/O thread + + @retval + 0 Success + @retval + 1 Failure +*/ + +int Load_log_event::do_apply_event(NET* net, rpl_group_info *rgi, + bool use_rli_only_for_errors) +{ + Relay_log_info const *rli= rgi->rli; + Rpl_filter *rpl_filter= rli->mi->rpl_filter; + DBUG_ENTER("Load_log_event::do_apply_event"); + + DBUG_ASSERT(thd->query() == 0); + set_thd_db(thd, rpl_filter, db, db_len); + thd->clear_error(1); + + /* see Query_log_event::do_apply_event() and BUG#13360 */ + DBUG_ASSERT(!rgi->m_table_map.count()); + /* + Usually lex_start() is called by mysql_parse(), but we need it here + as the present method does not call mysql_parse(). + */ + lex_start(thd); + thd->lex->local_file= local_fname; + thd->reset_for_next_command(0); // Errors are cleared above + + /* + We test replicate_*_db rules. Note that we have already prepared + the file to load, even if we are going to ignore and delete it + now. So it is possible that we did a lot of disk writes for + nothing. In other words, a big LOAD DATA INFILE on the master will + still consume a lot of space on the slave (space in the relay log + + space of temp files: twice the space of the file to load...) + even if it will finally be ignored. TODO: fix this; this can be + done by testing rules in Create_file_log_event::do_apply_event() + and then discarding Append_block and al. Another way is do the + filtering in the I/O thread (more efficient: no disk writes at + all). + + + Note: We do not need to execute reset_one_shot_variables() if this + db_ok() test fails. + Reason: The db stored in binlog events is the same for SET and for + its companion query. If the SET is ignored because of + db_ok(), the companion query will also be ignored, and if + the companion query is ignored in the db_ok() test of + ::do_apply_event(), then the companion SET also have so + we don't need to reset_one_shot_variables(). 
+ */ + if (rpl_filter->db_ok(thd->db.str)) + { + thd->set_time(when, when_sec_part); + thd->set_query_id(next_query_id()); + thd->get_stmt_da()->opt_clear_warning_info(thd->query_id); + + TABLE_LIST tables; + LEX_CSTRING db_name= { thd->strmake(thd->db.str, thd->db.length), thd->db.length }; + if (lower_case_table_names) + my_casedn_str(system_charset_info, (char *)table_name); + LEX_CSTRING tbl_name= { table_name, strlen(table_name) }; + tables.init_one_table(&db_name, &tbl_name, 0, TL_WRITE); + tables.updating= 1; + + // the table will be opened in mysql_load + if (rpl_filter->is_on() && !rpl_filter->tables_ok(thd->db.str, &tables)) + { + // TODO: this is a bug - this needs to be moved to the I/O thread + if (net) + skip_load_data_infile(net); + } + else + { + enum enum_duplicates handle_dup; + bool ignore= 0; + char query_buffer[1024]; + String query_str(query_buffer, sizeof(query_buffer), system_charset_info); + char *load_data_query; + + query_str.length(0); + /* + Forge LOAD DATA INFILE query which will be used in SHOW PROCESS LIST + and written to slave's binlog if binlogging is on. + */ + print_query(thd, FALSE, NULL, &query_str, NULL, NULL, NULL); + if (!(load_data_query= (char *)thd->strmake(query_str.ptr(), + query_str.length()))) + { + /* + This will set thd->fatal_error in case of OOM. So we surely will notice + that something is wrong. + */ + goto error; + } + + thd->set_query(load_data_query, (uint) (query_str.length())); + + if (sql_ex.opt_flags & REPLACE_FLAG) + handle_dup= DUP_REPLACE; + else if (sql_ex.opt_flags & IGNORE_FLAG) + { + ignore= 1; + handle_dup= DUP_ERROR; + } + else + { + /* + When replication is running fine, if it was DUP_ERROR on the + master then we could choose IGNORE here, because if DUP_ERROR + suceeded on master, and data is identical on the master and slave, + then there should be no uniqueness errors on slave, so IGNORE is + the same as DUP_ERROR. 
But in the unlikely case of uniqueness errors + (because the data on the master and slave happen to be different + (user error or bug), we want LOAD DATA to print an error message on + the slave to discover the problem. + + If reading from net (a 3.23 master), mysql_load() will change this + to IGNORE. + */ + handle_dup= DUP_ERROR; + } + /* + We need to set thd->lex->sql_command and thd->lex->duplicates + since InnoDB tests these variables to decide if this is a LOAD + DATA ... REPLACE INTO ... statement even though mysql_parse() + is not called. This is not needed in 5.0 since there the LOAD + DATA ... statement is replicated using mysql_parse(), which + sets the thd->lex fields correctly. + */ + thd->lex->sql_command= SQLCOM_LOAD; + thd->lex->duplicates= handle_dup; + + sql_exchange ex((char*)fname, sql_ex.opt_flags & DUMPFILE_FLAG); + String field_term(sql_ex.field_term,sql_ex.field_term_len,log_cs); + String enclosed(sql_ex.enclosed,sql_ex.enclosed_len,log_cs); + String line_term(sql_ex.line_term,sql_ex.line_term_len,log_cs); + String line_start(sql_ex.line_start,sql_ex.line_start_len,log_cs); + String escaped(sql_ex.escaped,sql_ex.escaped_len, log_cs); + ex.field_term= &field_term; + ex.enclosed= &enclosed; + ex.line_term= &line_term; + ex.line_start= &line_start; + ex.escaped= &escaped; + + ex.opt_enclosed = (sql_ex.opt_flags & OPT_ENCLOSED_FLAG); + if (sql_ex.empty_flags & FIELD_TERM_EMPTY) + ex.field_term->length(0); + + ex.skip_lines = skip_lines; + List field_list; + thd->lex->first_select_lex()->context.resolve_in_table_list_only(&tables); + set_fields(tables.db.str, + field_list, &thd->lex->first_select_lex()->context); + thd->variables.pseudo_thread_id= thread_id; + if (net) + { + // mysql_load will use thd->net to read the file + thd->net.vio = net->vio; + // Make sure the client does not get confused about the packet sequence + thd->net.pkt_nr = net->pkt_nr; + } + /* + It is safe to use tmp_list twice because we are not going to + update it inside 
mysql_load(). + */ + List tmp_list; + if (thd->open_temporary_tables(&tables) || + mysql_load(thd, &ex, &tables, field_list, tmp_list, tmp_list, + handle_dup, ignore, net != 0)) + thd->is_slave_error= 1; + if (thd->cuted_fields) + { + /* log_pos is the position of the LOAD event in the master log */ + sql_print_warning("Slave: load data infile on table '%s' at " + "log position %llu in log '%s' produced %ld " + "warning(s). Default database: '%s'", + (char*) table_name, log_pos, RPL_LOG_NAME, + (ulong) thd->cuted_fields, + thd->get_db()); + } + if (net) + net->pkt_nr= thd->net.pkt_nr; + } + } + else + { + /* + We will just ask the master to send us /dev/null if we do not + want to load the data. + TODO: this a bug - needs to be done in I/O thread + */ + if (net) + skip_load_data_infile(net); + } + +error: + thd->net.vio = 0; + const char *remember_db= thd->get_db(); + thd->catalog= 0; + thd->set_db(&null_clex_str); /* will free the current database */ + thd->reset_query(); + thd->get_stmt_da()->set_overwrite_status(true); + thd->is_error() ? trans_rollback_stmt(thd) : trans_commit_stmt(thd); + thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_GTID_BEGIN); + thd->get_stmt_da()->set_overwrite_status(false); + close_thread_tables(thd); + /* + - If transaction rollback was requested due to deadlock + perform it and release metadata locks. + - If inside a multi-statement transaction, + defer the release of metadata locks until the current + transaction is either committed or rolled back. This prevents + other statements from modifying the table for the entire + duration of this transaction. This provides commit ordering + and guarantees serializability across multiple transactions. + - If in autocommit mode, or outside a transactional context, + automatically release metadata locks of the current statement. + */ + if (thd->transaction_rollback_request) + { + trans_rollback_implicit(thd); + thd->release_transactional_locks(); + } + else if (! 
thd->in_multi_stmt_transaction_mode()) + thd->release_transactional_locks(); + else + thd->mdl_context.release_statement_locks(); + + DBUG_EXECUTE_IF("LOAD_DATA_INFILE_has_fatal_error", + thd->is_slave_error= 0; thd->is_fatal_error= 1;); + + if (unlikely(thd->is_slave_error)) + { + /* this err/sql_errno code is copy-paste from net_send_error() */ + const char *err; + int sql_errno; + if (thd->is_error()) + { + err= thd->get_stmt_da()->message(); + sql_errno= thd->get_stmt_da()->sql_errno(); + } + else + { + sql_errno=ER_UNKNOWN_ERROR; + err= ER_THD(thd, sql_errno); + } + rli->report(ERROR_LEVEL, sql_errno, rgi->gtid_info(), "\ +Error '%s' running LOAD DATA INFILE on table '%s'. Default database: '%s'", + err, (char*)table_name, remember_db); + free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); + DBUG_RETURN(1); + } + free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); + + if (unlikely(thd->is_fatal_error)) + { + char buf[256]; + my_snprintf(buf, sizeof(buf), + "Running LOAD DATA INFILE on table '%-.64s'." + " Default database: '%-.64s'", + (char*)table_name, + remember_db); + + rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, rgi->gtid_info(), + ER_THD(thd, ER_SLAVE_FATAL_ERROR), buf); + DBUG_RETURN(1); + } + + DBUG_RETURN( use_rli_only_for_errors ? 
0 : Log_event::do_apply_event(rgi) ); +} +#endif + + +/************************************************************************** + Rotate_log_event methods +**************************************************************************/ + +#if defined(HAVE_REPLICATION) +void Rotate_log_event::pack_info(Protocol *protocol) +{ + StringBuffer<256> tmp(log_cs); + tmp.length(0); + tmp.append(new_log_ident, ident_len); + tmp.append(STRING_WITH_LEN(";pos=")); + tmp.append_ulonglong(pos); + protocol->store(tmp.ptr(), tmp.length(), &my_charset_bin); +} +#endif + + +Rotate_log_event::Rotate_log_event(const char* new_log_ident_arg, + uint ident_len_arg, ulonglong pos_arg, + uint flags_arg) + :Log_event(), new_log_ident(new_log_ident_arg), + pos(pos_arg),ident_len(ident_len_arg ? ident_len_arg : + (uint) strlen(new_log_ident_arg)), flags(flags_arg) +{ + DBUG_ENTER("Rotate_log_event::Rotate_log_event(...,flags)"); + DBUG_PRINT("enter",("new_log_ident: %s pos: %llu flags: %lu", new_log_ident_arg, + pos_arg, (ulong) flags)); + cache_type= EVENT_NO_CACHE; + if (flags & DUP_NAME) + new_log_ident= my_strndup(PSI_INSTRUMENT_ME, new_log_ident_arg, ident_len, MYF(MY_WME)); + if (flags & RELAY_LOG) + set_relay_log_event(); + DBUG_VOID_RETURN; +} + + +bool Rotate_log_event::write() +{ + char buf[ROTATE_HEADER_LEN]; + int8store(buf + R_POS_OFFSET, pos); + return (write_header(ROTATE_HEADER_LEN + ident_len) || + write_data(buf, ROTATE_HEADER_LEN) || + write_data(new_log_ident, (uint) ident_len) || + write_footer()); +} + + +#if defined(HAVE_REPLICATION) + +/* + Got a rotate log event from the master. + + This is mainly used so that we can later figure out the logname and + position for the master. + + We can't rotate the slave's BINlog as this will cause infinite rotations + in an A -> B -> A setup. + The NOTES below is a wrong comment which will disappear when 4.1 is merged. + + This must only be called from the Slave SQL thread, since it calls + Relay_log_info::flush(). 
+ + @retval + 0 ok + 1 error +*/ +int Rotate_log_event::do_update_pos(rpl_group_info *rgi) +{ + int error= 0; + Relay_log_info *rli= rgi->rli; + DBUG_ENTER("Rotate_log_event::do_update_pos"); + + DBUG_PRINT("info", ("server_id=%lu; ::server_id=%lu", + (ulong) this->server_id, (ulong) global_system_variables.server_id)); + DBUG_PRINT("info", ("new_log_ident: %s", this->new_log_ident)); + DBUG_PRINT("info", ("pos: %llu", this->pos)); + + /* + If we are in a transaction or in a group: the only normal case is + when the I/O thread was copying a big transaction, then it was + stopped and restarted: we have this in the relay log: + + BEGIN + ... + ROTATE (a fake one) + ... + COMMIT or ROLLBACK + + In that case, we don't want to touch the coordinates which + correspond to the beginning of the transaction. Starting from + 5.0.0, there also are some rotates from the slave itself, in the + relay log, which shall not change the group positions. + + In parallel replication, rotate event is executed out-of-band with normal + events, so we cannot update group_master_log_name or _pos here, it will + be updated with the next normal event instead. 
+ */ + if ((server_id != global_system_variables.server_id || + rli->replicate_same_server_id) && + !is_relay_log_event() && + !rli->is_in_group() && + !rgi->is_parallel_exec) + { + mysql_mutex_lock(&rli->data_lock); + DBUG_PRINT("info", ("old group_master_log_name: '%s' " + "old group_master_log_pos: %lu", + rli->group_master_log_name, + (ulong) rli->group_master_log_pos)); + memcpy(rli->group_master_log_name, new_log_ident, ident_len+1); + rli->notify_group_master_log_name_update(); + rli->inc_group_relay_log_pos(pos, rgi, TRUE /* skip_lock */); + DBUG_PRINT("info", ("new group_master_log_name: '%s' " + "new group_master_log_pos: %lu", + rli->group_master_log_name, + (ulong) rli->group_master_log_pos)); + mysql_mutex_unlock(&rli->data_lock); + rpl_global_gtid_slave_state->record_and_update_gtid(thd, rgi); + error= rli->flush(); + + /* + Reset thd->variables.option_bits and sql_mode etc, because this could + be the signal of a master's downgrade from 5.0 to 4.0. + However, no need to reset description_event_for_exec: indeed, if the next + master is 5.0 (even 5.0.1) we will soon get a Format_desc; if the next + master is 4.0 then the events are in the slave's format (conversion). 
+ */ + set_slave_thread_options(thd); + set_slave_thread_default_charset(thd, rgi); + thd->variables.sql_mode= global_system_variables.sql_mode; + thd->variables.auto_increment_increment= + thd->variables.auto_increment_offset= 1; + } + else + rgi->inc_event_relay_log_pos(); + + DBUG_RETURN(error); +} + + +Log_event::enum_skip_reason +Rotate_log_event::do_shall_skip(rpl_group_info *rgi) +{ + enum_skip_reason reason= Log_event::do_shall_skip(rgi); + + switch (reason) { + case Log_event::EVENT_SKIP_NOT: + case Log_event::EVENT_SKIP_COUNT: + return Log_event::EVENT_SKIP_NOT; + + case Log_event::EVENT_SKIP_IGNORE: + return Log_event::EVENT_SKIP_IGNORE; + } + DBUG_ASSERT(0); + return Log_event::EVENT_SKIP_NOT; // To keep compiler happy +} + +#endif + + +/************************************************************************** + Binlog_checkpoint_log_event methods +**************************************************************************/ + +#if defined(HAVE_REPLICATION) +void Binlog_checkpoint_log_event::pack_info(Protocol *protocol) +{ + protocol->store(binlog_file_name, binlog_file_len, &my_charset_bin); +} + + +Log_event::enum_skip_reason +Binlog_checkpoint_log_event::do_shall_skip(rpl_group_info *rgi) +{ + enum_skip_reason reason= Log_event::do_shall_skip(rgi); + if (reason == EVENT_SKIP_COUNT) + reason= EVENT_SKIP_NOT; + return reason; +} +#endif + + +Binlog_checkpoint_log_event::Binlog_checkpoint_log_event( + const char *binlog_file_name_arg, + uint binlog_file_len_arg) + :Log_event(), + binlog_file_name(my_strndup(PSI_INSTRUMENT_ME, binlog_file_name_arg, binlog_file_len_arg, + MYF(MY_WME))), + binlog_file_len(binlog_file_len_arg) +{ + cache_type= EVENT_NO_CACHE; +} + + +bool Binlog_checkpoint_log_event::write() +{ + uchar buf[BINLOG_CHECKPOINT_HEADER_LEN]; + int4store(buf, binlog_file_len); + return write_header(BINLOG_CHECKPOINT_HEADER_LEN + binlog_file_len) || + write_data(buf, BINLOG_CHECKPOINT_HEADER_LEN) || + write_data(binlog_file_name, binlog_file_len) 
|| + write_footer(); +} + + +/************************************************************************** + Global transaction ID stuff +**************************************************************************/ + +Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg, + uint32 domain_id_arg, bool standalone, + uint16 flags_arg, bool is_transactional, + uint64 commit_id_arg, bool has_xid, + bool ro_1pc) + : Log_event(thd_arg, flags_arg, is_transactional), + seq_no(seq_no_arg), commit_id(commit_id_arg), domain_id(domain_id_arg), + flags2((standalone ? FL_STANDALONE : 0) | + (commit_id_arg ? FL_GROUP_COMMIT_ID : 0)), + flags_extra(0), extra_engines(0) +{ + cache_type= Log_event::EVENT_NO_CACHE; + bool is_tmp_table= thd_arg->lex->stmt_accessed_temp_table(); + if (thd_arg->transaction->stmt.trans_did_wait() || + thd_arg->transaction->all.trans_did_wait()) + flags2|= FL_WAITED; + if (thd_arg->transaction->stmt.trans_did_ddl() || + thd_arg->transaction->stmt.has_created_dropped_temp_table() || + thd_arg->transaction->stmt.trans_executed_admin_cmd() || + thd_arg->transaction->all.trans_did_ddl() || + thd_arg->transaction->all.has_created_dropped_temp_table() || + thd_arg->transaction->all.trans_executed_admin_cmd()) + flags2|= FL_DDL; + else if (is_transactional && !is_tmp_table && + !(thd_arg->transaction->all.modified_non_trans_table && + thd->variables.binlog_direct_non_trans_update == 0 && + !thd->is_current_stmt_binlog_format_row())) + flags2|= FL_TRANSACTIONAL; + if (!(thd_arg->variables.option_bits & OPTION_RPL_SKIP_PARALLEL)) + flags2|= FL_ALLOW_PARALLEL; + /* Preserve any DDL or WAITED flag in the slave's binlog. 
*/ + if (thd_arg->rgi_slave) + flags2|= (thd_arg->rgi_slave->gtid_ev_flags2 & (FL_DDL|FL_WAITED)); + + XID_STATE &xid_state= thd->transaction->xid_state; + if (is_transactional) + { + if (xid_state.is_explicit_XA() && + (thd->lex->sql_command == SQLCOM_XA_PREPARE || + xid_state.get_state_code() == XA_PREPARED)) + { + DBUG_ASSERT(!(thd->lex->sql_command == SQLCOM_XA_COMMIT && + thd->lex->xa_opt == XA_ONE_PHASE)); + + flags2|= thd->lex->sql_command == SQLCOM_XA_PREPARE ? + FL_PREPARED_XA : FL_COMPLETED_XA; + xid.set(xid_state.get_xid()); + } + /* count non-zero extra recoverable engines; total = extra + 1 */ + if (has_xid) + { + DBUG_ASSERT(ha_count_rw_2pc(thd_arg, + thd_arg->in_multi_stmt_transaction_mode())); + + extra_engines= + ha_count_rw_2pc(thd_arg, thd_arg->in_multi_stmt_transaction_mode()) - 1; + } + else if (ro_1pc) + { + extra_engines= UCHAR_MAX; + } + else if (thd->lex->sql_command == SQLCOM_XA_PREPARE) + { + DBUG_ASSERT(thd_arg->in_multi_stmt_transaction_mode()); + + uint8 count= ha_count_rw_2pc(thd_arg, true); + extra_engines= count > 1 ? 0 : UCHAR_MAX; + } + if (extra_engines > 0) + flags_extra|= FL_EXTRA_MULTI_ENGINE_E1; + } + if (thd->get_binlog_flags_for_alter()) + { + flags_extra |= thd->get_binlog_flags_for_alter(); + if (flags_extra & (FL_COMMIT_ALTER_E1 | FL_ROLLBACK_ALTER_E1)) + sa_seq_no= thd->get_binlog_start_alter_seq_no(); + flags2|= FL_DDL; + } + + DBUG_ASSERT(thd_arg->lex->sql_command != SQLCOM_CREATE_SEQUENCE || + (flags2 & FL_DDL) || thd_arg->in_multi_stmt_transaction_mode()); +} + + +/* + Used to record GTID while sending binlog to slave, without having to + fully construct every Gtid_log_event() needlessly. 
+*/ +bool +Gtid_log_event::peek(const uchar *event_start, size_t event_len, + enum enum_binlog_checksum_alg checksum_alg, + uint32 *domain_id, uint32 *server_id, uint64 *seq_no, + uchar *flags2, const Format_description_log_event *fdev) +{ + const uchar *p; + + if (checksum_alg == BINLOG_CHECKSUM_ALG_CRC32) + { + if (event_len > BINLOG_CHECKSUM_LEN) + event_len-= BINLOG_CHECKSUM_LEN; + else + event_len= 0; + } + else + DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF || + checksum_alg == BINLOG_CHECKSUM_ALG_OFF); + + if (event_len < (uint32)fdev->common_header_len + GTID_HEADER_LEN) + return true; + *server_id= uint4korr(event_start + SERVER_ID_OFFSET); + p= event_start + fdev->common_header_len; + *seq_no= uint8korr(p); + p+= 8; + *domain_id= uint4korr(p); + p+= 4; + *flags2= *p; + return false; +} + + +bool +Gtid_log_event::write() +{ + uchar buf[GTID_HEADER_LEN+2+sizeof(XID) + /* flags_extra: */ 1+4]; + size_t write_len= 13; + + int8store(buf, seq_no); + int4store(buf+8, domain_id); + buf[12]= flags2; + if (flags2 & FL_GROUP_COMMIT_ID) + { + DBUG_ASSERT(write_len + 8 == GTID_HEADER_LEN + 2); + + int8store(buf+write_len, commit_id); + write_len= GTID_HEADER_LEN + 2; + } + + if (flags2 & (FL_PREPARED_XA | FL_COMPLETED_XA)) + { + int4store(&buf[write_len], xid.formatID); + buf[write_len +4]= (uchar) xid.gtrid_length; + buf[write_len +4+1]= (uchar) xid.bqual_length; + write_len+= 6; + long data_length= xid.bqual_length + xid.gtrid_length; + memcpy(buf+write_len, xid.data, data_length); + write_len+= data_length; + } + if (flags_extra > 0) + { + buf[write_len]= flags_extra; + write_len++; + } + if (flags_extra & FL_EXTRA_MULTI_ENGINE_E1) + { + buf[write_len]= extra_engines; + write_len++; + } + + if (flags_extra & (FL_COMMIT_ALTER_E1 | FL_ROLLBACK_ALTER_E1)) + { + int8store(buf + write_len, sa_seq_no); + write_len+= 8; + } + + if (write_len < GTID_HEADER_LEN) + { + bzero(buf+write_len, GTID_HEADER_LEN-write_len); + write_len= GTID_HEADER_LEN; + } + return 
write_header(write_len) || + write_data(buf, write_len) || + write_footer(); +} + + +/* + Replace a GTID event with either a BEGIN event, dummy event, or nothing, as + appropriate to work with old slave that does not know global transaction id. + + The need_dummy_event argument is an IN/OUT argument. It is passed as TRUE + if slave has capability lower than MARIA_SLAVE_CAPABILITY_TOLERATE_HOLES. + It is returned TRUE if we return a BEGIN (or dummy) event to be sent to the + slave, FALSE if event should be skipped completely. +*/ +int +Gtid_log_event::make_compatible_event(String *packet, bool *need_dummy_event, + ulong ev_offset, + enum enum_binlog_checksum_alg checksum_alg) +{ + uchar flags2; + if (packet->length() - ev_offset < LOG_EVENT_HEADER_LEN + GTID_HEADER_LEN) + return 1; + flags2= (*packet)[ev_offset + LOG_EVENT_HEADER_LEN + 12]; + if (flags2 & FL_STANDALONE) + { + if (*need_dummy_event) + return Query_log_event::dummy_event(packet, ev_offset, checksum_alg); + return 0; + } + + *need_dummy_event= true; + return Query_log_event::begin_event(packet, ev_offset, checksum_alg); +} + + +#ifdef HAVE_REPLICATION +void +Gtid_log_event::pack_info(Protocol *protocol) +{ + char buf[6+5+10+1+10+1+20+1+4+20+1+ ser_buf_size+5 /* sprintf */]; + char *p; + p = strmov(buf, (flags2 & FL_STANDALONE ? "GTID " : + flags2 & FL_PREPARED_XA ? 
"XA START " : "BEGIN GTID ")); + if (flags2 & FL_PREPARED_XA) + { + p+= sprintf(p, "%s GTID ", xid.serialize()); + } + p= longlong10_to_str(domain_id, p, 10); + *p++= '-'; + p= longlong10_to_str(server_id, p, 10); + *p++= '-'; + p= longlong10_to_str(seq_no, p, 10); + if (flags2 & FL_GROUP_COMMIT_ID) + { + p= strmov(p, " cid="); + p= longlong10_to_str(commit_id, p, 10); + } + if (flags_extra & FL_START_ALTER_E1) + { + p= strmov(p, " START ALTER"); + } + if (flags_extra & FL_COMMIT_ALTER_E1) + { + p= strmov(p, " COMMIT ALTER id="); + p= longlong10_to_str(sa_seq_no, p, 10); + } + if (flags_extra & FL_ROLLBACK_ALTER_E1) + { + p= strmov(p, " ROLLBACK ALTER id="); + p= longlong10_to_str(sa_seq_no, p, 10); + } + + protocol->store(buf, p-buf, &my_charset_bin); +} + +static char gtid_begin_string[] = "BEGIN"; + +int +Gtid_log_event::do_apply_event(rpl_group_info *rgi) +{ + ulonglong bits= thd->variables.option_bits; + thd->variables.server_id= this->server_id; + thd->variables.gtid_domain_id= this->domain_id; + thd->variables.gtid_seq_no= this->seq_no; + rgi->gtid_ev_flags2= flags2; + + rgi->gtid_ev_flags_extra= flags_extra; + rgi->gtid_ev_sa_seq_no= sa_seq_no; + thd->reset_for_next_command(); + + if (opt_gtid_strict_mode && opt_bin_log && opt_log_slave_updates) + { + if (mysql_bin_log.check_strict_gtid_sequence(this->domain_id, + this->server_id, this->seq_no)) + return 1; + } + + DBUG_ASSERT((bits & OPTION_GTID_BEGIN) == 0); + + Master_info *mi=rgi->rli->mi; + switch (flags2 & (FL_DDL | FL_TRANSACTIONAL)) + { + case FL_TRANSACTIONAL: + mi->total_trans_groups++; + break; + case FL_DDL: + mi->total_ddl_groups++; + break; + default: + mi->total_non_trans_groups++; + } + + if (flags2 & FL_STANDALONE) + return 0; + + /* Execute this like a BEGIN query event. 
*/ + bits|= OPTION_GTID_BEGIN; + if (flags2 & FL_ALLOW_PARALLEL) + bits&= ~(ulonglong)OPTION_RPL_SKIP_PARALLEL; + else + bits|= (ulonglong)OPTION_RPL_SKIP_PARALLEL; + thd->variables.option_bits= bits; + DBUG_PRINT("info", ("Set OPTION_GTID_BEGIN")); + thd->is_slave_error= 0; + + char buf_xa[sizeof("XA START") + 1 + ser_buf_size]; + if (flags2 & FL_PREPARED_XA) + { + const char fmt[]= "XA START %s"; + + thd->lex->xid= &xid; + thd->lex->xa_opt= XA_NONE; + sprintf(buf_xa, fmt, xid.serialize()); + thd->set_query_and_id(buf_xa, static_cast(strlen(buf_xa)), + &my_charset_bin, next_query_id()); + thd->lex->sql_command= SQLCOM_XA_START; + if (trans_xa_start(thd)) + { + DBUG_PRINT("error", ("trans_xa_start() failed")); + thd->is_slave_error= 1; + } + } + else + { + thd->set_query_and_id(gtid_begin_string, sizeof(gtid_begin_string)-1, + &my_charset_bin, next_query_id()); + thd->lex->sql_command= SQLCOM_BEGIN; + if (trans_begin(thd, 0)) + { + DBUG_PRINT("error", ("trans_begin() failed")); + thd->is_slave_error= 1; + } + } + status_var_increment(thd->status_var.com_stat[thd->lex->sql_command]); + thd->update_stats(); + + if (likely(!thd->is_slave_error)) + general_log_write(thd, COM_QUERY, thd->query(), thd->query_length()); + + thd->reset_query(); + free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); + return thd->is_slave_error; +} + + +int +Gtid_log_event::do_update_pos(rpl_group_info *rgi) +{ + rgi->inc_event_relay_log_pos(); + return 0; +} + + +Log_event::enum_skip_reason +Gtid_log_event::do_shall_skip(rpl_group_info *rgi) +{ + Relay_log_info *rli= rgi->rli; + /* + An event skipped due to @@skip_replication must not be counted towards the + number of events to be skipped due to @@sql_slave_skip_counter. 
+ */ + if (flags & LOG_EVENT_SKIP_REPLICATION_F && + opt_replicate_events_marked_for_skip != RPL_SKIP_REPLICATE) + return Log_event::EVENT_SKIP_IGNORE; + + if (rli->slave_skip_counter > 0) + { + if (!(flags2 & FL_STANDALONE)) + { + thd->variables.option_bits|= OPTION_BEGIN; + DBUG_ASSERT(rgi->rli->get_flag(Relay_log_info::IN_TRANSACTION)); + } + return Log_event::continue_group(rgi); + } + return Log_event::do_shall_skip(rgi); +} + + +#endif /* HAVE_REPLICATION */ + + + +Gtid_list_log_event::Gtid_list_log_event(rpl_binlog_state *gtid_set, + uint32 gl_flags_) + : count(gtid_set->count()), gl_flags(gl_flags_), list(0), sub_id_list(0) +{ + cache_type= EVENT_NO_CACHE; + /* Failure to allocate memory will be caught by is_valid() returning false. */ + if (count < (1<<28) && + (list = (rpl_gtid *)my_malloc(PSI_INSTRUMENT_ME, + count * sizeof(*list) + (count == 0), MYF(MY_WME)))) + gtid_set->get_gtid_list(list, count); +} + + +Gtid_list_log_event::Gtid_list_log_event(slave_connection_state *gtid_set, + uint32 gl_flags_) + : count(gtid_set->count()), gl_flags(gl_flags_), list(0), sub_id_list(0) +{ + cache_type= EVENT_NO_CACHE; + /* Failure to allocate memory will be caught by is_valid() returning false. 
*/ + if (count < (1<<28) && + (list = (rpl_gtid *)my_malloc(PSI_INSTRUMENT_ME, + count * sizeof(*list) + (count == 0), MYF(MY_WME)))) + { + gtid_set->get_gtid_list(list, count); +#if defined(HAVE_REPLICATION) + if (gl_flags & FLAG_IGN_GTIDS) + { + uint32 i; + + if (!(sub_id_list= (uint64 *)my_malloc(PSI_INSTRUMENT_ME, + count * sizeof(uint64), MYF(MY_WME)))) + { + my_free(list); + list= NULL; + return; + } + for (i= 0; i < count; ++i) + { + if (!(sub_id_list[i]= + rpl_global_gtid_slave_state->next_sub_id(list[i].domain_id))) + { + my_free(list); + my_free(sub_id_list); + list= NULL; + sub_id_list= NULL; + return; + } + } + } +#endif + } +} + + +#if defined(HAVE_REPLICATION) +bool +Gtid_list_log_event::to_packet(String *packet) +{ + uint32 i; + uchar *p; + uint32 needed_length; + + DBUG_ASSERT(count < 1<<28); + + needed_length= packet->length() + get_data_size(); + if (packet->reserve(needed_length)) + return true; + p= (uchar *)packet->ptr() + packet->length();; + packet->length(needed_length); + int4store(p, (count & ((1<<28)-1)) | gl_flags); + p += 4; + /* Initialise the padding for empty Gtid_list. 
*/ + if (count == 0) + int2store(p, 0); + for (i= 0; i < count; ++i) + { + int4store(p, list[i].domain_id); + int4store(p+4, list[i].server_id); + int8store(p+8, list[i].seq_no); + p += 16; + } + + return false; +} + + +bool +Gtid_list_log_event::write() +{ + char buf[128]; + String packet(buf, sizeof(buf), system_charset_info); + + packet.length(0); + if (to_packet(&packet)) + return true; + return write_header(get_data_size()) || + write_data(packet.ptr(), packet.length()) || + write_footer(); +} + + +int +Gtid_list_log_event::do_apply_event(rpl_group_info *rgi) +{ + Relay_log_info *rli= const_cast(rgi->rli); + int ret; + if (gl_flags & FLAG_IGN_GTIDS) + { + void *hton= NULL; + uint32 i; + + for (i= 0; i < count; ++i) + { + if ((ret= rpl_global_gtid_slave_state->record_gtid(thd, &list[i], + sub_id_list[i], + false, false, &hton))) + return ret; + rpl_global_gtid_slave_state->update_state_hash(sub_id_list[i], &list[i], + hton, NULL); + } + } + ret= Log_event::do_apply_event(rgi); + if (rli->until_condition == Relay_log_info::UNTIL_GTID && + (gl_flags & FLAG_UNTIL_REACHED)) + { + char str_buf[128]; + String str(str_buf, sizeof(str_buf), system_charset_info); + rli->until_gtid_pos.to_string(&str); + sql_print_information("Slave SQL thread stops because it reached its" + " UNTIL master_gtid_pos %s", str.c_ptr_safe()); + rli->abort_slave= true; + rli->stop_for_until= true; + } + free_root(thd->mem_root, MYF(MY_KEEP_PREALLOC)); + return ret; +} + + +Log_event::enum_skip_reason +Gtid_list_log_event::do_shall_skip(rpl_group_info *rgi) +{ + enum_skip_reason reason= Log_event::do_shall_skip(rgi); + if (reason == EVENT_SKIP_COUNT) + reason= EVENT_SKIP_NOT; + return reason; +} + + +void +Gtid_list_log_event::pack_info(Protocol *protocol) +{ + char buf_mem[1024]; + String buf(buf_mem, sizeof(buf_mem), system_charset_info); + uint32 i; + bool first; + + /* + For output consistency and ease of reading, we sort the GTID list in + ascending order + */ + qsort(list, count, 
sizeof(rpl_gtid), compare_glle_gtids); + + buf.length(0); + buf.append(STRING_WITH_LEN("[")); + first= true; + for (i= 0; i < count; ++i) + rpl_slave_state_tostring_helper(&buf, &list[i], &first); + buf.append(STRING_WITH_LEN("]")); + + protocol->store(&buf); +} +#endif /* HAVE_REPLICATION */ + + + +/************************************************************************** + Intvar_log_event methods +**************************************************************************/ + +#if defined(HAVE_REPLICATION) +void Intvar_log_event::pack_info(Protocol *protocol) +{ + char buf[256], *pos; + pos= strmake(buf, get_var_type_name(), sizeof(buf)-23); + *pos++= '='; + pos= longlong10_to_str(val, pos, -10); + protocol->store(buf, (uint) (pos-buf), &my_charset_bin); +} +#endif + + +bool Intvar_log_event::write() +{ + uchar buf[9]; + buf[I_TYPE_OFFSET]= (uchar) type; + int8store(buf + I_VAL_OFFSET, val); + return write_header(sizeof(buf)) || + write_data(buf, sizeof(buf)) || + write_footer(); +} + + +#if defined(HAVE_REPLICATION) + +/* + Intvar_log_event::do_apply_event() +*/ + +int Intvar_log_event::do_apply_event(rpl_group_info *rgi) +{ + DBUG_ENTER("Intvar_log_event::do_apply_event"); + if (rgi->deferred_events_collecting) + { + DBUG_PRINT("info",("deferring event")); + DBUG_RETURN(rgi->deferred_events->add(this)); + } + + switch (type) { + case LAST_INSERT_ID_EVENT: + thd->first_successful_insert_id_in_prev_stmt= val; + DBUG_PRINT("info",("last_insert_id_event: %ld", (long) val)); + break; + case INSERT_ID_EVENT: + thd->force_one_auto_inc_interval(val); + break; + } + DBUG_RETURN(0); +} + +int Intvar_log_event::do_update_pos(rpl_group_info *rgi) +{ + rgi->inc_event_relay_log_pos(); + return 0; +} + + +Log_event::enum_skip_reason +Intvar_log_event::do_shall_skip(rpl_group_info *rgi) +{ + /* + It is a common error to set the slave skip counter to 1 instead of + 2 when recovering from an insert which used a auto increment, + rand, or user var. 
Therefore, if the slave skip counter is 1, we + just say that this event should be skipped by ignoring it, meaning + that we do not change the value of the slave skip counter since it + will be decreased by the following insert event. + */ + return continue_group(rgi); +} + +#endif + + +/************************************************************************** + Rand_log_event methods +**************************************************************************/ + +#if defined(HAVE_REPLICATION) +void Rand_log_event::pack_info(Protocol *protocol) +{ + char buf1[256], *pos; + pos= strmov(buf1,"rand_seed1="); + pos= int10_to_str((long) seed1, pos, 10); + pos= strmov(pos, ",rand_seed2="); + pos= int10_to_str((long) seed2, pos, 10); + protocol->store(buf1, (uint) (pos-buf1), &my_charset_bin); +} +#endif + + +bool Rand_log_event::write() +{ + uchar buf[16]; + int8store(buf + RAND_SEED1_OFFSET, seed1); + int8store(buf + RAND_SEED2_OFFSET, seed2); + return write_header(sizeof(buf)) || + write_data(buf, sizeof(buf)) || + write_footer(); +} + + +#if defined(HAVE_REPLICATION) +int Rand_log_event::do_apply_event(rpl_group_info *rgi) +{ + if (rgi->deferred_events_collecting) + return rgi->deferred_events->add(this); + + thd->rand.seed1= (ulong) seed1; + thd->rand.seed2= (ulong) seed2; + return 0; +} + +int Rand_log_event::do_update_pos(rpl_group_info *rgi) +{ + rgi->inc_event_relay_log_pos(); + return 0; +} + + +Log_event::enum_skip_reason +Rand_log_event::do_shall_skip(rpl_group_info *rgi) +{ + /* + It is a common error to set the slave skip counter to 1 instead of + 2 when recovering from an insert which used a auto increment, + rand, or user var. Therefore, if the slave skip counter is 1, we + just say that this event should be skipped by ignoring it, meaning + that we do not change the value of the slave skip counter since it + will be decreased by the following insert event. 
+ */ + return continue_group(rgi); +} + +/** + Exec deferred Int-, Rand- and User- var events prefixing + a Query-log-event event. + + @param thd THD handle + + @return false on success, true if a failure in an event applying occurred. +*/ +bool slave_execute_deferred_events(THD *thd) +{ + bool res= false; + rpl_group_info *rgi= thd->rgi_slave; + + DBUG_ASSERT(rgi && (!rgi->deferred_events_collecting || rgi->deferred_events)); + + if (!rgi->deferred_events_collecting || rgi->deferred_events->is_empty()) + return res; + + res= rgi->deferred_events->execute(rgi); + rgi->deferred_events->rewind(); + + return res; +} + +#endif /* HAVE_REPLICATION */ + + +/************************************************************************** + Xid_apply_log_event methods +**************************************************************************/ + +#if defined(HAVE_REPLICATION) + +int Xid_apply_log_event::do_record_gtid(THD *thd, rpl_group_info *rgi, + bool in_trans, void **out_hton, + bool force_err) +{ + int err= 0; + Relay_log_info const *rli= rgi->rli; + + rgi->gtid_pending= false; + err= rpl_global_gtid_slave_state->record_gtid(thd, &rgi->current_gtid, + rgi->gtid_sub_id, + in_trans, false, out_hton); + + if (unlikely(err)) + { + int ec= thd->get_stmt_da()->sql_errno(); + /* + Do not report an error if this is really a kill due to a deadlock. + In this case, the transaction will be re-tried instead. Unless force_err + is set, as in the case of XA PREPARE, as the GTID state is updated as a + separate transaction, and if that fails, we should not retry but exit in + error immediately. 
+ */ + if (!is_parallel_retry_error(rgi, ec) || force_err) + { + char buff[MAX_SLAVE_ERRMSG]; + buff[0]= 0; + aggregate_da_errors(buff, sizeof(buff), thd->get_stmt_da()); + + if (force_err) + thd->clear_error(); + + rli->report(ERROR_LEVEL, ER_CANNOT_UPDATE_GTID_STATE, rgi->gtid_info(), + "Error during XID COMMIT: failed to update GTID state in " + "%s.%s: %d: %s the event's master log %s, end_log_pos %llu", + "mysql", rpl_gtid_slave_state_table_name.str, ec, + buff, RPL_LOG_NAME, log_pos); + } + thd->is_slave_error= 1; + } + + return err; +} + +static bool wsrep_must_replay(THD *thd) +{ +#ifdef WITH_WSREP + mysql_mutex_lock(&thd->LOCK_thd_data); + bool res= WSREP(thd) && thd->wsrep_trx().state() == wsrep::transaction::s_must_replay; + mysql_mutex_unlock(&thd->LOCK_thd_data); + return res; +#else + return false; +#endif +} + + +int Xid_apply_log_event::do_apply_event(rpl_group_info *rgi) +{ + bool res; + int err; + uint64 sub_id= 0; + void *hton= NULL; + rpl_gtid gtid; + + /* + An instance of this class such as XID_EVENT works like a COMMIT + statement. It updates mysql.gtid_slave_pos with the GTID of the + current transaction. + Therefore, it acts much like a normal SQL statement, so we need to do + THD::reset_for_next_command() as if starting a new statement. + + XA_PREPARE_LOG_EVENT also updates the gtid table *but* the update gets + committed as separate "autocommit" transaction. + */ + thd->reset_for_next_command(); + /* + Record any GTID in the same transaction, so slave state is transactionally + consistent. 
+ */ +#ifdef WITH_WSREP + thd->wsrep_affected_rows= 0; +#endif + + if (rgi->gtid_pending) + { + sub_id= rgi->gtid_sub_id; + gtid= rgi->current_gtid; + + if (!thd->transaction->xid_state.is_explicit_XA()) + { + if ((err= do_record_gtid(thd, rgi, true /* in_trans */, &hton))) + return err; + + DBUG_EXECUTE_IF("gtid_fail_after_record_gtid", + { + my_error(ER_ERROR_DURING_COMMIT, MYF(0), + HA_ERR_WRONG_COMMAND); + thd->is_slave_error= 1; + return 1; + }); + } + } + + general_log_print(thd, COM_QUERY, get_query()); + thd->variables.option_bits&= ~OPTION_GTID_BEGIN; + res= do_commit(); + if (!res && rgi->gtid_pending) + { + DBUG_ASSERT(!thd->transaction->xid_state.is_explicit_XA()); + + if ((err= do_record_gtid(thd, rgi, false, &hton, true))) + return err; + } + + if (sub_id && (!res || wsrep_must_replay(thd))) + rpl_global_gtid_slave_state->update_state_hash(sub_id, >id, hton, rgi); + /* + Increment the global status commit count variable + */ + enum enum_sql_command cmd= !thd->transaction->xid_state.is_explicit_XA() + ? 
SQLCOM_COMMIT : SQLCOM_XA_PREPARE; + status_var_increment(thd->status_var.com_stat[cmd]); + + return res; +} + +Log_event::enum_skip_reason +Xid_apply_log_event::do_shall_skip(rpl_group_info *rgi) +{ + DBUG_ENTER("Xid_apply_log_event::do_shall_skip"); + if (rgi->rli->slave_skip_counter > 0) + { + DBUG_ASSERT(!rgi->rli->get_flag(Relay_log_info::IN_TRANSACTION)); + thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_GTID_BEGIN); + DBUG_RETURN(Log_event::EVENT_SKIP_COUNT); + } +#ifdef WITH_WSREP + else if (wsrep_mysql_replication_bundle && WSREP(thd) && + opt_slave_domain_parallel_threads == 0) + { + if (++thd->wsrep_mysql_replicated < (int)wsrep_mysql_replication_bundle) + { + WSREP_DEBUG("skipping wsrep commit %d", thd->wsrep_mysql_replicated); + DBUG_RETURN(Log_event::EVENT_SKIP_IGNORE); + } + else + { + thd->wsrep_mysql_replicated = 0; + } + } +#endif + DBUG_RETURN(Log_event::do_shall_skip(rgi)); +} +#endif /* HAVE_REPLICATION */ + +/************************************************************************** + Xid_log_event methods +**************************************************************************/ + +#if defined(HAVE_REPLICATION) +void Xid_log_event::pack_info(Protocol *protocol) +{ + char buf[128], *pos; + pos= strmov(buf, "COMMIT /* xid="); + pos= longlong10_to_str(xid, pos, 10); + pos= strmov(pos, " */"); + protocol->store(buf, (uint) (pos-buf), &my_charset_bin); +} + + +int Xid_log_event::do_commit() +{ + bool res; + res= trans_commit(thd); /* Automatically rolls back on error. 
*/ + thd->release_transactional_locks(); + return res; +} +#endif + + +bool Xid_log_event::write() +{ + DBUG_EXECUTE_IF("do_not_write_xid", return 0;); + return write_header(sizeof(xid)) || + write_data((uchar*)&xid, sizeof(xid)) || + write_footer(); +} + +/************************************************************************** + XA_prepare_log_event methods +**************************************************************************/ + +#if defined(HAVE_REPLICATION) +void XA_prepare_log_event::pack_info(Protocol *protocol) +{ + char query[sizeof("XA COMMIT ONE PHASE") + 1 + ser_buf_size]; + + sprintf(query, + (one_phase ? "XA COMMIT %s ONE PHASE" : "XA PREPARE %s"), + m_xid.serialize()); + + protocol->store(query, strlen(query), &my_charset_bin); +} + + +int XA_prepare_log_event::do_commit() +{ + int res; + xid_t xid; + xid.set(m_xid.formatID, + m_xid.data, m_xid.gtrid_length, + m_xid.data + m_xid.gtrid_length, m_xid.bqual_length); + + thd->lex->xid= &xid; + if (!one_phase) + { + if ((res= thd->wait_for_prior_commit())) + return res; + + thd->lex->sql_command= SQLCOM_XA_PREPARE; + res= trans_xa_prepare(thd); + } + else + res= trans_xa_commit(thd); + + return res; +} +#endif // HAVE_REPLICATION + + +bool XA_prepare_log_event::write() +{ + uchar data[1 + 4 + 4 + 4]= {one_phase,}; + uint8 one_phase_byte= one_phase; + + int4store(data+1, static_cast(xid)->formatID); + int4store(data+(1+4), static_cast(xid)->gtrid_length); + int4store(data+(1+4+4), static_cast(xid)->bqual_length); + + DBUG_ASSERT(xid_subheader_no_data == sizeof(data) - 1); + + return write_header(sizeof(one_phase_byte) + xid_subheader_no_data + + static_cast(xid)->gtrid_length + + static_cast(xid)->bqual_length) || + write_data(data, sizeof(data)) || + write_data((uchar*) static_cast(xid)->data, + static_cast(xid)->gtrid_length + + static_cast(xid)->bqual_length) || + write_footer(); +} + + +/************************************************************************** + User_var_log_event methods 
+**************************************************************************/ + +#if defined(HAVE_REPLICATION) +static bool +user_var_append_name_part(THD *thd, String *buf, + const char *name, size_t name_len) +{ + return buf->append('@') || + append_identifier(thd, buf, name, name_len) || + buf->append('='); +} + +void User_var_log_event::pack_info(Protocol* protocol) +{ + if (is_null) + { + char buf_mem[FN_REFLEN+7]; + String buf(buf_mem, sizeof(buf_mem), system_charset_info); + buf.length(0); + if (user_var_append_name_part(protocol->thd, &buf, name, name_len) || + buf.append(NULL_clex_str)) + return; + protocol->store(buf.ptr(), buf.length(), &my_charset_bin); + } + else + { + switch (type) { + case REAL_RESULT: + { + double real_val; + char buf2[MY_GCVT_MAX_FIELD_WIDTH+1]; + char buf_mem[FN_REFLEN + MY_GCVT_MAX_FIELD_WIDTH + 1]; + String buf(buf_mem, sizeof(buf_mem), system_charset_info); + float8get(real_val, val); + buf.length(0); + if (user_var_append_name_part(protocol->thd, &buf, name, name_len) || + buf.append(buf2, my_gcvt(real_val, MY_GCVT_ARG_DOUBLE, + MY_GCVT_MAX_FIELD_WIDTH, buf2, NULL))) + return; + protocol->store(buf.ptr(), buf.length(), &my_charset_bin); + break; + } + case INT_RESULT: + { + char buf2[22]; + char buf_mem[FN_REFLEN + 22]; + String buf(buf_mem, sizeof(buf_mem), system_charset_info); + buf.length(0); + if (user_var_append_name_part(protocol->thd, &buf, name, name_len) || + buf.append(buf2, + longlong10_to_str(uint8korr(val), buf2, + ((flags & User_var_log_event::UNSIGNED_F) ? 
10 : -10))-buf2)) + return; + protocol->store(buf.ptr(), buf.length(), &my_charset_bin); + break; + } + case DECIMAL_RESULT: + { + char buf_mem[FN_REFLEN + DECIMAL_MAX_STR_LENGTH]; + String buf(buf_mem, sizeof(buf_mem), system_charset_info); + char buf2[DECIMAL_MAX_STR_LENGTH+1]; + String str(buf2, sizeof(buf2), &my_charset_bin); + buf.length(0); + my_decimal((const uchar *) (val + 2), val[0], val[1]).to_string(&str); + if (user_var_append_name_part(protocol->thd, &buf, name, name_len) || + buf.append(str)) + return; + protocol->store(buf.ptr(), buf.length(), &my_charset_bin); + + break; + } + case STRING_RESULT: + { + /* 15 is for 'COLLATE' and other chars */ + char buf_mem[FN_REFLEN + 512 + 1 + 15 + + MY_CS_CHARACTER_SET_NAME_SIZE + + MY_CS_COLLATION_NAME_SIZE]; + String buf(buf_mem, sizeof(buf_mem), system_charset_info); + CHARSET_INFO *cs; + buf.length(0); + if (!(cs= get_charset(charset_number, MYF(0)))) + { + if (buf.append(STRING_WITH_LEN("???"))) + return; + } + else + { + size_t old_len; + char *beg, *end; + if (user_var_append_name_part(protocol->thd, &buf, name, name_len) || + buf.append('_') || + buf.append(cs->cs_name) || + buf.append(' ')) + return; + old_len= buf.length(); + if (buf.reserve(old_len + val_len * 2 + 3 + sizeof(" COLLATE ") + + MY_CS_COLLATION_NAME_SIZE)) + return; + beg= const_cast(buf.ptr()) + old_len; + end= str_to_hex(beg, val, val_len); + buf.length(old_len + (end - beg)); + if (buf.append(STRING_WITH_LEN(" COLLATE ")) || + buf.append(cs->coll_name)) + return; + } + protocol->store(buf.ptr(), buf.length(), &my_charset_bin); + break; + } + case ROW_RESULT: + default: + DBUG_ASSERT(0); + return; + } + } +} +#endif // HAVE_REPLICATION + + +bool User_var_log_event::write() +{ + char buf[UV_NAME_LEN_SIZE]; + char buf1[UV_VAL_IS_NULL + UV_VAL_TYPE_SIZE + + UV_CHARSET_NUMBER_SIZE + UV_VAL_LEN_SIZE]; + uchar buf2[MY_MAX(8, DECIMAL_MAX_FIELD_SIZE + 2)], *pos= buf2; + uint unsigned_len= 0; + uint buf1_length; + size_t event_length; + + 
int4store(buf, name_len); + + if ((buf1[0]= is_null)) + { + buf1_length= 1; + val_len= 0; // Length of 'pos' + } + else + { + buf1[1]= type; + int4store(buf1 + 2, charset_number); + + switch (type) { + case REAL_RESULT: + float8store(buf2, *(double*) val); + break; + case INT_RESULT: + int8store(buf2, *(longlong*) val); + unsigned_len= 1; + break; + case DECIMAL_RESULT: + { + my_decimal *dec= (my_decimal *)val; + dec->fix_buffer_pointer(); + buf2[0]= (char)(dec->intg + dec->frac); + buf2[1]= (char)dec->frac; + decimal2bin((decimal_t*)val, buf2+2, buf2[0], buf2[1]); + val_len= decimal_bin_size(buf2[0], buf2[1]) + 2; + break; + } + case STRING_RESULT: + pos= (uchar*) val; + break; + case ROW_RESULT: + default: + DBUG_ASSERT(0); + return 0; + } + int4store(buf1 + 2 + UV_CHARSET_NUMBER_SIZE, val_len); + buf1_length= 10; + } + + /* Length of the whole event */ + event_length= sizeof(buf)+ name_len + buf1_length + val_len + unsigned_len; + + return write_header(event_length) || + write_data(buf, sizeof(buf)) || + write_data(name, name_len) || + write_data(buf1, buf1_length) || + write_data(pos, val_len) || + write_data(&flags, unsigned_len) || + write_footer(); +} + + +#if defined(HAVE_REPLICATION) +int User_var_log_event::do_apply_event(rpl_group_info *rgi) +{ + Item *it= 0; + CHARSET_INFO *charset; + DBUG_ENTER("User_var_log_event::do_apply_event"); + query_id_t sav_query_id= 0; /* memorize orig id when deferred applying */ + + if (rgi->deferred_events_collecting) + { + set_deferred(current_thd->query_id); + DBUG_RETURN(rgi->deferred_events->add(this)); + } + else if (is_deferred()) + { + sav_query_id= current_thd->query_id; + current_thd->query_id= query_id; /* recreating original time context */ + } + + if (!(charset= get_charset(charset_number, MYF(MY_WME)))) + { + rgi->rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, + ER_THD(thd, ER_SLAVE_FATAL_ERROR), + "Invalid character set for User var event"); + DBUG_RETURN(1); + } + LEX_CSTRING user_var_name; + 
user_var_name.str= name; + user_var_name.length= name_len; + double real_val; + longlong int_val; + + if (is_null) + { + it= new (thd->mem_root) Item_null(thd); + } + else + { + switch (type) { + case REAL_RESULT: + if (val_len != 8) + { + rgi->rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, + ER_THD(thd, ER_SLAVE_FATAL_ERROR), + "Invalid variable length at User var event"); + return 1; + } + float8get(real_val, val); + it= new (thd->mem_root) Item_float(thd, real_val, 0); + val= (char*) &real_val; // Pointer to value in native format + val_len= 8; + break; + case INT_RESULT: + if (val_len != 8) + { + rgi->rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, + ER_THD(thd, ER_SLAVE_FATAL_ERROR), + "Invalid variable length at User var event"); + return 1; + } + int_val= (longlong) uint8korr(val); + it= new (thd->mem_root) Item_int(thd, int_val); + val= (char*) &int_val; // Pointer to value in native format + val_len= 8; + break; + case DECIMAL_RESULT: + { + if (val_len < 3) + { + rgi->rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, + ER_THD(thd, ER_SLAVE_FATAL_ERROR), + "Invalid variable length at User var event"); + return 1; + } + Item_decimal *dec= new (thd->mem_root) Item_decimal(thd, (uchar*) val+2, val[0], val[1]); + it= dec; + val= (char *)dec->val_decimal(NULL); + val_len= sizeof(my_decimal); + break; + } + case STRING_RESULT: + it= new (thd->mem_root) Item_string(thd, val, (uint)val_len, charset); + break; + case ROW_RESULT: + default: + DBUG_ASSERT(0); + DBUG_RETURN(0); + } + } + + Item_func_set_user_var *e= new (thd->mem_root) Item_func_set_user_var(thd, &user_var_name, it); + /* + Item_func_set_user_var can't substitute something else on its place => + 0 can be passed as last argument (reference on item) + + Fix_fields() can fail, in which case a call of update_hash() might + crash the server, so if fix fields fails, we just return with an + error. 
+ */ + if (e->fix_fields(thd, 0)) + DBUG_RETURN(1); + + /* + A variable can just be considered as a table with + a single record and with a single column. Thus, like + a column value, it could always have IMPLICIT derivation. + */ + e->update_hash((void*) val, val_len, type, charset, + (flags & User_var_log_event::UNSIGNED_F)); + if (!is_deferred()) + free_root(thd->mem_root, 0); + else + current_thd->query_id= sav_query_id; /* restore current query's context */ + + DBUG_RETURN(0); +} + +int User_var_log_event::do_update_pos(rpl_group_info *rgi) +{ + rgi->inc_event_relay_log_pos(); + return 0; +} + +Log_event::enum_skip_reason +User_var_log_event::do_shall_skip(rpl_group_info *rgi) +{ + /* + It is a common error to set the slave skip counter to 1 instead + of 2 when recovering from an insert which used a auto increment, + rand, or user var. Therefore, if the slave skip counter is 1, we + just say that this event should be skipped by ignoring it, meaning + that we do not change the value of the slave skip counter since it + will be decreased by the following insert event. + */ + return continue_group(rgi); +} +#endif // HAVE_REPLICATION + + +#ifdef HAVE_REPLICATION + +/************************************************************************** + Stop_log_event methods +**************************************************************************/ + +/* + The master stopped. We used to clean up all temporary tables but + this is useless as, as the master has shut down properly, it has + written all DROP TEMPORARY TABLE (prepared statements' deletion is + TODO only when we binlog prep stmts). We used to clean up + slave_load_tmpdir, but this is useless as it has been cleared at the + end of LOAD DATA INFILE. So we have nothing to do here. The place + were we must do this cleaning is in + Start_log_event_v3::do_apply_event(), not here. Because if we come + here, the master was sane. 
+ + This must only be called from the Slave SQL thread, since it calls + Relay_log_info::flush(). +*/ + +int Stop_log_event::do_update_pos(rpl_group_info *rgi) +{ + int error= 0; + Relay_log_info *rli= rgi->rli; + DBUG_ENTER("Stop_log_event::do_update_pos"); + /* + We do not want to update master_log pos because we get a rotate event + before stop, so by now group_master_log_name is set to the next log. + If we updated it, we will have incorrect master coordinates and this + could give false triggers in MASTER_POS_WAIT() that we have reached + the target position when in fact we have not. + */ + if (rli->get_flag(Relay_log_info::IN_TRANSACTION)) + rgi->inc_event_relay_log_pos(); + else if (!rgi->is_parallel_exec) + { + rpl_global_gtid_slave_state->record_and_update_gtid(thd, rgi); + rli->inc_group_relay_log_pos(0, rgi); + if (rli->flush()) + error= 1; + } + DBUG_RETURN(error); +} + +#endif /* HAVE_REPLICATION */ + + +/************************************************************************** + Create_file_log_event methods +**************************************************************************/ + +Create_file_log_event:: +Create_file_log_event(THD* thd_arg, sql_exchange* ex, + const char* db_arg, const char* table_name_arg, + List& fields_arg, + bool is_concurrent_arg, + enum enum_duplicates handle_dup, + bool ignore, + uchar* block_arg, uint block_len_arg, bool using_trans) + :Load_log_event(thd_arg, ex, db_arg, table_name_arg, fields_arg, + is_concurrent_arg, + handle_dup, ignore, using_trans), + fake_base(0), block(block_arg), event_buf(0), block_len(block_len_arg), + file_id(thd_arg->file_id = mysql_bin_log.next_file_id()) +{ + DBUG_ENTER("Create_file_log_event"); + sql_ex.force_new_format(); + DBUG_VOID_RETURN; +} + + +/* + Create_file_log_event::write_data_body() +*/ + +bool Create_file_log_event::write_data_body() +{ + bool res; + if ((res= Load_log_event::write_data_body()) || fake_base) + return res; + return write_data("", 1) || + write_data(block, 
block_len); +} + + +/* + Create_file_log_event::write_data_header() +*/ + +bool Create_file_log_event::write_data_header() +{ + bool res; + uchar buf[CREATE_FILE_HEADER_LEN]; + if ((res= Load_log_event::write_data_header()) || fake_base) + return res; + int4store(buf + CF_FILE_ID_OFFSET, file_id); + return write_data(buf, CREATE_FILE_HEADER_LEN) != 0; +} + + +/* + Create_file_log_event::write_base() +*/ + +bool Create_file_log_event::write_base() +{ + bool res; + fake_base= 1; // pretend we are Load event + res= write(); + fake_base= 0; + return res; +} + + +#if defined(HAVE_REPLICATION) +void Create_file_log_event::pack_info(Protocol *protocol) +{ + char buf[SAFE_NAME_LEN*2 + 30 + 21*2], *pos; + pos= strmov(buf, "db="); + memcpy(pos, db, db_len); + pos= strmov(pos + db_len, ";table="); + memcpy(pos, table_name, table_name_len); + pos= strmov(pos + table_name_len, ";file_id="); + pos= int10_to_str((long) file_id, pos, 10); + pos= strmov(pos, ";block_len="); + pos= int10_to_str((long) block_len, pos, 10); + protocol->store(buf, (uint) (pos-buf), &my_charset_bin); +} +#endif /* defined(HAVE_REPLICATION) */ + + +/** + Create_file_log_event::do_apply_event() + Constructor for Create_file_log_event to intantiate an event + from the relay log on the slave. 
+ + @retval + 0 Success + @retval + 1 Failure +*/ + +#if defined(HAVE_REPLICATION) +int Create_file_log_event::do_apply_event(rpl_group_info *rgi) +{ + char fname_buf[FN_REFLEN]; + char *ext; + int fd = -1; + IO_CACHE file; + Log_event_writer lew(&file, 0); + int error = 1; + Relay_log_info const *rli= rgi->rli; + + THD_STAGE_INFO(thd, stage_making_temp_file_create_before_load_data); + bzero((char*)&file, sizeof(file)); + ext= slave_load_file_stem(fname_buf, file_id, server_id, ".info", + &rli->mi->connection_name); + /* old copy may exist already */ + mysql_file_delete(key_file_log_event_info, fname_buf, MYF(0)); + if ((fd= mysql_file_create(key_file_log_event_info, + fname_buf, CREATE_MODE, + O_WRONLY | O_BINARY | O_EXCL | O_NOFOLLOW, + MYF(MY_WME))) < 0 || + init_io_cache(&file, fd, IO_SIZE, WRITE_CACHE, (my_off_t)0, 0, + MYF(MY_WME|MY_NABP))) + { + rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), + "Error in Create_file event: could not open file '%s'", + fname_buf); + goto err; + } + + // a trick to avoid allocating another buffer + fname= fname_buf; + fname_len= (uint) (strmov(ext, ".data") - fname); + writer= &lew; + if (write_base()) + { + strmov(ext, ".info"); // to have it right in the error message + rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), + "Error in Create_file event: could not write to file '%s'", + fname_buf); + goto err; + } + end_io_cache(&file); + mysql_file_close(fd, MYF(0)); + + // fname_buf now already has .data, not .info, because we did our trick + /* old copy may exist already */ + mysql_file_delete(key_file_log_event_data, fname_buf, MYF(0)); + if ((fd= mysql_file_create(key_file_log_event_data, + fname_buf, CREATE_MODE, + O_WRONLY | O_BINARY | O_EXCL | O_NOFOLLOW, + MYF(MY_WME))) < 0) + { + rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), + "Error in Create_file event: could not open file '%s'", + fname_buf); + goto err; + } + if (mysql_file_write(fd, (uchar*) block, block_len, MYF(MY_WME+MY_NABP))) + { + 
rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), + "Error in Create_file event: write to '%s' failed", + fname_buf); + goto err; + } + error=0; // Everything is ok + +err: + if (unlikely(error)) + end_io_cache(&file); + if (likely(fd >= 0)) + mysql_file_close(fd, MYF(0)); + return error != 0; +} +#endif /* defined(HAVE_REPLICATION) */ + + +/************************************************************************** + Append_block_log_event methods +**************************************************************************/ + +Append_block_log_event::Append_block_log_event(THD *thd_arg, + const char *db_arg, + uchar *block_arg, + uint block_len_arg, + bool using_trans) + :Log_event(thd_arg,0, using_trans), block(block_arg), + block_len(block_len_arg), file_id(thd_arg->file_id), db(db_arg) +{ +} + + +bool Append_block_log_event::write() +{ + uchar buf[APPEND_BLOCK_HEADER_LEN]; + int4store(buf + AB_FILE_ID_OFFSET, file_id); + return write_header(APPEND_BLOCK_HEADER_LEN + block_len) || + write_data(buf, APPEND_BLOCK_HEADER_LEN) || + write_data(block, block_len) || + write_footer(); +} + + +#if defined(HAVE_REPLICATION) +void Append_block_log_event::pack_info(Protocol *protocol) +{ + char buf[256]; + uint length; + length= (uint) sprintf(buf, ";file_id=%u;block_len=%u", file_id, block_len); + protocol->store(buf, length, &my_charset_bin); +} + + +/* + Append_block_log_event::get_create_or_append() +*/ + +int Append_block_log_event::get_create_or_append() const +{ + return 0; /* append to the file, fail if not exists */ +} + +/* + Append_block_log_event::do_apply_event() +*/ + +int Append_block_log_event::do_apply_event(rpl_group_info *rgi) +{ + char fname[FN_REFLEN]; + int fd; + int error = 1; + Relay_log_info const *rli= rgi->rli; + DBUG_ENTER("Append_block_log_event::do_apply_event"); + + THD_STAGE_INFO(thd, stage_making_temp_file_append_before_load_data); + slave_load_file_stem(fname, file_id, server_id, ".data", + &rli->mi->cmp_connection_name); + if 
(get_create_or_append()) + { + /* + Usually lex_start() is called by mysql_parse(), but we need it here + as the present method does not call mysql_parse(). + */ + lex_start(thd); + thd->reset_for_next_command(); + /* old copy may exist already */ + mysql_file_delete(key_file_log_event_data, fname, MYF(0)); + if ((fd= mysql_file_create(key_file_log_event_data, + fname, CREATE_MODE, + O_WRONLY | O_BINARY | O_EXCL | O_NOFOLLOW, + MYF(MY_WME))) < 0) + { + rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), + "Error in %s event: could not create file '%s'", + get_type_str(), fname); + goto err; + } + } + else if ((fd= mysql_file_open(key_file_log_event_data, + fname, + O_WRONLY | O_APPEND | O_BINARY | O_NOFOLLOW, + MYF(MY_WME))) < 0) + { + rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), + "Error in %s event: could not open file '%s'", + get_type_str(), fname); + goto err; + } + + DBUG_EXECUTE_IF("remove_slave_load_file_before_write", + { + my_delete(fname, MYF(0)); + }); + + if (mysql_file_write(fd, (uchar*) block, block_len, MYF(MY_WME+MY_NABP))) + { + rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), + "Error in %s event: write to '%s' failed", + get_type_str(), fname); + goto err; + } + error=0; + +err: + if (fd >= 0) + mysql_file_close(fd, MYF(0)); + DBUG_RETURN(error); +} +#endif // HAVE_REPLICATION + + +/************************************************************************** + Delete_file_log_event methods +**************************************************************************/ + +Delete_file_log_event::Delete_file_log_event(THD *thd_arg, const char* db_arg, + bool using_trans) + :Log_event(thd_arg, 0, using_trans), file_id(thd_arg->file_id), db(db_arg) +{ +} + + +bool Delete_file_log_event::write() +{ + uchar buf[DELETE_FILE_HEADER_LEN]; + int4store(buf + DF_FILE_ID_OFFSET, file_id); + return write_header(sizeof(buf)) || + write_data(buf, sizeof(buf)) || + write_footer(); +} + + +#if defined(HAVE_REPLICATION) +void 
Delete_file_log_event::pack_info(Protocol *protocol) +{ + char buf[64]; + uint length; + length= (uint) sprintf(buf, ";file_id=%u", (uint) file_id); + protocol->store(buf, (int32) length, &my_charset_bin); +} +#endif + + +#if defined(HAVE_REPLICATION) +int Delete_file_log_event::do_apply_event(rpl_group_info *rgi) +{ + char fname[FN_REFLEN+10]; + Relay_log_info const *rli= rgi->rli; + char *ext= slave_load_file_stem(fname, file_id, server_id, ".data", + &rli->mi->cmp_connection_name); + mysql_file_delete(key_file_log_event_data, fname, MYF(MY_WME)); + strmov(ext, ".info"); + mysql_file_delete(key_file_log_event_info, fname, MYF(MY_WME)); + return 0; +} +#endif /* defined(HAVE_REPLICATION) */ + + +/************************************************************************** + Execute_load_log_event methods +**************************************************************************/ + +Execute_load_log_event::Execute_load_log_event(THD *thd_arg, + const char* db_arg, + bool using_trans) + :Log_event(thd_arg, 0, using_trans), file_id(thd_arg->file_id), db(db_arg) +{ +} + + +bool Execute_load_log_event::write() +{ + uchar buf[EXEC_LOAD_HEADER_LEN]; + int4store(buf + EL_FILE_ID_OFFSET, file_id); + return write_header(sizeof(buf)) || + write_data(buf, sizeof(buf)) || + write_footer(); +} + + +#if defined(HAVE_REPLICATION) +void Execute_load_log_event::pack_info(Protocol *protocol) +{ + char buf[64]; + uint length; + length= (uint) sprintf(buf, ";file_id=%u", (uint) file_id); + protocol->store(buf, (int32) length, &my_charset_bin); +} + + +/* + Execute_load_log_event::do_apply_event() +*/ + +int Execute_load_log_event::do_apply_event(rpl_group_info *rgi) +{ + char fname[FN_REFLEN+10]; + char *ext; + int fd; + int error= 1; + IO_CACHE file; + Load_log_event *lev= 0; + Relay_log_info const *rli= rgi->rli; + + ext= slave_load_file_stem(fname, file_id, server_id, ".info", + &rli->mi->cmp_connection_name); + if ((fd= mysql_file_open(key_file_log_event_info, + fname, O_RDONLY | 
O_BINARY | O_NOFOLLOW, + MYF(MY_WME))) < 0 || + init_io_cache(&file, fd, IO_SIZE, READ_CACHE, (my_off_t)0, 0, + MYF(MY_WME|MY_NABP))) + { + rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), + "Error in Exec_load event: could not open file '%s'", + fname); + goto err; + } + if (!(lev= (Load_log_event*) + Log_event::read_log_event(&file, + rli->relay_log.description_event_for_exec, + opt_slave_sql_verify_checksum)) || + lev->get_type_code() != NEW_LOAD_EVENT) + { + rli->report(ERROR_LEVEL, 0, rgi->gtid_info(), "Error in Exec_load event: " + "file '%s' appears corrupted", fname); + goto err; + } + lev->thd = thd; + /* + lev->do_apply_event should use rli only for errors i.e. should + not advance rli's position. + + lev->do_apply_event is the place where the table is loaded (it + calls mysql_load()). + */ + + if (lev->do_apply_event(0,rgi,1)) + { + /* + We want to indicate the name of the file that could not be loaded + (SQL_LOADxxx). + But as we are here we are sure the error is in rli->last_slave_error and + rli->last_slave_errno (example of error: duplicate entry for key), so we + don't want to overwrite it with the filename. + What we want instead is add the filename to the current error message. + */ + char *tmp= my_strdup(PSI_INSTRUMENT_ME, rli->last_error().message, MYF(MY_WME)); + if (tmp) + { + rli->report(ERROR_LEVEL, rli->last_error().number, rgi->gtid_info(), + "%s. Failed executing load from '%s'", tmp, fname); + my_free(tmp); + } + goto err; + } + /* + We have an open file descriptor to the .info file; we need to close it + or Windows will refuse to delete the file in mysql_file_delete(). 
+ */ + if (fd >= 0) + { + mysql_file_close(fd, MYF(0)); + end_io_cache(&file); + fd= -1; + } + mysql_file_delete(key_file_log_event_info, fname, MYF(MY_WME)); + memcpy(ext, ".data", 6); + mysql_file_delete(key_file_log_event_data, fname, MYF(MY_WME)); + error = 0; + +err: + delete lev; + if (fd >= 0) + { + mysql_file_close(fd, MYF(0)); + end_io_cache(&file); + } + return error; +} + +#endif /* defined(HAVE_REPLICATION) */ + +/************************************************************************** + Begin_load_query_log_event methods +**************************************************************************/ + +Begin_load_query_log_event:: +Begin_load_query_log_event(THD* thd_arg, const char* db_arg, uchar* block_arg, + uint block_len_arg, bool using_trans) + :Append_block_log_event(thd_arg, db_arg, block_arg, block_len_arg, + using_trans) +{ + file_id= thd_arg->file_id= mysql_bin_log.next_file_id(); +} + + +#if defined( HAVE_REPLICATION) +int Begin_load_query_log_event::get_create_or_append() const +{ + return 1; /* create the file */ +} + + +Log_event::enum_skip_reason +Begin_load_query_log_event::do_shall_skip(rpl_group_info *rgi) +{ + /* + If the slave skip counter is 1, then we should not start executing + on the next event. 
+ */ + return continue_group(rgi); +} +#endif /* defined( HAVE_REPLICATION) */ + + +/************************************************************************** + Execute_load_query_log_event methods +**************************************************************************/ + +Execute_load_query_log_event:: +Execute_load_query_log_event(THD *thd_arg, const char* query_arg, + ulong query_length_arg, uint fn_pos_start_arg, + uint fn_pos_end_arg, + enum_load_dup_handling dup_handling_arg, + bool using_trans, bool direct, bool suppress_use, + int errcode): + Query_log_event(thd_arg, query_arg, query_length_arg, using_trans, direct, + suppress_use, errcode), + file_id(thd_arg->file_id), fn_pos_start(fn_pos_start_arg), + fn_pos_end(fn_pos_end_arg), dup_handling(dup_handling_arg) +{ +} + + +bool +Execute_load_query_log_event::write_post_header_for_derived() +{ + uchar buf[EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN]; + int4store(buf, file_id); + int4store(buf + 4, fn_pos_start); + int4store(buf + 4 + 4, fn_pos_end); + *(buf + 4 + 4 + 4)= (uchar) dup_handling; + return write_data(buf, EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN); +} + + +#if defined(HAVE_REPLICATION) +void Execute_load_query_log_event::pack_info(Protocol *protocol) +{ + char buf_mem[1024]; + String buf(buf_mem, sizeof(buf_mem), system_charset_info); + buf.real_alloc(9 + db_len + q_len + 10 + 21); + if (db && db_len) + { + if (buf.append(STRING_WITH_LEN("use ")) || + append_identifier(protocol->thd, &buf, db, db_len) || + buf.append(STRING_WITH_LEN("; "))) + return; + } + if (query && q_len && buf.append(query, q_len)) + return; + if (buf.append(STRING_WITH_LEN(" ;file_id=")) || + buf.append_ulonglong(file_id)) + return; + protocol->store(buf.ptr(), buf.length(), &my_charset_bin); +} + + +int +Execute_load_query_log_event::do_apply_event(rpl_group_info *rgi) +{ + char *p; + char *buf; + char *fname; + char *fname_end; + int error; + Relay_log_info const *rli= rgi->rli; + + buf= (char*) my_malloc(PSI_INSTRUMENT_ME, q_len + 
1 - + (fn_pos_end - fn_pos_start) + (FN_REFLEN + 10) + 10 + 8 + 5, MYF(MY_WME)); + + DBUG_EXECUTE_IF("LOAD_DATA_INFILE_has_fatal_error", my_free(buf); buf= NULL;); + + /* Replace filename and LOCAL keyword in query before executing it */ + if (buf == NULL) + { + rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, rgi->gtid_info(), + ER_THD(rgi->thd, ER_SLAVE_FATAL_ERROR), "Not enough memory"); + return 1; + } + + p= buf; + memcpy(p, query, fn_pos_start); + p+= fn_pos_start; + fname= (p= strmake(p, STRING_WITH_LEN(" INFILE \'"))); + p= slave_load_file_stem(p, file_id, server_id, ".data", + &rli->mi->cmp_connection_name); + fname_end= p= strend(p); // Safer than p=p+5 + *(p++)='\''; + switch (dup_handling) { + case LOAD_DUP_IGNORE: + p= strmake(p, STRING_WITH_LEN(" IGNORE")); + break; + case LOAD_DUP_REPLACE: + p= strmake(p, STRING_WITH_LEN(" REPLACE")); + break; + default: + /* Ordinary load data */ + break; + } + p= strmake(p, STRING_WITH_LEN(" INTO ")); + p= strmake(p, query+fn_pos_end, q_len-fn_pos_end); + + error= Query_log_event::do_apply_event(rgi, buf, (uint32)(p-buf)); + + /* Forging file name for deletion in same buffer */ + *fname_end= 0; + + /* + If there was an error the slave is going to stop, leave the + file so that we can re-execute this event at START SLAVE. 
+ */ + if (unlikely(!error)) + mysql_file_delete(key_file_log_event_data, fname, MYF(MY_WME)); + + my_free(buf); + return error; +} +#endif // HAVE_REPLICATION + + +/************************************************************************** + sql_ex_info methods +**************************************************************************/ + +static bool write_str(Log_event_writer *writer, const char *str, uint length) +{ + uchar tmp[1]; + tmp[0]= (uchar) length; + return (writer->write_data(tmp, sizeof(tmp)) || + writer->write_data((uchar*) str, length)); +} + +bool sql_ex_info::write_data(Log_event_writer *writer) +{ + if (new_format()) + { + return write_str(writer, field_term, field_term_len) || + write_str(writer, enclosed, enclosed_len) || + write_str(writer, line_term, line_term_len) || + write_str(writer, line_start, line_start_len) || + write_str(writer, escaped, escaped_len) || + writer->write_data((uchar*) &opt_flags, 1); + } + else + { + uchar old_ex[7]; + old_ex[0]= *field_term; + old_ex[1]= *enclosed; + old_ex[2]= *line_term; + old_ex[3]= *line_start; + old_ex[4]= *escaped; + old_ex[5]= opt_flags; + old_ex[6]= empty_flags; + return writer->write_data(old_ex, sizeof(old_ex)); + } +} + + + +/************************************************************************** + Rows_log_event member functions +**************************************************************************/ + +Rows_log_event::Rows_log_event(THD *thd_arg, TABLE *tbl_arg, ulong tid, + MY_BITMAP const *cols, bool is_transactional, + Log_event_type event_type) + : Log_event(thd_arg, 0, is_transactional), + m_row_count(0), + m_table(tbl_arg), + m_table_id(tid), + m_width(tbl_arg ? 
tbl_arg->s->fields : 1), + m_rows_buf(0), m_rows_cur(0), m_rows_end(0), m_flags(0), + m_type(event_type), m_extra_row_data(0) +#ifdef HAVE_REPLICATION + , m_curr_row(NULL), m_curr_row_end(NULL), + m_key(NULL), m_key_info(NULL), m_key_nr(0), + master_had_triggers(0) +#endif +{ + /* + We allow a special form of dummy event when the table, and cols + are null and the table id is ~0UL. This is a temporary + solution, to be able to terminate a started statement in the + binary log: the extraneous events will be removed in the future. + */ + DBUG_ASSERT((tbl_arg && tbl_arg->s && tid != ~0UL) || + (!tbl_arg && !cols && tid == ~0UL)); + + if (thd_arg->variables.option_bits & OPTION_NO_FOREIGN_KEY_CHECKS) + set_flags(NO_FOREIGN_KEY_CHECKS_F); + if (thd_arg->variables.option_bits & OPTION_RELAXED_UNIQUE_CHECKS) + set_flags(RELAXED_UNIQUE_CHECKS_F); + if (thd_arg->variables.option_bits & OPTION_NO_CHECK_CONSTRAINT_CHECKS) + set_flags(NO_CHECK_CONSTRAINT_CHECKS_F); + /* if my_bitmap_init fails, caught in is_valid() */ + if (likely(!my_bitmap_init(&m_cols, + m_width <= sizeof(m_bitbuf)*8 ? m_bitbuf : NULL, + m_width))) + { + /* Cols can be zero if this is a dummy binrows event */ + if (likely(cols != NULL)) + { + memcpy(m_cols.bitmap, cols->bitmap, no_bytes_in_map(cols)); + create_last_word_mask(&m_cols); + } + } + else + { + // Needed because my_bitmap_init() does not set it to null on failure + m_cols.bitmap= 0; + } +} + + +int Rows_log_event::do_add_row_data(uchar *row_data, size_t length) +{ + /* + When the table has a primary key, we would probably want, by default, to + log only the primary key value instead of the entire "before image". This + would save binlog space. TODO + */ + DBUG_ENTER("Rows_log_event::do_add_row_data"); + DBUG_PRINT("enter", ("row_data:%p length: %lu", row_data, + (ulong) length)); + + /* + If length is zero, there is nothing to write, so we just + return. Note that this is not an optimization, since calling + realloc() with size 0 means free(). 
+  */
+  if (length == 0)
+  {
+    m_row_count++;
+    DBUG_RETURN(0);
+  }
+
+  /*
+    Don't print debug messages when running valgrind since they can
+    trigger false warnings.
+  */
+#ifndef HAVE_valgrind
+  DBUG_DUMP("row_data", row_data, MY_MIN(length, 32));
+#endif
+
+  DBUG_ASSERT(m_rows_buf <= m_rows_cur);
+  DBUG_ASSERT(!m_rows_buf || (m_rows_end && m_rows_buf < m_rows_end));
+  DBUG_ASSERT(m_rows_cur <= m_rows_end);
+
+  /* The cast will always work since m_rows_cur <= m_rows_end */
+  if (static_cast<size_t>(m_rows_end - m_rows_cur) <= length)
+  {
+    size_t const block_size= 1024;
+    size_t cur_size= m_rows_cur - m_rows_buf;
+    DBUG_EXECUTE_IF("simulate_too_big_row_case1",
+                    cur_size= UINT_MAX32 - (block_size * 10);
+                    length= UINT_MAX32 - (block_size * 10););
+    DBUG_EXECUTE_IF("simulate_too_big_row_case2",
+                    cur_size= UINT_MAX32 - (block_size * 10);
+                    length= block_size * 10;);
+    DBUG_EXECUTE_IF("simulate_too_big_row_case3",
+                    cur_size= block_size * 10;
+                    length= UINT_MAX32 - (block_size * 10););
+    DBUG_EXECUTE_IF("simulate_too_big_row_case4",
+                    cur_size= UINT_MAX32 - (block_size * 10);
+                    length= (block_size * 10) - block_size + 1;);
+    size_t remaining_space= UINT_MAX32 - cur_size;
+    /* Check that the new data fits within remaining space and we can add
+       block_size without wrapping.
+ */ + if (cur_size > UINT_MAX32 || length > remaining_space || + ((length + block_size) > remaining_space)) + { + sql_print_error("The row data is greater than 4GB, which is too big to " + "write to the binary log."); + DBUG_RETURN(ER_BINLOG_ROW_LOGGING_FAILED); + } + size_t const new_alloc= + block_size * ((cur_size + length + block_size - 1) / block_size); + + uchar* const new_buf= (uchar*)my_realloc(PSI_INSTRUMENT_ME, m_rows_buf, + new_alloc, MYF(MY_ALLOW_ZERO_PTR|MY_WME)); + if (unlikely(!new_buf)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + /* If the memory moved, we need to move the pointers */ + if (new_buf != m_rows_buf) + { + m_rows_buf= new_buf; + m_rows_cur= m_rows_buf + cur_size; + } + + /* + The end pointer should always be changed to point to the end of + the allocated memory. + */ + m_rows_end= m_rows_buf + new_alloc; + } + + DBUG_ASSERT(m_rows_cur + length <= m_rows_end); + memcpy(m_rows_cur, row_data, length); + m_rows_cur+= length; + m_row_count++; + DBUG_RETURN(0); +} + + +#if defined(HAVE_REPLICATION) + +/** + Restores empty table list as it was before trigger processing. + + @note We have a lot of ASSERTS that check the lists when we close tables. + There was the same problem with MERGE MYISAM tables and so here we try to + go the same way. +*/ +inline void restore_empty_query_table_list(LEX *lex) +{ + if (lex->first_not_own_table()) + (*lex->first_not_own_table()->prev_global)= NULL; + lex->query_tables= NULL; + lex->query_tables_last= &lex->query_tables; +} + + +int Rows_log_event::do_apply_event(rpl_group_info *rgi) +{ + Relay_log_info const *rli= rgi->rli; + TABLE* table; + DBUG_ENTER("Rows_log_event::do_apply_event(Relay_log_info*)"); + int error= 0; + LEX *lex= thd->lex; + uint8 new_trg_event_map= get_trg_event_map(); + /* + If m_table_id == ~0ULL, then we have a dummy event that does not + contain any data. In that case, we just remove all tables in the + tables_to_lock list, close the thread tables, and return with + success. 
+ */ + if (m_table_id == ~0ULL) + { + /* + This one is supposed to be set: just an extra check so that + nothing strange has happened. + */ + DBUG_ASSERT(get_flags(STMT_END_F)); + + rgi->slave_close_thread_tables(thd); + thd->clear_error(); + DBUG_RETURN(0); + } + + /* + 'thd' has been set by exec_relay_log_event(), just before calling + do_apply_event(). We still check here to prevent future coding + errors. + */ + DBUG_ASSERT(rgi->thd == thd); + + /* + Where a Query_log_event can rely on the normal command execution logic to + set/reset the slave thread's timer; a Rows_log_event update needs to set + the timer itself + */ + thd->set_query_timer(); + + /* + If there is no locks taken, this is the first binrow event seen + after the table map events. We should then lock all the tables + used in the transaction and proceed with execution of the actual + event. + */ + if (!thd->lock) + { + /* + Lock_tables() reads the contents of thd->lex, so they must be + initialized. + + We also call the THD::reset_for_next_command(), since this + is the logical start of the next "statement". Note that this + call might reset the value of current_stmt_binlog_format, so + we need to do any changes to that value after this function. + */ + delete_explain_query(thd->lex); + lex_start(thd); + thd->reset_for_next_command(); + /* + The current statement is just about to begin and + has not yet modified anything. Note, all.modified is reset + by THD::reset_for_next_command(). + */ + thd->transaction->stmt.modified_non_trans_table= FALSE; + thd->transaction->stmt.m_unsafe_rollback_flags&= ~THD_TRANS::DID_WAIT; + /* + This is a row injection, so we flag the "statement" as + such. Note that this code is called both when the slave does row + injections and when the BINLOG statement is used to do row + injections. + */ + thd->lex->set_stmt_row_injection(); + + /* + There are a few flags that are replicated with each row event. 
+ Make sure to set/clear them before executing the main body of + the event. + */ + if (get_flags(NO_FOREIGN_KEY_CHECKS_F)) + thd->variables.option_bits|= OPTION_NO_FOREIGN_KEY_CHECKS; + else + thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS; + + if (get_flags(RELAXED_UNIQUE_CHECKS_F)) + thd->variables.option_bits|= OPTION_RELAXED_UNIQUE_CHECKS; + else + thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS; + + if (get_flags(NO_CHECK_CONSTRAINT_CHECKS_F)) + thd->variables.option_bits|= OPTION_NO_CHECK_CONSTRAINT_CHECKS; + else + thd->variables.option_bits&= ~OPTION_NO_CHECK_CONSTRAINT_CHECKS; + + /* A small test to verify that objects have consistent types */ + DBUG_ASSERT(sizeof(thd->variables.option_bits) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS)); + + DBUG_EXECUTE_IF("rows_log_event_before_open_table", + { + const char action[] = "now SIGNAL before_open_table WAIT_FOR go_ahead_sql"; + DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(action))); + };); + + + /* + Trigger's procedures work with global table list. So we have to add + rgi->tables_to_lock content there to get trigger's in the list. + + Then restore_empty_query_table_list() restore the list as it was + */ + DBUG_ASSERT(lex->query_tables == NULL); + if ((lex->query_tables= rgi->tables_to_lock)) + rgi->tables_to_lock->prev_global= &lex->query_tables; + + for (TABLE_LIST *tables= rgi->tables_to_lock; tables; + tables= tables->next_global) + { + if (slave_run_triggers_for_rbr) + { + tables->trg_event_map= new_trg_event_map; + lex->query_tables_last= &tables->next_global; + } + else + { + tables->slave_fk_event_map= new_trg_event_map; + lex->query_tables_last= &tables->next_global; + } + } + + /* + It is needed to set_time(): + 1) it continues the property that "Time" in SHOW PROCESSLIST shows how + much slave is behind + 2) it will be needed when we allow replication from a table with no + TIMESTAMP column to a table with one. + So we call set_time(), like in SBR. 
Presently it changes nothing. + 3) vers_set_hist_part() requires proper query time. + */ + thd->set_time(when, when_sec_part); + + if (unlikely(open_and_lock_tables(thd, rgi->tables_to_lock, FALSE, 0))) + { +#ifdef WITH_WSREP + if (WSREP(thd)) + { + WSREP_WARN("BF applier failed to open_and_lock_tables: %u, fatal: %d " + "wsrep = (exec_mode: %d conflict_state: %d seqno: %lld)", + thd->get_stmt_da()->sql_errno(), + thd->is_fatal_error, + thd->wsrep_cs().mode(), + thd->wsrep_trx().state(), + (long long) wsrep_thd_trx_seqno(thd)); + } +#endif /* WITH_WSREP */ + if (thd->is_error() && + !is_parallel_retry_error(rgi, error= thd->get_stmt_da()->sql_errno())) + { + /* + Error reporting borrowed from Query_log_event with many excessive + simplifications. + We should not honour --slave-skip-errors at this point as we are + having severe errors which should not be skipped. + */ + rli->report(ERROR_LEVEL, error, rgi->gtid_info(), + "Error executing row event: '%s'", + (error ? thd->get_stmt_da()->message() : + "unexpected success or fatal error")); + thd->is_slave_error= 1; + } + /* remove trigger's tables */ + goto err; + } + + /* + When the open and locking succeeded, we check all tables to + ensure that they still have the correct type. + */ + + { + DBUG_PRINT("debug", ("Checking compatibility of tables to lock - tables_to_lock: %p", + rgi->tables_to_lock)); + + /** + When using RBR and MyISAM MERGE tables the base tables that make + up the MERGE table can be appended to the list of tables to lock. + + Thus, we just check compatibility for those that tables that have + a correspondent table map event (ie, those that are actually going + to be accessed while applying the event). That's why the loop stops + at rli->tables_to_lock_count . + + NOTE: The base tables are added here are removed when + close_thread_tables is called. 
+    */
+    TABLE_LIST *table_list_ptr= rgi->tables_to_lock;
+    for (uint i=0 ; table_list_ptr && (i < rgi->tables_to_lock_count);
+         table_list_ptr= table_list_ptr->next_global, i++)
+    {
+      /*
+        Below if condition takes care of skipping base tables that
+        make up the MERGE table (which are added by open_tables()
+        call). They are added next to the merge table in the list.
+        For eg: If RPL_TABLE_LIST is t3->t1->t2 (where t1 and t2
+        are base tables for merge table 't3'), open_tables will modify
+        the list by adding t1 and t2 again immediately after t3 in the
+        list (*not at the end of the list*). New table_to_lock list will
+        look like t3->t1'->t2'->t1->t2 (where t1' and t2' are TABLE_LIST
+        objects added by open_tables() call). There is no flag(or logic) in
+        open_tables() that can skip adding these base tables to the list.
+        So the logic here should take care of skipping them.
+
+        tables_to_lock_count logic will take care of skipping base tables
+        that are added at the end of the list.
+        For eg: If RPL_TABLE_LIST is t1->t2->t3, open_tables will modify
+        the list into t1->t2->t3->t1'->t2'. t1' and t2' will be skipped
+        because tables_to_lock_count logic in this for loop.
+      */
+      if (table_list_ptr->parent_l)
+        continue;
+      /*
+        We can use a down cast here since we know that every table added
+        to the tables_to_lock is a RPL_TABLE_LIST (or child table which is
+        skipped above).
+      */
+      RPL_TABLE_LIST *ptr= static_cast<RPL_TABLE_LIST*>(table_list_ptr);
+      DBUG_ASSERT(ptr->m_tabledef_valid);
+      TABLE *conv_table;
+      if (!ptr->m_tabledef.compatible_with(thd, rgi, ptr->table, &conv_table))
+      {
+        DBUG_PRINT("debug", ("Table: %s.%s is not compatible with master",
+                             ptr->table->s->db.str,
+                             ptr->table->s->table_name.str));
+        /*
+          We should not honour --slave-skip-errors at this point as we are
+          having severe errors which should not be skipped.
+ */ + thd->is_slave_error= 1; + /* remove trigger's tables */ + error= ERR_BAD_TABLE_DEF; + goto err; + } + DBUG_PRINT("debug", ("Table: %s.%s is compatible with master" + " - conv_table: %p", + ptr->table->s->db.str, + ptr->table->s->table_name.str, conv_table)); + ptr->m_conv_table= conv_table; + } + } + + /* + ... and then we add all the tables to the table map and but keep + them in the tables to lock list. + + We also invalidate the query cache for all the tables, since + they will now be changed. + + TODO [/Matz]: Maybe the query cache should not be invalidated + here? It might be that a table is not changed, even though it + was locked for the statement. We do know that each + Rows_log_event contain at least one row, so after processing one + Rows_log_event, we can invalidate the query cache for the + associated table. + */ + TABLE_LIST *ptr= rgi->tables_to_lock; + for (uint i=0 ; ptr && (i < rgi->tables_to_lock_count); ptr= ptr->next_global, i++) + { + /* + Please see comment in above 'for' loop to know the reason + for this if condition + */ + if (ptr->parent_l) + continue; + rgi->m_table_map.set_table(ptr->table_id, ptr->table); + /* + Following is passing flag about triggers on the server. The problem was + to pass it between table map event and row event. I do it via extended + TABLE_LIST (RPL_TABLE_LIST) but row event uses only TABLE so I need to + find somehow the corresponding TABLE_LIST. 
+ */ + if (m_table_id == ptr->table_id) + { + ptr->table->master_had_triggers= + ((RPL_TABLE_LIST*)ptr)->master_had_triggers; + } + } + +#ifdef HAVE_QUERY_CACHE + /* + Moved invalidation right before the call to rows_event_stmt_cleanup(), + to avoid query cache being polluted with stale entries, + */ +# ifdef WITH_WSREP + /* Query cache is not invalidated on wsrep applier here */ + if (!(WSREP(thd) && wsrep_thd_is_applying(thd))) +# endif /* WITH_WSREP */ + query_cache.invalidate_locked_for_write(thd, rgi->tables_to_lock); +#endif /* HAVE_QUERY_CACHE */ + } + + table= m_table= rgi->m_table_map.get_table(m_table_id); + + DBUG_PRINT("debug", ("m_table:%p, m_table_id: %llu%s", + m_table, m_table_id, + table && master_had_triggers ? + " (master had triggers)" : "")); + if (table) + { + Rows_log_event::Db_restore_ctx restore_ctx(this); + master_had_triggers= table->master_had_triggers; + bool transactional_table= table->file->has_transactions_and_rollback(); + table->file->prepare_for_insert(get_general_type_code() != WRITE_ROWS_EVENT); + + /* + table == NULL means that this table should not be replicated + (this was set up by Table_map_log_event::do_apply_event() + which tested replicate-* rules). + */ + + if (m_width == table->s->fields && bitmap_is_set_all(&m_cols)) + set_flags(COMPLETE_ROWS_F); + + /* + Set tables write and read sets. + + Read_set contains all slave columns (in case we are going to fetch + a complete record from slave) + + Write_set equals the m_cols bitmap sent from master but it can be + longer if slave has extra columns. 
+ */ + + DBUG_PRINT_BITSET("debug", "Setting table's read_set from: %s", &m_cols); + + bitmap_set_all(table->read_set); + if (get_general_type_code() == DELETE_ROWS_EVENT || + get_general_type_code() == UPDATE_ROWS_EVENT) + bitmap_intersect(table->read_set,&m_cols); + + bitmap_set_all(table->write_set); + table->rpl_write_set= table->write_set; + + /* WRITE ROWS EVENTS store the bitmap in m_cols instead of m_cols_ai */ + MY_BITMAP *after_image= ((get_general_type_code() == UPDATE_ROWS_EVENT) ? + &m_cols_ai : &m_cols); + bitmap_intersect(table->write_set, after_image); + + if (table->versioned()) + { + bitmap_set_bit(table->write_set, table->s->vers.start_fieldno); + bitmap_set_bit(table->write_set, table->s->vers.end_fieldno); + } + + this->slave_exec_mode= slave_exec_mode_options; // fix the mode + + // Do event specific preparations + error= do_before_row_operations(rli); + + /* + Bug#56662 Assertion failed: next_insert_id == 0, file handler.cc + Don't allow generation of auto_increment value when processing + rows event by setting 'MODE_NO_AUTO_VALUE_ON_ZERO'. The exception + to this rule happens when the auto_inc column exists on some + extra columns on the slave. In that case, do not force + MODE_NO_AUTO_VALUE_ON_ZERO. + */ + sql_mode_t saved_sql_mode= thd->variables.sql_mode; + if (!is_auto_inc_in_extra_columns()) + thd->variables.sql_mode= MODE_NO_AUTO_VALUE_ON_ZERO; + + // row processing loop + + /* + set the initial time of this ROWS statement if it was not done + before in some other ROWS event. 
+ */ + rgi->set_row_stmt_start_timestamp(); + + THD_STAGE_INFO(thd, stage_executing); + do + { + /* in_use can have been set to NULL in close_tables_for_reopen */ + THD* old_thd= table->in_use; + if (!table->in_use) + table->in_use= thd; + + error= do_exec_row(rgi); + + if (unlikely(error)) + DBUG_PRINT("info", ("error: %s", HA_ERR(error))); + DBUG_ASSERT(error != HA_ERR_RECORD_DELETED); + + table->in_use = old_thd; + + if (unlikely(error)) + { + int actual_error= convert_handler_error(error, thd, table); + bool idempotent_error= (idempotent_error_code(error) && + (slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT)); + bool ignored_error= (idempotent_error == 0 ? + ignored_error_code(actual_error) : 0); + +#ifdef WITH_WSREP + if (WSREP(thd) && thd->wsrep_applier && + wsrep_ignored_error_code(this, actual_error)) + { + idempotent_error= true; + thd->wsrep_has_ignored_error= true; + } +#endif /* WITH_WSREP */ + if (idempotent_error || ignored_error) + { + if (global_system_variables.log_warnings) + slave_rows_error_report(WARNING_LEVEL, error, rgi, thd, table, + get_type_str(), + RPL_LOG_NAME, log_pos); + thd->clear_error(1); + error= 0; + if (idempotent_error == 0) + break; + } + } + + /* + If m_curr_row_end was not set during event execution (e.g., because + of errors) we can't proceed to the next row. If the error is transient + (i.e., error==0 at this point) we must call unpack_current_row() to set + m_curr_row_end. 
+ */ + + DBUG_PRINT("info", ("curr_row: %p; curr_row_end: %p; rows_end:%p", + m_curr_row, m_curr_row_end, m_rows_end)); + + if (!m_curr_row_end && likely(!error)) + error= unpack_current_row(rgi); + + m_curr_row= m_curr_row_end; + + if (likely(error == 0) && !transactional_table) + thd->transaction->all.modified_non_trans_table= + thd->transaction->stmt.modified_non_trans_table= TRUE; + if (likely(error == 0)) + { + error= thd->killed_errno(); + if (error && !thd->is_error()) + my_error(error, MYF(0)); + } + } // row processing loop + while (error == 0 && (m_curr_row != m_rows_end)); + + /* + Restore the sql_mode after the rows event is processed. + */ + thd->variables.sql_mode= saved_sql_mode; + + {/** + The following failure injecion works in cooperation with tests + setting @@global.debug= 'd,stop_slave_middle_group'. + The sql thread receives the killed status and will proceed + to shutdown trying to finish incomplete events group. + */ + DBUG_EXECUTE_IF("stop_slave_middle_group", + if (thd->transaction->all.modified_non_trans_table) + const_cast(rli)->abort_slave= 1;); + } + + if (unlikely(error= do_after_row_operations(rli, error)) && + ignored_error_code(convert_handler_error(error, thd, table))) + { + + if (global_system_variables.log_warnings) + slave_rows_error_report(WARNING_LEVEL, error, rgi, thd, table, + get_type_str(), + RPL_LOG_NAME, log_pos); + thd->clear_error(1); + error= 0; + } + } // if (table) + + + if (unlikely(error)) + { + slave_rows_error_report(ERROR_LEVEL, error, rgi, thd, table, + get_type_str(), + RPL_LOG_NAME, log_pos); + /* + @todo We should probably not call + reset_current_stmt_binlog_format_row() from here. + + Note: this applies to log_event_old.cc too. 
+ /Sven + */ + thd->reset_current_stmt_binlog_format_row(); + thd->is_slave_error= 1; + /* remove trigger's tables */ + goto err; + } + + /* remove trigger's tables */ + restore_empty_query_table_list(thd->lex); + +#if defined(WITH_WSREP) && defined(HAVE_QUERY_CACHE) + if (WSREP(thd) && wsrep_thd_is_applying(thd)) + query_cache.invalidate_locked_for_write(thd, rgi->tables_to_lock); +#endif /* WITH_WSREP && HAVE_QUERY_CACHE */ + + if (get_flags(STMT_END_F)) + { + if (unlikely((error= rows_event_stmt_cleanup(rgi, thd)))) + slave_rows_error_report(ERROR_LEVEL, thd->is_error() ? 0 : error, + rgi, thd, table, get_type_str(), + RPL_LOG_NAME, log_pos); + if (thd->slave_thread) + free_root(thd->mem_root, MYF(MY_KEEP_PREALLOC)); + } + + thd->reset_query_timer(); + DBUG_RETURN(error); + +err: + restore_empty_query_table_list(thd->lex); + rgi->slave_close_thread_tables(thd); + thd->reset_query_timer(); + DBUG_RETURN(error); +} + +Log_event::enum_skip_reason +Rows_log_event::do_shall_skip(rpl_group_info *rgi) +{ + /* + If the slave skip counter is 1 and this event does not end a + statement, then we should not start executing on the next event. + Otherwise, we defer the decision to the normal skipping logic. + */ + if (rgi->rli->slave_skip_counter == 1 && !get_flags(STMT_END_F)) + return Log_event::EVENT_SKIP_IGNORE; + else + return Log_event::do_shall_skip(rgi); +} + +/** + The function is called at Rows_log_event statement commit time, + normally from Rows_log_event::do_update_pos() and possibly from + Query_log_event::do_apply_event() of the COMMIT. + The function commits the last statement for engines, binlog and + releases resources have been allocated for the statement. + + @retval 0 Ok. + @retval non-zero Error at the commit. 
+ */ + +static int rows_event_stmt_cleanup(rpl_group_info *rgi, THD * thd) +{ + int error; + DBUG_ENTER("rows_event_stmt_cleanup"); + + { + /* + This is the end of a statement or transaction, so close (and + unlock) the tables we opened when processing the + Table_map_log_event starting the statement. + + OBSERVER. This will clear *all* mappings, not only those that + are open for the table. There is not good handle for on-close + actions for tables. + + NOTE. Even if we have no table ('table' == 0) we still need to be + here, so that we increase the group relay log position. If we didn't, we + could have a group relay log position which lags behind "forever" + (assume the last master's transaction is ignored by the slave because of + replicate-ignore rules). + */ + error= thd->binlog_flush_pending_rows_event(TRUE); + + /* + If this event is not in a transaction, the call below will, if some + transactional storage engines are involved, commit the statement into + them and flush the pending event to binlog. + If this event is in a transaction, the call will do nothing, but a + Xid_log_event will come next which will, if some transactional engines + are involved, commit the transaction and flush the pending event to the + binlog. + If there was a deadlock the transaction should have been rolled back + already. So there should be no need to rollback the transaction. + */ + DBUG_ASSERT(! thd->transaction_rollback_request); + error|= (int)(error ? trans_rollback_stmt(thd) : trans_commit_stmt(thd)); + + /* + Now what if this is not a transactional engine? we still need to + flush the pending event to the binlog; we did it with + thd->binlog_flush_pending_rows_event(). Note that we imitate + what is done for real queries: a call to + ha_autocommit_or_rollback() (sometimes only if involves a + transactional engine), and a call to be sure to have the pending + event flushed. + */ + + /* + @todo We should probably not call + reset_current_stmt_binlog_format_row() from here. 
+ + Note: this applies to log_event_old.cc too + + Btw, the previous comment about transactional engines does not + seem related to anything that happens here. + /Sven + */ + thd->reset_current_stmt_binlog_format_row(); + + /* + Reset modified_non_trans_table that we have set in + rows_log_event::do_apply_event() + */ + if (!thd->in_multi_stmt_transaction_mode()) + { + thd->transaction->all.modified_non_trans_table= 0; + thd->transaction->all.m_unsafe_rollback_flags&= ~THD_TRANS::DID_WAIT; + } + + rgi->cleanup_context(thd, 0); + } + DBUG_RETURN(error); +} + +/** + The method either increments the relay log position or + commits the current statement and increments the master group + possition if the event is STMT_END_F flagged and + the statement corresponds to the autocommit query (i.e replicated + without wrapping in BEGIN/COMMIT) + + @retval 0 Success + @retval non-zero Error in the statement commit + */ +int +Rows_log_event::do_update_pos(rpl_group_info *rgi) +{ + Relay_log_info *rli= rgi->rli; + int error= 0; + DBUG_ENTER("Rows_log_event::do_update_pos"); + + DBUG_PRINT("info", ("flags: %s", + get_flags(STMT_END_F) ? "STMT_END_F " : "")); + + if (get_flags(STMT_END_F)) + { + /* + Indicate that a statement is finished. + Step the group log position if we are not in a transaction, + otherwise increase the event log position. + */ + error= rli->stmt_done(log_pos, thd, rgi); + /* + Clear any errors in thd->net.last_err*. It is not known if this is + needed or not. It is believed that any errors that may exist in + thd->net.last_err* are allowed. 
Examples of errors are "key not + found", which is produced in the test case rpl_row_conflicts.test + */ + thd->clear_error(); + } + else + { + rgi->inc_event_relay_log_pos(); + } + + DBUG_RETURN(error); +} + +#endif /* defined(HAVE_REPLICATION) */ + + +bool Rows_log_event::write_data_header() +{ + uchar buf[ROWS_HEADER_LEN_V2]; // No need to init the buffer + DBUG_ASSERT(m_table_id != ~0ULL); + DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", + { + int4store(buf + 0, m_table_id); + int2store(buf + 4, m_flags); + return (write_data(buf, 6)); + }); + int6store(buf + RW_MAPID_OFFSET, m_table_id); + int2store(buf + RW_FLAGS_OFFSET, m_flags); + return write_data(buf, ROWS_HEADER_LEN); +} + +bool Rows_log_event::write_data_body() +{ + /* + Note that this should be the number of *bits*, not the number of + bytes. + */ + uchar sbuf[MAX_INT_WIDTH]; + my_ptrdiff_t const data_size= m_rows_cur - m_rows_buf; + bool res= false; + uchar *const sbuf_end= net_store_length(sbuf, (size_t) m_width); + DBUG_ASSERT(static_cast(sbuf_end - sbuf) <= sizeof(sbuf)); + + DBUG_DUMP("m_width", sbuf, (size_t) (sbuf_end - sbuf)); + res= res || write_data(sbuf, (size_t) (sbuf_end - sbuf)); + + DBUG_DUMP("m_cols", (uchar*) m_cols.bitmap, no_bytes_in_map(&m_cols)); + res= res || write_data((uchar*)m_cols.bitmap, no_bytes_in_map(&m_cols)); + /* + TODO[refactor write]: Remove the "down cast" here (and elsewhere). 
+ */ + if (get_general_type_code() == UPDATE_ROWS_EVENT) + { + DBUG_DUMP("m_cols_ai", (uchar*) m_cols_ai.bitmap, + no_bytes_in_map(&m_cols_ai)); + res= res || write_data((uchar*)m_cols_ai.bitmap, + no_bytes_in_map(&m_cols_ai)); + } + DBUG_DUMP("rows", m_rows_buf, data_size); + res= res || write_data(m_rows_buf, (size_t) data_size); + + return res; + +} + +bool Rows_log_event::write_compressed() +{ + uchar *m_rows_buf_tmp= m_rows_buf; + uchar *m_rows_cur_tmp= m_rows_cur; + bool ret= true; + uint32 comlen, alloc_size; + comlen= alloc_size= binlog_get_compress_len((uint32)(m_rows_cur_tmp - + m_rows_buf_tmp)); + m_rows_buf= (uchar*) my_safe_alloca(alloc_size); + if(m_rows_buf && + !binlog_buf_compress(m_rows_buf_tmp, m_rows_buf, + (uint32)(m_rows_cur_tmp - m_rows_buf_tmp), &comlen)) + { + m_rows_cur= comlen + m_rows_buf; + ret= Log_event::write(); + } + my_safe_afree(m_rows_buf, alloc_size); + m_rows_buf= m_rows_buf_tmp; + m_rows_cur= m_rows_cur_tmp; + return ret; +} + + +#if defined(HAVE_REPLICATION) +void Rows_log_event::pack_info(Protocol *protocol) +{ + char buf[256]; + char const *const flagstr= + get_flags(STMT_END_F) ? 
" flags: STMT_END_F" : ""; + size_t bytes= my_snprintf(buf, sizeof(buf), + "table_id: %llu%s", m_table_id, flagstr); + protocol->store(buf, bytes, &my_charset_bin); +} +#endif + + +/************************************************************************** + Annotate_rows_log_event member functions +**************************************************************************/ + +Annotate_rows_log_event::Annotate_rows_log_event(THD *thd, + bool using_trans, + bool direct) + : Log_event(thd, 0, using_trans), + m_save_thd_query_txt(0), + m_save_thd_query_len(0), + m_saved_thd_query(false), + m_used_query_txt(0) +{ + m_query_txt= thd->query(); + m_query_len= thd->query_length(); + if (direct) + cache_type= Log_event::EVENT_NO_CACHE; +} + + +bool Annotate_rows_log_event::write_data_header() +{ + return 0; +} + + +bool Annotate_rows_log_event::write_data_body() +{ + return write_data(m_query_txt, m_query_len); +} + + +#if defined(HAVE_REPLICATION) +void Annotate_rows_log_event::pack_info(Protocol* protocol) +{ + if (m_query_txt && m_query_len) + protocol->store(m_query_txt, m_query_len, &my_charset_bin); +} +#endif + + +#if defined(HAVE_REPLICATION) +int Annotate_rows_log_event::do_apply_event(rpl_group_info *rgi) +{ + rgi->free_annotate_event(); + m_save_thd_query_txt= thd->query(); + m_save_thd_query_len= thd->query_length(); + m_saved_thd_query= true; + m_used_query_txt= 1; + thd->set_query(m_query_txt, m_query_len); + return 0; +} +#endif + + +#if defined(HAVE_REPLICATION) +int Annotate_rows_log_event::do_update_pos(rpl_group_info *rgi) +{ + rgi->inc_event_relay_log_pos(); + return 0; +} +#endif + + +#if defined(HAVE_REPLICATION) +Log_event::enum_skip_reason +Annotate_rows_log_event::do_shall_skip(rpl_group_info *rgi) +{ + return continue_group(rgi); +} +#endif + +/************************************************************************** + Table_map_log_event member functions and support functions 
+**************************************************************************/ + +/** + Save the field metadata based on the real_type of the field. + The metadata saved depends on the type of the field. Some fields + store a single byte for pack_length() while others store two bytes + for field_length (max length). + + @retval 0 Ok. + + @todo + We may want to consider changing the encoding of the information. + Currently, the code attempts to minimize the number of bytes written to + the tablemap. There are at least two other alternatives; 1) using + net_store_length() to store the data allowing it to choose the number of + bytes that are appropriate thereby making the code much easier to + maintain (only 1 place to change the encoding), or 2) use a fixed number + of bytes for each field. The problem with option 1 is that net_store_length() + will use one byte if the value < 251, but 3 bytes if it is > 250. Thus, + for fields like CHAR which can be no larger than 255 characters, the method + will use 3 bytes when the value is > 250. Further, every value that is + encoded using 2 parts (e.g., pack_length, field_length) will be numerically + > 250 therefore will use 3 bytes for eah value. The problem with option 2 + is less wasteful for space but does waste 1 byte for every field that does + not encode 2 parts. +*/ +int Table_map_log_event::save_field_metadata() +{ + DBUG_ENTER("Table_map_log_event::save_field_metadata"); + int index= 0; + Binlog_type_info *info; + for (unsigned int i= 0 ; i < m_table->s->fields ; i++) + { + DBUG_PRINT("debug", ("field_type: %d", m_coltype[i])); + info= binlog_type_info_array + i; + int2store(&m_field_metadata[index], info->m_metadata); + index+= info->m_metadata_size; + DBUG_EXECUTE_IF("inject_invalid_blob_size", + { + if (m_coltype[i] == MYSQL_TYPE_BLOB) + m_field_metadata[index-1] = 5; + }); + } + DBUG_RETURN(index); +} + + +/* + Constructor used to build an event for writing to the binary log. 
+ Mats says tbl->s lives longer than this event so it's ok to copy pointers + (tbl->s->db etc) and not pointer content. + */ +Table_map_log_event::Table_map_log_event(THD *thd, TABLE *tbl, ulong tid, + bool is_transactional) + : Log_event(thd, 0, is_transactional), + m_table(tbl), + m_dbnam(tbl->s->db.str), + m_dblen(m_dbnam ? tbl->s->db.length : 0), + m_tblnam(tbl->s->table_name.str), + m_tbllen(tbl->s->table_name.length), + m_colcnt(tbl->s->fields), + m_memory(NULL), + m_table_id(tid), + m_flags(TM_BIT_LEN_EXACT_F), + m_data_size(0), + m_field_metadata(0), + m_field_metadata_size(0), + m_null_bits(0), + m_meta_memory(NULL), + m_optional_metadata_len(0), + m_optional_metadata(NULL) +{ + uchar cbuf[MAX_INT_WIDTH]; + uchar *cbuf_end; + DBUG_ENTER("Table_map_log_event::Table_map_log_event(TABLE)"); + DBUG_ASSERT(m_table_id != ~0ULL); + /* + In TABLE_SHARE, "db" and "table_name" are 0-terminated (see this comment in + table.cc / alloc_table_share(): + Use the fact the key is db/0/table_name/0 + As we rely on this let's assert it. 
+ */ + DBUG_ASSERT((tbl->s->db.str == 0) || + (tbl->s->db.str[tbl->s->db.length] == 0)); + DBUG_ASSERT(tbl->s->table_name.str[tbl->s->table_name.length] == 0); + + binlog_type_info_array= (Binlog_type_info *)thd->alloc(m_table->s->fields * + sizeof(Binlog_type_info)); + for (uint i= 0; i < m_table->s->fields; i++) + binlog_type_info_array[i]= m_table->field[i]->binlog_type_info(); + + m_data_size= TABLE_MAP_HEADER_LEN; + DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", m_data_size= 6;); + m_data_size+= m_dblen + 2; // Include length and terminating \0 + m_data_size+= m_tbllen + 2; // Include length and terminating \0 + cbuf_end= net_store_length(cbuf, (size_t) m_colcnt); + DBUG_ASSERT(static_cast(cbuf_end - cbuf) <= sizeof(cbuf)); + m_data_size+= (cbuf_end - cbuf) + m_colcnt; // COLCNT and column types + + if (tbl->triggers) + m_flags|= TM_BIT_HAS_TRIGGERS_F; + + /* If malloc fails, caught in is_valid() */ + if ((m_memory= (uchar*) my_malloc(PSI_INSTRUMENT_ME, m_colcnt, MYF(MY_WME)))) + { + m_coltype= reinterpret_cast(m_memory); + for (unsigned int i= 0 ; i < m_table->s->fields ; ++i) + m_coltype[i]= binlog_type_info_array[i].m_type_code; + DBUG_EXECUTE_IF("inject_invalid_column_type", m_coltype[1]= 230;); + } + + /* + Calculate a bitmap for the results of maybe_null() for all columns. + The bitmap is used to determine when there is a column from the master + that is not on the slave and is null and thus not in the row data during + replication. + */ + uint num_null_bytes= (m_table->s->fields + 7) / 8; + m_data_size+= num_null_bytes; + m_meta_memory= (uchar *)my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME), + &m_null_bits, num_null_bytes, + &m_field_metadata, (m_colcnt * 2), + NULL); + + bzero(m_field_metadata, (m_colcnt * 2)); + + /* + Create an array for the field metadata and store it. 
+ */ + m_field_metadata_size= save_field_metadata(); + DBUG_ASSERT(m_field_metadata_size <= (m_colcnt * 2)); + + /* + Now set the size of the data to the size of the field metadata array + plus one or three bytes (see pack.c:net_store_length) for number of + elements in the field metadata array. + */ + if (m_field_metadata_size < 251) + m_data_size+= m_field_metadata_size + 1; + else + m_data_size+= m_field_metadata_size + 3; + + bzero(m_null_bits, num_null_bytes); + for (unsigned int i= 0 ; i < m_table->s->fields ; ++i) + if (m_table->field[i]->maybe_null()) + m_null_bits[(i / 8)]+= 1 << (i % 8); + + init_metadata_fields(); + m_data_size+= m_metadata_buf.length(); + + DBUG_VOID_RETURN; +} + + +/* + Return value is an error code, one of: + + -1 Failure to open table [from open_tables()] + 0 Success + 1 No room for more tables [from set_table()] + 2 Out of memory [from set_table()] + 3 Wrong table definition + 4 Daisy-chaining RBR with SBR not possible + */ + +#if defined(HAVE_REPLICATION) + +enum enum_tbl_map_status +{ + /* no duplicate identifier found */ + OK_TO_PROCESS= 0, + + /* this table map must be filtered out */ + FILTERED_OUT= 1, + + /* identifier mapping table with different properties */ + SAME_ID_MAPPING_DIFFERENT_TABLE= 2, + + /* a duplicate identifier was found mapping the same table */ + SAME_ID_MAPPING_SAME_TABLE= 3 +}; + +/* + Checks if this table map event should be processed or not. First + it checks the filtering rules, and then looks for duplicate identifiers + in the existing list of rli->tables_to_lock. + + It checks that there hasn't been any corruption by verifying that there + are no duplicate entries with different properties. + + In some cases, some binary logs could get corrupted, showing several + tables mapped to the same table_id, 0 (see: BUG#56226). Thus we do this + early sanity check for such cases and avoid that the server crashes + later. 
+ + In some corner cases, the master logs duplicate table map events, i.e., + same id, same database name, same table name (see: BUG#37137). This is + different from the above as it's the same table that is mapped again + to the same identifier. Thus we cannot just check for same ids and + assume that the event is corrupted we need to check every property. + + NOTE: in the event that BUG#37137 ever gets fixed, this extra check + will still be valid because we would need to support old binary + logs anyway. + + @param rli The relay log info reference. + @param table_list A list element containing the table to check against. + @return OK_TO_PROCESS + if there was no identifier already in rli->tables_to_lock + + FILTERED_OUT + if the event is filtered according to the filtering rules + + SAME_ID_MAPPING_DIFFERENT_TABLE + if the same identifier already maps a different table in + rli->tables_to_lock + + SAME_ID_MAPPING_SAME_TABLE + if the same identifier already maps the same table in + rli->tables_to_lock. 
+*/ +static enum_tbl_map_status +check_table_map(rpl_group_info *rgi, RPL_TABLE_LIST *table_list) +{ + DBUG_ENTER("check_table_map"); + enum_tbl_map_status res= OK_TO_PROCESS; + Relay_log_info *rli= rgi->rli; + if ((rgi->thd->slave_thread /* filtering is for slave only */ || + IF_WSREP((WSREP(rgi->thd) && rgi->thd->wsrep_applier), 0)) && + (!rli->mi->rpl_filter->db_ok(table_list->db.str) || + (rli->mi->rpl_filter->is_on() && !rli->mi->rpl_filter->tables_ok("", table_list)))) + res= FILTERED_OUT; + else + { + RPL_TABLE_LIST *ptr= static_cast(rgi->tables_to_lock); + for(uint i=0 ; ptr && (i< rgi->tables_to_lock_count); + ptr= static_cast(ptr->next_local), i++) + { + if (ptr->table_id == table_list->table_id) + { + + if (cmp(&ptr->db, &table_list->db) || + cmp(&ptr->alias, &table_list->table_name) || + ptr->lock_type != TL_WRITE) // the ::do_apply_event always sets TL_WRITE + res= SAME_ID_MAPPING_DIFFERENT_TABLE; + else + res= SAME_ID_MAPPING_SAME_TABLE; + + break; + } + } + } + + DBUG_PRINT("debug", ("check of table map ended up with: %u", res)); + + DBUG_RETURN(res); +} + +int Table_map_log_event::do_apply_event(rpl_group_info *rgi) +{ + RPL_TABLE_LIST *table_list; + char *db_mem, *tname_mem, *ptr; + size_t dummy_len, db_mem_length, tname_mem_length; + void *memory; + Rpl_filter *filter; + Relay_log_info const *rli= rgi->rli; + DBUG_ENTER("Table_map_log_event::do_apply_event(Relay_log_info*)"); + + /* Step the query id to mark what columns that are actually used. 
*/ + thd->set_query_id(next_query_id()); + + if (!(memory= my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME), + &table_list, (uint) sizeof(RPL_TABLE_LIST), + &db_mem, (uint) NAME_LEN + 1, + &tname_mem, (uint) NAME_LEN + 1, + NullS))) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + db_mem_length= strmov(db_mem, m_dbnam) - db_mem; + tname_mem_length= strmov(tname_mem, m_tblnam) - tname_mem; + if (lower_case_table_names) + { + my_casedn_str(files_charset_info, (char*)tname_mem); + my_casedn_str(files_charset_info, (char*)db_mem); + } + + /* call from mysql_client_binlog_statement() will not set rli->mi */ + filter= rgi->thd->slave_thread ? rli->mi->rpl_filter : global_rpl_filter; + + /* rewrite rules changed the database */ + if (((ptr= (char*) filter->get_rewrite_db(db_mem, &dummy_len)) != db_mem)) + db_mem_length= strmov(db_mem, ptr) - db_mem; + + LEX_CSTRING tmp_db_name= {db_mem, db_mem_length }; + LEX_CSTRING tmp_tbl_name= {tname_mem, tname_mem_length }; + + table_list->init_one_table(&tmp_db_name, &tmp_tbl_name, 0, TL_WRITE); + table_list->table_id= DBUG_IF("inject_tblmap_same_id_maps_diff_table") ? 0 : m_table_id; + table_list->updating= 1; + table_list->required_type= TABLE_TYPE_NORMAL; + + DBUG_PRINT("debug", ("table: %s is mapped to %llu", + table_list->table_name.str, + table_list->table_id)); + table_list->master_had_triggers= ((m_flags & TM_BIT_HAS_TRIGGERS_F) ? 1 : 0); + DBUG_PRINT("debug", ("table->master_had_triggers=%d", + (int)table_list->master_had_triggers)); + + enum_tbl_map_status tblmap_status= check_table_map(rgi, table_list); + if (tblmap_status == OK_TO_PROCESS) + { + DBUG_ASSERT(thd->lex->query_tables != table_list); + + /* + Use placement new to construct the table_def instance in the + memory allocated for it inside table_list. + + The memory allocated by the table_def structure (i.e., not the + memory allocated *for* the table_def structure) is released + inside Relay_log_info::clear_tables_to_lock() by calling the + table_def destructor explicitly. 
+ */ + new (&table_list->m_tabledef) + table_def(m_coltype, m_colcnt, + m_field_metadata, m_field_metadata_size, + m_null_bits, m_flags); + table_list->m_tabledef_valid= TRUE; + table_list->m_conv_table= NULL; + table_list->open_type= OT_BASE_ONLY; + + /* + We record in the slave's information that the table should be + locked by linking the table into the list of tables to lock. + */ + table_list->next_global= table_list->next_local= rgi->tables_to_lock; + rgi->tables_to_lock= table_list; + rgi->tables_to_lock_count++; + /* 'memory' is freed in clear_tables_to_lock */ + } + else // FILTERED_OUT, SAME_ID_MAPPING_* + { + /* + If mapped already but with different properties, we raise an + error. + If mapped already but with same properties we skip the event. + If filtered out we skip the event. + + In all three cases, we need to free the memory previously + allocated. + */ + if (tblmap_status == SAME_ID_MAPPING_DIFFERENT_TABLE) + { + /* + Something bad has happened. We need to stop the slave as strange things + could happen if we proceed: slave crash, wrong table being updated, ... + As a consequence we push an error in this case. + */ + + char buf[256]; + + my_snprintf(buf, sizeof(buf), + "Found table map event mapping table id %u which " + "was already mapped but with different settings.", + table_list->table_id); + + if (thd->slave_thread) + rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, rgi->gtid_info(), + ER_THD(thd, ER_SLAVE_FATAL_ERROR), buf); + else + /* + For the cases in which a 'BINLOG' statement is set to + execute in a user session + */ + my_error(ER_SLAVE_FATAL_ERROR, MYF(0), buf); + } + + my_free(memory); + } + + DBUG_RETURN(tblmap_status == SAME_ID_MAPPING_DIFFERENT_TABLE); +} + +Log_event::enum_skip_reason +Table_map_log_event::do_shall_skip(rpl_group_info *rgi) +{ + /* + If the slave skip counter is 1, then we should not start executing + on the next event. 
+ */ + return continue_group(rgi); +} + +int Table_map_log_event::do_update_pos(rpl_group_info *rgi) +{ + rgi->inc_event_relay_log_pos(); + return 0; +} + +#endif /* defined(HAVE_REPLICATION) */ + +bool Table_map_log_event::write_data_header() +{ + DBUG_ASSERT(m_table_id != ~0ULL); + uchar buf[TABLE_MAP_HEADER_LEN]; + DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", + { + int4store(buf + 0, m_table_id); + int2store(buf + 4, m_flags); + return (write_data(buf, 6)); + }); + int6store(buf + TM_MAPID_OFFSET, m_table_id); + int2store(buf + TM_FLAGS_OFFSET, m_flags); + return write_data(buf, TABLE_MAP_HEADER_LEN); +} + +bool Table_map_log_event::write_data_body() +{ + DBUG_ASSERT(m_dbnam != NULL); + DBUG_ASSERT(m_tblnam != NULL); + /* We use only one byte per length for storage in event: */ + DBUG_ASSERT(m_dblen <= MY_MIN(NAME_LEN, 255)); + DBUG_ASSERT(m_tbllen <= MY_MIN(NAME_LEN, 255)); + + uchar const dbuf[]= { (uchar) m_dblen }; + uchar const tbuf[]= { (uchar) m_tbllen }; + + uchar cbuf[MAX_INT_WIDTH]; + uchar *const cbuf_end= net_store_length(cbuf, (size_t) m_colcnt); + DBUG_ASSERT(static_cast(cbuf_end - cbuf) <= sizeof(cbuf)); + + /* + Store the size of the field metadata. + */ + uchar mbuf[MAX_INT_WIDTH]; + uchar *const mbuf_end= net_store_length(mbuf, m_field_metadata_size); + + return write_data(dbuf, sizeof(dbuf)) || + write_data(m_dbnam, m_dblen+1) || + write_data(tbuf, sizeof(tbuf)) || + write_data(m_tblnam, m_tbllen+1) || + write_data(cbuf, (size_t) (cbuf_end - cbuf)) || + write_data(m_coltype, m_colcnt) || + write_data(mbuf, (size_t) (mbuf_end - mbuf)) || + write_data(m_field_metadata, m_field_metadata_size), + write_data(m_null_bits, (m_colcnt + 7) / 8) || + write_data((const uchar*) m_metadata_buf.ptr(), + m_metadata_buf.length()); + } + +/** + stores an integer into packed format. + + @param[out] str_buf a buffer where the packed integer will be stored. + @param[in] length the integer will be packed. 
+ */ +static inline +void store_compressed_length(String &str_buf, ulonglong length) +{ + // Store Type and packed length + uchar buf[4]; + uchar *buf_ptr = net_store_length(buf, length); + + str_buf.append(reinterpret_cast(buf), buf_ptr-buf); +} + +/** + Write data into str_buf with Type|Length|Value(TLV) format. + + @param[out] str_buf a buffer where the field is stored. + @param[in] type type of the field + @param[in] length length of the field value + @param[in] value value of the field +*/ +static inline +bool write_tlv_field(String &str_buf, + enum Table_map_log_event::Optional_metadata_field_type + type, uint length, const uchar *value) +{ + /* type is stored in one byte, so it should never bigger than 255. */ + DBUG_ASSERT(static_cast(type) <= 255); + str_buf.append((char) type); + store_compressed_length(str_buf, length); + return str_buf.append(reinterpret_cast(value), length); +} + +/** + Write data into str_buf with Type|Length|Value(TLV) format. + + @param[out] str_buf a buffer where the field is stored. 
+ @param[in] type type of the field + @param[in] value value of the field +*/ +static inline +bool write_tlv_field(String &str_buf, + enum Table_map_log_event::Optional_metadata_field_type + type, const String &value) +{ + return write_tlv_field(str_buf, type, value.length(), + reinterpret_cast(value.ptr())); +} + +static inline bool is_character_field(Binlog_type_info *info_array, Field *field) +{ + Binlog_type_info *info= info_array + field->field_index; + if (!info->m_cs) + return 0; + if (info->m_set_typelib || info->m_enum_typelib) + return 0; + return 1; +} + +static inline bool is_enum_or_set_field(Binlog_type_info *info_array, Field *field) { + Binlog_type_info *info= info_array + field->field_index; + if (info->m_set_typelib || info->m_enum_typelib) + return 1; + return 0; +} + + +void Table_map_log_event::init_metadata_fields() +{ + DBUG_ENTER("init_metadata_fields"); + DBUG_EXECUTE_IF("simulate_no_optional_metadata", DBUG_VOID_RETURN;); + + if (binlog_row_metadata == BINLOG_ROW_METADATA_NO_LOG) + DBUG_VOID_RETURN; + if (init_signedness_field() || + init_charset_field(&is_character_field, DEFAULT_CHARSET, + COLUMN_CHARSET) || + init_geometry_type_field()) + { + m_metadata_buf.length(0); + DBUG_VOID_RETURN; + } + + if (binlog_row_metadata == BINLOG_ROW_METADATA_FULL) + { + if ((!DBUG_IF("dont_log_column_name") && init_column_name_field()) || + init_charset_field(&is_enum_or_set_field, ENUM_AND_SET_DEFAULT_CHARSET, + ENUM_AND_SET_COLUMN_CHARSET) || + init_set_str_value_field() || + init_enum_str_value_field() || + init_primary_key_field()) + m_metadata_buf.length(0); + } + DBUG_VOID_RETURN; +} + +bool Table_map_log_event::init_signedness_field() +{ + /* use it to store signed flags, each numeric column take a bit. 
*/ + StringBuffer<128> buf; + unsigned char flag= 0; + unsigned char mask= 0x80; + Binlog_type_info *info; + + for (unsigned int i= 0 ; i < m_table->s->fields ; ++i) + { + info= binlog_type_info_array + i; + if (info->m_signedness != Binlog_type_info::SIGN_NOT_APPLICABLE) + { + if (info->m_signedness == Binlog_type_info::SIGN_UNSIGNED) + flag|= mask; + mask >>= 1; + + // 8 fields are tested, store the result and clear the flag. + if (mask == 0) + { + buf.append(flag); + flag= 0; + mask= 0x80; + } + } + } + + // Stores the signedness flags of last few columns + if (mask != 0x80) + buf.append(flag); + + // The table has no numeric column, so don't log SIGNEDNESS field + if (buf.is_empty()) + return false; + + return write_tlv_field(m_metadata_buf, SIGNEDNESS, buf); +} + +bool Table_map_log_event::init_charset_field( + bool (* include_type)(Binlog_type_info *, Field *), + Optional_metadata_field_type default_charset_type, + Optional_metadata_field_type column_charset_type) +{ + DBUG_EXECUTE_IF("simulate_init_charset_field_error", return true;); + + std::map collation_map; + // For counting characters columns + uint char_col_cnt= 0; + + /* Find the collation number used by most fields */ + for (unsigned int i= 0 ; i < m_table->s->fields ; ++i) + { + if ((*include_type)(binlog_type_info_array, m_table->field[i])) + { + collation_map[binlog_type_info_array[i].m_cs->number]++; + char_col_cnt++; + } + } + + if (char_col_cnt == 0) + return false; + + /* Find the most used collation */ + uint most_used_collation= 0; + uint most_used_count= 0; + for (std::map::iterator it= collation_map.begin(); + it != collation_map.end(); it++) + { + if (it->second > most_used_count) + { + most_used_count= it->second; + most_used_collation= it->first; + } + } + + /* + Comparing length of COLUMN_CHARSET field and COLUMN_CHARSET_WITH_DEFAULT + field to decide which field should be logged. + + Length of COLUMN_CHARSET = character column count * collation id size. 
+ Length of COLUMN_CHARSET_WITH_DEFAULT = + default collation_id size + count of columns not use default charset * + (column index size + collation id size) + + Assume column index just uses 1 byte and collation number also uses 1 byte. + */ + if (char_col_cnt * 1 < (1 + (char_col_cnt - most_used_count) * 2)) + { + StringBuffer<512> buf; + + /* + Stores character set information into COLUMN_CHARSET format, + character sets of all columns are stored one by one. + ----------------------------------------- + | Charset number | .... |Charset number | + ----------------------------------------- + */ + for (unsigned int i= 0 ; i < m_table->s->fields ; ++i) + { + if (include_type(binlog_type_info_array, m_table->field[i])) + store_compressed_length(buf, binlog_type_info_array[i].m_cs->number); + } + return write_tlv_field(m_metadata_buf, column_charset_type, buf); + } + else + { + StringBuffer<512> buf; + uint char_column_index= 0; + uint default_collation= most_used_collation; + + /* + Stores character set information into DEFAULT_CHARSET format, + First stores the default character set, and then stores the character + sets different to default character with their column index one by one. + -------------------------------------------------------- + | Default Charset | Col Index | Charset number | ... 
| + -------------------------------------------------------- + */ + + // Store the default collation number + store_compressed_length(buf, default_collation); + + for (unsigned int i= 0 ; i < m_table->s->fields ; ++i) + { + if (include_type(binlog_type_info_array, m_table->field[i])) + { + CHARSET_INFO *cs= binlog_type_info_array[i].m_cs; + DBUG_ASSERT(cs); + if (cs->number != default_collation) + { + store_compressed_length(buf, char_column_index); + store_compressed_length(buf, cs->number); + } + char_column_index++; + } + } + return write_tlv_field(m_metadata_buf, default_charset_type, buf); + } +} + +bool Table_map_log_event::init_column_name_field() +{ + StringBuffer<2048> buf; + + for (unsigned int i= 0 ; i < m_table->s->fields ; ++i) + { + size_t len= m_table->field[i]->field_name.length; + + store_compressed_length(buf, len); + buf.append(m_table->field[i]->field_name.str, len); + } + return write_tlv_field(m_metadata_buf, COLUMN_NAME, buf); +} + +bool Table_map_log_event::init_set_str_value_field() +{ + StringBuffer<1024> buf; + TYPELIB *typelib; + + /* + SET string values are stored in the same format: + ---------------------------------------------- + | Value number | value1 len | value 1| .... | // first SET column + ---------------------------------------------- + | Value number | value1 len | value 1| .... 
| // second SET column + ---------------------------------------------- + */ + for (unsigned int i= 0 ; i < m_table->s->fields ; ++i) + { + if ((typelib= binlog_type_info_array[i].m_set_typelib)) + { + store_compressed_length(buf, typelib->count); + for (unsigned int i= 0; i < typelib->count; i++) + { + store_compressed_length(buf, typelib->type_lengths[i]); + buf.append(typelib->type_names[i], typelib->type_lengths[i]); + } + } + } + if (buf.length() > 0) + return write_tlv_field(m_metadata_buf, SET_STR_VALUE, buf); + return false; +} + +bool Table_map_log_event::init_enum_str_value_field() +{ + StringBuffer<1024> buf; + TYPELIB *typelib; + + /* ENUM is same to SET columns, see comment in init_set_str_value_field */ + for (unsigned int i= 0 ; i < m_table->s->fields ; ++i) + { + if ((typelib= binlog_type_info_array[i].m_enum_typelib)) + { + store_compressed_length(buf, typelib->count); + for (unsigned int i= 0; i < typelib->count; i++) + { + store_compressed_length(buf, typelib->type_lengths[i]); + buf.append(typelib->type_names[i], typelib->type_lengths[i]); + } + } + } + + if (buf.length() > 0) + return write_tlv_field(m_metadata_buf, ENUM_STR_VALUE, buf); + return false; +} + +bool Table_map_log_event::init_geometry_type_field() +{ + StringBuffer<256> buf; + uint geom_type; + + /* Geometry type of geometry columns is stored one by one as packed length */ + for (unsigned int i= 0 ; i < m_table->s->fields ; ++i) + { + if (binlog_type_info_array[i].m_type_code == MYSQL_TYPE_GEOMETRY) + { + geom_type= binlog_type_info_array[i].m_geom_type; + DBUG_EXECUTE_IF("inject_invalid_geometry_type", geom_type= 100;); + store_compressed_length(buf, geom_type); + } + } + + if (buf.length() > 0) + return write_tlv_field(m_metadata_buf, GEOMETRY_TYPE, buf); + return false; +} + +bool Table_map_log_event::init_primary_key_field() +{ + DBUG_EXECUTE_IF("simulate_init_primary_key_field_error", return true;); + + if (unlikely(m_table->s->primary_key == MAX_KEY)) + return false; + + // 
If any key column uses prefix like KEY(c1(10)) */ + bool has_prefix= false; + KEY *pk= m_table->key_info + m_table->s->primary_key; + + DBUG_ASSERT(pk->user_defined_key_parts > 0); + + /* Check if any key column uses prefix */ + for (uint i= 0; i < pk->user_defined_key_parts; i++) + { + KEY_PART_INFO *key_part= pk->key_part+i; + if (key_part->length != m_table->field[key_part->fieldnr-1]->key_length()) + { + has_prefix= true; + break; + } + } + + StringBuffer<128> buf; + + if (!has_prefix) + { + /* Index of PK columns are stored one by one. */ + for (uint i= 0; i < pk->user_defined_key_parts; i++) + { + KEY_PART_INFO *key_part= pk->key_part+i; + store_compressed_length(buf, key_part->fieldnr-1); + } + return write_tlv_field(m_metadata_buf, SIMPLE_PRIMARY_KEY, buf); + } + else + { + /* Index of PK columns are stored with a prefix length one by one. */ + for (uint i= 0; i < pk->user_defined_key_parts; i++) + { + KEY_PART_INFO *key_part= pk->key_part+i; + size_t prefix= 0; + + store_compressed_length(buf, key_part->fieldnr-1); + + // Store character length but not octet length + if (key_part->length != m_table->field[key_part->fieldnr-1]->key_length()) + prefix= key_part->length / key_part->field->charset()->mbmaxlen; + store_compressed_length(buf, prefix); + } + return write_tlv_field(m_metadata_buf, PRIMARY_KEY_WITH_PREFIX, buf); + } +} + +#if defined(HAVE_REPLICATION) +/* + Print some useful information for the SHOW BINARY LOG information + field. + */ + +void Table_map_log_event::pack_info(Protocol *protocol) +{ + char buf[256]; + size_t bytes= my_snprintf(buf, sizeof(buf), + "table_id: %llu (%s.%s)", + m_table_id, m_dbnam, m_tblnam); + protocol->store(buf, bytes, &my_charset_bin); +} +#endif + + +/************************************************************************** + Write_rows_log_event member functions +**************************************************************************/ + +/* + Constructor used to build an event for writing to the binary log. 
+ */ +Write_rows_log_event::Write_rows_log_event(THD *thd_arg, TABLE *tbl_arg, + ulong tid_arg, + bool is_transactional) + :Rows_log_event(thd_arg, tbl_arg, tid_arg, tbl_arg->rpl_write_set, + is_transactional, WRITE_ROWS_EVENT_V1) +{ +} + +Write_rows_compressed_log_event::Write_rows_compressed_log_event( + THD *thd_arg, + TABLE *tbl_arg, + ulong tid_arg, + bool is_transactional) + : Write_rows_log_event(thd_arg, tbl_arg, tid_arg, is_transactional) +{ + m_type = WRITE_ROWS_COMPRESSED_EVENT_V1; +} + +bool Write_rows_compressed_log_event::write() +{ + return Rows_log_event::write_compressed(); +} + + +#if defined(HAVE_REPLICATION) +int +Write_rows_log_event::do_before_row_operations(const Slave_reporting_capability *const) +{ + int error= 0; + + /* + Increment the global status insert count variable + */ + if (get_flags(STMT_END_F)) + status_var_increment(thd->status_var.com_stat[SQLCOM_INSERT]); + + /** + todo: to introduce a property for the event (handler?) which forces + applying the event in the replace (idempotent) fashion. + */ + if (slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT) + { + /* + We are using REPLACE semantics and not INSERT IGNORE semantics + when writing rows, that is: new rows replace old rows. We need to + inform the storage engine that it should use this behaviour. + */ + + /* Tell the storage engine that we are using REPLACE semantics. */ + thd->lex->duplicates= DUP_REPLACE; + + /* + Pretend we're executing a REPLACE command: this is needed for + InnoDB since it is not (properly) checking the lex->duplicates flag. + */ + thd->lex->sql_command= SQLCOM_REPLACE; + /* + Do not raise the error flag in case of hitting to an unique attribute + */ + m_table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + /* + The following is needed in case if we have AFTER DELETE triggers. 
+ */ + m_table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE); + m_table->file->extra(HA_EXTRA_IGNORE_NO_KEY); + } + if (m_table->triggers && do_invoke_trigger()) + m_table->prepare_triggers_for_insert_stmt_or_event(); + + /* Honor next number column if present */ + m_table->next_number_field= m_table->found_next_number_field; + /* + * Fixed Bug#45999, In RBR, Store engine of Slave auto-generates new + * sequence numbers for auto_increment fields if the values of them are 0. + * If generateing a sequence number is decided by the values of + * table->auto_increment_field_not_null and SQL_MODE(if includes + * MODE_NO_AUTO_VALUE_ON_ZERO) in update_auto_increment function. + * SQL_MODE of slave sql thread is always consistency with master's. + * In RBR, auto_increment fields never are NULL, except if the auto_inc + * column exists only on the slave side (i.e., in an extra column + * on the slave's table). + */ + if (!is_auto_inc_in_extra_columns()) + m_table->auto_increment_field_not_null= TRUE; + else + { + /* + Here we have checked that there is an extra field + on this server's table that has an auto_inc column. + + Mark that the auto_increment field is null and mark + the read and write set bits. + + (There can only be one AUTO_INC column, it is always + indexed and it cannot have a DEFAULT value). + */ + m_table->auto_increment_field_not_null= FALSE; + m_table->mark_auto_increment_column(); + } + + return error; +} + +int +Write_rows_log_event::do_after_row_operations(const Slave_reporting_capability *const, + int error) +{ + int local_error= 0; + + /** + Clear the write_set bit for auto_inc field that only + existed on the destination table as an extra column. 
+ */ + if (is_auto_inc_in_extra_columns()) + { + bitmap_clear_bit(m_table->rpl_write_set, + m_table->next_number_field->field_index); + bitmap_clear_bit(m_table->read_set, + m_table->next_number_field->field_index); + + if (get_flags(STMT_END_F)) + m_table->file->ha_release_auto_increment(); + } + m_table->next_number_field=0; + m_table->auto_increment_field_not_null= FALSE; + if (slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT) + { + m_table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); + m_table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); + /* + resetting the extra with + table->file->extra(HA_EXTRA_NO_IGNORE_NO_KEY); + fires bug#27077 + explanation: file->reset() performs this duty + ultimately. Still todo: fix + */ + } + if (unlikely((local_error= m_table->file->ha_end_bulk_insert()))) + { + m_table->file->print_error(local_error, MYF(0)); + } + return error? error : local_error; +} + +bool Rows_log_event::process_triggers(trg_event_type event, + trg_action_time_type time_type, + bool old_row_is_record1) +{ + bool result; + DBUG_ENTER("Rows_log_event::process_triggers"); + m_table->triggers->mark_fields_used(event); + if (slave_run_triggers_for_rbr == SLAVE_RUN_TRIGGERS_FOR_RBR_YES) + { + result= m_table->triggers->process_triggers(thd, event, + time_type, + old_row_is_record1); + } + else + result= m_table->triggers->process_triggers(thd, event, + time_type, + old_row_is_record1); + + DBUG_RETURN(result); +} +/* + Check if there are more UNIQUE keys after the given key. +*/ +static int +last_uniq_key(TABLE *table, uint keyno) +{ + while (++keyno < table->s->keys) + if (table->key_info[keyno].flags & HA_NOSAME) + return 0; + return 1; +} + +/** + Check if an error is a duplicate key error. + + This function is used to check if an error code is one of the + duplicate key error, i.e., and error code for which it is sensible + to do a get_dup_key() to retrieve the duplicate key. + + @param errcode The error code to check. 
+ + @return true if the error code is such that + get_dup_key() will return true, false + otherwise. + */ +bool +is_duplicate_key_error(int errcode) +{ + switch (errcode) + { + case HA_ERR_FOUND_DUPP_KEY: + case HA_ERR_FOUND_DUPP_UNIQUE: + return true; + } + return false; +} + +/** + Write the current row into event's table. + + The row is located in the row buffer, pointed by @c m_curr_row member. + Number of columns of the row is stored in @c m_width member (it can be + different from the number of columns in the table to which we insert). + Bitmap @c m_cols indicates which columns are present in the row. It is assumed + that event's table is already open and pointed by @c m_table. + + If the same record already exists in the table it can be either overwritten + or an error is reported depending on the value of @c overwrite flag + (error reporting not yet implemented). Note that the matching record can be + different from the row we insert if we use primary keys to identify records in + the table. + + The row to be inserted can contain values only for selected columns. The + missing columns are filled with default values using @c prepare_record() + function. If a matching record is found in the table and @c overwritte is + true, the missing columns are taken from it. + + @param rli Relay log info (needed for row unpacking). + @param overwrite + Shall we overwrite if the row already exists or signal + error (currently ignored). + + @returns Error code on failure, 0 on success. + + This method, if successful, sets @c m_curr_row_end pointer to point at the + next row in the rows buffer. This is done when unpacking the row to be + inserted. + + @note If a matching record is found, it is either updated using + @c ha_update_row() or first deleted and then new record written. 
+*/ + +int +Rows_log_event::write_row(rpl_group_info *rgi, + const bool overwrite) +{ + DBUG_ENTER("write_row"); + DBUG_ASSERT(m_table != NULL && thd != NULL); + + TABLE *table= m_table; // pointer to event's table + int error; + int UNINIT_VAR(keynum); + const bool invoke_triggers= (m_table->triggers && do_invoke_trigger()); + auto_afree_ptr<char> key(NULL); + + prepare_record(table, m_width, true); + + /* unpack row into table->record[0] */ + if (unlikely((error= unpack_current_row(rgi)))) + { + table->file->print_error(error, MYF(0)); + DBUG_RETURN(error); + } + + if (m_curr_row == m_rows_buf && !invoke_triggers && !table->s->long_unique_table) + { + /* + This table has no triggers so we can do bulk insert. + + This is the first row to be inserted, we estimate the rows with + the size of the first row and use that value to initialize + storage engine for bulk insertion. + */ + /* this is the first row to be inserted, we estimate the rows with + the size of the first row and use that value to initialize + storage engine for bulk insertion */ + DBUG_ASSERT(!(m_curr_row > m_curr_row_end)); + ha_rows estimated_rows= 0; + if (m_curr_row < m_curr_row_end) + estimated_rows= (m_rows_end - m_curr_row) / (m_curr_row_end - m_curr_row); + else if (m_curr_row == m_curr_row_end) + estimated_rows= 1; + + table->file->ha_start_bulk_insert(estimated_rows); + } + + /* + Explicitly set the auto_inc to null to make sure that + it gets an auto_generated value. 
+ */ + if (is_auto_inc_in_extra_columns()) + m_table->next_number_field->set_null(); + + DBUG_DUMP("record[0]", table->record[0], table->s->reclength); + DBUG_PRINT_BITSET("debug", "rpl_write_set: %s", table->rpl_write_set); + DBUG_PRINT_BITSET("debug", "read_set: %s", table->read_set); + + if (table->s->long_unique_table) + table->update_virtual_fields(table->file, VCOL_UPDATE_FOR_WRITE); + + if (invoke_triggers && + unlikely(process_triggers(TRG_EVENT_INSERT, TRG_ACTION_BEFORE, TRUE))) + { + DBUG_RETURN(HA_ERR_GENERIC); // in case if error is not set yet + } + + // Handle INSERT. + if (table->versioned(VERS_TIMESTAMP)) + { + ulong sec_part; + // Check whether a row came from unversioned table and fix vers fields. + if (table->vers_start_field()->get_timestamp(&sec_part) == 0 && sec_part == 0) + table->vers_update_fields(); + } + + /* + Try to write record. If a corresponding record already exists in the table, + we try to change it using ha_update_row() if possible. Otherwise we delete + it and repeat the whole process again. + + TODO: Add safety measures against infinite looping. + */ + + if (table->s->sequence) + error= update_sequence(); + else while (unlikely(error= table->file->ha_write_row(table->record[0]))) + { + if (error == HA_ERR_LOCK_DEADLOCK || + error == HA_ERR_LOCK_WAIT_TIMEOUT || + (keynum= table->file->get_dup_key(error)) < 0 || + !overwrite) + { + DBUG_PRINT("info",("get_dup_key returns %d)", keynum)); + /* + Deadlock, waiting for lock or just an error from the handler + such as HA_ERR_FOUND_DUPP_KEY when overwrite is false. + Retrieval of the duplicate key number may fail + - either because the error was not "duplicate key" error + - or because the information which key is not available + */ + table->file->print_error(error, MYF(0)); + DBUG_RETURN(error); + } + /* + We need to retrieve the old row into record[1] to be able to + either update or delete the offending record. 
We either: + + - use rnd_pos() with a row-id (available as dupp_row) to the + offending row, if that is possible (MyISAM and Blackhole), or else + + - use index_read_idx() with the key that is duplicated, to + retrieve the offending row. + */ + if (table->file->ha_table_flags() & HA_DUPLICATE_POS) + { + DBUG_PRINT("info",("Locating offending record using rnd_pos()")); + + if ((error= table->file->ha_rnd_init_with_error(0))) + { + DBUG_RETURN(error); + } + + error= table->file->ha_rnd_pos(table->record[1], table->file->dup_ref); + if (unlikely(error)) + { + DBUG_PRINT("info",("rnd_pos() returns error %d",error)); + table->file->print_error(error, MYF(0)); + DBUG_RETURN(error); + } + table->file->ha_rnd_end(); + } + else + { + DBUG_PRINT("info",("Locating offending record using index_read_idx()")); + + if (table->file->extra(HA_EXTRA_FLUSH_CACHE)) + { + DBUG_PRINT("info",("Error when setting HA_EXTRA_FLUSH_CACHE")); + DBUG_RETURN(my_errno); + } + + if (key.get() == NULL) + { + key.assign(static_cast<char*>(my_alloca(table->s->max_unique_length))); + if (key.get() == NULL) + { + DBUG_PRINT("info",("Can't allocate key buffer")); + DBUG_RETURN(ENOMEM); + } + } + + key_copy((uchar*)key.get(), table->record[0], table->key_info + keynum, + 0); + error= table->file->ha_index_read_idx_map(table->record[1], keynum, + (const uchar*)key.get(), + HA_WHOLE_KEY, + HA_READ_KEY_EXACT); + if (unlikely(error)) + { + DBUG_PRINT("info",("index_read_idx() returns %s", HA_ERR(error))); + table->file->print_error(error, MYF(0)); + DBUG_RETURN(error); + } + } + + /* + Now, record[1] should contain the offending row. That + will enable us to update it or, alternatively, delete it (so + that we can insert the new row afterwards). 
+ */ + if (table->s->long_unique_table) + { + /* same as for REPLACE/ODKU */ + table->move_fields(table->field, table->record[1], table->record[0]); + table->update_virtual_fields(table->file, VCOL_UPDATE_FOR_REPLACE); + table->move_fields(table->field, table->record[0], table->record[1]); + } + + /* + If row is incomplete we will use the record found to fill + missing columns. + */ + if (!get_flags(COMPLETE_ROWS_F)) + { + restore_record(table,record[1]); + error= unpack_current_row(rgi); + if (table->s->long_unique_table) + table->update_virtual_fields(table->file, VCOL_UPDATE_FOR_WRITE); + } + + DBUG_PRINT("debug",("preparing for update: before and after image")); + DBUG_DUMP("record[1] (before)", table->record[1], table->s->reclength); + DBUG_DUMP("record[0] (after)", table->record[0], table->s->reclength); + + /* + REPLACE is defined as either INSERT or DELETE + INSERT. If + possible, we can replace it with an UPDATE, but that will not + work on InnoDB if FOREIGN KEY checks are necessary. + + I (Matz) am not sure of the reason for the last_uniq_key() + check as, but I'm guessing that it's something along the + following lines. + + Suppose that we got the duplicate key to be a key that is not + the last unique key for the table and we perform an update: + then there might be another key for which the unique check will + fail, so we're better off just deleting the row and inserting + the correct row. + + Additionally we don't use UPDATE if rbr triggers should be invoked - + when triggers are used we want a simple and predictable execution path. 
+ */ + if (last_uniq_key(table, keynum) && !invoke_triggers && + !table->file->referenced_by_foreign_key()) + { + DBUG_PRINT("info",("Updating row using ha_update_row()")); + error= table->file->ha_update_row(table->record[1], + table->record[0]); + switch (error) { + + case HA_ERR_RECORD_IS_THE_SAME: + DBUG_PRINT("info",("ignoring HA_ERR_RECORD_IS_THE_SAME error from" + " ha_update_row()")); + error= 0; + + case 0: + break; + + default: + DBUG_PRINT("info",("ha_update_row() returns error %d",error)); + table->file->print_error(error, MYF(0)); + } + + DBUG_RETURN(error); + } + else + { + DBUG_PRINT("info",("Deleting offending row and trying to write new one again")); + if (invoke_triggers && + unlikely(process_triggers(TRG_EVENT_DELETE, TRG_ACTION_BEFORE, + TRUE))) + error= HA_ERR_GENERIC; // in case if error is not set yet + else + { + if (unlikely((error= table->file->ha_delete_row(table->record[1])))) + { + DBUG_PRINT("info",("ha_delete_row() returns error %d",error)); + table->file->print_error(error, MYF(0)); + DBUG_RETURN(error); + } + if (invoke_triggers && + unlikely(process_triggers(TRG_EVENT_DELETE, TRG_ACTION_AFTER, + TRUE))) + DBUG_RETURN(HA_ERR_GENERIC); // in case if error is not set yet + } + /* Will retry ha_write_row() with the offending row removed. */ + } + } + + if (invoke_triggers && + unlikely(process_triggers(TRG_EVENT_INSERT, TRG_ACTION_AFTER, TRUE))) + error= HA_ERR_GENERIC; // in case if error is not set yet + + DBUG_RETURN(error); +} + + +int Rows_log_event::update_sequence() +{ + TABLE *table= m_table; // pointer to event's table + bool old_master= false; + int err= 0; + + if (!bitmap_is_set(table->rpl_write_set, MIN_VALUE_FIELD_NO) || + ( +#if defined(WITH_WSREP) + ! WSREP(thd) && +#endif + !(table->in_use->rgi_slave->gtid_ev_flags2 & Gtid_log_event::FL_DDL) && + !(old_master= + rpl_master_has_bug(thd->rgi_slave->rli, + 29621, FALSE, FALSE, FALSE, TRUE)))) + { + /* This event come from a setval function executed on the master. 
+ Update the sequence next_number and round, like we do with setval() + */ + MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, + &table->read_set); + longlong nextval= table->field[NEXT_FIELD_NO]->val_int(); + longlong round= table->field[ROUND_FIELD_NO]->val_int(); + dbug_tmp_restore_column_map(&table->read_set, old_map); + + return table->s->sequence->set_value(table, nextval, round, 0) > 0; + } + if (old_master && !WSREP(thd) && thd->rgi_slave->is_parallel_exec) + { + DBUG_ASSERT(thd->rgi_slave->parallel_entry); + /* + With parallel replication enabled, we can't execute alongside any other + transaction in which we may depend, so we force retry to release + the server layer table lock for possible prior in binlog order + same table transactions. + */ + if (thd->rgi_slave->parallel_entry->last_committed_sub_id < + thd->rgi_slave->wait_commit_sub_id) + { + err= ER_LOCK_DEADLOCK; + my_error(err, MYF(0)); + } + } + /* + Update all fields in table and update the active sequence, like with + ALTER SEQUENCE + */ + return err == 0 ? 
table->file->ha_write_row(table->record[0]) : err; +} + + +#endif + + +#if defined(HAVE_REPLICATION) + +int +Write_rows_log_event::do_exec_row(rpl_group_info *rgi) +{ + DBUG_ASSERT(m_table != NULL); + const char *tmp= thd->get_proc_info(); + char *message, msg[128]; + const LEX_CSTRING &table_name= m_table->s->table_name; + const char quote_char= + get_quote_char_for_identifier(thd, table_name.str, table_name.length); + my_snprintf(msg, sizeof msg, + "Write_rows_log_event::write_row() on table %c%.*s%c", + quote_char, int(table_name.length), table_name.str, quote_char); + message= msg; + int error; + +#ifdef WSREP_PROC_INFO + my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Write_rows_log_event::write_row(%lld) on table %c%.*s%c", + (long long) wsrep_thd_trx_seqno(thd), quote_char, + int(table_name.length), table_name.str, quote_char); + message= thd->wsrep_info; +#endif /* WSREP_PROC_INFO */ + + thd_proc_info(thd, message); + error= write_row(rgi, slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT); + thd_proc_info(thd, tmp); + + if (unlikely(error) && unlikely(!thd->is_error())) + { + DBUG_ASSERT(0); + my_error(ER_UNKNOWN_ERROR, MYF(0)); + } + + return error; +} + +#endif /* defined(HAVE_REPLICATION) */ + + +#if defined(HAVE_REPLICATION) +uint8 Write_rows_log_event::get_trg_event_map() +{ + return trg2bit(TRG_EVENT_INSERT) | trg2bit(TRG_EVENT_UPDATE) | + trg2bit(TRG_EVENT_DELETE); +} +#endif + +/************************************************************************** + Delete_rows_log_event member functions +**************************************************************************/ + +#if defined(HAVE_REPLICATION) +/* + Compares table->record[0] and table->record[1] + + Returns TRUE if different. 
+*/ +static bool record_compare(TABLE *table, bool vers_from_plain= false) +{ + bool result= FALSE; + /** + Compare full record only if: + - there are no blob fields (otherwise we would also need + to compare blobs contents as well); + - there are no varchar fields (otherwise we would also need + to compare varchar contents as well); + - there are no null fields, otherwise NULLed fields + contents (i.e., the don't care bytes) may show arbitrary + values, depending on how each engine handles internally. + */ + if ((table->s->blob_fields + + table->s->varchar_fields + + table->s->null_fields) == 0) + { + result= cmp_record(table,record[1]); + goto record_compare_exit; + } + + /* Compare null bits */ + if (memcmp(table->null_flags, + table->null_flags+table->s->rec_buff_length, + table->s->null_bytes)) + { + result= TRUE; // Diff in NULL value + goto record_compare_exit; + } + + /* Compare fields */ + for (Field **ptr=table->field ; *ptr ; ptr++) + { + /* + If the table is versioned, don't compare using the version if there is a + primary key. If there isn't a primary key, we need the version to + identify the correct record if there are duplicate rows in the data set. + However, if the primary server is unversioned (vers_from_plain is true), + then we implicitly use row_end as the primary key on our side. This is + because the implicit row_end value will be set to the maximum value for + the latest row update (which is what we care about). + */ + if (table->versioned() && (*ptr)->vers_sys_field() && + (table->s->primary_key < MAX_KEY || + (vers_from_plain && table->vers_start_field() == (*ptr)))) + continue; + /** + We only compare field contents that are not null. + NULL fields (i.e., their null bits) were compared + earlier. 
+ */ + if (!(*(ptr))->is_null()) + { + if ((*ptr)->cmp_binary_offset(table->s->rec_buff_length)) + { + result= TRUE; + goto record_compare_exit; + } + } + } + +record_compare_exit: + return result; +} + + +/** + Find the best key to use when locating the row in @c find_row(). + + A primary key is preferred if it exists; otherwise a unique index is + preferred. Else we pick the index with the smalles rec_per_key value. + + If a suitable key is found, set @c m_key, @c m_key_nr and @c m_key_info + member fields appropriately. + + @returns Error code on failure, 0 on success. +*/ +int Rows_log_event::find_key() +{ + uint i, best_key_nr, last_part; + KEY *key, *UNINIT_VAR(best_key); + ulong UNINIT_VAR(best_rec_per_key), tmp; + DBUG_ENTER("Rows_log_event::find_key"); + DBUG_ASSERT(m_table); + + best_key_nr= MAX_KEY; + + /* + Keys are sorted so that any primary key is first, followed by unique keys, + followed by any other. So we will automatically pick the primary key if + it exists. + */ + for (i= 0, key= m_table->key_info; i < m_table->s->keys; i++, key++) + { + if (!m_table->s->keys_in_use.is_set(i)) + continue; + /* + We cannot use a unique key with NULL-able columns to uniquely identify + a row (but we can still select it for range scan below if nothing better + is available). + */ + if ((key->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME) + { + best_key_nr= i; + best_key= key; + break; + } + /* + We can only use a non-unique key if it allows range scans (ie. skip + FULLTEXT indexes and such). 
+ */ + last_part= key->user_defined_key_parts - 1; + DBUG_PRINT("info", ("Index %s rec_per_key[%u]= %lu", + key->name.str, last_part, key->rec_per_key[last_part])); + if (!(m_table->file->index_flags(i, last_part, 1) & HA_READ_NEXT)) + continue; + + tmp= key->rec_per_key[last_part]; + if (best_key_nr == MAX_KEY || (tmp > 0 && tmp < best_rec_per_key)) + { + best_key_nr= i; + best_key= key; + best_rec_per_key= tmp; + } + } + + if (best_key_nr == MAX_KEY) + { + m_key_info= NULL; + DBUG_RETURN(0); + } + + // Allocate buffer for key searches + m_key= (uchar *) my_malloc(PSI_INSTRUMENT_ME, best_key->key_length, MYF(MY_WME)); + if (m_key == NULL) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + m_key_info= best_key; + m_key_nr= best_key_nr; + + DBUG_RETURN(0);; +} + + +/* + Check if we are already spending too much time on this statement. + if we are, warn user that it might be because table does not have + a PK, but only if the warning was not printed before for this STMT. + + @param type The event type code. + @param table_name The name of the table that the slave is + operating. + @param is_index_scan States whether the slave is doing an index scan + or not. + @param rli The relay metadata info. +*/ +static inline +void issue_long_find_row_warning(Log_event_type type, + const char *table_name, + bool is_index_scan, + rpl_group_info *rgi) +{ + if ((global_system_variables.log_warnings > 1 && + !rgi->is_long_find_row_note_printed())) + { + ulonglong now= microsecond_interval_timer(); + ulonglong stmt_ts= rgi->get_row_stmt_start_timestamp(); + + DBUG_EXECUTE_IF("inject_long_find_row_note", + stmt_ts-=(LONG_FIND_ROW_THRESHOLD*2*HRTIME_RESOLUTION);); + + longlong delta= (now - stmt_ts)/HRTIME_RESOLUTION; + + if (delta > LONG_FIND_ROW_THRESHOLD) + { + rgi->set_long_find_row_note_printed(); + const char* evt_type= LOG_EVENT_IS_DELETE_ROW(type) ? " DELETE" : "n UPDATE"; + const char* scan_type= is_index_scan ? 
"scanning an index" : "scanning the table"; + + sql_print_information("The slave is applying a ROW event on behalf of a%s statement " + "on table %s and is currently taking a considerable amount " + "of time (%lld seconds). This is due to the fact that it is %s " + "while looking up records to be processed. Consider adding a " + "primary key (or unique key) to the table to improve " + "performance.", + evt_type, table_name, (long) delta, scan_type); + } + } +} + + +/* + HA_ERR_KEY_NOT_FOUND is a fatal error normally, but it's an expected + error in speculate optimistic mode, so use something non-fatal instead +*/ +static int row_not_found_error(rpl_group_info *rgi) +{ + return rgi->speculation != rpl_group_info::SPECULATE_OPTIMISTIC + ? HA_ERR_KEY_NOT_FOUND : HA_ERR_RECORD_CHANGED; +} + +/** + Locate the current row in event's table. + + The current row is pointed by @c m_curr_row. Member @c m_width tells + how many columns are there in the row (this can be differnet from + the number of columns in the table). It is assumed that event's + table is already open and pointed by @c m_table. + + If a corresponding record is found in the table it is stored in + @c m_table->record[0]. Note that when record is located based on a primary + key, it is possible that the record found differs from the row being located. + + If no key is specified or table does not have keys, a table scan is used to + find the row. In that case the row should be complete and contain values for + all columns. However, it can still be shorter than the table, i.e. the table + can contain extra columns not present in the row. It is also possible that + the table has fewer columns than the row being located. + + @returns Error code on failure, 0 on success. + + @post In case of success @c m_table->record[0] contains the record found. + Also, the internal "cursor" of the table is positioned at the record found. 
+
+  @note If the engine allows random access of the records, a combination of
+  @c position() and @c rnd_pos() will be used.
+
+  Note that one MUST call ha_index_or_rnd_end() after this function if
+  it returns 0 as we must leave the row position in the handler intact
+  for any following update/delete command.
+*/
+
+int Rows_log_event::find_row(rpl_group_info *rgi)
+{
+  DBUG_ENTER("Rows_log_event::find_row");
+
+  DBUG_ASSERT(m_table && m_table->in_use != NULL);
+
+  TABLE *table= m_table;
+  int error= 0;
+  bool is_table_scan= false, is_index_scan= false;
+
+  /*
+    rpl_row_tabledefs.test specifies that
+    if the extra field on the slave does not have a default value
+    then this is okay with Delete or Update events.
+    Todo: fix WL#3228 HLD that requires defaults for all types of events
+  */
+
+  prepare_record(table, m_width, FALSE);
+  error= unpack_current_row(rgi);
+
+  m_vers_from_plain= false;
+  if (table->versioned())
+  {
+    Field *row_end= table->vers_end_field();
+    DBUG_ASSERT(table->read_set);
+    // check whether master table is unversioned
+    if (row_end->val_int() == 0)
+    {
+      // Plain source table may have a PRIMARY KEY. And row_end is always
+      // a part of PRIMARY KEY. Set it to max value for engine to find it in
+      // index. Needed for the UPDATE/DELETE cases.
+      table->vers_end_field()->set_max();
+      m_vers_from_plain= true;
+    }
+  }
+
+  DBUG_PRINT("info",("looking for the following record"));
+  DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
+
+  if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION) &&
+      table->s->primary_key < MAX_KEY)
+  {
+    /*
+      Use a more efficient method to fetch the record given by
+      table->record[0] if the engine allows it.  We first compute a
+      row reference using the position() member function (it will be
+      stored in table->file->ref) and then use rnd_pos() to position
+      the "cursor" (i.e., record[0] in this case) at the correct row.
+
+      TODO: Add a check that the correct record has been fetched by
+      comparing with the original record. Take into account that the
+      record on the master and slave can be of different
+      length. Something along these lines should work:
+
+      ADD>>>  store_record(table,record[1]);
+              int error= table->file->ha_rnd_pos(table->record[0],
+                                                 table->file->ref);
+      ADD>>>  DBUG_ASSERT(memcmp(table->record[1], table->record[0],
+                                 table->s->reclength) == 0);
+
+    */
+    int error;
+    DBUG_PRINT("info",("locating record using primary key (position)"));
+
+    error= table->file->ha_rnd_pos_by_record(table->record[0]);
+    if (unlikely(error))
+    {
+      DBUG_PRINT("info",("rnd_pos returns error %d",error));
+      if (error == HA_ERR_KEY_NOT_FOUND)
+        error= row_not_found_error(rgi);
+      table->file->print_error(error, MYF(0));
+    }
+    DBUG_RETURN(error);
+  }
+
+  // We can't use position() - try other methods.
+
+  /*
+    We need to retrieve all fields
+    TODO: Move this out from this function to main loop
+   */
+  table->use_all_columns();
+
+  /*
+    Save copy of the record in table->record[1]. It might be needed
+    later if linear search is used to find exact match.
+   */
+  store_record(table,record[1]);
+
+  if (m_key_info)
+  {
+    DBUG_PRINT("info",("locating record using key #%u [%s] (index_read)",
+                       m_key_nr, m_key_info->name.str));
+    /* We use this to test that the correct key is used in test cases. */
+    DBUG_EXECUTE_IF("slave_crash_if_wrong_index",
+                    if(0 != strcmp(m_key_info->name.str,"expected_key")) abort(););
+
+    /* The key is active: search the table using the index */
+    if (!table->file->inited &&
+        (error= table->file->ha_index_init(m_key_nr, FALSE)))
+    {
+      DBUG_PRINT("info",("ha_index_init returns error %d",error));
+      table->file->print_error(error, MYF(0));
+      goto end;
+    }
+
+    /* Fill key data for the row */
+
+    DBUG_ASSERT(m_key);
+    key_copy(m_key, table->record[0], m_key_info, 0);
+
+    /*
+      Don't print debug messages when running valgrind since they can
+      trigger false warnings.
+     */
+#ifndef HAVE_valgrind
+    DBUG_DUMP("key data", m_key, m_key_info->key_length);
+#endif
+
+    /*
+      We need to set the null bytes to ensure that the filler bits are
+      all set when returning.  There are storage engines that just set
+      the necessary bits on the bytes and don't set the filler bits
+      correctly.
+    */
+    if (table->s->null_bytes > 0)
+      table->record[0][table->s->null_bytes - 1]|=
+        256U - (1U << table->s->last_null_bit_pos);
+
+    if (unlikely((error= table->file->ha_index_read_map(table->record[0],
+                                                        m_key,
+                                                        HA_WHOLE_KEY,
+                                                        HA_READ_KEY_EXACT))))
+    {
+      DBUG_PRINT("info",("no record matching the key found in the table"));
+      if (error == HA_ERR_KEY_NOT_FOUND)
+        error= row_not_found_error(rgi);
+      table->file->print_error(error, MYF(0));
+      table->file->ha_index_end();
+      goto end;
+    }
+
+  /*
+    Don't print debug messages when running valgrind since they can
+    trigger false warnings.
+   */
+#ifndef HAVE_valgrind
+    DBUG_PRINT("info",("found first matching record"));
+    DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
+#endif
+    /*
+      Below is a minor "optimization".  If the key (i.e., key number
+      0) has the HA_NOSAME flag set, we know that we have found the
+      correct record (since there can be no duplicates); otherwise, we
+      have to compare the record with the one found to see if it is
+      the correct one.
+
+      CAVEAT! This behaviour is essential for the replication of,
+      e.g., the mysql.proc table since the correct record *shall* be
+      found using the primary key *only*.  There shall be no
+      comparison of non-PK columns to decide if the correct record is
+      found.  I can see no scenario where it would be incorrect to
+      choose the row to change only using a PK or an UNNI (unique
+      non-null index).
+    */
+    if (table->key_info->flags & HA_NOSAME)
+    {
+      /* Unique does not have non nullable part */
+      if (!(table->key_info->flags & (HA_NULL_PART_KEY)))
+      {
+        error= 0;
+        goto end;
+      }
+      else
+      {
+        KEY *keyinfo= table->key_info;
+        /*
+          Unique has nullable part. We need to check if there is any
+          field in the BI image that is null and part of UNNI.
+        */
+        bool null_found= FALSE;
+        for (uint i=0; i < keyinfo->user_defined_key_parts && !null_found; i++)
+        {
+          uint fieldnr= keyinfo->key_part[i].fieldnr - 1;
+          Field **f= table->field+fieldnr;
+          null_found= (*f)->is_null();
+        }
+
+        if (!null_found)
+        {
+          error= 0;
+          goto end;
+        }
+
+        /* else fall through to index scan */
+      }
+    }
+
+    is_index_scan=true;
+
+    /*
+      In case key is not unique, we still have to iterate over records found
+      and find the one which is identical to the row given. A copy of the
+      record we are looking for is stored in record[1].
+     */
+    DBUG_PRINT("info",("non-unique index, scanning it to find matching record"));
+    /* We use this to test that the correct key is used in test cases. */
+    DBUG_EXECUTE_IF("slave_crash_if_index_scan", abort(););
+
+    while (record_compare(table, m_vers_from_plain))
+    {
+      while ((error= table->file->ha_index_next(table->record[0])))
+      {
+        DBUG_PRINT("info",("no record matching the given row found"));
+        table->file->print_error(error, MYF(0));
+        table->file->ha_index_end();
+        goto end;
+      }
+    }
+  }
+  else
+  {
+    DBUG_PRINT("info",("locating record using table scan (rnd_next)"));
+    /* We use this to test that the correct key is used in test cases. */
+    DBUG_EXECUTE_IF("slave_crash_if_table_scan", abort(););
+
+    /* We don't have a key: search the table using rnd_next() */
+    if (unlikely((error= table->file->ha_rnd_init_with_error(1))))
+    {
+      DBUG_PRINT("info",("error initializing table scan"
+                         " (ha_rnd_init returns %d)",error));
+      goto end;
+    }
+
+    is_table_scan= true;
+
+    /* Continue until we find the right record or have made a full loop */
+    do
+    {
+      error= table->file->ha_rnd_next(table->record[0]);
+
+      if (unlikely(error))
+        DBUG_PRINT("info", ("error: %s", HA_ERR(error)));
+      switch (error) {
+
+      case 0:
+        DBUG_DUMP("record found", table->record[0], table->s->reclength);
+        break;
+
+      case HA_ERR_END_OF_FILE:
+        DBUG_PRINT("info", ("Record not found"));
+        table->file->ha_rnd_end();
+        goto end;
+
+      default:
+        DBUG_PRINT("info", ("Failed to get next record"
+                            " (rnd_next returns %d)",error));
+        table->file->print_error(error, MYF(0));
+        table->file->ha_rnd_end();
+        goto end;
+      }
+    }
+    while (record_compare(table, m_vers_from_plain));
+
+    /*
+      Note: above record_compare will take into account all record fields
+      which might be incorrect in case a partial row was given in the event
+    */
+
+    DBUG_ASSERT(error == HA_ERR_END_OF_FILE || error == 0);
+  }
+
+end:
+  if (is_table_scan || is_index_scan)
+    issue_long_find_row_warning(get_general_type_code(), m_table->alias.c_ptr(),
+                                is_index_scan, rgi);
+  DBUG_RETURN(error);
+}
+
+#endif
+
+/*
+  Constructor used to build an event for writing to the binary log.
+
+ */
+
+Delete_rows_log_event::Delete_rows_log_event(THD *thd_arg, TABLE *tbl_arg,
+                                             ulong tid, bool is_transactional)
+  : Rows_log_event(thd_arg, tbl_arg, tid, tbl_arg->read_set, is_transactional,
+                   DELETE_ROWS_EVENT_V1)
+{
+}
+
+Delete_rows_compressed_log_event::Delete_rows_compressed_log_event(
+                                                      THD *thd_arg, TABLE *tbl_arg,
+                                                      ulong tid_arg,
+                                                      bool is_transactional)
+  : Delete_rows_log_event(thd_arg, tbl_arg, tid_arg, is_transactional)
+{
+  m_type= DELETE_ROWS_COMPRESSED_EVENT_V1;
+}
+
+bool Delete_rows_compressed_log_event::write()
+{
+  return Rows_log_event::write_compressed();
+}
+
+
+#if defined(HAVE_REPLICATION)
+
+int
+Delete_rows_log_event::do_before_row_operations(const Slave_reporting_capability *const)
+{
+  /*
+    Increment the global status delete count variable
+  */
+  if (get_flags(STMT_END_F))
+    status_var_increment(thd->status_var.com_stat[SQLCOM_DELETE]);
+
+  if ((m_table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION) &&
+      m_table->s->primary_key < MAX_KEY)
+  {
+    /*
+      We don't need to allocate any memory for m_key since it is not used.
+    */
+    return 0;
+  }
+  if (do_invoke_trigger())
+    m_table->prepare_triggers_for_delete_stmt_or_event();
+
+  return find_key();
+}
+
+int
+Delete_rows_log_event::do_after_row_operations(const Slave_reporting_capability *const,
+                                               int error)
+{
+  m_table->file->ha_index_or_rnd_end();
+  my_free(m_key);
+  m_key= NULL;
+  m_key_info= NULL;
+
+  return error;
+}
+
+int Delete_rows_log_event::do_exec_row(rpl_group_info *rgi)
+{
+  int error;
+  const char *tmp= thd->get_proc_info();
+  char *message, msg[128];
+  const LEX_CSTRING &table_name= m_table->s->table_name;
+  const char quote_char=
+    get_quote_char_for_identifier(thd, table_name.str, table_name.length);
+  my_snprintf(msg, sizeof msg,
+              "Delete_rows_log_event::find_row() on table %c%.*s%c",
+              quote_char, int(table_name.length), table_name.str, quote_char);
+  message= msg;
+  const bool invoke_triggers= (m_table->triggers && do_invoke_trigger());
+  DBUG_ASSERT(m_table != NULL);
+
+#ifdef WSREP_PROC_INFO
+  my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1,
+              "Delete_rows_log_event::find_row(%lld) on table %c%.*s%c",
+              (long long) wsrep_thd_trx_seqno(thd), quote_char,
+              int(table_name.length), table_name.str,
+              quote_char);
+  message= thd->wsrep_info;
+#endif /* WSREP_PROC_INFO */
+
+  thd_proc_info(thd, message);
+  if (likely(!(error= find_row(rgi))))
+  {
+    /*
+      Delete the record found, located in record[0]
+    */
+    my_snprintf(msg, sizeof msg,
+                "Delete_rows_log_event::ha_delete_row() on table %c%.*s%c",
+                quote_char, int(table_name.length), table_name.str,
+                quote_char);
+    message= msg;
+#ifdef WSREP_PROC_INFO
+    snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1,
+             "Delete_rows_log_event::ha_delete_row(%lld) on table %c%.*s%c",
+             (long long) wsrep_thd_trx_seqno(thd), quote_char,
+             int(table_name.length), table_name.str, quote_char);
+    message= thd->wsrep_info;
+#endif
+    thd_proc_info(thd, message);
+
+    if (invoke_triggers &&
+        unlikely(process_triggers(TRG_EVENT_DELETE, TRG_ACTION_BEFORE, FALSE)))
+      error= HA_ERR_GENERIC; // in case the error is not set yet
+    if (likely(!error))
+    {
+      m_table->mark_columns_per_binlog_row_image();
+      if (m_vers_from_plain && m_table->versioned(VERS_TIMESTAMP))
+      {
+        Field *end= m_table->vers_end_field();
+        store_record(m_table, record[1]);
+        end->set_time();
+        error= m_table->file->ha_update_row(m_table->record[1],
+                                            m_table->record[0]);
+      }
+      else
+      {
+        error= m_table->file->ha_delete_row(m_table->record[0]);
+      }
+      m_table->default_column_bitmaps();
+    }
+    if (invoke_triggers && likely(!error) &&
+        unlikely(process_triggers(TRG_EVENT_DELETE, TRG_ACTION_AFTER, FALSE)))
+      error= HA_ERR_GENERIC; // in case the error is not set yet
+    m_table->file->ha_index_or_rnd_end();
+  }
+  thd_proc_info(thd, tmp);
+  return error;
+}
+
+#endif /* defined(HAVE_REPLICATION) */
+
+#if defined(HAVE_REPLICATION)
+uint8 Delete_rows_log_event::get_trg_event_map()
+{
+  return trg2bit(TRG_EVENT_DELETE);
+}
+#endif
+
+/**************************************************************************
+	Update_rows_log_event member functions
+**************************************************************************/
+
+/*
+  Constructor used to build an event for writing to the binary log.
+ */ +Update_rows_log_event::Update_rows_log_event(THD *thd_arg, TABLE *tbl_arg, + ulong tid, + bool is_transactional) +: Rows_log_event(thd_arg, tbl_arg, tid, tbl_arg->read_set, is_transactional, + UPDATE_ROWS_EVENT_V1) +{ + init(tbl_arg->rpl_write_set); +} + +Update_rows_compressed_log_event::Update_rows_compressed_log_event(THD *thd_arg, TABLE *tbl_arg, + ulong tid, + bool is_transactional) +: Update_rows_log_event(thd_arg, tbl_arg, tid, is_transactional) +{ + m_type = UPDATE_ROWS_COMPRESSED_EVENT_V1; +} + +bool Update_rows_compressed_log_event::write() +{ + return Rows_log_event::write_compressed(); +} + +void Update_rows_log_event::init(MY_BITMAP const *cols) +{ + /* if my_bitmap_init fails, caught in is_valid() */ + if (likely(!my_bitmap_init(&m_cols_ai, + m_width <= sizeof(m_bitbuf_ai)*8 ? m_bitbuf_ai : NULL, + m_width))) + { + /* Cols can be zero if this is a dummy binrows event */ + if (likely(cols != NULL)) + { + memcpy(m_cols_ai.bitmap, cols->bitmap, no_bytes_in_map(cols)); + create_last_word_mask(&m_cols_ai); + } + } +} + + +#if defined(HAVE_REPLICATION) + +int +Update_rows_log_event::do_before_row_operations(const Slave_reporting_capability *const) +{ + /* + Increment the global status update count variable + */ + if (get_flags(STMT_END_F)) + status_var_increment(thd->status_var.com_stat[SQLCOM_UPDATE]); + + int err; + if ((err= find_key())) + return err; + + if (do_invoke_trigger()) + m_table->prepare_triggers_for_update_stmt_or_event(); + + return 0; +} + +int +Update_rows_log_event::do_after_row_operations(const Slave_reporting_capability *const, + int error) +{ + /*error= ToDo:find out what this should really be, this triggers close_scan in nbd, returning error?*/ + m_table->file->ha_index_or_rnd_end(); + my_free(m_key); // Free for multi_malloc + m_key= NULL; + m_key_info= NULL; + + return error; +} + +int +Update_rows_log_event::do_exec_row(rpl_group_info *rgi) +{ + const bool invoke_triggers= (m_table->triggers && do_invoke_trigger()); + const 
char *tmp= thd->get_proc_info(); + DBUG_ASSERT(m_table != NULL); + char *message, msg[128]; + const LEX_CSTRING &table_name= m_table->s->table_name; + const char quote_char= + get_quote_char_for_identifier(thd, table_name.str, table_name.length); + my_snprintf(msg, sizeof msg, + "Update_rows_log_event::find_row() on table %c%.*s%c", + quote_char, int(table_name.length), table_name.str, quote_char); + message= msg; + +#ifdef WSREP_PROC_INFO + my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Update_rows_log_event::find_row(%lld) on table %c%.*s%c", + (long long) wsrep_thd_trx_seqno(thd), quote_char, + int(table_name.length), table_name.str, + quote_char); + message= thd->wsrep_info; +#endif /* WSREP_PROC_INFO */ + + thd_proc_info(thd, message); + // Temporary fix to find out why it fails [/Matz] + memcpy(m_table->read_set->bitmap, m_cols.bitmap, (m_table->read_set->n_bits + 7) / 8); + memcpy(m_table->write_set->bitmap, m_cols_ai.bitmap, (m_table->write_set->n_bits + 7) / 8); + + m_table->mark_columns_per_binlog_row_image(); + + int error= find_row(rgi); + if (unlikely(error)) + { + /* + We need to read the second image in the event of error to be + able to skip to the next pair of updates + */ + if ((m_curr_row= m_curr_row_end)) + unpack_current_row(rgi, &m_cols_ai); + thd_proc_info(thd, tmp); + return error; + } + + const bool history_change= m_table->versioned() ? + !m_table->vers_end_field()->is_max() : false; + TABLE_LIST *tl= m_table->pos_in_table_list; + uint8 trg_event_map_save= tl->trg_event_map; + + /* + This is the situation after locating BI: + + ===|=== before image ====|=== after image ===|=== + ^ ^ + m_curr_row m_curr_row_end + + BI found in the table is stored in record[0]. We copy it to record[1] + and unpack AI to record[0]. 
+ */ + + store_record(m_table,record[1]); + + m_curr_row= m_curr_row_end; + my_snprintf(msg, sizeof msg, + "Update_rows_log_event::unpack_current_row() on table %c%.*s%c", + quote_char, int(table_name.length), table_name.str, quote_char); + message= msg; +#ifdef WSREP_PROC_INFO + my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Update_rows_log_event::unpack_current_row(%lld) on table %c%.*s%c", + (long long) wsrep_thd_trx_seqno(thd), quote_char, + int(table_name.length), table_name.str, quote_char); + message= thd->wsrep_info; +#endif /* WSREP_PROC_INFO */ + + /* this also updates m_curr_row_end */ + thd_proc_info(thd, message); + if (unlikely((error= unpack_current_row(rgi, &m_cols_ai)))) + goto err; + if (m_table->s->long_unique_table) + m_table->update_virtual_fields(m_table->file, VCOL_UPDATE_FOR_WRITE); + + /* + Now we have the right row to update. The old row (the one we're + looking for) is in record[1] and the new row is in record[0]. + */ +#ifndef HAVE_valgrind + /* + Don't print debug messages when running valgrind since they can + trigger false warnings. 
+ */ + DBUG_PRINT("info",("Updating row in table")); + DBUG_DUMP("old record", m_table->record[1], m_table->s->reclength); + DBUG_DUMP("new values", m_table->record[0], m_table->s->reclength); +#endif + + my_snprintf(msg, sizeof msg, + "Update_rows_log_event::ha_update_row() on table %c%.*s%c", + quote_char, int(table_name.length), table_name.str, quote_char); + message= msg; +#ifdef WSREP_PROC_INFO + my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Update_rows_log_event::ha_update_row(%lld) on table %c%.*s%c", + (long long) wsrep_thd_trx_seqno(thd), quote_char, + int(table_name.length), table_name.str, quote_char); + message= thd->wsrep_info; +#endif /* WSREP_PROC_INFO */ + + thd_proc_info(thd, message); + if (invoke_triggers && + unlikely(process_triggers(TRG_EVENT_UPDATE, TRG_ACTION_BEFORE, TRUE))) + { + error= HA_ERR_GENERIC; // in case if error is not set yet + goto err; + } + + if (m_table->versioned()) + { + if (m_vers_from_plain && m_table->versioned(VERS_TIMESTAMP)) + m_table->vers_update_fields(); + if (!history_change && !m_table->vers_end_field()->is_max()) + { + tl->trg_event_map|= trg2bit(TRG_EVENT_DELETE); + } + } + error= m_table->file->ha_update_row(m_table->record[1], m_table->record[0]); + tl->trg_event_map= trg_event_map_save; + if (unlikely(error == HA_ERR_RECORD_IS_THE_SAME)) + error= 0; + if (m_vers_from_plain && m_table->versioned(VERS_TIMESTAMP)) + { + store_record(m_table, record[2]); + error= vers_insert_history_row(m_table); + restore_record(m_table, record[2]); + } + m_table->default_column_bitmaps(); + + if (invoke_triggers && likely(!error) && + unlikely(process_triggers(TRG_EVENT_UPDATE, TRG_ACTION_AFTER, TRUE))) + error= HA_ERR_GENERIC; // in case if error is not set yet + + +err: + thd_proc_info(thd, tmp); + m_table->file->ha_index_or_rnd_end(); + return error; +} + +#endif /* defined(HAVE_REPLICATION) */ + + +#if defined(HAVE_REPLICATION) +uint8 Update_rows_log_event::get_trg_event_map() +{ + return 
trg2bit(TRG_EVENT_UPDATE); +} +#endif + + +void Incident_log_event::pack_info(Protocol *protocol) +{ + char buf[256]; + size_t bytes; + if (m_message.length > 0) + bytes= my_snprintf(buf, sizeof(buf), "#%d (%s)", + m_incident, description()); + else + bytes= my_snprintf(buf, sizeof(buf), "#%d (%s): %s", + m_incident, description(), m_message.str); + protocol->store(buf, bytes, &my_charset_bin); +} + + +#if defined(WITH_WSREP) +/* + read the first event from (*buf). The size of the (*buf) is (*buf_len). + At the end (*buf) is shitfed to point to the following event or NULL and + (*buf_len) will be changed to account just being read bytes of the 1st event. +*/ +#define WSREP_MAX_ALLOWED_PACKET 1024*1024*1024 // current protocol max + +Log_event* wsrep_read_log_event( + char **arg_buf, size_t *arg_buf_len, + const Format_description_log_event *description_event) +{ + uchar *head= (uchar*) (*arg_buf); + uint data_len = uint4korr(head + EVENT_LEN_OFFSET); + const char *error= 0; + Log_event *res= 0; + DBUG_ENTER("wsrep_read_log_event"); + + if (data_len > WSREP_MAX_ALLOWED_PACKET) + { + error = "Event too big"; + goto err; + } + + res= Log_event::read_log_event(head, data_len, &error, description_event, + false); + +err: + if (!res) + { + DBUG_ASSERT(error != 0); + sql_print_error("Error in Log_event::read_log_event(): " + "'%s', data_len: %u, event_type: %d", + error, data_len, (int) head[EVENT_TYPE_OFFSET]); + } + (*arg_buf)+= data_len; + (*arg_buf_len)-= data_len; + DBUG_RETURN(res); +} +#endif + + +#if defined(HAVE_REPLICATION) +int Incident_log_event::do_apply_event(rpl_group_info *rgi) +{ + Relay_log_info const *rli= rgi->rli; + DBUG_ENTER("Incident_log_event::do_apply_event"); + + if (ignored_error_code(ER_SLAVE_INCIDENT)) + { + DBUG_PRINT("info", ("Ignoring Incident")); + DBUG_RETURN(0); + } + + rli->report(ERROR_LEVEL, ER_SLAVE_INCIDENT, NULL, + ER_THD(rgi->thd, ER_SLAVE_INCIDENT), + description(), + m_message.length > 0 ? 
m_message.str : ""); + DBUG_RETURN(1); +} +#endif + + +bool +Incident_log_event::write_data_header() +{ + DBUG_ENTER("Incident_log_event::write_data_header"); + DBUG_PRINT("enter", ("m_incident: %d", m_incident)); + uchar buf[sizeof(int16)]; + int2store(buf, (int16) m_incident); + DBUG_RETURN(write_data(buf, sizeof(buf))); +} + +bool +Incident_log_event::write_data_body() +{ + uchar tmp[1]; + DBUG_ENTER("Incident_log_event::write_data_body"); + tmp[0]= (uchar) m_message.length; + DBUG_RETURN(write_data(tmp, sizeof(tmp)) || + write_data(m_message.str, m_message.length)); +} + + +/* Pack info for its unrecognized ignorable event */ +void Ignorable_log_event::pack_info(Protocol *protocol) +{ + char buf[256]; + size_t bytes; + bytes= my_snprintf(buf, sizeof(buf), "# Ignorable event type %d (%s)", + number, description); + protocol->store(buf, bytes, &my_charset_bin); +} + + +#if defined(HAVE_REPLICATION) +Heartbeat_log_event::Heartbeat_log_event(const uchar *buf, uint event_len, + const Format_description_log_event* description_event) + :Log_event(buf, description_event) +{ + uint8 header_size= description_event->common_header_len; + if (log_pos == 0) + { + log_pos= uint8korr(buf + header_size); + log_ident= buf + header_size + HB_SUB_HEADER_LEN; + ident_len= event_len - (header_size + HB_SUB_HEADER_LEN); + } + else + { + log_ident= buf + header_size; + ident_len = event_len - header_size; + } +} +#endif + + +/** + Check if we should write event to the relay log + + This is used to skip events that is only supported by MySQL + + Return: + 0 ok + 1 Don't write event +*/ + +bool event_that_should_be_ignored(const uchar *buf) +{ + uint event_type= buf[EVENT_TYPE_OFFSET]; + if (event_type == GTID_LOG_EVENT || + event_type == ANONYMOUS_GTID_LOG_EVENT || + event_type == PREVIOUS_GTIDS_LOG_EVENT || + event_type == TRANSACTION_CONTEXT_EVENT || + event_type == VIEW_CHANGE_EVENT || + (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)) + return 1; + return 0; +} diff --git 
a/sql/log_slow.h b/sql/log_slow.h new file mode 100644 index 00000000..eb6895b9 --- /dev/null +++ b/sql/log_slow.h @@ -0,0 +1,61 @@ +/* Copyright (C) 2009, 2017, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 or later of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* Defining what to log to slow log */ + +#ifndef LOG_SLOW_INCLUDED +#define LOG_SLOW_INCLUDED + +#define LOG_SLOW_VERBOSITY_INIT 0 +#define LOG_SLOW_VERBOSITY_INNODB (1U << 0) /* Old option */ +#define LOG_SLOW_VERBOSITY_QUERY_PLAN (1U << 1) +#define LOG_SLOW_VERBOSITY_EXPLAIN (1U << 2) +#define LOG_SLOW_VERBOSITY_STORAGE_ENGINE (1U << 3) /* Replaces InnoDB */ +#define LOG_SLOW_VERBOSITY_WARNINGS (1U << 4) +#define LOG_SLOW_VERBOSITY_FULL (1U << 5) + +#define LOG_SLOW_VERBOSITY_ENGINE (LOG_SLOW_VERBOSITY_FULL | \ + LOG_SLOW_VERBOSITY_INNODB | \ + LOG_SLOW_VERBOSITY_STORAGE_ENGINE) + +#define QPLAN_INIT QPLAN_QC_NO + +#define QPLAN_ADMIN (1U << 0) +#define QPLAN_FILESORT (1U << 1) +#define QPLAN_FILESORT_DISK (1U << 2) +#define QPLAN_FILESORT_PRIORITY_QUEUE (1U << 3) +#define QPLAN_FULL_JOIN (1U << 4) +#define QPLAN_FULL_SCAN (1U << 5) +#define QPLAN_NOT_USING_INDEX (1U << 6) +#define QPLAN_QC (1U << 7) +#define QPLAN_QC_NO (1U << 8) +#define QPLAN_TMP_TABLE (1U << 9) +#define QPLAN_TMP_DISK (1U << 10) + +/* ... 
*/ +#define QPLAN_STATUS (1UL << 31) /* not in the slow_log_filter */ +#define QPLAN_MAX (1UL << 31) /* reserved as placeholder */ + +/* Bits for log_slow_disabled_statements */ +#define LOG_SLOW_DISABLE_ADMIN (1 << 0) +#define LOG_SLOW_DISABLE_CALL (1 << 1) +#define LOG_SLOW_DISABLE_SLAVE (1 << 2) +#define LOG_SLOW_DISABLE_SP (1 << 3) + +/* Bits for log_disabled_statements */ +#define LOG_DISABLE_SLAVE (1 << 0) +#define LOG_DISABLE_SP (1 << 1) + +#endif /* LOG_SLOW_INCLUDED */ diff --git a/sql/main.cc b/sql/main.cc new file mode 100644 index 00000000..357c72b8 --- /dev/null +++ b/sql/main.cc @@ -0,0 +1,36 @@ +/* Copyright (c) 2009 Sun Microsystems, Inc. + Use is subject to license terms. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + main() for mysqld. + Calls mysqld_main() entry point exported by sql library. + On Windows, might do some service handling. 
+*/ +#ifdef _WIN32 +/* Windows main function, service handling, calls mysqld_main */ +extern int mysqld_win_main(int argc, char **argv); +#else +extern int mysqld_main(int argc, char **argv); +#endif + +int main(int argc, char **argv) +{ +#ifdef _WIN32 + return mysqld_win_main(argc, argv); +#else + return mysqld_main(argc, argv); +#endif +} diff --git a/sql/mariadb.h b/sql/mariadb.h new file mode 100644 index 00000000..00cf2ed1 --- /dev/null +++ b/sql/mariadb.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2010, 2017, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/* + Include file that should always be included first in all file in the sql + directory. Used to ensure that some files, like my_global.h and my_config.h + are always included first. + It can also be used to speed up compilation by using precompiled headers. + + This file should include a minum set of header files used by all files + and header files that are very seldom changed. + It can also include some defines that all files should be aware of. +*/ + +#ifndef MARIADB_INCLUDED +#define MARIADB_INCLUDED +#include +#endif /* MARIADB_INCLUDED */ diff --git a/sql/mdl.cc b/sql/mdl.cc new file mode 100644 index 00000000..32374415 --- /dev/null +++ b/sql/mdl.cc @@ -0,0 +1,3341 @@ +/* Copyright (c) 2007, 2012, Oracle and/or its affiliates. 
+ Copyright (c) 2020, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#include "mariadb.h" +#include "sql_class.h" +#include "debug_sync.h" +#include "sql_array.h" +#include "rpl_rli.h" +#include +#include "unireg.h" +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif + +static PSI_memory_key key_memory_MDL_context_acquire_locks; + +#ifdef HAVE_PSI_INTERFACE +static PSI_mutex_key key_MDL_wait_LOCK_wait_status; + +static PSI_mutex_info all_mdl_mutexes[]= +{ + { &key_MDL_wait_LOCK_wait_status, "MDL_wait::LOCK_wait_status", 0} +}; + +static PSI_rwlock_key key_MDL_lock_rwlock; +static PSI_rwlock_key key_MDL_context_LOCK_waiting_for; + +static PSI_rwlock_info all_mdl_rwlocks[]= +{ + { &key_MDL_lock_rwlock, "MDL_lock::rwlock", 0}, + { &key_MDL_context_LOCK_waiting_for, "MDL_context::LOCK_waiting_for", 0} +}; + +static PSI_cond_key key_MDL_wait_COND_wait_status; + +static PSI_cond_info all_mdl_conds[]= +{ + { &key_MDL_wait_COND_wait_status, "MDL_context::COND_wait_status", 0} +}; + +static PSI_memory_info all_mdl_memory[]= +{ + { &key_memory_MDL_context_acquire_locks, "MDL_context::acquire_locks", 0} +}; + +/** + Initialise all the performance schema instrumentation points + used by the MDL subsystem. 
+*/ +static void init_mdl_psi_keys(void) +{ + int count; + + count= array_elements(all_mdl_mutexes); + mysql_mutex_register("sql", all_mdl_mutexes, count); + + count= array_elements(all_mdl_rwlocks); + mysql_rwlock_register("sql", all_mdl_rwlocks, count); + + count= array_elements(all_mdl_conds); + mysql_cond_register("sql", all_mdl_conds, count); + + count= array_elements(all_mdl_memory); + mysql_memory_register("sql", all_mdl_memory, count); + + MDL_key::init_psi_keys(); +} +#endif /* HAVE_PSI_INTERFACE */ + + +/** + Thread state names to be used in case when we have to wait on resource + belonging to certain namespace. +*/ + +PSI_stage_info MDL_key::m_namespace_to_wait_state_name[NAMESPACE_END]= +{ + {0, "Waiting for backup lock", 0}, + {0, "Waiting for schema metadata lock", 0}, + {0, "Waiting for table metadata lock", 0}, + {0, "Waiting for stored function metadata lock", 0}, + {0, "Waiting for stored procedure metadata lock", 0}, + {0, "Waiting for stored package body metadata lock", 0}, + {0, "Waiting for trigger metadata lock", 0}, + {0, "Waiting for event metadata lock", 0}, + {0, "User lock", 0} /* Be compatible with old status. 
*/ +}; + + +static const LEX_STRING lock_types[]= +{ + { C_STRING_WITH_LEN("MDL_INTENTION_EXCLUSIVE") }, + { C_STRING_WITH_LEN("MDL_SHARED") }, + { C_STRING_WITH_LEN("MDL_SHARED_HIGH_PRIO") }, + { C_STRING_WITH_LEN("MDL_SHARED_READ") }, + { C_STRING_WITH_LEN("MDL_SHARED_WRITE") }, + { C_STRING_WITH_LEN("MDL_SHARED_UPGRADABLE") }, + { C_STRING_WITH_LEN("MDL_SHARED_READ_ONLY") }, + { C_STRING_WITH_LEN("MDL_SHARED_NO_WRITE") }, + { C_STRING_WITH_LEN("MDL_SHARED_NO_READ_WRITE") }, + { C_STRING_WITH_LEN("MDL_EXCLUSIVE") }, +}; + + +static const LEX_STRING backup_lock_types[]= +{ + { C_STRING_WITH_LEN("MDL_BACKUP_START") }, + { C_STRING_WITH_LEN("MDL_BACKUP_FLUSH") }, + { C_STRING_WITH_LEN("MDL_BACKUP_WAIT_FLUSH") }, + { C_STRING_WITH_LEN("MDL_BACKUP_WAIT_DDL") }, + { C_STRING_WITH_LEN("MDL_BACKUP_WAIT_COMMIT") }, + { C_STRING_WITH_LEN("MDL_BACKUP_FTWRL1") }, + { C_STRING_WITH_LEN("MDL_BACKUP_FTWRL2") }, + { C_STRING_WITH_LEN("MDL_BACKUP_DML") }, + { C_STRING_WITH_LEN("MDL_BACKUP_TRANS_DML") }, + { C_STRING_WITH_LEN("MDL_BACKUP_SYS_DML") }, + { C_STRING_WITH_LEN("MDL_BACKUP_DDL") }, + { C_STRING_WITH_LEN("MDL_BACKUP_BLOCK_DDL") }, + { C_STRING_WITH_LEN("MDL_BACKUP_ALTER_COPY") }, + { C_STRING_WITH_LEN("MDL_BACKUP_COMMIT") } +}; + + +#ifdef HAVE_PSI_INTERFACE +void MDL_key::init_psi_keys() +{ + int i; + int count; + PSI_stage_info *info __attribute__((unused)); + + count= array_elements(MDL_key::m_namespace_to_wait_state_name); + for (i= 0; iget_key(); + my_snprintf(buffer, sizeof(buffer) - 1, "%.*s/%.*s (%s)", + (int) mdl_key->db_name_length(), mdl_key->db_name(), + (int) mdl_key->name_length(), mdl_key->name(), + mdl_ticket->get_type_name()->str); + return buffer; +} + + +const char *dbug_print(MDL_ticket *mdl_ticket) +{ + return dbug_print_mdl(mdl_ticket); +} + + +static int mdl_dbug_print_lock(MDL_ticket *mdl_ticket, void *arg, bool granted) +{ + String *tmp= (String*) arg; + char buffer[256]; + size_t length= my_snprintf(buffer, sizeof(buffer) - 1, + "\n %s (%s)", 
dbug_print_mdl(mdl_ticket), + granted ? "granted" : "waiting"); + tmp->append(buffer, length); + return 0; +} + +const char *mdl_dbug_print_locks() +{ + thread_local String tmp; + tmp.length(0); + mdl_iterate(mdl_dbug_print_lock, (void*) &tmp); + return tmp.c_ptr(); +} +#endif /* DBUG_OFF */ + +/** + Enter a node of a wait-for graph. After + a node is entered, inspect_edge() will be called + for all wait-for destinations of this node. Then + leave_node() will be called. + We call "enter_node()" for all nodes we inspect, + including the starting node. + + @retval TRUE Maximum search depth exceeded. + @retval FALSE OK. +*/ + +bool Deadlock_detection_visitor::enter_node(MDL_context *node) +{ + m_found_deadlock= ++m_current_search_depth >= MAX_SEARCH_DEPTH; + if (m_found_deadlock) + { + DBUG_ASSERT(! m_victim); + opt_change_victim_to(node); + } + return m_found_deadlock; +} + + +/** + Done inspecting this node. Decrease the search + depth. If a deadlock is found, and we are + backtracking to the start node, optionally + change the deadlock victim to one with lower + deadlock weight. +*/ + +void Deadlock_detection_visitor::leave_node(MDL_context *node) +{ + --m_current_search_depth; + if (m_found_deadlock) + opt_change_victim_to(node); +} + + +/** + Inspect a wait-for graph edge from one MDL context to another. + + @retval TRUE A loop is found. + @retval FALSE No loop is found. +*/ + +bool Deadlock_detection_visitor::inspect_edge(MDL_context *node) +{ + m_found_deadlock= node == m_start_node; + return m_found_deadlock; +} + + +/** + Change the deadlock victim to a new one if it has lower deadlock + weight. + + @retval new_victim Victim is not changed. + @retval !new_victim New victim became the current. +*/ + +void +Deadlock_detection_visitor::opt_change_victim_to(MDL_context *new_victim) +{ + if (m_victim == NULL || + m_victim->get_deadlock_weight() >= new_victim->get_deadlock_weight()) + { + /* Swap victims, unlock the old one. 
*/ + MDL_context *tmp= m_victim; + m_victim= new_victim; + m_victim->lock_deadlock_victim(); + if (tmp) + tmp->unlock_deadlock_victim(); + } +} + + +/** + Get a bit corresponding to enum_mdl_type value in a granted/waiting bitmaps + and compatibility matrices. +*/ + +/** + The lock context. Created internally for an acquired lock. + For a given name, there exists only one MDL_lock instance, + and it exists only when the lock has been granted. + Can be seen as an MDL subsystem's version of TABLE_SHARE. + + This is an abstract class which lacks information about + compatibility rules for lock types. They should be specified + in its descendants. +*/ + +class MDL_lock +{ +public: + typedef mdl_bitmap_t bitmap_t; + + class Ticket_list + { + using List= ilist; + public: + Ticket_list() :m_bitmap(0) { m_type_counters.fill(0); } + + void add_ticket(MDL_ticket *ticket); + void remove_ticket(MDL_ticket *ticket); + bool is_empty() const { return m_list.empty(); } + bitmap_t bitmap() const { return m_bitmap; } + List::const_iterator begin() const { return m_list.begin(); } + List::const_iterator end() const { return m_list.end(); } + private: + /** List of tickets. */ + List m_list; + /** Bitmap of types of tickets in this list. */ + bitmap_t m_bitmap; + std::array m_type_counters; // hash table + }; + + + /** + Helper struct which defines how different types of locks are handled + for a specific MDL_lock. In practice we use only three strategies: + "backup" lock strategy for locks in BACKUP namespace, "scoped" lock + strategy for locks in SCHEMA namespace and "object" lock strategy for + all other namespaces. 
+ */ + struct MDL_lock_strategy + { + virtual const bitmap_t *incompatible_granted_types_bitmap() const = 0; + virtual const bitmap_t *incompatible_waiting_types_bitmap() const = 0; + virtual bool needs_notification(const MDL_ticket *ticket) const = 0; + virtual bool conflicting_locks(const MDL_ticket *ticket) const = 0; + virtual bitmap_t hog_lock_types_bitmap() const = 0; + virtual ~MDL_lock_strategy() = default; + }; + + + /** + An implementation of the scoped metadata lock. The only locking modes + which are supported at the moment are SHARED and INTENTION EXCLUSIVE + and EXCLUSIVE + */ + struct MDL_scoped_lock : public MDL_lock_strategy + { + MDL_scoped_lock() = default; + virtual const bitmap_t *incompatible_granted_types_bitmap() const + { return m_granted_incompatible; } + virtual const bitmap_t *incompatible_waiting_types_bitmap() const + { return m_waiting_incompatible; } + virtual bool needs_notification(const MDL_ticket *ticket) const + { return (ticket->get_type() == MDL_SHARED); } + + /** + Notify threads holding scoped IX locks which conflict with a pending + S lock. + + Thread which holds global IX lock can be a handler thread for + insert delayed. We need to kill such threads in order to get + global shared lock. We do this my calling code outside of MDL. + */ + virtual bool conflicting_locks(const MDL_ticket *ticket) const + { return ticket->get_type() == MDL_INTENTION_EXCLUSIVE; } + + /* + In scoped locks, only IX lock request would starve because of X/S. But that + is practically very rare case. So just return 0 from this function. + */ + virtual bitmap_t hog_lock_types_bitmap() const + { return 0; } + private: + static const bitmap_t m_granted_incompatible[MDL_TYPE_END]; + static const bitmap_t m_waiting_incompatible[MDL_TYPE_END]; + }; + + + /** + An implementation of a per-object lock. Supports SHARED, SHARED_UPGRADABLE, + SHARED HIGH PRIORITY and EXCLUSIVE locks. 
+ */ + struct MDL_object_lock : public MDL_lock_strategy + { + MDL_object_lock() = default; + virtual const bitmap_t *incompatible_granted_types_bitmap() const + { return m_granted_incompatible; } + virtual const bitmap_t *incompatible_waiting_types_bitmap() const + { return m_waiting_incompatible; } + virtual bool needs_notification(const MDL_ticket *ticket) const + { + return (MDL_BIT(ticket->get_type()) & + (MDL_BIT(MDL_SHARED_NO_WRITE) | + MDL_BIT(MDL_SHARED_NO_READ_WRITE) | + MDL_BIT(MDL_EXCLUSIVE))); + } + + /** + Notify threads holding a shared metadata locks on object which + conflict with a pending X, SNW or SNRW lock. + + If thread which holds conflicting lock is waiting on table-level + lock or some other non-MDL resource we might need to wake it up + by calling code outside of MDL. + */ + virtual bool conflicting_locks(const MDL_ticket *ticket) const + { return ticket->get_type() < MDL_SHARED_UPGRADABLE; } + + /* + To prevent starvation, these lock types that are only granted + max_write_lock_count times in a row while other lock types are + waiting. + */ + virtual bitmap_t hog_lock_types_bitmap() const + { + return (MDL_BIT(MDL_SHARED_NO_WRITE) | + MDL_BIT(MDL_SHARED_NO_READ_WRITE) | + MDL_BIT(MDL_EXCLUSIVE)); + } + + private: + static const bitmap_t m_granted_incompatible[MDL_TYPE_END]; + static const bitmap_t m_waiting_incompatible[MDL_TYPE_END]; + }; + + + struct MDL_backup_lock: public MDL_lock_strategy + { + MDL_backup_lock() = default; + virtual const bitmap_t *incompatible_granted_types_bitmap() const + { return m_granted_incompatible; } + virtual const bitmap_t *incompatible_waiting_types_bitmap() const + { return m_waiting_incompatible; } + virtual bool needs_notification(const MDL_ticket *ticket) const + { + return (MDL_BIT(ticket->get_type()) & MDL_BIT(MDL_BACKUP_FTWRL1)); + } + + /** + Insert delayed threads may hold DML or TRANS_DML lock. + We need to kill such threads in order to get lock for FTWRL statements. 
+ We do this by calling code outside of MDL. + */ + virtual bool conflicting_locks(const MDL_ticket *ticket) const + { + return (MDL_BIT(ticket->get_type()) & + (MDL_BIT(MDL_BACKUP_DML) | + MDL_BIT(MDL_BACKUP_TRANS_DML))); + } + + /* + In backup namespace DML/DDL may starve because of concurrent FTWRL or + BACKUP statements. This scenario is partically useless in real world, + so we just return 0 here. + */ + virtual bitmap_t hog_lock_types_bitmap() const + { return 0; } + private: + static const bitmap_t m_granted_incompatible[MDL_BACKUP_END]; + static const bitmap_t m_waiting_incompatible[MDL_BACKUP_END]; + }; + +public: + /** The key of the object (data) being protected. */ + MDL_key key; + /** + Read-write lock protecting this lock context. + + @note The fact that we use read-write lock prefers readers here is + important as deadlock detector won't work correctly otherwise. + + For example, imagine that we have following waiters graph: + + ctxA -> obj1 -> ctxB -> obj1 -| + ^ | + |----------------------------| + + and both ctxA and ctxB start deadlock detection process: + + ctxA read-locks obj1 ctxB read-locks obj2 + ctxA goes deeper ctxB goes deeper + + Now ctxC comes in who wants to start waiting on obj1, also + ctxD comes in who wants to start waiting on obj2. + + ctxC tries to write-lock obj1 ctxD tries to write-lock obj2 + ctxC is blocked ctxD is blocked + + Now ctxA and ctxB resume their search: + + ctxA tries to read-lock obj2 ctxB tries to read-lock obj1 + + If m_rwlock prefers writes (or fair) both ctxA and ctxB would be + blocked because of pending write locks from ctxD and ctxC + correspondingly. Thus we will get a deadlock in deadlock detector. + If m_wrlock prefers readers (actually ignoring pending writers is + enough) ctxA and ctxB will continue and no deadlock will occur. 
+ */ + mysql_prlock_t m_rwlock; + + bool is_empty() const + { + return (m_granted.is_empty() && m_waiting.is_empty()); + } + + const bitmap_t *incompatible_granted_types_bitmap() const + { return m_strategy->incompatible_granted_types_bitmap(); } + const bitmap_t *incompatible_waiting_types_bitmap() const + { return m_strategy->incompatible_waiting_types_bitmap(); } + + bool has_pending_conflicting_lock(enum_mdl_type type); + + bool can_grant_lock(enum_mdl_type type, MDL_context *requstor_ctx, + bool ignore_lock_priority) const; + + inline unsigned long get_lock_owner() const; + + void reschedule_waiters(); + + void remove_ticket(LF_PINS *pins, Ticket_list MDL_lock::*queue, + MDL_ticket *ticket); + + bool visit_subgraph(MDL_ticket *waiting_ticket, + MDL_wait_for_graph_visitor *gvisitor); + + bool needs_notification(const MDL_ticket *ticket) const + { return m_strategy->needs_notification(ticket); } + void notify_conflicting_locks(MDL_context *ctx) + { + for (const auto &conflicting_ticket : m_granted) + { + if (conflicting_ticket.get_ctx() != ctx && + m_strategy->conflicting_locks(&conflicting_ticket)) + { + MDL_context *conflicting_ctx= conflicting_ticket.get_ctx(); + + ctx->get_owner()-> + notify_shared_lock(conflicting_ctx->get_owner(), + conflicting_ctx->get_needs_thr_lock_abort()); + } + } + } + + bitmap_t hog_lock_types_bitmap() const + { return m_strategy->hog_lock_types_bitmap(); } + +#ifndef DBUG_OFF + bool check_if_conflicting_replication_locks(MDL_context *ctx); +#endif + + /** List of granted tickets for this lock. */ + Ticket_list m_granted; + /** Tickets for contexts waiting to acquire a lock. */ + Ticket_list m_waiting; + + /** + Number of times high priority lock requests have been granted while + low priority lock requests were waiting. 
+ */ + ulong m_hog_lock_count; + +public: + + MDL_lock() + : m_hog_lock_count(0), + m_strategy(0) + { mysql_prlock_init(key_MDL_lock_rwlock, &m_rwlock); } + + MDL_lock(const MDL_key *key_arg) + : key(key_arg), + m_hog_lock_count(0), + m_strategy(&m_backup_lock_strategy) + { + DBUG_ASSERT(key_arg->mdl_namespace() == MDL_key::BACKUP); + mysql_prlock_init(key_MDL_lock_rwlock, &m_rwlock); + } + + ~MDL_lock() + { mysql_prlock_destroy(&m_rwlock); } + + static void lf_alloc_constructor(uchar *arg) + { new (arg + LF_HASH_OVERHEAD) MDL_lock(); } + + static void lf_alloc_destructor(uchar *arg) + { ((MDL_lock*)(arg + LF_HASH_OVERHEAD))->~MDL_lock(); } + + static void lf_hash_initializer(LF_HASH *hash __attribute__((unused)), + MDL_lock *lock, MDL_key *key_arg) + { + DBUG_ASSERT(key_arg->mdl_namespace() != MDL_key::BACKUP); + new (&lock->key) MDL_key(key_arg); + if (key_arg->mdl_namespace() == MDL_key::SCHEMA) + lock->m_strategy= &m_scoped_lock_strategy; + else + lock->m_strategy= &m_object_lock_strategy; + } + + const MDL_lock_strategy *m_strategy; +private: + static const MDL_backup_lock m_backup_lock_strategy; + static const MDL_scoped_lock m_scoped_lock_strategy; + static const MDL_object_lock m_object_lock_strategy; +}; + + +const MDL_lock::MDL_backup_lock MDL_lock::m_backup_lock_strategy; +const MDL_lock::MDL_scoped_lock MDL_lock::m_scoped_lock_strategy; +const MDL_lock::MDL_object_lock MDL_lock::m_object_lock_strategy; + + +static MDL_map mdl_locks; + + +extern "C" +{ +static uchar * +mdl_locks_key(const uchar *record, size_t *length, + my_bool not_used __attribute__((unused))) +{ + MDL_lock *lock=(MDL_lock*) record; + *length= lock->key.length(); + return (uchar*) lock->key.ptr(); +} +} /* extern "C" */ + + +/** + Initialize the metadata locking subsystem. + + This function is called at server startup. + + In particular, initializes the new global mutex and + the associated condition variable: LOCK_mdl and COND_mdl. 
+ These locking primitives are implementation details of the MDL + subsystem and are private to it. +*/ + +void mdl_init() +{ + DBUG_ASSERT(! mdl_initialized); + mdl_initialized= TRUE; + +#ifdef HAVE_PSI_INTERFACE + init_mdl_psi_keys(); +#endif + + mdl_locks.init(); +} + + +/** + Release resources of metadata locking subsystem. + + Destroys the global mutex and the condition variable. + Called at server shutdown. +*/ + +void mdl_destroy() +{ + if (mdl_initialized) + { + mdl_initialized= FALSE; + mdl_locks.destroy(); + } +} + + +struct mdl_iterate_arg +{ + mdl_iterator_callback callback; + void *argument; +}; + + +static my_bool mdl_iterate_lock(MDL_lock *lock, mdl_iterate_arg *arg) +{ + /* + We can skip check for m_strategy here, becase m_granted + must be empty for such locks anyway. + */ + mysql_prlock_rdlock(&lock->m_rwlock); + bool res= std::any_of(lock->m_granted.begin(), lock->m_granted.end(), + [arg](MDL_ticket &ticket) { + return arg->callback(&ticket, arg->argument, true); + }); + res= std::any_of(lock->m_waiting.begin(), lock->m_waiting.end(), + [arg](MDL_ticket &ticket) { + return arg->callback(&ticket, arg->argument, false); + }); + mysql_prlock_unlock(&lock->m_rwlock); + return res; +} + + +int mdl_iterate(mdl_iterator_callback callback, void *arg) +{ + DBUG_ENTER("mdl_iterate"); + mdl_iterate_arg argument= { callback, arg }; + LF_PINS *pins= mdl_locks.get_pins(); + int res= 1; + + if (pins) + { + res= mdl_iterate_lock(mdl_locks.m_backup_lock, &argument) || + lf_hash_iterate(&mdl_locks.m_locks, pins, + (my_hash_walk_action) mdl_iterate_lock, &argument); + lf_hash_put_pins(pins); + } + DBUG_RETURN(res); +} + + +my_hash_value_type mdl_hash_function(CHARSET_INFO *cs, + const uchar *key, size_t length) +{ + MDL_key *mdl_key= (MDL_key*) (key - offsetof(MDL_key, m_ptr)); + return mdl_key->hash_value(); +} + + +/** Initialize the container for all MDL locks. 
*/

void MDL_map::init()
{
  MDL_key backup_lock_key(MDL_key::BACKUP, "", "");

  /*
    NOTE(review): the result of this allocation is not checked here; a
    failed allocation would only surface on first use of the BACKUP lock.
    Confirm that server startup aborts earlier on OOM.
  */
  m_backup_lock= new (std::nothrow) MDL_lock(&backup_lock_key);

  lf_hash_init(&m_locks, sizeof(MDL_lock), LF_HASH_UNIQUE, 0, 0,
               mdl_locks_key, &my_charset_bin);
  m_locks.alloc.constructor= MDL_lock::lf_alloc_constructor;
  m_locks.alloc.destructor= MDL_lock::lf_alloc_destructor;
  m_locks.initializer= (lf_hash_initializer) MDL_lock::lf_hash_initializer;
  m_locks.hash_function= mdl_hash_function;
}


/**
  Destroy the container for all MDL locks.
  @pre It must be empty.
*/

void MDL_map::destroy()
{
  delete m_backup_lock;

  DBUG_ASSERT(!lf_hash_size(&m_locks));
  lf_hash_destroy(&m_locks);
}


/**
  Find MDL_lock object corresponding to the key, create it
  if it does not exist.

  @retval non-NULL - Success. MDL_lock instance for the key with
                     locked MDL_lock::m_rwlock.
  @retval NULL     - Failure (OOM).
*/

MDL_lock* MDL_map::find_or_insert(LF_PINS *pins, const MDL_key *mdl_key)
{
  MDL_lock *lock;

  if (mdl_key->mdl_namespace() == MDL_key::BACKUP)
  {
    /*
      Return pointer to pre-allocated MDL_lock instance. Such an optimization
      allows to save one hash lookup for any statement changing data.

      It works since this namespace contains only one element so keys
      for them look like '\0\0'.
    */
    DBUG_ASSERT(mdl_key->length() == 3);
    mysql_prlock_wrlock(&m_backup_lock->m_rwlock);
    return m_backup_lock;
  }

retry:
  /*
    Search the hash; on a miss, try to insert a new element for this key.
    lf_hash_insert() == -1 means OOM; any other result (inserted, or lost
    a race to a concurrent inserter) makes the search above retry.
  */
  while (!(lock= (MDL_lock*) lf_hash_search(&m_locks, pins, mdl_key->ptr(),
                                            mdl_key->length())))
    if (lf_hash_insert(&m_locks, pins, (uchar*) mdl_key) == -1)
      return NULL;

  mysql_prlock_wrlock(&lock->m_rwlock);
  if (unlikely(!lock->m_strategy))
  {
    /*
      m_strategy == NULL marks an MDL_lock that is concurrently being
      removed from the hash (see MDL_map::remove()): drop it and retry.
    */
    mysql_prlock_unlock(&lock->m_rwlock);
    lf_hash_search_unpin(pins);
    goto retry;
  }
  lf_hash_search_unpin(pins);

  return lock;
}


/**
 * Return thread id of the owner of the lock, if it is owned.
 */

unsigned long
MDL_map::get_lock_owner(LF_PINS *pins, const MDL_key *mdl_key)
{
  unsigned long res= 0;

  if (mdl_key->mdl_namespace() == MDL_key::BACKUP)
  {
    /* BACKUP namespace uses the single pre-allocated lock: no hash lookup. */
    mysql_prlock_rdlock(&m_backup_lock->m_rwlock);
    res= m_backup_lock->get_lock_owner();
    mysql_prlock_unlock(&m_backup_lock->m_rwlock);
  }
  else
  {
    /*
      NOTE(review): lf_hash_search() can, in general, also return an error
      marker on pin allocation failure -- confirm that cannot happen with
      the caller-provided pins here.
    */
    MDL_lock *lock= (MDL_lock*) lf_hash_search(&m_locks, pins, mdl_key->ptr(),
                                               mdl_key->length());
    if (lock)
    {
      /*
        We can skip check for m_strategy here, because m_granted
        must be empty for such locks anyway.
      */
      mysql_prlock_rdlock(&lock->m_rwlock);
      res= lock->get_lock_owner();
      mysql_prlock_unlock(&lock->m_rwlock);
      lf_hash_search_unpin(pins);
    }
  }
  return res;
}


/**
  Destroy MDL_lock object or delegate this responsibility to
  whatever thread that holds the last outstanding reference to
  it.
*/

void MDL_map::remove(LF_PINS *pins, MDL_lock *lock)
{
  if (lock->key.mdl_namespace() == MDL_key::BACKUP)
  {
    /* Never destroy pre-allocated MDL_lock object in BACKUP namespace. */
    mysql_prlock_unlock(&lock->m_rwlock);
    return;
  }

  /*
    Clearing m_strategy marks this lock as "being deleted" for concurrent
    MDL_map::find_or_insert() callers, which then retry their lookup.
  */
  lock->m_strategy= 0;
  mysql_prlock_unlock(&lock->m_rwlock);
  lf_hash_delete(&m_locks, pins, lock->key.ptr(), lock->key.length());
}


/**
  Initialize a metadata locking context.

  This is to be called when a new server connection is created.
*/

MDL_context::MDL_context()
  :
  m_owner(NULL),
  m_needs_thr_lock_abort(FALSE),
  m_waiting_for(NULL),
  m_pins(NULL)
{
  mysql_prlock_init(key_MDL_context_LOCK_waiting_for, &m_LOCK_waiting_for);
}


/**
  Destroy metadata locking context.

  Assumes and asserts that there are no active or pending locks
  associated with this context at the time of the destruction.

  Currently does nothing. Asserts that there are no pending
  or satisfied lock requests. The pending locks must be released
  prior to destruction. This is a new way to express the assertion
  that all tables are closed before a connection is destroyed.
*/

void MDL_context::destroy()
{
  /* Locks of every duration must have been released by now. */
  DBUG_ASSERT(m_tickets[MDL_STATEMENT].is_empty());
  DBUG_ASSERT(m_tickets[MDL_TRANSACTION].is_empty());
  DBUG_ASSERT(m_tickets[MDL_EXPLICIT].is_empty());

  mysql_prlock_destroy(&m_LOCK_waiting_for);
  if (m_pins)
    lf_hash_put_pins(m_pins);
}


/**
  Lazily allocate LF_HASH pins for this context on first use.

  @return FALSE on success, TRUE if pin allocation failed (OOM).
*/

bool MDL_context::fix_pins()
{
  return m_pins ? false : (m_pins= mdl_locks.get_pins()) == 0;
}


/**
  Initialize a lock request.

  This is to be used for every lock request.

  Note that initialization and allocation are split into two
  calls. This is to allow flexible memory management of lock
  requests. Normally a lock request is stored in statement memory
  (e.g. is a member of struct TABLE_LIST), but we would also like
  to allow allocation of lock requests in other memory roots,
  for example in the grant subsystem, to lock privilege tables.

  The MDL subsystem does not own or manage memory of lock requests.

  @param mdl_namespace  Id of namespace of object to be locked
  @param db             Name of database to which the object belongs
  @param name           Name of the object
  @param mdl_type       The MDL lock type for the request.
*/

void MDL_request::init_with_source(MDL_key::enum_mdl_namespace mdl_namespace,
                                   const char *db_arg,
                                   const char *name_arg,
                                   enum_mdl_type mdl_type_arg,
                                   enum_mdl_duration mdl_duration_arg,
                                   const char *src_file,
                                   uint src_line)
{
  key.mdl_key_init(mdl_namespace, db_arg, name_arg);
  type= mdl_type_arg;
  duration= mdl_duration_arg;
  ticket= NULL;
  /* Remember the requesting source location for diagnostics. */
  m_src_file= src_file;
  m_src_line= src_line;
}


/**
  Initialize a lock request using pre-built MDL_key.

  @sa MDL_request::init(namespace, db, name, type).

  @param key_arg       The pre-built MDL key for the request.
  @param mdl_type_arg  The MDL lock type for the request.
*/

void MDL_request::init_by_key_with_source(const MDL_key *key_arg,
                                          enum_mdl_type mdl_type_arg,
                                          enum_mdl_duration mdl_duration_arg,
                                          const char *src_file,
                                          uint src_line)
{
  key.mdl_key_init(key_arg);
  type= mdl_type_arg;
  duration= mdl_duration_arg;
  ticket= NULL;
  /* Remember the requesting source location for diagnostics. */
  m_src_file= src_file;
  m_src_line= src_line;
}


/**
  Auxiliary functions needed for creation/destruction of MDL_ticket
  objects.

  @todo This naive implementation should be replaced with one that saves
        on memory allocation by reusing released objects.
*/

MDL_ticket *MDL_ticket::create(MDL_context *ctx_arg, enum_mdl_type type_arg
#ifndef DBUG_OFF
                               , enum_mdl_duration duration_arg
#endif
                               )
{
  return new (std::nothrow)
             MDL_ticket(ctx_arg, type_arg
#ifndef DBUG_OFF
                        , duration_arg
#endif
                        );
}


void MDL_ticket::destroy(MDL_ticket *ticket)
{
  /* Release performance-schema instrumentation before freeing the ticket. */
  mysql_mdl_destroy(ticket->m_psi);
  ticket->m_psi= NULL;

  delete ticket;
}


/**
  Return the 'weight' of this ticket for the
  victim selection algorithm. Requests with
  lower weight are preferred to requests
  with higher weight when choosing a victim.
*/

uint MDL_ticket::get_deadlock_weight() const
{
  if (m_lock->key.mdl_namespace() == MDL_key::BACKUP)
  {
    if (m_type == MDL_BACKUP_FTWRL1)
      return DEADLOCK_WEIGHT_FTWRL1;
    return DEADLOCK_WEIGHT_DDL;
  }
  /* Stronger-than-SU object locks are treated as DDL for victim selection. */
  return m_type >= MDL_SHARED_UPGRADABLE ?
         DEADLOCK_WEIGHT_DDL : DEADLOCK_WEIGHT_DML;
}


/** Construct an empty wait slot. */

MDL_wait::MDL_wait()
  :m_wait_status(EMPTY)
{
  mysql_mutex_init(key_MDL_wait_LOCK_wait_status, &m_LOCK_wait_status, NULL);
  mysql_cond_init(key_MDL_wait_COND_wait_status, &m_COND_wait_status, NULL);
}


/** Destroy system resources. */

MDL_wait::~MDL_wait()
{
  mysql_mutex_destroy(&m_LOCK_wait_status);
  mysql_cond_destroy(&m_COND_wait_status);
}


/**
  Set the status unless it's already set. Return FALSE if set,
  TRUE otherwise.
+*/ + +bool MDL_wait::set_status(enum_wait_status status_arg) +{ + bool was_occupied= TRUE; + mysql_mutex_lock(&m_LOCK_wait_status); + if (m_wait_status == EMPTY) + { + was_occupied= FALSE; + m_wait_status= status_arg; + mysql_cond_signal(&m_COND_wait_status); + } + mysql_mutex_unlock(&m_LOCK_wait_status); + return was_occupied; +} + + +/** Query the current value of the wait slot. */ + +MDL_wait::enum_wait_status MDL_wait::get_status() +{ + enum_wait_status result; + mysql_mutex_lock(&m_LOCK_wait_status); + result= m_wait_status; + mysql_mutex_unlock(&m_LOCK_wait_status); + return result; +} + + +/** Clear the current value of the wait slot. */ + +void MDL_wait::reset_status() +{ + mysql_mutex_lock(&m_LOCK_wait_status); + m_wait_status= EMPTY; + mysql_mutex_unlock(&m_LOCK_wait_status); +} + + +/** + Wait for the status to be assigned to this wait slot. + + @param owner MDL context owner. + @param abs_timeout Absolute time after which waiting should stop. + @param set_status_on_timeout TRUE - If in case of timeout waiting + context should close the wait slot by + sending TIMEOUT to itself. + FALSE - Otherwise. + @param wait_state_name Thread state name to be set for duration of wait. + + @returns Signal posted. 
*/

MDL_wait::enum_wait_status
MDL_wait::timed_wait(MDL_context_owner *owner, struct timespec *abs_timeout,
                     bool set_status_on_timeout,
                     const PSI_stage_info *wait_state_name)
{
  PSI_stage_info old_stage;
  enum_wait_status result;
  int wait_result= 0;
  DBUG_ENTER("MDL_wait::timed_wait");

  mysql_mutex_lock(&m_LOCK_wait_status);

  owner->ENTER_COND(&m_COND_wait_status, &m_LOCK_wait_status,
                    wait_state_name, & old_stage);
  thd_wait_begin(NULL, THD_WAIT_META_DATA_LOCK);
  tpool::tpool_wait_begin();
  /* Sleep until a status is posted, the owner is killed, or we time out. */
  while (!m_wait_status && !owner->is_killed() &&
         wait_result != ETIMEDOUT && wait_result != ETIME)
  {
#ifdef WITH_WSREP
# ifdef ENABLED_DEBUG_SYNC
    // Allow tests to block the applier thread using the DBUG facilities
    DBUG_EXECUTE_IF("sync.wsrep_before_mdl_wait",
                    {
                      const char act[]=
                        "now "
                        "wait_for signal.wsrep_before_mdl_wait";
                      DBUG_ASSERT(!debug_sync_set_action((owner->get_thd()),
                                                         STRING_WITH_LEN(act)));
                    };);
# endif
    /* BF (brute-force/applier) threads wait without a timeout. */
    if (WSREP_ON && wsrep_thd_is_BF(owner->get_thd(), false))
    {
      wait_result= mysql_cond_wait(&m_COND_wait_status, &m_LOCK_wait_status);
    }
    else
#endif /* WITH_WSREP */
      wait_result= mysql_cond_timedwait(&m_COND_wait_status,
                                        &m_LOCK_wait_status,
                                        abs_timeout);
  }
  tpool::tpool_wait_end();
  thd_wait_end(NULL);

  if (m_wait_status == EMPTY)
  {
    /*
      Wait has ended not due to a status being set from another
      thread but due to this connection/statement being killed or a
      time out.
      To avoid races, which may occur if another thread sets
      GRANTED status before the code which calls this method
      processes the abort/timeout, we assign the status under
      protection of the m_LOCK_wait_status, within the critical
      section. An exception is when set_status_on_timeout is
      false, which means that the caller intends to restart the
      wait.
    */
    if (owner->is_killed())
      m_wait_status= KILLED;
    else if (set_status_on_timeout)
      m_wait_status= TIMEOUT;
  }
  result= m_wait_status;

  owner->EXIT_COND(& old_stage);

  DBUG_RETURN(result);
}


/**
  Add ticket to MDL_lock's list of waiting requests and
  update corresponding bitmap of lock types.
*/

void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket)
{
  /*
    Ticket being added to the list must have MDL_ticket::m_lock set,
    since for such tickets methods accessing this member might be
    called by other threads.
  */
  DBUG_ASSERT(ticket->get_lock());
#ifdef WITH_WSREP
  /*
    A waiting BF (brute-force/applier) ticket is inserted ahead of all
    non-BF waiters: immediately before the first waiter whose thread is
    not BF (i.e. after any already-queued BF waiters).
  */
  if (WSREP_ON && (this == &(ticket->get_lock()->m_waiting)) &&
      wsrep_thd_is_BF(ticket->get_ctx()->get_thd(), false))
  {
    DBUG_ASSERT(WSREP(ticket->get_ctx()->get_thd()));

    m_list.insert(std::find_if(ticket->get_lock()->m_waiting.begin(),
                               ticket->get_lock()->m_waiting.end(),
                               [](const MDL_ticket &waiting) {
                                 return !wsrep_thd_is_BF(
                                     waiting.get_ctx()->get_thd(), true);
                               }),
                  *ticket);
  }
  else
#endif /* WITH_WSREP */
  {
    /*
      Add ticket to the *back* of the queue to ensure fairness
      among requests with the same priority.
    */
    m_list.push_back(*ticket);
  }
  m_bitmap|= MDL_BIT(ticket->get_type());
  m_type_counters[ticket->get_type()]++;
}


/**
  Remove ticket from MDL_lock's list of requests and
  update corresponding bitmap of lock types.
*/

void MDL_lock::Ticket_list::remove_ticket(MDL_ticket *ticket)
{
  m_list.remove(*ticket);
  /*
    Check if waiting queue has another ticket with the same type as
    one which was removed. If there is no such ticket, i.e. we have
    removed last ticket of particular type, then we need to update
    bitmap of waiting ticket's types.
  */
  if (--m_type_counters[ticket->get_type()] == 0)
    m_bitmap&= ~MDL_BIT(ticket->get_type());
}


/**
  Determine waiting contexts which requests for the lock can be
  satisfied, grant lock to them and wake them up.

  @note Together with MDL_lock::add_ticket() this method implements
        fair scheduling among requests with the same priority.
        It tries to grant lock from the head of waiters list, while
        add_ticket() adds new requests to the back of this list.

*/

void MDL_lock::reschedule_waiters()
{
  bool skip_high_priority= false;
  bitmap_t hog_lock_types= hog_lock_types_bitmap();

  if (m_hog_lock_count >= max_write_lock_count)
  {
    /*
      If number of successively granted high-prio, strong locks has exceeded
      max_write_lock_count give a way to low-prio, weak locks to avoid their
      starvation.
    */

    if ((m_waiting.bitmap() & ~hog_lock_types) != 0)
    {
      /*
        Even though normally when m_hog_lock_count is non-0 there is
        some pending low-prio lock, we still can encounter situation
        when m_hog_lock_count is non-0 and there are no pending low-prio
        locks. This, for example, can happen when a ticket for pending
        low-prio lock was removed from waiters list due to timeout,
        and reschedule_waiters() is called after that to update the
        waiters queue. m_hog_lock_count will be reset to 0 at the
        end of this call in such case.

        Note that it is not an issue if we fail to wake up any pending
        waiters for weak locks in the loop below. This would mean that
        all of them are either killed, timed out or chosen as a victim
        by deadlock resolver, but have not managed to remove ticket
        from the waiters list yet. After tickets will be removed from
        the waiters queue there will be another call to
        reschedule_waiters() with pending bitmap updated to reflect new
        state of waiters queue.
      */
      skip_high_priority= true;
    }
  }

  /*
    Find the first (and hence the oldest) waiting request which
    can be satisfied (taking into account priority). Grant lock to it.
    Repeat the process for the remainder of waiters.
    Note we don't need to re-start iteration from the head of the
    list after satisfying the first suitable request as in our case
    all compatible types of requests have the same priority.

    TODO/FIXME: We should:
                - Either switch to scheduling without priorities
                  which will allow to stop iteration through the
                  list of waiters once we found the first ticket
                  which can't be satisfied
                - Or implement some check using bitmaps which will
                  allow to stop iteration in cases when, e.g., we
                  grant SNRW lock and there are no pending S or
                  SH locks.
  */
  for (auto it= m_waiting.begin(); it != m_waiting.end(); ++it)
  {
    /*
      Skip high-prio, strong locks if earlier we have decided to give way to
      low-prio, weaker locks.
    */
    if (skip_high_priority &&
        ((MDL_BIT(it->get_type()) & hog_lock_types) != 0))
      continue;

    if (can_grant_lock(it->get_type(), it->get_ctx(),
                       skip_high_priority))
    {
      if (!it->get_ctx()->m_wait.set_status(MDL_wait::GRANTED))
      {
        /*
          Satisfy the found request by updating lock structures.
          It is OK to do so even after waking up the waiter since any
          session which tries to get any information about the state of
          this lock has to acquire MDL_lock::m_rwlock first and thus,
          when manages to do so, already sees an updated state of the
          MDL_lock object.
        */
        /*
          Capture the predecessor before moving the current ticket: the
          intrusive list is circular, so std::prev(begin()) is valid and
          lets the loop's ++it land on the element after the one removed.
        */
        auto prev_it= std::prev(it); // this might be begin()-- but the hack
                                     // works because list is circular
        m_waiting.remove_ticket(&*it);
        m_granted.add_ticket(&*it);

        /*
          Increase counter of successively granted high-priority strong locks,
          if we have granted one.
        */
        if ((MDL_BIT(it->get_type()) & hog_lock_types) != 0)
          m_hog_lock_count++;

        it= prev_it;
      }
      /*
        If we could not update the wait slot of the waiter,
        it can be due to fact that its connection/statement was
        killed or it has timed out (i.e. the slot is not empty).
        Since in all such cases the waiter assumes that the lock was
        not been granted, we should keep the request in the waiting
        queue and look for another request to reschedule.
      */
    }
  }

  if ((m_waiting.bitmap() & ~hog_lock_types) == 0)
  {
    /*
      Reset number of successively granted high-prio, strong locks
      if there are no pending low-prio, weak locks.
      This ensures:
      - That m_hog_lock_count is correctly reset after strong lock
        is released and weak locks are granted (or there are no
        other lock requests).
      - That situation when SNW lock is granted along with some SR
        locks, but SW locks are still blocked are handled correctly.
      - That m_hog_lock_count is zero in most cases when there are no pending
        weak locks (see comment at the start of this method for example of
        exception). This allows to save on checks at the start of this method.
    */
    m_hog_lock_count= 0;
  }
}


/**
  Compatibility (or rather "incompatibility") matrices for scoped metadata
  lock.
  Scoped locks are database (or schema) locks.
  Arrays of bitmaps which elements specify which granted/waiting locks
  are incompatible with type of lock being requested.

  The first array specifies if particular type of request can be satisfied
  if there is granted scoped lock of certain type.

  (*) Since intention shared scoped locks (IS) are compatible with all other
      type of locks, they don't need to be implemented and there is no code
      for them.

             | Type of active   |
     Request |   scoped lock    |
      type   | IS(*)  IX   S  X |
    ---------+------------------+
     IS(*)   |  +     +    +  + |
     IX      |  +     +    -  - |
     S       |  +     -    +  - |
     X       |  +     -    -  - |

  The second array specifies if particular type of request can be satisfied
  if there is already waiting request for the scoped lock of certain type.
  I.e. it specifies what is the priority of different lock types.
+ + | Pending | + Request | scoped lock | + type | IS(*) IX S X | + ---------+-----------------+ + IS(*) | + + + + | + IX | + + - - | + S | + + + - | + X | + + + + | + + Here: "+" -- means that request can be satisfied + "-" -- means that request can't be satisfied and should wait + + Note that relation between scoped locks and objects locks requested + by statement is not straightforward and is therefore fully defined + by SQL-layer. + For example, in order to support global read lock implementation + SQL-layer acquires IX lock in GLOBAL namespace for each statement + that can modify metadata or data (i.e. for each statement that + needs SW, SU, SNW, SNRW or X object locks). OTOH, to ensure that + DROP DATABASE works correctly with concurrent DDL, IX metadata locks + in SCHEMA namespace are acquired for DDL statements which can update + metadata in the schema (i.e. which acquire SU, SNW, SNRW and X locks + on schema objects) and aren't acquired for DML. +*/ + +const MDL_lock::bitmap_t +MDL_lock::MDL_scoped_lock::m_granted_incompatible[MDL_TYPE_END]= +{ + MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED), + MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_INTENTION_EXCLUSIVE), + 0, 0, 0, 0, 0, 0, 0, + MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED) | MDL_BIT(MDL_INTENTION_EXCLUSIVE) +}; + +const MDL_lock::bitmap_t +MDL_lock::MDL_scoped_lock::m_waiting_incompatible[MDL_TYPE_END]= +{ + MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED), + MDL_BIT(MDL_EXCLUSIVE), 0, 0, 0, 0, 0, 0, 0, 0 +}; + + +/** + Compatibility (or rather "incompatibility") matrices for per-object + metadata lock. Arrays of bitmaps which elements specify which granted/ + waiting locks are incompatible with type of lock being requested. + + The first array specifies if particular type of request can be satisfied + if there is granted lock of certain type. 
+ + Request | Granted requests for lock | + type | S SH SR SW SU SRO SNW SNRW X | + ----------+------------------------------------+ + S | + + + + + + + + - | + SH | + + + + + + + + - | + SR | + + + + + + + - - | + SW | + + + + + - - - - | + SU | + + + + - + - - - | + SRO | + + + - + + + - - | + SNW | + + + - - + - - - | + SNRW | + + - - - - - - - | + X | - - - - - - - - - | + SU -> X | - - - - 0 - 0 0 0 | + SNW -> X | - - - 0 0 - 0 0 0 | + SNRW -> X | - - 0 0 0 0 0 0 0 | + + The second array specifies if particular type of request can be satisfied + if there is waiting request for the same lock of certain type. In other + words it specifies what is the priority of different lock types. + + Request | Pending requests for lock | + type | S SH SR SW SU SRO SNW SNRW X | + ----------+-----------------------------------+ + S | + + + + + + + + - | + SH | + + + + + + + + + | + SR | + + + + + + + - - | + SW | + + + + + + - - - | + SU | + + + + + + + + - | + SRO | + + + - + + + - - | + SNW | + + + + + + + + - | + SNRW | + + + + + + + + - | + X | + + + + + + + + + | + SU -> X | + + + + + + + + + | + SNW -> X | + + + + + + + + + | + SNRW -> X | + + + + + + + + + | + + Here: "+" -- means that request can be satisfied + "-" -- means that request can't be satisfied and should wait + "0" -- means impossible situation which will trigger assert + + @note In cases then current context already has "stronger" type + of lock on the object it will be automatically granted + thanks to usage of the MDL_context::find_ticket() method. + + @note IX locks are excluded since they are not used for per-object + metadata locks. 
+*/ + +const MDL_lock::bitmap_t +MDL_lock::MDL_object_lock::m_granted_incompatible[MDL_TYPE_END]= +{ + 0, + MDL_BIT(MDL_EXCLUSIVE), + MDL_BIT(MDL_EXCLUSIVE), + MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE), + MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) | + MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_READ_ONLY), + MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) | + MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_UPGRADABLE), + MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) | + MDL_BIT(MDL_SHARED_WRITE), + MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) | + MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_UPGRADABLE) | + MDL_BIT(MDL_SHARED_WRITE), + MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) | + MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_READ_ONLY) | + MDL_BIT(MDL_SHARED_UPGRADABLE) | MDL_BIT(MDL_SHARED_WRITE) | + MDL_BIT(MDL_SHARED_READ), + MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) | + MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_READ_ONLY) | + MDL_BIT(MDL_SHARED_UPGRADABLE) | MDL_BIT(MDL_SHARED_WRITE) | + MDL_BIT(MDL_SHARED_READ) | MDL_BIT(MDL_SHARED_HIGH_PRIO) | + MDL_BIT(MDL_SHARED) +}; + + +const MDL_lock::bitmap_t +MDL_lock::MDL_object_lock::m_waiting_incompatible[MDL_TYPE_END]= +{ + 0, + MDL_BIT(MDL_EXCLUSIVE), + 0, + MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE), + MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) | + MDL_BIT(MDL_SHARED_NO_WRITE), + MDL_BIT(MDL_EXCLUSIVE), + MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) | + MDL_BIT(MDL_SHARED_WRITE), + MDL_BIT(MDL_EXCLUSIVE), + MDL_BIT(MDL_EXCLUSIVE), + 0 +}; + + +/** + Compatibility (or rather "incompatibility") matrices for backup metadata + lock. Arrays of bitmaps which elements specify which granted/waiting locks + are incompatible with type of lock being requested. 
+ + The first array specifies if particular type of request can be satisfied + if there is granted backup lock of certain type. + + Request | Type of active backup lock | + type | S0 S1 S2 S3 S4 F1 F2 D TD SD DD BL AC C | + ----------+---------------------------------------------------------+ + S0 | - - - - - + + + + + + + + + | + S1 | - + + + + + + + + + + + + + | + S2 | - + + + + + + - + + + + + + | + S3 | - + + + + + + - + + - + + + | + S4 | - + + + + + + - + - - + + - | + FTWRL1 | + + + + + + + - - - - + - + | + FTWRL2 | + + + + + + + - - - - + - - | + D | + - - - - - - + + + + + + + | + TD | + + + + + - - + + + + + + + | + SD | + + + + - - - + + + + + + + | + DDL | + + + - - - - + + + + - + + | + BLOCK_DDL | - + + + + + + + + + - + + + | + ALTER_COP | + + + + + - - + + + + + + + | + COMMIT | + + + + - + - + + + + + + + | + + The second array specifies if particular type of request can be satisfied + if there is already waiting request for the backup lock of certain type. + I.e. it specifies what is the priority of different lock types. 
+ + Request | Pending backup lock | + type | S0 S1 S2 S3 S4 F1 F2 D TD SD DD BL AC C | + ----------+---------------------------------------------------------+ + S0 | + - - - - + + + + + + + + + | + S1 | + + + + + + + + + + + + + + | + S2 | + + + + + + + + + + + + + + | + S3 | + + + + + + + + + + + + + + | + S4 | + + + + + + + + + + + + + + | + FTWRL1 | + + + + + + + + + + + + + + | + FTWRL2 | + + + + + + + + + + + + + + | + D | + - - - - - - + + + + + + + | + TD | + + + + + - - + + + + + + + | + SD | + + + + - - - + + + + + + + | + DDL | + + + - - - - + + + + - + + | + BLOCK_DDL | + + + + + + + + + + + + + + | + ALTER_COP | + + + + + - - + + + + + + + | + COMMIT | + + + + - + - + + + + + + + | + + Here: "+" -- means that request can be satisfied + "-" -- means that request can't be satisfied and should wait +*/ + +/* + NOTE: If you add a new MDL_BACKUP_XXX level lock, you have to also add it + to MDL_BACKUP_START in the two arrays below! +*/ + +const MDL_lock::bitmap_t +MDL_lock::MDL_backup_lock::m_granted_incompatible[MDL_BACKUP_END]= +{ + /* MDL_BACKUP_START */ + MDL_BIT(MDL_BACKUP_START) | MDL_BIT(MDL_BACKUP_FLUSH) | MDL_BIT(MDL_BACKUP_WAIT_FLUSH) | MDL_BIT(MDL_BACKUP_WAIT_DDL) | MDL_BIT(MDL_BACKUP_WAIT_COMMIT) | MDL_BIT(MDL_BACKUP_BLOCK_DDL), + MDL_BIT(MDL_BACKUP_START), + MDL_BIT(MDL_BACKUP_START) | MDL_BIT(MDL_BACKUP_DML), + MDL_BIT(MDL_BACKUP_START) | MDL_BIT(MDL_BACKUP_DML) | MDL_BIT(MDL_BACKUP_DDL), + MDL_BIT(MDL_BACKUP_START) | MDL_BIT(MDL_BACKUP_DML) | MDL_BIT(MDL_BACKUP_SYS_DML) | MDL_BIT(MDL_BACKUP_DDL) | MDL_BIT(MDL_BACKUP_COMMIT), + + /* MDL_BACKUP_FTWRL1 */ + MDL_BIT(MDL_BACKUP_DML) | MDL_BIT(MDL_BACKUP_TRANS_DML) | MDL_BIT(MDL_BACKUP_SYS_DML) | MDL_BIT(MDL_BACKUP_DDL) | MDL_BIT(MDL_BACKUP_ALTER_COPY), + MDL_BIT(MDL_BACKUP_DML) | MDL_BIT(MDL_BACKUP_TRANS_DML) | MDL_BIT(MDL_BACKUP_SYS_DML) | MDL_BIT(MDL_BACKUP_DDL) | MDL_BIT(MDL_BACKUP_ALTER_COPY) | MDL_BIT(MDL_BACKUP_COMMIT), + /* MDL_BACKUP_DML */ + MDL_BIT(MDL_BACKUP_FLUSH) | 
MDL_BIT(MDL_BACKUP_WAIT_FLUSH) | MDL_BIT(MDL_BACKUP_WAIT_DDL) | MDL_BIT(MDL_BACKUP_WAIT_COMMIT) | MDL_BIT(MDL_BACKUP_FTWRL1) | MDL_BIT(MDL_BACKUP_FTWRL2), + MDL_BIT(MDL_BACKUP_FTWRL1) | MDL_BIT(MDL_BACKUP_FTWRL2), + MDL_BIT(MDL_BACKUP_WAIT_COMMIT) | MDL_BIT(MDL_BACKUP_FTWRL1) | MDL_BIT(MDL_BACKUP_FTWRL2), + /* MDL_BACKUP_DDL */ + MDL_BIT(MDL_BACKUP_WAIT_DDL) | MDL_BIT(MDL_BACKUP_WAIT_COMMIT) | MDL_BIT(MDL_BACKUP_FTWRL1) | MDL_BIT(MDL_BACKUP_FTWRL2) | MDL_BIT(MDL_BACKUP_BLOCK_DDL), + /* MDL_BACKUP_BLOCK_DDL */ + MDL_BIT(MDL_BACKUP_START) | MDL_BIT(MDL_BACKUP_FLUSH) | MDL_BIT(MDL_BACKUP_WAIT_FLUSH) | MDL_BIT(MDL_BACKUP_WAIT_DDL) | MDL_BIT(MDL_BACKUP_WAIT_COMMIT) | MDL_BIT(MDL_BACKUP_BLOCK_DDL) | MDL_BIT(MDL_BACKUP_DDL), + MDL_BIT(MDL_BACKUP_FTWRL1) | MDL_BIT(MDL_BACKUP_FTWRL2), + /* MDL_BACKUP_COMMIT */ + MDL_BIT(MDL_BACKUP_WAIT_COMMIT) | MDL_BIT(MDL_BACKUP_FTWRL2) +}; + + +const MDL_lock::bitmap_t +MDL_lock::MDL_backup_lock::m_waiting_incompatible[MDL_BACKUP_END]= +{ + /* MDL_BACKUP_START */ + MDL_BIT(MDL_BACKUP_FLUSH) | MDL_BIT(MDL_BACKUP_WAIT_FLUSH) | MDL_BIT(MDL_BACKUP_WAIT_DDL) | MDL_BIT(MDL_BACKUP_WAIT_COMMIT) | MDL_BIT(MDL_BACKUP_BLOCK_DDL), + 0, + 0, + 0, + 0, + /* MDL_BACKUP_FTWRL1 */ + 0, + 0, + + /* MDL_BACKUP_DML */ + MDL_BIT(MDL_BACKUP_FLUSH) | MDL_BIT(MDL_BACKUP_WAIT_FLUSH) | MDL_BIT(MDL_BACKUP_WAIT_DDL) | MDL_BIT(MDL_BACKUP_WAIT_COMMIT) | MDL_BIT(MDL_BACKUP_FTWRL1) | MDL_BIT(MDL_BACKUP_FTWRL2), + MDL_BIT(MDL_BACKUP_FTWRL1) | MDL_BIT(MDL_BACKUP_FTWRL2), + MDL_BIT(MDL_BACKUP_WAIT_COMMIT) | MDL_BIT(MDL_BACKUP_FTWRL1) | MDL_BIT(MDL_BACKUP_FTWRL2), + /* MDL_BACKUP_DDL */ + MDL_BIT(MDL_BACKUP_WAIT_DDL) | MDL_BIT(MDL_BACKUP_WAIT_COMMIT) | MDL_BIT(MDL_BACKUP_FTWRL1) | MDL_BIT(MDL_BACKUP_FTWRL2) | MDL_BIT(MDL_BACKUP_BLOCK_DDL), + /* MDL_BACKUP_BLOCK_DDL */ + MDL_BIT(MDL_BACKUP_START), + MDL_BIT(MDL_BACKUP_FTWRL1) | MDL_BIT(MDL_BACKUP_FTWRL2), + /* MDL_BACKUP_COMMIT */ + MDL_BIT(MDL_BACKUP_WAIT_COMMIT) | MDL_BIT(MDL_BACKUP_FTWRL2) +}; + + +/** + Check if request 
for the metadata lock can be satisfied given its + current state. + + New lock request can be satisfied iff: + - There are no incompatible types of satisfied requests + in other contexts + - There are no waiting requests which have higher priority + than this request when priority was not ignored. + + @param type_arg The requested lock type. + @param requestor_ctx The MDL context of the requestor. + @param ignore_lock_priority Ignore lock priority. + + @retval TRUE Lock request can be satisfied + @retval FALSE There is some conflicting lock. + + @note In cases then current context already has "stronger" type + of lock on the object it will be automatically granted + thanks to usage of the MDL_context::find_ticket() method. +*/ + +bool +MDL_lock::can_grant_lock(enum_mdl_type type_arg, + MDL_context *requestor_ctx, + bool ignore_lock_priority) const +{ + bitmap_t waiting_incompat_map= incompatible_waiting_types_bitmap()[type_arg]; + bitmap_t granted_incompat_map= incompatible_granted_types_bitmap()[type_arg]; + +#ifdef WITH_WSREP + /* + Approve lock request in BACKUP namespace for BF threads. + */ + if (!wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP) && + (wsrep_thd_is_toi(requestor_ctx->get_thd()) || + wsrep_thd_is_applying(requestor_ctx->get_thd())) && + key.mdl_namespace() == MDL_key::BACKUP) + { + bool waiting_incompatible= m_waiting.bitmap() & waiting_incompat_map; + bool granted_incompatible= m_granted.bitmap() & granted_incompat_map; + if (waiting_incompatible || granted_incompatible) + { + WSREP_DEBUG("global lock granted for BF%s: %lu %s", + waiting_incompatible ? 
" (waiting queue)" : "", + thd_get_thread_id(requestor_ctx->get_thd()), + wsrep_thd_query(requestor_ctx->get_thd())); + } + return true; + } +#endif /* WITH_WSREP */ + + if (!ignore_lock_priority && (m_waiting.bitmap() & waiting_incompat_map)) + return false; + + if (m_granted.bitmap() & granted_incompat_map) + { + bool can_grant= true; + + /* Check that the incompatible lock belongs to some other context. */ + for (const auto &ticket : m_granted) + { + if (ticket.get_ctx() != requestor_ctx && + ticket.is_incompatible_when_granted(type_arg)) + { + can_grant= false; +#ifdef WITH_WSREP + /* + non WSREP threads must report conflict immediately + note: RSU processing wsrep threads, have wsrep_on==OFF + */ + if (WSREP(requestor_ctx->get_thd()) || + requestor_ctx->get_thd()->wsrep_cs().mode() == + wsrep::client_state::m_rsu) + { + wsrep_handle_mdl_conflict(requestor_ctx, &ticket, &key); + if (wsrep_log_conflicts) + { + auto key= ticket.get_key(); + WSREP_INFO("MDL conflict db=%s table=%s ticket=%d solved by abort", + key->db_name(), key->name(), ticket.get_type()); + } + continue; + } +#endif /* WITH_WSREP */ + break; + } + } + return can_grant; + } + return true; +} + + +/** + Return thread id of the thread to which the first ticket was + granted. +*/ + +inline unsigned long +MDL_lock::get_lock_owner() const +{ + if (m_granted.is_empty()) + return 0; + + return m_granted.begin()->get_ctx()->get_thread_id(); +} + + +/** Remove a ticket from waiting or pending queue and wakeup up waiters. */ + +void MDL_lock::remove_ticket(LF_PINS *pins, Ticket_list MDL_lock::*list, + MDL_ticket *ticket) +{ + mysql_prlock_wrlock(&m_rwlock); + (this->*list).remove_ticket(ticket); + if (is_empty()) + mdl_locks.remove(pins, this); + else + { + /* + There can be some contexts waiting to acquire a lock + which now might be able to do it. Grant the lock to + them and wake them up! + + We always try to reschedule locks, since there is no easy way + (i.e. 
by looking at the bitmaps) to find out whether it is + required or not. + In a general case, even when the queue's bitmap is not changed + after removal of the ticket, there is a chance that some request + can be satisfied (due to the fact that a granted request + reflected in the bitmap might belong to the same context as a + pending request). + */ + reschedule_waiters(); + mysql_prlock_unlock(&m_rwlock); + } +} + + +/** + Check if we have any pending locks which conflict with existing + shared lock. + + @pre The ticket must match an acquired lock. + + @return TRUE if there is a conflicting lock request, FALSE otherwise. +*/ + +bool MDL_lock::has_pending_conflicting_lock(enum_mdl_type type) +{ + bool result; + + mysql_prlock_rdlock(&m_rwlock); + result= (m_waiting.bitmap() & incompatible_granted_types_bitmap()[type]); + mysql_prlock_unlock(&m_rwlock); + return result; +} + + +MDL_wait_for_graph_visitor::~MDL_wait_for_graph_visitor() += default; + + +MDL_wait_for_subgraph::~MDL_wait_for_subgraph() += default; + +/** + Check if ticket represents metadata lock of "stronger" or equal type + than specified one. I.e. if metadata lock represented by ticket won't + allow any of locks which are not allowed by specified type of lock. + + @return TRUE if ticket has stronger or equal type + FALSE otherwise. +*/ + +bool MDL_ticket::has_stronger_or_equal_type(enum_mdl_type type) const +{ + const MDL_lock::bitmap_t * + granted_incompat_map= m_lock->incompatible_granted_types_bitmap(); + + return ! 
(granted_incompat_map[type] & ~(granted_incompat_map[m_type])); +} + + +bool MDL_ticket::is_incompatible_when_granted(enum_mdl_type type) const +{ + return (MDL_BIT(m_type) & + m_lock->incompatible_granted_types_bitmap()[type]); +} + + +bool MDL_ticket::is_incompatible_when_waiting(enum_mdl_type type) const +{ + return (MDL_BIT(m_type) & + m_lock->incompatible_waiting_types_bitmap()[type]); +} + + +static const LEX_STRING +*get_mdl_lock_name(MDL_key::enum_mdl_namespace mdl_namespace, + enum_mdl_type type) +{ + return mdl_namespace == MDL_key::BACKUP ? + &backup_lock_types[type] : + &lock_types[type]; +} + + +const LEX_STRING *MDL_ticket::get_type_name() const +{ + return get_mdl_lock_name(get_key()->mdl_namespace(), m_type); +} + +const LEX_STRING *MDL_ticket::get_type_name(enum_mdl_type type) const +{ + return get_mdl_lock_name(get_key()->mdl_namespace(), type); +} + + +/** + Check whether the context already holds a compatible lock ticket + on an object. + Start searching from list of locks for the same duration as lock + being requested. If not look at lists for other durations. + + @param mdl_request Lock request object for lock to be acquired + @param[out] result_duration Duration of lock which was found. + + @note Tickets which correspond to lock types "stronger" than one + being requested are also considered compatible. + + @return A pointer to the lock ticket for the object or NULL otherwise. 
+*/ + +MDL_ticket * +MDL_context::find_ticket(MDL_request *mdl_request, + enum_mdl_duration *result_duration) +{ + MDL_ticket *ticket; + int i; + + for (i= 0; i < MDL_DURATION_END; i++) + { + enum_mdl_duration duration= (enum_mdl_duration)((mdl_request->duration+i) % + MDL_DURATION_END); + Ticket_iterator it(m_tickets[duration]); + + while ((ticket= it++)) + { + if (mdl_request->key.is_equal(&ticket->m_lock->key) && + ticket->has_stronger_or_equal_type(mdl_request->type)) + { + DBUG_PRINT("info", ("Adding mdl lock %s to %s", + get_mdl_lock_name(mdl_request->key.mdl_namespace(), + mdl_request->type)->str, + ticket->get_type_name()->str)); + *result_duration= duration; + return ticket; + } + } + } + return NULL; +} + + +/** + Try to acquire one lock. + + Unlike exclusive locks, shared locks are acquired one by + one. This is interface is chosen to simplify introduction of + the new locking API to the system. MDL_context::try_acquire_lock() + is currently used from open_table(), and there we have only one + table to work with. + + This function may also be used to try to acquire an exclusive + lock on a destination table, by ALTER TABLE ... RENAME. + + Returns immediately without any side effect if encounters a lock + conflict. Otherwise takes the lock. + + FIXME: Compared to lock_table_name_if_not_cached() (from 5.1) + it gives slightly more false negatives. + + @param mdl_request [in/out] Lock request object for lock to be acquired + + @retval FALSE Success. The lock may have not been acquired. + Check the ticket, if it's NULL, a conflicting lock + exists. + @retval TRUE Out of resources, an error has been reported. +*/ + +bool +MDL_context::try_acquire_lock(MDL_request *mdl_request) +{ + MDL_ticket *ticket; + + if (try_acquire_lock_impl(mdl_request, &ticket)) + return TRUE; + + if (! mdl_request->ticket) + { + /* + Our attempt to acquire lock without waiting has failed. + Let us release resources which were acquired in the process. 
+ We can't get here if we allocated a new lock object so there + is no need to release it. + */ + DBUG_ASSERT(! ticket->m_lock->is_empty()); + mysql_prlock_unlock(&ticket->m_lock->m_rwlock); + MDL_ticket::destroy(ticket); + } + + return FALSE; +} + + +/** + Auxiliary method for acquiring lock without waiting. + + @param mdl_request [in/out] Lock request object for lock to be acquired + @param out_ticket [out] Ticket for the request in case when lock + has not been acquired. + + @retval FALSE Success. The lock may have not been acquired. + Check MDL_request::ticket, if it's NULL, a conflicting + lock exists. In this case "out_ticket" out parameter + points to ticket which was constructed for the request. + MDL_ticket::m_lock points to the corresponding MDL_lock + object and MDL_lock::m_rwlock write-locked. + @retval TRUE Out of resources, an error has been reported. +*/ + +bool +MDL_context::try_acquire_lock_impl(MDL_request *mdl_request, + MDL_ticket **out_ticket) +{ + MDL_lock *lock; + MDL_key *key= &mdl_request->key; + MDL_ticket *ticket; + enum_mdl_duration found_duration; + + /* Don't take chances in production. */ + DBUG_ASSERT(mdl_request->ticket == NULL); + mdl_request->ticket= NULL; + + /* + Check whether the context already holds a shared lock on the object, + and if so, grant the request. + */ + if ((ticket= find_ticket(mdl_request, &found_duration))) + { + DBUG_ASSERT(ticket->m_lock); + DBUG_ASSERT(ticket->has_stronger_or_equal_type(mdl_request->type)); + /* + If the request is for a transactional lock, and we found + a transactional lock, just reuse the found ticket. + + It's possible that we found a transactional lock, + but the request is for a HANDLER lock. In that case HANDLER + code will clone the ticket (see below why it's needed). + + If the request is for a transactional lock, and we found + a HANDLER lock, create a copy, to make sure that when user + does HANDLER CLOSE, the transactional lock is not released. 
+ + If the request is for a handler lock, and we found a + HANDLER lock, also do the clone. HANDLER CLOSE for one alias + should not release the lock on the table HANDLER opened through + a different alias. + */ + mdl_request->ticket= ticket; + if ((found_duration != mdl_request->duration || + mdl_request->duration == MDL_EXPLICIT) && + clone_ticket(mdl_request)) + { + /* Clone failed. */ + mdl_request->ticket= NULL; + return TRUE; + } + return FALSE; + } + + if (fix_pins()) + return TRUE; + + if (!(ticket= MDL_ticket::create(this, mdl_request->type +#ifndef DBUG_OFF + , mdl_request->duration +#endif + ))) + return TRUE; + + /* The below call implicitly locks MDL_lock::m_rwlock on success. */ + if (!(lock= mdl_locks.find_or_insert(m_pins, key))) + { + MDL_ticket::destroy(ticket); + return TRUE; + } + + DBUG_ASSERT(ticket->m_psi == NULL); + ticket->m_psi= mysql_mdl_create(ticket, + &mdl_request->key, + mdl_request->type, + mdl_request->duration, + MDL_ticket::PENDING, + mdl_request->m_src_file, + mdl_request->m_src_line); + + ticket->m_lock= lock; + + if (lock->can_grant_lock(mdl_request->type, this, false)) + { + lock->m_granted.add_ticket(ticket); + + mysql_prlock_unlock(&lock->m_rwlock); + + m_tickets[mdl_request->duration].push_front(ticket); + + mdl_request->ticket= ticket; + + mysql_mdl_set_status(ticket->m_psi, MDL_ticket::GRANTED); + } + else + *out_ticket= ticket; + + return FALSE; +} + + +/** + Create a copy of a granted ticket. + This is used to make sure that HANDLER ticket + is never shared with a ticket that belongs to + a transaction, so that when we HANDLER CLOSE, + we don't release a transactional ticket, and + vice versa -- when we COMMIT, we don't mistakenly + release a ticket for an open HANDLER. + + @retval TRUE Out of memory. + @retval FALSE Success. 
+*/ + +bool +MDL_context::clone_ticket(MDL_request *mdl_request) +{ + MDL_ticket *ticket; + + + /* + Since in theory we can clone ticket belonging to a different context + we need to prepare target context for possible attempts to release + lock and thus possible removal of MDL_lock from MDL_map container. + So we allocate pins to be able to work with this container if they + are not allocated already. + */ + if (fix_pins()) + return TRUE; + + /* + By submitting mdl_request->type to MDL_ticket::create() + we effectively downgrade the cloned lock to the level of + the request. + */ + if (!(ticket= MDL_ticket::create(this, mdl_request->type +#ifndef DBUG_OFF + , mdl_request->duration +#endif + ))) + return TRUE; + + DBUG_ASSERT(ticket->m_psi == NULL); + ticket->m_psi= mysql_mdl_create(ticket, + &mdl_request->key, + mdl_request->type, + mdl_request->duration, + MDL_ticket::PENDING, + mdl_request->m_src_file, + mdl_request->m_src_line); + + /* clone() is not supposed to be used to get a stronger lock. */ + DBUG_ASSERT(mdl_request->ticket->has_stronger_or_equal_type(ticket->m_type)); + + ticket->m_lock= mdl_request->ticket->m_lock; + mdl_request->ticket= ticket; + + mysql_prlock_wrlock(&ticket->m_lock->m_rwlock); + ticket->m_lock->m_granted.add_ticket(ticket); + mysql_prlock_unlock(&ticket->m_lock->m_rwlock); + + m_tickets[mdl_request->duration].push_front(ticket); + + mysql_mdl_set_status(ticket->m_psi, MDL_ticket::GRANTED); + + return FALSE; +} + + +/** + Check if there is any conflicting lock that could cause this thread + to wait for another thread which is not ready to commit. + This is always an error, as the upper level of parallel replication + should not allow a scheduling of a conflicting DDL until all earlier + transactions have been committed. + + This function is only called for a slave using parallel replication + and trying to get an exclusive lock for the table. 
+*/ + +#ifndef DBUG_OFF +bool MDL_lock::check_if_conflicting_replication_locks(MDL_context *ctx) +{ + rpl_group_info *rgi_slave= ctx->get_thd()->rgi_slave; + + if (!rgi_slave->gtid_sub_id) + return 0; + + for (const auto &conflicting_ticket : m_granted) + { + if (conflicting_ticket.get_ctx() != ctx) + { + MDL_context *conflicting_ctx= conflicting_ticket.get_ctx(); + rpl_group_info *conflicting_rgi_slave; + conflicting_rgi_slave= conflicting_ctx->get_thd()->rgi_slave; + + /* + If the conflicting thread is another parallel replication + thread for the same master and it's not in commit or post-commit stages, + then the current transaction has started too early and something is + seriously wrong. + */ + if (conflicting_rgi_slave && + conflicting_rgi_slave->gtid_sub_id && + conflicting_rgi_slave->rli == rgi_slave->rli && + conflicting_rgi_slave->current_gtid.domain_id == + rgi_slave->current_gtid.domain_id && + !((conflicting_rgi_slave->did_mark_start_commit || + conflicting_rgi_slave->worker_error) || + conflicting_rgi_slave->finish_event_group_called)) + return 1; // Fatal error + } + } + return 0; +} +#endif + + +/** + Acquire one lock with waiting for conflicting locks to go away if needed. + + @param mdl_request [in/out] Lock request object for lock to be acquired + + @param lock_wait_timeout [in] Seconds to wait before timeout. + + @retval FALSE Success. MDL_request::ticket points to the ticket + for the lock. 
+ @retval TRUE Failure (Out of resources or waiting is aborted), +*/ + +bool +MDL_context::acquire_lock(MDL_request *mdl_request, double lock_wait_timeout) +{ + MDL_lock *lock; + MDL_ticket *ticket; + MDL_wait::enum_wait_status wait_status; + DBUG_ENTER("MDL_context::acquire_lock"); +#ifdef DBUG_TRACE + const char *mdl_lock_name= get_mdl_lock_name( + mdl_request->key.mdl_namespace(), mdl_request->type)->str; +#endif + DBUG_PRINT("enter", ("lock_type: %s timeout: %f", + mdl_lock_name, + lock_wait_timeout)); + + if (try_acquire_lock_impl(mdl_request, &ticket)) + { + DBUG_PRINT("mdl", ("OOM: %s", mdl_lock_name)); + DBUG_RETURN(TRUE); + } + + if (mdl_request->ticket) + { + /* + We have managed to acquire lock without waiting. + MDL_lock, MDL_context and MDL_request were updated + accordingly, so we can simply return success. + */ + DBUG_PRINT("info", ("Got lock without waiting")); + DBUG_PRINT("mdl", ("Seized: %s", dbug_print_mdl(mdl_request->ticket))); + DBUG_RETURN(FALSE); + } + +#ifdef DBUG_TRACE + const char *ticket_msg= dbug_print_mdl(ticket); +#endif + + /* + Our attempt to acquire lock without waiting has failed. + As a result of this attempt we got MDL_ticket with m_lock + member pointing to the corresponding MDL_lock object which + has MDL_lock::m_rwlock write-locked. 
+ */ + lock= ticket->m_lock; + + if (lock_wait_timeout == 0) + { + DBUG_PRINT("mdl", ("Nowait: %s", ticket_msg)); + mysql_prlock_unlock(&lock->m_rwlock); + MDL_ticket::destroy(ticket); + my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0)); + DBUG_RETURN(TRUE); + } + +#ifdef WITH_WSREP + if (WSREP(get_thd())) + { + THD* requester= get_thd(); + bool requester_toi= wsrep_thd_is_toi(requester) || wsrep_thd_is_applying(requester); + WSREP_DEBUG("::acquire_lock is TOI %d for %s", requester_toi, + wsrep_thd_query(requester)); + if (requester_toi) + THD_STAGE_INFO(requester, stage_waiting_ddl); + else + THD_STAGE_INFO(requester, stage_waiting_isolation); + } +#endif /* WITH_WSREP */ + + lock->m_waiting.add_ticket(ticket); + + /* + Once we added a pending ticket to the waiting queue, + we must ensure that our wait slot is empty, so + that our lock request can be scheduled. Do that in the + critical section formed by the acquired write lock on MDL_lock. + */ + m_wait.reset_status(); + + /* + Don't break conflicting locks if timeout is 0 as 0 is used + To check if there is any conflicting locks... + */ + if (lock->needs_notification(ticket) && lock_wait_timeout) + lock->notify_conflicting_locks(this); + + /* + Ensure that if we are trying to get an exclusive lock for a slave + running parallel replication, then we are not blocked by another + parallel slave thread that is not committed. This should never happen as + the parallel replication scheduler should never schedule a DDL while + DML's are still running. 
+ */ + DBUG_SLOW_ASSERT((mdl_request->type != MDL_INTENTION_EXCLUSIVE && + mdl_request->type != MDL_EXCLUSIVE) || + !(get_thd()->rgi_slave && + get_thd()->rgi_slave->is_parallel_exec && + lock->check_if_conflicting_replication_locks(this))); + + mysql_prlock_unlock(&lock->m_rwlock); + +#ifdef HAVE_PSI_INTERFACE + PSI_metadata_locker_state state __attribute__((unused)); + PSI_metadata_locker *locker= NULL; + + if (ticket->m_psi != NULL) + locker= PSI_CALL_start_metadata_wait(&state, ticket->m_psi, __FILE__, __LINE__); +#endif + + DBUG_PRINT("mdl", ("Waiting: %s", ticket_msg)); + will_wait_for(ticket); + + /* There is a shared or exclusive lock on the object. */ + DEBUG_SYNC(get_thd(), "mdl_acquire_lock_wait"); + + find_deadlock(); + + struct timespec abs_timeout, abs_shortwait; + set_timespec_nsec(abs_timeout, + (ulonglong)(lock_wait_timeout * 1000000000ULL)); + set_timespec(abs_shortwait, 1); + wait_status= MDL_wait::EMPTY; + + while (cmp_timespec(abs_shortwait, abs_timeout) <= 0) + { + /* abs_timeout is far away. Wait a short while and notify locks. */ + wait_status= m_wait.timed_wait(m_owner, &abs_shortwait, FALSE, + mdl_request->key.get_wait_state_name()); + + if (wait_status != MDL_wait::EMPTY) + break; + /* Check if the client is gone while we were waiting. */ + if (! thd_is_connected(m_owner->get_thd())) + { + /* + * The client is disconnected. Don't wait forever: + * assume it's the same as a wait timeout, this + * ensures all error handling is correct. 
+ */ + wait_status= MDL_wait::TIMEOUT; + break; + } + + mysql_prlock_wrlock(&lock->m_rwlock); + if (lock->needs_notification(ticket)) + lock->notify_conflicting_locks(this); + mysql_prlock_unlock(&lock->m_rwlock); + set_timespec(abs_shortwait, 1); + } + if (wait_status == MDL_wait::EMPTY) + wait_status= m_wait.timed_wait(m_owner, &abs_timeout, TRUE, + mdl_request->key.get_wait_state_name()); + + done_waiting_for(); + +#ifdef HAVE_PSI_INTERFACE + if (locker != NULL) + PSI_CALL_end_metadata_wait(locker, 0); +#endif + + if (wait_status != MDL_wait::GRANTED) + { + lock->remove_ticket(m_pins, &MDL_lock::m_waiting, ticket); + MDL_ticket::destroy(ticket); + switch (wait_status) + { + case MDL_wait::VICTIM: + DBUG_PRINT("mdl", ("Deadlock: %s", ticket_msg)); + DBUG_PRINT("mdl_locks", ("Existing locks:%s", mdl_dbug_print_locks())); + my_error(ER_LOCK_DEADLOCK, MYF(0)); + break; + case MDL_wait::TIMEOUT: + DBUG_PRINT("mdl", ("Timeout: %s", ticket_msg)); + my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0)); + break; + case MDL_wait::KILLED: + DBUG_PRINT("mdl", ("Killed: %s", ticket_msg)); + get_thd()->send_kill_message(); + break; + default: + DBUG_ASSERT(0); + break; + } + DBUG_RETURN(TRUE); + } + + /* + We have been granted our request. + State of MDL_lock object is already being appropriately updated by a + concurrent thread (@sa MDL_lock:reschedule_waiters()). + So all we need to do is to update MDL_context and MDL_request objects. + */ + DBUG_ASSERT(wait_status == MDL_wait::GRANTED); + + m_tickets[mdl_request->duration].push_front(ticket); + + mdl_request->ticket= ticket; + + mysql_mdl_set_status(ticket->m_psi, MDL_ticket::GRANTED); + + DBUG_PRINT("mdl", ("Acquired: %s", ticket_msg)); + DBUG_RETURN(FALSE); +} + + +extern "C" int mdl_request_ptr_cmp(const void* ptr1, const void* ptr2) +{ + MDL_request *req1= *(MDL_request**)ptr1; + MDL_request *req2= *(MDL_request**)ptr2; + return req1->key.cmp(&req2->key); +} + + +/** + Acquire exclusive locks. 
There must be no granted locks in the + context. + + This is a replacement of lock_table_names(). It is used in + RENAME, DROP and other DDL SQL statements. + + @param mdl_requests List of requests for locks to be acquired. + + @param lock_wait_timeout Seconds to wait before timeout. + + @note The list of requests should not contain non-exclusive lock requests. + There should not be any acquired locks in the context. + + @note Assumes that one already owns scoped intention exclusive lock. + + @retval FALSE Success + @retval TRUE Failure +*/ + +bool MDL_context::acquire_locks(MDL_request_list *mdl_requests, + double lock_wait_timeout) +{ + MDL_request_list::Iterator it(*mdl_requests); + MDL_request **sort_buf, **p_req; + MDL_savepoint mdl_svp= mdl_savepoint(); + ssize_t req_count= static_cast(mdl_requests->elements()); + DBUG_ENTER("MDL_context::acquire_locks"); + + if (req_count == 0) + DBUG_RETURN(FALSE); + + /* Sort requests according to MDL_key. */ + if (! (sort_buf= (MDL_request **)my_malloc(key_memory_MDL_context_acquire_locks, + req_count * sizeof(MDL_request*), + MYF(MY_WME)))) + DBUG_RETURN(TRUE); + + for (p_req= sort_buf; p_req < sort_buf + req_count; p_req++) + *p_req= it++; + + my_qsort(sort_buf, req_count, sizeof(MDL_request*), + mdl_request_ptr_cmp); + + for (p_req= sort_buf; p_req < sort_buf + req_count; p_req++) + { + if (acquire_lock(*p_req, lock_wait_timeout)) + goto err; + } + my_free(sort_buf); + DBUG_RETURN(FALSE); + +err: + /* + Release locks we have managed to acquire so far. + Use rollback_to_savepoint() since there may be duplicate + requests that got assigned the same ticket. + */ + rollback_to_savepoint(mdl_svp); + /* Reset lock requests back to its initial state. */ + for (req_count= p_req - sort_buf, p_req= sort_buf; + p_req < sort_buf + req_count; p_req++) + { + (*p_req)->ticket= NULL; + } + my_free(sort_buf); + DBUG_RETURN(TRUE); +} + + +/** + Upgrade a shared metadata lock. + + Used in ALTER TABLE. 
+ + @param mdl_ticket Lock to upgrade. + @param new_type Lock type to upgrade to. + @param lock_wait_timeout Seconds to wait before timeout. + + @note In case of failure to upgrade lock (e.g. because upgrader + was killed) leaves lock in its original state (locked in + shared mode). + + @note There can be only one upgrader for a lock or we will have deadlock. + This invariant is ensured by the fact that upgradeable locks SU, SNW + and SNRW are not compatible with each other and themselves. + + @retval FALSE Success + @retval TRUE Failure (thread was killed) +*/ + +bool +MDL_context::upgrade_shared_lock(MDL_ticket *mdl_ticket, + enum_mdl_type new_type, + double lock_wait_timeout) +{ + MDL_request mdl_xlock_request; + MDL_savepoint mdl_svp= mdl_savepoint(); + bool is_new_ticket; + DBUG_ENTER("MDL_context::upgrade_shared_lock"); + DBUG_PRINT("enter",("old_type: %s new_type: %s lock_wait_timeout: %f", + mdl_ticket->get_type_name()->str, + mdl_ticket->get_type_name(new_type)->str, + lock_wait_timeout)); + DEBUG_SYNC(get_thd(), "mdl_upgrade_lock"); + + /* + Do nothing if already upgraded. Used when we FLUSH TABLE under + LOCK TABLES and a table is listed twice in LOCK TABLES list. + + In BACKUP namespace upgrade must always happen. Even though + MDL_BACKUP_START is not stronger than MDL_BACKUP_FLUSH from + has_stronger_or_equal_type(), the latter effectively blocks + new MDL_BACKUP_DML while the former doesn't. + */ + if (mdl_ticket->has_stronger_or_equal_type(new_type) && + mdl_ticket->get_key()->mdl_namespace() != MDL_key::BACKUP) + DBUG_RETURN(FALSE); + + MDL_REQUEST_INIT_BY_KEY(&mdl_xlock_request, &mdl_ticket->m_lock->key, + new_type, MDL_TRANSACTION); + + if (acquire_lock(&mdl_xlock_request, lock_wait_timeout)) + DBUG_RETURN(TRUE); + + is_new_ticket= ! has_lock(mdl_svp, mdl_xlock_request.ticket); + + /* Merge the acquired and the original lock. @todo: move to a method. 
*/ + mysql_prlock_wrlock(&mdl_ticket->m_lock->m_rwlock); + if (is_new_ticket) + mdl_ticket->m_lock->m_granted.remove_ticket(mdl_xlock_request.ticket); + /* + Set the new type of lock in the ticket. To update state of + MDL_lock object correctly we need to temporarily exclude + ticket from the granted queue and then include it back. + */ + mdl_ticket->m_lock->m_granted.remove_ticket(mdl_ticket); + mdl_ticket->m_type= new_type; + mdl_ticket->m_lock->m_granted.add_ticket(mdl_ticket); + + mysql_prlock_unlock(&mdl_ticket->m_lock->m_rwlock); + + if (is_new_ticket) + { + m_tickets[MDL_TRANSACTION].remove(mdl_xlock_request.ticket); + MDL_ticket::destroy(mdl_xlock_request.ticket); + } + + DBUG_RETURN(FALSE); +} + + +/** + A fragment of recursive traversal of the wait-for graph + in search for deadlocks. Direct the deadlock visitor to all + contexts that own the lock the current node in the wait-for + graph is waiting for. + As long as the initial node is remembered in the visitor, + a deadlock is found when the same node is seen twice. +*/ + +bool MDL_lock::visit_subgraph(MDL_ticket *waiting_ticket, + MDL_wait_for_graph_visitor *gvisitor) +{ + MDL_context *src_ctx= waiting_ticket->get_ctx(); + bool result= TRUE; + + mysql_prlock_rdlock(&m_rwlock); + + /* + MDL_lock's waiting and granted queues and MDL_context::m_waiting_for + member are updated by different threads when the lock is granted + (see MDL_context::acquire_lock() and MDL_lock::reschedule_waiters()). + As a result, here we may encounter a situation when MDL_lock data + already reflects the fact that the lock was granted but + m_waiting_for member has not been updated yet. + + For example, imagine that: + + thread1: Owns SNW lock on table t1. + thread2: Attempts to acquire SW lock on t1, + but sees an active SNW lock. + Thus adds the ticket to the waiting queue and + sets m_waiting_for to point to the ticket. 
+ thread1: Releases SNW lock, updates MDL_lock object to + grant SW lock to thread2 (moves the ticket for + SW from waiting to the active queue). + Attempts to acquire a new SNW lock on t1, + sees an active SW lock (since it is present in the + active queue), adds ticket for SNW lock to the waiting + queue, sets m_waiting_for to point to this ticket. + + At this point deadlock detection algorithm run by thread1 will see that: + - Thread1 waits for SNW lock on t1 (since m_waiting_for is set). + - SNW lock is not granted, because it conflicts with active SW lock + owned by thread 2 (since ticket for SW is present in granted queue). + - Thread2 waits for SW lock (since its m_waiting_for has not been + updated yet!). + - SW lock is not granted because there is pending SNW lock from thread1. + Therefore deadlock should exist [sic!]. + + To avoid detection of such false deadlocks we need to check the "actual" + status of the ticket being waited for, before analyzing its blockers. + We do this by checking the wait status of the context which is waiting + for it. To avoid races this has to be done under protection of + MDL_lock::m_rwlock lock. + */ + if (src_ctx->m_wait.get_status() != MDL_wait::EMPTY) + { + result= FALSE; + goto end; + } + + /* + To avoid visiting nodes which were already marked as victims of + deadlock detection (or whose requests were already satisfied) we + enter the node only after peeking at its wait status. + This is necessary to avoid active waiting in a situation + when previous searches for a deadlock already selected the + node we're about to enter as a victim (see the comment + in MDL_context::find_deadlock() for explanation why several searches + can be performed for the same wait). + There is no guarantee that the node isn't chosen a victim while we + are visiting it but this is OK: in the worst case we might do some + extra work and one more context might be chosen as a victim. 
+ */ + if (gvisitor->enter_node(src_ctx)) + goto end; + + /* + We do a breadth-first search first -- that is, inspect all + edges of the current node, and only then follow up to the next + node. In workloads that involve wait-for graph loops this + has proven to be a more efficient strategy [citation missing]. + */ + for (const auto& ticket : m_granted) + { + /* Filter out edges that point to the same node. */ + if (ticket.get_ctx() != src_ctx && + ticket.is_incompatible_when_granted(waiting_ticket->get_type()) && + gvisitor->inspect_edge(ticket.get_ctx())) + { + goto end_leave_node; + } + } + + for (const auto &ticket : m_waiting) + { + /* Filter out edges that point to the same node. */ + if (ticket.get_ctx() != src_ctx && + ticket.is_incompatible_when_waiting(waiting_ticket->get_type()) && + gvisitor->inspect_edge(ticket.get_ctx())) + { + goto end_leave_node; + } + } + + /* Recurse and inspect all adjacent nodes. */ + for (const auto &ticket : m_granted) + { + if (ticket.get_ctx() != src_ctx && + ticket.is_incompatible_when_granted(waiting_ticket->get_type()) && + ticket.get_ctx()->visit_subgraph(gvisitor)) + { + goto end_leave_node; + } + } + + for (const auto &ticket : m_waiting) + { + if (ticket.get_ctx() != src_ctx && + ticket.is_incompatible_when_waiting(waiting_ticket->get_type()) && + ticket.get_ctx()->visit_subgraph(gvisitor)) + { + goto end_leave_node; + } + } + + result= FALSE; + +end_leave_node: + gvisitor->leave_node(src_ctx); + +end: + mysql_prlock_unlock(&m_rwlock); + return result; +} + + +/** + Traverse a portion of wait-for graph which is reachable + through the edge represented by this ticket and search + for deadlocks. + + @retval TRUE A deadlock is found. A pointer to deadlock + victim is saved in the visitor. 
+ @retval FALSE +*/ + +bool MDL_ticket::accept_visitor(MDL_wait_for_graph_visitor *gvisitor) +{ + return m_lock->visit_subgraph(this, gvisitor); +} + + +/** + A fragment of recursive traversal of the wait-for graph of + MDL contexts in the server in search for deadlocks. + Assume this MDL context is a node in the wait-for graph, + and direct the visitor to all adjacent nodes. As long + as the starting node is remembered in the visitor, a + deadlock is found when the same node is visited twice. + One MDL context is connected to another in the wait-for + graph if it waits on a resource that is held by the other + context. + + @retval TRUE A deadlock is found. A pointer to deadlock + victim is saved in the visitor. + @retval FALSE +*/ + +bool MDL_context::visit_subgraph(MDL_wait_for_graph_visitor *gvisitor) +{ + bool result= FALSE; + + mysql_prlock_rdlock(&m_LOCK_waiting_for); + + if (m_waiting_for) + result= m_waiting_for->accept_visitor(gvisitor); + + mysql_prlock_unlock(&m_LOCK_waiting_for); + + return result; +} + + +/** + Try to find a deadlock. This function produces no errors. + + @note If during deadlock resolution context which performs deadlock + detection is chosen as a victim it will be informed about the + fact by setting VICTIM status to its wait slot. +*/ + +void MDL_context::find_deadlock() +{ + while (1) + { + /* + The fact that we use fresh instance of gvisitor for each + search performed by find_deadlock() below is important, + the code responsible for victim selection relies on this. + */ + Deadlock_detection_visitor dvisitor(this); + MDL_context *victim; + + if (! visit_subgraph(&dvisitor)) + { + /* No deadlocks are found! */ + break; + } + + victim= dvisitor.get_victim(); + + /* + Failure to change status of the victim is OK as it means + that the victim has received some other message and is + about to stop its waiting/to break deadlock loop. 
+ Even when the initiator of the deadlock search is + chosen the victim, we need to set the respective wait + result in order to "close" it for any attempt to + schedule the request. + This is needed to avoid a possible race during + cleanup in case when the lock request on which the + context was waiting is concurrently satisfied. + */ + (void) victim->m_wait.set_status(MDL_wait::VICTIM); + victim->inc_deadlock_overweight(); + victim->unlock_deadlock_victim(); + + if (victim == this) + break; + /* + After adding a new edge to the waiting graph we found that it + creates a loop (i.e. there is a deadlock). We decided to destroy + this loop by removing an edge, but not the one that we added. + Since this doesn't guarantee that all loops created by addition + of the new edge are destroyed, we have to repeat the search. + */ + } +} + + +/** + Release lock. + + @param duration Lock duration. + @param ticket Ticket for lock to be released. + +*/ + +void MDL_context::release_lock(enum_mdl_duration duration, MDL_ticket *ticket) +{ + MDL_lock *lock= ticket->m_lock; + DBUG_ENTER("MDL_context::release_lock"); + DBUG_PRINT("enter", ("db: '%s' name: '%s'", + lock->key.db_name(), lock->key.name())); + + DBUG_ASSERT(this == ticket->get_ctx()); + DBUG_PRINT("mdl", ("Released: %s", dbug_print_mdl(ticket))); + + lock->remove_ticket(m_pins, &MDL_lock::m_granted, ticket); + + m_tickets[duration].remove(ticket); + MDL_ticket::destroy(ticket); + + DBUG_VOID_RETURN; +} + + +/** + Release lock with explicit duration. + + @param ticket Ticket for lock to be released. + +*/ + +void MDL_context::release_lock(MDL_ticket *ticket) +{ + DBUG_SLOW_ASSERT(ticket->m_duration == MDL_EXPLICIT); + + release_lock(MDL_EXPLICIT, ticket); +} + + +/** + Release all locks associated with the context. If the sentinel + is not NULL, do not release locks stored in the list after and + including the sentinel. + + Statement and transactional locks are added to the beginning of + the corresponding lists, i.e. 
stored in reverse temporal order. + This allows to employ this function to: + - back off in case of a lock conflict. + - release all locks in the end of a statement or transaction + - rollback to a savepoint. +*/ + +void MDL_context::release_locks_stored_before(enum_mdl_duration duration, + MDL_ticket *sentinel) +{ + MDL_ticket *ticket; + Ticket_iterator it(m_tickets[duration]); + DBUG_ENTER("MDL_context::release_locks_stored_before"); + + if (m_tickets[duration].is_empty()) + DBUG_VOID_RETURN; + + while ((ticket= it++) && ticket != sentinel) + { + DBUG_PRINT("info", ("found lock to release ticket=%p", ticket)); + release_lock(duration, ticket); + } + + DBUG_VOID_RETURN; +} + + +/** + Release all explicit locks in the context which correspond to the + same name/object as this lock request. + + @param ticket One of the locks for the name/object for which all + locks should be released. +*/ + +void MDL_context::release_all_locks_for_name(MDL_ticket *name) +{ + /* Use MDL_ticket::m_lock to identify other locks for the same object. */ + MDL_lock *lock= name->m_lock; + + /* Remove matching lock tickets from the context. */ + MDL_ticket *ticket; + Ticket_iterator it_ticket(m_tickets[MDL_EXPLICIT]); + + while ((ticket= it_ticket++)) + { + DBUG_ASSERT(ticket->m_lock); + if (ticket->m_lock == lock) + release_lock(MDL_EXPLICIT, ticket); + } +} + + +/** + Downgrade an EXCLUSIVE or SHARED_NO_WRITE lock to shared metadata lock. + + @param type Type of lock to which exclusive lock should be downgraded. +*/ + +void MDL_ticket::downgrade_lock(enum_mdl_type type) +{ + DBUG_ENTER("MDL_ticket::downgrade_lock"); + DBUG_PRINT("enter",("old_type: %s new_type: %s", + get_type_name()->str, + get_type_name(type)->str)); + /* + Do nothing if already downgraded. Used when we FLUSH TABLE under + LOCK TABLES and a table is listed twice in LOCK TABLES list. + Note that this code might even try to "downgrade" a weak lock + (e.g. SW) to a stronger one (e.g SNRW). 
So we can't even assert + here that target lock is weaker than existing lock. + */ + if (m_type == type || !has_stronger_or_equal_type(type)) + { + DBUG_PRINT("info", ("Nothing to downgrade")); + DBUG_VOID_RETURN; + } + + /* Only allow downgrade in some specific known cases */ + DBUG_ASSERT((get_key()->mdl_namespace() != MDL_key::BACKUP && + (m_type == MDL_EXCLUSIVE || + m_type == MDL_SHARED_NO_WRITE)) || + (get_key()->mdl_namespace() == MDL_key::BACKUP && + (m_type == MDL_BACKUP_DDL || + m_type == MDL_BACKUP_BLOCK_DDL || + m_type == MDL_BACKUP_WAIT_FLUSH))); + + mysql_prlock_wrlock(&m_lock->m_rwlock); + /* + To update state of MDL_lock object correctly we need to temporarily + exclude ticket from the granted queue and then include it back. + */ + m_lock->m_granted.remove_ticket(this); + m_type= type; + m_lock->m_granted.add_ticket(this); + m_lock->reschedule_waiters(); + mysql_prlock_unlock(&m_lock->m_rwlock); + DBUG_VOID_RETURN; +} + + +/** + Auxiliary function which allows to check if we have some kind of lock on + a object. Returns TRUE if we have a lock of a given or stronger type. + + @param mdl_namespace Id of object namespace + @param db Name of the database + @param name Name of the object + @param mdl_type Lock type. Pass in the weakest type to find + out if there is at least some lock. + + @return TRUE if current context contains satisfied lock for the object, + FALSE otherwise. +*/ + +bool +MDL_context::is_lock_owner(MDL_key::enum_mdl_namespace mdl_namespace, + const char *db, const char *name, + enum_mdl_type mdl_type) +{ + MDL_request mdl_request; + enum_mdl_duration not_unused; + /* We don't care about exact duration of lock here. */ + MDL_REQUEST_INIT(&mdl_request, mdl_namespace, db, name, mdl_type, + MDL_TRANSACTION); + MDL_ticket *ticket= find_ticket(&mdl_request, ¬_unused); + + DBUG_ASSERT(ticket == NULL || ticket->m_lock); + + return ticket; +} + + +/** + Return thread id of the owner of the lock or 0 if + there is no owner. 
+ @note: Lock type is not considered at all, the function + simply checks that there is some lock for the given key. + + @return thread id of the owner of the lock or 0 +*/ + +unsigned long +MDL_context::get_lock_owner(MDL_key *key) +{ + fix_pins(); + return mdl_locks.get_lock_owner(m_pins, key); +} + + +/** + Check if we have any pending locks which conflict with existing shared lock. + + @pre The ticket must match an acquired lock. + + @return TRUE if there is a conflicting lock request, FALSE otherwise. +*/ + +bool MDL_ticket::has_pending_conflicting_lock() const +{ + return m_lock->has_pending_conflicting_lock(m_type); +} + +/** Return a key identifying this lock. */ +MDL_key *MDL_ticket::get_key() const +{ + return &m_lock->key; +} + +/** + Releases metadata locks that were acquired after a specific savepoint. + + @note Used to release tickets acquired during a savepoint unit. + @note It's safe to iterate and unlock any locks after taken after this + savepoint because other statements that take other special locks + cause a implicit commit (ie LOCK TABLES). +*/ + +void MDL_context::rollback_to_savepoint(const MDL_savepoint &mdl_savepoint) +{ + DBUG_ENTER("MDL_context::rollback_to_savepoint"); + + /* If savepoint is NULL, it is from the start of the transaction. */ + release_locks_stored_before(MDL_STATEMENT, mdl_savepoint.m_stmt_ticket); + release_locks_stored_before(MDL_TRANSACTION, mdl_savepoint.m_trans_ticket); + + DBUG_VOID_RETURN; +} + + +/** + Release locks acquired by normal statements (SELECT, UPDATE, + DELETE, etc) in the course of a transaction. Do not release + HANDLER locks, if there are any. + + This method is used at the end of a transaction, in + implementation of COMMIT (implicit or explicit) and ROLLBACK. 
+*/
+
+void MDL_context::release_transactional_locks(THD *thd)
+{
+  DBUG_ENTER("MDL_context::release_transactional_locks");
+  /* Fail if there are active transactions */
+  DBUG_ASSERT(!(thd->server_status &
+                (SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY)));
+  release_locks_stored_before(MDL_STATEMENT, NULL);
+  release_locks_stored_before(MDL_TRANSACTION, NULL);
+  DBUG_VOID_RETURN;
+}
+
+void MDL_context::release_statement_locks()
+{
+  /* Was a copy-paste of the function above; use this function's own name. */
+  DBUG_ENTER("MDL_context::release_statement_locks");
+  release_locks_stored_before(MDL_STATEMENT, NULL);
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Does this savepoint have this lock?
+
+  @retval TRUE  The ticket is older than the savepoint or
+                is an LT, HA or GLR ticket. Thus it belongs
+                to the savepoint or has explicit duration.
+  @retval FALSE The ticket is newer than the savepoint.
+                and is not an LT, HA or GLR ticket.
+*/
+
+bool MDL_context::has_lock(const MDL_savepoint &mdl_savepoint,
+                           MDL_ticket *mdl_ticket)
+{
+  MDL_ticket *ticket;
+  /* Start from the beginning, most likely mdl_ticket's been just acquired. */
+  MDL_context::Ticket_iterator s_it(m_tickets[MDL_STATEMENT]);
+  MDL_context::Ticket_iterator t_it(m_tickets[MDL_TRANSACTION]);
+
+  while ((ticket= s_it++) && ticket != mdl_savepoint.m_stmt_ticket)
+  {
+    if (ticket == mdl_ticket)
+      return FALSE;
+  }
+
+  while ((ticket= t_it++) && ticket != mdl_savepoint.m_trans_ticket)
+  {
+    if (ticket == mdl_ticket)
+      return FALSE;
+  }
+  return TRUE;
+}
+
+
+/**
+  Change lock duration for transactional lock.
+
+  @param ticket   Ticket representing lock.
+  @param duration Lock duration to be set.
+
+  @note This method only supports changing duration of
+        transactional lock to some other duration.
+*/ + +void MDL_context::set_lock_duration(MDL_ticket *mdl_ticket, + enum_mdl_duration duration) +{ + DBUG_SLOW_ASSERT(mdl_ticket->m_duration == MDL_TRANSACTION && + duration != MDL_TRANSACTION); + + m_tickets[MDL_TRANSACTION].remove(mdl_ticket); + m_tickets[duration].push_front(mdl_ticket); +#ifndef DBUG_OFF + mdl_ticket->m_duration= duration; +#endif +} + + +/** + Set explicit duration for all locks in the context. +*/ + +void MDL_context::set_explicit_duration_for_all_locks() +{ + int i; + MDL_ticket *ticket; + + /* + In the most common case when this function is called list + of transactional locks is bigger than list of locks with + explicit duration. So we start by swapping these two lists + and then move elements from new list of transactional + locks and list of statement locks to list of locks with + explicit duration. + */ + + m_tickets[MDL_EXPLICIT].swap(m_tickets[MDL_TRANSACTION]); + + for (i= 0; i < MDL_EXPLICIT; i++) + { + Ticket_iterator it_ticket(m_tickets[i]); + + while ((ticket= it_ticket++)) + { + m_tickets[i].remove(ticket); + m_tickets[MDL_EXPLICIT].push_front(ticket); + } + } + +#ifndef DBUG_OFF + Ticket_iterator exp_it(m_tickets[MDL_EXPLICIT]); + + while ((ticket= exp_it++)) + ticket->m_duration= MDL_EXPLICIT; +#endif +} + + +/** + Set transactional duration for all locks in the context. +*/ + +void MDL_context::set_transaction_duration_for_all_locks() +{ + MDL_ticket *ticket; + + /* + In the most common case when this function is called list + of explicit locks is bigger than two other lists (in fact, + list of statement locks is always empty). So we start by + swapping list of explicit and transactional locks and then + move contents of new list of explicit locks to list of + locks with transactional duration. 
+ */ + + DBUG_ASSERT(m_tickets[MDL_STATEMENT].is_empty()); + + m_tickets[MDL_TRANSACTION].swap(m_tickets[MDL_EXPLICIT]); + + Ticket_iterator it_ticket(m_tickets[MDL_EXPLICIT]); + + while ((ticket= it_ticket++)) + { + m_tickets[MDL_EXPLICIT].remove(ticket); + m_tickets[MDL_TRANSACTION].push_front(ticket); + } + +#ifndef DBUG_OFF + Ticket_iterator trans_it(m_tickets[MDL_TRANSACTION]); + + while ((ticket= trans_it++)) + ticket->m_duration= MDL_TRANSACTION; +#endif +} + + + +void MDL_context::release_explicit_locks() +{ + release_locks_stored_before(MDL_EXPLICIT, NULL); +} + +bool MDL_context::has_explicit_locks() +{ + MDL_ticket *ticket = NULL; + + Ticket_iterator it(m_tickets[MDL_EXPLICIT]); + + while ((ticket = it++)) + { + return true; + } + + return false; +} + +#ifdef WITH_WSREP +static +const char *wsrep_get_mdl_namespace_name(MDL_key::enum_mdl_namespace ns) +{ + switch (ns) + { + case MDL_key::BACKUP : return "BACKUP"; + case MDL_key::SCHEMA : return "SCHEMA"; + case MDL_key::TABLE : return "TABLE"; + case MDL_key::FUNCTION : return "FUNCTION"; + case MDL_key::PROCEDURE : return "PROCEDURE"; + case MDL_key::PACKAGE_BODY: return "PACKAGE BODY"; + case MDL_key::TRIGGER : return "TRIGGER"; + case MDL_key::EVENT : return "EVENT"; + case MDL_key::USER_LOCK : return "USER_LOCK"; + default: break; + } + return "UNKNOWN"; +} + +void MDL_ticket::wsrep_report(bool debug) const +{ + if (!debug) return; + + const PSI_stage_info *psi_stage= m_lock->key.get_wait_state_name(); + WSREP_DEBUG("MDL ticket: type: %s space: %s db: %s name: %s (%s)", + get_type_name()->str, + wsrep_get_mdl_namespace_name(m_lock->key.mdl_namespace()), + m_lock->key.db_name(), + m_lock->key.name(), + psi_stage->m_name); +} +#endif /* WITH_WSREP */ diff --git a/sql/mdl.h b/sql/mdl.h new file mode 100644 index 00000000..d1ece797 --- /dev/null +++ b/sql/mdl.h @@ -0,0 +1,1154 @@ +#ifndef MDL_H +#define MDL_H +/* Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved. 
+ Copyright (c) 2020, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "sql_plist.h" +#include "ilist.h" +#include +#include +#include +#include + +class THD; + +class MDL_context; +class MDL_lock; +class MDL_ticket; +bool ok_for_lower_case_names(const char *name); + +typedef unsigned short mdl_bitmap_t; +#define MDL_BIT(A) static_cast(1U << A) + + +/** + @def ENTER_COND(C, M, S, O) + Start a wait on a condition. + @param C the condition to wait on + @param M the associated mutex + @param S the new stage to enter + @param O the previous stage + @sa EXIT_COND(). +*/ +#define ENTER_COND(C, M, S, O) enter_cond(C, M, S, O, __func__, __FILE__, __LINE__) + +/** + @def EXIT_COND(S) + End a wait on a condition + @param S the new stage to enter +*/ +#define EXIT_COND(S) exit_cond(S, __func__, __FILE__, __LINE__) + +/** + An interface to separate the MDL module from the THD, and the rest of the + server code. + */ + +class MDL_context_owner +{ +public: + virtual ~MDL_context_owner() = default; + + /** + Enter a condition wait. + For @c enter_cond() / @c exit_cond() to work the mutex must be held before + @c enter_cond(); this mutex is then released by @c exit_cond(). + Usage must be: lock mutex; enter_cond(); your code; exit_cond(). 
+ @param cond the condition to wait on + @param mutex the associated mutex + @param [in] stage the stage to enter, or NULL + @param [out] old_stage the previous stage, or NULL + @param src_function function name of the caller + @param src_file file name of the caller + @param src_line line number of the caller + @sa ENTER_COND(), THD::enter_cond() + @sa EXIT_COND(), THD::exit_cond() + */ + virtual void enter_cond(mysql_cond_t *cond, mysql_mutex_t *mutex, + const PSI_stage_info *stage, PSI_stage_info *old_stage, + const char *src_function, const char *src_file, + int src_line) = 0; + + /** + @def EXIT_COND(S) + End a wait on a condition + @param [in] stage the new stage to enter + @param src_function function name of the caller + @param src_file file name of the caller + @param src_line line number of the caller + @sa ENTER_COND(), THD::enter_cond() + @sa EXIT_COND(), THD::exit_cond() + */ + virtual void exit_cond(const PSI_stage_info *stage, + const char *src_function, const char *src_file, + int src_line) = 0; + /** + Has the owner thread been killed? + */ + virtual int is_killed() = 0; + + /** + This one is only used for DEBUG_SYNC. + (Do not use it to peek/poke into other parts of THD.) + */ + virtual THD* get_thd() = 0; + + /** + @see THD::notify_shared_lock() + */ + virtual bool notify_shared_lock(MDL_context_owner *in_use, + bool needs_thr_lock_abort) = 0; +}; + +/** + Type of metadata lock request. + + @sa Comments for MDL_object_lock::can_grant_lock() and + MDL_scoped_lock::can_grant_lock() for details. + + Scoped locks are database (or schema) locks. + The object locks are for tables, triggers etc. +*/ + +enum enum_mdl_type { + /* This means that the MDL_request is not initialized */ + MDL_NOT_INITIALIZED= -1, + /* + An intention exclusive metadata lock (IX). Used only for scoped locks. + Owner of this type of lock can acquire upgradable exclusive locks on + individual objects. 
+ Compatible with other IX locks, but is incompatible with scoped S and + X locks. + IX lock is taken in SCHEMA namespace when we intend to modify + object metadata. Object may refer table, stored procedure, trigger, + view/etc. + */ + MDL_INTENTION_EXCLUSIVE= 0, + /* + A shared metadata lock (S). + To be used in cases when we are interested in object metadata only + and there is no intention to access object data (e.g. for stored + routines or during preparing prepared statements). + We also mis-use this type of lock for open HANDLERs, since lock + acquired by this statement has to be compatible with lock acquired + by LOCK TABLES ... WRITE statement, i.e. SNRW (We can't get by by + acquiring S lock at HANDLER ... OPEN time and upgrading it to SR + lock for HANDLER ... READ as it doesn't solve problem with need + to abort DML statements which wait on table level lock while having + open HANDLER in the same connection). + To avoid deadlock which may occur when SNRW lock is being upgraded to + X lock for table on which there is an active S lock which is owned by + thread which waits in its turn for table-level lock owned by thread + performing upgrade we have to use thr_abort_locks_for_thread() + facility in such situation. + This problem does not arise for locks on stored routines as we don't + use SNRW locks for them. It also does not arise when S locks are used + during PREPARE calls as table-level locks are not acquired in this + case. + This lock is taken for global read lock, when caching a stored + procedure in memory for the duration of the transaction and for + tables used by prepared statements. + */ + MDL_SHARED, + /* + A high priority shared metadata lock. + Used for cases when there is no intention to access object data (i.e. + data in the table). + "High priority" means that, unlike other shared locks, it is granted + ignoring pending requests for exclusive locks. Intended for use in + cases when we only need to access metadata and not data, e.g. 
when + filling an INFORMATION_SCHEMA table. + Since SH lock is compatible with SNRW lock, the connection that + holds SH lock lock should not try to acquire any kind of table-level + or row-level lock, as this can lead to a deadlock. Moreover, after + acquiring SH lock, the connection should not wait for any other + resource, as it might cause starvation for X locks and a potential + deadlock during upgrade of SNW or SNRW to X lock (e.g. if the + upgrading connection holds the resource that is being waited for). + */ + MDL_SHARED_HIGH_PRIO, + /* + A shared metadata lock (SR) for cases when there is an intention to read + data from table. + A connection holding this kind of lock can read table metadata and read + table data (after acquiring appropriate table and row-level locks). + This means that one can only acquire TL_READ, TL_READ_NO_INSERT, and + similar table-level locks on table if one holds SR MDL lock on it. + To be used for tables in SELECTs, subqueries, and LOCK TABLE ... READ + statements. + */ + MDL_SHARED_READ, + /* + A shared metadata lock (SW) for cases when there is an intention to modify + (and not just read) data in the table. + A connection holding SW lock can read table metadata and modify or read + table data (after acquiring appropriate table and row-level locks). + To be used for tables to be modified by INSERT, UPDATE, DELETE + statements, but not LOCK TABLE ... WRITE or DDL). Also taken by + SELECT ... FOR UPDATE. + */ + MDL_SHARED_WRITE, + /* + An upgradable shared metadata lock for cases when there is an + intention to modify (and not just read) data in the table. + Can be upgraded to MDL_SHARED_NO_WRITE and MDL_EXCLUSIVE. + A connection holding SU lock can read table metadata and modify or read + table data (after acquiring appropriate table and row-level locks). + To be used for the first phase of ALTER TABLE. 
+ */ + MDL_SHARED_UPGRADABLE, + /* + A shared metadata lock for cases when we need to read data from table + and block all concurrent modifications to it (for both data and metadata). + Used by LOCK TABLES READ statement. + */ + MDL_SHARED_READ_ONLY, + /* + An upgradable shared metadata lock which blocks all attempts to update + table data, allowing reads. + A connection holding this kind of lock can read table metadata and read + table data. + Can be upgraded to X metadata lock. + Note, that since this type of lock is not compatible with SNRW or SW + lock types, acquiring appropriate engine-level locks for reading + (TL_READ* for MyISAM, shared row locks in InnoDB) should be + contention-free. + To be used for the first phase of ALTER TABLE, when copying data between + tables, to allow concurrent SELECTs from the table, but not UPDATEs. + */ + MDL_SHARED_NO_WRITE, + /* + An upgradable shared metadata lock which allows other connections + to access table metadata, but not data. + It blocks all attempts to read or update table data, while allowing + INFORMATION_SCHEMA and SHOW queries. + A connection holding this kind of lock can read table metadata modify and + read table data. + Can be upgraded to X metadata lock. + To be used for LOCK TABLES WRITE statement. + Not compatible with any other lock type except S and SH. + */ + MDL_SHARED_NO_READ_WRITE, + /* + An exclusive metadata lock (X). + A connection holding this lock can modify both table's metadata and data. + No other type of metadata lock can be granted while this lock is held. + To be used for CREATE/DROP/RENAME TABLE statements and for execution of + certain phases of other DDL statements. + */ + MDL_EXCLUSIVE, + /* This should be the last !!! 
*/ + MDL_TYPE_END +}; + + +/** Backup locks */ + +/** + Block concurrent backup +*/ +#define MDL_BACKUP_START enum_mdl_type(0) +/** + Block new write requests to non transactional tables +*/ +#define MDL_BACKUP_FLUSH enum_mdl_type(1) +/** + In addition to previous locks, blocks running requests to non trans tables + Used to wait until all DML usage of on trans tables are finished +*/ +#define MDL_BACKUP_WAIT_FLUSH enum_mdl_type(2) +/** + In addition to previous locks, blocks new DDL's from starting +*/ +#define MDL_BACKUP_WAIT_DDL enum_mdl_type(3) +/** + In addition to previous locks, blocks commits +*/ +#define MDL_BACKUP_WAIT_COMMIT enum_mdl_type(4) + +/** + Blocks (or is blocked by) statements that intend to modify data. Acquired + before commit lock by FLUSH TABLES WITH READ LOCK. +*/ +#define MDL_BACKUP_FTWRL1 enum_mdl_type(5) + +/** + Blocks (or is blocked by) commits. Acquired after global read lock by + FLUSH TABLES WITH READ LOCK. +*/ +#define MDL_BACKUP_FTWRL2 enum_mdl_type(6) + +#define MDL_BACKUP_DML enum_mdl_type(7) +#define MDL_BACKUP_TRANS_DML enum_mdl_type(8) +#define MDL_BACKUP_SYS_DML enum_mdl_type(9) + +/** + Must be acquired by DDL statements that intend to modify data. + Currently it's also used for LOCK TABLES. +*/ +#define MDL_BACKUP_DDL enum_mdl_type(10) + +/** + Blocks new DDL's. Used by backup code to enable DDL logging +*/ +#define MDL_BACKUP_BLOCK_DDL enum_mdl_type(11) + +/* + Statement is modifying data, but will not block MDL_BACKUP_DDL or earlier + BACKUP stages. + ALTER TABLE is started with MDL_BACKUP_DDL, but changed to + MDL_BACKUP_ALTER_COPY while alter table is copying or modifing data. +*/ + +#define MDL_BACKUP_ALTER_COPY enum_mdl_type(12) + +/** + Must be acquired during commit. +*/ +#define MDL_BACKUP_COMMIT enum_mdl_type(13) +#define MDL_BACKUP_END enum_mdl_type(14) + + +/** Duration of metadata lock. 
*/ + +enum enum_mdl_duration { + /** + Locks with statement duration are automatically released at the end + of statement or transaction. + */ + MDL_STATEMENT= 0, + /** + Locks with transaction duration are automatically released at the end + of transaction. + */ + MDL_TRANSACTION, + /** + Locks with explicit duration survive the end of statement and transaction. + They have to be released explicitly by calling MDL_context::release_lock(). + */ + MDL_EXPLICIT, + /* This should be the last ! */ + MDL_DURATION_END }; + + +/** Maximal length of key for metadata locking subsystem. */ +#define MAX_MDLKEY_LENGTH (1 + NAME_LEN + 1 + NAME_LEN + 1) + + +/** + Metadata lock object key. + + A lock is requested or granted based on a fully qualified name and type. + E.g. They key for a table consists of <0 (=table)>++. + Elsewhere in the comments this triple will be referred to simply as "key" + or "name". +*/ + +struct MDL_key +{ +public: +#ifdef HAVE_PSI_INTERFACE + static void init_psi_keys(); +#endif + + /** + Object namespaces. + Sic: when adding a new member to this enum make sure to + update m_namespace_to_wait_state_name array in mdl.cc and + metadata_lock_info_lock_name in metadata_lock_info.cc! + + Different types of objects exist in different namespaces + - SCHEMA is for databases (to protect against DROP DATABASE) + - TABLE is for tables and views. + - BACKUP is for locking DML, DDL and COMMIT's during BACKUP STAGES + - FUNCTION is for stored functions. + - PROCEDURE is for stored procedures. + - TRIGGER is for triggers. + - EVENT is for event scheduler events + Note that although there isn't metadata locking on triggers, + it's necessary to have a separate namespace for them since + MDL_key is also used outside of the MDL subsystem. + */ + enum enum_mdl_namespace { BACKUP=0, + SCHEMA, + TABLE, + FUNCTION, + PROCEDURE, + PACKAGE_BODY, + TRIGGER, + EVENT, + USER_LOCK, /* user level locks. */ + /* This should be the last ! 
*/ + NAMESPACE_END }; + + const uchar *ptr() const { return (uchar*) m_ptr; } + uint length() const { return m_length; } + + const char *db_name() const { return m_ptr + 1; } + uint db_name_length() const { return m_db_name_length; } + + const char *name() const { return m_ptr + m_db_name_length + 2; } + uint name_length() const { return m_length - m_db_name_length - 3; } + + enum_mdl_namespace mdl_namespace() const + { return (enum_mdl_namespace)(m_ptr[0]); } + + /** + Construct a metadata lock key from a triplet (mdl_namespace, + database and name). + + @remark The key for a table is ++
+ + @param mdl_namespace Id of namespace of object to be locked + @param db Name of database to which the object belongs + @param name Name of of the object + @param key Where to store the the MDL key. + */ + void mdl_key_init(enum_mdl_namespace mdl_namespace_arg, + const char *db, const char *name_arg) + { + m_ptr[0]= (char) mdl_namespace_arg; + /* + It is responsibility of caller to ensure that db and object names + are not longer than NAME_LEN. Still we play safe and try to avoid + buffer overruns. + */ + DBUG_ASSERT(strlen(db) <= NAME_LEN); + DBUG_ASSERT(strlen(name_arg) <= NAME_LEN); + m_db_name_length= static_cast(strmake(m_ptr + 1, db, NAME_LEN) - + m_ptr - 1); + m_length= static_cast(strmake(m_ptr + m_db_name_length + 2, + name_arg, + NAME_LEN) - m_ptr + 1); + m_hash_value= my_hash_sort(&my_charset_bin, (uchar*) m_ptr + 1, + m_length - 1); + DBUG_SLOW_ASSERT(mdl_namespace_arg == USER_LOCK || ok_for_lower_case_names(db)); + } + void mdl_key_init(const MDL_key *rhs) + { + memcpy(m_ptr, rhs->m_ptr, rhs->m_length); + m_length= rhs->m_length; + m_db_name_length= rhs->m_db_name_length; + m_hash_value= rhs->m_hash_value; + } + bool is_equal(const MDL_key *rhs) const + { + return (m_length == rhs->m_length && + memcmp(m_ptr, rhs->m_ptr, m_length) == 0); + } + /** + Compare two MDL keys lexicographically. + */ + int cmp(const MDL_key *rhs) const + { + /* + The key buffer is always '\0'-terminated. Since key + character set is utf-8, we can safely assume that no + character starts with a zero byte. + */ + return memcmp(m_ptr, rhs->m_ptr, MY_MIN(m_length, rhs->m_length)); + } + + MDL_key(const MDL_key *rhs) + { + mdl_key_init(rhs); + } + MDL_key(enum_mdl_namespace namespace_arg, + const char *db_arg, const char *name_arg) + { + mdl_key_init(namespace_arg, db_arg, name_arg); + } + MDL_key() = default; /* To use when part of MDL_request. */ + + /** + Get thread state name to be used in case when we have to + wait on resource identified by key. 
+ */ + const PSI_stage_info * get_wait_state_name() const + { + return & m_namespace_to_wait_state_name[(int)mdl_namespace()]; + } + my_hash_value_type hash_value() const + { + return m_hash_value + mdl_namespace(); + } + my_hash_value_type tc_hash_value() const + { + return m_hash_value; + } + +private: + uint16 m_length; + uint16 m_db_name_length; + my_hash_value_type m_hash_value; + char m_ptr[MAX_MDLKEY_LENGTH]; + static PSI_stage_info m_namespace_to_wait_state_name[NAMESPACE_END]; +private: + MDL_key(const MDL_key &); /* not implemented */ + MDL_key &operator=(const MDL_key &); /* not implemented */ + friend my_hash_value_type mdl_hash_function(CHARSET_INFO *, + const uchar *, size_t); +}; + + +/** + A pending metadata lock request. + + A lock request and a granted metadata lock are represented by + different classes because they have different allocation + sites and hence different lifetimes. The allocation of lock requests is + controlled from outside of the MDL subsystem, while allocation of granted + locks (tickets) is controlled within the MDL subsystem. + + MDL_request is a C structure, you don't need to call a constructor + or destructor for it. +*/ + +class MDL_request +{ +public: + /** Type of metadata lock. */ + enum enum_mdl_type type; + /** Duration for requested lock. */ + enum enum_mdl_duration duration; + + /** + Pointers for participating in the list of lock requests for this context. + */ + MDL_request *next_in_list; + MDL_request **prev_in_list; + /** + Pointer to the lock ticket object for this lock request. + Valid only if this lock request is satisfied. + */ + MDL_ticket *ticket; + + /** A lock is requested based on a fully qualified name and type. 
*/ + MDL_key key; + + const char *m_src_file; + uint m_src_line; + +public: + + static void *operator new(size_t size, MEM_ROOT *mem_root) throw () + { return alloc_root(mem_root, size); } + static void operator delete(void *, MEM_ROOT *) {} + + void init_with_source(MDL_key::enum_mdl_namespace namespace_arg, + const char *db_arg, const char *name_arg, + enum_mdl_type mdl_type_arg, + enum_mdl_duration mdl_duration_arg, + const char *src_file, uint src_line); + void init_by_key_with_source(const MDL_key *key_arg, enum_mdl_type mdl_type_arg, + enum_mdl_duration mdl_duration_arg, + const char *src_file, uint src_line); + /** Set type of lock request. Can be only applied to pending locks. */ + inline void set_type(enum_mdl_type type_arg) + { + DBUG_ASSERT(ticket == NULL); + type= type_arg; + } + void move_from(MDL_request &from) + { + type= from.type; + duration= from.duration; + ticket= from.ticket; + next_in_list= from.next_in_list; + prev_in_list= from.prev_in_list; + key.mdl_key_init(&from.key); + from.ticket= NULL; // that's what "move" means + } + + /** + Is this a request for a lock which allow data to be updated? + + @note This method returns true for MDL_SHARED_UPGRADABLE type of + lock. Even though this type of lock doesn't allow updates + it will always be upgraded to one that does. + */ + bool is_write_lock_request() const + { + return (type >= MDL_SHARED_WRITE && + type != MDL_SHARED_READ_ONLY); + } + + /* + This is to work around the ugliness of TABLE_LIST + compiler-generated assignment operator. It is currently used + in several places to quickly copy "most" of the members of the + table list. These places currently never assume that the mdl + request is carried over to the new TABLE_LIST, or shared + between lists. + + This method does not initialize the instance being assigned! + Use of init() for initialization after this assignment operator + is mandatory. Can only be used before the request has been + granted. 
+ */ + MDL_request& operator=(const MDL_request &) + { + type= MDL_NOT_INITIALIZED; + ticket= NULL; + /* Do nothing, in particular, don't try to copy the key. */ + return *this; + } + /* Another piece of ugliness for TABLE_LIST constructor */ + MDL_request(): type(MDL_NOT_INITIALIZED), ticket(NULL) {} + + MDL_request(const MDL_request *rhs) + :type(rhs->type), + duration(rhs->duration), + ticket(NULL), + key(&rhs->key) + {} +}; + + +typedef void (*mdl_cached_object_release_hook)(void *); + +#define MDL_REQUEST_INIT(R, P1, P2, P3, P4, P5) \ + (*R).init_with_source(P1, P2, P3, P4, P5, __FILE__, __LINE__) + +#define MDL_REQUEST_INIT_BY_KEY(R, P1, P2, P3) \ + (*R).init_by_key_with_source(P1, P2, P3, __FILE__, __LINE__) + + +/** + An abstract class for inspection of a connected + subgraph of the wait-for graph. +*/ + +class MDL_wait_for_graph_visitor +{ +public: + virtual bool enter_node(MDL_context *node) = 0; + virtual void leave_node(MDL_context *node) = 0; + + virtual bool inspect_edge(MDL_context *dest) = 0; + virtual ~MDL_wait_for_graph_visitor(); + MDL_wait_for_graph_visitor() = default; +}; + +/** + Abstract class representing an edge in the waiters graph + to be traversed by deadlock detection algorithm. +*/ + +class MDL_wait_for_subgraph +{ +public: + virtual ~MDL_wait_for_subgraph(); + + /** + Accept a wait-for graph visitor to inspect the node + this edge is leading to. + */ + virtual bool accept_visitor(MDL_wait_for_graph_visitor *gvisitor) = 0; + + enum enum_deadlock_weight + { + DEADLOCK_WEIGHT_FTWRL1= 0, + DEADLOCK_WEIGHT_DML= 1, + DEADLOCK_WEIGHT_DDL= 100 + }; + /* A helper used to determine which lock request should be aborted. */ + virtual uint get_deadlock_weight() const = 0; +}; + + +/** + A granted metadata lock. + + @warning MDL_ticket members are private to the MDL subsystem. + + @note Multiple shared locks on a same object are represented by a + single ticket. The same does not apply for other lock types. 
+ + @note There are two groups of MDL_ticket members: + - "Externally accessible". These members can be accessed from + threads/contexts different than ticket owner in cases when + ticket participates in some list of granted or waiting tickets + for a lock. Therefore one should change these members before + including then to waiting/granted lists or while holding lock + protecting those lists. + - "Context private". Such members are private to thread/context + owning this ticket. I.e. they should not be accessed from other + threads/contexts. +*/ + +class MDL_ticket : public MDL_wait_for_subgraph, public ilist_node<> +{ +public: + /** + Pointers for participating in the list of lock requests for this context. + Context private. + */ + MDL_ticket *next_in_context; + MDL_ticket **prev_in_context; +public: +#ifdef WITH_WSREP + void wsrep_report(bool debug) const; +#endif /* WITH_WSREP */ + bool has_pending_conflicting_lock() const; + + MDL_context *get_ctx() const { return m_ctx; } + bool is_upgradable_or_exclusive() const + { + return m_type == MDL_SHARED_UPGRADABLE || + m_type == MDL_SHARED_NO_WRITE || + m_type == MDL_SHARED_NO_READ_WRITE || + m_type == MDL_EXCLUSIVE; + } + enum_mdl_type get_type() const { return m_type; } + const LEX_STRING *get_type_name() const; + const LEX_STRING *get_type_name(enum_mdl_type type) const; + MDL_lock *get_lock() const { return m_lock; } + MDL_key *get_key() const; + void downgrade_lock(enum_mdl_type type); + + bool has_stronger_or_equal_type(enum_mdl_type type) const; + + bool is_incompatible_when_granted(enum_mdl_type type) const; + bool is_incompatible_when_waiting(enum_mdl_type type) const; + + /** Implement MDL_wait_for_subgraph interface. */ + virtual bool accept_visitor(MDL_wait_for_graph_visitor *dvisitor); + virtual uint get_deadlock_weight() const; + /** + Status of lock request represented by the ticket as reflected in P_S. 
+ */ + enum enum_psi_status { PENDING = 0, GRANTED, + PRE_ACQUIRE_NOTIFY, POST_RELEASE_NOTIFY }; +private: + friend class MDL_context; + + MDL_ticket(MDL_context *ctx_arg, enum_mdl_type type_arg +#ifndef DBUG_OFF + , enum_mdl_duration duration_arg +#endif + ) + : m_type(type_arg), +#ifndef DBUG_OFF + m_duration(duration_arg), +#endif + m_ctx(ctx_arg), + m_lock(NULL), + m_psi(NULL) + {} + + virtual ~MDL_ticket() + { + DBUG_ASSERT(m_psi == NULL); + } + + static MDL_ticket *create(MDL_context *ctx_arg, enum_mdl_type type_arg +#ifndef DBUG_OFF + , enum_mdl_duration duration_arg +#endif + ); + static void destroy(MDL_ticket *ticket); +private: + /** Type of metadata lock. Externally accessible. */ + enum enum_mdl_type m_type; +#ifndef DBUG_OFF + /** + Duration of lock represented by this ticket. + Context private. Debug-only. + */ + enum_mdl_duration m_duration; +#endif + /** + Context of the owner of the metadata lock ticket. Externally accessible. + */ + MDL_context *m_ctx; + + /** + Pointer to the lock object for this lock ticket. Externally accessible. + */ + MDL_lock *m_lock; + + PSI_metadata_lock *m_psi; + +private: + MDL_ticket(const MDL_ticket &); /* not implemented */ + MDL_ticket &operator=(const MDL_ticket &); /* not implemented */ +}; + + +/** + Savepoint for MDL context. + + Doesn't include metadata locks with explicit duration as + they are not released during rollback to savepoint. +*/ + +class MDL_savepoint +{ +public: + MDL_savepoint() = default;; + +private: + MDL_savepoint(MDL_ticket *stmt_ticket, MDL_ticket *trans_ticket) + : m_stmt_ticket(stmt_ticket), m_trans_ticket(trans_ticket) + {} + + friend class MDL_context; + +private: + /** + Pointer to last lock with statement duration which was taken + before creation of savepoint. + */ + MDL_ticket *m_stmt_ticket; + /** + Pointer to last lock with transaction duration which was taken + before creation of savepoint. + */ + MDL_ticket *m_trans_ticket; +}; + + +/** + A reliable way to wait on an MDL lock. 
+*/ + +class MDL_wait +{ +public: + MDL_wait(); + ~MDL_wait(); + + enum enum_wait_status { EMPTY = 0, GRANTED, VICTIM, TIMEOUT, KILLED }; + + bool set_status(enum_wait_status result_arg); + enum_wait_status get_status(); + void reset_status(); + enum_wait_status timed_wait(MDL_context_owner *owner, + struct timespec *abs_timeout, + bool signal_timeout, + const PSI_stage_info *wait_state_name); +private: + /** + Condvar which is used for waiting until this context's pending + request can be satisfied or this thread has to perform actions + to resolve a potential deadlock (we subscribe to such + notification by adding a ticket corresponding to the request + to an appropriate queue of waiters). + */ + mysql_mutex_t m_LOCK_wait_status; + mysql_cond_t m_COND_wait_status; + enum_wait_status m_wait_status; +}; + + +typedef I_P_List, + I_P_List_counter> + MDL_request_list; + +/** + Context of the owner of metadata locks. I.e. each server + connection has such a context. +*/ + +class MDL_context +{ +public: + typedef I_P_List > + Ticket_list; + + typedef Ticket_list::Iterator Ticket_iterator; + + MDL_context(); + void destroy(); + + bool try_acquire_lock(MDL_request *mdl_request); + bool acquire_lock(MDL_request *mdl_request, double lock_wait_timeout); + bool acquire_locks(MDL_request_list *requests, double lock_wait_timeout); + bool upgrade_shared_lock(MDL_ticket *mdl_ticket, + enum_mdl_type new_type, + double lock_wait_timeout); + + bool clone_ticket(MDL_request *mdl_request); + + void release_all_locks_for_name(MDL_ticket *ticket); + void release_lock(MDL_ticket *ticket); + + bool is_lock_owner(MDL_key::enum_mdl_namespace mdl_namespace, + const char *db, const char *name, + enum_mdl_type mdl_type); + unsigned long get_lock_owner(MDL_key *mdl_key); + + bool has_lock(const MDL_savepoint &mdl_savepoint, MDL_ticket *mdl_ticket); + + inline bool has_locks() const + { + return !(m_tickets[MDL_STATEMENT].is_empty() && + m_tickets[MDL_TRANSACTION].is_empty() && + 
m_tickets[MDL_EXPLICIT].is_empty()); + } + bool has_explicit_locks() const + { + return !m_tickets[MDL_EXPLICIT].is_empty(); + } + inline bool has_transactional_locks() const + { + return !m_tickets[MDL_TRANSACTION].is_empty(); + } + + MDL_savepoint mdl_savepoint() + { + return MDL_savepoint(m_tickets[MDL_STATEMENT].front(), + m_tickets[MDL_TRANSACTION].front()); + } + + void set_explicit_duration_for_all_locks(); + void set_transaction_duration_for_all_locks(); + void set_lock_duration(MDL_ticket *mdl_ticket, enum_mdl_duration duration); + + void release_statement_locks(); + void release_transactional_locks(THD *thd); + void release_explicit_locks(); + void rollback_to_savepoint(const MDL_savepoint &mdl_savepoint); + + MDL_context_owner *get_owner() { return m_owner; } + + /** @pre Only valid if we started waiting for lock. */ + inline uint get_deadlock_weight() const + { return m_waiting_for->get_deadlock_weight() + m_deadlock_overweight; } + void inc_deadlock_overweight() { m_deadlock_overweight++; } + /** + Post signal to the context (and wake it up if necessary). + + @retval FALSE - Success, signal was posted. + @retval TRUE - Failure, signal was not posted since context + already has received some signal or closed + signal slot. + */ + void init(MDL_context_owner *arg) { m_owner= arg; reset(); } + void reset() { m_deadlock_overweight= 0; } + + void set_needs_thr_lock_abort(bool needs_thr_lock_abort) + { + /* + @note In theory, this member should be modified under protection + of some lock since it can be accessed from different threads. + In practice, this is not necessary as code which reads this + value and so might miss the fact that value was changed will + always re-try reading it after small timeout and therefore + will see the new value eventually. 
+ */ + m_needs_thr_lock_abort= needs_thr_lock_abort; + } + bool get_needs_thr_lock_abort() const + { + return m_needs_thr_lock_abort; + } +public: + /** + If our request for a lock is scheduled, or aborted by the deadlock + detector, the result is recorded in this class. + */ + MDL_wait m_wait; +private: + /** + Lists of all MDL tickets acquired by this connection. + + Lists of MDL tickets: + --------------------- + The entire set of locks acquired by a connection can be separated + in three subsets according to their duration: locks released at + the end of statement, at the end of transaction and locks are + released explicitly. + + Statement and transactional locks are locks with automatic scope. + They are accumulated in the course of a transaction, and released + either at the end of uppermost statement (for statement locks) or + on COMMIT, ROLLBACK or ROLLBACK TO SAVEPOINT (for transactional + locks). They must not be (and never are) released manually, + i.e. with release_lock() call. + + Tickets with explicit duration are taken for locks that span + multiple transactions or savepoints. + These are: HANDLER SQL locks (HANDLER SQL is + transaction-agnostic), LOCK TABLES locks (you can COMMIT/etc + under LOCK TABLES, and the locked tables stay locked), user level + locks (GET_LOCK()/RELEASE_LOCK() functions) and + locks implementing "global read lock". + + Statement/transactional locks are always prepended to the + beginning of the appropriate list. In other words, they are + stored in reverse temporal order. Thus, when we rollback to + a savepoint, we start popping and releasing tickets from the + front until we reach the last ticket acquired after the savepoint. + + Locks with explicit duration are not stored in any + particular order, and among each other can be split into + four sets: + + [LOCK TABLES locks] [USER locks] [HANDLER locks] [GLOBAL READ LOCK locks] + + The following is known about these sets: + + * GLOBAL READ LOCK locks are always stored last. 
+ This is because one can't say SET GLOBAL read_only=1 or + FLUSH TABLES WITH READ LOCK if one has locked tables. One can, + however, LOCK TABLES after having entered the read only mode. + Note, that subsequent LOCK TABLES statement will unlock the previous + set of tables, but not the GRL! + There are no HANDLER locks after GRL locks because + SET GLOBAL read_only performs a FLUSH TABLES WITH + READ LOCK internally, and FLUSH TABLES, in turn, implicitly + closes all open HANDLERs. + However, one can open a few HANDLERs after entering the + read only mode. + * LOCK TABLES locks include intention exclusive locks on + involved schemas and global intention exclusive lock. + */ + Ticket_list m_tickets[MDL_DURATION_END]; + MDL_context_owner *m_owner; + /** + TRUE - if for this context we will break protocol and try to + acquire table-level locks while having only S lock on + some table. + To avoid deadlocks which might occur during concurrent + upgrade of SNRW lock on such object to X lock we have to + abort waits for table-level locks for such connections. + FALSE - Otherwise. + */ + bool m_needs_thr_lock_abort; + + /** + Read-write lock protecting m_waiting_for member. + + @note The fact that this read-write lock prefers readers is + important as deadlock detector won't work correctly + otherwise. @sa Comment for MDL_lock::m_rwlock. + */ + mysql_prlock_t m_LOCK_waiting_for; + /** + Tell the deadlock detector what metadata lock or table + definition cache entry this session is waiting for. + In principle, this is redundant, as information can be found + by inspecting waiting queues, but we'd very much like it to be + readily available to the wait-for graph iterator. 
+ */ + MDL_wait_for_subgraph *m_waiting_for; + LF_PINS *m_pins; + uint m_deadlock_overweight; +private: + MDL_ticket *find_ticket(MDL_request *mdl_req, + enum_mdl_duration *duration); + void release_locks_stored_before(enum_mdl_duration duration, MDL_ticket *sentinel); + void release_lock(enum_mdl_duration duration, MDL_ticket *ticket); + bool try_acquire_lock_impl(MDL_request *mdl_request, + MDL_ticket **out_ticket); + bool fix_pins(); + +public: + THD *get_thd() const { return m_owner->get_thd(); } + bool has_explicit_locks(); + void find_deadlock(); + + ulong get_thread_id() const { return thd_get_thread_id(get_thd()); } + + bool visit_subgraph(MDL_wait_for_graph_visitor *dvisitor); + + /** Inform the deadlock detector there is an edge in the wait-for graph. */ + void will_wait_for(MDL_wait_for_subgraph *waiting_for_arg) + { + mysql_prlock_wrlock(&m_LOCK_waiting_for); + m_waiting_for= waiting_for_arg; + mysql_prlock_unlock(&m_LOCK_waiting_for); + } + + /** Remove the wait-for edge from the graph after we're done waiting. */ + void done_waiting_for() + { + mysql_prlock_wrlock(&m_LOCK_waiting_for); + m_waiting_for= NULL; + mysql_prlock_unlock(&m_LOCK_waiting_for); + } + void lock_deadlock_victim() + { + mysql_prlock_rdlock(&m_LOCK_waiting_for); + } + void unlock_deadlock_victim() + { + mysql_prlock_unlock(&m_LOCK_waiting_for); + } +private: + MDL_context(const MDL_context &rhs); /* not implemented */ + MDL_context &operator=(MDL_context &rhs); /* not implemented */ + + /* metadata_lock_info plugin */ + friend int i_s_metadata_lock_info_fill_row(MDL_ticket*, void*); +#ifndef DBUG_OFF +public: + /** + This is for the case when the thread opening the table does not acquire + the lock itself, but utilizes a lock guarantee from another MDL context. + + For example, in InnoDB, MDL is acquired by the purge_coordinator_task, + but the table may be opened and used in a purge_worker_task. 
+ The coordinator thread holds the lock for the duration of worker's purge + job, or longer, possibly reusing shared MDL for different workers and jobs. + */ + MDL_context *lock_warrant= NULL; + + inline bool is_lock_warrantee(MDL_key::enum_mdl_namespace ns, + const char *db, const char *name, + enum_mdl_type mdl_type) const + { + return lock_warrant && lock_warrant->is_lock_owner(ns, db, name, mdl_type); + } +#endif +}; + + +void mdl_init(); +void mdl_destroy(); + +extern "C" unsigned long thd_get_thread_id(const MYSQL_THD thd); + +/** + Check if a connection in question is no longer connected. + + @details + Replication apply thread is always connected. Otherwise, + does a poll on the associated socket to check if the client + is gone. +*/ +extern "C" int thd_is_connected(MYSQL_THD thd); + + +/* + Metadata locking subsystem tries not to grant more than + max_write_lock_count high-prio, strong locks successively, + to avoid starving out weak, low-prio locks. +*/ +extern "C" ulong max_write_lock_count; + +typedef int (*mdl_iterator_callback)(MDL_ticket *ticket, void *arg, + bool granted); +extern MYSQL_PLUGIN_IMPORT +int mdl_iterate(mdl_iterator_callback callback, void *arg); +#endif /* MDL_H */ diff --git a/sql/mem_root_array.h b/sql/mem_root_array.h new file mode 100644 index 00000000..3d03a5a5 --- /dev/null +++ b/sql/mem_root_array.h @@ -0,0 +1,245 @@ +/* Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifndef MEM_ROOT_ARRAY_INCLUDED +#define MEM_ROOT_ARRAY_INCLUDED + +#include <my_alloc.h> + +/** + A typesafe replacement for DYNAMIC_ARRAY. + We use MEM_ROOT for allocating storage, rather than the C++ heap. + The interface is chosen to be similar to std::vector. + + @remark + Unlike DYNAMIC_ARRAY, elements are properly copied + (rather than memcpy()d) if the underlying array needs to be expanded. + + @remark + Depending on has_trivial_destructor, we destroy objects which are + removed from the array (including when the array object itself is destroyed). + + @remark + Note that MEM_ROOT has no facility for reusing free space, + so don't use this if multiple re-expansions are likely to happen. + + @param Element_type The type of the elements of the container. + Elements must be copyable. + @param has_trivial_destructor If true, we don't destroy elements. + We could have used type traits to determine this. + __has_trivial_destructor is supported by some (but not all) + compilers we use.
+*/ +template<typename Element_type, bool has_trivial_destructor> +class Mem_root_array +{ +public: + /// Convenience typedef, same typedef name as std::vector + typedef Element_type value_type; + + Mem_root_array(MEM_ROOT *root) + : m_root(root), m_array(NULL), m_size(0), m_capacity(0) + { + DBUG_ASSERT(m_root != NULL); + } + + Mem_root_array(MEM_ROOT *root, size_t n, const value_type &val= value_type()) + : m_root(root), m_array(NULL), m_size(0), m_capacity(0) + { + resize(n, val); + } + + ~Mem_root_array() + { + clear(); + } + + Element_type &at(size_t n) + { + DBUG_ASSERT(n < size()); + return m_array[n]; + } + + const Element_type &at(size_t n) const + { + DBUG_ASSERT(n < size()); + return m_array[n]; + } + + Element_type &operator[](size_t n) { return at(n); } + const Element_type &operator[](size_t n) const { return at(n); } + + Element_type &back() { return at(size() - 1); } + const Element_type &back() const { return at(size() - 1); } + + // Returns a pointer to the first element in the array. + Element_type *begin() { return &m_array[0]; } + const Element_type *begin() const { return &m_array[0]; } + + // Returns a pointer to the past-the-end element in the array. + Element_type *end() { return &m_array[size()]; } + const Element_type *end() const { return &m_array[size()]; } + + // Erases all of the elements. + void clear() + { + if (!empty()) + chop(0); + } + + /* + Chops the tail off the array, erasing all tail elements. + @param pos Index of first element to erase. + */ + void chop(const size_t pos) + { + DBUG_ASSERT(pos < m_size); + if (!has_trivial_destructor) + { + for (size_t ix= pos; ix < m_size; ++ix) + { + Element_type *p= &m_array[ix]; + p->~Element_type(); // Destroy discarded element. + } + } + m_size= pos; + } + + /* + Reserves space for array elements. + Copies over existing elements, in case we are re-expanding the array. + + @param n number of elements. + @retval true if out-of-memory, false otherwise.
+ */ + bool reserve(size_t n) + { + if (n <= m_capacity) + return false; + + void *mem= alloc_root(m_root, n * element_size()); + if (!mem) + return true; + Element_type *array= static_cast<Element_type*>(mem); + + // Copy all the existing elements into the new array. + for (size_t ix= 0; ix < m_size; ++ix) + { + Element_type *new_p= &array[ix]; + Element_type *old_p= &m_array[ix]; + new (new_p) Element_type(*old_p); // Copy into new location. + if (!has_trivial_destructor) + old_p->~Element_type(); // Destroy the old element. + } + + // Forget the old array. + m_array= array; + m_capacity= n; + return false; + } + + /* + Adds a new element at the end of the array, after its current last + element. The content of this new element is initialized to a copy of + the input argument. + + @param element Object to copy. + @retval true if out-of-memory, false otherwise. + */ + bool push_back(const Element_type &element) + { + const size_t min_capacity= 20; + const size_t expansion_factor= 2; + if (0 == m_capacity && reserve(min_capacity)) + return true; + if (m_size == m_capacity && reserve(m_capacity * expansion_factor)) + return true; + Element_type *p= &m_array[m_size++]; + new (p) Element_type(element); + return false; + } + + /** + Removes the last element in the array, effectively reducing the + container size by one. This destroys the removed element. + */ + void pop_back() + { + DBUG_ASSERT(!empty()); + if (!has_trivial_destructor) + back().~Element_type(); + m_size-= 1; + } + + /** + Resizes the container so that it contains n elements. + + If n is smaller than the current container size, the content is + reduced to its first n elements, removing those beyond (and + destroying them). + + If n is greater than the current container size, the content is + expanded by inserting at the end as many elements as needed to + reach a size of n. If val is specified, the new elements are + initialized as copies of val, otherwise, they are + value-initialized.
+ + If n is also greater than the current container capacity, an automatic + reallocation of the allocated storage space takes place. + + Notice that this function changes the actual content of the + container by inserting or erasing elements from it. + */ + void resize(size_t n, const value_type &val= value_type()) + { + if (n == m_size) + return; + if (n > m_size) + { + if (!reserve(n)) + { + while (n != m_size) + push_back(val); + } + return; + } + if (!has_trivial_destructor) + { + while (n != m_size) + pop_back(); + } + m_size= n; + } + + size_t capacity() const { return m_capacity; } + size_t element_size() const { return sizeof(Element_type); } + bool empty() const { return size() == 0; } + size_t size() const { return m_size; } + const MEM_ROOT *mem_root() const { return m_root; } + +private: + MEM_ROOT *const m_root; + Element_type *m_array; + size_t m_size; + size_t m_capacity; + + // Not (yet) implemented. + Mem_root_array(const Mem_root_array&); + Mem_root_array &operator=(const Mem_root_array&); +}; + + +#endif // MEM_ROOT_ARRAY_INCLUDED diff --git a/sql/message.h b/sql/message.h new file mode 100644 index 00000000..a6491736 --- /dev/null +++ b/sql/message.h @@ -0,0 +1,54 @@ +/* + To change or add messages mysqld writes to the Windows error log, run + mc.exe message.mc + and checkin generated messages.h, messages.rc and msg000001.bin under the + source control. + mc.exe can be installed with Windows SDK, some Visual Studio distributions + do not include it. 
+*/ +// +// Values are 32 bit values laid out as follows: +// +// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 +// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 +// +---+-+-+-----------------------+-------------------------------+ +// |Sev|C|R| Facility | Code | +// +---+-+-+-----------------------+-------------------------------+ +// +// where +// +// Sev - is the severity code +// +// 00 - Success +// 01 - Informational +// 10 - Warning +// 11 - Error +// +// C - is the Customer code flag +// +// R - is a reserved bit +// +// Facility - is the facility code +// +// Code - is the facility's status code +// +// +// Define the facility codes +// + + +// +// Define the severity codes +// + + +// +// MessageId: MSG_DEFAULT +// +// MessageText: +// +// %1 +// +// +#define MSG_DEFAULT 0xC0000064L + diff --git a/sql/message.mc b/sql/message.mc new file mode 100644 index 00000000..c009b048 --- /dev/null +++ b/sql/message.mc @@ -0,0 +1,15 @@ +;/* +; To change or add messages mysqld writes to the Windows error log, run +; mc.exe message.mc +; and checkin generated messages.h, messages.rc and msg000001.bin under the +; source control. +; mc.exe can be installed with Windows SDK, some Visual Studio distributions +; do not include it. +;*/ +MessageId = 100 +Severity = Error +Facility = Application +SymbolicName = MSG_DEFAULT +Language = English +%1 + diff --git a/sql/message.rc b/sql/message.rc new file mode 100644 index 00000000..0abcb0fa --- /dev/null +++ b/sql/message.rc @@ -0,0 +1,2 @@ +LANGUAGE 0x9,0x1 +1 11 "MSG00001.bin" diff --git a/sql/mf_iocache.cc b/sql/mf_iocache.cc new file mode 100644 index 00000000..a8087ed5 --- /dev/null +++ b/sql/mf_iocache.cc @@ -0,0 +1,97 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + @details + Caching of files that only do (sequential) reads or writes of fixed- + length records. A read isn't allowed to go over file-length. A read is ok + if it ends at file-length and next read can try to read after file-length + (and get an EOF-error). + Possibly use of asynchronous io. + Macros for reads and writes for faster io. + Used instead of FILE when reading or writing whole files. + This will make mf_rec_cache obsolete. + One can change info->pos_in_file to a higher value to skip bytes in file if + also info->read_pos is set to info->read_end. + If called through open_cached_file(), then the temporary file will + only be created if a write exceeds the file buffer or if one calls + flush_io_cache(). +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_class.h" // THD +#ifdef HAVE_REPLICATION + +extern "C" { + +/** + Read buffered from the net.
+ + @retval + 1 if can't read requested characters + @retval + 0 if record read +*/ + + +int _my_b_net_read(IO_CACHE *info, uchar *Buffer, size_t) +{ + ulong read_length; + NET *net= &(current_thd)->net; + DBUG_ENTER("_my_b_net_read"); + + if (!info->end_of_file) + DBUG_RETURN(1); /* because my_b_get (no _) takes 1 byte at a time */ + read_length= my_net_read_packet(net, 0); + if (unlikely(read_length == packet_error)) + { + info->error= -1; + DBUG_RETURN(1); + } + if (unlikely(read_length == 0)) + { + info->end_of_file= 0; /* End of file from client */ + DBUG_RETURN(1); + } + /* to set up stuff for my_b_get (no _) */ + info->read_end = (info->read_pos = (uchar*) net->read_pos) + read_length; + Buffer[0] = info->read_pos[0]; /* length is always 1 */ + + /* + info->request_pos is used by log_loaded_block() to know the size + of the current block. + info->pos_in_file is used by log_loaded_block() too. + */ + info->pos_in_file+= read_length; + info->request_pos=info->read_pos; + + info->read_pos++; + + DBUG_RETURN(0); +} + +} /* extern "C" */ + +#elif defined(_WIN32) + +// Remove linker warning 4221 about empty file +namespace { char dummy; }; + +#endif /* HAVE_REPLICATION */ + + diff --git a/sql/mf_iocache_encr.cc b/sql/mf_iocache_encr.cc new file mode 100644 index 00000000..63830ec6 --- /dev/null +++ b/sql/mf_iocache_encr.cc @@ -0,0 +1,275 @@ +/* + Copyright (c) 2015, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/************************************************************************* + Limitation of encrypted IO_CACHEs + 1. Designed to support temporary files only (open_cached_file, fd=-1) + 2. Created with WRITE_CACHE, later can be reinit_io_cache'ed to + READ_CACHE and WRITE_CACHE in any order arbitrary number of times. + 3. no seeks for writes, but reinit_io_cache(WRITE_CACHE, seek_offset) + is allowed (there's a special hack in reinit_io_cache() for that) +*/ + +#include "../mysys/mysys_priv.h" +#include "log.h" +#include "mysqld.h" +#include "sql_class.h" + +static uint keyid, keyver; + +#define set_iv(IV, N1, N2) \ + do { \ + compile_time_assert(sizeof(IV) >= sizeof(N1) + sizeof(N2)); \ + memcpy(IV, &(N1), sizeof(N1)); \ + memcpy(IV + sizeof(N1), &(N2), sizeof(N2)); \ + } while(0) + +static int my_b_encr_read(IO_CACHE *info, uchar *Buffer, size_t Count) +{ + my_off_t pos_in_file= info->pos_in_file + (info->read_end - info->buffer); + my_off_t old_pos_in_file= pos_in_file, pos_offset= 0; + IO_CACHE_CRYPT *crypt_data= + (IO_CACHE_CRYPT *)(info->buffer + info->buffer_length + MY_AES_BLOCK_SIZE); + uchar *wbuffer= (uchar*)&(crypt_data->inbuf_counter); + uchar *ebuffer= (uchar*)(crypt_data + 1); + DBUG_ENTER("my_b_encr_read"); + + if (pos_in_file == info->end_of_file) + { + /* reading past EOF should not empty the cache */ + info->read_pos= info->read_end; + info->error= 0; + DBUG_RETURN(MY_TEST(Count)); + } + + if (info->seek_not_done) + { + my_off_t wpos; + + pos_offset= pos_in_file % info->buffer_length; + pos_in_file-= pos_offset; + + wpos= pos_in_file / info->buffer_length * crypt_data->block_length; + + if ((mysql_file_seek(info->file, wpos, MY_SEEK_SET, MYF(0)) + == MY_FILEPOS_ERROR)) + { + info->error= -1; + DBUG_RETURN(1); + } + info->seek_not_done= 0; 
+ if (info->next_file_user) + { + IO_CACHE *c; + for (c= info->next_file_user; + c!= info; + c= c->next_file_user) + { + c->seek_not_done= 1; + } + } + } + + do + { + uint elength, wlength, length; + uchar iv[MY_AES_BLOCK_SIZE]= {0}; + + DBUG_ASSERT(pos_in_file % info->buffer_length == 0); + + if (info->end_of_file - pos_in_file >= info->buffer_length) + wlength= crypt_data->block_length; + else + wlength= crypt_data->last_block_length; + + if (mysql_file_read(info->file, wbuffer, wlength, info->myflags | MY_NABP)) + { + info->error= -1; + DBUG_RETURN(1); + } + + elength= wlength - (uint)(ebuffer - wbuffer); + set_iv(iv, pos_in_file, crypt_data->inbuf_counter); + + if (encryption_crypt(ebuffer, elength, info->buffer, &length, + crypt_data->key, sizeof(crypt_data->key), + iv, sizeof(iv), ENCRYPTION_FLAG_DECRYPT, + keyid, keyver)) + { + my_errno= 1; + DBUG_RETURN(info->error= -1); + } + + DBUG_ASSERT(length <= info->buffer_length); + + size_t copied= MY_MIN(Count, (size_t)(length - pos_offset)); + if (copied) + { + memcpy(Buffer, info->buffer + pos_offset, copied); + Count-= copied; + Buffer+= copied; + } + + info->read_pos= info->buffer + pos_offset + copied; + info->read_end= info->buffer + length; + info->pos_in_file= pos_in_file; + pos_in_file+= length; + pos_offset= 0; + + if (wlength < crypt_data->block_length && pos_in_file < info->end_of_file) + { + info->error= (int)(pos_in_file - old_pos_in_file); + DBUG_RETURN(1); + } + } while (Count); + + DBUG_RETURN(0); +} + +static int my_b_encr_write(IO_CACHE *info, const uchar *Buffer, size_t Count) +{ + IO_CACHE_CRYPT *crypt_data= + (IO_CACHE_CRYPT *)(info->buffer + info->buffer_length + MY_AES_BLOCK_SIZE); + uchar *wbuffer= (uchar*)&(crypt_data->inbuf_counter); + uchar *ebuffer= (uchar*)(crypt_data + 1); + DBUG_ENTER("my_b_encr_write"); + + if (Buffer != info->write_buffer) + { + Count-= Count % info->buffer_length; + if (!Count) + DBUG_RETURN(0); + } + + if (info->seek_not_done) + { + DBUG_ASSERT(info->pos_in_file 
% info->buffer_length == 0); + my_off_t wpos= info->pos_in_file / info->buffer_length * crypt_data->block_length; + + if ((mysql_file_seek(info->file, wpos, MY_SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR)) + { + info->error= -1; + DBUG_RETURN(1); + } + info->seek_not_done= 0; + } + + if (info->pos_in_file == 0) + { + if (my_random_bytes(crypt_data->key, sizeof(crypt_data->key))) + { + my_errno= 1; + DBUG_RETURN(info->error= -1); + } + crypt_data->counter= 0; + + IF_DBUG(crypt_data->block_length= 0,); + } + + do + { + size_t length= MY_MIN(info->buffer_length, Count); + uint elength, wlength; + uchar iv[MY_AES_BLOCK_SIZE]= {0}; + + crypt_data->inbuf_counter= crypt_data->counter; + set_iv(iv, info->pos_in_file, crypt_data->inbuf_counter); + + if (encryption_crypt(Buffer, (uint)length, ebuffer, &elength, + crypt_data->key, (uint) sizeof(crypt_data->key), + iv, (uint) sizeof(iv), ENCRYPTION_FLAG_ENCRYPT, + keyid, keyver)) + { + my_errno= 1; + DBUG_RETURN(info->error= -1); + } + wlength= elength + (uint)(ebuffer - wbuffer); + + if (length == info->buffer_length) + { + /* + block_length should be always the same. that is, encrypting + buffer_length bytes should *always* produce block_length bytes + */ + DBUG_ASSERT(crypt_data->block_length == 0 || crypt_data->block_length == wlength); + DBUG_ASSERT(elength <= encryption_encrypted_length((uint)length, keyid, keyver)); + crypt_data->block_length= wlength; + } + else + { + /* if we write a partial block, it *must* be the last write */ + IF_DBUG(info->write_function= 0,); + crypt_data->last_block_length= wlength; + } + + if (mysql_file_write(info->file, wbuffer, wlength, info->myflags | MY_NABP)) + DBUG_RETURN(info->error= -1); + + Buffer+= length; + Count-= length; + info->pos_in_file+= length; + crypt_data->counter++; + } while (Count); + DBUG_RETURN(0); +} + +/** + determine what key id and key version to use for IO_CACHE temp files + + First, try key id 2, if it doesn't exist, use key id 1. 
+ + (key id 1 is the default system key id, used pretty much everywhere, it must + exist. key id 2 is for tempfiles, it can be used, for example, to set a + faster encryption algorithm for temporary files) + + This looks like it might have a bug: if an encryption plugin is unloaded when + there's an open IO_CACHE, that IO_CACHE will become unreadable after reinit. + But in fact it is safe, as an encryption plugin can only be unloaded on + server shutdown. + + Note that encrypt_tmp_files variable is read-only. +*/ +int init_io_cache_encryption() +{ + if (encrypt_tmp_files) + { + keyid= ENCRYPTION_KEY_TEMPORARY_DATA; + keyver= encryption_key_get_latest_version(keyid); + if (keyver == ENCRYPTION_KEY_VERSION_INVALID) + { + keyid= ENCRYPTION_KEY_SYSTEM_DATA; + keyver= encryption_key_get_latest_version(keyid); + } + if (keyver == ENCRYPTION_KEY_VERSION_INVALID) + { + sql_print_error("Failed to enable encryption of temporary files"); + return 1; + } + + if (keyver != ENCRYPTION_KEY_NOT_ENCRYPTED) + { + sql_print_information("Using encryption key id %d for temporary files", keyid); + _my_b_encr_read= my_b_encr_read; + _my_b_encr_write= my_b_encr_write; + return 0; + } + } + + _my_b_encr_read= 0; + _my_b_encr_write= 0; + return 0; +} + diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc new file mode 100644 index 00000000..2180cbb6 --- /dev/null +++ b/sql/multi_range_read.cc @@ -0,0 +1,2131 @@ +/* Copyright (C) 2010, 2011 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_parse.h" +#include +#include "sql_select.h" +#include "key.h" +#include "sql_statistics.h" +#include "rowid_filter.h" + +/**************************************************************************** + * Default MRR implementation (MRR to non-MRR converter) + ***************************************************************************/ + +/** + Get cost and other information about MRR scan over a known list of ranges + + Calculate estimated cost and other information about an MRR scan for given + sequence of ranges. + + @param keyno Index number + @param seq Range sequence to be traversed + @param seq_init_param First parameter for seq->init() + @param n_ranges_arg Number of ranges in the sequence, or 0 if the caller + can't efficiently determine it + @param bufsz INOUT IN: Size of the buffer available for use + OUT: Size of the buffer that is expected to be actually + used, or 0 if buffer is not needed. + @param flags INOUT A combination of HA_MRR_* flags + @param cost OUT Estimated cost of MRR access + + @note + This method (or an overriding one in a derived class) must check for + thd->killed and return HA_POS_ERROR if it is not zero. This is required + for a user to be able to interrupt the calculation by killing the + connection/query. + + @retval + HA_POS_ERROR Error or the engine is unable to perform the requested + scan. Values of OUT parameters are undefined. + @retval + other OK, *cost contains cost of the scan, *bufsz and *flags + contain scan parameters. 
*/

ha_rows
handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
                                     void *seq_init_param, uint n_ranges_arg,
                                     uint *bufsz, uint *flags,
                                     Cost_estimate *cost)
{
  KEY_MULTI_RANGE range;
  range_seq_t seq_it;
  ha_rows total_rows= 0;
  uint n_ranges=0;
  ha_rows max_rows= stats.records;
  THD *thd= table->in_use;
  ulonglong io_blocks;

  /*
    Counter of blocks that contain range edges for those ranges
    for which records_in_range() is called
  */
  ulonglong edge_blocks_cnt= 0;
  /*
    Counter of blocks that contain index tuples for those ranges
    for which records_in_range() is called
  */
  ulonglong range_blocks_cnt= 0;
  /*
    The position of the block containing the last record of the previous range
    for which the info about range position is provided
  */
  ulonglong prev_range_last_block= UNUSED_PAGE_NO;
  /* The counter of records starting from prev_range_last_block */
  ulonglong prev_range_last_block_records= 0;
  /*
    The counter of single point ranges.
    (For single point ranges we do not call records_in_range())
  */
  ulonglong single_point_ranges= 0;
  /*
    The counter of single point ranges that we succeeded to assign
    to some blocks
  */
  ulonglong assigned_single_point_ranges= 0;
  /*
    Counter of single point ranges for which records_in_range is not
    called and that are encountered between two ranges without such property
    For example, let's have a subsequence of ranges
    R1,r1,....rk,R2
    where r1,...,rk are single point ranges for which records_in_range is
    called while R1 and R2 are not such ranges.
    Then single_point_ranges_delta will count ranges r1,...,rk.
  */
  ulonglong unassigned_single_point_ranges= 0;

  /* bytes an index entry occupies; a clustering key stores the whole row */
  uint len= table->key_info[keyno].key_length + table->file->ref_length;
  if (table->file->is_clustering_key(keyno))
    len= table->s->stored_rec_length;
  /* Assume block is 75 % full */
  uint avg_block_records= ((uint) (stats.block_size*3/4))/len + 1;
  uint limit= thd->variables.eq_range_index_dive_limit;
  bool use_statistics_for_eq_range= eq_ranges_exceeds_limit(seq,
                                                            seq_init_param,
                                                            limit);
  DBUG_ENTER("multi_range_read_info_const");

  /* Default MRR implementation doesn't need buffer */
  *bufsz= 0;

  seq_it= seq->init(seq_init_param, n_ranges, *flags);
  while (!seq->next(seq_it, &range))
  {
    ha_rows rows;

    if (unlikely(thd->killed != 0))
      DBUG_RETURN(HA_POS_ERROR);

    n_ranges++;
    key_range *min_endp, *max_endp;
    if (range.range_flag & GEOM_FLAG)
    {
      /* In this case tmp_min_flag contains the handler-read-function */
      range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG);
      min_endp= &range.start_key;
      max_endp= NULL;
    }
    else
    {
      min_endp= range.start_key.length? &range.start_key : NULL;
      max_endp= range.end_key.length? &range.end_key : NULL;
    }
    int keyparts_used= my_count_bits(range.start_key.keypart_map);

    if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
    {
      rows= 1;
      /*
        In this case we do not call records_in_range() and as a result
        do not get any info on the edge blocks for this range. However if it
        happens that the range for which we have such info uses the same block
        for its first record as the last range for which such info is
        provided uses for its last record then this range can be assigned
        later to one of the blocks used by other ranges.

        Note that we don't have to increment edge_blocks_cnt or
        range_blocks_cnt here.
      */
      single_point_ranges++;
    }
    else if (use_statistics_for_eq_range &&
             !(range.range_flag & NULL_RANGE) &&
             (range.range_flag & EQ_RANGE) &&
             table->key_info[keyno].actual_rec_per_key(keyparts_used - 1) > 0.5)
    {
      /* too many equality ranges: use rec_per_key statistics, no dives */
      rows= ((ha_rows) table->key_info[keyno].
             actual_rec_per_key(keyparts_used-1));
      range_blocks_cnt+= ((MY_MAX(rows, 1) - 1) / avg_block_records + 1);
    }
    else
    {
      page_range pages= unused_page_range;
      if ((rows= this->records_in_range(keyno, min_endp, max_endp, &pages)) ==
          HA_POS_ERROR)
      {
        /* Can't scan one range => can't do MRR scan at all */
        total_rows= HA_POS_ERROR;
        if (thd->is_error())
          DBUG_RETURN(HA_POS_ERROR);
        break;
      }
      if (pages.first_page == UNUSED_PAGE_NO)
      {
        /*
          The engine does not provide info on the range position.
          Place the range in a new block. Note that in this case
          any new range will be placed in a new block.
        */
        ulonglong additional_blocks= ((MY_MAX(rows,1) - 1) / avg_block_records +
                                      1);
        edge_blocks_cnt+= additional_blocks == 1 ? 1 : 2;
        range_blocks_cnt+= additional_blocks;
      }
      else
      {
        /* The info on the range position is provided */
        if (pages.first_page == prev_range_last_block)
        {
          /*
            The new range starts in the same block that the last range
            for which the position of the range was provided.
          */
          /*
            First add records of single point ranges that can be placed
            between these two ranges.
          */
          prev_range_last_block_records+= (single_point_ranges -
                                           assigned_single_point_ranges);
          assigned_single_point_ranges= single_point_ranges;
          if (pages.first_page == pages.last_page)
          {
            /*
              All records of the current range are in the same block
              Note that the prev_range_last_block_records can be much larger
              than max_records_in_block as the rows can be compressed!
            */
            prev_range_last_block_records+= rows;
            DBUG_ASSERT(prev_range_last_block_records <
                        stats.block_size);
          }
          else
          {
            /*
              The current range spans more than one block

              Place part of the range records in 'prev_range_last_block'
              and the remaining records in additional blocks.

              We don't know where the first key was positioned in the
              block, so we assume the range started in the middle of the
              block.

              Note that prev_range_last_block_records > avg_block_records
              can be true in case of compressed rows.
            */
            ha_rows rem_rows= rows;

            if (avg_block_records > prev_range_last_block_records)
            {
              ha_rows space_left_in_prev_block=
                (avg_block_records - prev_range_last_block_records)/2;
              rem_rows= 0;
              if (rows > space_left_in_prev_block)
                rem_rows= rows - space_left_in_prev_block;
            }
            /* Calculate how many additional blocks we need for rem_rows */
            ulonglong additional_blocks= ((MY_MAX(rem_rows, 1) - 1) /
                                          avg_block_records + 1);
            edge_blocks_cnt++;
            range_blocks_cnt+= additional_blocks;
            prev_range_last_block= pages.last_page;
            /* There is at least one row on last page */
            prev_range_last_block_records= 1;
          }
        }
        else
        {
          /*
            The new range does not start in the same block that the last range
            for which the position of the range was provided.
            Note that rows may be 0!
          */
          ulonglong additional_blocks= ((MY_MAX(rows, 1) - 1) /
                                        avg_block_records + 1);
          edge_blocks_cnt+= additional_blocks == 1 ? 1 : 2;
          range_blocks_cnt+= additional_blocks;
          unassigned_single_point_ranges+= (single_point_ranges -
                                            assigned_single_point_ranges);
          assigned_single_point_ranges= single_point_ranges;
          prev_range_last_block= pages.last_page;
          /* There is at least one row on last page */
          prev_range_last_block_records= 1;
        }
      }
    }
    total_rows+= rows;
  }
  /*
    Count the number of io_blocks that were not yet read and thus not cached.
    The number of equal read blocks that were not read are:

    (single_point_ranges - assigned_single_point_ranges).

    We don't add these to io_blocks as we don't want to penalize equal
    reads (if we did, a range that would read 5 rows would be
    regarded as better than one equal read).

    Better to assume we have done a records_in_range() for the equal
    range and it's also cached.
  */
  io_blocks= (range_blocks_cnt - edge_blocks_cnt);
  unassigned_single_point_ranges+= (single_point_ranges -
                                    assigned_single_point_ranges);

  if (total_rows != HA_POS_ERROR)
  {
    set_if_smaller(total_rows, max_rows);

    /* The following calculation is the same as in multi_range_read_info(): */
    *flags |= HA_MRR_USE_DEFAULT_IMPL;
    cost->reset();
    cost->avg_io_cost= cost->idx_avg_io_cost= avg_io_cost();

    if (!is_clustering_key(keyno))
    {
      cost->idx_io_count= (double) io_blocks;
      cost->idx_cpu_cost= (keyread_time(keyno, 0, total_rows) +
                           n_ranges * IDX_LOOKUP_COST);
      if (!(*flags & HA_MRR_INDEX_ONLY))
        cost->cpu_cost= read_time(keyno, 0, total_rows);
    }
    else
    {
      /*
        Clustered index
        If all index dives are to a few blocks, then limit the
        ranges used by read_time to the number of dives.
      */
      io_blocks+= unassigned_single_point_ranges;
      cost->idx_cpu_cost= n_ranges * IDX_LOOKUP_COST;
      uint limited_ranges= (uint) MY_MIN((ulonglong) n_ranges, io_blocks);
      cost->cpu_cost= read_time(keyno, limited_ranges, total_rows);
    }
    cost->cpu_cost+= (rows2double(total_rows) / TIME_FOR_COMPARE +
                      MULTI_RANGE_READ_SETUP_COST);
  }
  DBUG_PRINT("statistics",
             ("key: %s rows: %llu total_cost: %.3f io_blocks: %llu "
              "idx_io_count: %.3f cpu_cost: %.3f io_count: %.3f",
              table->s->keynames.type_names[keyno],
              (ulonglong) total_rows, cost->total_cost(), (ulonglong) io_blocks,
              cost->idx_io_count, cost->cpu_cost, cost->io_count));
  DBUG_RETURN(total_rows);
}


/**
  Get cost and other information about MRR scan over some sequence of ranges

  Calculate estimated cost and other information about an MRR scan for some
  sequence of ranges.

  The ranges themselves will be known only at execution phase. When this
  function is called we only know number of ranges and a (rough) E(#records)
  within those ranges.

  Currently this function is only called for "n-keypart singlepoint" ranges,
  i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"

  The flags parameter is a combination of those flags: HA_MRR_SORTED,
  HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.

  @param keyno           Index number
  @param n_ranges        Estimated number of ranges (i.e. intervals) in the
                         range sequence.
  @param n_rows          Estimated total number of records contained within all
                         of the ranges
  @param bufsz    INOUT  IN:  Size of the buffer available for use
                         OUT: Size of the buffer that will be actually used, or
                              0 if buffer is not needed.
  @param flags    INOUT  A combination of HA_MRR_* flags
  @param cost     OUT    Estimated cost of MRR access

  @retval
    0     OK, *cost contains cost of the scan, *bufsz and *flags contain scan
          parameters.
+ @retval + other Error or can't perform the requested scan +*/ + +ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, + uint key_parts, uint *bufsz, + uint *flags, Cost_estimate *cost) +{ + /* + Currently we expect this function to be called only in preparation of scan + with HA_MRR_SINGLE_POINT property. + */ + DBUG_ASSERT(*flags | HA_MRR_SINGLE_POINT); + + *bufsz= 0; /* Default implementation doesn't need a buffer */ + *flags |= HA_MRR_USE_DEFAULT_IMPL; + + cost->reset(); + /* Produce the same cost as non-MRR code does */ + if (!is_clustering_key(keyno)) + { + /* + idx_io_count could potentially be increased with the number of + index leaf blocks we have to read for finding n_rows. + */ + cost->idx_io_count= n_ranges; + cost->idx_cpu_cost= (keyread_time(keyno, 0, n_rows) + + n_ranges * IDX_LOOKUP_COST); + if (!(*flags & HA_MRR_INDEX_ONLY)) + { + cost->cpu_cost= read_time(keyno, 0, n_rows); + } + } + else + { + cost->cpu_cost= read_time(keyno, n_ranges, (uint)n_rows); + } + cost->cpu_cost+= rows2double(n_rows) / TIME_FOR_COMPARE; + return 0; +} + + +/** + Initialize the MRR scan + + Initialize the MRR scan. This function may do heavyweight scan + initialization like row prefetching/sorting/etc (NOTE: but better not do + it here as we may not need it, e.g. if we never satisfy WHERE clause on + previous tables. For many implementations it would be natural to do such + initializations in the first multi_read_range_next() call) + + mode is a combination of the following flags: HA_MRR_SORTED, + HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION + + @param seq Range sequence to be traversed + @param seq_init_param First parameter for seq->init() + @param n_ranges Number of ranges in the sequence + @param mode Flags, see the description section for the details + @param buf INOUT: memory buffer to be used + + @note + One must have called index_init() before calling this function. Several + multi_range_read_init() calls may be made in course of one query. 
  Buffer memory management is done according to the following scenario:
  The caller allocates the buffer and provides it to the callee by filling
  the members of HANDLER_BUFFER structure.
  The callee consumes all or some fraction of the provided buffer space, and
  sets the HANDLER_BUFFER members accordingly.
  The callee may use the buffer memory until the next multi_range_read_init()
  call is made, all records have been read, or until index_end() call is
  made, whichever comes first.

  @retval 0  OK
  @retval 1  Error
*/

int
handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
                               uint n_ranges, uint mode, HANDLER_BUFFER *buf)
{
  DBUG_ENTER("handler::multi_range_read_init");
  mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
  mrr_funcs= *seq_funcs;
  mrr_is_output_sorted= MY_TEST(mode & HA_MRR_SORTED);
  mrr_have_range= FALSE;      /* first get_next() starts the range iteration */
  DBUG_RETURN(0);
}

/**
  Get next record in MRR scan

  Default MRR implementation: read the next record

  @param range_info OUT  Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
                         Otherwise, the opaque value associated with the range
                         that contains the returned record.

  @retval 0      OK
  @retval other  Error code
*/

int handler::multi_range_read_next(range_id_t *range_info)
{
  int result= HA_ERR_END_OF_FILE;
  bool range_res;
  DBUG_ENTER("handler::multi_range_read_next");

  if (!mrr_have_range)
  {
    mrr_have_range= TRUE;
    goto start;
  }

  do
  {
    /* Save a call if there can be only one row in range. */
    if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
    {
      result= read_range_next();
      /* On success or non-EOF errors jump to the end. */
      if (result != HA_ERR_END_OF_FILE)
        break;
    }
    else
    {
      if (ha_was_semi_consistent_read())
      {
        /*
          The following assignment is redundant, but for extra safety and to
          remove the compiler warning:
        */
        range_res= FALSE;
        goto scan_it_again;
      }
      /*
        We need to set this for the last range only, but checking this
        condition is more expensive than just setting the result code.
      */
      result= HA_ERR_END_OF_FILE;
    }

start:
    /* Try the next range(s) until one matches a record. */
    while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
    {
scan_it_again:
      result= read_range_first(mrr_cur_range.start_key.keypart_map ?
                                 &mrr_cur_range.start_key : 0,
                               mrr_cur_range.end_key.keypart_map ?
                                 &mrr_cur_range.end_key : 0,
                               MY_TEST(mrr_cur_range.range_flag & EQ_RANGE),
                               mrr_is_output_sorted);
      if (result != HA_ERR_END_OF_FILE)
        break;
    }
  }
  while ((result == HA_ERR_END_OF_FILE) && !range_res);

  *range_info= mrr_cur_range.ptr;
  DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
  DBUG_RETURN(result);
}

/****************************************************************************
 * Mrr_*_reader classes (building blocks for DS-MRR)
 ***************************************************************************/

/*
  Initialize the simple (pass-through) index reader: it delegates directly
  to the default handler::multi_range_read_* implementation and needs no
  key buffer of its own.
*/
int Mrr_simple_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
                                  void *seq_init_param, uint n_ranges,
                                  uint mode, Key_parameters *key_par_arg,
                                  Lifo_buffer *key_buffer_arg,
                                  Buffer_manager *buf_manager_arg)
{
  HANDLER_BUFFER no_buffer = {NULL, NULL, NULL};
  file= h_arg;
  return file->handler::multi_range_read_init(seq_funcs, seq_init_param,
                                              n_ranges, mode, &no_buffer);
}


/*
  Get the next record, skipping index tuples rejected by the pushed
  skip_index_tuple callback (if any); prints fatal errors itself.
*/
int Mrr_simple_index_reader::get_next(range_id_t *range_info)
{
  int res;
  while (!(res= file->handler::multi_range_read_next(range_info)))
  {
    KEY_MULTI_RANGE *curr_range= &file->handler::mrr_cur_range;
    if (!file->mrr_funcs.skip_index_tuple ||
        !file->mrr_funcs.skip_index_tuple(file->mrr_iter, curr_range->ptr))
      break;
  }
  if (res && res != HA_ERR_END_OF_FILE && res != HA_ERR_KEY_NOT_FOUND)
    file->print_error(res, MYF(0)); // Fatal error
  return res;
}


/**
  @brief Get next index record

  @param range_info  OUT identifier of range that the returned record belongs to

  @note
    We actually iterate over nested sequences:
    - an ordered sequence of groups of identical keys
      - each key group has key value, which has multiple matching records
        - thus, each record matches all members of the key group

  @retval 0                    OK, next record was successfully read
  @retval HA_ERR_END_OF_FILE   End of records
  @retval Other                Some other error; Error is printed
*/

int Mrr_ordered_index_reader::get_next(range_id_t *range_info)
{
  int res;
  DBUG_ENTER("Mrr_ordered_index_reader::get_next");

  for(;;)
  {
    if (!scanning_key_val_iter)
    {
      /* position the key-value iterator on the next key group */
      while ((res= kv_it.init(this)))
      {
        if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE))
          DBUG_RETURN(res); /* Some fatal error */

        if (key_buffer->is_empty())
        {
          DBUG_RETURN(HA_ERR_END_OF_FILE);
        }
      }
      scanning_key_val_iter= TRUE;
    }

    if ((res= kv_it.get_next(range_info)))
    {
      scanning_key_val_iter= FALSE;
      if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE))
        DBUG_RETURN(res);
      kv_it.move_to_next_key_value();
      continue;
    }
    if (!skip_index_tuple(*range_info) &&
        !skip_record(*range_info, NULL))
    {
      break;
    }
    /* Go get another (record, range_id) combination */
  } /* while */

  DBUG_RETURN(0);
}


/*
  Supply index reader with the O(1)space it needs for scan interrupt/restore
  operation
*/

bool Mrr_ordered_index_reader::set_interruption_temp_buffer(uint rowid_length,
                                                            uint key_len,
                                                            uint saved_pk_len,
                                                            uchar **space_start,
                                                            uchar *space_end)
{
  /* TRUE (failure) if the caller's buffer cannot hold all saved state */
  if (space_end - *space_start <= (ptrdiff_t)(rowid_length + key_len + saved_pk_len))
    return TRUE;
  support_scan_interruptions= TRUE;

  /* carve rowid / optional PK / key areas out of the caller's buffer */
  saved_rowid= *space_start;
  *space_start += rowid_length;

  if (saved_pk_len)
  {
    saved_primary_key= *space_start;
    *space_start += saved_pk_len;
  }
  else
    saved_primary_key= NULL;

  saved_key_tuple= *space_start;
  *space_start += key_len;

  have_saved_rowid= FALSE;
  read_was_interrupted= FALSE;
  return FALSE;
}

/* Disable scan interrupt/restore support (no temp buffer available). */
void Mrr_ordered_index_reader::set_no_interruption_temp_buffer()
{
  support_scan_interruptions= FALSE;
  saved_key_tuple= saved_rowid= saved_primary_key= NULL; /* safety */
  have_saved_rowid= FALSE;
  read_was_interrupted= FALSE;
}

/* Save current key value, primary key (if any) and rowid so the index scan
   can be resumed later by resume_read(). */
void Mrr_ordered_index_reader::interrupt_read()
{
  DBUG_ASSERT(support_scan_interruptions);
  TABLE *table= file->get_table();
  KEY *used_index= &table->key_info[file->active_index];
  /* Save the current key value */
  key_copy(saved_key_tuple, table->record[0],
           used_index, used_index->key_length);

  if (saved_primary_key)
  {
    key_copy(saved_primary_key, table->record[0],
             &table->key_info[table->s->primary_key],
             table->key_info[table->s->primary_key].key_length);
  }
  read_was_interrupted= TRUE;

  /* Save the last rowid */
  memcpy(saved_rowid, file->ref, file->ref_length);
  have_saved_rowid= TRUE;
}

/* Report the rowid: the saved one if the scan was interrupted, else the
   handler's current position. */
void Mrr_ordered_index_reader::position()
{
  if (have_saved_rowid)
    memcpy(file->ref, saved_rowid, file->ref_length);
  else
    Mrr_index_reader::position();
}

/* Restore the key (and primary key) saved by interrupt_read(). */
void Mrr_ordered_index_reader::resume_read()
{
  TABLE *table= file->get_table();

  if (!read_was_interrupted)
    return;

  KEY *used_index= &table->key_info[file->active_index];
  key_restore(table->record[0], saved_key_tuple,
              used_index, used_index->key_length);
  if (saved_primary_key)
  {
    key_restore(table->record[0], saved_primary_key,
                &table->key_info[table->s->primary_key],
                table->key_info[table->s->primary_key].key_length);
  }
}


/**
  Fill the buffer with (lookup_tuple, range_id) pairs and sort

  @return
    0                  OK, the buffer is non-empty and sorted
    HA_ERR_END_OF_FILE Source exhausted, the buffer is empty.
*/

int Mrr_ordered_index_reader::refill_buffer(bool initial)
{
  KEY_MULTI_RANGE cur_range;
  DBUG_ENTER("Mrr_ordered_index_reader::refill_buffer");

  DBUG_ASSERT(key_buffer->is_empty());

  if (source_exhausted)
    DBUG_RETURN(HA_ERR_END_OF_FILE);

  buf_manager->reset_buffer_sizes(buf_manager->arg);
  key_buffer->reset();
  key_buffer->setup_writing(keypar.key_size_in_keybuf,
                            is_mrr_assoc? sizeof(range_id_t) : 0);

  while (key_buffer->can_write() &&
         !(source_exhausted= mrr_funcs.next(mrr_iter, &cur_range)))
  {
    DBUG_ASSERT(cur_range.range_flag & EQ_RANGE);

    /* Put key, or {key, range_id} pair into the buffer */
    key_buffer->write_ptr1= keypar.use_key_pointers ?
                              (uchar*)&cur_range.start_key.key :
                              (uchar*)cur_range.start_key.key;
    key_buffer->write_ptr2= (uchar*)&cur_range.ptr;
    key_buffer->write();
  }

  /* Force get_next() to start with kv_it.init() call: */
  scanning_key_val_iter= FALSE;

  if (source_exhausted && key_buffer->is_empty())
    DBUG_RETURN(HA_ERR_END_OF_FILE);

  if (!initial)
  {
    /* This is a non-initial buffer fill and we've got a non-empty buffer */
    THD *thd= current_thd;
    status_var_increment(thd->status_var.ha_mrr_key_refills_count);
  }

  /* sort keys; comparator direction depends on the buffer's growth direction */
  key_buffer->sort((key_buffer->type() == Lifo_buffer::FORWARD)?
                     (qsort2_cmp)Mrr_ordered_index_reader::compare_keys_reverse :
                     (qsort2_cmp)Mrr_ordered_index_reader::compare_keys,
                   this);
  DBUG_RETURN(0);
}


/* Initialize the ordered (key-sorting) index reader. */
int Mrr_ordered_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
                                   void *seq_init_param, uint n_ranges,
                                   uint mode, Key_parameters *key_par_arg,
                                   Lifo_buffer *key_buffer_arg,
                                   Buffer_manager *buf_manager_arg)
{
  file= h_arg;
  key_buffer= key_buffer_arg;
  buf_manager= buf_manager_arg;
  keypar= *key_par_arg;

  /* lookups are unique if the whole unique key is covered by the tuple */
  KEY *key_info= &file->get_table()->key_info[file->active_index];
  keypar.index_ranges_unique= MY_TEST(key_info->flags & HA_NOSAME &&
                                      key_info->user_defined_key_parts ==
                                        my_count_bits(keypar.key_tuple_map));

  mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
  is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
  mrr_funcs= *seq_funcs;
  source_exhausted= FALSE;
  read_was_interrupted= false;
  have_saved_rowid= FALSE;
  return 0;
}


/* qsort2 comparator: descending rowid order (buffer is read backwards). */
static int rowid_cmp_reverse(void *file, uchar *a, uchar *b)
{
  return - ((handler*)file)->cmp_ref(a, b);
}


int Mrr_ordered_rndpos_reader::init(handler *h_arg,
                                    Mrr_index_reader *index_reader_arg,
                                    uint mode,
                                    Lifo_buffer *buf,
                                    Rowid_filter *filter)
{
  file= h_arg;
  index_reader= index_reader_arg;
  rowid_buffer= buf;
  is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
  index_reader_exhausted= FALSE;
  index_reader_needs_refill= TRUE;
  rowid_filter= filter;

  return 0;
}


/**
  DS-MRR: Fill and sort the rowid buffer

  Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
  buffer. When the buffer is full or scan is completed, sort the buffer by
  rowid and return.

  When this function returns, either rowid buffer is not empty, or the source
  of lookup keys (i.e. ranges) is exhausted.
  @retval 0      OK, the next portion of rowids is in the buffer,
                 properly ordered
  @retval other  Error
*/

int Mrr_ordered_rndpos_reader::refill_buffer(bool initial)
{
  int res;
  bool first_call= initial;
  DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_buffer");

  if (index_reader_exhausted)
    DBUG_RETURN(HA_ERR_END_OF_FILE);

  /* keep refilling the index reader until we get at least one rowid */
  while (initial || index_reader_needs_refill ||
         (res= refill_from_index_reader()) == HA_ERR_END_OF_FILE)
  {
    if ((res= index_reader->refill_buffer(initial)))
    {
      if (res == HA_ERR_END_OF_FILE)
        index_reader_exhausted= TRUE;
      break;
    }
    initial= FALSE;
    index_reader_needs_refill= FALSE;
  }

  if (!first_call && !index_reader_exhausted)
  {
    /* Ok, this was a successful buffer refill operation */
    THD *thd= current_thd;
    status_var_increment(thd->status_var.ha_mrr_rowid_refills_count);
  }

  DBUG_RETURN(res);
}


/* Default rowid retrieval: ask the handler for its current position. */
void Mrr_index_reader::position()
{
  file->position(file->get_table()->record[0]);
}


/*
  @brief Try to refill the rowid buffer without calling
  index_reader->refill_buffer().
*/

int Mrr_ordered_rndpos_reader::refill_from_index_reader()
{
  range_id_t range_info;
  int res;
  DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_from_index_reader");

  DBUG_ASSERT(rowid_buffer->is_empty());
  index_rowid= index_reader->get_rowid_ptr();
  rowid_buffer->reset();
  rowid_buffer->setup_writing(file->ref_length,
                              is_mrr_assoc? sizeof(range_id_t) : 0);

  last_identical_rowid= NULL;

  index_reader->resume_read();
  while (rowid_buffer->can_write())
  {
    res= index_reader->get_next(&range_info);

    if (res)
    {
      if (res != HA_ERR_END_OF_FILE)
        DBUG_RETURN(res);
      index_reader_needs_refill=TRUE;
      break;
    }

    index_reader->position();

    /*
      If the built rowid filter cannot be used at the engine level, use it here.
    */
    if (rowid_filter && !file->pushed_rowid_filter &&
        !rowid_filter->check((char *)index_rowid))
      continue;

    /* Put rowid, or {rowid, range_id} pair into the buffer */
    rowid_buffer->write_ptr1= index_rowid;
    rowid_buffer->write_ptr2= (uchar*)&range_info;
    rowid_buffer->write();
  }

  /*
    When index_reader_needs_refill=TRUE, this means we've got all of index
    tuples for lookups keys that index_reader had. We are not in the middle
    of an index read, so there is no need to call interrupt_read.

    Actually, we must not call interrupt_read(), because it could be that we
    haven't read a single row (because all index lookups returned
    HA_ERR_KEY_NOT_FOUND). In this case, interrupt_read() will cause [harmless]
    valgrind warnings when trying to save garbage from table->record[0].
  */
  if (!index_reader_needs_refill)
    index_reader->interrupt_read();
  /* Sort the buffer contents by rowid */
  rowid_buffer->sort((qsort2_cmp)rowid_cmp_reverse, (void*)file);

  rowid_buffer->setup_reading(file->ref_length,
                              is_mrr_assoc ? sizeof(range_id_t) : 0);
  DBUG_RETURN(rowid_buffer->is_empty()? HA_ERR_END_OF_FILE : 0);
}


/*
  Get the next {record, range_id} using ordered array of rowid+range_id pairs

  @note
    Since we have sorted rowids, we try not to make multiple rnd_pos() calls
    with the same rowid value.
*/

int Mrr_ordered_rndpos_reader::get_next(range_id_t *range_info)
{
  int res;

  /*
    First, check if rowid buffer has elements with the same rowid value as
    the previous.
  */
  while (last_identical_rowid)
  {
    /*
      Current record (the one we've returned in previous call) was obtained
      from a rowid that matched multiple range_ids. Return this record again,
      with next matching range_id.
    */
    (void)rowid_buffer->read();

    if (rowid_buffer->read_ptr1 == last_identical_rowid)
      last_identical_rowid= NULL; /* reached the last of identical rowids */

    if (!is_mrr_assoc)
      return 0;

    memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t));
    if (!index_reader->skip_record(*range_info, rowid_buffer->read_ptr1))
      return 0;
  }

  /*
    Ok, last_identical_rowid==NULL, it's time to read next different rowid
    value and get record for it.
  */
  for(;;)
  {
    /* Return eof if there are no rowids in the buffer after re-fill attempt */
    if (rowid_buffer->read())
      return HA_ERR_END_OF_FILE;

    if (is_mrr_assoc)
    {
      memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t));
      if (index_reader->skip_record(*range_info, rowid_buffer->read_ptr1))
        continue;
    }

    res= file->ha_rnd_pos(file->get_table()->record[0],
                          rowid_buffer->read_ptr1);

    if (res)
      return res;    /* Some fatal error */

    break;  /* Got another record */
  }

  /*
    Check if subsequent buffer elements have the same rowid value as this
    one. If yes, remember this fact so that we don't make any more rnd_pos()
    calls with this value.

    Note: this implies that SQL layer doesn't touch table->record[0]
    between calls.
  */
  Lifo_buffer_iterator it;
  it.init(rowid_buffer);
  while (!it.read())
  {
    if (file->cmp_ref(it.read_ptr1, rowid_buffer->read_ptr1))
      break;
    last_identical_rowid= it.read_ptr1;
  }
  return 0;
}


/****************************************************************************
 * Top-level DS-MRR implementation functions (the ones called by storage engine)
 ***************************************************************************/

/**
  DS-MRR: Initialize and start MRR scan

  Initialize and start the MRR scan. Depending on the mode parameter, this
  may use default or DS-MRR implementation.
  @param h_arg           Table handler to be used
  @param key             Index to be used
  @param seq_funcs       Interval sequence enumeration functions
  @param seq_init_param  Interval sequence enumeration parameter
  @param n_ranges        Number of ranges in the sequence.
  @param mode            HA_MRR_* modes to use
  @param buf      INOUT  Buffer to use

  @retval 0     Ok, Scan started.
  @retval other Error
*/

int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
                           void *seq_init_param, uint n_ranges, uint mode,
                           HANDLER_BUFFER *buf)
{
  TABLE *table= h_arg->get_table();
  THD *thd= table->in_use;
  int res;
  Key_parameters keypar;
  uint UNINIT_VAR(key_buff_elem_size); /* set/used when do_sort_keys==TRUE */
  handler *h_idx;
  Mrr_ordered_rndpos_reader *disk_strategy= NULL;
  bool do_sort_keys= FALSE;
  DBUG_ENTER("DsMrr_impl::dsmrr_init");
  /*
    index_merge may invoke a scan on an object for which dsmrr_info[_const]
    has not been called, so set the owner handler here as well.
  */
  primary_file= h_arg;
  is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);

  strategy_exhausted= FALSE;

  /* By default, have do-nothing buffer manager */
  buf_manager.arg= this;
  buf_manager.reset_buffer_sizes= do_nothing;
  buf_manager.redistribute_buffer_space= do_nothing;

  if (mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED))
    goto use_default_impl;

  /*
    Determine whether we'll need to do key sorting and/or rnd_pos() scan
  */
  index_strategy= NULL;
  if ((mode & HA_MRR_SINGLE_POINT) &&
      optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
  {
    do_sort_keys= TRUE;
    index_strategy= &reader_factory.ordered_index_reader;
  }
  else
    index_strategy= &reader_factory.simple_index_reader;

  strategy= index_strategy;
  /*
    We don't need a rowid-to-rndpos step if
     - We're doing a scan on clustered primary key
     - [In the future] We're doing an index_only read
  */
  DBUG_ASSERT(primary_file->inited == handler::INDEX ||
              (primary_file->inited == handler::RND &&
               secondary_file &&
               secondary_file->inited == handler::INDEX));

  h_idx= (primary_file->inited == handler::INDEX)? primary_file: secondary_file;
  keyno= h_idx->active_index;

  if (! h_idx->is_clustering_key(keyno))
  {
    strategy= disk_strategy= &reader_factory.ordered_rndpos_reader;
    if (h_arg->pushed_rowid_filter)
    {
      /*
        Currently usage of a rowid filter within the InnoDB engine is not
        supported if the table is accessed by the primary key.
        With optimizer switches 'mrr' and 'mrr_sort_keys' both enabled
        any access by a secondary index is converted to the rndpos access.
        In InnoDB the rndpos access always uses the primary key.
        Do not use pushed rowid filter if the table is accessed actually by
        the primary key. Use the rowid filter outside the engine code (see
        Mrr_ordered_rndpos_reader::refill_from_index_reader).
      */
      rowid_filter= h_arg->pushed_rowid_filter;
      h_arg->cancel_pushed_rowid_filter();
    }
  }

  full_buf= buf->buffer;
  full_buf_end= buf->buffer_end;

  if (do_sort_keys)
  {
    /* Pre-calculate some parameters of key sorting */
    keypar.use_key_pointers= MY_TEST(mode & HA_MRR_MATERIALIZED_KEYS);
    seq_funcs->get_key_info(seq_init_param, &keypar.key_tuple_length,
                            &keypar.key_tuple_map);
    keypar.key_size_in_keybuf= keypar.use_key_pointers?
                               sizeof(char*) : keypar.key_tuple_length;
    key_buff_elem_size= keypar.key_size_in_keybuf +
                        (int)is_mrr_assoc * sizeof(void*);

    /* Ordered index reader needs some space to store an index tuple */
    if (strategy != index_strategy)
    {
      uint saved_pk_length=0;
      uint pk= h_idx->get_table()->s->primary_key;
      if (h_idx->pk_is_clustering_key(pk))
      {
        saved_pk_length= h_idx->get_table()->key_info[pk].key_length;
      }

      KEY *used_index= &h_idx->get_table()->key_info[h_idx->active_index];
      if (reader_factory.ordered_index_reader.
            set_interruption_temp_buffer(primary_file->ref_length,
                                         used_index->key_length,
                                         saved_pk_length,
                                         &full_buf, full_buf_end))
        goto use_default_impl;
    }
    else
      reader_factory.ordered_index_reader.set_no_interruption_temp_buffer();
  }

  if (strategy == index_strategy)
  {
    /*
      Index strategy alone handles the record retrieval. Give all buffer
      space to it. Key buffer should have forward orientation so we can
      return the end of it.
    */
    key_buffer= &forward_key_buf;
    key_buffer->set_buffer_space(full_buf, full_buf_end);

    /* Safety: specify that rowid buffer has zero size: */
    rowid_buffer.set_buffer_space(full_buf_end, full_buf_end);

    if (do_sort_keys && !key_buffer->have_space_for(key_buff_elem_size))
      goto use_default_impl;

    if ((res= index_strategy->init(primary_file, seq_funcs, seq_init_param, n_ranges,
                                   mode, &keypar, key_buffer, &buf_manager)))
      goto error;
  }
  else
  {
    /* We'll have both index and rndpos strategies working together */
    if (do_sort_keys)
    {
      /* Both strategies will need buffer space, share the buffer */
      if (setup_buffer_sharing(keypar.key_size_in_keybuf, keypar.key_tuple_map))
        goto use_default_impl;

      buf_manager.reset_buffer_sizes= reset_buffer_sizes;
      buf_manager.redistribute_buffer_space= redistribute_buffer_space;
    }
    else
    {
      /* index strategy doesn't need buffer, give all space to rowids*/
      rowid_buffer.set_buffer_space(full_buf, full_buf_end);
      if (!rowid_buffer.have_space_for(primary_file->ref_length +
                                       (int)is_mrr_assoc * sizeof(range_id_t)))
        goto use_default_impl;
    }

    // setup_two_handlers() will call dsmrr_close(), which clears the filter.
    // Save its value and restore afterwards.
    Rowid_filter *tmp = rowid_filter;
    if ((res= setup_two_handlers()))
      goto error;
    rowid_filter= tmp;

    if ((res= index_strategy->init(secondary_file, seq_funcs, seq_init_param,
                                   n_ranges, mode, &keypar, key_buffer,
                                   &buf_manager)) ||
        (res= disk_strategy->init(primary_file, index_strategy, mode,
                                  &rowid_buffer, rowid_filter)))
    {
      goto error;
    }
  }

  /*
    At this point, we're sure that we're running a native MRR scan (i.e. we
    didn't fall back to the default implementation for some reason).
  */
  status_var_increment(thd->status_var.ha_mrr_init_count);

  res= strategy->refill_buffer(TRUE);
  if (res)
  {
    if (res != HA_ERR_END_OF_FILE)
      goto error;
    strategy_exhausted= TRUE;
  }

  /*
    If we have scanned through all intervals in *seq, then adjust *buf to
    indicate that the remaining buffer space will not be used.
  */
//  if (dsmrr_eof)
//    buf->end_of_used_area= rowid_buffer.end_of_space();


  DBUG_RETURN(0);
error:
  close_second_handler();
   /* Safety, not really needed but: */
  strategy= NULL;
  DBUG_RETURN(res);

use_default_impl:
  if (primary_file->inited != handler::INDEX)
  {
    /* We can get here when
       - we've previously successfully done a DS-MRR scan (and so have
         secondary_file != NULL, secondary_file->inited= INDEX,
         primary_file->inited= RND)
       - for this invocation, we haven't got enough buffer space, and so we
         have to use the default MRR implementation.

       note: primary_file->ha_index_end() will call dsmrr_close() which will
       close/destroy the secondary_file, this is intentional.
       (Yes this is slow, but one can't expect performance with a join buffer
        so small that it can accommodate one rowid and one index tuple)
    */
    if ((res= primary_file->ha_rnd_end()) ||
        (res= primary_file->ha_index_init(keyno, MY_TEST(mode & HA_MRR_SORTED))))
    {
      DBUG_RETURN(res);
    }
  }
  /* Call correct init function and assign to top level object */
  Mrr_simple_index_reader *s= &reader_factory.simple_index_reader;
  res= s->init(primary_file, seq_funcs, seq_init_param, n_ranges, mode, NULL,
               NULL, NULL);
  strategy= s;
  DBUG_RETURN(res);
}


/*
  Whatever the current state is, make it so that we have two handler objects:
  - primary_file   - initialized for rnd_pos() scan
  - secondary_file - initialized for scanning the index specified in
                     this->keyno
  RETURN
    0      OK
    HA_XXX Error code
*/

int DsMrr_impl::setup_two_handlers()
{
  int res;
  THD *thd= primary_file->get_table()->in_use;
  DBUG_ENTER("DsMrr_impl::setup_two_handlers");
  if (!secondary_file)
  {
    handler *new_h2;
    Item *pushed_cond= NULL;
    DBUG_ASSERT(primary_file->inited == handler::INDEX);
    /* Create a separate handler object to do rnd_pos() calls. */
    /*
      ::clone() takes up a lot of stack, especially on 64 bit platforms.
      The constant 5 is an empiric result.
    */
    if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
      DBUG_RETURN(1);

    /* Create a separate handler object to do rnd_pos() calls. */
    if (!(new_h2= primary_file->clone(primary_file->get_table()->s->
                                      normalized_path.str,
                                      thd->mem_root)) ||
        new_h2->ha_external_lock(thd, F_RDLCK))
    {
      delete new_h2;
      DBUG_RETURN(1);
    }

    if (keyno == primary_file->pushed_idx_cond_keyno)
      pushed_cond= primary_file->pushed_idx_cond;

    Mrr_reader *save_strategy= strategy;
    strategy= NULL;
    /*
      Caution: this call will invoke this->dsmrr_close(). Do not put the
      created secondary table handler new_h2 into this->secondary_file or it
      will delete it. Also, save the picked strategy
    */
    res= primary_file->ha_index_end();

    strategy= save_strategy;
    secondary_file= new_h2;

    if (res || (res= (primary_file->ha_rnd_init(FALSE))))
      goto error;

    table->prepare_for_position();
    secondary_file->extra(HA_EXTRA_KEYREAD);
    secondary_file->mrr_iter= primary_file->mrr_iter;

    if ((res= secondary_file->ha_index_init(keyno, FALSE)))
      goto error;

    if (pushed_cond)
      secondary_file->idx_cond_push(keyno, pushed_cond);
  }
  else
  {
    DBUG_ASSERT(secondary_file && secondary_file->inited==handler::INDEX);
    /*
      We get here when the access alternates between MRR scan(s) and non-MRR
      scans.

      Calling primary_file->index_end() will invoke dsmrr_close() for this
      object, which will delete secondary_file. We need to keep it, so put it
      away and don't let it be deleted:
    */
    if (primary_file->inited == handler::INDEX)
    {
      handler *save_h2= secondary_file;
      Mrr_reader *save_strategy= strategy;
      secondary_file= NULL;
      strategy= NULL;
      res= primary_file->ha_index_end();
      secondary_file= save_h2;
      strategy= save_strategy;
      if (res)
        goto error;
    }
    if ((primary_file->inited != handler::RND) &&
        (res= primary_file->ha_rnd_init(FALSE)))
      goto error;
  }
  DBUG_RETURN(0);

error:
  DBUG_RETURN(res);
}


/*
  Tear down and delete the secondary (rnd_pos) handler object, if one was
  created by setup_two_handlers().
*/
void DsMrr_impl::close_second_handler()
{
  if (secondary_file)
  {
    secondary_file->extra(HA_EXTRA_NO_KEYREAD);
    secondary_file->ha_index_or_rnd_end();
    secondary_file->ha_external_unlock(current_thd);
    secondary_file->ha_close();
    delete secondary_file;
    secondary_file= NULL;
  }
}


void DsMrr_impl::dsmrr_close()
{
  DBUG_ENTER("DsMrr_impl::dsmrr_close");
  rowid_filter= NULL;
  close_second_handler();
  strategy= NULL;
  DBUG_VOID_RETURN;
}


/*
  my_qsort2-compatible static member function to compare key tuples
*/

int Mrr_ordered_index_reader::compare_keys(void* arg, uchar* key1_arg,
                                           uchar* key2_arg)
{
  Mrr_ordered_index_reader *reader= (Mrr_ordered_index_reader*)arg;
  TABLE *table= reader->file->get_table();
  KEY_PART_INFO *part= table->key_info[reader->file->active_index].key_part;
  uchar *key1, *key2;

  if (reader->keypar.use_key_pointers)
  {
    /* the buffer stores pointers to keys, get to the keys */
    memcpy(&key1, key1_arg, sizeof(char*));
    memcpy(&key2, key2_arg, sizeof(char*));
  }
  else
  {
    key1= key1_arg;
    key2= key2_arg;
  }

  return key_tuple_cmp(part, key1, key2, reader->keypar.key_tuple_length);
}


int Mrr_ordered_index_reader::compare_keys_reverse(void* arg, uchar* key1,
                                                   uchar* key2)
{
  return -compare_keys(arg, key1, key2);
}


/**
  Set the buffer space to be shared between rowid and key buffer

  @return FALSE ok
  @return TRUE  There is so little buffer space that we won't be able to use
                the strategy.
                This happens when we don't have enough space for one rowid
                element and one key element so this is mainly targeted at
                testing.
*/

bool DsMrr_impl::setup_buffer_sharing(uint key_size_in_keybuf,
                                      key_part_map key_tuple_map)
{
  long key_buff_elem_size= key_size_in_keybuf +
                           (int)is_mrr_assoc * sizeof(range_id_t);

  KEY *key_info= &primary_file->get_table()->key_info[keyno];
  /*
    Ok if we got here we need to allocate one part of the buffer
    for keys and another part for rowids.
  */
  ulonglong rowid_buf_elem_size= primary_file->ref_length +
                                 (int)is_mrr_assoc * sizeof(range_id_t);

  /*
    Use rec_per_key statistics as a basis to find out how many rowids
    we'll get for each key value.
     TODO: what should be the default value to use when there is no
           statistics?
  */
  uint parts= my_count_bits(key_tuple_map);
  ha_rows rpc;
  ulonglong rowids_size= rowid_buf_elem_size;
  if ((rpc= (ha_rows) key_info->actual_rec_per_key(parts - 1)))
    rowids_size= rowid_buf_elem_size * rpc;

  double fraction_for_rowids=
    (ulonglong2double(rowids_size) /
     (ulonglong2double(rowids_size) + key_buff_elem_size));

  ptrdiff_t bytes_for_rowids=
    (ptrdiff_t)floor(0.5 + fraction_for_rowids * (full_buf_end - full_buf));

  ptrdiff_t bytes_for_keys= (full_buf_end - full_buf) - bytes_for_rowids;

  if (bytes_for_keys < key_buff_elem_size + 1 ||
      bytes_for_rowids < (ptrdiff_t)rowid_buf_elem_size + 1)
    return TRUE; /* Failed to provide minimum space for one of the buffers */

  rowid_buffer_end= full_buf + bytes_for_rowids;
  rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end);
  key_buffer= &backward_key_buf;
  key_buffer->set_buffer_space(rowid_buffer_end, full_buf_end);

  /* The above code guarantees that the buffers are big enough */
  DBUG_ASSERT(key_buffer->have_space_for(key_buff_elem_size) &&
              rowid_buffer.have_space_for((size_t)rowid_buf_elem_size));

  return FALSE;
}


void DsMrr_impl::do_nothing(void *dsmrr_arg)
{
  /* Do nothing */
}


/* Buffer_manager callback: restore both buffers to their original extents. */
void DsMrr_impl::reset_buffer_sizes(void *dsmrr_arg)
{
  DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg;
  dsmrr->rowid_buffer.set_buffer_space(dsmrr->full_buf,
                                       dsmrr->rowid_buffer_end);
  dsmrr->key_buffer->set_buffer_space(dsmrr->rowid_buffer_end,
                                      dsmrr->full_buf_end);
}


/*
  Take unused space from the key buffer and give it to the rowid buffer
*/

void DsMrr_impl::redistribute_buffer_space(void *dsmrr_arg)
{
  DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg;
  uchar *unused_start, *unused_end;
  dsmrr->key_buffer->remove_unused_space(&unused_start, &unused_end);
  dsmrr->rowid_buffer.grow(unused_start, unused_end);
}


/*
  @brief Initialize the iterator

  @note
  Initialize the iterator to produce matches for the key of the first element
  in owner_arg->key_buffer
  @retval 0                   OK
  @retval HA_ERR_END_OF_FILE  Either the owner->key_buffer is empty or
                              no matches for the key we've tried (check
                              key_buffer->is_empty() to tell these apart)
  @retval other code          Fatal error
*/

int Key_value_records_iterator::init(Mrr_ordered_index_reader *owner_arg)
{
  int res;
  owner= owner_arg;

  identical_key_it.init(owner->key_buffer);
  owner->key_buffer->setup_reading(owner->keypar.key_size_in_keybuf,
                                   owner->is_mrr_assoc ? sizeof(void*) : 0);

  if (identical_key_it.read())
    return HA_ERR_END_OF_FILE;

  uchar *key_in_buf= last_identical_key_ptr= identical_key_it.read_ptr1;

  uchar *index_tuple= key_in_buf;
  if (owner->keypar.use_key_pointers)
    memcpy(&index_tuple, key_in_buf, sizeof(char*));

  /* Check out how many more identical keys are following */
  while (!identical_key_it.read())
  {
    if (Mrr_ordered_index_reader::compare_keys(owner, key_in_buf,
                                               identical_key_it.read_ptr1))
      break;
    last_identical_key_ptr= identical_key_it.read_ptr1;
  }
  identical_key_it.init(owner->key_buffer);
  res= owner->file->ha_index_read_map(owner->file->get_table()->record[0],
                                      index_tuple,
                                      owner->keypar.key_tuple_map,
                                      HA_READ_KEY_EXACT);

  if (res)
  {
    /* Failed to find any matching records */
    move_to_next_key_value();
    return res;
  }
  owner->have_saved_rowid= FALSE;
  get_next_row= FALSE;
  return 0;
}


int Key_value_records_iterator::get_next(range_id_t *range_info)
{
  int res;

  if (get_next_row)
  {
    if (owner->keypar.index_ranges_unique)
    {
      /* We're using a full unique key, no point to call index_next_same */
      return HA_ERR_END_OF_FILE;
    }

    handler *h= owner->file;
    uchar *lookup_key;
    if (owner->keypar.use_key_pointers)
      memcpy(&lookup_key, identical_key_it.read_ptr1, sizeof(void*));
    else
      lookup_key= identical_key_it.read_ptr1;

    if ((res= h->ha_index_next_same(h->get_table()->record[0],
                                    lookup_key,
                                    owner->keypar.key_tuple_length)))
    {
      /* It's either HA_ERR_END_OF_FILE or some other error */
      return res;
    }
    identical_key_it.init(owner->key_buffer);
    owner->have_saved_rowid= FALSE;
    get_next_row= FALSE;
  }

  identical_key_it.read(); /* This gets us next range_id */
  memcpy(range_info, identical_key_it.read_ptr2, sizeof(range_id_t));

  if (!last_identical_key_ptr ||
      (identical_key_it.read_ptr1 == last_identical_key_ptr))
  {
    /*
      We've reached the last of the identical keys that current record is a
      match for.  Set get_next_row=TRUE so that we read the next index record
      on the next call to this function.
    */
    get_next_row= TRUE;
  }
  return 0;
}


/* Skip forward in the key buffer past the current group of identical keys. */
void Key_value_records_iterator::move_to_next_key_value()
{
  while (!owner->key_buffer->read() &&
         (owner->key_buffer->read_ptr1 != last_identical_key_ptr)) {}
}


/**
  DS-MRR implementation: multi_range_read_next() function.

  Calling convention is like multi_range_read_next() has.
*/

int DsMrr_impl::dsmrr_next(range_id_t *range_info)
{
  int res;
  if (strategy_exhausted)
    return HA_ERR_END_OF_FILE;

  while ((res= strategy->get_next(range_info)) == HA_ERR_END_OF_FILE)
  {
    if ((res= strategy->refill_buffer(FALSE)))
      break; /* EOF or error */
  }
  return res;
}


/**
  DS-MRR implementation: multi_range_read_info() function
*/
ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
                               uint key_parts,
                               uint *bufsz, uint *flags, Cost_estimate *cost)
{
  ha_rows res __attribute__((unused));
  uint def_flags= *flags;
  uint def_bufsz= *bufsz;

  /* Get cost/flags/mem_usage of default MRR implementation */
  res= primary_file->handler::multi_range_read_info(keyno, n_ranges, rows,
                                                    key_parts, &def_bufsz,
                                                    &def_flags, cost);
  DBUG_ASSERT(!res);

  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
      choose_mrr_impl(keyno, rows, flags, bufsz, cost))
  {
    /* Default implementation is chosen */
    DBUG_PRINT("info", ("Default MRR implementation chosen"));
    *flags= def_flags;
    *bufsz= def_bufsz;
  }
  else
  {
    /* *flags and *bufsz were set by choose_mrr_impl */
    DBUG_PRINT("info", ("DS-MRR implementation chosen"));
  }
  return 0;
}


/**
  DS-MRR Implementation: multi_range_read_info_const() function
*/

ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
                                     void *seq_init_param, uint n_ranges,
                                     uint *bufsz, uint *flags, Cost_estimate *cost)
{
  ha_rows rows;
  uint def_flags= *flags;
  uint def_bufsz= *bufsz;
  /* Get cost/flags/mem_usage of default MRR implementation */
  rows= primary_file->handler::multi_range_read_info_const(keyno, seq,
                                                           seq_init_param,
                                                           n_ranges,
                                                           &def_bufsz,
                                                           &def_flags, cost);
  if (rows == HA_POS_ERROR)
  {
    /* Default implementation can't perform MRR scan => we can't either */
    return rows;
  }

  /*
    If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
    use the default MRR implementation (we need it for UPDATE/DELETE).
    Otherwise, make a choice based on cost and @@optimizer_switch settings
  */
  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
      choose_mrr_impl(keyno, rows, flags, bufsz, cost))
  {
    DBUG_PRINT("info", ("Default MRR implementation chosen"));
    *flags= def_flags;
    *bufsz= def_bufsz;
  }
  else
  {
    /* *flags and *bufsz were set by choose_mrr_impl */
    DBUG_PRINT("info", ("DS-MRR implementation chosen"));
  }
  return rows;
}


/**
  Check if key has partially-covered columns

  We can't use DS-MRR to perform range scans when the ranges are over
  partially-covered keys, because we'll not have full key part values
  (we'll have their prefixes from the index) and will not be able to check
  if we've reached the end of the range.

  @param keyno Key to check

  @todo
    Allow use of DS-MRR in cases where the index has partially-covered
    components but they are not used for scanning.
+ + @retval TRUE Yes + @retval FALSE No +*/ + +bool key_uses_partial_cols(TABLE_SHARE *share, uint keyno) +{ + KEY_PART_INFO *kp= share->key_info[keyno].key_part; + KEY_PART_INFO *kp_end= kp + share->key_info[keyno].user_defined_key_parts; + for (; kp != kp_end; kp++) + { + if (!kp->field->part_of_key.is_set(keyno)) + return TRUE; + } + return FALSE; +} + + +/* + Check if key/flags allow DS-MRR/CPK strategy to be used + + @param thd + @param keyno Index that will be used + @param mrr_flags + + @retval TRUE DS-MRR/CPK should be used + @retval FALSE Otherwise +*/ + +bool DsMrr_impl::check_cpk_scan(THD *thd, TABLE_SHARE *share, uint keyno, + uint mrr_flags) +{ + return MY_TEST((mrr_flags & HA_MRR_SINGLE_POINT) && + primary_file->is_clustering_key(keyno) && + optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS)); +} + + +/* + DS-MRR Internals: Choose between Default MRR implementation and DS-MRR + + Make the choice between using Default MRR implementation and DS-MRR. + This function contains common functionality factored out of dsmrr_info() + and dsmrr_info_const(). The function assumes that the default MRR + implementation's applicability requirements are satisfied. 
  @param keyno       Index number
  @param rows        E(full rows to be retrieved)
  @param flags  IN   MRR flags provided by the MRR user
                OUT  If DS-MRR is chosen, flags of DS-MRR implementation
                     else the value is not modified
  @param bufsz  IN   If DS-MRR is chosen, buffer use of DS-MRR implementation
                     else the value is not modified
  @param cost   IN   Cost of default MRR implementation
                OUT  If DS-MRR is chosen, cost of DS-MRR scan
                     else the value is not modified

  @retval TRUE   Default MRR implementation should be used
  @retval FALSE  DS-MRR implementation should be used
*/


bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
                                 uint *bufsz, Cost_estimate *cost)
{
  Cost_estimate dsmrr_cost;
  bool res;
  THD *thd= primary_file->get_table()->in_use;
  TABLE_SHARE *share= primary_file->get_table_share();

  bool doing_cpk_scan= check_cpk_scan(thd, share, keyno, *flags);
  bool using_cpk= primary_file->is_clustering_key(keyno);
  *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS;
  if (!optimizer_flag(thd, OPTIMIZER_SWITCH_MRR) ||
      *flags & HA_MRR_INDEX_ONLY ||
      (using_cpk && !doing_cpk_scan) || key_uses_partial_cols(share, keyno))
  {
    /* Use the default implementation */
    *flags |= HA_MRR_USE_DEFAULT_IMPL;
    *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS;
    return TRUE;
  }

  uint add_len= share->key_info[keyno].key_length + primary_file->ref_length;
  if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, add_len,
                              &dsmrr_cost))
    return TRUE;

  bool force_dsmrr;
  /*
    If mrr_cost_based flag is not set, then set cost of DS-MRR to be minimum
    of DS-MRR and Default implementations cost. This allows one to force use
    of DS-MRR whenever it is applicable without affecting other cost-based
    choices.
  */
  if ((force_dsmrr= !optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_COST_BASED)) &&
      dsmrr_cost.total_cost() > cost->total_cost())
    dsmrr_cost= *cost;

  if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost())
  {
    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
    *flags &= ~HA_MRR_SORTED;          /* We will return unordered output */
    *cost= dsmrr_cost;
    res= FALSE;


    if ((using_cpk && doing_cpk_scan) ||
        (optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS) &&
         *flags & HA_MRR_SINGLE_POINT))
    {
      *flags |= DSMRR_IMPL_SORT_KEYS;
    }

    if (!(using_cpk && doing_cpk_scan) &&
        !(*flags & HA_MRR_INDEX_ONLY))
    {
      *flags |= DSMRR_IMPL_SORT_ROWIDS;
    }
    /*
    if ((*flags & HA_MRR_SINGLE_POINT) &&
         optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
      *flags |= HA_MRR_MATERIALIZED_KEYS;
    */
  }
  else
  {
    /* Use the default MRR implementation */
    res= TRUE;
  }
  return res;
}

/*
  Take the flags we've returned previously and print one of
  - Key-ordered scan
  - Rowid-ordered scan
  - Key-ordered Rowid-ordered scan
*/

int DsMrr_impl::dsmrr_explain_info(uint mrr_mode, char *str, size_t size)
{
  const char *key_ordered= "Key-ordered scan";
  const char *rowid_ordered= "Rowid-ordered scan";
  const char *both_ordered= "Key-ordered Rowid-ordered scan";
  const char *used_str="";
  const uint BOTH_FLAGS= (DSMRR_IMPL_SORT_KEYS | DSMRR_IMPL_SORT_ROWIDS);

  if (!(mrr_mode & HA_MRR_USE_DEFAULT_IMPL))
  {
    if ((mrr_mode & BOTH_FLAGS) == BOTH_FLAGS)
      used_str= both_ordered;
    else if (mrr_mode & DSMRR_IMPL_SORT_KEYS)
      used_str= key_ordered;
    else if (mrr_mode & DSMRR_IMPL_SORT_ROWIDS)
      used_str= rowid_ordered;

    size_t used_str_len= strlen(used_str);
    size_t copy_len= MY_MIN(used_str_len, size);
    /* NOTE(review): output is not NUL-terminated; caller is assumed to use
       the returned length — confirm against callers. */
    memcpy(str, used_str, copy_len);
    return (int)copy_len;
  }
  return 0;
}


static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows,
                                    Cost_estimate *cost);


/**
  Get cost of DS-MRR scan

  @param keynr
              Index to be used
  @param rows                E(Number of rows to be scanned)
  @param flags               Scan parameters (HA_MRR_* flags)
  @param buffer_size  INOUT  Buffer size
                             IN: Buffer of size 0 means the function
                             will determine the best size and return it.
  @param extra_mem_overhead  Extra memory overhead of the MRR implementation
                             (the function assumes this many bytes of buffer
                             space will not be usable by DS-MRR)
  @param cost         OUT    The cost

  @retval FALSE  OK
  @retval TRUE   Error, DS-MRR cannot be used (the buffer is too small
                 for even 1 rowid)
*/

bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
                                         uint *buffer_size,
                                         uint extra_mem_overhead,
                                         Cost_estimate *cost)
{
  ulong max_buff_entries, elem_size;
  ha_rows rows_in_full_step;
  ha_rows rows_in_last_step;
  uint n_full_steps;
  double index_read_cost;

  /* One buffer element: a rowid, plus a range_id pointer if associations
     are requested */
  elem_size= primary_file->ref_length +
             sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION));

  if (!*buffer_size)
  {
    /*
      We are requested to determine how much memory we need.
      Request memory to finish the scan in one pass but do not request
      more than @@mrr_buff_size.
    */
    *buffer_size= (uint) MY_MIN(extra_mem_overhead + elem_size*(ulong)rows,
                                MY_MAX(table->in_use->variables.mrr_buff_size,
                                       extra_mem_overhead));
  }

  if (elem_size + extra_mem_overhead > *buffer_size)
    return TRUE; /* Buffer has not enough space for even 1 rowid */

  max_buff_entries = (*buffer_size - extra_mem_overhead) / elem_size;

  /* Number of iterations we'll make with full buffer */
  n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);

  /*
    Get numbers of rows we'll be processing in
     - non-last sweep, with full buffer
     - last iteration, with non-full buffer
  */
  rows_in_full_step= max_buff_entries;
  rows_in_last_step= rows % max_buff_entries;

  /* Adjust buffer size if we expect to use only part of the buffer */
  if (n_full_steps)
  {
    get_sort_and_sweep_cost(table, rows_in_full_step, cost);
    cost->multiply(n_full_steps);
  }
  else
  {
    cost->reset();
    *buffer_size= (uint)MY_MAX(*buffer_size,
                               (size_t)(1.2*rows_in_last_step) * elem_size +
                               primary_file->ref_length +
                               table->key_info[keynr].key_length);
  }

  Cost_estimate last_step_cost;
  get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
  cost->add(&last_step_cost);

  if (n_full_steps != 0)
    cost->mem_cost= *buffer_size;
  else
    cost->mem_cost= (double)rows_in_last_step * elem_size;

  /* Total cost of all index accesses */
  index_read_cost= primary_file->keyread_time(keynr, 1, rows);
  cost->add_io(index_read_cost, 1 /* Random seeks */);

  cost->cpu_cost+= (rows2double(rows) / TIME_FOR_COMPARE +
                    MULTI_RANGE_READ_SETUP_COST);
  return FALSE;
}


/*
  Get cost of one sort-and-sweep step

  It consists of two parts:
   - sort an array of #nrows ROWIDs using qsort
   - read #nrows records from table in a sweep.
+ + @param table Table being accessed + @param nrows Number of rows to be sorted and retrieved + @param cost OUT The cost of scan +*/ + +static +void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost) +{ + if (nrows) + { + get_sweep_read_cost(table, nrows, FALSE, cost); + /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */ + double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID); + if (cmp_op < 3) + cmp_op= 3; + cost->cpu_cost += cmp_op * log2(cmp_op); + } + else + cost->reset(); +} + + +/** + Get cost of reading nrows table records in a "disk sweep" + + A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made + for an ordered sequence of rowids. + + We assume hard disk IO. The read is performed as follows: + + 1. The disk head is moved to the needed cylinder + 2. The controller waits for the plate to rotate + 3. The data is transferred + + Time to do #3 is insignificant compared to #2+#1. + + Time to move the disk head is proportional to head travel distance. + + Time to wait for the plate to rotate depends on whether the disk head + was moved or not. + + If disk head wasn't moved, the wait time is proportional to distance + between the previous block and the block we're reading. + + If the head was moved, we don't know how much we'll need to wait for the + plate to rotate. We assume the wait time to be a variate with a mean of + 0.5 of full rotation time. + + Our cost units are "random disk seeks". The cost of random disk seek is + actually not a constant, it depends one range of cylinders we're going + to access. We make it constant by introducing a fuzzy concept of "typical + datafile length" (it's fuzzy as it's hard to tell whether it should + include index file, temp.tables etc). Then random seek cost is: + + 1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length + + We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9. 
  If handler::avg_io_cost() < 1.0, then we will trust the handler
  when it comes to the average cost (this is for example true for HEAP).

  @param table        Table to be accessed
  @param nrows        Number of rows to retrieve
  @param interrupted  TRUE <=> Assume that the disk sweep will be
                      interrupted by other disk IO. FALSE - otherwise.
  @param cost         OUT  The cost.
*/

void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
                         Cost_estimate *cost)
{
  DBUG_ENTER("get_sweep_read_cost");

  cost->reset();
  if (table->file->pk_is_clustering_key(table->s->primary_key))
  {
    /* Clustered PK: rows live in the index, treat as a plain read */
    cost->cpu_cost= table->file->read_time(table->s->primary_key,
                                           (uint) nrows, nrows);
  }
  else if ((cost->avg_io_cost= table->file->avg_io_cost()) >= 0.999)
  {
    /* Estimate how many distinct disk blocks the sweep will touch */
    double n_blocks=
      ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
    double busy_blocks=
      n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
    if (busy_blocks < 1.0)
      busy_blocks= 1.0;

    DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
                       busy_blocks));
    cost->io_count= busy_blocks;

    if (!interrupted)
    {
      /* Assume reading is done in one 'sweep' */
      cost->avg_io_cost= (DISK_SEEK_BASE_COST +
                          DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
    }
  }
  DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
  DBUG_VOID_RETURN;
}


/* **************************************************************************
 * DS-MRR implementation ends
 ***************************************************************************/
diff --git a/sql/multi_range_read.h b/sql/multi_range_read.h
new file mode 100644
index 00000000..57cfd217
--- /dev/null
+++ b/sql/multi_range_read.h
@@ -0,0 +1,670 @@
+/*
+  Copyright (c) 2009, 2011, Monty Program Ab
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @defgroup DS-MRR declarations + @{ +*/ + +/** + A Disk-Sweep implementation of MRR Interface (DS-MRR for short) + + This is a "plugin"(*) for storage engines that allows to + 1. When doing index scans, read table rows in rowid order; + 2. when making many index lookups, do them in key order and don't + lookup the same key value multiple times; + 3. Do both #1 and #2, when applicable. + These changes are expected to speed up query execution for disk-based + storage engines running io-bound loads and "big" queries (ie. queries that + do joins and enumerate lots of records). + + (*) - only conceptually. No dynamic loading or binary compatibility of any + kind. + + General scheme of things: + + SQL Layer code + | | | + v v v + -|---|---|---- handler->multi_range_read_XXX() function calls + | | | + _____________________________________ + / DS-MRR module \ + | (order/de-duplicate lookup keys, | + | scan indexes in key order, | + | order/de-duplicate rowids, | + | retrieve full record reads in rowid | + | order) | + \_____________________________________/ + | | | + -|---|---|----- handler->read_range_first()/read_range_next(), + | | | handler->index_read(), handler->rnd_pos() calls. + | | | + v v v + Storage engine internals + + + Currently DS-MRR is used by MyISAM, InnoDB and Maria storage engines. + Potentially it can be used with any table handler that has disk-based data + storage and has better performance when reading data in rowid order. 
+*/ + +#include "sql_lifo_buffer.h" + +class DsMrr_impl; +class Mrr_ordered_index_reader; + + +/* A structure with key parameters that's shared among several classes */ +class Key_parameters +{ +public: + uint key_tuple_length; /* Length of index lookup tuple, in bytes */ + key_part_map key_tuple_map; /* keyparts used in index lookup tuples */ + + /* + This is + = key_tuple_length if we copy keys to buffer + = sizeof(void*) if we're using pointers to materialized keys. + */ + uint key_size_in_keybuf; + + /* TRUE <=> don't copy key values, use pointers to them instead. */ + bool use_key_pointers; + + /* TRUE <=> We can get at most one index tuple for a lookup key */ + bool index_ranges_unique; +}; + + +/** + A class to enumerate (record, range_id) pairs that match given key value. + + @note + + The idea is that we have a Lifo_buffer which holds (key, range_id) pairs + ordered by key value. From the front of the buffer we see + + (key_val1, range_id1), (key_val1, range_id2) ... (key_val2, range_idN) + + we take the first elements that have the same key value (key_val1 in the + example above), and make lookup into the table. The table will have + multiple matches for key_val1: + + == Table Index == + ... + key_val1 -> key_val1, index_tuple1 + key_val1, index_tuple2 + ... + key_val1, index_tupleN + ... + + Our goal is to produce all possible combinations, i.e. we need: + + {(key_val1, index_tuple1), range_id1} + {(key_val1, index_tuple1), range_id2} + ... ... | + {(key_val1, index_tuple1), range_idN}, + + {(key_val1, index_tuple2), range_id1} + {(key_val1, index_tuple2), range_id2} + ... ... | + {(key_val1, index_tuple2), range_idN}, + + ... ... ... 
+ + {(key_val1, index_tupleK), range_idN} +*/ + +class Key_value_records_iterator +{ + /* Use this to get table handler, key buffer and other parameters */ + Mrr_ordered_index_reader *owner; + + /* Iterator to get (key, range_id) pairs from */ + Lifo_buffer_iterator identical_key_it; + + /* + Last of the identical key values (when we get this pointer from + identical_key_it, it will be time to stop). + */ + uchar *last_identical_key_ptr; + + /* + FALSE <=> we're right after the init() call, the record has been already + read with owner->file->index_read_map() call + */ + bool get_next_row; + +public: + int init(Mrr_ordered_index_reader *owner_arg); + int get_next(range_id_t *range_info); + void move_to_next_key_value(); +}; + + +/* + Buffer manager interface. Mrr_reader objects use it to inquire DsMrr_impl + to manage buffer space for them. +*/ +typedef struct st_buffer_manager +{ +public: + /* Opaque value to be passed as the first argument to all member functions */ + void *arg; + + /* + This is called when we've freed more space from the rowid buffer. The + callee will get the unused space from the rowid buffer and give it to the + key buffer. + */ + void (*redistribute_buffer_space)(void *arg); + + /* + This is called when both key and rowid buffers are empty, and so it's time + to reset them to their original size (They've lost their original size, + because we were dynamically growing rowid buffer and shrinking key buffer). + */ + void (*reset_buffer_sizes)(void *arg); + +} Buffer_manager; + + +/* + Mrr_reader - DS-MRR execution strategy abstraction + + A reader produces ([index]_record, range_info) pairs, and requires periodic + refill operations. + + - one starts using the reader by calling reader->get_next(), + - when a get_next() call returns HA_ERR_END_OF_FILE, one must call + refill_buffer() before they can make more get_next() calls. 
+ - when refill_buffer() returns HA_ERR_END_OF_FILE, this means the real + end of stream and get_next() should not be called anymore. + + Both functions can return other error codes, these mean unrecoverable errors + after which one cannot continue. +*/ + +class Mrr_reader +{ +public: + virtual int get_next(range_id_t *range_info) = 0; + virtual int refill_buffer(bool initial) = 0; + virtual ~Mrr_reader() = default; /* just to remove compiler warning */ +}; + + +/* + A common base for readers that do index scans and produce index tuples +*/ + +class Mrr_index_reader : public Mrr_reader +{ +protected: + handler *file; /* Handler object to use */ +public: + virtual int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, + void *seq_init_param, uint n_ranges, + uint mode, Key_parameters *key_par, + Lifo_buffer *key_buffer, + Buffer_manager *buf_manager_arg) = 0; + + /* Get pointer to place where every get_next() call will put rowid */ + virtual uchar *get_rowid_ptr() = 0; + /* Get the rowid (call this after get_next() call) */ + virtual void position(); + virtual bool skip_record(range_id_t range_id, uchar *rowid) = 0; + + virtual void interrupt_read() {} + virtual void resume_read() {} +}; + + +/* + A "bypass" index reader that just does an index scan. The index scan is done + by calling default MRR implementation (i.e. handler::multi_range_read_XXX()) + functions. +*/ + +class Mrr_simple_index_reader : public Mrr_index_reader +{ +public: + int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, + void *seq_init_param, uint n_ranges, + uint mode, Key_parameters *key_par, + Lifo_buffer *key_buffer, + Buffer_manager *buf_manager_arg); + int get_next(range_id_t *range_info); + int refill_buffer(bool initial) { return initial? 
0: HA_ERR_END_OF_FILE; } + uchar *get_rowid_ptr() { return file->ref; } + bool skip_record(range_id_t range_id, uchar *rowid) + { + return (file->mrr_funcs.skip_record && + file->mrr_funcs.skip_record(file->mrr_iter, range_id, rowid)); + } +}; + + +/* + A reader that sorts the key values before it makes the index lookups. +*/ + +class Mrr_ordered_index_reader : public Mrr_index_reader +{ +public: + int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, + void *seq_init_param, uint n_ranges, + uint mode, Key_parameters *key_par, + Lifo_buffer *key_buffer, + Buffer_manager *buf_manager_arg); + int get_next(range_id_t *range_info); + int refill_buffer(bool initial); + uchar *get_rowid_ptr() { return file->ref; } + + bool skip_record(range_id_t range_info, uchar *rowid) + { + return (mrr_funcs.skip_record && + mrr_funcs.skip_record(mrr_iter, range_info, rowid)); + } + + bool skip_index_tuple(range_id_t range_info) + { + return (mrr_funcs.skip_index_tuple && + mrr_funcs.skip_index_tuple(mrr_iter, range_info)); + } + + bool set_interruption_temp_buffer(uint rowid_length, uint key_len, + uint saved_pk_len, + uchar **space_start, uchar *space_end); + void set_no_interruption_temp_buffer(); + + void interrupt_read(); + void resume_read(); + void position(); +private: + Key_value_records_iterator kv_it; + + bool scanning_key_val_iter; + + /* Buffer to store (key, range_id) pairs */ + Lifo_buffer *key_buffer; + + /* This manages key buffer allocation and sizing for us */ + Buffer_manager *buf_manager; + + Key_parameters keypar; /* index scan and lookup tuple parameters */ + + /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */ + bool is_mrr_assoc; + + /* Range sequence iteration members */ + RANGE_SEQ_IF mrr_funcs; + range_seq_t mrr_iter; + + /* TRUE == reached eof when enumerating ranges */ + bool source_exhausted; + + /* + Following members are for interrupt_read()/resume_read(). 
The idea is that + in some cases index scan that is done by this object is interrupted by + rnd_pos() calls made by Mrr_ordered_rndpos_reader. The problem is that + we're sharing handler->record[0] with that object, and it destroys its + contents. + We need to save/restore our current + - index tuple (for pushed index condition checks) + - clustered primary key values (again, for pushed index condition checks) + - rowid of the last record we've retrieved (in case this rowid matches + multiple ranges and we'll need to return it again) + */ + bool support_scan_interruptions; + /* Space where we save the rowid of the last record we've returned */ + uchar *saved_rowid; + + /* TRUE <=> saved_rowid has the last saved rowid */ + bool have_saved_rowid; + + uchar *saved_key_tuple; /* Saved current key tuple */ + uchar *saved_primary_key; /* Saved current primary key tuple */ + + /* + TRUE<=> saved_key_tuple (and saved_primary_key when applicable) have + valid values. + */ + bool read_was_interrupted; + + static int compare_keys(void* arg, uchar* key1, uchar* key2); + static int compare_keys_reverse(void* arg, uchar* key1, uchar* key2); + + friend class Key_value_records_iterator; + friend class DsMrr_impl; + friend class Mrr_ordered_rndpos_reader; +}; + + +/* + A reader that gets rowids from an Mrr_index_reader, and then sorts them + before getting full records with handler->rndpos() calls. 
+*/ + +class Mrr_ordered_rndpos_reader : public Mrr_reader +{ +public: + int init(handler *file, Mrr_index_reader *index_reader, uint mode, + Lifo_buffer *buf, Rowid_filter *filter); + int get_next(range_id_t *range_info); + int refill_buffer(bool initial); +private: + handler *file; /* Handler to use */ + + /* This is what we get (rowid, range_info) pairs from */ + Mrr_index_reader *index_reader; + + /* index_reader->get_next() puts rowid here */ + uchar *index_rowid; + + /* TRUE <=> index_reader->refill_buffer() call has returned EOF */ + bool index_reader_exhausted; + + /* + TRUE <=> We should call index_reader->refill_buffer(). This happens if + 1. we've made index_reader->get_next() call which returned EOF + 2. we haven't made any index_reader calls (and our first call should + be index_reader->refill_buffer(initial=TRUE) + */ + bool index_reader_needs_refill; + + /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */ + bool is_mrr_assoc; + + /* + When reading from ordered rowid buffer: the rowid element of the last + buffer element that has rowid identical to this one. + */ + uchar *last_identical_rowid; + + /* Buffer to store (rowid, range_id) pairs */ + Lifo_buffer *rowid_buffer; + + /* Rowid filter to be checked against (if any) */ + Rowid_filter *rowid_filter; + + int refill_from_index_reader(); +}; + + +/* + A primitive "factory" of various Mrr_*_reader classes (the point is to + get various kinds of readers without having to allocate them on the heap) +*/ + +class Mrr_reader_factory +{ +public: + Mrr_ordered_rndpos_reader ordered_rndpos_reader; + Mrr_ordered_index_reader ordered_index_reader; + Mrr_simple_index_reader simple_index_reader; +}; + + +#define DSMRR_IMPL_SORT_KEYS HA_MRR_IMPLEMENTATION_FLAG1 +#define DSMRR_IMPL_SORT_ROWIDS HA_MRR_IMPLEMENTATION_FLAG2 + +/* + DS-MRR implementation for one table. Create/use one object of this class for + each ha_{myisam/innobase/etc} object. 
That object will be further referred to + as "the handler" + + DsMrr_impl supports has the following execution strategies: + + - Bypass DS-MRR, pass all calls to default MRR implementation, which is + an MRR-to-non-MRR call converter. + - Key-Ordered Retrieval + - Rowid-Ordered Retrieval + + DsMrr_impl will use one of the above strategies, or a combination of them, + according to the following diagram: + + (mrr function calls) + | + +----------------->-----------------+ + | | + ___________v______________ _______________v________________ + / default: use lookup keys \ / KEY-ORDERED RETRIEVAL: \ + | (or ranges) in whatever | | sort lookup keys and then make | + | order they are supplied | | index lookups in index order | + \__________________________/ \________________________________/ + | | | | | + +---<---+ | +--------------->-----------|----+ + | | | | + | | +---------------+ | + | ______v___ ______ | _______________v_______________ + | / default: read \ | / ROWID-ORDERED RETRIEVAL: \ + | | table records | | | Before reading table records, | + v | in random order | v | sort their rowids and then | + | \_________________/ | | read them in rowid order | + | | | \_______________________________/ + | | | | + | | | | + +-->---+ | +----<------+-----------<--------+ + | | | + v v v + (table records and range_ids) + + The choice of strategy depends on MRR scan properties, table properties + (whether we're scanning clustered primary key), and @@optimizer_switch + settings. + + Key-Ordered Retrieval + --------------------- + The idea is: if MRR scan is essentially a series of lookups on + + tbl.key=value1 OR tbl.key=value2 OR ... OR tbl.key=valueN + + then it makes sense to collect and order the set of lookup values, i.e. + + sort(value1, value2, .. valueN) + + and then do index lookups in index order. This results in fewer index page + fetch operations, and we also can avoid making multiple index lookups for the + same value. 
That is, if value1=valueN we can easily discover that after + sorting and make one index lookup for them instead of two. + + Rowid-Ordered Retrieval + ----------------------- + If we do a regular index scan or a series of index lookups, we'll be hitting + table records at random. For disk-based engines, this is much slower than + reading the same records in disk order. We assume that disk ordering of + rows is the same as ordering of their rowids (which is provided by + handler::cmp_ref()) + In order to retrieve records in different order, we must separate index + scanning and record fetching, that is, MRR scan uses the following steps: + + 1. Scan the index (and only index, that is, with HA_EXTRA_KEYREAD on) and + fill a buffer with {rowid, range_id} pairs + 2. Sort the buffer by rowid value + 3. for each {rowid, range_id} pair in the buffer + get record by rowid and return the {record, range_id} pair + 4. Repeat the above steps until we've exhausted the list of ranges we're + scanning. + + Buffer space management considerations + -------------------------------------- + With regards to buffer/memory management, MRR interface specifies that + - SQL layer provides multi_range_read_init() with buffer of certain size. + - MRR implementation may use (i.e. have at its disposal till the end of + the MRR scan) all of the buffer, or return the unused end of the buffer + to SQL layer. + + DS-MRR needs buffer in order to accumulate and sort rowids and/or keys. When + we need to accumulate/sort only keys (or only rowids), it is fairly trivial. + + When we need to accumulate/sort both keys and rowids, efficient buffer use + gets complicated. We need to: + - First, accumulate keys and sort them + - Then use the keys (smaller values go first) to obtain rowids. A key is not + needed after we've got matching rowids for it. 
+ - Make sure that rowids are accumulated at the front of the buffer, so that we + can return the end part of the buffer to SQL layer, should there be too + few rowid values to occupy the buffer. + + All of these goals are achieved by using the following scheme: + + | | We get an empty buffer from SQL layer. + + | *-| + | *----| First, we fill the buffer with keys. Key_buffer + | *-------| part grows from end of the buffer space to start + | *----------| (In this picture, the buffer is big enough to + | *-------------| accomodate all keys and even have some space left) + + | *=============| We want to do key-ordered index scan, so we sort + the keys + + |-x *===========| Then we use the keys get rowids. Rowids are + |----x *========| stored from start of buffer space towards the end. + |--------x *=====| The part of the buffer occupied with keys + |------------x *===| gradually frees up space for rowids. In this + |--------------x *=| picture we run out of keys before we've ran out + |----------------x | of buffer space (it can be other way as well). + + |================x | Then we sort the rowids. + + | |~~~| The unused part of the buffer is at the end, so + we can return it to the SQL layer. 
+ + |================* Sorted rowids are then used to read table records + in disk order + +*/ + +class DsMrr_impl +{ +public: + typedef void (handler::*range_check_toggle_func_t)(bool on); + + DsMrr_impl() + : secondary_file(NULL), + rowid_filter(NULL) {}; + + void init(handler *h_arg, TABLE *table_arg) + { + primary_file= h_arg; + table= table_arg; + } + int dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, + void *seq_init_param, uint n_ranges, uint mode, + HANDLER_BUFFER *buf); + void dsmrr_close(); + int dsmrr_next(range_id_t *range_info); + + ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts, + uint *bufsz, uint *flags, Cost_estimate *cost); + + ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq, + void *seq_init_param, uint n_ranges, uint *bufsz, + uint *flags, Cost_estimate *cost); + + int dsmrr_explain_info(uint mrr_mode, char *str, size_t size); +private: + /* Buffer to store (key, range_id) pairs */ + Lifo_buffer *key_buffer; + + /* + The "owner" handler object (the one that is expected to "own" this object + and call its functions). + */ + handler *primary_file; + TABLE *table; /* Always equal to primary_file->table */ + + /* + Secondary handler object. (created when needed, we need it when we need + to run both index scan and rnd_pos() scan at the same time) + */ + handler *secondary_file; + + /* + The rowid filter that DS-MRR has "unpushed" from the storage engine. + If it's present, DS-MRR will use it. + */ + Rowid_filter *rowid_filter; + + uint keyno; /* index we're running the scan on */ + /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */ + bool is_mrr_assoc; + + Mrr_reader_factory reader_factory; + + Mrr_reader *strategy; + bool strategy_exhausted; + + Mrr_index_reader *index_strategy; + + /* The whole buffer space that we're using */ + uchar *full_buf; + uchar *full_buf_end; + + /* + When using both rowid and key buffers: the boundary between key and rowid + parts of the buffer. 
This is the "original" value, actual memory ranges + used by key and rowid parts may be different because of dynamic space + reallocation between them. + */ + uchar *rowid_buffer_end; + + /* + One of the following two is used for key buffer: forward is used when + we only need key buffer, backward is used when we need both key and rowid + buffers. + */ + Forward_lifo_buffer forward_key_buf; + Backward_lifo_buffer backward_key_buf; + + /* + Buffer to store (rowid, range_id) pairs, or just rowids if + is_mrr_assoc==FALSE + */ + Forward_lifo_buffer rowid_buffer; + + bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, + Cost_estimate *cost); + bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, + uint *buffer_size, uint extra_mem_overhead, + Cost_estimate *cost); + bool check_cpk_scan(THD *thd, TABLE_SHARE *share, uint keyno, uint mrr_flags); + + bool setup_buffer_sharing(uint key_size_in_keybuf, key_part_map key_tuple_map); + + /* Buffer_manager and its member functions */ + Buffer_manager buf_manager; + static void redistribute_buffer_space(void *dsmrr_arg); + static void reset_buffer_sizes(void *dsmrr_arg); + static void do_nothing(void *dsmrr_arg); + + Lifo_buffer* get_key_buffer() { return key_buffer; } + + friend class Key_value_records_iterator; + friend class Mrr_ordered_index_reader; + friend class Mrr_ordered_rndpos_reader; + + int setup_two_handlers(); + void close_second_handler(); +}; + +/** + @} (end of group DS-MRR declarations) +*/ + diff --git a/sql/my_apc.cc b/sql/my_apc.cc new file mode 100644 index 00000000..50e88abf --- /dev/null +++ b/sql/my_apc.cc @@ -0,0 +1,239 @@ +/* + Copyright (c) 2011, 2013 Monty Program Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifndef MY_APC_STANDALONE + +#include "mariadb.h" +#include "sql_class.h" + +#endif + +/* For standalone testing of APC system, see unittest/sql/my_apc-t.cc */ + +/* + Initialize the target. + + @note + Initialization must be done prior to enabling/disabling the target, or making + any call requests to it. + Initial state after initialization is 'disabled'. +*/ +void Apc_target::init(mysql_mutex_t *target_mutex) +{ + DBUG_ASSERT(!enabled); + LOCK_thd_kill_ptr= target_mutex; +#ifndef DBUG_OFF + n_calls_processed= 0; +#endif +} + + +/* [internal] Put request qe into the request list */ + +void Apc_target::enqueue_request(Call_request *qe) +{ + mysql_mutex_assert_owner(LOCK_thd_kill_ptr); + if (apc_calls) + { + Call_request *after= apc_calls->prev; + qe->next= apc_calls; + apc_calls->prev= qe; + + qe->prev= after; + after->next= qe; + } + else + { + apc_calls= qe; + qe->next= qe->prev= qe; + } +} + + +/* + [internal] Remove request qe from the request queue. + + The request is not necessarily first in the queue. 
+*/ + +void Apc_target::dequeue_request(Call_request *qe) +{ + mysql_mutex_assert_owner(LOCK_thd_kill_ptr); + if (apc_calls == qe) + { + if ((apc_calls= apc_calls->next) == qe) + { + apc_calls= NULL; + } + } + + qe->prev->next= qe->next; + qe->next->prev= qe->prev; +} + +#ifdef HAVE_PSI_INTERFACE + +/* One key for all conds */ +PSI_cond_key key_show_explain_request_COND; + +static PSI_cond_info show_explain_psi_conds[]= +{ + { &key_show_explain_request_COND, "show_explain", 0 /* not using PSI_FLAG_GLOBAL*/ } +}; + +void init_show_explain_psi_keys(void) +{ + if (PSI_server == NULL) + return; + + PSI_server->register_cond("sql", show_explain_psi_conds, + array_elements(show_explain_psi_conds)); +} +#endif + + +/* + Make an APC (Async Procedure Call) to another thread. + + @detail + Make an APC call: schedule it for execution and wait until the target + thread has executed it. + + - The caller is responsible for making sure he's not posting request + to the thread he's calling this function from. + + - The caller must have locked target_mutex. The function will release it. + + @retval FALSE - Ok, the call has been made + @retval TRUE - Call wasn't made (either the target is in disabled state or + timeout occurred) +*/ + +bool Apc_target::make_apc_call(THD *caller_thd, Apc_call *call, + int timeout_sec, bool *timed_out) +{ + bool res= TRUE; + *timed_out= FALSE; + + if (enabled) + { + /* Create and post the request */ + Call_request apc_request; + apc_request.call= call; + apc_request.processed= FALSE; + mysql_cond_init(key_show_explain_request_COND, &apc_request.COND_request, + NULL); + enqueue_request(&apc_request); + apc_request.what="enqueued by make_apc_call"; + + struct timespec abstime; + const int timeout= timeout_sec; + set_timespec(abstime, timeout); + + int wait_res= 0; + PSI_stage_info old_stage; + caller_thd->ENTER_COND(&apc_request.COND_request, LOCK_thd_kill_ptr, + &stage_show_explain, &old_stage); + /* todo: how about processing other errors here? 
*/ + while (!apc_request.processed && (wait_res != ETIMEDOUT)) + { + /* We own LOCK_thd_kill_ptr */ + wait_res= mysql_cond_timedwait(&apc_request.COND_request, + LOCK_thd_kill_ptr, &abstime); + // &apc_request.LOCK_request, &abstime); + if (caller_thd->killed) + break; + } + + if (!apc_request.processed) + { + /* + The wait has timed out, or this thread was KILLed. + Remove the request from the queue (ok to do because we own + LOCK_thd_kill_ptr) + */ + apc_request.processed= TRUE; + dequeue_request(&apc_request); + *timed_out= TRUE; + res= TRUE; + } + else + { + /* Request was successfully executed and dequeued by the target thread */ + res= FALSE; + } + /* + exit_cond() will call mysql_mutex_unlock(LOCK_thd_kill_ptr) for us: + */ + caller_thd->EXIT_COND(&old_stage); + + /* Destroy all APC request data */ + mysql_cond_destroy(&apc_request.COND_request); + } + else + { +#ifndef DBUG_OFF + /* We didn't make the call, because the target is disabled */ + n_calls_processed++; +#endif + mysql_mutex_unlock(LOCK_thd_kill_ptr); + } + return res; +} + + +/* + Process all APC requests. + This should be called periodically by the APC target thread. 
+*/ + +void Apc_target::process_apc_requests(bool force) +{ + while (1) + { + Call_request *request; + + if (force) + mysql_mutex_lock(LOCK_thd_kill_ptr); + else if (mysql_mutex_trylock(LOCK_thd_kill_ptr)) + break; // Mutex is blocked, try again later + if (!(request= get_first_in_queue())) + { + /* No requests in the queue */ + mysql_mutex_unlock(LOCK_thd_kill_ptr); + break; + } + + /* + Remove the request from the queue (we're holding queue lock so we can be + sure that request owner won't try to remove it) + */ + request->what="dequeued by process_apc_requests"; + dequeue_request(request); + request->processed= TRUE; + + request->call->call_in_target_thread(); + request->what="func called by process_apc_requests"; + +#ifndef DBUG_OFF + n_calls_processed++; +#endif + mysql_cond_signal(&request->COND_request); + mysql_mutex_unlock(LOCK_thd_kill_ptr); + } +} + diff --git a/sql/my_apc.h b/sql/my_apc.h new file mode 100644 index 00000000..29fa3172 --- /dev/null +++ b/sql/my_apc.h @@ -0,0 +1,161 @@ +#ifndef SQL_MY_APC_INCLUDED +#define SQL_MY_APC_INCLUDED +/* + Copyright (c) 2011, 2013 Monty Program Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Interface + ~~~~~~~~~ + ( + - This is an APC request queue + - We assume there is a particular owner thread which periodically calls + process_apc_requests() to serve the call requests. 
+ - Other threads can post call requests, and block until they are executed. + ) + + Implementation + ~~~~~~~~~~~~~~ + - The target has a mutex-guarded request queue. + + - After the request has been put into queue, the requestor waits for request + to be satisfied. The worker satisfies the request and signals the + requestor. +*/ + +class THD; + +/* + Target for asynchronous procedure calls (APCs). + - A target is running in some particular thread, + - One can make calls to it from other threads. +*/ +class Apc_target +{ + mysql_mutex_t *LOCK_thd_kill_ptr; +public: + Apc_target() : enabled(0), apc_calls(NULL) {} + ~Apc_target() { DBUG_ASSERT(!enabled && !apc_calls);} + + void init(mysql_mutex_t *target_mutex); + + /* Destroy the target. The target must be disabled when this call is made. */ + void destroy() { DBUG_ASSERT(!enabled); } + + /* Enter the state where the target is available for serving APC requests */ + void enable() { enabled++; } + + /* + Make the target unavailable for serving APC requests. 
+ + @note + This call will serve all requests that were already enqueued + */ + void disable() + { + DBUG_ASSERT(enabled); + mysql_mutex_lock(LOCK_thd_kill_ptr); + bool process= !--enabled && have_apc_requests(); + mysql_mutex_unlock(LOCK_thd_kill_ptr); + if (unlikely(process)) + process_apc_requests(true); + } + + void process_apc_requests(bool force); + /* + A lightweight function, intended to be used in frequent checks like this: + + if (apc_target.have_requests()) apc_target.process_apc_requests() + */ + inline bool have_apc_requests() + { + return MY_TEST(apc_calls); + } + + inline bool is_enabled() { return enabled; } + + /* Functor class for calls you can schedule */ + class Apc_call + { + public: + /* This function will be called in the target thread */ + virtual void call_in_target_thread()= 0; + virtual ~Apc_call() = default; + }; + + /* Make a call in the target thread (see function definition for details) */ + bool make_apc_call(THD *caller_thd, Apc_call *call, int timeout_sec, bool *timed_out); + +#ifndef DBUG_OFF + int n_calls_processed; /* Number of calls served by this target */ +#endif +private: + class Call_request; + + /* + Non-zero value means we're enabled. It's an int, not bool, because one can + call enable() N times (and then needs to call disable() N times before the + target is really disabled) + */ + int enabled; + + /* + Circular, double-linked list of all enqueued call requests. + We use this structure, because we + - process requests sequentially: requests are added at the end of the + list and removed from the front. With circular list, we can keep one + pointer, and access both front and back of the list with it. + - a thread that has posted a request may time out (or be KILLed) and + cancel the request, which means we need a fast request-removal + operation. 
+ */ + Call_request *apc_calls; + + class Call_request + { + public: + Apc_call *call; /* Functor to be called */ + + /* The caller will actually wait for "processed==TRUE" */ + bool processed; + + /* Condition that will be signalled when the request has been served */ + mysql_cond_t COND_request; + + /* Double linked-list linkage */ + Call_request *next; + Call_request *prev; + + const char *what; /* (debug) state of the request */ + }; + + void enqueue_request(Call_request *qe); + void dequeue_request(Call_request *qe); + + /* return the first call request in queue, or NULL if there are none enqueued */ + Call_request *get_first_in_queue() + { + return apc_calls; + } +}; + +#ifdef HAVE_PSI_INTERFACE +void init_show_explain_psi_keys(void); +#else +#define init_show_explain_psi_keys() /* no-op */ +#endif + +#endif //SQL_MY_APC_INCLUDED + diff --git a/sql/my_decimal.cc b/sql/my_decimal.cc new file mode 100644 index 00000000..54b038cc --- /dev/null +++ b/sql/my_decimal.cc @@ -0,0 +1,437 @@ +/* + Copyright (c) 2005, 2010, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +#include + +#ifndef MYSQL_CLIENT +#include "sql_class.h" // THD +#include "field.h" +#endif + +#define DIG_BASE 1000000000 +#define DIG_PER_DEC1 9 +#define ROUND_UP(X) (((X)+DIG_PER_DEC1-1)/DIG_PER_DEC1) + +#ifndef MYSQL_CLIENT +/** + report result of decimal operation. + + @param result decimal library return code (E_DEC_* see include/decimal.h) + + @todo + Fix error messages + + @return + result +*/ + +int decimal_operation_results(int result, const char *value, const char *type) +{ + /* Avoid calling current_thd on default path */ + if (likely(result == E_DEC_OK)) + return(result); + + THD *thd= current_thd; + switch (result) { + case E_DEC_TRUNCATED: + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_DATA_TRUNCATED, ER_THD(thd, ER_DATA_TRUNCATED), + value, type); + break; + case E_DEC_OVERFLOW: + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_DATA_OVERFLOW, ER_THD(thd, ER_DATA_OVERFLOW), + value, type); + break; + case E_DEC_DIV_ZERO: + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_DIVISION_BY_ZERO, ER_THD(thd, ER_DIVISION_BY_ZERO)); + break; + case E_DEC_BAD_NUM: + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_BAD_DATA, ER_THD(thd, ER_BAD_DATA), + value, type); + break; + case E_DEC_OOM: + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + break; + default: + DBUG_ASSERT(0); + } + return result; +} + + +/** + @brief Converting decimal to string + + @details Convert given my_decimal to String; allocate buffer as needed. 
+ + @param[in] mask what problems to warn on (mask of E_DEC_* values) + @param[in] d the decimal to print + @param[in] fixed_prec overall number of digits if ZEROFILL, 0 otherwise + @param[in] fixed_dec number of decimal places (if fixed_prec != 0) + @param[in] filler what char to pad with (ZEROFILL et al.) + @param[out] *str where to store the resulting string + + @return error coce + @retval E_DEC_OK + @retval E_DEC_TRUNCATED + @retval E_DEC_OVERFLOW + @retval E_DEC_OOM +*/ + +int my_decimal::to_string_native(String *str, uint fixed_prec, uint fixed_dec, + char filler, uint mask) const +{ + /* + Calculate the size of the string: For DECIMAL(a,b), fixed_prec==a + holds true iff the type is also ZEROFILL, which in turn implies + UNSIGNED. Hence the buffer for a ZEROFILLed value is the length + the user requested, plus one for a possible decimal point, plus + one if the user only wanted decimal places, but we force a leading + zero on them, plus one for the '\0' terminator. Because the type + is implicitly UNSIGNED, we do not need to reserve a character for + the sign. For all other cases, fixed_prec will be 0, and + my_decimal_string_length() will be called instead to calculate the + required size of the buffer. + */ + int length= (fixed_prec + ? (fixed_prec + ((fixed_prec == fixed_dec) ? 1 : 0) + 1) + : my_decimal_string_length(this)); + int result; + if (str->alloc(length+1)) // Alloc also space for \0 + return check_result(mask, E_DEC_OOM); + result= decimal2string(this, (char*) str->ptr(), + &length, (int)fixed_prec, fixed_dec, + filler); + str->length(length); + str->set_charset(&my_charset_numeric); + return check_result(mask, result); +} + + +/** + @brief Converting decimal to string with character set conversion + + @details Convert given my_decimal to String; allocate buffer as needed. 
+ + @param[in] mask what problems to warn on (mask of E_DEC_* values) + @param[in] val the decimal to print + @param[in] fixed_prec overall number of digits if ZEROFILL, 0 otherwise + @param[in] fixed_dec number of decimal places (if fixed_prec != 0) + @param[in] filler what char to pad with (ZEROFILL et al.) + @param[out] *str where to store the resulting string + @param[in] cs character set + + @return error coce + @retval E_DEC_OK + @retval E_DEC_TRUNCATED + @retval E_DEC_OVERFLOW + @retval E_DEC_OOM + + Would be great to make it a method of the String class, + but this would need to include + my_decimal.h from sql_string.h and sql_string.cc, which is not desirable. +*/ +bool +str_set_decimal(uint mask, const my_decimal *val, + uint fixed_prec, uint fixed_dec, char filler, + String *str, CHARSET_INFO *cs) +{ + if (!(cs->state & MY_CS_NONASCII)) + { + // For ASCII-compatible character sets we can use to_string_native() + val->to_string_native(str, fixed_prec, fixed_dec, filler, mask); + str->set_charset(cs); + return FALSE; + } + else + { + /* + For ASCII-incompatible character sets (like UCS2) we + call my_string_native() on a temporary buffer first, + and then convert the result to the target character + with help of str->copy(). 
+ */ + uint errors; + StringBuffer tmp; + val->to_string_native(&tmp, fixed_prec, fixed_dec, filler, mask); + return str->copy(tmp.ptr(), tmp.length(), &my_charset_latin1, cs, &errors); + } +} + + +/* + Convert from decimal to binary representation + + SYNOPSIS + to_binary() + mask error processing mask + d number for conversion + bin pointer to buffer where to write result + prec overall number of decimal digits + scale number of decimal digits after decimal point + + NOTE + Before conversion we round number if it need but produce truncation + error in this case + + RETURN + E_DEC_OK + E_DEC_TRUNCATED + E_DEC_OVERFLOW +*/ + +int my_decimal::to_binary(uchar *bin, int prec, decimal_digits_t scale, + uint mask) const +{ + int err1= E_DEC_OK, err2; + my_decimal rounded; + my_decimal2decimal(this, &rounded); + rounded.frac= decimal_actual_fraction(&rounded); + if (scale < rounded.frac) + { + err1= E_DEC_TRUNCATED; + /* decimal_round can return only E_DEC_TRUNCATED */ + decimal_round(&rounded, &rounded, scale, HALF_UP); + } + err2= decimal2bin(&rounded, bin, prec, scale); + if (!err2) + err2= err1; + return check_result(mask, err2); +} + + +/* + Convert string for decimal when string can be in some multibyte charset + + SYNOPSIS + str2my_decimal() + mask error processing mask + from string to process + length length of given string + charset charset of given string + decimal_value buffer for result storing + + RESULT + E_DEC_OK + E_DEC_TRUNCATED + E_DEC_OVERFLOW + E_DEC_BAD_NUM + E_DEC_OOM +*/ + +int str2my_decimal(uint mask, const char *from, size_t length, + CHARSET_INFO *charset, my_decimal *decimal_value, + const char **end_ptr) +{ + int err; + if (charset->mbminlen > 1) + { + StringBuffer tmp; + uint dummy_errors; + tmp.copy(from, length, charset, &my_charset_latin1, &dummy_errors); + char *end= (char*) tmp.end(); + err= string2decimal(tmp.ptr(), (decimal_t*) decimal_value, &end); + *end_ptr= from + charset->mbminlen * (size_t) (end - tmp.ptr()); + } + else + { + 
char *end= (char*) from + length; + err= string2decimal(from, (decimal_t*) decimal_value, &end); + *end_ptr= end; + } + check_result_and_overflow(mask, err, decimal_value); + return err; +} + + +/** + converts a decimal into a pair of integers - for integer and fractional parts + + special version, for decimals representing number of seconds. + integer part cannot be larger that 1e18 (otherwise it's an overflow). + fractional part is microseconds. +*/ +bool my_decimal2seconds(const my_decimal *d, ulonglong *sec, + ulong *microsec, ulong *nanosec) +{ + int pos; + + if (d->intg) + { + pos= (d->intg-1)/DIG_PER_DEC1; + *sec= d->buf[pos]; + if (pos > 0) + *sec+= static_cast(d->buf[pos-1]) * DIG_BASE; + } + else + { + *sec=0; + pos= -1; + } + + *microsec= d->frac ? static_cast(d->buf[pos+1]) / (DIG_BASE/1000000) : 0; + *nanosec= d->frac ? static_cast(d->buf[pos+1]) % (DIG_BASE/1000000) : 0; + + if (pos > 1) + { + for (int i=0; i < pos-1; i++) + if (d->buf[i]) + { + *sec= LONGLONG_MAX; + break; + } + } + return d->sign(); +} + + +/** + converts a pair of integers (seconds, microseconds) into a decimal +*/ +my_decimal *seconds2my_decimal(bool sign, + ulonglong sec, ulong microsec, my_decimal *d) +{ + d->init(); + longlong2decimal(sec, d); // cannot fail + if (microsec) + { + d->buf[(d->intg-1) / DIG_PER_DEC1 + 1]= microsec * (DIG_BASE/1000000); + d->frac= 6; + } + ((decimal_t *)d)->sign= sign; + return d; +} + + +my_decimal *date2my_decimal(const MYSQL_TIME *ltime, my_decimal *dec) +{ + longlong date= (ltime->year*100L + ltime->month)*100L + ltime->day; + if (ltime->time_type > MYSQL_TIMESTAMP_DATE) + date= ((date*100L + ltime->hour)*100L+ ltime->minute)*100L + ltime->second; + return seconds2my_decimal(ltime->neg, date, ltime->second_part, dec); +} + + +void my_decimal_trim(ulonglong *precision, decimal_digits_t *scale) +{ + if (!(*precision) && !(*scale)) + { + *precision= 10; + *scale= 0; + return; + } +} + + +/* + Convert a decimal to an ulong with a descriptive error 
message +*/ + +int my_decimal2int(uint mask, const decimal_t *d, bool unsigned_flag, + longlong *l, decimal_round_mode round_type) +{ + int res; + my_decimal rounded; + /* decimal_round can return only E_DEC_TRUNCATED */ + decimal_round(d, &rounded, 0, round_type); + res= (unsigned_flag ? + decimal2ulonglong(&rounded, (ulonglong *) l) : + decimal2longlong(&rounded, l)); + if (res & mask) + { + char buff[DECIMAL_MAX_STR_LENGTH]; + int length= sizeof(buff); + decimal2string(d, buff, &length, 0, 0, 0); + + decimal_operation_results(res, buff, + unsigned_flag ? "UNSIGNED INT" : + "INT"); + } + return res; +} + + +longlong my_decimal::to_longlong(bool unsigned_flag) const +{ + longlong result; + my_decimal2int(E_DEC_FATAL_ERROR, this, unsigned_flag, &result); + return result; +} + + +my_decimal::my_decimal(Field *field) +{ + init(); + DBUG_ASSERT(!field->is_null()); +#ifdef DBUG_ASSERT_EXISTS + my_decimal *dec= +#endif + field->val_decimal(this); + DBUG_ASSERT(dec == this); +} + + +#ifndef DBUG_OFF +/* routines for debugging print */ + +/* print decimal */ +void +print_decimal(const my_decimal *dec) +{ + int i, end; + char buff[512], *pos; + pos= buff; + pos+= sprintf(buff, "Decimal: sign: %d intg: %d frac: %d { ", + dec->sign(), dec->intg, dec->frac); + end= ROUND_UP(dec->frac)+ROUND_UP(dec->intg)-1; + for (i=0; i < end; i++) + pos+= sprintf(pos, "%09d, ", dec->buf[i]); + pos+= sprintf(pos, "%09d }\n", dec->buf[i]); + fputs(buff, DBUG_FILE); +} + + +/* print decimal with its binary representation */ +void +print_decimal_buff(const my_decimal *dec, const uchar* ptr, int length) +{ + print_decimal(dec); + fprintf(DBUG_FILE, "Record: "); + for (int i= 0; i < length; i++) + { + fprintf(DBUG_FILE, "%02X ", (uint)((uchar *)ptr)[i]); + } + fprintf(DBUG_FILE, "\n"); +} + + +const char *dbug_decimal_as_string(char *buff, const my_decimal *val) +{ + int length= DECIMAL_MAX_STR_LENGTH + 1; /* minimum size for buff */ + if (!val) + return "NULL"; + 
(void)decimal2string((decimal_t*) val, buff, &length, 0,0,0); + return buff; +} + + +#endif /*DBUG_OFF*/ +#endif /*MYSQL_CLIENT*/ diff --git a/sql/my_decimal.h b/sql/my_decimal.h new file mode 100644 index 00000000..a0e3be2f --- /dev/null +++ b/sql/my_decimal.h @@ -0,0 +1,550 @@ +/* Copyright (c) 2005, 2013, Oracle and/or its affiliates. + Copyright (c) 2011, 2014, SkySQL Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + It is interface module to fixed precision decimals library. + + Most functions use 'uint mask' as parameter, if during operation error + which fit in this mask is detected then it will be processed automatically + here. (errors are E_DEC_* constants, see include/decimal.h) + + Most function are just inline wrappers around library calls +*/ + +#ifndef my_decimal_h +#define my_decimal_h + +#include "sql_basic_types.h" + +#if defined(MYSQL_SERVER) || defined(EMBEDDED_LIBRARY) +#include "sql_string.h" /* String */ +#endif + +C_MODE_START +#include +#include +C_MODE_END + +class String; +class Field; +typedef struct st_mysql_time MYSQL_TIME; + +/** + maximum size of packet length. 
+*/ +#define DECIMAL_MAX_FIELD_SIZE DECIMAL_MAX_PRECISION + + +inline uint my_decimal_size(decimal_digits_t precision, + decimal_digits_t scale) +{ + /* + Always allocate more space to allow library to put decimal point + where it want + */ + return decimal_size(precision, scale) + 1; +} + + +inline decimal_digits_t my_decimal_int_part(decimal_digits_t precision, + decimal_digits_t decimals) +{ + return (decimal_digits_t) (precision - + ((decimals == DECIMAL_NOT_SPECIFIED) ? 0 : + decimals)); +} + + +#ifndef MYSQL_CLIENT +int decimal_operation_results(int result, const char *value, const char *type); +#else +inline int decimal_operation_results(int result, const char *value, + const char *type) +{ + return result; +} +#endif /*MYSQL_CLIENT*/ + + +inline int check_result(uint mask, int result) +{ + if (result & mask) + decimal_operation_results(result, "", "DECIMAL"); + return result; +} + + +/** + my_decimal class limits 'decimal_t' type to what we need in MySQL. + + It contains internally all necessary space needed by the instance so + no extra memory is needed. One should call fix_buffer_pointer() function + when he moves my_decimal objects in memory. +*/ + +class my_decimal :public decimal_t +{ + /* + Several of the routines in strings/decimal.c have had buffer + overrun/underrun problems. These are *not* caught by valgrind. + To catch them, we allocate dummy fields around the buffer, + and test that their values do not change. 
+ */ +#if !defined(DBUG_OFF) + int foo1; +#endif + + decimal_digit_t buffer[DECIMAL_BUFF_LENGTH]; + +#if !defined(DBUG_OFF) + int foo2; + static const int test_value= 123; +#endif + +public: + + my_decimal(const my_decimal &rhs) : decimal_t(rhs) + { + init(); + for (uint i= 0; i < DECIMAL_BUFF_LENGTH; i++) + buffer[i]= rhs.buffer[i]; + } + + my_decimal& operator=(const my_decimal &rhs) + { + if (this == &rhs) + return *this; + decimal_t::operator=(rhs); + for (uint i= 0; i < DECIMAL_BUFF_LENGTH; i++) + buffer[i]= rhs.buffer[i]; + fix_buffer_pointer(); + return *this; + } + + void init() + { +#if !defined(DBUG_OFF) + foo1= test_value; + foo2= test_value; +#endif + len= DECIMAL_BUFF_LENGTH; + buf= buffer; + TRASH_ALLOC(buffer, sizeof(buffer)); + } + + my_decimal() + { + init(); + } + my_decimal(const uchar *bin, decimal_digits_t prec, decimal_digits_t scale) + { + init(); + check_result(E_DEC_FATAL_ERROR, bin2decimal(bin, this, prec, scale)); + } + my_decimal(Field *field); + ~my_decimal() + { + sanity_check(); + } + + void sanity_check() + { + DBUG_SLOW_ASSERT(foo1 == test_value); + DBUG_SLOW_ASSERT(foo2 == test_value); + } + + void fix_buffer_pointer() { buf= buffer; } + + bool sign() const { return decimal_t::sign; } + void sign(bool s) { decimal_t::sign= s; } + decimal_digits_t precision() const { return (decimal_digits_t) (intg + frac); } + void set_zero() + { + /* + We need the up-cast here, since my_decimal has sign() member functions, + which conflicts with decimal_t::sign + (and decimal_make_zero is a macro, rather than a funcion). 
+ */ + decimal_make_zero(static_cast(this)); + } + int cmp(const my_decimal *other) const + { + return decimal_cmp(this, other); + } + +#ifndef MYSQL_CLIENT + bool to_bool() const + { + return !decimal_is_zero(this); + } + double to_double() const + { + double res; + decimal2double(this, &res); + return res; + } + longlong to_longlong(bool unsigned_flag) const; + /* + Return the value as a signed or unsigned longlong, depending on the sign. + - Positive values are returned as unsigned. + - Negative values are returned as signed. + This is used by bit SQL operators: | & ^ ~ + as well as by the SQL function BIT_COUNT(). + */ + longlong to_xlonglong() const + { return to_longlong(!sign()); } + + // Convert to string returning decimal2string() error code + int to_string_native(String *to, uint prec, uint dec, char filler, + uint mask= E_DEC_FATAL_ERROR) const; + // Convert to string returning the String pointer + String *to_string(String *to, uint prec, uint dec, char filler) const + { + return to_string_native(to, prec, dec, filler) ? NULL : to; + } + String *to_string(String *to) const + { + return to_string(to, 0, 0, 0); + } + String *to_string_round(String *to, decimal_digits_t scale, + my_decimal *round_buff) const + { + (void) round_to(round_buff, scale, HALF_UP); // QQ: check result? 
+ return round_buff->to_string(to); + } + /* Scale can be negative here when called from truncate() */ + int round_to(my_decimal *to, int scale, decimal_round_mode mode, + int mask= E_DEC_FATAL_ERROR) const + { + return check_result(mask, decimal_round(this, to, scale, mode)); + } + int to_binary(uchar *bin, int prec, decimal_digits_t scale, + uint mask= E_DEC_FATAL_ERROR) const; +#endif + /** Swap two my_decimal values */ + void swap(my_decimal &rhs) + { + swap_variables(my_decimal, *this, rhs); + } +}; + + +#ifndef DBUG_OFF +void print_decimal(const my_decimal *dec); +void print_decimal_buff(const my_decimal *dec, const uchar* ptr, int length); +const char *dbug_decimal_as_string(char *buff, const my_decimal *val); +#else +#define dbug_decimal_as_string(A) NULL +#endif + +bool str_set_decimal(uint mask, const my_decimal *val, uint fixed_prec, + uint fixed_dec, char filler, String *str, + CHARSET_INFO *cs); + +extern my_decimal decimal_zero; + +inline +void max_my_decimal(my_decimal *to, decimal_digits_t precision, + decimal_digits_t frac) +{ + DBUG_ASSERT((precision <= DECIMAL_MAX_PRECISION)&& + (frac <= DECIMAL_MAX_SCALE)); + max_decimal(precision, frac, to); +} + +inline void max_internal_decimal(my_decimal *to) +{ + max_my_decimal(to, DECIMAL_MAX_PRECISION, 0); +} + +inline int check_result_and_overflow(uint mask, int result, my_decimal *val) +{ + if (check_result(mask, result) & E_DEC_OVERFLOW) + { + bool sign= val->sign(); + val->fix_buffer_pointer(); + max_internal_decimal(val); + val->sign(sign); + } + return result; +} + +inline decimal_digits_t my_decimal_length_to_precision(decimal_digits_t length, + decimal_digits_t scale, + bool unsigned_flag) +{ + /* Precision can't be negative thus ignore unsigned_flag when length is 0. */ + DBUG_ASSERT(length || !scale); + return (decimal_digits_t) (length - (scale>0 ? 1:0) - + (unsigned_flag || !length ? 
0:1)); +} + +inline decimal_digits_t +my_decimal_precision_to_length_no_truncation(decimal_digits_t precision, + decimal_digits_t scale, + bool unsigned_flag) +{ + /* + When precision is 0 it means that original length was also 0. Thus + unsigned_flag is ignored in this case. + */ + DBUG_ASSERT(precision || !scale); + return (decimal_digits_t)(precision + (scale > 0 ? 1 : 0) + + (unsigned_flag || !precision ? 0 : 1)); +} + +inline decimal_digits_t +my_decimal_precision_to_length(decimal_digits_t precision, + decimal_digits_t scale, + bool unsigned_flag) +{ + /* + When precision is 0 it means that original length was also 0. Thus + unsigned_flag is ignored in this case. + */ + DBUG_ASSERT(precision || !scale); + set_if_smaller(precision, DECIMAL_MAX_PRECISION); + return my_decimal_precision_to_length_no_truncation(precision, scale, + unsigned_flag); +} + +inline +uint my_decimal_string_length(const my_decimal *d) +{ + /* length of string representation including terminating '\0' */ + return decimal_string_size(d); +} + + +inline +uint my_decimal_max_length(const my_decimal *d) +{ + /* -1 because we do not count \0 */ + return decimal_string_size(d) - 1; +} + + +inline +uint my_decimal_get_binary_size(decimal_digits_t precision, + decimal_digits_t scale) +{ + return decimal_bin_size(precision, scale); +} + + +inline +void my_decimal2decimal(const my_decimal *from, my_decimal *to) +{ + *to= *from; +} + + +inline +int binary2my_decimal(uint mask, const uchar *bin, my_decimal *d, + decimal_digits_t prec, decimal_digits_t scale) +{ + return check_result(mask, bin2decimal(bin, d, prec, scale)); +} + + +inline +int my_decimal_set_zero(my_decimal *d) +{ + d->set_zero(); + return 0; +} + + +inline +bool my_decimal_is_zero(const my_decimal *decimal_value) +{ + return decimal_is_zero(decimal_value); +} + + +inline bool str_set_decimal(const my_decimal *val, String *str, + CHARSET_INFO *cs) +{ + return str_set_decimal(E_DEC_FATAL_ERROR, val, 0, 0, 0, str, cs); +} + + +bool 
my_decimal2seconds(const my_decimal *d, ulonglong *sec, + ulong *microsec, ulong *nanosec); + +my_decimal *seconds2my_decimal(bool sign, ulonglong sec, ulong microsec, + my_decimal *d); + +#define TIME_to_my_decimal(TIME, DECIMAL) \ + seconds2my_decimal((TIME)->neg, TIME_to_ulonglong(TIME), \ + (TIME)->second_part, (DECIMAL)) + +int my_decimal2int(uint mask, const decimal_t *d, bool unsigned_flag, + longlong *l, decimal_round_mode round_type= HALF_UP); + +inline +int my_decimal2double(uint, const decimal_t *d, double *result) +{ + /* No need to call check_result as this will always succeed */ + return decimal2double(d, result); +} + + +inline +int str2my_decimal(uint mask, const char *str, my_decimal *d, char **end) +{ + return check_result_and_overflow(mask, string2decimal(str, d, end), d); +} + + +int str2my_decimal(uint mask, const char *from, size_t length, + CHARSET_INFO *charset, my_decimal *decimal_value, + const char **end); + +inline int str2my_decimal(uint mask, const char *from, size_t length, + CHARSET_INFO *charset, my_decimal *decimal_value) +{ + const char *end; + return str2my_decimal(mask, from, length, charset, decimal_value, &end); +} + +#if defined(MYSQL_SERVER) || defined(EMBEDDED_LIBRARY) +inline +int string2my_decimal(uint mask, const String *str, my_decimal *d) +{ + const char *end; + return str2my_decimal(mask, str->ptr(), str->length(), str->charset(), + d, &end); +} + + +my_decimal *date2my_decimal(const MYSQL_TIME *ltime, my_decimal *dec); + + +#endif /*defined(MYSQL_SERVER) || defined(EMBEDDED_LIBRARY) */ + +inline +int double2my_decimal(uint mask, double val, my_decimal *d) +{ + return check_result_and_overflow(mask, double2decimal(val, d), d); +} + + +inline +int int2my_decimal(uint mask, longlong i, my_bool unsigned_flag, my_decimal *d) +{ + return check_result(mask, (unsigned_flag ? 
+ ulonglong2decimal((ulonglong)i, d) : + longlong2decimal(i, d))); +} + +inline +void decimal2my_decimal(decimal_t *from, my_decimal *to) +{ + DBUG_ASSERT(to->len >= from->len); + to->intg= from->intg; + to->frac= from->frac; + to->sign(from->sign); + memcpy(to->buf, from->buf, to->len*sizeof(decimal_digit_t)); +} + + +inline +void my_decimal_neg(decimal_t *arg) +{ + if (decimal_is_zero(arg)) + { + arg->sign= 0; + return; + } + decimal_neg(arg); +} + + +inline +int my_decimal_add(uint mask, my_decimal *res, const my_decimal *a, + const my_decimal *b) +{ + return check_result_and_overflow(mask, + decimal_add(a, b, res), + res); +} + + +inline +int my_decimal_sub(uint mask, my_decimal *res, const my_decimal *a, + const my_decimal *b) +{ + return check_result_and_overflow(mask, + decimal_sub(a, b, res), + res); +} + + +inline +int my_decimal_mul(uint mask, my_decimal *res, const my_decimal *a, + const my_decimal *b) +{ + return check_result_and_overflow(mask, + decimal_mul(a, b, res), + res); +} + + +inline +int my_decimal_div(uint mask, my_decimal *res, const my_decimal *a, + const my_decimal *b, int div_scale_inc) +{ + return check_result_and_overflow(mask, + decimal_div(a, b, res, div_scale_inc), + res); +} + + +inline +int my_decimal_mod(uint mask, my_decimal *res, const my_decimal *a, + const my_decimal *b) +{ + return check_result_and_overflow(mask, + decimal_mod(a, b, res), + res); +} + +/** + @return + -1 if ab and 0 if a==b +*/ +inline +int my_decimal_cmp(const my_decimal *a, const my_decimal *b) +{ + return decimal_cmp(a, b); +} + + +inline +int my_decimal_intg(const my_decimal *a) +{ + return decimal_intg(a); +} + + +void my_decimal_trim(ulonglong *precision, decimal_digits_t *scale); + + +#endif /*my_decimal_h*/ + diff --git a/sql/my_json_writer.cc b/sql/my_json_writer.cc new file mode 100644 index 00000000..54eb8423 --- /dev/null +++ b/sql/my_json_writer.cc @@ -0,0 +1,496 @@ +/* Copyright (C) 2014, 2021, MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "my_global.h" +#include "my_json_writer.h" + +#if !defined(NDEBUG) || defined(JSON_WRITER_UNIT_TEST) + +bool Json_writer::named_item_expected() const +{ + return named_items_expectation.size() + && named_items_expectation.back(); +} +#endif + +void Json_writer::append_indent() +{ + if (!document_start) + output.append('\n'); + for (int i=0; i< indent_level; i++) + output.append(' '); +} + +inline void Json_writer::on_start_object() +{ +#if !defined(NDEBUG) || defined(JSON_WRITER_UNIT_TEST) + if(!fmt_helper.is_making_writer_calls()) + { + if (got_name != named_item_expected()) + { + sql_print_error(got_name + ? 
"Json_writer got a member name which is not expected.\n" + : "Json_writer: a member name was expected.\n"); + VALIDITY_ASSERT(got_name == named_item_expected()); + } + named_items_expectation.push_back(true); + } +#endif + fmt_helper.on_start_object(); +} + +void Json_writer::start_object() +{ + on_start_object(); + + if (!element_started) + start_element(); + + output.append('{'); + indent_level+=INDENT_SIZE; + first_child=true; + element_started= false; + document_start= false; +#if !defined(NDEBUG) || defined(JSON_WRITER_UNIT_TEST) + got_name= false; + named_items.emplace(); +#endif +} + +void Json_writer::start_array() +{ +#if !defined(NDEBUG) || defined(JSON_WRITER_UNIT_TEST) + if(!fmt_helper.is_making_writer_calls()) + { + VALIDITY_ASSERT(got_name == named_item_expected()); + named_items_expectation.push_back(false); + got_name= false; + if (document_start) + named_items.emplace(); + } +#endif + + if (fmt_helper.on_start_array()) + return; + + if (!element_started) + start_element(); + + output.append('['); + indent_level+=INDENT_SIZE; + first_child=true; + element_started= false; + document_start= false; +} + + +void Json_writer::end_object() +{ +#if !defined(NDEBUG) || defined(JSON_WRITER_UNIT_TEST) + VALIDITY_ASSERT(named_item_expected()); + named_items_expectation.pop_back(); + VALIDITY_ASSERT(!got_name); + got_name= false; + VALIDITY_ASSERT(named_items.size()); + named_items.pop(); +#endif + indent_level-=INDENT_SIZE; + if (!first_child) + append_indent(); + first_child= false; + output.append('}'); +} + + +void Json_writer::end_array() +{ +#if !defined(NDEBUG) || defined(JSON_WRITER_UNIT_TEST) + VALIDITY_ASSERT(!named_item_expected()); + named_items_expectation.pop_back(); + got_name= false; +#endif + if (fmt_helper.on_end_array()) + return; + indent_level-=INDENT_SIZE; + if (!first_child) + append_indent(); + output.append(']'); +} + + +Json_writer& Json_writer::add_member(const char *name) +{ + size_t len= strlen(name); + return add_member(name, len); 
+}
+
+/* Emit a member name ("name": ) — caller must currently be inside an object */
+Json_writer& Json_writer::add_member(const char *name, size_t len)
+{
+  if (!fmt_helper.on_add_member(name, len))
+  {
+    // assert that we are in an object
+    DBUG_ASSERT(!element_started);
+    start_element();
+
+    output.append('"');
+    output.append(name, len);
+    output.append(STRING_WITH_LEN("\": "));
+  }
+#if !defined(NDEBUG) || defined(JSON_WRITER_UNIT_TEST)
+  if (!fmt_helper.is_making_writer_calls())
+  {
+    VALIDITY_ASSERT(!got_name);
+    got_name= true;
+    VALIDITY_ASSERT(named_items.size());
+    /* Track member names of the current object to detect duplicate keys */
+    auto& named_items_keys= named_items.top();
+    auto emplaced= named_items_keys.emplace(name, len);
+    auto is_uniq_key= emplaced.second;
+    if(!is_uniq_key)
+    {
+      sql_print_error("Duplicated key: %s\n", emplaced.first->c_str());
+      VALIDITY_ASSERT(is_uniq_key);
+    }
+  }
+#endif
+  return *this;
+}
+
+
+/*
+  Used by formatting helper to print something that is formatted by the helper.
+  We should only separate it from the previous element.
+
+  Unlike start_element(), this does NOT set element_started, because the
+  helper produces the whole element itself.
+*/
+
+void Json_writer::start_sub_element()
+{
+  //element_started= true;
+  if (first_child)
+    first_child= false;
+  else
+    output.append(',');
+
+  append_indent();
+}
+
+
+/* Begin a new element: emit the separating comma (if any) and indent */
+void Json_writer::start_element()
+{
+  element_started= true;
+
+  if (first_child)
+    first_child= false;
+  else
+    output.append(',');
+
+  append_indent();
+}
+
+/* Add a signed integer value */
+void Json_writer::add_ll(longlong val)
+{
+  char buf[64];
+  my_snprintf(buf, sizeof(buf), "%lld", val);
+  add_unquoted_str(buf);
+}
+
+/* Add an unsigned integer value */
+void Json_writer::add_ull(ulonglong val)
+{
+  char buf[64];
+  my_snprintf(buf, sizeof(buf), "%llu", val);
+  add_unquoted_str(buf);
+}
+
+
+/* Add a memory size, printed as-is below 1Kb, else in Kb or Mb units */
+void Json_writer::add_size(longlong val)
+{
+  char buf[64];
+  size_t len;
+  if (val < 1024)
+    len= my_snprintf(buf, sizeof(buf), "%lld", val);
+  else if (val < 1024*1024*16)
+  {
+    /* Values less than 16MB are specified in KB for precision */
+    len= my_snprintf(buf, sizeof(buf), "%lld", val/1024);
+    strcpy(buf + len, "Kb");
+    len+= 2;
+  }
+  else
+  {
+    len= my_snprintf(buf, sizeof(buf),
"%lld", val/(1024*1024)); + strcpy(buf + len, "Mb"); + len+= 2; + } + add_str(buf, len); +} + + +void Json_writer::add_double(double val) +{ + char buf[64]; + size_t len= my_snprintf(buf, sizeof(buf), "%-.11lg", val); + add_unquoted_str(buf, len); +} + + +void Json_writer::add_bool(bool val) +{ + add_unquoted_str(val? "true" : "false"); +} + + +void Json_writer::add_null() +{ + add_unquoted_str("null", (size_t) 4); +} + + +void Json_writer::add_unquoted_str(const char* str) +{ + size_t len= strlen(str); + add_unquoted_str(str, len); +} + +void Json_writer::add_unquoted_str(const char* str, size_t len) +{ + VALIDITY_ASSERT(fmt_helper.is_making_writer_calls() || + got_name == named_item_expected()); + if (on_add_str(str, len)) + return; + + if (!element_started) + start_element(); + + output.append(str, len); + element_started= false; +} + +inline bool Json_writer::on_add_str(const char *str, size_t num_bytes) +{ +#if !defined(NDEBUG) || defined(JSON_WRITER_UNIT_TEST) + got_name= false; +#endif + bool helped= fmt_helper.on_add_str(str, num_bytes); + return helped; +} + +void Json_writer::add_str(const char *str) +{ + size_t len= strlen(str); + add_str(str, len); +} + +/* + This function is used to add only num_bytes of str to the output string +*/ + +void Json_writer::add_str(const char* str, size_t num_bytes) +{ + VALIDITY_ASSERT(fmt_helper.is_making_writer_calls() || + got_name == named_item_expected()); + if (on_add_str(str, num_bytes)) + return; + + if (!element_started) + start_element(); + + output.append('"'); + output.append(str, num_bytes); + output.append('"'); + element_started= false; +} + +void Json_writer::add_str(const String &str) +{ + add_str(str.ptr(), str.length()); +} + +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS +thread_local std::vector Json_writer_struct::named_items_expectation; +#endif + +Json_writer_temp_disable::Json_writer_temp_disable(THD *thd_arg) +{ + thd= thd_arg; + thd->opt_trace.disable_tracing_if_required(); +} 
+Json_writer_temp_disable::~Json_writer_temp_disable() +{ + thd->opt_trace.enable_tracing_if_required(); +} + +bool Single_line_formatting_helper::on_add_member(const char *name, + size_t len) +{ + DBUG_ASSERT(state== INACTIVE || state == DISABLED); + if (state != DISABLED) + { + // remove everything from the array + buf_ptr= buffer; + + //append member name to the array + if (len < MAX_LINE_LEN) + { + memcpy(buf_ptr, name, len); + buf_ptr+=len; + *(buf_ptr++)= 0; + + line_len= owner->indent_level + (uint)len + 1; + state= ADD_MEMBER; + return true; // handled + } + } + return false; // not handled +} + + +bool Single_line_formatting_helper::on_start_array() +{ + if (state == ADD_MEMBER) + { + state= IN_ARRAY; + return true; // handled + } + else + { + if (state != DISABLED) + state= INACTIVE; + // TODO: what if we have accumulated some stuff already? shouldn't we + // flush it? + return false; // not handled + } +} + + +bool Single_line_formatting_helper::on_end_array() +{ + if (state == IN_ARRAY) + { + flush_on_one_line(); + state= INACTIVE; + return true; // handled + } + return false; // not handled +} + + +void Single_line_formatting_helper::on_start_object() +{ + // Nested objects will not be printed on one line + disable_and_flush(); +} + + +bool Single_line_formatting_helper::on_add_str(const char *str, + size_t len) +{ + if (state == IN_ARRAY) + { + // New length will be: + // "$string", + // quote + quote + comma + space = 4 + if (line_len + len + 4 > MAX_LINE_LEN) + { + disable_and_flush(); + return false; // didn't handle the last element + } + + //append string to array + memcpy(buf_ptr, str, len); + buf_ptr+=len; + *(buf_ptr++)= 0; + line_len += (uint)len + 4; + return true; // handled + } + + disable_and_flush(); + return false; // not handled +} + + +/* + Append everything accumulated to the output on one line +*/ + +void Single_line_formatting_helper::flush_on_one_line() +{ + owner->start_sub_element(); + char *ptr= buffer; + int nr= 0; + while 
(ptr < buf_ptr) + { + char *str= ptr; + + if (nr == 0) + { + owner->output.append('"'); + owner->output.append(str); + owner->output.append(STRING_WITH_LEN("\": ")); + owner->output.append('['); + } + else + { + if (nr != 1) + owner->output.append(STRING_WITH_LEN(", ")); + owner->output.append('"'); + owner->output.append(str); + owner->output.append('"'); + } + nr++; + + while (*ptr!=0) + ptr++; + ptr++; + } + owner->output.append(']'); + /* We've printed out the contents of the buffer, mark it as empty */ + buf_ptr= buffer; +} + + +void Single_line_formatting_helper::disable_and_flush() +{ + if (state == DISABLED) + return; + + bool start_array= (state == IN_ARRAY); + state= DISABLED; + // deactivate ourselves and flush all accumulated calls. + char *ptr= buffer; + int nr= 0; + while (ptr < buf_ptr) + { + char *str= ptr; + size_t len= strlen(str); + + if (nr == 0) + { + owner->add_member(str, len); + if (start_array) + owner->start_array(); + } + else + { + //if (nr == 1) + // owner->start_array(); + owner->add_str(str, len); + } + + nr++; + ptr+= len+1; + } + buf_ptr= buffer; + state= INACTIVE; +} + diff --git a/sql/my_json_writer.h b/sql/my_json_writer.h new file mode 100644 index 00000000..87d1a7fa --- /dev/null +++ b/sql/my_json_writer.h @@ -0,0 +1,793 @@ +/* Copyright (C) 2014 SkySQL Ab, MariaDB Corporation Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef JSON_WRITER_INCLUDED +#define JSON_WRITER_INCLUDED + +#include "my_base.h" +#include "sql_string.h" + +#if !defined(NDEBUG) || defined(JSON_WRITER_UNIT_TEST) || defined ENABLED_JSON_WRITER_CONSISTENCY_CHECKS +#include +#include +#include +#include +#endif + +#ifdef JSON_WRITER_UNIT_TEST +// Also, mock objects are defined in my_json_writer-t.cc +#define VALIDITY_ASSERT(x) if (!(x)) this->invalid_json= true; +#else +#include "sql_class.h" // For class THD +#include "log.h" // for sql_print_error +#define VALIDITY_ASSERT(x) DBUG_ASSERT(x) +#endif + +#include + +class Opt_trace_stmt; +class Opt_trace_context; +class Json_writer; + +struct TABLE; +struct st_join_table; +using JOIN_TAB= struct st_join_table; + +/* + Single_line_formatting_helper is used by Json_writer to do better formatting + of JSON documents. + + The idea is to catch arrays that can be printed on one line: + + arrayName : [ "boo", 123, 456 ] + + and actually print them on one line. Arrrays that occupy too much space on + the line, or have nested members cannot be printed on one line. + + We hook into JSON printing functions and try to detect the pattern. While + detecting the pattern, we will accumulate "boo", 123, 456 as strings. + + Then, + - either the pattern is broken, and we print the elements out, + - or the pattern lasts till the end of the array, and we print the + array on one line. +*/ + +class Single_line_formatting_helper +{ + enum enum_state + { + INACTIVE, + ADD_MEMBER, + IN_ARRAY, + DISABLED + }; + + /* + This works like a finite automaton. + + state=DISABLED means the helper is disabled - all on_XXX functions will + return false (which means "not handled") and do nothing. 
+ + +->-+ + | v + INACTIVE ---> ADD_MEMBER ---> IN_ARRAY--->-+ + ^ | + +------------------<--------------------+ + + For other states: + INACTIVE - initial state, we have nothing. + ADD_MEMBER - add_member() was called, the buffer has "member_name\0". + IN_ARRAY - start_array() was called. + + + */ + enum enum_state state; + enum { MAX_LINE_LEN= 80 }; + char buffer[80]; + + /* The data in the buffer is located between buffer[0] and buf_ptr */ + char *buf_ptr; + uint line_len; + + Json_writer *owner; +public: + Single_line_formatting_helper() : state(INACTIVE), buf_ptr(buffer) {} + + void init(Json_writer *owner_arg) { owner= owner_arg; } + + bool on_add_member(const char *name, size_t len); + + bool on_start_array(); + bool on_end_array(); + void on_start_object(); + // on_end_object() is not needed. + + bool on_add_str(const char *str, size_t num_bytes); + + /* + Returns true if the helper is flushing its buffer and is probably + making calls back to its Json_writer. (The Json_writer uses this + function to avoid re-doing the processing that it has already done + before making a call to fmt_helper) + */ + bool is_making_writer_calls() const { return state == DISABLED; } + +private: + void flush_on_one_line(); + void disable_and_flush(); +}; + + +/* + Something that looks like class String, but has an internal limit of + how many bytes one can append to it. + + Bytes that were truncated due to the size limitation are counted. 
+*/ + +class String_with_limit +{ +public: + + String_with_limit() : size_limit(SIZE_T_MAX), truncated_len(0) + { + str.length(0); + } + + size_t get_truncated_bytes() const { return truncated_len; } + size_t get_size_limit() { return size_limit; } + + void set_size_limit(size_t limit_arg) + { + // Setting size limit to be shorter than length will not have the desired + // effect + DBUG_ASSERT(str.length() < size_limit); + size_limit= limit_arg; + } + + void append(const char *s, size_t size) + { + if (str.length() + size <= size_limit) + { + // Whole string can be added, just do it + str.append(s, size); + } + else + { + // We cannot add the whole string + if (str.length() < size_limit) + { + // But we can still add something + size_t bytes_to_add = size_limit - str.length(); + str.append(s, bytes_to_add); + truncated_len += size - bytes_to_add; + } + else + truncated_len += size; + } + } + + void append(const char *s) + { + append(s, strlen(s)); + } + + void append(char c) + { + if (str.length() + 1 > size_limit) + truncated_len++; + else + str.append(c); + } + + const String *get_string() { return &str; } + size_t length() { return str.length(); } +private: + String str; + + // str must not get longer than this many bytes. + size_t size_limit; + + // How many bytes were truncated from the string + size_t truncated_len; +}; + +/* + A class to write well-formed JSON documents. The documents are also formatted + for human readability. +*/ + +class Json_writer +{ +#if !defined(NDEBUG) || defined(JSON_WRITER_UNIT_TEST) + /* + In debug mode, Json_writer will fail and assertion if one attempts to + produce an invalid JSON document (e.g. JSON array having named elements). + */ + std::vector named_items_expectation; + std::stack > named_items; + + bool named_item_expected() const; + + bool got_name; + +#ifdef JSON_WRITER_UNIT_TEST +public: + // When compiled for unit test, creating invalid JSON will set this to true + // instead of an assertion. 
+ bool invalid_json= false; +#endif +#endif + +public: + /* Add a member. We must be in an object. */ + Json_writer& add_member(const char *name); + Json_writer& add_member(const char *name, size_t len); + + /* Add atomic values */ + + /* Note: the add_str methods do not do escapes. Should this change? */ + void add_str(const char* val); + void add_str(const char* val, size_t num_bytes); + void add_str(const String &str); + void add_str(Item *item); + void add_table_name(const JOIN_TAB *tab); + void add_table_name(const TABLE* table); + + void add_ll(longlong val); + void add_ull(ulonglong val); + void add_size(longlong val); + void add_double(double val); + void add_bool(bool val); + void add_null(); + +private: + void add_unquoted_str(const char* val); + void add_unquoted_str(const char* val, size_t len); + + bool on_add_str(const char *str, size_t num_bytes); + void on_start_object(); + +public: + /* Start a child object */ + void start_object(); + void start_array(); + + void end_object(); + void end_array(); + + /* + One can set a limit of how large a JSON document should be. + Writes beyond that size will be counted, but will not be collected. + */ + void set_size_limit(size_t mem_size) { output.set_size_limit(mem_size); } + + size_t get_truncated_bytes() { return output.get_truncated_bytes(); } + + Json_writer() : +#if !defined(NDEBUG) || defined(JSON_WRITER_UNIT_TEST) + got_name(false), +#endif + indent_level(0), document_start(true), element_started(false), + first_child(true) + { + fmt_helper.init(this); + } +private: + // TODO: a stack of (name, bool is_object_or_array) elements. 
+ int indent_level; + enum { INDENT_SIZE = 2 }; + + friend class Single_line_formatting_helper; + friend class Json_writer_nesting_guard; + bool document_start; + bool element_started; + bool first_child; + + Single_line_formatting_helper fmt_helper; + + void append_indent(); + void start_element(); + void start_sub_element(); + +public: + String_with_limit output; +}; + +/* A class to add values to Json_writer_object and Json_writer_array */ +class Json_value_helper +{ + Json_writer* writer; + +public: + void init(Json_writer *my_writer) { writer= my_writer; } + void add_str(const char* val) + { + writer->add_str(val); + } + void add_str(const char* val, size_t length) + { + writer->add_str(val, length); + } + void add_str(const String &str) + { + writer->add_str(str.ptr(), str.length()); + } + void add_str(const LEX_CSTRING &str) + { + writer->add_str(str.str, str.length); + } + void add_str(Item *item) + { + writer->add_str(item); + } + + void add_ll(longlong val) + { + writer->add_ll(val); + } + void add_size(longlong val) + { + writer->add_size(val); + } + void add_double(double val) + { + writer->add_double(val); + } + void add_bool(bool val) + { + writer->add_bool(val); + } + void add_null() + { + writer->add_null(); + } + void add_table_name(const JOIN_TAB *tab) + { + writer->add_table_name(tab); + } + void add_table_name(const TABLE* table) + { + writer->add_table_name(table); + } +}; + +/* A common base for Json_writer_object and Json_writer_array */ +class Json_writer_struct +{ + Json_writer_struct(const Json_writer_struct&)= delete; + Json_writer_struct& operator=(const Json_writer_struct&)= delete; + +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS + static thread_local std::vector named_items_expectation; +#endif +protected: + Json_writer* my_writer; + Json_value_helper context; + /* + Tells when a json_writer_struct has been closed or not + */ + bool closed; + + explicit Json_writer_struct(Json_writer *writer) + : my_writer(writer) + { + 
context.init(my_writer); + closed= false; +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS + named_items_expectation.push_back(expect_named_children); +#endif + } + explicit Json_writer_struct(THD *thd) + : Json_writer_struct(thd->opt_trace.get_current_json()) + { + } + +public: + +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS + virtual ~Json_writer_struct() + { + named_items_expectation.pop_back(); + } +#else + virtual ~Json_writer_struct() = default; +#endif + + bool trace_started() const + { + return my_writer != 0; + } + +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS + bool named_item_expected() const + { + return named_items_expectation.size() > 1 + && *(named_items_expectation.rbegin() + 1); + } +#endif +}; + + +/* + RAII-based class to start/end writing a JSON object into the JSON document + + There is "ignore mode": one can initialize Json_writer_object with a NULL + Json_writer argument, and then all its calls will do nothing. This is used + by optimizer trace which can be enabled or disabled. 
+*/ + +class Json_writer_object : public Json_writer_struct +{ +private: + void add_member(const char *name) + { + my_writer->add_member(name); + } +public: + explicit Json_writer_object(Json_writer* writer, const char *str= nullptr) + : Json_writer_struct(writer) + { +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS + DBUG_ASSERT(named_item_expected()); +#endif + if (unlikely(my_writer)) + { + if (str) + my_writer->add_member(str); + my_writer->start_object(); + } + } + + explicit Json_writer_object(THD* thd, const char *str= nullptr) + : Json_writer_object(thd->opt_trace.get_current_json(), str) + { + } + + ~Json_writer_object() + { + if (my_writer && !closed) + my_writer->end_object(); + closed= TRUE; + } + + Json_writer_object& add(const char *name, bool value) + { + DBUG_ASSERT(!closed); + if (my_writer) + { + add_member(name); + context.add_bool(value); + } + return *this; + } + + Json_writer_object& add(const char *name, ulonglong value) + { + DBUG_ASSERT(!closed); + if (my_writer) + { + add_member(name); + my_writer->add_ull(value); + } + return *this; + } + + template::value>::type + > + Json_writer_object& add(const char *name, IntT value) + { + DBUG_ASSERT(!closed); + if (my_writer) + { + add_member(name); + context.add_ll(value); + } + return *this; + } + + Json_writer_object& add(const char *name, double value) + { + DBUG_ASSERT(!closed); + if (my_writer) + { + add_member(name); + context.add_double(value); + } + return *this; + } + + Json_writer_object& add(const char *name, const char *value) + { + DBUG_ASSERT(!closed); + if (my_writer) + { + add_member(name); + context.add_str(value); + } + return *this; + } + Json_writer_object& add(const char *name, const char *value, size_t num_bytes) + { + add_member(name); + context.add_str(value, num_bytes); + return *this; + } + Json_writer_object& add(const char *name, const LEX_CSTRING &value) + { + DBUG_ASSERT(!closed); + if (my_writer) + { + add_member(name); + context.add_str(value.str, value.length); + } + 
return *this; + } + Json_writer_object& add(const char *name, Item *value) + { + DBUG_ASSERT(!closed); + if (my_writer) + { + add_member(name); + context.add_str(value); + } + return *this; + } + Json_writer_object& add_null(const char*name) + { + DBUG_ASSERT(!closed); + if (my_writer) + { + add_member(name); + context.add_null(); + } + return *this; + } + Json_writer_object& add_table_name(const JOIN_TAB *tab) + { + DBUG_ASSERT(!closed); + if (my_writer) + { + add_member("table"); + context.add_table_name(tab); + } + return *this; + } + Json_writer_object& add_table_name(const TABLE *table) + { + DBUG_ASSERT(!closed); + if (my_writer) + { + add_member("table"); + context.add_table_name(table); + } + return *this; + } + Json_writer_object& add_select_number(uint select_number) + { + DBUG_ASSERT(!closed); + if (my_writer) + { + add_member("select_id"); + if (unlikely(select_number == FAKE_SELECT_LEX_ID)) + context.add_str("fake"); + else + context.add_ll(static_cast(select_number)); + } + return *this; + } + void end() + { + DBUG_ASSERT(!closed); + if (unlikely(my_writer)) + my_writer->end_object(); + closed= TRUE; + } +}; + + +/* + RAII-based class to start/end writing a JSON array into the JSON document + + There is "ignore mode": one can initialize Json_writer_array with a NULL + Json_writer argument, and then all its calls will do nothing. This is used + by optimizer trace which can be enabled or disabled. 
+*/ + +class Json_writer_array : public Json_writer_struct +{ +public: + explicit Json_writer_array(Json_writer *writer, const char *str= nullptr) + : Json_writer_struct(writer) + { +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS + DBUG_ASSERT(!named_item_expected()); +#endif + if (unlikely(my_writer)) + { + if (str) + my_writer->add_member(str); + my_writer->start_array(); + } + } + + explicit Json_writer_array(THD *thd, const char *str= nullptr) + : Json_writer_array(thd->opt_trace.get_current_json(), str) + { + } + + ~Json_writer_array() + { + if (unlikely(my_writer && !closed)) + { + my_writer->end_array(); + closed= TRUE; + } + } + + void end() + { + DBUG_ASSERT(!closed); + if (unlikely(my_writer)) + my_writer->end_array(); + closed= TRUE; + } + + Json_writer_array& add(bool value) + { + DBUG_ASSERT(!closed); + if (my_writer) + context.add_bool(value); + return *this; + } + Json_writer_array& add(ulonglong value) + { + DBUG_ASSERT(!closed); + if (my_writer) + context.add_ll(static_cast(value)); + return *this; + } + Json_writer_array& add(longlong value) + { + DBUG_ASSERT(!closed); + if (my_writer) + context.add_ll(value); + return *this; + } + Json_writer_array& add(double value) + { + DBUG_ASSERT(!closed); + if (my_writer) + context.add_double(value); + return *this; + } + #ifndef _WIN64 + Json_writer_array& add(size_t value) + { + DBUG_ASSERT(!closed); + if (my_writer) + context.add_ll(static_cast(value)); + return *this; + } + #endif + Json_writer_array& add(const char *value) + { + DBUG_ASSERT(!closed); + if (my_writer) + context.add_str(value); + return *this; + } + Json_writer_array& add(const char *value, size_t num_bytes) + { + DBUG_ASSERT(!closed); + if (my_writer) + context.add_str(value, num_bytes); + return *this; + } + Json_writer_array& add(const LEX_CSTRING &value) + { + DBUG_ASSERT(!closed); + if (my_writer) + context.add_str(value.str, value.length); + return *this; + } + Json_writer_array& add(Item *value) + { + DBUG_ASSERT(!closed); + if 
(my_writer) + context.add_str(value); + return *this; + } + Json_writer_array& add_null() + { + DBUG_ASSERT(!closed); + if (my_writer) + context.add_null(); + return *this; + } + Json_writer_array& add_table_name(const JOIN_TAB *tab) + { + DBUG_ASSERT(!closed); + if (my_writer) + context.add_table_name(tab); + return *this; + } + Json_writer_array& add_table_name(const TABLE *table) + { + DBUG_ASSERT(!closed); + if (my_writer) + context.add_table_name(table); + return *this; + } +}; + +/* + RAII-based class to disable writing into the JSON document + The tracing is disabled as soon as the object is created. + The destuctor is called as soon as we exit the scope of the object + and the tracing is enabled back. +*/ + +class Json_writer_temp_disable +{ +public: + Json_writer_temp_disable(THD *thd_arg); + ~Json_writer_temp_disable(); + THD *thd; +}; + +/* + RAII-based helper class to detect incorrect use of Json_writer. + + The idea is that a function typically must leave Json_writer at the same + identation level as it was when it was invoked. Leaving it at a different + level typically means we forgot to close an object or an array + + So, here is a way to guard + void foo(Json_writer *writer) + { + Json_writer_nesting_guard(writer); + .. do something with writer + + // at the end of the function, ~Json_writer_nesting_guard() is called + // and it makes sure that the nesting is the same as when the function was + // entered. 
+ } +*/ + +class Json_writer_nesting_guard +{ +#ifdef DBUG_OFF +public: + Json_writer_nesting_guard(Json_writer *) {} +#else + Json_writer* writer; + int indent_level; +public: + Json_writer_nesting_guard(Json_writer *writer_arg) : + writer(writer_arg), + indent_level(writer->indent_level) + {} + + ~Json_writer_nesting_guard() + { + DBUG_ASSERT(indent_level == writer->indent_level); + } +#endif +}; + +#endif diff --git a/sql/myskel.m4.in b/sql/myskel.m4.in new file mode 100644 index 00000000..13a39f96 --- /dev/null +++ b/sql/myskel.m4.in @@ -0,0 +1,13 @@ +# +# fix the #line directives in the generated .cc files +# to refer to the original sql_yacc.yy +# +m4_define([b4_syncline], +[b4_sync_start([$1], m4_bpatsubst([$2],[@CMAKE_CURRENT_BINARY_DIR@/yy_[a-z]+\.yy],@CMAKE_CURRENT_SOURCE_DIR@/sql_yacc.yy))[]dnl + +]) + +# try both paths for different bison versions +m4_sinclude(skeletons/c-skel.m4) +m4_sinclude(c-skel.m4) + diff --git a/sql/mysql_install_db.cc b/sql/mysql_install_db.cc new file mode 100644 index 00000000..5294b917 --- /dev/null +++ b/sql/mysql_install_db.cc @@ -0,0 +1,983 @@ +/* Copyright (C) 2010-2011 Monty Program Ab & Vladislav Vaintroub + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + mysql_install_db creates a new database instance (optionally as service) + on Windows. 
+*/ +#define DONT_DEFINE_VOID +#include "mariadb.h" +#include +#include +#include + +#include +#include +#include +#include +#include +#include +struct IUnknown; +#include +#include + +#include + +#define USAGETEXT \ +"mysql_install_db.exe Ver 1.00 for Windows\n" \ +"Copyright (C) 2010-2011 Monty Program Ab & Vladislav Vaintroub\n" \ +"This software comes with ABSOLUTELY NO WARRANTY. This is free software,\n" \ +"and you are welcome to modify and redistribute it under the GPL v2 license\n" \ +"Usage: mysql_install_db.exe [OPTIONS]\n" \ +"OPTIONS:" + +extern "C" const char* mysql_bootstrap_sql[]; + +static char default_datadir[MAX_PATH]; +static int create_db_instance(const char *datadir); +static uint opt_silent; +static char datadir_buffer[FN_REFLEN]; +static char mysqld_path[FN_REFLEN]; +static char *opt_datadir; +static char *opt_service; +static char *opt_password; +static int opt_port; +static int opt_innodb_page_size; +static char *opt_socket; +static my_bool opt_default_user; +static my_bool opt_allow_remote_root_access; +static my_bool opt_skip_networking; +static my_bool opt_verbose_bootstrap; +static my_bool verbose_errors; +static my_bool opt_large_pages; +static char *opt_config; + +#define DEFAULT_INNODB_PAGE_SIZE 16*1024 + +static struct my_option my_long_options[]= +{ + {"help", '?', "Display this help message and exit.", 0, 0, 0, GET_NO_ARG, + NO_ARG, 0, 0, 0, 0, 0, 0}, + {"datadir", 'd', "Data directory of the new database", + &opt_datadir, &opt_datadir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"service", 'S', "Name of the Windows service", + &opt_service, &opt_service, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"password", 'p', "Root password", + &opt_password, &opt_password, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"port", 'P', "mysql port", + &opt_port, &opt_port, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"socket", 'W', + "named pipe name (if missing, it will be set the same as service)", + &opt_socket, &opt_socket, 0, 
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"default-user", 'D', "Create default user", + &opt_default_user, &opt_default_user, 0 , GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"allow-remote-root-access", 'R', + "Allows remote access from network for user root", + &opt_allow_remote_root_access, &opt_allow_remote_root_access, 0 , GET_BOOL, + OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"skip-networking", 'N', "Do not use TCP connections, use pipe instead", + &opt_skip_networking, &opt_skip_networking, 0 , GET_BOOL, OPT_ARG, 0, 0, 0, 0, + 0, 0}, + { "innodb-page-size", 'i', "Page size for innodb", + &opt_innodb_page_size, &opt_innodb_page_size, 0, GET_INT, REQUIRED_ARG, DEFAULT_INNODB_PAGE_SIZE, 1*1024, 64*1024, 0, 0, 0 }, + {"silent", 's', "Print less information", &opt_silent, + &opt_silent, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"verbose-bootstrap", 'o', "Include mysqld bootstrap output",&opt_verbose_bootstrap, + &opt_verbose_bootstrap, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + { "large-pages",'l', "Use large pages", &opt_large_pages, + &opt_large_pages, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"config",'c', "my.ini config template file", &opt_config, + &opt_config, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +static my_bool +get_one_option(const struct my_option *opt, const char *, const char *) +{ + DBUG_ENTER("get_one_option"); + switch (opt->id) { + case '?': + printf("%s\n", USAGETEXT); + my_print_help(my_long_options); + exit(0); + break; + } + DBUG_RETURN(0); +} + + +ATTRIBUTE_NORETURN static void die(const char *fmt, ...) +{ + va_list args; + DBUG_ENTER("die"); + + /* Print the error message */ + va_start(args, fmt); + fprintf(stderr, "FATAL ERROR: "); + vfprintf(stderr, fmt, args); + fputc('\n', stderr); + va_end(args); + my_end(0); + exit(1); +} + + +static void verbose( const char *fmt, ...) 
+{ + va_list args; + + if (opt_silent) + return; + + /* Print the verbose message */ + va_start(args, fmt); + vfprintf(stdout, fmt, args); + fputc('\n', stdout); + fflush(stdout); + va_end(args); +} + +static char full_config_path[MAX_PATH]; + +int main(int argc, char **argv) +{ + int error; + char self_name[MAX_PATH]; + char *p; + char *datadir = NULL; + MY_INIT(argv[0]); + GetModuleFileName(NULL, self_name, MAX_PATH); + strcpy(mysqld_path,self_name); + p= strrchr(mysqld_path, FN_LIBCHAR); + if (p) + { + strcpy(p, "\\mysqld.exe"); + } + + if ((error= handle_options(&argc, &argv, my_long_options, get_one_option))) + exit(error); + + if (opt_config != 0 && _access(opt_config, 04) != 0) + { + int err= errno; + switch(err) + { + case EACCES: + die("File %s can't be read", opt_config); + break; + case ENOENT: + die("File %s does not exist", opt_config); + break; + default: + die("Can't access file %s, errno %d",opt_config, err); + break; + } + } + if (opt_config) + { + DWORD dwret = GetFullPathName(opt_config, sizeof(full_config_path), full_config_path, NULL); + if (dwret == 0) + { + die("GetFullPathName failed, last error %u", GetLastError()); + } + else if (dwret > sizeof(full_config_path)) + { + die("Can't resolve the config file name, path too large"); + } + opt_config= full_config_path; + } + + if(opt_datadir) + datadir = opt_datadir; + + if (!datadir && opt_config) + { + for(auto section : {"server","mysqld"}) + { + auto ret = GetPrivateProfileStringA(section,"datadir", NULL, default_datadir, + sizeof(default_datadir)-1, opt_config); + if (ret) + { + datadir= default_datadir; + printf("Data directory (from config file) is %s\n",datadir); + break; + } + } + } + + if (!datadir) + { + /* + Figure out default data directory. It "data" directory, next to "bin" directory, where + mysql_install_db.exe resides. 
+ */ + strcpy(default_datadir, self_name); + p = strrchr(default_datadir, FN_LIBCHAR); + if (p) + { + *p= 0; + p= strrchr(default_datadir, FN_LIBCHAR); + if (p) + *p= 0; + } + if (!p) + { + die("--datadir option not provided, and default datadir not found"); + my_print_help(my_long_options); + } + strcat_s(default_datadir, "\\data"); + datadir= default_datadir; + printf("Default data directory is %s\n",datadir); + } + + DBUG_ASSERT(datadir); + + + /* Workaround WiX bug (strip possible quote character at the end of path) */ + size_t len= strlen(datadir); + if (len > 0) + { + if (datadir[len-1] == '"') + { + datadir[len-1]= 0; + } + if (datadir[0] == '"') + { + datadir++; + } + } + GetFullPathName(datadir, FN_REFLEN, datadir_buffer, NULL); + datadir= datadir_buffer; + + if (create_db_instance(datadir)) + { + die("database creation failed"); + } + + printf("Creation of the database was successful\n"); + return 0; +} + + + +/** + Convert slashes in paths into MySQL-compatible form +*/ + +static void convert_slashes(char *s, char replacement) +{ + for (; *s; s++) + if (*s == '\\' || *s == '/') + *s= replacement; +} + + +/** + Calculate basedir from mysqld.exe path. + Basedir assumed to be is one level up from the mysqld.exe directory location. 
+ E.g basedir for C:\my\bin\mysqld.exe would be C:\my +*/ + +static void get_basedir(char *basedir, int size, const char *mysqld_path, + char slash) +{ + strcpy_s(basedir, size, mysqld_path); + convert_slashes(basedir, '\\'); + char *p= strrchr(basedir, '\\'); + if (p) + { + *p = 0; + p= strrchr(basedir, '\\'); + if (p) + *p= 0; + } +} + +#define STR(s) _STR(s) +#define _STR(s) #s + +static char *get_plugindir() +{ + static char plugin_dir[2*MAX_PATH]; + get_basedir(plugin_dir, sizeof(plugin_dir), mysqld_path, '/'); + safe_strcat(plugin_dir, sizeof(plugin_dir), "/" STR(INSTALL_PLUGINDIR)); + + if (access(plugin_dir, 0) == 0) + return plugin_dir; + + return NULL; +} + +/** + Allocate and initialize command line for mysqld --bootstrap. + The resulting string is passed to popen, so it has a lot of quoting + quoting around the full string plus quoting around parameters with spaces. +*/ + +static char *init_bootstrap_command_line(char *cmdline, size_t size) +{ + snprintf(cmdline, size - 1, + "\"\"%s\"" + " --defaults-file=my.ini" + " %s" + " --bootstrap" + " --datadir=." + " --loose-innodb-buffer-pool-size=20M" + "\"" + , mysqld_path, opt_verbose_bootstrap ? "--console" : ""); + return cmdline; +} + +static char my_ini_path[MAX_PATH]; + +static void write_myini_str(const char *key, const char* val, const char *section="mysqld") +{ + DBUG_ASSERT(my_ini_path[0]); + if (!WritePrivateProfileString(section, key, val, my_ini_path)) + { + die("Can't write to ini file key=%s, val=%s, section=%s, Windows error %u",key,val,section, + GetLastError()); + } +} + + +static void write_myini_int(const char* key, int val, const char* section = "mysqld") +{ + char buf[10]; + itoa(val, buf, 10); + write_myini_str(key, buf, section); +} + +/** + Create my.ini in current directory (this is assumed to be + data directory as well). 
+*/ + +static int create_myini() +{ + my_bool enable_named_pipe= FALSE; + printf("Creating my.ini file\n"); + + char path_buf[MAX_PATH]; + GetCurrentDirectory(MAX_PATH, path_buf); + snprintf(my_ini_path,sizeof(my_ini_path), "%s\\my.ini", path_buf); + if (opt_config) + { + if (!CopyFile(opt_config, my_ini_path,TRUE)) + { + die("Can't copy %s to my.ini , last error %lu", opt_config, GetLastError()); + } + } + + /* Write out server settings. */ + convert_slashes(path_buf,'/'); + write_myini_str("datadir",path_buf); + + if (opt_skip_networking) + { + write_myini_str("skip-networking","ON"); + if (!opt_socket) + opt_socket= opt_service; + } + enable_named_pipe= (my_bool) + ((opt_socket && opt_socket[0]) || opt_skip_networking); + + if (enable_named_pipe) + { + write_myini_str("named-pipe","ON"); + } + + if (opt_socket && opt_socket[0]) + { + write_myini_str("socket", opt_socket); + } + if (opt_port) + { + write_myini_int("port", opt_port); + } + if (opt_innodb_page_size != DEFAULT_INNODB_PAGE_SIZE) + { + write_myini_int("innodb-page-size", opt_innodb_page_size); + } + if (opt_large_pages) + { + write_myini_str("large-pages","ON"); + } + + /* Write out client settings. 
*/ + + /* Used for named pipes */ + if (opt_socket && opt_socket[0]) + write_myini_str("socket",opt_socket,"client"); + if (opt_skip_networking) + write_myini_str("protocol", "pipe", "client"); + else if (opt_port) + write_myini_int("port",opt_port,"client"); + + char *plugin_dir = get_plugindir(); + if (plugin_dir) + write_myini_str("plugin-dir", plugin_dir, "client"); + return 0; +} + + +static constexpr const char* update_root_passwd= + "UPDATE mysql.global_priv SET priv=json_set(priv," + "'$.password_last_changed', UNIX_TIMESTAMP()," + "'$.plugin','mysql_native_password'," + "'$.authentication_string','%s'," + "'$.auth_or', json_array(json_object(), json_object('plugin', 'gssapi','authentication_string','SID:BA'))" + ") where User= 'root';\n "; + +static constexpr char remove_default_user_cmd[]= + "DELETE FROM mysql.user where User='';\n"; +static constexpr char allow_remote_root_access_cmd[]= + "CREATE TEMPORARY TABLE tmp_user LIKE global_priv;\n" + "INSERT INTO tmp_user SELECT * from global_priv where user='root' " + " AND host='localhost';\n" + "UPDATE tmp_user SET host='%';\n" + "INSERT INTO global_priv SELECT * FROM tmp_user;\n" + "DROP TABLE tmp_user;\n"; +static const char end_of_script[]="-- end."; + +/* +Add or remove privilege for a user +@param[in] account_name - user name, Windows style, e.g "NT SERVICE\mariadb", or ".\joe" +@param[in] privilege name - standard Windows privilege name, e.g "SeLockMemoryPrivilege" +@param[in] add - when true, add privilege, otherwise remove it + +In special case where privilege name is NULL, and add is false +all privileges for the user are removed. 
+*/ +static int handle_user_privileges(const char *account_name, const wchar_t *privilege_name, bool add) +{ + LSA_OBJECT_ATTRIBUTES attr{}; + LSA_HANDLE lsa_handle; + auto status= LsaOpenPolicy( + 0, &attr, POLICY_LOOKUP_NAMES | POLICY_CREATE_ACCOUNT, &lsa_handle); + if (status) + { + verbose("LsaOpenPolicy returned %lu", LsaNtStatusToWinError(status)); + return 1; + } + BYTE sidbuf[SECURITY_MAX_SID_SIZE]; + PSID sid= (PSID) sidbuf; + SID_NAME_USE name_use; + char domain_name[256]; + DWORD cbSid= sizeof(sidbuf); + DWORD cbDomain= sizeof(domain_name); + BOOL ok= LookupAccountNameA(0, account_name, sid, &cbSid, domain_name, + &cbDomain, &name_use); + if (!ok) + { + verbose("LsaOpenPolicy returned %lu", LsaNtStatusToWinError(status)); + return 1; + } + + if (privilege_name) + { + LSA_UNICODE_STRING priv{}; + priv.Buffer= (PWSTR) privilege_name; + priv.Length= (USHORT) wcslen(privilege_name) * sizeof(wchar_t); + priv.MaximumLength= priv.Length; + if (add) + { + status= LsaAddAccountRights(lsa_handle, sid, &priv, 1); + if (status) + { + verbose("LsaAddAccountRights returned %lu/%lu", status, + LsaNtStatusToWinError(status)); + return 1; + } + } + else + { + status= LsaRemoveAccountRights(lsa_handle, sid, FALSE, &priv, 1); + if (status) + { + verbose("LsaRemoveRights returned %lu/%lu", + LsaNtStatusToWinError(status)); + return 1; + } + } + } + else + { + DBUG_ASSERT(!add); + status= LsaRemoveAccountRights(lsa_handle, sid, TRUE, 0, 0); + } + LsaClose(lsa_handle); + return 0; +} + +/* Register service. 
Assume my.ini is in datadir */ + +static int register_service(const char *datadir, const char *user, const char *passwd) +{ + char buf[3*MAX_PATH +32]; /* path to mysqld.exe, to my.ini, service name */ + SC_HANDLE sc_manager, sc_service; + + size_t datadir_len= strlen(datadir); + const char *backslash_after_datadir= "\\"; + + if (datadir_len && datadir[datadir_len-1] == '\\') + backslash_after_datadir= ""; + + verbose("Registering service '%s'", opt_service); + my_snprintf(buf, sizeof(buf)-1, + "\"%s\" \"--defaults-file=%s%smy.ini\" \"%s\"" , mysqld_path, datadir, + backslash_after_datadir, opt_service); + + /* Get a handle to the SCM database. */ + sc_manager= OpenSCManager( NULL, NULL, SC_MANAGER_ALL_ACCESS); + if (!sc_manager) + { + die("OpenSCManager failed (%u)\n", GetLastError()); + } + + /* Create the service. */ + sc_service= CreateService(sc_manager, opt_service, opt_service, + SERVICE_ALL_ACCESS, SERVICE_WIN32_OWN_PROCESS, SERVICE_AUTO_START, + SERVICE_ERROR_NORMAL, buf, NULL, NULL, NULL, user, passwd); + + if (!sc_service) + { + CloseServiceHandle(sc_manager); + die("CreateService failed (%u)", GetLastError()); + } + char description[] = "MariaDB database server"; + SERVICE_DESCRIPTION sd= { description }; + ChangeServiceConfig2(sc_service, SERVICE_CONFIG_DESCRIPTION, &sd); + CloseServiceHandle(sc_service); + CloseServiceHandle(sc_manager); + return 0; +} + + +static void clean_directory(const char *dir) +{ + char dir2[MAX_PATH + 4]= {}; + snprintf(dir2, MAX_PATH+2, "%s\\*", dir); + + SHFILEOPSTRUCT fileop; + fileop.hwnd= NULL; /* no status display */ + fileop.wFunc= FO_DELETE; /* delete operation */ + fileop.pFrom= dir2; /* source file name as double null terminated string */ + fileop.pTo= NULL; /* no destination needed */ + fileop.fFlags= FOF_NOCONFIRMATION|FOF_SILENT; /* do not prompt the user */ + + + fileop.fAnyOperationsAborted= FALSE; + fileop.lpszProgressTitle= NULL; + fileop.hNameMappings= NULL; + + SHFileOperation(&fileop); +} + + +/* + Define 
directory permission to have inheritable all access for a user + (defined as username or group string or as SID) +*/ + +static int set_directory_permissions(const char *dir, const char *os_user, + DWORD permission) +{ + + struct{ + TOKEN_USER tokenUser; + BYTE buffer[SECURITY_MAX_SID_SIZE]; + } tokenInfoBuffer; + + HANDLE hDir= CreateFile(dir,READ_CONTROL|WRITE_DAC,0,NULL,OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS,NULL); + if (hDir == INVALID_HANDLE_VALUE) + return -1; + ACL* pOldDACL; + SECURITY_DESCRIPTOR* pSD= NULL; + EXPLICIT_ACCESS ea={0}; + WELL_KNOWN_SID_TYPE wellKnownSidType = WinNullSid; + PSID pSid= NULL; + + GetSecurityInfo(hDir, SE_FILE_OBJECT , DACL_SECURITY_INFORMATION,NULL, NULL, + &pOldDACL, NULL, (void**)&pSD); + + if (os_user) + { + /* Check for 3 predefined service users + They might have localized names in non-English Windows, thus they need + to be handled using well-known SIDs. + */ + if (stricmp(os_user, "NT AUTHORITY\\NetworkService") == 0) + { + wellKnownSidType= WinNetworkServiceSid; + } + else if (stricmp(os_user, "NT AUTHORITY\\LocalService") == 0) + { + wellKnownSidType= WinLocalServiceSid; + } + else if (stricmp(os_user, "NT AUTHORITY\\LocalSystem") == 0) + { + wellKnownSidType= WinLocalSystemSid; + } + + if (wellKnownSidType != WinNullSid) + { + DWORD size= SECURITY_MAX_SID_SIZE; + pSid= (PSID)tokenInfoBuffer.buffer; + if (!CreateWellKnownSid(wellKnownSidType, NULL, pSid, + &size)) + { + return 1; + } + ea.Trustee.TrusteeForm= TRUSTEE_IS_SID; + ea.Trustee.ptstrName= (LPTSTR)pSid; + } + else + { + ea.Trustee.TrusteeForm= TRUSTEE_IS_NAME; + ea.Trustee.ptstrName= (LPSTR)os_user; + } + } + else + { + HANDLE token; + if (OpenProcessToken(GetCurrentProcess(),TOKEN_QUERY, &token)) + { + + DWORD length= (DWORD) sizeof(tokenInfoBuffer); + if (GetTokenInformation(token, TokenUser, &tokenInfoBuffer, + length, &length)) + { + pSid= tokenInfoBuffer.tokenUser.User.Sid; + } + } + if (!pSid) + return 0; + ea.Trustee.TrusteeForm= TRUSTEE_IS_SID; + 
ea.Trustee.ptstrName= (LPTSTR)pSid; + } + ea.Trustee.TrusteeType= TRUSTEE_IS_UNKNOWN; + ea.grfAccessMode= GRANT_ACCESS; + ea.grfAccessPermissions= permission; + ea.grfInheritance= CONTAINER_INHERIT_ACE | OBJECT_INHERIT_ACE; + ACL *pNewDACL= 0; + + ACCESS_MASK access_mask; + if (GetEffectiveRightsFromAcl(pOldDACL, &ea.Trustee, &access_mask) != ERROR_SUCCESS + || (access_mask & permission) != permission) + { + SetEntriesInAcl(1, &ea, pOldDACL, &pNewDACL); + } + + if (pNewDACL) + { + SetSecurityInfo(hDir,SE_FILE_OBJECT,DACL_SECURITY_INFORMATION,NULL, NULL, + pNewDACL, NULL); + } + if (pSD != NULL) + LocalFree((HLOCAL) pSD); + if (pNewDACL != NULL) + LocalFree((HLOCAL) pNewDACL); + CloseHandle(hDir); + return 0; +} + +static void set_permissions(const char *datadir, const char *service_user) +{ + /* + Set data directory permissions for both current user and + the one who who runs services. + */ + set_directory_permissions(datadir, NULL, + FILE_GENERIC_READ | FILE_GENERIC_WRITE); + if (!service_user) + return; + + /* Datadir permission for the service. */ + set_directory_permissions(datadir, service_user, FILE_ALL_ACCESS); + char basedir[MAX_PATH]; + char path[MAX_PATH]; + + struct + { + const char *subdir; + DWORD perm; + } all_subdirs[]= { + {STR(INSTALL_PLUGINDIR), FILE_GENERIC_READ | FILE_GENERIC_EXECUTE}, + {STR(INSTALL_SHAREDIR), FILE_GENERIC_READ}, + }; + + + if (strncmp(service_user,"NT SERVICE\\",sizeof("NT SERVICE\\")-1) == 0) + { + /* + Read and execute permission for executables can/should be given + to any service account, rather than specific one. 
+ */ + service_user="NT SERVICE\\ALL SERVICES"; + } + + get_basedir(basedir, sizeof(basedir), mysqld_path, '\\'); + for (int i= 0; i < array_elements(all_subdirs); i++) + { + auto subdir= + snprintf(path, sizeof(path), "%s\\%s", basedir, all_subdirs[i].subdir); + if (access(path, 0) == 0) + { + set_directory_permissions(path, service_user, all_subdirs[i].perm); + } + } + + /* Bindir, the directory where mysqld_path is located. */ + strcpy_s(path, mysqld_path); + char *end= strrchr(path, '/'); + if (!end) + end= strrchr(path, '\\'); + if (end) + *end= 0; + if (access(path, 0) == 0) + { + set_directory_permissions(path, service_user, + FILE_GENERIC_READ | FILE_GENERIC_EXECUTE); + } +} + +/* Create database instance (including registering as service etc) .*/ + +static int create_db_instance(const char *datadir) +{ + int ret= 0; + char cwd[MAX_PATH]; + DWORD cwd_len= MAX_PATH; + char cmdline[3*MAX_PATH]; + FILE *in; + bool created_datadir= false; + DWORD last_error; + bool service_created= false; + std::string mysql_db_dir; + + verbose("Running bootstrap"); + + GetCurrentDirectory(cwd_len, cwd); + + /* Create datadir and datadir/mysql, if they do not already exist. 
*/ + + if (CreateDirectory(datadir, NULL)) + { + created_datadir= true; + } + else if (GetLastError() != ERROR_ALREADY_EXISTS) + { + last_error = GetLastError(); + switch(last_error) + { + case ERROR_ACCESS_DENIED: + die("Can't create data directory '%s' (access denied)\n", + datadir); + break; + case ERROR_PATH_NOT_FOUND: + die("Can't create data directory '%s' " + "(one or more intermediate directories do not exist)\n", + datadir); + break; + default: + die("Can't create data directory '%s', last error %u\n", + datadir, last_error); + break; + } + } + + if (!SetCurrentDirectory(datadir)) + { + last_error = GetLastError(); + switch (last_error) + { + case ERROR_DIRECTORY: + die("Can't set current directory to '%s', the path is not a valid directory \n", + datadir); + break; + default: + die("Can' set current directory to '%s', last error %u\n", + datadir, last_error); + break; + } + } + + if (!PathIsDirectoryEmpty(datadir)) + { + fprintf(stderr, "ERROR : Data directory %s is not empty." + " Only new or empty existing directories are accepted for --datadir\n", datadir); + exit(1); + } + + std::string service_user; + /* Register service if requested. */ + if (opt_service && opt_service[0]) + { + /* Run service under virtual account NT SERVICE\service_name.*/ + service_user.append("NT SERVICE\\").append(opt_service); + ret = register_service(datadir, service_user.c_str(), NULL); + if (ret) + goto end; + service_created = true; + } + + set_permissions(datadir, service_user.c_str()); + + if (opt_large_pages) + { + handle_user_privileges(service_user.c_str(), L"SeLockMemoryPrivilege", true); + } + + /* + Get security descriptor for the data directory. + It will be passed, as SDDL text, to the mysqld bootstrap subprocess, + to allow for correct subdirectory permissions. 
+ */ + PSECURITY_DESCRIPTOR pSD; + if (GetNamedSecurityInfoA(datadir, SE_FILE_OBJECT, DACL_SECURITY_INFORMATION, + 0, 0, 0, 0, &pSD) == ERROR_SUCCESS) + { + char* string_sd = NULL; + if (ConvertSecurityDescriptorToStringSecurityDescriptor(pSD, SDDL_REVISION_1, + DACL_SECURITY_INFORMATION, &string_sd, 0)) + { + _putenv_s("MARIADB_NEW_DIRECTORY_SDDL", string_sd); + LocalFree(string_sd); + } + LocalFree(pSD); + } + + /* Create my.ini file in data directory.*/ + ret = create_myini(); + if (ret) + goto end; + + /* Do mysqld --bootstrap. */ + init_bootstrap_command_line(cmdline, sizeof(cmdline)); + + if(opt_verbose_bootstrap) + printf("Executing %s\n", cmdline); + + in= popen(cmdline, "wt"); + if (!in) + goto end; + + if (setvbuf(in, NULL, _IONBF, 0)) + { + verbose("WARNING: Can't disable buffering on mysqld's stdin"); + } + static const char *pre_bootstrap_sql[] = { "create database mysql;\n","use mysql;\n"}; + for (auto cmd : pre_bootstrap_sql) + { + /* Write the bootstrap script to stdin. */ + if (fwrite(cmd, strlen(cmd), 1, in) != 1) + { + verbose("ERROR: Can't write to mysqld's stdin"); + ret= 1; + goto end; + } + } + + for (int i= 0; mysql_bootstrap_sql[i]; i++) + { + auto cmd = mysql_bootstrap_sql[i]; + /* Write the bootstrap script to stdin. */ + if (fwrite(cmd, strlen(cmd), 1, in) != 1) + { + verbose("ERROR: Can't write to mysqld's stdin"); + ret= 1; + goto end; + } + } + + /* Remove default user, if requested. */ + if (!opt_default_user) + { + verbose("Removing default user",remove_default_user_cmd); + fputs(remove_default_user_cmd, in); + fflush(in); + } + + if (opt_allow_remote_root_access) + { + verbose("Allowing remote access for user root",remove_default_user_cmd); + fputs(allow_remote_root_access_cmd,in); + fflush(in); + } + + /* Change root password if requested. 
*/ + if (opt_password && opt_password[0]) + { + verbose("Setting root password"); + char buf[2 * MY_SHA1_HASH_SIZE + 2]; + my_make_scrambled_password(buf, opt_password, strlen(opt_password)); + fprintf(in, update_root_passwd, buf); + fflush(in); + } + + /* + On some reason, bootstrap chokes if last command sent via stdin ends with + newline, so we supply a dummy comment, that does not end with newline. + */ + fputs(end_of_script, in); + fflush(in); + + /* Check if bootstrap has completed successfully. */ + ret= pclose(in); + if (ret) + { + verbose("mysqld returned error %d in pclose",ret); + goto end; + } + +end: + if (!ret) + return ret; + + /* Cleanup after error.*/ + if (created_datadir) + { + SetCurrentDirectory(cwd); + clean_directory(datadir); + } + + if (service_created) + { + auto sc_manager = OpenSCManager(NULL, NULL, SC_MANAGER_ALL_ACCESS); + if (sc_manager) + { + auto sc_handle= OpenService(sc_manager,opt_service, DELETE); + if (sc_handle) + { + DeleteService(sc_handle); + CloseServiceHandle(sc_handle); + } + CloseServiceHandle(sc_manager); + } + + /*Remove all service user privileges for the user.*/ + if(strncmp(service_user.c_str(), "NT SERVICE\\", + sizeof("NT SERVICE\\")-1)) + { + handle_user_privileges(service_user.c_str(), 0, false); + } + if (created_datadir) + RemoveDirectory(opt_datadir); + } + return ret; +} diff --git a/sql/mysql_upgrade_service.cc b/sql/mysql_upgrade_service.cc new file mode 100644 index 00000000..02fae11a --- /dev/null +++ b/sql/mysql_upgrade_service.cc @@ -0,0 +1,613 @@ +/* Copyright (C) 2010-2011 Monty Program Ab & Vladislav Vaintroub + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + mysql_upgrade_service upgrades mysql service on Windows. + It changes service definition to point to the new mysqld.exe, restarts the + server and runs mysql_upgrade +*/ + +#define DONT_DEFINE_VOID +#include "mariadb.h" +#include +#include +#include +#include +#include +#include + +#include +#include + +extern int upgrade_config_file(const char *myini_path); + +/* We're using version APIs */ +#pragma comment(lib, "version") + +#define USAGETEXT \ +"mysql_upgrade_service.exe Ver 1.00 for Windows\n" \ +"Copyright (C) 2010-2011 Monty Program Ab & Vladislav Vaintroub" \ +"This software comes with ABSOLUTELY NO WARRANTY. This is free software,\n" \ +"and you are welcome to modify and redistribute it under the GPL v2 license\n" \ +"Usage: mysql_upgrade_service.exe [OPTIONS]\n" \ +"OPTIONS:" + +static char mysqld_path[MAX_PATH]; +static char mysqladmin_path[MAX_PATH]; +static char mysqlupgrade_path[MAX_PATH]; + +static char defaults_file_param[MAX_PATH + 16]; /*--defaults-file= */ +static char logfile_path[MAX_PATH]; +char my_ini_bck[MAX_PATH]; +mysqld_service_properties service_properties; +static char *opt_service; +static SC_HANDLE service; +static SC_HANDLE scm; +HANDLE mysqld_process; // mysqld.exe started for upgrade +DWORD initial_service_state= UINT_MAX; // initial state of the service +HANDLE logfile_handle; + +/* + Startup and shutdown timeouts, in seconds. 
+ Maybe,they can be made parameters +*/ +static unsigned int startup_timeout= 60; +static unsigned int shutdown_timeout= 60*60; + +static struct my_option my_long_options[]= +{ + {"help", '?', "Display this help message and exit.", 0, 0, 0, GET_NO_ARG, + NO_ARG, 0, 0, 0, 0, 0, 0}, + {"service", 'S', "Name of the existing Windows service", + &opt_service, &opt_service, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + + +static my_bool +get_one_option(const struct my_option *opt, const char *, const char *) +{ + DBUG_ENTER("get_one_option"); + switch (opt->id) { + case '?': + printf("%s\n", USAGETEXT); + my_print_help(my_long_options); + exit(0); + break; + } + DBUG_RETURN(0); +} + + + +static void log(const char *fmt, ...) +{ + va_list args; + /* Print the error message */ + va_start(args, fmt); + vfprintf(stdout,fmt, args); + va_end(args); + fputc('\n', stdout); + fflush(stdout); +} + + +static void die(const char *fmt, ...) +{ + va_list args; + DBUG_ENTER("die"); + + /* Print the error message */ + va_start(args, fmt); + + fprintf(stderr, "FATAL ERROR: "); + vfprintf(stderr, fmt, args); + fputc('\n', stderr); + if (logfile_path[0]) + { + fprintf(stderr, "Additional information can be found in the log file %s", + logfile_path); + } + va_end(args); + fputc('\n', stderr); + fflush(stdout); + /* Cleanup */ + + if (my_ini_bck[0]) + { + MoveFileEx(my_ini_bck, service_properties.inifile,MOVEFILE_REPLACE_EXISTING); + } + + /* + Stop service that we started, if it was not initially running at + program start. 
+ */ + if (initial_service_state != UINT_MAX && initial_service_state != SERVICE_RUNNING) + { + SERVICE_STATUS service_status; + ControlService(service, SERVICE_CONTROL_STOP, &service_status); + } + + if (scm) + CloseServiceHandle(scm); + if (service) + CloseServiceHandle(service); + /* Stop mysqld.exe, if it was started for upgrade */ + if (mysqld_process) + TerminateProcess(mysqld_process, 3); + if (logfile_handle) + CloseHandle(logfile_handle); + my_end(0); + + exit(1); +} + +#define WRITE_LOG(fmt,...) {\ + char log_buf[1024]; \ + DWORD nbytes; \ + snprintf(log_buf,sizeof(log_buf), fmt, __VA_ARGS__);\ + WriteFile(logfile_handle,log_buf, (DWORD)strlen(log_buf), &nbytes , 0);\ +} + +/* + spawn-like function to run subprocesses. + We also redirect the full output to the log file. + + Typical usage could be something like + run_tool(P_NOWAIT, "cmd.exe", "/c" , "echo", "foo", NULL) + + @param wait_flag (P_WAIT or P_NOWAIT) + @program program to run + + Rest of the parameters is NULL terminated strings building command line. + + @return intptr containing either process handle, if P_NOWAIT is used + or return code of the process (if P_WAIT is used) +*/ + +static intptr_t run_tool(int wait_flag, const char *program,...) 
+{ + static char cmdline[32*1024]; + char *end; + va_list args; + va_start(args, program); + if (!program) + die("Invalid call to run_tool"); + end= strxmov(cmdline, "\"", program, "\"", NullS); + + for(;;) + { + char *param= va_arg(args,char *); + if(!param) + break; + end= strxmov(end, " \"", param, "\"", NullS); + } + va_end(args); + + /* Create output file if not alredy done */ + if (!logfile_handle) + { + char tmpdir[FN_REFLEN]; + GetTempPath(FN_REFLEN, tmpdir); + sprintf_s(logfile_path, "%smysql_upgrade_service.%s.log", tmpdir, + opt_service); + SECURITY_ATTRIBUTES attr= {0}; + attr.nLength= sizeof(SECURITY_ATTRIBUTES); + attr.bInheritHandle= TRUE; + logfile_handle= CreateFile(logfile_path, FILE_APPEND_DATA, + FILE_SHARE_READ|FILE_SHARE_WRITE, &attr, CREATE_ALWAYS, 0, NULL); + if (logfile_handle == INVALID_HANDLE_VALUE) + { + die("Cannot open log file %s, windows error %u", + logfile_path, GetLastError()); + } + } + + WRITE_LOG("Executing %s\r\n", cmdline); + + /* Start child process */ + STARTUPINFO si= {0}; + si.cb= sizeof(si); + si.hStdInput= GetStdHandle(STD_INPUT_HANDLE); + si.hStdError= logfile_handle; + si.hStdOutput= logfile_handle; + si.dwFlags= STARTF_USESTDHANDLES; + PROCESS_INFORMATION pi; + if (!CreateProcess(NULL, cmdline, NULL, + NULL, TRUE, NULL, NULL, NULL, &si, &pi)) + { + die("CreateProcess failed (commandline %s)", cmdline); + } + CloseHandle(pi.hThread); + + if (wait_flag == P_NOWAIT) + { + /* Do not wait for process to complete, return handle. */ + return (intptr_t)pi.hProcess; + } + + /* Wait for process to complete. 
*/ + if (WaitForSingleObject(pi.hProcess, INFINITE) != WAIT_OBJECT_0) + { + die("WaitForSingleObject() failed"); + } + DWORD exit_code; + if (!GetExitCodeProcess(pi.hProcess, &exit_code)) + { + die("GetExitCodeProcess() failed"); + } + return (intptr_t)exit_code; +} + + +void stop_mysqld_service() +{ + DWORD needed; + SERVICE_STATUS_PROCESS ssp; + int timeout= shutdown_timeout*1000; + for(;;) + { + if (!QueryServiceStatusEx(service, SC_STATUS_PROCESS_INFO, + (LPBYTE)&ssp, + sizeof(SERVICE_STATUS_PROCESS), + &needed)) + { + die("QueryServiceStatusEx failed (%u)\n", GetLastError()); + } + + /* + Remember initial state of the service, we will restore it on + exit. + */ + if(initial_service_state == UINT_MAX) + initial_service_state= ssp.dwCurrentState; + + switch(ssp.dwCurrentState) + { + case SERVICE_STOPPED: + return; + case SERVICE_RUNNING: + if(!ControlService(service, SERVICE_CONTROL_STOP, + (SERVICE_STATUS *)&ssp)) + die("ControlService failed, error %u\n", GetLastError()); + case SERVICE_START_PENDING: + case SERVICE_STOP_PENDING: + if(timeout < 0) + die("Service does not stop after %d seconds timeout",shutdown_timeout); + Sleep(100); + timeout -= 100; + break; + default: + die("Unexpected service state %d",ssp.dwCurrentState); + } + } +} + + +/* + Shutdown mysql server. Not using mysqladmin, since + our --skip-grant-tables do not work anymore after mysql_upgrade + that does "flush privileges". Instead, the shutdown event is set. 
+*/ +void initiate_mysqld_shutdown() +{ + char event_name[32]; + DWORD pid= GetProcessId(mysqld_process); + sprintf_s(event_name, "MySQLShutdown%d", pid); + HANDLE shutdown_handle= OpenEvent(EVENT_MODIFY_STATE, FALSE, event_name); + if(!shutdown_handle) + { + die("OpenEvent() failed for shutdown event"); + } + + if(!SetEvent(shutdown_handle)) + { + die("SetEvent() failed"); + } +} + +static void get_service_config() +{ + scm = OpenSCManager(NULL, NULL, SC_MANAGER_ALL_ACCESS); + if (!scm) + die("OpenSCManager failed with %u", GetLastError()); + service = OpenService(scm, opt_service, SERVICE_ALL_ACCESS); + if (!service) + die("OpenService failed with %u", GetLastError()); + + BYTE config_buffer[8 * 1024]; + LPQUERY_SERVICE_CONFIGW config = (LPQUERY_SERVICE_CONFIGW)config_buffer; + DWORD size = sizeof(config_buffer); + DWORD needed; + if (!QueryServiceConfigW(service, config, size, &needed)) + die("QueryServiceConfig failed with %u", GetLastError()); + + if (get_mysql_service_properties(config->lpBinaryPathName, &service_properties)) + { + die("Not a valid MySQL service"); + } + + int my_major = MYSQL_VERSION_ID / 10000; + int my_minor = (MYSQL_VERSION_ID % 10000) / 100; + int my_patch = MYSQL_VERSION_ID % 100; + + if (my_major < service_properties.version_major || + (my_major == service_properties.version_major && my_minor < service_properties.version_minor)) + { + die("Can not downgrade, the service is currently running as version %d.%d.%d" + ", my version is %d.%d.%d", service_properties.version_major, service_properties.version_minor, + service_properties.version_patch, my_major, my_minor, my_patch); + } + if (service_properties.inifile[0] == 0) + { + /* + Weird case, no --defaults-file in service definition, need to create one. 
+ */ + sprintf_s(service_properties.inifile, MAX_PATH, "%s\\my.ini", service_properties.datadir); + } + sprintf(defaults_file_param, "--defaults-file=%s", service_properties.inifile); +} +/* + Change service configuration (binPath) to point to mysqld from + this installation. +*/ +static void change_service_config() +{ + char buf[MAX_PATH]; + char commandline[3 * MAX_PATH + 19]; + int i; + + /* + Write datadir to my.ini, after converting backslashes to + unix style slashes. + */ + if (service_properties.datadir[0]) + { + strcpy_s(buf, MAX_PATH, service_properties.datadir); + for (i= 0; buf[i]; i++) + { + if (buf[i] == '\\') + buf[i]= '/'; + } + WritePrivateProfileString("mysqld", "datadir", buf, + service_properties.inifile); + } + + /* + Remove basedir from defaults file, otherwise the service wont come up in + the new version, and will complain about mismatched message file. + */ + WritePrivateProfileString("mysqld", "basedir",NULL, service_properties.inifile); + + sprintf(defaults_file_param,"--defaults-file=%s", service_properties.inifile); + sprintf_s(commandline, "\"%s\" \"%s\" \"%s\"", mysqld_path, + defaults_file_param, opt_service); + if (!ChangeServiceConfig(service, SERVICE_NO_CHANGE, SERVICE_NO_CHANGE, + SERVICE_NO_CHANGE, commandline, NULL, NULL, NULL, NULL, NULL, NULL)) + { + die("ChangeServiceConfig failed with %u", GetLastError()); + } + +} + + +int main(int argc, char **argv) +{ + int error; + MY_INIT(argv[0]); + char bindir[FN_REFLEN]; + char *p; + + /* Parse options */ + if ((error= handle_options(&argc, &argv, my_long_options, get_one_option))) + die(""); + if (!opt_service) + die("--service=# parameter is mandatory"); + + /* + Get full path to mysqld, we need it when changing service configuration. + Assume installation layout, i.e mysqld.exe, mysqladmin.exe, mysqlupgrade.exe + and mysql_upgrade_service.exe are in the same directory. 
+ */ + GetModuleFileName(NULL, bindir, FN_REFLEN); + p= strrchr(bindir, FN_LIBCHAR); + if(p) + { + *p= 0; + } + sprintf_s(mysqld_path, "%s\\mysqld.exe", bindir); + sprintf_s(mysqladmin_path, "%s\\mysqladmin.exe", bindir); + sprintf_s(mysqlupgrade_path, "%s\\mysql_upgrade.exe", bindir); + + char *paths[]= {mysqld_path, mysqladmin_path, mysqlupgrade_path}; + for(int i= 0; i< 3;i++) + { + if(GetFileAttributes(paths[i]) == INVALID_FILE_ATTRIBUTES) + die("File %s does not exist", paths[i]); + } + + /* + Messages written on stdout should not be buffered, GUI upgrade program + reads them from pipe and uses as progress indicator. + */ + setvbuf(stdout, NULL, _IONBF, 0); + int phase = 0; + int max_phases=10; + get_service_config(); + + bool my_ini_exists; + bool old_mysqld_exe_exists; + + log("Phase %d/%d: Stopping service", ++phase,max_phases); + stop_mysqld_service(); + + my_ini_exists = (GetFileAttributes(service_properties.inifile) != INVALID_FILE_ATTRIBUTES); + if (!my_ini_exists) + { + HANDLE h = CreateFile(service_properties.inifile, GENERIC_WRITE, FILE_SHARE_READ|FILE_SHARE_WRITE, + 0, CREATE_NEW, 0 ,0); + if (h != INVALID_HANDLE_VALUE) + { + CloseHandle(h); + } + else if (GetLastError() != ERROR_FILE_EXISTS) + { + die("Can't create ini file %s, last error %u", service_properties.inifile, GetLastError()); + } + } + + old_mysqld_exe_exists= (GetFileAttributes(service_properties.mysqld_exe) != + INVALID_FILE_ATTRIBUTES); + bool do_start_stop_server = old_mysqld_exe_exists && initial_service_state != SERVICE_RUNNING; + + log("Phase %d/%d: Start and stop server in the old version, to avoid crash recovery %s", ++phase, max_phases, + do_start_stop_server?",this can take some time":"(skipped)"); + + char socket_param[FN_REFLEN]; + sprintf_s(socket_param, "--socket=mysql_upgrade_service_%u", + GetCurrentProcessId()); + + DWORD start_duration_ms = 0; + + if (do_start_stop_server) + { + /* Start/stop server with --loose-innodb-fast-shutdown=1 */ + mysqld_process = 
(HANDLE)run_tool(P_NOWAIT, service_properties.mysqld_exe, + defaults_file_param, "--loose-innodb-fast-shutdown=1", "--skip-networking", + "--enable-named-pipe", socket_param, "--skip-slave-start", NULL); + + if (mysqld_process == INVALID_HANDLE_VALUE) + { + die("Cannot start mysqld.exe process, last error =%u", GetLastError()); + } + char pipe_name[64]; + snprintf(pipe_name, sizeof(pipe_name), "\\\\.\\pipe\\mysql_upgrade_service_%lu", + GetCurrentProcessId()); + for (;;) + { + if (WaitForSingleObject(mysqld_process, 0) != WAIT_TIMEOUT) + die("mysqld.exe did not start"); + + if (WaitNamedPipe(pipe_name, 0)) + { + // Server started, shut it down. + initiate_mysqld_shutdown(); + if (WaitForSingleObject((HANDLE)mysqld_process, shutdown_timeout * 1000) != WAIT_OBJECT_0) + { + die("Could not shutdown server started with '--innodb-fast-shutdown=0'"); + } + DWORD exit_code; + if (!GetExitCodeProcess((HANDLE)mysqld_process, &exit_code)) + { + die("Could not get mysqld's exit code"); + } + if (exit_code) + { + die("Could not get successfully shutdown mysqld"); + } + CloseHandle(mysqld_process); + break; + } + Sleep(500); + start_duration_ms += 500; + } + } + + log("Phase %d/%d: Fixing server config file%s", ++phase, max_phases, + my_ini_exists ? "" : "(skipped)"); + snprintf(my_ini_bck, sizeof(my_ini_bck), "%s.BCK", + service_properties.inifile); + CopyFile(service_properties.inifile, my_ini_bck, FALSE); + upgrade_config_file(service_properties.inifile); + + /* + Start mysqld.exe as non-service skipping privileges (so we do not + care about the password). But disable networking and enable pipe + for communication, for security reasons. 
+ */ + + log("Phase %d/%d: Starting mysqld for upgrade",++phase,max_phases); + mysqld_process= (HANDLE)run_tool(P_NOWAIT, mysqld_path, + defaults_file_param, "--skip-networking", "--skip-grant-tables", + "--enable-named-pipe", socket_param,"--skip-slave-start", NULL); + + if (mysqld_process == INVALID_HANDLE_VALUE) + { + die("Cannot start mysqld.exe process, errno=%d", errno); + } + + log("Phase %d/%d: Waiting for startup to complete",++phase,max_phases); + start_duration_ms= 0; + for(;;) + { + if (WaitForSingleObject(mysqld_process, 0) != WAIT_TIMEOUT) + die("mysqld.exe did not start"); + + if (run_tool(P_WAIT, mysqladmin_path, "--protocol=pipe", socket_param, + "ping", "--no-beep", NULL) == 0) + { + break; + } + if (start_duration_ms > startup_timeout*1000) + die("Server did not come up in %d seconds",startup_timeout); + Sleep(500); + start_duration_ms+= 500; + } + + log("Phase %d/%d: Running mysql_upgrade",++phase,max_phases); + int upgrade_err= (int) run_tool(P_WAIT, mysqlupgrade_path, + "--protocol=pipe", "--force", socket_param, + NULL); + + if (upgrade_err) + die("mysql_upgrade failed with error code %d\n", upgrade_err); + + log("Phase %d/%d: Changing service configuration", ++phase, max_phases); + change_service_config(); + + log("Phase %d/%d: Initiating server shutdown",++phase, max_phases); + initiate_mysqld_shutdown(); + + log("Phase %d/%d: Waiting for shutdown to complete",++phase, max_phases); + if (WaitForSingleObject(mysqld_process, shutdown_timeout*1000) + != WAIT_OBJECT_0) + { + /* Shutdown takes too long */ + die("mysqld does not shutdown."); + } + CloseHandle(mysqld_process); + mysqld_process= NULL; + + log("Phase %d/%d: Starting service%s",++phase,max_phases, + (initial_service_state == SERVICE_RUNNING)?"":" (skipped)"); + if (initial_service_state == SERVICE_RUNNING) + { + StartService(service, NULL, NULL); + } + + log("Service '%s' successfully upgraded.\nLog file is written to %s", + opt_service, logfile_path); + CloseServiceHandle(service); 
+ CloseServiceHandle(scm); + if (logfile_handle) + CloseHandle(logfile_handle); + if(my_ini_bck[0]) + { + DeleteFile(my_ini_bck); + } + my_end(0); + exit(0); +} diff --git a/sql/mysqld.cc b/sql/mysqld.cc new file mode 100644 index 00000000..95286923 --- /dev/null +++ b/sql/mysqld.cc @@ -0,0 +1,9884 @@ +/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. + Copyright (c) 2008, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "sql_plugin.h" // Includes mariadb.h +#include "sql_priv.h" +#include "unireg.h" +#include +#ifndef _WIN32 +#include // getservbyname, servent +#endif +#include "sql_parse.h" // path_starts_from_data_home_dir +#include "sql_cache.h" // query_cache, query_cache_* +#include "sql_locale.h" // MY_LOCALES, my_locales, my_locale_by_name +#include "sql_show.h" // free_status_vars, add_status_vars, + // reset_status_vars +#include "strfunc.h" // find_set_from_flags +#include "parse_file.h" // File_parser_dummy_hook +#include "sql_db.h" // my_dboptions_cache_free + // my_dboptions_cache_init +#include "sql_table.h" // ddl_log_release, ddl_log_execute_recovery +#include "sql_connect.h" // free_max_user_conn, init_max_user_conn, + // handle_one_connection +#include "thread_cache.h" +#include "sql_time.h" // known_date_time_formats, + // get_date_time_format_str, + // date_time_format_make +#include "tztime.h" // my_tz_free, 
my_tz_init, my_tz_SYSTEM +#include "hostname.h" // hostname_cache_free, hostname_cache_init +#include "sql_acl.h" // acl_free, grant_free, acl_init, + // grant_init +#include "sql_base.h" +#include "sql_test.h" // mysql_print_status +#include "item_create.h" // item_create_cleanup, item_create_init +#include "sql_servers.h" // servers_free, servers_init +#include "init.h" // unireg_init +#include "derror.h" // init_errmessage +#include "des_key_file.h" // load_des_key_file +#include "sql_manager.h" // stop_handle_manager, start_handle_manager +#include "sql_expression_cache.h" // subquery_cache_miss, subquery_cache_hit +#include "sys_vars_shared.h" +#include "ddl_log.h" + +#include +#include +#include +#include "my_cpu.h" +#include "slave.h" +#include "rpl_mi.h" +#include "sql_repl.h" +#include "rpl_filter.h" +#include "client_settings.h" +#include "repl_failsafe.h" +#include +#include +#include "mysqld_suffix.h" +#include "mysys_err.h" +#include "events.h" +#include "sql_audit.h" +#include "probes_mysql.h" +#include "scheduler.h" +#include +#include "debug_sync.h" +#include "wsrep_mysqld.h" +#include "wsrep_var.h" +#ifdef WITH_WSREP +#include "wsrep_thd.h" +#include "wsrep_sst.h" +#include "wsrep_server_state.h" +#endif /* WITH_WSREP */ +#include "proxy_protocol.h" + +#include "sql_callback.h" +#include "threadpool.h" + +#ifdef HAVE_OPENSSL +#include +#endif + +#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE +#include "../storage/perfschema/pfs_server.h" +#endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */ +#include +#include +#include +#include "mysql_com_server.h" + +#include "keycaches.h" +#include "../storage/myisam/ha_myisam.h" +#include "set_var.h" + +#include "rpl_injector.h" +#include "semisync_master.h" +#include "semisync_slave.h" + +#include "transaction.h" + +#ifdef HAVE_SYS_PRCTL_H +#include +#endif + +#include +#include +#include +#include "sp_rcontext.h" +#include "sp_cache.h" +#include "sql_reload.h" // reload_acl_and_cache +#include "sp_head.h" // init_sp_psi_keys 
+ +#include + +#ifdef HAVE_POLL_H +#include +#endif + +#ifdef _WIN32 +#include +#include +#include /* SERVICE_STOPPED, SERVICE_RUNNING etc */ +#endif + +#include + +#include + +#define mysqld_charset &my_charset_latin1 + +extern "C" { // Because of SCO 3.2V4.2 +#include +#ifndef __GNU_LIBRARY__ +#define __GNU_LIBRARY__ // Skip warnings in getopt.h +#endif +#include +#ifdef HAVE_SYSENT_H +#include +#endif +#ifdef HAVE_PWD_H +#include // For struct passwd +#endif +#include + +#if !defined(_WIN32) +#include +#ifdef HAVE_SYS_UN_H +#include +#endif +#ifdef HAVE_SELECT_H +#include +#endif +#ifdef HAVE_SYS_SELECT_H +#include +#endif +#include +#endif /* _WIN32 */ + +#include + +#ifdef _WIN32 +#include +#endif + +#ifdef _AIX41 +int initgroups(const char *,unsigned int); +#endif + +#if defined(__FreeBSD__) && defined(HAVE_IEEEFP_H) && !defined(HAVE_FEDISABLEEXCEPT) +#include +#ifdef HAVE_FP_EXCEPT // Fix type conflict +typedef fp_except fp_except_t; +#endif +#endif /* __FreeBSD__ && HAVE_IEEEFP_H && !HAVE_FEDISABLEEXCEPT */ +#ifdef HAVE_SYS_FPU_H +/* for IRIX to use set_fpc_csr() */ +#include +#endif +#ifdef HAVE_FPU_CONTROL_H +#include +#endif +#if defined(__i386__) && !defined(HAVE_FPU_CONTROL_H) +# define fpu_control_t unsigned int +# define _FPU_EXTENDED 0x300 +# define _FPU_DOUBLE 0x200 +# if defined(__GNUC__) || (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x590) +# define _FPU_GETCW(cw) asm volatile ("fnstcw %0" : "=m" (*&cw)) +# define _FPU_SETCW(cw) asm volatile ("fldcw %0" : : "m" (*&cw)) +# else +# define _FPU_GETCW(cw) (cw= 0) +# define _FPU_SETCW(cw) +# endif +#endif + +#ifndef HAVE_FCNTL +#define fcntl(X,Y,Z) 0 +#endif + +inline void setup_fpu() +{ +#if defined(__FreeBSD__) && defined(HAVE_IEEEFP_H) && !defined(HAVE_FEDISABLEEXCEPT) + /* We can't handle floating point exceptions with threads, so disable + this on freebsd + Don't fall for overflow, underflow,divide-by-zero or loss of precision. + fpsetmask() is deprecated in favor of fedisableexcept() in C99. 
+ */ +#if defined(FP_X_DNML) + fpsetmask(~(FP_X_INV | FP_X_DNML | FP_X_OFL | FP_X_UFL | FP_X_DZ | + FP_X_IMP)); +#else + fpsetmask(~(FP_X_INV | FP_X_OFL | FP_X_UFL | FP_X_DZ | + FP_X_IMP)); +#endif /* FP_X_DNML */ +#endif /* __FreeBSD__ && HAVE_IEEEFP_H && !HAVE_FEDISABLEEXCEPT */ + +#ifdef HAVE_FEDISABLEEXCEPT + fedisableexcept(FE_ALL_EXCEPT); +#endif + +#ifdef HAVE_FESETROUND + /* Set FPU rounding mode to "round-to-nearest" */ + fesetround(FE_TONEAREST); +#endif /* HAVE_FESETROUND */ + + /* + x86 (32-bit) requires FPU precision to be explicitly set to 64 bit + (double precision) for portable results of floating point operations. + However, there is no need to do so if compiler is using SSE2 for floating + point, double values will be stored and processed in 64 bits anyway. + */ +#if defined(__i386__) && !defined(__SSE2_MATH__) +#if defined(_WIN32) +#if !defined(_WIN64) + _control87(_PC_53, MCW_PC); +#endif /* !_WIN64 */ +#else /* !_WIN32 */ + fpu_control_t cw; + _FPU_GETCW(cw); + cw= (cw & ~_FPU_EXTENDED) | _FPU_DOUBLE; + _FPU_SETCW(cw); +#endif /* _WIN32 && */ +#endif /* __i386__ */ + +#if defined(__sgi) && defined(HAVE_SYS_FPU_H) + /* Enable denormalized DOUBLE values support for IRIX */ + union fpc_csr n; + n.fc_word = get_fpc_csr(); + n.fc_struct.flush = 0; + set_fpc_csr(n.fc_word); +#endif +} + +} /* cplusplus */ + +#define MYSQL_KILL_SIGNAL SIGTERM + +#include // For thr_setconcurency() + +#ifdef SOLARIS +extern "C" int gethostname(char *name, int namelen); +#endif + +extern "C" sig_handler handle_fatal_signal(int sig); + +#if defined(__linux__) +#define ENABLE_TEMP_POOL 1 +#else +#define ENABLE_TEMP_POOL 0 +#endif + +int init_io_cache_encryption(); + +extern "C" +{ + static void my_malloc_size_cb_func(long long size, + my_bool is_thread_specific); +} + +/* Constants */ + +#include // ORACLE_WELCOME_COPYRIGHT_NOTICE + +const char *show_comp_option_name[]= {"YES", "NO", "DISABLED"}; + +static const char *tc_heuristic_recover_names[]= +{ + "OFF", "COMMIT", 
"ROLLBACK", NullS +}; +static TYPELIB tc_heuristic_recover_typelib= +{ + array_elements(tc_heuristic_recover_names)-1,"", + tc_heuristic_recover_names, NULL +}; + +const char *first_keyword= "first"; +const char *my_localhost= "localhost", *delayed_user= "DELAYED"; + +bool opt_large_files= sizeof(my_off_t) > 4; +static my_bool opt_autocommit; ///< for --autocommit command-line option +/* + Used with --help for detailed option +*/ +static my_bool opt_verbose= 0; + +/* Timer info to be used by the SQL layer */ +MY_TIMER_INFO sys_timer_info; + +/* static variables */ + +#ifdef HAVE_PSI_INTERFACE +#ifdef HAVE_OPENSSL10 +static PSI_rwlock_key key_rwlock_openssl; +#endif +#endif /* HAVE_PSI_INTERFACE */ + +/** + Statement instrumentation key for replication. +*/ +#ifdef HAVE_PSI_STATEMENT_INTERFACE +PSI_statement_info stmt_info_rpl; +#endif + +/* the default log output is log tables */ +static bool lower_case_table_names_used= 0; +static bool volatile select_thread_in_use, signal_thread_in_use; +static my_bool opt_debugging= 0, opt_external_locking= 0, opt_console= 0; +static my_bool opt_short_log_format= 0, opt_silent_startup= 0; + +ulong max_used_connections; +static const char *mysqld_user, *mysqld_chroot; +static char *default_character_set_name; +static char *character_set_filesystem_name; +static char *lc_messages; +static char *lc_time_names_name; +char *my_bind_addr_str; +static char *default_collation_name; +char *default_storage_engine, *default_tmp_storage_engine; +char *enforced_storage_engine=NULL; +char *gtid_pos_auto_engines; +plugin_ref *opt_gtid_pos_auto_plugins; +static char compiled_default_collation_name[]= MYSQL_DEFAULT_COLLATION_NAME; +Thread_cache thread_cache; +static bool binlog_format_used= false; +LEX_STRING opt_init_connect, opt_init_slave; +static DYNAMIC_ARRAY all_options; +static longlong start_memory_used; + +/* Global variables */ + +bool opt_bin_log, opt_bin_log_used=0, opt_ignore_builtin_innodb= 0; +bool opt_bin_log_compress; +uint 
opt_bin_log_compress_min_len; +my_bool opt_log, debug_assert_if_crashed_table= 0, opt_help= 0; +my_bool debug_assert_on_not_freed_memory= 0; +my_bool disable_log_notes, opt_support_flashback= 0; +static my_bool opt_abort; +ulonglong log_output_options; +my_bool opt_userstat_running; +bool opt_error_log= IF_WIN(1,0); +bool opt_disable_networking=0, opt_skip_show_db=0; +bool opt_skip_name_resolve=0; +my_bool opt_character_set_client_handshake= 1; +bool opt_endinfo, using_udf_functions; +my_bool locked_in_memory; +bool opt_using_transactions; +bool volatile abort_loop; +uint volatile global_disable_checkpoint; +#if defined(_WIN32) +ulong slow_start_timeout; +#endif +static MEM_ROOT startup_root; +MEM_ROOT read_only_root; + +/** + @brief 'grant_option' is used to indicate if privileges needs + to be checked, in which case the lock, LOCK_grant, is used + to protect access to the grant table. + @note This flag is dropped in 5.1 + @see grant_init() + */ +bool volatile grant_option; + +my_bool opt_skip_slave_start = 0; ///< If set, slave is not autostarted +my_bool opt_reckless_slave = 0; +my_bool opt_enable_named_pipe= 0; +my_bool opt_local_infile, opt_slave_compressed_protocol; +my_bool opt_safe_user_create = 0; +my_bool opt_show_slave_auth_info; +my_bool opt_log_slave_updates= 0; +my_bool opt_replicate_annotate_row_events= 0; +my_bool opt_mysql56_temporal_format=0, strict_password_validation= 1; +char *opt_slave_skip_errors; +char *opt_slave_transaction_retry_errors; + +/* + Legacy global handlerton. These will be removed (please do not add more). 
+*/ +handlerton *heap_hton; +handlerton *myisam_hton; +handlerton *partition_hton; + +my_bool read_only= 0, opt_readonly= 0; +my_bool use_temp_pool, relay_log_purge; +my_bool relay_log_recovery; +my_bool opt_sync_frm, opt_allow_suspicious_udfs; +my_bool opt_secure_auth= 0; +my_bool opt_require_secure_transport= 0; +char* opt_secure_file_priv; +my_bool lower_case_file_system= 0; +my_bool opt_large_pages= 0; +my_bool opt_super_large_pages= 0; +my_bool opt_myisam_use_mmap= 0; +uint opt_large_page_size= 0; +#if defined(ENABLED_DEBUG_SYNC) +MYSQL_PLUGIN_IMPORT uint opt_debug_sync_timeout= 0; +#endif /* defined(ENABLED_DEBUG_SYNC) */ +my_bool opt_old_style_user_limits= 0, trust_function_creators= 0; +ulong opt_replicate_events_marked_for_skip; + +/* + True if there is at least one per-hour limit for some user, so we should + check them before each query (and possibly reset counters when hour is + changed). False otherwise. +*/ +volatile bool mqh_used = 0; +my_bool opt_noacl; +my_bool sp_automatic_privileges= 1; + +ulong opt_binlog_rows_event_max_size; +ulong binlog_row_metadata; +my_bool opt_master_verify_checksum= 0; +my_bool opt_slave_sql_verify_checksum= 1; +const char *binlog_format_names[]= {"MIXED", "STATEMENT", "ROW", NullS}; +volatile sig_atomic_t calling_initgroups= 0; /**< Used in SIGSEGV handler. 
*/ +uint mysqld_port, select_errors, dropping_tables, ha_open_options; +uint mysqld_extra_port; +uint mysqld_port_timeout; +ulong delay_key_write_options; +uint protocol_version; +uint lower_case_table_names; +ulong tc_heuristic_recover= 0; +Atomic_counter THD_count::count, CONNECT::count; +bool shutdown_wait_for_slaves; +Atomic_counter slave_open_temp_tables; +ulong thread_created; +ulong back_log, connect_timeout, server_id; +ulong what_to_log; +ulong slow_launch_time; +ulong open_files_limit, max_binlog_size; +ulong slave_trans_retries; +ulong slave_trans_retry_interval; +uint slave_net_timeout; +ulong slave_exec_mode_options; +ulong slave_run_triggers_for_rbr= 0; +ulong slave_ddl_exec_mode_options= SLAVE_EXEC_MODE_IDEMPOTENT; +ulonglong slave_type_conversions_options; +ulong thread_cache_size=0; +ulonglong binlog_cache_size=0; +ulonglong binlog_file_cache_size=0; +ulonglong max_binlog_cache_size=0; +ulong slave_max_allowed_packet= 0; +double slave_max_statement_time_double; +ulonglong slave_max_statement_time; +ulonglong binlog_stmt_cache_size=0; +ulonglong max_binlog_stmt_cache_size=0; +ulonglong test_flags; +ulonglong query_cache_size=0; +ulong query_cache_limit=0; +ulong executed_events=0; +Atomic_counter global_query_id; +ulong aborted_threads, aborted_connects, aborted_connects_preauth; +ulong delayed_insert_timeout, delayed_insert_limit, delayed_queue_size; +ulong delayed_insert_threads, delayed_insert_writes, delayed_rows_in_use; +ulong delayed_insert_errors,flush_time; +ulong malloc_calls; +ulong specialflag=0; +ulong binlog_cache_use= 0, binlog_cache_disk_use= 0; +ulong binlog_stmt_cache_use= 0, binlog_stmt_cache_disk_use= 0; +ulong max_connections, max_connect_errors; +uint max_password_errors; +ulong extra_max_connections; +uint max_digest_length= 0; +ulong slave_retried_transactions; +ulong transactions_multi_engine; +ulong rpl_transactions_multi_engine; +ulong transactions_gtid_foreign_engine; +ulonglong slave_skipped_errors; +ulong 
feature_files_opened_with_delayed_keys= 0, feature_check_constraint= 0; +ulonglong denied_connections; +my_decimal decimal_zero; +long opt_secure_timestamp; +uint default_password_lifetime; +my_bool disconnect_on_expired_password; + +bool max_user_connections_checking=0; +/** + Limit of the total number of prepared statements in the server. + Is necessary to protect the server against out-of-memory attacks. +*/ +uint max_prepared_stmt_count; +/** + Current total number of prepared statements in the server. This number + is exact, and therefore may not be equal to the difference between + `com_stmt_prepare' and `com_stmt_close' (global status variables), as + the latter ones account for all registered attempts to prepare + a statement (including unsuccessful ones). Prepared statements are + currently connection-local: if the same SQL query text is prepared in + two different connections, this counts as two distinct prepared + statements. +*/ +uint prepared_stmt_count=0; +my_thread_id global_thread_id= 0; +ulong current_pid; +ulong slow_launch_threads = 0; +uint sync_binlog_period= 0, sync_relaylog_period= 0, + sync_relayloginfo_period= 0, sync_masterinfo_period= 0; +double expire_logs_days = 0; +ulong binlog_expire_logs_seconds = 0; + +/** + Soft upper limit for number of sp_head objects that can be stored + in the sp_cache for one connection. 
+*/ +ulong stored_program_cache_size= 0; + +ulong opt_slave_parallel_threads= 0; +ulong opt_slave_domain_parallel_threads= 0; +ulong opt_slave_parallel_mode; +ulong opt_binlog_commit_wait_count= 0; +ulong opt_binlog_commit_wait_usec= 0; +ulong opt_slave_parallel_max_queued= 131072; +my_bool opt_gtid_ignore_duplicates= FALSE; +uint opt_gtid_cleanup_batch_size= 64; + +const double log_10[] = { + 1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009, + 1e010, 1e011, 1e012, 1e013, 1e014, 1e015, 1e016, 1e017, 1e018, 1e019, + 1e020, 1e021, 1e022, 1e023, 1e024, 1e025, 1e026, 1e027, 1e028, 1e029, + 1e030, 1e031, 1e032, 1e033, 1e034, 1e035, 1e036, 1e037, 1e038, 1e039, + 1e040, 1e041, 1e042, 1e043, 1e044, 1e045, 1e046, 1e047, 1e048, 1e049, + 1e050, 1e051, 1e052, 1e053, 1e054, 1e055, 1e056, 1e057, 1e058, 1e059, + 1e060, 1e061, 1e062, 1e063, 1e064, 1e065, 1e066, 1e067, 1e068, 1e069, + 1e070, 1e071, 1e072, 1e073, 1e074, 1e075, 1e076, 1e077, 1e078, 1e079, + 1e080, 1e081, 1e082, 1e083, 1e084, 1e085, 1e086, 1e087, 1e088, 1e089, + 1e090, 1e091, 1e092, 1e093, 1e094, 1e095, 1e096, 1e097, 1e098, 1e099, + 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109, + 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119, + 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, + 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, + 1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, + 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, + 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, 1e169, + 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179, + 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189, + 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, 1e196, 1e197, 1e198, 1e199, + 1e200, 1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207, 1e208, 1e209, + 1e210, 1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, + 1e220, 1e221, 
1e222, 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229, + 1e230, 1e231, 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, + 1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, + 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259, + 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269, + 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279, + 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, 1e286, 1e287, 1e288, 1e289, + 1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, 1e299, + 1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308 +}; + +time_t server_start_time, flush_status_time; + +char mysql_home[FN_REFLEN], pidfile_name[FN_REFLEN], system_time_zone[30]; +char *default_tz_name; +char log_error_file[FN_REFLEN], glob_hostname[FN_REFLEN], *opt_log_basename; +char mysql_real_data_home[FN_REFLEN], + lc_messages_dir[FN_REFLEN], reg_ext[FN_EXTLEN], + mysql_charsets_dir[FN_REFLEN], + *opt_init_file, *opt_tc_log_file, *opt_ddl_recovery_file; +char *lc_messages_dir_ptr= lc_messages_dir, *log_error_file_ptr; +char mysql_unpacked_real_data_home[FN_REFLEN]; +size_t mysql_unpacked_real_data_home_len; +uint mysql_real_data_home_len, mysql_data_home_len= 1; +uint reg_ext_length; +const key_map key_map_empty(0); +key_map key_map_full(0); // Will be initialized later + +DATE_TIME_FORMAT global_date_format, global_datetime_format, global_time_format; +Time_zone *default_tz; + +const char *mysql_real_data_home_ptr= mysql_real_data_home; +extern "C" { +char server_version[SERVER_VERSION_LENGTH]; +} +char *server_version_ptr; +bool using_custom_server_version= false; +char *mysqld_unix_port, *opt_mysql_tmpdir; +ulong thread_handling; + +my_bool encrypt_binlog; +my_bool encrypt_tmp_disk_tables, encrypt_tmp_files; + +/** name of reference on left expression in rewritten IN subquery */ +const LEX_CSTRING in_left_expr_name= {STRING_WITH_LEN("") }; +/** name of additional condition */ +const 
LEX_CSTRING in_having_cond= {STRING_WITH_LEN("") }; +const LEX_CSTRING in_additional_cond= {STRING_WITH_LEN("") }; + +/** Number of connection errors when selecting on the listening port */ +ulong connection_errors_select= 0; +/** Number of connection errors when accepting sockets in the listening port. */ +ulong connection_errors_accept= 0; +/** Number of connection errors from TCP wrappers. */ +ulong connection_errors_tcpwrap= 0; +/** Number of connection errors from internal server errors. */ +ulong connection_errors_internal= 0; +/** Number of connection errors from the server max_connection limit. */ +ulong connection_errors_max_connection= 0; +/** Number of errors when reading the peer address. */ +ulong connection_errors_peer_addr= 0; + +/* classes for comparation parsing/processing */ +Eq_creator eq_creator; +Ne_creator ne_creator; +Gt_creator gt_creator; +Lt_creator lt_creator; +Ge_creator ge_creator; +Le_creator le_creator; + +THD_list server_threads; +Rpl_filter* cur_rpl_filter; +Rpl_filter* global_rpl_filter; +Rpl_filter* binlog_filter; + +struct system_variables global_system_variables; +/** + Following is just for options parsing, used with a difference against + global_system_variables. 
+
+  TODO: something should be done to get rid of following variables
+*/
+const char *current_dbug_option="";
+
+struct system_variables max_system_variables;
+struct system_status_var global_status_var;
+
+MY_TMPDIR mysql_tmpdir_list;
+/* Bitmap of temporary-file slot numbers in use; all access goes through
+   the two helpers below under LOCK_temp_pool. */
+static MY_BITMAP temp_pool;
+static mysql_mutex_t LOCK_temp_pool;
+
+/* Release temp-pool slot 'bit' for reuse (thread-safe). */
+void temp_pool_clear_bit(uint bit)
+{
+  mysql_mutex_lock(&LOCK_temp_pool);
+  bitmap_clear_bit(&temp_pool, bit);
+  mysql_mutex_unlock(&LOCK_temp_pool);
+}
+
+/* Claim the next free temp-pool slot and return its number (thread-safe). */
+uint temp_pool_set_next()
+{
+  mysql_mutex_lock(&LOCK_temp_pool);
+  uint res= bitmap_set_next(&temp_pool);
+  mysql_mutex_unlock(&LOCK_temp_pool);
+  return res;
+}
+
+CHARSET_INFO *system_charset_info, *files_charset_info ;
+CHARSET_INFO *national_charset_info, *table_alias_charset;
+CHARSET_INFO *character_set_filesystem;
+CHARSET_INFO *error_message_charset_info;
+
+MY_LOCALE *my_default_lc_messages;
+MY_LOCALE *my_default_lc_time_names;
+
+SHOW_COMP_OPTION have_ssl, have_symlink, have_dlopen, have_query_cache;
+SHOW_COMP_OPTION have_geometry, have_rtree_keys;
+SHOW_COMP_OPTION have_crypt, have_compress;
+SHOW_COMP_OPTION have_profiling;
+SHOW_COMP_OPTION have_openssl;
+
+#ifndef EMBEDDED_LIBRARY
+/* NOTE(review): the std::atomic template arguments on the next two
+   declarations are missing in this copy — restore from upstream. */
+static std::atomic shutdown_user;
+#endif //EMBEDDED_LIBRARY
+std::atomic shutdown_thread_id;
+
+/* Thread specific variables */
+
+static thread_local THD *THR_THD;
+
+/**
+  Get current THD object from thread local data
+
+  @retval The THD object for the thread, NULL if not connection thread
+*/
+
+MYSQL_THD _current_thd() { return THR_THD; }
+void set_current_thd(THD *thd) { THR_THD= thd; }
+
+/*
+  LOCK_start_thread is used to synchronize thread start and stop with
+  other threads.
+
+  It also protects these variables:
+  select_thread_in_use
+  slave_init_thread_running
+  check_temp_dir() call
+*/
+mysql_mutex_t LOCK_start_thread;
+
+mysql_mutex_t
+  LOCK_status, LOCK_error_log, LOCK_short_uuid_generator,
+  LOCK_delayed_insert, LOCK_delayed_status, LOCK_delayed_create,
+  LOCK_crypt,
+  LOCK_global_system_variables,
+  LOCK_user_conn,
+  LOCK_error_messages;
+mysql_mutex_t LOCK_stats, LOCK_global_user_client_stats,
+  LOCK_global_table_stats, LOCK_global_index_stats;
+
+/* This protects against changes in master_info_index */
+mysql_mutex_t LOCK_active_mi;
+
+/* This protects connection id.*/
+mysql_mutex_t LOCK_thread_id;
+
+/**
+  The below lock protects access to two global server variables:
+  max_prepared_stmt_count and prepared_stmt_count. These variables
+  set the limit and hold the current total number of prepared statements
+  in the server, respectively. As PREPARE/DEALLOCATE rate in a loaded
+  server may be fairly high, we need a dedicated lock.
+*/
+mysql_mutex_t LOCK_prepared_stmt_count;
+#ifdef HAVE_OPENSSL
+mysql_mutex_t LOCK_des_key_file;
+#endif
+mysql_mutex_t LOCK_backup_log;
+mysql_rwlock_t LOCK_grant, LOCK_sys_init_connect, LOCK_sys_init_slave;
+mysql_rwlock_t LOCK_ssl_refresh;
+mysql_rwlock_t LOCK_all_status_vars;
+mysql_prlock_t LOCK_system_variables_hash;
+mysql_cond_t COND_start_thread;
+pthread_t signal_thread;
+pthread_attr_t connection_attrib;
+mysql_mutex_t LOCK_server_started;
+mysql_cond_t COND_server_started;
+
+int mysqld_server_started=0, mysqld_server_initialized= 0;
+File_parser_dummy_hook file_parser_dummy_hook;
+
+/* replication parameters, if master_host is not NULL, we are a slave */
+uint report_port= 0;
+ulong master_retry_count=0;
+char *master_info_file;
+char *relay_log_info_file, *report_user, *report_password, *report_host;
+char *opt_relay_logname = 0, *opt_relaylog_index_name=0;
+char *opt_logname, *opt_slow_logname, *opt_bin_logname;
+char *opt_binlog_index_name=0;
+
+
+
+/* Static variables */
+
+my_bool opt_stack_trace; +my_bool opt_expect_abort= 0, opt_bootstrap= 0; +static my_bool opt_myisam_log; +static int cleanup_done; +static ulong opt_specialflag; +char *mysql_home_ptr, *pidfile_name_ptr; +/** Initial command line arguments (count), after load_defaults().*/ +static int defaults_argc; +/** + Initial command line arguments (arguments), after load_defaults(). + This memory is allocated by @c load_defaults() and should be freed + using @c free_defaults(). + Do not modify defaults_argc / defaults_argv, + use remaining_argc / remaining_argv instead to parse the command + line arguments in multiple steps. +*/ +static char **defaults_argv; +/** Remaining command line arguments (count), filtered by handle_options().*/ +static int remaining_argc; +/** Remaining command line arguments (arguments), filtered by handle_options().*/ +static char **remaining_argv; + +int orig_argc; +char **orig_argv; + +static struct my_option pfs_early_options[]= +{ +#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE + {"performance_schema_instrument", OPT_PFS_INSTRUMENT, + "Default startup value for a performance schema instrument.", + &pfs_param.m_pfs_instrument, &pfs_param.m_pfs_instrument, 0, GET_STR, + OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"performance_schema_consumer_events_stages_current", 0, + "Default startup value for the events_stages_current consumer.", + &pfs_param.m_consumer_events_stages_current_enabled, + &pfs_param.m_consumer_events_stages_current_enabled, 0, GET_BOOL, + OPT_ARG, FALSE, 0, 0, 0, 0, 0}, + {"performance_schema_consumer_events_stages_history", 0, + "Default startup value for the events_stages_history consumer.", + &pfs_param.m_consumer_events_stages_history_enabled, + &pfs_param.m_consumer_events_stages_history_enabled, 0, + GET_BOOL, OPT_ARG, FALSE, 0, 0, 0, 0, 0}, + {"performance_schema_consumer_events_stages_history_long", 0, + "Default startup value for the events_stages_history_long consumer.", + &pfs_param.m_consumer_events_stages_history_long_enabled, + 
&pfs_param.m_consumer_events_stages_history_long_enabled, 0, + GET_BOOL, OPT_ARG, FALSE, 0, 0, 0, 0, 0}, + {"performance_schema_consumer_events_statements_current", 0, + "Default startup value for the events_statements_current consumer.", + &pfs_param.m_consumer_events_statements_current_enabled, + &pfs_param.m_consumer_events_statements_current_enabled, 0, GET_BOOL, + OPT_ARG, FALSE, 0, 0, 0, 0, 0}, + {"performance_schema_consumer_events_statements_history", 0, + "Default startup value for the events_statements_history consumer.", + &pfs_param.m_consumer_events_statements_history_enabled, + &pfs_param.m_consumer_events_statements_history_enabled, 0, + GET_BOOL, OPT_ARG, FALSE, 0, 0, 0, 0, 0}, + {"performance_schema_consumer_events_statements_history_long", 0, + "Default startup value for the events_statements_history_long consumer.", + &pfs_param.m_consumer_events_statements_history_long_enabled, + &pfs_param.m_consumer_events_statements_history_long_enabled, 0, + GET_BOOL, OPT_ARG, FALSE, 0, 0, 0, 0, 0}, + {"performance_schema_consumer_events_transactions_current", 0, + "Default startup value for the events_transactions_current consumer.", + &pfs_param.m_consumer_events_transactions_current_enabled, + &pfs_param.m_consumer_events_transactions_current_enabled, 0, + GET_BOOL, OPT_ARG, FALSE, 0, 0, 0, 0, 0}, + {"performance_schema_consumer_events_transactions_history", 0, + "Default startup value for the events_transactions_history consumer.", + &pfs_param.m_consumer_events_transactions_history_enabled, + &pfs_param.m_consumer_events_transactions_history_enabled, 0, + GET_BOOL, OPT_ARG, FALSE, 0, 0, 0, 0, 0}, + {"performance_schema_consumer_events_transactions_history_long", 0, + "Default startup value for the events_transactions_history_long consumer.", + &pfs_param.m_consumer_events_transactions_history_long_enabled, + &pfs_param.m_consumer_events_transactions_history_long_enabled, 0, + GET_BOOL, OPT_ARG, FALSE, 0, 0, 0, 0, 0}, + 
{"performance_schema_consumer_events_waits_current", 0, + "Default startup value for the events_waits_current consumer.", + &pfs_param.m_consumer_events_waits_current_enabled, + &pfs_param.m_consumer_events_waits_current_enabled, 0, + GET_BOOL, OPT_ARG, FALSE, 0, 0, 0, 0, 0}, + {"performance_schema_consumer_events_waits_history", 0, + "Default startup value for the events_waits_history consumer.", + &pfs_param.m_consumer_events_waits_history_enabled, + &pfs_param.m_consumer_events_waits_history_enabled, 0, + GET_BOOL, OPT_ARG, FALSE, 0, 0, 0, 0, 0}, + {"performance_schema_consumer_events_waits_history_long", 0, + "Default startup value for the events_waits_history_long consumer.", + &pfs_param.m_consumer_events_waits_history_long_enabled, + &pfs_param.m_consumer_events_waits_history_long_enabled, 0, + GET_BOOL, OPT_ARG, FALSE, 0, 0, 0, 0, 0}, + {"performance_schema_consumer_global_instrumentation", 0, + "Default startup value for the global_instrumentation consumer.", + &pfs_param.m_consumer_global_instrumentation_enabled, + &pfs_param.m_consumer_global_instrumentation_enabled, 0, + GET_BOOL, OPT_ARG, TRUE, 0, 0, 0, 0, 0}, + {"performance_schema_consumer_thread_instrumentation", 0, + "Default startup value for the thread_instrumentation consumer.", + &pfs_param.m_consumer_thread_instrumentation_enabled, + &pfs_param.m_consumer_thread_instrumentation_enabled, 0, + GET_BOOL, OPT_ARG, TRUE, 0, 0, 0, 0, 0}, + {"performance_schema_consumer_statements_digest", 0, + "Default startup value for the statements_digest consumer.", + &pfs_param.m_consumer_statement_digest_enabled, + &pfs_param.m_consumer_statement_digest_enabled, 0, + GET_BOOL, OPT_ARG, TRUE, 0, 0, 0, 0, 0}, +#endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */ + {"getopt-prefix-matching", 0, + "Recognize command-line options by their unambiguos prefixes.", + &my_getopt_prefix_matching, &my_getopt_prefix_matching, 0, GET_BOOL, + NO_ARG, 1, 0, 1, 0, 0, 0} +}; + +PSI_file_key key_file_binlog, key_file_binlog_cache, 
key_file_binlog_index, + key_file_binlog_index_cache, key_file_casetest, + key_file_dbopt, key_file_des_key_file, key_file_ERRMSG, key_select_to_file, + key_file_fileparser, key_file_frm, key_file_global_ddl_log, key_file_load, + key_file_loadfile, key_file_log_event_data, key_file_log_event_info, + key_file_log_ddl, + key_file_master_info, key_file_misc, key_file_partition_ddl_log, + key_file_pid, key_file_relay_log_info, key_file_send_file, key_file_tclog, + key_file_trg, key_file_trn, key_file_init; +PSI_file_key key_file_query_log, key_file_slow_log; +PSI_file_key key_file_relaylog, key_file_relaylog_index, + key_file_relaylog_cache, key_file_relaylog_index_cache; +PSI_file_key key_file_binlog_state; + +#ifdef HAVE_PSI_INTERFACE +#ifdef HAVE_MMAP +PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active, key_LOCK_pool, + key_LOCK_pending_checkpoint; +#endif /* HAVE_MMAP */ + +#ifdef HAVE_OPENSSL +PSI_mutex_key key_LOCK_des_key_file; +#endif /* HAVE_OPENSSL */ + +PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list, + key_BINLOG_LOCK_binlog_background_thread, + key_LOCK_binlog_end_pos, + key_delayed_insert_mutex, key_hash_filo_lock, key_LOCK_active_mi, + key_LOCK_crypt, key_LOCK_delayed_create, + key_LOCK_delayed_insert, key_LOCK_delayed_status, key_LOCK_error_log, + key_LOCK_gdl, key_LOCK_global_system_variables, + key_LOCK_manager, key_LOCK_backup_log, + key_LOCK_prepared_stmt_count, + key_LOCK_rpl_status, key_LOCK_server_started, + key_LOCK_status, key_LOCK_temp_pool, + key_LOCK_system_variables_hash, key_LOCK_thd_data, key_LOCK_thd_kill, + key_LOCK_user_conn, key_LOCK_uuid_short_generator, key_LOG_LOCK_log, + key_master_info_data_lock, key_master_info_run_lock, + key_master_info_sleep_lock, key_master_info_start_stop_lock, + key_master_info_start_alter_lock, + key_master_info_start_alter_list_lock, + key_mutex_slave_reporting_capability_err_lock, key_relay_log_info_data_lock, + key_rpl_group_info_sleep_lock, + key_relay_log_info_log_space_lock, 
key_relay_log_info_run_lock, + key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data, + key_LOCK_error_messages, + key_LOCK_start_thread, + key_PARTITION_LOCK_auto_inc; +PSI_mutex_key key_RELAYLOG_LOCK_index; +PSI_mutex_key key_LOCK_relaylog_end_pos; +PSI_mutex_key key_LOCK_thread_id; +PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state, + key_LOCK_rpl_thread, key_LOCK_rpl_thread_pool, key_LOCK_parallel_entry; +PSI_mutex_key key_LOCK_rpl_semi_sync_master_enabled; +PSI_mutex_key key_LOCK_binlog; + +PSI_mutex_key key_LOCK_stats, + key_LOCK_global_user_client_stats, key_LOCK_global_table_stats, + key_LOCK_global_index_stats, + key_LOCK_wakeup_ready, key_LOCK_wait_commit; +PSI_mutex_key key_LOCK_gtid_waiting; + +PSI_mutex_key key_LOCK_after_binlog_sync; +PSI_mutex_key key_LOCK_prepare_ordered, key_LOCK_commit_ordered; +PSI_mutex_key key_TABLE_SHARE_LOCK_share; +PSI_mutex_key key_TABLE_SHARE_LOCK_statistics; +PSI_mutex_key key_LOCK_ack_receiver; + +PSI_mutex_key key_TABLE_SHARE_LOCK_rotation; +PSI_cond_key key_TABLE_SHARE_COND_rotation; + +static PSI_mutex_info all_server_mutexes[]= +{ +#ifdef HAVE_MMAP + { &key_PAGE_lock, "PAGE::lock", 0}, + { &key_LOCK_sync, "TC_LOG_MMAP::LOCK_sync", 0}, + { &key_LOCK_active, "TC_LOG_MMAP::LOCK_active", 0}, + { &key_LOCK_pool, "TC_LOG_MMAP::LOCK_pool", 0}, + { &key_LOCK_pool, "TC_LOG_MMAP::LOCK_pending_checkpoint", 0}, +#endif /* HAVE_MMAP */ + +#ifdef HAVE_OPENSSL + { &key_LOCK_des_key_file, "LOCK_des_key_file", PSI_FLAG_GLOBAL}, +#endif /* HAVE_OPENSSL */ + + { &key_BINLOG_LOCK_index, "MYSQL_BIN_LOG::LOCK_index", 0}, + { &key_BINLOG_LOCK_xid_list, "MYSQL_BIN_LOG::LOCK_xid_list", 0}, + { &key_BINLOG_LOCK_binlog_background_thread, "MYSQL_BIN_LOG::LOCK_binlog_background_thread", 0}, + { &key_LOCK_binlog_end_pos, "MYSQL_BIN_LOG::LOCK_binlog_end_pos", 0 }, + { &key_RELAYLOG_LOCK_index, "MYSQL_RELAY_LOG::LOCK_index", 0}, + { &key_LOCK_relaylog_end_pos, "MYSQL_RELAY_LOG::LOCK_binlog_end_pos", 0}, + { &key_delayed_insert_mutex, 
"Delayed_insert::mutex", 0}, + { &key_hash_filo_lock, "hash_filo::lock", 0}, + { &key_LOCK_active_mi, "LOCK_active_mi", PSI_FLAG_GLOBAL}, + { &key_LOCK_backup_log, "LOCK_backup_log", PSI_FLAG_GLOBAL}, + { &key_LOCK_temp_pool, "LOCK_temp_pool", PSI_FLAG_GLOBAL}, + { &key_LOCK_thread_id, "LOCK_thread_id", PSI_FLAG_GLOBAL}, + { &key_LOCK_crypt, "LOCK_crypt", PSI_FLAG_GLOBAL}, + { &key_LOCK_delayed_create, "LOCK_delayed_create", PSI_FLAG_GLOBAL}, + { &key_LOCK_delayed_insert, "LOCK_delayed_insert", PSI_FLAG_GLOBAL}, + { &key_LOCK_delayed_status, "LOCK_delayed_status", PSI_FLAG_GLOBAL}, + { &key_LOCK_error_log, "LOCK_error_log", PSI_FLAG_GLOBAL}, + { &key_LOCK_gdl, "LOCK_gdl", PSI_FLAG_GLOBAL}, + { &key_LOCK_global_system_variables, "LOCK_global_system_variables", PSI_FLAG_GLOBAL}, + { &key_LOCK_manager, "LOCK_manager", PSI_FLAG_GLOBAL}, + { &key_LOCK_prepared_stmt_count, "LOCK_prepared_stmt_count", PSI_FLAG_GLOBAL}, + { &key_LOCK_rpl_status, "LOCK_rpl_status", PSI_FLAG_GLOBAL}, + { &key_LOCK_server_started, "LOCK_server_started", PSI_FLAG_GLOBAL}, + { &key_LOCK_status, "LOCK_status", PSI_FLAG_GLOBAL}, + { &key_LOCK_system_variables_hash, "LOCK_system_variables_hash", PSI_FLAG_GLOBAL}, + { &key_LOCK_stats, "LOCK_stats", PSI_FLAG_GLOBAL}, + { &key_LOCK_global_user_client_stats, "LOCK_global_user_client_stats", PSI_FLAG_GLOBAL}, + { &key_LOCK_global_table_stats, "LOCK_global_table_stats", PSI_FLAG_GLOBAL}, + { &key_LOCK_global_index_stats, "LOCK_global_index_stats", PSI_FLAG_GLOBAL}, + { &key_LOCK_wakeup_ready, "THD::LOCK_wakeup_ready", 0}, + { &key_LOCK_wait_commit, "wait_for_commit::LOCK_wait_commit", 0}, + { &key_LOCK_gtid_waiting, "gtid_waiting::LOCK_gtid_waiting", 0}, + { &key_LOCK_thd_data, "THD::LOCK_thd_data", 0}, + { &key_LOCK_thd_kill, "THD::LOCK_thd_kill", 0}, + { &key_LOCK_user_conn, "LOCK_user_conn", PSI_FLAG_GLOBAL}, + { &key_LOCK_uuid_short_generator, "LOCK_uuid_short_generator", PSI_FLAG_GLOBAL}, + { &key_LOG_LOCK_log, "LOG::LOCK_log", 0}, + { 
&key_master_info_data_lock, "Master_info::data_lock", 0}, + { &key_master_info_start_stop_lock, "Master_info::start_stop_lock", 0}, + { &key_master_info_run_lock, "Master_info::run_lock", 0}, + { &key_master_info_sleep_lock, "Master_info::sleep_lock", 0}, + { &key_master_info_start_alter_lock, "Master_info::start_alter_lock", 0}, + { &key_master_info_start_alter_list_lock, "Master_info::start_alter_lock", 0}, + { &key_mutex_slave_reporting_capability_err_lock, "Slave_reporting_capability::err_lock", 0}, + { &key_relay_log_info_data_lock, "Relay_log_info::data_lock", 0}, + { &key_relay_log_info_log_space_lock, "Relay_log_info::log_space_lock", 0}, + { &key_relay_log_info_run_lock, "Relay_log_info::run_lock", 0}, + { &key_rpl_group_info_sleep_lock, "Rpl_group_info::sleep_lock", 0}, + { &key_structure_guard_mutex, "Query_cache::structure_guard_mutex", 0}, + { &key_TABLE_SHARE_LOCK_ha_data, "TABLE_SHARE::LOCK_ha_data", 0}, + { &key_TABLE_SHARE_LOCK_share, "TABLE_SHARE::LOCK_share", 0}, + { &key_TABLE_SHARE_LOCK_statistics, "TABLE_SHARE::LOCK_statistics", 0}, + { &key_TABLE_SHARE_LOCK_rotation, "TABLE_SHARE::LOCK_rotation", 0}, + { &key_LOCK_error_messages, "LOCK_error_messages", PSI_FLAG_GLOBAL}, + { &key_LOCK_prepare_ordered, "LOCK_prepare_ordered", PSI_FLAG_GLOBAL}, + { &key_LOCK_after_binlog_sync, "LOCK_after_binlog_sync", PSI_FLAG_GLOBAL}, + { &key_LOCK_commit_ordered, "LOCK_commit_ordered", PSI_FLAG_GLOBAL}, + { &key_PARTITION_LOCK_auto_inc, "HA_DATA_PARTITION::LOCK_auto_inc", 0}, + { &key_LOCK_slave_state, "LOCK_slave_state", 0}, + { &key_LOCK_start_thread, "LOCK_start_thread", PSI_FLAG_GLOBAL}, + { &key_LOCK_binlog_state, "LOCK_binlog_state", 0}, + { &key_LOCK_rpl_thread, "LOCK_rpl_thread", 0}, + { &key_LOCK_rpl_thread_pool, "LOCK_rpl_thread_pool", 0}, + { &key_LOCK_parallel_entry, "LOCK_parallel_entry", 0}, + { &key_LOCK_ack_receiver, "Ack_receiver::mutex", 0}, + { &key_LOCK_rpl_semi_sync_master_enabled, "LOCK_rpl_semi_sync_master_enabled", 0}, + { 
&key_LOCK_binlog, "LOCK_binlog", 0} +}; + +PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger, + key_rwlock_LOCK_sys_init_connect, key_rwlock_LOCK_sys_init_slave, + key_rwlock_LOCK_system_variables_hash, key_rwlock_query_cache_query_lock, + key_LOCK_SEQUENCE, + key_rwlock_LOCK_vers_stats, key_rwlock_LOCK_stat_serial, + key_rwlock_LOCK_ssl_refresh, + key_rwlock_THD_list, + key_rwlock_LOCK_all_status_vars; + +static PSI_rwlock_info all_server_rwlocks[]= +{ +#ifdef HAVE_OPENSSL10 + { &key_rwlock_openssl, "CRYPTO_dynlock_value::lock", 0}, +#endif + { &key_rwlock_LOCK_grant, "LOCK_grant", PSI_FLAG_GLOBAL}, + { &key_rwlock_LOCK_logger, "LOGGER::LOCK_logger", 0}, + { &key_rwlock_LOCK_sys_init_connect, "LOCK_sys_init_connect", PSI_FLAG_GLOBAL}, + { &key_rwlock_LOCK_sys_init_slave, "LOCK_sys_init_slave", PSI_FLAG_GLOBAL}, + { &key_LOCK_SEQUENCE, "LOCK_SEQUENCE", 0}, + { &key_rwlock_LOCK_system_variables_hash, "LOCK_system_variables_hash", PSI_FLAG_GLOBAL}, + { &key_rwlock_query_cache_query_lock, "Query_cache_query::lock", 0}, + { &key_rwlock_LOCK_vers_stats, "Vers_field_stats::lock", 0}, + { &key_rwlock_LOCK_stat_serial, "TABLE_SHARE::LOCK_stat_serial", 0}, + { &key_rwlock_LOCK_ssl_refresh, "LOCK_ssl_refresh", PSI_FLAG_GLOBAL }, + { &key_rwlock_THD_list, "THD_list::lock", PSI_FLAG_GLOBAL }, + { &key_rwlock_LOCK_all_status_vars, "LOCK_all_status_vars", PSI_FLAG_GLOBAL } +}; + +#ifdef HAVE_MMAP +PSI_cond_key key_PAGE_cond, key_COND_active, key_COND_pool; +#endif /* HAVE_MMAP */ + +PSI_cond_key key_BINLOG_COND_xid_list, + key_BINLOG_COND_bin_log_updated, key_BINLOG_COND_relay_log_updated, + key_BINLOG_COND_binlog_background_thread, + key_BINLOG_COND_binlog_background_thread_end, + key_COND_cache_status_changed, key_COND_manager, + key_COND_rpl_status, key_COND_server_started, + key_delayed_insert_cond, key_delayed_insert_cond_client, + key_item_func_sleep_cond, key_master_info_data_cond, + key_master_info_start_cond, key_master_info_stop_cond, + 
key_master_info_sleep_cond, + key_relay_log_info_data_cond, key_relay_log_info_log_space_cond, + key_relay_log_info_start_cond, key_relay_log_info_stop_cond, + key_rpl_group_info_sleep_cond, + key_TABLE_SHARE_cond, key_user_level_lock_cond, + key_COND_start_thread, key_COND_binlog_send, + key_BINLOG_COND_queue_busy; +PSI_cond_key key_RELAYLOG_COND_relay_log_updated, + key_RELAYLOG_COND_bin_log_updated, key_COND_wakeup_ready, + key_COND_wait_commit; +PSI_cond_key key_RELAYLOG_COND_queue_busy; +PSI_cond_key key_TC_LOG_MMAP_COND_queue_busy; +PSI_cond_key key_COND_rpl_thread_queue, key_COND_rpl_thread, + key_COND_rpl_thread_stop, key_COND_rpl_thread_pool, + key_COND_parallel_entry, key_COND_group_commit_orderer, + key_COND_prepare_ordered; +PSI_cond_key key_COND_wait_gtid, key_COND_gtid_ignore_duplicates; +PSI_cond_key key_COND_ack_receiver; + +static PSI_cond_info all_server_conds[]= +{ +#ifdef HAVE_MMAP + { &key_PAGE_cond, "PAGE::cond", 0}, + { &key_COND_active, "TC_LOG_MMAP::COND_active", 0}, + { &key_COND_pool, "TC_LOG_MMAP::COND_pool", 0}, + { &key_TC_LOG_MMAP_COND_queue_busy, "TC_LOG_MMAP::COND_queue_busy", 0}, +#endif /* HAVE_MMAP */ + { &key_BINLOG_COND_bin_log_updated, "MYSQL_BIN_LOG::COND_bin_log_updated", 0}, { &key_BINLOG_COND_relay_log_updated, "MYSQL_BIN_LOG::COND_relay_log_updated", 0}, + { &key_BINLOG_COND_xid_list, "MYSQL_BIN_LOG::COND_xid_list", 0}, + { &key_BINLOG_COND_binlog_background_thread, "MYSQL_BIN_LOG::COND_binlog_background_thread", 0}, + { &key_BINLOG_COND_binlog_background_thread_end, "MYSQL_BIN_LOG::COND_binlog_background_thread_end", 0}, + { &key_BINLOG_COND_queue_busy, "MYSQL_BIN_LOG::COND_queue_busy", 0}, + { &key_RELAYLOG_COND_relay_log_updated, "MYSQL_RELAY_LOG::COND_relay_log_updated", 0}, + { &key_RELAYLOG_COND_bin_log_updated, "MYSQL_RELAY_LOG::COND_bin_log_updated", 0}, + { &key_RELAYLOG_COND_queue_busy, "MYSQL_RELAY_LOG::COND_queue_busy", 0}, + { &key_COND_wakeup_ready, "THD::COND_wakeup_ready", 0}, + { &key_COND_wait_commit, 
"wait_for_commit::COND_wait_commit", 0}, + { &key_COND_cache_status_changed, "Query_cache::COND_cache_status_changed", 0}, + { &key_COND_manager, "COND_manager", PSI_FLAG_GLOBAL}, + { &key_COND_server_started, "COND_server_started", PSI_FLAG_GLOBAL}, + { &key_delayed_insert_cond, "Delayed_insert::cond", 0}, + { &key_delayed_insert_cond_client, "Delayed_insert::cond_client", 0}, + { &key_item_func_sleep_cond, "Item_func_sleep::cond", 0}, + { &key_master_info_data_cond, "Master_info::data_cond", 0}, + { &key_master_info_start_cond, "Master_info::start_cond", 0}, + { &key_master_info_stop_cond, "Master_info::stop_cond", 0}, + { &key_master_info_sleep_cond, "Master_info::sleep_cond", 0}, + { &key_relay_log_info_data_cond, "Relay_log_info::data_cond", 0}, + { &key_relay_log_info_log_space_cond, "Relay_log_info::log_space_cond", 0}, + { &key_relay_log_info_start_cond, "Relay_log_info::start_cond", 0}, + { &key_relay_log_info_stop_cond, "Relay_log_info::stop_cond", 0}, + { &key_rpl_group_info_sleep_cond, "Rpl_group_info::sleep_cond", 0}, + { &key_TABLE_SHARE_cond, "TABLE_SHARE::cond", 0}, + { &key_user_level_lock_cond, "User_level_lock::cond", 0}, + { &key_COND_rpl_thread, "COND_rpl_thread", 0}, + { &key_COND_rpl_thread_queue, "COND_rpl_thread_queue", 0}, + { &key_COND_rpl_thread_stop, "COND_rpl_thread_stop", 0}, + { &key_COND_rpl_thread_pool, "COND_rpl_thread_pool", 0}, + { &key_COND_parallel_entry, "COND_parallel_entry", 0}, + { &key_COND_group_commit_orderer, "COND_group_commit_orderer", 0}, + { &key_COND_prepare_ordered, "COND_prepare_ordered", 0}, + { &key_COND_start_thread, "COND_start_thread", PSI_FLAG_GLOBAL}, + { &key_COND_wait_gtid, "COND_wait_gtid", 0}, + { &key_COND_gtid_ignore_duplicates, "COND_gtid_ignore_duplicates", 0}, + { &key_COND_ack_receiver, "Ack_receiver::cond", 0}, + { &key_COND_binlog_send, "COND_binlog_send", 0}, + { &key_TABLE_SHARE_COND_rotation, "TABLE_SHARE::COND_rotation", 0} +}; + +PSI_thread_key key_thread_delayed_insert, + 
key_thread_handle_manager, key_thread_main, + key_thread_one_connection, key_thread_signal_hand, + key_thread_slave_background, key_rpl_parallel_thread; +PSI_thread_key key_thread_ack_receiver; + +static PSI_thread_info all_server_threads[]= +{ + { &key_thread_delayed_insert, "delayed_insert", 0}, + { &key_thread_handle_manager, "manager", PSI_FLAG_GLOBAL}, + { &key_thread_main, "main", PSI_FLAG_GLOBAL}, + { &key_thread_one_connection, "one_connection", 0}, + { &key_thread_signal_hand, "signal_handler", PSI_FLAG_GLOBAL}, + { &key_thread_slave_background, "slave_background", PSI_FLAG_GLOBAL}, + { &key_thread_ack_receiver, "Ack_receiver", PSI_FLAG_GLOBAL}, + { &key_rpl_parallel_thread, "rpl_parallel_thread", 0} +}; + +#ifdef HAVE_MMAP +PSI_file_key key_file_map; +#endif /* HAVE_MMAP */ + +#endif /* HAVE_PSI_INTERFACE */ + +#ifdef HAVE_PSI_STATEMENT_INTERFACE +PSI_statement_info stmt_info_new_packet; +#endif + +#ifndef EMBEDDED_LIBRARY +void net_before_header_psi(struct st_net *net, void *thd, size_t /* unused: count */) +{ + DBUG_ASSERT(thd); + /* + We only come where when the server is IDLE, waiting for the next command. + Technically, it is a wait on a socket, which may take a long time, + because the call is blocking. + Disable the socket instrumentation, to avoid recording a SOCKET event. + Instead, start explicitly an IDLE event. + */ + MYSQL_SOCKET_SET_STATE(net->vio->mysql_socket, PSI_SOCKET_STATE_IDLE); + MYSQL_START_IDLE_WAIT(static_cast(thd)->m_idle_psi, + &static_cast(thd)->m_idle_state); +} + +void net_after_header_psi(struct st_net *net, void *user_data, + size_t /* unused: count */, my_bool rc) +{ + THD *thd; + thd= static_cast (user_data); + DBUG_ASSERT(thd != NULL); + + /* + The server just got data for a network packet header, + from the network layer. + The IDLE event is now complete, since we now have a message to process. 
+ We need to: + - start a new STATEMENT event + - start a new STAGE event, within this statement, + - start recording SOCKET WAITS events, within this stage. + The proper order is critical to get events numbered correctly, + and nested in the proper parent. + */ + MYSQL_END_IDLE_WAIT(thd->m_idle_psi); + + if (! rc) + { + thd->m_statement_psi= MYSQL_START_STATEMENT(&thd->m_statement_state, + stmt_info_new_packet.m_key, + thd->get_db(), thd->db.length, + thd->charset(), NULL); + + THD_STAGE_INFO(thd, stage_starting); + } + + /* + TODO: consider recording a SOCKET event for the bytes just read, + by also passing count here. + */ + MYSQL_SOCKET_SET_STATE(net->vio->mysql_socket, PSI_SOCKET_STATE_ACTIVE); +} + + +void init_net_server_extension(THD *thd) +{ + /* Start with a clean state for connection events. */ + thd->m_idle_psi= NULL; + thd->m_statement_psi= NULL; + /* Hook up the NET_SERVER callback in the net layer. */ + thd->m_net_server_extension.m_user_data= thd; + thd->m_net_server_extension.m_before_header= net_before_header_psi; + thd->m_net_server_extension.m_after_header= net_after_header_psi; + /* Activate this private extension for the mysqld server. */ + thd->net.extension= & thd->m_net_server_extension; +} +#else +void init_net_server_extension(THD *thd) +{ +} +#endif /* EMBEDDED_LIBRARY */ + + +/** + A log message for the error log, buffered in memory. + Log messages are temporarily buffered when generated before the error log + is initialized, and then printed once the error log is ready. +*/ +class Buffered_log : public Sql_alloc +{ +public: + Buffered_log(enum loglevel level, const char *message); + + ~Buffered_log() = default; + + void print(void); + +private: + /** Log message level. */ + enum loglevel m_level; + /** Log message text. */ + String m_message; +}; + +/** + Constructor. 
+ @param level the message log level + @param message the message text +*/ +Buffered_log::Buffered_log(enum loglevel level, const char *message) + : m_level(level), m_message() +{ + m_message.copy(message, strlen(message), &my_charset_latin1); +} + +/** + Print a buffered log to the real log file. +*/ +void Buffered_log::print() +{ + /* + Since messages are buffered, they can be printed out + of order with other entries in the log. + Add "Buffered xxx" to the message text to prevent confusion. + */ + switch(m_level) + { + case ERROR_LEVEL: + sql_print_error("Buffered error: %s", m_message.c_ptr_safe()); + break; + case WARNING_LEVEL: + sql_print_warning("Buffered warning: %s", m_message.c_ptr_safe()); + break; + case INFORMATION_LEVEL: + /* + Messages printed as "information" still end up in the mysqld *error* log, + but with a [Note] tag instead of an [ERROR] tag. + While this is probably fine for a human reading the log, + it is upsetting existing automated scripts used to parse logs, + because such scripts are likely to not already handle [Note] properly. + INFORMATION_LEVEL messages are simply silenced, on purpose, + to avoid un needed verbosity. + */ + break; + } +} + +/** + Collection of all the buffered log messages. +*/ +class Buffered_logs +{ +public: + Buffered_logs() = default; + + ~Buffered_logs() = default; + + void init(); + void cleanup(); + + void buffer(enum loglevel m_level, const char *msg); + void print(); +private: + /** + Memory root to use to store buffered logs. + This memory root lifespan is between init and cleanup. + Once the buffered logs are printed, they are not needed anymore, + and all the memory used is reclaimed. + */ + MEM_ROOT m_root; + /** List of buffered log messages. */ + List m_list; +}; + +void Buffered_logs::init() +{ + init_alloc_root(PSI_NOT_INSTRUMENTED, &m_root, 1024, 0, MYF(0)); +} + +void Buffered_logs::cleanup() +{ + m_list.delete_elements(); + free_root(&m_root, MYF(0)); +} + +/** + Add a log message to the buffer. 
+*/ +void Buffered_logs::buffer(enum loglevel level, const char *msg) +{ + /* + Do not let Sql_alloc::operator new(size_t) allocate memory, + there is no memory root associated with the main() thread. + Give explicitly the proper memory root to use to + Sql_alloc::operator new(size_t, MEM_ROOT *) instead. + */ + Buffered_log *log= new (&m_root) Buffered_log(level, msg); + if (log) + m_list.push_back(log, &m_root); +} + +/** + Print buffered log messages. +*/ +void Buffered_logs::print() +{ + Buffered_log *log; + List_iterator_fast it(m_list); + while ((log= it++)) + log->print(); +} + +/** Logs reported before a logger is available. */ +static Buffered_logs buffered_logs; + +struct my_rnd_struct sql_rand; ///< used by sql_class.cc:THD::THD() + +#ifndef EMBEDDED_LIBRARY + +Dynamic_array listen_sockets(PSI_INSTRUMENT_MEM, 0); +bool unix_sock_is_online= false; +static int systemd_sock_activation; /* systemd socket activation */ + + +C_MODE_START +#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE +/** + Error reporter that buffer log messages. + @param level log message level + @param format log message format string +*/ +static void buffered_option_error_reporter(enum loglevel level, + const char *format, ...) +{ + va_list args; + char buffer[1024]; + + va_start(args, format); + my_vsnprintf(buffer, sizeof(buffer), format, args); + va_end(args); + buffered_logs.buffer(level, buffer); +} +#endif + + +/** + Character set and collation error reporter that prints to sql error log. + @param level log message level + @param format log message format string + + This routine is used to print character set and collation + warnings and errors inside an already running mysqld server, + e.g. when a character set or collation is requested for the very first time + and its initialization does not go well for some reasons. 
+ + Note: At early mysqld initialization stage, + when error log is not yet available, + we use buffered_option_error_reporter() instead, + to print general character set subsystem initialization errors, + such as Index.xml syntax problems, bad XML tag hierarchy, etc. +*/ +static void charset_error_reporter(enum loglevel level, + const char *format, ...) +{ + va_list args; + va_start(args, format); + vprint_msg_to_log(level, format, args); + va_end(args); +} +C_MODE_END + +struct passwd *user_info; +static pthread_t select_thread; +#endif + +/* OS specific variables */ + +#ifdef _WIN32 +HANDLE hEventShutdown; +#endif + + +#ifndef EMBEDDED_LIBRARY +bool mysqld_embedded=0; +#else +bool mysqld_embedded=1; +#endif + +my_bool plugins_are_initialized= FALSE; + +#ifndef DBUG_OFF +static const char* default_dbug_option; +#endif +#ifdef HAVE_LIBWRAP +const char *libwrapName= NULL; +int allow_severity = LOG_INFO; +int deny_severity = LOG_WARNING; +#endif +#ifdef HAVE_QUERY_CACHE +ulong query_cache_min_res_unit= QUERY_CACHE_MIN_RESULT_DATA_SIZE; +Query_cache query_cache; +#endif + + +my_bool opt_use_ssl = 0; +char *opt_ssl_ca= NULL, *opt_ssl_capath= NULL, *opt_ssl_cert= NULL, + *opt_ssl_cipher= NULL, *opt_ssl_key= NULL, *opt_ssl_crl= NULL, + *opt_ssl_crlpath= NULL, *opt_tls_version= NULL; +ulonglong tls_version= 0; + +static scheduler_functions thread_scheduler_struct, extra_thread_scheduler_struct; +scheduler_functions *thread_scheduler= &thread_scheduler_struct, + *extra_thread_scheduler= &extra_thread_scheduler_struct; + +#ifdef HAVE_OPENSSL +#include +#if defined(HAVE_OPENSSL10) && !defined(HAVE_WOLFSSL) +typedef struct CRYPTO_dynlock_value +{ + mysql_rwlock_t lock; +} openssl_lock_t; + +static openssl_lock_t *openssl_stdlocks; +static openssl_lock_t *openssl_dynlock_create(const char *, int); +static void openssl_dynlock_destroy(openssl_lock_t *, const char *, int); +static void openssl_lock_function(int, int, const char *, int); +static void openssl_lock(int, 
openssl_lock_t *, const char *, int); +#endif /* HAVE_OPENSSL10 */ +char *des_key_file; +#ifndef EMBEDDED_LIBRARY +struct st_VioSSLFd *ssl_acceptor_fd; +#endif +#endif /* HAVE_OPENSSL */ + +/** + Number of currently active user connections. +*/ +static Atomic_counter connection_count; +static Atomic_counter extra_connection_count; + +my_bool opt_gtid_strict_mode= FALSE; + + +/* Function declarations */ + +pthread_handler_t signal_hand(void *arg); +static int mysql_init_variables(void); +static int get_options(int *argc_ptr, char ***argv_ptr); +static bool add_terminator(DYNAMIC_ARRAY *options); +static bool add_many_options(DYNAMIC_ARRAY *, my_option *, size_t); +extern "C" my_bool mysqld_get_one_option(const struct my_option *, const char *, + const char *); +static int init_thread_environment(); +static char *get_relative_path(const char *path); +static int fix_paths(void); +#ifndef _WIN32 +void handle_connections_sockets(); +#endif + +static bool read_init_file(char *file_name); +pthread_handler_t handle_slave(void *arg); +static void clean_up(bool print_message); +static int test_if_case_insensitive(const char *dir_name); + +#ifndef EMBEDDED_LIBRARY +static bool pid_file_created= false; +static void usage(void); +static void start_signal_handler(void); +static void clean_up_mutexes(void); +static void wait_for_signal_thread_to_end(void); +static void create_pid_file(); +ATTRIBUTE_NORETURN static void mysqld_exit(int exit_code); +#endif +static void delete_pid_file(myf flags); +static void end_ssl(); + + +#ifndef EMBEDDED_LIBRARY +extern Atomic_counter local_connection_thread_count; + +uint THD_count::connection_thd_count() +{ + return value() - + binlog_dump_thread_count - + local_connection_thread_count; +} + + +/**************************************************************************** +** Code to end mysqld +****************************************************************************/ + +/* common callee of two shutdown phases */ +static void 
kill_thread(THD *thd) +{ + mysql_mutex_lock(&thd->LOCK_thd_kill); + thd->abort_current_cond_wait(true); + mysql_mutex_unlock(&thd->LOCK_thd_kill); +} + + +/** + First shutdown everything but slave threads and binlog dump connections +*/ +static my_bool kill_thread_phase_1(THD *thd, int *n_threads_awaiting_ack) +{ + DBUG_PRINT("quit", ("Informing thread %ld that it's time to die", + (ulong) thd->thread_id)); + + if (thd->slave_thread || thd->is_binlog_dump_thread() || + (shutdown_wait_for_slaves && + repl_semisync_master.is_thd_awaiting_semisync_ack(thd) && + ++(*n_threads_awaiting_ack))) + return 0; + + if (DBUG_IF("only_kill_system_threads") && !thd->system_thread) + return 0; + if (DBUG_IF("only_kill_system_threads_no_loop") && !thd->system_thread) + return 0; + + thd->awake(KILL_SERVER_HARD); + return 0; +} + + +/** + Last shutdown binlog dump connections +*/ +static my_bool kill_thread_phase_2(THD *thd, void *) +{ + if (shutdown_wait_for_slaves && thd->is_binlog_dump_thread()) + { + thd->set_killed(KILL_SERVER); + } + else + { + thd->set_killed(KILL_SERVER_HARD); + MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (thd)); + } + kill_thread(thd); + return 0; +} + + +/* associated with the kill thread phase 1 */ +static my_bool warn_threads_active_after_phase_1(THD *thd, void *) +{ + if (!thd->is_binlog_dump_thread() && thd->vio_ok()) + sql_print_warning("%s: Thread %llu (user : '%s') did not exit\n", my_progname, + (ulonglong) thd->thread_id, + (thd->main_security_ctx.user ? + thd->main_security_ctx.user : "")); + return 0; +} + + +/* associated with the kill thread phase 2 */ +static my_bool warn_threads_active_after_phase_2(THD *thd, void *) +{ + mysql_mutex_lock(&thd->LOCK_thd_data); + // dump thread may not have yet (or already) current_linfo set + sql_print_warning("Dump thread %llu last sent to server %lu " + "binlog file:pos %s:%llu", + thd->thread_id, thd->variables.server_id, + thd->current_linfo ? 
+ my_basename(thd->current_linfo->log_file_name) : "NULL", + thd->current_linfo ? thd->current_linfo->pos : 0); + mysql_mutex_unlock(&thd->LOCK_thd_data); + + return 0; +} + + +/** + Kills main thread. + + @note this function is responsible for setting abort_loop and breaking + poll() in main thread. Shutdown as such is supposed to be performed by main + thread itself. +*/ + +static void break_connect_loop() +{ +#ifdef EXTRA_DEBUG + int count=0; +#endif + + abort_loop= 1; + +#if defined(_WIN32) + mysqld_win_initiate_shutdown(); +#else + /* Avoid waiting for ourselves when thread-handling=no-threads. */ + if (pthread_equal(pthread_self(), select_thread)) + return; + DBUG_PRINT("quit", ("waiting for select thread: %lu", + (ulong)select_thread)); + + mysql_mutex_lock(&LOCK_start_thread); + while (select_thread_in_use) + { + struct timespec abstime; + int UNINIT_VAR(error); + DBUG_PRINT("info",("Waiting for select thread")); + +#ifndef DONT_USE_THR_ALARM + if (pthread_kill(select_thread, thr_client_alarm)) + break; // allready dead +#endif + set_timespec(abstime, 2); + for (uint tmp=0 ; tmp < 10 && select_thread_in_use; tmp++) + { + error= mysql_cond_timedwait(&COND_start_thread, &LOCK_start_thread, + &abstime); + if (error != EINTR) + break; + } +#ifdef EXTRA_DEBUG + if (error != 0 && error != ETIMEDOUT && !count++) + sql_print_error("Got error %d from mysql_cond_timedwait", error); +#endif + } + mysql_mutex_unlock(&LOCK_start_thread); +#endif /* _WIN32 */ +} + + +/** + A wrapper around kill_main_thrad(). + + Sets shutdown user. This function may be called by multiple threads + concurrently, thus it performs safe update of shutdown_user + (first thread wins). 
+*/ + +void kill_mysql(THD *thd) +{ + char user_host_buff[MAX_USER_HOST_SIZE + 1]; + char *user, *expected_shutdown_user= 0; + + make_user_name(thd, user_host_buff); + + if ((user= my_strdup(PSI_NOT_INSTRUMENTED, user_host_buff, MYF(0))) && + !shutdown_user.compare_exchange_strong(expected_shutdown_user, + user, + std::memory_order_relaxed, + std::memory_order_relaxed)) + { + my_free(user); + } + + shutdown_thread_id= thd->thread_id; + DBUG_EXECUTE_IF("mysql_admin_shutdown_wait_for_slaves", + thd->lex->is_shutdown_wait_for_slaves= true;); +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("simulate_delay_at_shutdown", + { + DBUG_ASSERT(binlog_dump_thread_count == 3); + const char act[]= + "now " + "SIGNAL greetings_from_kill_mysql"; + DBUG_ASSERT(!debug_sync_set_action(thd, + STRING_WITH_LEN(act))); + };); +#endif + + if (thd->lex->is_shutdown_wait_for_slaves) + shutdown_wait_for_slaves= true; + break_connect_loop(); +} + + +static void close_connections(void) +{ + DBUG_ENTER("close_connections"); + + /* Clear thread cache */ + thread_cache.final_flush(); + + /* Abort listening to new connections */ + DBUG_PRINT("quit",("Closing sockets")); + /* Protect against pthread_kill() calling close_server_sock(*) */ + mysql_mutex_lock(&LOCK_start_thread); + for (uint i= 0 ; i < listen_sockets.elements() ; i++) + { + MYSQL_SOCKET *sock= listen_sockets.get_pos(i); + (void) mysql_socket_close(*sock); + if (sock->is_unix_domain_socket && !systemd_sock_activation) + { + (void) unlink(mysqld_unix_port); + } + } + /* + The following is needed to the threads stuck in + setup_connection_thread_globals() + to continue. + */ + listen_sockets.free_memory(); + mysql_mutex_unlock(&LOCK_start_thread); + + end_thr_alarm(0); // Abort old alarms. + + while (CONNECT::count) + my_sleep(100); + + /* + First signal all threads that it's time to die + This will give the threads some time to gracefully abort their + statements and inform their clients that the server is about to die. 
+ */ + DBUG_EXECUTE_IF("mysqld_delay_kill_threads_phase_1", my_sleep(200000);); + int n_threads_awaiting_ack= 0; + server_threads.iterate(kill_thread_phase_1, &n_threads_awaiting_ack); + + /* + If we are waiting on any ACKs, delay killing the thread until either an ACK + is received or the timeout is hit. + + Allow at max the number of sessions to await a timeout; however, if all + ACKs have been received in less iterations, then quit early + */ + if (shutdown_wait_for_slaves && repl_semisync_master.get_master_enabled()) + { + int waiting_threads= repl_semisync_master.sync_get_master_wait_sessions(); + if (waiting_threads) + sql_print_information("Delaying shutdown to await semi-sync ACK"); + + while (waiting_threads-- > 0) + repl_semisync_master.await_slave_reply(); + } + + DBUG_EXECUTE_IF("delay_shutdown_phase_2_after_semisync_wait", + my_sleep(500000);); + + Events::deinit(); + slave_prepare_for_shutdown(); + ack_receiver.stop(); + + /* + Give threads time to die. + + In 5.5, this was waiting 100 rounds @ 20 milliseconds/round, so as little + as 2 seconds, depending on thread scheduling. + + From 10.0, we increase this to 1000 rounds / 20 seconds. The rationale is + that on a server with heavy I/O load, it is quite possible for eg. an + fsync() of the binlog or whatever to cause something like LOCK_log to be + held for more than 2 seconds. We do not want to force kill threads in + such cases, if it can be avoided. Note that normally, the wait will be + much smaller than even 2 seconds, this is only a safety fallback against + stuck threads so server shutdown is not held up forever. 
+ */ + DBUG_PRINT("info", ("THD_count: %u", THD_count::value())); + + for (int i= 0; THD_count::connection_thd_count() - n_threads_awaiting_ack + && i < 1000; i++) + { + if (DBUG_IF("only_kill_system_threads_no_loop")) + break; + my_sleep(20000); + } + + if (global_system_variables.log_warnings) + server_threads.iterate(warn_threads_active_after_phase_1); + +#ifdef WITH_WSREP + if (wsrep_inited == 1) + { + wsrep_deinit(true); + } + wsrep_sst_auth_free(); +#endif + /* All threads has now been aborted */ + DBUG_PRINT("quit", ("Waiting for threads to die (count=%u)", + THD_count::connection_thd_count() - n_threads_awaiting_ack)); + + while (THD_count::connection_thd_count() - n_threads_awaiting_ack) + { + if (DBUG_IF("only_kill_system_threads_no_loop")) + break; + my_sleep(1000); + } + + /* Kill phase 2 */ + server_threads.iterate(kill_thread_phase_2); + for (uint64 i= 0; THD_count::value() > local_connection_thread_count; i++) + { + /* + This time the warnings are emitted within the loop to provide a + dynamic view on the shutdown status through the errorlog. + */ + if (global_system_variables.log_warnings > 2 && i % 60000 == 0) + server_threads.iterate(warn_threads_active_after_phase_2); + my_sleep(1000); + } + /* End of kill phase 2 */ + + DBUG_PRINT("quit",("close_connections thread")); + DBUG_VOID_RETURN; +} + +#endif /*EMBEDDED_LIBRARY*/ + + +extern "C" sig_handler print_signal_warning(int sig) +{ + if (global_system_variables.log_warnings) + sql_print_warning("Got signal %d from thread %u", sig, + (uint)my_thread_id()); +#ifdef SIGNAL_HANDLER_RESET_ON_DELIVERY + my_sigset(sig,print_signal_warning); /* int. 
thread system calls */ +#endif +#if !defined(_WIN32) + if (sig == SIGALRM) + alarm(2); /* reschedule alarm */ +#endif +} + +#ifdef _WIN32 +typedef void (*report_svc_status_t)(DWORD current_state, DWORD win32_exit_code, + DWORD wait_hint); +static void dummy_svc_status(DWORD, DWORD, DWORD) {} +static report_svc_status_t my_report_svc_status= dummy_svc_status; +#endif + +#ifndef EMBEDDED_LIBRARY +extern "C" void unireg_abort(int exit_code) +{ + DBUG_ENTER("unireg_abort"); + + if (opt_help) + usage(); + else if (exit_code) + sql_print_error("Aborting"); + /* Don't write more notes to the log to not hide error message */ + disable_log_notes= 1; + +#ifdef WITH_WSREP + // Note that we do not have thd here, thus can't use + // WSREP(thd) + + if (WSREP_ON && + Wsrep_server_state::is_inited() && + Wsrep_server_state::instance().state() != wsrep::server_state::s_disconnected) + { + /* + This is an abort situation, we cannot expect to gracefully close all + wsrep threads here, we can only diconnect from service + */ + wsrep_close_client_connections(FALSE); + Wsrep_server_state::instance().disconnect(); + WSREP_INFO("Service disconnected."); + wsrep_close_threads(NULL); /* this won't close all threads */ + sleep(1); /* so give some time to exit for those which can */ + WSREP_INFO("Some threads may fail to exit."); + } + + if (WSREP_ON && wsrep_inited) + { + wsrep_deinit(true); + wsrep_deinit_server(); + } + wsrep_sst_auth_free(); +#endif // WITH_WSREP + + clean_up(!opt_abort && (exit_code || !opt_bootstrap)); /* purecov: inspected */ + DBUG_PRINT("quit",("done with cleanup in unireg_abort")); + mysqld_exit(exit_code); +} + +static void mysqld_exit(int exit_code) +{ + DBUG_ENTER("mysqld_exit"); + /* + Important note: we wait for the signal thread to end, + but if a kill -15 signal was sent, the signal thread did + spawn the kill_server_thread thread, which is running concurrently. 
+ */ + rpl_deinit_gtid_waiting(); + rpl_deinit_gtid_slave_state(); + wait_for_signal_thread_to_end(); +#ifdef WITH_WSREP + wsrep_deinit_server(); + wsrep_sst_auth_free(); +#endif /* WITH_WSREP */ + mysql_audit_finalize(); + clean_up_mutexes(); + my_end((opt_endinfo ? MY_CHECK_ERROR | MY_GIVE_INFO : 0)); +#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE + shutdown_performance_schema(); // we do it as late as possible +#endif + set_malloc_size_cb(NULL); + if (global_status_var.global_memory_used) + { + fprintf(stderr, "Warning: Memory not freed: %lld\n", + (longlong) global_status_var.global_memory_used); + if (exit_code == 0 || opt_endinfo) + SAFEMALLOC_REPORT_MEMORY(0); + } + DBUG_LEAVE; +#ifdef _WIN32 + my_report_svc_status(SERVICE_STOPPED, exit_code, 0); +#endif + sd_notify(0, "STATUS=MariaDB server is down"); + exit(exit_code); /* purecov: inspected */ +} + +#endif /* !EMBEDDED_LIBRARY */ + +static void clean_up(bool print_message) +{ + DBUG_PRINT("exit",("clean_up")); + if (cleanup_done++) + return; /* purecov: inspected */ + +#ifdef HAVE_REPLICATION + // We must call end_slave() as clean_up may have been called during startup + end_slave(); + if (use_slave_mask) + my_bitmap_free(&slave_error_mask); +#endif + stop_handle_manager(); + ddl_log_release(); + + logger.cleanup_base(); + + injector::free_instance(); + mysql_bin_log.cleanup(); + + my_tz_free(); + my_dboptions_cache_free(); + ignore_db_dirs_free(); + servers_free(1); +#ifndef NO_EMBEDDED_ACCESS_CHECKS + acl_free(1); + grant_free(); +#endif + query_cache_destroy(); + hostname_cache_free(); + item_func_sleep_free(); + lex_free(); /* Free some memory */ + item_create_cleanup(); + tdc_start_shutdown(); +#ifdef HAVE_REPLICATION + semi_sync_master_deinit(); +#endif + plugin_shutdown(); + udf_free(); + ha_end(); + if (tc_log) + tc_log->close(); + xid_cache_free(); + tdc_deinit(); + mdl_destroy(); + dflt_key_cache= 0; + key_caches.delete_elements(free_key_cache); + wt_end(); + multi_keycache_free(); + sp_cache_end(); + 
free_status_vars(); + end_thr_alarm(1); /* Free allocated memory */ + end_thr_timer(); + my_free_open_file_info(); + if (defaults_argv) + free_defaults(defaults_argv); + free_tmpdir(&mysql_tmpdir_list); + my_bitmap_free(&temp_pool); + free_max_user_conn(); + free_global_user_stats(); + free_global_client_stats(); + free_global_table_stats(); + free_global_index_stats(); + delete_dynamic(&all_options); // This should be empty + free_all_rpl_filters(); + wsrep_thr_deinit(); + my_uuid_end(); + delete type_handler_data; + delete binlog_filter; + delete global_rpl_filter; + end_ssl(); +#ifndef EMBEDDED_LIBRARY + vio_end(); + listen_sockets.free_memory(); +#endif /*!EMBEDDED_LIBRARY*/ +#if defined(ENABLED_DEBUG_SYNC) + /* End the debug sync facility. See debug_sync.cc. */ + debug_sync_end(); +#endif /* defined(ENABLED_DEBUG_SYNC) */ + + delete_pid_file(MYF(0)); + + if (print_message && my_default_lc_messages && server_start_time) + sql_print_information(ER_DEFAULT(ER_SHUTDOWN_COMPLETE),my_progname); + MYSQL_CALLBACK(thread_scheduler, end, ()); + thread_scheduler= 0; + mysql_library_end(); + finish_client_errs(); + free_root(&startup_root, MYF(0)); + protect_root(&read_only_root, PROT_READ | PROT_WRITE); + free_root(&read_only_root, MYF(0)); + cleanup_errmsgs(); + free_error_messages(); + /* Tell main we are ready */ + logger.cleanup_end(); + sys_var_end(); + free_charsets(); + + my_free(const_cast(log_bin_basename)); + my_free(const_cast(log_bin_index)); +#ifndef EMBEDDED_LIBRARY + my_free(const_cast(relay_log_basename)); + my_free(const_cast(relay_log_index)); +#endif + free_list(opt_plugin_load_list_ptr); + destroy_proxy_protocol_networks(); + + /* + The following lines may never be executed as the main thread may have + killed us + */ + DBUG_PRINT("quit", ("done with cleanup")); +} /* clean_up */ + + +#ifndef EMBEDDED_LIBRARY + +/** + This is mainly needed when running with purify, but it's still nice to + know that all child threads have died when mysqld exits. 
+*/ +static void wait_for_signal_thread_to_end() +{ + uint i; + /* + Wait up to 10 seconds for signal thread to die. We use this mainly to + avoid getting warnings that my_thread_end has not been called + */ + for (i= 0 ; i < 100 && signal_thread_in_use; i++) + { + if (pthread_kill(signal_thread, MYSQL_KILL_SIGNAL) == ESRCH) + break; + my_sleep(100); // Give it time to die + } +} +#endif /*EMBEDDED_LIBRARY*/ + +static void clean_up_mutexes() +{ + DBUG_ENTER("clean_up_mutexes"); + server_threads.destroy(); + thread_cache.destroy(); + mysql_rwlock_destroy(&LOCK_grant); + mysql_mutex_destroy(&LOCK_start_thread); + mysql_mutex_destroy(&LOCK_status); + mysql_rwlock_destroy(&LOCK_all_status_vars); + mysql_mutex_destroy(&LOCK_delayed_insert); + mysql_mutex_destroy(&LOCK_delayed_status); + mysql_mutex_destroy(&LOCK_delayed_create); + mysql_mutex_destroy(&LOCK_crypt); + mysql_mutex_destroy(&LOCK_user_conn); + mysql_mutex_destroy(&LOCK_thread_id); + mysql_mutex_destroy(&LOCK_stats); + mysql_mutex_destroy(&LOCK_global_user_client_stats); + mysql_mutex_destroy(&LOCK_global_table_stats); + mysql_mutex_destroy(&LOCK_global_index_stats); +#ifdef HAVE_OPENSSL + mysql_mutex_destroy(&LOCK_des_key_file); +#if defined(HAVE_OPENSSL10) && !defined(HAVE_WOLFSSL) + for (int i= 0; i < CRYPTO_num_locks(); ++i) + mysql_rwlock_destroy(&openssl_stdlocks[i].lock); + OPENSSL_free(openssl_stdlocks); +#endif /* HAVE_OPENSSL10 */ +#endif /* HAVE_OPENSSL */ +#ifdef HAVE_REPLICATION + mysql_mutex_destroy(&LOCK_rpl_status); +#endif /* HAVE_REPLICATION */ + mysql_mutex_destroy(&LOCK_active_mi); + mysql_rwlock_destroy(&LOCK_ssl_refresh); + mysql_mutex_destroy(&LOCK_backup_log); + mysql_mutex_destroy(&LOCK_temp_pool); + mysql_rwlock_destroy(&LOCK_sys_init_connect); + mysql_rwlock_destroy(&LOCK_sys_init_slave); + mysql_mutex_destroy(&LOCK_global_system_variables); + mysql_prlock_destroy(&LOCK_system_variables_hash); + mysql_mutex_destroy(&LOCK_short_uuid_generator); + 
mysql_mutex_destroy(&LOCK_prepared_stmt_count); + mysql_mutex_destroy(&LOCK_error_messages); + mysql_cond_destroy(&COND_start_thread); + mysql_mutex_destroy(&LOCK_server_started); + mysql_cond_destroy(&COND_server_started); + mysql_mutex_destroy(&LOCK_prepare_ordered); + mysql_cond_destroy(&COND_prepare_ordered); + mysql_mutex_destroy(&LOCK_after_binlog_sync); + mysql_mutex_destroy(&LOCK_commit_ordered); +#ifndef EMBEDDED_LIBRARY + mysql_mutex_destroy(&LOCK_error_log); +#endif + DBUG_VOID_RETURN; +} + + +/**************************************************************************** +** Init IP and UNIX socket +****************************************************************************/ + +#ifdef EMBEDDED_LIBRARY +void close_connection(THD *thd, uint sql_errno) +{ +} +#else +static void set_ports() +{ + char *env; + if (!mysqld_port && !opt_disable_networking) + { // Get port if not from commandline + mysqld_port= MYSQL_PORT; + + /* + if builder specifically requested a default port, use that + (even if it coincides with our factory default). + only if they didn't do we check /etc/services (and, failing + on that, fall back to the factory default of 3306). + either default can be overridden by the environment variable + MYSQL_TCP_PORT, which in turn can be overridden with command + line options. 
+ */ + +#if MYSQL_PORT_DEFAULT == 0 +# if !__has_feature(memory_sanitizer) // Work around MSAN deficiency + struct servent *serv_ptr; + if ((serv_ptr= getservbyname("mysql", "tcp"))) + SYSVAR_AUTOSIZE(mysqld_port, ntohs((u_short) serv_ptr->s_port)); +# endif +#endif + if ((env = getenv("MYSQL_TCP_PORT"))) + { + mysqld_port= (uint) atoi(env); + set_sys_var_value_origin(&mysqld_port, sys_var::ENV); + } + } + if (!mysqld_unix_port) + { +#ifdef _WIN32 + mysqld_unix_port= (char*) MYSQL_NAMEDPIPE; +#else + mysqld_unix_port= (char*) MYSQL_UNIX_ADDR; +#endif + if ((env = getenv("MYSQL_UNIX_PORT"))) + { + mysqld_unix_port= env; + set_sys_var_value_origin(&mysqld_unix_port, sys_var::ENV); + } + } +} + +/* Change to run as another user if started with --user */ + +static struct passwd *check_user(const char *user) +{ + myf flags= 0; + if (global_system_variables.log_warnings) + flags|= MY_WME; + if (!opt_bootstrap && !opt_help) + flags|= MY_FAE; + + struct passwd *tmp_user_info= my_check_user(user, MYF(flags)); + + if (!tmp_user_info && my_errno==EINVAL && (flags & MY_FAE)) + unireg_abort(1); + + return tmp_user_info; +} + +static inline void allow_coredumps() +{ +#ifdef PR_SET_DUMPABLE + if (test_flags & TEST_CORE_ON_SIGNAL) + { + /* inform kernel that process is dumpable */ + (void) prctl(PR_SET_DUMPABLE, 1); + } +#endif +} + + +static void set_user(const char *user, struct passwd *user_info_arg) +{ + /* + We can get a SIGSEGV when calling initgroups() on some systems when NSS + is configured to use LDAP and the server is statically linked. We set + calling_initgroups as a flag to the SIGSEGV handler that is then used to + output a specific message to help the user resolve this problem. 
*/
  calling_initgroups= 1;
  int res= my_set_user(user, user_info_arg, MYF(MY_WME));
  calling_initgroups= 0;                /* SIGSEGV-handler hint window closed */
  if (res)
    unireg_abort(1);
  allow_coredumps();
}

#if !defined(_WIN32)
/*
  Drop only the effective gid/uid to the target user; passing -1 to
  setregid()/setreuid() leaves the corresponding real id untouched.
*/
static void set_effective_user(struct passwd *user_info_arg)
{
  DBUG_ASSERT(user_info_arg != 0);
  if (setregid((gid_t)-1, user_info_arg->pw_gid) == -1)
  {
    sql_perror("setregid");
    unireg_abort(1);
  }
  if (setreuid((uid_t)-1, user_info_arg->pw_uid) == -1)
  {
    sql_perror("setreuid");
    unireg_abort(1);
  }
  allow_coredumps();
}
#endif

/** Change root user if started with @c --chroot . */
static void set_root(const char *path)
{
#if !defined(_WIN32)
  if (chroot(path) == -1)
  {
    sql_perror("chroot");
    unireg_abort(1);
  }
  my_setwd("/", MYF(0));
#endif
}

/**
  Activate usage of a tcp port
*/

/* NOTE(review): the template argument of Dynamic_array appears to have been
   lost in extraction; verify against upstream (Dynamic_array<MYSQL_SOCKET>). */
static void activate_tcp_port(uint port,
                              Dynamic_array *sockets,
                              bool is_extra_port= false)
{
  struct addrinfo *ai, *a = NULL, *head = NULL;
  struct addrinfo hints;
  int error;
  int arg;
  char port_buf[NI_MAXSERV];
  const char *real_bind_addr_str;
  MYSQL_SOCKET ip_sock= MYSQL_INVALID_SOCKET;
  DBUG_ENTER("activate_tcp_port");
  DBUG_PRINT("general",("IP Socket is %d",port));

  bzero(&hints, sizeof (hints));
  hints.ai_flags= AI_PASSIVE;           /* wildcard bind when address is NULL */
  hints.ai_socktype= SOCK_STREAM;
  hints.ai_family= AF_UNSPEC;           /* accept both IPv4 and IPv6 results */

  if (my_bind_addr_str && strcmp(my_bind_addr_str, "*") == 0)
    real_bind_addr_str= NULL; // windows doesn't seem to support * here
  else
    real_bind_addr_str= my_bind_addr_str;

  my_snprintf(port_buf, NI_MAXSERV, "%d", port);

  if (real_bind_addr_str && *real_bind_addr_str)
  {
    /* Comma-separated --bind-address list: resolve each entry in turn. */
    char *end;
    char address[FN_REFLEN];

    do
    {
      end= strcend(real_bind_addr_str, ',');
      strmake(address, real_bind_addr_str, (uint) (end - real_bind_addr_str));

      error= getaddrinfo(address, port_buf, &hints, &ai);
      if (unlikely(error != 0))
      {
        DBUG_PRINT("error", ("Got error: %d from getaddrinfo()", error));

        sql_print_error("%s: %s",
ER_DEFAULT(ER_IPSOCK_ERROR), + gai_strerror(error)); + unireg_abort(1); /* purecov: tested */ + } + + if (!head) + { + head= ai; + } + if (a) + { + a->ai_next= ai; + } + a= ai; + while (a->ai_next) + { + a= a->ai_next; + } + + real_bind_addr_str= end + 1; + } while (*end); + } + else + { + error= getaddrinfo(real_bind_addr_str, port_buf, &hints, &ai); + head= ai; + } + + for (a= head; a != NULL; a= a->ai_next) + { + ip_sock= mysql_socket_socket(key_socket_tcpip, a->ai_family, + a->ai_socktype, a->ai_protocol); + + char ip_addr[INET6_ADDRSTRLEN]; + if (vio_get_normalized_ip_string(a->ai_addr, a->ai_addrlen, + ip_addr, sizeof (ip_addr))) + { + ip_addr[0]= 0; + } + + if (mysql_socket_getfd(ip_sock) == INVALID_SOCKET) + { + sql_print_message_func func= real_bind_addr_str ? sql_print_error + : sql_print_warning; + func("Failed to create a socket for %s '%s': errno: %d.", + (a->ai_family == AF_INET) ? "IPv4" : "IPv6", + (const char *) ip_addr, (int) socket_errno); + } + else + { + ip_sock.address_family= a->ai_family; + sql_print_information("Server socket created on IP: '%s'.", + (const char *) ip_addr); + + if (mysql_socket_getfd(ip_sock) == INVALID_SOCKET) + { + DBUG_PRINT("error",("Got error: %d from socket()",socket_errno)); + sql_perror(ER_DEFAULT(ER_IPSOCK_ERROR)); /* purecov: tested */ + unireg_abort(1); /* purecov: tested */ + } + + mysql_socket_set_thread_owner(ip_sock); + +#ifndef _WIN32 + /* + We should not use SO_REUSEADDR on windows as this would enable a + user to open two mysqld servers with the same TCP/IP port. + */ + arg= 1; + (void) mysql_socket_setsockopt(ip_sock, SOL_SOCKET, SO_REUSEADDR, + (char*)&arg, sizeof(arg)); +#endif /* _WIN32 */ + +#ifdef IPV6_V6ONLY + /* + If an address name resolves to both IPv4 and IPv6 addresses, the server + will listen on them both. With IPV6_V6ONLY unset, listening on an IPv6 + wildcard address may cause listening on an IPv4 wildcard address + to fail. That's why IPV6_V6ONLY needs to be forcefully turned on. 
+ */ + if (a->ai_family == AF_INET6) + { + arg= 1; + (void) mysql_socket_setsockopt(ip_sock, IPPROTO_IPV6, IPV6_V6ONLY, + (char*)&arg, sizeof(arg)); + } +#endif + +#ifdef IP_FREEBIND + arg= 1; + (void) mysql_socket_setsockopt(ip_sock, IPPROTO_IP, IP_FREEBIND, + (char*) &arg, sizeof(arg)); +#endif + /* + Sometimes the port is not released fast enough when stopping and + restarting the server. This happens quite often with the test suite + on busy Linux systems. Retry to bind the address at these intervals: + Sleep intervals: 1, 2, 4, 6, 9, 13, 17, 22, ... + Retry at second: 1, 3, 7, 13, 22, 35, 52, 74, ... + Limit the sequence by mysqld_port_timeout (set --port-open-timeout=#). + */ + int ret; + uint waited, retry, this_wait; + for (waited= 0, retry= 1; ; retry++, waited+= this_wait) + { + if (((ret= mysql_socket_bind(ip_sock, a->ai_addr, a->ai_addrlen)) >= 0 ) + || (socket_errno != SOCKET_EADDRINUSE) + || (waited >= mysqld_port_timeout)) + break; + sql_print_information("Retrying bind on TCP/IP port %u", port); + this_wait= retry * retry / 3 + 1; + sleep(this_wait); + } + + if (ret < 0) + { + char buff[100]; + int s_errno= socket_errno; + sprintf(buff, "Can't start server: Bind on TCP/IP port. Got error: %d", + (int) s_errno); + sql_perror(buff); + /* + Linux will quite happily bind to addresses not present. The + mtr test main.bind_multiple_addresses_resolution relies on this. + For Windows, this is fatal and generates the error: + WSAEADDRNOTAVAIL: The requested address is not valid in its context + In this case, where multiple addresses where specified, maybe + we can live with an error in the log and hope the other addresses + are successful. We catch if no successful bindings occur at the + end of this function. + + FreeBSD returns EADDRNOTAVAIL, and EADDRNOTAVAIL is even in Linux + manual pages. So may was well apply uniform behaviour. 
+ */ +#ifdef _WIN32 + if (s_errno == WSAEADDRNOTAVAIL) + continue; +#endif +#ifdef EADDRNOTAVAIL + if (s_errno == EADDRNOTAVAIL) + continue; +#endif + sql_print_error("Do you already have another server running on " + "port: %u ?", port); + unireg_abort(1); + } + if (mysql_socket_listen(ip_sock,(int) back_log) < 0) + { + sql_perror("Can't start server: listen() on TCP/IP port"); + sql_print_error("listen() on TCP/IP failed with error %d", + socket_errno); + unireg_abort(1); + } + +#ifdef FD_CLOEXEC + (void) fcntl(mysql_socket_getfd(ip_sock), F_SETFD, FD_CLOEXEC); +#endif + ip_sock.is_extra_port= is_extra_port; + sockets->push(ip_sock); + } + } + + freeaddrinfo(head); + if (head && sockets->size() == 0) + { + sql_print_error("No TCP address could be bound to"); + unireg_abort(1); + } + DBUG_VOID_RETURN; +} + + +/** + Activate usage of a systemd activated sockets + i.e started by mariadb.socket +*/ + +static void use_systemd_activated_sockets() +{ +#ifndef __linux__ + return; +#else + char **names = NULL; + int sd_sockets; + DBUG_ENTER("use_systemd_activated_sockets"); + + sd_sockets= sd_listen_fds_with_names(0, &names); + + if (!sd_sockets) + DBUG_VOID_RETURN; + + DBUG_PRINT("general",("Systemd listen_fds is %d", sd_sockets)); + while (sd_sockets--) + { + MYSQL_SOCKET sock; + int stype= 0, accepting= 0, getnameinfo_err; + socklen_t l; + union + { + struct sockaddr sa; + struct sockaddr_storage storage; + struct sockaddr_in in; + struct sockaddr_in6 in6; + struct sockaddr_un un; + } addr; + SOCKET_SIZE_TYPE addrlen= sizeof(addr); + char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV]; + + int fd= SD_LISTEN_FDS_START + sd_sockets; + + if (getsockname(fd, &addr.sa, &addrlen)) + { + sql_print_error("Unable to getsockname on systemd socket activation socket %d," + " errno %d", fd, errno); + goto err; + } + + l= sizeof(stype); + if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &stype, &l) < 0) + { + sql_print_error("Unable to getsockopt(SOL_SOCKET, SO_TYPE) on" + " systemd socket activation 
socket %d," + " errno %d", fd, errno); + goto err; + } + + if (stype != SOCK_STREAM) + { + sql_print_error("Unknown systemd socket activation socket %d," + " not of type SOCK_STREAM - type %d", fd, stype); + goto err; + } + + l= sizeof(accepting); + if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &accepting, &l) < 0) + { + sql_print_error("Unable to getsockopt(SOL_SOCKET, SO_ACCEPTCONN) on" + " systemd socket activation socket %d," + " errno %d", fd, errno); + goto err; + } + + if (!accepting) + { + sql_print_error("Unknown systemd socket activation socket %d," + " is not listening", fd); + goto err; + } + + switch (addr.sa.sa_family) + { + case AF_INET: + sock= mysql_socket_fd(key_socket_tcpip, fd); + sock.is_unix_domain_socket= 0; + mysqld_port= ntohs(addr.in.sin_port); + break; + case AF_INET6: + sock= mysql_socket_fd(key_socket_tcpip, fd); + sock.is_unix_domain_socket= 0; + mysqld_port= ntohs(addr.in6.sin6_port); + break; + case AF_UNIX: + sock= mysql_socket_fd(key_socket_unix, fd); + sock.is_unix_domain_socket= 1; + break; + default: + sql_print_error("Unknown systemd socket activation socket %d," + " not UNIX or INET socket", fd); + goto err; + } + + /* + We check names!=NULL here because sd_listen_fds_with_names maybe + just sd_listen_fds on older pre v227 systemd + */ + sock.is_extra_port= names && strcmp(names[sd_sockets], "extra") == 0; + + if (addr.sa.sa_family == AF_UNIX) + { + /* + Handle abstract sockets and present them in @ form. + */ + if (addr.un.sun_path[0] == '\0') + addr.un.sun_path[0] = '@'; + sql_print_information("Using systemd activated unix socket %s%s", + addr.un.sun_path, sock.is_extra_port ? 
" (extra)" : ""); + memset(addr.un.sun_path, 0, sizeof(addr.un.sun_path)); + } + else + { + getnameinfo_err= getnameinfo(&addr.sa, addrlen, hbuf, sizeof(hbuf), sbuf, + sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV); + if (getnameinfo_err) + sql_print_warning("getnameinfo() on systemd socket activation socket %d" + " failed with error %s(%d)", fd, + gai_strerror(getnameinfo_err), getnameinfo_err); + else + sql_print_information("Using systemd activated socket host %s port %s%s", hbuf, sbuf, + sock.is_extra_port ? " (extra)" : ""); + } + + mysql_socket_set_thread_owner(sock); + listen_sockets.push(sock); + } + systemd_sock_activation= 1; + free(names); + + DBUG_VOID_RETURN; + +err: + free(names); + unireg_abort(1); + DBUG_VOID_RETURN; +#endif /* __linux__ */ +} + + +static void network_init(void) +{ +#ifdef HAVE_SYS_UN_H + struct sockaddr_un UNIXaddr; + int arg; +#endif + DBUG_ENTER("network_init"); + + use_systemd_activated_sockets(); + + if (MYSQL_CALLBACK_ELSE(thread_scheduler, init, (), 0)) + unireg_abort(1); /* purecov: inspected */ + + if (init_proxy_protocol_networks(my_proxy_protocol_networks)) + unireg_abort(1); + + set_ports(); + + if (report_port == 0) + { + SYSVAR_AUTOSIZE(report_port, mysqld_port); + } +#ifndef DBUG_OFF + if (!opt_disable_networking) + DBUG_ASSERT(report_port != 0); +#endif + if (!opt_disable_networking && !opt_bootstrap && !systemd_sock_activation) + { + if (mysqld_port) + activate_tcp_port(mysqld_port, &listen_sockets, + /* is_extra_port= */ false); + if (mysqld_extra_port) + activate_tcp_port(mysqld_extra_port, &listen_sockets, + /* is_extra_port= */ true); + } + +#if defined(HAVE_SYS_UN_H) + /* + ** Create the UNIX socket + */ + if (mysqld_unix_port[0] && !opt_bootstrap && systemd_sock_activation==0) + { + MYSQL_SOCKET unix_sock= MYSQL_INVALID_SOCKET; + size_t port_len; + DBUG_PRINT("general",("UNIX Socket is %s",mysqld_unix_port)); + + if ((port_len= strlen(mysqld_unix_port)) > sizeof(UNIXaddr.sun_path) - 1) + { + 
sql_print_error("The socket file path is too long (> %u): %s", + (uint) sizeof(UNIXaddr.sun_path) - 1, mysqld_unix_port); + unireg_abort(1); + } + unix_sock= mysql_socket_socket(key_socket_unix, AF_UNIX, SOCK_STREAM, 0); + if (mysql_socket_getfd(unix_sock) < 0) + { + sql_perror("Can't start server : UNIX Socket "); /* purecov: inspected */ + unireg_abort(1); /* purecov: inspected */ + } + + unix_sock.is_unix_domain_socket= true; + listen_sockets.push(unix_sock); + unix_sock_is_online= true; + mysql_socket_set_thread_owner(unix_sock); + + bzero((char*) &UNIXaddr, sizeof(UNIXaddr)); + UNIXaddr.sun_family = AF_UNIX; + strmov(UNIXaddr.sun_path, mysqld_unix_port); +#if defined(__linux__) + /* Abstract socket */ + if (mysqld_unix_port[0] == '@') + { + UNIXaddr.sun_path[0]= '\0'; + port_len+= offsetof(struct sockaddr_un, sun_path); + } + else +#endif + { + (void) unlink(mysqld_unix_port); + port_len= sizeof(UNIXaddr); + } + arg= 1; + (void) mysql_socket_setsockopt(unix_sock,SOL_SOCKET,SO_REUSEADDR, + (char*)&arg, sizeof(arg)); + umask(0); + if (mysql_socket_bind(unix_sock, + reinterpret_cast(&UNIXaddr), + port_len) < 0) + { + sql_perror("Can't start server : Bind on unix socket"); /* purecov: tested */ + sql_print_error("Do you already have another server running on socket: %s ?",mysqld_unix_port); + unireg_abort(1); /* purecov: tested */ + } + umask(((~my_umask) & 0666)); +#if defined(S_IFSOCK) && defined(SECURE_SOCKETS) + (void) chmod(mysqld_unix_port,S_IFSOCK); /* Fix solaris 2.6 bug */ +#endif + if (mysql_socket_listen(unix_sock,(int) back_log) < 0) + sql_print_warning("listen() on Unix socket failed with error %d", + socket_errno); +#ifdef FD_CLOEXEC + (void) fcntl(mysql_socket_getfd(unix_sock), F_SETFD, FD_CLOEXEC); +#endif + } +#endif + +#ifdef _WIN32 + network_init_win(); +#endif + + DBUG_PRINT("info",("server started")); + DBUG_VOID_RETURN; +} + + +/** + Close a connection. + + @param thd Thread handle. + @param sql_errno The error code to send before disconnect. 
+ + @note + For the connection that is doing shutdown, this is called twice +*/ + +void close_connection(THD *thd, uint sql_errno) +{ + int lvl= (thd->main_security_ctx.user ? 3 : 1); + DBUG_ENTER("close_connection"); + + if (sql_errno) + { + thd->protocol->net_send_error(thd, sql_errno, ER_DEFAULT(sql_errno), NULL); + thd->print_aborted_warning(lvl, ER_DEFAULT(sql_errno)); + } + else if (!thd->main_security_ctx.user) + thd->print_aborted_warning(lvl, "This connection closed normally without" + " authentication"); + + thd->disconnect(); + + MYSQL_CONNECTION_DONE((int) sql_errno, thd->thread_id); + + if (MYSQL_CONNECTION_DONE_ENABLED()) + { + sleep(0); /* Workaround to avoid tailcall optimisation */ + } + mysql_audit_notify_connection_disconnect(thd, sql_errno); + DBUG_VOID_RETURN; +} + + +/** Called when mysqld is aborted with ^C */ +/* ARGSUSED */ +extern "C" sig_handler end_mysqld_signal(int sig __attribute__((unused))) +{ + DBUG_ENTER("end_mysqld_signal"); + /* Don't kill if signal thread is not running */ + if (signal_thread_in_use) + break_connect_loop(); // Take down mysqld nicely + DBUG_VOID_RETURN; /* purecov: deadcode */ +} +#endif /* EMBEDDED_LIBRARY */ + + +/* + Unlink thd from global list of available connections + + SYNOPSIS + unlink_thd() + thd Thread handler +*/ + +void unlink_thd(THD *thd) +{ + DBUG_ENTER("unlink_thd"); + DBUG_PRINT("enter", ("thd: %p", thd)); + + thd->cleanup(); + thd->add_status_to_global(); + server_threads.erase(thd); + +#ifdef WITH_WSREP + /* + Do not decrement when its wsrep system thread. wsrep_applier is set for + applier as well as rollbacker threads. + */ + if (!thd->wsrep_applier) +#endif /* WITH_WSREP */ + --*thd->scheduler->connection_count; + + thd->free_connection(); + + DBUG_VOID_RETURN; +} + + +#if defined(_WIN32) +/* + If server is started as service, the service routine will set + the callback function. 
*/
/* Store the Windows service status-report callback (set by the service runner). */
void mysqld_set_service_status_callback(void (*r)(DWORD, DWORD, DWORD))
{
  my_report_svc_status= r;
}

/*
  Startup is considered complete once the shutdown event handle exists;
  that is the handle mysqld_win_initiate_shutdown() signals.
*/
static bool startup_complete()
{
  return hEventShutdown != NULL;
}

/**
  Initiates shutdown on Windows by setting shutdown event.
  Reports windows service status.

  If startup was not finished, terminates process (no good
  cleanup possible)
*/
void mysqld_win_initiate_shutdown()
{
  if (startup_complete())
  {
    my_report_svc_status(SERVICE_STOP_PENDING, 0, 0);
    abort_loop= 1;                      /* request server loop termination */
    if (!SetEvent(hEventShutdown))
      /* This should never fail.*/
      abort();
  }
  else
  {
    my_report_svc_status(SERVICE_STOPPED, 1, 0);
    TerminateProcess(GetCurrentProcess(), 1);
  }
}

/*
  Signal when server has started and can accept connections.
*/
void mysqld_win_set_startup_complete()
{
  my_report_svc_status(SERVICE_RUNNING, 0, 0);
  DBUG_ASSERT(startup_complete());
}

/* Ask the SCM for 'sec' more seconds before the service is considered hung. */
void mysqld_win_extend_service_timeout(DWORD sec)
{
  my_report_svc_status((DWORD)-1, 0, 2*1000*sec);
}

/*
  For a service name other than the historical "mysql", also read options
  from a defaults-file group named after the service.
*/
void mysqld_win_set_service_name(const char *name)
{
  if (stricmp(name, "mysql"))
    load_default_groups[array_elements(load_default_groups) - 2]= name;
}

/*
  On Windows, we use native SetConsoleCtrlHandler for handle events like Ctrl-C
  with graceful shutdown.
  Also, we do not use signal(), but SetUnhandledExceptionFilter instead - as it
  provides possibility to pass the exception to just-in-time debugger, collect
  dumps and potentially also the exception and thread context used to output
  callstack.
*/

/*
  Console control handler: Ctrl-C / Ctrl-Break trigger a clean shutdown,
  console close terminates immediately; all other events are passed on
  to the default handler (return FALSE).
*/
static BOOL WINAPI console_event_handler( DWORD type )
{
  /* Indexed by the CTRL_*_EVENT value; codes 3 and 4 are unused. */
  static const char *names[]= {
    "CTRL_C_EVENT","CTRL_BREAK_EVENT", "CTRL_CLOSE_EVENT", "", "",
    "CTRL_LOGOFF_EVENT", "CTRL_SHUTDOWN_EVENT"};

  switch (type)
  {
  case CTRL_C_EVENT:
  case CTRL_BREAK_EVENT:
    sql_print_information("console_event_handler: received %s event, shutting down",
                          names[type]);
    mysqld_win_initiate_shutdown();
    return TRUE;
  case CTRL_CLOSE_EVENT:
    sql_print_information("console_event_handler: received CTRL_CLOSE_EVENT event, terminating");
    TerminateProcess(GetCurrentProcess(), 1);
    return TRUE;
  default:
    return FALSE;
  }
}


#ifdef DEBUG_UNHANDLED_EXCEPTION_FILTER
#define DEBUGGER_ATTACH_TIMEOUT 120
/*
  Wait for debugger to attach and break into debugger. If debugger is
  not attached, resume after timeout.
*/
static void wait_for_debugger(int timeout_sec)
{
  if(!IsDebuggerPresent())
  {
    int i;
    printf("Waiting for debugger to attach, pid=%u\n",GetCurrentProcessId());
    fflush(stdout);
    for(i= 0; i < timeout_sec; i++)
    {
      Sleep(1000);
      if(IsDebuggerPresent())
      {
        /* Break into debugger */
        __debugbreak();
        return;
      }
    }
    printf("pid=%u, debugger not attached after %d seconds, resuming\n",GetCurrentProcessId(),
           timeout_sec);
    fflush(stdout);
  }
}
#endif /* DEBUG_UNHANDLED_EXCEPTION_FILTER */

/*
  Last-chance SEH filter: run the fatal-signal report exactly once, then
  let a JIT debugger / Windows Error Reporting take over.
*/
static LONG WINAPI my_unhandler_exception_filter(EXCEPTION_POINTERS *ex_pointers)
{
  static BOOL first_time= TRUE;
  if(!first_time)
  {
    /*
      This routine can be called twice, typically
      when detaching in JIT debugger.
      Return EXCEPTION_EXECUTE_HANDLER to terminate process.
    */
    return EXCEPTION_EXECUTE_HANDLER;
  }
  first_time= FALSE;
#ifdef DEBUG_UNHANDLED_EXCEPTION_FILTER
  /*
    Unfortunately there is no clean way to debug unhandled exception filters,
    as debugger does not stop there(also documented in MSDN)
    To overcome, one could put a MessageBox, but this will not work in service.
+ Better solution is to print error message and sleep some minutes + until debugger is attached + */ + wait_for_debugger(DEBUGGER_ATTACH_TIMEOUT); +#endif /* DEBUG_UNHANDLED_EXCEPTION_FILTER */ + __try + { + my_set_exception_pointers(ex_pointers); + handle_fatal_signal(ex_pointers->ExceptionRecord->ExceptionCode); + } + __except(EXCEPTION_EXECUTE_HANDLER) + { + DWORD written; + const char msg[] = "Got exception in exception handler!\n"; + WriteFile(GetStdHandle(STD_OUTPUT_HANDLE),msg, sizeof(msg)-1, + &written,NULL); + } + /* + Return EXCEPTION_CONTINUE_SEARCH to give JIT debugger + (drwtsn32 or vsjitdebugger) possibility to attach, + if JIT debugger is configured. + Windows Error reporting might generate a dump here. + */ + return EXCEPTION_CONTINUE_SEARCH; +} + + +void init_signals(void) +{ + SetConsoleCtrlHandler(console_event_handler,TRUE); + + /* Avoid MessageBox()es*/ + _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE); + _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR); + _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE); + _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR); + _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE); + _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR); + + /* + Do not use SEM_NOGPFAULTERRORBOX in the following SetErrorMode (), + because it would prevent JIT debugger and Windows error reporting + from working. 
We need WER or JIT-debugging, since our own unhandled + exception filter is not guaranteed to work in all situation + (like heap corruption or stack overflow) + */ + SetErrorMode(SetErrorMode(0) | SEM_FAILCRITICALERRORS + | SEM_NOOPENFILEERRORBOX); + if(!opt_debugging) + SetUnhandledExceptionFilter(my_unhandler_exception_filter); +} + + +static void start_signal_handler(void) +{ +#ifndef EMBEDDED_LIBRARY + // Save vm id of this process + if (!opt_bootstrap) + create_pid_file(); +#endif /* EMBEDDED_LIBRARY */ +} + + +static void check_data_home(const char *path) +{} + +#endif /* _WIN32 */ + + +#if BACKTRACE_DEMANGLE +#include +extern "C" char *my_demangle(const char *mangled_name, int *status) +{ + return abi::__cxa_demangle(mangled_name, NULL, NULL, status); +} +#endif + + +#ifdef DBUG_ASSERT_AS_PRINTF +extern "C" void +mariadb_dbug_assert_failed(const char *assert_expr, const char *file, + unsigned long line) +{ + fprintf(stderr, "Warning: assertion failed: %s at %s line %lu\n", + assert_expr, file, line); + if (opt_stack_trace) + { + fprintf(stderr, "Attempting backtrace to find out the reason for the assert:\n"); + my_print_stacktrace(NULL, (ulong) my_thread_stack_size, 1); + } +} +#endif /* DBUG_ASSERT_AS_PRINT */ + +#if !defined(_WIN32) +#ifndef SA_RESETHAND +#define SA_RESETHAND 0 +#endif /* SA_RESETHAND */ +#ifndef SA_NODEFER +#define SA_NODEFER 0 +#endif /* SA_NODEFER */ + +#ifndef EMBEDDED_LIBRARY + +void init_signals(void) +{ + sigset_t set; + struct sigaction sa; + DBUG_ENTER("init_signals"); + + my_sigset(THR_SERVER_ALARM,print_signal_warning); // Should never be called! 
+ + if (opt_stack_trace || (test_flags & TEST_CORE_ON_SIGNAL)) + { + sa.sa_flags = SA_RESETHAND | SA_NODEFER; + sigemptyset(&sa.sa_mask); + sigprocmask(SIG_SETMASK,&sa.sa_mask,NULL); + +#if defined(__amiga__) + sa.sa_handler=(void(*)())handle_fatal_signal; +#else + sa.sa_handler=handle_fatal_signal; +#endif + sigaction(SIGSEGV, &sa, NULL); + sigaction(SIGABRT, &sa, NULL); +#ifdef SIGBUS + sigaction(SIGBUS, &sa, NULL); +#endif + sigaction(SIGILL, &sa, NULL); + sigaction(SIGFPE, &sa, NULL); + } + +#ifdef HAVE_GETRLIMIT + if (test_flags & TEST_CORE_ON_SIGNAL) + { + /* Change limits so that we will get a core file */ + STRUCT_RLIMIT rl; + rl.rlim_cur = rl.rlim_max = (rlim_t) RLIM_INFINITY; + if (setrlimit(RLIMIT_CORE, &rl) && global_system_variables.log_warnings) + sql_print_warning("setrlimit could not change the size of core files to 'infinity'; We may not be able to generate a core file on signals"); + } +#endif + (void) sigemptyset(&set); + my_sigset(SIGPIPE,SIG_IGN); + sigaddset(&set,SIGPIPE); +#ifndef IGNORE_SIGHUP_SIGQUIT + sigaddset(&set,SIGQUIT); + sigaddset(&set,SIGHUP); +#endif + sigaddset(&set,SIGTERM); + + /* Fix signals if blocked by parents (can happen on Mac OS X) */ + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sa.sa_handler = print_signal_warning; + sigaction(SIGTERM, &sa, (struct sigaction*) 0); + sa.sa_flags = 0; + sa.sa_handler = print_signal_warning; + sigaction(SIGHUP, &sa, (struct sigaction*) 0); + sigaddset(&set,THR_SERVER_ALARM); + if (test_flags & TEST_SIGINT) + { + /* Allow SIGINT to break mysqld. 
This is for debugging with --gdb */ + my_sigset(SIGINT, end_mysqld_signal); + sigdelset(&set, SIGINT); + } + else + { + sigaddset(&set,SIGINT); +#ifdef SIGTSTP + sigaddset(&set,SIGTSTP); +#endif + } + + sigprocmask(SIG_SETMASK,&set,NULL); + pthread_sigmask(SIG_SETMASK,&set,NULL); + DBUG_VOID_RETURN; +} + + +static void start_signal_handler(void) +{ + int error; + pthread_attr_t thr_attr; + DBUG_ENTER("start_signal_handler"); + + (void) pthread_attr_init(&thr_attr); + pthread_attr_setscope(&thr_attr,PTHREAD_SCOPE_SYSTEM); + (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); + (void) my_setstacksize(&thr_attr,my_thread_stack_size); + + mysql_mutex_lock(&LOCK_start_thread); + if (unlikely((error= mysql_thread_create(key_thread_signal_hand, + &signal_thread, &thr_attr, + signal_hand, 0)))) + { + sql_print_error("Can't create interrupt-thread (error %d, errno: %d)", + error,errno); + exit(1); + } + mysql_cond_wait(&COND_start_thread, &LOCK_start_thread); + mysql_mutex_unlock(&LOCK_start_thread); + + (void) pthread_attr_destroy(&thr_attr); + DBUG_VOID_RETURN; +} + + +#if defined(USE_ONE_SIGNAL_HAND) +pthread_handler_t kill_server_thread(void *arg __attribute__((unused))) +{ + my_thread_init(); // Initialize new thread + break_connect_loop(); + my_thread_end(); + pthread_exit(0); + return 0; +} +#endif + + +/** This threads handles all signals and alarms. */ +/* ARGSUSED */ +pthread_handler_t signal_hand(void *arg __attribute__((unused))) +{ + sigset_t set; + int sig; + my_thread_init(); // Init new thread + DBUG_ENTER("signal_hand"); + signal_thread_in_use= 1; + + /* + Setup alarm handler + This should actually be '+ max_number_of_slaves' instead of +10, + but the +10 should be quite safe. + */ + init_thr_alarm(thread_scheduler->max_threads + extra_max_connections + + global_system_variables.max_insert_delayed_threads + 10); + if (test_flags & TEST_SIGINT) + { + /* Allow SIGINT to break mysqld. 
This is for debugging with --gdb */ + (void) sigemptyset(&set); + (void) sigaddset(&set,SIGINT); + (void) pthread_sigmask(SIG_UNBLOCK,&set,NULL); + } + (void) sigemptyset(&set); // Setup up SIGINT for debug +#ifdef USE_ONE_SIGNAL_HAND + (void) sigaddset(&set,THR_SERVER_ALARM); // For alarms +#endif +#ifndef IGNORE_SIGHUP_SIGQUIT + (void) sigaddset(&set,SIGQUIT); + (void) sigaddset(&set,SIGHUP); +#endif + (void) sigaddset(&set,SIGTERM); + (void) sigaddset(&set,SIGTSTP); + + /* Save pid to this process (or thread on Linux) */ + if (!opt_bootstrap) + create_pid_file(); + + /* + signal to start_signal_handler that we are ready + This works by waiting for start_signal_handler to free mutex, + after which we signal it that we are ready. + At this point there is no other threads running, so there + should not be any other mysql_cond_signal() calls. + */ + mysql_mutex_lock(&LOCK_start_thread); + mysql_cond_broadcast(&COND_start_thread); + mysql_mutex_unlock(&LOCK_start_thread); + + (void) pthread_sigmask(SIG_BLOCK,&set,NULL); + for (;;) + { + int error; + int origin; + + while ((error= my_sigwait(&set, &sig, &origin)) == EINTR) /* no-op */; + if (cleanup_done) + { + DBUG_PRINT("quit",("signal_handler: calling my_thread_end()")); + my_thread_end(); + DBUG_LEAVE; // Must match DBUG_ENTER() + signal_thread_in_use= 0; + pthread_exit(0); // Safety + return 0; // Avoid compiler warnings + } + switch (sig) { + case SIGTERM: + case SIGQUIT: + case SIGKILL: +#ifdef EXTRA_DEBUG + sql_print_information("Got signal %d to shutdown server",sig); +#endif + /* switch to the old log message processing */ + logger.set_handlers(global_system_variables.sql_log_slow ? LOG_FILE:LOG_NONE, + opt_log ? 
LOG_FILE:LOG_NONE); + DBUG_PRINT("info",("Got signal: %d abort_loop: %d",sig,abort_loop)); + if (!abort_loop) + { + /* Delete the instrumentation for the signal thread */ + PSI_CALL_delete_current_thread(); +#ifdef USE_ONE_SIGNAL_HAND + pthread_t tmp; + if (unlikely((error= mysql_thread_create(0, /* Not instrumented */ + &tmp, &connection_attrib, + kill_server_thread, + (void*) &sig)))) + sql_print_error("Can't create thread to kill server (errno= %d)", + error); +#else + my_sigset(sig, SIG_IGN); + break_connect_loop(); // MIT THREAD has a alarm thread +#endif + } + break; + case SIGHUP: +#if defined(SI_KERNEL) + if (!abort_loop && origin != SI_KERNEL) +#elif defined(SI_USER) + if (!abort_loop && origin <= SI_USER) +#else + if (!abort_loop) +#endif + { + int not_used; + mysql_print_status(); // Print some debug info + reload_acl_and_cache((THD*) 0, + (REFRESH_LOG | REFRESH_TABLES | REFRESH_FAST | + REFRESH_GRANT | + REFRESH_THREADS | REFRESH_HOSTS), + (TABLE_LIST*) 0, ¬_used); // Flush logs + + /* reenable logs after the options were reloaded */ + ulonglong fixed_log_output_options= + log_output_options & LOG_NONE ? LOG_TABLE : log_output_options; + + logger.set_handlers(global_system_variables.sql_log_slow + ? fixed_log_output_options : LOG_NONE, + opt_log ? fixed_log_output_options : LOG_NONE); + } + break; +#ifdef USE_ONE_SIGNAL_HAND + case THR_SERVER_ALARM: + process_alarm(sig); // Trigger alarms. + break; +#endif + default: +#ifdef EXTRA_DEBUG + sql_print_warning("Got signal: %d error: %d",sig,error); /* purecov: tested */ +#endif + break; /* purecov: tested */ + } + } + return(0); /* purecov: deadcode */ +} + +static void check_data_home(const char *path) +{} + +#endif /*!EMBEDDED_LIBRARY*/ +#endif /* _WIN32*/ + + +/** + All global error messages are sent here where the first one is stored + for the client. 
*/
/* ARGSUSED */
extern "C" void my_message_sql(uint error, const char *str, myf MyFlags);

/*
  Central error-reporting hook.

  Maps ME_NOTE/ME_WARNING (default: error) to a condition level and a
  matching error-log printer, raises the condition on the current THD
  when there is one (audit-logs the error when there is not), and
  finally writes to the error log when there is no THD, when the
  session logs all errors, or when ME_ERROR_LOG was requested.
*/
void my_message_sql(uint error, const char *str, myf MyFlags)
{
  /* ME_ERROR_LOG_ONLY means: do not involve the session at all. */
  THD *thd= MyFlags & ME_ERROR_LOG_ONLY ? NULL : current_thd;
  Sql_condition::enum_warning_level level;
  sql_print_message_func func;
  DBUG_ENTER("my_message_sql");
  DBUG_PRINT("error", ("error: %u message: '%s' Flag: %lu", error, str,
                       MyFlags));

  DBUG_ASSERT(str != NULL);
  DBUG_ASSERT(error != 0);
  DBUG_ASSERT((MyFlags & ~(ME_BELL | ME_ERROR_LOG | ME_ERROR_LOG_ONLY |
                           ME_NOTE | ME_WARNING | ME_FATAL)) == 0);

  if (MyFlags & ME_NOTE)
  {
    level= Sql_condition::WARN_LEVEL_NOTE;
    func= sql_print_information;
  }
  else if (MyFlags & ME_WARNING)
  {
    level= Sql_condition::WARN_LEVEL_WARN;
    func= sql_print_warning;
  }
  else
  {
    level= Sql_condition::WARN_LEVEL_ERROR;
    func= sql_print_error;
  }

  if (likely(thd))
  {
    if (unlikely(MyFlags & ME_FATAL))
      thd->is_fatal_error= 1;
    /* 5-byte zero SQLSTATE: presumably raise_condition derives the
       default state from the error code — TODO confirm. */
    (void) thd->raise_condition(error, "\0\0\0\0\0", level, str);
  }
  else
    mysql_audit_general(0, MYSQL_AUDIT_GENERAL_ERROR, error, str);

  /* When simulating OOM, skip writing to error log to avoid mtr errors */
  DBUG_EXECUTE_IF("simulate_out_of_memory", DBUG_VOID_RETURN;);

  if (unlikely(!thd) || thd->log_all_errors || (MyFlags & ME_ERROR_LOG))
    (*func)("%s: %s", my_progname_short, str); /* purecov: inspected */
  DBUG_VOID_RETURN;
}


extern "C" void *my_str_malloc_mysqld(size_t size);

/* Allocator hook for the strings library; MY_FAE treats failure as fatal. */
void *my_str_malloc_mysqld(size_t size)
{
  return my_malloc(key_memory_my_str_malloc, size, MYF(MY_FAE));
}


/* Disabled realloc counterpart, kept for reference only. */
#if 0
extern "C" void *my_str_realloc_mysqld(void *ptr, size_t size);
void *my_str_realloc_mysqld(void *ptr, size_t size)
{
  return my_realloc(key_memory_my_str_malloc, ptr, size, MYF(MY_FAE));
}
#endif




/**
  This function is used to check for stack overrun for pathological
  cases of regular expressions and 'like' expressions.
+*/ +extern "C" int +check_enough_stack_size_slow() +{ + uchar stack_top; + THD *my_thd= current_thd; + if (my_thd != NULL) + return check_stack_overrun(my_thd, STACK_MIN_SIZE * 2, &stack_top); + return 0; +} + + +/* + The call to current_thd in check_enough_stack_size_slow is quite expensive, + so we try to avoid it for the normal cases. + The size of each stack frame for the wildcmp() routines is ~128 bytes, + so checking *every* recursive call is not necessary. + */ +extern "C" int +check_enough_stack_size(int recurse_level) +{ + if (recurse_level % 16 != 0) + return 0; + return check_enough_stack_size_slow(); +} + + +static void init_libstrings() +{ +#ifndef EMBEDDED_LIBRARY + my_string_stack_guard= check_enough_stack_size; +#endif +} + + +/** + Initialize one of the global date/time format variables. + + @param format_type What kind of format should be supported + @param var_ptr Pointer to variable that should be updated + + @retval + 0 ok + @retval + 1 error +*/ + +static bool init_global_datetime_format(timestamp_type format_type, + DATE_TIME_FORMAT *format) +{ + /* + Get command line option + format->format.str is already set by my_getopt + */ + format->format.length= strlen(format->format.str); + + if (parse_date_time_format(format_type, format)) + { + fprintf(stderr, "Wrong date/time format specifier: %s\n", + format->format.str); + return true; + } + return false; +} + +#define COM_STATUS(X) (void*) offsetof(STATUS_VAR, X), SHOW_LONG_STATUS +#define STMT_STATUS(X) COM_STATUS(com_stat[(uint) X]) + +SHOW_VAR com_status_vars[]= { + {"admin_commands", COM_STATUS(com_other)}, + {"alter_db", STMT_STATUS(SQLCOM_ALTER_DB)}, + {"alter_db_upgrade", STMT_STATUS(SQLCOM_ALTER_DB_UPGRADE)}, + {"alter_event", STMT_STATUS(SQLCOM_ALTER_EVENT)}, + {"alter_function", STMT_STATUS(SQLCOM_ALTER_FUNCTION)}, + {"alter_procedure", STMT_STATUS(SQLCOM_ALTER_PROCEDURE)}, + {"alter_server", STMT_STATUS(SQLCOM_ALTER_SERVER)}, + {"alter_sequence", STMT_STATUS(SQLCOM_ALTER_SEQUENCE)}, 
+ {"alter_table", STMT_STATUS(SQLCOM_ALTER_TABLE)}, + {"alter_user", STMT_STATUS(SQLCOM_ALTER_USER)}, + {"analyze", STMT_STATUS(SQLCOM_ANALYZE)}, + {"assign_to_keycache", STMT_STATUS(SQLCOM_ASSIGN_TO_KEYCACHE)}, + {"backup", STMT_STATUS(SQLCOM_BACKUP)}, + {"backup_lock", STMT_STATUS(SQLCOM_BACKUP_LOCK)}, + {"begin", STMT_STATUS(SQLCOM_BEGIN)}, + {"binlog", STMT_STATUS(SQLCOM_BINLOG_BASE64_EVENT)}, + {"call_procedure", STMT_STATUS(SQLCOM_CALL)}, + {"change_db", STMT_STATUS(SQLCOM_CHANGE_DB)}, + {"change_master", STMT_STATUS(SQLCOM_CHANGE_MASTER)}, + {"check", STMT_STATUS(SQLCOM_CHECK)}, + {"checksum", STMT_STATUS(SQLCOM_CHECKSUM)}, + {"commit", STMT_STATUS(SQLCOM_COMMIT)}, + {"compound_sql", STMT_STATUS(SQLCOM_COMPOUND)}, + {"create_db", STMT_STATUS(SQLCOM_CREATE_DB)}, + {"create_event", STMT_STATUS(SQLCOM_CREATE_EVENT)}, + {"create_function", STMT_STATUS(SQLCOM_CREATE_SPFUNCTION)}, + {"create_index", STMT_STATUS(SQLCOM_CREATE_INDEX)}, + {"create_package", STMT_STATUS(SQLCOM_CREATE_PACKAGE)}, + {"create_package_body", STMT_STATUS(SQLCOM_CREATE_PACKAGE_BODY)}, + {"create_procedure", STMT_STATUS(SQLCOM_CREATE_PROCEDURE)}, + {"create_role", STMT_STATUS(SQLCOM_CREATE_ROLE)}, + {"create_sequence", STMT_STATUS(SQLCOM_CREATE_SEQUENCE)}, + {"create_server", STMT_STATUS(SQLCOM_CREATE_SERVER)}, + {"create_table", STMT_STATUS(SQLCOM_CREATE_TABLE)}, + {"create_temporary_table", COM_STATUS(com_create_tmp_table)}, + {"create_trigger", STMT_STATUS(SQLCOM_CREATE_TRIGGER)}, + {"create_udf", STMT_STATUS(SQLCOM_CREATE_FUNCTION)}, + {"create_user", STMT_STATUS(SQLCOM_CREATE_USER)}, + {"create_view", STMT_STATUS(SQLCOM_CREATE_VIEW)}, + {"dealloc_sql", STMT_STATUS(SQLCOM_DEALLOCATE_PREPARE)}, + {"delete", STMT_STATUS(SQLCOM_DELETE)}, + {"delete_multi", STMT_STATUS(SQLCOM_DELETE_MULTI)}, + {"do", STMT_STATUS(SQLCOM_DO)}, + {"drop_db", STMT_STATUS(SQLCOM_DROP_DB)}, + {"drop_event", STMT_STATUS(SQLCOM_DROP_EVENT)}, + {"drop_function", STMT_STATUS(SQLCOM_DROP_FUNCTION)}, + {"drop_index", 
STMT_STATUS(SQLCOM_DROP_INDEX)}, + {"drop_procedure", STMT_STATUS(SQLCOM_DROP_PROCEDURE)}, + {"drop_package", STMT_STATUS(SQLCOM_DROP_PACKAGE)}, + {"drop_package_body", STMT_STATUS(SQLCOM_DROP_PACKAGE_BODY)}, + {"drop_role", STMT_STATUS(SQLCOM_DROP_ROLE)}, + {"drop_server", STMT_STATUS(SQLCOM_DROP_SERVER)}, + {"drop_sequence", STMT_STATUS(SQLCOM_DROP_SEQUENCE)}, + {"drop_table", STMT_STATUS(SQLCOM_DROP_TABLE)}, + {"drop_temporary_table", COM_STATUS(com_drop_tmp_table)}, + {"drop_trigger", STMT_STATUS(SQLCOM_DROP_TRIGGER)}, + {"drop_user", STMT_STATUS(SQLCOM_DROP_USER)}, + {"drop_view", STMT_STATUS(SQLCOM_DROP_VIEW)}, + {"empty_query", STMT_STATUS(SQLCOM_EMPTY_QUERY)}, + {"execute_immediate", STMT_STATUS(SQLCOM_EXECUTE_IMMEDIATE)}, + {"execute_sql", STMT_STATUS(SQLCOM_EXECUTE)}, + {"flush", STMT_STATUS(SQLCOM_FLUSH)}, + {"get_diagnostics", STMT_STATUS(SQLCOM_GET_DIAGNOSTICS)}, + {"grant", STMT_STATUS(SQLCOM_GRANT)}, + {"grant_role", STMT_STATUS(SQLCOM_GRANT_ROLE)}, + {"ha_close", STMT_STATUS(SQLCOM_HA_CLOSE)}, + {"ha_open", STMT_STATUS(SQLCOM_HA_OPEN)}, + {"ha_read", STMT_STATUS(SQLCOM_HA_READ)}, + {"help", STMT_STATUS(SQLCOM_HELP)}, + {"insert", STMT_STATUS(SQLCOM_INSERT)}, + {"insert_select", STMT_STATUS(SQLCOM_INSERT_SELECT)}, + {"install_plugin", STMT_STATUS(SQLCOM_INSTALL_PLUGIN)}, + {"kill", STMT_STATUS(SQLCOM_KILL)}, + {"load", STMT_STATUS(SQLCOM_LOAD)}, + {"lock_tables", STMT_STATUS(SQLCOM_LOCK_TABLES)}, + {"optimize", STMT_STATUS(SQLCOM_OPTIMIZE)}, + {"preload_keys", STMT_STATUS(SQLCOM_PRELOAD_KEYS)}, + {"prepare_sql", STMT_STATUS(SQLCOM_PREPARE)}, + {"purge", STMT_STATUS(SQLCOM_PURGE)}, + {"purge_before_date", STMT_STATUS(SQLCOM_PURGE_BEFORE)}, + {"release_savepoint", STMT_STATUS(SQLCOM_RELEASE_SAVEPOINT)}, + {"rename_table", STMT_STATUS(SQLCOM_RENAME_TABLE)}, + {"rename_user", STMT_STATUS(SQLCOM_RENAME_USER)}, + {"repair", STMT_STATUS(SQLCOM_REPAIR)}, + {"replace", STMT_STATUS(SQLCOM_REPLACE)}, + {"replace_select", STMT_STATUS(SQLCOM_REPLACE_SELECT)}, + 
{"reset", STMT_STATUS(SQLCOM_RESET)}, + {"resignal", STMT_STATUS(SQLCOM_RESIGNAL)}, + {"revoke", STMT_STATUS(SQLCOM_REVOKE)}, + {"revoke_all", STMT_STATUS(SQLCOM_REVOKE_ALL)}, + {"revoke_role", STMT_STATUS(SQLCOM_REVOKE_ROLE)}, + {"rollback", STMT_STATUS(SQLCOM_ROLLBACK)}, + {"rollback_to_savepoint",STMT_STATUS(SQLCOM_ROLLBACK_TO_SAVEPOINT)}, + {"savepoint", STMT_STATUS(SQLCOM_SAVEPOINT)}, + {"select", STMT_STATUS(SQLCOM_SELECT)}, + {"set_option", STMT_STATUS(SQLCOM_SET_OPTION)}, + {"show_authors", STMT_STATUS(SQLCOM_SHOW_AUTHORS)}, + {"show_binlog_events", STMT_STATUS(SQLCOM_SHOW_BINLOG_EVENTS)}, + {"show_binlogs", STMT_STATUS(SQLCOM_SHOW_BINLOGS)}, + {"show_charsets", STMT_STATUS(SQLCOM_SHOW_CHARSETS)}, + {"show_collations", STMT_STATUS(SQLCOM_SHOW_COLLATIONS)}, + {"show_contributors", STMT_STATUS(SQLCOM_SHOW_CONTRIBUTORS)}, + {"show_create_db", STMT_STATUS(SQLCOM_SHOW_CREATE_DB)}, + {"show_create_event", STMT_STATUS(SQLCOM_SHOW_CREATE_EVENT)}, + {"show_create_func", STMT_STATUS(SQLCOM_SHOW_CREATE_FUNC)}, + {"show_create_package", STMT_STATUS(SQLCOM_SHOW_CREATE_PACKAGE)}, + {"show_create_package_body",STMT_STATUS(SQLCOM_SHOW_CREATE_PACKAGE_BODY)}, + {"show_create_proc", STMT_STATUS(SQLCOM_SHOW_CREATE_PROC)}, + {"show_create_table", STMT_STATUS(SQLCOM_SHOW_CREATE)}, + {"show_create_trigger", STMT_STATUS(SQLCOM_SHOW_CREATE_TRIGGER)}, + {"show_create_user", STMT_STATUS(SQLCOM_SHOW_CREATE_USER)}, + {"show_databases", STMT_STATUS(SQLCOM_SHOW_DATABASES)}, + {"show_engine_logs", STMT_STATUS(SQLCOM_SHOW_ENGINE_LOGS)}, + {"show_engine_mutex", STMT_STATUS(SQLCOM_SHOW_ENGINE_MUTEX)}, + {"show_engine_status", STMT_STATUS(SQLCOM_SHOW_ENGINE_STATUS)}, + {"show_errors", STMT_STATUS(SQLCOM_SHOW_ERRORS)}, + {"show_events", STMT_STATUS(SQLCOM_SHOW_EVENTS)}, + {"show_explain", STMT_STATUS(SQLCOM_SHOW_EXPLAIN)}, + {"show_analyze", STMT_STATUS(SQLCOM_SHOW_ANALYZE)}, + {"show_fields", STMT_STATUS(SQLCOM_SHOW_FIELDS)}, +#ifndef DBUG_OFF + {"show_function_code", 
STMT_STATUS(SQLCOM_SHOW_FUNC_CODE)}, +#endif + {"show_function_status", STMT_STATUS(SQLCOM_SHOW_STATUS_FUNC)}, + {"show_generic", STMT_STATUS(SQLCOM_SHOW_GENERIC)}, + {"show_grants", STMT_STATUS(SQLCOM_SHOW_GRANTS)}, + {"show_keys", STMT_STATUS(SQLCOM_SHOW_KEYS)}, + {"show_binlog_status", STMT_STATUS(SQLCOM_SHOW_BINLOG_STAT)}, + {"show_open_tables", STMT_STATUS(SQLCOM_SHOW_OPEN_TABLES)}, + {"show_package_status", STMT_STATUS(SQLCOM_SHOW_STATUS_PACKAGE)}, +#ifndef DBUG_OFF + {"show_package_body_code", STMT_STATUS(SQLCOM_SHOW_PACKAGE_BODY_CODE)}, +#endif + {"show_package_body_status", STMT_STATUS(SQLCOM_SHOW_STATUS_PACKAGE_BODY)}, + {"show_plugins", STMT_STATUS(SQLCOM_SHOW_PLUGINS)}, + {"show_privileges", STMT_STATUS(SQLCOM_SHOW_PRIVILEGES)}, +#ifndef DBUG_OFF + {"show_procedure_code", STMT_STATUS(SQLCOM_SHOW_PROC_CODE)}, +#endif + {"show_procedure_status",STMT_STATUS(SQLCOM_SHOW_STATUS_PROC)}, + {"show_processlist", STMT_STATUS(SQLCOM_SHOW_PROCESSLIST)}, + {"show_profile", STMT_STATUS(SQLCOM_SHOW_PROFILE)}, + {"show_profiles", STMT_STATUS(SQLCOM_SHOW_PROFILES)}, + {"show_relaylog_events", STMT_STATUS(SQLCOM_SHOW_RELAYLOG_EVENTS)}, + {"show_slave_hosts", STMT_STATUS(SQLCOM_SHOW_SLAVE_HOSTS)}, + {"show_slave_status", STMT_STATUS(SQLCOM_SHOW_SLAVE_STAT)}, + {"show_status", STMT_STATUS(SQLCOM_SHOW_STATUS)}, + {"show_storage_engines", STMT_STATUS(SQLCOM_SHOW_STORAGE_ENGINES)}, + {"show_table_status", STMT_STATUS(SQLCOM_SHOW_TABLE_STATUS)}, + {"show_tables", STMT_STATUS(SQLCOM_SHOW_TABLES)}, + {"show_triggers", STMT_STATUS(SQLCOM_SHOW_TRIGGERS)}, + {"show_variables", STMT_STATUS(SQLCOM_SHOW_VARIABLES)}, + {"show_warnings", STMT_STATUS(SQLCOM_SHOW_WARNS)}, + {"shutdown", STMT_STATUS(SQLCOM_SHUTDOWN)}, + {"signal", STMT_STATUS(SQLCOM_SIGNAL)}, + {"start_all_slaves", STMT_STATUS(SQLCOM_SLAVE_ALL_START)}, + {"start_slave", STMT_STATUS(SQLCOM_SLAVE_START)}, + {"stmt_close", COM_STATUS(com_stmt_close)}, + {"stmt_execute", COM_STATUS(com_stmt_execute)}, + {"stmt_fetch", 
COM_STATUS(com_stmt_fetch)}, + {"stmt_prepare", COM_STATUS(com_stmt_prepare)}, + {"stmt_reprepare", COM_STATUS(com_stmt_reprepare)}, + {"stmt_reset", COM_STATUS(com_stmt_reset)}, + {"stmt_send_long_data", COM_STATUS(com_stmt_send_long_data)}, + {"stop_all_slaves", STMT_STATUS(SQLCOM_SLAVE_ALL_STOP)}, + {"stop_slave", STMT_STATUS(SQLCOM_SLAVE_STOP)}, + {"truncate", STMT_STATUS(SQLCOM_TRUNCATE)}, + {"uninstall_plugin", STMT_STATUS(SQLCOM_UNINSTALL_PLUGIN)}, + {"unlock_tables", STMT_STATUS(SQLCOM_UNLOCK_TABLES)}, + {"update", STMT_STATUS(SQLCOM_UPDATE)}, + {"update_multi", STMT_STATUS(SQLCOM_UPDATE_MULTI)}, + {"xa_commit", STMT_STATUS(SQLCOM_XA_COMMIT)}, + {"xa_end", STMT_STATUS(SQLCOM_XA_END)}, + {"xa_prepare", STMT_STATUS(SQLCOM_XA_PREPARE)}, + {"xa_recover", STMT_STATUS(SQLCOM_XA_RECOVER)}, + {"xa_rollback", STMT_STATUS(SQLCOM_XA_ROLLBACK)}, + {"xa_start", STMT_STATUS(SQLCOM_XA_START)}, + {NullS, NullS, SHOW_LONG} +}; + + +#ifdef HAVE_PSI_STATEMENT_INTERFACE +PSI_statement_info sql_statement_info[(uint) SQLCOM_END + 1]; +PSI_statement_info com_statement_info[(uint) COM_END + 1]; + +/** + Initialize the command names array. + Since we do not want to maintain a separate array, + this is populated from data mined in com_status_vars, + which already has one name for each command. 
+*/ +void init_sql_statement_info() +{ + size_t first_com= offsetof(STATUS_VAR, com_stat[0]); + size_t last_com= offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_END]); + int record_size= offsetof(STATUS_VAR, com_stat[1]) + - offsetof(STATUS_VAR, com_stat[0]); + size_t ptr; + uint i; + uint com_index; + + static const char* dummy= ""; + for (i= 0; i < ((uint) SQLCOM_END + 1); i++) + { + sql_statement_info[i].m_name= dummy; + sql_statement_info[i].m_flags= 0; + } + + SHOW_VAR *var= &com_status_vars[0]; + while (var->name != NULL) + { + ptr= (size_t)(var->value); + if ((first_com <= ptr) && (ptr < last_com)) + { + com_index= ((int)(ptr - first_com))/record_size; + DBUG_ASSERT(com_index < (uint) SQLCOM_END); + sql_statement_info[com_index].m_name= var->name; + } + var++; + } + + DBUG_ASSERT(strcmp(sql_statement_info[(uint) SQLCOM_SELECT].m_name, "select") == 0); + DBUG_ASSERT(strcmp(sql_statement_info[(uint) SQLCOM_SIGNAL].m_name, "signal") == 0); + + sql_statement_info[(uint) SQLCOM_END].m_name= "error"; +} + +void init_com_statement_info() +{ + uint index; + + for (index= 0; index < (uint) COM_END + 1; index++) + { + com_statement_info[index].m_name= command_name[index].str; + com_statement_info[index].m_flags= 0; + } + + /* "statement/abstract/query" can mutate into "statement/sql/..." */ + com_statement_info[(uint) COM_QUERY].m_flags= PSI_FLAG_MUTABLE; +} +#endif + + +#ifdef SAFEMALLOC +/* + Return the id for the current THD, to allow safemalloc to associate + the memory with the right id. 
+*/ + +extern "C" my_thread_id mariadb_dbug_id() +{ + THD *thd; + if ((thd= current_thd) && thd->thread_dbug_id) + { + return thd->thread_dbug_id; + } + return my_thread_dbug_id(); +} +#endif /* SAFEMALLOC */ + +/* Thread Mem Usage By P.Linux */ +extern "C" { +static void my_malloc_size_cb_func(long long size, my_bool is_thread_specific) +{ + THD *thd= current_thd; + +#ifndef DBUG_OFF + statistic_increment(malloc_calls, &LOCK_status); +#endif + + /* + When thread specific is set, both mysqld_server_initialized and thd + must be set, and we check that with DBUG_ASSERT. + + However, do not crash, if current_thd is NULL, in release version. + */ + DBUG_ASSERT(!is_thread_specific || (mysqld_server_initialized && thd)); + + if (is_thread_specific && likely(thd)) /* If thread specific memory */ + { + DBUG_PRINT("info", ("thd memory_used: %lld size: %lld", + (longlong) thd->status_var.local_memory_used, + size)); + thd->status_var.local_memory_used+= size; + set_if_bigger(thd->status_var.max_local_memory_used, + thd->status_var.local_memory_used); + if (size > 0 && + thd->status_var.local_memory_used > (int64)thd->variables.max_mem_used && + likely(!thd->killed) && !thd->get_stmt_da()->is_set()) + { + /* Ensure we don't get called here again */ + char buf[50], *buf2; + thd->set_killed(KILL_QUERY); + my_snprintf(buf, sizeof(buf), "--max-session-mem-used=%llu", + thd->variables.max_mem_used); + if ((buf2= (char*) thd->alloc(256))) + { + my_snprintf(buf2, 256, ER_THD(thd, ER_OPTION_PREVENTS_STATEMENT), buf); + thd->set_killed(KILL_QUERY, ER_OPTION_PREVENTS_STATEMENT, buf2); + } + else + { + thd->set_killed(KILL_QUERY, ER_OPTION_PREVENTS_STATEMENT, + "--max-session-mem-used"); + } + } + DBUG_ASSERT((longlong) thd->status_var.local_memory_used >= 0 || + !debug_assert_on_not_freed_memory); + } + else if (likely(thd)) + { + DBUG_PRINT("info", ("global thd memory_used: %lld size: %lld", + (longlong) thd->status_var.global_memory_used, size)); + 
thd->status_var.global_memory_used+= size; + } + else + update_global_memory_status(size); +} + +int json_escape_string(const char *str,const char *str_end, + char *json, char *json_end) +{ + return json_escape(system_charset_info, + (const uchar *) str, (const uchar *) str_end, + &my_charset_utf8mb4_bin, + (uchar *) json, (uchar *) json_end); +} + + +int json_unescape_json(const char *json_str, const char *json_end, + char *res, char *res_end) +{ + return json_unescape(&my_charset_utf8mb4_bin, + (const uchar *) json_str, (const uchar *) json_end, + system_charset_info, (uchar *) res, (uchar *) res_end); +} + +} /*extern "C"*/ + + +/** + Create a replication file name or base for file names. + + @param[in] opt Value of option, or NULL + @param[in] def Default value if option value is not set. + @param[in] ext Extension to use for the path + + @returns Pointer to string containing the full file path, or NULL if + it was not possible to create the path. + */ +static const char *rpl_make_log_name(PSI_memory_key key, const char *opt, + const char *def, const char *ext) +{ + DBUG_ENTER("rpl_make_log_name"); + DBUG_PRINT("enter", ("opt: %s, def: %s, ext: %s", opt ? opt : "(null)", + def, ext)); + char buff[FN_REFLEN]; + const char *base= opt ? opt : def; + unsigned int options= + MY_REPLACE_EXT | MY_UNPACK_FILENAME | MY_SAFE_PATH; + + /* mysql_real_data_home_ptr may be null if no value of datadir has been + specified through command-line or througha cnf file. If that is the + case we make mysql_real_data_home_ptr point to mysql_real_data_home + which, in that case holds the default path for data-dir. 
+ */ + if(mysql_real_data_home_ptr == NULL) + mysql_real_data_home_ptr= mysql_real_data_home; + + if (fn_format(buff, base, mysql_real_data_home_ptr, ext, options)) + DBUG_RETURN(my_strdup(key, buff, MYF(MY_WME))); + else + DBUG_RETURN(NULL); +} + +/* We have to setup my_malloc_size_cb_func early to catch all mallocs */ + +static int init_early_variables() +{ + set_current_thd(0); + set_malloc_size_cb(my_malloc_size_cb_func); + global_status_var.global_memory_used= 0; + init_alloc_root(PSI_NOT_INSTRUMENTED, &startup_root, 1024, 0, MYF(0)); + init_alloc_root(PSI_NOT_INSTRUMENTED, &read_only_root, 1024, 0, + MYF(MY_ROOT_USE_MPROTECT)); + return 0; +} + +#ifdef _WIN32 +static void get_win_tzname(char* buf, size_t size) +{ + static struct + { + const wchar_t* windows_name; + const char* tzdb_name; + } + tz_data[] = + { +#include "win_tzname_data.h" + {0,0} + }; + DYNAMIC_TIME_ZONE_INFORMATION tzinfo; + if (GetDynamicTimeZoneInformation(&tzinfo) == TIME_ZONE_ID_INVALID) + { + strncpy(buf, "unknown", size); + return; + } + + for (size_t i= 0; tz_data[i].windows_name; i++) + { + if (wcscmp(tzinfo.TimeZoneKeyName, tz_data[i].windows_name) == 0) + { + strncpy(buf, tz_data[i].tzdb_name, size); + return; + } + } + wcstombs(buf, tzinfo.TimeZoneKeyName, size); + buf[size-1]= 0; + return; +} +#endif + +static int init_common_variables() +{ + umask(((~my_umask) & 0666)); + connection_errors_select= 0; + connection_errors_accept= 0; + connection_errors_tcpwrap= 0; + connection_errors_internal= 0; + connection_errors_max_connection= 0; + connection_errors_peer_addr= 0; + my_decimal_set_zero(&decimal_zero); // set decimal_zero constant; + + init_libstrings(); + tzset(); // Set tzname + +#ifdef SAFEMALLOC + sf_malloc_dbug_id= mariadb_dbug_id; +#endif /* SAFEMALLOC */ +#ifdef DBUG_ASSERT_AS_PRINTF + my_dbug_assert_failed= mariadb_dbug_assert_failed; +#endif /* DBUG_ASSERT_AS_PRINTF */ + + if (!(type_handler_data= new Type_handler_data) || + type_handler_data->init()) + { + 
sql_perror("Could not allocate type_handler_data"); + return 1; + } + + max_system_variables.pseudo_thread_id= ~(my_thread_id) 0; + server_start_time= flush_status_time= my_time(0); + my_disable_copystat_in_redel= 1; + + global_rpl_filter= new Rpl_filter; + binlog_filter= new Rpl_filter; + if (!global_rpl_filter || !binlog_filter) + { + sql_perror("Could not allocate replication and binlog filters"); + exit(1); + } + +#ifdef HAVE_OPENSSL + if (check_openssl_compatibility()) + { + sql_print_error("Incompatible OpenSSL version. Cannot continue..."); + exit(1); + } +#endif + + if (init_thread_environment() || mysql_init_variables()) + exit(1); + + if (ignore_db_dirs_init()) + exit(1); + + struct tm tm_tmp; + localtime_r(&server_start_time, &tm_tmp); + +#ifdef HAVE_TZNAME +#ifdef _WIN32 + /* + If env.variable TZ is set, derive timezone name from it. + Otherwise, use IANA tz name from get_win_tzname. + */ + if (!getenv("TZ")) + get_win_tzname(system_time_zone, sizeof(system_time_zone)); + else +#endif + { + const char *tz_name= tzname[tm_tmp.tm_isdst != 0 ? 1 : 0]; + strmake_buf(system_time_zone, tz_name); + } +#endif + + /* + We set SYSTEM time zone as reasonable default and + also for failure of my_tz_init() and bootstrap mode. + If user explicitly set time zone with --default-time-zone + option we will change this value in my_tz_init(). + */ + global_system_variables.time_zone= my_tz_SYSTEM; + +#ifdef HAVE_PSI_INTERFACE + /* + Complete the mysql_bin_log initialization. + Instrumentation keys are known only after the performance schema + initialization, and can not be set in the MYSQL_BIN_LOG + constructor (called before main()). 
+ */ + mysql_bin_log.set_psi_keys(key_BINLOG_LOCK_index, + key_BINLOG_COND_relay_log_updated, + key_BINLOG_COND_bin_log_updated, + key_file_binlog, + key_file_binlog_cache, + key_file_binlog_index, + key_file_binlog_index_cache, + key_BINLOG_COND_queue_busy, + key_LOCK_binlog_end_pos); +#endif + + /* + Init mutexes for the global MYSQL_BIN_LOG objects. + As safe_mutex depends on what MY_INIT() does, we can't init the mutexes of + global MYSQL_BIN_LOGs in their constructors, because then they would be + inited before MY_INIT(). So we do it here. + */ + mysql_bin_log.init_pthread_objects(); + + /* TODO: remove this when my_time_t is 64 bit compatible */ + if (!IS_TIME_T_VALID_FOR_TIMESTAMP(server_start_time)) + { + sql_print_error("This server doesn't support dates later than 2038"); + exit(1); + } + + opt_log_basename= const_cast("mysql"); + + if (gethostname(glob_hostname,sizeof(glob_hostname)) < 0) + { + /* + Get hostname of computer (used by 'show variables') and as default + basename for the pid file if --log-basename is not given. + */ + strmake(glob_hostname, STRING_WITH_LEN("localhost")); + sql_print_warning("gethostname failed, using '%s' as hostname", + glob_hostname); + } + else if (is_filename_allowed(glob_hostname, strlen(glob_hostname), FALSE)) + opt_log_basename= glob_hostname; + + strmake(pidfile_name, opt_log_basename, sizeof(pidfile_name)-5); + strmov(fn_ext(pidfile_name),".pid"); // Add proper extension + SYSVAR_AUTOSIZE(pidfile_name_ptr, pidfile_name); + set_sys_var_value_origin(&opt_tc_log_size, sys_var::AUTO); + + /* + The default-storage-engine entry in my_long_options should have a + non-null default value. It was earlier intialized as + (longlong)"MyISAM" in my_long_options but this triggered a + compiler error in the Sun Studio 12 compiler. As a work-around we + set the def_value member to 0 in my_long_options and initialize it + to the correct value here. 
+ + From MySQL 5.5 onwards, the default storage engine is InnoDB + (except in the embedded server, where the default continues to + be MyISAM) + */ +#if defined(WITH_INNOBASE_STORAGE_ENGINE) + default_storage_engine= const_cast("InnoDB"); +#else + default_storage_engine= const_cast("MyISAM"); +#endif + default_tmp_storage_engine= NULL; + gtid_pos_auto_engines= const_cast(""); + + /* + Add server status variables to the dynamic list of + status variables that is shown by SHOW STATUS. + Later, in plugin_init, and mysql_install_plugin + new entries could be added to that list. + */ + if (add_status_vars(status_vars)) + exit(1); // an error was already reported + +#ifndef DBUG_OFF + /* + We have few debug-only commands in com_status_vars, only visible in debug + builds. for simplicity we enable the assert only in debug builds + + There are 10 Com_ variables which don't have corresponding SQLCOM_ values: + (TODO strictly speaking they shouldn't be here, should not have Com_ prefix + that is. Perhaps Stmt_ ? Comstmt_ ? Prepstmt_ ?) + + Com_admin_commands => com_other + Com_create_temporary_table => com_create_tmp_table + Com_drop_temporary_table => com_drop_tmp_table + Com_stmt_close => com_stmt_close + Com_stmt_execute => com_stmt_execute + Com_stmt_fetch => com_stmt_fetch + Com_stmt_prepare => com_stmt_prepare + Com_stmt_reprepare => com_stmt_reprepare + Com_stmt_reset => com_stmt_reset + Com_stmt_send_long_data => com_stmt_send_long_data + + With this correction the number of Com_ variables (number of elements in + the array, excluding the last element - terminator) must match the number + of SQLCOM_ constants. 
+ */ + compile_time_assert(sizeof(com_status_vars)/sizeof(com_status_vars[0]) - 1 == + SQLCOM_END + 10); +#endif + + if (get_options(&remaining_argc, &remaining_argv)) + exit(1); + if (IS_SYSVAR_AUTOSIZE(&server_version_ptr)) + set_server_version(server_version, sizeof(server_version)); + + mysql_real_data_home_len= uint(strlen(mysql_real_data_home)); + + sf_leaking_memory= 0; // no memory leaks from now on + +#ifndef EMBEDDED_LIBRARY + if (opt_abort && !opt_verbose) + unireg_abort(0); +#endif /*!EMBEDDED_LIBRARY*/ + + DBUG_PRINT("info",("%s Ver %s for %s on %s\n",my_progname, + server_version, SYSTEM_TYPE,MACHINE_TYPE)); + + /* Initialize large page size */ + if (opt_large_pages) + { + DBUG_PRINT("info", ("Large page set")); + if (my_init_large_pages(opt_super_large_pages)) + { + return 1; + } + } + +#if defined(HAVE_POOL_OF_THREADS) + if (IS_SYSVAR_AUTOSIZE(&threadpool_size)) + SYSVAR_AUTOSIZE(threadpool_size, my_getncpus()); +#endif + + /* connections and databases needs lots of files */ + { + uint files, wanted_files, max_open_files, min_tc_size, extra_files, + min_connections; + ulong org_max_connections, org_tc_size; + + /* Number of files reserved for temporary files */ + extra_files= 30; + min_connections= 10; + /* MyISAM requires two file handles per table. */ + wanted_files= (extra_files + max_connections + extra_max_connections + + tc_size * 2 * tc_instances); +#if defined(HAVE_POOL_OF_THREADS) && !defined(_WIN32) + // add epoll or kevent fd for each threadpool group, in case pool of threads is used + wanted_files+= (thread_handling > SCHEDULER_NO_THREADS) ? 0 : threadpool_size; +#endif + + min_tc_size= MY_MIN(tc_size, TABLE_OPEN_CACHE_MIN); + org_max_connections= max_connections; + org_tc_size= tc_size; + + /* + We are trying to allocate no less than max_connections*5 file + handles (i.e. we are trying to set the limit so that they will + be available). In addition, we allocate no less than how much + was already allocated. 
However below we report a warning and + recompute values only if we got less file handles than were + explicitly requested. No warning and re-computation occur if we + can't get max_connections*5 but still got no less than was + requested (value of wanted_files). + */ + max_open_files= MY_MAX(MY_MAX(wanted_files, + (max_connections + extra_max_connections)*5), + open_files_limit); + files= my_set_max_open_files(max_open_files); + SYSVAR_AUTOSIZE_IF_CHANGED(open_files_limit, files, ulong); + + if (files < max_open_files && global_system_variables.log_warnings) + sql_print_warning("Could not increase number of max_open_files to more than %u (request: %u)", files, max_open_files); + + /* If we required too much tc_instances than we reduce */ + SYSVAR_AUTOSIZE_IF_CHANGED(tc_instances, + (uint32) MY_MIN(MY_MAX((files - extra_files - + max_connections)/ + 2/tc_size, + 1), + tc_instances), + uint32); + /* + If we have requested too much file handles than we bring + max_connections in supported bounds. Still leave at least + 'min_connections' connections + */ + SYSVAR_AUTOSIZE_IF_CHANGED(max_connections, + (ulong) MY_MAX(MY_MIN(files- extra_files- + min_tc_size*2*tc_instances, + max_connections), + min_connections), + ulong); + + /* + Decrease tc_size according to max_connections, but + not below min_tc_size. Outer MY_MIN() ensures that we + never increase tc_size automatically (that could + happen if max_connections is decreased above). 
+ */ + SYSVAR_AUTOSIZE_IF_CHANGED(tc_size, + (ulong) MY_MIN(MY_MAX((files - extra_files - + max_connections) / 2 / tc_instances, + min_tc_size), + tc_size), ulong); + DBUG_PRINT("warning", + ("Current limits: max_open_files: %u max_connections: %ld table_cache: %ld", + files, max_connections, tc_size)); + if (global_system_variables.log_warnings > 1 && + (max_connections < org_max_connections || + tc_size < org_tc_size)) + sql_print_warning("Changed limits: max_open_files: %u max_connections: %lu (was %lu) table_cache: %lu (was %lu)", + files, max_connections, org_max_connections, + tc_size, org_tc_size); + } + /* + Max_connections and tc_cache are now set. + Now we can fix other variables depending on this variable. + */ + + /* Fix host_cache_size */ + if (IS_SYSVAR_AUTOSIZE(&host_cache_size)) + { + /* + The default value is 128. + The autoset value is 128, plus 1 for a value of max_connections + up to 500, plus 1 for every increment of 20 over 500 in the + max_connections value, capped at 2000. + */ + uint size= (HOST_CACHE_SIZE + MY_MIN(max_connections, 500) + + MY_MAX(((long) max_connections)-500,0)/20); + SYSVAR_AUTOSIZE(host_cache_size, size); + } + + /* Fix back_log (back_log == 0 added for MySQL compatibility) */ + if (back_log == 0 || IS_SYSVAR_AUTOSIZE(&back_log)) + { + /* + The default value is 150. 
+ The autoset value is 50 + max_connections / 5 capped at 900 + */ + SYSVAR_AUTOSIZE(back_log, MY_MIN(900, (50 + max_connections / 5))); + } + + unireg_init(opt_specialflag); /* Set up extern variabels */ + if (!(my_default_lc_messages= + my_locale_by_name(lc_messages))) + { + sql_print_error("Unknown locale: '%s'", lc_messages); + return 1; + } + + if (init_errmessage()) /* Read error messages from file */ + return 1; + global_system_variables.lc_messages= my_default_lc_messages; + global_system_variables.errmsgs= my_default_lc_messages->errmsgs->errmsgs; + init_client_errs(); + mysql_library_init(unused,unused,unused); /* for replication */ + lex_init(); + if (item_create_init()) + return 1; + item_init(); + /* + Process a comma-separated character set list and choose + the first available character set. This is mostly for + test purposes, to be able to start "mysqld" even if + the requested character set is not available (see bug#18743). + */ + myf utf8_flag= global_system_variables.old_behavior & + OLD_MODE_UTF8_IS_UTF8MB3 ? 
MY_UTF8_IS_UTF8MB3 : 0; + for (;;) + { + char *next_character_set_name= strchr(default_character_set_name, ','); + if (next_character_set_name) + *next_character_set_name++= '\0'; + if (!(default_charset_info= + get_charset_by_csname(default_character_set_name, + MY_CS_PRIMARY, MYF(utf8_flag | MY_WME)))) + { + if (next_character_set_name) + { + default_character_set_name= next_character_set_name; + default_collation_name= 0; // Ignore collation + } + else + return 1; // Eof of the list + } + else + break; + } + + if (default_collation_name) + { + CHARSET_INFO *default_collation; + default_collation= get_charset_by_name(default_collation_name, MYF(utf8_flag)); + if (!default_collation) + { +#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE + buffered_logs.print(); + buffered_logs.cleanup(); +#endif + sql_print_error(ER_DEFAULT(ER_UNKNOWN_COLLATION), default_collation_name); + return 1; + } + if (!my_charset_same(default_charset_info, default_collation)) + { + sql_print_error(ER_DEFAULT(ER_COLLATION_CHARSET_MISMATCH), + default_collation_name, + default_charset_info->cs_name.str); + return 1; + } + default_charset_info= default_collation; + } + /* Set collactions that depends on the default collation */ + global_system_variables.collation_server= default_charset_info; + global_system_variables.collation_database= default_charset_info; + if (is_supported_parser_charset(default_charset_info)) + { + global_system_variables.collation_connection= default_charset_info; + global_system_variables.character_set_results= default_charset_info; + global_system_variables.character_set_client= default_charset_info; + } + else + { + sql_print_warning("'%s' can not be used as client character set. 
" + "'%s' will be used as default client character set.", + default_charset_info->cs_name.str, + my_charset_latin1.cs_name.str); + global_system_variables.collation_connection= &my_charset_latin1; + global_system_variables.character_set_results= &my_charset_latin1; + global_system_variables.character_set_client= &my_charset_latin1; + } + + if (!(character_set_filesystem= + get_charset_by_csname(character_set_filesystem_name, + MY_CS_PRIMARY, MYF(utf8_flag | MY_WME)))) + return 1; + global_system_variables.character_set_filesystem= character_set_filesystem; + + if (!(my_default_lc_time_names= + my_locale_by_name(lc_time_names_name))) + { + sql_print_error("Unknown locale: '%s'", lc_time_names_name); + return 1; + } + global_system_variables.lc_time_names= my_default_lc_time_names; + + /* check log options and issue warnings if needed */ + if (opt_log && opt_logname && *opt_logname && + !(log_output_options & (LOG_FILE | LOG_NONE))) + sql_print_warning("Although a general log file was specified, " + "log tables are used. " + "To enable logging to files use the --log-output option."); + + if (global_system_variables.sql_log_slow && opt_slow_logname && + *opt_slow_logname && + !(log_output_options & (LOG_FILE | LOG_NONE))) + sql_print_warning("Although a slow query log file was specified, " + "log tables are used. " + "To enable logging to files use the --log-output=file option."); + + if (!opt_logname || !*opt_logname) + make_default_log_name(&opt_logname, ".log", false); + if (!opt_slow_logname || !*opt_slow_logname) + make_default_log_name(&opt_slow_logname, "-slow.log", false); + +#if defined(ENABLED_DEBUG_SYNC) + /* Initialize the debug sync facility. See debug_sync.cc. 
*/ + if (debug_sync_init()) + return 1; /* purecov: tested */ +#endif /* defined(ENABLED_DEBUG_SYNC) */ + +#if (ENABLE_TEMP_POOL) + if (use_temp_pool && my_bitmap_init(&temp_pool,0,1024)) + return 1; +#else + use_temp_pool= 0; +#endif + + if (my_dboptions_cache_init()) + return 1; + + /* + Ensure that lower_case_table_names is set on system where we have case + insensitive names. If this is not done the users MyISAM tables will + get corrupted if accesses with names of different case. + */ + DBUG_PRINT("info", ("lower_case_table_names: %d", lower_case_table_names)); + if(mysql_real_data_home_ptr == NULL || *mysql_real_data_home_ptr == 0) + mysql_real_data_home_ptr= mysql_real_data_home; + SYSVAR_AUTOSIZE(lower_case_file_system, + test_if_case_insensitive(mysql_real_data_home_ptr)); + if (!lower_case_table_names && lower_case_file_system == 1) + { + if (lower_case_table_names_used) + { + sql_print_error("The server option 'lower_case_table_names' is " + "configured to use case sensitive table names but the " + "data directory resides on a case-insensitive file system. " + "Please use a case sensitive file system for your data " + "directory or switch to a case-insensitive table name " + "mode."); + return 1; + } + else + { + if (global_system_variables.log_warnings) + sql_print_warning("Setting lower_case_table_names=2 because file " + "system for %s is case insensitive", mysql_real_data_home_ptr); + SYSVAR_AUTOSIZE(lower_case_table_names, 2); + } + } + else if (lower_case_table_names == 2 && + !(lower_case_file_system= (lower_case_file_system == 1))) + { + if (global_system_variables.log_warnings) + sql_print_warning("lower_case_table_names was set to 2, even though your " + "the file system '%s' is case sensitive. 
Now setting " + "lower_case_table_names to 0 to avoid future problems.", + mysql_real_data_home_ptr); + SYSVAR_AUTOSIZE(lower_case_table_names, 0); + } + else + { + lower_case_file_system= (lower_case_file_system == 1); + } + + /* Reset table_alias_charset, now that lower_case_table_names is set. */ + table_alias_charset= (lower_case_table_names ? + files_charset_info : + &my_charset_bin); + + if (ignore_db_dirs_process_additions()) + { + sql_print_error("An error occurred while storing ignore_db_dirs to a hash."); + return 1; + } + + if (tls_version & (VIO_TLSv1_0 + VIO_TLSv1_1)) + sql_print_warning("TLSv1.0 and TLSv1.1 are insecure and should not be used for tls_version"); + +#ifdef WITH_WSREP + /* + We need to initialize auxiliary variables, that will be + further keep the original values of auto-increment options + as they set by the user. These variables used to restore + user-defined values of the auto-increment options after + setting of the wsrep_auto_increment_control to 'OFF'. 
+ */ + global_system_variables.saved_auto_increment_increment= + global_system_variables.auto_increment_increment; + global_system_variables.saved_auto_increment_offset= + global_system_variables.auto_increment_offset; +#endif /* WITH_WSREP */ + + return 0; +} + + +static int init_thread_environment() +{ + DBUG_ENTER("init_thread_environment"); + server_threads.init(); + mysql_mutex_init(key_LOCK_start_thread, &LOCK_start_thread, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_status, &LOCK_status, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_delayed_insert, + &LOCK_delayed_insert, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_delayed_status, + &LOCK_delayed_status, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_delayed_create, + &LOCK_delayed_create, MY_MUTEX_INIT_SLOW); + mysql_mutex_init(key_LOCK_crypt, &LOCK_crypt, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_user_conn, &LOCK_user_conn, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_active_mi, &LOCK_active_mi, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_global_system_variables, + &LOCK_global_system_variables, MY_MUTEX_INIT_FAST); + mysql_mutex_record_order(&LOCK_active_mi, &LOCK_global_system_variables); + mysql_prlock_init(key_rwlock_LOCK_system_variables_hash, + &LOCK_system_variables_hash); + mysql_mutex_init(key_LOCK_prepared_stmt_count, + &LOCK_prepared_stmt_count, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_error_messages, + &LOCK_error_messages, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_uuid_short_generator, + &LOCK_short_uuid_generator, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_thread_id, + &LOCK_thread_id, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_stats, &LOCK_stats, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_global_user_client_stats, + &LOCK_global_user_client_stats, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_global_table_stats, + &LOCK_global_table_stats, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_global_index_stats, + 
&LOCK_global_index_stats, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_prepare_ordered, &LOCK_prepare_ordered, + MY_MUTEX_INIT_SLOW); + mysql_cond_init(key_COND_prepare_ordered, &COND_prepare_ordered, NULL); + mysql_mutex_init(key_LOCK_after_binlog_sync, &LOCK_after_binlog_sync, + MY_MUTEX_INIT_SLOW); + mysql_mutex_init(key_LOCK_commit_ordered, &LOCK_commit_ordered, + MY_MUTEX_INIT_SLOW); + mysql_mutex_init(key_LOCK_backup_log, &LOCK_backup_log, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_temp_pool, &LOCK_temp_pool, MY_MUTEX_INIT_FAST); + +#ifdef HAVE_OPENSSL + mysql_mutex_init(key_LOCK_des_key_file, + &LOCK_des_key_file, MY_MUTEX_INIT_FAST); +#if defined(HAVE_OPENSSL10) && !defined(HAVE_WOLFSSL) + openssl_stdlocks= (openssl_lock_t*) OPENSSL_malloc(CRYPTO_num_locks() * + sizeof(openssl_lock_t)); + for (int i= 0; i < CRYPTO_num_locks(); ++i) + mysql_rwlock_init(key_rwlock_openssl, &openssl_stdlocks[i].lock); + CRYPTO_set_dynlock_create_callback(openssl_dynlock_create); + CRYPTO_set_dynlock_destroy_callback(openssl_dynlock_destroy); + CRYPTO_set_dynlock_lock_callback(openssl_lock); + CRYPTO_set_locking_callback(openssl_lock_function); +#endif /* HAVE_OPENSSL10 */ +#endif /* HAVE_OPENSSL */ + mysql_rwlock_init(key_rwlock_LOCK_sys_init_connect, &LOCK_sys_init_connect); + mysql_rwlock_init(key_rwlock_LOCK_sys_init_slave, &LOCK_sys_init_slave); + mysql_rwlock_init(key_rwlock_LOCK_ssl_refresh, &LOCK_ssl_refresh); + mysql_rwlock_init(key_rwlock_LOCK_grant, &LOCK_grant); + mysql_rwlock_init(key_rwlock_LOCK_all_status_vars, &LOCK_all_status_vars); + mysql_cond_init(key_COND_start_thread, &COND_start_thread, NULL); +#ifdef HAVE_REPLICATION + mysql_mutex_init(key_LOCK_rpl_status, &LOCK_rpl_status, MY_MUTEX_INIT_FAST); +#endif + mysql_mutex_init(key_LOCK_server_started, + &LOCK_server_started, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_server_started, &COND_server_started, NULL); + sp_cache_init(); +#ifdef HAVE_EVENT_SCHEDULER + Events::init_mutexes(); +#endif + 
init_show_explain_psi_keys(); + /* Parameter for threads created for connections */ + (void) pthread_attr_init(&connection_attrib); + (void) pthread_attr_setdetachstate(&connection_attrib, + PTHREAD_CREATE_DETACHED); + pthread_attr_setscope(&connection_attrib, PTHREAD_SCOPE_SYSTEM); + +#ifdef HAVE_REPLICATION + rpl_init_gtid_slave_state(); + rpl_init_gtid_waiting(); +#endif + + DBUG_RETURN(0); +} + + +#if defined(HAVE_OPENSSL10) && !defined(HAVE_WOLFSSL) +static openssl_lock_t *openssl_dynlock_create(const char *file, int line) +{ + openssl_lock_t *lock= new openssl_lock_t; + mysql_rwlock_init(key_rwlock_openssl, &lock->lock); + return lock; +} + + +static void openssl_dynlock_destroy(openssl_lock_t *lock, const char *file, + int line) +{ + mysql_rwlock_destroy(&lock->lock); + delete lock; +} + + +static void openssl_lock_function(int mode, int n, const char *file, int line) +{ + if (n < 0 || n > CRYPTO_num_locks()) + { + /* Lock number out of bounds. */ + sql_print_error("Fatal: OpenSSL interface problem (n = %d)", n); + abort(); + } + openssl_lock(mode, &openssl_stdlocks[n], file, line); +} + + +static void openssl_lock(int mode, openssl_lock_t *lock, const char *file, + int line) +{ + int err; + char const *what; + + switch (mode) { + case CRYPTO_LOCK|CRYPTO_READ: + what = "read lock"; + err= mysql_rwlock_rdlock(&lock->lock); + break; + case CRYPTO_LOCK|CRYPTO_WRITE: + what = "write lock"; + err= mysql_rwlock_wrlock(&lock->lock); + break; + case CRYPTO_UNLOCK|CRYPTO_READ: + case CRYPTO_UNLOCK|CRYPTO_WRITE: + what = "unlock"; + err= mysql_rwlock_unlock(&lock->lock); + break; + default: + /* Unknown locking mode. 
*/ + sql_print_error("Fatal: OpenSSL interface problem (mode=0x%x)", mode); + abort(); + } + if (err) + { + sql_print_error("Fatal: can't %s OpenSSL lock", what); + abort(); + } +} +#endif /* HAVE_OPENSSL10 */ + + +struct SSL_ACCEPTOR_STATS +{ + long accept; + long accept_good; + long cache_size; + long verify_mode; + long verify_depth; + long zero; + const char *session_cache_mode; + + SSL_ACCEPTOR_STATS(): + accept(),accept_good(),cache_size(),verify_mode(),verify_depth(),zero(), + session_cache_mode("NONE") + { + } + + void init() + { + DBUG_ASSERT(ssl_acceptor_fd !=0 && ssl_acceptor_fd->ssl_context != 0); + SSL_CTX *ctx= ssl_acceptor_fd->ssl_context; + accept= 0; + accept_good= 0; + verify_mode= SSL_CTX_get_verify_mode(ctx); + verify_depth= SSL_CTX_get_verify_depth(ctx); + cache_size= SSL_CTX_sess_get_cache_size(ctx); + switch (SSL_CTX_get_session_cache_mode(ctx)) + { + case SSL_SESS_CACHE_OFF: + session_cache_mode= "OFF"; break; + case SSL_SESS_CACHE_CLIENT: + session_cache_mode= "CLIENT"; break; + case SSL_SESS_CACHE_SERVER: + session_cache_mode= "SERVER"; break; + case SSL_SESS_CACHE_BOTH: + session_cache_mode= "BOTH"; break; + case SSL_SESS_CACHE_NO_AUTO_CLEAR: + session_cache_mode= "NO_AUTO_CLEAR"; break; + case SSL_SESS_CACHE_NO_INTERNAL_LOOKUP: + session_cache_mode= "NO_INTERNAL_LOOKUP"; break; + default: + session_cache_mode= "Unknown"; break; + } + } +}; + +static SSL_ACCEPTOR_STATS ssl_acceptor_stats; +void ssl_acceptor_stats_update(int sslaccept_ret) +{ + statistic_increment(ssl_acceptor_stats.accept, &LOCK_status); + if (!sslaccept_ret) + statistic_increment(ssl_acceptor_stats.accept_good,&LOCK_status); +} + +static void init_ssl() +{ +#if !defined(EMBEDDED_LIBRARY) +/* + Not need to check require_secure_transport on the Linux, + because it always has Unix domain sockets that are secure: +*/ +#ifdef _WIN32 + if (opt_require_secure_transport && + !opt_use_ssl && + !opt_enable_named_pipe && + !opt_bootstrap) + { + sql_print_error("Server is started 
with --require-secure-transport=ON " + "but no secure transport (SSL or PIPE) are configured."); + unireg_abort(1); + } +#endif +#if defined(HAVE_OPENSSL) + if (opt_use_ssl) + { + enum enum_ssl_init_error error= SSL_INITERR_NOERROR; + + /* having ssl_acceptor_fd != 0 signals the use of SSL */ + ssl_acceptor_fd= new_VioSSLAcceptorFd(opt_ssl_key, opt_ssl_cert, + opt_ssl_ca, opt_ssl_capath, + opt_ssl_cipher, &error, + opt_ssl_crl, opt_ssl_crlpath, + tls_version); + DBUG_PRINT("info",("ssl_acceptor_fd: %p", ssl_acceptor_fd)); + if (!ssl_acceptor_fd) + { + sql_print_error("Failed to setup SSL"); + sql_print_error("SSL error: %s", sslGetErrString(error)); + if (!opt_bootstrap) + unireg_abort(1); + opt_use_ssl = 0; + have_ssl= SHOW_OPTION_DISABLED; + } + else + ssl_acceptor_stats.init(); + + if (global_system_variables.log_warnings > 0) + { + ulong err; + while ((err= ERR_get_error())) + { + char buf[256]; + ERR_error_string_n(err, buf, sizeof(buf)); + sql_print_warning("SSL error: %s",buf); + } + } + else + ERR_remove_state(0); + } + else + { + have_ssl= SHOW_OPTION_DISABLED; + } + if (des_key_file) + load_des_key_file(des_key_file); +#endif /* HAVE_OPENSSL */ +#endif /* !EMBEDDED_LIBRARY */ +} + +/* Reinitialize SSL (FLUSH SSL) */ +int reinit_ssl() +{ +#if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY) + if (!opt_use_ssl) + return 0; + + enum enum_ssl_init_error error = SSL_INITERR_NOERROR; + st_VioSSLFd *new_fd = new_VioSSLAcceptorFd(opt_ssl_key, opt_ssl_cert, + opt_ssl_ca, opt_ssl_capath, opt_ssl_cipher, &error, opt_ssl_crl, + opt_ssl_crlpath, tls_version); + + if (!new_fd) + { + my_printf_error(ER_UNKNOWN_ERROR, "Failed to refresh SSL, error: %s", MYF(0), + sslGetErrString(error)); + ERR_clear_error(); + return 1; + } + mysql_rwlock_wrlock(&LOCK_ssl_refresh); + free_vio_ssl_acceptor_fd(ssl_acceptor_fd); + ssl_acceptor_fd= new_fd; + ssl_acceptor_stats.init(); + mysql_rwlock_unlock(&LOCK_ssl_refresh); +#endif + return 0; +} + +static void end_ssl() +{ +#ifdef 
HAVE_OPENSSL +#ifndef EMBEDDED_LIBRARY + if (ssl_acceptor_fd) + { + free_vio_ssl_acceptor_fd(ssl_acceptor_fd); + ssl_acceptor_fd= 0; + } +#endif /* ! EMBEDDED_LIBRARY */ +#endif /* HAVE_OPENSSL */ +} + +#ifdef _WIN32 +/** + Registers a file to be collected when Windows Error Reporting creates a crash + report. +*/ +#include +static void add_file_to_crash_report(char *file) +{ + wchar_t wfile[MAX_PATH+1]= {0}; + if (mbstowcs(wfile, file, MAX_PATH) != (size_t)-1) + { + WerRegisterFile(wfile, WerRegFileTypeOther, WER_FILE_ANONYMOUS_DATA); + } +} +#endif + +#define init_default_storage_engine(X,Y) \ + init_default_storage_engine_impl(#X, X, &global_system_variables.Y) + +static int init_default_storage_engine_impl(const char *opt_name, + char *engine_name, plugin_ref *res) +{ + if (!engine_name) + { + *res= 0; + return 0; + } + + LEX_CSTRING name= { engine_name, strlen(engine_name) }; + plugin_ref plugin; + handlerton *hton; + if ((plugin= ha_resolve_by_name(0, &name, false))) + hton= plugin_hton(plugin); + else + { + sql_print_error("Unknown/unsupported storage engine: %s", engine_name); + return 1; + } + if (!ha_storage_engine_is_enabled(hton)) + { + if (!opt_bootstrap) + { + sql_print_error("%s (%s) is not available", opt_name, engine_name); + return 1; + } + DBUG_ASSERT(*res); + } + else + { + /* + Need to unlock as global_system_variables.table_plugin + was acquired during plugin_init() + */ + mysql_mutex_lock(&LOCK_global_system_variables); + if (*res) + plugin_unlock(0, *res); + *res= plugin; + mysql_mutex_unlock(&LOCK_global_system_variables); + } + return 0; +} + +static int +init_gtid_pos_auto_engines(void) +{ + plugin_ref *plugins; + + /* + For the command-line option --gtid_pos_auto_engines, we allow (and ignore) + engines that are unknown. This is convenient, since it allows to set + default auto-create engines that might not be used by particular users. 
+ The option sets a list of storage engines that will have gtid position + table auto-created for them if needed. And if the engine is not available, + then it will certainly not be needed. + */ + if (gtid_pos_auto_engines) + plugins= resolve_engine_list(NULL, gtid_pos_auto_engines, + strlen(gtid_pos_auto_engines), false, false); + else + plugins= resolve_engine_list(NULL, "", 0, false, false); + if (!plugins) + return 1; + mysql_mutex_lock(&LOCK_global_system_variables); + opt_gtid_pos_auto_plugins= plugins; + mysql_mutex_unlock(&LOCK_global_system_variables); + return 0; +} + +#define MYSQL_COMPATIBILITY_OPTION(option) \ + { option, OPT_MYSQL_COMPATIBILITY, \ + 0, 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0 } + +#define MYSQL_TO_BE_IMPLEMENTED_OPTION(option) \ + { option, OPT_MYSQL_TO_BE_IMPLEMENTED, \ + 0, 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0 } + +#define MYSQL_SUGGEST_ANALOG_OPTION(option, str) \ + { option, OPT_MYSQL_COMPATIBILITY, \ + 0, 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0 } + +#define MARIADB_REMOVED_OPTION(option) \ + { option, OPT_REMOVED_OPTION, \ + 0, 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0 } + +static int init_server_components() +{ + DBUG_ENTER("init_server_components"); + /* + We need to call each of these following functions to ensure that + all things are initialized so that unireg_abort() doesn't fail + */ + my_cpu_init(); + mdl_init(); + if (tdc_init() || hostname_cache_init()) + unireg_abort(1); + + query_cache_set_min_res_unit(query_cache_min_res_unit); + query_cache_result_size_limit(query_cache_limit); + /* if we set size of QC non zero in config then probably we want it ON */ + if (query_cache_size != 0 && + global_system_variables.query_cache_type == 0 && + !IS_SYSVAR_AUTOSIZE(&query_cache_size)) + { + global_system_variables.query_cache_type= 1; + } + query_cache_init(); + DBUG_ASSERT(query_cache_size < ULONG_MAX); + query_cache_resize((ulong)query_cache_size); + my_rnd_init(&sql_rand,(ulong) server_start_time,(ulong) 
server_start_time/2); + setup_fpu(); + init_thr_lock(); + backup_init(); + + if (init_thr_timer(thread_scheduler->max_threads + extra_max_connections)) + { + fprintf(stderr, "Can't initialize timers\n"); + unireg_abort(1); + } + + my_uuid_init((ulong) (my_rnd(&sql_rand))*12345,12345); + wt_init(); + + /* Setup logs */ + + setup_log_handling(); + + /* + Enable old-fashioned error log, except when the user has requested + help information. Since the implementation of plugin server + variables the help output is now written much later. + */ +#ifdef _WIN32 + if (opt_console) + opt_error_log= false; +#endif + + if (opt_error_log && !opt_abort) + { + if (!log_error_file_ptr[0]) + { + fn_format(log_error_file, pidfile_name, mysql_data_home, ".err", + MY_REPLACE_EXT); /* replace '.' by '.err', bug#4997 */ + SYSVAR_AUTOSIZE(log_error_file_ptr, log_error_file); + } + else + { + fn_format(log_error_file, log_error_file_ptr, mysql_data_home, ".err", + MY_UNPACK_FILENAME | MY_SAFE_PATH); + log_error_file_ptr= log_error_file; + } + if (!log_error_file[0]) + opt_error_log= 0; // Too long file name + else + { + my_bool res; +#ifndef EMBEDDED_LIBRARY + res= reopen_fstreams(log_error_file, stdout, stderr); +#else + res= reopen_fstreams(log_error_file, NULL, stderr); +#endif + + if (!res) + setbuf(stderr, NULL); + +#ifdef _WIN32 + /* Add error log to windows crash reporting. 
*/ + add_file_to_crash_report(log_error_file); +#endif + } + } + + /* set up the hook before initializing plugins which may use it */ + error_handler_hook= my_message_sql; + proc_info_hook= set_thd_stage_info; + + /* + Print source revision hash, as one of the first lines, if not the + first in error log, for troubleshooting and debugging purposes + */ + if (!opt_help) + sql_print_information("Starting MariaDB %s source revision %s as process %lu", + server_version, SOURCE_REVISION, (ulong) getpid()); + +#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE + /* + Parsing the performance schema command line option may have reported + warnings/information messages. + Now that the logger is finally available, and redirected + to the proper file when the --log--error option is used, + print the buffered messages to the log. + */ + buffered_logs.print(); + buffered_logs.cleanup(); +#endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */ + +#ifndef EMBEDDED_LIBRARY + /* + Now that the logger is available, redirect character set + errors directly to the logger + (instead of the buffered_logs used at the server startup time). 
+ */ + my_charset_error_reporter= charset_error_reporter; +#endif + + xid_cache_init(); + + /* need to configure logging before initializing storage engines */ + if (!opt_bin_log_used && !WSREP_ON) + { + if (opt_log_slave_updates) + sql_print_warning("You need to use --log-bin to make " + "--log-slave-updates work."); + if (binlog_format_used) + sql_print_warning("You need to use --log-bin to make " + "--binlog-format work."); + } + + /* Check that we have not let the format to unspecified at this point */ + DBUG_ASSERT((uint)global_system_variables.binlog_format <= + array_elements(binlog_format_names)-1); + +#ifdef HAVE_REPLICATION + if (opt_log_slave_updates && replicate_same_server_id) + { + if (opt_bin_log) + { + sql_print_error("using --replicate-same-server-id in conjunction with " + "--log-slave-updates is impossible, it would lead to " + "infinite loops in this server."); + unireg_abort(1); + } + else + sql_print_warning("using --replicate-same-server-id in conjunction with " + "--log-slave-updates would lead to infinite loops in " + "this server. 
However this will be ignored as the " + "--log-bin option is not defined."); + } +#endif + + if (opt_bin_log) + { + /* Reports an error and aborts, if the --log-bin's path + is a directory.*/ + if (opt_bin_logname[0] && + opt_bin_logname[strlen(opt_bin_logname) - 1] == FN_LIBCHAR) + { + sql_print_error("Path '%s' is a directory name, please specify " + "a file name for --log-bin option", opt_bin_logname); + unireg_abort(1); + } + + /* Reports an error and aborts, if the --log-bin-index's path + is a directory.*/ + if (opt_binlog_index_name && + opt_binlog_index_name[strlen(opt_binlog_index_name) - 1] + == FN_LIBCHAR) + { + sql_print_error("Path '%s' is a directory name, please specify " + "a file name for --log-bin-index option", + opt_binlog_index_name); + unireg_abort(1); + } + + char buf[FN_REFLEN]; + const char *ln; + ln= mysql_bin_log.generate_name(opt_bin_logname, "-bin", 1, buf); + if (!opt_bin_logname[0] && !opt_binlog_index_name) + { + /* + User didn't give us info to name the binlog index file. + Picking `hostname`-bin.index like did in 4.x, causes replication to + fail if the hostname is changed later. So, we would like to instead + require a name. But as we don't want to break many existing setups, we + only give warning, not error. + */ + sql_print_warning("No argument was provided to --log-bin and " + "neither --log-basename or --log-bin-index where " + "used; This may cause repliction to break when this " + "server acts as a master and has its hostname " + "changed! Please use '--log-basename=%s' or " + "'--log-bin=%s' to avoid this problem.", + opt_log_basename, ln); + } + if (ln == buf) + opt_bin_logname= my_once_strdup(buf, MYF(MY_WME)); + } + + /* + Since some wsrep threads (THDs) are create before plugins are + initialized, LOCK_plugin mutex needs to be initialized here. 
+ */ + plugin_mutex_init(); + + /* + Wsrep initialization must happen at this point, because: + - opt_bin_logname must be known when starting replication + since SST may need it + - SST may modify binlog index file, so it must be opened + after SST has happened + + We also (unconditionally) initialize wsrep LOCKs and CONDs. + It is because they are used while accessing wsrep system + variables even when a wsrep provider is not loaded. + */ + + /* It's now safe to use thread specific memory */ + mysqld_server_initialized= 1; + +#ifndef EMBEDDED_LIBRARY + wsrep_thr_init(); +#endif + +#ifdef WITH_WSREP + if (wsrep_init_server()) unireg_abort(1); + + if (WSREP_ON && !wsrep_recovery && !opt_abort) + { + if (opt_bootstrap) // bootsrap option given - disable wsrep functionality + { + wsrep_provider_init(WSREP_NONE); + if (wsrep_init()) + unireg_abort(1); + } + else // full wsrep initialization + { + // add basedir/bin to PATH to resolve wsrep script names + size_t tmp_path_size= strlen(mysql_home) + 5; /* including "/bin" */ + char* const tmp_path= (char*)my_alloca(tmp_path_size); + if (tmp_path) + { + snprintf(tmp_path, tmp_path_size, "%s/bin", mysql_home); + wsrep_prepend_PATH(tmp_path); + } + else + { + WSREP_ERROR("Could not append %s/bin to PATH", mysql_home); + } + my_afree(tmp_path); + + if (wsrep_before_SE()) + { + set_ports(); // this is also called in network_init() later but we need + // to know mysqld_port now - lp:1071882 + wsrep_init_startup(true); + } + } + } +#endif /* WITH_WSREP */ + + if (!opt_help && opt_bin_log) + { + if (mysql_bin_log.open_index_file(opt_binlog_index_name, opt_bin_logname, + TRUE)) + { + unireg_abort(1); + } + + log_bin_basename= + rpl_make_log_name(key_memory_MYSQL_BIN_LOG_basename, + opt_bin_logname, pidfile_name, + opt_bin_logname ? 
"" : "-bin"); + log_bin_index= + rpl_make_log_name(key_memory_MYSQL_BIN_LOG_index, + opt_binlog_index_name, log_bin_basename, ".index"); + if (log_bin_basename == NULL || log_bin_index == NULL) + { + sql_print_error("Unable to create replication path names:" + " out of memory or path names too long" + " (path name exceeds " STRINGIFY_ARG(FN_REFLEN) + " or file name exceeds " STRINGIFY_ARG(FN_LEN) ")."); + unireg_abort(1); + } + } + +#ifndef EMBEDDED_LIBRARY + DBUG_PRINT("debug", + ("opt_bin_logname: %s, opt_relay_logname: %s, pidfile_name: %s", + opt_bin_logname, opt_relay_logname, pidfile_name)); + if (opt_relay_logname) + { + relay_log_basename= + rpl_make_log_name(key_memory_MYSQL_RELAY_LOG_basename, + opt_relay_logname, pidfile_name, + opt_relay_logname ? "" : "-relay-bin"); + relay_log_index= + rpl_make_log_name(key_memory_MYSQL_RELAY_LOG_index, + opt_relaylog_index_name, relay_log_basename, ".index"); + if (relay_log_basename == NULL || relay_log_index == NULL) + { + sql_print_error("Unable to create replication path names:" + " out of memory or path names too long" + " (path name exceeds " STRINGIFY_ARG(FN_REFLEN) + " or file name exceeds " STRINGIFY_ARG(FN_LEN) ")."); + unireg_abort(1); + } + } +#endif /* !EMBEDDED_LIBRARY */ + + /* call ha_init_key_cache() on all key caches to init them */ + process_key_caches(&ha_init_key_cache, 0); + + init_global_table_stats(); + init_global_index_stats(); + init_update_queries(); + + /* Allow storage engine to give real error messages */ + if (unlikely(ha_init_errors())) + DBUG_RETURN(1); + + tc_log= 0; // ha_initialize_handlerton() needs that + + if (!opt_abort && ddl_log_initialize()) + unireg_abort(1); + + if (plugin_init(&remaining_argc, remaining_argv, + (opt_noacl ? PLUGIN_INIT_SKIP_PLUGIN_TABLE : 0) | + (opt_abort ? 
PLUGIN_INIT_SKIP_INITIALIZATION : 0))) + { + sql_print_error("Failed to initialize plugins."); + unireg_abort(1); + } + plugins_are_initialized= TRUE; /* Don't separate from init function */ + +#ifdef HAVE_REPLICATION + /* + Semisync is not required by other components, which justifies its + initialization at this point when thread specific memory is also available. + */ + if (repl_semisync_master.init_object() || + repl_semisync_slave.init_object()) + { + sql_print_error("Could not initialize semisync."); + unireg_abort(1); + } +#endif + +#ifndef EMBEDDED_LIBRARY + if (session_tracker_init()) + return 1; +#endif //EMBEDDED_LIBRARY + + /* we do want to exit if there are any other unknown options */ + if (remaining_argc > 1) + { + int ho_error; + struct my_option removed_opts[]= + { + /* The following options exist in 5.6 but not in 10.0 */ + MYSQL_COMPATIBILITY_OPTION("log-raw"), + MYSQL_COMPATIBILITY_OPTION("log-bin-use-v1-row-events"), + MYSQL_TO_BE_IMPLEMENTED_OPTION("default-authentication-plugin"), + MYSQL_COMPATIBILITY_OPTION("binlog-max-flush-queue-time"), + MYSQL_COMPATIBILITY_OPTION("master-info-repository"), + MYSQL_COMPATIBILITY_OPTION("relay-log-info-repository"), + MYSQL_SUGGEST_ANALOG_OPTION("binlog-rows-query-log-events", "--binlog-annotate-row-events"), + MYSQL_COMPATIBILITY_OPTION("binlog-order-commits"), + MYSQL_TO_BE_IMPLEMENTED_OPTION("log-throttle-queries-not-using-indexes"), + MYSQL_TO_BE_IMPLEMENTED_OPTION("end-markers-in-json"), + MYSQL_TO_BE_IMPLEMENTED_OPTION("optimizer-trace-features"), // OPTIMIZER_TRACE + MYSQL_TO_BE_IMPLEMENTED_OPTION("optimizer-trace-offset"), // OPTIMIZER_TRACE + MYSQL_TO_BE_IMPLEMENTED_OPTION("optimizer-trace-limit"), // OPTIMIZER_TRACE + MYSQL_COMPATIBILITY_OPTION("server-id-bits"), + MYSQL_TO_BE_IMPLEMENTED_OPTION("slave-rows-search-algorithms"), // HAVE_REPLICATION + MYSQL_TO_BE_IMPLEMENTED_OPTION("slave-allow-batching"), // HAVE_REPLICATION + MYSQL_COMPATIBILITY_OPTION("slave-checkpoint-period"), // 
HAVE_REPLICATION + MYSQL_COMPATIBILITY_OPTION("slave-checkpoint-group"), // HAVE_REPLICATION + MYSQL_SUGGEST_ANALOG_OPTION("slave-pending-jobs-size-max", "--slave-parallel-max-queued"), // HAVE_REPLICATION + MYSQL_TO_BE_IMPLEMENTED_OPTION("sha256-password-private-key-path"), // HAVE_OPENSSL + MYSQL_TO_BE_IMPLEMENTED_OPTION("sha256-password-public-key-path"), // HAVE_OPENSSL + + /* The following options exist in 5.5 and 5.6 but not in 10.0 */ + MYSQL_SUGGEST_ANALOG_OPTION("abort-slave-event-count", "--debug-abort-slave-event-count"), + MYSQL_SUGGEST_ANALOG_OPTION("disconnect-slave-event-count", "--debug-disconnect-slave-event-count"), + MYSQL_SUGGEST_ANALOG_OPTION("exit-info", "--debug-exit-info"), + MYSQL_SUGGEST_ANALOG_OPTION("max-binlog-dump-events", "--debug-max-binlog-dump-events"), + MYSQL_SUGGEST_ANALOG_OPTION("sporadic-binlog-dump-fail", "--debug-sporadic-binlog-dump-fail"), + MYSQL_COMPATIBILITY_OPTION("new"), + MYSQL_COMPATIBILITY_OPTION("show_compatibility_56"), + + /* The following options were removed in 10.6 */ + MARIADB_REMOVED_OPTION("innodb-force-load-corrupted"), + + /* The following options were removed in 10.5 */ +#if defined(__linux__) + MARIADB_REMOVED_OPTION("super-large-pages"), +#endif + MARIADB_REMOVED_OPTION("innodb-idle-flush-pct"), + MARIADB_REMOVED_OPTION("innodb-locks-unsafe-for-binlog"), + MARIADB_REMOVED_OPTION("innodb-rollback-segments"), + MARIADB_REMOVED_OPTION("innodb-stats-sample-pages"), + MARIADB_REMOVED_OPTION("max-long-data-size"), + MARIADB_REMOVED_OPTION("multi-range-count"), + MARIADB_REMOVED_OPTION("skip-bdb"), + MARIADB_REMOVED_OPTION("thread-concurrency"), + MARIADB_REMOVED_OPTION("timed-mutexes"), + + /* The following options were added after 5.6.10 */ + MYSQL_TO_BE_IMPLEMENTED_OPTION("rpl-stop-slave-timeout"), + MYSQL_TO_BE_IMPLEMENTED_OPTION("validate-user-plugins"), // NO_EMBEDDED_ACCESS_CHECKS + + /* The following options were deprecated in 10.5 or earlier */ + 
MARIADB_REMOVED_OPTION("innodb-adaptive-max-sleep-delay"), + MARIADB_REMOVED_OPTION("innodb-background-scrub-data-check-interval"), + MARIADB_REMOVED_OPTION("innodb-background-scrub-data-compressed"), + MARIADB_REMOVED_OPTION("innodb-background-scrub-data-interval"), + MARIADB_REMOVED_OPTION("innodb-background-scrub-data-uncompressed"), + MARIADB_REMOVED_OPTION("innodb-buffer-pool-instances"), + MARIADB_REMOVED_OPTION("innodb-commit-concurrency"), + MARIADB_REMOVED_OPTION("innodb-concurrency-tickets"), + MARIADB_REMOVED_OPTION("innodb-file-format"), + MARIADB_REMOVED_OPTION("innodb-large-prefix"), + MARIADB_REMOVED_OPTION("innodb-lock-schedule-algorithm"), + MARIADB_REMOVED_OPTION("innodb-log-checksums"), + MARIADB_REMOVED_OPTION("innodb-log-compressed-pages"), + MARIADB_REMOVED_OPTION("innodb-log-files-in-group"), + MARIADB_REMOVED_OPTION("innodb-log-optimize-ddl"), + MARIADB_REMOVED_OPTION("innodb-log-write-ahead-size"), + MARIADB_REMOVED_OPTION("innodb-page-cleaners"), + MARIADB_REMOVED_OPTION("innodb-replication-delay"), + MARIADB_REMOVED_OPTION("innodb-scrub-log"), + MARIADB_REMOVED_OPTION("innodb-scrub-log-speed"), + MARIADB_REMOVED_OPTION("innodb-sync-array-size"), + MARIADB_REMOVED_OPTION("innodb-thread-concurrency"), + MARIADB_REMOVED_OPTION("innodb-thread-sleep-delay"), + MARIADB_REMOVED_OPTION("innodb-undo-logs"), + {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} + }; + /* + We need to eat any 'loose' arguments first before we conclude + that there are unprocessed options. 
+ */ + my_getopt_skip_unknown= 0; +#ifdef WITH_WSREP + if (wsrep_recovery) + my_getopt_skip_unknown= TRUE; +#endif + + if ((ho_error= handle_options(&remaining_argc, &remaining_argv, removed_opts, + mysqld_get_one_option))) + unireg_abort(ho_error); + /* Add back the program name handle_options removes */ + remaining_argc++; + remaining_argv--; + my_getopt_skip_unknown= TRUE; + +#ifdef WITH_WSREP + if (!wsrep_recovery) + { +#endif + if (remaining_argc > 1) + { + fprintf(stderr, "%s: Too many arguments (first extra is '%s').\n", + my_progname, remaining_argv[1]); + unireg_abort(1); + } +#ifdef WITH_WSREP + } +#endif + } + + if (opt_abort) + unireg_abort(0); + + if (init_io_cache_encryption()) + unireg_abort(1); + + /* if the errmsg.sys is not loaded, terminate to maintain behaviour */ + if (!DEFAULT_ERRMSGS[0][0]) + unireg_abort(1); + + /* We have to initialize the storage engines before CSV logging */ + if (ha_init()) + { + sql_print_error("Can't init databases"); + unireg_abort(1); + } + + if (opt_bootstrap) + log_output_options= LOG_FILE; + else + logger.init_log_tables(); + + if (log_output_options & LOG_NONE) + { + /* + Issue a warning if there were specified additional options to the + log-output along with NONE. Probably this wasn't what user wanted. + */ + if ((log_output_options & LOG_NONE) && (log_output_options & ~LOG_NONE)) + sql_print_warning("There were other values specified to " + "log-output besides NONE. 
Disabling slow " + "and general logs anyway."); + logger.set_handlers(LOG_NONE, LOG_NONE); + } + else + { + /* fall back to the log files if tables are not present */ + LEX_CSTRING csv_name={STRING_WITH_LEN("csv")}; + if (!plugin_is_ready(&csv_name, MYSQL_STORAGE_ENGINE_PLUGIN)) + { + /* purecov: begin inspected */ + sql_print_error("CSV engine is not present, falling back to the " + "log files"); + SYSVAR_AUTOSIZE(log_output_options, + (log_output_options & ~LOG_TABLE) | LOG_FILE); + /* purecov: end */ + } + + logger.set_handlers(global_system_variables.sql_log_slow ? + log_output_options:LOG_NONE, + opt_log ? log_output_options:LOG_NONE); + } + + if (init_default_storage_engine(default_storage_engine, table_plugin)) + unireg_abort(1); + + if (default_tmp_storage_engine && !*default_tmp_storage_engine) + default_tmp_storage_engine= NULL; + + if (enforced_storage_engine && !*enforced_storage_engine) + enforced_storage_engine= NULL; + + if (init_default_storage_engine(default_tmp_storage_engine, tmp_table_plugin)) + unireg_abort(1); + + if (init_default_storage_engine(enforced_storage_engine, enforced_table_plugin)) + unireg_abort(1); + + if (init_gtid_pos_auto_engines()) + unireg_abort(1); + +#ifdef USE_ARIA_FOR_TMP_TABLES + if (!ha_storage_engine_is_enabled(maria_hton) && !opt_bootstrap) + { + sql_print_error("Aria engine is not enabled or did not start. The Aria engine must be enabled to continue as server was configured with --with-aria-tmp-tables"); + unireg_abort(1); + } +#endif + +#ifdef WITH_WSREP + /* + Now is the right time to initialize members of wsrep startup threads + that rely on plugins and other related global system variables to be + initialized. This initialization was not possible before, as plugins + (and thus some global system variables) are initialized after wsrep + startup threads are created. + Note: This only needs to be done for rsync and mariabackup based SST + methods. 
+ */ + if (wsrep_before_SE()) + wsrep_plugins_post_init(); + + if (WSREP_ON && !opt_bin_log) + { + wsrep_emulate_bin_log= 1; + } +#endif + + tc_log= get_tc_log_implementation(); + + if (tc_log->open(opt_bin_log ? opt_bin_logname : opt_tc_log_file)) + { + sql_print_error("Can't init tc log"); + unireg_abort(1); + } + + if (ha_recover(0)) + unireg_abort(1); + +#ifndef EMBEDDED_LIBRARY + start_handle_manager(); +#endif + if (opt_bin_log) + { + int error; + mysql_mutex_t *log_lock= mysql_bin_log.get_log_lock(); + mysql_mutex_lock(log_lock); + error= mysql_bin_log.open(opt_bin_logname, 0, 0, + WRITE_CACHE, max_binlog_size, 0, TRUE); + mysql_mutex_unlock(log_lock); + if (unlikely(error)) + unireg_abort(1); + } + +#ifdef HAVE_REPLICATION + if (opt_bin_log) + { + if (binlog_expire_logs_seconds) + { + time_t purge_time= server_start_time - binlog_expire_logs_seconds; + if (purge_time >= 0) + mysql_bin_log.purge_logs_before_date(purge_time); + } + } + else + { + if (binlog_expire_logs_seconds) + sql_print_warning("You need to use --log-bin to make --expire-logs-days " + "or --binlog-expire-logs-seconds work."); + } +#endif + + if (ddl_log_execute_recovery() > 0) + unireg_abort(1); + ha_signal_ddl_recovery_done(); + + if (opt_myisam_log) + (void) mi_log(1); + +#if defined(HAVE_MLOCKALL) && defined(MCL_CURRENT) && !defined(EMBEDDED_LIBRARY) + if (locked_in_memory) + { + int error; + if (user_info) + { + DBUG_ASSERT(!getuid()); + if (setreuid((uid_t) -1, 0) == -1) + { + sql_perror("setreuid"); + unireg_abort(1); + } + error= mlockall(MCL_CURRENT); + set_user(mysqld_user, user_info); + } + else + error= mlockall(MCL_CURRENT); + + if (unlikely(error)) + { + if (global_system_variables.log_warnings) + sql_print_warning("Failed to lock memory. 
Errno: %d\n",errno); + locked_in_memory= 0; + } + } +#else + locked_in_memory= 0; +#endif + + ft_init_stopwords(); + + init_max_user_conn(); + init_global_user_stats(); + init_global_client_stats(); + if (!opt_bootstrap) + servers_init(0); + init_status_vars(); + Item_false= new (&read_only_root) Item_bool_static("FALSE", 0); + Item_true= new (&read_only_root) Item_bool_static("TRUE", 1); + DBUG_ASSERT(Item_false); + + DBUG_RETURN(0); +} + + +#ifndef EMBEDDED_LIBRARY + +#ifndef DBUG_OFF +/* + Debugging helper function to keep the locale database + (see sql_locale.cc) and max_month_name_length and + max_day_name_length variable values in consistent state. +*/ +static void test_lc_time_sz() +{ + DBUG_ENTER("test_lc_time_sz"); + for (MY_LOCALE **loc= my_locales; *loc; loc++) + { + size_t max_month_len= 0; + size_t max_day_len= 0; + for (const char **month= (*loc)->month_names->type_names; *month; month++) + { + set_if_bigger(max_month_len, + my_numchars_mb(&my_charset_utf8mb3_general_ci, + *month, *month + strlen(*month))); + } + for (const char **day= (*loc)->day_names->type_names; *day; day++) + { + set_if_bigger(max_day_len, + my_numchars_mb(&my_charset_utf8mb3_general_ci, + *day, *day + strlen(*day))); + } + if ((*loc)->max_month_name_length != max_month_len || + (*loc)->max_day_name_length != max_day_len) + { + DBUG_PRINT("Wrong max day name(or month name) length for locale:", + ("%s", (*loc)->name)); + DBUG_ASSERT(0); + } + } + DBUG_VOID_RETURN; +} +#endif//DBUG_OFF + + +int mysqld_main(int argc, char **argv) +{ +#ifndef _WIN32 + /* We can't close stdin just now, because it may be booststrap mode. */ + bool please_close_stdin= fcntl(STDIN_FILENO, F_GETFD) >= 0; +#endif + + /* + Perform basic thread library and malloc initialization, + to be able to read defaults files and parse options. 
+ */ + my_progname= argv[0]; + sf_leaking_memory= 1; // no safemalloc memory leak reports if we exit early + mysqld_server_started= mysqld_server_initialized= 0; + + if (init_early_variables()) + exit(1); + +#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE + pre_initialize_performance_schema(); +#endif /*WITH_PERFSCHEMA_STORAGE_ENGINE */ + + if (my_init()) // init my_sys library & pthreads + { + fprintf(stderr, "my_init() failed."); + return 1; + } + + orig_argc= argc; + orig_argv= argv; + my_defaults_mark_files= TRUE; + load_defaults_or_exit(MYSQL_CONFIG_NAME, load_default_groups, &argc, &argv); + defaults_argc= argc; + defaults_argv= argv; + remaining_argc= argc; + remaining_argv= argv; + + /* Must be initialized early for comparison of options name */ + system_charset_info= &my_charset_utf8mb3_general_ci; + + sys_var_init(); + +#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE + /* + Initialize the array of performance schema instrument configurations. + */ + init_pfs_instrument_array(); + + /* + Logs generated while parsing the command line + options are buffered and printed later. + */ + buffered_logs.init(); + my_getopt_error_reporter= buffered_option_error_reporter; + my_charset_error_reporter= buffered_option_error_reporter; + + pfs_param.m_pfs_instrument= const_cast(""); +#endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */ + my_timer_init(&sys_timer_info); + + int ho_error __attribute__((unused))= handle_early_options(); + + /* fix tdc_size */ + if (IS_SYSVAR_AUTOSIZE(&tdc_size)) + { + SYSVAR_AUTOSIZE(tdc_size, MY_MIN(400 + tdc_size / 2, 2000)); + } + +#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE + if (ho_error == 0) + { + if (pfs_param.m_enabled && !opt_help && !opt_bootstrap) + { + /* Add sizing hints from the server sizing parameters. 
*/ + pfs_param.m_hints.m_table_definition_cache= tdc_size; + pfs_param.m_hints.m_table_open_cache= tc_size; + pfs_param.m_hints.m_max_connections= max_connections; + pfs_param.m_hints.m_open_files_limit= open_files_limit; + PSI_hook= initialize_performance_schema(&pfs_param); + if (PSI_hook == NULL) + { + pfs_param.m_enabled= false; + buffered_logs.buffer(WARNING_LEVEL, + "Performance schema disabled (reason: init failed)."); + } + } + } +#else + /* + Other provider of the instrumentation interface should + initialize PSI_hook here: + - HAVE_PSI_INTERFACE is for the instrumentation interface + - WITH_PERFSCHEMA_STORAGE_ENGINE is for one implementation + of the interface, + but there could be alternate implementations, which is why + these two defines are kept separate. + */ +#endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */ + +#ifdef HAVE_PSI_INTERFACE + /* + Obtain the current performance schema instrumentation interface, + if available. + */ + if (PSI_hook) + { + PSI *psi_server= (PSI*) PSI_hook->get_interface(PSI_CURRENT_VERSION); + if (likely(psi_server != NULL)) + { + set_psi_server(psi_server); + + /* + Now that we have parsed the command line arguments, and have + initialized the performance schema itself, the next step is to + register all the server instruments. + */ + init_server_psi_keys(); + /* Instrument the main thread */ + PSI_thread *psi= PSI_CALL_new_thread(key_thread_main, NULL, 0); + PSI_CALL_set_thread_os_id(psi); + PSI_CALL_set_thread(psi); + + /* + Now that some instrumentation is in place, + recreate objects which were initialised early, + so that they are instrumented as well. + */ + my_thread_global_reinit(); + } + } +#endif /* HAVE_PSI_INTERFACE */ + + mysql_mutex_init(key_LOCK_error_log, &LOCK_error_log, MY_MUTEX_INIT_FAST); + + /* Initialize audit interface globals. Audit plugins are inited later. */ + mysql_audit_initialize(); + + /* + Perform basic logger initialization logger. Should be called after + MY_INIT, as it initializes mutexes. 
Log tables are inited later. + */ + logger.init_base(); + +#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE + if (ho_error) + { + /* + Parsing command line option failed, + Since we don't have a workable remaining_argc/remaining_argv + to continue the server initialization, this is as far as this + code can go. + This is the best effort to log meaningful messages: + - messages will be printed to stderr, which is not redirected yet, + - messages will be printed in the NT event log, for windows. + */ + buffered_logs.print(); + buffered_logs.cleanup(); + /* + Not enough initializations for unireg_abort() + Using exit() for windows. + */ + exit (ho_error); + } +#endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */ + +#ifdef _CUSTOMSTARTUPCONFIG_ + if (_cust_check_startup()) + { + / * _cust_check_startup will report startup failure error * / + exit(1); + } +#endif + + if (init_common_variables()) + unireg_abort(1); // Will do exit + + init_signals(); + + ulonglong new_thread_stack_size; + new_thread_stack_size= my_setstacksize(&connection_attrib, + (size_t)my_thread_stack_size); + if (new_thread_stack_size != my_thread_stack_size) + { + if ((new_thread_stack_size < my_thread_stack_size) && + global_system_variables.log_warnings) + sql_print_warning("Asked for %llu thread stack, but got %llu", + my_thread_stack_size, new_thread_stack_size); + SYSVAR_AUTOSIZE(my_thread_stack_size, new_thread_stack_size); + } + + (void) thr_setconcurrency(concurrency); // 10 by default + + select_thread=pthread_self(); + select_thread_in_use=1; + +#ifdef HAVE_LIBWRAP + libwrapName= my_progname+dirname_length(my_progname); + openlog(libwrapName, LOG_PID, LOG_AUTH); +#endif + +#ifndef DBUG_OFF + test_lc_time_sz(); + srand((uint) time(NULL)); +#endif + + /* + We have enough space for fiddling with the argv, continue + */ + check_data_home(mysql_real_data_home); + if (my_setwd(mysql_real_data_home, opt_abort ? 
0 : MYF(MY_WME)) && !opt_abort) + unireg_abort(1); /* purecov: inspected */ + + /* Atomic write initialization must be done as root */ + my_init_atomic_write(); + + if ((user_info= check_user(mysqld_user))) + { +#if defined(HAVE_MLOCKALL) && defined(MCL_CURRENT) + if (locked_in_memory) // getuid() == 0 here + set_effective_user(user_info); + else +#endif + set_user(mysqld_user, user_info); + } + +#ifdef WITH_WSREP + wsrep_set_wsrep_on(nullptr); + if (WSREP_ON && wsrep_check_opts()) unireg_abort(1); +#endif + +#ifdef _WIN32 + /* + The subsequent calls may take a long time : e.g. innodb log read. + Thus set the long running service control manager timeout + */ + my_report_svc_status(SERVICE_START_PENDING, NO_ERROR, slow_start_timeout); +#endif + + if (init_server_components()) + unireg_abort(1); + + init_ssl(); + network_init(); + +#ifdef WITH_WSREP + // Recover and exit. + if (wsrep_recovery) + { + select_thread_in_use= 0; + if (WSREP_ON) + wsrep_recover(); + else + sql_print_information("WSREP: disabled, skipping position recovery"); + unireg_abort(0); + } +#endif + + /* + init signals & alarm + After this we can't quit by a simple unireg_abort + */ + start_signal_handler(); // Creates pidfile + + if (mysql_rm_tmp_tables() || acl_init(opt_noacl) || + my_tz_init((THD *)0, default_tz_name, opt_bootstrap)) + unireg_abort(1); + + if (!opt_noacl) + (void) grant_init(); + + udf_init(); + + if (opt_bootstrap) /* If running with bootstrap, do not start replication. 
*/ + opt_skip_slave_start= 1; + + binlog_unsafe_map_init(); + +#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE + initialize_performance_schema_acl(opt_bootstrap); +#endif + + initialize_information_schema_acl(); + + /* + Change EVENTS_ORIGINAL to EVENTS_OFF (the default value) as there is no + point in using ORIGINAL during startup + */ + if (Events::opt_event_scheduler == Events::EVENTS_ORIGINAL) + Events::opt_event_scheduler= Events::EVENTS_OFF; + + Events::set_original_state(Events::opt_event_scheduler); + if (Events::init((THD*) 0, opt_noacl || opt_bootstrap)) + unireg_abort(1); + +#ifdef WITH_WSREP + if (WSREP_ON) + { + if (opt_bootstrap) + { + /*! bootstrap wsrep init was taken care of above */ + } + else + { + wsrep_init_globals(); + if (!wsrep_before_SE()) + { + wsrep_init_startup(false); + } + wsrep_new_cluster= false; + if (wsrep_cluster_address_exists()) + { + WSREP_DEBUG("Startup creating %ld applier threads running %lu", + wsrep_slave_threads - 1, wsrep_running_applier_threads); + wsrep_create_appliers(wsrep_slave_threads - 1); + } + } + } +#endif /* WITH_WSREP */ + + /* Protect read_only_root against writes */ + protect_root(&read_only_root, PROT_READ); + + if (opt_bootstrap) + { + select_thread_in_use= 0; // Allow 'kill' to work + int bootstrap_error= bootstrap(mysql_stdin); + if (!abort_loop) + unireg_abort(bootstrap_error); + else + { + sleep(2); // Wait for kill + exit(0); + } + } + + /* Copy default global rpl_filter to global_rpl_filter */ + copy_filter_setting(global_rpl_filter, get_or_create_rpl_filter("", 0)); + + /* + init_slave() must be called after the thread keys are created. + Some parts of the code (e.g. SHOW STATUS LIKE 'slave_running' and other + places) assume that active_mi != 0, so let's fail if it's 0 (out of + memory); a message has already been printed. 
+ */ + if (init_slave() && !active_mi) + { + unireg_abort(1); + } + + if (opt_init_file && *opt_init_file) + { + if (read_init_file(opt_init_file)) + unireg_abort(1); + } + + disable_log_notes= 0; /* Startup done, now we can give notes again */ + + if (IS_SYSVAR_AUTOSIZE(&server_version_ptr)) + sql_print_information(ER_DEFAULT(ER_STARTUP), my_progname, server_version, + (systemd_sock_activation ? "Systemd socket activated ports" : + (unix_sock_is_online ? mysqld_unix_port : (char*) "")), + mysqld_port, MYSQL_COMPILATION_COMMENT); + else + { + char real_server_version[2 * SERVER_VERSION_LENGTH + 10]; + + set_server_version(real_server_version, sizeof(real_server_version)); + safe_strcat(real_server_version, sizeof(real_server_version), "' as '"); + safe_strcat(real_server_version, sizeof(real_server_version), + server_version); + + sql_print_information(ER_DEFAULT(ER_STARTUP), my_progname, + real_server_version, + (systemd_sock_activation ? "Systemd socket activated ports" : + (unix_sock_is_online ? 
mysqld_unix_port : (char*) "")), + mysqld_port, MYSQL_COMPILATION_COMMENT); + } + +#ifndef _WIN32 + // try to keep fd=0 busy + if (please_close_stdin && !freopen("/dev/null", "r", stdin)) + { + // fall back on failure + fclose(stdin); + } +#endif + + + /* Signal threads waiting for server to be started */ + mysql_mutex_lock(&LOCK_server_started); + mysqld_server_started= 1; + mysql_cond_broadcast(&COND_server_started); + mysql_mutex_unlock(&LOCK_server_started); + + (void)MYSQL_SET_STAGE(0 ,__FILE__, __LINE__); + + /* Memory used when everything is setup */ + start_memory_used= global_status_var.global_memory_used; + +#ifdef _WIN32 + handle_connections_win(); +#else + handle_connections_sockets(); + + mysql_mutex_lock(&LOCK_start_thread); + select_thread_in_use=0; + mysql_cond_broadcast(&COND_start_thread); + mysql_mutex_unlock(&LOCK_start_thread); +#endif /* _WIN32 */ + + /* Shutdown requested */ + char *user= shutdown_user.load(std::memory_order_relaxed); + sql_print_information(ER_DEFAULT(ER_NORMAL_SHUTDOWN), my_progname, + user ? user : "unknown"); + if (user) + my_free(user); + +#ifdef WITH_WSREP + /* Stop wsrep threads in case they are running. */ + if (wsrep_running_threads > 0) + { + wsrep_shutdown_replication(); + } + /* Release threads if they are waiting in WSREP_SYNC_WAIT_UPTO_GTID */ + wsrep_gtid_server.signal_waiters(0, true); +#endif + + close_connections(); + ha_pre_shutdown(); + clean_up(1); + sd_notify(0, "STATUS=MariaDB server is down"); + + /* (void) pthread_attr_destroy(&connection_attrib); */ + + DBUG_PRINT("quit",("Exiting main thread")); + + /* + Disable the main thread instrumentation, + to avoid recording events during the shutdown. 
+ */ + PSI_CALL_delete_current_thread(); + +#if (defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)) + ERR_remove_state(0); +#endif + mysqld_exit(0); + return 0; +} + +#endif /* !EMBEDDED_LIBRARY */ + + +static bool read_init_file(char *file_name) +{ + MYSQL_FILE *file; + DBUG_ENTER("read_init_file"); + DBUG_PRINT("enter",("name: %s",file_name)); + if (!(file= mysql_file_fopen(key_file_init, file_name, + O_RDONLY, MYF(MY_WME)))) + DBUG_RETURN(TRUE); + bootstrap(file); + mysql_file_fclose(file, MYF(MY_WME)); + DBUG_RETURN(FALSE); +} + + +/** + Increment number of created threads +*/ +void inc_thread_created(void) +{ + statistic_increment(thread_created, &LOCK_status); +} + +#ifndef EMBEDDED_LIBRARY + +/* + Simple scheduler that use the main thread to handle the request + + NOTES + This is only used for debugging, when starting mysqld with + --thread-handling=no-threads or --one-thread +*/ + +void handle_connection_in_main_thread(CONNECT *connect) +{ + do_handle_one_connection(connect, false); +} + + +/* + Scheduler that uses one thread per connection +*/ + +void create_thread_to_handle_connection(CONNECT *connect) +{ + DBUG_ENTER("create_thread_to_handle_connection"); + + if (thread_cache.enqueue(connect)) + DBUG_VOID_RETURN; + + /* Create new thread to handle connection */ + inc_thread_created(); + DBUG_PRINT("info",(("creating thread %lu"), (ulong) connect->thread_id)); + connect->prior_thr_create_utime= microsecond_interval_timer(); + + pthread_t tmp; + if (auto error= mysql_thread_create(key_thread_one_connection, + &tmp, &connection_attrib, + handle_one_connection, (void*) connect)) + { + char error_message_buff[MYSQL_ERRMSG_SIZE]; + /* purecov: begin inspected */ + DBUG_PRINT("error", ("Can't create thread to handle request (error %d)", + error)); + my_snprintf(error_message_buff, sizeof(error_message_buff), + ER_DEFAULT(ER_CANT_CREATE_THREAD), error); + connect->close_with_error(ER_CANT_CREATE_THREAD, error_message_buff, + ER_OUT_OF_RESOURCES); + 
DBUG_VOID_RETURN; + /* purecov: end */ + } + DBUG_PRINT("info",("Thread created")); + DBUG_VOID_RETURN; +} + + +/** + Create new thread to handle incoming connection. + + This function will create new thread to handle the incoming + connection. If there are idle cached threads one will be used. + 'thd' will be pushed into 'threads'. + + In single-threaded mode (\#define ONE_THREAD) connection will be + handled inside this function. + + @param[in,out] thd Thread handle of future thread. +*/ + +void create_new_thread(CONNECT *connect) +{ + DBUG_ENTER("create_new_thread"); + + /* + Don't allow too many connections. We roughly check here that we allow + only (max_connections + 1) connections. + */ + if ((*connect->scheduler->connection_count)++ >= + *connect->scheduler->max_connections + 1) + { + DBUG_PRINT("error",("Too many connections")); + connect->close_with_error(0, NullS, ER_CON_COUNT_ERROR); + DBUG_VOID_RETURN; + } + + uint sum= connection_count + extra_connection_count; + if (sum > max_used_connections) + max_used_connections= sum; + + /* + The initialization of thread_id is done in create_embedded_thd() for + the embedded library. + TODO: refactor this to avoid code duplication there + */ + connect->thread_id= next_thread_id(); + connect->scheduler->add_connection(connect); + + DBUG_VOID_RETURN; +} +#endif /* EMBEDDED_LIBRARY */ + + + /* Handle new connections and spawn new process to handle them */ + +#ifndef EMBEDDED_LIBRARY + +void handle_accepted_socket(MYSQL_SOCKET new_sock, MYSQL_SOCKET sock) +{ +#ifdef HAVE_LIBWRAP + { + if (!sock.is_unix_domain_socket) + { + struct request_info req; + signal(SIGCHLD, SIG_DFL); + request_init(&req, RQ_DAEMON, libwrapName, RQ_FILE, + mysql_socket_getfd(new_sock), NULL); + my_fromhost(&req); + if (!my_hosts_access(&req)) + { + /* + This may be stupid but refuse() includes an exit(0) + which we surely don't want... + clean_exit() - same stupid thing ... 
+ */ + syslog(deny_severity, "refused connect from %s", + my_eval_client(&req)); + + /* + C++ sucks (the gibberish in front just translates the supplied + sink function pointer in the req structure from a void (*sink)(); + to a void(*sink)(int) if you omit the cast, the C++ compiler + will cry... + */ + if (req.sink) + ((void(*)(int))req.sink)(req.fd); + + (void)mysql_socket_shutdown(new_sock, SHUT_RDWR); + (void)mysql_socket_close(new_sock); + /* + The connection was refused by TCP wrappers. + There are no details (by client IP) available to update the + host_cache. + */ + statistic_increment(connection_errors_tcpwrap, &LOCK_status); + return; + } + } + } +#endif /* HAVE_LIBWRAP */ + + DBUG_PRINT("info", ("Creating CONNECT for new connection")); + + if (auto connect= new CONNECT(new_sock, + sock.is_unix_domain_socket ? + VIO_TYPE_SOCKET : VIO_TYPE_TCPIP, + sock.is_extra_port ? + extra_thread_scheduler : thread_scheduler)) + create_new_thread(connect); + else + { + /* Connect failure */ + (void)mysql_socket_close(new_sock); + statistic_increment(aborted_connects, &LOCK_status); + statistic_increment(connection_errors_internal, &LOCK_status); + } +} + +#ifndef _WIN32 +static void set_non_blocking_if_supported(MYSQL_SOCKET sock) +{ +#if !defined(NO_FCNTL_NONBLOCK) + if (!(test_flags & TEST_BLOCKING)) + { + int flags= fcntl(mysql_socket_getfd(sock), F_GETFL, 0); +#if defined(O_NONBLOCK) + fcntl(mysql_socket_getfd(sock), F_SETFL, flags | O_NONBLOCK); +#elif defined(O_NDELAY) + fcntl(mysql_socket_getfd(sock), F_SETFL, flags | O_NDELAY); +#endif + } +#endif +} + + +void handle_connections_sockets() +{ + MYSQL_SOCKET sock= mysql_socket_invalid(); + uint error_count=0; + struct sockaddr_storage cAddr; + int retval; +#ifdef HAVE_POLL + // for ip_sock, unix_sock and extra_ip_sock + Dynamic_array fds(PSI_INSTRUMENT_MEM); +#else + fd_set readFDs,clientFDs; +#endif + + DBUG_ENTER("handle_connections_sockets"); + +#ifdef HAVE_POLL + for (size_t i= 0; i < listen_sockets.size(); 
i++) + { + struct pollfd local_fds; + mysql_socket_set_thread_owner(listen_sockets.at(i)); + local_fds.fd= mysql_socket_getfd(listen_sockets.at(i)); + local_fds.events= POLLIN; + fds.push(local_fds); + set_non_blocking_if_supported(listen_sockets.at(i)); + } +#else + FD_ZERO(&clientFDs); + for (size_t i= 0; i < listen_sockets.size(); i++) + { + int fd= mysql_socket_getfd(listen_sockets.at(i)); + FD_SET(fd, &clientFDs); + set_non_blocking_if_supported(listen_sockets.at(i)); + } +#endif + + sd_notify(0, "READY=1\n" + "STATUS=Taking your SQL requests now...\n"); + + DBUG_PRINT("general",("Waiting for connections.")); + while (!abort_loop) + { +#ifdef HAVE_POLL + retval= poll(fds.get_pos(0), fds.size(), -1); +#else + readFDs=clientFDs; + retval= select(FD_SETSIZE, &readFDs, NULL, NULL, NULL); +#endif + + if (retval < 0) + { + if (socket_errno != SOCKET_EINTR) + { + /* + select(2)/poll(2) failed on the listening port. + There is not much details to report about the client, + increment the server global status variable. + */ + statistic_increment(connection_errors_accept, &LOCK_status); + if (!select_errors++ && !abort_loop) /* purecov: inspected */ + sql_print_error("Server: Got error %d from select",socket_errno); /* purecov: inspected */ + } + continue; + } + + if (abort_loop) + break; + + /* Is this a new connection request ? 
*/ +#ifdef HAVE_POLL + for (size_t i= 0; i < fds.size(); ++i) + { + if (fds.at(i).revents & POLLIN) + { + sock= listen_sockets.at(i); + break; + } + } +#else // HAVE_POLL + for (size_t i=0; i < listen_sockets.size(); i++) + { + if (FD_ISSET(mysql_socket_getfd(listen_sockets.at(i)), &readFDs)) + { + sock= listen_sockets.at(i); + break; + } + } +#endif // HAVE_POLL + + for (uint retry=0; retry < MAX_ACCEPT_RETRY && !abort_loop; retry++) + { + size_socket length= sizeof(struct sockaddr_storage); + MYSQL_SOCKET new_sock; + + new_sock= mysql_socket_accept(key_socket_client_connection, sock, + (struct sockaddr *)(&cAddr), + &length); + if (mysql_socket_getfd(new_sock) != INVALID_SOCKET) + handle_accepted_socket(new_sock, sock); + else if (socket_errno != SOCKET_EINTR && socket_errno != SOCKET_EAGAIN) + { + /* + accept(2) failed on the listening port. + There is not much details to report about the client, + increment the server global status variable. + */ + statistic_increment(connection_errors_accept, &LOCK_status); + if ((error_count++ & 255) == 0) // This can happen often + sql_perror("Error in accept"); + if (socket_errno == SOCKET_ENFILE || socket_errno == SOCKET_EMFILE) + sleep(1); // Give other threads some time + break; + } + } + } + sd_notify(0, "STOPPING=1\n" + "STATUS=Shutdown in progress\n"); + DBUG_VOID_RETURN; +} + +#endif /* _WIN32*/ +#endif /* EMBEDDED_LIBRARY */ + + +/**************************************************************************** + Handle start options +******************************************************************************/ + + +/** + Process command line options flagged as 'early'. + Some components needs to be initialized as early as possible, + because the rest of the server initialization depends on them. 
+ Options that needs to be parsed early includes: + - the performance schema, when compiled in, + - options related to the help, + - options related to the bootstrap + The performance schema needs to be initialized as early as possible, + before to-be-instrumented objects of the server are initialized. +*/ + +int handle_early_options() +{ + int ho_error; + DYNAMIC_ARRAY all_early_options; + + /* Skip unknown options so that they may be processed later */ + my_getopt_skip_unknown= TRUE; + + /* prepare all_early_options array */ + my_init_dynamic_array(PSI_NOT_INSTRUMENTED, &all_early_options, + sizeof(my_option), 100, 25, MYF(0)); + add_many_options(&all_early_options, pfs_early_options, + array_elements(pfs_early_options)); + sys_var_add_options(&all_early_options, sys_var::PARSE_EARLY); + add_terminator(&all_early_options); + + ho_error= handle_options(&remaining_argc, &remaining_argv, + (my_option*)(all_early_options.buffer), + mysqld_get_one_option); + if (ho_error == 0) + { + /* Add back the program name handle_options removes */ + remaining_argc++; + remaining_argv--; + } + + delete_dynamic(&all_early_options); + + return ho_error; +} + +/** + System variables are automatically command-line options (few + exceptions are documented in sys_var.h), so don't need + to be listed here. +*/ + +struct my_option my_long_options[]= +{ + {"help", '?', "Display this help and exit.", + &opt_help, &opt_help, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, + 0, 0}, + {"ansi", 'a', "Use ANSI SQL syntax instead of MySQL syntax. 
This mode " + "will also set transaction isolation level 'serializable'.", 0, 0, 0, + GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + /* + Because Sys_var_bit does not support command-line options, we need to + explicitly add one for --autocommit + */ + {"autocommit", 0, "Set default value for autocommit (0 or 1)", + &opt_autocommit, &opt_autocommit, 0, + GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, NULL}, + {"binlog-do-db", OPT_BINLOG_DO_DB, + "Tells the master it should log updates for the specified database, " + "and exclude all others not explicitly mentioned.", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"binlog-ignore-db", OPT_BINLOG_IGNORE_DB, + "Tells the master that updates to the given database should not be logged to the binary log.", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"binlog-row-event-max-size", 0, + "The maximum size of a row-based binary log event in bytes. Rows will be " + "grouped into events smaller than this size if possible. " + "The value has to be a multiple of 256.", + &opt_binlog_rows_event_max_size, &opt_binlog_rows_event_max_size, + 0, GET_ULONG, REQUIRED_ARG, + /* def_value */ 8192, /* min_value */ 256, /* max_value */ UINT_MAX32-1, + /* sub_size */ 0, /* block_size */ 256, + /* app_type */ 0 + }, +#ifndef DISABLE_GRANT_OPTIONS + {"bootstrap", OPT_BOOTSTRAP, "Used by mysql installation scripts.", 0, 0, 0, + GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"character-set-client-handshake", 0, + "Don't ignore client side character set value sent during handshake.", + &opt_character_set_client_handshake, + &opt_character_set_client_handshake, + 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0}, + {"character-set-filesystem", 0, + "Set the filesystem character set.", + &character_set_filesystem_name, + &character_set_filesystem_name, + 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 }, + {"character-set-server", 'C', "Set the default character set.", + &default_character_set_name, &default_character_set_name, + 0, GET_STR, REQUIRED_ARG, 0, 0, 
0, 0, 0, 0 }, + {"chroot", 'r', "Chroot mysqld daemon during startup.", + &mysqld_chroot, &mysqld_chroot, 0, GET_STR, REQUIRED_ARG, + 0, 0, 0, 0, 0, 0}, + {"collation-server", 0, "Set the default collation.", + &default_collation_name, &default_collation_name, + 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 }, + {"console", OPT_CONSOLE, "Write error output on screen; don't remove the console window on windows.", + &opt_console, &opt_console, 0, GET_BOOL, NO_ARG, 0, 0, 0, + 0, 0, 0}, +#ifdef DBUG_OFF + {"debug", '#', "Built in DBUG debugger. Disabled in this build.", + ¤t_dbug_option, ¤t_dbug_option, 0, GET_STR, OPT_ARG, + 0, 0, 0, 0, 0, 0}, +#endif +#ifdef HAVE_REPLICATION + {"debug-abort-slave-event-count", 0, + "Option used by mysql-test for debugging and testing of replication.", + &abort_slave_event_count, &abort_slave_event_count, + 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif /* HAVE_REPLICATION */ +#ifndef DBUG_OFF + {"debug-assert", 0, + "Allow DBUG_ASSERT() to invoke assert()", + &my_assert, &my_assert, + 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0}, + {"debug-assert-on-error", 0, + "Do an assert in various functions if we get a fatal error", + &my_assert_on_error, &my_assert_on_error, + 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"debug-assert-if-crashed-table", 0, + "Do an assert in handler::print_error() if we get a crashed table", + &debug_assert_if_crashed_table, &debug_assert_if_crashed_table, + 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, +#endif +#ifdef HAVE_REPLICATION + {"debug-disconnect-slave-event-count", 0, + "Option used by mysql-test for debugging and testing of replication.", + &disconnect_slave_event_count, &disconnect_slave_event_count, + 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif /* HAVE_REPLICATION */ + {"debug-exit-info", 'T', "Used for debugging. 
Use at your own risk.", + 0, 0, 0, GET_LONG, OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"debug-gdb", 0, + "Set up signals usable for debugging.", + &opt_debugging, &opt_debugging, + 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifdef HAVE_REPLICATION + {"debug-max-binlog-dump-events", 0, + "Option used by mysql-test for debugging and testing of replication.", + &max_binlog_dump_events, &max_binlog_dump_events, 0, + GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif /* HAVE_REPLICATION */ + {"debug-no-sync", 0, + "Disables system sync calls. Only for running tests or debugging!", + &my_disable_sync, &my_disable_sync, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifdef HAVE_REPLICATION + {"debug-sporadic-binlog-dump-fail", 0, + "Option used by mysql-test for debugging and testing of replication.", + &opt_sporadic_binlog_dump_fail, + &opt_sporadic_binlog_dump_fail, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, + 0}, +#endif /* HAVE_REPLICATION */ +#ifndef DBUG_OFF + {"debug-assert-on-not-freed-memory", 0, + "Assert if we found problems with memory allocation", + &debug_assert_on_not_freed_memory, + &debug_assert_on_not_freed_memory, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, + 0}, +#endif /* DBUG_OFF */ + /* default-storage-engine should have "MyISAM" as def_value. Instead + of initializing it here it is done in init_common_variables() due + to a compiler bug in Sun Studio compiler. 
*/ + {"default-storage-engine", 0, "The default storage engine for new tables", + &default_storage_engine, 0, 0, GET_STR, REQUIRED_ARG, + 0, 0, 0, 0, 0, 0 }, + {"default-tmp-storage-engine", 0, + "The default storage engine for user-created temporary tables", + &default_tmp_storage_engine, 0, 0, GET_STR, REQUIRED_ARG, + 0, 0, 0, 0, 0, 0 }, + {"default-time-zone", 0, "Set the default time zone.", + &default_tz_name, &default_tz_name, + 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 }, +#if defined(ENABLED_DEBUG_SYNC) + {"debug-sync-timeout", OPT_DEBUG_SYNC_TIMEOUT, + "Enable the debug sync facility " + "and optionally specify a default wait timeout in seconds. " + "A zero value keeps the facility disabled.", + &opt_debug_sync_timeout, 0, + 0, GET_UINT, OPT_ARG, 0, 0, UINT_MAX, 0, 0, 0}, +#endif /* defined(ENABLED_DEBUG_SYNC) */ +#ifdef HAVE_OPENSSL + {"des-key-file", 0, + "Load keys for des_encrypt() and des_encrypt from given file.", + &des_key_file, &des_key_file, 0, GET_STR, REQUIRED_ARG, + 0, 0, 0, 0, 0, 0}, +#endif /* HAVE_OPENSSL */ +#ifdef HAVE_STACKTRACE + {"stack-trace", 0 , "Print a symbolic stack trace on failure", + &opt_stack_trace, &opt_stack_trace, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0}, +#endif /* HAVE_STACKTRACE */ + {"enforce-storage-engine", 0, "Force the use of a storage engine for new tables", + &enforced_storage_engine, 0, 0, GET_STR, REQUIRED_ARG, + 0, 0, 0, 0, 0, 0 }, + {"external-locking", 0, "Use system (external) locking (disabled by " + "default). With this option enabled you can run myisamchk to test " + "(not repair) tables while the MySQL server is running. Disable with " + "--skip-external-locking.", &opt_external_locking, &opt_external_locking, + 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + /* We must always support the next option to make scripts like mysqltest + easier to do */ + {"flashback", 0, + "Setup the server to use flashback. 
This enables binary log in row mode and will enable extra logging for DDL's needed by flashback feature", + &opt_support_flashback, &opt_support_flashback, + 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"gdb", 0, + "Set up signals usable for debugging. Deprecated, use --debug-gdb instead.", + &opt_debugging, &opt_debugging, + 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"gtid-pos-auto-engines", 0, + "List of engines for which to automatically create a " + "mysql.gtid_slave_pos_ENGINE table, if a transaction using that engine " + "is replicated. This can be used to avoid introducing cross-engine " + "transactions, if engines are used different from that used by table " + "mysql.gtid_slave_pos", + &gtid_pos_auto_engines, 0, 0, GET_STR, REQUIRED_ARG, + 0, 0, 0, 0, 0, 0 }, +#ifdef HAVE_SOLARIS_LARGE_PAGES + {"super-large-pages", 0, "Enable support for super large pages.", + &opt_super_large_pages, &opt_super_large_pages, 0, + GET_BOOL, OPT_ARG, 0, 0, 1, 0, 1, 0}, +#endif + {"language", 'L', + "Client error messages in given language. May be given as a full path. " + "Deprecated. Use --lc-messages-dir instead.", + 0, 0, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"lc-messages", 0, + "Set the language used for the error messages.", + &lc_messages, &lc_messages, 0, GET_STR, REQUIRED_ARG, + 0, 0, 0, 0, 0, 0 }, + {"lc-time-names", 0, + "Set the language used for the month names and the days of the week.", + &lc_time_names_name, &lc_time_names_name, + 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 }, + {"log-basename", OPT_LOG_BASENAME, + "Basename for all log files and the .pid file. This sets all log file " + "names at once (in 'datadir') and is normally the only option you need " + "for specifying log files. 
Sets names for --log-bin, --log-bin-index, " + "--relay-log, --relay-log-index, --general-log-file, " + "--log-slow-query-file, --log-error-file, and --pid-file", + &opt_log_basename, &opt_log_basename, 0, GET_STR, REQUIRED_ARG, + 0, 0, 0, 0, 0, 0}, + {"log-bin", OPT_BIN_LOG, + "Log update queries in binary format. Optional argument should be name for " + "binary log. If not given " + "'datadir'/'log-basename'-bin or 'datadir'/mysql-bin will be used (the later if " + "--log-basename is not specified). We strongly recommend to use either " + "--log-basename or specify a filename to ensure that replication doesn't " + "stop if the real hostname of the computer changes.", + &opt_bin_logname, &opt_bin_logname, 0, GET_STR, + OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"log-bin-index", 0, + "File that holds the names for last binary log files.", + &opt_binlog_index_name, &opt_binlog_index_name, 0, GET_STR, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"relay-log-index", 0, + "The location and name to use for the file that keeps a list of the last " + "relay logs", + &opt_relaylog_index_name, &opt_relaylog_index_name, 0, GET_STR, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"log-ddl-recovery", 0, + "Path to file used for recovery of DDL statements after a crash", + &opt_ddl_recovery_file, &opt_ddl_recovery_file, 0, GET_STR, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"log-isam", OPT_ISAM_LOG, "Log all MyISAM changes to file.", + &myisam_log_filename, &myisam_log_filename, 0, GET_STR, + OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"log-short-format", 0, + "Don't log extra information to update and slow-query logs.", + &opt_short_log_format, &opt_short_log_format, + 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"log-tc", 0, + "Path to transaction coordinator log (used for transactions that affect " + "more than one storage engine, when binary log is disabled).", + &opt_tc_log_file, &opt_tc_log_file, 0, GET_STR, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"master-info-file", 0, + "The location and name of the file that remembers 
the master and where " + "the I/O replication thread is in the master's binlogs. Defaults to " + "master.info", + &master_info_file, &master_info_file, 0, GET_STR, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"master-retry-count", 0, + "The number of tries the slave will make to connect to the master before giving up.", + &master_retry_count, &master_retry_count, 0, GET_ULONG, + REQUIRED_ARG, 100000, 0, 0, 0, 0, 0}, +#ifdef HAVE_REPLICATION + {"init-rpl-role", 0, "Set the replication role", + &rpl_status, &rpl_status, &rpl_role_typelib, + GET_ENUM, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif /* HAVE_REPLICATION */ + {"memlock", 0, "Lock mysqld in memory.", &locked_in_memory, + &locked_in_memory, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"old-style-user-limits", 0, + "Enable old-style user limits (before 5.0.3, user resources were counted " + "per each user+host vs. per account).", + &opt_old_style_user_limits, &opt_old_style_user_limits, + 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"port-open-timeout", 0, + "Maximum time in seconds to wait for the port to become free. " + "(Default: No wait).", &mysqld_port_timeout, &mysqld_port_timeout, 0, + GET_UINT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"replicate-do-db", OPT_REPLICATE_DO_DB, + "Tells the slave thread to restrict replication to the specified database. " + "To specify more than one database, use the directive multiple times, " + "once for each database. Note that this will only work if you do not use " + "cross-database queries such as UPDATE some_db.some_table SET foo='bar' " + "while having selected a different or no database. If you need cross " + "database updates to work, make sure you have 3.23.28 or later, and use " + "replicate-wild-do-table=db_name.%.", + 0, 0, 0, GET_STR | GET_ASK_ADDR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"replicate-do-table", OPT_REPLICATE_DO_TABLE, + "Tells the slave thread to restrict replication to the specified table. 
" + "To specify more than one table, use the directive multiple times, once " + "for each table. This will work for cross-database updates, in contrast " + "to replicate-do-db.", 0, 0, 0, GET_STR | GET_ASK_ADDR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"replicate-ignore-db", OPT_REPLICATE_IGNORE_DB, + "Tells the slave thread to not replicate to the specified database. To " + "specify more than one database to ignore, use the directive multiple " + "times, once for each database. This option will not work if you use " + "cross database updates. If you need cross database updates to work, " + "make sure you have 3.23.28 or later, and use replicate-wild-ignore-" + "table=db_name.%. ", 0, 0, 0, GET_STR | GET_ASK_ADDR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"replicate-ignore-table", OPT_REPLICATE_IGNORE_TABLE, + "Tells the slave thread to not replicate to the specified table. To specify " + "more than one table to ignore, use the directive multiple times, once for " + "each table. This will work for cross-database updates, in contrast to " + "replicate-ignore-db.", 0, 0, 0, GET_STR | GET_ASK_ADDR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"replicate-rewrite-db", OPT_REPLICATE_REWRITE_DB, + "Updates to a database with a different name than the original. Example: " + "replicate-rewrite-db=master_db_name->slave_db_name.", + 0, 0, 0, GET_STR | GET_ASK_ADDR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#ifdef HAVE_REPLICATION + {"replicate-same-server-id", 0, + "In replication, if set to 1, do not skip events having our server id. " + "Default value is 0 (to break infinite loops in circular replication). " + "Can't be set to 1 if --log-slave-updates is used.", + &replicate_same_server_id, &replicate_same_server_id, + 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"replicate-wild-do-table", OPT_REPLICATE_WILD_DO_TABLE, + "Tells the slave thread to restrict replication to the tables that match " + "the specified wildcard pattern. 
To specify more than one table, use the " + "directive multiple times, once for each table. This will work for cross-" + "database updates. Example: replicate-wild-do-table=foo%.bar% will " + "replicate only updates to tables in all databases that start with foo " + "and whose table names start with bar.", + 0, 0, 0, GET_STR | GET_ASK_ADDR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"replicate-wild-ignore-table", OPT_REPLICATE_WILD_IGNORE_TABLE, + "Tells the slave thread to not replicate to the tables that match the " + "given wildcard pattern. To specify more than one table to ignore, use " + "the directive multiple times, once for each table. This will work for " + "cross-database updates. Example: replicate-wild-ignore-table=foo%.bar% " + "will not do updates to tables in databases that start with foo and whose " + "table names start with bar.", + 0, 0, 0, GET_STR | GET_ASK_ADDR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"safe-mode", OPT_SAFE, "Skip some optimize stages (for testing). Deprecated.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"safe-user-create", 0, + "Don't allow new user creation by the user who has no write privileges to the mysql.user table.", + &opt_safe_user_create, &opt_safe_user_create, 0, GET_BOOL, + NO_ARG, 0, 0, 0, 0, 0, 0}, + {"show-slave-auth-info", 0, + "Show user and password in SHOW SLAVE HOSTS on this master.", + &opt_show_slave_auth_info, &opt_show_slave_auth_info, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"silent-startup", OPT_SILENT, "Don't print [Note] to the error log during startup.", + &opt_silent_startup, &opt_silent_startup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"skip-host-cache", OPT_SKIP_HOST_CACHE, "Don't cache host names.", 0, 0, 0, + GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"skip-slave-start", 0, + "If set, slave is not autostarted.", &opt_skip_slave_start, + &opt_skip_slave_start, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifdef HAVE_REPLICATION + {"slave-parallel-mode", OPT_SLAVE_PARALLEL_MODE, + "Controls what 
transactions are applied in parallel when using " + "--slave-parallel-threads. Possible values: \"optimistic\" tries to " + "apply most transactional DML in parallel, and handles any conflicts " + "with rollback and retry. \"conservative\" limits parallelism in an " + "effort to avoid any conflicts. \"aggressive\" tries to maximise the " + "parallelism, possibly at the cost of increased conflict rate. " + "\"minimal\" only parallelizes the commit steps of transactions. " + "\"none\" disables parallel apply completely.", + &opt_slave_parallel_mode, &opt_slave_parallel_mode, + &slave_parallel_mode_typelib, GET_ENUM | GET_ASK_ADDR, REQUIRED_ARG, + SLAVE_PARALLEL_CONSERVATIVE, 0, 0, 0, 0, 0}, +#endif +#if defined(_WIN32) && !defined(EMBEDDED_LIBRARY) + {"slow-start-timeout", 0, + "Maximum number of milliseconds that the service control manager should wait " + "before trying to kill the windows service during startup" + "(Default: 15000).", &slow_start_timeout, &slow_start_timeout, 0, + GET_ULONG, REQUIRED_ARG, 15000, 0, 0, 0, 0, 0}, +#endif +#ifdef HAVE_OPENSSL + {"ssl", 0, + "Enable SSL for connection (automatically enabled if an ssl option is used).", + &opt_use_ssl, &opt_use_ssl, 0, GET_BOOL, OPT_ARG, 0, 0, 0, + 0, 0, 0}, +#endif +#ifdef _WIN32 + {"standalone", 0, + "Dummy option to start as a standalone program (NT).", 0, 0, 0, GET_NO_ARG, + NO_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"symbolic-links", 's', "Enable symbolic link support.", + &my_use_symdir, &my_use_symdir, 0, GET_BOOL, NO_ARG, + /* + The system call realpath() produces warnings under valgrind and + purify. These are not suppressed: instead we disable symlinks + option if compiled with valgrind support. + Also disable by default on Windows, due to high overhead for checking .sym + files. + */ + IF_WIN(0,1), 0, 0, 0, 0, 0}, + {"sysdate-is-now", 0, + "Non-default option to alias SYSDATE() to NOW() to make it safe-replicable. 
" + "Since 5.0, SYSDATE() returns a `dynamic' value different for different " + "invocations, even within the same statement.", + &global_system_variables.sysdate_is_now, + 0, 0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0}, + {"tc-heuristic-recover", 0, + "Decision to use in heuristic recover process", + &tc_heuristic_recover, &tc_heuristic_recover, + &tc_heuristic_recover_typelib, GET_ENUM, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"temp-pool", 0, +#if (ENABLE_TEMP_POOL) + "Using this option will cause most temporary files created to use a small " + "set of names, rather than a unique name for each new file. Deprecated.", +#else + "This option is ignored on this OS.", +#endif + &use_temp_pool, &use_temp_pool, 0, GET_BOOL, NO_ARG, 0, + 0, 0, 0, 0, 0}, + {"transaction-isolation", 0, + "Default transaction isolation level", + &global_system_variables.tx_isolation, + &global_system_variables.tx_isolation, &tx_isolation_typelib, + GET_ENUM, REQUIRED_ARG, ISO_REPEATABLE_READ, 0, 0, 0, 0, 0}, + {"transaction-read-only", 0, + "Default transaction access mode. " + "True if transactions are read-only.", + &global_system_variables.tx_read_only, + &global_system_variables.tx_read_only, 0, + GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"user", 'u', "Run mysqld daemon as user.", 0, 0, 0, GET_STR, REQUIRED_ARG, + 0, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Used with --help option for detailed help.", + &opt_verbose, &opt_verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Output version information and exit.", 0, 0, 0, GET_STR, + OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"plugin-load", OPT_PLUGIN_LOAD, + "Semicolon-separated list of plugins to load, where each plugin is " + "specified as ether a plugin_name=library_file pair or only a library_file. " + "If the latter case, all plugins from a given library_file will be loaded.", + 0, 0, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"plugin-load-add", OPT_PLUGIN_LOAD_ADD, + "Optional semicolon-separated list of plugins to load. 
This option adds " + "to the list specified by --plugin-load in an incremental way. " + "It can be specified many times, adding more plugins every time.", + 0, 0, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"table_cache", 0, "Deprecated; use --table-open-cache instead.", + &tc_size, &tc_size, 0, GET_ULONG, + REQUIRED_ARG, TABLE_OPEN_CACHE_DEFAULT, 1, 512*1024L, 0, 1, 0}, +#ifdef WITH_WSREP + {"wsrep-new-cluster", 0, "Bootstrap a cluster. It works by overriding the " + "current value of wsrep_cluster_address. It is recommended not to add this " + "option to the config file as this will trigger bootstrap on every server " + "start.", &wsrep_new_cluster, &wsrep_new_cluster, 0, GET_BOOL, NO_ARG, + 0, 0, 0, 0, 0, 0}, +#endif +}; + +static int show_queries(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_LONGLONG; + var->value= &thd->query_id; + return 0; +} + + +static int show_net_compression(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_MY_BOOL; + var->value= &thd->net.compress; + return 0; +} + +static int show_starttime(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (long) (thd->query_start() - server_start_time); + return 0; +} + +#ifdef ENABLED_PROFILING +static int show_flushstatustime(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (long) (thd->query_start() - flush_status_time); + return 0; +} +#endif + +#ifdef HAVE_REPLICATION +static int show_rpl_status(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_CHAR; + var->value= const_cast<char*>(rpl_status_type[(int)rpl_status]); + return 0; +} + +static int show_slave_running(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + Master_info *mi= NULL; + bool UNINIT_VAR(tmp); + + var->type= SHOW_MY_BOOL; + var->value= buff; + 
+ if ((mi= get_master_info(&thd->variables.default_master_connection, + Sql_condition::WARN_LEVEL_NOTE))) + { + tmp= (my_bool) (mi->slave_running == MYSQL_SLAVE_RUN_READING && + mi->rli.slave_running != MYSQL_SLAVE_NOT_RUN); + mi->release(); + } + if (mi) + *((my_bool *)buff)= tmp; + else + var->type= SHOW_UNDEF; + return 0; +} + + +/* How many masters this slave is connected to */ + + +static int show_slaves_running(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONGLONG; + var->value= buff; + + *((longlong *)buff)= any_slave_sql_running(false); + + return 0; +} + + +static int show_slave_received_heartbeats(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + Master_info *mi; + + var->type= SHOW_LONGLONG; + var->value= buff; + + if ((mi= get_master_info(&thd->variables.default_master_connection, + Sql_condition::WARN_LEVEL_NOTE))) + { + *((longlong *)buff)= mi->received_heartbeats; + mi->release(); + } + else + var->type= SHOW_UNDEF; + return 0; +} + + +static int show_heartbeat_period(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + Master_info *mi; + + var->type= SHOW_CHAR; + var->value= buff; + + if ((mi= get_master_info(&thd->variables.default_master_connection, + Sql_condition::WARN_LEVEL_NOTE))) + { + sprintf(buff, "%.3f", mi->heartbeat_period); + mi->release(); + } + else + var->type= SHOW_UNDEF; + return 0; +} + + +#endif /* HAVE_REPLICATION */ + +static int show_open_tables(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *) buff)= (long) tc_records(); + return 0; +} + +static int show_prepared_stmt_count(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_LONG; + var->value= buff; + mysql_mutex_lock(&LOCK_prepared_stmt_count); + *((long *)buff)= (long)prepared_stmt_count; + mysql_mutex_unlock(&LOCK_prepared_stmt_count); + return 0; +} + +static int show_table_definitions(THD *thd, SHOW_VAR *var, char 
*buff, + enum enum_var_type scope) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *) buff)= (long) tdc_records(); + return 0; +} + + +#if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY) + +/* + Functions relying on SSL + Note: In the show_ssl_* functions, we need to check if we have a + valid vio-object since this isn't always true, specifically + when session_status or global_status is requested from + inside an Event. + */ + +static int show_ssl_get_version(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_CHAR; + if( thd->vio_ok() && thd->net.vio->ssl_arg ) + var->value= const_cast<char*>(SSL_get_version((SSL*) thd->net.vio->ssl_arg)); + else + var->value= const_cast<char*>(""); + return 0; +} + +static int show_ssl_get_default_timeout(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_LONG; + var->value= buff; + if( thd->vio_ok() && thd->net.vio->ssl_arg ) + *((long *)buff)= (long)SSL_get_default_timeout((SSL*)thd->net.vio->ssl_arg); + else + *((long *)buff)= 0; + return 0; +} + +static int show_ssl_get_verify_mode(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_LONG; + var->value= buff; +#ifndef HAVE_WOLFSSL + if( thd->net.vio && thd->net.vio->ssl_arg ) + *((long *)buff)= (long)SSL_get_verify_mode((SSL*)thd->net.vio->ssl_arg); + else + *((long *)buff)= 0; +#else + *((long *)buff)= 0; +#endif + return 0; +} + +static int show_ssl_get_verify_depth(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_LONG; + var->value= buff; + if( thd->vio_ok() && thd->net.vio->ssl_arg ) + *((long *)buff)= (long)SSL_get_verify_depth((SSL*)thd->net.vio->ssl_arg); + else + *((long *)buff)= 0; + + return 0; +} + +static int show_ssl_get_cipher(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_CHAR; + if( thd->vio_ok() && thd->net.vio->ssl_arg ) + var->value= const_cast<char*>(SSL_get_cipher((SSL*) 
thd->net.vio->ssl_arg)); + else + var->value= const_cast<char*>(""); + return 0; +} + +static int show_ssl_get_cipher_list(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_CHAR; + var->value= buff; + if (thd->vio_ok() && thd->net.vio->ssl_arg) + { + int i; + const char *p; + char *end= buff + SHOW_VAR_FUNC_BUFF_SIZE; + for (i=0; (p= SSL_get_cipher_list((SSL*) thd->net.vio->ssl_arg,i)) && + buff < end; i++) + { + buff= strnmov(buff, p, end-buff-1); + *buff++= ':'; + } + if (i) + buff--; + } + *buff=0; + return 0; +} + +#define SHOW_FNAME(name) \ + rpl_semi_sync_master_show_##name + +#define DEF_SHOW_FUNC(name, show_type) \ + static int SHOW_FNAME(name)(MYSQL_THD thd, SHOW_VAR *var, void *buff, \ + system_status_var *status_var, \ + enum_var_type var_type) \ + { \ + repl_semisync_master.set_export_stats(); \ + var->type= show_type; \ + var->value= (char *)&rpl_semi_sync_master_##name; \ + return 0; \ + } + +DEF_SHOW_FUNC(status, SHOW_BOOL) +DEF_SHOW_FUNC(clients, SHOW_LONG) +DEF_SHOW_FUNC(wait_sessions, SHOW_LONG) +DEF_SHOW_FUNC(trx_wait_time, SHOW_LONGLONG) +DEF_SHOW_FUNC(trx_wait_num, SHOW_LONGLONG) +DEF_SHOW_FUNC(net_wait_time, SHOW_LONGLONG) +DEF_SHOW_FUNC(net_wait_num, SHOW_LONGLONG) +DEF_SHOW_FUNC(avg_net_wait_time, SHOW_LONG) +DEF_SHOW_FUNC(avg_trx_wait_time, SHOW_LONG) + + +static char * +my_asn1_time_to_string(const ASN1_TIME *time, char *buf, size_t len) +{ + int n_read; + char *res= NULL; + BIO *bio= BIO_new(BIO_s_mem()); + + if (bio == NULL) + return NULL; + + if (!ASN1_TIME_print(bio, const_cast<ASN1_TIME*>(time))) + goto end; + + n_read= BIO_read(bio, buf, (int) (len - 1)); + + if (n_read > 0) + { + buf[n_read]= 0; + res= buf; + } + +end: + BIO_free(bio); + return res; +} + + +/** + Handler function for the 'ssl_get_server_not_before' variable + + @param thd the mysql thread structure + @param var the data for the variable + @param[out] buf the string to put the value of the variable into + + @return status + @retval 0 success +*/ + 
+static int +show_ssl_get_server_not_before(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_CHAR; + if(thd->vio_ok() && thd->net.vio->ssl_arg) + { + SSL *ssl= (SSL*) thd->net.vio->ssl_arg; + X509 *cert= SSL_get_certificate(ssl); + const ASN1_TIME *not_before= X509_get0_notBefore(cert); + + var->value= my_asn1_time_to_string(not_before, buff, + SHOW_VAR_FUNC_BUFF_SIZE); + if (!var->value) + return 1; + var->value= buff; + } + else + var->value= empty_c_string; + return 0; +} + + +/** + Handler function for the 'ssl_get_server_not_after' variable + + @param thd the mysql thread structure + @param var the data for the variable + @param[out] buf the string to put the value of the variable into + + @return status + @retval 0 success +*/ + +static int +show_ssl_get_server_not_after(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_CHAR; + if(thd->vio_ok() && thd->net.vio->ssl_arg) + { + SSL *ssl= (SSL*) thd->net.vio->ssl_arg; + X509 *cert= SSL_get_certificate(ssl); + const ASN1_TIME *not_after= X509_get0_notAfter(cert); + + var->value= my_asn1_time_to_string(not_after, buff, + SHOW_VAR_FUNC_BUFF_SIZE); + if (!var->value) + return 1; + } + else + var->value= empty_c_string; + return 0; +} + +#endif /* HAVE_OPENSSL && !EMBEDDED_LIBRARY */ + +static int show_default_keycache(THD *thd, SHOW_VAR *var, void *buff, + system_status_var *, enum_var_type) +{ + struct st_data { + KEY_CACHE_STATISTICS stats; + SHOW_VAR var[9]; + } *data; + SHOW_VAR *v; + + data=(st_data *)buff; + v= data->var; + + var->type= SHOW_ARRAY; + var->value= v; + + get_key_cache_statistics(dflt_key_cache, 0, &data->stats); + +#define set_one_keycache_var(X,Y) \ + v->name= X; \ + v->type= SHOW_LONGLONG; \ + v->value= &data->stats.Y; \ + v++; + + set_one_keycache_var("blocks_not_flushed", blocks_changed); + set_one_keycache_var("blocks_unused", blocks_unused); + set_one_keycache_var("blocks_used", blocks_used); + 
set_one_keycache_var("blocks_warm", blocks_warm); + set_one_keycache_var("read_requests", read_requests); + set_one_keycache_var("reads", reads); + set_one_keycache_var("write_requests", write_requests); + set_one_keycache_var("writes", writes); + + v->name= 0; + + DBUG_ASSERT((char*)(v+1) <= static_cast<char*>(buff) + SHOW_VAR_FUNC_BUFF_SIZE); + +#undef set_one_keycache_var + + return 0; +} + + +static int show_memory_used(THD *thd, SHOW_VAR *var, char *buff, + struct system_status_var *status_var, + enum enum_var_type scope) +{ + var->type= SHOW_LONGLONG; + var->value= buff; + if (scope == OPT_GLOBAL) + { + calc_sum_of_all_status_if_needed(status_var); + *(longlong*) buff= (status_var->global_memory_used + + status_var->local_memory_used); + } + else + *(longlong*) buff= status_var->local_memory_used; + return 0; +} + + +#ifndef DBUG_OFF +static int debug_status_func(THD *thd, SHOW_VAR *var, void *buff, + system_status_var *, enum_var_type) +{ +#define add_var(X,Y,Z) \ + v->name= X; \ + v->value= (char*)Y; \ + v->type= Z; \ + v++; + + var->type= SHOW_ARRAY; + var->value= buff; + + SHOW_VAR *v= (SHOW_VAR *)buff; + + if (_db_keyword_(0, "role_merge_stats", 1)) + { + static SHOW_VAR roles[]= { + {"global", &role_global_merges, SHOW_ULONG}, + {"db", &role_db_merges, SHOW_ULONG}, + {"table", &role_table_merges, SHOW_ULONG}, + {"column", &role_column_merges, SHOW_ULONG}, + {"routine", &role_routine_merges, SHOW_ULONG}, + {NullS, NullS, SHOW_LONG} + }; + + add_var("role_merges", roles, SHOW_ARRAY); + } + + v->name= 0; + +#undef add_var + + return 0; +} +#endif + +#ifdef HAVE_POOL_OF_THREADS +static int show_threadpool_idle_threads(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_INT; + var->value= buff; + *(int *)buff= tp_get_idle_thread_count(); + return 0; +} + + +static int show_threadpool_threads(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_INT; + var->value= buff; + *(reinterpret_cast<int*>(buff))= 
tp_get_thread_count(); + return 0; +} +#endif + + +static int show_cached_thread_count(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_LONG; + var->value= buff; + *(reinterpret_cast<ulong*>(buff))= thread_cache.size(); + return 0; +} + + +/* + Variables shown by SHOW STATUS in alphabetical order +*/ + +SHOW_VAR status_vars[]= { + {"Aborted_clients", (char*) &aborted_threads, SHOW_LONG}, + {"Aborted_connects", (char*) &aborted_connects, SHOW_LONG}, + {"Aborted_connects_preauth", (char*) &aborted_connects_preauth, SHOW_LONG}, + {"Acl", (char*) acl_statistics, SHOW_ARRAY}, + {"Access_denied_errors", (char*) offsetof(STATUS_VAR, access_denied_errors), SHOW_LONG_STATUS}, + {"Binlog_bytes_written", (char*) offsetof(STATUS_VAR, binlog_bytes_written), SHOW_LONGLONG_STATUS}, + {"Binlog_cache_disk_use", (char*) &binlog_cache_disk_use, SHOW_LONG}, + {"Binlog_cache_use", (char*) &binlog_cache_use, SHOW_LONG}, + {"Binlog_stmt_cache_disk_use",(char*) &binlog_stmt_cache_disk_use, SHOW_LONG}, + {"Binlog_stmt_cache_use", (char*) &binlog_stmt_cache_use, SHOW_LONG}, + {"Busy_time", (char*) offsetof(STATUS_VAR, busy_time), SHOW_DOUBLE_STATUS}, + {"Bytes_received", (char*) offsetof(STATUS_VAR, bytes_received), SHOW_LONGLONG_STATUS}, + {"Bytes_sent", (char*) offsetof(STATUS_VAR, bytes_sent), SHOW_LONGLONG_STATUS}, + {"Column_compressions", (char*) offsetof(STATUS_VAR, column_compressions), SHOW_LONG_STATUS}, + {"Column_decompressions", (char*) offsetof(STATUS_VAR, column_decompressions), SHOW_LONG_STATUS}, + {"Com", (char*) com_status_vars, SHOW_ARRAY}, + {"Compression", (char*) &show_net_compression, SHOW_SIMPLE_FUNC}, + {"Connections", (char*) &global_thread_id, SHOW_LONG_NOFLUSH}, + {"Connection_errors_accept", (char*) &connection_errors_accept, SHOW_LONG}, + {"Connection_errors_internal", (char*) &connection_errors_internal, SHOW_LONG}, + {"Connection_errors_max_connections", (char*) &connection_errors_max_connection, SHOW_LONG}, + 
{"Connection_errors_peer_address", (char*) &connection_errors_peer_addr, SHOW_LONG}, + {"Connection_errors_select", (char*) &connection_errors_select, SHOW_LONG}, + {"Connection_errors_tcpwrap", (char*) &connection_errors_tcpwrap, SHOW_LONG}, + {"Cpu_time", (char*) offsetof(STATUS_VAR, cpu_time), SHOW_DOUBLE_STATUS}, + {"Created_tmp_disk_tables", (char*) offsetof(STATUS_VAR, created_tmp_disk_tables_), SHOW_LONG_STATUS}, + {"Created_tmp_files", (char*) &my_tmp_file_created, SHOW_LONG}, + {"Created_tmp_tables", (char*) offsetof(STATUS_VAR, created_tmp_tables_), SHOW_LONG_STATUS}, +#ifndef DBUG_OFF + SHOW_FUNC_ENTRY("Debug", &debug_status_func), +#endif + {"Delayed_errors", (char*) &delayed_insert_errors, SHOW_LONG}, + {"Delayed_insert_threads", (char*) &delayed_insert_threads, SHOW_LONG_NOFLUSH}, + {"Delayed_writes", (char*) &delayed_insert_writes, SHOW_LONG}, + {"Delete_scan", (char*) offsetof(STATUS_VAR, delete_scan_count), SHOW_LONG_STATUS}, + {"Empty_queries", (char*) offsetof(STATUS_VAR, empty_queries), SHOW_LONG_STATUS}, + {"Executed_events", (char*) &executed_events, SHOW_LONG_NOFLUSH }, + {"Executed_triggers", (char*) offsetof(STATUS_VAR, executed_triggers), SHOW_LONG_STATUS}, + {"Feature_check_constraint", (char*) &feature_check_constraint, SHOW_LONG }, + {"Feature_custom_aggregate_functions", (char*) offsetof(STATUS_VAR, feature_custom_aggregate_functions), SHOW_LONG_STATUS}, + {"Feature_delay_key_write", (char*) &feature_files_opened_with_delayed_keys, SHOW_LONG }, + {"Feature_dynamic_columns", (char*) offsetof(STATUS_VAR, feature_dynamic_columns), SHOW_LONG_STATUS}, + {"Feature_fulltext", (char*) offsetof(STATUS_VAR, feature_fulltext), SHOW_LONG_STATUS}, + {"Feature_gis", (char*) offsetof(STATUS_VAR, feature_gis), SHOW_LONG_STATUS}, + {"Feature_insert_returning", (char*)offsetof(STATUS_VAR, feature_insert_returning), SHOW_LONG_STATUS}, + {"Feature_invisible_columns", (char*) offsetof(STATUS_VAR, feature_invisible_columns), SHOW_LONG_STATUS}, + 
{"Feature_json", (char*) offsetof(STATUS_VAR, feature_json), SHOW_LONG_STATUS}, + {"Feature_locale", (char*) offsetof(STATUS_VAR, feature_locale), SHOW_LONG_STATUS}, + {"Feature_subquery", (char*) offsetof(STATUS_VAR, feature_subquery), SHOW_LONG_STATUS}, + {"Feature_system_versioning", (char*) offsetof(STATUS_VAR, feature_system_versioning), SHOW_LONG_STATUS}, + {"Feature_application_time_periods", (char*) offsetof(STATUS_VAR, feature_application_time_periods), SHOW_LONG_STATUS}, + {"Feature_timezone", (char*) offsetof(STATUS_VAR, feature_timezone), SHOW_LONG_STATUS}, + {"Feature_trigger", (char*) offsetof(STATUS_VAR, feature_trigger), SHOW_LONG_STATUS}, + {"Feature_window_functions", (char*) offsetof(STATUS_VAR, feature_window_functions), SHOW_LONG_STATUS}, + {"Feature_xml", (char*) offsetof(STATUS_VAR, feature_xml), SHOW_LONG_STATUS}, + {"Handler_commit", (char*) offsetof(STATUS_VAR, ha_commit_count), SHOW_LONG_STATUS}, + {"Handler_delete", (char*) offsetof(STATUS_VAR, ha_delete_count), SHOW_LONG_STATUS}, + {"Handler_discover", (char*) offsetof(STATUS_VAR, ha_discover_count), SHOW_LONG_STATUS}, + {"Handler_external_lock", (char*) offsetof(STATUS_VAR, ha_external_lock_count), SHOW_LONG_STATUS}, + {"Handler_icp_attempts", (char*) offsetof(STATUS_VAR, ha_icp_attempts), SHOW_LONG_STATUS}, + {"Handler_icp_match", (char*) offsetof(STATUS_VAR, ha_icp_match), SHOW_LONG_STATUS}, + {"Handler_mrr_init", (char*) offsetof(STATUS_VAR, ha_mrr_init_count), SHOW_LONG_STATUS}, + {"Handler_mrr_key_refills", (char*) offsetof(STATUS_VAR, ha_mrr_key_refills_count), SHOW_LONG_STATUS}, + {"Handler_mrr_rowid_refills",(char*) offsetof(STATUS_VAR, ha_mrr_rowid_refills_count), SHOW_LONG_STATUS}, + {"Handler_prepare", (char*) offsetof(STATUS_VAR, ha_prepare_count), SHOW_LONG_STATUS}, + {"Handler_read_first", (char*) offsetof(STATUS_VAR, ha_read_first_count), SHOW_LONG_STATUS}, + {"Handler_read_key", (char*) offsetof(STATUS_VAR, ha_read_key_count), SHOW_LONG_STATUS}, + {"Handler_read_last", 
(char*) offsetof(STATUS_VAR, ha_read_last_count), SHOW_LONG_STATUS}, + {"Handler_read_next", (char*) offsetof(STATUS_VAR, ha_read_next_count), SHOW_LONG_STATUS}, + {"Handler_read_prev", (char*) offsetof(STATUS_VAR, ha_read_prev_count), SHOW_LONG_STATUS}, + {"Handler_read_retry", (char*) offsetof(STATUS_VAR, ha_read_retry_count), SHOW_LONG_STATUS}, + {"Handler_read_rnd", (char*) offsetof(STATUS_VAR, ha_read_rnd_count), SHOW_LONG_STATUS}, + {"Handler_read_rnd_deleted", (char*) offsetof(STATUS_VAR, ha_read_rnd_deleted_count), SHOW_LONG_STATUS}, + {"Handler_read_rnd_next", (char*) offsetof(STATUS_VAR, ha_read_rnd_next_count), SHOW_LONG_STATUS}, + {"Handler_rollback", (char*) offsetof(STATUS_VAR, ha_rollback_count), SHOW_LONG_STATUS}, + {"Handler_savepoint", (char*) offsetof(STATUS_VAR, ha_savepoint_count), SHOW_LONG_STATUS}, + {"Handler_savepoint_rollback",(char*) offsetof(STATUS_VAR, ha_savepoint_rollback_count), SHOW_LONG_STATUS}, + {"Handler_tmp_delete", (char*) offsetof(STATUS_VAR, ha_tmp_delete_count), SHOW_LONG_STATUS}, + {"Handler_tmp_update", (char*) offsetof(STATUS_VAR, ha_tmp_update_count), SHOW_LONG_STATUS}, + {"Handler_tmp_write", (char*) offsetof(STATUS_VAR, ha_tmp_write_count), SHOW_LONG_STATUS}, + {"Handler_update", (char*) offsetof(STATUS_VAR, ha_update_count), SHOW_LONG_STATUS}, + {"Handler_write", (char*) offsetof(STATUS_VAR, ha_write_count), SHOW_LONG_STATUS}, + SHOW_FUNC_ENTRY("Key", &show_default_keycache), + {"optimizer_join_prefixes_check_calls", (char*) offsetof(STATUS_VAR, optimizer_join_prefixes_check_calls), SHOW_LONG_STATUS}, + {"Last_query_cost", (char*) offsetof(STATUS_VAR, last_query_cost), SHOW_DOUBLE_STATUS}, +#ifndef DBUG_OFF + {"malloc_calls", (char*) &malloc_calls, SHOW_LONG}, +#endif + {"Max_statement_time_exceeded", (char*) offsetof(STATUS_VAR, max_statement_time_exceeded), SHOW_LONG_STATUS}, + {"Master_gtid_wait_count", (char*) offsetof(STATUS_VAR, master_gtid_wait_count), SHOW_LONG_STATUS}, + {"Master_gtid_wait_timeouts", (char*) 
offsetof(STATUS_VAR, master_gtid_wait_timeouts), SHOW_LONG_STATUS}, + {"Master_gtid_wait_time", (char*) offsetof(STATUS_VAR, master_gtid_wait_time), SHOW_LONG_STATUS}, + {"Max_used_connections", (char*) &max_used_connections, SHOW_LONG}, + {"Memory_used", (char*) &show_memory_used, SHOW_SIMPLE_FUNC}, + {"Memory_used_initial", (char*) &start_memory_used, SHOW_LONGLONG}, + {"Resultset_metadata_skipped", (char *) offsetof(STATUS_VAR, skip_metadata_count),SHOW_LONG_STATUS}, + {"Not_flushed_delayed_rows", (char*) &delayed_rows_in_use, SHOW_LONG_NOFLUSH}, + {"Open_files", (char*) &my_file_opened, SHOW_SINT}, + {"Open_streams", (char*) &my_stream_opened, SHOW_LONG_NOFLUSH}, + {"Open_table_definitions", (char*) &show_table_definitions, SHOW_SIMPLE_FUNC}, + {"Open_tables", (char*) &show_open_tables, SHOW_SIMPLE_FUNC}, + {"Opened_files", (char*) &my_file_total_opened, SHOW_LONG_NOFLUSH}, + {"Opened_plugin_libraries", (char*) &dlopen_count, SHOW_LONG}, + {"Opened_table_definitions", (char*) offsetof(STATUS_VAR, opened_shares), SHOW_LONG_STATUS}, + {"Opened_tables", (char*) offsetof(STATUS_VAR, opened_tables), SHOW_LONG_STATUS}, + {"Opened_views", (char*) offsetof(STATUS_VAR, opened_views), SHOW_LONG_STATUS}, + {"Prepared_stmt_count", (char*) &show_prepared_stmt_count, SHOW_SIMPLE_FUNC}, + {"Rows_sent", (char*) offsetof(STATUS_VAR, rows_sent), SHOW_LONGLONG_STATUS}, + {"Rows_read", (char*) offsetof(STATUS_VAR, rows_read), SHOW_LONGLONG_STATUS}, + {"Rows_tmp_read", (char*) offsetof(STATUS_VAR, rows_tmp_read), SHOW_LONGLONG_STATUS}, +#ifdef HAVE_REPLICATION + SHOW_FUNC_ENTRY("Rpl_semi_sync_master_status", &SHOW_FNAME(status)), + SHOW_FUNC_ENTRY("Rpl_semi_sync_master_clients", &SHOW_FNAME(clients)), + {"Rpl_semi_sync_master_yes_tx", (char*) &rpl_semi_sync_master_yes_transactions, SHOW_LONG}, + {"Rpl_semi_sync_master_no_tx", (char*) &rpl_semi_sync_master_no_transactions, SHOW_LONG}, + SHOW_FUNC_ENTRY("Rpl_semi_sync_master_wait_sessions", &SHOW_FNAME(wait_sessions)), + 
{"Rpl_semi_sync_master_no_times", (char*) &rpl_semi_sync_master_off_times, SHOW_LONG}, + {"Rpl_semi_sync_master_timefunc_failures", (char*) &rpl_semi_sync_master_timefunc_fails, SHOW_LONG}, + {"Rpl_semi_sync_master_wait_pos_backtraverse", (char*) &rpl_semi_sync_master_wait_pos_backtraverse, SHOW_LONG}, + SHOW_FUNC_ENTRY("Rpl_semi_sync_master_tx_wait_time", &SHOW_FNAME(trx_wait_time)), + SHOW_FUNC_ENTRY("Rpl_semi_sync_master_tx_waits", &SHOW_FNAME(trx_wait_num)), + SHOW_FUNC_ENTRY("Rpl_semi_sync_master_tx_avg_wait_time", &SHOW_FNAME(avg_trx_wait_time)), + SHOW_FUNC_ENTRY("Rpl_semi_sync_master_net_wait_time", &SHOW_FNAME(net_wait_time)), + SHOW_FUNC_ENTRY("Rpl_semi_sync_master_net_waits", &SHOW_FNAME(net_wait_num)), + SHOW_FUNC_ENTRY("Rpl_semi_sync_master_net_avg_wait_time", &SHOW_FNAME(avg_net_wait_time)), + {"Rpl_semi_sync_master_request_ack", (char*) &rpl_semi_sync_master_request_ack, SHOW_LONGLONG}, + {"Rpl_semi_sync_master_get_ack", (char*)&rpl_semi_sync_master_get_ack, SHOW_LONGLONG}, + {"Rpl_semi_sync_slave_status", (char*) &rpl_semi_sync_slave_status, SHOW_BOOL}, + {"Rpl_semi_sync_slave_send_ack", (char*) &rpl_semi_sync_slave_send_ack, SHOW_LONGLONG}, +#endif /* HAVE_REPLICATION */ +#ifdef HAVE_QUERY_CACHE + {"Qcache_free_blocks", (char*) &query_cache.free_memory_blocks, SHOW_LONG_NOFLUSH}, + {"Qcache_free_memory", (char*) &query_cache.free_memory, SHOW_LONG_NOFLUSH}, + {"Qcache_hits", (char*) &query_cache.hits, SHOW_LONG}, + {"Qcache_inserts", (char*) &query_cache.inserts, SHOW_LONG}, + {"Qcache_lowmem_prunes", (char*) &query_cache.lowmem_prunes, SHOW_LONG}, + {"Qcache_not_cached", (char*) &query_cache.refused, SHOW_LONG}, + {"Qcache_queries_in_cache", (char*) &query_cache.queries_in_cache, SHOW_LONG_NOFLUSH}, + {"Qcache_total_blocks", (char*) &query_cache.total_blocks, SHOW_LONG_NOFLUSH}, +#endif /*HAVE_QUERY_CACHE*/ + {"Queries", (char*) &show_queries, SHOW_SIMPLE_FUNC}, + {"Questions", (char*) offsetof(STATUS_VAR, questions), SHOW_LONG_STATUS}, +#ifdef 
HAVE_REPLICATION + {"Rpl_status", (char*) &show_rpl_status, SHOW_SIMPLE_FUNC}, +#endif + {"Select_full_join", (char*) offsetof(STATUS_VAR, select_full_join_count_), SHOW_LONG_STATUS}, + {"Select_full_range_join", (char*) offsetof(STATUS_VAR, select_full_range_join_count_), SHOW_LONG_STATUS}, + {"Select_range", (char*) offsetof(STATUS_VAR, select_range_count_), SHOW_LONG_STATUS}, + {"Select_range_check", (char*) offsetof(STATUS_VAR, select_range_check_count_), SHOW_LONG_STATUS}, + {"Select_scan", (char*) offsetof(STATUS_VAR, select_scan_count_), SHOW_LONG_STATUS}, + {"Slave_open_temp_tables", (char*) &slave_open_temp_tables, SHOW_ATOMIC_COUNTER_UINT32_T}, +#ifdef HAVE_REPLICATION + {"Slaves_connected", (char*) &binlog_dump_thread_count, SHOW_ATOMIC_COUNTER_UINT32_T}, + {"Slaves_running", (char*) &show_slaves_running, SHOW_SIMPLE_FUNC }, + {"Slave_connections", (char*) offsetof(STATUS_VAR, com_register_slave), SHOW_LONG_STATUS}, + {"Slave_heartbeat_period", (char*) &show_heartbeat_period, SHOW_SIMPLE_FUNC}, + {"Slave_received_heartbeats",(char*) &show_slave_received_heartbeats, SHOW_SIMPLE_FUNC}, + {"Slave_retried_transactions",(char*)&slave_retried_transactions, SHOW_LONG}, + {"Slave_running", (char*) &show_slave_running, SHOW_SIMPLE_FUNC}, + {"Slave_skipped_errors", (char*) &slave_skipped_errors, SHOW_LONGLONG}, +#endif + {"Slow_launch_threads", (char*) &slow_launch_threads, SHOW_LONG}, + {"Slow_queries", (char*) offsetof(STATUS_VAR, long_query_count), SHOW_LONG_STATUS}, + {"Sort_merge_passes", (char*) offsetof(STATUS_VAR, filesort_merge_passes_), SHOW_LONG_STATUS}, + {"Sort_priority_queue_sorts",(char*) offsetof(STATUS_VAR, filesort_pq_sorts_), SHOW_LONG_STATUS}, + {"Sort_range", (char*) offsetof(STATUS_VAR, filesort_range_count_), SHOW_LONG_STATUS}, + {"Sort_rows", (char*) offsetof(STATUS_VAR, filesort_rows_), SHOW_LONG_STATUS}, + {"Sort_scan", (char*) offsetof(STATUS_VAR, filesort_scan_count_), SHOW_LONG_STATUS}, +#ifdef HAVE_OPENSSL +#ifndef EMBEDDED_LIBRARY + 
{"Ssl_accept_renegotiates", (char*) &ssl_acceptor_stats.zero, SHOW_LONG}, + {"Ssl_accepts", (char*) &ssl_acceptor_stats.accept, SHOW_LONG}, + {"Ssl_callback_cache_hits", (char*) &ssl_acceptor_stats.zero, SHOW_LONG}, + {"Ssl_cipher", (char*) &show_ssl_get_cipher, SHOW_SIMPLE_FUNC}, + {"Ssl_cipher_list", (char*) &show_ssl_get_cipher_list, SHOW_SIMPLE_FUNC}, + {"Ssl_client_connects", (char*) &ssl_acceptor_stats.zero, SHOW_LONG}, + {"Ssl_connect_renegotiates", (char*) &ssl_acceptor_stats.zero, SHOW_LONG}, + {"Ssl_ctx_verify_depth", (char*) &ssl_acceptor_stats.verify_depth, SHOW_LONG}, + {"Ssl_ctx_verify_mode", (char*) &ssl_acceptor_stats.verify_mode, SHOW_LONG}, + {"Ssl_default_timeout", (char*) &show_ssl_get_default_timeout, SHOW_SIMPLE_FUNC}, + {"Ssl_finished_accepts", (char*) &ssl_acceptor_stats.accept_good, SHOW_LONG}, + {"Ssl_finished_connects", (char*) &ssl_acceptor_stats.zero, SHOW_LONG}, + {"Ssl_server_not_after", (char*) &show_ssl_get_server_not_after, SHOW_SIMPLE_FUNC}, + {"Ssl_server_not_before", (char*) &show_ssl_get_server_not_before, SHOW_SIMPLE_FUNC}, + {"Ssl_session_cache_hits", (char*) &ssl_acceptor_stats.zero, SHOW_LONG}, + {"Ssl_session_cache_misses", (char*) &ssl_acceptor_stats.zero, SHOW_LONG}, + {"Ssl_session_cache_mode", (char*) &ssl_acceptor_stats.session_cache_mode, SHOW_CHAR_PTR}, + {"Ssl_session_cache_overflows", (char*) &ssl_acceptor_stats.zero, SHOW_LONG}, + {"Ssl_session_cache_size", (char*) &ssl_acceptor_stats.cache_size, SHOW_LONG}, + {"Ssl_session_cache_timeouts", (char*) &ssl_acceptor_stats.zero, SHOW_LONG}, + {"Ssl_sessions_reused", (char*) &ssl_acceptor_stats.zero, SHOW_LONG}, + {"Ssl_used_session_cache_entries",(char*) &ssl_acceptor_stats.zero, SHOW_LONG}, + {"Ssl_verify_depth", (char*) &show_ssl_get_verify_depth, SHOW_SIMPLE_FUNC}, + {"Ssl_verify_mode", (char*) &show_ssl_get_verify_mode, SHOW_SIMPLE_FUNC}, + {"Ssl_version", (char*) &show_ssl_get_version, SHOW_SIMPLE_FUNC}, +#endif +#endif /* HAVE_OPENSSL */ + {"Syncs", (char*) 
&my_sync_count, SHOW_LONG_NOFLUSH}, + /* + Expression cache used only for caching subqueries now, so its statistic + variables we call subquery_cache*. + */ + {"Subquery_cache_hit", (char*) &subquery_cache_hit, SHOW_LONG}, + {"Subquery_cache_miss", (char*) &subquery_cache_miss, SHOW_LONG}, + {"Table_locks_immediate", (char*) &locks_immediate, SHOW_LONG}, + {"Table_locks_waited", (char*) &locks_waited, SHOW_LONG}, + {"Table_open_cache_active_instances", (char*) &show_tc_active_instances, SHOW_SIMPLE_FUNC}, + {"Table_open_cache_hits", (char*) offsetof(STATUS_VAR, table_open_cache_hits), SHOW_LONGLONG_STATUS}, + {"Table_open_cache_misses", (char*) offsetof(STATUS_VAR, table_open_cache_misses), SHOW_LONGLONG_STATUS}, + {"Table_open_cache_overflows", (char*) offsetof(STATUS_VAR, table_open_cache_overflows), SHOW_LONGLONG_STATUS}, +#ifdef HAVE_MMAP + {"Tc_log_max_pages_used", (char*) &tc_log_max_pages_used, SHOW_LONG}, + {"Tc_log_page_size", (char*) &tc_log_page_size, SHOW_LONG_NOFLUSH}, + {"Tc_log_page_waits", (char*) &tc_log_page_waits, SHOW_LONG}, +#endif +#ifdef HAVE_POOL_OF_THREADS + {"Threadpool_idle_threads", (char *) &show_threadpool_idle_threads, SHOW_SIMPLE_FUNC}, + {"Threadpool_threads", (char *) &show_threadpool_threads, SHOW_SIMPLE_FUNC}, +#endif + {"Threads_cached", (char*) &show_cached_thread_count, SHOW_SIMPLE_FUNC}, + {"Threads_connected", (char*) &connection_count, SHOW_INT}, + {"Threads_created", (char*) &thread_created, SHOW_LONG_NOFLUSH}, + {"Threads_running", (char*) offsetof(STATUS_VAR, threads_running), SHOW_UINT32_STATUS}, + {"Transactions_multi_engine", (char*) &transactions_multi_engine, SHOW_LONG}, + {"Rpl_transactions_multi_engine", (char*) &rpl_transactions_multi_engine, SHOW_LONG}, + {"Transactions_gtid_foreign_engine", (char*) &transactions_gtid_foreign_engine, SHOW_LONG}, + {"Update_scan", (char*) offsetof(STATUS_VAR, update_scan_count), SHOW_LONG_STATUS}, + {"Uptime", (char*) &show_starttime, SHOW_SIMPLE_FUNC}, +#ifdef ENABLED_PROFILING 
+ {"Uptime_since_flush_status",(char*) &show_flushstatustime, SHOW_SIMPLE_FUNC}, +#endif +#ifdef WITH_WSREP + {"wsrep_connected", (char*) &wsrep_connected, SHOW_BOOL}, + {"wsrep_ready", (char*) &wsrep_show_ready, SHOW_FUNC}, + {"wsrep_cluster_state_uuid",(char*) &wsrep_cluster_state_uuid,SHOW_CHAR_PTR}, + {"wsrep_cluster_conf_id", (char*) &wsrep_cluster_conf_id, SHOW_LONGLONG}, + {"wsrep_cluster_status", (char*) &wsrep_cluster_status, SHOW_CHAR_PTR}, + {"wsrep_cluster_size", (char*) &wsrep_cluster_size, SHOW_LONG_NOFLUSH}, + {"wsrep_local_index", (char*) &wsrep_local_index, SHOW_LONG_NOFLUSH}, + {"wsrep_local_bf_aborts", (char*) &wsrep_show_bf_aborts, SHOW_FUNC}, + {"wsrep_provider_name", (char*) &wsrep_provider_name, SHOW_CHAR_PTR}, + {"wsrep_provider_version", (char*) &wsrep_provider_version, SHOW_CHAR_PTR}, + {"wsrep_provider_vendor", (char*) &wsrep_provider_vendor, SHOW_CHAR_PTR}, + {"wsrep_provider_capabilities", (char*) &wsrep_provider_capabilities, SHOW_CHAR_PTR}, + {"wsrep_thread_count", (char*) &wsrep_running_threads, SHOW_LONG_NOFLUSH}, + {"wsrep_applier_thread_count", (char*) &wsrep_running_applier_threads, SHOW_LONG_NOFLUSH}, + {"wsrep_rollbacker_thread_count", (char *) &wsrep_running_rollbacker_threads, SHOW_LONG_NOFLUSH}, + {"wsrep_cluster_capabilities", (char*) &wsrep_cluster_capabilities, SHOW_CHAR_PTR}, + SHOW_FUNC_ENTRY("wsrep", &wsrep_show_status), +#endif + {NullS, NullS, SHOW_LONG} +}; + +static bool add_terminator(DYNAMIC_ARRAY *options) +{ + my_option empty_element= {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}; + return insert_dynamic(options, (uchar *)&empty_element); +} + +static bool add_many_options(DYNAMIC_ARRAY *options, my_option *list, + size_t elements) +{ + for (my_option *opt= list; opt < list + elements; opt++) + if (insert_dynamic(options, opt)) + return 1; + return 0; +} + +#ifndef EMBEDDED_LIBRARY +static void print_version(void) +{ + if (IS_SYSVAR_AUTOSIZE(&server_version_ptr)) + set_server_version(server_version, 
sizeof(server_version)); + + printf("%s Ver %s for %s on %s (%s)\n",my_progname, + server_version,SYSTEM_TYPE,MACHINE_TYPE, MYSQL_COMPILATION_COMMENT); +} + +/** Compares two options' names, treats - and _ the same */ +static int option_cmp(my_option *a, my_option *b) +{ + const char *sa= a->name; + const char *sb= b->name; + for (; *sa || *sb; sa++, sb++) + { + if (*sa < *sb) + { + if (*sa == '-' && *sb == '_') + continue; + else + return -1; + } + if (*sa > *sb) + { + if (*sa == '_' && *sb == '-') + continue; + else + return 1; + } + } + return 0; +} + +static void print_help() +{ + MEM_ROOT mem_root; + init_alloc_root(PSI_NOT_INSTRUMENTED, &mem_root, 4096, 4096, MYF(0)); + + pop_dynamic(&all_options); + add_many_options(&all_options, pfs_early_options, + array_elements(pfs_early_options)); + sys_var_add_options(&all_options, sys_var::PARSE_EARLY); + add_plugin_options(&all_options, &mem_root); + sort_dynamic(&all_options, (qsort_cmp) option_cmp); + sort_dynamic(&all_options, (qsort_cmp) option_cmp); + add_terminator(&all_options); + + my_print_help((my_option*) all_options.buffer); + + /* Add variables that must be shown but not changed, like version numbers */ + pop_dynamic(&all_options); + sys_var_add_options(&all_options, sys_var::GETOPT_ONLY_HELP); + sort_dynamic(&all_options, (qsort_cmp) option_cmp); + add_terminator(&all_options); + my_print_variables((my_option*) all_options.buffer); + + free_root(&mem_root, MYF(0)); +} + +static void usage(void) +{ + DBUG_ENTER("usage"); + myf utf8_flag= global_system_variables.old_behavior & + OLD_MODE_UTF8_IS_UTF8MB3 ? 
MY_UTF8_IS_UTF8MB3 : 0; + if (!(default_charset_info= get_charset_by_csname(default_character_set_name, + MY_CS_PRIMARY, + MYF(utf8_flag | MY_WME)))) + exit(1); + if (!default_collation_name) + default_collation_name= (char*) default_charset_info->coll_name.str; + print_version(); + puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2000")); + puts("Starts the MariaDB database server.\n"); + printf("Usage: %s [OPTIONS]\n", my_progname); + if (!opt_verbose) + puts("\nFor more help options (several pages), use mysqld --verbose --help."); + else + { +#ifdef _WIN32 + puts("NT and Win32 specific options:\n" + " --install Install the default service (NT).\n" + " --install-manual Install the default service started manually (NT).\n" + " --install service_name Install an optional service (NT).\n" + " --install-manual service_name Install an optional service started manually (NT).\n" + " --remove Remove the default service from the service list (NT).\n" + " --remove service_name Remove the service_name from the service list (NT).\n" + " --enable-named-pipe Only to be used for the default server (NT).\n" + " --standalone Dummy option to start as a standalone server (NT)."); + puts(""); +#endif + print_defaults(MYSQL_CONFIG_NAME,load_default_groups); + puts(""); + set_ports(); + + /* Print out all the options including plugin supplied options */ + print_help(); + + if (! plugins_are_initialized) + { + puts("\nPlugins have parameters that are not reflected in this list" + "\nbecause execution stopped before plugins were initialized."); + } + + puts("\nTo see what variables a running server is using, type" + "\n'SELECT * FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES' instead of 'mysqld --verbose --help' or 'mariadbd --verbose --help'."); + } + DBUG_VOID_RETURN; +} +#endif /*!EMBEDDED_LIBRARY*/ + +/** + Initialize MySQL global variables to default values. 
+ + @note + The reason to set a lot of global variables to zero is to allow one to + restart the embedded server with a clean environment + It's also needed on some exotic platforms where global variables are + not set to 0 when a program starts. + + We don't need to set variables referred to in my_long_options + as these are initialized by my_getopt. +*/ + +static int mysql_init_variables(void) +{ + /* Things reset to zero */ + opt_skip_slave_start= opt_reckless_slave = 0; + mysql_home[0]= pidfile_name[0]= log_error_file[0]= 0; +#if defined(HAVE_REALPATH) && !defined(HAVE_valgrind) && !defined(HAVE_BROKEN_REALPATH) + /* We can only test for sub paths if my_symlink.c is using realpath */ + mysys_test_invalid_symlink= path_starts_from_data_home_dir; +#endif + opt_log= 0; + opt_bin_log= opt_bin_log_used= 0; + opt_disable_networking= opt_skip_show_db=0; + opt_skip_name_resolve= 0; + opt_ignore_builtin_innodb= 0; + opt_logname= opt_binlog_index_name= opt_slow_logname= 0; + opt_log_basename= 0; + opt_tc_log_file= (char *)"tc.log"; // no hostname in tc_log file name ! 
+ opt_ddl_recovery_file= (char *) "ddl_recovery.log"; + opt_secure_auth= 0; + opt_bootstrap= opt_myisam_log= 0; + disable_log_notes= 0; + mqh_used= 0; + cleanup_done= 0; + select_errors= dropping_tables= ha_open_options=0; + THD_count::count= CONNECT::count= 0; + slave_open_temp_tables= 0; + opt_endinfo= using_udf_functions= 0; + opt_using_transactions= 0; + abort_loop= select_thread_in_use= signal_thread_in_use= 0; + grant_option= 0; + aborted_threads= aborted_connects= aborted_connects_preauth= 0; + malloc_calls= 0; + subquery_cache_miss= subquery_cache_hit= 0; + delayed_insert_threads= delayed_insert_writes= delayed_rows_in_use= 0; + delayed_insert_errors= thread_created= 0; + specialflag= 0; + binlog_cache_use= binlog_cache_disk_use= 0; + max_used_connections= slow_launch_threads = 0; + mysqld_user= mysqld_chroot= opt_init_file= opt_bin_logname = 0; + prepared_stmt_count= 0; + mysqld_unix_port= opt_mysql_tmpdir= my_bind_addr_str= NullS; + bzero((uchar*) &mysql_tmpdir_list, sizeof(mysql_tmpdir_list)); + /* Clear all except global_memory_used */ + bzero((char*) &global_status_var, offsetof(STATUS_VAR, + last_cleared_system_status_var)); + opt_large_pages= 0; + opt_super_large_pages= 0; +#if defined(ENABLED_DEBUG_SYNC) + opt_debug_sync_timeout= 0; +#endif /* defined(ENABLED_DEBUG_SYNC) */ + key_map_full.set_all(); + + /* Character sets */ + system_charset_info= &my_charset_utf8mb3_general_ci; + files_charset_info= &my_charset_utf8mb3_general_ci; + national_charset_info= &my_charset_utf8mb3_general_ci; + table_alias_charset= &my_charset_bin; + character_set_filesystem= &my_charset_bin; + + opt_specialflag= SPECIAL_ENGLISH; + mysql_home_ptr= mysql_home; + log_error_file_ptr= log_error_file; + protocol_version= PROTOCOL_VERSION; + what_to_log= ~(1UL << COM_TIME); + denied_connections= 0; + executed_events= 0; + global_query_id= 1; + global_thread_id= 0; + strnmov(server_version, MYSQL_SERVER_VERSION, sizeof(server_version)-1); + thread_cache.init(); + 
key_caches.empty(); + if (!(dflt_key_cache= get_or_create_key_cache(default_key_cache_base.str, + default_key_cache_base.length))) + { + sql_print_error("Cannot allocate the keycache"); + return 1; + } + + /* set key_cache_hash.default_value = dflt_key_cache */ + multi_keycache_init(); + + /* Set directory paths */ + mysql_real_data_home_len= + (uint)(strmake_buf(mysql_real_data_home, + get_relative_path(MYSQL_DATADIR)) - mysql_real_data_home); + /* Replication parameters */ + master_info_file= (char*) "master.info", + relay_log_info_file= (char*) "relay-log.info"; + report_user= report_password = report_host= 0; /* TO BE DELETED */ + opt_relay_logname= opt_relaylog_index_name= 0; + slave_retried_transactions= 0; + transactions_multi_engine= 0; + rpl_transactions_multi_engine= 0; + transactions_gtid_foreign_engine= 0; + log_bin_basename= NULL; + log_bin_index= NULL; + + /* Variables in libraries */ + charsets_dir= 0; + default_character_set_name= (char*) MYSQL_DEFAULT_CHARSET_NAME; + default_collation_name= compiled_default_collation_name; + character_set_filesystem_name= (char*) "binary"; + lc_messages= (char*) "en_US"; + lc_time_names_name= (char*) "en_US"; + + /* Variables that depends on compile options */ +#ifndef DBUG_OFF + default_dbug_option=IF_WIN("d:t:i:O,\\mariadbd.trace", + "d:t:i:o,/tmp/mariadbd.trace"); + current_dbug_option= default_dbug_option; +#endif + opt_error_log= IF_WIN(1,0); +#ifdef ENABLED_PROFILING + have_profiling = SHOW_OPTION_YES; +#else + have_profiling = SHOW_OPTION_NO; +#endif + +#if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY) + have_ssl=SHOW_OPTION_YES; +#if defined(HAVE_WOLFSSL) + have_openssl= SHOW_OPTION_NO; +#else + have_openssl= SHOW_OPTION_YES; +#endif +#else + have_openssl= have_ssl= SHOW_OPTION_NO; +#endif +#ifdef HAVE_BROKEN_REALPATH + have_symlink=SHOW_OPTION_NO; +#else + have_symlink=SHOW_OPTION_YES; +#endif +#ifdef HAVE_DLOPEN + have_dlopen=SHOW_OPTION_YES; +#else + have_dlopen=SHOW_OPTION_NO; +#endif +#ifdef 
HAVE_QUERY_CACHE + have_query_cache=SHOW_OPTION_YES; +#else + have_query_cache=SHOW_OPTION_NO; +#endif +#ifdef HAVE_SPATIAL + have_geometry=SHOW_OPTION_YES; +#else + have_geometry=SHOW_OPTION_NO; +#endif +#ifdef HAVE_RTREE_KEYS + have_rtree_keys=SHOW_OPTION_YES; +#else + have_rtree_keys=SHOW_OPTION_NO; +#endif +#ifdef HAVE_CRYPT + have_crypt=SHOW_OPTION_YES; +#else + have_crypt=SHOW_OPTION_NO; +#endif +#ifdef HAVE_COMPRESS + have_compress= SHOW_OPTION_YES; +#else + have_compress= SHOW_OPTION_NO; +#endif +#ifdef HAVE_LIBWRAP + libwrapName= NullS; +#endif +#ifdef HAVE_OPENSSL + des_key_file = 0; +#ifndef EMBEDDED_LIBRARY + ssl_acceptor_fd= 0; +#endif /* ! EMBEDDED_LIBRARY */ +#endif /* HAVE_OPENSSL */ + +#if defined(_WIN32) + /* Allow Win32 users to move MySQL anywhere */ + { + char prg_dev[LIBLEN]; + char executing_path_name[LIBLEN]; + if (!test_if_hard_path(my_progname)) + { + // we don't want to use GetModuleFileName inside of my_path since + // my_path is a generic path dereferencing function and here we care + // only about the executing binary. 
+ GetModuleFileName(NULL, executing_path_name, sizeof(executing_path_name)); + my_path(prg_dev, executing_path_name, NULL); + } + else + my_path(prg_dev, my_progname, "mysql/bin"); + // Remove 'bin' to get base dir + safe_strcat(prg_dev, sizeof(prg_dev), "/../"); + cleanup_dirname(mysql_home,prg_dev); + } +#else + const char *tmpenv; + if (!(tmpenv = getenv("MY_BASEDIR_VERSION"))) + tmpenv = DEFAULT_MYSQL_HOME; + strmake_buf(mysql_home, tmpenv); + set_sys_var_value_origin(&mysql_home_ptr, sys_var::ENV); +#endif + + if (wsrep_init_vars()) + return 1; + + return 0; +} + +my_bool +mysqld_get_one_option(const struct my_option *opt, const char *argument, + const char *filename) +{ + if (opt->app_type) + { + sys_var *var= (sys_var*) opt->app_type; + if (argument == autoset_my_option) + { + var->value_origin= sys_var::AUTO; + return 0; + } + if (*filename) + { + var->origin_filename= filename; + var->value_origin= sys_var::CONFIG; + } + else + var->value_origin= sys_var::COMMAND_LINE; + } + + switch(opt->id) { + case '#': +#ifndef DBUG_OFF + if (!argument) + argument= (char*) default_dbug_option; + if (argument[0] == '0' && !argument[1]) + { + DEBUGGER_OFF; + break; + } + DEBUGGER_ON; + if (argument[0] == '1' && !argument[1]) + break; + DBUG_SET_INITIAL(argument); + current_dbug_option= argument; + opt_endinfo=1; /* unireg: memory allocation */ +#else + sql_print_warning("'%s' is disabled in this build", opt->name); +#endif + break; + case OPT_REMOVED_OPTION: + sql_print_warning("'%s' was removed. It does nothing now and exists only " + "for compatibility with old my.cnf files.", opt->name); + break; + case OPT_MYSQL_COMPATIBILITY: + sql_print_warning("'%s' is MySQL 5.6 / 5.7 compatible option. Not used or " + "needed in MariaDB.", opt->name); + break; + case OPT_MYSQL_TO_BE_IMPLEMENTED: + sql_print_warning("'%s' is MySQL 5.6 / 5.7 compatible option. 
To be " + "implemented in later versions.", opt->name); + break; + case 'a': + SYSVAR_AUTOSIZE(global_system_variables.sql_mode, MODE_ANSI); + SYSVAR_AUTOSIZE(global_system_variables.tx_isolation, ISO_SERIALIZABLE); + break; + case 'b': + strmake_buf(mysql_home, argument); + break; + case 'C': + if (default_collation_name == compiled_default_collation_name) + default_collation_name= 0; + break; + case 'h': + strmake_buf(mysql_real_data_home, argument); + /* Correct pointer set by my_getopt (for embedded library) */ + mysql_real_data_home_ptr= mysql_real_data_home; + break; + case 'u': + if (!mysqld_user || !strcmp(mysqld_user, argument)) + mysqld_user= argument; + else + sql_print_warning("Ignoring user change to '%s' because the user was set to '%s' earlier on the command line\n", argument, mysqld_user); + break; + case 'L': + strmake_buf(lc_messages_dir, argument); + break; + case OPT_BINLOG_FORMAT: + binlog_format_used= true; + break; +#include + case 'V': + if (argument) + { + strmake(server_version, argument, sizeof(server_version) - 1); + set_sys_var_value_origin(&server_version_ptr, + *filename ? sys_var::CONFIG : sys_var::COMMAND_LINE, filename); + using_custom_server_version= true; + } +#ifndef EMBEDDED_LIBRARY + else + { + print_version(); + opt_abort= 1; // Abort after parsing all options + } +#endif /*EMBEDDED_LIBRARY*/ + break; + case 'W': + if (!argument) + global_system_variables.log_warnings++; + else if (argument == disabled_my_option) + global_system_variables.log_warnings= 0L; + else + global_system_variables.log_warnings= atoi(argument); + break; + case 'T': + test_flags= argument ? 
((uint) atoi(argument) & ~TEST_BLOCKING) : 0; + opt_endinfo=1; + break; + case OPT_THREAD_CONCURRENCY: + WARN_DEPRECATED_NO_REPLACEMENT(NULL, "THREAD_CONCURRENCY"); + break; + case (int) OPT_ISAM_LOG: + opt_myisam_log=1; + break; + case (int) OPT_BIN_LOG: + opt_bin_log= MY_TEST(argument != disabled_my_option); + opt_bin_log_used= 1; + break; + case (int) OPT_LOG_BASENAME: + { + if (opt_log_basename[0] == 0 || strchr(opt_log_basename, FN_EXTCHAR) || + strchr(opt_log_basename,FN_LIBCHAR) || + !is_filename_allowed(opt_log_basename, strlen(opt_log_basename), FALSE)) + { + sql_print_error("Wrong argument for --log-basename. It can't be empty or contain '.' or '" FN_DIRSEP "'. It must be valid filename."); + return 1; + } + if (log_error_file_ptr != disabled_my_option) + SYSVAR_AUTOSIZE(log_error_file_ptr, opt_log_basename); + + /* General log file */ + make_default_log_name(&opt_logname, ".log", false); + /* Slow query log file */ + make_default_log_name(&opt_slow_logname, "-slow.log", false); + /* Binary log file */ + make_default_log_name(&opt_bin_logname, "-bin", true); + /* Binary log index file */ + make_default_log_name(&opt_binlog_index_name, "-bin.index", true); + set_sys_var_value_origin(&opt_logname, sys_var::AUTO); + set_sys_var_value_origin(&opt_slow_logname, sys_var::AUTO); + if (!opt_logname || !opt_slow_logname || !opt_bin_logname || + !opt_binlog_index_name) + return 1; + +#ifdef HAVE_REPLICATION + /* Relay log file */ + make_default_log_name(&opt_relay_logname, "-relay-bin", true); + /* Relay log index file */ + make_default_log_name(&opt_relaylog_index_name, "-relay-bin.index", true); + set_sys_var_value_origin(&opt_relay_logname, sys_var::AUTO); + if (!opt_relay_logname || !opt_relaylog_index_name) + return 1; +#endif + + if (IS_SYSVAR_AUTOSIZE(&pidfile_name_ptr)) + { + SYSVAR_AUTOSIZE(pidfile_name_ptr, pidfile_name); + /* PID file */ + strmake(pidfile_name, argument, sizeof(pidfile_name)-5); + strmov(fn_ext(pidfile_name),".pid"); + } + break; + } + 
case (int)OPT_EXPIRE_LOGS_DAYS: + { + binlog_expire_logs_seconds= (ulong)(expire_logs_days*24*60*60); + break; + } + case (int)OPT_BINLOG_EXPIRE_LOGS_SECONDS: + { + expire_logs_days= (binlog_expire_logs_seconds/double (24*60*60)); + break; + } + +#ifdef HAVE_REPLICATION + case (int)OPT_REPLICATE_IGNORE_DB: + { + cur_rpl_filter->add_ignore_db(argument); + break; + } + case (int)OPT_REPLICATE_DO_DB: + { + cur_rpl_filter->add_do_db(argument); + break; + } + case (int)OPT_REPLICATE_REWRITE_DB: + { + /* See also OPT_REWRITE_DB handling in client/mysqlbinlog.cc */ + if (cur_rpl_filter->add_rewrite_db(argument)) + { + sql_print_error("Bad syntax in replicate-rewrite-db.Expected syntax is FROM->TO."); + return 1; + } + break; + } + case (int)OPT_SLAVE_PARALLEL_MODE: + { + /* Store latest mode for Master::Info */ + cur_rpl_filter->set_parallel_mode + ((enum_slave_parallel_mode)opt_slave_parallel_mode); + break; + } + + case (int)OPT_BINLOG_IGNORE_DB: + { + binlog_filter->add_ignore_db(argument); + break; + } + case (int)OPT_BINLOG_DO_DB: + { + binlog_filter->add_do_db(argument); + break; + } + case (int)OPT_REPLICATE_DO_TABLE: + { + if (cur_rpl_filter->add_do_table(argument)) + { + sql_print_error("Could not add do table rule '%s'", argument); + return 1; + } + break; + } + case (int)OPT_REPLICATE_WILD_DO_TABLE: + { + if (cur_rpl_filter->add_wild_do_table(argument)) + { + sql_print_error("Could not add do table rule '%s'", argument); + return 1; + } + break; + } + case (int)OPT_REPLICATE_WILD_IGNORE_TABLE: + { + if (cur_rpl_filter->add_wild_ignore_table(argument)) + { + sql_print_error("Could not add ignore table rule '%s'", argument); + return 1; + } + break; + } + case (int)OPT_REPLICATE_IGNORE_TABLE: + { + if (cur_rpl_filter->add_ignore_table(argument)) + { + sql_print_error("Could not add ignore table rule '%s'", argument); + return 1; + } + break; + } +#endif /* HAVE_REPLICATION */ + case (int) OPT_SAFE: + opt_specialflag|= SPECIAL_SAFE_MODE | SPECIAL_NO_NEW_FUNC; + 
SYSVAR_AUTOSIZE(delay_key_write_options, (uint) DELAY_KEY_WRITE_NONE); + SYSVAR_AUTOSIZE(myisam_recover_options, HA_RECOVER_DEFAULT); + ha_open_options&= ~(HA_OPEN_DELAY_KEY_WRITE); +#ifdef HAVE_QUERY_CACHE + SYSVAR_AUTOSIZE(query_cache_size, 0); +#endif + sql_print_warning("The syntax '--safe-mode' is deprecated and will be " + "removed in a future release."); + break; + case (int) OPT_SKIP_HOST_CACHE: + opt_specialflag|= SPECIAL_NO_HOST_CACHE; + break; + case OPT_CONSOLE: + if (opt_console) + opt_error_log= 0; // Force logs to stdout + break; + case OPT_BOOTSTRAP: + opt_noacl=opt_bootstrap=1; +#ifdef _WIN32 + { + /* + Check if security descriptor is passed from + mysql_install_db.exe. + Used by Windows installer to correctly setup + privileges on the new directories. + */ + char* dir_sddl = getenv("MARIADB_NEW_DIRECTORY_SDDL"); + if (dir_sddl) + { + ConvertStringSecurityDescriptorToSecurityDescriptor( + dir_sddl, SDDL_REVISION_1, &my_dir_security_attributes.lpSecurityDescriptor, NULL); + DBUG_ASSERT(my_dir_security_attributes.lpSecurityDescriptor); + } + } +#endif + break; + case OPT_SERVER_ID: + ::server_id= global_system_variables.server_id; + break; + case OPT_SEQURE_FILE_PRIV: + if (argument == disabled_my_option) + { + my_free(opt_secure_file_priv); + opt_secure_file_priv= 0; + } + break; + case OPT_LOWER_CASE_TABLE_NAMES: + lower_case_table_names_used= 1; + break; +#if defined(ENABLED_DEBUG_SYNC) + case OPT_DEBUG_SYNC_TIMEOUT: + /* + Debug Sync Facility. See debug_sync.cc. + Default timeout for WAIT_FOR action. + Default value is zero (facility disabled). + If option is given without an argument, supply a non-zero value. + */ + if (!argument) + { + /* purecov: begin tested */ + opt_debug_sync_timeout= DEBUG_SYNC_DEFAULT_WAIT_TIMEOUT; + /* purecov: end */ + } + break; +#endif /* defined(ENABLED_DEBUG_SYNC) */ + case OPT_LOG_ERROR: + /* + "No --log-error" == "write errors to stderr", + "--log-error without argument" == "write errors to a file". 
+ */ + if (argument == NULL) /* no argument */ + log_error_file_ptr= const_cast(""); + break; + case OPT_IGNORE_DB_DIRECTORY: + opt_ignore_db_dirs= NULL; // will be set in ignore_db_dirs_process_additions + if (*argument == 0) + ignore_db_dirs_reset(); + else + { + if (push_ignored_db_dir(argument)) + { + sql_print_error("Can't start server: " + "cannot process --ignore-db-dir=%.*s", + FN_REFLEN, argument); + return 1; + } + } + break; + case OPT_PLUGIN_LOAD: + free_list(opt_plugin_load_list_ptr); + if (argument == disabled_my_option) + break; // Resets plugin list + /* fall through */ + case OPT_PLUGIN_LOAD_ADD: + opt_plugin_load_list_ptr->push_back(new i_string(argument)); + break; + case OPT_PFS_INSTRUMENT: + { +#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE +#ifndef EMBEDDED_LIBRARY + /* Parse instrument name and value from argument string */ + const char *name= argument, *ptr, *val; + + /* Trim leading spaces from instrument name */ + while (*name && my_isspace(mysqld_charset, *name)) + name++; + + /* Assignment required */ + if (!(ptr= strchr(name, '='))) + { + my_getopt_error_reporter(WARNING_LEVEL, + "Missing value for performance_schema_instrument " + "'%s'", argument); + return 0; + } + + /* Option value */ + val= ptr + 1; + + /* Trim trailing spaces and slashes from instrument name */ + while (ptr > name && (my_isspace(mysqld_charset, ptr[-1]) || + ptr[-1] == '/')) + ptr--; + if (ptr == name) + { + my_getopt_error_reporter(WARNING_LEVEL, + "Invalid instrument name for " + "performance_schema_instrument '%s'", name); + return 0; + } + name= strmake_root(&startup_root, name, (size_t) (ptr - name)); + + /* Trim leading spaces from option value */ + while (*val && my_isspace(mysqld_charset, *val)) + val++; + + /* Find end of value */ + for (ptr= val; *ptr && !my_isspace(mysqld_charset, *ptr) ; ptr++) + {} + if (ptr == val) + { + my_getopt_error_reporter(WARNING_LEVEL, + "No value for performance_schema_instrument " + "'%s'", name); + return 0; + } + val= 
strmake_root(&startup_root, val, (size_t) (ptr - val)); + + /* Add instrument name and value to array of configuration options */ + if (add_pfs_instr_to_array(name, val)) + { + my_getopt_error_reporter(WARNING_LEVEL, + "Invalid value for performance_schema_instrument " + "'%s'", name); + return 0; + } +#endif /* EMBEDDED_LIBRARY */ +#endif + break; + } +#ifdef WITH_WSREP + case OPT_WSREP_CAUSAL_READS: + { + if (global_system_variables.wsrep_causal_reads) + { + WSREP_WARN("option --wsrep-causal-reads is deprecated"); + if (!(global_system_variables.wsrep_sync_wait & WSREP_SYNC_WAIT_BEFORE_READ)) + { + WSREP_WARN("--wsrep-causal-reads=ON takes precedence over --wsrep-sync-wait=%u. " + "WSREP_SYNC_WAIT_BEFORE_READ is on", + global_system_variables.wsrep_sync_wait); + global_system_variables.wsrep_sync_wait |= WSREP_SYNC_WAIT_BEFORE_READ; + } + } + else + { + if (global_system_variables.wsrep_sync_wait & WSREP_SYNC_WAIT_BEFORE_READ) { + WSREP_WARN("--wsrep-sync-wait=%u takes precedence over --wsrep-causal-reads=OFF. " + "WSREP_SYNC_WAIT_BEFORE_READ is on", + global_system_variables.wsrep_sync_wait); + global_system_variables.wsrep_causal_reads = 1; + } + } + break; + } + case OPT_WSREP_SYNC_WAIT: + global_system_variables.wsrep_causal_reads= + MY_TEST(global_system_variables.wsrep_sync_wait & + WSREP_SYNC_WAIT_BEFORE_READ); + break; +#endif /* WITH_WSREP */ + } + return 0; +} + + +/** Handle arguments for multiple key caches. 
*/

C_MODE_START

/*
  my_getopt callback returning the address where a parsed option value
  should be stored.

  For multi-instance options (key caches and, with replication, per-master
  filters) the storage lives inside a per-name object that is looked up --
  or created on first use -- from the option's instance name.  For all
  other options the statically configured option->value is returned.

  @param name    instance name (e.g. key cache name); 'length' bytes long
  @param length  length of 'name'
  @param option  option descriptor currently being processed
  @param error   [out] set to EXIT_OUT_OF_MEMORY if a cache/filter cannot
                 be allocated; otherwise left at 0
  @return address to store the value at, or 0 (filter options are instead
          applied later by mysqld_get_one_option(), using cur_rpl_filter
          saved here)
*/
static void*
mysql_getopt_value(const char *name, uint length,
                   const struct my_option *option, int *error)
{
  if (error)
    *error= 0;
  switch (option->id) {
  case OPT_KEY_BUFFER_SIZE:
  case OPT_KEY_CACHE_BLOCK_SIZE:
  case OPT_KEY_CACHE_DIVISION_LIMIT:
  case OPT_KEY_CACHE_AGE_THRESHOLD:
  case OPT_KEY_CACHE_PARTITIONS:
  case OPT_KEY_CACHE_CHANGED_BLOCKS_HASH_SIZE:
  {
    KEY_CACHE *key_cache;
    /* Look up (or lazily create) the named key cache */
    if (unlikely(!(key_cache= get_or_create_key_cache(name, length))))
    {
      if (error)
        *error= EXIT_OUT_OF_MEMORY;
      return 0;
    }
    /* Map the option id onto the matching member of this key cache */
    switch (option->id) {
    case OPT_KEY_BUFFER_SIZE:
      return &key_cache->param_buff_size;
    case OPT_KEY_CACHE_BLOCK_SIZE:
      return &key_cache->param_block_size;
    case OPT_KEY_CACHE_DIVISION_LIMIT:
      return &key_cache->param_division_limit;
    case OPT_KEY_CACHE_AGE_THRESHOLD:
      return &key_cache->param_age_threshold;
    case OPT_KEY_CACHE_PARTITIONS:
      return (uchar**) &key_cache->param_partitions;
    case OPT_KEY_CACHE_CHANGED_BLOCKS_HASH_SIZE:
      return (uchar**) &key_cache->changed_blocks_hash_size;
    }
  }
  /* We return in all cases above. Let us silence -Wimplicit-fallthrough */
  DBUG_ASSERT(0);
#ifdef HAVE_REPLICATION
  /* fall through */
  case OPT_REPLICATE_DO_DB:
  case OPT_REPLICATE_DO_TABLE:
  case OPT_REPLICATE_IGNORE_DB:
  case OPT_REPLICATE_IGNORE_TABLE:
  case OPT_REPLICATE_WILD_DO_TABLE:
  case OPT_REPLICATE_WILD_IGNORE_TABLE:
  case OPT_REPLICATE_REWRITE_DB:
  case OPT_SLAVE_PARALLEL_MODE:
  {
    /* Store current filter for mysqld_get_one_option() */
    if (!(cur_rpl_filter= get_or_create_rpl_filter(name, length)))
    {
      if (error)
        *error= EXIT_OUT_OF_MEMORY;
    }
    if (option->id == OPT_SLAVE_PARALLEL_MODE)
    {
      /*
        Ensure parallel_mode variable is shown in --help. The other
        variables are not easily printable here.
      */
      return (char**) &opt_slave_parallel_mode;
    }
    return 0;
  }
#endif
  }
  return option->value;
}

/*
  Error reporter installed into my_getopt during option parsing.
  Routes parse-time diagnostics into the server error log, filtering
  low-priority warnings during bootstrap (see comment below).
*/
static void option_error_reporter(enum loglevel level, const char *format, ...)
{
  va_list args;
  va_start(args, format);

  /*
    Don't print warnings for --loose options during bootstrap if
    log_warnings <= 2 (2 is default) as warnings during bootstrap
    can confuse people when running mysql_install_db and other scripts.
    Don't print loose warnings at all if log_warnings <= 1
  */
  if (level == ERROR_LEVEL ||
      (global_system_variables.log_warnings >
       (ulong) (1 + MY_TEST(opt_bootstrap))))
  {
    vprint_msg_to_log(level, format, args);
  }
  va_end(args);
}

C_MODE_END

/**
  Get server options from the command line,
  and perform related server initializations.
  @param [in, out] argc_ptr       command line options (count)
  @param [in, out] argv_ptr       command line options (values)
  @return 0 on success

  @todo
  - FIXME add EXIT_TOO_MANY_ARGUMENTS to "mysys_err.h" and return that code?
*/
static int get_options(int *argc_ptr, char ***argv_ptr)
{
  int ho_error;

  /* Install the callbacks used by my_getopt (see definitions above) */
  my_getopt_get_addr= mysql_getopt_value;
  my_getopt_error_reporter= option_error_reporter;

  /* prepare all_options array */
  my_init_dynamic_array(PSI_INSTRUMENT_ME, &all_options, sizeof(my_option),
                        array_elements(my_long_options) + sys_var_elements(),
                        array_elements(my_long_options)/4, MYF(0));
  add_many_options(&all_options, my_long_options, array_elements(my_long_options));
  sys_var_add_options(&all_options, 0);
  add_terminator(&all_options);

  /* Skip unknown options so that they may be processed later by plugins */
  my_getopt_skip_unknown= TRUE;

  if ((ho_error= handle_options(argc_ptr, argv_ptr,
                                (my_option*) (all_options.buffer),
                                mysqld_get_one_option)))
    return ho_error;

  /* With --help the array is kept alive so the help text can print it */
  if (!opt_help)
    delete_dynamic(&all_options);
  else
    opt_abort= 1;

  /* Add back the program name handle_options removes */
  (*argc_ptr)++;
  (*argv_ptr)--;

  disable_log_notes= opt_silent_startup;

  /*
    Options have been parsed. Now some of them need additional special
    handling, like custom value checking, checking of incompatibilities
    between options, setting of multiple variables, etc.
    Do them here.
  */

  if (global_system_variables.old_mode)
  {
    global_system_variables.old_behavior|= (OLD_MODE_NO_PROGRESS_INFO |
                                            OLD_MODE_IGNORE_INDEX_ONLY_FOR_JOIN |
                                            OLD_MODE_COMPAT_5_1_CHECKSUM);
    sql_print_warning("--old is deprecated and will be removed in a future "
                      "release. Please use --old-mode instead. ");
  }

  if (global_system_variables.net_buffer_length >
      global_system_variables.max_allowed_packet)
  {
    sql_print_warning("net_buffer_length (%lu) is set to be larger "
                      "than max_allowed_packet (%lu). Please rectify.",
                      global_system_variables.net_buffer_length,
                      global_system_variables.max_allowed_packet);
  }

  if (log_error_file_ptr != disabled_my_option)
    opt_error_log= 1;
  else
    log_error_file_ptr= const_cast<char*>("");

  opt_init_connect.length=strlen(opt_init_connect.str);
  opt_init_slave.length=strlen(opt_init_slave.str);

  if (global_system_variables.low_priority_updates)
    thr_upgraded_concurrent_insert_lock= TL_WRITE_LOW_PRIORITY;

  if (ft_boolean_check_syntax_string((uchar*) ft_boolean_syntax,
                                     strlen(ft_boolean_syntax),
                                     system_charset_info))
  {
    sql_print_error("Invalid ft-boolean-syntax string: %s",
                    ft_boolean_syntax);
    return 1;
  }

  if (opt_disable_networking)
    mysqld_port= mysqld_extra_port= 0;

  if (opt_skip_show_db)
    opt_specialflag|= SPECIAL_SKIP_SHOW_DB;

  if (myisam_flush)
    flush_time= 0;

#ifdef HAVE_REPLICATION
  if (init_slave_skip_errors(opt_slave_skip_errors))
    return 1;
  if (init_slave_transaction_retry_errors(opt_slave_transaction_retry_errors))
    return 1;
#endif

  if (global_system_variables.max_join_size == HA_POS_ERROR)
    global_system_variables.option_bits|= OPTION_BIG_SELECTS;
  else
    global_system_variables.option_bits&= ~OPTION_BIG_SELECTS;

  if (opt_support_flashback)
  {
    /* Force binary logging */
    if (!opt_bin_logname)
      opt_bin_logname= (char*) "";                  // Use default name
    opt_bin_log= opt_bin_log_used= 1;

    /* Force format to row */
    binlog_format_used= 1;
    global_system_variables.binlog_format= BINLOG_FORMAT_ROW;
  }

  /* Galera (wsrep) replication only works with row-based binlog format */
  if (!opt_bootstrap && WSREP_PROVIDER_EXISTS && WSREP_ON &&
      global_system_variables.binlog_format != BINLOG_FORMAT_ROW)
  {

    WSREP_ERROR ("Only binlog_format = 'ROW' is currently supported. "
                 "Configured value: '%s'. Please adjust your configuration.",
                 binlog_format_names[global_system_variables.binlog_format]);
    return 1;
  }

  // Synchronize @@global.autocommit on --autocommit
  const ulonglong turn_bit_on= opt_autocommit ?
    OPTION_AUTOCOMMIT : OPTION_NOT_AUTOCOMMIT;
  global_system_variables.option_bits=
    (global_system_variables.option_bits &
     ~(OPTION_NOT_AUTOCOMMIT | OPTION_AUTOCOMMIT)) | turn_bit_on;

  global_system_variables.sql_mode=
    expand_sql_mode(global_system_variables.sql_mode);
#if !defined(HAVE_REALPATH) || defined(HAVE_BROKEN_REALPATH)
  my_use_symdir=0;
  my_disable_symlinks=1;
  have_symlink=SHOW_OPTION_NO;
#else
  if (!my_use_symdir)
  {
    my_disable_symlinks=1;
    have_symlink=SHOW_OPTION_DISABLED;
  }
#endif
  if (opt_debugging)
  {
    /* Allow break with SIGINT, no core or stack trace */
    test_flags|= TEST_SIGINT;
    test_flags&= ~TEST_CORE_ON_SIGNAL;
  }
  /* Set global MyISAM variables from delay_key_write_options */
  fix_delay_key_write(0, 0, OPT_GLOBAL);

#ifndef EMBEDDED_LIBRARY
  if (mysqld_chroot)
    set_root(mysqld_chroot);
#else
  SYSVAR_AUTOSIZE(thread_handling, SCHEDULER_NO_THREADS);
  max_allowed_packet= global_system_variables.max_allowed_packet;
  net_buffer_length= global_system_variables.net_buffer_length;
#endif
  if (fix_paths())
    return 1;

  /*
    Set some global variables from the global_system_variables
    In most cases the global variables will not be used
  */
  my_disable_locking= myisam_single_user= MY_TEST(opt_external_locking == 0);
  my_default_record_cache_size=global_system_variables.read_buff_size;

  /*
    Log mysys errors when we don't have a thd or thd->log_all_errors is set
    (recovery) to the log.  This is mainly useful for debugging strange system
    errors.
  */
  if (global_system_variables.log_warnings >= 10)
    my_global_flags= MY_WME | ME_NOTE;
  /* Log all errors not handled by thd->handle_error() to my_message_sql() */
  if (global_system_variables.log_warnings >= 11)
    my_global_flags|= ME_ERROR_LOG;
  if (my_assert_on_error)
    debug_assert_if_crashed_table= 1;

  /* Convert the double (seconds) sysvars into integer microseconds */
  global_system_variables.long_query_time= (ulonglong)
    (global_system_variables.long_query_time_double * 1e6 + 0.1);
  global_system_variables.max_statement_time= (ulonglong)
    (global_system_variables.max_statement_time_double * 1e6 + 0.1);

  if (opt_short_log_format)
    opt_specialflag|= SPECIAL_SHORT_LOG_FORMAT;

  if (init_global_datetime_format(MYSQL_TIMESTAMP_DATE,
                                  &global_date_format) ||
      init_global_datetime_format(MYSQL_TIMESTAMP_TIME,
                                  &global_time_format) ||
      init_global_datetime_format(MYSQL_TIMESTAMP_DATETIME,
                                  &global_datetime_format))
    return 1;

#ifdef EMBEDDED_LIBRARY
  one_thread_scheduler(thread_scheduler, &connection_count);
  /*
    It looks like extra_connection_count should be passed here but
    it's been using connection_count for the last 10+ years and
    no-one has requested a change so let's not surprise anyone.
  */
  one_thread_scheduler(extra_thread_scheduler, &connection_count);
#else

  if (thread_handling <= SCHEDULER_ONE_THREAD_PER_CONNECTION)
    one_thread_per_connection_scheduler(thread_scheduler, &max_connections,
                                        &connection_count);
  else if (thread_handling == SCHEDULER_NO_THREADS)
    one_thread_scheduler(thread_scheduler, &connection_count);
  else
    pool_of_threads_scheduler(thread_scheduler,  &max_connections,
                              &connection_count);

  /* The "extra" port always uses one thread per connection */
  one_thread_per_connection_scheduler(extra_thread_scheduler,
                                      &extra_max_connections,
                                      &extra_connection_count);
#endif

  opt_readonly= read_only;

  /* Remember if max_user_connections was 0 at startup */
  max_user_connections_checking= global_system_variables.max_user_connections != 0;

#ifdef HAVE_REPLICATION
  {
    sys_var *max_relay_log_size_var, *max_binlog_size_var;
    /* If max_relay_log_size is 0, then set it to max_binlog_size */
    if (!global_system_variables.max_relay_log_size)
      SYSVAR_AUTOSIZE(global_system_variables.max_relay_log_size,
                      max_binlog_size);

    /*
      Fix so that DEFAULT and limit checking works with max_relay_log_size
      (Yes, this is a hack, but it's required as the definition of
      max_relay_log_size allows it to be set to 0).
    */
    max_relay_log_size_var= intern_find_sys_var(STRING_WITH_LEN("max_relay_log_size"));
    max_binlog_size_var= intern_find_sys_var(STRING_WITH_LEN("max_binlog_size"));
    if (max_binlog_size_var && max_relay_log_size_var)
    {
      max_relay_log_size_var->option.min_value=
        max_binlog_size_var->option.min_value;
      max_relay_log_size_var->option.def_value=
        max_binlog_size_var->option.def_value;
    }
    slave_max_statement_time=
      double2ulonglong(slave_max_statement_time_double * 1e6);
  }
#endif

  /* Ensure that some variables are not set higher than needed */
  if (thread_cache_size > max_connections)
    SYSVAR_AUTOSIZE(thread_cache_size, max_connections);

  return 0;
}


/*
  Create version name for running mysqld version
  We automatically add suffixes -debug, -embedded and -log to the version
  name to make the version more descriptive.
  (MYSQL_SERVER_SUFFIX is set by the compilation environment)
*/

void set_server_version(char *buf, size_t size)
{
  bool is_log= opt_log || global_system_variables.sql_log_slow || opt_bin_log;
  bool is_debug= IF_DBUG(!strstr(MYSQL_SERVER_SUFFIX_STR, "-debug"), 0);
  const char *is_valgrind=
#ifdef HAVE_VALGRIND
    !strstr(MYSQL_SERVER_SUFFIX_STR, "-valgrind") ? "-valgrind" :
#endif
    "";
  strxnmov(buf, size - 1,
           MYSQL_SERVER_VERSION,
           MYSQL_SERVER_SUFFIX_STR,
           IF_EMBEDDED("-embedded", ""),
           is_valgrind,
           is_debug ? "-debug" : "",
           is_log ? "-log" : "",
           NullS);
}


/*
  Strip the compiled-in DEFAULT_MYSQL_HOME prefix (and any following path
  separators) from an absolute path, turning it into a path relative to
  the installation home.  Paths outside the home are returned unchanged.
*/
static char *get_relative_path(const char *path)
{
  if (test_if_hard_path(path) &&
      is_prefix(path,DEFAULT_MYSQL_HOME) &&
      strcmp(DEFAULT_MYSQL_HOME,FN_ROOTDIR))
  {
    path+=(uint) strlen(DEFAULT_MYSQL_HOME);
    while (*path == FN_LIBCHAR || *path == FN_LIBCHAR2)
      path++;
  }
  return (char*) path;
}


/**
  Fix filename and replace extension where 'dir' is relative to
  mysql_real_data_home.
  @return
  1 if len(path) > FN_REFLEN
*/

bool
fn_format_relative_to_data_home(char * to, const char *name,
                                const char *dir, const char *extension)
{
  char tmp_path[FN_REFLEN];
  /* A relative 'dir' is anchored under the real data home */
  if (!test_if_hard_path(dir))
  {
    strxnmov(tmp_path,sizeof(tmp_path)-1, mysql_real_data_home,
             dir, NullS);
    dir=tmp_path;
  }
  return !fn_format(to, name, dir, extension,
                    MY_APPEND_EXT | MY_UNPACK_FILENAME | MY_SAFE_PATH);
}


/**
  Test a file path to determine if the path is compatible with the secure file
  path restriction.

  @param path null terminated character string

  @return
    @retval TRUE The path is secure
    @retval FALSE The path isn't secure
*/

bool is_secure_file_path(char *path)
{
  char buff1[FN_REFLEN], buff2[FN_REFLEN];
  size_t opt_secure_file_priv_len;
  /*
    All paths are secure if opt_secure_file_path is 0
  */
  if (!opt_secure_file_priv)
    return TRUE;

  opt_secure_file_priv_len= strlen(opt_secure_file_priv);

  if (strlen(path) >= FN_REFLEN)
    return FALSE;

  /* Canonicalize 'path' so symlinks cannot escape the secure directory */
  if (my_realpath(buff1, path, 0))
  {
    /*
      The supplied file path might have been a file and not a directory.
    */
    size_t length= dirname_length(path);        // Guaranteed to be < FN_REFLEN
    memcpy(buff2, path, length);
    buff2[length]= '\0';
    if (length == 0 || my_realpath(buff1, buff2, 0))
      return FALSE;
  }
  convert_dirname(buff2, buff1, NullS);
  /*
    Prefix-compare against opt_secure_file_priv; use a case-insensitive
    collation compare when the file system itself is case insensitive.
  */
  if (!lower_case_file_system)
  {
    if (strncmp(opt_secure_file_priv, buff2, opt_secure_file_priv_len))
      return FALSE;
  }
  else
  {
    if (files_charset_info->strnncoll(buff2, strlen(buff2),
                                      opt_secure_file_priv,
                                      opt_secure_file_priv_len,
                                      TRUE))
      return FALSE;
  }
  return TRUE;
}


/*
  Resolve and normalize all server directory paths (home, data dir,
  plugin dir, charset dir, tmpdir, pid file, secure-file-priv) into
  absolute, canonical, trailing-separator form.
  @return 0 on success, 1 on fatal path error
*/
static int fix_paths(void)
{
  char buff[FN_REFLEN],*pos;
  DBUG_ENTER("fix_paths");

  convert_dirname(mysql_home,mysql_home,NullS);
  /* Resolve symlinks to allow 'mysql_home' to be a relative symlink */
  my_realpath(mysql_home,mysql_home,MYF(0));
  /* Ensure that mysql_home ends in FN_LIBCHAR */
  pos=strend(mysql_home);
  if (pos[-1] != FN_LIBCHAR)
  {
    pos[0]= FN_LIBCHAR;
    pos[1]= 0;
  }
  convert_dirname(lc_messages_dir, lc_messages_dir, NullS);
  convert_dirname(mysql_real_data_home,mysql_real_data_home,NullS);
  (void) my_load_path(mysql_home,mysql_home,""); // Resolve current dir
  (void) my_load_path(mysql_real_data_home,mysql_real_data_home,mysql_home);
  (void) my_load_path(pidfile_name, pidfile_name_ptr, mysql_real_data_home);

  convert_dirname(opt_plugin_dir, opt_plugin_dir_ptr ? opt_plugin_dir_ptr :
                                  get_relative_path(PLUGINDIR), NullS);
  (void) my_load_path(opt_plugin_dir, opt_plugin_dir, mysql_home);
  opt_plugin_dir_ptr= opt_plugin_dir;
  pidfile_name_ptr= pidfile_name;

  my_realpath(mysql_unpacked_real_data_home, mysql_real_data_home, MYF(0));
  mysql_unpacked_real_data_home_len=
    strlen(mysql_unpacked_real_data_home);
  /* Drop a trailing separator so later prefix comparisons are exact */
  if (mysql_unpacked_real_data_home[mysql_unpacked_real_data_home_len-1] == FN_LIBCHAR)
    --mysql_unpacked_real_data_home_len;

  char *sharedir=get_relative_path(SHAREDIR);
  if (test_if_hard_path(sharedir))
    strmake_buf(buff, sharedir);                /* purecov: tested */
  else
    strxnmov(buff,sizeof(buff)-1,mysql_home,sharedir,NullS);
  convert_dirname(buff,buff,NullS);
  (void) my_load_path(lc_messages_dir, lc_messages_dir, buff);

  /* If --character-sets-dir isn't given, use shared library dir */
  if (charsets_dir)
  {
    strmake_buf(mysql_charsets_dir, charsets_dir);
    charsets_dir= mysql_charsets_dir;
  }
  else
  {
    strxnmov(mysql_charsets_dir, sizeof(mysql_charsets_dir)-1, buff,
             CHARSET_DIR, NullS);
    SYSVAR_AUTOSIZE(charsets_dir, mysql_charsets_dir);
  }
  (void) my_load_path(mysql_charsets_dir, mysql_charsets_dir, buff);
  convert_dirname(mysql_charsets_dir, mysql_charsets_dir, NullS);

  if (init_tmpdir(&mysql_tmpdir_list, opt_mysql_tmpdir))
    DBUG_RETURN(1);
  if (!opt_mysql_tmpdir)
    opt_mysql_tmpdir= mysql_tmpdir;
#ifdef HAVE_REPLICATION
  if (!slave_load_tmpdir)
    SYSVAR_AUTOSIZE(slave_load_tmpdir, mysql_tmpdir);
#endif /* HAVE_REPLICATION */
  /*
    Convert the secure-file-priv option to system format, allowing
    a quick strcmp to check if read or write is in an allowed dir
  */
  if (opt_secure_file_priv)
  {
    if (*opt_secure_file_priv == 0)
    {
      /* Empty value means "no restriction": drop the setting entirely */
      my_free(opt_secure_file_priv);
      opt_secure_file_priv= 0;
    }
    else
    {
      if (strlen(opt_secure_file_priv) >= FN_REFLEN)
        opt_secure_file_priv[FN_REFLEN-1]= '\0';
      if (my_realpath(buff, opt_secure_file_priv, 0))
      {
        sql_print_warning("Failed to normalize the argument for --secure-file-priv.");
        DBUG_RETURN(1);
      }
      char *secure_file_real_path= (char *)my_malloc(PSI_INSTRUMENT_ME, FN_REFLEN, MYF(MY_FAE));
      convert_dirname(secure_file_real_path, buff, NullS);
      my_free(opt_secure_file_priv);
      opt_secure_file_priv= secure_file_real_path;
    }
  }
  DBUG_RETURN(0);
}

/**
  Check if file system used for databases is case insensitive.

  @param dir_name			Directory to test

  @retval -1  Don't know (Test failed)
  @retval  0   File system is case sensitive
  @retval  1   File system is case insensitive
*/

static int test_if_case_insensitive(const char *dir_name)
{
  int result= 0;
  File file;
  char buff[FN_REFLEN], buff2[FN_REFLEN];
  MY_STAT stat_info;
  DBUG_ENTER("test_if_case_insensitive");

  /*
    Create a file with a lower-case extension, then probe for it with an
    upper-case name; if the probe succeeds the file system folds case.
  */
  fn_format(buff, opt_log_basename, dir_name, ".lower-test",
            MY_UNPACK_FILENAME | MY_REPLACE_EXT | MY_REPLACE_DIR);
  fn_format(buff2, opt_log_basename, dir_name, ".LOWER-TEST",
            MY_UNPACK_FILENAME | MY_REPLACE_EXT | MY_REPLACE_DIR);
  mysql_file_delete(key_file_casetest, buff2, MYF(0));
  if ((file= mysql_file_create(key_file_casetest,
                               buff, 0666, O_RDWR, MYF(0))) < 0)
  {
    if (!opt_abort)
      sql_print_warning("Can't create test file '%s' (Errcode: %M)", buff, my_errno);
    DBUG_RETURN(-1);
  }
  mysql_file_close(file, MYF(0));
  if (mysql_file_stat(key_file_casetest, buff2, &stat_info, MYF(0)))
    result= 1;					// Can access file
  mysql_file_delete(key_file_casetest, buff, MYF(MY_WME));
  DBUG_PRINT("exit", ("result: %d", result));
  DBUG_RETURN(result);
}


#ifndef EMBEDDED_LIBRARY

/**
  Create file to store pid number.
*/
static void create_pid_file()
{
  File file;
  if ((file= mysql_file_create(key_file_pid, pidfile_name, 0664,
                               O_WRONLY | O_TRUNC, MYF(MY_WME))) >= 0)
  {
    char buff[MAX_BIGINT_WIDTH + 1], *end;
    end= int10_to_str((long) getpid(), buff, 10);
    *end++= '\n';
    if (!mysql_file_write(file, (uchar*) buff, (uint) (end-buff),
                          MYF(MY_WME | MY_NABP)))
    {
      mysql_file_close(file, MYF(0));
      /* Remember success so delete_pid_file() knows the file is ours */
      pid_file_created= true;
      return;
    }
    mysql_file_close(file, MYF(0));
  }
  /* Failure to write the pid file is fatal: abort server startup */
  sql_perror("Can't start server: can't create PID file");
  exit(1);
}
#endif /* EMBEDDED_LIBRARY */


/**
  Remove the process' pid file.

  Only deletes the file if this process actually created it
  (pid_file_created), so we never remove another server's pid file.

  @param  flags  file operation flags
*/

static void delete_pid_file(myf flags)
{
#ifndef EMBEDDED_LIBRARY
  if (pid_file_created)
  {
    mysql_file_delete(key_file_pid, pidfile_name, flags);
    pid_file_created= false;
  }
#endif /* EMBEDDED_LIBRARY */
  return;
}


/** Clear most status variables. Holds LOCK_status for the duration. */
void refresh_status(THD *thd)
{
  mysql_mutex_lock(&LOCK_status);

#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE
  /* Reset aggregated status counters. */
  reset_pfs_status_stats();
#endif

  /* Add thread's status variables to global status */
  add_to_status(&global_status_var, &thd->status_var);

  /* Reset thread's status variables */
  thd->set_status_var_init();
  thd->status_var.global_memory_used= 0;
  bzero((uchar*) &thd->org_status_var, sizeof(thd->org_status_var));
  thd->start_bytes_received= 0;

  /* Reset some global variables */
  reset_status_vars();
#ifdef WITH_WSREP
  if (WSREP_ON)
  {
    Wsrep_server_state::instance().provider().reset_status();
  }
#endif /* WITH_WSREP */

  /* Reset the counters of all key caches (default and named). */
  process_key_caches(reset_key_cache_counters, 0);
  flush_status_time= time((time_t*) 0);
  mysql_mutex_unlock(&LOCK_status);

  /*
    Set max_used_connections to the number of currently open
    connections.  This is not perfect, but status data is not exact anyway.
  */
  max_used_connections= connection_count + extra_connection_count;
}

#ifdef HAVE_PSI_INTERFACE
/* Performance Schema keys for every file type the server instruments */
static PSI_file_info all_server_files[]=
{
#ifdef HAVE_MMAP
  { &key_file_map, "map", 0},
#endif /* HAVE_MMAP */
  { &key_file_binlog, "binlog", 0},
  { &key_file_binlog_cache, "binlog_cache", 0},
  { &key_file_binlog_index, "binlog_index", 0},
  { &key_file_binlog_index_cache, "binlog_index_cache", 0},
  { &key_file_relaylog, "relaylog", 0},
  { &key_file_relaylog_cache, "relaylog_cache", 0},
  { &key_file_relaylog_index, "relaylog_index", 0},
  { &key_file_relaylog_index_cache, "relaylog_index_cache", 0},
  { &key_file_io_cache, "io_cache", 0},
  { &key_file_casetest, "casetest", 0},
  { &key_file_dbopt, "dbopt", 0},
  { &key_file_des_key_file, "des_key_file", 0},
  { &key_file_ERRMSG, "ERRMSG", 0},
  { &key_select_to_file, "select_to_file", 0},
  { &key_file_fileparser, "file_parser", 0},
  { &key_file_frm, "FRM", 0},
  { &key_file_global_ddl_log, "global_ddl_log", 0},
  { &key_file_load, "load", 0},
  { &key_file_loadfile, "LOAD_FILE", 0},
  { &key_file_log_ddl, "log_ddl", 0},
  { &key_file_log_event_data, "log_event_data", 0},
  { &key_file_log_event_info, "log_event_info", 0},
  { &key_file_master_info, "master_info", 0},
  { &key_file_misc, "misc", 0},
  { &key_file_partition_ddl_log, "partition_ddl_log", 0},
  { &key_file_pid, "pid", 0},
  { &key_file_query_log, "query_log", 0},
  { &key_file_relay_log_info, "relay_log_info", 0},
  { &key_file_send_file, "send_file", 0},
  { &key_file_slow_log, "slow_log", 0},
  { &key_file_tclog, "tclog", 0},
  { &key_file_trg, "trigger_name", 0},
  { &key_file_trn, "trigger", 0},
  { &key_file_init, "init", 0},
  { &key_file_binlog_state, "binlog_state", 0}
};
#endif /* HAVE_PSI_INTERFACE */

/* Thread stage descriptors shown in SHOW PROCESSLIST / performance_schema */
PSI_stage_info stage_after_apply_event= { 0, "After apply log event", 0};
PSI_stage_info stage_after_create= { 0, "After create", 0};
PSI_stage_info stage_after_opening_tables= { 0, "After opening tables", 0};
+PSI_stage_info stage_after_table_lock= { 0, "After table lock", 0}; +PSI_stage_info stage_allocating_local_table= { 0, "Allocating local table", 0}; +PSI_stage_info stage_alter_inplace_prepare= { 0, "preparing for alter table", 0}; +PSI_stage_info stage_alter_inplace= { 0, "altering table", 0}; +PSI_stage_info stage_alter_inplace_commit= { 0, "Committing alter table to storage engine", 0}; +PSI_stage_info stage_apply_event= { 0, "Apply log event", 0}; +PSI_stage_info stage_changing_master= { 0, "Changing master", 0}; +PSI_stage_info stage_checking_master_version= { 0, "Checking master version", 0}; +PSI_stage_info stage_checking_permissions= { 0, "checking permissions", 0}; +PSI_stage_info stage_checking_privileges_on_cached_query= { 0, "checking privileges on cached query", 0}; +PSI_stage_info stage_checking_query_cache_for_query= { 0, "Checking query cache for query", 0}; +PSI_stage_info stage_cleaning_up= { 0, "Reset for next command", 0}; +PSI_stage_info stage_closing_tables= { 0, "closing tables", 0}; +PSI_stage_info stage_connecting_to_master= { 0, "Connecting to master", 0}; +PSI_stage_info stage_converting_heap_to_myisam= { 0, "Converting HEAP to " TMP_ENGINE_NAME, 0}; +PSI_stage_info stage_copying_to_group_table= { 0, "Copying to group table", 0}; +PSI_stage_info stage_copying_to_tmp_table= { 0, "Copying to tmp table", 0}; +PSI_stage_info stage_copy_to_tmp_table= { 0, "copy to tmp table", PSI_FLAG_STAGE_PROGRESS}; +PSI_stage_info stage_creating_delayed_handler= { 0, "Creating delayed handler", 0}; +PSI_stage_info stage_creating_sort_index= { 0, "Creating sort index", 0}; +PSI_stage_info stage_creating_table= { 0, "creating table", 0}; +PSI_stage_info stage_creating_tmp_table= { 0, "Creating tmp table", 0}; +PSI_stage_info stage_deleting_from_main_table= { 0, "Deleting from main table", 0}; +PSI_stage_info stage_deleting_from_reference_tables= { 0, "Deleting from reference tables", 0}; +PSI_stage_info stage_discard_or_import_tablespace= { 0, 
"Discard_or_import_tablespace", 0}; +PSI_stage_info stage_enabling_keys= { 0, "Enabling keys", 0}; +PSI_stage_info stage_end= { 0, "End of update loop", 0}; +PSI_stage_info stage_executing= { 0, "Executing", 0}; +PSI_stage_info stage_execution_of_init_command= { 0, "Execution of init_command", 0}; +PSI_stage_info stage_explaining= { 0, "Explaining", 0}; +PSI_stage_info stage_finding_key_cache= { 0, "Finding key cache", 0}; +PSI_stage_info stage_finished_reading_one_binlog_switching_to_next_binlog= { 0, "Finished reading one binlog; switching to next binlog", 0}; +PSI_stage_info stage_flushing_relay_log_and_master_info_repository= { 0, "Flushing relay log and master info repository.", 0}; +PSI_stage_info stage_flushing_relay_log_info_file= { 0, "Flushing relay-log info file.", 0}; +PSI_stage_info stage_freeing_items= { 0, "Freeing items", 0}; +PSI_stage_info stage_fulltext_initialization= { 0, "Fulltext initialization", 0}; +PSI_stage_info stage_got_handler_lock= { 0, "Got handler lock", 0}; +PSI_stage_info stage_got_old_table= { 0, "Got old table", 0}; +PSI_stage_info stage_init= { 0, "init", 0}; +PSI_stage_info stage_init_update= { 0, "init for update", 0}; +PSI_stage_info stage_insert= { 0, "Insert", 0}; +PSI_stage_info stage_invalidating_query_cache_entries_table= { 0, "Invalidating query cache entries (table)", 0}; +PSI_stage_info stage_invalidating_query_cache_entries_table_list= { 0, "Invalidating query cache entries (table list)", 0}; +PSI_stage_info stage_killing_slave= { 0, "Killing slave", 0}; +PSI_stage_info stage_logging_slow_query= { 0, "Logging slow query", 0}; +PSI_stage_info stage_making_temp_file_append_before_load_data= { 0, "Making temporary file (append) before replaying LOAD DATA INFILE", 0}; +PSI_stage_info stage_making_temp_file_create_before_load_data= { 0, "Making temporary file (create) before replaying LOAD DATA INFILE", 0}; +PSI_stage_info stage_manage_keys= { 0, "Manage keys", 0}; +PSI_stage_info 
stage_master_has_sent_all_binlog_to_slave= { 0, "Master has sent all binlog to slave; waiting for more updates", 0}; +PSI_stage_info stage_opening_tables= { 0, "Opening tables", 0}; +PSI_stage_info stage_optimizing= { 0, "Optimizing", 0}; +PSI_stage_info stage_preparing= { 0, "Preparing", 0}; +PSI_stage_info stage_purging_old_relay_logs= { 0, "Purging old relay logs", 0}; +PSI_stage_info stage_query_end= { 0, "Query end", 0}; +PSI_stage_info stage_starting_cleanup= { 0, "Starting cleanup", 0}; +PSI_stage_info stage_rollback= { 0, "Rollback", 0}; +PSI_stage_info stage_rollback_implicit= { 0, "Rollback_implicit", 0}; +PSI_stage_info stage_commit= { 0, "Commit", 0}; +PSI_stage_info stage_commit_implicit= { 0, "Commit implicit", 0}; +PSI_stage_info stage_queueing_master_event_to_the_relay_log= { 0, "Queueing master event to the relay log", 0}; +PSI_stage_info stage_reading_event_from_the_relay_log= { 0, "Reading event from the relay log", 0}; +PSI_stage_info stage_recreating_table= { 0, "Recreating table", 0}; +PSI_stage_info stage_registering_slave_on_master= { 0, "Registering slave on master", 0}; +PSI_stage_info stage_removing_duplicates= { 0, "Removing duplicates", 0}; +PSI_stage_info stage_removing_tmp_table= { 0, "Removing tmp table", 0}; +PSI_stage_info stage_rename= { 0, "Rename", 0}; +PSI_stage_info stage_rename_result_table= { 0, "Rename result table", 0}; +PSI_stage_info stage_requesting_binlog_dump= { 0, "Requesting binlog dump", 0}; +PSI_stage_info stage_reschedule= { 0, "Reschedule", 0}; +PSI_stage_info stage_searching_rows_for_update= { 0, "Searching rows for update", 0}; +PSI_stage_info stage_sending_binlog_event_to_slave= { 0, "Sending binlog event to slave", 0}; +PSI_stage_info stage_sending_cached_result_to_client= { 0, "Sending cached result to client", 0}; +PSI_stage_info stage_sending_data= { 0, "Sending data", 0}; +PSI_stage_info stage_setup= { 0, "setup", 0}; +PSI_stage_info stage_show_explain= { 0, "Show explain", 0}; +PSI_stage_info 
stage_slave_has_read_all_relay_log= { 0, "Slave has read all relay log; waiting for more updates", 0}; +PSI_stage_info stage_sorting= { 0, "Sorting", 0}; +PSI_stage_info stage_sorting_for_group= { 0, "Sorting for group", 0}; +PSI_stage_info stage_sorting_for_order= { 0, "Sorting for order", 0}; +PSI_stage_info stage_sorting_result= { 0, "Sorting result", 0}; +PSI_stage_info stage_statistics= { 0, "Statistics", 0}; +PSI_stage_info stage_sql_thd_waiting_until_delay= { 0, "Waiting until MASTER_DELAY seconds after master executed event", 0 }; +PSI_stage_info stage_storing_result_in_query_cache= { 0, "Storing result in query cache", 0}; +PSI_stage_info stage_storing_row_into_queue= { 0, "Storing row into queue", 0}; +PSI_stage_info stage_system_lock= { 0, "System lock", 0}; +PSI_stage_info stage_unlocking_tables= { 0, "Unlocking tables", 0}; +PSI_stage_info stage_table_lock= { 0, "table lock", 0}; +PSI_stage_info stage_filling_schema_table= { 0, "Filling schema table", 0}; +PSI_stage_info stage_update= { 0, "Update", 0}; +PSI_stage_info stage_updating= { 0, "Updating", 0}; +PSI_stage_info stage_updating_main_table= { 0, "Updating main table", 0}; +PSI_stage_info stage_updating_reference_tables= { 0, "Updating reference tables", 0}; +PSI_stage_info stage_upgrading_lock= { 0, "Upgrading lock", 0}; +PSI_stage_info stage_user_lock= { 0, "User lock", 0}; +PSI_stage_info stage_user_sleep= { 0, "User sleep", 0}; +PSI_stage_info stage_verifying_table= { 0, "Verifying table", 0}; +PSI_stage_info stage_waiting_for_delay_list= { 0, "Waiting for delay_list", 0}; +PSI_stage_info stage_waiting_for_gtid_to_be_written_to_binary_log= { 0, "Waiting for GTID to be written to binary log", 0}; +PSI_stage_info stage_waiting_for_handler_insert= { 0, "Waiting for handler insert", 0}; +PSI_stage_info stage_waiting_for_handler_lock= { 0, "Waiting for handler lock", 0}; +PSI_stage_info stage_waiting_for_handler_open= { 0, "Waiting for handler open", 0}; +PSI_stage_info stage_waiting_for_insert= { 
0, "Waiting for INSERT", 0}; +PSI_stage_info stage_waiting_for_master_to_send_event= { 0, "Waiting for master to send event", 0}; +PSI_stage_info stage_waiting_for_master_update= { 0, "Waiting for master update", 0}; +PSI_stage_info stage_waiting_for_relay_log_space= { 0, "Waiting for the slave SQL thread to free enough relay log space", 0}; +PSI_stage_info stage_waiting_for_semi_sync_ack_from_slave= +{ 0, "Waiting for semi-sync ACK from slave", 0}; +PSI_stage_info stage_waiting_for_semi_sync_slave={ 0, "Waiting for semi-sync slave connection", 0}; +PSI_stage_info stage_reading_semi_sync_ack={ 0, "Reading semi-sync ACK from slave", 0}; +PSI_stage_info stage_waiting_for_slave_mutex_on_exit= { 0, "Waiting for slave mutex on exit", 0}; +PSI_stage_info stage_waiting_for_slave_thread_to_start= { 0, "Waiting for slave thread to start", 0}; +PSI_stage_info stage_waiting_for_table_flush= { 0, "Waiting for table flush", 0}; +PSI_stage_info stage_waiting_for_query_cache_lock= { 0, "Waiting for query cache lock", 0}; +PSI_stage_info stage_waiting_for_the_next_event_in_relay_log= { 0, "Waiting for the next event in relay log", 0}; +PSI_stage_info stage_waiting_for_the_slave_thread_to_advance_position= { 0, "Waiting for the slave SQL thread to advance position", 0}; +PSI_stage_info stage_waiting_to_finalize_termination= { 0, "Waiting to finalize termination", 0}; +PSI_stage_info stage_binlog_waiting_background_tasks= { 0, "Waiting for background binlog tasks", 0}; +PSI_stage_info stage_binlog_write= { 0, "Writing to binlog", 0}; +PSI_stage_info stage_binlog_processing_checkpoint_notify= { 0, "Processing binlog checkpoint notification", 0}; +PSI_stage_info stage_binlog_stopping_background_thread= { 0, "Stopping binlog background thread", 0}; +PSI_stage_info stage_waiting_for_work_from_sql_thread= { 0, "Waiting for work from SQL thread", 0}; +PSI_stage_info stage_waiting_for_prior_transaction_to_commit= { 0, "Waiting for prior transaction to commit", 0}; +PSI_stage_info 
stage_waiting_for_prior_transaction_to_start_commit= { 0, "Waiting for prior transaction to start commit", 0}; +PSI_stage_info stage_waiting_for_room_in_worker_thread= { 0, "Waiting for room in worker thread event queue", 0}; +PSI_stage_info stage_waiting_for_workers_idle= { 0, "Waiting for worker threads to be idle", 0}; +PSI_stage_info stage_waiting_for_ftwrl= { 0, "Waiting due to global read lock", 0}; +PSI_stage_info stage_waiting_for_ftwrl_threads_to_pause= { 0, "Waiting for worker threads to pause for global read lock", 0}; +PSI_stage_info stage_waiting_for_rpl_thread_pool= { 0, "Waiting while replication worker thread pool is busy", 0}; +PSI_stage_info stage_master_gtid_wait_primary= { 0, "Waiting in MASTER_GTID_WAIT() (primary waiter)", 0}; +PSI_stage_info stage_master_gtid_wait= { 0, "Waiting in MASTER_GTID_WAIT()", 0}; +PSI_stage_info stage_gtid_wait_other_connection= { 0, "Waiting for other master connection to process the same GTID", 0}; +PSI_stage_info stage_slave_background_process_request= { 0, "Processing requests", 0}; +PSI_stage_info stage_slave_background_wait_request= { 0, "Waiting for requests", 0}; +PSI_stage_info stage_waiting_for_deadlock_kill= { 0, "Waiting for parallel replication deadlock handling to complete", 0}; +PSI_stage_info stage_starting= { 0, "starting", 0}; +PSI_stage_info stage_waiting_for_flush= { 0, "Waiting for non trans tables to be flushed", 0}; +PSI_stage_info stage_waiting_for_ddl= { 0, "Waiting for DDLs", 0}; + +#ifdef WITH_WSREP +// Aditional Galera thread states +PSI_stage_info stage_waiting_isolation= { 0, "Waiting to execute in isolation", 0}; +PSI_stage_info stage_waiting_certification= {0, "Waiting for certification", 0}; +PSI_stage_info stage_waiting_ddl= {0, "Waiting for TOI DDL", 0}; +PSI_stage_info stage_waiting_flow= {0, "Waiting for flow control", 0}; +#endif /* WITH_WSREP */ + +PSI_memory_key key_memory_DATE_TIME_FORMAT; +PSI_memory_key key_memory_DDL_LOG_MEMORY_ENTRY; +PSI_memory_key 
key_memory_Event_queue_element_for_exec_names; +PSI_memory_key key_memory_Event_scheduler_scheduler_param; +PSI_memory_key key_memory_Filesort_info_merge; +PSI_memory_key key_memory_Filesort_info_record_pointers; +PSI_memory_key key_memory_Gis_read_stream_err_msg; +PSI_memory_key key_memory_JOIN_CACHE; +PSI_memory_key key_memory_MPVIO_EXT_auth_info; +PSI_memory_key key_memory_MYSQL_BIN_LOG_basename; +PSI_memory_key key_memory_MYSQL_BIN_LOG_index; +PSI_memory_key key_memory_MYSQL_LOCK; +PSI_memory_key key_memory_MYSQL_LOG_name; +PSI_memory_key key_memory_MYSQL_RELAY_LOG_basename; +PSI_memory_key key_memory_MYSQL_RELAY_LOG_index; +PSI_memory_key key_memory_NAMED_ILINK_name; +PSI_memory_key key_memory_PROFILE; +PSI_memory_key key_memory_QUICK_RANGE_SELECT_mrr_buf_desc; +PSI_memory_key key_memory_Query_cache; +PSI_memory_key key_memory_Relay_log_info_group_relay_log_name; +PSI_memory_key key_memory_Row_data_memory_memory; +PSI_memory_key key_memory_Rpl_info_file_buffer; +PSI_memory_key key_memory_SLAVE_INFO; +PSI_memory_key key_memory_ST_SCHEMA_TABLE; +PSI_memory_key key_memory_Sort_param_tmp_buffer; +PSI_memory_key key_memory_Sys_var_charptr_value; +PSI_memory_key key_memory_TABLE; +PSI_memory_key key_memory_TABLE_RULE_ENT; +PSI_memory_key key_memory_TC_LOG_MMAP_pages; +PSI_memory_key key_memory_THD_db; +PSI_memory_key key_memory_THD_handler_tables_hash; +PSI_memory_key key_memory_THD_variables; +PSI_memory_key key_memory_Table_trigger_dispatcher; +PSI_memory_key key_memory_Unique_merge_buffer; +PSI_memory_key key_memory_Unique_sort_buffer; +PSI_memory_key key_memory_User_level_lock; +PSI_memory_key key_memory_XID; +PSI_memory_key key_memory_acl_cache; +PSI_memory_key key_memory_acl_mem; +PSI_memory_key key_memory_acl_memex; +PSI_memory_key key_memory_binlog_cache_mngr; +PSI_memory_key key_memory_binlog_pos; +PSI_memory_key key_memory_binlog_recover_exec; +PSI_memory_key key_memory_binlog_statement_buffer; +PSI_memory_key key_memory_binlog_ver_1_event; +PSI_memory_key 
key_memory_bison_stack; +PSI_memory_key key_memory_blob_mem_storage; +PSI_memory_key key_memory_dboptions_hash; +PSI_memory_key key_memory_dbnames_cache; +PSI_memory_key key_memory_errmsgs; +PSI_memory_key key_memory_frm_string; +PSI_memory_key key_memory_gdl; +PSI_memory_key key_memory_global_system_variables; +PSI_memory_key key_memory_handler_errmsgs; +PSI_memory_key key_memory_handlerton; +PSI_memory_key key_memory_hash_index_key_buffer; +PSI_memory_key key_memory_host_cache_hostname; +PSI_memory_key key_memory_ignored_db; +PSI_memory_key key_memory_locked_table_list; +PSI_memory_key key_memory_locked_thread_list; +PSI_memory_key key_memory_my_str_malloc; +PSI_memory_key key_memory_native_functions; +PSI_memory_key key_memory_prepared_statement_main_mem_root; +PSI_memory_key key_memory_prepared_statement_map; +PSI_memory_key key_memory_queue_item; +PSI_memory_key key_memory_quick_range_select_root; +PSI_memory_key key_memory_rpl_filter; +PSI_memory_key key_memory_sp_cache; +PSI_memory_key key_memory_sp_head_call_root; +PSI_memory_key key_memory_sp_head_execute_root; +PSI_memory_key key_memory_sp_head_main_root; +PSI_memory_key key_memory_table_mapping_root; +PSI_memory_key key_memory_table_share; +PSI_memory_key key_memory_table_triggers_list; +PSI_memory_key key_memory_thd_main_mem_root; +PSI_memory_key key_memory_thd_transactions; +PSI_memory_key key_memory_user_conn; +PSI_memory_key key_memory_user_var_entry; +PSI_memory_key key_memory_user_var_entry_value; +PSI_memory_key key_memory_String_value; +PSI_memory_key key_memory_WSREP; + +#ifdef HAVE_PSI_INTERFACE + +PSI_stage_info *all_server_stages[]= +{ + & stage_after_apply_event, + & stage_after_create, + & stage_after_opening_tables, + & stage_after_table_lock, + & stage_allocating_local_table, + & stage_alter_inplace, + & stage_alter_inplace_commit, + & stage_alter_inplace_prepare, + & stage_apply_event, + & stage_binlog_write, + & stage_binlog_processing_checkpoint_notify, + & 
stage_binlog_stopping_background_thread, + & stage_binlog_waiting_background_tasks, + & stage_changing_master, + & stage_checking_master_version, + & stage_checking_permissions, + & stage_checking_privileges_on_cached_query, + & stage_checking_query_cache_for_query, + & stage_cleaning_up, + & stage_closing_tables, + & stage_commit, + & stage_commit_implicit, + & stage_connecting_to_master, + & stage_converting_heap_to_myisam, + & stage_copy_to_tmp_table, + & stage_copying_to_group_table, + & stage_copying_to_tmp_table, + & stage_creating_delayed_handler, + & stage_creating_sort_index, + & stage_creating_table, + & stage_creating_tmp_table, + & stage_deleting_from_main_table, + & stage_deleting_from_reference_tables, + & stage_discard_or_import_tablespace, + & stage_enabling_keys, + & stage_end, + & stage_executing, + & stage_execution_of_init_command, + & stage_explaining, + & stage_finding_key_cache, + & stage_finished_reading_one_binlog_switching_to_next_binlog, + & stage_flushing_relay_log_and_master_info_repository, + & stage_flushing_relay_log_info_file, + & stage_freeing_items, + & stage_fulltext_initialization, + & stage_got_handler_lock, + & stage_got_old_table, + & stage_init, + & stage_init_update, + & stage_insert, + & stage_invalidating_query_cache_entries_table, + & stage_invalidating_query_cache_entries_table_list, + & stage_killing_slave, + & stage_logging_slow_query, + & stage_making_temp_file_append_before_load_data, + & stage_making_temp_file_create_before_load_data, + & stage_manage_keys, + & stage_master_has_sent_all_binlog_to_slave, + & stage_opening_tables, + & stage_optimizing, + & stage_preparing, + & stage_purging_old_relay_logs, + & stage_starting_cleanup, + & stage_query_end, + & stage_queueing_master_event_to_the_relay_log, + & stage_reading_event_from_the_relay_log, + & stage_recreating_table, + & stage_registering_slave_on_master, + & stage_removing_duplicates, + & stage_removing_tmp_table, + & stage_rename, + & 
stage_rename_result_table, + & stage_requesting_binlog_dump, + & stage_reschedule, + & stage_rollback, + & stage_rollback_implicit, + & stage_searching_rows_for_update, + & stage_sending_binlog_event_to_slave, + & stage_sending_cached_result_to_client, + & stage_sending_data, + & stage_setup, + & stage_show_explain, + & stage_slave_has_read_all_relay_log, + & stage_sorting, + & stage_sorting_for_group, + & stage_sorting_for_order, + & stage_sorting_result, + & stage_sql_thd_waiting_until_delay, + & stage_statistics, + & stage_storing_result_in_query_cache, + & stage_storing_row_into_queue, + & stage_system_lock, + & stage_unlocking_tables, + & stage_table_lock, + & stage_filling_schema_table, + & stage_update, + & stage_updating, + & stage_updating_main_table, + & stage_updating_reference_tables, + & stage_upgrading_lock, + & stage_user_lock, + & stage_user_sleep, + & stage_verifying_table, + & stage_waiting_for_delay_list, + & stage_waiting_for_gtid_to_be_written_to_binary_log, + & stage_waiting_for_handler_insert, + & stage_waiting_for_handler_lock, + & stage_waiting_for_handler_open, + & stage_waiting_for_insert, + & stage_waiting_for_master_to_send_event, + & stage_waiting_for_master_update, + & stage_waiting_for_prior_transaction_to_commit, + & stage_waiting_for_prior_transaction_to_start_commit, + & stage_waiting_for_query_cache_lock, + & stage_waiting_for_relay_log_space, + & stage_waiting_for_room_in_worker_thread, + & stage_waiting_for_slave_mutex_on_exit, + & stage_waiting_for_slave_thread_to_start, + & stage_waiting_for_table_flush, + & stage_waiting_for_the_next_event_in_relay_log, + & stage_waiting_for_the_slave_thread_to_advance_position, + & stage_waiting_for_work_from_sql_thread, + & stage_waiting_to_finalize_termination, + & stage_master_gtid_wait_primary, + & stage_master_gtid_wait, + & stage_gtid_wait_other_connection, + & stage_slave_background_process_request, + & stage_slave_background_wait_request, + & 
stage_waiting_for_semi_sync_ack_from_slave, + & stage_waiting_for_semi_sync_slave, + & stage_reading_semi_sync_ack, + & stage_waiting_for_deadlock_kill, + & stage_starting +#ifdef WITH_WSREP + , + & stage_waiting_isolation, + & stage_waiting_certification, + & stage_waiting_ddl, + & stage_waiting_flow +#endif /* WITH_WSREP */ +}; + +PSI_socket_key key_socket_tcpip, key_socket_unix, key_socket_client_connection; + +static PSI_socket_info all_server_sockets[]= +{ + { &key_socket_tcpip, "server_tcpip_socket", PSI_FLAG_GLOBAL}, + { &key_socket_unix, "server_unix_socket", PSI_FLAG_GLOBAL}, + { &key_socket_client_connection, "client_connection", 0} +}; + +static PSI_memory_info all_server_memory[]= +{ + { &key_memory_locked_table_list, "Locked_tables_list::m_locked_tables_root", 0}, + { &key_memory_locked_thread_list, "display_table_locks", PSI_FLAG_THREAD}, + { &key_memory_thd_transactions, "THD::transactions::mem_root", PSI_FLAG_THREAD}, +// { &key_memory_delegate, "Delegate::memroot", 0}, + { &key_memory_acl_mem, "sql_acl_mem", PSI_FLAG_GLOBAL}, + { &key_memory_acl_memex, "sql_acl_memex", PSI_FLAG_GLOBAL}, + { &key_memory_acl_cache, "acl_cache", PSI_FLAG_GLOBAL}, + { &key_memory_thd_main_mem_root, "thd::main_mem_root", PSI_FLAG_THREAD}, +// { &key_memory_help, "help", 0}, +// { &key_memory_new_frm_mem, "new_frm_mem", 0}, + { &key_memory_table_share, "TABLE_SHARE::mem_root", PSI_FLAG_GLOBAL}, /* table definition cache */ + { &key_memory_gdl, "gdl", 0}, + { &key_memory_table_triggers_list, "Table_triggers_list", 0}, +// { &key_memory_servers, "servers", 0}, + { &key_memory_prepared_statement_map, "Prepared_statement_map", PSI_FLAG_THREAD}, + { &key_memory_prepared_statement_main_mem_root, "Prepared_statement::main_mem_root", PSI_FLAG_THREAD}, +// { &key_memory_protocol_rset_root, "Protocol_local::m_rset_root", PSI_FLAG_THREAD}, +// { &key_memory_warning_info_warn_root, "Warning_info::m_warn_root", PSI_FLAG_THREAD}, + { &key_memory_sp_cache, "THD::sp_cache", 0}, + { 
&key_memory_sp_head_main_root, "sp_head::main_mem_root", 0}, + { &key_memory_sp_head_execute_root, "sp_head::execute_mem_root", PSI_FLAG_THREAD}, + { &key_memory_sp_head_call_root, "sp_head::call_mem_root", PSI_FLAG_THREAD}, + { &key_memory_table_mapping_root, "table_mapping::m_mem_root", 0}, + { &key_memory_quick_range_select_root, "QUICK_RANGE_SELECT::alloc", PSI_FLAG_THREAD}, +// { &key_memory_quick_index_merge_root, "QUICK_INDEX_MERGE_SELECT::alloc", PSI_FLAG_THREAD}, +// { &key_memory_quick_ror_intersect_select_root, "QUICK_ROR_INTERSECT_SELECT::alloc", PSI_FLAG_THREAD}, +// { &key_memory_quick_ror_union_select_root, "QUICK_ROR_UNION_SELECT::alloc", PSI_FLAG_THREAD}, +// { &key_memory_quick_group_min_max_select_root, "QUICK_GROUP_MIN_MAX_SELECT::alloc", PSI_FLAG_THREAD}, +// { &key_memory_test_quick_select_exec, "test_quick_select", PSI_FLAG_THREAD}, +// { &key_memory_prune_partitions_exec, "prune_partitions::exec", 0}, + { &key_memory_binlog_recover_exec, "MYSQL_BIN_LOG::recover", 0}, + { &key_memory_blob_mem_storage, "Blob_mem_storage::storage", 0}, + { &key_memory_NAMED_ILINK_name, "NAMED_ILINK::name", 0}, + { &key_memory_String_value, "String::value", 0}, + { &key_memory_Sys_var_charptr_value, "Sys_var_charptr::value", 0}, + { &key_memory_queue_item, "Queue::queue_item", 0}, + { &key_memory_THD_db, "THD::db", 0}, + { &key_memory_user_var_entry, "user_var_entry", 0}, +// { &key_memory_Slave_job_group_group_relay_log_name, "Slave_job_group::group_relay_log_name", 0}, + { &key_memory_Relay_log_info_group_relay_log_name, "Relay_log_info::group_relay_log_name", 0}, + { &key_memory_binlog_cache_mngr, "binlog_cache_mngr", 0}, + { &key_memory_Row_data_memory_memory, "Row_data_memory::memory", 0}, +// { &key_memory_Gtid_set_to_string, "Gtid_set::to_string", 0}, +// { &key_memory_Gtid_state_to_string, "Gtid_state::to_string", 0}, +// { &key_memory_Owned_gtids_to_string, "Owned_gtids::to_string", 0}, +// { &key_memory_log_event, "Log_event", 0}, +// { 
&key_memory_Incident_log_event_message, "Incident_log_event::message", 0}, +// { &key_memory_Rows_query_log_event_rows_query, "Rows_query_log_event::rows_query", 0}, + { &key_memory_Sort_param_tmp_buffer, "Sort_param::tmp_buffer", 0}, + { &key_memory_Filesort_info_merge, "Filesort_info::merge", 0}, + { &key_memory_Filesort_info_record_pointers, "Filesort_info::record_pointers", 0}, +// { &key_memory_Filesort_buffer_sort_keys, "Filesort_buffer::sort_keys", 0}, + { &key_memory_handler_errmsgs, "handler::errmsgs", 0}, + { &key_memory_handlerton, "handlerton", 0}, + { &key_memory_XID, "XID", 0}, + { &key_memory_host_cache_hostname, "host_cache::hostname", 0}, + { &key_memory_user_var_entry_value, "user_var_entry::value", 0}, + { &key_memory_User_level_lock, "User_level_lock", 0}, + { &key_memory_MYSQL_LOG_name, "MYSQL_LOG::name", 0}, + { &key_memory_TC_LOG_MMAP_pages, "TC_LOG_MMAP::pages", 0}, +// { &key_memory_my_bitmap_map, "my_bitmap_map", 0}, + { &key_memory_QUICK_RANGE_SELECT_mrr_buf_desc, "QUICK_RANGE_SELECT::mrr_buf_desc", 0}, + { &key_memory_Event_queue_element_for_exec_names, "Event_queue_element_for_exec::names", 0}, + { &key_memory_my_str_malloc, "my_str_malloc", 0}, + { &key_memory_MYSQL_BIN_LOG_basename, "MYSQL_BIN_LOG::basename", 0}, + { &key_memory_MYSQL_BIN_LOG_index, "MYSQL_BIN_LOG::index", 0}, + { &key_memory_MYSQL_RELAY_LOG_basename, "MYSQL_RELAY_LOG::basename", 0}, + { &key_memory_MYSQL_RELAY_LOG_index, "MYSQL_RELAY_LOG::index", 0}, + { &key_memory_rpl_filter, "rpl_filter memory", 0}, + { &key_memory_errmsgs, "errmsgs", 0}, + { &key_memory_Gis_read_stream_err_msg, "Gis_read_stream::err_msg", 0}, +// { &key_memory_Geometry_objects_data, "Geometry::ptr_and_wkb_data", 0}, + { &key_memory_MYSQL_LOCK, "MYSQL_LOCK", 0}, +// { &key_memory_NET_buff, "NET::buff", 0}, +// { &key_memory_NET_compress_packet, "NET::compress_packet", 0}, + { &key_memory_Event_scheduler_scheduler_param, "Event_scheduler::scheduler_param", 0}, +// { 
&key_memory_Gtid_set_Interval_chunk, "Gtid_set::Interval_chunk", 0}, +// { &key_memory_Owned_gtids_sidno_to_hash, "Owned_gtids::sidno_to_hash", 0}, +// { &key_memory_Sid_map_Node, "Sid_map::Node", 0}, +// { &key_memory_Gtid_state_group_commit_sidno, "Gtid_state::group_commit_sidno_locks", 0}, +// { &key_memory_Mutex_cond_array_Mutex_cond, "Mutex_cond_array::Mutex_cond", 0}, + { &key_memory_TABLE_RULE_ENT, "TABLE_RULE_ENT", 0}, +// { &key_memory_Rpl_info_table, "Rpl_info_table", 0}, + { &key_memory_Rpl_info_file_buffer, "Rpl_info_file::buffer", 0}, +// { &key_memory_db_worker_hash_entry, "db_worker_hash_entry", 0}, +// { &key_memory_rpl_slave_check_temp_dir, "rpl_slave::check_temp_dir", 0}, +// { &key_memory_rpl_slave_command_buffer, "rpl_slave::command_buffer", 0}, + { &key_memory_binlog_ver_1_event, "binlog_ver_1_event", 0}, + { &key_memory_SLAVE_INFO, "SLAVE_INFO", 0}, + { &key_memory_binlog_pos, "binlog_pos", 0}, +// { &key_memory_HASH_ROW_ENTRY, "HASH_ROW_ENTRY", 0}, + { &key_memory_binlog_statement_buffer, "binlog_statement_buffer", 0}, +// { &key_memory_partition_syntax_buffer, "partition_syntax_buffer", 0}, +// { &key_memory_READ_INFO, "READ_INFO", 0}, + { &key_memory_JOIN_CACHE, "JOIN_CACHE", 0}, +// { &key_memory_TABLE_sort_io_cache, "TABLE::sort_io_cache", 0}, +// { &key_memory_frm, "frm", 0}, + { &key_memory_Unique_sort_buffer, "Unique::sort_buffer", 0}, + { &key_memory_Unique_merge_buffer, "Unique::merge_buffer", 0}, + { &key_memory_TABLE, "TABLE", PSI_FLAG_GLOBAL}, /* Table cache */ +// { &key_memory_frm_extra_segment_buff, "frm::extra_segment_buff", 0}, +// { &key_memory_frm_form_pos, "frm::form_pos", 0}, + { &key_memory_frm_string, "frm::string", 0}, +// { &key_memory_LOG_name, "LOG_name", 0}, + { &key_memory_DATE_TIME_FORMAT, "DATE_TIME_FORMAT", 0}, + { &key_memory_DDL_LOG_MEMORY_ENTRY, "DDL_LOG_MEMORY_ENTRY", 0}, + { &key_memory_ST_SCHEMA_TABLE, "ST_SCHEMA_TABLE", 0}, + { &key_memory_ignored_db, "ignored_db", 0}, + { &key_memory_PROFILE, "PROFILE", 
0}, + { &key_memory_global_system_variables, "global_system_variables", 0}, + { &key_memory_THD_variables, "THD::variables", 0}, +// { &key_memory_Security_context, "Security_context", 0}, +// { &key_memory_shared_memory_name, "Shared_memory_name", 0}, + { &key_memory_bison_stack, "bison_stack", 0}, + { &key_memory_THD_handler_tables_hash, "THD::handler_tables_hash", 0}, + { &key_memory_hash_index_key_buffer, "hash_index_key_buffer", 0}, + { &key_memory_dboptions_hash, "dboptions_hash", 0}, + { &key_memory_dbnames_cache, "dbnames_cache", 0}, + { &key_memory_user_conn, "user_conn", 0}, +// { &key_memory_LOG_POS_COORD, "LOG_POS_COORD", 0}, +// { &key_memory_XID_STATE, "XID_STATE", 0}, + { &key_memory_MPVIO_EXT_auth_info, "MPVIO_EXT::auth_info", 0}, +// { &key_memory_opt_bin_logname, "opt_bin_logname", 0}, + { &key_memory_Query_cache, "Query_cache", PSI_FLAG_GLOBAL}, +// { &key_memory_READ_RECORD_cache, "READ_RECORD_cache", 0}, +// { &key_memory_Quick_ranges, "Quick_ranges", 0}, +// { &key_memory_File_query_log_name, "File_query_log::name", 0}, + { &key_memory_Table_trigger_dispatcher, "Table_trigger_dispatcher::m_mem_root", 0}, +// { &key_memory_thd_timer, "thd_timer", 0}, +// { &key_memory_THD_Session_tracker, "THD::Session_tracker", 0}, +// { &key_memory_THD_Session_sysvar_resource_manager, "THD::Session_sysvar_resource_manager", 0}, +// { &key_memory_show_slave_status_io_gtid_set, "show_slave_status_io_gtid_set", 0}, +// { &key_memory_write_set_extraction, "write_set_extraction", 0}, +// { &key_memory_get_all_tables, "get_all_tables", 0}, +// { &key_memory_fill_schema_schemata, "fill_schema_schemata", 0}, + { &key_memory_native_functions, "native_functions", PSI_FLAG_GLOBAL}, + { &key_memory_WSREP, "wsrep", 0 } +}; + +/** + Initialise all the performance schema instrumentation points + used by the server. 
+*/ +void init_server_psi_keys(void) +{ + const char* category= "sql"; + int count; + + count= array_elements(all_server_mutexes); + mysql_mutex_register(category, all_server_mutexes, count); + + count= array_elements(all_server_rwlocks); + mysql_rwlock_register(category, all_server_rwlocks, count); + + count= array_elements(all_server_conds); + mysql_cond_register(category, all_server_conds, count); + + count= array_elements(all_server_threads); + mysql_thread_register(category, all_server_threads, count); + + count= array_elements(all_server_files); + mysql_file_register(category, all_server_files, count); + + count= array_elements(all_server_stages); + mysql_stage_register(category, all_server_stages, count); + + count= array_elements(all_server_sockets); + mysql_socket_register(category, all_server_sockets, count); + + count= array_elements(all_server_memory); + mysql_memory_register(category, all_server_memory, count); + +#ifdef HAVE_PSI_STATEMENT_INTERFACE + init_sql_statement_info(); + count= array_elements(sql_statement_info); + mysql_statement_register(category, sql_statement_info, count); + + init_sp_psi_keys(); + + category= "com"; + init_com_statement_info(); + + /* + Register [0 .. COM_QUERY - 1] as "statement/com/..." + */ + count= (int) COM_QUERY; + mysql_statement_register(category, com_statement_info, count); + + /* + Register [COM_QUERY + 1 .. COM_END] as "statement/com/..." + */ + count= (int) COM_END - (int) COM_QUERY; + mysql_statement_register(category, & com_statement_info[(int) COM_QUERY + 1], count); + + category= "abstract"; + /* + Register [COM_QUERY] as "statement/abstract/com_query" + */ + mysql_statement_register(category, & com_statement_info[(int) COM_QUERY], 1); + + /* + When a new packet is received, + it is instrumented as "statement/abstract/new_packet". + Based on the packet type found, it later mutates to the + proper narrow type, for example + "statement/abstract/query" or "statement/com/ping". 
+ In cases of "statement/abstract/query", SQL queries are given to + the parser, which mutates the statement type to an even more + narrow classification, for example "statement/sql/select". + */ + stmt_info_new_packet.m_key= 0; + stmt_info_new_packet.m_name= "new_packet"; + stmt_info_new_packet.m_flags= PSI_FLAG_MUTABLE; + mysql_statement_register(category, &stmt_info_new_packet, 1); + + /* + Statements processed from the relay log are initially instrumented as + "statement/abstract/relay_log". The parser will mutate the statement type to + a more specific classification, for example "statement/sql/insert". + */ + stmt_info_rpl.m_key= 0; + stmt_info_rpl.m_name= "relay_log"; + stmt_info_rpl.m_flags= PSI_FLAG_MUTABLE; + mysql_statement_register(category, &stmt_info_rpl, 1); +#endif +} + +#endif /* HAVE_PSI_INTERFACE */ + + +/* + Connection ID allocation. + + We need to maintain thread_ids in the 32bit range, + because this is how it is passed to the client in the protocol. + + The idea is to maintain a id range, initially set to + (0,UINT32_MAX). Whenever new id is needed, we increment the + lower limit and return its new value. + + On "overflow", if id can not be generated anymore(i.e lower == upper -1), + we recalculate the range boundaries. + To do that, we first collect thread ids that are in use, by traversing + THD list, and find largest region within (0,UINT32_MAX), that is still free. + +*/ + +static my_thread_id thread_id_max= UINT_MAX32; + +#include +#include + +/* + Find largest unused thread_id range. + + i.e for every number N within the returned range, + there is no existing connection with thread_id equal to N. + + The range is exclusive, lower bound is always >=0 and + upper bound <=MAX_UINT32. 
+ + @param[out] low - lower bound for the range + @param[out] high - upper bound for the range +*/ + +static my_bool recalculate_callback(THD *thd, std::vector *ids) +{ + ids->push_back(thd->thread_id); + return 0; +} + + +static void recalculate_thread_id_range(my_thread_id *low, my_thread_id *high) +{ + std::vector ids; + + // Add sentinels + ids.push_back(0); + ids.push_back(UINT_MAX32); + server_threads.iterate(recalculate_callback, &ids); + + std::sort(ids.begin(), ids.end()); + my_thread_id max_gap= 0; + for (size_t i= 0; i < ids.size() - 1; i++) + { + my_thread_id gap= ids[i+1] - ids[i]; + if (gap > max_gap) + { + *low= ids[i]; + *high= ids[i+1]; + max_gap= gap; + } + } + + if (max_gap < 2) + { + /* Can't find free id. This is not really possible, + we'd need 2^32 connections for this to happen.*/ + sql_print_error("Cannot find free connection id."); + abort(); + } +} + + +my_thread_id next_thread_id(void) +{ + my_thread_id retval; + DBUG_EXECUTE_IF("thread_id_overflow", global_thread_id= thread_id_max-2;); + + mysql_mutex_lock(&LOCK_thread_id); + + if (unlikely(global_thread_id == thread_id_max - 1)) + { + recalculate_thread_id_range(&global_thread_id, &thread_id_max); + } + + retval= ++global_thread_id; + + mysql_mutex_unlock(&LOCK_thread_id); + return retval; +} diff --git a/sql/mysqld.h b/sql/mysqld.h new file mode 100644 index 00000000..2139b9b6 --- /dev/null +++ b/sql/mysqld.h @@ -0,0 +1,993 @@ +/* Copyright (c) 2006, 2016, Oracle and/or its affiliates. + Copyright (c) 2010, 2021, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef MYSQLD_INCLUDED +#define MYSQLD_INCLUDED + +#include "sql_basic_types.h" /* query_id_t */ +#include "sql_mode.h" /* Sql_mode_dependency */ +#include "sql_plugin.h" +#include "sql_bitmap.h" /* Bitmap */ +#include "my_decimal.h" /* my_decimal */ +#include "mysql_com.h" /* SERVER_VERSION_LENGTH */ +#include "my_counter.h" +#include "mysql/psi/mysql_file.h" /* MYSQL_FILE */ +#include "mysql/psi/mysql_socket.h" /* MYSQL_SOCKET */ +#include "sql_list.h" /* I_List */ +#include "sql_cmd.h" +#include +#include "my_pthread.h" +#include "my_rdtsc.h" + +class THD; +class CONNECT; +struct handlerton; +class Time_zone; + +struct scheduler_functions; + +typedef struct st_mysql_show_var SHOW_VAR; + +/* Bits from testflag */ +#define TEST_PRINT_CACHED_TABLES 1U +#define TEST_NO_KEY_GROUP 2U +#define TEST_MIT_THREAD 4U +#define TEST_BLOCKING 8U +#define TEST_KEEP_TMP_TABLES 16U +#define TEST_READCHECK 64U /**< Force use of readcheck */ +#define TEST_NO_EXTRA 128U +#define TEST_CORE_ON_SIGNAL 256U /**< Give core if signal */ +#define TEST_SIGINT 1024U /**< Allow sigint on threads */ +#define TEST_SYNCHRONIZATION 2048U /**< get server to do sleep in + some places */ + +/* Keep things compatible */ +#define OPT_DEFAULT SHOW_OPT_DEFAULT +#define OPT_SESSION SHOW_OPT_SESSION +#define OPT_GLOBAL SHOW_OPT_GLOBAL + +extern MYSQL_PLUGIN_IMPORT MY_TIMER_INFO sys_timer_info; + +/* + Values for --slave-parallel-mode + Must match order in slave_parallel_mode_typelib in sys_vars.cc. 
+*/ +enum enum_slave_parallel_mode { + SLAVE_PARALLEL_NONE, + SLAVE_PARALLEL_MINIMAL, + SLAVE_PARALLEL_CONSERVATIVE, + SLAVE_PARALLEL_OPTIMISTIC, + SLAVE_PARALLEL_AGGRESSIVE +}; + +/* Function prototypes */ +void kill_mysql(THD *thd); +void close_connection(THD *thd, uint sql_errno= 0); +void handle_connection_in_main_thread(CONNECT *thd); +void create_thread_to_handle_connection(CONNECT *connect); +void unlink_thd(THD *thd); +void refresh_status(THD *thd); +bool is_secure_file_path(char *path); +extern void init_net_server_extension(THD *thd); +extern void handle_accepted_socket(MYSQL_SOCKET new_sock, MYSQL_SOCKET sock); +extern void create_new_thread(CONNECT *connect); + +extern void ssl_acceptor_stats_update(int sslaccept_ret); +extern int reinit_ssl(); + +extern "C" MYSQL_PLUGIN_IMPORT CHARSET_INFO *system_charset_info; +extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *files_charset_info ; +extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *national_charset_info; +extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *table_alias_charset; + +/** + Character set of the buildin error messages loaded from errmsg.sys. 
+*/ +extern CHARSET_INFO *error_message_charset_info; + +extern CHARSET_INFO *character_set_filesystem; + +void temp_pool_clear_bit(uint bit); +uint temp_pool_set_next(); + +extern bool opt_large_files; +extern bool opt_update_log, opt_bin_log, opt_error_log, opt_bin_log_compress; +extern uint opt_bin_log_compress_min_len; +extern my_bool opt_log, opt_bootstrap; +extern my_bool opt_backup_history_log; +extern my_bool opt_backup_progress_log; +extern my_bool opt_support_flashback; +extern ulonglong log_output_options; +extern ulong log_backup_output_options; +extern bool opt_disable_networking, opt_skip_show_db; +extern bool opt_skip_name_resolve; +extern bool opt_ignore_builtin_innodb; +extern my_bool opt_character_set_client_handshake; +extern my_bool debug_assert_on_not_freed_memory; +extern MYSQL_PLUGIN_IMPORT bool volatile abort_loop; +extern my_bool opt_safe_user_create; +extern my_bool opt_safe_show_db, opt_local_infile, opt_myisam_use_mmap; +extern my_bool opt_slave_compressed_protocol, use_temp_pool; +extern ulong slave_exec_mode_options, slave_ddl_exec_mode_options; +extern ulong slave_retried_transactions; +extern ulong transactions_multi_engine; +extern ulong rpl_transactions_multi_engine; +extern ulong transactions_gtid_foreign_engine; +extern ulong slave_run_triggers_for_rbr; +extern ulonglong slave_type_conversions_options; +extern my_bool read_only, opt_readonly; +extern MYSQL_PLUGIN_IMPORT my_bool lower_case_file_system; +extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs; +extern my_bool opt_secure_auth; +extern my_bool opt_require_secure_transport; +extern const char *current_dbug_option; +extern char* opt_secure_file_priv; +extern char* opt_secure_backup_file_priv; +extern size_t opt_secure_backup_file_priv_len; +extern my_bool sp_automatic_privileges, opt_noacl; +extern ulong use_stat_tables; +extern my_bool opt_old_style_user_limits, trust_function_creators; +extern uint opt_crash_binlog_innodb; +extern const char 
*shared_memory_base_name; +extern MYSQL_PLUGIN_IMPORT char *mysqld_unix_port; +extern my_bool opt_enable_shared_memory; +extern ulong opt_replicate_events_marked_for_skip; +extern char *default_tz_name; +extern Time_zone *default_tz; +extern char *my_bind_addr_str; +extern char *default_storage_engine, *default_tmp_storage_engine; +extern char *enforced_storage_engine; +extern char *gtid_pos_auto_engines; +extern plugin_ref *opt_gtid_pos_auto_plugins; +extern bool opt_endinfo, using_udf_functions; +extern my_bool locked_in_memory; +extern bool opt_using_transactions; +extern ulong current_pid; +extern double expire_logs_days; +extern ulong binlog_expire_logs_seconds; +extern my_bool relay_log_recovery; +extern uint sync_binlog_period, sync_relaylog_period, + sync_relayloginfo_period, sync_masterinfo_period; +extern ulong opt_tc_log_size, tc_log_max_pages_used, tc_log_page_size; +extern ulong tc_log_page_waits; +extern my_bool relay_log_purge, opt_innodb_safe_binlog, opt_innodb; +extern my_bool relay_log_recovery; +extern uint select_errors,ha_open_options; +extern ulonglong test_flags; +extern uint protocol_version, dropping_tables; +extern MYSQL_PLUGIN_IMPORT uint mysqld_port; +extern ulong delay_key_write_options; +extern char *opt_logname, *opt_slow_logname, *opt_bin_logname, + *opt_relay_logname; +extern char *opt_binlog_index_name; +extern char *opt_backup_history_logname, *opt_backup_progress_logname, + *opt_backup_settings_name; +extern const char *log_output_str; +extern const char *log_backup_output_str; + +/* System Versioning begin */ +enum vers_system_time_t +{ + SYSTEM_TIME_UNSPECIFIED = 0, + SYSTEM_TIME_AS_OF, + SYSTEM_TIME_FROM_TO, + SYSTEM_TIME_BETWEEN, + SYSTEM_TIME_BEFORE, // used for DELETE HISTORY ... 
BEFORE + SYSTEM_TIME_HISTORY, // used for DELETE HISTORY + SYSTEM_TIME_ALL +}; + +struct vers_asof_timestamp_t +{ + ulong type; + my_time_t unix_time; + ulong second_part; +}; + +enum vers_alter_history_enum +{ + VERS_ALTER_HISTORY_ERROR= 0, + VERS_ALTER_HISTORY_KEEP +}; +/* System Versioning end */ + +extern char *mysql_home_ptr, *pidfile_name_ptr; +extern MYSQL_PLUGIN_IMPORT char glob_hostname[FN_REFLEN]; +extern char mysql_home[FN_REFLEN]; +extern char pidfile_name[FN_REFLEN], system_time_zone[30], *opt_init_file; +extern char default_logfile_name[FN_REFLEN]; +extern char log_error_file[FN_REFLEN], *opt_tc_log_file, *opt_ddl_recovery_file; +extern const double log_10[309]; +extern ulonglong keybuff_size; +extern ulonglong thd_startup_options; +extern my_thread_id global_thread_id; +extern ulong binlog_cache_use, binlog_cache_disk_use; +extern ulong binlog_stmt_cache_use, binlog_stmt_cache_disk_use; +extern ulong aborted_threads, aborted_connects, aborted_connects_preauth; +extern ulong delayed_insert_timeout; +extern ulong delayed_insert_limit, delayed_queue_size; +extern ulong delayed_insert_threads, delayed_insert_writes; +extern ulong delayed_rows_in_use,delayed_insert_errors; +extern Atomic_counter slave_open_temp_tables; +extern ulonglong query_cache_size; +extern ulong query_cache_limit; +extern ulong query_cache_min_res_unit; +extern ulong slow_launch_threads, slow_launch_time; +extern MYSQL_PLUGIN_IMPORT ulong max_connections; +extern uint max_digest_length; +extern ulong max_connect_errors, connect_timeout; +extern uint max_password_errors; +extern my_bool slave_allow_batching; +extern my_bool allow_slave_start; +extern LEX_CSTRING reason_slave_blocked; +extern ulong slave_trans_retries; +extern ulong slave_trans_retry_interval; +extern uint slave_net_timeout; +extern int max_user_connections; +extern ulong what_to_log,flush_time; +extern uint max_prepared_stmt_count, prepared_stmt_count; +extern MYSQL_PLUGIN_IMPORT ulong open_files_limit; +extern 
ulonglong binlog_cache_size, binlog_stmt_cache_size, binlog_file_cache_size; +extern ulonglong max_binlog_cache_size, max_binlog_stmt_cache_size; +extern ulong max_binlog_size; +extern ulong slave_max_allowed_packet; +extern ulonglong slave_max_statement_time; +extern double slave_max_statement_time_double; +extern ulong opt_binlog_rows_event_max_size; +extern ulong binlog_row_metadata; +extern ulong thread_cache_size; +extern ulong stored_program_cache_size; +extern ulong opt_slave_parallel_threads; +extern ulong opt_slave_domain_parallel_threads; +extern ulong opt_slave_parallel_max_queued; +extern ulong opt_slave_parallel_mode; +extern ulong opt_binlog_commit_wait_count; +extern ulong opt_binlog_commit_wait_usec; +extern my_bool opt_gtid_ignore_duplicates; +extern uint opt_gtid_cleanup_batch_size; +extern ulong back_log; +extern ulong executed_events; +extern char language[FN_REFLEN]; +extern "C" MYSQL_PLUGIN_IMPORT ulong server_id; +extern ulong concurrency; +extern time_t server_start_time, flush_status_time; +extern char *opt_mysql_tmpdir, mysql_charsets_dir[]; +extern size_t mysql_unpacked_real_data_home_len; +extern MYSQL_PLUGIN_IMPORT MY_TMPDIR mysql_tmpdir_list; +extern const char *first_keyword, *delayed_user; +extern MYSQL_PLUGIN_IMPORT const char *my_localhost; +extern MYSQL_PLUGIN_IMPORT const char **errmesg; /* Error messages */ +extern const char *myisam_recover_options_str; +extern const LEX_CSTRING in_left_expr_name, in_additional_cond, in_having_cond; +extern const LEX_CSTRING NULL_clex_str; +extern const LEX_CSTRING error_clex_str; +extern SHOW_VAR status_vars[]; +extern struct system_variables max_system_variables; +extern struct system_status_var global_status_var; +extern struct my_rnd_struct sql_rand; +extern const char *opt_date_time_formats[]; +extern handlerton *partition_hton; +extern handlerton *myisam_hton; +extern handlerton *heap_hton; +extern const char *load_default_groups[]; +extern struct my_option my_long_options[]; +int 
handle_early_options(); +extern int MYSQL_PLUGIN_IMPORT mysqld_server_started; +extern int mysqld_server_initialized; +extern "C" MYSQL_PLUGIN_IMPORT int orig_argc; +extern "C" MYSQL_PLUGIN_IMPORT char **orig_argv; +extern pthread_attr_t connection_attrib; +extern my_bool old_mode; +extern LEX_STRING opt_init_connect, opt_init_slave; +extern char err_shared_dir[]; +extern ulong connection_errors_select; +extern ulong connection_errors_accept; +extern ulong connection_errors_tcpwrap; +extern ulong connection_errors_internal; +extern ulong connection_errors_max_connection; +extern ulong connection_errors_peer_addr; +extern ulong log_warnings; +extern my_bool encrypt_binlog; +extern my_bool encrypt_tmp_disk_tables, encrypt_tmp_files; +extern ulong encryption_algorithm; +extern const char *encryption_algorithm_names[]; +extern long opt_secure_timestamp; +extern uint default_password_lifetime; +extern my_bool disconnect_on_expired_password; + +enum secure_timestamp { SECTIME_NO, SECTIME_SUPER, SECTIME_REPL, SECTIME_YES }; +bool is_set_timestamp_forbidden(THD *thd); + +#ifdef HAVE_MMAP +extern PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active, + key_LOCK_pool, key_LOCK_pending_checkpoint; +#endif /* HAVE_MMAP */ + +#ifdef HAVE_OPENSSL +extern PSI_mutex_key key_LOCK_des_key_file; +#endif + +extern PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list, + key_BINLOG_LOCK_binlog_background_thread, + key_LOCK_binlog_end_pos, + key_delayed_insert_mutex, key_hash_filo_lock, key_LOCK_active_mi, + key_LOCK_crypt, key_LOCK_delayed_create, + key_LOCK_delayed_insert, key_LOCK_delayed_status, key_LOCK_error_log, + key_LOCK_gdl, key_LOCK_global_system_variables, + key_LOCK_logger, key_LOCK_manager, + key_LOCK_prepared_stmt_count, + key_LOCK_rpl_status, key_LOCK_server_started, + key_LOCK_status, + key_LOCK_thd_data, key_LOCK_thd_kill, + key_LOCK_user_conn, key_LOG_LOCK_log, + key_master_info_data_lock, key_master_info_run_lock, + key_master_info_sleep_lock, 
key_master_info_start_stop_lock, + key_master_info_start_alter_lock, + key_master_info_start_alter_list_lock, + key_mutex_slave_reporting_capability_err_lock, key_relay_log_info_data_lock, + key_relay_log_info_log_space_lock, key_relay_log_info_run_lock, + key_rpl_group_info_sleep_lock, + key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data, + key_TABLE_SHARE_LOCK_statistics, + key_LOCK_start_thread, + key_LOCK_error_messages, + key_PARTITION_LOCK_auto_inc; +extern PSI_mutex_key key_RELAYLOG_LOCK_index; +extern PSI_mutex_key key_LOCK_relaylog_end_pos; +extern PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state, + key_LOCK_rpl_thread, key_LOCK_rpl_thread_pool, key_LOCK_parallel_entry; + +extern PSI_mutex_key key_TABLE_SHARE_LOCK_share, key_LOCK_stats, + key_LOCK_global_user_client_stats, key_LOCK_global_table_stats, + key_LOCK_global_index_stats, key_LOCK_wakeup_ready, key_LOCK_wait_commit, + key_TABLE_SHARE_LOCK_rotation; +extern PSI_mutex_key key_LOCK_gtid_waiting; + +extern PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger, + key_rwlock_LOCK_sys_init_connect, key_rwlock_LOCK_sys_init_slave, + key_rwlock_LOCK_system_variables_hash, key_rwlock_query_cache_query_lock, + key_LOCK_SEQUENCE, + key_rwlock_LOCK_vers_stats, key_rwlock_LOCK_stat_serial, + key_rwlock_THD_list; + +#ifdef HAVE_MMAP +extern PSI_cond_key key_PAGE_cond, key_COND_active, key_COND_pool; +#endif /* HAVE_MMAP */ + +extern PSI_cond_key key_BINLOG_COND_xid_list, key_BINLOG_update_cond, + key_BINLOG_COND_binlog_background_thread, + key_BINLOG_COND_binlog_background_thread_end, + key_COND_cache_status_changed, key_COND_manager, + key_COND_rpl_status, key_COND_server_started, + key_delayed_insert_cond, key_delayed_insert_cond_client, + key_item_func_sleep_cond, key_master_info_data_cond, + key_master_info_start_cond, key_master_info_stop_cond, + key_master_info_sleep_cond, + key_relay_log_info_data_cond, key_relay_log_info_log_space_cond, + key_relay_log_info_start_cond, 
key_relay_log_info_stop_cond, + key_rpl_group_info_sleep_cond, + key_TABLE_SHARE_cond, key_user_level_lock_cond, + key_COND_start_thread; +extern PSI_cond_key key_RELAYLOG_COND_relay_log_updated, + key_RELAYLOG_COND_bin_log_updated, key_COND_wakeup_ready, + key_COND_wait_commit; +extern PSI_cond_key key_RELAYLOG_COND_queue_busy; +extern PSI_cond_key key_TC_LOG_MMAP_COND_queue_busy; +extern PSI_cond_key key_COND_rpl_thread, key_COND_rpl_thread_queue, + key_COND_rpl_thread_stop, key_COND_rpl_thread_pool, + key_COND_parallel_entry, key_COND_group_commit_orderer; +extern PSI_cond_key key_COND_wait_gtid, key_COND_gtid_ignore_duplicates; +extern PSI_cond_key key_TABLE_SHARE_COND_rotation; + +extern PSI_thread_key key_thread_delayed_insert, + key_thread_handle_manager, key_thread_kill_server, key_thread_main, + key_thread_one_connection, key_thread_signal_hand, + key_thread_slave_background, key_rpl_parallel_thread; + +extern PSI_file_key key_file_binlog, key_file_binlog_cache, + key_file_binlog_index, key_file_binlog_index_cache, key_file_casetest, + key_file_dbopt, key_file_des_key_file, key_file_ERRMSG, key_select_to_file, + key_file_fileparser, key_file_frm, key_file_global_ddl_log, key_file_load, + key_file_loadfile, key_file_log_event_data, key_file_log_event_info, + key_file_master_info, key_file_misc, key_file_partition_ddl_log, + key_file_pid, key_file_relay_log_info, key_file_send_file, key_file_tclog, + key_file_trg, key_file_trn, key_file_init, key_file_log_ddl; +extern PSI_file_key key_file_query_log, key_file_slow_log; +extern PSI_file_key key_file_relaylog, key_file_relaylog_index, + key_file_relaylog_cache, key_file_relaylog_index_cache; +extern PSI_socket_key key_socket_tcpip, key_socket_unix, + key_socket_client_connection; +extern PSI_file_key key_file_binlog_state; + +#ifdef HAVE_PSI_INTERFACE +void init_server_psi_keys(); +#endif /* HAVE_PSI_INTERFACE */ + +extern PSI_memory_key key_memory_locked_table_list; +extern PSI_memory_key 
key_memory_locked_thread_list; +extern PSI_memory_key key_memory_thd_transactions; +extern PSI_memory_key key_memory_delegate; +extern PSI_memory_key key_memory_acl_mem; +extern PSI_memory_key key_memory_acl_memex; +extern PSI_memory_key key_memory_acl_cache; +extern PSI_memory_key key_memory_thd_main_mem_root; +extern PSI_memory_key key_memory_help; +extern PSI_memory_key key_memory_frm; +extern PSI_memory_key key_memory_table_share; +extern PSI_memory_key key_memory_gdl; +extern PSI_memory_key key_memory_table_triggers_list; +extern PSI_memory_key key_memory_prepared_statement_map; +extern PSI_memory_key key_memory_prepared_statement_main_mem_root; +extern PSI_memory_key key_memory_protocol_rset_root; +extern PSI_memory_key key_memory_warning_info_warn_root; +extern PSI_memory_key key_memory_sp_cache; +extern PSI_memory_key key_memory_sp_head_main_root; +extern PSI_memory_key key_memory_sp_head_execute_root; +extern PSI_memory_key key_memory_sp_head_call_root; +extern PSI_memory_key key_memory_table_mapping_root; +extern PSI_memory_key key_memory_quick_range_select_root; +extern PSI_memory_key key_memory_quick_index_merge_root; +extern PSI_memory_key key_memory_quick_ror_intersect_select_root; +extern PSI_memory_key key_memory_quick_ror_union_select_root; +extern PSI_memory_key key_memory_quick_group_min_max_select_root; +extern PSI_memory_key key_memory_test_quick_select_exec; +extern PSI_memory_key key_memory_prune_partitions_exec; +extern PSI_memory_key key_memory_binlog_recover_exec; +extern PSI_memory_key key_memory_blob_mem_storage; + +extern PSI_memory_key key_memory_Sys_var_charptr_value; +extern PSI_memory_key key_memory_THD_db; +extern PSI_memory_key key_memory_user_var_entry; +extern PSI_memory_key key_memory_user_var_entry_value; +extern PSI_memory_key key_memory_Slave_job_group_group_relay_log_name; +extern PSI_memory_key key_memory_Relay_log_info_group_relay_log_name; +extern PSI_memory_key key_memory_binlog_cache_mngr; +extern PSI_memory_key 
key_memory_Row_data_memory_memory; +extern PSI_memory_key key_memory_errmsgs; +extern PSI_memory_key key_memory_Event_queue_element_for_exec_names; +extern PSI_memory_key key_memory_Event_scheduler_scheduler_param; +extern PSI_memory_key key_memory_Gis_read_stream_err_msg; +extern PSI_memory_key key_memory_Geometry_objects_data; +extern PSI_memory_key key_memory_host_cache_hostname; +extern PSI_memory_key key_memory_User_level_lock; +extern PSI_memory_key key_memory_Filesort_info_record_pointers; +extern PSI_memory_key key_memory_Sort_param_tmp_buffer; +extern PSI_memory_key key_memory_Filesort_info_merge; +extern PSI_memory_key key_memory_Filesort_buffer_sort_keys; +extern PSI_memory_key key_memory_handler_errmsgs; +extern PSI_memory_key key_memory_handlerton; +extern PSI_memory_key key_memory_XID; +extern PSI_memory_key key_memory_MYSQL_LOCK; +extern PSI_memory_key key_memory_MYSQL_LOG_name; +extern PSI_memory_key key_memory_TC_LOG_MMAP_pages; +extern PSI_memory_key key_memory_my_str_malloc; +extern PSI_memory_key key_memory_MYSQL_BIN_LOG_basename; +extern PSI_memory_key key_memory_MYSQL_BIN_LOG_index; +extern PSI_memory_key key_memory_MYSQL_RELAY_LOG_basename; +extern PSI_memory_key key_memory_MYSQL_RELAY_LOG_index; +extern PSI_memory_key key_memory_rpl_filter; +extern PSI_memory_key key_memory_Security_context; +extern PSI_memory_key key_memory_NET_buff; +extern PSI_memory_key key_memory_NET_compress_packet; +extern PSI_memory_key key_memory_my_bitmap_map; +extern PSI_memory_key key_memory_QUICK_RANGE_SELECT_mrr_buf_desc; +extern PSI_memory_key key_memory_TABLE_RULE_ENT; +extern PSI_memory_key key_memory_Mutex_cond_array_Mutex_cond; +extern PSI_memory_key key_memory_Owned_gtids_sidno_to_hash; +extern PSI_memory_key key_memory_Sid_map_Node; +extern PSI_memory_key key_memory_bison_stack; +extern PSI_memory_key key_memory_TABLE_sort_io_cache; +extern PSI_memory_key key_memory_DATE_TIME_FORMAT; +extern PSI_memory_key key_memory_DDL_LOG_MEMORY_ENTRY; +extern 
PSI_memory_key key_memory_ST_SCHEMA_TABLE; +extern PSI_memory_key key_memory_ignored_db; +extern PSI_memory_key key_memory_SLAVE_INFO; +extern PSI_memory_key key_memory_log_event_old; +extern PSI_memory_key key_memory_HASH_ROW_ENTRY; +extern PSI_memory_key key_memory_table_def_memory; +extern PSI_memory_key key_memory_MPVIO_EXT_auth_info; +extern PSI_memory_key key_memory_LOG_POS_COORD; +extern PSI_memory_key key_memory_XID_STATE; +extern PSI_memory_key key_memory_Rpl_info_file_buffer; +extern PSI_memory_key key_memory_Rpl_info_table; +extern PSI_memory_key key_memory_binlog_pos; +extern PSI_memory_key key_memory_db_worker_hash_entry; +extern PSI_memory_key key_memory_rpl_slave_command_buffer; +extern PSI_memory_key key_memory_binlog_ver_1_event; +extern PSI_memory_key key_memory_rpl_slave_check_temp_dir; +extern PSI_memory_key key_memory_TABLE; +extern PSI_memory_key key_memory_binlog_statement_buffer; +extern PSI_memory_key key_memory_user_conn; +extern PSI_memory_key key_memory_dboptions_hash; +extern PSI_memory_key key_memory_dbnames_cache; +extern PSI_memory_key key_memory_hash_index_key_buffer; +extern PSI_memory_key key_memory_THD_handler_tables_hash; +extern PSI_memory_key key_memory_JOIN_CACHE; +extern PSI_memory_key key_memory_READ_INFO; +extern PSI_memory_key key_memory_partition_syntax_buffer; +extern PSI_memory_key key_memory_global_system_variables; +extern PSI_memory_key key_memory_THD_variables; +extern PSI_memory_key key_memory_PROFILE; +extern PSI_memory_key key_memory_LOG_name; +extern PSI_memory_key key_memory_string_iterator; +extern PSI_memory_key key_memory_frm_extra_segment_buff; +extern PSI_memory_key key_memory_frm_form_pos; +extern PSI_memory_key key_memory_frm_string; +extern PSI_memory_key key_memory_Unique_sort_buffer; +extern PSI_memory_key key_memory_Unique_merge_buffer; +extern PSI_memory_key key_memory_shared_memory_name; +extern PSI_memory_key key_memory_opt_bin_logname; +extern PSI_memory_key key_memory_Query_cache; +extern 
PSI_memory_key key_memory_READ_RECORD_cache; +extern PSI_memory_key key_memory_Quick_ranges; +extern PSI_memory_key key_memory_File_query_log_name; +extern PSI_memory_key key_memory_Table_trigger_dispatcher; +extern PSI_memory_key key_memory_show_slave_status_io_gtid_set; +extern PSI_memory_key key_memory_write_set_extraction; +extern PSI_memory_key key_memory_thd_timer; +extern PSI_memory_key key_memory_THD_Session_tracker; +extern PSI_memory_key key_memory_THD_Session_sysvar_resource_manager; +extern PSI_memory_key key_memory_get_all_tables; +extern PSI_memory_key key_memory_fill_schema_schemata; +extern PSI_memory_key key_memory_native_functions; +extern PSI_memory_key key_memory_JSON; +extern PSI_memory_key key_memory_WSREP; + +/* + MAINTAINER: Please keep this list in order, to limit merge collisions. + Hint: grep PSI_stage_info | sort -u +*/ +extern PSI_stage_info stage_apply_event; +extern PSI_stage_info stage_after_create; +extern PSI_stage_info stage_after_opening_tables; +extern PSI_stage_info stage_after_table_lock; +extern PSI_stage_info stage_allocating_local_table; +extern PSI_stage_info stage_alter_inplace_prepare; +extern PSI_stage_info stage_alter_inplace; +extern PSI_stage_info stage_alter_inplace_commit; +extern PSI_stage_info stage_after_apply_event; +extern PSI_stage_info stage_changing_master; +extern PSI_stage_info stage_checking_master_version; +extern PSI_stage_info stage_checking_permissions; +extern PSI_stage_info stage_checking_privileges_on_cached_query; +extern PSI_stage_info stage_checking_query_cache_for_query; +extern PSI_stage_info stage_cleaning_up; +extern PSI_stage_info stage_closing_tables; +extern PSI_stage_info stage_connecting_to_master; +extern PSI_stage_info stage_converting_heap_to_myisam; +extern PSI_stage_info stage_copying_to_group_table; +extern PSI_stage_info stage_copying_to_tmp_table; +extern PSI_stage_info stage_copy_to_tmp_table; +extern PSI_stage_info stage_creating_delayed_handler; +extern PSI_stage_info 
stage_creating_sort_index; +extern PSI_stage_info stage_creating_table; +extern PSI_stage_info stage_creating_tmp_table; +extern PSI_stage_info stage_deleting_from_main_table; +extern PSI_stage_info stage_deleting_from_reference_tables; +extern PSI_stage_info stage_discard_or_import_tablespace; +extern PSI_stage_info stage_end; +extern PSI_stage_info stage_enabling_keys; +extern PSI_stage_info stage_executing; +extern PSI_stage_info stage_execution_of_init_command; +extern PSI_stage_info stage_explaining; +extern PSI_stage_info stage_finding_key_cache; +extern PSI_stage_info stage_finished_reading_one_binlog_switching_to_next_binlog; +extern PSI_stage_info stage_flushing_relay_log_and_master_info_repository; +extern PSI_stage_info stage_flushing_relay_log_info_file; +extern PSI_stage_info stage_freeing_items; +extern PSI_stage_info stage_fulltext_initialization; +extern PSI_stage_info stage_got_handler_lock; +extern PSI_stage_info stage_got_old_table; +extern PSI_stage_info stage_init; +extern PSI_stage_info stage_init_update; +extern PSI_stage_info stage_insert; +extern PSI_stage_info stage_invalidating_query_cache_entries_table; +extern PSI_stage_info stage_invalidating_query_cache_entries_table_list; +extern PSI_stage_info stage_killing_slave; +extern PSI_stage_info stage_logging_slow_query; +extern PSI_stage_info stage_making_temp_file_append_before_load_data; +extern PSI_stage_info stage_making_temp_file_create_before_load_data; +extern PSI_stage_info stage_manage_keys; +extern PSI_stage_info stage_master_has_sent_all_binlog_to_slave; +extern PSI_stage_info stage_opening_tables; +extern PSI_stage_info stage_optimizing; +extern PSI_stage_info stage_preparing; +extern PSI_stage_info stage_purging_old_relay_logs; +extern PSI_stage_info stage_query_end; +extern PSI_stage_info stage_starting_cleanup; +extern PSI_stage_info stage_rollback; +extern PSI_stage_info stage_rollback_implicit; +extern PSI_stage_info stage_commit; +extern PSI_stage_info 
stage_commit_implicit; +extern PSI_stage_info stage_queueing_master_event_to_the_relay_log; +extern PSI_stage_info stage_reading_event_from_the_relay_log; +extern PSI_stage_info stage_recreating_table; +extern PSI_stage_info stage_registering_slave_on_master; +extern PSI_stage_info stage_removing_duplicates; +extern PSI_stage_info stage_removing_tmp_table; +extern PSI_stage_info stage_rename; +extern PSI_stage_info stage_rename_result_table; +extern PSI_stage_info stage_requesting_binlog_dump; +extern PSI_stage_info stage_reschedule; +extern PSI_stage_info stage_searching_rows_for_update; +extern PSI_stage_info stage_sending_binlog_event_to_slave; +extern PSI_stage_info stage_sending_cached_result_to_client; +extern PSI_stage_info stage_sending_data; +extern PSI_stage_info stage_setup; +extern PSI_stage_info stage_slave_has_read_all_relay_log; +extern PSI_stage_info stage_show_explain; +extern PSI_stage_info stage_sorting; +extern PSI_stage_info stage_sorting_for_group; +extern PSI_stage_info stage_sorting_for_order; +extern PSI_stage_info stage_sorting_result; +extern PSI_stage_info stage_sql_thd_waiting_until_delay; +extern PSI_stage_info stage_statistics; +extern PSI_stage_info stage_storing_result_in_query_cache; +extern PSI_stage_info stage_storing_row_into_queue; +extern PSI_stage_info stage_system_lock; +extern PSI_stage_info stage_unlocking_tables; +extern PSI_stage_info stage_table_lock; +extern PSI_stage_info stage_filling_schema_table; +extern PSI_stage_info stage_update; +extern PSI_stage_info stage_updating; +extern PSI_stage_info stage_updating_main_table; +extern PSI_stage_info stage_updating_reference_tables; +extern PSI_stage_info stage_upgrading_lock; +extern PSI_stage_info stage_user_lock; +extern PSI_stage_info stage_user_sleep; +extern PSI_stage_info stage_verifying_table; +extern PSI_stage_info stage_waiting_for_ddl; +extern PSI_stage_info stage_waiting_for_delay_list; +extern PSI_stage_info stage_waiting_for_flush; +extern PSI_stage_info 
stage_waiting_for_gtid_to_be_written_to_binary_log; +extern PSI_stage_info stage_waiting_for_handler_insert; +extern PSI_stage_info stage_waiting_for_handler_lock; +extern PSI_stage_info stage_waiting_for_handler_open; +extern PSI_stage_info stage_waiting_for_insert; +extern PSI_stage_info stage_waiting_for_master_to_send_event; +extern PSI_stage_info stage_waiting_for_master_update; +extern PSI_stage_info stage_waiting_for_relay_log_space; +extern PSI_stage_info stage_waiting_for_slave_mutex_on_exit; +extern PSI_stage_info stage_waiting_for_slave_thread_to_start; +extern PSI_stage_info stage_waiting_for_query_cache_lock; +extern PSI_stage_info stage_waiting_for_table_flush; +extern PSI_stage_info stage_waiting_for_the_next_event_in_relay_log; +extern PSI_stage_info stage_waiting_for_the_slave_thread_to_advance_position; +extern PSI_stage_info stage_waiting_to_finalize_termination; +extern PSI_stage_info stage_binlog_waiting_background_tasks; +extern PSI_stage_info stage_binlog_write; +extern PSI_stage_info stage_binlog_processing_checkpoint_notify; +extern PSI_stage_info stage_binlog_stopping_background_thread; +extern PSI_stage_info stage_waiting_for_work_from_sql_thread; +extern PSI_stage_info stage_waiting_for_prior_transaction_to_commit; +extern PSI_stage_info stage_waiting_for_prior_transaction_to_start_commit; +extern PSI_stage_info stage_waiting_for_room_in_worker_thread; +extern PSI_stage_info stage_waiting_for_workers_idle; +extern PSI_stage_info stage_waiting_for_ftwrl; +extern PSI_stage_info stage_waiting_for_ftwrl_threads_to_pause; +extern PSI_stage_info stage_waiting_for_rpl_thread_pool; +extern PSI_stage_info stage_master_gtid_wait_primary; +extern PSI_stage_info stage_master_gtid_wait; +extern PSI_stage_info stage_gtid_wait_other_connection; +extern PSI_stage_info stage_slave_background_process_request; +extern PSI_stage_info stage_slave_background_wait_request; +extern PSI_stage_info stage_waiting_for_deadlock_kill; +extern PSI_stage_info 
stage_starting; +#ifdef WITH_WSREP +// Aditional Galera thread states +extern PSI_stage_info stage_waiting_isolation; +extern PSI_stage_info stage_waiting_certification; +extern PSI_stage_info stage_waiting_ddl; +extern PSI_stage_info stage_waiting_flow; +#endif /* WITH_WSREP */ + +#ifdef HAVE_PSI_STATEMENT_INTERFACE +/** + Statement instrumentation keys (sql). + The last entry, at [SQLCOM_END], is for parsing errors. +*/ +extern PSI_statement_info sql_statement_info[(uint) SQLCOM_END + 1]; + +/** + Statement instrumentation keys (com). + The last entry, at [COM_END], is for packet errors. +*/ +extern PSI_statement_info com_statement_info[(uint) COM_END + 1]; + +/** + Statement instrumentation key for replication. +*/ +extern PSI_statement_info stmt_info_rpl; + +void init_sql_statement_info(); +void init_com_statement_info(); +#endif /* HAVE_PSI_STATEMENT_INTERFACE */ + +#ifndef _WIN32 +extern pthread_t signal_thread; +#endif + +#ifdef HAVE_OPENSSL +extern struct st_VioSSLFd * ssl_acceptor_fd; +#endif /* HAVE_OPENSSL */ + +/* + The following variables were under INNODB_COMPABILITY_HOOKS + */ +extern my_bool opt_large_pages; +extern uint opt_large_page_size; +extern MYSQL_PLUGIN_IMPORT char lc_messages_dir[FN_REFLEN]; +extern char *lc_messages_dir_ptr, *log_error_file_ptr; +extern MYSQL_PLUGIN_IMPORT char reg_ext[FN_EXTLEN]; +extern MYSQL_PLUGIN_IMPORT uint reg_ext_length; +extern MYSQL_PLUGIN_IMPORT uint lower_case_table_names; +extern MYSQL_PLUGIN_IMPORT bool mysqld_embedded; +extern ulong specialflag; +extern uint mysql_data_home_len; +extern uint mysql_real_data_home_len; +extern const char *mysql_real_data_home_ptr; +extern ulong thread_handling; +extern "C" MYSQL_PLUGIN_IMPORT char server_version[SERVER_VERSION_LENGTH]; +extern char *server_version_ptr; +extern bool using_custom_server_version; +extern MYSQL_PLUGIN_IMPORT char mysql_real_data_home[]; +extern char mysql_unpacked_real_data_home[]; +extern MYSQL_PLUGIN_IMPORT struct system_variables 
global_system_variables; +extern char default_logfile_name[FN_REFLEN]; +extern char *my_proxy_protocol_networks; + +#define mysql_tmpdir (my_tmpdir(&mysql_tmpdir_list)) + +extern MYSQL_PLUGIN_IMPORT const key_map key_map_empty; +extern MYSQL_PLUGIN_IMPORT key_map key_map_full; /* Should be threaded as const */ + +/* + Server mutex locks and condition variables. + */ +extern mysql_mutex_t + LOCK_item_func_sleep, LOCK_status, + LOCK_error_log, LOCK_delayed_insert, LOCK_short_uuid_generator, + LOCK_delayed_status, LOCK_delayed_create, LOCK_crypt, LOCK_timezone, + LOCK_active_mi, LOCK_manager, LOCK_user_conn, + LOCK_prepared_stmt_count, LOCK_error_messages, LOCK_backup_log; +extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_global_system_variables; +extern mysql_rwlock_t LOCK_all_status_vars; +extern mysql_mutex_t LOCK_start_thread; +#ifdef HAVE_OPENSSL +extern char* des_key_file; +extern mysql_mutex_t LOCK_des_key_file; +#endif +extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_server_started; +extern MYSQL_PLUGIN_IMPORT mysql_cond_t COND_server_started; +extern mysql_rwlock_t LOCK_grant, LOCK_sys_init_connect, LOCK_sys_init_slave; +extern mysql_rwlock_t LOCK_ssl_refresh; +extern mysql_prlock_t LOCK_system_variables_hash; +extern mysql_cond_t COND_start_thread; +extern mysql_cond_t COND_manager; + +extern my_bool opt_use_ssl; +extern char *opt_ssl_ca, *opt_ssl_capath, *opt_ssl_cert, *opt_ssl_cipher, + *opt_ssl_key, *opt_ssl_crl, *opt_ssl_crlpath; +extern ulonglong tls_version; + +#ifdef MYSQL_SERVER + +/** + only options that need special treatment in get_one_option() deserve + to be listed below +*/ +enum options_mysqld +{ + OPT_to_set_the_start_number=256, + OPT_BINLOG_DO_DB, + OPT_BINLOG_FORMAT, + OPT_BINLOG_IGNORE_DB, + OPT_BIN_LOG, + OPT_BOOTSTRAP, + OPT_EXPIRE_LOGS_DAYS, + OPT_BINLOG_EXPIRE_LOGS_SECONDS, + OPT_CONSOLE, + OPT_DEBUG_SYNC_TIMEOUT, + OPT_REMOVED_OPTION, + OPT_IGNORE_DB_DIRECTORY, + OPT_ISAM_LOG, + OPT_KEY_BUFFER_SIZE, + OPT_KEY_CACHE_AGE_THRESHOLD, + 
OPT_KEY_CACHE_BLOCK_SIZE, + OPT_KEY_CACHE_DIVISION_LIMIT, + OPT_KEY_CACHE_PARTITIONS, + OPT_KEY_CACHE_CHANGED_BLOCKS_HASH_SIZE, + OPT_LOG_BASENAME, + OPT_LOG_ERROR, + OPT_LOWER_CASE_TABLE_NAMES, + OPT_PLUGIN_LOAD, + OPT_PLUGIN_LOAD_ADD, + OPT_PFS_INSTRUMENT, + OPT_REPLICATE_DO_DB, + OPT_REPLICATE_DO_TABLE, + OPT_REPLICATE_IGNORE_DB, + OPT_REPLICATE_IGNORE_TABLE, + OPT_REPLICATE_REWRITE_DB, + OPT_REPLICATE_WILD_DO_TABLE, + OPT_REPLICATE_WILD_IGNORE_TABLE, + OPT_SAFE, + OPT_SERVER_ID, + OPT_SILENT, + OPT_SKIP_HOST_CACHE, + OPT_SLAVE_PARALLEL_MODE, + OPT_SSL_CA, + OPT_SSL_CAPATH, + OPT_SSL_CERT, + OPT_SSL_CIPHER, + OPT_SSL_CRL, + OPT_SSL_CRLPATH, + OPT_SSL_KEY, + OPT_THREAD_CONCURRENCY, + OPT_WANT_CORE, +#ifdef WITH_WSREP + OPT_WSREP_CAUSAL_READS, + OPT_WSREP_SYNC_WAIT, +#endif /* WITH_WSREP */ + OPT_MYSQL_COMPATIBILITY, + OPT_TLS_VERSION, + OPT_MYSQL_TO_BE_IMPLEMENTED, + OPT_SEQURE_FILE_PRIV, + OPT_which_is_always_the_last +}; +#endif + +/** + Query type constants (usable as bitmap flags). +*/ +enum enum_query_type +{ + /// Nothing specific, ordinary SQL query. + QT_ORDINARY= 0, + /// In utf8. + QT_TO_SYSTEM_CHARSET= (1 << 0), + /// Without character set introducers. + QT_WITHOUT_INTRODUCERS= (1 << 1), + /// view internal representation (like QT_ORDINARY except ORDER BY clause) + QT_VIEW_INTERNAL= (1 << 2), + /// If identifiers should not include database names, where unambiguous + QT_ITEM_IDENT_SKIP_DB_NAMES= (1 << 3), + /// If identifiers should not include table names, where unambiguous + QT_ITEM_IDENT_SKIP_TABLE_NAMES= (1 << 4), + /// If Item_cache_wrapper should not print + QT_ITEM_CACHE_WRAPPER_SKIP_DETAILS= (1 << 5), + /// If Item_subselect should print as just "(subquery#1)" + /// rather than display the subquery body + QT_ITEM_SUBSELECT_ID_ONLY= (1 << 6), + /// If NULLIF(a,b) should print itself as + /// CASE WHEN a_for_comparison=b THEN NULL ELSE a_for_return_value END + /// when "a" was replaced to two different items + /// (e.g. 
by equal fields propagation in optimize_cond()) + /// or always as NULLIF(a, b). + /// The default behaviour is to use CASE syntax when + /// a_for_return_value is not the same as a_for_comparison. + /// SHOW CREATE {VIEW|PROCEDURE|FUNCTION} and other cases where the + /// original representation is required, should set this flag. + QT_ITEM_ORIGINAL_FUNC_NULLIF= (1 << 7), + /// good for parsing + QT_PARSABLE= (1 << 8), + + /// This value means focus on readability, not on ability to parse back, etc. + QT_EXPLAIN= QT_TO_SYSTEM_CHARSET | + QT_ITEM_IDENT_SKIP_DB_NAMES | + QT_ITEM_CACHE_WRAPPER_SKIP_DETAILS | + QT_ITEM_SUBSELECT_ID_ONLY, + + QT_SHOW_SELECT_NUMBER= (1<<10), + + /// Do not print database name or table name in the identifiers (even if + /// this means the printout will be ambigous). It is assumed that the caller + /// passing this flag knows what they are doing. + QT_ITEM_IDENT_DISABLE_DB_TABLE_NAMES= (1 <<11), + + /// This is used for EXPLAIN EXTENDED extra warnings / Be more detailed + /// Be more detailed than QT_EXPLAIN. + /// Perhaps we should eventually include QT_ITEM_IDENT_SKIP_CURRENT_DATABASE + /// here, as it would give better readable results + QT_EXPLAIN_EXTENDED= QT_TO_SYSTEM_CHARSET| + QT_SHOW_SELECT_NUMBER, + + // If an expression is constant, print the expression, not the value + // it evaluates to. Should be used for error messages, so that they + // don't reveal values. + QT_NO_DATA_EXPANSION= (1 << 9), + // Remove wrappers added for TVC when creating or showing view + QT_NO_WRAPPERS_FOR_TVC_IN_VIEW= (1 << 12) +}; + + +/* query_id */ +extern Atomic_counter global_query_id; + +/* increment query_id and return it. */ +inline __attribute__((warn_unused_result)) query_id_t next_query_id() +{ + return global_query_id++; +} + +inline query_id_t get_query_id() +{ + return global_query_id; +} + +/* increment global_thread_id and return it. 
*/ +extern __attribute__((warn_unused_result)) my_thread_id next_thread_id(void); + +/* + TODO: Replace this with an inline function. + */ +#ifndef EMBEDDED_LIBRARY +extern "C" void unireg_abort(int exit_code) __attribute__((noreturn)); +#else +extern "C" void unireg_clear(int exit_code); +#define unireg_abort(exit_code) do { unireg_clear(exit_code); DBUG_RETURN(exit_code); } while(0) +#endif + +inline void table_case_convert(char * name, uint length) +{ + if (lower_case_table_names) + files_charset_info->casedn(name, length, name, length); +} + +extern void set_server_version(char *buf, size_t size); + +#define current_thd _current_thd() +void set_current_thd(THD *thd); + +/* + @todo remove, make it static in ha_maria.cc + currently it's needed for sql_select.cc +*/ +extern handlerton *maria_hton; + +extern uint64 global_gtid_counter; +extern my_bool opt_gtid_strict_mode; +extern my_bool opt_userstat_running, debug_assert_if_crashed_table; +extern uint mysqld_extra_port; +extern ulong opt_progress_report_time; +extern ulong extra_max_connections; +extern ulonglong denied_connections; +extern ulong thread_created; +extern scheduler_functions *thread_scheduler, *extra_thread_scheduler; +extern char *opt_log_basename; +extern my_bool opt_master_verify_checksum; +extern my_bool opt_stack_trace, disable_log_notes; +extern my_bool opt_expect_abort; +extern my_bool opt_slave_sql_verify_checksum; +extern my_bool opt_mysql56_temporal_format, strict_password_validation; +extern ulong binlog_checksum_options; +extern bool max_user_connections_checking; +extern ulong opt_binlog_dbug_fsync_sleep; + +extern uint volatile global_disable_checkpoint; +extern my_bool opt_help; + +extern int mysqld_main(int argc, char **argv); + +#ifdef _WIN32 +extern HANDLE hEventShutdown; +extern void mysqld_win_initiate_shutdown(); +extern void mysqld_win_set_startup_complete(); +extern void mysqld_win_extend_service_timeout(DWORD sec); +extern void mysqld_set_service_status_callback(void 
(*)(DWORD, DWORD, DWORD)); +extern void mysqld_win_set_service_name(const char *name); +#endif + +#endif /* MYSQLD_INCLUDED */ diff --git a/sql/mysqld_suffix.h b/sql/mysqld_suffix.h new file mode 100644 index 00000000..3c36ba2b --- /dev/null +++ b/sql/mysqld_suffix.h @@ -0,0 +1,34 @@ +#ifndef MYSQLD_SUFFIX_INCLUDED +#define MYSQLD_SUFFIX_INCLUDED + +/* Copyright (c) 2000-2004, 2006, 2007 MySQL AB, 2009 Sun Microsystems, Inc. + Use is subject to license terms. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + Set MYSQL_SERVER_SUFFIX_STR. + + The following code is quite ugly as there is no portable way to easily set a + string to the value of a macro +*/ + +#ifdef MYSQL_SERVER_SUFFIX +#define MYSQL_SERVER_SUFFIX_STR STRINGIFY_ARG(MYSQL_SERVER_SUFFIX) +#else +#define MYSQL_SERVER_SUFFIX_STR MYSQL_SERVER_SUFFIX_DEF +#endif +#endif /* MYSQLD_SUFFIX_INCLUDED */ diff --git a/sql/net_serv.cc b/sql/net_serv.cc new file mode 100644 index 00000000..70e71d9a --- /dev/null +++ b/sql/net_serv.cc @@ -0,0 +1,1451 @@ +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. + Copyright (c) 2012, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + This file is the net layer API for the MySQL client/server protocol. + + Write and read of logical packets to/from socket. + + Writes are cached into net_buffer_length big packets. + Read packets are reallocated dynamicly when reading big packets. + Each logical packet has the following pre-info: + 3 byte length & 1 byte package-number. + + This file needs to be written in C as it's used by the libmysql client as a + C file. +*/ + +/* + HFTODO this must be hidden if we don't want client capabilities in + embedded library + */ + +#include "mariadb.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "probes_mysql.h" +#include +#include "proxy_protocol.h" + +PSI_memory_key key_memory_NET_buff; +PSI_memory_key key_memory_NET_compress_packet; + +#ifdef EMBEDDED_LIBRARY +#undef MYSQL_SERVER +#undef MYSQL_CLIENT +#define MYSQL_CLIENT +#endif /*EMBEDDED_LIBRARY */ + +/* + to reduce the number of ifdef's in the code +*/ +#ifdef EXTRA_DEBUG +#define EXTRA_DEBUG_fprintf fprintf +#define EXTRA_DEBUG_fflush fflush +#define EXTRA_DEBUG_ASSERT DBUG_ASSERT +#else +static void inline EXTRA_DEBUG_fprintf(...) {} +#ifndef MYSQL_SERVER +static int inline EXTRA_DEBUG_fflush(...) { return 0; } +#endif +#endif /* EXTRA_DEBUG */ + +#ifdef MYSQL_SERVER +#include +#include +#define MYSQL_SERVER_my_error my_error +#else +static void inline MYSQL_SERVER_my_error(...) 
{} +#endif + +#ifndef EXTRA_DEBUG_ASSERT +# define EXTRA_DEBUG_ASSERT(X) do {} while(0) +#endif + +/* + The following handles the differences when this is linked between the + client and the server. + + This gives an error if a too big packet is found. + The server can change this, but because the client can't normally do this + the client should have a bigger max_allowed_packet. +*/ + +#if defined(_WIN32) || !defined(MYSQL_SERVER) + /* The following is because alarms doesn't work on windows. */ +#ifndef NO_ALARM +#define NO_ALARM +#endif +#endif + +#ifndef NO_ALARM +#include "my_pthread.h" +void sql_print_error(const char *format,...); +#else +#define DONT_USE_THR_ALARM +#endif /* NO_ALARM */ + +#include "thr_alarm.h" + +#ifdef MYSQL_SERVER +/* + The following variables/functions should really not be declared + extern, but as it's hard to include sql_priv.h here, we have to + live with this for a while. +*/ +extern ulonglong test_flags; +extern ulong bytes_sent, bytes_received, net_big_packet_count; +#ifdef HAVE_QUERY_CACHE +#define USE_QUERY_CACHE +extern void query_cache_insert(void *thd, const char *packet, size_t length, + unsigned pkt_nr); +#endif // HAVE_QUERY_CACHE +#define update_statistics(A) A +extern my_bool thd_net_is_killed(THD *thd); +/* Additional instrumentation hooks for the server */ +#include "mysql_com_server.h" +#else +#define update_statistics(A) +#define thd_net_is_killed(A) 0 +#endif + + +static my_bool net_write_buff(NET *, const uchar *, size_t len); + +my_bool net_allocate_new_packet(NET *net, void *thd, uint my_flags); + +/** Init with packet info. 
*/ + +my_bool my_net_init(NET *net, Vio *vio, void *thd, uint my_flags) +{ + DBUG_ENTER("my_net_init"); + DBUG_PRINT("enter", ("my_flags: %u", my_flags)); + net->vio = vio; + net->read_timeout= 0; + net->write_timeout= 0; + my_net_local_init(net); /* Set some limits */ + + if (net_allocate_new_packet(net, thd, my_flags)) + DBUG_RETURN(1); + + net->error=0; net->return_status=0; + net->pkt_nr=net->compress_pkt_nr=0; + net->last_error[0]=0; + net->compress=0; net->reading_or_writing=0; + net->where_b = net->remain_in_buf=0; + net->net_skip_rest_factor= 0; + net->last_errno=0; + net->thread_specific_malloc= MY_TEST(my_flags & MY_THREAD_SPECIFIC); + net->thd= 0; +#ifdef MYSQL_SERVER + net->extension= NULL; + net->thd= thd; +#endif + + if (vio) + { + /* For perl DBI/DBD. */ + net->fd= vio_fd(vio); +#if defined(MYSQL_SERVER) && !defined(_WIN32) + if (!(test_flags & TEST_BLOCKING)) + { + my_bool old_mode; + vio_blocking(vio, FALSE, &old_mode); + } +#endif + vio_fastsend(vio); + } + DBUG_RETURN(0); +} + + +/** + Allocate and assign new net buffer + + @note In case of error the old buffer left + + @retval TRUE error + @retval FALSE success +*/ + +my_bool net_allocate_new_packet(NET *net, void *thd, uint my_flags) +{ + uchar *tmp; + DBUG_ENTER("net_allocate_new_packet"); + if (!(tmp= (uchar*) my_malloc(key_memory_NET_buff, + (size_t) net->max_packet + + NET_HEADER_SIZE + COMP_HEADER_SIZE + 1, + MYF(MY_WME | my_flags)))) + DBUG_RETURN(1); + net->buff= tmp; + net->buff_end=net->buff+net->max_packet; + net->write_pos=net->read_pos = net->buff; + DBUG_RETURN(0); +} + + +void net_end(NET *net) +{ + DBUG_ENTER("net_end"); + my_free(net->buff); + net->buff=0; + DBUG_VOID_RETURN; +} + + +/** Realloc the packet buffer. */ + +my_bool net_realloc(NET *net, size_t length) +{ + uchar *buff; + size_t pkt_length; + DBUG_ENTER("net_realloc"); + DBUG_PRINT("enter",("length: %lu", (ulong) length)); + + if (length >= net->max_packet_size) + { + DBUG_PRINT("error", ("Packet too large. 
Max size: %lu", + net->max_packet_size)); + /* @todo: 1 and 2 codes are identical. */ + net->error= 1; + net->last_errno= ER_NET_PACKET_TOO_LARGE; + MYSQL_SERVER_my_error(ER_NET_PACKET_TOO_LARGE, MYF(0)); + DBUG_RETURN(1); + } + pkt_length = (length+IO_SIZE-1) & ~(IO_SIZE-1); + /* + We must allocate some extra bytes for the end 0 and to be able to + read big compressed blocks + 1 safety byte since uint3korr() in + my_real_read() may actually read 4 bytes depending on build flags and + platform. + */ + if (!(buff= (uchar*) my_realloc(key_memory_NET_buff, + (char*) net->buff, pkt_length + + NET_HEADER_SIZE + COMP_HEADER_SIZE + 1, + MYF(MY_WME | (net->thread_specific_malloc + ? MY_THREAD_SPECIFIC : 0))))) + { + /* @todo: 1 and 2 codes are identical. */ + net->error= 1; + net->last_errno= ER_OUT_OF_RESOURCES; + /* In the server the error is reported by MY_WME flag. */ + DBUG_RETURN(1); + } + net->buff=net->write_pos=buff; + net->buff_end=buff+(net->max_packet= (ulong) pkt_length); + DBUG_RETURN(0); +} + + +/** + Check if there is any data to be read from the socket. + + @param sd socket descriptor + + @retval + 0 No data to read + @retval + 1 Data or EOF to read + @retval + -1 Don't know if data is ready or not +*/ + +#if !defined(EMBEDDED_LIBRARY) && defined(DBUG_OFF) + +static int net_data_is_ready(my_socket sd) +{ +#ifdef HAVE_POLL + struct pollfd ufds; + int res; + + ufds.fd= sd; + ufds.events= POLLIN | POLLPRI; + if (!(res= poll(&ufds, 1, 0))) + return 0; + if (res < 0 || !(ufds.revents & (POLLIN | POLLPRI))) + return 0; + return 1; +#else + fd_set sfds; + struct timeval tv; + int res; + +#ifndef _WIN32 + /* Windows uses an _array_ of 64 fd's as default, so it's safe */ + if (sd >= FD_SETSIZE) + return -1; +#define NET_DATA_IS_READY_CAN_RETURN_MINUS_ONE +#endif + + FD_ZERO(&sfds); + FD_SET(sd, &sfds); + + tv.tv_sec= tv.tv_usec= 0; + + if ((res= select((int) (sd + 1), &sfds, NULL, NULL, &tv)) < 0) + return 0; + else + return MY_TEST(res ? 
FD_ISSET(sd, &sfds) : 0); +#endif /* HAVE_POLL */ +} + +#endif /* EMBEDDED_LIBRARY */ + +/** + Clear (reinitialize) the NET structure for a new command. + + @remark Performs debug checking of the socket buffer to + ensure that the protocol sequence is correct. + + - Read from socket until there is nothing more to read. Discard + what is read. + - Initialize net for new net_read/net_write calls. + + If there is anything when to read 'net_clear' is called this + normally indicates an error in the protocol. Normally one should not + need to do clear the communication buffer. If one compiles without + -DUSE_NET_CLEAR then one wins one read call / query. + + When connection is properly closed (for TCP it means with + a FIN packet), then select() considers a socket "ready to read", + in the sense that there's EOF to read, but read() returns 0. + + @param net NET handler + @param clear_buffer if <> 0, then clear all data from comm buff +*/ + +void net_clear(NET *net, my_bool clear_buffer __attribute__((unused))) +{ + DBUG_ENTER("net_clear"); + +/* + We don't do a clear in case of not DBUG_OFF to catch bugs in the + protocol handling. 
+*/ + +#if (!defined(EMBEDDED_LIBRARY) && defined(DBUG_OFF)) || defined(USE_NET_CLEAR) + if (clear_buffer) + { + size_t count; + int ready; + while ((ready= net_data_is_ready(vio_fd(net->vio))) > 0) + { + /* The socket is ready */ + if ((long) (count= vio_read(net->vio, net->buff, + (size_t) net->max_packet)) > 0) + { + DBUG_PRINT("info",("skipped %ld bytes from file: %s", + (long) count, vio_description(net->vio))); + EXTRA_DEBUG_fprintf(stderr,"Note: net_clear() skipped %ld bytes from file: %s\n", + (long) count, vio_description(net->vio)); + } + else + { + DBUG_PRINT("info",("socket ready but only EOF to read - disconnected")); + net->error= 2; + break; + } + } +#ifdef NET_DATA_IS_READY_CAN_RETURN_MINUS_ONE + /* 'net_data_is_ready' returned "don't know" */ + if (ready == -1) + { + /* Read unblocking to clear net */ + my_bool old_mode; + if (!vio_blocking(net->vio, FALSE, &old_mode)) + { + while ((long) (count= vio_read(net->vio, net->buff, + (size_t) net->max_packet)) > 0) + DBUG_PRINT("info",("skipped %ld bytes from file: %s", + (long) count, vio_description(net->vio))); + vio_blocking(net->vio, TRUE, &old_mode); + } + } +#endif /* NET_DATA_IS_READY_CAN_RETURN_MINUS_ONE */ + } +#endif /* EMBEDDED_LIBRARY */ + net->pkt_nr=net->compress_pkt_nr=0; /* Ready for new command */ + net->write_pos=net->buff; + DBUG_VOID_RETURN; +} + + +/** Flush write_buffer if not empty. 
*/ + +my_bool net_flush(NET *net) +{ + my_bool error= 0; + DBUG_ENTER("net_flush"); + if (net->buff != net->write_pos) + { + error= MY_TEST(net_real_write(net, net->buff, + (size_t) (net->write_pos - net->buff))); + net->write_pos= net->buff; + } + /* Sync packet number if using compression */ + if (net->compress) + net->pkt_nr=net->compress_pkt_nr; + DBUG_RETURN(error); +} + + +/***************************************************************************** +** Write something to server/client buffer +*****************************************************************************/ + +/** + Write a logical packet with packet header. + + Format: Packet length (3 bytes), packet number (1 byte) + When compression is used, a 3 byte compression length is added. + + @note If compression is used, the original packet is modified! +*/ + +my_bool my_net_write(NET *net, const uchar *packet, size_t len) +{ + uchar buff[NET_HEADER_SIZE]; + + if (unlikely(!net->vio)) /* nowhere to write */ + return 0; + + MYSQL_NET_WRITE_START(len); + + /* + Big packets are handled by splitting them in packets of MAX_PACKET_LENGTH + length. The last packet is always a packet that is < MAX_PACKET_LENGTH. + (The last packet may even have a length of 0) + */ + while (len >= MAX_PACKET_LENGTH) + { + const ulong z_size = MAX_PACKET_LENGTH; + int3store(buff, z_size); + buff[3]= (uchar) net->pkt_nr++; + if (net_write_buff(net, buff, NET_HEADER_SIZE) || + net_write_buff(net, packet, z_size)) + { + MYSQL_NET_WRITE_DONE(1); + return 1; + } + packet += z_size; + len-= z_size; + } + /* Write last packet */ + int3store(buff,len); + buff[3]= (uchar) net->pkt_nr++; + if (net_write_buff(net, buff, NET_HEADER_SIZE)) + { + MYSQL_NET_WRITE_DONE(1); + return 1; + } +#ifndef DEBUG_DATA_PACKETS + DBUG_DUMP("packet_header", buff, NET_HEADER_SIZE); +#endif + my_bool rc= MY_TEST(net_write_buff(net, packet, len)); + MYSQL_NET_WRITE_DONE(rc); + return rc; +} + + +/** + Send a command to the server. 
+ + The reason for having both header and packet is so that libmysql + can easy add a header to a special command (like prepared statements) + without having to re-alloc the string. + + As the command is part of the first data packet, we have to do some data + juggling to put the command in there, without having to create a new + packet. + + This function will split big packets into sub-packets if needed. + (Each sub packet can only be 2^24 bytes) + + @param net NET handler + @param command Command in MySQL server (enum enum_server_command) + @param header Header to write after command + @param head_len Length of header + @param packet Query or parameter to query + @param len Length of packet + + @retval + 0 ok + @retval + 1 error +*/ + +my_bool +net_write_command(NET *net,uchar command, + const uchar *header, size_t head_len, + const uchar *packet, size_t len) +{ + size_t length=len+1+head_len; /* 1 extra byte for command */ + uchar buff[NET_HEADER_SIZE+1]; + uint header_size=NET_HEADER_SIZE+1; + my_bool rc; + DBUG_ENTER("net_write_command"); + DBUG_PRINT("enter",("length: %lu", (ulong) len)); + +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("simulate_error_on_packet_write", + { + if (command == COM_BINLOG_DUMP) + { + net->last_errno = ER_NET_ERROR_ON_WRITE; + DBUG_ASSERT(!debug_sync_set_action( + (THD *)net->thd, + STRING_WITH_LEN("now SIGNAL parked WAIT_FOR continue"))); + DBUG_RETURN(true); + } + };); +#endif + MYSQL_NET_WRITE_START(length); + + buff[4]=command; /* For first packet */ + + if (length >= MAX_PACKET_LENGTH) + { + /* Take into account that we have the command in the first header */ + len= MAX_PACKET_LENGTH - 1 - head_len; + do + { + int3store(buff, MAX_PACKET_LENGTH); + buff[3]= (uchar) net->pkt_nr++; + if (net_write_buff(net, buff, header_size) || + net_write_buff(net, header, head_len) || + net_write_buff(net, packet, len)) + { + MYSQL_NET_WRITE_DONE(1); + DBUG_RETURN(1); + } + packet+= len; + length-= MAX_PACKET_LENGTH; + len= MAX_PACKET_LENGTH; + 
head_len= 0; + header_size= NET_HEADER_SIZE; + } while (length >= MAX_PACKET_LENGTH); + len=length; /* Data left to be written */ + } + int3store(buff,length); + buff[3]= (uchar) net->pkt_nr++; + rc= MY_TEST(net_write_buff(net, buff, header_size) || + (head_len && net_write_buff(net, header, head_len)) || + net_write_buff(net, packet, len) || net_flush(net)); + MYSQL_NET_WRITE_DONE(rc); + DBUG_RETURN(rc); +} + +/** + Caching the data in a local buffer before sending it. + + Fill up net->buffer and send it to the client when full. + + If the rest of the to-be-sent-packet is bigger than buffer, + send it in one big block (to avoid copying to internal buffer). + If not, copy the rest of the data to the buffer and return without + sending data. + + @param net Network handler + @param packet Packet to send + @param len Length of packet + + @note + The cached buffer can be sent as it is with 'net_flush()'. + In this code we have to be careful to not send a packet longer than + MAX_PACKET_LENGTH to net_real_write() if we are using the compressed + protocol as we store the length of the compressed packet in 3 bytes. 
+ + @retval + 0 ok + @retval + 1 +*/ + +static my_bool +net_write_buff(NET *net, const uchar *packet, size_t len) +{ + size_t left_length; + if (net->compress && net->max_packet > MAX_PACKET_LENGTH) + left_length= (MAX_PACKET_LENGTH - (net->write_pos - net->buff)); + else + left_length= (net->buff_end - net->write_pos); + +#ifdef DEBUG_DATA_PACKETS + DBUG_DUMP("data_written", packet, len); +#endif + if (len > left_length) + { + if (net->write_pos != net->buff) + { + /* Fill up already used packet and write it */ + memcpy((char*) net->write_pos,packet,left_length); + if (net_real_write(net, net->buff, + (size_t) (net->write_pos - net->buff) + left_length)) + return 1; + net->write_pos= net->buff; + packet+= left_length; + len-= left_length; + } + if (net->compress) + { + /* + We can't have bigger packets than 16M with compression + Because the uncompressed length is stored in 3 bytes + */ + left_length= MAX_PACKET_LENGTH; + while (len > left_length) + { + if (net_real_write(net, packet, left_length)) + return 1; + packet+= left_length; + len-= left_length; + } + } + if (len > net->max_packet) + return net_real_write(net, packet, len) ? 1 : 0; + /* Send out rest of the blocks as full sized blocks */ + } + if (len) + memcpy((char*) net->write_pos,packet,len); + net->write_pos+= len; + return 0; +} + + +/** + Read and write one packet using timeouts. + If needed, the packet is compressed before sending. 
+ + @todo + - TODO is it needed to set this variable if we have no socket +*/ + +int +net_real_write(NET *net,const uchar *packet, size_t len) +{ + size_t length; + const uchar *pos,*end; + thr_alarm_t alarmed; +#ifndef NO_ALARM + ALARM alarm_buff; +#endif + uint retry_count=0; + my_bool net_blocking = vio_is_blocking(net->vio); + DBUG_ENTER("net_real_write"); + +#if defined(MYSQL_SERVER) + THD *thd= (THD *)net->thd; +#if defined(USE_QUERY_CACHE) + query_cache_insert(thd, (char*) packet, len, net->pkt_nr); +#endif + if (likely(thd)) + { + /* + Wait until pending operations (currently it is engine + asynchronous group commit) are finished before replying + to the client, to keep durability promise. + */ + thd->async_state.wait_for_pending_ops(); + } +#endif + + if (unlikely(net->error == 2)) + DBUG_RETURN(-1); /* socket can't be used */ + + net->reading_or_writing=2; +#ifdef HAVE_COMPRESS + if (net->compress) + { + size_t complen; + uchar *b; + uint header_length=NET_HEADER_SIZE+COMP_HEADER_SIZE; + if (!(b= (uchar*) my_malloc(key_memory_NET_compress_packet, + len + NET_HEADER_SIZE + COMP_HEADER_SIZE + 1, + MYF(MY_WME | (net->thread_specific_malloc + ? MY_THREAD_SPECIFIC : 0))))) + { + net->error= 2; + net->last_errno= ER_OUT_OF_RESOURCES; + /* In the server, the error is reported by MY_WME flag. 
*/ + net->reading_or_writing= 0; + DBUG_RETURN(1); + } + memcpy(b+header_length,packet,len); + + /* Don't compress error packets (compress == 2) */ + if (net->compress == 2 || my_compress(b+header_length, &len, &complen)) + complen=0; + int3store(&b[NET_HEADER_SIZE],complen); + int3store(b,len); + b[3]=(uchar) (net->compress_pkt_nr++); + len+= header_length; + packet= b; + } +#endif /* HAVE_COMPRESS */ + +#ifdef DEBUG_DATA_PACKETS + DBUG_DUMP("data_written", packet, len); +#endif + +#ifndef NO_ALARM + thr_alarm_init(&alarmed); + if (net_blocking) + thr_alarm(&alarmed, net->write_timeout, &alarm_buff); +#else + alarmed=0; + /* Write timeout is set in my_net_set_write_timeout */ +#endif /* NO_ALARM */ + + pos= packet; + end=pos+len; + while (pos != end) + { + if ((long) (length= vio_write(net->vio,pos,(size_t) (end-pos))) <= 0) + { + my_bool interrupted = vio_should_retry(net->vio); +#if !defined(_WIN32) + if ((interrupted || length == 0) && !thr_alarm_in_use(&alarmed)) + { + if (!thr_alarm(&alarmed, net->write_timeout, &alarm_buff)) + { /* Always true for client */ + my_bool old_mode; + while (vio_blocking(net->vio, TRUE, &old_mode) < 0) + { + if (vio_should_retry(net->vio) && retry_count++ < net->retry_count) + continue; + EXTRA_DEBUG_fprintf(stderr, + "%s: my_net_write: fcntl returned error %d, aborting thread\n", + my_progname,vio_errno(net->vio)); + net->error= 2; /* Close socket */ + net->last_errno= ER_NET_PACKET_TOO_LARGE; + MYSQL_SERVER_my_error(ER_NET_PACKET_TOO_LARGE, MYF(0)); + goto end; + } + retry_count=0; + continue; + } + } + else +#endif /* !defined(_WIN32) */ + if (thr_alarm_in_use(&alarmed) && !thr_got_alarm(&alarmed) && + interrupted) + { + if (retry_count++ < net->retry_count) + continue; + EXTRA_DEBUG_fprintf(stderr, "%s: write looped, aborting thread\n", + my_progname); + } +#ifndef MYSQL_SERVER + if (vio_errno(net->vio) == SOCKET_EINTR) + { + DBUG_PRINT("warning",("Interrupted write. 
Retrying...")); + continue; + } +#endif /* !defined(MYSQL_SERVER) */ + net->error= 2; /* Close socket */ + net->last_errno= (interrupted ? ER_NET_WRITE_INTERRUPTED : + ER_NET_ERROR_ON_WRITE); + MYSQL_SERVER_my_error(net->last_errno, MYF(0)); + break; + } + pos+=length; + update_statistics(thd_increment_bytes_sent(net->thd, length)); + } +#ifndef _WIN32 + end: +#endif +#ifdef HAVE_COMPRESS + if (net->compress) + my_free((void*) packet); +#endif + if (thr_alarm_in_use(&alarmed)) + { + my_bool old_mode; + thr_end_alarm(&alarmed); + if (!net_blocking) + vio_blocking(net->vio, net_blocking, &old_mode); + } + net->reading_or_writing=0; + DBUG_RETURN(((int) (pos != end))); +} + + +/***************************************************************************** +** Read something from server/clinet +*****************************************************************************/ + +#ifndef NO_ALARM + +static my_bool net_safe_read(NET *net, uchar *buff, size_t length, + thr_alarm_t *alarmed) +{ + uint retry_count=0; + while (length > 0) + { + size_t tmp; + if ((long) (tmp= vio_read(net->vio, buff, length)) <= 0) + { + my_bool interrupted = vio_should_retry(net->vio); + if (!thr_got_alarm(alarmed) && interrupted) + { /* Probably in MIT threads */ + if (retry_count++ < net->retry_count) + continue; + } + return 1; + } + length-= tmp; + buff+= tmp; + } + return 0; +} + +/** + Help function to clear the commuication buffer when we get a too big packet. 
+ + @param net Communication handle + @param remain Bytes to read + @param alarmed Parameter for thr_alarm() + @param alarm_buff Parameter for thr_alarm() + + @retval + 0 Was able to read the whole packet + @retval + 1 Got mailformed packet from client +*/ + +static my_bool my_net_skip_rest(NET *net, uint32 remain, thr_alarm_t *alarmed, + ALARM *alarm_buff) +{ + longlong limit= net->max_packet_size*net->net_skip_rest_factor; + uint32 old=remain; + DBUG_ENTER("my_net_skip_rest"); + DBUG_PRINT("enter",("bytes_to_skip: %u", (uint) remain)); + + /* The following is good for debugging */ + update_statistics(thd_increment_net_big_packet_count(net->thd, 1)); + + if (!thr_alarm_in_use(alarmed)) + { + my_bool old_mode; + if (thr_alarm(alarmed,net->read_timeout, alarm_buff) || + vio_blocking(net->vio, TRUE, &old_mode) < 0) + DBUG_RETURN(1); /* Can't setup, abort */ + } + for (;;) + { + while (remain > 0) + { + size_t length= MY_MIN(remain, net->max_packet); + if (net_safe_read(net, net->buff, length, alarmed)) + DBUG_RETURN(1); + update_statistics(thd_increment_bytes_received(net->thd, length)); + remain -= (uint32) length; + limit-= length; + if (limit < 0) + DBUG_RETURN(1); + } + if (old != MAX_PACKET_LENGTH) + break; + if (net_safe_read(net, net->buff, NET_HEADER_SIZE, alarmed)) + DBUG_RETURN(1); + limit-= NET_HEADER_SIZE; + old=remain= uint3korr(net->buff); + net->pkt_nr++; + } + DBUG_RETURN(0); +} +#endif /* NO_ALARM */ + + +/** + Try to parse and process proxy protocol header. + + This function is called in case MySQL packet header cannot be parsed. + It checks if proxy header was sent, and that it was send from allowed remote + host, as defined by proxy-protocol-networks parameter. + + If proxy header is parsed, then THD and ACL structures and changed to indicate + the new peer address and port. 
+ + Note, that proxy header can only be sent either when the connection is established, + or as the client reply packet to +*/ +#undef IGNORE /* for Windows */ +typedef enum { RETRY, ABORT, IGNORE} handle_proxy_header_result; +static handle_proxy_header_result handle_proxy_header(NET *net) +{ +#if !defined(MYSQL_SERVER) || defined(EMBEDDED_LIBRARY) + return IGNORE; +#else + THD *thd= (THD *)net->thd; + + if (!has_proxy_protocol_header(net) || !thd || + thd->get_command() != COM_CONNECT) + return IGNORE; + + /* + Proxy information found in the first 4 bytes received so far. + Read and parse proxy header , change peer ip address and port in THD. + */ + proxy_peer_info peer_info; + + if (!thd->net.vio) + { + DBUG_ASSERT(0); + return ABORT; + } + + if (!is_proxy_protocol_allowed((sockaddr *)&(thd->net.vio->remote))) + { + /* proxy-protocol-networks variable needs to be set to allow this remote address */ + my_printf_error(ER_HOST_NOT_PRIVILEGED, "Proxy header is not accepted from %s", + MYF(0), thd->main_security_ctx.ip); + return ABORT; + } + + if (parse_proxy_protocol_header(net, &peer_info)) + { + /* Failed to parse proxy header*/ + my_printf_error(ER_UNKNOWN_ERROR, "Failed to parse proxy header", MYF(0)); + return ABORT; + } + + if (peer_info.is_local_command) + /* proxy header indicates LOCAL connection, no action necessary */ + return RETRY; + /* Change peer address in THD and ACL structures.*/ + uint host_errors; + return (handle_proxy_header_result)thd_set_peer_addr(thd, + &(peer_info.peer_addr), NULL, peer_info.port, + false, &host_errors); +#endif +} + +/** + Reads one packet to net->buff + net->where_b. + Long packets are handled by my_net_read(). + This function reallocates the net->buff buffer if necessary. + + @return + Returns length of packet. 
+*/ + +static ulong +my_real_read(NET *net, size_t *complen, + my_bool header __attribute__((unused))) +{ + uchar *pos; + size_t length; + uint i,retry_count=0; + ulong len=packet_error; + my_bool expect_error_packet __attribute__((unused))= 0; + thr_alarm_t alarmed; +#ifndef NO_ALARM + ALARM alarm_buff; +#endif + +retry: + + my_bool net_blocking=vio_is_blocking(net->vio); + uint32 remain= (net->compress ? NET_HEADER_SIZE+COMP_HEADER_SIZE : + NET_HEADER_SIZE); +#ifdef MYSQL_SERVER + size_t count= remain; + struct st_net_server *server_extension= 0; + + if (header) + { + server_extension= static_cast (net->extension); + if (server_extension != NULL) + { + void *user_data= server_extension->m_user_data; + server_extension->m_before_header(net, user_data, count); + } + } +#endif + + *complen = 0; + + net->reading_or_writing=1; + thr_alarm_init(&alarmed); +#ifndef NO_ALARM + if (net_blocking) + thr_alarm(&alarmed,net->read_timeout,&alarm_buff); +#else + /* Read timeout is set in my_net_set_read_timeout */ +#endif /* NO_ALARM */ + + pos = net->buff + net->where_b; /* net->packet -4 */ + for (i=0 ; i < 2 ; i++) + { + while (remain > 0) + { + /* First read is done with non blocking mode */ + if ((long) (length= vio_read(net->vio, pos, remain)) <= 0L) + { + my_bool interrupted = vio_should_retry(net->vio); + + DBUG_PRINT("info",("vio_read returned %ld errno: %d", + (long) length, vio_errno(net->vio))); + + if (i== 0 && unlikely(thd_net_is_killed((THD*) net->thd))) + { + DBUG_PRINT("info", ("thd is killed")); + len= packet_error; + net->error= 0; + net->last_errno= ER_CONNECTION_KILLED; + MYSQL_SERVER_my_error(net->last_errno, MYF(0)); + goto end; + } + +#if !defined(_WIN32) && defined(MYSQL_SERVER) + /* + We got an error that there was no data on the socket. 
We now set up + an alarm to not 'read forever', change the socket to the blocking + mode and try again + */ + if ((interrupted || length == 0) && !thr_alarm_in_use(&alarmed)) + { + if (!thr_alarm(&alarmed,net->read_timeout,&alarm_buff)) /* Don't wait too long */ + { + my_bool old_mode; + while (vio_blocking(net->vio, TRUE, &old_mode) < 0) + { + if (vio_should_retry(net->vio) && + retry_count++ < net->retry_count) + continue; + DBUG_PRINT("error", + ("fcntl returned error %d, aborting thread", + vio_errno(net->vio))); + EXTRA_DEBUG_fprintf(stderr, + "%s: read: fcntl returned error %d, aborting thread\n", + my_progname,vio_errno(net->vio)); + len= packet_error; + net->error= 2; /* Close socket */ + net->last_errno= ER_NET_FCNTL_ERROR; + MYSQL_SERVER_my_error(ER_NET_FCNTL_ERROR, MYF(0)); + goto end; + } + retry_count=0; + continue; + } + } +#endif /* (!defined(_WIN32) && defined(MYSQL_SERVER) */ + if (thr_alarm_in_use(&alarmed) && !thr_got_alarm(&alarmed) && + interrupted) + { /* Probably in MIT threads */ + if (retry_count++ < net->retry_count) + continue; + EXTRA_DEBUG_fprintf(stderr, "%s: read looped with error %d, aborting thread\n", + my_progname,vio_errno(net->vio)); + } +#ifndef MYSQL_SERVER + if (length != 0 && vio_errno(net->vio) == SOCKET_EINTR) + { + DBUG_PRINT("warning",("Interrupted read. Retrying...")); + continue; + } +#endif + DBUG_PRINT("error",("Couldn't read packet: remain: %u errno: %d length: %ld", + remain, vio_errno(net->vio), (long) length)); + len= packet_error; + net->error= 2; /* Close socket */ + net->last_errno= (vio_was_timeout(net->vio) ? 
+ ER_NET_READ_INTERRUPTED : + ER_NET_READ_ERROR); + MYSQL_SERVER_my_error(net->last_errno, MYF(0)); + goto end; + } + remain -= (uint32) length; + pos+= length; + update_statistics(thd_increment_bytes_received(net->thd, length)); + } + +#ifdef DEBUG_DATA_PACKETS + DBUG_DUMP("data_read", net->buff+net->where_b, length); +#endif + if (i == 0) + { /* First parts is packet length */ + size_t helping; +#ifndef DEBUG_DATA_PACKETS + DBUG_DUMP("packet_header", net->buff+net->where_b, + NET_HEADER_SIZE); +#endif + if (net->buff[net->where_b + 3] != (uchar) net->pkt_nr) + { +#ifndef MYSQL_SERVER + if (net->buff[net->where_b + 3] == (uchar) (net->pkt_nr -1)) + { + /* + If the server was killed then the server may have missed the + last sent client packet and the packet numbering may be one off. + */ + DBUG_PRINT("warning", ("Found possible out of order packets")); + expect_error_packet= 1; + } + else +#endif + goto packets_out_of_order; + } + net->compress_pkt_nr= ++net->pkt_nr; +#ifdef HAVE_COMPRESS + if (net->compress) + { + /* + The following uint3korr() may read 4 bytes, so make sure we don't + read unallocated or uninitialized memory. The right-hand expression + must match the size of the buffer allocated in net_realloc(). 
+ */ + DBUG_ASSERT(net->where_b + NET_HEADER_SIZE + sizeof(uint32) <= + net->max_packet + NET_HEADER_SIZE + COMP_HEADER_SIZE + 1); + /* + If the packet is compressed then complen > 0 and contains the + number of bytes in the uncompressed packet + */ + *complen=uint3korr(&(net->buff[net->where_b + NET_HEADER_SIZE])); + } +#endif + + len=uint3korr(net->buff+net->where_b); + if (!len) /* End of big multi-packet */ + goto end; + helping = MY_MAX(len,*complen) + net->where_b; + /* The necessary size of net->buff */ + if (helping >= net->max_packet) + { + if (net_realloc(net,helping)) + { +#if defined(MYSQL_SERVER) && !defined(NO_ALARM) + if (!net->compress && + !my_net_skip_rest(net, (uint32) len, &alarmed, &alarm_buff)) + net->error= 3; /* Successfully skiped packet */ +#endif + len= packet_error; /* Return error and close connection */ + goto end; + } + } + pos=net->buff + net->where_b; + remain = (uint32) len; +#ifdef MYSQL_SERVER + if (server_extension != NULL) + { + void *user_data= server_extension->m_user_data; + server_extension->m_after_header(net, user_data, count, 0); + server_extension= NULL; + } +#endif + } +#ifndef MYSQL_SERVER + else if (expect_error_packet) + { + /* + This check is safe both for compressed and not compressed protocol + as for the compressed protocol errors are not compressed anymore. 
+ */ + if (net->buff[net->where_b] != (uchar) 255) + { + /* Restore pkt_nr to original value */ + net->pkt_nr--; + goto packets_out_of_order; + } + } +#endif + } + +end: + if (thr_alarm_in_use(&alarmed)) + { + my_bool old_mode; + thr_end_alarm(&alarmed); + if (!net_blocking) + vio_blocking(net->vio, net_blocking, &old_mode); + } + net->reading_or_writing=0; +#ifdef DEBUG_DATA_PACKETS + if (len != packet_error) + DBUG_DUMP("data_read", net->buff+net->where_b, len); +#endif +#ifdef MYSQL_SERVER + if (server_extension != NULL) + { + void *user_data= server_extension->m_user_data; + server_extension->m_after_header(net, user_data, count, 1); + DBUG_ASSERT(len == packet_error || len == 0); + } +#endif + return(len); + +packets_out_of_order: + { + switch (handle_proxy_header(net)) { + case ABORT: + /* error happened, message is already written. */ + len= packet_error; + goto end; + case RETRY: + goto retry; + case IGNORE: + break; + } + + DBUG_PRINT("error", + ("Packets out of order (Found: %d, expected %u)", + (int) net->buff[net->where_b + 3], + net->pkt_nr)); + EXTRA_DEBUG_ASSERT(0); + /* + We don't make noise server side, since the client is expected + to break the protocol for e.g. --send LOAD DATA .. LOCAL where + the server expects the client to send a file, but the client + may reply with a new command instead. + */ +#ifndef MYSQL_SERVER + EXTRA_DEBUG_fflush(stdout); + EXTRA_DEBUG_fprintf(stderr,"Error: Packets out of order (Found: %d, expected %d)\n", + (int) net->buff[net->where_b + 3], + (uint) (uchar) net->pkt_nr); + EXTRA_DEBUG_fflush(stderr); +#endif + len= packet_error; + MYSQL_SERVER_my_error(ER_NET_PACKETS_OUT_OF_ORDER, MYF(0)); + goto end; + } +} + + + +/* Old interface. See my_net_read_packet() for function description */ + +#undef my_net_read + +ulong my_net_read(NET *net) +{ + return my_net_read_packet(net, 0); +} + + +/** + Read a packet from the client/server and return it without the internal + package header. 
+ + If the packet is the first packet of a multi-packet packet + (which is indicated by the length of the packet = 0xffffff) then + all sub packets are read and concatenated. + + If the packet was compressed, its uncompressed and the length of the + uncompressed packet is returned. + + read_from_server is set when the server is reading a new command + from the client. + + @return + The function returns the length of the found packet or packet_error. + net->read_pos points to the read data. +*/ +ulong +my_net_read_packet(NET *net, my_bool read_from_server) +{ + ulong reallen = 0; + return my_net_read_packet_reallen(net, read_from_server, &reallen); +} + + +ulong +my_net_read_packet_reallen(NET *net, my_bool read_from_server, ulong* reallen) +{ + size_t len, complen; + + MYSQL_NET_READ_START(); + + *reallen = 0; +#ifdef HAVE_COMPRESS + if (!net->compress) + { +#endif + len = my_real_read(net,&complen, read_from_server); + if (len == MAX_PACKET_LENGTH) + { + /* First packet of a multi-packet. 
Concatenate the packets */ + ulong save_pos = net->where_b; + size_t total_length= 0; + do + { + net->where_b += (ulong)len; + total_length += len; + len = my_real_read(net,&complen, 0); + } while (len == MAX_PACKET_LENGTH); + if (likely(len != packet_error)) + len+= total_length; + net->where_b = save_pos; + } + + net->read_pos = net->buff + net->where_b; + if (likely(len != packet_error)) + { + net->read_pos[len]=0; /* Safeguard for mysql_use_result */ + *reallen = (ulong)len; + } + MYSQL_NET_READ_DONE(0, len); + return (ulong)len; +#ifdef HAVE_COMPRESS + } + else + { + /* We are using the compressed protocol */ + + ulong buf_length; + ulong start_of_packet; + ulong first_packet_offset; + uint read_length, multi_byte_packet=0; + + if (net->remain_in_buf) + { + buf_length= net->buf_length; /* Data left in old packet */ + first_packet_offset= start_of_packet= (net->buf_length - + net->remain_in_buf); + /* Restore the character that was overwritten by the end 0 */ + net->buff[start_of_packet]= net->save_char; + } + else + { + /* reuse buffer, as there is nothing in it that we need */ + buf_length= start_of_packet= first_packet_offset= 0; + } + for (;;) + { + ulong packet_len; + + if (buf_length - start_of_packet >= NET_HEADER_SIZE) + { + read_length = uint3korr(net->buff+start_of_packet); + if (!read_length) + { + /* End of multi-byte packet */ + start_of_packet += NET_HEADER_SIZE; + break; + } + if (read_length + NET_HEADER_SIZE <= buf_length - start_of_packet) + { + if (multi_byte_packet) + { + /* Remove packet header for second packet */ + memmove(net->buff + first_packet_offset + start_of_packet, + net->buff + first_packet_offset + start_of_packet + + NET_HEADER_SIZE, + buf_length - start_of_packet); + start_of_packet += read_length; + buf_length -= NET_HEADER_SIZE; + } + else + start_of_packet+= read_length + NET_HEADER_SIZE; + + if (read_length != MAX_PACKET_LENGTH) /* last package */ + { + multi_byte_packet= 0; /* No last zero len packet */ + break; + } + 
multi_byte_packet= NET_HEADER_SIZE; + /* Move data down to read next data packet after current one */ + if (first_packet_offset) + { + memmove(net->buff,net->buff+first_packet_offset, + buf_length-first_packet_offset); + buf_length-=first_packet_offset; + start_of_packet -= first_packet_offset; + first_packet_offset=0; + } + continue; + } + } + /* Move data down to read next data packet after current one */ + if (first_packet_offset) + { + memmove(net->buff,net->buff+first_packet_offset, + buf_length-first_packet_offset); + buf_length-=first_packet_offset; + start_of_packet -= first_packet_offset; + first_packet_offset=0; + } + + net->where_b=buf_length; + if ((packet_len = my_real_read(net,&complen, read_from_server)) + == packet_error) + { + MYSQL_NET_READ_DONE(1, 0); + return packet_error; + } + read_from_server= 0; + if (my_uncompress(net->buff + net->where_b, packet_len, + &complen)) + { + net->error= 2; /* caller will close socket */ + net->last_errno= ER_NET_UNCOMPRESS_ERROR; + MYSQL_SERVER_my_error(ER_NET_UNCOMPRESS_ERROR, MYF(0)); + MYSQL_NET_READ_DONE(1, 0); + return packet_error; + } + buf_length+= (ulong)complen; + *reallen += packet_len; + } + + net->read_pos= net->buff+ first_packet_offset + NET_HEADER_SIZE; + net->buf_length= buf_length; + net->remain_in_buf= (ulong) (buf_length - start_of_packet); + len = ((ulong) (start_of_packet - first_packet_offset) - NET_HEADER_SIZE - + multi_byte_packet); + net->save_char= net->read_pos[len]; /* Must be saved */ + net->read_pos[len]=0; /* Safeguard for mysql_use_result */ + } +#endif /* HAVE_COMPRESS */ + MYSQL_NET_READ_DONE(0, len); + return (ulong)len; +} + + +void my_net_set_read_timeout(NET *net, uint timeout) +{ + DBUG_ENTER("my_net_set_read_timeout"); + DBUG_PRINT("enter", ("timeout: %d", timeout)); + if (net->read_timeout != timeout) + { + net->read_timeout= timeout; + if (net->vio) + vio_timeout(net->vio, 0, timeout); + } + DBUG_VOID_RETURN; +} + + +void my_net_set_write_timeout(NET *net, uint timeout) 
+{ + DBUG_ENTER("my_net_set_write_timeout"); + DBUG_PRINT("enter", ("timeout: %d", timeout)); + if (net->write_timeout != timeout) + { + net->write_timeout= timeout; + if (net->vio) + vio_timeout(net->vio, 1, timeout); + } + DBUG_VOID_RETURN; +} diff --git a/sql/opt_histogram_json.cc b/sql/opt_histogram_json.cc new file mode 100644 index 00000000..1aec9e53 --- /dev/null +++ b/sql/opt_histogram_json.cc @@ -0,0 +1,1198 @@ +/* + Copyright (c) 2021, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_base.h" +#include "my_json_writer.h" +#include "sql_statistics.h" +#include "opt_histogram_json.h" + + +/* + @brief + Un-escape a JSON string and save it into *out. + + @detail + There's no way to tell how much space is needed for the output. + Start with a small string and increase its size until json_unescape() + succeeds. +*/ + +static bool json_unescape_to_string(const char *val, int val_len, String* out) +{ + // Make sure 'out' has some memory allocated. 
+ if (!out->alloced_length() && out->alloc(128)) + return true; + + while (1) + { + uchar *buf= (uchar*)out->ptr(); + out->length(out->alloced_length()); + + int res= json_unescape(&my_charset_utf8mb4_bin, + (const uchar*)val, + (const uchar*)val + val_len, + out->charset(), + buf, buf + out->length()); + if (res >= 0) + { + out->length(res); + return false; // Ok + } + + // We get here if the unescaped string didn't fit into memory. + if (out->alloc(out->alloced_length()*2)) + return true; + } +} + + +/* + @brief + Escape a JSON string and save it into *out. + + @detail + There's no way to tell how much space is needed for the output. + Start with a small string and increase its size until json_escape() + succeeds. +*/ + +static int json_escape_to_string(const String *str, String* out) +{ + // Make sure 'out' has some memory allocated. + if (!out->alloced_length() && out->alloc(128)) + return JSON_ERROR_OUT_OF_SPACE; + + while (1) + { + uchar *buf= (uchar*)out->ptr(); + out->length(out->alloced_length()); + const uchar *str_ptr= (const uchar*)str->ptr(); + + int res= json_escape(str->charset(), + str_ptr, + str_ptr + str->length(), + &my_charset_utf8mb4_bin, + buf, buf + out->length()); + if (res >= 0) + { + out->length(res); + return 0; // Ok + } + + if (res != JSON_ERROR_OUT_OF_SPACE) + return res; // Some conversion error + + // Out of space error. Try with a bigger buffer + if (out->alloc(out->alloced_length()*2)) + return JSON_ERROR_OUT_OF_SPACE; + } +} + + +class Histogram_json_builder : public Histogram_builder +{ + Histogram_json_hb *histogram; + /* Number of buckets in the histogram */ + uint hist_width; + + /* + Number of rows that we intend to have in the bucket. That is, this is + + n_rows_in_table / hist_width + + Actual number of rows in the buckets we produce may vary because of + "popular values" and rounding. 
+ */ + longlong bucket_capacity; + + /* Number of the buckets already collected */ + uint n_buckets_collected; + + /* + TRUE means do not try to represent values as UTF-8 text in histogram + storage. Use start_hex/end_hex for all values. + */ + bool force_binary; + + /* Data about the bucket we are filling now */ + struct CurBucket + { + /* Number of values in the bucket so far. */ + longlong size; + + /* Number of distinct values in the bucket */ + int ndv; + }; + CurBucket bucket; + + /* Used to create the JSON representation of the histogram. */ + Json_writer writer; + +public: + + Histogram_json_builder(Histogram_json_hb *hist, Field *col, uint col_len, + ha_rows rows) + : Histogram_builder(col, col_len, rows), histogram(hist) + { + /* + When computing number of rows in the bucket, round it UP. This way, we + will not end up with a histogram that has more buckets than intended. + + We may end up producing a histogram with fewer buckets than intended, but + this is considered tolerable. 
+ */ + bucket_capacity= (longlong)round(rows2double(records) / histogram->get_width() + 0.5); + if (bucket_capacity == 0) + bucket_capacity= 1; + hist_width= histogram->get_width(); + n_buckets_collected= 0; + bucket.ndv= 0; + bucket.size= 0; + force_binary= (col->type() == MYSQL_TYPE_BIT); + + writer.start_object(); + append_histogram_params(); + + writer.add_member(Histogram_json_hb::JSON_NAME).start_array(); + } + + ~Histogram_json_builder() override = default; + +private: + bool bucket_is_empty() { return bucket.ndv == 0; } + + void append_histogram_params() + { + char buf[128]; + String str(buf, sizeof(buf), system_charset_info); + THD *thd= current_thd; + timeval tv= {thd->query_start(), 0}; // we do not need microseconds + + Timestamp(tv).to_datetime(thd).to_string(&str, 0); + writer.add_member("target_histogram_size").add_ull(hist_width); + writer.add_member("collected_at").add_str(str.ptr()); + writer.add_member("collected_by").add_str(server_version); + } + /* + Flush the current bucket out (to JSON output), and set it to be empty. + */ + void finalize_bucket() + { + double fract= (double) bucket.size / records; + writer.add_member("size").add_double(fract); + writer.add_member("ndv").add_ll(bucket.ndv); + writer.end_object(); + n_buckets_collected++; + + bucket.ndv= 0; + bucket.size= 0; + } + + /* + Same as finalize_bucket() but also provide the bucket's end value. + */ + bool finalize_bucket_with_end_value(void *elem) + { + if (append_column_value(elem, false)) + return true; + finalize_bucket(); + return false; + } + + /* + Write the first value group to the bucket. + @param elem The value we are writing + @param cnt The number of such values. 
+ */ + bool start_bucket(void *elem, longlong cnt) + { + DBUG_ASSERT(bucket.size == 0); + writer.start_object(); + if (append_column_value(elem, true)) + return true; + + bucket.ndv= 1; + bucket.size= cnt; + return false; + } + + /* + Append the passed value into the JSON writer as string value + */ + bool append_column_value(void *elem, bool is_start) + { + StringBuffer val; + + // Get the text representation of the value + column->store_field_value((uchar*) elem, col_length); + String *str= column->val_str(&val); + + // Escape the value for JSON + StringBuffer escaped_val; + int rc= JSON_ERROR_ILLEGAL_SYMBOL; + if (!force_binary) + { + rc= json_escape_to_string(str, &escaped_val); + if (!rc) + { + writer.add_member(is_start? "start": "end"); + writer.add_str(escaped_val.c_ptr_safe()); + return false; + } + } + if (rc == JSON_ERROR_ILLEGAL_SYMBOL) + { + escaped_val.set_hex(val.ptr(), val.length()); + writer.add_member(is_start? "start_hex": "end_hex"); + writer.add_str(escaped_val.c_ptr_safe()); + return false; + } + return true; + } + + /* + Append a value group of cnt values. + */ + void append_to_bucket(longlong cnt) + { + bucket.ndv++; + bucket.size += cnt; + } + +public: + /* + @brief + Add data to the histogram. + + @detail + The call signals to add a "value group" of elem_cnt rows, each of which + has the same value that is provided in *elem. + + Subsequent next() calls will add values that are greater than the + current one. + + @return + 0 - OK + */ + int next(void *elem, element_count elem_cnt) override + { + counters.next(elem, elem_cnt); + ulonglong count= counters.get_count(); + + /* + Ok, we've got a "value group" of elem_cnt identical values. + + If we take the values from the value group and put them into + the current bucket, how many values will be left after we've + filled the bucket? + */ + longlong overflow= bucket.size + elem_cnt - bucket_capacity; + + /* + Case #1: This value group should be put into a separate bucket, if + A. 
It fills the current bucket and also fills the next bucket, OR + B. It fills the current bucket, which was empty. + */ + if (overflow >= bucket_capacity || (bucket_is_empty() && overflow >= 0)) + { + // Finalize the current bucket + if (!bucket_is_empty()) + finalize_bucket(); + + // Start/end the separate bucket for this value group. + if (start_bucket(elem, elem_cnt)) + return 1; // OOM + + if (records == count) + { + if (finalize_bucket_with_end_value(elem)) + return 1; + } + else + finalize_bucket(); + } + else if (overflow >= 0) + { + /* + Case #2: is when Case#1 doesn't hold, but we can still fill the + current bucket. + */ + + // If the bucket was empty, it would have been case #1. + DBUG_ASSERT(!bucket_is_empty()); + + /* + Finalize the current bucket. Put there enough values to make it hold + bucket_capacity values. + */ + append_to_bucket(bucket_capacity - bucket.size); + if (records == count && !overflow) + { + if (finalize_bucket_with_end_value(elem)) + return 1; + } + else + finalize_bucket(); + + if (overflow > 0) + { + // Then, start the new bucket with the remaining values. + if (start_bucket(elem, overflow)) + return 1; + } + } + else + { + // Case #3: there's not enough values to fill the current bucket. + if (bucket_is_empty()) + { + if (start_bucket(elem, elem_cnt)) + return 1; + } + else + append_to_bucket(elem_cnt); + } + + if (records == count) + { + // This is the final value group. 
+ if (!bucket_is_empty()) + { + if (finalize_bucket_with_end_value(elem)) + return 1; + } + } + return 0; + } + + /* + @brief + Finalize the creation of histogram + */ + void finalize() override + { + writer.end_array(); + writer.end_object(); + Binary_string *json_string= (Binary_string *) writer.output.get_string(); + histogram->set_json_text(n_buckets_collected, + json_string->c_ptr(), + (size_t)json_string->length()); + } +}; + + +Histogram_builder *Histogram_json_hb::create_builder(Field *col, uint col_len, + ha_rows rows) +{ + return new Histogram_json_builder(this, col, col_len, rows); +} + + +void Histogram_json_hb::init_for_collection(MEM_ROOT *mem_root, + Histogram_type htype_arg, + ulonglong size_arg) +{ + DBUG_ASSERT(htype_arg == JSON_HB); + size= (size_t)size_arg; +} + + +/* + A syntax sugar interface to json_string_t +*/ +class Json_string +{ + json_string_t str; +public: + explicit Json_string(const char *name) + { + json_string_set_str(&str, (const uchar*)name, + (const uchar*)name + strlen(name)); + json_string_set_cs(&str, system_charset_info); + } + json_string_t *get() { return &str; } +}; + + +/* + This [partially] saves the JSON parser state and then can rollback the parser + to it. + + The goal of this is to be able to make multiple json_key_matches() calls: + + Json_saved_parser_state save(je); + if (json_key_matches(je, KEY_NAME_1)) { + ... + return; + } + save.restore_to(je); + if (json_key_matches(je, KEY_NAME_2)) { + ... + } + + This allows one to parse JSON objects where [optional] members come in any + order. +*/ + +class Json_saved_parser_state +{ + const uchar *c_str; + my_wc_t c_next; + int state; +public: + explicit Json_saved_parser_state(const json_engine_t *je) : + c_str(je->s.c_str), + c_next(je->s.c_next), + state(je->state) + {} + void restore_to(json_engine_t *je) + { + je->s.c_str= c_str; + je->s.c_next= c_next; + je->state= state; + } +}; + + +/* + @brief + Read a constant from JSON document and save it in *out. 
+ + @detail + The JSON document stores constant in text form, we need to save it in + KeyTupleFormat. String constants in JSON may be escaped. +*/ + +bool read_bucket_endpoint(json_engine_t *je, Field *field, String *out, + const char **err) +{ + if (json_read_value(je)) + return true; + + if (je->value_type != JSON_VALUE_STRING && + je->value_type != JSON_VALUE_NUMBER) + { + *err= "String or number expected"; + return true; + } + + const char* je_value= (const char*)je->value; + if (je->value_type == JSON_VALUE_STRING && je->value_escaped) + { + StringBuffer<128> unescape_buf; + if (json_unescape_to_string(je_value, je->value_len, &unescape_buf)) + { + *err= "Un-escape error"; + return true; + } + field->store_text(unescape_buf.ptr(), unescape_buf.length(), + unescape_buf.charset()); + } + else + field->store_text(je_value, je->value_len, &my_charset_utf8mb4_bin); + + out->alloc(field->pack_length()); + uint bytes= field->get_key_image((uchar*)out->ptr(), + field->key_length(), Field::itRAW); + out->length(bytes); + return false; +} + + +bool read_hex_bucket_endpoint(json_engine_t *je, Field *field, String *out, + const char **err) +{ + if (json_read_value(je)) + return true; + + if (je->value_type != JSON_VALUE_STRING || je->value_escaped || + (je->value_len & 1)) + { + *err= "Expected a hex string"; + return true; + } + StringBuffer<128> buf; + + for (auto pc= je->value; pc < je->value + je->value_len; pc+=2) + { + int hex_char1= hexchar_to_int(pc[0]); + int hex_char2= hexchar_to_int(pc[1]); + if (hex_char1 == -1 || hex_char2 == -1) + { + *err= "Expected a hex string"; + return true; + } + buf.append((hex_char1 << 4) | hex_char2); + } + + field->store_text(buf.ptr(), buf.length(), field->charset()); + out->alloc(field->pack_length()); + uint bytes= field->get_key_image((uchar*)out->ptr(), + field->key_length(), Field::itRAW); + out->length(bytes); + return false; +} + + +/* + @brief Parse a JSON reprsentation for one histogram bucket + + @param je The JSON 
parser object + @param field Table field we are using histogram (used to convert + endpoints from text representation to binary) + @param total_size INOUT Fraction of the table rows in the buckets parsed so + far. + @param assigned_last_end OUT TRUE<=> The bucket had "end" members, the + function has saved it in + this->last_bucket_end_endp + @param err OUT If function returns 1, this *may* be set to point to text + describing the error. + + @detail + + Parse a JSON object in this form: + + { "start": "value", "size":nnn.nn, "ndv": nnn, "end": "value"} + + Unknown members are ignored. + + @return + 0 OK + 1 Parse Error + -1 EOF +*/ +int Histogram_json_hb::parse_bucket(json_engine_t *je, Field *field, + double *total_size, + bool *assigned_last_end, + const char **err) +{ + *assigned_last_end= false; + if (json_scan_next(je)) + return 1; + if (je->state != JST_VALUE) + { + if (je->state == JST_ARRAY_END) + return -1; // EOF + else + return 1; // An error + } + + if (json_scan_next(je) || je->state != JST_OBJ_START) + { + *err= "Expected an object in the buckets array"; + return 1; + } + + bool have_start= false; + bool have_size= false; + bool have_ndv= false; + + double size_d; + longlong ndv_ll= 0; + StringBuffer<128> value_buf; + int rc; + + while (!(rc= json_scan_next(je)) && je->state != JST_OBJ_END) + { + Json_saved_parser_state save1(je); + Json_string start_str("start"); + if (json_key_matches(je, start_str.get())) + { + if (read_bucket_endpoint(je, field, &value_buf, err)) + return 1; + + have_start= true; + continue; + } + save1.restore_to(je); + + Json_string size_str("size"); + if (json_key_matches(je, size_str.get())) + { + if (json_read_value(je)) + return 1; + + const char *size= (const char*)je->value_begin; + char *size_end= (char*)je->value_end; + int conv_err; + size_d= my_strtod(size, &size_end, &conv_err); + if (conv_err) + { + *err= ".size member must be a floating-point value"; + return 1; + } + have_size= true; + continue; + } + 
save1.restore_to(je); + + Json_string ndv_str("ndv"); + if (json_key_matches(je, ndv_str.get())) + { + if (json_read_value(je)) + return 1; + + const char *ndv= (const char*)je->value_begin; + char *ndv_end= (char*)je->value_end; + int conv_err; + ndv_ll= my_strtoll10(ndv, &ndv_end, &conv_err); + if (conv_err) + { + *err= ".ndv member must be an integer value"; + return 1; + } + have_ndv= true; + continue; + } + save1.restore_to(je); + + Json_string end_str("end"); + if (json_key_matches(je, end_str.get())) + { + if (read_bucket_endpoint(je, field, &value_buf, err)) + return 1; + last_bucket_end_endp.assign(value_buf.ptr(), value_buf.length()); + *assigned_last_end= true; + continue; + } + save1.restore_to(je); + + // Less common endoints: + Json_string start_hex_str("start_hex"); + if (json_key_matches(je, start_hex_str.get())) + { + if (read_hex_bucket_endpoint(je, field, &value_buf, err)) + return 1; + + have_start= true; + continue; + } + save1.restore_to(je); + + Json_string end_hex_str("end_hex"); + if (json_key_matches(je, end_hex_str.get())) + { + if (read_hex_bucket_endpoint(je, field, &value_buf, err)) + return 1; + last_bucket_end_endp.assign(value_buf.ptr(), value_buf.length()); + *assigned_last_end= true; + continue; + } + save1.restore_to(je); + + + // Some unknown member. Skip it. + if (json_skip_key(je)) + return 1; + } + + if (rc) + return 1; + + if (!have_start) + { + *err= "\"start\" element not present"; + return 1; + } + if (!have_size) + { + *err= "\"size\" element not present"; + return 1; + } + if (!have_ndv) + { + *err= "\"ndv\" element not present"; + return 1; + } + + *total_size += size_d; + + buckets.push_back({std::string(value_buf.ptr(), value_buf.length()), + *total_size, ndv_ll}); + + return 0; // Ok, continue reading +} + + +/* + @brief + Parse the histogram from its on-disk JSON representation + + @detail + See opt_histogram_json.h, class Histogram_json_hb for description of the + data format. 
+ + @return + false OK + True Error +*/ + +bool Histogram_json_hb::parse(MEM_ROOT *mem_root, const char *db_name, + const char *table_name, Field *field, + const char *hist_data, size_t hist_data_len) +{ + json_engine_t je; + int rc; + const char *err= "JSON parse error"; + double total_size; + int end_element; + bool end_assigned; + DBUG_ENTER("Histogram_json_hb::parse"); + + json_scan_start(&je, &my_charset_utf8mb4_bin, + (const uchar*)hist_data, + (const uchar*)hist_data+hist_data_len); + + if (json_scan_next(&je)) + goto err; + + if (je.state != JST_OBJ_START) + { + err= "Root JSON element must be a JSON object"; + goto err; + } + + while (1) + { + if (json_scan_next(&je)) + goto err; + if (je.state == JST_OBJ_END) + break; // End of object + + if (je.state != JST_KEY) + goto err; // Can' really have this: JSON object has keys in it + + Json_string hist_key_name(JSON_NAME); + if (json_key_matches(&je, hist_key_name.get())) + { + total_size= 0.0; + end_element= -1; + if (json_scan_next(&je)) + goto err; + + if (je.state != JST_ARRAY_START) + { + err= "histogram_hb must contain an array"; + goto err; + } + + while (!(rc= parse_bucket(&je, field, &total_size, &end_assigned, &err))) + { + if (end_assigned && end_element != -1) + end_element= (int)buckets.size(); + } + if (rc > 0) // Got error other than EOF + goto err; + } + else + { + // Some unknown member. Skip it. 
+ if (json_skip_key(&je)) + return 1; + } + } + + if (buckets.size() < 1) + { + err= "Histogram must have at least one bucket"; + goto err; + } + + if (end_element == -1) + { + buckets.back().start_value= last_bucket_end_endp; + } + else if (end_element < (int)buckets.size()) + { + err= ".end is only allowed in the last bucket"; + goto err; + } + + DBUG_RETURN(false); // Ok +err: + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_JSON_HISTOGRAM_PARSE_FAILED, + ER_THD(thd, ER_JSON_HISTOGRAM_PARSE_FAILED), + db_name, table_name, + err, (je.s.c_str - (const uchar*)hist_data)); + sql_print_error(ER_THD(thd, ER_JSON_HISTOGRAM_PARSE_FAILED), + db_name, table_name, err, + (je.s.c_str - (const uchar*)hist_data)); + + DBUG_RETURN(true); +} + + +static +void store_key_image_to_rec_no_null(Field *field, const char *ptr, size_t len) +{ + MY_BITMAP *old_map= dbug_tmp_use_all_columns(field->table, + &field->table->write_set); + field->set_key_image((const uchar*)ptr, (uint)len); + dbug_tmp_restore_column_map(&field->table->write_set, old_map); +} + + +static +double position_in_interval(Field *field, const uchar *key, uint key_len, + const std::string& left, const std::string& right) +{ + double res; + if (field->pos_through_val_str()) + { + StringBuffer<64> buf1, buf2, buf3; + + store_key_image_to_rec_no_null(field, left.data(), left.size()); + String *min_str= field->val_str(&buf1); + /* + Make sure we've saved a copy of the data, not a pointer into the + field->ptr. 
We will overwrite the contents of field->ptr with the next
+      store_key_image_to_rec_no_null call
+    */
+    if (&buf1 != min_str)
+      buf1.copy(*min_str);
+    else
+      buf1.copy();
+
+    store_key_image_to_rec_no_null(field, right.data(), right.size());
+    String *max_str= field->val_str(&buf2);
+    /* Same as above */
+    if (&buf2 != max_str)
+      buf2.copy(*max_str);
+    else
+      buf2.copy();
+
+    store_key_image_to_rec_no_null(field, (const char*)key, key_len);
+    String *midp_str= field->val_str(&buf3);
+
+    res= pos_in_interval_for_string(field->charset(),
+           (const uchar*)midp_str->ptr(), midp_str->length(),
+           (const uchar*)buf1.ptr(), buf1.length(),
+           (const uchar*)buf2.ptr(), buf2.length());
+  }
+  else
+  {
+    store_key_image_to_rec_no_null(field, left.data(), field->key_length());
+    double min_val_real= field->val_real();
+
+    store_key_image_to_rec_no_null(field, right.data(), field->key_length());
+    double max_val_real= field->val_real();
+
+    store_key_image_to_rec_no_null(field, (const char*)key, field->key_length());
+    double midp_val_real= field->val_real();
+
+    res= pos_in_interval_for_double(midp_val_real, min_val_real, max_val_real);
+  }
+  return res;
+}
+
+
+double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
+                                            double avg_sel)
+{
+  const uchar *key = endpoint->key;
+  if (field->real_maybe_null())
+    key++;
+
+  // If the value is outside of the histogram's range, this will "clip" it to
+  // first or last bucket.
+  int endp_cmp;
+  int idx= find_bucket(field, key, &endp_cmp);
+
+  double sel;
+
+  if (buckets[idx].ndv == 1 && (endp_cmp!=0))
+  {
+    /*
+      The bucket has a single value and it doesn't match! Return a very
+      small value.
+    */
+    sel= 0.0;
+  }
+  else
+  {
+    /*
+      We get here when:
+      * The bucket has one value and this is the value we are looking for.
+      * The bucket has multiple values. Then, assume
+    */
+    sel= (buckets[idx].cum_fract - get_left_fract(idx)) / buckets[idx].ndv;
+  }
+  return sel;
+}
+
+
+double Histogram_json_hb::get_left_fract(int idx)
+{
+  if (!idx)
+    return 0.0;
+  else
+    return buckets[idx-1].cum_fract;
+}
+
+std::string& Histogram_json_hb::get_end_value(int idx)
+{
+  if (idx == (int)buckets.size()-1)
+    return last_bucket_end_endp;
+  else
+    return buckets[idx+1].start_value;
+}
+
+/*
+  @param field    The table field histogram is for.  We don't care about the
+                  field's current value, we only need its virtual functions to
+                  perform various operations
+  @param min_endp Left endpoint, or NULL if there is none
+  @param max_endp Right endpoint, or NULL if there is none
+  @param avg_sel  Average selectivity of "field=const" equality for this field
+
+  @return
+     Range selectivity: a number between 0.0 and 1.0.
+
+  @note
+     This may return 0.0. Adjustments to avoid multiply-by-zero meltdown are
+     made elsewhere.
+*/
+
+double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
+                                            key_range *max_endp, double avg_sel)
+{
+  double min, max;
+
+  if (min_endp && !(field->real_maybe_null() && min_endp->key[0]))
+  {
+    bool exclusive_endp= (min_endp->flag == HA_READ_AFTER_KEY)? true: false;
+    const uchar *min_key= min_endp->key;
+    uint min_key_len= min_endp->length;
+    if (field->real_maybe_null())
+    {
+      min_key++;
+      min_key_len--;
+    }
+
+    // Find the leftmost bucket that contains the lookup value.
+    // (If the lookup value is to the left of all buckets, find bucket #0)
+    int endp_cmp;
+    int idx= find_bucket(field, min_key, &endp_cmp);
+
+    double sel;
+    // Special handling for buckets with ndv=1:
+    if (buckets[idx].ndv == 1)
+    {
+      if (endp_cmp < 0)
+        sel= 0.0;
+      else if (endp_cmp > 0)
+        sel= 1.0;
+      else // endp_cmp == 0.0
+        sel= (exclusive_endp)? 1.0 : 0.0;
+    }
+    else
+    {
+      sel= position_in_interval(field, min_key, min_key_len,
+                                buckets[idx].start_value,
+                                get_end_value(idx));
+    }
+    double left_fract= get_left_fract(idx);
+    min= left_fract + sel * (buckets[idx].cum_fract - left_fract);
+  }
+  else
+    min= 0.0;
+
+  if (max_endp)
+  {
+    // The right endpoint cannot be NULL
+    DBUG_ASSERT(!(field->real_maybe_null() && max_endp->key[0]));
+    bool inclusive_endp= (max_endp->flag == HA_READ_AFTER_KEY)? true: false;
+    const uchar *max_key= max_endp->key;
+    uint max_key_len= max_endp->length;
+    if (field->real_maybe_null())
+    {
+      max_key++;
+      max_key_len--;
+    }
+    int endp_cmp;
+    int idx= find_bucket(field, max_key, &endp_cmp);
+
+    if ((endp_cmp == 0) && !inclusive_endp)
+    {
+      /*
+        The range is "col < $CONST" and we've found a bucket starting with
+        $CONST.
+      */
+      if (idx > 0)
+      {
+        // Move to the previous bucket
+        endp_cmp= 1;
+        idx--;
+      }
+      else
+        endp_cmp= -1;
+    }
+    double sel;
+
+    // Special handling for buckets with ndv=1:
+    if (buckets[idx].ndv == 1)
+    {
+      if (endp_cmp < 0)
+        sel= 0.0;
+      else if (endp_cmp > 0)
+        sel= 1.0;
+      else // endp_cmp == 0.0
+        sel= inclusive_endp? 1.0 : 0.0;
+    }
+    else
+    {
+      sel= position_in_interval(field, max_key, max_key_len,
+                                buckets[idx].start_value,
+                                get_end_value(idx));
+    }
+    double left_fract= get_left_fract(idx);
+    max= left_fract + sel * (buckets[idx].cum_fract - left_fract);
+  }
+  else
+    max= 1.0;
+
+  if (min > max)
+  {
+    /*
+      This can happen due to rounding errors.
+
+      What is the acceptable error size? Json_writer::add_double() uses
+      %.11lg format. This gives 9 digits after the dot. A histogram may have
+      hundreds of buckets, let's multiply the error by 1000. 9-3=6
+    */
+    DBUG_ASSERT(max < min + 1e-6);
+    max= min;
+  }
+  return max - min;
+}
+
+
+void Histogram_json_hb::serialize(Field *field)
+{
+  field->store(json_text.data(), json_text.size(), &my_charset_bin);
+}
+
+
+#ifndef DBUG_OFF
+static int SGN(int x)
+{
+  if (!x)
+    return 0;
+  return (x < 0)? -1 : 1;
+}
+#endif
+
+
+/*
+  @brief
+  Find the leftmost histogram bucket such that "lookup_val >= start_value".
+
+  @param field        Field object (used to do value comparisons)
+  @param lookup_val   The lookup value in KeyTupleFormat.
+  @param cmp      OUT How the lookup_val compares to found_bucket.left_bound:
+                      0  - lookup_val == bucket.left_bound
+                      >0 - lookup_val > bucket.left_bound (the most typical)
+                      <0 - lookup_val < bucket.left_bound. This can only happen
+                           for the first bucket, for all other buckets we would
+                           just pick the previous bucket and have cmp>=0.
+  @return
+     The bucket index
+*/
+
+int Histogram_json_hb::find_bucket(const Field *field, const uchar *lookup_val,
+                                   int *cmp)
+{
+  int res;
+  int low= 0;
+  int high= (int)buckets.size() - 1;
+  *cmp= 1; // By default, (bucket[retval].start_value < *lookup_val)
+
+  while (low + 1 < high)
+  {
+    int middle= (low + high) / 2;
+    res= field->key_cmp((uchar*)buckets[middle].start_value.data(), lookup_val);
+    if (!res)
+    {
+      *cmp= res;
+      low= middle;
+      goto end;
+    }
+    else if (res < 0)
+      low= middle;
+    else //res > 0
+      high= middle;
+  }
+
+  /*
+    If low and high were assigned a value in the above loop and we got here,
+    then the following holds:
+
+      bucket[low].start_value < lookup_val < bucket[high].start_value
+
+    Besides that, there are two special cases: low=0 and high=last_bucket.
+    Handle them below.
+  */
+  if (low == 0)
+  {
+    res= field->key_cmp(lookup_val, (uchar*)buckets[0].start_value.data());
+    if (res <= 0)
+      *cmp= res;
+    else // res>0, lookup_val > buckets[0].start_value
+    {
+      res= field->key_cmp(lookup_val, (uchar*)buckets[high].start_value.data());
+      if (res >= 0) // lookup_val >= buckets[high].start_value
+      {
+        // Move to that bucket
+        low= high;
+        *cmp= res;
+      }
+      else
+        *cmp= 1;
+    }
+  }
+  else if (high == (int)buckets.size() - 1)
+  {
+    res= field->key_cmp(lookup_val, (uchar*)buckets[high].start_value.data());
+    if (res >= 0)
+    {
+      // Ok the value is in the last bucket.
+      *cmp= res;
+      low= high;
+    }
+    else
+    {
+      // The value is in the 'low' bucket.
+      res= field->key_cmp(lookup_val, (uchar*)buckets[low].start_value.data());
+      *cmp= res;
+    }
+  }
+
+end:
+  // Verification: *cmp has correct value
+  DBUG_ASSERT(SGN(*cmp) ==
+              SGN(field->key_cmp(lookup_val,
+                                 (uchar*)buckets[low].start_value.data())));
+  // buckets[low] <= lookup_val, with one exception of the first bucket.
+  DBUG_ASSERT(low == 0 ||
+              field->key_cmp((uchar*)buckets[low].start_value.data(), lookup_val)<= 0);
+  // buckets[low+1] > lookup_val, with one exception of the last bucket
+  DBUG_ASSERT(low == (int)buckets.size()-1 ||
+              field->key_cmp((uchar*)buckets[low+1].start_value.data(), lookup_val)> 0);
+  return low;
+}
diff --git a/sql/opt_histogram_json.h b/sql/opt_histogram_json.h
new file mode 100644
index 00000000..24846792
--- /dev/null
+++ b/sql/opt_histogram_json.h
@@ -0,0 +1,147 @@
+/*
+   Copyright (c) 2021, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "sql_statistics.h"
+
+/*
+  An equi-height histogram which stores real values for bucket bounds.
+
+  Handles @@histogram_type=JSON_HB
+
+  Histogram format in JSON:
+
+  {
+    // The next three are saved but not currently analyzed:
+    "target_histogram_size": nnn,
+    "collected_at": "(date and time)",
+    "collected_by": "(server version)",
+
+    "histogram_hb": [
+      { "start": "value", "size":nnn.nn, "ndv": nnn },
+      ...
+
+      // Optionally, start and/or end can be replaced with _hex variant
+      { "start_hex": "value", "size":nnn.nn, "ndv":nnn},
+
+      ...
+      { "start": "value", "size":nnn.nn, "ndv": nnn, "end": "value"},
+    ]
+  }
+
+  Histogram is a JSON object. It has some global properties and "histogram_hb"
+  member whose value is a JSON array of histogram buckets.
+
+  Each bucket is an object with these members:
+    "start" - the first value in the bucket.
+    "size"  - fraction of table rows that is contained in the bucket.
+    "ndv"   - Number of Distinct Values in the bucket.
+    "end"   - Optionally, the last value in the bucket.
+
+  A bucket is a single-point bucket if it has ndv=1.
+
+  Most buckets have no "end" member: the bucket is assumed to contain all
+  values up to the "start" of the next bucket.
+
+  The exception is single-point buckets where last value is the same as the
+  first value.
+
+  start/end can be replaced with start_hex/end_hex. In _hex variant, the
+  constant is encoded in hex. This encoding is used to handle so called
+  "unassigned characters": some non-UTF8 charsets have byte combinations that
+  are not mapped to any UTF8 character.
+*/
+
+class Histogram_json_hb final : public Histogram_base
+{
+  size_t size; /* Number of elements in the histogram */
+
+  /* Collection-time only: collected histogram in the JSON form. */
+  std::string json_text;
+
+  struct Bucket
+  {
+    // The left endpoint in KeyTupleFormat. The endpoint is inclusive, this
+    // value is in this bucket.
+    std::string start_value;
+
+    // Cumulative fraction: The fraction of table rows that fall into this
+    // and preceding buckets.
+    double cum_fract;
+
+    // Number of distinct values in the bucket.
+    longlong ndv;
+  };
+
+  std::vector<Bucket> buckets;
+
+  std::string last_bucket_end_endp;
+
+public:
+  static constexpr const char* JSON_NAME="histogram_hb";
+
+  bool parse(MEM_ROOT *mem_root, const char *db_name, const char *table_name,
+             Field *field, const char *hist_data,
+             size_t hist_data_len) override;
+
+  void serialize(Field *field) override;
+
+  Histogram_builder *create_builder(Field *col, uint col_len,
+                                    ha_rows rows) override;
+
+  // returns number of buckets in the histogram
+  uint get_width() override
+  {
+    return (uint)size;
+  }
+
+  Histogram_type get_type() override
+  {
+    return JSON_HB;
+  }
+
+  /*
+    @brief
+    This used to be the size of the histogram on disk, which was redundant
+    (one can check the size directly). Return the number of buckets instead.
+  */
+  uint get_size() override
+  {
+    return (uint)size;
+  }
+  void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
+                           ulonglong size) override;
+
+  double point_selectivity(Field *field, key_range *endpoint,
+                           double avg_sel) override;
+  double range_selectivity(Field *field, key_range *min_endp,
+                           key_range *max_endp, double avg_sel) override;
+
+  void set_json_text(ulonglong sz, const char *json_text_arg,
+                     size_t json_text_len)
+  {
+    size= (size_t) sz;
+    json_text.assign(json_text_arg, json_text_len);
+  }
+
+private:
+  int parse_bucket(json_engine_t *je, Field *field, double *cumulative_size,
+                   bool *assigned_last_end, const char **err);
+
+  double get_left_fract(int idx);
+  std::string& get_end_value(int idx);
+  int find_bucket(const Field *field, const uchar *lookup_val, int *cmp);
+};
+
diff --git a/sql/opt_index_cond_pushdown.cc b/sql/opt_index_cond_pushdown.cc
new file mode 100644
index 00000000..6a24fa95
--- /dev/null
+++ b/sql/opt_index_cond_pushdown.cc
@@ -0,0 +1,442 @@
+/*
+   Copyright (c) 2009, 2012, Monty Program Ab
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free
Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "mariadb.h"
+#include "sql_select.h"
+#include "sql_test.h"
+
+/****************************************************************************
+ * Index Condition Pushdown code starts
+ ***************************************************************************/
+/*
+  Check if given expression uses only table fields covered by the given index
+
+  SYNOPSIS
+    uses_index_fields_only()
+      item           Expression to check
+      tbl            The table having the index
+      keyno          The index number
+      other_tbls_ok  TRUE <=> Fields of other non-const tables are allowed
+
+  DESCRIPTION
+    Check if given expression only uses fields covered by index #keyno in the
+    table tbl. The expression can use any fields in any other tables.
+
+    The expression is guaranteed not to be AND or OR - those constructs are
+    handled outside of this function.
+
+  RETURN
+    TRUE   Yes
+    FALSE  No
+*/
+
+bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno,
+                            bool other_tbls_ok)
+{
+  if (item->walk(&Item::limit_index_condition_pushdown_processor, FALSE, NULL))
+  {
+    return FALSE;
+  }
+
+  if (item->const_item())
+    return TRUE;
+
+  /*
+    Don't push down the triggered conditions. Nested outer joins execution
+    code may need to evaluate a condition several times (both triggered and
+    untriggered), and there is no way to put thi
+    TODO: Consider cloning the triggered condition and using the copies for:
+      1. push the first copy down, to have most restrictive index condition
+         possible
+      2. Put the second copy into tab->select_cond.
+  */
+  if (item->type() == Item::FUNC_ITEM &&
+      ((Item_func*)item)->functype() == Item_func::TRIG_COND_FUNC)
+    return FALSE;
+
+  if (!(item->used_tables() & tbl->map))
+    return other_tbls_ok;
+
+  Item::Type item_type= item->type();
+  switch (item_type) {
+  case Item::FUNC_ITEM:
+    {
+      /* This is a function, apply condition recursively to arguments */
+      Item_func *item_func= (Item_func*)item;
+      Item **child;
+      Item **item_end= (item_func->arguments()) + item_func->argument_count();
+      for (child= item_func->arguments(); child != item_end; child++)
+      {
+        if (!uses_index_fields_only(*child, tbl, keyno, other_tbls_ok))
+          return FALSE;
+      }
+      return TRUE;
+    }
+  case Item::COND_ITEM:
+    {
+      /*
+        This is a AND/OR condition. Regular AND/OR clauses are handled by
+        make_cond_for_index() which will chop off the part that can be
+        checked with index. This code is for handling non-top-level AND/ORs,
+        e.g. func(x AND y).
+      */
+      List_iterator<Item> li(*((Item_cond*)item)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+        if (!uses_index_fields_only(item, tbl, keyno, other_tbls_ok))
+          return FALSE;
+      }
+      return TRUE;
+    }
+  case Item::FIELD_ITEM:
+    {
+      Item_field *item_field= (Item_field*)item;
+      Field *field= item_field->field;
+      if (field->table != tbl)
+        return TRUE;
+      /*
+        The below is probably a repetition - the first part checks the
+        other two, but let's play it safe:
+      */
+      if(!field->part_of_key.is_set(keyno) ||
+         field->type() == MYSQL_TYPE_GEOMETRY ||
+         field->type() == MYSQL_TYPE_BLOB)
+        return FALSE;
+      KEY *key_info= tbl->key_info + keyno;
+      KEY_PART_INFO *key_part= key_info->key_part;
+      KEY_PART_INFO *key_part_end= key_part + key_info->user_defined_key_parts;
+      for ( ; key_part < key_part_end; key_part++)
+      {
+        if (field->eq(key_part->field))
+          return !(key_part->key_part_flag & HA_PART_KEY_SEG);
+      }
+      if ((tbl->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
+          tbl->s->primary_key != MAX_KEY &&
+          tbl->s->primary_key != keyno)
+      {
+        key_info= tbl->key_info + tbl->s->primary_key;
+        key_part= key_info->key_part;
+        key_part_end= key_part + key_info->user_defined_key_parts;
+        for ( ; key_part < key_part_end; key_part++)
+        {
+          /*
+            It does not make sense to use the fact that the engine can read in
+            a full field if the key if the index is built only over a part
+            of this field.
+          */
+          if (field->eq(key_part->field))
+            return !(key_part->key_part_flag & HA_PART_KEY_SEG);
+        }
+      }
+      return FALSE;
+    }
+  case Item::REF_ITEM:
+    return uses_index_fields_only(item->real_item(), tbl, keyno,
+                                  other_tbls_ok);
+  default:
+    return FALSE; /* Play it safe, don't push unknown non-const items */
+  }
+}
+
+
+/*
+  Get a part of the condition that can be checked using only index fields
+
+  SYNOPSIS
+    make_cond_for_index()
+      cond           The source condition
+      table          The table that is partially available
+      keyno          The index in the above table. Only fields covered by the
+                     index are available
+      other_tbls_ok  TRUE <=> Fields of other non-const tables are allowed
+
+  DESCRIPTION
+    Get a part of the condition that can be checked when for the given table
+    we have values only of fields covered by some index. The condition may
+    refer to other tables, it is assumed that we have values of all of their
+    fields.
+
+    Example:
+      make_cond_for_index(
+         "cond(t1.field) AND cond(t2.key1) AND cond(t2.non_key) AND cond(t2.key2)",
+          t2, keyno(t2.key1))
+      will return
+        "cond(t1.field) AND cond(t2.key2)"
+
+  RETURN
+    Index condition, or NULL if no condition could be inferred.
+*/
+
+static Item *make_cond_for_index(THD *thd, Item *cond, TABLE *table, uint keyno,
+                                 bool other_tbls_ok)
+{
+  if (!cond || cond->basic_const_item())
+    return cond;
+  if (cond->type() == Item::COND_ITEM)
+  {
+    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+    {
+      table_map used_tables= 0;
+      Item_cond_and *new_cond= new (thd->mem_root) Item_cond_and(thd);
+      if (!new_cond)
+        return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+        Item *fix= make_cond_for_index(thd, item, table, keyno, other_tbls_ok);
+        if (fix)
+        {
+          new_cond->argument_list()->push_back(fix, thd->mem_root);
+          used_tables|= fix->used_tables();
+        }
+      }
+      switch (new_cond->argument_list()->elements) {
+      case 0:
+        return (COND*) 0;
+      case 1:
+        /* remove AND level if there is only one argument */
+        return new_cond->argument_list()->head();
+      default:
+        new_cond->quick_fix_field();
+        new_cond->used_tables_cache= used_tables;
+        return new_cond;
+      }
+    }
+    else /* It's OR */
+    {
+      Item_cond_or *new_cond= new (thd->mem_root) Item_cond_or(thd);
+      if (!new_cond)
+        return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+        Item *fix= make_cond_for_index(thd, item, table, keyno, other_tbls_ok);
+        if (!fix)
+          return (COND*) 0;
+        new_cond->argument_list()->push_back(fix, thd->mem_root);
+      }
+      new_cond->quick_fix_field();
+      new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache;
+      new_cond->top_level_item();
+      return new_cond;
+    }
+  }
+
+  if (!uses_index_fields_only(cond, table, keyno, other_tbls_ok))
+    return (COND*) 0;
+  return cond;
+}
+
+
+static Item *make_cond_remainder(THD *thd, Item *cond, TABLE *table, uint keyno,
+                                 bool other_tbls_ok, bool exclude_index)
+{
+  if (exclude_index &&
+      uses_index_fields_only(cond, table, keyno, other_tbls_ok))
+    return 0;
+
+  if (cond->type() == Item::COND_ITEM)
+  {
+    table_map tbl_map= 0;
+    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+    {
+      /* Create new top level AND item */
+      Item_cond_and *new_cond= new (thd->mem_root) Item_cond_and(thd);
+      if (!new_cond)
+        return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+        Item *fix= make_cond_remainder(thd, item, table, keyno,
+                                       other_tbls_ok, exclude_index);
+        if (fix)
+        {
+          new_cond->argument_list()->push_back(fix, thd->mem_root);
+          tbl_map |= fix->used_tables();
+        }
+      }
+      switch (new_cond->argument_list()->elements) {
+      case 0:
+        return (COND*) 0;
+      case 1:
+        return new_cond->argument_list()->head();
+      default:
+        new_cond->quick_fix_field();
+        ((Item_cond*)new_cond)->used_tables_cache= tbl_map;
+        return new_cond;
+      }
+    }
+    else /* It's OR */
+    {
+      Item_cond_or *new_cond= new (thd->mem_root) Item_cond_or(thd);
+      if (!new_cond)
+        return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+        Item *fix= make_cond_remainder(thd, item, table, keyno,
+                                       other_tbls_ok, FALSE);
+        if (!fix)
+          return (COND*) 0;
+        new_cond->argument_list()->push_back(fix, thd->mem_root);
+        tbl_map |= fix->used_tables();
+      }
+      new_cond->quick_fix_field();
+      ((Item_cond*)new_cond)->used_tables_cache= tbl_map;
+      new_cond->top_level_item();
+      return new_cond;
+    }
+  }
+  return cond;
+}
+
+
+/*
+  Try to extract and push the index condition
+
+  SYNOPSIS
+    push_index_cond()
+      tab            A join tab that has tab->table->file and its condition
+                     in tab->select_cond
+      keyno          Index for which extract and push the condition
+
+  DESCRIPTION
+    Try to extract and push the index condition down to table handler
+*/
+
+void push_index_cond(JOIN_TAB *tab, uint keyno)
+{
+  DBUG_ENTER("push_index_cond");
+  Item *idx_cond;
+
+  /*
+    Backported the following from MySQL 5.6:
+    6. The index is not a clustered index. The performance improvement
+       of pushing an index condition on a clustered key is much lower
+       than on a non-clustered key.
This restriction should be
+       re-evaluated when WL#6061 is implemented.
+  */
+  if ((tab->table->file->index_flags(keyno, 0, 1) &
+       HA_DO_INDEX_COND_PUSHDOWN) &&
+      optimizer_flag(tab->join->thd, OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN) &&
+      tab->join->thd->lex->sql_command != SQLCOM_UPDATE_MULTI &&
+      tab->join->thd->lex->sql_command != SQLCOM_DELETE_MULTI &&
+      tab->type != JT_CONST && tab->type != JT_SYSTEM &&
+      !tab->table->file->is_clustering_key(keyno)) // 6
+  {
+    DBUG_EXECUTE("where",
+                 print_where(tab->select_cond, "full cond", QT_ORDINARY););
+
+    idx_cond= make_cond_for_index(tab->join->thd, tab->select_cond, tab->table,
+                                  keyno, tab->icp_other_tables_ok);
+
+    DBUG_EXECUTE("where",
+                 print_where(idx_cond, "idx cond", QT_ORDINARY););
+
+    if (idx_cond)
+    {
+      Item *idx_remainder_cond= 0;
+      tab->pre_idx_push_select_cond= tab->select_cond;
+      /*
+        For BKA cache we store condition to special BKA cache field
+        because evaluation of the condition requires additional operations
+        before the evaluation. This condition is used in
+        JOIN_CACHE_BKA[_UNIQUE]::skip_index_tuple() functions.
+      */
+      if (tab->use_join_cache &&
+          /*
+            if cache is used then the value is TRUE only
+            for BKA[_UNIQUE] cache (see check_join_cache_usage func).
+          */
+          tab->icp_other_tables_ok &&
+          (idx_cond->used_tables() &
+           ~(tab->table->map | tab->join->const_table_map)))
+        tab->cache_idx_cond= idx_cond;
+      else
+      {
+        idx_remainder_cond= tab->table->file->idx_cond_push(keyno, idx_cond);
+
+        /*
+          If (1) there is an index condition that we couldn't push using ICP,
+          (2) we are using Join Buffering
+          (3) and we are using BKA
+          then use BKA's Index Condition Pushdown mechanism to check it.
+        */
+        if (idx_remainder_cond && tab->use_join_cache && // (1) && (2)
+            tab->icp_other_tables_ok)                    // (3)
+        {
+          tab->cache_idx_cond= idx_remainder_cond;
+          idx_remainder_cond= NULL;
+        }
+      }
+
+      /*
+        Disable eq_ref's "lookup cache" if we've pushed down an index
+        condition.
+        TODO: This check happens to work on current ICP implementations, but
+        there may exist a compliant implementation that will not work
+        correctly with it. Sort this out when we stabilize the condition
+        pushdown APIs.
+      */
+      if (idx_remainder_cond != idx_cond)
+        tab->ref.disable_cache= TRUE;
+
+      Item *row_cond= tab->idx_cond_fact_out ?
+                        make_cond_remainder(tab->join->thd, tab->select_cond,
+                                            tab->table, keyno,
+                                            tab->icp_other_tables_ok, TRUE) :
+                        tab->pre_idx_push_select_cond;
+
+      DBUG_EXECUTE("where",
+                   print_where(row_cond, "remainder cond", QT_ORDINARY););
+
+      if (row_cond)
+      {
+        if (!idx_remainder_cond)
+          tab->select_cond= row_cond;
+        else
+        {
+          COND *new_cond= new (tab->join->thd->mem_root)
+            Item_cond_and(tab->join->thd, row_cond, idx_remainder_cond);
+          tab->select_cond= new_cond;
+          tab->select_cond->quick_fix_field();
+          ((Item_cond_and*)tab->select_cond)->used_tables_cache=
+            row_cond->used_tables() | idx_remainder_cond->used_tables();
+        }
+      }
+      else
+        tab->select_cond= idx_remainder_cond;
+      if (tab->select)
+      {
+        DBUG_EXECUTE("where",
+                     print_where(tab->select->cond,
+                                 "select_cond",
+                                 QT_ORDINARY););
+
+        tab->select->cond= tab->select_cond;
+        tab->select->pre_idx_push_select_cond= tab->pre_idx_push_select_cond;
+      }
+    }
+  }
+  DBUG_VOID_RETURN;
+}
+
+
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
new file mode 100644
index 00000000..0ca8402f
--- /dev/null
+++ b/sql/opt_range.cc
@@ -0,0 +1,16784 @@
+/* Copyright (c) 2000, 2015, Oracle and/or its affiliates.
+   Copyright (c) 2008, 2021, MariaDB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + TODO: + Fix that MAYBE_KEY are stored in the tree so that we can detect use + of full hash keys for queries like: + + select s.id, kws.keyword_id from sites as s,kws where s.id=kws.site_id and kws.keyword_id in (204,205); + +*/ + +/* + This file contains: + + RangeAnalysisModule + A module that accepts a condition, index (or partitioning) description, + and builds lists of intervals (in index/partitioning space), such that + all possible records that match the condition are contained within the + intervals. + The entry point for the range analysis module is get_mm_tree() function. + + The lists are returned in form of complicated structure of interlinked + SEL_TREE/SEL_IMERGE/SEL_ARG objects. + See quick_range_seq_next, find_used_partitions for examples of how to walk + this structure. + All direct "users" of this module are located within this file, too. + + + PartitionPruningModule + A module that accepts a partitioned table, condition, and finds which + partitions we will need to use in query execution. Search down for + "PartitionPruningModule" for description. + The module has single entry point - prune_partitions() function. + + + Range/index_merge/groupby-minmax optimizer module + A module that accepts a table, condition, and returns + - a QUICK_*_SELECT object that can be used to retrieve rows that match + the specified condition, or a "no records will match the condition" + statement. + + The module entry points are + test_quick_select() + get_quick_select_for_ref() + + + Record retrieval code for range/index_merge/groupby-min-max. + Implementations of QUICK_*_SELECT classes. + + KeyTupleFormat + ~~~~~~~~~~~~~~ + The code in this file (and elsewhere) makes operations on key value tuples. 
+ Those tuples are stored in the following format: + + The tuple is a sequence of key part values. The length of key part value + depends only on its type (and not depends on the what value is stored) + + KeyTuple: keypart1-data, keypart2-data, ... + + The value of each keypart is stored in the following format: + + keypart_data: [isnull_byte] keypart-value-bytes + + If a keypart may have a NULL value (key_part->field->real_maybe_null() can + be used to check this), then the first byte is a NULL indicator with the + following valid values: + 1 - keypart has NULL value. + 0 - keypart has non-NULL value. + + If isnull_byte==1 (NULL value), then the following + keypart->length bytes must be 0. + + + keypart-value-bytes holds the value. Its format depends on the field type. + The length of keypart-value-bytes may or may not depend on the value being + stored. The default is that length is static and equal to + KEY_PART_INFO::length. + + Key parts with (key_part_flag & HA_BLOB_PART) have length depending of the + value: + + keypart-value-bytes: value_length value_bytes + + The value_length part itself occupies HA_KEY_BLOB_LENGTH=2 bytes. + + See key_copy() and key_restore() for code to move data between index tuple + and table record + + CAUTION: the above description is only sergefp's understanding of the + subject and may omit some details. 
+*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +#include "key.h" // is_key_used, key_copy, key_cmp, key_restore +#include "sql_parse.h" // check_stack_overrun +#include "sql_partition.h" // get_part_id_func, PARTITION_ITERATOR, + // struct partition_info, NOT_A_PARTITION_ID +#include "records.h" // init_read_record, end_read_record +#include +#include "sql_select.h" +#include "sql_statistics.h" +#include "uniques.h" +#include "my_json_writer.h" + +#ifndef EXTRA_DEBUG +#define test_rb_tree(A,B) {} +#define test_use_count(A) {} +#endif + +/* + Convert double value to #rows. Currently this does floor(), and we + might consider using round() instead. +*/ +#define double2rows(x) ((ha_rows)(x)) + +/* + this should be long enough so that any memcmp with a string that + starts from '\0' won't cross is_null_string boundaries, even + if the memcmp is optimized to compare 4- 8- or 16- bytes at once +*/ +static uchar is_null_string[20]= {1,0}; + +/** + Helper function to compare two SEL_ARG's. +*/ +static bool all_same(const SEL_ARG *sa1, const SEL_ARG *sa2) +{ + if (sa1 == NULL && sa2 == NULL) + return true; + if ((sa1 != NULL && sa2 == NULL) || (sa1 == NULL && sa2 != NULL)) + return false; + return sa1->all_same(sa2); +} + +class SEL_IMERGE; + +#define CLONE_KEY1_MAYBE 1 +#define CLONE_KEY2_MAYBE 2 +#define swap_clone_flag(A) ((A & 1) << 1) | ((A & 2) >> 1) + + +/* + While objects of the class SEL_ARG represent ranges for indexes or + index infixes (including ranges for index prefixes and index suffixes), + objects of the class SEL_TREE represent AND/OR formulas of such ranges. + Currently an AND/OR formula represented by a SEL_TREE object can have + at most three levels: + + ::= + [ AND ] + [ [ AND ...] ] + + ::= + [ AND ... 
] + + ::= + [ OR ] + + As we can see from the above definitions: + - SEL_RANGE_TREE formula is a conjunction of SEL_ARG formulas + - SEL_IMERGE formula is a disjunction of SEL_RANGE_TREE formulas + - SEL_TREE formula is a conjunction of a SEL_RANGE_TREE formula + and SEL_IMERGE formulas. + It's required above that a SEL_TREE formula has at least one conjunct. + + Usually we will consider normalized SEL_RANGE_TREE formulas where we use + TRUE as conjunct members for those indexes whose SEL_ARG trees are empty. + + We will call an SEL_TREE object simply 'tree'. + The part of a tree that represents SEL_RANGE_TREE formula is called + 'range part' of the tree while the remaining part is called 'imerge part'. + If a tree contains only a range part then we call such a tree 'range tree'. + Components of a range tree that represent SEL_ARG formulas are called ranges. + If a tree does not contain any range part we call such a tree 'imerge tree'. + Components of the imerge part of a tree that represent SEL_IMERGE formula + are called imerges. + + Usually we'll designate: + SEL_TREE formulas by T_1,...,T_k + SEL_ARG formulas by R_1,...,R_k + SEL_RANGE_TREE formulas by RT_1,...,RT_k + SEL_IMERGE formulas by M_1,...,M_k + Accordingly we'll use: + t_1,...,t_k - to designate trees representing T_1,...,T_k + r_1,...,r_k - to designate ranges representing R_1,...,R_k + rt_1,...,r_tk - to designate range trees representing RT_1,...,RT_k + m_1,...,m_k - to designate imerges representing M_1,...,M_k + + SEL_TREE objects are usually built from WHERE conditions or + ON expressions. + A SEL_TREE object always represents an inference of the condition it is + built from. Therefore, if a row satisfies a SEL_TREE formula it also + satisfies the condition it is built from. + + The following transformations of tree t representing SEL_TREE formula T + yield a new tree t1 thar represents an inference of T: T=>T1. 
+ (1) remove any of SEL_ARG tree from the range part of t + (2) remove any imerge from the tree t + (3) remove any of SEL_ARG tree from any range tree contained + in any imerge of tree + + Since the basic blocks of any SEL_TREE objects are ranges, SEL_TREE + objects in many cases can be effectively used to filter out a big part + of table rows that do not satisfy WHERE/IN conditions utilizing + only single or multiple range index scans. + + A single range index scan is constructed for a range tree that contains + only one SEL_ARG object for an index or an index prefix. + An index intersection scan can be constructed for a range tree + that contains several SEL_ARG objects. Currently index intersection + scans are constructed only for single-point ranges. + An index merge scan is constructed for a imerge tree that contains only + one imerge. If range trees of this imerge contain only single-point merges + than a union of index intersections can be built. + + Usually the tree built by the range optimizer for a query table contains + more than one range in the range part, and additionally may contain some + imerges in the imerge part. The range optimizer evaluates all of them one + by one and chooses the range or the imerge that provides the cheapest + single or multiple range index scan of the table. According to rules + (1)-(3) this scan always filter out only those rows that do not satisfy + the query conditions. + + For any condition the SEL_TREE object for it is built in a bottom up + manner starting from the range trees for the predicates. The tree_and + function builds a tree for any conjunction of formulas from the trees + for its conjuncts. The tree_or function builds a tree for any disjunction + of formulas from the trees for its disjuncts. 
+*/
+
+class SEL_TREE :public Sql_alloc
+{
+public:
+  /*
+    Starting an effort to document this field:
+    (for some i, keys[i]->type == SEL_ARG::IMPOSSIBLE) =>
+       (type == SEL_TREE::IMPOSSIBLE)
+  */
+  enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type;
+
+  SEL_TREE(enum Type type_arg, MEM_ROOT *root, size_t num_keys)
+    : type(type_arg), keys(root, num_keys), n_ror_scans(0)
+  {
+    keys_map.clear_all();
+  }
+
+  SEL_TREE(MEM_ROOT *root, size_t num_keys) :
+    type(KEY), keys(root, num_keys), n_ror_scans(0)
+  {
+    keys_map.clear_all();
+  }
+
+  SEL_TREE(SEL_TREE *arg, bool without_merges, RANGE_OPT_PARAM *param);
+  /*
+    Note: there may exist SEL_TREE objects with sel_tree->type=KEY and
+    keys[i]=0 for all i. (SergeyP: it is not clear whether there is any
+    merit in range analyzer functions (e.g. get_mm_parts) returning a
+    pointer to such SEL_TREE instead of NULL)
+  */
+  Mem_root_array<SEL_ARG *, true> keys;
+  key_map keys_map;        /* bitmask of non-NULL elements in keys */
+
+  /*
+    Possible ways to read rows using index_merge. The list is non-empty only
+    if type==KEY. Currently can be non empty only if keys_map.is_clear_all().
+  */
+  List<SEL_IMERGE> merges;
+
+  /* The members below are filled/used only after get_mm_tree is done */
+  key_map ror_scans_map;   /* bitmask of ROR scan-able elements in keys */
+  uint    n_ror_scans;     /* number of set bits in ror_scans_map */
+
+  struct st_index_scan_info **index_scans;     /* list of index scans */
+  struct st_index_scan_info **index_scans_end; /* last index scan */
+
+  struct st_ror_scan_info **ror_scans;     /* list of ROR key scans */
+  struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
+  /* Note that #records for each key scan is stored in table->quick_rows */
+
+  bool without_ranges() { return keys_map.is_clear_all(); }
+  bool without_imerges() { return merges.is_empty(); }
+};
+
+
+class PARAM : public RANGE_OPT_PARAM
+{
+public:
+  ha_rows quick_rows[MAX_KEY];
+
+  /*
+    This will collect 'possible keys' based on the range optimization.
+
+    Queries with a JOIN object actually use ref optimizer (see add_key_field)
+    to collect possible_keys. This is used by single table UPDATE/DELETE.
+  */
+  key_map possible_keys;
+  longlong baseflag;
+  uint max_key_parts, range_count;
+
+  bool quick;             // Don't calculate possible keys
+
+  uint fields_bitmap_size;
+  MY_BITMAP needed_fields;    /* bitmask of fields needed by the query */
+  MY_BITMAP tmp_covered_fields;
+
+  key_map *needed_reg;        /* ptr to SQL_SELECT::needed_reg */
+
+  uint *imerge_cost_buff;     /* buffer for index_merge cost estimates */
+  uint imerge_cost_buff_size; /* size of the buffer */
+
+  /* Number of ranges in the last checked tree->key */
+  uint n_ranges;
+  uint8 first_null_comp;      /* first null component if any, 0 - otherwise */
+};
+
+
+class TABLE_READ_PLAN;
+  class TRP_RANGE;
+  class TRP_ROR_INTERSECT;
+  class TRP_ROR_UNION;
+  class TRP_INDEX_INTERSECT;
+  class TRP_INDEX_MERGE;
+  class TRP_GROUP_MIN_MAX;
+
+struct st_index_scan_info;
+struct st_ror_scan_info;
+
+static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
+static ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
+                                  SEL_ARG *tree, bool update_tbl_stats,
+                                  uint *mrr_flags, uint *bufsize,
+                                  Cost_estimate *cost, bool *is_ror_scan);
+
+QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
+                                     SEL_ARG *key_tree, uint mrr_flags,
+                                     uint mrr_buf_size, MEM_ROOT *alloc);
+static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
+                                       bool index_read_must_be_used,
+                                       bool for_range_access,
+                                       double read_time);
+static
+TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
+                                              double read_time);
+static
+TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
+                                          double read_time,
+                                          bool *are_all_covering);
+static
+TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
+                                                   SEL_TREE *tree,
+                                                   double read_time);
+static
+TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
+                                         double read_time, bool named_trace= false);
+static
+TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge,
+                                        TRP_INDEX_MERGE *imerge_trp,
+                                        double read_time);
+static
+TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree,
+                                          double read_time);
+
+#ifndef DBUG_OFF
+static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
+                           const char *msg);
+static void print_ror_scans_arr(TABLE *table, const char *msg,
+                                struct st_ror_scan_info **start,
+                                struct st_ror_scan_info **end);
+static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
+#endif
+
+static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,
+                          SEL_TREE *tree1, SEL_TREE *tree2);
+static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,
+                         SEL_TREE *tree1,SEL_TREE *tree2);
+static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
+static SEL_ARG *key_or(RANGE_OPT_PARAM *param,
+                       SEL_ARG *key1, SEL_ARG *key2);
+static SEL_ARG *key_and(RANGE_OPT_PARAM *param,
+                        SEL_ARG *key1, SEL_ARG *key2,
+                        uint clone_flag);
+static SEL_ARG *key_or_with_limit(RANGE_OPT_PARAM *param, uint keyno,
+                                  SEL_ARG *key1, SEL_ARG *key2);
+static SEL_ARG *key_and_with_limit(RANGE_OPT_PARAM *param, uint keyno,
+                                   SEL_ARG *key1, SEL_ARG *key2,
+                                   uint clone_flag);
+static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
+bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
+                    SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
+                    uchar *max_key,uint max_key_flag);
+static bool eq_tree(SEL_ARG* a,SEL_ARG *b);
+
+SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
+static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
+                             uint length);
+static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
+
+static
+SEL_ARG *enforce_sel_arg_weight_limit(RANGE_OPT_PARAM *param, uint keyno,
+                                      SEL_ARG *sel_arg);
+static
+bool sel_arg_and_weight_heuristic(RANGE_OPT_PARAM *param, SEL_ARG *key1,
+                                  SEL_ARG *key2);
+
+#include "opt_range_mrr.cc"
+
+static bool
sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2, + key_map *common_keys); +static void eliminate_single_tree_imerges(RANGE_OPT_PARAM *param, + SEL_TREE *tree); + +static bool sel_trees_can_be_ored(RANGE_OPT_PARAM* param, + SEL_TREE *tree1, SEL_TREE *tree2, + key_map *common_keys); +static bool sel_trees_must_be_ored(RANGE_OPT_PARAM* param, + SEL_TREE *tree1, SEL_TREE *tree2, + key_map common_keys); +static int and_range_trees(RANGE_OPT_PARAM *param, + SEL_TREE *tree1, SEL_TREE *tree2, + SEL_TREE *result); +static bool remove_nonrange_trees(PARAM *param, SEL_TREE *tree); +static void restore_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree, + SEL_ARG **backup); +static void print_key_value(String *out, const KEY_PART_INFO *key_part, + const uchar* key, uint length); +static void print_keyparts_name(String *out, const KEY_PART_INFO *key_part, + uint n_keypart, key_part_map keypart_map); + +static void trace_ranges(Json_writer_array *range_trace, + PARAM *param, uint idx, + SEL_ARG *keypart, + const KEY_PART_INFO *key_parts); + +static +void print_range(String *out, const KEY_PART_INFO *key_part, + KEY_MULTI_RANGE *range, uint n_key_parts); + +static +void print_range_for_non_indexed_field(String *out, Field *field, + KEY_MULTI_RANGE *range); + +static void print_min_range_operator(String *out, const ha_rkey_function flag); +static void print_max_range_operator(String *out, const ha_rkey_function flag); + +static bool is_field_an_unique_index(Field *field); + +/* + SEL_IMERGE is a list of possible ways to do index merge, i.e. it is + a condition in the following form: + (t_1||t_2||...||t_N) && (next) + + where all t_i are SEL_TREEs, next is another SEL_IMERGE and no pair + (t_i,t_j) contains SEL_ARGS for the same index. + + SEL_TREE contained in SEL_IMERGE always has merges=NULL. + + This class relies on memory manager to do the cleanup. 
+*/ + +class SEL_IMERGE : public Sql_alloc +{ + enum { PREALLOCED_TREES= 10}; +public: + SEL_TREE *trees_prealloced[PREALLOCED_TREES]; + SEL_TREE **trees; /* trees used to do index_merge */ + SEL_TREE **trees_next; /* last of these trees */ + SEL_TREE **trees_end; /* end of allocated space */ + + SEL_ARG ***best_keys; /* best keys to read in SEL_TREEs */ + + SEL_IMERGE() : + trees(&trees_prealloced[0]), + trees_next(trees), + trees_end(trees + PREALLOCED_TREES) + {} + SEL_IMERGE (SEL_IMERGE *arg, uint cnt, RANGE_OPT_PARAM *param); + int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree); + bool have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree); + int and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree, + SEL_IMERGE *new_imerge); + int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, + uint n_init_trees, + SEL_TREE *new_tree, + bool is_first_check_pass, + bool *is_last_check_pass); + int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, + uint n_init_trees, + SEL_IMERGE* imerge, + bool is_first_check_pass, + bool *is_last_check_pass); +}; + + +/* + Add a range tree to the range trees of this imerge + + SYNOPSIS + or_sel_tree() + param Context info for the operation + tree SEL_TREE to add to this imerge + + DESCRIPTION + The function just adds the range tree 'tree' to the range trees + of this imerge. 
+ + RETURN + 0 if the operation is success + -1 if the function runs out memory +*/ + +int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree) +{ + if (trees_next == trees_end) + { + const int realloc_ratio= 2; /* Double size for next round */ + size_t old_elements= (trees_end - trees); + size_t old_size= sizeof(SEL_TREE**) * old_elements; + size_t new_size= old_size * realloc_ratio; + SEL_TREE **new_trees; + if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size))) + return -1; + memcpy(new_trees, trees, old_size); + trees= new_trees; + trees_next= trees + old_elements; + trees_end= trees + old_elements * realloc_ratio; + } + *(trees_next++)= tree; + return 0; +} + + +/* + Check if any of the range trees of this imerge intersects with a given tree + + SYNOPSIS + have_common_keys() + param Context info for the function + tree SEL_TREE intersection with the imerge range trees is checked for + + DESCRIPTION + The function checks whether there is any range tree rt_i in this imerge + such that there are some indexes for which ranges are defined in both + rt_i and the range part of the SEL_TREE tree. + To check this the function calls the function sel_trees_have_common_keys. + + RETURN + TRUE if there are such range trees in this imerge + FALSE otherwise +*/ + +bool SEL_IMERGE::have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree) +{ + for (SEL_TREE** or_tree= trees, **bound= trees_next; + or_tree != bound; or_tree++) + { + key_map common_keys; + if (sel_trees_have_common_keys(*or_tree, tree, &common_keys)) + return TRUE; + } + return FALSE; +} + + +/* + Perform AND operation for this imerge and the range part of a tree + + SYNOPSIS + and_sel_tree() + param Context info for the operation + tree SEL_TREE for the second operand of the operation + new_imerge OUT imerge for the result of the operation + + DESCRIPTION + This function performs AND operation for this imerge m and the + range part of the SEL_TREE tree rt. 
In other words the function + pushes rt into this imerge. The resulting imerge is returned in + the parameter new_imerge. + If this imerge m represent the formula + RT_1 OR ... OR RT_k + then the resulting imerge of the function represents the formula + (RT_1 AND RT) OR ... OR (RT_k AND RT) + The function calls the function and_range_trees to construct the + range tree representing (RT_i AND RT). + + NOTE + The function may return an empty imerge without any range trees. + This happens when each call of and_range_trees returns an + impossible range tree (SEL_TREE::IMPOSSIBLE). + Example: (key1 < 2 AND key2 > 10) AND (key1 > 4 OR key2 < 6). + + RETURN + 0 if the operation is a success + -1 otherwise: there is not enough memory to perform the operation +*/ + +int SEL_IMERGE::and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree, + SEL_IMERGE *new_imerge) +{ + for (SEL_TREE** or_tree= trees; or_tree != trees_next; or_tree++) + { + SEL_TREE *res_or_tree= 0; + SEL_TREE *and_tree= 0; + if (!(res_or_tree= new SEL_TREE(param->mem_root, param->keys)) || + !(and_tree= new SEL_TREE(tree, TRUE, param))) + return (-1); + if (!and_range_trees(param, *or_tree, and_tree, res_or_tree)) + { + if (new_imerge->or_sel_tree(param, res_or_tree)) + return (-1); + } + } + return 0; +} + + +/* + Perform OR operation on this imerge and the range part of a tree + + SYNOPSIS + or_sel_tree_with_checks() + param Context info for the operation + n_trees Number of trees in this imerge to check for oring + tree SEL_TREE whose range part is to be ored + is_first_check_pass <=> the first call of the function for this imerge + is_last_check_pass OUT <=> no more calls of the function for this imerge + + DESCRIPTION + The function performs OR operation on this imerge m and the range part + of the SEL_TREE tree rt. It always replaces this imerge with the result + of the operation. 
+ + The operation can be performed in two different modes: with + is_first_check_pass==TRUE and is_first_check_pass==FALSE, transforming + this imerge differently. + + Given this imerge represents the formula + RT_1 OR ... OR RT_k: + + 1. In the first mode, when is_first_check_pass==TRUE : + 1.1. If rt must be ored(see the function sel_trees_must_be_ored) with + some rt_j (there may be only one such range tree in the imerge) + then the function produces an imerge representing the formula + RT_1 OR ... OR (RT_j OR RT) OR ... OR RT_k, + where the tree for (RT_j OR RT) is built by oring the pairs + of SEL_ARG trees for the corresponding indexes + 1.2. Otherwise the function produces the imerge representing the formula: + RT_1 OR ... OR RT_k OR RT. + + 2. In the second mode, when is_first_check_pass==FALSE : + 2.1. For each rt_j in the imerge that can be ored (see the function + sel_trees_can_be_ored) with rt the function replaces rt_j for a + range tree such that for each index for which ranges are defined + in both in rt_j and rt the tree contains the result of oring of + these ranges. + 2.2. In other cases the function does not produce any imerge. + + When is_first_check==TRUE the function returns FALSE in the parameter + is_last_check_pass if there is no rt_j such that rt_j can be ored with rt, + but, at the same time, it's not true that rt_j must be ored with rt. + When is_first_check==FALSE the function always returns FALSE in the + parameter is_last_check_pass. 
+ + RETURN + 1 The result of oring of rt_j and rt that must be ored returns the + the range tree with type==SEL_TREE::ALWAYS + (in this case the imerge m should be discarded) + -1 The function runs out of memory + 0 in all other cases +*/ + +int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, + uint n_trees, + SEL_TREE *tree, + bool is_first_check_pass, + bool *is_last_check_pass) +{ + bool was_ored= FALSE; + *is_last_check_pass= is_first_check_pass; + SEL_TREE** or_tree= trees; + for (uint i= 0; i < n_trees; i++, or_tree++) + { + SEL_TREE *result= 0; + key_map result_keys; + key_map ored_keys; + if (sel_trees_can_be_ored(param, *or_tree, tree, &ored_keys)) + { + bool must_be_ored= sel_trees_must_be_ored(param, *or_tree, tree, + ored_keys); + if (must_be_ored || !is_first_check_pass) + { + result_keys.clear_all(); + result= *or_tree; + for (uint key_no= 0; key_no < param->keys; key_no++) + { + if (!ored_keys.is_set(key_no)) + { + result->keys[key_no]= 0; + continue; + } + SEL_ARG *key1= (*or_tree)->keys[key_no]; + SEL_ARG *key2= tree->keys[key_no]; + key2->incr_refs(); + if ((result->keys[key_no]= key_or_with_limit(param, key_no, key1, + key2))) + { + + result_keys.set_bit(key_no); +#ifdef EXTRA_DEBUG + if (param->alloced_sel_args < + param->thd->variables.optimizer_max_sel_args) + { + key1= result->keys[key_no]; + (key1)->test_use_count(key1); + } +#endif + } + } + } + else if(is_first_check_pass) + *is_last_check_pass= FALSE; + } + + if (result) + { + result->keys_map= result_keys; + if (result_keys.is_clear_all()) + result->type= SEL_TREE::ALWAYS; + if ((result->type == SEL_TREE::MAYBE) || + (result->type == SEL_TREE::ALWAYS)) + return 1; + /* SEL_TREE::IMPOSSIBLE is impossible here */ + *or_tree= result; + was_ored= TRUE; + } + } + if (was_ored) + return 0; + + if (is_first_check_pass && !*is_last_check_pass && + !(tree= new SEL_TREE(tree, FALSE, param))) + return (-1); + return or_sel_tree(param, tree); +} + + +/* + Perform OR operation on this 
imerge and and another imerge + + SYNOPSIS + or_sel_imerge_with_checks() + param Context info for the operation + n_trees Number of trees in this imerge to check for oring + imerge The second operand of the operation + is_first_check_pass <=> the first call of the function for this imerge + is_last_check_pass OUT <=> no more calls of the function for this imerge + + DESCRIPTION + For each range tree rt from 'imerge' the function calls the method + SEL_IMERGE::or_sel_tree_with_checks that performs OR operation on this + SEL_IMERGE object m and the tree rt. The mode of the operation is + specified by the parameter is_first_check_pass. Each call of + SEL_IMERGE::or_sel_tree_with_checks transforms this SEL_IMERGE object m. + The function returns FALSE in the prameter is_last_check_pass if + at least one of the calls of SEL_IMERGE::or_sel_tree_with_checks + returns FALSE as the value of its last parameter. + + RETURN + 1 One of the calls of SEL_IMERGE::or_sel_tree_with_checks returns 1. + (in this case the imerge m should be discarded) + -1 The function runs out of memory + 0 in all other cases +*/ + +int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, + uint n_trees, + SEL_IMERGE* imerge, + bool is_first_check_pass, + bool *is_last_check_pass) +{ + *is_last_check_pass= TRUE; + SEL_TREE** tree= imerge->trees; + SEL_TREE** tree_end= imerge->trees_next; + for ( ; tree < tree_end; tree++) + { + uint rc; + bool is_last= TRUE; + rc= or_sel_tree_with_checks(param, n_trees, *tree, + is_first_check_pass, &is_last); + if (!is_last) + *is_last_check_pass= FALSE; + if (rc) + return rc; + } + return 0; +} + + +/* + Copy constructor for SEL_TREE objects + + SYNOPSIS + SEL_TREE + arg The source tree for the constructor + without_merges <=> only the range part of the tree arg is copied + param Context info for the operation + + DESCRIPTION + The constructor creates a full copy of the SEL_TREE arg if + the prameter without_merges==FALSE. 
Otherwise a tree is created + that contains the copy only of the range part of the tree arg. +*/ + +SEL_TREE::SEL_TREE(SEL_TREE *arg, bool without_merges, + RANGE_OPT_PARAM *param) + : Sql_alloc(), + keys(param->mem_root, param->keys), + n_ror_scans(0) +{ + keys_map= arg->keys_map; + type= arg->type; + MEM_ROOT *mem_root; + + for (uint idx= 0; idx < param->keys; idx++) + { + if ((keys[idx]= arg->keys[idx])) + keys[idx]->incr_refs_all(); + } + + if (without_merges) + return; + + mem_root= current_thd->mem_root; + List_iterator it(arg->merges); + for (SEL_IMERGE *el= it++; el; el= it++) + { + SEL_IMERGE *merge= new (mem_root) SEL_IMERGE(el, 0, param); + if (!merge || merge->trees == merge->trees_next) + { + merges.empty(); + return; + } + merges.push_back(merge, mem_root); + } +} + + +/* + Copy constructor for SEL_IMERGE objects + + SYNOPSIS + SEL_IMERGE + arg The source imerge for the constructor + cnt How many trees from arg are to be copied + param Context info for the operation + + DESCRIPTION + The cnt==0 then the constructor creates a full copy of the + imerge arg. Otherwise only the first cnt trees of the imerge + are copied. +*/ + +SEL_IMERGE::SEL_IMERGE(SEL_IMERGE *arg, uint cnt, + RANGE_OPT_PARAM *param) : Sql_alloc() +{ + size_t elements= (arg->trees_end - arg->trees); + if (elements > PREALLOCED_TREES) + { + size_t size= elements * sizeof (SEL_TREE **); + if (!(trees= (SEL_TREE **)alloc_root(param->mem_root, size))) + goto mem_err; + } + else + trees= &trees_prealloced[0]; + + trees_next= trees + (cnt ? 
cnt : arg->trees_next-arg->trees); + trees_end= trees + elements; + + for (SEL_TREE **tree= trees, **arg_tree= arg->trees; tree < trees_next; + tree++, arg_tree++) + { + if (!(*tree= new SEL_TREE(*arg_tree, TRUE, param))) + goto mem_err; + } + + return; + +mem_err: + trees= &trees_prealloced[0]; + trees_next= trees; + trees_end= trees; +} + + +/* + Perform AND operation on two imerge lists + + SYNOPSIS + imerge_list_and_list() + param Context info for the operation + im1 The first imerge list for the operation + im2 The second imerge list for the operation + + DESCRIPTION + The function just appends the imerge list im2 to the imerge list im1 + + RETURN VALUE + none +*/ + +inline void imerge_list_and_list(List *im1, List *im2) +{ + im1->append(im2); +} + + +/* + Perform OR operation on two imerge lists + + SYNOPSIS + imerge_list_or_list() + param Context info for the operation + im1 The first imerge list for the operation + im2 The second imerge list for the operation + + DESCRIPTION + Assuming that the first imerge list represents the formula + F1= M1_1 AND ... AND M1_k1 + while the second imerge list represents the formula + F2= M2_1 AND ... AND M2_k2, + where M1_i= RT1_i_1 OR ... OR RT1_i_l1i (i in [1..k1]) + and M2_i = RT2_i_1 OR ... OR RT2_i_l2i (i in [1..k2]), + the function builds a list of imerges for some formula that can be + inferred from the formula (F1 OR F2). + + More exactly the function builds imerges for the formula (M1_1 OR M2_1). + Note that + (F1 OR F2) = (M1_1 AND ... AND M1_k1) OR (M2_1 AND ... AND M2_k2) = + AND (M1_i OR M2_j) (i in [1..k1], j in [1..k2]) => + M1_1 OR M2_1. + So (M1_1 OR M2_1) is indeed an inference formula for (F1 OR F2). + + To build imerges for the formula (M1_1 OR M2_1) the function invokes, + possibly twice, the method SEL_IMERGE::or_sel_imerge_with_checks + for the imerge m1_1. 
+ At its first invocation the method SEL_IMERGE::or_sel_imerge_with_checks + performs OR operation on the imerge m1_1 and the range tree rt2_1_1 by + calling SEL_IMERGE::or_sel_tree_with_checks with is_first_pass_check==TRUE. + The resulting imerge of the operation is ored with the next range tree of + the imerge m2_1. This oring continues until the last range tree from + m2_1 has been ored. + At its second invocation the method SEL_IMERGE::or_sel_imerge_with_checks + performs the same sequence of OR operations, but now calling + SEL_IMERGE::or_sel_tree_with_checks with is_first_pass_check==FALSE. + + The imerges that the operation produces replace those in the list im1 + + RETURN + 0 if the operation is a success + -1 if the function has run out of memory +*/ + +int imerge_list_or_list(RANGE_OPT_PARAM *param, + List *im1, + List *im2) +{ + + uint rc; + bool is_last_check_pass= FALSE; + SEL_IMERGE *imerge= im1->head(); + uint elems= (uint)(imerge->trees_next-imerge->trees); + MEM_ROOT *mem_root= current_thd->mem_root; + + im1->empty(); + im1->push_back(imerge, mem_root); + + rc= imerge->or_sel_imerge_with_checks(param, elems, im2->head(), + TRUE, &is_last_check_pass); + if (rc) + { + if (rc == 1) + { + im1->empty(); + rc= 0; + } + return rc; + } + + if (!is_last_check_pass) + { + SEL_IMERGE* new_imerge= new (mem_root) SEL_IMERGE(imerge, elems, param); + if (new_imerge) + { + is_last_check_pass= TRUE; + rc= new_imerge->or_sel_imerge_with_checks(param, elems, im2->head(), + FALSE, &is_last_check_pass); + if (!rc) + im1->push_back(new_imerge, mem_root); + } + } + return rc; +} + + +/* + Perform OR operation for each imerge from a list and the range part of a tree + + SYNOPSIS + imerge_list_or_tree() + param Context info for the operation + merges The list of imerges to be ored with the range part of tree + tree SEL_TREE whose range part is to be ored with the imerges + + DESCRIPTION + For each imerge mi from the list 'merges' the function performes OR + operation with 
mi and the range part of 'tree' rt, producing one or + two imerges. + + Given the merge mi represent the formula RTi_1 OR ... OR RTi_k, + the function forms the merges by the following rules: + + 1. If rt cannot be ored with any of the trees rti the function just + produces an imerge that represents the formula + RTi_1 OR ... RTi_k OR RT. + 2. If there exist a tree rtj that must be ored with rt the function + produces an imerge the represents the formula + RTi_1 OR ... OR (RTi_j OR RT) OR ... OR RTi_k, + where the range tree for (RTi_j OR RT) is constructed by oring the + SEL_ARG trees that must be ored. + 3. For each rti_j that can be ored with rt the function produces + the new tree rti_j' and substitutes rti_j for this new range tree. + + In any case the function removes mi from the list and then adds all + produced imerges. + + To build imerges by rules 1-3 the function calls the method + SEL_IMERGE::or_sel_tree_with_checks, possibly twice. With the first + call it passes TRUE for the third parameter of the function. + At this first call imerges by rules 1-2 are built. If the call + returns FALSE as the return value of its fourth parameter then the + function are called for the second time. At this call the imerge + of rule 3 is produced. + + If a call of SEL_IMERGE::or_sel_tree_with_checks returns 1 then + then it means that the produced tree contains an always true + range tree and the whole imerge can be discarded. 
+ + RETURN + 1 if no imerges are produced + 0 otherwise +*/ + +static +int imerge_list_or_tree(RANGE_OPT_PARAM *param, + List *merges, + SEL_TREE *tree) +{ + SEL_IMERGE *imerge; + List additional_merges; + List_iterator it(*merges); + MEM_ROOT *mem_root= current_thd->mem_root; + + while ((imerge= it++)) + { + bool is_last_check_pass; + int rc= 0; + int rc1= 0; + SEL_TREE *or_tree= new (mem_root) SEL_TREE (tree, FALSE, param); + if (or_tree) + { + uint elems= (uint)(imerge->trees_next-imerge->trees); + rc= imerge->or_sel_tree_with_checks(param, elems, or_tree, + TRUE, &is_last_check_pass); + if (!is_last_check_pass) + { + SEL_IMERGE *new_imerge= new (mem_root) SEL_IMERGE(imerge, elems, + param); + if (new_imerge) + { + rc1= new_imerge->or_sel_tree_with_checks(param, elems, or_tree, + FALSE, &is_last_check_pass); + if (!rc1) + additional_merges.push_back(new_imerge, mem_root); + } + } + } + if (rc || rc1 || !or_tree) + it.remove(); + } + + merges->append(&additional_merges); + return merges->is_empty(); +} + + +/* + Perform pushdown operation of the range part of a tree into given imerges + + SYNOPSIS + imerge_list_and_tree() + param Context info for the operation + merges IN/OUT List of imerges to push the range part of 'tree' into + tree SEL_TREE whose range part is to be pushed into imerges + replace if the pushdow operation for a imerge is a success + then the original imerge is replaced for the result + of the pushdown + + DESCRIPTION + For each imerge from the list merges the function pushes the range part + rt of 'tree' into the imerge. + More exactly if the imerge mi from the list represents the formula + RTi_1 OR ... OR RTi_k + the function bulds a new imerge that represents the formula + (RTi_1 AND RT) OR ... OR (RTi_k AND RT) + and adds this imerge to the list merges. + To perform this pushdown operation the function calls the method + SEL_IMERGE::and_sel_tree. 
+ For any imerge mi the new imerge is not created if for each pair of + trees rti_j and rt the intersection of the indexes with defined ranges + is empty. + If the result of the pushdown operation for the imerge mi returns an + imerge with no trees then then not only nothing is added to the list + merges but mi itself is removed from the list. + + TODO + Optimize the code in order to not create new SEL_IMERGE and new SER_TREE + objects when 'replace' is TRUE. (Currently this function is called always + with this parameter equal to TRUE.) + + RETURN + 1 if no imerges are left in the list merges + 0 otherwise +*/ + +static +int imerge_list_and_tree(RANGE_OPT_PARAM *param, + List *merges, + SEL_TREE *tree, + bool replace) +{ + SEL_IMERGE *imerge; + SEL_IMERGE *new_imerge= NULL; + List new_merges; + List_iterator it(*merges); + MEM_ROOT *mem_root= current_thd->mem_root; + + while ((imerge= it++)) + { + if (!new_imerge) + new_imerge= new (mem_root) SEL_IMERGE(); + if (imerge->have_common_keys(param, tree) && + new_imerge && !imerge->and_sel_tree(param, tree, new_imerge)) + { + if (new_imerge->trees == new_imerge->trees_next) + it.remove(); + else + { + if (replace) + it.replace(new_imerge); + else + new_merges.push_back(new_imerge, mem_root); + new_imerge= NULL; + } + } + } + imerge_list_and_list(&new_merges, merges); + *merges= new_merges; + return merges->is_empty(); +} + + +/*************************************************************************** +** Basic functions for SQL_SELECT and QUICK_RANGE_SELECT +***************************************************************************/ + + /* make a select from mysql info + Error is set as following: + 0 = ok + 1 = Got some error (out of memory?) 
+ */ + +SQL_SELECT *make_select(TABLE *head, table_map const_tables, + table_map read_tables, COND *conds, + SORT_INFO *filesort, + bool allow_null_cond, + int *error) +{ + SQL_SELECT *select; + DBUG_ENTER("make_select"); + + *error=0; + + if (!conds && !allow_null_cond) + DBUG_RETURN(0); + if (!(select= new (head->in_use->mem_root) SQL_SELECT)) + { + *error= 1; // out of memory + DBUG_RETURN(0); /* purecov: inspected */ + } + select->read_tables=read_tables; + select->const_tables=const_tables; + select->head=head; + select->cond= conds; + + if (filesort && my_b_inited(&filesort->io_cache)) + { + /* + Hijack the filesort io_cache for make_select + SQL_SELECT will be responsible for ensuring that it's properly freed. + */ + select->file= filesort->io_cache; + select->records=(ha_rows) (select->file.end_of_file/ + head->file->ref_length); + my_b_clear(&filesort->io_cache); + } + DBUG_RETURN(select); +} + + +SQL_SELECT::SQL_SELECT() :quick(0),cond(0),pre_idx_push_select_cond(NULL),free_cond(0) +{ + quick_keys.clear_all(); needed_reg.clear_all(); + my_b_clear(&file); +} + + +void SQL_SELECT::cleanup() +{ + delete quick; + quick= 0; + if (free_cond) + { + free_cond=0; + delete cond; + cond= 0; + } + close_cached_file(&file); +} + + +SQL_SELECT::~SQL_SELECT() +{ + cleanup(); +} + +#undef index // Fix for Unixware 7 + +QUICK_SELECT_I::QUICK_SELECT_I() + :max_used_key_length(0), + used_key_parts(0) +{} + +QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr, + bool no_alloc, MEM_ROOT *parent_alloc, + bool *create_error) + :thd(thd), no_alloc(no_alloc), parent_alloc(parent_alloc), + free_file(0),cur_range(NULL),last_range(0),dont_free(0) +{ + my_bitmap_map *bitmap; + DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT"); + + in_ror_merged_scan= 0; + index= key_nr; + head= table; + key_part_info= head->key_info[index].key_part; + + /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). 
*/ + mrr_buf_size= thd->variables.mrr_buff_size; + mrr_buf_desc= NULL; + + if (!no_alloc && !parent_alloc) + { + // Allocates everything through the internal memroot + init_sql_alloc(key_memory_quick_range_select_root, &alloc, + thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC)); + thd->mem_root= &alloc; + } + else + bzero((char*) &alloc,sizeof(alloc)); + file= head->file; + record= head->record[0]; + + my_init_dynamic_array2(PSI_INSTRUMENT_ME, &ranges, sizeof(QUICK_RANGE*), + thd->alloc(sizeof(QUICK_RANGE*) * 16), 16, 16, + MYF(MY_THREAD_SPECIFIC)); + + /* Allocate a bitmap for used columns */ + if (!(bitmap= (my_bitmap_map*) thd->alloc(head->s->column_bitmap_size))) + { + column_bitmap.bitmap= 0; + *create_error= 1; + } + else + my_bitmap_init(&column_bitmap, bitmap, head->s->fields); + DBUG_VOID_RETURN; +} + + +void QUICK_RANGE_SELECT::need_sorted_output() +{ + if (!(mrr_flags & HA_MRR_SORTED)) + { + /* + Native implementation can't produce sorted output. We'll have to + switch to default + */ + mrr_flags |= HA_MRR_USE_DEFAULT_IMPL; + } + mrr_flags |= HA_MRR_SORTED; +} + + +int QUICK_RANGE_SELECT::init() +{ + DBUG_ENTER("QUICK_RANGE_SELECT::init"); + + if (file->inited != handler::NONE) + file->ha_index_or_rnd_end(); + DBUG_RETURN(FALSE); +} + + +void QUICK_RANGE_SELECT::range_end() +{ + if (file->inited != handler::NONE) + file->ha_index_or_rnd_end(); +} + + +QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT() +{ + DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT"); + if (!dont_free) + { + /* file is NULL for CPK scan on covering ROR-intersection */ + if (file) + { + range_end(); + file->ha_end_keyread(); + if (free_file) + { + DBUG_PRINT("info", ("Freeing separate handler %p (free: %d)", file, + free_file)); + file->ha_external_unlock(current_thd); + file->ha_close(); + delete file; + } + } + delete_dynamic(&ranges); /* ranges are allocated in alloc */ + free_root(&alloc,MYF(0)); + } + my_free(mrr_buf_desc); + DBUG_VOID_RETURN; +} + +/* + 
QUICK_INDEX_SORT_SELECT works as follows: + - Do index scans, accumulate rowids in the Unique object + (Unique will also sort and de-duplicate rowids) + - Use rowids from unique to run a disk-ordered sweep +*/ + +QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT(THD *thd_param, TABLE *table) + :unique(NULL), pk_quick_select(NULL), thd(thd_param) +{ + DBUG_ENTER("QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT"); + index= MAX_KEY; + head= table; + init_sql_alloc(key_memory_quick_range_select_root, &alloc, + thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC)); + DBUG_VOID_RETURN; +} + +int QUICK_INDEX_SORT_SELECT::init() +{ + DBUG_ENTER("QUICK_INDEX_SORT_SELECT::init"); + DBUG_RETURN(0); +} + +int QUICK_INDEX_SORT_SELECT::reset() +{ + DBUG_ENTER("QUICK_INDEX_SORT_SELECT::reset"); + const int retval= read_keys_and_merge(); + DBUG_RETURN(retval); +} + +bool +QUICK_INDEX_SORT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range) +{ + DBUG_ENTER("QUICK_INDEX_SORT_SELECT::push_quick_back"); + if (head->file->is_clustering_key(quick_sel_range->index)) + { + /* + A quick_select over a clustered primary key is handled specifically + Here we assume: + - PK columns are included in any other merged index + - Scan on the PK is disk-ordered. + (not meeting #2 will only cause performance degradation) + + We could treat clustered PK as any other index, but that would + be inefficient. There is no point in doing scan on + CPK, remembering the rowid, then making rnd_pos() call with + that rowid. 
+ */ + pk_quick_select= quick_sel_range; + DBUG_RETURN(0); + } + DBUG_RETURN(quick_selects.push_back(quick_sel_range, thd->mem_root)); +} + +QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT() +{ + List_iterator_fast quick_it(quick_selects); + QUICK_RANGE_SELECT* quick; + DBUG_ENTER("QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT"); + delete unique; + quick_it.rewind(); + while ((quick= quick_it++)) + quick->file= NULL; + quick_selects.delete_elements(); + delete pk_quick_select; + /* It's ok to call the next two even if they are already deinitialized */ + end_read_record(&read_record); + free_root(&alloc,MYF(0)); + DBUG_VOID_RETURN; +} + +QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param, + TABLE *table, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) + : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows), + scans_inited(FALSE) +{ + index= MAX_KEY; + head= table; + record= head->record[0]; + if (!parent_alloc) + init_sql_alloc(key_memory_quick_range_select_root, &alloc, + thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC)); + else + bzero(&alloc, sizeof(MEM_ROOT)); + last_rowid= (uchar*) alloc_root(parent_alloc? parent_alloc : &alloc, + head->file->ref_length); +} + + +/* + Do post-constructor initialization. + SYNOPSIS + QUICK_ROR_INTERSECT_SELECT::init() + + RETURN + 0 OK + other Error code +*/ + +int QUICK_ROR_INTERSECT_SELECT::init() +{ + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init"); + /* Check if last_rowid was successfully allocated in ctor */ + DBUG_RETURN(!last_rowid); +} + + +/* + Initialize this quick select to be a ROR-merged scan. + + SYNOPSIS + QUICK_RANGE_SELECT::init_ror_merged_scan() + reuse_handler If TRUE, use head->file, otherwise create a separate + handler object + + NOTES + This function creates and prepares for subsequent use a separate handler + object if it can't reuse head->file. 
The reason for this is that during + ROR-merge several key scans are performed simultaneously, and a single + handler is only capable of preserving context of a single key scan. + + In ROR-merge the quick select doing merge does full records retrieval, + merged quick selects read only keys. + + RETURN + 0 ROR child scan initialized, ok to use. + 1 error +*/ + +int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler, + MEM_ROOT *local_alloc) +{ + handler *save_file= file, *org_file; + THD *thd= head->in_use; + MY_BITMAP * const save_read_set= head->read_set; + MY_BITMAP * const save_write_set= head->write_set; + DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan"); + + in_ror_merged_scan= 1; + if (reuse_handler) + { + DBUG_PRINT("info", ("Reusing handler %p", file)); + if (init()) + { + DBUG_RETURN(1); + } + goto end; + } + + /* Create a separate handler object for this quick select */ + if (free_file) + { + /* already have own 'handler' object. */ + DBUG_RETURN(0); + } + + if (!(file= head->file->clone(head->s->normalized_path.str, local_alloc))) + { + /* + Manually set the error flag. Note: there seems to be quite a few + places where a failure could cause the server to "hang" the client by + sending no response to a query. ATM those are not real errors because + the storage engine calls in question happen to never fail with the + existing storage engines. + */ + my_error(ER_OUT_OF_RESOURCES, MYF(0)); /* purecov: inspected */ + /* Caller will free the memory */ + goto failure; /* purecov: inspected */ + } + + if (file->ha_external_lock(thd, F_RDLCK)) + goto failure; + + if (init()) + { + file->ha_external_unlock(thd); + file->ha_close(); + goto failure; + } + free_file= TRUE; + last_rowid= file->ref; + +end: + /* + We are only going to read key fields and call position() on 'file' + The following sets head->read_set (== column_bitmap) to only use this + key. 
The 'column_bitmap' is used in ::get_next() + */ + org_file= head->file; + head->file= file; + + head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap); + head->prepare_for_keyread(index, &column_bitmap); + head->prepare_for_position(); + + head->file= org_file; + + /* Restore head->read_set (and write_set) to what they had before the call */ + head->column_bitmaps_set(save_read_set, save_write_set); + + if (reset()) + { + if (!reuse_handler) + { + file->ha_external_unlock(thd); + file->ha_close(); + goto failure; + } + DBUG_RETURN(1); + } + DBUG_RETURN(0); + +failure: + head->column_bitmaps_set(save_read_set, save_write_set); + delete file; + file= save_file; + free_file= false; + DBUG_RETURN(1); +} + + +/* + Initialize this quick select to be a part of a ROR-merged scan. + SYNOPSIS + QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan() + reuse_handler If TRUE, use head->file, otherwise create separate + handler object. + RETURN + 0 OK + other error code +*/ +int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler, + MEM_ROOT *local_alloc) +{ + List_iterator_fast quick_it(quick_selects); + QUICK_SELECT_WITH_RECORD *cur; + QUICK_RANGE_SELECT *quick; + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan"); + + /* Initialize all merged "children" quick selects */ + DBUG_ASSERT(!need_to_fetch_row || reuse_handler); + if (!need_to_fetch_row && reuse_handler) + { + cur= quick_it++; + quick= cur->quick; + /* + There is no use of this->file. Use it for the first of merged range + selects. 
+ */ + int error= quick->init_ror_merged_scan(TRUE, local_alloc); + if (unlikely(error)) + DBUG_RETURN(error); + quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); + } + while ((cur= quick_it++)) + { + quick= cur->quick; +#ifndef DBUG_OFF + const MY_BITMAP * const save_read_set= quick->head->read_set; + const MY_BITMAP * const save_write_set= quick->head->write_set; +#endif + if (quick->init_ror_merged_scan(FALSE, local_alloc)) + DBUG_RETURN(1); + quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); + + // Sets are shared by all members of "quick_selects" so must not change +#ifndef DBUG_OFF + DBUG_ASSERT(quick->head->read_set == save_read_set); + DBUG_ASSERT(quick->head->write_set == save_write_set); +#endif + /* All merged scans share the same record buffer in intersection. */ + quick->record= head->record[0]; + } + + if (need_to_fetch_row && + unlikely(head->file->ha_rnd_init_with_error(false))) + { + DBUG_PRINT("error", ("ROR index_merge rnd_init call failed")); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +/* + Initialize quick select for row retrieval. + SYNOPSIS + reset() + RETURN + 0 OK + other Error code +*/ + +int QUICK_ROR_INTERSECT_SELECT::reset() +{ + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset"); + if (!scans_inited && init_ror_merged_scan(TRUE, &alloc)) + DBUG_RETURN(1); + scans_inited= TRUE; + List_iterator_fast it(quick_selects); + QUICK_SELECT_WITH_RECORD *qr; + while ((qr= it++)) + qr->quick->reset(); + DBUG_RETURN(0); +} + + +/* + Add a merged quick select to this ROR-intersection quick select. + + SYNOPSIS + QUICK_ROR_INTERSECT_SELECT::push_quick_back() + alloc Mem root to create auxiliary structures on + quick Quick select to be added. The quick select must return + rows in rowid order. + NOTES + This call can only be made before init() is called. + + RETURN + FALSE OK + TRUE Out of memory. 
+*/ + +bool +QUICK_ROR_INTERSECT_SELECT::push_quick_back(MEM_ROOT *local_alloc, + QUICK_RANGE_SELECT *quick) +{ + QUICK_SELECT_WITH_RECORD *qr; + if (!(qr= new QUICK_SELECT_WITH_RECORD) || + !(qr->key_tuple= (uchar*)alloc_root(local_alloc, + quick->max_used_key_length))) + return TRUE; + qr->quick= quick; + return quick_selects.push_back(qr); +} + + +QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT() +{ + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT"); + quick_selects.delete_elements(); + delete cpk_quick; + free_root(&alloc,MYF(0)); + if (need_to_fetch_row && head->file->inited != handler::NONE) + head->file->ha_rnd_end(); + DBUG_VOID_RETURN; +} + + +QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param, + TABLE *table) + : thd(thd_param), scans_inited(FALSE) +{ + index= MAX_KEY; + head= table; + rowid_length= table->file->ref_length; + record= head->record[0]; + init_sql_alloc(key_memory_quick_range_select_root, &alloc, + thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC)); + thd_param->mem_root= &alloc; +} + + +/* + Comparison function to be used by QUICK_ROR_UNION_SELECT::queue priority + queue. + + SYNOPSIS + QUICK_ROR_UNION_SELECT_queue_cmp() + arg Pointer to QUICK_ROR_UNION_SELECT + val1 First merged select + val2 Second merged select +*/ + +C_MODE_START + +static int QUICK_ROR_UNION_SELECT_queue_cmp(void *arg, uchar *val1, uchar *val2) +{ + QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg; + return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid, + ((QUICK_SELECT_I*)val2)->last_rowid); +} + +C_MODE_END + + +/* + Do post-constructor initialization. 
+ SYNOPSIS + QUICK_ROR_UNION_SELECT::init() + + RETURN + 0 OK + other Error code +*/ + +int QUICK_ROR_UNION_SELECT::init() +{ + DBUG_ENTER("QUICK_ROR_UNION_SELECT::init"); + if (init_queue(&queue, quick_selects.elements, 0, + FALSE , QUICK_ROR_UNION_SELECT_queue_cmp, + (void*) this, 0, 0)) + { + bzero(&queue, sizeof(QUEUE)); + DBUG_RETURN(1); + } + + if (!(cur_rowid= (uchar*) alloc_root(&alloc, 2*head->file->ref_length))) + DBUG_RETURN(1); + prev_rowid= cur_rowid + head->file->ref_length; + DBUG_RETURN(0); +} + + +/* + Initialize quick select for row retrieval. + SYNOPSIS + reset() + + RETURN + 0 OK + other Error code +*/ + +int QUICK_ROR_UNION_SELECT::reset() +{ + QUICK_SELECT_I *quick; + int error; + DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset"); + have_prev_rowid= FALSE; + if (!scans_inited) + { + List_iterator_fast it(quick_selects); + while ((quick= it++)) + { + if (quick->init_ror_merged_scan(FALSE, &alloc)) + DBUG_RETURN(1); + } + scans_inited= TRUE; + } + queue_remove_all(&queue); + /* + Initialize scans for merged quick selects and put all merged quick + selects into the queue. + */ + List_iterator_fast it(quick_selects); + while ((quick= it++)) + { + if (unlikely((error= quick->reset()))) + DBUG_RETURN(error); + if (unlikely((error= quick->get_next()))) + { + if (error == HA_ERR_END_OF_FILE) + continue; + DBUG_RETURN(error); + } + quick->save_last_pos(); + queue_insert(&queue, (uchar*)quick); + } + /* Prepare for ha_rnd_pos calls. 
*/ + if (head->file->inited && unlikely((error= head->file->ha_rnd_end()))) + { + DBUG_PRINT("error", ("ROR index_merge rnd_end call failed")); + DBUG_RETURN(error); + } + if (unlikely((error= head->file->ha_rnd_init(false)))) + { + DBUG_PRINT("error", ("ROR index_merge rnd_init call failed")); + DBUG_RETURN(error); + } + + DBUG_RETURN(0); +} + + +bool +QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range) +{ + return quick_selects.push_back(quick_sel_range); +} + +QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT() +{ + DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT"); + delete_queue(&queue); + quick_selects.delete_elements(); + if (head->file->inited != handler::NONE) + head->file->ha_rnd_end(); + free_root(&alloc,MYF(0)); + DBUG_VOID_RETURN; +} + + +QUICK_RANGE::QUICK_RANGE() + :min_key(0),max_key(0),min_length(0),max_length(0), + flag(NO_MIN_RANGE | NO_MAX_RANGE), + min_keypart_map(0), max_keypart_map(0) +{} + +SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc() +{ + type=arg.type; + min_flag=arg.min_flag; + max_flag=arg.max_flag; + maybe_flag=arg.maybe_flag; + maybe_null=arg.maybe_null; + part=arg.part; + field=arg.field; + min_value=arg.min_value; + max_value=arg.max_value; + next_key_part=arg.next_key_part; + max_part_no= arg.max_part_no; + use_count=1; elements=1; + weight=1; + next= 0; + if (next_key_part) + { + next_key_part->increment_use_count(1); + weight += next_key_part->weight; + } +} + + +inline void SEL_ARG::make_root() +{ + left=right= &null_element; + color=BLACK; + next=prev=0; + use_count=0; + elements=1; + weight= 1 + (next_key_part? 
next_key_part->weight : 0); +} + +SEL_ARG::SEL_ARG(Field *f, const uchar *min_value_arg, + const uchar *max_value_arg) + :min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()), + elements(1), use_count(1), field(f), min_value((uchar*) min_value_arg), + max_value((uchar*) max_value_arg), next(0),prev(0), + next_key_part(0), color(BLACK), type(KEY_RANGE), weight(1) +{ + left=right= &null_element; + max_part_no= 1; +} + +SEL_ARG::SEL_ARG(Field *field_,uint8 part_, + uchar *min_value_, uchar *max_value_, + uint8 min_flag_,uint8 max_flag_,uint8 maybe_flag_) + :min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_), + part(part_),maybe_null(field_->real_maybe_null()), + elements(1),use_count(1), + field(field_), min_value(min_value_), max_value(max_value_), + next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE), weight(1) +{ + max_part_no= part+1; + left=right= &null_element; +} + + +/* + A number of helper classes: + SEL_ARG_LE, SEL_ARG_LT, SEL_ARG_GT, SEL_ARG_GE, + to share the code between: + Field::stored_field_make_mm_leaf() + Field::stored_field_make_mm_leaf_exact() +*/ +class SEL_ARG_LE: public SEL_ARG +{ +public: + SEL_ARG_LE(const uchar *key, Field *field) + :SEL_ARG(field, key, key) + { + if (!field->real_maybe_null()) + min_flag= NO_MIN_RANGE; // From start + else + { + min_value= is_null_string; + min_flag= NEAR_MIN; // > NULL + } + } +}; + + +class SEL_ARG_LT: public SEL_ARG_LE +{ +public: + /* + Use this constructor if value->save_in_field() went precisely, + without any data rounding or truncation. + */ + SEL_ARG_LT(const uchar *key, const KEY_PART *key_part, Field *field) + :SEL_ARG_LE(key, field) + { + // Don't use open ranges for partial key_segments + if (!(key_part->flag & HA_PART_KEY_SEG)) + max_flag= NEAR_MAX; + } + /* + Use this constructor if value->save_in_field() returned success, + but we don't know if rounding or truncation happened + (as some Field::store() do not report minor data changes). 
+ */ + SEL_ARG_LT(THD *thd, const uchar *key, + const KEY_PART *key_part, Field *field, Item *value) + :SEL_ARG_LE(key, field) + { + // Don't use open ranges for partial key_segments + if (!(key_part->flag & HA_PART_KEY_SEG) && + stored_field_cmp_to_item(thd, field, value) == 0) + max_flag= NEAR_MAX; + } +}; + + +class SEL_ARG_GT: public SEL_ARG +{ +public: + /* + Use this constructor if value->save_in_field() went precisely, + without any data rounding or truncation. + */ + SEL_ARG_GT(const uchar *key, const KEY_PART *key_part, Field *field) + :SEL_ARG(field, key, key) + { + // Don't use open ranges for partial key_segments + if (!(key_part->flag & HA_PART_KEY_SEG)) + min_flag= NEAR_MIN; + max_flag= NO_MAX_RANGE; + } + /* + Use this constructor if value->save_in_field() returned success, + but we don't know if rounding or truncation happened + (as some Field::store() do not report minor data changes). + */ + SEL_ARG_GT(THD *thd, const uchar *key, + const KEY_PART *key_part, Field *field, Item *value) + :SEL_ARG(field, key, key) + { + // Don't use open ranges for partial key_segments + if ((!(key_part->flag & HA_PART_KEY_SEG)) && + (stored_field_cmp_to_item(thd, field, value) <= 0)) + min_flag= NEAR_MIN; + max_flag= NO_MAX_RANGE; + } +}; + + +class SEL_ARG_GE: public SEL_ARG +{ +public: + /* + Use this constructor if value->save_in_field() went precisely, + without any data rounding or truncation. + */ + SEL_ARG_GE(const uchar *key, Field *field) + :SEL_ARG(field, key, key) + { + max_flag= NO_MAX_RANGE; + } + /* + Use this constructor if value->save_in_field() returned success, + but we don't know if rounding or truncation happened + (as some Field::store() do not report minor data changes). 
+ */ + SEL_ARG_GE(THD *thd, const uchar *key, + const KEY_PART *key_part, Field *field, Item *value) + :SEL_ARG(field, key, key) + { + // Don't use open ranges for partial key_segments + if ((!(key_part->flag & HA_PART_KEY_SEG)) && + (stored_field_cmp_to_item(thd, field, value) < 0)) + min_flag= NEAR_MIN; + max_flag= NO_MAX_RANGE; + } +}; + + +SEL_ARG *SEL_ARG::clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent, + SEL_ARG **next_arg) +{ + SEL_ARG *tmp; + + /* Bail out if we have already generated too many SEL_ARGs */ + if (++param->alloced_sel_args > param->thd->variables.optimizer_max_sel_args) + return 0; + + if (type != KEY_RANGE) + { + if (!(tmp= new (param->mem_root) SEL_ARG(type))) + return 0; // out of memory + tmp->prev= *next_arg; // Link into next/prev chain + (*next_arg)->next=tmp; + (*next_arg)= tmp; + tmp->part= this->part; + } + else + { + if (!(tmp= new (param->mem_root) SEL_ARG(field, part, + min_value, max_value, + min_flag, max_flag, maybe_flag))) + return 0; // OOM + tmp->parent=new_parent; + tmp->next_key_part=next_key_part; + if (left != &null_element) + if (!(tmp->left=left->clone(param, tmp, next_arg))) + return 0; // OOM + + tmp->prev= *next_arg; // Link into next/prev chain + (*next_arg)->next=tmp; + (*next_arg)= tmp; + + if (right != &null_element) + if (!(tmp->right= right->clone(param, tmp, next_arg))) + return 0; // OOM + } + increment_use_count(1); + tmp->color= color; + tmp->elements= this->elements; + tmp->max_part_no= max_part_no; + tmp->weight= weight; + return tmp; +} + +/** + This gives the first SEL_ARG in the interval list, and the minimal element + in the red-black tree + + @return + SEL_ARG first SEL_ARG in the interval list +*/ +SEL_ARG *SEL_ARG::first() +{ + SEL_ARG *next_arg=this; + if (!next_arg->left) + return 0; // MAYBE_KEY + while (next_arg->left != &null_element) + next_arg=next_arg->left; + return next_arg; +} + +const SEL_ARG *SEL_ARG::first() const +{ + return const_cast(this)->first(); +} + +SEL_ARG 
*SEL_ARG::last() +{ + SEL_ARG *next_arg=this; + if (!next_arg->right) + return 0; // MAYBE_KEY + while (next_arg->right != &null_element) + next_arg=next_arg->right; + return next_arg; +} + + +/* + Check if a compare is ok, when one takes ranges into account + Returns -2 or 2 if the ranges were 'joined' like < 2 and >= 2 +*/ + +int SEL_ARG::sel_cmp(Field *field, uchar *a, uchar *b, uint8 a_flag, + uint8 b_flag) +{ + int cmp; + /* First check if there was a compare to a min or max element */ + if (a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) + { + if ((a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) == + (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE))) + return 0; + return (a_flag & NO_MIN_RANGE) ? -1 : 1; + } + if (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) + return (b_flag & NO_MIN_RANGE) ? 1 : -1; + + if (field->real_maybe_null()) // If null is part of key + { + if (*a != *b) + { + return *a ? -1 : 1; + } + if (*a) + goto end; // NULL were equal + a++; b++; // Skip NULL marker + } + cmp=field->key_cmp(a , b); + if (cmp) return cmp < 0 ? -1 : 1; // The values differed + + // Check if the compared equal arguments were defined with open/closed range + end: + if (a_flag & (NEAR_MIN | NEAR_MAX)) + { + if ((a_flag & (NEAR_MIN | NEAR_MAX)) == (b_flag & (NEAR_MIN | NEAR_MAX))) + return 0; + if (!(b_flag & (NEAR_MIN | NEAR_MAX))) + return (a_flag & NEAR_MIN) ? 2 : -2; + return (a_flag & NEAR_MIN) ? 1 : -1; + } + if (b_flag & (NEAR_MIN | NEAR_MAX)) + return (b_flag & NEAR_MIN) ? -2 : 2; + return 0; // The elements were equal +} + + +SEL_ARG *SEL_ARG::clone_tree(RANGE_OPT_PARAM *param) +{ + SEL_ARG tmp_link,*next_arg,*root; + next_arg= &tmp_link; + if (!(root= clone(param, (SEL_ARG *) 0, &next_arg))) + return 0; + next_arg->next=0; // Fix last link + tmp_link.next->prev=0; // Fix first link + if (root) // If not OOM + root->use_count= 0; + return root; +} + + +/* + Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived + objects from table read plans. 
+*/ +class TABLE_READ_PLAN +{ +public: + /* + Plan read cost, with or without cost of full row retrieval, depending + on plan creation parameters. + */ + double read_cost; + ha_rows records; /* estimate of #rows to be examined */ + + /* + If TRUE, the scan returns rows in rowid order. This is used only for + scans that can be both ROR and non-ROR. + */ + bool is_ror; + + /* + Create quick select for this plan. + SYNOPSIS + make_quick() + param Parameter from test_quick_select + retrieve_full_rows If TRUE, created quick select will do full record + retrieval. + parent_alloc Memory pool to use, if any. + + NOTES + retrieve_full_rows is ignored by some implementations. + + RETURN + created quick select + NULL on any error. + */ + virtual QUICK_SELECT_I *make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc=NULL) = 0; + + /* Table read plans are allocated on MEM_ROOT and are never deleted */ + static void *operator new(size_t size, MEM_ROOT *mem_root) + { return (void*) alloc_root(mem_root, (uint) size); } + static void operator delete(void *ptr,size_t size) { TRASH_FREE(ptr, size); } + static void operator delete(void *ptr, MEM_ROOT *mem_root) { /* Never called */ } + virtual ~TABLE_READ_PLAN() = default; /* Remove gcc warning */ + /** + Add basic info for this TABLE_READ_PLAN to the optimizer trace. + + @param param Parameters for range analysis of this table + @param trace_object The optimizer trace object the info is appended to + */ + virtual void trace_basic_info(PARAM *param, + Json_writer_object *trace_object) const= 0; + +}; + +class TRP_ROR_INTERSECT; +class TRP_ROR_UNION; +class TRP_INDEX_MERGE; + + +/* + Plan for a QUICK_RANGE_SELECT scan. + TRP_RANGE::make_quick ignores retrieve_full_rows parameter because + QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full + record retrieval scans. 
+*/ + +class TRP_RANGE : public TABLE_READ_PLAN +{ +public: + SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */ + uint key_idx; /* key number in PARAM::key */ + uint mrr_flags; + uint mrr_buf_size; + + TRP_RANGE(SEL_ARG *key_arg, uint idx_arg, uint mrr_flags_arg) + : key(key_arg), key_idx(idx_arg), mrr_flags(mrr_flags_arg) + {} + virtual ~TRP_RANGE() = default; /* Remove gcc warning */ + + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc) + { + DBUG_ENTER("TRP_RANGE::make_quick"); + QUICK_RANGE_SELECT *quick; + if ((quick= get_quick_select(param, key_idx, key, mrr_flags, + mrr_buf_size, parent_alloc))) + { + quick->records= records; + quick->read_time= read_cost; + } + DBUG_RETURN(quick); + } + void trace_basic_info(PARAM *param, + Json_writer_object *trace_object) const; +}; + +void TRP_RANGE::trace_basic_info(PARAM *param, + Json_writer_object *trace_object) const +{ + DBUG_ASSERT(trace_object->trace_started()); + DBUG_ASSERT(param->using_real_indexes); + const uint keynr_in_table= param->real_keynr[key_idx]; + + const KEY &cur_key= param->table->key_info[keynr_in_table]; + const KEY_PART_INFO *key_part= cur_key.key_part; + + trace_object->add("type", "range_scan") + .add("index", cur_key.name) + .add("rows", records); + + Json_writer_array trace_range(param->thd, "ranges"); + + // TRP_RANGE should not be created if there are no range intervals + DBUG_ASSERT(key); + + trace_ranges(&trace_range, param, key_idx, key, key_part); +} + + +/* Plan for QUICK_ROR_INTERSECT_SELECT scan. 
*/ + +class TRP_ROR_INTERSECT : public TABLE_READ_PLAN +{ +public: + TRP_ROR_INTERSECT() = default; /* Remove gcc warning */ + virtual ~TRP_ROR_INTERSECT() = default; /* Remove gcc warning */ + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + + /* Array of pointers to ROR range scans used in this intersection */ + struct st_ror_scan_info **first_scan; + struct st_ror_scan_info **last_scan; /* End of the above array */ + struct st_ror_scan_info *cpk_scan; /* Clustered PK scan, if there is one */ + bool is_covering; /* TRUE if no row retrieval phase is necessary */ + double index_scan_costs; /* SUM(cost(index_scan)) */ + void trace_basic_info(PARAM *param, + Json_writer_object *trace_object) const; +}; + + + +/* + Plan for QUICK_ROR_UNION_SELECT scan. + QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows + is ignored by make_quick. +*/ + +class TRP_ROR_UNION : public TABLE_READ_PLAN +{ +public: + TRP_ROR_UNION() = default; /* Remove gcc warning */ + virtual ~TRP_ROR_UNION() = default; /* Remove gcc warning */ + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */ + TABLE_READ_PLAN **last_ror; /* end of the above array */ + void trace_basic_info(PARAM *param, + Json_writer_object *trace_object) const; +}; + +void TRP_ROR_UNION::trace_basic_info(PARAM *param, + Json_writer_object *trace_object) const +{ + THD *thd= param->thd; + DBUG_ASSERT(trace_object->trace_started()); + trace_object->add("type", "index_roworder_union"); + Json_writer_array smth_trace(thd, "union_of"); + for (TABLE_READ_PLAN **current= first_ror; current != last_ror; current++) + { + Json_writer_object trp_info(thd); + (*current)->trace_basic_info(param, &trp_info); + } +} + +/* + Plan for QUICK_INDEX_INTERSECT_SELECT scan. 
+ QUICK_INDEX_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows + is ignored by make_quick. +*/ + +class TRP_INDEX_INTERSECT : public TABLE_READ_PLAN +{ +public: + TRP_INDEX_INTERSECT() = default; /* Remove gcc warning */ + virtual ~TRP_INDEX_INTERSECT() = default; /* Remove gcc warning */ + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + TRP_RANGE **range_scans; /* array of ptrs to plans of intersected scans */ + TRP_RANGE **range_scans_end; /* end of the array */ + /* keys whose scans are to be filtered by cpk conditions */ + key_map filtered_scans; + void trace_basic_info(PARAM *param, + Json_writer_object *trace_object) const; + +}; + +void TRP_INDEX_INTERSECT::trace_basic_info(PARAM *param, + Json_writer_object *trace_object) const +{ + THD *thd= param->thd; + DBUG_ASSERT(trace_object->trace_started()); + trace_object->add("type", "index_sort_intersect"); + Json_writer_array smth_trace(thd, "index_sort_intersect_of"); + for (TRP_RANGE **current= range_scans; current != range_scans_end; + current++) + { + Json_writer_object trp_info(thd); + (*current)->trace_basic_info(param, &trp_info); + } +} + +/* + Plan for QUICK_INDEX_MERGE_SELECT scan. + QUICK_ROR_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows + is ignored by make_quick. 
+*/ + +class TRP_INDEX_MERGE : public TABLE_READ_PLAN +{ +public: + TRP_INDEX_MERGE() = default; /* Remove gcc warning */ + virtual ~TRP_INDEX_MERGE() = default; /* Remove gcc warning */ + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */ + TRP_RANGE **range_scans_end; /* end of the array */ + void trace_basic_info(PARAM *param, + Json_writer_object *trace_object) const; +}; + +void TRP_INDEX_MERGE::trace_basic_info(PARAM *param, + Json_writer_object *trace_object) const +{ + THD *thd= param->thd; + DBUG_ASSERT(trace_object->trace_started()); + trace_object->add("type", "index_merge"); + Json_writer_array smth_trace(thd, "index_merge_of"); + for (TRP_RANGE **current= range_scans; current != range_scans_end; current++) + { + Json_writer_object trp_info(thd); + (*current)->trace_basic_info(param, &trp_info); + } +} + +/* + Plan for a QUICK_GROUP_MIN_MAX_SELECT scan. +*/ + +class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN +{ +private: + bool have_min, have_max, have_agg_distinct; + KEY_PART_INFO *min_max_arg_part; + uint group_prefix_len; + uint used_key_parts; + uint group_key_parts; + KEY *index_info; + uint index; + uint key_infix_len; + uchar key_infix[MAX_KEY_LENGTH]; + SEL_TREE *range_tree; /* Represents all range predicates in the query. */ + SEL_ARG *index_tree; /* The SEL_ARG sub-tree corresponding to index_info. */ + uint param_idx; /* Index of used key in param->key. */ + bool is_index_scan; /* Use index_next() instead of random read */ +public: + /* Number of records selected by the ranges in index_tree. 
*/ + ha_rows quick_prefix_records; +public: + TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg, + bool have_agg_distinct_arg, + KEY_PART_INFO *min_max_arg_part_arg, + uint group_prefix_len_arg, uint used_key_parts_arg, + uint group_key_parts_arg, KEY *index_info_arg, + uint index_arg, uint key_infix_len_arg, + uchar *key_infix_arg, + SEL_TREE *tree_arg, SEL_ARG *index_tree_arg, + uint param_idx_arg, ha_rows quick_prefix_records_arg) + : have_min(have_min_arg), have_max(have_max_arg), + have_agg_distinct(have_agg_distinct_arg), + min_max_arg_part(min_max_arg_part_arg), + group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg), + group_key_parts(group_key_parts_arg), index_info(index_info_arg), + index(index_arg), key_infix_len(key_infix_len_arg), range_tree(tree_arg), + index_tree(index_tree_arg), param_idx(param_idx_arg), is_index_scan(FALSE), + quick_prefix_records(quick_prefix_records_arg) + { + if (key_infix_len) + memcpy(this->key_infix, key_infix_arg, key_infix_len); + } + virtual ~TRP_GROUP_MIN_MAX() = default; /* Remove gcc warning */ + + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + void use_index_scan() { is_index_scan= TRUE; } + void trace_basic_info(PARAM *param, + Json_writer_object *trace_object) const; +}; + + +void TRP_GROUP_MIN_MAX::trace_basic_info(PARAM *param, + Json_writer_object *trace_object) const +{ + THD *thd= param->thd; + DBUG_ASSERT(trace_object->trace_started()); + + trace_object->add("type", "index_group").add("index", index_info->name); + + if (min_max_arg_part) + trace_object->add("min_max_arg", min_max_arg_part->field->field_name); + else + trace_object->add_null("min_max_arg"); + + trace_object->add("min_aggregate", have_min) + .add("max_aggregate", have_max) + .add("distinct_aggregate", have_agg_distinct) + .add("rows", records) + .add("cost", read_cost); + + const KEY_PART_INFO *key_part= index_info->key_part; + { + Json_writer_array trace_keyparts(thd, 
"key_parts_used_for_access"); + for (uint partno= 0; partno < used_key_parts; partno++) + { + const KEY_PART_INFO *cur_key_part= key_part + partno; + trace_keyparts.add(cur_key_part->field->field_name); + } + } + + Json_writer_array trace_range(thd, "ranges"); + + // can have group quick without ranges + if (index_tree) + { + trace_ranges(&trace_range, param, param_idx, + index_tree, key_part); + } +} + + +typedef struct st_index_scan_info +{ + uint idx; /* # of used key in param->keys */ + uint keynr; /* # of used key in table */ + uint range_count; + ha_rows records; /* estimate of # records this scan will return */ + + /* Set of intervals over key fields that will be used for row retrieval. */ + SEL_ARG *sel_arg; + + KEY *key_info; + uint used_key_parts; + + /* Estimate of # records filtered out by intersection with cpk */ + ha_rows filtered_out; + /* Bitmap of fields used in index intersection */ + MY_BITMAP used_fields; + + /* Fields used in the query and covered by ROR scan. */ + MY_BITMAP covered_fields; + uint used_fields_covered; /* # of set bits in covered_fields */ + int key_rec_length; /* length of key record (including rowid) */ + + /* + Cost of reading all index records with values in sel_arg intervals set + (assuming there is no need to access full table records) + */ + double index_read_cost; + uint first_uncovered_field; /* first unused bit in covered_fields */ + uint key_components; /* # of parts in the key */ +} INDEX_SCAN_INFO; + +/* + Fill param->needed_fields with bitmap of fields used in the query. + SYNOPSIS + fill_used_fields_bitmap() + param Parameter from test_quick_select function. + + NOTES + Clustered PK members are not put into the bitmap as they are implicitly + present in all keys (and it is impossible to avoid reading them). + RETURN + 0 Ok + 1 Out of memory. 
+*/ + +static int fill_used_fields_bitmap(PARAM *param) +{ + TABLE *table= param->table; + my_bitmap_map *tmp; + uint pk; + param->tmp_covered_fields.bitmap= 0; + param->fields_bitmap_size= table->s->column_bitmap_size; + if (!(tmp= (my_bitmap_map*) alloc_root(param->mem_root, + param->fields_bitmap_size)) || + my_bitmap_init(¶m->needed_fields, tmp, table->s->fields)) + return 1; + + bitmap_copy(¶m->needed_fields, table->read_set); + bitmap_union(¶m->needed_fields, table->write_set); + + pk= param->table->s->primary_key; + if (param->table->file->pk_is_clustering_key(pk)) + { + /* The table uses clustered PK and it is not internally generated */ + KEY_PART_INFO *key_part= param->table->key_info[pk].key_part; + KEY_PART_INFO *key_part_end= key_part + + param->table->key_info[pk].user_defined_key_parts; + for (;key_part != key_part_end; ++key_part) + bitmap_clear_bit(¶m->needed_fields, key_part->fieldnr-1); + } + return 0; +} + + +/* + Test if a key can be used in different ranges + + SYNOPSIS + SQL_SELECT::test_quick_select() + thd Current thread + keys_to_use Keys to use for range retrieval + prev_tables Tables assumed to be already read when the scan is + performed (but not read at the moment of this call) + limit Query limit + force_quick_range Prefer to use range (instead of full table scan) even + if it is more expensive. + remove_false_parts_of_where Remove parts of OR-clauses for which range + analysis produced SEL_TREE(IMPOSSIBLE) + only_single_index_range_scan Evaluate only single index range scans + + NOTES + Updates the following in the select parameter: + needed_reg - Bits for keys with may be used if all prev regs are read + quick - Parameter to use when reading records. 
+ + In the table struct the following information is updated: + quick_keys - Which keys can be used + quick_rows - How many rows the key matches + opt_range_condition_rows - E(# rows that will satisfy the table condition) + + IMPLEMENTATION + opt_range_condition_rows value is obtained as follows: + + It is a minimum of E(#output rows) for all considered table access + methods (range and index_merge accesses over various indexes). + + The obtained value is not a true E(#rows that satisfy table condition) + but rather a pessimistic estimate. To obtain a true E(#...) one would + need to combine estimates of various access methods, taking into account + correlations between sets of rows they will return. + + For example, if values of tbl.key1 and tbl.key2 are independent (a right + assumption if we have no information about their correlation) then the + correct estimate will be: + + E(#rows("tbl.key1 < c1 AND tbl.key2 < c2")) = + = E(#rows(tbl.key1 < c1)) / total_rows(tbl) * E(#rows(tbl.key2 < c2) + + which is smaller than + + MIN(E(#rows(tbl.key1 < c1), E(#rows(tbl.key2 < c2))) + + which is currently produced. + + TODO + * Change the value returned in opt_range_condition_rows from a pessimistic + estimate to true E(#rows that satisfy table condition). + (we can re-use some of E(#rows) calcuation code from + index_merge/intersection for this) + + * Check if this function really needs to modify keys_to_use, and change the + code to pass it by reference if it doesn't. + + * In addition to force_quick_range other means can be (an usually are) used + to make this function prefer range over full table scan. Figure out if + force_quick_range is really needed. + + RETURN + SQL_SELECT:: + IMPOSSIBLE_RANGE, + impossible select (i.e. certainly no rows will be selected) + ERROR, + an error occurred, either memory or in evaluating conditions + OK = 1, + either + found usable ranges and quick select has been successfully created. 
+ or can't use quick_select +*/ + +quick_select_return +SQL_SELECT::test_quick_select(THD *thd, + key_map keys_to_use, + table_map prev_tables, + ha_rows limit, bool force_quick_range, + bool ordered_output, + bool remove_false_parts_of_where, + bool only_single_index_range_scan, + bool suppress_unusable_key_notes) +{ + uint idx; + double scan_time; + Item *notnull_cond= NULL; + TABLE_READ_PLAN *best_trp= NULL; + SEL_ARG **backup_keys= 0; + quick_select_return returnval= OK; + + DBUG_ENTER("SQL_SELECT::test_quick_select"); + DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu", + (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables, + (ulong) const_tables)); + DBUG_PRINT("info", ("records: %lu", (ulong) head->stat_records())); + delete quick; + quick=0; + needed_reg.clear_all(); + quick_keys.clear_all(); + head->with_impossible_ranges.clear_all(); + DBUG_ASSERT(!head->is_filled_at_execution()); + if (keys_to_use.is_clear_all() || head->is_filled_at_execution()) + DBUG_RETURN(OK); + records= head->stat_records(); + notnull_cond= head->notnull_cond; + if (!records) + records++; /* purecov: inspected */ + if (head->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID) + only_single_index_range_scan= 1; + + if (head->force_index || force_quick_range) + scan_time= read_time= DBL_MAX; + else + { + scan_time= rows2double(records) / TIME_FOR_COMPARE; + /* + The 2 is there to prefer range scans to full table scans. + This is mainly to make the test suite happy as many tests has + very few rows. In real life tables has more than a few rows and the + +2 has no practical effect. 
+ */ + read_time= (double) head->file->scan_time() + scan_time + 2; + if (limit < records && read_time < (double) records + scan_time + 1 ) + { + read_time= (double) records + scan_time + 1; // Force to use index + notnull_cond= NULL; + } + } + + possible_keys.clear_all(); + + DBUG_PRINT("info",("Time to scan table: %g", read_time)); + + Json_writer_object table_records(thd); + table_records.add_table_name(head); + + Json_writer_object trace_range(thd, "range_analysis"); + { + Json_writer_object table_rec(thd, "table_scan"); + table_rec.add("rows", records).add("cost", read_time); + } + + keys_to_use.intersect(head->keys_in_use_for_query); + if (!keys_to_use.is_clear_all()) + { + uchar buff[STACK_BUFF_ALLOC]; + MEM_ROOT alloc; + SEL_TREE *tree= NULL; + SEL_TREE *notnull_cond_tree= NULL; + KEY_PART *key_parts; + KEY *key_info; + PARAM param; + bool force_group_by = false; + + if (check_stack_overrun(thd, 2*STACK_MIN_SIZE + sizeof(PARAM), buff)) + DBUG_RETURN(ERROR); // Fatal error flag is set + + /* set up parameter that is passed to all functions */ + bzero((void*) ¶m, sizeof(param)); + param.thd= thd; + param.baseflag= head->file->ha_table_flags(); + param.prev_tables=prev_tables | const_tables; + param.read_tables=read_tables; + param.current_table= head->map; + param.table=head; + param.keys=0; + param.mem_root= &alloc; + param.old_root= thd->mem_root; + param.needed_reg= &needed_reg; + param.imerge_cost_buff_size= 0; + param.using_real_indexes= TRUE; + param.remove_jump_scans= TRUE; + param.max_key_parts= 0; + param.remove_false_where_parts= remove_false_parts_of_where; + param.force_default_mrr= ordered_output; + param.note_unusable_keys= (!suppress_unusable_key_notes && + thd->give_notes_for_unusable_keys()); + + param.possible_keys.clear_all(); + + thd->no_errors=1; // Don't warn about NULL + init_sql_alloc(key_memory_quick_range_select_root, &alloc, + thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC)); + if (!(param.key_parts= + (KEY_PART*) 
alloc_root(&alloc, + sizeof(KEY_PART) * + head->s->actual_n_key_parts(thd))) || + fill_used_fields_bitmap(¶m)) + { + thd->no_errors=0; + free_root(&alloc,MYF(0)); // Return memory & allocator + DBUG_RETURN(ERROR); + } + key_parts= param.key_parts; + + /* + Make an array with description of all key parts of all table keys. + This is used in get_mm_parts function. + */ + key_info= head->key_info; + uint max_key_len= 0; + + Json_writer_array trace_idx(thd, "potential_range_indexes"); + + for (idx=0 ; idx < head->s->keys ; idx++, key_info++) + { + Json_writer_object trace_idx_details(thd); + trace_idx_details.add("index", key_info->name); + KEY_PART_INFO *key_part_info; + uint n_key_parts= head->actual_n_key_parts(key_info); + + if (!keys_to_use.is_set(idx)) + { + trace_idx_details.add("usable", false) + .add("cause", "not applicable"); + continue; + } + if (key_info->flags & HA_FULLTEXT) + { + trace_idx_details.add("usable", false).add("cause", "fulltext"); + continue; // ToDo: ft-keys in non-ft ranges, if possible SerG + } + trace_idx_details.add("usable", true); + param.key[param.keys]=key_parts; + key_part_info= key_info->key_part; + uint cur_key_len= 0; + Json_writer_array trace_keypart(thd, "key_parts"); + for (uint part= 0 ; part < n_key_parts ; + part++, key_parts++, key_part_info++) + { + key_parts->key= param.keys; + key_parts->part= part; + key_parts->length= key_part_info->length; + key_parts->store_length= key_part_info->store_length; + cur_key_len += key_part_info->store_length; + key_parts->field= key_part_info->field; + key_parts->null_bit= key_part_info->null_bit; + key_parts->image_type = + (key_info->flags & HA_SPATIAL) ? 
Field::itMBR : Field::itRAW; + /* Only HA_PART_KEY_SEG is used */ + key_parts->flag= (uint8) key_part_info->key_part_flag; + trace_keypart.add(key_parts->field->field_name); + } + trace_keypart.end(); + param.real_keynr[param.keys++]=idx; + if (cur_key_len > max_key_len) + max_key_len= cur_key_len; + } + trace_idx.end(); + + param.key_parts_end=key_parts; + param.alloced_sel_args= 0; + + max_key_len++; /* Take into account the "+1" in QUICK_RANGE::QUICK_RANGE */ + if (!(param.min_key= (uchar*)alloc_root(&alloc,max_key_len)) || + !(param.max_key= (uchar*)alloc_root(&alloc,max_key_len))) + { + thd->no_errors=0; + free_root(&alloc,MYF(0)); // Return memory & allocator + DBUG_RETURN(ERROR); + } + + thd->mem_root= &alloc; + /* Calculate cost of full index read for the shortest covering index */ + if (!force_quick_range && !head->covering_keys.is_clear_all()) + { + int key_for_use= find_shortest_key(head, &head->covering_keys); + double key_read_time= (head->file->key_scan_time(key_for_use) + + rows2double(records) / TIME_FOR_COMPARE); + DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, " + "read time %g", key_for_use, key_read_time)); + + Json_writer_object trace_cov(thd, "best_covering_index_scan"); + bool chosen= FALSE; + if (key_read_time < read_time) + { + read_time= key_read_time; + chosen= TRUE; + } + trace_cov.add("index", head->key_info[key_for_use].name) + .add("cost", key_read_time).add("chosen", chosen); + if (!chosen) + trace_cov.add("cause", "cost"); + } + + double best_read_time= read_time; + + if (notnull_cond) + notnull_cond_tree= notnull_cond->get_mm_tree(¶m, ¬null_cond); + + if (cond || notnull_cond_tree) + { + { + Json_writer_array trace_range_summary(thd, + "setup_range_conditions"); + if (cond) + tree= cond->get_mm_tree(¶m, &cond); + if (notnull_cond_tree) + tree= tree_and(¶m, tree, notnull_cond_tree); + if (thd->trace_started() && + param.alloced_sel_args >= thd->variables.optimizer_max_sel_args) + { + Json_writer_object 
wrapper(thd); + Json_writer_object obj(thd, "sel_arg_alloc_limit_hit"); + obj.add("alloced_sel_args", param.alloced_sel_args); + } + } + if (tree) + { + if (tree->type == SEL_TREE::IMPOSSIBLE) + { + records=0L; + returnval= IMPOSSIBLE_RANGE; + read_time= (double) HA_POS_ERROR; + trace_range.add("impossible_range", true); + goto free_mem; + } + /* + If the tree can't be used for range scans, proceed anyway, as we + can construct a group-min-max quick select + */ + if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER) + { + trace_range.add("range_scan_possible", false); + tree= NULL; + } + } + else if (thd->is_error()) + { + thd->no_errors=0; + thd->mem_root= param.old_root; + free_root(&alloc, MYF(0)); + DBUG_RETURN(ERROR); + } + } + + if (tree) + { + /* + It is possible to use a range-based quick select (but it might be + slower than 'all' table scan). + */ + TRP_ROR_INTERSECT *rori_trp; + TRP_INDEX_INTERSECT *intersect_trp; + bool can_build_covering= FALSE; + Json_writer_object trace_range(thd, "analyzing_range_alternatives"); + + backup_keys= (SEL_ARG**) alloca(sizeof(backup_keys[0])*param.keys); + memcpy(&backup_keys[0], &tree->keys[0], + sizeof(backup_keys[0])*param.keys); + + remove_nonrange_trees(¶m, tree); + + /* Get best 'range' plan and prepare data for making other plans */ + if (auto range_trp= get_key_scans_params(¶m, tree, + only_single_index_range_scan, + true, best_read_time)) + { + best_trp= range_trp; + best_read_time= best_trp->read_cost; + } + + /* + Simultaneous key scans and row deletes on several handler + objects are not allowed so don't use ROR-intersection for + table deletes. + */ + if ((thd->lex->sql_command != SQLCOM_DELETE) && + optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) && + !only_single_index_range_scan) + { + /* + Get best non-covering ROR-intersection plan and prepare data for + building covering ROR-intersection. 
+ */ + if ((rori_trp= get_best_ror_intersect(¶m, tree, best_read_time, + &can_build_covering))) + { + best_trp= rori_trp; + best_read_time= best_trp->read_cost; + /* + Try constructing covering ROR-intersect only if it looks possible + and worth doing. + */ + if (!rori_trp->is_covering && can_build_covering && + (rori_trp= get_best_covering_ror_intersect(¶m, tree, + best_read_time))) + best_trp= rori_trp; + } + } + /* + Do not look for an index intersection plan if there is a covering + index. The scan by this covering index will be always cheaper than + any index intersection. + */ + if (param.table->covering_keys.is_clear_all() && + optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) && + optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_INTERSECT) && + !only_single_index_range_scan) + { + if ((intersect_trp= get_best_index_intersect(¶m, tree, + best_read_time))) + { + best_trp= intersect_trp; + best_read_time= best_trp->read_cost; + set_if_smaller(param.table->opt_range_condition_rows, + intersect_trp->records); + } + } + + if (optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) && + head->stat_records() != 0 && !only_single_index_range_scan) + { + /* Try creating index_merge/ROR-union scan. 
*/ + SEL_IMERGE *imerge; + TABLE_READ_PLAN *best_conj_trp= NULL, + *UNINIT_VAR(new_conj_trp); /* no empty index_merge lists possible */ + DBUG_PRINT("info",("No range reads possible," + " trying to construct index_merge")); + List_iterator_fast it(tree->merges); + Json_writer_array trace_idx_merge(thd, "analyzing_index_merge_union"); + while ((imerge= it++)) + { + new_conj_trp= get_best_disjunct_quick(¶m, imerge, best_read_time); + if (new_conj_trp) + set_if_smaller(param.table->opt_range_condition_rows, + new_conj_trp->records); + if (new_conj_trp && + (!best_conj_trp || + new_conj_trp->read_cost < best_conj_trp->read_cost)) + { + best_conj_trp= new_conj_trp; + best_read_time= best_conj_trp->read_cost; + } + } + if (best_conj_trp) + best_trp= best_conj_trp; + } + } + + /* + Try to construct a QUICK_GROUP_MIN_MAX_SELECT. + Notice that it can be constructed no matter if there is a range tree. + */ + DBUG_EXECUTE_IF("force_group_by", force_group_by = true; ); + if (!only_single_index_range_scan) + { + TRP_GROUP_MIN_MAX *group_trp; + if (tree) + restore_nonrange_trees(¶m, tree, backup_keys); + if ((group_trp= get_best_group_min_max(¶m, tree, read_time))) + { + set_if_smaller(param.table->opt_range_condition_rows, + group_trp->records); + Json_writer_object grp_summary(thd, "best_group_range_summary"); + + if (unlikely(thd->trace_started())) + group_trp->trace_basic_info(¶m, &grp_summary); + + if (group_trp->read_cost < best_read_time || force_group_by) + { + grp_summary.add("chosen", true); + best_trp= group_trp; + best_read_time= best_trp->read_cost; + } + else + grp_summary.add("chosen", false).add("cause", "cost"); + } + if (tree) + remove_nonrange_trees(¶m, tree); + } + + thd->mem_root= param.old_root; + + /* If we got a read plan, create a quick select from it. 
*/ + if (best_trp) + { + records= best_trp->records; + if (!(quick= best_trp->make_quick(¶m, TRUE)) || quick->init()) + { + delete quick; + quick= NULL; + } + if (quick && records) + returnval= OK; + } + possible_keys= param.possible_keys; + + if (!records) + returnval= IMPOSSIBLE_RANGE; + + free_mem: + if (unlikely(quick && best_trp && thd->trace_started())) + { + Json_writer_object trace_range_summary(thd, + "chosen_range_access_summary"); + { + Json_writer_object trace_range_plan(thd, "range_access_plan"); + best_trp->trace_basic_info(¶m, &trace_range_plan); + } + trace_range_summary.add("rows_for_plan", quick->records) + .add("cost_for_plan", quick->read_time) + .add("chosen", true); + } + + free_root(&alloc,MYF(0)); // Return memory & allocator + thd->mem_root= param.old_root; + thd->no_errors=0; + } + + DBUG_EXECUTE("info", print_quick(quick, &needed_reg);); + + /* + Assume that if the user is using 'limit' we will only need to scan + limit rows if we are using a key + */ + DBUG_RETURN(returnval); +} + +/**************************************************************************** + * Condition selectivity module + ****************************************************************************/ + + +/* + Build descriptors of pseudo-indexes over columns to perform range analysis + + SYNOPSIS + create_key_parts_for_pseudo_indexes() + param IN/OUT data structure for the descriptors to be built + used_fields bitmap of columns for which the descriptors are to be built + + DESCRIPTION + For each column marked in the bitmap used_fields the function builds + a descriptor of a single-component pseudo-index over this column that + can be used for the range analysis of the predicates over this columns. + The descriptors are created in the memory of param->mem_root. 
+ + RETURN + FALSE in the case of success + TRUE otherwise +*/ + +static +bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, + MY_BITMAP *used_fields) +{ + Field **field_ptr; + TABLE *table= param->table; + uint parts= 0; + + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + Field *field= *field_ptr; + if (bitmap_is_set(used_fields, field->field_index) && + is_eits_usable(field)) + parts++; + } + + KEY_PART *key_part; + uint keys= 0; + + if (!parts) + return TRUE; + + if (!(key_part= (KEY_PART *) alloc_root(param->mem_root, + sizeof(KEY_PART) * parts))) + return TRUE; + + param->key_parts= key_part; + uint max_key_len= 0; + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + Field *field= *field_ptr; + if (bitmap_is_set(used_fields, field->field_index)) + { + if (!is_eits_usable(field)) + continue; + + uint16 store_length; + uint16 max_key_part_length= (uint16) table->file->max_key_part_length(); + key_part->key= keys; + key_part->part= 0; + if (field->flags & BLOB_FLAG) + key_part->length= max_key_part_length; + else + { + key_part->length= (uint16) field->key_length(); + set_if_smaller(key_part->length, max_key_part_length); + } + store_length= key_part->length; + if (field->real_maybe_null()) + store_length+= HA_KEY_NULL_LENGTH; + if (field->real_type() == MYSQL_TYPE_VARCHAR) + store_length+= HA_KEY_BLOB_LENGTH; + if (max_key_len < store_length) + max_key_len= store_length; + key_part->store_length= store_length; + key_part->field= field; + key_part->image_type= Field::itRAW; + key_part->flag= 0; + param->key[keys]= key_part; + keys++; + key_part++; + } + } + + max_key_len++; /* Take into account the "+1" in QUICK_RANGE::QUICK_RANGE */ + if (!(param->min_key= (uchar*)alloc_root(param->mem_root, max_key_len)) || + !(param->max_key= (uchar*)alloc_root(param->mem_root, max_key_len))) + { + return true; + } + param->keys= keys; + param->key_parts_end= key_part; + + return FALSE; +} + + +/* + Estimate the number of rows in all ranges 
built for a column + by the range optimizer + + SYNOPSIS + records_in_column_ranges() + param the data structure to access descriptors of pseudo indexes + built over columns used in the condition of the processed query + idx the index of the descriptor of interest in param + tree the tree representing ranges built for the interesting column + + DESCRIPTION + This function retrieves the ranges represented by the SEL_ARG 'tree' and + for each of them r it calls the function get_column_range_cardinality() + that estimates the number of expected rows in r. It is assumed that param + is the data structure containing the descriptors of pseudo-indexes that + has been built to perform range analysis of the range conditions imposed + on the columns used in the processed query, while idx is the index of the + descriptor created in 'param' exactly for the column for which 'tree' + has been built by the range optimizer. + + RETURN + the number of rows in the retrieved ranges +*/ + +static +double records_in_column_ranges(PARAM *param, uint idx, + SEL_ARG *tree) +{ + THD *thd= param->thd; + SEL_ARG_RANGE_SEQ seq; + KEY_MULTI_RANGE range; + range_seq_t seq_it; + double rows; + Field *field; + uint flags= 0; + double total_rows= 0; + RANGE_SEQ_IF seq_if = {NULL, sel_arg_range_seq_init, + sel_arg_range_seq_next, 0, 0}; + + /* Handle cases when we don't have a valid non-empty list of range */ + if (!tree) + return DBL_MAX; + if (tree->type == SEL_ARG::IMPOSSIBLE) + return (0L); + + field= tree->field; + + seq.keyno= idx; + seq.real_keyno= MAX_KEY; + seq.key_parts= param->key[idx]; + seq.param= param; + seq.start= tree; + seq.is_ror_scan= FALSE; + + seq_it= seq_if.init((void *) &seq, 0, flags); + + Json_writer_array range_trace(thd, "ranges"); + + while (!seq_if.next(seq_it, &range)) + { + key_range *min_endp, *max_endp; + min_endp= range.start_key.length? &range.start_key : NULL; + max_endp= range.end_key.length? 
&range.end_key : NULL; + int range_flag= range.range_flag; + + if (!range.start_key.length) + range_flag |= NO_MIN_RANGE; + if (!range.end_key.length) + range_flag |= NO_MAX_RANGE; + if (range.start_key.flag == HA_READ_AFTER_KEY) + range_flag |= NEAR_MIN; + if (range.start_key.flag == HA_READ_BEFORE_KEY) + range_flag |= NEAR_MAX; + + if (unlikely(thd->trace_started())) + { + StringBuffer<128> range_info(system_charset_info); + print_range_for_non_indexed_field(&range_info, field, &range); + range_trace.add(range_info.c_ptr_safe(), range_info.length()); + } + + rows= get_column_range_cardinality(field, min_endp, max_endp, range_flag); + if (DBL_MAX == rows) + { + total_rows= DBL_MAX; + break; + } + total_rows += rows; + } + if (total_rows == 0) + total_rows= MY_MIN(1, rows2double(param->table->stat_records())); + + return total_rows; +} + + +/* + Compare quick select ranges according to number of found rows + If there is equal amounts of rows, use the long key part. + The idea is that if we have keys (a),(a,b) and (a,b,c) and we have + a query like WHERE a=1 and b=1 and c=1, + it is better to use key (a,b,c) than (a) as it will ensure we don't also + use histograms for columns b and c +*/ + +static +int cmp_quick_ranges(TABLE *table, uint *a, uint *b) +{ + int tmp= CMP_NUM(table->opt_range[*a].rows, table->opt_range[*b].rows); + if (tmp) + return tmp; + return -CMP_NUM(table->opt_range[*a].key_parts, table->opt_range[*b].key_parts); +} + + +/* + Calculate the selectivity of the condition imposed on the rows of a table + + SYNOPSIS + calculate_cond_selectivity_for_table() + thd the context handle + table the table of interest + cond conditions imposed on the rows of the table + + DESCRIPTION + This function calculates the selectivity of range conditions cond imposed + on the rows of 'table' in the processed query. + The calculated selectivity is assigned to the field table->cond_selectivity. 
+ + Selectivity is calculated as a product of selectivities imposed by: + + 1. possible range accesses. (if multiple range accesses use the same + restrictions on the same field, we make adjustments for that) + 2. Sargable conditions on fields for which we have column statistics (if + a field is used in a possible range access, we assume that selectivity + is already provided by the range access' estimates) + 3. Reading a few records from the table pages and checking the condition + selectivity (this is used for conditions like "column LIKE '%val%'" + where approaches #1 and #2 do not provide selectivity data). + + NOTE + Currently the selectivities of range conditions over different columns are + considered independent. + + RETURN + FALSE on success + TRUE otherwise +*/ + +bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) +{ + uint keynr, range_index, ranges; + MY_BITMAP *used_fields= &table->cond_set; + double table_records= (double)table->stat_records(); + uint optimal_key_order[MAX_KEY]; + DBUG_ENTER("calculate_cond_selectivity_for_table"); + + table->cond_selectivity= 1.0; + + if (table_records == 0) + DBUG_RETURN(FALSE); + + QUICK_SELECT_I *quick; + if ((quick=table->reginfo.join_tab->quick) && + quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) + { + table->cond_selectivity*= (quick->records/table_records); + DBUG_RETURN(FALSE); + } + + if (!*cond) + DBUG_RETURN(FALSE); + + if (table->pos_in_table_list->schema_table) + DBUG_RETURN(FALSE); + + MY_BITMAP handled_columns; + my_bitmap_map* buf; + if (!(buf= (my_bitmap_map*)thd->alloc(table->s->column_bitmap_size))) + DBUG_RETURN(TRUE); + my_bitmap_init(&handled_columns, buf, table->s->fields); + + /* + Calculate the selectivity of the range conditions supported by indexes. + + First, take into account possible range accesses. + range access estimates are the most precise, we prefer them to any other + estimate sources. 
+ */ + + Json_writer_object trace_wrapper(thd); + Json_writer_array selectivity_for_indexes(thd, "selectivity_for_indexes"); + + /* + Walk through all quick ranges in the order of least found rows. + */ + for (ranges= keynr= 0 ; keynr < table->s->keys; keynr++) + if (table->opt_range_keys.is_set(keynr)) + optimal_key_order[ranges++]= keynr; + + my_qsort2(optimal_key_order, ranges, + sizeof(optimal_key_order[0]), + (qsort2_cmp) cmp_quick_ranges, table); + + for (range_index= 0 ; range_index < ranges ; range_index++) + { + uint keynr= optimal_key_order[range_index]; + { + { + uint i; + uint used_key_parts= table->opt_range[keynr].key_parts; + double quick_cond_selectivity= (table->opt_range[keynr].rows / + table_records); + KEY *key_info= table->key_info + keynr; + KEY_PART_INFO* key_part= key_info->key_part; + /* + Suppose, there are range conditions on two keys + KEY1 (col1, col2) + KEY2 (col3, col2) + + we don't want to count selectivity of condition on col2 twice. + + First, find the longest key prefix that's made of columns whose + selectivity wasn't already accounted for. + */ + for (i= 0; i < used_key_parts; i++, key_part++) + { + if (bitmap_is_set(&handled_columns, key_part->fieldnr-1)) + break; + bitmap_set_bit(&handled_columns, key_part->fieldnr-1); + } + if (i) + { + double UNINIT_VAR(selectivity_mult); + + /* + There is at least 1-column prefix of columns whose selectivity has + not yet been accounted for. + */ + table->cond_selectivity*= quick_cond_selectivity; + Json_writer_object selectivity_for_index(thd); + selectivity_for_index.add("index_name", key_info->name) + .add("selectivity_from_index", + quick_cond_selectivity); + if (i != used_key_parts) + { + /* + Range access got us estimate for #used_key_parts. + We need estimate for #(i-1) key parts. 
+ */ + double f1= key_info->actual_rec_per_key(i-1); + double f2= key_info->actual_rec_per_key(i); + if (f1 > 0 && f2 > 0) + selectivity_mult= f1 / f2; + else + { + /* + No statistics available, assume the selectivity is proportional + to the number of key parts. + (i=0 means 1 keypart, i=1 means 2 keyparts, so use i+1) + */ + selectivity_mult= ((double)(i+1)) / i; + } + table->cond_selectivity*= selectivity_mult; + selectivity_for_index.add("selectivity_multiplier", + selectivity_mult); + } + /* + We need to set selectivity for fields supported by indexes. + For single-component indexes and for some first components + of other indexes we do it here. For the remaining fields + we do it later in this function, in the same way as for the + fields not used in any indexes. + */ + if (i == 1) + { + uint fieldnr= key_info->key_part[0].fieldnr; + table->field[fieldnr-1]->cond_selectivity= quick_cond_selectivity; + if (i != used_key_parts) + table->field[fieldnr-1]->cond_selectivity*= selectivity_mult; + bitmap_clear_bit(used_fields, fieldnr-1); + } + } + } + } + } + selectivity_for_indexes.end(); + + /* + Second step: calculate the selectivity of the range conditions not + supported by any index and selectivity of the range condition + over the fields whose selectivity has not been set yet. 
+ */ + Json_writer_array selectivity_for_columns(thd, "selectivity_for_columns"); + + if (thd->variables.optimizer_use_condition_selectivity > 2 && + !bitmap_is_clear_all(used_fields) && + thd->variables.use_stat_tables > 0 && table->stats_is_read) + { + PARAM param; + MEM_ROOT alloc; + SEL_TREE *tree; + double rows; + + init_sql_alloc(key_memory_quick_range_select_root, &alloc, + thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC)); + bzero((void*) ¶m, sizeof(param)); + param.thd= thd; + param.mem_root= &alloc; + param.old_root= thd->mem_root; + param.table= table; + param.remove_false_where_parts= true; + + if (create_key_parts_for_pseudo_indexes(¶m, used_fields)) + goto free_alloc; + + param.prev_tables= param.read_tables= 0; + param.current_table= table->map; + param.using_real_indexes= FALSE; + param.real_keynr[0]= 0; + param.alloced_sel_args= 0; + param.max_key_parts= 0; + + thd->no_errors=1; + + tree= cond[0]->get_mm_tree(¶m, cond); + + if (!tree) + goto free_alloc; + + table->reginfo.impossible_range= 0; + if (tree->type == SEL_TREE::IMPOSSIBLE) + { + rows= 0; + table->reginfo.impossible_range= 1; + goto free_alloc; + } + else if (tree->type == SEL_TREE::ALWAYS) + { + rows= table_records; + goto free_alloc; + } + else if (tree->type == SEL_TREE::MAYBE) + { + rows= table_records; + goto free_alloc; + } + + for (uint idx= 0; idx < param.keys; idx++) + { + SEL_ARG *key= tree->keys[idx]; + if (key) + { + Json_writer_object selectivity_for_column(thd); + selectivity_for_column.add("column_name", key->field->field_name); + if (key->type == SEL_ARG::IMPOSSIBLE) + { + rows= 0; + table->reginfo.impossible_range= 1; + selectivity_for_column.add("selectivity_from_histogram", rows); + selectivity_for_column.add("cause", "impossible range"); + goto free_alloc; + } + else + { + enum_check_fields save_count_cuted_fields= thd->count_cuted_fields; + thd->count_cuted_fields= CHECK_FIELD_IGNORE; + rows= records_in_column_ranges(¶m, idx, key); + 
thd->count_cuted_fields= save_count_cuted_fields; + if (rows != DBL_MAX) + { + key->field->cond_selectivity= rows/table_records; + selectivity_for_column.add("selectivity_from_histogram", + key->field->cond_selectivity); + } + } + } + } + + for (Field **field_ptr= table->field; *field_ptr; field_ptr++) + { + Field *table_field= *field_ptr; + if (bitmap_is_set(used_fields, table_field->field_index) && + table_field->cond_selectivity < 1.0) + { + if (!bitmap_is_set(&handled_columns, table_field->field_index)) + table->cond_selectivity*= table_field->cond_selectivity; + } + } + + free_alloc: + thd->no_errors= 0; + thd->mem_root= param.old_root; + free_root(&alloc, MYF(0)); + + } + selectivity_for_columns.end(); + + if (quick && (quick->get_type() == QUICK_SELECT_I::QS_TYPE_ROR_UNION || + quick->get_type() == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)) + { + table->cond_selectivity*= (quick->records/table_records); + } + + bitmap_union(used_fields, &handled_columns); + + /* Check if we can improve selectivity estimates by using sampling */ + ulong check_rows= + MY_MIN(thd->variables.optimizer_selectivity_sampling_limit, + (ulong) (table_records * SELECTIVITY_SAMPLING_SHARE)); + if (*cond && check_rows > SELECTIVITY_SAMPLING_THRESHOLD && + thd->variables.optimizer_use_condition_selectivity > 4) + { + find_selective_predicates_list_processor_data *dt= + (find_selective_predicates_list_processor_data *) + alloc_root(thd->mem_root, + sizeof(find_selective_predicates_list_processor_data)); + if (!dt) + DBUG_RETURN(TRUE); + dt->list.empty(); + dt->table= table; + if ((*cond)->walk(&Item::find_selective_predicates_list_processor, 0, dt)) + DBUG_RETURN(TRUE); + if (dt->list.elements > 0) + { + check_rows= check_selectivity(thd, check_rows, table, &dt->list); + if (check_rows > SELECTIVITY_SAMPLING_THRESHOLD) + { + COND_STATISTIC *stat; + List_iterator_fast it(dt->list); + double examined_rows= check_rows; + while ((stat= it++)) + { + if (!stat->positive) + { + DBUG_PRINT("info", 
("To avoid 0 assigned 1 to the counter")); + stat->positive= 1; // avoid 0 + } + DBUG_PRINT("info", ("The predicate selectivity : %g", + (double)stat->positive / examined_rows)); + double selectivity= ((double)stat->positive) / examined_rows; + table->cond_selectivity*= selectivity; + /* + If a field is involved then we register its selectivity in case + there in an equality with the field. + For example in case + t1.a LIKE "%bla%" and t1.a = t2.b + the selectivity we have found could be used also for t2. + */ + if (stat->field_arg) + { + stat->field_arg->cond_selectivity*= selectivity; + + if (stat->field_arg->next_equal_field) + { + for (Field *next_field= stat->field_arg->next_equal_field; + next_field != stat->field_arg; + next_field= next_field->next_equal_field) + { + next_field->cond_selectivity*= selectivity; + next_field->table->cond_selectivity*= selectivity; + } + } + } + } + + } + /* This list and its elements put to mem_root so should not be freed */ + table->cond_selectivity_sampling_explain= &dt->list; + } + } + trace_wrapper.add("cond_selectivity", table->cond_selectivity); + DBUG_RETURN(FALSE); +} + +/**************************************************************************** + * Condition selectivity code ends + ****************************************************************************/ + +/**************************************************************************** + * Partition pruning module + ****************************************************************************/ + +/* + Store field key image to table record + + SYNOPSIS + store_key_image_to_rec() + field Field which key image should be stored + ptr Field value in key format + len Length of the value, in bytes + + ATTENTION + len is the length of the value not counting the NULL-byte (at the same + time, ptr points to the key image, which starts with NULL-byte for + nullable columns) + + DESCRIPTION + Copy the field value from its key image to the table record. 
  The source is the value in key image format, occupying len bytes in buffer
  pointed by ptr. The destination is table record, in "field value in table
  record" format.
*/

void store_key_image_to_rec(Field *field, uchar *ptr, uint len)
{
  /* Do the same as print_key() does */

  if (field->real_maybe_null())
  {
    /*
      Key images of nullable columns start with a NULL-indicator byte:
      non-zero means SQL NULL, and no value bytes follow.
    */
    if (*ptr)
    {
      field->set_null();
      return;
    }
    field->set_notnull();
    ptr++;                                   /* skip the NULL-byte */
  }
  /*
    Temporarily mark all columns writable: the target field may not be in
    write_set, and set_key_image() asserts on that in debug builds.
  */
  MY_BITMAP *old_map= dbug_tmp_use_all_columns(field->table,
                                               &field->table->write_set);
  field->set_key_image(ptr, len);
  dbug_tmp_restore_column_map(&field->table->write_set, old_map);
}

#ifdef WITH_PARTITION_STORAGE_ENGINE

/*
  PartitionPruningModule

  This part of the code does partition pruning. Partition pruning solves the
  following problem: given a query over partitioned tables, find partitions
  that we will not need to access (i.e. partitions that we can assume to be
  empty) when executing the query.
  The set of partitions to prune doesn't depend on which query execution
  plan will be used to execute the query.

  HOW IT WORKS

  Partition pruning module makes use of RangeAnalysisModule. The following
  examples show how the problem of partition pruning can be reduced to the
  range analysis problem:

  EXAMPLE 1
    Consider a query:

    SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z'

    where table t1 is partitioned using PARTITION BY RANGE(t1.a). An apparent
    way to find the used (i.e. not pruned away) partitions is as follows:

    1. analyze the WHERE clause and extract the list of intervals over t1.a
       for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)}

    2. for each interval I
       {
         find partitions that have non-empty intersection with I;
         mark them as used;
       }

  EXAMPLE 2
    Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then
    we need to:

    1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b).
+ The list of intervals we'll obtain will look like this: + ((t1.a, t1.b) = (1,'foo')), + ((t1.a, t1.b) = (2,'bar')), + ((t1,a, t1.b) > (10,'zz')) + + 2. for each interval I + { + if (the interval has form "(t1.a, t1.b) = (const1, const2)" ) + { + calculate HASH(part_func(t1.a, t1.b)); + find which partition has records with this hash value and mark + it as used; + } + else + { + mark all partitions as used; + break; + } + } + + For both examples the step #1 is exactly what RangeAnalysisModule could + be used to do, if it was provided with appropriate index description + (array of KEY_PART structures). + In example #1, we need to provide it with description of index(t1.a), + in example #2, we need to provide it with description of index(t1.a, t1.b). + + These index descriptions are further called "partitioning index + descriptions". Note that it doesn't matter if such indexes really exist, + as range analysis module only uses the description. + + Putting it all together, partitioning module works as follows: + + prune_partitions() { + call create_partition_index_description(); + + call get_mm_tree(); // invoke the RangeAnalysisModule + + // analyze the obtained interval list and get used partitions + call find_used_partitions(); + } + +*/ + +struct st_part_prune_param; +struct st_part_opt_info; + +typedef void (*mark_full_part_func)(partition_info*, uint32); + +/* + Partition pruning operation context +*/ +typedef struct st_part_prune_param +{ + RANGE_OPT_PARAM range_param; /* Range analyzer parameters */ + + /*************************************************************** + Following fields are filled in based solely on partitioning + definition and not modified after that: + **************************************************************/ + partition_info *part_info; /* Copy of table->part_info */ + /* Function to get partition id from partitioning fields only */ + get_part_id_func get_top_partition_id_func; + /* Function to mark a partition as used (w/all 
subpartitions if they exist)*/ + mark_full_part_func mark_full_partition_used; + + /* Partitioning 'index' description, array of key parts */ + KEY_PART *key; + + /* + Number of fields in partitioning 'index' definition created for + partitioning (0 if partitioning 'index' doesn't include partitioning + fields) + */ + uint part_fields; + uint subpart_fields; /* Same as above for subpartitioning */ + + /* + Number of the last partitioning field keypart in the index, or -1 if + partitioning index definition doesn't include partitioning fields. + */ + int last_part_partno; + int last_subpart_partno; /* Same as above for supartitioning */ + + /* + is_part_keypart[i] == MY_TEST(keypart #i in partitioning index is a member + used in partitioning) + Used to maintain current values of cur_part_fields and cur_subpart_fields + */ + my_bool *is_part_keypart; + /* Same as above for subpartitioning */ + my_bool *is_subpart_keypart; + + my_bool ignore_part_fields; /* Ignore rest of partioning fields */ + + /*************************************************************** + Following fields form find_used_partitions() recursion context: + **************************************************************/ + SEL_ARG **arg_stack; /* "Stack" of SEL_ARGs */ + SEL_ARG **arg_stack_end; /* Top of the stack */ + /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */ + uint cur_part_fields; + /* Same as cur_part_fields, but for subpartitioning */ + uint cur_subpart_fields; + + /* Iterator to be used to obtain the "current" set of used partitions */ + PARTITION_ITERATOR part_iter; + + /* Initialized bitmap of num_subparts size */ + MY_BITMAP subparts_bitmap; + + uchar *cur_min_key; + uchar *cur_max_key; + + uint cur_min_flag, cur_max_flag; +} PART_PRUNE_PARAM; + +static bool create_partition_index_description(PART_PRUNE_PARAM *prune_par); +static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree); +static int find_used_partitions_imerge(PART_PRUNE_PARAM 
*ppar, + SEL_IMERGE *imerge); +static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar, + List &merges); +static void mark_all_partitions_as_used(partition_info *part_info); + +#ifndef DBUG_OFF +static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end); +static void dbug_print_field(Field *field); +static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part); +static void dbug_print_singlepoint_range(SEL_ARG **start, uint num); +#endif + + +/** + Perform partition pruning for a given table and condition. + + @param thd Thread handle + @param table Table to perform partition pruning for + @param pprune_cond Condition to use for partition pruning + + @note This function assumes that lock_partitions are setup when it + is invoked. The function analyzes the condition, finds partitions that + need to be used to retrieve the records that match the condition, and + marks them as used by setting appropriate bit in part_info->read_partitions + In the worst case all partitions are marked as used. If the table is not + yet locked, it will also unset bits in part_info->lock_partitions that is + not set in read_partitions. + + This function returns promptly if called for non-partitioned table. 

  @return Operation status
    @retval true  Failure
    @retval false Success
*/

bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond)
{
  bool retval= FALSE;
  partition_info *part_info = table->part_info;
  DBUG_ENTER("prune_partitions");

  if (!part_info)
    DBUG_RETURN(FALSE); /* not a partitioned table */

  if (!pprune_cond)
  {
    /* No condition => cannot prune anything; every partition may match. */
    mark_all_partitions_as_used(part_info);
    DBUG_RETURN(FALSE);
  }

  PART_PRUNE_PARAM prune_param;
  MEM_ROOT alloc;
  RANGE_OPT_PARAM  *range_par= &prune_param.range_param;
  MY_BITMAP *old_sets[2];

  prune_param.part_info= part_info;
  init_sql_alloc(key_memory_quick_range_select_root, &alloc,
                 thd->variables.range_alloc_block_size, 0,
                 MYF(MY_THREAD_SPECIFIC));
  bzero((void*) range_par, sizeof(*range_par));
  range_par->mem_root= &alloc;
  range_par->old_root= thd->mem_root;

  if (create_partition_index_description(&prune_param))
  {
    /* Can't prune (e.g. GEOMETRY/ENUM fields, OOM) - use all partitions. */
    mark_all_partitions_as_used(part_info);
    free_root(&alloc,MYF(0));			// Return memory & allocator
    DBUG_RETURN(FALSE);
  }

  dbug_tmp_use_all_columns(table, old_sets,
                           &table->read_set, &table->write_set);
  range_par->thd= thd;
  range_par->table= table;
  /* range_par->cond doesn't need initialization */
  range_par->prev_tables= range_par->read_tables= 0;
  range_par->current_table= table->map;
  /* It should be possible to switch the following ON: */
  range_par->remove_false_where_parts= false;

  /* The "partitioning index" is the only index the range analyzer sees. */
  range_par->keys= 1; // one index
  range_par->using_real_indexes= FALSE;
  range_par->remove_jump_scans= FALSE;
  range_par->real_keynr[0]= 0;
  range_par->alloced_sel_args= 0;
  range_par->note_unusable_keys= 0;

  thd->no_errors=1;				// Don't warn about NULL
  thd->mem_root=&alloc;

  bitmap_clear_all(&part_info->read_partitions);

  prune_param.key= prune_param.range_param.key_parts;
  SEL_TREE *tree;
  int res;

  tree= pprune_cond->get_mm_tree(range_par, &pprune_cond);
  if (!tree)
    goto all_used;

  if (tree->type == SEL_TREE::IMPOSSIBLE)
  {
    /* Condition can never be true: no partition needs to be read. */
    retval= TRUE;
    goto end;
  }

  if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
    goto all_used;

  if (tree->merges.is_empty())
  {
    /* Range analysis has produced a single list of intervals. */
    prune_param.arg_stack_end= prune_param.arg_stack;
    prune_param.cur_part_fields= 0;
    prune_param.cur_subpart_fields= 0;

    prune_param.cur_min_key= prune_param.range_param.min_key;
    prune_param.cur_max_key= prune_param.range_param.max_key;
    prune_param.cur_min_flag= prune_param.cur_max_flag= 0;

    init_all_partitions_iterator(part_info, &prune_param.part_iter);
    if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param,
                                                            tree->keys[0]))))
      goto all_used;
  }
  else
  {
    if (tree->merges.elements == 1)
    {
      /*
        Range analysis has produced a "merge" of several intervals lists, a
        SEL_TREE that represents an expression in form
          sel_imerge = (tree1 OR tree2 OR ... OR treeN)
        that cannot be reduced to one tree. This can only happen when
        partitioning index has several keyparts and the condition is OR of
        conditions that refer to different key parts. For example, we'll get
        here for "partitioning_field=const1 OR subpartitioning_field=const2"
      */
      if (-1 == (res= find_used_partitions_imerge(&prune_param,
                                                  tree->merges.head())))
        goto all_used;
    }
    else
    {
      /*
        Range analysis has produced a list of several imerges, i.e. a
        structure that represents a condition in form
        imerge_list= (sel_imerge1 AND sel_imerge2 AND ... AND sel_imergeN)
        This is produced for complicated WHERE clauses that range analyzer
        can't really analyze properly.
      */
      if (-1 == (res= find_used_partitions_imerge_list(&prune_param,
                                                       tree->merges)))
        goto all_used;
    }
  }

  /*
    res == 0 => no used partitions => retval=TRUE
    res == 1 => some used partitions => retval=FALSE
    res == -1 - we jump over this line to all_used:
  */
  retval= MY_TEST(!res);
  goto end;

all_used:
  retval= FALSE; // some partitions are used
  mark_all_partitions_as_used(prune_param.part_info);
end:
  dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets);
  thd->no_errors=0;
  thd->mem_root= range_par->old_root;
  free_root(&alloc,MYF(0));			// Return memory & allocator
  /*
    Must be a subset of the locked partitions.
    lock_partitions contains the partitions marked by explicit partition
    selection (... t PARTITION (pX) ...) and we must only use partitions
    within that set.
  */
  bitmap_intersect(&prune_param.part_info->read_partitions,
                   &prune_param.part_info->lock_partitions);
  /*
    If not yet locked, also prune partitions to lock if not UPDATEing
    partition key fields. This will also prune lock_partitions if we are under
    LOCK TABLES, so prune away calls to start_stmt().
    TODO: enhance this prune locking to also allow pruning of
    'UPDATE t SET part_key = const WHERE cond_is_prunable' so it adds
    a lock for part_key partition.
  */
  if (table->file->get_lock_type() == F_UNLCK &&
      !partition_key_modified(table, table->write_set))
  {
    bitmap_copy(&prune_param.part_info->lock_partitions,
                &prune_param.part_info->read_partitions);
  }
  if (bitmap_is_clear_all(&(prune_param.part_info->read_partitions)))
  {
    table->all_partitions_pruned_away= true;
    retval= TRUE;
  }
  DBUG_RETURN(retval);
}


/*
  For SEL_ARG* array, store sel_arg->min values into table record buffer

  SYNOPSIS
    store_selargs_to_rec()
      ppar   Partition pruning context
      start  Array of SEL_ARG* for which the minimum values should be stored
      num    Number of elements in the array

  DESCRIPTION
    For each SEL_ARG* interval in the specified array, store the left edge
    field value (sel_arg->min, key image format) into the table record.
*/

static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start,
                                 int num)
{
  KEY_PART *parts= ppar->range_param.key_parts;
  for (SEL_ARG **end= start + num; start != end; start++)
  {
    SEL_ARG *sel_arg= (*start);
    store_key_image_to_rec(sel_arg->field, sel_arg->min_value,
                           parts[sel_arg->part].length);
  }
}


/* Mark a partition as used in the case when there are no subpartitions */
static void mark_full_partition_used_no_parts(partition_info* part_info,
                                              uint32 part_id)
{
  DBUG_ENTER("mark_full_partition_used_no_parts");
  DBUG_PRINT("enter", ("Mark partition %u as used", part_id));
  bitmap_set_bit(&part_info->read_partitions, part_id);
  DBUG_VOID_RETURN;
}


/* Mark a partition as used in the case when there are subpartitions */
static void mark_full_partition_used_with_parts(partition_info *part_info,
                                                uint32 part_id)
{
  /* read_partitions has one bit per (partition, subpartition) pair. */
  uint32 start= part_id * part_info->num_subparts;
  uint32 end=   start + part_info->num_subparts;
  DBUG_ENTER("mark_full_partition_used_with_parts");

  for (; start != end; start++)
  {
    DBUG_PRINT("info", ("1:Mark subpartition %u as used", start));
    bitmap_set_bit(&part_info->read_partitions, start);
  }

DBUG_VOID_RETURN; +} + +/* + Find the set of used partitions for List + SYNOPSIS + find_used_partitions_imerge_list + ppar Partition pruning context. + key_tree Intervals tree to perform pruning for. + + DESCRIPTION + List represents "imerge1 AND imerge2 AND ...". + The set of used partitions is an intersection of used partitions sets + for imerge_{i}. + We accumulate this intersection in a separate bitmap. + + RETURN + See find_used_partitions() +*/ + +static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar, + List &merges) +{ + MY_BITMAP all_merges; + uint bitmap_bytes; + my_bitmap_map *bitmap_buf; + uint n_bits= ppar->part_info->read_partitions.n_bits; + bitmap_bytes= bitmap_buffer_size(n_bits); + if (!(bitmap_buf= (my_bitmap_map*) alloc_root(ppar->range_param.mem_root, + bitmap_bytes))) + { + /* + Fallback, process just the first SEL_IMERGE. This can leave us with more + partitions marked as used then actually needed. + */ + return find_used_partitions_imerge(ppar, merges.head()); + } + my_bitmap_init(&all_merges, bitmap_buf, n_bits); + bitmap_set_prefix(&all_merges, n_bits); + + List_iterator it(merges); + SEL_IMERGE *imerge; + while ((imerge=it++)) + { + int res= find_used_partitions_imerge(ppar, imerge); + if (!res) + { + /* no used partitions on one ANDed imerge => no used partitions at all */ + return 0; + } + + if (res != -1) + bitmap_intersect(&all_merges, &ppar->part_info->read_partitions); + + + if (bitmap_is_clear_all(&all_merges)) + return 0; + + bitmap_clear_all(&ppar->part_info->read_partitions); + } + memcpy(ppar->part_info->read_partitions.bitmap, all_merges.bitmap, + bitmap_bytes); + return 1; +} + + +/* + Find the set of used partitions for SEL_IMERGE structure + SYNOPSIS + find_used_partitions_imerge() + ppar Partition pruning context. + key_tree Intervals tree to perform pruning for. + + DESCRIPTION + SEL_IMERGE represents "tree1 OR tree2 OR ...". 
The implementation is + trivial - just use mark used partitions for each tree and bail out early + if for some tree_{i} all partitions are used. + + RETURN + See find_used_partitions(). +*/ + +static +int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge) +{ + int res= 0; + for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++) + { + ppar->arg_stack_end= ppar->arg_stack; + ppar->cur_part_fields= 0; + ppar->cur_subpart_fields= 0; + + ppar->cur_min_key= ppar->range_param.min_key; + ppar->cur_max_key= ppar->range_param.max_key; + ppar->cur_min_flag= ppar->cur_max_flag= 0; + + init_all_partitions_iterator(ppar->part_info, &ppar->part_iter); + SEL_ARG *key_tree= (*ptree)->keys[0]; + if (!key_tree || (-1 == (res |= find_used_partitions(ppar, key_tree)))) + return -1; + } + return res; +} + + +/* + Collect partitioning ranges for the SEL_ARG tree and mark partitions as used + + SYNOPSIS + find_used_partitions() + ppar Partition pruning context. + key_tree SEL_ARG range tree to perform pruning for + + DESCRIPTION + This function + * recursively walks the SEL_ARG* tree collecting partitioning "intervals" + * finds the partitions one needs to use to get rows in these intervals + * marks these partitions as used. + The next session desribes the process in greater detail. + + IMPLEMENTATION + TYPES OF RESTRICTIONS THAT WE CAN OBTAIN PARTITIONS FOR + We can find out which [sub]partitions to use if we obtain restrictions on + [sub]partitioning fields in the following form: + 1. "partition_field1=const1 AND ... AND partition_fieldN=constN" + 1.1 Same as (1) but for subpartition fields + + If partitioning supports interval analysis (i.e. partitioning is a + function of a single table field, and partition_info:: + get_part_iter_for_interval != NULL), then we can also use condition in + this form: + 2. "const1 <=? partition_field <=? 
const2" + 2.1 Same as (2) but for subpartition_field + + INFERRING THE RESTRICTIONS FROM SEL_ARG TREE + + The below is an example of what SEL_ARG tree may represent: + + (start) + | $ + | Partitioning keyparts $ subpartitioning keyparts + | $ + | ... ... $ + | | | $ + | +---------+ +---------+ $ +-----------+ +-----------+ + \-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5| + +---------+ +---------+ $ +-----------+ +-----------+ + | $ | | + | $ | +-----------+ + | $ | | subpar2=c6| + | $ | +-----------+ + | $ | + | $ +-----------+ +-----------+ + | $ | subpar1=c4|--| subpar2=c8| + | $ +-----------+ +-----------+ + | $ + | $ + +---------+ $ +------------+ +------------+ + | par1=c2 |------------------| subpar1=c10|--| subpar2=c12| + +---------+ $ +------------+ +------------+ + | $ + ... $ + + The up-down connections are connections via SEL_ARG::left and + SEL_ARG::right. A horizontal connection to the right is the + SEL_ARG::next_key_part connection. + + find_used_partitions() traverses the entire tree via recursion on + * SEL_ARG::next_key_part (from left to right on the picture) + * SEL_ARG::left|right (up/down on the pic). Left-right recursion is + performed for each depth level. + + Recursion descent on SEL_ARG::next_key_part is used to accumulate (in + ppar->arg_stack) constraints on partitioning and subpartitioning fields. + For the example in the above picture, one of stack states is: + in find_used_partitions(key_tree = "subpar2=c5") (***) + in find_used_partitions(key_tree = "subpar1=c3") + in find_used_partitions(key_tree = "par2=c2") (**) + in find_used_partitions(key_tree = "par1=c1") + in prune_partitions(...) + We apply partitioning limits as soon as possible, e.g. when we reach the + depth (**), we find which partition(s) correspond to "par1=c1 AND par2=c2", + and save them in ppar->part_iter. 
+ When we reach the depth (***), we find which subpartition(s) correspond to + "subpar1=c3 AND subpar2=c5", and then mark appropriate subpartitions in + appropriate subpartitions as used. + + It is possible that constraints on some partitioning fields are missing. + For the above example, consider this stack state: + in find_used_partitions(key_tree = "subpar2=c12") (***) + in find_used_partitions(key_tree = "subpar1=c10") + in find_used_partitions(key_tree = "par1=c2") + in prune_partitions(...) + Here we don't have constraints for all partitioning fields. Since we've + never set the ppar->part_iter to contain used set of partitions, we use + its default "all partitions" value. We get subpartition id for + "subpar1=c3 AND subpar2=c5", and mark that subpartition as used in every + partition. + + The inverse is also possible: we may get constraints on partitioning + fields, but not constraints on subpartitioning fields. In that case, + calls to find_used_partitions() with depth below (**) will return -1, + and we will mark entire partition as used. + + TODO + Replace recursion on SEL_ARG::left and SEL_ARG::right with a loop + + RETURN + 1 OK, one or more [sub]partitions are marked as used. + 0 The passed condition doesn't match any partitions + -1 Couldn't infer any partition pruning "intervals" from the passed + SEL_ARG* tree (which means that all partitions should be marked as + used) Marking partitions as used is the responsibility of the caller. 
*/

static
int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree)
{
  int res, left_res=0, right_res=0;
  int key_tree_part= (int)key_tree->part;
  bool set_full_part_if_bad_ret= FALSE;
  bool ignore_part_fields= ppar->ignore_part_fields;
  bool did_set_ignore_part_fields= FALSE;
  RANGE_OPT_PARAM *range_par= &(ppar->range_param);

  /* Deeply nested SEL_ARG trees drive recursion; guard the thread stack. */
  if (check_stack_overrun(range_par->thd, 3*STACK_MIN_SIZE, NULL))
    return -1;

  if (key_tree->left != &null_element)
  {
    if (-1 == (left_res= find_used_partitions(ppar,key_tree->left)))
      return -1;
  }

  /* Push SEL_ARG's to stack to enable looking backwards as well */
  ppar->cur_part_fields+= ppar->is_part_keypart[key_tree_part];
  ppar->cur_subpart_fields+= ppar->is_subpart_keypart[key_tree_part];
  *(ppar->arg_stack_end++)= key_tree;

  if (ignore_part_fields)
  {
    /*
      We come here when a condition on the first partitioning
      fields led to evaluating the partitioning condition
      (due to finding a condition of the type a < const or
      b > const). Thus we must ignore the rest of the
      partitioning fields but we still want to analyse the
      subpartitioning fields.
    */
    if (key_tree->next_key_part)
      res= find_used_partitions(ppar, key_tree->next_key_part);
    else
      res= -1;
    goto pop_and_go_right;
  }

  if (key_tree->type == SEL_ARG::KEY_RANGE)
  {
    if (ppar->part_info->get_part_iter_for_interval &&
        key_tree->part <= ppar->last_part_partno)
    {
      /* Collect left and right bound, their lengths and flags */
      uchar *min_key= ppar->cur_min_key;
      uchar *max_key= ppar->cur_max_key;
      uchar *tmp_min_key= min_key;
      uchar *tmp_max_key= max_key;
      key_tree->store_min(ppar->key[key_tree->part].store_length,
                          &tmp_min_key, ppar->cur_min_flag);
      key_tree->store_max(ppar->key[key_tree->part].store_length,
                          &tmp_max_key, ppar->cur_max_flag);
      uint flag;
      if (key_tree->next_key_part &&
          key_tree->next_key_part->part == key_tree->part+1 &&
          key_tree->next_key_part->part <= ppar->last_part_partno &&
          key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
      {
        /*
          There are more key parts for partition pruning to handle
          This mainly happens when the condition is an equality
          condition.
        */
        if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
            (memcmp(min_key, max_key, (uint)(tmp_max_key - max_key)) == 0) &&
            !key_tree->min_flag && !key_tree->max_flag)
        {
          /*
            Equality on this keypart: recurse into the next keypart with
            extended 'current' min/max key prefixes.
          */
          /* Set 'parameters' */
          ppar->cur_min_key= tmp_min_key;
          ppar->cur_max_key= tmp_max_key;
          uint save_min_flag= ppar->cur_min_flag;
          uint save_max_flag= ppar->cur_max_flag;

          ppar->cur_min_flag|= key_tree->min_flag;
          ppar->cur_max_flag|= key_tree->max_flag;

          res= find_used_partitions(ppar, key_tree->next_key_part);

          /* Restore 'parameters' back */
          ppar->cur_min_key= min_key;
          ppar->cur_max_key= max_key;

          ppar->cur_min_flag= save_min_flag;
          ppar->cur_max_flag= save_max_flag;
          goto pop_and_go_right;
        }
        /* We have arrived at the last field in the partition pruning */
        uint tmp_min_flag= key_tree->min_flag,
             tmp_max_flag= key_tree->max_flag;
        if (!tmp_min_flag)
          key_tree->next_key_part->store_min_key(ppar->key,
                                                 &tmp_min_key,
                                                 &tmp_min_flag,
                                                 ppar->last_part_partno,
                                                 true);
        if (!tmp_max_flag)
          key_tree->next_key_part->store_max_key(ppar->key,
                                                 &tmp_max_key,
                                                 &tmp_max_flag,
                                                 ppar->last_part_partno,
                                                 false);
        flag= tmp_min_flag | tmp_max_flag;
      }
      else
        flag= key_tree->min_flag | key_tree->max_flag;

      if (tmp_min_key != range_par->min_key)
        flag&= ~NO_MIN_RANGE;
      else
        flag|= NO_MIN_RANGE;
      if (tmp_max_key != range_par->max_key)
        flag&= ~NO_MAX_RANGE;
      else
        flag|= NO_MAX_RANGE;

      /*
        We need to call the interval mapper if we have a condition which
        makes sense to prune on. In the example of COLUMNS on a and
        b it makes sense if we have a condition on a, or conditions on
        both a and b. If we only have conditions on b it might make sense
        but this is a harder case we will solve later. For the harder case
        this clause then turns into use of all partitions and thus we
        simply set res= -1 as if the mapper had returned that.
        TODO: What to do here is defined in WL#4065.
      */
      if (ppar->arg_stack[0]->part == 0 ||
          ppar->part_info->part_type == VERSIONING_PARTITION)
      {
        uint32 i;
        uint32 store_length_array[MAX_KEY];
        uint32 num_keys= ppar->part_fields;

        for (i= 0; i < num_keys; i++)
          store_length_array[i]= ppar->key[i].store_length;
        res= ppar->part_info->
             get_part_iter_for_interval(ppar->part_info,
                                        FALSE,
                                        store_length_array,
                                        range_par->min_key,
                                        range_par->max_key,
                                        (uint)(tmp_min_key - range_par->min_key),
                                        (uint)(tmp_max_key - range_par->max_key),
                                        flag,
                                        &ppar->part_iter);
        if (!res)
          goto pop_and_go_right; /* res==0 --> no satisfying partitions */
      }
      else
        res= -1;

      if (res == -1)
      {
        /* get a full range iterator */
        init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
      }
      /*
        Save our intent to mark full partition as used if we will not be able
        to obtain further limits on subpartitions
      */
      if (key_tree_part < ppar->last_part_partno)
      {
        /*
          We need to ignore the rest of the partitioning fields in all
          evaluations after this
        */
        did_set_ignore_part_fields= TRUE;
        ppar->ignore_part_fields= TRUE;
      }
      set_full_part_if_bad_ret= TRUE;
      goto process_next_key_part;
    }

    if (key_tree_part == ppar->last_subpart_partno &&
        (NULL != ppar->part_info->get_subpart_iter_for_interval))
    {
      PARTITION_ITERATOR subpart_iter;
      DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
                                                    range_par->key_parts););
      res= ppar->part_info->
           get_subpart_iter_for_interval(ppar->part_info,
                                         TRUE,
                                         NULL, /* Currently not used here */
                                         key_tree->min_value,
                                         key_tree->max_value,
                                         0, 0, /* Those are ignored here */
                                         key_tree->min_flag |
                                         key_tree->max_flag,
                                         &subpart_iter);
      if (res == 0)
      {
        /*
          The only case where we can get "no satisfying subpartitions"
          returned from the above call is when an error has occurred.
        */
        DBUG_ASSERT(range_par->thd->is_error());
        return 0;
      }

      if (res == -1)
        goto pop_and_go_right; /* all subpartitions satisfy */

      uint32 subpart_id;
      bitmap_clear_all(&ppar->subparts_bitmap);
      while ((subpart_id= subpart_iter.get_next(&subpart_iter)) !=
             NOT_A_PARTITION_ID)
        bitmap_set_bit(&ppar->subparts_bitmap, subpart_id);

      /* Mark each partition as used in each subpartition. */
      uint32 part_id;
      while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
              NOT_A_PARTITION_ID)
      {
        for (uint i= 0; i < ppar->part_info->num_subparts; i++)
          if (bitmap_is_set(&ppar->subparts_bitmap, i))
            bitmap_set_bit(&ppar->part_info->read_partitions,
                           part_id * ppar->part_info->num_subparts + i);
      }
      goto pop_and_go_right;
    }

    if (key_tree->is_singlepoint())
    {
      if (key_tree_part == ppar->last_part_partno &&
          ppar->cur_part_fields == ppar->part_fields &&
          ppar->part_info->get_part_iter_for_interval == NULL)
      {
        /*
          Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning
          fields. Save all constN constants into table record buffer.
        */
        store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields);
        DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack,
                                                          ppar->part_fields););
        uint32 part_id;
        longlong func_value;
        /* Find in which partition the {const1, ...,constN} tuple goes */
        if (ppar->get_top_partition_id_func(ppar->part_info, &part_id,
                                            &func_value))
        {
          res= 0; /* No satisfying partitions */
          goto pop_and_go_right;
        }
        /* Remember the limit we got - single partition #part_id */
        init_single_partition_iterator(part_id, &ppar->part_iter);

        /*
          If there are no subpartitions/we fail to get any limit for them,
          then we'll mark full partition as used.
        */
        set_full_part_if_bad_ret= TRUE;
        goto process_next_key_part;
      }

      if (key_tree_part == ppar->last_subpart_partno &&
          ppar->cur_subpart_fields == ppar->subpart_fields)
      {
        /*
          Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning
          fields. Save all constN constants into table record buffer.
        */
        store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields,
                             ppar->subpart_fields);
        DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack_end-
                                                          ppar->subpart_fields,
                                                          ppar->subpart_fields););
        /* Find the subpartition (it's HASH/KEY so we always have one) */
        partition_info *part_info= ppar->part_info;
        uint32 part_id, subpart_id;

        if (part_info->get_subpartition_id(part_info, &subpart_id))
          return 0;

        /* Mark this partition as used in each subpartition. */
        while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
                NOT_A_PARTITION_ID)
        {
          bitmap_set_bit(&part_info->read_partitions,
                         part_id * part_info->num_subparts + subpart_id);
        }
        res= 1; /* Some partitions were marked as used */
        goto pop_and_go_right;
      }
    }
    else
    {
      /*
        Can't handle condition on current key part. If we're that deep that
        we're processing subpartitioning's key parts, this means we'll not be
        able to infer any suitable condition, so bail out.
      */
      if (key_tree_part >= ppar->last_part_partno)
      {
        res= -1;
        goto pop_and_go_right;
      }
      /*
        No meaning in continuing with rest of partitioning key parts.
        Will try to continue with subpartitioning key parts.
      */
      ppar->ignore_part_fields= true;
      did_set_ignore_part_fields= true;
      goto process_next_key_part;
    }
  }

process_next_key_part:
  if (key_tree->next_key_part)
    res= find_used_partitions(ppar, key_tree->next_key_part);
  else
    res= -1;

  if (did_set_ignore_part_fields)
  {
    /*
      We have returned from processing all key trees linked to our next
      key part. We are ready to be moving down (using right pointers) and
      this tree is a new evaluation requiring its own decision on whether
      to ignore partitioning fields.
    */
    ppar->ignore_part_fields= FALSE;
  }
  if (set_full_part_if_bad_ret)
  {
    if (res == -1)
    {
      /* Got "full range" for subpartitioning fields */
      uint32 part_id;
      bool found= FALSE;
      while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
             NOT_A_PARTITION_ID)
      {
        ppar->mark_full_partition_used(ppar->part_info, part_id);
        found= TRUE;
      }
      res= MY_TEST(found);
    }
    /*
      Restore the "used partitions iterator" to the default setting that
      specifies iteration over all partitions.
    */
    init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
  }

pop_and_go_right:
  /* Pop this key part info off the "stack" */
  ppar->arg_stack_end--;
  ppar->cur_part_fields-=    ppar->is_part_keypart[key_tree_part];
  ppar->cur_subpart_fields-= ppar->is_subpart_keypart[key_tree_part];

  if (res == -1)
    return -1;
  if (key_tree->right != &null_element)
  {
    if (-1 == (right_res= find_used_partitions(ppar,key_tree->right)))
      return -1;
  }
  return (left_res || right_res || res);
}


/* By default (no explicit partition selection) lock_partitions is "all". */
static void mark_all_partitions_as_used(partition_info *part_info)
{
  bitmap_copy(&(part_info->read_partitions),
              &(part_info->lock_partitions));
}


/*
  Check if field types allow to construct partitioning index description

  SYNOPSIS
    fields_ok_for_partition_index()
      pfield  NULL-terminated array of pointers to fields.

  DESCRIPTION
    For an array of fields, check if we can use all of the fields to create
    partitioning index description.

    We can't process GEOMETRY fields - for these fields singlepoint intervals
    can't be generated, and non-singlepoint are "special" kinds of intervals
    to which our processing logic can't be applied.

    It is not known if we could process ENUM fields, so they are disabled to be
    on the safe side.
+ + RETURN + TRUE Yes, fields can be used in partitioning index + FALSE Otherwise +*/ + +static bool fields_ok_for_partition_index(Field **pfield) +{ + if (!pfield) + return FALSE; + for (; (*pfield); pfield++) + { + enum_field_types ftype= (*pfield)->real_type(); + if (ftype == MYSQL_TYPE_ENUM || ftype == MYSQL_TYPE_GEOMETRY) + return FALSE; + } + return TRUE; +} + + +/* + Create partition index description and fill related info in the context + struct + + SYNOPSIS + create_partition_index_description() + prune_par INOUT Partition pruning context + + DESCRIPTION + Create partition index description. Partition index description is: + + part_index(used_fields_list(part_expr), used_fields_list(subpart_expr)) + + If partitioning/sub-partitioning uses BLOB or Geometry fields, then + corresponding fields_list(...) is not included into index description + and we don't perform partition pruning for partitions/subpartitions. + + RETURN + TRUE Out of memory or can't do partition pruning at all + FALSE OK +*/ + +static bool create_partition_index_description(PART_PRUNE_PARAM *ppar) +{ + RANGE_OPT_PARAM *range_par= &(ppar->range_param); + partition_info *part_info= ppar->part_info; + uint used_part_fields, used_subpart_fields; + + used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ? + part_info->num_part_fields : 0; + used_subpart_fields= + fields_ok_for_partition_index(part_info->subpart_field_array)? 
+ part_info->num_subpart_fields : 0; + + uint total_parts= used_part_fields + used_subpart_fields; + + ppar->ignore_part_fields= FALSE; + ppar->part_fields= used_part_fields; + ppar->last_part_partno= (int)used_part_fields - 1; + + ppar->subpart_fields= used_subpart_fields; + ppar->last_subpart_partno= + used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1; + + if (part_info->is_sub_partitioned()) + { + ppar->mark_full_partition_used= mark_full_partition_used_with_parts; + ppar->get_top_partition_id_func= part_info->get_part_partition_id; + } + else + { + ppar->mark_full_partition_used= mark_full_partition_used_no_parts; + ppar->get_top_partition_id_func= part_info->get_partition_id; + } + + KEY_PART *key_part; + MEM_ROOT *alloc= range_par->mem_root; + if (!total_parts || + !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)* + total_parts)) || + !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)* + total_parts)) || + !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)* + total_parts)) || + !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)* + total_parts))) + return TRUE; + + if (ppar->subpart_fields) + { + my_bitmap_map *buf; + uint32 bufsize= bitmap_buffer_size(ppar->part_info->num_subparts); + if (!(buf= (my_bitmap_map*) alloc_root(alloc, bufsize))) + return TRUE; + my_bitmap_init(&ppar->subparts_bitmap, buf, ppar->part_info->num_subparts); + } + range_par->key_parts= key_part; + Field **field= (ppar->part_fields)? 
part_info->part_field_array : + part_info->subpart_field_array; + bool in_subpart_fields= FALSE; + uint total_key_len= 0; + for (uint part= 0; part < total_parts; part++, key_part++) + { + key_part->key= 0; + key_part->part= part; + key_part->length= (uint16)(*field)->key_length(); + key_part->store_length= (uint16)get_partition_field_store_length(*field); + total_key_len += key_part->store_length; + + DBUG_PRINT("info", ("part %u length %u store_length %u", part, + key_part->length, key_part->store_length)); + + key_part->field= (*field); + key_part->image_type = Field::itRAW; + /* + We set keypart flag to 0 here as the only HA_PART_KEY_SEG is checked + in the RangeAnalysisModule. + */ + key_part->flag= 0; + /* We don't set key_parts->null_bit as it will not be used */ + + ppar->is_part_keypart[part]= !in_subpart_fields; + ppar->is_subpart_keypart[part]= in_subpart_fields; + + /* + Check if this was last field in this array, in this case we + switch to subpartitioning fields. (This will only happens if + there are subpartitioning fields to cater for). 
+ */ + if (!*(++field)) + { + field= part_info->subpart_field_array; + in_subpart_fields= TRUE; + } + } + range_par->key_parts_end= key_part; + + total_key_len++; /* Take into account the "+1" in QUICK_RANGE::QUICK_RANGE */ + if (!(range_par->min_key= (uchar*)alloc_root(alloc,total_key_len)) || + !(range_par->max_key= (uchar*)alloc_root(alloc,total_key_len))) + { + return true; + } + + DBUG_EXECUTE("info", print_partitioning_index(range_par->key_parts, + range_par->key_parts_end);); + return FALSE; +} + + +#ifndef DBUG_OFF + +static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end) +{ + DBUG_ENTER("print_partitioning_index"); + DBUG_LOCK_FILE; + fprintf(DBUG_FILE, "partitioning INDEX("); + for (KEY_PART *p=parts; p != parts_end; p++) + { + fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name.str); + } + fputs(");\n", DBUG_FILE); + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} + +/* Print field value into debug trace, in NULL-aware way. */ +static void dbug_print_field(Field *field) +{ + if (field->is_real_null()) + fprintf(DBUG_FILE, "NULL"); + else + { + char buf[256]; + String str(buf, sizeof(buf), &my_charset_bin); + str.length(0); + String *pstr; + pstr= field->val_str(&str); + fprintf(DBUG_FILE, "'%s'", pstr->c_ptr_safe()); + } +} + + +/* Print a "c1 < keypartX < c2" - type interval into debug trace. 
*/ +static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part) +{ + DBUG_ENTER("dbug_print_segment_range"); + DBUG_LOCK_FILE; + if (!(arg->min_flag & NO_MIN_RANGE)) + { + store_key_image_to_rec(part->field, arg->min_value, part->length); + dbug_print_field(part->field); + if (arg->min_flag & NEAR_MIN) + fputs(" < ", DBUG_FILE); + else + fputs(" <= ", DBUG_FILE); + } + + fprintf(DBUG_FILE, "%s", part->field->field_name.str); + + if (!(arg->max_flag & NO_MAX_RANGE)) + { + if (arg->max_flag & NEAR_MAX) + fputs(" < ", DBUG_FILE); + else + fputs(" <= ", DBUG_FILE); + store_key_image_to_rec(part->field, arg->max_value, part->length); + dbug_print_field(part->field); + } + fputs("\n", DBUG_FILE); + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} + + +/* + Print a singlepoint multi-keypart range interval to debug trace + + SYNOPSIS + dbug_print_singlepoint_range() + start Array of SEL_ARG* ptrs representing conditions on key parts + num Number of elements in the array. + + DESCRIPTION + This function prints a "keypartN=constN AND ... AND keypartK=constK"-type + interval to debug trace. +*/ + +static void dbug_print_singlepoint_range(SEL_ARG **start, uint num) +{ + DBUG_ENTER("dbug_print_singlepoint_range"); + DBUG_LOCK_FILE; + SEL_ARG **end= start + num; + + for (SEL_ARG **arg= start; arg != end; arg++) + { + Field *field= (*arg)->field; + fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name.str); + dbug_print_field(field); + } + fputs("\n", DBUG_FILE); + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} +#endif + +/**************************************************************************** + * Partition pruning code ends + ****************************************************************************/ +#endif + + +/* + Get cost of 'sweep' full records retrieval. 
+ SYNOPSIS + get_sweep_read_cost() + param Parameter from test_quick_select + records # of records to be retrieved + RETURN + cost of sweep +*/ + +double get_sweep_read_cost(const PARAM *param, ha_rows records) +{ + double result; + uint pk= param->table->s->primary_key; + DBUG_ENTER("get_sweep_read_cost"); + if (param->table->file->pk_is_clustering_key(pk) || + param->table->file->stats.block_size == 0 /* HEAP */) + { + /* + We are using the primary key to find the rows. + Calculate the cost for this. + */ + result= param->table->file->read_time(pk, (uint)records, records); + } + else + { + /* + Rows will be retreived with rnd_pos(). Caluclate the expected + cost for this. + */ + double n_blocks= + ceil(ulonglong2double(param->table->file->stats.data_file_length) / + IO_SIZE); + double busy_blocks= + n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records))); + if (busy_blocks < 1.0) + busy_blocks= 1.0; + DBUG_PRINT("info",("sweep: nblocks: %g, busy_blocks: %g", n_blocks, + busy_blocks)); + /* + Disabled: Bail out if # of blocks to read is bigger than # of blocks in + table data file. + if (max_cost != DBL_MAX && (busy_blocks+index_reads_cost) >= n_blocks) + return 1; + */ + JOIN *join= param->thd->lex->first_select_lex()->join; + if (!join || join->table_count == 1) + { + /* No join, assume reading is done in one 'sweep' */ + result= busy_blocks*(DISK_SEEK_BASE_COST + + DISK_SEEK_PROP_COST*n_blocks/busy_blocks); + } + else + { + /* + Possibly this is a join with source table being non-last table, so + assume that disk seeks are random here. + */ + result= busy_blocks; + } + } + DBUG_PRINT("return",("cost: %g", result)); + DBUG_RETURN(result); +} + + +/* + Get best plan for a SEL_IMERGE disjunctive expression. 
+ SYNOPSIS + get_best_disjunct_quick() + param Parameter from check_quick_select function + imerge Expression to use + read_time Don't create scans with cost > read_time + + NOTES + index_merge cost is calculated as follows: + index_merge_cost = + cost(index_reads) + (see #1) + cost(rowid_to_row_scan) + (see #2) + cost(unique_use) (see #3) + + 1. cost(index_reads) =SUM_i(cost(index_read_i)) + For non-CPK scans, + cost(index_read_i) = {cost of ordinary 'index only' scan} + For CPK scan, + cost(index_read_i) = {cost of non-'index only' scan} + + 2. cost(rowid_to_row_scan) + If table PK is clustered then + cost(rowid_to_row_scan) = + {cost of ordinary clustered PK scan with n_ranges=n_rows} + + Otherwise, we use the following model to calculate costs: + We need to retrieve n_rows rows from file that occupies n_blocks blocks. + We assume that offsets of rows we need are independent variates with + uniform distribution in [0..max_file_offset] range. + + We'll denote block as "busy" if it contains row(s) we need to retrieve + and "empty" if doesn't contain rows we need. + + Probability that a block is empty is (1 - 1/n_blocks)^n_rows (this + applies to any block in file). Let x_i be a variate taking value 1 if + block #i is empty and 0 otherwise. + + Then E(x_i) = (1 - 1/n_blocks)^n_rows; + + E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) = + = n_blocks * ((1 - 1/n_blocks)^n_rows) = + ~= n_blocks * exp(-n_rows/n_blocks). + + E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) = + ~= n_blocks * (1 - exp(-n_rows/n_blocks)). + + Average size of "hole" between neighbor non-empty blocks is + E(hole_size) = n_blocks/E(n_busy_blocks). + + The total cost of reading all needed blocks in one "sweep" is: + + E(n_busy_blocks)* + (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)). + + 3. Cost of Unique use is calculated in Unique::get_use_cost function. 
+ + ROR-union cost is calculated in the same way index_merge, but instead of + Unique a priority queue is used. + + RETURN + Created read plan + NULL - Out of memory or no read scan could be built. +*/ + +static +TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, + double read_time, bool named_trace) +{ + SEL_TREE **ptree; + TRP_INDEX_MERGE *imerge_trp= NULL; + TRP_RANGE **range_scans; + TRP_RANGE **cur_child; + TRP_RANGE **cpk_scan= NULL; + bool imerge_too_expensive= FALSE; + double imerge_cost= 0.0; + ha_rows cpk_scan_records= 0; + ha_rows non_cpk_scan_records= 0; + bool all_scans_ror_able= TRUE; + bool all_scans_rors= TRUE; + uint unique_calc_buff_size; + TABLE_READ_PLAN **roru_read_plans; + TABLE_READ_PLAN **cur_roru_plan; + double roru_index_costs; + ha_rows roru_total_records; + double roru_intersect_part= 1.0; + size_t n_child_scans; + double limit_read_time= read_time; + THD *thd= param->thd; + DBUG_ENTER("get_best_disjunct_quick"); + DBUG_PRINT("info", ("Full table scan cost: %g", read_time)); + + /* + In every tree of imerge remove SEL_ARG trees that do not make ranges. + If after this removal some SEL_ARG tree becomes empty discard imerge. + */ + for (ptree= imerge->trees; ptree != imerge->trees_next; ptree++) + { + if (remove_nonrange_trees(param, *ptree)) + { + imerge->trees_next= imerge->trees; + break; + } + } + + n_child_scans= imerge->trees_next - imerge->trees; + + if (!n_child_scans) + DBUG_RETURN(NULL); + + if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root, + sizeof(TRP_RANGE*)* + n_child_scans))) + DBUG_RETURN(NULL); + + const char* trace_best_disjunct_obj_name= named_trace ? "best_disjunct_quick" : nullptr; + Json_writer_object trace_best_disjunct(thd, trace_best_disjunct_obj_name); + Json_writer_array to_merge(thd, "indexes_to_merge"); + /* + Collect best 'range' scan for each of disjuncts, and, while doing so, + analyze possibility of ROR scans. Also calculate some values needed by + other parts of the code. 
+ */ + for (ptree= imerge->trees, cur_child= range_scans; + ptree != imerge->trees_next; + ptree++, cur_child++) + { + DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map, + "tree in SEL_IMERGE");); + Json_writer_object trace_idx(thd); + if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, FALSE, + read_time))) + { + /* + One of index scans in this index_merge is more expensive than entire + table read for another available option. The entire index_merge (and + any possible ROR-union) will be more expensive then, too. We continue + here only to update SQL_SELECT members. + */ + imerge_too_expensive= TRUE; + } + if (imerge_too_expensive) + { + trace_idx.add("chosen", false).add("cause", "cost"); + continue; + } + const uint keynr_in_table= param->real_keynr[(*cur_child)->key_idx]; + imerge_cost += (*cur_child)->read_cost; + all_scans_ror_able &= ((*ptree)->n_ror_scans > 0); + all_scans_rors &= (*cur_child)->is_ror; + if (param->table->file->is_clustering_key(param->real_keynr[(*cur_child)->key_idx])) + { + cpk_scan= cur_child; + cpk_scan_records= (*cur_child)->records; + } + else + non_cpk_scan_records += (*cur_child)->records; + trace_idx.add("index_to_merge", + param->table->key_info[keynr_in_table].name) + .add("cumulated_cost", imerge_cost); + } + + to_merge.end(); + + DBUG_PRINT("info", ("index_merge scans cost %g", imerge_cost)); + trace_best_disjunct.add("cost_of_reading_ranges", imerge_cost); + + if (imerge_too_expensive || (imerge_cost > read_time) || + ((non_cpk_scan_records+cpk_scan_records >= + param->table->stat_records()) && + read_time != DBL_MAX)) + { + /* + Bail out if it is obvious that both index_merge and ROR-union will be + more expensive + */ + DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than " + "full table scan, bailing out")); + trace_best_disjunct.add("chosen", false).add("cause", "cost"); + DBUG_RETURN(NULL); + } + + /* + If all scans happen to be ROR, proceed to generate a ROR-union plan (it's 
+ guaranteed to be cheaper than non-ROR union), unless ROR-unions are + disabled in @@optimizer_switch + */ + if (all_scans_rors && + optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_UNION)) + { + roru_read_plans= (TABLE_READ_PLAN**)range_scans; + trace_best_disjunct.add("use_roworder_union", true) + .add("cause", + "always cheaper than non roworder retrieval"); + goto skip_to_ror_scan; + } + + if (cpk_scan) + { + /* + Add one ROWID comparison for each row retrieved on non-CPK scan. (it + is done in QUICK_RANGE_SELECT::row_in_ranges) + */ + double rid_comp_cost= (rows2double(non_cpk_scan_records) / + TIME_FOR_COMPARE_ROWID); + imerge_cost+= rid_comp_cost; + trace_best_disjunct.add("cost_of_mapping_rowid_in_non_clustered_pk_scan", + rid_comp_cost); + } + + /* Calculate cost(rowid_to_row_scan) */ + { + double sweep_cost= get_sweep_read_cost(param, non_cpk_scan_records); + imerge_cost+= sweep_cost; + trace_best_disjunct.add("cost_sort_rowid_and_read_disk", sweep_cost); + } + DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", + imerge_cost)); + if (imerge_cost > read_time || + !optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION)) + { + trace_best_disjunct.add("use_roworder_index_merge", true); + trace_best_disjunct.add("cause", "cost"); + goto build_ror_index_merge; + } + + /* Add Unique operations cost */ + unique_calc_buff_size= + Unique::get_cost_calc_buff_size((ulong)non_cpk_scan_records, + param->table->file->ref_length, + (size_t)param->thd->variables.sortbuff_size); + if (param->imerge_cost_buff_size < unique_calc_buff_size) + { + if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root, + unique_calc_buff_size))) + DBUG_RETURN(NULL); + param->imerge_cost_buff_size= unique_calc_buff_size; + } + + { + const double dup_removal_cost= Unique::get_use_cost( + param->imerge_cost_buff, (uint)non_cpk_scan_records, + param->table->file->ref_length, + (size_t)param->thd->variables.sortbuff_size, + TIME_FOR_COMPARE_ROWID, + 
FALSE, NULL); + imerge_cost+= dup_removal_cost; + trace_best_disjunct.add("cost_duplicate_removal", dup_removal_cost) + .add("total_cost", imerge_cost); + } + + DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)", + imerge_cost, read_time)); + if (imerge_cost < read_time) + { + if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE)) + { + imerge_trp->read_cost= imerge_cost; + imerge_trp->records= non_cpk_scan_records + cpk_scan_records; + imerge_trp->records= MY_MIN(imerge_trp->records, + param->table->stat_records()); + imerge_trp->range_scans= range_scans; + imerge_trp->range_scans_end= range_scans + n_child_scans; + read_time= imerge_cost; + } + if (imerge_trp) + { + TABLE_READ_PLAN *trp= merge_same_index_scans(param, imerge, imerge_trp, + limit_read_time); + if (trp != imerge_trp) + DBUG_RETURN(trp); + } + } + +build_ror_index_merge: + if (!all_scans_ror_able || + param->thd->lex->sql_command == SQLCOM_DELETE || + !optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_UNION)) + DBUG_RETURN(imerge_trp); + + /* Ok, it is possible to build a ROR-union, try it. */ + bool dummy; + if (!(roru_read_plans= + (TABLE_READ_PLAN**)alloc_root(param->mem_root, + sizeof(TABLE_READ_PLAN*)* + n_child_scans))) + DBUG_RETURN(imerge_trp); + +skip_to_ror_scan: + roru_index_costs= 0.0; + roru_total_records= 0; + cur_roru_plan= roru_read_plans; + + Json_writer_array trace_analyze_ror(thd, "analyzing_roworder_scans"); + + /* Find 'best' ROR scan for each of trees in disjunction */ + for (ptree= imerge->trees, cur_child= range_scans; + ptree != imerge->trees_next; + ptree++, cur_child++, cur_roru_plan++) + { + Json_writer_object trp_info(thd); + if (unlikely(thd->trace_started())) + (*cur_child)->trace_basic_info(param, &trp_info); + /* + Assume the best ROR scan is the one that has cheapest full-row-retrieval + scan cost. + Also accumulate index_only scan costs as we'll need them to calculate + overall index_intersection cost. 
+ */ + double cost; + if ((*cur_child)->is_ror) + { + /* Ok, we have index_only cost, now get full rows scan cost */ + cost= param->table->file-> + read_time(param->real_keynr[(*cur_child)->key_idx], 1, + (*cur_child)->records) + + rows2double((*cur_child)->records) / TIME_FOR_COMPARE; + } + else + cost= read_time; + + TABLE_READ_PLAN *prev_plan= *cur_child; + if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, cost, + &dummy))) + { + if (prev_plan->is_ror) + *cur_roru_plan= prev_plan; + else + DBUG_RETURN(imerge_trp); + roru_index_costs += (*cur_roru_plan)->read_cost; + } + else + roru_index_costs += + ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs; + roru_total_records += (*cur_roru_plan)->records; + roru_intersect_part *= (*cur_roru_plan)->records / + param->table->stat_records(); + } + trace_analyze_ror.end(); + /* + rows to retrieve= + SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows). + This is valid because index_merge construction guarantees that conditions + in disjunction do not share key parts. + */ + roru_total_records -= (ha_rows)(roru_intersect_part* + param->table->stat_records()); + /* ok, got a ROR read plan for each of the disjuncts + Calculate cost: + cost(index_union_scan(scan_1, ... scan_n)) = + SUM_i(cost_of_index_only_scan(scan_i)) + + queue_use_cost(rowid_len, n) + + cost_of_row_retrieval + See get_merge_buffers_cost function for queue_use_cost formula derivation. 
+ */ + + double roru_total_cost; + roru_total_cost= roru_index_costs + + rows2double(roru_total_records)*log((double)n_child_scans) / + (TIME_FOR_COMPARE_ROWID * M_LN2) + + get_sweep_read_cost(param, roru_total_records); + + DBUG_PRINT("info", ("ROR-union: cost %g, %zu members", + roru_total_cost, n_child_scans)); + trace_best_disjunct.add("index_roworder_union_cost", roru_total_cost) + .add("members", n_child_scans); + TRP_ROR_UNION* roru; + if (roru_total_cost < read_time) + { + if ((roru= new (param->mem_root) TRP_ROR_UNION)) + { + trace_best_disjunct.add("chosen", true); + roru->first_ror= roru_read_plans; + roru->last_ror= roru_read_plans + n_child_scans; + roru->read_cost= roru_total_cost; + roru->records= roru_total_records; + DBUG_RETURN(roru); + } + } + else + trace_best_disjunct.add("chosen", false); + DBUG_RETURN(imerge_trp); +} + + +/* + Merge index scans for the same indexes in an index merge plan + + SYNOPSIS + merge_same_index_scans() + param Context info for the operation + imerge IN/OUT SEL_IMERGE from which imerge_trp has been extracted + imerge_trp The index merge plan where index scans for the same + indexes are to be merges + read_time The upper bound for the cost of the plan to be evaluated + + DESRIPTION + For the given index merge plan imerge_trp extracted from the SEL_MERGE + imerge the function looks for range scans with the same indexes and merges + them into SEL_ARG trees. Then for each such SEL_ARG tree r_i the function + creates a range tree rt_i that contains only r_i. All rt_i are joined + into one index merge that replaces the original index merge imerge. + The function calls get_best_disjunct_quick for the new index merge to + get a new index merge plan that contains index scans only for different + indexes. + If there are no index scans for the same index in the original index + merge plan the function does not change the original imerge and returns + imerge_trp as its result. 
+ + RETURN + The original or or improved index merge plan +*/ + +static +TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge, + TRP_INDEX_MERGE *imerge_trp, + double read_time) +{ + uint16 first_scan_tree_idx[MAX_KEY]; + SEL_TREE **tree; + TRP_RANGE **cur_child; + uint removed_cnt= 0; + + DBUG_ENTER("merge_same_index_scans"); + + bzero(first_scan_tree_idx, sizeof(first_scan_tree_idx[0])*param->keys); + + for (tree= imerge->trees, cur_child= imerge_trp->range_scans; + tree != imerge->trees_next; + tree++, cur_child++) + { + DBUG_ASSERT(tree); + uint key_idx= (*cur_child)->key_idx; + uint16 *tree_idx_ptr= &first_scan_tree_idx[key_idx]; + if (!*tree_idx_ptr) + *tree_idx_ptr= (uint16) (tree-imerge->trees+1); + else + { + SEL_TREE **changed_tree= imerge->trees+(*tree_idx_ptr-1); + SEL_ARG *key= (*changed_tree)->keys[key_idx]; + for (uint i= 0; i < param->keys; i++) + (*changed_tree)->keys[i]= NULL; + (*changed_tree)->keys_map.clear_all(); + if (key) + key->incr_refs(); + if ((*tree)->keys[key_idx]) + (*tree)->keys[key_idx]->incr_refs(); + if (((*changed_tree)->keys[key_idx]= + key_or_with_limit(param, key_idx, key, (*tree)->keys[key_idx]))) + (*changed_tree)->keys_map.set_bit(key_idx); + *tree= NULL; + removed_cnt++; + } + } + if (!removed_cnt) + DBUG_RETURN(imerge_trp); + + TABLE_READ_PLAN *trp= NULL; + SEL_TREE **new_trees_next= imerge->trees; + for (tree= new_trees_next; tree != imerge->trees_next; tree++) + { + if (!*tree) + continue; + if (tree > new_trees_next) + *new_trees_next= *tree; + new_trees_next++; + } + imerge->trees_next= new_trees_next; + + DBUG_ASSERT(imerge->trees_next>imerge->trees); + + if (imerge->trees_next-imerge->trees > 1) + trp= get_best_disjunct_quick(param, imerge, read_time, true); + else + { + /* + This alternative theoretically can be reached when the cost + of the index merge for such a formula as + (key1 BETWEEN c1_1 AND c1_2) AND key2 > c2 OR + (key1 BETWEEN c1_3 AND c1_4) AND key3 > c3 + is estimated as being 
cheaper than the cost of index scan for + the formula + (key1 BETWEEN c1_1 AND c1_2) OR (key1 BETWEEN c1_3 AND c1_4) + + In the current code this may happen for two reasons: + 1. for a single index range scan data records are accessed in + a random order + 2. the functions that estimate the cost of a range scan and an + index merge retrievals are not well calibrated + + As the best range access has been already chosen it does not + make sense to evaluate the one obtained from a degenerated + index merge. + */ + trp= 0; + } + + DBUG_RETURN(trp); +} + + +/* + This structure contains the info common for all steps of a partial + index intersection plan. Morever it contains also the info common + for index intersect plans. This info is filled in by the function + prepare_search_best just before searching for the best index + intersection plan. +*/ + +typedef struct st_common_index_intersect_info +{ + PARAM *param; /* context info for range optimizations */ + uint key_size; /* size of a ROWID element stored in Unique object */ + double compare_factor; /* 1/compare - cost to compare two ROWIDs */ + size_t max_memory_size; /* maximum space allowed for Unique objects */ + ha_rows table_cardinality; /* estimate of the number of records in table */ + double cutoff_cost; /* discard index intersects with greater costs */ + INDEX_SCAN_INFO *cpk_scan; /* clustered primary key used in intersection */ + + bool in_memory; /* unique object for intersection is completely in memory */ + + INDEX_SCAN_INFO **search_scans; /* scans possibly included in intersect */ + uint n_search_scans; /* number of elements in search_scans */ + + bool best_uses_cpk; /* current best intersect uses clustered primary key */ + double best_cost; /* cost of the current best index intersection */ + /* estimate of the number of records in the current best intersection */ + ha_rows best_records; + uint best_length; /* number of indexes in the current best intersection */ + INDEX_SCAN_INFO **best_intersect; /* 
the current best index intersection */ + /* scans from the best intersect to be filtrered by cpk conditions */ + key_map filtered_scans; + + uint *buff_elems; /* buffer to calculate cost of index intersection */ + +} COMMON_INDEX_INTERSECT_INFO; + + +/* + This structure contains the info specific for one step of an index + intersection plan. The structure is filled in by the function + check_index_intersect_extension. +*/ + +typedef struct st_partial_index_intersect_info +{ + COMMON_INDEX_INTERSECT_INFO *common_info; /* shared by index intersects */ + uint length; /* number of index scans in the partial intersection */ + ha_rows records; /* estimate of the number of records in intersection */ + double cost; /* cost of the partial index intersection */ + + /* estimate of total number of records of all scans of the partial index + intersect sent to the Unique object used for the intersection */ + ha_rows records_sent_to_unique; + + /* total cost of the scans of indexes from the partial index intersection */ + double index_read_cost; + + bool use_cpk_filter; /* cpk filter is to be used for this scan */ + bool in_memory; /* uses unique object in memory */ + double in_memory_cost; /* cost of using unique object in memory */ + + key_map filtered_scans; /* scans to be filtered by cpk conditions */ + + MY_BITMAP *intersect_fields; /* bitmap of fields used in intersection */ + + void init() + { + common_info= NULL; + intersect_fields= NULL; + records_sent_to_unique= records= length= in_memory= use_cpk_filter= 0; + cost= index_read_cost= in_memory_cost= 0.0; + filtered_scans.clear_all(); + } +} PARTIAL_INDEX_INTERSECT_INFO; + + +/* Check whether two indexes have the same first n components */ + +static +bool same_index_prefix(KEY *key1, KEY *key2, uint used_parts) +{ + KEY_PART_INFO *part1= key1->key_part; + KEY_PART_INFO *part2= key2->key_part; + for(uint i= 0; i < used_parts; i++, part1++, part2++) + { + if (part1->fieldnr != part2->fieldnr) + return FALSE; + } + return 
TRUE; +} + + +/* Create a bitmap for all fields of a table */ + +static +bool create_fields_bitmap(PARAM *param, MY_BITMAP *fields_bitmap) +{ + my_bitmap_map *bitmap_buf; + + if (!(bitmap_buf= (my_bitmap_map *) alloc_root(param->mem_root, + param->fields_bitmap_size))) + return TRUE; + if (my_bitmap_init(fields_bitmap, bitmap_buf, param->table->s->fields)) + return TRUE; + + return FALSE; +} + +/* Compare two indexes scans for sort before search for the best intersection */ + +static +int cmp_intersect_index_scan(INDEX_SCAN_INFO **a, INDEX_SCAN_INFO **b) +{ + return (*a)->records < (*b)->records ? + -1 : (*a)->records == (*b)->records ? 0 : 1; +} + + +static inline +void set_field_bitmap_for_index_prefix(MY_BITMAP *field_bitmap, + KEY_PART_INFO *key_part, + uint used_key_parts) +{ + bitmap_clear_all(field_bitmap); + for (KEY_PART_INFO *key_part_end= key_part+used_key_parts; + key_part < key_part_end; key_part++) + { + bitmap_set_bit(field_bitmap, key_part->fieldnr-1); + } +} + + +/* + Round up table cardinality read from statistics provided by engine. + This function should go away when mysql test will allow to handle + more or less easily in the test suites deviations of InnoDB + statistical data. 
+*/ + +static inline +ha_rows get_table_cardinality_for_index_intersect(TABLE *table) +{ + if (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) + return table->stat_records(); + else + { + ha_rows d; + double q; + for (q= (double)table->stat_records(), d= 1 ; q >= 10; q/= 10, d*= 10 ) ; + return (ha_rows) (floor(q+0.5) * d); + } +} + +static +void print_keyparts(THD *thd, KEY *key, uint key_parts) +{ + DBUG_ASSERT(thd->trace_started()); + + KEY_PART_INFO *part= key->key_part; + Json_writer_array keyparts(thd, "keyparts"); + for(uint i= 0; i < key_parts; i++, part++) + keyparts.add(part->field->field_name); +} + + +static +ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, + INDEX_SCAN_INFO *ext_index_scan); + +/* + Prepare to search for the best index intersection + + SYNOPSIS + prepare_search_best_index_intersect() + param common info about index ranges + tree tree of ranges for indexes than can be intersected + common OUT info needed for search to be filled by the function + init OUT info for an initial pseudo step of the intersection plans + cutoff_cost cut off cost of the interesting index intersection + + DESCRIPTION + The function initializes all fields of the structure 'common' to be used + when searching for the best intersection plan. It also allocates + memory to store the most cheap index intersection. + + NOTES + When selecting candidates for index intersection we always take only + one representative out of any set of indexes that share the same range + conditions. These indexes always have the same prefixes and the + components of this prefixes are exactly those used in these range + conditions. + Range conditions over clustered primary key (cpk) is always used only + as the condition that filters out some rowids retrieved by the scans + for secondary indexes. The cpk index will be handled in special way by + the function that search for the best index intersection. 
+ + RETURN + FALSE in the case of success + TRUE otherwise +*/ + +static +bool prepare_search_best_index_intersect(PARAM *param, + SEL_TREE *tree, + COMMON_INDEX_INTERSECT_INFO *common, + PARTIAL_INDEX_INTERSECT_INFO *init, + double cutoff_cost) +{ + uint i; + uint n_search_scans; + double cost; + INDEX_SCAN_INFO **index_scan; + INDEX_SCAN_INFO **scan_ptr; + INDEX_SCAN_INFO *cpk_scan= NULL; + TABLE *table= param->table; + uint n_index_scans= (uint)(tree->index_scans_end - tree->index_scans); + THD *thd= param->thd; + + if (n_index_scans <= 1) + return 1; + + init->init(); + init->common_info= common; + init->cost= cutoff_cost; + + common->param= param; + common->key_size= table->file->ref_length; + common->compare_factor= TIME_FOR_COMPARE_ROWID; + common->max_memory_size= (size_t)param->thd->variables.sortbuff_size; + common->cutoff_cost= cutoff_cost; + common->cpk_scan= NULL; + common->table_cardinality= + get_table_cardinality_for_index_intersect(table); + + if (table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) + { + INDEX_SCAN_INFO **index_scan_end; + index_scan= tree->index_scans; + index_scan_end= index_scan+n_index_scans; + for ( ; index_scan < index_scan_end; index_scan++) + { + if (table->file->is_clustering_key((*index_scan)->keynr)) + { + common->cpk_scan= cpk_scan= *index_scan; + break; + } + } + } + + i= n_index_scans - MY_TEST(cpk_scan != NULL) + 1; + + if (!(common->search_scans = + (INDEX_SCAN_INFO **) alloc_root (param->mem_root, + sizeof(INDEX_SCAN_INFO *) * i))) + return TRUE; + bzero(common->search_scans, sizeof(INDEX_SCAN_INFO *) * i); + + INDEX_SCAN_INFO **selected_index_scans= common->search_scans; + Json_writer_array potential_idx_scans(thd, "potential_index_scans"); + for (i=0, index_scan= tree->index_scans; i < n_index_scans; i++, index_scan++) + { + Json_writer_object idx_scan(thd); + uint used_key_parts= (*index_scan)->used_key_parts; + KEY *key_info= (*index_scan)->key_info; + idx_scan.add("index", key_info->name); + + if 
(*index_scan == cpk_scan) + { + idx_scan.add("chosen", "false") + .add("cause", "clustered index used for filtering"); + continue; + } + if (cpk_scan && cpk_scan->used_key_parts >= used_key_parts && + same_index_prefix(cpk_scan->key_info, key_info, used_key_parts)) + { + idx_scan.add("chosen", "false") + .add("cause", "clustered index used for filtering"); + continue; + } + + cost= table->opt_range[(*index_scan)->keynr].index_only_cost; + + idx_scan.add("cost", cost); + + if (cost >= cutoff_cost) + { + idx_scan.add("chosen", false); + idx_scan.add("cause", "cost"); + continue; + } + + for (scan_ptr= selected_index_scans; *scan_ptr ; scan_ptr++) + { + /* + When we have range conditions for two different indexes with the same + beginning it does not make sense to consider both of them for index + intersection if the range conditions are covered by common initial + components of the indexes. Actually in this case the indexes are + guaranteed to have the same range conditions. + */ + if ((*scan_ptr)->used_key_parts == used_key_parts && + same_index_prefix((*scan_ptr)->key_info, key_info, used_key_parts)) + break; + } + if (!*scan_ptr || cost < (*scan_ptr)->index_read_cost) + { + idx_scan.add("chosen", true); + if (!*scan_ptr) + idx_scan.add("cause", "first occurrence of index prefix"); + else + idx_scan.add("cause", "better cost for same idx prefix"); + *scan_ptr= *index_scan; + (*scan_ptr)->index_read_cost= cost; + } + else + { + idx_scan.add("chosen", false).add("cause", "cost"); + } + } + potential_idx_scans.end(); + + ha_rows records_in_scans= 0; + + for (scan_ptr=selected_index_scans, i= 0; *scan_ptr; scan_ptr++, i++) + { + if (create_fields_bitmap(param, &(*scan_ptr)->used_fields)) + return TRUE; + records_in_scans+= (*scan_ptr)->records; + } + + n_search_scans= i; + + if (cpk_scan && create_fields_bitmap(param, &cpk_scan->used_fields)) + return TRUE; + + if (!(common->n_search_scans= n_search_scans)) + return TRUE; + + common->best_uses_cpk= FALSE; + 
common->best_cost= cutoff_cost + COST_EPS; + common->best_length= 0; + + if (!(common->best_intersect= + (INDEX_SCAN_INFO **) alloc_root (param->mem_root, + sizeof(INDEX_SCAN_INFO *) * + (i + MY_TEST(cpk_scan != NULL))))) + return TRUE; + + size_t calc_cost_buff_size= + Unique::get_cost_calc_buff_size((size_t)records_in_scans, + common->key_size, + common->max_memory_size); + if (!(common->buff_elems= (uint *) alloc_root(param->mem_root, + calc_cost_buff_size))) + return TRUE; + + my_qsort(selected_index_scans, n_search_scans, sizeof(INDEX_SCAN_INFO *), + (qsort_cmp) cmp_intersect_index_scan); + + Json_writer_array selected_idx_scans(thd, "selected_index_scans"); + if (cpk_scan) + { + PARTIAL_INDEX_INTERSECT_INFO curr; + set_field_bitmap_for_index_prefix(&cpk_scan->used_fields, + cpk_scan->key_info->key_part, + cpk_scan->used_key_parts); + curr.common_info= common; + curr.intersect_fields= &cpk_scan->used_fields; + curr.records= cpk_scan->records; + curr.length= 1; + for (scan_ptr=selected_index_scans; *scan_ptr; scan_ptr++) + { + KEY *key_info= (*scan_ptr)->key_info; + ha_rows scan_records= (*scan_ptr)->records; + ha_rows records= records_in_index_intersect_extension(&curr, *scan_ptr); + (*scan_ptr)->filtered_out= records >= scan_records ? 
+ 0 : scan_records-records; + if (thd->trace_started()) + { + Json_writer_object selected_idx(thd); + selected_idx.add("index", key_info->name); + print_keyparts(thd, key_info, (*scan_ptr)->used_key_parts); + selected_idx.add("records", (*scan_ptr)->records) + .add("filtered_records", (*scan_ptr)->filtered_out); + } + } + } + else + { + for (scan_ptr=selected_index_scans; *scan_ptr; scan_ptr++) + { + KEY *key_info= (*scan_ptr)->key_info; + (*scan_ptr)->filtered_out= 0; + if (thd->trace_started()) + { + Json_writer_object selected_idx(thd); + selected_idx.add("index", key_info->name); + print_keyparts(thd, key_info, (*scan_ptr)->used_key_parts); + selected_idx.add("records", (*scan_ptr)->records) + .add("filtered_records", (*scan_ptr)->filtered_out); + } + } + } + + return FALSE; +} + + +/* + On Estimation of the Number of Records in an Index Intersection + =============================================================== + + Consider query Q over table t. Let C be the WHERE condition of this query, + and, idx1(a1_1,...,a1_k1) and idx2(a2_1,...,a2_k2) be some indexes defined + on table t. + Let rt1 and rt2 be the range trees extracted by the range optimizer from C + for idx1 and idx2 respectively. + Let #t be the estimate of the number of records in table t provided for the + optimizer. + Let #r1 and #r2 be the estimates of the number of records in the range trees + rt1 and rt2, respectively, obtained by the range optimizer. + + We need to get an estimate for the number of records in the index + intersection of rt1 and rt2. In other words, we need to estimate the + cardinality of the set of records that are in both trees. Let's designate + this number by #r. + + If we do not make any assumptions then we can only state that + #r<=MY_MIN(#r1,#r2). + With this estimate we can't say that the index intersection scan will be + cheaper than the cheapest index scan. + + Let Rt1 and Rt2 be AND/OR conditions representing rt and rt2 respectively. 
+ The probability that a record belongs to rt1 is sel(Rt1)=#r1/#t. + The probability that a record belongs to rt2 is sel(Rt2)=#r2/#t. + + If we assume that the values in columns of idx1 and idx2 are independent + then #r/#t=sel(Rt1&Rt2)=sel(Rt1)*sel(Rt2)=(#r1/#t)*(#r2/#t). + So in this case we have: #r=#r1*#r2/#t. + + The above assumption of independence of the columns in idx1 and idx2 means + that: + - all columns are different + - values from one column do not correlate with values from any other column. + + We can't help with the case when columns correlate with each other. + Yet, if they are assumed to be uncorrelated the value of #r theoretically can + be evaluated. Unfortunately this evaluation, in general, is rather complex. + + Let's consider two indexes idx1:(dept, manager), idx2:(dept, building) + over table 'employee' and two range conditions over these indexes: + Rt1: dept=10 AND manager LIKE 'S%' + Rt2: dept=10 AND building LIKE 'L%'. + We can state that: + sel(Rt1&Rt2)=sel(dept=10)*sel(manager LIKE 'S%')*sel(building LIKE 'L%') + =sel(Rt1)*sel(Rt2)/sel(dept=10). + sel(Rt1/2_0:dept=10) can be estimated if we know the cardinality #rt1_0 of + the range for sub-index idx1_0 (dept) of the index idx1 or the cardinality + #rt2_0 of the same range for sub-index idx2_0(dept) of the index idx2. + The current code does not make an estimate either for #rt1_0, or for #rt2_0, + but it can be adjusted to provide those numbers. + Alternatively, MY_MIN(rec_per_key) for (dept) could be used to get an upper + bound for the value of sel(Rt1&Rt2). Yet these statistics are not provided + now. + + Let's consider two other indexes idx1:(dept, last_name), + idx2:(first_name, last_name) and two range conditions over these indexes: + Rt1: dept=5 AND last_name='Sm%' + Rt2: first_name='Robert' AND last_name='Sm%'. 
+ + sel(Rt1&Rt2)=sel(dept=5)*sel(last_name='Sm%')*sel(first_name='Robert') + =sel(Rt2)*sel(dept=5) + Here MY_MAX(rec_per_key) for (dept) could be used to get an upper bound for + the value of sel(Rt1&Rt2). + + When the intersected indexes have different major columns, but some + minor columns are common the picture may be more complicated. + + Let's consider the following range conditions for the same indexes as in + the previous example: + Rt1: (Rt11: dept=5 AND last_name='So%') + OR + (Rt12: dept=7 AND last_name='Saw%') + Rt2: (Rt21: first_name='Robert' AND last_name='Saw%') + OR + (Rt22: first_name='Bob' AND last_name='So%') + Here we have: + sel(Rt1&Rt2)= sel(Rt11)*sel(Rt21)+sel(Rt22)*sel(dept=5) + + sel(Rt21)*sel(dept=7)+sel(Rt12)*sel(Rt22) + Now consider the range condition: + Rt1_0: (dept=5 OR dept=7) + For this condition we can state that: + sel(Rt1_0&Rt2)=(sel(dept=5)+sel(dept=7))*(sel(Rt21)+sel(Rt22))= + sel(dept=5)*sel(Rt21)+sel(dept=7)*sel(Rt21)+ + sel(dept=5)*sel(Rt22)+sel(dept=7)*sel(Rt22)= + sel(dept=5)*sel(Rt21)+sel(Rt21)*sel(dept=7)+ + sel(Rt22)*sel(dept=5)+sel(dept=7)*sel(Rt22) > + sel(Rt11)*sel(Rt21)+sel(Rt22)*sel(dept=5)+ + sel(Rt21)*sel(dept=7)+sel(Rt12)*sel(Rt22) > + sel(Rt1 & Rt2) + + We've just demonstrated for an example what is intuitively almost obvious + in general. We can remove the ending parts from range trees getting less + selective range conditions for sub-indexes. + So if not a most major component with the number k of an index idx is + encountered in the index with which we intersect we can use the sub-index + idx_k-1 that includes the components of idx up to the i-th component and + the range tree for idx_k-1 to make an upper bound estimate for the number + of records in the index intersection. + The range tree for idx_k-1 we use here is the subtree of the original range + tree for idx that contains only parts from the first k-1 components. 
+ + As it was mentioned above the range optimizer currently does not provide + an estimate for the number of records in the ranges for sub-indexes. + However, some reasonable upper bound estimate can be obtained. + + Let's consider the following range tree: + Rt: (first_name='Robert' AND last_name='Saw%') + OR + (first_name='Bob' AND last_name='So%') + Let #r be the number of records in Rt. Let f_1 be the fan-out of column + last_name: + f_1 = rec_per_key[first_name]/rec_per_key[last_name]. + Then the number of records in the range tree: + Rt_0: (first_name='Robert' OR first_name='Bob') + for the sub-index (first_name) is not greater than MY_MAX(#r*f_1, #t). + Strictly speaking, we can state only that it's not greater than + MY_MAX(#r*max_f_1, #t), where + max_f_1= max_rec_per_key[first_name]/min_rec_per_key[last_name]. + Yet, if #r/#t is big enough (and this is the case of an index intersection, + because using this index range with a single index scan is cheaper than + the cost of the intersection when #r/#t is small) then almost safely we + can use here f_1 instead of max_f_1. + + The above considerations can be used in future development. Now, they are + used partly in the function that provides a rough upper bound estimate for + the number of records in an index intersection that follows below. +*/ + +/* + Estimate the number of records selected by an extension of a partial intersection + + SYNOPSIS + records_in_index_intersect_extension() + curr partial intersection plan to be extended + ext_index_scan the evaluated extension of this partial plan + + DESCRIPTION + The function provides an estimate for the number of records in the + intersection of the partial index intersection curr with the index + ext_index_scan. If all intersected indexes do not have common columns + then the function returns an exact estimate (assuming there are no + correlations between values in the columns). 
If the intersected indexes + have common columns the function returns an upper bound for the number + of records in the intersection provided that the intersection of curr + with ext_index_scan is expected to have less records than the expected + number of records in the partial intersection curr. In this case the + function also assigns the bitmap of the columns in the extended + intersection to ext_index_scan->used_fields. + If the function cannot expect that the number of records in the extended + intersection is less than the expected number of records #r in curr then + the function returns a number bigger than #r. + + NOTES + See the comment before the description of the function that explains the + reasoning used by this function. + + RETURN + The expected number of rows in the extended index intersection +*/ + +static +ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, + INDEX_SCAN_INFO *ext_index_scan) +{ + KEY *key_info= ext_index_scan->key_info; + KEY_PART_INFO* key_part= key_info->key_part; + uint used_key_parts= ext_index_scan->used_key_parts; + MY_BITMAP *used_fields= &ext_index_scan->used_fields; + + if (!curr->length) + { + /* + If this is the first index in the intersection just mark the + fields in the used_fields bitmap and return the expected + number of records in the range scan for the index provided + by the range optimizer. 
+ */ + set_field_bitmap_for_index_prefix(used_fields, key_part, used_key_parts); + return ext_index_scan->records; + } + + uint i; + bool better_selectivity= FALSE; + ha_rows records= curr->records; + MY_BITMAP *curr_intersect_fields= curr->intersect_fields; + for (i= 0; i < used_key_parts; i++, key_part++) + { + if (bitmap_is_set(curr_intersect_fields, key_part->fieldnr-1)) + break; + } + if (i) + { + ha_rows table_cardinality= curr->common_info->table_cardinality; + ha_rows ext_records= ext_index_scan->records; + if (i < used_key_parts) + { + double f1= key_info->actual_rec_per_key(i-1); + double f2= key_info->actual_rec_per_key(i); + ext_records= (ha_rows) ((double) ext_records / f2 * f1); + } + if (ext_records < table_cardinality) + { + better_selectivity= TRUE; + records= (ha_rows) ((double) records / table_cardinality * + ext_records); + bitmap_copy(used_fields, curr_intersect_fields); + key_part= key_info->key_part; + for (uint j= 0; j < used_key_parts; j++, key_part++) + bitmap_set_bit(used_fields, key_part->fieldnr-1); + } + } + return !better_selectivity ? records+1 : + !records ? 1 : records; +} + + +/* + Estimate the cost a binary search within disjoint cpk range intervals + + Number of comparisons to check whether a cpk value satisfies + the cpk range condition = log2(cpk_scan->range_count). 
+*/ + +static inline +double get_cpk_filter_cost(ha_rows filtered_records, + INDEX_SCAN_INFO *cpk_scan, + double compare_factor) +{ + return log((double) (cpk_scan->range_count+1)) / (compare_factor * M_LN2) * + filtered_records; +} + + +/* + Check whether a patial index intersection plan can be extended + + SYNOPSIS + check_index_intersect_extension() + curr partial intersection plan to be extended + ext_index_scan a possible extension of this plan to be checked + next OUT the structure to be filled for the extended plan + + DESCRIPTION + The function checks whether it makes sense to extend the index + intersection plan adding the index ext_index_scan, and, if this + the case, the function fills in the structure for the extended plan. + + RETURN + TRUE if it makes sense to extend the given plan + FALSE otherwise +*/ + +static +bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, + INDEX_SCAN_INFO *ext_index_scan, + PARTIAL_INDEX_INTERSECT_INFO *next) +{ + ha_rows records; + ha_rows records_sent_to_unique; + double cost; + ha_rows ext_index_scan_records= ext_index_scan->records; + ha_rows records_filtered_out_by_cpk= ext_index_scan->filtered_out; + COMMON_INDEX_INTERSECT_INFO *common_info= curr->common_info; + double cutoff_cost= common_info->cutoff_cost; + uint idx= curr->length; + next->index_read_cost= curr->index_read_cost+ext_index_scan->index_read_cost; + if (next->index_read_cost > cutoff_cost) + return FALSE; + + if ((next->in_memory= curr->in_memory)) + next->in_memory_cost= curr->in_memory_cost; + + next->intersect_fields= &ext_index_scan->used_fields; + next->filtered_scans= curr->filtered_scans; + + records_sent_to_unique= curr->records_sent_to_unique; + + next->use_cpk_filter= FALSE; + + /* Calculate the cost of using a Unique object for index intersection */ + if (idx && next->in_memory) + { + /* + All rowids received from the first scan are expected in one unique tree + */ + ha_rows elems_in_tree= 
common_info->search_scans[0]->records- + common_info->search_scans[0]->filtered_out ; + next->in_memory_cost+= Unique::get_search_cost(elems_in_tree, + common_info->compare_factor)* + ext_index_scan_records; + cost= next->in_memory_cost; + } + else + { + uint *buff_elems= common_info->buff_elems; + uint key_size= common_info->key_size; + double compare_factor= common_info->compare_factor; + size_t max_memory_size= common_info->max_memory_size; + + records_sent_to_unique+= ext_index_scan_records; + cost= Unique::get_use_cost(buff_elems, (size_t) records_sent_to_unique, key_size, + max_memory_size, compare_factor, TRUE, + &next->in_memory); + if (records_filtered_out_by_cpk) + { + /* Check whether using cpk filter for this scan is beneficial */ + + double cost2; + bool in_memory2; + ha_rows records2= records_sent_to_unique-records_filtered_out_by_cpk; + cost2= Unique::get_use_cost(buff_elems, (size_t) records2, key_size, + max_memory_size, compare_factor, TRUE, + &in_memory2); + cost2+= get_cpk_filter_cost(ext_index_scan_records, common_info->cpk_scan, + compare_factor); + if (cost > cost2 + COST_EPS) + { + cost= cost2; + next->in_memory= in_memory2; + next->use_cpk_filter= TRUE; + records_sent_to_unique= records2; + } + + } + if (next->in_memory) + next->in_memory_cost= cost; + } + + if (next->use_cpk_filter) + { + next->filtered_scans.set_bit(ext_index_scan->keynr); + bitmap_union(&ext_index_scan->used_fields, + &common_info->cpk_scan->used_fields); + } + next->records_sent_to_unique= records_sent_to_unique; + + records= records_in_index_intersect_extension(curr, ext_index_scan); + if (idx && records > curr->records) + return FALSE; + if (next->use_cpk_filter && curr->filtered_scans.is_clear_all()) + records-= records_filtered_out_by_cpk; + next->records= records; + + cost+= next->index_read_cost; + if (cost >= cutoff_cost) + return FALSE; + + cost+= get_sweep_read_cost(common_info->param, records); + + next->cost= cost; + next->length= curr->length+1; + + return 
TRUE; +} + + +/* + Search for the cheapest extensions of range scans used to access a table + + SYNOPSIS + find_index_intersect_best_extension() + curr partial intersection to evaluate all possible extension for + + DESCRIPTION + The function tries to extend the partial plan curr in all possible ways + to look for a cheapest index intersection whose cost less than the + cut off value set in curr->common_info.cutoff_cost. +*/ + +static +void find_index_intersect_best_extension(PARTIAL_INDEX_INTERSECT_INFO *curr) +{ + PARTIAL_INDEX_INTERSECT_INFO next; + COMMON_INDEX_INTERSECT_INFO *common_info= curr->common_info; + INDEX_SCAN_INFO **index_scans= common_info->search_scans; + uint idx= curr->length; + INDEX_SCAN_INFO **rem_first_index_scan_ptr= &index_scans[idx]; + double cost= curr->cost; + + if (cost + COST_EPS < common_info->best_cost) + { + common_info->best_cost= cost; + common_info->best_length= curr->length; + common_info->best_records= curr->records; + common_info->filtered_scans= curr->filtered_scans; + /* common_info->best_uses_cpk <=> at least one scan uses a cpk filter */ + common_info->best_uses_cpk= !curr->filtered_scans.is_clear_all(); + uint sz= sizeof(INDEX_SCAN_INFO *) * curr->length; + memcpy(common_info->best_intersect, common_info->search_scans, sz); + common_info->cutoff_cost= cost; + } + + if (!(*rem_first_index_scan_ptr)) + return; + + next.common_info= common_info; + + INDEX_SCAN_INFO *rem_first_index_scan= *rem_first_index_scan_ptr; + for (INDEX_SCAN_INFO **index_scan_ptr= rem_first_index_scan_ptr; + *index_scan_ptr; index_scan_ptr++) + { + *rem_first_index_scan_ptr= *index_scan_ptr; + *index_scan_ptr= rem_first_index_scan; + if (check_index_intersect_extension(curr, *rem_first_index_scan_ptr, &next)) + find_index_intersect_best_extension(&next); + *index_scan_ptr= *rem_first_index_scan_ptr; + *rem_first_index_scan_ptr= rem_first_index_scan; + } +} + + +/* + Get the plan of the best intersection of range scans used to access a table + + 
SYNOPSIS + get_best_index_intersect() + param common info about index ranges + tree tree of ranges for indexes than can be intersected + read_time cut off value for the evaluated plans + + DESCRIPTION + The function looks for the cheapest index intersection of the range + scans to access a table. The info about the ranges for all indexes + is provided by the range optimizer and is passed through the + parameters param and tree. Any plan whose cost is greater than read_time + is rejected. + After the best index intersection is found the function constructs + the structure that manages the execution by the chosen plan. + + RETURN + Pointer to the generated execution structure if a success, + 0 - otherwise. +*/ + +static +TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree, + double read_time) +{ + uint i; + uint count; + TRP_RANGE **cur_range; + TRP_RANGE **range_scans; + INDEX_SCAN_INFO *index_scan; + COMMON_INDEX_INTERSECT_INFO common; + PARTIAL_INDEX_INTERSECT_INFO init; + TRP_INDEX_INTERSECT *intersect_trp= NULL; + TABLE *table= param->table; + THD *thd= param->thd; + + DBUG_ENTER("get_best_index_intersect"); + + Json_writer_object trace_idx_interect(thd, "analyzing_sort_intersect"); + + if (prepare_search_best_index_intersect(param, tree, &common, &init, + read_time)) + DBUG_RETURN(NULL); + + find_index_intersect_best_extension(&init); + + if (common.best_length <= 1 && !common.best_uses_cpk) + DBUG_RETURN(NULL); + + if (common.best_uses_cpk) + { + memmove((char *) (common.best_intersect+1), (char *) common.best_intersect, + sizeof(INDEX_SCAN_INFO *) * common.best_length); + common.best_intersect[0]= common.cpk_scan; + common.best_length++; + } + + count= common.best_length; + + if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root, + sizeof(TRP_RANGE *)* + count))) + DBUG_RETURN(NULL); + + for (i= 0, cur_range= range_scans; i < count; i++) + { + index_scan= common.best_intersect[i]; + if ((*cur_range= new (param->mem_root) 
TRP_RANGE(index_scan->sel_arg, + index_scan->idx, 0))) + { + TRP_RANGE *trp= *cur_range; + trp->read_cost= index_scan->index_read_cost; + trp->records= index_scan->records; + trp->is_ror= FALSE; + trp->mrr_buf_size= 0; + table->intersect_keys.set_bit(index_scan->keynr); + cur_range++; + } + } + + count= (uint)(tree->index_scans_end - tree->index_scans); + for (i= 0; i < count; i++) + { + index_scan= tree->index_scans[i]; + if (!table->intersect_keys.is_set(index_scan->keynr)) + { + for (uint j= 0; j < common.best_length; j++) + { + INDEX_SCAN_INFO *scan= common.best_intersect[j]; + if (same_index_prefix(index_scan->key_info, scan->key_info, + scan->used_key_parts)) + { + table->intersect_keys.set_bit(index_scan->keynr); + break; + } + } + } + } + + if ((intersect_trp= new (param->mem_root)TRP_INDEX_INTERSECT)) + { + + intersect_trp->read_cost= common.best_cost; + intersect_trp->records= common.best_records; + intersect_trp->range_scans= range_scans; + intersect_trp->range_scans_end= cur_range; + intersect_trp->filtered_scans= common.filtered_scans; + trace_idx_interect.add("rows", intersect_trp->records) + .add("cost", intersect_trp->read_cost) + .add("chosen",true); + } + DBUG_RETURN(intersect_trp); +} + + +typedef struct st_ror_scan_info : INDEX_SCAN_INFO +{ +} ROR_SCAN_INFO; + +void TRP_ROR_INTERSECT::trace_basic_info(PARAM *param, + Json_writer_object *trace_object) const +{ + THD *thd= param->thd; + DBUG_ASSERT(trace_object->trace_started()); + + trace_object->add("type", "index_roworder_intersect"); + trace_object->add("rows", records); + trace_object->add("cost", read_cost); + trace_object->add("covering", is_covering); + trace_object->add("clustered_pk_scan", cpk_scan != NULL); + + Json_writer_array smth_trace(thd, "intersect_of"); + for (ROR_SCAN_INFO **cur_scan= first_scan; cur_scan != last_scan; + cur_scan++) + { + const KEY &cur_key= param->table->key_info[(*cur_scan)->keynr]; + const KEY_PART_INFO *key_part= cur_key.key_part; + + Json_writer_object 
trace_isect_idx(thd); + trace_isect_idx.add("type", "range_scan"); + trace_isect_idx.add("index", cur_key.name); + trace_isect_idx.add("rows", (*cur_scan)->records); + + Json_writer_array trace_range(thd, "ranges"); + + trace_ranges(&trace_range, param, (*cur_scan)->idx, + (*cur_scan)->sel_arg, key_part); + } +} + + +/* + Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using + sel_arg set of intervals. + + SYNOPSIS + make_ror_scan() + param Parameter from test_quick_select function + idx Index of key in param->keys + sel_arg Set of intervals for a given key + + RETURN + NULL - out of memory + ROR scan structure containing a scan for {idx, sel_arg} +*/ + +static +ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) +{ + ROR_SCAN_INFO *ror_scan; + my_bitmap_map *bitmap_buf; + uint keynr; + DBUG_ENTER("make_ror_scan"); + + if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO)))) + DBUG_RETURN(NULL); + + ror_scan->idx= idx; + ror_scan->keynr= keynr= param->real_keynr[idx]; + ror_scan->key_rec_length= (param->table->key_info[keynr].key_length + + param->table->file->ref_length); + ror_scan->sel_arg= sel_arg; + ror_scan->records= param->quick_rows[keynr]; + + if (!(bitmap_buf= (my_bitmap_map*) alloc_root(param->mem_root, + param->fields_bitmap_size))) + DBUG_RETURN(NULL); + + if (my_bitmap_init(&ror_scan->covered_fields, bitmap_buf, + param->table->s->fields)) + DBUG_RETURN(NULL); + bitmap_clear_all(&ror_scan->covered_fields); + + KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part; + KEY_PART_INFO *key_part_end= key_part + + param->table->key_info[keynr].user_defined_key_parts; + for (;key_part != key_part_end; ++key_part) + { + if (bitmap_is_set(¶m->needed_fields, key_part->fieldnr-1)) + bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr-1); + } + ror_scan->index_read_cost= + param->table->file->keyread_time(ror_scan->keynr, 1, ror_scan->records); + DBUG_RETURN(ror_scan); +} + 
+ +/* + Compare two ROR_SCAN_INFO** by E(#records_matched) * key_record_length. + SYNOPSIS + cmp_ror_scan_info() + a ptr to first compared value + b ptr to second compared value + + RETURN + -1 a < b + 0 a = b + 1 a > b +*/ + +static int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) +{ + double val1= rows2double((*a)->records) * (*a)->key_rec_length; + double val2= rows2double((*b)->records) * (*b)->key_rec_length; + return (val1 < val2)? -1: (val1 == val2)? 0 : 1; +} + +/* + Compare two ROR_SCAN_INFO** by + (#covered fields in F desc, + #components asc, + number of first not covered component asc) + + SYNOPSIS + cmp_ror_scan_info_covering() + a ptr to first compared value + b ptr to second compared value + + RETURN + -1 a < b + 0 a = b + 1 a > b +*/ + +static int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) +{ + if ((*a)->used_fields_covered > (*b)->used_fields_covered) + return -1; + if ((*a)->used_fields_covered < (*b)->used_fields_covered) + return 1; + if ((*a)->key_components < (*b)->key_components) + return -1; + if ((*a)->key_components > (*b)->key_components) + return 1; + if ((*a)->first_uncovered_field < (*b)->first_uncovered_field) + return -1; + if ((*a)->first_uncovered_field > (*b)->first_uncovered_field) + return 1; + return 0; +} + + +/* Auxiliary structure for incremental ROR-intersection creation */ +typedef struct +{ + const PARAM *param; + MY_BITMAP covered_fields; /* union of fields covered by all scans */ + /* + Fraction of table records that satisfies conditions of all scans. + This is the number of full records that will be retrieved if a + non-index_only index intersection will be employed. 
+ */ + double out_rows; + /* TRUE if covered_fields is a superset of needed_fields */ + bool is_covering; + + ha_rows index_records; /* sum(#records to look in indexes) */ + double index_scan_costs; /* SUM(cost of 'index-only' scans) */ + double total_cost; +} ROR_INTERSECT_INFO; + + +/* + Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans. + + SYNOPSIS + ror_intersect_init() + param Parameter from test_quick_select + + RETURN + allocated structure + NULL on error +*/ + +static +ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param) +{ + ROR_INTERSECT_INFO *info; + my_bitmap_map* buf; + if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root, + sizeof(ROR_INTERSECT_INFO)))) + return NULL; + info->param= param; + if (!(buf= (my_bitmap_map*) alloc_root(param->mem_root, + param->fields_bitmap_size))) + return NULL; + if (my_bitmap_init(&info->covered_fields, buf, param->table->s->fields)) + return NULL; + info->is_covering= FALSE; + info->index_scan_costs= 0.0; + info->index_records= 0; + info->out_rows= (double) param->table->stat_records(); + bitmap_clear_all(&info->covered_fields); + return info; +} + +void ror_intersect_cpy(ROR_INTERSECT_INFO *dst, const ROR_INTERSECT_INFO *src) +{ + dst->param= src->param; + memcpy(dst->covered_fields.bitmap, src->covered_fields.bitmap, + no_bytes_in_map(&src->covered_fields)); + dst->out_rows= src->out_rows; + dst->is_covering= src->is_covering; + dst->index_records= src->index_records; + dst->index_scan_costs= src->index_scan_costs; + dst->total_cost= src->total_cost; +} + + +/* + Get selectivity of a ROR scan wrt ROR-intersection. + + SYNOPSIS + ror_scan_selectivity() + info ROR-interection + scan ROR scan + + NOTES + Suppose we have a condition on several keys + cond=k_11=c_11 AND k_12=c_12 AND ... // parts of first key + k_21=c_21 AND k_22=c_22 AND ... // parts of second key + ... + k_n1=c_n1 AND k_n3=c_n3 AND ... (1) //parts of the key used by *scan + + where k_ij may be the same as any k_pq (i.e. 
  keys may have common parts).

  A full row is retrieved if entire condition holds.

  The recursive procedure for finding P(cond) is as follows:

  First step:
  Pick 1st part of 1st key and break conjunction (1) into two parts:
    cond= (k_11=c_11 AND R)

  Here R may still contain condition(s) equivalent to k_11=c_11.
  Nevertheless, the following holds:

    P(k_11=c_11 AND R) = P(k_11=c_11) * P(R | k_11=c_11).

  Mark k_11 as fixed field (and satisfied condition) F, save P(F),
  save R to be cond and proceed to recursion step.

  Recursion step:
  We have a set of fixed fields/satisfied conditions) F, probability P(F),
  and remaining conjunction R
  Pick next key part on current key and its condition "k_ij=c_ij".
  We will add "k_ij=c_ij" into F and update P(F).
  Lets denote k_ij as t,  R = t AND R1, where R1 may still contain t. Then

    P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F))   (2)

  (where '|' mean conditional probability, not "or")

  Consider the first multiplier in (2). One of the following holds:
  a) F contains condition on field used in t (i.e. t AND F = F).
    Then P(t|F) = 1

  b) F doesn't contain condition on field used in t. Then F and t are
     considered independent.

     P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) =
          = P(t|fields_before_t_in_key).

     P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) /
                                   #records(fields_before_t_in_key, t)

  The second multiplier is calculated by applying this step recursively.

  IMPLEMENTATION
    This function calculates the result of application of the "recursion
    step" described above for all fixed key members of a single key,
    accumulating set of covered fields, selectivity, etc.

    The calculation is conducted as follows:
    Lets denote #records(keypart1, ... keypartK) as n_k. We need to
    calculate

       n_{k1}      n_{k2}
      --------- * --------- * ....    (3)
       n_{k1-1}    n_{k2-1}

    where k1,k2,... are key parts which fields were not yet marked as fixed
    ( this is result of application of option b) of the recursion step for
      parts of a single key).
    Since it is reasonable to expect that most of the fields are not marked
    as fixed, we calculate (3) as

                                  n_{i1}      n_{i2}
      (3) = n_{max_key_part} / ( --------- * --------- * .... )
                                  n_{i1-1}    n_{i2-1}

    where i1,i2, .. are key parts that were already marked as fixed.

    In order to minimize number of expensive records_in_range calls we
    group and reduce adjacent fractions.

  RETURN
    Selectivity of given ROR scan.
*/

static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info,
                                   const ROR_SCAN_INFO *scan)
{
  double selectivity_mult= 1.0;
  KEY_PART_INFO *key_part= info->param->table->key_info[scan->keynr].key_part;
  uchar key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* key values tuple */
  uchar *key_ptr= key_val;
  SEL_ARG *sel_arg, *tuple_arg= NULL;
  key_part_map keypart_map= 0;
  bool cur_covered;
  bool prev_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
                                           key_part->fieldnr - 1));
  key_range min_range;
  key_range max_range;
  min_range.key= key_val;
  min_range.flag= HA_READ_KEY_EXACT;
  max_range.key= key_val;
  max_range.flag= HA_READ_AFTER_KEY;
  /* Start from the full table cardinality; refined as key parts are added */
  ha_rows prev_records= info->param->table->stat_records();
  DBUG_ENTER("ror_scan_selectivity");

  for (sel_arg= scan->sel_arg; sel_arg;
       sel_arg= sel_arg->next_key_part)
  {
    DBUG_PRINT("info",("sel_arg step"));
    cur_covered= MY_TEST(bitmap_is_set(&info->covered_fields,
                                       key_part[sel_arg->part].fieldnr - 1));
    if (cur_covered != prev_covered)
    {
      /*
        Covered/uncovered transition: we need records_in_range estimates
        here, so create the (part1val, ..., part{n-1}val) prefix tuple.
      */
      ha_rows records;
      page_range pages;
      if (!tuple_arg)
      {
        tuple_arg= scan->sel_arg;
        /* Here we use the length of the first key part */
        tuple_arg->store_min(key_part->store_length, &key_ptr, 0);
        keypart_map= 1;
      }
      while (tuple_arg->next_key_part != sel_arg)
      {
        tuple_arg= tuple_arg->next_key_part;
        tuple_arg->store_min(key_part[tuple_arg->part].store_length,
                             &key_ptr, 0);
        keypart_map= (keypart_map << 1) | 1;
      }
      min_range.length= max_range.length= (uint) (key_ptr - key_val);
      min_range.keypart_map= max_range.keypart_map= keypart_map;
      records= (info->param->table->file->
                records_in_range(scan->keynr, &min_range, &max_range, &pages));
      if (cur_covered)
      {
        /* uncovered -> covered */
        double tmp= rows2double(records)/rows2double(prev_records);
        DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
        selectivity_mult *= tmp;
        prev_records= HA_POS_ERROR;
      }
      else
      {
        /* covered -> uncovered */
        prev_records= records;
      }
    }
    prev_covered= cur_covered;
  }
  if (!prev_covered)
  {
    double tmp= rows2double(info->param->quick_rows[scan->keynr]) /
                rows2double(prev_records);
    DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
    selectivity_mult *= tmp;
  }
  DBUG_PRINT("info", ("Returning multiplier: %g", selectivity_mult));
  DBUG_RETURN(selectivity_mult);
}


/*
  Check if adding a ROR scan to a ROR-intersection reduces its cost of
  ROR-intersection and if yes, update parameters of ROR-intersection,
  including its cost.

  SYNOPSIS
    ror_intersect_add()
      param        Parameter from test_quick_select
      info         ROR-intersection structure to add the scan to.
      ror_scan     ROR scan info to add.
      is_cpk_scan  If TRUE, add the scan as CPK scan (this can be inferred
                   from other parameters and is passed separately only to
                   avoid duplicating the inference code)

  NOTES
    Adding a ROR scan to ROR-intersect "makes sense" iff the cost of ROR-
    intersection decreases.
    The cost of ROR-intersection is calculated as follows:

      cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval

    When we add a scan the first increases and the second decreases.

    cost_of_full_rows_retrieval=
      (union of indexes used covers all needed fields) ?
        cost_of_sweep_read(E(rows_to_retrieve), rows_in_table) :
        0

    E(rows_to_retrieve) = #rows_in_table * ror_scan_selectivity(null, scan1) *
                           ror_scan_selectivity({scan1}, scan2) * ... *
                           ror_scan_selectivity({scan1,...}, scanN).
  RETURN
    TRUE   ROR scan added to ROR-intersection, cost updated.
    FALSE  It doesn't make sense to add this ROR scan to this ROR-intersection.
*/

static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
                              ROR_SCAN_INFO* ror_scan,
                              Json_writer_object *trace_costs,
                              bool is_cpk_scan)
{
  double selectivity_mult= 1.0;

  DBUG_ENTER("ror_intersect_add");
  DBUG_PRINT("info", ("Current out_rows= %g", info->out_rows));
  DBUG_PRINT("info", ("Adding scan on %s",
                      info->param->table->key_info[ror_scan->keynr].name.str));
  DBUG_PRINT("info", ("is_cpk_scan: %d",is_cpk_scan));

  selectivity_mult = ror_scan_selectivity(info, ror_scan);
  if (selectivity_mult == 1.0)
  {
    /* Don't add this scan if it doesn't improve selectivity. */
    DBUG_PRINT("info", ("The scan doesn't improve selectivity."));
    DBUG_RETURN(FALSE);
  }

  info->out_rows *= selectivity_mult;

  if (is_cpk_scan)
  {
    /*
      CPK scan is used to filter out rows. We apply filtering for
      each record of every scan. Assuming 1/TIME_FOR_COMPARE_ROWID
      per check this gives us:
    */
    const double idx_cost= rows2double(info->index_records) /
                           TIME_FOR_COMPARE_ROWID;
    info->index_scan_costs+= idx_cost;
    trace_costs->add("index_scan_cost", idx_cost);
  }
  else
  {
    info->index_records += info->param->quick_rows[ror_scan->keynr];
    info->index_scan_costs += ror_scan->index_read_cost;
    trace_costs->add("index_scan_cost", ror_scan->index_read_cost);
    bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
    if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
                                               &info->covered_fields))
    {
      DBUG_PRINT("info", ("ROR-intersect is covering now"));
      info->is_covering= TRUE;
    }
  }

  info->total_cost= info->index_scan_costs;
  trace_costs->add("cumulated_index_scan_cost", info->index_scan_costs);
  DBUG_PRINT("info", ("info->total_cost: %g", info->total_cost));
  if (!info->is_covering)
  {
    /* Not covering: full rows must be fetched, add the sweep-read cost */
    double sweep_cost= get_sweep_read_cost(info->param,
                                           double2rows(info->out_rows));
    info->total_cost+= sweep_cost;
    trace_costs->add("disk_sweep_cost", sweep_cost);
    DBUG_PRINT("info", ("info->total_cost= %g", info->total_cost));
  }
  else
    trace_costs->add("disk_sweep_cost", 0);

  DBUG_PRINT("info", ("New out_rows: %g", info->out_rows));
  DBUG_PRINT("info", ("New cost: %g, %scovering", info->total_cost,
                      info->is_covering?"" : "non-"));
  DBUG_RETURN(TRUE);
}


/*
  Get best ROR-intersection plan using non-covering ROR-intersection search
  algorithm. The returned plan may be covering.

  SYNOPSIS
    get_best_ror_intersect()
      param            Parameter from test_quick_select function.
      tree             Transformed restriction condition to be used to look
                       for ROR scans.
      read_time        Do not return read plans with cost > read_time.
      are_all_covering [out] set to TRUE if union of all scans covers all
                       fields needed by the query (and it is possible to
                       build a covering ROR-intersection)

  NOTES
    get_key_scans_params must be called before this function can be called.
+ + When this function is called by ROR-union construction algorithm it + assumes it is building an uncovered ROR-intersection (and thus # of full + records to be retrieved is wrong here). This is a hack. + + IMPLEMENTATION + The approximate best non-covering plan search algorithm is as follows: + + find_min_ror_intersection_scan() + { + R= select all ROR scans; + order R by (E(#records_matched) * key_record_length). + + S= first(R); -- set of scans that will be used for ROR-intersection + R= R-first(S); + min_cost= cost(S); + min_scan= make_scan(S); + while (R is not empty) + { + firstR= R - first(R); + if (!selectivity(S + firstR < selectivity(S))) + continue; + + S= S + first(R); + if (cost(S) < min_cost) + { + min_cost= cost(S); + min_scan= make_scan(S); + } + } + return min_scan; + } + + See ror_intersect_add function for ROR intersection costs. + + Special handling for Clustered PK scans + Clustered PK contains all table fields, so using it as a regular scan in + index intersection doesn't make sense: a range scan on CPK will be less + expensive in this case. + Clustered PK scan has special handling in ROR-intersection: it is not used + to retrieve rows, instead its condition is used to filter row references + we get from scans on other keys. + + RETURN + ROR-intersection table read plan + NULL if out of memory or no suitable plan found. 
+*/ + +static +TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, + double read_time, + bool *are_all_covering) +{ + uint idx; + double min_cost= DBL_MAX; + DBUG_ENTER("get_best_ror_intersect"); + THD *thd= param->thd; + Json_writer_object trace_ror(thd, "analyzing_roworder_intersect"); + + if ((tree->n_ror_scans < 2) || !param->table->stat_records() || + !optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT)) + { + if (tree->n_ror_scans < 2) + trace_ror.add("cause", "too few roworder scans"); + DBUG_RETURN(NULL); + } + + /* + Step1: Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of + them. Also find and save clustered PK scan if there is one. + */ + ROR_SCAN_INFO **cur_ror_scan; + ROR_SCAN_INFO *cpk_scan= NULL; + uint cpk_no; + + if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)* + param->keys))) + return NULL; + cpk_no= (param->table->file-> + pk_is_clustering_key(param->table->s->primary_key) ? + param->table->s->primary_key : MAX_KEY); + + for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++) + { + ROR_SCAN_INFO *scan; + uint key_no; + if (!tree->ror_scans_map.is_set(idx)) + continue; + key_no= param->real_keynr[idx]; + if (key_no != cpk_no && + param->table->file->index_flags(key_no,0,0) & HA_CLUSTERED_INDEX) + { + /* Ignore clustering keys */ + tree->n_ror_scans--; + continue; + } + if (!(scan= make_ror_scan(param, idx, tree->keys[idx]))) + return NULL; + if (key_no == cpk_no) + { + cpk_scan= scan; + tree->n_ror_scans--; + } + else + *(cur_ror_scan++)= scan; + } + + tree->ror_scans_end= cur_ror_scan; + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original", + tree->ror_scans, + tree->ror_scans_end);); + /* + Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized + ROR_SCAN_INFO's. + Step 2: Get best ROR-intersection using an approximate algorithm. 
+ */ + my_qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*), + (qsort_cmp)cmp_ror_scan_info); + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered", + tree->ror_scans, + tree->ror_scans_end);); + + ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */ + ROR_SCAN_INFO **intersect_scans_end; + if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)* + tree->n_ror_scans))) + return NULL; + intersect_scans_end= intersect_scans; + + /* Create and incrementally update ROR intersection. */ + ROR_INTERSECT_INFO *intersect, *intersect_best; + if (!(intersect= ror_intersect_init(param)) || + !(intersect_best= ror_intersect_init(param))) + return NULL; + + /* [intersect_scans,intersect_scans_best) will hold the best intersection */ + ROR_SCAN_INFO **intersect_scans_best; + cur_ror_scan= tree->ror_scans; + intersect_scans_best= intersect_scans; + Json_writer_array trace_isect_idx(thd, "intersecting_indexes"); + while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering) + { + Json_writer_object trace_idx(thd); + trace_idx.add("index", + param->table->key_info[(*cur_ror_scan)->keynr].name); + + /* S= S + first(R); R= R - first(R); */ + if (!ror_intersect_add(intersect, *cur_ror_scan, &trace_idx, FALSE)) + { + trace_idx.add("usable", false) + .add("cause", "does not reduce cost of intersect"); + cur_ror_scan++; + continue; + } + + trace_idx.add("cumulative_total_cost", intersect->total_cost) + .add("usable", true) + .add("matching_rows_now", intersect->out_rows) + .add("intersect_covering_with_this_index", intersect->is_covering); + + *(intersect_scans_end++)= *(cur_ror_scan++); + + if (intersect->total_cost < min_cost) + { + /* Local minimum found, save it */ + ror_intersect_cpy(intersect_best, intersect); + intersect_scans_best= intersect_scans_end; + min_cost = intersect->total_cost; + trace_idx.add("chosen", true); + } + else + { + trace_idx.add("chosen", false) + .add("cause", 
"does not reduce cost"); + } + } + trace_isect_idx.end(); + + if (intersect_scans_best == intersect_scans) + { + DBUG_PRINT("info", ("None of scans increase selectivity")); + trace_ror.add("chosen", false) + .add("cause","does not increase selectivity"); + DBUG_RETURN(NULL); + } + + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, + "best ROR-intersection", + intersect_scans, + intersect_scans_best);); + + *are_all_covering= intersect->is_covering; + uint best_num= (uint)(intersect_scans_best - intersect_scans); + ror_intersect_cpy(intersect, intersect_best); + + /* + Ok, found the best ROR-intersection of non-CPK key scans. + Check if we should add a CPK scan. If the obtained ROR-intersection is + covering, it doesn't make sense to add CPK scan. + */ + Json_writer_object trace_cpk(thd, "clustered_pk"); + if (cpk_scan && !intersect->is_covering) + { + if (ror_intersect_add(intersect, cpk_scan, &trace_cpk, TRUE) && + (intersect->total_cost < min_cost)) + { + trace_cpk.add("clustered_pk_scan_added_to_intersect", true) + .add("cumulated_cost", intersect->total_cost); + intersect_best= intersect; //just set pointer here + } + else + { + trace_cpk.add("clustered_pk_added_to_intersect", false) + .add("cause", "cost"); + cpk_scan= 0; // Don't use cpk_scan + } + } + else + { + trace_cpk.add("clustered_pk_added_to_intersect", false) + .add("cause", cpk_scan ? 
"roworder is covering" + : "no clustered pk index"); + cpk_scan= 0; // Don't use cpk_scan + } + trace_cpk.end(); + + /* Ok, return ROR-intersect plan if we have found one */ + TRP_ROR_INTERSECT *trp= NULL; + if (min_cost < read_time && (cpk_scan || best_num > 1)) + { + if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT)) + DBUG_RETURN(trp); + if (!(trp->first_scan= + (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)*best_num))) + DBUG_RETURN(NULL); + memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*)); + trp->last_scan= trp->first_scan + best_num; + trp->is_covering= intersect_best->is_covering; + trp->read_cost= intersect_best->total_cost; + /* Prevent divisons by zero */ + ha_rows best_rows = double2rows(intersect_best->out_rows); + if (!best_rows) + best_rows= 1; + set_if_smaller(param->table->opt_range_condition_rows, best_rows); + trp->records= best_rows; + trp->index_scan_costs= intersect_best->index_scan_costs; + trp->cpk_scan= cpk_scan; + DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:" + "cost %g, records %lu", + trp->read_cost, (ulong) trp->records)); + trace_ror.add("rows", trp->records) + .add("cost", trp->read_cost) + .add("covering", trp->is_covering) + .add("chosen", true); + } + else + { + trace_ror.add("chosen", false) + .add("cause", (read_time > min_cost) + ? "too few indexes to merge" + : "cost"); + } + DBUG_RETURN(trp); +} + + +/* + Get best covering ROR-intersection. + SYNOPSIS + get_best_ntersectcovering_ror_intersect() + param Parameter from test_quick_select function. + tree SEL_TREE with sets of intervals for different keys. + read_time Don't return table read plans with cost > read_time. + + RETURN + Best covering ROR-intersection plan + NULL if no plan found. + + NOTES + get_best_ror_intersect must be called for a tree before calling this + function for it. + This function invalidates tree->ror_scans member values. 
+ + The following approximate algorithm is used: + I=set of all covering indexes + F=set of all fields to cover + S={} + + do + { + Order I by (#covered fields in F desc, + #components asc, + number of first not covered component asc); + F=F-covered by first(I); + S=S+first(I); + I=I-first(I); + } while F is not empty. +*/ + +static +TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, + SEL_TREE *tree, + double read_time) +{ + ROR_SCAN_INFO **ror_scan_mark; + ROR_SCAN_INFO **ror_scans_end= tree->ror_scans_end; + DBUG_ENTER("get_best_covering_ror_intersect"); + + if (!optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT)) + DBUG_RETURN(NULL); + + for (ROR_SCAN_INFO **scan= tree->ror_scans; scan != ror_scans_end; ++scan) + (*scan)->key_components= + param->table->key_info[(*scan)->keynr].user_defined_key_parts; + + /* + Run covering-ROR-search algorithm. + Assume set I is [ror_scan .. ror_scans_end) + */ + + /*I=set of all covering indexes */ + ror_scan_mark= tree->ror_scans; + + MY_BITMAP *covered_fields= ¶m->tmp_covered_fields; + if (!covered_fields->bitmap) + covered_fields->bitmap= (my_bitmap_map*)alloc_root(param->mem_root, + param->fields_bitmap_size); + if (!covered_fields->bitmap || + my_bitmap_init(covered_fields, covered_fields->bitmap, + param->table->s->fields)) + DBUG_RETURN(0); + bitmap_clear_all(covered_fields); + + double total_cost= 0.0f; + ha_rows records=0; + bool all_covered; + + DBUG_PRINT("info", ("Building covering ROR-intersection")); + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "building covering ROR-I", + ror_scan_mark, ror_scans_end);); + do + { + /* + Update changed sorting info: + #covered fields, + number of first not covered component + Calculate and save these values for each of remaining scans. 
+ */ + for (ROR_SCAN_INFO **scan= ror_scan_mark; scan != ror_scans_end; ++scan) + { + bitmap_subtract(&(*scan)->covered_fields, covered_fields); + (*scan)->used_fields_covered= + bitmap_bits_set(&(*scan)->covered_fields); + (*scan)->first_uncovered_field= + bitmap_get_first(&(*scan)->covered_fields); + } + + my_qsort(ror_scan_mark, ror_scans_end-ror_scan_mark, sizeof(ROR_SCAN_INFO*), + (qsort_cmp)cmp_ror_scan_info_covering); + + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "remaining scans", + ror_scan_mark, ror_scans_end);); + + /* I=I-first(I) */ + total_cost += (*ror_scan_mark)->index_read_cost; + records += (*ror_scan_mark)->records; + DBUG_PRINT("info", ("Adding scan on %s", + param->table->key_info[(*ror_scan_mark)->keynr].name.str)); + if (total_cost > read_time) + DBUG_RETURN(NULL); + /* F=F-covered by first(I) */ + bitmap_union(covered_fields, &(*ror_scan_mark)->covered_fields); + all_covered= bitmap_is_subset(¶m->needed_fields, covered_fields); + } while ((++ror_scan_mark < ror_scans_end) && !all_covered); + + if (!all_covered || (ror_scan_mark - tree->ror_scans) == 1) + DBUG_RETURN(NULL); + + /* + Ok, [tree->ror_scans .. ror_scan) holds covering index_intersection with + cost total_cost. + */ + DBUG_PRINT("info", ("Covering ROR-intersect scans cost: %g", total_cost)); + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "creating covering ROR-intersect", + tree->ror_scans, ror_scan_mark);); + + /* Add priority queue use cost. 
*/ + total_cost += rows2double(records)* + log((double)(ror_scan_mark - tree->ror_scans)) / + (TIME_FOR_COMPARE_ROWID * M_LN2); + DBUG_PRINT("info", ("Covering ROR-intersect full cost: %g", total_cost)); + + if (total_cost > read_time) + DBUG_RETURN(NULL); + + TRP_ROR_INTERSECT *trp; + if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT)) + DBUG_RETURN(trp); + uint best_num= (uint)(ror_scan_mark - tree->ror_scans); + if (!(trp->first_scan= (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)* + best_num))) + DBUG_RETURN(NULL); + memcpy(trp->first_scan, tree->ror_scans, best_num*sizeof(ROR_SCAN_INFO*)); + trp->last_scan= trp->first_scan + best_num; + trp->is_covering= TRUE; + trp->read_cost= total_cost; + trp->records= records; + trp->cpk_scan= NULL; + set_if_smaller(param->table->opt_range_condition_rows, records); + + DBUG_PRINT("info", + ("Returning covering ROR-intersect plan: cost %g, records %lu", + trp->read_cost, (ulong) trp->records)); + DBUG_RETURN(trp); +} + + +/* + Get best "range" table read plan for given SEL_TREE. + Also update PARAM members and store ROR scans info in the SEL_TREE. + SYNOPSIS + get_key_scans_params + param parameters from test_quick_select + tree make range select for this SEL_TREE + index_read_must_be_used if TRUE, assume 'index only' option will be set + (except for clustered PK indexes) + for_range_access if TRUE the function is called to get the best range + plan for range access, not for index merge access + read_time don't create read plans with cost > read_time. 
+ RETURN + Best range read plan + NULL if no plan found or error occurred +*/ + +static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, + bool index_read_must_be_used, + bool for_range_access, + double read_time) +{ + uint idx, UNINIT_VAR(best_idx); + SEL_ARG *key_to_read= NULL; + ha_rows UNINIT_VAR(best_records); /* protected by key_to_read */ + uint UNINIT_VAR(best_mrr_flags), /* protected by key_to_read */ + UNINIT_VAR(best_buf_size); /* protected by key_to_read */ + TRP_RANGE* read_plan= NULL; + DBUG_ENTER("get_key_scans_params"); + THD *thd= param->thd; + /* + Note that there may be trees that have type SEL_TREE::KEY but contain no + key reads at all, e.g. tree for expression "key1 is not null" where key1 + is defined as "not null". + */ + DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map, + "tree scans");); + Json_writer_array range_scan_alt(thd, "range_scan_alternatives"); + + tree->ror_scans_map.clear_all(); + tree->n_ror_scans= 0; + tree->index_scans= 0; + if (!tree->keys_map.is_clear_all()) + { + tree->index_scans= + (INDEX_SCAN_INFO **) alloc_root(param->mem_root, + sizeof(INDEX_SCAN_INFO *) * param->keys); + } + tree->index_scans_end= tree->index_scans; + + for (idx= 0; idx < param->keys; idx++) + { + SEL_ARG *key= tree->keys[idx]; + if (key) + { + ha_rows found_records; + Cost_estimate cost; + double found_read_time; + uint mrr_flags, buf_size; + bool is_ror_scan= FALSE; + INDEX_SCAN_INFO *index_scan; + uint keynr= param->real_keynr[idx]; + if (key->type == SEL_ARG::MAYBE_KEY || + key->maybe_flag) + param->needed_reg->set_bit(keynr); + + bool read_index_only= index_read_must_be_used ? 
TRUE : + (bool) param->table->covering_keys.is_set(keynr); + + Json_writer_object trace_idx(thd); + trace_idx.add("index", param->table->key_info[keynr].name); + + found_records= check_quick_select(param, idx, read_index_only, key, + for_range_access, &mrr_flags, + &buf_size, &cost, &is_ror_scan); + + if (!for_range_access && !is_ror_scan && + !optimizer_flag(param->thd,OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION)) + { + /* The scan is not a ROR-scan, just skip it */ + continue; + } + + if (found_records != HA_POS_ERROR && tree->index_scans && + (index_scan= (INDEX_SCAN_INFO *)alloc_root(param->mem_root, + sizeof(INDEX_SCAN_INFO)))) + { + Json_writer_array trace_range(thd, "ranges"); + + const KEY &cur_key= param->table->key_info[keynr]; + const KEY_PART_INFO *key_part= cur_key.key_part; + + index_scan->idx= idx; + index_scan->keynr= keynr; + index_scan->key_info= ¶m->table->key_info[keynr]; + index_scan->used_key_parts= param->max_key_parts; + index_scan->range_count= param->range_count; + index_scan->records= found_records; + index_scan->sel_arg= key; + *tree->index_scans_end++= index_scan; + + if (unlikely(thd->trace_started())) + trace_ranges(&trace_range, param, idx, key, key_part); + trace_range.end(); + + trace_idx.add("rowid_ordered", is_ror_scan) + .add("using_mrr", !(mrr_flags & HA_MRR_USE_DEFAULT_IMPL)) + .add("index_only", read_index_only) + .add("rows", found_records) + .add("cost", cost.total_cost()); + } + if ((found_records != HA_POS_ERROR) && is_ror_scan) + { + tree->n_ror_scans++; + tree->ror_scans_map.set_bit(idx); + } + if (found_records != HA_POS_ERROR && + read_time > (found_read_time= cost.total_cost())) + { + read_time= found_read_time; + best_records= found_records; + key_to_read= key; + best_idx= idx; + best_mrr_flags= mrr_flags; + best_buf_size= buf_size; + trace_idx.add("chosen", true); + } + else + { + trace_idx.add("chosen", false); + if (found_records == HA_POS_ERROR) + { + if (key->type == SEL_ARG::Type::MAYBE_KEY) + 
trace_idx.add("cause", "depends on unread values"); + else + trace_idx.add("cause", "unknown"); + } + else + trace_idx.add("cause", "cost"); + } + } + } + + DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map, + "ROR scans");); + if (key_to_read) + { + if ((read_plan= new (param->mem_root) TRP_RANGE(key_to_read, best_idx, + best_mrr_flags))) + { + read_plan->records= best_records; + read_plan->is_ror= tree->ror_scans_map.is_set(best_idx); + read_plan->read_cost= read_time; + read_plan->mrr_buf_size= best_buf_size; + DBUG_PRINT("info", + ("Returning range plan for key %s, cost %g, records %lu", + param->table->key_info[param->real_keynr[best_idx]].name.str, + read_plan->read_cost, (ulong) read_plan->records)); + } + } + else + DBUG_PRINT("info", ("No 'range' table read plan found")); + + DBUG_RETURN(read_plan); +} + + +QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) +{ + QUICK_INDEX_MERGE_SELECT *quick_imerge; + QUICK_RANGE_SELECT *quick; + /* index_merge always retrieves full rows, ignore retrieve_full_rows */ + if (!(quick_imerge= new QUICK_INDEX_MERGE_SELECT(param->thd, param->table))) + return NULL; + + quick_imerge->records= records; + quick_imerge->read_time= read_cost; + for (TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end; + range_scan++) + { + if (!(quick= (QUICK_RANGE_SELECT*) + ((*range_scan)->make_quick(param, FALSE, &quick_imerge->alloc)))|| + quick_imerge->push_quick_back(quick)) + { + delete quick; + delete quick_imerge; + return NULL; + } + } + return quick_imerge; +} + + +QUICK_SELECT_I *TRP_INDEX_INTERSECT::make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) +{ + QUICK_INDEX_INTERSECT_SELECT *quick_intersect; + QUICK_RANGE_SELECT *quick; + /* index_merge always retrieves full rows, ignore retrieve_full_rows */ + if (!(quick_intersect= new QUICK_INDEX_INTERSECT_SELECT(param->thd, param->table))) + return NULL; + + 
  quick_intersect->records= records;
  quick_intersect->read_time= read_cost;
  quick_intersect->filtered_scans= filtered_scans;
  for (TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end;
       range_scan++)
  {
    if (!(quick= (QUICK_RANGE_SELECT*)
          ((*range_scan)->make_quick(param, FALSE, &quick_intersect->alloc)))||
        quick_intersect->push_quick_back(quick))
    {
      delete quick;
      delete quick_intersect;
      return NULL;
    }
  }
  return quick_intersect;
}


/*
  Construct a QUICK_ROR_INTERSECT_SELECT from this read plan: one quick
  range select per member scan, plus an optional CPK filtering scan.
*/
QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
                                              bool retrieve_full_rows,
                                              MEM_ROOT *parent_alloc)
{
  QUICK_ROR_INTERSECT_SELECT *quick_intrsect;
  QUICK_RANGE_SELECT *quick;
  DBUG_ENTER("TRP_ROR_INTERSECT::make_quick");
  MEM_ROOT *alloc;

  if ((quick_intrsect=
         new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table,
                                        (retrieve_full_rows? (!is_covering) :
                                         FALSE),
                                        parent_alloc)))
  {
    DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                             "creating ROR-intersect",
                                             first_scan, last_scan););
    alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc;
    for (ROR_SCAN_INFO **curr_scan= first_scan; curr_scan != last_scan;
         ++curr_scan)
    {
      if (!(quick= get_quick_select(param, (*curr_scan)->idx,
                                    (*curr_scan)->sel_arg,
                                    HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED,
                                    0, alloc)) ||
          quick_intrsect->push_quick_back(alloc, quick))
      {
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
    }
    if (cpk_scan)
    {
      if (!(quick= get_quick_select(param, cpk_scan->idx,
                                    cpk_scan->sel_arg,
                                    HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED,
                                    0, alloc)))
      {
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
      /*
        The CPK scan is only used as a row filter, never to read rows
        itself, so it does not get a handler of its own.
      */
      quick->file= NULL;
      quick_intrsect->cpk_quick= quick;
    }
    quick_intrsect->records= records;
    quick_intrsect->read_time= read_cost;
  }
  DBUG_RETURN(quick_intrsect);
}


QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param,
                                          bool retrieve_full_rows,
                                          MEM_ROOT *parent_alloc)
{
  QUICK_ROR_UNION_SELECT *quick_roru;
  TABLE_READ_PLAN **scan;
  QUICK_SELECT_I *quick;
  DBUG_ENTER("TRP_ROR_UNION::make_quick");
  /*
    It is impossible to construct a ROR-union that will not retrieve full
    rows, ignore retrieve_full_rows parameter.
+ */ + if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table))) + { + for (scan= first_ror; scan != last_ror; scan++) + { + if (!(quick= (*scan)->make_quick(param, FALSE, &quick_roru->alloc)) || + quick_roru->push_quick_back(quick)) + { + delete quick_roru; + DBUG_RETURN(NULL); + } + } + quick_roru->records= records; + quick_roru->read_time= read_cost; + } + DBUG_RETURN(quick_roru); +} + + +/* + Build a SEL_TREE for <> or NOT BETWEEN predicate + + SYNOPSIS + get_ne_mm_tree() + param PARAM from SQL_SELECT::test_quick_select + cond_func item for the predicate + field field in the predicate + lt_value constant that field should be smaller + gt_value constant that field should be greaterr + + RETURN + # Pointer to tree built tree + 0 on error +*/ + +SEL_TREE *Item_bool_func::get_ne_mm_tree(RANGE_OPT_PARAM *param, + Field *field, + Item *lt_value, Item *gt_value) +{ + SEL_TREE *tree; + tree= get_mm_parts(param, field, Item_func::LT_FUNC, lt_value); + if (tree) + tree= tree_or(param, tree, get_mm_parts(param, field, Item_func::GT_FUNC, + gt_value)); + return tree; +} + + +SEL_TREE *Item_func_ne::get_func_mm_tree(RANGE_OPT_PARAM *param, + Field *field, Item *value) +{ + DBUG_ENTER("Item_func_ne::get_func_mm_tree"); + /* + If this condition is a "col1<>...", where there is a UNIQUE KEY(col1), + do not construct a SEL_TREE from it. A condition that excludes just one + row in the table is not selective (unless there are only a few rows) + + Note: this logic must be in sync with code in + check_group_min_max_predicates(). That function walks an Item* condition + and checks if the range optimizer would produce an equivalent range for + it. 
+ */ + if (param->using_real_indexes && is_field_an_unique_index(field)) + DBUG_RETURN(NULL); + DBUG_RETURN(get_ne_mm_tree(param, field, value, value)); +} + + +SEL_TREE *Item_func_between::get_func_mm_tree(RANGE_OPT_PARAM *param, + Field *field, Item *value) +{ + SEL_TREE *tree; + DBUG_ENTER("Item_func_between::get_func_mm_tree"); + if (!value) + { + if (negated) + { + tree= get_ne_mm_tree(param, field, args[1], args[2]); + } + else + { + tree= get_mm_parts(param, field, Item_func::GE_FUNC, args[1]); + if (tree) + { + tree= tree_and(param, tree, get_mm_parts(param, field, + Item_func::LE_FUNC, + args[2])); + } + } + } + else + { + tree= get_mm_parts(param, field, + (negated ? + (value == (Item*)1 ? Item_func::GT_FUNC : + Item_func::LT_FUNC): + (value == (Item*)1 ? Item_func::LE_FUNC : + Item_func::GE_FUNC)), + args[0]); + } + DBUG_RETURN(tree); +} + + +SEL_TREE *Item_func_in::get_func_mm_tree(RANGE_OPT_PARAM *param, + Field *field, Item *value) +{ + SEL_TREE *tree= 0; + DBUG_ENTER("Item_func_in::get_func_mm_tree"); + /* + Array for IN() is constructed when all values have the same result + type. Tree won't be built for values with different result types, + so we check it here to avoid unnecessary work. + */ + if (!arg_types_compatible) + DBUG_RETURN(0); + + if (negated) + { + if (array && array->type_handler()->result_type() != ROW_RESULT) + { + /* + We get here for conditions in form "t.key NOT IN (c1, c2, ...)", + where c{i} are constants. Our goal is to produce a SEL_TREE that + represents intervals: + + ($MINmem_root; + param->thd->mem_root= param->old_root; + /* + Create one Item_type constant object. We'll need it as + get_mm_parts only accepts constant values wrapped in Item_Type + objects. + We create the Item on param->mem_root which points to + per-statement mem_root (while thd->mem_root is currently pointing + to mem_root local to range optimizer). 
      */
      Item *value_item= array->create_item(param->thd);
      param->thd->mem_root= tmp_root;

      if (array->count > NOT_IN_IGNORE_THRESHOLD || !value_item)
        DBUG_RETURN(0);

      /*
        if this is a "col1 NOT IN (...)", and there is a UNIQUE KEY(col1),
        do not construct a SEL_TREE from it. The rationale is as follows:
         - if there are only a few constants, this condition is not
           selective (unless the table is also very small in which case we
           won't gain anything)
         - if there are a lot of constants, the overhead of building and
           processing enormous range list is not worth it.
      */
      if (param->using_real_indexes && is_field_an_unique_index(field))
        DBUG_RETURN(0);

      /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval.  */
      uint i=0;
      do
      {
        array->value_to_item(i, value_item);
        tree= get_mm_parts(param, field, Item_func::LT_FUNC, value_item);
        if (!tree)
          break;
        i++;
      } while (i < array->count && tree->type == SEL_TREE::IMPOSSIBLE);

      if (!tree || tree->type == SEL_TREE::IMPOSSIBLE)
      {
        /* We get here in cases like "t.unsigned NOT IN (-1,-2,-3)" */
        DBUG_RETURN(NULL);
      }
      SEL_TREE *tree2;
      for (; i < array->used_count; i++)
      {
        if (array->compare_elems(i, i-1))
        {
          /* Get a SEL_TREE for "-inf < X < c_i" interval */
          array->value_to_item(i, value_item);
          tree2= get_mm_parts(param, field, Item_func::LT_FUNC, value_item);
          if (!tree2)
          {
            tree= NULL;
            break;
          }

          /* Change all intervals to be "c_{i-1} < X < c_i" */
          for (uint idx= 0; idx < param->keys; idx++)
          {
            SEL_ARG *new_interval, *last_val;
            if (((new_interval= tree2->keys[idx])) &&
                (tree->keys[idx]) &&
                ((last_val= tree->keys[idx]->last())))
            {
              new_interval->min_value= last_val->max_value;
              new_interval->min_flag= NEAR_MIN;

              /*
                If the interval is over a partial keypart, the
                interval must be "c_{i-1} <= X < c_i" instead of
                "c_{i-1} < X < c_i". Reason:

                Consider a table with a column "my_col VARCHAR(3)",
                and an index with definition
                "INDEX my_idx my_col(1)". If the table contains rows
                with my_col values "f" and "foo", the index will not
                distinguish the two rows.

                Note that tree_or() below will effectively merge
                this range with the range created for c_{i-1} and
                we'll eventually end up with only one range:
                "NULL < X".

                Partitioning indexes are never partial.
              */
              if (param->using_real_indexes)
              {
                const KEY key=
                  param->table->key_info[param->real_keynr[idx]];
                const KEY_PART_INFO *kpi= key.key_part + new_interval->part;

                if (kpi->key_part_flag & HA_PART_KEY_SEG)
                  new_interval->min_flag= 0;
              }
            }
          }
          /*
            The following doesn't try to allocate memory so no need to
            check for NULL.
          */
          tree= tree_or(param, tree, tree2);
        }
      }

      if (tree && tree->type != SEL_TREE::IMPOSSIBLE)
      {
        /*
          Get the SEL_TREE for the last "c_last < X < +inf" interval
          (value_item contains c_last already)
        */
        tree2= get_mm_parts(param, field, Item_func::GT_FUNC, value_item);
        tree= tree_or(param, tree, tree2);
      }
    }
    else
    {
      /* No lookup array: expand NOT IN into a conjunction of <> trees */
      tree= get_ne_mm_tree(param, field, args[1], args[1]);
      if (tree)
      {
        Item **arg, **end;
        for (arg= args + 2, end= arg + arg_count - 2; arg < end ; arg++)
        {
          tree= tree_and(param, tree, get_ne_mm_tree(param, field,
                                                     *arg, *arg));
        }
      }
    }
  }
  else
  {
    /* Plain IN: disjunction of equality trees, one per list element */
    tree= get_mm_parts(param, field, Item_func::EQ_FUNC, args[1]);
    if (tree)
    {
      Item **arg, **end;
      for (arg= args + 2, end= arg + arg_count - 2;
           arg < end ; arg++)
      {
        tree= tree_or(param, tree, get_mm_parts(param, field,
                                                Item_func::EQ_FUNC, *arg));
      }
    }
  }
  DBUG_RETURN(tree);
}


/*
  The structure Key_col_info is purely auxiliary and is used
  only in the method Item_func_in::get_func_row_mm_tree
*/
struct Key_col_info {
  Field *field;          /* If != NULL the column can be used for keys */
  cmp_item *comparator;  /* If != 0 the column can be evaluated */
};

/**
  Build SEL_TREE
 for the IN predicate whose arguments are rows
+
+  @param param     PARAM from SQL_SELECT::test_quick_select
+  @param key_row   First operand of the IN predicate
+
+  @note
+    The function builds a SEL_TREE for an IN predicate in the case
+    when the predicate uses row arguments. First the function
+    detects among the components of the key_row (c[1],...,c[n]),
+    taken from the left part of the predicate, those that are usable
+    for building SEL_TREE (c[i1],...,c[ik]). They have to contain
+    items whose real items are field items referring to the current
+    table or equal to the items referring to the current table.
+    For the remaining components of the row it checks whether they
+    can be evaluated. The result of the analysis is put into the
+    array of structures of the type Key_col_info.
+
+    After this the function builds the SEL_TREE for the following
+    formula that can be inferred from the given IN predicate:
+      c[i11]=a[1][i11] AND ... AND c[i1k1]=a[1][i1k1]
+        OR
+      ...
+        OR
+      c[im1]=a[m][im1] AND ... AND c[imkm]=a[m][imkm].
+    Here a[1],...,a[m] are all arguments of the IN predicate from
+    the right part and for each j ij1,...,ijkj is a subset of
+    i1,...,ik such that a[j][ij1],...,a[j][ijkj] can be evaluated.
+
+    If for some j none of a[j][i1],...,a[j][ik] can be evaluated
+    then no SEL_TREE can be built for this predicate and the
+    function immediately returns 0.
+
+    If for some j by using evaluated values of key_row it can be
+    proven that c[ij1]=a[j][ij1] AND ... AND c[ijkj]=a[j][ijkj]
+    is always FALSE then this disjunct is omitted.
+
+  @returns
+    the built SEL_TREE if it can be constructed
+    0 - otherwise.
+*/ + +SEL_TREE *Item_func_in::get_func_row_mm_tree(RANGE_OPT_PARAM *param, + Item_row *key_row) +{ + DBUG_ENTER("Item_func_in::get_func_row_mm_tree"); + + if (negated) + DBUG_RETURN(0); + + SEL_TREE *res_tree= 0; + uint used_key_cols= 0; + uint col_comparators= 0; + table_map param_comp= ~(param->prev_tables | param->read_tables | + param->current_table); + uint row_cols= key_row->cols(); + Dynamic_array key_cols_info(PSI_INSTRUMENT_MEM,row_cols); + cmp_item_row *row_cmp_item; + + if (array) + { + in_row *row= static_cast(array); + row_cmp_item= static_cast(row->get_cmp_item()); + } + else + { + DBUG_ASSERT(get_comparator_type_handler(0) == &type_handler_row); + row_cmp_item= static_cast(get_comparator_cmp_item(0)); + } + DBUG_ASSERT(row_cmp_item); + + Item **key_col_ptr= key_row->addr(0); + for(uint i= 0; i < row_cols; i++, key_col_ptr++) + { + Key_col_info key_col_info= {0, NULL}; + Item *key_col= *key_col_ptr; + if (key_col->real_item()->type() == Item::FIELD_ITEM) + { + /* + The i-th component of key_row can be used for key access if + key_col->real_item() points to a field of the current table or + if it is equal to a field item pointing to such a field. 
+ */ + Item_field *col_field_item= (Item_field *) (key_col->real_item()); + Field *key_col_field= col_field_item->field; + if (key_col_field->table->map != param->current_table) + { + Item_equal *item_equal= col_field_item->item_equal; + if (item_equal) + { + Item_equal_fields_iterator it(*item_equal); + while (it++) + { + key_col_field= it.get_curr_field(); + if (key_col_field->table->map == param->current_table) + break; + } + } + } + if (key_col_field->table->map == param->current_table) + { + key_col_info.field= key_col_field; + used_key_cols++; + } + } + else if (!(key_col->used_tables() & (param_comp | param->current_table)) + && !key_col->is_expensive()) + { + /* The i-th component of key_row can be evaluated */ + + /* See the comment in Item::get_mm_tree_for_const */ + MEM_ROOT *tmp_root= param->mem_root; + param->thd->mem_root= param->old_root; + + key_col->bring_value(); + key_col_info.comparator= row_cmp_item->get_comparator(i); + DBUG_ASSERT(key_col_info.comparator); + key_col_info.comparator->store_value(key_col); + col_comparators++; + + param->thd->mem_root= tmp_root; + } + key_cols_info.push(key_col_info); + } + + if (!used_key_cols) + DBUG_RETURN(0); + + uint omitted_tuples= 0; + Item **arg_start= arguments() + 1; + Item **arg_end= arg_start + argument_count() - 1; + for (Item **arg= arg_start ; arg < arg_end; arg++) + { + uint i; + + /* + First check whether the disjunct constructed for *arg + is really needed + */ + Item_row *arg_tuple= (Item_row *) (*arg); + if (col_comparators) + { + MEM_ROOT *tmp_root= param->mem_root; + param->thd->mem_root= param->old_root; + for (i= 0; i < row_cols; i++) + { + Key_col_info *key_col_info= &key_cols_info.at(i); + if (key_col_info->comparator) + { + Item *arg_col= arg_tuple->element_index(i); + if (!(arg_col->used_tables() & (param_comp | param->current_table)) && + !arg_col->is_expensive() && + key_col_info->comparator->cmp(arg_col)) + { + omitted_tuples++; + break; + } + } + } + param->thd->mem_root= 
tmp_root; + if (i < row_cols) + continue; + } + + /* The disjunct for *arg is needed: build it. */ + SEL_TREE *and_tree= 0; + Item **arg_col_ptr= arg_tuple->addr(0); + for (uint i= 0; i < row_cols; i++, arg_col_ptr++) + { + Key_col_info *key_col_info= &key_cols_info.at(i); + if (!key_col_info->field) + continue; + Item *arg_col= *arg_col_ptr; + if (!(arg_col->used_tables() & (param_comp | param->current_table)) && + !arg_col->is_expensive()) + { + and_tree= tree_and(param, and_tree, + get_mm_parts(param, + key_col_info->field, + Item_func::EQ_FUNC, + arg_col->real_item())); + } + } + if (!and_tree) + { + res_tree= 0; + break; + } + /* Join the disjunct the the OR tree that is being constructed */ + res_tree= !res_tree ? and_tree : tree_or(param, res_tree, and_tree); + } + if (omitted_tuples == argument_count() - 1) + { + /* It's turned out that all disjuncts are always FALSE */ + res_tree= new (param->mem_root) SEL_TREE(SEL_TREE::IMPOSSIBLE, + param->mem_root, param->keys); + } + DBUG_RETURN(res_tree); +} + + +/* + Build conjunction of all SEL_TREEs for a simple predicate applying equalities + + SYNOPSIS + get_full_func_mm_tree() + param PARAM from SQL_SELECT::test_quick_select + field_item field in the predicate + value constant in the predicate (or a field already read from + a table in the case of dynamic range access) + (for BETWEEN it contains the number of the field argument, + for IN it's always 0) + inv TRUE <> NOT cond_func is considered + (makes sense only when cond_func is BETWEEN or IN) + + DESCRIPTION + For a simple SARGable predicate of the form (f op c), where f is a field and + c is a constant, the function builds a conjunction of all SEL_TREES that can + be obtained by the substitution of f for all different fields equal to f. 
+
+  NOTES
+    If the WHERE condition contains a predicate (fi op c),
+    then not only the SEL_TREE for this predicate is built, but
+    the trees for the results of substitution of fi for
+    each fj belonging to the same multiple equality as fi
+    are built as well.
+    E.g. for WHERE t1.a=t2.a AND t2.a > 10
+    a SEL_TREE for t2.a > 10 will be built for quick select from t2
+    and
+    a SEL_TREE for t1.a > 10 will be built for quick select from t1.
+
+    A BETWEEN predicate of the form (fi [NOT] BETWEEN c1 AND c2) is treated
+    in a similar way: we build a conjunction of trees for the results
+    of all substitutions of fi for equal fj.
+    Yet a predicate of the form (c BETWEEN f1i AND f2i) is processed
+    differently. It is considered as a conjunction of two SARGable
+    predicates (f1i <= c) and (f2i <=c) and the function get_full_func_mm_tree
+    is called for each of them separately producing trees for
+       AND j (f1j <=c ) and AND j (f2j <= c)
+    After this these two trees are united in one conjunctive tree.
+    It's easy to see that the same tree is obtained for
+       AND j,k (f1j <=c AND f2k<=c)
+    which is equivalent to
+       AND j,k (c BETWEEN f1j AND f2k).
+    The validity of the processing of the predicate (c NOT BETWEEN f1i AND f2i)
+    which is equivalent to (f1i > c OR f2i < c) is not so obvious. Here the
+    function get_full_func_mm_tree is called for (f1i > c) and (f2i < c)
+    producing trees for AND j (f1j > c) and AND j (f2j < c). Then these two
+    trees are united in one OR-tree. The expression
+      (AND j (f1j > c) OR AND j (f2j < c))
+    is equivalent to the expression
+      AND j,k (f1j > c OR f2k < c)
+    which is just a translation of
+      AND j,k (c NOT BETWEEN f1j AND f2k)
+
+    In the cases when one of the items f1, f2 is a constant c1 we do not create
+    a tree for it at all. It works for BETWEEN predicates but does not
+    work for NOT BETWEEN predicates as we have to evaluate the expression
+    with it. If it is TRUE then the other tree can be completely ignored.
+ We do not do it now and no trees are built in these cases for + NOT BETWEEN predicates. + + As to IN predicates only ones of the form (f IN (c1,...,cn)), + where f1 is a field and c1,...,cn are constant, are considered as + SARGable. We never try to narrow the index scan using predicates of + the form (c IN (c1,...,f,...,cn)). + + RETURN + Pointer to the tree representing the built conjunction of SEL_TREEs +*/ + +SEL_TREE *Item_bool_func::get_full_func_mm_tree(RANGE_OPT_PARAM *param, + Item_field *field_item, + Item *value) +{ + DBUG_ENTER("Item_bool_func::get_full_func_mm_tree"); + SEL_TREE *tree= 0; + SEL_TREE *ftree= 0; + table_map ref_tables= 0; + table_map param_comp= ~(param->prev_tables | param->read_tables | + param->current_table); + + for (uint i= 0; i < arg_count; i++) + { + Item *arg= arguments()[i]->real_item(); + if (arg != field_item) + ref_tables|= arg->used_tables(); + } + Field *field= field_item->field; + if (!((ref_tables | field->table->map) & param_comp)) + ftree= get_func_mm_tree(param, field, value); + Item_equal *item_equal= field_item->item_equal; + if (item_equal) + { + Item_equal_fields_iterator it(*item_equal); + while (it++) + { + Field *f= it.get_curr_field(); + if (field->eq(f)) + continue; + if (!((ref_tables | f->table->map) & param_comp)) + { + tree= get_func_mm_tree(param, f, value); + ftree= !ftree ? tree : tree_and(param, ftree, tree); + } + } + } + + DBUG_RETURN(ftree); +} + + +/* + make a select tree of all keys in condition + + @param param Context + @param cond INOUT condition to perform range analysis on. + + @detail + Range analysis may infer that some conditions are never true. + - If the condition is never true, SEL_TREE(type=IMPOSSIBLE) is returned + - if parts of condition are never true, the function may remove these parts + from the condition 'cond'. Sometimes, this will cause the condition to + be substituted for something else. + + + @return + NULL - Could not infer anything from condition cond. 
+ SEL_TREE with type=IMPOSSIBLE - condition can never be true. +*/ +SEL_TREE *Item_cond_and::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) +{ + DBUG_ENTER("Item_cond_and::get_mm_tree"); + SEL_TREE *tree= NULL; + List_iterator li(*argument_list()); + Item *item; + while ((item= li++)) + { + SEL_TREE *new_tree= li.ref()[0]->get_mm_tree(param,li.ref()); + if (param->statement_should_be_aborted()) + DBUG_RETURN(NULL); + tree= tree_and(param, tree, new_tree); + if (tree && tree->type == SEL_TREE::IMPOSSIBLE) + { + /* + Do not remove 'item' from 'cond'. We return a SEL_TREE::IMPOSSIBLE + and that is sufficient for the caller to see that the whole + condition is never true. + */ + break; + } + } + DBUG_RETURN(tree); +} + + +SEL_TREE *Item_cond::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) +{ + DBUG_ENTER("Item_cond::get_mm_tree"); + List_iterator li(*argument_list()); + bool replace_cond= false; + Item *replacement_item= li++; + SEL_TREE *tree= li.ref()[0]->get_mm_tree(param, li.ref()); + if (param->statement_should_be_aborted()) + DBUG_RETURN(NULL); + if (tree) + { + if (tree->type == SEL_TREE::IMPOSSIBLE && + param->remove_false_where_parts) + { + /* See the other li.remove() call below */ + li.remove(); + if (argument_list()->elements <= 1) + replace_cond= true; + } + + Item *item; + while ((item= li++)) + { + SEL_TREE *new_tree= li.ref()[0]->get_mm_tree(param, li.ref()); + if (new_tree == NULL || param->statement_should_be_aborted()) + DBUG_RETURN(NULL); + tree= tree_or(param, tree, new_tree); + if (tree == NULL || tree->type == SEL_TREE::ALWAYS) + { + replacement_item= *li.ref(); + break; + } + + if (new_tree && new_tree->type == SEL_TREE::IMPOSSIBLE && + param->remove_false_where_parts) + { + /* + This is a condition in form + + cond = item1 OR ... OR item_i OR ... itemN + + and item_i produces SEL_TREE(IMPOSSIBLE). We should remove item_i + from cond. This may cause 'cond' to become a degenerate, + one-way OR. 
In that case, we replace 'cond' with the remaining + item_i. + */ + li.remove(); + if (argument_list()->elements <= 1) + replace_cond= true; + } + else + replacement_item= *li.ref(); + } + + if (replace_cond) + *cond_ptr= replacement_item; + } + DBUG_RETURN(tree); +} + + +SEL_TREE *Item::get_mm_tree_for_const(RANGE_OPT_PARAM *param) +{ + DBUG_ENTER("get_mm_tree_for_const"); + if (is_expensive()) + DBUG_RETURN(0); + /* + During the cond->val_int() evaluation we can come across a subselect + item which may allocate memory on the thd->mem_root and assumes + all the memory allocated has the same life span as the subselect + item itself. So we have to restore the thread's mem_root here. + */ + MEM_ROOT *tmp_root= param->mem_root; + param->thd->mem_root= param->old_root; + SEL_TREE *tree; + + const SEL_TREE::Type type= val_int()? SEL_TREE::ALWAYS: SEL_TREE::IMPOSSIBLE; + param->thd->mem_root= tmp_root; + + tree= new (tmp_root) SEL_TREE(type, tmp_root, param->keys); + DBUG_RETURN(tree); +} + + +SEL_TREE *Item::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) +{ + DBUG_ENTER("Item::get_mm_tree"); + if (const_item()) + DBUG_RETURN(get_mm_tree_for_const(param)); + + /* + Here we have a not-constant non-function Item. + + Item_field should not appear, as normalize_cond() replaces + "WHERE field" to "WHERE field<>0". + + Item_exists_subselect is possible, e.g. 
in this query: + SELECT id, st FROM t1 + WHERE st IN ('GA','FL') AND EXISTS (SELECT 1 FROM t2 WHERE t2.id=t1.id) + GROUP BY id; + */ + table_map ref_tables= used_tables(); + if ((ref_tables & param->current_table) || + (ref_tables & ~(param->prev_tables | param->read_tables))) + DBUG_RETURN(0); + DBUG_RETURN(new (param->mem_root) SEL_TREE(SEL_TREE::MAYBE, param->mem_root, + param->keys)); +} + + +SEL_TREE * +Item_func_between::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) +{ + DBUG_ENTER("Item_func_between::get_mm_tree"); + if (const_item()) + DBUG_RETURN(get_mm_tree_for_const(param)); + + SEL_TREE *tree= 0; + SEL_TREE *ftree= 0; + + if (arguments()[0]->real_item()->type() == Item::FIELD_ITEM) + { + Item_field *field_item= (Item_field*) (arguments()[0]->real_item()); + ftree= get_full_func_mm_tree(param, field_item, NULL); + } + + /* + Concerning the code below see the NOTES section in + the comments for the function get_full_func_mm_tree() + */ + for (uint i= 1 ; i < arg_count ; i++) + { + if (arguments()[i]->real_item()->type() == Item::FIELD_ITEM) + { + Item_field *field_item= (Item_field*) (arguments()[i]->real_item()); + SEL_TREE *tmp= get_full_func_mm_tree(param, field_item, + (Item*)(intptr) i); + if (negated) + { + tree= !tree ? 
tmp : tree_or(param, tree, tmp); + if (tree == NULL) + break; + } + else + tree= tree_and(param, tree, tmp); + } + else if (negated) + { + tree= 0; + break; + } + } + + ftree= tree_and(param, ftree, tree); + DBUG_RETURN(ftree); +} + + +SEL_TREE *Item_func_in::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) +{ + DBUG_ENTER("Item_func_in::get_mm_tree"); + if (const_item()) + DBUG_RETURN(get_mm_tree_for_const(param)); + + SEL_TREE *tree= 0; + switch (key_item()->real_item()->type()) { + case Item::FIELD_ITEM: + tree= get_full_func_mm_tree(param, + (Item_field*) (key_item()->real_item()), + NULL); + break; + case Item::ROW_ITEM: + tree= get_func_row_mm_tree(param, + (Item_row *) (key_item()->real_item())); + break; + default: + DBUG_RETURN(0); + } + DBUG_RETURN(tree); +} + + +SEL_TREE *Item_equal::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) +{ + DBUG_ENTER("Item_equal::get_mm_tree"); + if (const_item()) + DBUG_RETURN(get_mm_tree_for_const(param)); + + SEL_TREE *tree= 0; + SEL_TREE *ftree= 0; + + Item *value; + if (!(value= get_const()) || value->is_expensive()) + DBUG_RETURN(0); + + Item_equal_fields_iterator it(*this); + table_map ref_tables= value->used_tables(); + table_map param_comp= ~(param->prev_tables | param->read_tables | + param->current_table); + while (it++) + { + Field *field= it.get_curr_field(); + if (!((ref_tables | field->table->map) & param_comp)) + { + tree= get_mm_parts(param, field, Item_func::EQ_FUNC, value); + ftree= !ftree ? 
tree : tree_and(param, ftree, tree); + } + } + + DBUG_RETURN(ftree); +} + + +/* + @brief + Check if there is an one-segment unique key that matches the field exactly + + @detail + In the future we could also add "almost unique" indexes where any value is + present only in a few rows (but necessarily exactly one row) +*/ +static bool is_field_an_unique_index(Field *field) +{ + DBUG_ENTER("is_field_an_unique_index"); + key_map::Iterator it(field->key_start); + uint key_no; + while ((key_no= it++) != key_map::Iterator::BITMAP_END) + { + KEY *key_info= &field->table->key_info[key_no]; + if (key_info->user_defined_key_parts == 1 && + (key_info->flags & HA_NOSAME)) + { + DBUG_RETURN(true); + } + } + DBUG_RETURN(false); +} + + +SEL_TREE * +Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field, + Item_func::Functype type, Item *value) +{ + DBUG_ENTER("get_mm_parts"); + if (field->table != param->table) + DBUG_RETURN(0); + + KEY_PART *key_part = param->key_parts; + KEY_PART *end = param->key_parts_end; + SEL_TREE *tree=0; + table_map value_used_tables= 0; + if (value && + (value_used_tables= value->used_tables()) & + ~(param->prev_tables | param->read_tables)) + DBUG_RETURN(0); + for (; key_part != end ; key_part++) + { + if (field->eq(key_part->field)) + { + SEL_ARG *sel_arg=0; + if (!tree && !(tree=new (param->thd->mem_root) SEL_TREE(param->mem_root, + param->keys))) + DBUG_RETURN(0); // OOM + if (!value || !(value_used_tables & ~param->read_tables)) + { + /* + We need to restore the runtime mem_root of the thread in this + function because it evaluates the value of its argument, while + the argument can be any, e.g. a subselect. The subselect + items, in turn, assume that all the memory allocated during + the evaluation has the same life span as the item itself. + TODO: opt_range.cc should not reset thd->mem_root at all. 
+ */ + MEM_ROOT *tmp_root= param->mem_root; + param->thd->mem_root= param->old_root; + sel_arg= get_mm_leaf(param, key_part->field, key_part, type, value); + param->thd->mem_root= tmp_root; + + if (!sel_arg) + continue; + if (sel_arg->type == SEL_ARG::IMPOSSIBLE) + { + tree->type=SEL_TREE::IMPOSSIBLE; + DBUG_RETURN(tree); + } + } + else + { + // This key may be used later + if (!(sel_arg= new SEL_ARG(SEL_ARG::MAYBE_KEY))) + DBUG_RETURN(0); // OOM + } + sel_arg->part=(uchar) key_part->part; + sel_arg->max_part_no= sel_arg->part+1; + tree->keys[key_part->key]=sel_add(tree->keys[key_part->key],sel_arg); + tree->keys_map.set_bit(key_part->key); + } + } + + if (tree && tree->merges.is_empty() && tree->keys_map.is_clear_all()) + tree= NULL; + DBUG_RETURN(tree); +} + + +SEL_ARG * +Item_func_null_predicate::get_mm_leaf(RANGE_OPT_PARAM *param, + Field *field, KEY_PART *key_part, + Item_func::Functype type, + Item *value) +{ + MEM_ROOT *alloc= param->mem_root; + DBUG_ENTER("Item_func_null_predicate::get_mm_leaf"); + DBUG_ASSERT(!value); + /* + No check for field->table->maybe_null. It's perfecly fine to use range + access for cases like + + SELECT * FROM t1 LEFT JOIN t2 ON t2.key IS [NOT] NULL + + ON expression is evaluated before considering NULL-complemented rows, so + IS [NOT] NULL has regular semantics. + */ + if (!field->real_maybe_null()) + DBUG_RETURN(type == ISNULL_FUNC ? 
&null_element : NULL); + SEL_ARG *tree; + if (!(tree= new (alloc) SEL_ARG(field, is_null_string, is_null_string))) + DBUG_RETURN(0); + if (type == Item_func::ISNOTNULL_FUNC) + { + tree->min_flag=NEAR_MIN; /* IS NOT NULL -> X > NULL */ + tree->max_flag=NO_MAX_RANGE; + } + DBUG_RETURN(tree); +} + + +SEL_ARG * +Item_func_like::get_mm_leaf(RANGE_OPT_PARAM *param, + Field *field, KEY_PART *key_part, + Item_func::Functype type, Item *value) +{ + DBUG_ENTER("Item_func_like::get_mm_leaf"); + DBUG_ASSERT(value); + + if (key_part->image_type != Field::itRAW) + DBUG_RETURN(0); + + uint keynr= param->real_keynr[key_part->key]; + if (param->using_real_indexes && + !field->optimize_range(keynr, key_part->part)) + DBUG_RETURN(0); + + if (field->result_type() == STRING_RESULT && + field->charset() != compare_collation()) + { + if (param->note_unusable_keys) + field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part, + func_name_cstring(), value, + Data_type_compatibility:: + INCOMPATIBLE_COLLATION); + DBUG_RETURN(0); + } + + StringBuffer tmp(value->collation.collation); + String *res; + + if (!(res= value->val_str(&tmp))) + DBUG_RETURN(&null_element); + + if (field->cmp_type() != STRING_RESULT || + field->type_handler() == &type_handler_enum || + field->type_handler() == &type_handler_set) + { + if (param->note_unusable_keys) + field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part, + func_name_cstring(), value, + Data_type_compatibility:: + INCOMPATIBLE_DATA_TYPE); + DBUG_RETURN(0); + } + + /* + TODO: + Check if this was a function. 
This should have be optimized away + in the sql_select.cc + */ + if (res != &tmp) + { + tmp.copy(*res); // Get own copy + res= &tmp; + } + + uint maybe_null= (uint) field->real_maybe_null(); + size_t field_length= field->pack_length() + maybe_null; + size_t offset= maybe_null; + size_t length= key_part->store_length; + + if (length != key_part->length + maybe_null) + { + /* key packed with length prefix */ + offset+= HA_KEY_BLOB_LENGTH; + field_length= length - HA_KEY_BLOB_LENGTH; + } + else + { + if (unlikely(length < field_length)) + { + /* + This can only happen in a table created with UNIREG where one key + overlaps many fields + */ + length= field_length; + } + else + field_length= length; + } + length+= offset; + uchar *min_str,*max_str; + if (!(min_str= (uchar*) alloc_root(param->mem_root, length*2))) + DBUG_RETURN(0); + max_str= min_str + length; + if (maybe_null) + max_str[0]= min_str[0]=0; + + size_t min_length, max_length; + field_length-= maybe_null; + if (field->charset()->like_range(res->ptr(), res->length(), + escape, wild_one, wild_many, + field_length, + (char*) min_str + offset, + (char*) max_str + offset, + &min_length, &max_length)) + DBUG_RETURN(0); // Can't optimize with LIKE + + if (offset != maybe_null) // BLOB or VARCHAR + { + int2store(min_str + maybe_null, min_length); + int2store(max_str + maybe_null, max_length); + } + SEL_ARG *tree= new (param->mem_root) SEL_ARG(field, min_str, max_str); + DBUG_RETURN(tree); +} + + +SEL_ARG * +Item_bool_func::get_mm_leaf(RANGE_OPT_PARAM *param, + Field *field, KEY_PART *key_part, + Item_func::Functype functype, Item *value) +{ + DBUG_ENTER("Item_bool_func::get_mm_leaf"); + DBUG_ASSERT(value); // IS NULL and IS NOT NULL are handled separately + if (key_part->image_type != Field::itRAW) + DBUG_RETURN(0); // e.g. 
SPATIAL index + DBUG_RETURN(field->get_mm_leaf(param, key_part, this, + functype_to_scalar_comparison_op(functype), + value)); +} + + +Data_type_compatibility +Field::can_optimize_scalar_range(const RANGE_OPT_PARAM *param, + const KEY_PART *key_part, + const Item_bool_func *cond, + scalar_comparison_op op, + Item *value) const +{ + bool is_eq_func= op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL; + uint keynr= param->real_keynr[key_part->key]; + if (param->using_real_indexes && + !optimize_range(keynr, key_part->part) && !is_eq_func) + return Data_type_compatibility::INCOMPATIBLE_DATA_TYPE; + Data_type_compatibility compat= can_optimize_range(cond, value, is_eq_func); + if (compat == Data_type_compatibility::OK) + return compat; + /* + Raise a note that the index part could not be used. + + TODO: Perhaps we also need to raise a similar note when + a partition could not be used (when using_real_indexes==false). + */ + if (param->using_real_indexes && param->note_unusable_keys) + { + DBUG_ASSERT(keynr < table->s->keys); + /* + Here "cond" can be any sargable predicate, e.g.: + 1. field=value (and other scalar comparison predicates: <, <=, <=>, =>, >) + 2. field [NOT] BETWEEN value1 AND value2 + 3. field [NOT] IN (value1, value2...) + Don't print the entire "cond" as in case of BETWEEN and IN + it would list all values. + Let's only print the current field/value pair. 
+ */ + raise_note_cannot_use_key_part(param->thd, keynr, key_part->part, + scalar_comparison_op_to_lex_cstring(op), + value, compat); + } + return compat; +} + + +uchar *Field::make_key_image(MEM_ROOT *mem_root, const KEY_PART *key_part) +{ + DBUG_ENTER("Field::make_key_image"); + uint maybe_null= (uint) real_maybe_null(); + uchar *str; + if (!(str= (uchar*) alloc_root(mem_root, key_part->store_length + 1))) + DBUG_RETURN(0); + if (maybe_null) + *str= (uchar) is_real_null(); // Set to 1 if null + get_key_image(str + maybe_null, key_part->length, key_part->image_type); + DBUG_RETURN(str); +} + + +SEL_ARG *Field::stored_field_make_mm_leaf_truncated(RANGE_OPT_PARAM *param, + scalar_comparison_op op, + Item *value) +{ + DBUG_ENTER("Field::stored_field_make_mm_leaf_truncated"); + if ((op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL) && + value->result_type() == item_cmp_type(result_type(), + value->result_type())) + DBUG_RETURN(new (param->mem_root) SEL_ARG_IMPOSSIBLE(this)); + /* + TODO: We should return trees of the type SEL_ARG::IMPOSSIBLE + for the cases like int_field > 999999999999999999999999 as well. 
+ */ + DBUG_RETURN(0); +} + + +SEL_ARG *Field_num::get_mm_leaf(RANGE_OPT_PARAM *prm, KEY_PART *key_part, + const Item_bool_func *cond, + scalar_comparison_op op, Item *value) +{ + DBUG_ENTER("Field_num::get_mm_leaf"); + if (can_optimize_scalar_range(prm, key_part, cond, op, value) != + Data_type_compatibility::OK) + DBUG_RETURN(0); + int err= value->save_in_field_no_warnings(this, 1); + if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0) + DBUG_RETURN(&null_element); + if (err > 0 && cmp_type() != value->result_type()) + DBUG_RETURN(stored_field_make_mm_leaf_truncated(prm, op, value)); + DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value)); +} + + +SEL_ARG *Field_temporal::get_mm_leaf(RANGE_OPT_PARAM *prm, KEY_PART *key_part, + const Item_bool_func *cond, + scalar_comparison_op op, Item *value) +{ + DBUG_ENTER("Field_temporal::get_mm_leaf"); + if (can_optimize_scalar_range(prm, key_part, cond, op, value) != + Data_type_compatibility::OK) + DBUG_RETURN(0); + int err= value->save_in_field_no_warnings(this, 1); + if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0) + DBUG_RETURN(&null_element); + if (err > 0) + DBUG_RETURN(stored_field_make_mm_leaf_truncated(prm, op, value)); + DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value)); +} + + +SEL_ARG *Field_date_common::get_mm_leaf(RANGE_OPT_PARAM *prm, + KEY_PART *key_part, + const Item_bool_func *cond, + scalar_comparison_op op, + Item *value) +{ + DBUG_ENTER("Field_date_common::get_mm_leaf"); + if (can_optimize_scalar_range(prm, key_part, cond, op, value) != + Data_type_compatibility::OK) + DBUG_RETURN(0); + int err= value->save_in_field_no_warnings(this, 1); + if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0) + DBUG_RETURN(&null_element); + if (err > 0) + { + if (err == 3) + { + /* + We were saving DATETIME into a DATE column, the conversion went ok + but a non-zero time part was cut off. + + In MySQL's SQL dialect, DATE and DATETIME are compared as datetime + values. 
Index over a DATE column uses DATE comparison. Changing + from one comparison to the other is possible: + + datetime(date_col)< '2007-12-10 12:34:55' -> date_col<='2007-12-10' + datetime(date_col)<='2007-12-10 12:34:55' -> date_col<='2007-12-10' + + datetime(date_col)> '2007-12-10 12:34:55' -> date_col>='2007-12-10' + datetime(date_col)>='2007-12-10 12:34:55' -> date_col>='2007-12-10' + + but we'll need to convert '>' to '>=' and '<' to '<='. This will + be done together with other types at the end of this function + (grep for stored_field_cmp_to_item) + */ + if (op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL) + DBUG_RETURN(new (prm->mem_root) SEL_ARG_IMPOSSIBLE(this)); + DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value)); + } + DBUG_RETURN(stored_field_make_mm_leaf_truncated(prm, op, value)); + } + DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value)); +} + + +SEL_ARG *Field_str::get_mm_leaf(RANGE_OPT_PARAM *prm, KEY_PART *key_part, + const Item_bool_func *cond, + scalar_comparison_op op, Item *value) +{ + int err; + DBUG_ENTER("Field_str::get_mm_leaf"); + if (can_optimize_scalar_range(prm, key_part, cond, op, value) != + Data_type_compatibility::OK) + DBUG_RETURN(0); + + { + /* + Do CharsetNarrowing if necessary + This means that we are temporary changing the character set of the + current key field to make key lookups possible. + This is needed when comparing an utf8mb3 key field with an utf8mb4 value. + See cset_narrowing.h for more details. 
+ */ + bool do_narrowing= + Utf8_narrow::should_do_narrowing(this, value->collation.collation); + Utf8_narrow narrow(this, do_narrowing); + + err= value->save_in_field_no_warnings(this, 1); + narrow.stop(); + } + + if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0) + DBUG_RETURN(&null_element); + if (err > 0) + { + if (op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL) + DBUG_RETURN(new (prm->mem_root) SEL_ARG_IMPOSSIBLE(this)); + DBUG_RETURN(NULL); /* Cannot infer anything */ + } + DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value)); +} + + +SEL_ARG *Field::get_mm_leaf_int(RANGE_OPT_PARAM *prm, KEY_PART *key_part, + const Item_bool_func *cond, + scalar_comparison_op op, Item *value, + bool unsigned_field) +{ + DBUG_ENTER("Field::get_mm_leaf_int"); + if (can_optimize_scalar_range(prm, key_part, cond, op, value) != + Data_type_compatibility::OK) + DBUG_RETURN(0); + int err= value->save_in_field_no_warnings(this, 1); + if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0) + DBUG_RETURN(&null_element); + if (err > 0) + { + if (value->result_type() != INT_RESULT) + DBUG_RETURN(stored_field_make_mm_leaf_truncated(prm, op, value)); + else + DBUG_RETURN(stored_field_make_mm_leaf_bounded_int(prm, key_part, + op, value, + unsigned_field)); + } + if (value->result_type() != INT_RESULT) + DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value)); + DBUG_RETURN(stored_field_make_mm_leaf_exact(prm, key_part, op, value)); +} + + +/* + This method is called when: + - value->save_in_field_no_warnings() returned err > 0 + - and both field and "value" are of integer data types + If an integer got bounded (e.g. 
  to within 0..255 / -128..127)
  for < or >, set flags as for <= or >= (no NEAR_MAX / NEAR_MIN)
*/

SEL_ARG *Field::stored_field_make_mm_leaf_bounded_int(RANGE_OPT_PARAM *param,
                                                      KEY_PART *key_part,
                                                      scalar_comparison_op op,
                                                      Item *value,
                                                      bool unsigned_field)
{
  DBUG_ENTER("Field::stored_field_make_mm_leaf_bounded_int");
  if (op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL) // e.g. tinyint = 200
    DBUG_RETURN(new (param->mem_root) SEL_ARG_IMPOSSIBLE(this));
  longlong item_val= value->val_int();

  if (op == SCALAR_CMP_LT && ((item_val > 0)
                              || (value->unsigned_flag &&
                                  (ulonglong)item_val > 0)))
    op= SCALAR_CMP_LE; // e.g. rewrite (tinyint < 200) to (tinyint <= 127)
  else if (op == SCALAR_CMP_GT && !unsigned_field &&
           !value->unsigned_flag && item_val < 0)
    op= SCALAR_CMP_GE; // e.g. rewrite (tinyint > -200) to (tinyint >= -128)

  /*
    Check if we are comparing an UNSIGNED integer with a negative constant.
    In this case we know that:
    (a) (unsigned_int [< | <=] negative_constant) == FALSE
    (b) (unsigned_int [> | >=] negative_constant) == TRUE
    In case (a) the condition is false for all values, and in case (b) it
    is true for all values, so we can avoid unnecessary retrieval and
    condition testing, and we also get correct comparison of unsigned
    integers with negative integers (which otherwise fails because at query
    execution time negative integers are cast to unsigned if compared with
    unsigned).
  */
  if (unsigned_field && !value->unsigned_flag && item_val < 0)
  {
    if (op == SCALAR_CMP_LT || op == SCALAR_CMP_LE) // e.g. uint < -1
      DBUG_RETURN(new (param->mem_root) SEL_ARG_IMPOSSIBLE(this));
    if (op == SCALAR_CMP_GT || op == SCALAR_CMP_GE) // e.g. uint > -1
      DBUG_RETURN(0); // Condition is always true: no range restriction
  }
  DBUG_RETURN(stored_field_make_mm_leaf_exact(param, key_part, op, value));
}


/*
  Build a SEL_ARG interval for this field and operation "op".
  Non-equality operations use the SEL_ARG_LT/GT/... constructors that take
  "value", which may set NEAR_MIN/NEAR_MAX (open interval) flags.
*/
SEL_ARG *Field::stored_field_make_mm_leaf(RANGE_OPT_PARAM *param,
                                          KEY_PART *key_part,
                                          scalar_comparison_op op,
                                          Item *value)
{
  DBUG_ENTER("Field::stored_field_make_mm_leaf");
  THD *thd= param->thd;
  MEM_ROOT *mem_root= param->mem_root;
  uchar *str;
  if (!(str= make_key_image(param->mem_root, key_part)))
    DBUG_RETURN(0);

  switch (op) {
  case SCALAR_CMP_LE:
    DBUG_RETURN(new (mem_root) SEL_ARG_LE(str, this));
  case SCALAR_CMP_LT:
    DBUG_RETURN(new (mem_root) SEL_ARG_LT(thd, str, key_part, this, value));
  case SCALAR_CMP_GT:
    DBUG_RETURN(new (mem_root) SEL_ARG_GT(thd, str, key_part, this, value));
  case SCALAR_CMP_GE:
    DBUG_RETURN(new (mem_root) SEL_ARG_GE(thd, str, key_part, this, value));
  case SCALAR_CMP_EQ:
  case SCALAR_CMP_EQUAL:
    DBUG_RETURN(new (mem_root) SEL_ARG(this, str, str));
    break;
  }
  DBUG_ASSERT(0);
  DBUG_RETURN(NULL);
}


/*
  Like stored_field_make_mm_leaf(), but the stored field value is known to
  be exact (no truncation happened), so closed intervals are built without
  consulting "value" for NEAR_MIN/NEAR_MAX adjustment.
*/
SEL_ARG *Field::stored_field_make_mm_leaf_exact(RANGE_OPT_PARAM *param,
                                                KEY_PART *key_part,
                                                scalar_comparison_op op,
                                                Item *value)
{
  DBUG_ENTER("Field::stored_field_make_mm_leaf_exact");
  uchar *str;
  if (!(str= make_key_image(param->mem_root, key_part)))
    DBUG_RETURN(0);

  switch (op) {
  case SCALAR_CMP_LE:
    DBUG_RETURN(new (param->mem_root) SEL_ARG_LE(str, this));
  case SCALAR_CMP_LT:
    DBUG_RETURN(new (param->mem_root) SEL_ARG_LT(str, key_part, this));
  case SCALAR_CMP_GT:
    DBUG_RETURN(new (param->mem_root) SEL_ARG_GT(str, key_part, this));
  case SCALAR_CMP_GE:
    DBUG_RETURN(new (param->mem_root) SEL_ARG_GE(str, this));
  case SCALAR_CMP_EQ:
  case SCALAR_CMP_EQUAL:
    DBUG_RETURN(new (param->mem_root) SEL_ARG(this, str, str));
    break;
  }
  DBUG_ASSERT(0);
  DBUG_RETURN(NULL);
}


/******************************************************************************
** Tree manipulation functions
** If tree is 0 it means that the condition can't be tested.
It refers
** to a non existent table or to a field in current table which isn't a key.
** The different tree flags:
** IMPOSSIBLE: Condition is never TRUE
** ALWAYS:     Condition is always TRUE
** MAYBE:      Condition may exist when tables are read
** MAYBE_KEY:  Condition refers to a key that may be used in join loop
** KEY_RANGE:  Condition uses a key
******************************************************************************/

/*
  Update weights for SEL_ARG graph that is connected only via next_key_part
  (and not left/right) links
*/
static uint update_weight_for_single_arg(SEL_ARG *arg)
{
  if (arg->next_key_part)
    return (arg->weight= 1 + update_weight_for_single_arg(arg->next_key_part));
  else
    return (arg->weight= 1);
}


/*
  Add a new key test to a key when scanning through all keys.
  This will never be called for same key parts.

  The two chains are merged ordered by key part number; the resulting
  chain's weights are recomputed afterwards.
*/

static SEL_ARG *
sel_add(SEL_ARG *key1,SEL_ARG *key2)
{
  SEL_ARG *root,**key_link;

  if (!key1)
    return key2;
  if (!key2)
    return key1;

  key_link= &root;
  while (key1 && key2)
  {
    if (key1->part < key2->part)
    {
      *key_link= key1;
      key_link= &key1->next_key_part;
      key1=key1->next_key_part;
    }
    else
    {
      *key_link= key2;
      key_link= &key2->next_key_part;
      key2=key2->next_key_part;
    }
  }
  *key_link=key1 ? key1 : key2;

  update_weight_for_single_arg(root);
  return root;
}


/*
  Build a range tree for the conjunction of the range parts of two trees

  SYNOPSIS
    and_range_trees()
      param           Context info for the operation
      tree1           SEL_TREE for the first conjunct
      tree2           SEL_TREE for the second conjunct
      result          SEL_TREE for the result

  DESCRIPTION
    This function takes range parts of two trees tree1 and tree2 and builds
    a range tree for the conjunction of the formulas that these two range
    parts represent.
    More exactly:
    if the range part of tree1 represents the normalized formula
      R1_1 AND ... AND R1_k,
    and the range part of tree2 represents the normalized formula
      R2_1 AND ... AND R2_k,
    then the range part of the result represents the formula:
      RT = R_1 AND ... AND R_k, where R_i=(R1_i AND R2_i) for each i in [1..k]

    The function assumes that tree1 is never equal to tree2. At the same
    time the tree result can be the same as tree1 (but never as tree2).
    If result==tree1 then rt replaces the range part of tree1 leaving
    imerges as they are.
    If result!=tree1 then it is assumed that the SEL_ARG trees in tree1 and
    tree2 should be preserved. Otherwise they can be destroyed.

  RETURN
    1    if the type of the result tree is SEL_TREE::IMPOSSIBLE
    0    otherwise
*/

static
int and_range_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2,
                    SEL_TREE *result)
{
  DBUG_ENTER("and_ranges");
  key_map result_keys;
  result_keys.clear_all();
  /* Process every index that has a SEL_ARG in either tree */
  key_map anded_keys= tree1->keys_map;
  anded_keys.merge(tree2->keys_map);
  int key_no;
  key_map::Iterator it(anded_keys);
  while ((key_no= it++) != key_map::Iterator::BITMAP_END)
  {
    uint flag=0;
    SEL_ARG *key1= tree1->keys[key_no];
    SEL_ARG *key2= tree2->keys[key_no];
    if (key1 && !key1->simple_key())
      flag|= CLONE_KEY1_MAYBE;
    if (key2 && !key2->simple_key())
      flag|=CLONE_KEY2_MAYBE;
    if (result != tree1)
    {
      /* Source trees must survive: bump reference counts before AND-ing */
      if (key1)
        key1->incr_refs();
      if (key2)
        key2->incr_refs();
    }
    SEL_ARG *key;
    if ((result->keys[key_no]= key= key_and_with_limit(param, key_no,
                                                       key1, key2, flag)))
    {
      if (key && key->type == SEL_ARG::IMPOSSIBLE)
      {
        result->type= SEL_TREE::IMPOSSIBLE;
        if (param->using_real_indexes)
        {
          param->table->with_impossible_ranges.set_bit(param->
                                                       real_keynr[key_no]);
        }
        DBUG_RETURN(1);
      }
      result_keys.set_bit(key_no);
#ifdef EXTRA_DEBUG
      if (param->alloced_sel_args <
          param->thd->variables.optimizer_max_sel_args)
        key->test_use_count(key);
#endif
    }
  }
  result->keys_map= result_keys;
  DBUG_RETURN(0);
}


/*
  Build a SEL_TREE for a conjunction out
of such trees for the conjuncts

  SYNOPSIS
    tree_and()
      param           Context info for the operation
      tree1           SEL_TREE for the first conjunct
      tree2           SEL_TREE for the second conjunct

  DESCRIPTION
    This function builds a tree for the formula (A AND B) out of the trees
    tree1 and tree2 that have been built for the formulas A and B
    respectively.

    In a general case
      tree1 represents the formula RT1 AND MT1,
        where RT1 = R1_1 AND ... AND R1_k1, MT1=M1_1 AND ... AND M1_l1;
      tree2 represents the formula RT2 AND MT2
        where RT2 = R2_1 AND ... AND R2_k2, MT2=M2_1 AND ... AND M2_l2.

    The result tree will represent the formula of the following structure:
      RT AND RT1MT2 AND RT2MT1, such that
        rt is a tree obtained by range intersection of trees tree1 and tree2,
        RT1MT2 = RT1M2_1 AND ... AND RT1M2_l2,
        RT2MT1 = RT2M1_1 AND ... AND RT2M1_l1,
        where rt1m2_i (i=1,...,l2) is the result of the pushdown operation
        of range tree rt1 into imerge m2_i, while rt2m1_j (j=1,...,l1) is the
        result of the pushdown operation of range tree rt2 into imerge m1_j.

    RT1MT2/RT2MT is empty if MT2/MT1 is empty.

    The range intersection of two range trees is produced by the function
    and_range_trees. The pushdown of a range tree to a imerge is performed
    by the function imerge_list_and_tree. This function may produce imerges
    containing only one range tree. Such trees are intersected with rt and
    the result of intersection is returned as the range part of the result
    tree, while the corresponding imerges are removed altogether from its
    imerge part.

  NOTE
    The pushdown operation of range trees into imerges is needed to be able
    to construct valid imerges for the condition like this:
      key1_p1=c1 AND (key1_p2 BETWEEN c21 AND c22 OR key2 < c2)

  NOTE
    Currently we do not support intersection between indexes and index
    merges. When this will be supported the list of imerges for the result
    tree should include also imerges from M1 and M2. That's why an extra
    parameter is added to the function imerge_list_and_tree. If we call the
    function with the last parameter equal to FALSE then MT1 and MT2 will be
    preserved in the imerge list of the result tree. This can lead to the
    exponential growth of the imerge list though.
    Currently the last parameter of imerge_list_and_tree calls is always
    TRUE.

  RETURN
    The result tree, if a success
    0 - otherwise.
*/

static
SEL_TREE *tree_and(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2)
{
  DBUG_ENTER("tree_and");
  /* Trivial cases: a missing/ALWAYS conjunct does not restrict the other */
  if (!tree1)
    DBUG_RETURN(tree2);
  if (!tree2)
    DBUG_RETURN(tree1);
  if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree1);
  if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree2);
  if (tree1->type == SEL_TREE::MAYBE)
  {
    if (tree2->type == SEL_TREE::KEY)
      tree2->type=SEL_TREE::KEY_SMALLER;
    DBUG_RETURN(tree2);
  }
  if (tree2->type == SEL_TREE::MAYBE)
  {
    tree1->type=SEL_TREE::KEY_SMALLER;
    DBUG_RETURN(tree1);
  }

  /* Push each range tree down into the other tree's imerges */
  if (!tree1->merges.is_empty())
    imerge_list_and_tree(param, &tree1->merges, tree2, TRUE);
  if (!tree2->merges.is_empty())
    imerge_list_and_tree(param, &tree2->merges, tree1, TRUE);
  /* Intersect the range parts; the result is stored into tree1 */
  if (and_range_trees(param, tree1, tree2, tree1))
    DBUG_RETURN(tree1);
  imerge_list_and_list(&tree1->merges, &tree2->merges);
  eliminate_single_tree_imerges(param, tree1);
  DBUG_RETURN(tree1);
}


/*
  Eliminate single tree imerges in a SEL_TREE objects

  SYNOPSIS
    eliminate_single_tree_imerges()
      param      Context info for the function
      tree       SEL_TREE where single tree imerges are to be eliminated

  DESCRIPTION
    For each imerge in 'tree' that contains only one disjunct tree, i.e.
    for any imerge of the form m=rt, the function performs an AND operation
    with the range part of tree, replaces the range part with the result of
    the AND and removes imerge m from the merge part of 'tree'.

  RETURN VALUE
    none
*/

static
void eliminate_single_tree_imerges(RANGE_OPT_PARAM *param, SEL_TREE *tree)
{
  SEL_IMERGE *imerge;
  List<SEL_IMERGE> merges= tree->merges;
  List_iterator<SEL_IMERGE> it(merges);
  tree->merges.empty();
  while ((imerge= it++))
  {
    /* An imerge with exactly one disjunct tree: AND it into the range part */
    if (imerge->trees+1 == imerge->trees_next)
    {
      tree= tree_and(param, tree, *imerge->trees);
      it.remove();
    }
  }
  tree->merges= merges;
}


/*
  For two trees check that there are indexes with ranges in both of them

  SYNOPSIS
    sel_trees_have_common_keys()
      tree1           SEL_TREE for the first tree
      tree2           SEL_TREE for the second tree
      common_keys OUT bitmap of all indexes with ranges in both trees

  DESCRIPTION
    For two trees tree1 and tree2 the function checks if there are indexes
    in their range parts such that SEL_ARG trees are defined for them in the
    range parts of both trees. The function returns the bitmap of such
    indexes in the parameter common_keys.

  RETURN
    TRUE    if there are such indexes (common_keys is not empty)
    FALSE   otherwise
*/

static
bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2,
                                key_map *common_keys)
{
  *common_keys= tree1->keys_map;
  common_keys->intersect(tree2->keys_map);
  return !common_keys->is_clear_all();
}


/*
  Check whether range parts of two trees can be ored for some indexes

  SYNOPSIS
    sel_trees_can_be_ored()
      param           Context info for the function
      tree1           SEL_TREE for the first tree
      tree2           SEL_TREE for the second tree
      common_keys IN/OUT IN: bitmap of all indexes with SEL_ARG in both trees
                        OUT: bitmap of all indexes that can be ored

  DESCRIPTION
    For two trees tree1 and tree2 and the bitmap common_keys containing
    bits for indexes that have SEL_ARG trees in range parts of both trees
    the function checks if there are indexes for which SEL_ARG trees can
    be ored. Two SEL_ARG trees for the same index can be ored if the most
    major components of the index used in these trees coincide. If the
    SEL_ARG trees for an index cannot be ored the function clears the bit
    for this index in the bitmap common_keys.

    The function does not verify that indexes marked in common_keys really
    have SEL_ARG trees in both tree1 and tree2. It assumes that this is true.

  NOTE
    The function sel_trees_can_be_ored is usually used in pair with the
    function sel_trees_have_common_keys.

  RETURN
    TRUE    if there are indexes for which SEL_ARG trees can be ored
    FALSE   otherwise
*/

static
bool sel_trees_can_be_ored(RANGE_OPT_PARAM* param,
                           SEL_TREE *tree1, SEL_TREE *tree2,
                           key_map *common_keys)
{
  DBUG_ENTER("sel_trees_can_be_ored");
  if (!sel_trees_have_common_keys(tree1, tree2, common_keys))
    DBUG_RETURN(FALSE);
  int key_no;
  key_map::Iterator it(*common_keys);
  while ((key_no= it++) != key_map::Iterator::BITMAP_END)
  {
    DBUG_ASSERT(tree1->keys[key_no] && tree2->keys[key_no]);
    /* Trees have a common key, check if they refer to the same key part */
    if (tree1->keys[key_no]->part != tree2->keys[key_no]->part)
      common_keys->clear_bit(key_no);
  }
  DBUG_RETURN(!common_keys->is_clear_all());
}

/*
  Check whether the key parts inf_init..inf_end-1 of one index can compose
  an infix for the key parts key_init..key_end-1 of another index
*/

static
bool is_key_infix(KEY_PART *key_init, KEY_PART *key_end,
                  KEY_PART *inf_init, KEY_PART *inf_end)
{
  KEY_PART *key_part, *inf_part;
  /* Find where the candidate infix starts inside the key */
  for (key_part= key_init; key_part < key_end; key_part++)
  {
    if (key_part->field->eq(inf_init->field))
      break;
  }
  if (key_part == key_end)
    return false;
  /* All remaining infix fields must match the key fields in order */
  for (key_part++, inf_part= inf_init + 1;
       key_part < key_end && inf_part < inf_end;
       key_part++, inf_part++)
  {
    if (!key_part->field->eq(inf_part->field))
      return false;
  }
  return inf_part == inf_end;
}


/*
  Check whether range parts of two trees must be ored for some indexes

  SYNOPSIS
    sel_trees_must_be_ored()
      param           Context info for the function
      tree1           SEL_TREE for the first tree
      tree2           SEL_TREE for the second tree
      oredable_keys   bitmap of SEL_ARG trees that can be ored

  DESCRIPTION
    For two trees tree1 and tree2 the function checks whether they must be
    ored. The function assumes that the bitmap oredable_keys contains bits
    for those corresponding pairs of SEL_ARG trees from tree1 and tree2 that
    can be ored.
    We believe that tree1 and tree2 must be ored if any pair of SEL_ARG trees
    r1 and r2, such that r1 is from tree1 and r2 is from tree2 and both
    of them are marked in oredable_keys, can be merged.

  NOTE
    The function sel_trees_must_be_ored as a rule is used in pair with the
    function sel_trees_can_be_ored.

  RETURN
    TRUE    if there are indexes for which SEL_ARG trees must be ored
    FALSE   otherwise
*/

static
bool sel_trees_must_be_ored(RANGE_OPT_PARAM* param,
                            SEL_TREE *tree1, SEL_TREE *tree2,
                            key_map oredable_keys)
{
  key_map tmp;
  DBUG_ENTER("sel_trees_must_be_ored");

  /* If any used index is not oredable, the trees need not be ored */
  tmp= tree1->keys_map;
  tmp.merge(tree2->keys_map);
  tmp.subtract(oredable_keys);
  if (!tmp.is_clear_all())
    DBUG_RETURN(FALSE);

  int idx1, idx2;
  key_map::Iterator it1(oredable_keys);
  while ((idx1= it1++) != key_map::Iterator::BITMAP_END)
  {
    KEY_PART *key1_init= param->key[idx1]+tree1->keys[idx1]->part;
    KEY_PART *key1_end= param->key[idx1]+tree1->keys[idx1]->max_part_no;
    key_map::Iterator it2(oredable_keys);
    while ((idx2= it2++) != key_map::Iterator::BITMAP_END)
    {
      if (idx2 <= idx1)
        continue;

      KEY_PART *key2_init= param->key[idx2]+tree2->keys[idx2]->part;
      KEY_PART *key2_end= param->key[idx2]+tree2->keys[idx2]->max_part_no;
      /* Each pair of used key-part sequences must overlap as infixes */
      if (!is_key_infix(key1_init, key1_end, key2_init, key2_end) &&
          !is_key_infix(key2_init, key2_end, key1_init, key1_end))
        DBUG_RETURN(FALSE);
    }
  }

  DBUG_RETURN(TRUE);
}


/*
  Remove the trees that are not suitable for record retrieval

  SYNOPSIS
    remove_nonrange_trees()
      param  Context info for the function
      tree   Tree to be processed, tree->type is KEY or KEY_SMALLER

  DESCRIPTION
    This function walks through tree->keys[] and removes the SEL_ARG* trees
    that are not "maybe" trees (*) and cannot be used to construct quick
    range selects.
    (*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of
          these types here as well.

    A SEL_ARG* tree cannot be used to construct quick select if it has
    tree->part != 0. (e.g. it could represent "keypart2 < const").

    Normally we allow construction of SEL_TREE objects that have SEL_ARG
    trees that do not allow quick range select construction.
    For example:
    for " keypart1=1 AND keypart2=2 " the execution will proceed as follows:
    tree1= SEL_TREE { SEL_ARG{keypart1=1} }
    tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select
                                               from this
    call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG
                                   tree.

    Another example:
    tree3= SEL_TREE { SEL_ARG{key1part1 = 1} }
    tree4= SEL_TREE { SEL_ARG{key2part2 = 2} } -- can't make quick range
                                                  select from this
    call tree_or(tree3, tree4) -- creates a SEL_MERGE out of which no index
    merge can be constructed, but it is potentially useful, as anding it with
    tree5= SEL_TREE { SEL_ARG{key2part1 = 3} } creates an index merge that
    represents the formula
      key1part1=1 AND key2part1=3 OR key2part1=3 AND key2part2=2
    for which an index merge can be built.

    Any final SEL_TREE may contain SEL_ARG trees for which no quick select
    can be built. Such SEL_ARG trees should be removed from the range part
    before different range scans are evaluated. Such SEL_ARG trees also
    should be removed from all range trees of each index merge before
    different possible index merge plans are evaluated. If after this removal
    one of the range trees in the index merge becomes empty the whole index
    merge must be discarded.

  RETURN
    0  Ok, some suitable trees left
    1  No tree->keys[] left.
*/

static bool remove_nonrange_trees(PARAM *param, SEL_TREE *tree)
{
  bool res= FALSE;
  for (uint i=0; i < param->keys; i++)
  {
    if (tree->keys[i])
    {
      if (tree->keys[i]->part)
      {
        tree->keys[i]= NULL;
        /* Mark that records_in_range has not been called */
        param->quick_rows[param->real_keynr[i]]= HA_POS_ERROR;
        tree->keys_map.clear_bit(i);
      }
      else
        res= TRUE;
    }
  }
  return !res;
}


/*
  Restore nonrange trees to their previous state
*/

static void restore_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree,
                                   SEL_ARG **backup_keys)
{
  for (uint i=0; i < param->keys; i++)
  {
    if (backup_keys[i])
    {
      tree->keys[i]= backup_keys[i];
      tree->keys_map.set_bit(i);
    }
  }
}

/*
  Build a SEL_TREE for a disjunction out of such trees for the disjuncts

  SYNOPSIS
    tree_or()
      param           Context info for the operation
      tree1           SEL_TREE for the first disjunct
      tree2           SEL_TREE for the second disjunct

  DESCRIPTION
    This function builds a tree for the formula (A OR B) out of the trees
    tree1 and tree2 that have been built for the formulas A and B
    respectively.

    In a general case
      tree1 represents the formula RT1 AND MT1,
        where RT1=R1_1 AND ... AND R1_k1, MT1=M1_1 AND ... AND M1_l1;
      tree2 represents the formula RT2 AND MT2
        where RT2=R2_1 AND ... AND R2_k2, MT2=M2_1 and ... and M2_l2.

    The function constructs the result tree according to the formula
      (RT1 OR RT2) AND (MT1 OR RT1) AND (MT2 OR RT2) AND (MT1 OR MT2)
    that is equivalent to the formula (RT1 AND MT1) OR (RT2 AND MT2).

    To limit the number of produced imerges the function considers
    a weaker formula than the original one:
      (RT1 AND M1_1) OR (RT2 AND M2_1)
    that is equivalent to:
      (RT1 OR RT2)                  (1)
        AND
      (M1_1 OR M2_1)                (2)
        AND
      (M1_1 OR RT2)                 (3)
        AND
      (M2_1 OR RT1)                 (4)

    For the first conjunct (1) the function builds a tree with a range part
    and, possibly, one imerge. For the other conjuncts (2-4) the function
    produces sets of imerges.
All constructed imerges are included into the
    result tree.

    For the formula (1) the function produces the tree representing a formula
    of the structure RT [AND M], such that:
     - the range tree rt contains the result of oring SEL_ARG trees from rt1
       and rt2
     - the imerge m consists of two range trees rt1 and rt2.
    The imerge m is added if it's not true that rt1 and rt2 must be ored
    If rt1 and rt2 can't be ored rt is empty and only m is produced for (1).

    To produce imerges for the formula (2) the function calls the function
    imerge_list_or_list passing it the merge parts of tree1 and tree2 as
    parameters.

    To produce imerges for the formula (3) the function calls the function
    imerge_list_or_tree passing it the imerge m1_1 and the range tree rt2 as
    parameters. Similarly, to produce imerges for the formula (4) the
    function calls the function imerge_list_or_tree passing it the imerge
    m2_1 and the range tree rt1.

    If rt1 is empty then the trees for (1) and (4) are empty.
    If rt2 is empty then the trees for (1) and (3) are empty.
    If mt1 is empty then the trees for (2) and (3) are empty.
    If mt2 is empty then the trees for (2) and (4) are empty.

  RETURN
    The result tree for the operation if a success
    0 - otherwise
*/

static SEL_TREE *
tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
{
  DBUG_ENTER("tree_or");
  if (!tree1 || !tree2)
    DBUG_RETURN(0);
  /* An IMPOSSIBLE disjunct can be dropped; an ALWAYS one dominates */
  if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree2);
  if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree1);
  if (tree1->type == SEL_TREE::MAYBE)
    DBUG_RETURN(tree1);				// Can't use this
  if (tree2->type == SEL_TREE::MAYBE)
    DBUG_RETURN(tree2);

  SEL_TREE *result= NULL;
  key_map ored_keys;
  SEL_TREE *rtree[2]= {NULL,NULL};
  SEL_IMERGE *imerge[2]= {NULL, NULL};
  bool no_ranges1= tree1->without_ranges();
  bool no_ranges2= tree2->without_ranges();
  bool no_merges1= tree1->without_imerges();
  bool no_merges2= tree2->without_imerges();
  if (!no_ranges1 && !no_merges2)
  {
    rtree[0]= new SEL_TREE(tree1, TRUE, param);
    imerge[1]= new SEL_IMERGE(tree2->merges.head(), 0, param);
  }
  if (!no_ranges2 && !no_merges1)
  {
    rtree[1]= new SEL_TREE(tree2, TRUE, param);
    imerge[0]= new SEL_IMERGE(tree1->merges.head(), 0, param);
  }
  bool no_imerge_from_ranges= FALSE;

  /* Build the range part of the tree for the formula (1) */
  if (sel_trees_can_be_ored(param, tree1, tree2, &ored_keys))
  {
    bool must_be_ored= sel_trees_must_be_ored(param, tree1, tree2, ored_keys);
    no_imerge_from_ranges= must_be_ored;

    if (no_imerge_from_ranges && no_merges1 && no_merges2)
    {
      /*
        Reuse tree1 as the result in simple cases. This reduces memory usage
        for e.g. "key IN (c1, ..., cN)" which produces a lot of ranges.
      */
      result= tree1;
      result->keys_map.clear_all();
    }
    else
    {
      if (!(result= new (param->mem_root) SEL_TREE(param->mem_root,
                                                   param->keys)))
      {
        DBUG_RETURN(result);
      }
    }

    key_map::Iterator it(ored_keys);
    int key_no;
    while ((key_no= it++) != key_map::Iterator::BITMAP_END)
    {
      SEL_ARG *key1= tree1->keys[key_no];
      SEL_ARG *key2= tree2->keys[key_no];
      if (!must_be_ored)
      {
        /* Sources are also kept for the imerge: bump reference counts */
        key1->incr_refs();
        key2->incr_refs();
      }
      if ((result->keys[key_no]= key_or_with_limit(param, key_no, key1,
                                                   key2)))
        result->keys_map.set_bit(key_no);
    }
    result->type= tree1->type;
  }
  else
  {
    if (!result && !(result= new (param->mem_root) SEL_TREE(param->mem_root,
                                                            param->keys)))
      DBUG_RETURN(result);
  }

  if (no_imerge_from_ranges && no_merges1 && no_merges2)
  {
    if (result->keys_map.is_clear_all())
      result->type= SEL_TREE::ALWAYS;
    DBUG_RETURN(result);
  }

  SEL_IMERGE *imerge_from_ranges;
  if (!(imerge_from_ranges= new SEL_IMERGE()))
    result= NULL;
  else if (!no_ranges1 && !no_ranges2 && !no_imerge_from_ranges)
  {
    /* Build the imerge part of the tree for the formula (1) */
    SEL_TREE *rt1= tree1;
    SEL_TREE *rt2= tree2;
    if (no_merges1)
      rt1= new SEL_TREE(tree1, TRUE, param);
    if (no_merges2)
      rt2= new SEL_TREE(tree2, TRUE, param);
    if (!rt1 || !rt2 ||
        result->merges.push_back(imerge_from_ranges) ||
        imerge_from_ranges->or_sel_tree(param, rt1) ||
        imerge_from_ranges->or_sel_tree(param, rt2))
      result= NULL;
  }
  if (!result)
    DBUG_RETURN(result);

  result->type= tree1->type;

  if (!no_merges1 && !no_merges2 &&
      !imerge_list_or_list(param, &tree1->merges, &tree2->merges))
  {
    /* Build the imerges for the formula (2) */
    imerge_list_and_list(&result->merges, &tree1->merges);
  }

  /* Build the imerges for the formulas (3) and (4) */
  for (uint i=0; i < 2; i++)
  {
    List<SEL_IMERGE> merges;
    SEL_TREE *rt= rtree[i];
    SEL_IMERGE *im= imerge[1-i];

    if (rt && im && !merges.push_back(im) &&
        !imerge_list_or_tree(param, &merges, rt))
      imerge_list_and_list(&result->merges, &merges);
  }

  DBUG_RETURN(result);
}


/* And key trees where key1->part < key2->part */

static SEL_ARG *
and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
             uint clone_flag)
{
  SEL_ARG *next;
  ulong use_count=key1->use_count;

  if (sel_arg_and_weight_heuristic(param, key1, key2))
    return key1;

  if (key1->elements != 1)
  {
    // psergey: why we don't count that key1 has n-k-p?
    key2->use_count+=key1->elements-1;
    key2->increment_use_count((int) key1->elements-1);
  }
  if (key1->type == SEL_ARG::MAYBE_KEY)
  {
    if (key2->type == SEL_ARG::KEY_RANGE)
      return key2;
    key1->right= key1->left= &null_element;
    key1->next= key1->prev= 0;
    key1->weight= 1 + (key1->next_key_part? key1->next_key_part->weight: 0);
  }

  /* Attach key2 below every interval of key1 */
  for (next=key1->first(); next ; next=next->next)
  {
    if (next->next_key_part)
    {
      uint old_weight= next->next_key_part->weight;
      SEL_ARG *tmp= key_and(param, next->next_key_part, key2, clone_flag);
      if (tmp && tmp->type == SEL_ARG::IMPOSSIBLE)
      {
        key1=key1->tree_delete(next);
        continue;
      }
      next->next_key_part=tmp;
      key1->weight+= (tmp? tmp->weight: 0) - old_weight;
      if (use_count)
        next->increment_use_count(use_count);
      if (param->alloced_sel_args >
          param->thd->variables.optimizer_max_sel_args)
        break;
    }
    else
    {
      next->next_key_part=key2;
      key1->weight += key2->weight;
    }
  }
  if (!key1)
    return &null_element;			// Impossible ranges
  key1->use_count++;

  key1->max_part_no= MY_MAX(key2->max_part_no, key2->part+1);
  return key1;
}


/*
  Produce a SEL_ARG graph that represents "key1 AND key2"

  SYNOPSIS
    key_and()
      param   Range analysis context (needed to track if we have allocated
              too many SEL_ARGs)
      key1    First argument, root of its RB-tree
      key2    Second argument, root of its RB-tree

  RETURN
    RB-tree root of the resulting SEL_ARG graph.
    NULL if the result of AND operation is an empty interval {0}.
*/

static SEL_ARG *
key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
{
  if (!key1)
    return key2;
  if (!key2)
    return key1;
  if (key1->part != key2->part)
  {
    /* Different key parts: the lower-numbered part becomes the parent */
    if (key1->part > key2->part)
    {
      swap_variables(SEL_ARG *, key1, key2);
      clone_flag=swap_clone_flag(clone_flag);
    }
    // key1->part < key2->part

    if (sel_arg_and_weight_heuristic(param, key1, key2))
      return key1;

    key1->use_count--;
    if (key1->use_count > 0)
      if (!(key1= key1->clone_tree(param)))
        return 0;				// OOM
    return and_all_keys(param, key1, key2, clone_flag);
  }

  if (((clone_flag & CLONE_KEY2_MAYBE) &&
       !(clone_flag & CLONE_KEY1_MAYBE) &&
       key2->type != SEL_ARG::MAYBE_KEY) ||
      key1->type == SEL_ARG::MAYBE_KEY)
  {						// Put simple key in key2
    swap_variables(SEL_ARG *, key1, key2);
    clone_flag=swap_clone_flag(clone_flag);
  }

  /* If one of the key is MAYBE_KEY then the found region may be smaller */
  if (key2->type == SEL_ARG::MAYBE_KEY)
  {
    if (key1->use_count > 1)
    {
      key1->use_count--;
      if (!(key1=key1->clone_tree(param)))
        return 0;				// OOM
      key1->use_count++;
    }
    if (key1->type == SEL_ARG::MAYBE_KEY)
    {						// Both are maybe key
      key1->next_key_part=key_and(param, key1->next_key_part,
                                  key2->next_key_part, clone_flag);

      key1->weight= 1 + (key1->next_key_part ?
                         key1->next_key_part->weight : 0);

      if (key1->next_key_part &&
          key1->next_key_part->type == SEL_ARG::IMPOSSIBLE)
        return key1;
    }
    else
    {
      key1->maybe_smaller();
      if (key2->next_key_part)
      {
        key1->use_count--;			// Incremented in and_all_keys
        return and_all_keys(param, key1, key2->next_key_part, clone_flag);
      }
      key2->use_count--;			// Key2 doesn't have a tree
    }
    return key1;
  }

  if ((key1->min_flag | key2->min_flag) & GEOM_FLAG)
  {
    /* TODO: why not leave one of the trees? */
    key1->free_tree();
    key2->free_tree();
    return 0;					// Can't optimize this
  }

  /* Same key part: intersect the two interval lists */
  key1->use_count--;
  key2->use_count--;
  SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0;
  uint max_part_no= MY_MAX(key1->max_part_no, key2->max_part_no);

  while (e1 && e2)
  {
    int cmp=e1->cmp_min_to_min(e2);
    if (cmp < 0)
    {
      if (get_range(&e1,&e2,key1))
        continue;
    }
    else if (get_range(&e2,&e1,key2))
      continue;
    /* e1 and e2 overlap: AND their next key parts recursively */
    SEL_ARG *next=key_and(param, e1->next_key_part, e2->next_key_part,
                          clone_flag);
    e1->incr_refs();
    e2->incr_refs();
    if (!next || next->type != SEL_ARG::IMPOSSIBLE)
    {
      SEL_ARG *new_arg= e1->clone_and(param->thd, e2);
      if (!new_arg)
        return &null_element;			// End of memory
      new_arg->next_key_part=next;
      if (new_arg->next_key_part)
        new_arg->weight += new_arg->next_key_part->weight;

      if (!new_tree)
      {
        new_tree=new_arg;
      }
      else
        new_tree=new_tree->insert(new_arg);
    }
    if (e1->cmp_max_to_max(e2) < 0)
      e1=e1->next;				// e1 can't overlap next e2
    else
      e2=e2->next;
  }
  key1->free_tree();
  key2->free_tree();
  if (!new_tree)
    return &null_element;			// Impossible range
  new_tree->max_part_no= max_part_no;
  return new_tree;
}


/*
  Advance *e1 (an interval of root1) to the first interval that may overlap
  *e2. Returns 1 when *e1 or *e2 was moved past a non-overlapping interval
  and the caller should re-test, 0 when *e1 and *e2 overlap.
*/

static bool
get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1)
{
  (*e1)=root1->find_range(*e2);			// first e1->min < e2->min
  if ((*e1)->cmp_max_to_min(*e2) < 0)
  {
    if (!((*e1)=(*e1)->next))
      return 1;
    if ((*e1)->cmp_min_to_max(*e2) > 0)
    {
      (*e2)=(*e2)->next;
      return 1;
    }
  }
  return 0;
}


#ifndef DBUG_OFF
/*
  Verify SEL_TREE's weight.

  Recompute the weight and compare
*/
uint SEL_ARG::verify_weight()
{
  uint computed_weight= 0;
  SEL_ARG *first_arg= first();

  if (first_arg)
  {
    for (SEL_ARG *arg= first_arg; arg; arg= arg->next)
    {
      computed_weight++;
      if (arg->next_key_part)
        computed_weight+= arg->next_key_part->verify_weight();
    }
  }
  else
  {
    // first()=NULL means this is a special kind of SEL_ARG, e.g.
+ // SEL_ARG with type=MAYBE_KEY + computed_weight= 1; + if (next_key_part) + computed_weight += next_key_part->verify_weight(); + } + + if (computed_weight != weight) + { + sql_print_error("SEL_ARG weight mismatch: computed %u have %u\n", + computed_weight, weight); + DBUG_ASSERT(computed_weight == weight); // Fail an assertion + } + return computed_weight; +} +#endif + +static +SEL_ARG *key_or_with_limit(RANGE_OPT_PARAM *param, uint keyno, + SEL_ARG *key1, SEL_ARG *key2) +{ +#ifndef DBUG_OFF + if (key1) + key1->verify_weight(); + if (key2) + key2->verify_weight(); +#endif + + SEL_ARG *res= key_or(param, key1, key2); + res= enforce_sel_arg_weight_limit(param, keyno, res); +#ifndef DBUG_OFF + if (res) + res->verify_weight(); +#endif + return res; +} + + +static +SEL_ARG *key_and_with_limit(RANGE_OPT_PARAM *param, uint keyno, + SEL_ARG *key1, SEL_ARG *key2, uint clone_flag) +{ +#ifndef DBUG_OFF + if (key1) + key1->verify_weight(); + if (key2) + key2->verify_weight(); +#endif + SEL_ARG *res= key_and(param, key1, key2, clone_flag); + res= enforce_sel_arg_weight_limit(param, keyno, res); +#ifndef DBUG_OFF + if (res) + res->verify_weight(); +#endif + return res; +} + + +/** + Combine two range expression under a common OR. On a logical level, the + transformation is key_or( expr1, expr2 ) => expr1 OR expr2. + + Both expressions are assumed to be in the SEL_ARG format. In a logic sense, + theformat is reminiscent of DNF, since an expression such as the following + + ( 1 < kp1 < 10 AND p1 ) OR ( 10 <= kp2 < 20 AND p2 ) + + where there is a key consisting of keyparts ( kp1, kp2, ..., kpn ) and p1 + and p2 are valid SEL_ARG expressions over keyparts kp2 ... kpn, is a valid + SEL_ARG condition. The disjuncts appear ordered by the minimum endpoint of + the first range and ranges must not overlap. It follows that they are also + ordered by maximum endpoints. 
Thus + + ( 1 < kp1 <= 2 AND ( kp2 = 2 OR kp2 = 3 ) ) OR kp1 = 3 + + Is a a valid SER_ARG expression for a key of at least 2 keyparts. + + For simplicity, we will assume that expr2 is a single range predicate, + i.e. on the form ( a < x < b AND ... ). It is easy to generalize to a + disjunction of several predicates by subsequently call key_or for each + disjunct. + + The algorithm iterates over each disjunct of expr1, and for each disjunct + where the first keypart's range overlaps with the first keypart's range in + expr2: + + If the predicates are equal for the rest of the keyparts, or if there are + no more, the range in expr2 has its endpoints copied in, and the SEL_ARG + node in expr2 is deallocated. If more ranges became connected in expr1, the + surplus is also dealocated. If they differ, two ranges are created. + + - The range leading up to the overlap. Empty if endpoints are equal. + + - The overlapping sub-range. May be the entire range if they are equal. + + Finally, there may be one more range if expr2's first keypart's range has a + greater maximum endpoint than the last range in expr1. + + For the overlapping sub-range, we recursively call key_or. Thus in order to + compute key_or of + + (1) ( 1 < kp1 < 10 AND 1 < kp2 < 10 ) + + (2) ( 2 < kp1 < 20 AND 4 < kp2 < 20 ) + + We create the ranges 1 < kp <= 2, 2 < kp1 < 10, 10 <= kp1 < 20. For the + first one, we simply hook on the condition for the second keypart from (1) + : 1 < kp2 < 10. For the second range 2 < kp1 < 10, key_or( 1 < kp2 < 10, 4 + < kp2 < 20 ) is called, yielding 1 < kp2 < 20. For the last range, we reuse + the range 4 < kp2 < 20 from (2) for the second keypart. 
The result is thus + + ( 1 < kp1 <= 2 AND 1 < kp2 < 10 ) OR + ( 2 < kp1 < 10 AND 1 < kp2 < 20 ) OR + ( 10 <= kp1 < 20 AND 4 < kp2 < 20 ) +*/ +static SEL_ARG * +key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2) +{ + if (!key1) + { + if (key2) + { + key2->use_count--; + key2->free_tree(); + } + return 0; + } + if (!key2) + { + key1->use_count--; + key1->free_tree(); + return 0; + } + key1->use_count--; + key2->use_count--; + + if (key1->part != key2->part || + (key1->min_flag | key2->min_flag) & GEOM_FLAG) + { + key1->free_tree(); + key2->free_tree(); + return 0; // Can't optimize this + } + + // If one of the key is MAYBE_KEY then the found region may be bigger + if (key1->type == SEL_ARG::MAYBE_KEY) + { + key2->free_tree(); + key1->use_count++; + return key1; + } + if (key2->type == SEL_ARG::MAYBE_KEY) + { + key1->free_tree(); + key2->use_count++; + return key2; + } + + if (key1->use_count > 0) + { + if (key2->use_count == 0 || key1->elements > key2->elements) + { + swap_variables(SEL_ARG *,key1,key2); + } + if (key1->use_count > 0 && !(key1=key1->clone_tree(param))) + return 0; // OOM + } + + // Add tree at key2 to tree at key1 + bool key2_shared=key2->use_count != 0; + key1->maybe_flag|=key2->maybe_flag; + + /* + Notation for illustrations used in the rest of this function: + + Range: [--------] + ^ ^ + start stop + + Two overlapping ranges: + [-----] [----] [--] + [---] or [---] or [-------] + + Ambiguity: *** + The range starts or stops somewhere in the "***" range. + Example: a starts before b and may end before/the same plase/after b + a: [----***] + b: [---] + + Adjacent ranges: + Ranges that meet but do not overlap. Example: a = "x < 3", b = "x >= 3" + a: ----] + b: [---- + */ + + uint max_part_no= MY_MAX(key1->max_part_no, key2->max_part_no); + + for (key2=key2->first(); ; ) + { + /* + key1 consists of one or more ranges. tmp is the range currently + being handled. 
+ + initialize tmp to the latest range in key1 that starts the same + place or before the range in key2 starts + + key2: [------] + key1: [---] [-----] [----] + ^ + tmp + */ + if (key1->min_flag & NO_MIN_RANGE && + key1->max_flag & NO_MAX_RANGE) + { + if (key1->maybe_flag) + return new SEL_ARG(SEL_ARG::MAYBE_KEY); + return 0; // Always true OR + } + if (!key2) + break; + + SEL_ARG *tmp=key1->find_range(key2); + + /* + Used to describe how two key values are positioned compared to + each other. Consider key_value_a.(key_value_b): + + -2: key_value_a is smaller than key_value_b, and they are adjacent + -1: key_value_a is smaller than key_value_b (not adjacent) + 0: the key values are equal + 1: key_value_a is bigger than key_value_b (not adjacent) + -2: key_value_a is bigger than key_value_b, and they are adjacent + + Example: "cmp= tmp->cmp_max_to_min(key2)" + + key2: [-------- (10 <= x ...) + tmp: -----] (... x < 10) => cmp==-2 + tmp: ----] (... x <= 9) => cmp==-1 + tmp: ------] (... x = 10) => cmp== 0 + tmp: --------] (... x <= 12) => cmp== 1 + (cmp == 2 does not make sense for cmp_max_to_min()) + */ + int cmp= 0; + + if (!tmp) + { + /* + The range in key2 starts before the first range in key1. Use + the first range in key1 as tmp. 
+ + key2: [--------] + key1: [****--] [----] [-------] + ^ + tmp + */ + tmp=key1->first(); + cmp= -1; + } + else if ((cmp= tmp->cmp_max_to_min(key2)) < 0) + { + /* + This is the case: + key2: [-------] + tmp: [----**] + */ + SEL_ARG *next=tmp->next; + if (cmp == -2 && eq_tree(tmp->next_key_part,key2->next_key_part)) + { + /* + Adjacent (cmp==-2) and equal next_key_parts => ranges can be merged + + This is the case: + key2: [-------] + tmp: [----] + + Result: + key2: [-------------] => inserted into key1 below + tmp: => deleted + */ + SEL_ARG *key2_next=key2->next; + if (key2_shared) + { + if (!(key2=new SEL_ARG(*key2))) + return 0; // out of memory + key2->increment_use_count(key1->use_count+1); + key2->next=key2_next; // New copy of key2 + } + + key2->copy_min(tmp); + if (!(key1=key1->tree_delete(tmp))) + { // Only one key in tree + if (key2->min_flag & NO_MIN_RANGE && + key2->max_flag & NO_MAX_RANGE) + { + if (key2->maybe_flag) + return new SEL_ARG(SEL_ARG::MAYBE_KEY); + return 0; // Always true OR + } + key1=key2; + key1->make_root(); + key2=key2_next; + break; + } + } + if (!(tmp=next)) // Move to next range in key1. Now tmp.min > key2.min + break; // No more ranges in key1. Copy rest of key2 + } + + if (cmp < 0) + { + /* + This is the case: + key2: [--***] + tmp: [----] + */ + int tmp_cmp; + if ((tmp_cmp=tmp->cmp_min_to_max(key2)) > 0) + { + /* + This is the case: + key2: [------**] + tmp: [----] + */ + if (tmp_cmp == 2 && eq_tree(tmp->next_key_part,key2->next_key_part)) + { + /* + Adjacent ranges with equal next_key_part. Merge like this: + + This is the case: + key2: [------] + tmp: [-----] + + Result: + key2: [------] + tmp: [-------------] + + Then move on to next key2 range. 
+ */ + tmp->copy_min_to_min(key2); + key1->merge_flags(key2); + if (tmp->min_flag & NO_MIN_RANGE && + tmp->max_flag & NO_MAX_RANGE) + { + if (key1->maybe_flag) + return new SEL_ARG(SEL_ARG::MAYBE_KEY); + return 0; + } + key2->increment_use_count(-1); // Free not used tree + key2=key2->next; + continue; + } + else + { + /* + key2 not adjacent to tmp or has different next_key_part. + Insert into key1 and move to next range in key2 + + This is the case: + key2: [------**] + tmp: [----] + + Result: + key1_ [------**][----] + ^ ^ + insert tmp + */ + SEL_ARG *next=key2->next; + if (key2_shared) + { + SEL_ARG *cpy= new SEL_ARG(*key2); // Must make copy + if (!cpy) + return 0; // OOM + key1=key1->insert(cpy); + key2->increment_use_count(key1->use_count+1); + } + else + key1=key1->insert(key2); // Will destroy key2_root + key2=next; + continue; + } + } + } + + /* + The ranges in tmp and key2 are overlapping: + + key2: [----------] + tmp: [*****-----*****] + + Corollary: tmp.min <= key2.max + */ + if (eq_tree(tmp->next_key_part,key2->next_key_part)) + { + // Merge overlapping ranges with equal next_key_part + if (tmp->is_same(key2)) + { + /* + Found exact match of key2 inside key1. + Use the relevant range in key1. + */ + tmp->merge_flags(key2); // Copy maybe flags + key2->increment_use_count(-1); // Free not used tree + } + else + { + SEL_ARG *last= tmp; + SEL_ARG *first= tmp; + + /* + Find the last range in key1 that overlaps key2 and + where all ranges first...last have the same next_key_part as + key2. + + key2: [****----------------------*******] + key1: [--] [----] [---] [-----] [xxxx] + ^ ^ ^ + first last different next_key_part + + Since key2 covers them, the ranges between first and last + are merged into one range by deleting first...last-1 from + the key1 tree. In the figure, this applies to first and the + two consecutive ranges. 
The range of last is then extended: + * last.min: Set to MY_MIN(key2.min, first.min) + * last.max: If there is a last->next that overlaps key2 (i.e., + last->next has a different next_key_part): + Set adjacent to last->next.min + Otherwise: Set to MY_MAX(key2.max, last.max) + + Result: + key2: [****----------------------*******] + [--] [----] [---] => deleted from key1 + key1: [**------------------------***][xxxx] + ^ ^ + tmp=last different next_key_part + */ + while (last->next && last->next->cmp_min_to_max(key2) <= 0 && + eq_tree(last->next->next_key_part,key2->next_key_part)) + { + /* + last->next is covered by key2 and has same next_key_part. + last can be deleted + */ + SEL_ARG *save=last; + last=last->next; + key1=key1->tree_delete(save); + } + // Redirect tmp to last which will cover the entire range + tmp= last; + + /* + We need the minimum endpoint of first so we can compare it + with the minimum endpoint of the enclosing key2 range. + */ + last->copy_min(first); + bool full_range= last->copy_min(key2); + if (!full_range) + { + if (last->next && key2->cmp_max_to_min(last->next) >= 0) + { + /* + This is the case: + key2: [-------------] + key1: [***------] [xxxx] + ^ ^ + last different next_key_part + + Extend range of last up to last->next: + key2: [-------------] + key1: [***--------][xxxx] + */ + last->copy_min_to_max(last->next); + } + else + /* + This is the case: + key2: [--------*****] + key1: [***---------] [xxxx] + ^ ^ + last different next_key_part + + Extend range of last up to MY_MAX(last.max, key2.max): + key2: [--------*****] + key1: [***----------**] [xxxx] + */ + full_range= last->copy_max(key2); + } + if (full_range) + { // Full range + key1->free_tree(); + for (; key2 ; key2=key2->next) + key2->increment_use_count(-1); // Free not used tree + if (key1->maybe_flag) + return new SEL_ARG(SEL_ARG::MAYBE_KEY); + return 0; + } + } + } + + if (cmp >= 0 && tmp->cmp_min_to_min(key2) < 0) + { + /* + This is the case ("cmp>=0" means that tmp.max >= 
key2.min): + key2: [----] + tmp: [------------*****] + */ + + if (!tmp->next_key_part) + { + SEL_ARG *key2_next= key2->next; + if (key2_shared) + { + SEL_ARG *key2_cpy= new SEL_ARG(*key2); + if (!key2_cpy) + return 0; + key2= key2_cpy; + } + /* + tmp->next_key_part is empty: cut the range that is covered + by tmp from key2. + Reason: (key2->next_key_part OR tmp->next_key_part) will be + empty and therefore equal to tmp->next_key_part. Thus, this + part of the key2 range is completely covered by tmp. + */ + if (tmp->cmp_max_to_max(key2) >= 0) + { + /* + tmp covers the entire range in key2. + key2: [----] + tmp: [-----------------] + + Move on to next range in key2 + */ + key2->increment_use_count(-1); // Free not used tree + key2= key2_next; + } + else + { + /* + This is the case: + key2: [-------] + tmp: [---------] + + Result: + key2: [---] + tmp: [---------] + */ + key2->copy_max_to_min(tmp); + key2->next= key2_next; // In case of key2_shared + } + continue; + } + + /* + The ranges are overlapping but have not been merged because + next_key_part of tmp and key2 differ. 
+ key2: [----] + tmp: [------------*****] + + Split tmp in two where key2 starts: + key2: [----] + key1: [--------][--*****] + ^ ^ + insert tmp + */ + SEL_ARG *new_arg=tmp->clone_first(key2); + if (!new_arg) + return 0; // OOM + if ((new_arg->next_key_part= tmp->next_key_part)) + new_arg->increment_use_count(key1->use_count+1); + tmp->copy_min_to_min(key2); + key1=key1->insert(new_arg); + } // tmp.min >= key2.min due to this if() + + /* + Now key2.min <= tmp.min <= key2.max: + key2: [---------] + tmp: [****---*****] + */ + SEL_ARG key2_cpy(*key2); // Get copy we can modify + for (;;) + { + if (tmp->cmp_min_to_min(&key2_cpy) > 0) + { + /* + This is the case: + key2_cpy: [------------] + key1: [-*****] + ^ + tmp + + Result: + key2_cpy: [---] + key1: [-------][-*****] + ^ ^ + insert tmp + */ + SEL_ARG *new_arg=key2_cpy.clone_first(tmp); + if (!new_arg) + return 0; // OOM + if ((new_arg->next_key_part=key2_cpy.next_key_part)) + new_arg->increment_use_count(key1->use_count+1); + key1=key1->insert(new_arg); + key2_cpy.copy_min_to_min(tmp); + } + // Now key2_cpy.min == tmp.min + + if ((cmp= tmp->cmp_max_to_max(&key2_cpy)) <= 0) + { + /* + tmp.max <= key2_cpy.max: + key2_cpy: a) [-------] or b) [----] + tmp: [----] [----] + + Steps: + 1) Update next_key_part of tmp: OR it with key2_cpy->next_key_part. + 2) If case a: Insert range [tmp.max, key2_cpy.max] into key1 using + next_key_part of key2_cpy + + Result: + key1: a) [----][-] or b) [----] + */ + tmp->maybe_flag|= key2_cpy.maybe_flag; + key2_cpy.increment_use_count(key1->use_count+1); + + uint old_weight= tmp->next_key_part? tmp->next_key_part->weight: 0; + + tmp->next_key_part= key_or(param, tmp->next_key_part, + key2_cpy.next_key_part); + + uint new_weight= tmp->next_key_part? 
tmp->next_key_part->weight: 0; + key1->weight += (new_weight - old_weight); + + if (!cmp) + break; // case b: done with this key2 range + + // Make key2_cpy the range [tmp.max, key2_cpy.max] + key2_cpy.copy_max_to_min(tmp); + if (!(tmp=tmp->next)) + { + /* + No more ranges in key1. Insert key2_cpy and go to "end" + label to insert remaining ranges in key2 if any. + */ + SEL_ARG *tmp2= new SEL_ARG(key2_cpy); + if (!tmp2) + return 0; // OOM + key1=key1->insert(tmp2); + key2=key2->next; + goto end; + } + if (tmp->cmp_min_to_max(&key2_cpy) > 0) + { + /* + The next range in key1 does not overlap with key2_cpy. + Insert this range into key1 and move on to the next range + in key2. + */ + SEL_ARG *tmp2= new SEL_ARG(key2_cpy); + if (!tmp2) + return 0; // OOM + key1=key1->insert(tmp2); + break; + } + /* + key2_cpy overlaps with the next range in key1 and the case + is now "key2.min <= tmp.min <= key2.max". Go back to for(;;) + to handle this situation. + */ + continue; + } + else + { + /* + This is the case: + key2_cpy: [-------] + tmp: [------------] + + Result: + key1: [-------][---] + ^ ^ + new_arg tmp + Steps: + 0) If tmp->next_key_part is empty: do nothing. Reason: + (key2_cpy->next_key_part OR tmp->next_key_part) will be + empty and therefore equal to tmp->next_key_part. Thus, + the range in key2_cpy is completely covered by tmp + 1) Make new_arg with range [tmp.min, key2_cpy.max]. 
+ new_arg->next_key_part is OR between next_key_part + of tmp and key2_cpy + 2) Make tmp the range [key2.max, tmp.max] + 3) Insert new_arg into key1 + */ + if (!tmp->next_key_part) // Step 0 + { + key2_cpy.increment_use_count(-1); // Free not used tree + break; + } + SEL_ARG *new_arg=tmp->clone_last(&key2_cpy); + if (!new_arg) + return 0; // OOM + tmp->copy_max_to_min(&key2_cpy); + tmp->increment_use_count(key1->use_count+1); + /* Increment key count as it may be used for next loop */ + key2_cpy.increment_use_count(1); + new_arg->next_key_part= key_or(param, tmp->next_key_part, + key2_cpy.next_key_part); + key1=key1->insert(new_arg); + break; + } + } + // Move on to next range in key2 + key2=key2->next; + } + +end: + /* + Add key2 ranges that are non-overlapping with and higher than the + highest range in key1. + */ + while (key2) + { + SEL_ARG *next=key2->next; + if (key2_shared) + { + SEL_ARG *tmp=new SEL_ARG(*key2); // Must make copy + if (!tmp) + return 0; + key2->increment_use_count(key1->use_count+1); + key1=key1->insert(tmp); + } + else + key1=key1->insert(key2); // Will destroy key2_root + key2=next; + } + key1->use_count++; + + key1->max_part_no= max_part_no; + return key1; +} + + +/* Compare if two trees are equal */ + +static bool eq_tree(SEL_ARG* a,SEL_ARG *b) +{ + if (a == b) + return 1; + if (!a || !b || !a->is_same(b)) + return 0; + if (a->left != &null_element && b->left != &null_element) + { + if (!eq_tree(a->left,b->left)) + return 0; + } + else if (a->left != &null_element || b->left != &null_element) + return 0; + if (a->right != &null_element && b->right != &null_element) + { + if (!eq_tree(a->right,b->right)) + return 0; + } + else if (a->right != &null_element || b->right != &null_element) + return 0; + if (a->next_key_part != b->next_key_part) + { // Sub range + if (!a->next_key_part != !b->next_key_part || + !eq_tree(a->next_key_part, b->next_key_part)) + return 0; + } + return 1; +} + + +/* + Compute the MAX(key part) in this SEL_ARG 
graph. +*/ +uint SEL_ARG::get_max_key_part() const +{ + const SEL_ARG *cur; + uint max_part= part; + for (cur= first(); cur ; cur=cur->next) + { + if (cur->next_key_part) + { + uint mp= cur->next_key_part->get_max_key_part(); + max_part= MY_MAX(part, mp); + } + } + return max_part; +} + + +/* + Remove the SEL_ARG graph elements which have part > max_part. + + @detail + Also update weight for the graph and any modified subgraphs. +*/ + +void prune_sel_arg_graph(SEL_ARG *sel_arg, uint max_part) +{ + SEL_ARG *cur; + DBUG_ASSERT(max_part >= sel_arg->part); + + for (cur= sel_arg->first(); cur ; cur=cur->next) + { + if (cur->next_key_part) + { + if (cur->next_key_part->part > max_part) + { + // Remove cur->next_key_part. + sel_arg->weight -= cur->next_key_part->weight; + cur->next_key_part= NULL; + } + else + { + uint old_weight= cur->next_key_part->weight; + prune_sel_arg_graph(cur->next_key_part, max_part); + sel_arg->weight -= (old_weight - cur->next_key_part->weight); + } + } + } +} + + +/* + @brief + Make sure the passed SEL_ARG graph's weight is below SEL_ARG::MAX_WEIGHT, + by cutting off branches if necessary. + + @detail + @see declaration of SEL_ARG::weight for definition of weight. + + This function attempts to reduce the graph's weight by cutting off + SEL_ARG::next_key_part connections if necessary. + + We start with maximum used keypart and then remove one keypart after + another until the graph's weight is within the limit. + + @seealso + sel_arg_and_weight_heuristic(); + + @return + tree pointer The tree after processing, + NULL If it was not possible to reduce the weight of the tree below the + limit. 
+*/ + +SEL_ARG *enforce_sel_arg_weight_limit(RANGE_OPT_PARAM *param, uint keyno, + SEL_ARG *sel_arg) +{ + if (!sel_arg || sel_arg->type != SEL_ARG::KEY_RANGE || + !param->thd->variables.optimizer_max_sel_arg_weight) + return sel_arg; + + Field *field= sel_arg->field; + uint weight1= sel_arg->weight; + + while (1) + { + if (likely(sel_arg->weight <= param->thd->variables. + optimizer_max_sel_arg_weight)) + break; + + uint max_part= sel_arg->get_max_key_part(); + if (max_part == sel_arg->part) + { + /* + We don't return NULL right away as we want to have the information + about the changed tree in the optimizer trace. + */ + sel_arg= NULL; + break; + } + + max_part--; + prune_sel_arg_graph(sel_arg, max_part); + } + + uint weight2= sel_arg? sel_arg->weight : 0; + + if (weight2 != weight1) + { + Json_writer_object wrapper(param->thd); + Json_writer_object obj(param->thd, "enforce_sel_arg_weight_limit"); + if (param->using_real_indexes) + obj.add("index", param->table->key_info[param->real_keynr[keyno]].name); + else + obj.add("pseudo_index", field->field_name); + + obj.add("old_weight", (longlong)weight1); + obj.add("new_weight", (longlong)weight2); + } + return sel_arg; +} + + +/* + @detail + Do not combine the trees if their total weight is likely to exceed the + MAX_WEIGHT. + (It is possible that key1 has next_key_part that has empty overlap with + key2. In this case, the combined tree will have a smaller weight than we + predict. We assume this is rare.) 
+*/ + +static +bool sel_arg_and_weight_heuristic(RANGE_OPT_PARAM *param, SEL_ARG *key1, + SEL_ARG *key2) +{ + DBUG_ASSERT(key1->part < key2->part); + + ulong max_weight= param->thd->variables.optimizer_max_sel_arg_weight; + if (max_weight && key1->weight + key1->elements*key2->weight > max_weight) + { + Json_writer_object wrapper(param->thd); + Json_writer_object obj(param->thd, "sel_arg_weight_heuristic"); + obj.add("key1_field", key1->field->field_name); + obj.add("key2_field", key2->field->field_name); + obj.add("key1_weight", (longlong)key1->weight); + obj.add("key2_weight", (longlong)key2->weight); + return true; // Discard key2 + } + return false; +} + + +SEL_ARG * +SEL_ARG::insert(SEL_ARG *key) +{ + SEL_ARG *element,**UNINIT_VAR(par),*UNINIT_VAR(last_element); + + for (element= this; element != &null_element ; ) + { + last_element=element; + if (key->cmp_min_to_min(element) > 0) + { + par= &element->right; element= element->right; + } + else + { + par = &element->left; element= element->left; + } + } + *par=key; + key->parent=last_element; + /* Link in list */ + if (par == &last_element->left) + { + key->next=last_element; + if ((key->prev=last_element->prev)) + key->prev->next=key; + last_element->prev=key; + } + else + { + if ((key->next=last_element->next)) + key->next->prev=key; + key->prev=last_element; + last_element->next=key; + } + key->left=key->right= &null_element; + SEL_ARG *root=rb_insert(key); // rebalance tree + root->use_count=this->use_count; // copy root info + root->elements= this->elements+1; + /* + The new weight is: + old root's weight + +1 for the weight of the added element + + next_key_part's weight of the added element + */ + root->weight = weight + 1 + (key->next_key_part? 
key->next_key_part->weight: 0); + root->maybe_flag=this->maybe_flag; + return root; +} + + +/* +** Find best key with min <= given key +** Because the call context this should never return 0 to get_range +*/ + +SEL_ARG * +SEL_ARG::find_range(SEL_ARG *key) +{ + SEL_ARG *element=this,*found=0; + + for (;;) + { + if (element == &null_element) + return found; + int cmp=element->cmp_min_to_min(key); + if (cmp == 0) + return element; + if (cmp < 0) + { + found=element; + element=element->right; + } + else + element=element->left; + } +} + + +/* + Remove a element from the tree + + SYNOPSIS + tree_delete() + key Key that is to be deleted from tree (this) + + NOTE + This also frees all sub trees that is used by the element + + RETURN + root of new tree (with key deleted) +*/ + +SEL_ARG * +SEL_ARG::tree_delete(SEL_ARG *key) +{ + enum leaf_color remove_color; + SEL_ARG *root,*nod,**par,*fix_par; + DBUG_ENTER("tree_delete"); + + root=this; + this->parent= 0; + + /* + Compute the weight the tree will have after the element is removed. + We remove the element itself (weight=1) + and the sub-graph connected to its next_key_part. + */ + uint new_weight= root->weight - (1 + (key->next_key_part? + key->next_key_part->weight : 0)); + + DBUG_ASSERT(root->weight >= (1 + (key->next_key_part ? + key->next_key_part->weight : 0))); + + /* Unlink from list */ + if (key->prev) + key->prev->next=key->next; + if (key->next) + key->next->prev=key->prev; + key->increment_use_count(-1); + if (!key->parent) + par= &root; + else + par=key->parent_ptr(); + + if (key->left == &null_element) + { + *par=nod=key->right; + fix_par=key->parent; + if (nod != &null_element) + nod->parent=fix_par; + remove_color= key->color; + } + else if (key->right == &null_element) + { + *par= nod=key->left; + nod->parent=fix_par=key->parent; + remove_color= key->color; + } + else + { + SEL_ARG *tmp=key->next; // next bigger key (exist!) 
+ nod= *tmp->parent_ptr()= tmp->right; // unlink tmp from tree + fix_par=tmp->parent; + if (nod != &null_element) + nod->parent=fix_par; + remove_color= tmp->color; + + tmp->parent=key->parent; // Move node in place of key + (tmp->left=key->left)->parent=tmp; + if ((tmp->right=key->right) != &null_element) + tmp->right->parent=tmp; + tmp->color=key->color; + *par=tmp; + if (fix_par == key) // key->right == key->next + fix_par=tmp; // new parent of nod + } + + if (root == &null_element) + DBUG_RETURN(0); // Maybe root later + if (remove_color == BLACK) + root=rb_delete_fixup(root,nod,fix_par); + test_rb_tree(root,root->parent); + + root->use_count=this->use_count; // Fix root counters + root->weight= new_weight; + root->elements=this->elements-1; + root->maybe_flag=this->maybe_flag; + DBUG_RETURN(root); +} + + + /* Functions to fix up the tree after insert and delete */ + +static void left_rotate(SEL_ARG **root,SEL_ARG *leaf) +{ + SEL_ARG *y=leaf->right; + leaf->right=y->left; + if (y->left != &null_element) + y->left->parent=leaf; + if (!(y->parent=leaf->parent)) + *root=y; + else + *leaf->parent_ptr()=y; + y->left=leaf; + leaf->parent=y; +} + +static void right_rotate(SEL_ARG **root,SEL_ARG *leaf) +{ + SEL_ARG *y=leaf->left; + leaf->left=y->right; + if (y->right != &null_element) + y->right->parent=leaf; + if (!(y->parent=leaf->parent)) + *root=y; + else + *leaf->parent_ptr()=y; + y->right=leaf; + leaf->parent=y; +} + + +SEL_ARG * +SEL_ARG::rb_insert(SEL_ARG *leaf) +{ + SEL_ARG *y,*par,*par2,*root; + root= this; root->parent= 0; + + leaf->color=RED; + while (leaf != root && (par= leaf->parent)->color == RED) + { // This can't be root or 1 level under + if (par == (par2= leaf->parent->parent)->left) + { + y= par2->right; + if (y->color == RED) + { + par->color=BLACK; + y->color=BLACK; + leaf=par2; + leaf->color=RED; /* And the loop continues */ + } + else + { + if (leaf == par->right) + { + left_rotate(&root,leaf->parent); + par=leaf; /* leaf is now parent to old 
leaf */ + } + par->color=BLACK; + par2->color=RED; + right_rotate(&root,par2); + break; + } + } + else + { + y= par2->left; + if (y->color == RED) + { + par->color=BLACK; + y->color=BLACK; + leaf=par2; + leaf->color=RED; /* And the loop continues */ + } + else + { + if (leaf == par->left) + { + right_rotate(&root,par); + par=leaf; + } + par->color=BLACK; + par2->color=RED; + left_rotate(&root,par2); + break; + } + } + } + root->color=BLACK; + test_rb_tree(root,root->parent); + return root; +} + + +SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key,SEL_ARG *par) +{ + SEL_ARG *x,*w; + root->parent=0; + + x= key; + while (x != root && x->color == SEL_ARG::BLACK) + { + if (x == par->left) + { + w=par->right; + if (w->color == SEL_ARG::RED) + { + w->color=SEL_ARG::BLACK; + par->color=SEL_ARG::RED; + left_rotate(&root,par); + w=par->right; + } + if (w->left->color == SEL_ARG::BLACK && w->right->color == SEL_ARG::BLACK) + { + w->color=SEL_ARG::RED; + x=par; + } + else + { + if (w->right->color == SEL_ARG::BLACK) + { + w->left->color=SEL_ARG::BLACK; + w->color=SEL_ARG::RED; + right_rotate(&root,w); + w=par->right; + } + w->color=par->color; + par->color=SEL_ARG::BLACK; + w->right->color=SEL_ARG::BLACK; + left_rotate(&root,par); + x=root; + break; + } + } + else + { + w=par->left; + if (w->color == SEL_ARG::RED) + { + w->color=SEL_ARG::BLACK; + par->color=SEL_ARG::RED; + right_rotate(&root,par); + w=par->left; + } + if (w->right->color == SEL_ARG::BLACK && w->left->color == SEL_ARG::BLACK) + { + w->color=SEL_ARG::RED; + x=par; + } + else + { + if (w->left->color == SEL_ARG::BLACK) + { + w->right->color=SEL_ARG::BLACK; + w->color=SEL_ARG::RED; + left_rotate(&root,w); + w=par->left; + } + w->color=par->color; + par->color=SEL_ARG::BLACK; + w->left->color=SEL_ARG::BLACK; + right_rotate(&root,par); + x=root; + break; + } + } + par=x->parent; + } + x->color=SEL_ARG::BLACK; + return root; +} + + + /* Test that the properties for a red-black tree hold */ + +#ifdef EXTRA_DEBUG 
+int test_rb_tree(SEL_ARG *element,SEL_ARG *parent) +{ + int count_l,count_r; + + if (element == &null_element) + return 0; // Found end of tree + if (element->parent != parent) + { + sql_print_error("Wrong tree: Parent doesn't point at parent"); + return -1; + } + if (element->color == SEL_ARG::RED && + (element->left->color == SEL_ARG::RED || + element->right->color == SEL_ARG::RED)) + { + sql_print_error("Wrong tree: Found two red in a row"); + return -1; + } + if (element->left == element->right && element->left != &null_element) + { // Dummy test + sql_print_error("Wrong tree: Found right == left"); + return -1; + } + count_l=test_rb_tree(element->left,element); + count_r=test_rb_tree(element->right,element); + if (count_l >= 0 && count_r >= 0) + { + if (count_l == count_r) + return count_l+(element->color == SEL_ARG::BLACK); + sql_print_error("Wrong tree: Incorrect black-count: %d - %d", + count_l,count_r); + } + return -1; // Error, no more warnings +} + + +/** + Count how many times SEL_ARG graph "root" refers to its part "key" via + transitive closure. + + @param root An RB-Root node in a SEL_ARG graph. + @param key Another RB-Root node in that SEL_ARG graph. + + The passed "root" node may refer to "key" node via root->next_key_part, + root->next->n + + This function counts how many times the node "key" is referred (via + SEL_ARG::next_key_part) by + - intervals of RB-tree pointed by "root", + - intervals of RB-trees that are pointed by SEL_ARG::next_key_part from + intervals of RB-tree pointed by "root", + - and so on. + + Here is an example (horizontal links represent next_key_part pointers, + vertical links - next/prev prev pointers): + + +----+ $ + |root|-----------------+ + +----+ $ | + | $ | + | $ | + +----+ +---+ $ | +---+ Here the return value + | |- ... -| |---$-+--+->|key| will be 4. + +----+ +---+ $ | | +---+ + | $ | | + ... $ | | + | $ | | + +----+ +---+ $ | | + | |---| |---------+ | + +----+ +---+ $ | + | | $ | + ... 
     graph; see the diagram in the use_count definition above)

  @return
    Number of links to "key" from nodes reachable from "root".
*/

/*
  Count how many times the subgraph "key" is referenced as next_key_part
  from the RB-tree rooted at "root" (recursing into deeper keyparts).
  Debug-only helper used to validate SEL_ARG::use_count.
*/
static ulong count_key_part_usage(SEL_ARG *root, SEL_ARG *key)
{
  ulong count= 0;
  /* Walk this RB-tree in key order */
  for (root=root->first(); root ; root=root->next)
  {
    if (root->next_key_part)
    {
      if (root->next_key_part == key)
        count++;
      /* Only recurse into trees for earlier keyparts to avoid cycles */
      if (root->next_key_part->part < key->part)
        count+=count_key_part_usage(root->next_key_part,key);
    }
  }
  return count;
}


/*
  Check if SEL_ARG::use_count value is correct

  SYNOPSIS
    SEL_ARG::test_use_count()
      root  The root node of the SEL_ARG graph (an RB-tree root node that
            has the least value of sel_arg->part in the entire graph, and
            thus is the "origin" of the graph)

  DESCRIPTION
    Check if SEL_ARG::use_count value is correct. See the definition of
    use_count for what is "correct".  Mismatches are reported to the log;
    the check is purely diagnostic and never modifies the graph.
*/

void SEL_ARG::test_use_count(SEL_ARG *root)
{
  uint e_count=0;

  if (this->type != SEL_ARG::KEY_RANGE)
    return;
  for (SEL_ARG *pos=first(); pos ; pos=pos->next)
  {
    e_count++;
    if (pos->next_key_part)
    {
      ulong count=count_key_part_usage(root,pos->next_key_part);
      /* use_count may legitimately be an over-estimate, never an under-estimate */
      if (count > pos->next_key_part->use_count)
      {
        sql_print_information("Use_count: Wrong count for key at %p: %lu "
                              "should be %lu", pos,
                              pos->next_key_part->use_count, count);
        return;
      }
      pos->next_key_part->test_use_count(root);
    }
  }
  if (e_count != elements)
    sql_print_warning("Wrong use count: %u (should be %u) for tree at %p",
                      e_count, elements, this);
}
#endif

/*
  Calculate cost and E(#rows) for a given index and intervals tree

  SYNOPSIS
    check_quick_select()
      param             Parameter from test_quick_select
      idx               Number of index to use in PARAM::key SEL_TREE::key
      index_only        TRUE  - assume only index tuples will be accessed
                        FALSE - assume full table rows will be read
      tree              Transformed selection condition, tree->key[idx] holds
                        the intervals for the given index.
      update_tbl_stats  TRUE <=> update table->quick_* with information
                        about range scan we've evaluated.
      mrr_flags         INOUT MRR access flags
      cost              OUT   Scan cost
      is_ror_scan       is set to reflect if the key scan is a ROR (see
                        is_key_scan_ror function for more info)

  NOTES
    param->table->opt_range*, param->range_count (and maybe others) are
    updated with data of given key scan, see quick_range_seq_next for details.

  RETURN
    Estimate # of records to be retrieved.
    HA_POS_ERROR if estimate calculation failed due to table handler problems.
*/

static
ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
                           SEL_ARG *tree, bool update_tbl_stats,
                           uint *mrr_flags, uint *bufsize, Cost_estimate *cost,
                           bool *is_ror_scan)
{
  SEL_ARG_RANGE_SEQ seq;
  RANGE_SEQ_IF seq_if = {NULL, sel_arg_range_seq_init, sel_arg_range_seq_next, 0, 0};
  handler *file= param->table->file;
  ha_rows rows= HA_POS_ERROR;
  uint keynr= param->real_keynr[idx];
  DBUG_ENTER("check_quick_select");

  /* Range not calculated yet */
  param->quick_rows[keynr]= HA_POS_ERROR;

  /* Handle cases when we don't have a valid non-empty list of range */
  if (!tree)
    DBUG_RETURN(HA_POS_ERROR);
  if (tree->type == SEL_ARG::IMPOSSIBLE)
    DBUG_RETURN(0L);                            // Condition can never be true
  if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
    DBUG_RETURN(HA_POS_ERROR);                  // Don't use tree

  /* Set up the range sequence the MRR interface will iterate over */
  seq.keyno= idx;
  seq.real_keyno= keynr;
  seq.key_parts= param->key[idx];
  seq.param= param;
  seq.start= tree;

  param->range_count=0;
  param->max_key_parts=0;

  /* Assume ROR until the engine or the range shape proves otherwise */
  seq.is_ror_scan= TRUE;
  if (file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
    seq.is_ror_scan= FALSE;

  *mrr_flags= param->force_default_mrr? HA_MRR_USE_DEFAULT_IMPL: 0;
  /*
    Pass HA_MRR_SORTED to see if MRR implementation can handle sorting.
  */
  *mrr_flags|= HA_MRR_NO_ASSOCIATION | HA_MRR_SORTED;

  // TODO: param->max_key_parts holds 0 now, and not the #keyparts used.
  // Passing wrong second argument to index_flags() makes no difference for
  // most storage engines but might be an issue for MyRocks with certain
  // datatypes.
  if (index_only &&
      (file->index_flags(keynr, param->max_key_parts, 1) & HA_KEYREAD_ONLY) &&
      !(file->index_flags(keynr, param->max_key_parts, 1) & HA_CLUSTERED_INDEX))
     *mrr_flags |= HA_MRR_INDEX_ONLY;

  /* Non-SELECT statements always use the default MRR implementation */
  if (param->thd->lex->sql_command != SQLCOM_SELECT)
    *mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;

  *bufsize= param->thd->variables.mrr_buff_size;
  /*
    Skip materialized derived table/view result table from MRR check as
    they aren't contain any data yet.
  */
  if (param->table->pos_in_table_list->is_non_derived())
    rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0,
                                            bufsize, mrr_flags, cost);
  param->quick_rows[keynr]= rows;
  if (rows != HA_POS_ERROR)
  {
    ha_rows table_records= param->table->stat_records();
    if (rows > table_records)
    {
      /*
        For any index the total number of records within all ranges
        cannot be be bigger than the number of records in the table.
        This check is needed as sometimes that table statistics or range
        estimates may be slightly out of sync.
      */
      rows= table_records;
      set_if_bigger(rows, 1);
      param->quick_rows[keynr]= rows;
    }
    param->possible_keys.set_bit(keynr);
    if (update_tbl_stats)
    {
      /* Publish this scan's statistics for later cost comparisons */
      param->table->opt_range_keys.set_bit(keynr);
      param->table->opt_range[keynr].key_parts= param->max_key_parts;
      param->table->opt_range[keynr].ranges= param->range_count;
      param->table->opt_range_condition_rows=
        MY_MIN(param->table->opt_range_condition_rows, rows);
      param->table->opt_range[keynr].rows= rows;
      param->table->opt_range[keynr].cost= cost->total_cost();
      /* A clustered key scan always reads the row, so index-only cost is 0 */
      if (param->table->file->is_clustering_key(keynr))
        param->table->opt_range[keynr].index_only_cost= 0;
      else
        param->table->opt_range[keynr].index_only_cost= cost->index_only_cost();
    }
  }

  /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */
  enum ha_key_alg key_alg= param->table->key_info[seq.real_keyno].algorithm;
  if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
  {
    /*
      All scans are non-ROR scans for those index types.
      TODO: Don't have this logic here, make table engines return
      appropriate flags instead.
    */
    seq.is_ror_scan= FALSE;
  }
  else if (param->table->file->is_clustering_key(keynr))
  {
    /* Clustered PK scan is always a ROR scan (TODO: same as above) */
    seq.is_ror_scan= TRUE;
  }
  else if (param->range_count > 1)
  {
    /*
      Scaning multiple key values in the index: the records are ROR
      for each value, but not between values. E.g, "SELECT ... x IN
      (1,3)" returns ROR order for all records with x=1, then ROR
      order for records with x=3
    */
    seq.is_ror_scan= FALSE;
  }
  *is_ror_scan= seq.is_ror_scan;

  DBUG_PRINT("exit", ("Records: %lu", (ulong) rows));
  DBUG_RETURN(rows); //psergey-merge:todo: maintain first_null_comp.
}


/*
  Check if key scan on given index with equality conditions on first n key
  parts is a ROR scan.

  SYNOPSIS
    is_key_scan_ror()
      param  Parameter from test_quick_select
      keynr  Number of key in the table.
             The key must not be a clustered
             primary key.
      nparts Number of first key parts for which equality conditions
             are present.

  NOTES
    ROR (Rowid Ordered Retrieval) key scan is a key scan that produces
    ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function)

    This function is needed to handle a practically-important special case:
    an index scan is a ROR scan if it is done using a condition in form

        "key1_1=c_1 AND ... AND key1_n=c_n"

    where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n])

    and the table has a clustered Primary Key defined as
      PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k)

    i.e. the first key parts of it are identical to uncovered parts ot the
    key being scanned. This function assumes that the index flags do not
    include HA_KEY_SCAN_NOT_ROR flag (that is checked elsewhere).

    Check (1) is made in quick_range_seq_next()

  RETURN
    TRUE   The scan is ROR-scan
    FALSE  Otherwise
*/

static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
{
  KEY *table_key= param->table->key_info + keynr;
  /* key_part points just past the last keypart covered by the equalities */
  KEY_PART_INFO *key_part= table_key->key_part + nparts;
  KEY_PART_INFO *key_part_end= (table_key->key_part +
                                table_key->user_defined_key_parts);
  uint pk_number;

  /* Rowids of such engines cannot be compared, so no ROR order exists */
  if (param->table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)
    return false;

  /* Prefix-length (partial) keyparts break rowid ordering within a value */
  for (KEY_PART_INFO *kp= table_key->key_part; kp < key_part; kp++)
  {
    field_index_t fieldnr= (param->table->key_info[keynr].
                            key_part[kp - table_key->key_part].fieldnr - 1);
    if (param->table->field[fieldnr]->key_length() != kp->length)
      return FALSE;
  }

  /*
    If there are equalities for all key parts, it is a ROR scan. If there are
    equalities all keyparts and even some of key parts from "Extended Key"
    index suffix, it is a ROR-scan, too.
  */
  if (key_part >= key_part_end)
    return TRUE;

  key_part= table_key->key_part + nparts;
  pk_number= param->table->s->primary_key;
  if (!param->table->file->pk_is_clustering_key(pk_number))
    return FALSE;

  /*
    The uncovered suffix of the scanned key must exactly match a prefix of
    the clustered PK for the rowids to come out ordered.
  */
  KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part;
  KEY_PART_INFO *pk_part_end= pk_part +
                          param->table->key_info[pk_number].user_defined_key_parts;
  for (;(key_part!=key_part_end) && (pk_part != pk_part_end);
       ++key_part, ++pk_part)
  {
    if ((key_part->field != pk_part->field) ||
        (key_part->length != pk_part->length))
      return FALSE;
  }
  return (key_part == key_part_end);
}


/*
  Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key.

  SYNOPSIS
    get_quick_select()
      param
      idx            Index of used key in param->key.
      key_tree       SEL_ARG tree for the used key
      mrr_flags      MRR parameter for quick select
      mrr_buf_size   MRR parameter for quick select
      parent_alloc   If not NULL, use it to allocate memory for
                     quick select data. Otherwise use quick->alloc.
  NOTES
    The caller must call QUICK_SELECT::init for returned quick select.

    CAUTION! This function may change thd->mem_root to a MEM_ROOT which will be
    deallocated when the returned quick select is deleted.
+ + RETURN + NULL on error + otherwise created quick select +*/ + +QUICK_RANGE_SELECT * +get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, uint mrr_flags, + uint mrr_buf_size, MEM_ROOT *parent_alloc) +{ + QUICK_RANGE_SELECT *quick; + bool create_err= FALSE; + DBUG_ENTER("get_quick_select"); + + if (param->table->key_info[param->real_keynr[idx]].flags & HA_SPATIAL) + quick=new QUICK_RANGE_SELECT_GEOM(param->thd, param->table, + param->real_keynr[idx], + MY_TEST(parent_alloc), + parent_alloc, &create_err); + else + quick=new QUICK_RANGE_SELECT(param->thd, param->table, + param->real_keynr[idx], + MY_TEST(parent_alloc), NULL, &create_err); + + if (quick) + { + if (create_err || + get_quick_keys(param,quick,param->key[idx],key_tree,param->min_key,0, + param->max_key,0)) + { + delete quick; + quick=0; + } + else + { + KEY *keyinfo= param->table->key_info+param->real_keynr[idx]; + quick->mrr_flags= mrr_flags; + quick->mrr_buf_size= mrr_buf_size; + quick->key_parts=(KEY_PART*) + memdup_root(parent_alloc? 
parent_alloc : &quick->alloc, + (char*) param->key[idx], + sizeof(KEY_PART)* + param->table->actual_n_key_parts(keyinfo)); + } + } + DBUG_RETURN(quick); +} + + +void SEL_ARG::store_next_min_max_keys(KEY_PART *key, + uchar **cur_min_key, uint *cur_min_flag, + uchar **cur_max_key, uint *cur_max_flag, + int *min_part, int *max_part) +{ + DBUG_ASSERT(next_key_part); + const bool asc = !(key[next_key_part->part].flag & HA_REVERSE_SORT); + + if (!get_min_flag(key)) + { + if (asc) + { + *min_part += next_key_part->store_min_key(key, cur_min_key, + cur_min_flag, MAX_KEY, true); + } + else + { + uint tmp_flag = invert_min_flag(*cur_min_flag); + *min_part += next_key_part->store_max_key(key, cur_min_key, &tmp_flag, + MAX_KEY, true); + *cur_min_flag = invert_max_flag(tmp_flag); + } + } + if (!get_max_flag(key)) + { + if (asc) + { + *max_part += next_key_part->store_max_key(key, cur_max_key, + cur_max_flag, MAX_KEY, false); + } + else + { + uint tmp_flag = invert_max_flag(*cur_max_flag); + *max_part += next_key_part->store_min_key(key, cur_max_key, &tmp_flag, + MAX_KEY, false); + *cur_max_flag = invert_min_flag(tmp_flag); + } + } +} + +/* +** Fix this to get all possible sub_ranges +*/ +bool +get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key, + SEL_ARG *key_tree, uchar *min_key,uint min_key_flag, + uchar *max_key, uint max_key_flag) +{ + QUICK_RANGE *range; + uint flag; + int min_part= key_tree->part-1, // # of keypart values in min_key buffer + max_part= key_tree->part-1; // # of keypart values in max_key buffer + + const bool asc = !(key[key_tree->part].flag & HA_REVERSE_SORT); + SEL_ARG *next_tree = asc ? 
key_tree->left : key_tree->right; + if (next_tree != &null_element) + { + if (get_quick_keys(param,quick,key,next_tree, + min_key,min_key_flag, max_key, max_key_flag)) + return 1; + } + uchar *tmp_min_key=min_key,*tmp_max_key=max_key; + + key_tree->store_min_max(key, key[key_tree->part].store_length, + &tmp_min_key, min_key_flag, + &tmp_max_key, max_key_flag, + &min_part, &max_part); + + if (key_tree->next_key_part && + key_tree->next_key_part->type == SEL_ARG::KEY_RANGE && + key_tree->next_key_part->part == key_tree->part+1) + { // const key as prefix + if ((tmp_min_key - min_key) == (tmp_max_key - max_key) && + memcmp(min_key, max_key, (uint)(tmp_max_key - max_key))==0 && + key_tree->min_flag==0 && key_tree->max_flag==0) + { + // psergey-note: simplified the parameters below as follows: + // min_key_flag | key_tree->min_flag -> min_key_flag + // max_key_flag | key_tree->max_flag -> max_key_flag + if (get_quick_keys(param,quick,key,key_tree->next_key_part, + tmp_min_key, min_key_flag, + tmp_max_key, max_key_flag)) + return 1; + goto end; // Ugly, but efficient + } + { + uint tmp_min_flag= key_tree->get_min_flag(key); + uint tmp_max_flag= key_tree->get_max_flag(key); + + key_tree->store_next_min_max_keys(key, + &tmp_min_key, &tmp_min_flag, + &tmp_max_key, &tmp_max_flag, + &min_part, &max_part); + flag=tmp_min_flag | tmp_max_flag; + } + } + else + { + if (asc) + { + flag= (key_tree->min_flag & GEOM_FLAG) ? key_tree->min_flag: + (key_tree->min_flag | + key_tree->max_flag); + } + else + { + // Invert flags for DESC keypart + flag= invert_min_flag(key_tree->min_flag) | + invert_max_flag(key_tree->max_flag); + } + } + + /* + Ensure that some part of min_key and max_key are used. 
If not, + regard this as no lower/upper range + */ + if ((flag & GEOM_FLAG) == 0) + { + if (tmp_min_key != param->min_key) + flag&= ~NO_MIN_RANGE; + else + flag|= NO_MIN_RANGE; + if (tmp_max_key != param->max_key) + flag&= ~NO_MAX_RANGE; + else + flag|= NO_MAX_RANGE; + } + if (flag == 0) + { + uint length= (uint) (tmp_min_key - param->min_key); + if (length == (uint) (tmp_max_key - param->max_key) && + !memcmp(param->min_key,param->max_key,length)) + { + KEY *table_key=quick->head->key_info+quick->index; + flag=EQ_RANGE; + if ((table_key->flags & HA_NOSAME) && + min_part == key_tree->part && + key_tree->part == table_key->user_defined_key_parts-1) + { + DBUG_ASSERT(min_part == max_part); + if ((table_key->flags & HA_NULL_PART_KEY) && + null_part_in_key(key, + param->min_key, + (uint) (tmp_min_key - param->min_key))) + flag|= NULL_RANGE; + else + flag|= UNIQUE_RANGE; + } + } + } + + /* Get range for retrieving rows in QUICK_SELECT::get_next */ + if (!(range= new (param->thd->mem_root) QUICK_RANGE( + param->thd, + param->min_key, + (uint) (tmp_min_key - param->min_key), + min_part >=0 ? make_keypart_map(min_part) : 0, + param->max_key, + (uint) (tmp_max_key - param->max_key), + max_part >=0 ? make_keypart_map(max_part) : 0, + flag))) + return 1; // out of memory + + set_if_bigger(quick->max_used_key_length, range->min_length); + set_if_bigger(quick->max_used_key_length, range->max_length); + set_if_bigger(quick->used_key_parts, (uint) key_tree->part+1); + if (insert_dynamic(&quick->ranges, (uchar*) &range)) + return 1; + + end: + next_tree= asc ? 
key_tree->right : key_tree->left; + if (next_tree != &null_element) + return get_quick_keys(param,quick,key,next_tree, + min_key,min_key_flag, + max_key,max_key_flag); + return 0; +} + +/* + Return 1 if there is only one range and this uses the whole unique key +*/ + +bool QUICK_RANGE_SELECT::unique_key_range() +{ + if (ranges.elements == 1) + { + QUICK_RANGE *tmp= *((QUICK_RANGE**)ranges.buffer); + if ((tmp->flag & (EQ_RANGE | NULL_RANGE)) == EQ_RANGE) + { + KEY *key=head->key_info+index; + return (key->flags & HA_NOSAME) && key->key_length == tmp->min_length; + } + } + return 0; +} + + + +/* + Return TRUE if any part of the key is NULL + + SYNOPSIS + null_part_in_key() + key_part Array of key parts (index description) + key Key values tuple + length Length of key values tuple in bytes. + + RETURN + TRUE The tuple has at least one "keypartX is NULL" + FALSE Otherwise +*/ + +static bool null_part_in_key(KEY_PART *key_part, const uchar *key, uint length) +{ + for (const uchar *end=key+length ; + key < end; + key+= key_part++->store_length) + { + if (key_part->null_bit && *key) + return 1; + } + return 0; +} + + +bool QUICK_SELECT_I::is_keys_used(const MY_BITMAP *fields) +{ + return is_key_used(head, index, fields); +} + +bool QUICK_INDEX_SORT_SELECT::is_keys_used(const MY_BITMAP *fields) +{ + QUICK_RANGE_SELECT *quick; + List_iterator_fast it(quick_selects); + while ((quick= it++)) + { + if (is_key_used(head, quick->index, fields)) + return 1; + } + return 0; +} + +bool QUICK_ROR_INTERSECT_SELECT::is_keys_used(const MY_BITMAP *fields) +{ + QUICK_SELECT_WITH_RECORD *qr; + List_iterator_fast it(quick_selects); + while ((qr= it++)) + { + if (is_key_used(head, qr->quick->index, fields)) + return 1; + } + return 0; +} + +bool QUICK_ROR_UNION_SELECT::is_keys_used(const MY_BITMAP *fields) +{ + QUICK_SELECT_I *quick; + List_iterator_fast it(quick_selects); + while ((quick= it++)) + { + if (quick->is_keys_used(fields)) + return 1; + } + return 0; +} + + +FT_SELECT 
*get_ft_select(THD *thd, TABLE *table, uint key) +{ + bool create_err= FALSE; + FT_SELECT *fts= new FT_SELECT(thd, table, key, &create_err); + if (create_err) + { + delete fts; + return NULL; + } + else + return fts; +} + +/* + Create quick select from ref/ref_or_null scan. + + SYNOPSIS + get_quick_select_for_ref() + thd Thread handle + table Table to access + ref ref[_or_null] scan parameters + records Estimate of number of records (needed only to construct + quick select) + NOTES + This allocates things in a new memory root, as this may be called many + times during a query. + + RETURN + Quick select that retrieves the same rows as passed ref scan + NULL on error. +*/ + +QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, + TABLE_REF *ref, ha_rows records) +{ + MEM_ROOT *old_root, *alloc; + QUICK_RANGE_SELECT *quick; + KEY *key_info = &table->key_info[ref->key]; + KEY_PART *key_part; + QUICK_RANGE *range; + uint part; + bool create_err= FALSE; + Cost_estimate cost; + uint max_used_key_len; + + old_root= thd->mem_root; + /* The following call may change thd->mem_root */ + quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0, 0, &create_err); + /* save mem_root set by QUICK_RANGE_SELECT constructor */ + alloc= thd->mem_root; + /* + return back default mem_root (thd->mem_root) changed by + QUICK_RANGE_SELECT constructor + */ + thd->mem_root= old_root; + + if (!quick || create_err || quick->init()) + goto err; + quick->records= records; + + if ((cp_buffer_from_ref(thd, table, ref) && + unlikely(thd->is_fatal_error)) || + unlikely(!(range= new(alloc) QUICK_RANGE()))) + goto err; // out of memory + + range->min_key= range->max_key= ref->key_buff; + range->min_length= range->max_length= ref->key_length; + range->min_keypart_map= range->max_keypart_map= + make_prev_keypart_map(ref->key_parts); + range->flag= EQ_RANGE; + + if (unlikely(!(quick->key_parts=key_part=(KEY_PART *) + alloc_root(&quick->alloc,sizeof(KEY_PART)*ref->key_parts)))) + goto err; + + 
max_used_key_len=0; + for (part=0 ; part < ref->key_parts ;part++,key_part++) + { + key_part->part=part; + key_part->field= key_info->key_part[part].field; + key_part->length= key_info->key_part[part].length; + key_part->store_length= key_info->key_part[part].store_length; + key_part->null_bit= key_info->key_part[part].null_bit; + key_part->flag= (uint8) key_info->key_part[part].key_part_flag; + + max_used_key_len +=key_info->key_part[part].store_length; + } + + quick->max_used_key_length= max_used_key_len; + + if (insert_dynamic(&quick->ranges,(uchar*)&range)) + goto err; + + /* + Add a NULL range if REF_OR_NULL optimization is used. + For example: + if we have "WHERE A=2 OR A IS NULL" we created the (A=2) range above + and have ref->null_ref_key set. Will create a new NULL range here. + */ + if (ref->null_ref_key) + { + QUICK_RANGE *null_range; + + *ref->null_ref_key= 1; // Set null byte then create a range + if (!(null_range= new (alloc) + QUICK_RANGE(thd, ref->key_buff, ref->key_length, + make_prev_keypart_map(ref->key_parts), + ref->key_buff, ref->key_length, + make_prev_keypart_map(ref->key_parts), EQ_RANGE))) + goto err; + *ref->null_ref_key= 0; // Clear null byte + if (insert_dynamic(&quick->ranges,(uchar*)&null_range)) + goto err; + } + + /* Call multi_range_read_info() to get the MRR flags and buffer size */ + quick->mrr_flags= HA_MRR_NO_ASSOCIATION | + (table->file->keyread_enabled() ? HA_MRR_INDEX_ONLY : 0); + if (thd->lex->sql_command != SQLCOM_SELECT) + quick->mrr_flags |= HA_MRR_USE_DEFAULT_IMPL; + + quick->mrr_buf_size= thd->variables.mrr_buff_size; + if (table->file->multi_range_read_info(quick->index, 1, (uint)records, + ~0, + &quick->mrr_buf_size, + &quick->mrr_flags, &cost)) + goto err; + + return quick; +err: + delete quick; + return 0; +} + + +/* + Perform key scans for all used indexes (except CPK), get rowids and merge + them into an ordered non-recurrent sequence of rowids. + + The merge/duplicate removal is performed using Unique class. 
We put all + rowids into Unique, get the sorted sequence and destroy the Unique. + + If table has a clustered primary key that covers all rows (TRUE for bdb + and innodb currently) and one of the index_merge scans is a scan on PK, + then rows that will be retrieved by PK scan are not put into Unique and + primary key scan is not performed here, it is performed later separately. + + RETURN + 0 OK + other error +*/ + +int read_keys_and_merge_scans(THD *thd, + TABLE *head, + List quick_selects, + QUICK_RANGE_SELECT *pk_quick_select, + READ_RECORD *read_record, + bool intersection, + key_map *filtered_scans, + Unique **unique_ptr) +{ + List_iterator_fast cur_quick_it(quick_selects); + QUICK_RANGE_SELECT* cur_quick; + int result; + Unique *unique= *unique_ptr; + handler *file= head->file; + bool with_cpk_filter= pk_quick_select != NULL; + DBUG_ENTER("read_keys_and_merge"); + + /* We're going to just read rowids. */ + head->prepare_for_position(); + + cur_quick_it.rewind(); + cur_quick= cur_quick_it++; + bool first_quick= TRUE; + DBUG_ASSERT(cur_quick != 0); + head->file->ha_start_keyread(cur_quick->index); + + /* + We reuse the same instance of handler so we need to call both init and + reset here. + */ + if (cur_quick->init() || cur_quick->reset()) + goto err; + + if (unique == NULL) + { + DBUG_EXECUTE_IF("index_merge_may_not_create_a_Unique", DBUG_SUICIDE(); ); + DBUG_EXECUTE_IF("only_one_Unique_may_be_created", + DBUG_SET("+d,index_merge_may_not_create_a_Unique"); ); + + unique= new Unique(refpos_order_cmp, (void *)file, + file->ref_length, + (size_t)thd->variables.sortbuff_size, + intersection ? 
quick_selects.elements : 0); + if (!unique) + goto err; + *unique_ptr= unique; + } + else + { + unique->reset(); + } + + DBUG_ASSERT(file->ref_length == unique->get_size()); + DBUG_ASSERT(thd->variables.sortbuff_size == unique->get_max_in_memory_size()); + + for (;;) + { + while ((result= cur_quick->get_next()) == HA_ERR_END_OF_FILE) + { + if (intersection) + with_cpk_filter= filtered_scans->is_set(cur_quick->index); + if (first_quick) + { + first_quick= FALSE; + if (intersection && unique->is_in_memory()) + unique->close_for_expansion(); + } + cur_quick->range_end(); + cur_quick= cur_quick_it++; + if (!cur_quick) + break; + + if (cur_quick->file->inited != handler::NONE) + cur_quick->file->ha_index_end(); + if (cur_quick->init() || cur_quick->reset()) + goto err; + } + + if (result) + { + if (result != HA_ERR_END_OF_FILE) + { + cur_quick->range_end(); + goto err; + } + break; + } + + if (thd->killed) + goto err; + + if (with_cpk_filter && + pk_quick_select->row_in_ranges() != intersection ) + continue; + + cur_quick->file->position(cur_quick->record); + if (unique->unique_add((char*)cur_quick->file->ref)) + goto err; + } + + /* + Ok all rowids are in the Unique now. The next call will initialize + the unique structure so it can be used to iterate through the rowids + sequence. + */ + result= unique->get(head); + /* + index merge currently doesn't support "using index" at all + */ + head->file->ha_end_keyread(); + if (init_read_record(read_record, thd, head, (SQL_SELECT*) 0, + &unique->sort, 1 , 1, TRUE)) + result= 1; + DBUG_RETURN(result); + +err: + head->file->ha_end_keyread(); + DBUG_RETURN(1); +} + + +int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge() + +{ + int result; + DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge"); + result= read_keys_and_merge_scans(thd, head, quick_selects, pk_quick_select, + &read_record, FALSE, NULL, &unique); + doing_pk_scan= FALSE; + DBUG_RETURN(result); +} + +/* + Get next row for index_merge. 
  NOTES
    The rows are read from
      1. rowids stored in Unique.
      2. QUICK_RANGE_SELECT with clustered primary key (if any).
    The sets of rows retrieved in 1) and 2) are guaranteed to be disjoint.
*/

int QUICK_INDEX_MERGE_SELECT::get_next()
{
  int result;
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::get_next");

  if (doing_pk_scan)
    DBUG_RETURN(pk_quick_select->get_next());

  if ((result= read_record.read_record()) == -1)
  {
    result= HA_ERR_END_OF_FILE;
    end_read_record(&read_record);
    // Free things used by sort early. Shouldn't be strictly necessary
    unique->sort.reset();
    /* All rows from Unique have been retrieved, do a clustered PK scan */
    if (pk_quick_select)
    {
      doing_pk_scan= TRUE;
      if ((result= pk_quick_select->init()) ||
          (result= pk_quick_select->reset()))
        DBUG_RETURN(result);
      DBUG_RETURN(pk_quick_select->get_next());
    }
  }

  DBUG_RETURN(result);
}

int QUICK_INDEX_INTERSECT_SELECT::read_keys_and_merge()

{
  int result;
  DBUG_ENTER("QUICK_INDEX_INTERSECT_SELECT::read_keys_and_merge");
  /* Intersection variant: pass TRUE and the set of CPK-filtered scans */
  result= read_keys_and_merge_scans(thd, head, quick_selects, pk_quick_select,
                                    &read_record, TRUE, &filtered_scans,
                                    &unique);
  DBUG_RETURN(result);
}

int QUICK_INDEX_INTERSECT_SELECT::get_next()
{
  int result;
  DBUG_ENTER("QUICK_INDEX_INTERSECT_SELECT::get_next");

  if ((result= read_record.read_record()) == -1)
  {
    result= HA_ERR_END_OF_FILE;
    end_read_record(&read_record);
    unique->sort.reset();                       // Free things early
  }

  DBUG_RETURN(result);
}


/*
  Retrieve next record.
  SYNOPSIS
     QUICK_ROR_INTERSECT_SELECT::get_next()

  NOTES
    Invariant on enter/exit: all intersected selects have retrieved all index
    records with rowid <= some_rowid_val and no intersected select has
    retrieved any index records with rowid > some_rowid_val.
    We start fresh and loop until we have retrieved the same rowid in each of
    the key scans or we got an error.
+ + If a Clustered PK scan is present, it is used only to check if row + satisfies its condition (and never used for row retrieval). + + Locking: to ensure that exclusive locks are only set on records that + are included in the final result we must release the lock + on all rows we read but do not include in the final result. This + must be done on each index that reads the record and the lock + must be released using the same handler (the same quick object) as + used when reading the record. + + RETURN + 0 - Ok + other - Error code if any error occurred. +*/ + +int QUICK_ROR_INTERSECT_SELECT::get_next() +{ + List_iterator_fast quick_it(quick_selects); + QUICK_SELECT_WITH_RECORD *qr; + QUICK_RANGE_SELECT* quick; + + /* quick that reads the given rowid first. This is needed in order + to be able to unlock the row using the same handler object that locked + it */ + QUICK_RANGE_SELECT* quick_with_last_rowid; + + int error, cmp; + uint last_rowid_count=0; + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next"); + + /* Get a rowid for first quick and save it as a 'candidate' */ + qr= quick_it++; + quick= qr->quick; + error= quick->get_next(); + if (cpk_quick) + { + while (!error && !cpk_quick->row_in_ranges()) + { + quick->file->unlock_row(); /* row not in range; unlock */ + error= quick->get_next(); + } + } + if (unlikely(error)) + DBUG_RETURN(error); + + /* Save the read key tuple */ + key_copy(qr->key_tuple, record, head->key_info + quick->index, + quick->max_used_key_length); + + quick->file->position(quick->record); + memcpy(last_rowid, quick->file->ref, head->file->ref_length); + last_rowid_count= 1; + quick_with_last_rowid= quick; + + while (last_rowid_count < quick_selects.elements) + { + if (!(qr= quick_it++)) + { + quick_it.rewind(); + qr= quick_it++; + } + quick= qr->quick; + + do + { + DBUG_EXECUTE_IF("innodb_quick_report_deadlock", + DBUG_SET("+d,innodb_report_deadlock");); + if (unlikely((error= quick->get_next()))) + { + /* On certain errors like deadlock, 
trx might be rolled back.*/ + if (!thd->transaction_rollback_request) + quick_with_last_rowid->file->unlock_row(); + DBUG_RETURN(error); + } + quick->file->position(quick->record); + cmp= head->file->cmp_ref(quick->file->ref, last_rowid); + if (cmp < 0) + { + /* This row is being skipped. Release lock on it. */ + quick->file->unlock_row(); + } + } while (cmp < 0); + + key_copy(qr->key_tuple, record, head->key_info + quick->index, + quick->max_used_key_length); + + /* Ok, current select 'caught up' and returned ref >= cur_ref */ + if (cmp > 0) + { + /* Found a row with ref > cur_ref. Make it a new 'candidate' */ + if (cpk_quick) + { + while (!cpk_quick->row_in_ranges()) + { + quick->file->unlock_row(); /* row not in range; unlock */ + if (unlikely((error= quick->get_next()))) + { + /* On certain errors like deadlock, trx might be rolled back.*/ + if (!thd->transaction_rollback_request) + quick_with_last_rowid->file->unlock_row(); + DBUG_RETURN(error); + } + } + quick->file->position(quick->record); + } + memcpy(last_rowid, quick->file->ref, head->file->ref_length); + quick_with_last_rowid->file->unlock_row(); + last_rowid_count= 1; + quick_with_last_rowid= quick; + + //save the fields here + key_copy(qr->key_tuple, record, head->key_info + quick->index, + quick->max_used_key_length); + } + else + { + /* current 'candidate' row confirmed by this select */ + last_rowid_count++; + } + } + + /* We get here if we got the same row ref in all scans. */ + if (need_to_fetch_row) + error= head->file->ha_rnd_pos(head->record[0], last_rowid); + + if (!need_to_fetch_row) + { + /* Restore the columns we've read/saved with other quick selects */ + quick_it.rewind(); + while ((qr= quick_it++)) + { + if (qr->quick != quick) + { + key_restore(record, qr->key_tuple, head->key_info + qr->quick->index, + qr->quick->max_used_key_length); + } + } + } + + DBUG_RETURN(error); +} + + +/* + Retrieve next record. 
  SYNOPSIS
    QUICK_ROR_UNION_SELECT::get_next()

  NOTES
    Enter/exit invariant:
    For each quick select in the queue a {key,rowid} tuple has been
    retrieved but the corresponding row hasn't been passed to output.

  RETURN
   0     - Ok
   other - Error code if any error occurred.
*/

int QUICK_ROR_UNION_SELECT::get_next()
{
  int error, dup_row;
  QUICK_SELECT_I *quick;
  uchar *tmp;
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next");

  do
  {
    if (!queue.elements)
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    /* Ok, we have a queue with >= 1 scans */

    /* Queue top holds the smallest rowid among all pending scans */
    quick= (QUICK_SELECT_I*)queue_top(&queue);
    memcpy(cur_rowid, quick->last_rowid, rowid_length);

    /* put into queue rowid from the same stream as top element */
    if ((error= quick->get_next()))
    {
      if (error != HA_ERR_END_OF_FILE)
        DBUG_RETURN(error);
      queue_remove_top(&queue);
    }
    else
    {
      quick->save_last_pos();
      queue_replace_top(&queue);
    }

    if (!have_prev_rowid)
    {
      /* No rows have been returned yet */
      dup_row= FALSE;
      have_prev_rowid= TRUE;
    }
    else
      /* Same rowid as the previously returned row => duplicate, skip it */
      dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid);
  } while (dup_row);

  /* Swap the rowid buffers so cur_rowid is free for the next iteration */
  tmp= cur_rowid;
  cur_rowid= prev_rowid;
  prev_rowid= tmp;

  error= head->file->ha_rnd_pos(quick->record, prev_rowid);
  DBUG_RETURN(error);
}


/*
  (Re-)initialize this range select for a new scan: set up the handler's
  index read state and the MRR buffer, then position before the first range.
*/
int QUICK_RANGE_SELECT::reset()
{
  uint  buf_size;
  uchar *mrange_buff;
  int   error;
  HANDLER_BUFFER empty_buf;
  MY_BITMAP * const save_read_set= head->read_set;
  MY_BITMAP * const save_write_set= head->write_set;
  DBUG_ENTER("QUICK_RANGE_SELECT::reset");
  last_range= NULL;
  cur_range= (QUICK_RANGE**) ranges.buffer;
  RANGE_SEQ_IF seq_funcs= {NULL, quick_range_seq_init, quick_range_seq_next, 0, 0};

  if (file->inited == handler::RND)
  {
    /* Handler could be left in this state by MRR */
    if (unlikely((error= file->ha_rnd_end())))
      DBUG_RETURN(error);
  }

  if (in_ror_merged_scan)
    head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);

  if (file->inited == handler::NONE)
  {
    DBUG_EXECUTE_IF("bug14365043_2",
                    DBUG_SET("+d,ha_index_init_fail"););
    if (unlikely((error= file->ha_index_init(index,1))))
    {
        file->print_error(error, MYF(0));
        goto err;
    }
  }

  /* Allocate buffer if we need one but haven't allocated it yet */
  if (mrr_buf_size && !mrr_buf_desc)
  {
    buf_size= mrr_buf_size;
    while (buf_size && !my_multi_malloc(key_memory_QUICK_RANGE_SELECT_mrr_buf_desc,
                                        MYF(MY_WME),
                                        &mrr_buf_desc, sizeof(*mrr_buf_desc),
                                        &mrange_buff, buf_size,
                                        NullS))
    {
      /* Try to shrink the buffers until both are 0. */
      buf_size/= 2;
    }
    if (!mrr_buf_desc)
    {
      error= HA_ERR_OUT_OF_MEM;
      goto err;
    }

    /* Initialize the handler buffer. */
    mrr_buf_desc->buffer= mrange_buff;
    mrr_buf_desc->buffer_end= mrange_buff + buf_size;
    mrr_buf_desc->end_of_used_area= mrange_buff;
  }

  if (!mrr_buf_desc)
    empty_buf.buffer= empty_buf.buffer_end= empty_buf.end_of_used_area= NULL;

  error= file->multi_range_read_init(&seq_funcs, (void*)this,
                                     (uint)ranges.elements, mrr_flags,
                                     mrr_buf_desc? mrr_buf_desc: &empty_buf);
err:
  /* Restore bitmaps set on entry */
  if (in_ror_merged_scan)
    head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
  DBUG_RETURN(error);
}


/*
  Get next possible record using quick-struct.

  SYNOPSIS
    QUICK_RANGE_SELECT::get_next()

  NOTES
    Record is read into table->record[0]

  RETURN
    0			Found row
    HA_ERR_END_OF_FILE	No (more) rows in range
    #			Error code
*/

int QUICK_RANGE_SELECT::get_next()
{
  range_id_t dummy;
  int result;
  DBUG_ENTER("QUICK_RANGE_SELECT::get_next");

  if (!in_ror_merged_scan)
    DBUG_RETURN(file->multi_range_read_next(&dummy));

  MY_BITMAP * const save_read_set= head->read_set;
  MY_BITMAP * const save_write_set= head->write_set;
  /*
    We don't need to signal the bitmap change as the bitmap is always the
    same for this head->file
  */
  head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
  result= file->multi_range_read_next(&dummy);
  head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
  DBUG_RETURN(result);
}


/*
  Get the next record with a different prefix.

  @param prefix_length   length of cur_prefix
  @param group_key_parts The number of key parts in the group prefix
  @param cur_prefix      prefix of a key to be searched for

  Each subsequent call to the method retrieves the first record that has a
  prefix with length prefix_length and which is different from cur_prefix,
  such that the record with the new prefix is within the ranges described by
  this->ranges. The record found is stored into the buffer pointed by
  this->record. The method is useful for GROUP-BY queries with range
  conditions to discover the prefix of the next group that satisfies the range
  conditions.

  @todo

    This method is a modified copy of QUICK_RANGE_SELECT::get_next(), so both
    methods should be unified into a more general one to reduce code
    duplication.
+ + @retval 0 on success + @retval HA_ERR_END_OF_FILE if returned all keys + @retval other if some error occurred +*/ + +int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length, + uint group_key_parts, + uchar *cur_prefix) +{ + DBUG_ENTER("QUICK_RANGE_SELECT::get_next_prefix"); + const key_part_map keypart_map= make_prev_keypart_map(group_key_parts); + + for (;;) + { + int result; + if (last_range) + { + /* Read the next record in the same range with prefix after cur_prefix. */ + DBUG_ASSERT(cur_prefix != NULL); + result= file->ha_index_read_map(record, cur_prefix, keypart_map, + HA_READ_AFTER_KEY); + if (result || last_range->max_keypart_map == 0) { + /* + Only return if actual failure occurred. For HA_ERR_KEY_NOT_FOUND + or HA_ERR_END_OF_FILE, we just want to continue to reach the next + set of ranges. It is possible for the storage engine to return + HA_ERR_KEY_NOT_FOUND/HA_ERR_END_OF_FILE even when there are more + keys if it respects the end range set by the read_range_first call + below. + */ + if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE) + DBUG_RETURN(result); + } else { + /* + For storage engines that don't respect end range, check if we've + moved past the current range. + */ + key_range previous_endpoint; + last_range->make_max_endpoint(&previous_endpoint, prefix_length, + keypart_map); + if (file->compare_key(&previous_endpoint) <= 0) + DBUG_RETURN(0); + } + } + + size_t count= ranges.elements - (size_t)(cur_range - (QUICK_RANGE**) ranges.buffer); + if (count == 0) + { + /* Ranges have already been used up before. None is left for read. */ + last_range= 0; + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + last_range= *(cur_range++); + + key_range start_key, end_key; + last_range->make_min_endpoint(&start_key, prefix_length, keypart_map); + last_range->make_max_endpoint(&end_key, prefix_length, keypart_map); + + result= file->read_range_first(last_range->min_keypart_map ? &start_key : 0, + last_range->max_keypart_map ? 
&end_key : 0, + MY_TEST(last_range->flag & EQ_RANGE), + TRUE); + if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE)) + last_range= 0; // Stop searching + + if (result != HA_ERR_END_OF_FILE) + DBUG_RETURN(result); + last_range= 0; // No matching rows; go to next range + } +} + + +/* Get next for geometrical indexes */ + +int QUICK_RANGE_SELECT_GEOM::get_next() +{ + DBUG_ENTER("QUICK_RANGE_SELECT_GEOM::get_next"); + + for (;;) + { + int result; + if (last_range) + { + // Already read through key + result= file->ha_index_next_same(record, last_range->min_key, + last_range->min_length); + if (result != HA_ERR_END_OF_FILE) + DBUG_RETURN(result); + } + + size_t count= ranges.elements - (size_t)(cur_range - (QUICK_RANGE**) ranges.buffer); + if (count == 0) + { + /* Ranges have already been used up before. None is left for read. */ + last_range= 0; + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + last_range= *(cur_range++); + + result= file->ha_index_read_map(record, last_range->min_key, + last_range->min_keypart_map, + (ha_rkey_function)(last_range->flag ^ + GEOM_FLAG)); + if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE) + DBUG_RETURN(result); + last_range= 0; // Not found, to next range + } +} + + +/* + Check if current row will be retrieved by this QUICK_RANGE_SELECT + + NOTES + It is assumed that currently a scan is being done on another index + which reads all necessary parts of the index that is scanned by this + quick select. + The implementation does a binary search on sorted array of disjoint + ranges, without taking size of range into account. + + This function is used to filter out clustered PK scan rows in + index_merge quick select. 

  RETURN
    TRUE  if current row will be retrieved by this quick select
    FALSE if not
*/

bool QUICK_RANGE_SELECT::row_in_ranges()
{
  QUICK_RANGE *res;
  /* Binary search over the ordered, disjoint array of ranges. */
  size_t min= 0;
  /*
    NOTE(review): assumes ranges.elements > 0, otherwise "max" underflows.
    Callers appear to guarantee at least one range -- confirm.
  */
  size_t max= ranges.elements - 1;
  size_t mid= (max + min)/2;

  while (min != max)
  {
    if (cmp_next(*(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid)))
    {
      /* current row value > mid->max */
      min= mid + 1;
    }
    else
      max= mid;
    mid= (min + max) / 2;
  }
  res= *(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid);
  /* Row is inside iff it is neither above res->max nor below res->min. */
  return (!cmp_next(res) && !cmp_prev(res));
}

/*
  This is a hack: we inherit from QUICK_RANGE_SELECT so that we can use the
  get_next() interface, but we have to hold a pointer to the original
  QUICK_RANGE_SELECT because its data are used all over the place. What
  should be done is to factor out the data that is needed into a base
  class (QUICK_SELECT), and then have two subclasses (_ASC and _DESC)
  which handle the ranges and implement the get_next() function. But
  for now, this seems to work right at least.
 */

QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q,
                                     uint used_key_parts_arg)
 :QUICK_RANGE_SELECT(*q), rev_it(rev_ranges),
  used_key_parts (used_key_parts_arg)
{
  QUICK_RANGE *r;
  /*
    Use default MRR implementation for reverse scans. No table engine
    currently can do an MRR scan with output in reverse index order.
  */
  mrr_buf_desc= NULL;
  mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
  mrr_buf_size= 0;

  /* Copy the shared range array into rev_ranges in reversed order. */
  QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
  QUICK_RANGE **end_range= pr + ranges.elements;
  for (; pr!=end_range; pr++)
    rev_ranges.push_front(*pr);

  /* Remove EQ_RANGE flag for keys that are not using the full key */
  for (r = rev_it++; r; r = rev_it++)
  {
    if ((r->flag & EQ_RANGE) &&
        head->key_info[index].key_length != r->max_length)
      r->flag&= ~EQ_RANGE;
  }
  rev_it.rewind();
  q->dont_free=1;                               // Don't free shared mem
}


int QUICK_SELECT_DESC::get_next()
{
  DBUG_ENTER("QUICK_SELECT_DESC::get_next");

  /* The max key is handled as follows:
   * - if there is NO_MAX_RANGE, start at the end and move backwards
   * - if it is an EQ_RANGE, which means that max key covers the entire
   *   key, go directly to the key and read through it (sorting backwards is
   *   same as sorting forwards)
   * - if it is NEAR_MAX, go to the key or next, step back once, and
   *   move backwards
   * - otherwise (not NEAR_MAX == include the key), go after the key,
   *   step back once, and move backwards
   */

  for (;;)
  {
    int result;
    if (last_range)
    {                                           // Already read through key
      result = ((last_range->flag & EQ_RANGE &&
                 used_key_parts <= head->key_info[index].user_defined_key_parts) ?
                file->ha_index_next_same(record, last_range->min_key,
                                         last_range->min_length) :
                file->ha_index_prev(record));
      if (!result)
      {
        if (cmp_prev(*rev_it.ref()) == 0)
          DBUG_RETURN(0);
      }
      else if (result != HA_ERR_END_OF_FILE)
        DBUG_RETURN(result);
    }

    if (!(last_range= rev_it++))
      DBUG_RETURN(HA_ERR_END_OF_FILE);          // All ranges used

    key_range start_key;
    start_key.key=    (const uchar*) last_range->min_key;
    start_key.length= last_range->min_length;
    start_key.flag=   ((last_range->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
                       (last_range->flag & EQ_RANGE) ?
                       HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
    start_key.keypart_map= last_range->min_keypart_map;
    key_range end_key;
    end_key.key=      (const uchar*) last_range->max_key;
    end_key.length=   last_range->max_length;
    end_key.flag=     (last_range->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
                       HA_READ_AFTER_KEY);
    end_key.keypart_map= last_range->max_keypart_map;
    /* Tell the engine the bounds of the upcoming (reverse) range scan. */
    result= file->prepare_range_scan((last_range->flag & NO_MIN_RANGE) ? NULL : &start_key,
                                     (last_range->flag & NO_MAX_RANGE) ? NULL : &end_key);
    if (result)
    {
      DBUG_RETURN(result);
    }

    if (last_range->flag & NO_MAX_RANGE)        // Read last record
    {
      int local_error;
      if (unlikely((local_error= file->ha_index_last(record))))
        DBUG_RETURN(local_error);               // Empty table
      if (cmp_prev(last_range) == 0)
        DBUG_RETURN(0);
      last_range= 0;                            // No match; go to next range
      continue;
    }

    if (last_range->flag & EQ_RANGE &&
        used_key_parts <= head->key_info[index].user_defined_key_parts)

    {
      result= file->ha_index_read_map(record, last_range->max_key,
                                      last_range->max_keypart_map,
                                      HA_READ_KEY_EXACT);
    }
    else
    {
      DBUG_ASSERT(last_range->flag & NEAR_MAX ||
                  (last_range->flag & EQ_RANGE &&
                   used_key_parts > head->key_info[index].user_defined_key_parts) ||
                  range_reads_after_key(last_range));
      result= file->ha_index_read_map(record, last_range->max_key,
                                      last_range->max_keypart_map,
                                      ((last_range->flag & NEAR_MAX) ?
                                       HA_READ_BEFORE_KEY :
                                       HA_READ_PREFIX_LAST_OR_PREV));
    }
    if (result)
    {
      if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
        DBUG_RETURN(result);
      last_range= 0;                            // Not found, to next range
      continue;
    }
    if (cmp_prev(last_range) == 0)
    {
      if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
        last_range= 0;                          // Stop searching
      DBUG_RETURN(0);                           // Found key is in range
    }
    last_range= 0;                              // To next range
  }
}


/**
  Create a compatible quick select with the result ordered in an opposite way

  @param used_key_parts_arg  Number of used key parts

  @retval NULL in case of errors (OOM etc)
  @retval pointer to a newly created QUICK_SELECT_DESC if success
*/

QUICK_SELECT_I *QUICK_RANGE_SELECT::make_reverse(uint used_key_parts_arg)
{
  QUICK_SELECT_DESC *new_quick= new QUICK_SELECT_DESC(this, used_key_parts_arg);
  if (new_quick == NULL)
  {
    delete new_quick;                           /* no-op on NULL; kept for symmetry */
    return NULL;
  }
  return new_quick;
}


/*
  Compare if found key is over max-value
  Returns 0 if key <= range->max_key
  TODO: Figure out why can't this function be as simple as cmp_prev().
*/

int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)
{
  if (range_arg->flag & NO_MAX_RANGE)
    return 0;                                   /* key can't be too large */

  KEY_PART *key_part=key_parts;
  uint store_length;

  /* Compare the current row against range_arg->max_key, keypart by keypart. */
  for (uchar *key=range_arg->max_key, *end=key+range_arg->max_length;
       key < end;
       key+= store_length, key_part++)
  {
    int cmp;
    bool reverse= MY_TEST(key_part->flag & HA_REVERSE_SORT);
    store_length= key_part->store_length;
    if (key_part->null_bit)
    {
      /* First byte is a NULL indicator for nullable keyparts. */
      if (*key)
      {
        if (!key_part->field->is_null())
          return reverse ? 0 : 1;
        continue;
      }
      else if (key_part->field->is_null())
        return reverse ? 1 : 0;
      key++;                                    // Skip null byte
      store_length--;
    }
    if ((cmp=key_part->field->key_cmp(key, key_part->length)) < 0)
      return reverse ? 1 : 0;
    if (cmp > 0)
      return reverse ? 0 : 1;
  }
  return (range_arg->flag & NEAR_MAX) ?
1 : 0; // Exact match +} + + +/* + Returns 0 if found key is inside range (found key >= range->min_key). +*/ + +int QUICK_RANGE_SELECT::cmp_prev(QUICK_RANGE *range_arg) +{ + int cmp; + if (range_arg->flag & NO_MIN_RANGE) + return 0; /* key can't be to small */ + + cmp= key_cmp(key_part_info, range_arg->min_key, + range_arg->min_length); + if (cmp > 0 || (cmp == 0 && !(range_arg->flag & NEAR_MIN))) + return 0; + return 1; // outside of range +} + + +/* + * TRUE if this range will require using HA_READ_AFTER_KEY + See comment in get_next() about this + */ + +bool QUICK_SELECT_DESC::range_reads_after_key(QUICK_RANGE *range_arg) +{ + return ((range_arg->flag & (NO_MAX_RANGE | NEAR_MAX)) || + !(range_arg->flag & EQ_RANGE) || + head->key_info[index].key_length != range_arg->max_length) ? 1 : 0; +} + + +void QUICK_SELECT_I::add_key_name(String *str, bool *first) +{ + KEY *key_info= head->key_info + index; + + if (*first) + *first= FALSE; + else + str->append(','); + str->append(&key_info->name); +} + + +Explain_quick_select* QUICK_RANGE_SELECT::get_explain(MEM_ROOT *local_alloc) +{ + Explain_quick_select *res; + if ((res= new (local_alloc) Explain_quick_select(QS_TYPE_RANGE))) + res->range.set(local_alloc, &head->key_info[index], max_used_key_length); + return res; +} + + +Explain_quick_select* +QUICK_GROUP_MIN_MAX_SELECT::get_explain(MEM_ROOT *local_alloc) +{ + Explain_quick_select *res; + if ((res= new (local_alloc) Explain_quick_select(QS_TYPE_GROUP_MIN_MAX))) + res->range.set(local_alloc, &head->key_info[index], max_used_key_length); + return res; +} + + +Explain_quick_select* +QUICK_INDEX_SORT_SELECT::get_explain(MEM_ROOT *local_alloc) +{ + Explain_quick_select *res; + if (!(res= new (local_alloc) Explain_quick_select(get_type()))) + return NULL; + + QUICK_RANGE_SELECT *quick; + Explain_quick_select *child_explain; + List_iterator_fast it(quick_selects); + while ((quick= it++)) + { + if ((child_explain= quick->get_explain(local_alloc))) + 
res->children.push_back(child_explain); + else + return NULL; + } + + if (pk_quick_select) + { + if ((child_explain= pk_quick_select->get_explain(local_alloc))) + res->children.push_back(child_explain); + else + return NULL; + } + return res; +} + + +/* + Same as QUICK_INDEX_SORT_SELECT::get_explain(), but primary key is printed + first +*/ + +Explain_quick_select* +QUICK_INDEX_INTERSECT_SELECT::get_explain(MEM_ROOT *local_alloc) +{ + Explain_quick_select *res; + Explain_quick_select *child_explain; + + if (!(res= new (local_alloc) Explain_quick_select(get_type()))) + return NULL; + + if (pk_quick_select) + { + if ((child_explain= pk_quick_select->get_explain(local_alloc))) + res->children.push_back(child_explain); + else + return NULL; + } + + QUICK_RANGE_SELECT *quick; + List_iterator_fast it(quick_selects); + while ((quick= it++)) + { + if ((child_explain= quick->get_explain(local_alloc))) + res->children.push_back(child_explain); + else + return NULL; + } + return res; +} + + +Explain_quick_select* +QUICK_ROR_INTERSECT_SELECT::get_explain(MEM_ROOT *local_alloc) +{ + Explain_quick_select *res; + Explain_quick_select *child_explain; + + if (!(res= new (local_alloc) Explain_quick_select(get_type()))) + return NULL; + + QUICK_SELECT_WITH_RECORD *qr; + List_iterator_fast it(quick_selects); + while ((qr= it++)) + { + if ((child_explain= qr->quick->get_explain(local_alloc))) + res->children.push_back(child_explain); + else + return NULL; + } + + if (cpk_quick) + { + if ((child_explain= cpk_quick->get_explain(local_alloc))) + res->children.push_back(child_explain); + else + return NULL; + } + return res; +} + + +Explain_quick_select* +QUICK_ROR_UNION_SELECT::get_explain(MEM_ROOT *local_alloc) +{ + Explain_quick_select *res; + Explain_quick_select *child_explain; + + if (!(res= new (local_alloc) Explain_quick_select(get_type()))) + return NULL; + + QUICK_SELECT_I *quick; + List_iterator_fast it(quick_selects); + while ((quick= it++)) + { + if ((child_explain= 
quick->get_explain(local_alloc))) + res->children.push_back(child_explain); + else + return NULL; + } + + return res; +} + + +void QUICK_SELECT_I::add_key_and_length(String *key_names, + String *used_lengths, + bool *first) +{ + char buf[64]; + size_t length; + KEY *key_info= head->key_info + index; + + if (*first) + *first= FALSE; + else + { + key_names->append(','); + used_lengths->append(','); + } + key_names->append(&key_info->name); + length= longlong10_to_str(max_used_key_length, buf, 10) - buf; + used_lengths->append(buf, length); +} + + +void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + bool first= TRUE; + + add_key_and_length(key_names, used_lengths, &first); +} + +void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + QUICK_RANGE_SELECT *quick; + bool first= TRUE; + + List_iterator_fast it(quick_selects); + + while ((quick= it++)) + { + quick->add_key_and_length(key_names, used_lengths, &first); + } + + if (pk_quick_select) + pk_quick_select->add_key_and_length(key_names, used_lengths, &first); +} + + +void QUICK_INDEX_INTERSECT_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + QUICK_RANGE_SELECT *quick; + bool first= TRUE; + + List_iterator_fast it(quick_selects); + + if (pk_quick_select) + pk_quick_select->add_key_and_length(key_names, used_lengths, &first); + + while ((quick= it++)) + { + quick->add_key_and_length(key_names, used_lengths, &first); + } +} + +void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + QUICK_SELECT_WITH_RECORD *qr; + bool first= TRUE; + + List_iterator_fast it(quick_selects); + + while ((qr= it++)) + { + qr->quick->add_key_and_length(key_names, used_lengths, &first); + } + if (cpk_quick) + cpk_quick->add_key_and_length(key_names, used_lengths, &first); +} + +void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + 
QUICK_SELECT_I *quick; + bool first= TRUE; + + List_iterator_fast it(quick_selects); + + while ((quick= it++)) + { + if (first) + first= FALSE; + else + { + used_lengths->append(','); + key_names->append(','); + } + quick->add_keys_and_lengths(key_names, used_lengths); + } +} + + +void QUICK_RANGE_SELECT::add_used_key_part_to_set() +{ + uint key_len; + KEY_PART *part= key_parts; + for (key_len=0; key_len < max_used_key_length; + key_len += (part++)->store_length) + { + /* + We have to use field_index instead of part->field + as for partial fields, part->field points to + a temporary field that is only part of the original + field. field_index always points to the original field + */ + Field *field= head->field[part->field->field_index]; + field->register_field_in_read_map(); + } +} + + +void QUICK_GROUP_MIN_MAX_SELECT::add_used_key_part_to_set() +{ + uint key_len; + KEY_PART_INFO *part= index_info->key_part; + for (key_len=0; key_len < max_used_key_length; + key_len += (part++)->store_length) + { + /* + We have to use field_index instead of part->field + as for partial fields, part->field points to + a temporary field that is only part of the original + field. 
field_index always points to the original field + */ + Field *field= head->field[part->field->field_index]; + field->register_field_in_read_map(); + } +} + + +void QUICK_ROR_INTERSECT_SELECT::add_used_key_part_to_set() +{ + List_iterator_fast it(quick_selects); + QUICK_SELECT_WITH_RECORD *quick; + while ((quick= it++)) + { + quick->quick->add_used_key_part_to_set(); + } +} + + +void QUICK_INDEX_SORT_SELECT::add_used_key_part_to_set() +{ + QUICK_RANGE_SELECT *quick; + List_iterator_fast it(quick_selects); + while ((quick= it++)) + { + quick->add_used_key_part_to_set(); + } + if (pk_quick_select) + pk_quick_select->add_used_key_part_to_set(); +} + + +void QUICK_ROR_UNION_SELECT::add_used_key_part_to_set() +{ + QUICK_SELECT_I *quick; + List_iterator_fast it(quick_selects); + + while ((quick= it++)) + { + quick->add_used_key_part_to_set(); + } +} + + +/******************************************************************************* +* Implementation of QUICK_GROUP_MIN_MAX_SELECT +*******************************************************************************/ + +static inline uint get_field_keypart(KEY *index, Field *field); +static bool get_sel_arg_for_keypart(Field *field, SEL_ARG *index_range_tree, + SEL_ARG **cur_range); +static bool get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree, + KEY_PART_INFO *first_non_group_part, + KEY_PART_INFO *min_max_arg_part, + KEY_PART_INFO *last_part, THD *thd, + uchar *key_infix, uint *key_infix_len, + KEY_PART_INFO **first_non_infix_part); +static bool +check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item, + Field::imagetype image_type, + bool *has_min_max_fld, bool *has_other_fld); + +static void +cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, + uint group_key_parts, SEL_TREE *range_tree, + SEL_ARG *index_tree, ha_rows quick_prefix_records, + bool have_min, bool have_max, + double *read_cost, ha_rows *records); + + +/** + Test if this access method is applicable to a GROUP 
query with MIN/MAX + functions, and if so, construct a new TRP object. + + DESCRIPTION + Test whether a query can be computed via a QUICK_GROUP_MIN_MAX_SELECT. + Queries computable via a QUICK_GROUP_MIN_MAX_SELECT must satisfy the + following conditions: + A) Table T has at least one compound index I of the form: + I = + B) Query conditions: + B0. Q is over a single table T. + B1. The attributes referenced by Q are a subset of the attributes of I. + B2. All attributes QA in Q can be divided into 3 overlapping groups: + - SA = {S_1, ..., S_l, [C]} - from the SELECT clause, where C is + referenced by any number of MIN and/or MAX functions if present. + - WA = {W_1, ..., W_p} - from the WHERE clause + - GA = - from the GROUP BY clause (if any) + = SA - if Q is a DISTINCT query (based on the + equivalence of DISTINCT and GROUP queries. + - NGA = QA - (GA union C) = {NG_1, ..., NG_m} - the ones not in + GROUP BY and not referenced by MIN/MAX functions. + with the following properties specified below. + B3. If Q has a GROUP BY WITH ROLLUP clause the access method is not + applicable. + + SA1. There is at most one attribute in SA referenced by any number of + MIN and/or MAX functions which, which if present, is denoted as C. + SA2. The position of the C attribute in the index is after the last A_k. + SA3. The attribute C can be referenced in the WHERE clause only in + predicates of the forms: + - (C {< | <= | > | >= | =} const) + - (const {< | <= | > | >= | =} C) + - (C between const_i and const_j) + - C IS NULL + - C IS NOT NULL + - C != const (unless C is the primary key) + SA4. If Q has a GROUP BY clause, there are no other aggregate functions + except MIN and MAX. For queries with DISTINCT, aggregate functions + are allowed. + SA5. The select list in DISTINCT queries should not contain expressions. + SA6. Clustered index can not be used by GROUP_MIN_MAX quick select + for AGG_FUNC(DISTINCT ...) 
        optimization because cursor position is
        never stored after a unique key lookup in the clustered index and
        further index_next/prev calls can not be used. So loose index scan
        optimization can not be used in this case.
   SA7. If Q has both AGG_FUNC(DISTINCT ...) and MIN/MAX() functions then this
        access method is not used.
        For above queries MIN/MAX() aggregation has to be done at
        nested_loops_join (end_send_group). But with current design MIN/MAX()
        is always set as part of loose index scan. Because of this mismatch
        MIN() and MAX() values will be set incorrectly. For such queries to
        work we need a new interface for loose index scan. This new interface
        should only fetch records with min and max values and let
        end_send_group to do aggregation. Until then do not use
        loose_index_scan.
   GA1. If Q has a GROUP BY clause, then GA is a prefix of I. That is, if
        G_i = A_j => i = j.
   GA2. If Q has a DISTINCT clause, then there is a permutation of SA that
        forms a prefix of I. This permutation is used as the GROUP clause
        when the DISTINCT query is converted to a GROUP query.
   GA3. The attributes in GA may participate in arbitrary predicates, divided
        into two groups:
        - RNG(G_1,...,G_q ; where q <= k) is a range condition over the
          attributes of a prefix of GA
        - PA(G_i1,...G_iq) is an arbitrary predicate over an arbitrary subset
          of GA. Since P is applied to only GROUP attributes it filters some
          groups, and thus can be applied after the grouping.
   GA4. There are no expressions among G_i, just direct column references.
   NGA1.If in the index I there is a gap between the last GROUP attribute G_k,
        and the MIN/MAX attribute C, then NGA must consist of exactly the
        index attributes that constitute the gap. As a result there is a
        permutation of NGA, BA=<B_1,...,B_m>, that coincides with the gap
        in the index.
+ NGA2.If BA <> {}, then the WHERE clause must contain a conjunction EQ of + equality conditions for all NG_i of the form (NG_i = const) or + (const = NG_i), such that each NG_i is referenced in exactly one + conjunct. Informally, the predicates provide constants to fill the + gap in the index. + NGA3.If BA <> {}, there can only be one range. TODO: This is a code + limitation and is not strictly needed. See BUG#15947433 + WA1. There are no other attributes in the WHERE clause except the ones + referenced in predicates RNG, PA, PC, EQ defined above. Therefore + WA is subset of (GA union NGA union C) for GA,NGA,C that pass the + above tests. By transitivity then it also follows that each WA_i + participates in the index I (if this was already tested for GA, NGA + and C). + WA2. If there is a predicate on C, then it must be in conjunction + to all predicates on all earlier keyparts in I. + + C) Overall query form: + SELECT EXPR([A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)]) + FROM T + WHERE [RNG(A_1,...,A_p ; where p <= k)] + [AND EQ(B_1,...,B_m)] + [AND PC(C)] + [AND PA(A_i1,...,A_iq)] + GROUP BY A_1,...,A_k + [HAVING PH(A_1, ..., B_1,..., C)] + where EXPR(...) is an arbitrary expression over some or all SELECT fields, + or: + SELECT DISTINCT A_i1,...,A_ik + FROM T + WHERE [RNG(A_1,...,A_p ; where p <= k)] + [AND PA(A_i1,...,A_iq)]; + + NOTES + If the current query satisfies the conditions above, and if + (mem_root! = NULL), then the function constructs and returns a new TRP + object, that is later used to construct a new QUICK_GROUP_MIN_MAX_SELECT. + If (mem_root == NULL), then the function only tests whether the current + query satisfies the conditions above, and, if so, sets + is_applicable = TRUE. 
+ + Queries with DISTINCT for which index access can be used are transformed + into equivalent group-by queries of the form: + + SELECT A_1,...,A_k FROM T + WHERE [RNG(A_1,...,A_p ; where p <= k)] + [AND PA(A_i1,...,A_iq)] + GROUP BY A_1,...,A_k; + + The group-by list is a permutation of the select attributes, according + to their order in the index. + + TODO + - What happens if the query groups by the MIN/MAX field, and there is no + other field as in: "select MY_MIN(a) from t1 group by a" ? + - We assume that the general correctness of the GROUP-BY query was checked + before this point. Is this correct, or do we have to check it completely? + - Lift the limitation in condition (B3), that is, make this access method + applicable to ROLLUP queries. + + @param param Parameter from test_quick_select + @param sel_tree Range tree generated by get_mm_tree + @param read_time Best read time so far of table or index scan time + @return table read plan + @retval NULL Loose index scan not applicable or mem_root == NULL + @retval !NULL Loose index scan table read plan +*/ + +static TRP_GROUP_MIN_MAX * +get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time) +{ + THD *thd= param->thd; + JOIN *join= thd->lex->current_select->join; + TABLE *table= param->table; + bool have_min= FALSE; /* TRUE if there is a MIN function. */ + bool have_max= FALSE; /* TRUE if there is a MAX function. */ + Item_field *min_max_arg_item= NULL; // The argument of all MIN/MAX functions + KEY_PART_INFO *min_max_arg_part= NULL; /* The corresponding keypart. */ + uint group_prefix_len= 0; /* Length (in bytes) of the key prefix. */ + KEY *index_info= NULL; /* The index chosen for data access. */ + uint index= 0; /* The id of the chosen index. */ + uint group_key_parts= 0; // Number of index key parts in the group prefix. + uint used_key_parts= 0; /* Number of index key parts used for access. 
*/ + uchar key_infix[MAX_KEY_LENGTH]; /* Constants from equality predicates.*/ + uint key_infix_len= 0; /* Length of key_infix. */ + TRP_GROUP_MIN_MAX *read_plan= NULL; /* The eventually constructed TRP. */ + uint key_part_nr; + uint elements_in_group; + ORDER *tmp_group; + Item *item; + Item_field *item_field; + bool is_agg_distinct; + List agg_distinct_flds; + DBUG_ENTER("get_best_group_min_max"); + + Json_writer_object trace_group(thd, "group_index_range"); + const char* cause= NULL; + + /* Perform few 'cheap' tests whether this access method is applicable. */ + if (!join) /* This is not a select statement. */ + cause= "no join"; + else if (join->table_count != 1) /* The query must reference one table. */ + cause= "not single_table"; + else if (join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */ + cause= "rollup"; + else if (table->s->keys == 0) /* There are no indexes to use. */ + cause= "no index"; + else if (join->conds && join->conds->used_tables() + & OUTER_REF_TABLE_BIT) /* Cannot execute with correlated conditions. */ + cause= "correlated conditions"; + + if (cause) + { + trace_group.add("chosen", false).add("cause", cause); + DBUG_RETURN(NULL); + } + + is_agg_distinct = is_indexed_agg_distinct(join, &agg_distinct_flds); + + if ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */ + (!join->select_distinct) && + !is_agg_distinct) + { + trace_group.add("chosen", false).add("cause","no group by or distinct"); + DBUG_RETURN(NULL); + } + /* Analyze the query in more detail. 
*/ + + /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/ + List_iterator select_items_it(join->fields_list); + + if (join->sum_funcs[0]) + { + Item_sum *min_max_item; + Item_sum **func_ptr= join->sum_funcs; + while ((min_max_item= *(func_ptr++))) + { + if (min_max_item->sum_func() == Item_sum::MIN_FUNC) + have_min= TRUE; + else if (min_max_item->sum_func() == Item_sum::MAX_FUNC) + have_max= TRUE; + else if (is_agg_distinct && + (min_max_item->sum_func() == Item_sum::COUNT_DISTINCT_FUNC || + min_max_item->sum_func() == Item_sum::SUM_DISTINCT_FUNC || + min_max_item->sum_func() == Item_sum::AVG_DISTINCT_FUNC)) + continue; + else + { + trace_group.add("chosen", false) + .add("cause", "not applicable aggregate function"); + DBUG_RETURN(NULL); + } + + /* The argument of MIN/MAX. */ + Item *expr= min_max_item->get_arg(0)->real_item(); + if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */ + { + if (! min_max_arg_item) + min_max_arg_item= (Item_field*) expr; + else if (! min_max_arg_item->eq(expr, 1)) + { + trace_group.add("chosen", false) + .add("cause", "arguments different in min max function"); + DBUG_RETURN(NULL); + } + } + else + { + trace_group.add("chosen", false) + .add("cause", "no field item in min max function"); + DBUG_RETURN(NULL); + } + } + } + + /* Check (SA7). */ + if (is_agg_distinct && (have_max || have_min)) + { + trace_group.add("chosen", false) + .add("cause", "have both agg distinct and min max"); + DBUG_RETURN(NULL); + } + + /* Check (SA5). */ + if (join->select_distinct) + { + trace_group.add("distinct_query", true); + while ((item= select_items_it++)) + { + if (item->real_item()->type() != Item::FIELD_ITEM) + { + trace_group.add("chosen", false) + .add("cause", "distinct field is expression"); + DBUG_RETURN(NULL); + } + } + } + + /* Check (GA4) - that there are no expressions among the group attributes. 
*/ + elements_in_group= 0; + for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next) + { + if ((*tmp_group->item)->real_item()->type() != Item::FIELD_ITEM) + { + trace_group.add("chosen", false) + .add("cause", "group field is expression"); + DBUG_RETURN(NULL); + } + elements_in_group++; + } + + /* + Check that table has at least one compound index such that the conditions + (GA1,GA2) are all TRUE. If there is more than one such index, select the + first one. Here we set the variables: group_prefix_len and index_info. + */ + /* Cost-related variables for the best index so far. */ + double best_read_cost= DBL_MAX; + ha_rows best_records= 0; + SEL_ARG *best_index_tree= NULL; + ha_rows best_quick_prefix_records= 0; + uint best_param_idx= 0; + + const uint pk= param->table->s->primary_key; + uint max_key_part; + SEL_ARG *cur_index_tree= NULL; + ha_rows cur_quick_prefix_records= 0; + + // We go through allowed indexes + Json_writer_array trace_indexes(thd, "potential_group_range_indexes"); + + for (uint cur_param_idx= 0; cur_param_idx < param->keys ; ++cur_param_idx) + { + const uint cur_index= param->real_keynr[cur_param_idx]; + KEY *const cur_index_info= &table->key_info[cur_index]; + + Json_writer_object trace_idx(thd); + trace_idx.add("index", cur_index_info->name); + + KEY_PART_INFO *cur_part; + KEY_PART_INFO *end_part; /* Last part for loops. */ + /* Last index part. */ + KEY_PART_INFO *last_part; + KEY_PART_INFO *first_non_group_part; + KEY_PART_INFO *first_non_infix_part; + uint key_parts; + uint key_infix_parts; + uint cur_group_key_parts= 0; + uint cur_group_prefix_len= 0; + double cur_read_cost; + ha_rows cur_records; + key_map used_key_parts_map; + uint cur_key_infix_len= 0; + uchar cur_key_infix[MAX_KEY_LENGTH]; + uint cur_used_key_parts; + + /* + Check (B1) - if current index is covering. + (was also: "Exclude UNIQUE indexes ..." but this was removed because + there are cases Loose Scan over a multi-part index is useful). 
+ */ + if (!table->covering_keys.is_set(cur_index) || + !table->keys_in_use_for_group_by.is_set(cur_index)) + { + cause= "not covering"; + goto next_index; + } + + { + for (uint i= 0; i < table->actual_n_key_parts(cur_index_info); i++) + { + if (cur_index_info->key_part[i].key_part_flag & HA_REVERSE_SORT) + { + cause="Reverse-ordered (not supported yet)"; + goto next_index; + } + } + } + + /* + This function is called on the precondition that the index is covering. + Therefore if the GROUP BY list contains more elements than the index, + these are duplicates. The GROUP BY list cannot be a prefix of the index. + */ + if (elements_in_group > table->actual_n_key_parts(cur_index_info)) + { + cause= "group key parts greater than index key parts"; + goto next_index; + } + + /* + Unless extended keys can be used for cur_index: + If the current storage manager is such that it appends the primary key to + each index, then the above condition is insufficient to check if the + index is covering. In such cases it may happen that some fields are + covered by the PK index, but not by the current index. Since we can't + use the concatenation of both indexes for index lookup, such an index + does not qualify as covering in our case. If this is the case, below + we check that all query fields are indeed covered by 'cur_index'. 
+ */ + if (cur_index_info->user_defined_key_parts == table->actual_n_key_parts(cur_index_info) + && pk < MAX_KEY && cur_index != pk && + (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX)) + { + /* For each table field */ + for (uint i= 0; i < table->s->fields; i++) + { + Field *cur_field= table->field[i]; + /* + If the field is used in the current query ensure that it's + part of 'cur_index' + */ + if (bitmap_is_set(table->read_set, cur_field->field_index) && + !cur_field->part_of_key_not_clustered.is_set(cur_index)) + { + cause= "not covering"; + goto next_index; // Field was not part of key + } + } + } + + trace_idx.add("covering", true); + + max_key_part= 0; + used_key_parts_map.clear_all(); + + /* + Check (GA1) for GROUP BY queries. + */ + if (join->group_list) + { + cur_part= cur_index_info->key_part; + end_part= cur_part + table->actual_n_key_parts(cur_index_info); + /* Iterate in parallel over the GROUP list and the index parts. */ + for (tmp_group= join->group_list; tmp_group && (cur_part != end_part); + tmp_group= tmp_group->next, cur_part++) + { + /* + TODO: + tmp_group::item is an array of Item, is it OK to consider only the + first Item? If so, then why? What is the array for? + */ + /* Above we already checked that all group items are fields. */ + DBUG_ASSERT((*tmp_group->item)->real_item()->type() == Item::FIELD_ITEM); + Item_field *group_field= (Item_field *) (*tmp_group->item)->real_item(); + if (group_field->field->eq(cur_part->field)) + { + cur_group_prefix_len+= cur_part->store_length; + ++cur_group_key_parts; + max_key_part= (uint)(cur_part - cur_index_info->key_part) + 1; + used_key_parts_map.set_bit(max_key_part); + } + else + { + cause= "group attribute not prefix in index"; + goto next_index; + } + } + } + /* + Check (GA2) if this is a DISTINCT query. + If GA2, then Store a new ORDER object in group_fields_array at the + position of the key part of item_field->field. 
Thus we get the ORDER + objects for each field ordered as the corresponding key parts. + Later group_fields_array of ORDER objects is used to convert the query + to a GROUP query. + */ + if ((!join->group && join->select_distinct) || + is_agg_distinct) + { + if (!is_agg_distinct) + { + select_items_it.rewind(); + } + + List_iterator agg_distinct_flds_it (agg_distinct_flds); + while (NULL != (item = (is_agg_distinct ? + (Item *) agg_distinct_flds_it++ : select_items_it++))) + { + /* (SA5) already checked above. */ + item_field= (Item_field*) item->real_item(); + DBUG_ASSERT(item->real_item()->type() == Item::FIELD_ITEM); + + /* not doing loose index scan for derived tables */ + if (!item_field->field) + { + cause= "derived table"; + goto next_index; + } + + /* Find the order of the key part in the index. */ + key_part_nr= get_field_keypart(cur_index_info, item_field->field); + /* + Check if this attribute was already present in the select list. + If it was present, then its corresponding key part was alredy used. + */ + if (used_key_parts_map.is_set(key_part_nr)) + continue; + if (key_part_nr < 1 || + (!is_agg_distinct && key_part_nr > join->fields_list.elements)) + { + cause= "select attribute not prefix in index"; + goto next_index; + } + cur_part= cur_index_info->key_part + key_part_nr - 1; + cur_group_prefix_len+= cur_part->store_length; + used_key_parts_map.set_bit(key_part_nr); + ++cur_group_key_parts; + max_key_part= MY_MAX(max_key_part,key_part_nr); + } + /* + Check that used key parts forms a prefix of the index. + To check this we compare bits in all_parts and cur_parts. + all_parts have all bits set from 0 to (max_key_part-1). + cur_parts have bits set for only used keyparts. + */ + ulonglong all_parts, cur_parts; + all_parts= (1ULL << max_key_part) - 1; + cur_parts= used_key_parts_map.to_ulonglong() >> 1; + if (all_parts != cur_parts) + goto next_index; + } + + /* Check (SA2). 
*/ + if (min_max_arg_item) + { + key_part_nr= get_field_keypart(cur_index_info, min_max_arg_item->field); + if (key_part_nr <= cur_group_key_parts) + { + cause= "aggregate column not suffix in idx"; + goto next_index; + } + min_max_arg_part= cur_index_info->key_part + key_part_nr - 1; + } + + /* + Aplly a heuristic: there is no point to use loose index scan when we're + using the whole unique index. + */ + if (cur_index_info->flags & HA_NOSAME && + cur_group_key_parts == cur_index_info->user_defined_key_parts) + { + cause= "using unique index"; + goto next_index; + } + + /* + Check (NGA1, NGA2) and extract a sequence of constants to be used as part + of all search keys. + */ + + /* + If there is MIN/MAX, each keypart between the last group part and the + MIN/MAX part must participate in one equality with constants, and all + keyparts after the MIN/MAX part must not be referenced in the query. + + If there is no MIN/MAX, the keyparts after the last group part can be + referenced only in equalities with constants, and the referenced keyparts + must form a sequence without any gaps that starts immediately after the + last group keypart. + */ + key_parts= table->actual_n_key_parts(cur_index_info); + last_part= cur_index_info->key_part + key_parts; + first_non_group_part= (cur_group_key_parts < key_parts) ? + cur_index_info->key_part + cur_group_key_parts : + NULL; + first_non_infix_part= min_max_arg_part ? + (min_max_arg_part < last_part) ? 
+ min_max_arg_part : + NULL : + NULL; + if (first_non_group_part && + (!min_max_arg_part || (min_max_arg_part - first_non_group_part > 0))) + { + if (tree) + { + SEL_ARG *index_range_tree= tree->keys[cur_param_idx]; + if (!get_constant_key_infix(cur_index_info, index_range_tree, + first_non_group_part, min_max_arg_part, + last_part, thd, cur_key_infix, + &cur_key_infix_len, + &first_non_infix_part)) + { + cause= "nonconst equality gap attribute"; + goto next_index; + } + } + else if (min_max_arg_part && + (min_max_arg_part - first_non_group_part > 0)) + { + /* + There is a gap but no range tree, thus no predicates at all for the + non-group keyparts. + */ + cause= "no nongroup keypart predicate"; + goto next_index; + } + else if (first_non_group_part && join->conds) + { + /* + If there is no MIN/MAX function in the query, but some index + key part is referenced in the WHERE clause, then this index + cannot be used because the WHERE condition over the keypart's + field cannot be 'pushed' to the index (because there is no + range 'tree'), and the WHERE clause must be evaluated before + GROUP BY/DISTINCT. + */ + /* + Store the first and last keyparts that need to be analyzed + into one array that can be passed as parameter. + */ + KEY_PART_INFO *key_part_range[2]; + key_part_range[0]= first_non_group_part; + key_part_range[1]= last_part; + + /* Check if cur_part is referenced in the WHERE clause. */ + if (join->conds->walk(&Item::find_item_in_field_list_processor, true, + key_part_range)) + { + cause= "keypart reference from where clause"; + goto next_index; + } + } + } + + /* + Test (WA1) partially - that no other keypart after the last infix part is + referenced in the query. 
+ */ + if (first_non_infix_part) + { + cur_part= first_non_infix_part + + (min_max_arg_part && (min_max_arg_part < last_part)); + for (; cur_part != last_part; cur_part++) + { + if (bitmap_is_set(table->read_set, cur_part->field->field_index)) + { + cause= "keypart after infix in query"; + goto next_index; + } + } + } + + /** + Test WA2:If there are conditions on a column C participating in + MIN/MAX, those conditions must be conjunctions to all earlier + keyparts. Otherwise, Loose Index Scan cannot be used. + */ + if (tree && min_max_arg_item) + { + SEL_ARG *index_range_tree= tree->keys[cur_param_idx]; + SEL_ARG *cur_range= NULL; + if (get_sel_arg_for_keypart(min_max_arg_part->field, + index_range_tree, &cur_range) || + (cur_range && cur_range->type != SEL_ARG::KEY_RANGE)) + { + cause= "minmax keypart in disjunctive query"; + goto next_index; + } + } + + /* If we got to this point, cur_index_info passes the test. */ + key_infix_parts= cur_key_infix_len ? (uint) + (first_non_infix_part - first_non_group_part) : 0; + cur_used_key_parts= cur_group_key_parts + key_infix_parts; + + /* Compute the cost of using this index. */ + if (tree) + { + if ((cur_index_tree= tree->keys[cur_param_idx])) + { + cur_quick_prefix_records= param->quick_rows[cur_index]; + if (unlikely(cur_index_tree && thd->trace_started())) + { + Json_writer_array trace_range(thd, "ranges"); + trace_ranges(&trace_range, param, cur_param_idx, + cur_index_tree, cur_index_info->key_part); + } + } + else + cur_quick_prefix_records= HA_POS_ERROR; + } + cost_group_min_max(table, cur_index_info, cur_used_key_parts, + cur_group_key_parts, tree, cur_index_tree, + cur_quick_prefix_records, have_min, have_max, + &cur_read_cost, &cur_records); + /* + If cur_read_cost is lower than best_read_cost use cur_index. + Do not compare doubles directly because they may have different + representations (64 vs. 80 bits). 
+ */ + trace_idx.add("rows", cur_records).add("cost", cur_read_cost); + + if (cur_read_cost < best_read_cost - (DBL_EPSILON * cur_read_cost)) + { + index_info= cur_index_info; + index= cur_index; + best_read_cost= cur_read_cost; + best_records= cur_records; + best_index_tree= cur_index_tree; + best_quick_prefix_records= cur_quick_prefix_records; + best_param_idx= cur_param_idx; + group_key_parts= cur_group_key_parts; + group_prefix_len= cur_group_prefix_len; + key_infix_len= cur_key_infix_len; + if (key_infix_len) + memcpy (key_infix, cur_key_infix, sizeof (key_infix)); + used_key_parts= cur_used_key_parts; + } + + next_index: + if (cause) + { + trace_idx.add("usable", false).add("cause", cause); + cause= NULL; + } + } + + trace_indexes.end(); + + if (!index_info) /* No usable index found. */ + DBUG_RETURN(NULL); + + /* Check (SA3) for the where clause. */ + bool has_min_max_fld= false, has_other_fld= false; + if (join->conds && min_max_arg_item && + !check_group_min_max_predicates(join->conds, min_max_arg_item, + (index_info->flags & HA_SPATIAL) ? + Field::itMBR : Field::itRAW, + &has_min_max_fld, &has_other_fld)) + { + trace_group.add("usable", false) + .add("cause", "unsupported predicate on agg attribute"); + DBUG_RETURN(NULL); + } + + /* + Check (SA6) if clustered key is used + */ + if (is_agg_distinct && table->file->is_clustering_key(index)) + { + trace_group.add("usable", false) + .add("cause", "index is clustered"); + DBUG_RETURN(NULL); + } + + /* The query passes all tests, so construct a new TRP object. */ + read_plan= new (param->mem_root) + TRP_GROUP_MIN_MAX(have_min, have_max, is_agg_distinct, + min_max_arg_part, + group_prefix_len, used_key_parts, + group_key_parts, index_info, index, + key_infix_len, + (key_infix_len > 0) ? 
key_infix : NULL, + tree, best_index_tree, best_param_idx, + best_quick_prefix_records); + if (read_plan) + { + if (tree && read_plan->quick_prefix_records == 0) + DBUG_RETURN(NULL); + + read_plan->read_cost= best_read_cost; + read_plan->records= best_records; + if (read_time < best_read_cost && is_agg_distinct) + { + trace_group.add("index_scan", true); + read_plan->read_cost= 0; + read_plan->use_index_scan(); + } + + DBUG_PRINT("info", + ("Returning group min/max plan: cost: %g, records: %lu", + read_plan->read_cost, (ulong) read_plan->records)); + } + + DBUG_RETURN(read_plan); +} + + +/* + Check that the MIN/MAX attribute participates only in range predicates + with constants. + + SYNOPSIS + check_group_min_max_predicates() + cond [in] the expression tree being analyzed + min_max_arg [in] the field referenced by the MIN/MAX function(s) + image_type [in] + has_min_max_arg [out] true if the subtree being analyzed references + min_max_arg + has_other_arg [out] true if the subtree being analyzed references a + column other min_max_arg + + DESCRIPTION + The function walks recursively over the cond tree representing a WHERE + clause, and checks condition (SA3) - if a field is referenced by a MIN/MAX + aggregate function, it is referenced only by one of the following + predicates $FUNC$: + {=, !=, <, <=, >, >=, between, is [not] null, multiple equal}. + In addition the function checks that the WHERE condition is equivalent to + "cond1 AND cond2" where : + cond1 - does not use min_max_column at all. + cond2 - is an AND/OR tree with leaves in form + "$FUNC$(min_max_column[, const])". 
+ + RETURN + TRUE if cond passes the test + FALSE o/w +*/ + +static bool +check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item, + Field::imagetype image_type, + bool *has_min_max_arg, bool *has_other_arg) +{ + DBUG_ENTER("check_group_min_max_predicates"); + DBUG_ASSERT(cond && min_max_arg_item); + + cond= cond->real_item(); + Item::Type cond_type= cond->real_type(); + if (cond_type == Item::COND_ITEM) /* 'AND' or 'OR' */ + { + DBUG_PRINT("info", ("Analyzing: %s", ((Item_func*) cond)->func_name())); + List_iterator_fast li(*((Item_cond*) cond)->argument_list()); + Item *and_or_arg; + Item_func::Functype func_type= ((Item_cond*) cond)->functype(); + bool has_min_max= false, has_other= false; + while ((and_or_arg= li++)) + { + /* + The WHERE clause doesn't pass the condition if: + (1) any subtree doesn't pass the condition or + (2) the subtree passes the test, but it is an OR and it references both + the min/max argument and other columns. + */ + if (!check_group_min_max_predicates(and_or_arg, min_max_arg_item, //1 + image_type, + &has_min_max, &has_other) || + (func_type == Item_func::COND_OR_FUNC && has_min_max && has_other))//2 + DBUG_RETURN(FALSE); + } + *has_min_max_arg= has_min_max || *has_min_max_arg; + *has_other_arg= has_other || *has_other_arg; + DBUG_RETURN(TRUE); + } + + /* + Disallow loose index scan if the MIN/MAX argument field is referenced by + a subquery in the WHERE clause. 
+ */ + + if (unlikely(cond_type == Item::SUBSELECT_ITEM)) + { + Item_subselect *subs_cond= (Item_subselect*) cond; + if (subs_cond->is_correlated) + { + DBUG_ASSERT(subs_cond->upper_refs.elements > 0); + List_iterator_fast + li(subs_cond->upper_refs); + Item_subselect::Ref_to_outside *dep; + while ((dep= li++)) + { + if (dep->item->eq(min_max_arg_item, FALSE)) + DBUG_RETURN(FALSE); + } + } + DBUG_RETURN(TRUE); + } + /* + Subquery with IS [NOT] NULL + TODO: Look into the cache_item and optimize it like we do for + subselect's above + */ + if (unlikely(cond_type == Item::CACHE_ITEM)) + DBUG_RETURN(cond->const_item()); + + /* + Condition of the form 'field' is equivalent to 'field <> 0' and thus + satisfies the SA3 condition. + */ + if (cond_type == Item::FIELD_ITEM) + { + DBUG_PRINT("info", ("Analyzing: %s", cond->full_name())); + if (min_max_arg_item->eq((Item_field*)cond, 1)) + *has_min_max_arg= true; + else + *has_other_arg= true; + DBUG_RETURN(TRUE); + } + + /* We presume that at this point there are no other Items than functions. */ + DBUG_ASSERT(cond_type == Item::FUNC_ITEM); + if (unlikely(cond_type != Item::FUNC_ITEM)) /* Safety */ + DBUG_RETURN(FALSE); + + /* Test if cond references only group-by or non-group fields. */ + Item_func *pred= (Item_func*) cond; + Item_func::Functype pred_type= pred->functype(); + DBUG_PRINT("info", ("Analyzing: %s", pred->func_name())); + if (pred_type == Item_func::MULT_EQUAL_FUNC) + { + /* + Check that each field in a multiple equality is either a constant or + it is a reference to the min/max argument, or it doesn't contain the + min/max argument at all. 
+ */ + Item_equal_fields_iterator eq_it(*((Item_equal*)pred)); + Item *eq_item; + bool has_min_max= false, has_other= false; + while ((eq_item= eq_it++)) + { + if (min_max_arg_item->eq(eq_item->real_item(), 1)) + has_min_max= true; + else + has_other= true; + } + *has_min_max_arg= has_min_max || *has_min_max_arg; + *has_other_arg= has_other || *has_other_arg; + DBUG_RETURN(!(has_min_max && has_other)); + } + + Item **arguments= pred->arguments(); + Item *cur_arg; + bool has_min_max= false, has_other= false; + for (uint arg_idx= 0; arg_idx < pred->argument_count (); arg_idx++) + { + cur_arg= arguments[arg_idx]->real_item(); + DBUG_PRINT("info", ("cur_arg: %s", cur_arg->full_name())); + if (cur_arg->type() == Item::FIELD_ITEM) + { + if (min_max_arg_item->eq(cur_arg, 1)) + { + has_min_max= true; + /* + If pred references the MIN/MAX argument, check whether pred is a range + condition that compares the MIN/MAX argument with a constant. + */ + if (pred_type != Item_func::EQUAL_FUNC && + pred_type != Item_func::LT_FUNC && + pred_type != Item_func::LE_FUNC && + pred_type != Item_func::GT_FUNC && + pred_type != Item_func::GE_FUNC && + pred_type != Item_func::BETWEEN && + pred_type != Item_func::ISNULL_FUNC && + pred_type != Item_func::ISNOTNULL_FUNC && + pred_type != Item_func::EQ_FUNC && + pred_type != Item_func::NE_FUNC) + DBUG_RETURN(FALSE); + + /* Check that pred compares min_max_arg_item with a constant. */ + Item *args[3]; + bzero(args, 3 * sizeof(Item*)); + bool inv; + /* Test if this is a comparison of a field and a constant. */ + if (!simple_pred(pred, args, &inv)) + DBUG_RETURN(FALSE); + + /* + Follow the logic in Item_func_ne::get_func_mm_tree(): condition + in form "tbl.primary_key <> const" is not used to produce intervals. + + If the condition doesn't have an equivalent interval, this means we + fail LooseScan's condition SA3. Return FALSE to indicate this. 
+ */ + if (pred_type == Item_func::NE_FUNC && + is_field_an_unique_index(min_max_arg_item->field)) + DBUG_RETURN(FALSE); + + if (args[0] && args[1]) // this is a binary function or BETWEEN + { + DBUG_ASSERT(pred->fixed_type_handler()); + DBUG_ASSERT(pred->fixed_type_handler()->is_bool_type()); + Item_bool_func *bool_func= (Item_bool_func*) pred; + Field *field= min_max_arg_item->field; + if (!args[2]) // this is a binary function + { + if (field->can_optimize_group_min_max(bool_func, args[1]) != + Data_type_compatibility::OK) + DBUG_RETURN(FALSE); + } + else // this is BETWEEN + { + if (field->can_optimize_group_min_max(bool_func, args[1]) != + Data_type_compatibility::OK || + field->can_optimize_group_min_max(bool_func, args[2]) != + Data_type_compatibility::OK) + DBUG_RETURN(FALSE); + } + } + } + else + has_other= true; + } + else if (cur_arg->type() == Item::FUNC_ITEM) + { + if (!check_group_min_max_predicates(cur_arg, min_max_arg_item, image_type, + &has_min_max, &has_other)) + DBUG_RETURN(FALSE); + } + else if (cur_arg->can_eval_in_optimize()) + { + /* + For predicates of the form "const OP expr" we also have to check 'expr' + to make a decision. + */ + continue; + } + else + DBUG_RETURN(FALSE); + if(has_min_max && has_other) + DBUG_RETURN(FALSE); + } + *has_min_max_arg= has_min_max || *has_min_max_arg; + *has_other_arg= has_other || *has_other_arg; + + DBUG_RETURN(TRUE); +} + + +/* + Get the SEL_ARG tree 'tree' for the keypart covering 'field', if + any. 'tree' must be a unique conjunction to ALL predicates in earlier + keyparts of 'keypart_tree'. + + E.g., if 'keypart_tree' is for a composite index (kp1,kp2) and kp2 + covers 'field', all these conditions satisfies the requirement: + + 1. "(kp1=2 OR kp1=3) AND kp2=10" => returns "kp2=10" + 2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=10)" => returns "kp2=10" + 3. "(kp1=2 AND (kp2=10 OR kp2=11)) OR (kp1=3 AND (kp2=10 OR kp2=11))" + => returns "kp2=10 OR kp2=11" + + whereas these do not + 1. 
"(kp1=2 AND kp2=10) OR kp1=3" + 2. "(kp1=2 AND kp2=10) OR (kp1=3 AND kp2=11)" + 3. "(kp1=2 AND kp2=10) OR (kp1=3 AND (kp2=10 OR kp2=11))" + + This function effectively tests requirement WA2. In combination with + a test that the returned tree has no more than one range it is also + a test of NGA3. + + @param[in] field The field we want the SEL_ARG tree for + @param[in] keypart_tree Root node of the SEL_ARG* tree for the index + @param[out] cur_range The SEL_ARG tree, if any, for the keypart + covering field 'keypart_field' + @retval true 'keypart_tree' contained a predicate for 'field' that + is not conjunction to all predicates on earlier keyparts + @retval false otherwise +*/ + +static bool +get_sel_arg_for_keypart(Field *field, + SEL_ARG *keypart_tree, + SEL_ARG **cur_range) +{ + if (keypart_tree == NULL) + return false; + if (keypart_tree->field->eq(field)) + { + *cur_range= keypart_tree; + return false; + } + + SEL_ARG *tree_first_range= NULL; + SEL_ARG *first_kp= keypart_tree->first(); + + for (SEL_ARG *cur_kp= first_kp; cur_kp; cur_kp= cur_kp->next) + { + SEL_ARG *curr_tree= NULL; + if (cur_kp->next_key_part) + { + if (get_sel_arg_for_keypart(field, + cur_kp->next_key_part, + &curr_tree)) + return true; + } + /* + Check if the SEL_ARG tree for 'field' is identical for all ranges in + 'keypart_tree + */ + if (cur_kp == first_kp) + tree_first_range= curr_tree; + else if (!all_same(tree_first_range, curr_tree)) + return true; + } + *cur_range= tree_first_range; + return false; +} + +/* + Extract a sequence of constants from a conjunction of equality predicates. + + SYNOPSIS + get_constant_key_infix() + index_info [in] Descriptor of the chosen index. 
+ index_range_tree [in] Range tree for the chosen index + first_non_group_part [in] First index part after group attribute parts + min_max_arg_part [in] The keypart of the MIN/MAX argument if any + last_part [in] Last keypart of the index + thd [in] Current thread + key_infix [out] Infix of constants to be used for index lookup + key_infix_len [out] Length of the infix + first_non_infix_part [out] The first keypart after the infix (if any) + + DESCRIPTION + Test conditions (NGA1, NGA2, NGA3) from get_best_group_min_max(). Namely, + for each keypart field NG_i not in GROUP-BY, check that there is exactly one + constant equality predicate among conds with the form (NG_i = const_ci) or + (const_ci = NG_i).. In addition, there can only be one range when there is + such a gap. + Thus all the NGF_i attributes must fill the 'gap' between the last group-by + attribute and the MIN/MAX attribute in the index (if present). Also ensure + that there is only a single range on NGF_i (NGA3). If these + conditions hold, copy each constant from its corresponding predicate into + key_infix, in the order its NG_i attribute appears in the index, and update + key_infix_len with the total length of the key parts in key_infix. + + RETURN + TRUE if the index passes the test + FALSE o/w +*/ +static bool +get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree, + KEY_PART_INFO *first_non_group_part, + KEY_PART_INFO *min_max_arg_part, + KEY_PART_INFO *last_part, THD *thd, + uchar *key_infix, uint *key_infix_len, + KEY_PART_INFO **first_non_infix_part) +{ + KEY_PART_INFO *cur_part; + /* End part for the first loop below. */ + KEY_PART_INFO *end_part= min_max_arg_part ? min_max_arg_part : last_part; + + *key_infix_len= 0; + uchar *key_ptr= key_infix; + for (cur_part= first_non_group_part; cur_part != end_part; cur_part++) + { + SEL_ARG *cur_range= NULL; + /* + Check NGA3: + 1. 
get_sel_arg_for_keypart gets the range tree for the 'field' and also + checks for a unique conjunction of this tree with all the predicates + on the earlier keyparts in the index. + 2. Check for multiple ranges on the found keypart tree. + + We assume that index_range_tree points to the leftmost keypart in + the index. + */ + if (get_sel_arg_for_keypart(cur_part->field, index_range_tree, + &cur_range)) + return false; + + if (cur_range && cur_range->elements > 1) + return false; + + if (!cur_range || cur_range->type != SEL_ARG::KEY_RANGE) + { + if (min_max_arg_part) + return false; /* The current keypart has no range predicates at all. */ + else + { + *first_non_infix_part= cur_part; + return true; + } + } + + if ((cur_range->min_flag & NO_MIN_RANGE) || + (cur_range->max_flag & NO_MAX_RANGE) || + (cur_range->min_flag & NEAR_MIN) || (cur_range->max_flag & NEAR_MAX)) + return false; + + uint field_length= cur_part->store_length; + if (cur_range->maybe_null && + cur_range->min_value[0] && cur_range->max_value[0]) + { + /* + cur_range specifies 'IS NULL'. In this case the argument points + to a "null value" (is_null_string) that may not always be long + enough for a direct memcpy to a field. + */ + DBUG_ASSERT (field_length > 0); + *key_ptr= 1; + bzero(key_ptr+1,field_length-1); + key_ptr+= field_length; + *key_infix_len+= field_length; + } + else if (memcmp(cur_range->min_value, cur_range->max_value, field_length) == 0) + { /* cur_range specifies an equality condition. */ + memcpy(key_ptr, cur_range->min_value, field_length); + key_ptr+= field_length; + *key_infix_len+= field_length; + } + else + return false; + } + + if (!min_max_arg_part && (cur_part == last_part)) + *first_non_infix_part= last_part; + + return TRUE; +} + + +/* + Find the key part referenced by a field. 
+ + SYNOPSIS + get_field_keypart() + index descriptor of an index + field field that possibly references some key part in index + + NOTES + The return value can be used to get a KEY_PART_INFO pointer by + part= index->key_part + get_field_keypart(...) - 1; + + RETURN + Positive number which is the consecutive number of the key part, or + 0 if field does not reference any index field. +*/ + +static inline uint +get_field_keypart(KEY *index, Field *field) +{ + KEY_PART_INFO *part, *end; + + for (part= index->key_part, + end= part + field->table->actual_n_key_parts(index); + part < end; part++) + { + if (field->eq(part->field)) + return (uint)(part - index->key_part + 1); + } + return 0; +} + + +/* + Compute the cost of a quick_group_min_max_select for a particular index. + + SYNOPSIS + cost_group_min_max() + table [in] The table being accessed + index_info [in] The index used to access the table + used_key_parts [in] Number of key parts used to access the index + group_key_parts [in] Number of index key parts in the group prefix + range_tree [in] Tree of ranges for all indexes + index_tree [in] The range tree for the current index + quick_prefix_records [in] Number of records retrieved by the internally + used quick range select if any + have_min [in] True if there is a MIN function + have_max [in] True if there is a MAX function + read_cost [out] The cost to retrieve rows via this quick select + records [out] The number of rows retrieved + + DESCRIPTION + This method computes the access cost of a TRP_GROUP_MIN_MAX instance and + the number of rows returned. + + NOTES + The cost computation distinguishes several cases: + 1) No equality predicates over non-group attributes (thus no key_infix). 
+ If groups are bigger than blocks on the average, then we assume that it + is very unlikely that block ends are aligned with group ends, thus even + if we look for both MIN and MAX keys, all pairs of neighbor MIN/MAX + keys, except for the first MIN and the last MAX keys, will be in the + same block. If groups are smaller than blocks, then we are going to + read all blocks. + 2) There are equality predicates over non-group attributes. + In this case the group prefix is extended by additional constants, and + as a result the min/max values are inside sub-groups of the original + groups. The number of blocks that will be read depends on whether the + ends of these sub-groups will be contained in the same or in different + blocks. We compute the probability for the two ends of a subgroup to be + in two different blocks as the ratio of: + - the number of positions of the left-end of a subgroup inside a group, + such that the right end of the subgroup is past the end of the buffer + containing the left-end, and + - the total number of possible positions for the left-end of the + subgroup, which is the number of keys in the containing group. + We assume it is very unlikely that two ends of subsequent subgroups are + in the same block. + 3) The are range predicates over the group attributes. + Then some groups may be filtered by the range predicates. We use the + selectivity of the range predicates to decide how many groups will be + filtered. + + TODO + - Take into account the optional range predicates over the MIN/MAX + argument. + - Check if we have a PK index and we use all cols - then each key is a + group, and it will be better to use an index scan. 
+ + RETURN + None +*/ + +void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, + uint group_key_parts, SEL_TREE *range_tree, + SEL_ARG *index_tree, ha_rows quick_prefix_records, + bool have_min, bool have_max, + double *read_cost, ha_rows *records) +{ + ha_rows table_records; + ha_rows num_groups; + ha_rows num_blocks; + uint keys_per_block; + ha_rows keys_per_group; + ha_rows keys_per_subgroup; /* Average number of keys in sub-groups */ + /* formed by a key infix. */ + double p_overlap; /* Probability that a sub-group overlaps two blocks. */ + double quick_prefix_selectivity; + double io_cost; + DBUG_ENTER("cost_group_min_max"); + + table_records= table->stat_records(); + /* Assume block is 75 % full */ + keys_per_block= (uint) (table->file->stats.block_size * 3 / 4 / + (index_info->key_length + table->file->ref_length) + + 1); + num_blocks= (ha_rows)(table_records / keys_per_block) + 1; + + /* Compute the number of keys in a group. */ + if (!group_key_parts) + { + /* Summary over the whole table */ + keys_per_group= table_records; + } + else + { + keys_per_group= (ha_rows) index_info->actual_rec_per_key(group_key_parts - + 1); + } + + if (keys_per_group == 0) /* If there is no statistics try to guess */ + /* each group contains 10% of all records */ + keys_per_group= (table_records / 10) + 1; + num_groups= (table_records / keys_per_group) + 1; + + /* Apply the selectivity of the quick select for group prefixes. */ + if (range_tree && (quick_prefix_records != HA_POS_ERROR)) + { + quick_prefix_selectivity= (double) quick_prefix_records / + (double) table_records; + num_groups= (ha_rows) rint(num_groups * quick_prefix_selectivity); + set_if_bigger(num_groups, 1); + } + + if (used_key_parts > group_key_parts) + { /* + Compute the probability that two ends of a subgroup are inside + different blocks. 
+ */ + keys_per_subgroup= (ha_rows) index_info->actual_rec_per_key(used_key_parts - 1); + if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */ + p_overlap= 1.0; /* a block, it will overlap at least two blocks. */ + else + { + double blocks_per_group= (double) num_blocks / (double) num_groups; + p_overlap= (blocks_per_group * (keys_per_subgroup - 1)) / keys_per_group; + p_overlap= MY_MIN(p_overlap, 1.0); + } + io_cost= (double) MY_MIN(num_groups * (1 + p_overlap), num_blocks); + } + else + io_cost= (keys_per_group > keys_per_block) ? + (have_min && have_max) ? (double) (num_groups + 1) : + (double) num_groups : + (double) num_blocks; + + /* + CPU cost must be comparable to that of an index scan as computed + in SQL_SELECT::test_quick_select(). When the groups are small, + e.g. for a unique index, using index scan will be cheaper since it + reads the next record without having to re-position to it on every + group. To make the CPU cost reflect this, we estimate the CPU cost + as the sum of: + 1. Cost for evaluating the condition (similarly as for index scan). + 2. Cost for navigating the index structure (assuming a b-tree). + Note: We only add the cost for one comparision per block. For a + b-tree the number of comparisons will be larger. + TODO: This cost should be provided by the storage engine. + */ + const double tree_traversal_cost= + ceil(log(static_cast(table_records))/ + log(static_cast(keys_per_block))) * + 1/(2*TIME_FOR_COMPARE); + + const double cpu_cost= num_groups * + (tree_traversal_cost + 1/TIME_FOR_COMPARE_IDX); + + *read_cost= io_cost + cpu_cost; + *records= num_groups; + + DBUG_PRINT("info", + ("table rows: %lu keys/block: %u keys/group: %lu " + "result rows: %lu blocks: %lu", + (ulong) table_records, keys_per_block, (ulong) keys_per_group, + (ulong) *records, (ulong) num_blocks)); + DBUG_VOID_RETURN; +} + + +/* + Construct a new quick select object for queries with group by with min/max. 
  SYNOPSIS
    TRP_GROUP_MIN_MAX::make_quick()
    param              Parameter from test_quick_select
    retrieve_full_rows ignored
    parent_alloc       Memory pool to use, if any.

  NOTES
    Make_quick ignores the retrieve_full_rows parameter because
    QUICK_GROUP_MIN_MAX_SELECT always performs 'index only' scans.
    The other parameter are ignored as well because all necessary
    data to create the QUICK object is computed at this TRP creation
    time.

  RETURN
    New QUICK_GROUP_MIN_MAX_SELECT object if successfully created,
    NULL otherwise.
*/

QUICK_SELECT_I *
TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
                              MEM_ROOT *parent_alloc)
{
  QUICK_GROUP_MIN_MAX_SELECT *quick;
  DBUG_ENTER("TRP_GROUP_MIN_MAX::make_quick");

  quick= new QUICK_GROUP_MIN_MAX_SELECT(param->table,
                                        param->thd->lex->current_select->join,
                                        have_min, have_max,
                                        have_agg_distinct, min_max_arg_part,
                                        group_prefix_len, group_key_parts,
                                        used_key_parts, index_info, index,
                                        read_cost, records, key_infix_len,
                                        key_infix, parent_alloc, is_index_scan);
  if (!quick)
    DBUG_RETURN(NULL);

  /* init() performs the allocations that could not go in the constructor. */
  if (quick->init())
  {
    delete quick;
    DBUG_RETURN(NULL);
  }

  if (range_tree)
  {
    DBUG_ASSERT(quick_prefix_records > 0);
    if (quick_prefix_records == HA_POS_ERROR)
      quick->quick_prefix_select= NULL; /* Can't construct a quick select. */
    else
      /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
      quick->quick_prefix_select= get_quick_select(param, param_idx,
                                                   index_tree,
                                                   HA_MRR_USE_DEFAULT_IMPL, 0,
                                                   &quick->alloc);

    /*
      Extract the SEL_ARG subtree that contains only ranges for the MIN/MAX
      attribute, and create an array of QUICK_RANGES to be used by the
      new quick select.
    */
    if (min_max_arg_part)
    {
      SEL_ARG *min_max_range= index_tree;
      while (min_max_range) /* Find the tree for the MIN/MAX key part. */
      {
        if (min_max_range->field->eq(min_max_arg_part->field))
          break;
        min_max_range= min_max_range->next_key_part;
      }
      /* Scroll to the leftmost interval for the MIN/MAX argument. */
      while (min_max_range && min_max_range->prev)
        min_max_range= min_max_range->prev;
      /* Create an array of QUICK_RANGEs for the MIN/MAX argument. */
      while (min_max_range)
      {
        if (quick->add_range(min_max_range))
        {
          delete quick;
          quick= NULL;
          DBUG_RETURN(NULL);
        }
        min_max_range= min_max_range->next;
      }
    }
  }
  else
    quick->quick_prefix_select= NULL;

  quick->update_key_stat();
  quick->adjust_prefix_ranges();

  DBUG_RETURN(quick);
}


/*
  Construct new quick select for group queries with min/max.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::QUICK_GROUP_MIN_MAX_SELECT()
    table             The table being accessed
    join              Descriptor of the current query
    have_min          TRUE if the query selects a MIN function
    have_max          TRUE if the query selects a MAX function
    min_max_arg_part  The only argument field of all MIN/MAX functions
    group_prefix_len  Length of all key parts in the group prefix
    prefix_key_parts  All key parts in the group prefix
    index_info        The index chosen for data access
    use_index         The id of index_info
    read_cost         Cost of this access method
    records           Number of records returned
    key_infix_len     Length of the key infix appended to the group prefix
    key_infix         Infix of constants from equality predicates
    parent_alloc      Memory pool for this and quick_prefix_select data
    is_index_scan     get the next different key not by jumping on it via
                      index read, but by scanning until the end of the
                      rows with equal key value.
+ + RETURN + None +*/ + +QUICK_GROUP_MIN_MAX_SELECT:: +QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join_arg, bool have_min_arg, + bool have_max_arg, bool have_agg_distinct_arg, + KEY_PART_INFO *min_max_arg_part_arg, + uint group_prefix_len_arg, uint group_key_parts_arg, + uint used_key_parts_arg, KEY *index_info_arg, + uint use_index, double read_cost_arg, + ha_rows records_arg, uint key_infix_len_arg, + uchar *key_infix_arg, MEM_ROOT *parent_alloc, + bool is_index_scan_arg) + :file(table->file), join(join_arg), index_info(index_info_arg), + group_prefix_len(group_prefix_len_arg), + group_key_parts(group_key_parts_arg), have_min(have_min_arg), + have_max(have_max_arg), have_agg_distinct(have_agg_distinct_arg), + seen_first_key(FALSE), min_max_arg_part(min_max_arg_part_arg), + key_infix(key_infix_arg), key_infix_len(key_infix_len_arg), + min_functions_it(NULL), max_functions_it(NULL), + is_index_scan(is_index_scan_arg) +{ + head= table; + index= use_index; + record= head->record[0]; + tmp_record= head->record[1]; + read_time= read_cost_arg; + records= records_arg; + used_key_parts= used_key_parts_arg; + real_key_parts= used_key_parts_arg; + real_prefix_len= group_prefix_len + key_infix_len; + group_prefix= NULL; + min_max_arg_len= min_max_arg_part ? min_max_arg_part->store_length : 0; + + /* + We can't have parent_alloc set as the init function can't handle this case + yet. + */ + DBUG_ASSERT(!parent_alloc); + if (!parent_alloc) + { + THD *thd= join->thd; + init_sql_alloc(key_memory_quick_range_select_root, &alloc, + thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC)); + thd->mem_root= &alloc; + } + else + bzero(&alloc, sizeof(MEM_ROOT)); // ensure that it's not used +} + + +/* + Do post-constructor initialization. + + SYNOPSIS + QUICK_GROUP_MIN_MAX_SELECT::init() + + DESCRIPTION + The method performs initialization that cannot be done in the constructor + such as memory allocations that may fail. 
It allocates memory for the + group prefix and inifix buffers, and for the lists of MIN/MAX item to be + updated during execution. + + RETURN + 0 OK + other Error code +*/ + +int QUICK_GROUP_MIN_MAX_SELECT::init() +{ + if (group_prefix) /* Already initialized. */ + return 0; + + /* + We allocate one byte more to serve the case when the last field in + the buffer is compared using uint3korr (e.g. a Field_newdate field) + */ + if (!(last_prefix= (uchar*) alloc_root(&alloc, group_prefix_len+1))) + return 1; + /* + We may use group_prefix to store keys with all select fields, so allocate + enough space for it. + We allocate one byte more to serve the case when the last field in + the buffer is compared using uint3korr (e.g. a Field_newdate field) + */ + if (!(group_prefix= (uchar*) alloc_root(&alloc, + real_prefix_len+min_max_arg_len+1))) + return 1; + + if (key_infix_len > 0) + { + /* + The memory location pointed to by key_infix will be deleted soon, so + allocate a new buffer and copy the key_infix into it. 
+ */ + uchar *tmp_key_infix= (uchar*) alloc_root(&alloc, key_infix_len); + if (!tmp_key_infix) + return 1; + memcpy(tmp_key_infix, this->key_infix, key_infix_len); + this->key_infix= tmp_key_infix; + } + + if (min_max_arg_part) + { + if (my_init_dynamic_array(PSI_INSTRUMENT_ME, &min_max_ranges, + sizeof(QUICK_RANGE*), 16, 16, + MYF(MY_THREAD_SPECIFIC))) + return 1; + + if (have_min) + { + if (!(min_functions= new List)) + return 1; + } + else + min_functions= NULL; + if (have_max) + { + if (!(max_functions= new List)) + return 1; + } + else + max_functions= NULL; + + Item_sum *min_max_item; + Item_sum **func_ptr= join->sum_funcs; + while ((min_max_item= *(func_ptr++))) + { + if (have_min && (min_max_item->sum_func() == Item_sum::MIN_FUNC)) + min_functions->push_back(min_max_item); + else if (have_max && (min_max_item->sum_func() == Item_sum::MAX_FUNC)) + max_functions->push_back(min_max_item); + } + + if (have_min) + { + if (!(min_functions_it= new List_iterator(*min_functions))) + return 1; + } + + if (have_max) + { + if (!(max_functions_it= new List_iterator(*max_functions))) + return 1; + } + } + else + min_max_ranges.elements= 0; + + return 0; +} + + +QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT() +{ + DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT"); + if (file->inited != handler::NONE) + { + DBUG_ASSERT(file == head->file); + head->file->ha_end_keyread(); + /* + There may be a code path when the same table was first accessed by index, + then the index is closed, and the table is scanned (order by + loose scan). + */ + file->ha_index_or_rnd_end(); + } + if (min_max_arg_part) + delete_dynamic(&min_max_ranges); + free_root(&alloc,MYF(0)); + delete min_functions_it; + delete max_functions_it; + delete quick_prefix_select; + DBUG_VOID_RETURN; +} + + +/* + Eventually create and add a new quick range object. 
+ + SYNOPSIS + QUICK_GROUP_MIN_MAX_SELECT::add_range() + sel_range Range object from which a + + NOTES + Construct a new QUICK_RANGE object from a SEL_ARG object, and + add it to the array min_max_ranges. If sel_arg is an infinite + range, e.g. (x < 5 or x > 4), then skip it and do not construct + a quick range. + + RETURN + FALSE on success + TRUE otherwise +*/ + +bool QUICK_GROUP_MIN_MAX_SELECT::add_range(SEL_ARG *sel_range) +{ + QUICK_RANGE *range; + uint range_flag= sel_range->min_flag | sel_range->max_flag; + + /* Skip (-inf,+inf) ranges, e.g. (x < 5 or x > 4). */ + if ((range_flag & NO_MIN_RANGE) && (range_flag & NO_MAX_RANGE)) + return FALSE; + + if (!(sel_range->min_flag & NO_MIN_RANGE) && + !(sel_range->max_flag & NO_MAX_RANGE)) + { + if (sel_range->maybe_null && + sel_range->min_value[0] && sel_range->max_value[0]) + range_flag|= NULL_RANGE; /* IS NULL condition */ + else if (memcmp(sel_range->min_value, sel_range->max_value, + min_max_arg_len) == 0) + range_flag|= EQ_RANGE; /* equality condition */ + } + range= new QUICK_RANGE(join->thd, sel_range->min_value, min_max_arg_len, + make_keypart_map(sel_range->part), + sel_range->max_value, min_max_arg_len, + make_keypart_map(sel_range->part), + range_flag); + if (!range) + return TRUE; + if (insert_dynamic(&min_max_ranges, (uchar*)&range)) + return TRUE; + return FALSE; +} + + +/* + Opens the ranges if there are more conditions in quick_prefix_select than + the ones used for jumping through the prefixes. + + SYNOPSIS + QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges() + + NOTES + quick_prefix_select is made over the conditions on the whole key. + It defines a number of ranges of length x. + However when jumping through the prefixes we use only the the first + few most significant keyparts in the range key. However if there + are more keyparts to follow the ones we are using we must make the + condition on the key inclusive (because x < "ab" means + x[0] < 'a' OR (x[0] == 'a' AND x[1] < 'b'). 
    To achieve the above we must turn off the NEAR_MIN/NEAR_MAX
*/
void QUICK_GROUP_MIN_MAX_SELECT::adjust_prefix_ranges ()
{
  if (quick_prefix_select &&
      group_prefix_len < quick_prefix_select->max_used_key_length)
  {
    DYNAMIC_ARRAY *arr;
    uint inx;

    /* Make every prefix range inclusive by clearing its NEAR flags. */
    for (inx= 0, arr= &quick_prefix_select->ranges; inx < arr->elements; inx++)
    {
      QUICK_RANGE *range;

      get_dynamic(arr, (uchar*)&range, inx);
      range->flag &= ~(NEAR_MIN | NEAR_MAX);
    }
  }
}


/*
  Determine the total number and length of the keys that will be used for
  index lookup.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()

  DESCRIPTION
    The total length of the keys used for index lookup depends on whether
    there are any predicates referencing the min/max argument, and/or if
    the min/max argument field can be NULL.
    This function does an optimistic analysis whether the search key might
    be extended by a constant for the min/max keypart. It is 'optimistic'
    because during actual execution it may happen that a particular range
    is skipped, and then a shorter key will be used. However this is data
    dependent and can't be easily estimated here.

  RETURN
    None
*/

void QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
{
  max_used_key_length= real_prefix_len;
  if (min_max_ranges.elements > 0)
  {
    QUICK_RANGE *cur_range;
    if (have_min)
    { /* Check if the right-most range has a lower boundary. */
      get_dynamic(&min_max_ranges, (uchar*)&cur_range,
                  min_max_ranges.elements - 1);
      if (!(cur_range->flag & NO_MIN_RANGE))
      {
        max_used_key_length+= min_max_arg_len;
        used_key_parts++;
        return;
      }
    }
    if (have_max)
    { /* Check if the left-most range has an upper boundary. */
      get_dynamic(&min_max_ranges, (uchar*)&cur_range, 0);
      if (!(cur_range->flag & NO_MAX_RANGE))
      {
        max_used_key_length+= min_max_arg_len;
        used_key_parts++;
        return;
      }
    }
  }
  else if (have_min && min_max_arg_part &&
           min_max_arg_part->field->real_maybe_null())
  {
    /*
      If a MIN/MAX argument value is NULL, we can quickly determine
      that we're in the beginning of the next group, because NULLs
      are always < any other value. This allows us to quickly
      determine the end of the current group and jump to the next
      group (see next_min()) and thus effectively increases the
      usable key length.
    */
    max_used_key_length+= min_max_arg_len;
    used_key_parts++;
  }
}


/*
  Initialize a quick group min/max select for key retrieval.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::reset()

  DESCRIPTION
    Initialize the index chosen for access and find and store the prefix
    of the last group. The method is expensive since it performs disk access.

  RETURN
    0      OK
    other  Error code
*/

int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
{
  int result;
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");

  seen_first_key= FALSE;
  head->file->ha_start_keyread(index); /* We need only the key attributes */

  if ((result= file->ha_index_init(index,1)))
  {
    head->file->print_error(result, MYF(0));
    DBUG_RETURN(result);
  }
  if (quick_prefix_select && quick_prefix_select->reset())
    DBUG_RETURN(1);
  result= file->ha_index_last(record);
  if (result == HA_ERR_END_OF_FILE)
    DBUG_RETURN(0);
  /*
    NOTE(review): errors from ha_index_last() other than HA_ERR_END_OF_FILE
    fall through here and the function returns 0 — confirm this is intended.
  */
  /* Save the prefix of the last group. */
  key_copy(last_prefix, record, index_info, group_prefix_len);

  DBUG_RETURN(0);
}



/*
  Get the next key containing the MIN and/or MAX key for the next group.
  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::get_next()

  DESCRIPTION
    The method finds the next subsequent group of records that satisfies the
    query conditions and finds the keys that contain the MIN/MAX values for
    the key part referenced by the MIN/MAX function(s). Once a group and its
    MIN/MAX values are found, store these values in the Item_sum objects for
    the MIN/MAX functions. The rest of the values in the result row are stored
    in the Item_field::result_field of each select field. If the query does
    not contain MIN and/or MAX functions, then the function only finds the
    group prefix, which is a query answer itself.

  NOTES
    If both MIN and MAX are computed, then we use the fact that if there is
    no MIN key, there can't be a MAX key as well, so we can skip looking
    for a MAX key in this case.

  RETURN
    0                  on success
    HA_ERR_END_OF_FILE if returned all keys
    other              if some error occurred
*/

int QUICK_GROUP_MIN_MAX_SELECT::get_next()
{
  int min_res= 0;
  int max_res= 0;
#ifdef HPUX11
  /*
    volatile is required by a bug in the HP compiler due to which the
    last test of result fails.
  */
  volatile int result;
#else
  int result;
#endif
  int is_last_prefix= 0;

  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::get_next");

  /*
    Loop until a group is found that satisfies all query conditions or the last
    group is reached.
  */
  do
  {
    result= next_prefix();
    /*
      Check if this is the last group prefix. Notice that at this point
      this->record contains the current prefix in record format.
    */
    if (!result)
    {
      is_last_prefix= key_cmp(index_info->key_part, last_prefix,
                              group_prefix_len);
      DBUG_ASSERT(is_last_prefix <= 0);
    }
    else
    {
      /* A missing key means "try the next prefix"; anything else is fatal. */
      if (result == HA_ERR_KEY_NOT_FOUND)
        continue;
      break;
    }

    if (have_min)
    {
      min_res= next_min();
      if (min_res == 0)
        update_min_result();
    }
    /* If there is no MIN in the group, there is no MAX either. */
    if ((have_max && !have_min) ||
        (have_max && have_min && (min_res == 0)))
    {
      max_res= next_max();
      if (max_res == 0)
        update_max_result();
      /* If a MIN was found, a MAX must have been found as well. */
      DBUG_ASSERT((have_max && !have_min) ||
                  (have_max && have_min && (max_res == 0)));
    }
    /*
      If this is just a GROUP BY or DISTINCT without MIN or MAX and there
      are equality predicates for the key parts after the group, find the
      first sub-group with the extended prefix.
    */
    if (!have_min && !have_max && key_infix_len > 0)
      result= file->ha_index_read_map(record, group_prefix,
                                      make_prev_keypart_map(real_key_parts),
                                      HA_READ_KEY_EXACT);

    result= have_min ? min_res : have_max ? max_res : result;
  } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
           is_last_prefix != 0);

  if (result == HA_ERR_KEY_NOT_FOUND)
    result= HA_ERR_END_OF_FILE;

  DBUG_RETURN(result);
}


/*
  Retrieve the minimal key in the next group.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_min()

  DESCRIPTION
    Find the minimal key within this group such that the key satisfies the query
    conditions and NULL semantics. The found key is loaded into this->record.

  IMPLEMENTATION
    Depending on the values of min_max_ranges.elements, key_infix_len, and
    whether there is a NULL in the MIN field, this function may directly
    return without any data access. In this case we use the key loaded into
    this->record by the call to this->next_prefix() just before this call.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if no MIN key was found that fulfills all conditions.
    HA_ERR_END_OF_FILE   - "" -
    other                if some error occurred
*/

int QUICK_GROUP_MIN_MAX_SELECT::next_min()
{
  int result= 0;
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_min");

  /* Find the MIN key using the eventually extended group prefix. */
  if (min_max_ranges.elements > 0)
  {
    if ((result= next_min_in_range()))
      DBUG_RETURN(result);
  }
  else
  {
    /* Apply the constant equality conditions to the non-group select fields */
    if (key_infix_len > 0)
    {
      if ((result=
           file->ha_index_read_map(record, group_prefix,
                                   make_prev_keypart_map(real_key_parts),
                                   HA_READ_KEY_EXACT)))
        DBUG_RETURN(result);
    }

    /*
      If the min/max argument field is NULL, skip subsequent rows in the same
      group with NULL in it. Notice that:
      - if the first row in a group doesn't have a NULL in the field, no row
      in the same group has (because NULL < any other value),
      - min_max_arg_part->field->ptr points to some place in 'record'.
    */
    if (min_max_arg_part && min_max_arg_part->field->is_null())
    {
      uchar *tmp_key_buff= (uchar*)my_alloca(max_used_key_length);
      /* Find the first subsequent record without NULL in the MIN/MAX field. */
      key_copy(tmp_key_buff, record, index_info, max_used_key_length);
      result= file->ha_index_read_map(record, tmp_key_buff,
                                      make_keypart_map(real_key_parts),
                                      HA_READ_AFTER_KEY);
      /*
        Check if the new record belongs to the current group by comparing its
        prefix with the group's prefix. If it is from the next group, then the
        whole group has NULLs in the MIN/MAX field, so use the first record in
        the group as a result.
        TODO:
        It is possible to reuse this new record as the result candidate for the
        next call to next_min(), and to save one lookup in the next call. For
        this add a new member 'this->next_group_prefix'.
      */
      if (!result)
      {
        if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
          key_restore(record, tmp_key_buff, index_info, 0);
      }
      else if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE)
        result= 0; /* There is a result in any case. */
      my_afree(tmp_key_buff);
    }
  }

  /*
    If the MIN attribute is non-nullable, this->record already contains the
    MIN key in the group, so just return.
  */
  DBUG_RETURN(result);
}


/*
  Retrieve the maximal key in the next group.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_max()

  DESCRIPTION
    Lookup the maximal key of the group, and store it into this->record.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if no MAX key was found that fulfills all conditions.
    HA_ERR_END_OF_FILE   - "" -
    other                if some error occurred
*/

int QUICK_GROUP_MIN_MAX_SELECT::next_max()
{
  int result;

  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_max");

  /* Get the last key in the (possibly extended) group. */
  if (min_max_ranges.elements > 0)
    result= next_max_in_range();
  else
    result= file->ha_index_read_map(record, group_prefix,
                                    make_prev_keypart_map(real_key_parts),
                                    HA_READ_PREFIX_LAST);
  DBUG_RETURN(result);
}


/**
  Find the next different key value by skipping all the rows with the same key
  value.

  Implements a specialized loose index access method for queries
  containing aggregate functions with distinct of the form:
    SELECT [SUM|COUNT|AVG](DISTINCT a,...) FROM t
  This method comes to replace the index scan + Unique class
  (distinct selection) for loose index scan that visits all the rows of a
  covering index instead of jumping in the beginning of each group.
  TODO: Placeholder function. To be replaced by a handler API call

  @param is_index_scan     hint to use index scan instead of random index read
                           to find the next different value.
  @param file              table handler
  @param key_part          group key to compare
  @param record            row data
  @param group_prefix      current key prefix data
  @param group_prefix_len  length of the current key prefix data
  @param group_key_parts   number of the current key prefix columns
  @return status
    @retval  0  success
    @retval !0  failure
*/

static int index_next_different (bool is_index_scan, handler *file,
                                 KEY_PART_INFO *key_part, uchar * record,
                                 const uchar * group_prefix,
                                 uint group_prefix_len,
                                 uint group_key_parts)
{
  if (is_index_scan)
  {
    int result= 0;

    /* Step forward row by row until the key no longer equals the prefix. */
    while (!key_cmp (key_part, group_prefix, group_prefix_len))
    {
      result= file->ha_index_next(record);
      if (result)
        return(result);
    }
    return result;
  }
  else
    /* Jump directly to the first key after the current prefix. */
    return file->ha_index_read_map(record, group_prefix,
                                   make_prev_keypart_map(group_key_parts),
                                   HA_READ_AFTER_KEY);
}


/*
  Determine the prefix of the next group.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_prefix()

  DESCRIPTION
    Determine the prefix of the next group that satisfies the query conditions.
    If there is a range condition referencing the group attributes, use a
    QUICK_RANGE_SELECT object to retrieve the *first* key that satisfies the
    condition. If there is a key infix of constants, append this infix
    immediately after the group attributes. The possibly extended prefix is
    stored in this->group_prefix. The first key of the found group is stored in
    this->record, on which relies this->next_min().

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if there is no key with the formed prefix
    HA_ERR_END_OF_FILE   if there are no more keys
    other                if some error occurred
*/
int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
{
  int result;
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_prefix");

  if (quick_prefix_select)
  {
    uchar *cur_prefix= seen_first_key ? group_prefix : NULL;
    if ((result= quick_prefix_select->get_next_prefix(group_prefix_len,
                                                      group_key_parts,
                                                      cur_prefix)))
      DBUG_RETURN(result);
    seen_first_key= TRUE;
  }
  else
  {
    if (!seen_first_key)
    {
      result= file->ha_index_first(record);
      if (result)
        DBUG_RETURN(result);
      seen_first_key= TRUE;
    }
    else
    {
      /* Load the first key in this group into record. */
      result= index_next_different (is_index_scan, file, index_info->key_part,
                                    record, group_prefix, group_prefix_len,
                                    group_key_parts);
      if (result)
        DBUG_RETURN(result);
    }
  }

  /* Save the prefix of this group for subsequent calls. */
  key_copy(group_prefix, record, index_info, group_prefix_len);
  /* Append key_infix to group_prefix. */
  if (key_infix_len > 0)
    memcpy(group_prefix + group_prefix_len,
           key_infix, key_infix_len);

  DBUG_RETURN(0);
}


/**
  Allocate a temporary buffer, populate the buffer using the group prefix key
  and the min/max field key, and compare the result to the current key pointed
  by index_info.

  @param key    - the min or max field key
  @param length - length of "key"
*/
int
QUICK_GROUP_MIN_MAX_SELECT::cmp_min_max_key(const uchar *key, uint16 length)
{
  /*
    Allocate a buffer.
    Note, we allocate one extra byte, because some of Field_xxx::cmp(),
    e.g. Field_newdate::cmp(), use uint3korr() which actually read four bytes
    and then bit-and the read value with 0xFFFFFF.
    See "MDEV-7920 main.group_min_max fails ... with valgrind" for details.
  */
  uchar *buffer= (uchar*) my_alloca(real_prefix_len + min_max_arg_len + 1);
  /* Concatenate the group prefix key and the min/max field key */
  memcpy(buffer, group_prefix, real_prefix_len);
  memcpy(buffer + real_prefix_len, key, length);
  /* Compare the key pointed by key_info to the created key */
  int cmp_res= key_cmp(index_info->key_part, buffer,
                       real_prefix_len + min_max_arg_len);
  my_afree(buffer);
  return cmp_res;
}


/*
  Find the minimal key in a group that satisfies some range conditions for the
  min/max argument field.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()

  DESCRIPTION
    Given the sequence of ranges min_max_ranges, find the minimal key that is
    in the left-most possible range. If there is no such key, then the current
    group does not have a MIN key that satisfies the WHERE clause. If a key is
    found, its value is stored in this->record.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
                         the ranges
    HA_ERR_END_OF_FILE   - "" -
    other                if some error
*/

int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
{
  ha_rkey_function find_flag;
  key_part_map keypart_map;
  QUICK_RANGE *cur_range;
  bool found_null= FALSE;
  int result= HA_ERR_KEY_NOT_FOUND;

  DBUG_ASSERT(min_max_ranges.elements > 0);

  for (uint range_idx= 0; range_idx < min_max_ranges.elements; range_idx++)
  { /* Search from the left-most range to the right. */
    get_dynamic(&min_max_ranges, (uchar*)&cur_range, range_idx);

    /*
      If the current value for the min/max argument is bigger than the right
      boundary of cur_range, there is no need to check this range.
    */
    if (range_idx != 0 && !(cur_range->flag & NO_MAX_RANGE) &&
        (key_cmp(min_max_arg_part, (const uchar*) cur_range->max_key,
                 min_max_arg_len) == 1))
      continue;

    if (cur_range->flag & NO_MIN_RANGE)
    {
      keypart_map= make_prev_keypart_map(real_key_parts);
      find_flag= HA_READ_KEY_EXACT;
    }
    else
    {
      /* Extend the search key with the lower boundary for this range. */
      memcpy(group_prefix + real_prefix_len, cur_range->min_key,
             cur_range->min_length);
      keypart_map= make_keypart_map(real_key_parts);
      find_flag= (cur_range->flag & (EQ_RANGE | NULL_RANGE)) ?
                 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MIN) ?
                 HA_READ_AFTER_KEY : HA_READ_KEY_OR_NEXT;
    }

    result= file->ha_index_read_map(record, group_prefix, keypart_map,
                                    find_flag);
    if (result)
    {
      if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
          (cur_range->flag & (EQ_RANGE | NULL_RANGE)))
        continue; /* Check the next range. */

      /*
        In all other cases (HA_ERR_*, HA_READ_KEY_EXACT with NO_MIN_RANGE,
        HA_READ_AFTER_KEY, HA_READ_KEY_OR_NEXT) if the lookup failed for this
        range, it can't succeed for any other subsequent range.
      */
      break;
    }

    /* A key was found. */
    if (cur_range->flag & EQ_RANGE)
      break; /* No need to perform the checks below for equal keys. */

    if (cur_range->flag & NULL_RANGE)
    {
      /*
        Remember this key, and continue looking for a non-NULL key that
        satisfies some other condition.
      */
      memcpy(tmp_record, record, head->s->rec_buff_length);
      found_null= TRUE;
      continue;
    }

    /* Check if record belongs to the current group. */
    if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
    {
      result= HA_ERR_KEY_NOT_FOUND;
      continue;
    }

    /* If there is an upper limit, check if the found key is in the range. */
    if ( !(cur_range->flag & NO_MAX_RANGE) )
    {
      int cmp_res= cmp_min_max_key(cur_range->max_key, cur_range->max_length);
      /*
        The key is outside of the range if:
        the interval is open and the key is equal to the maximum boundary
        or
        the key is greater than the maximum
      */
      if (((cur_range->flag & NEAR_MAX) && cmp_res == 0) ||
          cmp_res > 0)
      {
        result= HA_ERR_KEY_NOT_FOUND;
        continue;
      }
    }
    /* If we got to this point, the current key qualifies as MIN. */
    DBUG_ASSERT(result == 0);
    break;
  }
  /*
    If there was a key with NULL in the MIN/MAX field, and there was no other
    key without NULL from the same group that satisfies some other condition,
    then use the key with the NULL.
  */
  if (found_null && result)
  {
    memcpy(record, tmp_record, head->s->rec_buff_length);
    result= 0;
  }
  return result;
}


/*
  Find the maximal key in a group that satisfies some range conditions for the
  min/max argument field.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()

  DESCRIPTION
    Given the sequence of ranges min_max_ranges, find the maximal key that is
    in the right-most possible range. If there is no such key, then the current
    group does not have a MAX key that satisfies the WHERE clause. If a key is
    found, its value is stored in this->record.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
                         the ranges
    HA_ERR_END_OF_FILE   - "" -
    other                if some error
*/

int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
{
  ha_rkey_function find_flag;
  key_part_map keypart_map;
  QUICK_RANGE *cur_range;
  int result;

  DBUG_ASSERT(min_max_ranges.elements > 0);

  for (size_t range_idx= min_max_ranges.elements; range_idx > 0; range_idx--)
  { /* Search from the right-most range to the left. */
    get_dynamic(&min_max_ranges, (uchar*)&cur_range, range_idx - 1);

    /*
      If the current value for the min/max argument is smaller than the left
      boundary of cur_range, there is no need to check this range.
    */
    if (range_idx != min_max_ranges.elements &&
        !(cur_range->flag & NO_MIN_RANGE) &&
        (key_cmp(min_max_arg_part, (const uchar*) cur_range->min_key,
                 min_max_arg_len) == -1))
      continue;

    if (cur_range->flag & NO_MAX_RANGE)
    {
      keypart_map= make_prev_keypart_map(real_key_parts);
      find_flag= HA_READ_PREFIX_LAST;
    }
    else
    {
      /* Extend the search key with the upper boundary for this range. */
      memcpy(group_prefix + real_prefix_len, cur_range->max_key,
             cur_range->max_length);
      keypart_map= make_keypart_map(real_key_parts);
      find_flag= (cur_range->flag & EQ_RANGE) ?
                 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MAX) ?
                 HA_READ_BEFORE_KEY : HA_READ_PREFIX_LAST_OR_PREV;
    }

    result= file->ha_index_read_map(record, group_prefix, keypart_map,
                                    find_flag);

    if (result)
    {
      if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
          (cur_range->flag & EQ_RANGE))
        continue; /* Check the next range. */

      /*
        If no key was found with this upper bound, there certainly are no keys
        in the ranges to the left.
      */
      return result;
    }
    /* A key was found. */
    if (cur_range->flag & EQ_RANGE)
      return 0; /* No need to perform the checks below for equal keys. */

    /* Check if record belongs to the current group. */
    if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
      continue;                              // Row not found

    /* If there is a lower limit, check if the found key is in the range. */
    if ( !(cur_range->flag & NO_MIN_RANGE) )
    {
      int cmp_res= cmp_min_max_key(cur_range->min_key, cur_range->min_length);
      /*
        The key is outside of the range if:
        the interval is open and the key is equal to the minimum boundary
        or
        the key is less than the minimum
      */
      if (((cur_range->flag & NEAR_MIN) && cmp_res == 0) ||
          cmp_res < 0)
        continue;
    }
    /* If we got to this point, the current key qualifies as MAX. */
    return result;
  }
  return HA_ERR_KEY_NOT_FOUND;
}


/*
  Update all MIN function results with the newly found value.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::update_min_result()

  DESCRIPTION
    The method iterates through all MIN functions and updates the result value
    of each function by calling Item_sum::reset(), which in turn picks the new
    result value from this->head->record[0], previously updated by
    next_min(). The updated value is stored in a member variable of each of the
    Item_sum objects, depending on the value type.

  IMPLEMENTATION
    The update must be done separately for MIN and MAX, immediately after
    next_min() was called and before next_max() is called, because both MIN and
    MAX take their result value from the same buffer this->head->record[0]
    (i.e. this->record).

  RETURN
    None
*/

void QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
{
  Item_sum *min_func;

  min_functions_it->rewind();
  while ((min_func= (*min_functions_it)++))
    min_func->reset_and_add();
}


/*
  Update all MAX function results with the newly found value.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::update_max_result()

  DESCRIPTION
    The method iterates through all MAX functions and updates the result value
    of each function by calling Item_sum::reset(), which in turn picks the new
    result value from this->head->record[0], previously updated by
    next_max(). The updated value is stored in a member variable of each of the
    Item_sum objects, depending on the value type.
+ + IMPLEMENTATION + The update must be done separately for MIN and MAX, immediately after + next_max() was called, because both MIN and MAX take their result value + from the same buffer this->head->record[0] (i.e. this->record). + + RETURN + None +*/ + +void QUICK_GROUP_MIN_MAX_SELECT::update_max_result() +{ + Item_sum *max_func; + + max_functions_it->rewind(); + while ((max_func= (*max_functions_it)++)) + max_func->reset_and_add(); +} + + +/* + Append comma-separated list of keys this quick select uses to key_names; + append comma-separated list of corresponding used lengths to used_lengths. + + SYNOPSIS + QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths() + key_names [out] Names of used indexes + used_lengths [out] Corresponding lengths of the index names + + DESCRIPTION + This method is used by select_describe to extract the names of the + indexes used by a quick select. + +*/ + +void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + bool first= TRUE; + + add_key_and_length(key_names, used_lengths, &first); +} + + +/* Check whether the number for equality ranges exceeds the set threshold */ + +bool eq_ranges_exceeds_limit(RANGE_SEQ_IF *seq, void *seq_init_param, + uint limit) +{ + KEY_MULTI_RANGE range; + range_seq_t seq_it; + uint count = 0; + + if (limit == 0) + { + /* 'Statistics instead of index dives' feature is turned off */ + return false; + } + seq_it= seq->init(seq_init_param, 0, 0); + while (!seq->next(seq_it, &range)) + { + if ((range.range_flag & EQ_RANGE) && !(range.range_flag & NULL_RANGE)) + { + if (++count >= limit) + return true; + } + } + return false; +} + +#ifndef DBUG_OFF + +static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map, + const char *msg) +{ + char buff[1024]; + DBUG_ENTER("print_sel_tree"); + + String tmp(buff,sizeof(buff),&my_charset_bin); + tmp.length(0); + for (uint idx= 0; idx < param->keys; idx++) + { + if (tree_map->is_set(idx)) + { + uint keynr= 
param->real_keynr[idx]; + if (tmp.length()) + tmp.append(','); + tmp.append(&param->table->key_info[keynr].name); + } + } + if (!tmp.length()) + tmp.append(STRING_WITH_LEN("(empty)")); + + DBUG_PRINT("info", ("SEL_TREE: %p (%s) scans: %s", tree, msg, + tmp.c_ptr_safe())); + + DBUG_VOID_RETURN; +} + + +static void print_ror_scans_arr(TABLE *table, const char *msg, + struct st_ror_scan_info **start, + struct st_ror_scan_info **end) +{ + DBUG_ENTER("print_ror_scans_arr"); + + char buff[1024]; + String tmp(buff,sizeof(buff),&my_charset_bin); + tmp.length(0); + for (;start != end; start++) + { + if (tmp.length()) + tmp.append(','); + tmp.append(&table->key_info[(*start)->keynr].name); + } + if (!tmp.length()) + tmp.append(STRING_WITH_LEN("(empty)")); + DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.c_ptr())); + DBUG_VOID_RETURN; +} + +static String dbug_print_sel_arg_buf; + +static void +print_sel_arg_key(Field *field, const uchar *key, String *out) +{ + TABLE *table= field->table; + MY_BITMAP *old_sets[2]; + dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set); + + if (field->real_maybe_null()) + { + if (*key) + { + out->append(STRING_WITH_LEN("NULL")); + goto end; + } + key++; // Skip null byte + } + + field->set_key_image(key, field->pack_length()); + + if (field->type() == MYSQL_TYPE_BIT) + (void) field->val_int_as_str(out, 1); + else + field->val_str(out); + +end: + dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets); +} + + +/* + @brief + Produce a string representation of an individual SEL_ARG and return pointer + to it + + @detail + Intended usage: + + (gdb) p dbug_print_sel_arg(ptr) +*/ + +const char *dbug_print_sel_arg(SEL_ARG *sel_arg) +{ + StringBuffer<64> buf; + String &out= dbug_print_sel_arg_buf; + LEX_CSTRING tmp; + out.length(0); + + if (!sel_arg) + { + out.append(STRING_WITH_LEN("NULL")); + goto end; + } + + out.append(STRING_WITH_LEN("SEL_ARG(")); + + const char *stype; + switch(sel_arg->type) { + 
case SEL_ARG::IMPOSSIBLE: + stype="IMPOSSIBLE"; + break; + case SEL_ARG::MAYBE: + stype="MAYBE"; + break; + case SEL_ARG::MAYBE_KEY: + stype="MAYBE_KEY"; + break; + case SEL_ARG::KEY_RANGE: + default: + stype= NULL; + } + + if (stype) + { + out.append(STRING_WITH_LEN("type=")); + out.append(stype, strlen(stype)); + goto end; + } + + if (sel_arg->min_flag & NO_MIN_RANGE) + out.append(STRING_WITH_LEN("-inf")); + else + { + print_sel_arg_key(sel_arg->field, sel_arg->min_value, &buf); + out.append(buf); + } + + if (sel_arg->min_flag & NEAR_MIN) + lex_string_set3(&tmp, "<", 1); + else + lex_string_set3(&tmp, "<=", 2); + out.append(&tmp); + + out.append(sel_arg->field->field_name); + + if (sel_arg->min_flag & NEAR_MAX) + lex_string_set3(&tmp, "<", 1); + else + lex_string_set3(&tmp, "<=", 2); + out.append(&tmp); + + if (sel_arg->max_flag & NO_MAX_RANGE) + out.append(STRING_WITH_LEN("+inf")); + else + { + buf.length(0); + print_sel_arg_key(sel_arg->field, sel_arg->max_value, &buf); + out.append(buf); + } + + out.append(')'); + +end: + return dbug_print_sel_arg_buf.c_ptr_safe(); +} + + +/***************************************************************************** +** Print a quick range for debugging +** TODO: +** This should be changed to use a String to store each row instead +** of locking the DEBUG stream ! 
+*****************************************************************************/ + +static void +print_key(KEY_PART *key_part, const uchar *key, uint used_length) +{ + char buff[1024]; + const uchar *key_end= key+used_length; + uint store_length; + TABLE *table= key_part->field->table; + MY_BITMAP *old_sets[2]; + + dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set); + + for (; key < key_end; key+=store_length, key_part++) + { + String tmp(buff,sizeof(buff),&my_charset_bin); + Field *field= key_part->field; + store_length= key_part->store_length; + + if (field->real_maybe_null()) + { + if (*key) + { + fwrite("NULL",sizeof(char),4,DBUG_FILE); + continue; + } + key++; // Skip null byte + store_length--; + } + field->set_key_image(key, key_part->length); + if (field->type() == MYSQL_TYPE_BIT) + (void) field->val_int_as_str(&tmp, 1); + else + field->val_str(&tmp); + fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE); + if (key+store_length < key_end) + fputc('/',DBUG_FILE); + } + dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets); +} + + +static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg) +{ + char buf[MAX_KEY/8+1]; + TABLE *table; + MY_BITMAP *old_sets[2]; + DBUG_ENTER("print_quick"); + if (!quick) + DBUG_VOID_RETURN; + DBUG_LOCK_FILE; + + table= quick->head; + dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set); + quick->dbug_dump(0, TRUE); + dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets); + + fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf)); + + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} + +void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose) +{ + /* purecov: begin inspected */ + fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n", + indent, "", head->key_info[index].name.str, max_used_key_length); + + if (verbose) + { + QUICK_RANGE *range; + QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer; + 
QUICK_RANGE **end_range= pr + ranges.elements; + for (; pr != end_range; ++pr) + { + fprintf(DBUG_FILE, "%*s", indent + 2, ""); + range= *pr; + if (!(range->flag & NO_MIN_RANGE)) + { + print_key(key_parts, range->min_key, range->min_length); + if (range->flag & NEAR_MIN) + fputs(" < ",DBUG_FILE); + else + fputs(" <= ",DBUG_FILE); + } + fputs("X",DBUG_FILE); + + if (!(range->flag & NO_MAX_RANGE)) + { + if (range->flag & NEAR_MAX) + fputs(" < ",DBUG_FILE); + else + fputs(" <= ",DBUG_FILE); + print_key(key_parts, range->max_key, range->max_length); + } + fputs("\n",DBUG_FILE); + } + } + /* purecov: end */ +} + +void QUICK_INDEX_SORT_SELECT::dbug_dump(int indent, bool verbose) +{ + List_iterator_fast it(quick_selects); + QUICK_RANGE_SELECT *quick; + fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, ""); + fprintf(DBUG_FILE, "%*smerged scans {\n", indent, ""); + while ((quick= it++)) + quick->dbug_dump(indent+2, verbose); + if (pk_quick_select) + { + fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, ""); + pk_quick_select->dbug_dump(indent+2, verbose); + } + fprintf(DBUG_FILE, "%*s}\n", indent, ""); +} + +void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose) +{ + List_iterator_fast it(quick_selects); + QUICK_SELECT_WITH_RECORD *qr; + fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n", + indent, "", need_to_fetch_row? 
"":"non-"); + fprintf(DBUG_FILE, "%*smerged scans {\n", indent, ""); + while ((qr= it++)) + qr->quick->dbug_dump(indent+2, verbose); + if (cpk_quick) + { + fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, ""); + cpk_quick->dbug_dump(indent+2, verbose); + } + fprintf(DBUG_FILE, "%*s}\n", indent, ""); +} + +void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose) +{ + List_iterator_fast it(quick_selects); + QUICK_SELECT_I *quick; + fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, ""); + fprintf(DBUG_FILE, "%*smerged scans {\n", indent, ""); + while ((quick= it++)) + quick->dbug_dump(indent+2, verbose); + fprintf(DBUG_FILE, "%*s}\n", indent, ""); +} + + +/* + Print quick select information to DBUG_FILE. + + SYNOPSIS + QUICK_GROUP_MIN_MAX_SELECT::dbug_dump() + indent Indentation offset + verbose If TRUE show more detailed output. + + DESCRIPTION + Print the contents of this quick select to DBUG_FILE. The method also + calls dbug_dump() for the used quick select if any. + + IMPLEMENTATION + Caller is responsible for locking DBUG_FILE before this call and unlocking + it afterwards. 
+ + RETURN + None +*/ + +void QUICK_GROUP_MIN_MAX_SELECT::dbug_dump(int indent, bool verbose) +{ + fprintf(DBUG_FILE, + "%*squick_group_min_max_select: index %s (%d), length: %d\n", + indent, "", index_info->name.str, index, max_used_key_length); + if (key_infix_len > 0) + { + fprintf(DBUG_FILE, "%*susing key_infix with length %d:\n", + indent, "", key_infix_len); + } + if (quick_prefix_select) + { + fprintf(DBUG_FILE, "%*susing quick_range_select:\n", indent, ""); + quick_prefix_select->dbug_dump(indent + 2, verbose); + } + if (min_max_ranges.elements > 0) + { + fprintf(DBUG_FILE, "%*susing %zu quick_ranges for MIN/MAX:\n", + indent, "", min_max_ranges.elements); + } +} + +#endif /* !DBUG_OFF */ + + +/* + @brief Print the comparison operator for the min range +*/ + +static void print_min_range_operator(String *out, const ha_rkey_function flag) +{ + if (flag == HA_READ_AFTER_KEY) + out->append(STRING_WITH_LEN(" < ")); + else if (flag == HA_READ_KEY_EXACT || flag == HA_READ_KEY_OR_NEXT) + out->append(STRING_WITH_LEN(" <= ")); + else + out->append(STRING_WITH_LEN(" ? ")); +} + + +/* + @brief Print the comparison operator for the max range +*/ + +static void print_max_range_operator(String *out, const ha_rkey_function flag) +{ + if (flag == HA_READ_BEFORE_KEY) + out->append(STRING_WITH_LEN(" < ")); + else if (flag == HA_READ_AFTER_KEY) + out->append(STRING_WITH_LEN(" <= ")); + else + out->append(STRING_WITH_LEN(" ? ")); +} + + +static +void print_range(String *out, const KEY_PART_INFO *key_part, + KEY_MULTI_RANGE *range, uint n_key_parts) +{ + uint flag= range->range_flag; + String key_name; + key_name.set_charset(system_charset_info); + key_part_map keypart_map= range->start_key.keypart_map | + range->end_key.keypart_map; + + if (flag & GEOM_FLAG) + { + /* + The flags of GEOM ranges do not work the same way as for other + range types, so printing "col < some_geom" doesn't make sense. + Just print the column name, not operator. 
+ */ + print_keyparts_name(out, key_part, n_key_parts, keypart_map); + out->append(STRING_WITH_LEN(" ")); + print_key_value(out, key_part, range->start_key.key, + range->start_key.length); + return; + } + + if (range->start_key.length) + { + print_key_value(out, key_part, range->start_key.key, + range->start_key.length); + print_min_range_operator(out, range->start_key.flag); + } + + print_keyparts_name(out, key_part, n_key_parts, keypart_map); + + if (range->end_key.length) + { + print_max_range_operator(out, range->end_key.flag); + print_key_value(out, key_part, range->end_key.key, + range->end_key.length); + } +} + + +/* + @brief Print range created for non-indexed columns + + @param + out output string + field field for which the range is printed + range range for the field +*/ + +static +void print_range_for_non_indexed_field(String *out, Field *field, + KEY_MULTI_RANGE *range) +{ + TABLE *table= field->table; + MY_BITMAP *old_sets[2]; + dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set); + + if (range->start_key.length) + { + field->print_key_part_value(out, range->start_key.key, field->key_length()); + print_min_range_operator(out, range->start_key.flag); + } + + out->append(field->field_name); + + if (range->end_key.length) + { + print_max_range_operator(out, range->end_key.flag); + field->print_key_part_value(out, range->end_key.key, field->key_length()); + } + dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets); +} + + + +/* + + Add ranges to the trace + For ex: + lets say we have an index a_b(a,b) + query: select * from t1 where a=2 and b=4 ; + so we create a range: + (2,4) <= (a,b) <= (2,4) + this is added to the trace +*/ + +static void trace_ranges(Json_writer_array *range_trace, + PARAM *param, uint idx, + SEL_ARG *keypart, + const KEY_PART_INFO *key_parts) +{ + SEL_ARG_RANGE_SEQ seq; + KEY_MULTI_RANGE range; + range_seq_t seq_it; + uint flags= 0; + RANGE_SEQ_IF seq_if = {NULL, 
sel_arg_range_seq_init, + sel_arg_range_seq_next, 0, 0}; + KEY *keyinfo= param->table->key_info + param->real_keynr[idx]; + uint n_key_parts= param->table->actual_n_key_parts(keyinfo); + DBUG_ASSERT(range_trace->trace_started()); + seq.keyno= idx; + seq.key_parts= param->key[idx]; + seq.real_keyno= param->real_keynr[idx]; + seq.param= param; + seq.start= keypart; + /* + is_ror_scan is set to FALSE here, because we are only interested + in iterating over all the ranges and printing them. + */ + seq.is_ror_scan= FALSE; + const KEY_PART_INFO *cur_key_part= key_parts + keypart->part; + seq_it= seq_if.init((void *) &seq, 0, flags); + + while (!seq_if.next(seq_it, &range)) + { + StringBuffer<128> range_info(system_charset_info); + print_range(&range_info, cur_key_part, &range, n_key_parts); + range_trace->add(range_info.c_ptr_safe(), range_info.length()); + } +} + +/** + Print a key to a string + + @param[out] out String the key is appended to + @param[in] key_part Index components description + @param[in] key Key tuple + @param[in] used_length length of the key tuple +*/ + +static void print_key_value(String *out, const KEY_PART_INFO *key_part, + const uchar* key, uint used_length) +{ + out->append(STRING_WITH_LEN("(")); + Field *field= key_part->field; + StringBuffer<128> tmp(system_charset_info); + TABLE *table= field->table; + uint store_length; + MY_BITMAP *old_sets[2]; + dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set); + const uchar *key_end= key+used_length; + + for (; key < key_end; key+=store_length, key_part++) + { + field= key_part->field; + store_length= key_part->store_length; + + field->print_key_part_value(out, key, key_part->length); + + if (key + store_length < key_end) + out->append(STRING_WITH_LEN(",")); + } + dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets); + out->append(STRING_WITH_LEN(")")); +} + +/** + Print key parts involed in a range + @param[out] out String the key is appended to + 
@param[in] key_part Index components description + @param[in] n_keypart Number of keyparts in index + @param[in] keypart_map map for keyparts involved in the range +*/ + +void print_keyparts_name(String *out, const KEY_PART_INFO *key_part, + uint n_keypart, key_part_map keypart_map) +{ + uint i; + out->append(STRING_WITH_LEN("(")); + bool first_keypart= TRUE; + for (i=0; i < n_keypart; key_part++, i++) + { + if (keypart_map & (1 << i)) + { + if (first_keypart) + first_keypart= FALSE; + else + out->append(STRING_WITH_LEN(",")); + out->append(key_part->field->field_name); + if (key_part->key_part_flag & HA_REVERSE_SORT) + out->append(STRING_WITH_LEN(" DESC")); + } + else + break; + } + out->append(STRING_WITH_LEN(")")); +} diff --git a/sql/opt_range.h b/sql/opt_range.h new file mode 100644 index 00000000..4f766534 --- /dev/null +++ b/sql/opt_range.h @@ -0,0 +1,2013 @@ +/* + Copyright (c) 2000, 2010, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */


/* classes to use when handling where clause */

#ifndef _opt_range_h
#define _opt_range_h

#ifdef USE_PRAGMA_INTERFACE
#pragma interface                       /* gcc class implementation */
#endif

#include "records.h"                    /* READ_RECORD */
#include "queues.h"                     /* QUEUE */
#include "filesort.h"                   /* SORT_INFO */

/*
  It is necessary to include set_var.h instead of item.h because there
  are dependencies on include order for set_var.h and item.h. This
  will be resolved later.
*/
#include "sql_class.h"                  // set_var.h: THD
#include "set_var.h"                    /* Item */

class JOIN;
class Item_sum;

/*
  Lightweight per-keypart descriptor used by the range analyzer (a trimmed
  mirror of KEY_PART_INFO that can also describe fake partitioning indexes).
*/
struct KEY_PART {
  uint16           key,part;            // Index number and keypart ordinal
  /* See KEY_PART_INFO for meaning of the next two: */
  uint16           store_length, length;
  uint8            null_bit;            // Bit used for NULL-able keyparts
  /*
    Keypart flags (0 when this structure is used by partition pruning code
    for fake partitioning index description)
  */
  uint8 flag;
  Field            *field;
  Field::imagetype image_type;
};


/**
  A helper function to invert min flags to max flags for DESC key parts.
  It changes NEAR_MIN, NO_MIN_RANGE to NEAR_MAX, NO_MAX_RANGE appropriately
*/

inline uint invert_min_flag(uint min_flag)
{
  uint max_flag_out = min_flag & ~(NEAR_MIN | NO_MIN_RANGE);
  if (min_flag & NEAR_MIN) max_flag_out |= NEAR_MAX;
  if (min_flag & NO_MIN_RANGE) max_flag_out |= NO_MAX_RANGE;
  return max_flag_out;
}


/**
  A helper function to invert max flags to min flags for DESC key parts.
  It changes NEAR_MAX, NO_MAX_RANGE to NEAR_MIN, NO_MIN_RANGE appropriately
*/

inline uint invert_max_flag(uint max_flag)
{
  uint min_flag_out = max_flag & ~(NEAR_MAX | NO_MAX_RANGE);
  if (max_flag & NEAR_MAX) min_flag_out |= NEAR_MIN;
  if (max_flag & NO_MAX_RANGE) min_flag_out |= NO_MIN_RANGE;
  return min_flag_out;
}

class RANGE_OPT_PARAM;
/*
  A construction block of the SEL_ARG-graph.

  The following description only covers graphs of SEL_ARG objects with
  sel_arg->type==KEY_RANGE:

  One SEL_ARG object represents an "elementary interval" in form

      min_value <=? table.keypartX <=? max_value

  The interval is a non-empty interval of any kind: with[out] minimum/maximum
  bound, [half]open/closed, single-point interval, etc.

  1. SEL_ARG GRAPH STRUCTURE

  SEL_ARG objects are linked together in a graph. The meaning of the graph
  is better demonstrated by an example:

     tree->keys[i]
      |
      |             $              $
      |    part=1   $     part=2   $     part=3
      |             $              $
      |  +-------+  $   +-------+  $   +--------+
      |  | kp1<1 |--$-->| kp2=5 |--$-->| kp3=10 |
      |  +-------+  $   +-------+  $   +--------+
      |      |      $              $       |
      |      |      $              $   +--------+
      |      |      $              $   | kp3=12 |
      |      |      $              $   +--------+
      |  +-------+  $              $
      \->| kp1=2 |--$--------------$-+
         +-------+  $              $ |   +--------+
             |      $              $  ==>| kp3=11 |
         +-------+  $              $ |   +--------+
         | kp1=3 |--$--------------$-+       |
         +-------+  $              $     +--------+
             |      $              $     | kp3=14 |
            ...     $              $     +--------+

  The entire graph is partitioned into "interval lists".

  An interval list is a sequence of ordered disjoint intervals over the same
  key part. SEL_ARG are linked via "next" and "prev" pointers. Additionally,
  all intervals in the list form an RB-tree, linked via left/right/parent
  pointers. The RB-tree root SEL_ARG object will be further called "root of the
  interval list".

    In the example pic, there are 4 interval lists:
    "kp1<1 OR kp1=2 OR kp1=3", "kp2=5", "kp3=10 OR kp3=12", "kp3=11 OR kp3=14".
    The vertical lines represent SEL_ARG::next/prev pointers.

  In an interval list, each member X may have SEL_ARG::next_key_part pointer
  pointing to the root of another interval list Y. The pointed interval list
  must cover a key part with greater number (i.e. Y->part > X->part).

    In the example pic, the next_key_part pointers are represented by
    horizontal lines.

  2. SEL_ARG GRAPH SEMANTICS

  It represents a condition in a special form (we don't have a name for it ATM)
  The SEL_ARG::next/prev is "OR", and next_key_part is "AND".

  For example, the picture represents the condition in form:
   (kp1 < 1 AND kp2=5 AND (kp3=10 OR kp3=12)) OR
   (kp1=2 AND (kp3=11 OR kp3=14)) OR
   (kp1=3 AND (kp3=11 OR kp3=14))


  3. SEL_ARG GRAPH USE

  Use get_mm_tree() to construct SEL_ARG graph from WHERE condition.
  Then walk the SEL_ARG graph and get a list of disjoint ordered key
  intervals (i.e. intervals in form

   (constA1, .., const1_K) < (keypart1,.., keypartK) < (constB1, .., constB_K)

  Those intervals can be used to access the index. The uses are in:
   - check_quick_select() - Walk the SEL_ARG graph and find an estimate of
                            how many table records are contained within all
                            intervals.
   - get_quick_select()   - Walk the SEL_ARG, materialize the key intervals,
                            and create QUICK_RANGE_SELECT object that will
                            read records within these intervals.

  4. SPACE COMPLEXITY NOTES

    SEL_ARG graph is a representation of an ordered disjoint sequence of
    intervals over the ordered set of index tuple values.

    For multi-part keys, one can construct a WHERE expression such that its
    list of intervals will be of combinatorial size.
Here is an example: + + (keypart1 IN (1,2, ..., n1)) AND + (keypart2 IN (1,2, ..., n2)) AND + (keypart3 IN (1,2, ..., n3)) + + For this WHERE clause the list of intervals will have n1*n2*n3 intervals + of form + + (keypart1, keypart2, keypart3) = (k1, k2, k3), where 1 <= k{i} <= n{i} + + SEL_ARG graph structure aims to reduce the amount of required space by + "sharing" the elementary intervals when possible (the pic at the + beginning of this comment has examples of such sharing). The sharing may + prevent combinatorial blowup: + + There are WHERE clauses that have combinatorial-size interval lists but + will be represented by a compact SEL_ARG graph. + Example: + (keypartN IN (1,2, ..., n1)) AND + ... + (keypart2 IN (1,2, ..., n2)) AND + (keypart1 IN (1,2, ..., n3)) + + but not in all cases: + + - There are WHERE clauses that do have a compact SEL_ARG-graph + representation but get_mm_tree() and its callees will construct a + graph of combinatorial size. + Example: + (keypart1 IN (1,2, ..., n1)) AND + (keypart2 IN (1,2, ..., n2)) AND + ... + (keypartN IN (1,2, ..., n3)) + + - There are WHERE clauses for which the minimal possible SEL_ARG graph + representation will have combinatorial size. + Example: + By induction: Let's take any interval on some keypart in the middle: + + kp15=c0 + + Then let's AND it with this interval 'structure' from preceding and + following keyparts: + + (kp14=c1 AND kp16=c3) OR keypart14=c2) (*) + + We will obtain this SEL_ARG graph: + + kp14 $ kp15 $ kp16 + $ $ + +---------+ $ +---------+ $ +---------+ + | kp14=c1 |--$-->| kp15=c0 |--$-->| kp16=c3 | + +---------+ $ +---------+ $ +---------+ + | $ $ + +---------+ $ +---------+ $ + | kp14=c2 |--$-->| kp15=c0 | $ + +---------+ $ +---------+ $ + $ $ + + Note that we had to duplicate "kp15=c0" and there was no way to avoid + that. + The induction step: AND the obtained expression with another "wrapping" + expression like (*). + When the process ends because of the limit on max. 
number of keyparts + we'll have: + + WHERE clause length is O(3*#max_keyparts) + SEL_ARG graph size is O(2^(#max_keyparts/2)) + + (it is also possible to construct a case where instead of 2 in 2^n we + have a bigger constant, e.g. 4, and get a graph with 4^(31/2)= 2^31 + nodes) + + We avoid consuming too much memory by setting a limit on the number of + SEL_ARG object we can construct during one range analysis invocation. + + 5. SEL_ARG GRAPH WEIGHT + + A SEL_ARG graph has a property we call weight, and we define it as follows: + + + If the SEL_ARG graph does not have any node with multiple incoming + next_key_part edges, then its weight is the number of SEL_ARG objects used. + + If there is a node with multiple incoming next_key_part edges, clone that + node, (and the nodes connected to it via prev/next links) and redirect one + of the incoming next_key_part edges to the clone. + + Continue with cloning until we get a graph that has no nodes with multiple + incoming next_key_part edges. Then, the number of SEL_ARG objects in the + graph is the weight of the original graph. + + + Example: + + kp1 $ kp2 $ kp3 + $ $ + | +-------+ $ $ + \->| kp1=2 |--$--------------$-+ + +-------+ $ $ | +--------+ + | $ $ ==>| kp3=11 | + +-------+ $ $ | +--------+ + | kp1>3 |--$--------------$-+ | + +-------+ $ $ +--------+ + $ $ | kp3=14 | + $ $ +--------+ + $ $ | + $ $ +--------+ + $ $ | kp3=14 | + $ $ +--------+ + + Here, the weight is 2 + 2*3=8. + + The rationale behind using this definition of weight is: + - it has the same order-of-magnitude as the number of ranges that the + SEL_ARG graph is describing, + - it is a lot easier to compute than computing the number of ranges, + - it can be updated incrementally when performing AND/OR operations on + parts of the graph. + + 6. 
  For handling DESC keyparts, See HowRangeOptimizerHandlesDescKeyparts
*/

class SEL_ARG :public Sql_alloc
{
  static int sel_cmp(Field *field, uchar *a, uchar *b, uint8 a_flag,
                     uint8 b_flag);
public:
  uint8 min_flag,max_flag,maybe_flag;
  uint8 part;                                   // Which key part
  uint8 maybe_null;                             // 1 if keypart is NULL-able
  /*
    The ordinal number the least significant component encountered in
    the ranges of the SEL_ARG tree (the first component has number 1)

    Note: this number is currently not precise, it is an upper bound.
    @seealso SEL_ARG::get_max_key_part()
  */
  uint16 max_part_no;
  /*
    Number of children of this element in the RB-tree, plus 1 for this
    element itself.
  */
  uint32 elements;
  /*
    Valid only for elements which are RB-tree roots: Number of times this
    RB-tree is referred to (it is referred by SEL_ARG::next_key_part or by
    SEL_TREE::keys[i] or by a temporary SEL_ARG* variable)
  */
  ulong use_count;

  Field *field;
  uchar *min_value,*max_value;                  // Pointer to range

  /*
    eq_tree() requires that left == right == 0 if the type is MAYBE_KEY.
  */
  SEL_ARG *left,*right;    /* R-B tree children */
  SEL_ARG *next,*prev;     /* Links for bi-directional interval list */
  SEL_ARG *parent;         /* R-B tree parent */
  SEL_ARG *next_key_part;  /* Root of interval list for the next keypart */
  enum leaf_color { BLACK,RED } color;
  enum Type { IMPOSSIBLE, MAYBE, MAYBE_KEY, KEY_RANGE } type;

  /*
    For R-B root nodes only: the graph weight, as defined above in the
    SEL_ARG GRAPH WEIGHT section.
  */
  uint weight;
  enum { MAX_WEIGHT = 32000 };

#ifndef DBUG_OFF
  uint verify_weight();
#endif

  /* See RANGE_OPT_PARAM::alloced_sel_args */
  enum { DEFAULT_MAX_SEL_ARGS = 16000 };

  SEL_ARG() = default;
  SEL_ARG(SEL_ARG &);
  SEL_ARG(Field *, const uchar *, const uchar *);
  SEL_ARG(Field *field, uint8 part,
          uchar *min_value, uchar *max_value,
          uint8 min_flag, uint8 max_flag, uint8 maybe_flag);

  /* This is used to construct degenerate SEL_ARGS like ALWAYS, IMPOSSIBLE, etc */
  SEL_ARG(enum Type type_arg)
    :min_flag(0),
     max_part_no(0) /* first key part means 1. 0 mean 'no parts'*/,
     elements(1),use_count(1),left(0),right(0),
     next_key_part(0), color(BLACK), type(type_arg), weight(1)
  {}
  /**
    returns true if a range predicate is equal. Use all_same()
    to check for equality of all the predicates on this keypart.
  */
  inline bool is_same(const SEL_ARG *arg) const
  {
    if (type != arg->type || part != arg->part)
      return false;
    if (type != KEY_RANGE)
      return true;
    return cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0;
  }

  uint get_max_key_part() const;

  /**
    returns true if all the predicates in the keypart tree are equal
  */
  bool all_same(const SEL_ARG *arg) const
  {
    if (type != arg->type || part != arg->part)
      return false;
    if (type != KEY_RANGE)
      return true;
    if (arg == this)
      return true;
    /* Walk both interval lists in parallel; they must match pairwise */
    const SEL_ARG *cmp_arg= arg->first();
    const SEL_ARG *cur_arg= first();
    for (; cur_arg && cmp_arg && cur_arg->is_same(cmp_arg);
         cur_arg= cur_arg->next, cmp_arg= cmp_arg->next) ;
    if (cur_arg || cmp_arg)
      return false;
    return true;
  }
  inline void merge_flags(SEL_ARG *arg) { maybe_flag|=arg->maybe_flag; }
  inline void maybe_smaller() { maybe_flag=1; }
  /* Return true iff it's a single-point null interval */
  inline bool is_null_interval() { return maybe_null && max_value[0] == 1; }
  inline int cmp_min_to_min(const SEL_ARG* arg) const
  {
    return sel_cmp(field,min_value, arg->min_value, min_flag, arg->min_flag);
  }
  inline int cmp_min_to_max(const SEL_ARG* arg) const
  {
    return sel_cmp(field,min_value, arg->max_value, min_flag, arg->max_flag);
  }
  inline int cmp_max_to_max(const SEL_ARG* arg) const
  {
    return sel_cmp(field,max_value, arg->max_value, max_flag, arg->max_flag);
  }
  inline int cmp_max_to_min(const SEL_ARG* arg) const
  {
    return sel_cmp(field,max_value, arg->min_value, max_flag, arg->min_flag);
  }
  SEL_ARG *clone_and(THD *thd, SEL_ARG* arg)
  {                                             // Get overlapping range
    uchar *new_min,*new_max;
    uint8 flag_min,flag_max;
    if (cmp_min_to_min(arg) >= 0)
    {
      new_min=min_value; flag_min=min_flag;
    }
    else
    {
      new_min=arg->min_value; flag_min=arg->min_flag; /* purecov: deadcode */
    }
    if (cmp_max_to_max(arg) <= 0)
    {
      new_max=max_value; flag_max=max_flag;
    }
    else
    {
      new_max=arg->max_value; flag_max=arg->max_flag;
    }
    return new (thd->mem_root) SEL_ARG(field, part,
                                       new_min, new_max, flag_min,
                                       flag_max,
                                       MY_TEST(maybe_flag && arg->maybe_flag));
  }
  SEL_ARG *clone_first(SEL_ARG *arg)
  {                                             // min <= X < arg->min
    return new SEL_ARG(field, part, min_value, arg->min_value,
                       min_flag, arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX,
                       maybe_flag | arg->maybe_flag);
  }
  SEL_ARG *clone_last(SEL_ARG *arg)
  {                                             // min <= X <= key_max
    return new SEL_ARG(field, part, min_value, arg->max_value,
                       min_flag, arg->max_flag, maybe_flag | arg->maybe_flag);
  }
  SEL_ARG *clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent, SEL_ARG **next);

  bool copy_min(SEL_ARG* arg)
  {                                             // Get overlapping range
    if (cmp_min_to_min(arg) > 0)
    {
      min_value=arg->min_value; min_flag=arg->min_flag;
      /*
        NOTE(review): this tests the NO_MIN_RANGE bit in max_flag, not in
        min_flag — looks like a long-standing upstream quirk; confirm intent
        before changing.
      */
      if ((max_flag & (NO_MAX_RANGE | NO_MIN_RANGE)) ==
          (NO_MAX_RANGE | NO_MIN_RANGE))
        return 1;                               // Full range
    }
    maybe_flag|=arg->maybe_flag;
    return 0;
  }
  bool copy_max(SEL_ARG* arg)
  {                                             // Get overlapping range
    if (cmp_max_to_max(arg) <= 0)
    {
      max_value=arg->max_value; max_flag=arg->max_flag;
      if ((max_flag & (NO_MAX_RANGE | NO_MIN_RANGE)) ==
          (NO_MAX_RANGE | NO_MIN_RANGE))
        return 1;                               // Full range
    }
    maybe_flag|=arg->maybe_flag;
    return 0;
  }

  void copy_min_to_min(SEL_ARG *arg)
  {
    min_value=arg->min_value; min_flag=arg->min_flag;
  }
  void copy_min_to_max(SEL_ARG *arg)
  {
    max_value=arg->min_value;
    max_flag=arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX;
  }
  void copy_max_to_min(SEL_ARG *arg)
  {
    min_value=arg->max_value;
    min_flag=arg->max_flag & NEAR_MAX ? 0 : NEAR_MIN;
  }
  /* returns a number of keypart values (0 or 1) appended to the key buffer */
  int store_min(uint length, uchar **min_key,uint min_key_flag)
  {
    /* "(kp1 > c1) AND (kp2 OP c2) AND ..." -> (kp1 > c1) */
    if ((min_flag & GEOM_FLAG) ||
        (!(min_flag & NO_MIN_RANGE) &&
         !(min_key_flag & (NO_MIN_RANGE | NEAR_MIN))))
    {
      if (maybe_null && *min_value)
      {
        **min_key=1;
        bzero(*min_key+1,length-1);
      }
      else
        memcpy(*min_key,min_value,length);
      (*min_key)+= length;
      return 1;
    }
    return 0;
  }
  /* returns a number of keypart values (0 or 1) appended to the key buffer */
  int store_max(uint length, uchar **max_key, uint max_key_flag)
  {
    if (!(max_flag & NO_MAX_RANGE) &&
        !(max_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
    {
      if (maybe_null && *max_value)
      {
        **max_key=1;
        bzero(*max_key+1,length-1);
      }
      else
        memcpy(*max_key,max_value,length);
      (*max_key)+= length;
      return 1;
    }
    return 0;
  }

  /* Save minimum and maximum, taking index order into account  */
  void store_min_max(KEY_PART *kp,
                     uint length,
                     uchar **min_key, uint min_flag,
                     uchar **max_key, uint max_flag,
                     int *min_part, int *max_part)
  {
    if (kp[part].flag & HA_REVERSE_SORT) {
      *max_part += store_min(length, max_key, min_flag);
      *min_part += store_max(length, min_key, max_flag);
    } else {
      *min_part += store_min(length, min_key, min_flag);
      *max_part += store_max(length, max_key, max_flag);
    }
  }
  /*
    Get the flag for range's starting endpoint, taking index order into
    account.
  */
  uint get_min_flag(KEY_PART *kp)
  {
    return (kp[part].flag & HA_REVERSE_SORT)? invert_max_flag(max_flag) : min_flag;
  }
  /*
    Get the flag for range's ending endpoint, taking index order into
    account.
  */
  uint get_max_flag(KEY_PART *kp)
  {
    return (kp[part].flag & HA_REVERSE_SORT)? invert_min_flag(min_flag) : max_flag ;
  }
  /* Get the previous interval, taking index order into account */
  inline SEL_ARG* index_order_prev(KEY_PART *kp)
  {
    return (kp[part].flag & HA_REVERSE_SORT)? next : prev;
  }
  /* Get the next interval, taking index order into account */
  inline SEL_ARG* index_order_next(KEY_PART *kp)
  {
    return (kp[part].flag & HA_REVERSE_SORT)? prev : next;
  }

  /*
    Produce a single multi-part interval, taking key part ordering into
    account.
  */
  void store_next_min_max_keys(KEY_PART *key, uchar **cur_min_key,
                               uint *cur_min_flag, uchar **cur_max_key,
                               uint *cur_max_flag, int *min_part,
                               int *max_part);

  /*
    Returns a number of keypart values appended to the key buffer
    for min key and max key. This function is used by both Range
    Analysis and Partition pruning. For partition pruning we have
    to ensure that we don't store also subpartition fields. Thus
    we have to stop at the last partition part and not step into
    the subpartition fields. For Range Analysis we set last_part
    to MAX_KEY which we should never reach.
  */
  int store_min_key(KEY_PART *key,
                    uchar **range_key,
                    uint *range_key_flag,
                    uint last_part,
                    bool start_key)
  {
    SEL_ARG *key_tree= first();
    uint res= key_tree->store_min(key[key_tree->part].store_length,
                                  range_key, *range_key_flag);
    // add flags only if a key_part is written to the buffer
    if (!res)
      return 0;
    *range_key_flag|= key_tree->min_flag;
    SEL_ARG *nkp= key_tree->next_key_part;
    if (nkp && nkp->type == SEL_ARG::KEY_RANGE &&
        key_tree->part != last_part &&
        nkp->part == key_tree->part+1 &&
        !(*range_key_flag & (NO_MIN_RANGE | NEAR_MIN)))
    {
      /* For a DESC keypart the roles of min and max key swap (see
         HowRangeOptimizerHandlesDescKeyparts), hence the flag inversion */
      const bool asc = !(key[key_tree->part].flag & HA_REVERSE_SORT);
      if (start_key == asc)
      {
        res+= nkp->store_min_key(key, range_key, range_key_flag, last_part,
                                 start_key);
      }
      else
      {
        uint tmp_flag = invert_min_flag(*range_key_flag);
        res += nkp->store_max_key(key, range_key, &tmp_flag, last_part,
                                  start_key);
        *range_key_flag = invert_max_flag(tmp_flag);
      }
    }
    return res;
  }

  /* returns a number of keypart values appended to the key buffer */
  int store_max_key(KEY_PART *key,
                    uchar **range_key,
                    uint *range_key_flag,
                    uint last_part,
                    bool start_key)
  {
    SEL_ARG *key_tree= last();
    uint res=key_tree->store_max(key[key_tree->part].store_length,
                                 range_key, *range_key_flag);
    if (!res)
      return 0;
    *range_key_flag|= key_tree->max_flag;
    SEL_ARG *nkp= key_tree->next_key_part;
    if (nkp && nkp->type == SEL_ARG::KEY_RANGE &&
        key_tree->part != last_part &&
        nkp->part == key_tree->part+1 &&
        !(*range_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
    {
      const bool asc = !(key[key_tree->part].flag & HA_REVERSE_SORT);
      if ((!start_key && asc) || (start_key && !asc))
      {
        res += nkp->store_max_key(key, range_key, range_key_flag, last_part,
                                  start_key);
      }
      else
      {
        uint tmp_flag = invert_max_flag(*range_key_flag);
        res += nkp->store_min_key(key, range_key, &tmp_flag, last_part,
                                  start_key);
        *range_key_flag = invert_min_flag(tmp_flag);
      }
    }
    return res;
  }

  SEL_ARG *insert(SEL_ARG *key);
  SEL_ARG *tree_delete(SEL_ARG *key);
  SEL_ARG *find_range(SEL_ARG *key);
  SEL_ARG *rb_insert(SEL_ARG *leaf);
  friend SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key, SEL_ARG *par);
#ifdef EXTRA_DEBUG
  friend int test_rb_tree(SEL_ARG *element,SEL_ARG *parent);
  void test_use_count(SEL_ARG *root);
#endif
  SEL_ARG *first();
  const SEL_ARG *first() const;
  SEL_ARG *last();
  void make_root();
  inline bool simple_key()
  {
    return !next_key_part && elements == 1;
  }
  void increment_use_count(long count)
  {
    if (next_key_part)
    {
      next_key_part->use_count+=count;
      count*= (next_key_part->use_count-count);
      for (SEL_ARG *pos=next_key_part->first(); pos ; pos=pos->next)
        if (pos->next_key_part)
          pos->increment_use_count(count);
    }
  }
  void incr_refs()
  {
    increment_use_count(1);
    use_count++;
  }
  void incr_refs_all()
  {
    for (SEL_ARG *pos=first(); pos ; pos=pos->next)
    {
      pos->increment_use_count(1);
    }
    use_count++;
  }
  void free_tree()
  {
    for (SEL_ARG *pos=first(); pos ; pos=pos->next)
      if (pos->next_key_part)
      {
        pos->next_key_part->use_count--;
        pos->next_key_part->free_tree();
      }
  }

  inline SEL_ARG **parent_ptr()
  {
    return parent->left == this ? &parent->left : &parent->right;
  }


  /*
    Check if this SEL_ARG object represents a single-point interval

    SYNOPSIS
      is_singlepoint()

    DESCRIPTION
      Check if this SEL_ARG object (not tree) represents a single-point
      interval, i.e. if it represents a "keypart = const" or
      "keypart IS NULL".

    RETURN
      TRUE   This SEL_ARG object represents a singlepoint interval
      FALSE  Otherwise
  */

  bool is_singlepoint() const
  {
    /*
      Check for NEAR_MIN ("strictly less") and NO_MIN_RANGE (-inf < field)
      flags, and the same for right edge.
    */
    if (min_flag || max_flag)
      return FALSE;
    uchar *min_val= min_value;
    uchar *max_val= max_value;

    if (maybe_null)
    {
      /* First byte is a NULL value indicator */
      if (*min_val != *max_val)
        return FALSE;

      if (*min_val)
        return TRUE; /* This "x IS NULL" */
      min_val++;
      max_val++;
    }
    return !field->key_cmp(min_val, max_val);
  }
  SEL_ARG *clone_tree(RANGE_OPT_PARAM *param);
};

/*
  HowRangeOptimizerHandlesDescKeyparts
  ====================================

  Starting with MySQL-8.0 and MariaDB 10.8, index key parts may be descending,
  for example:

    INDEX idx1(col1, col2 DESC, col3, col4 DESC)

  Range Optimizer handles this as follows: the SEL_ARG graph is built
  without any regard to DESC keyparts.

  For example, for an index

    INDEX idx2(kp1 DESC, kp2)

  and range

    kp1 BETWEEN 10 and 20   (RANGE-1)

  the SEL_ARG will have min_value=10, max_value=20

  The ordering of key parts is taken into account when SEL_ARG graph is
  linearized to ranges, in sel_arg_range_seq_next() and get_quick_keys().
+ + The storage engine expects the first bound to be the first in the index and + the last bound to be the last, that is, for (RANGE-1) we will flip min and + max and generate these key_range structures: + + start.key='20' , end.key='10' + + See SEL_ARG::store_min_max(). The flag values are flipped as well, see + SEL_ARG::get_min_flag(), get_max_flag(). + + == Handling multiple key parts == + + For multi-part keys, the order of key parts has an effect on which ranges are + generated. Consider + + kp1 >= 10 AND kp2 >'foo' + + for INDEX(kp1 ASC, kp2 ASC) the range will be + + (kp1, kp2) > (10, 'foo') + + while for INDEX(kp1 ASC, kp2 DESC) it will be just + + kp1 >= 10 + + Another example: + + (kp1 BETWEEN 10 AND 20) AND (kp2 BETWEEN 'foo' AND 'quux') + + with INDEX (kp1 ASC, kp2 ASC) will generate + + (10, 'foo') <= (kp1, kp2) < (20, 'quux') + + while with index INDEX (kp1 ASC, kp2 DESC) it will generate + + (10, 'quux') <= (kp1, kp2) < (20, 'foo') + + This is again achieved by sel_arg_range_seq_next() and get_quick_keys() + flipping SEL_ARG's min,max, their flags and next/prev as needed. 
*/

extern MYSQL_PLUGIN_IMPORT SEL_ARG null_element;

/* A SEL_ARG pre-set to type IMPOSSIBLE (empty range) for the given field */
class SEL_ARG_IMPOSSIBLE: public SEL_ARG
{
public:
  SEL_ARG_IMPOSSIBLE(Field *field)
   :SEL_ARG(field, 0, 0)
  {
    type= SEL_ARG::IMPOSSIBLE;
  }
};


class RANGE_OPT_PARAM
{
public:
  THD	*thd;   /* Current thread handle */
  TABLE *table; /* Table being analyzed */
  table_map prev_tables;
  table_map read_tables;
  table_map current_table; /* Bit of the table being analyzed */

  /* Array of parts of all keys for which range analysis is performed */
  KEY_PART *key_parts;
  KEY_PART *key_parts_end;
  MEM_ROOT *mem_root; /* Memory that will be freed when range analysis completes */
  MEM_ROOT *old_root; /* Memory that will last until the query end */
  /*
    Number of indexes used in range analysis (In SEL_TREE::keys only first
    #keys elements are not empty)
  */
  uint keys;

  /*
    If true, the index descriptions describe real indexes (and it is ok to
    call field->optimize_range(real_keynr[...], ...).
    Otherwise index description describes fake indexes.
  */
  bool using_real_indexes;

  /*
    Aggressively remove "scans" that do not have conditions on first
    keyparts. Such scans are usable when doing partition pruning but not
    regular range optimization.
  */
  bool remove_jump_scans;

  /*
    TRUE <=> Range analyzer should remove parts of condition that are found
    to be always FALSE.
  */
  bool remove_false_where_parts;

  bool note_unusable_keys;  // Give SQL notes for unusable keys

  /*
    used_key_no -> table_key_no translation table. Only makes sense if
    using_real_indexes==TRUE
  */
  uint real_keynr[MAX_KEY];

  /*
    Used to store 'current key tuples', in both range analysis and
    partitioning (list) analysis
  */
  uchar *min_key;
  uchar *max_key;

  /* Number of SEL_ARG objects allocated by SEL_ARG::clone_tree operations */
  uint alloced_sel_args;

  bool force_default_mrr;
  KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */

  /* TRUE when the analysis should stop (kill/error/too many SEL_ARGs) */
  bool statement_should_be_aborted() const
  {
    return
      thd->killed ||
      thd->is_fatal_error ||
      thd->is_error() ||
      alloced_sel_args > thd->variables.optimizer_max_sel_args;
  }
};


class Explain_quick_select;
/*
  A "MIN_TUPLE < tbl.key_tuple < MAX_TUPLE" interval.

  One of endpoints may be absent. 'flags' member has flags which tell whether
  the endpoints are '<' or '<='.
*/
class QUICK_RANGE :public Sql_alloc {
 public:
  uchar *min_key,*max_key;
  uint16 min_length,max_length,flag;
  key_part_map min_keypart_map, // bitmap of used keyparts in min_key
               max_keypart_map; // bitmap of used keyparts in max_key
#ifdef HAVE_valgrind
  uint16 dummy;                                 /* Avoid warnings on 'flag' */
#endif
  QUICK_RANGE();                                /* Full range */
  QUICK_RANGE(THD *thd, const uchar *min_key_arg, uint min_length_arg,
              key_part_map min_keypart_map_arg,
              const uchar *max_key_arg, uint max_length_arg,
              key_part_map max_keypart_map_arg,
              uint flag_arg)
    : min_key((uchar*) thd->memdup(min_key_arg, min_length_arg + 1)),
      max_key((uchar*) thd->memdup(max_key_arg, max_length_arg + 1)),
      min_length((uint16) min_length_arg),
      max_length((uint16) max_length_arg),
      flag((uint16) flag_arg),
      min_keypart_map(min_keypart_map_arg),
      max_keypart_map(max_keypart_map_arg)
    {
#ifdef HAVE_valgrind
      dummy=0;
#endif
    }

  /**
    Initializes a key_range object for communication with storage engine.

    This function facilitates communication with the Storage Engine API by
    translating the minimum endpoint of the interval represented by this
    QUICK_RANGE into an index range endpoint specifier for the engine.

    @param kr Pointer to an uninitialized key_range C struct.

    @param prefix_length The length of the search key prefix to be used for
    lookup.

    @param keypart_map A set (bitmap) of keyparts to be used.
  */
  void make_min_endpoint(key_range *kr, uint prefix_length,
                         key_part_map keypart_map) {
    make_min_endpoint(kr);
    kr->length= MY_MIN(kr->length, prefix_length);
    kr->keypart_map&= keypart_map;
  }

  /**
    Initializes a key_range object for communication with storage engine.

    This function facilitates communication with the Storage Engine API by
    translating the minimum endpoint of the interval represented by this
    QUICK_RANGE into an index range endpoint specifier for the engine.

    @param kr Pointer to an uninitialized key_range C struct.
  */
  void make_min_endpoint(key_range *kr) {
    kr->key= (const uchar*)min_key;
    kr->length= min_length;
    kr->keypart_map= min_keypart_map;
    kr->flag= ((flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
               (flag & EQ_RANGE) ? HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
  }

  /**
    Initializes a key_range object for communication with storage engine.

    This function facilitates communication with the Storage Engine API by
    translating the maximum endpoint of the interval represented by this
    QUICK_RANGE into an index range endpoint specifier for the engine.

    @param kr Pointer to an uninitialized key_range C struct.

    @param prefix_length The length of the search key prefix to be used for
    lookup.

    @param keypart_map A set (bitmap) of keyparts to be used.
  */
  void make_max_endpoint(key_range *kr, uint prefix_length,
                         key_part_map keypart_map) {
    make_max_endpoint(kr);
    kr->length= MY_MIN(kr->length, prefix_length);
    kr->keypart_map&= keypart_map;
  }

  /**
    Initializes a key_range object for communication with storage engine.

    This function facilitates communication with the Storage Engine API by
    translating the maximum endpoint of the interval represented by this
    QUICK_RANGE into an index range endpoint specifier for the engine.

    @param kr Pointer to an uninitialized key_range C struct.
  */
  void make_max_endpoint(key_range *kr) {
    kr->key= (const uchar*)max_key;
    kr->length= max_length;
    kr->keypart_map= max_keypart_map;
    /*
      We use READ_AFTER_KEY here because if we are reading on a key
      prefix we want to find all keys with this prefix
    */
    kr->flag= (flag & NEAR_MAX ? HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY);
  }
};


/*
  Quick select interface.
  This class is a parent for all QUICK_*_SELECT and FT_SELECT classes.

  The usage scenario is as follows:
  1. Create quick select
    quick= new QUICK_XXX_SELECT(...);

  2. Perform lightweight initialization. This can be done in 2 ways:
  2.a: Regular initialization
    if (quick->init())
    {
      //the only valid action after failed init() call is delete
      delete quick;
    }
  2.b: Special initialization for quick selects merged by QUICK_ROR_*_SELECT
    if (quick->init_ror_merged_scan())
      delete quick;

  3. Perform zero, one, or more scans.
    while (...)
    {
      // initialize quick select for scan. This may allocate
      // buffers and/or prefetch rows.
      if (quick->reset())
      {
        //the only valid action after failed reset() call is delete
        delete quick;
        //abort query
      }

      // perform the scan
      do
      {
        res= quick->get_next();
      } while (res && ...)
    }

  4.
Delete the select: + delete quick; + + NOTE + quick select doesn't use Sql_alloc/MEM_ROOT allocation because "range + checked for each record" functionality may create/destroy + O(#records_in_some_table) quick selects during query execution. +*/ + +class QUICK_SELECT_I +{ +public: + ha_rows records; /* estimate of # of records to be retrieved */ + double read_time; /* time to perform this retrieval */ + TABLE *head; + /* + Index this quick select uses, or MAX_KEY for quick selects + that use several indexes + */ + uint index; + + /* + Total length of first used_key_parts parts of the key. + Applicable if index!= MAX_KEY. + */ + uint max_used_key_length; + + /* + Max. number of (first) key parts this quick select uses for retrieval. + eg. for "(key1p1=c1 AND key1p2=c2) OR key1p1=c2" used_key_parts == 2. + Applicable if index!= MAX_KEY. + + For QUICK_GROUP_MIN_MAX_SELECT it includes MIN/MAX argument keyparts. + */ + uint used_key_parts; + + QUICK_SELECT_I(); + virtual ~QUICK_SELECT_I() = default;; + + /* + Do post-constructor initialization. + SYNOPSIS + init() + + init() performs initializations that should have been in constructor if + it was possible to return errors from constructors. The join optimizer may + create and then delete quick selects without retrieving any rows so init() + must not contain any IO or CPU intensive code. + + If init() call fails the only valid action is to delete this quick select, + reset() and get_next() must not be called. + + RETURN + 0 OK + other Error code + */ + virtual int init() = 0; + + /* + Initialize quick select for row retrieval. + SYNOPSIS + reset() + + reset() should be called when it is certain that row retrieval will be + necessary. This call may do heavyweight initialization like buffering first + N records etc. If reset() call fails get_next() must not be called. 
+ Note that reset() may be called several times if + * the quick select is executed in a subselect + * a JOIN buffer is used + + RETURN + 0 OK + other Error code + */ + virtual int reset(void) = 0; + + virtual int get_next() = 0; /* get next record to retrieve */ + + /* Range end should be called when we have looped over the whole index */ + virtual void range_end() {} + + virtual bool reverse_sorted() = 0; + virtual bool unique_key_range() { return false; } + + /* + Request that this quick select produces sorted output. Not all quick + selects can do it, the caller is responsible for calling this function + only for those quick selects that can. + */ + virtual void need_sorted_output() = 0; + enum { + QS_TYPE_RANGE = 0, + QS_TYPE_INDEX_INTERSECT = 1, + QS_TYPE_INDEX_MERGE = 2, + QS_TYPE_RANGE_DESC = 3, + QS_TYPE_FULLTEXT = 4, + QS_TYPE_ROR_INTERSECT = 5, + QS_TYPE_ROR_UNION = 6, + QS_TYPE_GROUP_MIN_MAX = 7 + }; + + /* Get type of this quick select - one of the QS_TYPE_* values */ + virtual int get_type() = 0; + + /* + Initialize this quick select as a merged scan inside a ROR-union or a ROR- + intersection scan. The caller must not additionally call init() if this + function is called. + SYNOPSIS + init_ror_merged_scan() + reuse_handler If true, the quick select may use table->handler, + otherwise it must create and use a separate handler + object. + RETURN + 0 Ok + other Error + */ + virtual int init_ror_merged_scan(bool reuse_handler, MEM_ROOT *alloc) + { DBUG_ASSERT(0); return 1; } + + /* + Save ROWID of last retrieved row in file->ref. This used in ROR-merging. + */ + virtual void save_last_pos(){}; + + void add_key_and_length(String *key_names, + String *used_lengths, + bool *first); + + /* + Append comma-separated list of keys this quick select uses to key_names; + append comma-separated list of corresponding used lengths to used_lengths. + This is used by select_describe. 
+ */ + virtual void add_keys_and_lengths(String *key_names, + String *used_lengths)=0; + + void add_key_name(String *str, bool *first); + + /* Save information about quick select's query plan */ + virtual Explain_quick_select* get_explain(MEM_ROOT *alloc)= 0; + + /* + Return 1 if any index used by this quick select + uses field which is marked in passed bitmap. + */ + virtual bool is_keys_used(const MY_BITMAP *fields); + + /** + Simple sanity check that the quick select has been set up + correctly. Function is overridden by quick selects that merge + indices. + */ + virtual bool is_valid() { return index != MAX_KEY; }; + + /* + rowid of last row retrieved by this quick select. This is used only when + doing ROR-index_merge selects + */ + uchar *last_rowid; + + /* + Table record buffer used by this quick select. + */ + uchar *record; + + virtual void replace_handler(handler *new_file) + { + DBUG_ASSERT(0); /* Only supported in QUICK_RANGE_SELECT */ + } + +#ifndef DBUG_OFF + /* + Print quick select information to DBUG_FILE. Caller is responsible + for locking DBUG_FILE before this call and unlocking it afterwards. + */ + virtual void dbug_dump(int indent, bool verbose)= 0; +#endif + + /* + Returns a QUICK_SELECT with reverse order of to the index. + */ + virtual QUICK_SELECT_I *make_reverse(uint used_key_parts_arg) { return NULL; } + + /* + Add the key columns used by the quick select into table's read set. + + This is used by an optimization in filesort. + */ + virtual void add_used_key_part_to_set()=0; +}; + + +struct st_qsel_param; +class PARAM; + + +/* + MRR range sequence, array implementation: sequence traversal + context. 
+*/ +typedef struct st_quick_range_seq_ctx +{ + QUICK_RANGE **first; + QUICK_RANGE **cur; + QUICK_RANGE **last; +} QUICK_RANGE_SEQ_CTX; + +range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags); +bool quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range); + + +/* + Quick select that does a range scan on a single key. The records are + returned in key order. +*/ +class QUICK_RANGE_SELECT : public QUICK_SELECT_I +{ +protected: + THD *thd; + bool no_alloc; + MEM_ROOT *parent_alloc; + + /* true if we enabled key only reads */ + handler *file; + + /* Members to deal with case when this quick select is a ROR-merged scan */ + bool in_ror_merged_scan; + MY_BITMAP column_bitmap; + bool free_file; /* TRUE <=> this->file is "owned" by this quick select */ + + /* Range pointers to be used when not using MRR interface */ + /* Members needed to use the MRR interface */ + QUICK_RANGE_SEQ_CTX qr_traversal_ctx; +public: + uint mrr_flags; /* Flags to be used with MRR interface */ +protected: + uint mrr_buf_size; /* copy from thd->variables.mrr_buff_size */ + HANDLER_BUFFER *mrr_buf_desc; /* the handler buffer */ + + /* Info about index we're scanning */ + + DYNAMIC_ARRAY ranges; /* ordered array of range ptrs */ + QUICK_RANGE **cur_range; /* current element in ranges */ + + QUICK_RANGE *last_range; + + KEY_PART *key_parts; + KEY_PART_INFO *key_part_info; + + bool dont_free; /* Used by QUICK_SELECT_DESC */ + + int cmp_next(QUICK_RANGE *range); + int cmp_prev(QUICK_RANGE *range); + bool row_in_ranges(); +public: + MEM_ROOT alloc; + + QUICK_RANGE_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc, + MEM_ROOT *parent_alloc, bool *create_err); + ~QUICK_RANGE_SELECT(); + virtual QUICK_RANGE_SELECT *clone(bool *create_error) + { return new QUICK_RANGE_SELECT(thd, head, index, no_alloc, parent_alloc, + create_error); } + + void need_sorted_output(); + int init(); + int reset(void); + int get_next(); + void range_end(); + int get_next_prefix(uint 
prefix_length, uint group_key_parts, + uchar *cur_prefix); + bool reverse_sorted() { return 0; } + bool unique_key_range(); + int init_ror_merged_scan(bool reuse_handler, MEM_ROOT *alloc); + void save_last_pos() + { file->position(record); } + int get_type() { return QS_TYPE_RANGE; } + void add_keys_and_lengths(String *key_names, String *used_lengths); + Explain_quick_select *get_explain(MEM_ROOT *alloc); +#ifndef DBUG_OFF + void dbug_dump(int indent, bool verbose); +#endif + virtual void replace_handler(handler *new_file) { file= new_file; } + QUICK_SELECT_I *make_reverse(uint used_key_parts_arg); + + virtual void add_used_key_part_to_set(); + +private: + /* Default copy ctor used by QUICK_SELECT_DESC */ + friend class TRP_ROR_INTERSECT; + friend + QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, + struct st_table_ref *ref, + ha_rows records); + friend bool get_quick_keys(PARAM *param, QUICK_RANGE_SELECT *quick, + KEY_PART *key, SEL_ARG *key_tree, + uchar *min_key, uint min_key_flag, + uchar *max_key, uint max_key_flag); + friend QUICK_RANGE_SELECT *get_quick_select(PARAM*,uint idx, + SEL_ARG *key_tree, + uint mrr_flags, + uint mrr_buf_size, + MEM_ROOT *alloc); + friend class QUICK_SELECT_DESC; + friend class QUICK_INDEX_SORT_SELECT; + friend class QUICK_INDEX_MERGE_SELECT; + friend class QUICK_ROR_INTERSECT_SELECT; + friend class QUICK_INDEX_INTERSECT_SELECT; + friend class QUICK_GROUP_MIN_MAX_SELECT; + friend bool quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range); + friend range_seq_t quick_range_seq_init(void *init_param, + uint n_ranges, uint flags); + friend + int read_keys_and_merge_scans(THD *thd, TABLE *head, + List quick_selects, + QUICK_RANGE_SELECT *pk_quick_select, + READ_RECORD *read_record, + bool intersection, + key_map *filtered_scans, + Unique **unique_ptr); + +}; + + +class QUICK_RANGE_SELECT_GEOM: public QUICK_RANGE_SELECT +{ +public: + QUICK_RANGE_SELECT_GEOM(THD *thd, TABLE *table, uint index_arg, + bool 
no_alloc, MEM_ROOT *parent_alloc, + bool *create_err) + :QUICK_RANGE_SELECT(thd, table, index_arg, no_alloc, parent_alloc, + create_err) + {}; + virtual QUICK_RANGE_SELECT *clone(bool *create_error) + { + DBUG_ASSERT(0); + return new QUICK_RANGE_SELECT_GEOM(thd, head, index, no_alloc, + parent_alloc, create_error); + } + virtual int get_next(); +}; + + +/* + QUICK_INDEX_SORT_SELECT is the base class for the common functionality of: + - QUICK_INDEX_MERGE_SELECT, access based on multi-index merge/union + - QUICK_INDEX_INTERSECT_SELECT, access based on multi-index intersection + + + QUICK_INDEX_SORT_SELECT uses + * QUICK_RANGE_SELECTs to get rows + * Unique class + - to remove duplicate rows for QUICK_INDEX_MERGE_SELECT + - to intersect rows for QUICK_INDEX_INTERSECT_SELECT + + INDEX MERGE OPTIMIZER + Current implementation doesn't detect all cases where index merge could + be used, in particular: + + * index_merge+'using index' is not supported + + * If WHERE part contains complex nested AND and OR conditions, some ways + to retrieve rows using index merge will not be considered. The choice + of read plan may depend on the order of conjuncts/disjuncts in WHERE + part of the query, see comments near imerge_list_or_list and + SEL_IMERGE::or_sel_tree_with_checks functions for details. + + * There is no "index_merge_ref" method (but index merge on non-first + table in join is possible with 'range checked for each record'). + + + ROW RETRIEVAL ALGORITHM + + index merge/intersection uses Unique class for duplicates removal. + index merge/intersection takes advantage of Clustered Primary Key (CPK) + if the table has one. 
+ The index merge/intersection algorithm consists of two phases: + + Phase 1 + (implemented by a QUICK_INDEX_MERGE_SELECT::read_keys_and_merge call): + + prepare() + { + activate 'index only'; + while(retrieve next row for non-CPK scan) + { + if (there is a CPK scan and row will be retrieved by it) + skip this row; + else + put its rowid into Unique; + } + deactivate 'index only'; + } + + Phase 2 + (implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next calls): + + fetch() + { + retrieve all rows from row pointers stored in Unique + (merging/intersecting them); + free Unique; + if (! intersection) + retrieve all rows for CPK scan; + } +*/ + +class QUICK_INDEX_SORT_SELECT : public QUICK_SELECT_I +{ +protected: + Unique *unique; +public: + QUICK_INDEX_SORT_SELECT(THD *thd, TABLE *table); + ~QUICK_INDEX_SORT_SELECT(); + + int init(); + void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ } + int reset(void); + bool reverse_sorted() { return false; } + bool unique_key_range() { return false; } + bool is_keys_used(const MY_BITMAP *fields); +#ifndef DBUG_OFF + void dbug_dump(int indent, bool verbose); +#endif + Explain_quick_select *get_explain(MEM_ROOT *alloc); + + bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range); + + /* range quick selects this index merge/intersect consists of */ + List quick_selects; + + /* quick select that uses clustered primary key (NULL if none) */ + QUICK_RANGE_SELECT* pk_quick_select; + + MEM_ROOT alloc; + THD *thd; + virtual bool is_valid() + { + List_iterator_fast it(quick_selects); + QUICK_RANGE_SELECT *quick; + bool valid= true; + while ((quick= it++)) + { + if (!quick->is_valid()) + { + valid= false; + break; + } + } + return valid; + } + virtual int read_keys_and_merge()= 0; + /* used to get rows collected in Unique */ + READ_RECORD read_record; + + virtual void add_used_key_part_to_set(); +}; + + + +class QUICK_INDEX_MERGE_SELECT : public QUICK_INDEX_SORT_SELECT +{ +private: + /* true if this select is currently 
doing a clustered PK scan */ + bool doing_pk_scan; +protected: + int read_keys_and_merge(); + +public: + QUICK_INDEX_MERGE_SELECT(THD *thd_arg, TABLE *table) + :QUICK_INDEX_SORT_SELECT(thd_arg, table) {} + + int get_next(); + int get_type() { return QS_TYPE_INDEX_MERGE; } + void add_keys_and_lengths(String *key_names, String *used_lengths); +}; + +class QUICK_INDEX_INTERSECT_SELECT : public QUICK_INDEX_SORT_SELECT +{ +protected: + int read_keys_and_merge(); + +public: + QUICK_INDEX_INTERSECT_SELECT(THD *thd_arg, TABLE *table) + :QUICK_INDEX_SORT_SELECT(thd_arg, table) {} + + key_map filtered_scans; + int get_next(); + int get_type() { return QS_TYPE_INDEX_INTERSECT; } + void add_keys_and_lengths(String *key_names, String *used_lengths); + Explain_quick_select *get_explain(MEM_ROOT *alloc); +}; + + +/* + Rowid-Ordered Retrieval (ROR) index intersection quick select. + This quick select produces intersection of row sequences returned + by several QUICK_RANGE_SELECTs it "merges". + + All merged QUICK_RANGE_SELECTs must return rowids in rowid order. + QUICK_ROR_INTERSECT_SELECT will return rows in rowid order, too. + + All merged quick selects retrieve {rowid, covered_fields} tuples (not full + table records). + QUICK_ROR_INTERSECT_SELECT retrieves full records if it is not being used + by QUICK_ROR_INTERSECT_SELECT and all merged quick selects together don't + cover needed all fields. + + If one of the merged quick selects is a Clustered PK range scan, it is + used only to filter rowid sequence produced by other merged quick selects. 
+*/ + +class QUICK_ROR_INTERSECT_SELECT : public QUICK_SELECT_I +{ +public: + QUICK_ROR_INTERSECT_SELECT(THD *thd, TABLE *table, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + ~QUICK_ROR_INTERSECT_SELECT(); + + int init(); + void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ } + int reset(void); + int get_next(); + bool reverse_sorted() { return false; } + bool unique_key_range() { return false; } + int get_type() { return QS_TYPE_ROR_INTERSECT; } + void add_keys_and_lengths(String *key_names, String *used_lengths); + Explain_quick_select *get_explain(MEM_ROOT *alloc); + bool is_keys_used(const MY_BITMAP *fields); + void add_used_key_part_to_set(); +#ifndef DBUG_OFF + void dbug_dump(int indent, bool verbose); +#endif + int init_ror_merged_scan(bool reuse_handler, MEM_ROOT *alloc); + bool push_quick_back(MEM_ROOT *alloc, QUICK_RANGE_SELECT *quick_sel_range); + + class QUICK_SELECT_WITH_RECORD : public Sql_alloc + { + public: + QUICK_RANGE_SELECT *quick; + uchar *key_tuple; + ~QUICK_SELECT_WITH_RECORD() { delete quick; } + }; + + /* + Range quick selects this intersection consists of, not including + cpk_quick. + */ + List quick_selects; + + virtual bool is_valid() + { + List_iterator_fast it(quick_selects); + QUICK_SELECT_WITH_RECORD *quick; + bool valid= true; + while ((quick= it++)) + { + if (!quick->quick->is_valid()) + { + valid= false; + break; + } + } + return valid; + } + + /* + Merged quick select that uses Clustered PK, if there is one. This quick + select is not used for row retrieval, it is used for row retrieval. + */ + QUICK_RANGE_SELECT *cpk_quick; + + MEM_ROOT alloc; /* Memory pool for this and merged quick selects data. */ + THD *thd; /* current thread */ + bool need_to_fetch_row; /* if true, do retrieve full table records. */ + /* in top-level quick select, true if merged scans where initialized */ + bool scans_inited; +}; + + +/* + Rowid-Ordered Retrieval index union select. 
+ This quick select produces union of row sequences returned by several + quick select it "merges". + + All merged quick selects must return rowids in rowid order. + QUICK_ROR_UNION_SELECT will return rows in rowid order, too. + + All merged quick selects are set not to retrieve full table records. + ROR-union quick select always retrieves full records. + +*/ + +class QUICK_ROR_UNION_SELECT : public QUICK_SELECT_I +{ +public: + QUICK_ROR_UNION_SELECT(THD *thd, TABLE *table); + ~QUICK_ROR_UNION_SELECT(); + + int init(); + void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ } + int reset(void); + int get_next(); + bool reverse_sorted() { return false; } + bool unique_key_range() { return false; } + int get_type() { return QS_TYPE_ROR_UNION; } + void add_keys_and_lengths(String *key_names, String *used_lengths); + Explain_quick_select *get_explain(MEM_ROOT *alloc); + bool is_keys_used(const MY_BITMAP *fields); + void add_used_key_part_to_set(); +#ifndef DBUG_OFF + void dbug_dump(int indent, bool verbose); +#endif + + bool push_quick_back(QUICK_SELECT_I *quick_sel_range); + + List quick_selects; /* Merged quick selects */ + + virtual bool is_valid() + { + List_iterator_fast it(quick_selects); + QUICK_SELECT_I *quick; + bool valid= true; + while ((quick= it++)) + { + if (!quick->is_valid()) + { + valid= false; + break; + } + } + return valid; + } + + QUEUE queue; /* Priority queue for merge operation */ + MEM_ROOT alloc; /* Memory pool for this and merged quick selects data. */ + + THD *thd; /* current thread */ + uchar *cur_rowid; /* buffer used in get_next() */ + uchar *prev_rowid; /* rowid of last row returned by get_next() */ + bool have_prev_rowid; /* true if prev_rowid has valid data */ + uint rowid_length; /* table rowid length */ +private: + bool scans_inited; +}; + + +/* + Index scan for GROUP-BY queries with MIN/MAX aggregate functions. 
+ + This class provides a specialized index access method for GROUP-BY queries + of the forms: + + SELECT A_1,...,A_k, [B_1,...,B_m], [MIN(C)], [MAX(C)] + FROM T + WHERE [RNG(A_1,...,A_p ; where p <= k)] + [AND EQ(B_1,...,B_m)] + [AND PC(C)] + [AND PA(A_i1,...,A_iq)] + GROUP BY A_1,...,A_k; + + or + + SELECT DISTINCT A_i1,...,A_ik + FROM T + WHERE [RNG(A_1,...,A_p ; where p <= k)] + [AND PA(A_i1,...,A_iq)]; + + where all selected fields are parts of the same index. + The class of queries that can be processed by this quick select is fully + specified in the description of get_best_trp_group_min_max() in opt_range.cc. + + The get_next() method directly produces result tuples, thus obviating the + need to call end_send_group() because all grouping is already done inside + get_next(). + + Since one of the requirements is that all select fields are part of the same + index, this class produces only index keys, and not complete records. +*/ + +class QUICK_GROUP_MIN_MAX_SELECT : public QUICK_SELECT_I +{ +private: + handler * const file; /* The handler used to get data. */ + JOIN *join; /* Descriptor of the current query */ + KEY *index_info; /* The index chosen for data access */ + uchar *record; /* Buffer where the next record is returned. */ + uchar *tmp_record; /* Temporary storage for next_min(), next_max(). */ + uchar *group_prefix; /* Key prefix consisting of the GROUP fields. */ + const uint group_prefix_len; /* Length of the group prefix. */ + uint group_key_parts; /* A number of keyparts in the group prefix */ + uchar *last_prefix; /* Prefix of the last group for detecting EOF. */ + bool have_min; /* Specify whether we are computing */ + bool have_max; /* a MIN, a MAX, or both. */ + bool have_agg_distinct;/* aggregate_function(DISTINCT ...). 
*/ + bool seen_first_key; /* Denotes whether the first key was retrieved.*/ + bool doing_key_read; /* true if we enabled key only reads */ + + KEY_PART_INFO *min_max_arg_part; /* The keypart of the only argument field */ + /* of all MIN/MAX functions. */ + uint min_max_arg_len; /* The length of the MIN/MAX argument field */ + uchar *key_infix; /* Infix of constants from equality predicates. */ + uint key_infix_len; + DYNAMIC_ARRAY min_max_ranges; /* Array of range ptrs for the MIN/MAX field. */ + uint real_prefix_len; /* Length of key prefix extended with key_infix. */ + uint real_key_parts; /* A number of keyparts in the above value. */ + List *min_functions; + List *max_functions; + List_iterator *min_functions_it; + List_iterator *max_functions_it; + /* + Use index scan to get the next different key instead of jumping into it + through index read + */ + bool is_index_scan; +public: + /* + The following two members are public to allow easy access from + TRP_GROUP_MIN_MAX::make_quick() + */ + MEM_ROOT alloc; /* Memory pool for this and quick_prefix_select data. */ + QUICK_RANGE_SELECT *quick_prefix_select;/* For retrieval of group prefixes. 
*/ +private: + int next_prefix(); + int next_min_in_range(); + int next_max_in_range(); + int next_min(); + int next_max(); + void update_min_result(); + void update_max_result(); + int cmp_min_max_key(const uchar *key, uint16 length); +public: + QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join, bool have_min, + bool have_max, bool have_agg_distinct, + KEY_PART_INFO *min_max_arg_part, + uint group_prefix_len, uint group_key_parts, + uint used_key_parts, KEY *index_info, uint + use_index, double read_cost, ha_rows records, uint + key_infix_len, uchar *key_infix, MEM_ROOT + *parent_alloc, bool is_index_scan); + ~QUICK_GROUP_MIN_MAX_SELECT(); + bool add_range(SEL_ARG *sel_range); + void update_key_stat(); + void adjust_prefix_ranges(); + bool alloc_buffers(); + int init(); + void need_sorted_output() { /* always do it */ } + int reset(); + int get_next(); + bool reverse_sorted() { return false; } + bool unique_key_range() { return false; } + int get_type() { return QS_TYPE_GROUP_MIN_MAX; } + void add_keys_and_lengths(String *key_names, String *used_lengths); + void add_used_key_part_to_set(); +#ifndef DBUG_OFF + void dbug_dump(int indent, bool verbose); +#endif + bool is_agg_distinct() { return have_agg_distinct; } + bool loose_scan_is_scanning() { return is_index_scan; } + Explain_quick_select *get_explain(MEM_ROOT *alloc); +}; + + +class QUICK_SELECT_DESC: public QUICK_RANGE_SELECT +{ +public: + QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q, uint used_key_parts); + virtual QUICK_RANGE_SELECT *clone(bool *create_error) + { DBUG_ASSERT(0); return new QUICK_SELECT_DESC(this, used_key_parts); } + int get_next(); + bool reverse_sorted() { return 1; } + int get_type() { return QS_TYPE_RANGE_DESC; } + QUICK_SELECT_I *make_reverse(uint used_key_parts_arg) + { + return this; // is already reverse sorted + } +private: + bool range_reads_after_key(QUICK_RANGE *range); + int reset(void) { rev_it.rewind(); return QUICK_RANGE_SELECT::reset(); } + List rev_ranges; + List_iterator 
rev_it; + uint used_key_parts; +}; + + +class SQL_SELECT :public Sql_alloc { + public: + QUICK_SELECT_I *quick; // If quick-select used + COND *cond; // where condition + + /* + When using Index Condition Pushdown: condition that we've had before + extracting and pushing index condition. + In other cases, NULL. + */ + Item *pre_idx_push_select_cond; + TABLE *head; + IO_CACHE file; // Positions to used records + ha_rows records; // Records in use if read from file + double read_time; // Time to read rows + key_map quick_keys; // Possible quick keys + key_map needed_reg; // Possible quick keys after prev tables. + table_map const_tables,read_tables; + /* See PARAM::possible_keys */ + key_map possible_keys; + bool free_cond; /* Currently not used and always FALSE */ + + SQL_SELECT(); + ~SQL_SELECT(); + void cleanup(); + void set_quick(QUICK_SELECT_I *new_quick) { delete quick; quick= new_quick; } + + /* + @return + true - for ERROR and IMPOSSIBLE_RANGE + false - Ok + */ + bool check_quick(THD *thd, bool force_quick_range, ha_rows limit) + { + key_map tmp; + tmp.set_all(); + return test_quick_select(thd, tmp, 0, limit, force_quick_range, + FALSE, FALSE, FALSE) != OK; + } + + /* + RETURN + 0 if record must be skipped <-> (cond && cond->val_int() == 0) + -1 if error + 1 otherwise + */ + inline int skip_record(THD *thd) + { + int rc= MY_TEST(!cond || cond->val_int()); + if (thd->is_error()) + rc= -1; + return rc; + } + + enum quick_select_return_type { + IMPOSSIBLE_RANGE = -1, + ERROR, + OK + }; + + enum quick_select_return_type + test_quick_select(THD *thd, key_map keys, table_map prev_tables, + ha_rows limit, + bool force_quick_range, + bool ordered_output, + bool remove_false_parts_of_where, + bool only_single_index_range_scan, + bool suppress_unusable_key_notes = 0); +}; + +typedef enum SQL_SELECT::quick_select_return_type quick_select_return; + + +class SQL_SELECT_auto +{ + SQL_SELECT *select; +public: + SQL_SELECT_auto(): select(NULL) + {} + ~SQL_SELECT_auto() + { + 
delete select; + } + SQL_SELECT_auto& + operator= (SQL_SELECT *_select) + { + select= _select; + return *this; + } + operator SQL_SELECT * () const + { + return select; + } + SQL_SELECT * + operator-> () const + { + return select; + } + operator bool () const + { + return select; + } +}; + + +class FT_SELECT: public QUICK_RANGE_SELECT +{ +public: + FT_SELECT(THD *thd, TABLE *table, uint key, bool *create_err) : + QUICK_RANGE_SELECT (thd, table, key, 1, NULL, create_err) + { (void) init(); } + ~FT_SELECT() { file->ft_end(); } + virtual QUICK_RANGE_SELECT *clone(bool *create_error) + { DBUG_ASSERT(0); return new FT_SELECT(thd, head, index, create_error); } + int init() { return file->ft_init(); } + int reset() { return 0; } + int get_next() { return file->ha_ft_read(record); } + int get_type() { return QS_TYPE_FULLTEXT; } +}; + +FT_SELECT *get_ft_select(THD *thd, TABLE *table, uint key); +QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, + struct st_table_ref *ref, + ha_rows records); +SQL_SELECT *make_select(TABLE *head, table_map const_tables, + table_map read_tables, COND *conds, + SORT_INFO* filesort, + bool allow_null_cond, int *error); + +bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond); + +bool eq_ranges_exceeds_limit(RANGE_SEQ_IF *seq, void *seq_init_param, + uint limit); + +#ifdef WITH_PARTITION_STORAGE_ENGINE +bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond); +#endif +void store_key_image_to_rec(Field *field, uchar *ptr, uint len); + +extern String null_string; + +/* check this number of rows (default value) */ +#define SELECTIVITY_SAMPLING_LIMIT 100 +/* but no more then this part of table (10%) */ +#define SELECTIVITY_SAMPLING_SHARE 0.10 +/* do not check if we are going check less then this number of records */ +#define SELECTIVITY_SAMPLING_THRESHOLD 10 + +#endif diff --git a/sql/opt_range_mrr.cc b/sql/opt_range_mrr.cc new file mode 100644 index 00000000..452a6864 --- /dev/null +++ 
b/sql/opt_range_mrr.cc @@ -0,0 +1,408 @@ +/* + Copyright (c) 2009, 2011, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/**************************************************************************** + MRR Range Sequence Interface implementation that walks a SEL_ARG* tree. + ****************************************************************************/ + +/* MRR range sequence, SEL_ARG* implementation: stack entry */ +typedef struct st_range_seq_entry +{ + /* + Pointers in min and max keys. They point to right-after-end of key + images. The 0-th entry has these pointing to key tuple start. + */ + uchar *min_key, *max_key; + + /* + Flags, for {keypart0, keypart1, ... this_keypart} subtuple. + min_key_flag may have NULL_RANGE set. 
+  */
+  uint min_key_flag, max_key_flag;
+
+  /* Number of key parts */
+  int min_key_parts, max_key_parts;
+  SEL_ARG *key_tree;
+} RANGE_SEQ_ENTRY;
+
+
+/*
+  MRR range sequence, SEL_ARG* implementation: SEL_ARG graph traversal context
+*/
+typedef struct st_sel_arg_range_seq
+{
+  uint keyno;      /* index of used tree in SEL_TREE structure */
+  uint real_keyno; /* Number of the index in tables */
+  PARAM *param;
+  KEY_PART *key_parts;
+  SEL_ARG *start; /* Root node of the traversed SEL_ARG* graph */
+
+  /* One entry per keypart level of the traversal; entry 0 is the base */
+  RANGE_SEQ_ENTRY stack[MAX_REF_PARTS];
+  int i; /* Index of last used element in the above array */
+
+  bool at_start; /* TRUE <=> The traversal has just started */
+  /*
+    Iteration functions will set this to FALSE
+    if ranges being traversed do not allow to construct a ROR-scan
+  */
+  bool is_ror_scan;
+} SEL_ARG_RANGE_SEQ;
+
+
+/*
+  Range sequence interface, SEL_ARG* implementation: Initialize the traversal
+
+  SYNOPSIS
+    init()
+      init_param   SEL_ARG tree traversal context
+      n_ranges     [ignored] The number of ranges obtained
+      flags        [ignored] HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
+
+  RETURN
+    Value of init_param
+*/
+
+range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+  SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)init_param;
+  seq->param->range_count=0;
+  seq->at_start= TRUE;
+  seq->param->max_key_parts= 0;
+  /*
+    Initialize the 0-th stack entry: min/max key pointers point at the
+    start of the key buffers, with no flags and zero key parts so far.
+  */
+  seq->stack[0].key_tree= NULL;
+  seq->stack[0].min_key= seq->param->min_key;
+  seq->stack[0].min_key_flag= 0;
+  seq->stack[0].min_key_parts= 0;
+
+  seq->stack[0].max_key= seq->param->max_key;
+  seq->stack[0].max_key_flag= 0;
+  seq->stack[0].max_key_parts= 0;
+  seq->i= 0;
+  return init_param;
+}
+
+
+/*
+  Descend one keypart level in the SEL_ARG graph: push an entry for
+  key_tree onto the traversal stack, carrying forward the min/max key
+  tuple positions accumulated at the previous level.
+*/
+static void step_down_to(SEL_ARG_RANGE_SEQ *arg, SEL_ARG *key_tree)
+{
+  RANGE_SEQ_ENTRY *cur= &arg->stack[arg->i+1];
+  RANGE_SEQ_ENTRY *prev= &arg->stack[arg->i];
+
+  cur->key_tree= key_tree;
+  cur->min_key= prev->min_key;
+  cur->max_key= prev->max_key;
+  cur->min_key_parts= prev->min_key_parts;
+  cur->max_key_parts= prev->max_key_parts;
+
+ uint16 stor_length= arg->param->key[arg->keyno][key_tree->part].store_length; + + key_tree->store_min_max(arg->key_parts, stor_length, + &cur->min_key, prev->min_key_flag, + &cur->max_key, prev->max_key_flag, + &cur->min_key_parts, &cur->max_key_parts); + + cur->min_key_flag= prev->min_key_flag | key_tree->get_min_flag(arg->key_parts); + cur->max_key_flag= prev->max_key_flag | key_tree->get_max_flag(arg->key_parts); + + if (key_tree->is_null_interval()) + cur->min_key_flag |= NULL_RANGE; + (arg->i)++; +} + + +/* + Range sequence interface, SEL_ARG* implementation: get the next interval + + SYNOPSIS + sel_arg_range_seq_next() + rseq Value returned from sel_arg_range_seq_init + range OUT Store information about the range here + + DESCRIPTION + This is "get_next" function for Range sequence interface implementation + for SEL_ARG* tree. + + IMPLEMENTATION + The traversal also updates those param members: + - is_ror_scan + - range_count + - max_key_part + + RETURN + FALSE Ok + TRUE No more ranges in the sequence +*/ + +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER == 160030319) +/* + Workaround Visual Studio 2010 RTM compiler backend bug, the function enters + infinite loop. 
+ */ +#pragma optimize("g", off) +#endif + +bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) +{ + SEL_ARG *key_tree; + SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)rseq; + if (seq->at_start) + { + key_tree= seq->start; + seq->at_start= FALSE; + goto walk_up_n_right; + } + + key_tree= seq->stack[seq->i].key_tree; + /* Ok, we're at some "full tuple" position in the tree */ + + /* Step down if we can */ + if (key_tree->index_order_next(seq->key_parts) && + key_tree->index_order_next(seq->key_parts) != &null_element) + { + //step down; (update the tuple, we'll step right and stay there) + seq->i--; + step_down_to(seq, key_tree->index_order_next(seq->key_parts)); + key_tree= key_tree->index_order_next(seq->key_parts); + seq->is_ror_scan= FALSE; + goto walk_right_n_up; + } + + /* Ok, can't step down, walk left until we can step down */ + while (1) + { + if (seq->i == 1) // can't step left + return 1; + /* Step left */ + seq->i--; + key_tree= seq->stack[seq->i].key_tree; + + /* Step down if we can */ + if (key_tree->index_order_next(seq->key_parts) && + key_tree->index_order_next(seq->key_parts) != &null_element) + { + // Step down; update the tuple + seq->i--; + step_down_to(seq, key_tree->index_order_next(seq->key_parts)); + key_tree= key_tree->index_order_next(seq->key_parts); + break; + } + } + + /* + Ok, we've stepped down from the path to previous tuple. 
+ Walk right-up while we can + */ +walk_right_n_up: + while (key_tree->next_key_part && key_tree->next_key_part != &null_element && + key_tree->next_key_part->part == key_tree->part + 1 && + key_tree->next_key_part->type == SEL_ARG::KEY_RANGE) + { + { + RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i]; + size_t min_key_length= cur->min_key - seq->param->min_key; + size_t max_key_length= cur->max_key - seq->param->max_key; + size_t len= cur->min_key - cur[-1].min_key; + if (!(min_key_length == max_key_length && + !memcmp(cur[-1].min_key, cur[-1].max_key, len) && + !key_tree->min_flag && !key_tree->max_flag)) + { + seq->is_ror_scan= FALSE; + key_tree->store_next_min_max_keys(seq->param->key[seq->keyno], + &cur->min_key, &cur->min_key_flag, + &cur->max_key, &cur->max_key_flag, + &cur->min_key_parts, &cur->max_key_parts); + break; + } + } + + /* + Ok, current atomic interval is in form "t.field=const" and there is + next_key_part interval. Step right, and walk up from there. + */ + key_tree= key_tree->next_key_part; + +walk_up_n_right: + while (key_tree->index_order_prev(seq->key_parts) && + key_tree->index_order_prev(seq->key_parts) != &null_element) + { + /* Step up */ + key_tree= key_tree->index_order_prev(seq->key_parts); + } + step_down_to(seq, key_tree); + } + + /* Ok got a tuple */ + RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i]; + uint min_key_length= (uint)(cur->min_key - seq->param->min_key); + + range->ptr= (char*)(intptr)(key_tree->part); + uint max_key_parts; + if (cur->min_key_flag & GEOM_FLAG) + { + range->range_flag= cur->min_key_flag; + + /* Here minimum contains also function code bits, and maximum is +inf */ + range->start_key.key= seq->param->min_key; + range->start_key.length= min_key_length; + range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts); + range->start_key.flag= (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG); + max_key_parts= cur->min_key_parts; + } + else + { + max_key_parts= MY_MAX(cur->min_key_parts, cur->max_key_parts); + 
+ range->start_key.key= seq->param->min_key; + range->start_key.length= (uint)(cur->min_key - seq->param->min_key); + range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts); + range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY : + HA_READ_KEY_EXACT); + + range->end_key.key= seq->param->max_key; + range->end_key.length= (uint)(cur->max_key - seq->param->max_key); + range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY : + HA_READ_AFTER_KEY); + range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts); + + KEY *key_info; + if (seq->real_keyno== MAX_KEY) + key_info= NULL; + else + key_info= &seq->param->table->key_info[seq->real_keyno]; + + /* + This is an equality range (keypart_0=X and ... and keypart_n=Z) if + (1) - There are no flags indicating open range (e.g., + "keypart_x > y") or GIS. + (2) - The lower bound and the upper bound of the range has the + same value (min_key == max_key). + */ + const uint is_open_range = + (NO_MIN_RANGE | NO_MAX_RANGE | NEAR_MIN | NEAR_MAX | GEOM_FLAG); + const bool is_eq_range_pred = + !(cur->min_key_flag & is_open_range) && // (1) + !(cur->max_key_flag & is_open_range) && // (1) + range->start_key.length == range->end_key.length && // (2) + !memcmp(seq->param->min_key, seq->param->max_key, // (2) + range->start_key.length); + + range->range_flag= 0; + if (is_eq_range_pred) + { + range->range_flag = EQ_RANGE; + + /* + Conditions below: + (1) - Range analysis is used for estimating condition selectivity + (2) - This is a unique key, and we have conditions for all its + user-defined key parts. + (3) - The table uses extended keys, this key covers all components, + and we have conditions for all key parts. 
+ */ + if ( + !key_info || // (1) + ((uint)key_tree->part+1 == key_info->user_defined_key_parts && // (2) + key_info->flags & HA_NOSAME) || // (2) + ((key_info->flags & HA_EXT_NOSAME) && // (3) + (uint)key_tree->part+1 == key_info->ext_key_parts) // (3) + ) + range->range_flag |= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE); + } + + if (seq->is_ror_scan) + { + /* + If we get here, the condition on the key was converted to form + "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND + somecond(keyXpart{key_tree->part})" + Check if + somecond is "keyXpart{key_tree->part} = const" and + uncovered "tail" of KeyX parts is either empty or is identical to + first members of clustered primary key. + */ + if (!(!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag && + (range->start_key.length == range->end_key.length) && + !memcmp(range->start_key.key, range->end_key.key, range->start_key.length) && + is_key_scan_ror(seq->param, seq->real_keyno, key_tree->part + 1))) + seq->is_ror_scan= FALSE; + } + } + seq->param->range_count++; + seq->param->max_key_parts= MY_MAX(seq->param->max_key_parts, max_key_parts); + return 0; +} + +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER == 160030319) +/* VS2010 compiler bug workaround */ +#pragma optimize("g", on) +#endif + + +/**************************************************************************** + MRR Range Sequence Interface implementation that walks array + ****************************************************************************/ + +/* + Range sequence interface implementation for array: initialize + + SYNOPSIS + quick_range_seq_init() + init_param Caller-opaque paramenter: QUICK_RANGE_SELECT* pointer + n_ranges Number of ranges in the sequence (ignored) + flags MRR flags (currently not used) + + RETURN + Opaque value to be passed to quick_range_seq_next +*/ + +range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags) +{ + QUICK_RANGE_SELECT *quick= (QUICK_RANGE_SELECT*)init_param; + 
quick->qr_traversal_ctx.first= (QUICK_RANGE**)quick->ranges.buffer; + quick->qr_traversal_ctx.cur= (QUICK_RANGE**)quick->ranges.buffer; + quick->qr_traversal_ctx.last= quick->qr_traversal_ctx.cur + + quick->ranges.elements; + return &quick->qr_traversal_ctx; +} + + +/* + Range sequence interface implementation for array: get next + + SYNOPSIS + quick_range_seq_next() + rseq Value returned from quick_range_seq_init + range OUT Store information about the range here + + RETURN + 0 Ok + 1 No more ranges in the sequence +*/ + +bool quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) +{ + QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)rseq; + + if (ctx->cur == ctx->last) + return 1; /* no more ranges */ + + QUICK_RANGE *cur= *(ctx->cur); + cur->make_min_endpoint(&range->start_key); + cur->make_max_endpoint(&range->end_key); + range->range_flag= cur->flag; + ctx->cur++; + return 0; +} + + diff --git a/sql/opt_split.cc b/sql/opt_split.cc new file mode 100644 index 00000000..85ca234f --- /dev/null +++ b/sql/opt_split.cc @@ -0,0 +1,1415 @@ +/* + Copyright (c) 2017, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/* + This file contains functions to support the splitting technique. + This optimization technique can be applied to equi-joins involving + materialized tables such as materialized views, materialized derived tables + and materialized CTEs. 
The technique also could be applied to materialized
+ semi-joins though the code below does not support this usage yet.
+
+ Here are the main ideas behind this technique that we'll call SM optimization
+ (SplitMaterialization).
+
+ Consider the query
+ SELECT t1.a, t.min
+ FROM t1, (SELECT t2.a, MIN(t2.b) as min FROM t2 GROUP BY t2.a) t
+ WHERE t1.a = t.a and t1.b < const
+
+ Re-write the query into
+ SELECT t1.a, t.min
+ FROM t1, LATERAL (SELECT t2.a, MIN(t2.b) as min
+ FROM t2 WHERE t2.a = t1.a GROUP BY t2.a) t
+ WHERE t1.b < const
+
+ The execution of the original query (Q1) does the following:
+ 1. Executes the query in the specification of the derived table
+ and puts the result set into a temporary table with an index
+ on the first column.
+ 2. Joins t1 with the temporary table using its index.
+
+ The execution of the transformed query (Q1R) follows these steps:
+ 1. For each row of t1 where t1.b < const a temporary table
+ containing all rows of t2 with t2.a = t1.a is created
+ 2. If there are any rows in the temporary table aggregation
+ is performed for them
+ 3. The result of the aggregation is joined with t1.
+
+ The second execution can win if:
+ a) There is an efficient way to select rows of t2 for which t2.a = t1.a
+ (For example if there is an index on t2.a)
+ and
+ b) The number of temporary tables created for partitions
+ is much smaller than the total number of partitions
+
+ It should be noted that for the transformed query aggregation
+ for a partition may be performed several times.
+
+ As we can see the optimization basically splits table t2 into
+ partitions and performs aggregation over each of them
+ independently.
+
+ If we have only one equi-join condition then we either push it as
+ for Q1R or we don't. In a general case we may have many more options.
+ Consider the query (Q3)
+ SELECT *
+ FROM t1,t2 (SELECT t3.a, t3.b, MIN(t3.c) as min
+ FROM t3 GROUP BY a,b) t
+ WHERE t.a = t1.a AND t.b = t2.b
+ AND t1.c < c1 and t2.c < c2
+ AND P(t1,t2);
+ (P(t1,t2) designates some additional conditions over columns of t1,t2).
+
+ Assuming that there are indexes on t3(a,b) and t3(b) here we have several
+ reasonable options to push equi-join conditions into the derived.
+ All these options should be taken into account when the optimizer
+ evaluates different join orders. When the join order (t1,t,t2) is
+ evaluated there is only one way of splitting: to push the condition
+ t.a = t1.a into t. With the join order (t2,t,t1) only the condition
+ t.b = t2.b can be pushed. When the join orders (t1,t2,t) and (t2,t1,t)
+ are evaluated then the optimizer should consider pushing t.a = t1.a,
+ t.b = t2.b and (t.a = t1.a AND t.b = t2.b) to choose the best condition
+ for splitting. Apparently here the last condition is the best one because
+ it provides the maximum possible number of partitions.
+
+ If we dropped the index on t3(a,b) and created the index on t3(a) instead
+ then we would have two options for splitting: to push t.a = t1.a or to
+ push t.b = t2.b. If the selectivity of the index t3(a) is better than
+ the selectivity of t3(b) then the first option is preferred.
+
+ Although the condition (t.a = t1.a AND t.b = t2.b) provides a better
+ splitting than the condition t.a = t1.a the latter will be used for
+ splitting if the execution plan with the join order (t1,t,t2) turns out
+ to be the cheapest one. It's quite possible when the join condition
+ P(t1,t2) has a bad selectivity.
+
+ Whenever the optimizer evaluates the cost of using a splitting it
+ compares it with the cost of materialization without splitting.
+
+ If we just drop the index on t3(a,b) the chances that the splitting
+ will be used become much lower but they still exist provided that
+ the fanout of the partial join of t1 and t2 is small enough.
+
+ The lateral derived table LT formed as a result of SM optimization applied
+ to a materialized derived table DT must be joined after all parameters
+ of splitting have been evaluated, i.e. after all expressions used in the
+ equalities pushed into DT that make the employed splitting effective
+ could be evaluated. With the chosen join order all the parameters can be
+ evaluated after the last table LPT that contains any columns referenced in
+ the parameters has been joined and the table APT following LPT in the chosen
+ join order is accessed.
+ Usually the formed lateral derived table LT is accessed right after the table
+ LPT. As in such cases table LT must be refilled for each combination of
+ splitting parameters this table must be populated before each access to LT
+ and the estimate of the expected number of refills that could be suggested in
+ such cases is the number of rows in the partial join ending with table LPT.
+ However in other cases the chosen join order may contain tables between LPT
+ and LT.
+ Consider the query (Q4)
+ SELECT *
+ FROM t1 JOIN t2 ON t1.b = t2.b
+ LEFT JOIN (SELECT t3.a, t3.b, MIN(t3.c) as min
+ FROM t3 GROUP BY a,b) t
+ ON t.a = t1.a AND t.c > 0
+ [WHERE P(t1,t2)];
+ Let's assume that the join order t1,t2,t was chosen for this query and
+ SP optimization was applied to t with splitting over t3.a using the index
+ on column t3.a. Here the table t1 serves as LPT, t2 as APT while t with
+ pushed condition t.a = t1.a serves as LT. Note that here LT is accessed
+ after t2, not right after t1. Here the number of refills of the lateral
+ derived is not more than the number of key values of t1.a that might be
+ less than the cardinality of the partial join (t1,t2). That's why it makes
+ sense to signal that t3 has to be refilled just before t2 is accessed.
+ However if the cardinality of the partial join (t1,t2) happens to be less
+ than the cardinality of the partial join (t1) due to additional selective
+ condition P(t1,t2) then the flag informing about necessity of a new refill
+ can be set either when accessing t2 or right after it has been joined.
+ The current code sets such a flag right after generating a record of the
+ partial join with minimal cardinality for all those partial joins that
+ end between APT and LT. It sometimes allows pushing extra conditions
+ into the lateral derived without any increase of the number of refills.
+ However this flag can be set only after the last join table between
+ APT and LT using join buffer has been joined.
+*/
+
+/*
+ Splitting can be applied to a materialized table specified by the query
+ with post-join operations that require partitioning of the result set produced
+ by the join expression used in the FROM clause of the query such as GROUP BY
+ operation and window function operation. In any of these cases the post-join
+ operation can be executed independently for any partition only over the rows
+ of this partition. Also if the set of all partitions is divided into disjoint
+ subsets the operation can be applied to each subset independently. In this case
+ all rows are first partitioned into the groups each of which contains all the
+ rows from the partitions belonging to the same subset and then each group
+ is subpartitioned into groups in the post-join operation.
+
+ The set of all rows belonging to the union of several partitions is called
+ here superpartition. If a grouping operation is defined by the list
+ e_1,...,e_n then any set S = {e_i1,...,e_ik} can be used to divide all rows
+ into superpartitions such that for any two rows r1, r2 the following holds:
+ e_ij(r1) = e_ij(r2) for each e_ij from S. We use the splitting technique
+ only if S consists of references to columns of the joined tables.
+ For example if the GROUP BY list looks like this a, g(b), c we can consider
+ applying the splitting technique to the superpartitions defined by {a,c},
+ {a}, {c} (a and c here may be the references to the columns from different
+ tables).
+*/
+
+ /*
+ The following describes when and how the optimizer decides whether it
+ makes sense to employ the splitting technique.
+
+ 1. For each instance of a materialized table (derived/view/CTE) it is
+ checked that it is potentially splittable. Now it is done right after the
+ execution plan for the select specifying this table has been chosen.
+
+ 2. Any potentially splittable materialized table T is subject to two-phase
+ optimization. It means that the optimizer first builds the best execution
+ plan for join that specifies T. Then the control is passed back to the
+ optimization process of the embedding select Q. After the execution plan
+ for Q has been chosen the optimizer finishes the optimization of the join
+ specifying T.
+
+ 3. When the optimizer builds the container with the KEYUSE structures
+ for the join of embedding select it detects the equi-join conditions
+ PC that potentially could be pushed into a potentially splittable
+ materialized table T. The collected information about such conditions
+ is stored together with other facts on potential splittings for table T.
+
+ 4. When the optimizer starts looking for the best execution plan for the
+ embedding select Q for each potentially splittable materialized table T
+ it creates special KEYUSE structures for pushable equi-join conditions
+ PC. These structures are used to add new elements to the container
+ of KEYUSE structures built for T. The specifics of these elements are
+ that they can be enabled and disabled during the process of choosing
+ the best plan for Q.
+
+ 5.
When the optimizer extends a partial join order with a potentially
+ splittable materialized table T (in function best_access_path) it
+ first evaluates a new execution plan for the modified specification
+ of T that adds all equi-join conditions that can be pushed with
+ current join prefix to the WHERE conditions of the original
+ specification of T. If the cost of the new plan is better than
+ the cost of the original materialized table then the optimizer
+ prefers to use splitting for the current join prefix. As the cost
+ of the plan depends only on the pushed conditions it makes sense
+ to cache this plan for other prefixes.
+
+ 6. The optimizer takes into account the cost of splitting / materialization
+ of a potentially splittable materialized table T as a startup cost
+ to access table T.
+
+ 7. When the optimizer finally chooses the best execution plan for
+ the embedding select Q and this plan prefers using splitting
+ for table T with pushed equi-join conditions PC then the execution
+ plan for the underlying join with these conditions is chosen for T.
+*/
+
+/*
+ The implementation of the splitting technique below allows applying
+ the technique only to a materialized derived table / view / CTE whose
+ specification is either a select with GROUP BY or a non-grouping select
+ with window functions that share the same PARTITION BY list.
+*/ + +#include "mariadb.h" +#include "sql_select.h" +#include "opt_trace.h" + +/* Info on a splitting field */ +struct SplM_field_info +{ + /* Splitting field in the materialized table T */ + Field *mat_field; + /* The item from the select list of the specification of T */ + Item *producing_item; + /* The corresponding splitting field from the specification of T */ + Field *underlying_field; +}; + + +/* Info on the splitting execution plan saved in SplM_opt_info::cache */ +struct SplM_plan_info +{ + /* The cached splitting execution plan P */ + POSITION *best_positions; + /* The cost of the above plan */ + double cost; + /* Selectivity of splitting used in P */ + double split_sel; + /* For fast search of KEYUSE_EXT elements used for splitting in P */ + struct KEYUSE_EXT *keyuse_ext_start; + /* The tables that contains the fields used for splitting in P */ + TABLE *table; + /* The number of the key from 'table' used for splitting in P */ + uint key; + /* Number of the components of 'key' used for splitting in P */ + uint parts; +}; + + +/* + The structure contains the information that is used by the optimizer + for potentially splittable materialization of T that is a materialized + derived_table / view / CTE +*/ +class SplM_opt_info : public Sql_alloc +{ +public: + /* The join for the select specifying T */ + JOIN *join; + /* The map of tables from 'join' whose columns can be used for partitioning */ + table_map tables_usable_for_splitting; + /* Info about the fields of the joined tables usable for splitting */ + SplM_field_info *spl_fields; + /* The number of elements in the above list */ + uint spl_field_cnt; + /* The list of equalities injected into WHERE for split optimization */ + List inj_cond_list; + /* Contains the structures to generate all KEYUSEs for pushable equalities */ + List added_key_fields; + /* The cache of evaluated execution plans for 'join' with pushed equalities */ + List plan_cache; + /* Cost of best execution plan for join when nothing is 
pushed */ + double unsplit_cost; + /* Cardinality of T when nothing is pushed */ + double unsplit_card; + /* Lastly evaluated execution plan for 'join' with pushed equalities */ + SplM_plan_info *last_plan; + double last_refills; + + SplM_plan_info *find_plan(TABLE *table, uint key, uint parts); +}; + + +void TABLE::set_spl_opt_info(SplM_opt_info *spl_info) +{ + if (spl_info) + spl_info->join->spl_opt_info= spl_info; + spl_opt_info= spl_info; +} + + +void TABLE::deny_splitting() +{ + DBUG_ASSERT(spl_opt_info != NULL); + spl_opt_info->join->spl_opt_info= NULL; + spl_opt_info= NULL; +} + + +double TABLE::get_materialization_cost() +{ + DBUG_ASSERT(spl_opt_info != NULL); + return spl_opt_info->unsplit_cost; +} + + +/* This structure is auxiliary and used only in the function that follows it */ +struct SplM_field_ext_info: public SplM_field_info +{ + uint item_no; + bool is_usable_for_ref_access; +}; + + +/** + @brief + Check whether this join is one for potentially splittable materialized table + + @details + The function checks whether this join is for select that specifies + a potentially splittable materialized table T. If so, the collected + info on potential splittability of T is attached to the field spl_opt_info + of the TABLE structure for T. + + The function returns a positive answer if the following holds: + 1. the optimizer switch 'split_materialized' is set 'on' + 2. the select owning this join specifies a materialized derived/view/cte T + 3. this is the only select in the specification of T + 4. condition pushdown is not prohibited into T + 5. T is not recursive + 6. not all of this join are constant or optimized away + 7. T is either + 7.1. a grouping table with GROUP BY list P + or + 7.2. a non-grouping table with window functions over the same non-empty + partition specified by the PARTITION BY list P + 8. P contains some references on the columns of the joined tables C + occurred also in the select list of this join + 9. 
There are defined some keys usable for ref access of fields from C + with available statistics. + 10. The select doesn't use WITH ROLLUP (This limitation can probably be + lifted) + + @retval + true if the answer is positive + false otherwise +*/ + +bool JOIN::check_for_splittable_materialized() +{ + ORDER *partition_list= 0; + st_select_lex_unit *unit= select_lex->master_unit(); + TABLE_LIST *derived= unit->derived; + if (!(optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED)) || // !(1) + !(derived && derived->is_materialized_derived()) || // !(2) + (unit->first_select()->next_select()) || // !(3) + (derived->prohibit_cond_pushdown) || // !(4) + (derived->is_recursive_with_table()) || // !(5) + (table_count == 0 || const_tables == top_join_tab_count) || // !(6) + rollup.state != ROLLUP::STATE_NONE) // (10) + return false; + if (group_list) // (7.1) + { + if (!select_lex->have_window_funcs()) + partition_list= group_list; + } + else if (select_lex->have_window_funcs() && + select_lex->window_specs.elements == 1) // (7.2) + { + partition_list= + select_lex->window_specs.head()->partition_list->first; + } + if (!partition_list) + return false; + + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_split(thd, "check_split_materialized"); + + ORDER *ord; + Dynamic_array candidates(PSI_INSTRUMENT_MEM); + + /* + Select from partition_list all candidates for splitting. 
+ A candidate must be + - field item or refer to such (8.1) + - item mentioned in the select list (8.2) + Put info about such candidates into the array candidates + */ + table_map usable_tables= 0; // tables that contains the candidate + for (ord= partition_list; ord; ord= ord->next) + { + Item *ord_item= *ord->item; + if (ord_item->real_item()->type() != Item::FIELD_ITEM) // !(8.1) + continue; + + Field *ord_field= ((Item_field *) (ord_item->real_item()))->field; + + /* Ignore fields from of inner tables of outer joins */ + TABLE_LIST *tbl= ord_field->table->pos_in_table_list; + if (tbl->is_inner_table_of_outer_join()) + continue; + + List_iterator li(fields_list); + Item *item; + uint item_no= 0; + while ((item= li++)) + { + if ((*ord->item)->eq(item, 0)) // (8.2) + { + SplM_field_ext_info new_elem; + new_elem.producing_item= item; + new_elem.item_no= item_no; + new_elem.mat_field= derived->table->field[item_no]; + new_elem.underlying_field= ord_field; + new_elem.is_usable_for_ref_access= false; + candidates.push(new_elem); + usable_tables|= ord_field->table->map; + break; + } + item_no++; + } + } + if (candidates.elements() == 0) // no candidates satisfying (8.1) && (8.2) + { + trace_split.add("not_applicable", "group list has no candidates"); + return false; + } + + /* + For each table from this join find the keys that can be used for ref access + of the fields mentioned in the 'array candidates' + */ + + SplM_field_ext_info *cand; + SplM_field_ext_info *cand_start= &candidates.at(0); + SplM_field_ext_info *cand_end= cand_start + candidates.elements(); + + for (JOIN_TAB *tab= join_tab; + tab < join_tab + top_join_tab_count; tab++) + { + TABLE *table= tab->table; + if (!(table->map & usable_tables)) + continue; + + table->keys_usable_for_splitting.clear_all(); + uint i; + for (i= 0; i < table->s->keys; i++) + { + if (!table->keys_in_use_for_query.is_set(i)) + continue; + KEY *key_info= table->key_info + i; + uint key_parts= table->actual_n_key_parts(key_info); + 
uint usable_kp_cnt= 0; + for ( ; usable_kp_cnt < key_parts; usable_kp_cnt++) + { + if (key_info->actual_rec_per_key(usable_kp_cnt) == 0) + break; + int fldnr= key_info->key_part[usable_kp_cnt].fieldnr; + + for (cand= cand_start; cand < cand_end; cand++) + { + if (cand->underlying_field->table == table && + cand->underlying_field->field_index + 1 == fldnr) + { + cand->is_usable_for_ref_access= true; + break; + } + } + if (cand == cand_end) + break; + } + if (usable_kp_cnt) + table->keys_usable_for_splitting.set_bit(i); + } + } + + /* Count the candidate fields that can be accessed by ref */ + uint spl_field_cnt= (uint)candidates.elements(); + for (cand= cand_start; cand < cand_end; cand++) + { + if (!cand->is_usable_for_ref_access) + spl_field_cnt--; + } + + if (!spl_field_cnt) // No candidate field can be accessed by ref => !(9) + { + trace_split.add("not_applicable", + "no candidate field can be accessed through ref"); + return false; + } + + /* + Create a structure of the type SplM_opt_info and fill it with + the collected info on potential splittability of T + */ + SplM_opt_info *spl_opt_info= new (thd->mem_root) SplM_opt_info(); + SplM_field_info *spl_field= + (SplM_field_info *) (thd->calloc(sizeof(SplM_field_info) * + spl_field_cnt)); + + if (!(spl_opt_info && spl_field)) // consider T as not good for splitting + return false; + + spl_opt_info->join= this; + spl_opt_info->tables_usable_for_splitting= 0; + spl_opt_info->spl_field_cnt= spl_field_cnt; + spl_opt_info->spl_fields= spl_field; + + { + Json_writer_array trace_range(thd, "split_candidates"); + for (cand= cand_start; cand < cand_end; cand++) + { + if (!cand->is_usable_for_ref_access) + continue; + trace_range.add(cand->producing_item); + + spl_field->producing_item= cand->producing_item; + spl_field->underlying_field= cand->underlying_field; + spl_field->mat_field= cand->mat_field; + spl_opt_info->tables_usable_for_splitting|= + cand->underlying_field->table->map; + spl_field++; + } + } + + /* Attach 
this info to the table T */ + derived->table->set_spl_opt_info(spl_opt_info); + + /* + If this is specification of a materialized derived table T that is + potentially splittable and is used in the from list of the right operand + of an IN predicand transformed to a semi-join then the embedding semi-join + nest is not allowed to be materialized. + */ + if (derived && derived->is_materialized_derived() && + derived->embedding && derived->embedding->sj_subq_pred) + derived->embedding->sj_subq_pred->types_allow_materialization= FALSE; + return true; +} + + +/** + @brief + Collect info on KEY_FIELD usable for splitting + + @param + key_field KEY_FIELD to collect info on + + @details + The function assumes that this table is potentially splittable. + The function checks whether the KEY_FIELD structure key_field built for + this table was created for a splitting field f. If so, the function does + the following using info from key_field: + 1. Builds an equality of the form f = key_field->val that could be + pushed into this table. + 2. Creates a new KEY_FIELD structure for this equality and stores + a reference to this structure in this->spl_opt_info. +*/ + +void TABLE::add_splitting_info_for_key_field(KEY_FIELD *key_field) +{ + DBUG_ASSERT(spl_opt_info != NULL); + JOIN *join= spl_opt_info->join; + Field *field= key_field->field; + SplM_field_info *spl_field= spl_opt_info->spl_fields; + uint i= spl_opt_info->spl_field_cnt; + for ( ; i; i--, spl_field++) + { + if (spl_field->mat_field == field) + break; + } + if (!i) // field is not usable for splitting + return; + + /* + Any equality condition that can be potentially pushed into the + materialized derived table is constructed now though later it may turn out + that it is not needed, because it is not used for splitting. + The reason for this is that the failure to construct it when it has to be + injected causes denial for further processing of the query. 
+ Formally this equality is needed in the KEY_FIELD structure constructed + here that will be used to generate additional keyuses usable for splitting. + However key_field.cond could be used for this purpose (see implementations + of virtual function can_optimize_keypart_ref()). + + The condition is built in such a form that it can be added to the WHERE + condition of the select that specifies this table. + */ + THD *thd= in_use; + Item *left_item= spl_field->producing_item->build_clone(thd); + Item *right_item= key_field->val->build_clone(thd); + Item_func_eq *eq_item= 0; + if (left_item && right_item) + { + right_item->walk(&Item::set_fields_as_dependent_processor, + false, join->select_lex); + right_item->update_used_tables(); + eq_item= new (thd->mem_root) Item_func_eq(thd, left_item, right_item); + } + if (!eq_item) + return; + KEY_FIELD *added_key_field= + (KEY_FIELD *) thd->alloc(sizeof(KEY_FIELD)); + if (!added_key_field || + spl_opt_info->added_key_fields.push_back(added_key_field,thd->mem_root)) + return; + added_key_field->field= spl_field->underlying_field; + added_key_field->cond= eq_item; + added_key_field->val= key_field->val; + added_key_field->level= 0; + added_key_field->optimize= KEY_OPTIMIZE_EQ; + added_key_field->eq_func= true; + + Item *real= key_field->val->real_item(); + if ((real->type() == Item::FIELD_ITEM) && + ((Item_field*)real)->field->maybe_null()) + added_key_field->null_rejecting= true; + else + added_key_field->null_rejecting= false; + + added_key_field->cond_guard= NULL; + added_key_field->sj_pred_no= UINT_MAX; + return; +} + + +static bool +add_ext_keyuse_for_splitting(Dynamic_array *ext_keyuses, + KEY_FIELD *added_key_field, uint key, uint part) +{ + KEYUSE_EXT keyuse_ext; + Field *field= added_key_field->field; + + JOIN_TAB *tab=field->table->reginfo.join_tab; + key_map possible_keys=field->get_possible_keys(); + possible_keys.intersect(field->table->keys_usable_for_splitting); + tab->keys.merge(possible_keys); + + Item_func_eq 
*eq_item= (Item_func_eq *) (added_key_field->cond); + keyuse_ext.table= field->table; + keyuse_ext.val= eq_item->arguments()[1]; + keyuse_ext.key= key; + keyuse_ext.keypart=part; + keyuse_ext.keypart_map= (key_part_map) 1 << part; + keyuse_ext.used_tables= keyuse_ext.val->used_tables(); + keyuse_ext.optimize= added_key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL; + keyuse_ext.ref_table_rows= 0; + keyuse_ext.null_rejecting= added_key_field->null_rejecting; + keyuse_ext.cond_guard= added_key_field->cond_guard; + keyuse_ext.sj_pred_no= added_key_field->sj_pred_no; + keyuse_ext.validity_ref= 0; + keyuse_ext.needed_in_prefix= added_key_field->val->used_tables() & + ~(OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); + keyuse_ext.validity_var= false; + return ext_keyuses->push(keyuse_ext); +} + + +static int +sort_ext_keyuse(KEYUSE_EXT *a, KEYUSE_EXT *b) +{ + if (a->table->tablenr != b->table->tablenr) + return (int) (a->table->tablenr - b->table->tablenr); + if (a->key != b->key) + return (int) (a->key - b->key); + return (int) (a->keypart - b->keypart); +} + + +static void +sort_ext_keyuses(Dynamic_array *keyuses) +{ + KEYUSE_EXT *first_keyuse= &keyuses->at(0); + my_qsort(first_keyuse, keyuses->elements(), sizeof(KEYUSE_EXT), + (qsort_cmp) sort_ext_keyuse); +} + + +/** + @brief + Add info on keyuses usable for splitting into an array +*/ + +static bool +add_ext_keyuses_for_splitting_field(Dynamic_array *ext_keyuses, + KEY_FIELD *added_key_field) +{ + Field *field= added_key_field->field; + TABLE *table= field->table; + for (uint key= 0; key < table->s->keys; key++) + { + if (!(table->keys_usable_for_splitting.is_set(key))) + continue; + KEY *key_info= table->key_info + key; + uint key_parts= table->actual_n_key_parts(key_info); + KEY_PART_INFO *key_part_info= key_info->key_part; + for (uint part=0; part < key_parts; part++, key_part_info++) + { + if (!field->eq(key_part_info->field)) + continue; + if (add_ext_keyuse_for_splitting(ext_keyuses, added_key_field, key, part)) + return 
true; + } + } + return false; +} + + +/* + @brief + Cost of the post join operation used in specification of splittable table +*/ + +static +double spl_postjoin_oper_cost(THD *thd, double join_record_count, uint rec_len) +{ + double cost; + cost= get_tmp_table_write_cost(thd, join_record_count,rec_len) * + join_record_count; // cost to fill tmp table + cost+= get_tmp_table_lookup_cost(thd, join_record_count,rec_len) * + join_record_count; // cost to perform post join operation used here + cost+= get_tmp_table_lookup_cost(thd, join_record_count, rec_len) + + (join_record_count == 0 ? 0 : + join_record_count * log2 (join_record_count)) * + SORT_INDEX_CMP_COST; // cost to perform sorting + return cost; +} + +/** + @brief + Add KEYUSE structures that can be usable for splitting + + @details + This function is called only for joins created for potentially + splittable materialized tables. The function does the following: + 1. Creates the dynamic array ext_keyuses_for_splitting of KEYUSE_EXT + structures and fills is with info about all keyuses that + could be used for splitting. + 2. Sort the array ext_keyuses_for_splitting for fast access by key + on certain columns. + 3. Collects and stores cost and cardinality info on the best execution + plan that does not use splitting and save this plan together with + corresponding array of keyuses. + 4. Expand this array with KEYUSE elements built from the info stored + in ext_keyuses_for_splitting that could be produced by pushed + equalities employed for splitting. + 5. 
Prepare the extended array of keyuses to be used in the function + best_access_plan() +*/ + +void JOIN::add_keyuses_for_splitting() +{ + uint i; + size_t idx; + KEYUSE_EXT *keyuse_ext; + KEYUSE_EXT keyuse_ext_end; + double oper_cost; + uint rec_len; + uint added_keyuse_count; + TABLE *table= select_lex->master_unit()->derived->table; + List_iterator_fast li(spl_opt_info->added_key_fields); + KEY_FIELD *added_key_field; + if (!spl_opt_info->added_key_fields.elements) + goto err; + if (!(ext_keyuses_for_splitting= new Dynamic_array(PSI_INSTRUMENT_MEM))) + goto err; + while ((added_key_field= li++)) + { + (void) add_ext_keyuses_for_splitting_field(ext_keyuses_for_splitting, + added_key_field); + } + added_keyuse_count= (uint)ext_keyuses_for_splitting->elements(); + if (!added_keyuse_count) + goto err; + sort_ext_keyuses(ext_keyuses_for_splitting); + bzero((char*) &keyuse_ext_end, sizeof(keyuse_ext_end)); + if (ext_keyuses_for_splitting->push(keyuse_ext_end)) + goto err; + // psergey-todo: trace anything here? 
+ spl_opt_info->unsplit_card= join_record_count; + + rec_len= table->s->rec_buff_length; + + oper_cost= spl_postjoin_oper_cost(thd, join_record_count, rec_len); + + spl_opt_info->unsplit_cost= best_positions[table_count-1].read_time + + oper_cost; + + if (!(save_qep= new Join_plan_state(table_count + 1))) + goto err; + + save_query_plan(save_qep); + + if (!keyuse.buffer && + my_init_dynamic_array(PSI_INSTRUMENT_ME, &keyuse, sizeof(KEYUSE), + 20, 64, MYF(MY_THREAD_SPECIFIC))) + goto err; + + if (allocate_dynamic(&keyuse, save_qep->keyuse.elements + added_keyuse_count)) + goto err; + + idx= keyuse.elements= save_qep->keyuse.elements; + if (keyuse.elements) + memcpy(keyuse.buffer, + save_qep->keyuse.buffer, + (size_t) keyuse.elements * keyuse.size_of_element); + + keyuse_ext= &ext_keyuses_for_splitting->at(0); + for (i=0; i < added_keyuse_count; i++, keyuse_ext++, idx++) + { + set_dynamic(&keyuse, (KEYUSE *) keyuse_ext, idx); + KEYUSE *added_keyuse= ((KEYUSE *) (keyuse.buffer)) + idx; + added_keyuse->validity_ref= &keyuse_ext->validity_var; + } + + if (sort_and_filter_keyuse(this, &keyuse, true)) + goto err; + optimize_keyuse(this, &keyuse); + + for (uint i= 0; i < table_count; i++) + { + JOIN_TAB *tab= join_tab + i; + map2table[tab->table->tablenr]= tab; + } + + return; + +err: + if (save_qep) + restore_query_plan(save_qep); + table->deny_splitting(); + return; +} + + +/** + @brief + Add KEYUSE structures that can be usable for splitting of this joined table +*/ + +void JOIN_TAB::add_keyuses_for_splitting() +{ + DBUG_ASSERT(table->spl_opt_info != NULL); + SplM_opt_info *spl_opt_info= table->spl_opt_info; + spl_opt_info->join->add_keyuses_for_splitting(); +} + + +/* + @brief + Find info on the splitting plan by the splitting key +*/ + +SplM_plan_info *SplM_opt_info::find_plan(TABLE *table, uint key, uint parts) +{ + List_iterator_fast li(plan_cache); + SplM_plan_info *spl_plan; + while ((spl_plan= li++)) + { + if (spl_plan->table == table && + spl_plan->key == key && 
+ spl_plan->parts == parts) + break; + } + return spl_plan; +} + + +/* + @breaf + Enable/Disable a keyuses that can be used for splitting + */ + +static +void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start, + TABLE *table, uint key, + table_map excluded_tables, + bool validity_val) +{ + KEYUSE_EXT *keyuse_ext= key_keyuse_ext_start; + do + { + if (!(keyuse_ext->needed_in_prefix & excluded_tables)) + { + /* + The enabling/disabling flags are set just in KEYUSE_EXT structures. + Yet keyuses that are used by best_access_path() have pointers + to these flags. + */ + keyuse_ext->validity_var= validity_val; + } + keyuse_ext++; + } + while (keyuse_ext->key == key && keyuse_ext->table == table); +} + + +/** + @brief + Choose the best splitting to extend the evaluated partial join + + @param + idx index for joined table T in current partial join P + remaining_tables tables not joined yet + spl_pd_boundary OUT bitmap of the table from P extended by T that + starts the sub-sequence of tables S from which + no conditions are allowed to be pushed into T. + + @details + This function is called during the search for the best execution + plan of the join that contains this table T. The function is called + every time when the optimizer tries to extend a partial join by + joining it with table T. Depending on what tables are already in the + partial join different equalities usable for splitting can be pushed + into T. The function evaluates different variants and chooses the + best one. Then the function finds the plan for the materializing join + with the chosen equality conditions pushed into it. If the cost of the + plan turns out to be less than the cost of the best plan without + splitting the function set it as the true plan of materialization + of the table T. + The function caches the found plans for materialization of table T + together with the info what key was used for splitting. 
Next time when + the optimizer prefers to use the same key the plan is taken from + the cache of plans + + @retval + Pointer to the info on the found plan that employs the pushed equalities + if the plan has been chosen, NULL - otherwise. + If the function returns NULL the value of spl_param_tables is set to 0. +*/ + +SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, + table_map remaining_tables, + const POSITION *join_positions, + table_map *spl_pd_boundary) +{ + SplM_opt_info *spl_opt_info= table->spl_opt_info; + DBUG_ASSERT(spl_opt_info != NULL); + JOIN *join= spl_opt_info->join; + THD *thd= join->thd; + table_map tables_usable_for_splitting= + spl_opt_info->tables_usable_for_splitting; + KEYUSE_EXT *keyuse_ext= &join->ext_keyuses_for_splitting->at(0); + KEYUSE_EXT *UNINIT_VAR(best_key_keyuse_ext_start); + TABLE *best_table= 0; + double best_rec_per_key= DBL_MAX; + SplM_plan_info *spl_plan= 0; + uint best_key= 0; + uint best_key_parts= 0; + table_map best_param_tables= 0L; + Json_writer_object trace_obj(thd, "choose_best_splitting"); + Json_writer_array trace_arr(thd, "considered_keys"); + /* + Check whether there are keys that can be used to join T employing splitting + and if so, select the best out of such keys + */ + for (uint tablenr= 0; tablenr < join->table_count; tablenr++) + { + if (!((1ULL << tablenr) & tables_usable_for_splitting)) + continue; + JOIN_TAB *tab= join->map2table[tablenr]; + TABLE *table= tab->table; + if (keyuse_ext->table != table) + continue; + do + { + uint key= keyuse_ext->key; + KEYUSE_EXT *key_keyuse_ext_start= keyuse_ext; + key_part_map found_parts= 0; + table_map needed_in_prefix= 0; + do + { + if (keyuse_ext->needed_in_prefix & + (remaining_tables | this->join->sjm_lookup_tables)) + { + keyuse_ext++; + continue; + } + if (!(keyuse_ext->keypart_map & found_parts)) + { + if ((!found_parts && !keyuse_ext->keypart) || + (found_parts && ((keyuse_ext->keypart_map >> 1) & found_parts))) + found_parts|= keyuse_ext->keypart_map; 
+ else + { + do + { + keyuse_ext++; + } + while (keyuse_ext->key == key && keyuse_ext->table == table); + break; + } + } + KEY *key_info= table->key_info + key; + double rec_per_key= + key_info->actual_rec_per_key(keyuse_ext->keypart); + needed_in_prefix|= keyuse_ext->needed_in_prefix; + if (rec_per_key < best_rec_per_key) + { + best_table= keyuse_ext->table; + best_key= keyuse_ext->key; + best_key_parts= keyuse_ext->keypart + 1; + best_rec_per_key= rec_per_key; + best_key_keyuse_ext_start= key_keyuse_ext_start; + best_param_tables= needed_in_prefix; + // trace table, key_name, parts, needed_tables. + Json_writer_object cur_index(thd); + cur_index. + add("table_name", best_table->alias.ptr()). + add("index", best_table->key_info[best_key].name). + add("rec_per_key", best_rec_per_key). + add("param_tables", best_param_tables); + } + keyuse_ext++; + } + while (keyuse_ext->key == key && keyuse_ext->table == table); + } + while (keyuse_ext->table == table); + } + trace_arr.end(); + + spl_opt_info->last_plan= 0; + double refills= DBL_MAX; + table_map excluded_tables= remaining_tables | this->join->sjm_lookup_tables; + if (best_table) + { + *spl_pd_boundary= this->table->map; + if (!best_param_tables) + refills= 1; + else + { + table_map last_found= this->table->map; + for (const POSITION *pos= &join_positions[idx - 1]; ; pos--) + { + if (pos->table->table->map & excluded_tables) + continue; + if (pos->partial_join_cardinality < refills) + { + *spl_pd_boundary= last_found; + refills= pos->partial_join_cardinality; + } + last_found= pos->table->table->map; + if ((last_found & best_param_tables) || pos->use_join_buffer) + break; + } + } + + trace_obj.add("refills", refills). + add("spl_pd_boundary", *spl_pd_boundary); + + /* + The key for splitting was chosen, look for the plan for this key + in the cache + */ + spl_plan= spl_opt_info->find_plan(best_table, best_key, best_key_parts); + if (!spl_plan) + { + /* + The plan for the chosen key has not been found in the cache. 
+ Build a new plan and save info on it in the cache + */ + Json_writer_array wrapper(thd, "split_plan_search"); + table_map all_table_map= (((table_map) 1) << join->table_count) - 1; + reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table, + best_key, excluded_tables, true); + choose_plan(join, all_table_map & ~join->const_table_map); + + wrapper.end(); + /* + Check that the chosen plan is really a splitting plan. + If not or if there is not enough memory to save the plan in the cache + then just return with no splitting plan. + */ + POSITION *first_non_const_pos= join->best_positions + join->const_tables; + TABLE *table= first_non_const_pos->table->table; + key_map spl_keys= table->keys_usable_for_splitting; + if (!(first_non_const_pos->key && + spl_keys.is_set(first_non_const_pos->key->key)) || + !(spl_plan= (SplM_plan_info *) thd->alloc(sizeof(SplM_plan_info))) || + !(spl_plan->best_positions= + (POSITION *) thd->alloc(sizeof(POSITION) * join->table_count)) || + spl_opt_info->plan_cache.push_back(spl_plan)) + { + reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table, + best_key, excluded_tables, false); + trace_obj.add("split_plan_discarded", "constructed unapplicable query plan"); + return 0; + } + + spl_plan->keyuse_ext_start= best_key_keyuse_ext_start; + spl_plan->table= best_table; + spl_plan->key= best_key; + spl_plan->parts= best_key_parts; + spl_plan->split_sel= best_rec_per_key / + (spl_opt_info->unsplit_card ? 
+ spl_opt_info->unsplit_card : 1); + + uint rec_len= table->s->rec_buff_length; + + double split_card= spl_opt_info->unsplit_card * spl_plan->split_sel; + double oper_cost= split_card * + spl_postjoin_oper_cost(thd, split_card, rec_len); + spl_plan->cost= join->best_positions[join->table_count-1].read_time + + + oper_cost; + + memcpy((char *) spl_plan->best_positions, + (char *) join->best_positions, + sizeof(POSITION) * join->table_count); + reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table, + best_key, excluded_tables, false); + } + else + trace_obj.add("cached_plan_found", 1); + + if (spl_plan) + { + if (unlikely(thd->trace_started())) + { + trace_obj. + add("lead_table", spl_plan->table->alias.ptr()). + add("index", spl_plan->table->key_info[spl_plan->key].name). + add("parts", spl_plan->parts). + add("split_sel", spl_plan->split_sel). + add("cost", spl_plan->cost). + add("unsplit_cost", spl_opt_info->unsplit_cost). + add("records", (ha_rows) (records * spl_plan->split_sel)); + } + + if (refills * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01) + { + /* + The best plan that employs splitting is cheaper than + the plan without splitting + */ + spl_opt_info->last_plan= spl_plan; + spl_opt_info->last_refills= refills; + trace_obj.add("chosen", true); + } + else + trace_obj.add("chosen", false); + } + } + + /* Set the cost of the preferred materialization for this partial join */ + records= (ha_rows)spl_opt_info->unsplit_card; + spl_plan= spl_opt_info->last_plan; + if (spl_plan) + { + startup_cost= spl_opt_info->last_refills * spl_plan->cost; + records= (ha_rows) (records * spl_plan->split_sel); + } + else + { + startup_cost= spl_opt_info->unsplit_cost; + *spl_pd_boundary= 0; + } + return spl_plan; +} + + +/** + @brief + Inject equalities for splitting used by the materialization join + + @param + excluded_tables used to filter out the equalities that are not + to be pushed. 
+ + @details + This function injects equalities pushed into a derived table T for which + the split optimization has been chosen by the optimizer. The function + is called by JOIN::inject_splitting_cond_for_all_tables_with_split_opt(). + All equalities usable for splitting T whose right parts do not depend on + any of the 'excluded_tables' can be pushed into the where clause of the + derived table T. + The function also marks the select that specifies T as + UNCACHEABLE_DEPENDENT_INJECTED. + + @retval + false on success + true on failure +*/ + +bool JOIN::inject_best_splitting_cond(table_map excluded_tables) +{ + Item *inj_cond= 0; + List *inj_cond_list= &spl_opt_info->inj_cond_list; + List_iterator li(spl_opt_info->added_key_fields); + KEY_FIELD *added_key_field; + while ((added_key_field= li++)) + { + if (excluded_tables & added_key_field->val->used_tables()) + continue; + if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root)) + return true; + } + DBUG_ASSERT(inj_cond_list->elements); + switch (inj_cond_list->elements) { + case 1: + inj_cond= inj_cond_list->head(); break; + default: + inj_cond= new (thd->mem_root) Item_cond_and(thd, *inj_cond_list); + if (!inj_cond) + return true; + } + if (inj_cond) + inj_cond->fix_fields(thd,0); + + if (inject_cond_into_where(inj_cond->copy_andor_structure(thd))) + return true; + + select_lex->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED; + st_select_lex_unit *unit= select_lex->master_unit(); + unit->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED; + + return false; +} + + +/** + @brief + Test if equality is injected for split optimization + + @param + eq_item equality to to test + + @retval + true eq_item is equality injected for split optimization + false otherwise +*/ + +bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item) +{ + Item *left_item= eq_item->arguments()[0]->real_item(); + if (left_item->type() != Item::FIELD_ITEM) + return false; + Field *field= ((Item_field *) left_item)->field; + if 
(!field->table->reginfo.join_tab) + return false; + JOIN *join= field->table->reginfo.join_tab->join; + if (!join->spl_opt_info) + return false; + List_iterator_fast li(join->spl_opt_info->inj_cond_list); + Item *item; + while ((item= li++)) + { + if (item == eq_item) + return true; + } + return false; +} + + +/** + @brief + Fix the splitting chosen for a splittable table in the final query plan + + @param + spl_plan info on the splitting plan chosen for the splittable table T + excluded_tables tables that cannot be used in equalities pushed into T + is_const_table the table T is a constant table + + @details + If in the final query plan the optimizer has chosen a splitting plan + then the function sets this plan as the final execution plan to + materialized the table T. Otherwise the plan that does not use + splitting is set for the materialization. + + @retval + false on success + true on failure +*/ + +bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan, + table_map excluded_tables, + bool is_const_table) +{ + SplM_opt_info *spl_opt_info= table->spl_opt_info; + DBUG_ASSERT(table->spl_opt_info != 0); + JOIN *md_join= spl_opt_info->join; + if (spl_plan && !is_const_table) + { + is_split_derived= true; + memcpy((char *) md_join->best_positions, + (char *) spl_plan->best_positions, + sizeof(POSITION) * md_join->table_count); + /* + This is called for a proper work of JOIN::get_best_combination() + called for the join that materializes T + */ + reset_validity_vars_for_keyuses(spl_plan->keyuse_ext_start, + spl_plan->table, + spl_plan->key, + excluded_tables, + true); + } + else if (md_join->save_qep) + { + md_join->restore_query_plan(md_join->save_qep); + } + return false; +} + + +/** + @brief + Fix the splittings chosen splittable tables in the final query plan + + @details + The function calls JOIN_TAB::fix_splittins for all potentially + splittable tables in this join to set all final materialization + plans chosen for these tables. 
+ + @retval + false on success + true on failure +*/ + +bool JOIN::fix_all_splittings_in_plan() +{ + table_map prev_tables= 0; + table_map all_tables= (table_map(1) << table_count) - 1; + for (uint tablenr= 0; tablenr < table_count; tablenr++) + { + POSITION *cur_pos= &best_positions[tablenr]; + JOIN_TAB *tab= cur_pos->table; + if (tab->table->is_splittable()) + { + SplM_plan_info *spl_plan= cur_pos->spl_plan; + table_map excluded_tables= (all_tables & ~prev_tables) | + sjm_lookup_tables; + ; + if (spl_plan) + { + POSITION *pos= cur_pos; + table_map spl_pd_boundary= pos->spl_pd_boundary; + do + { + excluded_tables|= pos->table->table->map; + } + while (!((pos--)->table->table->map & spl_pd_boundary)); + } + if (tab->fix_splitting(spl_plan, + excluded_tables, + tablenr < const_tables )) + return true; + } + prev_tables|= tab->table->map; + } + return false; +} + + +/** + @brief + Inject splitting conditions into WHERE of split derived + + @details + The function calls JOIN_TAB::inject_best_splitting_cond() for each + materialized derived table T used in this join for which the split + optimization has been chosen by the optimizer. It is done in order to + inject equalities pushed into the where clause of the specification + of T that would be helpful to employ the splitting technique. 
+ + @retval + false on success + true on failure +*/ + +bool JOIN::inject_splitting_cond_for_all_tables_with_split_opt() +{ + table_map prev_tables= 0; + table_map all_tables= (table_map(1) << table_count) - 1; + for (uint tablenr= 0; tablenr < table_count; tablenr++) + { + POSITION *cur_pos= &best_positions[tablenr]; + JOIN_TAB *tab= cur_pos->table; + prev_tables|= tab->table->map; + if (!(tab->table->is_splittable() && cur_pos->spl_plan)) + continue; + SplM_opt_info *spl_opt_info= tab->table->spl_opt_info; + JOIN *join= spl_opt_info->join; + table_map excluded_tables= (all_tables & ~prev_tables) | sjm_lookup_tables; + table_map spl_pd_boundary= cur_pos->spl_pd_boundary; + for (POSITION *pos= cur_pos; ; pos--) + { + excluded_tables|= pos->table->table->map; + pos->table->no_forced_join_cache= true; + if (pos->table->table->map & spl_pd_boundary) + { + pos->table->split_derived_to_update|= tab->table->map; + break; + } + } + + if (join->inject_best_splitting_cond(excluded_tables)) + return true; + } + return false; +} diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc new file mode 100644 index 00000000..50a14763 --- /dev/null +++ b/sql/opt_subselect.cc @@ -0,0 +1,7288 @@ +/* + Copyright (c) 2010, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + @brief + Semi-join subquery optimizations code + +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_base.h" +#include "sql_const.h" +#include "sql_select.h" +#include "filesort.h" +#include "opt_subselect.h" +#include "sql_test.h" +#include +#include "opt_trace.h" + +/* + This file contains optimizations for semi-join subqueries. + + Contents + -------- + 1. What is a semi-join subquery + 2. General idea about semi-join execution + 2.1 Correlated vs uncorrelated semi-joins + 2.2 Mergeable vs non-mergeable semi-joins + 3. Code-level view of semi-join processing + 3.1 Conversion + 3.1.1 Merged semi-join TABLE_LIST object + 3.1.2 Non-merged semi-join data structure + 3.2 Semi-joins and query optimization + 3.2.1 Non-merged semi-joins and join optimization + 3.2.2 Merged semi-joins and join optimization + 3.3 Semi-joins and query execution + + 1. What is a semi-join subquery + ------------------------------- + We use this definition of semi-join: + + outer_tbl SEMI JOIN inner_tbl ON cond = {set of outer_tbl.row such that + exist inner_tbl.row, for which + cond(outer_tbl.row,inner_tbl.row) + is satisfied} + + That is, semi-join operation is similar to inner join operation, with + exception that we don't care how many matches a row from outer_tbl has in + inner_tbl. + + In SQL terms: a semi-join subquery is an IN subquery that is an AND-part of + the WHERE/ON clause. + + 2. General idea about semi-join execution + ----------------------------------------- + We can execute semi-join in a way similar to inner join, with exception that + we need to somehow ensure that we do not generate record combinations that + differ only in rows of inner tables. 
+ There is a number of different ways to achieve this property, implemented by + a number of semi-join execution strategies. + Some strategies can handle any semi-joins, other can be applied only to + semi-joins that have certain properties that are described below: + + 2.1 Correlated vs uncorrelated semi-joins + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Uncorrelated semi-joins are special in the respect that they allow to + - execute the subquery (possible as it's uncorrelated) + - somehow make sure that generated set does not have duplicates + - perform an inner join with outer tables. + + or, rephrasing in SQL form: + + SELECT ... FROM ot WHERE ot.col IN (SELECT it.col FROM it WHERE uncorr_cond) + -> + SELECT ... FROM ot JOIN (SELECT DISTINCT it.col FROM it WHERE uncorr_cond) + + 2.2 Mergeable vs non-mergeable semi-joins + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Semi-join operation has some degree of commutability with inner join + operation: we can join subquery's tables with ouside table(s) and eliminate + duplicate record combination after that: + + ot1 JOIN ot2 SEMI_JOIN{it1,it2} (it1 JOIN it2) ON sjcond(ot2,it*) -> + | + +-------------------------------+ + v + ot1 SEMI_JOIN{it1,it2} (it1 JOIN it2 JOIN ot2) ON sjcond(ot2,it*) + + In order for this to work, subquery's top-level operation must be join, and + grouping or ordering with limit (grouping or ordering with limit are not + commutative with duplicate removal). In other words, the conversion is + possible when the subquery doesn't have GROUP BY clause, any aggregate + functions*, or ORDER BY ... LIMIT clause. + + Definitions: + - Subquery whose top-level operation is a join is called *mergeable semi-join* + - All other kinds of semi-join subqueries are considered non-mergeable. + + *- this requirement is actually too strong, but its exceptions are too + complicated to be considered here. + + 3. 
Code-level view of semi-join processing + ------------------------------------------ + + 3.1 Conversion and pre-optimization data structures + --------------------------------------------------- + * When doing JOIN::prepare for the subquery, we detect that it can be + converted into a semi-join and register it in parent_join->sj_subselects + + * At the start of parent_join->optimize(), the predicate is converted into + a semi-join node. A semi-join node is a TABLE_LIST object that is linked + somewhere in parent_join->join_list (either it is just present there, or + it is a descendant of some of its members). + + There are two kinds of semi-joins: + - Merged semi-joins + - Non-merged semi-joins + + 3.1.1 Merged semi-join TABLE_LIST object + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Merged semi-join object is a TABLE_LIST that contains a sub-join of + subquery tables and the semi-join ON expression (in this respect it is + very similar to nested outer join representation) + Merged semi-join represents this SQL: + + ... SEMI JOIN (inner_tbl1 JOIN ... JOIN inner_tbl_n) ON sj_on_expr + + Semi-join objects of this kind have TABLE_LIST::sj_subq_pred set. + + 3.1.2 Non-merged semi-join data structure + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Non-merged semi-join object is a leaf TABLE_LIST object that has a subquery + that produces rows. It is similar to a base table and represents this SQL: + + ... SEMI_JOIN (SELECT non_mergeable_select) ON sj_on_expr + + Subquery items that were converted into semi-joins are removed from the WHERE + clause. (They do remain in PS-saved WHERE clause, and they replace themselves + with Item_int(1) on subsequent re-executions). + + 3.2 Semi-joins and join optimization + ------------------------------------ + + 3.2.1 Non-merged semi-joins and join optimization + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + For join optimization purposes, non-merged semi-join nests are similar to + base tables. 
Each such nest is represented by one one JOIN_TAB, which has + two possible access strategies: + - full table scan (representing SJ-Materialization-Scan strategy) + - eq_ref-like table lookup (representing SJ-Materialization-Lookup) + + Unlike regular base tables, non-merged semi-joins have: + - non-zero JOIN_TAB::startup_cost, and + - join_tab->table->is_filled_at_execution()==TRUE, which means one + cannot do const table detection, range analysis or other dataset-dependent + optimizations. + Instead, get_delayed_table_estimates() will run optimization for the + subquery and produce an E(materialized table size). + + 3.2.2 Merged semi-joins and join optimization + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + - optimize_semijoin_nests() does pre-optimization + - during join optimization, the join has one JOIN_TAB (or is it POSITION?) + array, and suffix-based detection is used, see optimize_semi_joins() + - after join optimization is done, get_best_combination() switches + the data-structure to prefix-based, multiple JOIN_TAB ranges format. + + 3.3 Semi-joins and query execution + ---------------------------------- + * Join executor has hooks for all semi-join strategies. + TODO elaborate. + +*/ + +/* +EqualityPropagationAndSjmNests +****************************** + +Equalities are used for: +P1. Equality propagation +P2. Equality substitution [for a certain join order] + +The equality propagation is not affected by SJM nests. In fact, it is done +before we determine the execution plan, i.e. before we even know we will use +SJM-nests for execution. + +The equality substitution is affected. + +Substitution without SJMs +========================= +When one doesn't have SJM nests, tables have a strict join order: + + ---------------------------------> + t1 -- t2 -- t3 -- t4 --- t5 + + + ? ^ + \ + --(part-of-WHERE) + + +parts WHERE/ON and ref. expressions are attached at some point along the axis. 
+Expression is allowed to refer to a table column if the table is to the left of +the attachment point. For any given expression, we have a goal: + + "Move leftmost allowed attachment point as much as possible to the left" + +Substitution with SJMs - task setting +===================================== + +When SJM nests are present, there is no global strict table ordering anymore: + + + ---------------------------------> + + ot1 -- ot2 --- sjm -- ot4 --- ot5 + | + | Main execution + - - - - - - - - - - - - - - - - - - - - - - - - + | Materialization + it1 -- it2 --/ + + +Besides that, we must take into account that + - values for outer table columns, otN.col, are inaccessible at + materialization step (SJM-RULE) + - values for inner table columns, itN.col, are inaccessible at Main execution + step, except for SJ-Materialization-Scan and columns that are in the + subquery's select list. (SJM-RULE) + +Substitution with SJMs - solution +================================= + +First, we introduce global strict table ordering like this: + + ot1 - ot2 --\ /--- ot3 -- ot5 + \--- it1 --- it2 --/ + +Now, let's see how to meet (SJM-RULE). + +SJ-Materialization is only applicable for uncorrelated subqueries. From this, it +follows that any multiple equality will either +1. include only columns of outer tables, or +2. include only columns of inner tables, or +3. include columns of inner and outer tables, joined together through one + of IN-equalities. + +Cases #1 and #2 can be handled in the same way as with regular inner joins. + +Case #3 requires special handling, so that we don't construct violations of +(SJM-RULE). Let's consider possible ways to build violations. + +Equality propagation starts with the clause in this form + + top_query_where AND subquery_where AND in_equalities + +First, it builds multi-equalities. It can also build a mixed multi-equality + + multiple-equal(ot1.col, ot2.col, ... 
it1.col, itN.col) + +Multi-equalities are pushed down the OR-clauses in top_query_where and in +subquery_where, so it's possible that clauses like this one are built: + + subquery_cond OR (multiple-equal(it1.col, ot1.col,...) AND ...) + ^^^^^^^^^^^^^ \ + | this must be evaluated + \- can only be evaluated at the main phase. + at the materialization phase + +Finally, equality substitution is started. It does two operations: + + +1. Field reference substitution +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +(In the code, this is Item_field::replace_equal_field) + +This is a process of replacing each reference to "tblX.col" +with the first element of the multi-equality. (REF-SUBST-ORIG) + +This behaviour can cause problems with Semi-join nests. Suppose, we have a +condition: + + func(it1.col, it2.col) + +and a multi-equality(ot1.col, it1.col). Then, reference to "it1.col" will be +replaced with "ot1.col", constructing a condition + + func(ot1.col, it2.col) + +which will be a violation of (SJM-RULE). + +In order to avoid this, (REF-SUBST-ORIG) is amended as follows: + +- references to tables "itX.col" that are inner wrt some SJM nest, are + replaced with references to the first inner table from the same SJM nest. + +- references to top-level tables "otX.col" are replaced with references to + the first element of the multi-equality, no matter if that first element is + a column of a top-level table or of table from some SJM nest. + (REF-SUBST-SJM) + + The case where the first element is a table from an SJM nest $SJM is ok, + because it can be proven that $SJM uses SJ-Materialization-Scan, and + "unpacks" correct column values to the first element during the main + execution phase. + +2. Item_equal elimination +~~~~~~~~~~~~~~~~~~~~~~~~~ +(In the code: eliminate_item_equal) This is a process of taking + + multiple-equal(a,b,c,d,e) + +and replacing it with an equivalent expression which is an AND of pair-wise +equalities: + + a=b AND a=c AND ... 
+
+The equalities are picked such that for any given join prefix (t1,t2...) the
+subset of equalities that can be evaluated gives the most restrictive
+filtering.
+
+Without SJM nests, it is sufficient to compare every multi-equality member
+with the first one:
+
+  elem1=elem2 AND elem1=elem3 AND elem1=elem4 ...
+
+When SJM nests are present, we should take care not to construct equalities
+that violate the (SJM-RULE). This is achieved by generating separate sets of
+equalities for top-level tables and for inner tables. That is, for the join
+order
+
+  ot1 - ot2 --\                  /--- ot3 -- ot5
+               \--- it1 --- it2 --/
+
+we will generate
+  ot1.col=ot2.col
+  ot1.col=ot3.col
+  ot1.col=ot5.col
+  it2.col=it1.col
+
+
+2.1 The problem with Item_equals and ORs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+As has been mentioned above, multiple equalities are pushed down into OR
+clauses, possibly building clauses like this:
+
+  func(it.col2) OR multiple-equal(it1.col1, it1.col2, ot1.col)  (1)
+
+where the first part of the clause has references to inner tables, while the
+second has references to the top-level tables, which is a violation of
+(SJM-RULE).
+
+AND-clauses of this kind do not create problems, because make_cond_for_table()
+will take them apart. OR-clauses will not be split. It is possible to
+split-out the part that's dependent on the inner table:
+
+  func(it.col2) OR it1.col1=it1.col2
+
+but this is a less-restrictive condition than condition (1). Current execution
+scheme will still try to generate the "remainder" condition:
+
+  func(it.col2) OR it1.col1=ot1.col
+
+which is a violation of (SJM-RULE).
+
+QQ: "ot1.col=it1.col" is checked at the upper level. Why was it not removed
+here?
+AA: because it has a proper subset of conditions that are found on this level.
+ consider a join order of ot, sjm(it) + and a condition + ot.col=it.col AND ( ot.col=it.col='foo' OR it.col2='bar') + + we will produce: + table ot: nothing + table it: ot.col=it.col AND (ot.col='foo' OR it.col2='bar') + ^^^^ ^^^^^^^^^^^^^^^^ + | \ the problem is that + | this part condition didnt + | receive a substitution + | + +--- it was correct to subst, 'ot' is + the left-most. + + +Does it make sense to push "inner=outer" down into ORs? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Yes. Consider the query: + + select * from ot + where ot.col in (select it.col from it where (it.col='foo' OR it.col='bar')) + +here, it may be useful to infer that + + (ot.col='foo' OR ot.col='bar') (CASE-FOR-SUBST) + +and attach that condition to the table 'ot'. + +Possible solutions for Item_equals and ORs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Solution #1 +~~~~~~~~~~~ +Let make_cond_for_table() chop analyze the OR clauses it has produced and +discard them if they violate (SJM-RULE). This solution would allow to handle +cases like (CASE-FOR-SUBST) at the expense of making semantics of +make_cond_for_table() complicated. + +Solution #2 +~~~~~~~~~~~ +Before the equality propagation phase, none of the OR clauses violate the +(SJM-RULE). This way, if we remember which tables the original equality +referred to, we can only generate equalities that refer to the outer (or inner) +tables. Note that this will disallow handling of cases like (CASE-FOR-SUBST). + +Currently, solution #2 is implemented. 
+*/ + +LEX_CSTRING weedout_key= {STRING_WITH_LEN("weedout_key")}; + +static +bool subquery_types_allow_materialization(THD *thd, Item_in_subselect *in_subs); +static bool replace_where_subcondition(JOIN *, Item **, Item *, Item *, bool); +static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2, + void *arg); +static void reset_equality_number_for_subq_conds(Item * cond); +static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred); +static bool convert_subq_to_jtbm(JOIN *parent_join, + Item_in_subselect *subq_pred, bool *remove); +static TABLE_LIST *alloc_join_nest(THD *thd); +static uint get_tmp_table_rec_length(Ref_ptr_array p_list, uint elements); +bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables); +static SJ_MATERIALIZATION_INFO * +at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab, + uint idx, bool *loose_scan); +static Item *create_subq_in_equalities(THD *thd, SJ_MATERIALIZATION_INFO *sjm, + Item_in_subselect *subq_pred); +static bool remove_sj_conds(THD *thd, Item **tree); +static bool is_cond_sj_in_equality(Item *item); +static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab); +static Item *remove_additional_cond(Item* conds); +static void remove_subq_pushed_predicates(JOIN *join, Item **where); + +enum_nested_loop_state +end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); + + +/* + Check if Materialization strategy is allowed for given subquery predicate. + + @param thd Thread handle + @param in_subs The subquery predicate + @param child_select The select inside predicate (the function will + check it is the only one) + + @return TRUE - Materialization is applicable + FALSE - Otherwise +*/ + +bool is_materialization_applicable(THD *thd, Item_in_subselect *in_subs, + st_select_lex *child_select) +{ + st_select_lex_unit* parent_unit= child_select->master_unit(); + /* + Check if the subquery predicate can be executed via materialization. 
+ The required conditions are: + 0. The materialization optimizer switch was set. + 1. Subquery is a single SELECT (not a UNION). + TODO: this is a limitation that can be fixed + 2. Subquery is not a table-less query. In this case there is no + point in materializing. + 2A The upper query is not a table-less SELECT ... FROM DUAL. We + can't do materialization for SELECT .. FROM DUAL because it + does not call setup_subquery_materialization(). We could make + SELECT ... FROM DUAL call that function but that doesn't seem + to be the case that is worth handling. + 3. Either the subquery predicate is a top-level predicate, or at + least one partial match strategy is enabled. If no partial match + strategy is enabled, then materialization cannot be used for + non-top-level queries because it cannot handle NULLs correctly. + 4. Subquery is non-correlated + TODO: + This condition is too restrictive (limitation). It can be extended to: + (Subquery is non-correlated || + Subquery is correlated to any query outer to IN predicate || + (Subquery is correlated to the immediate outer query && + Subquery !contains {GROUP BY, ORDER BY [LIMIT], + aggregate functions}) && subquery predicate is not under "NOT IN")) + 5. Subquery does not contain recursive references + + A note about prepared statements: we want the if-branch to be taken on + PREPARE and each EXECUTE. The rewrites are only done once, but we need + select_lex->sj_subselects list to be populated for every EXECUTE. 
+ + */ + if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION) && // 0 + !child_select->is_part_of_union() && // 1 + parent_unit->first_select()->leaf_tables.elements && // 2 + child_select->outer_select() && + child_select->outer_select()->table_list.first && // 2A + subquery_types_allow_materialization(thd, in_subs) && + (in_subs->is_top_level_item() || //3 + optimizer_flag(thd, + OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || //3 + optimizer_flag(thd, + OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) && //3 + !in_subs->is_correlated && //4 + !in_subs->with_recursive_reference) //5 + { + return TRUE; + } + return FALSE; +} + + +/* + Check if we need JOIN::prepare()-phase subquery rewrites and if yes, do them + + SYNOPSIS + check_and_do_in_subquery_rewrites() + join Subquery's join + + DESCRIPTION + Check if we need to do + - subquery -> mergeable semi-join rewrite + - if the subquery can be handled with materialization + - 'substitution' rewrite for table-less subqueries like "(select 1)" + - IN->EXISTS rewrite + and, depending on the rewrite, either do it, or record it to be done at a + later phase. + + RETURN + 0 - OK + Other - Some sort of query error +*/ + +int check_and_do_in_subquery_rewrites(JOIN *join) +{ + THD *thd=join->thd; + st_select_lex *select_lex= join->select_lex; + st_select_lex_unit* parent_unit= select_lex->master_unit(); + DBUG_ENTER("check_and_do_in_subquery_rewrites"); + + /* + IN/ALL/ANY rewrites are not applicable for so called fake select + (this select exists only to filter results of union if it is needed). 
+ */ + if (select_lex == select_lex->master_unit()->fake_select_lex) + DBUG_RETURN(0); + + /* + If + 1) this join is inside a subquery (of any type except FROM-clause + subquery) and + 2) we aren't just normalizing a VIEW + + Then perform early unconditional subquery transformations: + - Convert subquery predicate into semi-join, or + - Mark the subquery for execution using materialization, or + - Perform IN->EXISTS transformation, or + - Perform more/less ALL/ANY -> MIN/MAX rewrite + - Substitute trivial scalar-context subquery with its value + + TODO: for PS, make the whole block execute only on the first execution + */ + Item_subselect *subselect; + if (!thd->lex->is_view_context_analysis() && // (1) + (subselect= parent_unit->item)) // (2) + { + Item_in_subselect *in_subs= NULL; + Item_allany_subselect *allany_subs= NULL; + Item_subselect::subs_type substype= subselect->substype(); + switch (substype) { + case Item_subselect::IN_SUBS: + in_subs= subselect->get_IN_subquery(); + break; + case Item_subselect::ALL_SUBS: + case Item_subselect::ANY_SUBS: + DBUG_ASSERT(subselect->get_IN_subquery()); + allany_subs= (Item_allany_subselect *)subselect; + break; + default: + break; + } + + /* + Try removing "ORDER BY" or even "ORDER BY ... LIMIT" from certain kinds + of subqueries. The removal might enable further transformations. + */ + if (substype == Item_subselect::IN_SUBS || + substype == Item_subselect::EXISTS_SUBS || + substype == Item_subselect::ANY_SUBS || + substype == Item_subselect::ALL_SUBS) + { + // (1) - ORDER BY without LIMIT can be removed from IN/EXISTS subqueries + // (2) - for EXISTS, can also remove "ORDER BY ... LIMIT n", + // but cannot remove "ORDER BY ... 
LIMIT n OFFSET m" + if (!select_lex->limit_params.select_limit || // (1) + (substype == Item_subselect::EXISTS_SUBS && // (2) + !select_lex->limit_params.offset_limit)) // (2) + { + select_lex->join->order= 0; + select_lex->join->skip_sort_order= 1; + } + } + + /* Resolve expressions and perform semantic analysis for IN query */ + if (in_subs != NULL) + /* + TODO: Add the condition below to this if statement when we have proper + support for is_correlated handling for materialized semijoins. + If we were to add this condition now, the fix_fields() call in + convert_subq_to_sj() would force the flag is_correlated to be set + erroneously for prepared queries. + + thd->stmt_arena->state != Query_arena::PREPARED) + */ + { + SELECT_LEX *current= thd->lex->current_select; + thd->lex->current_select= current->return_after_parsing(); + char const *save_where= thd->where; + thd->where= "IN/ALL/ANY subquery"; + + Item **left= in_subs->left_exp_ptr(); + bool failure= (*left)->fix_fields_if_needed(thd, left); + thd->lex->current_select= current; + thd->where= save_where; + if (failure) + DBUG_RETURN(-1); /* purecov: deadcode */ + + // fix_field above can rewrite left expression + uint ncols= (*left)->cols(); + /* + Check if the left and right expressions have the same # of + columns, i.e. we don't have a case like + (oe1, oe2) IN (SELECT ie1, ie2, ie3 ...) + + TODO why do we have this duplicated in IN->EXISTS transformers? 
+ psergey-todo: fix these: grep for duplicated_subselect_card_check + */ + if (select_lex->item_list.elements != ncols) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), ncols); + DBUG_RETURN(-1); + } + + uint cols_num= in_subs->left_exp()->cols(); + for (uint i= 0; i < cols_num; i++) + { + if (select_lex->ref_pointer_array[i]-> + check_cols(in_subs->left_exp()->element_index(i)->cols())) + DBUG_RETURN(-1); + } + } + /* Check if any table is not supporting comparable rowids */ + { + List_iterator_fast li(select_lex->outer_select()->leaf_tables); + TABLE_LIST *tbl; + while ((tbl = li++)) + { + TABLE *table= tbl->table; + if (table && table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID) + join->not_usable_rowid_map|= table->map; + } + } + + DBUG_PRINT("info", ("Checking if subq can be converted to semi-join")); + /* + Check if we're in subquery that is a candidate for flattening into a + semi-join (which is done in flatten_subqueries()). The + requirements are: + 1. Subquery predicate is an IN/=ANY subq predicate + 2. Subquery is a single SELECT (not a UNION) + 3. Subquery does not have GROUP BY or ORDER BY + 4. Subquery does not use aggregate functions or HAVING + 5. Subquery predicate is at the AND-top-level of ON/WHERE clause + 6. We are not in a subquery of a single table UPDATE/DELETE that + doesn't have a JOIN (TODO: We should handle this at some + point by switching to multi-table UPDATE/DELETE) + 7. We're not in a table-less subquery like "SELECT 1" + 8. No execution method was already chosen (by a prepared statement) + 9. Parent select is not a table-less select + 10. Neither parent nor child select have STRAIGHT_JOIN option. + 11. It is first optimisation (the subquery could be moved from ON + clause during first optimisation and then be considered for SJ + on the second when it is too late) + 12. All tables supports comparable rowids. + This is needed for DuplicateWeedout strategy to work (which + is the catch-all semi-join strategy so it must be applicable). 
+ */ + if (optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN) && + in_subs && // 1 + !select_lex->is_part_of_union() && // 2 + !select_lex->group_list.elements && !join->order && // 3 + !join->having && !select_lex->with_sum_func && // 4 + in_subs->emb_on_expr_nest && // 5 + select_lex->outer_select()->join && // 6 + parent_unit->first_select()->leaf_tables.elements && // 7 + !in_subs->has_strategy() && // 8 + select_lex->outer_select()->table_list.first && // 9 + !((join->select_options | // 10 + select_lex->outer_select()->join->select_options) // 10 + & SELECT_STRAIGHT_JOIN) && // 10 + select_lex->first_cond_optimization && // 11 + join->not_usable_rowid_map == 0) // 12 + { + DBUG_PRINT("info", ("Subquery is semi-join conversion candidate")); + + //(void)subquery_types_allow_materialization(thd, in_subs); + + in_subs->is_flattenable_semijoin= TRUE; + + /* Register the subquery for further processing in flatten_subqueries() */ + if (!in_subs->is_registered_semijoin) + { + Query_arena *arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); + select_lex->outer_select()->sj_subselects.push_back(in_subs, + thd->mem_root); + if (arena) + thd->restore_active_arena(arena, &backup); + in_subs->is_registered_semijoin= TRUE; + } + + /* + Print the transformation into trace. Do it when we've just set + is_registered_semijoin=TRUE above, and also do it when we've already + had it set. + */ + if (in_subs->is_registered_semijoin) + { + OPT_TRACE_TRANSFORM(thd, trace_wrapper, trace_transform, + select_lex->select_number, + "IN (SELECT)", "semijoin"); + trace_transform.add("chosen", true); + } + } + else + { + DBUG_PRINT("info", ("Subquery can't be converted to merged semi-join")); + /* Test if the user has set a legal combination of optimizer switches. */ + DBUG_ASSERT(optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS | + OPTIMIZER_SWITCH_MATERIALIZATION)); + /* + Transform each subquery predicate according to its overloaded + transformer. 
+ */ + if (subselect->select_transformer(join)) + DBUG_RETURN(-1); + + /* + If the subquery predicate is IN/=ANY, analyse and set all possible + subquery execution strategies based on optimizer switches and syntactic + properties. + */ + if (in_subs && !in_subs->has_strategy()) + { + if (is_materialization_applicable(thd, in_subs, select_lex)) + { + in_subs->add_strategy(SUBS_MATERIALIZATION); + + /* + If the subquery is an AND-part of WHERE register for being processed + with jtbm strategy + */ + if (in_subs->emb_on_expr_nest == NO_JOIN_NEST && + optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN)) + { + in_subs->is_flattenable_semijoin= FALSE; + if (!in_subs->is_registered_semijoin) + { + Query_arena *arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); + select_lex->outer_select()->sj_subselects.push_back(in_subs, + thd->mem_root); + if (arena) + thd->restore_active_arena(arena, &backup); + in_subs->is_registered_semijoin= TRUE; + } + } + } + + /* + IN-TO-EXISTS is the only universal strategy. Choose it if the user + allowed it via an optimizer switch, or if materialization is not + possible. + */ + if (optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) || + !in_subs->has_strategy()) + in_subs->add_strategy(SUBS_IN_TO_EXISTS); + } + + /* Check if max/min optimization applicable */ + if (allany_subs && !allany_subs->is_set_strategy()) + { + uchar strategy= (allany_subs->is_maxmin_applicable(join) ? + (SUBS_MAXMIN_INJECTED | SUBS_MAXMIN_ENGINE) : + SUBS_IN_TO_EXISTS); + allany_subs->add_strategy(strategy); + } + + } + } + DBUG_RETURN(0); +} + + +/** + @brief Check if subquery's compared types allow materialization. + + @param in_subs Subquery predicate, updated as follows: + types_allow_materialization TRUE if subquery materialization is allowed. + sjm_scan_allowed If types_allow_materialization is TRUE, + indicates whether it is possible to use subquery + materialization and scan the materialized table. 
+ + @retval TRUE If subquery types allow materialization. + @retval FALSE Otherwise. + + @details + This is a temporary fix for BUG#36752. + + There are two subquery materialization strategies: + + 1. Materialize and do index lookups in the materialized table. See + BUG#36752 for description of restrictions we need to put on the + compared expressions. + + 2. Materialize and then do a full scan of the materialized table. At the + moment, this strategy's applicability criteria are even stricter than + in #1. + + This is so because of the following: consider an uncorrelated subquery + + ...WHERE (ot1.col1, ot2.col2 ...) IN (SELECT ie1,ie2,... FROM it1 ...) + + and a join order that could be used to do sjm-materialization: + + SJM-Scan(it1, it1), ot1, ot2 + + IN-equalities will be parts of conditions attached to the outer tables: + + ot1: ot1.col1 = ie1 AND ... (C1) + ot2: ot1.col2 = ie2 AND ... (C2) + + besides those there may be additional references to ie1 and ie2 + generated by equality propagation. The problem with evaluating C1 and + C2 is that ie{1,2} refer to subquery tables' columns, while we only have + current value of materialization temptable. Our solution is to + * require that all ie{N} are table column references. This allows + to copy the values of materialization temptable columns to the + original table's columns (see setup_sj_materialization for more + details) + * require that compared columns have exactly the same type. This is + a temporary measure to avoid BUG#36752-type problems. + + JOIN_TAB::keyuse_is_valid_for_access_in_chosen_plan expects that for Semi Join Materialization + Scan all the items in the select list of the IN Subquery are of the type Item::FIELD_ITEM. 
+*/ + +static +bool subquery_types_allow_materialization(THD* thd, Item_in_subselect *in_subs) +{ + Item *left_exp= in_subs->left_exp(); + DBUG_ENTER("subquery_types_allow_materialization"); + + DBUG_ASSERT(left_exp->fixed()); + + List_iterator it(in_subs->unit->first_select()->item_list); + uint elements= in_subs->unit->first_select()->item_list.elements; + const char* cause= NULL; + + in_subs->types_allow_materialization= FALSE; // Assign default values + in_subs->sjm_scan_allowed= FALSE; + + OPT_TRACE_TRANSFORM(thd, trace_wrapper, trace_transform, + in_subs->get_select_lex()->select_number, + "IN (SELECT)", "materialization"); + + /* + The checks here must be kept in sync with the one in + Item_func_in::in_predicate_to_in_subs_transformer(). + */ + + bool all_are_fields= TRUE; + uint32 total_key_length = 0; + bool converted_from_in_predicate= in_subs->converted_from_in_predicate; + for (uint i= 0; i < elements; i++) + { + Item *outer= left_exp->element_index(i); + Item *inner= it++; + all_are_fields &= (outer->real_item()->type() == Item::FIELD_ITEM && + inner->real_item()->type() == Item::FIELD_ITEM); + total_key_length += inner->max_length; + if (!inner-> + type_handler()-> + subquery_type_allows_materialization(inner, + outer, + converted_from_in_predicate)) + { + trace_transform.add("possible", false); + trace_transform.add("cause", "types mismatch"); + DBUG_RETURN(FALSE); + } + } + + /* + Make sure that create_tmp_table will not fail due to too long keys. + See MDEV-7122. This check is performed inside create_tmp_table also and + we must do it so that we know the table has keys created. + Make sure that the length of the key for the temp_table is atleast + greater than 0. 
+ */ + if (!total_key_length) + cause= "zero length key for materialized table"; + else if (total_key_length > tmp_table_max_key_length()) + cause= "length of key greater than allowed key length for materialized tables"; + else if (elements > tmp_table_max_key_parts()) + cause= "#keyparts greater than allowed key parts for materialized tables"; + else + { + in_subs->types_allow_materialization= TRUE; + in_subs->sjm_scan_allowed= all_are_fields; + trace_transform.add("sjm_scan_allowed", all_are_fields) + .add("possible", true); + DBUG_PRINT("info",("subquery_types_allow_materialization: ok, allowed")); + DBUG_RETURN(TRUE); + } + trace_transform.add("possible", false).add("cause", cause); + DBUG_RETURN(FALSE); +} + + +/** + Apply max min optimization of all/any subselect +*/ + +bool JOIN::transform_max_min_subquery() +{ + DBUG_ENTER("JOIN::transform_max_min_subquery"); + Item_subselect *subselect= unit->item; + if (!subselect || (subselect->substype() != Item_subselect::ALL_SUBS && + subselect->substype() != Item_subselect::ANY_SUBS)) + DBUG_RETURN(0); + DBUG_RETURN(((Item_allany_subselect *) subselect)-> + transform_into_max_min(this)); +} + + +/* + Finalize IN->EXISTS conversion in case we couldn't use materialization. + + DESCRIPTION Invoke the IN->EXISTS converter + Replace the Item_in_subselect with its wrapper Item_in_optimizer in WHERE. + + RETURN + FALSE - Ok + TRUE - Fatal error +*/ + +bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *item) +{ + DBUG_ENTER("make_in_exists_conversion"); + JOIN *child_join= item->unit->first_select()->join; + bool res; + + /* + We're going to finalize IN->EXISTS conversion. 
+ Normally, IN->EXISTS conversion takes place inside the + Item_subselect::fix_fields() call, where item_subselect->fixed()==FALSE (as + fix_fields() haven't finished yet) and item_subselect->changed==FALSE (as + the conversion haven't been finalized) + + At the end of Item_subselect::fix_fields() we had to set fixed=TRUE, + changed=TRUE (the only other option would have been to return error). + + So, now we have to set these back for the duration of select_transformer() + call. + */ + item->changed= 0; + item->base_flags|= item_base_t::FIXED; + + SELECT_LEX *save_select_lex= thd->lex->current_select; + thd->lex->current_select= item->unit->first_select(); + + res= item->select_transformer(child_join); + + thd->lex->current_select= save_select_lex; + + if (res) + DBUG_RETURN(TRUE); + + item->changed= 1; + DBUG_ASSERT(item->fixed()); + + Item *substitute= item->substitution; + bool do_fix_fields= !item->substitution->fixed(); + /* + The Item_subselect has already been wrapped with Item_in_optimizer, so we + should search for item->optimizer, not 'item'. + */ + Item *replace_me= item->optimizer; + DBUG_ASSERT(replace_me==substitute); + + Item **tree= (item->emb_on_expr_nest == NO_JOIN_NEST)? + &join->conds : &(item->emb_on_expr_nest->on_expr); + if (replace_where_subcondition(join, tree, replace_me, substitute, + do_fix_fields)) + DBUG_RETURN(TRUE); + item->substitution= NULL; + + /* + If this is a prepared statement, repeat the above operation for + prep_where (or prep_on_expr). + */ + if (!thd->stmt_arena->is_conventional()) + { + tree= (item->emb_on_expr_nest == (TABLE_LIST*)NO_JOIN_NEST)? 
+ &join->select_lex->prep_where : + &(item->emb_on_expr_nest->prep_on_expr); + + if (replace_where_subcondition(join, tree, replace_me, substitute, + FALSE)) + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +bool check_for_outer_joins(List *join_list) +{ + TABLE_LIST *table; + NESTED_JOIN *nested_join; + List_iterator li(*join_list); + while ((table= li++)) + { + if ((nested_join= table->nested_join)) + { + if (check_for_outer_joins(&nested_join->join_list)) + return TRUE; + } + + if (table->outer_join) + return TRUE; + } + return FALSE; +} + + +void find_and_block_conversion_to_sj(Item *to_find, + List_iterator_fast &li) +{ + if (to_find->type() == Item::FUNC_ITEM && + ((Item_func*)to_find)->functype() == Item_func::IN_OPTIMIZER_FUNC) + to_find= ((Item_in_optimizer*)to_find)->get_wrapped_in_subselect_item(); + + if (to_find->type() != Item::SUBSELECT_ITEM || + ((Item_subselect *) to_find)->substype() != Item_subselect::IN_SUBS) + return; + Item_in_subselect *in_subq; + li.rewind(); + while ((in_subq= li++)) + { + if (in_subq == to_find) + { + in_subq->block_conversion_to_sj(); + return; + } + } +} + + +/* + Convert semi-join subquery predicates into semi-join join nests + + SYNOPSIS + convert_join_subqueries_to_semijoins() + + DESCRIPTION + + Convert candidate subquery predicates into semi-join join nests. This + transformation is performed once in query lifetime and is irreversible. + + Conversion of one subquery predicate + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + We start with a join that has a semi-join subquery: + + SELECT ... + FROM ot, ... + WHERE oe IN (SELECT ie FROM it1 ... itN WHERE subq_where) AND outer_where + + and convert it into a semi-join nest: + + SELECT ... + FROM ot SEMI JOIN (it1 ... itN), ... + WHERE outer_where AND subq_where AND oe=ie + + that is, in order to do the conversion, we need to + + * Create the "SEMI JOIN (it1 .. itN)" part and add it into the parent + query's FROM structure. 
+ * Add "AND subq_where AND oe=ie" into parent query's WHERE (or ON if + the subquery predicate was in an ON expression) + * Remove the subquery predicate from the parent query's WHERE + + Considerations when converting many predicates + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A join may have at most MAX_TABLES tables. This may prevent us from + flattening all subqueries when the total number of tables in parent and + child selects exceeds MAX_TABLES. + We deal with this problem by flattening children's subqueries first and + then using a heuristic rule to determine each subquery predicate's + "priority". + + RETURN + FALSE OK + TRUE Error +*/ + +bool convert_join_subqueries_to_semijoins(JOIN *join) +{ + Query_arena *arena, backup; + Item_in_subselect *in_subq; + THD *thd= join->thd; + DBUG_ENTER("convert_join_subqueries_to_semijoins"); + + if (join->select_lex->sj_subselects.is_empty()) + DBUG_RETURN(FALSE); + + List_iterator_fast li(join->select_lex->sj_subselects); + + while ((in_subq= li++)) + { + SELECT_LEX *subq_sel= in_subq->get_select_lex(); + if (subq_sel->handle_derived(thd->lex, DT_MERGE)) + DBUG_RETURN(TRUE); + if (subq_sel->join->transform_in_predicates_into_in_subq(thd)) + DBUG_RETURN(TRUE); + subq_sel->update_used_tables(); + } + + /* + Check all candidates to semi-join conversion that occur + in ON expressions of outer join. Set the flag blocking + this conversion for them. 
+ */ + TABLE_LIST *tbl; + List_iterator ti(join->select_lex->leaf_tables); + while ((tbl= ti++)) + { + TABLE_LIST *embedded; + TABLE_LIST *embedding= tbl; + do + { + embedded= embedding; + bool block_conversion_to_sj= false; + if (embedded->on_expr) + { + /* + Conversion of an IN subquery predicate into semi-join + is blocked now if the predicate occurs: + - in the ON expression of an outer join + - in the ON expression of an inner join embedded directly + or indirectly in the inner nest of an outer join + */ + for (TABLE_LIST *tl= embedded; tl; tl= tl->embedding) + { + if (tl->outer_join) + { + block_conversion_to_sj= true; + break; + } + } + } + if (block_conversion_to_sj) + { + Item *cond= embedded->on_expr; + if (!cond) + ; + else if (cond->type() != Item::COND_ITEM) + find_and_block_conversion_to_sj(cond, li); + else if (((Item_cond*) cond)->functype() == + Item_func::COND_AND_FUNC) + { + Item *item; + List_iterator it(*(((Item_cond*) cond)->argument_list())); + while ((item= it++)) + { + find_and_block_conversion_to_sj(item, li); + } + } + } + embedding= embedded->embedding; + } + while (embedding && + embedding->nested_join->join_list.head() == embedded); + } + + /* + Block conversion to semi-joins for those candidates that + are encountered in the WHERE condition of the multi-table view + with CHECK OPTION if this view is used in UPDATE/DELETE. + (This limitation can be, probably, easily lifted.) + */ + li.rewind(); + while ((in_subq= li++)) + { + if (in_subq->emb_on_expr_nest != NO_JOIN_NEST && + in_subq->emb_on_expr_nest->effective_with_check) + { + in_subq->block_conversion_to_sj(); + } + } + + if (join->select_options & SELECT_STRAIGHT_JOIN) + { + /* Block conversion to semijoins for all candidates */ + li.rewind(); + while ((in_subq= li++)) + { + in_subq->block_conversion_to_sj(); + } + } + + li.rewind(); + /* First, convert child join's subqueries. 
We proceed bottom-up here */ + while ((in_subq= li++)) + { + st_select_lex *child_select= in_subq->get_select_lex(); + JOIN *child_join= child_select->join; + child_join->outer_tables = child_join->table_count; + + /* + child_select->where contains only the WHERE predicate of the + subquery itself here. We may be selecting from a VIEW, which has its + own predicate. The combined predicates are available in child_join->conds, + which was built by setup_conds() doing prepare_where() for all views. + */ + child_select->where= child_join->conds; + + if (convert_join_subqueries_to_semijoins(child_join)) + DBUG_RETURN(TRUE); + + + in_subq->sj_convert_priority= + MY_TEST(in_subq->do_not_convert_to_sj) * MAX_TABLES * 2 + + in_subq->is_correlated * MAX_TABLES + child_join->outer_tables; + } + + // Temporary measure: disable semi-joins when they are together with outer + // joins. +#if 0 + if (check_for_outer_joins(join->join_list)) + { + in_subq= join->select_lex->sj_subselects.head(); + arena= thd->activate_stmt_arena_if_needed(&backup); + goto skip_conversion; + } +#endif + //dump_TABLE_LIST_struct(select_lex, select_lex->leaf_tables); + /* + 2. 
Pick which subqueries to convert: + sort the subquery array + - prefer correlated subqueries over uncorrelated; + - prefer subqueries that have greater number of outer tables; + */ + bubble_sort(&join->select_lex->sj_subselects, + subq_sj_candidate_cmp, NULL); + // #tables-in-parent-query + #tables-in-subquery < MAX_TABLES + /* Replace all subqueries to be flattened with Item_int(1) */ + arena= thd->activate_stmt_arena_if_needed(&backup); + + li.rewind(); + while ((in_subq= li++)) + { + bool remove_item= TRUE; + subquery_types_allow_materialization(thd, in_subq); + + /* Stop processing if we've reached a subquery that's attached to the ON clause */ + if (in_subq->do_not_convert_to_sj) + { + OPT_TRACE_TRANSFORM(thd, trace_wrapper, trace_transform, + in_subq->get_select_lex()->select_number, + "IN (SELECT)", "semijoin"); + trace_transform.add("converted_to_semi_join", false) + .add("cause", "subquery attached to the ON clause"); + break; + } + + if (in_subq->is_flattenable_semijoin) + { + OPT_TRACE_TRANSFORM(thd, trace_wrapper, trace_transform, + in_subq->get_select_lex()->select_number, + "IN (SELECT)", "semijoin"); + if (join->table_count + + in_subq->unit->first_select()->join->table_count >= MAX_TABLES) + { + trace_transform.add("converted_to_semi_join", false); + trace_transform.add("cause", + "table in parent join now exceeds MAX_TABLES"); + break; + } + if (convert_subq_to_sj(join, in_subq)) + goto restore_arena_and_fail; + trace_transform.add("converted_to_semi_join", true); + } + else + { + if (join->table_count + 1 >= MAX_TABLES) + break; + if (convert_subq_to_jtbm(join, in_subq, &remove_item)) + goto restore_arena_and_fail; + } + if (remove_item) + { + Item **tree= (in_subq->emb_on_expr_nest == NO_JOIN_NEST)? 
+ &join->conds : &(in_subq->emb_on_expr_nest->on_expr); + Item *replace_me= in_subq->original_item(); + if (replace_where_subcondition(join, tree, replace_me, + new (thd->mem_root) Item_int(thd, 1), + FALSE)) + goto restore_arena_and_fail; + } + } +//skip_conversion: + /* + 3. Finalize (perform IN->EXISTS rewrite) the subqueries that we didn't + convert: + */ + while (in_subq) + { + JOIN *child_join= in_subq->unit->first_select()->join; + in_subq->changed= 0; + in_subq->base_flags|= item_base_t::FIXED; + + SELECT_LEX *save_select_lex= thd->lex->current_select; + thd->lex->current_select= in_subq->unit->first_select(); + + bool res= in_subq->select_transformer(child_join); + + thd->lex->current_select= save_select_lex; + + if (res) + DBUG_RETURN(TRUE); + + in_subq->changed= 1; + DBUG_ASSERT(in_subq->fixed()); + + Item *substitute= in_subq->substitution; + bool do_fix_fields= !in_subq->substitution->fixed(); + Item **tree= (in_subq->emb_on_expr_nest == NO_JOIN_NEST)? + &join->conds : &(in_subq->emb_on_expr_nest->on_expr); + Item *replace_me= in_subq->original_item(); + if (replace_where_subcondition(join, tree, replace_me, substitute, + do_fix_fields)) + DBUG_RETURN(TRUE); + in_subq->substitution= NULL; + /* + If this is a prepared statement, repeat the above operation for + prep_where (or prep_on_expr). Subquery-to-semijoin conversion is + done once for prepared statement. + */ + if (!thd->stmt_arena->is_conventional()) + { + tree= (in_subq->emb_on_expr_nest == NO_JOIN_NEST)? + &join->select_lex->prep_where : + &(in_subq->emb_on_expr_nest->prep_on_expr); + /* + prep_on_expr/ prep_where may be NULL in some cases. + If that is the case, do nothing - simplify_joins() will copy + ON/WHERE expression into prep_on_expr/prep_where. + */ + if (*tree && replace_where_subcondition(join, tree, replace_me, substitute, + FALSE)) + DBUG_RETURN(TRUE); + } + /* + Revert to the IN->EXISTS strategy in the rare case when the subquery could + not be flattened. 
+ */ + in_subq->reset_strategy(SUBS_IN_TO_EXISTS); + if (is_materialization_applicable(thd, in_subq, + in_subq->unit->first_select())) + { + in_subq->add_strategy(SUBS_MATERIALIZATION); + } + + in_subq= li++; + } + + if (arena) + thd->restore_active_arena(arena, &backup); + join->select_lex->sj_subselects.empty(); + DBUG_RETURN(FALSE); + +restore_arena_and_fail: + if (arena) + thd->restore_active_arena(arena, &backup); + DBUG_RETURN(TRUE); +} + + +/* + Get #output_rows and scan_time estimates for a "delayed" table. + + SYNOPSIS + get_delayed_table_estimates() + table IN Table to get estimates for + out_rows OUT E(#rows in the table) + scan_time OUT E(scan_time). + startup_cost OUT cost to populate the table. + + DESCRIPTION + Get #output_rows and scan_time estimates for a "delayed" table. By + "delayed" here we mean that the table is filled at the start of query + execution. This means that the optimizer can't use table statistics to + get #rows estimate for it, it has to call this function instead. + + This function is expected to make different actions depending on the nature + of the table. At the moment there is only one kind of delayed tables, + non-flattenable semi-joins. 
+*/ + +void get_delayed_table_estimates(TABLE *table, + ha_rows *out_rows, + double *scan_time, + double *startup_cost) +{ + Item_in_subselect *item= table->pos_in_table_list->jtbm_subselect; + Table_function_json_table *table_function= + table->pos_in_table_list->table_function; + + if (table_function) + { + table_function->get_estimates(out_rows, scan_time, startup_cost); + return; + } + + DBUG_ASSERT(item->engine->engine_type() == + subselect_engine::HASH_SJ_ENGINE); + + subselect_hash_sj_engine *hash_sj_engine= + ((subselect_hash_sj_engine*)item->engine); + + *out_rows= (ha_rows)item->jtbm_record_count; + *startup_cost= item->jtbm_read_time; + + /* Calculate cost of scanning the temptable */ + double data_size= COST_MULT(item->jtbm_record_count, + hash_sj_engine->tmp_table->s->reclength); + /* Do like in handler::scan_time() */ + *scan_time= ((data_size/table->file->stats.block_size+2) * + table->file->avg_io_cost()); +} + + +/** + @brief Replaces an expression destructively inside the expression tree of + the WHERE clase. + + @note We substitute AND/OR structure because it was copied by + copy_andor_structure and some changes could be done in the copy but + should be left permanent, also there could be several layers of AND over + AND and OR over OR because ::fix_field() possibly is not called. + + @param join The top-level query. + @param old_cond The expression to be replaced. + @param new_cond The expression to be substituted. + @param do_fix_fields If true, Item::fix_fields(THD*, Item**) is called for + the new expression. + @return true if there was an error, false if + successful. 
+*/ + +static bool replace_where_subcondition(JOIN *join, Item **expr, + Item *old_cond, Item *new_cond, + bool do_fix_fields) +{ + if (*expr == old_cond) + { + *expr= new_cond; + if (do_fix_fields) + new_cond->fix_fields(join->thd, expr); + return FALSE; + } + + if ((*expr)->type() == Item::COND_ITEM) + { + List_iterator li(*((Item_cond*)(*expr))->argument_list()); + Item *item; + while ((item= li++)) + { + if (item == old_cond) + { + li.replace(new_cond); + if (do_fix_fields) + new_cond->fix_fields(join->thd, li.ref()); + return FALSE; + } + else if (item->type() == Item::COND_ITEM) + { + replace_where_subcondition(join, li.ref(), + old_cond, new_cond, + do_fix_fields); + } + } + } + /* + We can come to here when + - we're doing replace operations on both on_expr and prep_on_expr + - on_expr is the same as prep_on_expr, or they share a sub-tree + (so, when we do replace in on_expr, we replace in prep_on_expr, too, + and when we try doing a replace in prep_on_expr, the item we wanted + to replace there has already been replaced) + */ + return FALSE; +} + +static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2, + void *arg) +{ + return (el1->sj_convert_priority > el2->sj_convert_priority) ? -1 : + ( (el1->sj_convert_priority == el2->sj_convert_priority)? 0 : 1); +} + + +/** + @brief + reset the value of the field in_eqaulity_no for all Item_func_eq + items in the where clause of the subquery. + + Look for in_equality_no description in Item_func_eq class + + DESCRIPTION + Lets have an example: + SELECT t1.a FROM t1 WHERE t1.a IN + (SELECT t2.a FROM t2 where t2.b IN + (select t3.b from t3 where t3.c=27 )) + + So for such a query we have the parent, child and + grandchild select. + + So for the equality t2.b = t3.b we set the value for in_equality_no to + 0 according to its description. Wewe do the same for t1.a = t2.a. 
+ But when we look at the child select (with the grandchild select merged), + the query would be + + SELECT t1.a FROM t1 WHERE t1.a IN + (SELECT t2.a FROM t2 where t2.b = t3.b and t3.c=27) + + and then when the child select is merged into the parent select the query + would look like + + SELECT t1.a FROM t1, semi-join-nest(t2,t3) + WHERE t1.a =t2.a and t2.b = t3.b and t3.c=27 + + Still we would have in_equality_no set for t2.b = t3.b + though it does not take part in the semi-join equality for the parent select, + so we should reset its value to UINT_MAX. + + @param cond WHERE clause of the subquery +*/ + +static void reset_equality_number_for_subq_conds(Item * cond) +{ + if (!cond) + return; + if (cond->type() == Item::COND_ITEM) + { + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + if (item->type() == Item::FUNC_ITEM && + ((Item_func*)item)->functype()== Item_func::EQ_FUNC) + ((Item_func_eq*)item)->in_equality_no= UINT_MAX; + } + } + else + { + if (cond->type() == Item::FUNC_ITEM && + ((Item_func*)cond)->functype()== Item_func::EQ_FUNC) + ((Item_func_eq*)cond)->in_equality_no= UINT_MAX; + } + return; +} + +/* + Convert a subquery predicate into a TABLE_LIST semi-join nest + + SYNOPSIS + convert_subq_to_sj() + parent_join Parent join, the one that has subq_pred in its WHERE/ON + clause + subq_pred Subquery predicate to be converted + + DESCRIPTION + Convert a subquery predicate into a TABLE_LIST semi-join nest. All the + prerequisites are already checked, so the conversion is always successfull. + + Prepared Statements: the transformation is permanent: + - Changes in TABLE_LIST structures are naturally permanent + - Item tree changes are performed on statement MEM_ROOT: + = we activate statement MEM_ROOT + = this function is called before the first fix_prepare_information + call. + + This is intended because the criteria for subquery-to-sj conversion remain + constant for the lifetime of the Prepared Statement. 
+ + RETURN + FALSE OK + TRUE Out of memory error +*/ + +static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) +{ + SELECT_LEX *parent_lex= parent_join->select_lex; + TABLE_LIST *emb_tbl_nest= NULL; + TABLE_LIST *orig_tl; + List *emb_join_list= &parent_lex->top_join_list; + THD *thd= parent_join->thd; + SELECT_LEX *save_lex; + Item **left; + Item *left_exp; + Item *left_exp_orig; + + uint ncols; + DBUG_ENTER("convert_subq_to_sj"); + + /* + 1. Find out where to put the predicate into. + Note: for "t1 LEFT JOIN t2" this will be t2, a leaf. + */ + if ((void*)subq_pred->emb_on_expr_nest != (void*)NO_JOIN_NEST) + { + if (subq_pred->emb_on_expr_nest->nested_join) + { + /* + We're dealing with + + ... [LEFT] JOIN ( ... ) ON (subquery AND whatever) ... + + The sj-nest will be inserted into the brackets nest. + */ + emb_tbl_nest= subq_pred->emb_on_expr_nest; + emb_join_list= &emb_tbl_nest->nested_join->join_list; + } + else if (!subq_pred->emb_on_expr_nest->outer_join) + { + /* + We're dealing with + + ... INNER JOIN tblX ON (subquery AND whatever) ... + + The sj-nest will be tblX's "sibling", i.e. another child of its + parent. This is ok because tblX is joined as an inner join. + */ + emb_tbl_nest= subq_pred->emb_on_expr_nest->embedding; + if (emb_tbl_nest) + emb_join_list= &emb_tbl_nest->nested_join->join_list; + } + else if (!subq_pred->emb_on_expr_nest->nested_join) + { + TABLE_LIST *outer_tbl= subq_pred->emb_on_expr_nest; + TABLE_LIST *wrap_nest; + LEX_CSTRING sj_wrap_name= { STRING_WITH_LEN("(sj-wrap)") }; + /* + We're dealing with + + ... LEFT JOIN tbl ON (on_expr AND subq_pred) ... + + we'll need to convert it into: + + ... LEFT JOIN ( tbl SJ (subq_tables) ) ON (on_expr AND subq_pred) ... + | | + |<----- wrap_nest ---->| + + Q: other subqueries may be pointing to this element. What to do? + A1: simple solution: copy *subq_pred->expr_join_nest= *parent_nest. + But we'll need to fix other pointers. 
+ A2: Another way: have TABLE_LIST::next_ptr so the following + subqueries know the table has been nested. + A3: changes in the TABLE_LIST::outer_join will make everything work + automatically. + */ + if (!(wrap_nest= alloc_join_nest(thd))) + { + DBUG_RETURN(TRUE); + } + wrap_nest->embedding= outer_tbl->embedding; + wrap_nest->join_list= outer_tbl->join_list; + wrap_nest->alias= sj_wrap_name; + + wrap_nest->nested_join->join_list.empty(); + wrap_nest->nested_join->join_list.push_back(outer_tbl, thd->mem_root); + + outer_tbl->embedding= wrap_nest; + outer_tbl->join_list= &wrap_nest->nested_join->join_list; + + /* + wrap_nest will take place of outer_tbl, so move the outer join flag + and on_expr + */ + wrap_nest->outer_join= outer_tbl->outer_join; + outer_tbl->outer_join= 0; + + wrap_nest->on_expr= outer_tbl->on_expr; + outer_tbl->on_expr= NULL; + + List_iterator li(*wrap_nest->join_list); + TABLE_LIST *tbl; + while ((tbl= li++)) + { + if (tbl == outer_tbl) + { + li.replace(wrap_nest); + break; + } + } + /* + Ok now wrap_nest 'contains' outer_tbl and we're ready to add the + semi-join nest into it + */ + emb_join_list= &wrap_nest->nested_join->join_list; + emb_tbl_nest= wrap_nest; + } + } + + TABLE_LIST *sj_nest; + NESTED_JOIN *nested_join; + LEX_CSTRING sj_nest_name= { STRING_WITH_LEN("(sj-nest)") }; + if (!(sj_nest= alloc_join_nest(thd))) + { + DBUG_RETURN(TRUE); + } + nested_join= sj_nest->nested_join; + + sj_nest->join_list= emb_join_list; + sj_nest->embedding= emb_tbl_nest; + sj_nest->alias= sj_nest_name; + sj_nest->sj_subq_pred= subq_pred; + sj_nest->original_subq_pred_used_tables= subq_pred->used_tables() | + subq_pred->left_exp()->used_tables(); + /* Nests do not participate in those 'chains', so: */ + /* sj_nest->next_leaf= sj_nest->next_local= sj_nest->next_global == NULL*/ + emb_join_list->push_back(sj_nest, thd->mem_root); + + /* + nested_join->used_tables and nested_join->not_null_tables are + initialized in simplify_joins(). + */ + + /* + 2. 
Walk through subquery's top list and set 'embedding' to point to the + sj-nest. + */ + st_select_lex *subq_lex= subq_pred->unit->first_select(); + DBUG_ASSERT(subq_lex->next_select() == NULL); + nested_join->join_list.empty(); + List_iterator_fast li(subq_lex->top_join_list); + TABLE_LIST *tl; + while ((tl= li++)) + { + tl->embedding= sj_nest; + tl->join_list= &nested_join->join_list; + nested_join->join_list.push_back(tl, thd->mem_root); + } + + /* + Reconnect the next_leaf chain. + TODO: Do we have to put subquery's tables at the end of the chain? + Inserting them at the beginning would be a bit faster. + NOTE: We actually insert them at the front! That's because the order is + reversed in this list. + */ + parent_lex->leaf_tables.append(&subq_lex->leaf_tables); + + if (subq_lex->options & OPTION_SCHEMA_TABLE) + parent_lex->options |= OPTION_SCHEMA_TABLE; + + /* + Same as above for next_local chain + (a theory: a next_local chain always starts with ::leaf_tables + because view's tables are inserted after the view) + */ + + for (orig_tl= (TABLE_LIST*)(parent_lex->table_list.first); + orig_tl->next_local; + orig_tl= orig_tl->next_local) + {} + + orig_tl->next_local= subq_lex->join->tables_list; + + /* A theory: no need to re-connect the next_global chain */ + + /* 3. Remove the original subquery predicate from the WHERE/ON */ + + /*TODO: also reset the 'm_with_subquery' there. */ + + /* n. Adjust the parent_join->table_count counter */ + uint table_no= parent_join->table_count; + /* n. 
Walk through child's tables and adjust table->map */ + List_iterator_fast si(subq_lex->leaf_tables); + while ((tl= si++)) + { + tl->set_tablenr(table_no); + if (tl->is_jtbm()) + { + tl->jtbm_table_no= table_no; + Item *dummy= tl->jtbm_subselect; + tl->jtbm_subselect->fix_after_pullout(parent_lex, &dummy, true); + DBUG_ASSERT(dummy == tl->jtbm_subselect); + } + else if (tl->table_function) + { + tl->table_function->fix_after_pullout(tl, parent_lex, true); + } + SELECT_LEX *old_sl= tl->select_lex; + tl->select_lex= parent_join->select_lex; + for (TABLE_LIST *emb= tl->embedding; + emb && emb->select_lex == old_sl; + emb= emb->embedding) + emb->select_lex= parent_join->select_lex; + table_no++; + } + parent_join->table_count += subq_lex->join->table_count; + //parent_join->table_count += subq_lex->leaf_tables.elements; + + /* + Put the subquery's WHERE into semi-join's sj_on_expr + Add the subquery-induced equalities too. + */ + save_lex= thd->lex->current_select; + table_map subq_pred_used_tables; + + thd->lex->current_select=subq_lex; + left= subq_pred->left_exp_ptr(); + if ((*left)->fix_fields_if_needed(thd, left)) + goto restore_tl_and_exit; + left_exp= *left; + left_exp_orig= subq_pred->left_exp_orig(); + thd->lex->current_select=save_lex; + + subq_pred_used_tables= subq_pred->used_tables(); + sj_nest->nested_join->sj_corr_tables= subq_pred_used_tables; + sj_nest->nested_join->sj_depends_on= subq_pred_used_tables | + left_exp->used_tables(); + sj_nest->sj_on_expr= subq_lex->join->conds; + + /* + Create the IN-equalities and inject them into semi-join's ON expression. + Additionally, for LooseScan strategy + - Record the number of IN-equalities. + - Create list of pointers to (oe1, ..., ieN). We'll need the list to + see which of the expressions are bound and which are not (for those + we'll produce a distinct stream of (ie_i1,...ie_ik). + + (TODO: can we just create a list of pointers and hope the expressions + will not substitute themselves on fix_fields()? 
or we need to wrap + them into Item_direct_view_refs and store pointers to those. The + pointers to Item_direct_view_refs are guaranteed to be stable as + Item_direct_view_refs doesn't substitute itself with anything in + Item_direct_view_ref::fix_fields. + */ + ncols= sj_nest->sj_in_exprs= left_exp->cols(); + sj_nest->nested_join->sj_outer_expr_list.empty(); + reset_equality_number_for_subq_conds(sj_nest->sj_on_expr); + + if (ncols == 1) + { + /* add left = select_list_element */ + nested_join->sj_outer_expr_list.push_back(left, + thd->mem_root); + /* + Create Item_func_eq. Note that + 1. this is done on the statement, not execution, arena + 2. if it's a PS then this happens only once - on the first execution. + On following re-executions, the item will be fix_field-ed normally. + 3. Thus it should be created as if it was fix_field'ed, in particular + all pointers to items in the execution arena should be protected + with thd->change_item_tree + */ + Item_func_eq *item_eq= + new (thd->mem_root) Item_func_eq(thd, left_exp_orig, + subq_lex->ref_pointer_array[0]); + if (!item_eq) + goto restore_tl_and_exit; + if (left_exp_orig != left_exp) + thd->change_item_tree(item_eq->arguments(), left_exp); + item_eq->in_equality_no= 0; + sj_nest->sj_on_expr= and_items(thd, sj_nest->sj_on_expr, item_eq); + } + else if (left_exp->type() == Item::ROW_ITEM) + { + /* + disassemple left expression and add + left1 = select_list_element1 and left2 = select_list_element2 ... 
+ */ + for (uint i= 0; i < ncols; i++) + { + nested_join->sj_outer_expr_list.push_back(left_exp->addr(i), + thd->mem_root); + Item_func_eq *item_eq= + new (thd->mem_root) + Item_func_eq(thd, left_exp_orig->element_index(i), + subq_lex->ref_pointer_array[i]); + if (!item_eq) + goto restore_tl_and_exit; + DBUG_ASSERT(left_exp->element_index(i)->fixed()); + if (left_exp_orig->element_index(i) != + left_exp->element_index(i)) + thd->change_item_tree(item_eq->arguments(), + left_exp->element_index(i)); + item_eq->in_equality_no= i; + sj_nest->sj_on_expr= and_items(thd, sj_nest->sj_on_expr, item_eq); + } + } + else + { + /* + add row operation + left = (select_list_element1, select_list_element2, ...) + */ + Item_row *row= new (thd->mem_root) Item_row(thd, subq_lex->pre_fix); + /* fix fields on subquery was call so they should be the same */ + if (!row) + goto restore_tl_and_exit; + DBUG_ASSERT(ncols == row->cols()); + nested_join->sj_outer_expr_list.push_back(left); + Item_func_eq *item_eq= + new (thd->mem_root) Item_func_eq(thd, left_exp_orig, row); + if (!item_eq) + goto restore_tl_and_exit; + for (uint i= 0; i < row->cols(); i++) + { + if (row->element_index(i) != subq_lex->ref_pointer_array[i]) + thd->change_item_tree(row->addr(i), subq_lex->ref_pointer_array[i]); + } + item_eq->in_equality_no= 0; + sj_nest->sj_on_expr= and_items(thd, sj_nest->sj_on_expr, item_eq); + } + /* + Fix the created equality and AND + + Note that fix_fields() can actually fail in a meaningful way here. One + example is when the IN-equality is not valid, because it compares columns + with incompatible collations. (One can argue it would be more appropriate + to check for this at name resolution stage, but as a legacy of IN->EXISTS + we have in here). + */ + if (sj_nest->sj_on_expr->fix_fields_if_needed(thd, &sj_nest->sj_on_expr)) + goto restore_tl_and_exit; + + /* + Walk through sj nest's WHERE and ON expressions and call + item->fix_table_changes() for all items. 
+ */ + sj_nest->sj_on_expr->fix_after_pullout(parent_lex, &sj_nest->sj_on_expr, + TRUE); + fix_list_after_tbl_changes(parent_lex, &sj_nest->nested_join->join_list); + + + /* Unlink the child select_lex so it doesn't show up in EXPLAIN: */ + subq_lex->master_unit()->exclude_level(); + + DBUG_EXECUTE("where", + print_where(sj_nest->sj_on_expr,"SJ-EXPR", QT_ORDINARY);); + + /* Inject sj_on_expr into the parent's WHERE or ON */ + if (emb_tbl_nest) + { + emb_tbl_nest->on_expr= and_items(thd, emb_tbl_nest->on_expr, + sj_nest->sj_on_expr); + emb_tbl_nest->on_expr->top_level_item(); + if (emb_tbl_nest->on_expr->fix_fields_if_needed(thd, + &emb_tbl_nest->on_expr)) + goto restore_tl_and_exit; + } + else + { + /* Inject into the WHERE */ + parent_join->conds= and_items(thd, parent_join->conds, sj_nest->sj_on_expr); + parent_join->conds->top_level_item(); + /* + fix_fields must update the properties (e.g. st_select_lex::cond_count of + the correct select_lex. + */ + save_lex= thd->lex->current_select; + thd->lex->current_select=parent_join->select_lex; + if (parent_join->conds->fix_fields_if_needed(thd, &parent_join->conds)) + goto restore_tl_and_exit; + + thd->lex->current_select=save_lex; + parent_join->select_lex->where= parent_join->conds; + } + + if (subq_lex->ftfunc_list->elements) + { + Item_func_match *ifm; + List_iterator_fast li(*(subq_lex->ftfunc_list)); + while ((ifm= li++)) + parent_lex->ftfunc_list->push_front(ifm, thd->mem_root); + } + + // The subqueries were replaced for Item_int(1) earlier + subq_pred->reset_strategy(SUBS_SEMI_JOIN); // for subsequent executions + + parent_lex->have_merged_subqueries= TRUE; + /* Fatal error may have been set to by fix_after_pullout() */ + DBUG_RETURN(thd->is_fatal_error); + +restore_tl_and_exit: + orig_tl->next_local= NULL; + DBUG_RETURN(TRUE); +} + + +const int SUBQERY_TEMPTABLE_NAME_MAX_LEN= 20; + +static void create_subquery_temptable_name(LEX_STRING *str, uint number) +{ + char *to= str->str; + DBUG_ASSERT(number < 
10000); + to= strmov(to, "length= (size_t) (to - str->str)+1; +} + + +/* + Convert subquery predicate into non-mergeable semi-join nest. + + TODO: + why does this do IN-EXISTS conversion? Can't we unify it with mergeable + semi-joins? currently, convert_subq_to_sj() cannot fail to convert (unless + fatal errors) + + + RETURN + FALSE - Ok + TRUE - Fatal error +*/ + +static bool convert_subq_to_jtbm(JOIN *parent_join, + Item_in_subselect *subq_pred, + bool *remove_item) +{ + SELECT_LEX *parent_lex= parent_join->select_lex; + List *emb_join_list= &parent_lex->top_join_list; + TABLE_LIST *emb_tbl_nest= NULL; // will change when we learn to handle outer joins + TABLE_LIST *tl; + bool optimization_delayed= TRUE; + TABLE_LIST *jtbm; + LEX_STRING tbl_alias; + THD *thd= parent_join->thd; + DBUG_ENTER("convert_subq_to_jtbm"); + + subq_pred->set_strategy(SUBS_MATERIALIZATION); + subq_pred->is_jtbm_merged= TRUE; + + *remove_item= TRUE; + + if (!(tbl_alias.str= (char*)thd->calloc(SUBQERY_TEMPTABLE_NAME_MAX_LEN)) || + !(jtbm= alloc_join_nest(thd))) //todo: this is not a join nest! + { + DBUG_RETURN(TRUE); + } + + jtbm->join_list= emb_join_list; + jtbm->embedding= emb_tbl_nest; + jtbm->jtbm_subselect= subq_pred; + jtbm->nested_join= NULL; + + /* Nests do not participate in those 'chains', so: */ + /* jtbm->next_leaf= jtbm->next_local= jtbm->next_global == NULL*/ + emb_join_list->push_back(jtbm, thd->mem_root); + + /* + Inject the jtbm table into TABLE_LIST::next_leaf list, so that + make_join_statistics() and co. can find it. 
+ */ + parent_lex->leaf_tables.push_back(jtbm, thd->mem_root); + + if (subq_pred->unit->first_select()->options & OPTION_SCHEMA_TABLE) + parent_lex->options |= OPTION_SCHEMA_TABLE; + + /* + Same as above for TABLE_LIST::next_local chain + (a theory: a next_local chain always starts with ::leaf_tables + because view's tables are inserted after the view) + */ + for (tl= (TABLE_LIST*)(parent_lex->table_list.first); tl->next_local; tl= tl->next_local) + {} + tl->next_local= jtbm; + + /* A theory: no need to re-connect the next_global chain */ + if (optimization_delayed) + { + DBUG_ASSERT(parent_join->table_count < MAX_TABLES); + + jtbm->jtbm_table_no= parent_join->table_count; + + create_subquery_temptable_name(&tbl_alias, + subq_pred->unit->first_select()->select_number); + jtbm->alias.str= tbl_alias.str; + jtbm->alias.length= tbl_alias.length; + parent_join->table_count++; + DBUG_RETURN(thd->is_fatal_error); + } + subselect_hash_sj_engine *hash_sj_engine= + ((subselect_hash_sj_engine*)subq_pred->engine); + jtbm->table= hash_sj_engine->tmp_table; + + jtbm->table->tablenr= parent_join->table_count; + jtbm->table->map= table_map(1) << (parent_join->table_count); + jtbm->jtbm_table_no= jtbm->table->tablenr; + + parent_join->table_count++; + DBUG_ASSERT(parent_join->table_count < MAX_TABLES); + + Item *conds= hash_sj_engine->semi_join_conds; + conds->fix_after_pullout(parent_lex, &conds, TRUE); + + DBUG_EXECUTE("where", print_where(conds,"SJ-EXPR", QT_ORDINARY);); + + create_subquery_temptable_name(&tbl_alias, hash_sj_engine->materialize_join-> + select_lex->select_number); + jtbm->alias.str= tbl_alias.str; + jtbm->alias.length= tbl_alias.length; + + parent_lex->have_merged_subqueries= TRUE; + + /* Don't unlink the child subselect, as the subquery will be used. 
*/ + + DBUG_RETURN(thd->is_fatal_error); +} + + +static TABLE_LIST *alloc_join_nest(THD *thd) +{ + TABLE_LIST *tbl; + if (!(tbl= (TABLE_LIST*) thd->calloc(ALIGN_SIZE(sizeof(TABLE_LIST))+ + sizeof(NESTED_JOIN)))) + return NULL; + tbl->nested_join= (NESTED_JOIN*) ((uchar*)tbl + + ALIGN_SIZE(sizeof(TABLE_LIST))); + return tbl; +} + +/* + @Note thd->is_fatal_error can be set in case of OOM +*/ + +void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List *tlist) +{ + List_iterator it(*tlist); + TABLE_LIST *table; + while ((table= it++)) + { + if (table->on_expr) + table->on_expr->fix_after_pullout(new_parent, &table->on_expr, TRUE); + if (table->nested_join) + fix_list_after_tbl_changes(new_parent, &table->nested_join->join_list); + } +} + + +static void set_emb_join_nest(List *tables, TABLE_LIST *emb_sj_nest) +{ + List_iterator it(*tables); + TABLE_LIST *tbl; + while ((tbl= it++)) + { + /* + Note: check for nested_join first. + derived-merged tables have tbl->table!=NULL && + tbl->table->reginfo==NULL. + */ + if (tbl->nested_join) + set_emb_join_nest(&tbl->nested_join->join_list, emb_sj_nest); + else if (tbl->table) + tbl->table->reginfo.join_tab->emb_sj_nest= emb_sj_nest; + + } +} + +/* + Pull tables out of semi-join nests, if possible + + SYNOPSIS + pull_out_semijoin_tables() + join The join where to do the semi-join flattening + + DESCRIPTION + Try to pull tables out of semi-join nests. + + PRECONDITIONS + When this function is called, the join may have several semi-join nests + but it is guaranteed that one semi-join nest does not contain another. + + ACTION + A table can be pulled out of the semi-join nest if + - It is a constant table, or + - It is accessed via eq_ref(outer_tables) + + POSTCONDITIONS + * Tables that were pulled out have JOIN_TAB::emb_sj_nest == NULL + * Tables that were not pulled out have JOIN_TAB::emb_sj_nest pointing + to semi-join nest they are in. 
+ * Semi-join nests' TABLE_LIST::sj_inner_tables is updated accordingly + + This operation is (and should be) performed at each PS execution since + tables may become/cease to be constant across PS reexecutions. + + NOTE + Table pullout may make uncorrelated subquery correlated. Consider this + example: + + ... WHERE oe IN (SELECT it1.primary_key WHERE p(it1, it2) ... ) + + here table it1 can be pulled out (we have it1.primary_key=oe which gives + us functional dependency). Once it1 is pulled out, all references to it1 + from p(it1, it2) become references to outside of the subquery and thus + make the subquery (i.e. its semi-join nest) correlated. + Making the subquery (i.e. its semi-join nest) correlated prevents us from + using Materialization or LooseScan to execute it. + + RETURN + 0 - OK + 1 - Out of memory error +*/ + +int pull_out_semijoin_tables(JOIN *join) +{ + TABLE_LIST *sj_nest; + DBUG_ENTER("pull_out_semijoin_tables"); + List_iterator sj_list_it(join->select_lex->sj_nests); + + /* Try pulling out of the each of the semi-joins */ + while ((sj_nest= sj_list_it++)) + { + List_iterator child_li(sj_nest->nested_join->join_list); + TABLE_LIST *tbl; + Json_writer_object trace_wrapper(join->thd); + Json_writer_object trace(join->thd, "semijoin_table_pullout"); + Json_writer_array trace_arr(join->thd, "pulled_out_tables"); + + /* + Don't do table pull-out for nested joins (if we get nested joins here, it + means these are outer joins. It is theoretically possible to do pull-out + for some of the outer tables but we don't support this currently. 
+ */ + bool have_join_nest_children= FALSE; + + set_emb_join_nest(&sj_nest->nested_join->join_list, sj_nest); + + while ((tbl= child_li++)) + { + if (tbl->nested_join) + { + have_join_nest_children= TRUE; + break; + } + } + + table_map pulled_tables= 0; + table_map dep_tables= 0; + if (have_join_nest_children) + goto skip; + + /* + Calculate set of tables within this semi-join nest that have + other dependent tables + */ + child_li.rewind(); + while ((tbl= child_li++)) + { + TABLE *const table= tbl->table; + if (table && + (table->reginfo.join_tab->dependent & + sj_nest->nested_join->used_tables)) + dep_tables|= table->reginfo.join_tab->dependent; + } + + /* Action #1: Mark the constant tables to be pulled out */ + child_li.rewind(); + while ((tbl= child_li++)) + { + if (tbl->table) + { + tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest; +#if 0 + /* + Do not pull out tables because they are constant. This operation has + a problem: + - Some constant tables may become/cease to be constant across PS + re-executions + - Contrary to our initial assumption, it turned out that table pullout + operation is not easily undoable. + + The solution is to leave constant tables where they are. This will + affect only constant tables that are 1-row or empty, tables that are + constant because they are accessed via eq_ref(const) access will + still be pulled out as functionally-dependent. + + This will cause us to miss the chance to flatten some of the + subqueries, but since const tables do not generate many duplicates, + it really doesn't matter that much whether they were pulled out or + not. + + All of this was done as fix for BUG#43768. + */ + if (tbl->table->map & join->const_table_map) + { + pulled_tables |= tbl->table->map; + DBUG_PRINT("info", ("Table %s pulled out (reason: constant)", + tbl->table->alias)); + } +#endif + } + } + + /* + Action #2: Find which tables we can pull out based on + update_ref_and_keys() data. 
Note that pulling one table out can allow + us to pull out some other tables too. + */ + bool pulled_a_table; + do + { + pulled_a_table= FALSE; + child_li.rewind(); + while ((tbl= child_li++)) + { + if (tbl->table && !(pulled_tables & tbl->table->map) && + !(dep_tables & tbl->table->map)) + { + if (find_eq_ref_candidate(tbl->table, + sj_nest->nested_join->used_tables & + ~pulled_tables)) + { + pulled_a_table= TRUE; + pulled_tables |= tbl->table->map; + DBUG_PRINT("info", ("Table %s pulled out (reason: func dep)", + tbl->table->alias.c_ptr_safe())); + trace_arr.add(tbl->table->alias.c_ptr_safe()); + /* + Pulling a table out of uncorrelated subquery in general makes + makes it correlated. See the NOTE to this funtion. + */ + sj_nest->sj_subq_pred->is_correlated= TRUE; + sj_nest->nested_join->sj_corr_tables|= tbl->table->map; + sj_nest->nested_join->sj_depends_on|= tbl->table->map; + } + } + } + } while (pulled_a_table); + + child_li.rewind(); + skip: + /* + Action #3: Move the pulled out TABLE_LIST elements to the parents. + */ + table_map inner_tables= sj_nest->nested_join->used_tables & + ~pulled_tables; + /* Record the bitmap of inner tables */ + sj_nest->sj_inner_tables= inner_tables; + if (pulled_tables) + { + List *upper_join_list= (sj_nest->embedding != NULL)? + (&sj_nest->embedding->nested_join->join_list): + (&join->select_lex->top_join_list); + Query_arena *arena, backup; + arena= join->thd->activate_stmt_arena_if_needed(&backup); + while ((tbl= child_li++)) + { + if (tbl->table) + { + if (inner_tables & tbl->table->map) + { + /* This table is not pulled out */ + tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest; + } + else + { + /* This table has been pulled out of the semi-join nest */ + tbl->table->reginfo.join_tab->emb_sj_nest= NULL; + /* + Pull the table up in the same way as simplify_joins() does: + update join_list and embedding pointers but keep next[_local] + pointers. 
+ */ + child_li.remove(); + sj_nest->nested_join->used_tables &= ~tbl->table->map; + upper_join_list->push_back(tbl, join->thd->mem_root); + tbl->join_list= upper_join_list; + tbl->embedding= sj_nest->embedding; + } + } + } + + /* Remove the sj-nest itself if we've removed everything from it */ + if (!inner_tables) + { + List_iterator li(*upper_join_list); + /* Find the sj_nest in the list. */ + while (sj_nest != li++) ; + li.remove(); + /* Also remove it from the list of SJ-nests: */ + sj_list_it.remove(); + } + + if (arena) + join->thd->restore_active_arena(arena, &backup); + } + } + DBUG_RETURN(0); +} + + +/* + Optimize semi-join nests that could be run with sj-materialization + + SYNOPSIS + optimize_semijoin_nests() + join The join to optimize semi-join nests for + all_table_map Bitmap of all tables in the join + + DESCRIPTION + Optimize each of the semi-join nests that can be run with + materialization. For each of the nests, we + - Generate the best join order for this "sub-join" and remember it; + - Remember the sub-join execution cost (it's part of materialization + cost); + - Calculate other costs that will be incurred if we decide + to use materialization strategy for this semi-join nest. + + All obtained information is saved and will be used by the main join + optimization pass. + + NOTES + Because of Join::reoptimize(), this function may be called multiple times. 
  RETURN
    FALSE Ok
    TRUE  Out of memory error
*/

bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
{
  DBUG_ENTER("optimize_semijoin_nests");
  THD *thd= join->thd;
  List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
  TABLE_LIST *sj_nest;
  /* Nothing to do if the query has no semi-join nests */
  if (!join->select_lex->sj_nests.elements)
    DBUG_RETURN(FALSE);
  Json_writer_object wrapper(thd);
  Json_writer_object trace_semijoin_nest(thd,
                        "execution_plan_for_potential_materialization");
  Json_writer_array trace_steps_array(thd, "steps");
  while ((sj_nest= sj_list_it++))
  {
    /* semi-join nests with only constant tables are not valid */
    /// DBUG_ASSERT(sj_nest->sj_inner_tables & ~join->const_table_map);

    sj_nest->sj_mat_info= NULL;
    /*
      The statement may have been executed with 'semijoin=on' earlier.
      We need to verify that 'semijoin=on' still holds.
    */
    if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_SEMIJOIN) &&
        optimizer_flag(join->thd, OPTIMIZER_SWITCH_MATERIALIZATION))
    {
      if ((sj_nest->sj_inner_tables & ~join->const_table_map) && /* not everything was pulled out */
          !sj_nest->sj_subq_pred->is_correlated &&
           sj_nest->sj_subq_pred->types_allow_materialization)
      {
        /*
          Optimize the join order of the nest's tables in isolation:
          choose_plan() is run with emb_sjm_nest set, which restricts it
          to the tables of this nest.
        */
        join->emb_sjm_nest= sj_nest;
        if (choose_plan(join, all_table_map &~join->const_table_map))
          DBUG_RETURN(TRUE); /* purecov: inspected */
        /*
          The best plan to run the subquery is now in join->best_positions,
          save it.
        */
        /* Number of non-constant tables inside this semi-join nest */
        uint n_tables= my_count_bits(sj_nest->sj_inner_tables & ~join->const_table_map);
        SJ_MATERIALIZATION_INFO* sjm;
        if (!(sjm= new SJ_MATERIALIZATION_INFO) ||
            !(sjm->positions= (POSITION*)join->thd->alloc(sizeof(POSITION)*
                                                          n_tables)))
          DBUG_RETURN(TRUE); /* purecov: inspected */
        sjm->tables= n_tables;
        sjm->is_used= FALSE;
        double subjoin_out_rows, subjoin_read_time;

        /*
        join->get_partial_cost_and_fanout(n_tables + join->const_tables,
                                          table_map(-1),
                                          &subjoin_read_time,
                                          &subjoin_out_rows);
        */
        join->get_prefix_cost_and_fanout(n_tables,
                                         &subjoin_read_time,
                                         &subjoin_out_rows);

        sjm->materialization_cost.convert_from_cost(subjoin_read_time);
        sjm->rows_with_duplicates= sjm->rows= subjoin_out_rows;

        // Don't use the following list because it has "stale" items. use
        // ref_pointer_array instead:
        //
        //List<Item> &right_expr_list=
        //  sj_nest->sj_subq_pred->unit->first_select()->item_list;
        /*
          Adjust output cardinality estimates. If the subquery has form

           ... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) )

          then the number of distinct output record combinations has an
          upper bound of product of number of records matching the tables
          that are used by the SELECT clause.
          TODO:
            We can get a more precise estimate if we
             - use rec_per_key cardinality estimates. For simple cases like
               "oe IN (SELECT t.key ...)" it is trivial.
             - Functional dependencies between the tables in the semi-join
               nest (the payoff is probably less here?)

          See also get_post_group_estimate().
        */
        SELECT_LEX *subq_select= sj_nest->sj_subq_pred->unit->first_select();
        {
          /* Make map2table usable for the nest's materialization plan */
          for (uint i=0 ; i < join->const_tables + sjm->tables ; i++)
          {
            JOIN_TAB *tab= join->best_positions[i].table;
            join->map2table[tab->table->tablenr]= tab;
          }
          /* Collect the tables referenced from the subquery's SELECT list */
          table_map map= 0;
          for (uint i=0; i < subq_select->item_list.elements; i++)
            map|= subq_select->ref_pointer_array[i]->used_tables();
          map= map & ~PSEUDO_TABLE_BITS;
          Table_map_iterator tm_it(map);
          int tableno;
          double rows= 1.0;
          /*
            Cap the distinct-rows estimate by the product of the row
            estimates of the tables the SELECT list depends on.
          */
          while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
            rows= COST_MULT(rows,
                            join->map2table[tableno]->table->opt_range_condition_rows);
          sjm->rows= MY_MIN(sjm->rows, rows);
        }
        memcpy((uchar*) sjm->positions,
               (uchar*) (join->best_positions + join->const_tables),
               sizeof(POSITION) * n_tables);

        /*
          Calculate temporary table parameters and usage costs
        */
        uint rowlen= get_tmp_table_rec_length(subq_select->ref_pointer_array,
                                              subq_select->item_list.elements);
        double lookup_cost= get_tmp_table_lookup_cost(join->thd,
                                                      subjoin_out_rows, rowlen);
        double write_cost= get_tmp_table_write_cost(join->thd,
                                                    subjoin_out_rows, rowlen);

        /*
          Let materialization cost include the cost to write the data into the
          temporary table:
        */
        sjm->materialization_cost.add_io(subjoin_out_rows, write_cost);

        /*
          Set the cost to do a full scan of the temptable (will need this to
          consider doing sjm-scan):
        */
        sjm->scan_cost.reset();
        sjm->scan_cost.add_io(sjm->rows, lookup_cost);

        sjm->lookup_cost.convert_from_cost(lookup_cost);
        sj_nest->sj_mat_info= sjm;
        DBUG_EXECUTE("opt", print_sjm(sjm););
      }
    }
  }
  join->emb_sjm_nest= NULL;
  DBUG_RETURN(FALSE);
}


/*
  Get estimated record length for semi-join materialization temptable

  SYNOPSIS
    get_tmp_table_rec_length()
      items  IN subquery's select list.

  DESCRIPTION
    Calculate estimated record length for semi-join materialization
    temptable.
It's an estimate because we don't follow every bit of + create_tmp_table()'s logic. This isn't necessary as the return value of + this function is used only for cost calculations. + + RETURN + Length of the temptable record, in bytes +*/ + +static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements) +{ + uint len= 0; + Item *item; + //List_iterator it(items); + for (uint i= 0; i < elements ; i++) + { + item = p_items[i]; + switch (item->result_type()) { + case REAL_RESULT: + len += sizeof(double); + break; + case INT_RESULT: + if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1)) + len += 8; + else + len += 4; + break; + case STRING_RESULT: + enum enum_field_types type; + /* DATE/TIME and GEOMETRY fields have STRING_RESULT result type. */ + if ((type= item->field_type()) == MYSQL_TYPE_DATETIME || + type == MYSQL_TYPE_TIME || type == MYSQL_TYPE_DATE || + type == MYSQL_TYPE_TIMESTAMP || type == MYSQL_TYPE_GEOMETRY) + len += 8; + else + len += item->max_length; + break; + case DECIMAL_RESULT: + len += 10; + break; + case ROW_RESULT: + default: + DBUG_ASSERT(0); /* purecov: deadcode */ + break; + } + } + return len; +} + + +/** + The cost of a lookup into a unique hash/btree index on a temporary table + with 'row_count' rows each of size 'row_size'. + + @param thd current query context + @param row_count number of rows in the temp table + @param row_size average size in bytes of the rows + + @return the cost of one lookup +*/ + +double +get_tmp_table_lookup_cost(THD *thd, double row_count, uint row_size) +{ + if (row_count > thd->variables.max_heap_table_size / (double) row_size) + return (double) DISK_TEMPTABLE_LOOKUP_COST; + else + return (double) HEAP_TEMPTABLE_LOOKUP_COST; +} + +/** + The cost of writing a row into a temporary table with 'row_count' unique + rows each of size 'row_size'. 
  @param thd        current query context
  @param row_count  number of rows in the temp table
  @param row_size   average size in bytes of the rows

  @return the cost of writing one row
*/

double
get_tmp_table_write_cost(THD *thd, double row_count, uint row_size)
{
  double lookup_cost= get_tmp_table_lookup_cost(thd, row_count, row_size);
  /*
    TODO:
    This is an optimistic estimate. Add additional costs resulting from
    actually writing the row to memory/disk and possible index reorganization.
  */
  return lookup_cost;
}


/*
  Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate

  SYNOPSIS
    find_eq_ref_candidate()
      table             Table to be checked
      sj_inner_tables   Bitmap of inner tables. eq_ref(inner_table) doesn't
                        count.

  DESCRIPTION
    Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate

  TODO
    Check again if it is feasible to factor common parts with constant table
    search

    Also check if it's feasible to factor common parts with table elimination

  RETURN
    TRUE  - There exists an eq_ref(outer-tables) candidate
    FALSE - Otherwise
*/

bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables)
{
  KEYUSE *keyuse= table->reginfo.join_tab->keyuse;

  if (keyuse)
  {
    do
    {
      uint key= keyuse->key;
      key_part_map bound_parts= 0;
      /* Only unique (HA_NOSAME) non-hash-join keys can give eq_ref */
      if (!keyuse->is_for_hash_join() &&
          (table->key_info[key].flags & HA_NOSAME))
      {
        KEY *keyinfo= table->key_info + key;
        do  /* For all equalities on all key parts */
        {
          /*
            Check if this is "t.keypart = expr(outer_tables)

            Don't allow variants that can produce duplicates:
            - Don't allow "ref or null"
            - the keyuse (that is, the operation) must be null-rejecting,
              unless the other expression is non-NULLable.
          */
          if (!(keyuse->used_tables & sj_inner_tables) &&
              !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) &&
              (keyuse->null_rejecting || !keyuse->val->maybe_null()))
          {
            /*
              NOTE(review): "1 << keypart" is int-typed; this assumes
              keypart stays below the width of int (MAX_REF_PARTS) --
              confirm, otherwise key_part_map(1) << keypart would be safer.
            */
            bound_parts |= 1 << keyuse->keypart;
          }
          keyuse++;
        } while (keyuse->key == key && keyuse->table == table);

        /* eq_ref found if every user-defined key part is bound */
        if (bound_parts == PREV_BITS(uint, keyinfo->user_defined_key_parts))
          return TRUE;
      }
      else
      {
        /* Skip all KEYUSE elements belonging to this (unusable) key */
        do
        {
          keyuse++;
        } while (keyuse->key == key && keyuse->table == table);
      }
    } while (keyuse->table == table);
  }
  return FALSE;
}


/*
  Do semi-join optimization step after we've added a new tab to join prefix

  SYNOPSIS
    optimize_semi_joins()
      join                        The join we're optimizing
      remaining_tables            Tables not in the join prefix
      new_join_tab                Join tab we've just added to the join prefix
      idx                         Index of this join tab (i.e. number of tables
                                  in the prefix minus one)
      current_record_count INOUT  Estimate of #records in join prefix's output
      current_read_time    INOUT  Cost to execute the join prefix
      loose_scan_pos       IN     A POSITION with LooseScan plan to access
                                  table new_join_tab
                                  (produced by the last best_access_path call)

  DESCRIPTION
    Update semi-join optimization state after we've added another tab (table
    and access method) to the join prefix.

    The state is maintained in join->positions[#prefix_size]. Each of the
    available strategies has its own state variables.

    for each semi-join strategy
    {
      update strategy's state variables;

      if (join prefix has all the tables that are needed to consider
          using this strategy for the semi-join(s))
      {
        calculate cost of using the strategy
        if ((this is the first strategy to handle the semi-join nest(s) ||
             the cost is less than other strategies))
        {
          // Pick this strategy
          pos->sj_strategy= ..
          ..
        }
      }
    }

    Most of the new state is saved join->positions[idx] (and hence no undo
    is necessary). Several members of class JOIN are updated also, these
    changes can be rolled back with restore_prev_sj_state().
  See setup_semijoin_dups_elimination() for a description of what kinds of
  join prefixes each strategy can handle.
*/

/*
  Check whether the given set of semi-join inner tables spans more than the
  single semi-join nest found in the join prefix: returns TRUE when
  inner_tables intersects some nest in prefix[0..idx] but is not exactly that
  nest's non-constant inner tables (i.e. several semi-joins are being handled
  together).
*/
bool is_multiple_semi_joins(JOIN *join, POSITION *prefix, uint idx, table_map inner_tables)
{
  for (int i= (int)idx; i >= 0; i--)
  {
    TABLE_LIST *emb_sj_nest;
    if ((emb_sj_nest= prefix[i].table->emb_sj_nest))
    {
      if (inner_tables & emb_sj_nest->sj_inner_tables)
        return !MY_TEST(inner_tables == (emb_sj_nest->sj_inner_tables &
                                         ~join->const_table_map));
    }
  }
  return FALSE;
}


void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx,
                         double *current_record_count,
                         double *current_read_time, POSITION *loose_scan_pos)
{
  POSITION *pos= join->positions + idx;
  const JOIN_TAB *new_join_tab= pos->table;

#ifdef HAVE_valgrind
  /* Re-construct the pickers so valgrind sees them as initialized */
  new (&pos->firstmatch_picker) Firstmatch_picker;
  new (&pos->loosescan_picker) LooseScan_picker;
  new (&pos->sjmat_picker) Sj_materialization_picker;
  new (&pos->dups_weedout_picker) Duplicate_weedout_picker;
#endif

  if (join->emb_sjm_nest ||                       //(1)
      !join->select_lex->have_merged_subqueries)  //(2)
  {
    /*
      (1): We're performing optimization inside SJ-Materialization nest:
       - there are no other semi-joins inside semi-join nests
       - attempts to build semi-join strategies here will confuse
         the optimizer, so bail out.
      (2): Don't waste time on semi-join optimizations if we don't have any
           semi-joins
    */
    pos->sj_strategy= SJ_OPT_NONE;
    return;
  }

  Semi_join_strategy_picker *pickers[]=
  {
    &pos->firstmatch_picker,
    &pos->loosescan_picker,
    &pos->sjmat_picker,
    &pos->dups_weedout_picker,
    NULL,
  };
  Json_writer_array trace_steps(join->thd, "semijoin_strategy_choice");
  /*
    Update join->cur_sj_inner_tables (Used by FirstMatch in this function and
    LooseScan detector in best_access_path)
  */
  remaining_tables &= ~new_join_tab->table->map;
  table_map dups_producing_tables, UNINIT_VAR(prev_dups_producing_tables),
            UNINIT_VAR(prev_sjm_lookup_tables);

  if (idx == join->const_tables)
    dups_producing_tables= 0;
  else
    dups_producing_tables= pos[-1].dups_producing_tables;

  TABLE_LIST *emb_sj_nest;
  if ((emb_sj_nest= new_join_tab->emb_sj_nest))
    dups_producing_tables |= emb_sj_nest->sj_inner_tables;

  Semi_join_strategy_picker **strategy, **prev_strategy= 0;
  if (idx == join->const_tables)
  {
    /* First table, initialize pickers */
    for (strategy= pickers; *strategy != NULL; strategy++)
      (*strategy)->set_empty();
    pos->inner_tables_handled_with_other_sjs= 0;
  }
  else
  {
    /* Carry each picker's state over from the previous prefix position */
    for (strategy= pickers; *strategy != NULL; strategy++)
    {
      (*strategy)->set_from_prev(pos - 1);
    }
    pos->inner_tables_handled_with_other_sjs=
      pos[-1].inner_tables_handled_with_other_sjs;
  }

  pos->prefix_cost= *current_read_time;
  pos->prefix_record_count= *current_record_count;

  {
    pos->sj_strategy= SJ_OPT_NONE;

    for (strategy= pickers; *strategy != NULL; strategy++)
    {
      table_map handled_fanout;
      sj_strategy_enum sj_strategy;
      double rec_count= *current_record_count;
      double read_time= *current_read_time;
      if ((*strategy)->check_qep(join, idx, remaining_tables,
                                 new_join_tab,
                                 &rec_count,
                                 &read_time,
                                 &handled_fanout,
                                 &sj_strategy,
                                 loose_scan_pos))
      {
        /*
          It's possible to use the strategy. Use it, if
           - it removes semi-join fanout that was not removed before
           - using it is cheaper than using something else,
               and {if some other strategy has removed fanout
               that this strategy is trying to remove, then it
               did remove the fanout only for one semi-join}
               This is to avoid a situation when
                1. strategy X removes fanout for semijoin X,Y
                2. using strategy Z is cheaper, but it only removes
                   fanout from semijoin X.
                3. We have no clue what to do about fanout of semi-join Y.
        */
        if ((dups_producing_tables & handled_fanout) ||
            (read_time < *current_read_time &&
             !(handled_fanout & pos->inner_tables_handled_with_other_sjs)))
        {
          DBUG_ASSERT(pos->sj_strategy != sj_strategy);
          /*
            Accept if this is the first strategy chosen, or if the strategy
            replaces a strategy that was used for exactly the same tables
          */
          if (pos->sj_strategy == SJ_OPT_NONE ||
              handled_fanout ==
                (prev_dups_producing_tables ^ dups_producing_tables))
          {
            prev_strategy= strategy;
            if (pos->sj_strategy == SJ_OPT_NONE)
            {
              /* Save state so a later conflicting pick can be rolled back */
              prev_dups_producing_tables= dups_producing_tables;
              prev_sjm_lookup_tables= join->sjm_lookup_tables;
            }
            /* Mark strategy as used */
            (*strategy)->mark_used();
            pos->sj_strategy= sj_strategy;
            if (sj_strategy == SJ_OPT_MATERIALIZE)
              join->sjm_lookup_tables |= handled_fanout;
            else
              join->sjm_lookup_tables &= ~handled_fanout;
            *current_read_time= read_time;
            *current_record_count= rec_count;
            dups_producing_tables &= ~handled_fanout;

            //TODO: update bitmap of semi-joins that were handled together with
            // others.
            if (is_multiple_semi_joins(join, join->positions, idx,
                                       handled_fanout))
              pos->inner_tables_handled_with_other_sjs |= handled_fanout;
          }
          else
          {
            /* Conflict: fall back to the most general variant */
            (*prev_strategy)->set_empty();
            dups_producing_tables= prev_dups_producing_tables;
            join->sjm_lookup_tables= prev_sjm_lookup_tables;
            // mark it 'none' to avoid loops
            pos->sj_strategy= SJ_OPT_NONE;
            // next skip to last (DuplicateWeedout, the most general picker);
            strategy= pickers +
              (sizeof(pickers)/sizeof(Semi_join_strategy_picker*) - 3);
            continue;
          }
        }
        else
        {
          /* We decided not to apply the strategy. */
          (*strategy)->set_empty();
        }
      }
    }

    if (unlikely(join->thd->trace_started() && pos->sj_strategy != SJ_OPT_NONE))
    {
      /* Record the chosen strategy in the optimizer trace */
      Json_writer_object tr(join->thd);
      const char *sname;
      switch (pos->sj_strategy) {
      case SJ_OPT_MATERIALIZE:
        sname= "SJ-Materialization";
        break;
      case SJ_OPT_MATERIALIZE_SCAN:
        sname= "SJ-Materialization-Scan";
        break;
      case SJ_OPT_FIRST_MATCH:
        sname= "FirstMatch";
        break;
      case SJ_OPT_DUPS_WEEDOUT:
        sname= "DuplicateWeedout";
        break;
      case SJ_OPT_LOOSE_SCAN:
        sname= "LooseScan";
        break;
      default:
        DBUG_ASSERT(0);
        sname="Invalid";
      }
      tr.add("chosen_strategy", sname);
    }
  }

  update_sj_state(join, new_join_tab, idx, remaining_tables);

  pos->prefix_cost= *current_read_time;
  pos->prefix_record_count= *current_record_count;
  pos->dups_producing_tables= dups_producing_tables;
}


/*
  Update JOIN's semi-join optimization state after the join tab new_tab
  has been added into the join prefix.
+ + @seealso restore_prev_sj_state() does the reverse actoion +*/ + +void update_sj_state(JOIN *join, const JOIN_TAB *new_tab, + uint idx, table_map remaining_tables) +{ + DBUG_ASSERT(!join->emb_sjm_nest); + if (TABLE_LIST *emb_sj_nest= new_tab->emb_sj_nest) + { + join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; + + /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */ + if (!(remaining_tables & + emb_sj_nest->sj_inner_tables & ~new_tab->table->map)) + join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; + } +#ifndef DBUG_OFF + join->dbug_verify_sj_inner_tables(idx + 1); +#endif +} + + +void Sj_materialization_picker::set_from_prev(POSITION *prev) +{ + if (prev->sjmat_picker.is_used) + set_empty(); + else + { + sjm_scan_need_tables= prev->sjmat_picker.sjm_scan_need_tables; + sjm_scan_last_inner= prev->sjmat_picker.sjm_scan_last_inner; + } + is_used= FALSE; +} + + +bool Sj_materialization_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos) +{ + bool sjm_scan; + SJ_MATERIALIZATION_INFO *mat_info; + THD *thd= join->thd; + if ((mat_info= at_sjmat_pos(join, remaining_tables, + new_join_tab, idx, &sjm_scan))) + { + if (sjm_scan) + { + /* + We can't yet evaluate this option yet. This is because we can't + accout for fanout of sj-inner tables yet: + + ntX SJM-SCAN(it1 ... itN) | ot1 ... otN | + ^(1) ^(2) + + we're now at position (1). SJM temptable in general has multiple + records, so at point (1) we'll get the fanout from sj-inner tables (ie + there will be multiple record combinations). + + The final join result will not contain any semi-join produced + fanout, i.e. tables within SJM-SCAN(...) will not contribute to + the cardinality of the join output. Extra fanout produced by + SJM-SCAN(...) will be 'absorbed' into fanout produced by ot1 ... otN. 
+ + The simple way to model this is to remove SJM-SCAN(...) fanout once + we reach the point #2. + */ + sjm_scan_need_tables= + new_join_tab->emb_sj_nest->sj_inner_tables | + new_join_tab->emb_sj_nest->nested_join->sj_depends_on | + new_join_tab->emb_sj_nest->nested_join->sj_corr_tables; + sjm_scan_last_inner= idx; + } + else + { + /* This is SJ-Materialization with lookups */ + double prefix_cost; + signed int first_tab= (int)idx - mat_info->tables; + double prefix_rec_count, mat_read_time; + Json_writer_object trace(join->thd); + trace.add("strategy", "SJ-Materialization"); + + if (first_tab < (int)join->const_tables) + { + prefix_cost= 0; + prefix_rec_count= 1.0; + } + else + { + prefix_cost= join->positions[first_tab].prefix_cost; + prefix_rec_count= join->positions[first_tab].prefix_record_count; + } + + mat_read_time= + COST_ADD(prefix_cost, + COST_ADD(mat_info->materialization_cost.total_cost(), + COST_MULT(prefix_rec_count, + mat_info->lookup_cost.total_cost()))); + + /* + NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION + elements to join->positions as that makes it hard to return things + back when making one step back in join optimization. That's done + after the QEP has been chosen. 
+ */ + *read_time= mat_read_time; + *record_count= prefix_rec_count; + *handled_fanout= new_join_tab->emb_sj_nest->sj_inner_tables; + *strategy= SJ_OPT_MATERIALIZE; + if (unlikely(trace.trace_started())) + { + trace.add("records", *record_count); + trace.add("read_time", *read_time); + } + return TRUE; + } + } + + /* 4.A SJM-Scan second phase check */ + if (sjm_scan_need_tables && /* Have SJM-Scan prefix */ + !(sjm_scan_need_tables & remaining_tables)) + { + Json_writer_object trace(join->thd); + trace.add("strategy", "SJ-Materialization-Scan"); + TABLE_LIST *mat_nest= + join->positions[sjm_scan_last_inner].table->emb_sj_nest; + SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info; + + double prefix_cost; + double prefix_rec_count; + int first_tab= sjm_scan_last_inner + 1 - mat_info->tables; + /* Get the prefix cost */ + if (first_tab == (int)join->const_tables) + { + prefix_rec_count= 1.0; + prefix_cost= 0.0; + } + else + { + prefix_cost= join->positions[first_tab - 1].prefix_cost; + prefix_rec_count= join->positions[first_tab - 1].prefix_record_count; + } + + /* Add materialization cost */ + prefix_cost= + COST_ADD(prefix_cost, + COST_ADD(mat_info->materialization_cost.total_cost(), + COST_MULT(prefix_rec_count, + mat_info->scan_cost.total_cost()))); + prefix_rec_count= COST_MULT(prefix_rec_count, mat_info->rows); + + uint i; + table_map rem_tables= remaining_tables; + for (i= idx; i != (first_tab + mat_info->tables - 1); i--) + rem_tables |= join->positions[i].table->table->map; + + POSITION curpos, dummy; + /* Need to re-run best-access-path as we prefix_rec_count has changed */ + bool disable_jbuf= (join->thd->variables.join_cache_level == 0); + Json_writer_temp_disable trace_semijoin_mat_scan(thd); + for (i= first_tab + mat_info->tables; i <= idx; i++) + { + best_access_path(join, join->positions[i].table, rem_tables, + join->positions, i, + disable_jbuf, prefix_rec_count, &curpos, &dummy); + prefix_rec_count= COST_MULT(prefix_rec_count, 
curpos.records_read); + prefix_cost= COST_ADD(prefix_cost, curpos.read_time); + prefix_cost= COST_ADD(prefix_cost, + prefix_rec_count / TIME_FOR_COMPARE); + //TODO: take into account join condition selectivity here + } + + *strategy= SJ_OPT_MATERIALIZE_SCAN; + *read_time= prefix_cost; + /* + Note: the next line means we did not remove the subquery's fanout from + *record_count. It needs to be removed, as the join prefix is + + ntX SJM-SCAN(it1 ... itN) | (ot1 ... otN) ... + + here, the SJM-SCAN may have introduced subquery's fanout (duplicate rows, + rows that don't have matches in ot1_i). All this fanout is gone after + table otN (or earlier) but taking it into account is hard. + + Some consolation here is that SJM-Scan strategy is applicable when the + subquery is smaller than tables otX. If the subquery has large cardinality, + we can greatly overestimate *record_count here, but it doesn't matter as + SJ-Materialization-Lookup is a better strategy anyway. + */ + *record_count= prefix_rec_count; + *handled_fanout= mat_nest->sj_inner_tables; + if (unlikely(trace.trace_started())) + { + trace.add("records", *record_count); + trace.add("read_time", *read_time); + } + return TRUE; + } + return FALSE; +} + + +void LooseScan_picker::set_from_prev(POSITION *prev) +{ + if (prev->loosescan_picker.is_used) + set_empty(); + else + { + first_loosescan_table= prev->loosescan_picker.first_loosescan_table; + loosescan_need_tables= prev->loosescan_picker.loosescan_need_tables; + } + is_used= FALSE; +} + + +bool LooseScan_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos) +{ + POSITION *first= join->positions + first_loosescan_table; + /* + LooseScan strategy can't handle interleaving between tables from the + semi-join that LooseScan is handling and any other tables. 
+ + If we were considering LooseScan for the join prefix (1) + and the table we're adding creates an interleaving (2) + then + stop considering loose scan + */ + if ((first_loosescan_table != MAX_TABLES) && // (1) + (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2) + new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2) + { + first_loosescan_table= MAX_TABLES; + } + + /* + If we got an option to use LooseScan for the current table, start + considering using LooseScan strategy + */ + if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join) + { + first_loosescan_table= idx; + loosescan_need_tables= + new_join_tab->emb_sj_nest->sj_inner_tables | + new_join_tab->emb_sj_nest->nested_join->sj_depends_on | + new_join_tab->emb_sj_nest->nested_join->sj_corr_tables; + } + + if ((first_loosescan_table != MAX_TABLES) && + !(remaining_tables & loosescan_need_tables) && + (new_join_tab->table->map & loosescan_need_tables)) + { + Json_writer_object trace(join->thd); + trace.add("strategy", "LooseScan"); + /* + Ok we have LooseScan plan and also have all LooseScan sj-nest's + inner tables and outer correlated tables into the prefix. + */ + + first= join->positions + first_loosescan_table; + uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables); + /* Got a complete LooseScan range. Calculate its cost */ + /* + The same problem as with FirstMatch - we need to save POSITIONs + somewhere but reserving space for all cases would require too + much space. We will re-calculate POSITION structures later on. + */ + bool disable_jbuf= (join->thd->variables.join_cache_level == 0); + optimize_wo_join_buffering(join, first_loosescan_table, idx, + remaining_tables, + TRUE, //first_alt + disable_jbuf ? 
join->table_count : + first_loosescan_table + n_tables, + record_count, + read_time); + /* + We don't yet have any other strategies that could handle this + semi-join nest (the other options are Duplicate Elimination or + Materialization, which need at least the same set of tables in + the join prefix to be considered) so unconditionally pick the + LooseScan. + */ + *strategy= SJ_OPT_LOOSE_SCAN; + *handled_fanout= first->table->emb_sj_nest->sj_inner_tables; + if (unlikely(trace.trace_started())) + { + trace.add("records", *record_count); + trace.add("read_time", *read_time); + } + return TRUE; + } + return FALSE; +} + +void Firstmatch_picker::set_from_prev(POSITION *prev) +{ + if (prev->firstmatch_picker.is_used) + invalidate_firstmatch_prefix(); + else + { + first_firstmatch_table= prev->firstmatch_picker.first_firstmatch_table; + first_firstmatch_rtbl= prev->firstmatch_picker.first_firstmatch_rtbl; + firstmatch_need_tables= prev->firstmatch_picker.firstmatch_need_tables; + } + is_used= FALSE; +} + +bool Firstmatch_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos) +{ + if (new_join_tab->emb_sj_nest && + optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH) && + !join->outer_join) + { + const table_map outer_corr_tables= + new_join_tab->emb_sj_nest->nested_join->sj_corr_tables | + new_join_tab->emb_sj_nest->nested_join->sj_depends_on; + const table_map sj_inner_tables= + new_join_tab->emb_sj_nest->sj_inner_tables & ~join->const_table_map; + + /* + Enter condition: + 1. The next join tab belongs to semi-join nest + (verified for the encompassing code block above). + 2. We're not in a duplicate producer range yet + 3. All outer tables that + - the subquery is correlated with, or + - referred to from the outer_expr + are in the join prefix + 4. 
All inner tables are still part of remaining_tables. + */ + if (!join->cur_sj_inner_tables && // (2) + !(remaining_tables & outer_corr_tables) && // (3) + (sj_inner_tables == // (4) + ((remaining_tables | new_join_tab->table->map) & sj_inner_tables))) + { + /* Start tracking potential FirstMatch range */ + first_firstmatch_table= idx; + firstmatch_need_tables= sj_inner_tables; + first_firstmatch_rtbl= remaining_tables; + } + + if (in_firstmatch_prefix()) + { + if (outer_corr_tables & first_firstmatch_rtbl) + { + /* + Trying to add an sj-inner table whose sj-nest has an outer correlated + table that was not in the prefix. This means FirstMatch can't be used. + */ + invalidate_firstmatch_prefix(); + } + else + { + /* Record that we need all of this semi-join's inner tables, too */ + firstmatch_need_tables|= sj_inner_tables; + } + + if (in_firstmatch_prefix() && + !(firstmatch_need_tables & remaining_tables)) + { + Json_writer_object trace(join->thd); + trace.add("strategy", "FirstMatch"); + /* + Got a complete FirstMatch range. Calculate correct costs and fanout + */ + + if (idx == first_firstmatch_table && + optimizer_flag(join->thd, OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE)) + { + /* + An important special case: only one inner table, and @@optimizer_switch + allows join buffering. + - read_time is the same (i.e. FirstMatch doesn't add any cost + - remove fanout added by the last table + */ + if (*record_count) + *record_count /= join->positions[idx].records_read; + } + else + { + optimize_wo_join_buffering(join, first_firstmatch_table, idx, + remaining_tables, FALSE, idx, + record_count, + read_time); + } + /* + We ought to save the alternate POSITIONs produced by + optimize_wo_join_buffering but the problem is that providing save + space uses too much space. Instead, we will re-calculate the + alternate POSITIONs after we've picked the best QEP. 
+ */ + *handled_fanout= firstmatch_need_tables; + /* *record_count and *read_time were set by the above call */ + *strategy= SJ_OPT_FIRST_MATCH; + if (unlikely(trace.trace_started())) + { + trace.add("records", *record_count); + trace.add("read_time", *read_time); + } + return TRUE; + } + } + } + else + invalidate_firstmatch_prefix(); + return FALSE; +} + + +void Duplicate_weedout_picker::set_from_prev(POSITION *prev) +{ + if (prev->dups_weedout_picker.is_used) + set_empty(); + else + { + dupsweedout_tables= prev->dups_weedout_picker.dupsweedout_tables; + first_dupsweedout_table= prev->dups_weedout_picker.first_dupsweedout_table; + } + is_used= FALSE; +} + + +bool Duplicate_weedout_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos + ) +{ + TABLE_LIST *nest; + if ((nest= new_join_tab->emb_sj_nest)) + { + if (!dupsweedout_tables) + first_dupsweedout_table= idx; + + dupsweedout_tables |= nest->sj_inner_tables | + nest->nested_join->sj_depends_on | + nest->nested_join->sj_corr_tables; + } + + if (dupsweedout_tables) + { + /* we're in the process of constructing a DuplicateWeedout range */ + TABLE_LIST *emb= new_join_tab->table->pos_in_table_list->embedding; + /* and we've entered an inner side of an outer join*/ + if (emb && emb->on_expr) + dupsweedout_tables |= emb->nested_join->used_tables; + } + + /* If this is the last table that we need for DuplicateWeedout range */ + if (dupsweedout_tables && !(remaining_tables & ~new_join_tab->table->map & + dupsweedout_tables)) + { + /* + Ok, reached a state where we could put a dups weedout point. 
+ Walk back and calculate + - the join cost (this is needed as the accumulated cost may assume + some other duplicate elimination method) + - extra fanout that will be removed by duplicate elimination + - duplicate elimination cost + There are two cases: + 1. We have other strategy/ies to remove all of the duplicates. + 2. We don't. + + We need to calculate the cost in case #2 also because we need to make + choice between this join order and others. + */ + uint first_tab= first_dupsweedout_table; + double dups_cost; + double prefix_rec_count; + double sj_inner_fanout= 1.0; + double sj_outer_fanout= 1.0; + uint temptable_rec_size; + Json_writer_object trace(join->thd); + trace.add("strategy", "DuplicateWeedout"); + + if (first_tab == join->const_tables) + { + prefix_rec_count= 1.0; + temptable_rec_size= 0; + dups_cost= 0.0; + } + else + { + dups_cost= join->positions[first_tab - 1].prefix_cost; + prefix_rec_count= join->positions[first_tab - 1].prefix_record_count; + temptable_rec_size= 8; /* This is not true but we'll make it so */ + } + + table_map dups_removed_fanout= 0; + double current_fanout= prefix_rec_count; + for (uint j= first_dupsweedout_table; j <= idx; j++) + { + POSITION *p= join->positions + j; + current_fanout= COST_MULT(current_fanout, p->records_read); + dups_cost= COST_ADD(dups_cost, + COST_ADD(p->read_time, + current_fanout / TIME_FOR_COMPARE)); + if (p->table->emb_sj_nest) + { + sj_inner_fanout= COST_MULT(sj_inner_fanout, p->records_read); + dups_removed_fanout |= p->table->table->map; + } + else + { + /* Ensure that table supports comparable rowids */ + DBUG_ASSERT(!(p->table->table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)); + + sj_outer_fanout= COST_MULT(sj_outer_fanout, p->records_read); + temptable_rec_size += p->table->table->file->ref_length; + } + } + + /* + Add the cost of temptable use. The table will have sj_outer_fanout + records, and we will make + - sj_outer_fanout table writes + - sj_inner_fanout*sj_outer_fanout lookups. 
+ + */ + double one_lookup_cost= get_tmp_table_lookup_cost(join->thd, + sj_outer_fanout, + temptable_rec_size); + double one_write_cost= get_tmp_table_write_cost(join->thd, + sj_outer_fanout, + temptable_rec_size); + + double write_cost= COST_MULT(join->positions[first_tab].prefix_record_count, + sj_outer_fanout * one_write_cost); + double full_lookup_cost= + COST_MULT(join->positions[first_tab].prefix_record_count, + COST_MULT(sj_outer_fanout, + sj_inner_fanout * one_lookup_cost)); + dups_cost= COST_ADD(dups_cost, COST_ADD(write_cost, full_lookup_cost)); + + *read_time= dups_cost; + *record_count= prefix_rec_count * sj_outer_fanout; + *handled_fanout= dups_removed_fanout; + *strategy= SJ_OPT_DUPS_WEEDOUT; + if (unlikely(trace.trace_started())) + { + trace.add("records", *record_count); + trace.add("read_time", *read_time); + } + return TRUE; + } + return FALSE; +} + +#ifndef DBUG_OFF +/* + Verify the value of JOIN::cur_sj_inner_tables by recomputing it +*/ +void JOIN::dbug_verify_sj_inner_tables(uint prefix_size) const +{ + table_map cur_map= const_table_map; + table_map nests_entered= 0; + if (emb_sjm_nest) + { + DBUG_ASSERT(cur_sj_inner_tables == 0); + return; + } + + for (uint i= const_tables; i < prefix_size; i++) + { + JOIN_TAB *tab= positions[i].table; + cur_map |= tab->table->map; + if (TABLE_LIST *sj_nest= tab->emb_sj_nest) + { + nests_entered |= sj_nest->sj_inner_tables; + if (!(sj_nest->sj_inner_tables & ~cur_map)) + { + // all nest tables are in the prefix already + nests_entered &= ~sj_nest->sj_inner_tables; + } + } + } + DBUG_ASSERT(nests_entered == cur_sj_inner_tables); +} +#endif + +/* + Remove the last join tab from from join->cur_sj_inner_tables bitmap + + @note + remaining_tables contains @tab. 
+
+  @seealso update_sj_state() does the reverse
+*/
+
+void restore_prev_sj_state(const table_map remaining_tables, 
+                           const JOIN_TAB *tab, uint idx)
+{
+  TABLE_LIST *emb_sj_nest;
+
+  if (tab->emb_sj_nest)
+  {
+    table_map subq_tables= tab->emb_sj_nest->sj_inner_tables;
+    tab->join->sjm_lookup_tables &= ~subq_tables;
+  }
+
+  if (!tab->join->emb_sjm_nest && (emb_sj_nest= tab->emb_sj_nest))
+  {
+    table_map subq_tables= emb_sj_nest->sj_inner_tables &
+                           ~tab->join->const_table_map;
+    /* If we're removing the last SJ-inner table from the prefix, leave the nest */
+    if ((remaining_tables & subq_tables) == subq_tables)
+    {
+      // All non-const tables of the SJ nest are back in remaining_tables:
+      // the join prefix no longer intersects the nest, so we are not in it.
+      tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
+    }
+    else
+    {
+      // Semi-join nest has:
+      // - a table being removed (not in the prefix)
+      // - some tables still in the prefix, so we remain inside the nest.
+      tab->join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
+    }
+  }
+
+#ifndef DBUG_OFF
+  /* positions[idx] has been removed. Verify the state for [0...idx-1] */
+  tab->join->dbug_verify_sj_inner_tables(idx);
+#endif
+}
+
+
+/*
+  Given a semi-join nest, find out which of the IN-equalities are bound
+
+  SYNOPSIS
+    get_bound_sj_equalities()
+      sj_nest           Semi-join nest
+      remaining_tables  Tables that are not yet bound
+
+  DESCRIPTION
+    Given a semi-join nest, find out which of the IN-equalities have their
+    left part expression bound (i.e. the said expression doesn't refer to
+    any of remaining_tables and can be evaluated).
+
+  RETURN
+    Bitmap of bound IN-equalities (bit i set <=> i-th equality is bound).
+*/
+
+ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest, 
+                                  table_map remaining_tables)
+{
+  List_iterator<Item_ptr> li(sj_nest->nested_join->sj_outer_expr_list);
+  Item **item;
+  uint i= 0;
+  ulonglong res= 0;
+  while ((item= li++))
+  {
+    /*
+      Q: should this take into account equality propagation and how?
+ A: If e->outer_side is an Item_field, walk over the equality + class and see if there is an element that is bound? + (this is an optional feature) + */ + if (!(item[0]->used_tables() & remaining_tables)) + { + res |= 1ULL << i; + } + i++; + } + return res; +} + + +/* + Check if the last tables of the partial join order allow to use + sj-materialization strategy for them + + SYNOPSIS + at_sjmat_pos() + join + remaining_tables + tab the last table's join tab + idx last table's index + loose_scan OUT TRUE <=> use LooseScan + + RETURN + TRUE Yes, can apply sj-materialization + FALSE No, some of the requirements are not met +*/ + +static SJ_MATERIALIZATION_INFO * +at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab, + uint idx, bool *loose_scan) +{ + /* + Check if + 1. We're in a semi-join nest that can be run with SJ-materialization + 2. All the tables correlated through the IN subquery are in the prefix + */ + TABLE_LIST *emb_sj_nest= tab->emb_sj_nest; + table_map suffix= remaining_tables & ~tab->table->map; + if (emb_sj_nest && emb_sj_nest->sj_mat_info && + !(suffix & emb_sj_nest->sj_inner_tables)) + { + /* + Walk back and check if all immediately preceding tables are from + this semi-join. 
+    */
+    uint n_tables= my_count_bits(tab->emb_sj_nest->sj_inner_tables);
+    for (uint i= 1; i < n_tables ; i++)
+    {
+      if (join->positions[idx - i].table->emb_sj_nest != tab->emb_sj_nest)
+        return NULL;
+    }
+    *loose_scan= MY_TEST(remaining_tables & ~tab->table->map &
+                         (emb_sj_nest->sj_inner_tables |
+                          emb_sj_nest->nested_join->sj_depends_on));
+    if (*loose_scan && !emb_sj_nest->sj_subq_pred->sjm_scan_allowed)
+      return NULL;
+    else
+      return emb_sj_nest->sj_mat_info;
+  }
+  return NULL;
+}
+
+
+/*
+  Re-calculate values of join->best_positions[start..end-1].prefix_record_count
+*/
+
+static void recalculate_prefix_record_count(JOIN *join, uint start, uint end)
+{
+  for (uint j= start; j < end ;j++)
+  {
+    double prefix_count;
+    if (j == join->const_tables)
+      prefix_count= 1.0;
+    else
+      prefix_count= COST_MULT(join->best_positions[j-1].prefix_record_count,
+                              join->best_positions[j-1].records_read);
+
+    join->best_positions[j].prefix_record_count= prefix_count;
+  }
+}
+
+
+/*
+  Fix semi-join strategies for the picked join order
+
+  SYNOPSIS
+    fix_semijoin_strategies_for_picked_join_order()
+      join  The join with the picked join order
+
+  DESCRIPTION
+    Fix semi-join strategies for the picked join order. This is a step that
+    needs to be done right after we have fixed the join order. What we do
+    here is switch join's semi-join strategy description from backward-based
+    to forwards based.
+
+    When join optimization is in progress, we re-consider semi-join
+    strategies after we've added another table. Here's an illustration.
+    Suppose the join optimization is underway:
+
+    1) ot1  it1  it2 
+               sjX  -- looking at (ot1, it1, it2) join prefix, we decide
+                       to use semi-join strategy sjX.
+
+    2) ot1  it1  it2  ot2 
+               sjX  sjY -- Having added table ot2, we now may consider
+                           another semi-join strategy and decide to use a 
+                           different strategy sjY. Note that the record
+                           of sjX has remained under it2. That is
+                           necessary because we need to be able to get
+                           back to (ot1, it1, it2) join prefix.
+ what makes things even worse is that there are cases where the choice + of sjY changes the way we should access it2. + + 3) [ot1 it1 it2 ot2 ot3] + sjX sjY -- This means that after join optimization is + finished, semi-join info should be read + right-to-left (while nearly all plan refinement + functions, EXPLAIN, etc proceed from left to + right) + + This function does the needed reversal, making it possible to read the + join and semi-join order from left to right. +*/ + +void fix_semijoin_strategies_for_picked_join_order(JOIN *join) +{ + join->sjm_lookup_tables= 0; + join->sjm_scan_tables= 0; + if (!join->select_lex->sj_nests.elements) + return; + + THD *thd= join->thd; + uint table_count=join->table_count; + uint tablenr; + table_map remaining_tables= 0; + table_map handled_tabs= 0; + Json_writer_object trace_wrapper(thd); + Json_writer_array trace_semijoin_strategies(thd, + "fix_semijoin_strategies_for_picked_join_order"); + + for (tablenr= table_count - 1 ; tablenr != join->const_tables - 1; tablenr--) + { + POSITION *pos= join->best_positions + tablenr; + JOIN_TAB *s= pos->table; + uint UNINIT_VAR(first); // Set by every branch except SJ_OPT_NONE which doesn't use it + + if ((handled_tabs & s->table->map) || pos->sj_strategy == SJ_OPT_NONE) + { + remaining_tables |= s->table->map; + continue; + } + + if (pos->sj_strategy == SJ_OPT_MATERIALIZE) + { + SJ_MATERIALIZATION_INFO *sjm= s->emb_sj_nest->sj_mat_info; + sjm->is_used= TRUE; + sjm->is_sj_scan= FALSE; + memcpy((uchar*) (pos - sjm->tables + 1), (uchar*) sjm->positions, + sizeof(POSITION) * sjm->tables); + recalculate_prefix_record_count(join, tablenr - sjm->tables + 1, + tablenr); + first= tablenr - sjm->tables + 1; + join->best_positions[first].n_sj_tables= sjm->tables; + join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE; + Json_writer_object semijoin_strategy(thd); + semijoin_strategy.add("semi_join_strategy","SJ-Materialization"); + Json_writer_array semijoin_plan(thd, "join_order"); + for 
(uint i= first; i < first+ sjm->tables; i++) + { + if (unlikely(thd->trace_started())) + { + Json_writer_object trace_one_table(thd); + trace_one_table.add_table_name(join->best_positions[i].table); + } + join->sjm_lookup_tables |= join->best_positions[i].table->table->map; + } + } + else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN) + { + POSITION *first_inner= join->best_positions + pos->sjmat_picker.sjm_scan_last_inner; + SJ_MATERIALIZATION_INFO *sjm= first_inner->table->emb_sj_nest->sj_mat_info; + sjm->is_used= TRUE; + sjm->is_sj_scan= TRUE; + first= pos->sjmat_picker.sjm_scan_last_inner - sjm->tables + 1; + memcpy((uchar*) (join->best_positions + first), + (uchar*) sjm->positions, sizeof(POSITION) * sjm->tables); + recalculate_prefix_record_count(join, first, first + sjm->tables); + join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN; + join->best_positions[first].n_sj_tables= sjm->tables; + /* + Do what optimize_semi_joins did: re-run best_access_path for every + table in the [last_inner_table + 1; pos..) 
range + */ + double prefix_rec_count; + /* Get the prefix record count */ + if (first == join->const_tables) + prefix_rec_count= 1.0; + else + prefix_rec_count= join->best_positions[first-1].prefix_record_count; + + /* Add materialization record count*/ + prefix_rec_count *= sjm->rows; + + uint i; + table_map rem_tables= remaining_tables; + for (i= tablenr; i != (first + sjm->tables - 1); i--) + rem_tables |= join->best_positions[i].table->table->map; + + for (i= first; i < first+ sjm->tables; i++) + join->sjm_scan_tables |= join->best_positions[i].table->table->map; + + POSITION dummy; + join->cur_sj_inner_tables= 0; + Json_writer_object semijoin_strategy(thd); + semijoin_strategy.add("semi_join_strategy","SJ-Materialization-Scan"); + Json_writer_array semijoin_plan(thd, "join_order"); + for (i= first + sjm->tables; i <= tablenr; i++) + { + Json_writer_object trace_one_table(thd); + if (unlikely(thd->trace_started())) + { + trace_one_table.add_table_name(join->best_positions[i].table); + } + best_access_path(join, join->best_positions[i].table, rem_tables, + join->best_positions, i, + FALSE, prefix_rec_count, + join->best_positions + i, &dummy); + prefix_rec_count *= join->best_positions[i].records_read; + rem_tables &= ~join->best_positions[i].table->table->map; + } + } + + if (pos->sj_strategy == SJ_OPT_FIRST_MATCH) + { + first= pos->firstmatch_picker.first_firstmatch_table; + join->best_positions[first].sj_strategy= SJ_OPT_FIRST_MATCH; + join->best_positions[first].n_sj_tables= tablenr - first + 1; + POSITION dummy; // For loose scan paths + double record_count= (first== join->const_tables)? 
1.0: + join->best_positions[tablenr - 1].prefix_record_count; + + table_map rem_tables= remaining_tables; + uint idx; + for (idx= first; idx <= tablenr; idx++) + { + rem_tables |= join->best_positions[idx].table->table->map; + } + /* + Re-run best_access_path to produce best access methods that do not use + join buffering + */ + join->cur_sj_inner_tables= 0; + Json_writer_object semijoin_strategy(thd); + semijoin_strategy.add("semi_join_strategy","FirstMatch"); + Json_writer_array semijoin_plan(thd, "join_order"); + for (idx= first; idx <= tablenr; idx++) + { + Json_writer_object trace_one_table(thd); + if (unlikely(thd->trace_started())) + { + trace_one_table.add_table_name(join->best_positions[idx].table); + } + if (join->best_positions[idx].use_join_buffer) + { + best_access_path(join, join->best_positions[idx].table, + rem_tables, join->best_positions, idx, + TRUE /* no jbuf */, + record_count, join->best_positions + idx, &dummy); + } + record_count *= join->best_positions[idx].records_read; + rem_tables &= ~join->best_positions[idx].table->table->map; + } + } + + if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN) + { + first= pos->loosescan_picker.first_loosescan_table; + POSITION *first_pos= join->best_positions + first; + POSITION loose_scan_pos; // For loose scan paths + double record_count= (first== join->const_tables)? 
1.0: + join->best_positions[tablenr - 1].prefix_record_count; + + table_map rem_tables= remaining_tables; + uint idx; + for (idx= first; idx <= tablenr; idx++) + rem_tables |= join->best_positions[idx].table->table->map; + /* + Re-run best_access_path to produce best access methods that do not use + join buffering + */ + join->cur_sj_inner_tables= 0; + Json_writer_object semijoin_strategy(thd); + semijoin_strategy.add("semi_join_strategy","LooseScan"); + Json_writer_array semijoin_plan(thd, "join_order"); + for (idx= first; idx <= tablenr; idx++) + { + Json_writer_object trace_one_table(thd); + if (unlikely(thd->trace_started())) + { + trace_one_table.add_table_name(join->best_positions[idx].table); + } + if (join->best_positions[idx].use_join_buffer || (idx == first)) + { + best_access_path(join, join->best_positions[idx].table, + rem_tables, join->best_positions, idx, + TRUE /* no jbuf */, + record_count, join->best_positions + idx, + &loose_scan_pos); + if (idx==first) + { + join->best_positions[idx]= loose_scan_pos; + /* + If LooseScan is based on ref access (including the "degenerate" + one with 0 key parts), we should use full index scan. + + Unfortunately, lots of code assumes that if tab->type==JT_ALL && + tab->quick!=NULL, then quick select should be used. 
The only + simple way to fix this is to remove the quick select: + */ + if (join->best_positions[idx].key) + { + delete join->best_positions[idx].table->quick; + join->best_positions[idx].table->quick= NULL; + } + } + } + rem_tables &= ~join->best_positions[idx].table->table->map; + record_count *= join->best_positions[idx].records_read; + } + first_pos->sj_strategy= SJ_OPT_LOOSE_SCAN; + first_pos->n_sj_tables= my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables); + } + + if (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT) + { + Json_writer_object semijoin_strategy(thd); + semijoin_strategy.add("semi_join_strategy","DuplicateWeedout"); + /* + Duplicate Weedout starting at pos->first_dupsweedout_table, ending at + this table. + */ + first= pos->dups_weedout_picker.first_dupsweedout_table; + join->best_positions[first].sj_strategy= SJ_OPT_DUPS_WEEDOUT; + join->best_positions[first].n_sj_tables= tablenr - first + 1; + } + + uint i_end= first + join->best_positions[first].n_sj_tables; + for (uint i= first; i < i_end; i++) + { + if (i != first) + join->best_positions[i].sj_strategy= SJ_OPT_NONE; + handled_tabs |= join->best_positions[i].table->table->map; + } + + if (tablenr != first) + pos->sj_strategy= SJ_OPT_NONE; + remaining_tables |= s->table->map; + join->join_tab[first].sj_strategy= join->best_positions[first].sj_strategy; + join->join_tab[first].n_sj_tables= join->best_positions[first].n_sj_tables; + } +} + + +/* + Return the number of tables at the top-level of the JOIN + + SYNOPSIS + get_number_of_tables_at_top_level() + join The join with the picked join order + + DESCRIPTION + The number of tables in the JOIN currently include all the inner tables of the + mergeable semi-joins. The function would make sure that we only count the semi-join + nest and not the inner tables of teh semi-join nest. 
+*/
+
+uint get_number_of_tables_at_top_level(JOIN *join)
+{
+  uint j= 0, tables= 0;
+  while(j < join->table_count)
+  {
+    POSITION *cur_pos= &join->best_positions[j];
+    tables++;
+    if (cur_pos->sj_strategy == SJ_OPT_MATERIALIZE ||
+        cur_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
+    {
+      SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info;
+      j= j + sjm->tables;
+    }
+    else
+      j++;
+  }
+  return tables;
+}
+
+
+/*
+  Setup semi-join materialization strategy for one semi-join nest
+
+  SYNOPSIS
+
+  setup_sj_materialization()
+    tab  The first tab in the semi-join
+
+  DESCRIPTION
+    Setup execution structures for one semi-join materialization nest:
+    - Create the materialization temporary table
+    - If we're going to do index lookups
+        create TABLE_REF structure to make the lookups
+    - else (if we're going to do a full scan of the temptable)
+        create Copy_field structures to do copying.
+
+  RETURN
+    FALSE  Ok
+    TRUE   Error
+*/
+
+bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab)
+{
+  JOIN_TAB *tab= sjm_tab->bush_children->start;
+  TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding;
+  SJ_MATERIALIZATION_INFO *sjm;
+  THD *thd;
+
+  DBUG_ENTER("setup_sj_materialization");
+
+  /* Walk out of outer join nests until we reach the semi-join nest we're in */
+  while (!emb_sj_nest->sj_mat_info)
+    emb_sj_nest= emb_sj_nest->embedding;
+
+  sjm= emb_sj_nest->sj_mat_info;
+  thd= tab->join->thd;
+  /* First the calls come to the materialization function */
+
+  DBUG_ASSERT(sjm->is_used);
+  /*
+    Set up the table to write to, do as select_union::create_result_table does
+  */
+  sjm->sjm_table_param.init();
+  sjm->sjm_table_param.bit_fields_as_long= TRUE;
+  SELECT_LEX *subq_select= emb_sj_nest->sj_subq_pred->unit->first_select();
+  const LEX_CSTRING sj_materialize_name= { STRING_WITH_LEN("sj-materialize") };
+  List_iterator<Item> it(subq_select->item_list);
+  Item *item;
+  while((item= it++))
+  {
+    /*
+      This semi-join replaced the subquery (subq_select) and so on
re-executing it will not be prepared. To use the Items from its + select list we have to prepare (fix_fields) them + */ + if (item->fix_fields_if_needed(thd, it.ref())) + DBUG_RETURN(TRUE); + item= *(it.ref()); // it can be changed by fix_fields + DBUG_ASSERT(!item->name.length || item->name.length == strlen(item->name.str)); + sjm->sjm_table_cols.push_back(item, thd->mem_root); + } + + sjm->sjm_table_param.field_count= subq_select->item_list.elements; + sjm->sjm_table_param.func_count= sjm->sjm_table_param.field_count; + sjm->sjm_table_param.force_not_null_cols= TRUE; + + if (!(sjm->table= create_tmp_table(thd, &sjm->sjm_table_param, + sjm->sjm_table_cols, (ORDER*) 0, + TRUE /* distinct */, + 1, /*save_sum_fields*/ + thd->variables.option_bits | TMP_TABLE_ALL_COLUMNS, + HA_POS_ERROR /*rows_limit */, + &sj_materialize_name))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + sjm->table->map= emb_sj_nest->nested_join->used_tables; + sjm->table->file->extra(HA_EXTRA_WRITE_CACHE); + sjm->table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + + tab->join->sj_tmp_tables.push_back(sjm->table, thd->mem_root); + tab->join->sjm_info_list.push_back(sjm, thd->mem_root); + + sjm->materialized= FALSE; + sjm_tab->table= sjm->table; + sjm_tab->tab_list= emb_sj_nest; + sjm->table->pos_in_table_list= emb_sj_nest; + + DBUG_RETURN(FALSE); +} + +/** + @retval + FALSE ok + TRUE error +*/ + +bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab) +{ + DBUG_ENTER("setup_sj_materialization_part2"); + JOIN_TAB *tab= sjm_tab->bush_children->start; + TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding; + /* Walk out of outer join nests until we reach the semi-join nest we're in */ + while (!emb_sj_nest->sj_mat_info) + emb_sj_nest= emb_sj_nest->embedding; + SJ_MATERIALIZATION_INFO *sjm= emb_sj_nest->sj_mat_info; + THD *thd= tab->join->thd; + uint i; + + if (!sjm->is_sj_scan) + { + KEY *tmp_key; /* The only index on the temporary table. 
*/ + uint tmp_key_parts; /* Number of keyparts in tmp_key. */ + tmp_key= sjm->table->key_info; + tmp_key_parts= tmp_key->user_defined_key_parts; + + /* + Create/initialize everything we will need to index lookups into the + temptable. + */ + TABLE_REF *tab_ref; + tab_ref= &sjm_tab->ref; + tab_ref->key= 0; /* The only temp table index. */ + tab_ref->key_length= tmp_key->key_length; + if (!(tab_ref->key_buff= + (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) || + !(tab_ref->key_copy= + (store_key**) thd->alloc((sizeof(store_key*) * + (tmp_key_parts + 1)))) || + !(tab_ref->items= + (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + + tab_ref->key_buff2=tab_ref->key_buff+ALIGN_SIZE(tmp_key->key_length); + tab_ref->key_err=1; + tab_ref->null_rejecting= 1; + tab_ref->disable_cache= FALSE; + + KEY_PART_INFO *cur_key_part= tmp_key->key_part; + store_key **ref_key= tab_ref->key_copy; + uchar *cur_ref_buff= tab_ref->key_buff; + + for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++) + { + tab_ref->items[i]= + emb_sj_nest->sj_subq_pred->left_exp()->element_index(i); + int null_count= MY_TEST(cur_key_part->field->real_maybe_null()); + *ref_key= new store_key_item(thd, cur_key_part->field, + /* TODO: + the NULL byte is taken into account in + cur_key_part->store_length, so instead of + cur_ref_buff + MY_TEST(maybe_null), we could + use that information instead. + */ + cur_ref_buff + null_count, + null_count ? cur_ref_buff : 0, + cur_key_part->length, tab_ref->items[i], + FALSE); + if (!*ref_key) + DBUG_RETURN(TRUE); + cur_ref_buff+= cur_key_part->store_length; + } + *ref_key= NULL; /* End marker. */ + + /* + We don't ever have guarded conditions for SJM tables, but code at SQL + layer depends on cond_guards array being alloced. 
+ */ + if (!(tab_ref->cond_guards= (bool**) thd->calloc(sizeof(uint*)*tmp_key_parts))) + { + DBUG_RETURN(TRUE); + } + + tab_ref->key_err= 1; + tab_ref->key_parts= tmp_key_parts; + sjm->tab_ref= tab_ref; + + /* + Remove the injected semi-join IN-equalities from join_tab conds. This + needs to be done because the IN-equalities refer to columns of + sj-inner tables which are not available after the materialization + has been finished. + */ + for (i= 0; i < sjm->tables; i++) + { + if (remove_sj_conds(thd, &tab[i].select_cond) || + (tab[i].select && remove_sj_conds(thd, &tab[i].select->cond))) + DBUG_RETURN(TRUE); + } + if (!(sjm->in_equality= create_subq_in_equalities(thd, sjm, + emb_sj_nest->sj_subq_pred))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + sjm_tab->type= JT_EQ_REF; + sjm_tab->select_cond= sjm->in_equality; + } + else + { + /* + We'll be doing full scan of the temptable. + Setup copying of temptable columns back to the record buffers + for their source tables. We need this because IN-equalities + refer to the original tables. + + EXAMPLE + + Consider the query: + SELECT * FROM ot WHERE ot.col1 IN (SELECT it.col2 FROM it) + + Suppose it's executed with SJ-Materialization-scan. We choose to do scan + if we can't do the lookup, i.e. the join order is (it, ot). The plan + would look as follows: + + table access method condition + it materialize+scan - + ot (whatever) ot1.col1=it.col2 (C2) + + The condition C2 refers to current row of table it. The problem is + that by the time we evaluate C2, we would have finished with scanning + it itself and will be scanning the temptable. + + At the moment, our solution is to copy back: when we get the next + temptable record, we copy its columns to their corresponding columns + in the record buffers for the source tables. 
+ */ + if (!(sjm->copy_field= new Copy_field[sjm->sjm_table_cols.elements])) + DBUG_RETURN(TRUE); + + //it.rewind(); + Ref_ptr_array p_items= emb_sj_nest->sj_subq_pred->unit->first_select()->ref_pointer_array; + for (uint i=0; i < sjm->sjm_table_cols.elements; i++) + { + bool dummy; + Item_equal *item_eq; + //Item *item= (it++)->real_item(); + Item *item= p_items[i]->real_item(); + DBUG_ASSERT(item->type() == Item::FIELD_ITEM); + Field *copy_to= ((Item_field*)item)->field; + /* + Tricks with Item_equal are due to the following: suppose we have a + query: + + ... WHERE cond(ot.col) AND ot.col IN (SELECT it2.col FROM it1,it2 + WHERE it1.col= it2.col) + then equality propagation will create an + + Item_equal(it1.col, it2.col, ot.col) + + then substitute_for_best_equal_field() will change the conditions + according to the join order: + + table | attached condition + ------+-------------------- + it1 | + it2 | it1.col=it2.col + ot | cond(it1.col) + + although we've originally had "SELECT it2.col", conditions attached + to subsequent outer tables will refer to it1.col, so SJM-Scan will + need to unpack data to there. + That is, if an element from subquery's select list participates in + equality propagation, then we need to unpack it to the first + element equality propagation member that refers to table that is + within the subquery. 
+ */ + item_eq= find_item_equal(tab->join->cond_equal, copy_to, &dummy); + + if (item_eq) + { + List_iterator it(item_eq->equal_items); + /* We're interested in field items only */ + if (item_eq->get_const()) + it++; + Item *item; + while ((item= it++)) + { + if (!(item->used_tables() & ~emb_sj_nest->sj_inner_tables)) + { + DBUG_ASSERT(item->real_item()->type() == Item::FIELD_ITEM); + copy_to= ((Item_field *) (item->real_item()))->field; + break; + } + } + } + sjm->copy_field[i].set(copy_to, sjm->table->field[i], FALSE); + /* The write_set for source tables must be set up to allow the copying */ + bitmap_set_bit(copy_to->table->write_set, copy_to->field_index); + } + sjm_tab->type= JT_ALL; + + /* Initialize full scan */ + sjm_tab->read_first_record= join_init_read_record; + sjm_tab->read_record.copy_field= sjm->copy_field; + sjm_tab->read_record.copy_field_end= sjm->copy_field + + sjm->sjm_table_cols.elements; + sjm_tab->read_record.read_record_func= read_record_func_for_rr_and_unpack; + } + + sjm_tab->bush_children->end[-1].next_select= end_sj_materialize; + + DBUG_RETURN(FALSE); +} + + + +/* + Create subquery IN-equalities assuming use of materialization strategy + + SYNOPSIS + create_subq_in_equalities() + thd Thread handle + sjm Semi-join materialization structure + subq_pred The subquery predicate + + DESCRIPTION + Create subquery IN-equality predicates. That is, for a subquery + + (oe1, oe2, ...) IN (SELECT ie1, ie2, ... FROM ...) + + create "oe1=ie1 AND ie1=ie2 AND ..." expression, such that ie1, ie2, .. + refer to the columns of the table that's used to materialize the + subquery. 
+ + RETURN + Created condition +*/ + +static Item *create_subq_in_equalities(THD *thd, SJ_MATERIALIZATION_INFO *sjm, + Item_in_subselect *subq_pred) +{ + Item *res= NULL; + Item *left_exp= subq_pred->left_exp(); + uint ncols= left_exp->cols(); + if (ncols == 1) + { + if (!(res= new (thd->mem_root) Item_func_eq(thd, left_exp, + new (thd->mem_root) Item_field(thd, sjm->table->field[0])))) + return NULL; /* purecov: inspected */ + } + else + { + Item *conj; + for (uint i= 0; i < ncols; i++) + { + if (!(conj= new (thd->mem_root) Item_func_eq(thd, left_exp->element_index(i), + new (thd->mem_root) Item_field(thd, sjm->table->field[i]))) || + !(res= and_items(thd, res, conj))) + return NULL; /* purecov: inspected */ + } + } + if (res->fix_fields(thd, &res)) + return NULL; /* purecov: inspected */ + return res; +} + + +/** + @retval + 0 ok + 1 error +*/ + +static bool remove_sj_conds(THD *thd, Item **tree) +{ + if (*tree) + { + if (is_cond_sj_in_equality(*tree)) + { + *tree= NULL; + return 0; + } + else if ((*tree)->type() == Item::COND_ITEM) + { + Item *item; + List_iterator li(*(((Item_cond*)*tree)->argument_list())); + while ((item= li++)) + { + if (is_cond_sj_in_equality(item)) + { + Item_int *tmp= new (thd->mem_root) Item_int(thd, 1); + if (!tmp) + return 1; + li.replace(tmp); + } + } + } + } + return 0; +} + + +/* Check if given Item was injected by semi-join equality */ +static bool is_cond_sj_in_equality(Item *item) +{ + if (item->type() == Item::FUNC_ITEM && + ((Item_func*)item)->functype()== Item_func::EQ_FUNC) + { + Item_func_eq *item_eq= (Item_func_eq*)item; + return MY_TEST(item_eq->in_equality_no != UINT_MAX); + } + return FALSE; +} + + +/* + Create a temporary table to weed out duplicate rowid combinations + + SYNOPSIS + + create_sj_weedout_tmp_table() + thd Thread handle + + DESCRIPTION + Create a temporary table to weed out duplicate rowid combinations. The + table has a single column that is a concatenation of all rowids in the + combination. 
+ + Depending on the needed length, there are two cases: + + 1. When the length of the column < max_key_length: + + CREATE TABLE tmp (col VARBINARY(n) NOT NULL, UNIQUE KEY(col)); + + 2. Otherwise (not a valid SQL syntax but internally supported): + + CREATE TABLE tmp (col VARBINARY NOT NULL, UNIQUE CONSTRAINT(col)); + + The code in this function was produced by extraction of relevant parts + from create_tmp_table(). + + RETURN + created table + NULL on error +*/ + +bool +SJ_TMP_TABLE::create_sj_weedout_tmp_table(THD *thd) +{ + MEM_ROOT *mem_root_save, own_root; + TABLE *table; + TABLE_SHARE *share; + uint temp_pool_slot=MY_BIT_NONE; + char *tmpname,path[FN_REFLEN]; + Field **reg_field; + KEY_PART_INFO *key_part_info; + KEY *keyinfo; + uchar *group_buff; + uchar *bitmaps; + uint *blob_field; + bool using_unique_constraint=FALSE; + bool use_packed_rows= FALSE; + Field *field, *key_field; + uint null_pack_length, null_count; + uchar *null_flags; + uchar *pos; + DBUG_ENTER("create_sj_weedout_tmp_table"); + DBUG_ASSERT(!is_degenerate); + + tmp_table= NULL; + uint uniq_tuple_length_arg= rowid_len + null_bytes; + /* + STEP 1: Get temporary table name + */ + if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES)) + temp_pool_slot = temp_pool_set_next(); + + if (temp_pool_slot != MY_BIT_NONE) // we got a slot + sprintf(path, "%s-subquery-%lx-%i", tmp_file_prefix, + current_pid, temp_pool_slot); + else + { + /* if we run out of slots or we are not using tempool */ + sprintf(path,"%s-subquery-%lx-%lx-%x", tmp_file_prefix,current_pid, + (ulong) thd->thread_id, thd->tmp_table++); + } + fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME); + + /* STEP 2: Figure if we'll be using a key or blob+constraint */ + /* it always has my_charset_bin, so mbmaxlen==1 */ + if (uniq_tuple_length_arg >= CONVERT_IF_BIGGER_TO_BLOB) + using_unique_constraint= TRUE; + + /* STEP 3: Allocate memory for temptable description */ + init_sql_alloc(PSI_INSTRUMENT_ME, &own_root, 
TABLE_ALLOC_BLOCK_SIZE, 0, + MYF(MY_THREAD_SPECIFIC)); + if (!multi_alloc_root(&own_root, + &table, sizeof(*table), + &share, sizeof(*share), + ®_field, sizeof(Field*) * (1+1), + &blob_field, sizeof(uint)*2, + &keyinfo, sizeof(*keyinfo), + &key_part_info, sizeof(*key_part_info) * 2, + &start_recinfo, + sizeof(*recinfo)*(1*2+4), + &tmpname, (uint) strlen(path)+1, + &group_buff, (!using_unique_constraint ? + uniq_tuple_length_arg : 0), + &bitmaps, bitmap_buffer_size(1)*6, + NullS)) + { + if (temp_pool_slot != MY_BIT_NONE) + temp_pool_clear_bit(temp_pool_slot); + DBUG_RETURN(TRUE); + } + strmov(tmpname,path); + + + /* STEP 4: Create TABLE description */ + bzero((char*) table,sizeof(*table)); + bzero((char*) reg_field,sizeof(Field*)*2); + + table->mem_root= own_root; + mem_root_save= thd->mem_root; + thd->mem_root= &table->mem_root; + + table->field=reg_field; + table->alias.set("weedout-tmp", sizeof("weedout-tmp")-1, + table_alias_charset); + table->reginfo.lock_type=TL_WRITE; /* Will be updated */ + table->db_stat=HA_OPEN_KEYFILE; + table->map=1; + table->temp_pool_slot = temp_pool_slot; + table->copy_blobs= 1; + table->in_use= thd; + + table->s= share; + init_tmp_table_share(thd, share, "", 0, tmpname, tmpname); + share->blob_field= blob_field; + share->table_charset= NULL; + share->primary_key= MAX_KEY; // Indicate no primary key + + /* Create the field */ + { + LEX_CSTRING field_name= {STRING_WITH_LEN("rowids") }; + /* + For the sake of uniformity, always use Field_varstring (altough we could + use Field_string for shorter keys) + */ + field= new Field_varstring(uniq_tuple_length_arg, FALSE, &field_name, + share, &my_charset_bin); + if (!field) + DBUG_RETURN(0); + field->table= table; + field->key_start.clear_all(); + field->part_of_key.clear_all(); + field->part_of_sortkey.clear_all(); + field->unireg_check= Field::NONE; + field->flags= (NOT_NULL_FLAG | BINARY_FLAG | NO_DEFAULT_VALUE_FLAG); + field->reset_fields(); + field->init(table); + field->orig_table= NULL; 
+ + field->field_index= 0; + + *(reg_field++)= field; + *blob_field= 0; + *reg_field= 0; + + share->fields= 1; + share->blob_fields= 0; + } + + uint reclength= field->pack_length(); + if (using_unique_constraint || thd->variables.tmp_memory_table_size == 0) + { + share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON); + table->file= get_new_handler(share, &table->mem_root, + share->db_type()); + } + else + { + share->db_plugin= ha_lock_engine(0, heap_hton); + table->file= get_new_handler(share, &table->mem_root, + share->db_type()); + DBUG_ASSERT(!table->file || uniq_tuple_length_arg <= table->file->max_key_length()); + } + if (!table->file) + goto err; + + if (table->file->set_ha_share_ref(&share->ha_share)) + { + delete table->file; + goto err; + } + + null_count=1; + + null_pack_length= 1; + reclength += null_pack_length; + + share->reclength= reclength; + { + uint alloc_length=ALIGN_SIZE(share->reclength + MI_UNIQUE_HASH_LENGTH+1); + share->rec_buff_length= alloc_length; + if (!(table->record[0]= (uchar*) + alloc_root(&table->mem_root, alloc_length*3))) + goto err; + table->record[1]= table->record[0]+alloc_length; + share->default_values= table->record[1]+alloc_length; + } + setup_tmp_table_column_bitmaps(table, bitmaps, table->s->fields); + + recinfo= start_recinfo; + null_flags=(uchar*) table->record[0]; + pos=table->record[0]+ null_pack_length; + if (null_pack_length) + { + bzero((uchar*) recinfo,sizeof(*recinfo)); + recinfo->type=FIELD_NORMAL; + recinfo->length=null_pack_length; + recinfo++; + bfill(null_flags,null_pack_length,255); // Set null fields + + table->null_flags= (uchar*) table->record[0]; + share->null_fields= null_count; + share->null_bytes= null_pack_length; + } + null_count=1; + + { + //Field *field= *reg_field; + uint length; + bzero((uchar*) recinfo,sizeof(*recinfo)); + field->move_field(pos,(uchar*) 0,0); + + field->reset(); + /* + Test if there is a default field value. 
The test for ->ptr is to skip + 'offset' fields generated by initialize_tables + */ + // Initialize the table field: + bzero(field->ptr, field->pack_length()); + + length=field->pack_length(); + pos+= length; + + /* Make entry for create table */ + recinfo->length=length; + recinfo->type= field->tmp_engine_column_type(use_packed_rows); + field->set_table_name(&table->alias); + } + + if (thd->variables.tmp_memory_table_size == ~ (ulonglong) 0) // No limit + share->max_rows= ~(ha_rows) 0; + else + share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ? + MY_MIN(thd->variables.tmp_memory_table_size, + thd->variables.max_heap_table_size) : + thd->variables.tmp_disk_table_size) / + share->reclength); + set_if_bigger(share->max_rows,1); // For dummy start options + + + //// keyinfo= param->keyinfo; + if (TRUE) + { + DBUG_PRINT("info",("Creating group key in temporary table")); + share->keys=1; + share->uniques= MY_TEST(using_unique_constraint); + table->key_info=keyinfo; + keyinfo->key_part=key_part_info; + keyinfo->flags=HA_NOSAME; + keyinfo->usable_key_parts= keyinfo->user_defined_key_parts= 1; + keyinfo->key_length=0; + keyinfo->rec_per_key=0; + keyinfo->algorithm= HA_KEY_ALG_UNDEF; + keyinfo->name= weedout_key; + { + key_part_info->null_bit=0; + key_part_info->field= field; + key_part_info->offset= field->offset(table->record[0]); + key_part_info->length= (uint16) field->key_length(); + key_part_info->type= (uint8) field->key_type(); + key_part_info->key_type = FIELDFLAG_BINARY; + if (!using_unique_constraint) + { + if (!(key_field= field->new_key_field(thd->mem_root, table, + group_buff, + key_part_info->length, + field->null_ptr, + field->null_bit))) + goto err; + } + keyinfo->key_length+= key_part_info->length; + } + } + + if (unlikely(thd->is_fatal_error)) // If end of memory + goto err; + share->db_record_offset= 1; + table->no_rows= 1; // We don't need the data + + // recinfo must point after last field + recinfo++; + if (share->db_type() == 
TMP_ENGINE_HTON) + { + if (unlikely(create_internal_tmp_table(table, keyinfo, start_recinfo, + &recinfo, 0))) + goto err; + } + if (unlikely(open_tmp_table(table))) + goto err; + + thd->mem_root= mem_root_save; + tmp_table= table; + DBUG_RETURN(FALSE); + +err: + thd->mem_root= mem_root_save; + free_tmp_table(thd,table); /* purecov: inspected */ + if (temp_pool_slot != MY_BIT_NONE) + temp_pool_clear_bit(temp_pool_slot); + DBUG_RETURN(TRUE); /* purecov: inspected */ +} + + +/* + SemiJoinDuplicateElimination: Reset the temporary table +*/ + +int SJ_TMP_TABLE::sj_weedout_delete_rows() +{ + DBUG_ENTER("SJ_TMP_TABLE::sj_weedout_delete_rows"); + if (tmp_table) + { + int rc= tmp_table->file->ha_delete_all_rows(); + DBUG_RETURN(rc); + } + have_degenerate_row= FALSE; + DBUG_RETURN(0); +} + + +/* + SemiJoinDuplicateElimination: Weed out duplicate row combinations + + SYNPOSIS + sj_weedout_check_row() + thd Thread handle + + DESCRIPTION + Try storing current record combination of outer tables (i.e. their + rowids) in the temporary table. This records the fact that we've seen + this record combination and also tells us if we've seen it before. + + RETURN + -1 Error + 1 The row combination is a duplicate (discard it) + 0 The row combination is not a duplicate (continue) +*/ + +int SJ_TMP_TABLE::sj_weedout_check_row(THD *thd) +{ + int error; + SJ_TMP_TABLE::TAB *tab= tabs; + SJ_TMP_TABLE::TAB *tab_end= tabs_end; + uchar *ptr; + uchar *nulls_ptr; + + DBUG_ENTER("SJ_TMP_TABLE::sj_weedout_check_row"); + + if (is_degenerate) + { + if (have_degenerate_row) + DBUG_RETURN(1); + + have_degenerate_row= TRUE; + DBUG_RETURN(0); + } + + ptr= tmp_table->record[0] + 1; + + /* Put the the rowids tuple into table->record[0]: */ + + // 1. Store the length + if (((Field_varstring*)(tmp_table->field[0]))->length_bytes == 1) + { + *ptr= (uchar)(rowid_len + null_bytes); + ptr++; + } + else + { + int2store(ptr, rowid_len + null_bytes); + ptr += 2; + } + + nulls_ptr= ptr; + // 2. 
Zero the null bytes + if (null_bytes) + { + bzero(ptr, null_bytes); + ptr += null_bytes; + } + + // 3. Put the rowids + for (uint i=0; tab != tab_end; tab++, i++) + { + handler *h= tab->join_tab->table->file; + if (tab->join_tab->table->maybe_null && tab->join_tab->table->null_row) + { + /* It's a NULL-complemented row */ + *(nulls_ptr + tab->null_byte) |= tab->null_bit; + bzero(ptr + tab->rowid_offset, h->ref_length); + } + else + { + /* Copy the rowid value */ + memcpy(ptr + tab->rowid_offset, h->ref, h->ref_length); + } + } + + error= tmp_table->file->ha_write_tmp_row(tmp_table->record[0]); + if (unlikely(error)) + { + /* create_internal_tmp_table_from_heap will generate error if needed */ + if (!tmp_table->file->is_fatal_error(error, HA_CHECK_DUP)) + DBUG_RETURN(1); /* Duplicate */ + + bool is_duplicate; + if (create_internal_tmp_table_from_heap(thd, tmp_table, start_recinfo, + &recinfo, error, 1, &is_duplicate)) + DBUG_RETURN(-1); + if (is_duplicate) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +int init_dups_weedout(JOIN *join, uint first_table, int first_fanout_table, uint n_tables) +{ + THD *thd= join->thd; + DBUG_ENTER("init_dups_weedout"); + SJ_TMP_TABLE::TAB sjtabs[MAX_TABLES]; + SJ_TMP_TABLE::TAB *last_tab= sjtabs; + uint jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes) + uint jt_null_bits= 0; // # null bits in tuple bytes + /* + Walk through the range and remember + - tables that need their rowids to be put into temptable + - the last outer table + */ + for (JOIN_TAB *j=join->join_tab + first_table; + j < join->join_tab + first_table + n_tables; j++) + { + if (sj_table_is_included(join, j)) + { + last_tab->join_tab= j; + last_tab->rowid_offset= jt_rowid_offset; + jt_rowid_offset += j->table->file->ref_length; + if (j->table->maybe_null) + { + last_tab->null_byte= jt_null_bits / 8; + last_tab->null_bit= jt_null_bits++; + } + last_tab++; + j->table->prepare_for_position(); + j->keep_current_rowid= TRUE; + } + } + + 
SJ_TMP_TABLE *sjtbl; + if (jt_rowid_offset) /* Temptable has at least one rowid */ + { + size_t tabs_size= (last_tab - sjtabs) * sizeof(SJ_TMP_TABLE::TAB); + if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE))) || + !(sjtbl->tabs= (SJ_TMP_TABLE::TAB*) thd->alloc(tabs_size))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + memcpy(sjtbl->tabs, sjtabs, tabs_size); + sjtbl->is_degenerate= FALSE; + sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs); + sjtbl->rowid_len= jt_rowid_offset; + sjtbl->null_bits= jt_null_bits; + sjtbl->null_bytes= (jt_null_bits + 7)/8; + if (sjtbl->create_sj_weedout_tmp_table(thd)) + DBUG_RETURN(TRUE); + join->sj_tmp_tables.push_back(sjtbl->tmp_table, thd->mem_root); + } + else + { + /* + This is a special case where the entire subquery predicate does + not depend on anything at all, ie this is + WHERE const IN (uncorrelated select) + */ + if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE)))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + sjtbl->tmp_table= NULL; + sjtbl->is_degenerate= TRUE; + sjtbl->have_degenerate_row= FALSE; + } + + sjtbl->next_flush_table= join->join_tab[first_table].flush_weedout_table; + join->join_tab[first_table].flush_weedout_table= sjtbl; + join->join_tab[first_fanout_table].first_weedout_table= sjtbl; + join->join_tab[first_table + n_tables - 1].check_weed_out_table= sjtbl; + DBUG_RETURN(0); +} + + +/* + @brief + Set up semi-join Loose Scan strategy for execution + + @detail + Other strategies are done in setup_semijoin_dups_elimination(), + however, we need to set up Loose Scan earlier, before make_join_select is + called. This is to prevent make_join_select() from switching full index + scans into quick selects (which will break Loose Scan access). 
+ + @return + 0 OK + 1 Error +*/ + +int setup_semijoin_loosescan(JOIN *join) +{ + uint i; + DBUG_ENTER("setup_semijoin_loosescan"); + + POSITION *pos= join->best_positions + join->const_tables; + for (i= join->const_tables ; i < join->top_join_tab_count; ) + { + JOIN_TAB *tab=join->join_tab + i; + switch (pos->sj_strategy) { + case SJ_OPT_MATERIALIZE: + case SJ_OPT_MATERIALIZE_SCAN: + i+= 1; /* join tabs are embedded in the nest */ + pos += pos->n_sj_tables; + break; + case SJ_OPT_LOOSE_SCAN: + { + /* We jump from the last table to the first one */ + tab->loosescan_match_tab= tab + pos->n_sj_tables - 1; + + /* LooseScan requires records to be produced in order */ + if (tab->select && tab->select->quick) + tab->select->quick->need_sorted_output(); + + for (uint j= i; j < i + pos->n_sj_tables; j++) + join->join_tab[j].inside_loosescan_range= TRUE; + + /* Calculate key length */ + uint keylen= 0; + uint keyno= pos->loosescan_picker.loosescan_key; + for (uint kp=0; kp < pos->loosescan_picker.loosescan_parts; kp++) + keylen += tab->table->key_info[keyno].key_part[kp].store_length; + + tab->loosescan_key= keyno; + tab->loosescan_key_len= keylen; + if (pos->n_sj_tables > 1) + tab[pos->n_sj_tables - 1].do_firstmatch= tab; + i+= pos->n_sj_tables; + pos+= pos->n_sj_tables; + break; + } + default: + { + i++; + pos++; + break; + } + } + } + DBUG_RETURN(FALSE); +} + + +/* + Setup the strategies to eliminate semi-join duplicates. + + SYNOPSIS + setup_semijoin_dups_elimination() + join Join to process + options Join options (needed to see if join buffering will be + used or not) + no_jbuf_after Another bit of information re where join buffering will + be used. + + DESCRIPTION + Setup the strategies to eliminate semi-join duplicates. ATM there are 4 + strategies: + + 1. DuplicateWeedout (use of temptable to remove duplicates based on rowids + of row combinations) + 2. FirstMatch (pick only the 1st matching row combination of inner tables) + 3. 
LooseScan (scanning the sj-inner table in a way that groups duplicates + together and picking the 1st one) + 4. SJ-Materialization. + + The join order has "duplicate-generating ranges", and every range is + served by one strategy or a combination of FirstMatch with with some + other strategy. + + "Duplicate-generating range" is defined as a range within the join order + that contains all of the inner tables of a semi-join. All ranges must be + disjoint, if tables of several semi-joins are interleaved, then the ranges + are joined together, which is equivalent to converting + SELECT ... WHERE oe1 IN (SELECT ie1 ...) AND oe2 IN (SELECT ie2 ) + to + SELECT ... WHERE (oe1, oe2) IN (SELECT ie1, ie2 ... ...) + . + + Applicability conditions are as follows: + + DuplicateWeedout strategy + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + (ot|nt)* [ it ((it|ot|nt)* (it|ot))] (nt)* + +------+ +=========================+ +---+ + (1) (2) (3) + + (1) - Prefix of OuterTables (those that participate in + IN-equality and/or are correlated with subquery) and outer + Non-correlated tables. + (2) - The handled range. The range starts with the first sj-inner + table, and covers all sj-inner and outer tables + Within the range, Inner, Outer, outer non-correlated tables + may follow in any order. + (3) - The suffix of outer non-correlated tables. + + FirstMatch strategy + ~~~~~~~~~~~~~~~~~~~ + + (ot|nt)* [ it ((it|nt)* it) ] (nt)* + +------+ +==================+ +---+ + (1) (2) (3) + + (1) - Prefix of outer and non-correlated tables + (2) - The handled range, which may contain only inner and + non-correlated tables. + (3) - The suffix of outer non-correlated tables. + + LooseScan strategy + ~~~~~~~~~~~~~~~~~~ + + (ot|ct|nt) [ loosescan_tbl (ot|nt|it)* it ] (ot|nt)* + +--------+ +===========+ +=============+ +------+ + (1) (2) (3) (4) + + (1) - Prefix that may contain any outer tables. The prefix must contain + all the non-trivially correlated outer tables. 
(non-trivially means + that the correlation is not just through the IN-equality). + + (2) - Inner table for which the LooseScan scan is performed. + + (3) - The remainder of the duplicate-generating range. It is served by + application of FirstMatch strategy, with the exception that + outer IN-correlated tables are considered to be non-correlated. + + (4) - THe suffix of outer and outer non-correlated tables. + + + The choice between the strategies is made by the join optimizer (see + optimize_semi_joins() and fix_semijoin_strategies_for_picked_join_order()). + This function sets up all fields/structures/etc needed for execution except + for setup/initialization of semi-join materialization which is done in + setup_sj_materialization() (todo: can't we move that to here also?) + + RETURN + FALSE OK + TRUE Out of memory error +*/ + +int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, + uint no_jbuf_after) +{ + uint i; + DBUG_ENTER("setup_semijoin_dups_elimination"); + + join->complex_firstmatch_tables= table_map(0); + + POSITION *pos= join->best_positions + join->const_tables; + for (i= join->const_tables ; i < join->top_join_tab_count; ) + { + JOIN_TAB *tab=join->join_tab + i; + switch (pos->sj_strategy) { + case SJ_OPT_MATERIALIZE: + case SJ_OPT_MATERIALIZE_SCAN: + /* Do nothing */ + i+= 1;// It used to be pos->n_sj_tables, but now they are embedded in a nest + pos += pos->n_sj_tables; + break; + case SJ_OPT_LOOSE_SCAN: + { + /* Setup already handled by setup_semijoin_loosescan */ + i+= pos->n_sj_tables; + pos+= pos->n_sj_tables; + break; + } + case SJ_OPT_DUPS_WEEDOUT: + { + /* + Check for join buffering. If there is one, move the first table + forwards, but do not destroy other duplicate elimination methods. 
+ */ + uint first_table= i; + + uint join_cache_level= join->thd->variables.join_cache_level; + for (uint j= i; j < i + pos->n_sj_tables; j++) + { + /* + When we'll properly take join buffering into account during + join optimization, the below check should be changed to + "if (join->best_positions[j].use_join_buffer && + j <= no_jbuf_after)". + For now, use a rough criteria: + */ + JOIN_TAB *js_tab=join->join_tab + j; + if (j != join->const_tables && js_tab->use_quick != 2 && + j <= no_jbuf_after && + ((js_tab->type == JT_ALL && join_cache_level != 0) || + (join_cache_level > 2 && (js_tab->type == JT_REF || + js_tab->type == JT_EQ_REF)))) + { + /* Looks like we'll be using join buffer */ + first_table= join->const_tables; + /* + Make sure that possible sorting of rows from the head table + is not to be employed. + */ + if (join->get_sort_by_join_tab()) + { + join->simple_order= 0; + join->simple_group= 0; + join->need_tmp= join->test_if_need_tmp_table(); + } + break; + } + } + + init_dups_weedout(join, first_table, i, i + pos->n_sj_tables - first_table); + i+= pos->n_sj_tables; + pos+= pos->n_sj_tables; + break; + } + case SJ_OPT_FIRST_MATCH: + { + JOIN_TAB *j; + JOIN_TAB *jump_to= tab-1; + + bool complex_range= FALSE; + table_map tables_in_range= table_map(0); + + for (j= tab; j != tab + pos->n_sj_tables; j++) + { + tables_in_range |= j->table->map; + if (!j->emb_sj_nest) + { + /* + Got a table that's not within any semi-join nest. This is a case + like this: + + SELECT * FROM ot1, nt1 WHERE ot1.col IN (SELECT expr FROM it1, it2) + + with a join order of + + +----- FirstMatch range ----+ + | | + ot1 it1 nt1 nt2 it2 it3 ... 
+ | ^ + | +-------- 'j' points here + +------------- SJ_OPT_FIRST_MATCH was set for this table as + it's the first one that produces duplicates + + */ + DBUG_ASSERT(j != tab); /* table ntX must have an itX before it */ + + /* + If the table right before us is an inner table (like it1 in the + picture), it should be set to jump back to previous outer-table + */ + if (j[-1].emb_sj_nest) + j[-1].do_firstmatch= jump_to; + + jump_to= j; /* Jump back to us */ + complex_range= TRUE; + } + else + { + j->first_sj_inner_tab= tab; + j->last_sj_inner_tab= tab + pos->n_sj_tables - 1; + } + } + j[-1].do_firstmatch= jump_to; + i+= pos->n_sj_tables; + pos+= pos->n_sj_tables; + + if (complex_range) + join->complex_firstmatch_tables|= tables_in_range; + break; + } + case SJ_OPT_NONE: + i++; + pos++; + break; + } + } + DBUG_RETURN(FALSE); +} + + +/* + Destroy all temporary tables created by NL-semijoin runtime +*/ + +void destroy_sj_tmp_tables(JOIN *join) +{ + List_iterator
it(join->sj_tmp_tables); + TABLE *table; + while ((table= it++)) + { + /* + SJ-Materialization tables are initialized for either sequential reading + or index lookup, DuplicateWeedout tables are not initialized for read + (we only write to them), so need to call ha_index_or_rnd_end. + */ + table->file->ha_index_or_rnd_end(); + free_tmp_table(join->thd, table); + } + join->sj_tmp_tables.empty(); + join->sjm_info_list.empty(); +} + + +/* + Remove all records from all temp tables used by NL-semijoin runtime + + SYNOPSIS + clear_sj_tmp_tables() + join The join to remove tables for + + DESCRIPTION + Remove all records from all temp tables used by NL-semijoin runtime. This + must be done before every join re-execution. +*/ + +int clear_sj_tmp_tables(JOIN *join) +{ + int res; + List_iterator
it(join->sj_tmp_tables); + TABLE *table; + while ((table= it++)) + { + if ((res= table->file->ha_delete_all_rows())) + return res; /* purecov: inspected */ + } + + SJ_MATERIALIZATION_INFO *sjm; + List_iterator it2(join->sjm_info_list); + while ((sjm= it2++)) + { + sjm->materialized= FALSE; + } + return 0; +} + + +/* + Check if the table's rowid is included in the temptable + + SYNOPSIS + sj_table_is_included() + join The join + join_tab The table to be checked + + DESCRIPTION + SemiJoinDuplicateElimination: check the table's rowid should be included + in the temptable. This is so if + + 1. The table is not embedded within some semi-join nest + 2. The has been pulled out of a semi-join nest, or + + 3. The table is functionally dependent on some previous table + + [4. This is also true for constant tables that can't be + NULL-complemented but this function is not called for such tables] + + RETURN + TRUE - Include table's rowid + FALSE - Don't +*/ + +static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab) +{ + if (join_tab->emb_sj_nest) + return FALSE; + + /* Check if this table is functionally dependent on the tables that + are within the same outer join nest + */ + TABLE_LIST *embedding= join_tab->table->pos_in_table_list->embedding; + if (join_tab->type == JT_EQ_REF) + { + table_map depends_on= 0; + uint idx; + + for (uint kp= 0; kp < join_tab->ref.key_parts; kp++) + depends_on |= join_tab->ref.items[kp]->used_tables(); + + Table_map_iterator it(depends_on & ~PSEUDO_TABLE_BITS); + while ((idx= it.next_bit())!=Table_map_iterator::BITMAP_END) + { + JOIN_TAB *ref_tab= join->map2table[idx]; + if (embedding != ref_tab->table->pos_in_table_list->embedding) + return TRUE; + } + /* Ok, functionally dependent */ + return FALSE; + } + /* Not functionally dependent => need to include*/ + return TRUE; +} + + +/* + Index lookup-based subquery: save some flags for EXPLAIN output + + SYNOPSIS + save_index_subquery_explain_info() + join_tab Subquery's join tab (there is 
only one as index lookup is + only used for subqueries that are single-table SELECTs) + where Subquery's WHERE clause + + DESCRIPTION + For index lookup-based subquery (i.e. one executed with + subselect_uniquesubquery_engine or subselect_indexsubquery_engine), + check its EXPLAIN output row should contain + "Using index" (TAB_INFO_FULL_SCAN_ON_NULL) + "Using Where" (TAB_INFO_USING_WHERE) + "Full scan on NULL key" (TAB_INFO_FULL_SCAN_ON_NULL) + and set appropriate flags in join_tab->packed_info. +*/ + +static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where) +{ + join_tab->packed_info= TAB_INFO_HAVE_VALUE; + if (join_tab->table->covering_keys.is_set(join_tab->ref.key)) + join_tab->packed_info |= TAB_INFO_USING_INDEX; + if (where) + join_tab->packed_info |= TAB_INFO_USING_WHERE; + for (uint i = 0; i < join_tab->ref.key_parts; i++) + { + if (join_tab->ref.cond_guards[i]) + { + join_tab->packed_info |= TAB_INFO_FULL_SCAN_ON_NULL; + break; + } + } +} + + +/* + Check if the join can be rewritten to [unique_]indexsubquery_engine + + DESCRIPTION + Check if the join can be changed into [unique_]indexsubquery_engine. + + The check is done after join optimization, the idea is that if the join + has only one table and uses a [eq_]ref access generated from subselect's + IN-equality then we replace it with a subselect_indexsubquery_engine or a + subselect_uniquesubquery_engine. + + RETURN + 0 - Ok, rewrite done (stop join optimization and return) + 1 - Fatal error (stop join optimization and return) + -1 - No rewrite performed, continue with join optimization +*/ + +int rewrite_to_index_subquery_engine(JOIN *join) +{ + THD *thd= join->thd; + JOIN_TAB* join_tab=join->join_tab; + SELECT_LEX_UNIT *unit= join->unit; + DBUG_ENTER("rewrite_to_index_subquery_engine"); + + /* + is this simple IN subquery? 
+ */ + /* TODO: In order to use these more efficient subquery engines in more cases, + the following problems need to be solved: + - the code that removes GROUP BY (group_list), also adds an ORDER BY + (order), thus GROUP BY queries (almost?) never pass through this branch. + Solution: remove the test below '!join->order', because we remove the + ORDER clase for subqueries anyway. + - in order to set a more efficient engine, the optimizer needs to both + decide to remove GROUP BY, *and* select one of the JT_[EQ_]REF[_OR_NULL] + access methods, *and* loose scan should be more expensive or + inapliccable. When is that possible? + - Consider expanding the applicability of this rewrite for loose scan + for group by queries. + */ + if (!join->group_list && !join->order && + join->unit->item && + join->unit->item->substype() == Item_subselect::IN_SUBS && + join->table_count == 1 && join->conds && + !join->unit->is_unit_op()) + { + if (!join->having) + { + Item *where= join->conds; + if (join_tab[0].type == JT_EQ_REF && + join_tab[0].ref.items[0]->name.str == in_left_expr_name.str) + { + remove_subq_pushed_predicates(join, &where); + save_index_subquery_explain_info(join_tab, where); + join_tab[0].type= JT_UNIQUE_SUBQUERY; + join->error= 0; + DBUG_RETURN(unit->item-> + change_engine(new + subselect_uniquesubquery_engine(thd, + join_tab, + unit->item->get_IN_subquery(), + where))); + } + else if (join_tab[0].type == JT_REF && + join_tab[0].ref.items[0]->name.str == in_left_expr_name.str) + { + remove_subq_pushed_predicates(join, &where); + save_index_subquery_explain_info(join_tab, where); + join_tab[0].type= JT_INDEX_SUBQUERY; + join->error= 0; + DBUG_RETURN(unit->item-> + change_engine(new + subselect_indexsubquery_engine(thd, + join_tab, + unit->item->get_IN_subquery(), + where, + NULL, + 0))); + } + } else if (join_tab[0].type == JT_REF_OR_NULL && + join_tab[0].ref.items[0]->name.str == in_left_expr_name.str && + join->having->name.str == in_having_cond.str) + { + 
join_tab[0].type= JT_INDEX_SUBQUERY; + join->error= 0; + join->conds= remove_additional_cond(join->conds); + save_index_subquery_explain_info(join_tab, join->conds); + DBUG_RETURN(unit->item-> + change_engine(new subselect_indexsubquery_engine(thd, + join_tab, + unit->item->get_IN_subquery(), + join->conds, + join->having, + 1))); + } + } + + DBUG_RETURN(-1); /* Haven't done the rewrite */ +} + + +/** + Remove additional condition inserted by IN/ALL/ANY transformation. + + @param conds condition for processing + + @return + new conditions +*/ + +static Item *remove_additional_cond(Item* conds) +{ + if (conds->name.str == in_additional_cond.str) + return 0; + if (conds->type() == Item::COND_ITEM) + { + Item_cond *cnd= (Item_cond*) conds; + List_iterator li(*(cnd->argument_list())); + Item *item; + while ((item= li++)) + { + if (item->name.str == in_additional_cond.str) + { + li.remove(); + if (cnd->argument_list()->elements == 1) + return cnd->argument_list()->head(); + return conds; + } + } + } + return conds; +} + + +/* + Remove the predicates pushed down into the subquery + + SYNOPSIS + remove_subq_pushed_predicates() + where IN Must be NULL + OUT The remaining WHERE condition, or NULL + + DESCRIPTION + Given that this join will be executed using (unique|index)_subquery, + without "checking NULL", remove the predicates that were pushed down + into the subquery. + + If the subquery compares scalar values, we can remove the condition that + was wrapped into trig_cond (it will be checked when needed by the subquery + engine) + + If the subquery compares row values, we need to keep the wrapped + equalities in the WHERE clause: when the left (outer) tuple has both NULL + and non-NULL values, we'll do a full table scan and will rely on the + equalities corresponding to non-NULL parts of left tuple to filter out + non-matching records. + + TODO: We can remove the equalities that will be guaranteed to be true by the + fact that subquery engine will be using index lookup. 
This must be done only + for cases where there are no conversion errors of significance, e.g. 257 + that is searched in a byte. But this requires homogenization of the return + codes of all Field*::store() methods. +*/ + +static void remove_subq_pushed_predicates(JOIN *join, Item **where) +{ + if (join->conds->type() == Item::FUNC_ITEM && + ((Item_func *)join->conds)->functype() == Item_func::EQ_FUNC && + ((Item_func *)join->conds)->arguments()[0]->type() == Item::REF_ITEM && + ((Item_func *)join->conds)->arguments()[1]->type() == Item::FIELD_ITEM && + test_if_ref (join->conds, + (Item_field *)((Item_func *)join->conds)->arguments()[1], + ((Item_func *)join->conds)->arguments()[0])) + { + *where= 0; + return; + } +} + + + + +/** + Optimize all subqueries of a query that were not flattened into a semijoin. + + @details + Optimize all immediate children subqueries of a query. + + This phase must be called after substitute_for_best_equal_field() because + that function may replace items with other items from a multiple equality, + and we need to reference the correct items in the index access method of the + IN predicate. + + @return Operation status + @retval FALSE success. + @retval TRUE error occurred. +*/ + +bool JOIN::optimize_unflattened_subqueries() +{ + return select_lex->optimize_unflattened_subqueries(false); +} + +/** + Optimize all constant subqueries of a query that were not flattened into + a semijoin. + + @details + Similar to other constant conditions, constant subqueries can be used in + various constant optimizations. Having optimized constant subqueries before + these constant optimizations, makes it possible to estimate if a subquery + is "cheap" enough to be executed during the optimization phase. + + Constant subqueries can be optimized and evaluated independent of the outer + query, therefore if const_only = true, this method can be called early in + the optimization phase of the outer query. + + @return Operation status + @retval FALSE success. 
+ @retval TRUE error occurred. +*/ + +bool JOIN::optimize_constant_subqueries() +{ + ulonglong save_options= select_lex->options; + bool res; + /* + Constant subqueries may be executed during the optimization phase. + In EXPLAIN mode the optimizer doesn't initialize many of the data structures + needed for execution. In order to make it possible to execute subqueries + during optimization, constant subqueries must be optimized for execution, + not for EXPLAIN. + */ + select_lex->options&= ~SELECT_DESCRIBE; + res= select_lex->optimize_unflattened_subqueries(true); + select_lex->options= save_options; + return res; +} + + +/* + Join tab execution startup function. + + SYNOPSIS + join_tab_execution_startup() + tab Join tab to perform startup actions for + + DESCRIPTION + Join tab execution startup function. This is different from + tab->read_first_record in the regard that this has actions that are to be + done once per join execution. + + Currently there are only two possible startup functions, so we have them + both here inside if (...) branches. In future we could switch to function + pointers. 
+ + TODO: consider moving this together with JOIN_TAB::preread_init + + RETURN + NESTED_LOOP_OK - OK + NESTED_LOOP_ERROR| NESTED_LOOP_KILLED - Error, abort the join execution +*/ + +enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab) +{ + Item_in_subselect *in_subs; + DBUG_ENTER("join_tab_execution_startup"); + + if (tab->table->pos_in_table_list && + (in_subs= tab->table->pos_in_table_list->jtbm_subselect)) + { + /* It's a non-merged SJM nest */ + DBUG_ASSERT(in_subs->engine->engine_type() == + subselect_engine::HASH_SJ_ENGINE); + subselect_hash_sj_engine *hash_sj_engine= + ((subselect_hash_sj_engine*)in_subs->engine); + if (!hash_sj_engine->is_materialized) + { + hash_sj_engine->materialize_join->exec(); + hash_sj_engine->is_materialized= TRUE; + + if (unlikely(hash_sj_engine->materialize_join->error) || + unlikely(tab->join->thd->is_fatal_error)) + DBUG_RETURN(NESTED_LOOP_ERROR); + } + } + else if (tab->bush_children) + { + /* It's a merged SJM nest */ + enum_nested_loop_state rc; + SJ_MATERIALIZATION_INFO *sjm= tab->bush_children->start->emb_sj_nest->sj_mat_info; + + if (!sjm->materialized) + { + JOIN *join= tab->join; + JOIN_TAB *join_tab= tab->bush_children->start; + JOIN_TAB *save_return_tab= join->return_tab; + /* + Now run the join for the inner tables. The first call is to run the + join, the second one is to signal EOF (this is essential for some + join strategies, e.g. it will make join buffering flush the records) + */ + if ((rc= sub_select(join, join_tab, FALSE/* no EOF */)) < 0 || + (rc= sub_select(join, join_tab, TRUE/* now EOF */)) < 0) + { + join->return_tab= save_return_tab; + DBUG_RETURN(rc); /* it's NESTED_LOOP_(ERROR|KILLED)*/ + } + join->return_tab= save_return_tab; + sjm->materialized= TRUE; + } + } + + DBUG_RETURN(NESTED_LOOP_OK); +} + + +/* + Create a dummy temporary table, useful only for the sake of having a + TABLE* object with map,tablenr and maybe_null properties. 
+ + This is used by non-mergeable semi-join materilization code to handle + degenerate cases where materialized subquery produced "Impossible WHERE" + and thus wasn't materialized. +*/ + +TABLE *create_dummy_tmp_table(THD *thd) +{ + DBUG_ENTER("create_dummy_tmp_table"); + TABLE *table; + TMP_TABLE_PARAM sjm_table_param; + List sjm_table_cols; + const LEX_CSTRING dummy_name= { STRING_WITH_LEN("dummy") }; + Item *column_item= new (thd->mem_root) Item_int(thd, 1); + if (!column_item) + DBUG_RETURN(NULL); + + sjm_table_param.init(); + sjm_table_param.field_count= sjm_table_param.func_count= 1; + sjm_table_cols.push_back(column_item, thd->mem_root); + if (!(table= create_tmp_table(thd, &sjm_table_param, + sjm_table_cols, (ORDER*) 0, + TRUE /* distinct */, + 1, /*save_sum_fields*/ + thd->variables.option_bits | + TMP_TABLE_ALL_COLUMNS, + HA_POS_ERROR /*rows_limit */, + &dummy_name, TRUE /* Do not open */))) + { + DBUG_RETURN(NULL); + } + DBUG_RETURN(table); +} + + +/* + A class that is used to catch one single tuple that is sent to the join + output, and save it in Item_cache element(s). + + It is very similar to select_singlerow_subselect but doesn't require a + Item_singlerow_subselect item. 
+*/ + +class select_value_catcher :public select_subselect +{ +public: + select_value_catcher(THD *thd_arg, Item_subselect *item_arg): + select_subselect(thd_arg, item_arg) + {} + int send_data(List &items); + int setup(List *items); + bool assigned; /* TRUE <=> we've caught a value */ + uint n_elements; /* How many elements we get */ + Item_cache **row; /* Array of cache elements */ +}; + + +int select_value_catcher::setup(List *items) +{ + assigned= FALSE; + n_elements= items->elements; + + if (!(row= (Item_cache**) thd->alloc(sizeof(Item_cache*) * n_elements))) + return TRUE; + + Item *sel_item; + List_iterator li(*items); + for (uint i= 0; (sel_item= li++); i++) + { + if (!(row[i]= sel_item->get_cache(thd))) + return TRUE; + row[i]->setup(thd, sel_item); + } + return FALSE; +} + + +int select_value_catcher::send_data(List &items) +{ + DBUG_ENTER("select_value_catcher::send_data"); + DBUG_ASSERT(!assigned); + DBUG_ASSERT(items.elements == n_elements); + + Item *val_item; + List_iterator_fast li(items); + for (uint i= 0; (val_item= li++); i++) + { + row[i]->store(val_item); + row[i]->cache_value(); + } + assigned= TRUE; + DBUG_RETURN(0); +} + + +/** + @brief + Attach conditions to already optimized condition + + @param thd the thread handle + @param cond the condition to which add new conditions + @param cond_eq IN/OUT the multiple equalities of cond + @param new_conds the list of conditions to be added + @param cond_value the returned value of the condition + if it can be evaluated + + @details + The method creates new condition through union of cond and + the conditions from new_conds list. + The method is called after optimize_cond() for cond. The result + of the union should be the same as if it was done before the + the optimize_cond() call. 
+ + @retval otherwise the created condition + @retval NULL if an error occurs +*/ + +Item *and_new_conditions_to_optimized_cond(THD *thd, Item *cond, + COND_EQUAL **cond_eq, + List &new_conds, + Item::cond_result *cond_value) +{ + COND_EQUAL new_cond_equal; + Item *item; + Item_equal *mult_eq; + bool is_simplified_cond= false; + /* The list where parts of the new condition are stored. */ + List_iterator li(new_conds); + List_iterator_fast it(new_cond_equal.current_level); + + /* + Create multiple equalities from the equalities of the list new_conds. + Save the created multiple equalities in new_cond_equal. + If multiple equality can't be created or the condition + from new_conds list isn't an equality leave it in new_conds + list. + + The equality can't be converted into the multiple equality if it + is a knowingly false or true equality. + For example, (3 = 1) equality. + */ + while ((item=li++)) + { + if (item->type() == Item::FUNC_ITEM && + ((Item_func *) item)->functype() == Item_func::EQ_FUNC && + check_simple_equality(thd, + Item::Context(Item::ANY_SUBST, + ((Item_func_eq *)item)->compare_type_handler(), + ((Item_func_eq *)item)->compare_collation()), + ((Item_func *)item)->arguments()[0], + ((Item_func *)item)->arguments()[1], + &new_cond_equal)) + li.remove(); + } + + it.rewind(); + if (cond && cond->type() == Item::COND_ITEM && + ((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + /* + Case when cond is an AND-condition. + Union AND-condition cond, created multiple equalities from + new_cond_equal and remaining conditions from new_conds. + */ + COND_EQUAL *cond_equal= &((Item_cond_and *) cond)->m_cond_equal; + List *cond_equalities= &cond_equal->current_level; + List *and_args= ((Item_cond_and *)cond)->argument_list(); + + /* + Disjoin multiple equalities of cond. + Merge these multiple equalities with the multiple equalities of + new_cond_equal. Save the result in new_cond_equal. 
+ Check if after the merge some multiple equalities are knowingly + true or false. + */ + and_args->disjoin((List *) cond_equalities); + while ((mult_eq= it++)) + { + mult_eq->upper_levels= 0; + mult_eq->merge_into_list(thd, cond_equalities, false, false); + } + List_iterator_fast ei(*cond_equalities); + while ((mult_eq= ei++)) + { + if (mult_eq->const_item() && !mult_eq->val_int()) + is_simplified_cond= true; + else + { + mult_eq->unfix_fields(); + if (mult_eq->fix_fields(thd, NULL)) + return NULL; + } + } + + li.rewind(); + while ((item=li++)) + { + /* + There still can be some equalities at not top level of new_conds + conditions that are not transformed into multiple equalities. + To transform them build_item_equal() is called. + + Examples of not top level equalities: + + 1. (t1.a = 3) OR (t1.b > 5) + (t1.a = 3) - not top level equality. + It is inside OR condition + + 2. ((t3.d = t3.c) AND (t3.c < 15)) OR (t3.d > 1) + (t1.d = t3.c) - not top level equality. + It is inside AND condition which is a part of OR condition + */ + if (item->type() == Item::COND_ITEM && + ((Item_cond *)item)->functype() == Item_func::COND_OR_FUNC) + { + item= item->build_equal_items(thd, + &((Item_cond_and *) cond)->m_cond_equal, + false, NULL); + } + and_args->push_back(item, thd->mem_root); + } + and_args->append((List *) cond_equalities); + *cond_eq= &((Item_cond_and *) cond)->m_cond_equal; + } + else + { + /* + Case when cond isn't an AND-condition or is NULL. + There can be several cases: + + 1. cond is a multiple equality. + In this case merge cond with the multiple equalities of + new_cond_equal. + Create new condition from the created multiple equalities + and new_conds list conditions. + 2. cond is NULL + Create new condition from new_conds list conditions + and multiple equalities from new_cond_equal. + 3. Otherwise + Create new condition through union of cond, conditions from new_conds + list and created multiple equalities from new_cond_equal. 
+ */ + List new_conds_list; + /* Flag is set to true if cond is a multiple equality */ + bool is_mult_eq= (cond && cond->type() == Item::FUNC_ITEM && + ((Item_func*) cond)->functype() == Item_func::MULT_EQUAL_FUNC); + + /* + If cond is non-empty and is not multiple equality save it as + a part of a new condition. + */ + if (cond && !is_mult_eq && + new_conds_list.push_back(cond, thd->mem_root)) + return NULL; + + /* + If cond is a multiple equality merge it with new_cond_equal + multiple equalities. + */ + if (is_mult_eq) + { + Item_equal *eq_cond= (Item_equal *)cond; + eq_cond->upper_levels= 0; + eq_cond->merge_into_list(thd, &new_cond_equal.current_level, + false, false); + } + + /** + Fix created multiple equalities and check if they are knowingly + true or false. + */ + List_iterator_fast ei(new_cond_equal.current_level); + while ((mult_eq=ei++)) + { + if (mult_eq->const_item() && !mult_eq->val_int()) + is_simplified_cond= true; + else + { + mult_eq->unfix_fields(); + if (mult_eq->fix_fields(thd, NULL)) + return NULL; + } + } + + /* + Create AND condition if new condition will have two or + more elements. + */ + Item_cond_and *and_cond= 0; + COND_EQUAL *inherited= 0; + if (new_conds_list.elements + + new_conds.elements + + new_cond_equal.current_level.elements > 1) + { + and_cond= new (thd->mem_root) Item_cond_and(thd); + and_cond->m_cond_equal.copy(new_cond_equal); + inherited= &and_cond->m_cond_equal; + } + + li.rewind(); + while ((item=li++)) + { + /* + Look for the comment in the case when cond is an + AND condition above the build_equal_items() call. 
+ */ + if (item->type() == Item::COND_ITEM && + ((Item_cond *)item)->functype() == Item_func::COND_OR_FUNC) + { + item= item->build_equal_items(thd, inherited, false, NULL); + } + new_conds_list.push_back(item, thd->mem_root); + } + new_conds_list.append((List *)&new_cond_equal.current_level); + + if (and_cond) + { + and_cond->argument_list()->append(&new_conds_list); + cond= (Item *)and_cond; + *cond_eq= &((Item_cond_and *) cond)->m_cond_equal; + } + else + { + List_iterator_fast iter(new_conds_list); + cond= iter++; + if (cond->type() == Item::FUNC_ITEM && + ((Item_func *)cond)->functype() == Item_func::MULT_EQUAL_FUNC) + { + if (!(*cond_eq)) + *cond_eq= new COND_EQUAL(); + (*cond_eq)->copy(new_cond_equal); + } + else + *cond_eq= 0; + } + } + + if (!cond) + return NULL; + + if (*cond_eq) + { + /* + The multiple equalities are attached only to the upper level + of AND-condition cond. + Push them down to the bottom levels of cond AND-condition if needed. + */ + propagate_new_equalities(thd, cond, + &(*cond_eq)->current_level, + 0, + &is_simplified_cond); + cond= cond->propagate_equal_fields(thd, + Item::Context_boolean(), + *cond_eq); + cond->update_used_tables(); + } + /* Check if conds has knowingly true or false parts. */ + if (cond && + !is_simplified_cond && + cond->walk(&Item::is_simplified_cond_processor, 0, 0)) + is_simplified_cond= true; + + + /* + If it was found that there are some knowingly true or false equalities + remove them from cond and set cond_value to the appropriate value. 
+ */ + if (cond && is_simplified_cond) + cond= cond->remove_eq_conds(thd, cond_value, true); + + if (cond && cond->fix_fields_if_needed(thd, NULL)) + return NULL; + + return cond; +} + + +/** + @brief Materialize a degenerate jtbm semi join + + @param thd thread handler + @param tbl table list for the target jtbm semi join table + @param subq_pred IN subquery predicate with the degenerate jtbm semi join + @param eq_list IN/OUT the list where to add produced equalities + + @details + The method materializes the degenerate jtbm semi join for the + subquery from the IN subquery predicate subq_pred taking table + as the target for materialization. + Any degenerate table is guaranteed to produce 0 or 1 record. + Examples of both cases: + + select * from ot where col in (select ... from it where 2>3) + select * from ot where col in (select MY_MIN(it.key) from it) + + in this case, there is no necessity to create a temp.table for + materialization. + We now just need to + 1. Check whether 1 or 0 records are produced, setup this as a + constant join tab. + 2. Create a dummy temporary table, because all of the join + optimization code relies on TABLE object being present. + + In the case when materialization produces one row the function + additionally creates equalities between the expressions from the + left part of the IN subquery predicate and the corresponding + columns of the produced row. These equalities are added to the + list eq_list. They are supposed to be conjuncted with the condition + of the WHERE clause. 
+ + @retval TRUE if an error occurs + @retval FALSE otherwise +*/ + +bool execute_degenerate_jtbm_semi_join(THD *thd, + TABLE_LIST *tbl, + Item_in_subselect *subq_pred, + List &eq_list) +{ + DBUG_ENTER("execute_degenerate_jtbm_semi_join"); + select_value_catcher *new_sink; + + DBUG_ASSERT(subq_pred->engine->engine_type() == + subselect_engine::SINGLE_SELECT_ENGINE); + subselect_single_select_engine *engine= + (subselect_single_select_engine*)subq_pred->engine; + if (!(new_sink= new (thd->mem_root) select_value_catcher(thd, subq_pred))) + DBUG_RETURN(TRUE); + if (new_sink->setup(&engine->select_lex->join->fields_list) || + engine->select_lex->join->change_result(new_sink, NULL) || + engine->exec()) + { + DBUG_RETURN(TRUE); + } + subq_pred->is_jtbm_const_tab= TRUE; + + if (new_sink->assigned) + { + /* + Subselect produced one row, which is saved in new_sink->row. + Save "left_expr[i] == row[i]" equalities into the eq_list. + */ + subq_pred->jtbm_const_row_found= TRUE; + + Item *eq_cond; + Item *left_exp= subq_pred->left_exp(); + uint ncols= left_exp->cols(); + for (uint i= 0; i < ncols; i++) + { + eq_cond= + new (thd->mem_root) Item_func_eq(thd, + left_exp->element_index(i), + new_sink->row[i]); + if (!eq_cond || eq_cond->fix_fields(thd, NULL) || + eq_list.push_back(eq_cond, thd->mem_root)) + DBUG_RETURN(TRUE); + } + } + else + { + /* Subselect produced no rows. Just set the flag */ + subq_pred->jtbm_const_row_found= FALSE; + } + + TABLE *dummy_table; + if (!(dummy_table= create_dummy_tmp_table(thd))) + DBUG_RETURN(TRUE); + tbl->table= dummy_table; + tbl->table->pos_in_table_list= tbl; + /* + Note: the table created above may be freed by: + 1. JOIN_TAB::cleanup(), when the parent join is a regular join. + 2. cleanup_empty_jtbm_semi_joins(), when the parent join is a + degenerate join (e.g. one with "Impossible where"). 
+ */ + setup_table_map(tbl->table, tbl, tbl->jtbm_table_no); + DBUG_RETURN(FALSE); +} + + +/** + @brief + Execute degenerate jtbm semi joins before optimize_cond() for parent + + @param join the parent join for jtbm semi joins + @param join_list the list of tables where jtbm semi joins are processed + @param eq_list IN/OUT the list where to add equalities produced after + materialization of single-row degenerate jtbm semi joins + + @details + The method traverses join_list trying to find any degenerate jtbm semi + joins for subqueries of IN predicates. For each degenerate jtbm + semi join execute_degenerate_jtbm_semi_join() is called. As a result + of this call new equalities that substitute for single-row materialized + jtbm semi join are added to eq_list. + + In the case when a table is nested in another table 'nested_join' the + method is recursively called for the join_list of the 'nested_join' trying + to find in the list any degenerate jtbm semi joins. Currently a jtbm semi + join may occur in a mergeable semi join nest. 
+ + @retval TRUE if an error occurs + @retval FALSE otherwise +*/ + +bool setup_degenerate_jtbm_semi_joins(JOIN *join, + List *join_list, + List &eq_list) +{ + TABLE_LIST *table; + NESTED_JOIN *nested_join; + List_iterator li(*join_list); + THD *thd= join->thd; + DBUG_ENTER("setup_degenerate_jtbm_semi_joins"); + + while ((table= li++)) + { + Item_in_subselect *subq_pred; + + if ((subq_pred= table->jtbm_subselect)) + { + JOIN *subq_join= subq_pred->unit->first_select()->join; + + if (!subq_join->tables_list || !subq_join->table_count) + { + if (execute_degenerate_jtbm_semi_join(thd, + table, + subq_pred, + eq_list)) + DBUG_RETURN(TRUE); + join->is_orig_degenerated= true; + } + } + if ((nested_join= table->nested_join)) + { + if (setup_degenerate_jtbm_semi_joins(join, + &nested_join->join_list, + eq_list)) + DBUG_RETURN(TRUE); + } + } + DBUG_RETURN(FALSE); +} + + +/** + @brief + Optimize jtbm semi joins for materialization + + @param join the parent join for jtbm semi joins + @param join_list the list of TABLE_LIST objects where jtbm semi join + can occur + @param eq_list IN/OUT the list where to add produced equalities + + @details + This method is called by the optimizer after the call of + optimize_cond() for parent select. + The method traverses join_list trying to find any jtbm semi joins for + subqueries from IN predicates and optimizes them. + After the optimization some of jtbm semi joins may become degenerate. + For example the subquery 'SELECT MAX(b) FROM t2' from the query + + SELECT * FROM t1 WHERE 4 IN (SELECT MAX(b) FROM t2); + + will become degenerate if there is an index on t2.b. + If a subquery becomes degenerate it is handled by the function + execute_degenerate_jtbm_semi_join(). + + Otherwise the method creates a temporary table in which the subquery + of the jtbm semi join will be materialied. 
+ + The function saves the equalities between all pairs of the expressions + from the left part of the IN subquery predicate and the corresponding + columns of the subquery from the predicate in eq_list appending them + to the list. The equalities of eq_list will be later conjucted with the + condition of the WHERE clause. + + In the case when a table is nested in another table 'nested_join' the + method is recursively called for the join_list of the 'nested_join' trying + to find in the list any degenerate jtbm semi joins. Currently a jtbm semi + join may occur in a mergeable semi join nest. + + @retval TRUE if an error occurs + @retval FALSE otherwise +*/ + +bool setup_jtbm_semi_joins(JOIN *join, List *join_list, + List &eq_list) +{ + TABLE_LIST *table; + NESTED_JOIN *nested_join; + List_iterator li(*join_list); + THD *thd= join->thd; + DBUG_ENTER("setup_jtbm_semi_joins"); + + while ((table= li++)) + { + Item_in_subselect *subq_pred; + + if ((subq_pred= table->jtbm_subselect)) + { + double rows; + double read_time; + + /* + Perform optimization of the subquery, so that we know estimated + - cost of materialization process + - how many records will be in the materialized temp.table + */ + if (subq_pred->optimize(&rows, &read_time)) + DBUG_RETURN(TRUE); + + subq_pred->jtbm_read_time= read_time; + subq_pred->jtbm_record_count=rows; + JOIN *subq_join= subq_pred->unit->first_select()->join; + + if (!subq_join->tables_list || !subq_join->table_count) + { + if (!join->is_orig_degenerated && + execute_degenerate_jtbm_semi_join(thd, table, subq_pred, + eq_list)) + DBUG_RETURN(TRUE); + } + else + { + DBUG_ASSERT(subq_pred->test_set_strategy(SUBS_MATERIALIZATION)); + subq_pred->is_jtbm_const_tab= FALSE; + subselect_hash_sj_engine *hash_sj_engine= + ((subselect_hash_sj_engine*)subq_pred->engine); + + table->table= hash_sj_engine->tmp_table; + table->table->pos_in_table_list= table; + + setup_table_map(table->table, table, table->jtbm_table_no); + + List_iterator 
li(*hash_sj_engine->semi_join_conds->argument_list()); + Item *item; + while ((item=li++)) + { + item->update_used_tables(); + if (eq_list.push_back(item, thd->mem_root)) + DBUG_RETURN(TRUE); + } + } + table->table->maybe_null= MY_TEST(join->mixed_implicit_grouping); + } + if ((nested_join= table->nested_join)) + { + if (setup_jtbm_semi_joins(join, &nested_join->join_list, eq_list)) + DBUG_RETURN(TRUE); + } + } + DBUG_RETURN(FALSE); +} + + +/* + Cleanup non-merged semi-joins (JBMs) that have empty. + + This function is to cleanups for a special case: + Consider a query like + + select * from t1 where 1=2 AND t1.col IN (select max(..) ... having 1=2) + + For this query, optimization of subquery will short-circuit, and + setup_jtbm_semi_joins() will call create_dummy_tmp_table() so that we have + empty, constant temp.table to stand in as materialized temp. table. + + Now, suppose that the upper join is also found to be degenerate. In that + case, no JOIN_TAB array will be produced, and hence, JOIN::cleanup() will + have a problem with cleaning up empty JTBMs (non-empty ones are cleaned up + through Item::cleanup() calls). +*/ + +void cleanup_empty_jtbm_semi_joins(JOIN *join, List *join_list) +{ + List_iterator li(*join_list); + TABLE_LIST *table; + while ((table= li++)) + { + if ((table->jtbm_subselect && table->jtbm_subselect->is_jtbm_const_tab)) + { + if (table->table) + { + free_tmp_table(join->thd, table->table); + table->table= NULL; + } + } + else if (table->nested_join && table->sj_subq_pred) + { + cleanup_empty_jtbm_semi_joins(join, &table->nested_join->join_list); + } + } +} + + +/** + Choose an optimal strategy to execute an IN/ALL/ANY subquery predicate + based on cost. + + @param join_tables the set of tables joined in the subquery + + @notes + The method chooses between the materialization and IN=>EXISTS rewrite + strategies for the execution of a non-flattened subquery IN predicate. + The cost-based decision is made as follows: + + 1. 
compute materialize_strategy_cost based on the unmodified subquery + 2. reoptimize the subquery taking into account the IN-EXISTS predicates + 3. compute in_exists_strategy_cost based on the reoptimized plan + 4. compare and set the cheaper strategy + if (materialize_strategy_cost >= in_exists_strategy_cost) + in_strategy = MATERIALIZATION + else + in_strategy = IN_TO_EXISTS + 5. if in_strategy = MATERIALIZATION and it is not possible to initialize it + revert to IN_TO_EXISTS + 6. if (in_strategy == MATERIALIZATION) + revert the subquery plan to the original one before reoptimizing + else + inject the IN=>EXISTS predicates into the new EXISTS subquery plan + + The implementation itself is a bit more complicated because it takes into + account two more factors: + - whether the user allowed both strategies through an optimizer_switch, and + - if materialization was the cheaper strategy, whether it can be executed + or not. + + @retval FALSE success. + @retval TRUE error occurred. +*/ + +bool JOIN::choose_subquery_plan(table_map join_tables) +{ + enum_reopt_result reopt_result= REOPT_NONE; + Item_in_subselect *in_subs; + + /* + IN/ALL/ANY optimizations are not applicable for so called fake select + (this select exists only to filter results of union if it is needed). + */ + if (select_lex == select_lex->master_unit()->fake_select_lex) + return 0; + + if (is_in_subquery()) + { + in_subs= unit->item->get_IN_subquery(); + if (in_subs->create_in_to_exists_cond(this)) + return true; + } + else + return false; + + /* A strategy must be chosen earlier. */ + DBUG_ASSERT(in_subs->has_strategy()); + DBUG_ASSERT(in_to_exists_where || in_to_exists_having); + DBUG_ASSERT(!in_to_exists_where || in_to_exists_where->fixed()); + DBUG_ASSERT(!in_to_exists_having || in_to_exists_having->fixed()); + + /* The original QEP of the subquery. 
*/ + Join_plan_state save_qep(table_count); + + /* + Compute and compare the costs of materialization and in-exists if both + strategies are possible and allowed by the user (checked during the prepare + phase. + */ + if (in_subs->test_strategy(SUBS_MATERIALIZATION) && + in_subs->test_strategy(SUBS_IN_TO_EXISTS)) + { + JOIN *outer_join; + JOIN *inner_join= this; + /* Number of unique value combinations filtered by the IN predicate. */ + double outer_lookup_keys; + /* Cost and row count of the unmodified subquery. */ + double inner_read_time_1, inner_record_count_1; + /* Cost of the subquery with injected IN-EXISTS predicates. */ + double inner_read_time_2; + /* The cost to compute IN via materialization. */ + double materialize_strategy_cost; + /* The cost of the IN->EXISTS strategy. */ + double in_exists_strategy_cost; + double dummy; + + /* + A. Estimate the number of rows of the outer table that will be filtered + by the IN predicate. + */ + outer_join= unit->outer_select() ? unit->outer_select()->join : NULL; + /* + Get the cost of the outer join if: + (1) It has at least one table, and + (2) It has been already optimized (if there is no join_tab, then the + outer join has not been optimized yet). + */ + if (outer_join && outer_join->table_count > 0 && // (1) + outer_join->join_tab && // (2) + !in_subs->const_item()) + { + /* + TODO: + Currently outer_lookup_keys is computed as the number of rows in + the partial join including the JOIN_TAB where the IN predicate is + pushed to. In the general case this is a gross overestimate because + due to caching we are interested only in the number of unique keys. + The search key may be formed by columns from much fewer than all + tables in the partial join. Example: + select * from t1, t2 where t1.c1 = t2.key AND t2.c2 IN (select ...); + If the join order: t1, t2, the number of unique lookup keys is ~ to + the number of unique values t2.c2 in the partial join t1 join t2. 
+ */ + outer_join->get_partial_cost_and_fanout(in_subs->get_join_tab_idx(), + table_map(-1), + &dummy, + &outer_lookup_keys); + } + else + { + /* + TODO: outer_join can be NULL for DELETE statements. + How to compute its cost? + */ + outer_lookup_keys= 1; + } + + /* + B. Estimate the cost and number of records of the subquery both + unmodified, and with injected IN->EXISTS predicates. + */ + inner_read_time_1= inner_join->best_read; + inner_record_count_1= inner_join->join_record_count; + + if (in_to_exists_where && const_tables != table_count) + { + /* + Re-optimize and cost the subquery taking into account the IN-EXISTS + conditions. + */ + reopt_result= reoptimize(in_to_exists_where, join_tables, &save_qep); + if (reopt_result == REOPT_ERROR) + return TRUE; + + /* Get the cost of the modified IN-EXISTS plan. */ + inner_read_time_2= inner_join->best_read; + + } + else + { + /* Reoptimization would not produce any better plan. */ + inner_read_time_2= inner_read_time_1; + } + + /* + C. Compute execution costs. + */ + /* C.1 Compute the cost of the materialization strategy. */ + //uint rowlen= get_tmp_table_rec_length(unit->first_select()->item_list); + uint rowlen= get_tmp_table_rec_length(ref_ptrs, + select_lex->item_list.elements); + /* The cost of writing one row into the temporary table. */ + double write_cost= get_tmp_table_write_cost(thd, inner_record_count_1, + rowlen); + /* The cost of a lookup into the unique index of the materialized table. */ + double lookup_cost= get_tmp_table_lookup_cost(thd, inner_record_count_1, + rowlen); + /* + The cost of executing the subquery and storing its result in an indexed + temporary table. + */ + double materialization_cost= COST_ADD(inner_read_time_1, + COST_MULT(write_cost, + inner_record_count_1)); + + materialize_strategy_cost= COST_ADD(materialization_cost, + COST_MULT(outer_lookup_keys, + lookup_cost)); + + /* C.2 Compute the cost of the IN=>EXISTS strategy. 
*/ + in_exists_strategy_cost= COST_MULT(outer_lookup_keys, inner_read_time_2); + + /* C.3 Compare the costs and choose the cheaper strategy. */ + if (materialize_strategy_cost >= in_exists_strategy_cost) + in_subs->set_strategy(SUBS_IN_TO_EXISTS); + else + in_subs->set_strategy(SUBS_MATERIALIZATION); + + DBUG_PRINT("info", + ("mat_strategy_cost: %.2f, mat_cost: %.2f, write_cost: %.2f, lookup_cost: %.2f", + materialize_strategy_cost, materialization_cost, write_cost, lookup_cost)); + DBUG_PRINT("info", + ("inx_strategy_cost: %.2f, inner_read_time_2: %.2f", + in_exists_strategy_cost, inner_read_time_2)); + DBUG_PRINT("info",("outer_lookup_keys: %.2f", outer_lookup_keys)); + } + + /* + If (1) materialization is a possible strategy based on semantic analysis + during the prepare phase, then if + (2) it is more expensive than the IN->EXISTS transformation, and + (3) it is not possible to create usable indexes for the materialization + strategy, + fall back to IN->EXISTS. + otherwise + use materialization. + */ + if (in_subs->test_strategy(SUBS_MATERIALIZATION) && + in_subs->setup_mat_engine()) + { + /* + If materialization was the cheaper or the only user-selected strategy, + but it is not possible to execute it due to limitations in the + implementation, fall back to IN-TO-EXISTS. + */ + in_subs->set_strategy(SUBS_IN_TO_EXISTS); + } + + if (in_subs->test_strategy(SUBS_MATERIALIZATION)) + { + /* Restore the original query plan used for materialization. */ + if (reopt_result == REOPT_NEW_PLAN) + restore_query_plan(&save_qep); + + in_subs->unit->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED; + select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED; + + /* + Reset the "LIMIT 1" set in Item_exists_subselect::fix_length_and_dec. + TODO: + Currently we set the subquery LIMIT to infinity, and this is correct + because we forbid at parse time LIMIT inside IN subqueries (see + Item_in_subselect::test_limit). 
However, once we allow this, here + we should set the correct limit if given in the query. + */ + in_subs->unit->global_parameters()->limit_params.select_limit= NULL; + in_subs->unit->set_limit(unit->global_parameters()); + /* + Set the limit of this JOIN object as well, because normally its being + set in the beginning of JOIN::optimize, which was already done. + */ + select_limit= in_subs->unit->lim.get_select_limit(); + } + else if (in_subs->test_strategy(SUBS_IN_TO_EXISTS)) + { + if (reopt_result == REOPT_NONE && in_to_exists_where && + const_tables != table_count) + { + /* + The subquery was not reoptimized with the newly injected IN-EXISTS + conditions either because the user allowed only the IN-EXISTS strategy, + or because materialization was not possible based on semantic analysis. + */ + reopt_result= reoptimize(in_to_exists_where, join_tables, NULL); + if (reopt_result == REOPT_ERROR) + return TRUE; + } + + if (in_subs->inject_in_to_exists_cond(this)) + return TRUE; + /* + If the injected predicate is correlated the IN->EXISTS transformation + make the subquery dependent. + */ + if ((in_to_exists_where && + in_to_exists_where->used_tables() & OUTER_REF_TABLE_BIT) || + (in_to_exists_having && + in_to_exists_having->used_tables() & OUTER_REF_TABLE_BIT)) + { + in_subs->unit->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED; + select_lex->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED; + } + select_limit= 1; + } + else + DBUG_ASSERT(FALSE); + + return FALSE; +} + + +/** + Choose a query plan for a table-less subquery. + + @notes + + @retval FALSE success. + @retval TRUE error occurred. 
+*/ + +bool JOIN::choose_tableless_subquery_plan() +{ + DBUG_ASSERT(!tables_list || !table_count); + if (unit->item) + { + DBUG_ASSERT(unit->item->type() == Item::SUBSELECT_ITEM); + Item_subselect *subs_predicate= unit->item; + + /* + If the optimizer determined that his query has an empty result, + in most cases the subquery predicate is a known constant value - + either of TRUE, FALSE or NULL. The implementation of + Item_subselect::no_rows_in_result() determines which one. + */ + if (zero_result_cause) + { + if (!implicit_grouping) + { + /* + Both group by queries and non-group by queries without aggregate + functions produce empty subquery result. There is no need to further + rewrite the subquery because it will not be executed at all. + */ + exec_const_cond= 0; + return FALSE; + } + + /* @todo + A further optimization is possible when a non-group query with + MIN/MAX/COUNT is optimized by opt_sum_query. Then, if there are + only MIN/MAX functions over an empty result set, the subquery + result is a NULL value/row, thus the value of subs_predicate is + NULL. + */ + } + + /* + For IN subqueries, use IN->EXISTS transfomation, unless the subquery + has been converted to a JTBM semi-join. In that case, just leave + everything as-is, setup_jtbm_semi_joins() has special handling for cases + like this. + */ + Item_in_subselect *in_subs; + in_subs= subs_predicate->get_IN_subquery(); + if (in_subs && + !(subs_predicate->substype() == Item_subselect::IN_SUBS && + in_subs->is_jtbm_merged)) + { + in_subs->set_strategy(SUBS_IN_TO_EXISTS); + if (in_subs->create_in_to_exists_cond(this) || + in_subs->inject_in_to_exists_cond(this)) + return TRUE; + tmp_having= having; + } + } + exec_const_cond= zero_result_cause ? 
0 : conds; + return FALSE; +} + + +bool Item::pushable_equality_checker_for_subquery(uchar *arg) +{ + return + get_corresponding_field_pair(this, + ((Item *)arg)->get_IN_subquery()-> + corresponding_fields); +} + + +/* + Checks if 'item' or some item equal to it is equal to the field from + some Field_pair of 'pair_list' and returns matching Field_pair or + NULL if the matching Field_pair wasn't found. +*/ + +Field_pair *find_matching_field_pair(Item *item, List pair_list) +{ + Field_pair *field_pair= get_corresponding_field_pair(item, pair_list); + if (field_pair) + return field_pair; + + Item_equal *item_equal= item->get_item_equal(); + if (item_equal) + { + Item_equal_fields_iterator it(*item_equal); + Item *equal_item; + while ((equal_item= it++)) + { + if (equal_item->const_item()) + continue; + field_pair= get_corresponding_field_pair(equal_item, pair_list); + if (field_pair) + return field_pair; + } + } + return NULL; +} + + +bool Item_field::excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred) +{ + if (find_matching_field_pair(((Item *) this), subq_pred->corresponding_fields)) + return true; + return false; +} + + +bool Item_direct_view_ref::excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred) +{ + if (item_equal) + { + DBUG_ASSERT(real_item()->type() == Item::FIELD_ITEM); + if (get_corresponding_field_pair(((Item *)this), subq_pred->corresponding_fields)) + return true; + } + return (*ref)->excl_dep_on_in_subq_left_part(subq_pred); +} + + +bool Item_equal::excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred) +{ + Item *left_item = get_const(); + Item_equal_fields_iterator it(*this); + Item *item; + if (!left_item) + { + while ((item=it++)) + { + if (item->excl_dep_on_in_subq_left_part(subq_pred)) + { + left_item= item; + break; + } + } + } + if (!left_item) + return false; + while ((item=it++)) + { + if (item->excl_dep_on_in_subq_left_part(subq_pred)) + return true; + } + return false; +} + + +/** + @brief + Get corresponding item from 
the select of the right part of IN subquery + + @param thd the thread handle + @param item the item from the left part of subq_pred for which + corresponding item should be found + @param subq_pred the IN subquery predicate + + @details + This method looks through the fields of the select of the right part of + the IN subquery predicate subq_pred trying to find the corresponding + item 'new_item' for item. If item has equal items it looks through + the fields of the select of the right part of subq_pred for each equal + item trying to find the corresponding item. + The method assumes that the given item is either a field item or + a reference to a field item. + + @retval reference to the corresponding item + @retval NULL if item was not found +*/ + +static +Item *get_corresponding_item(THD *thd, Item *item, + Item_in_subselect *subq_pred) +{ + DBUG_ASSERT(item->type() == Item::FIELD_ITEM || + (item->type() == Item::REF_ITEM && + ((Item_ref *) item)->ref_type() == Item_ref::VIEW_REF)); + + Field_pair *field_pair; + Item_equal *item_equal= item->get_item_equal(); + + if (item_equal) + { + Item_equal_fields_iterator it(*item_equal); + Item *equal_item; + while ((equal_item= it++)) + { + field_pair= + get_corresponding_field_pair(equal_item, subq_pred->corresponding_fields); + if (field_pair) + return field_pair->corresponding_item; + } + } + else + { + field_pair= + get_corresponding_field_pair(item, subq_pred->corresponding_fields); + if (field_pair) + return field_pair->corresponding_item; + } + return NULL; +} + + +Item *Item_field::in_subq_field_transformer_for_where(THD *thd, uchar *arg) +{ + Item_in_subselect *subq_pred= ((Item *)arg)->get_IN_subquery(); + Item *producing_item= get_corresponding_item(thd, this, subq_pred); + if (producing_item) + return producing_item->build_clone(thd); + return this; +} + + +Item *Item_direct_view_ref::in_subq_field_transformer_for_where(THD *thd, + uchar *arg) +{ + if (item_equal) + { + Item_in_subselect *subq_pred= ((Item 
*)arg)->get_IN_subquery(); + Item *producing_item= get_corresponding_item(thd, this, subq_pred); + DBUG_ASSERT (producing_item != NULL); + return producing_item->build_clone(thd); + } + return this; +} + + +/** + @brief + Transforms item so it can be pushed into the IN subquery HAVING clause + + @param thd the thread handle + @param in_item the item for which pushable item should be created + @param subq_pred the IN subquery predicate + + @details + This method finds for in_item that is a field from the left part of the + IN subquery predicate subq_pred its corresponding item from the right part + of subq_pred. + If corresponding item is found, a shell for this item is created. + This shell can be pushed into the HAVING part of subq_pred select. + + @retval reference to the created corresponding item shell for in_item + @retval NULL if mistake occurs +*/ + +static Item* +get_corresponding_item_for_in_subq_having(THD *thd, Item *in_item, + Item_in_subselect *subq_pred) +{ + Item *new_item= get_corresponding_item(thd, in_item, subq_pred); + + if (new_item) + { + Item_ref *ref= + new (thd->mem_root) Item_ref(thd, + &subq_pred->unit->first_select()->context, + new_item->name); + if (!ref) + DBUG_ASSERT(0); + return ref; + } + return new_item; +} + + +Item *Item_field::in_subq_field_transformer_for_having(THD *thd, uchar *arg) +{ + DBUG_ASSERT(((Item *)arg)->get_IN_subquery()); + return get_corresponding_item_for_in_subq_having(thd, this, + (Item_in_subselect *)arg); +} + + +Item *Item_direct_view_ref::in_subq_field_transformer_for_having(THD *thd, + uchar *arg) +{ + if (!item_equal) + return this; + else + { + DBUG_ASSERT(((Item *)arg)->get_IN_subquery()); + Item *new_item= get_corresponding_item_for_in_subq_having(thd, this, + (Item_in_subselect *)arg); + if (!new_item) + return this; + return new_item; + } +} + + +/** + @brief + Find fields that are used in the GROUP BY of the select + + @param thd the thread handle + @param sel the select of the IN subquery 
predicate + @param fields fields of the left part of the IN subquery predicate + @param grouping_list GROUP BY clause + + @details + This method traverses fields which are used in the GROUP BY of + sel and saves them with their corresponding items from fields. +*/ + +bool grouping_fields_in_the_in_subq_left_part(THD *thd, + st_select_lex *sel, + List *fields, + ORDER *grouping_list) +{ + DBUG_ENTER("grouping_fields_in_the_in_subq_left_part"); + sel->grouping_tmp_fields.empty(); + List_iterator it(*fields); + Field_pair *item; + while ((item= it++)) + { + for (ORDER *ord= grouping_list; ord; ord= ord->next) + { + if ((*ord->item)->eq(item->corresponding_item, 0)) + { + if (sel->grouping_tmp_fields.push_back(item, thd->mem_root)) + DBUG_RETURN(TRUE); + } + } + } + DBUG_RETURN(FALSE); +} + + +/** + @brief + Extract condition that can be pushed into select of this IN subquery + + @param thd the thread handle + @param cond current condition + + @details + This function builds the most restrictive condition depending only on + the list of fields of the left part of this IN subquery predicate + (directly or indirectly through equality) that can be extracted from the + given condition cond and pushes it into this IN subquery. + + Example of the transformation: + + SELECT * FROM t1 + WHERE a>3 AND b>10 AND + (a,b) IN (SELECT x,MAX(y) FROM t2 GROUP BY x); + + => + + SELECT * FROM t1 + WHERE a>3 AND b>10 AND + (a,b) IN (SELECT x,max(y) + FROM t2 + WHERE x>3 + GROUP BY x + HAVING MAX(y)>10); + + + In details: + 1. Check what pushable formula can be extracted from cond + 2. Build a clone PC of the formula that can be extracted + (the clone is built only if the extracted formula is a AND subformula + of cond or conjunction of such subformulas) + 3. If there is no HAVING clause prepare PC to be conjuncted with + WHERE clause of this subquery. Otherwise do 4-7. + 4. Check what formula PC_where can be extracted from PC to be pushed + into the WHERE clause of the subquery + 5. 
Build PC_where and if PC_where is a conjunct(s) of PC remove it from PC + getting PC_having + 6. Prepare PC_where to be conjuncted with the WHERE clause of + the IN subquery + 7. Prepare PC_having to be conjuncted with the HAVING clause of + the IN subquery + + @note + This method is similar to pushdown_cond_for_derived() + + @retval TRUE if an error occurs + @retval FALSE otherwise +*/ + +bool Item_in_subselect::pushdown_cond_for_in_subquery(THD *thd, Item *cond) +{ + DBUG_ENTER("Item_in_subselect::pushdown_cond_for_in_subquery"); + Item *remaining_cond= NULL; + + if (!cond) + DBUG_RETURN(FALSE); + + st_select_lex *sel = unit->first_select(); + + if (is_jtbm_const_tab) + DBUG_RETURN(FALSE); + + if (!sel->cond_pushdown_is_allowed()) + DBUG_RETURN(FALSE); + + /* + Create a list of Field_pair items for this IN subquery. + It consists of the pairs of fields from the left part of this IN subquery + predicate 'left_part' and the respective fields from the select of the + right part of the IN subquery 'sel' (the field from left_part with the + corresponding field from the sel projection list). + Attach this list to the IN subquery. + */ + corresponding_fields.empty(); + List_iterator_fast it(sel->join->fields_list); + Item *item; + for (uint i= 0; i < left_expr->cols(); i++) + { + item= it++; + Item *elem= left_expr->element_index(i); + + if (elem->real_item()->type() != Item::FIELD_ITEM) + continue; + + if (corresponding_fields.push_back( + new Field_pair(((Item_field *)(elem->real_item()))->field, + item))) + DBUG_RETURN(TRUE); + } + + /* 1. Check what pushable formula can be extracted from cond */ + Item *extracted_cond; + cond->check_pushable_cond(&Item::pushable_cond_checker_for_subquery, + (uchar *)this); + /* 2. 
Build a clone PC of the formula that can be extracted */ + extracted_cond= + cond->build_pushable_cond(thd, + &Item::pushable_equality_checker_for_subquery, + (uchar *)this); + /* Nothing to push */ + if (!extracted_cond) + { + DBUG_RETURN(FALSE); + } + + /* Collect fields that are used in the GROUP BY of sel */ + st_select_lex *save_curr_select= thd->lex->current_select; + if (sel->have_window_funcs()) + { + if (sel->group_list.first || sel->join->implicit_grouping) + goto exit; + ORDER *common_partition_fields= + sel->find_common_window_func_partition_fields(thd); + if (!common_partition_fields) + goto exit; + + if (grouping_fields_in_the_in_subq_left_part(thd, sel, &corresponding_fields, + common_partition_fields)) + DBUG_RETURN(TRUE); + } + else if (grouping_fields_in_the_in_subq_left_part(thd, sel, + &corresponding_fields, + sel->group_list.first)) + DBUG_RETURN(TRUE); + + /* Do 4-6 */ + sel->pushdown_cond_into_where_clause(thd, extracted_cond, + &remaining_cond, + &Item::in_subq_field_transformer_for_where, + (uchar *) this); + if (!remaining_cond) + goto exit; + /* + 7. 
Prepare PC_having to be conjuncted with the HAVING clause of + the IN subquery + */ + remaining_cond= + remaining_cond->transform(thd, + &Item::in_subq_field_transformer_for_having, + (uchar *)this); + if (!remaining_cond || + remaining_cond->walk(&Item::cleanup_excluding_const_fields_processor, + 0, 0)) + goto exit; + + mark_or_conds_to_avoid_pushdown(remaining_cond); + + sel->cond_pushed_into_having= remaining_cond; + +exit: + thd->lex->current_select= save_curr_select; + DBUG_RETURN(FALSE); +} + +/* + @brief + Check if a table is a SJM Scan table + + @retval + TRUE SJM scan table + FALSE Otherwise +*/ +bool TABLE_LIST::is_sjm_scan_table() +{ + return is_active_sjm() && sj_mat_info->is_sj_scan; +} diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h new file mode 100644 index 00000000..7b1b810e --- /dev/null +++ b/sql/opt_subselect.h @@ -0,0 +1,418 @@ +/* + Copyright (c) 2010, 2019, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Semi-join subquery optimization code definitions +*/ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +int check_and_do_in_subquery_rewrites(JOIN *join); +bool convert_join_subqueries_to_semijoins(JOIN *join); +int pull_out_semijoin_tables(JOIN *join); +bool optimize_semijoin_nests(JOIN *join, table_map all_table_map); +bool setup_degenerate_jtbm_semi_joins(JOIN *join, + List *join_list, + List &eq_list); +bool setup_jtbm_semi_joins(JOIN *join, List *join_list, + List &eq_list); +void cleanup_empty_jtbm_semi_joins(JOIN *join, List *join_list); + +// used by Loose_scan_opt +ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest, + table_map remaining_tables); + +/* + This is a class for considering possible loose index scan optimizations. + It's usage pattern is as follows: + best_access_path() + { + Loose_scan_opt opt; + + opt.init() + for each index we can do ref access with + { + opt.next_ref_key(); + for each keyuse + opt.add_keyuse(); + opt.check_ref_access(); + } + + if (some criteria for range scans) + opt.check_range_access(); + + opt.get_best_option(); + } +*/ + +class Loose_scan_opt +{ + /* All methods must check this before doing anything else */ + bool try_loosescan; + + /* + If we consider (oe1, .. oeN) IN (SELECT ie1, .. ieN) then ieK=oeK is + called sj-equality. If oeK depends only on preceding tables then such + equality is called 'bound'. + */ + ulonglong bound_sj_equalities; + + /* Accumulated properties of ref access we're now considering: */ + ulonglong handled_sj_equalities; + key_part_map loose_scan_keyparts; + uint max_loose_keypart; + bool part1_conds_met; + + /* + Use of quick select is a special case. 
Some of its properties: + */ + uint quick_uses_applicable_index; + uint quick_max_loose_keypart; + + /* Best loose scan method so far */ + uint best_loose_scan_key; + double best_loose_scan_cost; + double best_loose_scan_records; + KEYUSE *best_loose_scan_start_key; + + uint best_max_loose_keypart; + table_map best_ref_depend_map; + +public: + Loose_scan_opt(): + try_loosescan(false), + bound_sj_equalities(0), + quick_uses_applicable_index(0), + quick_max_loose_keypart(0), + best_loose_scan_key(0), + best_loose_scan_cost(0), + best_loose_scan_records(0), + best_loose_scan_start_key(NULL), + best_max_loose_keypart(0), + best_ref_depend_map(0) + { + } + + void init(JOIN *join, JOIN_TAB *s, table_map remaining_tables) + { + /* + Discover the bound equalities. We need to do this if + 1. The next table is an SJ-inner table, and + 2. It is the first table from that semijoin, and + 3. We're not within a semi-join range (i.e. all semi-joins either have + all or none of their tables in join_table_map), except + s->emb_sj_nest (which we've just entered, see #2). + 4. All non-IN-equality correlation references from this sj-nest are + bound + 5. 
But some of the IN-equalities aren't (so this can't be handled by + FirstMatch strategy) + */ + best_loose_scan_cost= DBL_MAX; + if (!join->emb_sjm_nest && s->emb_sj_nest && // (1) + s->emb_sj_nest->sj_in_exprs < 64 && + ((remaining_tables & s->emb_sj_nest->sj_inner_tables) == // (2) + s->emb_sj_nest->sj_inner_tables) && // (2) + join->cur_sj_inner_tables == 0 && // (3) + !(remaining_tables & + s->emb_sj_nest->nested_join->sj_corr_tables) && // (4) + remaining_tables & s->emb_sj_nest->nested_join->sj_depends_on &&// (5) + optimizer_flag(join->thd, OPTIMIZER_SWITCH_LOOSE_SCAN)) + { + /* This table is an LooseScan scan candidate */ + bound_sj_equalities= get_bound_sj_equalities(s->emb_sj_nest, + remaining_tables); + try_loosescan= TRUE; + DBUG_PRINT("info", ("Will try LooseScan scan, bound_map=%llx", + (longlong)bound_sj_equalities)); + } + } + + void next_ref_key() + { + handled_sj_equalities=0; + loose_scan_keyparts= 0; + max_loose_keypart= 0; + part1_conds_met= FALSE; + } + + void add_keyuse(table_map remaining_tables, KEYUSE *keyuse) + { + if (try_loosescan && keyuse->sj_pred_no != UINT_MAX && + (keyuse->table->file->index_flags(keyuse->key, 0, 1 ) & HA_READ_ORDER)) + + { + if (!(remaining_tables & keyuse->used_tables)) + { + /* + This allows to use equality propagation to infer that some + sj-equalities are bound. + */ + bound_sj_equalities |= 1ULL << keyuse->sj_pred_no; + } + else + { + handled_sj_equalities |= 1ULL << keyuse->sj_pred_no; + loose_scan_keyparts |= ((key_part_map)1) << keyuse->keypart; + set_if_bigger(max_loose_keypart, keyuse->keypart); + } + } + } + + bool have_a_case() { return MY_TEST(handled_sj_equalities); } + + void check_ref_access_part1(JOIN_TAB *s, uint key, KEYUSE *start_key, + table_map found_part) + { + /* + Check if we can use LooseScan semi-join strategy. We can if + 1. This is the right table at right location + 2. All IN-equalities are either + - "bound", ie. the outer_expr part refers to the preceding tables + - "handled", ie. 
covered by the index we're considering + 3. Index order allows to enumerate subquery's duplicate groups in + order. This happens when the index definition matches this + pattern: + + (handled_col|bound_col)* (other_col|bound_col) + + */ + if (try_loosescan && // (1) + (handled_sj_equalities | bound_sj_equalities) == // (2) + PREV_BITS(ulonglong, s->emb_sj_nest->sj_in_exprs) && // (2) + (PREV_BITS(key_part_map, max_loose_keypart+1) & // (3) + (found_part | loose_scan_keyparts)) == // (3) + PREV_BITS(key_part_map, max_loose_keypart+1) && // (3) + !key_uses_partial_cols(s->table->s, key)) + { + if (s->quick && s->quick->index == key && + s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) + { + quick_uses_applicable_index= TRUE; + quick_max_loose_keypart= max_loose_keypart; + } + DBUG_PRINT("info", ("Can use LooseScan scan")); + + if (found_part & 1) + { + /* Can use LooseScan on ref access if the first key part is bound */ + part1_conds_met= TRUE; + } + + /* + Check if this is a special case where there are no usable bound + IN-equalities, i.e. we have + + outer_expr IN (SELECT innertbl.key FROM ...) + + and outer_expr cannot be evaluated yet, so it's actually full + index scan and not a ref access. + We can do full index scan if it uses index-only. + */ + if (!(found_part & 1 ) && /* no usable ref access for 1st key part */ + s->table->covering_keys.is_set(key)) + { + part1_conds_met= TRUE; + DBUG_PRINT("info", ("Can use full index scan for LooseScan")); + + /* Calculate the cost of complete loose index scan. 
*/ + double records= rows2double(s->table->file->stats.records); + + /* The cost is entire index scan cost (divided by 2) */ + double read_time= s->table->file->keyread_time(key, 1, + (ha_rows) records); + + /* + Now find out how many different keys we will get (for now we + ignore the fact that we have "keypart_i=const" restriction for + some key components, that may make us think think that loose + scan will produce more distinct records than it actually will) + */ + ulong rpc; + if ((rpc= s->table->key_info[key].rec_per_key[max_loose_keypart])) + records= records / rpc; + + // TODO: previous version also did /2 + if (read_time < best_loose_scan_cost) + { + best_loose_scan_key= key; + best_loose_scan_cost= read_time; + best_loose_scan_records= records; + best_max_loose_keypart= max_loose_keypart; + best_loose_scan_start_key= start_key; + best_ref_depend_map= 0; + } + } + } + } + + void check_ref_access_part2(uint key, KEYUSE *start_key, double records, + double read_time, table_map ref_depend_map_arg) + { + if (part1_conds_met && read_time < best_loose_scan_cost) + { + /* TODO use rec-per-key-based fanout calculations */ + best_loose_scan_key= key; + best_loose_scan_cost= read_time; + best_loose_scan_records= records; + best_max_loose_keypart= max_loose_keypart; + best_loose_scan_start_key= start_key; + best_ref_depend_map= ref_depend_map_arg; + } + } + + void check_range_access(JOIN *join, uint idx, QUICK_SELECT_I *quick) + { + /* TODO: this the right part restriction: */ + if (quick_uses_applicable_index && idx == join->const_tables && + quick->read_time < best_loose_scan_cost) + { + best_loose_scan_key= quick->index; + best_loose_scan_cost= quick->read_time; + /* this is ok because idx == join->const_tables */ + best_loose_scan_records= rows2double(quick->records); + best_max_loose_keypart= quick_max_loose_keypart; + best_loose_scan_start_key= NULL; + best_ref_depend_map= 0; + } + } + + void save_to_position(JOIN_TAB *tab, POSITION *pos) + { + pos->read_time= 
best_loose_scan_cost; + if (best_loose_scan_cost != DBL_MAX) + { + pos->records_read= best_loose_scan_records; + pos->key= best_loose_scan_start_key; + pos->cond_selectivity= 1.0; + pos->loosescan_picker.loosescan_key= best_loose_scan_key; + pos->loosescan_picker.loosescan_parts= best_max_loose_keypart + 1; + pos->use_join_buffer= FALSE; + pos->table= tab; + pos->range_rowid_filter_info= tab->range_rowid_filter_info; + pos->ref_depend_map= best_ref_depend_map; + DBUG_PRINT("info", ("Produced a LooseScan plan, key %s, %s", + tab->table->key_info[best_loose_scan_key].name.str, + best_loose_scan_start_key? "(ref access)": + "(range/index access)")); + } + } +}; + + +void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx, + double *current_record_count, + double *current_read_time, POSITION *loose_scan_pos); +void update_sj_state(JOIN *join, const JOIN_TAB *new_tab, + uint idx, table_map remaining_tables); +void restore_prev_sj_state(const table_map remaining_tables, + const JOIN_TAB *tab, uint idx); + +void fix_semijoin_strategies_for_picked_join_order(JOIN *join); + +bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab); +bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab); +uint get_number_of_tables_at_top_level(JOIN *join); + + +/* + Temporary table used by semi-join DuplicateElimination strategy + + This consists of the temptable itself and data needed to put records + into it. The table's DDL is as follows: + + CREATE TABLE tmptable (col VARCHAR(n) BINARY, PRIMARY KEY(col)); + + where the primary key can be replaced with unique constraint if n exceeds + the limit (as it is always done for query execution-time temptables). + + The record value is a concatenation of rowids of tables from the join we're + executing. If a join table is on the inner side of the outer join, we + assume that its rowid can be NULL and provide means to store this rowid in + the tuple. 
+*/ + +class SJ_TMP_TABLE : public Sql_alloc +{ +public: + /* + Array of pointers to tables whose rowids compose the temporary table + record. + */ + class TAB + { + public: + JOIN_TAB *join_tab; + uint rowid_offset; + ushort null_byte; + uchar null_bit; + }; + TAB *tabs; + TAB *tabs_end; + + /* + is_degenerate==TRUE means this is a special case where the temptable record + has zero length (and presence of a unique key means that the temptable can + have either 0 or 1 records). + In this case we don't create the physical temptable but instead record + its state in SJ_TMP_TABLE::have_degenerate_row. + */ + bool is_degenerate; + + /* + When is_degenerate==TRUE: the contents of the table (whether it has the + record or not). + */ + bool have_degenerate_row; + + /* table record parameters */ + uint null_bits; + uint null_bytes; + uint rowid_len; + + /* The temporary table itself (NULL means not created yet) */ + TABLE *tmp_table; + + /* + These are the members we got from temptable creation code. We'll need + them if we'll need to convert table from HEAP to MyISAM/Maria. + */ + TMP_ENGINE_COLUMNDEF *start_recinfo; + TMP_ENGINE_COLUMNDEF *recinfo; + + SJ_TMP_TABLE *next_flush_table; + + int sj_weedout_delete_rows(); + int sj_weedout_check_row(THD *thd); + bool create_sj_weedout_tmp_table(THD *thd); +}; + +int setup_semijoin_loosescan(JOIN *join); +int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, + uint no_jbuf_after); +void destroy_sj_tmp_tables(JOIN *join); +int clear_sj_tmp_tables(JOIN *join); +int rewrite_to_index_subquery_engine(JOIN *join); + + +void get_delayed_table_estimates(TABLE *table, + ha_rows *out_rows, + double *scan_time, + double *startup_cost); + +enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab); + diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc new file mode 100644 index 00000000..794ec40f --- /dev/null +++ b/sql/opt_sum.cc @@ -0,0 +1,1096 @@ +/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. 
+ Copyright (c) 2008, 2021, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/** + @file + + Optimising of MIN(), MAX() and COUNT(*) queries without 'group by' clause + by replacing the aggregate expression with a constant. + + Given a table with a compound key on columns (a,b,c), the following + types of queries are optimised (assuming the table handler supports + the required methods) + + @verbatim + SELECT COUNT(*) FROM t1[,t2,t3,...] + SELECT MIN(b) FROM t1 WHERE a=const + SELECT MAX(c) FROM t1 WHERE a=const AND b=const + SELECT MAX(b) FROM t1 WHERE a=const AND bconst + SELECT MIN(b) FROM t1 WHERE a=const AND b BETWEEN const AND const + SELECT MAX(b) FROM t1 WHERE a=const AND b BETWEEN const AND const + @endverbatim + + Instead of '<' one can use '<=', '>', '>=' and '=' as well. + Instead of 'a=const' the condition 'a IS NULL' can be used. + + If all selected fields are replaced then we will also remove all + involved tables and return the answer without any join. 
Thus, the + following query will be replaced with a row of two constants: + @verbatim + SELECT MAX(b), MIN(d) FROM t1,t2 + WHERE a=const AND bconst + @endverbatim + (assuming a index for column d of table t2 is defined) +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "key.h" // key_cmp_if_same +#include "sql_select.h" + +static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref, Field* field, + COND *cond, uint *range_fl, + uint *key_prefix_length); +static int reckey_in_range(bool max_fl, TABLE_REF *ref, Field* field, + COND *cond, uint range_fl, uint prefix_len); +static int maxmin_in_range(bool max_fl, Field* field, COND *cond); + + +/* + Get exact count of rows in all tables + + SYNOPSIS + get_exact_records() + tables List of tables + + NOTES + When this is called, we know all table handlers supports HA_HAS_RECORDS + or HA_STATS_RECORDS_IS_EXACT + + RETURN + ULONGLONG_MAX Error: Could not calculate number of rows + # Multiplication of number of rows in all tables +*/ + +static ulonglong get_exact_record_count(List &tables) +{ + ulonglong count= 1; + TABLE_LIST *tl; + List_iterator ti(tables); + while ((tl= ti++)) + { + ha_rows tmp= tl->table->file->records(); + if (tmp == HA_POS_ERROR) + return ULONGLONG_MAX; + count*= tmp; + } + return count; +} + + +/** + Use index to read MIN(field) value. + + @param table Table object + @param ref Reference to the structure where we store the key value + @item_field Field used in MIN() + @range_fl Whether range endpoint is strict less than + @prefix_len Length of common key part for the range + + @retval + 0 No errors + HA_ERR_... 
Otherwise +*/ + +static int get_index_min_value(TABLE *table, TABLE_REF *ref, + Item_field *item_field, uint range_fl, + uint prefix_len) +{ + int error; + + if (!ref->key_length) + error= table->file->ha_index_first(table->record[0]); + else + { + /* + Use index to replace MIN/MAX functions with their values + according to the following rules: + + 1) Insert the minimum non-null values where the WHERE clause still + matches, or + 2) a NULL value if there are only NULL values for key_part_k. + 3) Fail, producing a row of nulls + + Implementation: Read the smallest value using the search key. If + the interval is open, read the next value after the search + key. If read fails, and we're looking for a MIN() value for a + nullable column, test if there is an exact match for the key. + */ + if (!(range_fl & NEAR_MIN)) + /* + Closed interval: Either The MIN argument is non-nullable, or + we have a >= predicate for the MIN argument. + */ + error= table->file->ha_index_read_map(table->record[0], + ref->key_buff, + make_prev_keypart_map(ref->key_parts), + HA_READ_KEY_OR_NEXT); + else + { + /* + Open interval: There are two cases: + 1) We have only MIN() and the argument column is nullable, or + 2) there is a > predicate on it, nullability is irrelevant. + We need to scan the next bigger record first. + Open interval is not used if the search key involves the last keypart, + and it would not work. + */ + DBUG_ASSERT(prefix_len < ref->key_length); + error= table->file->ha_index_read_map(table->record[0], + ref->key_buff, + make_prev_keypart_map(ref->key_parts), + HA_READ_AFTER_KEY); + /* + If the found record is outside the group formed by the search + prefix, or there is no such record at all, check if all + records in that group have NULL in the MIN argument + column. If that is the case return that NULL. + + Check if case 1 from above holds. If it does, we should read + the skipped tuple. 
+ */ + if (item_field->field->real_maybe_null() && + ref->key_buff[prefix_len] == 1 && + /* + Last keypart (i.e. the argument to MIN) is set to NULL by + find_key_for_maxmin only if all other keyparts are bound + to constants in a conjunction of equalities. Hence, we + can detect this by checking only if the last keypart is + NULL. + */ + (error == HA_ERR_KEY_NOT_FOUND || + key_cmp_if_same(table, ref->key_buff, ref->key, prefix_len))) + { + DBUG_ASSERT(item_field->field->real_maybe_null()); + error= table->file->ha_index_read_map(table->record[0], + ref->key_buff, + make_prev_keypart_map(ref->key_parts), + HA_READ_KEY_EXACT); + } + } + } + return error; +} + + +/** + Use index to read MAX(field) value. + + @param table Table object + @param ref Reference to the structure where we store the key value + @range_fl Whether range endpoint is strict greater than + + @retval + 0 No errors + HA_ERR_... Otherwise +*/ + +static int get_index_max_value(TABLE *table, TABLE_REF *ref, uint range_fl) +{ + return (ref->key_length ? + table->file->ha_index_read_map(table->record[0], ref->key_buff, + make_prev_keypart_map(ref->key_parts), + range_fl & NEAR_MAX ? + HA_READ_BEFORE_KEY : + HA_READ_PREFIX_LAST_OR_PREV) : + table->file->ha_index_last(table->record[0])); +} + + + +/** + Substitutes constants for some COUNT(), MIN() and MAX() functions. + + @param thd thread handler + @param tables list of leaves of join table tree + @param all_fields All fields to be returned + @param conds WHERE clause + + @note + This function is only called for queries with aggregate functions and no + GROUP BY part. This means that the result set shall contain a single + row only + + @retval + 0 no errors + @retval + 1 if all items were resolved + @retval + HA_ERR_KEY_NOT_FOUND on impossible conditions + @retval + HA_ERR_... if a deadlock or a lock wait timeout happens, for example + @retval + ER_... e.g. 
ER_SUBQUERY_NO_1_ROW +*/ + +int opt_sum_query(THD *thd, + List &tables, List &all_fields, COND *conds) +{ + List_iterator_fast it(all_fields); + List_iterator ti(tables); + TABLE_LIST *tl; + int const_result= 1; + bool recalc_const_item= 0; + ulonglong count= 1; + bool is_exact_count= TRUE, maybe_exact_count= TRUE; + table_map removed_tables= 0, outer_tables= 0, used_tables= 0; + table_map where_tables= 0; + Item *item; + int error= 0; + DBUG_ENTER("opt_sum_query"); + + thd->lex->current_select->min_max_opt_list.empty(); + + if (conds) + where_tables= conds->used_tables(); + + /* + Analyze outer join dependencies, and, if possible, compute the number + of returned rows. + */ + while ((tl= ti++)) + { + TABLE_LIST *embedded; + for (embedded= tl ; embedded; embedded= embedded->embedding) + { + if (embedded->on_expr) + break; + } + if (embedded) + /* Don't replace expression on a table that is part of an outer join */ + { + outer_tables|= tl->table->map; + + /* + We can't optimise LEFT JOIN in cases where the WHERE condition + restricts the table that is used, like in: + SELECT MAX(t1.a) FROM t1 LEFT JOIN t2 join-condition + WHERE t2.field IS NULL; + */ + if (tl->table->map & where_tables) + DBUG_RETURN(0); + } + else + used_tables|= tl->table->map; + + /* + If the storage manager of 'tl' gives exact row count as part of + statistics (cheap), compute the total number of rows. If there are + no outer table dependencies, this count may be used as the real count. + Schema tables are filled after this function is invoked, so we can't + get row count + */ + if (!(tl->table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) || + tl->schema_table) + { + maybe_exact_count&= MY_TEST(!tl->schema_table && + (tl->table->file->ha_table_flags() & + HA_HAS_RECORDS)); + is_exact_count= FALSE; + count= 1; // ensure count != 0 + } + else if (tl->is_materialized_derived() || + tl->jtbm_subselect) + { + /* + Can't remove a derived table as it's number of rows is just an + estimate. 
+ */ + DBUG_RETURN(0); + } + else + { + error= tl->table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); + if (unlikely(error)) + { + tl->table->file->print_error(error, MYF(ME_FATAL)); + DBUG_RETURN(error); + } + count*= tl->table->file->stats.records; + } + } + + /* + Iterate through all items in the SELECT clause and replace + COUNT(), MIN() and MAX() with constants (if possible). + */ + + while ((item= it++)) + { + if (item->type() == Item::SUM_FUNC_ITEM) + { + Item_sum *item_sum= (((Item_sum*) item)); + switch (item_sum->sum_func()) { + case Item_sum::COUNT_FUNC: + /* + If the expr in COUNT(expr) can never be null we can change this + to the number of rows in the tables if this number is exact and + there are no outer joins. + */ + if (!conds && !((Item_sum_count*) item)->get_arg(0)->maybe_null() && + !outer_tables && maybe_exact_count && + ((item->used_tables() & OUTER_REF_TABLE_BIT) == 0)) + { + if (!is_exact_count) + { + if ((count= get_exact_record_count(tables)) == ULONGLONG_MAX) + { + /* Error from handler in counting rows. Don't optimize count() */ + const_result= 0; + continue; + } + is_exact_count= 1; // count is now exact + } + ((Item_sum_count*) item)->make_const((longlong) count); + recalc_const_item= 1; + } + else + const_result= 0; + break; + case Item_sum::MIN_FUNC: + case Item_sum::MAX_FUNC: + { + int is_max= MY_TEST(item_sum->sum_func() == Item_sum::MAX_FUNC); + /* + If MIN/MAX(expr) is the first part of a key or if all previous + parts of the key is found in the COND, then we can use + indexes to find the key. + */ + Item *expr=item_sum->get_arg(0); + if (((expr->used_tables() & OUTER_REF_TABLE_BIT) == 0) && + expr->real_item()->type() == Item::FIELD_ITEM) + { + uchar key_buff[MAX_KEY_LENGTH]; + TABLE_REF ref; + uint range_fl, prefix_len; + + ref.key_buff= key_buff; + Item_field *item_field= (Item_field*) (expr->real_item()); + TABLE *table= item_field->field->table; + + /* + Look for a partial key that can be used for optimization. 
+ If we succeed, ref.key_length will contain the length of + this key, while prefix_len will contain the length of + the beginning of this key without field used in MIN/MAX(). + Type of range for the key part for this field will be + returned in range_fl. + */ + if (table->file->inited || (outer_tables & table->map) || + !find_key_for_maxmin(is_max, &ref, item_field->field, conds, + &range_fl, &prefix_len)) + { + const_result= 0; + break; + } + longlong info_limit= 1; + error= 0; + + table->file->info_push(INFO_KIND_FORCE_LIMIT_BEGIN, &info_limit); + if (!table->const_table) + { + if (likely(!(error= table->file->ha_index_init((uint) ref.key, + 1)))) + error= (is_max ? + get_index_max_value(table, &ref, range_fl) : + get_index_min_value(table, &ref, item_field, range_fl, + prefix_len)); + } + /* Verify that the read tuple indeed matches the search key */ + if (!error && + reckey_in_range(is_max, &ref, item_field->field, + conds, range_fl, prefix_len)) + error= HA_ERR_KEY_NOT_FOUND; + if (!table->const_table) + { + table->file->ha_end_keyread(); + table->file->ha_index_end(); + } + table->file->info_push(INFO_KIND_FORCE_LIMIT_END, NULL); + if (error) + { + if (error == HA_ERR_KEY_NOT_FOUND || error == HA_ERR_END_OF_FILE) + DBUG_RETURN(HA_ERR_KEY_NOT_FOUND); // No rows matching WHERE + /* HA_ERR_LOCK_DEADLOCK or some other error */ + table->file->print_error(error, MYF(0)); + DBUG_RETURN(error); + } + removed_tables|= table->map; + } + else if (!expr->const_item() || !is_exact_count || conds) + { + /* + The optimization is not applicable in both cases: + (a) 'expr' is a non-constant expression. Then we can't + replace 'expr' by a constant. + (b) 'expr' is a constant. According to ANSI, MIN/MAX must return + NULL if the query does not return any rows. Thus, if we are not + able to determine if the query returns any rows, we can't apply + the optimization and replace MIN/MAX with a constant. + (c) there is a WHERE clause. 
The WHERE conditions may result in + an empty result, but the clause cannot be taken into account here. + */ + const_result= 0; + break; + } + item_sum->set_aggregator(thd, + item_sum->has_with_distinct() ? + Aggregator::DISTINCT_AGGREGATOR : + Aggregator::SIMPLE_AGGREGATOR); + /* + If count == 0 (so is_exact_count == TRUE) and + there're no outer joins, set to NULL, + otherwise set to the constant value. + */ + if (!count && !outer_tables) + { + item_sum->aggregator_clear(); + } + else + { + item_sum->reset_and_add(); + /* + Save a reference to the item for possible rollback + of the min/max optimizations for this select + */ + thd->lex->current_select->min_max_opt_list.push_back(item_sum); + } + item_sum->make_const(); + recalc_const_item= 1; + break; + } + default: + const_result= 0; + break; + } + } + else if (const_result) + { + if (recalc_const_item) + item->update_used_tables(); + if (!item->const_item() && item->type() != Item::WINDOW_FUNC_ITEM) + const_result= 0; + } + } + + if (unlikely(thd->is_error())) + DBUG_RETURN(thd->get_stmt_da()->sql_errno()); + + /* + If we have a where clause, we can only ignore searching in the + tables if MIN/MAX optimisation replaced all used tables + We do not use replaced values in case of: + SELECT MIN(key) FROM table_1, empty_table + removed_tables is != 0 if we have used MIN() or MAX(). + */ + if (removed_tables && used_tables != removed_tables) + const_result= 0; // We didn't remove all tables + DBUG_RETURN(const_result); +} + + +/* + Check if both item1 and item2 are strings, and item1 has fewer characters + than item2. 
+*/ + +static bool check_item1_shorter_item2(Item *item1, Item *item2) +{ + if (item1->cmp_type() == STRING_RESULT && + item2->cmp_type() == STRING_RESULT) + { + int len1= item1->max_length / item1->collation.collation->mbmaxlen; + int len2= item2->max_length / item2->collation.collation->mbmaxlen; + return len1 < len2; + } + return false; /* When the check is not applicable, it means "not bigger" */ +} + + +/** + Test if the predicate compares a field with constants. + + @param func_item Predicate item + @param[out] args Here we store the field followed by constants + @param[out] inv_order Is set to 1 if the predicate is of the form + 'const op field' + + @retval + 0 func_item is a simple predicate: a field is compared with a constant + whose length does not exceed the max length of the field values + @retval + 1 Otherwise +*/ + +bool simple_pred(Item_func *func_item, Item **args, bool *inv_order) +{ + Item *item; + *inv_order= 0; + switch (func_item->argument_count()) { + case 0: + /* MULT_EQUAL_FUNC */ + { + Item_equal *item_equal= (Item_equal *) func_item; + if (!(args[1]= item_equal->get_const())) + return 0; + Item_equal_fields_iterator it(*item_equal); + if (!(item= it++)) + return 0; + args[0]= item->real_item(); + if (check_item1_shorter_item2(args[0], args[1])) + return 0; + if (it++) + return 0; + } + break; + case 1: + /* field IS NULL */ + item= func_item->arguments()[0]->real_item(); + if (item->type() != Item::FIELD_ITEM) + return 0; + args[0]= item; + break; + case 2: + /* 'field op const' or 'const op field' */ + item= func_item->arguments()[0]->real_item(); + if (item->type() == Item::FIELD_ITEM) + { + args[0]= item; + item= func_item->arguments()[1]->real_item(); + if (!item->const_item()) + return 0; + args[1]= item; + } + else if (item->const_item()) + { + args[1]= item; + item= func_item->arguments()[1]->real_item(); + if (item->type() != Item::FIELD_ITEM) + return 0; + args[0]= item; + *inv_order= 1; + } + else + return 0; + if 
(check_item1_shorter_item2(args[0], args[1])) + return 0; + break; + case 3: + /* field BETWEEN const AND const */ + item= func_item->arguments()[0]->real_item(); + if (item->type() == Item::FIELD_ITEM) + { + args[0]= item; + for (int i= 1 ; i <= 2; i++) + { + item= func_item->arguments()[i]->real_item(); + if (!item->const_item()) + return 0; + args[i]= item; + if (check_item1_shorter_item2(args[0], args[i])) + return 0; + } + } + else + return 0; + } + return 1; +} + + +/** + Check whether a condition matches a key to get {MAX|MIN}(field):. + + For the index specified by the keyinfo parameter and an index that + contains the field as its component (field_part), the function + checks whether + + - the condition cond is a conjunction, + - all of its conjuncts refer to columns of the same table, and + - each conjunct is on one of the following forms: + - f_i = const_i or const_i = f_i or f_i IS NULL, + where f_i is part of the index + - field {<|<=|>=|>|=} const + - const {<|<=|>=|>|=} field + - field BETWEEN const_1 AND const_2 + + As a side-effect, the key value to be used for looking up the MIN/MAX value + is actually stored inside the Field object. An interesting feature is that + the function will find the most restrictive endpoint by over-eager + evaluation of the @c WHERE condition. It continually stores the current + endpoint inside the Field object. For a query such as + + @code + SELECT MIN(a) FROM t1 WHERE a > 3 AND a > 5; + @endcode + + the algorithm will recurse over the conjuction, storing first a 3 in the + field. In the next recursive invocation the expression a > 5 is evaluated + as 3 > 5 (Due to the dual nature of Field objects as value carriers and + field identifiers), which will obviously fail, leading to 5 being stored in + the Field object. 
+ + @param[in] max_fl Set to true if we are optimizing MAX(), + false means we are optimizing %MIN() + @param[in, out] ref Reference to the structure where the function + stores the key value + @param[in] keyinfo Reference to the key info + @param[in] field_part Pointer to the key part for the field + @param[in] cond WHERE condition + @param[in,out] key_part_used Map of matchings parts. The function will output + the set of key parts actually being matched in + this set, yet it relies on the caller to + initialize the value to zero. This is due + to the fact that this value is passed + recursively. + @param[in,out] range_fl Says whether endpoints use strict greater/less + than. + @param[out] prefix_len Length of common key part for the range + where MAX/MIN is searched for + + @retval + false Index can't be used. + @retval + true We can use the index to get MIN/MAX value +*/ + +static bool matching_cond(bool max_fl, TABLE_REF *ref, KEY *keyinfo, + KEY_PART_INFO *field_part, COND *cond, + key_part_map *key_part_used, uint *range_fl, + uint *prefix_len) +{ + DBUG_ENTER("matching_cond"); + if (!cond) + DBUG_RETURN(TRUE); + Field *field= field_part->field; + table_map cond_used_tables= cond->used_tables(); + if (cond_used_tables & OUTER_REF_TABLE_BIT) + { + DBUG_RETURN(FALSE); + } + if (!(cond_used_tables & field->table->map) && + MY_TEST(cond_used_tables & ~PSEUDO_TABLE_BITS)) + { + /* Condition doesn't restrict the used table */ + DBUG_RETURN(!cond->const_item()); + } + else if (cond->is_expensive()) + DBUG_RETURN(FALSE); + if (cond->type() == Item::COND_ITEM) + { + if (((Item_cond*) cond)->functype() == Item_func::COND_OR_FUNC) + DBUG_RETURN(FALSE); + + /* AND */ + List_iterator_fast li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item= li++)) + { + if (!matching_cond(max_fl, ref, keyinfo, field_part, item, + key_part_used, range_fl, prefix_len)) + DBUG_RETURN(FALSE); + } + DBUG_RETURN(TRUE); + } + + if (cond->type() != Item::FUNC_ITEM) + 
DBUG_RETURN(FALSE); // Not operator, can't optimize + + bool eq_type= 0; // =, <=> or IS NULL + bool is_null_safe_eq= FALSE; // The operator is NULL safe, e.g. <=> + bool noeq_type= 0; // < or > + bool less_fl= 0; // < or <= + bool is_null= 0; // IS NULL + bool between= 0; // BETWEEN ... AND ... + + switch (((Item_func*) cond)->functype()) { + case Item_func::ISNULL_FUNC: + is_null= 1; /* fall through */ + case Item_func::EQ_FUNC: + eq_type= TRUE; + break; + case Item_func::EQUAL_FUNC: + eq_type= is_null_safe_eq= TRUE; + break; + case Item_func::LT_FUNC: + noeq_type= 1; /* fall through */ + case Item_func::LE_FUNC: + less_fl= 1; + break; + case Item_func::GT_FUNC: + noeq_type= 1; /* fall through */ + case Item_func::GE_FUNC: + break; + case Item_func::BETWEEN: + if (((Item_func_between*) cond)->negated) + DBUG_RETURN(FALSE); + between= 1; + break; + case Item_func::MULT_EQUAL_FUNC: + eq_type= 1; + break; + default: + DBUG_RETURN(FALSE); // Can't optimize function + } + + Item *args[3]; + bool inv; + + /* Test if this is a comparison of a field and constant */ + if (!simple_pred((Item_func*) cond, args, &inv)) + DBUG_RETURN(FALSE); + + if (!is_null_safe_eq && !is_null && + (args[1]->is_null() || (between && args[2]->is_null()))) + DBUG_RETURN(FALSE); + + if (inv && !eq_type) + less_fl= 1-less_fl; // Convert '<' -> '>' (etc) + + /* Check if field is part of the tested partial key */ + uchar *key_ptr= ref->key_buff; + KEY_PART_INFO *part; + for (part= keyinfo->key_part; ; key_ptr+= part++->store_length) + + { + if (part > field_part) + DBUG_RETURN(FALSE); // Field is beyond the tested parts + if (part->field->eq(((Item_field*) args[0])->field)) + break; // Found a part of the key for the field + } + + bool is_field_part= part == field_part; + if (!(is_field_part || eq_type)) + DBUG_RETURN(FALSE); + + key_part_map org_key_part_used= *key_part_used; + if (eq_type || between || max_fl == less_fl) + { + uint length= (uint)(key_ptr-ref->key_buff)+part->store_length; + if 
(ref->key_length < length) + { + /* Ultimately ref->key_length will contain the length of the search key */ + ref->key_length= length; + ref->key_parts= (uint)(part - keyinfo->key_part) + 1; + } + if (!*prefix_len && part+1 == field_part) + *prefix_len= length; + if (is_field_part && eq_type) + *prefix_len= ref->key_length; + + *key_part_used|= (key_part_map) 1 << (part - keyinfo->key_part); + } + + if (org_key_part_used == *key_part_used && + /* + The current search key is not being extended with a new key part. This + means that the a condition is added a key part for which there was a + previous condition. We can only overwrite such key parts in some special + cases, e.g. a > 2 AND a > 1 (here range_fl must be set to something). In + all other cases the WHERE condition is always false anyway. + */ + (eq_type || *range_fl == 0)) + DBUG_RETURN(FALSE); + + if (org_key_part_used != *key_part_used || + (is_field_part && + (between || eq_type || max_fl == less_fl) && !cond->val_int())) + { + /* + It's the first predicate for this part or a predicate of the + following form that moves upper/lower bounds for max/min values: + - field BETWEEN const AND const + - field = const + - field {<|<=} const, when searching for MAX + - field {>|>=} const, when searching for MIN + */ + + if (is_null || (is_null_safe_eq && args[1]->is_null())) + { + /* + If we have a non-nullable index, we cannot use it, + since set_null will be ignored, and we will compare uninitialized data. + */ + if (!part->field->real_maybe_null()) + DBUG_RETURN(FALSE); + part->field->set_null(); + *key_ptr= (uchar) 1; + } + else + { + /* Update endpoints for MAX/MIN, see function comment. */ + Item *value= args[between && max_fl ? 
2 : 1]; + value->save_in_field_no_warnings(part->field, 1); + if (part->null_bit) + *key_ptr++= (uchar) MY_TEST(part->field->is_null()); + part->field->get_key_image(key_ptr, part->length, Field::itRAW); + } + if (is_field_part) + { + if (between || eq_type) + { + *range_fl&= ~(NO_MAX_RANGE | NO_MIN_RANGE); + *range_fl&= ~(max_fl ? NEAR_MAX : NEAR_MIN); + } + else + { + *range_fl&= ~(max_fl ? NO_MAX_RANGE : NO_MIN_RANGE); + if (noeq_type) + *range_fl|= (max_fl ? NEAR_MAX : NEAR_MIN); + else + *range_fl&= ~(max_fl ? NEAR_MAX : NEAR_MIN); + } + } + } + else if (eq_type) + { + if ((!is_null && !cond->val_int()) || + (is_null && !MY_TEST(part->field->is_null()))) + DBUG_RETURN(FALSE); // Impossible test + } + else if (is_field_part) + *range_fl&= ~(max_fl ? NO_MIN_RANGE : NO_MAX_RANGE); + DBUG_RETURN(TRUE); +} + + +/** + Check whether we can get value for {max|min}(field) by using a key. + + If where-condition is not a conjunction of 0 or more conjuct the + function returns false, otherwise it checks whether there is an + index including field as its k-th component/part such that: + + -# for each previous component f_i there is one and only one conjunct + of the form: f_i= const_i or const_i= f_i or f_i is null + -# references to field occur only in conjucts of the form: + field {<|<=|>=|>|=} const or const {<|<=|>=|>|=} field or + field BETWEEN const1 AND const2 + -# all references to the columns from the same table as column field + occur only in conjucts mentioned above. + -# each of k first components the index is not partial, i.e. is not + defined on a fixed length proper prefix of the field. + + If such an index exists the function through the ref parameter + returns the key value to find max/min for the field using the index, + the length of first (k-1) components of the key and flags saying + how to apply the key for the search max/min value. 
+ (if we have a condition field = const, prefix_len contains the length + of the whole search key) + + @param[in] max_fl 0 for MIN(field) / 1 for MAX(field) + @param[in,out] ref Reference to the structure we store the key value + @param[in] field Field used inside MIN() / MAX() + @param[in] cond WHERE condition + @param[out] range_fl Bit flags for how to search if key is ok + @param[out] prefix_len Length of prefix for the search range + + @note + This function may set field->table->key_read to true, + which must be reset after index is used! + (This can only happen when function returns 1) + + @retval + 0 Index can not be used to optimize MIN(field)/MAX(field) + @retval + 1 Can use key to optimize MIN()/MAX(). + In this case ref, range_fl and prefix_len are updated +*/ + +static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref, + Field* field, COND *cond, + uint *range_fl, uint *prefix_len) +{ + if (!(field->flags & PART_KEY_FLAG)) + return FALSE; // Not key field + + DBUG_ENTER("find_key_for_maxmin"); + + TABLE *table= field->table; + uint idx= 0; + + KEY *keyinfo,*keyinfo_end; + for (keyinfo= table->key_info, keyinfo_end= keyinfo+table->s->keys ; + keyinfo != keyinfo_end; + keyinfo++,idx++) + { + KEY_PART_INFO *part,*part_end; + key_part_map key_part_to_use= 0; + /* + Perform a check if index is not disabled by ALTER TABLE + or IGNORE INDEX. 
+ */ + if (!table->keys_in_use_for_query.is_set(idx)) + continue; + uint jdx= 0; + *prefix_len= 0; + part_end= keyinfo->key_part+table->actual_n_key_parts(keyinfo); + for (part= keyinfo->key_part ; + part != part_end ; + part++, jdx++, key_part_to_use= (key_part_to_use << 1) | 1) + { + if (!(table->file->index_flags(idx, jdx, 0) & HA_READ_ORDER)) + DBUG_RETURN(FALSE); + + /* Check whether the index component is partial */ + Field *part_field= table->field[part->fieldnr-1]; + if ((part_field->flags & BLOB_FLAG) || + part->length < part_field->key_length()) + break; + + if (part->key_part_flag & HA_REVERSE_SORT) + break; // TODO MDEV-27576 + + if (field->eq(part->field)) + { + ref->key= idx; + ref->key_length= 0; + ref->key_parts= 0; + key_part_map key_part_used= 0; + *range_fl= NO_MIN_RANGE | NO_MAX_RANGE; + if (matching_cond(max_fl, ref, keyinfo, part, cond, + &key_part_used, range_fl, prefix_len) && + !(key_part_to_use & ~key_part_used)) + { + if (!max_fl && key_part_used == key_part_to_use && part->null_bit) + { + /* + The query is on this form: + + SELECT MIN(key_part_k) + FROM t1 + WHERE key_part_1 = const and ... and key_part_k-1 = const + + If key_part_k is nullable, we want to find the first matching row + where key_part_k is not null. The key buffer is now {const, ..., + NULL}. This will be passed to the handler along with a flag + indicating open interval. If a tuple is read that does not match + these search criteria, an attempt will be made to read an exact + match for the key buffer. 
+ */ + /* Set the first byte of key_part_k to 1, that means NULL */ + ref->key_buff[ref->key_length]= 1; + ref->key_length+= part->store_length; + ref->key_parts++; + DBUG_ASSERT(ref->key_parts == jdx+1); + *range_fl&= ~NO_MIN_RANGE; + *range_fl|= NEAR_MIN; // Open interval + } + /* + The following test is false when the key in the key tree is + converted (for example to upper case) + */ + if (field->part_of_key.is_set(idx)) + table->file->ha_start_keyread(idx); + DBUG_RETURN(TRUE); + } + } + } + } + DBUG_RETURN(FALSE); +} + + +/** + Check whether found key is in range specified by conditions. + + @param[in] max_fl 0 for MIN(field) / 1 for MAX(field) + @param[in] ref Reference to the key value and info + @param[in] field Field used the MIN/MAX expression + @param[in] cond WHERE condition + @param[in] range_fl Says whether there is a condition to to be checked + @param[in] prefix_len Length of the constant part of the key + + @retval + 0 ok + @retval + 1 WHERE was not true for the found row +*/ + +static int reckey_in_range(bool max_fl, TABLE_REF *ref, Field* field, + COND *cond, uint range_fl, uint prefix_len) +{ + if (key_cmp_if_same(field->table, ref->key_buff, ref->key, prefix_len)) + return 1; + if (!cond || (range_fl & (max_fl ? NO_MIN_RANGE : NO_MAX_RANGE))) + return 0; + return maxmin_in_range(max_fl, field, cond); +} + + +/** + Check whether {MAX|MIN}(field) is in range specified by conditions. 
+ + @param[in] max_fl 0 for MIN(field) / 1 for MAX(field) + @param[in] field Field used the MIN/MAX expression + @param[in] cond WHERE condition + + @retval + 0 ok + @retval + 1 WHERE was not true for the found row +*/ + +static int maxmin_in_range(bool max_fl, Field* field, COND *cond) +{ + /* If AND/OR condition */ + if (cond->type() == Item::COND_ITEM) + { + List_iterator_fast li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item= li++)) + { + if (maxmin_in_range(max_fl, field, item)) + return 1; + } + return 0; + } + + if (cond->used_tables() != field->table->map) + return 0; + bool less_fl= 0; + switch (((Item_func*) cond)->functype()) { + case Item_func::BETWEEN: + return cond->val_int() == 0; // Return 1 if WHERE is false + case Item_func::LT_FUNC: + case Item_func::LE_FUNC: + less_fl= 1; + /* fall through */ + case Item_func::GT_FUNC: + case Item_func::GE_FUNC: + { + Item *item= ((Item_func*) cond)->arguments()[1]; + /* In case of 'const op item' we have to swap the operator */ + if (!item->const_item()) + less_fl= 1-less_fl; + /* + We only have to check the expression if we are using an expression like + SELECT MAX(b) FROM t1 WHERE a=const AND b>const + not for + SELECT MAX(b) FROM t1 WHERE a=const AND bval_int() == 0; // Return 1 if WHERE is false + return 0; + } + default: + break; // Ignore + } + return 0; +} + diff --git a/sql/opt_table_elimination.cc b/sql/opt_table_elimination.cc new file mode 100644 index 00000000..ae73eb75 --- /dev/null +++ b/sql/opt_table_elimination.cc @@ -0,0 +1,2157 @@ +/* + Copyright (c) 2009, 2011, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + @brief + Table Elimination Module + + @defgroup Table_Elimination Table Elimination Module + @{ +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "my_bit.h" +#include "sql_select.h" +#include "opt_trace.h" +#include "my_json_writer.h" + +/* + OVERVIEW + ======== + + This file contains table elimination module. The idea behind table + elimination is as follows: suppose we have a left join + + SELECT * FROM t1 LEFT JOIN + (t2 JOIN t3) ON t2.primary_key=t1.col AND + t2.primary_key=t2.col + WHERE ... + + such that + * columns of the inner tables are not used anywhere ouside the outer join + (not in WHERE, not in GROUP/ORDER BY clause, not in select list etc etc), + * inner side of the outer join is guaranteed to produce at most one matching + record combination for each record combination of outer tables. + + then the inner side of the outer join can be removed from the query, as it + will always produce only one record combination (either real or + null-complemented one) and we don't care about what that record combination + is. + + + MODULE INTERFACE + ================ + + The module has one entry point - the eliminate_tables() function, which one + needs to call (once) at some point before join optimization. + eliminate_tables() operates over the JOIN structures. Logically, it + removes the inner tables of an outer join operation together with the + operation itself. Physically, it changes the following members: + + * Eliminated tables are marked as constant and moved to the front of the + join order. + + * In addition to this, they are recorded in JOIN::eliminated_tables bitmap. 
+ + * Items that became disused because they were in the ON expression of an + eliminated outer join are notified by means of the Item tree walk which + calls Item::mark_as_eliminated_processor for every item + - At the moment the only Item that cares whether it was eliminated is + Item_subselect with its Item_subselect::eliminated flag which is used + by EXPLAIN code to check if the subquery should be shown in EXPLAIN. + + Table elimination is redone on every PS re-execution. + + + TABLE ELIMINATION ALGORITHM FOR ONE OUTER JOIN + ============================================== + + As described above, we can remove inner side of an outer join if it is + + 1. not referred to from any other parts of the query + 2. always produces one matching record combination. + + We check #1 by doing a recursive descent down the join->join_list while + maintaining a union of used_tables() attribute of all Item expressions in + other parts of the query. When we encounter an outer join, we check if the + bitmap of tables on its inner side has intersection with tables that are used + elsewhere. No intersection means that inner side of the outer join could + potentially be eliminated. + + In order to check #2, one needs to prove that inner side of an outer join + is functionally dependent on the outside. The proof is constructed from + functional dependencies of intermediate objects: + + - Inner side of outer join is functionally dependent when each of its tables + are functionally dependent. (We assume a table is functionally dependent + when its dependencies allow to uniquely identify one table record, or no + records). + + - Table is functionally dependent when it has got a unique key whose columns + are functionally dependent. + + - A column is functionally dependent when we could locate an AND-part of a + certain ON clause in form + + tblX.columnY= expr + + where expr is functionally depdendent. 
expr is functionally dependent when + all columns that it refers to are functionally dependent. + + These relationships are modeled as a bipartite directed graph that has + dependencies as edges and two kinds of nodes: + + Value nodes: + - Table column values (each is a value of tblX.columnY) + - Table values (each node represents a table inside the join nest we're + trying to eliminate). + A value has one attribute, it is either bound (i.e. functionally dependent) + or not. + + Module nodes: + - Modules representing tblX.colY=expr equalities. Equality module has + = incoming edges from columns used in expr + = outgoing edge to tblX.colY column. + - Nodes representing unique keys. Unique key has + = incoming edges from key component value modules + = outgoing edge to key's table module + - Nodes representing unique pseudo-keys for derived tables. + Unique pseudo-keys are composed as a result of GROUP BY expressions. + Like normal unique keys, they have: + = incoming edges from key component value modules + = outgoing edge to key's table module + - Inner side of outer join module. Outer join module has + = incoming edges from table value modules + = No outgoing edges. Once we reach it, we know we can eliminate the + outer join. + A module may depend on multiple values, and hence its primary attribute is + the number of its arguments that are not bound. + + The algorithm starts with equality nodes that don't have any incoming edges + (their expressions are either constant or depend only on tables that are + outside of the outer join in question) and performns a breadth-first + traversal. If we reach the outer join nest node, it means outer join is + functionally dependent and can be eliminated. Otherwise it cannot be + eliminated. + + HANDLING MULTIPLE NESTED OUTER JOINS + ==================================== + + Outer joins that are not nested one within another are eliminated + independently. For nested outer joins we have the following considerations: + + 1. 
ON expressions from children outer joins must be taken into account + + Consider this example: + + SELECT t0.* + FROM + t0 + LEFT JOIN + (t1 LEFT JOIN t2 ON t2.primary_key=t1.col1) + ON + t1.primary_key=t0.col AND t2.col1=t1.col2 + + Here we cannot eliminate the "... LEFT JOIN t2 ON ..." part alone because the + ON clause of top level outer join has references to table t2. + We can eliminate the entire "... LEFT JOIN (t1 LEFT JOIN t2) ON .." part, + but in order to do that, we must look at both ON expressions. + + 2. ON expressions of parent outer joins are useless. + Consider an example: + + SELECT t0.* + FROM + t0 + LEFT JOIN + (t1 LEFT JOIN t2 ON some_expr) + ON + t2.primary_key=t1.col -- (*) + + Here the uppermost ON expression has a clause that gives us functional + dependency of table t2 on t1 and hence could be used to eliminate the + "... LEFT JOIN t2 ON..." part. + However, we would not actually encounter this situation, because before the + table elimination we run simplify_joins(), which, among other things, upon + seeing a functional dependency condition like (*) will convert the outer join + of + + "... LEFT JOIN t2 ON ..." + + into inner join and thus make table elimination not to consider eliminating + table t2. +*/ + +class Dep_value; + class Dep_value_field; + class Dep_value_table; + + +class Dep_module; + class Dep_module_expr; + class Dep_module_goal; + class Dep_module_key; + class Dep_module_pseudo_key; + +class Dep_analysis_context; + + +/* + A value, something that can be bound or not bound. 
One can also iterate over + unbound modules that depend on this value +*/ + +class Dep_value : public Sql_alloc +{ +public: + Dep_value(): bound(FALSE) {} + virtual ~Dep_value() = default; /* purecov: inspected */ + + bool is_bound() { return bound; } + void make_bound() { bound= TRUE; } + + /* Iteration over unbound modules that depend on this value */ + typedef char *Iterator; + virtual Iterator init_unbound_modules_iter(char *buf)=0; + virtual Dep_module* get_next_unbound_module(Dep_analysis_context *dac, + Iterator iter) = 0; + static const size_t iterator_size; +protected: + bool bound; +}; + + +/* + A table field value. There is exactly only one such object for any tblX.fieldY + - the field depends on its table and equalities + - expressions that use the field are its dependencies +*/ + +class Dep_value_field : public Dep_value +{ +public: + Dep_value_field(Dep_value_table *table_arg, Field *field_arg) : + table(table_arg), field(field_arg) + {} + + Dep_value_table *table; /* Table this field is from */ + Field *field; /* Field this object is representing */ + + /* Iteration over unbound modules that are our dependencies */ + Iterator init_unbound_modules_iter(char *buf); + Dep_module* get_next_unbound_module(Dep_analysis_context *dac, + Iterator iter); + + void make_unbound_modules_iter_skip_keys(Iterator iter); + + static const size_t iterator_size; +private: + /* + Field_deps that belong to one table form a linked list, ordered by + field_index + */ + Dep_value_field *next_table_field; + + /* + Offset to bits in Dep_analysis_context::expr_deps (see comment to that + member for semantics of the bits). 
+ */ + uint bitmap_offset; + + class Module_iter + { + public: + /* if not null, return this and advance */ + Dep_module_key *key_dep; + /* Otherwise, this and advance */ + uint equality_no; + /* Or this one and advance */ + Dep_module_pseudo_key *pseudo_key_dep; + }; + friend class Dep_analysis_context; + friend class Field_dependency_recorder; + friend class Dep_value_table; +}; + +const size_t Dep_value_field::iterator_size= + ALIGN_SIZE(sizeof(Dep_value_field::Module_iter)); + + +/* + A table value. There is one Dep_value_table object for every table that can + potentially be eliminated. + + Table becomes bound as soon as some of its unique keys becomes bound + Once the table is bound: + - all of its fields are bound + - its embedding outer join has one less unknown argument +*/ + +class Dep_value_table : public Dep_value +{ +public: + Dep_value_table(TABLE *table_arg) : + table(table_arg), fields(NULL), keys(NULL), pseudo_key(NULL) + {} + TABLE *table; /* Table this object is representing */ + /* Ordered list of fields that belong to this table */ + Dep_value_field *fields; + + /* Ordered list of Unique keys in this table */ + Dep_module_key *keys; + + /* + Possible unique pseudo-key applicable for this table + (only none or a single one is possible) + */ + Dep_module_pseudo_key *pseudo_key; + + /* Iteration over unbound modules that are our dependencies */ + Iterator init_unbound_modules_iter(char *buf); + Dep_module* get_next_unbound_module(Dep_analysis_context *dac, + Iterator iter); + static const size_t iterator_size; +private: + class Module_iter + { + public: + /* Space for field iterator */ + char buf[Dep_value_field::iterator_size]; + /* !NULL <=> iterating over depdenent modules of this field */ + Dep_value_field *field_dep; + bool returned_goal; + }; +}; + + +const size_t Dep_value_table::iterator_size= + ALIGN_SIZE(sizeof(Dep_value_table::Module_iter)); + +const size_t Dep_value::iterator_size= + MY_MAX(Dep_value_table::iterator_size, 
Dep_value_field::iterator_size); + + +/* + A 'module'. Module has unsatisfied dependencies, number of whose is stored in + unbound_args. Modules also can be linked together in a list. +*/ + +class Dep_module : public Sql_alloc +{ +public: + virtual ~Dep_module() = default; /* purecov: inspected */ + + /* Mark as bound. Currently is non-virtual and does nothing */ + void make_bound() {}; + + /* + The final module will return TRUE here. When we see that TRUE was returned, + that will mean that functional dependency check succeeded. + */ + virtual bool is_final () { return FALSE; } + + /* + Increment number of bound arguments. this is expected to change + is_applicable() from false to true after sufficient set of arguments is + bound. + */ + void touch() { unbound_args--; } + bool is_applicable() { return !MY_TEST(unbound_args); } + + /* Iteration over values that */ + typedef char *Iterator; + virtual Iterator init_unbound_values_iter(char *buf)=0; + virtual Dep_value* get_next_unbound_value(Dep_analysis_context *dac, + Iterator iter)=0; + static const size_t iterator_size; +protected: + uint unbound_args; + + Dep_module() : unbound_args(0) {} + /* to bump unbound_args when constructing depedendencies */ + friend class Field_dependency_recorder; + friend class Dep_analysis_context; +}; + + +/* + This represents either + - "tbl.column= expr" equality dependency, i.e. tbl.column depends on fields + used in the expression, or + - tbl1.col1=tbl2.col2=... multi-equality. 
+*/ + +class Dep_module_expr : public Dep_module +{ +public: + Dep_value_field *field; + Item *expr; + + List *mult_equal_fields; + /* Used during condition analysis only, similar to KEYUSE::level */ + uint level; + + Iterator init_unbound_values_iter(char *buf); + Dep_value* get_next_unbound_value(Dep_analysis_context *dac, Iterator iter); + static const size_t iterator_size; +private: + class Value_iter + { + public: + Dep_value_field *field; + List_iterator it; + }; +}; + +const size_t Dep_module_expr::iterator_size= + ALIGN_SIZE(sizeof(Dep_module_expr::Value_iter)); + + +/* + A Unique key module + - Unique key has all of its components as arguments + - Once unique key is bound, its table value is known +*/ + +class Dep_module_key: public Dep_module +{ +public: + Dep_module_key(Dep_value_table *table_arg, uint keyno_arg, uint n_parts_arg) : + table(table_arg), keyno(keyno_arg), next_table_key(NULL) + { + unbound_args= n_parts_arg; + } + Dep_value_table *table; /* Table this key is from */ + uint keyno; /* The index we're representing */ + /* Unique keys form a linked list, ordered by keyno */ + Dep_module_key *next_table_key; + + Iterator init_unbound_values_iter(char *buf); + Dep_value* get_next_unbound_value(Dep_analysis_context *dac, Iterator iter); + static const size_t iterator_size; +private: + class Value_iter + { + public: + Dep_value_table *table; + }; +}; + +const size_t Dep_module_key::iterator_size= + ALIGN_SIZE(sizeof(Dep_module_key::Value_iter)); + + +/* + A unique pseudo-key module for a derived table. 
+ For example, a derived table + "SELECT a, count(*) from t1 GROUP BY a" + has unique values in its first field "a" due to GROUP BY expression + so this can be considered as a unique key for this derived table +*/ + +class Dep_module_pseudo_key : public Dep_module +{ +public: + Dep_module_pseudo_key(Dep_value_table *table_arg, + MY_BITMAP *exposed_fields, + uint exposed_fields_num) + : table(table_arg), exposed_fields_map(exposed_fields) + { + unbound_args= exposed_fields_num; + } + + Dep_value_table *table; + + Iterator init_unbound_values_iter(char *buf) override; + + Dep_value *get_next_unbound_value(Dep_analysis_context *dac, + Iterator iter) override; + + bool covers_field(int field_index); + + static const size_t iterator_size; + +private: + /* + Bitmap of field numbers in the derived table's SELECT list + which are included in the GROUP BY expression. + For example, unique pseudo-key for SQL + "SELECT count(*), b, a FROM t1 GROUP BY a, b" + will include two elements: {2} and {1}, since "a" and "b" are on the + GROUP BY list and also are present on the SELECT list with numbers 2 and 1 + (numeration starts from 0). + */ + MY_BITMAP *exposed_fields_map; + + class Value_iter + { + public: + Dep_value_table *table; + }; +}; + +const size_t Dep_module_pseudo_key::iterator_size= + ALIGN_SIZE(sizeof(Dep_module_pseudo_key::Value_iter)); + +const size_t Dep_module::iterator_size= + MY_MAX(Dep_module_expr::iterator_size, + MY_MAX(Dep_module_key::iterator_size, + Dep_module_pseudo_key::iterator_size)); + +/* + A module that represents outer join that we're trying to eliminate. If we + manage to declare this module to be bound, then outer join can be eliminated. 
+*/ + +class Dep_module_goal: public Dep_module +{ +public: + Dep_module_goal(uint n_children) + { + unbound_args= n_children; + } + bool is_final() { return TRUE; } + /* + This is the goal module, so the running wave algorithm should terminate + once it sees that this module is applicable and should never try to apply + it, hence no use for unbound value iterator implementation. + */ + Iterator init_unbound_values_iter(char *buf) + { + DBUG_ASSERT(0); + return NULL; + } + Dep_value* get_next_unbound_value(Dep_analysis_context *dac, Iterator iter) + { + DBUG_ASSERT(0); + return NULL; + } +}; + + +/* + Functional dependency analyzer context +*/ +class Dep_analysis_context +{ +public: + bool setup_equality_modules_deps(List *bound_modules); + bool run_wave(List *new_bound_modules); + + /* Tables that we're looking at eliminating */ + table_map usable_tables; + + /* Array of equality dependencies */ + Dep_module_expr *equality_mods; + uint n_equality_mods; /* Number of elements in the array */ + uint n_equality_mods_alloced; + + /* tablenr -> Dep_value_table* mapping. */ + Dep_value_table *table_deps[MAX_KEY]; + + /* Element for the outer join we're attempting to eliminate */ + Dep_module_goal *outer_join_dep; + + /* + Bitmap of how expressions depend on bits. Given a Dep_value_field object, + one can check bitmap_is_set(expr_deps, field_val->bitmap_offset + expr_no) + to see if expression equality_mods[expr_no] depends on the given field. 
+ */ + MY_BITMAP expr_deps; + + Dep_value_table *create_table_value(TABLE_LIST *table_list); + Dep_value_field *get_field_value(Field *field); + +#ifndef DBUG_OFF + void dbug_print_deps(); +#endif + +private: + void create_unique_pseudo_key_if_needed(TABLE_LIST *table_list, + Dep_value_table *tbl_dep); + int find_field_in_list(List &fields_list, Item *field); +}; + + +void eliminate_tables(JOIN *join); + +static bool +eliminate_tables_for_list(JOIN *join, + List *join_list, + table_map tables_in_list, + Item *on_expr, + table_map tables_used_elsewhere, + Json_writer_array* trace_eliminate_tables); +static +bool check_func_dependency(JOIN *join, + table_map dep_tables, + List_iterator *it, + TABLE_LIST *oj_tbl, + Item* cond); +static +void build_eq_mods_for_cond(THD *thd, Dep_analysis_context *dac, + Dep_module_expr **eq_mod, uint *and_level, + Item *cond); +static +void check_equality(Dep_analysis_context *dac, Dep_module_expr **eq_mod, + uint and_level, Item_bool_func *cond, + Item *left, Item *right); +static +Dep_module_expr *merge_eq_mods(Dep_module_expr *start, + Dep_module_expr *new_fields, + Dep_module_expr *end, uint and_level); +static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl, + Json_writer_array* trace_eliminate_tables); +static +void add_module_expr(Dep_analysis_context *dac, Dep_module_expr **eq_mod, + uint and_level, Dep_value_field *field_val, Item *right, + List* mult_equal_fields); + + +/*****************************************************************************/ + +/* + Perform table elimination + + SYNOPSIS + eliminate_tables() + join Join to work on + + DESCRIPTION + This is the entry point for table elimination. Grep for MODULE INTERFACE + section in this file for calling convention. + + The idea behind table elimination is that if we have an outer join: + + SELECT * FROM t1 LEFT JOIN + (t2 JOIN t3) ON t2.primary_key=t1.col AND + t3.primary_key=t2.col + such that + + 1. 
columns of the inner tables are not used anywhere ouside the outer + join (not in WHERE, not in GROUP/ORDER BY clause, not in select list + etc etc), and + 2. inner side of the outer join is guaranteed to produce at most one + record combination for each record combination of outer tables. + + then the inner side of the outer join can be removed from the query. + This is because it will always produce one matching record (either a + real match or a NULL-complemented record combination), and since there + are no references to columns of the inner tables anywhere, it doesn't + matter which record combination it was. + + This function primary handles checking #1. It collects a bitmap of + tables that are not used in select list/GROUP BY/ORDER BY/HAVING/etc and + thus can possibly be eliminated. + + After this, if #1 is met, the function calls eliminate_tables_for_list() + that checks #2. + + SIDE EFFECTS + See the OVERVIEW section at the top of this file. + +*/ + +void eliminate_tables(JOIN *join) +{ + THD* thd= join->thd; + Item *item; + table_map used_tables; + DBUG_ENTER("eliminate_tables"); + + DBUG_ASSERT(join->eliminated_tables == 0); + + /* If there are no outer joins, we have nothing to eliminate: */ + if (!join->outer_join) + DBUG_VOID_RETURN; + + if (!optimizer_flag(thd, OPTIMIZER_SWITCH_TABLE_ELIMINATION)) + DBUG_VOID_RETURN; /* purecov: inspected */ + + Json_writer_object trace_wrapper(thd); + + /* Find the tables that are referred to from WHERE/HAVING */ + used_tables= (join->conds? join->conds->used_tables() : 0) | + (join->having? join->having->used_tables() : 0); + + /* + For "INSERT ... SELECT ... ON DUPLICATE KEY UPDATE column = val" + we should also take into account tables mentioned in "val". 
+ */ + if (join->thd->lex->sql_command == SQLCOM_INSERT_SELECT && + join->select_lex == thd->lex->first_select_lex()) + { + List_iterator val_it(thd->lex->value_list); + while ((item= val_it++)) + { + DBUG_ASSERT(item->fixed()); + used_tables |= item->used_tables(); + } + } + + /* Add tables referred to from the select list */ + List_iterator it(join->fields_list); + while ((item= it++)) + used_tables |= item->used_tables(); + + { + /* + Table function JSON_TABLE() can have references to other tables. Do not + eliminate the tables that JSON_TABLE() refers to. + Note: the JSON_TABLE itself cannot be eliminated as it doesn't + have unique keys. + */ + List_iterator it(join->select_lex->leaf_tables); + TABLE_LIST *tbl; + while ((tbl= it++)) + { + if (tbl->table_function) + used_tables|= tbl->table_function->used_tables(); + } + } + + /* Add tables referred to from ORDER BY and GROUP BY lists */ + ORDER *all_lists[]= { join->order, join->group_list}; + for (int i=0; i < 2; i++) + { + for (ORDER *cur_list= all_lists[i]; cur_list; cur_list= cur_list->next) + used_tables |= (*(cur_list->item))->used_tables(); + } + + if (join->select_lex == thd->lex->first_select_lex()) + { + + /* Multi-table UPDATE: don't eliminate tables referred from SET statement */ + if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI) + { + /* Multi-table UPDATE and DELETE: don't eliminate the tables we modify: */ + used_tables |= thd->table_map_for_update; + List_iterator it2(thd->lex->value_list); + while ((item= it2++)) + used_tables |= item->used_tables(); + } + + if (thd->lex->sql_command == SQLCOM_DELETE_MULTI) + { + TABLE_LIST *tbl; + for (tbl= (TABLE_LIST*)thd->lex->auxiliary_table_list.first; + tbl; tbl= tbl->next_local) + { + used_tables |= tbl->table->map; + } + } + } + + table_map all_tables= join->all_tables_map(); + Json_writer_array trace_eliminated_tables(thd,"eliminated_tables"); + if (all_tables & ~used_tables) + { + /* There are some tables that we probably could eliminate. Try it. 
*/ + eliminate_tables_for_list(join, join->join_list, all_tables, NULL, + used_tables, &trace_eliminated_tables); + } + DBUG_VOID_RETURN; +} + + +/* + Perform table elimination in a given join list + + SYNOPSIS + eliminate_tables_for_list() + join The join we're working on + join_list Join list to eliminate tables from (and if + on_expr !=NULL, then try eliminating join_list + itself) + list_tables Bitmap of tables embedded in the join_list. + on_expr ON expression, if the join list is the inner side + of an outer join. + NULL means it's not an outer join but rather a + top-level join list. + tables_used_elsewhere Bitmap of tables that are referred to from + somewhere outside of the join list (e.g. + select list, HAVING, other ON expressions, etc). + + DESCRIPTION + Perform table elimination in a given join list: + - First, walk through join list members and try doing table elimination for + them. + - Then, if the join list itself is an inner side of outer join + (on_expr!=NULL), then try to eliminate the entire join list. + + See "HANDLING MULTIPLE NESTED OUTER JOINS" section at the top of this file + for more detailed description and justification. + + RETURN + TRUE The entire join list eliminated + FALSE Join list wasn't eliminated (but some of its child outer joins + possibly were) +*/ + +static bool +eliminate_tables_for_list(JOIN *join, List *join_list, + table_map list_tables, Item *on_expr, + table_map tables_used_elsewhere, + Json_writer_array *trace_eliminate_tables) +{ + TABLE_LIST *tbl; + List_iterator it(*join_list); + table_map tables_used_on_left= 0; + bool all_eliminated= TRUE; + + while ((tbl= it++)) + { + if (tbl->on_expr) + { + table_map outside_used_tables= tables_used_elsewhere | + tables_used_on_left; + if (on_expr) + outside_used_tables |= on_expr->used_tables(); + if (tbl->nested_join) + { + /* This is "... 
LEFT JOIN (join_nest) ON cond" */ + if (eliminate_tables_for_list(join, + &tbl->nested_join->join_list, + tbl->nested_join->used_tables, + tbl->on_expr, + outside_used_tables, + trace_eliminate_tables)) + { + mark_as_eliminated(join, tbl, trace_eliminate_tables); + } + else + all_eliminated= FALSE; + } + else + { + /* This is "... LEFT JOIN tbl ON cond" */ + if (!(tbl->table->map & outside_used_tables) && + check_func_dependency(join, tbl->table->map, NULL, tbl, + tbl->on_expr)) + { + mark_as_eliminated(join, tbl, trace_eliminate_tables); + } + else + all_eliminated= FALSE; + } + tables_used_on_left |= tbl->on_expr->used_tables(); + } + else + { + DBUG_ASSERT(!tbl->nested_join || tbl->sj_on_expr); + //psergey-todo: is the following really correct or we'll need to descend + //down all ON clauses: ? + if (tbl->sj_on_expr) + tables_used_on_left |= tbl->sj_on_expr->used_tables(); + } + } + + /* Try eliminating the nest we're called for */ + if (all_eliminated && on_expr && !(list_tables & tables_used_elsewhere)) + { + it.rewind(); + return check_func_dependency(join, list_tables & ~join->eliminated_tables, + &it, NULL, on_expr); + } + return FALSE; /* not eliminated */ +} + + +/* + Check if given condition makes given set of tables functionally dependent + + SYNOPSIS + check_func_dependency() + join Join we're procesing + dep_tables Tables that we check to be functionally dependent (on + everything else) + it Iterator that enumerates these tables, or NULL if we're + checking one single table and it is specified in oj_tbl + parameter. + oj_tbl NULL, or one single table that we're checking + cond Condition to use to prove functional dependency + + DESCRIPTION + Check if we can use given condition to infer that the set of given tables + is functionally dependent on everything else. 
+ + RETURN + TRUE - Yes, functionally dependent + FALSE - No, or error +*/ + +static +bool check_func_dependency(JOIN *join, + table_map dep_tables, + List_iterator *it, + TABLE_LIST *oj_tbl, + Item* cond) +{ + Dep_analysis_context dac; + + /* + Pre-alloc some Dep_module_expr structures. We don't need this to be + guaranteed upper bound. + */ + dac.n_equality_mods_alloced= + join->thd->lex->current_select->max_equal_elems + + (join->thd->lex->current_select->cond_count+1)*2 + + join->thd->lex->current_select->between_count; + + bzero(dac.table_deps, sizeof(dac.table_deps)); + if (!(dac.equality_mods= new Dep_module_expr[dac.n_equality_mods_alloced])) + return FALSE; /* purecov: inspected */ + + Dep_module_expr* last_eq_mod= dac.equality_mods; + + /* Create Dep_value_table objects for all tables we're trying to eliminate */ + if (oj_tbl) + { + if (!dac.create_table_value(oj_tbl)) + return FALSE; /* purecov: inspected */ + } + else + { + TABLE_LIST *tbl; + while ((tbl= (*it)++)) + { + if (tbl->table && (tbl->table->map & dep_tables)) + { + if (!dac.create_table_value(tbl)) + return FALSE; /* purecov: inspected */ + } + } + } + dac.usable_tables= dep_tables; + + /* + Analyze the the ON expression and create Dep_module_expr objects and + Dep_value_field objects for the used fields. 
+ */ + uint and_level=0; + build_eq_mods_for_cond(join->thd, &dac, &last_eq_mod, &and_level, cond); + if (!(dac.n_equality_mods= (uint)(last_eq_mod - dac.equality_mods))) + return FALSE; /* No useful conditions */ + + List bound_modules; + + if (!(dac.outer_join_dep= new Dep_module_goal(my_count_bits(dep_tables))) || + dac.setup_equality_modules_deps(&bound_modules)) + { + return FALSE; /* OOM, default to non-dependent */ /* purecov: inspected */ + } + + DBUG_EXECUTE("test", dac.dbug_print_deps(); ); + + return dac.run_wave(&bound_modules); +} + + +/* + Running wave functional dependency check algorithm + + SYNOPSIS + Dep_analysis_context::run_wave() + new_bound_modules List of bound modules to start the running wave from. + The list is destroyed during execution + + DESCRIPTION + This function uses running wave algorithm to check if the join nest is + functionally-dependent. + We start from provided list of bound modules, and then run the wave across + dependency edges, trying the reach the Dep_module_goal module. If we manage + to reach it, then the join nest is functionally-dependent, otherwise it is + not. + + RETURN + TRUE Yes, functionally dependent + FALSE No. +*/ + +bool Dep_analysis_context::run_wave(List *new_bound_modules) +{ + List new_bound_values; + + Dep_value *value; + Dep_module *module; + + while (!new_bound_modules->is_empty()) + { + /* + The "wave" is in new_bound_modules list. Iterate over values that can be + reached from these modules but are not yet bound, and collect the next + wave generation in new_bound_values list. 
+ */ + List_iterator modules_it(*new_bound_modules); + while ((module= modules_it++)) + { + char iter_buf[Dep_module::iterator_size + ALIGN_MAX_UNIT]; + Dep_module::Iterator iter; + iter= module->init_unbound_values_iter(iter_buf); + while ((value= module->get_next_unbound_value(this, iter))) + { + if (!value->is_bound()) + { + value->make_bound(); + new_bound_values.push_back(value); + } + } + } + new_bound_modules->empty(); + + /* + Now walk over list of values we've just found to be bound and check which + unbound modules can be reached from them. If there are some modules that + became bound, collect them in new_bound_modules list. + */ + List_iterator value_it(new_bound_values); + while ((value= value_it++)) + { + char iter_buf[Dep_value::iterator_size + ALIGN_MAX_UNIT]; + Dep_value::Iterator iter; + iter= value->init_unbound_modules_iter(iter_buf); + while ((module= value->get_next_unbound_module(this, iter))) + { + module->touch(); + if (!module->is_applicable()) + continue; + if (module->is_final()) + return TRUE; /* Functionally dependent */ + module->make_bound(); + new_bound_modules->push_back(module); + } + } + new_bound_values.empty(); + } + return FALSE; +} + + +/* + This is used to analyze expressions in "tbl.col=expr" dependencies so + that we can figure out which fields the expression depends on. 
+*/ + +class Field_dependency_recorder : public Field_enumerator +{ +public: + Field_dependency_recorder(Dep_analysis_context *ctx_arg): ctx(ctx_arg) + {} + + void visit_field(Item_field *item) + { + Field *field= item->field; + Dep_value_table *tbl_dep; + if ((tbl_dep= ctx->table_deps[field->table->tablenr])) + { + for (Dep_value_field *field_dep= tbl_dep->fields; field_dep; + field_dep= field_dep->next_table_field) + { + if (field->field_index == field_dep->field->field_index) + { + uint offs= field_dep->bitmap_offset + expr_offset; + if (!bitmap_is_set(&ctx->expr_deps, offs)) + ctx->equality_mods[expr_offset].unbound_args++; + bitmap_set_bit(&ctx->expr_deps, offs); + return; + } + } + /* + We got here if didn't find this field. It's not a part of + a unique key, and/or there is no field=expr element for it. + Bump the dependency anyway, this will signal that this dependency + cannot be satisfied. + */ + ctx->equality_mods[expr_offset].unbound_args++; + } + else + visited_other_tables= TRUE; + } + + Dep_analysis_context *ctx; + /* Offset of the expression we're processing in the dependency bitmap */ + uint expr_offset; + + bool visited_other_tables; +}; + + + + +/* + Setup inbound dependency relationships for tbl.col=expr equalities + + SYNOPSIS + setup_equality_modules_deps() + bound_deps_list Put here modules that were found not to depend on + any non-bound columns. + + DESCRIPTION + Setup inbound dependency relationships for tbl.col=expr equalities: + - allocate a bitmap where we store such dependencies + - for each "tbl.col=expr" equality, analyze the expr part and find out + which fields it refers to and set appropriate dependencies. + + RETURN + FALSE OK + TRUE Out of memory +*/ + +bool Dep_analysis_context::setup_equality_modules_deps(List + *bound_modules) +{ + THD *thd= current_thd; + DBUG_ENTER("setup_equality_modules_deps"); + + /* + Count Dep_value_field objects and assign each of them a unique + bitmap_offset value. 
+ */ + uint offset= 0; + for (Dep_value_table **tbl_dep= table_deps; + tbl_dep < table_deps + MAX_TABLES; + tbl_dep++) + { + if (*tbl_dep) + { + for (Dep_value_field *field_dep= (*tbl_dep)->fields; + field_dep; + field_dep= field_dep->next_table_field) + { + field_dep->bitmap_offset= offset; + offset += n_equality_mods; + } + } + } + + void *buf; + if (!(buf= thd->alloc(bitmap_buffer_size(offset))) || + my_bitmap_init(&expr_deps, (my_bitmap_map*)buf, offset)) + { + DBUG_RETURN(TRUE); /* purecov: inspected */ + } + bitmap_clear_all(&expr_deps); + + /* + Analyze all "field=expr" dependencies, and have expr_deps encode + dependencies of expressions from fields. + + Also collect a linked list of equalities that are bound. + */ + Field_dependency_recorder deps_recorder(this); + for (Dep_module_expr *eq_mod= equality_mods; + eq_mod < equality_mods + n_equality_mods; + eq_mod++) + { + deps_recorder.expr_offset= (uint)(eq_mod - equality_mods); + deps_recorder.visited_other_tables= FALSE; + eq_mod->unbound_args= 0; + + if (eq_mod->field) + { + /* Regular tbl.col=expr(tblX1.col1, tblY1.col2, ...) */ + eq_mod->expr->walk(&Item::enumerate_field_refs_processor, FALSE, + &deps_recorder); + } + else + { + /* It's a multi-equality */ + eq_mod->unbound_args= !MY_TEST(eq_mod->expr); + List_iterator it(*eq_mod->mult_equal_fields); + Dep_value_field* field_val; + while ((field_val= it++)) + { + uint offs= (uint)(field_val->bitmap_offset + eq_mod - equality_mods); + bitmap_set_bit(&expr_deps, offs); + } + } + + if (!eq_mod->unbound_args) + bound_modules->push_back(eq_mod, thd->mem_root); + } + + DBUG_RETURN(FALSE); +} + + +/* + Ordering that we're using whenever we need to maintain a no-duplicates list + of field value objects. 
+*/ + +static +int compare_field_values(Dep_value_field *a, Dep_value_field *b, void *unused) +{ + uint a_ratio= a->field->table->tablenr*MAX_FIELDS + + a->field->field_index; + + uint b_ratio= b->field->table->tablenr*MAX_FIELDS + + b->field->field_index; + return (a_ratio < b_ratio)? 1 : ((a_ratio == b_ratio)? 0 : -1); +} + + +/* + Produce Dep_module_expr elements for given condition. + + SYNOPSIS + build_eq_mods_for_cond() + ctx Table elimination context + eq_mod INOUT Put produced equality conditions here + and_level INOUT AND-level (like in add_key_fields) + cond Condition to process + + DESCRIPTION + Analyze the given condition and produce an array of Dep_module_expr + dependencies from it. The idea of analysis is as follows: + There are useful equalities that have form + + eliminable_tbl.field = expr (denote as useful_equality) + + The condition is composed of useful equalities and other conditions that + are combined together with AND and OR operators. We process the condition + in recursive fashion according to these basic rules: + + useful_equality1 AND useful_equality2 -> make array of two + Dep_module_expr objects + + useful_equality AND other_cond -> discard other_cond + + useful_equality OR other_cond -> discard everything + + useful_equality1 OR useful_equality2 -> check if both sides of OR are the + same equality. If yes, that's the + result, otherwise discard + everything. + + The rules are used to map the condition into an array Dep_module_expr + elements. The array will specify functional dependencies that logically + follow from the condition. 
+ + SEE ALSO + This function is modeled after add_key_fields() +*/ + +static +void build_eq_mods_for_cond(THD *thd, Dep_analysis_context *ctx, + Dep_module_expr **eq_mod, + uint *and_level, Item *cond) +{ + if (cond->type() == Item_func::COND_ITEM) + { + List_iterator_fast li(*((Item_cond*) cond)->argument_list()); + size_t orig_offset= *eq_mod - ctx->equality_mods; + + /* AND/OR */ + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + Item *item; + while ((item=li++)) + build_eq_mods_for_cond(thd, ctx, eq_mod, and_level, item); + + for (Dep_module_expr *mod_exp= ctx->equality_mods + orig_offset; + mod_exp != *eq_mod ; mod_exp++) + { + mod_exp->level= *and_level; + } + } + else + { + Item *item; + (*and_level)++; + build_eq_mods_for_cond(thd, ctx, eq_mod, and_level, li++); + while ((item=li++)) + { + Dep_module_expr *start_key_fields= *eq_mod; + (*and_level)++; + build_eq_mods_for_cond(thd, ctx, eq_mod, and_level, item); + *eq_mod= merge_eq_mods(ctx->equality_mods + orig_offset, + start_key_fields, *eq_mod, + ++(*and_level)); + } + } + return; + } + + if (cond->type() != Item::FUNC_ITEM) + return; + + Item_func *cond_func= (Item_func*) cond; + Item **args= cond_func->arguments(); + + switch (cond_func->functype()) { + case Item_func::BETWEEN: + { + Item *fld; + Item_func_between *func= (Item_func_between *) cond_func; + if (!func->negated && + (fld= args[0]->real_item())->type() == Item::FIELD_ITEM && + args[1]->eq(args[2], ((Item_field*)fld)->field->binary())) + { + check_equality(ctx, eq_mod, *and_level, func, args[0], args[1]); + check_equality(ctx, eq_mod, *and_level, func, args[1], args[0]); + } + break; + } + case Item_func::EQ_FUNC: + case Item_func::EQUAL_FUNC: + { + Item_bool_rowready_func2 *func= (Item_bool_rowready_func2*) cond_func; + check_equality(ctx, eq_mod, *and_level, func, args[0], args[1]); + check_equality(ctx, eq_mod, *and_level, func, args[1], args[0]); + break; + } + case Item_func::ISNULL_FUNC: + { + Item *tmp=new 
(thd->mem_root) Item_null(thd); + if (tmp) + check_equality(ctx, eq_mod, *and_level, + (Item_func_isnull*) cond_func, args[0], tmp); + break; + } + case Item_func::MULT_EQUAL_FUNC: + { + /* + The condition is a + + tbl1.field1 = tbl2.field2 = tbl3.field3 [= const_expr] + + multiple-equality. Do two things: + - Collect List of tblX.colY where tblX is one of the + tables we're trying to eliminate. + - rembember if there was a bound value, either const_expr or tblY.colZ + swher tblY is not a table that we're trying to eliminate. + Store all collected information in a Dep_module_expr object. + */ + Item_equal *item_equal= (Item_equal*)cond; + List *fvl; + if (!(fvl= new List)) + break; /* purecov: inspected */ + + Item_equal_fields_iterator it(*item_equal); + Item *item; + Item *bound_item= item_equal->get_const(); + while ((item= it++)) + { + Field *equal_field= it.get_curr_field(); + if ((item->used_tables() & ctx->usable_tables)) + { + Dep_value_field *field_val; + if ((field_val= ctx->get_field_value(equal_field))) + fvl->push_back(field_val, thd->mem_root); + } + else + { + if (!bound_item) + bound_item= item; + } + } + /* + Multiple equality is only useful if it includes at least one field from + the table that we could potentially eliminate: + */ + if (fvl->elements) + { + + bubble_sort(fvl, compare_field_values, NULL); + add_module_expr(ctx, eq_mod, *and_level, NULL, bound_item, fvl); + } + break; + } + default: + break; + } +} + + +/* + Perform an OR operation on two (adjacent) Dep_module_expr arrays. 

  SYNOPSIS
    merge_eq_mods()
      start        Start of left OR-part
      new_fields   Start of right OR-part
      end          End of right OR-part
      and_level    AND-level (like in add_key_fields)

  DESCRIPTION
    This function is invoked for two adjacent arrays of Dep_module_expr
    elements:

             $LEFT_PART             $RIGHT_PART
      +-----------------------+-----------------------+
      start                   new_fields              end

    The goal is to produce an array which would correspond to the combined

      $LEFT_PART OR $RIGHT_PART

    condition. This is achieved as follows: First, we apply the distributive
    law:

      (fdep_A_1 AND fdep_A_2 AND ...)  OR  (fdep_B_1 AND fdep_B_2 AND ...) =

       = AND_ij (fdep_A_[i] OR fdep_B_[j])

    Then we walk over the obtained "fdep_A_[i] OR fdep_B_[j]" pairs, and
     - Discard those that have left and right parts referring to different
       columns. We can't infer anything useful from "col1=expr1 OR col2=expr2".
     - When left and right parts refer to the same column, we check if they are
       essentially the same.
       = If they are the same, we keep one copy
         "t.col=expr OR t.col=expr"  -> "t.col=expr"
       = If they are different, then we discard both
         "t.col=expr1 OR t.col=expr2" -> (nothing useful)

    (no per-table or for-index FUNC_DEPS exist yet at this phase).

    See also merge_key_fields().

  RETURN
    End of the result array
*/

static
Dep_module_expr *merge_eq_mods(Dep_module_expr *start,
                               Dep_module_expr *new_fields,
                               Dep_module_expr *end, uint and_level)
{
  if (start == new_fields)
    return start;  /* (nothing) OR (...) -> (nothing) */
  if (new_fields == end)
    return start;  /* (...) OR (nothing) -> (nothing) */

  /* The surviving result elements live in [start, first_free) */
  Dep_module_expr *first_free= new_fields;

  for (; new_fields != end ; new_fields++)
  {
    for (Dep_module_expr *old=start ; old != first_free ; old++)
    {
      if (old->field == new_fields->field)
      {
        if (!old->field)
        {
          /*
            OR-ing two multiple equalities. We must compute an intersection of
            used fields, and check the constants according to these rules:

              a=b=c=d  OR a=c=e=f   -> a=c  (compute intersection)
              a=const1 OR a=b       -> (nothing)
              a=const1 OR a=const1  -> a=const1
              a=const1 OR a=const2  -> (nothing)

            If we're performing an OR operation over multiple equalities, e.g.

              (a=b=c AND p=q) OR (a=b AND v=z)

            then we'll need to try combining each equality with each. ANDed
            equalities are guaranteed to be disjoint, so we'll only get one
            hit.
          */
          Field *eq_field= old->mult_equal_fields->head()->field;
          if (old->expr && new_fields->expr &&
              old->expr->eq_by_collation(new_fields->expr, eq_field->binary(),
                                         eq_field->charset()))
          {
            /* Ok, keep */
          }
          else
          {
            /* no single constant/bound item. */
            old->expr= NULL;
          }

          List<Dep_value_field> *fv;
          if (!(fv= new List<Dep_value_field>))
            break; /* purecov: inspected */

          List_iterator<Dep_value_field> it1(*old->mult_equal_fields);
          List_iterator<Dep_value_field> it2(*new_fields->mult_equal_fields);
          Dep_value_field *lfield= it1++;
          Dep_value_field *rfield= it2++;
          /* Intersect two ordered lists */
          while (lfield && rfield)
          {
            if (lfield == rfield)
            {
              fv->push_back(lfield);
              lfield=it1++;
              rfield=it2++;
            }
            else
            {
              if (compare_field_values(lfield, rfield, NULL) < 0)
                lfield= it1++;
              else
                rfield= it2++;
            }
          }

          /* Keep the intersection only if it still constrains something */
          if (fv->elements + MY_TEST(old->expr) > 1)
          {
            old->mult_equal_fields= fv;
            old->level= and_level;
          }
        }
        else if (!new_fields->expr->const_item())
        {
          /*
            If the value matches, we can use the key reference.
            If not, we keep it until we have examined all new values
          */
          if (old->expr->eq(new_fields->expr,
                            old->field->field->binary()))
          {
            old->level= and_level;
          }
        }
        else if (old->expr->eq_by_collation(new_fields->expr,
                                            old->field->field->binary(),
                                            old->field->field->charset()))
        {
          old->level= and_level;
        }
        else
        {
          /* The expressions are different. */
          if (old == --first_free)                // If last item
            break;
          *old= *first_free;                      // Remove old value
          old--;                                  // Retry this value
        }
      }
    }
  }

  /*
    Ok, the results are within the [start, first_free) range, and the useful
    elements have level==and_level. Now, remove all unusable elements:
  */
  for (Dep_module_expr *old=start ; old != first_free ;)
  {
    if (old->level != and_level)
    {                                         // Not used in all levels
      if (old == --first_free)
        break;
      *old= *first_free;                      // Remove old value
      continue;
    }
    old++;
  }
  return first_free;
}


/*
  Add a Dep_module_expr element for left=right condition

  SYNOPSIS
    check_equality()
      ctx               Table elimination context
      eq_mod     INOUT  Store created Dep_module_expr here and increment ptr if
                        you do so
      and_level         AND-level (like in add_key_fields)
      cond              Condition we've inferred the left=right equality from.
      left              Left expression
      right             Right expression

  DESCRIPTION
    Check if the passed left=right equality is such that
    - 'left' is an Item_field referring to a field in a table we're checking
      to be functionally dependent (i.e. its table is in ctx->usable_tables),
    - the equality allows to conclude that 'left' expression is functionally
      dependent on the 'right',
    and if so, create a Dep_module_expr object.
+*/ + +static +void check_equality(Dep_analysis_context *ctx, Dep_module_expr **eq_mod, + uint and_level, Item_bool_func *cond, + Item *left, Item *right) +{ + if ((left->used_tables() & ctx->usable_tables) && + !(right->used_tables() & RAND_TABLE_BIT) && + left->real_item()->type() == Item::FIELD_ITEM) + { + Field *field= ((Item_field*)left->real_item())->field; + if (field->can_optimize_outer_join_table_elimination(cond, right) != + Data_type_compatibility::OK) + return; + Dep_value_field *field_val; + if ((field_val= ctx->get_field_value(field))) + add_module_expr(ctx, eq_mod, and_level, field_val, right, NULL); + } +} + + +/* + Add a Dep_module_expr object with the specified parameters. + + DESCRIPTION + Add a Dep_module_expr object with the specified parameters. Re-allocate + the ctx->equality_mods array if it has no space left. +*/ + +static +void add_module_expr(Dep_analysis_context *ctx, Dep_module_expr **eq_mod, + uint and_level, Dep_value_field *field_val, + Item *right, List* mult_equal_fields) +{ + if (*eq_mod == ctx->equality_mods + ctx->n_equality_mods_alloced) + { + /* + We've filled the entire equality_mods array. Replace it with a bigger + one. We do it somewhat inefficiently but it doesn't matter. 
+ */ + /* purecov: begin inspected */ + Dep_module_expr *new_arr; + if (!(new_arr= new Dep_module_expr[ctx->n_equality_mods_alloced *2])) + return; + ctx->n_equality_mods_alloced *= 2; + for (int i= 0; i < *eq_mod - ctx->equality_mods; i++) + new_arr[i]= ctx->equality_mods[i]; + + ctx->equality_mods= new_arr; + *eq_mod= new_arr + (*eq_mod - ctx->equality_mods); + /* purecov: end */ + } + + (*eq_mod)->field= field_val; + (*eq_mod)->expr= right; + (*eq_mod)->level= and_level; + (*eq_mod)->mult_equal_fields= mult_equal_fields; + (*eq_mod)++; +} + + +/* + Create a Dep_value_table object for the given table + + SYNOPSIS + Dep_analysis_context::create_table_value() + table Table to create object for + + DESCRIPTION + Create a Dep_value_table object for the given table. Also create + Dep_module_key objects for all unique keys in the table. + Create a unique pseudo-key if this table is derived and has + a GROUP BY expression. + + RETURN + Created table value object + NULL if out of memory +*/ + +Dep_value_table * +Dep_analysis_context::create_table_value(TABLE_LIST *table_list) +{ + Dep_value_table *tbl_dep; + if (!(tbl_dep= new Dep_value_table(table_list->table))) + return NULL; /* purecov: inspected */ + + Dep_module_key **key_list= &(tbl_dep->keys); + /* Add dependencies for unique keys */ + for (uint i= 0; i < table_list->table->s->keys; i++) + { + KEY *key= table_list->table->key_info + i; + if (key->flags & HA_NOSAME) + { + Dep_module_key *key_dep; + if (!(key_dep= new Dep_module_key(tbl_dep, i, + key->user_defined_key_parts))) + return NULL; + *key_list= key_dep; + key_list= &(key_dep->next_table_key); + } + } + + create_unique_pseudo_key_if_needed(table_list, tbl_dep); + return table_deps[table_list->table->tablenr]= tbl_dep; +} + + +/* + @brief + Check if we can create a unique pseudo-key for the passed table. + If we can, create a dependency for it + + @detail + Currently, pseudo-key is created for the list of GROUP BY columns. 

    TODO: also it can be created if the query uses
     - SELECT DISTINCT
     - UNION DISTINCT (not UNION ALL)
*/

void Dep_analysis_context::create_unique_pseudo_key_if_needed(
  TABLE_LIST *table_list, Dep_value_table *tbl_dep)
{
  auto select_unit= table_list->get_unit();
  SELECT_LEX *first_select= nullptr;
  if (select_unit)
  {
    first_select= select_unit->first_select();

    /*
      Exclude UNION (ALL) queries from consideration by checking
      next_select() == nullptr
    */
    if (unlikely(select_unit->first_select()->next_select()))
      first_select= nullptr;
  }

  /*
    GROUP BY expression is considered as a unique pseudo-key
    for the derived table. Add this pseudo key as a dependency.

    first_select->join is NULL for degenerate derived tables
    which are known to have just one row and so were already materialized
    by the optimizer, check this here
  */
  if (first_select && first_select->join &&
      first_select->group_list.elements > 0)
  {
    auto max_possible_elements= first_select->join->fields_list.elements;
    void *buf;
    /*
      NOTE(review): the result of the first alloc (exposed_fields) is not
      NULL-checked before being passed to my_bitmap_init — presumably
      THD arena allocation failure is handled by the second check's
      short-circuit only for 'buf'; confirm OOM behavior.
    */
    MY_BITMAP *exposed_fields= (MY_BITMAP*)
        current_thd->alloc(sizeof(MY_BITMAP));
    if (!(buf= current_thd->alloc(bitmap_buffer_size(max_possible_elements))) ||
        my_bitmap_init(exposed_fields, (my_bitmap_map*)buf,
                       max_possible_elements))
      // Memory allocation failed
      return;
    bitmap_clear_all(exposed_fields);
    uint exposed_fields_count= 0;

    bool valid= true;
    for (auto cur_group= first_select->group_list.first;
         cur_group;
         cur_group= cur_group->next)
    {
      auto elem= *(cur_group->item);
      /*
        Make sure GROUP BY elements contain only fields
        and no functions or other expressions
      */
      if (elem->type() != Item::FIELD_ITEM)
      {
        valid= false;
        break;
      }
      auto field_no= find_field_in_list(first_select->join->fields_list, elem);
      if (field_no == -1)
      {
        /*
          This GROUP BY element is not present in the select list. This is a
          case like this:
            (SELECT a FROM t1 GROUP by a,b) as TBL
          Here, the combination of (a,b) is unique, but the select doesn't
          include "b". "a" alone is not unique, so TBL doesn't have a unique
          pseudo-key.
        */
        valid= false;
        break;
      }
      bitmap_set_bit(exposed_fields, field_no);
      exposed_fields_count++;
    }
    if (valid)
    {
      Dep_module_pseudo_key *pseudo_key;
      pseudo_key= new Dep_module_pseudo_key(tbl_dep, exposed_fields,
                                            exposed_fields_count);
      tbl_dep->pseudo_key= pseudo_key;
    }
  }
}


/*
  Iterate the list of fields and look for the given field.
  Returns the index of the field if it is found on the list
  and -1 otherwise
*/

int Dep_analysis_context::find_field_in_list(List<Item> &fields_list,
                                             Item *field)
{
  List_iterator<Item> it(fields_list);
  int field_idx= 0;
  while (auto next_field= it++)
  {
    if (next_field->eq(field, false))
      return field_idx;
    field_idx++;
  }
  return -1; /* not found */
}


/*
  Get a Dep_value_field object for the given field, creating it if necessary

  SYNOPSIS
    Dep_analysis_context::get_field_value()
      field  Field to create object for

  DESCRIPTION
    Get a Dep_value_field object for the given field. First, we search for it
    in the list of Dep_value_field objects we have already created. If we don't
    find it, we create a new Dep_value_field and put it into the list of field
    objects we have for the table.

  RETURN
    Created field value object
    NULL if out of memory
*/

Dep_value_field *Dep_analysis_context::get_field_value(Field *field)
{
  TABLE *table= field->table;
  Dep_value_table *tbl_dep= table_deps[table->tablenr];

  /* Try finding the field in field list (kept sorted by field_index) */
  Dep_value_field **pfield= &(tbl_dep->fields);
  while (*pfield && (*pfield)->field->field_index < field->field_index)
  {
    pfield= &((*pfield)->next_table_field);
  }
  if (*pfield && (*pfield)->field->field_index == field->field_index)
    return *pfield;

  /*
    Create the field and insert it in the list.
    NOTE(review): the result of 'new' is not NULL-checked here before the
    dereference — presumably arena allocation; confirm OOM handling.
  */
  Dep_value_field *new_field= new Dep_value_field(tbl_dep, field);
  new_field->next_table_field= *pfield;
  *pfield= new_field;

  return new_field;
}


/*
  Iteration over unbound modules that are our dependencies.
  For those we have:
   - dependencies of our fields
   - the outer join we're in
*/
char *Dep_value_table::init_unbound_modules_iter(char *buf)
{
  Module_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Module_iter);
  iter->field_dep= fields;
  if (fields)
  {
    fields->init_unbound_modules_iter(iter->buf);
    fields->make_unbound_modules_iter_skip_keys(iter->buf);
  }
  iter->returned_goal= FALSE;
  return (char*)iter;
}


Dep_module*
Dep_value_table::get_next_unbound_module(Dep_analysis_context *dac,
                                         char *iter)
{
  Module_iter *di= (Module_iter*)iter;
  /* Walk the fields, returning each field's unbound modules in turn */
  while (di->field_dep)
  {
    Dep_module *res;
    if ((res= di->field_dep->get_next_unbound_module(dac, di->buf)))
      return res;
    if ((di->field_dep= di->field_dep->next_table_field))
    {
      char *field_iter= ((Module_iter*)iter)->buf;
      di->field_dep->init_unbound_modules_iter(field_iter);
      di->field_dep->make_unbound_modules_iter_skip_keys(field_iter);
    }
  }

  /* Finally, return the outer-join "goal" module exactly once */
  if (!di->returned_goal)
  {
    di->returned_goal= TRUE;
    return dac->outer_join_dep;
  }
  return NULL;
}


char *Dep_module_expr::init_unbound_values_iter(char *buf)
{
  Value_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Value_iter);
  iter->field= field;
  if (!field)
  {
    /* Multi-equality: placement-new the list iterator inside the buffer */
    new (&iter->it) List_iterator<Dep_value_field>(*mult_equal_fields);
  }
  return (char*)iter;
}


Dep_value* Dep_module_expr::get_next_unbound_value(Dep_analysis_context *dac,
                                                   char *buf)
{
  Dep_value *res;
  if (field)
  {
    /* Single-field equality: yield the field once, if still unbound */
    res= ((Value_iter*)buf)->field;
    ((Value_iter*)buf)->field= NULL;
    return (!res || res->is_bound())? NULL : res;
  }
  else
  {
    /* Multi-equality: yield each not-yet-bound member field */
    while ((res= ((Value_iter*)buf)->it++))
    {
      if (!res->is_bound())
        return res;
    }
    return NULL;
  }
}


char *Dep_module_key::init_unbound_values_iter(char *buf)
{
  Value_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Value_iter);
  iter->table= table;
  return (char*)iter;
}


Dep_value* Dep_module_key::get_next_unbound_value(Dep_analysis_context *dac,
                                                  Dep_module::Iterator iter)
{
  /* A unique key binds exactly one value: its table */
  Dep_value* res= ((Value_iter*)iter)->table;
  ((Value_iter*)iter)->table= NULL;
  return res;
}


char *Dep_module_pseudo_key::init_unbound_values_iter(char *buf)
{
  Value_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Value_iter);
  iter->table= table;
  return (char *) iter;
}

Dep_value *
Dep_module_pseudo_key::get_next_unbound_value(Dep_analysis_context *dac,
                                              Dep_module::Iterator iter)
{
  /* Like Dep_module_key: the pseudo-key binds only its table */
  Dep_value *res= ((Value_iter *) iter)->table;
  ((Value_iter *) iter)->table= NULL;
  return res;
}


/*
  Check if column number field_no is covered by the pseudo-key.
*/

bool Dep_module_pseudo_key::covers_field(int field_no)
{
  return bitmap_is_set(exposed_fields_map, field_no) > 0;
}


Dep_value::Iterator Dep_value_field::init_unbound_modules_iter(char *buf)
{
  Module_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Module_iter);
  iter->key_dep= table->keys;
  iter->equality_no= 0;
  iter->pseudo_key_dep= table->pseudo_key;
  return (char*)iter;
}


void
Dep_value_field::make_unbound_modules_iter_skip_keys(Dep_value::Iterator iter)
{
  ((Module_iter*) iter)->key_dep= NULL;
  ((Module_iter*) iter)->pseudo_key_dep= NULL;
}


Dep_module* Dep_value_field::get_next_unbound_module(Dep_analysis_context *dac,
                                                     Dep_value::Iterator iter)
{
  Module_iter *di= (Module_iter*)iter;
  Dep_module_key *key_dep= di->key_dep;

  /*
    First, enumerate all unique keys that are
     - not yet applicable
     - have this field as a part of them
  */
  while (key_dep && (key_dep->is_applicable() ||
         !field->part_of_key_not_clustered.is_set(key_dep->keyno)))
  {
    key_dep= key_dep->next_table_key;
  }

  if (key_dep)
  {
    di->key_dep= key_dep->next_table_key;
    return key_dep;
  }
  else
    di->key_dep= NULL;

  /* Then the GROUP BY pseudo-key, if it covers this field */
  Dep_module_pseudo_key *pseudo_key_dep= di->pseudo_key_dep;
  if (pseudo_key_dep && !pseudo_key_dep->is_applicable() &&
      pseudo_key_dep->covers_field(field->field_index))
  {
    di->pseudo_key_dep= NULL;
    return pseudo_key_dep;
  }
  else
    di->pseudo_key_dep= NULL;

  /*
    Then walk through [multi]equalities and find those that
     - depend on this field
     - and are not bound yet.
  */
  uint eq_no= di->equality_no;
  while (eq_no < dac->n_equality_mods &&
         (!bitmap_is_set(&dac->expr_deps, bitmap_offset + eq_no) ||
          dac->equality_mods[eq_no].is_applicable()))
  {
    eq_no++;
  }

  if (eq_no < dac->n_equality_mods)
  {
    di->equality_no= eq_no+1;
    return &dac->equality_mods[eq_no];
  }
  return NULL;
}


/*
  Mark one table or the whole join nest as eliminated.
*/

static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl,
                               Json_writer_array* trace_eliminate_tables)
{
  TABLE *table;
  /*
    NOTE: there are TABLE_LIST object that have
    tbl->table!= NULL && tbl->nested_join!=NULL and
    tbl->table == tbl->nested_join->join_list->element(..)->table
  */
  if (tbl->nested_join)
  {
    /* Recurse into every child of the join nest */
    TABLE_LIST *child;
    List_iterator<TABLE_LIST> it(tbl->nested_join->join_list);
    while ((child= it++))
      mark_as_eliminated(join, child, trace_eliminate_tables);
  }
  else if ((table= tbl->table))
  {
    JOIN_TAB *tab= tbl->table->reginfo.join_tab;
    if (!(join->const_table_map & tab->table->map))
    {
      DBUG_PRINT("info", ("Eliminated table %s", table->alias.c_ptr()));
      /* Eliminated tables are treated as const tables with no rows read */
      tab->type= JT_CONST;
      tab->table->const_table= 1;
      join->eliminated_tables |= table->map;
      trace_eliminate_tables->add(table->alias.c_ptr_safe());
      join->const_table_map|= table->map;
      set_position(join, join->const_tables++, tab, (KEYUSE*)0);
    }
  }

  /* Let items in the ON expression know their table was eliminated */
  if (tbl->on_expr)
    tbl->on_expr->walk(&Item::mark_as_eliminated_processor, FALSE, NULL);
}

#ifndef DBUG_OFF
/* purecov: begin inspected */
/* Debug helper: dump all equalities, tables and field dependencies */
void Dep_analysis_context::dbug_print_deps()
{
  DBUG_ENTER("dbug_print_deps");
  DBUG_LOCK_FILE;

  fprintf(DBUG_FILE,"deps {\n");

  /* Start with printing equalities */
  for (Dep_module_expr *eq_mod= equality_mods;
       eq_mod != equality_mods + n_equality_mods; eq_mod++)
  {
    char buf[128];
    String str(buf, sizeof(buf), &my_charset_bin);
    str.length(0);
    eq_mod->expr->print(&str, QT_ORDINARY);
    if (eq_mod->field)
    {
      fprintf(DBUG_FILE, "  equality%ld: %s -> %s.%s\n",
              (long)(eq_mod - equality_mods),
              str.c_ptr(),
              eq_mod->field->table->table->alias.c_ptr(),
              eq_mod->field->field->field_name.str);
    }
    else
    {
      fprintf(DBUG_FILE, "  equality%ld: multi-equality",
              (long)(eq_mod - equality_mods));
    }
  }
  fprintf(DBUG_FILE,"\n");

  /* Then tables and their fields */
  for (uint i=0; i < MAX_TABLES; i++)
  {
    Dep_value_table *table_dep;
    if ((table_dep= table_deps[i]))
    {
      /* Print table */
      fprintf(DBUG_FILE, "  table %s\n", table_dep->table->alias.c_ptr());
      /* Print fields */
      for (Dep_value_field *field_dep= table_dep->fields; field_dep;
           field_dep= field_dep->next_table_field)
      {
        fprintf(DBUG_FILE, "    field %s.%s ->",
                table_dep->table->alias.c_ptr(),
                field_dep->field->field_name.str);
        uint ofs= field_dep->bitmap_offset;
        for (uint bit= ofs; bit < ofs + n_equality_mods; bit++)
        {
          if (bitmap_is_set(&expr_deps, bit))
            fprintf(DBUG_FILE, " equality%d ", bit - ofs);
        }
        fprintf(DBUG_FILE, "\n");
      }
    }
  }
  fprintf(DBUG_FILE,"\n}\n");
  DBUG_UNLOCK_FILE;
  DBUG_VOID_RETURN;
}
/* purecov: end */

#endif
/**
  @} (end of group Table_Elimination)
*/

diff --git a/sql/opt_trace.cc b/sql/opt_trace.cc
new file mode 100644
index 00000000..4bc49394
--- /dev/null
+++ b/sql/opt_trace.cc
@@ -0,0 +1,770 @@
/* This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */

#include "mariadb.h"
#include "sql_array.h"
#include "sql_string.h"
#include "sql_class.h"
#include "sql_show.h"
#include "field.h"
#include "sql_i_s.h"
#include "opt_trace.h"
#include "sql_parse.h"
#include "set_var.h"
#include "my_json_writer.h"
#include "sp_head.h"

#include "rowid_filter.h"

const char I_S_table_name[]= "OPTIMIZER_TRACE";

/**
  Whether a list of tables contains information_schema.OPTIMIZER_TRACE.
  @param tbl list of tables

  @note this does not catch that a stored routine or view accesses
  the OPTIMIZER_TRACE table. So using a stored routine or view to read
  OPTIMIZER_TRACE will overwrite OPTIMIZER_TRACE as it runs and provide
  uninteresting info.
*/
bool list_has_optimizer_trace_table(const TABLE_LIST *tbl)
{
  for (; tbl; tbl= tbl->next_global)
  {
    if (tbl->schema_table &&
        0 == strcmp(tbl->schema_table->table_name, I_S_table_name))
      return true;
  }
  return false;
}

/*
  Returns if a query has a set command with optimizer_trace being switched
  on/off. True: Don't trace the query (uninteresting)
*/

bool sets_var_optimizer_trace(enum enum_sql_command sql_command,
                              List<set_var_base> *set_vars)
{
  if (sql_command == SQLCOM_SET_OPTION)
  {
    List_iterator_fast<set_var_base> it(*set_vars);
    const set_var_base *var;
    while ((var= it++))
      if (var->is_var_optimizer_trace()) return true;
  }
  return false;
}


namespace Show {


/* Column layout of information_schema.OPTIMIZER_TRACE */
ST_FIELD_INFO optimizer_trace_info[]=
{
  Column("QUERY",                             Longtext(65535), NOT_NULL),
  Column("TRACE",                             Longtext(65535), NOT_NULL),
  Column("MISSING_BYTES_BEYOND_MAX_MEM_SIZE", SLong(20),       NOT_NULL),
  Column("INSUFFICIENT_PRIVILEGES",           STiny(1),        NOT_NULL),
  CEnd()
};

} // namespace Show


/*
  TODO: one-line needs to be implemented separately
*/
const char *Opt_trace_context::flag_names[]= {"enabled", "default",
                                              NullS};

/*
  Returns if a particular command will be traced or not
*/

inline bool sql_command_can_be_traced(enum enum_sql_command sql_command)
{
  /*
    For first iteration we are only allowing select queries.
    TODO: change to allow other queries.
  */
  return sql_command == SQLCOM_SELECT ||
         sql_command == SQLCOM_UPDATE ||
         sql_command == SQLCOM_DELETE ||
         sql_command == SQLCOM_DELETE_MULTI ||
         sql_command == SQLCOM_UPDATE_MULTI;
}

/* Add the expanded (post-resolution) query text to the trace */
void opt_trace_print_expanded_query(THD *thd, SELECT_LEX *select_lex,
                                    Json_writer_object *writer)

{
  DBUG_ASSERT(thd->trace_started());

  StringBuffer<1024> str(system_charset_info);
  ulonglong save_option_bits= thd->variables.option_bits;
  thd->variables.option_bits &= ~OPTION_QUOTE_SHOW_CREATE;
  select_lex->print(thd, &str,
                    enum_query_type(QT_TO_SYSTEM_CHARSET |
                                    QT_SHOW_SELECT_NUMBER |
                                    QT_ITEM_IDENT_SKIP_DB_NAMES |
                                    QT_VIEW_INTERNAL));
  thd->variables.option_bits= save_option_bits;
  /*
    The output is not very pretty — lots of back-ticks, same as in
    EXPLAIN EXTENDED output; improving it is a TODO.
  */
  writer->add("expanded_query", str.c_ptr_safe(), str.length());
}

void opt_trace_disable_if_no_security_context_access(THD *thd)
{
  if (likely(!(thd->variables.optimizer_trace &
               Opt_trace_context::FLAG_ENABLED)) ||  // (1)
      thd->system_thread)                            // (2)
  {
    /*
      (1) We know that the routine's execution starts with "enabled=off".
      If it stays so until the routine ends, we needn't do security checks on
      the routine.
      If it does not stay so, it means the definer sets it to "on" somewhere
      in the routine's body. Then it is his conscious decision to generate
      traces, thus it is still correct to skip the security check.

      (2) Threads of the Events Scheduler have an unusual security context
      (thd->m_main_security_ctx.priv_user==NULL, see comment in
      Security_context::change_security_context()).
    */
    return;
  }
  Opt_trace_context *const trace= &thd->opt_trace;
  if (!thd->trace_started())
  {
    /*
      @@optimizer_trace has "enabled=on" but trace is not started.
      Either Opt_trace_start ctor was not called for our statement (3), or it
      was called but at that time, the variable had "enabled=off" (4).

      There are no known cases of (3).

      (4) suggests that the user managed to change the variable during
      execution of the statement, and this statement is using
      view/routine (note that we have not been able to provoke this, maybe
      this is impossible). If it happens it is suspicious.

      We disable I_S output. And we cannot do otherwise: we have no place to
      store a possible "missing privilege" information (no Opt_trace_stmt, as
      is_started() is false), so cannot do security checks, so cannot safely
      do tracing, so have to disable I_S output. And even then, we don't know
      when to re-enable I_S output, as we have no place to store the
      information "re-enable tracing at the end of this statement", and we
      don't even have a notion of statement here (statements in the optimizer
      trace world mean an Opt_trace_stmt object, and there is none here). So
      we must disable for the session's life.

      COM_FIELD_LIST opens views, thus used to be a case of (3). To avoid
      disabling I_S output for the session's life when this command is issued
      (like in: "SET OPTIMIZER_TRACE='ENABLED=ON';USE somedb;" in the 'mysql'
      command-line client), we have decided to create a Opt_trace_start for
      this command. The command itself is not traced though
      (SQLCOM_SHOW_FIELDS does not have CF_OPTIMIZER_TRACE).
    */
    return;
  }
  /*
    Note that thd->main_security_ctx.master_access is probably invariant
    across the life of THD: GRANT/REVOKE don't affect global privileges of an
    existing connection, per the manual.
  */
  if (!(thd->main_security_ctx.check_access(GLOBAL_ACLS & ~GRANT_ACL)) &&
      (0 != strcmp(thd->main_security_ctx.priv_user,
                   thd->security_context()->priv_user) ||
       0 != my_strcasecmp(system_charset_info,
                          thd->main_security_ctx.priv_host,
                          thd->security_context()->priv_host)))
    trace->missing_privilege();
}

/* Flag the trace as privilege-deficient if SHOW ROUTINE access is missing */
void opt_trace_disable_if_no_stored_proc_func_access(THD *thd, sp_head *sp)
{
  if (likely(!(thd->variables.optimizer_trace &
               Opt_trace_context::FLAG_ENABLED)) ||
      thd->system_thread ||
      !thd->trace_started())
    return;

  Opt_trace_context *const trace= &thd->opt_trace;
  bool full_access;
  Security_context *const backup_thd_sctx= thd->security_context();
  thd->set_security_context(&thd->main_security_ctx);
  const bool rc= check_show_routine_access(thd, sp, &full_access) ||
                 !full_access;
  thd->set_security_context(backup_thd_sctx);
  if (rc)
    trace->missing_privilege();
}

/**
  If tracing is on, checks additional privileges on a list of tables/views,
  to make sure that the user has the right to do SHOW CREATE TABLE/VIEW and
  "SELECT *". For that:
  - this functions checks table-level SELECT
  - which is sufficient for SHOW CREATE TABLE and "SELECT *", if a base table
  - if a view, if the view has not been identified as such then
  opt_trace_disable_if_no_view_access() will be later called and check SHOW
  VIEW; otherwise we check SHOW VIEW here; SHOW VIEW + SELECT is sufficient
  for SHOW CREATE VIEW.
  If a privilege is missing, notifies the trace system.

  @param thd
  @param tbl list of tables to check
*/

void opt_trace_disable_if_no_tables_access(THD *thd, TABLE_LIST *tbl)
{
  if (likely(!(thd->variables.optimizer_trace &
               Opt_trace_context::FLAG_ENABLED)) ||
      thd->system_thread ||
      !thd->trace_started())
    return;

  Opt_trace_context *const trace= &thd->opt_trace;
  Security_context *const backup_thd_sctx= thd->security_context();
  /* Check with the connection's own (non-SUID) privileges */
  thd->set_security_context(&thd->main_security_ctx);
  const TABLE_LIST *const first_not_own_table= thd->lex->first_not_own_table();
  for (TABLE_LIST *t= tbl; t != NULL && t != first_not_own_table;
       t= t->next_global)
  {
    /*
      Anonymous derived tables (as in
      "SELECT ... FROM (SELECT ...)") and table functions
      don't have their grant.privilege set.
    */
    if (!t->is_anonymous_derived_table() &&
        !t->table_function)
    {
      const GRANT_INFO backup_grant_info= t->grant;
      Security_context *const backup_table_sctx= t->security_ctx;
      t->security_ctx= NULL;
      /*
        (1) check_table_access() fills t->grant.privilege.
        (2) Because SELECT privileges can be column-based,
        check_table_access() will return 'false' as long as there is SELECT
        privilege on one column. But we want a table-level privilege.
      */

      bool rc =
          check_table_access(thd, SELECT_ACL, t, false, 1, true) ||  // (1)
          ((t->grant.privilege & SELECT_ACL) == NO_ACL);             // (2)
      if (t->is_view())
      {
        /*
          It's a view which has already been opened: we are executing a
          prepared statement. The view has been unfolded in the global list of
          tables. So underlying tables will be automatically checked in the
          present function, but we need an explicit check of SHOW VIEW:
        */
        rc |= check_table_access(thd, SHOW_VIEW_ACL, t, false, 1, true);
      }
      t->security_ctx= backup_table_sctx;
      t->grant= backup_grant_info;
      if (rc)
      {
        trace->missing_privilege();
        break;
      }
    }
  }
  thd->set_security_context(backup_thd_sctx);
  return;
}

/* Check SHOW VIEW on a view, then recurse into its underlying tables */
void opt_trace_disable_if_no_view_access(THD *thd, TABLE_LIST *view,
                                         TABLE_LIST *underlying_tables)
{

  if (likely(!(thd->variables.optimizer_trace &
               Opt_trace_context::FLAG_ENABLED)) ||
      thd->system_thread ||
      !thd->trace_started())
    return;

  Opt_trace_context *const trace= &thd->opt_trace;
  Security_context *const backup_table_sctx= view->security_ctx;
  Security_context *const backup_thd_sctx= thd->security_context();
  const GRANT_INFO backup_grant_info= view->grant;

  view->security_ctx= NULL;   // no SUID context for view
  // no SUID context for THD
  thd->set_security_context(&thd->main_security_ctx);
  const int rc= check_table_access(thd, SHOW_VIEW_ACL, view, false, 1, true);

  view->security_ctx= backup_table_sctx;
  thd->set_security_context(backup_thd_sctx);
  view->grant= backup_grant_info;

  if (rc)
  {
    trace->missing_privilege();
    return;
  }
  /*
    We needn't check SELECT privilege on this view. Some
    opt_trace_disable_if_no_tables_access() call has or will check it.

    Now we check underlying tables/views of our view:
  */
  opt_trace_disable_if_no_tables_access(thd, underlying_tables);
  return;
}


/**
  @class Opt_trace_stmt

  The trace of one statement.
*/

Opt_trace_stmt::Opt_trace_stmt(Opt_trace_context *ctx_arg)
{
  ctx= ctx_arg;
  current_json= new Json_writer();
  missing_priv= false;
  I_S_disabled= 0;
}

Opt_trace_stmt::~Opt_trace_stmt()
{
  delete current_json;
}

/* Length of the JSON trace text accumulated so far */
size_t Opt_trace_stmt::get_length()
{
  return current_json->output.length();
}

/* Bytes dropped because the trace exceeded its memory limit */
size_t Opt_trace_stmt::get_truncated_bytes()
{
  return current_json->get_truncated_bytes();
}

void Opt_trace_stmt::set_query(const char *query_ptr, size_t length,
                               const CHARSET_INFO *charset)
{
  query.append(query_ptr, length, charset);
}

void Opt_trace_context::missing_privilege()
{
  if (current_trace)
    current_trace->missing_privilege();
}

void Opt_trace_context::set_allowed_mem_size(size_t mem_size)
{
  current_trace->set_allowed_mem_size(mem_size);
}

/*
  TODO: In future when we would be saving multiple traces,
  this function would return
  max_mem_size - memory_occupied_by_the_saved_traces
*/

size_t Opt_trace_context::remaining_mem_size()
{
  return max_mem_size;
}

/*
  Disable tracing for children if the current trace is already present.
  Currently only one trace is stored and there is no mechanism
  to restore traces, so disabling tracing for children is the best option.
*/

bool Opt_trace_context::disable_tracing_if_required()
{
  if (current_trace)
  {
    current_trace->disable_tracing_for_children();
    return true;
  }
  return false;
}

bool Opt_trace_context::enable_tracing_if_required()
{
  if (current_trace)
  {
    current_trace->enable_tracing_for_children();
    return true;
  }
  return false;
}

bool Opt_trace_context::is_enabled()
{
  if (current_trace)
    return current_trace->is_enabled();
  return false;
}

Opt_trace_context::Opt_trace_context() : traces(PSI_INSTRUMENT_MEM)
{
  current_trace= NULL;
  max_mem_size= 0;
}
Opt_trace_context::~Opt_trace_context()
{
  delete_traces();
}

void Opt_trace_context::set_query(const char *query, size_t length,
                                  const CHARSET_INFO *charset)
{
  current_trace->set_query(query, length, charset);
}

void Opt_trace_context::start(THD *thd, TABLE_LIST *tbl,
                              enum enum_sql_command sql_command,
                              const char *query,
                              size_t query_length,
                              const CHARSET_INFO *query_charset,
                              ulong max_mem_size_arg)
{
  /*
    This is done currently because we don't want to have multiple
    traces open at the same time, so as soon as a new trace is created
    we forcefully end the previous one, if it has not ended by itself.
    This would mostly happen with stored functions or procedures.

    TODO: handle multiple traces
  */
  DBUG_ASSERT(!current_trace);
  current_trace= new Opt_trace_stmt(this);
  max_mem_size= max_mem_size_arg;
  set_allowed_mem_size(remaining_mem_size());
}

void Opt_trace_context::end()
{
  if (current_trace)
    traces.push(current_trace);

  if (!traces.elements())
    return;
  /* Only one saved trace is kept: drop the oldest one */
  if (traces.elements() > 1)
  {
    Opt_trace_stmt *prev= traces.at(0);
    delete prev;
    traces.del(0);
  }
  current_trace= NULL;
}


void Opt_trace_start::init(THD *thd,
                           TABLE_LIST *tbl,
                           enum enum_sql_command sql_command,
                           List<set_var_base> *set_vars,
                           const char *query,
                           size_t query_length,
                           const CHARSET_INFO *query_charset)
{
  /*
    If optimizer trace is enabled and the statement we have is traceable,
    then we start the context.
  */
  const ulonglong var= thd->variables.optimizer_trace;
  traceable= FALSE;
  if (unlikely(var & Opt_trace_context::FLAG_ENABLED) &&
      sql_command_can_be_traced(sql_command) &&
      !list_has_optimizer_trace_table(tbl) &&
      !sets_var_optimizer_trace(sql_command, set_vars) &&
      !thd->system_thread &&
      !ctx->disable_tracing_if_required())
  {
    ctx->start(thd, tbl, sql_command, query, query_length, query_charset,
               thd->variables.optimizer_trace_max_mem_size);
    ctx->set_query(query, query_length, query_charset);
    traceable= TRUE;
    opt_trace_disable_if_no_tables_access(thd, tbl);
    Json_writer *w= ctx->get_current_json();
    w->start_object();
    w->add_member("steps").start_array();
  }
}

Opt_trace_start::~Opt_trace_start()
{
  if (traceable)
  {
    /* Close the "steps" array and the outer object, then save the trace */
    Json_writer *w= ctx->get_current_json();
    w->end_array();
    w->end_object();
    ctx->end();
    traceable= FALSE;
  }
  else
  {
    ctx->enable_tracing_if_required();
  }
}

/* Fill an Opt_trace_info from this trace for I_S output */
void Opt_trace_stmt::fill_info(Opt_trace_info* info)
{
  if (unlikely(info->missing_priv= get_missing_priv()))
  {
    /* Insufficient privileges: expose no query/trace text */
    info->trace_ptr= info->query_ptr= "";
    info->trace_length= info->query_length= 0;
    info->query_charset= &my_charset_bin;
    info->missing_bytes= 0;
  }
  else
  {
    info->trace_ptr= current_json->output.get_string()->ptr();
    info->trace_length= get_length();
    info->query_ptr= query.ptr();
    info->query_length= query.length();
    info->query_charset= query.charset();
    info->missing_bytes= get_truncated_bytes();
    info->missing_priv= get_missing_priv();
  }
}

void Opt_trace_stmt::missing_privilege()
{
  missing_priv= true;
}

void Opt_trace_stmt::disable_tracing_for_children()
{
  ++I_S_disabled;
}

void Opt_trace_stmt::enable_tracing_for_children()
{
  if (I_S_disabled)
    --I_S_disabled;
}

void Opt_trace_stmt::set_allowed_mem_size(size_t mem_size)
{
  current_json->set_size_limit(mem_size);
}

/*
  Prefer this when you are iterating over JOIN_TABs
*/

void Json_writer::add_table_name(const JOIN_TAB *tab)
{
  DBUG_ASSERT(tab->join->thd->trace_started());
  if (tab != NULL)
  {
    char table_name_buffer[SAFE_NAME_LEN];
    if (tab->table && tab->table->derived_select_number)
    {
      /* Derived table name generation */
      size_t len= my_snprintf(table_name_buffer, sizeof(table_name_buffer)-1,
                              "<derived%u>",
                              tab->table->derived_select_number);
      add_str(table_name_buffer, len);
    }
    else if (tab->bush_children)
    {
      JOIN_TAB *ctab= tab->bush_children->start;
      size_t len= my_snprintf(table_name_buffer,
                              sizeof(table_name_buffer)-1,
                              "<subquery%d>",
                              ctab->emb_sj_nest->sj_subq_pred->get_identifier());
      add_str(table_name_buffer, len);
    }
    else
    {
      TABLE_LIST *real_table= tab->table->pos_in_table_list;
      add_str(real_table->alias.str, real_table->alias.length);
    }
  }
  else
    DBUG_ASSERT(0);
}

void Json_writer::add_table_name(const TABLE *table)
{
  add_str(table->pos_in_table_list->alias.str);
}


/* Record a condition transformation step in the trace */
void trace_condition(THD * thd, const char *name, const char *transform_type,
                     Item *item, const char *table_name)
{
  Json_writer_object trace_wrapper(thd);
  Json_writer_object trace_cond(thd, transform_type);
  trace_cond.add("condition", name);
  if (table_name)
    trace_cond.add("attached_to", table_name);
  trace_cond.add("resulting_condition", item);
}


void add_table_scan_values_to_trace(THD *thd, JOIN_TAB *tab)
{
  DBUG_ASSERT(thd->trace_started());
  Json_writer_object table_records(thd);
  table_records.add_table_name(tab);
  Json_writer_object table_rec(thd, "table_scan");
  table_rec.add("rows", tab->found_records)
           .add("cost", tab->read_time);
}


/*
  @brief
  Add the tables inside a partial join to the optimizer trace

  @param join              join handler
  @param idx               length of the partial QEP in 'join->positions'
  @param join_tables       map of all non-const tables of the join

  @note
  This function is used during best_access_path to print the tables
  inside the partial join that were considered doing the cost based
  analysis of the various join orders.
*/

void trace_plan_prefix(JOIN *join, uint idx, table_map join_tables)
{
  THD *const thd= join->thd;
  DBUG_ASSERT(thd->trace_started());

  Json_writer_array plan_prefix(thd, "plan_prefix");
  for (uint i= 0; i < idx; i++)
  {
    TABLE_LIST *const tr= join->positions[i].table->tab_list;
    /* Tables already placed in the prefix are no longer in join_tables */
    if (!(tr->map & join_tables))
      plan_prefix.add_table_name(join->positions[i].table);
  }
}


/*
  Print the join order of all the tables for top level select.

  For example:

    select * from ot1
    where ot1.a IN (select it1.a from it1, it2 where it1.b=it2.a);

  So this function would print
    ot1, <subquery2>  ----> For select #1
*/

void print_final_join_order(JOIN *join)
{
  DBUG_ASSERT(join->thd->trace_started());

  Json_writer_object join_order(join->thd);
  Json_writer_array best_order(join->thd, "best_join_order");
  JOIN_TAB *j;
  uint i;
  for (j= join->join_tab,i=0 ; i < join->top_join_tab_count;
       i++, j++)
    best_order.add_table_name(j);
}


void print_best_access_for_table(THD *thd, POSITION *pos,
                                 enum join_type type)
{
  DBUG_ASSERT(thd->trace_started());

  Json_writer_object obj(thd, "chosen_access_method");
  obj.add("type", type == JT_ALL ? "scan" : join_type_str[type]);
  obj.add("records", pos->records_read);
  obj.add("cost", pos->read_time);
  obj.add("uses_join_buffering", pos->use_join_buffer);
  if (pos->range_rowid_filter_info)
  {
    uint key_no= pos->range_rowid_filter_info->key_no;
    obj.add("rowid_filter_key",
            pos->table->table->key_info[key_no].name);
  }
}


/*
  TODO: Introduce an enum_query_type flags parameter, and maybe also let
  EXPLAIN use this function.
*/

void Json_writer::add_str(Item *item)
{
  if (item)
  {
    THD *thd= current_thd;
    StringBuffer<256> str(system_charset_info);

    ulonglong save_option_bits= thd->variables.option_bits;
    thd->variables.option_bits &= ~OPTION_QUOTE_SHOW_CREATE;
    item->print(&str,
                enum_query_type(QT_TO_SYSTEM_CHARSET | QT_SHOW_SELECT_NUMBER
                                | QT_ITEM_IDENT_SKIP_DB_NAMES));
    thd->variables.option_bits= save_option_bits;
    add_str(str.c_ptr_safe());
  }
  else
    add_null();
}

void Opt_trace_context::delete_traces()
{
  if (traces.elements())
  {
    while (traces.elements())
    {
      Opt_trace_stmt *prev= traces.at(0);
      delete prev;
      traces.del(0);
    }
  }
}


/* I_S.OPTIMIZER_TRACE table-fill callback */
int fill_optimizer_trace_info(THD *thd, TABLE_LIST *tables, Item *)
{
  TABLE *table= tables->table;
  Opt_trace_info info;

  /*
    get_values of trace, query, missing bytes and missing_priv

    @todo: Need an iterator here to walk over all the traces
  */
  Opt_trace_context* ctx= &thd->opt_trace;

  if (!thd->opt_trace.empty())
  {
    Opt_trace_stmt *stmt= ctx->get_top_trace();
    stmt->fill_info(&info);

    table->field[0]->store(info.query_ptr,
                           static_cast<uint>(info.query_length),
                           info.query_charset);
    table->field[1]->store(info.trace_ptr,
                           static_cast<uint>(info.trace_length),
                           system_charset_info);
    table->field[2]->store(info.missing_bytes, true);
    table->field[3]->store(info.missing_priv, true);
    // Store in IS
    if (schema_table_store_record(thd, table))
      return 1;
  }
  return 0;
}
diff --git a/sql/opt_trace.h b/sql/opt_trace.h
new file mode 100644
index 
00000000..1ee23a33 --- /dev/null +++ b/sql/opt_trace.h @@ -0,0 +1,218 @@ +#ifndef OPT_TRACE_INCLUDED +#define OPT_TRACE_INCLUDED +/* This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "opt_trace_context.h" // Opt_trace_context +#include "sql_lex.h" +#include "my_json_writer.h" +#include "sql_select.h" +class Item; +class THD; +struct TABLE_LIST; + +/* + User-visible information about a trace. +*/ + +struct Opt_trace_info +{ + /** + String containing trace. + If trace has been end()ed, this is 0-terminated, which is only to aid + debugging or unit testing; this property is not relied upon in normal + server usage. + If trace has not been ended, this is not 0-terminated. That rare case can + happen when a substatement reads OPTIMIZER_TRACE (at that stage, the top + statement is still executing so its trace is not ended yet, but may still + be read by the sub-statement). + */ + const char *trace_ptr; + size_t trace_length; + //// String containing original query. + const char *query_ptr; + size_t query_length; + const CHARSET_INFO *query_charset; ///< charset of query string + /** + How many bytes this trace is missing (for traces which were truncated + because of @@@@optimizer-trace-max-mem-size). + The trace is not extended beyond trace-max-mem-size. + */ + size_t missing_bytes; + /* + Whether user lacks privilege to see this trace. 
+ If this is set to TRUE, then we return an empty trace + */ + bool missing_priv; +}; + +/** + Instantiate this class to start tracing a THD's actions (generally at a + statement's start), and to set the "original" query (not transformed, as + sent by client) for the new trace. Destructor will end the trace. + + @param thd the THD + @param tbl list of tables read/written by the statement. + @param sql_command SQL command being prepared or executed + @param set_vars what variables are set by this command (only used if + sql_command is SQLCOM_SET_OPTION) + @param query query + @param length query's length + @param charset charset which was used to encode this query +*/ + + +class Opt_trace_start +{ + public: + Opt_trace_start(THD *thd_arg): ctx(&thd_arg->opt_trace), traceable(false) {} + + void init(THD *thd, TABLE_LIST *tbl, + enum enum_sql_command sql_command, + List *set_vars, + const char *query, + size_t query_length, + const CHARSET_INFO *query_charset); + + ~Opt_trace_start(); + + private: + Opt_trace_context *const ctx; + /* + True: the query will be traced + False: otherwise + */ + bool traceable; +}; + +/** + Prints SELECT query to optimizer trace. It is not the original query (as in + @c Opt_trace_context::set_query()) but a printout of the parse tree + (Item-s). 
+ @param thd the THD + @param select_lex query's parse tree + @param trace_object Json_writer object to which the query will be added +*/ +void opt_trace_print_expanded_query(THD *thd, SELECT_LEX *select_lex, + Json_writer_object *trace_object); + +void add_table_scan_values_to_trace(THD *thd, JOIN_TAB *tab); +void trace_plan_prefix(JOIN *join, uint idx, table_map join_tables); +void print_final_join_order(JOIN *join); +void print_best_access_for_table(THD *thd, POSITION *pos, + enum join_type type); + +void trace_condition(THD * thd, const char *name, const char *transform_type, + Item *item, const char *table_name= nullptr); + + +/* + Security related (need to add a proper comment here) +*/ + +/** + If the security context is not that of the connected user, inform the trace + system that a privilege is missing. With one exception: see below. + + @param thd + + This serves to eliminate the following issue. + Any information readable by a SELECT may theoretically end up in + the trace. And a SELECT may read information from other places than tables: + - from views (reading their bodies) + - from stored routines (reading their bodies) + - from files (reading their content), with LOAD_FILE() + - from the list of connections (reading their queries...), with + I_S.PROCESSLIST. + If the connected user has EXECUTE privilege on a routine which does a + security context change, the routine can retrieve information internally + (if allowed by the SUID context's privileges), and present only a portion + of it to the connected user. But with tracing on, all information is + possibly in the trace. So the connected user receives more information than + the routine's definer intended to provide. Fixing this issue would require + adding, near many privilege checks in the server, a new + optimizer-trace-specific check done against the connected user's context, + to verify that the connected user has the right to see the retrieved + information. 
+ + Instead, our chosen simpler solution is that if we see a security context + change where SUID user is not the connected user, we disable tracing. With + only one safe exception: if the connected user has all global privileges + (because then she/he can find any information anyway). By "all global + privileges" we mean everything but WITH GRANT OPTION (that latter one isn't + related to information gathering). + + Read access to I_S.OPTIMIZER_TRACE by another user than the connected user + is restricted: @see fill_optimizer_trace_info(). +*/ +void opt_trace_disable_if_no_security_context_access(THD *thd); + +void opt_trace_disable_if_no_tables_access(THD *thd, TABLE_LIST *tbl); + +/** + If tracing is on, checks additional privileges for a view, to make sure + that the user has the right to do SHOW CREATE VIEW. For that: + - this function checks SHOW VIEW + - SELECT is tested in opt_trace_disable_if_no_tables_access() + - SELECT + SHOW VIEW is sufficient for SHOW CREATE VIEW. + We also check underlying tables. + If a privilege is missing, notifies the trace system. + This function should be called when the view's underlying tables have not + yet been merged. + + @param thd THD context + @param view view to check + @param underlying_tables underlying tables/views of 'view' + */ + +void opt_trace_disable_if_no_view_access(THD *thd, TABLE_LIST *view, + TABLE_LIST *underlying_tables); + +/** + If tracing is on, checks additional privileges on a stored routine, to make + sure that the user has the right to do SHOW CREATE PROCEDURE/FUNCTION. For + that, we use the same checks as in those SHOW commands. + If a privilege is missing, notifies the trace system. + + This function is not redundant with + opt_trace_disable_if_no_security_context_access(). + Indeed, for a SQL SECURITY INVOKER routine, there is no context change, but + we must still verify that the invoker can do SHOW CREATE. + + For triggers, see note in sp_head::execute_trigger(). 
+ + @param thd + @param sp routine to check + */ +void opt_trace_disable_if_no_stored_proc_func_access(THD *thd, sp_head *sp); + +/** + Fills information_schema.OPTIMIZER_TRACE with rows (one per trace) + @retval 0 ok + @retval 1 error +*/ +int fill_optimizer_trace_info(THD *thd, TABLE_LIST *tables, Item *); + +#define OPT_TRACE_TRANSFORM(thd, object_level0, object_level1, \ + select_number, from, to) \ + Json_writer_object object_level0(thd); \ + Json_writer_object object_level1(thd, "transformation"); \ + object_level1.add_select_number(select_number).add("from", from).add("to", to); + +#define OPT_TRACE_VIEWS_TRANSFORM(thd, object_level0, object_level1, \ + derived, name, select_number, algorithm) \ + Json_writer_object trace_wrapper(thd); \ + Json_writer_object trace_derived(thd, derived); \ + trace_derived.add("table", name).add_select_number(select_number) \ + .add("algorithm", algorithm); +#endif diff --git a/sql/opt_trace_context.h b/sql/opt_trace_context.h new file mode 100644 index 00000000..f578a0c6 --- /dev/null +++ b/sql/opt_trace_context.h @@ -0,0 +1,135 @@ +#ifndef OPT_TRACE_CONTEXT_INCLUDED +#define OPT_TRACE_CONTEXT_INCLUDED + +#include "sql_array.h" + +class Opt_trace_context; +struct Opt_trace_info; +class Json_writer; + +class Opt_trace_stmt { + public: + /** + Constructor, starts a trace for information_schema and dbug. 
+ @param ctx_arg context + */ + Opt_trace_stmt(Opt_trace_context *ctx_arg); + ~Opt_trace_stmt(); + void set_query(const char *query_ptr, size_t length, const CHARSET_INFO *charset); + void open_struct(const char *key, char opening_bracket); + void close_struct(const char *saved_key, char closing_bracket); + void fill_info(Opt_trace_info* info); + void add(const char *key, char *opening_bracket, size_t val_length); + Json_writer* get_current_json() {return current_json;} + void missing_privilege(); + void disable_tracing_for_children(); + void enable_tracing_for_children(); + bool is_enabled() + { + return I_S_disabled == 0; + } + void set_allowed_mem_size(size_t mem_size); + size_t get_length(); + size_t get_truncated_bytes(); + bool get_missing_priv() { return missing_priv; } + +private: + Opt_trace_context *ctx; + String query; // store the query sent by the user + Json_writer *current_json; // stores the trace + bool missing_priv; ///< whether user lacks privilege to see this trace + /* + 0 <=> this trace should be in information_schema. + !=0 tracing is disabled, this currently happens when we want to trace a + sub-statement. For now traces are only collect for the top statement + not for the sub-statments. + */ + uint I_S_disabled; +}; + + +class Opt_trace_context +{ +public: + Opt_trace_context(); + ~Opt_trace_context(); + + void start(THD *thd, TABLE_LIST *tbl, + enum enum_sql_command sql_command, + const char *query, + size_t query_length, + const CHARSET_INFO *query_charset, + ulong max_mem_size_arg); + void end(); + void set_query(const char *query, size_t length, const CHARSET_INFO *charset); + void delete_traces(); + void set_allowed_mem_size(size_t mem_size); + size_t remaining_mem_size(); + +private: + Opt_trace_stmt* top_trace() + { + return *(traces.front()); + } + +public: + + /* + This returns the top trace from the list of traces. This function + is used when we want to see the contents of the INFORMATION_SCHEMA.OPTIMIZER_TRACE + table. 
+ */ + + Opt_trace_stmt* get_top_trace() + { + if (!traces.elements()) + return NULL; + return top_trace(); + } + + /* + This returns the current trace, to which we are still writing and has not been finished + */ + + Json_writer* get_current_json() + { + if (!is_started()) + return NULL; + return current_trace->get_current_json(); + } + + bool empty() + { + return static_cast(traces.elements()) == 0; + } + + bool is_started() + { + return current_trace && current_trace->is_enabled(); + } + + bool disable_tracing_if_required(); + + bool enable_tracing_if_required(); + + bool is_enabled(); + + void missing_privilege(); + + static const char *flag_names[]; + enum + { + FLAG_DEFAULT = 0, + FLAG_ENABLED = 1 << 0 + }; + +private: + /* + List of traces (currently it stores only 1 trace) + */ + Dynamic_array traces; + Opt_trace_stmt *current_trace; + size_t max_mem_size; +}; + +#endif /* OPT_TRACE_CONTEXT_INCLUDED */ diff --git a/sql/parse_file.cc b/sql/parse_file.cc new file mode 100644 index 00000000..f4aae130 --- /dev/null +++ b/sql/parse_file.cc @@ -0,0 +1,1004 @@ +/* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + @brief + Text .frm files management routines +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "parse_file.h" +#include "unireg.h" // CREATE_MODE +#include "sql_table.h" // build_table_filename +#include "debug.h" +#include // EE_WRITE +#include +#include + +/* from sql_db.cc */ +extern long mysql_rm_arc_files(THD *thd, MY_DIR *dirp, const char *org_path); + + +/** + Write string with escaping. + + @param file IO_CACHE for record + @param val_s string for writing + + @retval + FALSE OK + @retval + TRUE error +*/ + +static my_bool +write_escaped_string(IO_CACHE *file, LEX_STRING *val_s) +{ + char *eos= val_s->str + val_s->length; + char *ptr= val_s->str; + + for (; ptr < eos; ptr++) + { + /* + Should be in sync with read_escaped_string() and + parse_quoted_escaped_string() + */ + switch(*ptr) { + case '\\': // escape character + if (my_b_write(file, (const uchar *)STRING_WITH_LEN("\\\\"))) + return TRUE; + break; + case '\n': // parameter value delimiter + if (my_b_write(file, (const uchar *)STRING_WITH_LEN("\\n"))) + return TRUE; + break; + case '\0': // problem for some string processing utilities + if (my_b_write(file, (const uchar *)STRING_WITH_LEN("\\0"))) + return TRUE; + break; + case 26: // problem for windows utilities (Ctrl-Z) + if (my_b_write(file, (const uchar *)STRING_WITH_LEN("\\z"))) + return TRUE; + break; + case '\'': // list of string delimiter + if (my_b_write(file, (const uchar *)STRING_WITH_LEN("\\\'"))) + return TRUE; + break; + default: + if (my_b_write(file, (const uchar *)ptr, 1)) + return TRUE; + } + } + return FALSE; +} + +static ulonglong view_algo_to_frm(ulonglong val) +{ + switch(val) + { + case VIEW_ALGORITHM_UNDEFINED: + return VIEW_ALGORITHM_UNDEFINED_FRM; + case VIEW_ALGORITHM_MERGE: + return 
VIEW_ALGORITHM_MERGE_FRM; + case VIEW_ALGORITHM_TMPTABLE: + return VIEW_ALGORITHM_TMPTABLE_FRM; + } + DBUG_ASSERT(0); /* Should never happen */ + return VIEW_ALGORITHM_UNDEFINED; +} + +static ulonglong view_algo_from_frm(ulonglong val) +{ + switch(val) + { + case VIEW_ALGORITHM_UNDEFINED_FRM: + return VIEW_ALGORITHM_UNDEFINED; + case VIEW_ALGORITHM_MERGE_FRM: + return VIEW_ALGORITHM_MERGE; + case VIEW_ALGORITHM_TMPTABLE_FRM: + return VIEW_ALGORITHM_TMPTABLE; + } + + /* + Early versions of MariaDB 5.2/5.3 had identical in-memory and frm values + Return input value. + */ + return val; +} + + +/** + Write parameter value to IO_CACHE. + + @param file pointer to IO_CACHE structure for writing + @param base pointer to data structure + @param parameter pointer to parameter descriptor + + @retval + FALSE OK + @retval + TRUE error +*/ + + +static my_bool +write_parameter(IO_CACHE *file, const uchar* base, File_option *parameter) +{ + char num_buf[20]; // buffer for numeric operations + // string for numeric operations + String num(num_buf, sizeof(num_buf), &my_charset_bin); + DBUG_ENTER("write_parameter"); + + switch (parameter->type) { + case FILE_OPTIONS_STRING: + { + LEX_STRING *val_s= (LEX_STRING *)(base + parameter->offset); + if (my_b_write(file, (const uchar *)val_s->str, val_s->length)) + DBUG_RETURN(TRUE); + break; + } + case FILE_OPTIONS_ESTRING: + { + if (write_escaped_string(file, (LEX_STRING *)(base + parameter->offset))) + DBUG_RETURN(TRUE); + break; + } + case FILE_OPTIONS_ULONGLONG: + case FILE_OPTIONS_VIEW_ALGO: + { + ulonglong val= *(ulonglong *)(base + parameter->offset); + + if (parameter->type == FILE_OPTIONS_VIEW_ALGO) + val= view_algo_to_frm(val); + + num.set(val, &my_charset_bin); + if (my_b_write(file, (const uchar *)num.ptr(), num.length())) + DBUG_RETURN(TRUE); + break; + } + case FILE_OPTIONS_TIMESTAMP: + { + /* string have to be allocated already */ + LEX_STRING *val_s= (LEX_STRING *)(base + parameter->offset); + // number of microseconds since 
Epoch, timezone-independent + my_hrtime_t tm= my_hrtime(); + // Paded to 19 characters for compatibility + val_s->length= snprintf(val_s->str, MICROSECOND_TIMESTAMP_BUFFER_SIZE, + "%019lld", tm.val); + DBUG_ASSERT(val_s->length == MICROSECOND_TIMESTAMP_BUFFER_SIZE-1); + if (my_b_write(file, (const uchar *)val_s->str, + PARSE_FILE_TIMESTAMPLENGTH)) + DBUG_RETURN(TRUE); + break; + } + case FILE_OPTIONS_STRLIST: + { + List_iterator_fast it(*((List*) + (base + parameter->offset))); + bool first= 1; + LEX_STRING *str; + while ((str= it++)) + { + // We need ' ' after string to detect list continuation + if ((!first && my_b_write(file, (const uchar *)STRING_WITH_LEN(" "))) || + my_b_write(file, (const uchar *)STRING_WITH_LEN("\'")) || + write_escaped_string(file, str) || + my_b_write(file, (const uchar *)STRING_WITH_LEN("\'"))) + { + DBUG_RETURN(TRUE); + } + first= 0; + } + break; + } + case FILE_OPTIONS_ULLLIST: + { + List_iterator_fast it(*((List*) + (base + parameter->offset))); + bool first= 1; + ulonglong *val; + while ((val= it++)) + { + num.set(*val, &my_charset_bin); + // We need ' ' after string to detect list continuation + if ((!first && my_b_write(file, (const uchar *)STRING_WITH_LEN(" "))) || + my_b_write(file, (const uchar *)num.ptr(), num.length())) + { + DBUG_RETURN(TRUE); + } + first= 0; + } + break; + } + default: + DBUG_ASSERT(0); // never should happened + } + DBUG_RETURN(FALSE); +} + + +/** + Write new .frm. 
  @param dir        directory where put .frm
  @param file_name  .frm file name
  @param type       .frm type string (VIEW, TABLE)
  @param base       base address for parameter reading (structure like
                    TABLE)
  @param parameters parameters description

  @retval
    FALSE OK
  @retval
    TRUE  error
*/

my_bool
sql_create_definition_file(const LEX_CSTRING *dir,
                           const LEX_CSTRING *file_name,
                           const LEX_CSTRING *type,
                           uchar* base, File_option *parameters)
{
  File handler;
  IO_CACHE file;
  char path[FN_REFLEN+1];	// +1 to put temporary file name for sure
  size_t path_end;
  File_option *param;
  DBUG_ENTER("sql_create_definition_file");
  DBUG_PRINT("enter", ("Dir: %s, file: %s, base %p",
                       dir ? dir->str : "",
                       file_name->str, base));

  if (dir)
  {
    fn_format(path, file_name->str, dir->str, "", MY_UNPACK_FILENAME);
    path_end= strlen(path);
  }
  else
  {
    /*
      if not dir is passed, it means file_name is a full path,
      including dir name, file name itself, and an extension,
      and with unpack_filename() executed over it.
    */
    path_end= strxnmov(path, sizeof(path) - 1, file_name->str, NullS) - path;
  }

  // temporary file name: write to "<path>~" then atomically rename into place
  path[path_end]='~';
  path[path_end+1]= '\0';
  if ((handler= mysql_file_create(key_file_fileparser,
                                  path, CREATE_MODE, O_RDWR | O_TRUNC,
                                  MYF(MY_WME))) < 0)
  {
    DBUG_RETURN(TRUE);
  }

  debug_crash_here("definition_file_after_create");

  if (init_io_cache(&file, handler, 0, WRITE_CACHE, 0L, 0, MYF(MY_WME)))
    goto err_w_file;

  // write header (file signature)
  if (my_b_write(&file, (const uchar *)STRING_WITH_LEN("TYPE=")) ||
      my_b_write(&file, (const uchar *)type->str, type->length) ||
      my_b_write(&file, (const uchar *)STRING_WITH_LEN("\n")))
    goto err_w_cache;

  if (debug_simulate_error("definition_file_simulate_write_error", EE_WRITE))
    goto err_w_cache;

  // write parameters to temporary file, one "name=value" line per option
  for (param= parameters; param->name.str; param++)
  {
    if (my_b_write(&file, (const uchar *)param->name.str,
                   param->name.length) ||
        my_b_write(&file, (const uchar *)STRING_WITH_LEN("=")) ||
        write_parameter(&file, base, param) ||
        my_b_write(&file, (const uchar *)STRING_WITH_LEN("\n")))
      goto err_w_cache;
  }

  if (end_io_cache(&file))
    goto err_w_file;

  // optionally fsync() the .frm to survive a crash (--sync-frm option)
  if (opt_sync_frm) {
    if (mysql_file_sync(handler, MYF(MY_WME)))
      goto err_w_file;
  }

  if (mysql_file_close(handler, MYF(MY_WME)))
  {
    DBUG_RETURN(TRUE);
  }

  path[path_end]='\0';

  {
    // rename temporary file to the final name
    char path_to[FN_REFLEN];
    memcpy(path_to, path, path_end+1);
    path[path_end]='~';
    if (mysql_file_rename(key_file_fileparser, path, path_to, MYF(MY_WME)))
    {
      DBUG_RETURN(TRUE);
    }
  }
  DBUG_RETURN(FALSE);
  // goto-based cleanup: close/delete the temp file on any write error
err_w_cache:
  end_io_cache(&file);
err_w_file:
  mysql_file_close(handler, MYF(MY_WME));
  mysql_file_delete(key_file_fileparser, path, MYF(MY_WME));
  DBUG_RETURN(TRUE);
}


/*
  Make a copy of a definition file with '-' added to the name

  @param org_name Original file name
  @param new_name Pointer to a buff of FN_REFLEN.
Will be updated to name of + backup file + @return 0 ok + @return 1 error +*/ + +int sql_backup_definition_file(const LEX_CSTRING *org_name, + LEX_CSTRING *new_name) +{ + char *new_name_buff= (char*) new_name->str; + new_name->length= org_name->length+1; + + memcpy(new_name_buff, org_name->str, org_name->length+1); + new_name_buff[org_name->length]= '-'; + new_name_buff[org_name->length+1]= 0; + return my_copy(org_name->str, new_name->str, MYF(MY_WME)); +} + +/* + Restore copy of a definition file + + @param org_name Name of backup file (ending with '-' or '~') + + @return 0 ok + @return 1 error +*/ + +int sql_restore_definition_file(const LEX_CSTRING *name) +{ + char new_name[FN_REFLEN+1]; + memcpy(new_name, name->str, name->length-1); + new_name[name->length-1]= 0; + return mysql_file_rename(key_file_fileparser, name->str, new_name, + MYF(MY_WME)); +} + + +/** + Renames a frm file (including backups) in same schema. + + @thd thread handler + @param schema name of given schema + @param old_name original file name + @param new_db new schema + @param new_name new file name + + @retval + 0 OK + @retval + 1 Error (only if renaming of frm failed) +*/ +my_bool rename_in_schema_file(THD *thd, + const char *schema, const char *old_name, + const char *new_db, const char *new_name) +{ + char old_path[FN_REFLEN + 1], new_path[FN_REFLEN + 1], arc_path[FN_REFLEN + 1]; + + build_table_filename(old_path, sizeof(old_path) - 1, + schema, old_name, reg_ext, 0); + build_table_filename(new_path, sizeof(new_path) - 1, + new_db, new_name, reg_ext, 0); + + if (mysql_file_rename(key_file_frm, old_path, new_path, MYF(MY_WME))) + return 1; + + /* check if arc_dir exists: disabled unused feature (see bug #17823). 
*/ + build_table_filename(arc_path, sizeof(arc_path) - 1, schema, "arc", "", 0); + + { // remove obsolete 'arc' directory and files if any + MY_DIR *new_dirp; + if ((new_dirp = my_dir(arc_path, MYF(MY_DONT_SORT)))) + { + DBUG_PRINT("my",("Archive subdir found: %s", arc_path)); + (void) mysql_rm_arc_files(thd, new_dirp, arc_path); + } + } + return 0; +} + +/** + Prepare frm to parse (read to memory). + + @param file_name path & filename to .frm file + @param mem_root MEM_ROOT for buffer allocation + @param bad_format_errors send errors on bad content + + @note + returned pointer + 1 will be type of .frm + + @return + 0 - error + @return + parser object +*/ + +File_parser * +sql_parse_prepare(const LEX_CSTRING *file_name, MEM_ROOT *mem_root, + bool bad_format_errors) +{ + MY_STAT stat_info; + size_t len; + char *buff, *end, *sign; + File_parser *parser; + File file; + DBUG_ENTER("sql_parse_prepare"); + + if (!mysql_file_stat(key_file_fileparser, + file_name->str, &stat_info, MYF(MY_WME))) + { + DBUG_RETURN(0); + } + + MSAN_STAT_WORKAROUND(&stat_info); + + if (stat_info.st_size > INT_MAX-1) + { + my_error(ER_FPARSER_TOO_BIG_FILE, MYF(0), file_name->str); + DBUG_RETURN(0); + } + + if (!(parser= new(mem_root) File_parser)) + { + DBUG_RETURN(0); + } + + if (!(buff= (char*) alloc_root(mem_root, (size_t)(stat_info.st_size+1)))) + { + DBUG_RETURN(0); + } + + if ((file= mysql_file_open(key_file_fileparser, file_name->str, + O_RDONLY | O_SHARE, MYF(MY_WME))) < 0) + { + DBUG_RETURN(0); + } + + if ((len= mysql_file_read(file, (uchar *)buff, (size_t)stat_info.st_size, + MYF(MY_WME))) == MY_FILE_ERROR) + { + mysql_file_close(file, MYF(MY_WME)); + DBUG_RETURN(0); + } + + if (mysql_file_close(file, MYF(MY_WME))) + { + DBUG_RETURN(0); + } + + end= buff + len; + *end= '\0'; // barrier for more simple parsing + + // 7 = 5 (TYPE=) + 1 (letter at least of type name) + 1 ('\n') + if (len < 7 || + buff[0] != 'T' || + buff[1] != 'Y' || + buff[2] != 'P' || + buff[3] != 'E' || + buff[4] != 
'=') + goto frm_error; + + // skip signature; + parser->file_type.str= sign= buff + 5; + while (*sign >= 'A' && *sign <= 'Z' && sign < end) + sign++; + if (*sign != '\n') + goto frm_error; + parser->file_type.length= sign - parser->file_type.str; + // EOS for file signature just for safety + *sign= '\0'; + + parser->end= end; + parser->start= sign + 1; + parser->content_ok= 1; + + DBUG_RETURN(parser); + +frm_error: + if (bad_format_errors) + { + my_error(ER_FPARSER_BAD_HEADER, MYF(0), file_name->str); + DBUG_RETURN(0); + } + DBUG_RETURN(parser); // upper level have to check parser->ok() +} + + +/** + parse LEX_STRING. + + @param ptr pointer on string beginning + @param end pointer on symbol after parsed string end (still owned + by buffer and can be accessed + @param mem_root MEM_ROOT for parameter allocation + @param str pointer on string, where results should be stored + + @retval + 0 error + @retval + \# pointer on symbol after string +*/ + + +static const char * +parse_string(const char *ptr, const char *end, MEM_ROOT *mem_root, + LEX_STRING *str) +{ + // get string length + const char *eol= strchr(ptr, '\n'); + + if (eol >= end) + return 0; + + str->length= eol - ptr; + + if (!(str->str= strmake_root(mem_root, ptr, str->length))) + return 0; + return eol+1; +} + + +/** + read escaped string from ptr to eol in already allocated str. 
+ + @param ptr pointer on string beginning + @param eol pointer on character after end of string + @param str target string + + @retval + FALSE OK + @retval + TRUE error +*/ + +my_bool +read_escaped_string(const char *ptr, const char *eol, LEX_STRING *str) +{ + char *write_pos= str->str; + + for (; ptr < eol; ptr++, write_pos++) + { + char c= *ptr; + if (c == '\\') + { + ptr++; + if (ptr >= eol) + return TRUE; + /* + Should be in sync with write_escaped_string() and + parse_quoted_escaped_string() + */ + switch(*ptr) { + case '\\': + *write_pos= '\\'; + break; + case 'n': + *write_pos= '\n'; + break; + case '0': + *write_pos= '\0'; + break; + case 'z': + *write_pos= 26; + break; + case '\'': + *write_pos= '\''; + break; + default: + return TRUE; + } + } + else + *write_pos= c; + } + str->str[str->length= write_pos-str->str]= '\0'; // just for safety + return FALSE; +} + + +/** + parse \\n delimited escaped string. + + @param ptr pointer on string beginning + @param end pointer on symbol after parsed string end (still owned + by buffer and can be accessed + @param mem_root MEM_ROOT for parameter allocation + @param str pointer on string, where results should be stored + + @retval + 0 error + @retval + \# pointer on symbol after string +*/ + + +const char * +parse_escaped_string(const char *ptr, const char *end, MEM_ROOT *mem_root, + LEX_CSTRING *str) +{ + const char *eol= strchr(ptr, '\n'); + + if (eol == 0 || eol >= end || + !(str->str= (char*) alloc_root(mem_root, (eol - ptr) + 1)) || + read_escaped_string(ptr, eol, (LEX_STRING*) str)) + return 0; + + return eol+1; +} + + +/** + parse '' delimited escaped string. 
+ + @param ptr pointer on string beginning + @param end pointer on symbol after parsed string end (still owned + by buffer and can be accessed + @param mem_root MEM_ROOT for parameter allocation + @param str pointer on string, where results should be stored + + @retval + 0 error + @retval + \# pointer on symbol after string +*/ + +static const char * +parse_quoted_escaped_string(const char *ptr, const char *end, + MEM_ROOT *mem_root, LEX_STRING *str) +{ + const char *eol; + uint result_len= 0; + bool escaped= 0; + + // starting ' + if (*(ptr++) != '\'') + return 0; + + // find ending ' + for (eol= ptr; (*eol != '\'' || escaped) && eol < end; eol++) + { + if (!(escaped= (*eol == '\\' && !escaped))) + result_len++; + } + + // process string + if (eol >= end || + !(str->str= (char*) alloc_root(mem_root, result_len + 1)) || + read_escaped_string(ptr, eol, str)) + return 0; + + return eol+1; +} + + +/** + Parser for FILE_OPTIONS_ULLLIST type value. + + @param[in,out] ptr pointer to parameter + @param[in] end end of the configuration + @param[in] line pointer to the line beginning + @param[in] base base address for parameter writing (structure + like TABLE) + @param[in] parameter description + @param[in] mem_root MEM_ROOT for parameters allocation +*/ + +bool get_file_options_ulllist(const char *&ptr, const char *end, + const char *line, + uchar* base, File_option *parameter, + MEM_ROOT *mem_root) +{ + List *nlist= (List*)(base + parameter->offset); + ulonglong *num; + nlist->empty(); + // list parsing + while (ptr < end) + { + int not_used; + char *num_end= const_cast(end); + if (!(num= (ulonglong*)alloc_root(mem_root, sizeof(ulonglong))) || + nlist->push_back(num, mem_root)) + goto nlist_err; + *num= my_strtoll10(ptr, &num_end, ¬_used); + ptr= num_end; + switch (*ptr) { + case '\n': + goto end_of_nlist; + case ' ': + // we cant go over buffer bounds, because we have \0 at the end + ptr++; + break; + default: + goto nlist_err_w_message; + } + } + +end_of_nlist: + if 
(*(ptr++) != '\n') + goto nlist_err; + return FALSE; + +nlist_err_w_message: + my_error(ER_FPARSER_ERROR_IN_PARAMETER, MYF(0), parameter->name.str, line); +nlist_err: + return TRUE; +} + + +/** + parse parameters. + + @param base base address for parameter writing (structure like + TABLE) + @param mem_root MEM_ROOT for parameters allocation + @param parameters parameters description + @param required number of required parameters in above list. If the file + contains more parameters than "required", they will + be ignored. If the file contains less parameters + then "required", non-existing parameters will + remain their values. + @param hook hook called for unknown keys + @param hook_data some data specific for the hook + + @retval + FALSE OK + @retval + TRUE error +*/ + + +my_bool +File_parser::parse(uchar* base, MEM_ROOT *mem_root, + struct File_option *parameters, uint required, + Unknown_key_hook *hook) const +{ + uint first_param= 0, found= 0; + const char *ptr= start; + const char *eol; + LEX_STRING *str; + List *list; + DBUG_ENTER("File_parser::parse"); + + while (ptr < end && found < required) + { + const char *line= ptr; + if (*ptr == '#') + { + // it is comment + if (!(ptr= strchr(ptr, '\n'))) + { + my_error(ER_FPARSER_EOF_IN_COMMENT, MYF(0), line); + DBUG_RETURN(TRUE); + } + ptr++; + } + else + { + File_option *parameter= parameters+first_param, + *parameters_end= parameters+required; + size_t len= 0; + for (; parameter < parameters_end; parameter++) + { + len= parameter->name.length; + // check length + if (len < (size_t)(end-ptr) && ptr[len] != '=') + continue; + // check keyword + if (memcmp(parameter->name.str, ptr, len) == 0) + break; + } + + if (parameter < parameters_end) + { + found++; + /* + if we found first parameter, start search from next parameter + next time. 
+ (this small optimisation should work, because they should be + written in same order) + */ + if (parameter == parameters+first_param) + first_param++; + + // get value + ptr+= (len+1); + switch (parameter->type) { + case FILE_OPTIONS_STRING: + { + if (!(ptr= parse_string(ptr, end, mem_root, + (LEX_STRING *)(base + + parameter->offset)))) + { + my_error(ER_FPARSER_ERROR_IN_PARAMETER, MYF(0), + parameter->name.str, line); + DBUG_RETURN(TRUE); + } + break; + } + case FILE_OPTIONS_ESTRING: + { + if (!(ptr= parse_escaped_string(ptr, end, mem_root, + (LEX_CSTRING *) + (base + parameter->offset)))) + { + my_error(ER_FPARSER_ERROR_IN_PARAMETER, MYF(0), + parameter->name.str, line); + DBUG_RETURN(TRUE); + } + break; + } + case FILE_OPTIONS_ULONGLONG: + case FILE_OPTIONS_VIEW_ALGO: + if (!(eol= strchr(ptr, '\n'))) + { + my_error(ER_FPARSER_ERROR_IN_PARAMETER, MYF(0), + parameter->name.str, line); + DBUG_RETURN(TRUE); + } + { + int not_used; + ulonglong val= (ulonglong)my_strtoll10(ptr, 0, ¬_used); + + if (parameter->type == FILE_OPTIONS_VIEW_ALGO) + val= view_algo_from_frm(val); + + *((ulonglong*)(base + parameter->offset))= val; + } + ptr= eol+1; + break; + case FILE_OPTIONS_TIMESTAMP: + { + /* string have to be allocated already */ + LEX_STRING *val= (LEX_STRING *)(base + parameter->offset); + /* 19 characters of timestamp */ + if (ptr[MICROSECOND_TIMESTAMP_BUFFER_SIZE-1] != '\n') + { + my_error(ER_FPARSER_ERROR_IN_PARAMETER, MYF(0), + parameter->name.str, line); + DBUG_RETURN(TRUE); + } + memcpy(val->str, ptr, MICROSECOND_TIMESTAMP_BUFFER_SIZE-1); + val->str[val->length= MICROSECOND_TIMESTAMP_BUFFER_SIZE-1]= '\0'; + ptr+= MICROSECOND_TIMESTAMP_BUFFER_SIZE; + break; + } + case FILE_OPTIONS_STRLIST: + { + list= (List*)(base + parameter->offset); + + list->empty(); + // list parsing + while (ptr < end) + { + if (!(str= (LEX_STRING*)alloc_root(mem_root, + sizeof(LEX_STRING))) || + list->push_back(str, mem_root)) + goto list_err; + if (!(ptr= parse_quoted_escaped_string(ptr, 
end, mem_root, str))) + goto list_err_w_message; + switch (*ptr) { + case '\n': + goto end_of_list; + case ' ': + // we cant go over buffer bounds, because we have \0 at the end + ptr++; + break; + default: + goto list_err_w_message; + } + } + +end_of_list: + if (*(ptr++) != '\n') + goto list_err; + break; + +list_err_w_message: + my_error(ER_FPARSER_ERROR_IN_PARAMETER, MYF(0), + parameter->name.str, line); +list_err: + DBUG_RETURN(TRUE); + } + case FILE_OPTIONS_ULLLIST: + if (get_file_options_ulllist(ptr, end, line, base, + parameter, mem_root)) + DBUG_RETURN(TRUE); + break; + default: + DBUG_ASSERT(0); // never should happened + } + } + else + { + ptr= line; + if (hook->process_unknown_string(ptr, base, mem_root, end)) + { + DBUG_RETURN(TRUE); + } + // skip unknown parameter + if (!(ptr= strchr(ptr, '\n'))) + { + my_error(ER_FPARSER_EOF_IN_UNKNOWN_PARAMETER, MYF(0), line); + DBUG_RETURN(TRUE); + } + ptr++; + } + } + } + + /* + NOTE: if we read less than "required" parameters, it is still Ok. + Probably, we've just read the file of the previous version, which + contains less parameters. + */ + + DBUG_RETURN(FALSE); +} + + +/** + Dummy unknown key hook. + + @param[in,out] unknown_key reference on the line with unknown + parameter and the parsing point + @param[in] base base address for parameter writing + (structure like TABLE) + @param[in] mem_root MEM_ROOT for parameters allocation + @param[in] end the end of the configuration + + @note + This hook used to catch no longer supported keys and process them for + backward compatibility, but it will not slow down processing of modern + format files. + This hook does nothing except debug output. 
+ + @retval + FALSE OK + @retval + TRUE Error +*/ + +bool +File_parser_dummy_hook::process_unknown_string(const char *&unknown_key, + uchar* base, MEM_ROOT *mem_root, + const char *end) +{ + DBUG_ENTER("file_parser_dummy_hook::process_unknown_string"); + DBUG_PRINT("info", ("Unknown key: '%60s'", unknown_key)); + DBUG_RETURN(FALSE); +} diff --git a/sql/parse_file.h b/sql/parse_file.h new file mode 100644 index 00000000..0589d628 --- /dev/null +++ b/sql/parse_file.h @@ -0,0 +1,122 @@ +/* -*- C++ -*- */ +/* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef _PARSE_FILE_H_ +#define _PARSE_FILE_H_ + +#include "sql_string.h" // LEX_STRING +#include "sql_alloc.h" // Sql_alloc + +class THD; + +typedef struct st_mem_root MEM_ROOT; + +#define PARSE_FILE_TIMESTAMPLENGTH 19 + +enum file_opt_type { + FILE_OPTIONS_STRING, /**< String (LEX_STRING) */ + FILE_OPTIONS_ESTRING, /**< Escaped string (LEX_STRING) */ + FILE_OPTIONS_ULONGLONG, /**< ulonglong parameter (ulonglong) */ + FILE_OPTIONS_VIEW_ALGO, /**< Similar to longlong, but needs conversion */ + FILE_OPTIONS_TIMESTAMP, /**< timestamp (LEX_STRING have to be + allocated with length 20 (19+1) */ + FILE_OPTIONS_STRLIST, /**< list of escaped strings + (List) */ + FILE_OPTIONS_ULLLIST /**< list of ulonglong values + (List) */ +}; + +struct File_option +{ + LEX_CSTRING name; /**< Name of the option */ + my_ptrdiff_t offset; /**< offset to base address of value */ + file_opt_type type; /**< Option type */ +}; + + +/** + This hook used to catch no longer supported keys and process them for + backward compatibility. +*/ + +class Unknown_key_hook +{ +public: + Unknown_key_hook() = default; /* Remove gcc warning */ + virtual ~Unknown_key_hook() = default; /* Remove gcc warning */ + virtual bool process_unknown_string(const char *&unknown_key, uchar* base, + MEM_ROOT *mem_root, const char *end)= 0; +}; + + +/** Dummy hook for parsers which do not need hook for unknown keys. 
*/ + +class File_parser_dummy_hook: public Unknown_key_hook +{ +public: + File_parser_dummy_hook() = default; /* Remove gcc warning */ + virtual bool process_unknown_string(const char *&unknown_key, uchar* base, + MEM_ROOT *mem_root, const char *end); +}; + +extern File_parser_dummy_hook file_parser_dummy_hook; + +bool get_file_options_ulllist(const char *&ptr, const char *end, + const char *line, uchar* base, + File_option *parameter, + MEM_ROOT *mem_root); + +const char * +parse_escaped_string(const char *ptr, const char *end, MEM_ROOT *mem_root, + LEX_CSTRING *str); + +class File_parser; +File_parser *sql_parse_prepare(const LEX_CSTRING *file_name, + MEM_ROOT *mem_root, bool bad_format_errors); + +my_bool +sql_create_definition_file(const LEX_CSTRING *dir, + const LEX_CSTRING *file_name, + const LEX_CSTRING *type, + uchar* base, File_option *parameters); +my_bool rename_in_schema_file(THD *thd, + const char *schema, const char *old_name, + const char *new_db, const char *new_name); + +int sql_backup_definition_file(const LEX_CSTRING *org_name, + LEX_CSTRING *new_name); +int sql_restore_definition_file(const LEX_CSTRING *name); + +class File_parser: public Sql_alloc +{ + char *start, *end; + LEX_CSTRING file_type; + bool content_ok; +public: + File_parser() :start(0), end(0), content_ok(0) + { file_type.str= 0; file_type.length= 0; } + + bool ok() { return content_ok; } + const LEX_CSTRING *type() const { return &file_type; } + my_bool parse(uchar* base, MEM_ROOT *mem_root, + struct File_option *parameters, uint required, + Unknown_key_hook *hook) const; + + friend File_parser *sql_parse_prepare(const LEX_CSTRING *file_name, + MEM_ROOT *mem_root, + bool bad_format_errors); +}; +#endif /* _PARSE_FILE_H_ */ diff --git a/sql/partition_element.h b/sql/partition_element.h new file mode 100644 index 00000000..1abaa315 --- /dev/null +++ b/sql/partition_element.h @@ -0,0 +1,178 @@ +#ifndef PARTITION_ELEMENT_INCLUDED +#define PARTITION_ELEMENT_INCLUDED + +/* Copyright (c) 
2005, 2010, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2021, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "my_base.h" /* ha_rows */ +#include "handler.h" /* UNDEF_NODEGROUP */ + +/** + * An enum and a struct to handle partitioning and subpartitioning. + */ +enum partition_type { + NOT_A_PARTITION= 0, + RANGE_PARTITION, + HASH_PARTITION, + LIST_PARTITION, + VERSIONING_PARTITION +}; + +enum partition_state { + PART_NORMAL= 0, + PART_IS_DROPPED= 1, + PART_TO_BE_DROPPED= 2, + PART_TO_BE_ADDED= 3, + PART_TO_BE_REORGED= 4, + PART_REORGED_DROPPED= 5, + PART_CHANGED= 6, + PART_IS_CHANGED= 7, + PART_IS_ADDED= 8, + PART_ADMIN= 9 +}; + +/* + This struct is used to keep track of column expressions as part + of the COLUMNS concept in conjunction with RANGE and LIST partitioning. + The value can be either of MINVALUE, MAXVALUE and an expression that + must be constant and evaluate to the same type as the column it + represents. + + The data in this fixed in two steps. The parser will only fill in whether + it is a max_value or provide an expression. Filling in + column_value, part_info, partition_id, null_value is done by the + function fix_column_value_function. However the item tree needs + fixed also before writing it into the frm file (in add_column_list_values). 
+ To distinguish between those two variants, fixed= 1 after the + fixing in add_column_list_values and fixed= 2 otherwise. This is + since the fixing in add_column_list_values isn't a complete fixing. +*/ + +typedef struct p_column_list_val +{ + void* column_value; + Item* item_expression; + partition_info *part_info; + uint partition_id; + bool max_value; // MAXVALUE for RANGE type or DEFAULT value for LIST type + bool null_value; + char fixed; +} part_column_list_val; + + +/* + This struct is used to contain the value of an element + in the VALUES IN struct. It needs to keep knowledge of + whether it is a signed/unsigned value and whether it is + NULL or not. +*/ + +typedef struct p_elem_val +{ + longlong value; + uint added_items; + bool null_value; + bool unsigned_flag; + part_column_list_val *col_val_array; +} part_elem_value; + +struct st_ddl_log_memory_entry; + +enum stat_trx_field +{ + STAT_TRX_END= 0 +}; + +class partition_element :public Sql_alloc +{ +public: + enum elem_type_enum + { + CONVENTIONAL= 0, + CURRENT, + HISTORY + }; + + List subpartitions; + List list_val_list; + ha_rows part_max_rows; + ha_rows part_min_rows; + longlong range_value; + const char *partition_name; + struct st_ddl_log_memory_entry *log_entry; + const char* part_comment; + const char* data_file_name; + const char* index_file_name; + handlerton *engine_type; + LEX_CSTRING connect_string; + enum partition_state part_state; + uint16 nodegroup_id; + bool has_null_value; + bool signed_flag; // Range value signed + bool max_value; // MAXVALUE range + uint32 id; + bool empty; + elem_type_enum type; + + engine_option_value *option_list; // create options for partition + ha_table_option_struct *option_struct; // structure with parsed options + + partition_element() + : part_max_rows(0), part_min_rows(0), range_value(0), + partition_name(NULL), + log_entry(NULL), part_comment(NULL), + data_file_name(NULL), index_file_name(NULL), + engine_type(NULL), connect_string(null_clex_str), 
part_state(PART_NORMAL), + nodegroup_id(UNDEF_NODEGROUP), has_null_value(FALSE), + signed_flag(FALSE), max_value(FALSE), + id(UINT_MAX32), + empty(true), + type(CONVENTIONAL), + option_list(NULL), option_struct(NULL) + {} + partition_element(partition_element *part_elem) + : part_max_rows(part_elem->part_max_rows), + part_min_rows(part_elem->part_min_rows), + range_value(0), partition_name(NULL), + log_entry(NULL), + part_comment(part_elem->part_comment), + data_file_name(part_elem->data_file_name), + index_file_name(part_elem->index_file_name), + engine_type(part_elem->engine_type), + connect_string(null_clex_str), + part_state(part_elem->part_state), + nodegroup_id(part_elem->nodegroup_id), + has_null_value(FALSE), + signed_flag(part_elem->signed_flag), + max_value(part_elem->max_value), + id(part_elem->id), + empty(part_elem->empty), + type(CONVENTIONAL), + option_list(part_elem->option_list), + option_struct(part_elem->option_struct) + {} + ~partition_element() = default; + + part_column_list_val& get_col_val(uint idx) + { + part_elem_value *ev= list_val_list.head(); + DBUG_ASSERT(ev); + DBUG_ASSERT(ev->col_val_array); + return ev->col_val_array[idx]; + } +}; + +#endif /* PARTITION_ELEMENT_INCLUDED */ diff --git a/sql/partition_info.cc b/sql/partition_info.cc new file mode 100644 index 00000000..1b65de6e --- /dev/null +++ b/sql/partition_info.cc @@ -0,0 +1,2935 @@ +/* Copyright (c) 2006, 2015, Oracle and/or its affiliates. + Copyright (c) 2010, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* Some general useful functions */ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation +#endif + +#include "mariadb.h" +#include +#include +#include "sql_priv.h" +// Required to get server definitions for mysql/plugin.h right +#include "sql_plugin.h" +#include "sql_partition.h" // partition_info.h: LIST_PART_ENTRY + // NOT_A_PARTITION_ID +#include "partition_info.h" +#include "sql_parse.h" +#include "sql_base.h" // fill_record +#include "lock.h" +#include "table.h" +#include "sql_class.h" +#include "vers_string.h" + +#ifdef WITH_PARTITION_STORAGE_ENGINE +#include "ha_partition.h" +#include "sql_table.h" +#include "transaction.h" + + +partition_info *partition_info::get_clone(THD *thd, bool empty_data_and_index_file) +{ + MEM_ROOT *mem_root= thd->mem_root; + DBUG_ENTER("partition_info::get_clone"); + + List_iterator part_it(partitions); + partition_element *part; + partition_info *clone= new (mem_root) partition_info(*this); + if (unlikely(!clone)) + DBUG_RETURN(NULL); + + memset(&(clone->read_partitions), 0, sizeof(clone->read_partitions)); + memset(&(clone->lock_partitions), 0, sizeof(clone->lock_partitions)); + clone->bitmaps_are_initialized= FALSE; + clone->partitions.empty(); + + while ((part= (part_it++))) + { + List_iterator subpart_it(part->subpartitions); + partition_element *subpart; + partition_element *part_clone= new (mem_root) partition_element(*part); + if (!part_clone) + DBUG_RETURN(NULL); + part_clone->subpartitions.empty(); + while ((subpart= (subpart_it++))) + { + partition_element *subpart_clone= new (mem_root) partition_element(*subpart); + if (!subpart_clone) + DBUG_RETURN(NULL); + if (empty_data_and_index_file) + subpart_clone->data_file_name= subpart_clone->index_file_name= NULL; + 
part_clone->subpartitions.push_back(subpart_clone, mem_root); + } + + if (empty_data_and_index_file) + part_clone->data_file_name= part_clone->index_file_name= NULL; + clone->partitions.push_back(part_clone, mem_root); + part_clone->list_val_list.empty(); + List_iterator list_val_it(part->list_val_list); + part_elem_value *new_val_arr= + (part_elem_value *)alloc_root(mem_root, sizeof(part_elem_value) * + part->list_val_list.elements); + if (!new_val_arr) + DBUG_RETURN(NULL); + + p_column_list_val *new_colval_arr= + (p_column_list_val*)alloc_root(mem_root, sizeof(p_column_list_val) * + num_columns * + part->list_val_list.elements); + if (!new_colval_arr) + DBUG_RETURN(NULL); + + part_elem_value *val; + while ((val= list_val_it++)) + { + part_elem_value *new_val= new_val_arr++; + memcpy(new_val, val, sizeof(part_elem_value)); + if (!val->null_value) + { + p_column_list_val *new_colval= new_colval_arr; + new_colval_arr+= num_columns; + memcpy(new_colval, val->col_val_array, + sizeof(p_column_list_val) * num_columns); + new_val->col_val_array= new_colval; + } + part_clone->list_val_list.push_back(new_val, mem_root); + } + } + if (part_type == VERSIONING_PARTITION && vers_info) + { + // clone Vers_part_info; set now_part, hist_part + clone->vers_info= new (mem_root) Vers_part_info(*vers_info); + List_iterator it(clone->partitions); + while ((part= it++)) + { + if (vers_info->now_part && part->id == vers_info->now_part->id) + clone->vers_info->now_part= part; + else if (vers_info->hist_part && part->id == vers_info->hist_part->id) + clone->vers_info->hist_part= part; + } // while ((part= it++)) + } // if (part_type == VERSIONING_PARTITION ... + DBUG_RETURN(clone); +} + +/** + Mark named [sub]partition to be used/locked. + + @param part_name Partition name to match. Must be \0 terminated! + @param length Partition name length. 
+ + @return Success if partition found + @retval true Partition found + @retval false Partition not found +*/ + +bool partition_info::add_named_partition(const char *part_name, size_t length) +{ + HASH *part_name_hash; + PART_NAME_DEF *part_def; + Partition_share *part_share; + DBUG_ENTER("partition_info::add_named_partition"); + DBUG_ASSERT(part_name[length] == 0); + DBUG_ASSERT(table); + DBUG_ASSERT(table->s); + DBUG_ASSERT(table->s->ha_share); + part_share= static_cast((table->s->ha_share)); + DBUG_ASSERT(part_share->partition_name_hash_initialized); + part_name_hash= &part_share->partition_name_hash; + DBUG_ASSERT(part_name_hash->records); + + part_def= (PART_NAME_DEF*) my_hash_search(part_name_hash, + (const uchar*) part_name, + length); + if (!part_def) + { + my_error(ER_UNKNOWN_PARTITION, MYF(0), part_name, table->alias.c_ptr()); + DBUG_RETURN(true); + } + + if (part_def->is_subpart) + { + bitmap_set_bit(&read_partitions, part_def->part_id); + } + else + { + if (is_sub_partitioned()) + { + /* Mark all subpartitions in the partition */ + uint j, start= part_def->part_id; + uint end= start + num_subparts; + for (j= start; j < end; j++) + bitmap_set_bit(&read_partitions, j); + } + else + bitmap_set_bit(&read_partitions, part_def->part_id); + } + DBUG_PRINT("info", ("Found partition %u is_subpart %d for name %.*s", + part_def->part_id, part_def->is_subpart, + length, part_name)); + DBUG_RETURN(false); +} + + +/** + Mark named [sub]partition to be used/locked. + + @param part_elem Partition element that matched. +*/ + +bool partition_info::set_named_partition_bitmap(const char *part_name, size_t length) +{ + DBUG_ENTER("partition_info::set_named_partition_bitmap"); + bitmap_clear_all(&read_partitions); + if (add_named_partition(part_name, length)) + DBUG_RETURN(true); + bitmap_copy(&lock_partitions, &read_partitions); + DBUG_RETURN(false); +} + + +/** + Prune away partitions not mentioned in the PARTITION () clause, + if used. 
+ + @param partition_names list of names of partitions. + + @return Operation status + @retval true Failure + @retval false Success +*/ +bool partition_info::prune_partition_bitmaps(List *partition_names) +{ + List_iterator partition_names_it(*(partition_names)); + uint num_names= partition_names->elements; + uint i= 0; + DBUG_ENTER("partition_info::prune_partition_bitmaps"); + + if (num_names < 1) + DBUG_RETURN(true); + + /* + TODO: When adding support for FK in partitioned tables, the referenced + table must probably lock all partitions for read, and also write depending + of ON DELETE/UPDATE. + */ + bitmap_clear_all(&read_partitions); + + /* No check for duplicate names or overlapping partitions/subpartitions. */ + + DBUG_PRINT("info", ("Searching through partition_name_hash")); + do + { + String *part_name_str= partition_names_it++; + if (add_named_partition(part_name_str->c_ptr(), part_name_str->length())) + DBUG_RETURN(true); + } while (++i < num_names); + DBUG_RETURN(false); +} + + +/** + Set read/lock_partitions bitmap over non pruned partitions + + @param partition_names list of partition names to query + + @return Operation status + @retval FALSE OK + @retval TRUE Failed to allocate memory for bitmap or list of partitions + did not match + + @note OK to call multiple times without the need for free_bitmaps. 
+*/ + +bool partition_info::set_partition_bitmaps(List *partition_names) +{ + DBUG_ENTER("partition_info::set_partition_bitmaps"); + + DBUG_ASSERT(bitmaps_are_initialized); + DBUG_ASSERT(table); + if (!bitmaps_are_initialized) + DBUG_RETURN(TRUE); + + if (partition_names && + partition_names->elements) + { + if (table->s->db_type()->partition_flags() & HA_USE_AUTO_PARTITION) + { + my_error(ER_PARTITION_CLAUSE_ON_NONPARTITIONED, MYF(0)); + DBUG_RETURN(true); + } + if (prune_partition_bitmaps(partition_names)) + DBUG_RETURN(TRUE); + } + else + { + bitmap_set_all(&read_partitions); + DBUG_PRINT("info", ("Set all partitions")); + } + bitmap_copy(&lock_partitions, &read_partitions); + DBUG_ASSERT(bitmap_get_first_set(&lock_partitions) != MY_BIT_NONE); + DBUG_RETURN(FALSE); +} + + +/* + Create a memory area where default partition names are stored and fill it + up with the names. + + SYNOPSIS + create_default_partition_names() + part_no Partition number for subparts + num_parts Number of partitions + start_no Starting partition number + subpart Is it subpartitions + + RETURN VALUE + A pointer to the memory area of the default partition names + + DESCRIPTION + A support routine for the partition code where default values are + generated. 
+ The external routine needing this code is check_partition_info +*/ + +char *partition_info::create_default_partition_names(THD *thd, uint part_no, + uint num_parts_arg, + uint start_no) +{ + char *ptr= (char*) thd->calloc(num_parts_arg * MAX_PART_NAME_SIZE + 1); + char *move_ptr= ptr; + uint i= 0; + DBUG_ENTER("create_default_partition_names"); + + if (likely(ptr != 0)) + { + do + { + if (make_partition_name(move_ptr, (start_no + i))) + DBUG_RETURN(NULL); + move_ptr+= MAX_PART_NAME_SIZE; + } while (++i < num_parts_arg); + } + DBUG_RETURN(ptr); +} + + +/* + Create a unique name for the subpartition as part_name'sp''subpart_no' + + SYNOPSIS + create_default_subpartition_name() + subpart_no Number of subpartition + part_name Name of partition + RETURN VALUES + >0 A reference to the created name string + 0 Memory allocation error +*/ + +char *partition_info::create_default_subpartition_name(THD *thd, uint subpart_no, + const char *part_name) +{ + size_t size_alloc= strlen(part_name) + MAX_PART_NAME_SIZE; + char *ptr= (char*) thd->calloc(size_alloc); + DBUG_ENTER("create_default_subpartition_name"); + + if (likely(ptr != NULL)) + my_snprintf(ptr, size_alloc, "%ssp%u", part_name, subpart_no); + + DBUG_RETURN(ptr); +} + + +/* + Set up all the default partitions not set-up by the user in the SQL + statement. Also perform a number of checks that the user hasn't tried + to use default values where no defaults exists. + + SYNOPSIS + set_up_default_partitions() + file A reference to a handler of the table + info Create info + start_no Starting partition number + + RETURN VALUE + TRUE Error, attempted default values not possible + FALSE Ok, default partitions set-up + + DESCRIPTION + The routine uses the underlying handler of the partitioning to define + the default number of partitions. For some handlers this requires + knowledge of the maximum number of rows to be stored in the table. 
+ This routine only accepts HASH and KEY partitioning and thus there is + no subpartitioning if this routine is successful. + The external routine needing this code is check_partition_info +*/ + +bool partition_info::set_up_default_partitions(THD *thd, handler *file, + HA_CREATE_INFO *info, + uint start_no) +{ + uint i; + char *default_name; + bool result= TRUE; + DBUG_ENTER("partition_info::set_up_default_partitions"); + + if (part_type == VERSIONING_PARTITION) + { + if (start_no == 0 && use_default_num_partitions) + num_parts= 2; + use_default_num_partitions= false; + } + else if (part_type != HASH_PARTITION) + { + const char *error_string; + if (part_type == RANGE_PARTITION) + error_string= "RANGE"; + else + error_string= "LIST"; + my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), error_string); + goto end; + } + + if ((num_parts == 0) && + ((num_parts= file->get_default_no_partitions(info)) == 0)) + { + my_error(ER_PARTITION_NOT_DEFINED_ERROR, MYF(0), "partitions"); + goto end; + } + + if (unlikely(num_parts > MAX_PARTITIONS)) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + goto end; + } + if (unlikely((!(default_name= create_default_partition_names(thd, 0, + num_parts, + start_no))))) + goto end; + i= 0; + do + { + partition_element *part_elem= new partition_element(); + if (likely(part_elem != 0 && + (!partitions.push_back(part_elem)))) + { + part_elem->engine_type= default_engine_type; + part_elem->partition_name= default_name; + part_elem->id= i; + default_name+=MAX_PART_NAME_SIZE; + if (part_type == VERSIONING_PARTITION) + { + if (start_no > 0 || i < num_parts - 1) { + part_elem->type= partition_element::HISTORY; + } else { + part_elem->type= partition_element::CURRENT; + part_elem->partition_name= "pn"; + } + } + } + else + goto end; + } while (++i < num_parts); + result= FALSE; +end: + DBUG_RETURN(result); +} + + +/* + Set up all the default subpartitions not set-up by the user in the SQL + statement. 
Also perform a number of checks that the default partitioning + becomes an allowed partitioning scheme. + + SYNOPSIS + set_up_default_subpartitions() + file A reference to a handler of the table + info Create info + + RETURN VALUE + TRUE Error, attempted default values not possible + FALSE Ok, default partitions set-up + + DESCRIPTION + The routine uses the underlying handler of the partitioning to define + the default number of partitions. For some handlers this requires + knowledge of the maximum number of rows to be stored in the table. + This routine is only called for RANGE or LIST partitioning and those + need to be specified so only subpartitions are specified. + The external routine needing this code is check_partition_info +*/ + +bool partition_info::set_up_default_subpartitions(THD *thd, handler *file, + HA_CREATE_INFO *info) +{ + uint i, j; + bool result= TRUE; + partition_element *part_elem; + List_iterator part_it(partitions); + DBUG_ENTER("partition_info::set_up_default_subpartitions"); + + if (num_subparts == 0) + num_subparts= file->get_default_no_partitions(info); + if (unlikely((num_parts * num_subparts) > MAX_PARTITIONS)) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + goto end; + } + i= 0; + do + { + part_elem= part_it++; + j= 0; + do + { + partition_element *subpart_elem= new partition_element(part_elem); + if (likely(subpart_elem != 0 && + (!part_elem->subpartitions.push_back(subpart_elem)))) + { + char *ptr= create_default_subpartition_name(thd, j, + part_elem->partition_name); + if (!ptr) + goto end; + subpart_elem->engine_type= default_engine_type; + subpart_elem->partition_name= ptr; + } + else + goto end; + } while (++j < num_subparts); + } while (++i < num_parts); + result= FALSE; +end: + DBUG_RETURN(result); +} + + +/* + Support routine for check_partition_info + + SYNOPSIS + set_up_defaults_for_partitioning() + file A reference to a handler of the table + info Create info + start_no Starting partition number + + RETURN VALUE + 
TRUE Error, attempted default values not possible + FALSE Ok, default partitions set-up + + DESCRIPTION + Set up defaults for partition or subpartition (cannot set-up for both, + this will return an error. +*/ + +bool partition_info::set_up_defaults_for_partitioning(THD *thd, handler *file, + HA_CREATE_INFO *info, + uint start_no) +{ + DBUG_ENTER("partition_info::set_up_defaults_for_partitioning"); + + if (!default_partitions_setup) + { + default_partitions_setup= TRUE; + if (use_default_partitions && + set_up_default_partitions(thd, file, info, start_no)) + DBUG_RETURN(TRUE); + if (is_sub_partitioned() && + use_default_subpartitions) + DBUG_RETURN(set_up_default_subpartitions(thd, file, info)); + } + DBUG_RETURN(FALSE); +} + + +/* + Support routine for check_partition_info + + SYNOPSIS + find_duplicate_field + no parameters + + RETURN VALUE + Erroneus field name Error, there are two fields with same name + NULL Ok, no field defined twice + + DESCRIPTION + Check that the user haven't defined the same field twice in + key or column list partitioning. +*/ + +const char* partition_info::find_duplicate_field() +{ + const char *field_name_outer, *field_name_inner; + List_iterator it_outer(part_field_list); + uint num_fields= part_field_list.elements; + uint i,j; + DBUG_ENTER("partition_info::find_duplicate_field"); + + for (i= 0; i < num_fields; i++) + { + field_name_outer= it_outer++; + List_iterator it_inner(part_field_list); + for (j= 0; j < num_fields; j++) + { + field_name_inner= it_inner++; + if (i >= j) + continue; + if (!(my_strcasecmp(system_charset_info, + field_name_outer, + field_name_inner))) + { + DBUG_RETURN(field_name_outer); + } + } + } + DBUG_RETURN(NULL); +} + + +/** + @brief Get part_elem and part_id from partition name + + @param partition_name Name of partition to search for. + @param file_name[out] Partition file name (part after table name, + #P#[#SP#]), skipped if NULL. + @param part_id[out] Id of found partition or NOT_A_PARTITION_ID. 
+ + @retval Pointer to part_elem of [sub]partition, if not found NULL + + @note Since names of partitions AND subpartitions must be unique, + this function searches both partitions and subpartitions and if name of + a partition is given for a subpartitioned table, part_elem will be + the partition, but part_id will be NOT_A_PARTITION_ID and file_name not set. +*/ +partition_element *partition_info::get_part_elem(const char *partition_name, + char *file_name, + size_t file_name_size, + uint32 *part_id) +{ + List_iterator part_it(partitions); + uint i= 0; + DBUG_ENTER("partition_info::get_part_elem"); + DBUG_ASSERT(part_id); + *part_id= NOT_A_PARTITION_ID; + do + { + partition_element *part_elem= part_it++; + if (is_sub_partitioned()) + { + List_iterator sub_part_it(part_elem->subpartitions); + uint j= 0; + do + { + partition_element *sub_part_elem= sub_part_it++; + if (!my_strcasecmp(system_charset_info, + sub_part_elem->partition_name, partition_name)) + { + if (file_name) + if (create_subpartition_name(file_name, file_name_size, "", + part_elem->partition_name, + partition_name, NORMAL_PART_NAME)) + DBUG_RETURN(NULL); + *part_id= j + (i * num_subparts); + DBUG_RETURN(sub_part_elem); + } + } while (++j < num_subparts); + + /* Naming a partition (first level) on a subpartitioned table. */ + if (!my_strcasecmp(system_charset_info, + part_elem->partition_name, partition_name)) + DBUG_RETURN(part_elem); + } + else if (!my_strcasecmp(system_charset_info, + part_elem->partition_name, partition_name)) + { + if (file_name) + if (create_partition_name(file_name, file_name_size, "", + partition_name, NORMAL_PART_NAME, TRUE)) + DBUG_RETURN(NULL); + *part_id= i; + DBUG_RETURN(part_elem); + } + } while (++i < num_parts); + DBUG_RETURN(NULL); +} + + +/** + Helper function to find_duplicate_name. 
+*/ + +static const char *get_part_name_from_elem(const char *name, size_t *length, + my_bool not_used __attribute__((unused))) +{ + *length= strlen(name); + return name; +} + +/* + A support function to check partition names for duplication in a + partitioned table + + SYNOPSIS + find_duplicate_name() + + RETURN VALUES + NULL Has unique part and subpart names + !NULL Pointer to duplicated name + + DESCRIPTION + Checks that the list of names in the partitions doesn't contain any + duplicated names. +*/ + +char *partition_info::find_duplicate_name() +{ + HASH partition_names; + uint max_names; + const uchar *curr_name= NULL; + List_iterator parts_it(partitions); + partition_element *p_elem; + + DBUG_ENTER("partition_info::find_duplicate_name"); + + /* + TODO: If table->s->ha_part_data->partition_name_hash.elements is > 0, + then we could just return NULL, but that has not been verified. + And this only happens when in ALTER TABLE with full table copy. + */ + + max_names= num_parts; + if (is_sub_partitioned()) + max_names+= num_parts * num_subparts; + if (my_hash_init(PSI_INSTRUMENT_ME, &partition_names, system_charset_info, max_names, 0, 0, + (my_hash_get_key) get_part_name_from_elem, 0, HASH_UNIQUE)) + { + DBUG_ASSERT(0); + curr_name= (const uchar*) "Internal failure"; + goto error; + } + while ((p_elem= (parts_it++))) + { + curr_name= (const uchar*) p_elem->partition_name; + if (my_hash_insert(&partition_names, curr_name)) + goto error; + + if (!p_elem->subpartitions.is_empty()) + { + List_iterator subparts_it(p_elem->subpartitions); + partition_element *subp_elem; + while ((subp_elem= (subparts_it++))) + { + curr_name= (const uchar*) subp_elem->partition_name; + if (my_hash_insert(&partition_names, curr_name)) + goto error; + } + } + } + my_hash_free(&partition_names); + DBUG_RETURN(NULL); +error: + my_hash_free(&partition_names); + DBUG_RETURN((char*) curr_name); +} + + +/* + A support function to check if a partition element's name is unique + + SYNOPSIS + 
has_unique_name() + partition_element element to check + + RETURN VALUES + TRUE Has unique name + FALSE Doesn't +*/ + +bool partition_info::has_unique_name(partition_element *element) +{ + DBUG_ENTER("partition_info::has_unique_name"); + + const char *name_to_check= element->partition_name; + List_iterator parts_it(partitions); + + partition_element *el; + while ((el= (parts_it++))) + { + if (!(my_strcasecmp(system_charset_info, el->partition_name, + name_to_check)) && el != element) + DBUG_RETURN(FALSE); + + if (!el->subpartitions.is_empty()) + { + partition_element *sub_el; + List_iterator subparts_it(el->subpartitions); + while ((sub_el= (subparts_it++))) + { + if (!(my_strcasecmp(system_charset_info, sub_el->partition_name, + name_to_check)) && sub_el != element) + DBUG_RETURN(FALSE); + } + } + } + DBUG_RETURN(TRUE); +} + + +/** + @brief Switch history partition according limit or interval + + @note + vers_info->limit Limit by number of partition records + vers_info->interval Limit by fixed time interval + vers_info->hist_part (out) Working history partition +*/ +bool partition_info::vers_set_hist_part(THD *thd, uint *create_count) +{ + const bool auto_hist= create_count && vers_info->auto_hist; + + if (vers_info->limit) + { + DBUG_ASSERT(!vers_info->interval.is_set()); + ha_partition *hp= (ha_partition*)(table->file); + partition_element *next; + List_iterator it(partitions); + ha_rows records= 0; + vers_info->hist_part= partitions.head(); + while ((next= it++) != vers_info->now_part) + { + DBUG_ASSERT(bitmap_is_set(&read_partitions, next->id)); + ha_rows next_records= hp->part_records(next); + if (next_records == 0) + break; + vers_info->hist_part= next; + records= next_records; + } + if (records >= vers_info->limit) + { + if (next == vers_info->now_part) + { + if (auto_hist) + *create_count= 1; + } + else + vers_info->hist_part= next; + } + return 0; + } + else if (vers_info->interval.is_set() && + vers_info->hist_part->range_value <= thd->query_start()) + { 
+ partition_element *next= NULL; + bool error= true; + List_iterator it(partitions); + while (next != vers_info->hist_part) + next= it++; + + while ((next= it++) != vers_info->now_part) + { + vers_info->hist_part= next; + if (next->range_value > thd->query_start()) + { + error= false; + break; + } + } + if (error) + { + if (auto_hist) + { + *create_count= 0; + const my_time_t hist_end= (my_time_t) vers_info->hist_part->range_value; + DBUG_ASSERT(thd->query_start() >= hist_end); + MYSQL_TIME h0, q0; + my_tz_OFFSET0->gmt_sec_to_TIME(&h0, hist_end); + my_tz_OFFSET0->gmt_sec_to_TIME(&q0, thd->query_start()); + longlong q= pack_time(&q0); + longlong h= pack_time(&h0); + while (h <= q) + { + if (date_add_interval(thd, &h0, vers_info->interval.type, + vers_info->interval.step)) + return true; + h= pack_time(&h0); + ++*create_count; + if (*create_count == MAX_PARTITIONS - 2) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(ME_WARNING)); + my_error(ER_VERS_HIST_PART_FAILED, MYF(0), + table->s->db.str, table->s->table_name.str); + return true; + } + } + } + else + { + my_error(WARN_VERS_PART_FULL, MYF(ME_WARNING|ME_ERROR_LOG), + table->s->db.str, table->s->table_name.str, + vers_info->hist_part->partition_name, "INTERVAL"); + } + } + } + + return false; +} + + +/** + @brief Run fast_alter_partition_table() to add new history partitions + for tables requiring them. 
+ + @param num_parts Number of partitions to create +*/ +bool vers_create_partitions(THD *thd, TABLE_LIST* tl, uint num_parts) +{ + bool result= true; + Table_specification_st create_info; + Alter_info alter_info; + partition_info *save_part_info= thd->work_part_info; + Query_tables_list save_query_tables; + Reprepare_observer *save_reprepare_observer= thd->m_reprepare_observer; + bool save_no_write_to_binlog= thd->lex->no_write_to_binlog; + thd->m_reprepare_observer= NULL; + thd->lex->reset_n_backup_query_tables_list(&save_query_tables); + thd->lex->no_write_to_binlog= true; + TABLE *table= tl->table; + + DBUG_ASSERT(!thd->is_error()); + DBUG_ASSERT(num_parts); + + { + DBUG_ASSERT(table->s->get_table_ref_type() == TABLE_REF_BASE_TABLE); + DBUG_ASSERT(table->versioned()); + DBUG_ASSERT(table->part_info); + DBUG_ASSERT(table->part_info->vers_info); + alter_info.reset(); + alter_info.partition_flags= ALTER_PARTITION_ADD|ALTER_PARTITION_AUTO_HIST; + create_info.init(); + create_info.alter_info= &alter_info; + Alter_table_ctx alter_ctx(thd, tl, 1, &table->s->db, &table->s->table_name); + + MDL_REQUEST_INIT(&tl->mdl_request, MDL_key::TABLE, tl->db.str, + tl->table_name.str, MDL_SHARED_NO_WRITE, MDL_TRANSACTION); + if (thd->mdl_context.acquire_lock(&tl->mdl_request, + thd->variables.lock_wait_timeout)) + goto exit; + table->mdl_ticket= tl->mdl_request.ticket; + + create_info.db_type= table->s->db_type(); + create_info.options|= HA_VERSIONED_TABLE; + DBUG_ASSERT(create_info.db_type); + + create_info.vers_info.set_start(table->s->vers_start_field()->field_name); + create_info.vers_info.set_end(table->s->vers_end_field()->field_name); + + partition_info *part_info= new partition_info(); + if (unlikely(!part_info)) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + goto exit; + } + part_info->use_default_num_partitions= false; + part_info->use_default_num_subpartitions= false; + part_info->num_parts= num_parts; + part_info->num_subparts= table->part_info->num_subparts; + 
part_info->subpart_type= table->part_info->subpart_type; + if (unlikely(part_info->vers_init_info(thd))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + goto exit; + } + + thd->work_part_info= part_info; + if (part_info->set_up_defaults_for_partitioning(thd, table->file, NULL, + table->part_info->next_part_no(num_parts))) + { + my_error(ER_VERS_HIST_PART_FAILED, MYF(ME_WARNING), + tl->db.str, tl->table_name.str); + goto exit; + } + bool partition_changed= false; + bool fast_alter_partition= false; + if (prep_alter_part_table(thd, table, &alter_info, &create_info, + &partition_changed, &fast_alter_partition)) + { + my_error(ER_VERS_HIST_PART_FAILED, MYF(ME_WARNING), + tl->db.str, tl->table_name.str); + goto exit; + } + if (!fast_alter_partition) + { + my_error(ER_VERS_HIST_PART_FAILED, MYF(ME_WARNING), + tl->db.str, tl->table_name.str); + goto exit; + } + DBUG_ASSERT(partition_changed); + if (mysql_prepare_alter_table(thd, table, &create_info, &alter_info, + &alter_ctx)) + { + my_error(ER_VERS_HIST_PART_FAILED, MYF(ME_WARNING), + tl->db.str, tl->table_name.str); + goto exit; + } + + alter_info.db= alter_ctx.db; + alter_info.table_name= alter_ctx.table_name; + if (fast_alter_partition_table(thd, table, &alter_info, &alter_ctx, + &create_info, tl)) + { + my_error(ER_VERS_HIST_PART_FAILED, MYF(ME_WARNING), + tl->db.str, tl->table_name.str); + goto exit; + } + } + + result= false; + // NOTE: we have to return DA_EMPTY for new command + DBUG_ASSERT(thd->get_stmt_da()->is_ok()); + thd->get_stmt_da()->reset_diagnostics_area(); + thd->variables.option_bits|= OPTION_BINLOG_THIS; + +exit: + thd->work_part_info= save_part_info; + thd->m_reprepare_observer= save_reprepare_observer; + thd->lex->restore_backup_query_tables_list(&save_query_tables); + thd->lex->no_write_to_binlog= save_no_write_to_binlog; + return result; +} + + +/** + Warn at the end of DML command if the last history partition is out of LIMIT. 
+*/ +void partition_info::vers_check_limit(THD *thd) +{ + if (vers_info->auto_hist || !vers_info->limit || + vers_info->hist_part->id + 1 < vers_info->now_part->id) + return; + + /* + NOTE: at this point read_partitions bitmap is already pruned by DML code, + we have to set read bits for working history partition. We could use + bitmap_set_all(), but this is not optimal since there can be quite a number + of partitions. + */ +#ifndef DBUG_OFF + const uint32 sub_factor= num_subparts ? num_subparts : 1; + uint32 part_id= vers_info->hist_part->id * sub_factor; + const uint32 part_id_end __attribute__((unused)) = part_id + sub_factor; + DBUG_ASSERT(part_id_end <= num_parts * sub_factor); +#endif + + ha_partition *hp= (ha_partition*)(table->file); + ha_rows hist_rows= hp->part_records(vers_info->hist_part); + if (hist_rows >= vers_info->limit) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_VERS_PART_FULL, + ER_THD(thd, WARN_VERS_PART_FULL), + table->s->db.str, table->s->table_name.str, + vers_info->hist_part->partition_name, "LIMIT"); + + sql_print_warning(ER_THD(thd, WARN_VERS_PART_FULL), + table->s->db.str, table->s->table_name.str, + vers_info->hist_part->partition_name, "LIMIT"); + } +} + + +/* + Check that the partition/subpartition is setup to use the correct + storage engine + SYNOPSIS + check_engine_condition() + p_elem Partition element + table_engine_set Have user specified engine on table level + inout::engine_type Current engine used + inout::first Is it first partition + RETURN VALUE + TRUE Failed check + FALSE Ok + DESCRIPTION + Specified engine for table and partitions p0 and pn + Must be correct both on CREATE and ALTER commands + table p0 pn res (0 - OK, 1 - FAIL) + - - - 0 + - - x 1 + - x - 1 + - x x 0 + x - - 0 + x - x 0 + x x - 0 + x x x 0 + i.e: + - All subpartitions must use the same engine + AND it must be the same as the partition. + - All partitions must use the same engine + AND it must be the same as the table. 
+ - if one does NOT specify an engine on the table level + then one must either NOT specify any engine on any + partition/subpartition OR for ALL partitions/subpartitions + Note: + When ALTER a table, the engines are already set for all levels + (table, all partitions and subpartitions). So if one want to + change the storage engine, one must specify it on the table level + +*/ + +static bool check_engine_condition(partition_element *p_elem, + bool table_engine_set, + handlerton **engine_type, + bool *first) +{ + DBUG_ENTER("check_engine_condition"); + + DBUG_PRINT("enter", ("p_eng %s t_eng %s t_eng_set %u first %u state %u", + ha_resolve_storage_engine_name(p_elem->engine_type), + ha_resolve_storage_engine_name(*engine_type), + table_engine_set, *first, p_elem->part_state)); + if (*first && !table_engine_set) + { + *engine_type= p_elem->engine_type; + DBUG_PRINT("info", ("setting table_engine = %s", + ha_resolve_storage_engine_name(*engine_type))); + } + *first= FALSE; + if ((table_engine_set && + (p_elem->engine_type != (*engine_type) && + p_elem->engine_type)) || + (!table_engine_set && + p_elem->engine_type != (*engine_type))) + { + DBUG_RETURN(TRUE); + } + + DBUG_RETURN(FALSE); +} + + +/* + Check engine mix that it is correct + Current limitation is that all partitions and subpartitions + must use the same storage engine. + SYNOPSIS + check_engine_mix() + inout::engine_type Current engine used + table_engine_set Have user specified engine on table level + RETURN VALUE + TRUE Error, mixed engines + FALSE Ok, no mixed engines + DESCRIPTION + Current check verifies only that all handlers are the same. + Later this check will be more sophisticated. 
+ (specified partition handler ) specified table handler + (MYISAM, MYISAM) - OK + (MYISAM, -) - NOT OK + (MYISAM, -) MYISAM OK + (- , MYISAM) - NOT OK + (- , -) MYISAM OK + (-,-) - OK +*/ + +bool partition_info::check_engine_mix(handlerton *engine_type, + bool table_engine_set) +{ + handlerton *old_engine_type= engine_type; + bool first= TRUE; + uint n_parts= partitions.elements; + DBUG_ENTER("partition_info::check_engine_mix"); + DBUG_PRINT("info", ("in: engine_type = %s, table_engine_set = %u", + ha_resolve_storage_engine_name(engine_type), + table_engine_set)); + if (n_parts) + { + List_iterator part_it(partitions); + uint i= 0; + do + { + partition_element *part_elem= part_it++; + DBUG_PRINT("info", ("part = %d engine = %s table_engine_set %u", + i, ha_resolve_storage_engine_name(part_elem->engine_type), + table_engine_set)); + if (is_sub_partitioned() && + part_elem->subpartitions.elements) + { + uint n_subparts= part_elem->subpartitions.elements; + uint j= 0; + List_iterator sub_it(part_elem->subpartitions); + do + { + partition_element *sub_elem= sub_it++; + DBUG_PRINT("info", ("sub = %d engine = %s table_engie_set %u", + j, ha_resolve_storage_engine_name(sub_elem->engine_type), + table_engine_set)); + if (check_engine_condition(sub_elem, table_engine_set, + &engine_type, &first)) + goto error; + } while (++j < n_subparts); + /* ensure that the partition also has correct engine */ + if (check_engine_condition(part_elem, table_engine_set, + &engine_type, &first)) + goto error; + } + else if (check_engine_condition(part_elem, table_engine_set, + &engine_type, &first)) + goto error; + } while (++i < n_parts); + } + DBUG_PRINT("info", ("engine_type = %s", + ha_resolve_storage_engine_name(engine_type))); + if (!engine_type) + engine_type= old_engine_type; + if (engine_type->flags & HTON_NO_PARTITION) + { + my_error(ER_PARTITION_MERGE_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + DBUG_PRINT("info", ("out: engine_type = %s", + 
ha_resolve_storage_engine_name(engine_type))); + DBUG_ASSERT(engine_type != partition_hton); + DBUG_RETURN(FALSE); +error: + /* + Mixed engines not yet supported but when supported it will need + the partition handler + */ + DBUG_RETURN(TRUE); +} + + +/** + Check if we allow DATA/INDEX DIRECTORY, if not warn and set them to NULL. + + @param thd THD also containing sql_mode (looks from MODE_NO_DIR_IN_CREATE). + @param part_elem partition_element to check. +*/ +static void warn_if_dir_in_part_elem(THD *thd, partition_element *part_elem) +{ + if (thd->variables.sql_mode & MODE_NO_DIR_IN_CREATE) + { + if (part_elem->data_file_name) + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_OPTION_IGNORED, + ER_THD(thd, WARN_OPTION_IGNORED), + "DATA DIRECTORY"); + if (part_elem->index_file_name) + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_OPTION_IGNORED, + ER_THD(thd, WARN_OPTION_IGNORED), + "INDEX DIRECTORY"); + part_elem->data_file_name= part_elem->index_file_name= NULL; + } +} + + +/* + This code is used early in the CREATE TABLE and ALTER TABLE process. + + SYNOPSIS + check_partition_info() + thd Thread object + eng_type Return value for used engine in partitions + file A reference to a handler of the table + info Create info + add_or_reorg_part Is it ALTER TABLE ADD/REORGANIZE command + + RETURN VALUE + TRUE Error, something went wrong + FALSE Ok, full partition data structures are now generated + + DESCRIPTION + We will check that the partition info requested is possible to set-up in + this version. This routine is an extension of the parser one could say. + If defaults were used we will generate default data structures for all + partitions. 
+ +*/ + +bool partition_info::check_partition_info(THD *thd, handlerton **eng_type, + handler *file, HA_CREATE_INFO *info, + partition_info *add_or_reorg_part) +{ + handlerton *table_engine= default_engine_type; + uint i, tot_partitions; + bool result= TRUE, table_engine_set; + const char *same_name; + uint32 hist_parts= 0; + uint32 now_parts= 0; + DBUG_ENTER("partition_info::check_partition_info"); + DBUG_ASSERT(default_engine_type != partition_hton); + + DBUG_PRINT("info", ("default table_engine = %s", + ha_resolve_storage_engine_name(table_engine))); + if (!add_or_reorg_part) + { + int err= 0; + + /* Check for partition expression. */ + if (!list_of_part_fields) + { + DBUG_ASSERT(part_expr); + err= part_expr->walk(&Item::check_partition_func_processor, 0, NULL); + } + + /* Check for sub partition expression. */ + if (!err && is_sub_partitioned() && !list_of_subpart_fields) + { + DBUG_ASSERT(subpart_expr); + err= subpart_expr->walk(&Item::check_partition_func_processor, 0, + NULL); + } + + if (err) + { + my_error(ER_PARTITION_FUNCTION_IS_NOT_ALLOWED, MYF(0)); + goto end; + } + if (thd->lex->sql_command == SQLCOM_CREATE_TABLE && + fix_parser_data(thd)) + goto end; + } + if (unlikely(!is_sub_partitioned() && + !(use_default_subpartitions && use_default_num_subpartitions))) + { + my_error(ER_SUBPARTITION_ERROR, MYF(0)); + goto end; + } + if (unlikely(is_sub_partitioned() && + (!(part_type == RANGE_PARTITION || + part_type == LIST_PARTITION || + part_type == VERSIONING_PARTITION)))) + { + /* Only RANGE, LIST and SYSTEM_TIME partitioning can be subpartitioned */ + my_error(ER_SUBPARTITION_ERROR, MYF(0)); + goto end; + } + if (unlikely(set_up_defaults_for_partitioning(thd, file, info, (uint)0))) + goto end; + if (!(tot_partitions= get_tot_partitions())) + { + my_error(ER_PARTITION_NOT_DEFINED_ERROR, MYF(0), "partitions"); + goto end; + } + if (unlikely(tot_partitions > MAX_PARTITIONS)) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + goto end; + } + /* + if NOT 
specified ENGINE = : + If Create, always use create_info->db_type + else, use previous tables db_type + either ALL or NONE partition should be set to + default_engine_type when not table_engine_set + Note: after a table is created its storage engines for + the table and all partitions/subpartitions are set. + So when ALTER it is already set on table level + */ + if (info && info->used_fields & HA_CREATE_USED_ENGINE) + { + table_engine_set= TRUE; + table_engine= info->db_type; + /* if partition_hton, use thd->lex->create_info */ + if (table_engine == partition_hton) + table_engine= thd->lex->create_info.db_type; + DBUG_ASSERT(table_engine != partition_hton); + DBUG_PRINT("info", ("Using table_engine = %s", + ha_resolve_storage_engine_name(table_engine))); + } + else + { + table_engine_set= FALSE; + if (thd->lex->sql_command != SQLCOM_CREATE_TABLE) + { + table_engine_set= TRUE; + DBUG_PRINT("info", ("No create, table_engine = %s", + ha_resolve_storage_engine_name(table_engine))); + DBUG_ASSERT(table_engine && table_engine != partition_hton); + } + } + + if (part_field_list.elements > 0 && + (same_name= find_duplicate_field())) + { + my_error(ER_SAME_NAME_PARTITION_FIELD, MYF(0), same_name); + goto end; + } + if ((same_name= find_duplicate_name())) + { + my_error(ER_SAME_NAME_PARTITION, MYF(0), same_name); + goto end; + } + + if (part_type == VERSIONING_PARTITION) + { + DBUG_ASSERT(vers_info); + if (num_parts < 2 || !(use_default_partitions || vers_info->now_part)) + { + DBUG_ASSERT(info); + DBUG_ASSERT(info->alias.str); + my_error(ER_VERS_WRONG_PARTS, MYF(0), info->alias.str); + goto end; + } + DBUG_ASSERT(num_parts == partitions.elements); + } + i= 0; + { + List_iterator part_it(partitions); + uint num_parts_not_set= 0; + uint prev_num_subparts_not_set= num_subparts + 1; + do + { + partition_element *part_elem= part_it++; + warn_if_dir_in_part_elem(thd, part_elem); + if (!is_sub_partitioned()) + { + if (part_elem->engine_type == NULL) + { + num_parts_not_set++; + 
part_elem->engine_type= default_engine_type; + } + if (check_table_name(part_elem->partition_name, + strlen(part_elem->partition_name), FALSE)) + { + my_error(ER_WRONG_PARTITION_NAME, MYF(0)); + goto end; + } + DBUG_PRINT("info", ("part = %d engine = %s", + i, ha_resolve_storage_engine_name(part_elem->engine_type))); + } + else + { + uint j= 0; + uint num_subparts_not_set= 0; + List_iterator sub_it(part_elem->subpartitions); + partition_element *sub_elem; + do + { + sub_elem= sub_it++; + warn_if_dir_in_part_elem(thd, sub_elem); + if (check_table_name(sub_elem->partition_name, + strlen(sub_elem->partition_name), FALSE)) + { + my_error(ER_WRONG_PARTITION_NAME, MYF(0)); + goto end; + } + if (sub_elem->engine_type == NULL) + { + if (part_elem->engine_type != NULL) + sub_elem->engine_type= part_elem->engine_type; + else + { + sub_elem->engine_type= default_engine_type; + num_subparts_not_set++; + } + } + DBUG_PRINT("info", ("part = %d sub = %d engine = %s", i, j, + ha_resolve_storage_engine_name(sub_elem->engine_type))); + } while (++j < num_subparts); + + if (prev_num_subparts_not_set == (num_subparts + 1) && + (num_subparts_not_set == 0 || + num_subparts_not_set == num_subparts)) + prev_num_subparts_not_set= num_subparts_not_set; + + if (!table_engine_set && + prev_num_subparts_not_set != num_subparts_not_set) + { + DBUG_PRINT("info", ("num_subparts_not_set = %u num_subparts = %u", + num_subparts_not_set, num_subparts)); + my_error(ER_MIX_HANDLER_ERROR, MYF(0)); + goto end; + } + + if (part_elem->engine_type == NULL) + { + if (num_subparts_not_set == 0) + part_elem->engine_type= sub_elem->engine_type; + else + { + num_parts_not_set++; + part_elem->engine_type= default_engine_type; + } + } + } + if (part_type == VERSIONING_PARTITION) + { + if (part_elem->type == partition_element::HISTORY) + { + hist_parts++; + } + else + { + DBUG_ASSERT(part_elem->type == partition_element::CURRENT); + now_parts++; + } + } + } while (++i < num_parts); + if (!table_engine_set && + 
num_parts_not_set != 0 && + num_parts_not_set != num_parts) + { + DBUG_PRINT("info", ("num_parts_not_set = %u num_parts = %u", + num_parts_not_set, num_subparts)); + my_error(ER_MIX_HANDLER_ERROR, MYF(0)); + goto end; + } + } + if (unlikely(check_engine_mix(table_engine, table_engine_set))) + { + my_error(ER_MIX_HANDLER_ERROR, MYF(0)); + goto end; + } + + if (hist_parts > 1) + { + if (vers_info->limit == 0 && !vers_info->interval.is_set()) + { + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + WARN_VERS_PARAMETERS, + ER_THD(thd, WARN_VERS_PARAMETERS), + "no rotation condition for multiple HISTORY partitions."); + } + } + if (unlikely(now_parts > 1)) + { + my_error(ER_VERS_WRONG_PARTS, MYF(0), info->alias.str); + goto end; + } + + + DBUG_ASSERT(table_engine != partition_hton && + default_engine_type == table_engine); + if (eng_type) + *eng_type= table_engine; + + + /* + We need to check all constant expressions that they are of the correct + type and that they are increasing for ranges and not overlapping for + list constants. 
+ */ + + if (add_or_reorg_part) + { + if (part_type == VERSIONING_PARTITION && add_or_reorg_part->partitions.elements) + vers_update_el_ids(); + if (check_constants(thd, this)) + goto end; + } + + result= FALSE; +end: + DBUG_RETURN(result); +} + + +/* + Print error for no partition found + + SYNOPSIS + print_no_partition_found() + table Table object + + RETURN VALUES +*/ + +void partition_info::print_no_partition_found(TABLE *table_arg, myf errflag) +{ + char buf[100]; + char *buf_ptr= (char*)&buf; + TABLE_LIST table_list; + THD *thd= current_thd; + + table_list.reset(); + table_list.db= table_arg->s->db; + table_list.table_name= table_arg->s->table_name; + + if (check_single_table_access(thd, SELECT_ACL, &table_list, TRUE)) + { + my_message(ER_NO_PARTITION_FOR_GIVEN_VALUE, + ER_THD(thd, ER_NO_PARTITION_FOR_GIVEN_VALUE_SILENT), errflag); + } + else + { + if (column_list) + buf_ptr= (char*)"from column_list"; + else + { + MY_BITMAP *old_map= dbug_tmp_use_all_columns(table_arg, &table_arg->read_set); + if (part_expr->null_value) + buf_ptr= (char*)"NULL"; + else + longlong10_to_str(err_value, buf, + part_expr->unsigned_flag ? 
10 : -10); + dbug_tmp_restore_column_map(&table_arg->read_set, old_map); + } + my_error(ER_NO_PARTITION_FOR_GIVEN_VALUE, errflag, buf_ptr); + } +} + + +/* + Set fields related to partition expression + SYNOPSIS + set_part_expr() + start_token Start of partition function string + item_ptr Pointer to item tree + end_token End of partition function string + is_subpart Subpartition indicator + RETURN VALUES + TRUE Memory allocation error + FALSE Success +*/ + +bool partition_info::set_part_expr(THD *thd, Item *item_ptr, bool is_subpart) +{ + if (is_subpart) + { + list_of_subpart_fields= FALSE; + subpart_expr= item_ptr; + } + else + { + list_of_part_fields= FALSE; + part_expr= item_ptr; + } + return FALSE; +} + + +/* + Check that partition fields and subpartition fields are not too long + + SYNOPSIS + check_partition_field_length() + + RETURN VALUES + TRUE Total length was too big + FALSE Length is ok +*/ + +bool partition_info::check_partition_field_length() +{ + uint store_length= 0; + uint i; + DBUG_ENTER("partition_info::check_partition_field_length"); + + for (i= 0; i < num_part_fields; i++) + store_length+= get_partition_field_store_length(part_field_array[i]); + if (store_length > MAX_DATA_LENGTH_FOR_KEY) + DBUG_RETURN(TRUE); + store_length= 0; + for (i= 0; i < num_subpart_fields; i++) + store_length+= get_partition_field_store_length(subpart_field_array[i]); + if (store_length > MAX_DATA_LENGTH_FOR_KEY) + DBUG_RETURN(TRUE); + DBUG_RETURN(FALSE); +} + + +/* + Set up buffers and arrays for fields requiring preparation + SYNOPSIS + set_up_charset_field_preps() + + RETURN VALUES + TRUE Memory Allocation error + FALSE Success + + DESCRIPTION + Set up arrays and buffers for fields that require special care for + calculation of partition id. This is used for string fields with + variable length or string fields with fixed length that isn't using + the binary collation. 
+*/ + +bool partition_info::set_up_charset_field_preps(THD *thd) +{ + Field *field, **ptr; + uchar **char_ptrs; + unsigned i; + size_t size; + uint tot_part_fields= 0; + uint tot_subpart_fields= 0; + DBUG_ENTER("set_up_charset_field_preps"); + + if (!(part_type == HASH_PARTITION && + list_of_part_fields) && + check_part_func_fields(part_field_array, FALSE)) + { + ptr= part_field_array; + /* Set up arrays and buffers for those fields */ + while ((field= *(ptr++))) + if (field_is_partition_charset(field)) + tot_part_fields++; + size= tot_part_fields * sizeof(char*); + if (!(char_ptrs= (uchar**)thd->calloc(size))) + goto error; + part_field_buffers= char_ptrs; + if (!(char_ptrs= (uchar**)thd->calloc(size))) + goto error; + restore_part_field_ptrs= char_ptrs; + size= (tot_part_fields + 1) * sizeof(Field*); + if (!(char_ptrs= (uchar**)thd->alloc(size))) + goto error; + part_charset_field_array= (Field**)char_ptrs; + ptr= part_field_array; + i= 0; + while ((field= *(ptr++))) + { + if (field_is_partition_charset(field)) + { + uchar *field_buf; + size= field->pack_length(); + if (!(field_buf= (uchar*) thd->calloc(size))) + goto error; + part_charset_field_array[i]= field; + part_field_buffers[i++]= field_buf; + } + } + part_charset_field_array[i]= NULL; + } + if (is_sub_partitioned() && !list_of_subpart_fields && + check_part_func_fields(subpart_field_array, FALSE)) + { + /* Set up arrays and buffers for those fields */ + ptr= subpart_field_array; + while ((field= *(ptr++))) + if (field_is_partition_charset(field)) + tot_subpart_fields++; + size= tot_subpart_fields * sizeof(char*); + if (!(char_ptrs= (uchar**) thd->calloc(size))) + goto error; + subpart_field_buffers= char_ptrs; + if (!(char_ptrs= (uchar**) thd->calloc(size))) + goto error; + restore_subpart_field_ptrs= char_ptrs; + size= (tot_subpart_fields + 1) * sizeof(Field*); + if (!(char_ptrs= (uchar**) thd->alloc(size))) + goto error; + subpart_charset_field_array= (Field**)char_ptrs; + ptr= subpart_field_array; + 
i= 0; + while ((field= *(ptr++))) + { + uchar *UNINIT_VAR(field_buf); + + if (!field_is_partition_charset(field)) + continue; + size= field->pack_length(); + if (!(field_buf= (uchar*) thd->calloc(size))) + goto error; + subpart_charset_field_array[i]= field; + subpart_field_buffers[i++]= field_buf; + } + subpart_charset_field_array[i]= NULL; + } + DBUG_RETURN(FALSE); +error: + DBUG_RETURN(TRUE); +} + + +/* + Check if path does not contain mysql data home directory + for partition elements with data directory and index directory + + SYNOPSIS + check_partition_dirs() + part_info partition_info struct + + RETURN VALUES + 0 ok + 1 error +*/ + +bool check_partition_dirs(partition_info *part_info) +{ + if (!part_info) + return 0; + + partition_element *part_elem; + List_iterator part_it(part_info->partitions); + while ((part_elem= part_it++)) + { + if (part_elem->subpartitions.elements) + { + List_iterator sub_it(part_elem->subpartitions); + partition_element *subpart_elem; + while ((subpart_elem= sub_it++)) + { + if (unlikely(error_if_data_home_dir(subpart_elem->data_file_name, + "DATA DIRECTORY")) || + unlikely(error_if_data_home_dir(subpart_elem->index_file_name, + "INDEX DIRECTORY"))) + return 1; + } + } + else + { + if (unlikely(error_if_data_home_dir(part_elem->data_file_name, + "DATA DIRECTORY")) || + unlikely(error_if_data_home_dir(part_elem->index_file_name, + "INDEX DIRECTORY"))) + return 1; + } + } + return 0; +} + + +/** + Check what kind of error to report + + @param use_subpart_expr Use the subpart_expr instead of part_expr + @param part_str Name of partition to report error (or NULL) +*/ +void partition_info::report_part_expr_error(bool use_subpart_expr) +{ + Item *expr= part_expr; + DBUG_ENTER("partition_info::report_part_expr_error"); + if (use_subpart_expr) + expr= subpart_expr; + + if (expr->type() == Item::FIELD_ITEM) + { + partition_type type= part_type; + bool list_of_fields= list_of_part_fields; + Item_field *item_field= (Item_field*) expr; + /* + 
The expression consists of a single field. + It must be of integer type unless KEY or COLUMNS partitioning. + */ + if (use_subpart_expr) + { + type= subpart_type; + list_of_fields= list_of_subpart_fields; + } + if (!column_list && + item_field->field && + item_field->field->result_type() != INT_RESULT && + !(type == HASH_PARTITION && list_of_fields)) + { + my_error(ER_FIELD_TYPE_NOT_ALLOWED_AS_PARTITION_FIELD, MYF(0), + item_field->name.str); + DBUG_VOID_RETURN; + } + } + if (use_subpart_expr) + my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), "SUBPARTITION"); + else + my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), "PARTITION"); + DBUG_VOID_RETURN; +} + + +/* + Create a new column value in current list with maxvalue + Called from parser + + SYNOPSIS + add_max_value() + RETURN + TRUE Error + FALSE Success +*/ + +int partition_info::add_max_value(THD *thd) +{ + DBUG_ENTER("partition_info::add_max_value"); + + part_column_list_val *col_val; + /* + Makes for LIST COLUMNS 'num_columns' DEFAULT tuples, 1 tuple for RANGEs + */ + uint max_val= (num_columns && part_type == LIST_PARTITION) ? + num_columns : 1; + for (uint i= 0; i < max_val; i++) + { + if (!(col_val= add_column_value(thd))) + { + DBUG_RETURN(TRUE); + } + col_val->max_value= TRUE; + } + DBUG_RETURN(FALSE); +} + +/* + Create a new column value in current list + Called from parser + + SYNOPSIS + add_column_value() + RETURN + >0 A part_column_list_val object which have been + inserted into its list + 0 Memory allocation failure +*/ + +part_column_list_val *partition_info::add_column_value(THD *thd) +{ + uint max_val= num_columns ? 
num_columns : MAX_REF_PARTS; + DBUG_ENTER("add_column_value"); + DBUG_PRINT("enter", ("num_columns = %u, curr_list_object %u, max_val = %u", + num_columns, curr_list_object, max_val)); + if (curr_list_object < max_val) + { + curr_list_val->added_items++; + DBUG_RETURN(&curr_list_val->col_val_array[curr_list_object++]); + } + if (!num_columns && part_type == LIST_PARTITION) + { + /* + We're trying to add more than MAX_REF_PARTS, this can happen + in ALTER TABLE using List partitions where the first partition + uses VALUES IN (1,2,3...,17) where the number of fields in + the list is more than MAX_REF_PARTS, in this case we know + that the number of columns must be 1 and we thus reorganize + into the structure used for 1 column. After this we call + ourselves recursively which should always succeed. + */ + num_columns= curr_list_object; + if (!reorganize_into_single_field_col_val(thd)) + { + if (!init_column_part(thd)) + DBUG_RETURN(add_column_value(thd)); + } + DBUG_RETURN(NULL); + } + if (column_list) + { + my_error(ER_PARTITION_COLUMN_LIST_ERROR, MYF(0)); + } + else + { + if (part_type == RANGE_PARTITION) + my_error(ER_TOO_MANY_VALUES_ERROR, MYF(0), "RANGE"); + else + my_error(ER_TOO_MANY_VALUES_ERROR, MYF(0), "LIST"); + } + DBUG_RETURN(NULL); +} + + +/* + Initialise part_elem_value object at setting of a new object + (Helper functions to functions called by parser) + + SYNOPSIS + init_col_val + col_val Column value object to be initialised + item Item object representing column value + + RETURN VALUES + TRUE Failure + FALSE Success +*/ +void partition_info::init_col_val(part_column_list_val *col_val, Item *item) +{ + DBUG_ENTER("partition_info::init_col_val"); + + col_val->item_expression= item; + col_val->null_value= item->null_value; + if (item->result_type() == INT_RESULT) + { + /* + This could be both column_list partitioning and function + partitioning, but it doesn't hurt to set the function + partitioning flags about unsignedness. 
+ */ + curr_list_val->value= item->val_int(); + curr_list_val->unsigned_flag= TRUE; + if (!item->unsigned_flag && + curr_list_val->value < 0) + curr_list_val->unsigned_flag= FALSE; + if (!curr_list_val->unsigned_flag) + curr_part_elem->signed_flag= TRUE; + } + col_val->part_info= NULL; + DBUG_VOID_RETURN; +} +/* + Add a column value in VALUES LESS THAN or VALUES IN + (Called from parser) + + SYNOPSIS + add_column_list_value() + lex Parser's lex object + thd Thread object + item Item object representing column value + + RETURN VALUES + TRUE Failure + FALSE Success +*/ +bool partition_info::add_column_list_value(THD *thd, Item *item) +{ + part_column_list_val *col_val; + Name_resolution_context *context= &thd->lex->current_select->context; + TABLE_LIST *save_list= context->table_list; + const char *save_where= thd->where; + DBUG_ENTER("partition_info::add_column_list_value"); + + if (part_type == LIST_PARTITION && + num_columns == 1U) + { + if (init_column_part(thd)) + { + DBUG_RETURN(TRUE); + } + } + + context->table_list= 0; + if (column_list) + thd->where= "field list"; + else + thd->where= "partition function"; + + if (item->walk(&Item::check_partition_func_processor, 0, NULL)) + { + my_error(ER_PARTITION_FUNCTION_IS_NOT_ALLOWED, MYF(0)); + DBUG_RETURN(TRUE); + } + if (item->fix_fields(thd, (Item**)0) || + ((context->table_list= save_list), FALSE) || + (!item->const_item())) + { + context->table_list= save_list; + thd->where= save_where; + my_error(ER_PARTITION_FUNCTION_IS_NOT_ALLOWED, MYF(0)); + DBUG_RETURN(TRUE); + } + thd->where= save_where; + + if (!(col_val= add_column_value(thd))) + { + DBUG_RETURN(TRUE); + } + init_col_val(col_val, item); + DBUG_RETURN(FALSE); +} + +/* + Initialise part_info object for receiving a set of column values + for a partition, called when parser reaches VALUES LESS THAN or + VALUES IN. 
+ + SYNOPSIS + init_column_part() + lex Parser's lex object + + RETURN VALUES + TRUE Failure + FALSE Success +*/ +bool partition_info::init_column_part(THD *thd) +{ + partition_element *p_elem= curr_part_elem; + part_column_list_val *col_val_array; + part_elem_value *list_val; + uint loc_num_columns; + DBUG_ENTER("partition_info::init_column_part"); + + if (!(list_val= + (part_elem_value*) thd->calloc(sizeof(part_elem_value))) || + p_elem->list_val_list.push_back(list_val, thd->mem_root)) + DBUG_RETURN(TRUE); + + if (num_columns) + loc_num_columns= num_columns; + else + loc_num_columns= MAX_REF_PARTS; + if (!(col_val_array= + (part_column_list_val*) thd->calloc(loc_num_columns * + sizeof(part_column_list_val)))) + DBUG_RETURN(TRUE); + + list_val->col_val_array= col_val_array; + list_val->added_items= 0; + curr_list_val= list_val; + curr_list_object= 0; + DBUG_RETURN(FALSE); +} + +/* + In the case of ALTER TABLE ADD/REORGANIZE PARTITION for LIST + partitions we can specify list values as: + VALUES IN (v1, v2,,,, v17) if we're using the first partitioning + variant with a function or a column list partitioned table with + one partition field. In this case the parser knows not the + number of columns start with and allocates MAX_REF_PARTS in the + array. If we try to allocate something beyond MAX_REF_PARTS we + will call this function to reorganize into a structure with + num_columns = 1. Also when the parser knows that we used LIST + partitioning and we used a VALUES IN like above where number of + values was smaller than MAX_REF_PARTS or equal, then we will + reorganize after discovering this in the parser. 
+ + SYNOPSIS + reorganize_into_single_field_col_val() + + RETURN VALUES + TRUE Failure + FALSE Success +*/ + +int partition_info::reorganize_into_single_field_col_val(THD *thd) +{ + part_column_list_val *col_val, *new_col_val; + part_elem_value *val= curr_list_val; + uint loc_num_columns= num_columns; + uint i; + DBUG_ENTER("partition_info::reorganize_into_single_field_col_val"); + + num_columns= 1; + val->added_items= 1U; + col_val= &val->col_val_array[0]; + init_col_val(col_val, col_val->item_expression); + for (i= 1; i < loc_num_columns; i++) + { + col_val= &val->col_val_array[i]; + DBUG_ASSERT(part_type == LIST_PARTITION); + if (init_column_part(thd)) + { + DBUG_RETURN(TRUE); + } + if (!(new_col_val= add_column_value(thd))) + { + DBUG_RETURN(TRUE); + } + memcpy(new_col_val, col_val, sizeof(*col_val)); + init_col_val(new_col_val, col_val->item_expression); + } + curr_list_val= val; + DBUG_RETURN(FALSE); +} + +/* + This function handles the case of function-based partitioning. + It fixes some data structures created in the parser and puts + them in the format required by the rest of the partitioning + code. 
+ + SYNOPSIS + fix_partition_values() + thd Thread object + col_val Array of one value + part_elem The partition instance + part_id Id of partition instance + + RETURN VALUES + TRUE Failure + FALSE Success +*/ +int partition_info::fix_partition_values(THD *thd, + part_elem_value *val, + partition_element *part_elem) +{ + part_column_list_val *col_val= val->col_val_array; + DBUG_ENTER("partition_info::fix_partition_values"); + + if (col_val->fixed) + { + DBUG_RETURN(FALSE); + } + + Item *item_expr= col_val->item_expression; + if ((val->null_value= item_expr->null_value)) + { + if (part_elem->has_null_value) + { + my_error(ER_MULTIPLE_DEF_CONST_IN_LIST_PART_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + part_elem->has_null_value= TRUE; + } + else if (item_expr->result_type() != INT_RESULT) + { + my_error(ER_VALUES_IS_NOT_INT_TYPE_ERROR, MYF(0), + part_elem->partition_name); + DBUG_RETURN(TRUE); + } + if (part_type == RANGE_PARTITION) + { + if (part_elem->has_null_value) + { + my_error(ER_NULL_IN_VALUES_LESS_THAN, MYF(0)); + DBUG_RETURN(TRUE); + } + part_elem->range_value= val->value; + } + col_val->fixed= 2; + DBUG_RETURN(FALSE); +} + +/* + Get column item with a proper character set according to the field + + SYNOPSIS + get_column_item() + item Item object to start with + field Field for which the item will be compared to + + RETURN VALUES + NULL Error + item Returned item +*/ + +Item* partition_info::get_column_item(Item *item, Field *field) +{ + if (field->result_type() == STRING_RESULT && + item->collation.collation != field->charset()) + { + if (!(item= convert_charset_partition_constant(item, + field->charset()))) + { + my_error(ER_PARTITION_FUNCTION_IS_NOT_ALLOWED, MYF(0)); + return NULL; + } + } + return item; +} + + +/* + Evaluate VALUES functions for column list values + SYNOPSIS + fix_column_value_functions() + thd Thread object + col_val List of column values + part_id Partition id we are fixing + + RETURN VALUES + TRUE Error + FALSE Success + DESCRIPTION + 
Fix column VALUES and store in memory array adapted to the data type +*/ + +bool partition_info::fix_column_value_functions(THD *thd, + part_elem_value *val, + uint part_id) +{ + uint n_columns= part_field_list.elements; + bool result= FALSE; + uint i; + part_column_list_val *col_val= val->col_val_array; + DBUG_ENTER("partition_info::fix_column_value_functions"); + + if (col_val->fixed > 1) + { + DBUG_RETURN(FALSE); + } + for (i= 0; i < n_columns; col_val++, i++) + { + Item *column_item= col_val->item_expression; + Field *field= part_field_array[i]; + col_val->part_info= this; + col_val->partition_id= part_id; + if (col_val->max_value) + col_val->column_value= NULL; + else + { + col_val->column_value= NULL; + if (!col_val->null_value) + { + uchar *val_ptr; + uint len= field->pack_length(); + bool save_got_warning; + + if (!(column_item= get_column_item(column_item, field))) + { + result= TRUE; + goto end; + } + Sql_mode_instant_set sms(thd, 0); + save_got_warning= thd->got_warning; + thd->got_warning= 0; + if (column_item->save_in_field(field, TRUE) || + thd->got_warning) + { + my_error(ER_WRONG_TYPE_COLUMN_VALUE_ERROR, MYF(0)); + result= TRUE; + goto end; + } + thd->got_warning= save_got_warning; + if (!(val_ptr= (uchar*) thd->memdup(field->ptr, len))) + { + result= TRUE; + goto end; + } + col_val->column_value= val_ptr; + } + } + col_val->fixed= 2; + } +end: + DBUG_RETURN(result); +} + + +/** + Fix partition data from parser. + + @details The parser generates generic data structures, we need to set them + up as the rest of the code expects to find them. This is in reality part + of the syntax check of the parser code. + + It is necessary to call this function in the case of a CREATE TABLE + statement, in this case we do it early in the check_partition_info + function. 
+ + It is necessary to call this function for ALTER TABLE where we + assign a completely new partition structure, in this case we do it + in prep_alter_part_table after discovering that the partition + structure is entirely redefined. + + It's necessary to call this method also for ALTER TABLE ADD/REORGANIZE + of partitions, in this we call it in prep_alter_part_table after + making some initial checks but before going deep to check the partition + info, we also assign the column_list variable before calling this function + here. + + Finally we also call it immediately after returning from parsing the + partitioning text found in the frm file. + + This function mainly fixes the VALUES parts, these are handled differently + whether or not we use column list partitioning. Since the parser doesn't + know which we are using we need to set-up the old data structures after + the parser is complete when we know if what type of partitioning the + base table is using. + + For column lists we will handle this in the fix_column_value_function. + For column lists it is sufficient to verify that the number of columns + and number of elements are in synch with each other. So only partitioning + using functions need to be set-up to their data structures. + + @param thd Thread object + + @return Operation status + @retval TRUE Failure + @retval FALSE Success +*/ + +bool partition_info::fix_parser_data(THD *thd) +{ + List_iterator it(partitions); + partition_element *part_elem; + uint num_elements; + uint i= 0, j, k; + DBUG_ENTER("partition_info::fix_parser_data"); + + if (!(part_type == RANGE_PARTITION || + part_type == LIST_PARTITION)) + { + if (part_type == HASH_PARTITION && list_of_part_fields) + { + /* KEY partitioning, check ALGORITHM = N. Should not pass the parser! */ + if (key_algorithm > KEY_ALGORITHM_55) + { + my_error(ER_PARTITION_FUNCTION_IS_NOT_ALLOWED, MYF(0)); + DBUG_RETURN(true); + } + /* If not set, use DEFAULT = 2 for CREATE and ALTER! 
*/ + if ((thd_sql_command(thd) == SQLCOM_CREATE_TABLE || + thd_sql_command(thd) == SQLCOM_ALTER_TABLE) && + key_algorithm == KEY_ALGORITHM_NONE) + key_algorithm= KEY_ALGORITHM_55; + } + DBUG_RETURN(FALSE); + } + if (is_sub_partitioned() && list_of_subpart_fields) + { + /* KEY subpartitioning, check ALGORITHM = N. Should not pass the parser! */ + if (key_algorithm > KEY_ALGORITHM_55) + { + my_error(ER_PARTITION_FUNCTION_IS_NOT_ALLOWED, MYF(0)); + DBUG_RETURN(true); + } + /* If not set, use DEFAULT = 2 for CREATE and ALTER! */ + if ((thd_sql_command(thd) == SQLCOM_CREATE_TABLE || + thd_sql_command(thd) == SQLCOM_ALTER_TABLE) && + key_algorithm == KEY_ALGORITHM_NONE) + key_algorithm= KEY_ALGORITHM_55; + } + defined_max_value= FALSE; // in case it already set (CREATE TABLE LIKE) + do + { + part_elem= it++; + List_iterator list_val_it(part_elem->list_val_list); + num_elements= part_elem->list_val_list.elements; + if (unlikely(!num_elements && error_if_requires_values())) + DBUG_RETURN(true); + DBUG_ASSERT(part_type == RANGE_PARTITION ? + num_elements == 1U : TRUE); + + for (j= 0; j < num_elements; j++) + { + part_elem_value *val= list_val_it++; + + if (val->added_items != (column_list ? num_columns : 1)) + { + my_error(ER_PARTITION_COLUMN_LIST_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + + /* + Check the last MAX_VALUE for range partitions and DEFAULT value + for LIST partitions. + Both values are marked with defined_max_value and + default_partition_id. + + This is a max_value/default is max_value is set and this is + a normal RANGE (no column list) or if it's a LIST partition: + + PARTITION p3 VALUES LESS THAN MAXVALUE + or + PARTITION p3 VALUES DEFAULT + */ + if (val->added_items && val->col_val_array[0].max_value && + (!column_list || part_type == LIST_PARTITION)) + { + DBUG_ASSERT(part_type == RANGE_PARTITION || + part_type == LIST_PARTITION); + if (defined_max_value) + { + my_error((part_type == RANGE_PARTITION) ? 
+ ER_PARTITION_MAXVALUE_ERROR : + ER_PARTITION_DEFAULT_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + + /* For RANGE PARTITION MAX_VALUE must be last */ + if (i != (num_parts - 1) && + part_type != LIST_PARTITION) + { + my_error(ER_PARTITION_MAXVALUE_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + + defined_max_value= TRUE; + default_partition_id= i; + part_elem->max_value= TRUE; + part_elem->range_value= LONGLONG_MAX; + continue; + } + + if (column_list) + { + for (k= 0; k < num_columns; k++) + { + part_column_list_val *col_val= &val->col_val_array[k]; + if (col_val->null_value && part_type == RANGE_PARTITION) + { + my_error(ER_NULL_IN_VALUES_LESS_THAN, MYF(0)); + DBUG_RETURN(TRUE); + } + } + } + else + { + if (fix_partition_values(thd, val, part_elem)) + DBUG_RETURN(TRUE); + if (val->null_value) + { + /* + Null values aren't required in the value part, they are kept per + partition instance, only LIST partitions have NULL values. + */ + list_val_it.remove(); + } + } + } + } while (++i < num_parts); + DBUG_RETURN(FALSE); +} + + +/** + helper function to compare strings that can also be + a NULL pointer. + + @param a char pointer (can be NULL). + @param b char pointer (can be NULL). + + @return false if equal + @retval true strings differs + @retval false strings is equal +*/ + +static bool strcmp_null(const char *a, const char *b) +{ + if (!a && !b) + return false; + if (a && b && !strcmp(a, b)) + return false; + return true; +} + + +/** + Check if the new part_info has the same partitioning. + + @param new_part_info New partition definition to compare with. + + @return True if not considered to have changed the partitioning. + @retval true Allowed change (only .frm change, compatible distribution). + @retval false Different partitioning, will need redistribution of rows. 
+ + @note Currently only used to allow changing from non-set key_algorithm + to a specified key_algorithm, to avoid rebuild when upgrading from 5.1 of + such partitioned tables using numeric colums in the partitioning expression. + For more info see bug#14521864. + Does not check if columns etc has changed, i.e. only for + alter_info->partition_flags == ALTER_PARTITION_INFO. +*/ + +bool partition_info::has_same_partitioning(partition_info *new_part_info) +{ + DBUG_ENTER("partition_info::has_same_partitioning"); + + DBUG_ASSERT(part_field_array && part_field_array[0]); + + /* + Only consider pre 5.5.3 .frm's to have same partitioning as + a new one with KEY ALGORITHM = 1 (). + */ + + if (part_field_array[0]->table->s->mysql_version >= 50503) + DBUG_RETURN(false); + + if (!new_part_info || + part_type != new_part_info->part_type || + num_parts != new_part_info->num_parts || + use_default_partitions != new_part_info->use_default_partitions || + new_part_info->is_sub_partitioned() != is_sub_partitioned()) + DBUG_RETURN(false); + + if (part_type != HASH_PARTITION) + { + /* + RANGE or LIST partitioning, check if KEY subpartitioned. + Also COLUMNS partitioning was added in 5.5, so treat that as different. + */ + if (!is_sub_partitioned() || + !new_part_info->is_sub_partitioned() || + column_list || + new_part_info->column_list || + !list_of_subpart_fields || + !new_part_info->list_of_subpart_fields || + new_part_info->num_subparts != num_subparts || + new_part_info->subpart_field_list.elements != + subpart_field_list.elements || + new_part_info->use_default_subpartitions != + use_default_subpartitions) + DBUG_RETURN(false); + } + else + { + /* Check if KEY partitioned. */ + if (!new_part_info->list_of_part_fields || + !list_of_part_fields || + new_part_info->part_field_list.elements != part_field_list.elements) + DBUG_RETURN(false); + } + + /* Check that it will use the same fields in KEY (fields) list. 
*/ + List_iterator old_field_name_it(part_field_list); + List_iterator new_field_name_it(new_part_info->part_field_list); + const char *old_name, *new_name; + while ((old_name= old_field_name_it++)) + { + new_name= new_field_name_it++; + if (!new_name || my_strcasecmp(system_charset_info, + new_name, + old_name)) + DBUG_RETURN(false); + } + + if (is_sub_partitioned()) + { + /* Check that it will use the same fields in KEY subpart fields list. */ + List_iterator old_field_name_it(subpart_field_list); + List_iterator new_field_name_it(new_part_info->subpart_field_list); + const char *old_name, *new_name; + while ((old_name= old_field_name_it++)) + { + new_name= new_field_name_it++; + if (!new_name || my_strcasecmp(system_charset_info, + new_name, + old_name)) + DBUG_RETURN(false); + } + } + + if (!use_default_partitions) + { + /* + Loop over partitions/subpartition to verify that they are + the same, including state and name. + */ + List_iterator part_it(partitions); + List_iterator new_part_it(new_part_info->partitions); + uint i= 0; + do + { + partition_element *part_elem= part_it++; + partition_element *new_part_elem= new_part_it++; + /* + The following must match: + partition_name, data_file_name, index_file_name, + engine_type, part_max_rows, part_min_rows, nodegroup_id. + (max_value, signed_flag, has_null_value only on partition level, + RANGE/LIST) + The following can differ: + - part_comment + part_state must be PART_NORMAL! + */ + if (!part_elem || !new_part_elem || + strcmp(part_elem->partition_name, + new_part_elem->partition_name) || + part_elem->part_state != PART_NORMAL || + new_part_elem->part_state != PART_NORMAL || + part_elem->max_value != new_part_elem->max_value || + part_elem->signed_flag != new_part_elem->signed_flag || + part_elem->has_null_value != new_part_elem->has_null_value) + DBUG_RETURN(false); + + /* new_part_elem may not have engine_type set! 
*/ + if (new_part_elem->engine_type && + part_elem->engine_type != new_part_elem->engine_type) + DBUG_RETURN(false); + + if (is_sub_partitioned()) + { + /* + Check that both old and new partition has the same definition + (VALUES IN/VALUES LESS THAN) (No COLUMNS partitioning, see above) + */ + if (part_type == LIST_PARTITION) + { + List_iterator list_vals(part_elem->list_val_list); + List_iterator + new_list_vals(new_part_elem->list_val_list); + part_elem_value *val; + part_elem_value *new_val; + while ((val= list_vals++)) + { + new_val= new_list_vals++; + if (!new_val) + DBUG_RETURN(false); + if ((!val->null_value && !new_val->null_value) && + val->value != new_val->value) + DBUG_RETURN(false); + } + if (new_list_vals++) + DBUG_RETURN(false); + } + else + { + DBUG_ASSERT(part_type == RANGE_PARTITION); + if (new_part_elem->range_value != part_elem->range_value) + DBUG_RETURN(false); + } + + if (!use_default_subpartitions) + { + List_iterator + sub_part_it(part_elem->subpartitions); + List_iterator + new_sub_part_it(new_part_elem->subpartitions); + uint j= 0; + do + { + partition_element *sub_part_elem= sub_part_it++; + partition_element *new_sub_part_elem= new_sub_part_it++; + /* new_part_elem may not have engine_type set! 
*/ + if (new_sub_part_elem->engine_type && + sub_part_elem->engine_type != new_sub_part_elem->engine_type) + DBUG_RETURN(false); + + if (strcmp(sub_part_elem->partition_name, + new_sub_part_elem->partition_name) || + sub_part_elem->part_state != PART_NORMAL || + new_sub_part_elem->part_state != PART_NORMAL || + sub_part_elem->part_min_rows != + new_sub_part_elem->part_min_rows || + sub_part_elem->part_max_rows != + new_sub_part_elem->part_max_rows || + sub_part_elem->nodegroup_id != + new_sub_part_elem->nodegroup_id) + DBUG_RETURN(false); + + if (strcmp_null(sub_part_elem->data_file_name, + new_sub_part_elem->data_file_name) || + strcmp_null(sub_part_elem->index_file_name, + new_sub_part_elem->index_file_name)) + DBUG_RETURN(false); + + } while (++j < num_subparts); + } + } + else + { + if (part_elem->part_min_rows != new_part_elem->part_min_rows || + part_elem->part_max_rows != new_part_elem->part_max_rows || + part_elem->nodegroup_id != new_part_elem->nodegroup_id) + DBUG_RETURN(false); + + if (strcmp_null(part_elem->data_file_name, + new_part_elem->data_file_name) || + strcmp_null(part_elem->index_file_name, + new_part_elem->index_file_name)) + DBUG_RETURN(false); + } + } while (++i < num_parts); + } + + /* + Only if key_algorithm was not specified before and it is now set, + consider this as nothing was changed, and allow change without rebuild! 
+ */ + if (key_algorithm != partition_info::KEY_ALGORITHM_NONE || + new_part_info->key_algorithm == partition_info::KEY_ALGORITHM_NONE) + DBUG_RETURN(false); + + DBUG_RETURN(true); +} + + +void partition_info::print_debug(const char *str, uint *value) +{ + DBUG_ENTER("print_debug"); + if (value) + DBUG_PRINT("info", ("parser: %s, val = %u", str, *value)); + else + DBUG_PRINT("info", ("parser: %s", str)); + DBUG_VOID_RETURN; +} + +bool partition_info::field_in_partition_expr(Field *field) const +{ + uint i; + for (i= 0; i < num_part_fields; i++) + { + if (field->eq(part_field_array[i])) + return TRUE; + } + for (i= 0; i < num_subpart_fields; i++) + { + if (field->eq(subpart_field_array[i])) + return TRUE; + } + return FALSE; +} + +#else /* WITH_PARTITION_STORAGE_ENGINE */ + /* + For builds without partitioning we need to define these functions + since we they are called from the parser. The parser cannot + remove code parts using ifdef, but the code parts cannot be called + so we simply need to add empty functions to make the linker happy. 
+ */ +part_column_list_val *partition_info::add_column_value(THD *thd) +{ + return NULL; +} + +bool partition_info::set_part_expr(THD *thd, Item *item_ptr, bool is_subpart) +{ + (void)item_ptr; + (void)is_subpart; + return FALSE; +} + +int partition_info::reorganize_into_single_field_col_val(THD *thd) +{ + return 0; +} + +bool partition_info::init_column_part(THD *thd) +{ + return FALSE; +} + +bool partition_info::add_column_list_value(THD *thd, Item *item) +{ + return FALSE; +} +int partition_info::add_max_value(THD *thd) +{ + return 0; +} + +void partition_info::print_debug(const char *str, uint *value) +{ +} + +bool check_partition_dirs(partition_info *part_info) +{ + return 0; +} + +#endif /* WITH_PARTITION_STORAGE_ENGINE */ + +bool partition_info::vers_init_info(THD * thd) +{ + part_type= VERSIONING_PARTITION; + list_of_part_fields= true; + column_list= false; + vers_info= new (thd->mem_root) Vers_part_info; + if (unlikely(!vers_info)) + return true; + + return false; +} + + +/** + Assign INTERVAL and STARTS for SYSTEM_TIME partitions. + + @return true on error +*/ + +bool partition_info::vers_set_interval(THD* thd, Item* interval, + interval_type int_type, Item* starts, + bool auto_hist, const char *table_name) +{ + DBUG_ASSERT(part_type == VERSIONING_PARTITION); + + MYSQL_TIME ltime; + uint err; + vers_info->interval.type= int_type; + vers_info->auto_hist= auto_hist; + + /* 1. 
assign INTERVAL to interval.step */ + if (interval->fix_fields_if_needed_for_scalar(thd, &interval)) + return true; + bool error= get_interval_value(thd, interval, int_type, &vers_info->interval.step) || + vers_info->interval.step.neg || vers_info->interval.step.second_part || + !(vers_info->interval.step.year || vers_info->interval.step.month || + vers_info->interval.step.day || vers_info->interval.step.hour || + vers_info->interval.step.minute || vers_info->interval.step.second); + if (error) + { + my_error(ER_PART_WRONG_VALUE, MYF(0), table_name, "INTERVAL"); + return true; + } + + /* 2. assign STARTS to interval.start */ + if (starts) + { + if (starts->fix_fields_if_needed_for_scalar(thd, &starts)) + return true; + switch (starts->result_type()) + { + case INT_RESULT: + case DECIMAL_RESULT: + case REAL_RESULT: + /* When table member is defined, we are inside mysql_unpack_partition(). */ + if (!table || starts->val_int() > TIMESTAMP_MAX_VALUE) + goto interval_starts_error; + vers_info->interval.start= (my_time_t) starts->val_int(); + break; + case STRING_RESULT: + case TIME_RESULT: + { + Datetime::Options opt(TIME_NO_ZERO_DATE | TIME_NO_ZERO_IN_DATE, thd); + starts->get_date(thd, <ime, opt); + vers_info->interval.start= TIME_to_timestamp(thd, <ime, &err); + if (err) + goto interval_starts_error; + break; + } + case ROW_RESULT: + default: + goto interval_starts_error; + } + if (!table) + { + if (thd->query_start() < vers_info->interval.start) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_PART_STARTS_BEYOND_INTERVAL, + ER_THD(thd, ER_PART_STARTS_BEYOND_INTERVAL), + table_name); + } + } + } + else // calculate default STARTS depending on INTERVAL + { + thd->variables.time_zone->gmt_sec_to_TIME(<ime, thd->query_start()); + if (vers_info->interval.step.second) + goto interval_set_starts; + ltime.second= 0; + if (vers_info->interval.step.minute) + goto interval_set_starts; + ltime.minute= 0; + if (vers_info->interval.step.hour) + goto 
interval_set_starts; + ltime.hour= 0; + +interval_set_starts: + vers_info->interval.start= TIME_to_timestamp(thd, <ime, &err); + if (err) + goto interval_starts_error; + } + + return false; + +interval_starts_error: + my_error(ER_PART_WRONG_VALUE, MYF(0), table_name, "STARTS"); + return true; +} + + +bool partition_info::vers_set_limit(ulonglong limit, bool auto_hist, + const char *table_name) +{ + DBUG_ASSERT(part_type == VERSIONING_PARTITION); + + if (limit < 1) + { + my_error(ER_PART_WRONG_VALUE, MYF(0), table_name, "LIMIT"); + return true; + } + + vers_info->limit= limit; + vers_info->auto_hist= auto_hist; + return !limit; +} + + +bool partition_info::error_if_requires_values() const +{ + switch (part_type) { + case NOT_A_PARTITION: + case HASH_PARTITION: + case VERSIONING_PARTITION: + break; + case RANGE_PARTITION: + my_error(ER_PARTITION_REQUIRES_VALUES_ERROR, MYF(0), "RANGE", "LESS THAN"); + return true; + case LIST_PARTITION: + my_error(ER_PARTITION_REQUIRES_VALUES_ERROR, MYF(0), "LIST", "IN"); + return true; + } + return false; +} diff --git a/sql/partition_info.h b/sql/partition_info.h new file mode 100644 index 00000000..3a8c3a37 --- /dev/null +++ b/sql/partition_info.h @@ -0,0 +1,572 @@ +#ifndef PARTITION_INFO_INCLUDED +#define PARTITION_INFO_INCLUDED + +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include "sql_class.h" +#include "partition_element.h" +#include "sql_partition.h" + +class partition_info; +struct TABLE_LIST; +/* Some function typedefs */ +typedef int (*get_part_id_func)(partition_info *part_info, uint32 *part_id, + longlong *func_value); +typedef int (*get_subpart_id_func)(partition_info *part_info, uint32 *part_id); +typedef bool (*check_constants_func)(THD *thd, partition_info *part_info); + +struct st_ddl_log_memory_entry; + +#define MAX_PART_NAME_SIZE 8 + +struct Vers_part_info : public Sql_alloc +{ + Vers_part_info() : + limit(0), + auto_hist(false), + now_part(NULL), + hist_part(NULL) + { + interval.type= INTERVAL_LAST; + } + Vers_part_info(const Vers_part_info &src) : + interval(src.interval), + limit(src.limit), + auto_hist(src.auto_hist), + now_part(NULL), + hist_part(NULL) + { + } + Vers_part_info& operator= (const Vers_part_info &src) + { + interval= src.interval; + limit= src.limit; + auto_hist= src.auto_hist; + now_part= src.now_part; + hist_part= src.hist_part; + return *this; + } + bool initialized() + { + if (now_part) + { + DBUG_ASSERT(now_part->id != UINT_MAX32); + DBUG_ASSERT(now_part->type == partition_element::CURRENT); + if (hist_part) + { + DBUG_ASSERT(hist_part->id != UINT_MAX32); + DBUG_ASSERT(hist_part->type == partition_element::HISTORY); + } + return true; + } + return false; + } + struct interval_t { + my_time_t start; + INTERVAL step; + enum interval_type type; + bool is_set() const { return type < INTERVAL_LAST; } + bool operator==(const interval_t &rhs) const + { + /* TODO: equivalent intervals like 1 hour and 60 mins should be considered equal */ + return start == rhs.start && type == rhs.type && !memcmp(&step, 
&rhs.step, sizeof(INTERVAL)); + } + } interval; + ulonglong limit; + bool auto_hist; + partition_element *now_part; + partition_element *hist_part; +}; + +/* + See generate_partition_syntax() for details of how the data is used + in partition expression. +*/ +class partition_info : public DDL_LOG_STATE, public Sql_alloc +{ +public: + /* + * Here comes a set of definitions needed for partitioned table handlers. + */ + List partitions; + List temp_partitions; + + /* + These are mutually exclusive with part_expr/subpart_expr depending on + what is specified in partitioning filter: expression or column list. + */ + List part_field_list; + List subpart_field_list; + + /* + If there is no subpartitioning, use only this func to get partition ids. + If there is subpartitioning, use the this func to get partition id when + you have both partition and subpartition fields. + */ + get_part_id_func get_partition_id; + + /* Get partition id when we don't have subpartition fields */ + get_part_id_func get_part_partition_id; + + /* + Get subpartition id when we have don't have partition fields by we do + have subpartition ids. + Mikael said that for given constant tuple + {subpart_field1, ..., subpart_fieldN} the subpartition id will be the + same in all subpartitions + */ + get_subpart_id_func get_subpartition_id; + + /* + When we have various string fields we might need some preparation + before and clean-up after calling the get_part_id_func's. We need + one such method for get_part_partition_id and one for + get_subpartition_id. 
+ */ + get_part_id_func get_part_partition_id_charset; + get_subpart_id_func get_subpartition_id_charset; + + check_constants_func check_constants; + + /* NULL-terminated array of fields used in partitioned expression */ + Field **part_field_array; + Field **subpart_field_array; + Field **part_charset_field_array; + Field **subpart_charset_field_array; + /* + Array of all fields used in partition and subpartition expression, + without duplicates, NULL-terminated. + */ + Field **full_part_field_array; + /* + Set of all fields used in partition and subpartition expression. + Required for testing of partition fields in write_set when + updating. We need to set all bits in read_set because the row may + need to be inserted in a different [sub]partition. + */ + MY_BITMAP full_part_field_set; + + /* + When we have a field that requires transformation before calling the + partition functions we must allocate field buffers for the field of + the fields in the partition function. + */ + uchar **part_field_buffers; + uchar **subpart_field_buffers; + uchar **restore_part_field_ptrs; + uchar **restore_subpart_field_ptrs; + + Item *part_expr; + Item *subpart_expr; + + Item *item_free_list; + + /* + Bitmaps of partitions used by the current query. + * read_partitions - partitions to be used for reading. + * lock_partitions - partitions that must be locked (read or write). + Usually read_partitions is the same set as lock_partitions, but + in case of UPDATE the WHERE clause can limit the read_partitions set, + but not neccesarily the lock_partitions set. + Usage pattern: + * Initialized in ha_partition::open(). + * read+lock_partitions is set according to explicit PARTITION, + WL#5217, in open_and_lock_tables(). + * Bits in read_partitions can be cleared in prune_partitions() + in the optimizing step. + (WL#4443 is about allowing prune_partitions() to affect lock_partitions + and be done before locking too). 
+ * When the partition enabled handler get an external_lock call it locks + all partitions in lock_partitions (and remembers which partitions it + locked, so that it can unlock them later). In case of LOCK TABLES it will + lock all partitions, and keep them locked while lock_partitions can + change for each statement under LOCK TABLES. + * Freed at the same time item_free_list is freed. + */ + MY_BITMAP read_partitions; + MY_BITMAP lock_partitions; + bool bitmaps_are_initialized; + + union { + longlong *range_int_array; + LIST_PART_ENTRY *list_array; + part_column_list_val *range_col_array; + part_column_list_val *list_col_array; + }; + + Vers_part_info *vers_info; + + /******************************************** + * INTERVAL ANALYSIS + ********************************************/ + /* + Partitioning interval analysis function for partitioning, or NULL if + interval analysis is not supported for this kind of partitioning. + */ + get_partitions_in_range_iter get_part_iter_for_interval; + /* + Partitioning interval analysis function for subpartitioning, or NULL if + interval analysis is not supported for this kind of partitioning. + */ + get_partitions_in_range_iter get_subpart_iter_for_interval; + + /******************************************** + * INTERVAL ANALYSIS ENDS + ********************************************/ + + longlong err_value; + char* part_info_string; + + partition_element *curr_part_elem; // part or sub part + partition_element *current_partition; // partition + part_elem_value *curr_list_val; + uint curr_list_object; + uint num_columns; + + TABLE *table; + /* + These key_map's are used for Partitioning to enable quick decisions + on whether we can derive more information about which partition to + scan just by looking at what index is used. 
+ */ + key_map all_fields_in_PF, all_fields_in_PPF, all_fields_in_SPF; + key_map some_fields_in_PF; + + handlerton *default_engine_type; + partition_type part_type; + partition_type subpart_type; + + uint part_info_len; + + uint num_parts; + uint num_subparts; + uint count_curr_subparts; // used during parsing + + uint num_list_values; + + uint num_part_fields; + uint num_subpart_fields; + uint num_full_part_fields; + + uint has_null_part_id; + uint32 default_partition_id; + /* + This variable is used to calculate the partition id when using + LINEAR KEY/HASH. This functionality is kept in the MySQL Server + but mainly of use to handlers supporting partitioning. + */ + uint16 linear_hash_mask; + /* + PARTITION BY KEY ALGORITHM=N + Which algorithm to use for hashing the fields. + N = 1 - Use 5.1 hashing (numeric fields are hashed as binary) + N = 2 - Use 5.5 hashing (numeric fields are hashed like latin1 bytes) + */ + enum enum_key_algorithm + { + KEY_ALGORITHM_NONE= 0, + KEY_ALGORITHM_51= 1, + KEY_ALGORITHM_55= 2 + }; + enum_key_algorithm key_algorithm; + + /* Only the number of partitions defined (uses default names and options). */ + bool use_default_partitions; + bool use_default_num_partitions; + /* Only the number of subpartitions defined (uses default names etc.). 
*/ + bool use_default_subpartitions; + bool use_default_num_subpartitions; + bool default_partitions_setup; + bool defined_max_value; + inline bool has_default_partititon() + { + return (part_type == LIST_PARTITION && defined_max_value); + } + bool list_of_part_fields; // KEY or COLUMNS PARTITIONING + bool list_of_subpart_fields; // KEY SUBPARTITIONING + bool linear_hash_ind; // LINEAR HASH/KEY + bool fixed; + bool is_auto_partitioned; + bool has_null_value; + bool column_list; // COLUMNS PARTITIONING, 5.5+ + + partition_info() + : get_partition_id(NULL), get_part_partition_id(NULL), + get_subpartition_id(NULL), + part_field_array(NULL), subpart_field_array(NULL), + part_charset_field_array(NULL), + subpart_charset_field_array(NULL), + full_part_field_array(NULL), + part_field_buffers(NULL), subpart_field_buffers(NULL), + restore_part_field_ptrs(NULL), restore_subpart_field_ptrs(NULL), + part_expr(NULL), subpart_expr(NULL), item_free_list(NULL), + bitmaps_are_initialized(FALSE), + list_array(NULL), vers_info(NULL), err_value(0), + part_info_string(NULL), + curr_part_elem(NULL), current_partition(NULL), + curr_list_object(0), num_columns(0), table(NULL), + default_engine_type(NULL), + part_type(NOT_A_PARTITION), subpart_type(NOT_A_PARTITION), + part_info_len(0), + num_parts(0), num_subparts(0), + count_curr_subparts(0), + num_list_values(0), num_part_fields(0), num_subpart_fields(0), + num_full_part_fields(0), has_null_part_id(0), linear_hash_mask(0), + key_algorithm(KEY_ALGORITHM_NONE), + use_default_partitions(TRUE), use_default_num_partitions(TRUE), + use_default_subpartitions(TRUE), use_default_num_subpartitions(TRUE), + default_partitions_setup(FALSE), defined_max_value(FALSE), + list_of_part_fields(FALSE), list_of_subpart_fields(FALSE), + linear_hash_ind(FALSE), fixed(FALSE), + is_auto_partitioned(FALSE), + has_null_value(FALSE), column_list(FALSE) + { + bzero((DDL_LOG_STATE *) this, sizeof(DDL_LOG_STATE)); + all_fields_in_PF.clear_all(); + 
all_fields_in_PPF.clear_all(); + all_fields_in_SPF.clear_all(); + some_fields_in_PF.clear_all(); + partitions.empty(); + temp_partitions.empty(); + part_field_list.empty(); + subpart_field_list.empty(); + } + ~partition_info() = default; + + partition_info *get_clone(THD *thd, bool empty_data_and_index_file= FALSE); + bool set_named_partition_bitmap(const char *part_name, size_t length); + bool set_partition_bitmaps(List *partition_names); + /* Answers the question if subpartitioning is used for a certain table */ + bool is_sub_partitioned() + { + return (subpart_type == NOT_A_PARTITION ? FALSE : TRUE); + } + + /* Returns the total number of partitions on the leaf level */ + uint get_tot_partitions() + { + return num_parts * (is_sub_partitioned() ? num_subparts : 1); + } + + bool set_up_defaults_for_partitioning(THD *thd, handler *file, + HA_CREATE_INFO *info, + uint start_no); + const char *find_duplicate_field(); + char *find_duplicate_name(); + bool check_engine_mix(handlerton *engine_type, bool default_engine); + bool check_partition_info(THD *thd, handlerton **eng_type, + handler *file, HA_CREATE_INFO *info, + partition_info *add_or_reorg_part= NULL); + void print_no_partition_found(TABLE *table, myf errflag); + void print_debug(const char *str, uint*); + Item* get_column_item(Item *item, Field *field); + int fix_partition_values(THD *thd, + part_elem_value *val, + partition_element *part_elem); + bool fix_column_value_functions(THD *thd, + part_elem_value *val, + uint part_id); + bool fix_parser_data(THD *thd); + int add_max_value(THD *thd); + void init_col_val(part_column_list_val *col_val, Item *item); + int reorganize_into_single_field_col_val(THD *thd); + part_column_list_val *add_column_value(THD *thd); + bool set_part_expr(THD *thd, Item *item_ptr, bool is_subpart); + bool set_up_charset_field_preps(THD *thd); + bool check_partition_field_length(); + bool init_column_part(THD *thd); + bool add_column_list_value(THD *thd, Item *item); + partition_element 
*get_part_elem(const char *partition_name, char *file_name, + size_t file_name_size, uint32 *part_id); + void report_part_expr_error(bool use_subpart_expr); + bool has_same_partitioning(partition_info *new_part_info); + bool error_if_requires_values() const; +private: + bool set_up_default_partitions(THD *thd, handler *file, HA_CREATE_INFO *info, + uint start_no); + bool set_up_default_subpartitions(THD *thd, handler *file, + HA_CREATE_INFO *info); + char *create_default_partition_names(THD *thd, uint part_no, uint num_parts, + uint start_no); + char *create_default_subpartition_name(THD *thd, uint subpart_no, + const char *part_name); + bool prune_partition_bitmaps(List *partition_names); // set_read_partitions() in 8.0 + bool add_named_partition(const char *part_name, size_t length); +public: + bool has_unique_name(partition_element *element); + bool field_in_partition_expr(Field *field) const; + + bool vers_init_info(THD *thd); + bool vers_set_interval(THD *thd, Item *interval, + interval_type int_type, Item *starts, + bool auto_part, const char *table_name); + bool vers_set_limit(ulonglong limit, bool auto_part, const char *table_name); + bool vers_set_hist_part(THD* thd, uint *create_count); + bool vers_require_hist_part(THD *thd) const + { + return part_type == VERSIONING_PARTITION && + thd->lex->vers_history_generating(); + } + void vers_check_limit(THD *thd); + bool vers_fix_field_list(THD *thd); + void vers_update_el_ids(); + partition_element *get_partition(uint part_id) + { + List_iterator it(partitions); + partition_element *el; + while ((el= it++)) + { + if (el->id == part_id) + return el; + } + return NULL; + } + uint next_part_no(uint new_parts) const; + + int gen_part_type(THD *thd, String *str) const; +}; + +void part_type_error(THD *thd, partition_info *work_part_info, + const char *part_type, partition_info *tab_part_info); + +uint32 get_next_partition_id_range(struct st_partition_iter* part_iter); +bool check_partition_dirs(partition_info 
*part_info); +bool vers_create_partitions(THD* thd, TABLE_LIST* tl, uint num_parts); + +/* Initialize the iterator to return a single partition with given part_id */ + +static inline void init_single_partition_iterator(uint32 part_id, + PARTITION_ITERATOR *part_iter) +{ + part_iter->part_nums.start= part_iter->part_nums.cur= part_id; + part_iter->part_nums.end= part_id+1; + part_iter->ret_null_part= part_iter->ret_null_part_orig= FALSE; + part_iter->ret_default_part= part_iter->ret_default_part_orig= FALSE; + part_iter->get_next= get_next_partition_id_range; +} + +/* Initialize the iterator to enumerate all partitions */ +static inline +void init_all_partitions_iterator(partition_info *part_info, + PARTITION_ITERATOR *part_iter) +{ + part_iter->part_nums.start= part_iter->part_nums.cur= 0; + part_iter->part_nums.end= part_info->num_parts; + part_iter->ret_null_part= part_iter->ret_null_part_orig= FALSE; + part_iter->ret_default_part= part_iter->ret_default_part_orig= FALSE; + part_iter->get_next= get_next_partition_id_range; +} + + +/** + @brief Update part_field_list by row_end field name + + @returns true on error; false on success +*/ +inline +bool partition_info::vers_fix_field_list(THD * thd) +{ + if (!table->versioned()) + { + // frm must be corrupted, normally CREATE/ALTER TABLE checks for that + my_error(ER_FILE_CORRUPT, MYF(0), table->s->path.str); + return true; + } + DBUG_ASSERT(part_type == VERSIONING_PARTITION); + DBUG_ASSERT(table->versioned(VERS_TIMESTAMP)); + + Field *row_end= table->vers_end_field(); + // needed in handle_list_of_fields() + row_end->flags|= GET_FIXED_FIELDS_FLAG; + Name_resolution_context *context= &thd->lex->current_select->context; + Item *row_end_item= new (thd->mem_root) Item_field(thd, context, row_end); + Item *row_end_ts= new (thd->mem_root) Item_func_unix_timestamp(thd, row_end_item); + set_part_expr(thd, row_end_ts, false); + + return false; +} + + +inline +void partition_info::vers_update_el_ids() +{ + 
DBUG_ASSERT(part_type == VERSIONING_PARTITION); + DBUG_ASSERT(table->versioned(VERS_TIMESTAMP)); + + List_iterator it(partitions); + partition_element *el; + for(uint32 id= 0; ((el= it++)); id++) + { + DBUG_ASSERT(el->type != partition_element::CONVENTIONAL); + /* Newly added element is inserted before AS_OF_NOW. */ + if (el->id == UINT_MAX32 || el->type == partition_element::CURRENT) + { + el->id= id; + if (el->type == partition_element::CURRENT) + break; + } + } +} + + +inline +bool make_partition_name(char *move_ptr, uint i) +{ + int res= snprintf(move_ptr, MAX_PART_NAME_SIZE + 1, "p%u", i); + return res < 0 || res > MAX_PART_NAME_SIZE; +} + + +#ifdef WITH_PARTITION_STORAGE_ENGINE +inline +uint partition_info::next_part_no(uint new_parts) const +{ + if (part_type != VERSIONING_PARTITION) + return num_parts; + DBUG_ASSERT(new_parts > 0); + /* Choose first non-occupied name suffix */ + uint32 suffix= num_parts - 1; + DBUG_ASSERT(suffix > 0); + char part_name[MAX_PART_NAME_SIZE + 1]; + List_iterator_fast it(table->part_info->partitions); + for (uint cur_part= 0; cur_part < new_parts; ++cur_part, ++suffix) + { + uint32 cur_suffix= suffix; + if (make_partition_name(part_name, suffix)) + return 0; + partition_element *el; + it.rewind(); + while ((el= it++)) + { + if (0 == my_strcasecmp(&my_charset_latin1, el->partition_name, part_name)) + { + if (make_partition_name(part_name, ++suffix)) + return 0; + it.rewind(); + } + } + if (cur_part > 0 && suffix > cur_suffix) + cur_part= 0; + } + return suffix - new_parts; +} +#endif + +#endif /* PARTITION_INFO_INCLUDED */ diff --git a/sql/password.c b/sql/password.c new file mode 100644 index 00000000..d824e615 --- /dev/null +++ b/sql/password.c @@ -0,0 +1,524 @@ +/* + Copyright (c) 2000, 2011, Oracle and/or its affiliates. 
+ Copyright (c) 2012, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* password checking routines */ +/***************************************************************************** + The main idea is that no password are sent between client & server on + connection and that no password are saved in mysql in a decodable form. + + On connection a random string is generated and sent to the client. + The client generates a new string with a random generator inited with + the hash values from the password and the sent string. + This 'check' string is sent to the server where it is compared with + a string generated from the stored hash_value of the password and the + random string. + + The password is saved (in user.password) by using the PASSWORD() function in + mysql. + + This is .c file because it's used in libmysqlclient, which is entirely in C. + (we need it to be portable to a variety of systems). + Example: + update user set password=PASSWORD("hello") where user="test" + This saves a hashed number as a string in the password field. 
+ + The new authentication is performed in following manner: + + SERVER: public_seed=thd_create_random_password() + send(public_seed) + + CLIENT: recv(public_seed) + hash_stage1=sha1("password") + hash_stage2=sha1(hash_stage1) + reply=xor(hash_stage1, sha1(public_seed,hash_stage2) + + // this three steps are done in scramble() + + send(reply) + + + SERVER: recv(reply) + hash_stage1=xor(reply, sha1(public_seed,hash_stage2)) + candidate_hash2=sha1(hash_stage1) + check(candidate_hash2==hash_stage2) + + // this three steps are done in check_scramble() + +*****************************************************************************/ + +#include "mariadb.h" +#include +#include +#include +#include +#include + +/************ MySQL 3.23-4.0 authentication routines: untouched ***********/ + +/* + New (MySQL 3.21+) random generation structure initialization + SYNOPSIS + my_rnd_init() + rand_st OUT Structure to initialize + seed1 IN First initialization parameter + seed2 IN Second initialization parameter +*/ + +/* + Generate binary hash from raw text string + Used for Pre-4.1 password handling + SYNOPSIS + hash_password() + result OUT store hash in this location + password IN plain text password to build hash + password_len IN password length (password may be not null-terminated) +*/ + +void hash_password(ulong *result, const char *password, uint password_len) +{ + ulong nr=1345345333L, add=7, nr2=0x12345671L; + ulong tmp; + const char *password_end= password + password_len; + for (; password < password_end; password++) + { + if (*password == ' ' || *password == '\t') + continue; /* skip space in password */ + tmp= (ulong) (uchar) *password; + nr^= (((nr & 63)+add)*tmp)+ (nr << 8); + nr2+=(nr2 << 8) ^ nr; + add+=tmp; + } + result[0]=nr & (((ulong) 1L << 31) -1L); /* Don't use sign bit (str2int) */; + result[1]=nr2 & (((ulong) 1L << 31) -1L); +} + + +/* + Create password to be stored in user database from raw string + Used for pre-4.1 password handling + SYNOPSIS + 
my_make_scrambled_password_323() + to OUT store scrambled password here + password IN user-supplied password + pass_len IN length of password string +*/ + +void my_make_scrambled_password_323(char *to, const char *password, + size_t pass_len) +{ + ulong hash_res[2]; + hash_password(hash_res, password, (uint) pass_len); + sprintf(to, "%08lx%08lx", hash_res[0], hash_res[1]); +} + + +/* + Wrapper around my_make_scrambled_password_323() to maintain client lib ABI + compatibility. + In server code usage of my_make_scrambled_password_323() is preferred to + avoid strlen(). + SYNOPSIS + make_scrambled_password_323() + to OUT store scrambled password here + password IN NULL-terminated string with user-supplied password +*/ + +void make_scrambled_password_323(char *to, const char *password) +{ + my_make_scrambled_password_323(to, password, strlen(password)); +} + + +/* + Scramble string with password. + Used in pre 4.1 authentication phase. + SYNOPSIS + scramble_323() + to OUT Store scrambled message here. Buffer must be at least + SCRAMBLE_LENGTH_323+1 bytes long + message IN Message to scramble. Message must be at least + SRAMBLE_LENGTH_323 bytes long. + password IN Password to use while scrambling +*/ + +void scramble_323(char *to, const char *message, const char *password) +{ + struct my_rnd_struct rand_st; + ulong hash_pass[2], hash_message[2]; + + if (password && password[0]) + { + char extra, *to_start=to; + const char *message_end= message + SCRAMBLE_LENGTH_323; + hash_password(hash_pass,password, (uint) strlen(password)); + hash_password(hash_message, message, SCRAMBLE_LENGTH_323); + my_rnd_init(&rand_st,hash_pass[0] ^ hash_message[0], + hash_pass[1] ^ hash_message[1]); + for (; message < message_end; message++) + *to++= (char) (floor(my_rnd(&rand_st)*31)+64); + extra=(char) (floor(my_rnd(&rand_st)*31)); + while (to_start != to) + *(to_start++)^=extra; + } + *to= 0; +} + + +/** + Check scrambled message. Used in pre 4.1 password handling. 
+ + @param scrambled Scrambled message to check. + @param message Original random message which was used for scrambling. + @param hash_pass Password which should be used for scrambling. + + @remark scrambled and message must be SCRAMBLED_LENGTH_323 bytes long. + + @return FALSE if password is correct, TRUE otherwise. +*/ + +my_bool +check_scramble_323(const unsigned char *scrambled, const char *message, + ulong *hash_pass) +{ + struct my_rnd_struct rand_st; + ulong hash_message[2]; + /* Big enough for checks. */ + uchar buff[16], scrambled_buff[SCRAMBLE_LENGTH_323 + 1]; + uchar *to, extra; + const uchar *pos; + + /* Ensure that the scrambled message is null-terminated. */ + memcpy(scrambled_buff, scrambled, SCRAMBLE_LENGTH_323); + scrambled_buff[SCRAMBLE_LENGTH_323]= '\0'; + scrambled= scrambled_buff; + + hash_password(hash_message, message, SCRAMBLE_LENGTH_323); + my_rnd_init(&rand_st,hash_pass[0] ^ hash_message[0], + hash_pass[1] ^ hash_message[1]); + to=buff; + DBUG_ASSERT(sizeof(buff) > SCRAMBLE_LENGTH_323); + for (pos=scrambled ; *pos && to < buff+sizeof(buff) ; pos++) + *to++=(char) (floor(my_rnd(&rand_st)*31)+64); + if (pos-scrambled != SCRAMBLE_LENGTH_323) + return 1; + extra=(char) (floor(my_rnd(&rand_st)*31)); + to=buff; + while (*scrambled) + { + if (*scrambled++ != (uchar) (*to++ ^ extra)) + return 1; /* Wrong password */ + } + return 0; +} + +static inline uint8 char_val(uint8 X) +{ + return (uint) (X >= '0' && X <= '9' ? X-'0' : + X >= 'A' && X <= 'Z' ? X-'A'+10 : X-'a'+10); +} + + +/* + Convert password from hex string (as stored in mysql.user) to binary form. + SYNOPSIS + get_salt_from_password_323() + res OUT store salt here + password IN password string as stored in mysql.user + NOTE + This function does not have length check for passwords. 
It will just crash + Password hashes in old format must have length divisible by 8 +*/ + +void get_salt_from_password_323(ulong *res, const char *password) +{ + res[0]= res[1]= 0; + if (password) + { + while (*password) + { + ulong val=0; + uint i; + for (i=0 ; i < 8 ; i++) + val=(val << 4)+char_val(*password++); + *res++=val; + } + } +} + + +/* + Convert scrambled password from binary form to asciiz hex string. + SYNOPSIS + make_password_from_salt_323() + to OUT store resulting string password here, at least 17 bytes + salt IN password in salt format, 2 ulongs +*/ + +void make_password_from_salt_323(char *to, const ulong *salt) +{ + sprintf(to,"%08lx%08lx", salt[0], salt[1]); +} + + +/* + **************** MySQL 4.1.1 authentication routines ************* +*/ + + +/* Character to use as version identifier for version 4.1 */ + +#define PVERSION41_CHAR '*' + + +/* + Convert given octet sequence to asciiz string of hex characters; + str..str+len and 'to' may not overlap. + SYNOPSIS + octet2hex() + buf OUT output buffer. Must be at least 2*len+1 bytes + str, len IN the beginning and the length of the input string + + RETURN + buf+len*2 +*/ + +char *octet2hex(char *to, const char *str, size_t len) +{ + const char *str_end= str + len; + for (; str != str_end; ++str) + { + *to++= _dig_vec_upper[((uchar) *str) >> 4]; + *to++= _dig_vec_upper[((uchar) *str) & 0x0F]; + } + *to= '\0'; + return to; +} + + +/* + Convert given asciiz string of hex (0..9 a..f) characters to octet + sequence. + SYNOPSIS + hex2octet() + to OUT buffer to place result; must be at least len/2 bytes + str, len IN begin, length for character string; str and to may not + overlap; len % 2 == 0 +*/ + +static void +hex2octet(uint8 *to, const char *str, uint len) +{ + const char *str_end= str + len; + while (str < str_end) + { + char tmp= char_val(*str++); + *to++= (tmp << 4) | char_val(*str++); + } +} + + +/* + Encrypt/Decrypt function used for password encryption in authentication. 
+ Simple XOR is used here but it is OK as we crypt random strings. Note, + that XOR(s1, XOR(s1, s2)) == s2, XOR(s1, s2) == XOR(s2, s1) + SYNOPSIS + my_crypt() + to OUT buffer to hold crypted string; must be at least len bytes + long; to and s1 (or s2) may be the same. + s1, s2 IN input strings (of equal length) + len IN length of s1 and s2 +*/ + +static void +my_crypt(char *to, const uchar *s1, const uchar *s2, uint len) +{ + const uint8 *s1_end= s1 + len; + while (s1 < s1_end) + *to++= *s1++ ^ *s2++; +} + + +/** + Compute two stage SHA1 hash of the password : + + hash_stage1=sha1("password") + hash_stage2=sha1(hash_stage1) + + @param password [IN] Password string. + @param pass_len [IN] Length of the password. + @param hash_stage1 [OUT] sha1(password) + @param hash_stage2 [OUT] sha1(hash_stage1) +*/ + +inline static +void compute_two_stage_sha1_hash(const char *password, size_t pass_len, + uint8 *hash_stage1, uint8 *hash_stage2) +{ + /* Stage 1: hash password */ + my_sha1(hash_stage1, password, pass_len); + + /* Stage 2 : hash first stage's output. */ + my_sha1(hash_stage2, (const char *) hash_stage1, MY_SHA1_HASH_SIZE); +} + + +/* + MySQL 4.1.1 password hashing: SHA conversion (see RFC 2289, 3174) twice + applied to the password string, and then produced octet sequence is + converted to hex string. + The result of this function is used as return value from PASSWORD() and + is stored in the database. + SYNOPSIS + my_make_scrambled_password() + buf OUT buffer of size 2*MY_SHA1_HASH_SIZE + 2 to store hex string + password IN password string + pass_len IN length of password string +*/ + +void my_make_scrambled_password(char *to, const char *password, + size_t pass_len) +{ + uint8 hash_stage2[MY_SHA1_HASH_SIZE]; + + /* Two stage SHA1 hash of the password. 
*/ + compute_two_stage_sha1_hash(password, pass_len, (uint8 *) to, hash_stage2); + + /* convert hash_stage2 to hex string */ + *to++= PVERSION41_CHAR; + octet2hex(to, (const char*) hash_stage2, MY_SHA1_HASH_SIZE); +} + + +/* + Wrapper around my_make_scrambled_password() to maintain client lib ABI + compatibility. + In server code usage of my_make_scrambled_password() is preferred to + avoid strlen(). + SYNOPSIS + make_scrambled_password() + buf OUT buffer of size 2*MY_SHA1_HASH_SIZE + 2 to store hex string + password IN NULL-terminated password string +*/ + +void make_scrambled_password(char *to, const char *password) +{ + my_make_scrambled_password(to, password, strlen(password)); +} + + +/* + Produce an obscure octet sequence from password and random + string, received from the server. This sequence corresponds to the + password, but password can not be easily restored from it. The sequence + is then sent to the server for validation. Trailing zero is not stored + in the buf as it is not needed. + This function is used by client to create authenticated reply to the + server's greeting. + SYNOPSIS + scramble() + buf OUT store scrambled string here. The buf must be at least + MY_SHA1_HASH_SIZE bytes long. + message IN random message, must be exactly SCRAMBLE_LENGTH long and + NULL-terminated. + password IN users' password +*/ + +void +scramble(char *to, const char *message, const char *password) +{ + uint8 hash_stage1[MY_SHA1_HASH_SIZE]; + uint8 hash_stage2[MY_SHA1_HASH_SIZE]; + + /* Two stage SHA1 hash of the password. 
*/ + compute_two_stage_sha1_hash(password, strlen(password), hash_stage1, + hash_stage2); + + /* create crypt string as sha1(message, hash_stage2) */; + my_sha1_multi((uint8 *) to, message, SCRAMBLE_LENGTH, + (const char *) hash_stage2, MY_SHA1_HASH_SIZE, NULL); + my_crypt(to, (const uchar *) to, hash_stage1, SCRAMBLE_LENGTH); +} + + +/* + Check that scrambled message corresponds to the password; the function + is used by server to check that received reply is authentic. + This function does not check lengths of given strings: message must be + null-terminated, reply and hash_stage2 must be at least MY_SHA1_HASH_SIZE + long (if not, something fishy is going on). + SYNOPSIS + check_scramble() + scramble clients' reply, presumably produced by scramble() + message original random string, previously sent to client + (presumably second argument of scramble()), must be + exactly SCRAMBLE_LENGTH long and NULL-terminated. + hash_stage2 hex2octet-decoded database entry + All params are IN. + + RETURN VALUE + 0 password is correct + !0 password is invalid +*/ + +my_bool +check_scramble(const uchar *scramble_arg, const char *message, + const uint8 *hash_stage2) +{ + uint8 buf[MY_SHA1_HASH_SIZE]; + uint8 hash_stage2_reassured[MY_SHA1_HASH_SIZE]; + + /* create key to encrypt scramble */ + my_sha1_multi(buf, message, SCRAMBLE_LENGTH, + (const char *) hash_stage2, MY_SHA1_HASH_SIZE, NULL); + /* encrypt scramble */ + my_crypt((char *) buf, buf, scramble_arg, SCRAMBLE_LENGTH); + + /* now buf supposedly contains hash_stage1: so we can get hash_stage2 */ + my_sha1(hash_stage2_reassured, (const char *) buf, MY_SHA1_HASH_SIZE); + + return MY_TEST(memcmp(hash_stage2, hash_stage2_reassured, MY_SHA1_HASH_SIZE)); +} + +/* + Convert scrambled password from asciiz hex string to binary form. + + SYNOPSIS + get_salt_from_password() + res OUT buf to hold password. Must be at least MY_SHA1_HASH_SIZE + bytes long. 
+ password IN 4.1.1 version value of user.password +*/ + +void get_salt_from_password(uint8 *hash_stage2, const char *password) +{ + hex2octet(hash_stage2, password+1 /* skip '*' */, MY_SHA1_HASH_SIZE * 2); +} + +/* + Convert scrambled password from binary form to asciiz hex string. + SYNOPSIS + make_password_from_salt() + to OUT store resulting string here, 2*MY_SHA1_HASH_SIZE+2 bytes + salt IN password in salt format +*/ + +void make_password_from_salt(char *to, const uint8 *hash_stage2) +{ + *to++= PVERSION41_CHAR; + octet2hex(to, (const char*) hash_stage2, MY_SHA1_HASH_SIZE); +} + diff --git a/sql/plistsort.c b/sql/plistsort.c new file mode 100644 index 00000000..6efea273 --- /dev/null +++ b/sql/plistsort.c @@ -0,0 +1,166 @@ +/* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* +things to define before including the file: + +#define LS_LIST_ITEM ListItem +#define LS_COMPARE_FUNC_DECL compare_func var_name, +#define LS_COMPARE_FUNC_CALL(list_el1, list_el2) (*var_name)(list_el1, list_el2) +#define LS_NEXT(A) (A)->next +#define LS_SET_NEXT(A,val) (A)->next= val +#define LS_P_NEXT(A) &(A)->next +#define LS_NAME plistsort +#define LS_SCOPE static +#define LS_STRUCT_NAME ls_struct_name +*/ + +typedef struct LS_STRUCT_NAME +{ + LS_LIST_ITEM *list1; + int list_len; + int return_point; +} LS_STRUCT_NAME; + +LS_SCOPE LS_LIST_ITEM* LS_NAME(LS_COMPARE_FUNC_DECL LS_LIST_ITEM *list, int list_len) +{ + LS_LIST_ITEM *list_end; + LS_LIST_ITEM *sorted_list; + + struct LS_STRUCT_NAME stack[63], *sp= stack; + + if (list_len < 2) + return list; + + sp->list_len= list_len; + sp->return_point= 2; + +recursion_point: + + if (sp->list_len < 4) + { + LS_LIST_ITEM *e1, *e2; + sorted_list= list; + e1= LS_NEXT(sorted_list); + list_end= LS_NEXT(e1); + if (LS_COMPARE_FUNC_CALL(sorted_list, e1)) + { + sorted_list= e1; + e1= list; + } + if (sp->list_len == 2) + { + LS_SET_NEXT(sorted_list, e1); + LS_SET_NEXT(e1, NULL); + goto exit_point; + } + e2= list_end; + list_end= LS_NEXT(e2); + if (LS_COMPARE_FUNC_CALL(e1, e2)) + { + { + LS_LIST_ITEM *tmp_e= e1; + e1= e2; + e2= tmp_e; + } + if (LS_COMPARE_FUNC_CALL(sorted_list, e1)) + { + LS_LIST_ITEM *tmp_e= sorted_list; + sorted_list= e1; + e1= tmp_e; + } + } + + LS_SET_NEXT(sorted_list, e1); + LS_SET_NEXT(e1, e2); + LS_SET_NEXT(e2, NULL); + goto exit_point; + } + + { + struct LS_STRUCT_NAME *sp0= sp++; + sp->list_len= sp0->list_len >> 1; + sp0->list_len-= sp->list_len; + sp->return_point= 0; + } + goto recursion_point; +return_point0: + sp->list1= sorted_list; + { + struct LS_STRUCT_NAME *sp0= sp++; + list= 
list_end; + sp->list_len= sp0->list_len; + sp->return_point= 1; + } + goto recursion_point; +return_point1: + { + LS_LIST_ITEM **hook= &sorted_list; + LS_LIST_ITEM *list1= sp->list1; + LS_LIST_ITEM *list2= sorted_list; + + if (LS_COMPARE_FUNC_CALL(list1, list2)) + { + LS_LIST_ITEM *tmp_e= list2; + list2= list1; + list1= tmp_e; + } + for (;;) + { + *hook= list1; + do + { + if (!(list1= *(hook= LS_P_NEXT(list1)))) + { + *hook= list2; + goto exit_point; + } + } while (LS_COMPARE_FUNC_CALL(list2, list1)); + + *hook= list2; + do + { + if (!(list2= *(hook= LS_P_NEXT(list2)))) + { + *hook= list1; + goto exit_point; + } + } while (LS_COMPARE_FUNC_CALL(list1, list2)); + } + } + +exit_point: + switch ((sp--)->return_point) + { + case 0: goto return_point0; + case 1: goto return_point1; + default:; + } + + return sorted_list; +} + + +#undef LS_LIST_ITEM +#undef LS_NEXT +#undef LS_SET_NEXT +#undef LS_P_NEXT +#undef LS_NAME +#undef LS_STRUCT_NAME +#undef LS_SCOPE +#undef LS_COMPARE_FUNC_DECL +#undef LS_COMPARE_FUNC_CALL + diff --git a/sql/privilege.h b/sql/privilege.h new file mode 100644 index 00000000..8e9b9a37 --- /dev/null +++ b/sql/privilege.h @@ -0,0 +1,756 @@ +#ifndef PRIVILEGE_H_INCLUDED +#define PRIVILEGE_H_INCLUDED + +/* Copyright (c) 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "my_global.h" // ulonglong + + +/* + A strict enum to store privilege bits. + + We should eventually make if even stricter using "enum class privilege_t" and: + - Replace all code pieces like `if (priv)` to `if (priv != NO_ACL)` + - Remove "delete" comparison operators below +*/ +enum privilege_t: unsigned long long +{ + NO_ACL = (0), + SELECT_ACL = (1UL << 0), + INSERT_ACL = (1UL << 1), + UPDATE_ACL = (1UL << 2), + DELETE_ACL = (1UL << 3), + CREATE_ACL = (1UL << 4), + DROP_ACL = (1UL << 5), + RELOAD_ACL = (1UL << 6), + SHUTDOWN_ACL = (1UL << 7), + PROCESS_ACL = (1UL << 8), + FILE_ACL = (1UL << 9), + GRANT_ACL = (1UL << 10), + REFERENCES_ACL = (1UL << 11), + INDEX_ACL = (1UL << 12), + ALTER_ACL = (1UL << 13), + SHOW_DB_ACL = (1UL << 14), + SUPER_ACL = (1UL << 15), + CREATE_TMP_ACL = (1UL << 16), + LOCK_TABLES_ACL = (1UL << 17), + EXECUTE_ACL = (1UL << 18), + REPL_SLAVE_ACL = (1UL << 19), + BINLOG_MONITOR_ACL = (1UL << 20), // Was REPL_CLIENT_ACL prior to 10.5.2 + CREATE_VIEW_ACL = (1UL << 21), + SHOW_VIEW_ACL = (1UL << 22), + CREATE_PROC_ACL = (1UL << 23), + ALTER_PROC_ACL = (1UL << 24), + CREATE_USER_ACL = (1UL << 25), + EVENT_ACL = (1UL << 26), + TRIGGER_ACL = (1UL << 27), + CREATE_TABLESPACE_ACL = (1UL << 28), + DELETE_HISTORY_ACL = (1UL << 29), // Added in 10.3.4 + SET_USER_ACL = (1UL << 30), // Added in 10.5.2 + FEDERATED_ADMIN_ACL = (1UL << 31), // Added in 10.5.2 + CONNECTION_ADMIN_ACL = (1ULL << 32), // Added in 10.5.2 + READ_ONLY_ADMIN_ACL = (1ULL << 33), // Added in 10.5.2 + REPL_SLAVE_ADMIN_ACL = (1ULL << 34), // Added in 10.5.2 + REPL_MASTER_ADMIN_ACL = (1ULL << 35), // Added in 10.5.2 + BINLOG_ADMIN_ACL = (1ULL << 36), // Added in 10.5.2 + BINLOG_REPLAY_ACL = (1ULL << 37), // Added in 10.5.2 + SLAVE_MONITOR_ACL = (1ULL << 38) // Added 
in 10.5.8 + /* + When adding new privilege bits, don't forget to update: + In this file: + - Add a new LAST_version_ACL + - Add a new ALL_KNOWN_ACL_version + - Change ALL_KNOWN_ACL to ALL_KNOWN_ACL_version + - Change GLOBAL_ACLS if needed + - Change SUPER_ADDED_SINCE_USER_TABLE_ACL if needed + + In other files: + - static struct show_privileges_st sys_privileges[] + - static const char *command_array[] and static uint command_lengths[] + - mysql_system_tables.sql and mysql_system_tables_fix.sql + - acl_init() or whatever - to define behaviour for old privilege tables + - Update User_table_json::get_access() + - sql_yacc.yy - for GRANT/REVOKE to work + + Important: the enum should contain only single-bit values. + In this case, debuggers print bit combinations in the readable form: + (gdb) p (privilege_t) (15) + $8 = (SELECT_ACL | INSERT_ACL | UPDATE_ACL | DELETE_ACL) + + Bit-OR combinations of the above values should be declared outside! + */ +}; + +constexpr static inline privilege_t ALL_KNOWN_BITS(privilege_t x) +{ + return (privilege_t)(x | (x-1)); +} + +// Version markers +constexpr privilege_t LAST_100304_ACL= DELETE_HISTORY_ACL; +constexpr privilege_t LAST_100502_ACL= BINLOG_REPLAY_ACL; +constexpr privilege_t LAST_100508_ACL= SLAVE_MONITOR_ACL; + +// Current version markers +constexpr privilege_t LAST_CURRENT_ACL= LAST_100508_ACL; +constexpr uint PRIVILEGE_T_MAX_BIT= + my_bit_log2_uint64((ulonglong) LAST_CURRENT_ACL); + +static_assert((privilege_t)(1ULL << PRIVILEGE_T_MAX_BIT) == LAST_CURRENT_ACL, + "Something went fatally badly: " + "LAST_CURRENT_ACL and PRIVILEGE_T_MAX_BIT do not match"); + +// A combination of all bits defined in 10.3.4 (and earlier) +constexpr privilege_t ALL_KNOWN_ACL_100304 = ALL_KNOWN_BITS(LAST_100304_ACL); + +// A combination of all bits defined in 10.5.2 +constexpr privilege_t ALL_KNOWN_ACL_100502= ALL_KNOWN_BITS(LAST_100502_ACL); + +// A combination of all bits defined in 10.5.8 +constexpr privilege_t ALL_KNOWN_ACL_100508= 
ALL_KNOWN_BITS(LAST_100508_ACL); +// unfortunately, SLAVE_MONITOR_ACL was added in 10.5.9, but also in 10.5.8-5 +// let's stay compatible with that branch too. +constexpr privilege_t ALL_KNOWN_ACL_100509= ALL_KNOWN_ACL_100508; + +// A combination of all bits defined as of the current version +constexpr privilege_t ALL_KNOWN_ACL= ALL_KNOWN_BITS(LAST_CURRENT_ACL); + + +// Unary operators +static inline constexpr ulonglong operator~(privilege_t access) +{ + return ~static_cast(access); +} + +/* + Comparison operators. + Delete automatic conversion between to/from integer types as much as possible. + This forces to use `(priv == NO_ACL)` instead of `(priv == 0)`. + + Note: these operators will be gone when we change privilege_t to + "enum class privilege_t". See comments above. +*/ +static inline bool operator==(privilege_t, ulonglong)= delete; +static inline bool operator==(privilege_t, ulong)= delete; +static inline bool operator==(privilege_t, uint)= delete; +static inline bool operator==(privilege_t, uchar)= delete; +static inline bool operator==(privilege_t, longlong)= delete; +static inline bool operator==(privilege_t, long)= delete; +static inline bool operator==(privilege_t, int)= delete; +static inline bool operator==(privilege_t, char)= delete; +static inline bool operator==(privilege_t, bool)= delete; + +static inline bool operator==(ulonglong, privilege_t)= delete; +static inline bool operator==(ulong, privilege_t)= delete; +static inline bool operator==(uint, privilege_t)= delete; +static inline bool operator==(uchar, privilege_t)= delete; +static inline bool operator==(longlong, privilege_t)= delete; +static inline bool operator==(long, privilege_t)= delete; +static inline bool operator==(int, privilege_t)= delete; +static inline bool operator==(char, privilege_t)= delete; +static inline bool operator==(bool, privilege_t)= delete; + +static inline bool operator!=(privilege_t, ulonglong)= delete; +static inline bool operator!=(privilege_t, ulong)= delete; 
+static inline bool operator!=(privilege_t, uint)= delete; +static inline bool operator!=(privilege_t, uchar)= delete; +static inline bool operator!=(privilege_t, longlong)= delete; +static inline bool operator!=(privilege_t, long)= delete; +static inline bool operator!=(privilege_t, int)= delete; +static inline bool operator!=(privilege_t, char)= delete; +static inline bool operator!=(privilege_t, bool)= delete; + +static inline bool operator!=(ulonglong, privilege_t)= delete; +static inline bool operator!=(ulong, privilege_t)= delete; +static inline bool operator!=(uint, privilege_t)= delete; +static inline bool operator!=(uchar, privilege_t)= delete; +static inline bool operator!=(longlong, privilege_t)= delete; +static inline bool operator!=(long, privilege_t)= delete; +static inline bool operator!=(int, privilege_t)= delete; +static inline bool operator!=(char, privilege_t)= delete; +static inline bool operator!=(bool, privilege_t)= delete; + + +// Dyadic bitwise operators +static inline constexpr privilege_t operator&(privilege_t a, privilege_t b) +{ + return static_cast(static_cast(a) & + static_cast(b)); +} + +static inline constexpr privilege_t operator&(ulonglong a, privilege_t b) +{ + return static_cast(a & static_cast(b)); +} + +static inline constexpr privilege_t operator&(privilege_t a, ulonglong b) +{ + return static_cast(static_cast(a) & b); +} + +static inline constexpr privilege_t operator|(privilege_t a, privilege_t b) +{ + return static_cast(static_cast(a) | + static_cast(b)); +} + + +// Dyadyc bitwise assignment operators +static inline privilege_t& operator&=(privilege_t &a, privilege_t b) +{ + return a= a & b; +} + +static inline privilege_t& operator&=(privilege_t &a, ulonglong b) +{ + return a= a & b; +} + +static inline privilege_t& operator|=(privilege_t &a, privilege_t b) +{ + return a= a | b; +} + + +/* + A combination of all SUPER privileges added since the old user table format. 
+ These privileges are automatically added when upgrading from the + old format mysql.user table if a user has the SUPER privilege. +*/ +constexpr privilege_t GLOBAL_SUPER_ADDED_SINCE_USER_TABLE_ACLS= + SET_USER_ACL | + FEDERATED_ADMIN_ACL | + CONNECTION_ADMIN_ACL | + READ_ONLY_ADMIN_ACL | + REPL_SLAVE_ADMIN_ACL | + BINLOG_ADMIN_ACL | + BINLOG_REPLAY_ACL; + + +constexpr privilege_t COL_DML_ACLS= + SELECT_ACL | INSERT_ACL | UPDATE_ACL | DELETE_ACL; + +constexpr privilege_t VIEW_ACLS= + CREATE_VIEW_ACL | SHOW_VIEW_ACL; + +constexpr privilege_t STD_TABLE_DDL_ACLS= + CREATE_ACL | DROP_ACL | ALTER_ACL; + +constexpr privilege_t ALL_TABLE_DDL_ACLS= + STD_TABLE_DDL_ACLS | INDEX_ACL; + +constexpr privilege_t COL_ACLS= + SELECT_ACL | INSERT_ACL | UPDATE_ACL | REFERENCES_ACL; + +constexpr privilege_t PROC_DDL_ACLS= + CREATE_PROC_ACL | ALTER_PROC_ACL; + +constexpr privilege_t SHOW_PROC_ACLS= + PROC_DDL_ACLS | EXECUTE_ACL; + +constexpr privilege_t TABLE_ACLS= + COL_DML_ACLS | ALL_TABLE_DDL_ACLS | VIEW_ACLS | + GRANT_ACL | REFERENCES_ACL | + TRIGGER_ACL | DELETE_HISTORY_ACL; + +constexpr privilege_t DB_ACLS= + TABLE_ACLS | PROC_DDL_ACLS | EXECUTE_ACL | + CREATE_TMP_ACL | LOCK_TABLES_ACL | EVENT_ACL; + +constexpr privilege_t PROC_ACLS= + ALTER_PROC_ACL | EXECUTE_ACL | GRANT_ACL; + +constexpr privilege_t GLOBAL_ACLS= + DB_ACLS | SHOW_DB_ACL | + CREATE_USER_ACL | CREATE_TABLESPACE_ACL | + SUPER_ACL | RELOAD_ACL | SHUTDOWN_ACL | PROCESS_ACL | FILE_ACL | + REPL_SLAVE_ACL | BINLOG_MONITOR_ACL | + GLOBAL_SUPER_ADDED_SINCE_USER_TABLE_ACLS | + REPL_MASTER_ADMIN_ACL | SLAVE_MONITOR_ACL; + +constexpr privilege_t DEFAULT_CREATE_PROC_ACLS= + ALTER_PROC_ACL | EXECUTE_ACL; + +constexpr privilege_t SHOW_CREATE_TABLE_ACLS= + COL_DML_ACLS | ALL_TABLE_DDL_ACLS | + TRIGGER_ACL | REFERENCES_ACL | GRANT_ACL | VIEW_ACLS; + +/** + Table-level privileges which are automatically "granted" to everyone on + existing temporary tables (CREATE_ACL is necessary for ALTER ... RENAME). 
+*/ +constexpr privilege_t TMP_TABLE_ACLS= + COL_DML_ACLS | ALL_TABLE_DDL_ACLS | REFERENCES_ACL; + + +constexpr privilege_t PRIV_LOCK_TABLES= SELECT_ACL | LOCK_TABLES_ACL; + +/* + Allow to set an object definer: + CREATE DEFINER=xxx {TRIGGER|VIEW|FUNCTION|PROCEDURE} + Was SUPER prior to 10.5.2 +*/ +constexpr privilege_t PRIV_DEFINER_CLAUSE= SET_USER_ACL | SUPER_ACL; +/* + If a VIEW has a `definer=invoker@host` clause and + the specified definer does not exist, then + - The invoker with REVEAL_MISSING_DEFINER_ACL gets: + ERROR: The user specified as a definer ('definer1'@'localhost') doesn't exist + - The invoker without MISSING_DEFINER_ACL gets a generic access error, + without revealing details that the definer does not exist. + + TODO: we should eventually test the same privilege when processing + other objects that have the DEFINER clause (e.g. routines, triggers). + Currently the missing definer is revealed for non-privileged invokers + in case of routines, triggers, etc. + + Was SUPER prior to 10.5.2 +*/ +constexpr privilege_t PRIV_REVEAL_MISSING_DEFINER= SET_USER_ACL | SUPER_ACL; + +/* Actions that require only the SUPER privilege */ +constexpr privilege_t PRIV_DES_DECRYPT_ONE_ARG= SUPER_ACL; +constexpr privilege_t PRIV_LOG_BIN_TRUSTED_SP_CREATOR= SUPER_ACL; +constexpr privilege_t PRIV_DEBUG= SUPER_ACL; +constexpr privilege_t PRIV_SET_GLOBAL_SYSTEM_VARIABLE= SUPER_ACL; +constexpr privilege_t PRIV_SET_RESTRICTED_SESSION_SYSTEM_VARIABLE= SUPER_ACL; + +/* The following variables respected only SUPER_ACL prior to 10.5.2 */ +constexpr privilege_t PRIV_SET_SYSTEM_VAR_BINLOG_FORMAT= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_VAR_BINLOG_DIRECT_NON_TRANSACTIONAL_UPDATES= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_VAR_BINLOG_ANNOTATE_ROW_EVENTS= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_VAR_BINLOG_ROW_IMAGE= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t 
PRIV_SET_SYSTEM_VAR_SQL_LOG_BIN= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_BINLOG_CACHE_SIZE= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_BINLOG_FILE_CACHE_SIZE= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_BINLOG_STMT_CACHE_SIZE= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_BINLOG_COMMIT_WAIT_COUNT= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_BINLOG_COMMIT_WAIT_USEC= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_BINLOG_ROW_METADATA= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_EXPIRE_LOGS_DAYS= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_LOG_BIN_COMPRESS= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_LOG_BIN_COMPRESS_MIN_LEN= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_LOG_BIN_TRUST_FUNCTION_CREATORS= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_MAX_BINLOG_CACHE_SIZE= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_MAX_BINLOG_STMT_CACHE_SIZE= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_MAX_BINLOG_SIZE= + SUPER_ACL | BINLOG_ADMIN_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SYNC_BINLOG= + SUPER_ACL | BINLOG_ADMIN_ACL; + + + +/* Privileges related to --read-only */ +// Was super prior to 10.5.2 +constexpr privilege_t PRIV_IGNORE_READ_ONLY= READ_ONLY_ADMIN_ACL; +// Was super prior to 10.5.2 +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_READ_ONLY= + READ_ONLY_ADMIN_ACL; + +/* + Privileges related to connection handling. 
+*/ +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_IGNORE_INIT_CONNECT= CONNECTION_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_IGNORE_MAX_USER_CONNECTIONS= CONNECTION_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_IGNORE_MAX_CONNECTIONS= CONNECTION_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_IGNORE_MAX_PASSWORD_ERRORS= CONNECTION_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_KILL_OTHER_USER_PROCESS= CONNECTION_ADMIN_ACL | SUPER_ACL; + +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_CONNECT_TIMEOUT= + CONNECTION_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_DISCONNECT_ON_EXPIRED_PASSWORD= + CONNECTION_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_EXTRA_MAX_CONNECTIONS= + CONNECTION_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_INIT_CONNECT= + CONNECTION_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_MAX_CONNECTIONS= + CONNECTION_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_MAX_CONNECT_ERRORS= + CONNECTION_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_MAX_PASSWORD_ERRORS= + CONNECTION_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_PROXY_PROTOCOL_NETWORKS= + CONNECTION_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SECURE_AUTH= + CONNECTION_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLOW_LAUNCH_TIME= + CONNECTION_ADMIN_ACL 
| SUPER_ACL; + +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_THREAD_POOL= + CONNECTION_ADMIN_ACL | SUPER_ACL; + + +/* + Binary log related privileges that are checked regardless + of active replication running. +*/ + +/* + This command was renamed from "SHOW MASTER STATUS" + to "SHOW BINLOG STATUS" in 10.5.2. + Was SUPER_ACL | REPL_CLIENT_ACL prior to 10.5.2 + REPL_CLIENT_ACL was renamed to BINLOG_MONITOR_ACL. +*/ +constexpr privilege_t PRIV_STMT_SHOW_BINLOG_STATUS= BINLOG_MONITOR_ACL | SUPER_ACL; + +/* + Was SUPER_ACL | REPL_CLIENT_ACL prior to 10.5.2 + REPL_CLIENT_ACL was renamed to BINLOG_MONITOR_ACL. +*/ +constexpr privilege_t PRIV_STMT_SHOW_BINARY_LOGS= BINLOG_MONITOR_ACL | SUPER_ACL; + +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_STMT_PURGE_BINLOG= BINLOG_ADMIN_ACL | SUPER_ACL; + +// Was REPL_SLAVE_ACL prior to 10.5.2 +constexpr privilege_t PRIV_STMT_SHOW_BINLOG_EVENTS= BINLOG_MONITOR_ACL; + + +/* + Privileges for replication related statements and commands + that are executed on the master. +*/ +constexpr privilege_t PRIV_COM_REGISTER_SLAVE= REPL_SLAVE_ACL; +constexpr privilege_t PRIV_COM_BINLOG_DUMP= REPL_SLAVE_ACL; +// Was REPL_SLAVE_ACL prior to 10.5.2 +constexpr privilege_t PRIV_STMT_SHOW_SLAVE_HOSTS= REPL_MASTER_ADMIN_ACL; + +/* + Replication master related variable privileges. 
+ Were SUPER prior to 10.5.2 +*/ +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_RPL_SEMI_SYNC_MASTER_ENABLED= + REPL_MASTER_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_RPL_SEMI_SYNC_MASTER_TIMEOUT= + REPL_MASTER_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_RPL_SEMI_SYNC_MASTER_WAIT_NO_SLAVE= + REPL_MASTER_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_RPL_SEMI_SYNC_MASTER_TRACE_LEVEL= + REPL_MASTER_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_RPL_SEMI_SYNC_MASTER_WAIT_POINT= + REPL_MASTER_ADMIN_ACL | SUPER_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_MASTER_VERIFY_CHECKSUM= + REPL_MASTER_ADMIN_ACL | SUPER_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_GTID_BINLOG_STATE= + REPL_MASTER_ADMIN_ACL | SUPER_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SERVER_ID= + REPL_MASTER_ADMIN_ACL | SUPER_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_GTID_DOMAIN_ID= + REPL_MASTER_ADMIN_ACL | SUPER_ACL; + + +/* Privileges for statements that are executed on the slave */ +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_STMT_START_SLAVE= REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_STMT_STOP_SLAVE= REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_STMT_CHANGE_MASTER= REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +// Was (SUPER_ACL | REPL_CLIENT_ACL) prior to 10.5.2 +// Was (SUPER_ACL | REPL_SLAVE_ADMIN_ACL) from 10.5.2 to 10.5.7 +constexpr privilege_t PRIV_STMT_SHOW_SLAVE_STATUS= SLAVE_MONITOR_ACL | SUPER_ACL; +// Was REPL_SLAVE_ACL prior to 10.5.2 +// Was REPL_SLAVE_ADMIN_ACL from 10.5.2 to 10.5.7 +constexpr privilege_t PRIV_STMT_SHOW_RELAYLOG_EVENTS= SLAVE_MONITOR_ACL; + +/* + Privileges related to binlog replaying. 
+ Were SUPER_ACL prior to 10.5.2 +*/ +constexpr privilege_t PRIV_STMT_BINLOG= BINLOG_REPLAY_ACL | SUPER_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_SESSION_VAR_GTID_SEQ_NO= + BINLOG_REPLAY_ACL | SUPER_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_SESSION_VAR_PSEUDO_THREAD_ID= + BINLOG_REPLAY_ACL | SUPER_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_SESSION_VAR_SERVER_ID= + BINLOG_REPLAY_ACL | SUPER_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_SESSION_VAR_GTID_DOMAIN_ID= + BINLOG_REPLAY_ACL | SUPER_ACL; + +/* + Privileges for slave related global variables. + Were SUPER prior to 10.5.2. +*/ +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_REPLICATE_EVENTS_MARKED_FOR_SKIP= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_REPLICATE_REWRITE_DB= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_REPLICATE_DO_DB= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_REPLICATE_DO_TABLE= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_REPLICATE_IGNORE_DB= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_REPLICATE_IGNORE_TABLE= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_REPLICATE_WILD_DO_TABLE= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_REPLICATE_WILD_IGNORE_TABLE= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_READ_BINLOG_SPEED_LIMIT= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_COMPRESSED_PROTOCOL= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_DDL_EXEC_MODE= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_DOMAIN_PARALLEL_THREADS= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_EXEC_MODE= + 
REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_MAX_ALLOWED_PACKET= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_MAX_STATEMENT_TIME= + REPL_SLAVE_ADMIN_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_NET_TIMEOUT= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_PARALLEL_MAX_QUEUED= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_PARALLEL_MODE= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_PARALLEL_THREADS= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_PARALLEL_WORKERS= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_RUN_TRIGGERS_FOR_RBR= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_SQL_VERIFY_CHECKSUM= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_TRANSACTION_RETRY_INTERVAL= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_TYPE_CONVERSIONS= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_INIT_SLAVE= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_RPL_SEMI_SYNC_SLAVE_ENABLED= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_RPL_SEMI_SYNC_SLAVE_TRACE_LEVEL= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_RPL_SEMI_SYNC_SLAVE_DELAY_MASTER= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_RPL_SEMI_SYNC_SLAVE_KILL_CONN_TIMEOUT= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_RELAY_LOG_PURGE= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t 
PRIV_SET_SYSTEM_GLOBAL_VAR_RELAY_LOG_RECOVERY= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SYNC_MASTER_INFO= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SYNC_RELAY_LOG= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SYNC_RELAY_LOG_INFO= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; + +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_GTID_CLEANUP_BATCH_SIZE= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_GTID_IGNORE_DUPLICATES= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_GTID_POS_AUTO_ENGINES= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_GTID_SLAVE_POS= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_GTID_STRICT_MODE= + REPL_SLAVE_ADMIN_ACL | SUPER_ACL; + + +/* Privileges for federated database related statements */ +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_STMT_CREATE_SERVER= FEDERATED_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_STMT_ALTER_SERVER= FEDERATED_ADMIN_ACL | SUPER_ACL; +// Was SUPER_ACL prior to 10.5.2 +constexpr privilege_t PRIV_STMT_DROP_SERVER= FEDERATED_ADMIN_ACL | SUPER_ACL; + + +/* Privileges related to processes */ +constexpr privilege_t PRIV_COM_PROCESS_INFO= PROCESS_ACL; +// This privilege applies both for SHOW EXPLAIN and SHOW ANALYZE +constexpr privilege_t PRIV_STMT_SHOW_EXPLAIN= PROCESS_ACL; +constexpr privilege_t PRIV_STMT_SHOW_ENGINE_STATUS= PROCESS_ACL; +constexpr privilege_t PRIV_STMT_SHOW_ENGINE_MUTEX= PROCESS_ACL; +constexpr privilege_t PRIV_STMT_SHOW_PROCESSLIST= PROCESS_ACL; + + +/* + Defines to change the above bits to how things are stored in tables + This is needed as the 'host' and 'db' table is missing a few privileges +*/ + +/* Privileges that need to be reallocated (in continuous chunks) */ +constexpr 
privilege_t DB_CHUNK0 (COL_DML_ACLS | CREATE_ACL | DROP_ACL); +constexpr privilege_t DB_CHUNK1 (GRANT_ACL | REFERENCES_ACL | INDEX_ACL | ALTER_ACL); +constexpr privilege_t DB_CHUNK2 (CREATE_TMP_ACL | LOCK_TABLES_ACL); +constexpr privilege_t DB_CHUNK3 (VIEW_ACLS | PROC_DDL_ACLS); +constexpr privilege_t DB_CHUNK4 (EXECUTE_ACL); +constexpr privilege_t DB_CHUNK5 (EVENT_ACL | TRIGGER_ACL); +constexpr privilege_t DB_CHUNK6 (DELETE_HISTORY_ACL); + + +static inline privilege_t fix_rights_for_db(privilege_t access) +{ + ulonglong A(access); + return static_cast + (((A) & DB_CHUNK0) | + ((A << 4) & DB_CHUNK1) | + ((A << 6) & DB_CHUNK2) | + ((A << 9) & DB_CHUNK3) | + ((A << 2) & DB_CHUNK4) | + ((A << 9) & DB_CHUNK5) | + ((A << 10) & DB_CHUNK6)); +} + +static inline privilege_t get_rights_for_db(privilege_t access) +{ + ulonglong A(access); + return static_cast + ((A & DB_CHUNK0) | + ((A & DB_CHUNK1) >> 4) | + ((A & DB_CHUNK2) >> 6) | + ((A & DB_CHUNK3) >> 9) | + ((A & DB_CHUNK4) >> 2) | + ((A & DB_CHUNK5) >> 9) | + ((A & DB_CHUNK6) >> 10)); +} + + +#define TBL_CHUNK0 DB_CHUNK0 +#define TBL_CHUNK1 DB_CHUNK1 +#define TBL_CHUNK2 (CREATE_VIEW_ACL | SHOW_VIEW_ACL) +#define TBL_CHUNK3 TRIGGER_ACL +#define TBL_CHUNK4 (DELETE_HISTORY_ACL) + + +static inline privilege_t fix_rights_for_table(privilege_t access) +{ + ulonglong A(access); + return static_cast + ((A & TBL_CHUNK0) | + ((A << 4) & TBL_CHUNK1) | + ((A << 11) & TBL_CHUNK2) | + ((A << 15) & TBL_CHUNK3) | + ((A << 16) & TBL_CHUNK4)); +} + + +static inline privilege_t get_rights_for_table(privilege_t access) +{ + ulonglong A(access); + return static_cast + ((A & TBL_CHUNK0) | + ((A & TBL_CHUNK1) >> 4) | + ((A & TBL_CHUNK2) >> 11) | + ((A & TBL_CHUNK3) >> 15) | + ((A & TBL_CHUNK4) >> 16)); +} + + +static inline privilege_t fix_rights_for_column(privilege_t A) +{ + const ulonglong mask(SELECT_ACL | INSERT_ACL | UPDATE_ACL); + return (A & mask) | static_cast((A & ~mask) << 8); +} + + +static inline privilege_t 
get_rights_for_column(privilege_t A) +{ + const ulonglong mask(SELECT_ACL | INSERT_ACL | UPDATE_ACL); + return static_cast((static_cast(A) & mask) | + (static_cast(A) >> 8)); +} + + +static inline privilege_t fix_rights_for_procedure(privilege_t access) +{ + ulonglong A(access); + return static_cast + (((A << 18) & EXECUTE_ACL) | + ((A << 23) & ALTER_PROC_ACL) | + ((A << 8) & GRANT_ACL)); +} + + +static inline privilege_t get_rights_for_procedure(privilege_t access) +{ + ulonglong A(access); + return static_cast + (((A & EXECUTE_ACL) >> 18) | + ((A & ALTER_PROC_ACL) >> 23) | + ((A & GRANT_ACL) >> 8)); +} + + +#endif /* PRIVILEGE_H_INCLUDED */ diff --git a/sql/procedure.cc b/sql/procedure.cc new file mode 100644 index 00000000..21afef27 --- /dev/null +++ b/sql/procedure.cc @@ -0,0 +1,102 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* Procedures (functions with changes output of select) */ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +#include "procedure.h" +#include "sql_analyse.h" // Includes procedure +#ifdef USE_PROC_RANGE +#include "proc_range.h" +#endif + +static struct st_procedure_def { + const char *name; + Procedure *(*init)(THD *thd,ORDER *param,select_result *result, + List &field_list); +} sql_procs[] = { +#ifdef USE_PROC_RANGE + { "split_sum",proc_sum_range_init }, // Internal procedure at TCX + { "split_count",proc_count_range_init }, // Internal procedure at TCX + { "matris_ranges",proc_matris_range_init }, // Internal procedure at TCX +#endif + { "analyse",proc_analyse_init } // Analyse a result +}; + + +my_decimal *Item_proc_string::val_decimal(my_decimal *decimal_value) +{ + if (null_value) + return 0; + string2my_decimal(E_DEC_FATAL_ERROR, &value, decimal_value); + return (decimal_value); +} + + +my_decimal *Item_proc_int::val_decimal(my_decimal *decimal_value) +{ + if (null_value) + return 0; + int2my_decimal(E_DEC_FATAL_ERROR, value, unsigned_flag, decimal_value); + return (decimal_value); +} + + +my_decimal *Item_proc_real::val_decimal(my_decimal *decimal_value) +{ + if (null_value) + return 0; + double2my_decimal(E_DEC_FATAL_ERROR, value, decimal_value); + return (decimal_value); +} + + +/** + Setup handling of procedure. 
+ + @return + Return 0 if everything is ok +*/ + + +Procedure * +setup_procedure(THD *thd,ORDER *param,select_result *result, + List &field_list,int *error) +{ + uint i; + DBUG_ENTER("setup_procedure"); + *error=0; + if (!param) + DBUG_RETURN(0); + for (i=0 ; i < array_elements(sql_procs) ; i++) + { + if (!my_strcasecmp(system_charset_info, + (*param->item)->name.str, sql_procs[i].name)) + { + Procedure *proc=(*sql_procs[i].init)(thd,param,result,field_list); + *error= !proc; + DBUG_RETURN(proc); + } + } + my_error(ER_UNKNOWN_PROCEDURE, MYF(0), (*param->item)->name.str); + *error=1; + DBUG_RETURN(0); +} diff --git a/sql/procedure.h b/sql/procedure.h new file mode 100644 index 00000000..c59b766d --- /dev/null +++ b/sql/procedure.h @@ -0,0 +1,193 @@ +#ifndef PROCEDURE_INCLUDED +#define PROCEDURE_INCLUDED + +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2009, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* When using sql procedures */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +/* + It is necessary to include set_var.h instead of item.h because there + are dependencies on include order for set_var.h and item.h. This + will be resolved later. 
+*/ +#include "sql_class.h" /* select_result, set_var.h: THD */ +#include "set_var.h" /* Item */ + +#define PROC_NO_SORT 1 /**< Bits in flags */ +#define PROC_GROUP 2 /**< proc must have group */ + +/* Procedure items used by procedures to store values for send_result_set_metadata */ + +class Item_proc :public Item +{ +public: + Item_proc(THD *thd, const char *name_par): Item(thd) + { + this->name.str= name_par; + this->name.length= strlen(name_par); + } + enum Type type() const override { return Item::PROC_ITEM; } + Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src *src, + const Tmp_field_param *param) override + { + /* + We can get to here when using a CURSOR for a query with PROCEDURE: + DECLARE c CURSOR FOR SELECT * FROM t1 PROCEDURE analyse(); + OPEN c; + */ + return create_tmp_field_ex_simple(root, table, src, param); + } + virtual void set(double nr)=0; + virtual void set(const char *str,uint length,CHARSET_INFO *cs)=0; + virtual void set(longlong nr)=0; + const Type_handler *type_handler() const override=0; + void set(const char *str) { set(str,(uint) strlen(str), default_charset()); } + unsigned int size_of() { return sizeof(*this);} + bool check_vcol_func_processor(void *arg) override + { + DBUG_ASSERT(0); // impossible + return mark_unsupported_function("proc", arg, VCOL_IMPOSSIBLE); + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + return type_handler()->Item_get_date_with_warn(thd, this, ltime, fuzzydate); + } + Item* get_copy(THD *thd) override { return 0; } +}; + +class Item_proc_real :public Item_proc +{ + double value; +public: + Item_proc_real(THD *thd, const char *name_par, uint dec): + Item_proc(thd, name_par) + { + decimals=dec; max_length=float_length(dec); + } + const Type_handler *type_handler() const override + { return &type_handler_double; } + void set(double nr) override { value=nr; } + void set(longlong nr) override { value=(double) nr; } + void set(const char *str,uint 
length,CHARSET_INFO *cs) override + { + int err_not_used; + char *end_not_used; + value= cs->strntod((char*) str,length, &end_not_used, &err_not_used); + } + double val_real() override { return value; } + longlong val_int() override { return (longlong) value; } + String *val_str(String *s) override + { + s->set_real(value,decimals,default_charset()); + return s; + } + my_decimal *val_decimal(my_decimal *) override; + unsigned int size_of() { return sizeof(*this);} +}; + +class Item_proc_int :public Item_proc +{ + longlong value; +public: + Item_proc_int(THD *thd, const char *name_par): Item_proc(thd, name_par) + { max_length=11; } + const Type_handler *type_handler() const override + { + if (unsigned_flag) + return &type_handler_ulonglong; + return &type_handler_slonglong; + } + void set(double nr) override { value=(longlong) nr; } + void set(longlong nr) override { value=nr; } + void set(const char *str,uint length, CHARSET_INFO *cs) override + { int err; value= cs->strntoll(str,length,10,NULL,&err); } + double val_real() override { return (double) value; } + longlong val_int() override { return value; } + String *val_str(String *s) override + { s->set(value, default_charset()); return s; } + my_decimal *val_decimal(my_decimal *) override; + unsigned int size_of() { return sizeof(*this);} +}; + + +class Item_proc_string :public Item_proc +{ + String value; +public: + Item_proc_string(THD *thd, const char *name_par, uint length): + Item_proc(thd, name_par) + { + this->max_length=length; + value.set_thread_specific(); + } + const Type_handler *type_handler() const override + { return &type_handler_varchar; } + void set(double nr) override { value.set_real(nr, 2, default_charset()); } + void set(longlong nr) override { value.set(nr, default_charset()); } + void set(const char *str, uint length, CHARSET_INFO *cs) override + { value.copy(str,length,cs); } + double val_real() override + { + int err_not_used; + char *end_not_used; + CHARSET_INFO *cs= value.charset(); + 
return cs->strntod((char*) value.ptr(), value.length(), + &end_not_used, &err_not_used); + } + longlong val_int() override + { + int err; + CHARSET_INFO *cs=value.charset(); + return cs->strntoll(value.ptr(), value.length(), 10, NULL, &err); + } + String *val_str(String*) override + { + return null_value ? (String*) 0 : &value; + } + my_decimal *val_decimal(my_decimal *) override; + void cleanup() override { value.free(); } + unsigned int size_of() { return sizeof(*this);} +}; + +/* The procedure class definitions */ + +class Procedure { +protected: + List *fields; + select_result *result; +public: + const uint flags; + ORDER *group,*param_fields; + Procedure(select_result *res,uint flags_par) :result(res),flags(flags_par), + group(0),param_fields(0) {} + virtual ~Procedure() {group=param_fields=0; fields=0; } + virtual void add(void)=0; + virtual void end_group(void)=0; + virtual int send_row(List &fields)=0; + virtual bool change_columns(THD *thd, List &fields)= 0; + virtual void update_refs(void) {} + virtual int end_of_records() { return 0; } +}; + +Procedure *setup_procedure(THD *thd,ORDER *proc_param,select_result *result, + List &field_list,int *error); + +#endif /* PROCEDURE_INCLUDED */ diff --git a/sql/protocol.cc b/sql/protocol.cc new file mode 100644 index 00000000..6667129d --- /dev/null +++ b/sql/protocol.cc @@ -0,0 +1,1991 @@ +/* Copyright (c) 2000, 2012, Oracle and/or its affiliates. + Copyright (c) 2008, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/**
+  @file
+
+  Low level functions for storing data to be sent to the MySQL client.
+  The actual communication is handled by the net_xxx functions in net_serv.cc
+*/
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation				// gcc: Class implementation
+#endif
+
+#include "mariadb.h"
+#include "sql_priv.h"
+#include "protocol.h"
+#include "sql_class.h"                          // THD
+#include 
+
+static const unsigned int PACKET_BUFFER_EXTRA_ALLOC= 1024;
+#ifndef EMBEDDED_LIBRARY
+static bool write_eof_packet(THD *, NET *, uint, uint);
+#endif
+
+CHARSET_INFO *Protocol::character_set_results() const
+{
+  return thd->variables.character_set_results;
+}
+
+
+#ifndef EMBEDDED_LIBRARY
+bool Protocol::net_store_data(const uchar *from, size_t length)
+#else
+bool Protocol_binary::net_store_data(const uchar *from, size_t length)
+#endif
+{
+  ulong packet_length=packet->length();
+  /*
+    The +9 comes from the fact that strings of length longer than 16M require
+    9 bytes to be stored (see net_store_length).
+  */
+  if (packet_length+9+length > packet->alloced_length() &&
+      packet->realloc(packet_length+9+length))
+    return 1;
+  uchar *to= net_store_length((uchar*) packet->ptr()+packet_length, length);
+  if (length)
+    memcpy(to,from,length);
+  packet->length((uint) (to+length-(uchar*) packet->ptr()));
+  return 0;
+}
+
+
+/*
+  net_store_data_cs() - extended version with character set conversion.
+
+  It is optimized for short strings whose length after
+  conversion is guaranteed to be less than 251, which occupies
+  exactly one byte to store length. It allows not to use
+  the "convert" member as a temporary buffer, conversion
+  is done directly to the "packet" member.
+  The limit 251 is good enough to optimize send_result_set_metadata()
+  because column, table, database names fit into this limit.
+*/
+
+#ifndef EMBEDDED_LIBRARY
+bool Protocol::net_store_data_cs(const uchar *from, size_t length,
+                                 CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
+#else
+bool Protocol_binary::net_store_data_cs(const uchar *from, size_t length,
+                                        CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
+#endif
+{
+  uint dummy_errors;
+  /* Calculate maximum possible result length */
+  size_t conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen;
+
+  if (conv_length > 250)
+  {
+    /*
+      For strings with conv_length greater than 250 bytes
+      we don't know how many bytes we will need to store length: one or two,
+      because we don't know result length until conversion is done.
+      For example, when converting from utf8 (mbmaxlen=3) to latin1,
+      conv_length=300 means that the result length can vary between 100 and 300.
+      length=100 needs one byte, length=300 needs two bytes.
+
+      Thus conversion directly to "packet" is not worthwhile.
+      Let's use "convert" as a temporary buffer.
+    */
+    return (convert->copy((const char*) from, length, from_cs,
+                          to_cs, &dummy_errors) ||
+            net_store_data((const uchar*) convert->ptr(), convert->length()));
+  }
+
+  size_t packet_length= packet->length();
+  size_t new_length= packet_length + conv_length + 1;
+
+  if (new_length > packet->alloced_length() && packet->realloc(new_length))
+    return 1;
+
+  char *length_pos= (char*) packet->ptr() + packet_length;
+  char *to= length_pos + 1;
+
+  to+= copy_and_convert(to, conv_length, to_cs,
+                        (const char*) from, length, from_cs, &dummy_errors);
+
+  net_store_length((uchar*) length_pos, to - length_pos - 1);
+  packet->length((uint) (to - packet->ptr()));
+  return 0;
+}
+
+
+/**
+  Send an error string to client.
+
+  Design note:
+
+  net_printf_error and net_send_error are low-level functions
+  that shall be used only when a new connection is being
+  established or at server startup.
+
+  For SIGNAL/RESIGNAL and GET DIAGNOSTICS functionality it's
+  critical that every error that can be intercepted is issued in one
+  place only, my_message_sql.
+ + @param thd Thread handler + @param sql_errno The error code to send + @param err A pointer to the error message + + @return + @retval FALSE The message was sent to the client + @retval TRUE An error occurred and the message wasn't sent properly +*/ + +bool Protocol::net_send_error(THD *thd, uint sql_errno, const char *err, + const char* sqlstate) +{ + bool error; + DBUG_ENTER("Protocol::net_send_error"); + + DBUG_ASSERT(!thd->spcont); + DBUG_ASSERT(sql_errno); + DBUG_ASSERT(err); + + DBUG_PRINT("enter",("sql_errno: %d err: %s", sql_errno, err)); + + if (sqlstate == NULL) + sqlstate= mysql_errno_to_sqlstate(sql_errno); + + /* + It's one case when we can push an error even though there + is an OK or EOF already. + */ + thd->get_stmt_da()->set_overwrite_status(true); + + /* Abort multi-result sets */ + thd->server_status&= ~SERVER_MORE_RESULTS_EXISTS; + + error= net_send_error_packet(thd, sql_errno, err, sqlstate); + + thd->get_stmt_da()->set_overwrite_status(false); + + DBUG_RETURN(error); +} + +/** + Return ok to the client. + + The ok packet has the following structure: + + - 0 : Marker (1 byte) + - affected_rows : Stored in 1-9 bytes + - id : Stored in 1-9 bytes + - server_status : Copy of thd->server_status; Can be used by client + to check if we are inside an transaction. + New in 4.0 protocol + - warning_count : Stored in 2 bytes; New in 4.1 protocol + - message : Stored as packed length (1-9 bytes) + message. + Is not stored if no message. 
+ + @param thd Thread handler + @param server_status The server status + @param statement_warn_count Total number of warnings + @param affected_rows Number of rows changed by statement + @param id Auto_increment id for first row (if used) + @param message Message to send to the client (Used by mysql_status) + @param is_eof this called instead of old EOF packet + + @return + @retval FALSE The message was successfully sent + @retval TRUE An error occurred and the messages wasn't sent properly + +*/ + +#ifndef EMBEDDED_LIBRARY +bool +Protocol::net_send_ok(THD *thd, + uint server_status, uint statement_warn_count, + ulonglong affected_rows, ulonglong id, + const char *message, bool is_eof) +{ + NET *net= &thd->net; + StringBuffer store; + + bool error= FALSE; + DBUG_ENTER("Protocol::net_send_ok"); + + if (! net->vio) // hack for re-parsing queries + { + DBUG_PRINT("info", ("vio present: NO")); + DBUG_RETURN(FALSE); + } + + /* + OK send instead of EOF still require 0xFE header, but OK packet content. 
+ */ + if (is_eof) + { + DBUG_ASSERT(thd->client_capabilities & CLIENT_DEPRECATE_EOF); + store.q_append((char)254); + } + else + store.q_append('\0'); + + /* affected rows */ + store.q_net_store_length(affected_rows); + + /* last insert id */ + store.q_net_store_length(id); + + /* if client has not session tracking capability, don't send state change flag*/ + if (!(thd->client_capabilities & CLIENT_SESSION_TRACK)) { + server_status &= ~SERVER_SESSION_STATE_CHANGED; + } + + if (thd->client_capabilities & CLIENT_PROTOCOL_41) + { + DBUG_PRINT("info", + ("affected_rows: %lu id: %lu status: %u warning_count: %u", + (ulong) affected_rows, + (ulong) id, + (uint) (server_status & 0xffff), + (uint) statement_warn_count)); + store.q_append2b(server_status); + + /* We can only return up to 65535 warnings in two bytes */ + uint tmp= MY_MIN(statement_warn_count, 65535); + store.q_append2b(tmp); + } + else if (net->return_status) // For 4.0 protocol + { + store.q_append2b(server_status); + } + thd->get_stmt_da()->set_overwrite_status(true); + + if ((server_status & SERVER_SESSION_STATE_CHANGED) || (message && message[0])) + { + DBUG_ASSERT(safe_strlen(message) <= MYSQL_ERRMSG_SIZE); + store.q_net_store_data((uchar*) safe_str(message), safe_strlen(message)); + } + + if (unlikely(server_status & SERVER_SESSION_STATE_CHANGED)) + { + store.set_charset(thd->variables.collation_database); + thd->session_tracker.store(thd, &store); + thd->server_status&= ~SERVER_SESSION_STATE_CHANGED; + } + + DBUG_ASSERT(store.length() <= MAX_PACKET_LENGTH); + + error= my_net_write(net, (const unsigned char*)store.ptr(), store.length()); + if (likely(!error)) + error= net_flush(net); + + thd->get_stmt_da()->set_overwrite_status(false); + DBUG_PRINT("info", ("OK sent, so no more error sending allowed")); + + DBUG_RETURN(error); +} + + +static uchar eof_buff[1]= { (uchar) 254 }; /* Marker for end of fields */ + +/** + Send eof (= end of result set) to the client. 
+ + The eof packet has the following structure: + + - 254 : Marker (1 byte) + - warning_count : Stored in 2 bytes; New in 4.1 protocol + - status_flag : Stored in 2 bytes; + For flags like SERVER_MORE_RESULTS_EXISTS. + + Note that the warning count will not be sent if 'no_flush' is set as + we don't want to report the warning count until all data is sent to the + client. + + @param thd Thread handler + @param server_status The server status + @param statement_warn_count Total number of warnings + + @return + @retval FALSE The message was successfully sent + @retval TRUE An error occurred and the message wasn't sent properly +*/ + +bool +Protocol::net_send_eof(THD *thd, uint server_status, uint statement_warn_count) +{ + NET *net= &thd->net; + bool error= FALSE; + DBUG_ENTER("Protocol::net_send_eof"); + DBUG_PRINT("enter", ("status: %u warn_count: %u", + server_status, statement_warn_count)); + + /* + Check if client understand new format packets (OK instead of EOF) + + Normally end of statement reply is signaled by OK packet, but in case + of binlog dump request an EOF packet is sent instead. Also, old clients + expect EOF packet instead of OK + */ + if ((thd->client_capabilities & CLIENT_DEPRECATE_EOF) && + (thd->get_command() != COM_BINLOG_DUMP )) + { + error= net_send_ok(thd, server_status, statement_warn_count, 0, 0, NULL, + true); + DBUG_RETURN(error); + } + + /* Set to TRUE if no active vio, to work well in case of --init-file */ + if (net->vio != 0) + { + thd->get_stmt_da()->set_overwrite_status(true); + error= write_eof_packet(thd, net, server_status, statement_warn_count); + if (likely(!error)) + error= net_flush(net); + thd->get_stmt_da()->set_overwrite_status(false); + DBUG_PRINT("info", ("EOF sent, so no more error sending allowed")); + } + DBUG_RETURN(error); +} + + +/** + Format EOF packet according to the current protocol and + write it to the network output buffer. 
+ + @param thd The thread handler + @param net The network handler + @param server_status The server status + @param statement_warn_count The number of warnings + + + @return + @retval FALSE The message was sent successfully + @retval TRUE An error occurred and the messages wasn't sent properly +*/ + +static bool write_eof_packet(THD *thd, NET *net, + uint server_status, + uint statement_warn_count) +{ + bool error; + if (thd->client_capabilities & CLIENT_PROTOCOL_41) + { + uchar buff[5]; + /* + Don't send warn count during SP execution, as the warn_list + is cleared between substatements, and mysqltest gets confused + */ + uint tmp= MY_MIN(statement_warn_count, 65535); + buff[0]= 254; + int2store(buff+1, tmp); + /* + The following test should never be true, but it's better to do it + because if 'is_fatal_error' is set the server is not going to execute + other queries (see the if test in dispatch_command / COM_QUERY) + */ + if (unlikely(thd->is_fatal_error)) + server_status&= ~SERVER_MORE_RESULTS_EXISTS; + int2store(buff + 3, server_status); + error= my_net_write(net, buff, 5); + } + else + error= my_net_write(net, eof_buff, 1); + + return error; +} + +/** + @param thd Thread handler + @param sql_errno The error code to send + @param err A pointer to the error message + + @return + @retval FALSE The message was successfully sent + @retval TRUE An error occurred and the messages wasn't sent properly +*/ + +bool Protocol::net_send_error_packet(THD *thd, uint sql_errno, const char *err, + const char* sqlstate) + +{ + NET *net= &thd->net; + uint length; + /* + buff[]: sql_errno:2 + ('#':1 + SQLSTATE_LENGTH:5) + MYSQL_ERRMSG_SIZE:512 + */ + uint error; + char converted_err[MYSQL_ERRMSG_SIZE]; + char buff[2+1+SQLSTATE_LENGTH+MYSQL_ERRMSG_SIZE], *pos; + my_bool ret; + uint8 save_compress; + DBUG_ENTER("Protocol::send_error_packet"); + + if (net->vio == 0) + { + if (thd->bootstrap) + { + /* In bootstrap it's ok to print on stderr */ + fprintf(stderr,"ERROR: %d 
%s\n",sql_errno,err); + } + DBUG_RETURN(FALSE); + } + + int2store(buff,sql_errno); + pos= buff+2; + if (thd->client_capabilities & CLIENT_PROTOCOL_41) + { + /* The first # is to make the protocol backward compatible */ + buff[2]= '#'; + pos= strmov(buff+3, sqlstate); + } + + convert_error_message(converted_err, sizeof(converted_err), + thd->variables.character_set_results, + err, strlen(err), system_charset_info, &error); + /* Converted error message is always null-terminated. */ + length= (uint) (strmake(pos, converted_err, MYSQL_ERRMSG_SIZE - 1) - buff); + + /* + Ensure that errors are not compressed. This is to ensure we can + detect out of bands error messages in the client + */ + if ((save_compress= net->compress)) + net->compress= 2; + + /* + Sometimes, we send errors "out-of-band", e.g ER_CONNECTION_KILLED + on an idle connection. The current protocol "sequence number" is 0, + however some client drivers would however always expect packets + coming from server to have seq_no > 0, due to missing awareness + of "out-of-band" operations. Make these clients happy. + */ + if (!net->pkt_nr && + (sql_errno == ER_CONNECTION_KILLED || sql_errno == ER_SERVER_SHUTDOWN || + sql_errno == ER_QUERY_INTERRUPTED)) + { + net->pkt_nr= 1; + } + + ret= net_write_command(net,(uchar) 255, (uchar*) "", 0, (uchar*) buff, + length); + net->compress= save_compress; + DBUG_RETURN(ret); +} + +#endif /* EMBEDDED_LIBRARY */ + +/** + Faster net_store_length when we know that length is less than 65536. + We keep a separate version for that range because it's widely used in + libmysql. + + uint is used as agrument type because of MySQL type conventions: + - uint for 0..65536 + - ulong for 0..4294967296 + - ulonglong for bigger numbers. 
+*/ + +static uchar *net_store_length_fast(uchar *packet, size_t length) +{ + DBUG_ASSERT(length < UINT_MAX16); + if (length < 251) + { + *packet=(uchar) length; + return packet+1; + } + *packet++=252; + int2store(packet,(uint) length); + return packet+2; +} + +/** + Send the status of the current statement execution over network. + + @param thd in fact, carries two parameters, NET for the transport and + Diagnostics_area as the source of status information. + + In MySQL, there are two types of SQL statements: those that return + a result set and those that return status information only. + + If a statement returns a result set, it consists of 3 parts: + - result set meta-data + - variable number of result set rows (can be 0) + - followed and terminated by EOF or ERROR packet + + Once the client has seen the meta-data information, it always + expects an EOF or ERROR to terminate the result set. If ERROR is + received, the result set rows are normally discarded (this is up + to the client implementation, libmysql at least does discard them). + EOF, on the contrary, means "successfully evaluated the entire + result set". Since we don't know how many rows belong to a result + set until it's evaluated, EOF/ERROR is the indicator of the end + of the row stream. Note, that we can not buffer result set rows + on the server -- there may be an arbitrary number of rows. But + we do buffer the last packet (EOF/ERROR) in the Diagnostics_area and + delay sending it till the very end of execution (here), to be able to + change EOF to an ERROR if commit failed or some other error occurred + during the last cleanup steps taken after execution. + + A statement that does not return a result set doesn't send result + set meta-data either. Instead it returns one of: + - OK packet + - ERROR packet. + Similarly to the EOF/ERROR of the previous statement type, OK/ERROR + packet is "buffered" in the diagnostics area and sent to the client + in the end of statement. 
+ + @note This method defines a template, but delegates actual + sending of data to virtual Protocol::send_{ok,eof,error}. This + allows for implementation of protocols that "intercept" ok/eof/error + messages, and store them in memory, etc, instead of sending to + the client. + + @pre The diagnostics area is assigned or disabled. It can not be empty + -- we assume that every SQL statement or COM_* command + generates OK, ERROR, or EOF status. + + @post The status information is encoded to protocol format and sent to the + client. + + @return We conventionally return void, since the only type of error + that can happen here is a NET (transport) error, and that one + will become visible when we attempt to read from the NET the + next command. + Diagnostics_area::is_sent is set for debugging purposes only. +*/ + +void Protocol::end_statement() +{ +#ifdef WITH_WSREP + /* + Commented out: This sanity check does not hold in general. + Thd->LOCK_thd_data() must be unlocked before sending response + to client, so BF abort may sneak in here. + DBUG_ASSERT(!WSREP(thd) || thd->wsrep_conflict_state() == NO_CONFLICT); + */ + + /* + sanity check, don't send end statement while replaying + */ + DBUG_ASSERT(thd->wsrep_trx().state() != wsrep::transaction::s_replaying); + if (WSREP(thd) && thd->wsrep_trx().state() == + wsrep::transaction::s_replaying) + { + WSREP_ERROR("attempting net_end_statement while replaying"); + return; + } +#endif /* WITH_WSREP */ + + DBUG_ENTER("Protocol::end_statement"); + DBUG_ASSERT(! thd->get_stmt_da()->is_sent()); + bool error= FALSE; + + /* Can not be true, but do not take chances in production. */ + if (thd->get_stmt_da()->is_sent()) + DBUG_VOID_RETURN; + + switch (thd->get_stmt_da()->status()) { + case Diagnostics_area::DA_ERROR: + /* The query failed, send error to log and abort bootstrap. 
*/ + error= send_error(thd->get_stmt_da()->sql_errno(), + thd->get_stmt_da()->message(), + thd->get_stmt_da()->get_sqlstate()); + break; + case Diagnostics_area::DA_EOF: + case Diagnostics_area::DA_EOF_BULK: + error= send_eof(thd->server_status, + thd->get_stmt_da()->statement_warn_count()); + break; + case Diagnostics_area::DA_OK: + case Diagnostics_area::DA_OK_BULK: + error= send_ok(thd->server_status, + thd->get_stmt_da()->statement_warn_count(), + thd->get_stmt_da()->affected_rows(), + thd->get_stmt_da()->last_insert_id(), + thd->get_stmt_da()->message()); + break; + case Diagnostics_area::DA_DISABLED: + break; + case Diagnostics_area::DA_EMPTY: + default: + DBUG_ASSERT(0); + error= send_ok(thd->server_status, 0, 0, 0, NULL); + break; + } + if (likely(!error)) + thd->get_stmt_da()->set_is_sent(true); + DBUG_VOID_RETURN; +} + +/** + A default implementation of "OK" packet response to the client. + + Currently this implementation is re-used by both network-oriented + protocols -- the binary and text one. They do not differ + in their OK packet format, which allows for a significant simplification + on client side. +*/ + +bool Protocol::send_ok(uint server_status, uint statement_warn_count, + ulonglong affected_rows, ulonglong last_insert_id, + const char *message) +{ + DBUG_ENTER("Protocol::send_ok"); + const bool retval= + net_send_ok(thd, server_status, statement_warn_count, + affected_rows, last_insert_id, message, false); + DBUG_RETURN(retval); +} + + +/** + A default implementation of "EOF" packet response to the client. + + Binary and text protocol do not differ in their EOF packet format. +*/ + +bool Protocol::send_eof(uint server_status, uint statement_warn_count) +{ + DBUG_ENTER("Protocol::send_eof"); + bool retval= net_send_eof(thd, server_status, statement_warn_count); + DBUG_RETURN(retval); +} + + +/** + A default implementation of "ERROR" packet response to the client. + + Binary and text protocol do not differ in ERROR packet format. 
+*/ + +bool Protocol::send_error(uint sql_errno, const char *err_msg, + const char *sql_state) +{ + DBUG_ENTER("Protocol::send_error"); + const bool retval= net_send_error_packet(thd, sql_errno, err_msg, sql_state); + DBUG_RETURN(retval); +} + + +/** + Send a progress report to the client + + What we send is: + header (255,255,255,1) + stage, max_stage as on byte integers + percentage withing the stage as percentage*1000 + (that is, ratio*100000) as a 3 byte integer + proc_info as a string +*/ + +const uchar progress_header[2]= {(uchar) 255, (uchar) 255 }; + +void net_send_progress_packet(THD *thd) +{ + uchar buff[200], *pos; + const char *proc_info= thd->proc_info ? thd->proc_info : ""; + size_t length= strlen(proc_info); + ulonglong progress; + DBUG_ENTER("net_send_progress_packet"); + + if (unlikely(!thd->net.vio)) + DBUG_VOID_RETURN; // Socket is closed + + pos= buff; + /* + Store number of strings first. This allows us to later expand the + progress indicator if needed. + */ + *pos++= (uchar) 1; // Number of strings + *pos++= (uchar) thd->progress.stage + 1; + /* + We have the MY_MAX() here to avoid problems if max_stage is not set, + which may happen during automatic repair of table + */ + *pos++= (uchar) MY_MAX(thd->progress.max_stage, thd->progress.stage + 1); + progress= 0; + if (thd->progress.max_counter) + progress= 100000ULL * thd->progress.counter / thd->progress.max_counter; + int3store(pos, progress); // Between 0 & 100000 + pos+= 3; + pos= net_store_data(pos, (const uchar*) proc_info, + MY_MIN(length, sizeof(buff)-7)); + net_write_command(&thd->net, (uchar) 255, progress_header, + sizeof(progress_header), (uchar*) buff, + (uint) (pos - buff)); + DBUG_VOID_RETURN; +} + + +/**************************************************************************** + Functions used by the protocol functions (like net_send_ok) to store + strings and numbers in the header result packet. 
+****************************************************************************/ + +/* The following will only be used for short strings < 65K */ + +uchar *net_store_data(uchar *to, const uchar *from, size_t length) +{ + to=net_store_length_fast(to,length); + if (length) + memcpy(to,from,length); + return to+length; +} + +uchar *net_store_data(uchar *to,int32 from) +{ + char buff[22]; + uint length=(uint) (int10_to_str(from,buff,10)-buff); + to=net_store_length_fast(to,length); + memcpy(to,buff,length); + return to+length; +} + +uchar *net_store_data(uchar *to,longlong from) +{ + char buff[22]; + uint length=(uint) (longlong10_to_str(from,buff,10)-buff); + to=net_store_length_fast(to,length); + memcpy(to,buff,length); + return to+length; +} + + +/***************************************************************************** + Default Protocol functions +*****************************************************************************/ + +void Protocol::init(THD *thd_arg) +{ + thd=thd_arg; + packet= &thd->packet; + convert= &thd->convert_buffer; +#ifndef DBUG_OFF + field_handlers= 0; + field_pos= 0; +#endif +} + +/** + Finish the result set with EOF packet, as is expected by the client, + if there is an error evaluating the next row and a continue handler + for the error. 
+*/ + +void Protocol::end_partial_result_set(THD *thd_arg) +{ + net_send_eof(thd_arg, thd_arg->server_status, + 0 /* no warnings, we're inside SP */); +} + + +bool Protocol::flush() +{ +#ifndef EMBEDDED_LIBRARY + bool error; + thd->get_stmt_da()->set_overwrite_status(true); + error= net_flush(&thd->net); + thd->get_stmt_da()->set_overwrite_status(false); + return error; +#else + return 0; +#endif +} + +#ifndef EMBEDDED_LIBRARY + + +class Send_field_packed_extended_metadata: public Binary_string +{ +public: + bool append_chunk(mariadb_field_attr_t type, const LEX_CSTRING &value) + { + /* + If we eventually support many metadata chunk types and long metadata + values, we'll need to encode type and length using net_store_length() + and do corresponding changes to the unpacking code in libmariadb. + For now let's just assert that type and length fit into one byte. + */ + DBUG_ASSERT(net_length_size(type) == 1); + DBUG_ASSERT(net_length_size(value.length) == 1); + size_t nbytes= 1/*type*/ + 1/*length*/ + value.length; + if (reserve(nbytes)) + return true; + qs_append((char) (uchar) type); + qs_append((char) (uchar) value.length); + qs_append(&value); + return false; + } + bool pack(const Send_field_extended_metadata &src) + { + for (uint i= 0 ; i <= MARIADB_FIELD_ATTR_LAST; i++) + { + const LEX_CSTRING attr= src.attr(i); + if (attr.str && append_chunk((mariadb_field_attr_t) i, attr)) + return true; + } + return false; + } +}; + + +bool Protocol_text::store_field_metadata(const THD * thd, + const Send_field &field, + CHARSET_INFO *charset_for_protocol, + uint fieldnr) +{ + CHARSET_INFO *thd_charset= thd->variables.character_set_results; + char *pos; + DBUG_ASSERT(field.is_sane()); + + if (thd->client_capabilities & CLIENT_PROTOCOL_41) + { + const LEX_CSTRING def= {STRING_WITH_LEN("def")}; + if (store_ident(def) || + store_ident(field.db_name) || + store_ident(field.table_name) || + store_ident(field.org_table_name) || + store_ident(field.col_name) || + 
store_ident(field.org_col_name)) + return true; + if (thd->client_capabilities & MARIADB_CLIENT_EXTENDED_METADATA) + { + Send_field_packed_extended_metadata metadata; + metadata.pack(field); + + /* + Don't apply character set conversion: + extended metadata is a binary encoded data. + */ + if (store_binary_string(metadata.ptr(), metadata.length())) + return true; + } + if (packet->realloc(packet->length() + 12)) + return true; + /* Store fixed length fields */ + pos= (char*) packet->end(); + *pos++= 12; // Length of packed fields + /* inject a NULL to test the client */ + DBUG_EXECUTE_IF("poison_rs_fields", pos[-1]= (char) 0xfb;); + if (charset_for_protocol == &my_charset_bin || thd_charset == NULL) + { + /* No conversion */ + uint id= charset_for_protocol->get_id(MY_COLLATION_ID_TYPE_COMPAT_100800); + DBUG_ASSERT(id <= UINT_MAX16); + int2store(pos, (uint16) id); + int4store(pos + 2, field.length); + } + else + { + /* With conversion */ + uint id= thd_charset->get_id(MY_COLLATION_ID_TYPE_COMPAT_100800); + DBUG_ASSERT(id <= UINT_MAX16); + int2store(pos, (uint16) id); + uint32 field_length= field.max_octet_length(charset_for_protocol, + thd_charset); + int4store(pos + 2, field_length); + } + pos[6]= field.type_handler()->type_code_for_protocol(); + int2store(pos + 7, field.flags); + pos[9]= (char) field.decimals; + pos[10]= 0; // For the future + pos[11]= 0; // For the future + pos+= 12; + } + else + { + if (store_ident(field.table_name) || + store_ident(field.col_name) || + packet->realloc(packet->length() + 10)) + return true; + pos= (char*) packet->end(); + pos[0]= 3; + int3store(pos + 1, field.length); + pos[4]= 1; + pos[5]= field.type_handler()->type_code_for_protocol(); + pos[6]= 3; + int2store(pos + 7, field.flags); + pos[9]= (char) field.decimals; + pos+= 10; + } + packet->length((uint) (pos - packet->ptr())); + return false; +} + + +/* + MARIADB_CLIENT_CACHE_METADATA support. 
+ + Bulk of the code below is dedicated to detecting whether column metadata has + changed after prepare, or between executions of a prepared statement. + + For some prepared statements, metadata can't change without going through + Prepared_Statement::reprepare(), which makes detecting changes easy. + + Others, "SELECT ?" & Co, are more fragile, and sensitive to input parameters, + or user variables. Detecting metadata change for this class of PS is harder, + we calculate signature (hash value), and check whether this changes between + executions. This is a more expensive method. +*/ + + +/** + Detect whether column info can be changed without + PS repreparing. + + Such colum info is called fragile. The opposite of + fragile is. + + + @param it - Item representing column info + @return true, if columninfo is "fragile", false if it is stable + + + @todo does not work due to MDEV-23913. Currently, + everything about prepared statements is fragile. +*/ + +static bool is_fragile_columnifo(Item *it) +{ +#define MDEV_23913_FIXED 0 +#if MDEV_23913_FIXED + if (dynamic_cast(it)) + return true; + + if (dynamic_cast(it)) + return true; + + if (dynamic_cast (it)) + return true; + + /* Check arguments of functions.*/ + auto item_args= dynamic_cast(it); + if (!item_args) + return false; + auto args= item_args->arguments(); + auto arg_count= item_args->argument_count(); + for (uint i= 0; i < arg_count; i++) + { + if (is_fragile_columnifo(args[i])) + return true; + } + return false; +#else /* MDEV-23913 fixed*/ + return true; +#endif +} + + +#define INVALID_METADATA_CHECKSUM 0 + + +/** + Calculate signature for column info sent to the client as CRC32 over data, + that goes into the column info packet. + We assume that if checksum does not change, then column info was not + modified. 
+ + @param thd THD + @param list column info + + @return CRC32 of the metadata +*/ + +static uint32 calc_metadata_hash(THD *thd, List *list) +{ + List_iterator_fast it(*list); + Item *item; + uint32 crc32_c= 0; + while ((item= it++)) + { + Send_field field(thd, item); + auto field_type= item->type_handler()->field_type(); + auto charset= item->charset_for_protocol(); + /* + The data below should contain everything that influences + content of the column info packet. + */ + LEX_CSTRING data[]= + { + field.table_name, + field.org_table_name, + field.col_name, + field.org_col_name, + field.db_name, + field.attr(MARIADB_FIELD_ATTR_DATA_TYPE_NAME), + field.attr(MARIADB_FIELD_ATTR_FORMAT_NAME), + {(const char *) &field.length, sizeof(field.length)}, + {(const char *) &field.flags, sizeof(field.flags)}, + {(const char *) &field.decimals, sizeof(field.decimals)}, + {(const char *) &charset, sizeof(charset)}, + {(const char *) &field_type, sizeof(field_type)}, + }; + for (const auto &chunk : data) + crc32_c= my_crc32c(crc32_c, chunk.str, chunk.length); + } + + if (crc32_c == INVALID_METADATA_CHECKSUM) + return 1; + return crc32_c; +} + + + +/** + Check if metadata columns have changed since last call to this + function. + + @param send_column_info_state saved state, changed if the function + return true. 
+ @param thd THD + @param list columninfo Items + @return true,if metadata columns have changed since last call, + false otherwise +*/ + +static bool metadata_columns_changed(send_column_info_state &state, THD *thd, + List &list) +{ + if (!state.initialized) + { + state.initialized= true; + state.immutable= true; + Item *item; + List_iterator_fast it(list); + while ((item= it++)) + { + if (is_fragile_columnifo(item)) + { + state.immutable= false; + state.checksum= calc_metadata_hash(thd, &list); + break; + } + } + state.last_charset= thd->variables.character_set_client; + return true; + } + + /* + Since column info can change under our feet, we use more expensive + checksumming to check if column metadata has not changed since last time. + */ + if (!state.immutable) + { + uint32 checksum= calc_metadata_hash(thd, &list); + if (checksum != state.checksum) + { + state.checksum= checksum; + state.last_charset= thd->variables.character_set_client; + return true; + } + } + + /* + Character_set_client influences result set metadata, thus resend metadata + whenever it changes. + */ + if (state.last_charset != thd->variables.character_set_client) + { + state.last_charset= thd->variables.character_set_client; + return true; + } + + return false; +} + + +/** + Determine whether column info must be sent to the client. + Skip column info, if client supports caching, and (prepared) statement + output fields have not changed. + + @param thd THD + @param list column info + @param flags send flags. If Protocol::SEND_FORCE_COLUMN_INFO is set, + this function will return true + @return true, if column info must be sent to the client. 
+ false otherwise +*/ + +static bool should_send_column_info(THD* thd, List* list, uint flags) +{ + if (!(thd->client_capabilities & MARIADB_CLIENT_CACHE_METADATA)) + { + /* Client does not support abbreviated metadata.*/ + return true; + } + + if (!thd->cur_stmt) + { + /* Neither COM_PREPARE nor COM_EXECUTE run.*/ + return true; + } + + if (thd->spcont) + { + /* Always sent full metadata from inside the stored procedure.*/ + return true; + } + + if (flags & Protocol::SEND_FORCE_COLUMN_INFO) + return true; + + auto &column_info_state= thd->cur_stmt->column_info_state; +#ifndef DBUG_OFF + auto cmd= thd->get_command(); +#endif + + DBUG_ASSERT(cmd == COM_STMT_EXECUTE || cmd == COM_STMT_PREPARE + || cmd == COM_STMT_BULK_EXECUTE); + DBUG_ASSERT(cmd != COM_STMT_PREPARE || !column_info_state.initialized); + + bool ret= metadata_columns_changed(column_info_state, thd, *list); + + DBUG_ASSERT(cmd != COM_STMT_PREPARE || ret); + if (!ret) + thd->status_var.skip_metadata_count++; + + return ret; +} + + +/** + Send name and type of result to client. + + Sum fields has table name empty and field_name. + + @param THD Thread data object + @param list List of items to send to client + @param flag Bit mask with the following functions: + - 1 send number of rows + - 2 send default values + - 4 don't write eof packet + + @retval + 0 ok + @retval + 1 Error (Note that in this case the error is not sent to the + client) +*/ +bool Protocol::send_result_set_metadata(List *list, uint flags) +{ + DBUG_ENTER("Protocol::send_result_set_metadata"); + + bool send_column_info= should_send_column_info(thd, list, flags); + + if (flags & SEND_NUM_ROWS) + { + /* + Packet with number of columns. + + Will also have a 1 byte column info indicator, in case + MARIADB_CLIENT_CACHE_METADATA client capability is set. 
+ */ + uchar buff[MAX_INT_WIDTH+1]; + uchar *pos= net_store_length(buff, list->elements); + if (thd->client_capabilities & MARIADB_CLIENT_CACHE_METADATA) + *pos++= (uchar)send_column_info; + + DBUG_ASSERT(pos <= buff + sizeof(buff)); + if (my_net_write(&thd->net, buff, (size_t) (pos-buff))) + DBUG_RETURN(1); + } + + if (send_column_info) + { + List_iterator_fast it(*list); + Item *item; + Protocol_text prot(thd, thd->variables.net_buffer_length); +#ifndef DBUG_OFF + field_handlers= (const Type_handler **) thd->alloc( + sizeof(field_handlers[0]) * list->elements); +#endif + + for (uint pos= 0; (item= it++); pos++) + { + prot.prepare_for_resend(); + if (prot.store_item_metadata(thd, item, pos)) + goto err; + if (prot.write()) + DBUG_RETURN(1); +#ifndef DBUG_OFF + field_handlers[pos]= item->type_handler(); +#endif + } + } + + if (flags & SEND_EOF) + { + + /* if it is new client do not send EOF packet */ + if (!(thd->client_capabilities & CLIENT_DEPRECATE_EOF)) + { + /* + Mark the end of meta-data result set, and store thd->server_status, + to show that there is no cursor. + Send no warning information, as it will be sent at statement end. 
+ */ + if (write_eof_packet(thd, &thd->net, thd->server_status, + thd->get_stmt_da()->current_statement_warn_count())) + DBUG_RETURN(1); + } + } + DBUG_RETURN(prepare_for_send(list->elements)); + +err: + my_message(ER_OUT_OF_RESOURCES, ER_THD(thd, ER_OUT_OF_RESOURCES), + MYF(0)); /* purecov: inspected */ + DBUG_RETURN(1); /* purecov: inspected */ +} + + +bool Protocol::send_list_fields(List *list, const TABLE_LIST *table_list) +{ + DBUG_ENTER("Protocol::send_list_fields"); + List_iterator_fast it(*list); + Field *fld; + Protocol_text prot(thd, thd->variables.net_buffer_length); + +#ifndef DBUG_OFF + field_handlers= (const Type_handler **) thd->alloc(sizeof(field_handlers[0]) * + list->elements); +#endif + + for (uint pos= 0; (fld= it++); pos++) + { + prot.prepare_for_resend(); + if (prot.store_field_metadata_for_list_fields(thd, fld, table_list, pos)) + goto err; + prot.store(fld); // Send default value + if (prot.write()) + DBUG_RETURN(1); +#ifndef DBUG_OFF + /* + Historically all BLOB variant Fields are displayed as + MYSQL_TYPE_BLOB in metadata. + See Field_blob::make_send_field() for more comments. + */ + field_handlers[pos]= Send_field(fld).type_handler(); +#endif + } + DBUG_RETURN(prepare_for_send(list->elements)); + +err: + my_message(ER_OUT_OF_RESOURCES, ER_THD(thd, ER_OUT_OF_RESOURCES), MYF(0)); + DBUG_RETURN(1); +} + + +bool Protocol::write() +{ + DBUG_ENTER("Protocol::write"); + DBUG_RETURN(my_net_write(&thd->net, (uchar*) packet->ptr(), + packet->length())); +} +#endif /* EMBEDDED_LIBRARY */ + + +bool Protocol_text::store_item_metadata(THD *thd, Item *item, uint pos) +{ + Send_field field(thd, item); + return store_field_metadata(thd, field, item->charset_for_protocol(), pos); +} + + +bool Protocol_text::store_field_metadata_for_list_fields(const THD *thd, + Field *fld, + const TABLE_LIST *tl, + uint pos) +{ + Send_field field= tl->view ? 
+ Send_field(fld, tl->view_db, tl->view_name) : + Send_field(fld); + return store_field_metadata(thd, field, fld->charset_for_protocol(), pos); +} + + +/** + Send one result set row. + + @param row_items a collection of column values for that row + + @return Error status. + @retval TRUE Error. + @retval FALSE Success. +*/ + +bool Protocol::send_result_set_row(List *row_items) +{ + List_iterator_fast it(*row_items); + ValueBuffer value_buffer; + DBUG_ENTER("Protocol::send_result_set_row"); + + for (Item *item= it++; item; item= it++) + { + value_buffer.reset_buffer(); + if (item->send(this, &value_buffer)) + { + // If we're out of memory, reclaim some, to help us recover. + this->free(); + DBUG_RETURN(TRUE); + } + /* Item::send() may generate an error. If so, abort the loop. */ + if (unlikely(thd->is_error())) + DBUG_RETURN(TRUE); + } + + DBUG_RETURN(FALSE); +} + + +/** + Send \\0 end terminated string or NULL + + @param from NullS or \\0 terminated string + + @note + In most cases one should use store(from, length) instead of this function + + @retval + 0 ok + @retval + 1 error +*/ + +bool Protocol::store_string_or_null(const char *from, CHARSET_INFO *cs) +{ + if (!from) + return store_null(); + return store(from, strlen(from), cs); +} + + +/** + Send a set of strings as one long string with ',' in between. +*/ + +bool Protocol::store(I_List* str_list) +{ + char buf[256]; + String tmp(buf, sizeof(buf), &my_charset_bin); + size_t len= 0; + I_List_iterator it(*str_list); + i_string* s; + const char *delimiter= ","; + + tmp.length(0); + while ((s=it++)) + { + tmp.append(delimiter, len); + tmp.append(s->ptr, strlen(s->ptr)); + len= 1; + } + + return store((char*) tmp.ptr(), tmp.length(), tmp.charset()); +} + + +/** + Send a set of strings as a string of key-value pairs with ',' in between. 
+*/ + +bool Protocol::store(I_List* str_list) +{ + char buf[256]; + const char *delimiter= ","; + String tmp(buf, sizeof(buf), &my_charset_bin); + size_t delim_len= 0; + I_List_iterator it(*str_list); + i_string_pair* s; + + tmp.length(0); + while ((s=it++)) + { + tmp.append(delimiter, delim_len); + tmp.append(s->key, strlen(s->key)); + tmp.append(STRING_WITH_LEN("->")); + tmp.append(s->val, strlen(s->val)); + delim_len= 1; + } + return store((char*) tmp.ptr(), tmp.length(), tmp.charset()); +} + + +/**************************************************************************** + Functions to handle the simple (default) protocol where everything is + This protocol is the one that is used by default between the MySQL server + and client when you are not using prepared statements. + + All data are sent as 'packed-string-length' followed by 'string-data' +****************************************************************************/ + +#ifndef EMBEDDED_LIBRARY +void Protocol_text::prepare_for_resend() +{ + packet->length(0); +#ifndef DBUG_OFF + field_pos= 0; +#endif +} + +bool Protocol_text::store_null() +{ +#ifndef DBUG_OFF + field_pos++; +#endif + char buff[1]; + buff[0]= (char)251; + return packet->append(buff, sizeof(buff), PACKET_BUFFER_EXTRA_ALLOC); +} +#endif + + +/** + Auxilary function to convert string to the given character set + and store in network buffer. 
+*/ + +bool Protocol::store_string_aux(const char *from, size_t length, + CHARSET_INFO *fromcs, CHARSET_INFO *tocs) +{ + /* 'tocs' is set 0 when client issues SET character_set_results=NULL */ + if (needs_conversion(fromcs, tocs)) + { + /* Store with conversion */ + return net_store_data_cs((uchar*) from, length, fromcs, tocs); + } + /* Store without conversion */ + return net_store_data((uchar*) from, length); +} + + +bool Protocol_text::store_numeric_string_aux(const char *from, size_t length) +{ + CHARSET_INFO *tocs= thd->variables.character_set_results; + // 'tocs' is NULL when the client issues SET character_set_results=NULL + if (tocs && (tocs->state & MY_CS_NONASCII)) // Conversion needed + return net_store_data_cs((uchar*) from, length, &my_charset_latin1, tocs); + return net_store_data((uchar*) from, length); // No conversion +} + + +bool Protocol::store_warning(const char *from, size_t length) +{ + BinaryStringBuffer tmp; + CHARSET_INFO *cs= thd->variables.character_set_results; + if (!cs || cs == &my_charset_bin) + cs= system_charset_info; + if (tmp.copy_printable_hhhh(cs, system_charset_info, from, length)) + return net_store_data((const uchar*)"", 0); + return net_store_data((const uchar *) tmp.ptr(), tmp.length()); +} + + +bool Protocol_text::store_str(const char *from, size_t length, + CHARSET_INFO *fromcs, CHARSET_INFO *tocs) +{ +#ifndef DBUG_OFF + DBUG_PRINT("info", ("Protocol_text::store field %u : %.*b", field_pos, + (int) length, (length == 0 ? "" : from))); + DBUG_ASSERT(field_handlers == 0 || field_pos < field_count); + DBUG_ASSERT(valid_handler(field_pos, PROTOCOL_SEND_STRING)); + field_pos++; +#endif + return store_string_aux(from, length, fromcs, tocs); +} + + +bool Protocol_text::store_numeric_zerofill_str(const char *from, + size_t length, + protocol_send_type_t send_type) +{ +#ifndef DBUG_OFF + DBUG_PRINT("info", + ("Protocol_text::store_numeric_zerofill_str field %u : %.*b", + field_pos, (int) length, (length == 0 ? 
"" : from))); + DBUG_ASSERT(field_handlers == 0 || field_pos < field_count); + DBUG_ASSERT(valid_handler(field_pos, send_type)); + field_pos++; +#endif + return store_numeric_string_aux(from, length); +} + + +bool Protocol_text::store_tiny(longlong from) +{ +#ifndef DBUG_OFF + DBUG_ASSERT(valid_handler(field_pos, PROTOCOL_SEND_TINY)); + field_pos++; +#endif + char buff[22]; + size_t length= (size_t) (int10_to_str((int) from, buff, -10) - buff); + return store_numeric_string_aux(buff, length); +} + + +bool Protocol_text::store_short(longlong from) +{ +#ifndef DBUG_OFF + DBUG_ASSERT(valid_handler(field_pos, PROTOCOL_SEND_SHORT)); + field_pos++; +#endif + char buff[22]; + size_t length= (size_t) (int10_to_str((int) from, buff, -10) - buff); + return store_numeric_string_aux(buff, length); +} + + +bool Protocol_text::store_long(longlong from) +{ +#ifndef DBUG_OFF + DBUG_ASSERT(valid_handler(field_pos, PROTOCOL_SEND_LONG)); + field_pos++; +#endif + char buff[22]; + size_t length= (size_t) (int10_to_str((long int)from, buff, + (from < 0) ? - 10 : 10) - buff); + return store_numeric_string_aux(buff, length); +} + + +bool Protocol_text::store_longlong(longlong from, bool unsigned_flag) +{ +#ifndef DBUG_OFF + DBUG_ASSERT(valid_handler(field_pos, PROTOCOL_SEND_LONGLONG)); + field_pos++; +#endif + char buff[22]; + size_t length= (size_t) (longlong10_to_str(from, buff, + unsigned_flag ? 
10 : -10) - + buff); + return store_numeric_string_aux(buff, length); +} + + +bool Protocol_text::store_decimal(const my_decimal *d) +{ +#ifndef DBUG_OFF + DBUG_ASSERT(0); // This method is not used yet + field_pos++; +#endif + StringBuffer str; + (void) d->to_string(&str); + return store_numeric_string_aux(str.ptr(), str.length()); +} + + +bool Protocol_text::store_float(float from, uint32 decimals) +{ +#ifndef DBUG_OFF + DBUG_ASSERT(valid_handler(field_pos, PROTOCOL_SEND_FLOAT)); + field_pos++; +#endif + Float(from).to_string(&buffer, decimals); + return store_numeric_string_aux(buffer.ptr(), buffer.length()); +} + + +bool Protocol_text::store_double(double from, uint32 decimals) +{ +#ifndef DBUG_OFF + DBUG_ASSERT(valid_handler(field_pos, PROTOCOL_SEND_DOUBLE)); + field_pos++; +#endif + buffer.set_real(from, decimals, thd->charset()); + return store_numeric_string_aux(buffer.ptr(), buffer.length()); +} + + +bool Protocol_text::store(Field *field) +{ + if (field->is_null()) + return store_null(); +#ifdef DBUG_ASSERT_EXISTS + TABLE *table= field->table; + MY_BITMAP *old_map= 0; + if (table->file) + old_map= dbug_tmp_use_all_columns(table, &table->read_set); +#endif + + bool rc= field->send(this); + +#ifdef DBUG_ASSERT_EXISTS + if (old_map) + dbug_tmp_restore_column_map(&table->read_set, old_map); +#endif + + return rc; +} + + +bool Protocol_text::store_datetime(MYSQL_TIME *tm, int decimals) +{ +#ifndef DBUG_OFF + DBUG_ASSERT(valid_handler(field_pos, PROTOCOL_SEND_DATETIME)); + field_pos++; +#endif + char buff[MAX_DATE_STRING_REP_LENGTH]; + uint length= my_datetime_to_str(tm, buff, decimals); + return store_numeric_string_aux(buff, length); +} + + +bool Protocol_text::store_date(MYSQL_TIME *tm) +{ +#ifndef DBUG_OFF + DBUG_ASSERT(valid_handler(field_pos, PROTOCOL_SEND_DATE)); + field_pos++; +#endif + char buff[MAX_DATE_STRING_REP_LENGTH]; + size_t length= my_date_to_str(tm, buff); + return store_numeric_string_aux(buff, length); +} + + +bool 
Protocol_text::store_time(MYSQL_TIME *tm, int decimals) +{ +#ifndef DBUG_OFF + DBUG_ASSERT(valid_handler(field_pos, PROTOCOL_SEND_TIME)); + field_pos++; +#endif + char buff[MAX_DATE_STRING_REP_LENGTH]; + uint length= my_time_to_str(tm, buff, decimals); + return store_numeric_string_aux(buff, length); +} + +/** + Assign OUT-parameters to user variables. + + @param sp_params List of PS/SP parameters (both input and output). + + @return Error status. + @retval FALSE Success. + @retval TRUE Error. +*/ + +bool Protocol_text::send_out_parameters(List *sp_params) +{ + DBUG_ASSERT(sp_params->elements == thd->lex->prepared_stmt.param_count()); + + List_iterator_fast item_param_it(*sp_params); + List_iterator_fast param_it(thd->lex->prepared_stmt.params()); + + while (true) + { + Item_param *item_param= item_param_it++; + Item *param= param_it++; + Settable_routine_parameter *sparam; + + if (!item_param || !param) + break; + + if (!item_param->get_out_param_info()) + continue; // It's an IN-parameter. + + if (!(sparam= param->get_settable_routine_parameter())) + { + DBUG_ASSERT(0); + continue; + } + + DBUG_ASSERT(sparam->get_item_param() == NULL); + sparam->set_value(thd, thd->spcont, reinterpret_cast(&item_param)); + } + + return FALSE; +} + +/**************************************************************************** + Functions to handle the binary protocol used with prepared statements + + Data format: + + [ok:1] reserved ok packet + [null_field:(field_count+7+2)/8] reserved to send null data. The size is + calculated using: + bit_fields= (field_count+7+2)/8; + 2 bits are reserved for identifying type + of package. 
+ [[length]data] data field (the length applies only for + string/binary/time/timestamp fields and + rest of them are not sent as they have + the default length that client understands + based on the field type + [..]..[[length]data] data +****************************************************************************/ + +bool Protocol_binary::prepare_for_send(uint num_columns) +{ + Protocol::prepare_for_send(num_columns); + bit_fields= (field_count+9)/8; + return packet->alloc(bit_fields+1); + + /* prepare_for_resend will be called after this one */ +} + + +void Protocol_binary::prepare_for_resend() +{ + packet->length(bit_fields+1); + bzero((uchar*) packet->ptr(), 1+bit_fields); + field_pos=0; +} + + +bool Protocol_binary::store_str(const char *from, size_t length, + CHARSET_INFO *fromcs, CHARSET_INFO *tocs) +{ + field_pos++; + return store_string_aux(from, length, fromcs, tocs); +} + +bool Protocol_binary::store_null() +{ + uint offset= (field_pos+2)/8+1, bit= (1 << ((field_pos+2) & 7)); + /* Room for this as it's allocated in prepare_for_send */ + char *to= (char*) packet->ptr()+offset; + *to= (char) ((uchar) *to | (uchar) bit); + field_pos++; + return 0; +} + + +bool Protocol_binary::store_tiny(longlong from) +{ + char buff[1]; + field_pos++; + buff[0]= (uchar) from; + return packet->append(buff, sizeof(buff), PACKET_BUFFER_EXTRA_ALLOC); +} + + +bool Protocol_binary::store_short(longlong from) +{ + field_pos++; + char *to= packet->prep_append(2, PACKET_BUFFER_EXTRA_ALLOC); + if (!to) + return 1; + int2store(to, (int) from); + return 0; +} + + +bool Protocol_binary::store_long(longlong from) +{ + field_pos++; + char *to= packet->prep_append(4, PACKET_BUFFER_EXTRA_ALLOC); + if (!to) + return 1; + int4store(to, from); + return 0; +} + + +bool Protocol_binary::store_longlong(longlong from, bool unsigned_flag) +{ + field_pos++; + char *to= packet->prep_append(8, PACKET_BUFFER_EXTRA_ALLOC); + if (!to) + return 1; + int8store(to, from); + return 0; +} + +bool 
Protocol_binary::store_decimal(const my_decimal *d) +{ +#ifndef DBUG_OFF + DBUG_ASSERT(0); // This method is not used yet +#endif + StringBuffer str; + (void) d->to_string(&str); + return store_str(str.ptr(), str.length(), str.charset(), + thd->variables.character_set_results); +} + +bool Protocol_binary::store_float(float from, uint32 decimals) +{ + field_pos++; + char *to= packet->prep_append(4, PACKET_BUFFER_EXTRA_ALLOC); + if (!to) + return 1; + float4store(to, from); + return 0; +} + + +bool Protocol_binary::store_double(double from, uint32 decimals) +{ + field_pos++; + char *to= packet->prep_append(8, PACKET_BUFFER_EXTRA_ALLOC); + if (!to) + return 1; + float8store(to, from); + return 0; +} + + +bool Protocol_binary::store(Field *field) +{ + /* + We should not increment field_pos here as send() will call another + protocol function to do this for us + */ + if (field->is_null()) + return store_null(); + return field->send(this); +} + + +bool Protocol_binary::store_datetime(MYSQL_TIME *tm, int decimals) +{ + char buff[12],*pos; + uint length; + field_pos++; + pos= buff+1; + + int2store(pos, tm->year); + pos[2]= (uchar) tm->month; + pos[3]= (uchar) tm->day; + pos[4]= (uchar) tm->hour; + pos[5]= (uchar) tm->minute; + pos[6]= (uchar) tm->second; + DBUG_ASSERT(decimals == AUTO_SEC_PART_DIGITS || + (decimals >= 0 && decimals <= TIME_SECOND_PART_DIGITS)); + if (decimals != AUTO_SEC_PART_DIGITS) + my_datetime_trunc(tm, decimals); + int4store(pos+7, tm->second_part); + if (tm->second_part) + length=11; + else if (tm->hour || tm->minute || tm->second) + length=7; + else if (tm->year || tm->month || tm->day) + length=4; + else + length=0; + buff[0]=(char) length; // Length is stored first + return packet->append(buff, length+1, PACKET_BUFFER_EXTRA_ALLOC); +} + +bool Protocol_binary::store_date(MYSQL_TIME *tm) +{ + tm->hour= tm->minute= tm->second=0; + tm->second_part= 0; + return Protocol_binary::store_datetime(tm, 0); +} + + +bool Protocol_binary::store_time(MYSQL_TIME 
*tm, int decimals) +{ + char buff[13], *pos; + uint length; + field_pos++; + pos= buff+1; + pos[0]= tm->neg ? 1 : 0; + if (tm->hour >= 24) + { + uint days= tm->hour/24; + tm->hour-= days*24; + tm->day+= days; + } + int4store(pos+1, tm->day); + pos[5]= (uchar) tm->hour; + pos[6]= (uchar) tm->minute; + pos[7]= (uchar) tm->second; + DBUG_ASSERT(decimals == AUTO_SEC_PART_DIGITS || + (decimals >= 0 && decimals <= TIME_SECOND_PART_DIGITS)); + if (decimals != AUTO_SEC_PART_DIGITS) + my_time_trunc(tm, decimals); + int4store(pos+8, tm->second_part); + if (tm->second_part) + length=12; + else if (tm->hour || tm->minute || tm->second || tm->day) + length=8; + else + length=0; + buff[0]=(char) length; // Length is stored first + return packet->append(buff, length+1, PACKET_BUFFER_EXTRA_ALLOC); +} + +/** + Send a result set with OUT-parameter values by means of PS-protocol. + + @param sp_params List of PS/SP parameters (both input and output). + + @return Error status. + @retval FALSE Success. + @retval TRUE Error. +*/ + +bool Protocol_binary::send_out_parameters(List *sp_params) +{ + bool ret; + if (!(thd->client_capabilities & CLIENT_PS_MULTI_RESULTS)) + { + /* The client does not support OUT-parameters. */ + return FALSE; + } + + List out_param_lst; + + { + List_iterator_fast item_param_it(*sp_params); + + while (true) + { + Item_param *item_param= item_param_it++; + + if (!item_param) + break; + + if (!item_param->get_out_param_info()) + continue; // It's an IN-parameter. + + if (out_param_lst.push_back(item_param, thd->mem_root)) + return TRUE; + } + } + + if (!out_param_lst.elements) + return FALSE; + + /* + We have to set SERVER_PS_OUT_PARAMS in THD::server_status, because it + is used in send_result_set_metadata(). + */ + + thd->server_status|= SERVER_PS_OUT_PARAMS | SERVER_MORE_RESULTS_EXISTS; + + /* Send meta-data. */ + if (send_result_set_metadata(&out_param_lst, + SEND_NUM_ROWS | SEND_EOF | SEND_FORCE_COLUMN_INFO)) + return TRUE; + + /* Send data. 
*/ + + prepare_for_resend(); + + if (send_result_set_row(&out_param_lst)) + return TRUE; + + if (write()) + return TRUE; + + ret= net_send_eof(thd, thd->server_status, 0); + + /* + Reset server_status: + - SERVER_MORE_RESULTS_EXISTS bit, because this is the last packet for sure. + - Restore SERVER_PS_OUT_PARAMS status. + */ + thd->server_status&= ~(SERVER_PS_OUT_PARAMS | SERVER_MORE_RESULTS_EXISTS); + + return ret ? FALSE : TRUE; +} diff --git a/sql/protocol.h b/sql/protocol.h new file mode 100644 index 00000000..4fdfde3e --- /dev/null +++ b/sql/protocol.h @@ -0,0 +1,332 @@ +#ifndef PROTOCOL_INCLUDED +#define PROTOCOL_INCLUDED + +/* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include "sql_error.h" +#include "my_decimal.h" /* my_decimal */ +#include "sql_type.h" + +class i_string; +class Field; +class Send_field; +class THD; +class Item_param; +struct TABLE_LIST; +typedef struct st_mysql_field MYSQL_FIELD; +typedef struct st_mysql_rows MYSQL_ROWS; + +class Protocol +{ +protected: + String *packet; + /* Used by net_store_data() for charset conversions */ + String *convert; + uint field_pos; +#ifndef DBUG_OFF + const Type_handler **field_handlers; + bool valid_handler(uint pos, protocol_send_type_t type) const + { + return field_handlers == 0 || + field_handlers[field_pos]->protocol_send_type() == type; + } +#endif + uint field_count; + virtual bool net_store_data(const uchar *from, size_t length); + virtual bool net_store_data_cs(const uchar *from, size_t length, + CHARSET_INFO *fromcs, CHARSET_INFO *tocs); + virtual bool net_send_ok(THD *, uint, uint, ulonglong, ulonglong, const char *, + bool); + virtual bool net_send_error_packet(THD *, uint, const char *, const char *); +#ifdef EMBEDDED_LIBRARY + char **next_field; + MYSQL_FIELD *next_mysql_field; + MEM_ROOT *alloc; +#endif + bool needs_conversion(CHARSET_INFO *fromcs, + CHARSET_INFO *tocs) const + { + // 'tocs' is set 0 when client issues SET character_set_results=NULL + return tocs && !my_charset_same(fromcs, tocs) && + fromcs != &my_charset_bin && + tocs != &my_charset_bin; + } + /* + The following two are low-level functions that are invoked from + higher-level store_xxx() funcs. The data is stored into this->packet. 
+ */ + bool store_string_aux(const char *from, size_t length, + CHARSET_INFO *fromcs, CHARSET_INFO *tocs); + + virtual bool send_ok(uint server_status, uint statement_warn_count, + ulonglong affected_rows, ulonglong last_insert_id, + const char *message); + + virtual bool send_eof(uint server_status, uint statement_warn_count); + + virtual bool send_error(uint sql_errno, const char *err_msg, + const char *sql_state); + + CHARSET_INFO *character_set_results() const; + +public: + THD *thd; + Protocol(THD *thd_arg) { init(thd_arg); } + virtual ~Protocol() = default; + void init(THD* thd_arg); + + enum { SEND_NUM_ROWS= 1, SEND_EOF= 2, SEND_FORCE_COLUMN_INFO= 4 }; + virtual bool send_result_set_metadata(List *list, uint flags); + bool send_list_fields(List *list, const TABLE_LIST *table_list); + bool send_result_set_row(List *row_items); + + bool store(I_List *str_list); + bool store(I_List *str_list); + bool store_string_or_null(const char *from, CHARSET_INFO *cs); + bool store_warning(const char *from, size_t length); + String *storage_packet() { return packet; } + inline void free() { packet->free(); } + virtual bool write(); + inline bool store(int from) + { return store_long((longlong) from); } + inline bool store(uint32 from) + { return store_long((longlong) from); } + inline bool store(longlong from) + { return store_longlong((longlong) from, 0); } + inline bool store(ulonglong from) + { return store_longlong((longlong) from, 1); } + inline bool store(String *str) + { return store((char*) str->ptr(), str->length(), str->charset()); } + inline bool store(const LEX_CSTRING *from, CHARSET_INFO *cs) + { + return store(from->str, from->length, cs); + } + + virtual bool prepare_for_send(uint num_columns) + { + field_count= num_columns; + return 0; + } + virtual bool flush(); + virtual void end_partial_result_set(THD *thd); + virtual void prepare_for_resend()=0; + + virtual bool store_null()=0; + virtual bool store_tiny(longlong from)=0; + virtual bool 
store_short(longlong from)=0; + virtual bool store_long(longlong from)=0; + virtual bool store_longlong(longlong from, bool unsigned_flag)=0; + virtual bool store_decimal(const my_decimal *)=0; + virtual bool store_str(const char *from, size_t length, + CHARSET_INFO *fromcs, CHARSET_INFO *tocs)=0; + virtual bool store_float(float from, uint32 decimals)=0; + virtual bool store_double(double from, uint32 decimals)=0; + virtual bool store_datetime(MYSQL_TIME *time, int decimals)=0; + virtual bool store_date(MYSQL_TIME *time)=0; + virtual bool store_time(MYSQL_TIME *time, int decimals)=0; + virtual bool store(Field *field)=0; + + // Various useful wrappers for the virtual store*() methods. + // Backward wrapper for store_str() + bool store(const char *from, size_t length, CHARSET_INFO *cs) + { + return store_str(from, length, cs, character_set_results()); + } + bool store_lex_cstring(const LEX_CSTRING &s, + CHARSET_INFO *fromcs, + CHARSET_INFO *tocs) + { + return store_str(s.str, (uint) s.length, fromcs, tocs); + } + bool store_binary_string(const char *str, size_t length) + { + return store_str(str, (uint) length, &my_charset_bin, &my_charset_bin); + } + bool store_ident(const LEX_CSTRING &s) + { + return store_lex_cstring(s, system_charset_info, character_set_results()); + } + // End of wrappers + + virtual bool send_out_parameters(List *sp_params)=0; +#ifdef EMBEDDED_LIBRARY + bool begin_dataset(); + bool begin_dataset(THD *thd, uint numfields); + virtual void remove_last_row() {} +#else + void remove_last_row() {} +#endif + enum enum_protocol_type + { + /* + Before adding a new type, please make sure + there is enough storage for it in Query_cache_query_flags. 
+ */ + PROTOCOL_TEXT= 0, PROTOCOL_BINARY= 1, PROTOCOL_LOCAL= 2, + PROTOCOL_DISCARD= 3 /* Should be last, not used by Query_cache */ + }; + virtual enum enum_protocol_type type()= 0; + + virtual bool net_send_eof(THD *thd, uint server_status, uint statement_warn_count); + bool net_send_error(THD *thd, uint sql_errno, const char *err, + const char* sqlstate); + void end_statement(); +}; + + +/** Class used for the old (MySQL 4.0 protocol). */ + +class Protocol_text :public Protocol +{ + StringBuffer buffer; + bool store_numeric_string_aux(const char *from, size_t length); +public: + Protocol_text(THD *thd_arg, ulong prealloc= 0) + :Protocol(thd_arg) + { + if (prealloc) + packet->alloc(prealloc); + } + void prepare_for_resend() override; + bool store_null() override; + bool store_tiny(longlong from) override; + bool store_short(longlong from) override; + bool store_long(longlong from) override; + bool store_longlong(longlong from, bool unsigned_flag) override; + bool store_decimal(const my_decimal *) override; + bool store_str(const char *from, size_t length, + CHARSET_INFO *fromcs, CHARSET_INFO *tocs) override; + bool store_datetime(MYSQL_TIME *time, int decimals) override; + bool store_date(MYSQL_TIME *time) override; + bool store_time(MYSQL_TIME *time, int decimals) override; + bool store_float(float nr, uint32 decimals) override; + bool store_double(double from, uint32 decimals) override; + bool store(Field *field) override; + + bool send_out_parameters(List *sp_params) override; + + bool store_numeric_zerofill_str(const char *from, size_t length, + protocol_send_type_t send_type); + +#ifdef EMBEDDED_LIBRARY + void remove_last_row() override; +#endif + virtual bool store_field_metadata(const THD *thd, const Send_field &field, + CHARSET_INFO *charset_for_protocol, + uint pos); + bool store_item_metadata(THD *thd, Item *item, uint pos); + bool store_field_metadata_for_list_fields(const THD *thd, Field *field, + const TABLE_LIST *table_list, + uint pos); + enum 
enum_protocol_type type() override { return PROTOCOL_TEXT; }; +}; + + +class Protocol_binary final :public Protocol +{ +private: + uint bit_fields; +public: + Protocol_binary(THD *thd_arg) :Protocol(thd_arg) {} + bool prepare_for_send(uint num_columns) override; + void prepare_for_resend() override; +#ifdef EMBEDDED_LIBRARY + bool write() override; + bool net_store_data(const uchar *from, size_t length) override; + bool net_store_data_cs(const uchar *from, size_t length, + CHARSET_INFO *fromcs, CHARSET_INFO *tocs) override; +#endif + bool store_null() override; + bool store_tiny(longlong from) override; + bool store_short(longlong from) override; + bool store_long(longlong from) override; + bool store_longlong(longlong from, bool unsigned_flag) override; + bool store_decimal(const my_decimal *) override; + bool store_str(const char *from, size_t length, + CHARSET_INFO *fromcs, CHARSET_INFO *tocs) override; + bool store_datetime(MYSQL_TIME *time, int decimals) override; + bool store_date(MYSQL_TIME *time) override; + bool store_time(MYSQL_TIME *time, int decimals) override; + bool store_float(float nr, uint32 decimals) override; + bool store_double(double from, uint32 decimals) override; + bool store(Field *field) override; + + bool send_out_parameters(List *sp_params) override; + + enum enum_protocol_type type() override { return PROTOCOL_BINARY; }; +}; + + +/* + A helper for "ANALYZE $stmt" which looks a real network procotol but doesn't + write results to the network. + + At first glance, class select_send looks like a more appropriate place to + implement the "write nothing" hook. This is not true, because + - we need to evaluate the value of every item, and do it the way + select_send does it (i.e. call item->val_int() or val_real() or...) + - select_send::send_data() has some other code, like telling the storage + engine that the row can be unlocked. We want to keep that also. 
+ as a result, "ANALYZE $stmt" uses a select_send_analyze which still uses + select_send::send_data() & co., and also uses Protocol_discard object. +*/ + +class Protocol_discard final : public Protocol +{ +public: + Protocol_discard(THD *thd_arg) : Protocol(thd_arg) {} + bool write() override { return 0; } + bool send_result_set_metadata(List *, uint) override { return 0; } + bool send_eof(uint, uint) override { return 0; } + void prepare_for_resend() override { IF_DBUG(field_pos= 0,); } + bool send_out_parameters(List *sp_params) override { return false; } + + /* + Provide dummy overrides for any storage methods so that we + avoid allocating and copying of data + */ + bool store_null() override { return false; } + bool store_tiny(longlong) override { return false; } + bool store_short(longlong) override { return false; } + bool store_long(longlong) override { return false; } + bool store_longlong(longlong, bool) override { return false; } + bool store_decimal(const my_decimal *) override { return false; } + bool store_str(const char *, size_t, CHARSET_INFO *, CHARSET_INFO *) override + { + return false; + } + bool store_datetime(MYSQL_TIME *, int) override { return false; } + bool store_date(MYSQL_TIME *) override { return false; } + bool store_time(MYSQL_TIME *, int) override { return false; } + bool store_float(float, uint32) override { return false; } + bool store_double(double, uint32) override { return false; } + bool store(Field *) override { return false; } + enum enum_protocol_type type() override { return PROTOCOL_DISCARD; }; +}; + + +void send_warning(THD *thd, uint sql_errno, const char *err=0); +void net_send_progress_packet(THD *thd); +uchar *net_store_data(uchar *to,const uchar *from, size_t length); +uchar *net_store_data(uchar *to,int32 from); +uchar *net_store_data(uchar *to,longlong from); + +#endif /* PROTOCOL_INCLUDED */ diff --git a/sql/proxy_protocol.cc b/sql/proxy_protocol.cc new file mode 100644 index 00000000..689d1af8 --- /dev/null +++ 
b/sql/proxy_protocol.cc @@ -0,0 +1,584 @@ +/* Copyright (c) 2017, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PROXY_PROTOCOL_V1_SIGNATURE "PROXY" +#define PROXY_PROTOCOL_V2_SIGNATURE "\x0D\x0A\x0D\x0A\x00\x0D\x0A\x51\x55\x49\x54\x0A" +#define MAX_PROXY_HEADER_LEN 256 + +static mysql_rwlock_t lock; + +/* + Parse proxy protocol version 1 header (text) +*/ +static int parse_v1_header(char *hdr, size_t len, proxy_peer_info *peer_info) +{ + char address_family[MAX_PROXY_HEADER_LEN + 1]; + char client_address[MAX_PROXY_HEADER_LEN + 1]; + char server_address[MAX_PROXY_HEADER_LEN + 1]; + int client_port; + int server_port; + + int ret = sscanf(hdr, "PROXY %s %s %s %d %d", + address_family, client_address, server_address, + &client_port, &server_port); + + if (ret != 5) + { + if (ret >= 1 && !strcmp(address_family, "UNKNOWN")) + { + peer_info->is_local_command= true; + return 0; + } + return -1; + } + + if (client_port < 0 || client_port > 0xffff + || server_port < 0 || server_port > 0xffff) + return -1; + + if (!strcmp(address_family, "UNKNOWN")) + { + peer_info->is_local_command= true; + return 0; + } + else if (!strcmp(address_family, "TCP4")) + { + /* Initialize IPv4 peer address.*/ + peer_info->peer_addr.ss_family= AF_INET; + if 
(!inet_pton(AF_INET, client_address, + &((struct sockaddr_in *)(&peer_info->peer_addr))->sin_addr)) + return -1; + } + else if (!strcmp(address_family, "TCP6")) + { + /* Initialize IPv6 peer address.*/ + peer_info->peer_addr.ss_family= AF_INET6; + if (!inet_pton(AF_INET6, client_address, + &((struct sockaddr_in6 *)(&peer_info->peer_addr))->sin6_addr)) + return -1; + } + peer_info->port= client_port; + /* Check if server address is legal.*/ + char addr_bin[16]; + if (!inet_pton(peer_info->peer_addr.ss_family, + server_address, addr_bin)) + return -1; + + return 0; +} + + +/* + Parse proxy protocol V2 (binary) header +*/ +static int parse_v2_header(uchar *hdr, size_t len,proxy_peer_info *peer_info) +{ + /* V2 Signature */ + if (memcmp(hdr, PROXY_PROTOCOL_V2_SIGNATURE, 12)) + return -1; + + /* version + command */ + uint8 ver= (hdr[12] & 0xF0); + if (ver != 0x20) + return -1; /* Wrong version*/ + + uint cmd= (hdr[12] & 0xF); + + /* Address family */ + uchar fam= hdr[13]; + + if (cmd == 0) + { + /* LOCAL command*/ + peer_info->is_local_command= true; + return 0; + } + + if (cmd != 0x01) + { + /* Not PROXY COMMAND */ + return -1; + } + + struct sockaddr_in *sin= (struct sockaddr_in *)(&peer_info->peer_addr); + struct sockaddr_in6 *sin6= (struct sockaddr_in6 *)(&peer_info->peer_addr); + switch (fam) + { + case 0x11: /* TCPv4 */ + sin->sin_family= AF_INET; + memcpy(&(sin->sin_addr), hdr + 16, 4); + peer_info->port= (hdr[24] << 8) + hdr[25]; + break; + case 0x21: /* TCPv6 */ + sin6->sin6_family= AF_INET6; + memcpy(&(sin6->sin6_addr), hdr + 16, 16); + peer_info->port= (hdr[48] << 8) + hdr[49]; + break; + case 0x31: /* AF_UNIX, stream */ + peer_info->peer_addr.ss_family= AF_UNIX; + break; + default: + return -1; + } + return 0; +} + + +bool has_proxy_protocol_header(NET *net) +{ + compile_time_assert(NET_HEADER_SIZE < sizeof(PROXY_PROTOCOL_V1_SIGNATURE)); + compile_time_assert(NET_HEADER_SIZE < sizeof(PROXY_PROTOCOL_V2_SIGNATURE)); + + const uchar *preread_bytes= net->buff + 
net->where_b; + return !memcmp(preread_bytes, PROXY_PROTOCOL_V1_SIGNATURE, NET_HEADER_SIZE)|| + !memcmp(preread_bytes, PROXY_PROTOCOL_V2_SIGNATURE, NET_HEADER_SIZE); +} + + +/** + Try to parse proxy header. + https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt + + Whenever this function is called, client is connecting, and + we have have pre-read 4 bytes (NET_HEADER_SIZE) from the network already. + These 4 bytes did not match MySQL packet header, and (unless the client + is buggy), those bytes must be proxy header. + + @param[in] net - vio and already preread bytes from the header + @param[out] peer_info - parsed proxy header with client host and port + @return 0 in case of success, -1 if error. +*/ +int parse_proxy_protocol_header(NET *net, proxy_peer_info *peer_info) +{ + uchar hdr[MAX_PROXY_HEADER_LEN]; + size_t pos= 0; + + DBUG_ASSERT(!net->compress); + const uchar *preread_bytes= net->buff + net->where_b; + bool have_v1_header= !memcmp(preread_bytes, PROXY_PROTOCOL_V1_SIGNATURE, NET_HEADER_SIZE); + bool have_v2_header= + !have_v1_header && !memcmp(preread_bytes, PROXY_PROTOCOL_V2_SIGNATURE, NET_HEADER_SIZE); + if (!have_v1_header && !have_v2_header) + { + // not a proxy protocol header + return -1; + } + memcpy(hdr, preread_bytes, NET_HEADER_SIZE); + pos= NET_HEADER_SIZE; + Vio *vio= net->vio; + memset(peer_info, 0, sizeof (*peer_info)); + + if (have_v1_header) + { + /* Read until end of header (newline character)*/ + while(pos < sizeof(hdr)) + { + long len= (long)vio_read(vio, hdr + pos, 1); + if (len < 0) + return -1; + pos++; + if (hdr[pos-1] == '\n') + break; + } + hdr[pos]= 0; + + if (parse_v1_header((char *)hdr, pos, peer_info)) + return -1; + } + else // if (have_v2_header) + { +#define PROXY_V2_HEADER_LEN 16 + /* read off 16 bytes of the header.*/ + ssize_t len= vio_read(vio, hdr + pos, PROXY_V2_HEADER_LEN - pos); + if (len < 0) + return -1; + // 2 last bytes are the length in network byte order of the part following header + ushort trail_len= 
((ushort)hdr[PROXY_V2_HEADER_LEN-2] >> 8) + hdr[PROXY_V2_HEADER_LEN-1]; + if (trail_len > sizeof(hdr) - PROXY_V2_HEADER_LEN) + return -1; + if (trail_len > 0) + { + len= vio_read(vio, hdr + PROXY_V2_HEADER_LEN, trail_len); + if (len < 0) + return -1; + } + pos= PROXY_V2_HEADER_LEN + trail_len; + if (parse_v2_header(hdr, pos, peer_info)) + return -1; + } + + if (peer_info->peer_addr.ss_family == AF_INET6) + { + /* + Normalize IPv4 compatible or mapped IPv6 addresses. + They will be treated as IPv4. + */ + sockaddr_storage tmp; + memset(&tmp, 0, sizeof(tmp)); + vio_get_normalized_ip((const struct sockaddr *)&peer_info->peer_addr, + sizeof(sockaddr_storage), (struct sockaddr *)&tmp); + memcpy(&peer_info->peer_addr, &tmp, sizeof(tmp)); + } + return 0; +} + + +/** + CIDR address matching etc (for the proxy_protocol_networks parameter) +*/ + +/** + Subnetwork address in CIDR format, e.g + 192.168.1.0/24 or 2001:db8::/32 +*/ +struct subnet +{ + char addr[16]; /* Binary representation of the address, big endian*/ + unsigned short family; /* Address family, AF_INET or AF_INET6 */ + unsigned short bits; /* subnetwork size */ +}; + +static subnet* proxy_protocol_subnets; +size_t proxy_protocol_subnet_count; + +#define MAX_MASK_BITS(family) (family == AF_INET ? 32 : 128) + + +/** Convert IPv4 that are compat or mapped IPv4 to "normal" IPv4 */ +static int normalize_subnet(struct subnet *subnet) +{ + unsigned char *addr= (unsigned char*)subnet->addr; + if (subnet->family == AF_INET6) + { + const struct in6_addr *src_ip6=(in6_addr *)addr; + if (IN6_IS_ADDR_V4MAPPED(src_ip6) || IN6_IS_ADDR_V4COMPAT(src_ip6)) + { + /* Copy the actual IPv4 address (4 last bytes) */ + if (subnet->bits < 96) + return -1; + subnet->family= AF_INET; + memcpy(addr, addr+12, 4); + subnet->bits -= 96; + } + } + return 0; +} + +/** + Convert string representation of a subnet to subnet struct. 
+*/ +static int parse_subnet(char *addr_str, struct subnet *subnet) +{ + if (strchr(addr_str, ':')) + subnet->family= AF_INET6; + else if (strchr(addr_str, '.')) + subnet->family= AF_INET; + else if (!strcmp(addr_str, "localhost")) + { + subnet->family= AF_UNIX; + subnet->bits= 0; + return 0; + } + + char *pmask= strchr(addr_str, '/'); + if (!pmask) + { + subnet->bits= MAX_MASK_BITS(subnet->family); + } + else + { + *pmask= 0; + pmask++; + int b= 0; + + do + { + if (*pmask < '0' || *pmask > '9') + return -1; + b= 10 * b + *pmask - '0'; + if (b > MAX_MASK_BITS(subnet->family)) + return -1; + pmask++; + } + while (*pmask); + + subnet->bits= (unsigned short)b; + } + + if (!inet_pton(subnet->family, addr_str, subnet->addr)) + return -1; + + if (normalize_subnet(subnet)) + return -1; + + return 0; +} + +/** + Parse comma separated string subnet list into subnets array, + which is stored in 'proxy_protocol_subnets' variable + + @param[in] subnets_str : networks in CIDR format, + separated by comma and/or space + @param[out] out_subnets : parsed subnets; + @param[out] out_count : number of parsed subnets + + @return 0 if success, otherwise -1 +*/ +static int parse_networks(const char *subnets_str, subnet **out_subnets, size_t *out_count) +{ + int ret= -1; + subnet *subnets= 0; + size_t count= 0; + const char *p= subnets_str; + size_t max_subnets; + + if (!subnets_str || !*subnets_str) + { + ret= 0; + goto end; + } + + max_subnets= MY_MAX(3,strlen(subnets_str)/2); + subnets= (subnet *)my_malloc(PSI_INSTRUMENT_ME, + max_subnets * sizeof(subnet), MY_ZEROFILL); + + /* Check for special case '*'. 
*/ + if (strcmp(subnets_str, "*") == 0) + { + subnets[0].family= AF_INET; + subnets[1].family= AF_INET6; + subnets[2].family= AF_UNIX; + count= 3; + ret= 0; + goto end; + } + + char token[256]; + for(count= 0;; count++) + { + while(*p && (*p ==',' || *p == ' ')) + p++; + if (!*p) + break; + + size_t cnt= 0; + while(*p && *p != ',' && *p != ' ' && cnt < sizeof(token)-1) + token[cnt++]= *p++; + + token[cnt++]=0; + if (cnt == sizeof(token)) + goto end; + + if (parse_subnet(token, &subnets[count])) + { + my_printf_error(ER_PARSE_ERROR,"Error parsing proxy_protocol_networks parameter, near '%s'",MYF(0),token); + goto end; + } + } + + ret = 0; + +end: + if (ret) + { + my_free(subnets); + *out_subnets= NULL; + *out_count= 0; + return ret; + } + *out_subnets = subnets; + *out_count= count; + return 0; +} + +/** + Check validity of proxy_protocol_networks parameter + @param[in] in - input string + @return : true, if input is list of CIDR-style networks + separated by command or space +*/ +bool proxy_protocol_networks_valid(const char *in) +{ + subnet *new_subnets; + size_t new_count; + int ret= parse_networks(in, &new_subnets, &new_count); + my_free(new_subnets); + return !ret; +} + + +/** + Set 'proxy_protocol_networks' parameter. + + @param[in] spec : networks in CIDR format, + separated by comma and/or space + + @return 0 if success, otherwise -1 +*/ +int set_proxy_protocol_networks(const char *spec) +{ + subnet *new_subnets; + subnet *old_subnet = 0; + size_t new_count; + + int ret= parse_networks(spec, &new_subnets, &new_count); + if (ret) + return ret; + + mysql_rwlock_wrlock(&lock); + old_subnet = proxy_protocol_subnets; + proxy_protocol_subnets = new_subnets; + proxy_protocol_subnet_count = new_count; + mysql_rwlock_unlock(&lock); + my_free(old_subnet); + return ret; +} + + +/** + Compare memory areas, in memcmp().similar fashion. + The difference to memcmp() is that size parameter is the + bit count, not byte count. 
+*/ +static int compare_bits(const void *s1, const void *s2, int bit_count) +{ + int result= 0; + int byte_count= bit_count / 8; + if (byte_count && (result= memcmp(s1, s2, byte_count))) + return result; + int rem= bit_count % 8; + if (rem) + { + // compare remaining bits i.e partial bytes. + unsigned char s1_bits= (((char *)s1)[byte_count]) >> (8 - rem); + unsigned char s2_bits= (((char *)s2)[byte_count]) >> (8 - rem); + if (s1_bits > s2_bits) + return 1; + if (s1_bits < s2_bits) + return -1; + } + return 0; +} + +/** + Check whether networks address matches network. +*/ +bool addr_matches_subnet(const sockaddr *sock_addr, const subnet *subnet) +{ + DBUG_ASSERT(subnet->family == AF_UNIX || + subnet->family == AF_INET || + subnet->family == AF_INET6); + + if (sock_addr->sa_family != subnet->family) + return false; + + if (subnet->family == AF_UNIX) + return true; + + void *addr= (subnet->family == AF_INET) ? + (void *)&((struct sockaddr_in *)sock_addr)->sin_addr : + (void *)&((struct sockaddr_in6 *)sock_addr)->sin6_addr; + + return (compare_bits(subnet->addr, addr, subnet->bits) == 0); +} + + +/** + Check whether proxy header from client is allowed, as per + specification in 'proxy_protocol_networks' server variable. + + The non-TCP "localhost" clients (unix socket, shared memory, pipes) + are accepted whenever 127.0.0.1 accepted in 'proxy_protocol_networks' +*/ +bool is_proxy_protocol_allowed(const sockaddr *addr) +{ + if (proxy_protocol_subnet_count == 0) + return false; + + sockaddr_storage addr_storage; + struct sockaddr *normalized_addr= (struct sockaddr *)&addr_storage; + + /* + Non-TCP addresses (unix domain socket, windows pipe and shared memory + gets tranlated to TCP4 localhost address. + + Note, that vio remote addresses are initialized with binary zeros + for these protocols (which is AF_UNSPEC everywhere). 
+ */ + switch(addr->sa_family) + { + case AF_UNSPEC: + case AF_UNIX: + normalized_addr->sa_family= AF_UNIX; + break; + case AF_INET: + case AF_INET6: + { + size_t len= + (addr->sa_family == AF_INET)?sizeof(sockaddr_in):sizeof (sockaddr_in6); + vio_get_normalized_ip(addr, len,normalized_addr); + } + break; + default: + DBUG_ASSERT(0); + } + + bool ret= false; + mysql_rwlock_rdlock(&lock); + for (size_t i= 0; i < proxy_protocol_subnet_count; i++) + { + if (addr_matches_subnet(normalized_addr, &proxy_protocol_subnets[i])) + { + ret= true; + break; + } + } + mysql_rwlock_unlock(&lock); + + return ret; +} + + +int init_proxy_protocol_networks(const char *spec) +{ +#ifdef HAVE_PSI_INTERFACE + static PSI_rwlock_key psi_rwlock_key; + static PSI_rwlock_info psi_rwlock_info={ &psi_rwlock_key, "rwlock", 0 }; + mysql_rwlock_register("proxy_proto", &psi_rwlock_info, 1); +#endif + + mysql_rwlock_init(psi_rwlock_key, &lock); + return set_proxy_protocol_networks(spec); +} + + +void destroy_proxy_protocol_networks() +{ + my_free(proxy_protocol_subnets); + mysql_rwlock_destroy(&lock); +} diff --git a/sql/proxy_protocol.h b/sql/proxy_protocol.h new file mode 100644 index 00000000..0f873e24 --- /dev/null +++ b/sql/proxy_protocol.h @@ -0,0 +1,19 @@ +#include "my_net.h" + +struct proxy_peer_info +{ + struct sockaddr_storage peer_addr; + int port; + bool is_local_command; +}; + +extern bool has_proxy_protocol_header(NET *net); +extern int parse_proxy_protocol_header(NET *net, proxy_peer_info *peer_info); +extern bool is_proxy_protocol_allowed(const sockaddr *remote_addr); + +extern int init_proxy_protocol_networks(const char *spec); +extern void destroy_proxy_protocol_networks(); + +extern int set_proxy_protocol_networks(const char *spec); +extern bool proxy_protocol_networks_valid(const char *spec); + diff --git a/sql/records.cc b/sql/records.cc new file mode 100644 index 00000000..3aad36ca --- /dev/null +++ b/sql/records.cc @@ -0,0 +1,861 @@ +/* + Copyright (c) 2000, 2010, Oracle 
and/or its affiliates. + Copyright (c) 2009, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma implementation /* gcc class implementation */ +#endif + +/** + @file + + @brief + Functions for easy reading of records, possible through a cache +*/ + +#include "mariadb.h" +#include "records.h" +#include "sql_priv.h" +#include "records.h" +#include "opt_range.h" // SQL_SELECT +#include "sql_class.h" // THD +#include "sql_base.h" +#include "sql_sort.h" // SORT_ADDON_FIELD + +static int rr_quick(READ_RECORD *info); +int rr_sequential(READ_RECORD *info); +static int rr_from_tempfile(READ_RECORD *info); +template static int rr_unpack_from_tempfile(READ_RECORD *info); +template static int rr_unpack_from_buffer(READ_RECORD *info); +int rr_from_pointers(READ_RECORD *info); +static int rr_from_cache(READ_RECORD *info); +static int init_rr_cache(THD *thd, READ_RECORD *info); +static int rr_cmp(uchar *a,uchar *b); +static int rr_index_first(READ_RECORD *info); +static int rr_index_last(READ_RECORD *info); +static int rr_index(READ_RECORD *info); +static int rr_index_desc(READ_RECORD *info); + + +/** + Initialize READ_RECORD structure to perform full index scan in desired + direction using read_record.read_record() interface + + This function has been added at late stage and is used only by + UPDATE/DELETE. 
Other statements perform index scans using + join_read_first/next functions. + + @param info READ_RECORD structure to initialize. + @param thd Thread handle + @param table Table to be accessed + @param print_error If true, call table->file->print_error() if an error + occurs (except for end-of-records error) + @param idx index to scan + @param reverse Scan in the reverse direction +*/ + +bool init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table, + bool print_error, uint idx, bool reverse) +{ + int error= 0; + DBUG_ENTER("init_read_record_idx"); + + empty_record(table); + bzero((char*) info,sizeof(*info)); + info->thd= thd; + info->table= table; + info->print_error= print_error; + info->unlock_row= rr_unlock_row; + + table->status=0; /* And it's always found */ + if (!table->file->inited && + unlikely(error= table->file->ha_index_init(idx, 1))) + { + if (print_error) + table->file->print_error(error, MYF(0)); + } + + /* read_record_func will be changed to rr_index in rr_index_first */ + info->read_record_func= reverse ? rr_index_last : rr_index_first; + DBUG_RETURN(error != 0); +} + + +/* + init_read_record is used to scan by using a number of different methods. + Which method to use is set-up in this call so that later calls to + the info->read_record will call the appropriate method using a function + pointer. + + There are five methods that relate completely to the sort function + filesort. The result of a filesort is retrieved using read_record + calls. The other two methods are used for normal table access. + + The filesort will produce references to the records sorted, these + references can be stored in memory or in a temporary file. + + The temporary file is normally used when the references doesn't fit into + a properly sized memory buffer. For most small queries the references + are stored in the memory buffer. + SYNOPSIS + init_read_record() + info OUT read structure + thd Thread handle + table Table the data [originally] comes from. 
+ select SQL_SELECT structure. We may select->quick or + select->file as data source + use_record_cache Call file->extra_opt(HA_EXTRA_CACHE,...) + if we're going to do sequential read and some + additional conditions are satisfied. + print_error Copy this to info->print_error + disable_rr_cache Don't use rr_from_cache (used by sort-union + index-merge which produces rowid sequences that + are already ordered) + + DESCRIPTION + This function sets up reading data via one of the methods: + + The temporary file is also used when performing an update where a key is + modified. + + Methods used when ref's are in memory (using rr_from_pointers): + rr_unpack_from_buffer: + ---------------------- + This method is used when table->sort.addon_field is allocated. + This is allocated for most SELECT queries not involving any BLOB's. + In this case the records are fetched from a memory buffer. + rr_from_pointers: + ----------------- + Used when the above is not true, UPDATE, DELETE and so forth and + SELECT's involving BLOB's. It is also used when the addon_field + buffer is not allocated due to that its size was bigger than the + session variable max_length_for_sort_data. + In this case the record data is fetched from the handler using the + saved reference using the rnd_pos handler call. + + Methods used when ref's are in a temporary file (using rr_from_tempfile) + rr_unpack_from_tempfile: + ------------------------ + Same as rr_unpack_from_buffer except that references are fetched from + temporary file. Should obviously not really happen other than in + strange configurations. + + rr_from_tempfile: + ----------------- + Same as rr_from_pointers except that references are fetched from + temporary file instead of from + rr_from_cache: + -------------- + This is a special variant of rr_from_tempfile that can be used for + handlers that is not using the HA_FAST_KEY_READ table flag. 
Instead + of reading the references one by one from the temporary file it reads + a set of them, sorts them and reads all of them into a buffer which + is then used for a number of subsequent calls to rr_from_cache. + It is only used for SELECT queries and a number of other conditions + on table size. + + All other accesses use either index access methods (rr_quick) or a full + table scan (rr_sequential). + rr_quick: + --------- + rr_quick uses one of the QUICK_SELECT classes in opt_range.cc to + perform an index scan. There are loads of functionality hidden + in these quick classes. It handles all index scans of various kinds. + rr_sequential: + -------------- + This is the most basic access method of a table using rnd_init, + rnd_next and rnd_end. No indexes are used. +*/ + +bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, + SQL_SELECT *select, + SORT_INFO *filesort, + int use_record_cache, bool print_error, + bool disable_rr_cache) +{ + IO_CACHE *tempfile; + DBUG_ENTER("init_read_record"); + + const bool using_addon_fields= filesort && filesort->using_addon_fields(); + bool using_packed_sortkeys= filesort && filesort->using_packed_sortkeys(); + + bzero((char*) info,sizeof(*info)); + info->thd=thd; + info->table=table; + info->sort_info= filesort; + + if ((table->s->tmp_table == INTERNAL_TMP_TABLE) && + !using_addon_fields) + (void) table->file->extra(HA_EXTRA_MMAP); + + if (using_addon_fields) + { + info->rec_buf= filesort->addon_fields->get_addon_buf(); + info->ref_length= filesort->addon_fields->get_addon_buf_length(); + } + else + { + empty_record(table); + info->ref_length= (uint)table->file->ref_length; + } + info->select=select; + info->print_error=print_error; + info->unlock_row= rr_unlock_row; + table->status= 0; /* Rows are always found */ + + tempfile= 0; + if (select && my_b_inited(&select->file)) + tempfile= &select->file; + else if (filesort && my_b_inited(&filesort->io_cache)) + tempfile= &filesort->io_cache; + + if (tempfile && 
!(select && select->quick)) + { + if (using_addon_fields) + { + DBUG_PRINT("info",("using rr_from_tempfile")); + if (filesort->addon_fields->using_packed_addons()) + info->read_record_func= rr_unpack_from_tempfile; + else + info->read_record_func= rr_unpack_from_tempfile; + } + else + { + DBUG_PRINT("info",("using rr_from_tempfile")); + info->read_record_func= rr_from_tempfile; + } + + info->io_cache= tempfile; + reinit_io_cache(info->io_cache,READ_CACHE,0L,0,0); + info->ref_pos=table->file->ref; + if (!table->file->inited) + if (unlikely(table->file->ha_rnd_init_with_error(0))) + DBUG_RETURN(1); + + /* + addon_field is checked because if we use addon fields, + it doesn't make sense to use cache - we don't read from the table + and filesort->io_cache is read sequentially + */ + if (!disable_rr_cache && + !using_addon_fields && + thd->variables.read_rnd_buff_size && + !(table->file->ha_table_flags() & HA_FAST_KEY_READ) && + (table->db_stat & HA_READ_ONLY || + table->reginfo.lock_type < TL_FIRST_WRITE) && + (ulonglong) table->s->reclength* (table->file->stats.records+ + table->file->stats.deleted) > + (ulonglong) MIN_FILE_LENGTH_TO_USE_ROW_CACHE && + info->io_cache->end_of_file/info->ref_length * table->s->reclength > + (my_off_t) MIN_ROWS_TO_USE_TABLE_CACHE && + !table->s->blob_fields && + info->ref_length <= MAX_REFLENGTH) + { + if (! 
init_rr_cache(thd, info)) + { + DBUG_PRINT("info",("using rr_from_cache")); + info->read_record_func= rr_from_cache; + } + } + } + else if (select && select->quick) + { + DBUG_PRINT("info",("using rr_quick")); + info->read_record_func= rr_quick; + } + else if (filesort && filesort->has_filesort_result_in_memory()) + { + DBUG_PRINT("info",("using record_pointers")); + if (unlikely(table->file->ha_rnd_init_with_error(0))) + DBUG_RETURN(1); + + info->cache_pos= filesort->record_pointers; + if (using_addon_fields) + { + DBUG_PRINT("info",("using rr_unpack_from_buffer")); + DBUG_ASSERT(filesort->sorted_result_in_fsbuf); + info->unpack_counter= 0; + + if (filesort->using_packed_addons()) + { + info->read_record_func= using_packed_sortkeys ? + rr_unpack_from_buffer : + rr_unpack_from_buffer; + } + else + { + info->read_record_func= using_packed_sortkeys ? + rr_unpack_from_buffer : + rr_unpack_from_buffer; + } + } + else + { + info->cache_end= (info->cache_pos+ + filesort->return_rows * info->ref_length); + info->read_record_func= rr_from_pointers; + } + } + else if (table->file->keyread_enabled()) + { + int error; + info->read_record_func= rr_index_first; + if (!table->file->inited && + unlikely((error= table->file->ha_index_init(table->file->keyread, 1)))) + { + if (print_error) + table->file->print_error(error, MYF(0)); + DBUG_RETURN(1); + } + } + else + { + DBUG_PRINT("info",("using rr_sequential")); + info->read_record_func= rr_sequential; + if (unlikely(table->file->ha_rnd_init_with_error(1))) + DBUG_RETURN(1); + /* We can use record cache if we don't update dynamic length tables */ + if (!table->no_cache && + (use_record_cache > 0 || + (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY || + !(table->s->db_options_in_use & HA_OPTION_PACK_RECORD) || + (use_record_cache < 0 && + !(table->file->ha_table_flags() & HA_NOT_DELETE_WITH_CACHE)))) + (void) table->file->extra_opt(HA_EXTRA_CACHE, + thd->variables.read_buff_size); + } + /* Condition pushdown to 
storage engine */ + if ((table->file->ha_table_flags() & HA_CAN_TABLE_CONDITION_PUSHDOWN) && + select && select->cond && + (select->cond->used_tables() & table->map) && + !table->file->pushed_cond) + table->file->cond_push(select->cond); + + DBUG_RETURN(0); +} /* init_read_record */ + + + +void end_read_record(READ_RECORD *info) +{ + /* free cache if used */ + free_cache(info); + if (info->table) + { + if (info->table->db_stat) // if opened + (void) info->table->file->extra(HA_EXTRA_NO_CACHE); + if (info->read_record_func != rr_quick) // otherwise quick_range does it + (void) info->table->file->ha_index_or_rnd_end(); + info->table=0; + } +} + + +void free_cache(READ_RECORD *info) +{ + if (info->cache) + { + my_free_lock(info->cache); + info->cache=0; + } +} + + +static int rr_handle_error(READ_RECORD *info, int error) +{ + if (info->thd->killed) + { + info->thd->send_kill_message(); + return 1; + } + + if (error == HA_ERR_END_OF_FILE) + error= -1; + else + { + if (info->print_error) + info->table->file->print_error(error, MYF(0)); + if (error < 0) // Fix negative BDB errno + error= 1; + } + return error; +} + + +/** Read a record from head-database. */ + +static int rr_quick(READ_RECORD *info) +{ + int tmp; + while ((tmp= info->select->quick->get_next())) + { + tmp= rr_handle_error(info, tmp); + break; + } + return tmp; +} + + +/** + Reads first row in an index scan. + + @param info Scan info + + @retval + 0 Ok + @retval + -1 End of records + @retval + 1 Error +*/ + +static int rr_index_first(READ_RECORD *info) +{ + int tmp; + // tell handler that we are doing an index scan + if ((tmp = info->table->file->prepare_index_scan())) + { + tmp= rr_handle_error(info, tmp); + return tmp; + } + + tmp= info->table->file->ha_index_first(info->record()); + info->read_record_func= rr_index; + if (tmp) + tmp= rr_handle_error(info, tmp); + return tmp; +} + + +/** + Reads last row in an index scan. 
+ + @param info Scan info + + @retval + 0 Ok + @retval + -1 End of records + @retval + 1 Error +*/ + +static int rr_index_last(READ_RECORD *info) +{ + int tmp= info->table->file->ha_index_last(info->record()); + info->read_record_func= rr_index_desc; + if (tmp) + tmp= rr_handle_error(info, tmp); + return tmp; +} + + +/** + Reads index sequentially after first row. + + Read the next index record (in forward direction) and translate return + value. + + @param info Scan info + + @retval + 0 Ok + @retval + -1 End of records + @retval + 1 Error +*/ + +static int rr_index(READ_RECORD *info) +{ + int tmp= info->table->file->ha_index_next(info->record()); + if (tmp) + tmp= rr_handle_error(info, tmp); + return tmp; +} + + +/** + Reads index sequentially from the last row to the first. + + Read the prev index record (in backward direction) and translate return + value. + + @param info Scan info + + @retval + 0 Ok + @retval + -1 End of records + @retval + 1 Error +*/ + +static int rr_index_desc(READ_RECORD *info) +{ + int tmp= info->table->file->ha_index_prev(info->record()); + if (tmp) + tmp= rr_handle_error(info, tmp); + return tmp; +} + + +int rr_sequential(READ_RECORD *info) +{ + int tmp; + while ((tmp= info->table->file->ha_rnd_next(info->record()))) + { + tmp= rr_handle_error(info, tmp); + break; + } + return tmp; +} + + +static int rr_from_tempfile(READ_RECORD *info) +{ + int tmp; + for (;;) + { + if (my_b_read(info->io_cache,info->ref_pos,info->ref_length)) + return -1; /* End of file */ + if (!(tmp= info->table->file->ha_rnd_pos(info->record(), info->ref_pos))) + break; + /* The following is extremely unlikely to happen */ + if (tmp == HA_ERR_KEY_NOT_FOUND) + continue; + tmp= rr_handle_error(info, tmp); + break; + } + return tmp; +} /* rr_from_tempfile */ + + +/** + Read a result set record from a temporary file after sorting. + + The function first reads the next sorted record from the temporary file. + into a buffer. 
If a success it calls a callback function that unpacks + the fields values use in the result set from this buffer into their + positions in the regular record buffer. + + @param info Reference to the context including record + descriptors + @param Packed_addon_fields Are the addon fields packed? + This is a compile-time constant, to + avoid if (....) tests during execution. + + @retval + 0 Record successfully read. + @retval + -1 There is no record to be read anymore. +*/ + +template +static int rr_unpack_from_tempfile(READ_RECORD *info) +{ + uchar *destination= info->rec_buf; +#ifdef DBUG_TRACE + my_off_t where= my_b_tell(info->io_cache); +#endif + if (Packed_addon_fields) + { + const uint len_sz= Addon_fields::size_of_length_field; + + // First read length of the record. + if (my_b_read(info->io_cache, destination, len_sz)) + return -1; + uint res_length= Addon_fields::read_addon_length(destination); + DBUG_PRINT("info", ("rr_unpack from %llu to %p sz %u", + static_cast(where), + destination, res_length)); + DBUG_ASSERT(res_length > len_sz); + DBUG_ASSERT(info->sort_info->using_addon_fields()); + + // Then read the rest of the record. + if (my_b_read(info->io_cache, destination + len_sz, res_length - len_sz)) + return -1; /* purecov: inspected */ + } + else + { + if (my_b_read(info->io_cache, destination, info->ref_length)) + return -1; + } + + info->sort_info->unpack_addon_fields(destination); + + return 0; +} + +int rr_from_pointers(READ_RECORD *info) +{ + int tmp; + uchar *cache_pos; + + for (;;) + { + if (info->cache_pos == info->cache_end) + return -1; /* End of file */ + cache_pos= info->cache_pos; + info->cache_pos+= info->ref_length; + + if (!(tmp= info->table->file->ha_rnd_pos(info->record(), cache_pos))) + break; + + /* The following is extremely unlikely to happen */ + if (tmp == HA_ERR_KEY_NOT_FOUND) + continue; + tmp= rr_handle_error(info, tmp); + break; + } + return tmp; +} + +/** + Read a result set record from a buffer after sorting. 
+ + The function first reads the next sorted record from the sort buffer. + If a success it calls a callback function that unpacks + the fields values use in the result set from this buffer into their + positions in the regular record buffer. + + @param info Reference to the context including record + descriptors + @param Packed_addon_fields Are the addon fields packed? + This is a compile-time constant, to + avoid if (....) tests during execution. + + @retval + 0 Record successfully read. + @retval + -1 There is no record to be read anymore. +*/ + +template +static int rr_unpack_from_buffer(READ_RECORD *info) +{ + if (info->unpack_counter == info->sort_info->return_rows) + return -1; /* End of buffer */ + + uchar *record= info->sort_info->get_sorted_record( + static_cast(info->unpack_counter)); + + uint sort_length= Packed_sort_keys ? + Sort_keys::read_sortkey_length(record): + info->sort_info->get_sort_length(); + + uchar *plen= record + sort_length; + info->sort_info->unpack_addon_fields(plen); + info->unpack_counter++; + return 0; +} + /* cacheing of records from a database */ + +static const uint STRUCT_LENGTH= 3 + MAX_REFLENGTH; + +static int init_rr_cache(THD *thd, READ_RECORD *info) +{ + uint rec_cache_size, cache_records; + DBUG_ENTER("init_rr_cache"); + + info->reclength= ALIGN_SIZE(info->table->s->reclength+1); + if (info->reclength < STRUCT_LENGTH) + info->reclength= ALIGN_SIZE(STRUCT_LENGTH); + + info->error_offset= info->table->s->reclength; + cache_records= thd->variables.read_rnd_buff_size / + (info->reclength + STRUCT_LENGTH); + rec_cache_size= cache_records * info->reclength; + info->rec_cache_size= cache_records * info->ref_length; + + // We have to allocate one more byte to use uint3korr (see comments for it) + if (cache_records <= 2 || + !(info->cache= (uchar*) my_malloc_lock(rec_cache_size + cache_records * + STRUCT_LENGTH + 1, + MYF(MY_THREAD_SPECIFIC)))) + DBUG_RETURN(1); +#ifdef HAVE_valgrind + // Avoid warnings in qsort + 
bzero(info->cache, rec_cache_size + cache_records * STRUCT_LENGTH + 1); +#endif + DBUG_PRINT("info", ("Allocated buffer for %d records", cache_records)); + info->read_positions=info->cache+rec_cache_size; + info->cache_pos=info->cache_end=info->cache; + DBUG_RETURN(0); +} /* init_rr_cache */ + + +static int rr_from_cache(READ_RECORD *info) +{ + uint i; + ulong length; + my_off_t rest_of_file; + int16 error; + uchar *position,*ref_position,*record_pos; + ulong record; + + for (;;) + { + if (info->cache_pos != info->cache_end) + { + if (unlikely(info->cache_pos[info->error_offset])) + { + shortget(error,info->cache_pos); + if (info->print_error) + info->table->file->print_error(error,MYF(0)); + } + else + { + error=0; + memcpy(info->record(), info->cache_pos, + (size_t) info->table->s->reclength); + } + info->cache_pos+=info->reclength; + return ((int) error); + } + length=info->rec_cache_size; + rest_of_file=info->io_cache->end_of_file - my_b_tell(info->io_cache); + if ((my_off_t) length > rest_of_file) + length= (ulong) rest_of_file; + if (!length || my_b_read(info->io_cache,info->cache,length)) + { + DBUG_PRINT("info",("Found end of file")); + return -1; /* End of file */ + } + + length/=info->ref_length; + position=info->cache; + ref_position=info->read_positions; + for (i=0 ; i < length ; i++,position+=info->ref_length) + { + memcpy(ref_position,position,(size_t) info->ref_length); + ref_position+=MAX_REFLENGTH; + int3store(ref_position,(long) i); + ref_position+=3; + } + my_qsort(info->read_positions, length, STRUCT_LENGTH, (qsort_cmp) rr_cmp); + + position=info->read_positions; + for (i=0 ; i < length ; i++) + { + memcpy(info->ref_pos,position,(size_t) info->ref_length); + position+=MAX_REFLENGTH; + record=uint3korr(position); + position+=3; + record_pos=info->cache+record*info->reclength; + if (unlikely((error= (int16) info->table->file-> + ha_rnd_pos(record_pos,info->ref_pos)))) + { + record_pos[info->error_offset]=1; + shortstore(record_pos,error); + 
DBUG_PRINT("error",("Got error: %d:%d when reading row", + my_errno, error)); + } + else + record_pos[info->error_offset]=0; + } + info->cache_end=(info->cache_pos=info->cache)+length*info->reclength; + } +} /* rr_from_cache */ + + +static int rr_cmp(uchar *a,uchar *b) +{ + if (a[0] != b[0]) + return (int) a[0] - (int) b[0]; + if (a[1] != b[1]) + return (int) a[1] - (int) b[1]; + if (a[2] != b[2]) + return (int) a[2] - (int) b[2]; +#if MAX_REFLENGTH == 4 + return (int) a[3] - (int) b[3]; +#else + if (a[3] != b[3]) + return (int) a[3] - (int) b[3]; + if (a[4] != b[4]) + return (int) a[4] - (int) b[4]; + if (a[5] != b[5]) + return (int) a[5] - (int) b[5]; + if (a[6] != b[6]) + return (int) a[6] - (int) b[6]; + return (int) a[7] - (int) b[7]; +#endif +} + + +/** + Copy (unpack) values appended to sorted fields from a buffer back to + their regular positions specified by the Field::ptr pointers. + + @param addon_field Array of descriptors for appended fields + @param buff Buffer which to unpack the value from + + @note + The function is supposed to be used only as a callback function + when getting field values for the sorted result set. + +*/ +template +inline void SORT_INFO::unpack_addon_fields(uchar *buff) +{ + SORT_ADDON_FIELD *addonf= addon_fields->begin(); + uchar *buff_end= buff + sort_buffer_size(); + const uchar *start_of_record= buff + addonf->offset; + + for ( ; addonf != addon_fields->end() ; addonf++) + { + Field *field= addonf->field; + if (addonf->null_bit && (addonf->null_bit & buff[addonf->null_offset])) + { + field->set_null(); + continue; + } + field->set_notnull(); + if (Packed_addon_fields) + start_of_record= field->unpack(field->ptr, start_of_record, buff_end, 0); + else + field->unpack(field->ptr, buff + addonf->offset, buff_end, 0); + } +} + + +/* + @brief + Read and unpack next record from a table + + @details + The function first reads the next record from the table. + If a success then it unpacks the values to the base table fields. 
+ This is used by SJM scan table to unpack the values of the materialized + table to the base table fields + + @retval + 0 Record successfully read. + @retval + -1 There is no record to be read anymore. + >0 Error +*/ +int read_record_func_for_rr_and_unpack(READ_RECORD *info) +{ + int error; + if ((error= info->read_record_func_and_unpack_calls(info))) + return error; + + for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++) + (*cp->do_copy)(cp); + + return error; +} diff --git a/sql/records.h b/sql/records.h new file mode 100644 index 00000000..9bc1b98f --- /dev/null +++ b/sql/records.h @@ -0,0 +1,104 @@ +#ifndef SQL_RECORDS_H +#define SQL_RECORDS_H +/* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include "table.h" + +struct st_join_table; +class handler; +class THD; +class SQL_SELECT; +class Copy_field; +class SORT_INFO; + +struct READ_RECORD; + +void end_read_record(READ_RECORD *info); +void free_cache(READ_RECORD *info); + +/** + A context for reading through a single table using a chosen access method: + index read, scan, etc, use of cache, etc. + + Use by: + READ_RECORD read_record; + init_read_record(&read_record, ...); + while (read_record.read_record()) + { + ... 
+ } + end_read_record(); +*/ + +struct READ_RECORD +{ + typedef int (*Read_func)(READ_RECORD*); + typedef void (*Unlock_row_func)(st_join_table *); + typedef int (*Setup_func)(struct st_join_table*); + + TABLE *table; /* Head-form */ + Unlock_row_func unlock_row; + Read_func read_record_func; + Read_func read_record_func_and_unpack_calls; + THD *thd; + SQL_SELECT *select; + uint ref_length, reclength, rec_cache_size, error_offset; + + /** + Counting records when reading result from filesort(). + Used when filesort leaves the result in the filesort buffer. + */ + ha_rows unpack_counter; + + uchar *ref_pos; /* pointer to form->refpos */ + uchar *rec_buf; /* to read field values after filesort */ + uchar *cache,*cache_pos,*cache_end,*read_positions; + + /* + Structure storing information about sorting + */ + SORT_INFO *sort_info; + struct st_io_cache *io_cache; + bool print_error; + + int read_record() { return read_record_func(this); } + uchar *record() const { return table->record[0]; } + + /* + SJ-Materialization runtime may need to read fields from the materialized + table and unpack them into original table fields: + */ + Copy_field *copy_field; + Copy_field *copy_field_end; +public: + READ_RECORD() : table(NULL), cache(NULL) {} + ~READ_RECORD() { end_read_record(this); } +}; + +bool init_read_record(READ_RECORD *info, THD *thd, TABLE *reg_form, + SQL_SELECT *select, SORT_INFO *sort, + int use_record_cache, + bool print_errors, bool disable_rr_cache); +bool init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table, + bool print_error, uint idx, bool reverse); + +void rr_unlock_row(st_join_table *tab); + +#endif /* SQL_RECORDS_H */ diff --git a/sql/repl_failsafe.cc b/sql/repl_failsafe.cc new file mode 100644 index 00000000..d0285b54 --- /dev/null +++ b/sql/repl_failsafe.cc @@ -0,0 +1,250 @@ +/* + Copyright (c) 2001, 2011, Oracle and/or its affiliates. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + All of the functions defined in this file which are not used (the ones to + handle failsafe) are not used; their code has not been updated for more + than one year now so should be considered as BADLY BROKEN. Do not enable + it. The used functions (to handle LOAD DATA FROM MASTER, plus some small + functions like register_slave()) are working. 
+*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_parse.h" // check_access +#ifdef HAVE_REPLICATION + +#include "repl_failsafe.h" +#include "sql_acl.h" // REPL_SLAVE_ACL +#include "sql_repl.h" +#include "slave.h" +#include "rpl_mi.h" +#include "rpl_filter.h" +#include "log_event.h" +#include + + +struct Slave_info +{ + uint32 server_id; + uint32 master_id; + char host[HOSTNAME_LENGTH*SYSTEM_CHARSET_MBMAXLEN+1]; + char user[USERNAME_LENGTH+1]; + char password[MAX_PASSWORD_LENGTH*SYSTEM_CHARSET_MBMAXLEN+1]; + uint16 port; +}; + + +Atomic_counter binlog_dump_thread_count; +ulong rpl_status=RPL_NULL; +mysql_mutex_t LOCK_rpl_status; + +const char *rpl_role_type[] = {"MASTER","SLAVE",NullS}; +TYPELIB rpl_role_typelib = {array_elements(rpl_role_type)-1,"", + rpl_role_type, NULL}; + +const char* rpl_status_type[]= +{ + "AUTH_MASTER","IDLE_SLAVE","ACTIVE_SLAVE","LOST_SOLDIER","TROOP_SOLDIER", + "RECOVERY_CAPTAIN","NULL",NullS +}; + +/* + All of the functions defined in this file which are not used (the ones to + handle failsafe) are not used; their code has not been updated for more than + one year now so should be considered as BADLY BROKEN. Do not enable it. + The used functions (to handle LOAD DATA FROM MASTER, plus some small + functions like register_slave()) are working. 
+*/ + +void change_rpl_status(ulong from_status, ulong to_status) +{ + mysql_mutex_lock(&LOCK_rpl_status); + if (rpl_status == from_status || rpl_status == RPL_ANY) + rpl_status = to_status; + mysql_mutex_unlock(&LOCK_rpl_status); +} + + +#define get_object(p, obj, msg) \ +{\ + uint len = (uint)*p++; \ + if (p + len > p_end || len >= sizeof(obj)) \ + {\ + errmsg= msg;\ + goto err; \ + }\ + ::strmake(obj, (char*) p, len); \ + p+= len; \ +}\ + + +void THD::unregister_slave() +{ + if (auto old_si= slave_info) + { + mysql_mutex_lock(&LOCK_thd_data); + slave_info= 0; + mysql_mutex_unlock(&LOCK_thd_data); + my_free(old_si); + binlog_dump_thread_count--; + } +} + + +/** + Register slave + + @return + 0 ok + @return + 1 Error. Error message sent to client +*/ + +int THD::register_slave(uchar *packet, size_t packet_length) +{ + Slave_info *si; + uchar *p= packet, *p_end= packet + packet_length; + const char *errmsg= "Wrong parameters to function register_slave"; + + if (check_access(this, PRIV_COM_REGISTER_SLAVE, any_db.str, NULL,NULL,0,0)) + return 1; + if (!(si= (Slave_info*)my_malloc(key_memory_SLAVE_INFO, sizeof(Slave_info), + MYF(MY_WME)))) + return 1; + + variables.server_id= si->server_id= uint4korr(p); + p+= 4; + get_object(p,si->host, "Failed to register slave: too long 'report-host'"); + get_object(p,si->user, "Failed to register slave: too long 'report-user'"); + get_object(p,si->password, "Failed to register slave; too long 'report-password'"); + if (p+10 > p_end) + goto err; + si->port= uint2korr(p); + p += 2; + /* + We need to by pass the bytes used in the fake rpl_recovery_rank + variable. It was removed in patch for BUG#13963. But this would + make a server with that patch unable to connect to an old master. 
+ See: BUG#49259 + */ + // si->rpl_recovery_rank= uint4korr(p); + p += 4; + if (!(si->master_id= uint4korr(p))) + si->master_id= global_system_variables.server_id; + + if (!*si->host) + ::strmake(si->host, main_security_ctx.host_or_ip, sizeof(si->host)); + + unregister_slave(); + mysql_mutex_lock(&LOCK_thd_data); + slave_info= si; + mysql_mutex_unlock(&LOCK_thd_data); + binlog_dump_thread_count++; + return 0; + +err: + delete si; + my_message(ER_UNKNOWN_ERROR, errmsg, MYF(0)); /* purecov: inspected */ + return 1; +} + + +bool THD::is_binlog_dump_thread() +{ + mysql_mutex_lock(&LOCK_thd_data); + bool res= slave_info != NULL; + mysql_mutex_unlock(&LOCK_thd_data); + + return res; +} + + +static my_bool show_slave_hosts_callback(THD *thd, Protocol *protocol) +{ + my_bool res= FALSE; + mysql_mutex_lock(&thd->LOCK_thd_data); + if (auto si= thd->slave_info) + { + protocol->prepare_for_resend(); + protocol->store(si->server_id); + protocol->store(si->host, strlen(si->host), &my_charset_bin); + if (opt_show_slave_auth_info) + { + protocol->store(si->user, safe_strlen(si->user), &my_charset_bin); + protocol->store(si->password, safe_strlen(si->password), &my_charset_bin); + } + protocol->store((uint32) si->port); + protocol->store(si->master_id); + res= protocol->write(); + } + mysql_mutex_unlock(&thd->LOCK_thd_data); + return res; +} + + +/** + Execute a SHOW SLAVE HOSTS statement. + + @param thd Pointer to THD object for the client thread executing the + statement. 
+ + @retval FALSE success + @retval TRUE failure +*/ +bool show_slave_hosts(THD* thd) +{ + List field_list; + Protocol *protocol= thd->protocol; + MEM_ROOT *mem_root= thd->mem_root; + DBUG_ENTER("show_slave_hosts"); + + field_list.push_back(new (mem_root) + Item_return_int(thd, "Server_id", 10, + MYSQL_TYPE_LONG), + thd->mem_root); + field_list.push_back(new (mem_root) + Item_empty_string(thd, "Host", 20), + thd->mem_root); + if (opt_show_slave_auth_info) + { + field_list.push_back(new (mem_root) Item_empty_string(thd, "User", 20), + thd->mem_root); + field_list.push_back(new (mem_root) Item_empty_string(thd, "Password", 20), + thd->mem_root); + } + field_list.push_back(new (mem_root) + Item_return_int(thd, "Port", 7, MYSQL_TYPE_LONG), + thd->mem_root); + field_list.push_back(new (mem_root) + Item_return_int(thd, "Master_id", 10, MYSQL_TYPE_LONG), + thd->mem_root); + + if (protocol->send_result_set_metadata(&field_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) + DBUG_RETURN(TRUE); + + if (server_threads.iterate(show_slave_hosts_callback, protocol)) + DBUG_RETURN(true); + + my_eof(thd); + DBUG_RETURN(FALSE); +} + +#endif /* HAVE_REPLICATION */ + diff --git a/sql/repl_failsafe.h b/sql/repl_failsafe.h new file mode 100644 index 00000000..6f8bdfc5 --- /dev/null +++ b/sql/repl_failsafe.h @@ -0,0 +1,43 @@ +#ifndef REPL_FAILSAFE_INCLUDED +#define REPL_FAILSAFE_INCLUDED + +/* Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifdef HAVE_REPLICATION + +#include "mysql.h" +#include +#include "slave.h" + +extern Atomic_counter binlog_dump_thread_count; +typedef enum {RPL_AUTH_MASTER=0,RPL_IDLE_SLAVE,RPL_ACTIVE_SLAVE, + RPL_LOST_SOLDIER,RPL_TROOP_SOLDIER, + RPL_RECOVERY_CAPTAIN,RPL_NULL /* inactive */, + RPL_ANY /* wild card used by change_rpl_status */ } RPL_STATUS; +extern ulong rpl_status; + +extern mysql_mutex_t LOCK_rpl_status; +extern mysql_cond_t COND_rpl_status; +extern TYPELIB rpl_role_typelib; +extern const char* rpl_role_type[], *rpl_status_type[]; + +void change_rpl_status(ulong from_status, ulong to_status); +int find_recovery_captain(THD* thd, MYSQL* mysql); + +bool show_slave_hosts(THD* thd); + +#endif /* HAVE_REPLICATION */ +#endif /* REPL_FAILSAFE_INCLUDED */ diff --git a/sql/replication.h b/sql/replication.h new file mode 100644 index 00000000..9a5254b4 --- /dev/null +++ b/sql/replication.h @@ -0,0 +1,567 @@ +/* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef REPLICATION_H +#define REPLICATION_H + +/*************************************************************************** + NOTE: plugin locking. + + The plugin is locked on Binlog_transmit_observer::transmit_start and is + unlocked after Binlog_transmit_observer::transmit_stop. All other + master observable events happen between these two and don't lock the + plugin at all. + + Also a plugin is locked on Binlog_relay_IO_observer::thread_start + and unlocked after Binlog_relay_IO_observer::thread_stop. +***************************************************************************/ + +#include + +typedef struct st_mysql MYSQL; + +#ifdef __cplusplus +extern "C" { +#endif + +/** + Transaction observer flags. +*/ +enum Trans_flags { + /** Transaction is a real transaction */ + TRANS_IS_REAL_TRANS = 1 +}; + +/** + Transaction observer parameter +*/ +typedef struct Trans_param { + uint32 server_id; + uint32 flags; + + /* + The latest binary log file name and position written by current + transaction, if binary log is disabled or no log event has been + written into binary log file by current transaction (events + written into transaction log cache are not counted), these two + member will be zero. + */ + const char *log_file; + my_off_t log_pos; +} Trans_param; + +/** + Observes and extends transaction execution +*/ +typedef struct Trans_observer { + uint32 len; + + /** + This callback is called after transaction commit + + This callback is called right after commit to storage engines for + transactional tables. + + For non-transactional tables, this is called at the end of the + statement, before sending statement status, if the statement + succeeded. + + @note The return value is currently ignored by the server. 
+ @note This hook is called wo/ any global mutex held + + @param param The parameter for transaction observers + + @retval 0 Success + @retval 1 Failure + */ + int (*after_commit)(Trans_param *param); + + /** + This callback is called after transaction rollback + + This callback is called right after rollback to storage engines + for transactional tables. + + For non-transactional tables, this is called at the end of the + statement, before sending statement status, if the statement + failed. + + @note The return value is currently ignored by the server. + + @param param The parameter for transaction observers + + @note This hook is called wo/ any global mutex held + + @retval 0 Success + @retval 1 Failure + */ + int (*after_rollback)(Trans_param *param); +} Trans_observer; + +/** + Binlog storage flags +*/ +enum Binlog_storage_flags { + /** Binary log was sync:ed */ + BINLOG_STORAGE_IS_SYNCED = 1, + + /** First(or alone) in a group commit */ + BINLOG_GROUP_COMMIT_LEADER = 2, + + /** Last(or alone) in a group commit */ + BINLOG_GROUP_COMMIT_TRAILER = 4 +}; + +/** + Binlog storage observer parameters + */ +typedef struct Binlog_storage_param { + uint32 server_id; +} Binlog_storage_param; + +/** + Observe binlog logging storage +*/ +typedef struct Binlog_storage_observer { + uint32 len; + + /** + This callback is called after binlog has been flushed + + This callback is called after cached events have been flushed to + binary log file. Whether the binary log file is synchronized to + disk is indicated by the bit BINLOG_STORAGE_IS_SYNCED in @a flags. 
+ + @note: this hook is called with LOCK_log mutex held + + @param param Observer common parameter + @param log_file Binlog file name been updated + @param log_pos Binlog position after update + @param flags flags for binlog storage + + @retval 0 Success + @retval 1 Failure + */ + int (*after_flush)(Binlog_storage_param *param, + const char *log_file, my_off_t log_pos, + uint32 flags); + + /** + This callback is called after binlog has been synced + + This callback is called after events flushed to disk has been sync:ed + ("group committed"). + + @note: this hook is called with LOCK_after_binlog_sync mutex held + + @param param Observer common parameter + @param log_file Binlog file name been updated + @param log_pos Binlog position after update + @param flags flags for binlog storage + + @retval 0 Success + @retval 1 Failure + */ + int (*after_sync)(Binlog_storage_param *param, + const char *log_file, my_off_t log_pos, + uint32 flags); +} Binlog_storage_observer; + +/** + Replication binlog transmitter (binlog dump) observer parameter. +*/ +typedef struct Binlog_transmit_param { + uint32 server_id; + uint32 flags; +} Binlog_transmit_param; + +/** + Observe and extends the binlog dumping thread. 
*/
typedef struct Binlog_transmit_observer {
  uint32 len;

  /**
    This callback is called when binlog dumping starts


    @param param Observer common parameter
    @param log_file Binlog file name to transmit from
    @param log_pos Binlog position to transmit from

    @retval 0 Success
    @retval 1 Failure
  */
  int (*transmit_start)(Binlog_transmit_param *param,
                        const char *log_file, my_off_t log_pos);

  /**
    This callback is called when binlog dumping stops

    @param param Observer common parameter

    @retval 0 Success
    @retval 1 Failure
  */
  int (*transmit_stop)(Binlog_transmit_param *param);

  /**
    This callback is called to reserve bytes in packet header for event transmission

    This callback is called when resetting transmit packet header to
    reserve bytes for this observer in packet header.

    The @a header buffer is allocated by the server code, and @a size
    is the size of the header buffer. Each observer can only reserve
    a maximum size of @a size in the header.

    @param param Observer common parameter
    @param header Pointer of the header buffer
    @param size Size of the header buffer
    @param len Header length reserved by this observer

    @retval 0 Success
    @retval 1 Failure
  */
  int (*reserve_header)(Binlog_transmit_param *param,
                        unsigned char *header,
                        unsigned long size,
                        unsigned long *len);

  /**
    This callback is called before sending an event packet to slave

    @param param Observer common parameter
    @param packet Binlog event packet to send
    @param len Length of the event packet
    @param log_file Binlog file name of the event packet to send
    @param log_pos Binlog position of the event packet to send

    @retval 0 Success
    @retval 1 Failure
  */
  int (*before_send_event)(Binlog_transmit_param *param,
                           unsigned char *packet, unsigned long len,
                           const char *log_file, my_off_t log_pos );

  /**
    This callback is called after sending an event packet to slave

    @param param Observer common parameter
    @param event_buf Binlog event packet buffer sent
    @param len length of the event packet buffer

    @retval 0 Success
    @retval 1 Failure
  */
  int (*after_send_event)(Binlog_transmit_param *param,
                          const char *event_buf, unsigned long len);

  /**
    This callback is called after resetting master status

    This is called when executing the command RESET MASTER, and is
    used to reset status variables added by observers.
+ + @param param Observer common parameter + + @retval 0 Success + @retval 1 Failure + */ + int (*after_reset_master)(Binlog_transmit_param *param); +} Binlog_transmit_observer; + +/** + Binlog relay IO flags +*/ +enum Binlog_relay_IO_flags { + /** Binary relay log was sync:ed */ + BINLOG_RELAY_IS_SYNCED = 1 +}; + + +/** + Replication binlog relay IO observer parameter +*/ +typedef struct Binlog_relay_IO_param { + uint32 server_id; + + /* Master host, user and port */ + char *host; + char *user; + unsigned int port; + + char *master_log_name; + my_off_t master_log_pos; + + MYSQL *mysql; /* the connection to master */ +} Binlog_relay_IO_param; + +/** + Observes and extends the service of slave IO thread. +*/ +typedef struct Binlog_relay_IO_observer { + uint32 len; + + /** + This callback is called when slave IO thread starts + + @param param Observer common parameter + + @retval 0 Success + @retval 1 Failure + */ + int (*thread_start)(Binlog_relay_IO_param *param); + + /** + This callback is called when slave IO thread stops + + @param param Observer common parameter + + @retval 0 Success + @retval 1 Failure + */ + int (*thread_stop)(Binlog_relay_IO_param *param); + + /** + This callback is called before slave requesting binlog transmission from master + + This is called before slave issuing BINLOG_DUMP command to master + to request binlog. 
+ + @param param Observer common parameter + @param flags binlog dump flags + + @retval 0 Success + @retval 1 Failure + */ + int (*before_request_transmit)(Binlog_relay_IO_param *param, uint32 flags); + + /** + This callback is called after read an event packet from master + + @param param Observer common parameter + @param packet The event packet read from master + @param len Length of the event packet read from master + @param event_buf The event packet return after process + @param event_len The length of event packet return after process + + @retval 0 Success + @retval 1 Failure + */ + int (*after_read_event)(Binlog_relay_IO_param *param, + const char *packet, unsigned long len, + const char **event_buf, unsigned long *event_len); + + /** + This callback is called after written an event packet to relay log + + @param param Observer common parameter + @param event_buf Event packet written to relay log + @param event_len Length of the event packet written to relay log + @param flags flags for relay log + + @retval 0 Success + @retval 1 Failure + */ + int (*after_queue_event)(Binlog_relay_IO_param *param, + const char *event_buf, unsigned long event_len, + uint32 flags); + + /** + This callback is called after reset slave relay log IO status + + @param param Observer common parameter + + @retval 0 Success + @retval 1 Failure + */ + int (*after_reset_slave)(Binlog_relay_IO_param *param); +} Binlog_relay_IO_observer; + + +/** + Register a transaction observer + + @param observer The transaction observer to register + @param p pointer to the internal plugin structure + + @retval 0 Success + @retval 1 Observer already exists +*/ +int register_trans_observer(Trans_observer *observer, void *p); + +/** + Unregister a transaction observer + + @param observer The transaction observer to unregister + @param p pointer to the internal plugin structure + + @retval 0 Success + @retval 1 Observer not exists +*/ +int unregister_trans_observer(Trans_observer *observer, void *p); + 
+/** + Register a binlog storage observer + + @param observer The binlog storage observer to register + @param p pointer to the internal plugin structure + + @retval 0 Success + @retval 1 Observer already exists +*/ +int register_binlog_storage_observer(Binlog_storage_observer *observer, void *p); + +/** + Unregister a binlog storage observer + + @param observer The binlog storage observer to unregister + @param p pointer to the internal plugin structure + + @retval 0 Success + @retval 1 Observer not exists +*/ +int unregister_binlog_storage_observer(Binlog_storage_observer *observer, void *p); + +/** + Register a binlog transmit observer + + @param observer The binlog transmit observer to register + @param p pointer to the internal plugin structure + + @retval 0 Success + @retval 1 Observer already exists +*/ +int register_binlog_transmit_observer(Binlog_transmit_observer *observer, void *p); + +/** + Unregister a binlog transmit observer + + @param observer The binlog transmit observer to unregister + @param p pointer to the internal plugin structure + + @retval 0 Success + @retval 1 Observer not exists +*/ +int unregister_binlog_transmit_observer(Binlog_transmit_observer *observer, void *p); + +/** + Register a binlog relay IO (slave IO thread) observer + + @param observer The binlog relay IO observer to register + @param p pointer to the internal plugin structure + + @retval 0 Success + @retval 1 Observer already exists +*/ +int register_binlog_relay_io_observer(Binlog_relay_IO_observer *observer, void *p); + +/** + Unregister a binlog relay IO (slave IO thread) observer + + @param observer The binlog relay IO observer to unregister + @param p pointer to the internal plugin structure + + @retval 0 Success + @retval 1 Observer not exists +*/ +int unregister_binlog_relay_io_observer(Binlog_relay_IO_observer *observer, void *p); + +/** + Connect to master + + This function can only used in the slave I/O thread context, and + will use the same master information to 
do the connection. + + @code + MYSQL *mysql = mysql_init(NULL); + if (rpl_connect_master(mysql)) + { + // do stuff with the connection + } + mysql_close(mysql); // close the connection + @endcode + + @param mysql address of MYSQL structure to use, pass NULL will + create a new one + + @return address of MYSQL structure on success, NULL on failure +*/ +MYSQL *rpl_connect_master(MYSQL *mysql); + +/** + Get the value of user variable as an integer. + + This function will return the value of variable @a name as an + integer. If the original value of the variable is not an integer, + the value will be converted into an integer. + + @param name user variable name + @param value pointer to return the value + @param null_value if not NULL, the function will set it to true if + the value of variable is null, set to false if not + + @retval 0 Success + @retval 1 Variable not found +*/ +int get_user_var_int(const char *name, + long long int *value, int *null_value); + +/** + Get the value of user variable as a double precision float number. + + This function will return the value of variable @a name as real + number. If the original value of the variable is not a real number, + the value will be converted into a real number. + + @param name user variable name + @param value pointer to return the value + @param null_value if not NULL, the function will set it to true if + the value of variable is null, set to false if not + + @retval 0 Success + @retval 1 Variable not found +*/ +int get_user_var_real(const char *name, + double *value, int *null_value); + +/** + Get the value of user variable as a string. + + This function will return the value of variable @a name as + string. If the original value of the variable is not a string, + the value will be converted into a string. 
+ + @param name user variable name + @param value pointer to the value buffer + @param len length of the value buffer + @param precision precision of the value if it is a float number + @param null_value if not NULL, the function will set it to true if + the value of variable is null, set to false if not + + @retval 0 Success + @retval 1 Variable not found +*/ +int get_user_var_str(const char *name, + char *value, unsigned long len, + unsigned int precision, int *null_value); + + + +#ifdef __cplusplus +} +#endif +#endif /* REPLICATION_H */ diff --git a/sql/rowid_filter.cc b/sql/rowid_filter.cc new file mode 100644 index 00000000..d85bed96 --- /dev/null +++ b/sql/rowid_filter.cc @@ -0,0 +1,687 @@ +/* + Copyright (c) 2018, 2019 MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "mariadb.h" +#include "table.h" +#include "sql_class.h" +#include "opt_range.h" +#include "rowid_filter.h" +#include "sql_select.h" +#include "opt_trace.h" + + +inline +double Range_rowid_filter_cost_info::lookup_cost( + Rowid_filter_container_type cont_type) +{ + switch (cont_type) { + case SORTED_ARRAY_CONTAINER: + return log(est_elements)*0.01; + default: + DBUG_ASSERT(0); + return 0; + } +} + + +/** + @brief + The average gain in cost per row to use the range filter with this cost info +*/ + +inline +double Range_rowid_filter_cost_info::avg_access_and_eval_gain_per_row( + Rowid_filter_container_type cont_type) +{ + return (1+1.0/TIME_FOR_COMPARE) * (1 - selectivity) - + lookup_cost(cont_type); +} + + +/** + @brief + The average adjusted gain in cost per row of using the filter + + @param access_cost_factor the adjusted cost of access a row + + @details + The current code to estimate the cost of a ref access is quite inconsistent: + in some cases the effect of page buffers is taken into account, for others + just the engine dependent read_time() is employed. That's why the average + cost of one random seek might differ from 1. + The parameter access_cost_factor can be considered as the cost of a random + seek that is used for the given ref access. Changing the cost of a random + seek we have to change the first coefficient in the linear formula by which + we calculate the gain of usage the given filter for a_adj. This function + calculates the value of a_adj. 
+ + @note + Currently we require that access_cost_factor should be a number between + 0.0 and 1.0 +*/ + +inline +double Range_rowid_filter_cost_info::avg_adjusted_gain_per_row( + double access_cost_factor) +{ + return a - (1 - access_cost_factor) * (1 - selectivity); +} + + +/** + @brief + Set the parameters used to choose the filter with the best adjusted gain + + @note + This function must be called before the call of get_adjusted_gain() + for the given filter. +*/ + +inline void +Range_rowid_filter_cost_info::set_adjusted_gain_param(double access_cost_factor) +{ + a_adj= avg_adjusted_gain_per_row(access_cost_factor); + cross_x_adj= b / a_adj; +} + + +/** + @brief + Initialize the cost info structure for a range filter + + @param cont_type The type of the container of the range filter + @param tab The table for which the range filter is evaluated + @param idx The index used to create this range filter +*/ + +void Range_rowid_filter_cost_info::init(Rowid_filter_container_type cont_type, + TABLE *tab, uint idx) +{ + DBUG_ASSERT(tab->opt_range_keys.is_set(idx)); + + container_type= cont_type; + table= tab; + key_no= idx; + est_elements= (ulonglong) table->opt_range[key_no].rows; + b= build_cost(container_type); + selectivity= est_elements/((double) table->stat_records()); + a= avg_access_and_eval_gain_per_row(container_type); + if (a > 0) + cross_x= b/a; + else + cross_x= b+1; + abs_independent.clear_all(); +} + + +/** + @brief + Return the cost of building a range filter of a certain type +*/ + +double +Range_rowid_filter_cost_info::build_cost(Rowid_filter_container_type cont_type) +{ + double cost= 0; + DBUG_ASSERT(table->opt_range_keys.is_set(key_no)); + + cost+= table->opt_range[key_no].index_only_cost; + + switch (cont_type) { + + case SORTED_ARRAY_CONTAINER: + cost+= ARRAY_WRITE_COST * est_elements; /* cost filling the container */ + cost+= ARRAY_SORT_C * est_elements * log(est_elements); /* sorting cost */ + break; + default: + DBUG_ASSERT(0); + } + + return 
cost; +} + + +Rowid_filter_container *Range_rowid_filter_cost_info::create_container() +{ + THD *thd= table->in_use; + uint elem_sz= table->file->ref_length; + Rowid_filter_container *res= 0; + + switch (container_type) { + case SORTED_ARRAY_CONTAINER: + res= new (thd->mem_root) Rowid_filter_sorted_array((uint) est_elements, + elem_sz); + break; + default: + DBUG_ASSERT(0); + } + return res; +} + + +static +int compare_range_rowid_filter_cost_info_by_a( + Range_rowid_filter_cost_info **filter_ptr_1, + Range_rowid_filter_cost_info **filter_ptr_2) +{ + double diff= (*filter_ptr_2)->get_a() - (*filter_ptr_1)->get_a(); + return (diff < 0 ? -1 : (diff > 0 ? 1 : 0)); +} + + +/** + @brief + Prepare the array with cost info on range filters to be used by optimizer + + @details + The function removes the array of cost info on range filters the elements + for those range filters that won't be ever chosen as the best filter, no + matter what index will be used to access the table and at what step the + table will be joined. +*/ + +void TABLE::prune_range_rowid_filters() +{ + /* + For the elements of the array with cost info on range filters + build a bit matrix of absolutely independent elements. + Two elements are absolutely independent if they such indexes that + there is no other index that overlaps both of them or is constraint + correlated with both of them. Use abs_independent key maps to store + the elements if this bit matrix. 
+ */ + + Range_rowid_filter_cost_info **filter_ptr_1= range_rowid_filter_cost_info_ptr; + for (uint i= 0; + i < range_rowid_filter_cost_info_elems; + i++, filter_ptr_1++) + { + uint key_no= (*filter_ptr_1)->key_no; + Range_rowid_filter_cost_info **filter_ptr_2= filter_ptr_1 + 1; + for (uint j= i+1; + j < range_rowid_filter_cost_info_elems; + j++, filter_ptr_2++) + { + key_map map_1= key_info[key_no].overlapped; + map_1.merge(key_info[key_no].constraint_correlated); + key_map map_2= key_info[(*filter_ptr_2)->key_no].overlapped; + map_2.merge(key_info[(*filter_ptr_2)->key_no].constraint_correlated); + map_1.intersect(map_2); + if (map_1.is_clear_all()) + { + (*filter_ptr_1)->abs_independent.set_bit((*filter_ptr_2)->key_no); + (*filter_ptr_2)->abs_independent.set_bit(key_no); + } + } + } + + /* Sort the array range_filter_cost_info by 'a' in descending order */ + my_qsort(range_rowid_filter_cost_info_ptr, + range_rowid_filter_cost_info_elems, + sizeof(Range_rowid_filter_cost_info *), + (qsort_cmp) compare_range_rowid_filter_cost_info_by_a); + + /* + For each element check whether it is created for the filter that + can be ever chosen as the best one. If it's not the case remove + from the array. Otherwise put it in the array in such a place + that all already checked elements left the array are ordered by + cross_x. 
+ */ + + Range_rowid_filter_cost_info **cand_filter_ptr= + range_rowid_filter_cost_info_ptr; + for (uint i= 0; + i < range_rowid_filter_cost_info_elems; + i++, cand_filter_ptr++) + { + bool is_pruned= false; + Range_rowid_filter_cost_info **usable_filter_ptr= + range_rowid_filter_cost_info_ptr; + key_map abs_indep; + abs_indep.clear_all(); + for (uint j= 0; j < i; j++, usable_filter_ptr++) + { + if ((*cand_filter_ptr)->cross_x >= (*usable_filter_ptr)->cross_x) + { + if (abs_indep.is_set((*usable_filter_ptr)->key_no)) + { + /* + The following is true here for the element e being checked: + There are at 2 elements e1 and e2 among already selected such that + e1.cross_x < e.cross_x and e1.a > e.a + and + e2.cross_x < e_cross_x and e2.a > e.a, + i.e. the range filters f1, f2 of both e1 and e2 always promise + better gains then the range filter of e. + As e1 and e2 are absolutely independent one of the range filters + f1, f2 will be always a better choice than f1 no matter what index + is chosen to access the table. Because of this the element e + can be safely removed from the array. 
+ */ + + is_pruned= true; + break; + } + abs_indep.merge((*usable_filter_ptr)->abs_independent); + } + else + { + /* + Move the element being checked to the proper position to have all + elements that have been already checked to be sorted by cross_x + */ + Range_rowid_filter_cost_info *moved= *cand_filter_ptr; + memmove(usable_filter_ptr+1, usable_filter_ptr, + sizeof(Range_rowid_filter_cost_info *) * (i-j-1)); + *usable_filter_ptr= moved; + } + } + if (is_pruned) + { + /* Remove the checked element from the array */ + memmove(cand_filter_ptr, cand_filter_ptr+1, + sizeof(Range_rowid_filter_cost_info *) * + (range_rowid_filter_cost_info_elems - 1 - i)); + range_rowid_filter_cost_info_elems--; + } + } +} + + +/** + @brief + Return maximum number of elements that a container allowed to have + */ + +static ulonglong +get_max_range_rowid_filter_elems_for_table( + THD *thd, TABLE *tab, + Rowid_filter_container_type cont_type) +{ + switch (cont_type) { + case SORTED_ARRAY_CONTAINER : + return thd->variables.max_rowid_filter_size/tab->file->ref_length; + default : + DBUG_ASSERT(0); + return 0; + } +} + + +/** + @brief + Prepare info on possible range filters used by optimizer + + @param table The thread handler + + @details + The function first selects the indexes of the table that potentially + can be used for range filters and allocates an array of the objects + of the Range_rowid_filter_cost_info type to store cost info on + possible range filters and an array of pointers to these objects. + The latter is created for easy sorting of the objects with cost info + by different sort criteria. Then the function initializes the allocated + array with cost info for each possible range filter. After this + the function calls the method TABLE::prune_range_rowid_filters(). 
+ The method removes the elements of the array for the filters that + promise less gain then others remaining in the array in any situation + and optimizes the order of the elements for faster choice of the best + range filter. +*/ + +void TABLE::init_cost_info_for_usable_range_rowid_filters(THD *thd) +{ + uint key_no; + key_map usable_range_filter_keys; + usable_range_filter_keys.clear_all(); + key_map::Iterator it(opt_range_keys); + + if (file->ha_table_flags() & HA_NON_COMPARABLE_ROWID) + return; // Cannot create filtering + + /* + From all indexes that can be used for range accesses select only such that + - range filter pushdown is supported by the engine for them (1) + - they are not clustered primary (2) + - the range filter containers for them are not too large (3) + */ + while ((key_no= it++) != key_map::Iterator::BITMAP_END) + { + if (!(file->index_flags(key_no, 0, 1) & HA_DO_RANGE_FILTER_PUSHDOWN)) // !1 + continue; + if (file->is_clustering_key(key_no)) // !2 + continue; + if (opt_range[key_no].rows > + get_max_range_rowid_filter_elems_for_table(thd, this, + SORTED_ARRAY_CONTAINER)) // !3 + continue; + usable_range_filter_keys.set_bit(key_no); + } + + /* + Allocate an array of objects to store cost info for the selected filters + and allocate an array of pointers to these objects + */ + + range_rowid_filter_cost_info_elems= usable_range_filter_keys.bits_set(); + if (!range_rowid_filter_cost_info_elems) + return; + + range_rowid_filter_cost_info_ptr= + (Range_rowid_filter_cost_info **) + thd->calloc(sizeof(Range_rowid_filter_cost_info *) * + range_rowid_filter_cost_info_elems); + range_rowid_filter_cost_info= + new (thd->mem_root) + Range_rowid_filter_cost_info[range_rowid_filter_cost_info_elems]; + if (!range_rowid_filter_cost_info_ptr || !range_rowid_filter_cost_info) + { + range_rowid_filter_cost_info_elems= 0; + return; + } + + /* Fill the allocated array with cost info on the selected range filters */ + + Range_rowid_filter_cost_info **curr_ptr= 
range_rowid_filter_cost_info_ptr; + Range_rowid_filter_cost_info *curr_filter_cost_info= + range_rowid_filter_cost_info; + + key_map::Iterator li(usable_range_filter_keys); + while ((key_no= li++) != key_map::Iterator::BITMAP_END) + { + *curr_ptr= curr_filter_cost_info; + curr_filter_cost_info->init(SORTED_ARRAY_CONTAINER, this, key_no); + curr_ptr++; + curr_filter_cost_info++; + } + + prune_range_rowid_filters(); + + if (unlikely(thd->trace_started())) + trace_range_rowid_filters(thd); +} + + +void TABLE::trace_range_rowid_filters(THD *thd) const +{ + if (!range_rowid_filter_cost_info_elems) + return; + + Range_rowid_filter_cost_info **p= range_rowid_filter_cost_info_ptr; + Range_rowid_filter_cost_info **end= p + range_rowid_filter_cost_info_elems; + + Json_writer_object js_obj(thd); + js_obj.add_table_name(this); + Json_writer_array js_arr(thd, "rowid_filters"); + + for (; p < end; p++) + (*p)->trace_info(thd); +} + + +void Range_rowid_filter_cost_info::trace_info(THD *thd) +{ + Json_writer_object js_obj(thd); + js_obj.add("key", table->key_info[key_no].name); + js_obj.add("build_cost", b); + js_obj.add("rows", est_elements); +} + +/** + @brief + Choose the best range filter for the given access of the table + + @param access_key_no The index by which the table is accessed + @param records The estimated total number of key tuples with this access + @param access_cost_factor the cost of a random seek to access the table + + @details + The function looks through the array of cost info for range filters + and chooses the element for the range filter that promise the greatest + gain with the the ref or range access of the table by access_key_no. + As the array is sorted by cross_x in ascending order the function stops + the look through as soon as it reaches the first element with + cross_x_adj > records because the range filter for this element and the + range filters for all remaining elements do not promise positive gains. 
+ + @note + It is easy to see that if cross_x[i] > cross_x[j] then + cross_x_adj[i] > cross_x_adj[j] + + @retval Pointer to the cost info for the range filter that promises + the greatest gain, NULL if there is no such range filter +*/ + +Range_rowid_filter_cost_info * +TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no, + double records, + double access_cost_factor) +{ + if (range_rowid_filter_cost_info_elems == 0 || + covering_keys.is_set(access_key_no)) + return 0; + + // Disallow use of range filter if the key contains partially-covered + // columns. + for (uint i= 0; i < key_info[access_key_no].usable_key_parts; i++) + { + if (key_info[access_key_no].key_part[i].field->type() == MYSQL_TYPE_BLOB) + return 0; + } + + /* + Currently we do not support usage of range filters if the table + is accessed by the clustered primary key. It does not make sense + if a full key is used. If the table is accessed by a partial + clustered primary key it would, but the current InnoDB code does not + allow it. 
Later this limitation will be lifted + */ + if (file->is_clustering_key(access_key_no)) + return 0; + + Range_rowid_filter_cost_info *best_filter= 0; + double best_filter_gain= 0; + + key_map no_filter_usage= key_info[access_key_no].overlapped; + no_filter_usage.merge(key_info[access_key_no].constraint_correlated); + for (uint i= 0; i < range_rowid_filter_cost_info_elems ; i++) + { + double curr_gain = 0; + Range_rowid_filter_cost_info *filter= range_rowid_filter_cost_info_ptr[i]; + + /* + Do not use a range filter that uses an in index correlated with + the index by which the table is accessed + */ + if ((filter->key_no == access_key_no) || + no_filter_usage.is_set(filter->key_no)) + continue; + + filter->set_adjusted_gain_param(access_cost_factor); + + if (records < filter->cross_x_adj) + { + /* Does not make sense to look through the remaining filters */ + break; + } + + curr_gain= filter->get_adjusted_gain(records); + if (best_filter_gain < curr_gain) + { + best_filter_gain= curr_gain; + best_filter= filter; + } + } + return best_filter; +} + + +/** + @brief + Fill the range rowid filter performing the associated range index scan + + @details + This function performs the range index scan associated with this + range filter and place into the filter the rowids / primary keys + read from key tuples when doing this scan. + @retval + Rowid_filter::SUCCESS on success + Rowid_filter::NON_FATAL_ERROR the error which does not require transaction + rollback + Rowid_filter::FATAL_ERROR the error which does require transaction + rollback + + @note + The function assumes that the quick select object to perform + the index range scan has been already created. + + @note + Currently the same table handler is used to access the joined table + and to perform range index scan filling the filter. + In the future two different handlers will be used for this + purposes to facilitate a lazy building of the filter. 
+*/ + +Rowid_filter::build_return_code Range_rowid_filter::build() +{ + build_return_code rc= SUCCESS; + handler *file= table->file; + THD *thd= table->in_use; + QUICK_RANGE_SELECT* quick= (QUICK_RANGE_SELECT*) select->quick; + + uint table_status_save= table->status; + Item *pushed_idx_cond_save= file->pushed_idx_cond; + uint pushed_idx_cond_keyno_save= file->pushed_idx_cond_keyno; + bool in_range_check_pushed_down_save= file->in_range_check_pushed_down; + + table->status= 0; + file->pushed_idx_cond= 0; + file->pushed_idx_cond_keyno= MAX_KEY; + file->in_range_check_pushed_down= false; + + /* We're going to just read rowids / primary keys */ + table->prepare_for_position(); + + table->file->ha_start_keyread(quick->index); + + if (quick->init() || quick->reset()) + rc= FATAL_ERROR; + else + { + for (;;) + { + int quick_get_next_result= quick->get_next(); + if (thd->killed) + { + rc= FATAL_ERROR; + break; + } + if (quick_get_next_result != 0) + { + rc= (quick_get_next_result == HA_ERR_END_OF_FILE ? 
SUCCESS + : FATAL_ERROR); + /* + The error state has been set by file->print_error(res, MYF(0)) call + inside quick->get_next() call, in Mrr_simple_index_reader::get_next() + */ + DBUG_ASSERT(rc == SUCCESS || thd->is_error()); + break; + } + file->position(quick->record); + if (container->add(NULL, (char *) file->ref)) + { + rc= NON_FATAL_ERROR; + break; + } + else + tracker->increment_container_elements_count(); + } + } + + quick->range_end(); + table->file->ha_end_keyread(); + + table->status= table_status_save; + file->pushed_idx_cond= pushed_idx_cond_save; + file->pushed_idx_cond_keyno= pushed_idx_cond_keyno_save; + file->in_range_check_pushed_down= in_range_check_pushed_down_save; + tracker->report_container_buff_size(table->file->ref_length); + + if (rc == SUCCESS) + table->file->rowid_filter_is_active= true; + return rc; +} + + +/** + @brief + Binary search in the sorted array of a rowid filter + + @param ctxt context of the search + @parab elem rowid / primary key to look for + + @details + The function looks for the rowid / primary key ' elem' in this container + assuming that ctxt contains a pointer to the TABLE structure created + for the table to whose row elem refers to. 
+ + @retval + true elem is found in the container + false otherwise +*/ + +bool Rowid_filter_sorted_array::check(void *ctxt, char *elem) +{ + TABLE *table= (TABLE *) ctxt; + if (!is_checked) + { + refpos_container.sort(refpos_order_cmp, (void *) (table->file)); + is_checked= true; + } + int l= 0; + int r= refpos_container.elements()-1; + while (l <= r) + { + int m= (l + r) / 2; + int cmp= refpos_order_cmp((void *) (table->file), + refpos_container.get_pos(m), elem); + if (cmp == 0) + return true; + if (cmp < 0) + l= m + 1; + else + r= m-1; + } + return false; +} + + +Range_rowid_filter::~Range_rowid_filter() +{ + delete container; + container= 0; + if (select) + { + if (select->quick) + { + delete select->quick; + select->quick= 0; + } + delete select; + select= 0; + } +} diff --git a/sql/rowid_filter.h b/sql/rowid_filter.h new file mode 100644 index 00000000..b1bf4890 --- /dev/null +++ b/sql/rowid_filter.h @@ -0,0 +1,485 @@ +/* + Copyright (c) 2018, 2019 MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef ROWID_FILTER_INCLUDED +#define ROWID_FILTER_INCLUDED + + +#include "mariadb.h" +#include "sql_array.h" + +/* + + What rowid / primary filters are + -------------------------------- + + Consider a join query Q of the form + SELECT * FROM T1, ... , Tk WHERE P. 
+ + For any of the table reference Ti(Q) from the from clause of Q different + rowid / primary key filters (pk-filters for short) can be built. + A pk-filter F built for Ti(Q) is a set of rowids / primary keys of Ti + F= {pk1,...,pkN} such that for any row r=r1||...||rk from the result set of Q + ri's rowid / primary key pk(ri) is contained in F. + + When pk-filters are useful + -------------------------- + + If building a pk-filter F for Ti(Q )is not too costly and its cardinality #F + is much less than the cardinality of T - #T then using the pk-filter when + executing Q might be quite beneficial. + + Let r be a random row from Ti. Let s(F) be the probability that pk(r) + belongs to F. Let BC(F) be the cost of building F. + + Suppose that the optimizer has chosen for Q a plan with this join order + T1 => ... Tk and that the table Ti is accessed by a ref access using index I. + Let K = {k1,...,kM} be the set of all rowid/primary keys values used to access + rows of Ti when looking for matches in this table.to join Ti by index I. + + Let's assume that two set sets K and F are uncorrelated. With this assumption + if before accessing data from Ti by the rowid / primary key k we first + check whether k is in F then we can expect saving on M*(1-s(S)) accesses of + data rows from Ti. If we can guarantee that test whether k is in F is + relatively cheap then we can gain a lot assuming that BC(F) is much less + then the cost of fetching M*(1-s(S)) records from Ti and following + evaluation of conditions pushed into Ti. + + Making pk-filter test cheap + --------------------------- + + If the search structure to test whether an element is in F can be fully + placed in RAM then this test is expected to be be much cheaper than a random + access of a record from Ti. We'll consider two search structures for + pk-filters: ordered array and bloom filter. Ordered array is easy to + implement, but it's space consuming. 
If a filter contains primary keys + then at least space for each primary key from the filter must be allocated + in the search structure. On a the opposite a bloom filter requires a + fixed number of bits and this number does not depend on the cardinality + of the pk-filter (10 bits per element will serve pk-filter of any size). + +*/ + +/* + + How and when the optimizer builds and uses range rowid filters + -------------------------------------------------------------- + + 1. In make_join_statistics() + for each join table s + after the call of get_quick_record_count() + the TABLE::method init_cost_info_for_usable_range_rowid_filters() + is called + The method build an array of Range_rowid_filter_cost_info elements + containing the cost info on possible range filters for s->table. + The array is optimized for further usage. + + 2. For each partial join order when the optimizer considers joining + table s to this partial join + In the function best_access_path() + a. When evaluating a ref access r by index idx to join s + the optimizer estimates the effect of usage of each possible + range filter f and chooses one with the best gain. The gain + is taken into account when the cost of thr ref access r is + calculated. If it turns out that this is the best ref access + to join s then the info about the chosen filter together + with the info on r is remembered in the corresponding element + of the array of POSITION structures. + [We evaluate every pair (ref access, range_filter) rather then + every pair (best ref access, range filter) because if the index + ref_idx used for ref access r correlates with the index rf_idx + used by the filter f then the pair (r,f) is not evaluated + at all as we don't know how to estimate the effect of correlation + between ref_idx and rf_idx.] + b. When evaluating the best range access to join table s the + optimizer estimates the effect of usage of each possible + range filter f and chooses one with the best gain. 
+ [Here we should have evaluated every pair (range access, + range filter) as well, but it's not done yet.] + + 3. When the cheapest execution plan has been chosen and after the + call of JOIN::get_best_combination() + The method JOIN::make_range_rowid_filters() is called + For each range rowid filter used in the chosen execution plan + the method creates a quick select object to be able to perform + index range scan to fill the filter at the execution stage. + The method also creates Range_rowid_filter objects that are + used at the execution stage. + + 4. Just before the execution stage + The method JOIN::init_range_rowid_filters() is called. + For each join table s that is to be accessed with usage of a range + filter the method allocates containers for the range filter and + it lets the engine know that the filter will be used when + accessing s. + + 5. At the execution stage + In the function sub_select() just before the first access of a join + table s employing a range filter + The method JOIN_TAB::build_range_rowid_filter_if_needed() is called + The method fills the filter using the quick select created by + JOIN::make_range_rowid_filters(). + + 6. The accessed key tuples are checked against the filter within the engine + using the info pushed into it. + +*/ + +struct TABLE; +class SQL_SELECT; +class Rowid_filter_container; +class Range_rowid_filter_cost_info; + +/* Cost to write rowid into array */ +#define ARRAY_WRITE_COST 0.005 +/* Factor used to calculate cost of sorting rowids in array */ +#define ARRAY_SORT_C 0.01 +/* Cost to evaluate condition */ +#define COST_COND_EVAL 0.2 + +typedef enum +{ + SORTED_ARRAY_CONTAINER, + BLOOM_FILTER_CONTAINER // Not used yet +} Rowid_filter_container_type; + +/** + @class Rowid_filter_container + + The interface for different types of containers to store info on the set + of rowids / primary keys that defines a pk-filter. + + There will be two implementations of this abstract class. 
+ - sorted array + - bloom filter +*/ + +class Rowid_filter_container : public Sql_alloc +{ +public: + + virtual Rowid_filter_container_type get_type() = 0; + + /* Allocate memory for the container */ + virtual bool alloc() = 0; + + /* + @brief Add info on a rowid / primary to the container + @param ctxt The context info (opaque) + @param elem The rowid / primary key to be added to the container + @retval true if elem is successfully added + */ + virtual bool add(void *ctxt, char *elem) = 0; + + /* + @brief Check whether a rowid / primary key is in container + @param ctxt The context info (opaque) + @param elem The rowid / primary key to be checked against the container + @retval False if elem is definitely not in the container + */ + virtual bool check(void *ctxt, char *elem) = 0; + + /* True if the container does not contain any element */ + virtual bool is_empty() = 0; + + virtual ~Rowid_filter_container() = default; +}; + + +/** + @class Rowid_filter + + The interface for different types of pk-filters + + Currently we support only range pk filters. +*/ + +class Rowid_filter : public Sql_alloc +{ +protected: + + /* The container to store info the set of elements in the filter */ + Rowid_filter_container *container; + + Rowid_filter_tracker *tracker; + +public: + enum build_return_code { + SUCCESS, + NON_FATAL_ERROR, + FATAL_ERROR, + }; + Rowid_filter(Rowid_filter_container *container_arg) + : container(container_arg) {} + + /* + Build the filter : + fill it with info on the set of elements placed there + */ + virtual build_return_code build() = 0; + + /* + Check whether an element is in the filter. + Returns false is the elements is definitely not in the filter. 
+ */ + virtual bool check(char *elem) = 0; + + virtual ~Rowid_filter() = default; + + bool is_empty() { return container->is_empty(); } + + Rowid_filter_container *get_container() { return container; } + + void set_tracker(Rowid_filter_tracker *track_arg) { tracker= track_arg; } + Rowid_filter_tracker *get_tracker() { return tracker; } +}; + + +/** + @class Rowid_filter_container + + The implementation of the Rowid_interface used for pk-filters + that are filled when performing range index scans. +*/ + +class Range_rowid_filter: public Rowid_filter +{ + /* The table for which the rowid filter is built */ + TABLE *table; + /* The select to perform the range scan to fill the filter */ + SQL_SELECT *select; + /* The cost info on the filter (used for EXPLAIN/ANALYZE) */ + Range_rowid_filter_cost_info *cost_info; + +public: + Range_rowid_filter(TABLE *tab, + Range_rowid_filter_cost_info *cost_arg, + Rowid_filter_container *container_arg, + SQL_SELECT *sel) + : Rowid_filter(container_arg), table(tab), select(sel), cost_info(cost_arg) + {} + + ~Range_rowid_filter(); + + build_return_code build(); + + bool check(char *elem) + { + if (container->is_empty()) + return false; + bool was_checked= container->check(table, elem); + tracker->increment_checked_elements_count(was_checked); + return was_checked; + } + + SQL_SELECT *get_select() { return select; } +}; + + +/** + @class Refpos_container_sorted_array + + The wrapper class over Dynamic_array to facilitate operations over + array of elements of the type char[N] where N is the same for all elements +*/ + +class Refpos_container_sorted_array : public Sql_alloc +{ + /* + Maximum number of elements in the array + (Now is used only at the initialization of the dynamic array) + */ + uint max_elements; + /* Number of bytes allocated for an element */ + uint elem_size; + /* The dynamic array over which the wrapper is built */ + Dynamic_array *array; + +public: + + Refpos_container_sorted_array(uint max_elems, uint elem_sz) + : 
max_elements(max_elems), elem_size(elem_sz), array(0) {} + + ~Refpos_container_sorted_array() + { + delete array; + array= 0; + } + + bool alloc() + { + array= new Dynamic_array (PSI_INSTRUMENT_MEM, + elem_size * max_elements, + elem_size * max_elements/sizeof(char) + 1); + return array == NULL; + } + + bool add(char *elem) + { + for (uint i= 0; i < elem_size; i++) + { + if (array->append(elem[i])) + return true; + } + return false; + } + + char *get_pos(uint n) + { + return array->get_pos(n * elem_size); + } + + uint elements() { return (uint) (array->elements() / elem_size); } + + void sort (int (*cmp) (void *ctxt, const void *el1, const void *el2), + void *cmp_arg) + { + my_qsort2(array->front(), array->elements()/elem_size, + elem_size, (qsort2_cmp) cmp, cmp_arg); + } + + bool is_empty() { return elements() == 0; } +}; + + +/** + @class Rowid_filter_sorted_array + + The implementation of the Rowid_filter_container interface as + a sorted array container of rowids / primary keys +*/ + +class Rowid_filter_sorted_array: public Rowid_filter_container +{ + /* The dynamic array to store rowids / primary keys */ + Refpos_container_sorted_array refpos_container; + /* Initially false, becomes true after the first call of (check() */ + bool is_checked; + +public: + Rowid_filter_sorted_array(uint elems, uint elem_size) + : refpos_container(elems, elem_size), is_checked(false) {} + + Rowid_filter_container_type get_type() + { return SORTED_ARRAY_CONTAINER; } + + bool alloc() { return refpos_container.alloc(); } + + bool add(void *ctxt, char *elem) { return refpos_container.add(elem); } + + bool check(void *ctxt, char *elem); + + bool is_empty() { return refpos_container.is_empty(); } +}; + +/** + @class Range_rowid_filter_cost_info + + An objects of this class is created for each potentially usable + range filter. It contains the info that allows to figure out + whether usage of the range filter promises some gain. 
+*/ + +class Range_rowid_filter_cost_info : public Sql_alloc +{ + /* The table for which the range filter is to be built (if needed) */ + TABLE *table; + /* Estimated number of elements in the filter */ + ulonglong est_elements; + /* The cost of building the range filter */ + double b; + /* + a*N-b yields the gain of the filter + for N key tuples of the index key_no + */ + double a; + /* The value of N where the gain is 0 */ + double cross_x; + /* Used for pruning of the potential range filters */ + key_map abs_independent; + + /* + These two parameters are used to choose the best range filter + in the function TABLE::best_range_rowid_filter_for_partial_join + */ + double a_adj; + double cross_x_adj; + +public: + /* The type of the container of the range filter */ + Rowid_filter_container_type container_type; + /* The index whose range scan would be used to build the range filter */ + uint key_no; + /* The selectivity of the range filter */ + double selectivity; + + Range_rowid_filter_cost_info() : table(0), key_no(0) {} + + void init(Rowid_filter_container_type cont_type, + TABLE *tab, uint key_no); + + double build_cost(Rowid_filter_container_type container_type); + + inline double lookup_cost(Rowid_filter_container_type cont_type); + + inline double + avg_access_and_eval_gain_per_row(Rowid_filter_container_type cont_type); + + inline double avg_adjusted_gain_per_row(double access_cost_factor); + + inline void set_adjusted_gain_param(double access_cost_factor); + + /* Get the gain that usage of filter promises for r key tuples */ + inline double get_gain(double r) + { + return r * a - b; + } + + /* Get the adjusted gain that usage of filter promises for r key tuples */ + inline double get_adjusted_gain(double r) + { + return r * a_adj - b; + } + + /* + The gain promised by usage of the filter for r key tuples + due to less condition evaluations + */ + inline double get_cmp_gain(double r) + { + return r * (1 - selectivity) / TIME_FOR_COMPARE; + } + + 
Rowid_filter_container *create_container(); + + double get_a() { return a; } + + void trace_info(THD *thd); + + friend + void TABLE::prune_range_rowid_filters(); + + friend + void TABLE::init_cost_info_for_usable_range_rowid_filters(THD *thd); + + friend + Range_rowid_filter_cost_info * + TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no, + double records, + double access_cost_factor); +}; + +#endif /* ROWID_FILTER_INCLUDED */ diff --git a/sql/rpl_constants.h b/sql/rpl_constants.h new file mode 100644 index 00000000..f319d083 --- /dev/null +++ b/sql/rpl_constants.h @@ -0,0 +1,112 @@ +/* Copyright (c) 2007 MySQL AB, 2008 Sun Microsystems, Inc. + Use is subject to license terms. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef RPL_CONSTANTS_H +#define RPL_CONSTANTS_H + +#include +#include + +/** + Enumeration of the incidents that can occur for the server. 
+ */ +enum Incident { + /** No incident */ + INCIDENT_NONE = 0, + + /** There are possibly lost events in the replication stream */ + INCIDENT_LOST_EVENTS = 1, + + /** Shall be last event of the enumeration */ + INCIDENT_COUNT +}; + + +/** + Enumeration of the reserved formats of Binlog extra row information +*/ +enum ExtraRowInfoFormat { + + /** Reserved formats 0 -> 63 inclusive */ + ERIF_LASTRESERVED = 63, + + /** + Available / uncontrolled formats + 64 -> 254 inclusive + */ + ERIF_OPEN1 = 64, + ERIF_OPEN2 = 65, + + ERIF_LASTOPEN = 254, + + /** + Multi-payload format 255 + + Length is total length, payload is sequence of + sub-payloads with their own headers containing + length + format. + */ + ERIF_MULTI = 255 +}; + +/* + 1 byte length, 1 byte format + Length is total length in bytes, including 2 byte header + Length values 0 and 1 are currently invalid and reserved. +*/ +#define EXTRA_ROW_INFO_LEN_OFFSET 0 +#define EXTRA_ROW_INFO_FORMAT_OFFSET 1 +#define EXTRA_ROW_INFO_HDR_BYTES 2 +#define EXTRA_ROW_INFO_MAX_PAYLOAD (255 - EXTRA_ROW_INFO_HDR_BYTES) + +enum enum_binlog_checksum_alg { + BINLOG_CHECKSUM_ALG_OFF= 0, // Events are without checksum though its generator + // is checksum-capable New Master (NM). + BINLOG_CHECKSUM_ALG_CRC32= 1, // CRC32 of zlib algorithm. + BINLOG_CHECKSUM_ALG_ENUM_END, // the cut line: valid alg range is [1, 0x7f]. 
+ BINLOG_CHECKSUM_ALG_UNDEF= 255 // special value to tag undetermined yet checksum + // or events from checksum-unaware servers +}; + +#define BINLOG_CRYPTO_SCHEME_LENGTH 1 +#define BINLOG_KEY_VERSION_LENGTH 4 +#define BINLOG_IV_LENGTH MY_AES_BLOCK_SIZE +#define BINLOG_IV_OFFS_LENGTH 4 +#define BINLOG_NONCE_LENGTH (BINLOG_IV_LENGTH - BINLOG_IV_OFFS_LENGTH) + +struct Binlog_crypt_data { + uint scheme; + uint key_version, key_length, ctx_size; + uchar key[MY_AES_MAX_KEY_LENGTH]; + uchar nonce[BINLOG_NONCE_LENGTH]; + + int init(uint sch, uint kv) + { + scheme= sch; + ctx_size= encryption_ctx_size(ENCRYPTION_KEY_SYSTEM_DATA, kv); + key_version= kv; + key_length= sizeof(key); + return encryption_key_get(ENCRYPTION_KEY_SYSTEM_DATA, kv, key, &key_length); + } + + void set_iv(uchar* iv, uint32 offs) const + { + memcpy(iv, nonce, BINLOG_NONCE_LENGTH); + int4store(iv + BINLOG_NONCE_LENGTH, offs); + } +}; + +#endif /* RPL_CONSTANTS_H */ diff --git a/sql/rpl_filter.cc b/sql/rpl_filter.cc new file mode 100644 index 00000000..982c0c94 --- /dev/null +++ b/sql/rpl_filter.cc @@ -0,0 +1,939 @@ +/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2009, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "mysqld.h" // system_charset_info +#include "rpl_filter.h" +#include "hash.h" // my_hash_free +#include "table.h" // TABLE_LIST + +#define TABLE_RULE_HASH_SIZE 16 +#define TABLE_RULE_ARR_SIZE 16 + +Rpl_filter::Rpl_filter() : + parallel_mode(SLAVE_PARALLEL_OPTIMISTIC), + table_rules_on(0), + do_table_inited(0), ignore_table_inited(0), + wild_do_table_inited(0), wild_ignore_table_inited(0) +{ + do_db.empty(); + ignore_db.empty(); + rewrite_db.empty(); +} + + +Rpl_filter::~Rpl_filter() +{ + if (do_table_inited) + my_hash_free(&do_table); + if (ignore_table_inited) + my_hash_free(&ignore_table); + if (wild_do_table_inited) + free_string_array(&wild_do_table); + if (wild_ignore_table_inited) + free_string_array(&wild_ignore_table); + free_string_list(&do_db); + free_string_list(&ignore_db); + free_string_pair_list(&rewrite_db); +} + + +#ifndef MYSQL_CLIENT +/* + Returns true if table should be logged/replicated + + SYNOPSIS + tables_ok() + db db to use if db in TABLE_LIST is undefined for a table + tables list of tables to check + + NOTES + Changing table order in the list can lead to different results. + + Note also order of precedence of do/ignore rules (see code). For + that reason, users should not set conflicting rules because they + may get unpredicted results (precedence order is explained in the + manual). + + If no table in the list is marked "updating", then we always + return 0, because there is no reason to execute this statement on + slave if it updates nothing. 
(Currently, this can only happen if + statement is a multi-delete (SQLCOM_DELETE_MULTI) and "tables" are + the tables in the FROM): + + In the case of SQLCOM_DELETE_MULTI, there will be a second call to + tables_ok(), with tables having "updating==TRUE" (those after the + DELETE), so this second call will make the decision (because + all_tables_not_ok() = !tables_ok(1st_list) && + !tables_ok(2nd_list)). + + TODO + "Include all tables like "abc.%" except "%.EFG"". (Can't be done now.) + If we supported Perl regexps, we could do it with pattern: /^abc\.(?!EFG)/ + (I could not find an equivalent in the regex library MySQL uses). + + RETURN VALUES + 0 should not be logged/replicated + 1 should be logged/replicated +*/ + +bool +Rpl_filter::tables_ok(const char* db, TABLE_LIST* tables) +{ + bool some_tables_updating= 0; + DBUG_ENTER("Rpl_filter::tables_ok"); + + for (; tables; tables= tables->next_global) + { + char hash_key[SAFE_NAME_LEN*2+2]; + char *end; + uint len; + + if (!tables->updating) + continue; + some_tables_updating= 1; + end= strmov(hash_key, tables->db.str ? tables->db.str : db); + *end++= '.'; + len= (uint) (strmov(end, tables->table_name.str) - hash_key); + if (do_table_inited) // if there are any do's + { + if (my_hash_search(&do_table, (uchar*) hash_key, len)) + DBUG_RETURN(1); + } + if (ignore_table_inited) // if there are any ignores + { + if (my_hash_search(&ignore_table, (uchar*) hash_key, len)) + DBUG_RETURN(0); + } + if (wild_do_table_inited && + find_wild(&wild_do_table, hash_key, len)) + DBUG_RETURN(1); + if (wild_ignore_table_inited && + find_wild(&wild_ignore_table, hash_key, len)) + DBUG_RETURN(0); + } + + /* + If no table was to be updated, ignore statement (no reason we play it on + slave, slave is supposed to replicate _changes_ only). + If no explicit rule found and there was a do list, do not replicate. 
+ If there was no do list, go ahead + */ + DBUG_RETURN(some_tables_updating && + !do_table_inited && !wild_do_table_inited); +} + +#endif + +/* + Checks whether a db matches some do_db and ignore_db rules + + SYNOPSIS + db_ok() + db name of the db to check + + RETURN VALUES + 0 should not be logged/replicated + 1 should be logged/replicated +*/ + +bool +Rpl_filter::db_ok(const char* db) +{ + DBUG_ENTER("Rpl_filter::db_ok"); + + if (do_db.is_empty() && ignore_db.is_empty()) + DBUG_RETURN(1); // Ok to replicate if the user puts no constraints + + /* + Previous behaviour "if the user has specified restrictions on which + databases to replicate and db was not selected, do not replicate" has + been replaced with "do replicate". + Since the filtering criteria is not equal to "NULL" the statement should + be logged into binlog. + */ + if (!db) + DBUG_RETURN(1); + + if (!do_db.is_empty()) // if the do's are not empty + { + I_List_iterator it(do_db); + i_string* tmp; + + while ((tmp=it++)) + { + if (!strcmp(tmp->ptr, db)) + DBUG_RETURN(1); // match + } + DBUG_PRINT("exit", ("Don't replicate")); + DBUG_RETURN(0); + } + else // there are some elements in the don't, otherwise we cannot get here + { + I_List_iterator it(ignore_db); + i_string* tmp; + + while ((tmp=it++)) + { + if (!strcmp(tmp->ptr, db)) + { + DBUG_PRINT("exit", ("Don't replicate")); + DBUG_RETURN(0); // match + } + } + DBUG_RETURN(1); + } +} + + +/* + Checks whether a db matches wild_do_table and wild_ignore_table + rules (for replication) + + SYNOPSIS + db_ok_with_wild_table() + db name of the db to check. + Is tested with check_db_name() before calling this function. + + NOTES + Here is the reason for this function. + We advise users who want to exclude a database 'db1' safely to do it + with replicate_wild_ignore_table='db1.%' instead of binlog_ignore_db or + replicate_ignore_db because the two lasts only check for the selected db, + which won't work in that case: + USE db2; + UPDATE db1.t SET ... 
#this will be replicated and should not + whereas replicate_wild_ignore_table will work in all cases. + With replicate_wild_ignore_table, we only check tables. When + one does 'DROP DATABASE db1', tables are not involved and the + statement will be replicated, while users could expect it would not (as it + rougly means 'DROP db1.first_table, DROP db1.second_table...'). + In other words, we want to interpret 'db1.%' as "everything touching db1". + That is why we want to match 'db1' against 'db1.%' wild table rules. + + RETURN VALUES + 0 should not be logged/replicated + 1 should be logged/replicated +*/ + +bool +Rpl_filter::db_ok_with_wild_table(const char *db) +{ + DBUG_ENTER("Rpl_filter::db_ok_with_wild_table"); + + char hash_key[SAFE_NAME_LEN+2]; + char *end; + int len; + end= strmov(hash_key, db); + *end++= '.'; + len= (int)(end - hash_key); + if (wild_do_table_inited && find_wild(&wild_do_table, hash_key, len)) + { + DBUG_PRINT("return",("1")); + DBUG_RETURN(1); + } + if (wild_ignore_table_inited && find_wild(&wild_ignore_table, hash_key, len)) + { + DBUG_PRINT("return",("0")); + DBUG_RETURN(0); + } + + /* + If no explicit rule found and there was a do list, do not replicate. + If there was no do list, go ahead + */ + DBUG_PRINT("return",("db=%s,retval=%d", db, !wild_do_table_inited)); + DBUG_RETURN(!wild_do_table_inited); +} + + +bool +Rpl_filter::is_on() +{ + return table_rules_on; +} + + +/** + Parse and add the given comma-separated sequence of filter rules. + + @param spec Comma-separated sequence of filter rules. + @param add Callback member function to add a filter rule. + + @return true if error, false otherwise. +*/ + +int +Rpl_filter::parse_filter_rule(const char* spec, Add_filter add) +{ + int status= 0; + char *arg, *ptr, *pstr; + + if (!spec) + return false; + + if (! (ptr= my_strdup(key_memory_rpl_filter, spec, MYF(MY_WME)))) + return true; + + pstr= ptr; + + while (pstr) + { + arg= pstr; + + /* Parse token string. 
*/ + pstr= strpbrk(arg, ","); + + /* NUL terminate the token string. */ + if (pstr) + *pstr++= '\0'; + + /* Skip an empty token string. */ + if (arg[0] == '\0') + continue; + + /* Skip leading spaces. */ + while (my_isspace(system_charset_info, *arg)) + arg++; + + status= (this->*add)(arg); + + if (status) + break; + } + + my_free(ptr); + + return status; +} + + +int +Rpl_filter::add_do_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_do_table"); + if (!do_table_inited) + init_table_rule_hash(&do_table, &do_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_table_rule(&do_table, table_spec)); +} + + +int +Rpl_filter::add_ignore_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_ignore_table"); + if (!ignore_table_inited) + init_table_rule_hash(&ignore_table, &ignore_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_table_rule(&ignore_table, table_spec)); +} + + +int +Rpl_filter::set_do_table(const char* table_spec) +{ + int status; + + if (do_table_inited) + { + my_hash_free(&do_table); + do_table_inited= 0; + } + + status= parse_filter_rule(table_spec, &Rpl_filter::add_do_table); + + if (do_table_inited && status) + { + if (!do_table.records) + { + my_hash_free(&do_table); + do_table_inited= 0; + } + } + + return status; +} + + +int +Rpl_filter::set_ignore_table(const char* table_spec) +{ + int status; + + if (ignore_table_inited) + { + my_hash_free(&ignore_table); + ignore_table_inited= 0; + } + + status= parse_filter_rule(table_spec, &Rpl_filter::add_ignore_table); + + if (ignore_table_inited && status) + { + if (!ignore_table.records) + { + my_hash_free(&ignore_table); + ignore_table_inited= 0; + } + } + + return status; +} + + +int +Rpl_filter::add_wild_do_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_wild_do_table"); + if (!wild_do_table_inited) + init_table_rule_array(&wild_do_table, &wild_do_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_wild_table_rule(&wild_do_table, table_spec)); +} + + +int 
+Rpl_filter::add_wild_ignore_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_wild_ignore_table"); + if (!wild_ignore_table_inited) + init_table_rule_array(&wild_ignore_table, &wild_ignore_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_wild_table_rule(&wild_ignore_table, table_spec)); +} + + +int +Rpl_filter::set_wild_do_table(const char* table_spec) +{ + int status; + + if (wild_do_table_inited) + { + free_string_array(&wild_do_table); + wild_do_table_inited= 0; + } + + status= parse_filter_rule(table_spec, &Rpl_filter::add_wild_do_table); + + if (wild_do_table_inited && status) + { + if (!wild_do_table.elements) + { + delete_dynamic(&wild_do_table); + wild_do_table_inited= 0; + } + } + + return status; +} + + +int +Rpl_filter::set_wild_ignore_table(const char* table_spec) +{ + int status; + + if (wild_ignore_table_inited) + { + free_string_array(&wild_ignore_table); + wild_ignore_table_inited= 0; + } + + status= parse_filter_rule(table_spec, &Rpl_filter::add_wild_ignore_table); + + if (wild_ignore_table_inited && status) + { + if (!wild_ignore_table.elements) + { + delete_dynamic(&wild_ignore_table); + wild_ignore_table_inited= 0; + } + } + + return status; +} + + +int +Rpl_filter::add_table_rule(HASH* h, const char* table_spec) +{ + const char* dot = strchr(table_spec, '.'); + if (!dot) return 1; + // len is always > 0 because we know the there exists a '.' 
+ uint len = (uint)strlen(table_spec); + TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(key_memory_TABLE_RULE_ENT, + sizeof(TABLE_RULE_ENT) + len, + MYF(MY_WME)); + if (!e) return 1; + e->db= (char*)e + sizeof(TABLE_RULE_ENT); + e->tbl_name= e->db + (dot - table_spec) + 1; + e->key_len= len; + memcpy(e->db, table_spec, len); + + return my_hash_insert(h, (uchar*)e); +} + + +/* + Add table expression with wildcards to dynamic array +*/ + +int +Rpl_filter::add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec) +{ + const char* dot = strchr(table_spec, '.'); + if (!dot) return 1; + uint len = (uint)strlen(table_spec); + TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(key_memory_TABLE_RULE_ENT, + sizeof(TABLE_RULE_ENT) + len, + MYF(MY_WME)); + if (!e) return 1; + e->db= (char*)e + sizeof(TABLE_RULE_ENT); + e->tbl_name= e->db + (dot - table_spec) + 1; + e->key_len= len; + memcpy(e->db, table_spec, len); + return insert_dynamic(a, (uchar*)&e); +} + + +int +Rpl_filter::add_string_pair_list(const char* spec) +{ + /* See also OPT_REWRITE_DB handling in client/mysqlbinlog.cc */ + char* from_db, *to_db; + const char *ptr, *val_ptr; + size_t len; + + // Remove pre-space in key + while(*spec && my_isspace(system_charset_info, (unsigned char)*spec)) spec++; + + if (!(ptr= strstr(spec, "->"))) + { + // Bad syntax, missing -> + return 1; + } + + // value + val_ptr= ptr + 2; + + // Skip blanks at the end of spec + while(ptr > spec && my_isspace(system_charset_info, ptr[-1])) ptr--; + + if (ptr == spec) + { + // Bad syntax: empty FROM db (key) + return 1; + } + + // key + len= (size_t)(ptr - spec); + if (! 
(from_db= (char *) my_malloc(PSI_NOT_INSTRUMENTED, len + 1, MYF(0)))) + { + return 1; + } + memcpy(from_db, spec, len); + from_db[len]='\0'; + + // Remove pre-space in val + while(*val_ptr && my_isspace(system_charset_info, (unsigned char)*val_ptr)) val_ptr++; + // Value ends with \0 or space + if (!strlen(val_ptr)) + { + // Bad syntax: Empty value \n" + my_free(from_db); + return 1; + } + + for (ptr= val_ptr; *ptr && !my_isspace(system_charset_info, *ptr); ptr++){} + // value + len= (size_t)(ptr - val_ptr); + if(! (to_db= (char *) my_malloc(PSI_NOT_INSTRUMENTED, len + 1, MYF(0)))) + { + my_free(from_db); + return 1; + } + memcpy(to_db, val_ptr, len); + to_db[len]='\0'; + i_string_pair *db_pair = new i_string_pair(from_db, to_db); + rewrite_db.push_back(db_pair); + return false; +} + + +int +Rpl_filter::add_string_list(I_List *list, const char* spec) +{ + char *str; + i_string *node; + + if (! (str= my_strdup(key_memory_rpl_filter, spec, MYF(MY_WME)))) + return true; + + if (! (node= new i_string(str))) + { + my_free(str); + return true; + } + + list->push_back(node); + + return false; +} + + +int +Rpl_filter::add_rewrite_db(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_rewrite_db"); + DBUG_RETURN(add_string_pair_list(table_spec)); +} + + +int +Rpl_filter::add_do_db(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_do_db"); + DBUG_RETURN(add_string_list(&do_db, table_spec)); +} + + +int +Rpl_filter::add_ignore_db(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_ignore_db"); + DBUG_RETURN(add_string_list(&ignore_db, table_spec)); +} + + +int +Rpl_filter::set_rewrite_db(const char* db_spec) +{ + free_string_pair_list(&rewrite_db); + return parse_filter_rule(db_spec, &Rpl_filter::add_rewrite_db); +} + + +int +Rpl_filter::set_do_db(const char* db_spec) +{ + free_string_list(&do_db); + return parse_filter_rule(db_spec, &Rpl_filter::add_do_db); +} + + +int +Rpl_filter::set_ignore_db(const char* db_spec) +{ + free_string_list(&ignore_db); + 
return parse_filter_rule(db_spec, &Rpl_filter::add_ignore_db); +} + + +extern "C" uchar *get_table_key(const uchar *, size_t *, my_bool); +extern "C" void free_table_ent(void* a); + +uchar *get_table_key(const uchar* a, size_t *len, + my_bool __attribute__((unused))) +{ + TABLE_RULE_ENT *e= (TABLE_RULE_ENT *) a; + + *len= e->key_len; + return (uchar*)e->db; +} + + +void free_table_ent(void* a) +{ + TABLE_RULE_ENT *e= (TABLE_RULE_ENT *) a; + + my_free(e); +} + + +void +Rpl_filter::init_table_rule_hash(HASH* h, bool* h_inited) +{ + my_hash_init(key_memory_TABLE_RULE_ENT, h, + system_charset_info,TABLE_RULE_HASH_SIZE,0,0, get_table_key, + free_table_ent, 0); + *h_inited = 1; +} + + +void +Rpl_filter::init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited) +{ + my_init_dynamic_array(key_memory_TABLE_RULE_ENT, a, sizeof(TABLE_RULE_ENT*), + TABLE_RULE_ARR_SIZE, TABLE_RULE_ARR_SIZE, MYF(0)); + *a_inited = 1; +} + + +TABLE_RULE_ENT* +Rpl_filter::find_wild(DYNAMIC_ARRAY *a, const char* key, int len) +{ + uint i; + const char* key_end= key + len; + + for (i= 0; i < a->elements; i++) + { + TABLE_RULE_ENT* e ; + get_dynamic(a, (uchar*)&e, i); + if (!system_charset_info->wildcmp(key, key_end, + (const char*)e->db, + (const char*)(e->db + e->key_len), + '\\', wild_one, wild_many)) + return e; + } + + return 0; +} + + +void +Rpl_filter::free_string_array(DYNAMIC_ARRAY *a) +{ + uint i; + for (i= 0; i < a->elements; i++) + { + char* p; + get_dynamic(a, (uchar*) &p, i); + my_free(p); + } + delete_dynamic(a); +} + + +void +Rpl_filter::free_string_list(I_List *l) +{ + void *ptr; + i_string *tmp; + + while ((tmp= l->get())) + { + ptr= (void *) tmp->ptr; + my_free(ptr); + delete tmp; + } + + l->empty(); +} + + +void +Rpl_filter::free_string_pair_list(I_List *l) +{ + i_string_pair *tmp; + + while ((tmp= l->get())) + { + my_free((void *) tmp->key); + my_free((void *) tmp->val); + delete tmp; + } + + l->empty(); +} + +/* + Builds a String from a HASH of TABLE_RULE_ENT. 
Cannot be used for any other + hash, as it assumes that the hash entries are TABLE_RULE_ENT. + + SYNOPSIS + table_rule_ent_hash_to_str() + s pointer to the String to fill + h pointer to the HASH to read + + RETURN VALUES + none +*/ + +void +Rpl_filter::table_rule_ent_hash_to_str(String* s, HASH* h, bool inited) +{ + s->length(0); + if (inited) + { + for (uint i= 0; i < h->records; i++) + { + TABLE_RULE_ENT* e= (TABLE_RULE_ENT*) my_hash_element(h, i); + if (s->length()) + s->append(','); + s->append(e->db,e->key_len); + } + } +} + + +void +Rpl_filter::table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a, + bool inited) +{ + s->length(0); + if (inited) + { + for (uint i= 0; i < a->elements; i++) + { + TABLE_RULE_ENT* e; + get_dynamic(a, (uchar*)&e, i); + if (s->length()) + s->append(','); + s->append(e->db,e->key_len); + } + } +} + + +void +Rpl_filter::get_do_table(String* str) +{ + table_rule_ent_hash_to_str(str, &do_table, do_table_inited); +} + + +void +Rpl_filter::get_ignore_table(String* str) +{ + table_rule_ent_hash_to_str(str, &ignore_table, ignore_table_inited); +} + + +void +Rpl_filter::get_wild_do_table(String* str) +{ + table_rule_ent_dynamic_array_to_str(str, &wild_do_table, wild_do_table_inited); +} + + +void +Rpl_filter::get_wild_ignore_table(String* str) +{ + table_rule_ent_dynamic_array_to_str(str, &wild_ignore_table, wild_ignore_table_inited); +} + + +bool +Rpl_filter::rewrite_db_is_empty() +{ + return rewrite_db.is_empty(); +} + + +I_List* +Rpl_filter::get_rewrite_db() +{ + return &rewrite_db; +} + + +void +Rpl_filter::db_rewrite_rule_ent_list_to_str(String* str, I_List* list) +{ + I_List_iterator it(*list); + i_string_pair* s; + + str->length(0); + + const char *delimiter= ","; + size_t delim_len= 0; + while ((s= it++)) + { + str->append(delimiter, delim_len); + str->append(s->key, strlen(s->key)); + str->append(STRING_WITH_LEN("->")); + str->append(s->val, strlen(s->val)); + delim_len= 1; + } +} + + +const char* 
+Rpl_filter::get_rewrite_db(const char* db, size_t *new_len) +{ + if (rewrite_db.is_empty() || !db) + return db; + I_List_iterator it(rewrite_db); + i_string_pair* tmp; + + while ((tmp=it++)) + { + if (!strcmp(tmp->key, db)) + { + *new_len= strlen(tmp->val); + return tmp->val; + } + } + return db; +} + + +I_List* +Rpl_filter::get_do_db() +{ + return &do_db; +} + + +I_List* +Rpl_filter::get_ignore_db() +{ + return &ignore_db; +} + + +void +Rpl_filter::db_rule_ent_list_to_str(String* str, I_List* list) +{ + I_List_iterator it(*list); + i_string* s; + + str->length(0); + + while ((s= it++)) + { + str->append(s->ptr, strlen(s->ptr)); + str->append(','); + } + + // Remove last ',' + if (!str->is_empty()) + str->chop(); +} + + +void +Rpl_filter::get_rewrite_db(String* str) +{ + db_rewrite_rule_ent_list_to_str(str, get_rewrite_db()); +} + + +void +Rpl_filter::get_do_db(String* str) +{ + db_rule_ent_list_to_str(str, get_do_db()); +} + + +void +Rpl_filter::get_ignore_db(String* str) +{ + db_rule_ent_list_to_str(str, get_ignore_db()); +} diff --git a/sql/rpl_filter.h b/sql/rpl_filter.h new file mode 100644 index 00000000..ee3b9d51 --- /dev/null +++ b/sql/rpl_filter.h @@ -0,0 +1,161 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef RPL_FILTER_H +#define RPL_FILTER_H + +#include "mysql.h" +#include "mysqld.h" +#include "sql_list.h" /* I_List */ +#include "hash.h" /* HASH */ + +class String; +struct TABLE_LIST; +typedef struct st_dynamic_array DYNAMIC_ARRAY; + +typedef struct st_table_rule_ent +{ + char* db; + char* tbl_name; + uint key_len; +} TABLE_RULE_ENT; + +/* + Rpl_filter + + Inclusion and exclusion rules of tables and databases. + Also handles rewrites of db. + Used for replication and binlogging. + */ +class Rpl_filter +{ +public: + Rpl_filter(); + ~Rpl_filter(); + Rpl_filter(Rpl_filter const&); + Rpl_filter& operator=(Rpl_filter const&); + + /* Checks - returns true if ok to replicate/log */ + +#ifndef MYSQL_CLIENT + bool tables_ok(const char* db, TABLE_LIST *tables); +#endif + bool db_ok(const char* db); + bool db_ok_with_wild_table(const char *db); + + bool is_on(); + + /* Setters - add filtering rules */ + + int add_do_table(const char* table_spec); + int add_ignore_table(const char* table_spec); + + int set_do_table(const char* table_spec); + int set_ignore_table(const char* table_spec); + + int add_wild_do_table(const char* table_spec); + int add_wild_ignore_table(const char* table_spec); + + int set_wild_do_table(const char* table_spec); + int set_wild_ignore_table(const char* table_spec); + + int add_rewrite_db(const char* table_spec); + int add_do_db(const char* db_spec); + int add_ignore_db(const char* db_spec); + + int set_rewrite_db(const char* db_spec); + int set_do_db(const char* db_spec); + int set_ignore_db(const char* db_spec); + + void set_parallel_mode(enum_slave_parallel_mode mode) + { + parallel_mode= mode; + } + /* Return given parallel mode or if one is not given, the default mode */ + enum_slave_parallel_mode get_parallel_mode() + { + return 
parallel_mode; + } + + /* Getters - to get information about current rules */ + + void get_do_table(String* str); + void get_ignore_table(String* str); + + void get_wild_do_table(String* str); + void get_wild_ignore_table(String* str); + + bool rewrite_db_is_empty(); + I_List* get_rewrite_db(); + void get_rewrite_db(String *str); + const char* get_rewrite_db(const char* db, size_t *new_len); + + I_List* get_do_db(); + I_List* get_ignore_db(); + + void get_do_db(String* str); + void get_ignore_db(String* str); + +private: + + void init_table_rule_hash(HASH* h, bool* h_inited); + void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited); + + int add_table_rule(HASH* h, const char* table_spec); + int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec); + + typedef int (Rpl_filter::*Add_filter)(char const*); + + int parse_filter_rule(const char* spec, Add_filter func); + + void free_string_array(DYNAMIC_ARRAY *a); + void free_string_list(I_List *l); + void free_string_pair_list(I_List *l); + + void table_rule_ent_hash_to_str(String* s, HASH* h, bool inited); + void table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a, + bool inited); + void db_rewrite_rule_ent_list_to_str(String*, I_List*); + void db_rule_ent_list_to_str(String* s, I_List* l); + TABLE_RULE_ENT* find_wild(DYNAMIC_ARRAY *a, const char* key, int len); + + int add_string_list(I_List *list, const char* spec); + int add_string_pair_list(const char* my_spec); + /* + Those 4 structures below are uninitialized memory unless the + corresponding *_inited variables are "true". 
+ */ + HASH do_table; + HASH ignore_table; + DYNAMIC_ARRAY wild_do_table; + DYNAMIC_ARRAY wild_ignore_table; + enum_slave_parallel_mode parallel_mode; + + bool table_rules_on; + bool do_table_inited; + bool ignore_table_inited; + bool wild_do_table_inited; + bool wild_ignore_table_inited; + + I_List do_db; + I_List ignore_db; + + I_List rewrite_db; +}; + +extern Rpl_filter *global_rpl_filter; +extern Rpl_filter *binlog_filter; + +#endif // RPL_FILTER_H diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc new file mode 100644 index 00000000..11629059 --- /dev/null +++ b/sql/rpl_gtid.cc @@ -0,0 +1,4062 @@ +/* Copyright (c) 2013, Kristian Nielsen and MariaDB Services Ab. + Copyright (c) 2020, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* Definitions for MariaDB global transaction ID (GTID). 
*/ +#include + +#ifndef MYSQL_CLIENT +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "mariadb.h" +#include "sql_base.h" +#include "sql_parse.h" +#include "key.h" +#include "rpl_rli.h" +#include "slave.h" +#include "log_event.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" // wsrep_thd_is_local +#include "wsrep_trans_observer.h" // wsrep_start_trx_if_not_started +#endif + +const LEX_CSTRING rpl_gtid_slave_state_table_name= + { STRING_WITH_LEN("gtid_slave_pos") }; + + +void +rpl_slave_state::update_state_hash(uint64 sub_id, rpl_gtid *gtid, void *hton, + rpl_group_info *rgi) +{ + int err; + /* + Add the gtid to the HASH in the replication slave state. + + We must do this only _after_ commit, so that for parallel replication, + there will not be an attempt to delete the corresponding table row before + it is even committed. + */ + err= update(gtid->domain_id, gtid->server_id, sub_id, gtid->seq_no, hton, rgi); + if (err) + { + sql_print_warning("Slave: Out of memory during slave state maintenance. " + "Some no longer necessary rows in table " + "mysql.%s may be left undeleted.", + rpl_gtid_slave_state_table_name.str); + /* + Such failure is not fatal. We will fail to delete the row for this + GTID, but it will do no harm and will be removed automatically on next + server restart. + */ + } +} + + +int +rpl_slave_state::record_and_update_gtid(THD *thd, rpl_group_info *rgi) +{ + DBUG_ENTER("rpl_slave_state::record_and_update_gtid"); + + /* + Update the GTID position, if we have it and did not already update + it in a GTID transaction. 
+ */ + if (rgi->gtid_pending) + { + uint64 sub_id= rgi->gtid_sub_id; + void *hton= NULL; + + rgi->gtid_pending= false; + if (rgi->gtid_ignore_duplicate_state!=rpl_group_info::GTID_DUPLICATE_IGNORE) + { + if (record_gtid(thd, &rgi->current_gtid, sub_id, false, false, &hton)) + DBUG_RETURN(1); + update_state_hash(sub_id, &rgi->current_gtid, hton, rgi); + } + rgi->gtid_ignore_duplicate_state= rpl_group_info::GTID_DUPLICATE_NULL; + } + DBUG_RETURN(0); +} + + +/* + Check GTID event execution when --gtid-ignore-duplicates. + + The idea with --gtid-ignore-duplicates is that we allow multiple master + connections (in multi-source replication) to all receive the same GTIDs and + event groups. Only one instance of each is applied; we use the sequence + number in the GTID to decide whether a GTID has already been applied. + + So if the seq_no of a GTID (or a higher sequence number) has already been + applied, then the event should be skipped. If not then the event should be + applied. + + To avoid two master connections tring to apply the same event + simultaneously, only one is allowed to work in any given domain at any point + in time. The associated Relay_log_info object is called the owner of the + domain (and there can be multiple parallel worker threads working in that + domain for that Relay_log_info). Any other Relay_log_info/master connection + must wait for the domain to become free, or for their GTID to have been + applied, before being allowed to proceed. + + Returns: + 0 This GTID is already applied, it should be skipped. + 1 The GTID is not yet applied; this rli is now the owner, and must apply + the event and release the domain afterwards. + -1 Error (out of memory to allocate a new element for the domain). 
+*/ +int +rpl_slave_state::check_duplicate_gtid(rpl_gtid *gtid, rpl_group_info *rgi) +{ + uint32 domain_id= gtid->domain_id; + uint64 seq_no= gtid->seq_no; + rpl_slave_state::element *elem; + int res; + bool did_enter_cond= false; + PSI_stage_info old_stage; + THD *UNINIT_VAR(thd); + Relay_log_info *rli= rgi->rli; + + mysql_mutex_lock(&LOCK_slave_state); + if (!(elem= get_element(domain_id))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + res= -1; + goto err; + } + /* + Note that the elem pointer does not change once inserted in the hash. So + we can re-use the pointer without looking it up again in the hash after + each lock release and re-take. + */ + + for (;;) + { + if (elem->highest_seq_no >= seq_no) + { + /* This sequence number is already applied, ignore it. */ + res= 0; + rgi->gtid_ignore_duplicate_state= rpl_group_info::GTID_DUPLICATE_IGNORE; + break; + } + if (!elem->owner_rli) + { + /* The domain became free, grab it and apply the event. */ + elem->owner_rli= rli; + elem->owner_count= 1; + rgi->gtid_ignore_duplicate_state= rpl_group_info::GTID_DUPLICATE_OWNER; + res= 1; + break; + } + if (elem->owner_rli == rli) + { + /* Already own this domain, increment reference count and apply event. */ + ++elem->owner_count; + rgi->gtid_ignore_duplicate_state= rpl_group_info::GTID_DUPLICATE_OWNER; + res= 1; + break; + } + thd= rgi->thd; + if (unlikely(thd->check_killed())) + { + res= -1; + break; + } + /* + Someone else is currently processing this GTID (or an earlier one). + Wait for them to complete (or fail), and then check again. 
+ */ + if (!did_enter_cond) + { + thd->ENTER_COND(&elem->COND_gtid_ignore_duplicates, &LOCK_slave_state, + &stage_gtid_wait_other_connection, &old_stage); + did_enter_cond= true; + } + mysql_cond_wait(&elem->COND_gtid_ignore_duplicates, + &LOCK_slave_state); + } + +err: + if (did_enter_cond) + thd->EXIT_COND(&old_stage); + else + mysql_mutex_unlock(&LOCK_slave_state); + return res; +} + + +void +rpl_slave_state::release_domain_owner(rpl_group_info *rgi) +{ + element *elem= NULL; + + mysql_mutex_lock(&LOCK_slave_state); + if (!(elem= get_element(rgi->current_gtid.domain_id))) + { + /* + We cannot really deal with error here, as we are already called in an + error handling case (transaction failure and rollback). + + However, get_element() only fails if the element did not exist already + and could not be allocated due to out-of-memory - and if it did not + exist, then we would not get here in the first place. + */ + mysql_mutex_unlock(&LOCK_slave_state); + return; + } + + if (rgi->gtid_ignore_duplicate_state == rpl_group_info::GTID_DUPLICATE_OWNER) + { + uint32 count= elem->owner_count; + DBUG_ASSERT(count > 0); + DBUG_ASSERT(elem->owner_rli == rgi->rli); + --count; + elem->owner_count= count; + if (count == 0) + { + elem->owner_rli= NULL; + mysql_cond_broadcast(&elem->COND_gtid_ignore_duplicates); + } + } + rgi->gtid_ignore_duplicate_state= rpl_group_info::GTID_DUPLICATE_NULL; + mysql_mutex_unlock(&LOCK_slave_state); +} + + +static void +rpl_slave_state_free_element(void *arg) +{ + struct rpl_slave_state::element *elem= (struct rpl_slave_state::element *)arg; + mysql_cond_destroy(&elem->COND_wait_gtid); + mysql_cond_destroy(&elem->COND_gtid_ignore_duplicates); + my_free(elem); +} + + +rpl_slave_state::rpl_slave_state() + : pending_gtid_count(0), last_sub_id(0), gtid_pos_tables(0), loaded(false) +{ + mysql_mutex_init(key_LOCK_slave_state, &LOCK_slave_state, + MY_MUTEX_INIT_SLOW); + my_hash_init(PSI_INSTRUMENT_ME, &hash, &my_charset_bin, 32, + offsetof(element, 
domain_id), sizeof(element::domain_id), + NULL, rpl_slave_state_free_element, HASH_UNIQUE); + my_init_dynamic_array(PSI_INSTRUMENT_ME, >id_sort_array, sizeof(rpl_gtid), + 8, 8, MYF(0)); +} + + +rpl_slave_state::~rpl_slave_state() +{ + free_gtid_pos_tables(gtid_pos_tables.load(std::memory_order_relaxed)); + truncate_hash(); + my_hash_free(&hash); + delete_dynamic(>id_sort_array); + mysql_mutex_destroy(&LOCK_slave_state); +} + + +void +rpl_slave_state::truncate_hash() +{ + uint32 i; + + for (i= 0; i < hash.records; ++i) + { + element *e= (element *)my_hash_element(&hash, i); + list_element *l= e->list; + list_element *next; + while (l) + { + next= l->next; + my_free(l); + l= next; + } + /* The element itself is freed by the hash element free function. */ + } + my_hash_reset(&hash); +} + + +int +rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id, + uint64 seq_no, void *hton, rpl_group_info *rgi) +{ + int res; + mysql_mutex_lock(&LOCK_slave_state); + res= update_nolock(domain_id, server_id, sub_id, seq_no, hton, rgi); + mysql_mutex_unlock(&LOCK_slave_state); + return res; +} + + +int +rpl_slave_state::update_nolock(uint32 domain_id, uint32 server_id, uint64 sub_id, + uint64 seq_no, void *hton, rpl_group_info *rgi) +{ + element *elem= NULL; + list_element *list_elem= NULL; + + DBUG_ASSERT(hton || !loaded); + mysql_mutex_assert_owner(&LOCK_slave_state); + if (!(elem= get_element(domain_id))) + return 1; + + if (seq_no > elem->highest_seq_no) + elem->highest_seq_no= seq_no; + if (elem->gtid_waiter && elem->min_wait_seq_no <= seq_no) + { + /* + Someone was waiting in MASTER_GTID_WAIT() for this GTID to appear. + Signal (and remove) them. The waiter will handle all the processing + of all pending MASTER_GTID_WAIT(), so we do not slow down the + replication SQL thread. 
+ */ + elem->gtid_waiter= NULL; + mysql_cond_broadcast(&elem->COND_wait_gtid); + } + + if (rgi) + { + if (rgi->gtid_ignore_duplicate_state==rpl_group_info::GTID_DUPLICATE_OWNER) + { +#ifdef DBUG_ASSERT_EXISTS + Relay_log_info *rli= rgi->rli; +#endif + uint32 count= elem->owner_count; + DBUG_ASSERT(count > 0); + DBUG_ASSERT(elem->owner_rli == rli); + --count; + elem->owner_count= count; + if (count == 0) + { + elem->owner_rli= NULL; + mysql_cond_broadcast(&elem->COND_gtid_ignore_duplicates); + } + } + rgi->gtid_ignore_duplicate_state= rpl_group_info::GTID_DUPLICATE_NULL; + } + + if (!(list_elem= (list_element *)my_malloc(PSI_INSTRUMENT_ME, + sizeof(*list_elem), MYF(MY_WME)))) + return 1; + list_elem->domain_id= domain_id; + list_elem->server_id= server_id; + list_elem->sub_id= sub_id; + list_elem->seq_no= seq_no; + list_elem->hton= hton; + + elem->add(list_elem); + if (last_sub_id < sub_id) + last_sub_id= sub_id; + +#ifdef HAVE_REPLICATION + ++pending_gtid_count; + if (pending_gtid_count >= opt_gtid_cleanup_batch_size) + { + pending_gtid_count = 0; + slave_background_gtid_pending_delete_request(); + } +#endif + + return 0; +} + + +struct rpl_slave_state::element * +rpl_slave_state::get_element(uint32 domain_id) +{ + struct element *elem; + + elem= (element *)my_hash_search(&hash, (const uchar *)&domain_id, + sizeof(domain_id)); + if (elem) + return elem; + + if (!(elem= (element *)my_malloc(PSI_INSTRUMENT_ME, sizeof(*elem), MYF(MY_WME)))) + return NULL; + elem->list= NULL; + elem->domain_id= domain_id; + elem->highest_seq_no= 0; + elem->gtid_waiter= NULL; + elem->owner_rli= NULL; + elem->owner_count= 0; + mysql_cond_init(key_COND_wait_gtid, &elem->COND_wait_gtid, 0); + mysql_cond_init(key_COND_gtid_ignore_duplicates, + &elem->COND_gtid_ignore_duplicates, 0); + if (my_hash_insert(&hash, (uchar *)elem)) + { + my_free(elem); + return NULL; + } + return elem; +} + + +int +rpl_slave_state::put_back_list(list_element *list) +{ + element *e= NULL; + int err= 0; + + 
mysql_mutex_lock(&LOCK_slave_state); + while (list) + { + list_element *next= list->next; + + if ((!e || e->domain_id != list->domain_id) && + !(e= (element *)my_hash_search(&hash, (const uchar *)&list->domain_id, + sizeof(list->domain_id)))) + { + err= 1; + goto end; + } + e->add(list); + list= next; + } + +end: + mysql_mutex_unlock(&LOCK_slave_state); + return err; +} + + +int +rpl_slave_state::truncate_state_table(THD *thd) +{ + TABLE_LIST tlist; + int err= 0; + + tlist.init_one_table(&MYSQL_SCHEMA_NAME, &rpl_gtid_slave_state_table_name, + NULL, TL_WRITE); + tlist.mdl_request.set_type(MDL_EXCLUSIVE); + if (!(err= open_and_lock_tables(thd, &tlist, FALSE, + MYSQL_OPEN_IGNORE_LOGGING_FORMAT))) + { + DBUG_ASSERT(!tlist.table->file->row_logging); + tlist.table->s->tdc->flush(thd, true); + err= tlist.table->file->ha_truncate(); + + if (err) + { + ha_rollback_trans(thd, FALSE); + close_thread_tables(thd); + ha_rollback_trans(thd, TRUE); + } + else + { + ha_commit_trans(thd, FALSE); + close_thread_tables(thd); + ha_commit_trans(thd, TRUE); + } + thd->release_transactional_locks(); + } + return err; +} + + +static const TABLE_FIELD_TYPE mysql_rpl_slave_state_coltypes[4]= { + { { STRING_WITH_LEN("domain_id") }, + { STRING_WITH_LEN("int(10) unsigned") }, + {NULL, 0} }, + { { STRING_WITH_LEN("sub_id") }, + { STRING_WITH_LEN("bigint(20) unsigned") }, + {NULL, 0} }, + { { STRING_WITH_LEN("server_id") }, + { STRING_WITH_LEN("int(10) unsigned") }, + {NULL, 0} }, + { { STRING_WITH_LEN("seq_no") }, + { STRING_WITH_LEN("bigint(20) unsigned") }, + {NULL, 0} }, +}; + +static const uint mysql_rpl_slave_state_pk_parts[]= {0, 1}; + +static const TABLE_FIELD_DEF mysql_gtid_slave_pos_tabledef= { + array_elements(mysql_rpl_slave_state_coltypes), + mysql_rpl_slave_state_coltypes, + array_elements(mysql_rpl_slave_state_pk_parts), + mysql_rpl_slave_state_pk_parts +}; + +static Table_check_intact_log_error gtid_table_intact; + +/* + Check that the mysql.gtid_slave_pos table has the correct 
definition. +*/ +int +gtid_check_rpl_slave_state_table(TABLE *table) +{ + int err; + + if ((err= gtid_table_intact.check(table, &mysql_gtid_slave_pos_tabledef))) + my_error(ER_GTID_OPEN_TABLE_FAILED, MYF(0), "mysql", + rpl_gtid_slave_state_table_name.str); + return err; +} + + +/* + Attempt to find a mysql.gtid_slave_posXXX table that has a storage engine + that is already in use by the current transaction, if any. +*/ +void +rpl_slave_state::select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename) +{ + /* + See comments on rpl_slave_state::gtid_pos_tables for rules around proper + access to the list. + */ + auto list= gtid_pos_tables.load(std::memory_order_acquire); + + Ha_trx_info *ha_info; + uint count = 0; + for (ha_info= thd->transaction->all.ha_list; ha_info; ha_info= ha_info->next()) + { + void *trx_hton= ha_info->ht(); + auto table_entry= list; + + if (!ha_info->is_trx_read_write() || trx_hton == binlog_hton) + continue; + while (table_entry) + { + if (table_entry->table_hton == trx_hton) + { + if (likely(table_entry->state == GTID_POS_AVAILABLE)) + { + *out_tablename= table_entry->table_name; + /* + Check if this is a cross-engine transaction, so we can correctly + maintain the rpl_transactions_multi_engine status variable. + */ + if (count >= 1) + statistic_increment(rpl_transactions_multi_engine, LOCK_status); + else + { + for (;;) + { + ha_info= ha_info->next(); + if (!ha_info) + break; + if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton) + { + statistic_increment(rpl_transactions_multi_engine, LOCK_status); + break; + } + } + } + return; + } + /* + This engine is marked to automatically create the table. + We cannot easily do this here (possibly in the middle of a + transaction). But we can request the slave background thread + to create it, and in a short while it should become available + for following transactions. 
+ */ +#ifdef HAVE_REPLICATION + slave_background_gtid_pos_create_request(table_entry); +#endif + break; + } + table_entry= table_entry->next; + } + ++count; + } + /* + If we cannot find any table whose engine matches an engine that is + already active in the transaction, or if there is no current transaction + engines available, we return the default gtid_slave_pos table. + */ + *out_tablename= + default_gtid_pos_table.load(std::memory_order_acquire)->table_name; + /* Record in status that we failed to find a suitable gtid_pos table. */ + if (count > 0) + { + statistic_increment(transactions_gtid_foreign_engine, LOCK_status); + if (count > 1) + statistic_increment(rpl_transactions_multi_engine, LOCK_status); + } +} + + +/* + Write a gtid to the replication slave state table. + + Do it as part of the transaction, to get slave crash safety, or as a separate + transaction if !in_transaction (eg. MyISAM or DDL). + + gtid The global transaction id for this event group. + sub_id Value allocated within the sub_id when the event group was + read (sub_id must be consistent with commit order in master binlog). + + Note that caller must later ensure that the new gtid and sub_id is inserted + into the appropriate HASH element with rpl_slave_state.add(), so that it can + be deleted later. But this must only be done after COMMIT if in transaction. 
+*/ +int +rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, + bool in_transaction, bool in_statement, + void **out_hton) +{ + TABLE_LIST tlist; + int err= 0, not_sql_thread; + bool table_opened= false; + TABLE *table; + ulonglong thd_saved_option= thd->variables.option_bits; + Query_tables_list lex_backup; + wait_for_commit* suspended_wfc; + void *hton= NULL; + LEX_CSTRING gtid_pos_table_name; + TABLE *tbl= nullptr; + MDL_savepoint m_start_of_statement_svp(thd->mdl_context.mdl_savepoint()); + DBUG_ENTER("record_gtid"); + + *out_hton= NULL; + if (unlikely(!loaded)) + { + /* + Probably the mysql.gtid_slave_pos table is missing (eg. upgrade) or + corrupt. + + We already complained loudly about this, but we can try to continue + until the DBA fixes it. + */ + DBUG_RETURN(0); + } + + if (!in_statement) + thd->reset_for_next_command(); + + if (thd->rgi_slave && (thd->rgi_slave->gtid_ev_flags_extra & + Gtid_log_event::FL_START_ALTER_E1)) + { + /* + store the open table table list in ptr, so that is close_thread_tables + is called start alter tables are not closed + */ + mysql_mutex_lock(&thd->LOCK_thd_data); + tbl= thd->open_tables; + thd->open_tables= nullptr; + mysql_mutex_unlock(&thd->LOCK_thd_data); + } + /* + Only the SQL thread can call select_gtid_pos_table without a mutex + Other threads needs to use a mutex and take into account that the + result may change during execution, so we have to make a copy. 
+ */ + + if ((not_sql_thread= (thd->system_thread != SYSTEM_THREAD_SLAVE_SQL))) + mysql_mutex_lock(&LOCK_slave_state); + select_gtid_pos_table(thd, >id_pos_table_name); + if (not_sql_thread) + { + LEX_CSTRING *tmp= thd->make_clex_string(gtid_pos_table_name.str, + gtid_pos_table_name.length); + mysql_mutex_unlock(&LOCK_slave_state); + if (!tmp) + DBUG_RETURN(1); + gtid_pos_table_name= *tmp; + } + + DBUG_EXECUTE_IF("gtid_inject_record_gtid", + { + my_error(ER_CANNOT_UPDATE_GTID_STATE, MYF(0)); + DBUG_RETURN(1); + } ); + + /* + If we are applying a non-transactional event group, we will be committing + here a transaction, but that does not imply that the event group has + completed or has been binlogged. So we should not trigger + wakeup_subsequent_commits() here. + + Note: An alternative here could be to put a call to mark_start_commit() in + stmt_done() before the call to record_and_update_gtid(). This would + prevent later calling mark_start_commit() after we have run + wakeup_subsequent_commits() from committing the GTID update transaction + (which must be avoided to avoid accessing freed group_commit_orderer + object). It would also allow following event groups to start slightly + earlier. And in the cases where record_gtid() is called without an active + transaction, the current statement should have been binlogged already, so + binlog order is preserved. + + But this is rather subtle, and potentially fragile. And it does not really + seem worth it; non-transactional loads are unlikely to benefit much from + parallel replication in any case. So for now, we go with the simple + suspend/resume of wakeup_subsequent_commits() here in record_gtid(). 
+ */ + suspended_wfc= thd->suspend_subsequent_commits(); + thd->lex->reset_n_backup_query_tables_list(&lex_backup); + tlist.init_one_table(&MYSQL_SCHEMA_NAME, >id_pos_table_name, NULL, TL_WRITE); + if ((err= open_and_lock_tables(thd, &tlist, FALSE, 0))) + goto end; + table_opened= true; + table= tlist.table; + hton= table->s->db_type(); + table->file->row_logging= 0; // No binary logging + + if ((err= gtid_check_rpl_slave_state_table(table))) + goto end; + +#ifdef WITH_WSREP + /* + We should replicate local gtid_slave_pos updates to other nodes. + In applier we should not append them to galera writeset. + */ + if (WSREP_ON_ && wsrep_thd_is_local(thd)) + { + thd->wsrep_ignore_table= false; + wsrep_start_trx_if_not_started(thd); + } + else + { + thd->wsrep_ignore_table= true; + } +#endif + + if (!in_transaction) + { + DBUG_PRINT("info", ("resetting OPTION_BEGIN")); + thd->variables.option_bits&= + ~(ulonglong)(OPTION_NOT_AUTOCOMMIT |OPTION_BEGIN |OPTION_BIN_LOG | + OPTION_GTID_BEGIN); + } + else + thd->variables.option_bits&= ~(ulonglong)OPTION_BIN_LOG; + + bitmap_set_all(table->write_set); + table->rpl_write_set= table->write_set; + + table->field[0]->store((ulonglong)gtid->domain_id, true); + table->field[1]->store(sub_id, true); + table->field[2]->store((ulonglong)gtid->server_id, true); + table->field[3]->store(gtid->seq_no, true); + DBUG_EXECUTE_IF("inject_crash_before_write_rpl_slave_state", DBUG_SUICIDE();); + if ((err= table->file->ha_write_row(table->record[0]))) + { + table->file->print_error(err, MYF(0)); + goto end; + } + *out_hton= hton; + + if(opt_bin_log && + (err= mysql_bin_log.bump_seq_no_counter_if_needed(gtid->domain_id, + gtid->seq_no))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + goto end; + } +end: + +#ifdef WITH_WSREP + thd->wsrep_ignore_table= false; +#endif + + if (table_opened) + { + if (err || (err= ha_commit_trans(thd, FALSE))) + ha_rollback_trans(thd, FALSE); + close_thread_tables(thd); + if (!thd->rgi_slave || 
!(thd->rgi_slave->gtid_ev_flags_extra & + Gtid_log_event::FL_START_ALTER_E1)) + { + if (in_transaction) + thd->mdl_context.release_statement_locks(); + else + thd->release_transactional_locks(); + } + } + if (thd->rgi_slave && + thd->rgi_slave->gtid_ev_flags_extra & Gtid_log_event::FL_START_ALTER_E1) + { + mysql_mutex_lock(&thd->LOCK_thd_data); + thd->open_tables= tbl; + mysql_mutex_unlock(&thd->LOCK_thd_data); + thd->mdl_context.rollback_to_savepoint(m_start_of_statement_svp); + } + thd->lex->restore_backup_query_tables_list(&lex_backup); + thd->variables.option_bits= thd_saved_option; + thd->resume_subsequent_commits(suspended_wfc); + DBUG_EXECUTE_IF("inject_record_gtid_serverid_100_sleep", + { + if (gtid->server_id == 100) + my_sleep(500000); + }); + DBUG_RETURN(err); +} + + +/* + Return a list of all old GTIDs in any mysql.gtid_slave_pos* table that are + no longer needed and can be deleted from the table. + + Within each domain, we need to keep around the latest GTID (the one with the + highest sub_id), but any others in that domain can be deleted. +*/ +rpl_slave_state::list_element * +rpl_slave_state::gtid_grab_pending_delete_list() +{ + uint32 i; + list_element *full_list; + + mysql_mutex_lock(&LOCK_slave_state); + full_list= NULL; + for (i= 0; i < hash.records; ++i) + { + element *elem= (element *)my_hash_element(&hash, i); + list_element *elist= elem->list; + list_element *last_elem, **best_ptr_ptr, *cur, *next; + uint64 best_sub_id; + + if (!elist) + continue; /* Nothing here */ + + /* Delete any old stuff, but keep around the most recent one. */ + cur= elist; + best_sub_id= cur->sub_id; + best_ptr_ptr= &elist; + last_elem= cur; + while ((next= cur->next)) { + last_elem= next; + if (next->sub_id > best_sub_id) + { + best_sub_id= next->sub_id; + best_ptr_ptr= &cur->next; + } + cur= next; + } + /* + Append the new elements to the full list. 
Note the order is important; + we do it here so that we do not break the list if best_sub_id is the + last of the new elements. + */ + last_elem->next= full_list; + /* + Delete the highest sub_id element from the old list, and put it back as + the single-element new list. + */ + cur= *best_ptr_ptr; + *best_ptr_ptr= cur->next; + cur->next= NULL; + elem->list= cur; + + /* + Collect the full list so far here. Note that elist may have moved if we + deleted the first element, so order is again important. + */ + full_list= elist; + } + mysql_mutex_unlock(&LOCK_slave_state); + + return full_list; +} + + +/* Find the mysql.gtid_slave_posXXX table associated with a given hton. */ +LEX_CSTRING * +rpl_slave_state::select_gtid_pos_table(void *hton) +{ + /* + See comments on rpl_slave_state::gtid_pos_tables for rules around proper + access to the list. + */ + auto table_entry= gtid_pos_tables.load(std::memory_order_acquire); + + while (table_entry) + { + if (table_entry->table_hton == hton) + { + if (likely(table_entry->state == GTID_POS_AVAILABLE)) + return &table_entry->table_name; + } + table_entry= table_entry->next; + } + + return &default_gtid_pos_table.load(std::memory_order_acquire)->table_name; +} + + +void +rpl_slave_state::gtid_delete_pending(THD *thd, + rpl_slave_state::list_element **list_ptr) +{ + int err= 0; + ulonglong thd_saved_option; + + if (unlikely(!loaded)) + return; + +#ifdef WITH_WSREP + /* + We should replicate local gtid_slave_pos updates to other nodes. + In applier we should not append them to galera writeset. 
+ */ + if (WSREP_ON_ && wsrep_thd_is_local(thd) && + thd->wsrep_cs().state() != wsrep::client_state::s_none) + { + if (thd->wsrep_trx().active() == false) + { + if (thd->wsrep_next_trx_id() == WSREP_UNDEFINED_TRX_ID) + thd->set_query_id(next_query_id()); + wsrep_start_transaction(thd, thd->wsrep_next_trx_id()); + } + thd->wsrep_ignore_table= false; + } + thd->wsrep_ignore_table= true; +#endif + + thd_saved_option= thd->variables.option_bits; + thd->variables.option_bits&= + ~(ulonglong)(OPTION_NOT_AUTOCOMMIT |OPTION_BEGIN |OPTION_BIN_LOG | + OPTION_GTID_BEGIN); + + while (*list_ptr) + { + LEX_CSTRING *gtid_pos_table_name, *tmp_table_name; + Query_tables_list lex_backup; + TABLE_LIST tlist; + TABLE *table; + handler::Table_flags direct_pos= 0; + list_element *cur, **cur_ptr_ptr; + bool table_opened= false; + bool index_inited= false; + void *hton= (*list_ptr)->hton; + + thd->reset_for_next_command(); + + /* + Only the SQL thread can call select_gtid_pos_table without a mutex + Other threads needs to use a mutex and take into account that the + result may change during execution, so we have to make a copy. + */ + mysql_mutex_lock(&LOCK_slave_state); + tmp_table_name= select_gtid_pos_table(hton); + gtid_pos_table_name= thd->make_clex_string(tmp_table_name->str, + tmp_table_name->length); + mysql_mutex_unlock(&LOCK_slave_state); + if (!gtid_pos_table_name) + { + /* Out of memory - we can try again later. */ + break; + } + + thd->lex->reset_n_backup_query_tables_list(&lex_backup); + tlist.init_one_table(&MYSQL_SCHEMA_NAME, gtid_pos_table_name, NULL, TL_WRITE); + if ((err= open_and_lock_tables(thd, &tlist, FALSE, 0))) + goto end; + table_opened= true; + table= tlist.table; + + if ((err= gtid_check_rpl_slave_state_table(table))) + goto end; + + direct_pos= table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION; + bitmap_set_all(table->write_set); + table->rpl_write_set= table->write_set; + + /* Now delete any already committed GTIDs. 
*/ + bitmap_set_bit(table->read_set, table->field[0]->field_index); + bitmap_set_bit(table->read_set, table->field[1]->field_index); + + if (!direct_pos) + { + if ((err= table->file->ha_index_init(0, 0))) + { + table->file->print_error(err, MYF(0)); + goto end; + } + index_inited= true; + } + + cur = *list_ptr; + cur_ptr_ptr = list_ptr; + do + { + uchar key_buffer[4+8]; + list_element *next= cur->next; + + if (cur->hton == hton) + { + int res; + + table->field[0]->store((ulonglong)cur->domain_id, true); + table->field[1]->store(cur->sub_id, true); + if (direct_pos) + { + res= table->file->ha_rnd_pos_by_record(table->record[0]); + } + else + { + key_copy(key_buffer, table->record[0], &table->key_info[0], 0, false); + res= table->file->ha_index_read_map(table->record[0], key_buffer, + HA_WHOLE_KEY, HA_READ_KEY_EXACT); + } + DBUG_EXECUTE_IF("gtid_slave_pos_simulate_failed_delete", + { res= 1; + err= ENOENT; + sql_print_error(" Error deleting old GTID row"); + }); + if (res) + /* We cannot find the row, assume it is already deleted. */ + ; + else if ((err= table->file->ha_delete_row(table->record[0]))) + { + sql_print_error("Error deleting old GTID row: %s", + thd->get_stmt_da()->message()); + /* + In case of error, we still discard the element from the list. We do + not want to endlessly error on the same element in case of table + corruption or such. + */ + } + *cur_ptr_ptr= next; + my_free(cur); + } + else + { + /* Leave this one in the list until we get to the table for its hton. */ + cur_ptr_ptr= &cur->next; + } + cur= next; + if (err) + break; + } while (cur); +end: + if (table_opened) + { + DBUG_ASSERT(direct_pos || index_inited || err); + /* + Index may not be initialized if there was a failure during + 'ha_index_init'. Hence check if index initialization is successful and + then invoke ha_index_end(). Ending an index which is not initialized + will lead to assert. 
+ */ + if (index_inited) + table->file->ha_index_end(); + + if (err || (err= ha_commit_trans(thd, FALSE))) + ha_rollback_trans(thd, FALSE); + } + close_thread_tables(thd); + thd->release_transactional_locks(); + thd->lex->restore_backup_query_tables_list(&lex_backup); + + if (err) + break; + } + thd->variables.option_bits= thd_saved_option; + +#ifdef WITH_WSREP + thd->wsrep_ignore_table= false; +#endif +} + + +uint64 +rpl_slave_state::next_sub_id(uint32 domain_id) +{ + uint64 sub_id= 0; + + mysql_mutex_lock(&LOCK_slave_state); + sub_id= ++last_sub_id; + mysql_mutex_unlock(&LOCK_slave_state); + + return sub_id; +} + +/* A callback used in sorting of gtid list based on domain_id. */ +static int rpl_gtid_cmp_cb(const void *id1, const void *id2) +{ + uint32 d1= ((rpl_gtid *)id1)->domain_id; + uint32 d2= ((rpl_gtid *)id2)->domain_id; + + if (d1 < d2) + return -1; + else if (d1 > d2) + return 1; + return 0; +} + +/* Format the specified gtid and store it in the given string buffer. */ +bool +rpl_slave_state_tostring_helper(String *dest, const rpl_gtid *gtid, bool *first) +{ + if (*first) + *first= false; + else + if (dest->append(',')) + return true; + return + dest->append_ulonglong(gtid->domain_id) || + dest->append('-') || + dest->append_ulonglong(gtid->server_id) || + dest->append('-') || + dest->append_ulonglong(gtid->seq_no); +} + +/* + Sort the given gtid list based on domain_id and store them in the specified + string. +*/ +static bool +rpl_slave_state_tostring_helper(DYNAMIC_ARRAY *gtid_dynarr, String *str) +{ + bool first= true, res= true; + + sort_dynamic(gtid_dynarr, rpl_gtid_cmp_cb); + + for (uint i= 0; i < gtid_dynarr->elements; i ++) + { + rpl_gtid *gtid= dynamic_element(gtid_dynarr, i, rpl_gtid *); + if (rpl_slave_state_tostring_helper(str, gtid, &first)) + goto err; + } + res= false; + +err: + return res; +} + + +/* Sort the given gtid list based on domain_id and call cb for each gtid. 
*/ +static bool +rpl_slave_state_tostring_helper(DYNAMIC_ARRAY *gtid_dynarr, + int (*cb)(rpl_gtid *, void *), + void *data) +{ + rpl_gtid *gtid; + bool res= true; + + sort_dynamic(gtid_dynarr, rpl_gtid_cmp_cb); + + for (uint i= 0; i < gtid_dynarr->elements; i ++) + { + gtid= dynamic_element(gtid_dynarr, i, rpl_gtid *); + if ((*cb)(gtid, data)) + goto err; + } + res= false; + +err: + return res; +} + +int +rpl_slave_state::iterate(int (*cb)(rpl_gtid *, void *), void *data, + rpl_gtid *extra_gtids, uint32 num_extra, + bool sort) +{ + uint32 i; + HASH gtid_hash; + uchar *rec; + rpl_gtid *gtid; + int res= 1; + bool locked= false; + + my_hash_init(PSI_INSTRUMENT_ME, >id_hash, &my_charset_bin, 32, + offsetof(rpl_gtid, domain_id), sizeof(rpl_gtid::domain_id), + NULL, NULL, HASH_UNIQUE); + for (i= 0; i < num_extra; ++i) + if (extra_gtids[i].server_id == global_system_variables.server_id && + my_hash_insert(>id_hash, (uchar *)(&extra_gtids[i]))) + goto err; + + mysql_mutex_lock(&LOCK_slave_state); + locked= true; + reset_dynamic(>id_sort_array); + + for (i= 0; i < hash.records; ++i) + { + uint64 best_sub_id; + rpl_gtid best_gtid; + element *e= (element *)my_hash_element(&hash, i); + list_element *l= e->list; + + if (!l) + continue; /* Nothing here */ + + best_gtid.domain_id= e->domain_id; + best_gtid.server_id= l->server_id; + best_gtid.seq_no= l->seq_no; + best_sub_id= l->sub_id; + while ((l= l->next)) + { + if (l->sub_id > best_sub_id) + { + best_sub_id= l->sub_id; + best_gtid.server_id= l->server_id; + best_gtid.seq_no= l->seq_no; + } + } + + /* Check if we have something newer in the extra list. */ + rec= my_hash_search(>id_hash, (const uchar *)&best_gtid.domain_id, + sizeof(best_gtid.domain_id)); + if (rec) + { + gtid= (rpl_gtid *)rec; + if (gtid->seq_no > best_gtid.seq_no) + memcpy(&best_gtid, gtid, sizeof(best_gtid)); + if (my_hash_delete(>id_hash, rec)) + { + goto err; + } + } + + if ((res= sort ? 
insert_dynamic(>id_sort_array, + (const void *) &best_gtid) : + (*cb)(&best_gtid, data))) + { + goto err; + } + } + + /* Also add any remaining extra domain_ids. */ + for (i= 0; i < gtid_hash.records; ++i) + { + gtid= (rpl_gtid *)my_hash_element(>id_hash, i); + if ((res= sort ? insert_dynamic(>id_sort_array, (const void *) gtid) : + (*cb)(gtid, data))) + { + goto err; + } + } + + if (sort && rpl_slave_state_tostring_helper(>id_sort_array, cb, data)) + { + goto err; + } + + res= 0; + +err: + if (locked) mysql_mutex_unlock(&LOCK_slave_state); + my_hash_free(>id_hash); + + return res; +} + + +struct rpl_slave_state_tostring_data { + String *dest; + bool first; +}; +static int +rpl_slave_state_tostring_cb(rpl_gtid *gtid, void *data) +{ + rpl_slave_state_tostring_data *p= (rpl_slave_state_tostring_data *)data; + return rpl_slave_state_tostring_helper(p->dest, gtid, &p->first); +} + + +/* + Prepare the current slave state as a string, suitable for sending to the + master to request to receive binlog events starting from that GTID state. + + The state consists of the most recently applied GTID for each domain_id, + ie. the one with the highest sub_id within each domain_id. + + Optinally, extra_gtids is a list of GTIDs from the binlog. This is used when + a server was previously a master and now needs to connect to a new master as + a slave. For each domain_id, if the GTID in the binlog was logged with our + own server_id _and_ has a higher seq_no than what is in the slave state, + then this should be used as the position to start replicating at. This + allows to promote a slave as new master, and connect the old master as a + slave with MASTER_GTID_POS=AUTO. 
+*/ +int +rpl_slave_state::tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra) +{ + struct rpl_slave_state_tostring_data data; + data.first= true; + data.dest= dest; + + return iterate(rpl_slave_state_tostring_cb, &data, extra_gtids, + num_extra, true); +} + + +/* + Lookup a domain_id in the current replication slave state. + + Returns false if the domain_id has no entries in the slave state. + Otherwise returns true, and fills in out_gtid with the corresponding + GTID. +*/ +bool +rpl_slave_state::domain_to_gtid(uint32 domain_id, rpl_gtid *out_gtid) +{ + element *elem; + list_element *list; + uint64 best_sub_id; + + mysql_mutex_lock(&LOCK_slave_state); + elem= (element *)my_hash_search(&hash, (const uchar *)&domain_id, + sizeof(domain_id)); + if (!elem || !(list= elem->list)) + { + mysql_mutex_unlock(&LOCK_slave_state); + return false; + } + + out_gtid->domain_id= domain_id; + out_gtid->server_id= list->server_id; + out_gtid->seq_no= list->seq_no; + best_sub_id= list->sub_id; + + while ((list= list->next)) + { + if (best_sub_id > list->sub_id) + continue; + best_sub_id= list->sub_id; + out_gtid->server_id= list->server_id; + out_gtid->seq_no= list->seq_no; + } + + mysql_mutex_unlock(&LOCK_slave_state); + return true; +} + +#endif + +/* + Parse a GTID at the start of a string, and update the pointer to point + at the first character after the parsed GTID. + + Returns 0 on ok, non-zero on parse error. 
+*/ +static int +gtid_parser_helper(const char **ptr, const char *end, rpl_gtid *out_gtid) +{ + char *q; + const char *p= *ptr; + uint64 v1, v2, v3; + int err= 0; + + q= (char*) end; + v1= (uint64)my_strtoll10(p, &q, &err); + if (err != 0 || v1 > (uint32)0xffffffff || q == end || *q != '-') + return 1; + p= q+1; + q= (char*) end; + v2= (uint64)my_strtoll10(p, &q, &err); + if (err != 0 || v2 > (uint32)0xffffffff || q == end || *q != '-') + return 1; + p= q+1; + q= (char*) end; + v3= (uint64)my_strtoll10(p, &q, &err); + if (err != 0) + return 1; + + out_gtid->domain_id= (uint32) v1; + out_gtid->server_id= (uint32) v2; + out_gtid->seq_no= v3; + *ptr= q; + return 0; +} + +rpl_gtid * +gtid_parse_string_to_list(const char *str, size_t str_len, uint32 *out_len) +{ + const char *p= const_cast(str); + const char *end= p + str_len; + uint32 len= 0, alloc_len= 5; + rpl_gtid *list= NULL; + + for (;;) + { + rpl_gtid gtid; + + if (len >= (((uint32)1 << 28)-1) || gtid_parser_helper(&p, end, >id)) + { + my_free(list); + return NULL; + } + if ((!list || len >= alloc_len) && + !(list= + (rpl_gtid *)my_realloc(PSI_INSTRUMENT_ME, list, + (alloc_len= alloc_len*2) * sizeof(rpl_gtid), + MYF(MY_FREE_ON_ERROR|MY_ALLOW_ZERO_PTR)))) + return NULL; + list[len++]= gtid; + + if (p == end) + break; + if (*p != ',') + { + my_free(list); + return NULL; + } + ++p; + } + *out_len= len; + return list; +} + +#ifndef MYSQL_CLIENT + +/* + Update the slave replication state with the GTID position obtained from + master when connecting with old-style (filename,offset) position. + + If RESET is true then all existing entries are removed. Otherwise only + domain_ids mentioned in the STATE_FROM_MASTER are changed. + + Returns 0 if ok, non-zero if error. 
+*/ +int +rpl_slave_state::load(THD *thd, const char *state_from_master, size_t len, + bool reset, bool in_statement) +{ + const char *end= state_from_master + len; + + mysql_mutex_assert_not_owner(&LOCK_slave_state); + if (reset) + { + if (truncate_state_table(thd)) + return 1; + truncate_hash(); + } + if (state_from_master == end) + return 0; + for (;;) + { + rpl_gtid gtid; + uint64 sub_id; + void *hton= NULL; + + if (gtid_parser_helper(&state_from_master, end, >id) || + !(sub_id= next_sub_id(gtid.domain_id)) || + record_gtid(thd, >id, sub_id, false, in_statement, &hton) || + update(gtid.domain_id, gtid.server_id, sub_id, gtid.seq_no, hton, NULL)) + return 1; + if (state_from_master == end) + break; + if (*state_from_master != ',') + return 1; + ++state_from_master; + } + return 0; +} + + +bool +rpl_slave_state::is_empty() +{ + uint32 i; + bool result= true; + + mysql_mutex_lock(&LOCK_slave_state); + for (i= 0; i < hash.records; ++i) + { + element *e= (element *)my_hash_element(&hash, i); + if (e->list) + { + result= false; + break; + } + } + mysql_mutex_unlock(&LOCK_slave_state); + + return result; +} + + +void +rpl_slave_state::free_gtid_pos_tables(struct rpl_slave_state::gtid_pos_table *list) +{ + struct gtid_pos_table *cur, *next; + + cur= list; + while (cur) + { + next= cur->next; + my_free(cur); + cur= next; + } +} + + +/* + Replace the list of available mysql.gtid_slave_posXXX tables with a new list. + The caller must be holding LOCK_slave_state. Additionally, this function + must only be called while all SQL threads are stopped. 
+*/ +void +rpl_slave_state::set_gtid_pos_tables_list(rpl_slave_state::gtid_pos_table *new_list, + rpl_slave_state::gtid_pos_table *default_entry) +{ + mysql_mutex_assert_owner(&LOCK_slave_state); + auto old_list= gtid_pos_tables.load(std::memory_order_relaxed); + gtid_pos_tables.store(new_list, std::memory_order_release); + default_gtid_pos_table.store(default_entry, std::memory_order_release); + free_gtid_pos_tables(old_list); +} + + +void +rpl_slave_state::add_gtid_pos_table(rpl_slave_state::gtid_pos_table *entry) +{ + mysql_mutex_assert_owner(&LOCK_slave_state); + entry->next= gtid_pos_tables.load(std::memory_order_relaxed); + gtid_pos_tables.store(entry, std::memory_order_release); +} + + +struct rpl_slave_state::gtid_pos_table * +rpl_slave_state::alloc_gtid_pos_table(LEX_CSTRING *table_name, void *hton, + rpl_slave_state::gtid_pos_table_state state) +{ + struct gtid_pos_table *p; + char *allocated_str; + + if (!my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME), &p, sizeof(*p), + &allocated_str, table_name->length+1, NULL)) + { + my_error(ER_OUTOFMEMORY, MYF(0), (int)(sizeof(*p) + table_name->length+1)); + return NULL; + } + memcpy(allocated_str, table_name->str, table_name->length+1); // Also copy '\0' + p->next = NULL; + p->table_hton= hton; + p->table_name.str= allocated_str; + p->table_name.length= table_name->length; + p->state= state; + return p; +} + + +void rpl_binlog_state::init() +{ + my_hash_init(PSI_INSTRUMENT_ME, &hash, &my_charset_bin, 32, + offsetof(element, domain_id), sizeof(element::domain_id), + NULL, my_free, HASH_UNIQUE); + my_init_dynamic_array(PSI_INSTRUMENT_ME, >id_sort_array, sizeof(rpl_gtid), 8, 8, MYF(0)); + mysql_mutex_init(key_LOCK_binlog_state, &LOCK_binlog_state, + MY_MUTEX_INIT_SLOW); + initialized= 1; +} + +void +rpl_binlog_state::reset_nolock() +{ + uint32 i; + + for (i= 0; i < hash.records; ++i) + my_hash_free(&((element *)my_hash_element(&hash, i))->hash); + my_hash_reset(&hash); +} + + +void +rpl_binlog_state::reset() +{ + 
mysql_mutex_lock(&LOCK_binlog_state); + reset_nolock(); + mysql_mutex_unlock(&LOCK_binlog_state); +} + + +void rpl_binlog_state::free() +{ + if (initialized) + { + initialized= 0; + reset_nolock(); + my_hash_free(&hash); + delete_dynamic(>id_sort_array); + mysql_mutex_destroy(&LOCK_binlog_state); + } +} + + +bool +rpl_binlog_state::load(struct rpl_gtid *list, uint32 count) +{ + uint32 i; + bool res= false; + + mysql_mutex_lock(&LOCK_binlog_state); + reset_nolock(); + for (i= 0; i < count; ++i) + { + if (update_nolock(&(list[i]), false)) + { + res= true; + break; + } + } + mysql_mutex_unlock(&LOCK_binlog_state); + return res; +} + + +static int rpl_binlog_state_load_cb(rpl_gtid *gtid, void *data) +{ + rpl_binlog_state *self= (rpl_binlog_state *)data; + return self->update_nolock(gtid, false); +} + + +bool +rpl_binlog_state::load(rpl_slave_state *slave_pos) +{ + bool res= false; + + mysql_mutex_lock(&LOCK_binlog_state); + reset_nolock(); + if (slave_pos->iterate(rpl_binlog_state_load_cb, this, NULL, 0, false)) + res= true; + mysql_mutex_unlock(&LOCK_binlog_state); + return res; +} + + +rpl_binlog_state::~rpl_binlog_state() +{ + free(); +} + + +/* + Update replication state with a new GTID. + + If the (domain_id, server_id) pair already exists, then the new GTID replaces + the old one for that domain id. Else a new entry is inserted. + + Returns 0 for ok, 1 for error. 
+*/ +int +rpl_binlog_state::update_nolock(const struct rpl_gtid *gtid, bool strict) +{ + element *elem; + + if ((elem= (element *)my_hash_search(&hash, + (const uchar *)(>id->domain_id), + sizeof(gtid->domain_id)))) + { + if (strict && elem->last_gtid && elem->last_gtid->seq_no >= gtid->seq_no) + { + my_error(ER_GTID_STRICT_OUT_OF_ORDER, MYF(0), gtid->domain_id, + gtid->server_id, gtid->seq_no, elem->last_gtid->domain_id, + elem->last_gtid->server_id, elem->last_gtid->seq_no); + return 1; + } + if (elem->seq_no_counter < gtid->seq_no) + elem->seq_no_counter= gtid->seq_no; + if (!elem->update_element(gtid)) + return 0; + } + else if (!alloc_element_nolock(gtid)) + return 0; + + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return 1; +} + + +int +rpl_binlog_state::update(const struct rpl_gtid *gtid, bool strict) +{ + int res; + mysql_mutex_lock(&LOCK_binlog_state); + res= update_nolock(gtid, strict); + mysql_mutex_unlock(&LOCK_binlog_state); + return res; +} + + +/* + Fill in a new GTID, allocating next sequence number, and update state + accordingly. +*/ +int +rpl_binlog_state::update_with_next_gtid(uint32 domain_id, uint32 server_id, + rpl_gtid *gtid) +{ + element *elem; + int res= 0; + + gtid->domain_id= domain_id; + gtid->server_id= server_id; + + mysql_mutex_lock(&LOCK_binlog_state); + if ((elem= (element *)my_hash_search(&hash, (const uchar *)(&domain_id), + sizeof(domain_id)))) + { + gtid->seq_no= ++elem->seq_no_counter; + if (!elem->update_element(gtid)) + goto end; + } + else + { + gtid->seq_no= 1; + if (!alloc_element_nolock(gtid)) + goto end; + } + + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + res= 1; +end: + mysql_mutex_unlock(&LOCK_binlog_state); + return res; +} + + +/* Helper functions for update. 
*/ +int +rpl_binlog_state::element::update_element(const rpl_gtid *gtid) +{ + rpl_gtid *lookup_gtid; + + /* + By far the most common case is that successive events within same + replication domain have the same server id (it changes only when + switching to a new master). So save a hash lookup in this case. + */ + if (likely(last_gtid && last_gtid->server_id == gtid->server_id)) + { + last_gtid->seq_no= gtid->seq_no; + return 0; + } + + lookup_gtid= (rpl_gtid *) + my_hash_search(&hash, (const uchar *)>id->server_id, + sizeof(gtid->server_id)); + if (lookup_gtid) + { + lookup_gtid->seq_no= gtid->seq_no; + last_gtid= lookup_gtid; + return 0; + } + + /* Allocate a new GTID and insert it. */ + lookup_gtid= (rpl_gtid *)my_malloc(PSI_INSTRUMENT_ME, sizeof(*lookup_gtid), + MYF(MY_WME)); + if (!lookup_gtid) + return 1; + memcpy(lookup_gtid, gtid, sizeof(*lookup_gtid)); + if (my_hash_insert(&hash, (const uchar *)lookup_gtid)) + { + my_free(lookup_gtid); + return 1; + } + last_gtid= lookup_gtid; + return 0; +} + + +int +rpl_binlog_state::alloc_element_nolock(const rpl_gtid *gtid) +{ + element *elem; + rpl_gtid *lookup_gtid; + + /* First time we see this domain_id; allocate a new element. */ + elem= (element *)my_malloc(PSI_INSTRUMENT_ME, sizeof(*elem), MYF(MY_WME)); + lookup_gtid= (rpl_gtid *)my_malloc(PSI_INSTRUMENT_ME, sizeof(*lookup_gtid), + MYF(MY_WME)); + if (elem && lookup_gtid) + { + elem->domain_id= gtid->domain_id; + my_hash_init(PSI_INSTRUMENT_ME, &elem->hash, &my_charset_bin, 32, + offsetof(rpl_gtid, server_id), sizeof(rpl_gtid::domain_id), + NULL, my_free, HASH_UNIQUE); + elem->last_gtid= lookup_gtid; + elem->seq_no_counter= gtid->seq_no; + memcpy(lookup_gtid, gtid, sizeof(*lookup_gtid)); + if (0 == my_hash_insert(&elem->hash, (const uchar *)lookup_gtid)) + { + lookup_gtid= NULL; /* Do not free. */ + if (0 == my_hash_insert(&hash, (const uchar *)elem)) + return 0; + } + my_hash_free(&elem->hash); + } + + /* An error. 
*/ + if (elem) + my_free(elem); + if (lookup_gtid) + my_free(lookup_gtid); + return 1; +} + + +/* + Check that a new GTID can be logged without creating an out-of-order + sequence number with existing GTIDs. +*/ +bool +rpl_binlog_state::check_strict_sequence(uint32 domain_id, uint32 server_id, + uint64 seq_no, bool no_error) +{ + element *elem; + bool res= 0; + + mysql_mutex_lock(&LOCK_binlog_state); + if ((elem= (element *)my_hash_search(&hash, + (const uchar *)(&domain_id), + sizeof(domain_id))) && + elem->last_gtid && elem->last_gtid->seq_no >= seq_no) + { + if (!no_error) + my_error(ER_GTID_STRICT_OUT_OF_ORDER, MYF(0), domain_id, server_id, seq_no, + elem->last_gtid->domain_id, elem->last_gtid->server_id, + elem->last_gtid->seq_no); + res= 1; + } + mysql_mutex_unlock(&LOCK_binlog_state); + return res; +} + + +/* + When we see a new GTID that will not be binlogged (eg. slave thread + with --log-slave-updates=0), then we need to remember to allocate any + GTID seq_no of our own within that domain starting from there. + + Returns 0 if ok, non-zero if out-of-memory. +*/ +int +rpl_binlog_state::bump_seq_no_if_needed(uint32 domain_id, uint64 seq_no) +{ + element *elem; + int res; + + mysql_mutex_lock(&LOCK_binlog_state); + if ((elem= (element *)my_hash_search(&hash, (const uchar *)(&domain_id), + sizeof(domain_id)))) + { + if (elem->seq_no_counter < seq_no) + elem->seq_no_counter= seq_no; + res= 0; + goto end; + } + + /* We need to allocate a new, empty element to remember the next seq_no. 
*/ + if (!(elem= (element *)my_malloc(PSI_INSTRUMENT_ME, sizeof(*elem), + MYF(MY_WME)))) + { + res= 1; + goto end; + } + + elem->domain_id= domain_id; + my_hash_init(PSI_INSTRUMENT_ME, &elem->hash, &my_charset_bin, 32, + offsetof(rpl_gtid, server_id), sizeof(rpl_gtid::server_id), + NULL, my_free, HASH_UNIQUE); + elem->last_gtid= NULL; + elem->seq_no_counter= seq_no; + if (0 == my_hash_insert(&hash, (const uchar *)elem)) + { + res= 0; + goto end; + } + + my_hash_free(&elem->hash); + my_free(elem); + res= 1; + +end: + mysql_mutex_unlock(&LOCK_binlog_state); + return res; +} + + +/* + Write binlog state to text file, so we can read it in again without having + to scan last binlog file (normal shutdown/startup, not crash recovery). + + The most recent GTID within each domain_id is written after any other GTID + within this domain. +*/ +int +rpl_binlog_state::write_to_iocache(IO_CACHE *dest) +{ + ulong i, j; + char buf[21]; + int res= 0; + + mysql_mutex_lock(&LOCK_binlog_state); + for (i= 0; i < hash.records; ++i) + { + element *e= (element *)my_hash_element(&hash, i); + if (!e->last_gtid) + { + DBUG_ASSERT(e->hash.records == 0); + continue; + } + for (j= 0; j <= e->hash.records; ++j) + { + const rpl_gtid *gtid; + if (j < e->hash.records) + { + gtid= (const rpl_gtid *)my_hash_element(&e->hash, j); + if (gtid == e->last_gtid) + continue; + } + else + gtid= e->last_gtid; + + longlong10_to_str(gtid->seq_no, buf, 10); + if (my_b_printf(dest, "%u-%u-%s\n", gtid->domain_id, gtid->server_id, + buf)) + { + res= 1; + goto end; + } + } + } + +end: + mysql_mutex_unlock(&LOCK_binlog_state); + return res; +} + + +int +rpl_binlog_state::read_from_iocache(IO_CACHE *src) +{ + /* 10-digit - 10-digit - 20-digit \n \0 */ + char buf[10+1+10+1+20+1+1]; + const char *p, *end; + rpl_gtid gtid; + int res= 0; + + mysql_mutex_lock(&LOCK_binlog_state); + reset_nolock(); + for (;;) + { + size_t len= my_b_gets(src, buf, sizeof(buf)); + if (!len) + break; + p= buf; + end= buf + len; + if 
(gtid_parser_helper(&p, end, &gtid) ||
+        update_nolock(&gtid, false))
+    {
+      res= 1;
+      break;
+    }
+  }
+  mysql_mutex_unlock(&LOCK_binlog_state);
+  return res;
+}
+
+
+rpl_gtid *
+rpl_binlog_state::find_nolock(uint32 domain_id, uint32 server_id)
+{
+  element *elem;
+  if (!(elem= (element *)my_hash_search(&hash, (const uchar *)&domain_id,
+                                        sizeof(domain_id))))
+    return NULL;
+  return (rpl_gtid *)my_hash_search(&elem->hash, (const uchar *)&server_id,
+                                    sizeof(server_id));
+}
+
+rpl_gtid *
+rpl_binlog_state::find(uint32 domain_id, uint32 server_id)
+{
+  rpl_gtid *p;
+  mysql_mutex_lock(&LOCK_binlog_state);
+  p= find_nolock(domain_id, server_id);
+  mysql_mutex_unlock(&LOCK_binlog_state);
+  return p;
+}
+
+rpl_gtid *
+rpl_binlog_state::find_most_recent(uint32 domain_id)
+{
+  element *elem;
+  rpl_gtid *gtid= NULL;
+
+  mysql_mutex_lock(&LOCK_binlog_state);
+  elem= (element *)my_hash_search(&hash, (const uchar *)&domain_id,
+                                  sizeof(domain_id));
+  if (elem && elem->last_gtid)
+    gtid= elem->last_gtid;
+  mysql_mutex_unlock(&LOCK_binlog_state);
+
+  return gtid;
+}
+
+
+uint32
+rpl_binlog_state::count()
+{
+  uint32 c= 0;
+  uint32 i;
+
+  mysql_mutex_lock(&LOCK_binlog_state);
+  for (i= 0; i < hash.records; ++i)
+    c+= ((element *)my_hash_element(&hash, i))->hash.records;
+  mysql_mutex_unlock(&LOCK_binlog_state);
+
+  return c;
+}
+
+
+int
+rpl_binlog_state::get_gtid_list(rpl_gtid *gtid_list, uint32 list_size)
+{
+  uint32 i, j, pos;
+  int res= 0;
+
+  mysql_mutex_lock(&LOCK_binlog_state);
+  pos= 0;
+  for (i= 0; i < hash.records; ++i)
+  {
+    element *e= (element *)my_hash_element(&hash, i);
+    if (!e->last_gtid)
+    {
+      DBUG_ASSERT(e->hash.records==0);
+      continue;
+    }
+    /* Iterate one extra time so the domain's last GTID is emitted last. */
+    for (j= 0; j <= e->hash.records; ++j)
+    {
+      const rpl_gtid *gtid;
+      if (j < e->hash.records)
+      {
+        gtid= (rpl_gtid *)my_hash_element(&e->hash, j);
+        if (gtid == e->last_gtid)
+          continue;
+      }
+      else
+        gtid= e->last_gtid;
+
+      if (pos >= list_size)
+      {
+        res= 1;
+        goto end;
+      }
+      memcpy(&gtid_list[pos++], gtid, sizeof(*gtid));
+    }
+
}
+
+end:
+  mysql_mutex_unlock(&LOCK_binlog_state);
+  return res;
+}
+
+
+/*
+  Get a list of the most recently binlogged GTID, for each domain_id.
+
+  This can be used when switching from being a master to being a slave,
+  to know where to start replicating from the new master.
+
+  The returned list must be de-allocated with my_free().
+
+  Returns 0 for ok, non-zero for out-of-memory.
+*/
+int
+rpl_binlog_state::get_most_recent_gtid_list(rpl_gtid **list, uint32 *size)
+{
+  uint32 i;
+  uint32 alloc_size, out_size;
+  int res= 0;
+
+  out_size= 0;
+  mysql_mutex_lock(&LOCK_binlog_state);
+  alloc_size= hash.records;
+  if (!(*list= (rpl_gtid *)my_malloc(PSI_INSTRUMENT_ME,
+                                     alloc_size * sizeof(rpl_gtid),
+                                     MYF(MY_WME))))
+  {
+    res= 1;
+    goto end;
+  }
+  for (i= 0; i < alloc_size; ++i)
+  {
+    element *e= (element *)my_hash_element(&hash, i);
+    if (!e->last_gtid)
+      continue;
+    memcpy(&((*list)[out_size++]), e->last_gtid, sizeof(rpl_gtid));
+  }
+
+end:
+  mysql_mutex_unlock(&LOCK_binlog_state);
+  *size= out_size;
+  return res;
+}
+
+bool
+rpl_binlog_state::append_pos(String *str)
+{
+  uint32 i;
+
+  mysql_mutex_lock(&LOCK_binlog_state);
+  reset_dynamic(&gtid_sort_array);
+
+  for (i= 0; i < hash.records; ++i)
+  {
+    element *e= (element *)my_hash_element(&hash, i);
+    if (e->last_gtid &&
+        insert_dynamic(&gtid_sort_array, (const void *) e->last_gtid))
+    {
+      mysql_mutex_unlock(&LOCK_binlog_state);
+      return true;
+    }
+  }
+  rpl_slave_state_tostring_helper(&gtid_sort_array, str);
+  mysql_mutex_unlock(&LOCK_binlog_state);
+
+  return false;
+}
+
+
+bool
+rpl_binlog_state::append_state(String *str)
+{
+  uint32 i, j;
+  bool res= false;
+
+  mysql_mutex_lock(&LOCK_binlog_state);
+  reset_dynamic(&gtid_sort_array);
+
+  for (i= 0; i < hash.records; ++i)
+  {
+    element *e= (element *)my_hash_element(&hash, i);
+    if (!e->last_gtid)
+    {
+      DBUG_ASSERT(e->hash.records==0);
+      continue;
+    }
+    /* Iterate one extra time so the domain's last GTID is emitted last. */
+    for (j= 0; j <= e->hash.records; ++j)
+    {
+      const rpl_gtid *gtid;
+      if (j < e->hash.records)
+      {
+        gtid= (rpl_gtid
*)my_hash_element(&e->hash, j); + if (gtid == e->last_gtid) + continue; + } + else + gtid= e->last_gtid; + + if (insert_dynamic(>id_sort_array, (const void *) gtid)) + { + res= true; + goto end; + } + } + } + + rpl_slave_state_tostring_helper(>id_sort_array, str); + +end: + mysql_mutex_unlock(&LOCK_binlog_state); + return res; +} + +/** + Remove domains supplied by the first argument from binlog state. + Removal is done for any domain whose last gtids (from all its servers) match + ones in Gtid list event of the 2nd argument. + + @param ids gtid domain id sequence, may contain dups + @param glev pointer to Gtid list event describing + the match condition + @param errbuf [out] pointer to possible error message array + + @retval NULL as success when at least one domain is removed + @retval "" empty string to indicate ineffective call + when no domains removed + @retval NOT EMPTY string otherwise an error message +*/ +const char* +rpl_binlog_state::drop_domain(DYNAMIC_ARRAY *ids, + Gtid_list_log_event *glev, + char* errbuf) +{ + DYNAMIC_ARRAY domain_unique; // sequece (unsorted) of unique element*:s + rpl_binlog_state::element* domain_unique_buffer[16]; + ulong k, l; + const char* errmsg= NULL; + + DBUG_ENTER("rpl_binlog_state::drop_domain"); + + my_init_dynamic_array2(PSI_INSTRUMENT_ME, &domain_unique, + sizeof(element*), domain_unique_buffer, + sizeof(domain_unique_buffer) / sizeof(element*), 4, 0); + + mysql_mutex_lock(&LOCK_binlog_state); + + /* + Gtid list is supposed to come from a binlog's Gtid_list event and + therefore should be a subset of the current binlog state. That is + for every domain in the list the binlog state contains a gtid with + sequence number not less than that of the list. + Exceptions of this inclusion rule are: + A. the list may still refer to gtids from already deleted domains. + Files containing them must have been purged whereas the file + with the list is not yet. + B. out of order groups were injected + C. 
manually build list of binlog files violating the inclusion + constraint. + While A is a normal case (not necessarily distinguishable from C though), + B and C may require the user's attention so any (incl the A's suspected) + inconsistency is diagnosed and *warned*. + */ + for (l= 0, errbuf[0]= 0; l < glev->count; l++, errbuf[0]= 0) + { + rpl_gtid* rb_state_gtid= find_nolock(glev->list[l].domain_id, + glev->list[l].server_id); + if (!rb_state_gtid) + sprintf(errbuf, + "missing gtids from the '%u-%u' domain-server pair which is " + "referred to in the gtid list describing an earlier state. Ignore " + "if the domain ('%u') was already explicitly deleted", + glev->list[l].domain_id, glev->list[l].server_id, + glev->list[l].domain_id); + else if (rb_state_gtid->seq_no < glev->list[l].seq_no) + sprintf(errbuf, + "having a gtid '%u-%u-%llu' which is less than " + "the '%u-%u-%llu' of the gtid list describing an earlier state. " + "The state may have been affected by manually injecting " + "a lower sequence number gtid or via replication", + rb_state_gtid->domain_id, rb_state_gtid->server_id, + rb_state_gtid->seq_no, glev->list[l].domain_id, + glev->list[l].server_id, glev->list[l].seq_no); + if (strlen(errbuf)) // use strlen() as cheap flag + push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_BINLOG_CANT_DELETE_GTID_DOMAIN, + "The current gtid binlog state is incompatible with " + "a former one %s.", errbuf); + } + + /* + For each domain_id from ids + If the domain is already absent from the binlog state + Warn && continue + If any GTID with that domain in binlog state is missing from glev.list + Error out binlog state can't change + */ + for (ulong i= 0; i < ids->elements; i++) + { + rpl_binlog_state::element *elem= NULL; + uint32 *ptr_domain_id; + bool all_found; + + ptr_domain_id= (uint32*) dynamic_array_ptr(ids, i); + elem= (rpl_binlog_state::element *) + my_hash_search(&hash, (const uchar *) ptr_domain_id, + sizeof(ptr_domain_id[0])); + if 
(!elem) + { + push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_BINLOG_CANT_DELETE_GTID_DOMAIN, + "The gtid domain being deleted ('%lu') is not in " + "the current binlog state", (unsigned long) *ptr_domain_id); + continue; + } + + all_found= true; + for (k= 0; k < elem->hash.records && all_found; k++) + { + rpl_gtid *d_gtid= (rpl_gtid *)my_hash_element(&elem->hash, k); + bool match_found= false; + for (ulong l= 0; l < glev->count && !match_found; l++) + match_found= match_found || (*d_gtid == glev->list[l]); + if (!match_found) + all_found= false; + } + + if (!all_found) + { + sprintf(errbuf, "binlog files may contain gtids from the domain ('%u') " + "being deleted. Make sure to first purge those files", + *ptr_domain_id); + errmsg= errbuf; + goto end; + } + // compose a sequence of unique pointers to domain object + for (k= 0; k < domain_unique.elements; k++) + { + if ((rpl_binlog_state::element*) dynamic_array_ptr(&domain_unique, k) + == elem) + break; // domain_id's elem has been already in + } + if (k == domain_unique.elements) // proven not to have duplicates + insert_dynamic(&domain_unique, (uchar*) &elem); + } + + // Domain removal from binlog state + for (k= 0; k < domain_unique.elements; k++) + { + rpl_binlog_state::element *elem= *(rpl_binlog_state::element**) + dynamic_array_ptr(&domain_unique, k); + my_hash_free(&elem->hash); + my_hash_delete(&hash, (uchar*) elem); + } + + DBUG_ASSERT(strlen(errbuf) == 0); + + if (domain_unique.elements == 0) + errmsg= ""; + +end: + mysql_mutex_unlock(&LOCK_binlog_state); + delete_dynamic(&domain_unique); + + DBUG_RETURN(errmsg); +} + +slave_connection_state::slave_connection_state() +{ + my_hash_init(PSI_INSTRUMENT_ME, &hash, &my_charset_bin, 32, + offsetof(entry, gtid) + offsetof(rpl_gtid, domain_id), + sizeof(rpl_gtid::domain_id), NULL, my_free, HASH_UNIQUE); + my_init_dynamic_array(PSI_INSTRUMENT_ME, >id_sort_array, sizeof(rpl_gtid), 8, 8, MYF(0)); +} + + 
+slave_connection_state::~slave_connection_state()
+{
+  my_hash_free(&hash);
+  delete_dynamic(&gtid_sort_array);
+}
+
+
+/*
+  Create a hash from the slave GTID state that is sent to master when slave
+  connects to start replication.
+
+  The state is sent as <GTID>,<GTID>,...,<GTID>, for example:
+
+     0-2-112,1-4-1022
+
+  The state gives for each domain_id the GTID to start replication from for
+  the corresponding replication stream. So domain_id must be unique.
+
+  Returns 0 if ok, non-zero if error due to malformed input.
+
+  Note that input string is built by slave server, so it will not be incorrect
+  unless bug/corruption/malicious server. So we just need basic sanity check,
+  not fancy user-friendly error message.
+*/
+
+int
+slave_connection_state::load(const char *slave_request, size_t len)
+{
+  const char *p, *end;
+  uchar *rec;
+  rpl_gtid *gtid;
+  const entry *e;
+
+  reset();
+  p= slave_request;
+  end= slave_request + len;
+  if (p == end)
+    return 0;
+  for (;;)
+  {
+    if (!(rec= (uchar *)my_malloc(PSI_INSTRUMENT_ME, sizeof(entry),
+                                  MYF(MY_WME))))
+      return 1;
+    gtid= &((entry *)rec)->gtid;
+    if (gtid_parser_helper(&p, end, gtid))
+    {
+      my_free(rec);
+      my_error(ER_INCORRECT_GTID_STATE, MYF(0));
+      return 1;
+    }
+    if ((e= (const entry *)
+         my_hash_search(&hash, (const uchar *)(&gtid->domain_id),
+                        sizeof(gtid->domain_id))))
+    {
+      my_error(ER_DUPLICATE_GTID_DOMAIN, MYF(0), gtid->domain_id,
+               gtid->server_id, (ulonglong)gtid->seq_no, e->gtid.domain_id,
+               e->gtid.server_id, (ulonglong)e->gtid.seq_no, gtid->domain_id);
+      my_free(rec);
+      return 1;
+    }
+    ((entry *)rec)->flags= 0;
+    if (my_hash_insert(&hash, rec))
+    {
+      my_free(rec);
+      my_error(ER_OUT_OF_RESOURCES, MYF(0));
+      return 1;
+    }
+    if (p == end)
+      break;                                    /* Finished.
*/ + if (*p != ',') + { + my_error(ER_INCORRECT_GTID_STATE, MYF(0)); + return 1; + } + ++p; + } + + return 0; +} + + +int +slave_connection_state::load(const rpl_gtid *gtid_list, uint32 count) +{ + uint32 i; + + reset(); + for (i= 0; i < count; ++i) + if (update(>id_list[i])) + return 1; + return 0; +} + + +static int +slave_connection_state_load_cb(rpl_gtid *gtid, void *data) +{ + slave_connection_state *state= (slave_connection_state *)data; + return state->update(gtid); +} + + +/* + Same as rpl_slave_state::tostring(), but populates a slave_connection_state + instead. +*/ +int +slave_connection_state::load(rpl_slave_state *state, + rpl_gtid *extra_gtids, uint32 num_extra) +{ + reset(); + return state->iterate(slave_connection_state_load_cb, this, + extra_gtids, num_extra, false); +} + + +slave_connection_state::entry * +slave_connection_state::find_entry(uint32 domain_id) +{ + return (entry *) my_hash_search(&hash, (const uchar *)(&domain_id), + sizeof(domain_id)); +} + + +rpl_gtid * +slave_connection_state::find(uint32 domain_id) +{ + entry *e= find_entry(domain_id); + if (!e) + return NULL; + return &e->gtid; +} + + +int +slave_connection_state::update(const rpl_gtid *in_gtid) +{ + entry *e; + uchar *rec= my_hash_search(&hash, (const uchar *)(&in_gtid->domain_id), + sizeof(in_gtid->domain_id)); + if (rec) + { + e= (entry *)rec; + e->gtid= *in_gtid; + return 0; + } + + if (!(e= (entry *)my_malloc(PSI_INSTRUMENT_ME, sizeof(*e), MYF(MY_WME)))) + return 1; + e->gtid= *in_gtid; + e->flags= 0; + if (my_hash_insert(&hash, (uchar *)e)) + { + my_free(e); + return 1; + } + + return 0; +} + + +void +slave_connection_state::remove(const rpl_gtid *in_gtid) +{ + uchar *rec= my_hash_search(&hash, (const uchar *)(&in_gtid->domain_id), + sizeof(in_gtid->domain_id)); +#ifdef DBUG_ASSERT_EXISTS + bool err; + rpl_gtid *slave_gtid= &((entry *)rec)->gtid; + DBUG_ASSERT(rec /* We should never try to remove not present domain_id. 
*/); + DBUG_ASSERT(slave_gtid->server_id == in_gtid->server_id); + DBUG_ASSERT(slave_gtid->seq_no == in_gtid->seq_no); + err= +#endif + my_hash_delete(&hash, rec); + DBUG_ASSERT(!err); +} + + +void +slave_connection_state::remove_if_present(const rpl_gtid *in_gtid) +{ + uchar *rec= my_hash_search(&hash, (const uchar *)(&in_gtid->domain_id), + sizeof(in_gtid->domain_id)); + if (rec) + my_hash_delete(&hash, rec); +} + + +int +slave_connection_state::to_string(String *out_str) +{ + out_str->length(0); + return append_to_string(out_str); +} + + +int +slave_connection_state::append_to_string(String *out_str) +{ + uint32 i; + bool first; + + first= true; + for (i= 0; i < hash.records; ++i) + { + const entry *e= (const entry *)my_hash_element(&hash, i); + if (rpl_slave_state_tostring_helper(out_str, &e->gtid, &first)) + return 1; + } + return 0; +} + + +int +slave_connection_state::get_gtid_list(rpl_gtid *gtid_list, uint32 list_size) +{ + uint32 i, pos; + + pos= 0; + for (i= 0; i < hash.records; ++i) + { + entry *e; + if (pos >= list_size) + return 1; + e= (entry *)my_hash_element(&hash, i); + memcpy(>id_list[pos++], &e->gtid, sizeof(e->gtid)); + } + + return 0; +} + + +/* + Check if the GTID position has been reached, for mysql_binlog_send(). + + The position has not been reached if we have anything in the state, unless + it has either the START_ON_EMPTY_DOMAIN flag set (which means it does not + belong to this master at all), or the START_OWN_SLAVE_POS (which means that + we start on an old position from when the server was a slave with + --log-slave-updates=0). +*/ +bool +slave_connection_state::is_pos_reached() +{ + uint32 i; + + for (i= 0; i < hash.records; ++i) + { + entry *e= (entry *)my_hash_element(&hash, i); + if (!(e->flags & (START_OWN_SLAVE_POS|START_ON_EMPTY_DOMAIN))) + return false; + } + + return true; +} + + +/* + Execute a MASTER_GTID_WAIT(). + The position to wait for is in gtid_str in string form. 
+  The timeout in microseconds is in timeout_us, zero means no timeout.
+
+  Returns:
+    1 for error.
+    0 for wait completed.
+   -1 for wait timed out.
+*/
+int
+gtid_waiting::wait_for_pos(THD *thd, String *gtid_str, longlong timeout_us)
+{
+  int err;
+  rpl_gtid *wait_pos;
+  uint32 count, i;
+  struct timespec wait_until, *wait_until_ptr;
+  ulonglong before;
+
+  /* Wait for the empty position returns immediately. */
+  if (gtid_str->length() == 0)
+  {
+    status_var_increment(thd->status_var.master_gtid_wait_count);
+    return 0;
+  }
+
+  if (!(wait_pos= gtid_parse_string_to_list(gtid_str->ptr(),
+                                            gtid_str->length(), &count)))
+  {
+    my_error(ER_INCORRECT_GTID_STATE, MYF(0));
+    return 1;
+  }
+  status_var_increment(thd->status_var.master_gtid_wait_count);
+  before= microsecond_interval_timer();
+
+  if (timeout_us >= 0)
+  {
+    set_timespec_nsec(wait_until, (ulonglong)1000*timeout_us);
+    wait_until_ptr= &wait_until;
+  }
+  else
+    wait_until_ptr= NULL;
+  err= 0;
+  for (i= 0; i < count; ++i)
+  {
+    if ((err= wait_for_gtid(thd, &wait_pos[i], wait_until_ptr)))
+      break;
+  }
+  switch (err)
+  {
+    case -1:
+      status_var_increment(thd->status_var.master_gtid_wait_timeouts);
+      /* fall through */
+    case 0:
+      /*
+        NOTE(review): the static_cast's template argument was destroyed by
+        entity-stripping; ulong (a narrowing cast to the status counter's
+        type) restored here — confirm against upstream sql/rpl_gtid.cc.
+      */
+      status_var_add(thd->status_var.master_gtid_wait_time,
+                     static_cast<ulong>
+                     (microsecond_interval_timer() - before));
+  }
+  my_free(wait_pos);
+  return err;
+}
+
+
+void
+gtid_waiting::promote_new_waiter(gtid_waiting::hash_element *he)
+{
+  queue_element *qe;
+
+  mysql_mutex_assert_owner(&LOCK_gtid_waiting);
+  if (queue_empty(&he->queue))
+    return;
+  qe= (queue_element *)queue_top(&he->queue);
+  qe->do_small_wait= true;
+  mysql_cond_signal(&qe->thd->COND_wakeup_ready);
+}
+
+void
+gtid_waiting::process_wait_hash(uint64 wakeup_seq_no,
+                                gtid_waiting::hash_element *he)
+{
+  mysql_mutex_assert_owner(&LOCK_gtid_waiting);
+
+  for (;;)
+  {
+    queue_element *qe;
+
+    if (queue_empty(&he->queue))
+      break;
+    qe= (queue_element *)queue_top(&he->queue);
+    if (qe->wait_seq_no > wakeup_seq_no)
+      break;
+
DBUG_ASSERT(!qe->done); + queue_remove_top(&he->queue); + qe->done= true;; + mysql_cond_signal(&qe->thd->COND_wakeup_ready); + } +} + + +/* + Execute a MASTER_GTID_WAIT() for one specific domain. + + The implementation is optimised primarily for (1) minimal performance impact + on the slave replication threads, and secondarily for (2) quick performance + of MASTER_GTID_WAIT() on a single GTID, which can be useful for consistent + read to clients in an async replication read-scaleout scenario. + + To achieve (1), we have a "small" wait and a "large" wait. The small wait + contends with the replication threads on the lock on the gtid_slave_pos, so + only minimal processing is done under that lock, and only a single waiter at + a time does the small wait. + + If there is already a small waiter, a new thread will either replace the + small waiter (if it needs to wait for an earlier sequence number), or + instead do a "large" wait. + + Once awoken on the small wait, the waiting thread releases the lock shared + with the SQL threads quickly, and then processes all waiters currently doing + the large wait using a different lock that does not impact replication. + + This way, the SQL threads only need to do a single check + possibly a + pthread_cond_signal() when updating the gtid_slave_state, and the time that + non-SQL threads contend for the lock on gtid_slave_state is minimized. + + There is always at least one thread that has the responsibility to ensure + that there is a small waiter; this thread has queue_element::do_small_wait + set to true. This thread will do the small wait until it is done, at which + point it will make sure to pass on the responsibility to another thread. + Normally only one thread has do_small_wait==true, but it can occasionally + happen that there is more than one, when threads race one another for the + lock on the small wait (this results in slightly increased activity on the + small lock but is otherwise harmless). 
+ + Returns: + 0 Wait completed normally + -1 Wait completed due to timeout + 1 An error (my_error() will have been called to set the error in the da) +*/ +int +gtid_waiting::wait_for_gtid(THD *thd, rpl_gtid *wait_gtid, + struct timespec *wait_until) +{ + bool timed_out= false; +#ifdef HAVE_REPLICATION + queue_element elem; + uint32 domain_id= wait_gtid->domain_id; + uint64 seq_no= wait_gtid->seq_no; + hash_element *he; + rpl_slave_state::element *slave_state_elem= NULL; + PSI_stage_info old_stage; + bool did_enter_cond= false; + + elem.wait_seq_no= seq_no; + elem.thd= thd; + elem.done= false; + + mysql_mutex_lock(&LOCK_gtid_waiting); + if (!(he= get_entry(wait_gtid->domain_id))) + { + mysql_mutex_unlock(&LOCK_gtid_waiting); + return 1; + } + /* + If there is already another waiter with seq_no no larger than our own, + we are sure that there is already a small waiter that will wake us up + (or later pass the small wait responsibility to us). So in this case, we + do not need to touch the small wait lock at all. + */ + elem.do_small_wait= + (queue_empty(&he->queue) || + ((queue_element *)queue_top(&he->queue))->wait_seq_no > seq_no); + + if (register_in_wait_queue(thd, wait_gtid, he, &elem)) + { + mysql_mutex_unlock(&LOCK_gtid_waiting); + return 1; + } + /* + Loop, doing either the small or large wait as appropriate, until either + the position waited for is reached, or we get a kill or timeout. + */ + for (;;) + { + mysql_mutex_assert_owner(&LOCK_gtid_waiting); + + if (elem.do_small_wait) + { + uint64 wakeup_seq_no; + queue_element *cur_waiter; + + mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state); + /* + The elements in the gtid_slave_state_hash are never re-allocated once + they enter the hash, so we do not need to re-do the lookup after releasing + and re-aquiring the lock. 
+ */ + if (!slave_state_elem && + !(slave_state_elem= rpl_global_gtid_slave_state->get_element(domain_id))) + { + mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); + remove_from_wait_queue(he, &elem); + promote_new_waiter(he); + if (did_enter_cond) + thd->EXIT_COND(&old_stage); + else + mysql_mutex_unlock(&LOCK_gtid_waiting); + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return 1; + } + + if ((wakeup_seq_no= slave_state_elem->highest_seq_no) >= seq_no) + { + /* + We do not have to wait. (We will be removed from the wait queue when + we call process_wait_hash() below. + */ + mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); + } + else if ((cur_waiter= slave_state_elem->gtid_waiter) && + slave_state_elem->min_wait_seq_no <= seq_no) + { + /* + There is already a suitable small waiter, go do the large wait. + (Normally we would not have needed to check the small wait in this + case, but it can happen if we race with another thread for the small + lock). + */ + elem.do_small_wait= false; + mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); + } + else + { + /* + We have to do the small wait ourselves (stealing it from any thread + that might already be waiting for a later seq_no). + */ + slave_state_elem->gtid_waiter= &elem; + slave_state_elem->min_wait_seq_no= seq_no; + if (cur_waiter) + { + /* We stole the wait, so wake up the old waiting thread. */ + mysql_cond_signal(&slave_state_elem->COND_wait_gtid); + } + + /* Release the large lock, and do the small wait. 
*/ + if (did_enter_cond) + { + thd->EXIT_COND(&old_stage); + did_enter_cond= false; + } + else + mysql_mutex_unlock(&LOCK_gtid_waiting); + thd->ENTER_COND(&slave_state_elem->COND_wait_gtid, + &rpl_global_gtid_slave_state->LOCK_slave_state, + &stage_master_gtid_wait_primary, &old_stage); + do + { + if (unlikely(thd->check_killed(1))) + break; + else if (wait_until) + { + int err= + mysql_cond_timedwait(&slave_state_elem->COND_wait_gtid, + &rpl_global_gtid_slave_state->LOCK_slave_state, + wait_until); + if (err == ETIMEDOUT || err == ETIME) + { + timed_out= true; + break; + } + } + else + mysql_cond_wait(&slave_state_elem->COND_wait_gtid, + &rpl_global_gtid_slave_state->LOCK_slave_state); + } while (slave_state_elem->gtid_waiter == &elem); + wakeup_seq_no= slave_state_elem->highest_seq_no; + /* + If we aborted due to timeout or kill, remove us as waiter. + + If we were replaced by another waiter with a smaller seq_no, then we + no longer have responsibility for the small wait. + */ + if ((cur_waiter= slave_state_elem->gtid_waiter)) + { + if (cur_waiter == &elem) + slave_state_elem->gtid_waiter= NULL; + else if (slave_state_elem->min_wait_seq_no <= seq_no) + elem.do_small_wait= false; + } + thd->EXIT_COND(&old_stage); + + mysql_mutex_lock(&LOCK_gtid_waiting); + } + + /* + Note that hash_entry pointers do not change once allocated, so we do + not need to lookup `he' again after re-aquiring LOCK_gtid_waiting. + */ + process_wait_hash(wakeup_seq_no, he); + } + else + { + /* Do the large wait. 
*/ + if (!did_enter_cond) + { + thd->ENTER_COND(&thd->COND_wakeup_ready, &LOCK_gtid_waiting, + &stage_master_gtid_wait, &old_stage); + did_enter_cond= true; + } + while (!elem.done && likely(!thd->check_killed(1))) + { + thd_wait_begin(thd, THD_WAIT_BINLOG); + if (wait_until) + { + int err= mysql_cond_timedwait(&thd->COND_wakeup_ready, + &LOCK_gtid_waiting, wait_until); + if (err == ETIMEDOUT || err == ETIME) + timed_out= true; + } + else + mysql_cond_wait(&thd->COND_wakeup_ready, &LOCK_gtid_waiting); + thd_wait_end(thd); + if (elem.do_small_wait || timed_out) + break; + } + } + + if ((thd->killed || timed_out) && !elem.done) + { + /* Aborted, so remove ourselves from the hash. */ + remove_from_wait_queue(he, &elem); + elem.done= true; + } + if (elem.done) + { + /* + If our wait is done, but we have (or were passed) responsibility for + the small wait, then we need to pass on that task to someone else. + */ + if (elem.do_small_wait) + promote_new_waiter(he); + break; + } + } + + if (did_enter_cond) + thd->EXIT_COND(&old_stage); + else + mysql_mutex_unlock(&LOCK_gtid_waiting); + if (thd->killed) + thd->send_kill_message(); +#endif /* HAVE_REPLICATION */ + return timed_out ? 
-1 : 0; +} + + +static void +free_hash_element(void *p) +{ + gtid_waiting::hash_element *e= (gtid_waiting::hash_element *)p; + delete_queue(&e->queue); + my_free(e); +} + + +void +gtid_waiting::init() +{ + my_hash_init(PSI_INSTRUMENT_ME, &hash, &my_charset_bin, 32, + offsetof(hash_element, domain_id), + sizeof(hash_element::domain_id), NULL, + free_hash_element, HASH_UNIQUE); + mysql_mutex_init(key_LOCK_gtid_waiting, &LOCK_gtid_waiting, 0); +} + + +void +gtid_waiting::destroy() +{ + mysql_mutex_destroy(&LOCK_gtid_waiting); + my_hash_free(&hash); +} + + +static int +cmp_queue_elem(void *, uchar *a, uchar *b) +{ + uint64 seq_no_a= *(uint64 *)a; + uint64 seq_no_b= *(uint64 *)b; + if (seq_no_a < seq_no_b) + return -1; + else if (seq_no_a == seq_no_b) + return 0; + else + return 1; +} + + +gtid_waiting::hash_element * +gtid_waiting::get_entry(uint32 domain_id) +{ + hash_element *e; + + if ((e= (hash_element *)my_hash_search(&hash, (const uchar *)&domain_id, + sizeof(domain_id)))) + return e; + + if (!(e= (hash_element *)my_malloc(PSI_INSTRUMENT_ME, sizeof(*e), MYF(MY_WME)))) + return NULL; + + if (init_queue(&e->queue, 8, offsetof(queue_element, wait_seq_no), 0, + cmp_queue_elem, NULL, 1+offsetof(queue_element, queue_idx), 1)) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + my_free(e); + return NULL; + } + e->domain_id= domain_id; + if (my_hash_insert(&hash, (uchar *)e)) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + delete_queue(&e->queue); + my_free(e); + return NULL; + } + return e; +} + + +int +gtid_waiting::register_in_wait_queue(THD *thd, rpl_gtid *wait_gtid, + gtid_waiting::hash_element *he, + gtid_waiting::queue_element *elem) +{ + mysql_mutex_assert_owner(&LOCK_gtid_waiting); + + if (queue_insert_safe(&he->queue, (uchar *)elem)) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return 1; + } + + return 0; +} + + +void +gtid_waiting::remove_from_wait_queue(gtid_waiting::hash_element *he, + gtid_waiting::queue_element *elem) +{ + 
mysql_mutex_assert_owner(&LOCK_gtid_waiting); + + queue_remove(&he->queue, elem->queue_idx); +} + +#endif + +void free_domain_lookup_element(void *p) +{ + struct Binlog_gtid_state_validator::audit_elem *audit_elem= + (struct Binlog_gtid_state_validator::audit_elem *) p; + delete_dynamic(&audit_elem->late_gtids_previous); + delete_dynamic(&audit_elem->late_gtids_real); + my_free(audit_elem); +} + +Binlog_gtid_state_validator::Binlog_gtid_state_validator() +{ + my_hash_init(PSI_INSTRUMENT_ME, &m_audit_elem_domain_lookup, &my_charset_bin, 32, + offsetof(struct audit_elem, domain_id), sizeof(uint32), + NULL, free_domain_lookup_element, HASH_UNIQUE); +} + +Binlog_gtid_state_validator::~Binlog_gtid_state_validator() +{ + my_hash_free(&m_audit_elem_domain_lookup); +} + +void Binlog_gtid_state_validator::initialize_start_gtids(rpl_gtid *start_gtids, + size_t n_gtids) +{ + size_t i; + for(i= 0; i < n_gtids; i++) + { + rpl_gtid *domain_state_gtid= &start_gtids[i]; + + /* + If we are initializing from a GLLE, we can have repeat domain ids from + differing servers, so we want to ensure our start gtid matches the last + known position + */ + struct audit_elem *audit_elem= (struct audit_elem *) my_hash_search( + &m_audit_elem_domain_lookup, + (const uchar *) &(domain_state_gtid->domain_id), 0); + if (audit_elem) + { + /* + We have this domain already specified, so try to overwrite with the + more recent GTID + */ + if (domain_state_gtid->seq_no > audit_elem->start_gtid.seq_no) + audit_elem->start_gtid = *domain_state_gtid; + continue; + } + + /* Initialize a new domain */ + audit_elem= (struct audit_elem *) my_malloc( + PSI_NOT_INSTRUMENTED, sizeof(struct audit_elem), MYF(MY_WME)); + if (!audit_elem) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return; + } + + audit_elem->domain_id= start_gtids[i].domain_id; + audit_elem->start_gtid= start_gtids[i]; + audit_elem->last_gtid= {audit_elem->domain_id, 0, 0}; + + my_init_dynamic_array(PSI_INSTRUMENT_ME, 
&audit_elem->late_gtids_real, + sizeof(rpl_gtid), 8, 8, MYF(0)); + my_init_dynamic_array(PSI_INSTRUMENT_ME, &audit_elem->late_gtids_previous, + sizeof(rpl_gtid), 8, 8, MYF(0)); + + if (my_hash_insert(&m_audit_elem_domain_lookup, (uchar *) audit_elem)) + { + my_free(audit_elem); + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return; + } + } +} + +my_bool Binlog_gtid_state_validator::initialize_gtid_state(FILE *out, + rpl_gtid *gtids, + size_t n_gtids) +{ + size_t i; + my_bool err= FALSE; + + /* + We weren't initialized with starting positions explicitly, so assume the + starting positions of the current gtid state + */ + if (!m_audit_elem_domain_lookup.records) + initialize_start_gtids(gtids, n_gtids); + + for(i= 0; i < n_gtids; i++) + { + rpl_gtid *domain_state_gtid= &gtids[i]; + + struct audit_elem *audit_elem= (struct audit_elem *) my_hash_search( + &m_audit_elem_domain_lookup, + (const uchar *) &(domain_state_gtid->domain_id), 0); + + if (!audit_elem) + { + Binlog_gtid_state_validator::error( + out, + "Starting GTID position list does not specify an initial value " + "for domain %u, whose events may be present in the requested binlog " + "file(s). The last known position for this domain was %u-%u-%llu.", + domain_state_gtid->domain_id, PARAM_GTID((*domain_state_gtid))); + err= TRUE; + continue; + } + + if (audit_elem->start_gtid.seq_no < domain_state_gtid->seq_no) + { + Binlog_gtid_state_validator::error( + out, + "Binary logs are missing data for domain %u. 
Expected data to " + "start from state %u-%u-%llu; however, the initial GTID state of " + "the logs was %u-%u-%llu.", + domain_state_gtid->domain_id, PARAM_GTID(audit_elem->start_gtid), + PARAM_GTID((*domain_state_gtid))); + err= TRUE; + continue; + } + + if (domain_state_gtid->seq_no > audit_elem->last_gtid.seq_no) + audit_elem->last_gtid= *domain_state_gtid; + } + return err; +} + +my_bool Binlog_gtid_state_validator::verify_stop_state(FILE *out, + rpl_gtid *stop_gtids, + size_t n_stop_gtids) +{ + size_t i; + for(i= 0; i < n_stop_gtids; i++) + { + rpl_gtid *stop_gtid= &stop_gtids[i]; + + struct audit_elem *audit_elem= (struct audit_elem *) my_hash_search( + &m_audit_elem_domain_lookup, + (const uchar *) &(stop_gtid->domain_id), 0); + + /* + It is okay if stop gtid doesn't exist in current state because it will be treated + as a new domain + */ + if (audit_elem && stop_gtid->seq_no <= audit_elem->start_gtid.seq_no) + { + Binlog_gtid_state_validator::error( + out, + "--stop-position GTID %u-%u-%llu does not exist in the " + "specified binlog files. The current GTID state of domain %u in the " + "specified binary logs is %u-%u-%llu", + PARAM_GTID((*stop_gtid)), stop_gtid->domain_id, + PARAM_GTID(audit_elem->start_gtid)); + return TRUE; + } + } + + /* No issues with any GTIDs */ + return FALSE; +} + +my_bool +Binlog_gtid_state_validator::verify_gtid_state(FILE *out, + rpl_gtid *domain_state_gtid) +{ + struct audit_elem *audit_elem= (struct audit_elem *) my_hash_search( + &m_audit_elem_domain_lookup, + (const uchar *) &(domain_state_gtid->domain_id), 0); + + if (!audit_elem) + { + Binlog_gtid_state_validator::warn( + out, + "Binary logs are missing data for domain %u. 
The current binary log " + "specified its " + "current state for this domain as %u-%u-%llu, but neither the " + "starting GTID position list nor any processed events have " + "mentioned " + "this domain.", + domain_state_gtid->domain_id, PARAM_GTID((*domain_state_gtid))); + return TRUE; + } + + if (audit_elem->last_gtid.seq_no < domain_state_gtid->seq_no) + { + Binlog_gtid_state_validator::warn( + out, + "Binary logs are missing data for domain %u. The current binary log " + "state is %u-%u-%llu, but the last seen event was %u-%u-%llu.", + domain_state_gtid->domain_id, PARAM_GTID((*domain_state_gtid)), + PARAM_GTID(audit_elem->last_gtid)); + return TRUE; + } + + return FALSE; +} + +my_bool Binlog_gtid_state_validator::record(rpl_gtid *gtid) +{ + struct audit_elem *audit_elem= (struct audit_elem *) my_hash_search( + &m_audit_elem_domain_lookup, (const uchar *) &(gtid->domain_id), 0); + + if (!audit_elem) + { + /* + We haven't seen any GTIDs in this domain yet. Perform initial set up for + this domain so we can monitor its events. 
+ */ + audit_elem= (struct audit_elem *) my_malloc( + PSI_NOT_INSTRUMENTED, sizeof(struct audit_elem), MYF(MY_WME)); + if (!audit_elem) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return TRUE; + } + + audit_elem->domain_id= gtid->domain_id; + audit_elem->last_gtid= *gtid; + audit_elem->start_gtid= {gtid->domain_id, 0, 0}; + + my_init_dynamic_array(PSI_INSTRUMENT_ME, &audit_elem->late_gtids_real, + sizeof(rpl_gtid), 8, 8, MYF(0)); + my_init_dynamic_array(PSI_INSTRUMENT_ME, &audit_elem->late_gtids_previous, + sizeof(rpl_gtid), 8, 8, MYF(0)); + + if (my_hash_insert(&m_audit_elem_domain_lookup, (uchar *) audit_elem)) + { + my_free(audit_elem); + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return TRUE; + } + } + else + { + /* Out of order check */ + if (gtid->seq_no <= audit_elem->last_gtid.seq_no && + gtid->seq_no >= audit_elem->start_gtid.seq_no) + { + /* GTID is out of order */ + insert_dynamic(&audit_elem->late_gtids_real, (const void *) gtid); + insert_dynamic(&audit_elem->late_gtids_previous, + (const void *) &(audit_elem->last_gtid)); + + return TRUE; + } + else + { + /* GTID is valid */ + audit_elem->last_gtid= *gtid; + } + } + + return FALSE; +} + +/* + Data structure used to help pass data into report_audit_findings because + my_hash_iterate only passes one parameter +*/ +struct gtid_report_ctx +{ + FILE *out_file; + my_bool is_strict_mode; + my_bool contains_err; +}; + +static my_bool report_audit_findings(void *entry, void *report_ctx_arg) +{ + struct Binlog_gtid_state_validator::audit_elem *audit_el= + (struct Binlog_gtid_state_validator::audit_elem *) entry; + + struct gtid_report_ctx *report_ctx= + (struct gtid_report_ctx *) report_ctx_arg; + FILE *out= report_ctx->out_file; + my_bool is_strict_mode= report_ctx->is_strict_mode; + size_t i; + void (*report_f)(FILE*, const char*, ...); + + if (is_strict_mode) + report_f= Binlog_gtid_state_validator::error; + else + report_f= Binlog_gtid_state_validator::warn; + + if (audit_el) + { + if 
(audit_el->last_gtid.seq_no < audit_el->start_gtid.seq_no) + { + report_f(out, + "Binary logs never reached expected GTID state of %u-%u-%llu", + PARAM_GTID(audit_el->start_gtid)); + report_ctx->contains_err= TRUE; + } + + /* Report any out of order GTIDs */ + for(i= 0; i < audit_el->late_gtids_real.elements; i++) + { + rpl_gtid *real_gtid= + (rpl_gtid *) dynamic_array_ptr(&(audit_el->late_gtids_real), i); + rpl_gtid *last_gtid= (rpl_gtid *) dynamic_array_ptr( + &(audit_el->late_gtids_previous), i); + DBUG_ASSERT(real_gtid && last_gtid); + + report_f(out, + "Found out of order GTID. Got %u-%u-%llu after %u-%u-%llu", + PARAM_GTID((*real_gtid)), PARAM_GTID((*last_gtid))); + report_ctx->contains_err= TRUE; + } + } + + return FALSE; +} + +my_bool Binlog_gtid_state_validator::report(FILE *out, my_bool is_strict_mode) +{ + struct gtid_report_ctx report_ctx; + report_ctx.out_file= out; + report_ctx.is_strict_mode= is_strict_mode; + report_ctx.contains_err= FALSE; + my_hash_iterate(&m_audit_elem_domain_lookup, report_audit_findings, &report_ctx); + fflush(out); + return is_strict_mode ? 
report_ctx.contains_err : FALSE; +} + +Window_gtid_event_filter::Window_gtid_event_filter() + : m_has_start(FALSE), m_has_stop(FALSE), m_is_active(FALSE), + m_has_passed(FALSE) +{ + // m_start and m_stop do not need initial values if unused +} + +int Window_gtid_event_filter::set_start_gtid(rpl_gtid *start) +{ + if (m_has_start) + { + sql_print_error( + "Start position cannot have repeated domain " + "ids (found %u-%u-%llu when %u-%u-%llu was previously specified)", + PARAM_GTID((*start)), PARAM_GTID(m_start)); + return 1; + } + + m_has_start= TRUE; + m_start= *start; + return 0; +} + +int Window_gtid_event_filter::set_stop_gtid(rpl_gtid *stop) +{ + if (m_has_stop) + { + sql_print_error( + "Stop position cannot have repeated domain " + "ids (found %u-%u-%llu when %u-%u-%llu was previously specified)", + PARAM_GTID((*stop)), PARAM_GTID(m_stop)); + return 1; + } + + m_has_stop= TRUE; + m_stop= *stop; + return 0; +} + +my_bool Window_gtid_event_filter::is_range_invalid() +{ + if (m_has_start && m_has_stop && m_start.seq_no > m_stop.seq_no) + { + sql_print_error( + "Queried GTID range is invalid in strict mode. 
Stop position " + "%u-%u-%llu is not greater than or equal to start %u-%u-%llu.", + PARAM_GTID(m_stop), PARAM_GTID(m_start)); + return TRUE; + } + return FALSE; +} + +static inline my_bool is_gtid_at_or_after(rpl_gtid *boundary, + rpl_gtid *test_gtid) +{ + return test_gtid->domain_id == boundary->domain_id && + test_gtid->seq_no >= boundary->seq_no; +} + +static inline my_bool is_gtid_at_or_before(rpl_gtid *boundary, + rpl_gtid *test_gtid) +{ + return test_gtid->domain_id == boundary->domain_id && + test_gtid->seq_no <= boundary->seq_no; +} + +my_bool Window_gtid_event_filter::exclude(rpl_gtid *gtid) +{ + /* Assume result should be excluded to start */ + my_bool should_exclude= TRUE; + + DBUG_ASSERT((m_has_start && gtid->domain_id == m_start.domain_id) || + (m_has_stop && gtid->domain_id == m_stop.domain_id)); + + if (!m_is_active && !m_has_passed) + { + /* + This filter has not yet been activated. Check if the gtid is within the + bounds of this window. + */ + + if (!m_has_start && is_gtid_at_or_before(&m_stop, gtid)) + { + /* + Start GTID was not provided, so we want to include everything from here + up to m_stop + */ + m_is_active= TRUE; + should_exclude= FALSE; + } + else if ((m_has_start && is_gtid_at_or_after(&m_start, gtid)) && + (!m_has_stop || is_gtid_at_or_before(&m_stop, gtid))) + { + m_is_active= TRUE; + + DBUG_PRINT("gtid-event-filter", + ("Window: Begin (%d-%d-%llu, %d-%d-%llu]", + PARAM_GTID(m_start), PARAM_GTID(m_stop))); + + /* + As the start of the range is exclusive, if this gtid is the start of + the range, exclude it + */ + if (gtid->seq_no == m_start.seq_no) + should_exclude= TRUE; + else + should_exclude= FALSE; + + if (m_has_stop && gtid->seq_no == m_stop.seq_no) + { + m_has_passed= TRUE; + DBUG_PRINT("gtid-event-filter", + ("Window: End (%d-%d-%llu, %d-%d-%llu]", + PARAM_GTID(m_start), PARAM_GTID(m_stop))); + } + } + } /* if (!m_is_active && !m_has_passed) */ + else if (m_is_active && !m_has_passed) + { + /* + This window is currently 
active so we want the event group to be included + in the results. Additionally check if we are at the end of the window. + If no end of the window is provided, go indefinitely + */ + should_exclude= FALSE; + + if (m_has_stop && is_gtid_at_or_after(&m_stop, gtid)) + { + DBUG_PRINT("gtid-event-filter", + ("Window: End (%d-%d-%llu, %d-%d-%llu]", + PARAM_GTID(m_start), PARAM_GTID(m_stop))); + m_is_active= FALSE; + m_has_passed= TRUE; + + if (!is_gtid_at_or_before(&m_stop, gtid)) + { + /* + The GTID is after the finite stop of the window, don't let it pass + through + */ + should_exclude= TRUE; + } + } + } + + return should_exclude; +} + +my_bool Window_gtid_event_filter::has_finished() +{ + return m_has_stop ? m_has_passed : FALSE; +} + +void free_u32_gtid_filter_element(void *p) +{ + gtid_filter_element *gfe= (gtid_filter_element *) p; + if (gfe->filter) + delete gfe->filter; + my_free(gfe); +} + +template +Id_delegating_gtid_event_filter::Id_delegating_gtid_event_filter() + : m_num_stateful_filters(0), m_num_completed_filters(0), + m_id_restriction_mode(id_restriction_mode::MODE_NOT_SET) +{ + void (*free_func)(void *); + if (std::is_same::value) + free_func= free_u32_gtid_filter_element; + else + DBUG_ASSERT(0); + + my_hash_init(PSI_INSTRUMENT_ME, &m_filters_by_id_hash, &my_charset_bin, 32, + offsetof(gtid_filter_element, identifier), + sizeof(T), NULL, free_func, + HASH_UNIQUE); + + m_default_filter= new Accept_all_gtid_filter(); +} + +template +Id_delegating_gtid_event_filter::~Id_delegating_gtid_event_filter() +{ + my_hash_free(&m_filters_by_id_hash); + delete m_default_filter; +} + +template +void Id_delegating_gtid_event_filter::set_default_filter( + Gtid_event_filter *filter) +{ + if (m_default_filter) + delete m_default_filter; + + m_default_filter= filter; +} + +template +gtid_filter_element * +Id_delegating_gtid_event_filter::find_or_create_filter_element_for_id( + T filter_id) +{ + gtid_filter_element *fe= + (gtid_filter_element *) my_hash_search( + 
&m_filters_by_id_hash, (const uchar *) &filter_id, 0); + + if (!fe) + { + gtid_filter_element *new_fe= (gtid_filter_element *) my_malloc( + PSI_NOT_INSTRUMENTED, sizeof(gtid_filter_element), MYF(MY_WME)); + new_fe->filter= NULL; + new_fe->identifier= filter_id; + if (my_hash_insert(&m_filters_by_id_hash, (uchar*) new_fe)) + { + my_free(new_fe); + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return NULL; + } + fe= new_fe; + } + + return fe; +} + +template +my_bool Id_delegating_gtid_event_filter::has_finished() +{ + /* + If all user-defined filters have deactivated, we are effectively + deactivated + */ + return m_num_stateful_filters && + m_num_completed_filters == m_num_stateful_filters; +} + +template +my_bool Id_delegating_gtid_event_filter::exclude(rpl_gtid *gtid) +{ + T filter_id= get_id_from_gtid(gtid); + gtid_filter_element *filter_element= + (gtid_filter_element *) my_hash_search(&m_filters_by_id_hash, + (const uchar *) &filter_id, 0); + Gtid_event_filter *filter= + (filter_element ? filter_element->filter : m_default_filter); + my_bool ret= TRUE; + + if(!filter_element || !filter->has_finished()) + { + ret= filter->exclude(gtid); + + /* + If this is an explicitly defined filter, e.g. Window-based filter, check + if it has completed, and update the counter accordingly if so. 
+ */ + if (filter_element && filter->has_finished()) + m_num_completed_filters++; + } + + return ret; +} + + +template Gtid_event_filter* create_event_filter() +{ + return new F(); +} + +template +int Id_delegating_gtid_event_filter::set_id_restrictions( + T *id_list, size_t n_ids, id_restriction_mode mode) +{ + static const char *WHITELIST_NAME= "do", *BLACKLIST_NAME= "ignore"; + + size_t id_ctr; + int err; + const char *filter_name, *opposite_filter_name; + Gtid_event_filter *(*construct_filter)(void); + Gtid_event_filter *(*construct_default_filter)(void); + + /* + Set up variables which help this filter either be in whitelist or blacklist + mode + */ + if (mode == Gtid_event_filter::id_restriction_mode::WHITELIST_MODE) + { + filter_name= WHITELIST_NAME; + opposite_filter_name= BLACKLIST_NAME; + construct_filter= + create_event_filter; + construct_default_filter= + create_event_filter; + } + else + { + DBUG_ASSERT(mode == + Gtid_event_filter::id_restriction_mode::BLACKLIST_MODE); + filter_name= BLACKLIST_NAME; + opposite_filter_name= WHITELIST_NAME; + construct_filter= + create_event_filter; + construct_default_filter= + create_event_filter; + } + + if (m_id_restriction_mode != + Gtid_event_filter::id_restriction_mode::MODE_NOT_SET) + { + if (mode != m_id_restriction_mode) + { + /* + If a rule specifying the opposite version of this has already been set, + error. 
+ */ + sql_print_error("Cannot create %s filtering rule for %s id because " + "%s rule already exists", + filter_name, get_id_type_name(), + opposite_filter_name); + err= 1; + goto err; + } + + /* This filter is specified more than once, only use the latest values */ + my_hash_reset(&m_filters_by_id_hash); + } + + for (id_ctr= 0; id_ctr < n_ids; id_ctr++) + { + T filter_id= id_list[id_ctr]; + gtid_filter_element *map_element= + find_or_create_filter_element_for_id(filter_id); + + if(map_element == NULL) + { + /* + If map_element is NULL, find_or_create_filter_element_for_id failed and + has already written the error message + */ + err= 1; + goto err; + } + else if (map_element->filter == NULL) + { + map_element->filter= construct_filter(); + m_num_stateful_filters++; + } + else + { + DBUG_ASSERT(map_element->filter->get_filter_type() == + (mode == + Gtid_event_filter::id_restriction_mode::WHITELIST_MODE + ? Gtid_event_filter::ACCEPT_ALL_GTID_FILTER_TYPE + : Gtid_event_filter::REJECT_ALL_GTID_FILTER_TYPE)); + } + } + + /* + With a whitelist, we by only want to accept the ids which are specified. + Everything else should be denied. 
+ + With a blacklist, we by default want to accept everything that is not + specified in the list + */ + set_default_filter(construct_default_filter()); + m_id_restriction_mode= mode; + err= 0; + +err: + return err; +} + +Window_gtid_event_filter * +Domain_gtid_event_filter::find_or_create_window_filter_for_id( + decltype(rpl_gtid::domain_id) domain_id) +{ + gtid_filter_element *filter_element= + find_or_create_filter_element_for_id(domain_id); + Window_gtid_event_filter *wgef= NULL; + + if (filter_element->filter == NULL) + { + /* New filter */ + wgef= new Window_gtid_event_filter(); + filter_element->filter= wgef; + } + else if (filter_element->filter->get_filter_type() == WINDOW_GTID_FILTER_TYPE) + { + /* We have an existing window filter here */ + wgef= (Window_gtid_event_filter *) filter_element->filter; + } + else + { + /* + We have an existing filter but it is not of window type so propogate NULL + filter + */ + sql_print_error("cannot subset domain id %d by position, another rule " + "exists on that domain", + domain_id); + } + + return wgef; +} + +static my_bool check_filter_entry_validity(void *entry, + void *are_filters_invalid_arg) +{ + gtid_filter_element *fe= + (gtid_filter_element *) entry; + + if (fe) + { + Gtid_event_filter *gef= fe->filter; + if (gef->get_filter_type() == Gtid_event_filter::WINDOW_GTID_FILTER_TYPE) + { + Window_gtid_event_filter *wgef= (Window_gtid_event_filter *) gef; + if (wgef->is_range_invalid()) + { + *((int *) are_filters_invalid_arg)= 1; + return TRUE; + } + } + } + return FALSE; +} + +int Domain_gtid_event_filter::validate_window_filters() +{ + int are_filters_invalid= 0; + my_hash_iterate(&m_filters_by_id_hash, check_filter_entry_validity, + &are_filters_invalid); + return are_filters_invalid; +} + +int Domain_gtid_event_filter::add_start_gtid(rpl_gtid *gtid) +{ + int err= 0; + Window_gtid_event_filter *filter_to_update= + find_or_create_window_filter_for_id(gtid->domain_id); + + if (filter_to_update == NULL) + { + err= 
1; + } + else if (!(err= filter_to_update->set_start_gtid(gtid))) + { + gtid_filter_element *fe= + (gtid_filter_element *) my_hash_search( + &m_filters_by_id_hash, (const uchar *) &(gtid->domain_id), 0); + insert_dynamic(&m_start_filters, (const void *) &fe); + } + + return err; +} + +int Domain_gtid_event_filter::add_stop_gtid(rpl_gtid *gtid) +{ + int err= 0; + Window_gtid_event_filter *filter_to_update= + find_or_create_window_filter_for_id(gtid->domain_id); + + if (filter_to_update == NULL) + { + err= 1; + } + else if (!(err= filter_to_update->set_stop_gtid(gtid))) + { + gtid_filter_element *fe= + (gtid_filter_element *) my_hash_search( + &m_filters_by_id_hash, (const uchar *) &(gtid->domain_id), 0); + insert_dynamic(&m_stop_filters, (const void *) &fe); + + /* + A window with a stop position can be disabled, and is therefore stateful. + */ + m_num_stateful_filters++; + + /* + Default filtering behavior changes with GTID stop positions, where we + exclude all domains not present in the stop list + */ + if (m_default_filter->get_filter_type() == ACCEPT_ALL_GTID_FILTER_TYPE) + { + delete m_default_filter; + m_default_filter= new Reject_all_gtid_filter(); + } + } + + return err; +} + +rpl_gtid *Domain_gtid_event_filter::get_start_gtids() +{ + rpl_gtid *gtid_list; + uint32 i; + size_t n_start_gtids= get_num_start_gtids(); + + gtid_list= (rpl_gtid *) my_malloc( + PSI_INSTRUMENT_ME, n_start_gtids * sizeof(rpl_gtid), MYF(MY_WME)); + + for (i = 0; i < n_start_gtids; i++) + { + gtid_filter_element *fe= + *(gtid_filter_element **) + dynamic_array_ptr(&m_start_filters, i); + DBUG_ASSERT(fe->filter && + fe->filter->get_filter_type() == WINDOW_GTID_FILTER_TYPE); + Window_gtid_event_filter *wgef= + (Window_gtid_event_filter *) fe->filter; + + rpl_gtid win_start_gtid= wgef->get_start_gtid(); + gtid_list[i]= win_start_gtid; + } + + return gtid_list; +} + +rpl_gtid *Domain_gtid_event_filter::get_stop_gtids() +{ + rpl_gtid *gtid_list; + uint32 i; + size_t n_stop_gtids= 
get_num_stop_gtids(); + + gtid_list= (rpl_gtid *) my_malloc( + PSI_INSTRUMENT_ME, n_stop_gtids * sizeof(rpl_gtid), MYF(MY_WME)); + + for (i = 0; i < n_stop_gtids; i++) + { + gtid_filter_element *fe= + *(gtid_filter_element **) + dynamic_array_ptr(&m_stop_filters, i); + DBUG_ASSERT(fe->filter && + fe->filter->get_filter_type() == WINDOW_GTID_FILTER_TYPE); + Window_gtid_event_filter *wgef= + (Window_gtid_event_filter *) fe->filter; + + rpl_gtid win_stop_gtid= wgef->get_stop_gtid(); + gtid_list[i]= win_stop_gtid; + } + + return gtid_list; +} + +void Domain_gtid_event_filter::clear_start_gtids() +{ + uint32 i; + for (i = 0; i < get_num_start_gtids(); i++) + { + gtid_filter_element *fe= + *(gtid_filter_element **) + dynamic_array_ptr(&m_start_filters, i); + DBUG_ASSERT(fe->filter && + fe->filter->get_filter_type() == WINDOW_GTID_FILTER_TYPE); + Window_gtid_event_filter *wgef= + (Window_gtid_event_filter *) fe->filter; + + if (wgef->has_stop()) + { + /* + Don't delete the whole filter if it already has a stop position attached + */ + wgef->clear_start_pos(); + } + else + { + /* + This domain only has a stop, so delete the whole filter + */ + my_hash_delete(&m_filters_by_id_hash, (uchar *) fe); + } + } + + reset_dynamic(&m_start_filters); +} + +void Domain_gtid_event_filter::clear_stop_gtids() +{ + uint32 i; + + for (i = 0; i < get_num_stop_gtids(); i++) + { + gtid_filter_element *fe= + *(gtid_filter_element **) + dynamic_array_ptr(&m_stop_filters, i); + DBUG_ASSERT(fe->filter && + fe->filter->get_filter_type() == WINDOW_GTID_FILTER_TYPE); + Window_gtid_event_filter *wgef= + (Window_gtid_event_filter *) fe->filter; + + if (wgef->has_start()) + { + /* + Don't delete the whole filter if it already has a start position + attached + */ + wgef->clear_stop_pos(); + } + else + { + /* + This domain only has a start, so delete the whole filter + */ + my_hash_delete(&m_filters_by_id_hash, (uchar *) fe); + } + m_num_stateful_filters--; + } + + /* + Stop positions were cleared and we 
want to be inclusive again of other + domains again + */ + if (m_default_filter->get_filter_type() == REJECT_ALL_GTID_FILTER_TYPE) + { + delete m_default_filter; + m_default_filter= new Accept_all_gtid_filter(); + } + + reset_dynamic(&m_stop_filters); +} + +my_bool Domain_gtid_event_filter::exclude(rpl_gtid *gtid) +{ + my_bool include_domain= TRUE; + /* + If GTID stop positions are provided, we limit the domains which are output + to only be those specified with stop positions + */ + if (get_num_stop_gtids()) + { + decltype(rpl_gtid::domain_id) filter_id= get_id_from_gtid(gtid); + gtid_filter_element *filter_element= + (gtid_filter_element *) my_hash_search( + &m_filters_by_id_hash, (const uchar *) &filter_id, 0); + if (filter_element) + { + Gtid_event_filter *filter= filter_element->filter; + if (filter->get_filter_type() == WINDOW_GTID_FILTER_TYPE) + { + Window_gtid_event_filter *wgef= (Window_gtid_event_filter *) filter; + include_domain= wgef->has_stop(); + } + } + } + + return include_domain ? 
Id_delegating_gtid_event_filter::exclude(gtid) + : TRUE; +} + +Intersecting_gtid_event_filter::Intersecting_gtid_event_filter( + Gtid_event_filter *filter1, Gtid_event_filter *filter2) +{ + my_init_dynamic_array(PSI_INSTRUMENT_ME, &m_filters, + sizeof(Gtid_event_filter *), 3, 3, MYF(0)); + insert_dynamic(&m_filters, (void *) &filter1); + insert_dynamic(&m_filters, (void *) &filter2); +} + +Intersecting_gtid_event_filter::~Intersecting_gtid_event_filter() +{ + Gtid_event_filter *tmp_filter= NULL; + ulong i; + for (i= 0; i < m_filters.elements; i++) + { + tmp_filter= *(Gtid_event_filter **) dynamic_array_ptr(&m_filters, i); + delete tmp_filter; + } + delete_dynamic(&m_filters); +} + +my_bool Intersecting_gtid_event_filter::exclude(rpl_gtid *gtid) +{ + Gtid_event_filter *tmp_filter= NULL; + ulong i; + for (i= 0; i < m_filters.elements; i++) + { + tmp_filter= *(Gtid_event_filter **) dynamic_array_ptr(&m_filters, i); + DBUG_ASSERT(tmp_filter); + if (tmp_filter->exclude(gtid)) + return TRUE; + } + return FALSE; +} + +my_bool Intersecting_gtid_event_filter::has_finished() +{ + Gtid_event_filter *tmp_filter= NULL; + ulong i; + for (i= 0; i < m_filters.elements; i++) + { + tmp_filter= *(Gtid_event_filter **) dynamic_array_ptr(&m_filters, i); + DBUG_ASSERT(tmp_filter); + if (tmp_filter->has_finished()) + return TRUE; + } + return FALSE; +} diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h new file mode 100644 index 00000000..7d25ee6e --- /dev/null +++ b/sql/rpl_gtid.h @@ -0,0 +1,936 @@ +/* Copyright (c) 2013, Kristian Nielsen and MariaDB Services Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef RPL_GTID_H +#define RPL_GTID_H + +#include "hash.h" +#include "queues.h" +#include + +/* Definitions for MariaDB global transaction ID (GTID). */ + + +extern const LEX_CSTRING rpl_gtid_slave_state_table_name; + +class String; +#define PARAM_GTID(G) G.domain_id, G.server_id, G.seq_no + +#define GTID_MAX_STR_LENGTH (10+1+10+1+20) +#define PARAM_GTID(G) G.domain_id, G.server_id, G.seq_no + +struct rpl_gtid +{ + uint32 domain_id; + uint32 server_id; + uint64 seq_no; +}; + +inline bool operator==(const rpl_gtid& lhs, const rpl_gtid& rhs) +{ + return + lhs.domain_id == rhs.domain_id && + lhs.server_id == rhs.server_id && + lhs.seq_no == rhs.seq_no; +}; + +inline bool operator<(const rpl_gtid& lhs, const rpl_gtid& rhs) +{ + return (lhs.domain_id == rhs.domain_id) ? lhs.seq_no < rhs.seq_no + : lhs.domain_id < rhs.domain_id; +}; + +inline bool operator>(const rpl_gtid& lhs, const rpl_gtid& rhs) +{ + return (lhs.domain_id == rhs.domain_id) ? lhs.seq_no > rhs.seq_no + : lhs.domain_id > rhs.domain_id; +}; + +enum enum_gtid_skip_type { + GTID_SKIP_NOT, GTID_SKIP_STANDALONE, GTID_SKIP_TRANSACTION +}; + + +/* + Structure to keep track of threads waiting in MASTER_GTID_WAIT(). + + Since replication is (mostly) single-threaded, we want to minimise the + performance impact on that from MASTER_GTID_WAIT(). To achieve this, we + are careful to keep the common lock between replication threads and + MASTER_GTID_WAIT threads held for as short as possible. We keep only + a single thread waiting to be notified by the replication threads; this + thread then handles all the (potentially heavy) lifting of dealing with + all current waiting threads. 
+*/ +struct gtid_waiting { + /* Elements in the hash, basically a priority queue for each domain. */ + struct hash_element { + QUEUE queue; + uint32 domain_id; + }; + /* A priority queue to handle waiters in one domain in seq_no order. */ + struct queue_element { + uint64 wait_seq_no; + THD *thd; + int queue_idx; + /* + do_small_wait is true if we have responsibility for ensuring that there + is a small waiter. + */ + bool do_small_wait; + /* + The flag `done' is set when the wait is completed (either due to reaching + the position waited for, or due to timeout or kill). The queue_element + is in the queue if and only if `done' is true. + */ + bool done; + }; + + mysql_mutex_t LOCK_gtid_waiting; + HASH hash; + + void init(); + void destroy(); + hash_element *get_entry(uint32 domain_id); + int wait_for_pos(THD *thd, String *gtid_str, longlong timeout_us); + void promote_new_waiter(gtid_waiting::hash_element *he); + int wait_for_gtid(THD *thd, rpl_gtid *wait_gtid, struct timespec *wait_until); + void process_wait_hash(uint64 wakeup_seq_no, gtid_waiting::hash_element *he); + int register_in_wait_queue(THD *thd, rpl_gtid *wait_gtid, hash_element *he, + queue_element *elem); + void remove_from_wait_queue(hash_element *he, queue_element *elem); +}; + + +class Relay_log_info; +struct rpl_group_info; +class Gtid_list_log_event; + +/* + Replication slave state. + + For every independent replication stream (identified by domain_id), this + remembers the last gtid applied on the slave within this domain. + + Since events are always committed in-order within a single domain, this is + sufficient to maintain the state of the replication slave. +*/ +struct rpl_slave_state +{ + /* Elements in the list of GTIDs kept for each domain_id. */ + struct list_element + { + struct list_element *next; + uint64 sub_id; + uint32 domain_id; + uint32 server_id; + uint64 seq_no; + /* + hton of mysql.gtid_slave_pos* table used to record this GTID. 
+ Can be NULL if the gtid table failed to load (eg. missing + mysql.gtid_slave_pos table following an upgrade). + */ + void *hton; + }; + + /* Elements in the HASH that hold the state for one domain_id. */ + struct element + { + struct list_element *list; + uint32 domain_id; + /* Highest seq_no seen so far in this domain. */ + uint64 highest_seq_no; + /* + If this is non-NULL, then it is the waiter responsible for the small + wait in MASTER_GTID_WAIT(). + */ + gtid_waiting::queue_element *gtid_waiter; + /* + If gtid_waiter is non-NULL, then this is the seq_no that its + MASTER_GTID_WAIT() is waiting on. When we reach this seq_no, we need to + signal the waiter on COND_wait_gtid. + */ + uint64 min_wait_seq_no; + mysql_cond_t COND_wait_gtid; + + /* + For --gtid-ignore-duplicates. The Relay_log_info that currently owns + this domain, and the number of worker threads that are active in it. + + The idea is that only one of multiple master connections is allowed to + actively apply events for a given domain. Other connections must either + discard the events (if the seq_no in GTID shows they have already been + applied), or wait to see if the current owner will apply it. + */ + const Relay_log_info *owner_rli; + uint32 owner_count; + mysql_cond_t COND_gtid_ignore_duplicates; + + list_element *grab_list() { list_element *l= list; list= NULL; return l; } + void add(list_element *l) + { + l->next= list; + list= l; + } + }; + + /* Descriptor for mysql.gtid_slave_posXXX table in specific engine. */ + enum gtid_pos_table_state { + GTID_POS_AUTO_CREATE, + GTID_POS_CREATE_REQUESTED, + GTID_POS_CREATE_IN_PROGRESS, + GTID_POS_AVAILABLE + }; + struct gtid_pos_table { + struct gtid_pos_table *next; + /* + Use a void * here, rather than handlerton *, to make explicit that we + are not using the value to access any functionality in the engine. It + is just used as an opaque value to identify which engine we are using + for each GTID row. 
+ */ + void *table_hton; + LEX_CSTRING table_name; + uint8 state; + }; + + /* Mapping from domain_id to its element. */ + HASH hash; + /* GTIDs added since last purge of old mysql.gtid_slave_pos rows. */ + uint32 pending_gtid_count; + /* Mutex protecting access to the state. */ + mysql_mutex_t LOCK_slave_state; + /* Auxiliary buffer to sort gtid list. */ + DYNAMIC_ARRAY gtid_sort_array; + + uint64 last_sub_id; + /* + List of tables available for durably storing the slave GTID position. + + Accesses to this table is protected by LOCK_slave_state. However for + efficiency, there is also a provision for read access to it from a running + slave without lock. + + An element can be added at the head of a list by storing the new + gtid_pos_tables pointer atomically with release semantics, to ensure that + the next pointer of the new element is visible to readers of the new list. + Other changes (like deleting or replacing elements) must happen only while + all SQL driver threads are stopped. LOCK_slave_state must be held in any + case. + + The list can be read without lock by an SQL driver thread or worker thread + by reading the gtid_pos_tables pointer atomically with acquire semantics, + to ensure that it will see the correct next pointer of a new head element. + */ + std::atomic gtid_pos_tables; + /* The default entry in gtid_pos_tables, mysql.gtid_slave_pos. 
*/ + std::atomic default_gtid_pos_table; + bool loaded; + + rpl_slave_state(); + ~rpl_slave_state(); + + void truncate_hash(); + ulong count() const { return hash.records; } + int update(uint32 domain_id, uint32 server_id, uint64 sub_id, + uint64 seq_no, void *hton, rpl_group_info *rgi); + int update_nolock(uint32 domain_id, uint32 server_id, uint64 sub_id, + uint64 seq_no, void *hton, rpl_group_info *rgi); + int truncate_state_table(THD *thd); + void select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename); + int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, + bool in_transaction, bool in_statement, void **out_hton); + list_element *gtid_grab_pending_delete_list(); + LEX_CSTRING *select_gtid_pos_table(void *hton); + void gtid_delete_pending(THD *thd, rpl_slave_state::list_element **list_ptr); + uint64 next_sub_id(uint32 domain_id); + int iterate(int (*cb)(rpl_gtid *, void *), void *data, + rpl_gtid *extra_gtids, uint32 num_extra, + bool sort); + int tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra); + bool domain_to_gtid(uint32 domain_id, rpl_gtid *out_gtid); + int load(THD *thd, const char *state_from_master, size_t len, bool reset, + bool in_statement); + bool is_empty(); + + element *get_element(uint32 domain_id); + int put_back_list(list_element *list); + + void update_state_hash(uint64 sub_id, rpl_gtid *gtid, void *hton, + rpl_group_info *rgi); + int record_and_update_gtid(THD *thd, struct rpl_group_info *rgi); + int check_duplicate_gtid(rpl_gtid *gtid, rpl_group_info *rgi); + void release_domain_owner(rpl_group_info *rgi); + void set_gtid_pos_tables_list(gtid_pos_table *new_list, + gtid_pos_table *default_entry); + void add_gtid_pos_table(gtid_pos_table *entry); + struct gtid_pos_table *alloc_gtid_pos_table(LEX_CSTRING *table_name, + void *hton, rpl_slave_state::gtid_pos_table_state state); + void free_gtid_pos_tables(struct gtid_pos_table *list); +}; + + +/* + Binlog state. 
+ This keeps the last GTID written to the binlog for every distinct + (domain_id, server_id) pair. + This will be logged at the start of the next binlog file as a + Gtid_list_log_event; this way, it is easy to find the binlog file + containing a given GTID, by simply scanning backwards from the newest + one until a lower seq_no is found in the Gtid_list_log_event at the + start of a binlog for the given domain_id and server_id. + + We also remember the last logged GTID for every domain_id. This is used + to know where to start when a master is changed to a slave. As a side + effect, it also allows to skip a hash lookup in the very common case of + logging a new GTID with same server id as last GTID. +*/ +struct rpl_binlog_state +{ + struct element { + uint32 domain_id; + HASH hash; /* Containing all server_id for one domain_id */ + /* The most recent entry in the hash. */ + rpl_gtid *last_gtid; + /* Counter to allocate next seq_no for this domain. */ + uint64 seq_no_counter; + + int update_element(const rpl_gtid *gtid); + }; + /* Mapping from domain_id to collection of elements. */ + HASH hash; + /* Mutex protecting access to the state. */ + mysql_mutex_t LOCK_binlog_state; + my_bool initialized; + + /* Auxiliary buffer to sort gtid list. 
*/ + DYNAMIC_ARRAY gtid_sort_array; + + rpl_binlog_state() :initialized(0) {} + ~rpl_binlog_state(); + + void init(); + void reset_nolock(); + void reset(); + void free(); + bool load(struct rpl_gtid *list, uint32 count); + bool load(rpl_slave_state *slave_pos); + int update_nolock(const struct rpl_gtid *gtid, bool strict); + int update(const struct rpl_gtid *gtid, bool strict); + int update_with_next_gtid(uint32 domain_id, uint32 server_id, + rpl_gtid *gtid); + int alloc_element_nolock(const rpl_gtid *gtid); + bool check_strict_sequence(uint32 domain_id, uint32 server_id, uint64 seq_no, + bool no_error= false); + int bump_seq_no_if_needed(uint32 domain_id, uint64 seq_no); + int write_to_iocache(IO_CACHE *dest); + int read_from_iocache(IO_CACHE *src); + uint32 count(); + int get_gtid_list(rpl_gtid *gtid_list, uint32 list_size); + int get_most_recent_gtid_list(rpl_gtid **list, uint32 *size); + bool append_pos(String *str); + bool append_state(String *str); + rpl_gtid *find_nolock(uint32 domain_id, uint32 server_id); + rpl_gtid *find(uint32 domain_id, uint32 server_id); + rpl_gtid *find_most_recent(uint32 domain_id); + const char* drop_domain(DYNAMIC_ARRAY *ids, Gtid_list_log_event *glev, char*); +}; + + +/* + Represent the GTID state that a slave connection to a master requests + the master to start sending binlog events from. +*/ +struct slave_connection_state +{ + struct entry { + rpl_gtid gtid; + uint32 flags; + }; + /* Bits for 'flags' */ + enum start_flags + { + START_OWN_SLAVE_POS= 0x1, + START_ON_EMPTY_DOMAIN= 0x2 + }; + + /* Mapping from domain_id to the entry with GTID requested for that domain. */ + HASH hash; + + /* Auxiliary buffer to sort gtid list. 
*/ + DYNAMIC_ARRAY gtid_sort_array; + + slave_connection_state(); + ~slave_connection_state(); + + void reset() { my_hash_reset(&hash); } + int load(const char *slave_request, size_t len); + int load(const rpl_gtid *gtid_list, uint32 count); + int load(rpl_slave_state *state, rpl_gtid *extra_gtids, uint32 num_extra); + rpl_gtid *find(uint32 domain_id); + entry *find_entry(uint32 domain_id); + int update(const rpl_gtid *in_gtid); + void remove(const rpl_gtid *gtid); + void remove_if_present(const rpl_gtid *in_gtid); + ulong count() const { return hash.records; } + int to_string(String *out_str); + int append_to_string(String *out_str); + int get_gtid_list(rpl_gtid *gtid_list, uint32 list_size); + bool is_pos_reached(); +}; + + +extern bool rpl_slave_state_tostring_helper(String *dest, const rpl_gtid *gtid, + bool *first); +extern int gtid_check_rpl_slave_state_table(TABLE *table); +extern rpl_gtid *gtid_parse_string_to_list(const char *p, size_t len, + uint32 *out_len); +extern rpl_gtid *gtid_unpack_string_to_list(const char *p, size_t len, + uint32 *out_len); + + + +/* + This class ensures that the GTID state of an event stream is consistent with + the set of provided binary log files. 
In particular, it has two concerns: + + 1) Ensuring that GTID events are monotonically increasing within each + domain + 2) Ensuring that the GTID state of the specified binary logs is consistent + both with the initial state that a user provides, and between + binary logs (if multiple are specified) +*/ +class Binlog_gtid_state_validator +{ +public: + + struct audit_elem + { + uint32 domain_id; + + + /* + Holds the largest GTID received, and is indexed by domain_id + */ + rpl_gtid last_gtid; + + /* + Holds the GTID position from which auditing should begin for this + domain, and is indexed by domain_id + */ + rpl_gtid start_gtid; + + /* + List of the problematic GTIDs received which were out of order + */ + DYNAMIC_ARRAY late_gtids_real; + + /* + For each problematic GTID in late_gtids_real, this list contains the last + GTID of the domain at the time of receiving the out of order GTID. + */ + DYNAMIC_ARRAY late_gtids_previous; + }; + + Binlog_gtid_state_validator(); + ~Binlog_gtid_state_validator(); + + /* + Initialize where we should start monitoring for invalid GTID entries + in the event stream. Note that these start positions must occur at or after + a given binary logs GTID state (from Gtid_list_log_event) + */ + void initialize_start_gtids(rpl_gtid *start_gtids, size_t n_gtids); + + /* + Initialize our current state so we know where to expect GTIDs to start + increasing from. Error if the state exists after our expected start_gtid + positions, because we know we will be missing event data (possibly from + a purged log). + */ + my_bool initialize_gtid_state(FILE *out, rpl_gtid *gtids, size_t n_gtids); + + /* + Ensures that the expected stop GTID positions exist within the specified + binary logs. + */ + my_bool verify_stop_state(FILE *out, rpl_gtid *stop_gtids, + size_t n_stop_gtids); + + /* + Ensure a GTID state (e.g., from a Gtid_list_log_event) is consistent with + the current state of our auditing. 
For example, if we see a GTID from a + Gtid_list_log_event that is ahead of our current state for that domain, we + have missed events (perhaps from a missing log). + */ + my_bool verify_gtid_state(FILE *out, rpl_gtid *gtid_state_cur); + + /* + Take note of a new GTID being processed. + + returns TRUE if the GTID is invalid, FALSE on success + */ + my_bool record(rpl_gtid *gtid); + + /* + Writes warnings/errors (if any) during GTID processing + + Returns TRUE if any findings were reported, FALSE otherwise + */ + my_bool report(FILE *out, my_bool is_strict_mode); + + static void report_details(FILE *out, const char *format, va_list args) + { + vfprintf(out, format, args); + fprintf(out, "\n"); + } + + static void warn(FILE *out, const char *format,...) + { + va_list args; + va_start(args, format); + fprintf(out, "WARNING: "); + report_details(out, format, args); + } + + static void error(FILE *out, const char *format,...) + { + va_list args; + va_start(args, format); + fprintf(out, "ERROR: "); + report_details(out, format, args); + } + +private: + + /* + Holds the records for each domain id we are monitoring. Elements are of + type `struct audit_elem` and indexed by domain_id. + */ + HASH m_audit_elem_domain_lookup; +}; + +/* + Interface to support different methods of filtering log events by GTID +*/ +class Gtid_event_filter +{ +public: + Gtid_event_filter() {}; + virtual ~Gtid_event_filter() {}; + + enum gtid_event_filter_type + { + DELEGATING_GTID_FILTER_TYPE = 1, + WINDOW_GTID_FILTER_TYPE = 2, + ACCEPT_ALL_GTID_FILTER_TYPE = 3, + REJECT_ALL_GTID_FILTER_TYPE = 4, + INTERSECTING_GTID_FILTER_TYPE = 5 + }; + + enum class id_restriction_mode + { + MODE_NOT_SET, + WHITELIST_MODE, + BLACKLIST_MODE, + }; + + /* + Run the filter on an input gtid to test if the corresponding log events + should be excluded from a result + + Returns TRUE when the event group corresponding to the input GTID should be + excluded. + Returns FALSE when the event group should be included. 
+ */ + virtual my_bool exclude(rpl_gtid *) = 0; + + /* + The gtid_event_filter_type that corresponds to the underlying filter + implementation + */ + virtual uint32 get_filter_type() = 0; + + /* + For filters that can maintain their own state, this tests if the filter + implementation has completed. + + Returns TRUE when completed, and FALSE when the filter has not finished. + */ + virtual my_bool has_finished() = 0; +}; + +/* + Filter implementation which will include any and all input GTIDs. This is + used to set default behavior for GTIDs that do not have explicit filters + set on their domain_id, e.g. when a Window_gtid_event_filter is used for + a specific domain, then all other domain_ids will be accepted using this + filter implementation. +*/ +class Accept_all_gtid_filter : public Gtid_event_filter +{ +public: + Accept_all_gtid_filter() {} + ~Accept_all_gtid_filter() {} + my_bool exclude(rpl_gtid *gtid) { return FALSE; } + uint32 get_filter_type() { return ACCEPT_ALL_GTID_FILTER_TYPE; } + my_bool has_finished() { return FALSE; } +}; + +/* + Filter implementation to exclude all tested GTIDs. +*/ +class Reject_all_gtid_filter : public Gtid_event_filter +{ +public: + Reject_all_gtid_filter() {} + ~Reject_all_gtid_filter() {} + my_bool exclude(rpl_gtid *gtid) { return TRUE; } + uint32 get_filter_type() { return REJECT_ALL_GTID_FILTER_TYPE; } + my_bool has_finished() { return FALSE; } +}; + +/* + A filter implementation that includes events that exist between two GTID + positions, m_start (exclusive) and m_stop (inclusive), within a domain. + + This filter is stateful, such that it expects GTIDs to be an increasing + stream, and internally, the window will activate and deactivate when the + start and stop positions of the event stream have passed through, + respectively. 
+*/ +class Window_gtid_event_filter : public Gtid_event_filter +{ +public: + Window_gtid_event_filter(); + ~Window_gtid_event_filter() {} + + my_bool exclude(rpl_gtid*); + my_bool has_finished(); + + /* + Set the GTID that begins this window (exclusive) + + Returns 0 on ok, non-zero on error + */ + int set_start_gtid(rpl_gtid *start); + + /* + Set the GTID that ends this window (inclusive) + + Returns 0 on ok, non-zero on error + */ + int set_stop_gtid(rpl_gtid *stop); + + uint32 get_filter_type() { return WINDOW_GTID_FILTER_TYPE; } + + /* + Validates the underlying range is correct, and writes an error if not, i.e. + m_start >= m_stop. + + Returns FALSE on ok, TRUE if range is invalid + */ + my_bool is_range_invalid(); + + /* + Getter/setter methods + */ + my_bool has_start() { return m_has_start; } + my_bool has_stop() { return m_has_stop; } + rpl_gtid get_start_gtid() { return m_start; } + rpl_gtid get_stop_gtid() { return m_stop; } + + void clear_start_pos() + { + m_has_start= FALSE; + m_start= {0, 0, 0}; + } + + void clear_stop_pos() + { + m_has_stop= FALSE; + m_stop= {0, 0, 0}; + } + +protected: + + /* + When processing GTID streams, the order in which they are processed should + be sequential with no gaps between events. If a gap is found within a + window, warn the user. + */ + void verify_gtid_is_expected(rpl_gtid *gtid); + +private: + + enum warning_flags + { + WARN_GTID_SEQUENCE_NUMBER_OUT_OF_ORDER= 0x1 + }; + + /* + m_has_start : Indicates if a start to this window has been explicitly + provided. A window starts immediately if not provided. + */ + my_bool m_has_start; + + /* + m_has_stop : Indicates if a stop to this window has been explicitly + provided. A window continues indefinitely if not provided. + */ + my_bool m_has_stop; + + /* + m_is_active : Indicates whether or not the program is currently reading + events from within this window. 
When TRUE, events with + different server ids than those specified by m_start or + m_stop will be passed through. + */ + my_bool m_is_active; + + /* + m_has_passed : Indicates whether or not this window has already been + passed through, i.e. the stop position has been seen. + */ + my_bool m_has_passed; + + /* m_start : marks the GTID that begins the window (exclusive). */ + rpl_gtid m_start; + + /* m_stop : marks the GTID that ends the range (inclusive). */ + rpl_gtid m_stop; +}; + +template <typename T> struct gtid_filter_element +{ + Gtid_event_filter *filter; + T identifier; /* Used for HASH lookup */ +}; + +/* + Gtid_event_filter subclass which has no specific implementation, but rather + delegates the filtering to specific identifiable/mapped implementations. + + A default filter is used for GTIDs that are passed through which no explicit + filter can be identified. + + This class should be subclassed, where the get_id_from_gtid function + specifies how to extract the filter identifier from a GTID. The type of the + filter identifier is a template for the class. +*/ +template <typename T> +class Id_delegating_gtid_event_filter : public Gtid_event_filter +{ +public: + Id_delegating_gtid_event_filter(); + ~Id_delegating_gtid_event_filter(); + + my_bool exclude(rpl_gtid *gtid); + my_bool has_finished(); + void set_default_filter(Gtid_event_filter *default_filter); + + uint32 get_filter_type() { return DELEGATING_GTID_FILTER_TYPE; } + + virtual T get_id_from_gtid(rpl_gtid *) = 0; + virtual const char* get_id_type_name() = 0; + + /* + Sets restrictions on entire ids using the corresponding mode (i.e. either + whitelist or blacklist, refer to Gtid_event_filter::id_restriction_mode) + + A blacklist will allow all ids except for the ones provided in the input + list. + A whitelist will only allow ids provided in the input list. + + Returns 0 on ok, non-zero on error. 
+ */ + int set_id_restrictions(T *id_list, size_t n_ids, + Gtid_event_filter::id_restriction_mode mode); + +protected: + + uint32 m_num_stateful_filters; + uint32 m_num_completed_filters; + Gtid_event_filter *m_default_filter; + + HASH m_filters_by_id_hash; + + Gtid_event_filter::id_restriction_mode m_id_restriction_mode; + + gtid_filter_element *find_or_create_filter_element_for_id(T); +}; + +/* + A subclass of Id_delegating_gtid_event_filter which identifies filters using + the domain id of a GTID. + + Additional helper functions include: + add_start_gtid(GTID) : adds a start GTID position to this filter, to be + identified by its domain id + add_stop_gtid(GTID) : adds a stop GTID position to this filter, to be + identified by its domain id + clear_start_gtids() : removes existing GTID start positions + clear_stop_gtids() : removes existing GTID stop positions + get_start_gtids() : gets all added GTID start positions + get_stop_gtids() : gets all added GTID stop positions + get_num_start_gtids() : gets the count of added GTID start positions + get_num_stop_gtids() : gets the count of added GTID stop positions +*/ +class Domain_gtid_event_filter + : public Id_delegating_gtid_event_filter +{ +public: + Domain_gtid_event_filter() + { + my_init_dynamic_array(PSI_INSTRUMENT_ME, &m_start_filters, + sizeof(decltype(rpl_gtid::domain_id) *), 8, 8, + MYF(0)); + my_init_dynamic_array(PSI_INSTRUMENT_ME, &m_stop_filters, + sizeof(decltype(rpl_gtid::domain_id) *), 8, 8, + MYF(0)); + } + ~Domain_gtid_event_filter() + { + delete_dynamic(&m_start_filters); + delete_dynamic(&m_stop_filters); + } + + /* + Returns the domain id of from the input GTID + */ + decltype(rpl_gtid::domain_id) get_id_from_gtid(rpl_gtid *gtid) + { + return gtid->domain_id; + } + + const char* get_id_type_name() { return "domain"; } + + /* + Override Id_delegating_gtid_event_filter to extend with domain specific + filtering logic + */ + my_bool exclude(rpl_gtid*); + + /* + Validates that window filters with 
both a start and stop GTID satisfy + stop_gtid > start_gtid + + Returns 0 on ok, non-zero if any windows are invalid. + */ + int validate_window_filters(); + + /* + Helper function to start a GTID window filter at the given GTID + + Returns 0 on ok, non-zero on error + */ + int add_start_gtid(rpl_gtid *gtid); + + /* + Helper function to end a GTID window filter at the given GTID + + Returns 0 on ok, non-zero on error + */ + int add_stop_gtid(rpl_gtid *gtid); + + /* + If start or stop position is respecified, we remove all existing values + and start over with the new specification. + */ + void clear_start_gtids(); + void clear_stop_gtids(); + + /* + Return list of all GTIDs used as start position. + + Note that this list is allocated and it is up to the user to free it + */ + rpl_gtid *get_start_gtids(); + + /* + Return list of all GTIDs used as stop position. + + Note that this list is allocated and it is up to the user to free it + */ + rpl_gtid *get_stop_gtids(); + + size_t get_num_start_gtids() { return m_start_filters.elements; } + size_t get_num_stop_gtids() { return m_stop_filters.elements; } + +private: + DYNAMIC_ARRAY m_start_filters; + DYNAMIC_ARRAY m_stop_filters; + + Window_gtid_event_filter * + find_or_create_window_filter_for_id(decltype(rpl_gtid::domain_id)); +}; + +/* + A subclass of Id_delegating_gtid_event_filter which identifies filters using + the server id of a GTID. +*/ +class Server_gtid_event_filter + : public Id_delegating_gtid_event_filter +{ +public: + /* + Returns the server id of from the input GTID + */ + decltype(rpl_gtid::server_id) get_id_from_gtid(rpl_gtid *gtid) + { + return gtid->server_id; + } + + const char* get_id_type_name() { return "server"; } +}; + +/* + A Gtid_event_filter implementation that delegates the filtering to other + filters, where the result is the intersection between them all. 
+*/ +class Intersecting_gtid_event_filter : public Gtid_event_filter +{ +public: + Intersecting_gtid_event_filter(Gtid_event_filter *filter1, + Gtid_event_filter *filter2); + ~Intersecting_gtid_event_filter(); + + /* + Returns TRUE if any filers exclude the gtid, returns FALSE otherwise, i.e. + all filters must allow the GTID. + */ + my_bool exclude(rpl_gtid *gtid); + + /* + Returns true if any filters have finished. To elaborate, as this filter + performs an intersection, if any filter has finished, the result would + be excluded regardless. + */ + my_bool has_finished(); + + uint32 get_filter_type() { return INTERSECTING_GTID_FILTER_TYPE; } + + /* + Adds a new filter to the intersection + */ + my_bool add_filter(Gtid_event_filter *filter) + { + return insert_dynamic(&m_filters, (void *) &filter); + } + + protected: + DYNAMIC_ARRAY m_filters; +}; + +#endif /* RPL_GTID_H */ diff --git a/sql/rpl_injector.cc b/sql/rpl_injector.cc new file mode 100644 index 00000000..3080d92b --- /dev/null +++ b/sql/rpl_injector.cc @@ -0,0 +1,197 @@ +/* Copyright (c) 2006, 2011, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "rpl_injector.h" +#include "transaction.h" +#include "sql_parse.h" // begin_trans, end_trans, COMMIT +#include "sql_base.h" // close_thread_tables +#include "log_event.h" // Incident_log_event + +/* + injector::transaction - member definitions +*/ + +/* inline since it's called below */ +inline +injector::transaction::transaction(MYSQL_BIN_LOG *log, THD *thd) + : m_state(START_STATE), m_thd(thd) +{ + /* + Default initialization of m_start_pos (which initializes it to garbage). + We need to fill it in using the code below. + */ + LOG_INFO log_info; + log->get_current_log(&log_info); + /* !!! binlog_pos does not follow RAII !!! */ + m_start_pos.m_file_name= my_strdup(key_memory_binlog_pos, + log_info.log_file_name, MYF(0)); + m_start_pos.m_file_pos= log_info.pos; + + m_thd->lex->start_transaction_opt= 0; /* for begin_trans() */ + trans_begin(m_thd); +} + +injector::transaction::~transaction() +{ + if (!good()) + return; + + /* Needed since my_free expects a 'char*' (instead of 'void*'). */ + char* const the_memory= const_cast<char*>(m_start_pos.m_file_name); + + /* + We set the first character to null just to give all the copies of the + start position a (minimal) chance of seeing that the memory is lost. + All assuming the my_free does not step over the memory, of course. + */ + *the_memory= '\0'; + + my_free(the_memory); +} + +/** + @retval 0 transaction committed + @retval 1 transaction rolled back + */ +int injector::transaction::commit() +{ + DBUG_ENTER("injector::transaction::commit()"); + int error= m_thd->binlog_flush_pending_rows_event(true); + /* + Cluster replication does not preserve statement or + transaction boundaries of the master. 
Instead, a new + transaction on replication slave is started when a new GCI + (global checkpoint identifier) is issued, and is committed + when the last event of the check point has been received and + processed. This ensures consistency of each cluster in + cluster replication, and there is no requirement for stronger + consistency: MySQL replication is asynchronous with other + engines as well. + + A practical consequence of that is that row level replication + stream passed through the injector thread never contains + COMMIT events. + Here we should preserve the server invariant that there is no + outstanding statement transaction when the normal transaction + is committed by committing the statement transaction + explicitly. + */ + trans_commit_stmt(m_thd); + if (!trans_commit(m_thd)) + { + close_thread_tables(m_thd); + m_thd->release_transactional_locks(); + } + DBUG_RETURN(error); +} + + +#ifdef TO_BE_DELETED +int injector::transaction::use_table(server_id_type sid, table tbl) +{ + DBUG_ENTER("injector::transaction::use_table"); + + int error; + + if (unlikely((error= check_state(TABLE_STATE)))) + DBUG_RETURN(error); + + server_id_type save_id= m_thd->variables.server_id; + m_thd->set_server_id(sid); + DBUG_ASSERT(tbl.is_transactional() == tbl.get_table()->file->row_logging_has_trans); + error= m_thd->binlog_write_table_map(tbl.get_table(), 0); + m_thd->set_server_id(save_id); + DBUG_RETURN(error); +} +#endif + + +injector::transaction::binlog_pos injector::transaction::start_pos() const +{ + return m_start_pos; +} + + +/* + injector - member definitions +*/ + +/* This constructor is called below */ +inline injector::injector() = default; + +static injector *s_injector= 0; +injector *injector::instance() +{ + if (s_injector == 0) + s_injector= new injector; + /* "There can be only one [instance]" */ + return s_injector; +} + +void injector::free_instance() +{ + injector *inj = s_injector; + + if (inj != 0) + { + s_injector= 0; + delete inj; + } +} + + 
+injector::transaction injector::new_trans(THD *thd) +{ + DBUG_ENTER("injector::new_trans(THD*)"); + /* + Currently, there is no alternative to using 'mysql_bin_log' since that + is hardcoded into the way the handler is using the binary log. + */ + DBUG_RETURN(transaction(&mysql_bin_log, thd)); +} + +void injector::new_trans(THD *thd, injector::transaction *ptr) +{ + DBUG_ENTER("injector::new_trans(THD *, transaction *)"); + /* + Currently, there is no alternative to using 'mysql_bin_log' since that + is hardcoded into the way the handler is using the binary log. + */ + transaction trans(&mysql_bin_log, thd); + ptr->swap(trans); + + DBUG_VOID_RETURN; +} + +int injector::record_incident(THD *thd, Incident incident) +{ + Incident_log_event ev(thd, incident); + int error; + if (unlikely((error= mysql_bin_log.write(&ev)))) + return error; + return mysql_bin_log.rotate_and_purge(true); +} + +int injector::record_incident(THD *thd, Incident incident, + const LEX_CSTRING *message) +{ + Incident_log_event ev(thd, incident, message); + int error; + if (unlikely((error= mysql_bin_log.write(&ev)))) + return error; + return mysql_bin_log.rotate_and_purge(true); +} diff --git a/sql/rpl_injector.h b/sql/rpl_injector.h new file mode 100644 index 00000000..6a1c7248 --- /dev/null +++ b/sql/rpl_injector.h @@ -0,0 +1,316 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef INJECTOR_H +#define INJECTOR_H + +/* Pull in 'byte', 'my_off_t', and 'uint32' */ +#include + +#include "rpl_constants.h" +#include "table.h" /* TABLE */ + +/* Forward declarations */ +class handler; +class MYSQL_BIN_LOG; +struct TABLE; + + +/* + Injector to inject rows into the MySQL server. + + The injector class is used to notify the MySQL server of new rows that have + appeared outside of MySQL control. + + The original purpose of this is to allow clusters---which handle replication + inside the cluster through other means---to insert new rows into binary log. + Note, however, that the injector should be used whenever rows are altered in + any manner that is outside of MySQL server visibility and which therefore + are not seen by the MySQL server. + */ +class injector +{ +public: + + /* + Get an instance of the injector. + + DESCRIPTION + The injector is a Singleton, so this static function return the + available instance of the injector. + + RETURN VALUE + A pointer to the available injector object. + */ + static injector *instance(); + + /* + Delete the singleton instance (if allocated). Used during server shutdown. + */ + static void free_instance(); + + /* + A transaction where rows can be added. + + DESCRIPTION + The transaction class satisfy the **CopyConstructible** and + **Assignable** requirements. Note that the transaction is *not* + default constructible. + */ + class transaction { + friend class injector; + public: + /* Convenience definitions */ + typedef uchar* record_type; + typedef uint32 server_id_type; + + /* + Table reference. + + RESPONSIBILITY + + The class contains constructors to handle several forms of + references to tables. 
The constructors can implicitly be used to + construct references from, e.g., strings containing table names. + + EXAMPLE + + The class is intended to be used *by value*. Please, do not try to + construct objects of this type using 'new'; instead construct an + object, possibly a temporary object. For example: + + injector::transaction::table tbl(share->table, true); + MY_BITMAP cols; + my_bitmap_init(&cols, NULL, (i + 7) / 8, false); + inj->write_row(::server_id, tbl, &cols, row_data); + + or + + MY_BITMAP cols; + my_bitmap_init(&cols, NULL, (i + 7) / 8, false); + inj->write_row(::server_id, + injector::transaction::table(share->table, true), + &cols, row_data); + + This will work, be more efficient, and have greater chance of + inlining, not run the risk of losing pointers. + + COLLABORATION + + injector::transaction + Provide a flexible interface to the representation of tables. + + */ + class table + { + public: + table(TABLE *table, bool is_transactional_arg) + : m_table(table), m_is_transactional(is_transactional_arg) + { + } + + char const *db_name() const { return m_table->s->db.str; } + char const *table_name() const { return m_table->s->table_name.str; } + TABLE *get_table() const { return m_table; } + bool is_transactional() const { return m_is_transactional; } + + private: + TABLE *m_table; + bool m_is_transactional; + }; + + /* + Binlog position as a structure. + */ + class binlog_pos { + friend class transaction; + public: + char const *file_name() const { return m_file_name; } + my_off_t file_pos() const { return m_file_pos; } + + private: + char const *m_file_name; + my_off_t m_file_pos; + }; + + transaction() : m_thd(NULL) { } + transaction(transaction const&); + ~transaction(); + + /* Clear transaction, i.e., make calls to 'good()' return false. */ + void clear() { m_thd= NULL; } + + /* Is the transaction in a good state? 
*/ + bool good() const { return m_thd != NULL; } + + /* Default assignment operator: standard implementation */ + transaction& operator=(transaction t) { + swap(t); + return *this; + } + + /* + + DESCRIPTION + + Register table for use within the transaction. All tables + that are going to be used need to be registered before being + used below. The member function will fail with an error if + use_table() is called after any *_row() function has been + called for the transaction. + + RETURN VALUE + + 0 All OK + >0 Failure + + */ +#ifdef TO_BE_DELETED + int use_table(server_id_type sid, table tbl); +#endif + /* + Commit a transaction. + + This member function will clean up after a sequence of *_row calls by, + for example, releasing resource and unlocking files. + */ + int commit(); + + /* + Get the position for the start of the transaction. + + Returns the position in the binary log of the first event in this + transaction. If no event is yet written, the position where the event + *will* be written is returned. This position is known, since a + new_transaction() will lock the binary log and prevent any other + writes to the binary log. + */ + binlog_pos start_pos() const; + + private: + /* Only the injector may construct these object */ + transaction(MYSQL_BIN_LOG *, THD *); + + void swap(transaction& o) { + /* std::swap(m_start_pos, o.m_start_pos); */ + { + binlog_pos const tmp= m_start_pos; + m_start_pos= o.m_start_pos; + o.m_start_pos= tmp; + } + + /* std::swap(m_thd, o.m_thd); */ + { + THD* const tmp= m_thd; + m_thd= o.m_thd; + o.m_thd= tmp; + } + { + enum_state const tmp= m_state; + m_state= o.m_state; + o.m_state= tmp; + } + } + + enum enum_state + { + START_STATE, /* Start state */ + TABLE_STATE, /* At least one table has been registered */ + ROW_STATE, /* At least one row has been registered */ + STATE_COUNT /* State count and sink state */ + } m_state; + + /* + Check and update the state. 
+ + PARAMETER(S) + + target_state + The state we are moving to: TABLE_STATE if we are + writing a table and ROW_STATE if we are writing a row. + + DESCRIPTION + + The internal state will be updated to the target state if + and only if it is a legal move. The only legal moves are: + + START_STATE -> START_STATE + START_STATE -> TABLE_STATE + TABLE_STATE -> TABLE_STATE + TABLE_STATE -> ROW_STATE + + That is: + - It is not possible to write any row before having written at + least one table + - It is not possible to write a table after at least one row + has been written + + RETURN VALUE + + 0 All OK + -1 Incorrect call sequence + */ + int check_state(enum_state const target_state) + { +#ifdef DBUG_TRACE + static char const *state_name[] = { + "START_STATE", "TABLE_STATE", "ROW_STATE", "STATE_COUNT" + }; + + DBUG_PRINT("info", ("In state %s", state_name[m_state])); +#endif + DBUG_ASSERT(target_state <= STATE_COUNT); + + if (m_state <= target_state && target_state <= m_state + 1 && + m_state < STATE_COUNT) + m_state= target_state; + else + m_state= STATE_COUNT; + return m_state == STATE_COUNT ? 1 : 0; + } + + + binlog_pos m_start_pos; + THD *m_thd; + }; + + /* + Create a new transaction. This member function will prepare for a + sequence of *_row calls by, for example, reserving resources and + locking files. There are two overloaded alternatives: one returning a + transaction by value and one using placement semantics. The following + two calls are equivalent, with the exception that the latter will + overwrite the transaction. 
+ + injector::transaction trans1= inj->new_trans(thd); + + injector::transaction trans2; + inj->new_trans(thd, &trans); + */ + transaction new_trans(THD *); + void new_trans(THD *, transaction *); + + int record_incident(THD*, Incident incident); + int record_incident(THD*, Incident incident, const LEX_CSTRING *message); + +private: + explicit injector(); + ~injector() = default; /* Nothing needs to be done */ + injector(injector const&); /* You're not allowed to copy injector + instances. + */ +}; + +#endif /* INJECTOR_H */ diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc new file mode 100644 index 00000000..3c698f27 --- /dev/null +++ b/sql/rpl_mi.cc @@ -0,0 +1,2077 @@ +/* Copyright (c) 2006, 2017, Oracle and/or its affiliates. + Copyright (c) 2010, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" // For HAVE_REPLICATION +#include "sql_priv.h" +#include +#include "rpl_mi.h" +#include "slave.h" +#include "strfunc.h" +#include "sql_repl.h" + +#ifdef HAVE_REPLICATION + +#define DEFAULT_CONNECT_RETRY 60 + +static void init_master_log_pos(Master_info* mi); + +Master_info::Master_info(LEX_CSTRING *connection_name_arg, + bool is_slave_recovery) + :Slave_reporting_capability("I/O"), + ssl(0), ssl_verify_server_cert(1), fd(-1), io_thd(0), + rli(is_slave_recovery), port(MYSQL_PORT), + checksum_alg_before_fd(BINLOG_CHECKSUM_ALG_UNDEF), + connect_retry(DEFAULT_CONNECT_RETRY), inited(0), abort_slave(0), + slave_running(MYSQL_SLAVE_NOT_RUN), slave_run_id(0), + clock_diff_with_master(0), + sync_counter(0), heartbeat_period(0), received_heartbeats(0), + master_id(0), prev_master_id(0), + using_gtid(USE_GTID_SLAVE_POS), events_queued_since_last_gtid(0), + gtid_reconnect_event_skip_count(0), gtid_event_seen(false), + in_start_all_slaves(0), in_stop_all_slaves(0), in_flush_all_relay_logs(0), + users(0), killed(0), + total_ddl_groups(0), total_non_trans_groups(0), total_trans_groups(0) +{ + char *tmp; + host[0] = 0; user[0] = 0; password[0] = 0; + ssl_ca[0]= 0; ssl_capath[0]= 0; ssl_cert[0]= 0; + ssl_cipher[0]= 0; ssl_key[0]= 0; + ssl_crl[0]= 0; ssl_crlpath[0]= 0; + + /* + Store connection name and lower case connection name + It's safe to ignore any OMM errors as this is checked by error() + */ + connection_name.length= cmp_connection_name.length= + connection_name_arg->length; + if ((connection_name.str= tmp= (char*) + my_malloc(PSI_INSTRUMENT_ME, connection_name_arg->length*2+2, MYF(MY_WME)))) + { + strmake(tmp, connection_name_arg->str, connection_name.length); + tmp+= connection_name_arg->length+1; + cmp_connection_name.str= tmp; + memcpy(tmp, 
connection_name_arg->str, connection_name.length+1); + my_casedn_str(system_charset_info, tmp); + } + /* + When MySQL restarted, all Rpl_filter settings which aren't in the my.cnf + will be lost. If you want to lose a setting after restart, you + should add them into my.cnf + */ + rpl_filter= get_or_create_rpl_filter(connection_name.str, + connection_name.length); + copy_filter_setting(rpl_filter, global_rpl_filter); + + parallel_mode= rpl_filter->get_parallel_mode(); + + my_init_dynamic_array(PSI_INSTRUMENT_ME, &ignore_server_ids, + sizeof(global_system_variables.server_id), 16, 16, + MYF(0)); + bzero((char*) &file, sizeof(file)); + mysql_mutex_init(key_master_info_run_lock, &run_lock, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_master_info_data_lock, &data_lock, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_master_info_start_stop_lock, &start_stop_lock, + MY_MUTEX_INIT_SLOW); + /* + start_alter_lock will protect individual start_alter_info while + start_alter_list_lock is for list insertion and deletion operations + */ + mysql_mutex_init(key_master_info_start_alter_lock, &start_alter_lock, + MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_master_info_start_alter_list_lock, &start_alter_list_lock, + MY_MUTEX_INIT_FAST); + mysql_mutex_setflags(&run_lock, MYF_NO_DEADLOCK_DETECTION); + mysql_mutex_setflags(&data_lock, MYF_NO_DEADLOCK_DETECTION); + mysql_mutex_init(key_master_info_sleep_lock, &sleep_lock, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_master_info_data_cond, &data_cond, NULL); + mysql_cond_init(key_master_info_start_cond, &start_cond, NULL); + mysql_cond_init(key_master_info_stop_cond, &stop_cond, NULL); + mysql_cond_init(key_master_info_sleep_cond, &sleep_cond, NULL); + init_sql_alloc(PSI_INSTRUMENT_ME, &mem_root, MEM_ROOT_BLOCK_SIZE, 0, MYF(0)); +} + + +/** + Wait until no one is using Master_info +*/ + +void Master_info::wait_until_free() +{ + mysql_mutex_lock(&sleep_lock); + killed= 1; + while (users) + mysql_cond_wait(&sleep_cond, &sleep_lock); + 
mysql_mutex_unlock(&sleep_lock); +} + +/** + Delete master_info +*/ + +Master_info::~Master_info() +{ + wait_until_free(); + my_free(const_cast(connection_name.str)); + delete_dynamic(&ignore_server_ids); + mysql_mutex_destroy(&run_lock); + mysql_mutex_destroy(&data_lock); + mysql_mutex_destroy(&sleep_lock); + mysql_mutex_destroy(&start_stop_lock); + mysql_mutex_destroy(&start_alter_lock); + mysql_mutex_destroy(&start_alter_list_lock); + mysql_cond_destroy(&data_cond); + mysql_cond_destroy(&start_cond); + mysql_cond_destroy(&stop_cond); + mysql_cond_destroy(&sleep_cond); + free_root(&mem_root, MYF(0)); +} + +/** + A comparison function to be supplied as argument to @c sort_dynamic() + and @c bsearch() + + @return -1 if first argument is less, 0 if it equal to, 1 if it is greater + than the second +*/ +static int change_master_id_cmp(const void *id1, const void *id2) +{ + return (*(ulong *) id1 - *(ulong *) id2); +} + +/** + Reports if the s_id server has been configured to ignore events + it generates with + + CHANGE MASTER IGNORE_SERVER_IDS= ( list of server ids ) + + Method is called from the io thread event receiver filtering. + + @param s_id the master server identifier + + @retval TRUE if s_id is in the list of ignored master servers, + @retval FALSE otherwise. 
+ */ +bool Master_info::shall_ignore_server_id(ulong s_id) +{ + if (likely(ignore_server_ids.elements == 1)) + return (* (ulong*) dynamic_array_ptr(&ignore_server_ids, 0)) == s_id; + else + return bsearch((const ulong *) &s_id, + ignore_server_ids.buffer, + ignore_server_ids.elements, sizeof(ulong), + change_master_id_cmp) != NULL; +} + +void Master_info::clear_in_memory_info(bool all) +{ + init_master_log_pos(this); + if (all) + { + port= MYSQL_PORT; + host[0] = 0; user[0] = 0; password[0] = 0; + domain_id_filter.clear_ids(); + reset_dynamic(&ignore_server_ids); + } +} + + +const char * +Master_info::using_gtid_astext(enum enum_using_gtid arg) +{ + switch (arg) + { + case USE_GTID_NO: + return "No"; + case USE_GTID_SLAVE_POS: + return "Slave_Pos"; + default: + DBUG_ASSERT(arg == USE_GTID_CURRENT_POS); + return "Current_Pos"; + } +} + + +void init_master_log_pos(Master_info* mi) +{ + DBUG_ENTER("init_master_log_pos"); + + mi->master_log_name[0] = 0; + mi->master_log_pos = BIN_LOG_HEADER_SIZE; // skip magic number + if (mi->master_supports_gtid) + { + mi->using_gtid= Master_info::USE_GTID_SLAVE_POS; + } + mi->gtid_current_pos.reset(); + mi->events_queued_since_last_gtid= 0; + mi->gtid_reconnect_event_skip_count= 0; + mi->gtid_event_seen= false; + + /* Intentionally init ssl_verify_server_cert to 0, no option available */ + mi->ssl_verify_server_cert= 0; + /* + always request heartbeat unless master_heartbeat_period is set + explicitly zero. Here is the default value for heartbeat period + if CHANGE MASTER did not specify it. (no data loss in conversion + as hb period has a max) + */ + mi->heartbeat_period= (float) MY_MIN(SLAVE_MAX_HEARTBEAT_PERIOD, + (slave_net_timeout/2.0)); + DBUG_ASSERT(mi->heartbeat_period > (float) 0.001 + || mi->heartbeat_period == 0); + + DBUG_VOID_RETURN; +} + +/** + Parses the IO_CACHE for "key=" and returns the "key". + If no '=' found, returns the whole line (for END_MARKER). 
+ + @param key [OUT] Key buffer + @param max_size [IN] Maximum buffer size + @param f [IN] IO_CACHE file + @param found_equal [OUT] Set true if a '=' was found. + + @retval 0 Either "key=" or '\n' found + @retval 1 EOF +*/ +static int +read_mi_key_from_file(char *key, int max_size, IO_CACHE *f, bool *found_equal) +{ + int i= 0, c; + + DBUG_ENTER("read_key_from_file"); + + *found_equal= false; + if (max_size <= 0) + DBUG_RETURN(1); + for (;;) + { + if (i >= max_size-1) + { + key[i] = '\0'; + DBUG_RETURN(0); + } + c= my_b_get(f); + if (c == my_b_EOF) + { + DBUG_RETURN(1); + } + else if (c == '\n') + { + key[i]= '\0'; + DBUG_RETURN(0); + } + else if (c == '=') + { + key[i]= '\0'; + *found_equal= true; + DBUG_RETURN(0); + } + else + { + key[i]= c; + ++i; + } + } + /* NotReached */ +} + +enum { + LINES_IN_MASTER_INFO_WITH_SSL= 14, + + /* 5.1.16 added value of master_ssl_verify_server_cert */ + LINE_FOR_MASTER_SSL_VERIFY_SERVER_CERT= 15, + + /* 5.5 added value of master_heartbeat_period */ + LINE_FOR_MASTER_HEARTBEAT_PERIOD= 16, + + /* MySQL Cluster 6.3 added master_bind */ + LINE_FOR_MASTER_BIND = 17, + + /* 6.0 added value of master_ignore_server_id */ + LINE_FOR_REPLICATE_IGNORE_SERVER_IDS= 18, + + /* 6.0 added value of master_uuid */ + LINE_FOR_MASTER_UUID= 19, + + /* line for master_retry_count */ + LINE_FOR_MASTER_RETRY_COUNT= 20, + + /* line for ssl_crl */ + LINE_FOR_SSL_CRL= 21, + + /* line for ssl_crl */ + LINE_FOR_SSL_CRLPATH= 22, + + /* MySQL 5.6 fixed-position lines. */ + LINE_FOR_FIRST_MYSQL_5_6=23, + LINE_FOR_LAST_MYSQL_5_6=23, + /* Reserved lines for MySQL future versions. 
*/ + LINE_FOR_LAST_MYSQL_FUTURE=33, + /* Number of (fixed-position) lines used when saving master info file */ + LINES_IN_MASTER_INFO= LINE_FOR_LAST_MYSQL_FUTURE +}; + +int init_master_info(Master_info* mi, const char* master_info_fname, + const char* slave_info_fname, + bool abort_if_no_master_info_file, + int thread_mask) +{ + int fd,error; + char fname[FN_REFLEN+128]; + DBUG_ENTER("init_master_info"); + + if (mi->inited) + { + /* + We have to reset read position of relay-log-bin as we may have + already been reading from 'hotlog' when the slave was stopped + last time. If this case pos_in_file would be set and we would + get a crash when trying to read the signature for the binary + relay log. + + We only rewind the read position if we are starting the SQL + thread. The handle_slave_sql thread assumes that the read + position is at the beginning of the file, and will read the + "signature" and then fast-forward to the last position read. + */ + if (thread_mask & SLAVE_SQL) + { + bool hot_log= FALSE; + /* + my_b_seek does an implicit flush_io_cache, so we need to: + + 1. check if this log is active (hot) + 2. if it is we keep log_lock until the seek ends, otherwise + release it right away. + + If we did not take log_lock, SQL thread might race with IO + thread for the IO_CACHE mutex. + + */ + mysql_mutex_t *log_lock= mi->rli.relay_log.get_log_lock(); + mysql_mutex_lock(log_lock); + hot_log= mi->rli.relay_log.is_active(mi->rli.linfo.log_file_name); + + if (!hot_log) + mysql_mutex_unlock(log_lock); + + my_b_seek(mi->rli.cur_log, (my_off_t) 0); + + if (hot_log) + mysql_mutex_unlock(log_lock); + } + DBUG_RETURN(0); + } + + mi->mysql=0; + mi->file_id=1; + fn_format(fname, master_info_fname, mysql_data_home, "", 4+32); + + /* + We need a mutex while we are changing master info parameters to + keep other threads from reading bogus info + */ + + mysql_mutex_lock(&mi->data_lock); + fd = mi->fd; + + /* does master.info exist ? 
*/ + + if (access(fname,F_OK)) + { + if (abort_if_no_master_info_file) + { + mysql_mutex_unlock(&mi->data_lock); + DBUG_RETURN(0); + } + /* + if someone removed the file from underneath our feet, just close + the old descriptor and re-create the old file + */ + if (fd >= 0) + mysql_file_close(fd, MYF(MY_WME)); + if ((fd= mysql_file_open(key_file_master_info, + fname, O_CREAT|O_RDWR|O_BINARY, MYF(MY_WME))) < 0 ) + { + sql_print_error("Failed to create a new master info file (\ +file '%s', errno %d)", fname, my_errno); + goto err; + } + if (init_io_cache(&mi->file, fd, IO_SIZE*2, READ_CACHE, 0L,0, + MYF(MY_WME))) + { + sql_print_error("Failed to create a cache on master info file (\ +file '%s')", fname); + goto err; + } + + mi->fd = fd; + mi->clear_in_memory_info(false); + + } + else // file exists + { + if (fd >= 0) + reinit_io_cache(&mi->file, READ_CACHE, 0L,0,0); + else + { + if ((fd= mysql_file_open(key_file_master_info, + fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0 ) + { + sql_print_error("Failed to open the existing master info file (\ +file '%s', errno %d)", fname, my_errno); + goto err; + } + if (init_io_cache(&mi->file, fd, IO_SIZE*2, READ_CACHE, 0L, + 0, MYF(MY_WME))) + { + sql_print_error("Failed to create a cache on master info file (\ +file '%s')", fname); + goto err; + } + } + + mi->fd = fd; + int port, connect_retry, master_log_pos, lines; + int ssl= 0, ssl_verify_server_cert= 0; + float master_heartbeat_period= 0.0; + char *first_non_digit; + char buf[HOSTNAME_LENGTH+1]; + + /* + Starting from 4.1.x master.info has new format. Now its + first line contains number of lines in file. By reading this + number we will be always distinguish to which version our + master.info corresponds to. We can't simply count lines in + file since versions before 4.1.x could generate files with more + lines than needed. 
+ If first line doesn't contain a number or contain number less than + LINES_IN_MASTER_INFO_WITH_SSL then such file is treated like file + from pre 4.1.1 version. + There is no ambiguity when reading an old master.info, as before + 4.1.1, the first line contained the binlog's name, which is either + empty or has an extension (contains a '.'), so can't be confused + with an integer. + + So we're just reading first line and trying to figure which version + is this. + */ + + /* + The first row is temporarily stored in mi->master_log_name, + if it is line count and not binlog name (new format) it will be + overwritten by the second row later. + */ + if (init_strvar_from_file(mi->master_log_name, + sizeof(mi->master_log_name), &mi->file, + "")) + goto errwithmsg; + + lines= strtoul(mi->master_log_name, &first_non_digit, 10); + + if (mi->master_log_name[0]!='\0' && + *first_non_digit=='\0' && lines >= LINES_IN_MASTER_INFO_WITH_SSL) + { + /* Seems to be new format => read master log name from next line */ + if (init_strvar_from_file(mi->master_log_name, + sizeof(mi->master_log_name), &mi->file, "")) + goto errwithmsg; + } + else + lines= 7; + + if (init_intvar_from_file(&master_log_pos, &mi->file, 4) || + init_strvar_from_file(mi->host, sizeof(mi->host), &mi->file, 0) || + init_strvar_from_file(mi->user, sizeof(mi->user), &mi->file, "test") || + init_strvar_from_file(mi->password, SCRAMBLED_PASSWORD_CHAR_LENGTH+1, + &mi->file, 0) || + init_intvar_from_file(&port, &mi->file, MYSQL_PORT) || + init_intvar_from_file(&connect_retry, &mi->file, + DEFAULT_CONNECT_RETRY)) + goto errwithmsg; + + /* + If file has ssl part use it even if we have server without + SSL support. But these options will be ignored later when + slave will try connect to master, so in this case warning + is printed. 
+ */ + if (lines >= LINES_IN_MASTER_INFO_WITH_SSL) + { + if (init_intvar_from_file(&ssl, &mi->file, 0) || + init_strvar_from_file(mi->ssl_ca, sizeof(mi->ssl_ca), + &mi->file, 0) || + init_strvar_from_file(mi->ssl_capath, sizeof(mi->ssl_capath), + &mi->file, 0) || + init_strvar_from_file(mi->ssl_cert, sizeof(mi->ssl_cert), + &mi->file, 0) || + init_strvar_from_file(mi->ssl_cipher, sizeof(mi->ssl_cipher), + &mi->file, 0) || + init_strvar_from_file(mi->ssl_key, sizeof(mi->ssl_key), + &mi->file, 0)) + goto errwithmsg; + + /* + Starting from 5.1.16 ssl_verify_server_cert might be + in the file + */ + if (lines >= LINE_FOR_MASTER_SSL_VERIFY_SERVER_CERT && + init_intvar_from_file(&ssl_verify_server_cert, &mi->file, 0)) + goto errwithmsg; + /* + Starting from 6.0 master_heartbeat_period might be + in the file + */ + if (lines >= LINE_FOR_MASTER_HEARTBEAT_PERIOD && + init_floatvar_from_file(&master_heartbeat_period, &mi->file, 0.0)) + goto errwithmsg; + /* + Starting from MySQL Cluster 6.3 master_bind might be in the file + (this is just a reservation to avoid future upgrade problems) + */ + if (lines >= LINE_FOR_MASTER_BIND && + init_strvar_from_file(buf, sizeof(buf), &mi->file, "")) + goto errwithmsg; + /* + Starting from 6.0 list of server_id of ignorable servers might be + in the file + */ + if (lines >= LINE_FOR_REPLICATE_IGNORE_SERVER_IDS && + init_dynarray_intvar_from_file(&mi->ignore_server_ids, &mi->file)) + { + sql_print_error("Failed to initialize master info ignore_server_ids"); + goto errwithmsg; + } + + /* reserved */ + if (lines >= LINE_FOR_MASTER_UUID && + init_strvar_from_file(buf, sizeof(buf), &mi->file, "")) + goto errwithmsg; + + /* Starting from 5.5 the master_retry_count may be in the repository. 
*/ + if (lines >= LINE_FOR_MASTER_RETRY_COUNT && + init_strvar_from_file(buf, sizeof(buf), &mi->file, "")) + goto errwithmsg; + + if (lines >= LINE_FOR_SSL_CRLPATH && + (init_strvar_from_file(mi->ssl_crl, sizeof(mi->ssl_crl), + &mi->file, "") || + init_strvar_from_file(mi->ssl_crlpath, sizeof(mi->ssl_crlpath), + &mi->file, ""))) + goto errwithmsg; + + /* + Starting with MariaDB 10.0, we use a key=value syntax, which is nicer + in several ways. But we leave a bunch of empty lines to accomodate + any future old-style additions in MySQL (this will make it easier for + users moving from MariaDB to MySQL, to not have MySQL try to + interpret a MariaDB key=value line.) + */ + if (lines >= LINE_FOR_LAST_MYSQL_FUTURE) + { + uint i; + bool got_eq; + bool seen_using_gtid= false; + bool seen_do_domain_ids=false, seen_ignore_domain_ids=false; + + /* Skip lines used by / reserved for MySQL >= 5.6. */ + for (i= LINE_FOR_FIRST_MYSQL_5_6; i <= LINE_FOR_LAST_MYSQL_FUTURE; ++i) + { + if (init_strvar_from_file(buf, sizeof(buf), &mi->file, "")) + goto errwithmsg; + } + + /* + Parse any extra key=value lines. read_key_from_file() parses the file + for "key=" and returns the "key" if found. The "value" can then the + parsed on case by case basis. The "unknown" lines would be ignored to + facilitate downgrades. + 10.0 does not have the END_MARKER before any left-overs at the end + of the file. So ignore any but the first occurrence of a key. 
+ */ + while (!read_mi_key_from_file(buf, sizeof(buf), &mi->file, &got_eq)) + { + if (got_eq && !seen_using_gtid && !strcmp(buf, "using_gtid")) + { + int val; + if (!init_intvar_from_file(&val, &mi->file, 0)) + { + if (val == Master_info::USE_GTID_CURRENT_POS) + mi->using_gtid= Master_info::USE_GTID_CURRENT_POS; + else if (val == Master_info::USE_GTID_SLAVE_POS) + mi->using_gtid= Master_info::USE_GTID_SLAVE_POS; + else + mi->using_gtid= Master_info::USE_GTID_NO; + seen_using_gtid= true; + } else { + sql_print_error("Failed to initialize master info using_gtid"); + goto errwithmsg; + } + } + else if (got_eq && !seen_do_domain_ids && !strcmp(buf, "do_domain_ids")) + { + if (mi->domain_id_filter.init_ids(&mi->file, + Domain_id_filter::DO_DOMAIN_IDS)) + { + sql_print_error("Failed to initialize master info do_domain_ids"); + goto errwithmsg; + } + seen_do_domain_ids= true; + } + else if (got_eq && !seen_ignore_domain_ids && + !strcmp(buf, "ignore_domain_ids")) + { + if (mi->domain_id_filter.init_ids(&mi->file, + Domain_id_filter::IGNORE_DOMAIN_IDS)) + { + sql_print_error("Failed to initialize master info " + "ignore_domain_ids"); + goto errwithmsg; + } + seen_ignore_domain_ids= true; + } + else if (!got_eq && !strcmp(buf, "END_MARKER")) + { + /* + Guard agaist extra left-overs at the end of file, in case a later + update causes the file to shrink compared to earlier contents. 
+ */ + break; + } + } + } + } + +#ifndef HAVE_OPENSSL + if (ssl) + sql_print_warning("SSL information in the master info file " + "('%s') are ignored because this MySQL slave was " + "compiled without SSL support.", fname); +#endif /* HAVE_OPENSSL */ + + /* + This has to be handled here as init_intvar_from_file can't handle + my_off_t types + */ + mi->master_log_pos= (my_off_t) master_log_pos; + mi->port= (uint) port; + mi->connect_retry= (uint) connect_retry; + mi->ssl= (my_bool) ssl; + mi->ssl_verify_server_cert= ssl_verify_server_cert; + mi->heartbeat_period= MY_MIN(SLAVE_MAX_HEARTBEAT_PERIOD, master_heartbeat_period); + } + DBUG_PRINT("master_info",("log_file_name: %s position: %ld", + mi->master_log_name, + (ulong) mi->master_log_pos)); + + mi->rli.mi= mi; + if (mi->rli.init(slave_info_fname)) + goto err; + + mi->inited = 1; + mi->rli.is_relay_log_recovery= FALSE; + // now change cache READ -> WRITE - must do this before flush_master_info + reinit_io_cache(&mi->file, WRITE_CACHE, 0L, 0, 1); + if (unlikely((error= MY_TEST(flush_master_info(mi, TRUE, TRUE))))) + sql_print_error("Failed to flush master info file"); + mysql_mutex_unlock(&mi->data_lock); + DBUG_RETURN(error); + +errwithmsg: + sql_print_error("Error reading master configuration"); + +err: + if (fd >= 0) + { + mysql_file_close(fd, MYF(0)); + end_io_cache(&mi->file); + } + mi->fd= -1; + mysql_mutex_unlock(&mi->data_lock); + DBUG_RETURN(1); +} + + +/* + RETURN + 2 - flush relay log failed + 1 - flush master info failed + 0 - all ok +*/ +int flush_master_info(Master_info* mi, + bool flush_relay_log_cache, + bool need_lock_relay_log) +{ + IO_CACHE* file = &mi->file; + char lbuf[22]; + int err= 0; + + DBUG_ENTER("flush_master_info"); + DBUG_PRINT("enter",("master_pos: %ld", (long) mi->master_log_pos)); + + /* + Flush the relay log to disk. 
If we don't do it, then the relay log while + have some part (its last kilobytes) in memory only, so if the slave server + dies now, with, say, from master's position 100 to 150 in memory only (not + on disk), and with position 150 in master.info, then when the slave + restarts, the I/O thread will fetch binlogs from 150, so in the relay log + we will have "[0, 100] U [150, infinity[" and nobody will notice it, so the + SQL thread will jump from 100 to 150, and replication will silently break. + + When we come to this place in code, relay log may or not be initialized; + the caller is responsible for setting 'flush_relay_log_cache' accordingly. + */ + if (flush_relay_log_cache) + { + mysql_mutex_t *log_lock= mi->rli.relay_log.get_log_lock(); + IO_CACHE *log_file= mi->rli.relay_log.get_log_file(); + + if (need_lock_relay_log) + mysql_mutex_lock(log_lock); + + mysql_mutex_assert_owner(log_lock); + err= flush_io_cache(log_file); + + if (need_lock_relay_log) + mysql_mutex_unlock(log_lock); + + if (err) + DBUG_RETURN(2); + } + + /* + produce a line listing the total number and all the ignored server_id:s + */ + char* ignore_server_ids_buf; + { + ignore_server_ids_buf= + (char *) my_malloc(PSI_INSTRUMENT_ME, + (sizeof(global_system_variables.server_id) * 3 + 1) * + (1 + mi->ignore_server_ids.elements), MYF(MY_WME)); + if (!ignore_server_ids_buf) + DBUG_RETURN(1); /* error */ + ulong cur_len= sprintf(ignore_server_ids_buf, "%zu", + mi->ignore_server_ids.elements); + for (ulong i= 0; i < mi->ignore_server_ids.elements; i++) + { + ulong s_id; + get_dynamic(&mi->ignore_server_ids, (uchar*) &s_id, i); + cur_len+= sprintf(ignore_server_ids_buf + cur_len, " %lu", s_id); + } + } + + char *do_domain_ids_buf= 0, *ignore_domain_ids_buf= 0; + + do_domain_ids_buf= + mi->domain_id_filter.as_string(Domain_id_filter::DO_DOMAIN_IDS); + if (do_domain_ids_buf == NULL) + { + err= 1; /* error */ + goto done; + } + + ignore_domain_ids_buf= + 
mi->domain_id_filter.as_string(Domain_id_filter::IGNORE_DOMAIN_IDS); + if (ignore_domain_ids_buf == NULL) + { + err= 1; /* error */ + goto done; + } + + /* + We flushed the relay log BEFORE the master.info file, because if we crash + now, we will get a duplicate event in the relay log at restart. If we + flushed in the other order, we would get a hole in the relay log. + And duplicate is better than hole (with a duplicate, in later versions we + can add detection and scrap one event; with a hole there's nothing we can + do). + */ + + /* + In certain cases this code may create master.info files that seems + corrupted, because of extra lines filled with garbage in the end + file (this happens if new contents take less space than previous + contents of file). But because of number of lines in the first line + of file we don't care about this garbage. + */ + char heartbeat_buf[FLOATING_POINT_BUFFER]; + my_fcvt(mi->heartbeat_period, 3, heartbeat_buf, NULL); + my_b_seek(file, 0L); + my_b_printf(file, + "%u\n%s\n%s\n%s\n%s\n%s\n%d\n%d\n%d\n%s\n%s\n%s\n%s\n%s\n%d\n%s\n%s\n%s\n%s\n%d\n%s\n%s\n" + "\n\n\n\n\n\n\n\n\n\n\n" + "using_gtid=%d\n" + "do_domain_ids=%s\n" + "ignore_domain_ids=%s\n" + "END_MARKER\n", + LINES_IN_MASTER_INFO, + mi->master_log_name, llstr(mi->master_log_pos, lbuf), + mi->host, mi->user, + mi->password, mi->port, mi->connect_retry, + (int)(mi->ssl), mi->ssl_ca, mi->ssl_capath, mi->ssl_cert, + mi->ssl_cipher, mi->ssl_key, mi->ssl_verify_server_cert, + heartbeat_buf, "", ignore_server_ids_buf, + "", 0, + mi->ssl_crl, mi->ssl_crlpath, mi->using_gtid, + do_domain_ids_buf, ignore_domain_ids_buf); + err= flush_io_cache(file); + if (sync_masterinfo_period && !err && + ++(mi->sync_counter) >= sync_masterinfo_period) + { + err= my_sync(mi->fd, MYF(MY_WME)); + mi->sync_counter= 0; + } + + /* Fix err; flush_io_cache()/my_sync() may return -1 */ + err= (err != 0) ? 
1 : 0; + +done: + my_free(ignore_server_ids_buf); + my_free(do_domain_ids_buf); + my_free(ignore_domain_ids_buf); + DBUG_RETURN(err); +} + + +void end_master_info(Master_info* mi) +{ + DBUG_ENTER("end_master_info"); + + if (!mi->inited) + DBUG_VOID_RETURN; + if (mi->fd >= 0) + { + end_io_cache(&mi->file); + mysql_file_close(mi->fd, MYF(MY_WME)); + mi->fd = -1; + } + mi->inited = 0; + + DBUG_VOID_RETURN; +} + +/* Multi-Master By P.Linux */ +uchar *get_key_master_info(Master_info *mi, size_t *length, + my_bool not_used __attribute__((unused))) +{ + /* Return lower case name */ + *length= mi->cmp_connection_name.length; + return (uchar*) mi->cmp_connection_name.str; +} + +/* + Delete a master info + + Called from my_hash_delete(&master_info_hash) + Stops associated slave threads and frees master_info +*/ + +void free_key_master_info(Master_info *mi) +{ + DBUG_ENTER("free_key_master_info"); + mysql_mutex_unlock(&LOCK_active_mi); + + /* Ensure that we are not in reset_slave while this is done */ + mi->lock_slave_threads(); + terminate_slave_threads(mi,SLAVE_FORCE_ALL); + /* We use 2 here instead of 1 just to make it easier when debugging */ + mi->killed= 2; + end_master_info(mi); + end_relay_log_info(&mi->rli); + mi->unlock_slave_threads(); + delete mi; + + mysql_mutex_lock(&LOCK_active_mi); + DBUG_VOID_RETURN; +} + +/** + Check if connection name for master_info is valid. + + It's valid if it's a valid system name of length less than + MAX_CONNECTION_NAME. + + @return + 0 ok + 1 error +*/ + +bool check_master_connection_name(LEX_CSTRING *name) +{ + if (name->length >= MAX_CONNECTION_NAME) + return 1; + return 0; +} + + +/** + Create a log file with a given suffix. + + @param + res_file_name Store result here + length Length of res_file_name buffer + info_file Original file name (prefix) + append 1 if we should add suffix last (not before ext) + suffix Suffix + + @note + The suffix is added before the extension of the file name prefixed with '-'. 
  The suffix is also converted to lower case and we transform
  all not safe character, as we do with MySQL table names.

  If suffix is an empty string, then we don't add any suffix.
  This is to allow one to use this function also to generate old
  file names without a prefix.
*/

void create_logfile_name_with_suffix(char *res_file_name, size_t length,
                                     const char *info_file, bool append,
                                     LEX_CSTRING *suffix)
{
  char buff[MAX_CONNECTION_NAME+1],
    res[MAX_CONNECTION_NAME * MAX_FILENAME_MBWIDTH+1], *p;

  /* Copy the base file name; p points at its terminating 0 byte */
  p= strmake(res_file_name, info_file, length);
  /* If not empty suffix and there is place left for some part of the suffix */
  if (suffix->length != 0 && p <= res_file_name + length -1)
  {
    const char *info_file_end= info_file + (p - res_file_name);
    /*
      append != 0: add the suffix at the very end of the name.
      append == 0: insert the suffix just before the file extension
      (fn_ext2() finds the last '.' of the base name).
    */
    const char *ext= append ? info_file_end : fn_ext2(info_file);
    size_t res_length, ext_pos, from_length;
    uint errors;

    /* Create null terminated string */
    from_length= strmake(buff, suffix->str, suffix->length) - buff;
    /* Convert to characters usable in a file name */
    res_length= strconvert(system_charset_info, buff, from_length,
                           &my_charset_filename, res, sizeof(res), &errors);

    ext_pos= (size_t) (ext - info_file);
    length-= (suffix->length - ext_pos);    /* Leave place for extension */
    p= res_file_name + ext_pos;
    *p++= '-';                              /* Add separator */
    /* Append as much of the converted suffix as the remaining space allows */
    p= strmake(p, res, MY_MIN((size_t) (length - (p - res_file_name)),
                              res_length));
    /* Add back extension. We have checked above that there is space for it */
    strmov(p, ext);
  }
}

/*
  Copy replication filter settings from src_filter to dst_filter.

  For each filter category (do_db, do_table, ignore_db, ignore_table,
  wild_do_table, wild_ignore_table, rewrite_db) the value from src_filter
  is copied only when dst_filter has no value of its own for that
  category, i.e. explicit per-connection settings are never overwritten.
*/

void copy_filter_setting(Rpl_filter* dst_filter, Rpl_filter* src_filter)
{
  char buf[256];
  String tmp(buf, sizeof(buf), &my_charset_bin);

  /* replicate_do_db */
  dst_filter->get_do_db(&tmp);
  if (tmp.is_empty())
  {
    src_filter->get_do_db(&tmp);
    if (!tmp.is_empty())
      dst_filter->set_do_db(tmp.ptr());
  }

  /* replicate_do_table */
  dst_filter->get_do_table(&tmp);
  if (tmp.is_empty())
  {
    src_filter->get_do_table(&tmp);
    if (!tmp.is_empty())
      dst_filter->set_do_table(tmp.ptr());
  }

  /* replicate_ignore_db */
  dst_filter->get_ignore_db(&tmp);
  if (tmp.is_empty())
  {
    src_filter->get_ignore_db(&tmp);
    if (!tmp.is_empty())
      dst_filter->set_ignore_db(tmp.ptr());
  }

  /* replicate_ignore_table */
  dst_filter->get_ignore_table(&tmp);
  if (tmp.is_empty())
  {
    src_filter->get_ignore_table(&tmp);
    if (!tmp.is_empty())
      dst_filter->set_ignore_table(tmp.ptr());
  }

  /* replicate_wild_do_table */
  dst_filter->get_wild_do_table(&tmp);
  if (tmp.is_empty())
  {
    src_filter->get_wild_do_table(&tmp);
    if (!tmp.is_empty())
      dst_filter->set_wild_do_table(tmp.ptr());
  }

  /* replicate_wild_ignore_table */
  dst_filter->get_wild_ignore_table(&tmp);
  if (tmp.is_empty())
  {
    src_filter->get_wild_ignore_table(&tmp);
    if (!tmp.is_empty())
      dst_filter->set_wild_ignore_table(tmp.ptr());
  }

  /* replicate_rewrite_db */
  dst_filter->get_rewrite_db(&tmp);
  if (tmp.is_empty())
  {
    src_filter->get_rewrite_db(&tmp);
    if (!tmp.is_empty())
      dst_filter->set_rewrite_db(tmp.ptr());
  }
}

Master_info_index::Master_info_index()
{
  size_t filename_length, dir_length;
  /*
    Create the Master_info index file by prepending 'multi-' before
    the master_info_file file name.
  */
  fn_format(index_file_name, master_info_file, mysql_data_home,
            "", MY_UNPACK_FILENAME);
  filename_length= strlen(index_file_name) + 1; /* Count 0 byte */
  dir_length= dirname_length(index_file_name);
  /*
    Shift the base file name 6 bytes to the right inside the buffer to
    make room for the "multi-" prefix, then write the prefix in place.
    NOTE(review): this assumes index_file_name has at least 6 bytes of
    spare capacity beyond the formatted name — holds for FN_REFLEN-sized
    buffers with normal path lengths.
  */
  bmove_upp((uchar*) index_file_name + filename_length + 6,
            (uchar*) index_file_name + filename_length,
            filename_length - dir_length);
  memcpy(index_file_name + dir_length, "multi-", 6);

  /* Mark the IO_CACHE as not yet opened (see init_all_master_info()) */
  bzero((char*) &index_file, sizeof(index_file));
  index_file.file= -1;
}


/**
  Free all connection threads

  This is done during early stages of shutdown
  to give connection threads and slave threads time
  to die before ~Master_info_index is called
*/

void Master_info_index::free_connections()
{
  mysql_mutex_assert_owner(&LOCK_active_mi);
  /* Emptying the hash runs free_key_master_info() on every element */
  my_hash_reset(&master_info_hash);
}


/**
  Free all connection threads and free structures
*/

Master_info_index::~Master_info_index()
{
  my_hash_free(&master_info_hash);
  end_io_cache(&index_file);
  if (index_file.file >= 0)
    my_close(index_file.file, MYF(MY_WME));
}


/*
  Load all Master_info entries listed in the master.info.index file.

  For every connection name read from the index file, a Master_info is
  created, initialized from its master-<name>.info / relay-log-<name>.info
  files, registered in master_info_hash and, unless --skip-slave-start is
  given, its slave threads are started.

  RETURN:
    0 - all entries were read successfully
    1 - at least one entry failed (a warning is logged instead of an
        error when some other entries succeeded)
*/

bool Master_info_index::init_all_master_info()
{
  int thread_mask;
  int err_num= 0, succ_num= 0; // The number of success read Master_info
  char sign[MAX_CONNECTION_NAME+1];
  File index_file_nr;
  THD *thd;
  DBUG_ENTER("init_all_master_info");

  DBUG_ASSERT(master_info_index);

  /* Open (creating if needed) the index file and attach a read cache */
  if ((index_file_nr= my_open(index_file_name,
                              O_RDWR | O_CREAT | O_BINARY ,
                              MYF(MY_WME | ME_ERROR_LOG))) < 0 ||
      my_sync(index_file_nr, MYF(MY_WME)) ||
      init_io_cache(&index_file, index_file_nr,
                    IO_SIZE, READ_CACHE,
                    my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)),
                    0, MYF(MY_WME | MY_WAIT_IF_FULL)))
  {
    if (index_file_nr >= 0)
      my_close(index_file_nr,MYF(0));

    sql_print_error("Creation of Master_info index file '%s' failed",
                    index_file_name);
    DBUG_RETURN(1);
  }

  /* Initialize Master_info Hash Table */
  if (my_hash_init(PSI_INSTRUMENT_ME, &master_info_hash, system_charset_info,
                   MAX_REPLICATION_THREAD, 0, 0,
                   (my_hash_get_key) get_key_master_info,
                   (my_hash_free_key)free_key_master_info, HASH_UNIQUE))
  {
    sql_print_error("Initializing Master_info hash table failed");
    DBUG_RETURN(1);
  }

  thd= new THD(next_thread_id()); /* Needed by start_slave_threads */
  thd->thread_stack= (char*) &thd;
  thd->store_globals();

  /* Read one connection name per line from the index file */
  reinit_io_cache(&index_file, READ_CACHE, 0L,0,0);
  while (!init_strvar_from_file(sign, sizeof(sign),
                                &index_file, NULL))
  {
    LEX_CSTRING connection_name;
    Master_info *mi;
    char buf_master_info_file[FN_REFLEN];
    char buf_relay_log_info_file[FN_REFLEN];

    connection_name.str= sign;
    connection_name.length= strlen(sign);
    if (!(mi= new Master_info(&connection_name, relay_log_recovery)) ||
        mi->error())
    {
      /* delete of NULL is a no-op, so this is safe for both failures */
      delete mi;
      goto error;
    }

    init_thread_mask(&thread_mask,mi,0 /*not inverse*/);

    /* Derive the per-connection info file names from the connection name */
    create_logfile_name_with_suffix(buf_master_info_file,
                                    sizeof(buf_master_info_file),
                                    master_info_file, 0,
                                    &mi->cmp_connection_name);
    create_logfile_name_with_suffix(buf_relay_log_info_file,
                                    sizeof(buf_relay_log_info_file),
                                    relay_log_info_file, 0,
                                    &mi->cmp_connection_name);
    if (global_system_variables.log_warnings > 1)
      sql_print_information("Reading Master_info: '%s' Relay_info:'%s'",
                            buf_master_info_file, buf_relay_log_info_file);

    mi->lock_slave_threads();
    if (init_master_info(mi, buf_master_info_file, buf_relay_log_info_file,
                         0, thread_mask))
    {
      err_num++;
      sql_print_error("Initialized Master_info from '%s' failed",
                      buf_master_info_file);
      if (!master_info_index->get_master_info(&connection_name,
                                              Sql_condition::WARN_LEVEL_NOTE))
      {
        /*
          Master_info is not in HASH; Add it.
          NOTE: the failed Master_info is intentionally still registered
          (and counted in succ_num) — presumably so that the connection
          remains visible and can be repaired with CHANGE MASTER.
        */
        if (master_info_index->add_master_info(mi, FALSE))
          goto error;
        succ_num++;
        mi->unlock_slave_threads();
      }
      else
      {
        /* Master_info already in HASH */
        sql_print_error(ER_THD_OR_DEFAULT(current_thd,
                                          ER_CONNECTION_ALREADY_EXISTS),
                        (int) connection_name.length, connection_name.str,
                        (int) connection_name.length, connection_name.str);
        mi->unlock_slave_threads();
        delete mi;
      }
      continue;
    }
    else
    {
      /* Initialization of Master_info succeeded. Add it to HASH */
      if (global_system_variables.log_warnings > 1)
        sql_print_information("Initialized Master_info from '%s'",
                              buf_master_info_file);
      if (master_info_index->get_master_info(&connection_name,
                                             Sql_condition::WARN_LEVEL_NOTE))
      {
        /* Master_info was already registered */
        sql_print_error(ER_THD_OR_DEFAULT(current_thd,
                                          ER_CONNECTION_ALREADY_EXISTS),
                        (int) connection_name.length, connection_name.str,
                        (int) connection_name.length, connection_name.str);
        mi->unlock_slave_threads();
        delete mi;
        continue;
      }

      /* Master_info was not registered; add it */
      if (master_info_index->add_master_info(mi, FALSE))
        goto error;
      succ_num++;

      if (!opt_skip_slave_start)
      {
        if (start_slave_threads(current_thd,
                                1 /* need mutex */,
                                1 /* wait for start*/,
                                mi,
                                buf_master_info_file,
                                buf_relay_log_info_file,
                                SLAVE_IO | SLAVE_SQL))
        {
          /* Thread start failure is non-fatal; keep loading other entries */
          sql_print_error("Failed to create slave threads for connection '%.*s'",
                          (int) connection_name.length,
                          connection_name.str);
          continue;
        }
        if (global_system_variables.log_warnings)
          sql_print_information("Started replication for '%.*s'",
                                (int) connection_name.length,
                                connection_name.str);
      }
      mi->unlock_slave_threads();
    }
  }
  thd->reset_globals();
  delete thd;

  if (!err_num) // No Error on read Master_info
  {
    if (global_system_variables.log_warnings > 2)
      sql_print_information("Reading of all Master_info entries succeeded");
    DBUG_RETURN(0);
  }

  if (succ_num) // Have some Error and some Success
    sql_print_warning("Reading of some Master_info entries failed");
  else
    sql_print_error("Reading of all Master_info entries failed!");
  DBUG_RETURN(1);

error:
  thd->reset_globals();
  delete thd;
  DBUG_RETURN(1);
}

+ +/* Write new master.info to master.info.index File */ +bool Master_info_index::write_master_name_to_index_file(LEX_CSTRING *name, + bool do_sync) +{ + DBUG_ASSERT(my_b_inited(&index_file) != 0); + DBUG_ENTER("write_master_name_to_index_file"); + + /* Don't write default slave to master_info.index */ + if (name->length == 0) + DBUG_RETURN(0); + + reinit_io_cache(&index_file, WRITE_CACHE, + my_b_filelength(&index_file), 0, 0); + + if (my_b_write(&index_file, (uchar*) name->str, name->length) || + my_b_write(&index_file, (uchar*) "\n", 1) || + flush_io_cache(&index_file) || + (do_sync && my_sync(index_file.file, MYF(MY_WME)))) + { + sql_print_error("Write of new Master_info for '%.*s' to index file failed", + (int) name->length, name->str); + DBUG_RETURN(1); + } + + DBUG_RETURN(0); +} + + +/** + Get Master_info for a connection and lock the object from deletion + + @param + connection_name Connection name + warning WARN_LEVEL_NOTE -> Don't print anything + WARN_LEVEL_WARN -> Issue warning if not exists + WARN_LEVEL_ERROR-> Issue error if not exists +*/ + +Master_info *get_master_info(const LEX_CSTRING *connection_name, + Sql_condition::enum_warning_level warning) +{ + Master_info *mi; + DBUG_ENTER("get_master_info"); + + /* Protect against inserts into hash */ + mysql_mutex_lock(&LOCK_active_mi); + /* + The following can only be true during shutdown when slave has been killed + but some other threads are still trying to access slave statistics. + */ + if (unlikely(!master_info_index)) + { + if (warning != Sql_condition::WARN_LEVEL_NOTE) + my_error(WARN_NO_MASTER_INFO, + MYF(warning == Sql_condition::WARN_LEVEL_WARN ? + ME_WARNING : 0), + (int) connection_name->length, connection_name->str); + mysql_mutex_unlock(&LOCK_active_mi); + DBUG_RETURN(0); + } + if ((mi= master_info_index->get_master_info(connection_name, warning))) + { + /* + We have to use sleep_lock here. 
If we would use LOCK_active_mi + then we would take locks in wrong order in Master_info::release() + */ + mysql_mutex_lock(&mi->sleep_lock); + mi->users++; + DBUG_PRINT("info",("users: %d", mi->users)); + mysql_mutex_unlock(&mi->sleep_lock); + } + mysql_mutex_unlock(&LOCK_active_mi); + DBUG_RETURN(mi); +} + + +/** + Release master info. + Signals ~Master_info that it's now safe to delete it +*/ + +void Master_info::release() +{ + mysql_mutex_lock(&sleep_lock); + if (!--users && killed) + { + /* Signal ~Master_info that it's ok to now free it */ + mysql_cond_signal(&sleep_cond); + } + mysql_mutex_unlock(&sleep_lock); +} + + +/** + Get Master_info for a connection + + @param + connection_name Connection name + warning WARN_LEVEL_NOTE -> Don't print anything + WARN_LEVEL_WARN -> Issue warning if not exists + WARN_LEVEL_ERROR-> Issue error if not exists +*/ + +Master_info * +Master_info_index::get_master_info(const LEX_CSTRING *connection_name, + Sql_condition::enum_warning_level warning) +{ + Master_info *mi; + char buff[MAX_CONNECTION_NAME+1], *res; + size_t buff_length; + DBUG_ENTER("get_master_info"); + DBUG_PRINT("enter", + ("connection_name: '%.*s'", (int) connection_name->length, + connection_name->str)); + + /* Make name lower case for comparison */ + res= strmake(buff, connection_name->str, connection_name->length); + my_casedn_str(system_charset_info, buff); + buff_length= (size_t) (res-buff); + + mi= (Master_info*) my_hash_search(&master_info_hash, + (uchar*) buff, buff_length); + if (!mi && warning != Sql_condition::WARN_LEVEL_NOTE) + { + my_error(WARN_NO_MASTER_INFO, + MYF(warning == Sql_condition::WARN_LEVEL_WARN ? 
ME_WARNING : + 0), + (int) connection_name->length, + connection_name->str); + } + DBUG_RETURN(mi); +} + + +/* Check Master_host & Master_port is duplicated or not */ +bool Master_info_index::check_duplicate_master_info(LEX_CSTRING *name_arg, + const char *host, + uint port) +{ + Master_info *mi; + DBUG_ENTER("check_duplicate_master_info"); + + mysql_mutex_assert_owner(&LOCK_active_mi); + DBUG_ASSERT(master_info_index); + + /* Get full host and port name */ + if ((mi= master_info_index->get_master_info(name_arg, + Sql_condition::WARN_LEVEL_NOTE))) + { + if (!host) + host= mi->host; + if (!port) + port= mi->port; + } + if (!host || !port) + DBUG_RETURN(FALSE); // Not comparable yet + + for (uint i= 0; i < master_info_hash.records; ++i) + { + Master_info *tmp_mi; + tmp_mi= (Master_info *) my_hash_element(&master_info_hash, i); + if (tmp_mi == mi) + continue; // Current connection + if (!strcasecmp(host, tmp_mi->host) && port == tmp_mi->port) + { + my_error(ER_CONNECTION_ALREADY_EXISTS, MYF(0), + (int) name_arg->length, + name_arg->str, + (int) tmp_mi->connection_name.length, + tmp_mi->connection_name.str); + DBUG_RETURN(TRUE); + } + } + DBUG_RETURN(FALSE); +} + + +/* Add a Master_info class to Hash Table */ +bool Master_info_index::add_master_info(Master_info *mi, bool write_to_file) +{ + /* + We have to protect against shutdown to ensure we are not calling + my_hash_insert() while my_hash_free() is in progress + */ + if (unlikely(abort_loop) || + !my_hash_insert(&master_info_hash, (uchar*) mi)) + { + if (global_system_variables.log_warnings > 2) + sql_print_information("Added new Master_info '%.*s' to hash table", + (int) mi->connection_name.length, + mi->connection_name.str); + if (write_to_file) + return write_master_name_to_index_file(&mi->connection_name, 1); + return FALSE; + } + + /* Impossible error (EOM) ? 
*/ + sql_print_error("Adding new entry '%.*s' to master_info failed", + (int) mi->connection_name.length, + mi->connection_name.str); + return TRUE; +} + + +/** + Remove a Master_info class From Hash Table + + TODO: Change this to use my_rename() to make the file name creation + atomic +*/ + +bool Master_info_index::remove_master_info(Master_info *mi, bool clear_log_files) +{ + char tmp_name[FN_REFLEN]; + DBUG_ENTER("remove_master_info"); + mysql_mutex_assert_owner(&LOCK_active_mi); + + if (clear_log_files) + { + /* This code is only executed when change_master() failes to create a new master info */ + + // Delete any temporary relay log files that could have been created by change_master() + mi->rli.relay_log.reset_logs(current_thd, 0, (rpl_gtid*) 0, 0, 0); + /* Delete master-'connection'.info */ + create_logfile_name_with_suffix(tmp_name, + sizeof(tmp_name), + master_info_file, 0, + &mi->cmp_connection_name); + my_delete(tmp_name, MYF(0)); + /* Delete relay-log-'connection'.info */ + create_logfile_name_with_suffix(tmp_name, + sizeof(tmp_name), + relay_log_info_file, 0, + &mi->cmp_connection_name); + my_delete(tmp_name, MYF(0)); + } + + // Delete Master_info and rewrite others to file + if (!my_hash_delete(&master_info_hash, (uchar*) mi)) + { + File index_file_nr; + + // Close IO_CACHE and FILE handler fisrt + end_io_cache(&index_file); + my_close(index_file.file, MYF(MY_WME)); + + // Reopen File and truncate it + if ((index_file_nr= my_open(index_file_name, + O_RDWR | O_CREAT | O_TRUNC | O_BINARY , + MYF(MY_WME))) < 0 || + init_io_cache(&index_file, index_file_nr, + IO_SIZE, WRITE_CACHE, + my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)), + 0, MYF(MY_WME | MY_WAIT_IF_FULL))) + { + int error= my_errno; + if (index_file_nr >= 0) + my_close(index_file_nr,MYF(0)); + + sql_print_error("Create of Master Info Index file '%s' failed with " + "error: %M", + index_file_name, error); + DBUG_RETURN(TRUE); + } + + // Rewrite Master_info.index + for (uint i= 0; i< 
master_info_hash.records; ++i) + { + Master_info *tmp_mi; + tmp_mi= (Master_info *) my_hash_element(&master_info_hash, i); + write_master_name_to_index_file(&tmp_mi->connection_name, 0); + } + if (my_sync(index_file_nr, MYF(MY_WME))) + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +/** + give_error_if_slave_running() + + @param + already_locked 0 if we need to lock, 1 if we have LOCK_active_mi_locked + + @return + TRUE If some slave is running. An error is printed + FALSE No slave is running +*/ + +bool give_error_if_slave_running(bool already_locked) +{ + bool ret= 0; + DBUG_ENTER("give_error_if_slave_running"); + + if (!already_locked) + mysql_mutex_lock(&LOCK_active_mi); + if (!master_info_index) + { + my_error(ER_SERVER_SHUTDOWN, MYF(0)); + ret= 1; + } + else + { + HASH *hash= &master_info_index->master_info_hash; + for (uint i= 0; i< hash->records; ++i) + { + Master_info *mi; + mi= (Master_info *) my_hash_element(hash, i); + if (mi->rli.slave_running != MYSQL_SLAVE_NOT_RUN) + { + my_error(ER_SLAVE_MUST_STOP, MYF(0), (int) mi->connection_name.length, + mi->connection_name.str); + ret= 1; + break; + } + } + } + if (!already_locked) + mysql_mutex_unlock(&LOCK_active_mi); + DBUG_RETURN(ret); +} + + +/** + any_slave_sql_running() + + @param + already_locked 0 if we need to lock, 1 if we have LOCK_active_mi_locked + + @return + 0 No Slave SQL thread is running + # Number of slave SQL thread running + + Note that during shutdown we return 1. This is needed to ensure we + don't try to resize thread pool during shutdown as during shutdown + master_info_hash may be freeing the hash and during that time + hash entries can't be accessed. 
+*/ + +uint any_slave_sql_running(bool already_locked) +{ + uint count= 0; + HASH *hash; + DBUG_ENTER("any_slave_sql_running"); + + if (!already_locked) + mysql_mutex_lock(&LOCK_active_mi); + else + mysql_mutex_assert_owner(&LOCK_active_mi); + if (unlikely(abort_loop || !master_info_index)) + count= 1; + else + { + hash= &master_info_index->master_info_hash; + for (uint i= 0; i< hash->records; ++i) + { + Master_info *mi= (Master_info *)my_hash_element(hash, i); + if (mi->rli.slave_running != MYSQL_SLAVE_NOT_RUN) + count++; + } + } + if (!already_locked) + mysql_mutex_unlock(&LOCK_active_mi); + DBUG_RETURN(count); +} + + +/** + Master_info_index::start_all_slaves() + + Start all slaves that was not running. + + @return + TRUE Error + FALSE Everything ok. + + This code is written so that we don't keep LOCK_active_mi active + while we are starting a slave. +*/ + +bool Master_info_index::start_all_slaves(THD *thd) +{ + bool result= FALSE; + DBUG_ENTER("start_all_slaves"); + mysql_mutex_assert_owner(&LOCK_active_mi); + + for (uint i= 0; i< master_info_hash.records; i++) + { + Master_info *mi; + mi= (Master_info *) my_hash_element(&master_info_hash, i); + mi->in_start_all_slaves= 0; + } + + for (uint i= 0; i< master_info_hash.records; ) + { + int error; + Master_info *mi; + mi= (Master_info *) my_hash_element(&master_info_hash, i); + + /* + Try to start all slaves that are configured (host is defined) + and are not already running + */ + if (!((mi->slave_running == MYSQL_SLAVE_NOT_RUN || + !mi->rli.slave_running) && *mi->host) || + mi->in_start_all_slaves) + { + i++; + continue; + } + mi->in_start_all_slaves= 1; + + mysql_mutex_lock(&mi->sleep_lock); + mi->users++; // Mark used + mysql_mutex_unlock(&mi->sleep_lock); + mysql_mutex_unlock(&LOCK_active_mi); + error= start_slave(thd, mi, 1); + mi->release(); + mysql_mutex_lock(&LOCK_active_mi); + if (unlikely(error)) + { + my_error(ER_CANT_START_STOP_SLAVE, MYF(0), + "START", + (int) mi->connection_name.length, + 
mi->connection_name.str); + result= 1; + if (error < 0) // fatal error + break; + } + else if (thd) + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_SLAVE_STARTED, ER_THD(thd, ER_SLAVE_STARTED), + (int) mi->connection_name.length, + mi->connection_name.str); + /* Restart from first element as master_info_hash may have changed */ + i= 0; + continue; + } + DBUG_RETURN(result); +} + + +/** + Master_info_index::stop_all_slaves() + + Start all slaves that was not running. + + @param thread id from user + + @return + TRUE Error + FALSE Everything ok. + + This code is written so that we don't keep LOCK_active_mi active + while we are stopping a slave. +*/ + +bool Master_info_index::stop_all_slaves(THD *thd) +{ + bool result= FALSE; + DBUG_ENTER("stop_all_slaves"); + mysql_mutex_assert_owner(&LOCK_active_mi); + DBUG_ASSERT(thd); + + for (uint i= 0; i< master_info_hash.records; i++) + { + Master_info *mi; + mi= (Master_info *) my_hash_element(&master_info_hash, i); + mi->in_stop_all_slaves= 0; + } + + for (uint i= 0; i< master_info_hash.records ;) + { + int error; + Master_info *mi; + mi= (Master_info *) my_hash_element(&master_info_hash, i); + if (!(mi->slave_running != MYSQL_SLAVE_NOT_RUN || + mi->rli.slave_running) || + mi->in_stop_all_slaves) + { + i++; + continue; + } + mi->in_stop_all_slaves= 1; // Protection for loops + + mysql_mutex_lock(&mi->sleep_lock); + mi->users++; // Mark used + mysql_mutex_unlock(&mi->sleep_lock); + mysql_mutex_unlock(&LOCK_active_mi); + error= stop_slave(thd, mi, 1); + mi->release(); + mysql_mutex_lock(&LOCK_active_mi); + if (unlikely(error)) + { + my_error(ER_CANT_START_STOP_SLAVE, MYF(0), + "STOP", + (int) mi->connection_name.length, + mi->connection_name.str); + result= 1; + if (error < 0) // Fatal error + break; + } + else + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_SLAVE_STOPPED, ER_THD(thd, ER_SLAVE_STOPPED), + (int) mi->connection_name.length, + mi->connection_name.str); + /* Restart from first 
element as master_info_hash may have changed */ + i= 0; + continue; + } + DBUG_RETURN(result); +} + +Domain_id_filter::Domain_id_filter() : m_filter(false) +{ + for (int i= DO_DOMAIN_IDS; i <= IGNORE_DOMAIN_IDS; i ++) + { + my_init_dynamic_array(PSI_INSTRUMENT_ME, &m_domain_ids[i], sizeof(ulong), + 16, 16, MYF(0)); + } +} + +Domain_id_filter::~Domain_id_filter() +{ + for (int i= DO_DOMAIN_IDS; i <= IGNORE_DOMAIN_IDS; i ++) + { + delete_dynamic(&m_domain_ids[i]); + } +} + +/** + Update m_filter flag for the current group by looking up its domain id in the + domain ids list. DO_DOMAIN_IDS list is only looked-up is both (do & ignore) + list are non-empty. +*/ +void Domain_id_filter::do_filter(ulong domain_id) +{ + DYNAMIC_ARRAY *do_domain_ids= &m_domain_ids[DO_DOMAIN_IDS]; + DYNAMIC_ARRAY *ignore_domain_ids= &m_domain_ids[IGNORE_DOMAIN_IDS]; + + if (do_domain_ids->elements > 0) + { + if (likely(do_domain_ids->elements == 1)) + m_filter= ((* (ulong *) dynamic_array_ptr(do_domain_ids, 0)) + != domain_id); + else + m_filter= (bsearch((const ulong *) &domain_id, do_domain_ids->buffer, + do_domain_ids->elements, sizeof(ulong), + change_master_id_cmp) == NULL); + } + else if (ignore_domain_ids->elements > 0) + { + if (likely(ignore_domain_ids->elements == 1)) + m_filter= ((* (ulong *) dynamic_array_ptr(ignore_domain_ids, 0)) == + domain_id); + else + m_filter= (bsearch((const ulong *) &domain_id, ignore_domain_ids->buffer, + ignore_domain_ids->elements, sizeof(ulong), + change_master_id_cmp) != NULL); + } + return; +} + +/** + Reset m_filter. It should be called when IO thread receives COMMIT_EVENT or + XID_EVENT. +*/ +void Domain_id_filter::reset_filter() +{ + m_filter= false; +} + +void Domain_id_filter::clear_ids() +{ + reset_dynamic(&m_domain_ids[DO_DOMAIN_IDS]); + reset_dynamic(&m_domain_ids[IGNORE_DOMAIN_IDS]); +} + +/** + Update the do/ignore domain id filter lists. 
+ + @param do_ids [IN] domain ids to be kept + @param ignore_ids [IN] domain ids to be filtered out + @param using_gtid [IN] use GTID? + + @retval false Success + true Error +*/ +bool Domain_id_filter::update_ids(DYNAMIC_ARRAY *do_ids, + DYNAMIC_ARRAY *ignore_ids, + bool using_gtid) +{ + bool do_list_empty, ignore_list_empty; + + if (do_ids) + { + do_list_empty= (do_ids->elements > 0) ? false : true; + } else { + do_list_empty= (m_domain_ids[DO_DOMAIN_IDS].elements > 0) ? false : true; + } + + if (ignore_ids) + { + ignore_list_empty= (ignore_ids->elements > 0) ? false : true; + } else { + ignore_list_empty= (m_domain_ids[IGNORE_DOMAIN_IDS].elements > 0) ? false : + true; + } + + if (!do_list_empty && !ignore_list_empty) + { + sql_print_error("Both DO_DOMAIN_IDS & IGNORE_DOMAIN_IDS lists can't be " + "non-empty at the same time"); + return true; + } + + if (using_gtid == Master_info::USE_GTID_NO && + (!do_list_empty || !ignore_list_empty)) + { + sql_print_error("DO_DOMAIN_IDS or IGNORE_DOMAIN_IDS lists can't be " + "non-empty in non-GTID mode (MASTER_USE_GTID=no)"); + return true; + } + + if (do_ids) + update_change_master_ids(do_ids, &m_domain_ids[DO_DOMAIN_IDS]); + + if (ignore_ids) + update_change_master_ids(ignore_ids, &m_domain_ids[IGNORE_DOMAIN_IDS]); + + m_filter= false; + + return false; +} + +/** + Serialize and store the ids from domain id lists into the thd's protocol + buffer. + + @param thd [IN] thread handler + + @retval void +*/ +void Domain_id_filter::store_ids(THD *thd) +{ + for (int i= DO_DOMAIN_IDS; i <= IGNORE_DOMAIN_IDS; i ++) + { + prot_store_ids(thd, &m_domain_ids[i]); + } +} + +/** + Initialize the given domain_id list (DYNAMIC_ARRAY) with the + space-separated list of numbers from the specified IO_CACHE where + the first number represents the total number of entries to follows. 
+ + @param f [IN] IO_CACHE file + @param type [IN] domain id list type + + @retval false Success + true Error +*/ +bool Domain_id_filter::init_ids(IO_CACHE *f, enum_list_type type) +{ + return init_dynarray_intvar_from_file(&m_domain_ids[type], f); +} + +/** + Return the elements of the give domain id list type as string. + + @param type [IN] domain id list type + + @retval a string buffer storing the total number + of elements followed by the individual + elements (space-separated) in the + specified list. + + Note: Its caller's responsibility to free the returned string buffer. +*/ +char *Domain_id_filter::as_string(enum_list_type type) +{ + char *buf; + size_t sz; + DYNAMIC_ARRAY *ids= &m_domain_ids[type]; + + sz= (sizeof(ulong) * 3 + 1) * (1 + ids->elements); + + if (!(buf= (char *) my_malloc(PSI_INSTRUMENT_ME, sz, MYF(MY_WME)))) + return NULL; + + // Store the total number of elements followed by the individual elements. + size_t cur_len= sprintf(buf, "%zu", ids->elements); + sz-= cur_len; + + for (uint i= 0; i < ids->elements; i++) + { + ulong domain_id; + get_dynamic(ids, (void *) &domain_id, i); + cur_len+= my_snprintf(buf + cur_len, sz, " %lu", domain_id); + sz-= cur_len; + } + return buf; +} + +void update_change_master_ids(DYNAMIC_ARRAY *new_ids, DYNAMIC_ARRAY *old_ids) +{ + reset_dynamic(old_ids); + + /* bsearch requires an ordered list. */ + sort_dynamic(new_ids, change_master_id_cmp); + + for (uint i= 0; i < new_ids->elements; i++) + { + ulong id; + get_dynamic(new_ids, (void *) &id, i); + + if (bsearch((const ulong *) &id, old_ids->buffer, old_ids->elements, + sizeof(ulong), change_master_id_cmp) == NULL) + { + insert_dynamic(old_ids, (ulong *) &id); + } + } + return; +} + +/** + Serialize and store the ids from the given ids DYNAMIC_ARRAY into the thd's + protocol buffer. 
+ + @param thd [IN] thread handler + @param ids [IN] ids list + + @retval void +*/ + +void prot_store_ids(THD *thd, DYNAMIC_ARRAY *ids) +{ + char buff[FN_REFLEN]; + uint i, cur_len; + + for (i= 0, buff[0]= 0, cur_len= 0; i < ids->elements; i++) + { + ulong id, len; + char dbuff[FN_REFLEN]; + get_dynamic(ids, (void *) &id, i); + len= sprintf(dbuff, (i == 0 ? "%lu" : ", %lu"), id); + if (cur_len + len + 4 > FN_REFLEN) + { + /* + break the loop whenever remained space could not fit + ellipses on the next cycle + */ + cur_len+= sprintf(dbuff + cur_len, "..."); + break; + } + cur_len+= sprintf(buff + cur_len, "%s", dbuff); + } + thd->protocol->store(buff, cur_len, &my_charset_bin); + return; +} + + +bool Master_info_index::flush_all_relay_logs() +{ + DBUG_ENTER("flush_all_relay_logs"); + bool result= false; + int error= 0; + mysql_mutex_lock(&LOCK_active_mi); + for (uint i= 0; i< master_info_hash.records; i++) + { + Master_info *mi; + mi= (Master_info *) my_hash_element(&master_info_hash, i); + mi->in_flush_all_relay_logs= 0; + } + for (uint i=0; i < master_info_hash.records;) + { + Master_info *mi; + mi= (Master_info *)my_hash_element(&master_info_hash, i); + DBUG_ASSERT(mi); + + if (mi->in_flush_all_relay_logs) + { + i++; + continue; + } + mi->in_flush_all_relay_logs= 1; + + mysql_mutex_lock(&mi->sleep_lock); + mi->users++; // Mark used + mysql_mutex_unlock(&mi->sleep_lock); + mysql_mutex_unlock(&LOCK_active_mi); + + mysql_mutex_lock(&mi->data_lock); + error= rotate_relay_log(mi); + mysql_mutex_unlock(&mi->data_lock); + mi->release(); + mysql_mutex_lock(&LOCK_active_mi); + + if (unlikely(error)) + { + result= true; + break; + } + /* Restart from first element as master_info_hash may have changed */ + i= 0; + continue; + } + mysql_mutex_unlock(&LOCK_active_mi); + DBUG_RETURN(result); +} + +#endif /* HAVE_REPLICATION */ diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h new file mode 100644 index 00000000..6058b7fb --- /dev/null +++ b/sql/rpl_mi.h @@ -0,0 +1,475 @@ +/* Copyright 
(c) 2006, 2012, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef RPL_MI_H +#define RPL_MI_H + +#ifdef HAVE_REPLICATION + +#include "rpl_rli.h" +#include "rpl_reporting.h" +#include +#include "rpl_filter.h" +#include "keycaches.h" + +typedef struct st_mysql MYSQL; + +/** + Domain id based filter to handle DO_DOMAIN_IDS and IGNORE_DOMAIN_IDS used to + set filtering on replication slave based on event's GTID domain_id. +*/ +class Domain_id_filter +{ +private: + /* + Flag to tell whether the events in the current GTID group get written to + the relay log. It is set according to the domain_id based filtering rule + on every GTID_EVENT and reset at the end of current GTID event group. + */ + bool m_filter; + +public: + /* domain id list types */ + enum enum_list_type { + DO_DOMAIN_IDS= 0, + IGNORE_DOMAIN_IDS + }; + + /* + DO_DOMAIN_IDS (0): + Ignore all the events which do not belong to any of the domain ids in the + list. + + IGNORE_DOMAIN_IDS (1): + Ignore the events which belong to one of the domain ids in the list. + */ + DYNAMIC_ARRAY m_domain_ids[2]; + + Domain_id_filter(); + + ~Domain_id_filter(); + + /* + Returns whether the current group needs to be filtered. 
+ */ + bool is_group_filtered() { return m_filter; } + + /* + Checks whether the group with the specified domain_id needs to be + filtered and updates m_filter flag accordingly. + */ + void do_filter(ulong domain_id); + + /* + Reset m_filter. It should be called when IO thread receives COMMIT_EVENT or + XID_EVENT. + */ + void reset_filter(); + + /* + Clear do_ids and ignore_ids to disable domain id filtering + */ + void clear_ids(); + + /* + Update the do/ignore domain id filter lists. + + @param do_ids [IN] domain ids to be kept + @param ignore_ids [IN] domain ids to be filtered out + @param using_gtid [IN] use GTID? + + @retval false Success + true Error + */ + bool update_ids(DYNAMIC_ARRAY *do_ids, DYNAMIC_ARRAY *ignore_ids, + bool using_gtid); + + /* + Serialize and store the ids from domain id lists into the thd's protocol + buffer. + + @param thd [IN] thread handler + + @retval void + */ + void store_ids(THD *thd); + + /* + Initialize the given domain id list (DYNAMIC_ARRAY) with the + space-separated list of numbers from the specified IO_CACHE where + the first number is the total number of entries to follows. + + @param f [IN] IO_CACHE file + @param type [IN] domain id list type + + @retval false Success + true Error + */ + bool init_ids(IO_CACHE *f, enum_list_type type); + + /* + Return the elements of the give domain id list type as string. + + @param type [IN] domain id list type + + @retval a string buffer storing the total number + of elements followed by the individual + elements (space-separated) in the + specified list. + + Note: Its caller's responsibility to free the returned string buffer. 
+ */ + char *as_string(enum_list_type type); + +}; + + +extern TYPELIB slave_parallel_mode_typelib; + +typedef struct st_rows_event_tracker +{ + char binlog_file_name[FN_REFLEN]; + my_off_t first_seen; + my_off_t last_seen; + bool stmt_end_seen; + void update(const char *file_name, my_off_t pos, + const uchar *buf, + const Format_description_log_event *fdle); + void reset(); + bool check_and_report(const char* file_name, my_off_t pos); +} Rows_event_tracker; + +/***************************************************************************** + Replication IO Thread + + Master_info contains: + - information about how to connect to a master + - current master log name + - current master log offset + - misc control variables + + Master_info is initialized once from the master.info file if such + exists. Otherwise, data members corresponding to master.info fields + are initialized with defaults specified by master-* options. The + initialization is done through init_master_info() call. + + The format of master.info file: + + log_name + log_pos + master_host + master_user + master_pass + master_port + master_connect_retry + + To write out the contents of master.info file to disk ( needed every + time we read and queue data from the master ), a call to + flush_master_info() is required. 
+ + To clean up, call end_master_info() + +*****************************************************************************/ + +class Master_info : public Slave_reporting_capability +{ + public: + enum enum_using_gtid { + USE_GTID_NO= 0, USE_GTID_CURRENT_POS= 1, USE_GTID_SLAVE_POS= 2 + }; + + Master_info(LEX_CSTRING *connection_name, bool is_slave_recovery); + ~Master_info(); + bool shall_ignore_server_id(ulong s_id); + void clear_in_memory_info(bool all); + bool error() + { + /* If malloc() in initialization failed */ + return connection_name.str == 0; + } + static const char *using_gtid_astext(enum enum_using_gtid arg); + bool using_parallel() + { + return opt_slave_parallel_threads > 0 && + parallel_mode > SLAVE_PARALLEL_NONE; + } + void release(); + void wait_until_free(); + void lock_slave_threads(); + void unlock_slave_threads(); + + /* the variables below are needed because we can change masters on the fly */ + char master_log_name[FN_REFLEN+6]; /* Room for multi-*/ + char host[HOSTNAME_LENGTH*SYSTEM_CHARSET_MBMAXLEN+1]; + char user[USERNAME_LENGTH+1]; + char password[MAX_PASSWORD_LENGTH*SYSTEM_CHARSET_MBMAXLEN+1]; + LEX_CSTRING connection_name; /* User supplied connection name */ + LEX_CSTRING cmp_connection_name; /* Connection name in lower case */ + bool ssl; // enables use of SSL connection if true + char ssl_ca[FN_REFLEN], ssl_capath[FN_REFLEN], ssl_cert[FN_REFLEN]; + char ssl_cipher[FN_REFLEN], ssl_key[FN_REFLEN]; + char ssl_crl[FN_REFLEN], ssl_crlpath[FN_REFLEN]; + bool ssl_verify_server_cert; + + my_off_t master_log_pos; + File fd; // we keep the file open, so we need to remember the file pointer + IO_CACHE file; + + mysql_mutex_t data_lock, run_lock, sleep_lock, start_stop_lock, start_alter_lock, start_alter_list_lock; + mysql_cond_t data_cond, start_cond, stop_cond, sleep_cond; + THD *io_thd; + MYSQL* mysql; + uint32 file_id; /* for 3.23 load data infile */ + Relay_log_info rli; + uint port; + Rpl_filter* rpl_filter; /* Each replication can set its 
filter rule*/ + /* + to hold checksum alg in use until IO thread has received FD. + Initialized to novalue, then set to the queried from master + @@global.binlog_checksum and deactivated once FD has been received. + */ + enum enum_binlog_checksum_alg checksum_alg_before_fd; + uint connect_retry; +#ifndef DBUG_OFF + int events_till_disconnect; + + /* + The following are auxiliary DBUG variables used to kill IO thread in the + middle of a group/transaction (see "kill_slave_io_after_2_events"). + */ + bool dbug_do_disconnect; + int dbug_event_counter; +#endif + bool inited; + volatile bool abort_slave; + volatile uint slave_running; + volatile ulong slave_run_id; + /* + The difference in seconds between the clock of the master and the clock of + the slave (second - first). It must be signed as it may be <0 or >0. + clock_diff_with_master is computed when the I/O thread starts; for this the + I/O thread does a SELECT UNIX_TIMESTAMP() on the master. + "how late the slave is compared to the master" is computed like this: + clock_of_slave - last_timestamp_executed_by_SQL_thread - clock_diff_with_master + + */ + long clock_diff_with_master; + /* + Keeps track of the number of events before fsyncing. + The option --sync-master-info determines how many + events should happen before fsyncing. + */ + uint sync_counter; + float heartbeat_period; // interface with CHANGE MASTER or master.info + ulonglong received_heartbeats; // counter of received heartbeat events + DYNAMIC_ARRAY ignore_server_ids; + ulong master_id; + /* + At reconnect and until the first rotate event is seen, prev_master_id is + the value of master_id during the previous connection, used to detect + silent change of master server during reconnects. + */ + ulong prev_master_id; + /* + Which kind of GTID position (if any) is used when connecting to master. + + Note that you can not change the numeric values of these, they are used + in master.info. 
+ */ + enum enum_using_gtid using_gtid; + + /* + This GTID position records how far we have fetched into the relay logs. + This is used to continue fetching when the IO thread reconnects to the + master. + + (Full slave stop/start does not use it, as it resets the relay logs). + */ + slave_connection_state gtid_current_pos; + /* + If events_queued_since_last_gtid is non-zero, it is the number of events + queued so far in the relaylog of a GTID-prefixed event group. + It is zero when no partial event group has been queued at the moment. + */ + uint64 events_queued_since_last_gtid; + /* + The GTID of the partially-queued event group, when + events_queued_since_last_gtid is non-zero. + */ + rpl_gtid last_queued_gtid; + /* Whether last_queued_gtid had the FL_STANDALONE flag set. */ + bool last_queued_gtid_standalone; + /* + When slave IO thread needs to reconnect, gtid_reconnect_event_skip_count + counts number of events to skip from the first GTID-prefixed event group, + to avoid duplicating events in the relay log. + */ + uint64 gtid_reconnect_event_skip_count; + /* gtid_event_seen is false until we receive first GTID event from master. */ + bool gtid_event_seen; + /** + The struct holds some history of Rows- log-event reading/queuing + by the receiver thread. Its fields are updated per each such event + at time of queue_event(), and they are checked to detect + the Rows- event group integrity violation at time of first non-Rows- + event gets handled. + */ + Rows_event_tracker rows_event_tracker; + bool in_start_all_slaves, in_stop_all_slaves; + bool in_flush_all_relay_logs; + uint users; /* Active user for object */ + uint killed; + + + /* No of DDL event group */ + Atomic_counter total_ddl_groups; + + /* No of non-transactional event group*/ + Atomic_counter total_non_trans_groups; + + /* No of transactional event group*/ + Atomic_counter total_trans_groups; + + /* domain-id based filter */ + Domain_id_filter domain_id_filter; + + /* The parallel replication mode. 
*/ + enum_slave_parallel_mode parallel_mode; + /* + semi_ack is used to identify if the current binlog event needs an + ACK from slave, or if delay_master is enabled. + */ + int semi_ack; + /* + The flag has replicate_same_server_id semantics and is raised to accept + a same-server-id event group by the gtid strict mode semisync slave. + Own server-id events can normally appear as result of EITHER + A. this server semisync (failover to) slave crash-recovery: + the transaction was created on this server then being master, + got replicated elsewhere right before the crash before commit, + and finally at recovery the transaction gets evicted from the + server's binlog and its gtid (slave) state; OR + B. in a general circular configuration and then when a recieved (returned + to slave) gtid exists in the server's binlog. Then, in gtid strict mode, + it must be ignored similarly to the replicate-same-server-id rule. + */ + bool do_accept_own_server_id= false; + List start_alter_list; + MEM_ROOT mem_root; + /* + Flag is raised at the parallel worker slave stop. Its purpose + is to mark the whole start_alter_list when slave stops. + The flag is read by Start Alter event to self-mark its state accordingly + at time its alter info struct is about to be appened to the list. + */ + bool is_shutdown= false; + + /* + A replica will default to Slave_Pos for using Using_Gtid; however, we + first need to test if the master supports GTIDs. If not, fall back to 'No'. + Cache the value so future RESET SLAVE commands don't revert to Slave_Pos. + */ + bool master_supports_gtid= true; + + /* + When TRUE, transition this server from being an active master to a slave. + This updates the replication state to account for any transactions which + were committed into the binary log. In particular, it merges + gtid_binlog_pos into gtid_slave_pos. 
+ */ + bool is_demotion= false; +}; + +struct start_alter_thd_args +{ + rpl_group_info *rgi; + LEX_CSTRING query; + LEX_CSTRING *db; + char *catalog; + bool shutdown; + CHARSET_INFO *cs; +}; + +int init_master_info(Master_info* mi, const char* master_info_fname, + const char* slave_info_fname, + bool abort_if_no_master_info_file, + int thread_mask); +void end_master_info(Master_info* mi); +int flush_master_info(Master_info* mi, + bool flush_relay_log_cache, + bool need_lock_relay_log); +void copy_filter_setting(Rpl_filter* dst_filter, Rpl_filter* src_filter); +void update_change_master_ids(DYNAMIC_ARRAY *new_ids, DYNAMIC_ARRAY *old_ids); +void prot_store_ids(THD *thd, DYNAMIC_ARRAY *ids); + +/* + Multi master are handled trough this struct. + Changes to this needs to be protected by LOCK_active_mi; +*/ + +class Master_info_index +{ +private: + IO_CACHE index_file; + char index_file_name[FN_REFLEN]; + +public: + Master_info_index(); + ~Master_info_index(); + + HASH master_info_hash; + + bool init_all_master_info(); + bool write_master_name_to_index_file(LEX_CSTRING *connection_name, + bool do_sync); + + bool check_duplicate_master_info(LEX_CSTRING *connection_name, + const char *host, uint port); + bool add_master_info(Master_info *mi, bool write_to_file); + bool remove_master_info(Master_info *mi, bool clear_log_files); + Master_info *get_master_info(const LEX_CSTRING *connection_name, + Sql_condition::enum_warning_level warning); + bool start_all_slaves(THD *thd); + bool stop_all_slaves(THD *thd); + void free_connections(); + bool flush_all_relay_logs(); +}; + + +/* + The class rpl_io_thread_info is the THD::system_thread_info for the IO thread. 
+*/ +class rpl_io_thread_info +{ +public: +}; + + +Master_info *get_master_info(const LEX_CSTRING *connection_name, + Sql_condition::enum_warning_level warning); +bool check_master_connection_name(LEX_CSTRING *name); +void create_logfile_name_with_suffix(char *res_file_name, size_t length, + const char *info_file, + bool append, + LEX_CSTRING *suffix); + +uchar *get_key_master_info(Master_info *mi, size_t *length, + my_bool not_used __attribute__((unused))); +void free_key_master_info(Master_info *mi); +uint any_slave_sql_running(bool already_locked); +bool give_error_if_slave_running(bool already_lock); + +#endif /* HAVE_REPLICATION */ +#endif /* RPL_MI_H */ diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc new file mode 100644 index 00000000..95a1234e --- /dev/null +++ b/sql/rpl_parallel.cc @@ -0,0 +1,3453 @@ +#include "mariadb.h" +#include "rpl_parallel.h" +#include "slave.h" +#include "rpl_mi.h" +#include "sql_parse.h" +#include "debug_sync.h" +#include "sql_repl.h" +#include "wsrep_mysqld.h" +#ifdef WITH_WSREP +#include "wsrep_trans_observer.h" +#endif + +/* + Code for optional parallel execution of replicated events on the slave. +*/ + + +/* + Maximum number of queued events to accumulate in a local free list, before + moving them to the global free list. There is additional a limit of how much + to accumulate based on opt_slave_parallel_max_queued. 
+*/ +#define QEV_BATCH_FREE 200 + + +struct rpl_parallel_thread_pool global_rpl_thread_pool; + +static void signal_error_to_sql_driver_thread(THD *thd, rpl_group_info *rgi, + int err); +static void +register_wait_for_prior_event_group_commit(rpl_group_info *rgi, + rpl_parallel_entry *entry); + +static int +rpt_handle_event(rpl_parallel_thread::queued_event *qev, + struct rpl_parallel_thread *rpt) +{ + int err; + rpl_group_info *rgi= qev->rgi; + Relay_log_info *rli= rgi->rli; + THD *thd= rgi->thd; + Log_event *ev; + + DBUG_ASSERT(qev->typ == rpl_parallel_thread::queued_event::QUEUED_EVENT); + ev= qev->ev; +#ifdef WITH_WSREP + if (wsrep_before_statement(thd)) + { + WSREP_WARN("Parallel slave failed at wsrep_before_statement() hook"); + return(1); + } +#endif /* WITH_WSREP */ + + thd->system_thread_info.rpl_sql_info->rpl_filter = rli->mi->rpl_filter; + ev->thd= thd; + + strcpy(rgi->event_relay_log_name_buf, qev->event_relay_log_name); + rgi->event_relay_log_name= rgi->event_relay_log_name_buf; + rgi->event_relay_log_pos= qev->event_relay_log_pos; + rgi->future_event_relay_log_pos= qev->future_event_relay_log_pos; + strcpy(rgi->future_event_master_log_name, qev->future_event_master_log_name); + if (event_can_update_last_master_timestamp(ev)) + rgi->last_master_timestamp= ev->when + (time_t)ev->exec_time; + err= apply_event_and_update_pos_for_parallel(ev, thd, rgi); + + rli->executed_entries++; +#ifdef WITH_WSREP + if (wsrep_after_statement(thd)) + { + WSREP_WARN("Parallel slave failed at wsrep_after_statement() hook"); + err= 1; + } +#endif /* WITH_WSREP */ + /* ToDo: error handling. */ + return err; +} + + +static void +handle_queued_pos_update(THD *thd, rpl_parallel_thread::queued_event *qev) +{ + int cmp; + Relay_log_info *rli; + rpl_parallel_entry *e; + + /* + Events that are not part of an event group, such as Format Description, + Stop, GTID List and such, are executed directly in the driver SQL thread, + to keep the relay log state up-to-date. 
But the associated position update + is done here, in sync with other normal events as they are queued to + worker threads. + */ + if ((thd->variables.option_bits & OPTION_BEGIN) && + opt_using_transactions) + return; + + /* Do not update position if an earlier event group caused an error abort. */ + DBUG_ASSERT(qev->typ == rpl_parallel_thread::queued_event::QUEUED_POS_UPDATE); + rli= qev->rgi->rli; + e= qev->entry_for_queued; + if (e->stop_on_error_sub_id < (uint64)ULONGLONG_MAX || + (e->force_abort && !rli->stop_for_until)) + return; + + mysql_mutex_lock(&rli->data_lock); + cmp= compare_log_name(rli->group_relay_log_name, qev->event_relay_log_name); + if (cmp < 0) + { + rli->group_relay_log_pos= qev->future_event_relay_log_pos; + strmake_buf(rli->group_relay_log_name, qev->event_relay_log_name); + } else if (cmp == 0 && + rli->group_relay_log_pos < qev->future_event_relay_log_pos) + rli->group_relay_log_pos= qev->future_event_relay_log_pos; + + cmp= compare_log_name(rli->group_master_log_name, qev->future_event_master_log_name); + if (cmp < 0) + { + strcpy(rli->group_master_log_name, qev->future_event_master_log_name); + rli->group_master_log_pos= qev->future_event_master_log_pos; + } + else if (cmp == 0 + && rli->group_master_log_pos < qev->future_event_master_log_pos) + rli->group_master_log_pos= qev->future_event_master_log_pos; + mysql_mutex_unlock(&rli->data_lock); + mysql_cond_broadcast(&rli->data_cond); +} + + +/* + Wait for any pending deadlock kills. Since deadlock kills happen + asynchronously, we need to be sure they will be completed before starting a + new transaction. Otherwise the new transaction might suffer a spurious kill. 
+*/ +static void +wait_for_pending_deadlock_kill(THD *thd, rpl_group_info *rgi) +{ + PSI_stage_info old_stage; + + mysql_mutex_lock(&thd->LOCK_wakeup_ready); + thd->ENTER_COND(&thd->COND_wakeup_ready, &thd->LOCK_wakeup_ready, + &stage_waiting_for_deadlock_kill, &old_stage); + while (rgi->killed_for_retry == rpl_group_info::RETRY_KILL_PENDING) + mysql_cond_wait(&thd->COND_wakeup_ready, &thd->LOCK_wakeup_ready); + thd->EXIT_COND(&old_stage); +} + + +static void +finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id, + rpl_parallel_entry *entry, rpl_group_info *rgi) +{ + THD *thd= rpt->thd; + wait_for_commit *wfc= &rgi->commit_orderer; + int err; + + if (rgi->get_finish_event_group_called()) + return; + + thd->get_stmt_da()->set_overwrite_status(true); + + if (unlikely(rgi->worker_error)) + { + /* + In case a previous wait was killed, we need to re-register to be able to + repeat the wait. + + And before doing that, we un-register any previous registration (in case + we got an error earlier and skipped waiting). + */ + thd->wait_for_commit_ptr->unregister_wait_for_prior_commit(); + mysql_mutex_lock(&entry->LOCK_parallel_entry); + register_wait_for_prior_event_group_commit(rgi, entry); + mysql_mutex_unlock(&entry->LOCK_parallel_entry); + } + + /* + Remove any left-over registration to wait for a prior commit to + complete. Normally, such wait would already have been removed at + this point by wait_for_prior_commit() called from within COMMIT + processing. + + However, in case of MyISAM and no binlog, we might not have any commit + processing, and so we need to do the wait here, before waking up any + subsequent commits, to preserve correct order of event execution. + + Also, in the error case we might have skipped waiting and thus need to + remove it explicitly. Or the wait might have been killed and we need to + repeat the registration and the wait. + + It is important in the non-error case to do a wait, not just an + unregister. 
Because we might be last in a group-commit that is + replicated in parallel, and the following event will then wait + for us to complete and rely on this also ensuring that any other + event in the group has completed. + + And in the error case, correct GCO lifetime relies on the fact that once + the last event group in the GCO has executed wait_for_prior_commit(), + all earlier event groups have also committed; this way no more + mark_start_commit() calls can be made and it is safe to de-allocate + the GCO. + + Thus this final wait is done with kill ignored during the wait. This is + fine, at this point there is no active query or transaction to abort, and + the thread will continue as soon as earlier event groups complete. + + Note though, that in the non-error case there is no guarantee that + finish_event_group() will be run in-order. For example, a successful + binlog group commit will wakeup all participating event groups + simultaneously so only thread scheduling will decide the order in which + finish_event_group() calls acquire LOCK_parallel_entry. + */ + err= wfc->wait_for_prior_commit(thd, false); + if (unlikely(err) && !rgi->worker_error) + signal_error_to_sql_driver_thread(thd, rgi, err); + thd->wait_for_commit_ptr= NULL; + + mysql_mutex_lock(&entry->LOCK_parallel_entry); + /* + We need to mark that this event group started its commit phase, in case we + missed it before (otherwise we would deadlock the next event group that is + waiting for this). In most cases (normal DML), it will be a no-op. + */ + rgi->mark_start_commit_no_lock(); + rgi->commit_orderer.wakeup_blocked= false; + + if (entry->last_committed_sub_id < sub_id) + { + /* + Record that this event group has finished (eg. transaction is + committed, if transactional), so other event groups will no longer + attempt to wait for us to commit. Once we have increased + entry->last_committed_sub_id, no other threads will execute + register_wait_for_prior_commit() against us. 
Thus, by doing one + extra (usually redundant) wakeup_subsequent_commits() we can ensure + that no register_wait_for_prior_commit() can ever happen without a + subsequent wakeup_subsequent_commits() to wake it up. + + We can race here with the next transactions, but that is fine, as + long as we check that we do not decrease last_committed_sub_id. If + this commit is done, then any prior commits will also have been + done and also no longer need waiting for. + */ + entry->last_committed_sub_id= sub_id; + if (entry->need_sub_id_signal) + mysql_cond_broadcast(&entry->COND_parallel_entry); + + /* Now free any GCOs in which all transactions have committed. */ + group_commit_orderer *tmp_gco= rgi->gco; + while (tmp_gco && + (!tmp_gco->next_gco || tmp_gco->last_sub_id > sub_id || + tmp_gco->next_gco->wait_count > entry->count_committing_event_groups)) + { + /* + We must not free a GCO before the wait_count of the following GCO has + been reached and wakeup has been sent. Otherwise we will lose the + wakeup and hang (there were several such bugs in the past). + + The intention is that this is ensured already since we only free when + the last event group in the GCO has committed + (tmp_gco->last_sub_id <= sub_id). However, if we have a bug, we have + extra check on next_gco->wait_count to hopefully avoid hanging; we + have here an assertion in debug builds that this check does not in + fact trigger. + */ + DBUG_ASSERT(!tmp_gco->next_gco || tmp_gco->last_sub_id > sub_id); + tmp_gco= tmp_gco->prev_gco; + } + while (tmp_gco) + { + group_commit_orderer *prev_gco= tmp_gco->prev_gco; + tmp_gco->next_gco->prev_gco= NULL; + rpt->loc_free_gco(tmp_gco); + tmp_gco= prev_gco; + } + } + + /* + If this event group got error, then any following event groups that have + not yet started should just skip their group, preparing for stop of the + SQL driver thread. 
+ */ + if (unlikely(rgi->worker_error) && entry->stop_on_error_sub_id > sub_id) + entry->stop_on_error_sub_id= sub_id; + mysql_mutex_unlock(&entry->LOCK_parallel_entry); +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("hold_worker_on_schedule", { + if (entry->stop_on_error_sub_id < (uint64)ULONGLONG_MAX) + { + debug_sync_set_action(thd, STRING_WITH_LEN("now SIGNAL continue_worker")); + } + }); + + DBUG_EXECUTE_IF("rpl_parallel_simulate_wait_at_retry", { + if (rgi->current_gtid.seq_no == 1000) { + DBUG_ASSERT(entry->stop_on_error_sub_id == sub_id); + debug_sync_set_action(thd, + STRING_WITH_LEN("now WAIT_FOR proceed_by_1000")); + } + }); + DBUG_EXECUTE_IF("hold_worker2_favor_worker3", { + if (rgi->current_gtid.seq_no == 2001) { + DBUG_ASSERT(!rgi->worker_error || entry->stop_on_error_sub_id == sub_id); + debug_sync_set_action(thd, STRING_WITH_LEN("now SIGNAL cont_worker3")); + } + }); +#endif + + if (rgi->killed_for_retry == rpl_group_info::RETRY_KILL_PENDING) + wait_for_pending_deadlock_kill(thd, rgi); + thd->clear_error(); + thd->reset_killed(); + /* + Would do thd->get_stmt_da()->set_overwrite_status(false) here, but + reset_diagnostics_area() already does that. 
+ */ + thd->get_stmt_da()->reset_diagnostics_area(); + wfc->wakeup_subsequent_commits(rgi->worker_error); + rgi->did_mark_start_commit= false; + rgi->set_finish_event_group_called(true); +} + + +static void +signal_error_to_sql_driver_thread(THD *thd, rpl_group_info *rgi, int err) +{ + rgi->worker_error= err; + DBUG_EXECUTE_IF("hold_worker2_favor_worker3", { + if (rgi->current_gtid.seq_no == 2002) { + debug_sync_set_action(thd, STRING_WITH_LEN("now WAIT_FOR cont_worker2")); + }}); + + rgi->cleanup_context(thd, true); + rgi->rli->abort_slave= true; + rgi->rli->stop_for_until= false; + mysql_mutex_lock(rgi->rli->relay_log.get_log_lock()); + rgi->rli->relay_log.signal_relay_log_update(); + mysql_mutex_unlock(rgi->rli->relay_log.get_log_lock()); +} + + +static void +unlock_or_exit_cond(THD *thd, mysql_mutex_t *lock, bool *did_enter_cond, + PSI_stage_info *old_stage) +{ + if (*did_enter_cond) + { + thd->EXIT_COND(old_stage); + *did_enter_cond= false; + } + else + mysql_mutex_unlock(lock); +} + + +static void +register_wait_for_prior_event_group_commit(rpl_group_info *rgi, + rpl_parallel_entry *entry) +{ + mysql_mutex_assert_owner(&entry->LOCK_parallel_entry); + if (rgi->wait_commit_sub_id > entry->last_committed_sub_id) + { + /* + Register that the commit of this event group must wait for the + commit of the previous event group to complete before it may + complete itself, so that we preserve commit order. + */ + wait_for_commit *waitee= + &rgi->wait_commit_group_info->commit_orderer; + rgi->commit_orderer.register_wait_for_prior_commit(waitee); + } +} + + +/* + Do not start parallel execution of this event group until all prior groups + have reached the commit phase that are not safe to run in parallel with. 
+*/ +static void +do_gco_wait(rpl_group_info *rgi, group_commit_orderer *gco, + bool *did_enter_cond, PSI_stage_info *old_stage) +{ + THD *thd= rgi->thd; + rpl_parallel_entry *entry= rgi->parallel_entry; + uint64 wait_count; + + mysql_mutex_assert_owner(&entry->LOCK_parallel_entry); + + if (!gco->installed) + { + group_commit_orderer *prev_gco= gco->prev_gco; + if (prev_gco) + { + prev_gco->last_sub_id= gco->prior_sub_id; + prev_gco->next_gco= gco; + } + gco->installed= true; + } + wait_count= gco->wait_count; + if (wait_count > entry->count_committing_event_groups) + { + DEBUG_SYNC(thd, "rpl_parallel_start_waiting_for_prior"); + thd->ENTER_COND(&gco->COND_group_commit_orderer, + &entry->LOCK_parallel_entry, + &stage_waiting_for_prior_transaction_to_start_commit, + old_stage); + *did_enter_cond= true; + thd->set_time_for_next_stage(); + do + { + if (!rgi->worker_error && unlikely(thd->check_killed(1))) + { + DEBUG_SYNC(thd, "rpl_parallel_start_waiting_for_prior_killed"); + thd->clear_error(); + thd->get_stmt_da()->reset_diagnostics_area(); + thd->send_kill_message(); + slave_output_error_info(rgi, thd); + signal_error_to_sql_driver_thread(thd, rgi, 1); + /* + Even though we were killed, we need to continue waiting for the + prior event groups to signal that we can continue. Otherwise we + mess up the accounting for ordering. However, now that we have + marked the error, events will just be skipped rather than + executed, and things will progress quickly towards stop. 
+ */ + } + mysql_cond_wait(&gco->COND_group_commit_orderer, + &entry->LOCK_parallel_entry); + } while (wait_count > entry->count_committing_event_groups); + } +} + + +static bool +do_stop_handling(rpl_group_info *rgi) +{ + bool should_stop= false; + rpl_parallel_entry *entry= rgi->parallel_entry; + + mysql_mutex_assert_owner(&entry->LOCK_parallel_entry); + + if (unlikely(entry->force_abort) && rgi->gtid_sub_id > entry->stop_sub_id) + { + /* + We are stopping (STOP SLAVE), and this event group need not be applied + before we can safely stop. So return a flag that will cause us to skip, + rather than execute, the following events. Once all queued events have + been skipped, the STOP SLAVE is complete (for this thread). + */ + should_stop= true; + } + + if (unlikely(entry->stop_on_error_sub_id <= rgi->wait_commit_sub_id)) + { + rgi->worker_error= 1; + should_stop= true; + } + + if (likely(!should_stop)) + { + /* + Since we did not decide to stop, bump the largest_started_sub_id while + still holding LOCK_parallel_entry. + */ + if (rgi->gtid_sub_id > entry->largest_started_sub_id) + entry->largest_started_sub_id= rgi->gtid_sub_id; + } + + return should_stop; +} + + +static bool +do_ftwrl_wait(rpl_group_info *rgi, + bool *did_enter_cond, PSI_stage_info *old_stage) +{ + THD *thd= rgi->thd; + rpl_parallel_entry *entry= rgi->parallel_entry; + uint64 sub_id= rgi->gtid_sub_id; + bool aborted= false; + DBUG_ENTER("do_ftwrl_wait"); + + mysql_mutex_assert_owner(&entry->LOCK_parallel_entry); + + /* + If a FLUSH TABLES WITH READ LOCK (FTWRL) is pending, check if this + transaction is later than transactions that have priority to complete + before FTWRL. If so, wait here so that FTWRL can proceed and complete + first. + + (entry->pause_sub_id is ULONGLONG_MAX if no FTWRL is pending, which makes + this test false as required). 
+ */ + if (unlikely(sub_id > entry->pause_sub_id)) + { + thd->ENTER_COND(&entry->COND_parallel_entry, &entry->LOCK_parallel_entry, + &stage_waiting_for_ftwrl, old_stage); + *did_enter_cond= true; + thd->set_time_for_next_stage(); + do + { + if (entry->force_abort || rgi->worker_error) + { + aborted= true; + break; + } + if (unlikely(thd->check_killed())) + { + slave_output_error_info(rgi, thd); + signal_error_to_sql_driver_thread(thd, rgi, 1); + break; + } + mysql_cond_wait(&entry->COND_parallel_entry, &entry->LOCK_parallel_entry); + } while (sub_id > entry->pause_sub_id); + + DBUG_EXECUTE_IF("delay_ftwrl_wait_gtid_0_x_100", { + if (rgi->current_gtid.domain_id == 0 && + rgi->current_gtid.seq_no == 100) { + /* + Simulate delayed wakeup from the mysql_cond_wait(). To do this, we + need to have the LOCK_parallel_entry mutex released during the wait. + */ + mysql_mutex_unlock(&entry->LOCK_parallel_entry); + debug_sync_set_action(thd, + STRING_WITH_LEN("now SIGNAL pause_wait_started WAIT_FOR pause_wait_continue")); + mysql_mutex_lock(&entry->LOCK_parallel_entry); + } + }); + /* + We do not call EXIT_COND() here, as this will be done later by our + caller (since we set *did_enter_cond to true). + */ + } + + DBUG_RETURN(aborted); +} + + +static int +pool_mark_busy(rpl_parallel_thread_pool *pool, THD *thd) +{ + PSI_stage_info old_stage; + int res= 0; + + /* + Wait here while the queue is busy. This is done to make FLUSH TABLES WITH + READ LOCK work correctly, without incuring extra locking penalties in + normal operation. FLUSH TABLES WITH READ LOCK needs to lock threads in the + thread pool, and for this we need to make sure the pool will not go away + during the operation. The LOCK_rpl_thread_pool is not suitable for + this. It is taken by release_thread() while holding LOCK_rpl_thread; so it + must be released before locking any LOCK_rpl_thread lock, or a deadlock + can occur. 
+ + So we protect the infrequent operations of FLUSH TABLES WITH READ LOCK and + pool size changes with this condition wait. + */ + DBUG_EXECUTE_IF("mark_busy_mdev_22370",my_sleep(1000000);); + mysql_mutex_lock(&pool->LOCK_rpl_thread_pool); + if (thd) + { + thd->ENTER_COND(&pool->COND_rpl_thread_pool, &pool->LOCK_rpl_thread_pool, + &stage_waiting_for_rpl_thread_pool, &old_stage); + thd->set_time_for_next_stage(); + } + while (pool->busy) + { + if (thd && unlikely(thd->check_killed())) + { + res= 1; + break; + } + mysql_cond_wait(&pool->COND_rpl_thread_pool, &pool->LOCK_rpl_thread_pool); + } + if (!res) + pool->busy= true; + if (thd) + thd->EXIT_COND(&old_stage); + else + mysql_mutex_unlock(&pool->LOCK_rpl_thread_pool); + + return res; +} + + +static void +pool_mark_not_busy(rpl_parallel_thread_pool *pool) +{ + mysql_mutex_lock(&pool->LOCK_rpl_thread_pool); + DBUG_ASSERT(pool->busy); + pool->busy= false; + mysql_cond_broadcast(&pool->COND_rpl_thread_pool); + mysql_mutex_unlock(&pool->LOCK_rpl_thread_pool); +} + + +void +rpl_unpause_after_ftwrl(THD *thd) +{ + uint32 i; + rpl_parallel_thread_pool *pool= &global_rpl_thread_pool; + DBUG_ENTER("rpl_unpause_after_ftwrl"); + + DBUG_ASSERT(pool->busy); + + for (i= 0; i < pool->count; ++i) + { + rpl_parallel_entry *e; + rpl_parallel_thread *rpt= pool->threads[i]; + + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + if (!rpt->current_owner) + { + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + continue; + } + e= rpt->current_entry; + mysql_mutex_lock(&e->LOCK_parallel_entry); + rpt->pause_for_ftwrl = false; + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + /* + Do not change pause_sub_id if force_abort is set. + force_abort is set in case of STOP SLAVE. + + Reason: If pause_sub_id is not changed and force_abort_is set, + any parallel slave thread waiting in do_ftwrl_wait() will + on wakeup return from do_ftwrl_wait() with 1. 
This will set + skip_event_group to 1 in handle_rpl_parallel_thread() and the + parallel thread will abort at once. + + If pause_sub_id is changed, the code in handle_rpl_parallel_thread() + would continue to execute the transaction in the queue, which would + cause some transactions to be lost. + */ + if (!e->force_abort) + e->pause_sub_id= (uint64)ULONGLONG_MAX; + mysql_cond_broadcast(&e->COND_parallel_entry); + mysql_mutex_unlock(&e->LOCK_parallel_entry); + } + + pool_mark_not_busy(pool); + DBUG_VOID_RETURN; +} + + +/* + . + + Note: in case of error return, rpl_unpause_after_ftwrl() must _not_ be called. +*/ +int +rpl_pause_for_ftwrl(THD *thd) +{ + uint32 i; + rpl_parallel_thread_pool *pool= &global_rpl_thread_pool; + int err; + Dynamic_array mi_arr(4, 4); // array of replication source mi:s + DBUG_ENTER("rpl_pause_for_ftwrl"); + + /* + While the count_pending_pause_for_ftwrl counter is non-zero, the pool + cannot be shutdown/resized, so threads are guaranteed to not disappear. + + This is required to safely be able to access the individual threads below. + (We cannot lock an individual thread while holding LOCK_rpl_thread_pool, + as this can deadlock against release_thread()). + */ + if ((err= pool_mark_busy(pool, thd))) + DBUG_RETURN(err); + + for (i= 0; i < pool->count; ++i) + { + PSI_stage_info old_stage; + rpl_parallel_entry *e; + rpl_parallel_thread *rpt= pool->threads[i]; + + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + if (!rpt->current_owner) + { + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + continue; + } + e= rpt->current_entry; + mysql_mutex_lock(&e->LOCK_parallel_entry); + /* + Setting the rpt->pause_for_ftwrl flag makes sure that the thread will not + de-allocate itself until signalled to do so by rpl_unpause_after_ftwrl(). 
+ */ + rpt->pause_for_ftwrl = true; + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + ++e->need_sub_id_signal; + if (e->pause_sub_id == (uint64)ULONGLONG_MAX) + { + e->pause_sub_id= e->largest_started_sub_id; + DBUG_EXECUTE_IF("pause_for_ftwrl_wait", { + mysql_mutex_unlock(&e->LOCK_parallel_entry); + debug_sync_set_action(thd, + STRING_WITH_LEN("now " + "SIGNAL pause_ftwrl_waiting " + "WAIT_FOR pause_ftwrl_cont")); + mysql_mutex_lock(&e->LOCK_parallel_entry); + }); + } + thd->ENTER_COND(&e->COND_parallel_entry, &e->LOCK_parallel_entry, + &stage_waiting_for_ftwrl_threads_to_pause, &old_stage); + thd->set_time_for_next_stage(); + while (e->pause_sub_id < (uint64)ULONGLONG_MAX && + e->last_committed_sub_id < e->pause_sub_id && + !err) + { + if (unlikely(thd->check_killed())) + { + err= 1; + break; + } + mysql_cond_wait(&e->COND_parallel_entry, &e->LOCK_parallel_entry); + }; + --e->need_sub_id_signal; + + thd->EXIT_COND(&old_stage); + if (err) + break; + /* + Notify any source any domain waiting-for-master Start-Alter to give way. 
+ */ + Master_info *mi= e->rli->mi; + bool found= false; + for (uint i= 0; i < mi_arr.elements() && !found; i++) + found= mi_arr.at(i) == mi; + if (!found) + { + mi_arr.append(mi); + start_alter_info *info=NULL; + mysql_mutex_lock(&mi->start_alter_list_lock); + List_iterator info_iterator(mi->start_alter_list); + while ((info= info_iterator++)) + { + mysql_mutex_lock(&mi->start_alter_lock); + + DBUG_ASSERT(info->state == start_alter_state::REGISTERED); + + info->state= start_alter_state::ROLLBACK_ALTER; + info->direct_commit_alter= true; + mysql_cond_broadcast(&info->start_alter_cond); + mysql_mutex_unlock(&mi->start_alter_lock); + } + mysql_mutex_unlock(&mi->start_alter_list_lock); + } + } + + if (err) + rpl_unpause_after_ftwrl(thd); + DBUG_RETURN(err); +} + + +#ifndef DBUG_OFF +static int +dbug_simulate_tmp_error(rpl_group_info *rgi, THD *thd) +{ + if (rgi->current_gtid.domain_id == 0 && rgi->current_gtid.seq_no == 100 && + rgi->retry_event_count == 4) + { + thd->clear_error(); + thd->get_stmt_da()->reset_diagnostics_area(); + my_error(ER_LOCK_DEADLOCK, MYF(0)); + return 1; + } + return 0; +} +#endif + + +/* + If we detect a deadlock due to eg. storage engine locks that conflict with + the fixed commit order, then the later transaction will be killed + asynchroneously to allow the former to complete its commit. + + In this case, we convert the 'killed' error into a deadlock error, and retry + the later transaction. + + If we are doing optimistic parallel apply of transactions not known to be + safe, we convert any error to a deadlock error, but then at retry we will + wait for prior transactions to commit first, so that the retries can be + done non-speculative. 
+*/ +static void +convert_kill_to_deadlock_error(rpl_group_info *rgi) +{ + THD *thd= rgi->thd; + int err_code; + + if (!thd->get_stmt_da()->is_error()) + return; + err_code= thd->get_stmt_da()->sql_errno(); + if ((rgi->speculation == rpl_group_info::SPECULATE_OPTIMISTIC && + err_code != ER_PRIOR_COMMIT_FAILED) || + ((err_code == ER_QUERY_INTERRUPTED || err_code == ER_CONNECTION_KILLED) && + rgi->killed_for_retry)) + { + thd->clear_error(); + my_error(ER_LOCK_DEADLOCK, MYF(0)); + thd->reset_killed(); + } +} + + +/* + Check if an event marks the end of an event group. Returns non-zero if so, + zero otherwise. + + In addition, returns 1 if the group is committing, 2 if it is rolling back. +*/ +static int +is_group_ending(Log_event *ev, Log_event_type event_type) +{ + if (event_type == XID_EVENT || event_type == XA_PREPARE_LOG_EVENT) + return 1; + if (event_type == QUERY_EVENT) // COMMIT/ROLLBACK are never compressed + { + Query_log_event *qev = (Query_log_event *)ev; + if (qev->is_commit() || + !strncmp(qev->query, STRING_WITH_LEN("XA COMMIT")) || + !strncmp(qev->query, STRING_WITH_LEN("XA ROLLBACK"))) + return 1; + if (qev->is_rollback()) + return 2; + } + return 0; +} + + +static int +retry_event_group(rpl_group_info *rgi, rpl_parallel_thread *rpt, + rpl_parallel_thread::queued_event *orig_qev) +{ + IO_CACHE rlog; + LOG_INFO linfo; + File fd= (File)-1; + const char *errmsg; + inuse_relaylog *ir= rgi->relay_log; + uint64 event_count; + uint64 events_to_execute= rgi->retry_event_count; + Relay_log_info *rli= rgi->rli; + int err; + ulonglong cur_offset, old_offset; + char log_name[FN_REFLEN]; + THD *thd= rgi->thd; + rpl_parallel_entry *entry= rgi->parallel_entry; + ulong retries= 0; + Format_description_log_event *description_event= NULL; + +do_retry: + event_count= 0; + err= 0; + errmsg= NULL; +#ifdef WITH_WSREP + thd->wsrep_cs().reset_error(); + WSREP_DEBUG("retrying async replication event"); +#endif /* WITH_WSREP */ + + /* + If we already started committing before 
getting the deadlock (or other + error) that caused us to need to retry, we have already signalled + subsequent transactions that we have started committing. This is + potentially a problem, as now we will rollback, and if subsequent + transactions would start to execute now, they could see an unexpected + state of the database and get eg. key not found or duplicate key error. + + However, to get a deadlock in the first place, there must have been + another earlier transaction that is waiting for us. Thus that other + transaction has _not_ yet started to commit, and any subsequent + transactions will still be waiting at this point. + + So here, we decrement back the count of transactions that started + committing (if we already incremented it), undoing the effect of an + earlier mark_start_commit(). Then later, when the retry succeeds and we + commit again, we can do a new mark_start_commit() and eventually wake up + subsequent transactions at the proper time. + + We need to do the unmark before the rollback, to be sure that the + transaction we deadlocked with will not signal that it started to commit + until after the unmark. + */ + DBUG_EXECUTE_IF("inject_mdev8302", { my_sleep(20000);}); + rgi->unmark_start_commit(); + DEBUG_SYNC(thd, "rpl_parallel_retry_after_unmark"); + + /* + We might get the deadlock error that causes the retry during commit, while + sitting in wait_for_prior_commit(). If this happens, we will have a + pending error in the wait_for_commit object. So clear this by + unregistering (and later re-registering) the wait. + */ + if(thd->wait_for_commit_ptr) + thd->wait_for_commit_ptr->unregister_wait_for_prior_commit(); + DBUG_EXECUTE_IF("inject_mdev8031", { + /* Simulate that we get deadlock killed at this exact point. 
*/ + rgi->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED; + thd->set_killed(KILL_CONNECTION); + }); +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("rpl_parallel_simulate_wait_at_retry", { + if (rgi->current_gtid.seq_no == 1001) { + debug_sync_set_action(thd, + STRING_WITH_LEN("rpl_parallel_simulate_wait_at_retry WAIT_FOR proceed_by_1001")); + } + DEBUG_SYNC(thd, "rpl_parallel_simulate_wait_at_retry"); + }); +#endif + + rgi->cleanup_context(thd, 1); + wait_for_pending_deadlock_kill(thd, rgi); + thd->reset_killed(); + thd->clear_error(); + rgi->killed_for_retry = rpl_group_info::RETRY_KILL_NONE; +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("hold_worker2_favor_worker3", { + if (rgi->current_gtid.seq_no == 2003) { + debug_sync_set_action(thd, + STRING_WITH_LEN("now WAIT_FOR cont_worker3")); + } + }); +#endif + /* + If we retry due to a deadlock kill that occurred during the commit step, we + might have already updated (but not committed) an update of table + mysql.gtid_slave_pos, and cleared the gtid_pending flag. Now we have + rolled back any such update, so we must set the gtid_pending flag back to + true so that we will do a new update when/if we succeed with the retry. + */ + rgi->gtid_pending= true; + + mysql_mutex_lock(&rli->data_lock); + ++rli->retried_trans; + ++rpt->last_trans_retry_count; + statistic_increment(slave_retried_transactions, LOCK_status); + mysql_mutex_unlock(&rli->data_lock); + + for (;;) + { + mysql_mutex_lock(&entry->LOCK_parallel_entry); + if (rgi->gtid_sub_id < entry->stop_on_error_sub_id || + DBUG_IF("simulate_mdev_12746")) + { + register_wait_for_prior_event_group_commit(rgi, entry); + } + else + { + /* + A failure of a preceding "parent" transaction may not be + seen by the current one through its own worker_error. + Such induced error gets set by ourselves now. 
+ */ + err= rgi->worker_error= 1; + my_error(ER_PRIOR_COMMIT_FAILED, MYF(0)); + mysql_mutex_unlock(&entry->LOCK_parallel_entry); + + goto err; + } + mysql_mutex_unlock(&entry->LOCK_parallel_entry); + + /* + Let us wait for all prior transactions to complete before trying again. + This way, we avoid repeatedly conflicting with and getting deadlock + killed by the same earlier transaction. + */ + if (!(err= thd->wait_for_prior_commit())) + { + rgi->speculation = rpl_group_info::SPECULATE_WAIT; + break; + } + + convert_kill_to_deadlock_error(rgi); + if (!has_temporary_error(thd)) + goto err; + /* + If we get a temporary error such as a deadlock kill, we can safely + ignore it, as we already rolled back. + + But we still want to retry the wait for the prior transaction to + complete its commit. + */ + thd->clear_error(); + thd->reset_killed(); + if(thd->wait_for_commit_ptr) + thd->wait_for_commit_ptr->unregister_wait_for_prior_commit(); + DBUG_EXECUTE_IF("inject_mdev8031", { + /* Inject a small sleep to give prior transaction a chance to commit. */ + my_sleep(100000); + }); + } + + /* + Let us clear any lingering deadlock kill one more time, here after + wait_for_prior_commit() has completed. This should rule out any + possibility of an old deadlock kill lingering on beyond this point. 
+ */ + thd->reset_killed(); +#ifdef WITH_WSREP + if (wsrep_before_command(thd)) + { + WSREP_WARN("Parallel slave worker failed at wsrep_before_command() hook"); + err= 1; + goto err; + } + wsrep_start_trx_if_not_started(thd); + WSREP_DEBUG("parallel slave retry, after trx start"); + +#endif /* WITH_WSREP */ + strmake_buf(log_name, ir->name); + if ((fd= open_binlog(&rlog, log_name, &errmsg)) <0) + { + err= 1; + goto err; + } + cur_offset= rgi->retry_start_offset; + delete description_event; + description_event= + read_relay_log_description_event(&rlog, cur_offset, &errmsg); + if (!description_event) + { + err= 1; + goto err; + } + DBUG_EXECUTE_IF("inject_mdev8031", { + /* Simulate pending KILL caught in read_relay_log_description_event(). */ + if (unlikely(thd->check_killed())) + { + err= 1; + goto err; + } + }); + my_b_seek(&rlog, cur_offset); + + do + { + Log_event_type event_type; + Log_event *ev; + rpl_parallel_thread::queued_event *qev; + + /* The loop is here so we can try again the next relay log file on EOF. */ + for (;;) + { + old_offset= cur_offset; + ev= Log_event::read_log_event(&rlog, description_event, + opt_slave_sql_verify_checksum); + cur_offset= my_b_tell(&rlog); + + if (ev) + break; + if (unlikely(rlog.error < 0)) + { + errmsg= "slave SQL thread aborted because of I/O error"; + err= 1; + goto check_retry; + } + if (unlikely(rlog.error > 0)) + { + sql_print_error("Slave SQL thread: I/O error reading " + "event(errno: %d cur_log->error: %d)", + my_errno, rlog.error); + errmsg= "Aborting slave SQL thread because of partial event read"; + err= 1; + goto err; + } + /* EOF. Move to the next relay log. */ + end_io_cache(&rlog); + mysql_file_close(fd, MYF(MY_WME)); + fd= (File)-1; + + /* Find the next relay log file. 
*/ + if((err= rli->relay_log.find_log_pos(&linfo, log_name, 1)) || + (err= rli->relay_log.find_next_log(&linfo, 1))) + { + char buff[22]; + sql_print_error("next log error: %d offset: %s log: %s", + err, + llstr(linfo.index_file_offset, buff), + log_name); + goto err; + } + strmake_buf(log_name ,linfo.log_file_name); + + DBUG_EXECUTE_IF("inject_retry_event_group_open_binlog_kill", { + if (retries < 2) + { + /* Simulate that we get deadlock killed during open_binlog(). */ + thd->reset_for_next_command(); + rgi->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED; + thd->set_killed(KILL_CONNECTION); + thd->send_kill_message(); + fd= (File)-1; + err= 1; + goto check_retry; + } + }); + if ((fd= open_binlog(&rlog, log_name, &errmsg)) <0) + { + err= 1; + goto check_retry; + } + description_event->reset_crypto(); + /* Loop to try again on the new log file. */ + } + + event_type= ev->get_type_code(); + if (event_type == FORMAT_DESCRIPTION_EVENT) + { + Format_description_log_event *newde= (Format_description_log_event*)ev; + newde->copy_crypto_data(description_event); + delete description_event; + description_event= newde; + continue; + } + else if (event_type == START_ENCRYPTION_EVENT) + { + description_event->start_decryption((Start_encryption_log_event*)ev); + delete ev; + continue; + } + else if (!Log_event::is_group_event(event_type)) + { + delete ev; + continue; + } + ev->thd= thd; + + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + qev= rpt->retry_get_qev(ev, orig_qev, log_name, old_offset, + cur_offset - old_offset); + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + if (!qev) + { + delete ev; + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + err= 1; + goto err; + } + if (is_group_ending(ev, event_type) == 1) + rgi->mark_start_commit(); + + err= rpt_handle_event(qev, rpt); + ++event_count; + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + rpt->free_qev(qev); + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + + delete_or_keep_event_post_apply(rgi, event_type, ev); + 
DBUG_EXECUTE_IF("rpl_parallel_simulate_double_temp_err_gtid_0_x_100", + if (retries == 0) err= dbug_simulate_tmp_error(rgi, thd);); + DBUG_EXECUTE_IF("rpl_parallel_simulate_infinite_temp_err_gtid_0_x_100", + err= dbug_simulate_tmp_error(rgi, thd);); + if (!err) + continue; + +check_retry: + convert_kill_to_deadlock_error(rgi); + if (has_temporary_error(thd)) + { + ++retries; + if (retries < slave_trans_retries) + { + if (fd >= 0) + { + end_io_cache(&rlog); + mysql_file_close(fd, MYF(MY_WME)); + fd= (File)-1; + } + goto do_retry; + } + sql_print_error("Slave worker thread retried transaction %lu time(s) " + "in vain, giving up. Consider raising the value of " + "the slave_transaction_retries variable.", + slave_trans_retries); + } + goto err; + + } while (event_count < events_to_execute); + +err: + + if (description_event) + delete description_event; + if (fd >= 0) + { + end_io_cache(&rlog); + mysql_file_close(fd, MYF(MY_WME)); + } + if (errmsg) + sql_print_error("Error reading relay log event: %s", errmsg); + return err; +} + + +pthread_handler_t +handle_rpl_parallel_thread(void *arg) +{ + THD *thd; + PSI_stage_info old_stage; + struct rpl_parallel_thread::queued_event *events; + bool group_standalone= true; + bool in_event_group= false; + bool skip_event_group= false; + rpl_group_info *group_rgi= NULL; + group_commit_orderer *gco; + uint64 event_gtid_sub_id= 0; + rpl_sql_thread_info sql_info(NULL); + int err; + + struct rpl_parallel_thread *rpt= (struct rpl_parallel_thread *)arg; + + my_thread_init(); + thd = new THD(next_thread_id()); + thd->thread_stack = (char*)&thd; + server_threads.insert(thd); + set_current_thd(thd); + pthread_detach_this_thread(); + thd->store_globals(); + thd->init_for_queries(); + thd->variables.binlog_annotate_row_events= 0; + init_thr_lock(); + thd->system_thread= SYSTEM_THREAD_SLAVE_SQL; + thd->security_ctx->skip_grants(); + thd->variables.max_allowed_packet= slave_max_allowed_packet; + /* Ensure that slave can exeute any alter table 
it gets from master */ + thd->variables.alter_algorithm= (ulong) Alter_info::ALTER_TABLE_ALGORITHM_DEFAULT; + thd->slave_thread= 1; + + set_slave_thread_options(thd); + thd->client_capabilities = CLIENT_LOCAL_FILES; + thd->net.reading_or_writing= 0; + thd_proc_info(thd, "Waiting for work from main SQL threads"); + thd->variables.lock_wait_timeout= LONG_TIMEOUT; + thd->system_thread_info.rpl_sql_info= &sql_info; + /* + We need to use (at least) REPEATABLE READ isolation level. Otherwise + speculative parallel apply can run out-of-order and give wrong results + for statement-based replication. + */ + thd->variables.tx_isolation= ISO_REPEATABLE_READ; + + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + rpt->thd= thd; + PSI_thread *psi= PSI_CALL_get_thread(); + PSI_CALL_set_thread_os_id(psi); + PSI_CALL_set_thread_THD(psi, thd); + PSI_CALL_set_thread_id(psi, thd->thread_id); + rpt->thd->set_psi(psi); + + while (rpt->delay_start) + mysql_cond_wait(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread); + + rpt->running= true; + mysql_cond_signal(&rpt->COND_rpl_thread); + + thd->set_command(COM_SLAVE_WORKER); +#ifdef WITH_WSREP + wsrep_open(thd); + if (wsrep_before_command(thd)) + { + WSREP_WARN("Parallel slave failed at wsrep_before_command() hook"); + rpt->stop = true; + } +#endif /* WITH_WSREP */ + while (!rpt->stop) + { + uint wait_count= 0; + rpl_parallel_thread::queued_event *qev, *next_qev; + + rpt->start_time_tracker(); + thd->ENTER_COND(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread, + &stage_waiting_for_work_from_sql_thread, &old_stage); + /* + There are 4 cases that should cause us to wake up: + - Events have been queued for us to handle. + - We have an owner, but no events and not inside event group -> we need + to release ourself to the thread pool + - SQL thread is stopping, and we have an owner but no events, and we are + inside an event group; no more events will be queued to us, so we need + to abort the group (force_abort==1). + - Thread pool shutdown (rpt->stop==1). 
+ */ + while (!( (events= rpt->event_queue) || + (rpt->current_owner && !in_event_group) || + (rpt->current_owner && group_rgi->parallel_entry->force_abort) || + rpt->stop)) + { + if (!wait_count++) + thd->set_time_for_next_stage(); + mysql_cond_wait(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread); + } + rpt->dequeue1(events); + thd->EXIT_COND(&old_stage); + rpt->add_to_worker_idle_time_and_reset(); + + more_events: + for (qev= events; qev; qev= next_qev) + { + Log_event_type event_type; + rpl_group_info *rgi= qev->rgi; + rpl_parallel_entry *entry= rgi->parallel_entry; + bool end_of_group; + int group_ending; + + next_qev= qev->next; + if (qev->typ == rpl_parallel_thread::queued_event::QUEUED_POS_UPDATE) + { + handle_queued_pos_update(thd, qev); + rpt->loc_free_qev(qev); + continue; + } + else if (qev->typ == + rpl_parallel_thread::queued_event::QUEUED_MASTER_RESTART) + { + if (in_event_group) + { + /* + Master restarted (crashed) in the middle of an event group. + So we need to roll back and discard that event group. + */ + group_rgi->cleanup_context(thd, 1); + in_event_group= false; + finish_event_group(rpt, group_rgi->gtid_sub_id, + qev->entry_for_queued, group_rgi); + + rpt->loc_free_rgi(group_rgi); + thd->rgi_slave= group_rgi= NULL; + } + + rpt->loc_free_qev(qev); + continue; + } + DBUG_ASSERT(qev->typ==rpl_parallel_thread::queued_event::QUEUED_EVENT); + + thd->rgi_slave= rgi; + gco= rgi->gco; + /* Handle a new event group, which will be initiated by a GTID event. 
*/ + if ((event_type= qev->ev->get_type_code()) == GTID_EVENT) + { + rpt->last_trans_retry_count= 0; + rpt->last_seen_gtid= rgi->current_gtid; + rpt->channel_name_length= (uint)rgi->rli->mi->connection_name.length; + if (rpt->channel_name_length) + memcpy(rpt->channel_name, rgi->rli->mi->connection_name.str, + rgi->rli->mi->connection_name.length); + + bool did_enter_cond= false; + PSI_stage_info old_stage; + + DBUG_EXECUTE_IF("rpl_parallel_delay_gtid_0_x_100_start", { + if (rgi->current_gtid.domain_id==0 && + rgi->current_gtid.seq_no == 100) + my_sleep(10000); + }); +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("hold_worker_on_schedule", { + if (rgi->current_gtid.domain_id == 0 && + rgi->current_gtid.seq_no == 100) { + debug_sync_set_action(thd, + STRING_WITH_LEN("now SIGNAL reached_pause WAIT_FOR continue_worker")); + } + }); + DBUG_EXECUTE_IF("rpl_parallel_scheduled_gtid_0_x_100", { + if (rgi->current_gtid.domain_id == 0 && + rgi->current_gtid.seq_no == 100) { + debug_sync_set_action(thd, + STRING_WITH_LEN("now SIGNAL scheduled_gtid_0_x_100")); + } + }); +#endif + + if(unlikely(thd->wait_for_commit_ptr) && group_rgi != NULL) + { + /* + This indicates that we get a new GTID event in the middle of + a not completed event group. This is corrupt binlog (the master + will never write such binlog), so it does not happen unless + someone tries to inject wrong crafted binlog, but let us still + try to handle it somewhat nicely. + */ + group_rgi->cleanup_context(thd, true); + finish_event_group(rpt, group_rgi->gtid_sub_id, + group_rgi->parallel_entry, group_rgi); + rpt->loc_free_rgi(group_rgi); + } + + thd->tx_isolation= (enum_tx_isolation)thd->variables.tx_isolation; + in_event_group= true; + /* + If the standalone flag is set, then this event group consists of a + single statement (possibly preceeded by some Intvar_log_event and + similar), without any terminating COMMIT/ROLLBACK/XID. 
+ */ + group_standalone= + (0 != (static_cast(qev->ev)->flags2 & + Gtid_log_event::FL_STANDALONE)); + + event_gtid_sub_id= rgi->gtid_sub_id; + rgi->thd= thd; + + DBUG_EXECUTE_IF("gco_wait_delay_gtid_0_x_99", { + if (rgi->current_gtid.domain_id == 0 && rgi->current_gtid.seq_no == 99) { + debug_sync_set_action(thd, + STRING_WITH_LEN("now SIGNAL gco_wait_paused WAIT_FOR gco_wait_cont")); + } }); + + mysql_mutex_lock(&entry->LOCK_parallel_entry); + do_gco_wait(rgi, gco, &did_enter_cond, &old_stage); + skip_event_group= do_stop_handling(rgi); + if (likely(!skip_event_group)) + skip_event_group= do_ftwrl_wait(rgi, &did_enter_cond, &old_stage); + + /* + Register ourself to wait for the previous commit, if we need to do + such registration _and_ that previous commit has not already + occurred. + */ + register_wait_for_prior_event_group_commit(rgi, entry); + + unlock_or_exit_cond(thd, &entry->LOCK_parallel_entry, + &did_enter_cond, &old_stage); + + thd->wait_for_commit_ptr= &rgi->commit_orderer; + + if (opt_gtid_ignore_duplicates && + rgi->rli->mi->using_gtid != Master_info::USE_GTID_NO) + { + int res= + rpl_global_gtid_slave_state->check_duplicate_gtid(&rgi->current_gtid, + rgi); + if (res < 0) + { + /* Error. */ + slave_output_error_info(rgi, thd); + signal_error_to_sql_driver_thread(thd, rgi, 1); + } + else if (!res) + { + /* GTID already applied by another master connection, skip. */ + skip_event_group= true; + } + else + { + /* We have to apply the event. */ + } + } + /* + If we are optimistically running transactions in parallel, but this + particular event group should not run in parallel with what came + before, then wait now for the prior transaction to complete its + commit. 
+ */ + if (rgi->speculation == rpl_group_info::SPECULATE_WAIT && + (err= thd->wait_for_prior_commit())) + { + slave_output_error_info(rgi, thd); + signal_error_to_sql_driver_thread(thd, rgi, 1); + } + } + + group_rgi= rgi; + group_ending= is_group_ending(qev->ev, event_type); + /* + We do not unmark_start_commit() here in case of an explicit ROLLBACK + statement. Such events should be very rare, there is no real reason + to try to group commit them - on the contrary, it seems best to avoid + running them in parallel with following group commits, as with + ROLLBACK events we are already deep in dangerous corner cases with + mix of transactional and non-transactional tables or the like. And + avoiding the mark_start_commit() here allows us to keep an assertion + in ha_rollback_trans() that we do not rollback after doing + mark_start_commit(). + */ + if (group_ending == 1 && likely(!rgi->worker_error)) + { + /* + Do an extra check for (deadlock) kill here. This helps prevent a + lingering deadlock kill that occurred during normal DML processing to + propagate past the mark_start_commit(). If we detect a deadlock only + after mark_start_commit(), we have to unmark, which has at least a + theoretical possibility of leaving a window where it looks like all + transactions in a GCO have started committing, while in fact one + will need to rollback and retry. This is not supposed to be possible + (since there is a deadlock, at least one transaction should be + blocked from reaching commit), but this seems a fragile ensurance, + and there were historically a number of subtle bugs in this area. + */ + if (!thd->killed) + { + DEBUG_SYNC(thd, "rpl_parallel_before_mark_start_commit"); + if (thd->lex->stmt_accessed_temp_table()) + { + /* + Temporary tables are special, they require strict + single-threaded use as they have no locks protecting concurrent + access. 
Therefore, we cannot safely use the optimization of + overlapping the commit of this transaction with the start of the + following. + So we skip the early mark_start_commit() and also block any + wakeup_subsequent_commits() until this event group is fully + done, inside finish_event_group(). + */ + rgi->commit_orderer.wakeup_blocked= true; + } + else + rgi->mark_start_commit(); + DEBUG_SYNC(thd, "rpl_parallel_after_mark_start_commit"); + } + } + + /* + If the SQL thread is stopping, we just skip execution of all the + following event groups. We still do all the normal waiting and wakeup + processing between the event groups as a simple way to ensure that + everything is stopped and cleaned up correctly. + */ + if (likely(!rgi->worker_error) && !skip_event_group) + { + ++rgi->retry_event_count; +#ifndef DBUG_OFF + err= 0; + DBUG_EXECUTE_IF("rpl_parallel_simulate_temp_err_xid", + if (event_type == XID_EVENT) + { + thd->clear_error(); + thd->get_stmt_da()->reset_diagnostics_area(); + my_error(ER_LOCK_DEADLOCK, MYF(0)); + err= 1; + DEBUG_SYNC(thd, "rpl_parallel_simulate_temp_err_xid"); + }); + if (!err) +#endif + { + if (unlikely(thd->check_killed())) + { + thd->clear_error(); + thd->get_stmt_da()->reset_diagnostics_area(); + thd->send_kill_message(); + err= 1; + } + else + err= rpt_handle_event(qev, rpt); + } + delete_or_keep_event_post_apply(rgi, event_type, qev->ev); + DBUG_EXECUTE_IF("rpl_parallel_simulate_temp_err_gtid_0_x_100", + err= dbug_simulate_tmp_error(rgi, thd);); + if (unlikely(err)) + { + ulong max_retries= slave_trans_retries; + convert_kill_to_deadlock_error(rgi); + DBUG_EXECUTE_IF("rpl_mdev31655_zero_retries", + if ((rgi->current_gtid.seq_no % 1000) == 0) + max_retries= 0; + ); + if (has_temporary_error(thd) && max_retries > 0) + err= retry_event_group(rgi, rpt, qev); + } + } + else + { + delete qev->ev; + thd->get_stmt_da()->set_overwrite_status(true); + err= thd->wait_for_prior_commit(); + thd->get_stmt_da()->set_overwrite_status(false); + } + + 
end_of_group= + in_event_group && + ((group_standalone && !Log_event::is_part_of_group(event_type)) || + group_ending); + + rpt->loc_free_qev(qev); + + if (unlikely(err)) + { + if (!rgi->worker_error) + { + slave_output_error_info(rgi, thd); + signal_error_to_sql_driver_thread(thd, rgi, err); + } + thd->reset_killed(); + } + if (end_of_group) + { + in_event_group= false; + finish_event_group(rpt, event_gtid_sub_id, entry, rgi); + rpt->loc_free_rgi(rgi); + thd->rgi_slave= group_rgi= rgi= NULL; + skip_event_group= false; + DEBUG_SYNC(thd, "rpl_parallel_end_of_group"); + } + } + + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + /* + Now that we have the lock, we can move everything from our local free + lists to the real free lists that are also accessible from the SQL + driver thread. + */ + rpt->batch_free(); + + if ((events= rpt->event_queue) != NULL) + { + /* + Take next group of events from the replication pool. + This is faster than having to wakeup the pool manager thread to give + us a new event. + */ + rpt->dequeue1(events); + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + goto more_events; + } + + rpt->inuse_relaylog_refcount_update(); + + if (in_event_group && group_rgi->parallel_entry->force_abort) + { + /* + We are asked to abort, without getting the remaining events in the + current event group. + + We have to rollback the current transaction and update the last + sub_id value so that SQL thread will know we are done with the + half-processed event group. 
+ */ + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + signal_error_to_sql_driver_thread(thd, group_rgi, 1); + finish_event_group(rpt, group_rgi->gtid_sub_id, + group_rgi->parallel_entry, group_rgi); + in_event_group= false; + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + rpt->free_rgi(group_rgi); + thd->rgi_slave= group_rgi= NULL; + skip_event_group= false; + } + if (!in_event_group) + { + /* If we are in a FLUSH TABLES FOR READ LOCK, wait for it */ + while (rpt->current_entry && rpt->pause_for_ftwrl) + { + /* + We are currently in the delicate process of pausing parallel + replication while FLUSH TABLES WITH READ LOCK is starting. We must + not de-allocate the thread (setting rpt->current_owner= NULL) until + rpl_unpause_after_ftwrl() has woken us up. + */ + rpl_parallel_entry *e= rpt->current_entry; + /* + Wait for rpl_unpause_after_ftwrl() to wake us up. + Note that rpl_pause_for_ftwrl() may wait for 'e->pause_sub_id' + to change. This should happen eventually in finish_event_group() + */ + mysql_mutex_lock(&e->LOCK_parallel_entry); + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + if (rpt->pause_for_ftwrl) + mysql_cond_wait(&e->COND_parallel_entry, &e->LOCK_parallel_entry); + mysql_mutex_unlock(&e->LOCK_parallel_entry); + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + } + + rpt->current_owner= NULL; + /* Tell wait_for_done() that we are done, if it is waiting. 
*/ + if (likely(rpt->current_entry) && + unlikely(rpt->current_entry->force_abort)) + mysql_cond_broadcast(&rpt->COND_rpl_thread_stop); + + rpt->current_entry= NULL; + if (!rpt->stop) + rpt->pool->release_thread(rpt); + } + } +#ifdef WITH_WSREP + wsrep_after_command_before_result(thd); + wsrep_after_command_after_result(thd); + wsrep_close(thd); +#endif /* WITH_WSREP */ + + rpt->thd= NULL; + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + + thd->clear_error(); + thd->catalog= 0; + thd->reset_query(); + thd->reset_db(&null_clex_str); + thd_proc_info(thd, "Slave worker thread exiting"); + thd->temporary_tables= 0; + + THD_CHECK_SENTRY(thd); + server_threads.erase(thd); + delete thd; + + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + rpt->running= false; + mysql_cond_signal(&rpt->COND_rpl_thread); + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + + my_thread_end(); + + return NULL; +} + + +static void +dealloc_gco(group_commit_orderer *gco) +{ + mysql_cond_destroy(&gco->COND_group_commit_orderer); + my_free(gco); +} + +/** + Change thread count for global parallel worker threads + + @param pool parallel thread pool + @param new_count Number of threads to be in pool. 0 in shutdown + @param force Force thread count to new_count even if slave + threads are running + + By default we don't resize pool of there are running threads. + However during shutdown we will always do it. + This is needed as any_slave_sql_running() returns 1 during shutdown + as we don't want to access master_info while + Master_info_index::free_connections are running. 
+*/ + +static int +rpl_parallel_change_thread_count(rpl_parallel_thread_pool *pool, + uint32 new_count, bool force) +{ + uint32 i; + rpl_parallel_thread **old_list= NULL; + rpl_parallel_thread **new_list= NULL; + rpl_parallel_thread *new_free_list= NULL; + rpl_parallel_thread *rpt_array= NULL; + int res; + + if ((res= pool_mark_busy(pool, current_thd))) + return res; + + /* Protect against parallel pool resizes */ + if (pool->count == new_count) + { + pool_mark_not_busy(pool); + return 0; + } + + /* + If we are about to delete pool, do an extra check that there are no new + slave threads running since we marked pool busy + */ + if (!new_count && !force) + { + if (any_slave_sql_running(false)) + { + DBUG_PRINT("warning", + ("SQL threads running while trying to reset parallel pool")); + pool_mark_not_busy(pool); + return 0; // Ok to not resize pool + } + } + + /* + Allocate the new list of threads up-front. + That way, if we fail half-way, we only need to free whatever we managed + to allocate, and will not be left with a half-functional thread pool. 
+ */ + if (new_count && + !my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME|MY_ZEROFILL), + &new_list, new_count*sizeof(*new_list), + &rpt_array, new_count*sizeof(*rpt_array), + NULL)) + { + my_error(ER_OUTOFMEMORY, MYF(0), (int(new_count*sizeof(*new_list) + + new_count*sizeof(*rpt_array)))); + goto err; + } + + for (i= 0; i < new_count; ++i) + { + pthread_t th; + + new_list[i]= &rpt_array[i]; + new_list[i]->delay_start= true; + mysql_mutex_init(key_LOCK_rpl_thread, &new_list[i]->LOCK_rpl_thread, + MY_MUTEX_INIT_SLOW); + mysql_cond_init(key_COND_rpl_thread, &new_list[i]->COND_rpl_thread, NULL); + mysql_cond_init(key_COND_rpl_thread_queue, + &new_list[i]->COND_rpl_thread_queue, NULL); + mysql_cond_init(key_COND_rpl_thread_stop, + &new_list[i]->COND_rpl_thread_stop, NULL); + new_list[i]->pool= pool; + if (mysql_thread_create(key_rpl_parallel_thread, &th, &connection_attrib, + handle_rpl_parallel_thread, new_list[i])) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + goto err; + } + new_list[i]->next= new_free_list; + new_free_list= new_list[i]; + } + + /* + Grab each old thread in turn, and signal it to stop. + + Note that since we require all replication threads to be stopped before + changing the parallel replication worker thread pool, all the threads will + be already idle and will terminate immediately. 
+ */ + for (i= 0; i < pool->count; ++i) + { + rpl_parallel_thread *rpt; + + mysql_mutex_lock(&pool->LOCK_rpl_thread_pool); + while ((rpt= pool->free_list) == NULL) + mysql_cond_wait(&pool->COND_rpl_thread_pool, &pool->LOCK_rpl_thread_pool); + pool->free_list= rpt->next; + mysql_mutex_unlock(&pool->LOCK_rpl_thread_pool); + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + rpt->stop= true; + mysql_cond_signal(&rpt->COND_rpl_thread); + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + } + + for (i= 0; i < pool->count; ++i) + { + rpl_parallel_thread *rpt= pool->threads[i]; + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + while (rpt->running) + mysql_cond_wait(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread); + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + mysql_mutex_destroy(&rpt->LOCK_rpl_thread); + mysql_cond_destroy(&rpt->COND_rpl_thread); + while (rpt->qev_free_list) + { + rpl_parallel_thread::queued_event *next= rpt->qev_free_list->next; + my_free(rpt->qev_free_list); + rpt->qev_free_list= next; + } + while (rpt->rgi_free_list) + { + rpl_group_info *next= rpt->rgi_free_list->next; + delete rpt->rgi_free_list; + rpt->rgi_free_list= next; + } + while (rpt->gco_free_list) + { + group_commit_orderer *next= rpt->gco_free_list->next_gco; + dealloc_gco(rpt->gco_free_list); + rpt->gco_free_list= next; + } + } + + old_list= pool->threads; + if (new_count < pool->count) + pool->count= new_count; + pool->threads= new_list; + if (new_count > pool->count) + pool->count= new_count; + my_free(old_list); + pool->free_list= new_free_list; + for (i= 0; i < pool->count; ++i) + { + mysql_mutex_lock(&pool->threads[i]->LOCK_rpl_thread); + pool->threads[i]->delay_start= false; + pool->threads[i]->current_start_alter_id= 0; + pool->threads[i]->current_start_alter_domain_id= 0; + pool->threads[i]->reserved_start_alter_thread= false; + mysql_cond_signal(&pool->threads[i]->COND_rpl_thread); + while (!pool->threads[i]->running) + mysql_cond_wait(&pool->threads[i]->COND_rpl_thread, + 
&pool->threads[i]->LOCK_rpl_thread); + mysql_mutex_unlock(&pool->threads[i]->LOCK_rpl_thread); + } + + pool_mark_not_busy(pool); + + return 0; + +err: + if (new_list) + { + while (new_free_list) + { + mysql_mutex_lock(&new_free_list->LOCK_rpl_thread); + new_free_list->delay_start= false; + new_free_list->stop= true; + mysql_cond_signal(&new_free_list->COND_rpl_thread); + while (!new_free_list->running) + mysql_cond_wait(&new_free_list->COND_rpl_thread, + &new_free_list->LOCK_rpl_thread); + while (new_free_list->running) + mysql_cond_wait(&new_free_list->COND_rpl_thread, + &new_free_list->LOCK_rpl_thread); + mysql_mutex_unlock(&new_free_list->LOCK_rpl_thread); + new_free_list= new_free_list->next; + } + my_free(new_list); + } + pool_mark_not_busy(pool); + return 1; +} + +/* + Deactivate the parallel replication thread pool, if there are now no more + SQL threads running. +*/ + +int rpl_parallel_resize_pool_if_no_slaves(void) +{ + /* master_info_index is set to NULL on shutdown */ + if (opt_slave_parallel_threads > 0 && !any_slave_sql_running(false)) + return rpl_parallel_inactivate_pool(&global_rpl_thread_pool); + return 0; +} + + +/** + Pool activation is preceeded by taking a "lock" of pool_mark_busy + which guarantees the number of running slaves drops to zero atomicly + with the number of pool workers. + This resolves race between the function caller thread and one + that may be attempting to deactivate the pool. 
*/
int
rpl_parallel_activate_pool(rpl_parallel_thread_pool *pool)
{
  int rc= 0;
  struct pool_bkp_for_pfs* bkp= &pool->pfs_bkp;

  if ((rc= pool_mark_busy(pool, current_thd)))
    return rc; // killed

  if (!pool->count)
  {
    /*
      Pool is empty: release the busy mark and spawn the configured number
      of worker threads (the count change takes the busy mark itself).
    */
    pool_mark_not_busy(pool);
    rc= rpl_parallel_change_thread_count(pool, opt_slave_parallel_threads,
                                         0);
    if (!rc)
    {
      if (pool->count)
      {
        /*
          (Re-)size the performance-schema backup array to match the new
          pool size; it snapshots worker state for P_S after shutdown.
        */
        if (bkp->inited)
        {
          if (bkp->count != pool->count)
          {
            bkp->destroy();
            bkp->init(pool->count);
          }
        }
        else
          bkp->init(pool->count);
        bkp->is_valid= false; // Mark backup as stale during pool init
      }
    }

  }
  else
  {
    /* Pool already has workers; nothing to do. */
    pool_mark_not_busy(pool);
  }
  return rc;
}


/* Shut down all worker threads by resizing the pool to zero. */
int
rpl_parallel_inactivate_pool(rpl_parallel_thread_pool *pool)
{
  return rpl_parallel_change_thread_count(pool, 0, 0);
}


/*
  Move the locally accumulated qev/rgi/gco free-lists onto the thread's
  shared free lists in one batch, to reduce the number of times
  LOCK_rpl_thread has to be taken. Caller must hold LOCK_rpl_thread.
*/
void
rpl_parallel_thread::batch_free()
{
  mysql_mutex_assert_owner(&LOCK_rpl_thread);
  if (loc_qev_list)
  {
    /* Splice the local list in front of the shared free list. */
    *loc_qev_last_ptr_ptr= qev_free_list;
    qev_free_list= loc_qev_list;
    loc_qev_list= NULL;
    dequeue2(loc_qev_size);
    /* Signal that our queue can now accept more events. */
    mysql_cond_signal(&COND_rpl_thread_queue);
    loc_qev_size= 0;
    qev_free_pending= 0;
  }
  if (loc_rgi_list)
  {
    *loc_rgi_last_ptr_ptr= rgi_free_list;
    rgi_free_list= loc_rgi_list;
    loc_rgi_list= NULL;
  }
  if (loc_gco_list)
  {
    *loc_gco_last_ptr_ptr= gco_free_list;
    gco_free_list= loc_gco_list;
    loc_gco_list= NULL;
  }
}


/*
  Flush the locally accumulated dequeued-event count into the inuse_relaylog
  it was accumulated against.
*/
void
rpl_parallel_thread::inuse_relaylog_refcount_update()
{
  inuse_relaylog *ir= accumulated_ir_last;
  if (ir)
  {
    ir->dequeued_count+= accumulated_ir_count;
    accumulated_ir_count= 0;
    accumulated_ir_last= NULL;
  }
}


/*
  Get a queued_event, reusing one from the free list when possible.
  Caller must hold LOCK_rpl_thread. Returns NULL on out-of-memory
  (my_error() already reported).
*/
rpl_parallel_thread::queued_event *
rpl_parallel_thread::get_qev_common(Log_event *ev, ulonglong event_size)
{
  queued_event *qev;
  mysql_mutex_assert_owner(&LOCK_rpl_thread);
  if ((qev= qev_free_list))
    qev_free_list= qev->next;
  else if(!(qev= (queued_event *)my_malloc(PSI_INSTRUMENT_ME, sizeof(*qev), MYF(0))))
  {
    my_error(ER_OUTOFMEMORY, MYF(0), (int)sizeof(*qev));
    return NULL;
  }
  qev->typ= rpl_parallel_thread::queued_event::QUEUED_EVENT;
  qev->ev= ev;
  qev->event_size= (size_t)event_size;
  qev->next= NULL;
  return qev;
}


/* Get a queued_event and stamp it with the current relay-log position. */
rpl_parallel_thread::queued_event *
rpl_parallel_thread::get_qev(Log_event *ev, ulonglong event_size,
                             Relay_log_info *rli)
{
  queued_event *qev= get_qev_common(ev, event_size);
  if (!qev)
    return NULL;
  strcpy(qev->event_relay_log_name, rli->event_relay_log_name);
  qev->event_relay_log_pos= rli->event_relay_log_pos;
  qev->future_event_relay_log_pos= rli->future_event_relay_log_pos;
  strcpy(qev->future_event_master_log_name, rli->future_event_master_log_name);
  return qev;
}


/*
  Get a queued_event for re-queueing an event during transaction retry,
  taking position info from the original qev / explicit arguments instead
  of the Relay_log_info.
*/
rpl_parallel_thread::queued_event *
rpl_parallel_thread::retry_get_qev(Log_event *ev, queued_event *orig_qev,
                                   const char *relay_log_name,
                                   ulonglong event_pos, ulonglong event_size)
{
  queued_event *qev= get_qev_common(ev, event_size);
  if (!qev)
    return NULL;
  qev->rgi= orig_qev->rgi;
  strcpy(qev->event_relay_log_name, relay_log_name);
  qev->event_relay_log_pos= event_pos;
  qev->future_event_relay_log_pos= event_pos+event_size;
  strcpy(qev->future_event_master_log_name,
         orig_qev->future_event_master_log_name);
  return qev;
}


/*
  Release a queued_event onto the thread-local pending free list (lock-free),
  only occasionally flushing to the shared list under LOCK_rpl_thread.
*/
void
rpl_parallel_thread::loc_free_qev(rpl_parallel_thread::queued_event *qev)
{
  inuse_relaylog *ir= qev->ir;
  inuse_relaylog *last_ir= accumulated_ir_last;
  if (ir != last_ir)
  {
    /* Crossed into a different relay log; flush the old refcount first. */
    if (last_ir)
      inuse_relaylog_refcount_update();
    accumulated_ir_last= ir;
  }
  ++accumulated_ir_count;
  if (!loc_qev_list)
    loc_qev_last_ptr_ptr= &qev->next;
  else
    qev->next= loc_qev_list;
  loc_qev_list= qev;
  loc_qev_size+= qev->event_size;
  /*
    We want to release to the global free list only occasionally, to avoid
    having to take the LOCK_rpl_thread muted too many times.

    However, we do need to release regularly. If we let the unreleased part
    grow too large, then the SQL driver thread may go to sleep waiting for
    the queue to drop below opt_slave_parallel_max_queued, and this in turn
    can stall all other worker threads for more stuff to do.
  */
  if (++qev_free_pending >= QEV_BATCH_FREE ||
      loc_qev_size >= opt_slave_parallel_max_queued/3)
  {
    mysql_mutex_lock(&LOCK_rpl_thread);
    batch_free();
    mysql_mutex_unlock(&LOCK_rpl_thread);
  }
}


/* Release a queued_event directly to the shared free list (lock held). */
void
rpl_parallel_thread::free_qev(rpl_parallel_thread::queued_event *qev)
{
  mysql_mutex_assert_owner(&LOCK_rpl_thread);
  qev->next= qev_free_list;
  qev_free_list= qev;
}


/*
  Get an rpl_group_info for one event group, reusing from the free list
  when possible, and initialise it from the GTID event. Caller must hold
  LOCK_rpl_thread. Returns NULL on error (already reported).
*/
rpl_group_info*
rpl_parallel_thread::get_rgi(Relay_log_info *rli, Gtid_log_event *gtid_ev,
                             rpl_parallel_entry *e, ulonglong event_size)
{
  rpl_group_info *rgi;
  mysql_mutex_assert_owner(&LOCK_rpl_thread);
  if ((rgi= rgi_free_list))
  {
    rgi_free_list= rgi->next;
    rgi->reinit(rli);
  }
  else
  {
    if(!(rgi= new rpl_group_info(rli)))
    {
      my_error(ER_OUTOFMEMORY, MYF(0), (int)sizeof(*rgi));
      return NULL;
    }
    rgi->is_parallel_exec = true;
  }
  /* Lazily create deferred-event collection when replication filters are on. */
  if ((rgi->deferred_events_collecting= rli->mi->rpl_filter->is_on()) &&
      !rgi->deferred_events)
    rgi->deferred_events= new Deferred_log_events(rli);
  if (event_group_new_gtid(rgi, gtid_ev))
  {
    free_rgi(rgi);
    my_error(ER_OUT_OF_RESOURCES, MYF(MY_WME));
    return NULL;
  }
  rgi->parallel_entry= e;
  rgi->relay_log= rli->last_inuse_relaylog;
  rgi->retry_start_offset= rli->future_event_relay_log_pos-event_size;
  rgi->retry_event_count= 0;
  rgi->killed_for_retry= rpl_group_info::RETRY_KILL_NONE;
  /* rgi is transaction specific so we need to move this value to rgi */
  rgi->reserved_start_alter_thread= reserved_start_alter_thread;
  rgi->rpt= this;
  rgi->direct_commit_alter= false;
  rgi->finish_event_group_called= false;

  DBUG_ASSERT(!rgi->sa_info);
  /*
    We can remove the reserved_start_alter_thread flag.
    If we get more concurrent alter handle_split_alter will
    automatically set this flag again.
  */
  reserved_start_alter_thread= false;
  return rgi;
}


/* Release an rpl_group_info onto the thread-local list (no lock needed). */
void
rpl_parallel_thread::loc_free_rgi(rpl_group_info *rgi)
{
  DBUG_ASSERT(rgi->commit_orderer.waitee == NULL);
  rgi->free_annotate_event();
  if (!loc_rgi_list)
    loc_rgi_last_ptr_ptr= &rgi->next;
  else
    rgi->next= loc_rgi_list;
  loc_rgi_list= rgi;
}


/* Release an rpl_group_info to the shared free list (lock held). */
void
rpl_parallel_thread::free_rgi(rpl_group_info *rgi)
{
  mysql_mutex_assert_owner(&LOCK_rpl_thread);
  DBUG_ASSERT(rgi->commit_orderer.waitee == NULL);
  rgi->free_annotate_event();
  rgi->next= rgi_free_list;
  rgi_free_list= rgi;
}


/*
  Get a group_commit_orderer, reusing from the free list when possible.
  Caller must hold LOCK_rpl_thread. Returns NULL on out-of-memory.
*/
group_commit_orderer *
rpl_parallel_thread::get_gco(uint64 wait_count, group_commit_orderer *prev,
                             uint64 prior_sub_id)
{
  group_commit_orderer *gco;
  mysql_mutex_assert_owner(&LOCK_rpl_thread);
  if ((gco= gco_free_list))
    gco_free_list= gco->next_gco;
  else if(!(gco= (group_commit_orderer *)my_malloc(PSI_INSTRUMENT_ME, sizeof(*gco), MYF(0))))
  {
    my_error(ER_OUTOFMEMORY, MYF(0), (int)sizeof(*gco));
    return NULL;
  }
  mysql_cond_init(key_COND_group_commit_orderer,
                  &gco->COND_group_commit_orderer, NULL);
  gco->wait_count= wait_count;
  gco->prev_gco= prev;
  gco->next_gco= NULL;
  gco->prior_sub_id= prior_sub_id;
  gco->installed= false;
  gco->flags= 0;
#ifndef DBUG_OFF
  gco->gc_done= false;
#endif
  return gco;
}


/* Release a gco onto the thread-local list (no lock needed). */
void
rpl_parallel_thread::loc_free_gco(group_commit_orderer *gco)
{
#ifndef DBUG_OFF
  DBUG_ASSERT(!gco->gc_done);
  gco->gc_done= true;
#endif
  if (!loc_gco_list)
    loc_gco_last_ptr_ptr= &gco->next_gco;
  else
    gco->next_gco= loc_gco_list;
  loc_gco_list= gco;
}

/* Run the normal end-of-event-group processing for a START ALTER group. */
void rpl_group_info::finish_start_alter_event_group()
{
  finish_event_group(rpt, this->gtid_sub_id, this->parallel_entry, this);
}

rpl_parallel_thread::rpl_parallel_thread()
  : channel_name_length(0), last_error_number(0), last_error_timestamp(0),
    worker_idle_time(0), last_trans_retry_count(0), start_time(0)
{
}


rpl_parallel_thread_pool::rpl_parallel_thread_pool()
  : threads(0),
    free_list(0), count(0), inited(false),current_start_alters(0), busy(false),
    pfs_bkp{0, false, false, NULL}
{
}


/*
  Initialise the (empty) thread pool. NOTE: the `size' parameter is not
  used here; workers are spawned later by rpl_parallel_activate_pool().
*/
int
rpl_parallel_thread_pool::init(uint32 size)
{
  threads= NULL;
  free_list= NULL;
  count= 0;
  busy= false;

  mysql_mutex_init(key_LOCK_rpl_thread_pool, &LOCK_rpl_thread_pool,
                   MY_MUTEX_INIT_SLOW);
  mysql_cond_init(key_COND_rpl_thread_pool, &COND_rpl_thread_pool, NULL);
  inited= true;

  /*
    The pool is initially empty. Threads will be spawned when a slave SQL
    thread is started.
  */

  return 0;
}


void
rpl_parallel_thread_pool::destroy()
{
  deactivate();
  pfs_bkp.destroy();
  destroy_cond_mutex();
}

/* Stop all workers (pool size 0); no-op if the pool was never inited. */
void
rpl_parallel_thread_pool::deactivate()
{
  if (!inited)
    return;
  rpl_parallel_change_thread_count(this, 0, 1);
}

void
rpl_parallel_thread_pool::destroy_cond_mutex()
{
  if (!inited)
    return;
  mysql_mutex_destroy(&LOCK_rpl_thread_pool);
  mysql_cond_destroy(&COND_rpl_thread_pool);
  inited= false;
}


/*
  Wait for a worker thread to become idle. When one does, grab the thread for
  our use and return it.

  Note that we return with the worker threads's LOCK_rpl_thread mutex locked.
*/
struct rpl_parallel_thread *
rpl_parallel_thread_pool::get_thread(rpl_parallel_thread **owner,
                                     rpl_parallel_entry *entry)
{
  rpl_parallel_thread *rpt;

  DBUG_ASSERT(count > 0);
  mysql_mutex_lock(&LOCK_rpl_thread_pool);
  /* Also wait while the pool is busy (being resized/activated). */
  while (unlikely(busy) || !(rpt= free_list))
    mysql_cond_wait(&COND_rpl_thread_pool, &LOCK_rpl_thread_pool);
  free_list= rpt->next;
  mysql_mutex_unlock(&LOCK_rpl_thread_pool);
  mysql_mutex_lock(&rpt->LOCK_rpl_thread);
  rpt->current_owner= owner;
  rpt->current_entry= entry;

  return rpt;
}


/*
  Release a thread to the thread pool.
  The thread should be locked, and should not have any work queued for it.
+*/ +void +rpl_parallel_thread_pool::release_thread(rpl_parallel_thread *rpt) +{ + rpl_parallel_thread *list; + + mysql_mutex_assert_owner(&rpt->LOCK_rpl_thread); + DBUG_ASSERT(rpt->current_owner == NULL); + mysql_mutex_lock(&LOCK_rpl_thread_pool); + list= free_list; + rpt->next= list; + free_list= rpt; + if (!list) + mysql_cond_broadcast(&COND_rpl_thread_pool); + mysql_mutex_unlock(&LOCK_rpl_thread_pool); +} + +void +rpl_parallel_thread_pool::copy_pool_for_pfs(Relay_log_info *rli) +{ + if (pfs_bkp.inited) + { + for(uint i=0; ichannel_name_length) + { + pfs_rpt->channel_name_length= rpt->channel_name_length; + strmake(pfs_rpt->channel_name, rpt->channel_name, + rpt->channel_name_length); + } + pfs_rpt->thd= rpt->thd; + pfs_rpt->last_seen_gtid= rpt->last_seen_gtid; + if (rli->err_thread_id && rpt->thd->thread_id == rli->err_thread_id) + { + pfs_rpt->last_error_number= rli->last_error().number; + strmake(pfs_rpt->last_error_message, + rli->last_error().message, sizeof(rli->last_error().message)); + pfs_rpt->last_error_timestamp= rli->last_error().skr*1000000; + } + pfs_rpt->running= false; + pfs_rpt->worker_idle_time= rpt->get_worker_idle_time(); + pfs_rpt->last_trans_retry_count= rpt->last_trans_retry_count; + } + pfs_bkp.is_valid= true; + } +} + +/* + START ALTER , COMMIT ALTER / ROLLBACK ALTER scheduling + + Steps:- + 1. (For Gtid_log_event SA). Get the worker thread which is either + e->rpl_threads[i] is NULL means worker from poll has not been assigned yet + e->rpl_threads[i]->current_owner != &e->rpl_threads[i] + Thread has been released, or about to //same as choose_thread logic + !e->rpl_threads[i]->current_start_alter_id is 0 , safe to schedule. + We dont want to schedule on worker which already have been scheduled SA + but CA/RA has not been scheduled yet. current_start_alter_id will indicate + this. If we dont do this we will get deadlock. + 2. 
(For Gtid_log_event SA) + call choose_thread_internal so that e->rpl_threads[idx] is not null + update the current_start_alter_id + 3. (For Gtid_log_event SA) + update local e->pending_start_alters(local) variable and + pool->current_start_alters(global) + We need 2 status variable (global and local) because we can have + slave_domain_parallel_threads != pool->threads. + 4. (For CA/RA Gtid_log_event) + Update e->pending_start_alters and pool->current_start_alters + while holding mutex lock on pool (if SA is not assigned to + reserved thread) + + + @returns + true Worker allocated (choose_thread_internal called) + false Worker not allocated (choose_thread_internal not called) +*/ +static bool handle_split_alter(rpl_parallel_entry *e, + Gtid_log_event *gtid_ev, uint32 *idx, + //choose_thread_internal specific + bool *did_enter_cond, rpl_group_info* rgi, + PSI_stage_info *old_stage) +{ + uint16 flags_extra= gtid_ev->flags_extra; + bool thread_allocated= false; + //Step 1 + if (flags_extra & Gtid_log_event::FL_START_ALTER_E1 || + //This will arrange finding threads for CA/RA as well + //as concurrent DDL + e->pending_start_alters) + { + /* + j is needed for round robin scheduling, we will start with rpl_thread_idx + go till rpl_thread_max and then start with 0 to rpl_thread_idx + */ + int j= e->rpl_thread_idx; + for(uint i= 0; i < e->rpl_thread_max; i++) + { + if (!e->rpl_threads[j] || e->rpl_threads[j]->current_owner + != &e->rpl_threads[j] || !e->rpl_threads[j]->current_start_alter_id) + { + //This condition will hit atleast one time no matter what happens + *idx= j; + DBUG_PRINT("info", ("Start alter id %d", j)); + goto idx_found; + } + j++; + j= j % e->rpl_thread_max; + } + //We did not find and idx + DBUG_ASSERT(0); + return false; +idx_found: + e->rpl_thread_idx= *idx; + e->choose_thread_internal(*idx, did_enter_cond, rgi, old_stage); + thread_allocated= true; + if (flags_extra & Gtid_log_event::FL_START_ALTER_E1) + { + 
mysql_mutex_assert_owner(&e->rpl_threads[*idx]->LOCK_rpl_thread); + e->rpl_threads[e->rpl_thread_idx]->current_start_alter_id= gtid_ev->seq_no; + e->rpl_threads[e->rpl_thread_idx]->current_start_alter_domain_id= + gtid_ev->domain_id; + /* + We are locking LOCK_rpl_thread_pool becuase we are going to update + current_start_alters + */ + mysql_mutex_lock(&global_rpl_thread_pool.LOCK_rpl_thread_pool); + if (e->pending_start_alters < e->rpl_thread_max - 1 && + global_rpl_thread_pool.current_start_alters + < global_rpl_thread_pool.count - 1) + { + e->pending_start_alters++; + global_rpl_thread_pool.current_start_alters++; + } + else + { + e->rpl_threads[*idx]->reserved_start_alter_thread= true; + e->rpl_threads[*idx]->current_start_alter_id= 0; + e->rpl_threads[*idx]->current_start_alter_domain_id= 0; + } + mysql_mutex_unlock(&global_rpl_thread_pool.LOCK_rpl_thread_pool); + } + } + if(flags_extra & (Gtid_log_event::FL_COMMIT_ALTER_E1 | + Gtid_log_event::FL_ROLLBACK_ALTER_E1 )) + { + //Free the corrosponding rpt current_start_alter_id + for(uint i= 0; i < e->rpl_thread_max; i++) + { + if(e->rpl_threads[i] && + e->rpl_threads[i]->current_start_alter_id == gtid_ev->sa_seq_no && + e->rpl_threads[i]->current_start_alter_domain_id == gtid_ev->domain_id) + { + mysql_mutex_lock(&global_rpl_thread_pool.LOCK_rpl_thread_pool); + e->rpl_threads[i]->current_start_alter_id= 0; + e->rpl_threads[i]->current_start_alter_domain_id= 0; + global_rpl_thread_pool.current_start_alters--; + e->pending_start_alters--; + DBUG_PRINT("info", ("Commit/Rollback alter id %d", i)); + mysql_mutex_unlock(&global_rpl_thread_pool.LOCK_rpl_thread_pool); + break; + } + } + } + + return thread_allocated; + +} + + +/* + Obtain a worker thread that we can queue an event to. + + Each invocation allocates a new worker thread, to maximise + parallelism. 
However, only up to a maximum of + --slave-domain-parallel-threads workers can be occupied by a single + replication domain; after that point, we start re-using worker threads that + are still executing events that were queued earlier for this thread. + + We never queue more than --rpl-parallel-wait-queue_max amount of events + for one worker, to avoid the SQL driver thread using up all memory with + queued events while worker threads are stalling. + + Note that this function returns with rpl_parallel_thread::LOCK_rpl_thread + locked. Exception is if we were killed, in which case NULL is returned. + + The *did_enter_cond flag is set true if we had to wait for a worker thread + to become free (with mysql_cond_wait()). If so, old_stage will also be set, + and the LOCK_rpl_thread must be released with THD::EXIT_COND() instead + of mysql_mutex_unlock. + + When `gtid_ev' is not NULL the last worker thread will be returned again, + if it is still available. Otherwise a new worker thread is allocated. + + A worker for XA transaction is determined through xid hashing which + ensure for a XA-complete to be scheduled to the same-xid XA-prepare worker. 
+*/ +rpl_parallel_thread * +rpl_parallel_entry::choose_thread(rpl_group_info *rgi, bool *did_enter_cond, + PSI_stage_info *old_stage, + Gtid_log_event *gtid_ev) +{ + uint32 idx; + + idx= rpl_thread_idx; + if (gtid_ev) + { + if (++idx >= rpl_thread_max) + idx= 0; + //rpl_thread_idx will be updated handle_split_alter + if (handle_split_alter(this, gtid_ev, &idx, did_enter_cond, rgi, old_stage)) + return rpl_threads[idx]; + if (gtid_ev->flags2 & + (Gtid_log_event::FL_COMPLETED_XA | Gtid_log_event::FL_PREPARED_XA)) + { + idx= my_hash_sort(&my_charset_bin, gtid_ev->xid.key(), + gtid_ev->xid.key_length()) % rpl_thread_max; + } + rpl_thread_idx= idx; + } + return choose_thread_internal(idx, did_enter_cond, rgi, old_stage); +} + +rpl_parallel_thread * rpl_parallel_entry::choose_thread_internal(uint idx, + bool *did_enter_cond, rpl_group_info *rgi, + PSI_stage_info *old_stage) +{ + rpl_parallel_thread* thr= rpl_threads[idx]; + Relay_log_info *rli= rgi->rli; + if (thr) + { + *did_enter_cond= false; + mysql_mutex_lock(&thr->LOCK_rpl_thread); + for (;;) + { + if (thr->current_owner != &rpl_threads[idx]) + { + /* + The worker thread became idle, and returned to the free list and + possibly was allocated to a different request. So we should allocate + a new worker thread. + */ + unlock_or_exit_cond(rli->sql_driver_thd, &thr->LOCK_rpl_thread, + did_enter_cond, old_stage); + thr= NULL; + break; + } + else if (thr->queued_size <= opt_slave_parallel_max_queued) + { + /* The thread is ready to queue into. */ + break; + } + else + { + /* + We have reached the limit of how much memory we are allowed to use + for queuing events, so wait for the thread to consume some of its + queue. + */ + if (!*did_enter_cond) + { + /* + We need to do the debug_sync before ENTER_COND(). + Because debug_sync changes the thd->mysys_var->current_mutex, + and this can cause THD::awake to use the wrong mutex. 
+ */ +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("rpl_parallel_wait_queue_max", + { + debug_sync_set_action(rli->sql_driver_thd, + STRING_WITH_LEN("now SIGNAL wait_queue_ready")); + };); +#endif + rli->sql_driver_thd->ENTER_COND(&thr->COND_rpl_thread_queue, + &thr->LOCK_rpl_thread, + &stage_waiting_for_room_in_worker_thread, + old_stage); + *did_enter_cond= true; + } + + if (unlikely(rli->sql_driver_thd->check_killed(1))) + { + unlock_or_exit_cond(rli->sql_driver_thd, &thr->LOCK_rpl_thread, + did_enter_cond, old_stage); + my_error(ER_CONNECTION_KILLED, MYF(0)); +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("rpl_parallel_wait_queue_max", + { + debug_sync_set_action(rli->sql_driver_thd, + STRING_WITH_LEN("now SIGNAL wait_queue_killed")); + };); +#endif + slave_output_error_info(rgi, rli->sql_driver_thd); + return NULL; + } + + mysql_cond_wait(&thr->COND_rpl_thread_queue, &thr->LOCK_rpl_thread); + } + } + } + if (!thr) + rpl_threads[idx]= thr= global_rpl_thread_pool.get_thread(&rpl_threads[idx], + this); + + return thr; +} + +static void +free_rpl_parallel_entry(void *element) +{ + rpl_parallel_entry *e= (rpl_parallel_entry *)element; + while (e->current_gco) + { + group_commit_orderer *prev_gco= e->current_gco->prev_gco; + dealloc_gco(e->current_gco); + e->current_gco= prev_gco; + } + mysql_cond_destroy(&e->COND_parallel_entry); + mysql_mutex_destroy(&e->LOCK_parallel_entry); + my_free(e); +} + + +rpl_parallel::rpl_parallel() : + current(NULL), sql_thread_stopping(false) +{ + my_hash_init(PSI_INSTRUMENT_ME, &domain_hash, &my_charset_bin, 32, + offsetof(rpl_parallel_entry, domain_id), sizeof(uint32), + NULL, free_rpl_parallel_entry, HASH_UNIQUE); +} + + +void +rpl_parallel::reset() +{ + my_hash_reset(&domain_hash); + current= NULL; + sql_thread_stopping= false; +} + + +rpl_parallel::~rpl_parallel() +{ + my_hash_free(&domain_hash); +} + + +rpl_parallel_entry * +rpl_parallel::find(uint32 domain_id, Relay_log_info *rli) +{ + struct rpl_parallel_entry *e; + + if (!(e= 
(rpl_parallel_entry *)my_hash_search(&domain_hash, + (const uchar *)&domain_id, + sizeof(domain_id)))) + { + /* Allocate a new, empty one. */ + ulong count= opt_slave_domain_parallel_threads; + if (count == 0 || count > opt_slave_parallel_threads) + count= opt_slave_parallel_threads; + rpl_parallel_thread **p; + if (!my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME|MY_ZEROFILL), + &e, sizeof(*e), + &p, count*sizeof(*p), + NULL)) + { + my_error(ER_OUTOFMEMORY, MYF(0), (int)(sizeof(*e)+count*sizeof(*p))); + return NULL; + } + e->rpl_threads= p; + e->rpl_thread_max= count; + e->domain_id= domain_id; + e->stop_on_error_sub_id= (uint64)ULONGLONG_MAX; + e->pause_sub_id= (uint64)ULONGLONG_MAX; + e->pending_start_alters= 0; + e->rli= rli; + mysql_mutex_init(key_LOCK_parallel_entry, &e->LOCK_parallel_entry, + MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_parallel_entry, &e->COND_parallel_entry, NULL); + if (my_hash_insert(&domain_hash, (uchar *)e)) + { + free_rpl_parallel_entry(e); + return NULL; + } + } + else + { + DBUG_ASSERT(rli == e->rli); + + e->force_abort= false; + } + + return e; +} + +/** + Wait until all sql worker threads has stopped processing + + This is called when sql thread has been killed/stopped +*/ + +void +rpl_parallel::wait_for_done(THD *thd, Relay_log_info *rli) +{ + struct rpl_parallel_entry *e; + rpl_parallel_thread *rpt; + uint32 i, j; + Master_info *mi= rli->mi; + /* + First signal all workers that they must force quit; no more events will + be queued to complete any partial event groups executed. + */ + for (i= 0; i < domain_hash.records; ++i) + { + e= (struct rpl_parallel_entry *)my_hash_element(&domain_hash, i); + mysql_mutex_lock(&e->LOCK_parallel_entry); + /* + We want the worker threads to stop as quickly as is safe. If the slave + SQL threads are behind, we could have significant amount of events + queued for the workers, and we want to stop without waiting for them + all to be applied first. 
But if any event group has already started + executing in a worker, we want to be sure that all prior event groups + are also executed, so that we stop at a consistent point in the binlog + stream (per replication domain). + + At this point, we are holding LOCK_parallel_entry, and we know that no + event group after e->largest_started_sub_id has started running yet. We + record this value in e->stop_sub_id, and then each event group can check + their own sub_id against it. If their sub_id is strictly larger, then + that event group will be skipped. + + If we stop due to reaching the START SLAVE UNTIL condition, then we + need to continue executing any queued events up to that point. + */ + e->force_abort= true; + e->stop_sub_id= rli->stop_for_until ? + e->current_sub_id : e->largest_started_sub_id; + mysql_mutex_unlock(&e->LOCK_parallel_entry); + for (j= 0; j < e->rpl_thread_max; ++j) + { + if ((rpt= e->rpl_threads[j])) + { + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + if (rpt->current_owner == &e->rpl_threads[j]) + mysql_cond_signal(&rpt->COND_rpl_thread); + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + } + } + } +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("rpl_parallel_wait_for_done_trigger", + { + debug_sync_set_action(thd, + STRING_WITH_LEN("now SIGNAL wait_for_done_waiting")); + };); +#endif + + global_rpl_thread_pool.copy_pool_for_pfs(rli); + /* + Shutdown SA alter threads through marking their execution states + to force their early post-SA execution exit. Upon that the affected SA threads + change their state to COMPLETED, notify any waiting CA|RA and this thread. 
+ */ + start_alter_info *info=NULL; + mysql_mutex_lock(&mi->start_alter_list_lock); + List_iterator info_iterator(mi->start_alter_list); + mi->is_shutdown= true; // a sign to stop in concurrently coming in new SA:s + while ((info= info_iterator++)) + { + mysql_mutex_lock(&mi->start_alter_lock); + if (info->state == start_alter_state::COMPLETED) + { + mysql_mutex_unlock(&mi->start_alter_lock); + continue; + } + info->state= start_alter_state::ROLLBACK_ALTER; + // Any possible CA that is (will be) waiting will complete this ALTER instance + info->direct_commit_alter= true; + mysql_cond_broadcast(&info->start_alter_cond); // notify SA:s + mysql_mutex_unlock(&mi->start_alter_lock); + + // await SA in the COMPLETED state + mysql_mutex_lock(&mi->start_alter_lock); + while(info->state == start_alter_state::ROLLBACK_ALTER) + mysql_cond_wait(&info->start_alter_cond, &mi->start_alter_lock); + + DBUG_ASSERT(info->state == start_alter_state::COMPLETED); + + mysql_mutex_unlock(&mi->start_alter_lock); + } + mysql_mutex_unlock(&mi->start_alter_list_lock); + + DBUG_EXECUTE_IF("rpl_slave_stop_CA_before_binlog", + { + debug_sync_set_action(thd, STRING_WITH_LEN("now signal proceed_CA_1")); + }); + + for (i= 0; i < domain_hash.records; ++i) + { + e= (struct rpl_parallel_entry *)my_hash_element(&domain_hash, i); + for (j= 0; j < e->rpl_thread_max; ++j) + { + if ((rpt= e->rpl_threads[j])) + { + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + while (rpt->current_owner == &e->rpl_threads[j]) + mysql_cond_wait(&rpt->COND_rpl_thread_stop, &rpt->LOCK_rpl_thread); + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + } + } + } + // Now that all threads are docked, remained alter states are safe to destroy + mysql_mutex_lock(&mi->start_alter_list_lock); + info_iterator.rewind(); + while ((info= info_iterator++)) + { + info_iterator.remove(); + mysql_cond_destroy(&info->start_alter_cond); + my_free(info); + } + mi->is_shutdown= false; + mysql_mutex_unlock(&mi->start_alter_list_lock); +} + + +/* + This 
function handles the case where the SQL driver thread reached the + START SLAVE UNTIL position; we stop queueing more events but continue + processing remaining, already queued events; then use executes manual + STOP SLAVE; then this function signals to worker threads that they + should stop the processing of any remaining queued events. +*/ +void +rpl_parallel::stop_during_until() +{ + struct rpl_parallel_entry *e; + uint32 i; + + for (i= 0; i < domain_hash.records; ++i) + { + e= (struct rpl_parallel_entry *)my_hash_element(&domain_hash, i); + mysql_mutex_lock(&e->LOCK_parallel_entry); + if (e->force_abort) + e->stop_sub_id= e->largest_started_sub_id; + mysql_mutex_unlock(&e->LOCK_parallel_entry); + } +} + + +bool +rpl_parallel::workers_idle() +{ + struct rpl_parallel_entry *e; + uint32 i, max_i; + + max_i= domain_hash.records; + for (i= 0; i < max_i; ++i) + { + bool active; + e= (struct rpl_parallel_entry *)my_hash_element(&domain_hash, i); + mysql_mutex_lock(&e->LOCK_parallel_entry); + active= e->current_sub_id > e->last_committed_sub_id; + mysql_mutex_unlock(&e->LOCK_parallel_entry); + if (active) + break; + } + return (i == max_i); +} + + +int +rpl_parallel_entry::queue_master_restart(rpl_group_info *rgi, + Format_description_log_event *fdev) +{ + uint32 idx; + rpl_parallel_thread *thr; + rpl_parallel_thread::queued_event *qev; + Relay_log_info *rli= rgi->rli; + + /* + We only need to queue the server restart if we still have a thread working + on a (potentially partial) event group. + + If the last thread we queued for has finished, then it cannot have any + partial event group that needs aborting. + + Thus there is no need for the full complexity of choose_thread(). We only + need to check if we have a current worker thread, and queue for it if so. 
+ */ + idx= rpl_thread_idx; + thr= rpl_threads[idx]; + if (!thr) + return 0; + mysql_mutex_lock(&thr->LOCK_rpl_thread); + if (thr->current_owner != &rpl_threads[idx]) + { + /* No active worker thread, so no need to queue the master restart. */ + mysql_mutex_unlock(&thr->LOCK_rpl_thread); + return 0; + } + + if (!(qev= thr->get_qev(fdev, 0, rli))) + { + mysql_mutex_unlock(&thr->LOCK_rpl_thread); + return 1; + } + + qev->rgi= rgi; + qev->typ= rpl_parallel_thread::queued_event::QUEUED_MASTER_RESTART; + qev->entry_for_queued= this; + qev->ir= rli->last_inuse_relaylog; + ++qev->ir->queued_count; + thr->enqueue(qev); + mysql_cond_signal(&thr->COND_rpl_thread); + mysql_mutex_unlock(&thr->LOCK_rpl_thread); + return 0; +} + + +int +rpl_parallel::wait_for_workers_idle(THD *thd) +{ + uint32 i, max_i; + + /* + The domain_hash is only accessed by the SQL driver thread, so it is safe + to iterate over without a lock. + */ + max_i= domain_hash.records; + for (i= 0; i < max_i; ++i) + { + PSI_stage_info old_stage; + struct rpl_parallel_entry *e; + int err= 0; + + e= (struct rpl_parallel_entry *)my_hash_element(&domain_hash, i); + mysql_mutex_lock(&e->LOCK_parallel_entry); + ++e->need_sub_id_signal; + thd->ENTER_COND(&e->COND_parallel_entry, &e->LOCK_parallel_entry, + &stage_waiting_for_workers_idle, &old_stage); + while (e->current_sub_id > e->last_committed_sub_id) + { + if (unlikely(thd->check_killed())) + { + err= 1; + break; + } + mysql_cond_wait(&e->COND_parallel_entry, &e->LOCK_parallel_entry); + } + --e->need_sub_id_signal; + thd->EXIT_COND(&old_stage); + if (err) + return err; + } + return 0; +} + + +/* + Handle seeing a GTID during slave restart in GTID mode. If we stopped with + different replication domains having reached different positions in the relay + log, we need to skip event groups in domains that are further progressed. + + Updates the state with the seen GTID, and returns true if this GTID should + be skipped, false otherwise. 
+*/ +bool +process_gtid_for_restart_pos(Relay_log_info *rli, rpl_gtid *gtid) +{ + slave_connection_state::entry *gtid_entry; + slave_connection_state *state= &rli->restart_gtid_pos; + + if (likely(state->count() == 0) || + !(gtid_entry= state->find_entry(gtid->domain_id))) + return false; + if (gtid->server_id == gtid_entry->gtid.server_id) + { + uint64 seq_no= gtid_entry->gtid.seq_no; + if (gtid->seq_no >= seq_no) + { + /* + This domain has reached its start position. So remove it, so that + further events will be processed normally. + */ + state->remove(&gtid_entry->gtid); + } + return gtid->seq_no <= seq_no; + } + else + return true; +} + + +/* + This is used when we get an error during processing in do_event(); + We will not queue any event to the thread, but we still need to wake it up + to be sure that it will be returned to the pool. +*/ +static void +abandon_worker_thread(THD *thd, rpl_parallel_thread *cur_thread, + bool *did_enter_cond, PSI_stage_info *old_stage) +{ + unlock_or_exit_cond(thd, &cur_thread->LOCK_rpl_thread, + did_enter_cond, old_stage); + mysql_cond_signal(&cur_thread->COND_rpl_thread); +} + + +/* + do_event() is executed by the sql_driver_thd thread. + It's main purpose is to find a thread that can execute the query. + + @retval 0 ok, event was accepted + @retval 1 error + @retval -1 event should be executed serially, in the sql driver thread +*/ + +int +rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev, + ulonglong event_size) +{ + rpl_parallel_entry *e; + rpl_parallel_thread *cur_thread; + rpl_parallel_thread::queued_event *qev; + rpl_group_info *rgi= NULL; + Relay_log_info *rli= serial_rgi->rli; + enum Log_event_type typ; + bool is_group_event; + bool did_enter_cond= false; + PSI_stage_info old_stage; + + DBUG_EXECUTE_IF("slave_crash_if_parallel_apply", DBUG_SUICIDE();); + /* Handle master log name change, seen in Rotate_log_event. 
*/ + typ= ev->get_type_code(); + if (unlikely(typ == ROTATE_EVENT)) + { + Rotate_log_event *rev= static_cast<Rotate_log_event *>(ev); + if ((rev->server_id != global_system_variables.server_id || + rli->replicate_same_server_id) && + !rev->is_relay_log_event() && + !rli->is_in_group()) + { + memcpy(rli->future_event_master_log_name, + rev->new_log_ident, rev->ident_len+1); + rli->notify_group_master_log_name_update(); + } + } + + /* + Execute queries non-parallel if slave_skip_counter is set, as it is + easier to skip queries in single threaded mode. + */ + if (rli->slave_skip_counter) + return -1; + + /* Execute pre-10.0 event, which have no GTID, in single-threaded mode. */ + is_group_event= Log_event::is_group_event(typ); + if (unlikely(!current) && typ != GTID_EVENT && + !(unlikely(rli->gtid_skip_flag != GTID_SKIP_NOT) && is_group_event)) + return -1; + + /* Note: rli->data_lock is released by sql_delay_event(). */ + if (sql_delay_event(ev, rli->sql_driver_thd, serial_rgi)) + { + /* + If sql_delay_event() returns non-zero, it means that the wait timed out + due to slave stop. We should not queue the event in this case, it must + not be applied yet. + */ + delete ev; + return 1; + } + + if (unlikely(typ == FORMAT_DESCRIPTION_EVENT)) + { + Format_description_log_event *fdev= + static_cast<Format_description_log_event *>(ev); + if (fdev->created) + { + /* + This format description event marks a new binlog after a master server + restart. We are going to close all temporary tables to clean up any + possible left-overs after a prior master crash. + + Thus we need to wait for all prior events to execute to completion, + in case they need access to any of the temporary tables. + + We also need to notify the worker thread running the prior incomplete + event group (if any), as such event group signifies an incompletely + written group cut short by a master crash, and must be rolled back. 
+ */ + if (current->queue_master_restart(serial_rgi, fdev) || + wait_for_workers_idle(rli->sql_driver_thd)) + { + delete ev; + return 1; + } + } + } + else if (unlikely(typ == GTID_LIST_EVENT)) + { + Gtid_list_log_event *glev= static_cast<Gtid_list_log_event *>(ev); + rpl_gtid *list= glev->list; + uint32 count= glev->count; + rli->update_relay_log_state(list, count); + while (count) + { + process_gtid_for_restart_pos(rli, list); + ++list; + --count; + } + } + + /* + Stop queueing additional event groups once the SQL thread is requested to + stop. + + We have to queue any remaining events of any event group that has already + been partially queued, but after that we will just ignore any further + events the SQL driver thread may try to queue, and eventually it will stop. + */ + if ((typ == GTID_EVENT || !is_group_event) && rli->abort_slave) + sql_thread_stopping= true; + if (sql_thread_stopping) + { + delete ev; + /* + Return "no error"; normal stop is not an error, and otherwise the error + has already been recorded. + */ + return 0; + } + + if (unlikely(rli->gtid_skip_flag != GTID_SKIP_NOT) && is_group_event) + { + if (typ == GTID_EVENT) + rli->gtid_skip_flag= GTID_SKIP_NOT; + else + { + if (rli->gtid_skip_flag == GTID_SKIP_STANDALONE) + { + if (!Log_event::is_part_of_group(typ)) + rli->gtid_skip_flag= GTID_SKIP_NOT; + } + else + { + DBUG_ASSERT(rli->gtid_skip_flag == GTID_SKIP_TRANSACTION); + if (typ == XID_EVENT || typ == XA_PREPARE_LOG_EVENT || + (typ == QUERY_EVENT && // COMMIT/ROLLBACK are never compressed + (((Query_log_event *)ev)->is_commit() || + ((Query_log_event *)ev)->is_rollback()))) + rli->gtid_skip_flag= GTID_SKIP_NOT; + } + delete_or_keep_event_post_apply(serial_rgi, typ, ev); + return 0; + } + } + + Gtid_log_event *gtid_ev= NULL; + if (typ == GTID_EVENT) + { + rpl_gtid gtid; + gtid_ev= static_cast<Gtid_log_event *>(ev); + uint32 domain_id= (rli->mi->using_gtid == Master_info::USE_GTID_NO || + rli->mi->parallel_mode <= SLAVE_PARALLEL_MINIMAL ? 
+ 0 : gtid_ev->domain_id); + if (!(e= find(domain_id, rli))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(MY_WME)); + delete ev; + return 1; + } + current= e; + + gtid.domain_id= gtid_ev->domain_id; + gtid.server_id= gtid_ev->server_id; + gtid.seq_no= gtid_ev->seq_no; + rli->update_relay_log_state(&gtid, 1); + serial_rgi->gtid_ev_flags_extra= gtid_ev->flags_extra; + if (process_gtid_for_restart_pos(rli, &gtid)) + { + /* + This domain has progressed further into the relay log before the last + SQL thread restart. So we need to skip this event group to not doubly + apply it. + */ + rli->gtid_skip_flag= ((gtid_ev->flags2 & Gtid_log_event::FL_STANDALONE) ? + GTID_SKIP_STANDALONE : GTID_SKIP_TRANSACTION); + delete_or_keep_event_post_apply(serial_rgi, typ, ev); + return 0; + } + } + else + e= current; + + /* + Find a worker thread to queue the event for. + Prefer a new thread, so we maximise parallelism (at least for the group + commit). But do not exceed a limit of --slave-domain-parallel-threads; + instead re-use a thread that we queued for previously. + */ + cur_thread= + e->choose_thread(serial_rgi, &did_enter_cond, &old_stage, gtid_ev); + if (!cur_thread) + { + /* This means we were killed. The error is already signalled. 
*/ + delete ev; + return 1; + } + + if (!(qev= cur_thread->get_qev(ev, event_size, rli))) + { + abandon_worker_thread(rli->sql_driver_thd, cur_thread, + &did_enter_cond, &old_stage); + delete ev; + return 1; + } + + if (typ == GTID_EVENT) + { + bool new_gco; + enum_slave_parallel_mode mode= rli->mi->parallel_mode; + uchar gtid_flags= gtid_ev->flags2; + group_commit_orderer *gco; + uint8 force_switch_flag; + enum rpl_group_info::enum_speculation speculation; + + if (!(rgi= cur_thread->get_rgi(rli, gtid_ev, e, event_size))) + { + cur_thread->free_qev(qev); + abandon_worker_thread(rli->sql_driver_thd, cur_thread, + &did_enter_cond, &old_stage); + delete ev; + return 1; + } + + /* + We queue the event group in a new worker thread, to run in parallel + with previous groups. + + To preserve commit order within the replication domain, we set up + rgi->wait_commit_sub_id to make the new group commit only after the + previous group has committed. + + Event groups that group-committed together on the master can be run + in parallel with each other without restrictions. But one batch of + group-commits may not start before all groups in the previous batch + have initiated their commit phase; we set up rgi->gco to ensure that. + */ + rgi->wait_commit_sub_id= e->current_sub_id; + rgi->wait_commit_group_info= e->current_group_info; + + speculation= rpl_group_info::SPECULATE_NO; + new_gco= true; + force_switch_flag= 0; + gco= e->current_gco; + if (likely(gco)) + { + uint8 flags= gco->flags; + + if (mode <= SLAVE_PARALLEL_MINIMAL || + !(gtid_flags & Gtid_log_event::FL_GROUP_COMMIT_ID) || + e->last_commit_id != gtid_ev->commit_id || + /* + MULTI_BATCH is also set when the current gtid even being a member + of a commit group is flagged as DDL which disallows parallel. + */ + (gtid_flags & Gtid_log_event::FL_DDL)) + flags|= group_commit_orderer::MULTI_BATCH; + /* Make sure we do not attempt to run DDL in parallel speculatively. 
*/ + if (gtid_flags & Gtid_log_event::FL_DDL) + flags|= (force_switch_flag= group_commit_orderer::FORCE_SWITCH); + + if (!(flags & group_commit_orderer::MULTI_BATCH)) + { + /* + Still the same batch of event groups that group-committed together + on the master, so we can run in parallel. + */ + new_gco= false; + } + else if ((mode >= SLAVE_PARALLEL_OPTIMISTIC) && + !(flags & group_commit_orderer::FORCE_SWITCH)) + { + /* + In transactional parallel mode, we optimistically attempt to run + non-DDL in parallel. In case of conflicts, we catch the conflict as + a deadlock or other error, roll back and retry serially. + + The assumption is that only a few event groups will be + non-transactional or otherwise unsuitable for parallel apply. Those + transactions are still scheduled in parallel, but we set a flag that + will make the worker thread wait for everything before to complete + before starting. + */ + new_gco= false; + if (!(gtid_flags & Gtid_log_event::FL_TRANSACTIONAL) || + ( (!(gtid_flags & Gtid_log_event::FL_ALLOW_PARALLEL) || + (gtid_flags & Gtid_log_event::FL_WAITED)) && + (mode < SLAVE_PARALLEL_AGGRESSIVE))) + { + /* + This transaction should not be speculatively run in parallel with + what came before, either because it cannot safely be rolled back in + case of a conflict, or because it was marked as likely to conflict + and require expensive rollback and retry. + + Here we mark it as such, and then the worker thread will do a + wait_for_prior_commit() before starting it. We do not introduce a + new group_commit_orderer, since we still want following transactions + to run in parallel with transactions prior to this one. 
+ */ + speculation= rpl_group_info::SPECULATE_WAIT; + } + else + speculation= rpl_group_info::SPECULATE_OPTIMISTIC; + } + gco->flags= flags; + } + else + { + if (gtid_flags & Gtid_log_event::FL_DDL) + force_switch_flag= group_commit_orderer::FORCE_SWITCH; + } + rgi->speculation= speculation; + + if (gtid_flags & Gtid_log_event::FL_GROUP_COMMIT_ID) + e->last_commit_id= gtid_ev->commit_id; + else + e->last_commit_id= 0; + + if (new_gco) + { + /* + Do not run this event group in parallel with what came before; instead + wait for everything prior to at least have started its commit phase, to + avoid any risk of performing any conflicting action too early. + + Remember the count that marks the end of the previous batch of event + groups that run in parallel, and allocate a new gco. + */ + uint64 count= e->count_queued_event_groups; + + if (!(gco= cur_thread->get_gco(count, gco, e->current_sub_id))) + { + cur_thread->free_rgi(rgi); + cur_thread->free_qev(qev); + abandon_worker_thread(rli->sql_driver_thd, cur_thread, + &did_enter_cond, &old_stage); + delete ev; + return 1; + } + gco->flags|= force_switch_flag; + e->current_gco= gco; + } + rgi->gco= gco; + + qev->rgi= e->current_group_info= rgi; + e->current_sub_id= rgi->gtid_sub_id; + ++e->count_queued_event_groups; + } + else if (!is_group_event) + { + int err; + bool tmp; + /* + Events like ROTATE and FORMAT_DESCRIPTION. Do not run in worker thread. + Same for events not preceeded by GTID (we should not see those normally, + but they might be from an old master). 
+ */ + qev->rgi= serial_rgi; + + tmp= serial_rgi->is_parallel_exec; + serial_rgi->is_parallel_exec= true; + err= rpt_handle_event(qev, NULL); + serial_rgi->is_parallel_exec= tmp; + if (ev->is_relay_log_event()) + qev->future_event_master_log_pos= 0; + else if (typ == ROTATE_EVENT) + qev->future_event_master_log_pos= + (static_cast<Rotate_log_event *>(ev))->pos; + else + qev->future_event_master_log_pos= ev->log_pos; + delete_or_keep_event_post_apply(serial_rgi, typ, ev); + + if (err) + { + cur_thread->free_qev(qev); + abandon_worker_thread(rli->sql_driver_thd, cur_thread, + &did_enter_cond, &old_stage); + return 1; + } + /* + Queue a position update, so that the position will be updated in a + reasonable way relative to other events: + + - If the currently executing events are queued serially for a single + thread, the position will only be updated when everything before has + completed. + + - If we are executing multiple independent events in parallel, then at + least the position will not be updated until one of them has reached + the current point. + */ + qev->typ= rpl_parallel_thread::queued_event::QUEUED_POS_UPDATE; + qev->entry_for_queued= e; + } + else + { + qev->rgi= e->current_group_info; + } + + /* + Queue the event for processing. 
+ */ + qev->ir= rli->last_inuse_relaylog; + ++qev->ir->queued_count; + cur_thread->enqueue(qev); + unlock_or_exit_cond(rli->sql_driver_thd, &cur_thread->LOCK_rpl_thread, + &did_enter_cond, &old_stage); + mysql_cond_signal(&cur_thread->COND_rpl_thread); + + return 0; +} diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h new file mode 100644 index 00000000..a9cfefcb --- /dev/null +++ b/sql/rpl_parallel.h @@ -0,0 +1,479 @@ +#ifndef RPL_PARALLEL_H +#define RPL_PARALLEL_H + +#include "log_event.h" + + +struct rpl_parallel; +struct rpl_parallel_entry; +struct rpl_parallel_thread_pool; +extern struct rpl_parallel_thread_pool pool_bkp_for_pfs; + +class Relay_log_info; +struct inuse_relaylog; + + +/* + Structure used to keep track of the parallel replication of a batch of + event-groups that group-committed together on the master. + + It is used to ensure that every event group in one batch has reached the + commit stage before the next batch starts executing. + + Note the lifetime of this structure: + + - It is allocated when the first event in a new batch of group commits + is queued, from the free list rpl_parallel_entry::gco_free_list. + + - The gco for the batch currently being queued is owned by + rpl_parallel_entry::current_gco. The gco for a previous batch that has + been fully queued is owned by the gco->prev_gco pointer of the gco for + the following batch. + + - The worker thread waits on gco->COND_group_commit_orderer for + rpl_parallel_entry::count_committing_event_groups to reach wait_count + before starting; the first waiter links the gco into the next_gco + pointer of the gco of the previous batch for signalling. + + - When an event group reaches the commit stage, it signals the + COND_group_commit_orderer if its gco->next_gco pointer is non-NULL and + rpl_parallel_entry::count_committing_event_groups has reached + gco->next_gco->wait_count. + + - The gco lives until all its event groups have completed their commit. 
+ This is detected by rpl_parallel_entry::last_committed_sub_id being + greater than or equal gco->last_sub_id. Once this happens, the gco is + freed. Note that since update of last_committed_sub_id can happen + out-of-order, the thread that frees a given gco can be for any later + event group, not necessarily an event group from the gco being freed. +*/ +struct group_commit_orderer { + /* Wakeup condition, used with rpl_parallel_entry::LOCK_parallel_entry. */ + mysql_cond_t COND_group_commit_orderer; + uint64 wait_count; + group_commit_orderer *prev_gco; + group_commit_orderer *next_gco; + /* + The sub_id of last event group in the previous GCO. + Only valid if prev_gco != NULL. + */ + uint64 prior_sub_id; + /* + The sub_id of the last event group in this GCO. Only valid when next_gco + is non-NULL. + */ + uint64 last_sub_id; + /* + This flag is set when this GCO has been installed into the next_gco pointer + of the previous GCO. + */ + bool installed; + + enum force_switch_bits + { + /* + This flag is set for a GCO in which we have event groups with multiple + different commit_id values from the master. This happens when we + optimistically try to execute in parallel transactions not known to be + conflict-free. + + When this flag is set, in case of DDL we need to start a new GCO + regardless of current commit_id, as DDL is not safe to + speculatively apply in parallel with prior event groups. + */ + MULTI_BATCH= 1, + /* + This flag is set for a GCO that contains DDL. If set, it forces + a switch to a new GCO upon seeing a new commit_id, as DDL is not + safe to speculatively replicate in parallel with subsequent + transactions. + */ + FORCE_SWITCH= 2 + }; + uint8 flags; +#ifndef DBUG_OFF + /* + Flag set when the GCO has been freed and entered the free list, to catch + (in debug) errors in the complex lifetime of this object. 
+ */ + bool gc_done; +#endif +}; + + +struct rpl_parallel_thread { + bool delay_start; + bool running; + bool stop; + bool pause_for_ftwrl; + /* + 0 = No start alter assigned + >0 = Start alter assigned + */ + uint64 current_start_alter_id; + uint32 current_start_alter_domain_id; + /* + This flag is true when Start Alter just needs to be binlogged only. + This scenario will happens when there is congestion , and we can not + allocate independent worker to start alter. + */ + bool reserved_start_alter_thread; + mysql_mutex_t LOCK_rpl_thread; + mysql_cond_t COND_rpl_thread; + mysql_cond_t COND_rpl_thread_queue; + mysql_cond_t COND_rpl_thread_stop; + struct rpl_parallel_thread *next; /* For free list. */ + struct rpl_parallel_thread_pool *pool; + THD *thd; + /* + Who owns the thread, if any (it's a pointer into the + rpl_parallel_entry::rpl_threads array. + */ + struct rpl_parallel_thread **current_owner; + /* The rpl_parallel_entry of the owner. */ + rpl_parallel_entry *current_entry; + struct queued_event { + queued_event *next; + /* + queued_event can hold either an event to be executed, or just a binlog + position to be updated without any associated event. + */ + enum queued_event_t { + QUEUED_EVENT, + QUEUED_POS_UPDATE, + QUEUED_MASTER_RESTART + } typ; + union { + Log_event *ev; /* QUEUED_EVENT */ + rpl_parallel_entry *entry_for_queued; /* QUEUED_POS_UPDATE and + QUEUED_MASTER_RESTART */ + }; + rpl_group_info *rgi; + inuse_relaylog *ir; + ulonglong future_event_relay_log_pos; + char event_relay_log_name[FN_REFLEN]; + char future_event_master_log_name[FN_REFLEN]; + ulonglong event_relay_log_pos; + my_off_t future_event_master_log_pos; + size_t event_size; + } *event_queue, *last_in_queue; + uint64 queued_size; + /* These free lists are protected by LOCK_rpl_thread. */ + queued_event *qev_free_list; + rpl_group_info *rgi_free_list; + group_commit_orderer *gco_free_list; + /* + These free lists are local to the thread, so need not be protected by any + lock. 
They are moved to the global free lists in batches in the function + batch_free(), to reduce LOCK_rpl_thread contention. + + The lists are not NULL-terminated (as we do not need to traverse them). + Instead, if they are non-NULL, the loc_XXX_last_ptr_ptr points to the + `next' pointer of the last element, which is used to link into the front + of the global freelists. + */ + queued_event *loc_qev_list, **loc_qev_last_ptr_ptr; + size_t loc_qev_size; + uint64 qev_free_pending; + rpl_group_info *loc_rgi_list, **loc_rgi_last_ptr_ptr; + group_commit_orderer *loc_gco_list, **loc_gco_last_ptr_ptr; + /* These keep track of batch update of inuse_relaylog refcounts. */ + inuse_relaylog *accumulated_ir_last; + uint64 accumulated_ir_count; + + char channel_name[MAX_CONNECTION_NAME]; + uint channel_name_length; + rpl_gtid last_seen_gtid; + int last_error_number; + char last_error_message[MAX_SLAVE_ERRMSG]; + ulonglong last_error_timestamp; + ulonglong worker_idle_time; + ulong last_trans_retry_count; + ulonglong start_time; + void start_time_tracker() + { + start_time= microsecond_interval_timer(); + } + ulonglong compute_time_lapsed() + { + return (ulonglong)((microsecond_interval_timer() - start_time) / 1000000.0); + } + void add_to_worker_idle_time_and_reset() + { + worker_idle_time+= compute_time_lapsed(); + start_time=0; + } + ulonglong get_worker_idle_time() + { + if (start_time) + return (worker_idle_time + compute_time_lapsed()); + else + return worker_idle_time; + } + void enqueue(queued_event *qev) + { + if (last_in_queue) + last_in_queue->next= qev; + else + event_queue= qev; + last_in_queue= qev; + queued_size+= qev->event_size; + } + + void dequeue1(queued_event *list) + { + DBUG_ASSERT(list == event_queue); + event_queue= last_in_queue= NULL; + } + + void dequeue2(size_t dequeue_size) + { + queued_size-= dequeue_size; + } + + queued_event *get_qev_common(Log_event *ev, ulonglong event_size); + queued_event *get_qev(Log_event *ev, ulonglong event_size, + 
Relay_log_info *rli); + queued_event *retry_get_qev(Log_event *ev, queued_event *orig_qev, + const char *relay_log_name, + ulonglong event_pos, ulonglong event_size); + /* + Put a qev on the local free list, to be later released to the global free + list by batch_free(). + */ + void loc_free_qev(queued_event *qev); + /* + Release an rgi immediately to the global free list. Requires holding the + LOCK_rpl_thread mutex. + */ + void free_qev(queued_event *qev); + rpl_group_info *get_rgi(Relay_log_info *rli, Gtid_log_event *gtid_ev, + rpl_parallel_entry *e, ulonglong event_size); + /* + Put an gco on the local free list, to be later released to the global free + list by batch_free(). + */ + void loc_free_rgi(rpl_group_info *rgi); + /* + Release an rgi immediately to the global free list. Requires holding the + LOCK_rpl_thread mutex. + */ + void free_rgi(rpl_group_info *rgi); + group_commit_orderer *get_gco(uint64 wait_count, group_commit_orderer *prev, + uint64 first_sub_id); + /* + Put a gco on the local free list, to be later released to the global free + list by batch_free(). + */ + void loc_free_gco(group_commit_orderer *gco); + /* + Move all local free lists to the global ones. Requires holding + LOCK_rpl_thread. + */ + void batch_free(); + /* Update inuse_relaylog refcounts with what we have accumulated so far. */ + void inuse_relaylog_refcount_update(); + rpl_parallel_thread(); +}; + + +struct pool_bkp_for_pfs{ + uint32 count; + bool inited, is_valid; + struct rpl_parallel_thread **rpl_thread_arr; + void init(uint32 thd_count) + { + DBUG_ASSERT(thd_count); + rpl_thread_arr= (rpl_parallel_thread **) + my_malloc(PSI_INSTRUMENT_ME, + thd_count * sizeof(rpl_parallel_thread*), + MYF(MY_WME | MY_ZEROFILL)); + for (uint i=0; i last_committed_sub_id. This can be used to + safely refer back to previous event groups if they are still executing, + and ignore them if they completed, without requiring explicit + synchronisation between the threads. 
+ */ + uint64 last_committed_sub_id; + /* + The sub_id of the last event group in this replication domain that was + queued for execution by a worker thread. + */ + uint64 current_sub_id; + /* + The largest sub_id that has started its transaction. Protected by + LOCK_parallel_entry. + + (Transactions can start out-of-order, so this value signifies that no + transactions with larger sub_id have started, but not necessarily that all + transactions with smaller sub_id have started). + */ + uint64 largest_started_sub_id; + rpl_group_info *current_group_info; + /* + If we get an error in some event group, we set the sub_id of that event + group here. Then later event groups (with higher sub_id) can know not to + try to start (event groups that already started will be rolled back when + wait_for_prior_commit() returns error). + The value is ULONGLONG_MAX when no error occurred. + */ + uint64 stop_on_error_sub_id; + /* + During FLUSH TABLES WITH READ LOCK, transactions with sub_id larger than + this value must not start, but wait until the global read lock is released. + The value is set to ULONGLONG_MAX when no FTWRL is pending. + */ + uint64 pause_sub_id; + /* Total count of event groups queued so far. */ + uint64 count_queued_event_groups; + /* + Count of event groups that have started (but not necessarily completed) + the commit phase. We use this to know when every event group in a previous + batch of master group commits have started committing on the slave, so + that it is safe to start executing the events in the following batch. + */ + uint64 count_committing_event_groups; + /* The group_commit_orderer object for the events currently being queued. */ + group_commit_orderer *current_gco; + /* Relay log info of replication source for this entry. 
*/ + Relay_log_info *rli; + + rpl_parallel_thread * choose_thread(rpl_group_info *rgi, bool *did_enter_cond, + PSI_stage_info *old_stage, + Gtid_log_event *gtid_ev); + rpl_parallel_thread * + choose_thread_internal(uint idx, bool *did_enter_cond, rpl_group_info *rgi, + PSI_stage_info *old_stage); + int queue_master_restart(rpl_group_info *rgi, + Format_description_log_event *fdev); +}; +struct rpl_parallel { + HASH domain_hash; + rpl_parallel_entry *current; + bool sql_thread_stopping; + + rpl_parallel(); + ~rpl_parallel(); + void reset(); + rpl_parallel_entry *find(uint32 domain_id, Relay_log_info *rli); + void wait_for_done(THD *thd, Relay_log_info *rli); + void stop_during_until(); + bool workers_idle(); + int wait_for_workers_idle(THD *thd); + int do_event(rpl_group_info *serial_rgi, Log_event *ev, ulonglong event_size); +}; + + +extern struct rpl_parallel_thread_pool global_rpl_thread_pool; + + +extern int rpl_parallel_resize_pool_if_no_slaves(void); +extern int rpl_parallel_activate_pool(rpl_parallel_thread_pool *pool); +extern int rpl_parallel_inactivate_pool(rpl_parallel_thread_pool *pool); +extern bool process_gtid_for_restart_pos(Relay_log_info *rli, rpl_gtid *gtid); +extern int rpl_pause_for_ftwrl(THD *thd); +extern void rpl_unpause_after_ftwrl(THD *thd); + +#endif /* RPL_PARALLEL_H */ diff --git a/sql/rpl_record.cc b/sql/rpl_record.cc new file mode 100644 index 00000000..6266acf7 --- /dev/null +++ b/sql/rpl_record.cc @@ -0,0 +1,511 @@ +/* Copyright (c) 2007, 2013, Oracle and/or its affiliates. + Copyright (c) 2008, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "rpl_rli.h" +#include "rpl_record.h" +#include "slave.h" // Need to pull in slave_print_msg +#include "rpl_utility.h" +#include "rpl_rli.h" + +/** + Pack a record of data for a table into a format suitable for + transfer via the binary log. + + The format for a row in transfer with N fields is the following: + + ceil(N/8) null bytes: + One null bit for every column *regardless of whether it can be + null or not*. This simplifies the decoding. Observe that the + number of null bits is equal to the number of set bits in the + @c cols bitmap. The number of null bytes is the smallest number + of bytes necessary to store the null bits. + + Padding bits are 1. + + N packets: + Each field is stored in packed format. + + + @param table Table describing the format of the record + + @param cols Bitmap with a set bit for each column that should + be stored in the row + + @param row_data Pointer to memory where row will be written + + @param record Pointer to record that should be packed. It is + assumed that the pointer refers to either @c + record[0] or @c record[1], but no such check is + made since the code does not rely on that. + + @return The number of bytes written at @c row_data. 
+ */ +#if !defined(MYSQL_CLIENT) +size_t +pack_row(TABLE *table, MY_BITMAP const* cols, + uchar *row_data, const uchar *record) +{ + Field **p_field= table->field, *field; + int const null_byte_count= (bitmap_bits_set(cols) + 7) / 8; + uchar *pack_ptr = row_data + null_byte_count; + uchar *null_ptr = row_data; + my_ptrdiff_t const rec_offset= record - table->record[0]; + my_ptrdiff_t const def_offset= table->s->default_values - table->record[0]; + + DBUG_ENTER("pack_row"); + + /* + We write the null bits and the packed records using one pass + through all the fields. The null bytes are written little-endian, + i.e., the first fields are in the first byte. + */ + unsigned int null_bits= (1U << 8) - 1; + // Mask to mask out the correct but among the null bits + unsigned int null_mask= 1U; + for ( ; (field= *p_field) ; p_field++) + { + if (bitmap_is_set(cols, (uint)(p_field - table->field))) + { + my_ptrdiff_t offset; + if (field->is_null(rec_offset)) + { + offset= def_offset; + null_bits |= null_mask; + } + else + { + offset= rec_offset; + null_bits &= ~null_mask; + + /* + We only store the data of the field if it is non-null + + For big-endian machines, we have to make sure that the + length is stored in little-endian format, since this is the + format used for the binlog. 
+ */ +#ifndef DBUG_OFF + const uchar *old_pack_ptr= pack_ptr; +#endif + pack_ptr= field->pack(pack_ptr, field->ptr + offset, + field->max_data_length()); + DBUG_PRINT("debug", ("field: %s; real_type: %d, pack_ptr: %p;" + " pack_ptr':%p; bytes: %d", + field->field_name.str, field->real_type(), + old_pack_ptr,pack_ptr, + (int) (pack_ptr - old_pack_ptr))); + DBUG_DUMP("packed_data", old_pack_ptr, pack_ptr - old_pack_ptr); + } + + null_mask <<= 1; + if ((null_mask & 0xFF) == 0) + { + DBUG_ASSERT(null_ptr < row_data + null_byte_count); + null_mask = 1U; + *null_ptr++ = null_bits; + null_bits= (1U << 8) - 1; + } + } + } + + /* + Write the last (partial) byte, if there is one + */ + if ((null_mask & 0xFF) > 1) + { + DBUG_ASSERT(null_ptr < row_data + null_byte_count); + *null_ptr++ = null_bits; + } + + /* + The null pointer should now point to the first byte of the + packed data. If it doesn't, something is very wrong. + */ + DBUG_ASSERT(null_ptr == row_data + null_byte_count); + DBUG_DUMP("row_data", row_data, pack_ptr - row_data); + DBUG_RETURN(static_cast(pack_ptr - row_data)); +} +#endif + + +/** + Unpack a row into @c table->record[0]. + + The function will always unpack into the @c table->record[0] + record. This is because there are too many dependencies on where + the various member functions of Field and subclasses expect to + write. + + The row is assumed to only consist of the fields for which the + corresponding bit in bitset @c cols is set; the other parts of the + record are left alone. + + At most @c colcnt columns are read: if the table is larger than + that, the remaining fields are not filled in. + + @note The relay log information can be NULL, which means that no + checking or comparison with the source table is done, simply + because it is not used. This feature is used by MySQL Backup to + unpack a row from from the backup image, but can be used for other + purposes as well. 
+ + @param rli Relay log info, which can be NULL + @param table Table to unpack into + @param colcnt Number of columns to read from record + @param row_data + Packed row data + @param cols Pointer to bitset describing columns to fill in + @param curr_row_end + Pointer to variable that will hold the value of the + one-after-end position for the current row + @param master_reclength + Pointer to variable that will be set to the length of the + record on the master side + @param row_end + Pointer to variable that will hold the value of the + end position for the data in the row event + + @retval 0 No error + + @retval HA_ERR_GENERIC + A generic, internal, error caused the unpacking to fail. + @retval HA_ERR_CORRUPT_EVENT + Found error when trying to unpack fields. + */ +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +int +unpack_row(rpl_group_info *rgi, + TABLE *table, uint const colcnt, + uchar const *const row_data, MY_BITMAP const *cols, + uchar const **const current_row_end, ulong *const master_reclength, + uchar const *const row_end) +{ + int error; + DBUG_ENTER("unpack_row"); + DBUG_ASSERT(row_data); + DBUG_ASSERT(table); + size_t const master_null_byte_count= (bitmap_bits_set(cols) + 7) / 8; + + uchar const *null_ptr= row_data; + uchar const *pack_ptr= row_data + master_null_byte_count; + + Field **const begin_ptr = table->field; + Field **field_ptr; + Field **const end_ptr= begin_ptr + colcnt; + + if (bitmap_is_clear_all(cols)) + { + /** + There was no data sent from the master, so there is + nothing to unpack. 
+ */ + *current_row_end= pack_ptr; + *master_reclength= 0; + DBUG_RETURN(0); + } + DBUG_ASSERT(null_ptr < row_data + master_null_byte_count); + + // Mask to mask out the correct bit among the null bits + unsigned int null_mask= 1U; + // The "current" null bits + unsigned int null_bits= *null_ptr++; + uint i= 0; + table_def *tabledef= NULL; + TABLE *conv_table= NULL; + bool table_found= rgi && rgi->get_table_data(table, &tabledef, &conv_table); + DBUG_PRINT("debug", ("Table data: table_found: %d, tabldef: %p, conv_table: %p", + table_found, tabledef, conv_table)); + DBUG_ASSERT(table_found); + + /* + If rgi is NULL it means that there is no source table and that the + row shall just be unpacked without doing any checks. This feature + is used by MySQL Backup, but can be used for other purposes as + well. + */ + if (rgi && !table_found) + DBUG_RETURN(HA_ERR_GENERIC); + + for (field_ptr= begin_ptr ; field_ptr < end_ptr && *field_ptr ; ++field_ptr) + { + /* + If there is a conversion table, we pick up the field pointer to + the conversion table. If the conversion table or the field + pointer is NULL, no conversions are necessary. + */ + Field *conv_field= + conv_table ? conv_table->field[field_ptr - begin_ptr] : NULL; + Field *const f= + conv_field ? conv_field : *field_ptr; + DBUG_PRINT("debug", ("Conversion %srequired for field '%s' (#%ld)", + conv_field ? "" : "not ", + (*field_ptr)->field_name.str, + (long) (field_ptr - begin_ptr))); + DBUG_ASSERT(f != NULL); + + /* + No need to bother about columns that does not exist: they have + gotten default values when being emptied above. 
+ */ + if (bitmap_is_set(cols, (uint)(field_ptr - begin_ptr))) + { + if ((null_mask & 0xFF) == 0) + { + DBUG_ASSERT(null_ptr < row_data + master_null_byte_count); + null_mask= 1U; + null_bits= *null_ptr++; + } + + DBUG_ASSERT(null_mask & 0xFF); // One of the 8 LSB should be set + + if (null_bits & null_mask) + { + if (f->maybe_null()) + { + DBUG_PRINT("debug", ("Was NULL; null mask: 0x%x; null bits: 0x%x", + null_mask, null_bits)); + /** + Calling reset just in case one is unpacking on top a + record with data. + + This could probably go into set_null() but doing so, + (i) triggers assertion in other parts of the code at + the moment; (ii) it would make us reset the field, + always when setting null, which right now doesn't seem + needed anywhere else except here. + + TODO: maybe in the future we should consider moving + the reset to make it part of set_null. But then + the assertions triggered need to be + addressed/revisited. + */ + f->reset(); + f->set_null(); + } + else + { + THD *thd= f->table->in_use; + + f->set_default(); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_BAD_NULL_ERROR, + ER_THD(thd, ER_BAD_NULL_ERROR), + f->field_name.str); + } + } + else + { + f->set_notnull(); + + /* + We only unpack the field if it was non-null. + Use the master's size information if available else call + normal unpack operation. 
+ */ + uint16 const metadata= tabledef->field_metadata(i); +#ifdef DBUG_TRACE + uchar const *const old_pack_ptr= pack_ptr; +#endif + + pack_ptr= f->unpack(f->ptr, pack_ptr, row_end, metadata); + DBUG_PRINT("debug", ("field: %s; metadata: 0x%x;" + " pack_ptr: %p; pack_ptr': %p; bytes: %d", + f->field_name.str, metadata, + old_pack_ptr, pack_ptr, + (int) (pack_ptr - old_pack_ptr))); + if (!pack_ptr) + { + rgi->rli->report(ERROR_LEVEL, ER_SLAVE_CORRUPT_EVENT, + rgi->gtid_info(), + "Could not read field '%s' of table '%s.%s'", + f->field_name.str, table->s->db.str, + table->s->table_name.str); + DBUG_RETURN(HA_ERR_CORRUPT_EVENT); + } + } + + /* + If conv_field is set, then we are doing a conversion. In this + case, we have unpacked the master data to the conversion + table, so we need to copy the value stored in the conversion + table into the final table and do the conversion at the same time. + */ + if (conv_field) + { + Copy_field copy; +#ifndef DBUG_OFF + char source_buf[MAX_FIELD_WIDTH]; + char value_buf[MAX_FIELD_WIDTH]; + String source_type(source_buf, sizeof(source_buf), system_charset_info); + String value_string(value_buf, sizeof(value_buf), system_charset_info); + conv_field->sql_type(source_type); + conv_field->val_str(&value_string); + DBUG_PRINT("debug", ("Copying field '%s' of type '%s' with value '%s'", + (*field_ptr)->field_name.str, + source_type.c_ptr_safe(), value_string.c_ptr_safe())); +#endif + copy.set(*field_ptr, f, TRUE); + (*copy.do_copy)(©); +#ifndef DBUG_OFF + char target_buf[MAX_FIELD_WIDTH]; + String target_type(target_buf, sizeof(target_buf), system_charset_info); + (*field_ptr)->sql_type(target_type); + (*field_ptr)->val_str(&value_string); + DBUG_PRINT("debug", ("Value of field '%s' of type '%s' is now '%s'", + (*field_ptr)->field_name.str, + target_type.c_ptr_safe(), value_string.c_ptr_safe())); +#endif + } + + null_mask <<= 1; + } + i++; + } + + /* + throw away master's extra fields + */ + uint max_cols= MY_MIN(tabledef->size(), 
cols->n_bits); + for (; i < max_cols; i++) + { + if (bitmap_is_set(cols, i)) + { + if ((null_mask & 0xFF) == 0) + { + DBUG_ASSERT(null_ptr < row_data + master_null_byte_count); + null_mask= 1U; + null_bits= *null_ptr++; + } + DBUG_ASSERT(null_mask & 0xFF); // One of the 8 LSB should be set + + if (!((null_bits & null_mask) && tabledef->maybe_null(i))) { + uint32 len= tabledef->calc_field_size(i, (uchar *) pack_ptr); + DBUG_DUMP("field_data", pack_ptr, len); + pack_ptr+= len; + } + null_mask <<= 1; + } + } + + /* + Add Extra slave persistent columns + */ + if (unlikely(error= fill_extra_persistent_columns(table, cols->n_bits))) + DBUG_RETURN(error); + + /* + We should now have read all the null bytes, otherwise something is + really wrong. + */ + DBUG_ASSERT(null_ptr == row_data + master_null_byte_count); + + DBUG_DUMP("row_data", row_data, pack_ptr - row_data); + + *current_row_end = pack_ptr; + if (master_reclength) + { + if (*field_ptr) + *master_reclength = (ulong)((*field_ptr)->ptr - table->record[0]); + else + *master_reclength = table->s->reclength; + } + + DBUG_RETURN(0); +} + +/** + Fills @c table->record[0] with default values. + + First @c restore_record() is called to restore the default values for + record concerning the given table. Then, if @c check is true, + a check is performed to see if fields are have default value or can + be NULL. Otherwise error is reported. + + @param table Table whose record[0] buffer is prepared. + @param skip Number of columns for which default/nullable check + should be skipped. + @param check Specifies if lack of default error needs checking. + + @returns 0 on success or a handler level error code + */ +int prepare_record(TABLE *const table, const uint skip, const bool check) +{ + DBUG_ENTER("prepare_record"); + + restore_record(table, s->default_values); + + /* + This skip should be revisited in 6.0, because in 6.0 RBR one + can have holes in the row (as the grain of the writeset is + the column and not the entire row). 
+ */
+  if (skip >= table->s->fields || !check)
+    DBUG_RETURN(0);
+
+  /*
+    For the extra fields on the slave, we check if they have a default.
+    The check follows the same rules as the INSERT query without specifying an
+    explicit value for a field not having the explicit default
+    (@c check_that_all_fields_are_given_values()).
+  */
+  for (Field **field_ptr= table->field+skip; *field_ptr; ++field_ptr)
+  {
+    Field *const f= *field_ptr;
+    if ((f->flags & NO_DEFAULT_VALUE_FLAG) &&
+        (f->real_type() != MYSQL_TYPE_ENUM))
+    {
+      /* No real default available: still call set_default() and warn,
+         rather than fail the statement. */
+      THD *thd= f->table->in_use;
+      f->set_default();
+      push_warning_printf(thd,
+                          Sql_condition::WARN_LEVEL_WARN,
+                          ER_NO_DEFAULT_FOR_FIELD,
+                          ER_THD(thd, ER_NO_DEFAULT_FOR_FIELD),
+                          f->field_name.str);
+    }
+  }
+
+  DBUG_RETURN(0);
+}
+/**
+  Fills @c table->record[0] with computed values of extra persistent
+  columns which are present on slave but not on master.
+
+  @param table        Table whose record[0] buffer is prepared.
+  @param master_cols  No of columns on master
+  @returns 0 on success
+
+  NOTE(review): if several such columns fail, only the status of the
+  last evaluated column is returned (error is overwritten per field).
+ */
+int fill_extra_persistent_columns(TABLE *table, int master_cols)
+{
+  int error= 0;
+  Field **vfield_ptr, *vfield;
+
+  if (!table->vfield)
+    return 0;
+  for (vfield_ptr= table->vfield; *vfield_ptr; ++vfield_ptr)
+  {
+    vfield= *vfield_ptr;
+    // Only slave-side stored generated columns (index >= master's count)
+    if (vfield->field_index >= master_cols && vfield->stored_in_db())
+    {
+      bitmap_set_bit(table->write_set, vfield->field_index);
+      error= vfield->vcol_info->expr->save_in_field(vfield,0);
+    }
+  }
+  return error;
+}
+#endif // HAVE_REPLICATION
diff --git a/sql/rpl_record.h b/sql/rpl_record.h
new file mode 100644
index 00000000..357dc761
--- /dev/null
+++ b/sql/rpl_record.h
@@ -0,0 +1,43 @@
+/* Copyright (c) 2007, 2013, Oracle and/or its affiliates.
+   Copyright (c) 2008, 2013, SkySQL Ab.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#ifndef RPL_RECORD_H
+#define RPL_RECORD_H
+
+/* NOTE(review): the include target was lost in extraction (angle-bracketed
+   header names were stripped) -- restore from upstream sql/rpl_record.h. */
+#include
+
+struct rpl_group_info;
+struct TABLE;
+typedef struct st_bitmap MY_BITMAP;
+
+#if !defined(MYSQL_CLIENT)
+size_t pack_row(TABLE* table, MY_BITMAP const* cols,
+                uchar *row_data, const uchar *data);
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+int unpack_row(rpl_group_info *rgi,
+               TABLE *table, uint const colcnt,
+               uchar const *const row_data, MY_BITMAP const *cols,
+               uchar const **const curr_row_end, ulong *const master_reclength,
+               uchar const *const row_end);
+
+// Fill table's record[0] with default values.
+int prepare_record(TABLE *const table, const uint skip, const bool check);
+int fill_extra_persistent_columns(TABLE *table, int master_cols);
+#endif
+
+#endif
diff --git a/sql/rpl_record_old.cc b/sql/rpl_record_old.cc
new file mode 100644
index 00000000..496e781d
--- /dev/null
+++ b/sql/rpl_record_old.cc
@@ -0,0 +1,199 @@
+/* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "mariadb.h"
+#include "sql_priv.h"
+#include "rpl_rli.h"
+#include "rpl_record_old.h"
+#include "log_event.h"                          // Log_event_type
+
+/*
+  Pack a record into the pre-5.1 ("old") row format: the table's null
+  bytes are copied verbatim into the row, followed by every column
+  selected in @c cols packed with Field::pack().
+
+  Returns the number of bytes written at @c row_data.
+*/
+size_t
+pack_row_old(TABLE *table, MY_BITMAP const* cols,
+             uchar *row_data, const uchar *record)
+{
+  Field **p_field= table->field, *field;
+  int n_null_bytes= table->s->null_bytes;
+  uchar *ptr;
+  uint i;
+  // Offsets from record[0] to the record being packed / default values
+  my_ptrdiff_t const rec_offset= record - table->record[0];
+  my_ptrdiff_t const def_offset= table->s->default_values - table->record[0];
+  memcpy(row_data, record, n_null_bytes);
+  ptr= row_data+n_null_bytes;
+
+  for (i= 0 ; (field= *p_field) ; i++, p_field++)
+  {
+    if (bitmap_is_set(cols,i))
+    {
+      /* NULL fields are packed from the default values image. */
+      my_ptrdiff_t const offset=
+        field->is_null(rec_offset) ? def_offset : rec_offset;
+      field->move_field_offset(offset);
+      ptr= field->pack(ptr, field->ptr);
+      field->move_field_offset(-offset);
+    }
+  }
+  return (static_cast(ptr - row_data));
+}
+
+
+/*
+  Unpack a row into a record.
+
+  SYNOPSIS
+    unpack_row()
+    rli     Relay log info
+    table   Table to unpack into
+    colcnt  Number of columns to read from record
+    record  Record where the data should be unpacked
+    row     Packed row data
+    cols    Pointer to columns data to fill in
+    row_end Pointer to variable that will hold the value of the
+            one-after-end position for the row
+    master_reclength
+            Pointer to variable that will be set to the length of the
+            record on the master side
+    rw_set  Pointer to bitmap that holds either the read_set or the
+            write_set of the table
+
+  DESCRIPTION
+
+      The row is assumed to only consist of the fields for which the
+      bitset represented by 'arr' and 'bits'; the other parts of the
+      record are left alone.
+
+      At most 'colcnt' columns are read: if the table is larger than
+      that, the remaining fields are not filled in.
+
+  RETURN VALUE
+
+      Error code, or zero if no error. The following error codes can
+      be returned:
+
+      ER_NO_DEFAULT_FOR_FIELD
+        Returned if one of the fields existing on the slave but not on
+        the master does not have a default value (and isn't nullable)
+      ER_SLAVE_CORRUPT_EVENT
+        Wrong data for field found.
+ */
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+int
+unpack_row_old(rpl_group_info *rgi,
+               TABLE *table, uint const colcnt, uchar *record,
+               uchar const *row, const uchar *row_buffer_end,
+               MY_BITMAP const *cols,
+               uchar const **row_end, ulong *master_reclength,
+               MY_BITMAP* const rw_set, Log_event_type const event_type)
+{
+  DBUG_ASSERT(record && row);
+  my_ptrdiff_t const offset= record - (uchar*) table->record[0];
+  size_t master_null_bytes= table->s->null_bytes;
+
+  if (colcnt != table->s->fields)
+  {
+    /*
+      Scan backwards from the last common column to find how many null
+      bytes the master's (shorter) row actually uses.
+    */
+    Field **fptr= &table->field[colcnt-1];
+    do
+      master_null_bytes= (*fptr)->last_null_byte();
+    while (master_null_bytes == Field::LAST_NULL_BYTE_UNDEF &&
+           fptr-- > table->field);
+
+    /*
+      If master_null_bytes is LAST_NULL_BYTE_UNDEF (0) at this time,
+      there were no nullable fields nor BIT fields at all in the
+      columns that are common to the master and the slave. In that
+      case, there is only one null byte holding the X bit.
+
+      OBSERVE! There might still be nullable columns following the
+      common columns, so table->s->null_bytes might be greater than 1.
+    */
+    if (master_null_bytes == Field::LAST_NULL_BYTE_UNDEF)
+      master_null_bytes= 1;
+  }
+
+  DBUG_ASSERT(master_null_bytes <= table->s->null_bytes);
+  /* [1] Copy the master's null bits straight into the record. */
+  memcpy(record, row, master_null_bytes);            // [1]
+  int error= 0;
+
+  bitmap_set_all(rw_set);
+
+  Field **const begin_ptr = table->field;
+  Field **field_ptr;
+  uchar const *ptr= row + master_null_bytes;
+  Field **const end_ptr= begin_ptr + colcnt;
+  for (field_ptr= begin_ptr ; field_ptr < end_ptr ; ++field_ptr)
+  {
+    Field *const f= *field_ptr;
+
+    if (bitmap_is_set(cols, (uint)(field_ptr - begin_ptr)))
+    {
+      /* Temporarily shift the field to point into 'record'. */
+      f->move_field_offset(offset);
+      ptr= f->unpack(f->ptr, ptr, row_buffer_end, 0);
+      f->move_field_offset(-offset);
+      if (!ptr)
+      {
+        rgi->rli->report(ERROR_LEVEL, ER_SLAVE_CORRUPT_EVENT, NULL,
+                         "Could not read field `%s` of table `%s`.`%s`",
+                         f->field_name.str, table->s->db.str,
+                         table->s->table_name.str);
+        return(ER_SLAVE_CORRUPT_EVENT);
+      }
+    }
+    else
+      bitmap_clear_bit(rw_set, (uint)(field_ptr - begin_ptr));
+  }
+
+  *row_end = ptr;
+  if (master_reclength)
+  {
+    if (*field_ptr)
+      *master_reclength = (ulong)((*field_ptr)->ptr - table->record[0]);
+    else
+      *master_reclength = table->s->reclength;
+  }
+
+  /*
+    Set properties for remaining columns, if there are any. We let the
+    corresponding bit in the write_set be set, to write the value if
+    it was not there already. We iterate over all remaining columns,
+    even if there were an error, to get as many error messages as
+    possible.  We are still able to return a pointer to the next row,
+    so redo that.
+
+    This generation of error messages is only relevant when inserting
+    new rows.
+   */
+  for ( ; *field_ptr ; ++field_ptr)
+  {
+    uint32 const mask= NOT_NULL_FLAG | NO_DEFAULT_VALUE_FLAG;
+
+    DBUG_PRINT("debug", ("flags = 0x%x, mask = 0x%x, flags & mask = 0x%x",
+                         (*field_ptr)->flags, mask,
+                         (*field_ptr)->flags & mask));
+
+    if (event_type == WRITE_ROWS_EVENT &&
+        ((*field_ptr)->flags & mask) == mask)
+    {
+      rgi->rli->report(ERROR_LEVEL, ER_NO_DEFAULT_FOR_FIELD, NULL,
+                       "Field `%s` of table `%s`.`%s` "
+                       "has no default value and cannot be NULL",
+                       (*field_ptr)->field_name.str, table->s->db.str,
+                       table->s->table_name.str);
+      error = ER_NO_DEFAULT_FOR_FIELD;
+    }
+    else
+      (*field_ptr)->set_default();
+  }
+
+  return error;
+}
+#endif
diff --git a/sql/rpl_record_old.h b/sql/rpl_record_old.h
new file mode 100644
index 00000000..0b2dd432
--- /dev/null
+++ b/sql/rpl_record_old.h
@@ -0,0 +1,35 @@
+/* Copyright (c) 2007, 2010, Oracle and/or its affiliates.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#ifndef RPL_RECORD_OLD_H
+#define RPL_RECORD_OLD_H
+
+#include "log_event.h"                 /* Log_event_type */
+
+/* Pre-5.1 ("old") row format pack/unpack -- see rpl_record_old.cc. */
+#ifndef MYSQL_CLIENT
+size_t pack_row_old(TABLE *table, MY_BITMAP const* cols,
+                    uchar *row_data, const uchar *record);
+
+#ifdef HAVE_REPLICATION
+int unpack_row_old(rpl_group_info *rgi,
+                   TABLE *table, uint const colcnt, uchar *record,
+                   uchar const *row, uchar const *row_buffer_end,
+                   MY_BITMAP const *cols,
+                   uchar const **row_end, ulong *master_reclength,
+                   MY_BITMAP* const rw_set,
+                   Log_event_type const event_type);
+#endif
+#endif
+#endif
diff --git a/sql/rpl_reporting.cc b/sql/rpl_reporting.cc
new file mode 100644
index 00000000..d04f18c9
--- /dev/null
+++ b/sql/rpl_reporting.cc
@@ -0,0 +1,87 @@
+
+/* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "rpl_reporting.h" +#include "log.h" // sql_print_error, sql_print_warning, + // sql_print_information +#include "sql_class.h" + +Slave_reporting_capability::Slave_reporting_capability(char const *thread_name) + : err_thread_id(0), m_thread_name(thread_name) +{ + mysql_mutex_init(key_mutex_slave_reporting_capability_err_lock, + &err_lock, MY_MUTEX_INIT_FAST); +} + +void +Slave_reporting_capability::report(loglevel level, int err_code, + const char *extra_info, + const char *msg, ...) const +{ + void (*report_function)(const char *, ...); + char buff[MAX_SLAVE_ERRMSG]; + char *pbuff= buff; + uint pbuffsize= sizeof(buff); + va_list args; + va_start(args, msg); + + mysql_mutex_lock(&err_lock); + switch (level) + { + case ERROR_LEVEL: + /* + It's an error, it must be reported in Last_error and Last_errno in SHOW + SLAVE STATUS. + */ + pbuff= m_last_error.message; + pbuffsize= sizeof(m_last_error.message); + m_last_error.number = err_code; + m_last_error.update_timestamp(); + report_function= sql_print_error; + break; + case WARNING_LEVEL: + report_function= sql_print_warning; + break; + case INFORMATION_LEVEL: + report_function= sql_print_information; + break; + default: + va_end(args); + DBUG_ASSERT(0); // should not come here + return; // don't crash production builds, just do nothing + } + + my_vsnprintf(pbuff, pbuffsize, msg, args); + + mysql_mutex_unlock(&err_lock); + va_end(args); + err_thread_id= current_thd->thread_id; + + /* If the msg string ends with '.', do not add a ',' it would be ugly */ + report_function("%s %s: %s%s %s%sInternal MariaDB error code: %d", + (current_thd && current_thd->rgi_fake) ? "" : "Slave", + m_thread_name, pbuff, + (pbuff[0] && *(strend(pbuff)-1) == '.') ? 
"" : ",", + (extra_info ? extra_info : ""), (extra_info ? ", " : ""), + err_code); +} + +Slave_reporting_capability::~Slave_reporting_capability() +{ + mysql_mutex_destroy(&err_lock); +} diff --git a/sql/rpl_reporting.h b/sql/rpl_reporting.h new file mode 100644 index 00000000..8c10f3f0 --- /dev/null +++ b/sql/rpl_reporting.h @@ -0,0 +1,133 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef RPL_REPORTING_H +#define RPL_REPORTING_H + +#include /* loglevel */ + +/** + Maximum size of an error message from a slave thread. + */ +#define MAX_SLAVE_ERRMSG 1024 + +/** + Mix-in to handle the message logging and reporting for relay log + info and master log info structures. + + By inheriting from this class, the class is imbued with + capabilities to do slave reporting. + */ +class Slave_reporting_capability +{ +public: + /** lock used to synchronize m_last_error on 'SHOW SLAVE STATUS' **/ + mutable mysql_mutex_t err_lock; + /** + Constructor. + + @param thread_name Printable name of the slave thread that is reporting. + */ + Slave_reporting_capability(char const *thread_name); + mutable my_thread_id err_thread_id; + + /** + Writes a message and, if it's an error message, to Last_Error + (which will be displayed by SHOW SLAVE STATUS). 
+
+     @param level       The severity level
+     @param err_code    The error code
+     @param msg         The message (usually related to the error
+                        code, but can contain more information), in
+                        printf() format.
+  */
+  void report(loglevel level, int err_code, const char *extra_info,
+              const char *msg, ...) const
+    ATTRIBUTE_FORMAT(printf, 5, 6);
+
+  /**
+     Clear errors. They will not show up under SHOW SLAVE
+     STATUS.
+   */
+  void clear_error() {
+    mysql_mutex_lock(&err_lock);
+    m_last_error.clear();
+    mysql_mutex_unlock(&err_lock);
+  }
+
+  /**
+     Error information structure.
+   */
+  class Error {
+    friend class Slave_reporting_capability;
+  public:
+    Error()
+    {
+      clear();
+    }
+
+    void clear()
+    {
+      number= 0;
+      message[0]= '\0';
+      timestamp[0]= '\0';
+    }
+    /* Record the current local time as both time_t (skr) and the fixed
+       15-character "YYMMDD HH:MM:SS" string used by SHOW SLAVE STATUS. */
+    void update_timestamp()
+    {
+      struct tm tm_tmp;
+      struct tm *start;
+
+      skr= my_time(0);
+      localtime_r(&skr, &tm_tmp);
+      start=&tm_tmp;
+
+      snprintf(timestamp, sizeof(timestamp), "%02d%02d%02d %02d:%02d:%02d",
+               start->tm_year % 100,
+               start->tm_mon+1,
+               start->tm_mday,
+               start->tm_hour,
+               start->tm_min,
+               start->tm_sec);
+      /* Defensive: the format above always writes exactly 15 chars. */
+      timestamp[15]= '\0';
+    }
+
+    /** Error code */
+    uint32 number;
+    /** Error message */
+    char message[MAX_SLAVE_ERRMSG];
+    /** Error timestamp as string */
+    char timestamp[64];
+    /** Error timestamp as time_t variable. Used in performance_schema */
+    time_t skr;
+  };
+
+  Error const& last_error() const { return m_last_error; }
+
+  virtual ~Slave_reporting_capability()= 0;
+private:
+  /**
+     Last error produced by the I/O or SQL thread respectively.
+   */
+  mutable Error m_last_error;
+
+  char const *const m_thread_name;
+
+  // not implemented
+  Slave_reporting_capability(const Slave_reporting_capability& rhs);
+  Slave_reporting_capability& operator=(const Slave_reporting_capability& rhs);
+};
+
+#endif // RPL_REPORTING_H
+
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
new file mode 100644
index 00000000..68f42494
--- /dev/null
+++ b/sql/rpl_rli.cc
@@ -0,0 +1,2642 @@
+/* Copyright (c) 2006, 2017, Oracle and/or its affiliates.
+   Copyright (c) 2010, 2020, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "mariadb.h"
+#include "sql_priv.h"
+#include "unireg.h"                             // HAVE_*
+#include "rpl_mi.h"
+#include "rpl_rli.h"
+#include "sql_base.h"                        // close_thread_tables
+#include <my_dir.h>    // For MY_STAT
+#include "sql_repl.h"  // For check_binlog_magic
+#include "log_event.h" // Format_description_log_event, Log_event,
+                       // FORMAT_DESCRIPTION_LOG_EVENT, ROTATE_EVENT,
+                       // PREFIX_SQL_LOAD
+#include "rpl_utility.h"
+#include "transaction.h"
+#include "sql_parse.h"                          // end_trans, ROLLBACK
+#include "slave.h"
+#include <mysql/plugin.h>
+#include <mysql/service_thd_wait.h>
+#include "lock.h"
+#include "sql_table.h"
+
+static int count_relay_log_space(Relay_log_info* rli);
+bool xa_trans_force_rollback(THD *thd);
+/**
+   Current replication state (hash of last GTID executed, per replication
+   domain).
+*/
+rpl_slave_state *rpl_global_gtid_slave_state;
+/* Object used for MASTER_GTID_WAIT(). */
+gtid_waiting rpl_global_gtid_waiting;
+
+const char *const Relay_log_info::state_delaying_string = "Waiting until MASTER_DELAY seconds after master executed event";
+
+/*
+  Construct a Relay_log_info in its "not inited" state: all positions and
+  counters zeroed, file descriptors at -1, and the mutexes/condition
+  variables created here (destroyed in the destructor).
+*/
+Relay_log_info::Relay_log_info(bool is_slave_recovery, const char* thread_name)
+  :Slave_reporting_capability(thread_name),
+   replicate_same_server_id(::replicate_same_server_id),
+   info_fd(-1), cur_log_fd(-1), relay_log(&sync_relaylog_period),
+   sync_counter(0), is_relay_log_recovery(is_slave_recovery),
+   save_temporary_tables(0),
+   mi(0), inuse_relaylog_list(0), last_inuse_relaylog(0),
+   cur_log_old_open_count(0), error_on_rli_init_info(false),
+   group_relay_log_pos(0), event_relay_log_pos(0),
+   group_master_log_pos(0), log_space_total(0), ignore_log_space_limit(0),
+   last_master_timestamp(0), sql_thread_caught_up(true), slave_skip_counter(0),
+   abort_pos_wait(0), slave_run_id(0), sql_driver_thd(),
+   gtid_skip_flag(GTID_SKIP_NOT), inited(0), abort_slave(0), stop_for_until(0),
+   slave_running(MYSQL_SLAVE_NOT_RUN), until_condition(UNTIL_NONE),
+   until_log_pos(0), retried_trans(0), executed_entries(0),
+   last_trans_retry_count(0), sql_delay(0), sql_delay_end(0),
+   until_relay_log_names_defer(false),
+   m_flags(0)
+{
+  DBUG_ENTER("Relay_log_info::Relay_log_info");
+
+  relay_log.is_relay_log= TRUE;
+  relay_log_state.init();
+#ifdef HAVE_PSI_INTERFACE
+  /* Register performance-schema instrumentation keys for the relay log. */
+  relay_log.set_psi_keys(key_RELAYLOG_LOCK_index,
+                         key_RELAYLOG_COND_relay_log_updated,
+                         key_RELAYLOG_COND_bin_log_updated,
+                         key_file_relaylog,
+                         key_file_relaylog_cache,
+                         key_file_relaylog_index,
+                         key_file_relaylog_index_cache,
+                         key_RELAYLOG_COND_queue_busy,
+                         key_LOCK_relaylog_end_pos);
+#endif
+
+  group_relay_log_name[0]= event_relay_log_name[0]=
+    group_master_log_name[0]= 0;
+  until_log_name[0]= ign_master_log_name_end[0]= 0;
+  max_relay_log_size= global_system_variables.max_relay_log_size;
+  bzero((char*) &info_file, sizeof(info_file));
+  bzero((char*) &cache_buf, sizeof(cache_buf));
+  bzero(&last_seen_gtid, sizeof(last_seen_gtid));
+  mysql_mutex_init(key_relay_log_info_run_lock, &run_lock, MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(key_relay_log_info_data_lock,
+                   &data_lock, MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(key_relay_log_info_log_space_lock,
+                   &log_space_lock, MY_MUTEX_INIT_FAST);
+  mysql_cond_init(key_relay_log_info_data_cond, &data_cond, NULL);
+  mysql_cond_init(key_relay_log_info_start_cond, &start_cond, NULL);
+  mysql_cond_init(key_relay_log_info_stop_cond, &stop_cond, NULL);
+  mysql_cond_init(key_relay_log_info_log_space_cond, &log_space_cond, NULL);
+  relay_log.init_pthread_objects();
+  DBUG_VOID_RETURN;
+}
+
+
+Relay_log_info::~Relay_log_info()
+{
+  DBUG_ENTER("Relay_log_info::~Relay_log_info");
+
+  reset_inuse_relaylog();
+  mysql_mutex_destroy(&run_lock);
+  mysql_mutex_destroy(&data_lock);
+  mysql_mutex_destroy(&log_space_lock);
+  mysql_cond_destroy(&data_cond);
+  mysql_cond_destroy(&start_cond);
+  mysql_cond_destroy(&stop_cond);
+  mysql_cond_destroy(&log_space_cond);
+  relay_log.cleanup();
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Read the relay_log.info file.
+
+  @param info_fname The name of the file to read from.
+ @retval 0 success + @retval 1 failure +*/ +int Relay_log_info::init(const char* info_fname) +{ + char fname[FN_REFLEN+128]; + const char* msg = 0; + int error = 0; + mysql_mutex_t *log_lock; + DBUG_ENTER("Relay_log_info::init"); + + if (inited) // Set if this function called + DBUG_RETURN(0); + + log_lock= relay_log.get_log_lock(); + fn_format(fname, info_fname, mysql_data_home, "", 4+32); + mysql_mutex_lock(&data_lock); + cur_log_fd = -1; + slave_skip_counter=0; + abort_pos_wait=0; + log_space_limit= relay_log_space_limit; + log_space_total= 0; + + if (unlikely(error_on_rli_init_info)) + goto err; + + char pattern[FN_REFLEN]; + (void) my_realpath(pattern, slave_load_tmpdir, 0); + if (fn_format(pattern, PREFIX_SQL_LOAD, pattern, "", + MY_SAFE_PATH | MY_RETURN_REAL_PATH) == NullS) + { + mysql_mutex_unlock(&data_lock); + sql_print_error("Unable to use slave's temporary directory %s", + slave_load_tmpdir); + DBUG_RETURN(1); + } + unpack_filename(slave_patternload_file, pattern); + slave_patternload_file_size= strlen(slave_patternload_file); + + /* + The relay log will now be opened, as a SEQ_READ_APPEND IO_CACHE. + Note that the I/O thread flushes it to disk after writing every + event, in flush_master_info(mi, 1, ?). 
+ */ + + { + /* Reports an error and returns, if the --relay-log's path + is a directory.*/ + if (opt_relay_logname && + opt_relay_logname[strlen(opt_relay_logname) - 1] == FN_LIBCHAR) + { + mysql_mutex_unlock(&data_lock); + sql_print_error("Path '%s' is a directory name, please specify \ +a file name for --relay-log option", opt_relay_logname); + DBUG_RETURN(1); + } + + /* Reports an error and returns, if the --relay-log-index's path + is a directory.*/ + if (opt_relaylog_index_name && + opt_relaylog_index_name[strlen(opt_relaylog_index_name) - 1] + == FN_LIBCHAR) + { + mysql_mutex_unlock(&data_lock); + sql_print_error("Path '%s' is a directory name, please specify \ +a file name for --relay-log-index option", opt_relaylog_index_name); + DBUG_RETURN(1); + } + + char buf[FN_REFLEN]; + const char *ln; + static bool name_warning_sent= 0; + ln= relay_log.generate_name(opt_relay_logname, "-relay-bin", + 1, buf); + /* We send the warning only at startup, not after every RESET SLAVE */ + if (!opt_relay_logname && !opt_relaylog_index_name && !name_warning_sent && + !opt_bootstrap) + { + /* + User didn't give us info to name the relay log index file. + Picking `hostname`-relay-bin.index like we do, causes replication to + fail if this slave's hostname is changed later. So, we would like to + instead require a name. But as we don't want to break many existing + setups, we only give warning, not error. + */ + sql_print_warning("Neither --relay-log nor --relay-log-index were used;" + " so replication " + "may break when this MariaDB server acts as a " + "replica and has its hostname changed. 
Please " + "use '--log-basename=#' or '--relay-log=%s' to avoid " + "this problem.", ln); + name_warning_sent= 1; + } + + /* For multimaster, add connection name to relay log filenames */ + char buf_relay_logname[FN_REFLEN], buf_relaylog_index_name_buff[FN_REFLEN]; + char *buf_relaylog_index_name= opt_relaylog_index_name; + + create_logfile_name_with_suffix(buf_relay_logname, + sizeof(buf_relay_logname), + ln, 1, &mi->cmp_connection_name); + ln= buf_relay_logname; + + if (opt_relaylog_index_name) + { + buf_relaylog_index_name= buf_relaylog_index_name_buff; + create_logfile_name_with_suffix(buf_relaylog_index_name_buff, + sizeof(buf_relaylog_index_name_buff), + opt_relaylog_index_name, 0, + &mi->cmp_connection_name); + } + + /* + note, that if open() fails, we'll still have index file open + but a destructor will take care of that + */ + mysql_mutex_lock(log_lock); + if (relay_log.open_index_file(buf_relaylog_index_name, ln, TRUE) || + relay_log.open(ln, 0, 0, SEQ_READ_APPEND, + (ulong)max_relay_log_size, 1, TRUE)) + { + mysql_mutex_unlock(log_lock); + mysql_mutex_unlock(&data_lock); + sql_print_error("Failed when trying to open logs for '%s' in Relay_log_info::init(). 
Error: %M", ln, my_errno); + DBUG_RETURN(1); + } + mysql_mutex_unlock(log_lock); + } + + /* if file does not exist */ + if (access(fname,F_OK)) + { + /* + If someone removed the file from underneath our feet, just close + the old descriptor and re-create the old file + */ + if (info_fd >= 0) + mysql_file_close(info_fd, MYF(MY_WME)); + if ((info_fd= mysql_file_open(key_file_relay_log_info, + fname, O_CREAT|O_RDWR|O_BINARY, MYF(MY_WME))) < 0) + { + sql_print_error("Failed to create a new relay log info file (" + "file '%s', errno %d)", fname, my_errno); + msg= current_thd->get_stmt_da()->message(); + goto err; + } + if (init_io_cache(&info_file, info_fd, LOG_BIN_IO_SIZE, READ_CACHE, 0L,0, + MYF(MY_WME))) + { + sql_print_error("Failed to create a cache on relay log info file '%s'", + fname); + msg= current_thd->get_stmt_da()->message(); + goto err; + } + + /* Init relay log with first entry in the relay index file */ + if (init_relay_log_pos(this,NullS,BIN_LOG_HEADER_SIZE,0 /* no data lock */, + &msg, 0)) + { + sql_print_error("Failed to open the relay log 'FIRST' (relay_log_pos 4)"); + goto err; + } + group_master_log_name[0]= 0; + group_master_log_pos= 0; + } + else // file exists + { + if (info_fd >= 0) + reinit_io_cache(&info_file, READ_CACHE, 0L,0,0); + else + { + int error=0; + if ((info_fd= mysql_file_open(key_file_relay_log_info, + fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0) + { + sql_print_error("\ +Failed to open the existing relay log info file '%s' (errno %d)", + fname, my_errno); + error= 1; + } + else if (init_io_cache(&info_file, info_fd, + LOG_BIN_IO_SIZE, READ_CACHE, 0L, 0, MYF(MY_WME))) + { + sql_print_error("Failed to create a cache on relay log info file '%s'", + fname); + error= 1; + } + if (unlikely(error)) + { + if (info_fd >= 0) + mysql_file_close(info_fd, MYF(0)); + info_fd= -1; + mysql_mutex_lock(log_lock); + relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT); + mysql_mutex_unlock(log_lock); + mysql_mutex_unlock(&data_lock); + 
DBUG_RETURN(1); + } + } + + int relay_log_pos, master_log_pos, lines; + char *first_non_digit; + + /* + Starting from MySQL 5.6.x, relay-log.info has a new format. + Now, its first line contains the number of lines in the file. + By reading this number we can determine which version our master.info + comes from. We can't simply count the lines in the file, since + versions before 5.6.x could generate files with more lines than + needed. If first line doesn't contain a number, or if it + contains a number less than LINES_IN_RELAY_LOG_INFO_WITH_DELAY, + then the file is treated like a file from pre-5.6.x version. + There is no ambiguity when reading an old master.info: before + 5.6.x, the first line contained the binlog's name, which is + either empty or has an extension (contains a '.'), so can't be + confused with an integer. + + So we're just reading first line and trying to figure which + version is this. + */ + + /* + The first row is temporarily stored in mi->master_log_name, if + it is line count and not binlog name (new format) it will be + overwritten by the second row later. 
+ */ + if (init_strvar_from_file(group_relay_log_name, + sizeof(group_relay_log_name), + &info_file, "")) + { + msg="Error reading slave log configuration"; + goto err; + } + + lines= strtoul(group_relay_log_name, &first_non_digit, 10); + + if (group_relay_log_name[0] != '\0' && + *first_non_digit == '\0' && + lines >= LINES_IN_RELAY_LOG_INFO_WITH_DELAY) + { + DBUG_PRINT("info", ("relay_log_info file is in new format.")); + /* Seems to be new format => read relay log name from next line */ + if (init_strvar_from_file(group_relay_log_name, + sizeof(group_relay_log_name), + &info_file, "")) + { + msg="Error reading slave log configuration"; + goto err; + } + } + else + DBUG_PRINT("info", ("relay_log_info file is in old format.")); + + if (init_intvar_from_file(&relay_log_pos, + &info_file, BIN_LOG_HEADER_SIZE) || + init_strvar_from_file(group_master_log_name, + sizeof(group_master_log_name), + &info_file, "") || + init_intvar_from_file(&master_log_pos, &info_file, 0) || + (lines >= LINES_IN_RELAY_LOG_INFO_WITH_DELAY && + init_intvar_from_file(&sql_delay, &info_file, 0))) + { + msg="Error reading slave log configuration"; + goto err; + } + + strmake_buf(event_relay_log_name,group_relay_log_name); + group_relay_log_pos= event_relay_log_pos= relay_log_pos; + group_master_log_pos= master_log_pos; + + if (is_relay_log_recovery && init_recovery(mi, &msg)) + goto err; + + relay_log_state.load(rpl_global_gtid_slave_state); + if (init_relay_log_pos(this, + group_relay_log_name, + group_relay_log_pos, + 0 /* no data lock*/, + &msg, 0)) + { + sql_print_error("Failed to open the relay log '%s' (relay_log_pos %llu)", + group_relay_log_name, group_relay_log_pos); + goto err; + } + } + + DBUG_PRINT("info", ("my_b_tell(cur_log)=%llu event_relay_log_pos=%llu", + my_b_tell(cur_log), event_relay_log_pos)); + DBUG_ASSERT(event_relay_log_pos >= BIN_LOG_HEADER_SIZE); + DBUG_ASSERT(my_b_tell(cur_log) == event_relay_log_pos); + + /* + Now change the cache from READ to WRITE - must do this + 
before Relay_log_info::flush() + */ + reinit_io_cache(&info_file, WRITE_CACHE,0L,0,1); + if (unlikely((error= flush()))) + { + msg= "Failed to flush relay log info file"; + goto err; + } + if (count_relay_log_space(this)) + { + msg="Error counting relay log space"; + goto err; + } + inited= 1; + error_on_rli_init_info= false; + mysql_mutex_unlock(&data_lock); + DBUG_RETURN(0); + +err: + error_on_rli_init_info= true; + if (msg) + sql_print_error("%s", msg); + end_io_cache(&info_file); + if (info_fd >= 0) + mysql_file_close(info_fd, MYF(0)); + info_fd= -1; + mysql_mutex_lock(log_lock); + relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT); + mysql_mutex_unlock(log_lock); + mysql_mutex_unlock(&data_lock); + DBUG_RETURN(1); +} + + +static inline int add_relay_log(Relay_log_info* rli,LOG_INFO* linfo) +{ + MY_STAT s; + DBUG_ENTER("add_relay_log"); + if (!mysql_file_stat(key_file_relaylog, + linfo->log_file_name, &s, MYF(0))) + { + sql_print_error("log %s listed in the index, but failed to stat", + linfo->log_file_name); + DBUG_RETURN(1); + } + rli->log_space_total += s.st_size; + DBUG_PRINT("info",("log_space_total: %llu", uint64(rli->log_space_total))); + DBUG_RETURN(0); +} + + +static int count_relay_log_space(Relay_log_info* rli) +{ + LOG_INFO linfo; + DBUG_ENTER("count_relay_log_space"); + rli->log_space_total= 0; + if (rli->relay_log.find_log_pos(&linfo, NullS, 1)) + { + sql_print_error("Could not find first log while counting relay log space"); + DBUG_RETURN(1); + } + do + { + if (add_relay_log(rli,&linfo)) + DBUG_RETURN(1); + } while (!rli->relay_log.find_next_log(&linfo, 1)); + /* + As we have counted everything, including what may have written in a + preceding write, we must reset bytes_written, or we may count some space + twice. 
+ */ + rli->relay_log.reset_bytes_written(); + DBUG_RETURN(0); +} + + +/* + Reset UNTIL condition for Relay_log_info + + SYNOPSYS + clear_until_condition() + rli - Relay_log_info structure where UNTIL condition should be reset + */ + +void Relay_log_info::clear_until_condition() +{ + DBUG_ENTER("clear_until_condition"); + + until_condition= Relay_log_info::UNTIL_NONE; + until_log_name[0]= 0; + until_log_pos= 0; + until_relay_log_names_defer= false; + + DBUG_VOID_RETURN; +} + + +/* + Read the correct format description event for starting to replicate from + a given position in a relay log file. +*/ +Format_description_log_event * +read_relay_log_description_event(IO_CACHE *cur_log, ulonglong start_pos, + const char **errmsg) +{ + Log_event *ev; + Format_description_log_event *fdev; + bool found= false; + + /* + By default the relay log is in binlog format 3 (4.0). + Even if format is 4, this will work enough to read the first event + (Format_desc) (remember that format 4 is just lenghtened compared to format + 3; format 3 is a prefix of format 4). + */ + fdev= new Format_description_log_event(3); + + while (!found) + { + Log_event_type typ; + + /* + Read the possible Format_description_log_event; if position + was 4, no need, it will be read naturally. 
+ */ + DBUG_PRINT("info",("looking for a Format_description_log_event")); + + if (my_b_tell(cur_log) >= start_pos) + break; + + if (!(ev= Log_event::read_log_event(cur_log, fdev, + opt_slave_sql_verify_checksum))) + { + DBUG_PRINT("info",("could not read event, cur_log->error=%d", + cur_log->error)); + if (cur_log->error) /* not EOF */ + { + *errmsg= "I/O error reading event at position 4"; + delete fdev; + return NULL; + } + break; + } + typ= ev->get_type_code(); + if (typ == FORMAT_DESCRIPTION_EVENT) + { + Format_description_log_event *old= fdev; + DBUG_PRINT("info",("found Format_description_log_event")); + fdev= (Format_description_log_event*) ev; + fdev->copy_crypto_data(old); + delete old; + + /* + As ev was returned by read_log_event, it has passed is_valid(), so + my_malloc() in ctor worked, no need to check again. + */ + /* + Ok, we found a Format_description event. But it is not sure that this + describes the whole relay log; indeed, one can have this sequence + (starting from position 4): + Format_desc (of slave) + Rotate (of master) + Format_desc (of master) + So the Format_desc which really describes the rest of the relay log + is the 3rd event (it can't be further than that, because we rotate + the relay log when we queue a Rotate event from the master). + But what describes the Rotate is the first Format_desc. + So what we do is: + go on searching for Format_description events, until you exceed the + position (argument 'pos') or until you find another event than Rotate + or Format_desc. 
+ */ + } + else if (typ == START_ENCRYPTION_EVENT) + { + if (fdev->start_decryption((Start_encryption_log_event*) ev)) + { + *errmsg= "Unable to set up decryption of binlog."; + delete ev; + delete fdev; + return NULL; + } + delete ev; + } + else + { + DBUG_PRINT("info",("found event of another type=%d", typ)); + found= (typ != ROTATE_EVENT); + delete ev; + } + } + return fdev; +} + + +/* + Open the given relay log + + SYNOPSIS + init_relay_log_pos() + rli Relay information (will be initialized) + log Name of relay log file to read from. NULL = First log + pos Position in relay log file + need_data_lock Set to 1 if this functions should do mutex locks + errmsg Store pointer to error message here + look_for_description_event + 1 if we should look for such an event. We only need + this when the SQL thread starts and opens an existing + relay log and has to execute it (possibly from an + offset >4); then we need to read the first event of + the relay log to be able to parse the events we have + to execute. + + DESCRIPTION + - Close old open relay log files. + - If we are using the same relay log as the running IO-thread, then set + rli->cur_log to point to the same IO_CACHE entry. + - If not, open the 'log' binary file. + + TODO + - check proper initialization of group_master_log_name/group_master_log_pos + + RETURN VALUES + 0 ok + 1 error. errmsg is set to point to the error message +*/ + +int init_relay_log_pos(Relay_log_info* rli,const char* log, + ulonglong pos, bool need_data_lock, + const char** errmsg, + bool look_for_description_event) +{ + DBUG_ENTER("init_relay_log_pos"); + DBUG_PRINT("info", ("pos: %lu", (ulong) pos)); + + *errmsg=0; + mysql_mutex_t *log_lock= rli->relay_log.get_log_lock(); + + if (need_data_lock) + mysql_mutex_lock(&rli->data_lock); + + /* + Slave threads are not the only users of init_relay_log_pos(). 
CHANGE MASTER + is, too, and init_slave() too; these 2 functions allocate a description + event in init_relay_log_pos, which is not freed by the terminating SQL slave + thread as that thread is not started by these functions. So we have to free + the description_event here, in case, so that there is no memory leak in + running, say, CHANGE MASTER. + */ + delete rli->relay_log.description_event_for_exec; + /* + By default the relay log is in binlog format 3 (4.0). + Even if format is 4, this will work enough to read the first event + (Format_desc) (remember that format 4 is just lenghtened compared to format + 3; format 3 is a prefix of format 4). + */ + rli->relay_log.description_event_for_exec= new + Format_description_log_event(3); + + mysql_mutex_lock(log_lock); + + /* Close log file and free buffers if it's already open */ + if (rli->cur_log_fd >= 0) + { + end_io_cache(&rli->cache_buf); + mysql_file_close(rli->cur_log_fd, MYF(MY_WME)); + rli->cur_log_fd = -1; + } + + rli->group_relay_log_pos = rli->event_relay_log_pos = pos; + rli->clear_flag(Relay_log_info::IN_STMT); + rli->clear_flag(Relay_log_info::IN_TRANSACTION); + + /* + Test to see if the previous run was with the skip of purging + If yes, we do not purge when we restart + */ + if (rli->relay_log.find_log_pos(&rli->linfo, NullS, 1)) + { + *errmsg="Could not find first log during relay log initialization"; + goto err; + } + + if (log && rli->relay_log.find_log_pos(&rli->linfo, log, 1)) + { + *errmsg="Could not find target log during relay log initialization"; + goto err; + } + strmake_buf(rli->group_relay_log_name,rli->linfo.log_file_name); + strmake_buf(rli->event_relay_log_name,rli->linfo.log_file_name); + if (rli->relay_log.is_active(rli->linfo.log_file_name)) + { + /* + The IO thread is using this log file. + In this case, we will use the same IO_CACHE pointer to + read data as the IO thread is using to write data. 
+ */ + my_b_seek((rli->cur_log=rli->relay_log.get_log_file()), (off_t)0); + if (check_binlog_magic(rli->cur_log,errmsg)) + goto err; + rli->cur_log_old_open_count=rli->relay_log.get_open_count(); + } + else + { + /* + Open the relay log and set rli->cur_log to point at this one + */ + if ((rli->cur_log_fd=open_binlog(&rli->cache_buf, + rli->linfo.log_file_name,errmsg)) < 0) + goto err; + rli->cur_log = &rli->cache_buf; + } + /* + In all cases, check_binlog_magic() has been called so we're at offset 4 for + sure. + */ + if (pos > BIN_LOG_HEADER_SIZE) /* If pos<=4, we stay at 4 */ + { + if (look_for_description_event) + { + Format_description_log_event *fdev; + if (!(fdev= read_relay_log_description_event(rli->cur_log, pos, errmsg))) + goto err; + delete rli->relay_log.description_event_for_exec; + rli->relay_log.description_event_for_exec= fdev; + } + my_b_seek(rli->cur_log,(off_t)pos); + DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%llu rli->event_relay_log_pos=%llu", + my_b_tell(rli->cur_log), rli->event_relay_log_pos)); + + } + +err: + /* + If we don't purge, we can't honour relay_log_space_limit ; + silently discard it + */ + if (!relay_log_purge) + rli->log_space_limit= 0; + mysql_cond_broadcast(&rli->data_cond); + + mysql_mutex_unlock(log_lock); + + if (need_data_lock) + mysql_mutex_unlock(&rli->data_lock); + if (!rli->relay_log.description_event_for_exec->is_valid() && !*errmsg) + *errmsg= "Invalid Format_description log event; could be out of memory"; + + DBUG_PRINT("info", ("Returning %d from init_relay_log_pos", (*errmsg)?1:0)); + + DBUG_RETURN ((*errmsg) ? 1 : 0); +} + + +/* + Waits until the SQL thread reaches (has executed up to) the + log/position or timed out. 
+ + SYNOPSIS + wait_for_pos() + thd client thread that sent SELECT MASTER_POS_WAIT + log_name log name to wait for + log_pos position to wait for + timeout timeout in seconds before giving up waiting + + NOTES + timeout is longlong whereas it should be ulong ; but this is + to catch if the user submitted a negative timeout. + + RETURN VALUES + -2 improper arguments (log_pos<0) + or slave not running, or master info changed + during the function's execution, + or client thread killed. -2 is translated to NULL by caller + -1 timed out + >=0 number of log events the function had to wait + before reaching the desired log/position + */ + +int Relay_log_info::wait_for_pos(THD* thd, String* log_name, + longlong log_pos, + longlong timeout) +{ + int event_count = 0; + ulong init_abort_pos_wait; + int error=0; + struct timespec abstime; // for timeout checking + PSI_stage_info old_stage; + DBUG_ENTER("Relay_log_info::wait_for_pos"); + + if (!inited) + DBUG_RETURN(-2); + + DBUG_PRINT("enter",("log_name: '%s' log_pos: %lu timeout: %lu", + log_name->c_ptr(), (ulong) log_pos, (ulong) timeout)); + + set_timespec(abstime,timeout); + mysql_mutex_lock(&data_lock); + thd->ENTER_COND(&data_cond, &data_lock, + &stage_waiting_for_the_slave_thread_to_advance_position, + &old_stage); + /* + This function will abort when it notices that some CHANGE MASTER or + RESET MASTER has changed the master info. + To catch this, these commands modify abort_pos_wait ; We just monitor + abort_pos_wait and see if it has changed. + Why do we have this mechanism instead of simply monitoring slave_running + in the loop (we do this too), as CHANGE MASTER/RESET SLAVE require that + the SQL thread be stopped? + This is becasue if someones does: + STOP SLAVE;CHANGE MASTER/RESET SLAVE; START SLAVE; + the change may happen very quickly and we may not notice that + slave_running briefly switches between 1/0/1. 
+ */ + init_abort_pos_wait= abort_pos_wait; + + /* + We'll need to + handle all possible log names comparisons (e.g. 999 vs 1000). + We use ulong for string->number conversion ; this is no + stronger limitation than in find_uniq_filename in sql/log.cc + */ + ulong log_name_extension; + char log_name_tmp[FN_REFLEN]; //make a char[] from String + + strmake(log_name_tmp, log_name->ptr(), MY_MIN(log_name->length(), FN_REFLEN-1)); + + char *p= fn_ext(log_name_tmp); + char *p_end; + if (!*p || log_pos<0) + { + error= -2; //means improper arguments + goto err; + } + // Convert 0-3 to 4 + log_pos= MY_MAX(log_pos, BIN_LOG_HEADER_SIZE); + /* p points to '.' */ + log_name_extension= strtoul(++p, &p_end, 10); + /* + p_end points to the first invalid character. + If it equals to p, no digits were found, error. + If it contains '\0' it means conversion went ok. + */ + if (p_end==p || *p_end) + { + error= -2; + goto err; + } + + /* The "compare and wait" main loop */ + while (!thd->killed && + init_abort_pos_wait == abort_pos_wait && + slave_running) + { + bool pos_reached; + int cmp_result= 0; + + DBUG_PRINT("info", + ("init_abort_pos_wait: %ld abort_pos_wait: %ld", + init_abort_pos_wait, abort_pos_wait)); + DBUG_PRINT("info",("group_master_log_name: '%s' pos: %lu", + group_master_log_name, (ulong) group_master_log_pos)); + + /* + group_master_log_name can be "", if we are just after a fresh + replication start or after a CHANGE MASTER TO MASTER_HOST/PORT + (before we have executed one Rotate event from the master) or + (rare) if the user is doing a weird slave setup (see next + paragraph). If group_master_log_name is "", we assume we don't + have enough info to do the comparison yet, so we just wait until + more data. 
In this case master_log_pos is always 0 except if + somebody (wrongly) sets this slave to be a slave of itself + without using --replicate-same-server-id (an unsupported + configuration which does nothing), then group_master_log_pos + will grow and group_master_log_name will stay "". + */ + if (*group_master_log_name) + { + char *basename= (group_master_log_name + + dirname_length(group_master_log_name)); + /* + First compare the parts before the extension. + Find the dot in the master's log basename, + and protect against user's input error : + if the names do not match up to '.' included, return error + */ + char *q= (char*)(fn_ext(basename)+1); + if (strncmp(basename, log_name_tmp, (int)(q-basename))) + { + error= -2; + break; + } + // Now compare extensions. + char *q_end; + ulong group_master_log_name_extension= strtoul(q, &q_end, 10); + if (group_master_log_name_extension < log_name_extension) + cmp_result= -1 ; + else + cmp_result= (group_master_log_name_extension > log_name_extension) ? 1 : 0 ; + + pos_reached= ((!cmp_result && group_master_log_pos >= (ulonglong)log_pos) || + cmp_result > 0); + if (pos_reached || thd->killed) + break; + } + + //wait for master update, with optional timeout. + + DBUG_PRINT("info",("Waiting for master update")); + /* + We are going to mysql_cond_(timed)wait(); if the SQL thread stops it + will wake us up. + */ + thd_wait_begin(thd, THD_WAIT_BINLOG); + if (timeout > 0) + { + /* + Note that mysql_cond_timedwait checks for the timeout + before for the condition ; i.e. it returns ETIMEDOUT + if the system time equals or exceeds the time specified by abstime + before the condition variable is signaled or broadcast, _or_ if + the absolute time specified by abstime has already passed at the time + of the call. + For that reason, mysql_cond_timedwait will do the "timeoutting" job + even if its condition is always immediately signaled (case of a loaded + master). 
+ */ + error= mysql_cond_timedwait(&data_cond, &data_lock, &abstime); + } + else + mysql_cond_wait(&data_cond, &data_lock); + thd_wait_end(thd); + DBUG_PRINT("info",("Got signal of master update or timed out")); + if (error == ETIMEDOUT || error == ETIME) + { + error= -1; + break; + } + error=0; + event_count++; + DBUG_PRINT("info",("Testing if killed or SQL thread not running")); + } + +err: + thd->EXIT_COND(&old_stage); + DBUG_PRINT("exit",("killed: %d abort: %d slave_running: %d \ +improper_arguments: %d timed_out: %d", + thd->killed_errno(), + (int) (init_abort_pos_wait != abort_pos_wait), + (int) slave_running, + (int) (error == -2), + (int) (error == -1))); + if (thd->killed || init_abort_pos_wait != abort_pos_wait || + !slave_running) + { + error= -2; + } + DBUG_RETURN( error ? error : event_count ); +} + + +void Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos, + rpl_group_info *rgi, + bool skip_lock) +{ + DBUG_ENTER("Relay_log_info::inc_group_relay_log_pos"); + + if (skip_lock) + mysql_mutex_assert_owner(&data_lock); + else + mysql_mutex_lock(&data_lock); + + rgi->inc_event_relay_log_pos(); + DBUG_PRINT("info", ("log_pos: %lu group_master_log_pos: %lu", + (long) log_pos, (long) group_master_log_pos)); + if (rgi->is_parallel_exec) + { + /* In case of parallel replication, do not update the position backwards. */ + int cmp= compare_log_name(group_relay_log_name, rgi->event_relay_log_name); + if (cmp < 0) + { + group_relay_log_pos= rgi->future_event_relay_log_pos; + strmake_buf(group_relay_log_name, rgi->event_relay_log_name); + } else if (cmp == 0 && group_relay_log_pos < rgi->future_event_relay_log_pos) + group_relay_log_pos= rgi->future_event_relay_log_pos; + + /* + In the parallel case we need to update the master_log_name here, rather + than in Rotate_log_event::do_update_pos(). 
+ */ + cmp= compare_log_name(group_master_log_name, rgi->future_event_master_log_name); + if (cmp <= 0) + { + if (cmp < 0) + { + strcpy(group_master_log_name, rgi->future_event_master_log_name); + group_master_log_pos= log_pos; + } + else if (group_master_log_pos < log_pos) + group_master_log_pos= log_pos; + } + + /* + In the parallel case, we only update the Seconds_Behind_Master at the + end of a transaction. In the non-parallel case, the value is updated as + soon as an event is read from the relay log; however this would be too + confusing for the user, seeing the slave reported as up-to-date when + potentially thousands of events are still queued up for worker threads + waiting for execution. + */ + if (rgi->last_master_timestamp && + rgi->last_master_timestamp > last_master_timestamp) + last_master_timestamp= rgi->last_master_timestamp; + } + else + { + /* Non-parallel case. */ + group_relay_log_pos= event_relay_log_pos; + strmake_buf(group_relay_log_name, event_relay_log_name); + notify_group_relay_log_name_update(); + if (log_pos) // not 3.23 binlogs (no log_pos there) and not Stop_log_event + group_master_log_pos= log_pos; + } + + /* + If the slave does not support transactions and replicates a transaction, + users should not trust group_master_log_pos (which they can display with + SHOW SLAVE STATUS or read from relay-log.info), because to compute + group_master_log_pos the slave relies on log_pos stored in the master's + binlog, but if we are in a master's transaction these positions are always + the BEGIN's one (excepted for the COMMIT), so group_master_log_pos does + not advance as it should on the non-transactional slave (it advances by + big leaps, whereas it should advance by small leaps). + */ + /* + In 4.x we used the event's len to compute the positions here. 
This is + wrong if the event was 3.23/4.0 and has been converted to 5.0, because + then the event's len is not what is was in the master's binlog, so this + will make a wrong group_master_log_pos (yes it's a bug in 3.23->4.0 + replication: Exec_master_log_pos is wrong). Only way to solve this is to + have the original offset of the end of the event the relay log. This is + what we do in 5.0: log_pos has become "end_log_pos" (because the real use + of log_pos in 4.0 was to compute the end_log_pos; so better to store + end_log_pos instead of begin_log_pos. + If we had not done this fix here, the problem would also have appeared + when the slave and master are 5.0 but with different event length (for + example the slave is more recent than the master and features the event + UID). It would give false MASTER_POS_WAIT, false Exec_master_log_pos in + SHOW SLAVE STATUS, and so the user would do some CHANGE MASTER using this + value which would lead to badly broken replication. + Even the relay_log_pos will be corrupted in this case, because the len is + the relay log is not "val". + With the end_log_pos solution, we avoid computations involving lengthes. + */ + mysql_cond_broadcast(&data_cond); + if (!skip_lock) + mysql_mutex_unlock(&data_lock); + DBUG_VOID_RETURN; +} + + +void Relay_log_info::close_temporary_tables() +{ + DBUG_ENTER("Relay_log_info::close_temporary_tables"); + + TMP_TABLE_SHARE *share; + TABLE *table; + + if (!save_temporary_tables) + { + /* There are no temporary tables. */ + DBUG_VOID_RETURN; + } + + while ((share= save_temporary_tables->pop_front())) + { + /* + Iterate over the list of tables for this TABLE_SHARE and close them. 
+ */ + while ((table= share->all_tmp_tables.pop_front())) + { + DBUG_PRINT("tmptable", ("closing table: '%s'.'%s'", + table->s->db.str, table->s->table_name.str)); + + /* Reset in_use as the table may have been created by another thd */ + table->in_use= 0; + /* + Lets not free TABLE_SHARE here as there could be multiple TABLEs opened + for the same table (TABLE_SHARE). + */ + closefrm(table); + my_free(table); + } + + /* + Don't ask for disk deletion. For now, anyway they will be deleted when + slave restarts, but it is a better intention to not delete them. + */ + + free_table_share(share); + my_free(share); + } + + /* By now, there mustn't be any elements left in the list. */ + DBUG_ASSERT(save_temporary_tables->is_empty()); + + my_free(save_temporary_tables); + save_temporary_tables= NULL; + slave_open_temp_tables= 0; + + DBUG_VOID_RETURN; +} + +/* + purge_relay_logs() + + @param rli Relay log information + @param thd thread id. May be zero during startup + + NOTES + Assumes to have a run lock on rli and that no slave thread are running. +*/ + +int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset, + const char** errmsg) +{ + int error=0; + const char *ln; + char name_buf[FN_REFLEN]; + DBUG_ENTER("purge_relay_logs"); + + /* + Even if rli->inited==0, we still try to empty rli->master_log_* variables. + Indeed, rli->inited==0 does not imply that they already are empty. + It could be that slave's info initialization partly succeeded : + for example if relay-log.info existed but *relay-bin*.* + have been manually removed, Relay_log_info::init() reads the old + relay-log.info and fills rli->master_log_*, then Relay_log_info::init() + checks for the existence of the relay log, this fails and + Relay_log_info::init() leaves rli->inited to 0. 
+ In that pathological case, rli->master_log_pos* will be properly reinited + at the next START SLAVE (as RESET SLAVE or CHANGE + MASTER, the callers of purge_relay_logs, will delete bogus *.info files + or replace them with correct files), however if the user does SHOW SLAVE + STATUS before START SLAVE, he will see old, confusing rli->master_log_*. + In other words, we reinit rli->master_log_* for SHOW SLAVE STATUS + to display fine in any case. + */ + + rli->group_master_log_name[0]= 0; + rli->group_master_log_pos= 0; + + if (!rli->inited) + { + DBUG_PRINT("info", ("rli->inited == 0")); + if (rli->error_on_rli_init_info) + { + ln= rli->relay_log.generate_name(opt_relay_logname, "-relay-bin", + 1, name_buf); + + if (rli->relay_log.open_index_file(opt_relaylog_index_name, ln, TRUE)) + { + sql_print_error("Unable to purge relay log files. Failed to open relay " + "log index file:%s.", rli->relay_log.get_index_fname()); + DBUG_RETURN(1); + } + mysql_mutex_lock(rli->relay_log.get_log_lock()); + if (rli->relay_log.open(ln, 0, 0, SEQ_READ_APPEND, + (ulong)(rli->max_relay_log_size ? rli->max_relay_log_size : + max_binlog_size), 1, TRUE)) + { + sql_print_error("Unable to purge relay log files. 
Failed to open relay " + "log file:%s.", rli->relay_log.get_log_fname()); + mysql_mutex_unlock(rli->relay_log.get_log_lock()); + DBUG_RETURN(1); + } + mysql_mutex_unlock(rli->relay_log.get_log_lock()); + } + else + DBUG_RETURN(0); + } + else + { + DBUG_ASSERT(rli->slave_running == 0); + DBUG_ASSERT(rli->mi->slave_running == 0); + } + mysql_mutex_lock(&rli->data_lock); + + /* + we close the relay log fd possibly left open by the slave SQL thread, + to be able to delete it; the relay log fd possibly left open by the slave + I/O thread will be closed naturally in reset_logs() by the + close(LOG_CLOSE_TO_BE_OPENED) call + */ + if (rli->cur_log_fd >= 0) + { + end_io_cache(&rli->cache_buf); + mysql_file_close(rli->cur_log_fd, MYF(MY_WME)); + rli->cur_log_fd= -1; + } + + if (rli->relay_log.reset_logs(thd, !just_reset, NULL, 0, 0)) + { + *errmsg = "Failed during log reset"; + error=1; + goto err; + } + rli->relay_log_state.load(rpl_global_gtid_slave_state); + if (!just_reset) + { + /* Save name of used relay log file */ + strmake_buf(rli->group_relay_log_name, rli->relay_log.get_log_fname()); + strmake_buf(rli->event_relay_log_name, rli->relay_log.get_log_fname()); + rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE; + rli->log_space_total= 0; + + if (count_relay_log_space(rli)) + { + *errmsg= "Error counting relay log space"; + error=1; + goto err; + } + error= init_relay_log_pos(rli, rli->group_relay_log_name, + rli->group_relay_log_pos, + 0 /* do not need data lock */, errmsg, 0); + } + else + { + /* Ensure relay log names are not used */ + rli->group_relay_log_name[0]= rli->event_relay_log_name[0]= 0; + } + + if (!rli->inited && rli->error_on_rli_init_info) + { + mysql_mutex_lock(rli->relay_log.get_log_lock()); + rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT); + mysql_mutex_unlock(rli->relay_log.get_log_lock()); + } +err: + DBUG_PRINT("info",("log_space_total: %llu", uint64(rli->log_space_total))); + 
mysql_mutex_unlock(&rli->data_lock); + DBUG_RETURN(error); +} + + +/* + Check if condition stated in UNTIL clause of START SLAVE is reached. + SYNOPSYS + Relay_log_info::is_until_satisfied() + master_beg_pos position of the beginning of to be executed event + (not log_pos member of the event that points to the + beginning of the following event) + + + DESCRIPTION + Checks if UNTIL condition is reached. Uses caching result of last + comparison of current log file name and target log file name. So cached + value should be invalidated if current log file name changes + (see Relay_log_info::notify_... functions). + + This caching is needed to avoid of expensive string comparisons and + strtol() conversions needed for log names comparison. We don't need to + compare them each time this function is called, we only need to do this + when current log name changes. If we have UNTIL_MASTER_POS condition we + need to do this only after Rotate_log_event::do_apply_event() (which is + rare, so caching gives real benifit), and if we have UNTIL_RELAY_POS + condition then we should invalidate cached comarison value after + inc_group_relay_log_pos() which called for each group of events (so we + have some benefit if we have something like queries that use + autoincrement or if we have transactions). + + Should be called ONLY if until_condition != UNTIL_NONE ! + + In the parallel execution mode and UNTIL_MASTER_POS the file name is + presented by future_event_master_log_name which may be ahead of + group_master_log_name. Log_event::log_pos does relate to it nevertheless + so the pair comprises a correct binlog coordinate. + Internal group events and events that have zero log_pos also + produce the zero for the local log_pos which may not lead to the + function falsely return true. + In UNTIL_RELAY_POS the original caching and notification are simplified + to straightforward files comparison when the current event can't be + a part of an event group. 
+ + RETURN VALUE + true - condition met or error happened (condition seems to have + bad log file name) + false - condition not met +*/ + +bool Relay_log_info::is_until_satisfied(Log_event *ev) +{ + const char *log_name; + ulonglong log_pos; + /* Prevents stopping within transaction; needed solely for Relay UNTIL. */ + bool in_trans= false; + + DBUG_ENTER("Relay_log_info::is_until_satisfied"); + + if (until_condition == UNTIL_MASTER_POS) + { + log_name= (mi->using_parallel() ? future_event_master_log_name + : group_master_log_name); + log_pos= (get_flag(Relay_log_info::IN_TRANSACTION) || !ev || !ev->log_pos) ? + (mi->using_parallel() ? 0 : group_master_log_pos) : + ev->log_pos - ev->data_written; + } + else + { + DBUG_ASSERT(until_condition == UNTIL_RELAY_POS); + if (!mi->using_parallel()) + { + log_name= group_relay_log_name; + log_pos= group_relay_log_pos; + } + else + { + log_name= event_relay_log_name; + log_pos= event_relay_log_pos; + in_trans= get_flag(Relay_log_info::IN_TRANSACTION); + /* + until_log_names_cmp_result is set to UNKNOWN either + - by a non-group event *and* only when it is in the middle of a group + - or by a group event when the preceding group made the above + non-group event to defer the resetting. 
+ */ + if ((ev && !Log_event::is_group_event(ev->get_type_code()))) + { + if (in_trans) + { + until_relay_log_names_defer= true; + } + else + { + until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; + until_relay_log_names_defer= false; + } + } + else if (!in_trans && until_relay_log_names_defer) + { + until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; + until_relay_log_names_defer= false; + } + } + } + + DBUG_PRINT("info", ("group_master_log_name='%s', group_master_log_pos=%llu", + group_master_log_name, group_master_log_pos)); + DBUG_PRINT("info", ("group_relay_log_name='%s', group_relay_log_pos=%llu", + group_relay_log_name, group_relay_log_pos)); + DBUG_PRINT("info", ("(%s) log_name='%s', log_pos=%llu", + until_condition == UNTIL_MASTER_POS ? "master" : "relay", + log_name, log_pos)); + DBUG_PRINT("info", ("(%s) until_log_name='%s', until_log_pos=%llu", + until_condition == UNTIL_MASTER_POS ? "master" : "relay", + until_log_name, until_log_pos)); + + if (until_log_names_cmp_result == UNTIL_LOG_NAMES_CMP_UNKNOWN) + { + /* + We have no cached comparison results so we should compare log names + and cache result. + If we are after RESET SLAVE, and the SQL slave thread has not processed + any event yet, it could be that group_master_log_name is "". In that case, + just wait for more events (as there is no sensible comparison to do). + */ + + if (*log_name) + { + const char *basename= log_name + dirname_length(log_name); + + const char *q= (const char*)(fn_ext(basename)+1); + if (strncmp(basename, until_log_name, (int)(q-basename)) == 0) + { + /* Now compare extensions. */ + char *q_end; + ulong log_name_extension= strtoul(q, &q_end, 10); + if (log_name_extension < until_log_name_extension) + until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_LESS; + else + until_log_names_cmp_result= + (log_name_extension > until_log_name_extension) ? 
+ UNTIL_LOG_NAMES_CMP_GREATER : UNTIL_LOG_NAMES_CMP_EQUAL ; + } + else + { + /* Probably error so we aborting */ + sql_print_error("Slave SQL thread is stopped because UNTIL " + "condition is bad."); + DBUG_RETURN(TRUE); + } + } + else + DBUG_RETURN(until_log_pos == 0); + } + + DBUG_RETURN(((until_log_names_cmp_result == UNTIL_LOG_NAMES_CMP_EQUAL && + (log_pos >= until_log_pos && !in_trans)) || + until_log_names_cmp_result == UNTIL_LOG_NAMES_CMP_GREATER)); +} + + +bool Relay_log_info::stmt_done(my_off_t event_master_log_pos, THD *thd, + rpl_group_info *rgi) +{ + int error= 0; + DBUG_ENTER("Relay_log_info::stmt_done"); + + DBUG_ASSERT(!belongs_to_client()); + DBUG_ASSERT(rgi->rli == this); + /* + If in a transaction, and if the slave supports transactions, just + inc_event_relay_log_pos(). We only have to check for OPTION_BEGIN + (not OPTION_NOT_AUTOCOMMIT) as transactions are logged with + BEGIN/COMMIT, not with SET AUTOCOMMIT= . + + We can't use rgi->rli->get_flag(IN_TRANSACTION) here as OPTION_BEGIN + is also used for single row transactions. + + CAUTION: opt_using_transactions means innodb || bdb ; suppose the + master supports InnoDB and BDB, but the slave supports only BDB, + problems will arise: - suppose an InnoDB table is created on the + master, - then it will be MyISAM on the slave - but as + opt_using_transactions is true, the slave will believe he is + transactional with the MyISAM table. And problems will come when + one does START SLAVE; STOP SLAVE; START SLAVE; (the slave will + resume at BEGIN whereas there has not been any rollback). This is + the problem of using opt_using_transactions instead of a finer + "does the slave support _transactional handler used on the + master_". + + More generally, we'll have problems when a query mixes a + transactional handler and MyISAM and STOP SLAVE is issued in the + middle of the "transaction". START SLAVE will resume at BEGIN + while the MyISAM table has already been updated. 
+ */ + if ((rgi->thd->variables.option_bits & OPTION_BEGIN) && + opt_using_transactions) + rgi->inc_event_relay_log_pos(); + else + { + inc_group_relay_log_pos(event_master_log_pos, rgi); + if (rpl_global_gtid_slave_state->record_and_update_gtid(thd, rgi)) + { + report(WARNING_LEVEL, ER_CANNOT_UPDATE_GTID_STATE, rgi->gtid_info(), + "Failed to update GTID state in %s.%s, slave state may become " + "inconsistent: %d: %s", + "mysql", rpl_gtid_slave_state_table_name.str, + thd->get_stmt_da()->sql_errno(), thd->get_stmt_da()->message()); + /* + At this point we are not in a transaction (for example after DDL), + so we can not roll back. Anyway, normally updates to the slave + state table should not fail, and if they do, at least we made the + DBA aware of the problem in the error log. + */ + } + DBUG_EXECUTE_IF("inject_crash_before_flush_rli", DBUG_SUICIDE();); + if (mi->using_gtid == Master_info::USE_GTID_NO) + { + if (rgi->is_parallel_exec) + mysql_mutex_lock(&data_lock); + if (flush()) + error= 1; + if (rgi->is_parallel_exec) + mysql_mutex_unlock(&data_lock); + } + DBUG_EXECUTE_IF("inject_crash_after_flush_rli", DBUG_SUICIDE();); + } + DBUG_RETURN(error); +} + + +int +Relay_log_info::alloc_inuse_relaylog(const char *name) +{ + inuse_relaylog *ir; + uint32 gtid_count; + rpl_gtid *gtid_list; + + gtid_count= relay_log_state.count(); + if (!(gtid_list= (rpl_gtid *)my_malloc(PSI_INSTRUMENT_ME, + sizeof(*gtid_list)*gtid_count, MYF(MY_WME)))) + { + my_error(ER_OUTOFMEMORY, MYF(0), (int)sizeof(*gtid_list)*gtid_count); + return 1; + } + if (!(ir= new inuse_relaylog(this, gtid_list, gtid_count, name))) + { + my_free(gtid_list); + my_error(ER_OUTOFMEMORY, MYF(0), (int) sizeof(*ir)); + return 1; + } + if (relay_log_state.get_gtid_list(gtid_list, gtid_count)) + { + my_free(gtid_list); + delete ir; + DBUG_ASSERT(0 /* Should not be possible as we allocated correct length */); + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return 1; + } + + if (!inuse_relaylog_list) + 
inuse_relaylog_list= ir; + else + { + last_inuse_relaylog->completed= true; + last_inuse_relaylog->next= ir; + } + last_inuse_relaylog= ir; + + return 0; +} + + +void +Relay_log_info::free_inuse_relaylog(inuse_relaylog *ir) +{ + my_free(ir->relay_log_state); + delete ir; +} + + +void +Relay_log_info::reset_inuse_relaylog() +{ + inuse_relaylog *cur= inuse_relaylog_list; + while (cur) + { + DBUG_ASSERT(cur->queued_count == cur->dequeued_count); + inuse_relaylog *next= cur->next; + free_inuse_relaylog(cur); + cur= next; + } + inuse_relaylog_list= last_inuse_relaylog= NULL; +} + + +int +Relay_log_info::update_relay_log_state(rpl_gtid *gtid_list, uint32 count) +{ + int res= 0; + while (count) + { + if (relay_log_state.update_nolock(gtid_list, false)) + res= 1; + ++gtid_list; + --count; + } + return res; +} + + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +struct gtid_pos_element { uint64 sub_id; rpl_gtid gtid; void *hton; }; + +static int +scan_one_gtid_slave_pos_table(THD *thd, HASH *hash, DYNAMIC_ARRAY *array, + LEX_CSTRING *tablename, void **out_hton) +{ + TABLE_LIST tlist; + TABLE *UNINIT_VAR(table); + bool table_opened= false; + bool table_scanned= false; + struct gtid_pos_element tmp_entry, *entry; + int err= 0; + + thd->reset_for_next_command(); + tlist.init_one_table(&MYSQL_SCHEMA_NAME, tablename, NULL, TL_READ); + if ((err= open_and_lock_tables(thd, &tlist, FALSE, 0))) + goto end; + table_opened= true; + table= tlist.table; + + if ((err= gtid_check_rpl_slave_state_table(table))) + goto end; + + bitmap_set_all(table->read_set); + if (unlikely(err= table->file->ha_rnd_init_with_error(1))) + goto end; + + table_scanned= true; + for (;;) + { + uint32 domain_id, server_id; + uint64 sub_id, seq_no; + uchar *rec; + + if ((err= table->file->ha_rnd_next(table->record[0]))) + { + if (err == HA_ERR_END_OF_FILE) + break; + else + { + table->file->print_error(err, MYF(0)); + goto end; + } + } + domain_id= (uint32)table->field[0]->val_int(); + sub_id= 
(ulonglong)table->field[1]->val_int(); + server_id= (uint32)table->field[2]->val_int(); + seq_no= (ulonglong)table->field[3]->val_int(); + DBUG_PRINT("info", ("Read slave state row: %u-%u-%lu sub_id=%lu", + (unsigned)domain_id, (unsigned)server_id, + (ulong)seq_no, (ulong)sub_id)); + + tmp_entry.sub_id= sub_id; + tmp_entry.gtid.domain_id= domain_id; + tmp_entry.gtid.server_id= server_id; + tmp_entry.gtid.seq_no= seq_no; + tmp_entry.hton= table->s->db_type(); + if ((err= insert_dynamic(array, (uchar *)&tmp_entry))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + goto end; + } + + if ((rec= my_hash_search(hash, (const uchar *)&domain_id, + sizeof(domain_id)))) + { + entry= (struct gtid_pos_element *)rec; + if (entry->sub_id >= sub_id) + continue; + entry->sub_id= sub_id; + DBUG_ASSERT(entry->gtid.domain_id == domain_id); + entry->gtid.server_id= server_id; + entry->gtid.seq_no= seq_no; + entry->hton= table->s->db_type(); + } + else + { + if (!(entry= (struct gtid_pos_element *)my_malloc(PSI_INSTRUMENT_ME, + sizeof(*entry), MYF(MY_WME)))) + { + my_error(ER_OUTOFMEMORY, MYF(0), (int)sizeof(*entry)); + err= 1; + goto end; + } + entry->sub_id= sub_id; + entry->gtid.domain_id= domain_id; + entry->gtid.server_id= server_id; + entry->gtid.seq_no= seq_no; + entry->hton= table->s->db_type(); + if ((err= my_hash_insert(hash, (uchar *)entry))) + { + my_free(entry); + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + goto end; + } + } + } + err= 0; /* Clear HA_ERR_END_OF_FILE */ + +end: + if (table_scanned) + { + table->file->ha_index_or_rnd_end(); + ha_commit_trans(thd, FALSE); + trans_commit(thd); + } + if (table_opened) + { + *out_hton= table->s->db_type(); + close_thread_tables(thd); + thd->release_transactional_locks(); + } + return err; +} + + +/* + Look for all tables mysql.gtid_slave_pos*. Read all rows from each such + table found into ARRAY. For each domain id, put the row with highest sub_id + into HASH. 
+*/ +static int +scan_all_gtid_slave_pos_table(THD *thd, int (*cb)(THD *, LEX_CSTRING *, void *), + void *cb_data) +{ + char path[FN_REFLEN]; + MY_DIR *dirp; + + thd->reset_for_next_command(); + if (lock_schema_name(thd, MYSQL_SCHEMA_NAME.str)) + return 1; + + build_table_filename(path, sizeof(path) - 1, MYSQL_SCHEMA_NAME.str, "", "", 0); + if (!(dirp= my_dir(path, MYF(MY_DONT_SORT)))) + { + my_error(ER_FILE_NOT_FOUND, MYF(0), path, my_errno); + close_thread_tables(thd); + thd->release_transactional_locks(); + return 1; + } + else + { + size_t i; + Dynamic_array files(PSI_INSTRUMENT_MEM, + dirp->number_of_files); + Discovered_table_list tl(thd, &files); + int err; + + err= ha_discover_table_names(thd, &MYSQL_SCHEMA_NAME, dirp, &tl, false); + my_dirend(dirp); + close_thread_tables(thd); + thd->release_transactional_locks(); + if (err) + return err; + + for (i = 0; i < files.elements(); ++i) + { + if (strncmp(files.at(i)->str, + rpl_gtid_slave_state_table_name.str, + rpl_gtid_slave_state_table_name.length) == 0) + { + if ((err= (*cb)(thd, files.at(i), cb_data))) + return err; + } + } + } + + return 0; +} + + +struct load_gtid_state_cb_data { + HASH *hash; + DYNAMIC_ARRAY *array; + struct rpl_slave_state::gtid_pos_table *table_list; + struct rpl_slave_state::gtid_pos_table *default_entry; +}; + +static int +process_gtid_pos_table(THD *thd, LEX_CSTRING *table_name, void *hton, + struct load_gtid_state_cb_data *data) +{ + struct rpl_slave_state::gtid_pos_table *p, *entry, **next_ptr; + bool is_default= + (strcmp(table_name->str, rpl_gtid_slave_state_table_name.str) == 0); + + /* + Ignore tables with duplicate storage engine, with a warning. + Prefer the default mysql.gtid_slave_pos over another table + mysql.gtid_slave_posXXX with the same storage engine. 
+ */ + next_ptr= &data->table_list; + entry= data->table_list; + while (entry) + { + if (entry->table_hton == hton) + { + static const char *warning_msg= "Ignoring redundant table mysql.%s " + "since mysql.%s has the same storage engine"; + if (!is_default) + { + /* Ignore the redundant table. */ + sql_print_warning(warning_msg, table_name->str, entry->table_name.str); + return 0; + } + else + { + sql_print_warning(warning_msg, entry->table_name.str, table_name->str); + /* Delete the redundant table, and proceed to add this one instead. */ + *next_ptr= entry->next; + my_free(entry); + break; + } + } + next_ptr= &entry->next; + entry= entry->next; + } + + p= rpl_global_gtid_slave_state->alloc_gtid_pos_table(table_name, + hton, rpl_slave_state::GTID_POS_AVAILABLE); + if (!p) + return 1; + p->next= data->table_list; + data->table_list= p; + if (is_default) + data->default_entry= p; + return 0; +} + + +/* + Put tables corresponding to @@gtid_pos_auto_engines at the end of the list, + marked to be auto-created if needed. +*/ +static int +gtid_pos_auto_create_tables(rpl_slave_state::gtid_pos_table **list_ptr) +{ + plugin_ref *auto_engines; + int err= 0; + mysql_mutex_lock(&LOCK_global_system_variables); + for (auto_engines= opt_gtid_pos_auto_plugins; + !err && auto_engines && *auto_engines; + ++auto_engines) + { + void *hton= plugin_hton(*auto_engines); + char buf[FN_REFLEN+1]; + LEX_CSTRING table_name; + char *p; + rpl_slave_state::gtid_pos_table *entry, **next_ptr; + + /* See if this engine is already in the list. */ + next_ptr= list_ptr; + entry= *list_ptr; + while (entry) + { + if (entry->table_hton == hton) + break; + next_ptr= &entry->next; + entry= entry->next; + } + if (entry) + continue; + + /* Add an auto-create entry for this engine at end of list. 
*/ + p= strmake(buf, rpl_gtid_slave_state_table_name.str, FN_REFLEN); + p= strmake(p, "_", FN_REFLEN - (p - buf)); + p= strmake(p, plugin_name(*auto_engines)->str, FN_REFLEN - (p - buf)); + table_name.str= buf; + table_name.length= p - buf; + table_case_convert(const_cast(table_name.str), + static_cast(table_name.length)); + entry= rpl_global_gtid_slave_state->alloc_gtid_pos_table + (&table_name, hton, rpl_slave_state::GTID_POS_AUTO_CREATE); + if (!entry) + { + err= 1; + break; + } + *next_ptr= entry; + } + mysql_mutex_unlock(&LOCK_global_system_variables); + return err; +} + + +static int +load_gtid_state_cb(THD *thd, LEX_CSTRING *table_name, void *arg) +{ + int err; + load_gtid_state_cb_data *data= static_cast(arg); + void *hton; + + if ((err= scan_one_gtid_slave_pos_table(thd, data->hash, data->array, + table_name, &hton))) + return err; + return process_gtid_pos_table(thd, table_name, hton, data); +} + + +int +rpl_load_gtid_slave_state(THD *thd) +{ + bool array_inited= false; + struct gtid_pos_element tmp_entry, *entry; + HASH hash; + DYNAMIC_ARRAY array; + int err= 0; + uint32 i; + load_gtid_state_cb_data cb_data; + rpl_slave_state::list_element *old_gtids_list; + DBUG_ENTER("rpl_load_gtid_slave_state"); + + mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state); + bool loaded= rpl_global_gtid_slave_state->loaded; + mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); + if (loaded) + DBUG_RETURN(0); + + cb_data.table_list= NULL; + cb_data.default_entry= NULL; + my_hash_init(PSI_INSTRUMENT_ME, &hash, &my_charset_bin, 32, + offsetof(gtid_pos_element, gtid) + offsetof(rpl_gtid, domain_id), + sizeof(uint32), NULL, my_free, HASH_UNIQUE); + if ((err= my_init_dynamic_array(PSI_INSTRUMENT_ME, &array, + sizeof(gtid_pos_element), 0, 0, MYF(0)))) + goto end; + array_inited= true; + + cb_data.hash = &hash; + cb_data.array = &array; + if ((err= scan_all_gtid_slave_pos_table(thd, load_gtid_state_cb, &cb_data))) + goto end; + + if 
(!cb_data.default_entry) + { + /* + If the mysql.gtid_slave_pos table does not exist, but at least one other + table is available, arbitrarily pick the first in the list to use as + default. + */ + cb_data.default_entry= cb_data.table_list; + } + if ((err= gtid_pos_auto_create_tables(&cb_data.table_list))) + goto end; + + mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state); + if (rpl_global_gtid_slave_state->loaded) + { + mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); + goto end; + } + + if (!cb_data.table_list) + { + my_error(ER_NO_SUCH_TABLE, MYF(0), "mysql", + rpl_gtid_slave_state_table_name.str); + mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); + err= 1; + goto end; + } + + for (i= 0; i < array.elements; ++i) + { + get_dynamic(&array, (uchar *)&tmp_entry, i); + if ((err= rpl_global_gtid_slave_state->update_nolock(tmp_entry.gtid.domain_id, + tmp_entry.gtid.server_id, + tmp_entry.sub_id, + tmp_entry.gtid.seq_no, + tmp_entry.hton, + NULL))) + { + mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + goto end; + } + } + + for (i= 0; i < hash.records; ++i) + { + entry= (struct gtid_pos_element *)my_hash_element(&hash, i); + if (opt_bin_log && + mysql_bin_log.bump_seq_no_counter_if_needed(entry->gtid.domain_id, + entry->gtid.seq_no)) + { + mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + goto end; + } + } + + rpl_global_gtid_slave_state->set_gtid_pos_tables_list(cb_data.table_list, + cb_data.default_entry); + cb_data.table_list= NULL; + rpl_global_gtid_slave_state->loaded= true; + mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); + + /* Clear out no longer needed elements now. 
*/ + old_gtids_list= + rpl_global_gtid_slave_state->gtid_grab_pending_delete_list(); + rpl_global_gtid_slave_state->gtid_delete_pending(thd, &old_gtids_list); + if (old_gtids_list) + rpl_global_gtid_slave_state->put_back_list(old_gtids_list); + +end: + if (array_inited) + delete_dynamic(&array); + my_hash_free(&hash); + if (cb_data.table_list) + rpl_global_gtid_slave_state->free_gtid_pos_tables(cb_data.table_list); + DBUG_RETURN(err); +} + + +static int +find_gtid_pos_tables_cb(THD *thd, LEX_CSTRING *table_name, void *arg) +{ + load_gtid_state_cb_data *data= static_cast(arg); + TABLE_LIST tlist; + TABLE *table= NULL; + int err; + + thd->reset_for_next_command(); + tlist.init_one_table(&MYSQL_SCHEMA_NAME, table_name, NULL, TL_READ); + if ((err= open_and_lock_tables(thd, &tlist, FALSE, 0))) + goto end; + table= tlist.table; + + if ((err= gtid_check_rpl_slave_state_table(table))) + goto end; + err= process_gtid_pos_table(thd, table_name, table->s->db_type(), data); + +end: + if (table) + { + ha_commit_trans(thd, FALSE); + ha_commit_trans(thd, TRUE); + close_thread_tables(thd); + thd->release_transactional_locks(); + } + + return err; +} + + +/* + Re-compute the list of available mysql.gtid_slave_posXXX tables. + + This is done at START SLAVE to pick up any newly created tables without + requiring server restart. 
+*/ +int +find_gtid_slave_pos_tables(THD *thd) +{ + int err= 0; + load_gtid_state_cb_data cb_data; + uint num_running; + + mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state); + bool loaded= rpl_global_gtid_slave_state->loaded; + mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); + if (!loaded) + return 0; + + cb_data.table_list= NULL; + cb_data.default_entry= NULL; + if ((err= scan_all_gtid_slave_pos_table(thd, find_gtid_pos_tables_cb, &cb_data))) + goto end; + + if (!cb_data.table_list) + { + my_error(ER_NO_SUCH_TABLE, MYF(0), "mysql", + rpl_gtid_slave_state_table_name.str); + err= 1; + goto end; + } + if (!cb_data.default_entry) + { + /* + If the mysql.gtid_slave_pos table does not exist, but at least one other + table is available, arbitrarily pick the first in the list to use as + default. + */ + cb_data.default_entry= cb_data.table_list; + } + if ((err= gtid_pos_auto_create_tables(&cb_data.table_list))) + goto end; + + mysql_mutex_lock(&LOCK_active_mi); + num_running= any_slave_sql_running(true); + mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state); + if (num_running <= 1) + { + /* + If no slave is running now, the count will be 1, since this SQL thread + which is starting is included in the count. In this case, we can safely + replace the list, no-one can be trying to read it without lock. + */ + DBUG_ASSERT(num_running == 1); + rpl_global_gtid_slave_state->set_gtid_pos_tables_list(cb_data.table_list, + cb_data.default_entry); + cb_data.table_list= NULL; + } + else + { + /* + If there are SQL threads running, we cannot safely remove the old list. + However we can add new entries, and warn about any tables that + disappeared, but may still be visible to running SQL threads. 
+ */ + rpl_slave_state::gtid_pos_table *new_entry, **next_ptr_ptr; + auto old_entry= rpl_global_gtid_slave_state-> + gtid_pos_tables.load(std::memory_order_relaxed); + while (old_entry) + { + new_entry= cb_data.table_list; + while (new_entry) + { + if (new_entry->table_hton == old_entry->table_hton) + break; + new_entry= new_entry->next; + } + if (!new_entry) + sql_print_warning("The table mysql.%s was removed. " + "This change will not take full effect " + "until all SQL threads have been restarted", + old_entry->table_name.str); + old_entry= old_entry->next; + } + next_ptr_ptr= &cb_data.table_list; + new_entry= cb_data.table_list; + while (new_entry) + { + /* Check if we already have a table with this storage engine. */ + old_entry= rpl_global_gtid_slave_state-> + gtid_pos_tables.load(std::memory_order_relaxed); + while (old_entry) + { + if (new_entry->table_hton == old_entry->table_hton) + break; + old_entry= old_entry->next; + } + if (old_entry) + { + /* This new_entry is already available in the list. */ + next_ptr_ptr= &new_entry->next; + new_entry= new_entry->next; + } + else + { + /* Move this new_entry to the list. 
*/ + rpl_slave_state::gtid_pos_table *next= new_entry->next; + rpl_global_gtid_slave_state->add_gtid_pos_table(new_entry); + *next_ptr_ptr= next; + new_entry= next; + } + } + } + mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); + mysql_mutex_unlock(&LOCK_active_mi); + +end: + if (cb_data.table_list) + rpl_global_gtid_slave_state->free_gtid_pos_tables(cb_data.table_list); + return err; +} + + +void +rpl_group_info::reinit(Relay_log_info *rli) +{ + this->rli= rli; + tables_to_lock= NULL; + tables_to_lock_count= 0; + trans_retries= 0; + last_event_start_time= 0; + gtid_sub_id= 0; + commit_id= 0; + gtid_pending= false; + worker_error= 0; + row_stmt_start_timestamp= 0; + long_find_row_note_printed= false; + did_mark_start_commit= false; + gtid_ev_flags2= 0; + gtid_ev_flags_extra= 0; + gtid_ev_sa_seq_no= 0; + last_master_timestamp = 0; + gtid_ignore_duplicate_state= GTID_DUPLICATE_NULL; + speculation= SPECULATE_NO; + rpt= NULL; + start_alter_ev= NULL; + direct_commit_alter= false; + commit_orderer.reinit(); +} + +rpl_group_info::rpl_group_info(Relay_log_info *rli) + : thd(0), wait_commit_sub_id(0), + wait_commit_group_info(0), parallel_entry(0), + deferred_events(NULL), m_annotate_event(0), is_parallel_exec(false), + gtid_ev_flags2(0), gtid_ev_flags_extra(0), gtid_ev_sa_seq_no(0), + reserved_start_alter_thread(0), finish_event_group_called(0), rpt(NULL), + start_alter_ev(NULL), direct_commit_alter(false), sa_info(NULL) +{ + reinit(rli); + bzero(¤t_gtid, sizeof(current_gtid)); + mysql_mutex_init(key_rpl_group_info_sleep_lock, &sleep_lock, + MY_MUTEX_INIT_FAST); + mysql_cond_init(key_rpl_group_info_sleep_cond, &sleep_cond, NULL); +} + +rpl_group_info::~rpl_group_info() +{ + free_annotate_event(); + delete deferred_events; + mysql_mutex_destroy(&sleep_lock); + mysql_cond_destroy(&sleep_cond); +} + + +int +event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev) +{ + uint64 sub_id= rpl_global_gtid_slave_state->next_sub_id(gev->domain_id); + if 
(!sub_id) + { + /* Out of memory caused hash insertion to fail. */ + return 1; + } + rgi->gtid_sub_id= sub_id; + rgi->current_gtid.domain_id= gev->domain_id; + rgi->current_gtid.server_id= gev->server_id; + rgi->current_gtid.seq_no= gev->seq_no; + rgi->commit_id= gev->commit_id; + rgi->gtid_pending= true; + rgi->sa_info= NULL; + return 0; +} + + +void +delete_or_keep_event_post_apply(rpl_group_info *rgi, + Log_event_type typ, Log_event *ev) +{ + /* + ToDo: This needs to work on rpl_group_info, not Relay_log_info, to be + thread-safe for parallel replication. + */ + + switch (typ) { + case FORMAT_DESCRIPTION_EVENT: + /* + Format_description_log_event should not be deleted because it + will be used to read info about the relay log's format; + it will be deleted when the SQL thread does not need it, + i.e. when this thread terminates. + */ + break; + case ANNOTATE_ROWS_EVENT: + /* + Annotate_rows event should not be deleted because after it has + been applied, thd->query points to the string inside this event. + The thd->query will be used to generate new Annotate_rows event + during applying the subsequent Rows events. + */ + rgi->set_annotate_event((Annotate_rows_log_event*) ev); + break; + case DELETE_ROWS_EVENT_V1: + case UPDATE_ROWS_EVENT_V1: + case WRITE_ROWS_EVENT_V1: + case DELETE_ROWS_EVENT: + case UPDATE_ROWS_EVENT: + case WRITE_ROWS_EVENT: + case WRITE_ROWS_COMPRESSED_EVENT: + case DELETE_ROWS_COMPRESSED_EVENT: + case UPDATE_ROWS_COMPRESSED_EVENT: + case WRITE_ROWS_COMPRESSED_EVENT_V1: + case UPDATE_ROWS_COMPRESSED_EVENT_V1: + case DELETE_ROWS_COMPRESSED_EVENT_V1: + /* + After the last Rows event has been applied, the saved Annotate_rows + event (if any) is not needed anymore and can be deleted. 
+ */ + if (((Rows_log_event*)ev)->get_flags(Rows_log_event::STMT_END_F)) + rgi->free_annotate_event(); + /* fall through */ + default: + DBUG_PRINT("info", ("Deleting the event after it has been executed")); + if (!rgi->is_deferred_event(ev)) + delete ev; + break; + } +} + + +void rpl_group_info::cleanup_context(THD *thd, bool error) +{ + DBUG_ENTER("rpl_group_info::cleanup_context"); + DBUG_PRINT("enter", ("error: %d", (int) error)); + + DBUG_ASSERT(this->thd == thd); + /* + 1) Instances of Table_map_log_event, if ::do_apply_event() was called on them, + may have opened tables, which we cannot be sure have been closed (because + maybe the Rows_log_event have not been found or will not be, because slave + SQL thread is stopping, or relay log has a missing tail etc). So we close + all thread's tables. And so the table mappings have to be cancelled. + 2) Rows_log_event::do_apply_event() may even have started statements or + transactions on them, which we need to rollback in case of error. + 3) If finding a Format_description_log_event after a BEGIN, we also need + to rollback before continuing with the next events. + 4) so we need this "context cleanup" function. + */ + if (unlikely(error)) + { + trans_rollback_stmt(thd); // if a "statement transaction" + /* trans_rollback() also resets OPTION_GTID_BEGIN */ + trans_rollback(thd); // if a "real transaction" + /* + Now that we have rolled back the transaction, make sure we do not + erroneously update the GTID position. + */ + gtid_pending= false; + } + m_table_map.clear_tables(); + slave_close_thread_tables(thd); + + if (unlikely(error)) + { + // leave alone any XA prepared transactions + if (thd->transaction->xid_state.is_explicit_XA() && + thd->transaction->xid_state.get_state_code() != XA_PREPARED) + xa_trans_force_rollback(thd); + + thd->release_transactional_locks(); + + if (thd == rli->sql_driver_thd) + { + /* + Reset flags. 
This is needed to handle incident events and errors in + the relay log noticed by the sql driver thread. + */ + rli->clear_flag(Relay_log_info::IN_STMT); + rli->clear_flag(Relay_log_info::IN_TRANSACTION); + } + + /* + Ensure we always release the domain for others to process, when using + --gtid-ignore-duplicates. + */ + if (gtid_ignore_duplicate_state != GTID_DUPLICATE_NULL) + rpl_global_gtid_slave_state->release_domain_owner(this); + } + + /* + Cleanup for the flags that have been set at do_apply_event. + */ + thd->variables.option_bits&= ~(OPTION_NO_FOREIGN_KEY_CHECKS | + OPTION_RELAXED_UNIQUE_CHECKS | + OPTION_NO_CHECK_CONSTRAINT_CHECKS); + + /* + Reset state related to long_find_row notes in the error log: + - timestamp + - flag that decides whether the slave prints or not + */ + reset_row_stmt_start_timestamp(); + unset_long_find_row_note_printed(); + + DBUG_EXECUTE_IF("inject_sleep_gtid_100_x_x", { + if (current_gtid.domain_id == 100) + my_sleep(50000); + };); + + DBUG_VOID_RETURN; +} + + +void rpl_group_info::clear_tables_to_lock() +{ + DBUG_ENTER("rpl_group_info::clear_tables_to_lock()"); +#ifndef DBUG_OFF + /** + When replicating in RBR and MyISAM Merge tables are involved + open_and_lock_tables (called in do_apply_event) appends the + base tables to the list of tables_to_lock. Then these are + removed from the list in close_thread_tables (which is called + before we reach this point). + + This assertion just confirms that we get no surprises at this + point. 
+ */ + uint i=0; + for (TABLE_LIST *ptr= tables_to_lock ; ptr ; ptr= ptr->next_global, i++) ; + DBUG_ASSERT(i == tables_to_lock_count); +#endif + + while (tables_to_lock) + { + uchar* to_free= reinterpret_cast(tables_to_lock); + if (tables_to_lock->m_tabledef_valid) + { + tables_to_lock->m_tabledef.table_def::~table_def(); + tables_to_lock->m_tabledef_valid= FALSE; + } + + /* + If blob fields were used during conversion of field values + from the master table into the slave table, then we need to + free the memory used temporarily to store their values before + copying into the slave's table. + */ + if (tables_to_lock->m_conv_table) + free_blobs(tables_to_lock->m_conv_table); + + tables_to_lock= + static_cast(tables_to_lock->next_global); + tables_to_lock_count--; + my_free(to_free); + } + DBUG_ASSERT(tables_to_lock == NULL && tables_to_lock_count == 0); + DBUG_VOID_RETURN; +} + + +void rpl_group_info::slave_close_thread_tables(THD *thd) +{ + DBUG_ENTER("rpl_group_info::slave_close_thread_tables(THD *thd)"); + thd->get_stmt_da()->set_overwrite_status(true); +#ifdef WITH_WSREP + // This can happen e.g. when table_def::compatible_with fails and sets a error + // but thd->is_error() is false then. However, we do not want to commit + // statement on Galera instead we want to rollback it as later in + // apply_write_set we rollback transaction and that can't be done + // after wsrep transaction state is s_committed. + if (WSREP(thd)) + (thd->is_error() || thd->is_slave_error) ? trans_rollback_stmt(thd) : trans_commit_stmt(thd); + else +#endif + thd->is_error() ? trans_rollback_stmt(thd) : trans_commit_stmt(thd); + thd->get_stmt_da()->set_overwrite_status(false); + + close_thread_tables(thd); + /* + - If transaction rollback was requested due to deadlock + perform it and release metadata locks. + - If inside a multi-statement transaction, + defer the release of metadata locks until the current + transaction is either committed or rolled back. 
This prevents + other statements from modifying the table for the entire + duration of this transaction. This provides commit ordering + and guarantees serializability across multiple transactions. + - If in autocommit mode, or outside a transactional context, + automatically release metadata locks of the current statement. + */ + if (thd->transaction_rollback_request) + { + trans_rollback_implicit(thd); + thd->release_transactional_locks(); + } + else if (! thd->in_multi_stmt_transaction_mode()) + thd->release_transactional_locks(); + else + thd->mdl_context.release_statement_locks(); + + clear_tables_to_lock(); + DBUG_VOID_RETURN; +} + + + +static void +mark_start_commit_inner(rpl_parallel_entry *e, group_commit_orderer *gco, + rpl_group_info *rgi) +{ + group_commit_orderer *tmp; + uint64 count= ++e->count_committing_event_groups; + /* Signal any following GCO whose wait_count has been reached now. */ + tmp= gco; + + DBUG_ASSERT(!tmp->gc_done); + + while ((tmp= tmp->next_gco)) + { + DBUG_ASSERT(!tmp->gc_done); + + uint64 wait_count= tmp->wait_count; + if (wait_count > count) + break; + mysql_cond_broadcast(&tmp->COND_group_commit_orderer); + } +} + + +void +rpl_group_info::mark_start_commit_no_lock() +{ + if (did_mark_start_commit) + return; + did_mark_start_commit= true; + mark_start_commit_inner(parallel_entry, gco, this); +} + + +void +rpl_group_info::mark_start_commit() +{ + rpl_parallel_entry *e; + + if (did_mark_start_commit) + return; + did_mark_start_commit= true; + + e= this->parallel_entry; + mysql_mutex_lock(&e->LOCK_parallel_entry); + mark_start_commit_inner(e, gco, this); + mysql_mutex_unlock(&e->LOCK_parallel_entry); +} + + +/* + Format the current GTID as a string suitable for printing in error messages. + + The string is stored in a buffer inside rpl_group_info, so remains valid + until next call to gtid_info() or until destruction of rpl_group_info. + + If no GTID is available, then NULL is returned. 
+*/ +char * +rpl_group_info::gtid_info() +{ + if (!gtid_sub_id || !current_gtid.seq_no) + return NULL; + my_snprintf(gtid_info_buf, sizeof(gtid_info_buf), "Gtid %u-%u-%llu", + current_gtid.domain_id, current_gtid.server_id, + current_gtid.seq_no); + return gtid_info_buf; +} + + +/* + Undo the effect of a prior mark_start_commit(). + + This is only used for retrying a transaction in parallel replication, after + we have encountered a deadlock or other temporary error. + + When we get such a deadlock, it means that the current group of transactions + did not yet all start committing (else they would not have deadlocked). So + we will not yet have woken up anything in the next group, our rgi->gco is + still live, and we can simply decrement the counter (to be incremented again + later, when the retry succeeds and reaches the commit step). +*/ +void +rpl_group_info::unmark_start_commit() +{ + rpl_parallel_entry *e; + + if (!did_mark_start_commit) + return; + did_mark_start_commit= false; + + e= this->parallel_entry; + mysql_mutex_lock(&e->LOCK_parallel_entry); + --e->count_committing_event_groups; + mysql_mutex_unlock(&e->LOCK_parallel_entry); +} + + +rpl_sql_thread_info::rpl_sql_thread_info(Rpl_filter *filter) + : rpl_filter(filter) +{ + cached_charset_invalidate(); +} + + +void rpl_sql_thread_info::cached_charset_invalidate() +{ + DBUG_ENTER("rpl_group_info::cached_charset_invalidate"); + + /* Full of zeroes means uninitialized. */ + bzero(cached_charset, sizeof(cached_charset)); + DBUG_VOID_RETURN; +} + + +bool rpl_sql_thread_info::cached_charset_compare(char *charset) const +{ + DBUG_ENTER("rpl_group_info::cached_charset_compare"); + + if (memcmp(cached_charset, charset, sizeof(cached_charset))) + { + memcpy(const_cast(cached_charset), charset, sizeof(cached_charset)); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +/** + Store the file and position where the slave's SQL thread are in the + relay log. 
+ + Notes: + + - This function should be called either from the slave SQL thread, + or when the slave thread is not running. (It reads the + group_{relay|master}_log_{pos|name} and delay fields in the rli + object. These may only be modified by the slave SQL thread or by + a client thread when the slave SQL thread is not running.) + + - If there is an active transaction, then we do not update the + position in the relay log. This is to ensure that we re-execute + statements if we die in the middle of an transaction that was + rolled back. + + - As a transaction never spans binary logs, we don't have to handle + the case where we do a relay-log-rotation in the middle of the + transaction. If transactions could span several binlogs, we would + have to ensure that we do not delete the relay log file where the + transaction started before switching to a new relay log file. + + - Error can happen if writing to file fails or if flushing the file + fails. + + @param rli The object representing the Relay_log_info. + + @todo Change the log file information to a binary format to avoid + calling longlong2str. + + @return 0 on success, 1 on error. 
+*/ +bool Relay_log_info::flush() +{ + bool error=0; + + DBUG_ENTER("Relay_log_info::flush()"); + + IO_CACHE *file = &info_file; + // 2*file name, 2*long long, 2*unsigned long, 6*'\n' + char buff[FN_REFLEN * 2 + 22 * 2 + 10 * 2 + 6], *pos; + my_b_seek(file, 0L); + pos= longlong10_to_str(LINES_IN_RELAY_LOG_INFO_WITH_DELAY, buff, 10); + *pos++='\n'; + pos=strmov(pos, group_relay_log_name); + *pos++='\n'; + pos=longlong10_to_str(group_relay_log_pos, pos, 10); + *pos++='\n'; + pos=strmov(pos, group_master_log_name); + *pos++='\n'; + pos=longlong10_to_str(group_master_log_pos, pos, 10); + *pos++='\n'; + pos= longlong10_to_str(sql_delay, pos, 10); + *pos++= '\n'; + if (my_b_write(file, (uchar*) buff, (size_t) (pos-buff))) + error=1; + if (flush_io_cache(file)) + error=1; + if (sync_relayloginfo_period && + !error && + ++sync_counter >= sync_relayloginfo_period) + { + if (my_sync(info_fd, MYF(MY_WME))) + error=1; + sync_counter= 0; + } + /* + Flushing the relay log is done by the slave I/O thread + or by the user on STOP SLAVE. + */ + DBUG_RETURN(error); +} + +#endif diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h new file mode 100644 index 00000000..0fd90704 --- /dev/null +++ b/sql/rpl_rli.h @@ -0,0 +1,1054 @@ +/* Copyright (c) 2005, 2017, Oracle and/or its affiliates. + Copyright (c) 2009, 2017, MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef RPL_RLI_H +#define RPL_RLI_H + +#include "rpl_tblmap.h" +#include "rpl_reporting.h" +#include "rpl_utility.h" +#include "log.h" /* LOG_INFO, MYSQL_BIN_LOG */ +#include "sql_class.h" /* THD */ +#include "log_event.h" +#include "rpl_parallel.h" + +struct RPL_TABLE_LIST; +class Master_info; +class Rpl_filter; + + +/**************************************************************************** + + Replication SQL Thread + + Relay_log_info contains: + - the current relay log + - the current relay log offset + - master log name + - master log sequence corresponding to the last update + - misc information specific to the SQL thread + + Relay_log_info is initialized from the slave.info file if such + exists. Otherwise, data members are intialized with defaults. The + initialization is done with Relay_log_info::init() call. + + The format of slave.info file: + + relay_log_name + relay_log_pos + master_log_name + master_log_pos + + To clean up, call end_relay_log_info() + +*****************************************************************************/ + +struct rpl_group_info; +struct inuse_relaylog; + +class Relay_log_info : public Slave_reporting_capability +{ +public: + /** + Flags for the state of reading the relay log. Note that these are + bit masks. + */ + enum enum_state_flag { + /** We are inside a group of events forming a statement */ + IN_STMT=1, + /** We have inside a transaction */ + IN_TRANSACTION=2 + }; + + /* + The SQL thread owns one Relay_log_info, and each client that has + executed a BINLOG statement owns one Relay_log_info. This function + returns zero for the Relay_log_info object that belongs to the SQL + thread and nonzero for Relay_log_info objects that belong to + clients. 
+ */ + inline bool belongs_to_client() + { + DBUG_ASSERT(sql_driver_thd); + return !sql_driver_thd->slave_thread; + } + + /* + If true, events with the same server id should be replicated. This + field is set on creation of a relay log info structure by copying + the value of ::replicate_same_server_id and can be overridden if + necessary. For example of when this is done, check sql_binlog.cc, + where the BINLOG statement can be used to execute "raw" events. + */ + bool replicate_same_server_id; + + /*** The following variables can only be read when protect by data lock ****/ + + /* + info_fd - file descriptor of the info file. set only during + initialization or clean up - safe to read anytime + cur_log_fd - file descriptor of the current read relay log + */ + File info_fd,cur_log_fd; + + /* + Protected with internal locks. + Must get data_lock when resetting the logs. + */ + MYSQL_BIN_LOG relay_log; + LOG_INFO linfo; + + /* + cur_log + Pointer that either points at relay_log.get_log_file() or + &rli->cache_buf, depending on whether the log is hot or there was + the need to open a cold relay_log. + + cache_buf + IO_CACHE used when opening cold relay logs. + */ + IO_CACHE cache_buf,*cur_log; + + /* + Keeps track of the number of transactions that commits + before fsyncing. The option --sync-relay-log-info determines + how many transactions should commit before fsyncing. + */ + uint sync_counter; + + /* + Identifies when the recovery process is going on. + See sql/slave.cc:init_recovery for further details. + */ + bool is_relay_log_recovery; + + /* The following variables are safe to read any time */ + + /* IO_CACHE of the info file - set only during init or end */ + IO_CACHE info_file; + + /* + List of temporary tables used by this connection. + This is updated when a temporary table is created or dropped by + a replication thread. + + Not reset when replication ends, to allow one to access the tables + when replication restarts. + + Protected by data_lock. 
+ */ + All_tmp_tables_list *save_temporary_tables; + + /* + standard lock acquisition order to avoid deadlocks: + run_lock, data_lock, relay_log.LOCK_log, relay_log.LOCK_index + */ + mysql_mutex_t data_lock, run_lock; + /* + start_cond is broadcast when SQL thread is started + stop_cond - when stopped + data_cond - when data protected by data_lock changes + */ + mysql_cond_t start_cond, stop_cond, data_cond; + /* parent Master_info structure */ + Master_info *mi; + + /* + List of active relay log files. + (This can be more than one in case of parallel replication). + */ + inuse_relaylog *inuse_relaylog_list; + inuse_relaylog *last_inuse_relaylog; + + /* + Needed to deal properly with cur_log getting closed and re-opened with + a different log under our feet + */ + uint32 cur_log_old_open_count; + + /* + If on init_info() call error_on_rli_init_info is true that means + that previous call to init_info() terminated with an error, RESET + SLAVE must be executed and the problem fixed manually. + */ + bool error_on_rli_init_info; + + /* + Let's call a group (of events) : + - a transaction + or + - an autocommiting query + its associated events (INSERT_ID, + TIMESTAMP...) + We need these rli coordinates : + - relay log name and position of the beginning of the group we currently + are executing. Needed to know where we have to restart when replication has + stopped in the middle of a group (which has been rolled back by the slave). + - relay log name and position just after the event we have just + executed. This event is part of the current group. + Formerly we only had the immediately above coordinates, plus a 'pending' + variable, but this dealt wrong with the case of a transaction starting on a + relay log and finishing (commiting) on another relay log. Case which can + happen when, for example, the relay log gets rotated because of + max_binlog_size. 
+ + Note: group_relay_log_name, group_relay_log_pos must only be + written from the thread owning the Relay_log_info (SQL thread if + !belongs_to_client(); client thread executing BINLOG statement if + belongs_to_client()). + */ + char group_relay_log_name[FN_REFLEN]; + ulonglong group_relay_log_pos; + char event_relay_log_name[FN_REFLEN]; + ulonglong event_relay_log_pos; + ulonglong future_event_relay_log_pos; + /* + The master log name for current event. Only used in parallel replication. + */ + char future_event_master_log_name[FN_REFLEN]; + + /* + Original log name and position of the group we're currently executing + (whose coordinates are group_relay_log_name/pos in the relay log) + in the master's binlog. These concern the *group*, because in the master's + binlog the log_pos that comes with each event is the position of the + beginning of the group. + + Note: group_master_log_name, group_master_log_pos must only be + written from the thread owning the Relay_log_info (SQL thread if + !belongs_to_client(); client thread executing BINLOG statement if + belongs_to_client()). + */ + char group_master_log_name[FN_REFLEN]; + volatile my_off_t group_master_log_pos; + + /* + Handling of the relay_log_space_limit optional constraint. + ignore_log_space_limit is used to resolve a deadlock between I/O and SQL + threads, the SQL thread sets it to unblock the I/O thread and make it + temporarily forget about the constraint. + */ + ulonglong log_space_limit; + Atomic_counter log_space_total; + bool ignore_log_space_limit; + + /* + Used by the SQL thread to instructs the IO thread to rotate + the logs when the SQL thread needs to purge to release some + disk space. + */ + bool sql_force_rotate_relay; + + time_t last_master_timestamp; + /* + The SQL driver thread sets this true while it is waiting at the end of the + relay log for more events to arrive. SHOW SLAVE STATUS uses this to report + Seconds_Behind_Master as zero while the SQL thread is so waiting. 
+ */ + bool sql_thread_caught_up; + + void clear_until_condition(); + /** + Reset the delay. + This is used by RESET SLAVE to clear the delay. + */ + void clear_sql_delay() + { + sql_delay= 0; + } + + + /* + Needed for problems when slave stops and we want to restart it + skipping one or more events in the master log that have caused + errors, and have been manually applied by DBA already. + Must be ulong as it's referred to from set_var.cc + */ + volatile ulonglong slave_skip_counter; + ulonglong max_relay_log_size; + + volatile ulong abort_pos_wait; /* Incremented on change master */ + volatile ulong slave_run_id; /* Incremented on slave start */ + mysql_mutex_t log_space_lock; + mysql_cond_t log_space_cond; + /* + THD for the main sql thread, the one that starts threads to process + slave requests. If there is only one thread, then this THD is also + used for SQL processing. + A kill sent to this THD will kill the replication. + */ + THD *sql_driver_thd; +#ifndef DBUG_OFF + int events_till_abort; +#endif + + enum_gtid_skip_type gtid_skip_flag; + + /* + inited changes its value within LOCK_active_mi-guarded critical + sections at times of start_slave_threads() (0->1) and end_slave() (1->0). + Readers may not acquire the mutex while they realize potential concurrency + issue. + If not set, the value of other members of the structure are undefined. + */ + volatile bool inited; + volatile bool abort_slave; + volatile bool stop_for_until; + volatile uint slave_running; + + /* + Condition and its parameters from START SLAVE UNTIL clause. + + UNTIL condition is tested with is_until_satisfied() method that is + called by exec_relay_log_event(). is_until_satisfied() caches the result + of the comparison of log names because log names don't change very often; + this cache is invalidated by parts of code which change log names with + notify_*_log_name_updated() methods. (They need to be called only if SQL + thread is running). 
+ */ + + enum { + UNTIL_NONE= 0, UNTIL_MASTER_POS, UNTIL_RELAY_POS, UNTIL_GTID + } until_condition; + char until_log_name[FN_REFLEN]; + ulonglong until_log_pos; + /* extension extracted from log_name and converted to int */ + ulong until_log_name_extension; + /* + Cached result of comparison of until_log_name and current log name + -2 means unitialised, -1,0,1 are comarison results + */ + enum + { + UNTIL_LOG_NAMES_CMP_UNKNOWN= -2, UNTIL_LOG_NAMES_CMP_LESS= -1, + UNTIL_LOG_NAMES_CMP_EQUAL= 0, UNTIL_LOG_NAMES_CMP_GREATER= 1 + } until_log_names_cmp_result; + /* Condition for UNTIL master_gtid_pos. */ + slave_connection_state until_gtid_pos; + + /* + retried_trans is a cumulative counter: how many times the slave + has retried a transaction (any) since slave started. + Protected by data_lock. + */ + ulong retried_trans; + /* + Number of executed events for SLAVE STATUS. + Protected by slave_executed_entries_lock + */ + Atomic_counter executed_entries; + + /* + If the end of the hot relay log is made of master's events ignored by the + slave I/O thread, these two keep track of the coords (in the master's + binlog) of the last of these events seen by the slave I/O thread. If not, + ign_master_log_name_end[0] == 0. + As they are like a Rotate event read/written from/to the relay log, they + are both protected by rli->relay_log.LOCK_log. + */ + char ign_master_log_name_end[FN_REFLEN]; + ulonglong ign_master_log_pos_end; + /* Similar for ignored GTID events. */ + slave_connection_state ign_gtids; + + /* + Indentifies where the SQL Thread should create temporary files for the + LOAD DATA INFILE. This is used for security reasons. + */ + char slave_patternload_file[FN_REFLEN]; + size_t slave_patternload_file_size; + + rpl_parallel parallel; + /* + The relay_log_state keeps track of the current binlog state of the + execution of the relay log. This is used to know where to resume + current GTID position if the slave thread is stopped and + restarted. 
It is only accessed from the SQL thread, so it does + not need any locking. + */ + rpl_binlog_state relay_log_state; + /* + The restart_gtid_state is used when the SQL thread restarts on a relay log + in GTID mode. In multi-domain parallel replication, each domain may have a + separat position, so some events in more progressed domains may need to be + skipped. This keeps track of the domains that have not yet reached their + starting event. + */ + slave_connection_state restart_gtid_pos; + + Relay_log_info(bool is_slave_recovery, const char* thread_name= "SQL"); + ~Relay_log_info(); + + /* + Invalidate cached until_log_name and group_relay_log_name comparison + result. Should be called after any update of group_realy_log_name if + there chances that sql_thread is running. + */ + inline void notify_group_relay_log_name_update() + { + if (until_condition==UNTIL_RELAY_POS) + until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; + } + + /* + The same as previous but for group_master_log_name. + */ + inline void notify_group_master_log_name_update() + { + if (until_condition==UNTIL_MASTER_POS) + until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; + } + + void inc_group_relay_log_pos(ulonglong log_pos, + rpl_group_info *rgi, + bool skip_lock=0); + + int wait_for_pos(THD* thd, String* log_name, longlong log_pos, + longlong timeout); + void close_temporary_tables(); + + /* Check if UNTIL condition is satisfied. See slave.cc for more. */ + bool is_until_satisfied(Log_event *ev); + inline ulonglong until_pos() + { + DBUG_ASSERT(until_condition == UNTIL_MASTER_POS || + until_condition == UNTIL_RELAY_POS); + return ((until_condition == UNTIL_MASTER_POS) ? group_master_log_pos : + group_relay_log_pos); + } + inline char *until_name() + { + DBUG_ASSERT(until_condition == UNTIL_MASTER_POS || + until_condition == UNTIL_RELAY_POS); + return ((until_condition == UNTIL_MASTER_POS) ? 
group_master_log_name : + group_relay_log_name); + } + /** + Helper function to do after statement completion. + + This function is called from an event to complete the group by + either stepping the group position, if the "statement" is not + inside a transaction; or increase the event position, if the + "statement" is inside a transaction. + + @param event_log_pos + Master log position of the event. The position is recorded in the + relay log info and used to produce information for SHOW + SLAVE STATUS. + */ + bool stmt_done(my_off_t event_log_pos, THD *thd, rpl_group_info *rgi); + int alloc_inuse_relaylog(const char *name); + void free_inuse_relaylog(inuse_relaylog *ir); + void reset_inuse_relaylog(); + int update_relay_log_state(rpl_gtid *gtid_list, uint32 count); + + /** + Is the replication inside a group? + + The reader of the relay log is inside a group if either: + - The IN_TRANSACTION flag is set, meaning we're inside a transaction + - The IN_STMT flag is set, meaning we have read at least one row from + a multi-event entry. + + This flag reflects the state of the log 'just now', ie after the last + read event would be executed. + This allow us to test if we can stop replication before reading + the next entry. + + @retval true Replication thread is currently inside a group + @retval false Replication thread is currently not inside a group + */ + bool is_in_group() const { + return (m_flags & (IN_STMT | IN_TRANSACTION)); + } + + /** + Set the value of a replication state flag. + + @param flag Flag to set + */ + void set_flag(enum_state_flag flag) + { + m_flags|= flag; + } + + /** + Get the value of a replication state flag. + + @param flag Flag to get value of + + @return @c true if the flag was set, @c false otherwise. + */ + bool get_flag(enum_state_flag flag) + { + return m_flags & flag; + } + + /** + Clear the value of a replication state flag. 
+ + @param flag Flag to clear + */ + void clear_flag(enum_state_flag flag) + { + m_flags&= ~flag; + } + + /** + Text used in THD::proc_info when the slave SQL thread is delaying. + */ + static const char *const state_delaying_string; + + bool flush(); + + /** + Reads the relay_log.info file. + */ + int init(const char* info_filename); + + /** + Indicate that a delay starts. + + This does not actually sleep; it only sets the state of this + Relay_log_info object to delaying so that the correct state can be + reported by SHOW SLAVE STATUS and SHOW PROCESSLIST. + + Requires rli->data_lock. + + @param delay_end The time when the delay shall end. + */ + void start_sql_delay(time_t delay_end) + { + mysql_mutex_assert_owner(&data_lock); + sql_delay_end= delay_end; + thd_proc_info(sql_driver_thd, state_delaying_string); + } + + int32 get_sql_delay() { return sql_delay; } + void set_sql_delay(int32 _sql_delay) { sql_delay= _sql_delay; } + time_t get_sql_delay_end() { return sql_delay_end; } + rpl_gtid last_seen_gtid; + ulong last_trans_retry_count; +private: + + + /** + Delay slave SQL thread by this amount, compared to master (in + seconds). This is set with CHANGE MASTER TO MASTER_DELAY=X. + + Guarded by data_lock. Initialized by the client thread executing + START SLAVE. Written by client threads executing CHANGE MASTER TO + MASTER_DELAY=X. Read by SQL thread and by client threads + executing SHOW SLAVE STATUS. Note: must not be written while the + slave SQL thread is running, since the SQL thread reads it without + a lock when executing Relay_log_info::flush(). + */ + int sql_delay; + + /** + During a delay, specifies the point in time when the delay ends. + + This is used for the SQL_Remaining_Delay column in SHOW SLAVE STATUS. + + Guarded by data_lock. Written by the sql thread. Read by client + threads executing SHOW SLAVE STATUS. + */ + time_t sql_delay_end; + + /* + Before the MASTER_DELAY parameter was added (WL#344), + relay_log.info had 4 lines. 
Now it has 5 lines. + */ + static const int LINES_IN_RELAY_LOG_INFO_WITH_DELAY= 5; + /* + Hint for when to stop event distribution by sql driver thread. + The flag is set ON by a non-group event when this event is in the middle + of a group (e.g a transaction group) so it's too early + to refresh the current-relay-log vs until-log cached comparison result. + And it is checked and to decide whether it's a right time to do so + when the being processed group has been fully scheduled. + */ + bool until_relay_log_names_defer; + + /* + Holds the state of the data in the relay log. + We need this to ensure that we are not in the middle of a + statement or inside BEGIN ... COMMIT when should rotate the + relay log. + */ + uint32 m_flags; +}; + + +/* + In parallel replication, if we need to re-try a transaction due to a + deadlock or other temporary error, we may need to go back and re-read events + out of an earlier relay log. + + This structure keeps track of the relaylogs that are potentially in use. + Each rpl_group_info has a pointer to one of those, corresponding to the + first GTID event. + + A pair of reference count keeps track of how long a relay log is potentially + in use. When the `completed' flag is set, all events have been read out of + the relay log, but the log might still be needed for retry in worker + threads. As worker threads complete an event group, they increment + atomically the `dequeued_count' with number of events queued. Thus, when + completed is set and dequeued_count equals queued_count, the relay log file + is finally done with and can be purged. + + By separating the queued and dequeued count, only the dequeued_count needs + multi-thread synchronisation; the completed flag and queued_count fields + are only accessed by the SQL driver thread and need no synchronisation. 
+*/ +struct inuse_relaylog { + inuse_relaylog *next; + Relay_log_info *rli; + /* + relay_log_state holds the binlog state corresponding to the start of this + relay log file. It is an array with relay_log_state_count elements. + */ + rpl_gtid *relay_log_state; + uint32 relay_log_state_count; + /* Number of events in this relay log queued for worker threads. */ + int64 queued_count; + /* Number of events completed by worker threads. */ + Atomic_counter dequeued_count; + /* Set when all events have been read from a relaylog. */ + bool completed; + char name[FN_REFLEN]; + + inuse_relaylog(Relay_log_info *rli_arg, rpl_gtid *relay_log_state_arg, + uint32 relay_log_state_count_arg, + const char *name_arg): + next(0), rli(rli_arg), relay_log_state(relay_log_state_arg), + relay_log_state_count(relay_log_state_count_arg), queued_count(0), + dequeued_count(0), completed(false) + { + strmake_buf(name, name_arg); + } +}; + +enum start_alter_state +{ + INVALID= 0, + REGISTERED, // Start Alter exist, Default state + COMMIT_ALTER, // COMMIT the alter + ROLLBACK_ALTER, // Rollback the alter + COMPLETED // COMMIT/ROLLBACK Alter written in binlog +}; + +struct start_alter_info +{ + /* + ALTER id is defined as a pair of GTID's seq_no and domain_id. + */ + decltype(rpl_gtid::seq_no) sa_seq_no; // key for searching (SA's id) + uint32 domain_id; + bool direct_commit_alter; // when true CA thread executes the whole query + /* + 0 prepared and not error from commit and rollback + >0 error expected in commit/rollback + Rollback can be logged with 0 error if master is killed + */ + uint error; + enum start_alter_state state; + /* We are not using mysql_cond_t because we do not need PSI */ + mysql_cond_t start_alter_cond; +}; + +/* + This is data for various state needed to be kept for the processing of + one event group (transaction) during replication. + + In single-threaded replication, there will be one global rpl_group_info and + one global Relay_log_info per master connection. 
They will be linked + together. + + In parallel replication, there will be one rpl_group_info object for + each running sql thread, each having their own thd. + + All rpl_group_info will share the same Relay_log_info. +*/ + +struct rpl_group_info +{ + rpl_group_info *next; /* For free list in rpl_parallel_thread */ + Relay_log_info *rli; + THD *thd; + /* + Current GTID being processed. + The sub_id gives the binlog order within one domain_id. A zero sub_id + means that there is no active GTID. + */ + uint64 gtid_sub_id; + rpl_gtid current_gtid; + /* Currently applied event or NULL */ + Log_event *current_event; + uint64 commit_id; + /* + This is used to keep transaction commit order. + We will signal this when we commit, and can register it to wait for the + commit_orderer of the previous commit to signal us. + */ + wait_for_commit commit_orderer; + /* + If non-zero, the sub_id of a prior event group whose commit we have to wait + for before committing ourselves. Then wait_commit_group_info points to the + event group to wait for. + + Before using this, rpl_parallel_entry::last_committed_sub_id should be + compared against wait_commit_sub_id. Only if last_committed_sub_id is + smaller than wait_commit_sub_id must the wait be done (otherwise the + waited-for transaction is already committed, so we would otherwise wait + for the wrong commit). + */ + uint64 wait_commit_sub_id; + rpl_group_info *wait_commit_group_info; + /* + This holds a pointer to a struct that keeps track of the need to wait + for the previous batch of event groups to reach the commit stage, before + this batch can start to execute. + + (When we execute in parallel the transactions that group committed + together on the master, we still need to wait for any prior transactions + to have reached the commit stage). + + The pointed-to gco is only valid for as long as + gtid_sub_id < parallel_entry->last_committed_sub_id. After that, it can + be freed by another thread. 
+ */ + group_commit_orderer *gco; + + struct rpl_parallel_entry *parallel_entry; + + /* + A container to hold on Intvar-, Rand-, Uservar- log-events in case + the slave is configured with table filtering rules. + The withhold events are executed when their parent Query destiny is + determined for execution as well. + */ + Deferred_log_events *deferred_events; + + /* + State of the container: true stands for IRU events gathering, + false does for execution, either deferred or direct. + */ + bool deferred_events_collecting; + + Annotate_rows_log_event *m_annotate_event; + + RPL_TABLE_LIST *tables_to_lock; /* RBR: Tables to lock */ + uint tables_to_lock_count; /* RBR: Count of tables to lock */ + table_mapping m_table_map; /* RBR: Mapping table-id to table */ + mysql_mutex_t sleep_lock; + mysql_cond_t sleep_cond; + + /* + trans_retries varies between 0 to slave_transaction_retries and counts how + many times the slave has retried the present transaction; gets reset to 0 + when the transaction finally succeeds. + */ + ulong trans_retries; + + /* + Used to defer stopping the SQL thread to give it a chance + to finish up the current group of events. + The timestamp is set and reset in @c sql_slave_killed(). + */ + time_t last_event_start_time; + + char *event_relay_log_name; + char event_relay_log_name_buf[FN_REFLEN]; + ulonglong event_relay_log_pos; + ulonglong future_event_relay_log_pos; + /* + The master log name for current event. Only used in parallel replication. + */ + char future_event_master_log_name[FN_REFLEN]; + bool is_parallel_exec; + /* When gtid_pending is true, we have not yet done record_gtid(). */ + bool gtid_pending; + int worker_error; + /* + Set true when we signalled that we reach the commit phase. Used to avoid + counting one event group twice. + */ + bool did_mark_start_commit; + /* Copy of flags2 from GTID event. */ + uchar gtid_ev_flags2; + /* Copy of flags3 from GTID event. 
*/ + uint16 gtid_ev_flags_extra; + uint64 gtid_ev_sa_seq_no; + enum { + GTID_DUPLICATE_NULL=0, + GTID_DUPLICATE_IGNORE=1, + GTID_DUPLICATE_OWNER=2 + }; + /* + When --gtid-ignore-duplicates, this is set to one of the above three + values: + GTID_DUPLICATE_NULL - Not using --gtid-ignore-duplicates. + GTID_DUPLICATE_IGNORE - This gtid already applied, skip the event group. + GTID_DUPLICATE_OWNER - We are the current owner of the domain, and must + apply the event group and then release the domain. + */ + uint8 gtid_ignore_duplicate_state; + + /* + Runtime state for printing a note when slave is taking + too long while processing a row event. + */ + longlong row_stmt_start_timestamp; + bool long_find_row_note_printed; + /* Needs room for "Gtid D-S-N\x00". */ + char gtid_info_buf[5+10+1+10+1+20+1]; + + /* + The timestamp, from the master, of the commit event. + Used to do delayed update of rli->last_master_timestamp, for getting + reasonable values out of Seconds_Behind_Master in SHOW SLAVE STATUS. + */ + time_t last_master_timestamp; + + /* + Information to be able to re-try an event group in case of a deadlock or + other temporary error. + */ + inuse_relaylog *relay_log; + uint64 retry_start_offset; + uint64 retry_event_count; + /* + If `speculation' is != SPECULATE_NO, then we are optimistically running + this transaction in parallel, even though it might not be safe (there may + be a conflict with a prior event group). + + In this case, a conflict can cause other errors than deadlocks (like + duplicate key for example). So in case of _any_ error, we need to roll + back and retry the event group. + */ + enum enum_speculation { + /* + This transaction was group-committed together on the master with the + other transactions with which it is replicated in parallel. + */ + SPECULATE_NO, + /* + We will optimistically try to run this transaction in parallel with + other transactions, even though it is not known to be conflict free. 
+ If we get a conflict, we will detect it as a deadlock, roll back and + retry. + */ + SPECULATE_OPTIMISTIC, + /* + This transaction got a conflict during speculative parallel apply, or + it was marked on the master as likely to cause a conflict or unsafe to + speculate. So it will wait for the prior transaction to commit before + starting to replicate. + */ + SPECULATE_WAIT + } speculation; + enum enum_retry_killed { + RETRY_KILL_NONE = 0, + RETRY_KILL_PENDING, + RETRY_KILL_KILLED + }; + uchar killed_for_retry; + bool reserved_start_alter_thread; + bool finish_event_group_called; + /* + Used for two phase alter table + */ + rpl_parallel_thread *rpt; + Query_log_event *start_alter_ev; + bool direct_commit_alter; + start_alter_info *sa_info; + + rpl_group_info(Relay_log_info *rli_); + ~rpl_group_info(); + void reinit(Relay_log_info *rli); + + /* + Returns true if the argument event resides in the containter; + more specifically, the checking is done against the last added event. + */ + bool is_deferred_event(Log_event * ev) + { + return deferred_events_collecting ? deferred_events->is_last(ev) : false; + }; + /* The general cleanup that slave applier may need at the end of query. */ + inline void cleanup_after_query() + { + if (deferred_events) + deferred_events->rewind(); + }; + /* The general cleanup that slave applier may need at the end of session. */ + void cleanup_after_session() + { + if (deferred_events) + { + delete deferred_events; + deferred_events= NULL; + } + }; + + /** + Save pointer to Annotate_rows event and switch on the + binlog_annotate_row_events for this sql thread. + To be called when sql thread receives an Annotate_rows event. + */ + inline void set_annotate_event(Annotate_rows_log_event *event) + { + DBUG_ASSERT(m_annotate_event == NULL); + m_annotate_event= event; + this->thd->variables.binlog_annotate_row_events= 1; + } + + /** + Returns pointer to the saved Annotate_rows event or NULL if there is + no saved event. 
+ */ + inline Annotate_rows_log_event* get_annotate_event() + { + return m_annotate_event; + } + + /** + Delete saved Annotate_rows event (if any) and switch off the + binlog_annotate_row_events for this sql thread. + To be called when sql thread has applied the last (i.e. with + STMT_END_F flag) rbr event. + */ + inline void free_annotate_event() + { + if (m_annotate_event) + { + this->thd->variables.binlog_annotate_row_events= 0; + delete m_annotate_event; + m_annotate_event= 0; + } + } + + bool get_table_data(TABLE *table_arg, table_def **tabledef_var, TABLE **conv_table_var) const + { + DBUG_ASSERT(tabledef_var && conv_table_var); + for (TABLE_LIST *ptr= tables_to_lock ; ptr != NULL ; ptr= ptr->next_global) + if (ptr->table == table_arg) + { + *tabledef_var= &static_cast(ptr)->m_tabledef; + *conv_table_var= static_cast(ptr)->m_conv_table; + DBUG_PRINT("debug", ("Fetching table data for table %s.%s:" + " tabledef: %p, conv_table: %p", + table_arg->s->db.str, table_arg->s->table_name.str, + *tabledef_var, *conv_table_var)); + return true; + } + return false; + } + + void clear_tables_to_lock(); + void cleanup_context(THD *, bool); + void slave_close_thread_tables(THD *); + void mark_start_commit_no_lock(); + void mark_start_commit(); + char *gtid_info(); + void unmark_start_commit(); + + longlong get_row_stmt_start_timestamp() + { + return row_stmt_start_timestamp; + } + + void set_row_stmt_start_timestamp() + { + if (row_stmt_start_timestamp == 0) + row_stmt_start_timestamp= microsecond_interval_timer(); + } + + void reset_row_stmt_start_timestamp() + { + row_stmt_start_timestamp= 0; + } + + void set_long_find_row_note_printed() + { + long_find_row_note_printed= true; + } + + void unset_long_find_row_note_printed() + { + long_find_row_note_printed= false; + } + + bool is_long_find_row_note_printed() + { + return long_find_row_note_printed; + } + + inline void inc_event_relay_log_pos() + { + if (!is_parallel_exec) + rli->event_relay_log_pos= 
future_event_relay_log_pos; + } + + void finish_start_alter_event_group(); + + bool get_finish_event_group_called() + { + return finish_event_group_called; + } + + void set_finish_event_group_called(bool value) + { + finish_event_group_called= value; + } + +}; + + +/* + The class rpl_sql_thread_info is the THD::system_thread_info for an SQL + thread; this is either the driver SQL thread or a worker thread for parallel + replication. +*/ +class rpl_sql_thread_info +{ +public: + char cached_charset[6]; + Rpl_filter* rpl_filter; + + rpl_sql_thread_info(Rpl_filter *filter); + + /* + Last charset (6 bytes) seen by slave SQL thread is cached here; it helps + the thread save 3 get_charset() per Query_log_event if the charset is not + changing from event to event (common situation). + When the 6 bytes are equal to 0 is used to mean "cache is invalidated". + */ + void cached_charset_invalidate(); + bool cached_charset_compare(char *charset) const; +}; + + +extern struct rpl_slave_state *rpl_global_gtid_slave_state; +extern gtid_waiting rpl_global_gtid_waiting; + +int rpl_load_gtid_slave_state(THD *thd); +int find_gtid_slave_pos_tables(THD *thd); +int event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev); +void delete_or_keep_event_post_apply(rpl_group_info *rgi, + Log_event_type typ, Log_event *ev); + +#endif /* RPL_RLI_H */ diff --git a/sql/rpl_tblmap.cc b/sql/rpl_tblmap.cc new file mode 100644 index 00000000..a230b9f6 --- /dev/null +++ b/sql/rpl_tblmap.cc @@ -0,0 +1,182 @@ +/* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
   See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */

#include "mariadb.h"
#include "sql_priv.h"

#ifdef HAVE_REPLICATION

#include "rpl_tblmap.h"
#ifndef MYSQL_CLIENT
#include "table.h"
#endif

#ifdef MYSQL_CLIENT
#define MAYBE_TABLE_NAME(T) ("")
#else
#define MAYBE_TABLE_NAME(T) ((T) ? (T)->s->table_name.str : "<>")
#endif
#define TABLE_ID_HASH_SIZE 32
#define TABLE_ID_CHUNK 256

table_mapping::table_mapping()
  : m_free(0)
{
#ifdef MYSQL_CLIENT
  PSI_memory_key psi_key= PSI_NOT_INSTRUMENTED;
#else
  PSI_memory_key psi_key= key_memory_table_mapping_root;
#endif

  DBUG_ENTER("table_mapping::table_mapping");
  /*
    No "free_element" function for entries passed here, as the entries are
    allocated in a MEM_ROOT (freed as a whole in the destructor), they cannot
    be freed one by one.
    Note that below we don't test if my_hash_init() succeeded. This
    constructor is called at startup only.
  */
  (void) my_hash_init(psi_key, &m_table_ids,&my_charset_bin,TABLE_ID_HASH_SIZE,
                      offsetof(entry,table_id),sizeof(ulonglong),
                      0,0,0);
  /* We don't preallocate any block, this is consistent with m_free=0 above */
  init_alloc_root(psi_key, &m_mem_root, TABLE_ID_HASH_SIZE*sizeof(entry),
                  0, MYF(0));
  DBUG_VOID_RETURN;
}

table_mapping::~table_mapping()
{
#ifdef MYSQL_CLIENT
  clear_tables();
#endif
  my_hash_free(&m_table_ids);
  free_root(&m_mem_root, MYF(0));
}

/* Look up the TABLE mapped to 'table_id'; returns NULL when not mapped. */
TABLE* table_mapping::get_table(ulonglong table_id)
{
  DBUG_ENTER("table_mapping::get_table(ulong)");
  DBUG_PRINT("enter", ("table_id: %llu", table_id));
  entry *e= find_entry(table_id);
  if (e)
  {
    DBUG_PRINT("info", ("tid %llu -> table %p (%s)",
                        table_id, e->table,
                        MAYBE_TABLE_NAME(e->table)));
    DBUG_RETURN(e->table);
  }

  DBUG_PRINT("info", ("tid %llu is not mapped!", table_id));
  DBUG_RETURN(NULL);
}

/*
  Called when we are out of table id entries. Creates TABLE_ID_CHUNK
  new entries, chain them and attach them at the head of the list of free
  (free for use) entries.
*/
int table_mapping::expand()
{
  /*
    If we wanted to use "tmp= new (&m_mem_root) entry[TABLE_ID_CHUNK]",
    we would have to make "entry" derive from Sql_alloc but then it would not
    be a POD anymore and we want it to be (see rpl_tblmap.h). So we allocate
    in C.
  */
  entry *tmp= (entry *)alloc_root(&m_mem_root, TABLE_ID_CHUNK*sizeof(entry));
  if (tmp == NULL)
    return ERR_MEMORY_ALLOCATION; // Memory allocation failed

  /* Find the end of this fresh new array of free entries */
  entry *e_end= tmp+TABLE_ID_CHUNK-1;
  /* Chain all but the last entry; the last one points at the old free head */
  for (entry *e= tmp; e < e_end; e++)
    e->next= e+1;
  e_end->next= m_free;
  m_free= tmp;
  return 0;
}

/* Map 'table_id' to 'table', replacing (and, in client builds, freeing) any
   previous mapping for that id. Returns 0 on success. */
int table_mapping::set_table(ulonglong table_id, TABLE* table)
{
  DBUG_ENTER("table_mapping::set_table(ulong,TABLE*)");
  DBUG_PRINT("enter", ("table_id: %llu table: %p (%s)",
                       table_id,
                       table, MAYBE_TABLE_NAME(table)));
  entry *e= find_entry(table_id);
  if (e == 0)
  {
    if (m_free == 0 && expand())
      DBUG_RETURN(ERR_MEMORY_ALLOCATION); // Memory allocation failed
    e= m_free;
    m_free= m_free->next;
  }
  else
  {
#ifdef MYSQL_CLIENT
    free_table_map_log_event(e->table);
#endif
    my_hash_delete(&m_table_ids,(uchar *)e);
  }
  e->table_id= table_id;
  e->table= table;
  if (my_hash_insert(&m_table_ids,(uchar *)e))
  {
    /* we add this entry to the chain of free (free for use) entries */
    e->next= m_free;
    m_free= e;
    DBUG_RETURN(ERR_MEMORY_ALLOCATION);
  }

  DBUG_PRINT("info", ("tid %llu -> table %p (%s)",
                      table_id, e->table,
                      MAYBE_TABLE_NAME(e->table)));
  DBUG_RETURN(0);		// All OK
}

/* Remove the mapping for 'table_id' and recycle its entry.
   Returns 0 on success, 1 when no such mapping exists. */
int table_mapping::remove_table(ulonglong table_id)
{
  entry *e= find_entry(table_id);
  if (e)
  {
    my_hash_delete(&m_table_ids,(uchar *)e);
    /* we add this entry to the chain of free (free for use) entries */
    e->next= m_free;
    m_free= e;
    return 0;			// All OK
  }
  return 1;			// No table to remove
}

/*
  Puts all entries into the list of free-for-use entries (does not free any
  memory), and empties the hash.
*/
void table_mapping::clear_tables()
{
  DBUG_ENTER("table_mapping::clear_tables()");
  for (uint i= 0; i < m_table_ids.records; i++)
  {
    entry *e= (entry *)my_hash_element(&m_table_ids, i);
#ifdef MYSQL_CLIENT
    free_table_map_log_event(e->table);
#endif
    e->next= m_free;
    m_free= e;
  }
  my_hash_reset(&m_table_ids);
  DBUG_VOID_RETURN;
}

#endif
diff --git a/sql/rpl_tblmap.h b/sql/rpl_tblmap.h
new file mode 100644
index 00000000..63bac5e6
--- /dev/null
+++ b/sql/rpl_tblmap.h
@@ -0,0 +1,112 @@
/* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */

#ifndef TABLE_MAPPING_H
#define TABLE_MAPPING_H

/* Forward declarations */
#ifndef MYSQL_CLIENT
struct TABLE;
#else
class Table_map_log_event;
typedef Table_map_log_event TABLE;
void free_table_map_log_event(TABLE *table);
#endif


/*
  CLASS table_mapping

  RESPONSIBILITIES
    The table mapping is used to map table id's to table pointers

  COLLABORATION
    RELAY_LOG    For mapping table id:s to tables when receiving events.
 */

/*
  Guilhem to Mats:
  in the table_mapping class, the memory is allocated and never freed (until
  destruction). So this is a good candidate for allocating inside a MEM_ROOT:
  it gives the efficient allocation in chunks (like in expand()). So I have
  introduced a MEM_ROOT.

  Note that inheriting from Sql_alloc had no effect: it has effects only when
  "ptr= new table_mapping" is called, and this is never called. And it would
  then allocate from thd->mem_root which is a highly volatile object (reset
  from example after executing each query, see dispatch_command(), it has a
  free_root() at end); as the table_mapping object is supposed to live longer
  than a query, it was dangerous.
  A dedicated MEM_ROOT needs to be used, see below.
*/

#include "hash.h"                               /* HASH */

/*
  Maps ulonglong table ids (as seen in the binlog) to TABLE pointers.
  Entries are carved out of a MEM_ROOT in chunks and recycled through a
  free list; memory is only released when the object is destroyed.
*/
class table_mapping {

private:
  MEM_ROOT m_mem_root;

public:

  enum enum_error {
      ERR_NO_ERROR = 0,
      ERR_LIMIT_EXCEEDED,
      ERR_MEMORY_ALLOCATION
  };

  table_mapping();
  ~table_mapping();

  TABLE* get_table(ulonglong table_id);

  int       set_table(ulonglong table_id, TABLE* table);
  int       remove_table(ulonglong table_id);
  void      clear_tables();
  ulong     count() const { return m_table_ids.records; }

private:
  /*
    This is a POD (Plain Old Data). Keep it that way (we apply offsetof() to
    it, which only works for PODs)
  */
  struct entry {
    ulonglong table_id;
    union {
      TABLE *table;
      entry *next;
    };
  };

  entry *find_entry(ulonglong table_id)
  {
    return (entry *) my_hash_search(&m_table_ids,
                                    (uchar*)&table_id,
                                    sizeof(table_id));
  }
  int expand();

  /*
    Head of the list of free entries; "free" in the sense that it's an
    allocated entry free for use, NOT in the sense that it's freed
    memory.
  */
  entry *m_free;

  /* Correspondence between an id (a number) and a TABLE object */
  HASH m_table_ids;
};

#endif
diff --git a/sql/rpl_utility.cc b/sql/rpl_utility.cc
new file mode 100644
index 00000000..04a2efb3
--- /dev/null
+++ b/sql/rpl_utility.cc
@@ -0,0 +1,344 @@
/* Copyright (c) 2006, 2013, Oracle and/or its affiliates.
   Copyright (c) 2011, 2013, Monty Program Ab

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */

#include "mariadb.h"
/* NOTE(review): the header name on the next include was lost in extraction
   (angle-bracket content stripped) -- restore from upstream before building. */
#include
#include "rpl_utility.h"
#include "log_event.h"


/*********************************************************************
 *                   table_def member definitions                    *
 *********************************************************************/

/*
  This function returns the field size in raw bytes based on the type
  and the encoded field data from the master's raw data.
*/
uint32 table_def::calc_field_size(uint col, uchar *master_data) const
{
  uint32 length= 0;

  switch (type(col)) {
  case MYSQL_TYPE_NEWDECIMAL:
    /* metadata high byte = precision, low byte = decimals */
    length= my_decimal_get_binary_size(m_field_metadata[col] >> 8,
                                       m_field_metadata[col] & 0xff);
    break;
  case MYSQL_TYPE_DECIMAL:
  case MYSQL_TYPE_FLOAT:
  case MYSQL_TYPE_DOUBLE:
    length= m_field_metadata[col];
    break;
  /*
    The cases for SET and ENUM are included for completeness, however
    both are mapped to type MYSQL_TYPE_STRING and their real types
    are encoded in the field metadata.
  */
  case MYSQL_TYPE_SET:
  case MYSQL_TYPE_ENUM:
  case MYSQL_TYPE_STRING:
  {
    uchar type= m_field_metadata[col] >> 8U;
    if ((type == MYSQL_TYPE_SET) || (type == MYSQL_TYPE_ENUM))
      length= m_field_metadata[col] & 0x00ff;
    else
    {
      /*
        We are reading the actual size from the master_data record
        because this field has the actual length stored in the first
        byte.
      */
      length= (uint) *master_data + 1;
      DBUG_ASSERT(length != 0);
    }
    break;
  }
  case MYSQL_TYPE_YEAR:
  case MYSQL_TYPE_TINY:
    length= 1;
    break;
  case MYSQL_TYPE_SHORT:
    length= 2;
    break;
  case MYSQL_TYPE_INT24:
    length= 3;
    break;
  case MYSQL_TYPE_LONG:
    length= 4;
    break;
#ifdef HAVE_LONG_LONG
  case MYSQL_TYPE_LONGLONG:
    length= 8;
    break;
#endif
  case MYSQL_TYPE_NULL:
    length= 0;
    break;
  case MYSQL_TYPE_NEWDATE:
    length= 3;
    break;
  case MYSQL_TYPE_DATE:
  case MYSQL_TYPE_TIME:
    length= 3;
    break;
  case MYSQL_TYPE_TIME2:
    length= my_time_binary_length(m_field_metadata[col]);
    break;
  case MYSQL_TYPE_TIMESTAMP:
    length= 4;
    break;
  case MYSQL_TYPE_TIMESTAMP2:
    length= my_timestamp_binary_length(m_field_metadata[col]);
    break;
  case MYSQL_TYPE_DATETIME:
    length= 8;
    break;
  case MYSQL_TYPE_DATETIME2:
    length= my_datetime_binary_length(m_field_metadata[col]);
    break;
  case MYSQL_TYPE_BIT:
  {
    /*
      Decode the size of the bit field from the master.
      from_len is the length in bytes from the master
      from_bit_len is the number of extra bits stored in the master record
      If from_bit_len is not 0, add 1 to the length to account for accurate
      number of bytes needed.
    */
    uint from_len= (m_field_metadata[col] >> 8U) & 0x00ff;
    uint from_bit_len= m_field_metadata[col] & 0x00ff;
    DBUG_ASSERT(from_bit_len <= 7);
    length= from_len + ((from_bit_len > 0) ? 1 : 0);
    break;
  }
  case MYSQL_TYPE_VARCHAR:
  case MYSQL_TYPE_VARCHAR_COMPRESSED:
  {
    length= m_field_metadata[col] > 255 ? 2 : 1; // c&p of Field_varstring::data_length()
    length+= length == 1 ? (uint32) *master_data : uint2korr(master_data);
    break;
  }
  case MYSQL_TYPE_TINY_BLOB:
  case MYSQL_TYPE_MEDIUM_BLOB:
  case MYSQL_TYPE_LONG_BLOB:
  case MYSQL_TYPE_BLOB:
  case MYSQL_TYPE_BLOB_COMPRESSED:
  case MYSQL_TYPE_GEOMETRY:
  {
    /*
      Compute the length of the data. We cannot use get_length() here
      since it is dependent on the specific table (and also checks the
      packlength using the internal 'table' pointer) and replication
      is using a fixed format for storing data in the binlog.
    */
    switch (m_field_metadata[col]) {
    case 1:
      length= *master_data;
      break;
    case 2:
      length= uint2korr(master_data);
      break;
    case 3:
      length= uint3korr(master_data);
      break;
    case 4:
      length= uint4korr(master_data);
      break;
    default:
      DBUG_ASSERT(0);		// Should not come here
      break;
    }

    /* add the length-prefix size itself (stored in the metadata) */
    length+= m_field_metadata[col];
    break;
  }
  default:
    length= ~(uint32) 0;
  }
  return length;
}

PSI_memory_key key_memory_table_def_memory;

table_def::table_def(unsigned char *types, ulong size,
                     uchar *field_metadata, int metadata_size,
                     uchar *null_bitmap, uint16 flags)
  : m_size(size), m_type(0), m_field_metadata_size(metadata_size),
    m_field_metadata(0), m_null_bits(0), m_flags(flags),
    m_memory(NULL)
{
  /* One allocation backs the three arrays; freed as a whole in ~table_def */
  m_memory= (uchar *)my_multi_malloc(key_memory_table_def_memory, MYF(MY_WME),
                                     &m_type, size,
                                     &m_field_metadata,
                                     size * sizeof(uint16),
                                     &m_null_bits, (size + 7) / 8,
                                     NULL);

  bzero(m_field_metadata, size * sizeof(uint16));

  if (m_type)
    memcpy(m_type, types, size);
  else
    m_size= 0;
  /*
    Extract the data from the table map into the field metadata array
    iff there is field metadata. The variable metadata_size will be
    0 if we are replicating from an older version server since no field
    metadata was written to the table map. This can also happen if
    there were no fields in the master that needed extra metadata.
  */
  if (m_size && metadata_size)
  {
    int index= 0;
    for (unsigned int i= 0; i < m_size; i++)
    {
      switch (binlog_type(i)) {
      case MYSQL_TYPE_TINY_BLOB:
      case MYSQL_TYPE_BLOB:
      case MYSQL_TYPE_BLOB_COMPRESSED:
      case MYSQL_TYPE_MEDIUM_BLOB:
      case MYSQL_TYPE_LONG_BLOB:
      case MYSQL_TYPE_DOUBLE:
      case MYSQL_TYPE_FLOAT:
      case MYSQL_TYPE_GEOMETRY:
      {
        /*
          These types store a single byte.
        */
        m_field_metadata[i]= field_metadata[index];
        index++;
        break;
      }
      case MYSQL_TYPE_SET:
      case MYSQL_TYPE_ENUM:
      case MYSQL_TYPE_STRING:
      {
        uint16 x= field_metadata[index++] << 8U; // real_type
        x+= field_metadata[index++];             // pack or field length
        m_field_metadata[i]= x;
        break;
      }
      case MYSQL_TYPE_BIT:
      {
        uint16 x= field_metadata[index++];
        x = x + (field_metadata[index++] << 8U);
        m_field_metadata[i]= x;
        break;
      }
      case MYSQL_TYPE_VARCHAR:
      case MYSQL_TYPE_VARCHAR_COMPRESSED:
      {
        /*
          These types store two bytes.
        */
        char *ptr= (char *)&field_metadata[index];
        m_field_metadata[i]= uint2korr(ptr);
        index= index + 2;
        break;
      }
      case MYSQL_TYPE_NEWDECIMAL:
      {
        uint16 x= field_metadata[index++] << 8U; // precision
        x+= field_metadata[index++];             // decimals
        m_field_metadata[i]= x;
        break;
      }
      case MYSQL_TYPE_TIME2:
      case MYSQL_TYPE_DATETIME2:
      case MYSQL_TYPE_TIMESTAMP2:
        m_field_metadata[i]= field_metadata[index++];
        break;
      default:
        m_field_metadata[i]= 0;
        break;
      }
    }
  }
  if (m_size && null_bitmap)
    memcpy(m_null_bits, null_bitmap, (m_size + 7) / 8);
}


table_def::~table_def()
{
  my_free(m_memory);
#ifndef DBUG_OFF
  m_type= 0;
  m_size= 0;
#endif
}


/**
   @param event_buf   point to the buffer containing serialized event
   @param event_len   length of the event accounting possible checksum alg

   @return  TRUE  if test fails
            FALSE as success

   @notes
   event_buf will have same values on return. However during the process of
   calculating the checksum, it's temporarily changed. Because of this the
   event_buf argument is not a pointer to const.

*/
bool event_checksum_test(uchar *event_buf, ulong event_len,
                         enum enum_binlog_checksum_alg alg)
{
  bool res= FALSE;
  uint16 flags= 0; // to store in FD's buffer flags orig value

  if (alg != BINLOG_CHECKSUM_ALG_OFF && alg != BINLOG_CHECKSUM_ALG_UNDEF)
  {
    ha_checksum incoming;
    ha_checksum computed;

    if (event_buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT)
    {
#ifdef DBUG_ASSERT_EXISTS
      int8 fd_alg= event_buf[event_len - BINLOG_CHECKSUM_LEN -
                             BINLOG_CHECKSUM_ALG_DESC_LEN];
#endif
      /*
        FD event is checksummed and therefore verified w/o the binlog-in-use flag
      */
      flags= uint2korr(event_buf + FLAGS_OFFSET);
      if (flags & LOG_EVENT_BINLOG_IN_USE_F)
        event_buf[FLAGS_OFFSET] &= ~LOG_EVENT_BINLOG_IN_USE_F;
      /*
        The only algorithm currently is CRC32. Zero indicates
        the binlog file is checksum-free *except* the FD-event.
      */
      DBUG_ASSERT(fd_alg == BINLOG_CHECKSUM_ALG_CRC32 || fd_alg == 0);
      DBUG_ASSERT(alg == BINLOG_CHECKSUM_ALG_CRC32);
      /*
        Compile time guard to watch over the max number of alg
      */
      compile_time_assert(BINLOG_CHECKSUM_ALG_ENUM_END <= 0x80);
    }
    incoming= uint4korr(event_buf + event_len - BINLOG_CHECKSUM_LEN);
    /* checksum the event content without the checksum part itself */
    computed= my_checksum(0, event_buf, event_len - BINLOG_CHECKSUM_LEN);
    if (flags != 0)
    {
      /* restoring the orig value of flags of FD */
      DBUG_ASSERT(event_buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT);
      event_buf[FLAGS_OFFSET]= (uchar) flags;
    }
    res= (DBUG_IF("simulate_checksum_test_failure") || computed != incoming);
  }
  return res;
}
diff --git a/sql/rpl_utility.h b/sql/rpl_utility.h
new file mode 100644
index 00000000..c28e8aa1
--- /dev/null
+++ b/sql/rpl_utility.h
@@ -0,0 +1,309 @@
/*
   Copyright (c) 2006, 2010, Oracle and/or its affiliates.
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef RPL_UTILITY_H +#define RPL_UTILITY_H + +#ifndef __cplusplus +#error "Don't include this C++ header file from a non-C++ file!" +#endif + +#include "sql_priv.h" +#include "m_string.h" /* bzero, memcpy */ +#ifdef MYSQL_SERVER +#include "table.h" /* TABLE_LIST */ +#endif +#include "mysql_com.h" + +class Relay_log_info; +class Log_event; +struct rpl_group_info; + +/** + A table definition from the master. + + The responsibilities of this class is: + - Extract and decode table definition data from the table map event + - Check if table definition in table map is compatible with table + definition on slave + */ + +class table_def +{ +public: + /** + Constructor. + + @param types Array of types, each stored as a byte + @param size Number of elements in array 'types' + @param field_metadata Array of extra information about fields + @param metadata_size Size of the field_metadata array + @param null_bitmap The bitmap of fields that can be null + */ + table_def(unsigned char *types, ulong size, uchar *field_metadata, + int metadata_size, uchar *null_bitmap, uint16 flags); + + ~table_def(); + + /** + Return the number of fields there is type data for. + + @return The number of fields that there is type data for. 
+ */ + ulong size() const { return m_size; } + + + /** + Returns internal binlog type code for one field, + without translation to real types. + */ + enum_field_types binlog_type(ulong index) const + { + return static_cast(m_type[index]); + } + /* + Return a representation of the type data for one field. + + @param index Field index to return data for + + @return Will return a representation of the type data for field + index. Currently, only the type identifier is + returned. + */ + enum_field_types type(ulong index) const + { + DBUG_ASSERT(index < m_size); + /* + If the source type is MYSQL_TYPE_STRING, it can in reality be + either MYSQL_TYPE_STRING, MYSQL_TYPE_ENUM, or MYSQL_TYPE_SET, so + we might need to modify the type to get the real type. + */ + enum_field_types source_type= binlog_type(index); + uint16 source_metadata= m_field_metadata[index]; + switch (source_type) + { + case MYSQL_TYPE_STRING: + { + int real_type= source_metadata >> 8; + if (real_type == MYSQL_TYPE_ENUM || real_type == MYSQL_TYPE_SET) + source_type= static_cast(real_type); + break; + } + + /* + This type has not been used since before row-based replication, + so we can safely assume that it really is MYSQL_TYPE_NEWDATE. + */ + case MYSQL_TYPE_DATE: + source_type= MYSQL_TYPE_NEWDATE; + break; + + default: + /* Do nothing */ + break; + } + + return source_type; + } +#ifdef MYSQL_SERVER + const Type_handler *field_type_handler(uint index) const; +#endif + + /* + This function allows callers to get the extra field data from the + table map for a given field. If there is no metadata for that field + or there is no extra metadata at all, the function returns 0. + + The function returns the value for the field metadata for column at + position indicated by index. As mentioned, if the field was a type + that stores field metadata, that value is returned else zero (0) is + returned. 
This method is used in the unpack() methods of the + corresponding fields to properly extract the data from the binary log + in the event that the master's field is smaller than the slave. + */ + uint16 field_metadata(uint index) const + { + DBUG_ASSERT(index < m_size); + if (m_field_metadata_size) + return m_field_metadata[index]; + else + return 0; + } + + /* + This function returns whether the field on the master can be null. + This value is derived from field->maybe_null(). + */ + my_bool maybe_null(uint index) const + { + DBUG_ASSERT(index < m_size); + return ((m_null_bits[(index / 8)] & + (1 << (index % 8))) == (1 << (index %8))); + } + + /* + This function returns the field size in raw bytes based on the type + and the encoded field data from the master's raw data. This method can + be used for situations where the slave needs to skip a column (e.g., + WL#3915) or needs to advance the pointer for the fields in the raw + data from the master to a specific column. + */ + uint32 calc_field_size(uint col, uchar *master_data) const; + + /** + Decide if the table definition is compatible with a table. + + Compare the definition with a table to see if it is compatible + with it. + + A table definition is compatible with a table if: + - The columns types of the table definition is a (not + necessarily proper) prefix of the column type of the table. + + - The other way around. + + - Each column on the master that also exists on the slave can be + converted according to the current settings of @c + SLAVE_TYPE_CONVERSIONS. + + @param thd + @param rli Pointer to relay log info + @param table Pointer to table to compare with. + + @param[out] tmp_table_var Pointer to temporary table for holding + conversion table. 
+ + @retval 1 if the table definition is not compatible with @c table + @retval 0 if the table definition is compatible with @c table + */ +#ifndef MYSQL_CLIENT + bool compatible_with(THD *thd, rpl_group_info *rgi, TABLE *table, + TABLE **conv_table_var) const; + + /** + Create a virtual in-memory temporary table structure. + + The table structure has records and field array so that a row can + be unpacked into the record for further processing. + + In the virtual table, each field that requires conversion will + have a non-NULL value, while fields that do not require + conversion will have a NULL value. + + Some information that is missing in the events, such as the + character set for string types, are taken from the table that the + field is going to be pushed into, so the target table that the data + eventually need to be pushed into need to be supplied. + + @param thd Thread to allocate memory from. + @param rli Relay log info structure, for error reporting. + @param target_table Target table for fields. + + @return A pointer to a temporary table with memory allocated in the + thread's memroot, NULL if the table could not be created + */ + TABLE *create_conversion_table(THD *thd, rpl_group_info *rgi, + TABLE *target_table) const; +#endif + + +private: + ulong m_size; // Number of elements in the types array + unsigned char *m_type; // Array of type descriptors + uint m_field_metadata_size; + uint16 *m_field_metadata; + uchar *m_null_bits; + uint16 m_flags; // Table flags + uchar *m_memory; +}; + + +#ifndef MYSQL_CLIENT +/** + Extend the normal table list with a few new fields needed by the + slave thread, but nowhere else. 
+ */ +struct RPL_TABLE_LIST + : public TABLE_LIST +{ + bool m_tabledef_valid; + table_def m_tabledef; + TABLE *m_conv_table; + bool master_had_triggers; +}; + + +/* Anonymous namespace for template functions/classes */ +CPP_UNNAMED_NS_START + + /* + Smart pointer that will automatically call my_afree (a macro) when + the pointer goes out of scope. This is used so that I do not have + to remember to call my_afree() before each return. There is no + overhead associated with this, since all functions are inline. + + I (Matz) would prefer to use the free function as a template + parameter, but that is not possible when the "function" is a + macro. + */ + template + class auto_afree_ptr + { + Obj* m_ptr; + public: + auto_afree_ptr(Obj* ptr) : m_ptr(ptr) { } + ~auto_afree_ptr() { if (m_ptr) my_afree(m_ptr); } + void assign(Obj* ptr) { + /* Only to be called if it hasn't been given a value before. */ + DBUG_ASSERT(m_ptr == NULL); + m_ptr= ptr; + } + Obj* get() { return m_ptr; } + }; + +CPP_UNNAMED_NS_END + +class Deferred_log_events +{ +private: + DYNAMIC_ARRAY array; + Log_event *last_added; + +public: + Deferred_log_events(Relay_log_info *rli); + ~Deferred_log_events(); + /* queue for exection at Query-log-event time prior the Query */ + int add(Log_event *ev); + bool is_empty(); + bool execute(struct rpl_group_info *rgi); + void rewind(); + bool is_last(Log_event *ev) { return ev == last_added; }; +}; + +#endif + +// NB. number of printed bit values is limited to sizeof(buf) - 1 +#define DBUG_PRINT_BITSET(N,FRM,BS) \ + do { \ + char buf[256]; \ + uint i; \ + for (i = 0 ; i < MY_MIN(sizeof(buf) - 1, (BS)->n_bits) ; i++) \ + buf[i] = bitmap_is_set((BS), i) ? 
'1' : '0'; \ + buf[i] = '\0'; \ + DBUG_PRINT((N), ((FRM), buf)); \ + } while (0) + +#endif /* RPL_UTILITY_H */ diff --git a/sql/rpl_utility_server.cc b/sql/rpl_utility_server.cc new file mode 100644 index 00000000..ccad7bd0 --- /dev/null +++ b/sql/rpl_utility_server.cc @@ -0,0 +1,1187 @@ +/* Copyright (c) 2006, 2013, Oracle and/or its affiliates. + Copyright (c) 2011, 2013, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include +#include "rpl_utility.h" +#include "log_event.h" + +#if defined(MYSQL_CLIENT) +#error MYSQL_CLIENT must not be defined here +#endif + +#if !defined(MYSQL_SERVER) +#error MYSQL_SERVER must be defined here +#endif + +#if defined(HAVE_REPLICATION) +#include "rpl_rli.h" +#include "sql_select.h" +#endif + + +/** + Compute the maximum display length of a field. + + @param sql_type Type of the field + @param metadata The metadata from the master for the field. + @return Maximum length of the field in bytes. + + The precise values calculated by field->max_display_length() and + calculated by max_display_length_for_field() can differ (by +1 or -1) + for integer data types (TINYINT, SMALLINT, MEDIUMINT, INT, BIGINT). + This slight difference is not important here, because we call + this function only for two *different* integer data types. 
+ */ +static uint32 +max_display_length_for_field(const Conv_source &source) +{ + DBUG_PRINT("debug", ("sql_type: %s, metadata: 0x%x", + source.type_handler()->name().ptr(), source.metadata())); + return source.type_handler()->max_display_length_for_field(source); +} + + +/* + Compare the pack lengths of a source field (on the master) and a + target field (on the slave). + + @param sh Source type handler + @param source_length Source length + @param th Target type hander + @param target_length Target length + + @retval CONV_TYPE_SUBSET_TO_SUPERSET The length of the source field is + smaller than the target field. + @retval CONV_TYPE_PRECISE The length of the source and + the target fields are equal. + @retval CONV_TYPE_SUPERSET_TO_SUBSET The length of the source field is + greater than the target field. + */ +static enum_conv_type +compare_lengths(const Type_handler *sh, uint32 source_length, + const Type_handler *th, uint32 target_length) +{ + DBUG_ENTER("compare_lengths"); + DBUG_PRINT("debug", ("source_length: %lu, source_type: %s," + " target_length: %lu, target_type: %s", + (unsigned long) source_length, sh->name().ptr(), + (unsigned long) target_length, th->name().ptr())); + enum_conv_type result= + source_length < target_length ? CONV_TYPE_SUBSET_TO_SUPERSET : + source_length > target_length ? CONV_TYPE_SUPERSET_TO_SUBSET : + CONV_TYPE_PRECISE; + DBUG_PRINT("result", ("%d", result)); + DBUG_RETURN(result); +} + + +/** + Calculate display length for MySQL56 temporal data types from their metadata. + It contains fractional precision in the low 16-bit word. +*/ +static uint32 +max_display_length_for_temporal2_field(uint32 int_display_length, + unsigned int metadata) +{ + metadata&= 0x00ff; + return int_display_length + metadata + (metadata ? 
1 : 0); +} + + +uint32 +Type_handler_newdecimal::max_display_length_for_field(const Conv_source &src) + const +{ + return src.metadata() >> 8; +} + + +uint32 +Type_handler_typelib::max_display_length_for_field(const Conv_source &src) + const +{ + /* + Field_enum::rpl_conv_type_from() does not use compare_lengths(). + So we should not come here. + */ + DBUG_ASSERT(0); + return src.metadata() & 0x00ff; +} + + +uint32 +Type_handler_string::max_display_length_for_field(const Conv_source &src) + const +{ + /* + ENUM and SET are transferred using as STRING, + with the exact type code in metadata. + Make sure that we previously detected ENUM/SET and + translated them into a proper type handler. + See table_def::field_type_handler() for details. + */ + DBUG_ASSERT((src.metadata() >> 8) != MYSQL_TYPE_SET); + DBUG_ASSERT((src.metadata() >> 8) != MYSQL_TYPE_ENUM); + /* This is taken from Field_string::unpack. */ + return (((src.metadata() >> 4) & 0x300) ^ 0x300) + (src.metadata() & 0x00ff); +} + + +uint32 +Type_handler_time2::max_display_length_for_field(const Conv_source &src) + const +{ + return max_display_length_for_temporal2_field(MIN_TIME_WIDTH, + src.metadata()); +} + + +uint32 +Type_handler_timestamp2::max_display_length_for_field(const Conv_source &src) + const +{ + return max_display_length_for_temporal2_field(MAX_DATETIME_WIDTH, + src.metadata()); +} + + +uint32 +Type_handler_datetime2::max_display_length_for_field(const Conv_source &src) + const +{ + return max_display_length_for_temporal2_field(MAX_DATETIME_WIDTH, + src.metadata()); +} + + +uint32 +Type_handler_bit::max_display_length_for_field(const Conv_source &src) + const +{ + /* + Decode the size of the bit field from the master. 
+ */ + DBUG_ASSERT((src.metadata() & 0xff) <= 7); + return 8 * (src.metadata() >> 8U) + (src.metadata() & 0x00ff); +} + + +uint32 +Type_handler_var_string::max_display_length_for_field(const Conv_source &src) + const +{ + return src.metadata(); +} + + +uint32 +Type_handler_varchar::max_display_length_for_field(const Conv_source &src) + const +{ + return src.metadata(); +} + + +uint32 +Type_handler_varchar_compressed:: + max_display_length_for_field(const Conv_source &src) const +{ + DBUG_ASSERT(src.metadata() > 0); + return src.metadata() - 1; +} + + +/* + The actual length for these types does not really matter since + they are used to calc_pack_length, which ignores the given + length for these types. + + Since we want this to be accurate for other uses, we return the + maximum size in bytes of these BLOBs. +*/ +uint32 +Type_handler_tiny_blob::max_display_length_for_field(const Conv_source &src) + const +{ + return (uint32) my_set_bits(1 * 8); +} + + +uint32 +Type_handler_medium_blob::max_display_length_for_field(const Conv_source &src) + const +{ + return (uint32) my_set_bits(3 * 8); +} + + +uint32 +Type_handler_blob::max_display_length_for_field(const Conv_source &src) + const +{ + /* + For the blob type, Field::real_type() lies and say that all + blobs are of type MYSQL_TYPE_BLOB. In that case, we have to look + at the length instead to decide what the max display size is. 
+ */ + return (uint32) my_set_bits(src.metadata() * 8); +} + + +uint32 +Type_handler_blob_compressed::max_display_length_for_field(const Conv_source &src) + const +{ + return (uint32) my_set_bits(src.metadata() * 8); +} + + +uint32 +Type_handler_long_blob::max_display_length_for_field(const Conv_source &src) + const +{ + return (uint32) my_set_bits(4 * 8); +} + + +uint32 +Type_handler_olddecimal::max_display_length_for_field(const Conv_source &src) + const +{ + return ~(uint32) 0; +} + + +void Type_handler::show_binlog_type(const Conv_source &src, const Field &, + String *str) const +{ + str->set_ascii(name().ptr(), name().length()); +} + + +void Type_handler_var_string::show_binlog_type(const Conv_source &src, + const Field &dst, + String *str) const +{ + CHARSET_INFO *cs= str->charset(); + const char* fmt= dst.cmp_type() != STRING_RESULT || dst.has_charset() + ? "char(%u octets)" : "binary(%u)"; + size_t length= cs->cset->snprintf(cs, (char*) str->ptr(), + str->alloced_length(), + fmt, src.metadata()); + str->length(length); +} + + +void Type_handler_varchar::show_binlog_type(const Conv_source &src, + const Field &dst, + String *str) const +{ + CHARSET_INFO *cs= str->charset(); + const char* fmt= dst.cmp_type() != STRING_RESULT || dst.has_charset() + ? "varchar(%u octets)" : "varbinary(%u)"; + size_t length= cs->cset->snprintf(cs, (char*) str->ptr(), + str->alloced_length(), + fmt, src.metadata()); + str->length(length); +} + + +void Type_handler_varchar_compressed::show_binlog_type(const Conv_source &src, + const Field &dst, + String *str) const +{ + CHARSET_INFO *cs= str->charset(); + const char* fmt= dst.cmp_type() != STRING_RESULT || dst.has_charset() + ? 
"varchar(%u octets) compressed" : "varbinary(%u) compressed"; + size_t length= cs->cset->snprintf(cs, (char*) str->ptr(), + str->alloced_length(), + fmt, src.metadata()); + str->length(length); +} + +void Type_handler_bit::show_binlog_type(const Conv_source &src, const Field &, + String *str) const +{ + CHARSET_INFO *cs= str->charset(); + int bit_length= 8 * (src.metadata() >> 8) + (src.metadata() & 0xFF); + size_t length= + cs->cset->snprintf(cs, (char*) str->ptr(), str->alloced_length(), + "bit(%d)", bit_length); + str->length(length); +} + + +void Type_handler_olddecimal::show_binlog_type(const Conv_source &src, + const Field &, + String *str) const +{ + CHARSET_INFO *cs= str->charset(); + size_t length= + cs->cset->snprintf(cs, (char*) str->ptr(), str->alloced_length(), + "decimal(%d,?)/*old*/", src.metadata()); + str->length(length); + +} + + +void Type_handler_newdecimal::show_binlog_type(const Conv_source &src, + const Field &, + String *str) const +{ + CHARSET_INFO *cs= str->charset(); + size_t length= + cs->cset->snprintf(cs, (char*) str->ptr(), str->alloced_length(), + "decimal(%d,%d)", + src.metadata() >> 8, src.metadata() & 0xff); + str->length(length); +} + + +void Type_handler_blob_compressed::show_binlog_type(const Conv_source &src, + const Field &, + String *str) const +{ + /* + Field::real_type() lies regarding the actual type of a BLOB, so + it is necessary to check the pack length to figure out what kind + of blob it really is. 
+ */ + switch (src.metadata()) { + case 1: + str->set_ascii(STRING_WITH_LEN("tinyblob compressed")); + break; + case 2: + str->set_ascii(STRING_WITH_LEN("blob compressed")); + break; + case 3: + str->set_ascii(STRING_WITH_LEN("mediumblob compressed")); + break; + default: + DBUG_ASSERT(0); + // Fall through + case 4: + str->set_ascii(STRING_WITH_LEN("longblob compressed")); + } +} + + +void Type_handler_string::show_binlog_type(const Conv_source &src, + const Field &dst, + String *str) const +{ + /* + This is taken from Field_string::unpack. + */ + CHARSET_INFO *cs= str->charset(); + uint bytes= (((src.metadata() >> 4) & 0x300) ^ 0x300) + + (src.metadata() & 0x00ff); + const char* fmt= dst.cmp_type() != STRING_RESULT || dst.has_charset() + ? "char(%u octets)" : "binary(%u)"; + size_t length= cs->cset->snprintf(cs, (char*) str->ptr(), + str->alloced_length(), + fmt, bytes); + str->length(length); +} + + +enum_conv_type +Field::rpl_conv_type_from_same_data_type(uint16 metadata, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + if (metadata == 0) // Metadata can only be zero if no metadata was provided + { + /* + If there is no metadata, we either have an old event where no + metadata were supplied, or a type that does not require any + metadata. In either case, conversion can be done but no + conversion table is necessary. + */ + DBUG_PRINT("debug", ("Base types are identical, but there is no metadata")); + return CONV_TYPE_PRECISE; + } + + DBUG_PRINT("debug", ("Base types are identical, doing field size comparison")); + int order= 0; + if (!compatible_field_size(metadata, rli, param.table_def_flags(), &order)) + return CONV_TYPE_IMPOSSIBLE; + return order == 0 ? CONV_TYPE_PRECISE : + order < 0 ? 
CONV_TYPE_SUBSET_TO_SUPERSET : + CONV_TYPE_SUPERSET_TO_SUBSET; +} + + +enum_conv_type +Field_new_decimal::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + if (binlog_type() == source.real_field_type()) + return rpl_conv_type_from_same_data_type(source.metadata(), rli, param); + if (source.type_handler() == &type_handler_olddecimal || + source.type_handler() == &type_handler_newdecimal || + source.type_handler() == &type_handler_float || + source.type_handler() == &type_handler_double) + { + /* + Then the other type is either FLOAT, DOUBLE, or old style + DECIMAL, so we require lossy conversion. + */ + return CONV_TYPE_SUPERSET_TO_SUBSET; + } + return CONV_TYPE_IMPOSSIBLE; +} + + +/* + This covers FLOAT, DOUBLE and old DECIMAL +*/ +enum_conv_type +Field_real::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + if (binlog_type() == source.real_field_type()) + return rpl_conv_type_from_same_data_type(source.metadata(), rli, param); + if (source.type_handler() == &type_handler_olddecimal || + source.type_handler() == &type_handler_newdecimal) + return CONV_TYPE_SUPERSET_TO_SUBSET; // Always require lossy conversions + if (source.type_handler() == &type_handler_float || + source.type_handler() == &type_handler_double) + { + enum_conv_type order= compare_lengths(source.type_handler(), + max_display_length_for_field(source), + type_handler(), max_display_length()); + DBUG_ASSERT(order != CONV_TYPE_PRECISE); + return order; + } + return CONV_TYPE_IMPOSSIBLE; +} + + +enum_conv_type +Field_int::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + if (binlog_type() == source.real_field_type()) + return rpl_conv_type_from_same_data_type(source.metadata(), rli, param); + /* + The length comparison check will do the correct job of comparing + the field lengths (in bytes) of two integer types. 
+ */ + if (source.type_handler() == &type_handler_stiny || + source.type_handler() == &type_handler_sshort || + source.type_handler() == &type_handler_sint24 || + source.type_handler() == &type_handler_slong || + source.type_handler() == &type_handler_slonglong) + { + /* + max_display_length_for_field() is not fully precise for the integer + data types. So its result cannot be compared to the result of + max_dispay_length() when the table field and the binlog field + are of the same type. + This code should eventually be rewritten not to use + compare_lengths(), to detect subtype/supetype relations + just using the type codes. + */ + DBUG_ASSERT(source.real_field_type() != real_type()); + enum_conv_type order= compare_lengths(source.type_handler(), + max_display_length_for_field(source), + type_handler(), max_display_length()); + DBUG_ASSERT(order != CONV_TYPE_PRECISE); + return order; + } + return CONV_TYPE_IMPOSSIBLE; +} + + +enum_conv_type +Field_enum::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + /* + For some reasons Field_enum and Field_set store MYSQL_TYPE_STRING + as a type code in the binary log and encode the real type in metadata. + So we need to test real_type() here instread of binlog_type(). + */ + return real_type() == source.real_field_type() ? + rpl_conv_type_from_same_data_type(source.metadata(), rli, param) : + CONV_TYPE_IMPOSSIBLE; +} + + +enum_conv_type +Field_longstr::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + /** + @todo + Implement Field_varstring_compressed::real_type() and + Field_blob_compressed::real_type() properly. All occurencies + of Field::real_type() have to be inspected and adjusted if needed. + + Until it is not ready we have to compare source_type against + binlog_type() when replicating from or to compressed data types. 
+ + @sa Comment for Field::binlog_type() + */ + bool same_type; + if (source.real_field_type() == MYSQL_TYPE_VARCHAR_COMPRESSED || + source.real_field_type() == MYSQL_TYPE_BLOB_COMPRESSED || + binlog_type() == MYSQL_TYPE_VARCHAR_COMPRESSED || + binlog_type() == MYSQL_TYPE_BLOB_COMPRESSED) + same_type= binlog_type() == source.real_field_type(); + else if (Type_handler_json_common::is_json_type_handler(type_handler())) + same_type= type_handler()->type_handler_base() == source.type_handler(); + else + same_type= type_handler() == source.type_handler(); + + if (same_type) + return rpl_conv_type_from_same_data_type(source.metadata(), rli, param); + + if (source.type_handler() == &type_handler_tiny_blob || + source.type_handler() == &type_handler_medium_blob || + source.type_handler() == &type_handler_long_blob || + source.type_handler() == &type_handler_blob || + source.type_handler() == &type_handler_blob_compressed || + source.type_handler() == &type_handler_string || + source.type_handler() == &type_handler_var_string || + source.type_handler() == &type_handler_varchar || + source.type_handler() == &type_handler_varchar_compressed) + { + enum_conv_type order= compare_lengths(source.type_handler(), + max_display_length_for_field(source), + type_handler(), max_display_length()); + /* + Here we know that the types are different, so if the order + gives that they do not require any conversion, we still need + to have non-lossy conversion enabled to allow conversion + between different (string) types of the same length. + + Also, if all conversions are disabled, it is not allowed to convert + between these types. Since the TEXT vs. BINARY is distinguished by + the charset, and the charset is not replicated, we cannot + currently distinguish between , e.g., TEXT and BLOB. 
+ */ + if (order == CONV_TYPE_PRECISE) + order= CONV_TYPE_SUBSET_TO_SUPERSET; + return order; + } + return CONV_TYPE_IMPOSSIBLE; +} + + +enum_conv_type +Field_newdate::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + if (real_type() == source.real_field_type()) + return rpl_conv_type_from_same_data_type(source.metadata(), rli, param); + if (source.type_handler() == &type_handler_datetime2) + return CONV_TYPE_SUPERSET_TO_SUBSET; + return CONV_TYPE_IMPOSSIBLE; +} + + +enum_conv_type +Field_time::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + if (binlog_type() == source.real_field_type()) + return rpl_conv_type_from_same_data_type(source.metadata(), rli, param); + // 'MySQL56 TIME(N)' -> 'MariaDB-5.3 TIME(N)' is non-lossy + if (decimals() == source.metadata() && + source.type_handler() == &type_handler_time2) + return CONV_TYPE_VARIANT; // TODO: conversion from FSP1>FSP2 + return CONV_TYPE_IMPOSSIBLE; +} + + +enum_conv_type +Field_timef::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + if (binlog_type() == source.real_field_type()) + return rpl_conv_type_from_same_data_type(source.metadata(), rli, param); + /* + See comment in Field_datetimef::rpl_conv_type_from() + 'MariaDB-5.3 TIME(0)' to 'MySQL56 TIME(0)' is non-lossy + */ + if (source.metadata() == 0 && source.type_handler() == &type_handler_time) + return CONV_TYPE_VARIANT; + return CONV_TYPE_IMPOSSIBLE; +} + + +enum_conv_type +Field_timestamp::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + if (binlog_type() == source.real_field_type()) + return rpl_conv_type_from_same_data_type(source.metadata(), rli, param); + // 'MySQL56 TIMESTAMP(N)' -> MariaDB-5.3 TIMESTAMP(N)' is non-lossy + if (source.metadata() == decimals() && + source.type_handler() == &type_handler_timestamp2) + 
return CONV_TYPE_VARIANT; // TODO: conversion from FSP1>FSP2 + return CONV_TYPE_IMPOSSIBLE; +} + + +enum_conv_type +Field_timestampf::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + if (binlog_type() == source.real_field_type()) + return rpl_conv_type_from_same_data_type(source.metadata(), rli, param); + /* + See comment in Field_datetimef::rpl_conv_type_from() + 'MariaDB-5.3 TIMESTAMP(0)' to 'MySQL56 TIMESTAMP(0)' is non-lossy + */ + if (source.metadata() == 0 && + source.type_handler() == &type_handler_timestamp) + return CONV_TYPE_VARIANT; + return CONV_TYPE_IMPOSSIBLE; +} + + +enum_conv_type +Field_datetime::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + if (binlog_type() == source.real_field_type()) + return rpl_conv_type_from_same_data_type(source.metadata(), rli, param); + // 'MySQL56 DATETIME(N)' -> MariaDB-5.3 DATETIME(N) is non-lossy + if (source.metadata() == decimals() && + source.type_handler() == &type_handler_datetime2) + return CONV_TYPE_VARIANT; // TODO: conversion from FSP1>FSP2 + if (source.type_handler() == &type_handler_newdate) + return CONV_TYPE_SUBSET_TO_SUPERSET; + return CONV_TYPE_IMPOSSIBLE; +} + + +enum_conv_type +Field_datetimef::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + if (binlog_type() == source.real_field_type()) + return rpl_conv_type_from_same_data_type(source.metadata(), rli, param); + /* + 'MariaDB-5.3 DATETIME(N)' does not provide information about fractional + precision in metadata. So we assume the precision on the master is equal + to the precision on the slave. 
+ TODO: See MDEV-17394 what happend in case precisions are in case different + 'MariaDB-5.3 DATETIME(0)' to 'MySQL56 DATETIME(0)' is non-lossy + */ + if (source.metadata() == 0 && + source.type_handler() == &type_handler_datetime) + return CONV_TYPE_VARIANT; + if (source.type_handler() == &type_handler_newdate) + return CONV_TYPE_SUBSET_TO_SUPERSET; + return CONV_TYPE_IMPOSSIBLE; +} + + +enum_conv_type +Field_date::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + // old DATE + return binlog_type() == source.real_field_type() ? + rpl_conv_type_from_same_data_type(source.metadata(), rli, param) : + CONV_TYPE_IMPOSSIBLE; +} + + +enum_conv_type +Field_bit::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + return binlog_type() == source.real_field_type() ? + rpl_conv_type_from_same_data_type(source.metadata(), rli, param) : + CONV_TYPE_IMPOSSIBLE; +} + + +enum_conv_type +Field_year::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + return binlog_type() == source.real_field_type() ? 
+ rpl_conv_type_from_same_data_type(source.metadata(), rli, param) : + CONV_TYPE_IMPOSSIBLE; +} + + +enum_conv_type +Field_null::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + DBUG_ASSERT(0); + return CONV_TYPE_IMPOSSIBLE; +} + + +/**********************************************************************/ + + +#if defined(HAVE_REPLICATION) + +/** + */ +static void show_sql_type(const Conv_source &src, const Field &dst, + String *str) +{ + DBUG_ENTER("show_sql_type"); + DBUG_ASSERT(src.type_handler() != NULL); + DBUG_PRINT("enter", ("type: %s, metadata: 0x%x", + src.type_handler()->name().ptr(), src.metadata())); + src.type_handler()->show_binlog_type(src, dst, str); + DBUG_VOID_RETURN; +} + + +/** + Check the order variable and print errors if the order is not + acceptable according to the current settings. + + @param order The computed order of the conversion needed. + @param rli The relay log info data structure: for error reporting. + */ +static bool is_conversion_ok(enum_conv_type type, const Relay_log_info *rli, + ulonglong type_conversion_options) +{ + DBUG_ENTER("is_conversion_ok"); + bool allow_non_lossy, allow_lossy; + + allow_non_lossy= type_conversion_options & + (1ULL << SLAVE_TYPE_CONVERSIONS_ALL_NON_LOSSY); + allow_lossy= type_conversion_options & + (1ULL << SLAVE_TYPE_CONVERSIONS_ALL_LOSSY); + + DBUG_PRINT("enter", ("order: %d, flags:%s%s", (int) type, + allow_non_lossy ? " ALL_NON_LOSSY" : "", + allow_lossy ? " ALL_LOSSY" : "")); + switch (type) { + case CONV_TYPE_PRECISE: + case CONV_TYPE_VARIANT: + DBUG_RETURN(true); + case CONV_TYPE_SUBSET_TO_SUPERSET: + /* !!! Add error message saying that non-lossy conversions need to be allowed. */ + DBUG_RETURN(allow_non_lossy); + case CONV_TYPE_SUPERSET_TO_SUBSET: + /* !!! Add error message saying that lossy conversions need to be allowed. 
*/ + DBUG_RETURN(allow_lossy); + case CONV_TYPE_IMPOSSIBLE: + DBUG_RETURN(false); + } + + DBUG_RETURN(false); +} + + +/** + Can a type potentially be converted to another type? + + This function check if the types are convertible and what + conversion is required. + + If conversion is not possible, and error is printed. + + If conversion is possible: + + - *order will be set to -1 if source type is smaller than target + type and a non-lossy conversion can be required. This includes + the case where the field types are different but types could + actually be converted in either direction. + + - *order will be set to 0 if no conversion is required. + + - *order will be set to 1 if the source type is strictly larger + than the target type and that conversion is potentially lossy. + + @param[in] field Target field + @param[in] type Source field type + @param[in] metadata Source field metadata + @param[in] rli Relay log info (for error reporting) + @param[in] mflags Flags from the table map event + @param[out] order Order between source field and target field + + @return @c true if conversion is possible according to the current + settings, @c false if conversion is not possible according to the + current setting. 
+ */ +static enum_conv_type +can_convert_field_to(Field *field, const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) +{ + DBUG_ENTER("can_convert_field_to"); +#ifndef DBUG_OFF + char field_type_buf[MAX_FIELD_WIDTH]; + String field_type(field_type_buf, sizeof(field_type_buf), &my_charset_latin1); + field->sql_type(field_type); + DBUG_PRINT("enter", ("field_type: %s, target_type: %d, source_type: %d, source_metadata: 0x%x", + field_type.c_ptr_safe(), field->real_type(), + source.real_field_type(), source.metadata())); +#endif + DBUG_RETURN(field->rpl_conv_type_from(source, rli, param)); +} + + +const Type_handler *table_def::field_type_handler(uint col) const +{ + enum_field_types typecode= binlog_type(col); + uint16 metadata= field_metadata(col); + DBUG_ASSERT(typecode != MYSQL_TYPE_ENUM); + DBUG_ASSERT(typecode != MYSQL_TYPE_SET); + + if (typecode == MYSQL_TYPE_BLOB) + { + switch (metadata & 0xff) { + case 1: return &type_handler_tiny_blob; + case 2: return &type_handler_blob; + case 3: return &type_handler_medium_blob; + case 4: return &type_handler_long_blob; + default: return NULL; + } + } + if (typecode == MYSQL_TYPE_STRING) + { + uchar typecode2= metadata >> 8; + if (typecode2 == MYSQL_TYPE_SET) + return &type_handler_set; + if (typecode2 == MYSQL_TYPE_ENUM) + return &type_handler_enum; + return &type_handler_string; + } + /* + This type has not been used since before row-based replication, + so we can safely assume that it really is MYSQL_TYPE_NEWDATE. + */ + if (typecode == MYSQL_TYPE_DATE) + return &type_handler_newdate; + return Type_handler::get_handler_by_real_type(typecode); +} + + +/** + Is the definition compatible with a table? + + This function will compare the master table with an existing table + on the slave and see if they are compatible with respect to the + current settings of @c SLAVE_TYPE_CONVERSIONS. 
+ + If the tables are compatible and conversions are required, @c + *tmp_table_var will be set to a virtual temporary table with field + pointers for the fields that require conversions. This allow simple + checking of whether a conversion are to be applied or not. + + If tables are compatible, but no conversions are necessary, @c + *tmp_table_var will be set to NULL. + + @param rli_arg[in] + Relay log info, for error reporting. + + @param table[in] + Table to compare with + + @param tmp_table_var[out] + Virtual temporary table for performing conversions, if necessary. + + @retval true Master table is compatible with slave table. + @retval false Master table is not compatible with slave table. +*/ +bool +table_def::compatible_with(THD *thd, rpl_group_info *rgi, + TABLE *table, TABLE **conv_table_var) + const +{ + /* + We only check the initial columns for the tables. + */ + uint const cols_to_check= MY_MIN(table->s->fields, size()); + Relay_log_info *rli= rgi->rli; + TABLE *tmp_table= NULL; + + for (uint col= 0 ; col < cols_to_check ; ++col) + { + Field *const field= table->field[col]; + const Type_handler *h= field_type_handler(col); + if (!h) + { + sql_print_error("In RBR mode, Slave received unknown field type field %d " + " for column Name: %s.%s.%s.", + binlog_type(col), + field->table->s->db.str, + field->table->s->table_name.str, + field->field_name.str); + return false; + } + + if (!h) + return false; // An unknown data type found in the binary log + Conv_source source(h, field_metadata(col), field->charset()); + enum_conv_type convtype= can_convert_field_to(field, source, rli, + Conv_param(m_flags)); + if (is_conversion_ok(convtype, rli, slave_type_conversions_options)) + { + DBUG_PRINT("debug", ("Checking column %d -" + " field '%s' can be converted - order: %d", + col, field->field_name.str, convtype)); + /* + If conversion type is not CONV_TYPE_RECISE, a conversion is required, + so we need to set up the conversion table. 
+ */ + if (convtype != CONV_TYPE_PRECISE && tmp_table == NULL) + { + /* + This will create the full table with all fields. This is + necessary to ge the correct field lengths for the record. + */ + tmp_table= create_conversion_table(thd, rgi, table); + if (tmp_table == NULL) + return false; + /* + Clear all fields up to, but not including, this column. + */ + for (unsigned int i= 0; i < col; ++i) + tmp_table->field[i]= NULL; + } + + if (convtype == CONV_TYPE_PRECISE && tmp_table != NULL) + tmp_table->field[col]= NULL; + } + else + { + DBUG_PRINT("debug", ("Checking column %d -" + " field '%s' can not be converted", + col, field->field_name.str)); + DBUG_ASSERT(col < size() && col < table->s->fields); + DBUG_ASSERT(table->s->db.str && table->s->table_name.str); + DBUG_ASSERT(table->in_use); + const char *db_name= table->s->db.str; + const char *tbl_name= table->s->table_name.str; + StringBuffer source_type(&my_charset_latin1); + StringBuffer target_type(&my_charset_latin1); + THD *thd= table->in_use; + + show_sql_type(source, *field, &source_type); + field->sql_rpl_type(&target_type); + DBUG_ASSERT(source_type.length() > 0); + DBUG_ASSERT(target_type.length() > 0); + rli->report(ERROR_LEVEL, ER_SLAVE_CONVERSION_FAILED, rgi->gtid_info(), + ER_THD(thd, ER_SLAVE_CONVERSION_FAILED), + col, db_name, tbl_name, + source_type.c_ptr_safe(), target_type.c_ptr_safe()); + return false; + } + } + +#ifndef DBUG_OFF + if (tmp_table) + { + for (unsigned int col= 0; col < tmp_table->s->fields; ++col) + if (tmp_table->field[col]) + { + char source_buf[MAX_FIELD_WIDTH]; + char target_buf[MAX_FIELD_WIDTH]; + String source_type(source_buf, sizeof(source_buf), &my_charset_latin1); + String target_type(target_buf, sizeof(target_buf), &my_charset_latin1); + tmp_table->field[col]->sql_type(source_type); + table->field[col]->sql_type(target_type); + DBUG_PRINT("debug", ("Field %s - conversion required." 
+ " Source type: '%s', Target type: '%s'", + tmp_table->field[col]->field_name.str, + source_type.c_ptr_safe(), target_type.c_ptr_safe())); + } + } +#endif + + *conv_table_var= tmp_table; + return true; +} + + +/** + A wrapper to Virtual_tmp_table, to get access to its constructor, + which is protected for safety purposes (against illegal use on stack). +*/ +class Virtual_conversion_table: public Virtual_tmp_table +{ +public: + Virtual_conversion_table(THD *thd) :Virtual_tmp_table(thd) { } + /** + Add a new field into the virtual table. + @param handler - The type handler of the field. + @param metadata - The RBR binary log metadata for this field. + @param target_field - The field from the target table, to get extra + attributes from (e.g. typelib in case of ENUM). + */ + bool add(const Type_handler *handler, + uint16 metadata, const Field *target_field) + { + Field *tmp= handler->make_conversion_table_field(in_use->mem_root, + this, metadata, + target_field); + if (!tmp) + return true; + Virtual_tmp_table::add(tmp); + DBUG_PRINT("debug", ("sql_type: %s, target_field: '%s', max_length: %d, decimals: %d," + " maybe_null: %d, unsigned_flag: %d, pack_length: %u", + handler->name().ptr(), target_field->field_name.str, + tmp->field_length, tmp->decimals(), TRUE, + tmp->flags, tmp->pack_length())); + return false; + } +}; + + +/** + Create a conversion table. + + If the function is unable to create the conversion table, an error + will be printed and NULL will be returned. + + @return Pointer to conversion table, or NULL if unable to create + conversion table. + */ + +TABLE *table_def::create_conversion_table(THD *thd, rpl_group_info *rgi, + TABLE *target_table) const +{ + DBUG_ENTER("table_def::create_conversion_table"); + + Virtual_conversion_table *conv_table; + Relay_log_info *rli= rgi->rli; + /* + At slave, columns may differ. So we should create + MY_MIN(columns@master, columns@slave) columns in the + conversion table. 
+ */ + uint const cols_to_create= MY_MIN(target_table->s->fields, size()); + if (!(conv_table= new(thd) Virtual_conversion_table(thd)) || + conv_table->init(cols_to_create)) + goto err; + for (uint col= 0 ; col < cols_to_create; ++col) + { + const Type_handler *ha= field_type_handler(col); + DBUG_ASSERT(ha); // Checked at compatible_with() time + if (conv_table->add(ha, field_metadata(col), target_table->field[col])) + { + DBUG_PRINT("debug", ("binlog_type: %d, metadata: %04X, target_field: '%s'" + " make_conversion_table_field() failed", + binlog_type(col), field_metadata(col), + target_table->field[col]->field_name.str)); + goto err; + } + } + + if (conv_table->open()) + goto err; // Could not allocate record buffer? + + DBUG_RETURN(conv_table); + +err: + if (conv_table) + delete conv_table; + rli->report(ERROR_LEVEL, ER_SLAVE_CANT_CREATE_CONVERSION, rgi->gtid_info(), + ER_THD(thd, ER_SLAVE_CANT_CREATE_CONVERSION), + target_table->s->db.str, + target_table->s->table_name.str); + DBUG_RETURN(NULL); +} + + + +Deferred_log_events::Deferred_log_events(Relay_log_info *rli) : last_added(NULL) +{ + my_init_dynamic_array(PSI_INSTRUMENT_ME, &array, sizeof(Log_event *), 32, 16, MYF(0)); +} + +Deferred_log_events::~Deferred_log_events() +{ + delete_dynamic(&array); +} + +int Deferred_log_events::add(Log_event *ev) +{ + last_added= ev; + insert_dynamic(&array, (uchar*) &ev); + return 0; +} + +bool Deferred_log_events::is_empty() +{ + return array.elements == 0; +} + +bool Deferred_log_events::execute(rpl_group_info *rgi) +{ + bool res= false; + DBUG_ENTER("Deferred_log_events::execute"); + DBUG_ASSERT(rgi->deferred_events_collecting); + + rgi->deferred_events_collecting= false; + for (uint i= 0; !res && i < array.elements; i++) + { + Log_event *ev= (* (Log_event **) + dynamic_array_ptr(&array, i)); + res= ev->apply_event(rgi); + } + rgi->deferred_events_collecting= true; + DBUG_RETURN(res); +} + +void Deferred_log_events::rewind() +{ + /* + Reset preceding Query log event 
events which execution was + deferred because of slave side filtering. + */ + if (!is_empty()) + { + for (uint i= 0; i < array.elements; i++) + { + Log_event *ev= *(Log_event **) dynamic_array_ptr(&array, i); + delete ev; + } + last_added= NULL; + if (array.elements > array.max_element) + freeze_size(&array); + reset_dynamic(&array); + } + last_added= NULL; +} + +#endif // defined(HAVE_REPLICATION) + diff --git a/sql/scheduler.cc b/sql/scheduler.cc new file mode 100644 index 00000000..7261c5f3 --- /dev/null +++ b/sql/scheduler.cc @@ -0,0 +1,142 @@ +/* Copyright (c) 2007, 2013, Oracle and/or its affiliates. + Copyright (c) 2012, 2014, SkySQL Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Implementation for the thread scheduler +*/ + +#ifdef USE_PRAGMA_INTERFACE +#pragma implementation +#endif + +#include "mariadb.h" +#include "mysqld.h" +#include "scheduler.h" +#include "sql_class.h" +#include "sql_callback.h" +#include + +/** @internal + Helper functions to allow mysys to call the thread scheduler when + waiting for locks. 
+*/ + +/**@{*/ +extern "C" +{ +static void scheduler_wait_lock_begin(void) { + thd_wait_begin(NULL, THD_WAIT_TABLE_LOCK); +} + +static void scheduler_wait_lock_end(void) { + thd_wait_end(NULL); +} + +static void scheduler_wait_sync_begin(void) { + thd_wait_begin(NULL, THD_WAIT_SYNC); +} + +static void scheduler_wait_sync_end(void) { + thd_wait_end(NULL); +} + +static void scheduler_wait_net_begin(void) { + thd_wait_begin(NULL, THD_WAIT_NET); +} + +static void scheduler_wait_net_end(void) { + thd_wait_end(NULL); +} + +}; +/**@}*/ + +/** + Common scheduler init function. + + The scheduler is either initialized by calling + one_thread_scheduler() or one_thread_per_connection_scheduler() in + mysqld.cc, so this init function will always be called. + */ + +void scheduler_init() +{ + thr_set_lock_wait_callback(scheduler_wait_lock_begin, + scheduler_wait_lock_end); + thr_set_sync_wait_callback(scheduler_wait_sync_begin, + scheduler_wait_sync_end); + + vio_set_wait_callback(scheduler_wait_net_begin, + scheduler_wait_net_end); +} + + +/** + Kill notification callback, used by one-thread-per-connection + and threadpool scheduler. + + Wakes up a thread that is stuck in read/poll/epoll/event-poll + routines used by threadpool, such that subsequent attempt to + read from client connection will result in IO error. 
+*/ + +void post_kill_notification(THD *thd) +{ + DBUG_ENTER("post_kill_notification"); + if (current_thd == thd || thd->system_thread) + DBUG_VOID_RETURN; + + if (thd->net.vio) + vio_shutdown(thd->net.vio, SHUT_RD); + DBUG_VOID_RETURN; +} + +/* + Initialize scheduler for --thread-handling=one-thread-per-connection +*/ + +#ifndef EMBEDDED_LIBRARY + +void one_thread_per_connection_scheduler(scheduler_functions *func, + ulong *arg_max_connections, + Atomic_counter *arg_connection_count) +{ + scheduler_init(); + func->max_threads= *arg_max_connections + 1; + func->max_connections= arg_max_connections; + func->connection_count= arg_connection_count; + func->add_connection= create_thread_to_handle_connection; + func->post_kill_notification= post_kill_notification; +} +#else +void handle_connection_in_main_thread(CONNECT *connect) +{ +} +#endif + +/* + Initialize scheduler for --thread-handling=no-threads +*/ + +void one_thread_scheduler(scheduler_functions *func, + Atomic_counter *arg_connection_count) +{ + scheduler_init(); + func->max_threads= 1; + func->max_connections= &max_connections; + func->connection_count= arg_connection_count; + func->add_connection= handle_connection_in_main_thread; +} diff --git a/sql/scheduler.h b/sql/scheduler.h new file mode 100644 index 00000000..c2686aad --- /dev/null +++ b/sql/scheduler.h @@ -0,0 +1,99 @@ +#ifndef SCHEDULER_INCLUDED +#define SCHEDULER_INCLUDED + +/* Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2012, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Classes for the thread scheduler +*/ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface +#endif + +class THD; + +/* Functions used when manipulating threads */ + +struct scheduler_functions +{ + uint max_threads; + Atomic_counter *connection_count; + ulong *max_connections; + bool (*init)(void); + void (*add_connection)(CONNECT *connect); + void (*thd_wait_begin)(THD *thd, int wait_type); + void (*thd_wait_end)(THD *thd); + void (*post_kill_notification)(THD *thd); + void (*end)(void); + /** resume previous unfinished command (threadpool only)*/ + void (*thd_resume)(THD* thd); +}; + + +/** + Scheduler types enumeration. + + The default of --thread-handling is the first one in the + thread_handling_names array, this array has to be consistent with + the order in this array, so to change default one has to change the + first entry in this enum and the first entry in the + thread_handling_names array. + + @note The last entry of the enumeration is also used to mark the + thread handling as dynamic. In this case the name of the thread + handling is fetched from the name of the plugin that implements it. +*/ +enum scheduler_types +{ + /* + The default of --thread-handling is the first one in the + thread_handling_names array, this array has to be consistent with + the order in this array, so to change default one has to change + the first entry in this enum and the first entry in the + thread_handling_names array. 
+ */ + SCHEDULER_ONE_THREAD_PER_CONNECTION=0, + SCHEDULER_NO_THREADS, + SCHEDULER_TYPES_COUNT +}; + +void one_thread_per_connection_scheduler(scheduler_functions *func, + ulong *arg_max_connections, Atomic_counter<uint> *arg_connection_count); +void one_thread_scheduler(scheduler_functions *func, Atomic_counter<uint> *arg_connection_count); + +extern void scheduler_init(); +extern void post_kill_notification(THD *); +/* + To be used for pool-of-threads (implemented differently on various OSs) +*/ +struct thd_scheduler +{ +public: + void *data; /* scheduler-specific data structure */ +}; + +#ifdef HAVE_POOL_OF_THREADS +void pool_of_threads_scheduler(scheduler_functions* func, + ulong *arg_max_connections, + Atomic_counter<uint> *arg_connection_count); +#else +#define pool_of_threads_scheduler(A,B,C) \ + one_thread_per_connection_scheduler(A, B, C) +#endif /*HAVE_POOL_OF_THREADS*/ + +#endif /* SCHEDULER_INCLUDED */ diff --git a/sql/select_handler.cc b/sql/select_handler.cc new file mode 100644 index 00000000..b0b8e586 --- /dev/null +++ b/sql/select_handler.cc @@ -0,0 +1,173 @@ +/* + Copyright (c) 2018, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_select.h" +#include "select_handler.h" + + +/** + The methods of the select_handler class. 
+ + The objects of this class are used for pushdown of the select queries + into engines. The main method of the class is select_handler::execute() + that initiates execution of a select query by a foreign engine, receives the + rows of the result set, puts them in a buffer of a temporary table and sends + them from the buffer directly into output. + + The method uses the functions of the select_handler interface to do this. + It also employs some helper functions to create the needed temporary + table and to send rows from the temporary table into output. + The constructor of the class gets the select_handler interface as a parameter. +*/ + + +select_handler::select_handler(THD *thd_arg, handlerton *ht_arg) + : thd(thd_arg), ht(ht_arg), table(NULL), + is_analyze(thd_arg->lex->analyze_stmt) +{} + + +select_handler::~select_handler() +{ + if (table) + free_tmp_table(thd, table); +} + + +TABLE *select_handler::create_tmp_table(THD *thd, SELECT_LEX *select) +{ + List<Item> types; + TMP_TABLE_PARAM tmp_table_param; + TABLE *table; + DBUG_ENTER("select_handler::create_tmp_table"); + + if (select->master_unit()->join_union_item_types(thd, types, 1)) + DBUG_RETURN(NULL); + tmp_table_param.init(); + tmp_table_param.field_count= tmp_table_param.func_count= types.elements; + table= ::create_tmp_table(thd, &tmp_table_param, types, + (ORDER *) 0, false, 0, + TMP_TABLE_ALL_COLUMNS, 1, + &empty_clex_str, true, false); + DBUG_RETURN(table); +} + + +bool select_handler::prepare() +{ + DBUG_ENTER("select_handler::prepare"); + /* + Some engines (e.g. XPand) initialize "table" on their own. + So we need to create a temporary table only if "table" is NULL. 
+ */ + if (!table && !(table= create_tmp_table(thd, select))) + DBUG_RETURN(true); + DBUG_RETURN(table->fill_item_list(&result_columns)); +} + + +bool select_handler::send_result_set_metadata() +{ + DBUG_ENTER("select_handler::send_result_set_metadata"); + +#ifdef WITH_WSREP + if (WSREP(thd) && thd->wsrep_retry_query) + { + WSREP_DEBUG("skipping select metadata"); + DBUG_RETURN(false); + } + #endif /* WITH_WSREP */ + if (select->join->result->send_result_set_metadata(result_columns, + Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF)) + DBUG_RETURN(true); + + DBUG_RETURN(false); +} + + +bool select_handler::send_data() +{ + DBUG_ENTER("Pushdown_select::send_data"); + + if (select->join->result->send_data(result_columns)) + DBUG_RETURN(true); + + DBUG_RETURN(false); +} + + +bool select_handler::send_eof() +{ + DBUG_ENTER("select_handler::send_eof"); + + if (select->join->result->send_eof()) + DBUG_RETURN(true); + DBUG_RETURN(false); +} + + +int select_handler::execute() +{ + int err; + + DBUG_ENTER("select_handler::execute"); + + if ((err= init_scan())) + goto error; + + if (is_analyze) + { + end_scan(); + DBUG_RETURN(0); + } + + if (send_result_set_metadata()) + DBUG_RETURN(-1); + + while (!(err= next_row())) + { + if (thd->check_killed() || send_data()) + { + end_scan(); + DBUG_RETURN(-1); + } + } + + if (err != 0 && err != HA_ERR_END_OF_FILE) + goto error; + + if ((err= end_scan())) + goto error_2; + + if (send_eof()) + DBUG_RETURN(-1); + + DBUG_RETURN(0); + +error: + end_scan(); +error_2: + print_error(err, MYF(0)); + DBUG_RETURN(-1); // Error not sent to client +} + +void select_handler::print_error(int error, myf errflag) +{ + my_error(ER_GET_ERRNO, MYF(0), error, hton_name(ht)->str); +} diff --git a/sql/select_handler.h b/sql/select_handler.h new file mode 100644 index 00000000..5cc63231 --- /dev/null +++ b/sql/select_handler.h @@ -0,0 +1,86 @@ +/* + Copyright (c) 2018, 2019 MariaDB + + This program is free software; you can redistribute it and/or modify + it 
under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef SELECT_HANDLER_INCLUDED +#define SELECT_HANDLER_INCLUDED + +#include "mariadb.h" +#include "sql_priv.h" + +/** + @class select_handler + + This interface class is to be used for execution of select queries + by foreign engines +*/ + +class select_handler +{ + public: + THD *thd; + handlerton *ht; + + SELECT_LEX *select; // Select to be excuted + + /* + Temporary table where all results should be stored in record[0] + The table has a field for every item from the select_lex::item_list. + The table is actually never filled. Only its record buffer is used. + */ + TABLE *table; + List result_columns; + + bool is_analyze; + + bool send_result_set_metadata(); + bool send_data(); + + select_handler(THD *thd_arg, handlerton *ht_arg); + + virtual ~select_handler(); + + int execute(); + + virtual bool prepare(); + + static TABLE *create_tmp_table(THD *thd, SELECT_LEX *sel); + +protected: + /* + Functions to scan the select result set. + All these returns 0 if ok, error code in case of error. + */ + + /* Initialize the process of producing rows of result set */ + virtual int init_scan() = 0; + + /* + Put the next produced row of the result set in table->record[0] + and return 0. Return HA_ERR_END_OF_FILE if there are no more rows, + return other error number in case of fatal error. 
+ */ + virtual int next_row() = 0; + + /* Finish scanning */ + virtual int end_scan() = 0; + + /* Report errors */ + virtual void print_error(int error, myf errflag); + + bool send_eof(); +}; + +#endif /* SELECT_HANDLER_INCLUDED */ diff --git a/sql/semisync.cc b/sql/semisync.cc new file mode 100644 index 00000000..e3638d8b --- /dev/null +++ b/sql/semisync.cc @@ -0,0 +1,32 @@ +/* Copyright (C) 2007 Google Inc. + Copyright (C) 2008 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#include +#include "semisync.h" + +const unsigned char Repl_semi_sync_base::k_packet_magic_num= 0xef; +const unsigned char Repl_semi_sync_base::k_packet_flag_sync= 0x01; + + +const unsigned long Trace::k_trace_general= 0x0001; +const unsigned long Trace::k_trace_detail= 0x0010; +const unsigned long Trace::k_trace_net_wait= 0x0020; +const unsigned long Trace::k_trace_function= 0x0040; + +const unsigned char Repl_semi_sync_base::k_sync_header[2]= + {Repl_semi_sync_base::k_packet_magic_num, 0}; diff --git a/sql/semisync.h b/sql/semisync.h new file mode 100644 index 00000000..44f23660 --- /dev/null +++ b/sql/semisync.h @@ -0,0 +1,73 @@ +/* Copyright (C) 2007 Google Inc. 
+ Copyright (C) 2008 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifndef SEMISYNC_H +#define SEMISYNC_H + +#include "mysqld.h" +#include "log_event.h" +#include "replication.h" + +/** + This class is used to trace function calls and other process + information +*/ +class Trace { +public: + static const unsigned long k_trace_function; + static const unsigned long k_trace_general; + static const unsigned long k_trace_detail; + static const unsigned long k_trace_net_wait; + + unsigned long m_trace_level; /* the level for tracing */ + + Trace() + :m_trace_level(0L) + {} + Trace(unsigned long trace_level) + :m_trace_level(trace_level) + {} +}; + +/** + Base class for semi-sync master and slave classes +*/ +class Repl_semi_sync_base + :public Trace { +public: + static const unsigned char k_sync_header[2]; /* two byte packet header */ + + /* Constants in network packet header. 
*/ + static const unsigned char k_packet_magic_num; + static const unsigned char k_packet_flag_sync; +}; + +/* The layout of a semisync slave reply packet: + 1 byte for the magic num + 8 bytes for the binlog position + n bytes for the binlog filename, terminated with a '\0' +*/ +#define REPLY_MAGIC_NUM_LEN 1 +#define REPLY_BINLOG_POS_LEN 8 +#define REPLY_BINLOG_NAME_LEN (FN_REFLEN + 1) +#define REPLY_MAGIC_NUM_OFFSET 0 +#define REPLY_BINLOG_POS_OFFSET (REPLY_MAGIC_NUM_OFFSET + REPLY_MAGIC_NUM_LEN) +#define REPLY_BINLOG_NAME_OFFSET (REPLY_BINLOG_POS_OFFSET + REPLY_BINLOG_POS_LEN) +#define REPLY_MESSAGE_MAX_LENGTH \ + (REPLY_MAGIC_NUM_LEN + REPLY_BINLOG_POS_LEN + REPLY_BINLOG_NAME_LEN) + +#endif /* SEMISYNC_H */ diff --git a/sql/semisync_master.cc b/sql/semisync_master.cc new file mode 100644 index 00000000..670a6d8d --- /dev/null +++ b/sql/semisync_master.cc @@ -0,0 +1,1419 @@ +/* Copyright (C) 2007 Google Inc. + Copyright (c) 2008, 2013, Oracle and/or its affiliates. + Copyright (c) 2011, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + + +#include +#include "semisync_master.h" +#include +#include + +#define TIME_THOUSAND 1000 +#define TIME_MILLION 1000000 +#define TIME_BILLION 1000000000 + +/* This indicates whether semi-synchronous replication is enabled. 
*/ +my_bool rpl_semi_sync_master_enabled= 0; +unsigned long long rpl_semi_sync_master_request_ack = 0; +unsigned long long rpl_semi_sync_master_get_ack = 0; +my_bool rpl_semi_sync_master_wait_no_slave = 1; +my_bool rpl_semi_sync_master_status = 0; +ulong rpl_semi_sync_master_wait_point = + SEMI_SYNC_MASTER_WAIT_POINT_AFTER_STORAGE_COMMIT; +ulong rpl_semi_sync_master_timeout; +ulong rpl_semi_sync_master_trace_level; +ulong rpl_semi_sync_master_yes_transactions = 0; +ulong rpl_semi_sync_master_no_transactions = 0; +ulong rpl_semi_sync_master_off_times = 0; +ulong rpl_semi_sync_master_timefunc_fails = 0; +ulong rpl_semi_sync_master_wait_timeouts = 0; +ulong rpl_semi_sync_master_wait_sessions = 0; +ulong rpl_semi_sync_master_wait_pos_backtraverse = 0; +ulong rpl_semi_sync_master_avg_trx_wait_time = 0; +ulonglong rpl_semi_sync_master_trx_wait_num = 0; +ulong rpl_semi_sync_master_avg_net_wait_time = 0; +ulonglong rpl_semi_sync_master_net_wait_num = 0; +ulong rpl_semi_sync_master_clients = 0; +ulonglong rpl_semi_sync_master_net_wait_time = 0; +ulonglong rpl_semi_sync_master_trx_wait_time = 0; + +Repl_semi_sync_master repl_semisync_master; +Ack_receiver ack_receiver; + +/* + structure to save transaction log filename and position +*/ +typedef struct Trans_binlog_info { + my_off_t log_pos; + char log_file[FN_REFLEN]; +} Trans_binlog_info; + +static int get_wait_time(const struct timespec& start_ts); + +static ulonglong timespec_to_usec(const struct timespec *ts) +{ + return (ulonglong) ts->tv_sec * TIME_MILLION + ts->tv_nsec / TIME_THOUSAND; +} + +/******************************************************************************* + * + * class : manage all active transaction nodes + * + ******************************************************************************/ + +Active_tranx::Active_tranx(mysql_mutex_t *lock, + ulong trace_level) + : Trace(trace_level), m_allocator(max_connections), + m_num_entries(max_connections << 1), /* Transaction hash table size + * is set to 
double the size + * of max_connections */ + m_lock(lock) +{ + /* No transactions are in the list initially. */ + m_trx_front = NULL; + m_trx_rear = NULL; + + /* Create the hash table to find a transaction's ending event. */ + m_trx_htb = new Tranx_node *[m_num_entries]; + for (int idx = 0; idx < m_num_entries; ++idx) + m_trx_htb[idx] = NULL; + + sql_print_information("Semi-sync replication initialized for transactions."); +} + +Active_tranx::~Active_tranx() +{ + delete [] m_trx_htb; + m_trx_htb = NULL; + m_num_entries = 0; +} + +unsigned int Active_tranx::calc_hash(const unsigned char *key, size_t length) +{ + unsigned int nr = 1, nr2 = 4; + + /* The hash implementation comes from calc_hashnr() in mysys/hash.c. */ + while (length--) + { + nr ^= (((nr & 63)+nr2)*((unsigned int) (unsigned char) *key++))+ (nr << 8); + nr2 += 3; + } + return((unsigned int) nr); +} + +unsigned int Active_tranx::get_hash_value(const char *log_file_name, + my_off_t log_file_pos) +{ + unsigned int hash1 = calc_hash((const unsigned char *)log_file_name, + strlen(log_file_name)); + unsigned int hash2 = calc_hash((const unsigned char *)(&log_file_pos), + sizeof(log_file_pos)); + + return (hash1 + hash2) % m_num_entries; +} + +int Active_tranx::compare(const char *log_file_name1, my_off_t log_file_pos1, + const char *log_file_name2, my_off_t log_file_pos2) +{ + int cmp = strcmp(log_file_name1, log_file_name2); + + if (cmp != 0) + return cmp; + + if (log_file_pos1 > log_file_pos2) + return 1; + else if (log_file_pos1 < log_file_pos2) + return -1; + return 0; +} + +int Active_tranx::insert_tranx_node(const char *log_file_name, + my_off_t log_file_pos) +{ + Tranx_node *ins_node; + int result = 0; + unsigned int hash_val; + + DBUG_ENTER("Active_tranx:insert_tranx_node"); + + ins_node = m_allocator.allocate_node(); + if (!ins_node) + { + sql_print_error("%s: transaction node allocation failed for: (%s, %lu)", + "Active_tranx:insert_tranx_node", + log_file_name, (ulong)log_file_pos); + result = -1; 
+ goto l_end; + } + + /* insert the binlog position in the active transaction list. */ + strncpy(ins_node->log_name, log_file_name, FN_REFLEN-1); + ins_node->log_name[FN_REFLEN-1] = 0; /* make sure it ends properly */ + ins_node->log_pos = log_file_pos; + + if (!m_trx_front) + { + /* The list is empty. */ + m_trx_front = m_trx_rear = ins_node; + } + else + { + int cmp = compare(ins_node, m_trx_rear); + if (cmp > 0) + { + /* Compare with the tail first. If the transaction happens later in + * binlog, then make it the new tail. + */ + m_trx_rear->next = ins_node; + m_trx_rear = ins_node; + } + else + { + /* Otherwise, it is an error because the transaction should hold the + * mysql_bin_log.LOCK_log when appending events. + */ + sql_print_error("%s: binlog write out-of-order, tail (%s, %lu), " + "new node (%s, %lu)", "Active_tranx:insert_tranx_node", + m_trx_rear->log_name, (ulong)m_trx_rear->log_pos, + ins_node->log_name, (ulong)ins_node->log_pos); + result = -1; + goto l_end; + } + } + + hash_val = get_hash_value(ins_node->log_name, ins_node->log_pos); + ins_node->hash_next = m_trx_htb[hash_val]; + m_trx_htb[hash_val] = ins_node; + + DBUG_PRINT("semisync", ("%s: insert (%s, %lu) in entry(%u)", + "Active_tranx:insert_tranx_node", + ins_node->log_name, (ulong)ins_node->log_pos, + hash_val)); + l_end: + + DBUG_RETURN(result); +} + +bool Active_tranx::is_tranx_end_pos(const char *log_file_name, + my_off_t log_file_pos) +{ + DBUG_ENTER("Active_tranx::is_tranx_end_pos"); + + unsigned int hash_val = get_hash_value(log_file_name, log_file_pos); + Tranx_node *entry = m_trx_htb[hash_val]; + + while (entry != NULL) + { + if (compare(entry, log_file_name, log_file_pos) == 0) + break; + + entry = entry->hash_next; + } + + DBUG_PRINT("semisync", ("%s: probe (%s, %lu) in entry(%u)", + "Active_tranx::is_tranx_end_pos", + log_file_name, (ulong)log_file_pos, hash_val)); + + DBUG_RETURN(entry != NULL); +} + +void Active_tranx::clear_active_tranx_nodes(const char *log_file_name, + 
my_off_t log_file_pos) +{ + Tranx_node *new_front; + + DBUG_ENTER("Active_tranx::::clear_active_tranx_nodes"); + + if (log_file_name != NULL) + { + new_front = m_trx_front; + + while (new_front) + { + if (compare(new_front, log_file_name, log_file_pos) > 0) + break; + new_front = new_front->next; + } + } + else + { + /* If log_file_name is NULL, clear everything. */ + new_front = NULL; + } + + if (new_front == NULL) + { + /* No active transaction nodes after the call. */ + + /* Clear the hash table. */ + memset(m_trx_htb, 0, m_num_entries * sizeof(Tranx_node *)); + m_allocator.free_all_nodes(); + + /* Clear the active transaction list. */ + if (m_trx_front != NULL) + { + m_trx_front = NULL; + m_trx_rear = NULL; + } + + DBUG_PRINT("semisync", ("%s: cleared all nodes", + "Active_tranx::::clear_active_tranx_nodes")); + } + else if (new_front != m_trx_front) + { + Tranx_node *curr_node, *next_node; + + /* Delete all transaction nodes before the confirmation point. */ +#ifdef DBUG_TRACE + int n_frees = 0; +#endif + curr_node = m_trx_front; + while (curr_node != new_front) + { + next_node = curr_node->next; +#ifdef DBUG_TRACE + n_frees++; +#endif + + /* Remove the node from the hash table. */ + unsigned int hash_val = get_hash_value(curr_node->log_name, curr_node->log_pos); + Tranx_node **hash_ptr = &(m_trx_htb[hash_val]); + while ((*hash_ptr) != NULL) + { + if ((*hash_ptr) == curr_node) + { + (*hash_ptr) = curr_node->hash_next; + break; + } + hash_ptr = &((*hash_ptr)->hash_next); + } + + curr_node = next_node; + } + + m_trx_front = new_front; + m_allocator.free_nodes_before(m_trx_front); + + DBUG_PRINT("semisync", ("%s: cleared %d nodes back until pos (%s, %lu)", + "Active_tranx::::clear_active_tranx_nodes", + n_frees, + m_trx_front->log_name, (ulong)m_trx_front->log_pos)); + } + + DBUG_VOID_RETURN; +} + + +/******************************************************************************* + * + * class: the basic code layer for semisync master. 
+ * class: the basic code layer for semisync slave. + * + * The most important functions during semi-syn replication listed: + * + * Master: + * . report_reply_binlog(): called by the binlog dump thread when it receives + * the slave's status information. + * . update_sync_header(): based on transaction waiting information, decide + * whether to request the slave to reply. + * . write_tranx_in_binlog(): called by the transaction thread when it finishes + * writing all transaction events in binlog. + * . commit_trx(): transaction thread wait for the slave reply. + * + * Slave: + * . slave_read_sync_header(): read the semi-sync header from the master, get + * the sync status and get the payload for events. + * . slave_reply(): reply to the master about the replication progress. + * + ******************************************************************************/ + +Repl_semi_sync_master::Repl_semi_sync_master() + : m_active_tranxs(NULL), + m_init_done(false), + m_reply_file_name_inited(false), + m_reply_file_pos(0L), + m_wait_file_name_inited(false), + m_wait_file_pos(0), + m_master_enabled(false), + m_wait_timeout(0L), + m_state(0), + m_wait_point(0) +{ + strcpy(m_reply_file_name, ""); + strcpy(m_wait_file_name, ""); +} + +int Repl_semi_sync_master::init_object() +{ + int result= 0; + + m_init_done = true; + + /* References to the parameter works after set_options(). */ + set_wait_timeout(rpl_semi_sync_master_timeout); + set_trace_level(rpl_semi_sync_master_trace_level); + set_wait_point(rpl_semi_sync_master_wait_point); + + /* Mutex initialization can only be done after MY_INIT(). 
*/ + mysql_mutex_init(key_LOCK_rpl_semi_sync_master_enabled, + &LOCK_rpl_semi_sync_master_enabled, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_binlog, + &LOCK_binlog, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_binlog_send, + &COND_binlog_send, NULL); + + if (rpl_semi_sync_master_enabled) + { + result = enable_master(); + if (!result) + { + result= ack_receiver.start(); /* Start the ACK thread. */ + /* + If rpl_semi_sync_master_wait_no_slave is disabled, let's temporarily + switch off semisync to avoid hang if there's none active slave. + */ + if (!rpl_semi_sync_master_wait_no_slave) + switch_off(); + } + } + else + { + disable_master(); + } + + return result; +} + +int Repl_semi_sync_master::enable_master() +{ + int result = 0; + + /* Must have the lock when we do enable of disable. */ + lock(); + + if (!get_master_enabled()) + { + m_active_tranxs = new Active_tranx(&LOCK_binlog, m_trace_level); + if (m_active_tranxs != NULL) + { + m_commit_file_name_inited = false; + m_reply_file_name_inited = false; + m_wait_file_name_inited = false; + + set_master_enabled(true); + m_state = true; + sql_print_information("Semi-sync replication enabled on the master."); + } + else + { + sql_print_error("Cannot allocate memory to enable semi-sync on the master."); + result = -1; + } + } + + unlock(); + + return result; +} + +void Repl_semi_sync_master::disable_master() +{ + /* Must have the lock when we do enable of disable. */ + lock(); + + if (get_master_enabled()) + { + /* Switch off the semi-sync first so that waiting transaction will be + * waken up. 
+ */ + switch_off(); + + assert(m_active_tranxs != NULL); + delete m_active_tranxs; + m_active_tranxs = NULL; + + m_reply_file_name_inited = false; + m_wait_file_name_inited = false; + m_commit_file_name_inited = false; + + set_master_enabled(false); + sql_print_information("Semi-sync replication disabled on the master."); + } + + unlock(); +} + +void Repl_semi_sync_master::cleanup() +{ + if (m_init_done) + { + mysql_mutex_destroy(&LOCK_rpl_semi_sync_master_enabled); + mysql_mutex_destroy(&LOCK_binlog); + mysql_cond_destroy(&COND_binlog_send); + m_init_done= 0; + } + + delete m_active_tranxs; +} + +int Repl_semi_sync_master::sync_get_master_wait_sessions() +{ + int wait_sessions; + lock(); + wait_sessions= rpl_semi_sync_master_wait_sessions; + unlock(); + return wait_sessions; +} + +void Repl_semi_sync_master::create_timeout(struct timespec *out, + struct timespec *start_arg) +{ + struct timespec *start_ts; + struct timespec now_ts; + if (!start_arg) + { + set_timespec(now_ts, 0); + start_ts= &now_ts; + } + else + { + start_ts= start_arg; + } + + long diff_secs= (long) (m_wait_timeout / TIME_THOUSAND); + long diff_nsecs= (long) ((m_wait_timeout % TIME_THOUSAND) * TIME_MILLION); + long nsecs= start_ts->tv_nsec + diff_nsecs; + out->tv_sec= start_ts->tv_sec + diff_secs + nsecs / TIME_BILLION; + out->tv_nsec= nsecs % TIME_BILLION; +} + +void Repl_semi_sync_master::lock() +{ + mysql_mutex_lock(&LOCK_binlog); +} + +void Repl_semi_sync_master::unlock() +{ + mysql_mutex_unlock(&LOCK_binlog); +} + +void Repl_semi_sync_master::cond_broadcast() +{ + mysql_cond_broadcast(&COND_binlog_send); +} + +int Repl_semi_sync_master::cond_timewait(struct timespec *wait_time) +{ + int wait_res; + + DBUG_ENTER("Repl_semi_sync_master::cond_timewait()"); + + wait_res= mysql_cond_timedwait(&COND_binlog_send, + &LOCK_binlog, wait_time); + + DBUG_RETURN(wait_res); +} + +void Repl_semi_sync_master::add_slave() +{ + lock(); + rpl_semi_sync_master_clients++; + unlock(); +} + +void 
Repl_semi_sync_master::remove_slave() +{ + lock(); + rpl_semi_sync_master_clients--; + + /* Only switch off if semi-sync is enabled and is on */ + if (get_master_enabled() && is_on()) + { + /* If user has chosen not to wait if no semi-sync slave available + and the last semi-sync slave exits, turn off semi-sync on master + immediately. + */ + if (!rpl_semi_sync_master_wait_no_slave && + rpl_semi_sync_master_clients == 0) + switch_off(); + } + unlock(); +} + +int Repl_semi_sync_master::report_reply_packet(uint32 server_id, + const uchar *packet, + ulong packet_len) +{ + int result= -1; + char log_file_name[FN_REFLEN+1]; + my_off_t log_file_pos; + ulong log_file_len = 0; + + DBUG_ENTER("Repl_semi_sync_master::report_reply_packet"); + + DBUG_EXECUTE_IF("semisync_corrupt_magic", + const_cast(packet)[REPLY_MAGIC_NUM_OFFSET]= 0;); + if (unlikely(packet[REPLY_MAGIC_NUM_OFFSET] != + Repl_semi_sync_master::k_packet_magic_num)) + { + sql_print_error("Read semi-sync reply magic number error"); + goto l_end; + } + + if (unlikely(packet_len < REPLY_BINLOG_NAME_OFFSET)) + { + sql_print_error("Read semi-sync reply length error: packet is too small"); + goto l_end; + } + + log_file_pos = uint8korr(packet + REPLY_BINLOG_POS_OFFSET); + log_file_len = packet_len - REPLY_BINLOG_NAME_OFFSET; + if (unlikely(log_file_len >= FN_REFLEN)) + { + sql_print_error("Read semi-sync reply binlog file length too large"); + goto l_end; + } + strncpy(log_file_name, (const char*)packet + REPLY_BINLOG_NAME_OFFSET, log_file_len); + log_file_name[log_file_len] = 0; + + DBUG_ASSERT(dirname_length(log_file_name) == 0); + + DBUG_PRINT("semisync", ("%s: Got reply(%s, %lu) from server %u", + "Repl_semi_sync_master::report_reply_packet", + log_file_name, (ulong)log_file_pos, server_id)); + + rpl_semi_sync_master_get_ack++; + report_reply_binlog(server_id, log_file_name, log_file_pos); + result= 0; + +l_end: + if (result == -1) + { + char buf[256]; + octet2hex(buf, (const char*) packet, 
std::min(static_cast(sizeof(buf)-1), + packet_len)); + sql_print_information("First bytes of the packet from semisync slave " + "server-id %d: %s", server_id, buf); + + } + DBUG_RETURN(result); +} + +int Repl_semi_sync_master::report_reply_binlog(uint32 server_id, + const char *log_file_name, + my_off_t log_file_pos) +{ + int cmp; + bool can_release_threads = false; + bool need_copy_send_pos = true; + + DBUG_ENTER("Repl_semi_sync_master::report_reply_binlog"); + + if (!(get_master_enabled())) + DBUG_RETURN(0); + + lock(); + + /* This is the real check inside the mutex. */ + if (!get_master_enabled()) + goto l_end; + + if (!is_on()) + /* We check to see whether we can switch semi-sync ON. */ + try_switch_on(server_id, log_file_name, log_file_pos); + + /* The position should increase monotonically, if there is only one + * thread sending the binlog to the slave. + * In reality, to improve the transaction availability, we allow multiple + * sync replication slaves. So, if any one of them get the transaction, + * the transaction session in the primary can move forward. + */ + if (m_reply_file_name_inited) + { + cmp = Active_tranx::compare(log_file_name, log_file_pos, + m_reply_file_name, m_reply_file_pos); + + /* If the requested position is behind the sending binlog position, + * would not adjust sending binlog position. + * We based on the assumption that there are multiple semi-sync slave, + * and at least one of them shou/ld be up to date. + * If all semi-sync slaves are behind, at least initially, the primary + * can find the situation after the waiting timeout. After that, some + * slaves should catch up quickly. + */ + if (cmp < 0) + { + /* If the position is behind, do not copy it. */ + need_copy_send_pos = false; + } + } + + if (need_copy_send_pos) + { + strmake_buf(m_reply_file_name, log_file_name); + m_reply_file_pos = log_file_pos; + m_reply_file_name_inited = true; + + /* Remove all active transaction nodes before this point. 
*/ + assert(m_active_tranxs != NULL); + m_active_tranxs->clear_active_tranx_nodes(log_file_name, log_file_pos); + + DBUG_PRINT("semisync", ("%s: Got reply at (%s, %lu)", + "Repl_semi_sync_master::report_reply_binlog", + log_file_name, (ulong)log_file_pos)); + } + + if (rpl_semi_sync_master_wait_sessions > 0) + { + /* Let us check if some of the waiting threads doing a trx + * commit can now proceed. + */ + cmp = Active_tranx::compare(m_reply_file_name, m_reply_file_pos, + m_wait_file_name, m_wait_file_pos); + if (cmp >= 0) + { + /* Yes, at least one waiting thread can now proceed: + * let us release all waiting threads with a broadcast + */ + can_release_threads = true; + m_wait_file_name_inited = false; + } + } + + l_end: + unlock(); + + if (can_release_threads) + { + DBUG_PRINT("semisync", ("%s: signal all waiting threads.", + "Repl_semi_sync_master::report_reply_binlog")); + + cond_broadcast(); + } + + DBUG_RETURN(0); +} + +int Repl_semi_sync_master::wait_after_sync(const char *log_file, my_off_t log_pos) +{ + if (!get_master_enabled()) + return 0; + + int ret= 0; + if(log_pos && + wait_point() == SEMI_SYNC_MASTER_WAIT_POINT_AFTER_BINLOG_SYNC) + ret= commit_trx(log_file + dirname_length(log_file), log_pos); + + return ret; +} + +int Repl_semi_sync_master::wait_after_commit(THD* thd, bool all) +{ + if (!get_master_enabled()) + return 0; + + int ret= 0; + const char *log_file; + my_off_t log_pos; + + bool is_real_trans= + (all || thd->transaction->all.ha_list == 0); + /* + The coordinates are propagated to this point having been computed + in report_binlog_update + */ + Trans_binlog_info *log_info= thd->semisync_info; + log_file= log_info && log_info->log_file[0] ? log_info->log_file : 0; + log_pos= log_info ? 
log_info->log_pos : 0; + + DBUG_ASSERT(!log_file || dirname_length(log_file) == 0); + + if (is_real_trans && + log_pos && + wait_point() == SEMI_SYNC_MASTER_WAIT_POINT_AFTER_STORAGE_COMMIT) + ret= commit_trx(log_file, log_pos); + + if (is_real_trans && log_info) + { + log_info->log_file[0]= 0; + log_info->log_pos= 0; + } + + return ret; +} + +int Repl_semi_sync_master::wait_after_rollback(THD *thd, bool all) +{ + return wait_after_commit(thd, all); +} + +/** + The method runs after flush to binary log is done. +*/ +int Repl_semi_sync_master::report_binlog_update(THD* thd, const char *log_file, + my_off_t log_pos) +{ + if (get_master_enabled()) + { + Trans_binlog_info *log_info; + + if (!(log_info= thd->semisync_info)) + { + if(!(log_info= (Trans_binlog_info*)my_malloc(PSI_INSTRUMENT_ME, + sizeof(Trans_binlog_info), MYF(0)))) + return 1; + thd->semisync_info= log_info; + } + strcpy(log_info->log_file, log_file + dirname_length(log_file)); + log_info->log_pos = log_pos; + + return write_tranx_in_binlog(log_info->log_file, log_pos); + } + + return 0; +} + +int Repl_semi_sync_master::dump_start(THD* thd, + const char *log_file, + my_off_t log_pos) +{ + if (!thd->semi_sync_slave) + return 0; + + if (ack_receiver.add_slave(thd)) + { + sql_print_error("Failed to register slave to semi-sync ACK receiver " + "thread. 
Turning off semisync"); + thd->semi_sync_slave= 0; + return 1; + } + + add_slave(); + report_reply_binlog(thd->variables.server_id, + log_file + dirname_length(log_file), log_pos); + sql_print_information("Start semi-sync binlog_dump to slave " + "(server_id: %ld), pos(%s, %lu)", + (long) thd->variables.server_id, log_file, + (ulong) log_pos); + + return 0; +} + +void Repl_semi_sync_master::dump_end(THD* thd) +{ + if (!thd->semi_sync_slave) + return; + + sql_print_information("Stop semi-sync binlog_dump to slave (server_id: %ld)", + (long) thd->variables.server_id); + + remove_slave(); + ack_receiver.remove_slave(thd); +} + +int Repl_semi_sync_master::commit_trx(const char* trx_wait_binlog_name, + my_off_t trx_wait_binlog_pos) +{ + DBUG_ENTER("Repl_semi_sync_master::commit_trx"); + + if (get_master_enabled() && trx_wait_binlog_name) + { + struct timespec start_ts; + struct timespec abstime; + int wait_result; + PSI_stage_info old_stage; + THD *thd= current_thd; + + set_timespec(start_ts, 0); + + DEBUG_SYNC(thd, "rpl_semisync_master_commit_trx_before_lock"); + /* Acquire the mutex. */ + lock(); + + /* This must be called after acquired the lock */ + THD_ENTER_COND(thd, &COND_binlog_send, &LOCK_binlog, + & stage_waiting_for_semi_sync_ack_from_slave, + & old_stage); + + /* This is the real check inside the mutex. */ + if (!get_master_enabled() || !is_on()) + goto l_end; + + DBUG_PRINT("semisync", ("%s: wait pos (%s, %lu), repl(%d)", + "Repl_semi_sync_master::commit_trx", + trx_wait_binlog_name, (ulong)trx_wait_binlog_pos, + (int)is_on())); + + while (is_on() && !thd_killed(thd)) + { + if (m_reply_file_name_inited) + { + int cmp = Active_tranx::compare(m_reply_file_name, m_reply_file_pos, + trx_wait_binlog_name, + trx_wait_binlog_pos); + if (cmp >= 0) + { + /* We have already sent the relevant binlog to the slave: no need to + * wait here. 
+ */ + DBUG_PRINT("semisync", ("%s: Binlog reply is ahead (%s, %lu),", + "Repl_semi_sync_master::commit_trx", + m_reply_file_name, + (ulong)m_reply_file_pos)); + break; + } + } + + /* Let us update the info about the minimum binlog position of waiting + * threads. + */ + if (m_wait_file_name_inited) + { + int cmp = Active_tranx::compare(trx_wait_binlog_name, + trx_wait_binlog_pos, + m_wait_file_name, m_wait_file_pos); + if (cmp <= 0) + { + /* This thd has a lower position, let's update the minimum info. */ + strmake_buf(m_wait_file_name, trx_wait_binlog_name); + m_wait_file_pos = trx_wait_binlog_pos; + + rpl_semi_sync_master_wait_pos_backtraverse++; + DBUG_PRINT("semisync", ("%s: move back wait position (%s, %lu),", + "Repl_semi_sync_master::commit_trx", + m_wait_file_name, (ulong)m_wait_file_pos)); + } + } + else + { + strmake_buf(m_wait_file_name, trx_wait_binlog_name); + m_wait_file_pos = trx_wait_binlog_pos; + m_wait_file_name_inited = true; + + DBUG_PRINT("semisync", ("%s: init wait position (%s, %lu),", + "Repl_semi_sync_master::commit_trx", + m_wait_file_name, (ulong)m_wait_file_pos)); + } + + /* In semi-synchronous replication, we wait until the binlog-dump + * thread has received the reply on the relevant binlog segment from the + * replication slave. + * + * Let us suspend this thread to wait on the condition; + * when replication has progressed far enough, we will release + * these waiting threads. + */ + rpl_semi_sync_master_wait_sessions++; + + /* We keep track of when this thread is awaiting an ack to ensure it is + * not killed while awaiting an ACK if a shutdown is issued. 
+ */ + set_thd_awaiting_semisync_ack(thd, TRUE); + + DBUG_PRINT("semisync", ("%s: wait %lu ms for binlog sent (%s, %lu)", + "Repl_semi_sync_master::commit_trx", + m_wait_timeout, + m_wait_file_name, (ulong)m_wait_file_pos)); + + create_timeout(&abstime, &start_ts); + wait_result = cond_timewait(&abstime); + + set_thd_awaiting_semisync_ack(thd, FALSE); + rpl_semi_sync_master_wait_sessions--; + + if (wait_result != 0) + { + /* This is a real wait timeout. */ + sql_print_warning("Timeout waiting for reply of binlog (file: %s, pos: %lu), " + "semi-sync up to file %s, position %lu.", + trx_wait_binlog_name, (ulong)trx_wait_binlog_pos, + m_reply_file_name, (ulong)m_reply_file_pos); + rpl_semi_sync_master_wait_timeouts++; + + /* switch semi-sync off */ + switch_off(); + } + else + { + int wait_time; + + wait_time = get_wait_time(start_ts); + if (wait_time < 0) + { + DBUG_PRINT("semisync", ("Replication semi-sync getWaitTime fail at " + "wait position (%s, %lu)", + trx_wait_binlog_name, + (ulong)trx_wait_binlog_pos)); + rpl_semi_sync_master_timefunc_fails++; + } + else + { + rpl_semi_sync_master_trx_wait_num++; + rpl_semi_sync_master_trx_wait_time += wait_time; + } + } + } + + /* + At this point, the binlog file and position of this transaction + must have been removed from Active_tranx. + m_active_tranxs may be NULL if someone disabled semi sync during + cond_timewait() + */ + assert(thd_killed(thd) || !m_active_tranxs || + !m_active_tranxs->is_tranx_end_pos(trx_wait_binlog_name, + trx_wait_binlog_pos)); + + l_end: + /* Update the status counter. */ + if (is_on()) + rpl_semi_sync_master_yes_transactions++; + else + rpl_semi_sync_master_no_transactions++; + + /* The lock held will be released by thd_exit_cond, so no need to + call unlock() here */ + THD_EXIT_COND(thd, &old_stage); + } + + DBUG_RETURN(0); +} + +/* Indicate that semi-sync replication is OFF now. + * + * What should we do when it is disabled? 
The problem is that we want + * the semi-sync replication enabled again when the slave catches up + * later. But, it is not that easy to detect that the slave has caught + * up. This is caused by the fact that MySQL's replication protocol is + * asynchronous, meaning that if the master does not use the semi-sync + * protocol, the slave would not send anything to the master. + * Still, if the master is sending (N+1)-th event, we assume that it is + * an indicator that the slave has received N-th event and earlier ones. + * + * If semi-sync is disabled, all transactions still update the wait + * position with the last position in binlog. But no transactions will + * wait for confirmations and the active transaction list would not be + * maintained. In binlog dump thread, update_sync_header() checks whether + * the current sending event catches up with last wait position. If it + * does match, semi-sync will be switched on again. + */ +void Repl_semi_sync_master::switch_off() +{ + DBUG_ENTER("Repl_semi_sync_master::switch_off"); + + m_state = false; + + /* Clear the active transaction list. */ + assert(m_active_tranxs != NULL); + m_active_tranxs->clear_active_tranx_nodes(NULL, 0); + + rpl_semi_sync_master_off_times++; + m_wait_file_name_inited = false; + m_reply_file_name_inited = false; + sql_print_information("Semi-sync replication switched OFF."); + cond_broadcast(); /* wake up all waiting threads */ + + DBUG_VOID_RETURN; +} + +int Repl_semi_sync_master::try_switch_on(int server_id, + const char *log_file_name, + my_off_t log_file_pos) +{ + bool semi_sync_on = false; + + DBUG_ENTER("Repl_semi_sync_master::try_switch_on"); + + /* If the current sending event's position is larger than or equal to the + * 'largest' commit transaction binlog position, the slave is already + * catching up now and we can switch semi-sync on here. + * If m_commit_file_name_inited indicates there are no recent transactions, + * we can enable semi-sync immediately. 
+ */ + if (m_commit_file_name_inited) + { + int cmp = Active_tranx::compare(log_file_name, log_file_pos, + m_commit_file_name, m_commit_file_pos); + semi_sync_on = (cmp >= 0); + } + else + { + semi_sync_on = true; + } + + if (semi_sync_on) + { + /* Switch semi-sync replication on. */ + m_state = true; + + sql_print_information("Semi-sync replication switched ON with slave (server_id: %d) " + "at (%s, %lu)", + server_id, log_file_name, + (ulong)log_file_pos); + } + + DBUG_RETURN(0); +} + +int Repl_semi_sync_master::reserve_sync_header(String* packet) +{ + DBUG_ENTER("Repl_semi_sync_master::reserve_sync_header"); + + /* Set the magic number and the sync status. By default, no sync + * is required. + */ + packet->append(reinterpret_cast(k_sync_header), + sizeof(k_sync_header)); + DBUG_RETURN(0); +} + +int Repl_semi_sync_master::update_sync_header(THD* thd, unsigned char *packet, + const char *log_file_name, + my_off_t log_file_pos, + bool* need_sync) +{ + int cmp = 0; + bool sync = false; + + DBUG_ENTER("Repl_semi_sync_master::update_sync_header"); + + /* If the semi-sync master is not enabled, or the slave is not a semi-sync + * target, do not request replies from the slave. + */ + if (!get_master_enabled() || !thd->semi_sync_slave) + { + *need_sync = false; + DBUG_RETURN(0); + } + + lock(); + + /* This is the real check inside the mutex. */ + if (!get_master_enabled()) + { + assert(sync == false); + goto l_end; + } + + if (is_on()) + { + /* semi-sync is ON */ + sync = false; /* No sync unless a transaction is involved. */ + + if (m_reply_file_name_inited) + { + cmp = Active_tranx::compare(log_file_name, log_file_pos, + m_reply_file_name, m_reply_file_pos); + if (cmp <= 0) + { + /* If we have already got the reply for the event, then we do + * not need to sync the transaction again. 
+ */ + goto l_end; + } + } + + if (m_wait_file_name_inited) + { + cmp = Active_tranx::compare(log_file_name, log_file_pos, + m_wait_file_name, m_wait_file_pos); + } + else + { + cmp = 1; + } + + /* If we are already waiting for some transaction replies which + * are later in binlog, do not wait for this one event. + */ + if (cmp >= 0) + { + /* + * We only wait if the event is a transaction's ending event. + */ + assert(m_active_tranxs != NULL); + sync = m_active_tranxs->is_tranx_end_pos(log_file_name, + log_file_pos); + } + } + else + { + if (m_commit_file_name_inited) + { + int cmp = Active_tranx::compare(log_file_name, log_file_pos, + m_commit_file_name, m_commit_file_pos); + sync = (cmp >= 0); + } + else + { + sync = true; + } + } + + DBUG_PRINT("semisync", ("%s: server(%lu), (%s, %lu) sync(%d), repl(%d)", + "Repl_semi_sync_master::update_sync_header", + thd->variables.server_id, log_file_name, + (ulong)log_file_pos, sync, (int)is_on())); + *need_sync= sync; + + l_end: + unlock(); + + /* We do not need to clear sync flag because we set it to 0 when we + * reserve the packet header. + */ + if (sync) + { + (packet)[2] = k_packet_flag_sync; + } + + DBUG_RETURN(0); +} + +int Repl_semi_sync_master::write_tranx_in_binlog(const char* log_file_name, + my_off_t log_file_pos) +{ + int result = 0; + + DBUG_ENTER("Repl_semi_sync_master::write_tranx_in_binlog"); + + lock(); + + /* This is the real check inside the mutex. */ + if (!get_master_enabled()) + goto l_end; + + /* Update the 'largest' transaction commit position seen so far even + * though semi-sync is switched off. + * It is much better that we update m_commit_file* here, instead of + * inside commit_trx(). This is mostly because update_sync_header() + * will watch for m_commit_file* to decide whether to switch semi-sync + * on. The detailed reason is explained in function update_sync_header(). 
+ */ + if (m_commit_file_name_inited) + { + int cmp = Active_tranx::compare(log_file_name, log_file_pos, + m_commit_file_name, m_commit_file_pos); + if (cmp > 0) + { + /* This is a larger position, let's update the maximum info. */ + strncpy(m_commit_file_name, log_file_name, FN_REFLEN-1); + m_commit_file_name[FN_REFLEN-1] = 0; /* make sure it ends properly */ + m_commit_file_pos = log_file_pos; + } + } + else + { + strncpy(m_commit_file_name, log_file_name, FN_REFLEN-1); + m_commit_file_name[FN_REFLEN-1] = 0; /* make sure it ends properly */ + m_commit_file_pos = log_file_pos; + m_commit_file_name_inited = true; + } + + if (is_on()) + { + assert(m_active_tranxs != NULL); + if(m_active_tranxs->insert_tranx_node(log_file_name, log_file_pos)) + { + /* + if insert tranx_node failed, print a warning message + and turn off semi-sync + */ + sql_print_warning("Semi-sync failed to insert tranx_node for binlog file: %s, position: %lu", + log_file_name, (ulong)log_file_pos); + switch_off(); + } + else + { + rpl_semi_sync_master_request_ack++; + } + } + + l_end: + unlock(); + + DBUG_RETURN(result); +} + +int Repl_semi_sync_master::flush_net(THD *thd, + const char *event_buf) +{ + int result = -1; + NET* net= &thd->net; + + DBUG_ENTER("Repl_semi_sync_master::flush_net"); + + assert((unsigned char)event_buf[1] == k_packet_magic_num); + if ((unsigned char)event_buf[2] != k_packet_flag_sync) + { + /* current event does not require reply */ + result = 0; + goto l_end; + } + + /* We flush to make sure that the current event is sent to the network, + * instead of being buffered in the TCP/IP stack. 
+ */ + if (net_flush(net)) + { + sql_print_error("Semi-sync master failed on net_flush() " + "before waiting for slave reply"); + goto l_end; + } + + net_clear(net, 0); + net->pkt_nr++; + net->compress_pkt_nr++; + result = 0; + rpl_semi_sync_master_net_wait_num++; + + l_end: + thd->clear_error(); + + DBUG_RETURN(result); +} + +int Repl_semi_sync_master::after_reset_master() +{ + int result = 0; + + DBUG_ENTER("Repl_semi_sync_master::after_reset_master"); + + if (rpl_semi_sync_master_enabled) + { + sql_print_information("Enable Semi-sync Master after reset master"); + enable_master(); + } + + lock(); + + if (rpl_semi_sync_master_clients == 0 && + !rpl_semi_sync_master_wait_no_slave) + m_state = 0; + else + m_state = get_master_enabled()? 1 : 0; + + m_wait_file_name_inited = false; + m_reply_file_name_inited = false; + m_commit_file_name_inited = false; + + rpl_semi_sync_master_yes_transactions = 0; + rpl_semi_sync_master_no_transactions = 0; + rpl_semi_sync_master_off_times = 0; + rpl_semi_sync_master_timefunc_fails = 0; + rpl_semi_sync_master_wait_sessions = 0; + rpl_semi_sync_master_wait_pos_backtraverse = 0; + rpl_semi_sync_master_trx_wait_num = 0; + rpl_semi_sync_master_trx_wait_time = 0; + rpl_semi_sync_master_net_wait_num = 0; + rpl_semi_sync_master_net_wait_time = 0; + + unlock(); + + DBUG_RETURN(result); +} + +int Repl_semi_sync_master::before_reset_master() +{ + int result = 0; + + DBUG_ENTER("Repl_semi_sync_master::before_reset_master"); + + if (rpl_semi_sync_master_enabled) + disable_master(); + + DBUG_RETURN(result); +} + +void Repl_semi_sync_master::check_and_switch() +{ + lock(); + if (get_master_enabled() && is_on()) + { + if (!rpl_semi_sync_master_wait_no_slave + && rpl_semi_sync_master_clients == 0) + switch_off(); + } + unlock(); +} + +void Repl_semi_sync_master::set_export_stats() +{ + lock(); + + rpl_semi_sync_master_status = m_state; + rpl_semi_sync_master_avg_trx_wait_time= + ((rpl_semi_sync_master_trx_wait_num) ? 
+ (ulong)((double)rpl_semi_sync_master_trx_wait_time / + ((double)rpl_semi_sync_master_trx_wait_num)) : 0); + rpl_semi_sync_master_avg_net_wait_time= + ((rpl_semi_sync_master_net_wait_num) ? + (ulong)((double)rpl_semi_sync_master_net_wait_time / + ((double)rpl_semi_sync_master_net_wait_num)) : 0); + + unlock(); +} + +void Repl_semi_sync_master::await_slave_reply() +{ + struct timespec abstime; + + DBUG_ENTER("Repl_semi_sync_master::::await_slave_reply"); + lock(); + + /* Just return if there is nothing to wait for */ + if (!rpl_semi_sync_master_wait_sessions) + goto end; + + create_timeout(&abstime, NULL); + cond_timewait(&abstime); + +end: + unlock(); + DBUG_VOID_RETURN; +} + +/* Get the waiting time given the wait's staring time. + * + * Return: + * >= 0: the waiting time in microsecons(us) + * < 0: error in get time or time back traverse + */ +static int get_wait_time(const struct timespec& start_ts) +{ + ulonglong start_usecs, end_usecs; + struct timespec end_ts; + + /* Starting time in microseconds(us). */ + start_usecs = timespec_to_usec(&start_ts); + + /* Get the wait time interval. */ + set_timespec(end_ts, 0); + + /* Ending time in microseconds(us). */ + end_usecs = timespec_to_usec(&end_ts); + + if (end_usecs < start_usecs) + return -1; + + return (int)(end_usecs - start_usecs); +} + +void semi_sync_master_deinit() +{ + repl_semisync_master.cleanup(); + ack_receiver.cleanup(); +} diff --git a/sql/semisync_master.h b/sql/semisync_master.h new file mode 100644 index 00000000..5451ad51 --- /dev/null +++ b/sql/semisync_master.h @@ -0,0 +1,712 @@ +/* Copyright (C) 2007 Google Inc. + Copyright (c) 2008 MySQL AB, 2009 Sun Microsystems, Inc. + Use is subject to license terms. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifndef SEMISYNC_MASTER_H +#define SEMISYNC_MASTER_H + +#include "semisync.h" +#include "semisync_master_ack_receiver.h" + +#ifdef HAVE_PSI_INTERFACE +extern PSI_mutex_key key_LOCK_rpl_semi_sync_master_enabled; +extern PSI_mutex_key key_LOCK_binlog; +extern PSI_cond_key key_COND_binlog_send; +#endif + +struct Tranx_node { + char log_name[FN_REFLEN]; + my_off_t log_pos; + struct Tranx_node *next; /* the next node in the sorted list */ + struct Tranx_node *hash_next; /* the next node during hash collision */ +}; + +/** + @class Tranx_node_allocator + + This class provides memory allocating and freeing methods for + Tranx_node. The main target is performance. + + @section ALLOCATE How to allocate a node + The pointer of the first node after 'last_node' in current_block is + returned. current_block will move to the next free Block when all nodes of + it are in use. A new Block is allocated and is put into the rear of the + Block link table if no Block is free. + + The list starts up empty (ie, there is no allocated Block). + + After some nodes are freed, there probably are some free nodes before + the sequence of the allocated nodes, but we do not reuse it. It is better + to keep the allocated nodes are in the sequence, for it is more efficient + for allocating and freeing Tranx_node. + + @section FREENODE How to free nodes + There are two methods for freeing nodes. They are free_all_nodes and + free_nodes_before. + + 'A Block is free' means all of its nodes are free. 
+ @subsection free_nodes_before + As all allocated nodes are in the sequence, 'Before one node' means all + nodes before given node in the same Block and all Blocks before the Block + which containing the given node. As such, all Blocks before the given one + ('node') are free Block and moved into the rear of the Block link table. + The Block containing the given 'node', however, is not. For at least the + given 'node' is still in use. This will waste at most one Block, but it is + more efficient. + */ +#define BLOCK_TRANX_NODES 16 +class Tranx_node_allocator +{ +public: + /** + @param reserved_nodes + The number of reserved Tranx_nodes. It is used to set 'reserved_blocks' + which can contain at least 'reserved_nodes' number of Tranx_nodes. When + freeing memory, we will reserve at least reserved_blocks of Blocks not + freed. + */ + Tranx_node_allocator(uint reserved_nodes) : + reserved_blocks(reserved_nodes/BLOCK_TRANX_NODES + + (reserved_nodes%BLOCK_TRANX_NODES > 1 ? 2 : 1)), + first_block(NULL), last_block(NULL), + current_block(NULL), last_node(-1), block_num(0) {} + + ~Tranx_node_allocator() + { + Block *block= first_block; + while (block != NULL) + { + Block *next= block->next; + free_block(block); + block= next; + } + } + + /** + The pointer of the first node after 'last_node' in current_block is + returned. current_block will move to the next free Block when all nodes of + it are in use. A new Block is allocated and is put into the rear of the + Block link table if no Block is free. + + @return Return a Tranx_node *, or NULL if an error occurred. 
+ */ + Tranx_node *allocate_node() + { + Tranx_node *trx_node; + Block *block= current_block; + + if (last_node == BLOCK_TRANX_NODES-1) + { + current_block= current_block->next; + last_node= -1; + } + + if (current_block == NULL && allocate_block()) + { + current_block= block; + if (current_block) + last_node= BLOCK_TRANX_NODES-1; + return NULL; + } + + trx_node= &(current_block->nodes[++last_node]); + trx_node->log_name[0] = '\0'; + trx_node->log_pos= 0; + trx_node->next= 0; + trx_node->hash_next= 0; + return trx_node; + } + + /** + All nodes are freed. + + @return Return 0, or 1 if an error occurred. + */ + int free_all_nodes() + { + current_block= first_block; + last_node= -1; + free_blocks(); + return 0; + } + + /** + All Blocks before the given 'node' are free Block and moved into the rear + of the Block link table. + + @param node All nodes before 'node' will be freed + + @return Return 0, or 1 if an error occurred. + */ + int free_nodes_before(Tranx_node* node) + { + Block *block; + Block *prev_block= NULL; + + block= first_block; + while (block != current_block->next) + { + /* Find the Block containing the given node */ + if (&(block->nodes[0]) <= node && &(block->nodes[BLOCK_TRANX_NODES]) >= node) + { + /* All Blocks before the given node are put into the rear */ + if (first_block != block) + { + last_block->next= first_block; + first_block= block; + last_block= prev_block; + last_block->next= NULL; + free_blocks(); + } + return 0; + } + prev_block= block; + block= block->next; + } + + /* Node does not find should never happen */ + DBUG_ASSERT(0); + return 1; + } + +private: + uint reserved_blocks; + + /** + A sequence memory which contains BLOCK_TRANX_NODES Tranx_nodes. + + BLOCK_TRANX_NODES The number of Tranx_nodes which are in a Block. + + next Every Block has a 'next' pointer which points to the next Block. + These linking Blocks constitute a Block link table. 
+ */ + struct Block { + Block *next; + Tranx_node nodes[BLOCK_TRANX_NODES]; + }; + + /** + The 'first_block' is the head of the Block link table; + */ + Block *first_block; + /** + The 'last_block' is the rear of the Block link table; + */ + Block *last_block; + + /** + current_block always points the Block in the Block link table in + which the last allocated node is. The Blocks before it are all in use + and the Blocks after it are all free. + */ + Block *current_block; + + /** + It always points to the last node which has been allocated in the + current_block. + */ + int last_node; + + /** + How many Blocks are in the Block link table. + */ + uint block_num; + + /** + Allocate a block and then assign it to current_block. + */ + int allocate_block() + { + Block *block= (Block *)my_malloc(PSI_INSTRUMENT_ME, sizeof(Block), MYF(0)); + if (block) + { + block->next= NULL; + + if (first_block == NULL) + first_block= block; + else + last_block->next= block; + + /* New Block is always put into the rear */ + last_block= block; + /* New Block is always the current_block */ + current_block= block; + ++block_num; + return 0; + } + return 1; + } + + /** + Free a given Block. + @param block The Block will be freed. + */ + void free_block(Block *block) + { + my_free(block); + --block_num; + } + + + /** + If there are some free Blocks and the total number of the Blocks in the + Block link table is larger than the 'reserved_blocks', Some free Blocks + will be freed until the total number of the Blocks is equal to the + 'reserved_blocks' or there is only one free Block behind the + 'current_block'. 
+ */ + void free_blocks() + { + if (current_block == NULL || current_block->next == NULL) + return; + + /* One free Block is always kept behind the current block */ + Block *block= current_block->next->next; + while (block_num > reserved_blocks && block != NULL) + { + Block *next= block->next; + free_block(block); + block= next; + } + current_block->next->next= block; + if (block == NULL) + last_block= current_block->next; + } +}; + +/** + This class manages memory for active transaction list. + + We record each active transaction with a Tranx_node, each session + can have only one open transaction. Because of EVENT, the total + active transaction nodes can exceed the maximum allowed + connections. +*/ +class Active_tranx + :public Trace { +private: + + Tranx_node_allocator m_allocator; + /* These two record the active transaction list in sort order. */ + Tranx_node *m_trx_front, *m_trx_rear; + + Tranx_node **m_trx_htb; /* A hash table on active transactions. */ + + int m_num_entries; /* maximum hash table entries */ + mysql_mutex_t *m_lock; /* mutex lock */ + + inline void assert_lock_owner(); + + inline unsigned int calc_hash(const unsigned char *key, size_t length); + unsigned int get_hash_value(const char *log_file_name, my_off_t log_file_pos); + + int compare(const char *log_file_name1, my_off_t log_file_pos1, + const Tranx_node *node2) { + return compare(log_file_name1, log_file_pos1, + node2->log_name, node2->log_pos); + } + int compare(const Tranx_node *node1, + const char *log_file_name2, my_off_t log_file_pos2) { + return compare(node1->log_name, node1->log_pos, + log_file_name2, log_file_pos2); + } + int compare(const Tranx_node *node1, const Tranx_node *node2) { + return compare(node1->log_name, node1->log_pos, + node2->log_name, node2->log_pos); + } + +public: + Active_tranx(mysql_mutex_t *lock, unsigned long trace_level); + ~Active_tranx(); + + /* Insert an active transaction node with the specified position. 
+ * + * Return: + * 0: success; non-zero: error + */ + int insert_tranx_node(const char *log_file_name, my_off_t log_file_pos); + + /* Clear the active transaction nodes until(inclusive) the specified + * position. + * If log_file_name is NULL, everything will be cleared: the sorted + * list and the hash table will be reset to empty. + */ + void clear_active_tranx_nodes(const char *log_file_name, + my_off_t log_file_pos); + + /* Given a position, check to see whether the position is an active + * transaction's ending position by probing the hash table. + */ + bool is_tranx_end_pos(const char *log_file_name, my_off_t log_file_pos); + + /* Given two binlog positions, compare which one is bigger based on + * (file_name, file_position). + */ + static int compare(const char *log_file_name1, my_off_t log_file_pos1, + const char *log_file_name2, my_off_t log_file_pos2); + +}; + +/** + The extension class for the master of semi-synchronous replication +*/ +class Repl_semi_sync_master + :public Repl_semi_sync_base { + Active_tranx *m_active_tranxs; /* active transaction list: the list will + be cleared when semi-sync switches off. */ + + /* True when init_object has been called */ + bool m_init_done; + + /* This cond variable is signaled when enough binlog has been sent to slave, + * so that a waiting trx can return the 'ok' to the client for a commit. + */ + mysql_cond_t COND_binlog_send; + + /* Mutex that protects the following state variables and the active + * transaction list. + * Under no cirumstances we can acquire mysql_bin_log.LOCK_log if we are + * already holding m_LOCK_binlog because it can cause deadlocks. + */ + mysql_mutex_t LOCK_binlog; + + /* This is set to true when m_reply_file_name contains meaningful data. */ + bool m_reply_file_name_inited; + + /* The binlog name up to which we have received replies from any slaves. */ + char m_reply_file_name[FN_REFLEN]; + + /* The position in that file up to which we have the reply from any slaves. 
*/ + my_off_t m_reply_file_pos; + + /* This is set to true when we know the 'smallest' wait position. */ + bool m_wait_file_name_inited; + + /* NULL, or the 'smallest' filename that a transaction is waiting for + * slave replies. + */ + char m_wait_file_name[FN_REFLEN]; + + /* The smallest position in that file that a trx is waiting for: the trx + * can proceed and send an 'ok' to the client when the master has got the + * reply from the slave indicating that it already got the binlog events. + */ + my_off_t m_wait_file_pos; + + /* This is set to true when we know the 'largest' transaction commit + * position in the binlog file. + * We always maintain the position no matter whether semi-sync is switched + * on switched off. When a transaction wait timeout occurs, semi-sync will + * switch off. Binlog-dump thread can use the three fields to detect when + * slaves catch up on replication so that semi-sync can switch on again. + */ + bool m_commit_file_name_inited; + + /* The 'largest' binlog filename that a commit transaction is seeing. */ + char m_commit_file_name[FN_REFLEN]; + + /* The 'largest' position in that file that a commit transaction is seeing. */ + my_off_t m_commit_file_pos; + + /* All global variables which can be set by parameters. */ + volatile bool m_master_enabled; /* semi-sync is enabled on the master */ + unsigned long m_wait_timeout; /* timeout period(ms) during tranx wait */ + + bool m_state; /* whether semi-sync is switched */ + + /*Waiting for ACK before/after innodb commit*/ + ulong m_wait_point; + + void lock(); + void unlock(); + void cond_broadcast(); + int cond_timewait(struct timespec *wait_time); + + /* Is semi-sync replication on? */ + bool is_on() { + return (m_state); + } + + void set_master_enabled(bool enabled) { + m_master_enabled = enabled; + } + + /* Switch semi-sync off because of timeout in transaction waiting. */ + void switch_off(); + + /* Switch semi-sync on when slaves catch up. 
*/ + int try_switch_on(int server_id, + const char *log_file_name, my_off_t log_file_pos); + + public: + Repl_semi_sync_master(); + ~Repl_semi_sync_master() = default; + + void cleanup(); + + bool get_master_enabled() { + return m_master_enabled; + } + void set_trace_level(unsigned long trace_level) { + m_trace_level = trace_level; + if (m_active_tranxs) + m_active_tranxs->m_trace_level = trace_level; + } + + /* Set the transaction wait timeout period, in milliseconds. */ + void set_wait_timeout(unsigned long wait_timeout) { + m_wait_timeout = wait_timeout; + } + + int sync_get_master_wait_sessions(); + + /* + Calculates a timeout that is m_wait_timeout after start_arg and saves it + in out. If start_arg is NULL, the timeout is m_wait_timeout after the + current system time. + */ + void create_timeout(struct timespec *out, struct timespec *start_arg); + + /* + Blocks the calling thread until the ack_receiver either receives an ACK + or times out (from rpl_semi_sync_master_timeout) + */ + void await_slave_reply(); + + /*set the ACK point, after binlog sync or after transaction commit*/ + void set_wait_point(unsigned long ack_point) + { + m_wait_point = ack_point; + } + + ulong wait_point() //no cover line + { + return m_wait_point; //no cover line + } + + /* Initialize this class after MySQL parameters are initialized. this + * function should be called once at bootstrap time. + */ + int init_object(); + + /* Enable the object to enable semi-sync replication inside the master. */ + int enable_master(); + + /* Disable the object to disable semi-sync replication inside the master. */ + void disable_master(); + + /* Add a semi-sync replication slave */ + void add_slave(); + + /* Remove a semi-sync replication slave */ + void remove_slave(); + + /* It parses a reply packet and call report_reply_binlog to handle it. 
*/ + int report_reply_packet(uint32 server_id, const uchar *packet, + ulong packet_len); + + /* In semi-sync replication, reports up to which binlog position we have + * received replies from the slave indicating that it already get the events. + * + * Input: + * server_id - (IN) master server id number + * log_file_name - (IN) binlog file name + * end_offset - (IN) the offset in the binlog file up to which we have + * the replies from the slave + * + * Return: + * 0: success; non-zero: error + */ + int report_reply_binlog(uint32 server_id, + const char* log_file_name, + my_off_t end_offset); + + /* Commit a transaction in the final step. This function is called from + * InnoDB before returning from the low commit. If semi-sync is switch on, + * the function will wait to see whether binlog-dump thread get the reply for + * the events of the transaction. Remember that this is not a direct wait, + * instead, it waits to see whether the binlog-dump thread has reached the + * point. If the wait times out, semi-sync status will be switched off and + * all other transaction would not wait either. + * + * Input: (the transaction events' ending binlog position) + * trx_wait_binlog_name - (IN) ending position's file name + * trx_wait_binlog_pos - (IN) ending position's file offset + * + * Return: + * 0: success; non-zero: error + */ + int commit_trx(const char* trx_wait_binlog_name, + my_off_t trx_wait_binlog_pos); + + /*Wait for ACK after writing/sync binlog to file*/ + int wait_after_sync(const char* log_file, my_off_t log_pos); + + /*Wait for ACK after commting the transaction*/ + int wait_after_commit(THD* thd, bool all); + + /*Wait after the transaction is rollback*/ + int wait_after_rollback(THD *thd, bool all); + /*Store the current binlog position in m_active_tranxs. 
This position should + * be acked by slave*/ + int report_binlog_update(THD *thd, const char *log_file,my_off_t log_pos); + + int dump_start(THD* thd, + const char *log_file, + my_off_t log_pos); + + void dump_end(THD* thd); + + /* Reserve space in the replication event packet header: + * . slave semi-sync off: 1 byte - (0) + * . slave semi-sync on: 3 byte - (0, 0xef, 0/1} + * + * Input: + * packet - (IN) the header buffer + * + * Return: + * size of the bytes reserved for header + */ + int reserve_sync_header(String* packet); + + /* Update the sync bit in the packet header to indicate to the slave whether + * the master will wait for the reply of the event. If semi-sync is switched + * off and we detect that the slave is catching up, we switch semi-sync on. + * + * Input: + * THD - (IN) current dump thread + * packet - (IN) the packet containing the replication event + * log_file_name - (IN) the event ending position's file name + * log_file_pos - (IN) the event ending position's file offset + * need_sync - (IN) identify if flush_net is needed to call. + * server_id - (IN) master server id number + * + * Return: + * 0: success; non-zero: error + */ + int update_sync_header(THD* thd, unsigned char *packet, + const char *log_file_name, + my_off_t log_file_pos, + bool* need_sync); + + /* Called when a transaction finished writing binlog events. + * . update the 'largest' transactions' binlog event position + * . insert the ending position in the active transaction list if + * semi-sync is on + * + * Input: (the transaction events' ending binlog position) + * log_file_name - (IN) transaction ending position's file name + * log_file_pos - (IN) transaction ending position's file offset + * + * Return: + * 0: success; non-zero: error + */ + int write_tranx_in_binlog(const char* log_file_name, my_off_t log_file_pos); + + /* Read the slave's reply so that we know how much progress the slave makes + * on receive replication events. 
+ */ + int flush_net(THD* thd, const char *event_buf); + + /* Export internal statistics for semi-sync replication. */ + void set_export_stats(); + + /* 'reset master' command is issued from the user and semi-sync need to + * go off for that. + */ + int after_reset_master(); + + /*called before reset master*/ + int before_reset_master(); + + void check_and_switch(); + + /* + Determines if the given thread is currently awaiting a semisync_ack. Note + that the thread's value is protected by this class's LOCK_binlog, so this + function (indirectly) provides safe access. + */ + my_bool is_thd_awaiting_semisync_ack(THD *thd) + { + lock(); + my_bool ret= thd->is_awaiting_semisync_ack; + unlock(); + return ret; + } + + /* + Update the thread's value for is_awaiting_semisync_ack. LOCK_binlog (from + this class) should be acquired before calling this function. + */ + void set_thd_awaiting_semisync_ack(THD *thd, + my_bool _is_awaiting_semisync_ack) + { + mysql_mutex_assert_owner(&LOCK_binlog); + thd->is_awaiting_semisync_ack= _is_awaiting_semisync_ack; + } + + mysql_mutex_t LOCK_rpl_semi_sync_master_enabled; +}; + +enum rpl_semi_sync_master_wait_point_t { + SEMI_SYNC_MASTER_WAIT_POINT_AFTER_BINLOG_SYNC, + SEMI_SYNC_MASTER_WAIT_POINT_AFTER_STORAGE_COMMIT, +}; + +extern Repl_semi_sync_master repl_semisync_master; +extern Ack_receiver ack_receiver; + +/* System and status variables for the master component */ +extern my_bool rpl_semi_sync_master_enabled; +extern my_bool rpl_semi_sync_master_status; +extern ulong rpl_semi_sync_master_wait_point; +extern ulong rpl_semi_sync_master_clients; +extern ulong rpl_semi_sync_master_timeout; +extern ulong rpl_semi_sync_master_trace_level; +extern ulong rpl_semi_sync_master_yes_transactions; +extern ulong rpl_semi_sync_master_no_transactions; +extern ulong rpl_semi_sync_master_off_times; +extern ulong rpl_semi_sync_master_wait_timeouts; +extern ulong rpl_semi_sync_master_timefunc_fails; +extern ulong rpl_semi_sync_master_num_timeouts; 
+extern ulong rpl_semi_sync_master_wait_sessions; +extern ulong rpl_semi_sync_master_wait_pos_backtraverse; +extern ulong rpl_semi_sync_master_avg_trx_wait_time; +extern ulong rpl_semi_sync_master_avg_net_wait_time; +extern ulonglong rpl_semi_sync_master_net_wait_num; +extern ulonglong rpl_semi_sync_master_trx_wait_num; +extern ulonglong rpl_semi_sync_master_net_wait_time; +extern ulonglong rpl_semi_sync_master_trx_wait_time; +extern unsigned long long rpl_semi_sync_master_request_ack; +extern unsigned long long rpl_semi_sync_master_get_ack; + +/* + This indicates whether we should keep waiting if no semi-sync slave + is available. + 0 : stop waiting if detected no avaialable semi-sync slave. + 1 (default) : keep waiting until timeout even no available semi-sync slave. +*/ +extern char rpl_semi_sync_master_wait_no_slave; +extern Repl_semi_sync_master repl_semisync_master; + +extern PSI_stage_info stage_waiting_for_semi_sync_ack_from_slave; +extern PSI_stage_info stage_reading_semi_sync_ack; +extern PSI_stage_info stage_waiting_for_semi_sync_slave; + +void semi_sync_master_deinit(); + +#endif /* SEMISYNC_MASTER_H */ diff --git a/sql/semisync_master_ack_receiver.cc b/sql/semisync_master_ack_receiver.cc new file mode 100644 index 00000000..559f939c --- /dev/null +++ b/sql/semisync_master_ack_receiver.cc @@ -0,0 +1,303 @@ +/* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include +#include "semisync_master.h" +#include "semisync_master_ack_receiver.h" + +#ifdef HAVE_PSI_MUTEX_INTERFACE +extern PSI_mutex_key key_LOCK_ack_receiver; +extern PSI_cond_key key_COND_ack_receiver; +#endif +#ifdef HAVE_PSI_THREAD_INTERFACE +extern PSI_thread_key key_thread_ack_receiver; +#endif +extern Repl_semi_sync_master repl_semisync; + +/* Callback function of ack receive thread */ +pthread_handler_t ack_receive_handler(void *arg) +{ + Ack_receiver *recv= reinterpret_cast(arg); + + my_thread_init(); + recv->run(); + my_thread_end(); + + return NULL; +} + +Ack_receiver::Ack_receiver() +{ + DBUG_ENTER("Ack_receiver::Ack_receiver"); + + m_status= ST_DOWN; + mysql_mutex_init(key_LOCK_ack_receiver, &m_mutex, NULL); + mysql_cond_init(key_COND_ack_receiver, &m_cond, NULL); + m_pid= 0; + + DBUG_VOID_RETURN; +} + +void Ack_receiver::cleanup() +{ + DBUG_ENTER("Ack_receiver::~Ack_receiver"); + + stop(); + mysql_mutex_destroy(&m_mutex); + mysql_cond_destroy(&m_cond); + + DBUG_VOID_RETURN; +} + +bool Ack_receiver::start() +{ + DBUG_ENTER("Ack_receiver::start"); + + mysql_mutex_lock(&m_mutex); + if(m_status == ST_DOWN) + { + pthread_attr_t attr; + + m_status= ST_UP; + + if (DBUG_IF("rpl_semisync_simulate_create_thread_failure") || + pthread_attr_init(&attr) != 0 || + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE) != 0 || +#ifndef _WIN32 + pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM) != 0 || +#endif + mysql_thread_create(key_thread_ack_receiver, &m_pid, + &attr, ack_receive_handler, this)) + { + sql_print_error("Failed to start semi-sync ACK receiver thread, " + " could not create thread(errno:%d)", errno); + + m_status= ST_DOWN; + mysql_mutex_unlock(&m_mutex); + + DBUG_RETURN(true); + } + (void) pthread_attr_destroy(&attr); + } + 
mysql_mutex_unlock(&m_mutex); + + DBUG_RETURN(false); +} + +void Ack_receiver::stop() +{ + DBUG_ENTER("Ack_receiver::stop"); + + mysql_mutex_lock(&m_mutex); + if (m_status == ST_UP) + { + m_status= ST_STOPPING; + mysql_cond_broadcast(&m_cond); + + while (m_status == ST_STOPPING) + mysql_cond_wait(&m_cond, &m_mutex); + + DBUG_ASSERT(m_status == ST_DOWN); + + m_pid= 0; + } + mysql_mutex_unlock(&m_mutex); + + DBUG_VOID_RETURN; +} + +bool Ack_receiver::add_slave(THD *thd) +{ + Slave *slave; + DBUG_ENTER("Ack_receiver::add_slave"); + + if (!(slave= new Slave)) + DBUG_RETURN(true); + + slave->thd= thd; + slave->vio= *thd->net.vio; + slave->vio.mysql_socket.m_psi= NULL; + slave->vio.read_timeout= 1; + + mysql_mutex_lock(&m_mutex); + m_slaves.push_back(slave); + m_slaves_changed= true; + mysql_cond_broadcast(&m_cond); + mysql_mutex_unlock(&m_mutex); + + DBUG_RETURN(false); +} + +void Ack_receiver::remove_slave(THD *thd) +{ + I_List_iterator it(m_slaves); + Slave *slave; + DBUG_ENTER("Ack_receiver::remove_slave"); + + mysql_mutex_lock(&m_mutex); + + while ((slave= it++)) + { + if (slave->thd == thd) + { + delete slave; + m_slaves_changed= true; + break; + } + } + mysql_mutex_unlock(&m_mutex); + + DBUG_VOID_RETURN; +} + +inline void Ack_receiver::set_stage_info(const PSI_stage_info &stage) +{ + (void)MYSQL_SET_STAGE(stage.m_key, __FILE__, __LINE__); +} + +inline void Ack_receiver::wait_for_slave_connection() +{ + set_stage_info(stage_waiting_for_semi_sync_slave); + mysql_cond_wait(&m_cond, &m_mutex); +} + +/* Auxilary function to initialize a NET object with given net buffer. 
*/ +static void init_net(NET *net, unsigned char *buff, unsigned int buff_len) +{ + memset(net, 0, sizeof(NET)); + net->max_packet= buff_len; + net->buff= buff; + net->buff_end= buff + buff_len; + net->read_pos= net->buff; +} + +void Ack_receiver::run() +{ + THD *thd= new THD(next_thread_id()); + NET net; + unsigned char net_buff[REPLY_MESSAGE_MAX_LENGTH]; + + my_thread_init(); + + DBUG_ENTER("Ack_receiver::run"); + +#ifdef HAVE_POLL + Poll_socket_listener listener(m_slaves); +#else + Select_socket_listener listener(m_slaves); +#endif //HAVE_POLL + + sql_print_information("Starting ack receiver thread"); + thd->system_thread= SYSTEM_THREAD_SEMISYNC_MASTER_BACKGROUND; + thd->thread_stack= (char*) &thd; + thd->store_globals(); + thd->security_ctx->skip_grants(); + thd->set_command(COM_DAEMON); + init_net(&net, net_buff, REPLY_MESSAGE_MAX_LENGTH); + + mysql_mutex_lock(&m_mutex); + m_slaves_changed= true; + mysql_mutex_unlock(&m_mutex); + + while (1) + { + int ret; + uint slave_count __attribute__((unused))= 0; + Slave *slave; + + mysql_mutex_lock(&m_mutex); + if (unlikely(m_status == ST_STOPPING)) + goto end; + + set_stage_info(stage_waiting_for_semi_sync_ack_from_slave); + if (unlikely(m_slaves_changed)) + { + if (unlikely(m_slaves.is_empty())) + { + wait_for_slave_connection(); + mysql_mutex_unlock(&m_mutex); + continue; + } + + if ((slave_count= listener.init_slave_sockets()) == 0) + goto end; + m_slaves_changed= false; +#ifdef HAVE_POLL + DBUG_PRINT("info", ("fd count %u", slave_count)); +#else + DBUG_PRINT("info", ("fd count %u, max_fd %d", slave_count, + (int) listener.get_max_fd())); +#endif + } + + ret= listener.listen_on_sockets(); + if (ret <= 0) + { + mysql_mutex_unlock(&m_mutex); + + ret= DBUG_IF("rpl_semisync_simulate_select_error") ? -1 : ret; + + if (ret == -1 && errno != EINTR) + sql_print_information("Failed to wait on semi-sync sockets, " + "error: errno=%d", socket_errno); + /* Sleep 1us, so other threads can catch the m_mutex easily. 
*/ + my_sleep(1); + continue; + } + + set_stage_info(stage_reading_semi_sync_ack); + Slave_ilist_iterator it(m_slaves); + while ((slave= it++)) + { + if (listener.is_socket_active(slave)) + { + ulong len; + + net_clear(&net, 0); + net.vio= &slave->vio; + /* + Set compress flag. This is needed to support + Slave_compress_protocol flag enabled Slaves + */ + net.compress= slave->thd->net.compress; + + len= my_net_read(&net); + if (likely(len != packet_error)) + repl_semisync_master.report_reply_packet(slave->server_id(), + net.read_pos, len); + else + { + if (net.last_errno == ER_NET_READ_ERROR) + { + listener.clear_socket_info(slave); + } + if (net.last_errno > 0 && global_system_variables.log_warnings > 2) + sql_print_warning("Semisync ack receiver got error %d \"%s\" " + "from slave server-id %d", + net.last_errno, ER_DEFAULT(net.last_errno), + slave->server_id()); + } + } + } + mysql_mutex_unlock(&m_mutex); + } +end: + sql_print_information("Stopping ack receiver thread"); + m_status= ST_DOWN; + delete thd; + mysql_cond_broadcast(&m_cond); + mysql_mutex_unlock(&m_mutex); + DBUG_VOID_RETURN; +} diff --git a/sql/semisync_master_ack_receiver.h b/sql/semisync_master_ack_receiver.h new file mode 100644 index 00000000..d869bd2e --- /dev/null +++ b/sql/semisync_master_ack_receiver.h @@ -0,0 +1,240 @@ +/* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef SEMISYNC_MASTER_ACK_RECEIVER_DEFINED +#define SEMISYNC_MASTER_ACK_RECEIVER_DEFINED + +#include "my_global.h" +#include "my_pthread.h" +#include "sql_class.h" +#include "semisync.h" +#include + +struct Slave :public ilink +{ + THD *thd; + Vio vio; +#ifdef HAVE_POLL + uint m_fds_index; +#endif + my_socket sock_fd() const { return vio.mysql_socket.fd; } + uint server_id() const { return thd->variables.server_id; } +}; + +typedef I_List Slave_ilist; +typedef I_List_iterator Slave_ilist_iterator; + +/** + Ack_receiver is responsible to control ack receive thread and maintain + slave information used by ack receive thread. + + There are mainly four operations on ack receive thread: + start: start ack receive thread + stop: stop ack receive thread + add_slave: maintain a new semisync slave's information + remove_slave: remove a semisync slave's information + */ +class Ack_receiver : public Repl_semi_sync_base +{ +public: + Ack_receiver(); + ~Ack_receiver() = default; + void cleanup(); + /** + Notify ack receiver to receive acks on the dump session. + + It adds the given dump thread into the slave list and wakes + up ack thread if it is waiting for any slave coming. + + @param[in] thd THD of a dump thread. + + @return it return false if succeeds, otherwise true is returned. + */ + bool add_slave(THD *thd); + + /** + Notify ack receiver not to receive ack on the dump session. + + it removes the given dump thread from slave list. + + @param[in] thd THD of a dump thread. + */ + void remove_slave(THD *thd); + + /** + Start ack receive thread + + @return it return false if succeeds, otherwise true is returned. + */ + bool start(); + + /** + Stop ack receive thread + */ + void stop(); + + /** + The core of ack receive thread. 
+ + It monitors all slaves' sockets and receives acks when they come. + */ + void run(); + + void set_trace_level(unsigned long trace_level) + { + m_trace_level= trace_level; + } +private: + enum status {ST_UP, ST_DOWN, ST_STOPPING}; + uint8 m_status; + /* + Protect m_status, m_slaves_changed and m_slaves. ack thread and other + session may access the variables at the same time. + */ + mysql_mutex_t m_mutex; + mysql_cond_t m_cond; + /* If slave list is updated(add or remove). */ + bool m_slaves_changed; + + Slave_ilist m_slaves; + pthread_t m_pid; + +/* Declare them private, so no one can copy the object. */ + Ack_receiver(const Ack_receiver &ack_receiver); + Ack_receiver& operator=(const Ack_receiver &ack_receiver); + + void set_stage_info(const PSI_stage_info &stage); + void wait_for_slave_connection(); +}; + + +#ifdef HAVE_POLL +#include +#include + +class Poll_socket_listener +{ +public: + Poll_socket_listener(const Slave_ilist &slaves) + :m_slaves(slaves) + { + } + + bool listen_on_sockets() + { + return poll(m_fds.data(), m_fds.size(), 1000 /*1 Second timeout*/); + } + + bool is_socket_active(const Slave *slave) + { + return m_fds[slave->m_fds_index].revents & POLLIN; + } + + void clear_socket_info(const Slave *slave) + { + m_fds[slave->m_fds_index].fd= -1; + m_fds[slave->m_fds_index].events= 0; + } + + uint init_slave_sockets() + { + Slave_ilist_iterator it(const_cast(m_slaves)); + Slave *slave; + uint fds_index= 0; + + m_fds.clear(); + while ((slave= it++)) + { + pollfd poll_fd; + poll_fd.fd= slave->sock_fd(); + poll_fd.events= POLLIN; + m_fds.push_back(poll_fd); + slave->m_fds_index= fds_index++; + } + return fds_index; + } + +private: + const Slave_ilist &m_slaves; + std::vector m_fds; +}; + +#else //NO POLL + +class Select_socket_listener +{ +public: + Select_socket_listener(const Slave_ilist &slaves) + :m_slaves(slaves), m_max_fd(INVALID_SOCKET) + { + } + + bool listen_on_sockets() + { + /* Reinitialize the fds with active fds before calling select */ + 
m_fds= m_init_fds; + struct timeval tv= {1,0}; + /* select requires max fd + 1 for the first argument */ + return select((int) m_max_fd+1, &m_fds, NULL, NULL, &tv); + } + + bool is_socket_active(const Slave *slave) + { + return FD_ISSET(slave->sock_fd(), &m_fds); + } + + void clear_socket_info(const Slave *slave) + { + FD_CLR(slave->sock_fd(), &m_init_fds); + } + + uint init_slave_sockets() + { + Slave_ilist_iterator it(const_cast(m_slaves)); + Slave *slave; + uint fds_index= 0; + + FD_ZERO(&m_init_fds); + while ((slave= it++)) + { + my_socket socket_id= slave->sock_fd(); + m_max_fd= (socket_id > m_max_fd ? socket_id : m_max_fd); +#ifndef _WIN32 + if (socket_id > FD_SETSIZE) + { + sql_print_error("Semisync slave socket fd is %u. " + "select() cannot handle if the socket fd is " + "greater than %u (FD_SETSIZE).", socket_id, FD_SETSIZE); + return 0; + } +#endif //_WIN32 + FD_SET(socket_id, &m_init_fds); + fds_index++; + } + return fds_index; + } + my_socket get_max_fd() { return m_max_fd; } + +private: + const Slave_ilist &m_slaves; + my_socket m_max_fd; + fd_set m_init_fds; + fd_set m_fds; +}; + +#endif //HAVE_POLL + +extern Ack_receiver ack_receiver; +#endif diff --git a/sql/semisync_slave.cc b/sql/semisync_slave.cc new file mode 100644 index 00000000..788aab78 --- /dev/null +++ b/sql/semisync_slave.cc @@ -0,0 +1,275 @@ +/* Copyright (c) 2008 MySQL AB, 2009 Sun Microsystems, Inc. + Use is subject to license terms. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + + +#include +#include "semisync_slave.h" + +Repl_semi_sync_slave repl_semisync_slave; + +my_bool rpl_semi_sync_slave_enabled= 0; + +char rpl_semi_sync_slave_delay_master; +my_bool rpl_semi_sync_slave_status= 0; +ulong rpl_semi_sync_slave_trace_level; + +/* + indicate whether or not the slave should send a reply to the master. + + This is set to true in repl_semi_slave_read_event if the current + event read is the last event of a transaction. And the value is + checked in repl_semi_slave_queue_event. +*/ +bool semi_sync_need_reply= false; +unsigned int rpl_semi_sync_slave_kill_conn_timeout; +unsigned long long rpl_semi_sync_slave_send_ack = 0; + +int Repl_semi_sync_slave::init_object() +{ + int result= 0; + + m_init_done = true; + + /* References to the parameter works after set_options(). 
*/ + set_slave_enabled(rpl_semi_sync_slave_enabled); + set_trace_level(rpl_semi_sync_slave_trace_level); + set_delay_master(rpl_semi_sync_slave_delay_master); + set_kill_conn_timeout(rpl_semi_sync_slave_kill_conn_timeout); + + return result; +} + +int Repl_semi_sync_slave::slave_read_sync_header(const uchar *header, + unsigned long total_len, + int *semi_flags, + const uchar **payload, + unsigned long *payload_len) +{ + int read_res = 0; + DBUG_ENTER("Repl_semi_sync_slave::slave_read_sync_header"); + + if (rpl_semi_sync_slave_status) + { + if (!DBUG_IF("semislave_corrupt_log") + && header[0] == k_packet_magic_num) + { + semi_sync_need_reply = (header[1] & k_packet_flag_sync); + *payload_len = total_len - 2; + *payload = header + 2; + + DBUG_PRINT("semisync", ("%s: reply - %d", + "Repl_semi_sync_slave::slave_read_sync_header", + semi_sync_need_reply)); + + if (semi_sync_need_reply) + *semi_flags |= SEMI_SYNC_NEED_ACK; + if (is_delay_master()) + *semi_flags |= SEMI_SYNC_SLAVE_DELAY_SYNC; + } + else + { + sql_print_error("Missing magic number for semi-sync packet, packet " + "len: %lu", total_len); + read_res = -1; + } + } else { + *payload= header; + *payload_len= total_len; + } + + DBUG_RETURN(read_res); +} + +int Repl_semi_sync_slave::slave_start(Master_info *mi) +{ + bool semi_sync= get_slave_enabled(); + + sql_print_information("Slave I/O thread: Start %s replication to\ + master '%s@%s:%d' in log '%s' at position %lu", + semi_sync ? 
"semi-sync" : "asynchronous", + const_cast(mi->user), mi->host, mi->port, + const_cast(mi->master_log_name), + (unsigned long)(mi->master_log_pos)); + + if (semi_sync && !rpl_semi_sync_slave_status) + rpl_semi_sync_slave_status= 1; + + /*clear the counter*/ + rpl_semi_sync_slave_send_ack= 0; + return 0; +} + +int Repl_semi_sync_slave::slave_stop(Master_info *mi) +{ + if (get_slave_enabled()) + kill_connection(mi->mysql); + + if (rpl_semi_sync_slave_status) + rpl_semi_sync_slave_status= 0; + + return 0; +} + +int Repl_semi_sync_slave::reset_slave(Master_info *mi) +{ + return 0; +} + +void Repl_semi_sync_slave::kill_connection(MYSQL *mysql) +{ + if (!mysql) + return; + + char kill_buffer[30]; + MYSQL *kill_mysql = NULL; + size_t kill_buffer_length; + + kill_mysql = mysql_init(kill_mysql); + mysql_options(kill_mysql, MYSQL_OPT_CONNECT_TIMEOUT, &m_kill_conn_timeout); + mysql_options(kill_mysql, MYSQL_OPT_READ_TIMEOUT, &m_kill_conn_timeout); + mysql_options(kill_mysql, MYSQL_OPT_WRITE_TIMEOUT, &m_kill_conn_timeout); + + bool ret= (!mysql_real_connect(kill_mysql, mysql->host, + mysql->user, mysql->passwd,0, mysql->port, mysql->unix_socket, 0)); + if (DBUG_IF("semisync_slave_failed_kill") || ret) + { + sql_print_information("cannot connect to master to kill slave io_thread's " + "connection"); + goto failed_graceful_kill; + } + + DBUG_EXECUTE_IF("slave_delay_killing_semisync_connection", my_sleep(400000);); + + kill_buffer_length= my_snprintf(kill_buffer, 30, "KILL %lu", + mysql->thread_id); + if (mysql_real_query(kill_mysql, kill_buffer, (ulong)kill_buffer_length)) + { + sql_print_information( + "Failed to gracefully kill our active semi-sync connection with " + "primary. Silently closing the connection."); + goto failed_graceful_kill; + } + +end: + mysql_close(kill_mysql); + return; + +failed_graceful_kill: + /* + If we fail to issue `KILL` on the primary to kill the active semi-sync + connection; we need to locally clean up our side of the connection. 
This + is because mysql_close will send COM_QUIT on the active semi-sync + connection, causing the primary to error. + */ + net_clear(&(mysql->net), 0); + end_server(mysql); + goto end; +} + +int Repl_semi_sync_slave::request_transmit(Master_info *mi) +{ + MYSQL *mysql= mi->mysql; + MYSQL_RES *res= 0; + MYSQL_ROW row; + const char *query; + + if (!get_slave_enabled()) + return 0; + + query= "SHOW VARIABLES LIKE 'rpl_semi_sync_master_enabled'"; + if (mysql_real_query(mysql, query, (ulong)strlen(query)) || + !(res= mysql_store_result(mysql))) + { + sql_print_error("Execution failed on master: %s, error :%s", query, mysql_error(mysql)); + return 1; + } + + row= mysql_fetch_row(res); + if (DBUG_IF("master_not_support_semisync") || !row) + { + /* Master does not support semi-sync */ + sql_print_warning("Master server does not support semi-sync, " + "fallback to asynchronous replication"); + rpl_semi_sync_slave_status= 0; + mysql_free_result(res); + return 0; + } + mysql_free_result(res); + + /* + Tell master dump thread that we want to do semi-sync + replication + */ + query= "SET @rpl_semi_sync_slave= 1"; + if (mysql_real_query(mysql, query, (ulong)strlen(query))) + { + sql_print_error("Set 'rpl_semi_sync_slave=1' on master failed"); + return 1; + } + mysql_free_result(mysql_store_result(mysql)); + rpl_semi_sync_slave_status= 1; + + return 0; +} + +int Repl_semi_sync_slave::slave_reply(Master_info *mi) +{ + MYSQL* mysql= mi->mysql; + const char *binlog_filename= const_cast(mi->master_log_name); + my_off_t binlog_filepos= mi->master_log_pos; + + NET *net= &mysql->net; + uchar reply_buffer[REPLY_MAGIC_NUM_LEN + + REPLY_BINLOG_POS_LEN + + REPLY_BINLOG_NAME_LEN]; + int reply_res = 0; + size_t name_len = strlen(binlog_filename); + + DBUG_ENTER("Repl_semi_sync_slave::slave_reply"); + + if (rpl_semi_sync_slave_status && semi_sync_need_reply) + { + /* Prepare the buffer of the reply. 
*/ + reply_buffer[REPLY_MAGIC_NUM_OFFSET] = k_packet_magic_num; + int8store(reply_buffer + REPLY_BINLOG_POS_OFFSET, binlog_filepos); + memcpy(reply_buffer + REPLY_BINLOG_NAME_OFFSET, + binlog_filename, + name_len + 1 /* including trailing '\0' */); + + DBUG_PRINT("semisync", ("%s: reply (%s, %lu)", + "Repl_semi_sync_slave::slave_reply", + binlog_filename, (ulong)binlog_filepos)); + + net_clear(net, 0); + /* Send the reply. */ + reply_res = my_net_write(net, reply_buffer, + name_len + REPLY_BINLOG_NAME_OFFSET); + if (!reply_res) + { + reply_res = (DBUG_IF("semislave_failed_net_flush") || net_flush(net)); + if (reply_res) + sql_print_error("Semi-sync slave net_flush() reply failed"); + rpl_semi_sync_slave_send_ack++; + } + else + { + sql_print_error("Semi-sync slave send reply failed: %s (%d)", + net->last_error, net->last_errno); + } + } + + DBUG_RETURN(reply_res); +} diff --git a/sql/semisync_slave.h b/sql/semisync_slave.h new file mode 100644 index 00000000..a8229245 --- /dev/null +++ b/sql/semisync_slave.h @@ -0,0 +1,116 @@ +/* Copyright (c) 2006 MySQL AB, 2009 Sun Microsystems, Inc. + Use is subject to license terms. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + + +#ifndef SEMISYNC_SLAVE_H +#define SEMISYNC_SLAVE_H + +#include "semisync.h" +#include "my_global.h" +#include "sql_priv.h" +#include "rpl_mi.h" +#include "mysql.h" +#include + +class Master_info; + +/** + The extension class for the slave of semi-synchronous replication +*/ +class Repl_semi_sync_slave + :public Repl_semi_sync_base { +public: + Repl_semi_sync_slave() :m_slave_enabled(false) {} + ~Repl_semi_sync_slave() = default; + + void set_trace_level(unsigned long trace_level) { + m_trace_level = trace_level; + } + + /* Initialize this class after MySQL parameters are initialized. this + * function should be called once at bootstrap time. + */ + int init_object(); + + bool get_slave_enabled() { + return m_slave_enabled; + } + + void set_slave_enabled(bool enabled) { + m_slave_enabled = enabled; + } + + bool is_delay_master(){ + return m_delay_master; + } + + void set_delay_master(bool enabled) { + m_delay_master = enabled; + } + + void set_kill_conn_timeout(unsigned int timeout) { + m_kill_conn_timeout = timeout; + } + + /* A slave reads the semi-sync packet header and separate the metadata + * from the payload data. + * + * Input: + * header - (IN) packet header pointer + * total_len - (IN) total packet length: metadata + payload + * semi_flags - (IN) store flags: SEMI_SYNC_SLAVE_DELAY_SYNC and + SEMI_SYNC_NEED_ACK + * payload - (IN) payload: the replication event + * payload_len - (IN) payload length + * + * Return: + * 0: success; non-zero: error + */ + int slave_read_sync_header(const uchar *header, unsigned long total_len, + int *semi_flags, + const uchar **payload, unsigned long *payload_len); + + /* A slave replies to the master indicating its replication process. 
It + * indicates that the slave has received all events before the specified + * binlog position. + */ + int slave_reply(Master_info* mi); + int slave_start(Master_info *mi); + int slave_stop(Master_info *mi); + int request_transmit(Master_info*); + void kill_connection(MYSQL *mysql); + int reset_slave(Master_info *mi); + +private: + /* True when init_object has been called */ + bool m_init_done; + bool m_slave_enabled; /* semi-sycn is enabled on the slave */ + bool m_delay_master; + unsigned int m_kill_conn_timeout; +}; + + +/* System and status variables for the slave component */ +extern my_bool rpl_semi_sync_slave_enabled; +extern my_bool rpl_semi_sync_slave_status; +extern ulong rpl_semi_sync_slave_trace_level; +extern Repl_semi_sync_slave repl_semisync_slave; + +extern char rpl_semi_sync_slave_delay_master; +extern unsigned int rpl_semi_sync_slave_kill_conn_timeout; +extern unsigned long long rpl_semi_sync_slave_send_ack; + +#endif /* SEMISYNC_SLAVE_H */ diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc new file mode 100644 index 00000000..e1a4a25b --- /dev/null +++ b/sql/service_wsrep.cc @@ -0,0 +1,416 @@ +/* Copyright 2018-2023 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "mariadb.h" + +#include "mysql/service_wsrep.h" +#include "wsrep/key.hpp" +#include "wsrep_thd.h" +#include "wsrep_trans_observer.h" +#include "sql_class.h" +#include "debug_sync.h" +#include "log.h" + +extern "C" my_bool wsrep_on(const THD *thd) +{ + return my_bool(WSREP(thd)); +} + +extern "C" void wsrep_thd_LOCK(const THD *thd) +{ + mysql_mutex_lock(&thd->LOCK_thd_data); +} + +extern "C" int wsrep_thd_TRYLOCK(const THD *thd) +{ + return mysql_mutex_trylock(&thd->LOCK_thd_data); +} + +extern "C" void wsrep_thd_UNLOCK(const THD *thd) +{ + mysql_mutex_unlock(&thd->LOCK_thd_data); +} + +extern "C" void wsrep_thd_kill_LOCK(const THD *thd) +{ + mysql_mutex_lock(&thd->LOCK_thd_kill); +} + +extern "C" void wsrep_thd_kill_UNLOCK(const THD *thd) +{ + mysql_mutex_unlock(&thd->LOCK_thd_kill); +} + +extern "C" const char* wsrep_thd_client_state_str(const THD *thd) +{ + return wsrep::to_c_string(thd->wsrep_cs().state()); +} + +extern "C" const char* wsrep_thd_client_mode_str(const THD *thd) +{ + return wsrep::to_c_string(thd->wsrep_cs().mode()); +} + +extern "C" const char* wsrep_thd_transaction_state_str(const THD *thd) +{ + return wsrep::to_c_string(thd->wsrep_cs().transaction().state()); +} + +extern "C" const char *wsrep_thd_query(const THD *thd) +{ + if (!thd) + return "NULL"; + + switch(thd->lex->sql_command) + { + // Mask away some security related details from error log + case SQLCOM_CREATE_USER: + return "CREATE USER"; + case SQLCOM_GRANT: + return "GRANT"; + case SQLCOM_REVOKE: + return "REVOKE"; + case SQLCOM_SET_OPTION: + if (thd->lex->definer) + return "SET PASSWORD"; + /* fallthrough */ + default: + return (thd->query() ? 
thd->query() : "NULL"); + } + return "NULL"; +} + +extern "C" query_id_t wsrep_thd_transaction_id(const THD *thd) +{ + return thd->wsrep_cs().transaction().id().get(); +} + +extern "C" long long wsrep_thd_trx_seqno(const THD *thd) +{ + const wsrep::client_state& cs= thd->wsrep_cs(); + if (cs.mode() == wsrep::client_state::m_toi) + { + return cs.toi_meta().seqno().get(); + } + else + { + return cs.transaction().ws_meta().seqno().get(); + } +} + +extern "C" void wsrep_thd_self_abort(THD *thd) +{ + thd->wsrep_cs().bf_abort(wsrep::seqno(0)); +} + +extern "C" const char* wsrep_get_sr_table_name() +{ + return wsrep_sr_table_name_full; +} + +extern "C" my_bool wsrep_get_debug() +{ + return wsrep_debug; +} + +/* + Test if this connection is a true local (user) connection and not + a replication or wsrep applier thread. + + Note that this is only usable for galera (as there are other kinds + of system threads, and only if WSREP_NNULL() is tested by the caller. + */ +extern "C" my_bool wsrep_thd_is_local(const THD *thd) +{ + /* + async replication IO and background threads have nothing to + replicate in the cluster, marking them as non-local here to + prevent write set population and replication + + async replication SQL thread, applies client transactions from + mariadb master and will be replicated into cluster + */ + return ( + thd->system_thread != SYSTEM_THREAD_SLAVE_BACKGROUND && + thd->system_thread != SYSTEM_THREAD_SLAVE_IO && + thd->wsrep_cs().mode() == wsrep::client_state::m_local); +} + +extern "C" my_bool wsrep_thd_is_applying(const THD *thd) +{ + return thd->wsrep_cs().mode() == wsrep::client_state::m_high_priority; +} + +extern "C" my_bool wsrep_thd_is_toi(const THD *thd) +{ + return thd->wsrep_cs().mode() == wsrep::client_state::m_toi; +} + +extern "C" my_bool wsrep_thd_is_local_toi(const THD *thd) +{ + return thd->wsrep_cs().mode() == wsrep::client_state::m_toi && + thd->wsrep_cs().toi_mode() == wsrep::client_state::m_local; + +} + +extern "C" my_bool 
wsrep_thd_is_in_rsu(const THD *thd) +{ + return thd->wsrep_cs().mode() == wsrep::client_state::m_rsu; +} + +extern "C" my_bool wsrep_thd_is_BF(const THD *thd, my_bool sync) +{ + my_bool status = FALSE; + if (thd && WSREP(thd)) + { + if (sync) mysql_mutex_lock(&thd->LOCK_thd_data); + status = (wsrep_thd_is_applying(thd) || wsrep_thd_is_toi(thd)); + if (sync) mysql_mutex_unlock(&thd->LOCK_thd_data); + } + return status; +} + +extern "C" my_bool wsrep_thd_is_SR(const THD *thd) +{ + return thd && thd->wsrep_cs().transaction().is_streaming(); +} + +extern "C" void wsrep_handle_SR_rollback(THD *bf_thd, + THD *victim_thd) +{ + DBUG_ASSERT(victim_thd); + DBUG_ASSERT(wsrep_thd_is_SR(victim_thd)); + if (!victim_thd || !wsrep_on(bf_thd)) return; + + WSREP_DEBUG("handle rollback, for deadlock: thd %llu trx_id %" PRIu64 " frags %zu conf %s", + victim_thd->thread_id, + victim_thd->wsrep_trx_id(), + victim_thd->wsrep_sr().fragments_certified(), + wsrep_thd_transaction_state_str(victim_thd)); + + /* Note: do not store/reset globals before wsrep_bf_abort() call + to avoid losing BF thd context. */ + mysql_mutex_lock(&victim_thd->LOCK_thd_data); + if (!(bf_thd && bf_thd != victim_thd)) + { + DEBUG_SYNC(victim_thd, "wsrep_before_SR_rollback"); + } + if (bf_thd) + { + wsrep_bf_abort(bf_thd, victim_thd); + } + else + { + wsrep_thd_self_abort(victim_thd); + } + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); + if (bf_thd) + { + wsrep_store_threadvars(bf_thd); + } +} + +extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd, + my_bool signal) +{ + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + my_bool ret= wsrep_bf_abort(bf_thd, victim_thd); + /* + Send awake signal if victim was BF aborted or does not + have wsrep on. Note that this should never interrupt RSU + as RSU has paused the provider. 
+ */ + if ((ret || !wsrep_on(victim_thd)) && signal) + { + victim_thd->wsrep_aborter= bf_thd->thread_id; + victim_thd->awake_no_mutex(KILL_QUERY_HARD); + } else { + WSREP_DEBUG("wsrep_thd_bf_abort skipped awake, signal %d", signal); + } + return ret; +} + +extern "C" my_bool wsrep_thd_skip_locking(const THD *thd) +{ + return thd && thd->wsrep_skip_locking; +} + +extern "C" my_bool wsrep_thd_order_before(const THD *left, const THD *right) +{ + if (wsrep_thd_is_BF(left, false) && + wsrep_thd_is_BF(right, false) && + wsrep_thd_trx_seqno(left) < wsrep_thd_trx_seqno(right)) { + WSREP_DEBUG("BF conflict, order: %lld %lld\n", + (long long)wsrep_thd_trx_seqno(left), + (long long)wsrep_thd_trx_seqno(right)); + return TRUE; + } + WSREP_DEBUG("waiting for BF, trx order: %lld %lld\n", + (long long)wsrep_thd_trx_seqno(left), + (long long)wsrep_thd_trx_seqno(right)); + return FALSE; +} + +extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd) +{ + mysql_mutex_assert_owner(&thd->LOCK_thd_data); + + const wsrep::client_state& cs(thd->wsrep_cs()); + const enum wsrep::transaction::state tx_state(cs.transaction().state()); + switch (tx_state) + { + case wsrep::transaction::s_must_abort: + return (cs.state() == wsrep::client_state::s_exec || + cs.state() == wsrep::client_state::s_result); + case wsrep::transaction::s_aborting: + return true; + default: + return false; + } + + return false; +} + +static inline enum wsrep::key::type +map_key_type(enum Wsrep_service_key_type type) +{ + switch (type) + { + case WSREP_SERVICE_KEY_SHARED: return wsrep::key::shared; + case WSREP_SERVICE_KEY_REFERENCE: return wsrep::key::reference; + case WSREP_SERVICE_KEY_UPDATE: return wsrep::key::update; + case WSREP_SERVICE_KEY_EXCLUSIVE: return wsrep::key::exclusive; + } + return wsrep::key::exclusive; +} + +extern "C" int wsrep_thd_append_key(THD *thd, + const struct wsrep_key* key, + int n_keys, + enum Wsrep_service_key_type key_type) +{ + Wsrep_client_state& client_state(thd->wsrep_cs()); + 
DBUG_ASSERT(client_state.transaction().active()); + int ret= 0; + for (int i= 0; i < n_keys && ret == 0; ++i) + { + wsrep::key wsrep_key(map_key_type(key_type)); + for (size_t kp= 0; kp < key[i].key_parts_num; ++kp) + { + wsrep_key.append_key_part(key[i].key_parts[kp].ptr, key[i].key_parts[kp].len); + } + ret= client_state.append_key(wsrep_key); + } + /* + In case of `wsrep_gtid_mode` when WS will be replicated, we need to set + `server_id` for events that are going to be written in IO, and in case of + manual SET gtid_seq_no=X we are ignoring value. + */ + if (!ret && wsrep_gtid_mode && !thd->slave_thread && !wsrep_thd_is_applying(thd)) + { + thd->variables.server_id= wsrep_gtid_server.server_id; + thd->variables.gtid_seq_no= 0; + } + return ret; +} + +extern "C" void wsrep_commit_ordered(THD *thd) +{ + if (wsrep_is_active(thd) && + (thd->wsrep_trx().state() == wsrep::transaction::s_committing || + thd->wsrep_trx().state() == wsrep::transaction::s_ordered_commit)) + { + wsrep_gtid_server.signal_waiters(thd->wsrep_current_gtid_seqno, false); + if (wsrep_thd_is_local(thd)) + { + thd->wsrep_last_written_gtid_seqno= thd->wsrep_current_gtid_seqno; + } + if (thd->wsrep_trx().state() != wsrep::transaction::s_ordered_commit && + !wsrep_commit_will_write_binlog(thd)) + { + DEBUG_SYNC(thd, "before_wsrep_ordered_commit"); + thd->wsrep_cs().ordered_commit(); + } + } +} + +extern "C" my_bool wsrep_thd_has_ignored_error(const THD *thd) +{ + return thd->wsrep_has_ignored_error; +} + +extern "C" void wsrep_thd_set_ignored_error(THD *thd, my_bool val) +{ + thd->wsrep_has_ignored_error= val; +} + +extern "C" ulong wsrep_OSU_method_get(const MYSQL_THD thd) +{ + if (thd) + return(thd->variables.wsrep_OSU_method); + else + return(global_system_variables.wsrep_OSU_method); +} + +extern "C" void wsrep_report_bf_lock_wait(const THD *thd, + unsigned long long trx_id) +{ + if (thd) + { + WSREP_ERROR("Thread %s trx_id: %llu thread: %ld " + "seqno: %lld client_state: %s client_mode: %s 
transaction_mode: %s " + "applier: %d toi: %d local: %d " + "query: %s", + wsrep_thd_is_BF(thd, false) ? "BF" : "normal", + trx_id, + thd_get_thread_id(thd), + wsrep_thd_trx_seqno(thd), + wsrep_thd_client_state_str(thd), + wsrep_thd_client_mode_str(thd), + wsrep_thd_transaction_state_str(thd), + wsrep_thd_is_applying(thd), + wsrep_thd_is_toi(thd), + wsrep_thd_is_local(thd), + wsrep_thd_query(thd)); + } +} + +extern "C" void wsrep_thd_set_PA_unsafe(THD *thd) +{ + if (thd && thd->wsrep_cs().mark_transaction_pa_unsafe()) + { + WSREP_DEBUG("session does not have active transaction, can not mark as PA unsafe"); + } +} + +extern "C" int wsrep_thd_append_table_key(MYSQL_THD thd, + const char* db, + const char* table, + enum Wsrep_service_key_type key_type) +{ + wsrep_key_arr_t key_arr = {0, 0}; + int ret = wsrep_prepare_keys_for_isolation(thd, db, table, NULL, &key_arr); + ret = ret || wsrep_thd_append_key(thd, key_arr.keys, + (int)key_arr.keys_len, key_type); + wsrep_keys_free(&key_arr); + return ret; +} + +extern "C" my_bool wsrep_thd_is_local_transaction(const THD *thd) +{ + return (wsrep_thd_is_local(thd) && + thd->wsrep_cs().transaction().active()); +} + diff --git a/sql/session_tracker.cc b/sql/session_tracker.cc new file mode 100644 index 00000000..14cb78e8 --- /dev/null +++ b/sql/session_tracker.cc @@ -0,0 +1,1312 @@ +/* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2016, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + + +#include "sql_plugin.h" +#include "table.h" +#include "rpl_gtid.h" +#include "sql_class.h" +#include "sql_show.h" +#include "sql_plugin.h" +#include "set_var.h" + +void State_tracker::set_changed(THD *thd) +{ + m_changed= true; + thd->lex->safe_to_cache_query= 0; + thd->server_status|= SERVER_SESSION_STATE_CHANGED; +} + + +/* To be used in expanding the buffer. */ +static const unsigned int EXTRA_ALLOC= 1024; + + +void Session_sysvars_tracker::vars_list::reinit() +{ + track_all= 0; + if (m_registered_sysvars.records) + my_hash_reset(&m_registered_sysvars); +} + +/** + Copy the given list. + + @param from Source vars_list object. + @param thd THD handle to retrive the charset in use. + + @retval true there is something to track + @retval false nothing to track +*/ + +void Session_sysvars_tracker::vars_list::copy(vars_list* from, THD *thd) +{ + track_all= from->track_all; + free_hash(); + m_registered_sysvars= from->m_registered_sysvars; + from->init(); +} + +/** + Inserts the variable to be tracked into m_registered_sysvars hash. + + @param svar address of the system variable + + @retval false success + @retval true error +*/ + +bool Session_sysvars_tracker::vars_list::insert(const sys_var *svar) +{ + sysvar_node_st *node; + if (!(node= (sysvar_node_st *) my_malloc(PSI_INSTRUMENT_ME, + sizeof(sysvar_node_st), + MYF(MY_WME | + (mysqld_server_initialized ? + MY_THREAD_SPECIFIC : 0))))) + return true; + + node->m_svar= (sys_var *)svar; + node->test_load= node->m_svar->test_load; + node->m_changed= false; + if (my_hash_insert(&m_registered_sysvars, (uchar *) node)) + { + my_free(node); + if (!search((sys_var *)svar)) + { + //EOF (error is already reported) + return true; + } + } + return false; +} + +/** + Parse the specified system variables list. 
+ + @Note In case of invalid entry a warning is raised per invalid entry. + This is done in order to handle 'potentially' valid system + variables from uninstalled plugins which might get installed in + future. + + + @param thd [IN] The thd handle. + @param var_list [IN] System variable list. + @param throw_error [IN] bool when set to true, returns an error + in case of invalid/duplicate values. + @param char_set [IN] charecter set information used for string + manipulations. + + @return + true Error + false Success +*/ +bool Session_sysvars_tracker::vars_list::parse_var_list(THD *thd, + LEX_STRING var_list, + bool throw_error, + CHARSET_INFO *char_set) +{ + const char separator= ','; + char *token, *lasts= NULL; + size_t rest= var_list.length; + + if (!var_list.str || var_list.length == 0) + return false; + + if(!strcmp(var_list.str, "*")) + { + track_all= true; + return false; + } + + token= var_list.str; + + track_all= false; + for (;;) + { + sys_var *svar; + LEX_CSTRING var; + + lasts= (char *) memchr(token, separator, rest); + + var.str= token; + if (lasts) + { + var.length= (lasts - token); + rest-= var.length + 1; + } + else + var.length= rest; + + /* Remove leading/trailing whitespace. 
*/ + trim_whitespace(char_set, &var); + + if(!strcmp(var.str, "*")) + { + track_all= true; + } + else if ((svar= find_sys_var(thd, var.str, var.length, throw_error))) + { + if (insert(svar) == TRUE) + return true; + } + else if (throw_error && thd) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "%.*s is not a valid system variable and will " + "be ignored.", (int)var.length, token); + } + else + return true; + + if (lasts) + token= lasts + 1; + else + break; + } + return false; +} + + +bool sysvartrack_validate_value(THD *thd, const char *str, size_t len) +{ + LEX_STRING var_list= { (char *) str, len }; + const char separator= ','; + char *token, *lasts= NULL; + size_t rest= var_list.length; + + if (!var_list.str) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), + "session_track_system_variables", "NULL"); + return false; + } + if (var_list.length == 0 || + !strcmp(var_list.str, "*")) + { + return false; + } + + token= var_list.str; + + for (;;) + { + LEX_CSTRING var; + + lasts= (char *) memchr(token, separator, rest); + + var.str= token; + if (lasts) + { + var.length= (lasts - token); + rest-= var.length + 1; + } + else + var.length= rest; + + /* Remove leading/trailing whitespace. 
*/ + trim_whitespace(system_charset_info, &var); + + if (strcmp(var.str, "*") && !find_sys_var(thd, var.str, var.length)) + return true; + + if (lasts) + token= lasts + 1; + else + break; + } + return false; +} + + +/* Sorts variable references array */ +static int name_array_sorter(const void *a, const void *b) +{ + LEX_CSTRING **an= (LEX_CSTRING **)a, **bn=(LEX_CSTRING **)b; + size_t min= MY_MIN((*an)->length, (*bn)->length); + int res= strncmp((*an)->str, (*bn)->str, min); + if (res == 0) + res= ((int)(*bn)->length)- ((int)(*an)->length); + return res; +} + +/** + Construct variable list by internal hash with references +*/ + +bool Session_sysvars_tracker::vars_list::construct_var_list(char *buf, + size_t buf_len) +{ + LEX_CSTRING **names; + uint idx; + size_t left= buf_len; + size_t names_size= m_registered_sysvars.records * sizeof(LEX_CSTRING *); + const char separator= ','; + + if (unlikely(buf_len < 1)) + return true; + + if (unlikely(track_all)) + { + if (buf_len < 2) + return true; + buf[0]= '*'; + buf[1]= '\0'; + return false; + } + + if (m_registered_sysvars.records == 0) + { + buf[0]= '\0'; + return false; + } + + if (unlikely(!(names= (LEX_CSTRING**) my_safe_alloca(names_size)))) + return true; + + idx= 0; + + mysql_mutex_lock(&LOCK_plugin); + for (ulong i= 0; i < m_registered_sysvars.records; i++) + { + sysvar_node_st *node= at(i); + if (*node->test_load) + names[idx++]= &node->m_svar->name; + } + DBUG_ASSERT(idx <= m_registered_sysvars.records); + + /* + We check number of records again here because number of variables + could be reduced in case of plugin unload. 
+ */ + if (m_registered_sysvars.records == 0) + { + mysql_mutex_unlock(&LOCK_plugin); + buf[0]= '\0'; + return false; + } + + my_qsort(names, idx, sizeof(LEX_CSTRING*), &name_array_sorter); + + for(uint i= 0; i < idx; i++) + { + LEX_CSTRING *nm= names[i]; + size_t ln= nm->length + 1; + if (ln > left) + { + mysql_mutex_unlock(&LOCK_plugin); + my_safe_afree(names, names_size); + return true; + } + memcpy(buf, nm->str, nm->length); + buf[nm->length]= separator; + buf+= ln; + left-= ln; + } + mysql_mutex_unlock(&LOCK_plugin); + + buf--; buf[0]= '\0'; + my_safe_afree(names, names_size); + + return false; +} + + +void Session_sysvars_tracker::init(THD *thd) +{ + mysql_mutex_assert_owner(&LOCK_global_system_variables); + DBUG_ASSERT(thd->variables.session_track_system_variables == + global_system_variables.session_track_system_variables); + thd->variables.session_track_system_variables= + my_strdup(PSI_INSTRUMENT_ME, + safe_str(global_system_variables.session_track_system_variables), + MYF(MY_WME | MY_THREAD_SPECIFIC)); +} + + +void Session_sysvars_tracker::deinit(THD *thd) +{ + my_free(thd->variables.session_track_system_variables); + thd->variables.session_track_system_variables= 0; +} + + +/** + Enable session tracker by parsing global value of tracked variables. + + @param thd [IN] The thd handle. + + @retval true Error + @retval false Success +*/ + +bool Session_sysvars_tracker::enable(THD *thd) +{ + orig_list.reinit(); + m_parsed= false; + m_enabled= thd->variables.session_track_system_variables && + *thd->variables.session_track_system_variables; + reset_changed(); + return false; +} + + +/** + Once the value of the @@session_track_system_variables has been + successfully updated, this function calls + Session_sysvars_tracker::vars_list::copy updating the hash in orig_list + which represents the system variables to be tracked. + + We are doing via tool list because there possible errors with memory + in this case value will be unchanged. 
+ + @note This function is called from the ON_UPDATE() function of the + session_track_system_variables' sys_var class. + + @param thd [IN] The thd handle. + + @retval true Error + @retval false Success +*/ + +bool Session_sysvars_tracker::update(THD *thd, set_var *var) +{ + vars_list tool_list; + size_t length= 1; + + void *copy= var->save_result.string_value.str ? + my_memdup(PSI_INSTRUMENT_ME, var->save_result.string_value.str, + (length= var->save_result.string_value.length + 1), + MYF(MY_WME | MY_THREAD_SPECIFIC)) : + my_strdup(PSI_INSTRUMENT_ME, "", + MYF(MY_WME | MY_THREAD_SPECIFIC)); + if (!copy) + return true; + + if (tool_list.parse_var_list(thd, var->save_result.string_value, true, + thd->charset())) + { + my_free(copy); + return true; + } + + my_free(thd->variables.session_track_system_variables); + thd->variables.session_track_system_variables= static_cast(copy); + + m_parsed= true; + orig_list.copy(&tool_list, thd); + orig_list.construct_var_list(thd->variables.session_track_system_variables, + length); + return false; +} + + +bool Session_sysvars_tracker::vars_list::store(THD *thd, String *buf) +{ + for (ulong i= 0; i < m_registered_sysvars.records; i++) + { + sysvar_node_st *node= at(i); + + if (!node->m_changed) + continue; + + char val_buf[SHOW_VAR_FUNC_BUFF_SIZE]; + SHOW_VAR show; + CHARSET_INFO *charset; + size_t val_length, length; + mysql_mutex_lock(&LOCK_plugin); + if (!*node->test_load) + { + mysql_mutex_unlock(&LOCK_plugin); + continue; + } + sys_var *svar= node->m_svar; + bool is_plugin= svar->cast_pluginvar(); + if (!is_plugin) + mysql_mutex_unlock(&LOCK_plugin); + + /* As its always system variable. 
*/ + show.type= SHOW_SYS; + show.name= svar->name.str; + show.value= (char *) svar; + + mysql_mutex_lock(&LOCK_global_system_variables); + const char *value= get_one_variable(thd, &show, OPT_SESSION, SHOW_SYS, NULL, + &charset, val_buf, &val_length); + mysql_mutex_unlock(&LOCK_global_system_variables); + + if (is_plugin) + mysql_mutex_unlock(&LOCK_plugin); + + length= net_length_size(svar->name.length) + + svar->name.length + + net_length_size(val_length) + + val_length; + + compile_time_assert(SESSION_TRACK_SYSTEM_VARIABLES < 251); + if (unlikely((1 + net_length_size(length) + length + buf->length() >= + MAX_PACKET_LENGTH) || + buf->reserve(1 + net_length_size(length) + length, + EXTRA_ALLOC))) + return true; + + + /* Session state type (SESSION_TRACK_SYSTEM_VARIABLES) */ + buf->q_append((char)SESSION_TRACK_SYSTEM_VARIABLES); + + /* Length of the overall entity. */ + buf->q_net_store_length((ulonglong)length); + + /* System variable's name (length-encoded string). */ + buf->q_net_store_data((const uchar*)svar->name.str, svar->name.length); + + /* System variable's value (length-encoded string). */ + buf->q_net_store_data((const uchar*)value, val_length); + } + return false; +} + + +/** + Store the data for changed system variables in the specified buffer. + Once the data is stored, we reset the flags related to state-change + (see reset()). + + @param thd [IN] The thd handle. + @paran buf [INOUT] Buffer to store the information to. + + @retval true Error + @retval false Success +*/ + +bool Session_sysvars_tracker::store(THD *thd, String *buf) +{ + if (!orig_list.is_enabled()) + return false; + + if (orig_list.store(thd, buf)) + return true; + + orig_list.reset(); + + return false; +} + + +/** + Mark the system variable as changed. 
+ + @param [IN] pointer on a variable +*/ + +void Session_sysvars_tracker::mark_as_changed(THD *thd, const sys_var *var) +{ + sysvar_node_st *node; + + if (!is_enabled()) + return; + + if (!m_parsed) + { + DBUG_ASSERT(thd->variables.session_track_system_variables); + LEX_STRING tmp= { thd->variables.session_track_system_variables, + strlen(thd->variables.session_track_system_variables) }; + if (orig_list.parse_var_list(thd, tmp, true, thd->charset())) + { + orig_list.reinit(); + return; + } + m_parsed= true; + } + + /* + Check if the specified system variable is being tracked, if so + mark it as changed and also set the class's m_changed flag. + */ + if (orig_list.is_enabled() && (node= orig_list.insert_or_search(var))) + { + node->m_changed= true; + set_changed(thd); + } +} + + +/** + Supply key to a hash. + + @param entry [IN] A single entry. + @param length [OUT] Length of the key. + @param not_used Unused. + + @return Pointer to the key buffer. +*/ + +uchar *Session_sysvars_tracker::sysvars_get_key(const char *entry, + size_t *length, + my_bool not_used __attribute__((unused))) +{ + *length= sizeof(sys_var *); + return (uchar *) &(((sysvar_node_st *) entry)->m_svar); +} + + +void Session_sysvars_tracker::vars_list::reset() +{ + for (ulong i= 0; i < m_registered_sysvars.records; i++) + at(i)->m_changed= false; +} + + +bool sysvartrack_global_update(THD *thd, char *str, size_t len) +{ + LEX_STRING tmp= { str, len }; + Session_sysvars_tracker::vars_list dummy; + DBUG_EXECUTE_IF("dbug_session_tracker_parse_error", + { + my_error(ER_OUTOFMEMORY, MYF(0), 1); + return true; + }); + + if (!dummy.parse_var_list(thd, tmp, false, system_charset_info)) + { + dummy.construct_var_list(str, len + 1); + return false; + } + return true; +} + + +int session_tracker_init() +{ + DBUG_ASSERT(global_system_variables.session_track_system_variables); + if (sysvartrack_validate_value(0, + global_system_variables.session_track_system_variables, + 
strlen(global_system_variables.session_track_system_variables))) + { + sql_print_error("The variable session_track_system_variables has " + "invalid values."); + return 1; + } + return 0; +} + + +/////////////////////////////////////////////////////////////////////////////// + +/** + Enable/disable the tracker based on @@session_track_schema's value. + + @param thd [IN] The thd handle. + + @return + false (always) +*/ + +bool Current_schema_tracker::update(THD *thd, set_var *) +{ + m_enabled= thd->variables.session_track_schema; + return false; +} + + +/** + Store the schema name as length-encoded string in the specified buffer. + + @param thd [IN] The thd handle. + @paran buf [INOUT] Buffer to store the information to. + + @reval false Success + @retval true Error +*/ + +bool Current_schema_tracker::store(THD *thd, String *buf) +{ + size_t db_length, length; + + /* + Protocol made (by unknown reasons) redundant: + It saves length of database name and name of database name + + length of saved length of database length. + */ + length= db_length= thd->db.length; + length += net_length_size(length); + + compile_time_assert(SESSION_TRACK_SCHEMA < 251); + compile_time_assert(NAME_LEN < 251); + DBUG_ASSERT(length < 251); + if (unlikely((1 + 1 + length + buf->length() >= MAX_PACKET_LENGTH) || + buf->reserve(1 + 1 + length, EXTRA_ALLOC))) + return true; + + /* Session state type (SESSION_TRACK_SCHEMA) */ + buf->q_append((char)SESSION_TRACK_SCHEMA); + + /* Length of the overall entity. */ + buf->q_net_store_length(length); + + /* Length and current schema name */ + buf->q_net_store_data((const uchar *)thd->db.str, thd->db.length); + + return false; +} + + +/////////////////////////////////////////////////////////////////////////////// + +/** + Enable/disable the tracker based on @@session_track_transaction_info. + + @param thd [IN] The thd handle. 
+ + @retval true if updating the tracking level failed + @retval false otherwise +*/ + +bool Transaction_state_tracker::update(THD *thd, set_var *) +{ + if (thd->variables.session_track_transaction_info != TX_TRACK_NONE) + { + /* + If we only just turned reporting on (rather than changing between + state and characteristics reporting), start from a defined state. + */ + if (!m_enabled) + { + tx_curr_state = + tx_reported_state = TX_EMPTY; + tx_changed |= TX_CHG_STATE; + m_enabled= true; + } + if (thd->variables.session_track_transaction_info == TX_TRACK_CHISTICS) + tx_changed |= TX_CHG_CHISTICS; + set_changed(thd); + } + else + m_enabled= false; + + return false; +} + + +/** + Store the transaction state (and, optionally, characteristics) + as length-encoded string in the specified buffer. Once the data + is stored, we reset the flags related to state-change (see reset()). + + + @param thd [IN] The thd handle. + @paran buf [INOUT] Buffer to store the information to. + + @retval false Success + @retval true Error +*/ + +static LEX_CSTRING isol[]= { + { STRING_WITH_LEN("READ UNCOMMITTED") }, + { STRING_WITH_LEN("READ COMMITTED") }, + { STRING_WITH_LEN("REPEATABLE READ") }, + { STRING_WITH_LEN("SERIALIZABLE") } +}; + +bool Transaction_state_tracker::store(THD *thd, String *buf) +{ + /* STATE */ + if (tx_changed & TX_CHG_STATE) + { + if (unlikely((11 + buf->length() >= MAX_PACKET_LENGTH) || + buf->reserve(11, EXTRA_ALLOC))) + return true; + + buf->q_append((char)SESSION_TRACK_TRANSACTION_STATE); + + buf->q_append((char)9); // whole packet length + buf->q_append((char)8); // results length + + buf->q_append((char)((tx_curr_state & TX_EXPLICIT) ? 'T' : + ((tx_curr_state & TX_IMPLICIT) ? 'I' : '_'))); + buf->q_append((char)((tx_curr_state & TX_READ_UNSAFE) ? 'r' : '_')); + buf->q_append((char)(((tx_curr_state & TX_READ_TRX) || + (tx_curr_state & TX_WITH_SNAPSHOT)) ? 'R' : '_')); + buf->q_append((char)((tx_curr_state & TX_WRITE_UNSAFE) ? 
'w' : '_')); + buf->q_append((char)((tx_curr_state & TX_WRITE_TRX) ? 'W' : '_')); + buf->q_append((char)((tx_curr_state & TX_STMT_UNSAFE) ? 's' : '_')); + buf->q_append((char)((tx_curr_state & TX_RESULT_SET) ? 'S' : '_')); + buf->q_append((char)((tx_curr_state & TX_LOCKED_TABLES) ? 'L' : '_')); + } + + /* CHARACTERISTICS -- How to restart the transaction */ + + if ((thd->variables.session_track_transaction_info == TX_TRACK_CHISTICS) && + (tx_changed & TX_CHG_CHISTICS)) + { + bool is_xa= thd->transaction->xid_state.is_explicit_XA(); + size_t start; + + /* 2 length by 1 byte and code */ + if (unlikely((1 + 1 + 1 + 110 + buf->length() >= MAX_PACKET_LENGTH) || + buf->reserve(1 + 1 + 1, EXTRA_ALLOC))) + return true; + + compile_time_assert(SESSION_TRACK_TRANSACTION_CHARACTERISTICS < 251); + /* Session state type (SESSION_TRACK_TRANSACTION_CHARACTERISTICS) */ + buf->q_append((char)SESSION_TRACK_TRANSACTION_CHARACTERISTICS); + + /* placeholders for lengths. will be filled in at the end */ + buf->q_append('\0'); + buf->q_append('\0'); + + start= buf->length(); + + { + /* + We have four basic replay scenarios: + + a) SET TRANSACTION was used, but before an actual transaction + was started, the load balancer moves the connection elsewhere. + In that case, the same one-shots should be set up in the + target session. (read-only/read-write; isolation-level) + + b) The initial transaction has begun; the relevant characteristics + are the session defaults, possibly overridden by previous + SET TRANSACTION statements, possibly overridden or extended + by options passed to the START TRANSACTION statement. + If the load balancer wishes to move this transaction, + it needs to be replayed with the correct characteristics. 
+ (read-only/read-write from SET or START; + isolation-level from SET only, snapshot from START only) + + c) A subsequent transaction started with START TRANSACTION + (which is legal syntax in lieu of COMMIT AND CHAIN in MySQL) + may add/modify the current one-shots: + + - It may set up a read-only/read-write one-shot. + This one-shot will override the value used in the previous + transaction (whether that came from the default or a one-shot), + and, like all one-shots currently do, it will carry over into + any subsequent transactions that don't explicitly override them + in turn. This behavior is not guaranteed in the docs and may + change in the future, but the tracker item should correctly + reflect whatever behavior a given version of mysqld implements. + + - It may also set up a WITH CONSISTENT SNAPSHOT one-shot. + This one-shot does not currently carry over into subsequent + transactions (meaning that with "traditional syntax", WITH + CONSISTENT SNAPSHOT can only be requested for the first part + of a transaction chain). Again, the tracker item should reflect + mysqld behavior. + + d) A subsequent transaction started using COMMIT AND CHAIN + (or, for that matter, BEGIN WORK, which is currently + legal and equivalent syntax in MySQL, or START TRANSACTION + sans options) will re-use any one-shots set up so far + (with SET before the first transaction started, and with + all subsequent STARTs), except for WITH CONSISTANT SNAPSHOT, + which will never be chained and only applies when explicitly + given. + + It bears noting that if we switch sessions in a follow-up + transaction, SET TRANSACTION would be illegal in the old + session (as a transaction is active), whereas in the target + session which is being prepared, it should be legal, as no + transaction (chain) should have started yet. + + Therefore, we are free to generate SET TRANSACTION as a replay + statement even for a transaction that isn't the first in an + ongoing chain. 
Consider + + SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + START TRANSACTION READ ONLY, WITH CONSISTENT SNAPSHOT; + # work + COMMIT AND CHAIN; + + If we switch away at this point, the replay in the new session + needs to be + + SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + START TRANSACTION READ ONLY; + + When a transaction ends (COMMIT/ROLLBACK sans CHAIN), all + per-transaction characteristics are reset to the session's + defaults. + + This also holds for a transaction ended implicitly! (transaction.cc) + Once again, the aim is to have the tracker item reflect on a + given mysqld's actual behavior. + */ + + /* + "ISOLATION LEVEL" + Only legal in SET TRANSACTION, so will always be replayed as such. + */ + if (tx_isol_level != TX_ISOL_INHERIT) + { + /* + Unfortunately, we can't re-use tx_isolation_names / + tx_isolation_typelib as it hyphenates its items. + */ + buf->append(STRING_WITH_LEN("SET TRANSACTION ISOLATION LEVEL ")); + buf->append(&isol[tx_isol_level - 1]); + buf->append(STRING_WITH_LEN("; ")); + } + + /* + Start transaction will usually result in TX_EXPLICIT (transaction + started, but no data attached yet), except when WITH CONSISTENT + SNAPSHOT, in which case we may have data pending. + If it's an XA transaction, we don't go through here so we can + first print the trx access mode ("SET TRANSACTION READ ...") + separately before adding XA START (whereas with START TRANSACTION, + we can merge the access mode into the same statement). + */ + if ((tx_curr_state & TX_EXPLICIT) && !is_xa) + { + buf->append(STRING_WITH_LEN("START TRANSACTION")); + + /* + "WITH CONSISTENT SNAPSHOT" + Defaults to no, can only be enabled. + Only appears in START TRANSACTION. + */ + if (tx_curr_state & TX_WITH_SNAPSHOT) + { + buf->append(STRING_WITH_LEN(" WITH CONSISTENT SNAPSHOT")); + if (tx_read_flags != TX_READ_INHERIT) + buf->append(STRING_WITH_LEN(",")); + } + + /* + "READ WRITE / READ ONLY" can be set globally, per-session, + or just for one transaction. 
+ + The latter case can take the form of + START TRANSACTION READ (WRITE|ONLY), or of + SET TRANSACTION READ (ONLY|WRITE). + (Both set thd->read_only for the upcoming transaction; + it will ultimately be re-set to the session default.) + + As the regular session-variable tracker does not monitor the one-shot, + we'll have to do it here. + + If READ is flagged as set explicitly (rather than just inherited + from the session's default), we'll get the actual bool from the THD. + */ + if (tx_read_flags != TX_READ_INHERIT) + { + if (tx_read_flags == TX_READ_ONLY) + buf->append(STRING_WITH_LEN(" READ ONLY")); + else + buf->append(STRING_WITH_LEN(" READ WRITE")); + } + buf->append(STRING_WITH_LEN("; ")); + } + else if (tx_read_flags != TX_READ_INHERIT) + { + /* + "READ ONLY" / "READ WRITE" + We could transform this to SET TRANSACTION even when it occurs + in START TRANSACTION, but for now, we'll resysynthesize the original + command as closely as possible. + */ + buf->append(STRING_WITH_LEN("SET TRANSACTION ")); + if (tx_read_flags == TX_READ_ONLY) + buf->append(STRING_WITH_LEN("READ ONLY; ")); + else + buf->append(STRING_WITH_LEN("READ WRITE; ")); + } + + if ((tx_curr_state & TX_EXPLICIT) && is_xa) + { + XID *xid= thd->transaction->xid_state.get_xid(); + long glen, blen; + + buf->append(STRING_WITH_LEN("XA START")); + + if ((glen= xid->gtrid_length) > 0) + { + buf->append(STRING_WITH_LEN(" '")); + buf->append(xid->data, glen); + + if ((blen= xid->bqual_length) > 0) + { + buf->append(STRING_WITH_LEN("','")); + buf->append(xid->data + glen, blen); + } + buf->append(STRING_WITH_LEN("'")); + + if (xid->formatID != 1) + { + buf->append(STRING_WITH_LEN(",")); + buf->append_ulonglong(xid->formatID); + } + } + + buf->append(STRING_WITH_LEN("; ")); + } + + // discard trailing space + if (buf->length() > start) + buf->length(buf->length() - 1); + } + + { + size_t length= buf->length() - start; + uchar *place= (uchar *)(buf->ptr() + (start - 2)); + DBUG_ASSERT(length < 249); // in 
fact < 110 + DBUG_ASSERT(start >= 3); + + DBUG_ASSERT((place - 1)[0] == SESSION_TRACK_TRANSACTION_CHARACTERISTICS); + /* Length of the overall entity. */ + place[0]= (uchar)length + 1; + /* Transaction characteristics (length-encoded string). */ + place[1]= (uchar)length; + } + } + + tx_reported_state= tx_curr_state; + tx_changed= TX_CHG_NONE; + + return false; +} + + +/** + Helper function: turn table info into table access flag. + Accepts table lock type and engine type flag (transactional/ + non-transactional), and returns the corresponding access flag + out of TX_READ_TRX, TX_READ_UNSAFE, TX_WRITE_TRX, TX_WRITE_UNSAFE. + + @param thd [IN] The thd handle + @param set [IN] The table's access/lock type + @param set [IN] Whether the table's engine is transactional + + @return The table access flag +*/ + +enum_tx_state Transaction_state_tracker::calc_trx_state(THD *thd, + thr_lock_type l, + bool has_trx) +{ + enum_tx_state s; + bool read= (l < TL_FIRST_WRITE); + + if (read) + s= has_trx ? TX_READ_TRX : TX_READ_UNSAFE; + else + s= has_trx ? TX_WRITE_TRX : TX_WRITE_UNSAFE; + + return s; +} + + +/** + Register the end of an (implicit or explicit) transaction. + + @param thd [IN] The thd handle +*/ +void Transaction_state_tracker::end_trx(THD *thd) +{ + DBUG_ASSERT(thd->variables.session_track_transaction_info > TX_TRACK_NONE); + + if ((!m_enabled) || (thd->state_flags & Open_tables_state::BACKUPS_AVAIL)) + return; + + if (tx_curr_state != TX_EMPTY) + { + if (tx_curr_state & TX_EXPLICIT) + tx_changed |= TX_CHG_CHISTICS; + tx_curr_state &= TX_LOCKED_TABLES; + } + update_change_flags(thd); +} + + +/** + Clear flags pertaining to the current statement or transaction. + May be called repeatedly within the same execution cycle. + + @param thd [IN] The thd handle. 
+ @param set [IN] The flags to clear +*/ + +void Transaction_state_tracker::clear_trx_state(THD *thd, uint clear) +{ + if ((!m_enabled) || (thd->state_flags & Open_tables_state::BACKUPS_AVAIL)) + return; + + tx_curr_state &= ~clear; + update_change_flags(thd); +} + + +/** + Add flags pertaining to the current statement or transaction. + May be called repeatedly within the same execution cycle, + e.g. to add access info for more tables. + + @param thd [IN] The thd handle. + @param set [IN] The flags to add +*/ + +void Transaction_state_tracker::add_trx_state(THD *thd, uint add) +{ + if ((!m_enabled) || (thd->state_flags & Open_tables_state::BACKUPS_AVAIL)) + return; + + if (add == TX_EXPLICIT) + { + /* Always send characteristic item (if tracked), always replace state. */ + tx_changed |= TX_CHG_CHISTICS; + tx_curr_state = TX_EXPLICIT; + } + + /* + If we're not in an implicit or explicit transaction, but + autocommit==0 and tables are accessed, we flag "implicit transaction." + */ + else if (!(tx_curr_state & (TX_EXPLICIT|TX_IMPLICIT)) && + (thd->variables.option_bits & OPTION_NOT_AUTOCOMMIT) && + (add & + (TX_READ_TRX | TX_READ_UNSAFE | TX_WRITE_TRX | TX_WRITE_UNSAFE))) + tx_curr_state |= TX_IMPLICIT; + + /* + Only flag state when in transaction or LOCK TABLES is added. + */ + if ((tx_curr_state & (TX_EXPLICIT | TX_IMPLICIT)) || + (add & TX_LOCKED_TABLES)) + tx_curr_state |= add; + + update_change_flags(thd); +} + + +/** + Add "unsafe statement" flag if applicable. + + @param thd [IN] The thd handle. + @param set [IN] The flags to add +*/ + +void Transaction_state_tracker::add_trx_state_from_thd(THD *thd) +{ + if (m_enabled) + { + if (thd->lex->is_stmt_unsafe()) + add_trx_state(thd, TX_STMT_UNSAFE); + } +} + + +/** + Set read flags (read only/read write) pertaining to the next + transaction. + + @param thd [IN] The thd handle. 
+ @param set [IN] The flags to set +*/ + +void Transaction_state_tracker::set_read_flags(THD *thd, + enum enum_tx_read_flags flags) +{ + if (m_enabled && (tx_read_flags != flags)) + { + tx_read_flags = flags; + tx_changed |= TX_CHG_CHISTICS; + set_changed(thd); + } +} + + +/** + Set isolation level pertaining to the next transaction. + + @param thd [IN] The thd handle. + @param set [IN] The isolation level to set +*/ + +void Transaction_state_tracker::set_isol_level(THD *thd, + enum enum_tx_isol_level level) +{ + if (m_enabled && (tx_isol_level != level)) + { + tx_isol_level = level; + tx_changed |= TX_CHG_CHISTICS; + set_changed(thd); + } +} + + +/////////////////////////////////////////////////////////////////////////////// + +/** + @Enable/disable the tracker based on @@session_track_state_change value. + + @param thd [IN] The thd handle. + @return false (always) + +**/ + +bool Session_state_change_tracker::update(THD *thd, set_var *) +{ + m_enabled= thd->variables.session_track_state_change; + return false; +} + +/** + Store the '1' in the specified buffer when state is changed. + + @param thd [IN] The thd handle. + @paran buf [INOUT] Buffer to store the information to. 
+ + @reval false Success + @retval true Error +**/ + +bool Session_state_change_tracker::store(THD *thd, String *buf) +{ + if (unlikely((1 + 1 + 1 + buf->length() >= MAX_PACKET_LENGTH) || + buf->reserve(1 + 1 + 1, EXTRA_ALLOC))) + return true; + + compile_time_assert(SESSION_TRACK_STATE_CHANGE < 251); + /* Session state type (SESSION_TRACK_STATE_CHANGE) */ + buf->q_append((char)SESSION_TRACK_STATE_CHANGE); + + /* Length of the overall entity (1 byte) */ + buf->q_append('\1'); + + DBUG_ASSERT(is_changed()); + buf->q_append('1'); + + return false; +} + +#ifdef USER_VAR_TRACKING + +bool User_variables_tracker::update(THD *thd, set_var *) +{ + m_enabled= thd->variables.session_track_user_variables; + return false; +} + + +bool User_variables_tracker::store(THD *thd, String *buf) +{ + for (ulong i= 0; i < m_changed_user_variables.size(); i++) + { + const user_var_entry *var= m_changed_user_variables.at(i); + String value_str; + bool null_value; + size_t length; + + var->val_str(&null_value, &value_str, DECIMAL_MAX_SCALE); + length= net_length_size(var->name.length) + var->name.length; + if (!null_value) + length+= net_length_size(value_str.length()) + value_str.length(); + else + length+= 1; + + if (buf->reserve(sizeof(char) + length + net_length_size(length))) + return true; + + // TODO: check max packet length MDEV-22709 + buf->q_append(static_cast(SESSION_TRACK_USER_VARIABLES)); + buf->q_net_store_length(length); + buf->q_net_store_data(reinterpret_cast(var->name.str), + var->name.length); + if (!null_value) + buf->q_net_store_data(reinterpret_cast(value_str.ptr()), + value_str.length()); + else + { + char nullbuff[1]= { (char)251 }; + buf->q_append(nullbuff, sizeof(nullbuff)); + } + } + m_changed_user_variables.clear(); + return false; +} +#endif // USER_VAR_TRACKING + +/////////////////////////////////////////////////////////////////////////////// + +/** + @brief Store all change information in the specified buffer. + + @param thd [IN] The thd handle. 
+ @param buf [OUT] Reference to the string buffer to which the state + change data needs to be written. +*/ + +void Session_tracker::store(THD *thd, String *buf) +{ + size_t start; + + /* track data ID fit into one byte in net coding */ + compile_time_assert(SESSION_TRACK_always_at_the_end < 251); + /* one tracker could serv several tracking data */ + compile_time_assert((uint) SESSION_TRACK_always_at_the_end >= + (uint) SESSION_TRACKER_END); + + /* + Probably most track result will fit in 251 byte so lets made it at + least efficient. We allocate 1 byte for length and then will move + string if there is more. + */ + buf->append('\0'); + start= buf->length(); + + /* Get total length. */ + for (int i= 0; i < SESSION_TRACKER_END; i++) + { + if (m_trackers[i]->is_changed()) + { + if (m_trackers[i]->store(thd, buf)) + { + // it is safer to have 0-length block in case of error + buf->length(start); + return; + } + m_trackers[i]->reset_changed(); + } + } + + size_t length= buf->length() - start; + uchar *data; + uint size; + + if ((size= net_length_size(length)) != 1) + { + if (buf->reserve(size - 1, 0)) + { + buf->length(start); // it is safer to have 0-length block in case of error + return; + } + + /* + The 'buf->reserve()' can change the buf->ptr() so we cannot + calculate the 'data' earlier. + */ + buf->length(buf->length() + (size - 1)); + data= (uchar *)(buf->ptr() + start); + memmove(data + (size - 1), data, length); + } + else + data= (uchar *)(buf->ptr() + start); + + net_store_length(data - 1, length); +} diff --git a/sql/session_tracker.h b/sql/session_tracker.h new file mode 100644 index 00000000..5715b583 --- /dev/null +++ b/sql/session_tracker.h @@ -0,0 +1,498 @@ +#ifndef SESSION_TRACKER_INCLUDED +#define SESSION_TRACKER_INCLUDED + +/* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2016, 2017, MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "m_string.h" +#include "thr_lock.h" +#include "sql_hset.h" + +#ifndef EMBEDDED_LIBRARY +/* forward declarations */ +class THD; +class set_var; +class String; +class user_var_entry; + + +enum enum_session_tracker +{ + SESSION_SYSVARS_TRACKER, /* Session system variables */ + CURRENT_SCHEMA_TRACKER, /* Current schema */ + SESSION_STATE_CHANGE_TRACKER, + TRANSACTION_INFO_TRACKER, /* Transaction state */ +#ifdef USER_VAR_TRACKING + USER_VARIABLES_TRACKER, +#endif // USER_VAR_TRACKING + SESSION_TRACKER_END /* must be the last */ +}; + +/** + State_tracker + + An abstract class that defines the interface for any of the server's + 'session state change tracker'. A tracker, however, is a sub- class of + this class which takes care of tracking the change in value of a part- + icular session state type and thus defines various methods listed in this + interface. The change information is later serialized and transmitted to + the client through protocol's OK packet. + + Tracker system variables :- + A tracker is normally mapped to a system variable. So in order to enable, + disable or modify the sub-entities of a tracker, the user needs to modify + the respective system variable either through SET command or via command + line option. 
As required in system variable handling, this interface also + includes two functions to help in the verification of the supplied value + (ON_UPDATE) of the tracker system variable, namely - update(). +*/ + +class State_tracker +{ +protected: + /** + Is tracking enabled for a particular session state type ? + + @note: it is a cache of the corresponding thd->variables.session_track_xxx + variable + */ + bool m_enabled; + + void set_changed(THD *thd); + +private: + /** Has the session state type changed ? */ + bool m_changed; + +public: + virtual ~State_tracker() = default; + + /** Getters */ + bool is_enabled() const + { return m_enabled; } + + bool is_changed() const + { return m_changed; } + + void reset_changed() { m_changed= false; } + + /** + Called by THD::init() when new connection is being created + + We may inherit m_changed from previous connection served by this THD if + connection was broken or client didn't have session tracking capability. + Thus we have to reset it here. + */ + virtual bool enable(THD *thd) + { + reset_changed(); + return update(thd, 0); + } + + /** To be invoked when the tracker's system variable is updated (ON_UPDATE).*/ + virtual bool update(THD *thd, set_var *var)= 0; + + /** Store changed data into the given buffer. */ + virtual bool store(THD *thd, String *buf)= 0; + + /** Mark the entity as changed. */ + void mark_as_changed(THD *thd) { if (is_enabled()) set_changed(thd); } +}; + + +/** + Session_sysvars_tracker + + This is a tracker class that enables & manages the tracking of session + system variables. It internally maintains a hash of user supplied variable + references and a boolean field to store if the variable was changed by the + last statement. +*/ + +class Session_sysvars_tracker: public State_tracker +{ + struct sysvar_node_st { + sys_var *m_svar; + bool *test_load; + bool m_changed; + }; + + class vars_list + { + /** + Registered system variables. 
(@@session_track_system_variables) + A hash to store the name of all the system variables specified by the + user. + */ + HASH m_registered_sysvars; + /** + If TRUE then we want to check all session variable. + */ + bool track_all; + void init() + { + my_hash_init(PSI_INSTRUMENT_ME, &m_registered_sysvars, &my_charset_bin, + 0, 0, 0, (my_hash_get_key) sysvars_get_key, my_free, + HASH_UNIQUE | (mysqld_server_initialized ? HASH_THREAD_SPECIFIC : 0)); + } + void free_hash() + { + DBUG_ASSERT(my_hash_inited(&m_registered_sysvars)); + my_hash_free(&m_registered_sysvars); + } + + sysvar_node_st *search(const sys_var *svar) + { + return reinterpret_cast( + my_hash_search(&m_registered_sysvars, + reinterpret_cast(&svar), + sizeof(sys_var*))); + } + + sysvar_node_st *at(ulong i) + { + DBUG_ASSERT(i < m_registered_sysvars.records); + return reinterpret_cast( + my_hash_element(&m_registered_sysvars, i)); + } + public: + vars_list(): track_all(false) { init(); } + ~vars_list() { if (my_hash_inited(&m_registered_sysvars)) free_hash(); } + void deinit() { free_hash(); } + + sysvar_node_st *insert_or_search(const sys_var *svar) + { + sysvar_node_st *res= search(svar); + if (!res) + { + if (track_all) + { + insert(svar); + return search(svar); + } + } + return res; + } + + bool insert(const sys_var *svar); + void reinit(); + void reset(); + inline bool is_enabled() + { + return track_all || m_registered_sysvars.records; + } + void copy(vars_list* from, THD *thd); + bool parse_var_list(THD *thd, LEX_STRING var_list, bool throw_error, + CHARSET_INFO *char_set); + bool construct_var_list(char *buf, size_t buf_len); + bool store(THD *thd, String *buf); + }; + /** + Two objects of vars_list type are maintained to manage + various operations. 
+ */ + vars_list orig_list; + bool m_parsed; + +public: + void init(THD *thd); + void deinit(THD *thd); + bool enable(THD *thd); + bool update(THD *thd, set_var *var); + bool store(THD *thd, String *buf); + void mark_as_changed(THD *thd, const sys_var *var); + void deinit() { orig_list.deinit(); } + /* callback */ + static uchar *sysvars_get_key(const char *entry, size_t *length, + my_bool not_used __attribute__((unused))); + + friend bool sysvartrack_global_update(THD *thd, char *str, size_t len); +}; + + +bool sysvartrack_validate_value(THD *thd, const char *str, size_t len); +bool sysvartrack_global_update(THD *thd, char *str, size_t len); + + +/** + Current_schema_tracker, + + This is a tracker class that enables & manages the tracking of current + schema for a particular connection. +*/ + +class Current_schema_tracker: public State_tracker +{ +public: + bool update(THD *thd, set_var *var); + bool store(THD *thd, String *buf); +}; + + +/* + Session_state_change_tracker + + This is a boolean tracker class that will monitor any change that contributes + to a session state change. + Attributes that contribute to session state change include: + - Successful change to System variables + - User defined variables assignments + - temporary tables created, altered or deleted + - prepared statements added or removed + - change in current database + - change of current role +*/ + +class Session_state_change_tracker: public State_tracker +{ +public: + bool update(THD *thd, set_var *var); + bool store(THD *thd, String *buf); +}; + + +/* + Transaction_state_tracker +*/ + +/** + Transaction state (no transaction, transaction active, work attached, etc.) 
*/
/*
  NOTE: the TX_* values below are single-bit flags (powers of two);
  Transaction_state_tracker keeps tx_curr_state / tx_reported_state as a
  bitwise OR of these.
*/
enum enum_tx_state {
  TX_EMPTY        =   0,  ///< "none of the below"
  TX_EXPLICIT     =   1,  ///< an explicit transaction is active
  TX_IMPLICIT     =   2,  ///< an implicit transaction is active
  TX_READ_TRX     =   4,  ///< transactional reads were done
  TX_READ_UNSAFE  =   8,  ///< non-transaction reads were done
  TX_WRITE_TRX    =  16,  ///< transactional writes were done
  TX_WRITE_UNSAFE =  32,  ///< non-transactional writes were done
  TX_STMT_UNSAFE  =  64,  ///< "unsafe" (non-deterministic like UUID()) stmts
  TX_RESULT_SET   = 128,  ///< result set was sent
  TX_WITH_SNAPSHOT= 256,  ///< WITH CONSISTENT SNAPSHOT was used
  TX_LOCKED_TABLES= 512   ///< LOCK TABLES is active
};


/**
  Transaction access mode
*/
enum enum_tx_read_flags {
  TX_READ_INHERIT =   0,  ///< not explicitly set, inherit session.tx_read_only
  TX_READ_ONLY    =   1,  ///< START TRANSACTION READ ONLY,  or tx_read_only=1
  TX_READ_WRITE   =   2,  ///< START TRANSACTION READ WRITE, or tx_read_only=0
};


/**
  Transaction isolation level
*/
enum enum_tx_isol_level {
  TX_ISOL_INHERIT     = 0, ///< not explicitly set, inherit session.tx_isolation
  TX_ISOL_UNCOMMITTED = 1,
  TX_ISOL_COMMITTED   = 2,
  TX_ISOL_REPEATABLE  = 3,
  TX_ISOL_SERIALIZABLE= 4
};


/**
  Transaction tracking level
*/
enum enum_session_track_transaction_info {
  TX_TRACK_NONE     = 0,  ///< do not send tracker items on transaction info
  TX_TRACK_STATE    = 1,  ///< track transaction status
  TX_TRACK_CHISTICS = 2   ///< track status and characteristics
};


/**
  This is a tracker class that enables & manages the tracking of
  current transaction info for a particular connection.
+*/ + +class Transaction_state_tracker : public State_tracker +{ + /** Helper function: turn table info into table access flag */ + enum_tx_state calc_trx_state(THD *thd, thr_lock_type l, bool has_trx); +public: + + bool enable(THD *thd) + { + m_enabled= false; + tx_changed= TX_CHG_NONE; + tx_curr_state= TX_EMPTY; + tx_reported_state= TX_EMPTY; + tx_read_flags= TX_READ_INHERIT; + tx_isol_level= TX_ISOL_INHERIT; + return State_tracker::enable(thd); + } + + bool update(THD *thd, set_var *var); + bool store(THD *thd, String *buf); + + /** Change transaction characteristics */ + void set_read_flags(THD *thd, enum enum_tx_read_flags flags); + void set_isol_level(THD *thd, enum enum_tx_isol_level level); + + /** Change transaction state */ + void clear_trx_state(THD *thd, uint clear); + void add_trx_state(THD *thd, uint add); + void inline add_trx_state(THD *thd, thr_lock_type l, bool has_trx) + { + add_trx_state(thd, calc_trx_state(thd, l, has_trx)); + } + void add_trx_state_from_thd(THD *thd); + void end_trx(THD *thd); + + +private: + enum enum_tx_changed { + TX_CHG_NONE = 0, ///< no changes from previous stmt + TX_CHG_STATE = 1, ///< state has changed from previous stmt + TX_CHG_CHISTICS = 2 ///< characteristics have changed from previous stmt + }; + + /** any trackable changes caused by this statement? */ + uint tx_changed; + + /** transaction state */ + uint tx_curr_state, tx_reported_state; + + /** r/w or r/o set? session default? */ + enum enum_tx_read_flags tx_read_flags; + + /** isolation level */ + enum enum_tx_isol_level tx_isol_level; + + inline void update_change_flags(THD *thd) + { + tx_changed &= uint(~TX_CHG_STATE); + tx_changed |= (tx_curr_state != tx_reported_state) ? 
TX_CHG_STATE : 0; + if (tx_changed != TX_CHG_NONE) + set_changed(thd); + } +}; + +#define TRANSACT_TRACKER(X) \ + do { if (thd->variables.session_track_transaction_info > TX_TRACK_NONE) \ + thd->session_tracker.transaction_info.X; } while(0) + + +/** + User_variables_tracker + + This is a tracker class that enables & manages the tracking of user variables. +*/ + +#ifdef USER_VAR_TRACKING +class User_variables_tracker: public State_tracker +{ + Hash_set m_changed_user_variables; +public: + User_variables_tracker(): + m_changed_user_variables(PSI_INSTRUMENT_ME, &my_charset_bin, 0, 0, + sizeof(const user_var_entry*), 0, 0, HASH_UNIQUE | + mysqld_server_initialized ? HASH_THREAD_SPECIFIC : 0) {} + bool update(THD *thd, set_var *var); + bool store(THD *thd, String *buf); + void mark_as_changed(THD *thd, const user_var_entry *var) + { + if (is_enabled()) + { + m_changed_user_variables.insert(var); + set_changed(thd); + } + } + void deinit() { m_changed_user_variables.~Hash_set(); } +}; +#endif // USER_VAR_TRACKING + + +/** + Session_tracker + + This class holds an object each for all tracker classes and provides + methods necessary for systematic detection and generation of session + state change information. +*/ + +class Session_tracker +{ + State_tracker *m_trackers[SESSION_TRACKER_END]; + + /* The following two functions are private to disable copying. 
*/ + Session_tracker(Session_tracker const &other) + { + DBUG_ASSERT(FALSE); + } + Session_tracker& operator= (Session_tracker const &rhs) + { + DBUG_ASSERT(FALSE); + return *this; + } + +public: + Current_schema_tracker current_schema; + Session_state_change_tracker state_change; + Transaction_state_tracker transaction_info; + Session_sysvars_tracker sysvars; +#ifdef USER_VAR_TRACKING + User_variables_tracker user_variables; +#endif // USER_VAR_TRACKING + + Session_tracker() + { + m_trackers[SESSION_SYSVARS_TRACKER]= &sysvars; + m_trackers[CURRENT_SCHEMA_TRACKER]= ¤t_schema; + m_trackers[SESSION_STATE_CHANGE_TRACKER]= &state_change; + m_trackers[TRANSACTION_INFO_TRACKER]= &transaction_info; +#ifdef USER_VAR_TRACKING + m_trackers[USER_VARIABLES_TRACKER]= &user_variables; +#endif // USER_VAR_TRACKING + } + + void enable(THD *thd) + { + for (int i= 0; i < SESSION_TRACKER_END; i++) + m_trackers[i]->enable(thd); + } + + void store(THD *thd, String *main_buf); +}; + + +int session_tracker_init(); +#else + +#define TRANSACT_TRACKER(X) do{}while(0) + +class Session_tracker +{ + class Dummy_tracker + { + public: + void mark_as_changed(THD *thd) {} + void mark_as_changed(THD *thd, const sys_var *var) {} + }; +public: + Dummy_tracker current_schema; + Dummy_tracker state_change; + Dummy_tracker sysvars; +}; + +#endif //EMBEDDED_LIBRARY + +#endif /* SESSION_TRACKER_INCLUDED */ diff --git a/sql/set_var.cc b/sql/set_var.cc new file mode 100644 index 00000000..aa9ec5ab --- /dev/null +++ b/sql/set_var.cc @@ -0,0 +1,1553 @@ +/* Copyright (c) 2002, 2013, Oracle and/or its affiliates. + Copyright (c) 2008, 2014, SkySQL Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* variable declarations are in sys_vars.cc now !!! */ + +#include "sql_plugin.h" +#include "sql_class.h" // set_var.h: session_var_ptr +#include "set_var.h" +#include "sql_priv.h" +#include "unireg.h" +#include "mysqld.h" // lc_messages_dir +#include "sys_vars_shared.h" +#include "transaction.h" +#include "sql_locale.h" // my_locale_by_number, + // my_locale_by_name +#include "strfunc.h" // find_set_from_flags, find_set +#include "sql_parse.h" // check_global_access +#include "sql_table.h" // reassign_keycache_tables +#include "sql_time.h" // date_time_format_copy, + // date_time_format_make +#include "derror.h" +#include "tztime.h" // my_tz_find, my_tz_SYSTEM, struct Time_zone +#include "sql_select.h" // free_underlaid_joins +#include "sql_i_s.h" +#include "sql_view.h" // updatable_views_with_limit_typelib +#include "lock.h" // lock_global_read_lock, + // make_global_read_lock_block_commit, + // unlock_global_read_lock + +static HASH system_variable_hash; +static PolyLock_mutex PLock_global_system_variables(&LOCK_global_system_variables); +static ulonglong system_variable_hash_version= 0; + +/** + Return variable name and length for hashing of variables. +*/ + +static uchar *get_sys_var_length(const sys_var *var, size_t *length, + my_bool first) +{ + *length= var->name.length; + return (uchar*) var->name.str; +} + +sys_var_chain all_sys_vars = { NULL, NULL }; + +int sys_var_init() +{ + DBUG_ENTER("sys_var_init"); + + /* Must be already initialized. 
*/ + DBUG_ASSERT(system_charset_info != NULL); + + if (my_hash_init(PSI_INSTRUMENT_ME, &system_variable_hash, system_charset_info, 700, 0, + 0, (my_hash_get_key) get_sys_var_length, 0, HASH_UNIQUE)) + goto error; + + if (mysql_add_sys_var_chain(all_sys_vars.first)) + goto error; + + DBUG_RETURN(0); + +error: + fprintf(stderr, "failed to initialize System variables"); + DBUG_RETURN(1); +} + +uint sys_var_elements() +{ + return system_variable_hash.records; +} + +int sys_var_add_options(DYNAMIC_ARRAY *long_options, int parse_flags) +{ + size_t saved_elements= long_options->elements; + + DBUG_ENTER("sys_var_add_options"); + + for (sys_var *var=all_sys_vars.first; var; var= var->next) + { + if (var->register_option(long_options, parse_flags)) + goto error; + } + + DBUG_RETURN(0); + +error: + fprintf(stderr, "failed to initialize System variables"); + long_options->elements= saved_elements; + DBUG_RETURN(1); +} + +void sys_var_end() +{ + DBUG_ENTER("sys_var_end"); + + my_hash_free(&system_variable_hash); + + for (sys_var *var=all_sys_vars.first; var; var= var->next) + var->cleanup(); + + DBUG_VOID_RETURN; +} + + +static bool static_test_load= TRUE; + +/** + sys_var constructor + + @param chain variables are linked into chain for mysql_add_sys_var_chain() + @param name_arg the name of the variable. Must be 0-terminated and exist + for the liftime of the sys_var object. @sa my_option::name + @param comment shown in mysqld --help, @sa my_option::comment + @param flags_arg or'ed flag_enum values + @param off offset of the global variable value from the + &global_system_variables. + @param getopt_id -1 for no command-line option, otherwise @sa my_option::id + @param getopt_arg_type @sa my_option::arg_type + @param show_val_type_arg what value_ptr() returns for sql_show.cc + @param def_val default value, @sa my_option::def_value + @param lock mutex or rw_lock that protects the global variable + *in addition* to LOCK_global_system_variables. 
+ @param binlog_status_enum @sa binlog_status_enum + @param on_check_func a function to be called at the end of sys_var::check, + put your additional checks here + @param on_update_func a function to be called at the end of sys_var::update, + any post-update activity should happen here + @param substitute If non-NULL, this variable is deprecated and the + string describes what one should use instead. If an empty string, + the variable is deprecated but no replacement is offered. +*/ +sys_var::sys_var(sys_var_chain *chain, const char *name_arg, + const char *comment, int flags_arg, ptrdiff_t off, + int getopt_id, enum get_opt_arg_type getopt_arg_type, + SHOW_TYPE show_val_type_arg, longlong def_val, + PolyLock *lock, enum binlog_status_enum binlog_status_arg, + on_check_function on_check_func, + on_update_function on_update_func, + const char *substitute) : + next(0), binlog_status(binlog_status_arg), value_origin(COMPILE_TIME), + flags(flags_arg), show_val_type(show_val_type_arg), + guard(lock), offset(off), on_check(on_check_func), on_update(on_update_func), + deprecation_substitute(substitute) +{ + /* + There is a limitation in handle_options() related to short options: + - either all short options should be declared when parsing in multiple stages, + - or none should be declared. + Because a lot of short options are used in the normal parsing phase + for mysqld, we enforce here that no short option is present + in the first (PARSE_EARLY) stage. + See handle_options() for details. + */ + DBUG_ASSERT(!(flags & PARSE_EARLY) || getopt_id <= 0 || getopt_id >= 255); + + name.str= name_arg; // ER_NO_DEFAULT relies on 0-termination of name_arg + name.length= strlen(name_arg); // and so does this. 
+ DBUG_ASSERT(name.length <= NAME_CHAR_LEN); + + bzero(&option, sizeof(option)); + option.name= name_arg; + option.id= getopt_id; + option.comment= comment; + option.arg_type= getopt_arg_type; + option.value= (uchar **)global_var_ptr(); + option.def_value= def_val; + option.app_type= this; + option.var_type= flags & AUTO_SET ? GET_AUTO : 0; + + if (chain->last) + chain->last->next= this; + else + chain->first= this; + chain->last= this; + + test_load= &static_test_load; +} + +bool sys_var::update(THD *thd, set_var *var) +{ + enum_var_type type= var->type; + if (type == OPT_GLOBAL || scope() == GLOBAL) + { + /* + Yes, both locks need to be taken before an update, just as + both are taken to get a value. If we'll take only 'guard' here, + then value_ptr() for strings won't be safe in SHOW VARIABLES anymore, + to make it safe we'll need value_ptr_unlock(). + */ + AutoWLock lock1(&PLock_global_system_variables); + AutoWLock lock2(guard); + value_origin= SQL; + return global_update(thd, var) || + (on_update && on_update(this, thd, OPT_GLOBAL)); + } + else + { + bool ret= session_update(thd, var) || + (on_update && on_update(this, thd, OPT_SESSION)); + + /* + Make sure we don't session-track variables that are not actually + part of the session. tx_isolation and and tx_read_only for example + exist as GLOBAL, SESSION, and one-shot ("for next transaction only"). 
+ */ + if ((var->type == OPT_SESSION) && (!ret)) + { + thd->session_tracker.sysvars.mark_as_changed(thd, var->var); + /* + Here MySQL sends variable name to avoid reporting change of + the tracker itself, but we decided that it is not needed + */ + thd->session_tracker.state_change.mark_as_changed(thd); + } + + return ret; + } +} + +const uchar *sys_var::session_value_ptr(THD *thd, const LEX_CSTRING *base) const +{ + return session_var_ptr(thd); +} + +const uchar *sys_var::global_value_ptr(THD *thd, const LEX_CSTRING *base) const +{ + return global_var_ptr(); +} + +bool sys_var::check(THD *thd, set_var *var) +{ + if (unlikely((var->value && do_check(thd, var)) || + (on_check && on_check(this, thd, var)))) + { + if (likely(!thd->is_error())) + { + char buff[STRING_BUFFER_USUAL_SIZE]; + String str(buff, sizeof(buff), system_charset_info), *res; + + if (!var->value) + { + str.set(STRING_WITH_LEN("DEFAULT"), &my_charset_latin1); + res= &str; + } + else if (!(res=var->value->val_str(&str))) + { + str.set(STRING_WITH_LEN("NULL"), &my_charset_latin1); + res= &str; + } + ErrConvString err(res); + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), name.str, err.ptr()); + } + return true; + } + return false; +} + +const uchar *sys_var::value_ptr(THD *thd, enum_var_type type, + const LEX_CSTRING *base) const +{ + DBUG_ASSERT(base); + if (type == OPT_GLOBAL || scope() == GLOBAL) + { + mysql_mutex_assert_owner(&LOCK_global_system_variables); + AutoRLock lock(guard); + return global_value_ptr(thd, base); + } + else + return session_value_ptr(thd, base); +} + +bool sys_var::set_default(THD *thd, set_var* var) +{ + if (var->type == OPT_GLOBAL || scope() == GLOBAL) + global_save_default(thd, var); + else + session_save_default(thd, var); + + return check(thd, var) || update(thd, var); +} + + +#define do_num_val(T,CMD) \ +do { \ + T val= *(T*) value; \ + CMD; \ +} while (0) + +#define case_for_integers(CMD) \ + case SHOW_SINT: do_num_val (int,CMD); \ + case SHOW_SLONG: do_num_val (long,CMD); 
\ + case SHOW_SLONGLONG:do_num_val (longlong,CMD); \ + case SHOW_UINT: do_num_val (uint,CMD); \ + case SHOW_ULONG: do_num_val (ulong,CMD); \ + case SHOW_ULONGLONG:do_num_val (ulonglong,CMD); \ + case SHOW_HA_ROWS: do_num_val (ha_rows,CMD); + +#define case_for_double(CMD) \ + case SHOW_DOUBLE: do_num_val (double,CMD) + +#define case_get_string_as_lex_string \ + case SHOW_CHAR: \ + sval.str= (char*) value; \ + sval.length= sval.str ? strlen(sval.str) : 0; \ + break; \ + case SHOW_CHAR_PTR: \ + sval.str= *(char**) value; \ + sval.length= sval.str ? strlen(sval.str) : 0; \ + break; \ + case SHOW_LEX_STRING: \ + sval= *(LEX_CSTRING *) value; \ + break + +longlong sys_var::val_int(bool *is_null, + THD *thd, enum_var_type type, + const LEX_CSTRING *base) +{ + LEX_CSTRING sval; + AutoWLock lock(&PLock_global_system_variables); + const uchar *value= value_ptr(thd, type, base); + *is_null= false; + + switch (show_type()) + { + case_get_string_as_lex_string; + case_for_integers(return val); + case_for_double(return (longlong) val); + case SHOW_MY_BOOL: return *(my_bool*)value; + default: + my_error(ER_VAR_CANT_BE_READ, MYF(0), name.str); + return 0; + } + + longlong ret= 0; + if (!(*is_null= !sval.str)) + ret= longlong_from_string_with_check(charset(thd), + sval.str, sval.str + sval.length); + return ret; +} + + +String *sys_var::val_str_nolock(String *str, THD *thd, const uchar *value) +{ + static LEX_CSTRING bools[]= + { + { STRING_WITH_LEN("OFF") }, + { STRING_WITH_LEN("ON") } + }; + + LEX_CSTRING sval; + switch (show_type()) + { + case_get_string_as_lex_string; + case_for_integers(return str->set(val, system_charset_info) ? 0 : str); + case_for_double(return str->set_real(val, 6, system_charset_info) ? 
0 : str); + case SHOW_MY_BOOL: + sval= bools[(int)*(my_bool*)value]; + break; + default: + my_error(ER_VAR_CANT_BE_READ, MYF(0), name.str); + return 0; + } + + if (!sval.str || str->copy(sval.str, sval.length, charset(thd))) + str= NULL; + return str; +} + + +String *sys_var::val_str(String *str, + THD *thd, enum_var_type type, const LEX_CSTRING *base) +{ + AutoWLock lock(&PLock_global_system_variables); + const uchar *value= value_ptr(thd, type, base); + return val_str_nolock(str, thd, value); +} + + +double sys_var::val_real(bool *is_null, + THD *thd, enum_var_type type, const LEX_CSTRING *base) +{ + LEX_CSTRING sval; + AutoWLock lock(&PLock_global_system_variables); + const uchar *value= value_ptr(thd, type, base); + *is_null= false; + + switch (show_type()) + { + case_get_string_as_lex_string; + case_for_integers(return (double)val); + case_for_double(return val); + case SHOW_MY_BOOL: return *(my_bool*)value; + default: + my_error(ER_VAR_CANT_BE_READ, MYF(0), name.str); + return 0; + } + + double ret= 0; + if (!(*is_null= !sval.str)) + ret= double_from_string_with_check(charset(thd), + sval.str, sval.str + sval.length); + return ret; +} + + +void sys_var::do_deprecated_warning(THD *thd) +{ + if (deprecation_substitute != NULL) + { + char buf1[NAME_CHAR_LEN + 3]; + strxnmov(buf1, sizeof(buf1)-1, "@@", name.str, 0); + + /* + if deprecation_substitute is an empty string, + there is no replacement for the syntax + */ + uint errmsg= deprecation_substitute[0] == '\0' + ? ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT + : ER_WARN_DEPRECATED_SYNTAX; + if (thd) + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_DEPRECATED_SYNTAX, ER_THD(thd, errmsg), + buf1, deprecation_substitute); + else + sql_print_warning(ER_DEFAULT(errmsg), buf1, deprecation_substitute); + } +} + +/** + Throw warning (error in STRICT mode) if value for variable needed bounding. + Plug-in interface also uses this. 
+ + @param thd thread handle + @param name variable's name + @param fixed did we have to correct the value? (throw warn/err if so) + @param is_unsigned is value's type unsigned? + @param v variable's value + + @retval true on error, false otherwise (warning or ok) + */ + + +bool throw_bounds_warning(THD *thd, const char *name,const char *v) +{ + if (thd->variables.sql_mode & MODE_STRICT_ALL_TABLES) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), name, v); + return true; + } + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_TRUNCATED_WRONG_VALUE, + ER_THD(thd, ER_TRUNCATED_WRONG_VALUE), name, v); + return false; +} + + +bool throw_bounds_warning(THD *thd, const char *name, + bool fixed, bool is_unsigned, longlong v) +{ + if (fixed) + { + char buf[22]; + + if (is_unsigned) + ullstr((ulonglong) v, buf); + else + llstr(v, buf); + + if (thd->variables.sql_mode & MODE_STRICT_ALL_TABLES) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), name, buf); + return true; + } + return throw_bounds_warning(thd, name, buf); + } + return false; +} + +bool throw_bounds_warning(THD *thd, const char *name, bool fixed, double v) +{ + if (fixed) + { + char buf[64]; + + my_gcvt(v, MY_GCVT_ARG_DOUBLE, sizeof(buf) - 1, buf, NULL); + + if (thd->variables.sql_mode & MODE_STRICT_ALL_TABLES) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), name, buf); + return true; + } + return throw_bounds_warning(thd, name, buf); + } + return false; +} + + +typedef struct old_names_map_st +{ + const char *old_name; + const char *new_name; +} my_old_conv; + +static my_old_conv old_conv[]= +{ + { "cp1251_koi8" , "cp1251" }, + { "cp1250_latin2" , "cp1250" }, + { "kam_latin2" , "keybcs2" }, + { "mac_latin2" , "MacRoman" }, + { "macce_latin2" , "MacCE" }, + { "pc2_latin2" , "pclatin2" }, + { "vga_latin2" , "pclatin1" }, + { "koi8_cp1251" , "koi8r" }, + { "win1251ukr_koi8_ukr" , "win1251ukr" }, + { "koi8_ukr_win1251ukr" , "koi8u" }, + { NULL , NULL } +}; + +CHARSET_INFO *get_old_charset_by_name(const char 
*name) +{ + my_old_conv *conv; + for (conv= old_conv; conv->old_name; conv++) + { + if (!my_strcasecmp(&my_charset_latin1, name, conv->old_name)) + return get_charset_by_csname(conv->new_name, MY_CS_PRIMARY, MYF(0)); + } + return NULL; +} + +/**************************************************************************** + Main handling of variables: + - Initialisation + - Searching during parsing + - Update loop +****************************************************************************/ + +/** + Add variables to the dynamic hash of system variables + + @param first Pointer to first system variable to add + + @retval + 0 SUCCESS + @retval + otherwise FAILURE +*/ + + +int mysql_add_sys_var_chain(sys_var *first) +{ + sys_var *var; + + /* A write lock should be held on LOCK_system_variables_hash */ + + for (var= first; var; var= var->next) + { + /* this fails if there is a conflicting variable name. see HASH_UNIQUE */ + if (my_hash_insert(&system_variable_hash, (uchar*) var)) + { + fprintf(stderr, "*** duplicate variable name '%s' ?\n", var->name.str); + goto error; + } + } + /* Update system_variable_hash version. */ + system_variable_hash_version++; + return 0; + +error: + for (; first != var; first= first->next) + my_hash_delete(&system_variable_hash, (uchar*) first); + return 1; +} + + +/* + Remove variables to the dynamic hash of system variables + + SYNOPSIS + mysql_del_sys_var_chain() + first Pointer to first system variable to remove + + RETURN VALUES + 0 SUCCESS + otherwise FAILURE +*/ + +int mysql_del_sys_var_chain(sys_var *first) +{ + int result= 0; + + mysql_prlock_wrlock(&LOCK_system_variables_hash); + for (sys_var *var= first; var; var= var->next) + result|= my_hash_delete(&system_variable_hash, (uchar*) var); + mysql_prlock_unlock(&LOCK_system_variables_hash); + + /* Update system_variable_hash version. 
*/ + system_variable_hash_version++; + return result; +} + + +static int show_cmp(SHOW_VAR *a, SHOW_VAR *b) +{ + return strcmp(a->name, b->name); +} + + +/* + Number of records in the system_variable_hash. + Requires lock on LOCK_system_variables_hash. +*/ +ulong get_system_variable_hash_records(void) +{ + return system_variable_hash.records; +} + + +/** + Constructs an array of system variables for display to the user. + + @param thd current thread + @param sorted If TRUE, the system variables should be sorted + @param scope OPT_GLOBAL or OPT_SESSION for SHOW GLOBAL|SESSION VARIABLES + + @retval + pointer Array of SHOW_VAR elements for display + @retval + NULL FAILURE +*/ + +SHOW_VAR* enumerate_sys_vars(THD *thd, bool sorted, enum enum_var_type scope) +{ + int count= system_variable_hash.records, i; + int size= sizeof(SHOW_VAR) * (count + 1); + SHOW_VAR *result= (SHOW_VAR*) thd->alloc(size); + + if (result) + { + SHOW_VAR *show= result; + + for (i= 0; i < count; i++) + { + sys_var *var= (sys_var*) my_hash_element(&system_variable_hash, i); + + // don't show session-only variables in SHOW GLOBAL VARIABLES + if (scope == OPT_GLOBAL && var->check_type(scope)) + continue; + + show->name= var->name.str; + show->value= (char*) var; + show->type= SHOW_SYS; + show++; + } + + /* sort into order */ + if (sorted) + my_qsort(result, show-result, sizeof(SHOW_VAR), + (qsort_cmp) show_cmp); + + /* make last element empty */ + bzero(show, sizeof(SHOW_VAR)); + } + return result; +} + +/** + Find a user set-table variable. + + @param str Name of system variable to find + @param length Length of variable. zero means that we should use strlen() + on the variable + + @retval + pointer pointer to variable definitions + @retval + 0 Unknown variable (error message is given) +*/ + +sys_var *intern_find_sys_var(const char *str, size_t length) +{ + sys_var *var; + + /* + This function is only called from the sql_plugin.cc. 
+ A lock on LOCK_system_variable_hash should be held + */ + var= (sys_var*) my_hash_search(&system_variable_hash, + (uchar*) str, length ? length : strlen(str)); + + return var; +} + + +/** + Execute update of all variables. + + First run a check of all variables that all updates will go ok. + If yes, then execute all updates, returning an error if any one failed. + + This should ensure that in all normal cases none all or variables are + updated. + + @param THD Thread id + @param var_list List of variables to update + + @retval + 0 ok + @retval + 1 ERROR, message sent (normally no variables was updated) + @retval + -1 ERROR, message not sent +*/ + +int sql_set_variables(THD *thd, List *var_list, bool free) +{ + int error= 0; + bool was_error= thd->is_error(); + List_iterator_fast it(*var_list); + DBUG_ENTER("sql_set_variables"); + + set_var_base *var; + while ((var=it++)) + { + if (unlikely((error= var->check(thd)))) + goto err; + } + if (unlikely(was_error) || likely(!(error= MY_TEST(thd->is_error())))) + { + it.rewind(); + while ((var= it++)) + error|= var->update(thd); // Returns 0, -1 or 1 + } + +err: + if (free) + free_underlaid_joins(thd, thd->lex->first_select_lex()); + DBUG_RETURN(error); +} + +/***************************************************************************** + Functions to handle SET mysql_internal_variable=const_expr +*****************************************************************************/ + +bool sys_var::on_check_access_global(THD *thd) const +{ + return check_global_access(thd, PRIV_SET_GLOBAL_SYSTEM_VARIABLE); +} + +/** + Verify that the supplied value is correct. + + @param thd Thread handler + + @return status code + @retval -1 Failure + @retval 0 Success + */ + +int set_var::check(THD *thd) +{ + var->do_deprecated_warning(thd); + if (var->is_readonly()) + { + my_error(ER_INCORRECT_GLOBAL_LOCAL_VAR, MYF(0), var->name.str, "read only"); + return -1; + } + if (var->check_type(type)) + { + int err= type == OPT_GLOBAL ? 
ER_LOCAL_VARIABLE : ER_GLOBAL_VARIABLE; + my_error(err, MYF(0), var->name.str); + return -1; + } + if (type == OPT_GLOBAL && var->on_check_access_global(thd)) + return 1; + /* value is a NULL pointer if we are using SET ... = DEFAULT */ + if (!value) + return 0; + + if (value->fix_fields_if_needed_for_scalar(thd, &value)) + return -1; + if (var->check_update_type(value)) + { + my_error(ER_WRONG_TYPE_FOR_VAR, MYF(0), var->name.str); + return -1; + } + switch (type) { + case SHOW_OPT_DEFAULT: + case SHOW_OPT_SESSION: + DBUG_ASSERT(var->scope() != sys_var::GLOBAL); + if (var->on_check_access_session(thd)) + return -1; + break; + case SHOW_OPT_GLOBAL: // Checked earlier + break; + } + return var->check(thd, this) ? -1 : 0; +} + + +/** + Check variable, but without assigning value (used by PS). + + @param thd thread handler + + @retval + 0 ok + @retval + 1 ERROR, message sent (normally no variables was updated) + @retval + -1 ERROR, message not sent +*/ +int set_var::light_check(THD *thd) +{ + if (var->is_readonly()) + { + my_error(ER_INCORRECT_GLOBAL_LOCAL_VAR, MYF(0), var->name.str, "read only"); + return -1; + } + if (var->check_type(type)) + { + int err= type == OPT_GLOBAL ? ER_LOCAL_VARIABLE : ER_GLOBAL_VARIABLE; + my_error(err, MYF(0), var->name.str); + return -1; + } + + if (type == OPT_GLOBAL && var->on_check_access_global(thd)) + return 1; + + if (value && value->fix_fields_if_needed_for_scalar(thd, &value)) + return -1; + return 0; +} + +/** + Update variable + + @param thd thread handler + @returns 0|1 ok or ERROR + + @note ERROR can be only due to abnormal operations involving + the server's execution evironment such as + out of memory, hard disk failure or the computer blows up. + Consider set_var::check() method if there is a need to return + an error due to logics. +*/ + +int set_var::update(THD *thd) +{ + return value ? 
var->update(thd, this) : var->set_default(thd, this); +} + + +set_var::set_var(THD *thd, enum_var_type type_arg, sys_var *var_arg, + const LEX_CSTRING *base_name_arg, Item *value_arg) + :var(var_arg), type(type_arg), base(*base_name_arg) +{ + /* + If the set value is a field, change it to a string to allow things like + SET table_type=MYISAM; + */ + if (value_arg && value_arg->type() == Item::FIELD_ITEM) + { + Item_field *item= (Item_field*) value_arg; + // names are utf8 + if (!(value= new (thd->mem_root) Item_string_sys(thd, + item->field_name.str, + (uint)item->field_name.length))) + value=value_arg; /* Give error message later */ + } + else + value=value_arg; +} + + +/***************************************************************************** + Functions to handle SET @user_variable=const_expr +*****************************************************************************/ + +int set_var_user::check(THD *thd) +{ + /* + Item_func_set_user_var can't substitute something else on its place => + 0 can be passed as last argument (reference on item) + */ + return (user_var_item->fix_fields(thd, (Item**) 0) || + user_var_item->check(0)) ? -1 : 0; +} + + +/** + Check variable, but without assigning value (used by PS). 
+ + @param thd thread handler + + @retval + 0 ok + @retval + 1 ERROR, message sent (normally no variables was updated) + @retval + -1 ERROR, message not sent +*/ +int set_var_user::light_check(THD *thd) +{ + /* + Item_func_set_user_var can't substitute something else on its place => + 0 can be passed as last argument (reference on item) + */ + return (user_var_item->fix_fields(thd, (Item**) 0)); +} + + +int set_var_user::update(THD *thd) +{ + if (user_var_item->update()) + { + /* Give an error if it's not given already */ + my_message(ER_SET_CONSTANTS_ONLY, ER_THD(thd, ER_SET_CONSTANTS_ONLY), + MYF(0)); + return -1; + } + + thd->session_tracker.state_change.mark_as_changed(thd); + return 0; +} + + +/***************************************************************************** + Functions to handle SET PASSWORD +*****************************************************************************/ + +int set_var_password::check(THD *thd) +{ +#ifndef NO_EMBEDDED_ACCESS_CHECKS + return check_change_password(thd, user); +#else + return 0; +#endif +} + +int set_var_password::update(THD *thd) +{ +#ifndef NO_EMBEDDED_ACCESS_CHECKS + Reprepare_observer *save_reprepare_observer= thd->m_reprepare_observer; + thd->m_reprepare_observer= 0; + int res= change_password(thd, user); + thd->m_reprepare_observer= save_reprepare_observer; + return res; +#else + return 0; +#endif +} + +/***************************************************************************** + Functions to handle SET ROLE +*****************************************************************************/ + +int set_var_role::check(THD *thd) +{ +#ifndef NO_EMBEDDED_ACCESS_CHECKS + int status= acl_check_setrole(thd, role.str, &access); + return status; +#else + return 0; +#endif +} + +int set_var_role::update(THD *thd) +{ +#ifndef NO_EMBEDDED_ACCESS_CHECKS + int res= acl_setrole(thd, role.str, access); + if (!res) + thd->session_tracker.state_change.mark_as_changed(thd); + return res; +#else + return 0; +#endif +} + 
+/***************************************************************************** + Functions to handle SET DEFAULT ROLE +*****************************************************************************/ + +int set_var_default_role::check(THD *thd) +{ +#ifndef NO_EMBEDDED_ACCESS_CHECKS + real_user= get_current_user(thd, user); + real_role= role.str; + if (role.str == current_role.str) + { + if (!thd->security_ctx->priv_role[0]) + real_role= "NONE"; + else + real_role= thd->security_ctx->priv_role; + } + + return acl_check_set_default_role(thd, real_user->host.str, + real_user->user.str, real_role); +#else + return 0; +#endif +} + +int set_var_default_role::update(THD *thd) +{ +#ifndef NO_EMBEDDED_ACCESS_CHECKS + Reprepare_observer *save_reprepare_observer= thd->m_reprepare_observer; + thd->m_reprepare_observer= 0; + int res= acl_set_default_role(thd, real_user->host.str, real_user->user.str, + real_role); + thd->m_reprepare_observer= save_reprepare_observer; + return res; +#else + return 0; +#endif +} + +/***************************************************************************** + Functions to handle SET NAMES and SET CHARACTER SET +*****************************************************************************/ + +int set_var_collation_client::check(THD *thd) +{ + /* Currently, UCS-2 cannot be used as a client character set */ + if (!is_supported_parser_charset(character_set_client)) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "character_set_client", + character_set_client->cs_name.str); + return 1; + } + return 0; +} + +int set_var_collation_client::update(THD *thd) +{ + thd->update_charset(character_set_client, collation_connection, + character_set_results); + + /* Mark client collation variables as changed */ + thd->session_tracker.sysvars.mark_as_changed(thd, + Sys_character_set_client_ptr); + thd->session_tracker.sysvars.mark_as_changed(thd, + Sys_character_set_results_ptr); + thd->session_tracker.sysvars.mark_as_changed(thd, + 
Sys_character_set_connection_ptr); + thd->session_tracker.state_change.mark_as_changed(thd); + + thd->protocol_text.init(thd); + thd->protocol_binary.init(thd); + return 0; +} + +/***************************************************************************** + INFORMATION_SCHEMA.SYSTEM_VARIABLES +*****************************************************************************/ +static void store_value_ptr(Field *field, sys_var *var, String *str, + const uchar *value_ptr) +{ + field->set_notnull(); + str= var->val_str_nolock(str, field->table->in_use, value_ptr); + if (str) + field->store(str->ptr(), str->length(), str->charset()); +} + +static void store_var(Field *field, sys_var *var, enum_var_type scope, + String *str) +{ + if (var->check_type(scope)) + return; + + store_value_ptr(field, var, str, + var->value_ptr(field->table->in_use, scope, &null_clex_str)); +} + +int fill_sysvars(THD *thd, TABLE_LIST *tables, COND *cond) +{ + char name_buffer[NAME_CHAR_LEN]; + bool res= 1; + CHARSET_INFO *scs= system_charset_info; + StringBuffer strbuf(scs); + const char *wild= thd->lex->wild ? 
thd->lex->wild->ptr() : 0; + Field **fields=tables->table->field; + bool has_file_acl= !check_access(thd, FILE_ACL, any_db.str, NULL,NULL,0,1); + + DBUG_ASSERT(tables->table->in_use == thd); + + cond= make_cond_for_info_schema(thd, cond, tables); + mysql_prlock_rdlock(&LOCK_system_variables_hash); + + for (uint i= 0; i < system_variable_hash.records; i++) + { + sys_var *var= (sys_var*) my_hash_element(&system_variable_hash, i); + + strmake_buf(name_buffer, var->name.str); + my_caseup_str(system_charset_info, name_buffer); + + /* this must be done before evaluating cond */ + restore_record(tables->table, s->default_values); + fields[0]->store(name_buffer, strlen(name_buffer), scs); + + if ((wild && wild_case_compare(system_charset_info, name_buffer, wild)) + || (cond && !cond->val_int())) + continue; + + mysql_mutex_lock(&LOCK_global_system_variables); + + // SESSION_VALUE + store_var(fields[1], var, OPT_SESSION, &strbuf); + + // GLOBAL_VALUE + store_var(fields[2], var, OPT_GLOBAL, &strbuf); + + // GLOBAL_VALUE_ORIGIN + static const LEX_CSTRING origins[]= + { + { STRING_WITH_LEN("CONFIG") }, + { STRING_WITH_LEN("COMMAND-LINE") }, + { STRING_WITH_LEN("AUTO") }, + { STRING_WITH_LEN("SQL") }, + { STRING_WITH_LEN("COMPILE-TIME") }, + { STRING_WITH_LEN("ENVIRONMENT") } + }; + const LEX_CSTRING *origin= origins + var->value_origin; + fields[3]->store(origin->str, origin->length, scs); + + // DEFAULT_VALUE + const uchar *def= var->is_readonly() && var->option.id < 0 + ? 
0 : var->default_value_ptr(thd); + if (def) + store_value_ptr(fields[4], var, &strbuf, def); + + mysql_mutex_unlock(&LOCK_global_system_variables); + + // VARIABLE_SCOPE + static const LEX_CSTRING scopes[]= + { + { STRING_WITH_LEN("GLOBAL") }, + { STRING_WITH_LEN("SESSION") }, + { STRING_WITH_LEN("SESSION ONLY") } + }; + const LEX_CSTRING *scope= scopes + var->scope(); + fields[5]->store(scope->str, scope->length, scs); + + // VARIABLE_TYPE +#if SIZEOF_LONG == SIZEOF_INT +#define LONG_TYPE "INT" +#else +#define LONG_TYPE "BIGINT" +#endif + + static const LEX_CSTRING types[]= + { + { 0, 0 }, // unused 0 + { 0, 0 }, // GET_NO_ARG 1 + { STRING_WITH_LEN("BOOLEAN") }, // GET_BOOL 2 + { STRING_WITH_LEN("INT") }, // GET_INT 3 + { STRING_WITH_LEN("INT UNSIGNED") }, // GET_UINT 4 + { STRING_WITH_LEN(LONG_TYPE) }, // GET_LONG 5 + { STRING_WITH_LEN(LONG_TYPE " UNSIGNED") }, // GET_ULONG 6 + { STRING_WITH_LEN("BIGINT") }, // GET_LL 7 + { STRING_WITH_LEN("BIGINT UNSIGNED") }, // GET_ULL 8 + { STRING_WITH_LEN("VARCHAR") }, // GET_STR 9 + { STRING_WITH_LEN("VARCHAR") }, // GET_STR_ALLOC 10 + { 0, 0 }, // GET_DISABLED 11 + { STRING_WITH_LEN("ENUM") }, // GET_ENUM 12 + { STRING_WITH_LEN("SET") }, // GET_SET 13 + { STRING_WITH_LEN("DOUBLE") }, // GET_DOUBLE 14 + { STRING_WITH_LEN("FLAGSET") }, // GET_FLAGSET 15 + { STRING_WITH_LEN("BOOLEAN") }, // GET_BIT 16 + }; + const ulong vartype= (var->option.var_type & GET_TYPE_MASK); + const LEX_CSTRING *type= types + vartype; + fields[6]->store(type->str, type->length, scs); + + // VARIABLE_COMMENT + fields[7]->store(var->option.comment, strlen(var->option.comment), + scs); + + // NUMERIC_MIN_VALUE + // NUMERIC_MAX_VALUE + // NUMERIC_BLOCK_SIZE + bool is_unsigned= true; + switch (vartype) + { + case GET_INT: + case GET_LONG: + case GET_LL: + is_unsigned= false; + /* fall through */ + case GET_UINT: + case GET_ULONG: + case GET_ULL: + fields[8]->set_notnull(); + fields[9]->set_notnull(); + fields[10]->set_notnull(); + 
fields[8]->store(var->option.min_value, is_unsigned); + fields[9]->store(var->option.max_value, is_unsigned); + fields[10]->store(var->option.block_size, is_unsigned); + break; + case GET_DOUBLE: + fields[8]->set_notnull(); + fields[9]->set_notnull(); + fields[8]->store(getopt_ulonglong2double(var->option.min_value)); + fields[9]->store(getopt_ulonglong2double(var->option.max_value)); + } + + // ENUM_VALUE_LIST + TYPELIB *tl= var->option.typelib; + if (tl) + { + uint i; + strbuf.length(0); + for (i=0; i < tl->count; i++) + { + const char *name= tl->type_names[i]; + strbuf.append(name, strlen(name)); + strbuf.append(','); + } + if (!strbuf.is_empty()) + strbuf.chop(); + fields[11]->set_notnull(); + fields[11]->store(strbuf.ptr(), strbuf.length(), scs); + } + + // READ_ONLY + static const LEX_CSTRING yesno[]= + { + { STRING_WITH_LEN("NO") }, + { STRING_WITH_LEN("YES") } + }; + const LEX_CSTRING *yn = yesno + var->is_readonly(); + fields[12]->store(yn->str, yn->length, scs); + + // COMMAND_LINE_ARGUMENT + if (var->option.id >= 0) + { + static const LEX_CSTRING args[]= + { + { STRING_WITH_LEN("NONE") }, // NO_ARG + { STRING_WITH_LEN("OPTIONAL") }, // OPT_ARG + { STRING_WITH_LEN("REQUIRED") } // REQUIRED_ARG + }; + const LEX_CSTRING *arg= args + var->option.arg_type; + fields[13]->set_notnull(); + fields[13]->store(arg->str, arg->length, scs); + } + + // GLOBAL_VALUE_PATH + if (var->value_origin == sys_var::CONFIG && has_file_acl) + { + fields[14]->set_notnull(); + fields[14]->store(var->origin_filename, strlen(var->origin_filename), + files_charset_info); + } + + if (schema_table_store_record(thd, tables->table)) + goto end; + thd->get_stmt_da()->inc_current_row_for_warning(); + } + res= 0; +end: + mysql_prlock_unlock(&LOCK_system_variables_hash); + return res; +} + +/* + This is a simple and inefficient helper that sets sys_var::value_origin + for a specific sysvar. 
+ It should *only* be used on server startup, if you need to do this later, + get yourself a pointer to your sysvar (see e.g. Sys_autocommit_ptr) + and update it directly. +*/ + +void set_sys_var_value_origin(void *ptr, enum sys_var::where here, + const char *filename) +{ + bool found __attribute__((unused))= false; + DBUG_ASSERT(!mysqld_server_started); // only to be used during startup + + for (uint i= 0; i < system_variable_hash.records; i++) + { + sys_var *var= (sys_var*) my_hash_element(&system_variable_hash, i); + if (var->option.value == ptr) + { + found= true; + var->origin_filename= filename; + var->value_origin= here; + /* don't break early, search for all matches */ + } + } + + DBUG_ASSERT(found); // variable must have been found +} + +enum sys_var::where get_sys_var_value_origin(void *ptr) +{ + DBUG_ASSERT(!mysqld_server_started); // only to be used during startup + + for (uint i= 0; i < system_variable_hash.records; i++) + { + sys_var *var= (sys_var*) my_hash_element(&system_variable_hash, i); + if (var->option.value == ptr) + { + return var->value_origin; //first match + } + } + + DBUG_ASSERT(0); // variable must have been found + return sys_var::CONFIG; +} + + +/* + Find the next item in string of comma-separated items. + END_POS points at the end of the string. + ITEM_START and ITEM_END return the limits of the next item. + Returns true while items are available, false at the end. +*/ +static bool +engine_list_next_item(const char **pos, const char *end_pos, + const char **item_start, const char **item_end) +{ + if (*pos >= end_pos) + return false; + *item_start= *pos; + while (*pos < end_pos && **pos != ',') + ++*pos; + *item_end= *pos; + ++*pos; + return true; +} + + +static bool +resolve_engine_list_item(THD *thd, plugin_ref *list, uint32 *idx, + const char *pos, const char *pos_end, + bool error_on_unknown_engine, bool temp_copy) +{ + LEX_CSTRING item_str; + plugin_ref ref; + uint32 i; + THD *thd_or_null = (temp_copy ? 
thd : NULL); + + item_str.str= pos; + item_str.length= pos_end-pos; + ref= ha_resolve_by_name(thd_or_null, &item_str, false); + if (!ref) + { + if (error_on_unknown_engine) + { + ErrConvString err(pos, pos_end-pos, system_charset_info); + my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), err.ptr()); + return true; + } + return false; + } + /* Ignore duplicates, like --plugin-load does. */ + for (i= 0; i < *idx; ++i) + { + if (plugin_hton(list[i]) == plugin_hton(ref)) + { + if (!temp_copy) + plugin_unlock(NULL, ref); + return false; + } + } + list[*idx]= ref; + ++*idx; + return false; +} + + +/* + Helper for class Sys_var_pluginlist. + Resolve a comma-separated list of storage engine names to a null-terminated + array of plugin_ref. + + If TEMP_COPY is true, a THD must be given as well. In this case, the + allocated memory and locked plugins are registered in the THD and will + be freed / unlocked automatically. If TEMP_COPY is true, THD can be + passed as NULL, and resources must be freed explicitly later with + free_engine_list(). 
+*/ +plugin_ref * +resolve_engine_list(THD *thd, const char *str_arg, size_t str_arg_len, + bool error_on_unknown_engine, bool temp_copy) +{ + uint32 count, idx; + const char *pos, *item_start, *item_end; + const char *str_arg_end= str_arg + str_arg_len; + plugin_ref *res; + + count= 0; + pos= str_arg; + for (;;) + { + if (!engine_list_next_item(&pos, str_arg_end, &item_start, &item_end)) + break; + ++count; + } + + if (temp_copy) + res= (plugin_ref *)thd->calloc((count+1)*sizeof(*res)); + else + res= (plugin_ref *)my_malloc(PSI_INSTRUMENT_ME, (count+1)*sizeof(*res), MYF(MY_ZEROFILL|MY_WME)); + if (!res) + { + my_error(ER_OUTOFMEMORY, MYF(0), (int)((count+1)*sizeof(*res))); + goto err; + } + + idx= 0; + pos= str_arg; + for (;;) + { + if (!engine_list_next_item(&pos, str_arg_end, &item_start, &item_end)) + break; + DBUG_ASSERT(idx < count); + if (idx >= count) + break; + if (resolve_engine_list_item(thd, res, &idx, item_start, item_end, + error_on_unknown_engine, temp_copy)) + goto err; + } + + return res; + +err: + if (!temp_copy) + free_engine_list(res); + return NULL; +} + + +void +free_engine_list(plugin_ref *list) +{ + plugin_ref *p; + + if (!list) + return; + for (p= list; *p; ++p) + plugin_unlock(NULL, *p); + my_free(list); +} + + +plugin_ref * +copy_engine_list(plugin_ref *list) +{ + plugin_ref *p; + uint32 count, i; + + for (p= list, count= 0; *p; ++p, ++count) + ; + p= (plugin_ref *)my_malloc(PSI_INSTRUMENT_ME, (count+1)*sizeof(*p), MYF(0)); + if (!p) + { + my_error(ER_OUTOFMEMORY, MYF(0), (int)((count+1)*sizeof(*p))); + return NULL; + } + for (i= 0; i < count; ++i) + p[i]= my_plugin_lock(NULL, list[i]); + p[i] = NULL; + return p; +} + + +/* + Create a temporary copy of an engine list. The memory will be freed + (and the plugins unlocked) automatically, on the passed THD. 
+*/ +plugin_ref * +temp_copy_engine_list(THD *thd, plugin_ref *list) +{ + plugin_ref *p; + uint32 count, i; + + for (p= list, count= 0; *p; ++p, ++count) + ; + p= (plugin_ref *)thd->alloc((count+1)*sizeof(*p)); + if (!p) + { + my_error(ER_OUTOFMEMORY, MYF(0), (int)((count+1)*sizeof(*p))); + return NULL; + } + for (i= 0; i < count; ++i) + p[i]= my_plugin_lock(thd, list[i]); + p[i] = NULL; + return p; +} + + +char * +pretty_print_engine_list(THD *thd, plugin_ref *list) +{ + plugin_ref *p; + size_t size; + char *buf, *pos; + + if (!list || !*list) + return thd->strmake("", 0); + + size= 0; + for (p= list; *p; ++p) + size+= plugin_name(*p)->length + 1; + buf= static_cast(thd->alloc(size)); + if (!buf) + return NULL; + pos= buf; + for (p= list; *p; ++p) + { + LEX_CSTRING *name; + size_t remain; + + remain= buf + size - pos; + DBUG_ASSERT(remain > 0); + if (remain <= 1) + break; + if (pos != buf) + { + pos= strmake(pos, ",", remain-1); + --remain; + } + name= plugin_name(*p); + pos= strmake(pos, name->str, MY_MIN(name->length, remain-1)); + } + *pos= '\0'; + return buf; +} + +/* + Current version of the system_variable_hash. + Requires lock on LOCK_system_variables_hash. +*/ +ulonglong get_system_variable_hash_version(void) +{ + return system_variable_hash_version; +} + diff --git a/sql/set_var.h b/sql/set_var.h new file mode 100644 index 00000000..ce1d01b9 --- /dev/null +++ b/sql/set_var.h @@ -0,0 +1,489 @@ +#ifndef SET_VAR_INCLUDED +#define SET_VAR_INCLUDED +/* Copyright (c) 2002, 2013, Oracle and/or its affiliates. + Copyright (c) 2009, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + "public" interface to sys_var - server configuration variables. +*/ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include + +class sys_var; +class set_var; +class sys_var_pluginvar; +class PolyLock; +class Item_func_set_user_var; + +// This include needs to be here since item.h requires enum_var_type :-P +#include "item.h" /* Item */ +#include "sql_class.h" /* THD */ + +extern TYPELIB bool_typelib; + +struct sys_var_chain +{ + sys_var *first; + sys_var *last; +}; + +int mysql_add_sys_var_chain(sys_var *chain); +int mysql_del_sys_var_chain(sys_var *chain); + + +/** + A class representing one system variable - that is something + that can be accessed as @@global.variable_name or @@session.variable_name, + visible in SHOW xxx VARIABLES and in INFORMATION_SCHEMA.xxx_VARIABLES, + optionally it can be assigned to, optionally it can have a command-line + counterpart with the same name. +*/ +class sys_var: protected Value_source // for double_from_string_with_check +{ +public: + sys_var *next; + LEX_CSTRING name; + bool *test_load; + enum flag_enum { GLOBAL, SESSION, ONLY_SESSION, SCOPE_MASK=1023, + READONLY=1024, ALLOCATED=2048, PARSE_EARLY=4096, + NO_SET_STATEMENT=8192, AUTO_SET=16384}; + enum { NO_GETOPT=-1, GETOPT_ONLY_HELP=-2 }; + enum where { CONFIG, COMMAND_LINE, AUTO, SQL, COMPILE_TIME, ENV }; + + /** + Enumeration type to indicate for a system variable whether + it will be written to the binlog or not. 
+ */ + enum binlog_status_enum { VARIABLE_NOT_IN_BINLOG, + SESSION_VARIABLE_IN_BINLOG } binlog_status; + + my_option option; ///< min, max, default values are stored here + enum where value_origin; + const char *origin_filename; + +protected: + typedef bool (*on_check_function)(sys_var *self, THD *thd, set_var *var); + typedef bool (*on_update_function)(sys_var *self, THD *thd, enum_var_type type); + + int flags; ///< or'ed flag_enum values + const SHOW_TYPE show_val_type; ///< what value_ptr() returns for sql_show.cc + PolyLock *guard; ///< *second* lock that protects the variable + ptrdiff_t offset; ///< offset to the value from global_system_variables + on_check_function on_check; + on_update_function on_update; + const char *const deprecation_substitute; + +public: + sys_var(sys_var_chain *chain, const char *name_arg, const char *comment, + int flag_args, ptrdiff_t off, int getopt_id, + enum get_opt_arg_type getopt_arg_type, SHOW_TYPE show_val_type_arg, + longlong def_val, PolyLock *lock, enum binlog_status_enum binlog_status_arg, + on_check_function on_check_func, on_update_function on_update_func, + const char *substitute); + + virtual ~sys_var() = default; + + /** + All the cleanup procedures should be performed here + */ + virtual void cleanup() {} + /** + downcast for sys_var_pluginvar. Returns this if it's an instance + of sys_var_pluginvar, and 0 otherwise. + */ + virtual sys_var_pluginvar *cast_pluginvar() { return 0; } + + bool check(THD *thd, set_var *var); + const uchar *value_ptr(THD *thd, enum_var_type type, const LEX_CSTRING *base) const; + + /** + Update the system variable with the default value from either + session or global scope. The default value is stored in the + 'var' argument. Return false when successful. 
+ */ + bool set_default(THD *thd, set_var *var); + bool update(THD *thd, set_var *var); + + String *val_str_nolock(String *str, THD *thd, const uchar *value); + longlong val_int(bool *is_null, THD *thd, enum_var_type type, const LEX_CSTRING *base); + String *val_str(String *str, THD *thd, enum_var_type type, const LEX_CSTRING *base); + double val_real(bool *is_null, THD *thd, enum_var_type type, const LEX_CSTRING *base); + + SHOW_TYPE show_type() const { return show_val_type; } + int scope() const { return flags & SCOPE_MASK; } + virtual CHARSET_INFO *charset(THD *thd) const + { + return system_charset_info; + } + bool is_readonly() const { return flags & READONLY; } + /** + the following is only true for keycache variables, + that support the syntax @@keycache_name.variable_name + */ + bool is_struct() { return option.var_type & GET_ASK_ADDR; } + bool is_set_stmt_ok() const { return !(flags & NO_SET_STATEMENT); } + bool is_written_to_binlog(enum_var_type type) + { return type != OPT_GLOBAL && binlog_status == SESSION_VARIABLE_IN_BINLOG; } + bool check_update_type(const Item *item) + { + Item_result type= item->result_type(); + switch (option.var_type & GET_TYPE_MASK) { + case GET_INT: + case GET_UINT: + case GET_LONG: + case GET_ULONG: + case GET_LL: + case GET_ULL: + return type != INT_RESULT && + (type != DECIMAL_RESULT || item->decimals != 0); + case GET_STR: + case GET_STR_ALLOC: + return type != STRING_RESULT; + case GET_ENUM: + case GET_BOOL: + case GET_SET: + case GET_FLAGSET: + case GET_BIT: + return type != STRING_RESULT && type != INT_RESULT; + case GET_DOUBLE: + return type != INT_RESULT && type != REAL_RESULT && type != DECIMAL_RESULT; + default: + return true; + } + } + + bool check_type(enum_var_type type) + { + switch (scope()) + { + case GLOBAL: return type != OPT_GLOBAL; + case SESSION: return false; // always ok + case ONLY_SESSION: return type == OPT_GLOBAL; + } + return true; // keep gcc happy + } + bool register_option(DYNAMIC_ARRAY *array, 
int parse_flags) + { + DBUG_ASSERT(parse_flags == GETOPT_ONLY_HELP || + parse_flags == PARSE_EARLY || parse_flags == 0); + if (option.id == NO_GETOPT) + return 0; + if (parse_flags == GETOPT_ONLY_HELP) + { + if (option.id != GETOPT_ONLY_HELP) + return 0; + } + else + { + if (option.id == GETOPT_ONLY_HELP) + return 0; + if ((flags & PARSE_EARLY) != parse_flags) + return 0; + } + return insert_dynamic(array, (uchar*)&option); + } + void do_deprecated_warning(THD *thd); + /** + whether session value of a sysvar is a default one. + + in this simple implementation we don't distinguish between default + and non-default values. for most variables it's ok, they don't treat + default values specially. this method is overwritten in descendant + classes as necessary. + */ + virtual bool session_is_default(THD *thd) { return false; } + + virtual const uchar *default_value_ptr(THD *thd) const + { return (uchar*)&option.def_value; } + + virtual bool on_check_access_global(THD *thd) const; + virtual bool on_check_access_session(THD *thd) const + { + return false; + } + +private: + virtual bool do_check(THD *thd, set_var *var) = 0; + /** + save the session default value of the variable in var + */ + virtual void session_save_default(THD *thd, set_var *var) = 0; + /** + save the global default value of the variable in var + */ + virtual void global_save_default(THD *thd, set_var *var) = 0; + virtual bool session_update(THD *thd, set_var *var) = 0; + virtual bool global_update(THD *thd, set_var *var) = 0; + +protected: + /** + A pointer to a value of the variable for SHOW. + It must be of show_val_type type (my_bool for SHOW_MY_BOOL, + int for SHOW_INT, longlong for SHOW_LONGLONG, etc). + */ + virtual const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const; + virtual const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const; + + /** + A pointer to a storage area of the variable, to the raw data. 
+ Typically it's the same as session_value_ptr(), but it's different, + for example, for ENUM, that is printed as a string, but stored as a number. + */ + uchar *session_var_ptr(THD *thd) const + { return ((uchar*)&(thd->variables)) + offset; } + + uchar *global_var_ptr() const + { return ((uchar*)&global_system_variables) + offset; } + + void *max_var_ptr() + { + return scope() == SESSION ? (((uchar*)&max_system_variables) + offset) : + 0; + } + + friend class Session_sysvars_tracker; + friend class Session_tracker; +}; + +#include "sql_plugin.h" /* SHOW_HA_ROWS, SHOW_MY_BOOL */ + + +/**************************************************************************** + Classes for parsing of the SET command +****************************************************************************/ + +/** + A base class for everything that can be set with SET command. + It's similar to Items, an instance of this is created by the parser + for every assigmnent in SET (or elsewhere, e.g. in SELECT). +*/ +class set_var_base :public Sql_alloc +{ +public: + set_var_base() = default; + virtual ~set_var_base() = default; + virtual int check(THD *thd)=0; /* To check privileges etc. */ + virtual int update(THD *thd)=0; /* To set the value */ + virtual int light_check(THD *thd) { return check(thd); } /* for PS */ + virtual bool is_system() { return FALSE; } + /** + @returns whether this variable is @@@@optimizer_trace. + */ + virtual bool is_var_optimizer_trace() const { return false; } +}; + + +/** + Structure for holding unix timestamp and high precision second part. + */ +typedef struct my_time_t_hires +{ + my_time_t unix_time; + ulong second_part; +} my_time_t_hires; + + +/** + set_var_base descendant for assignments to the system variables. 
+*/ +class set_var :public set_var_base +{ +public: + sys_var *var; ///< system variable to be updated + Item *value; ///< the expression that provides the new value of the variable + enum_var_type type; + union ///< temp storage to hold a value between sys_var::check and ::update + { + ulonglong ulonglong_value; ///< for unsigned integer, set, enum sysvars + longlong longlong_value; ///< for signed integer + double double_value; ///< for Sys_var_double + plugin_ref plugin; ///< for Sys_var_plugin + plugin_ref *plugins; ///< for Sys_var_pluginlist + Time_zone *time_zone; ///< for Sys_var_tz + LEX_STRING string_value; ///< for Sys_var_charptr and others + my_time_t_hires timestamp; ///< for Sys_var_vers_asof + const void *ptr; ///< for Sys_var_struct + } save_result; + LEX_CSTRING base; /**< for structured variables, like keycache_name.variable_name */ + + set_var(THD *thd, enum_var_type type_arg, sys_var *var_arg, + const LEX_CSTRING *base_name_arg, Item *value_arg); + virtual bool is_system() { return 1; } + int check(THD *thd); + int update(THD *thd); + int light_check(THD *thd); + virtual bool is_var_optimizer_trace() const + { + extern sys_var *Sys_optimizer_trace_ptr; + return var == Sys_optimizer_trace_ptr; + } +}; + + +/* User variables like @my_own_variable */ +class set_var_user: public set_var_base +{ + Item_func_set_user_var *user_var_item; +public: + set_var_user(Item_func_set_user_var *item) + :user_var_item(item) + {} + int check(THD *thd); + int update(THD *thd); + int light_check(THD *thd); +}; + +/* For SET PASSWORD */ + +class set_var_password: public set_var_base +{ + LEX_USER *user; +public: + set_var_password(LEX_USER *user_arg) :user(user_arg) + {} + int check(THD *thd); + int update(THD *thd); +}; + +/* For SET ROLE */ + +class set_var_role: public set_var_base +{ + LEX_CSTRING role; + privilege_t access; +public: + set_var_role(LEX_CSTRING role_arg) : role(role_arg), access(NO_ACL) {} + int check(THD *thd); + int update(THD *thd); +}; + +/* 
For SET DEFAULT ROLE */ + +class set_var_default_role: public set_var_base +{ + LEX_USER *user, *real_user; + LEX_CSTRING role; + const char *real_role; +public: + set_var_default_role(LEX_USER *user_arg, LEX_CSTRING role_arg) : + user(user_arg), role(role_arg) {} + int check(THD *thd); + int update(THD *thd); +}; + +/* For SET NAMES and SET CHARACTER SET */ + +class set_var_collation_client: public set_var_base +{ + CHARSET_INFO *character_set_client; + CHARSET_INFO *character_set_results; + CHARSET_INFO *collation_connection; +public: + set_var_collation_client(CHARSET_INFO *client_coll_arg, + CHARSET_INFO *connection_coll_arg, + CHARSET_INFO *result_coll_arg) + :character_set_client(client_coll_arg), + character_set_results(result_coll_arg), + collation_connection(connection_coll_arg) + {} + int check(THD *thd); + int update(THD *thd); +}; + + +/* optional things, have_* variables */ +extern SHOW_COMP_OPTION have_csv, have_innodb; +extern SHOW_COMP_OPTION have_ndbcluster, have_partitioning; +extern SHOW_COMP_OPTION have_profiling; + +extern SHOW_COMP_OPTION have_ssl, have_symlink, have_dlopen; +extern SHOW_COMP_OPTION have_query_cache; +extern SHOW_COMP_OPTION have_geometry, have_rtree_keys; +extern SHOW_COMP_OPTION have_crypt; +extern SHOW_COMP_OPTION have_compress; +extern SHOW_COMP_OPTION have_openssl; + +/* + Prototypes for helper functions +*/ +ulong get_system_variable_hash_records(void); +ulonglong get_system_variable_hash_version(void); + +SHOW_VAR* enumerate_sys_vars(THD *thd, bool sorted, enum enum_var_type type); +int fill_sysvars(THD *thd, TABLE_LIST *tables, COND *cond); + +sys_var *find_sys_var(THD *thd, const char *str, size_t length= 0, + bool throw_error= false); +int sql_set_variables(THD *thd, List *var_list, bool free); + +#define SYSVAR_AUTOSIZE(VAR,VAL) \ + do { \ + VAR= (VAL); \ + set_sys_var_value_origin(&VAR, sys_var::AUTO); \ + } while(0) + +#define SYSVAR_AUTOSIZE_IF_CHANGED(VAR,VAL,TYPE) \ + do { \ + TYPE tmp= (VAL); \ + if (VAR != 
tmp) \ + { \ + VAR= (VAL); \ + set_sys_var_value_origin(&VAR, sys_var::AUTO); \ + } \ + } while(0) + +void set_sys_var_value_origin(void *ptr, enum sys_var::where here, + const char *filename= NULL); + +enum sys_var::where get_sys_var_value_origin(void *ptr); +inline bool IS_SYSVAR_AUTOSIZE(void *ptr) +{ + enum sys_var::where res= get_sys_var_value_origin(ptr); + return (res == sys_var::AUTO || res == sys_var::COMPILE_TIME); +} + +bool fix_delay_key_write(sys_var *self, THD *thd, enum_var_type type); + +sql_mode_t expand_sql_mode(sql_mode_t sql_mode); +const char *sql_mode_string_representation(uint bit_number); +bool sql_mode_string_representation(THD *thd, sql_mode_t sql_mode, + LEX_CSTRING *ls); +int default_regex_flags_pcre(THD *thd); + +extern sys_var *Sys_autocommit_ptr, *Sys_last_gtid_ptr, + *Sys_character_set_client_ptr, *Sys_character_set_connection_ptr, + *Sys_character_set_results_ptr; + +CHARSET_INFO *get_old_charset_by_name(const char *old_name); + +int sys_var_init(); +uint sys_var_elements(); +int sys_var_add_options(DYNAMIC_ARRAY *long_options, int parse_flags); +void sys_var_end(void); +bool check_has_super(sys_var *self, THD *thd, set_var *var); +plugin_ref *resolve_engine_list(THD *thd, const char *str_arg, size_t str_arg_len, + bool error_on_unknown_engine, bool temp_copy); +void free_engine_list(plugin_ref *list); +plugin_ref *copy_engine_list(plugin_ref *list); +plugin_ref *temp_copy_engine_list(THD *thd, plugin_ref *list); +char *pretty_print_engine_list(THD *thd, plugin_ref *list); + +#endif diff --git a/sql/share/CMakeLists.txt b/sql/share/CMakeLists.txt new file mode 100644 index 00000000..33d0bea2 --- /dev/null +++ b/sql/share/CMakeLists.txt @@ -0,0 +1,61 @@ +# Copyright (c) 2006 MySQL AB, 2009, 2010 Sun Microsystems, Inc. +# Use is subject to license terms. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA + +SET (dirs +bulgarian +chinese +czech +danish +dutch +english +estonian +french +german +georgian +greek +hindi +hungarian +italian +japanese +korean +norwegian-ny +norwegian +polish +portuguese +romanian +russian +serbian +slovak +spanish +swedish +ukrainian +) + +SET(files + errmsg-utf8.txt +) + +INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/charsets DESTINATION ${INSTALL_MYSQLSHAREDIR} + COMPONENT Common PATTERN "languages.html" EXCLUDE +) + +IF (NOT WITHOUT_SERVER) + FOREACH (dir ${dirs}) + INSTALL(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${dir} + DESTINATION ${INSTALL_MYSQLSHAREDIR} COMPONENT Server) + ENDFOREACH() + INSTALL(FILES ${files} DESTINATION ${INSTALL_MYSQLSHAREDIR} COMPONENT Server) +ENDIF() diff --git a/sql/share/charsets/Index.xml b/sql/share/charsets/Index.xml new file mode 100644 index 00000000..cec3bfcf --- /dev/null +++ b/sql/share/charsets/Index.xml @@ -0,0 +1,736 @@ + + + + + + Copyright (c) 2003-2005 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + +This file lists all of the available character sets. +To make maintaining easier please: + - keep records sorted by collation number. + - change charsets.max-id when adding a new collation. + + + + Traditional Chinese + Big5 Traditional Chinese + big-5 + bigfive + big-five + cn-big5 + csbig5 + + primary + compiled + + + binary + compiled + + + + + Central European + ISO 8859-2 Central European + csisolatin2 + iso-8859-2 + iso-ir-101 + iso_8859-2 + iso_8859-2:1987 + l2 + + + Hungarian + Polish + Romanian + Croatian + Slovak + Slovenian + Sorbian + + + + + + + + + + + + + + Western + DEC West European + + + Dutch + English + French + German Duden + Italian + Latin + Portuguese + Spanish + + + + + + + + + + + Western + DOS West European + 850 + cspc850multilingual + ibm850 + + Dutch + English + French + German Duden + Italian + Latin + Portuguese + Spanish + + + + + + + + + + + + Western + cp1252 West European + csisolatin1 + iso-8859-1 + iso-ir-100 + iso_8859-1 + iso_8859-1:1987 + l1 + latin1 + + + primary + compiled + + + + + + binary + compiled + + + Dutch + English + French + German Duden + Italian + Latin + Portuguese + Spanish + + + Dutch + English + French + German Duden + Italian + Latin + Portuguese + Spanish + + + + + Western + HP West European + hproman8 + + + Dutch + English + French + German Duden + Italian + Latin + Portuguese + Spanish + + + + + + + + + + + Cyrillic + KOI8-R Relcom Russian + koi8-r + cskoi8r + + + + + + + + + + + + Western + 7bit Swedish + iso-646-se + + + + + + + + + + + + Western + 
US ASCII + us + us-ascii + csascii + iso-ir-6 + iso646-us + + + + + + + + + + + + Japanese + EUC-JP Japanese + euc-jp + + primary + compiled + + + binary + compiled + + + + + Japanese + Shift-JIS Japanese + s-jis + shift-jis + x-sjis + + primary + compiled + + + binary + compiled + + + + + Cyrillic + Windows Cyrillic + windows-1251 + ms-cyr + ms-cyrillic + + Belarusian + Bulgarian + Macedonian + Russian + Serbian + Mongolian + Ukrainian + + + + + Belarusian + Bulgarian + Macedonian + Russian + Serbian + Mongolian + Ukrainian + + + Belarusian + Bulgarian + Macedonian + Russian + Serbian + Mongolian + Ukrainian + + + + + + + + + + + Hebrew + ISO 8859-8 Hebrew + csisolatinhebrew + iso-8859-8 + iso-ir-138 + + + + + + + + + + + + Thai + TIS620 Thai + tis-620 + + primary + compiled + + + binary + compiled + + + + + Korean + EUC-KR Korean + euc_kr + euc-kr + + primary + compiled + + + binary + compiled + + + + + Baltic + ISO 8859-13 Baltic + BalticRim + iso-8859-13 + l7 + + Estonian + + + Latvian + Lithuanian + primary + + + Latvian + Lithuanian + + + + + + + + + + + + Cyrillic + KOI8-U Ukrainian + koi8-u + + + + + + + + + + + + Simplified Chinese + GB2312 Simplified Chinese + chinese + iso-ir-58 + + primary + compiled + + + binary + compiled + + + + + Greek + ISO 8859-7 Greek + csisolatingreek + ecma-118 + greek8 + iso-8859-7 + iso-ir-126 + + + + + + + + + + + + Central European + Windows Central European + ms-ce + windows-1250 + + Hungarian + Polish + Romanian + Croatian + Slovak + Slovenian + Sorbian + + + Croatian + + + Polish + + + compiled + + + + + + + + + + + + East Asian + GBK Simplified Chinese + cp936 + + primary + compiled + + + binary + compiled + + + + + Baltic + Windows Baltic + WinBaltRim + windows-1257 + + + + Latvian + Lithuanian + + + + + + + + + + + + + South Asian + ISO 8859-9 Turkish + csisolatin5 + iso-8859-9 + iso-ir-148 + l5 + latin5 + turkish + + + + + + + + + + + + South Asian + ARMSCII-8 Armenian + armscii-8 + + + + + + + + + + + + Unicode + 
UTF-8 Unicode + utf-8 + + primary + compiled + + + binary + compiled + + + + + Unicode + UCS-2 Unicode + + primary + compiled + + + binary + compiled + + + + + Cyrillic + DOS Russian + 866 + csibm866 + ibm866 + DOSCyrillicRussian + + + + + + + + + + + + Central European + DOS Kamenicky Czech-Slovak + + + + + + + + + + + + Central European + Mac Central European + MacCentralEurope + + Hungarian + Polish + Romanian + Croatian + Slovak + Slovenian + Sorbian + + + + + + + + + + + + Western + Mac West European + Mac + Macintosh + csmacintosh + + Dutch + English + French + German Duden + Italian + Latin + Portuguese + Spanish + + + + + + + + + + + + + + + Central European + DOS Central European + 852 + cp852 + ibm852 + + Hungarian + Polish + Romanian + Croatian + Slovak + Slovenian + Sorbian + + + + + + + + + + + + Arabic + Windows Arabic + ms-arab + windows-1256 + + + Arabic + Persian + Pakistani + Urdu + + + + + + + + + + + South Asian + GEOSTD8 Georgian + + + + + + + + + + + + Binary pseudo charset + + primary + compiled + + + + + Japanese + SJIS for Windows Japanese + ms_cp932 + sjis_cp932 + sjis_ms + + primary + compiled + + + binary + compiled + + + + + Japanese + UJIS for Windows Japanese + eucjpms + eucJP_ms + ujis_ms + ujis_cp932 + + primary + compiled + + + binary + compiled + + + + diff --git a/sql/share/charsets/README b/sql/share/charsets/README new file mode 100644 index 00000000..3c5b3206 --- /dev/null +++ b/sql/share/charsets/README @@ -0,0 +1,39 @@ +This directory holds configuration files that enable MySQL to work with +different character sets. It contains: + +charset_name.xml + Each charset_name.xml file contains information for a simple character + set. The information in the file describes character types, + lower- and upper-case equivalencies and sorting orders for the + character values in the set. + +Index.xml + The Index.xml file lists all of the available charset configurations, + including collations. 
+ + Each collation must have a unique number. The number is stored + IN THE DATABASE TABLE FILES and must not be changed. + + The max-id attribute of the element must be set to + the largest collation number. + +Compiled in or configuration file? + When should a character set be compiled in to MySQL's string library + (libmystrings), and when should it be placed in a charset_name.xml + configuration file? + + If the character set requires the strcoll functions or is a + multi-byte character set, it MUST be compiled in to the string + library. If it does not require these functions, it should be + placed in a charset_name.xml configuration file. + + If the character set uses any one of the strcoll functions, it + must define all of them. Likewise, if the set uses one of the + multi-byte functions, it must define them all. See the manual for + more information on how to add a complex character set to MySQL. + +Syntax of configuration files + The syntax is very simple. Words in array elements are + separated by arbitrary amounts of whitespace. Each word must be a + number in hexadecimal format. The ctype array has 257 words; the + other arrays (lower, upper, etc.) take up 256 words each after that. diff --git a/sql/share/charsets/armscii8.xml b/sql/share/charsets/armscii8.xml new file mode 100644 index 00000000..161d469c --- /dev/null +++ b/sql/share/charsets/armscii8.xml @@ -0,0 +1,139 @@ + + + + + + Copyright (c) 2003, 2004 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 10 10 01 02 01 02 01 02 01 02 01 02 01 02 01 02 + 01 02 01 02 01 02 01 02 01 02 01 02 01 02 01 02 + 01 02 01 02 01 02 01 02 01 02 01 02 01 02 01 02 + 01 02 01 02 01 02 01 02 01 02 01 02 01 02 01 02 + 01 02 01 02 01 02 01 02 01 02 01 02 01 02 10 10 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 B8 A9 AA AB AC AD AE AF + B0 B1 B3 B3 B5 B5 B7 B7 B9 B9 BB BB BD BD BF BF + C1 C1 C3 C3 C5 C5 C7 C7 C9 C9 CB CB CD CD CF CF + D1 D1 D3 D3 D5 D5 D7 D7 D9 D9 DB DB DD DD DF DF + E1 E1 E3 E3 E5 E5 E7 E7 E9 E9 EB EB ED ED EF EF + F1 F1 F3 F3 F5 F5 F7 F7 F9 F9 FB FB FD FD FE FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 
2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B2 B4 B4 B6 B6 B8 B8 BA BA BC BC BE BE + C0 C0 C2 C2 C4 C4 C6 C6 C8 C8 CA CA CC CC CE CE + D0 D0 D2 D2 D4 D4 D6 D6 D8 D8 DA DA DC DC DE DE + E0 E0 E2 E2 E4 E4 E6 E6 E8 E8 EA EA EC EC EE EE + F0 F0 F2 F2 F4 F4 F6 F6 F8 F8 FA FA FC FC FE FF + + + + + + +0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F +0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F +0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F +0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F +0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F +0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F +0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F +0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F +0080 0081 0082 0083 0084 0085 0086 0087 0088 0089 008A 008B 008C 008D 008E 008F +0090 0091 0092 0093 0094 0095 0096 0097 0098 0099 009A 009B 009C 009D 009E 009F +00A0 2741 00A7 0589 0029 0028 00BB 00AB 2014 002E 055D 002C 002D 055F 2026 055C +055B 055E 0531 0561 0532 0562 0533 0563 0534 0564 0535 0565 0536 0566 0537 0567 +0538 0568 0539 0569 053A 056A 053B 056B 053C 056C 053D 056D 053E 056E 053F 056F +0540 0570 0541 0571 0542 0572 0543 0573 0544 0574 0545 0575 0546 0576 0547 0577 +0548 0578 0549 0579 054A 057A 054B 057B 054C 057C 054D 057D 054E 057E 054F 057F +0550 0580 0551 0581 0552 0582 0553 0583 0554 0584 0555 0585 0556 0586 2019 0027 + + + + + + + 00 01 02 03 04 05 06 07 
08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + + diff --git a/sql/share/charsets/ascii.xml b/sql/share/charsets/ascii.xml new file mode 100644 index 00000000..0f90f498 --- /dev/null +++ b/sql/share/charsets/ascii.xml @@ -0,0 +1,139 @@ + + + + + + Copyright (c) 2003, 2007 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 
2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + +0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F +0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F +0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F +0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F +0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F +0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F +0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F +0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 + + + + + + + 00 01 02 03 04 05 06 07 
08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + + diff --git a/sql/share/charsets/cp1250.xml b/sql/share/charsets/cp1250.xml new file mode 100644 index 00000000..55379ef6 --- /dev/null +++ b/sql/share/charsets/cp1250.xml @@ -0,0 +1,183 @@ + + + + + + Copyright (c) 2003, 2005 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20 + 20 20 10 20 10 10 10 10 20 10 01 10 01 01 01 01 + 20 10 10 10 10 10 10 10 20 10 02 10 02 02 02 02 + 48 10 10 01 10 01 10 01 10 10 01 10 10 10 10 01 + 10 10 10 02 10 10 10 10 10 02 02 10 01 10 02 02 + 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 10 01 01 01 01 01 01 01 02 + 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 10 02 02 02 02 02 02 02 10 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 9A 8B 9C 9D 9E 9F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 B3 A4 B9 A6 A7 A8 A9 BA AB AC AD AE BF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BE BD BE BF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 D7 F8 F9 FA FB FC FD FE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 
2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 8A 9B 8C 8D 8E 8F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 A3 B4 B5 B6 B7 B8 A5 AA BB BC BD BC AF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 F7 D8 D9 DA DB DC DD DE FF + + + + + + +0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F +0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F +0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F +0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F +0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F +0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F +0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F +0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F +20AC 0000 201A 0000 201E 2026 2020 2021 0000 2030 0160 2039 015A 0164 017D 0179 +0000 2018 2019 201C 201D 2022 2013 2014 0000 2122 0161 203A 015B 0165 017E 017A +00A0 02C7 02D8 0141 00A4 0104 00A6 00A7 00A8 00A9 015E 00AB 00AC 00AD 00AE 017B +00B0 00B1 02DB 0142 00B4 00B5 00B6 00B7 00B8 0105 015F 00BB 013D 02DD 013E 017C +0154 00C1 00C2 0102 00C4 0139 0106 00C7 010C 00C9 0118 00CB 011A 00CD 00CE 010E +0110 0143 0147 00D3 00D4 0150 00D6 00D7 0158 016E 00DA 0170 00DC 00DD 0162 00DF +0155 00E1 00E2 0103 00E4 013A 0107 00E7 010D 00E9 0119 00EB 011B 00ED 00EE 010F +0111 0144 0148 00F3 00F4 0151 00F6 00F7 0159 016F 00FA 0171 00FC 00FD 0163 02D9 + + + + + + + 00 01 02 03 04 05 06 07 
08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 46 49 4A 4B 4C 4D 4E 4F 50 52 53 55 + 56 57 58 59 5B 5C 5D 5E 5F 60 61 63 64 65 66 67 + 68 41 42 43 46 49 4A 4B 4C 4D 4E 4F 50 52 53 55 + 56 57 58 59 5B 5C 5D 5E 5F 60 61 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 5A 8B 5A 5B 62 62 + 90 91 92 93 94 95 96 97 98 99 5A 9B 5A 5B 62 62 + 20 A1 A2 50 A4 41 A6 59 A8 A9 59 AB AC AD AE 62 + B0 B1 B2 50 B4 B5 B6 B7 B8 41 59 BB 50 BD 50 62 + 58 41 41 41 41 50 45 43 44 49 49 49 49 4D 4D 46 + 47 53 53 55 55 55 55 D7 58 5C 5C 5C 5C 60 5B 59 + 58 41 41 41 41 50 45 43 44 49 49 49 49 4D 4D 46 + 47 53 53 55 55 55 55 F7 58 5C 5C 5C 5C 60 5B FF + + + + + +00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F +10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F +20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F +30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F +40 41 43 44 48 4B 4D 4E 4F 50 52 53 54 56 57 59 +5B 5C 5D 5F 62 64 66 67 68 69 6B 90 91 92 93 94 +95 41 43 44 48 4B 4D 4E 4F 50 52 53 54 56 57 59 +5B 5C 5D 5F 62 64 66 67 68 69 6B 96 97 98 99 9A +9B 9C 9E 9F A0 A1 A2 A3 A4 A5 60 A6 5F 62 6C 6B +A7 A8 A9 AA AB AC AD AE AF B0 60 B1 5F 62 6C 6B +B2 B3 B4 54 B5 41 B6 B7 B8 B9 5F BA BB BC BD 6B +BE BF C0 54 C1 C2 C3 C4 C5 41 5F C6 54 C7 54 6B +5D 41 41 41 41 54 47 44 46 4B 4B 4B 4B 50 50 48 +4A 57 57 59 59 59 59 C8 5D 64 64 64 64 69 62 5F +5D 41 41 41 41 54 47 44 46 4B 4B 4B 4B 50 50 48 +4A 57 57 59 59 59 59 C9 5D 64 64 64 64 69 62 FF + + + + + +00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F +10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F +20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F +30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F +40 41 43 44 48 4B 4D 4E 4F 50 52 53 54 56 57 59 +5B 5C 5D 5F 62 64 66 67 68 69 6B 90 91 92 93 94 +95 41 43 44 48 4B 4D 4E 4F 50 52 53 54 56 57 59 +5B 5C 5D 5F 62 64 66 67 68 69 6B 96 97 98 99 9A +9B 9C 9E 9F A0 A1 A2 A3 A4 
A5 5F A6 60 62 6B 6C +A7 A8 A9 AA AB AC AD AE AF B0 5F B1 60 62 6B 6C +B2 B3 B4 55 B5 42 B6 B7 B8 B9 5F BA BB BC BD 6D +BE BF C0 55 C1 C2 C3 C4 C5 42 5F C6 54 C7 54 6D +5D 41 41 41 41 54 47 44 44 4B 4C 4B 4B 50 50 48 +48 58 57 5A 59 59 59 C8 5D 64 64 64 64 69 62 5F +5D 41 41 41 41 54 47 44 44 4B 4C 4B 4B 50 50 48 +48 58 57 5A 59 59 59 C9 5D 64 64 64 64 69 62 FF + + + + + + + + + + diff --git a/sql/share/charsets/cp1251.xml b/sql/share/charsets/cp1251.xml new file mode 100644 index 00000000..33a36289 --- /dev/null +++ b/sql/share/charsets/cp1251.xml @@ -0,0 +1,214 @@ + + + + + + Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 00 + 01 01 00 02 00 00 00 00 00 00 01 00 01 01 01 01 + 02 00 00 00 00 00 00 00 00 00 02 00 02 02 02 02 + 00 01 02 01 00 01 00 00 01 00 01 00 00 00 00 01 + 00 00 01 02 02 00 00 00 02 00 02 00 02 01 02 02 + 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 + 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 90 83 82 83 84 85 86 87 88 89 9A 8B 9C 9D 9E 9F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A2 A2 BC A4 B4 A6 A7 B8 A9 BA AB AC AD AE BF + B0 B1 B3 B3 B4 B5 B6 B7 B8 B9 BA BB BC BE BE BF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 
30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 81 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 80 91 92 93 94 95 96 97 98 99 8A 9B 8C 9D 8E 8F + A0 A1 A1 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B2 A5 B5 B6 B7 A8 B9 AA BB A3 BD BD AF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + + + + + + + 0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F + 0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F + 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F + 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F + 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F + 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F + 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F + 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F + 0402 0403 201A 0453 201E 2026 2020 2021 20AC 2030 0409 2039 040A 040C 040B 040F + 0452 2018 2019 201C 201D 2022 2013 2014 0000 2122 0459 203A 045A 045C 045B 045F + 00A0 040E 045E 0408 00A4 0490 00A6 00A7 0401 00A9 0404 00AB 00AC 00AD 00AE 0407 + 00B0 00B1 0406 0456 0491 00B5 00B6 00B7 0451 2116 0454 00BB 0458 0405 0455 0457 + 0410 0411 0412 0413 0414 0415 0416 0417 0418 0419 041A 041B 041C 041D 041E 041F + 0420 0421 0422 0423 0424 0425 0426 0427 0428 0429 042A 042B 042C 042D 042E 042F + 0430 0431 0432 0433 0434 0435 0436 0437 0438 0439 043A 043B 043C 043D 043E 043F + 0440 0441 0442 0443 0444 0445 0446 0447 0448 0449 044A 044B 044C 044D 044E 044F + + + + + + + + 00 01 02 03 
04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7C 7D 7E 7F 80 + 81 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 82 83 84 85 FF + FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF + FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF + FF FF FF FF FF FF FF FF 61 FF FF FF FF FF FF FF + FF FF FF FF FF FF FF FF 61 FF FF FF FF FF FF FF + 5B 5C 5D 5E 5F 60 62 63 64 65 66 67 68 69 6A 6B + 6C 6D 6E 6F 70 71 72 73 74 75 76 77 78 79 7A 7B + 5B 5C 5D 5E 5F 60 62 63 64 65 66 67 68 69 6A 6B + 6C 6D 6E 6F 70 71 72 73 74 75 76 77 78 79 7A 7B + + + + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 43 45 47 49 4B 4D 4F 51 53 55 57 59 5B 5D + 5F 61 63 65 67 69 6B 6D 6F 71 73 D3 D4 D5 D6 D7 + D8 41 43 45 47 49 4B 4D 4F 51 53 55 57 59 5B 5D + 5F 61 63 65 67 69 6B 6D 6F 71 73 D9 DA DB DC DD + 81 83 DE 83 DF E0 E1 E2 E3 E4 A1 E5 A7 9D B3 C1 + 81 E6 E7 E8 E9 EA EB EC ED EE A1 EF A7 9D B3 C1 + F0 B7 B7 99 F1 7D F2 F3 87 F4 89 F5 F6 F7 F8 95 + F9 FA 93 93 7D FB FC FD 87 FE 89 FF 99 8F 8F 95 + 75 77 79 7B 7F 85 8B 8D 91 97 9B 9F A3 A5 A9 AB + AD AF B1 B5 B9 BB BD BF C3 C5 C7 C9 CB CD CF D1 + 75 77 79 7B 7F 85 8B 8D 91 97 9B 9F A3 A5 A9 AB + AD AF B1 B5 B9 BB BD BF C3 C5 C7 C9 CB CD CF D1 + + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 43 45 47 49 4B 4D 4F 51 53 55 57 59 5B 5D + 5F 61 63 65 67 69 6B 6D 6F 71 73 D3 D4 D5 D6 D7 + D8 42 44 46 48 4A 4C 4E 50 52 54 56 58 5A 5C 5E + 60 62 64 66 68 6A 6C 6E 70 72 
74 D9 DA DB DC DD + 81 83 DE 84 DF E0 E1 E2 E3 E4 A1 E5 A7 9D B3 C1 + 82 E6 E7 E8 E9 EA EB EC ED EE A2 EF A8 9E B4 C2 + F0 B7 B8 99 F1 7D F2 F3 87 F4 89 F5 F6 F7 F8 95 + F9 FA 93 94 7E FB FC FD 88 FE 8A FF 9A 8F 90 96 + 75 77 79 7B 7F 85 8B 8D 91 97 9B 9F A3 A5 A9 AB + AD AF B1 B5 B9 BB BD BF C3 C5 C7 C9 CB CD CF D1 + 76 78 7A 7C 80 86 8C 8E 92 98 9C A0 A4 A6 AA AC + AE B0 B2 B6 BA BC BE C0 C4 C6 C8 CA CC CE D0 D2 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 20 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + A5 A6 A7 A8 A9 AA AB AC AD AE AF B0 B1 B2 B3 B4 + B5 B6 B7 B8 B9 BA BB BC BD BE BF C0 C1 C2 C3 C4 + C5 C6 C7 C8 C9 84 CA CB 88 CC 87 CD CE CF D0 8D + D1 D2 8C 8C 84 D3 D4 D5 88 D6 87 D7 D8 D9 DA 8D + 80 81 82 83 85 86 89 8A 8B 8E 8F 90 91 92 93 94 + 95 96 97 98 99 9A 9B 9C 9D 9E 9F A0 A1 A2 A3 A4 + 80 81 82 83 85 86 89 8A 8B 8E 8F 90 91 92 93 94 + 95 96 97 98 99 9A 9B 9C 9D 9E 9F A0 A1 A2 A3 A4 + + + + + + + diff --git a/sql/share/charsets/cp1256.xml b/sql/share/charsets/cp1256.xml new file mode 100644 index 00000000..4584b30c --- /dev/null +++ b/sql/share/charsets/cp1256.xml @@ -0,0 +1,142 @@ + + + + + + + + Copyright (C) 2003 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 00 00 + 00 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20 + 00 03 00 02 00 00 00 00 00 00 00 00 01 03 03 00 + 03 10 10 10 10 00 00 00 00 00 00 00 02 00 00 00 + 00 10 00 00 00 00 00 00 00 00 00 10 10 10 00 00 + 10 10 00 00 00 00 00 00 00 00 10 10 00 00 00 10 + 00 03 03 03 03 03 03 03 03 03 03 03 03 03 03 03 + 03 03 03 03 03 03 03 00 03 03 03 03 03 03 03 03 + 02 03 02 03 03 03 03 02 02 02 02 02 03 03 02 02 + 03 03 03 03 02 03 03 00 03 02 03 02 02 00 00 00 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 54 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 54 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 9C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 
2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 74 55 56 57 58 59 5A 5B 5C 5F 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 74 55 56 57 58 59 5A 7B 7C 7F 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 8C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F + 0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F + 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F + 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F + 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F + 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F + 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F + 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F + 20AC 067E 201A 0192 201E 2026 2020 2021 02C6 2030 0000 2039 0152 0686 0698 0000 + 06AF 2018 2019 201C 201D 2022 2013 2014 0000 2122 0000 203A 0153 200C 200D 0000 + 00A0 060C 00A2 00A3 00A4 00A5 00A6 00A7 00A8 00A9 0000 00AB 00AC 00AD 00AE 00AF + 00B0 00B1 00B2 00B3 00B4 00B5 00B6 00B7 00B8 00B9 061B 00BB 00BC 00BD 00BE 061F + 0000 0621 0622 0623 0624 0625 0626 0627 0628 0629 062A 062B 062C 062D 062E 062F + 0630 0631 0632 0633 0634 0635 0636 00D7 0637 0638 0639 063A 0640 0641 0642 0643 + 00E0 0644 00E2 0645 0646 0647 0648 00E7 00E8 00E9 00EA 00EB 0649 064A 00EE 00EF + 064B 064C 064D 064E 00F4 064F 0650 00F7 0651 00F9 0652 00FB 00FC 200E 200F 0000 + + + + + + + 00 01 02 
03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 45 47 4A 4C 52 55 57 59 5D 5F 61 63 65 67 + 6C 6E 70 72 74 76 7B 7D 7F 81 83 B9 BA BB BC BD + BE 41 45 47 4A 4C 52 55 57 59 5D 5F 61 63 65 67 + 6C 6E 70 72 74 76 7B 7D 7F 81 83 BF C0 C1 C2 C3 + C4 8E C5 54 C6 C7 C8 C9 CA CB CC CD 6A 92 99 CE + A5 CF D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 6A DA DB DC + DD B6 DE DF E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB + EC ED EE EF F0 F1 F2 F3 F4 F5 B7 F6 F7 F8 F9 B8 + FA 85 86 87 88 89 8A 8B 8C 8D 9F 90 91 93 94 95 + 96 97 98 9A 9B 9C 9D FB 9E 9F A0 A1 AD A2 A3 A4 + 43 A6 44 A7 A8 A9 AA 49 4E 4F 50 51 AB AC 5B 5C + AE AF B0 B1 69 B2 B3 FC B4 78 B5 79 7A FD FE FF + + + + + + + + + diff --git a/sql/share/charsets/cp1257.xml b/sql/share/charsets/cp1257.xml new file mode 100644 index 00000000..d2f3d59f --- /dev/null +++ b/sql/share/charsets/cp1257.xml @@ -0,0 +1,228 @@ + + + + + + Copyright (C) 2003 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 01 00 01 00 00 00 00 01 + 00 00 00 00 00 00 00 00 02 00 02 00 00 00 00 02 + 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 00 01 01 01 01 01 01 01 02 + 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 00 02 02 02 02 02 02 02 00 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 B8 A9 BA AB AC AD AE BF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 D7 F8 F9 FA FB FC FD FE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 
2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 BA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 A8 B9 BA BB BC BD BE AF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 F7 D8 D9 DA DB DC DD DE FF + + + + + + + 0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F + 0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F + 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F + 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F + 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F + 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F + 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F + 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F + 20AC 0000 201A 0000 201E 2026 2020 2021 0000 2030 0000 2039 0000 00A8 02C7 00B8 + 0000 2018 2019 201C 201D 2022 2013 2014 0000 2122 0000 203A 0000 00AF 02DB 0000 + 00A0 0000 00A2 00A3 00A4 0000 00A6 00A7 00D8 00A9 0156 00AB 00AC 00AD 00AE 00C6 + 00B0 00B1 00B2 00B3 00B4 00B5 00B6 00B7 00F8 00B9 0157 00BB 00BC 00BD 00BE 00E6 + 0104 012E 0100 0106 00C4 00C5 0118 0112 010C 00C9 0179 0116 0122 0136 012A 013B + 0160 0143 0145 00D3 014C 00D5 00D6 00D7 0172 0141 015A 016A 00DC 017B 017D 00DF + 0105 012F 0101 0107 00E4 00E5 0119 0113 010D 00E9 017A 0117 0123 0137 012B 013C + 0161 0144 0146 00F3 014D 00F5 00F6 00F7 0173 0142 015B 016B 00FC 017C 017E 02D9 + + + + + + + 00 01 02 
03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 43 44 46 47 4A 4B 4C 4D 50 51 52 53 54 55 + 56 57 58 59 5B 5C 5F 60 61 4E FF 62 63 64 65 66 + 67 41 43 44 46 47 4A 4B 4C 4D 50 51 52 53 54 55 + 56 57 58 59 5B 5C 5F 60 61 4E FF 68 69 6A 6B FF + FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF + FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF + FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF + FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF + 42 4F FF FF FF FF 48 FF 45 FF FF 49 FF FF FF FF + 5A FF FF FF FF FF FF FF 5E FF FF 5D FF FF FF FF + FF 4F FF FF FF FF 48 FF 45 FF FF 49 FF FF FF FF + 5A FF FF FF FF FF FF FF 5E FF FF 5D FF FF FF FF + + + + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 4D 4F 55 57 61 63 67 69 6F 71 75 7B 7D 83 + 8F 91 93 97 9E A0 A8 AA AC AE B0 B8 B9 BA BB BC + BD 41 4D 4F 55 57 61 63 67 69 6F 71 75 7B 7D 83 + 8F 91 93 97 9E A0 A8 AA AC AE B0 BE BF C0 C1 C4 + C5 C6 C7 C8 C9 CA CB CC CD CE CF D0 D1 D2 D3 D4 + D5 D6 D7 D8 D9 DA DB DC DD DE DF E0 E1 E2 E3 E4 + E5 E6 E7 E8 E9 EA EB EC 83 ED 93 EE EF F0 F1 41 + F2 F3 F4 F5 F6 F7 F8 F9 83 FA 93 FB FC FD FE 41 + 41 69 41 4F 41 41 57 57 4F 57 B0 57 63 71 69 75 + 97 7D 7D 83 83 83 83 C2 A0 75 97 A0 A0 B0 B0 97 + 41 69 41 4F 41 41 57 57 4F 57 B0 57 63 71 69 75 + 97 7D 7D 83 83 83 83 C3 A0 75 97 A0 A0 B0 B0 FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 4D 4F 55 57 61 63 67 69 6F 71 75 7B 7D 83 + 8F 91 93 97 9E A0 A8 AA AC AE B0 B8 B9 BA BB BC + BD 41 4D 4F 55 57 61 63 67 69 6F 71 75 7B 7D 83 + 8F 91 93 97 9E A0 A8 AA AC 
AE B0 BE BF C0 C1 C4 + C5 C6 C7 C8 C9 CA CB CC CD CE CF D0 D1 D2 D3 D4 + D5 D6 D7 D8 D9 DA DB DC DD DE DF E0 E1 E2 E3 E4 + E5 E6 E7 E8 E9 EA EB EC 85 ED 95 EE EF F0 F1 4B + F2 F3 F4 F5 F6 F7 F8 F9 85 FA 95 FB FC FD FE 4B + 43 6B 45 51 47 49 59 5B 53 5D B2 5F 65 73 6D 77 + 99 7F 81 87 89 8B 8D C2 A2 79 9B A4 A6 B4 B6 9D + 43 6B 45 51 47 49 59 5B 53 5D B2 5F 65 73 6D 77 + 99 7F 81 87 89 8B 8D C3 A2 79 9B A4 A6 B4 B6 FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 4D 4F 55 57 61 63 67 69 6F 71 75 7B 7D 83 + 8F 91 93 97 9E A0 A8 AA AC AE B0 B8 B9 BA BB BC + BD 42 4E 50 56 58 62 64 68 6A 70 72 76 7C 7E 84 + 90 92 94 98 9F A1 A9 AB AD AF B1 BE BF C0 C1 C4 + C5 C6 C7 C8 C9 CA CB CC CD CE CF D0 D1 D2 D3 D4 + D5 D6 D7 D8 D9 DA DB DC DD DE DF E0 E1 E2 E3 E4 + E5 E6 E7 E8 E9 EA EB EC 85 ED 95 EE EF F0 F1 4B + F2 F3 F4 F5 F6 F7 F8 F9 86 FA 96 FB FC FD FE 4C + 43 6B 45 51 47 49 59 5B 53 5D B2 5F 65 73 6D 77 + 99 7F 81 87 89 8B 8D C2 A2 79 9B A4 A6 B4 B6 9D + 44 6C 46 52 48 4A 5A 5C 54 5E B3 60 66 74 6E 78 + 9A 80 82 88 8A 8C 8E C3 A3 7A 9C A5 A7 B5 B7 FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 47 49 4D 4F 57 59 5D 5F 65 67 6B 6F 71 75 + 79 7B 7D 81 85 87 8D 8F 91 93 95 FF FF FF FF FF + FF 42 48 4A 4E 50 58 5A 5E 60 66 68 6C 70 72 76 + 7A 7C 7E 82 86 88 8E 90 92 94 96 FF FF FF FF FF + FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF + FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF + FF FF FF FF FF FF FF FF FF FF 7F FF FF FF FF FF + FF FF FF FF FF FF FF FF FF FF 80 FF FF FF FF FF + 45 63 43 FF FF FF 53 51 4B FF FF 55 5B 69 61 6D + 83 FF 73 FF 77 FF FF FF 8B FF FF 89 FF 99 97 FF + 46 64 44 FF FF FF 54 52 4C FF FF 56 5C 6A 62 6E + 84 
FF 74 FF 78 FF FF FF 8C FF FF 8A FF 9A 98 FF + + + + + + diff --git a/sql/share/charsets/cp850.xml b/sql/share/charsets/cp850.xml new file mode 100644 index 00000000..32eb37e3 --- /dev/null +++ b/sql/share/charsets/cp850.xml @@ -0,0 +1,139 @@ + + + + + + Copyright (C) 2003 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + 00 + 20 30 30 30 30 30 30 20 20 28 28 28 28 28 30 30 + 30 30 30 30 30 30 30 30 30 30 20 30 30 30 30 30 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 30 + 01 02 02 02 02 02 02 02 02 02 02 02 02 02 01 01 + 01 02 01 02 02 02 02 02 02 01 01 02 10 01 10 10 + 02 02 02 02 02 01 10 10 10 10 10 10 10 10 10 10 + 10 10 10 10 10 01 01 01 10 10 10 10 10 10 10 10 + 10 10 10 10 10 10 02 01 10 10 10 10 10 10 10 10 + 02 01 01 01 01 02 01 01 01 10 10 10 10 10 01 10 + 01 02 01 01 02 01 10 02 01 01 01 01 02 01 10 10 + 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 20 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 
3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 87 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 84 86 + 82 91 91 93 94 95 96 97 98 94 81 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A4 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 9A 90 41 8E 41 8F 80 45 45 45 49 49 49 8E 8F + 90 92 92 4F 99 4F 55 55 59 99 9A 9B 9C 9D 9E 9F + 41 49 4F 55 A5 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + +0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000a 000b 000c 000d 000e 000f +0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001a 001b 001c 001d 001e 001f +0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002a 002b 002c 002d 002e 002f +0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003a 003b 003c 003d 003e 003f +0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004a 004b 004c 004d 004e 004f +0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005a 005b 005c 005d 005e 005f +0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006a 006b 006c 006d 006e 006f +0070 
0071 0072 0073 0074 0075 0076 0077 0078 0079 007a 007b 007c 007d 007e 007f +00c7 00fc 00e9 00e2 00e4 00e0 00e5 00e7 00ea 00eb 00e8 00ef 00ee 00ec 00c4 00c5 +00c9 00e6 00c6 00f4 00f6 00f2 00fb 00f9 00ff 00d6 00dc 00f8 00a3 00d8 00d7 0192 +00e1 00ed 00f3 00fa 00f1 00d1 00aa 00ba 00bf 00ae 00ac 00bd 00bc 00a1 00ab 00bb +2591 2592 2593 2502 2524 00c1 00c2 00c0 00a9 2563 2551 2557 255d 00a2 00a5 2510 +2514 2534 252c 251c 2500 253c 00e3 00c3 255a 2554 2569 2566 2560 2550 256c 00a4 +00f0 00d0 00ca 00cb 00c8 0131 00cd 00ce 00cf 2518 250c 2588 2584 00a6 00cc 2580 +00d3 00df 00d4 00d2 00f5 00d5 00b5 00fe 00de 00da 00db 00d9 00fd 00dd 00af 00b4 +00ad 00b1 2017 00be 00b6 00a7 00f7 00b8 00b0 00a8 00b7 00b9 00b3 00b2 25a0 00a0 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 51 53 55 59 63 65 67 69 74 76 78 7A 7C 80 + 8E 90 92 94 97 99 A3 A5 A7 A9 AE B1 B2 B3 B4 B5 + B6 41 51 53 55 59 63 65 67 69 74 76 78 7A 7C 80 + 8E 90 92 94 97 99 A3 A5 A7 A9 AE B7 B8 B9 BA BB + 54 A1 5D 47 4B 43 4D 54 5F 61 5B 71 6F 6B 4B 4D + 5D 4F 4F 86 8A 82 9F 9B AD 8A A1 8C E3 8C BD BE + 45 6D 84 9D 7E 7E EA FA FF EE EC FD FC CE EB FB + DC DD DE C3 C9 45 47 43 E9 D5 CF D1 D3 E2 E5 C5 + C6 CB CA C8 C2 CC 49 49 D2 D0 D7 D6 D4 CD D8 E4 + 57 57 5F 61 5B 73 6D 6F 71 C7 C4 DB DA E6 6B D9 + 84 96 86 82 88 88 F5 B0 B0 9D 9F 9B AB AB EF F4 + ED F1 C1 FE F6 E7 BF BC F0 E8 F7 F9 F3 F2 DF E0 + + + + + + + + diff --git a/sql/share/charsets/cp852.xml b/sql/share/charsets/cp852.xml new file mode 100644 index 00000000..f038fa92 --- /dev/null +++ b/sql/share/charsets/cp852.xml @@ -0,0 +1,139 @@ + + + + + + Copyright (c) 2003, 2004 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; 
version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 00 + 01 02 02 02 02 02 02 02 02 02 01 02 02 01 01 01 + 01 01 02 02 02 01 02 01 02 01 01 01 02 01 00 02 + 02 02 02 02 01 02 01 02 01 02 00 02 01 01 00 00 + 00 00 00 00 00 01 01 01 02 00 00 00 00 01 02 00 + 00 00 00 00 00 00 01 02 00 00 00 00 00 00 00 00 + 02 01 01 01 02 01 01 01 02 00 00 00 00 01 01 00 + 01 02 01 01 02 02 01 02 01 01 02 01 02 01 02 00 + 00 00 00 00 00 00 00 00 00 00 00 02 01 02 00 48 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 87 81 82 83 84 85 86 87 88 89 8B 8B 8C AB 84 86 + 82 92 92 93 94 96 96 98 98 94 81 9C 9C 88 9E 9F + A0 A1 A2 A3 A5 A5 A7 A7 A9 A9 AA AB 9F B8 AE AF + B0 B1 B2 B3 B4 A0 83 D8 B8 B9 BA BB BC BE BE BF + C0 C1 C2 C3 C4 C5 C7 C7 C8 C9 CA CB CC CD CE CF + D0 D0 D4 89 D4 E5 A1 8C D8 D9 DA DB DC 
EE 85 DF + A2 E1 93 E4 E4 E5 E7 E7 EA A3 E8 FB EC EC EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 9A 90 B6 8E DE 8F 80 9D D3 8A 8A D7 8D 8E 8F + 90 91 91 E2 99 95 95 97 97 99 9A 9B 9B 9D 9E AC + B5 D6 E0 E9 A4 A4 A6 A6 A8 A8 AA 8D AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 AD B9 BA BB BC BE BD BF + C0 C1 C2 C3 C4 C5 C6 C6 C8 C9 CA CB CC CD CE CF + D1 D1 D2 D3 D2 D5 D6 D7 B7 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E3 D5 E6 E6 E8 E9 E8 EB ED ED DD EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA EB FC FC FE FF + + + + + + + 0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F + 0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F + 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F + 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F + 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F + 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F + 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F + 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F + 00C7 00FC 00E9 00E2 00E4 016F 0107 00E7 0142 00EB 0150 0151 00EE 0179 00C4 0106 + 00C9 0139 013A 00F4 00F6 013D 013E 015A 015B 00D6 00DC 0164 0165 0141 00D7 010D + 00E1 00ED 00F3 00FA 0104 0105 017D 017E 0118 0119 00AC 017A 010C 015F 00AB 00BB + 2591 2592 2593 2502 2524 00C1 00C2 011A 015E 2563 2551 2557 255D 017B 017C 2510 + 2514 2534 252C 251C 2500 253C 0102 0103 255A 2554 2569 2566 2560 2550 256C 00A4 
+ 0111 0110 010E 00CB 010F 0147 00CD 00CE 011B 2518 250C 2588 2584 0162 016E 2580 + 00D3 00DF 00D4 0143 0144 0148 0160 0161 0154 00DA 0155 0170 00FD 00DD 0163 00B4 + 00AD 02DD 02DB 02C7 02D8 00A7 00F7 00B8 00B0 00A8 02D9 0171 0158 0159 25A0 00A0 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 47 48 4C 4F 54 55 56 57 5A 5B 5C 5E 5F 62 + 67 68 69 6C 71 74 75 76 77 78 7B 90 91 92 93 94 + 95 41 47 48 4C 4F 54 55 56 57 5A 5B 5C 5E 5F 62 + 67 68 69 6C 71 74 75 76 77 78 7B 96 97 98 99 9A + 48 74 4F 41 41 74 48 48 5C 4F 62 62 57 7B 41 48 + 4F 5C 5C 62 62 5C 5C 6C 6C 62 74 71 71 5C 9E 48 + 41 57 62 74 41 41 7B 7B 4F 4F AA 7B 48 6C AE AF + B0 B1 B2 B3 B4 41 41 4F 6C B5 BA BB BC 7B 7B BF + C0 C1 C2 C3 C4 C5 41 41 C8 C9 CA CB CC CD CE CF + 4C 4C 4C 4F 4C 60 57 57 4F D9 DA DB DC 71 74 DF + 62 70 62 60 60 60 6C 6C 69 74 69 74 78 78 71 EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA 74 69 69 FE FF + + + + + + + + diff --git a/sql/share/charsets/cp866.xml b/sql/share/charsets/cp866.xml new file mode 100644 index 00000000..3245326a --- /dev/null +++ b/sql/share/charsets/cp866.xml @@ -0,0 +1,142 @@ + + + + + + Copyright (C) 2003 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 00 + 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 + 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 + 01 02 01 02 01 02 01 02 00 00 00 00 00 00 00 48 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + A0 A1 A2 A3 A4 A5 86 87 88 89 AA AB AC AD AE AF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + A0 A1 A2 A3 A4 A5 86 87 88 89 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F1 F1 F3 F3 F5 F5 F7 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 
2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + F0 F0 F2 F2 F4 F4 F6 F6 F8 F9 FA FB FC FD FE FF + + + + + + + 0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F + 0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F + 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F + 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F + 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F + 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F + 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F + 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F + 0410 0411 0412 0413 0414 0415 0416 0417 0418 0419 041A 041B 041C 041D 041E 041F + 0420 0421 0422 0423 0424 0425 0426 0427 0428 0429 042A 042B 042C 042D 042E 042F + 0430 0431 0432 0433 0434 0435 0436 0437 0438 0439 043A 043B 043C 043D 043E 043F + 2591 2592 2593 2502 2524 2561 2562 2556 2555 2563 2551 2557 255D 255C 255B 2510 + 2514 2534 252C 251C 2500 253C 255E 255F 255A 2554 2569 2566 2560 2550 256C 2567 + 2568 2564 2565 2559 2558 2552 2553 256B 256A 2518 250C 2588 2584 258C 2590 2580 + 0440 0441 0442 0443 0444 0445 0446 0447 0448 0449 044A 044B 044C 044D 044E 044F + 0401 0451 0404 0454 0407 0457 040E 045E 00B0 2219 00B7 221A 207F 00B2 25A0 00A0 + + + + + + + + 00 
01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 43 45 47 49 4B 4D 4F 51 53 55 57 59 5B 5D + 5F 61 63 65 67 69 6B 6D 6F 71 73 BD BE BF C0 C1 + C2 41 43 45 47 49 4B 4D 4F 51 54 55 57 59 5B 5D + 5F 61 63 65 67 69 6B 6D 6F 71 73 C3 C4 C5 C6 C7 + 75 77 79 7B 7D 7F 85 87 89 8D 8F 91 93 95 97 99 + 9B 9D 9F A1 A5 A7 A9 AB AD AF B1 B3 B5 B7 B9 BB + 75 77 79 7B 7D 7F 85 87 89 8D 8F 91 93 95 97 99 + C8 C9 CA D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + 9B 9D 9F A1 A5 A7 A9 AB AD AF B1 B3 B5 B7 B9 BB + 81 81 83 83 8B 8B A3 A3 CB CC CD CE CF D0 D1 D2 + + + + + + + + + diff --git a/sql/share/charsets/dec8.xml b/sql/share/charsets/dec8.xml new file mode 100644 index 00000000..25697133 --- /dev/null +++ b/sql/share/charsets/dec8.xml @@ -0,0 +1,140 @@ + + + + + + Copyright (C) 2003 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 10 01 01 01 01 01 01 01 02 + 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 10 02 02 02 02 02 02 02 02 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 D7 F8 F9 FA FB FC FD FE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 
2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 F7 D8 D9 DA DB DC DD DE FF + + + + + + +0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F +0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F +0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F +0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F +0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F +0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F +0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F +0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F +0080 0081 0082 0083 0084 0085 0086 0087 0088 0089 008A 008B 008C 008D 008E 008F +0090 0091 0092 0093 0094 0095 0096 0097 0098 0099 009A 009B 009C 009D 009E 009F +00A0 00A1 00A2 00A3 0000 00A5 0000 00A7 00A4 00A9 00AA 00AB 0000 0000 0000 0000 +00B0 00B1 00B2 00B3 0000 00B5 00B6 00B7 0000 00B9 00BA 00BB 00BC 00BD 0000 00BF +00C0 00C1 00C2 00C3 00C4 00C5 00C6 00C7 00C8 00C9 00CA 00CB 00CC 00CD 00CE 00CF +0000 00D1 00D2 00D3 00D4 00D5 00D6 0152 00D8 00D9 00DA 00DB 00DC 0178 0000 00DF +00E0 00E1 00E2 00E3 00E4 00E5 00E6 00E7 00E8 00E9 00EA 00EB 00EC 00ED 00EE 00EF +0000 00F1 00F2 00F3 00F4 00F5 00F6 0153 00F8 00F9 00FA 00FB 00FC 00FF 0000 0000 + + + + + + + 00 01 02 03 04 05 06 07 
08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + 41 41 41 41 5C 5B 5C 43 45 45 45 45 49 49 49 49 + 44 4E 4F 4F 4F 4F 5D D7 D8 55 55 55 59 59 DE DF + 41 41 41 41 5C 5B 5C 43 45 45 45 45 49 49 49 49 + 44 4E 4F 4F 4F 4F 5D F7 D8 55 55 55 59 59 DE FF + + + + + + + + + diff --git a/sql/share/charsets/geostd8.xml b/sql/share/charsets/geostd8.xml new file mode 100644 index 00000000..bcb3148f --- /dev/null +++ b/sql/share/charsets/geostd8.xml @@ -0,0 +1,139 @@ + + + + + + Copyright (C) 2003 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20 + 00 00 10 00 10 10 10 10 00 10 00 10 00 00 00 00 + 00 10 10 10 10 10 10 10 00 00 00 10 00 00 00 00 + 48 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 03 03 03 03 03 03 03 03 03 03 03 03 03 03 03 03 + 03 03 03 03 03 03 03 03 03 03 03 03 03 03 03 03 + 03 03 03 03 03 03 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 
2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + +0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F +0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F +0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F +0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F +0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F +0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F +0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F +0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F +20AC 0000 201A 0000 201E 2026 2020 2021 0000 2030 0000 2039 0000 0000 0000 0000 +0000 2018 2019 201C 201D 2022 2013 2014 0000 0000 0000 203A 0000 0000 0000 0000 +00A0 00A1 00A2 00A3 00A4 00A5 00A6 00A7 00A8 00A9 00AA 00AB 00AC 00AD 00AE 00AF +00B0 00B1 00B2 00B3 00B4 00B5 00B6 00B7 00B8 00B9 00BA 00BB 00BC 00BD 00BE 00BF +10D0 10D1 10D2 10D3 10D4 10D5 10D6 10F1 10D7 10D8 10D9 10DA 10DB 10DC 10F2 10DD +10DE 10DF 10E0 10E1 10E2 10F3 10E3 10E4 10E5 10E6 10E7 10E8 10E9 10EA 10EB 10EC +10ED 10EE 10F4 10EF 10F0 10F5 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 2116 0000 0000 + + + + + + + 00 01 02 03 04 05 06 07 
08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + + diff --git a/sql/share/charsets/greek.xml b/sql/share/charsets/greek.xml new file mode 100644 index 00000000..9ab0cb68 --- /dev/null +++ b/sql/share/charsets/greek.xml @@ -0,0 +1,144 @@ + + + + + + Copyright (C) 2003 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 48 10 10 10 00 00 10 10 10 10 00 10 10 10 00 10 + 10 10 10 10 10 10 01 10 01 01 01 10 01 10 01 01 + 02 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 + 01 01 00 01 01 01 01 01 01 01 01 01 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 00 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 DC B7 DD DE DF BB FC BD FD FE + C0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 D2 F3 F4 F5 F6 F7 F8 F9 FA FB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 
2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + DA C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB C1 C5 C7 C9 + DB C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D3 D3 D4 D5 D6 D7 D8 D9 DA DB CF D5 D9 FF + + + + + + +0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F +0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F +0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F +0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F +0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F +0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F +0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F +0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F +0080 0081 0082 0083 0084 0085 0086 0087 0088 0089 008A 008B 008C 008D 008E 008F +0090 0091 0092 0093 0094 0095 0096 0097 0098 0099 009A 009B 009C 009D 009E 009F +00A0 02BD 02BC 00A3 0000 0000 00A6 00A7 00A8 00A9 0000 00AB 00AC 00AD 0000 2015 +00B0 00B1 00B2 00B3 0384 0385 0386 00B7 0388 0389 038A 00BB 038C 00BD 038E 038F +0390 0391 0392 0393 0394 0395 0396 0397 0398 0399 039A 039B 039C 039D 039E 039F +03A0 03A1 0000 03A3 03A4 03A5 03A6 03A7 03A8 03A9 03AA 03AB 03AC 03AD 03AE 03AF +03B0 03B1 03B2 03B3 03B4 03B5 03B6 03B7 03B8 03B9 03BA 03BB 03BC 03BD 03BE 03BF +03C0 03C1 03C2 03C3 03C4 03C5 03C6 03C7 03C8 03C9 03CA 03CB 03CC 03CD 03CE 0000 + + + + + + + 00 01 02 03 04 
05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 C1 B7 C5 C7 C9 BB CF BD D5 D9 + C9 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 C9 D5 C1 C5 C7 C9 + D5 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D3 D3 D4 D5 D6 D7 D8 D9 C9 D5 CF D5 D9 FF + + + + + + + + + diff --git a/sql/share/charsets/hebrew.xml b/sql/share/charsets/hebrew.xml new file mode 100644 index 00000000..295440b4 --- /dev/null +++ b/sql/share/charsets/hebrew.xml @@ -0,0 +1,140 @@ + + + + + + Copyright (c) 2003, 2006 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 00 00 20 20 00 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 
2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + +0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F +0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F +0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F +0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F +0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F +0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F +0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F +0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F +0080 0081 0082 0083 0084 0085 0086 0087 0088 0089 008A 008B 008C 008D 008E 008F +0090 0091 0092 0093 0094 0095 0096 0097 0098 0099 009A 009B 009C 009D 009E 009F +00A0 0000 00A2 00A3 00A4 00A5 00A6 00A7 00A8 00A9 00D7 00AB 00AC 00AD 00AE 203E +00B0 00B1 00B2 00B3 00B4 00B5 00B6 00B7 00B8 00B9 00F7 00BB 00BC 00BD 00BE 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 2017 +05D0 05D1 05D2 05D3 05D4 05D5 05D6 05D7 05D8 05D9 05DA 05DB 05DC 05DD 05DE 05DF +05E0 05E1 05E2 05E3 05E4 05E5 05E6 05E7 05E8 05E9 05EA 0000 0000 200E 200F 0000 + + + + + + + 00 01 02 03 04 05 06 07 
08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 F7 D8 D9 DA DB DC DD DE FF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + + + diff --git a/sql/share/charsets/hp8.xml b/sql/share/charsets/hp8.xml new file mode 100644 index 00000000..628ebe7f --- /dev/null +++ b/sql/share/charsets/hp8.xml @@ -0,0 +1,140 @@ + + + + + + Copyright (C) 2003 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 20 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 10 20 20 10 10 10 10 10 10 10 10 10 10 10 10 10 + 10 10 10 10 10 02 10 10 10 10 10 10 02 10 02 02 + 01 10 10 01 02 10 10 02 01 10 01 01 01 10 10 10 + 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 10 10 20 20 20 20 10 10 10 10 10 10 10 10 10 20 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 C8 C0 C9 C1 CD D1 DD A8 A9 AA AB AC CB C3 AF + B0 B2 B2 B3 B5 B5 B7 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D4 D1 D6 D7 D4 D5 D6 D7 CC D9 CE CF C5 DD DE C2 + C4 E2 E2 E4 E4 D5 D9 C6 CA EA EA EC EC C7 EF EF + F1 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 
2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B1 B3 B4 B4 B6 B6 B8 B9 BA BB BC BD BE BF + A2 A4 DF AE E0 DC E7 ED A1 A3 E8 AD D8 A5 DA DB + D0 A6 D2 D3 D0 E5 D2 D3 D8 E6 DA DB DC A7 DE DF + E0 E1 E1 E3 E3 E5 E6 E7 E8 E9 E9 EB EB ED EE EE + F0 F0 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + +0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F +0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F +0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F +0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F +0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F +0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F +0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F +0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F +0080 0081 0082 0083 0084 0085 0086 0087 0088 0089 008A 008B 008C 008D 008E 008F +0090 0091 0092 0093 0094 0095 0096 0097 0098 0099 009A 009B 009C 009D 009E 009F +00A0 00C0 00C2 00C8 00CA 00CB 00CE 00CF 00B4 02CB 02C6 00A8 02DC 00D9 00DB 20A4 +00AF 00DD 00FD 00B0 00C7 00E7 00D1 00F1 00A1 00BF 00A4 00A3 00A5 00A7 0192 00A2 +00E2 00EA 00F4 00FB 00E1 00E9 00F3 00FA 00E0 00E8 00F2 00F9 00E4 00EB 00F6 00FC +00C5 00EE 00D8 00C6 00E5 00ED 00F8 00E6 00C4 00EC 00D6 00DC 00C9 00EF 00DF 00D4 +00C1 00C3 00E3 00D0 00F0 00CD 00CC 00D3 00D2 00D5 00F5 0160 0161 00DA 0178 00FF +00DE 00FE 00B7 00B5 00B6 00BE 2014 00BC 00BD 00AA 00BA 00AB 25A0 00BB 00B1 0000 + + + + + + + + 00 01 02 03 04 05 06 
07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5C 5D 5B 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + + diff --git a/sql/share/charsets/keybcs2.xml b/sql/share/charsets/keybcs2.xml new file mode 100644 index 00000000..a6079319 --- /dev/null +++ b/sql/share/charsets/keybcs2.xml @@ -0,0 +1,140 @@ + + + + + + Copyright (C) 2003 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 00 + 01 02 82 02 02 01 01 02 82 81 01 01 02 02 01 01 + 81 02 01 02 02 01 02 01 02 01 01 01 01 01 01 02 + 02 02 02 02 02 01 01 01 02 02 02 01 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 02 02 01 02 01 02 00 02 01 01 01 02 00 02 02 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 48 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 87 81 82 83 84 83 86 87 88 88 8D A1 8C 8D 84 A0 + 82 91 91 93 94 A2 96 A3 98 94 81 9B 8C 98 A9 9F + A0 A1 A2 A3 A4 A4 96 93 9B A9 AA AA AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 ED E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 
2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 68 59 5A 7B 7C 7D 7E 7F + 87 9A 90 85 8E 85 86 80 89 89 8A 8B 9C 8A 8E 8F + 90 92 92 A7 99 95 A6 97 9D 99 9A A8 9C 9D 9E 9F + 8F 8B 95 97 A5 A5 A6 A7 A8 9E AB AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC E8 EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + 0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F + 0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F + 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F + 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F + 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F + 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F + 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F + 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F + 010C 00FC 00E9 010F 00E4 010E 0164 010D 011B 011A 0139 00CD 013E 013A 00C4 00C1 + 00C9 017E 017D 00F4 00F6 00D3 016F 00DA 00FD 00D6 00DC 0160 013D 00DD 0158 0165 + 00E1 00ED 00F3 00FA 0148 0147 016E 00D4 0161 0159 0155 0154 00BC 00A1 00AB 00BB + 2591 2592 2593 2502 2524 2561 2562 2556 2555 2563 2551 2557 255D 255C 255B 2510 + 2514 2534 252C 251C 2500 253C 255E 255F 255A 2554 2569 2566 2560 2550 256C 2567 + 2568 2564 2565 2559 2558 2552 2553 256B 256A 2518 250C 2588 2584 258C 2590 2580 + 03B1 00DF 0393 03C0 03A3 03C3 00B5 03C4 03A6 0398 03A9 03B4 221E 03C6 03B5 2229 + 2261 00B1 2265 2264 2320 2321 00F7 2248 00B0 2219 00B7 221A 207F 00B2 25A0 00A0 + + + + + + + 00 01 02 
03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 44 45 47 49 50 51 52 53 54 55 56 57 58 5A + 5E 5F 60 63 66 68 6C 6D 6E 6F 72 90 91 92 93 94 + 95 41 44 45 47 49 50 51 52 53 54 55 56 57 58 5A + 5E 5F 60 63 66 68 6C 6D 6E 6F 72 96 97 98 99 9A + 45 68 49 47 41 47 66 45 49 49 56 53 56 56 41 41 + 49 72 72 5A 5A 5A 68 68 6F 5A 68 63 56 6F 60 66 + 41 53 5A 68 58 58 68 5A 63 60 60 60 A0 A1 A2 A3 + A4 A5 A6 B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC + BD BE BF C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC + CD CE CF D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC + 80 65 83 87 88 89 DD 8A 85 8B 84 81 DE 85 82 DF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + + + + diff --git a/sql/share/charsets/koi8r.xml b/sql/share/charsets/koi8r.xml new file mode 100644 index 00000000..7a8a936f --- /dev/null +++ b/sql/share/charsets/koi8r.xml @@ -0,0 +1,139 @@ + + + + + + Copyright (C) 2003 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20 + 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 10 10 10 02 10 10 10 10 10 10 10 10 10 10 10 10 + 10 10 10 01 10 10 10 10 10 10 10 10 10 10 10 10 + 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 + 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 A3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 
2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 B3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + +0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000a 000b 000c 000d 000e 000f +0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001a 001b 001c 001d 001e 001f +0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002a 002b 002c 002d 002e 002f +0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003a 003b 003c 003d 003e 003f +0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004a 004b 004c 004d 004e 004f +0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005a 005b 005c 005d 005e 005f +0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006a 006b 006c 006d 006e 006f +0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007a 007b 007c 007d 007e 007f +2500 2502 250c 2510 2514 2518 251c 2524 252c 2534 253c 2580 2584 2588 258c 2590 +2591 2592 2593 2320 25a0 2219 221a 2248 2264 2265 00a0 2321 00b0 00b2 00b7 00f7 +2550 2551 2552 0451 2553 2554 2555 2556 2557 2558 2559 255a 255b 255c 255d 255e +255f 2560 2561 0401 2562 2563 2564 2565 2566 2567 2568 2569 256a 256b 256c 00a9 +044e 0430 0431 0446 0434 0435 0444 0433 0445 0438 0439 043a 043b 043c 043d 043e +043f 044f 0440 0441 0442 0443 0436 0432 044c 044b 0437 0448 044d 0449 0447 044a +042e 0410 0411 0426 0414 0415 0424 0413 0425 0418 0419 041a 041b 041c 041d 041e +041f 042f 0420 0421 0422 0423 0416 0412 042c 042b 0417 0428 042d 0429 0427 042a + + + + + + + 00 01 02 03 04 05 06 07 
08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 E5 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE + AF B0 B1 E5 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD + FE DF E0 F6 E3 E4 F4 E2 F5 E8 E9 EA EB EC ED EE + EF FF F0 F1 F2 F3 E6 E1 FC FB E7 F8 FD F9 F7 FA + FE DF E0 F6 E3 E4 F4 E2 F5 E8 E9 EA EB EC ED EE + EF FF F0 F1 F2 F3 E6 E1 FC FB E7 F8 FD F9 F7 FA + + + + + + + + diff --git a/sql/share/charsets/koi8u.xml b/sql/share/charsets/koi8u.xml new file mode 100644 index 00000000..83e85b84 --- /dev/null +++ b/sql/share/charsets/koi8u.xml @@ -0,0 +1,140 @@ + + + + + + Copyright (C) 2003 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + + + + + + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20 + 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 10 10 10 02 02 10 02 02 10 10 10 10 10 02 10 10 + 10 10 10 01 01 10 01 01 10 10 10 10 10 01 10 10 + 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 + 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 20 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 20 20 20 A3 A4 20 A6 A7 20 20 20 20 20 AD 20 20 + 20 20 20 A3 A4 20 A6 A7 20 20 20 20 20 AD 20 20 + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + + + + + + + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 
2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 20 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 20 20 20 B3 B4 20 B6 B7 20 20 20 20 20 BD 20 20 + 20 20 20 B3 B4 20 B6 B7 20 20 20 20 20 BD 20 20 + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + + + + + + +0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F +0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F +0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F +0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F +0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F +0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F +0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F +0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F +2500 2502 250C 2510 2514 2518 251C 2524 252C 2534 253C 2580 2584 2588 258C 2590 +2591 2592 2593 2320 25A0 2022 221A 2248 2264 2265 00A0 2321 00B0 00B2 00B7 00F7 +2550 2551 2552 0451 0454 2554 0456 0457 2557 2558 2559 255A 255B 0491 255D 255E +255F 2560 2561 0401 0404 2563 0406 0407 2566 2567 2568 2569 256A 0490 256C 00A9 +044E 0430 0431 0446 0434 0435 0444 0433 0445 0438 0439 043A 043B 043C 043D 043E +043F 044F 0440 0441 0442 0443 0436 0432 044C 044B 0437 0448 044D 0449 0447 044A +042E 0410 0411 0426 0414 0415 0424 0413 0425 0418 0419 041A 041B 041C 041D 041E +041F 042F 0420 0421 0422 0423 0416 0412 042C 042B 0417 0428 042D 0429 0427 042A + + + + + + + 00 01 02 03 04 05 06 07 
08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 20 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + A5 A6 A7 A8 A9 AA AB AC AD AE AF B0 B1 B2 B3 B4 + B5 B6 B7 B8 B9 BA BB BC BD BE BF C0 C1 C2 C3 C4 + C5 C6 C7 88 87 C8 8C 8D C9 CA CB CC CD 84 CE CF + D0 D1 D2 88 87 D3 8C 8D D4 D5 D6 D7 D8 84 D9 DA + A3 80 81 9B 85 86 99 83 9A 8B 8E 8F 90 91 92 93 + 94 A4 95 96 97 98 89 82 A1 A0 8A 9D A2 9E 9C 9F + A3 80 81 9B 85 86 99 83 9A 8B 8E 8F 90 91 92 93 + 94 A4 95 96 97 98 89 82 A1 A0 8A 9D A2 9E 9C 9F + + + + + + + + + diff --git a/sql/share/charsets/languages.html b/sql/share/charsets/languages.html new file mode 100644 index 00000000..9adce32f --- /dev/null +++ b/sql/share/charsets/languages.html @@ -0,0 +1,274 @@ +#!/bin/sh + +# Copyright (C) 2003 MySQL AB +# Use is subject to license terms +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +#
+(
+echo "DROP TABLE lang;"
+echo "CREATE TABLE lang (lang varchar(128), letters text character set utf8);"
+(
+grep -v "^#" << END
+#
+Greenlandic	ÁÂÃÊÍÎÔÚÛáâãêíîôúûĨĩĸŨũ
+#Use of these letters was abolished in a spelling reform in 1973:
+#Greenlandic	ÅÆØåæø
+#Characters not found in the UCS:
+#	K LATIN CAPITAL LETTER KRA
+#############################################################
+#Basque	ÑÜñüŔŕ
+#Characters not found in the UCS:
+#	D LATIN CAPITAL LETTER D WITH MACRON
+#	d LATIN SMALL LETTER D WITH MACRON
+#	L LATIN CAPITAL LETTER L WITH MACRON
+#	l LATIN SMALL LETTER L WITH MACRON
+#	T LATIN CAPITAL LETTER T WITH MACRON
+#	t LATIN SMALL LETTER T WITH MACRON
+#############################################################
+#Maltese	#ÀÁÂÈÉÊÌÍÎÒÓÔÙÚÛ#àáâèéêìíîòÓôùúû#ĊċĠġĦħŻżʼ
+#BosnianCyr	ЂЈЉЊЋЏАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШабвгдежзиклмнопрстуфхцчшђјљњћџ
+#Scots	A
+#Scots1	ƷȜȝʒ
+###########################################
+#### Hiragana 3040-309F
+Hiragana	ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをん
+Hiragana1	ゔ゙゚
+Hiragana2	゛゜ゝゞ
+#### Katakana 30A0-30FF
+Katakana	ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶ
+Katakana1	ヷヸヹヺ
+Katakana2	・ーヽヾ
+############################################
+Albanian	ÂÇËâçë
+Bosnian	ĆćČčĐ𩹮ž
+Breton	ÂÊÑÙÜâêñùü
+Catalan	ÀÇÈÉÍÏÒÓÚÜàçèéíïòóúü
+#Catalan1	·Ŀŀ
+Croatian	ĆćČčĐ𩹮ž
+CroatianLig	DZDzdzDŽDždžLJLjljNJNjnj
+Czech	ÁÉÍÓÚÝáéíóúýČčĎďĚěŇňŘřŠšŤťŮůŽž
+Danish	ÁÄÅÆÉÓÖØÜáäåæéóöøü
+Dutch	ÀÂÄÆÇÈÉÊËÎÏÑÒÓÔÖÙÚÛÜàâäæçèéêëîïñòóôöùúûü
+Esperanto	ĈĉĜĝĤĥĴĵŜŝŬŭ
+Estonian	ÄÕÖÜäõöüŠšŽž
+Faroese	ÅÆÐÓÖØÚÝåæðóöøúý
+Finnish	ÄÅÖÜäåöü
+#Finnish1	ŠšŽž
+French(limited)	ÀÂÆÇÈÉÊËÎÏÑÔÙÛàâæçèéêëîïñôùûÿ
+French	ŒœŸ
+German	ÄÖÜßäöü
+Hungarian	ÁÉÍÓÖÚÜáéíóöúüŐőŰű
+Icelandic	ÁÆÉÍÐÓÖÚÝÞáæéíðóöúýþ
+Italian	ÀÈÉÌÍÏÒÓÙÚàèéìíïòóùú
+#Latin	A
+Latvian	ĀāČčĒēĢģĪīĶķĻļŅņŠšŪūŽž
+Lithuanian	ĄąČčĖėĘęĮįŠšŪūŲųŽž
+Norwegian	ÅÆØåæø
+Polish	ÓóĄąĆćĘꣳŃńŚśŹźŻż
+Portuguese	ÀÁÂÃÇÉÊÍÓÔÕÚÜàáâãçéêíóôõúü
+#http://en.wikipedia.org/wiki/Special_Romanian_Unicode_characters
+Romanian	ÂÎâîĂăȘșȚț
+Romanian(ErrorST)	ÂÎâîĂ㪺Ţţ
+Slovak	ÁÄÉÍÓÔÚÝáäéíóôúýČčĎďĹ弾ŇňŔ੹ŤťŽž
+Slovene	Č芚Žž
+Sorbian-Lower	ĆćČčĚ죳ŃńŘřŚśŠšŹźŽž
+Sorbian-Upper	ÓóĆćČčĚ죳ŃńŘřŠšŽž
+Spanish	ÁÉÍÑÓÚÜáéíñóúü
+Swedish	ÄÅÖäåö
+Turkish	ÂÇÖÛÜâçöûüĞğİı
+Welsh	ÀÁÂÄÈÉÊËÌÍÎÏÒÓÔÖÙÚÛÜÝàáâäèéêëìíîïòóôöùúûüýÿŴŵŶŷŸẀẁẂẃẄẅỲỳ
+##################################
+Belarusian	ЁІЎАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяёіў
+Bulgarian	АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя
+Bulgarian1	ЀҭѐѝѢѣѪѫ
+Macedonian	ЃЅЈЉЊЌЏАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШабвгдежзиклмнопрстуфхцчшѓѕјљњќџ
+Russian	ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё
+RussianOLD	ІіѢѣѲѳѴѵ
+Serbian	ЂЈЉЊЋЏАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШабвгдежзиклмнопрстуфхцчшђјљњћџ
+Ukrainian	ЄІЇАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЭЮЯабвгдежзийклмнопрстуфхцчшщьэюяєіїҐґ
+##################################
+Armenian	ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՑՒՓՔՕՖ՛՜՝՞՟աբգդեֆ։
+#Armenian1	՚֊
+#Characters not found in the UCS:
+#	ARMENIAN ETERNITY SIGN
+#
+GeorgianOld	ႠႡႢႣႤႥႦႧႨႩႪႫႬႭႮႯႰႱႲႳႴႵႶႷႸႹႺႻႼႽႾႿჀჁჂჃჄჅ
+Georgian	აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰ
+GeorgianArc	ჱჲჳჴჵჶ
+GeorgianPunc	჻
+#
+GreekExt1	΄΅Ά·ΈΉΊ»Ό½ΎΏΐ
+Greek	ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρστυφχψω
+GreekExt2	ΪΫάέήίΰϊϋόύώ
+GreekExt4	ς
+#
+Hebrew	אבגדהוזחטיךכלםמןנסעףפץצקרשת
+##################################
+#Abaza
+#Abkhaz
+#Adyghe
+#Agul *
+#(Aisor)
+#Akhvakh *
+#(?lvdalska)
+#(Andi) *
+#(Aragonese)
+#Archi *
+#Arumanian
+#(Arvanite)
+#Asturian
+#Avar
+#Azerbaijani
+#(Bagulal) *
+#Balkar
+#Bashkir
+#Basque			!
+#Bats *
+#Bezhta *
+#(Botlikh) *
+#Budukh *
+#(Chamalal)
+#Chechen
+#Chuvash
+#Cornish		!
+#(Corsican)
+#Dargwa
+#Erzya
+#(Franco-Proven?al)
+#(Frisian, East)
+#(Frisian, North)
+#Frisian, West
+#Friulian
+#Gagauz
+#Gaelic, Irish		!
+#Gaelic, Manx		!
+#Gaelic, Scottish	!
+#Galician		!
+#(German, Low)		!
+#(German, Swiss)	!
+#Godoberi *
+#(Hinukh) *
+#(Hunzib) *
+#Ingrian
+#Ingush
+#Istro-Romanian
+#(Judeo-Georgian)
+#(Judeo-Kurdish)
+#(Judeo-Tati)
+#Kabardian
+#Kalmyk
+#Karachay
+#(Karaim)
+#(Karata) *
+#Karelian
+#Kashubian
+#Kazakh
+#Khinalug
+#(Khvarshi) *
+#(Kirmanji)
+#Komi
+#Komi-Permyak
+#(Kryts)
+#Kumyk
+#(Kurdish)
+#(Ladin)
+#(Ladino)
+#Lak
+#Laz
+#Lezgian
+#Livonian
+#(Ludian)
+#Luxemburgish		!
+#Mari, Hill
+#Mari, Meadow
+#Megleno-Romanian
+#(Mingrelian)
+#Moksha
+#Moldavian
+#Nenets, Tundra
+#Nogai
+#Occitan
+#Old Church Slavonic
+#(Olonets)
+#Ossetian
+#(Romani)
+#Romansch
+#(Rusyn)
+#Rutul
+#Sami, Inari
+#Sami, Kildin
+#Sami, Lule
+#Sami, Northern
+#Sami, Skolt
+#Sami, Southern
+#(Sami, Ter) *
+#(Sami, Ume) *
+#(Sardinian) *
+#Scots			!
+#Svan
+#Tabasaran
+#(Talysh)
+#Tatar, Crimean
+#Tatar, Kazan
+#Tati
+#(Tindi) *
+#(Tsakonian) *
+#Tsakhur *
+#(Tsez) *
+#(Turkish, Crimean)
+#Ubykh *
+#Udi
+#Udmurt
+#(V?mhusm?l)
+#Vepsian
+#Votic
+#(Walloon)
+#(Yiddish) 
+################################
+#      4	Gaelic-new-orthography
+#      4	Frisian
+#      3	Rhaeto-Romanic
+#      2	S&AACUTEmi-with-restrictions
+#      1	Rhjaeto-Romanic
+#      1	Gaelic-old-and-new-orthographies
+END
+) | 
+
+while read a b
+do
+  c=`echo $b | replace "&#x" "" ";" ""`
+  printf "INSERT INTO lang VALUES ('$a',_ucs2 X'$c');\n"
+done
+) | mysql -f test
+
+#mysql test << END
+#SELECT * FROM lang WHERE CONVERT(letters USING latin1) NOT LIKE _binary'%?%';
+#SELECT * FROM lang WHERE CONVERT(letters USING latin2) NOT LIKE _binary'%?%';
+#END
+
+
+
+list="big5 dec8 cp850 hp8 koi8r latin1 latin2 swe7 ascii ujis sjis hebrew euckr koi8u gb2312 greek cp1250 gbk latin5 armscii8 cp866 keybcs2 macce macroman cp852 latin7 cp1251 cp1256 cp1257 geostd8"
+
+for p in $list
+do
+echo "-----------------"
+echo $p:
+mysql  -sss test << END
+SELECT lang FROM lang WHERE CONVERT(letters USING $p) NOT LIKE _binary'%?%' ORDER BY lang;
+END
+done
+
diff --git a/sql/share/charsets/latin1.xml b/sql/share/charsets/latin1.xml
new file mode 100644
index 00000000..f88a2840
--- /dev/null
+++ b/sql/share/charsets/latin1.xml
@@ -0,0 +1,253 @@
+
+
+
+
+
+ Copyright (c) 2003, 2005 MySQL AB
+   Use is subject to license terms
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
+
+
+
+
+
+
+ 00
+ 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20
+ 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
+ 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
+ 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10
+ 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01
+ 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10
+ 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02
+ 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20
+ 10 00 10 02 10 10 10 10 10 10 01 10 01 00 01 00
+ 00 10 10 10 10 10 10 10 10 10 02 10 02 00 02 01
+ 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
+ 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
+ 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01
+ 01 01 01 01 01 01 01 10 01 01 01 01 01 01 01 02
+ 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02
+ 02 02 02 02 02 02 02 10 02 02 02 02 02 02 02 02
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F
+ 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F
+ 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F
+ 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F
+ 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F
+ 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F
+ A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF
+ B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF
+ E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
+ F0 F1 F2 F3 F4 F5 F6 D7 F8 F9 FA FB FC FD FE DF
+ E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
+ F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F
+ 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F
+ 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F
+ 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F
+ A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF
+ B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF
+ C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
+ D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF
+ C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
+ D0 D1 D2 D3 D4 D5 D6 F7 D8 D9 DA DB DC DD DE FF
+
+
+
+
+
+
+ 0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F
+ 0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F
+ 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F
+ 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F
+ 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F
+ 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F
+ 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F
+ 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F
+ 20AC 0081 201A 0192 201E 2026 2020 2021 02C6 2030 0160 2039 0152 008D 017D 008F
+ 0090 2018 2019 201C 201D 2022 2013 2014 02DC 2122 0161 203A 0153 009D 017E 0178
+ 00A0 00A1 00A2 00A3 00A4 00A5 00A6 00A7 00A8 00A9 00AA 00AB 00AC 00AD 00AE 00AF
+ 00B0 00B1 00B2 00B3 00B4 00B5 00B6 00B7 00B8 00B9 00BA 00BB 00BC 00BD 00BE 00BF
+ 00C0 00C1 00C2 00C3 00C4 00C5 00C6 00C7 00C8 00C9 00CA 00CB 00CC 00CD 00CE 00CF
+ 00D0 00D1 00D2 00D3 00D4 00D5 00D6 00D7 00D8 00D9 00DA 00DB 00DC 00DD 00DE 00DF
+ 00E0 00E1 00E2 00E3 00E4 00E5 00E6 00E7 00E8 00E9 00EA 00EB 00EC 00ED 00EE 00EF
+ 00F0 00F1 00F2 00F3 00F4 00F5 00F6 00F7 00F8 00F9 00FA 00FB 00FC 00FD 00FE 00FF
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F
+ 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F
+ 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F
+ 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F
+ A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF
+ B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF
+ 41 41 41 41 5C 5B 5C 43 45 45 45 45 49 49 49 49
+ 44 4E 4F 4F 4F 4F 5D D7 D8 55 55 55 59 59 DE DF
+ 41 41 41 41 5C 5B 5C 43 45 45 45 45 49 49 49 49
+ 44 4E 4F 4F 4F 4F 5D F7 D8 55 55 55 59 59 DE FF
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F
+ 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F
+ 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F
+ 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F
+ A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF
+ B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF
+ 41 41 41 41 41 41 41 43 45 45 45 45 49 49 49 49
+ D0 4E 4F 4F 4F 4F 4F D7 4F 55 55 55 55 59 DE 53
+ 41 41 41 41 41 41 41 43 45 45 45 45 49 49 49 49
+ D0 4E 4F 4F 4F 4F 4F F7 4F 55 55 55 55 59 DE FF
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F
+ 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F
+ 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F
+ 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F
+ A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF
+ B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF
+ 41 41 41 41 5B 5D 5B 43 45 45 45 45 49 49 49 49
+ 44 4E 4F 4F 4F 4F 5C D7 5C 55 55 55 59 59 DE DF
+ 41 41 41 41 5B 5D 5B 43 45 45 45 45 49 49 49 49
+ 44 4E 4F 4F 4F 4F 5C F7 5C 55 55 55 59 59 DE FF
+
+
+
+
+
+
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 51 53 57 5B 65 67 69 6B 75 77 79 7B 7D 81
+ 8F 91 93 95 98 9A A4 A6 A8 AA AF B3 B4 B5 B6 B7
+ B8 41 51 53 57 5B 65 67 69 6B 75 77 79 7B 7D 81
+ 8F 91 93 95 98 9A A4 A6 A8 AA AF B9 BA BB BC BF
+ C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
+ D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF
+ E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
+ F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF
+ 43 45 47 49 4B 4D 4F 55 5D 5F 61 63 6D 6F 71 73
+ 59 7F 83 85 87 89 8B BD 8D 9C 9E A0 A2 AC B1 97
+ 43 45 47 49 4B 4D 4F 55 5D 5F 61 63 6D 6F 71 73
+ 59 7F 83 85 87 89 8B BE 8D 9C 9E A0 A2 AC B1 AE
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 51 53 57 5B 65 67 69 6B 75 77 79 7B 7D 81
+ 8F 91 93 95 98 9A A4 A6 A8 AA AF B3 B4 B5 B6 B7
+ B8 42 52 54 58 5C 66 68 6A 6C 76 78 7A 7C 7E 82
+ 90 92 94 96 99 9B A5 A7 A9 AB B0 B9 BA BB BC BF
+ C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
+ D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF
+ E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
+ F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF
+ 43 45 47 49 4B 4D 4F 55 5D 5F 61 63 6D 6F 71 73
+ 59 7F 83 85 87 89 8B BD 8D 9C 9E A0 A2 AC B1 97
+ 44 46 48 4A 4C 4E 50 56 5E 60 62 64 6E 70 72 74
+ 5A 80 84 86 88 8A 8C BE 8E 9D 9F A1 A3 AD B2 AE
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 51 53 57 5B 65 67 69 6B 75 77 79 7B 7D 81
+ 8F 91 93 95 98 9A A4 A6 A8 AA AF B3 B4 B5 B6 B7
+ B8 41 51 53 57 5B 65 67 69 6B 75 77 79 7B 7D 81
+ 8F 91 93 95 98 9A A4 A6 A8 AA AF B9 BA BB BC BF
+ C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
+ D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF
+ E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
+ F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF
+ 41 41 41 41 41 41 41 53 5B 5B 5B 5B 6B 6B 6B 6B
+ 57 7F 81 81 81 81 81 BD 81 9A 9A 9A 9A AA B1 97
+ 41 41 41 41 41 41 41 53 5B 5B 5B 5B 6B 6B 6B 6B
+ 57 7F 81 81 81 81 81 BE 81 9A 9A 9A 9A AA B1 AA
+
+
+
+
+
+
diff --git a/sql/share/charsets/latin2.xml b/sql/share/charsets/latin2.xml
new file mode 100644
index 00000000..20e7803c
--- /dev/null
+++ b/sql/share/charsets/latin2.xml
@@ -0,0 +1,186 @@
+
+
+
+
+
+ Copyright (c) 2003, 2005 MySQL AB
+   Use is subject to license terms
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
+
+
+
+
+
+
+ 00
+ 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20
+ 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
+ 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
+ 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10
+ 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01
+ 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10
+ 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02
+ 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 00
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ 48 01 10 01 10 01 01 10 10 01 01 01 01 10 01 01
+ 10 02 10 02 10 02 02 10 10 02 02 02 02 10 02 02
+ 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01
+ 10 01 01 01 01 01 01 10 01 01 01 01 01 01 01 10
+ 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02
+ 02 02 02 02 02 02 02 10 02 02 02 02 02 02 02 10
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F
+ 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F
+ 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F
+ 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F
+ 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F
+ 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F
+ A0 B1 A2 B3 A4 B5 B6 A7 A8 B9 BA BB BC AD BE BF
+ B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF
+ E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
+ F0 F1 F2 F3 F4 F5 F6 D7 F8 F9 FA FB FC FD FE DF
+ E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
+ F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F
+ 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F
+ 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F
+ 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F
+ A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF
+ B0 A1 B2 A3 B4 A5 A6 B7 B8 A9 AA AB AC BD AE AF
+ C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
+ D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF
+ C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
+ D0 D1 D2 D3 D4 D5 D6 F7 D8 D9 DA DB DC DD DE FF
+
+
+
+
+
+
+0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F
+0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F
+0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F
+0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F
+0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F
+0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F
+0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F
+0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F
+0080 0081 0082 0083 0084 0085 0086 0087 0088 0089 008A 008B 008C 008D 008E 008F
+0090 0091 0092 0093 0094 0095 0096 0097 0098 0099 009A 009B 009C 009D 009E 009F
+00A0 0104 02D8 0141 00A4 013D 015A 00A7 00A8 0160 015E 0164 0179 00AD 017D 017B
+00B0 0105 02DB 0142 00B4 013E 015B 02C7 00B8 0161 015F 0165 017A 02DD 017E 017C
+0154 00C1 00C2 0102 00C4 0139 0106 00C7 010C 00C9 0118 00CB 011A 00CD 00CE 010E
+0110 0143 0147 00D3 00D4 0150 00D6 00D7 0158 016E 00DA 0170 00DC 00DD 0162 00DF
+0155 00E1 00E2 0103 00E4 013A 0107 00E7 010D 00E9 0119 00EB 011B 00ED 00EE 010F
+0111 0144 0148 00F3 00F4 0151 00F6 00F7 0159 016F 00FA 0171 00FC 00FD 0163 02D9
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 44 45 48 49 4B 4C 4D 4E 4F 50 51 53 54 56
+ 58 59 5A 5B 5E 5F 60 61 62 63 64 68 69 6A 6B 6C
+ 6D 41 44 45 48 49 4B 4C 4D 4E 4F 50 51 53 54 56
+ 58 59 5A 5B 5E 5F 60 61 62 63 64 6E 6F 70 71 FF
+ FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF
+ FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF
+ FF 42 FF 52 FF 51 5C FF FF 5D 5B 5E 65 FF 67 66
+ FF 42 FF 52 FF 51 5C FF FF 5D 5B 5E 65 FF 67 66
+ 5A 43 43 43 43 51 46 45 47 49 4A 49 49 4E 4E 48
+ FF 55 54 57 56 56 56 FF 5A 5F 5F 5F 5F 63 5E FF
+ 5A 43 43 43 43 51 46 45 47 49 4A 49 49 4E 4E 48
+ FF 55 54 57 56 56 56 FF 5A 5F 5F 5F 5F 63 5E FF
+
+
+
+
+
+
+00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+40 41 43 44 48 4B 4D 4E 4F 50 52 53 54 56 57 59
+5B 5C 5D 5F 62 64 66 67 68 69 6B C6 C7 C8 C9 CA
+CB 41 43 44 48 4B 4D 4E 4F 50 52 53 54 56 57 59
+5B 5C 5D 5F 62 64 66 67 68 69 6B CC CD CE CF D0
+D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF E0
+E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF F0
+F1 41 F2 54 F3 54 5F F4 F5 61 5F 62 6B F6 8E 6B
+F7 41 F8 54 F9 54 5F FA FB 61 5F 62 6B FC 8E 6B
+5D 41 41 41 41 54 47 44 46 4B 4B 4B 4B 50 50 48
+4A 57 57 59 59 59 59 FD 5D 64 64 64 64 69 62 5F
+5D 41 41 41 41 54 47 44 46 4B 4B 4B 4B 50 50 48
+4A 57 57 59 59 59 59 FE 5D 64 64 64 64 69 62 FF
+
+
+
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 47 48 4C 4E 53 54 55 56 5A 5B 5C 60 61 64
+ 69 6A 6B 6E 72 75 7A 7B 7C 7D 7F 83 84 85 86 87
+ 88 41 47 48 4C 4E 53 54 55 56 5A 5B 5C 60 61 64
+ 69 6A 6B 6E 72 75 7A 7B 7C 7D 7F 89 8A 8B 8C 00
+ 01 78 4E 04 05 06 07 08 09 0A 67 67 56 56 0F 41
+ 4E 12 13 67 67 64 78 75 78 67 78 1C 1D 1E 1F FF
+ 41 56 64 75 5E 6F FF 67 FF 70 71 73 80 FF 81 82
+ FF 42 FF 5D FF 41 6F FF FF 70 71 73 80 FF 81 82
+ 6C 41 44 45 46 5F 49 4B 4A 4E 51 52 50 56 57 4D
+ FF 62 63 64 66 67 67 FF 6D 77 75 78 78 7E 74 FF
+ 64 41 44 45 46 5F 49 4B 4A 4E 51 78 50 56 58 4D
+ FF 62 63 64 66 67 67 FF 6D 77 75 78 78 7E 74 FF
+
+
+
+
+
+
+
+
diff --git a/sql/share/charsets/latin5.xml b/sql/share/charsets/latin5.xml
new file mode 100644
index 00000000..727019cc
--- /dev/null
+++ b/sql/share/charsets/latin5.xml
@@ -0,0 +1,139 @@
+
+
+
+
+
+ Copyright (c) 2003, 2005 MySQL AB
+   Use is subject to license terms
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
+
+
+
+
+
+
+ 00
+ 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20
+ 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
+ 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
+ 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10
+ 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01
+ 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10
+ 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02
+ 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
+ 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
+ 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01
+ 01 01 01 01 01 01 01 10 01 01 01 01 01 01 01 02
+ 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02
+ 02 02 02 02 02 02 02 10 02 02 02 02 02 02 02 02
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 61 62 63 64 65 66 67 68 FD 6A 6B 6C 6D 6E 6F
+ 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F
+ 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F
+ 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F
+ 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F
+ 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F
+ A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF
+ B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF
+ E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
+ F0 F1 F2 F3 F4 F5 F6 D7 F8 F9 FA FB FC 69 FE DF
+ E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
+ F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F
+ 60 41 42 43 44 45 46 47 48 DD 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F
+ 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F
+ 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F
+ A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF
+ B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF
+ C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
+ D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF
+ C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
+ D0 D1 D2 D3 D4 D5 D6 F7 D8 D9 DA DB DC 49 DE FF
+
+
+
+
+
+
+0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F
+0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F
+0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F
+0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F
+0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F
+0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F
+0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F
+0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F
+0080 0081 0082 0083 0084 0085 0086 0087 0088 0089 008A 008B 008C 008D 008E 008F
+0090 0091 0092 0093 0094 0095 0096 0097 0098 0099 009A 009B 009C 009D 009E 009F
+00A0 00A1 00A2 00A3 00A4 00A5 00A6 00A7 00A8 00A9 00AA 00AB 00AC 00AD 00AE 00AF
+00B0 00B1 00B2 00B3 00B4 00B5 00B6 00B7 00B8 00B9 00BA 00BB 00BC 00BD 00BE 00BF
+00C0 00C1 00C2 00C3 00C4 00C5 00C6 00C7 00C8 00C9 00CA 00CB 00CC 00CD 00CE 00CF
+011E 00D1 00D2 00D3 00D4 00D5 00D6 00D7 00D8 00D9 00DA 00DB 00DC 0130 015E 00DF
+00E0 00E1 00E2 00E3 00E4 00E5 00E6 00E7 00E8 00E9 00EA 00EB 00EC 00ED 00EE 00EF
+011F 00F1 00F2 00F3 00F4 00F5 00F6 00F7 00F8 00F9 00FA 00FB 00FC 0131 015F 00FF
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 42 43 45 46 47 48 4A 4B 4D 4E 4F 50 51 52
+ 54 55 56 57 59 5A 5C 5D 5E 5F 60 61 62 63 64 65
+ 66 41 42 43 45 46 47 48 4A 4C 4D 4E 4F 50 51 52
+ 54 55 56 57 59 5A 5C 5D 5E 5F 60 87 88 89 8A 8B
+ 8C 8D 8E 8F 90 91 92 93 94 95 96 97 98 99 9A 9B
+ 9C 9D 9E 9F A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB
+ AC AD AE AF B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB
+ BC BD BE BF C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB
+ 41 41 41 41 41 41 41 44 46 46 46 46 4C 4C 4C 4C
+ 49 51 52 52 52 52 53 E0 52 5A 5A 5A 5B 4C 58 57
+ 41 41 41 41 41 41 41 44 46 46 46 46 4C 4C 4C 4C
+ 49 51 52 52 52 52 53 FA 52 5A 5A 5A 5B 4B 58 5F
+
+
+
+
+
+
+
+
diff --git a/sql/share/charsets/latin7.xml b/sql/share/charsets/latin7.xml
new file mode 100644
index 00000000..78b7dda6
--- /dev/null
+++ b/sql/share/charsets/latin7.xml
@@ -0,0 +1,187 @@
+
+
+
+
+
+ Copyright (C) 2003 MySQL AB
+   Use is subject to license terms
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
+
+
+
+
+
+
+ 00
+ 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20
+ 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
+ 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
+ 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10
+ 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01
+ 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10
+ 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02
+ 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20
+ 01 20 10 20 10 10 00 00 20 10 20 10 20 10 10 10
+ 20 10 10 10 10 10 10 10 20 00 20 10 20 10 10 20
+ 48 20 10 10 10 20 10 10 10 10 01 10 10 10 10 01
+ 10 10 10 10 10 10 10 10 10 10 02 10 10 10 10 02
+ 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01
+ 01 01 01 01 01 01 01 10 01 01 01 01 01 01 01 02
+ 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02
+ 02 02 02 02 02 02 02 10 02 02 02 02 02 02 02 10
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F
+ 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F
+ 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F
+ 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F
+ 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F
+ 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F
+ A0 A1 A2 A3 A4 A5 A6 A7 B8 A9 BA AB AC AD AE BF
+ B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF
+ E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
+ F0 F1 F2 F3 F4 F5 F6 D7 F8 F9 FA FB FC FD FE DF
+ E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
+ F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F
+ 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F
+ 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F
+ 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F
+ A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF
+ B0 B1 B2 B3 B4 B5 B6 B7 A8 B9 AA BB BC BD BE AF
+ C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
+ D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF
+ C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
+ D0 D1 D2 D3 D4 D5 D6 F7 D8 D9 DA DB DC DD DE FF
+
+
+
+
+
+
+0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F
+0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F
+0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F
+0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F
+0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F
+0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F
+0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F
+0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F
+0080 0081 0082 0083 0084 0085 0086 0087 0088 0089 008A 008B 008C 008D 008E 008F
+0090 0091 0092 0093 0094 0095 0096 0097 0098 0099 009A 009B 009C 009D 009E 009F
+00A0 201D 00A2 00A3 00A4 201E 00A6 00A7 00D8 00A9 0156 00AB 00AC 00AD 00AE 00C6
+00B0 00B1 00B2 00B3 201C 00B5 00B6 00B7 00F8 00B9 0157 00BB 00BC 00BD 00BE 00E6
+0104 012E 0100 0106 00C4 00C5 0118 0112 010C 00C9 0179 0116 0122 0136 012A 013B
+0160 0143 0145 00D3 014C 00D5 00D6 00D7 0172 0141 015A 016A 00DC 017B 017D 00DF
+0105 012F 0101 0107 00E4 00E5 0119 0113 010D 00E9 017A 0117 0123 0137 012B 013C
+0161 0144 0146 00F3 014D 00F5 00F6 00F7 0173 0142 015B 016B 00FC 017C 017E 2019
+
+
+
+
+
+
+ 00 02 03 04 05 06 07 08 09 2E 2F 30 31 32 0A 0B
+ 0C 0D 0E 0F 10 11 12 13 14 15 16 17 18 19 1A 1B
+ 2C 33 34 35 36 37 38 27 39 3A 3B 5D 3C 28 3D 3E
+ 76 7A 7C 7E 80 81 82 83 84 85 3F 40 5E 5F 60 41
+ 42 86 90 92 98 9A A4 A6 AA AC B2 B4 B8 BE C0 C6
+ CE D0 D2 D6 E5 E8 EE F0 FA FC DD 43 44 45 46 47
+ 48 87 91 93 99 9B A5 A7 AB AD B3 B5 B9 BF C1 C7
+ CF D1 D3 D7 E6 E9 EF F1 FB FD DE 49 4A 4B 4C 1C
+ 01 1D 57 1E 5A 74 71 72 1F 75 20 5B 21 4E 52 51
+ 22 55 56 58 59 73 2A 2B 23 E7 24 5C 25 4F 54 26
+ 2D FE 66 67 68 FF 4D 69 CC 6A D4 62 6B 29 6C 8E
+ 6D 61 7D 7F 50 6E 6F 70 CD 7B D5 63 77 78 79 8F
+ 8C B0 88 94 F4 8A A2 A0 96 9C DF 9E A8 B6 AE BA
+ DB C2 C4 C8 CA F2 F6 64 EC BC D8 EA F8 E1 E3 DA
+ 8D B1 89 95 F5 8B A3 A1 97 9D E0 9F A9 B7 AF BB
+ DC C3 C5 C9 CB F3 F7 65 ED BD D9 EB F9 E2 E4 53
+
+
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 30 32 33 34 35 36 37 2B 38 39 3A 5C 3B 2C 3C 3D
+ 76 7A 7C 7E 80 81 82 83 84 85 3E 3F 5D 5E 5F 40
+ 41 86 92 94 9A 9C A6 A8 AC AE B4 B6 BA C0 C2 C8
+ D4 D6 D8 DC E3 E6 EE F0 F2 F4 F6 42 43 44 45 46
+ 47 87 93 95 9B 9D A7 A9 AD AF B5 B7 BB C1 C3 C9
+ D5 D7 D9 DD E4 E7 EF F1 F3 F5 F7 48 49 4A 4B 20
+ 75 21 56 22 59 73 70 71 23 74 24 5A 25 4D 51 50
+ 26 54 55 57 58 72 2E 2F 27 E5 28 5B 29 4E 53 2A
+ 31 FE 65 66 67 FF 4C 68 D3 69 DA 61 6A 2D 6B 90
+ 6C 60 7D 7F 4F 6D 6E 6F D2 7B DB 62 77 78 79 91
+ 8E B2 8A 96 88 8C A4 A2 98 9E F8 A0 AA B8 B0 BE
+ E1 C4 C6 CA CE D0 CC 63 EC BC DE EA E8 FA FC E0
+ 8F B3 8B 97 89 8D A5 A3 99 9F F9 A1 AB B9 B1 BF
+ E2 C5 C7 CB CF D1 CD 64 ED BD DF EB E9 FB FD 52
+
+
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 30 32 33 34 35 36 37 2B 38 39 3A 5C 3B 2C 3C 3D
+ 76 7A 7C 7E 80 81 82 83 84 85 3E 3F 5D 5E 5F 40
+ 41 86 92 94 9A 9C A6 A8 AC AE B4 B6 BA C0 C2 C8
+ D4 D6 D8 DC E3 E6 EE F0 F2 F4 F6 42 43 44 45 46
+ 47 86 92 94 9A 9C A6 A8 AC AE B4 B6 BA C0 C2 C8
+ D4 D6 D8 DC E2 E6 EE F0 F2 F4 F6 48 49 4A 4B 20
+ 75 21 56 22 59 73 70 71 23 74 24 5A 25 4D 51 50
+ 26 54 55 57 58 72 2E 2F 27 E5 28 5B 29 4E 53 2A
+ 31 FE 65 66 67 FF 4C 68 2D 69 DA 61 6A 2D 6B 90
+ 6C 60 7D 7F 4F 6D 6E 6F D3 7B DB 62 77 78 79 90
+ 8E B2 8A 96 88 8C A4 A2 98 9E F8 A0 AA B8 B0 BE
+ E1 C4 C6 CA CE D0 CC 63 EC BC DE EA E8 FA FC E0
+ 8E B2 8A 96 88 8C A4 A2 98 9E F8 A0 AA B8 B0 BE
+ E1 C4 C6 CA CE D0 CC 64 EC BC DE EA E8 FA FC 52
+
+
+
+
+
+
+
+
diff --git a/sql/share/charsets/macce.xml b/sql/share/charsets/macce.xml
new file mode 100644
index 00000000..2a601b7b
--- /dev/null
+++ b/sql/share/charsets/macce.xml
@@ -0,0 +1,207 @@
+
+
+
+
+
+ Copyright (C) 2003 MySQL AB
+   Use is subject to license terms
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
+
+
+
+
+
+
+ 00
+ 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20
+ 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
+ 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
+ 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10
+ 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01
+ 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10
+ 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02
+ 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 00
+ 01 01 02 01 01 01 01 02 02 01 02 02 01 02 02 01
+ 02 01 02 02 01 02 01 02 02 02 02 02 02 01 02 02
+ 00 00 01 00 00 00 00 02 00 00 00 02 00 00 02 01
+ 02 01 00 00 02 01 00 00 02 01 02 01 02 01 02 01
+ 02 01 00 00 02 01 00 00 00 00 00 02 01 01 02 01
+ 00 00 00 00 00 00 00 00 02 01 02 01 00 00 02 01
+ 02 01 00 00 02 01 02 01 01 02 01 01 02 01 01 01
+ 02 01 01 02 01 02 01 02 01 02 02 01 01 02 01 00
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F
+ 70 71 72 73 54 75 76 77 78 79 7A 5B 5C 5D 5E 5F
+ 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F
+ 70 71 72 73 54 75 76 77 78 79 7A 7B 7C 7D 7E 7F
+ 8A 82 82 8E 88 9A 9F 87 88 8B 8A 8B 8D 8D 8E 90
+ 90 93 92 93 95 95 98 97 98 99 9A 9B 9C 9E 9E 9F
+ A0 A1 AB A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE B0
+ B0 B4 B2 B3 B4 FA B6 B7 B8 BA BA BC BC BE BE C0
+ C0 C4 C2 C3 C4 CB C6 C7 C8 C9 CA CB CE 9B CE D8
+ D0 D1 D2 D3 D4 D5 D6 D7 D8 DA DA DE DC DD DE E0
+ E0 E4 E2 E3 E4 E6 E6 87 E9 E9 92 EC EC F0 97 99
+ F0 F3 9C F3 F5 F5 F7 F7 F9 F9 FA FD B8 FD AE FF
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 74 55 56 57 58 59 5A 5B 5C 5D 5E 5F
+ 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 74 55 56 57 58 59 5A 7B 7C 7D 7E 7F
+ 80 81 81 83 84 85 86 E7 84 89 80 89 8C 8C 83 8F
+ 8F 91 EA 91 94 94 96 EE 96 EF 85 CD F2 9D 9D 86
+ A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA A2 AC AD FE AF
+ AF B1 B2 B3 B1 B5 B6 B7 FC B9 B9 BB BB BD BD BF
+ BF C1 C2 C3 C1 C5 C6 C7 C8 C9 CA C5 CC CD CC CF
+ D0 D1 D2 D3 D4 D5 D6 D7 CF D9 D9 DB DC DD DB DF
+ DF E1 E2 E3 E1 E5 E5 E7 E8 E8 EA EB EB ED EE EF
+ ED F1 F2 F1 F4 F4 F6 F6 F8 F8 B5 FB FC FB FE FF
+
+
+
+
+
+
+ 0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F
+ 0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F
+ 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F
+ 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F
+ 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F
+ 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F
+ 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F
+ 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F
+ 00C4 0100 0101 00C9 0104 00D6 00DC 00E1 0105 010C 00E4 010D 0106 0107 00E9 0179
+ 017A 010E 00ED 010F 0112 0113 0116 00F3 0117 00F4 00F6 00F5 00FA 011A 011B 00FC
+ 2020 00B0 0118 00A3 00A7 2022 00B6 00DF 00AE 00A9 2122 0119 00A8 2260 0123 012E
+ 012F 012A 2264 2265 012B 0136 2202 2211 0142 013B 013C 013D 013E 0139 013A 0145
+ 0146 0143 00AC 221A 0144 0147 2206 00AB 00BB 2026 00A0 0148 0150 00D5 0151 014C
+ 2013 2014 201C 201D 2018 2019 00F7 25CA 014D 0154 0155 0158 2039 203A 0159 0156
+ 0157 0160 201A 201E 0161 015A 015B 00C1 0164 0165 00CD 017D 017E 016A 00D3 00D4
+ 016B 016E 00DA 016F 0170 0171 0172 0173 00DD 00FD 0137 017B 0141 017C 0122 02C7
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 46 47 4A 4C 52 53 55 56 5A 5B 5D 62 62 67
+ 6F 70 71 75 79 81 88 89 8A 8B 8D 90 91 92 93 94
+ 95 41 46 47 4A 4C 52 53 55 56 5A 5B 5D 62 62 67
+ 6F 70 71 75 79 81 88 89 8A 8B 8D 96 97 98 99 9A
+ 41 41 41 4C 41 67 81 41 41 47 41 47 47 47 4C 8D
+ 8D 4A 56 4A 4C 4C 4C 67 4C 67 67 67 81 4C 4C 81
+ A0 A1 4C A3 A4 A5 A6 75 A8 A9 AA 4C AC AD 53 56
+ 56 56 B2 B3 56 5B B6 B7 5D 5D 5D 5D 5D 5D 5D 62
+ 62 62 C2 C3 62 62 C6 C7 C8 C9 CA 62 67 67 67 67
+ D0 D1 D2 D3 D4 D5 D6 D7 67 71 71 71 DC DD 71 71
+ 71 75 E2 E3 75 75 75 41 79 79 56 8D 8D 81 67 67
+ 81 81 81 81 81 81 81 81 8B 8B 5B 8D 5D 8D 53 FF
+
+
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 4B 4D 53 57 63 65 69 6B 73 75 79 83 85 8D
+ 9B 9D 9F A7 AE B2 C0 C2 C4 C6 CA D2 D3 D4 D5 D6
+ D7 41 4B 4D 53 57 63 65 69 6B 73 75 79 83 85 8D
+ 9B 9D 9F A7 AE B2 C0 C2 C4 C6 CA D8 D9 DA DB DC
+ 41 41 41 57 41 8D B2 41 41 4D 41 4D 4D 4D 57 CA
+ CA 53 6B 53 57 57 57 8D 57 8D 8D 8D B2 57 57 B2
+ DD DE 57 DF E0 E1 E2 A7 E3 E4 E5 57 E6 E7 65 6B
+ 6B 6B E8 E9 6B 75 EA EB 79 79 79 79 79 79 79 85
+ 85 85 EC ED 85 85 EE EF F0 F1 F2 85 8D 8D 8D 8D
+ F3 F4 F5 F6 F7 F8 F9 FA 8D 9F 9F 9F FB FC 9F 9F
+ 9F A7 FD FE A7 A7 A7 41 BE BE 6B CA CA B2 8D 8D
+ B2 B2 B2 B2 B2 B2 B2 B2 C6 C6 75 CA 79 CA 65 FF
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 4B 4D 53 57 63 65 69 6B 73 75 79 83 85 8D
+ 9B 9D 9F A7 AE B2 C0 C2 C4 C6 CA D2 D3 D4 D5 D6
+ D7 41 4B 4D 53 57 63 65 69 6B 73 75 79 83 85 8D
+ 9B 9D 9F A7 AE B2 C0 C2 C4 C6 CA D8 D9 DA DB DC
+ 45 47 47 59 49 91 B6 43 49 4F 45 4F 51 51 59 CE
+ CE 55 71 55 5B 5B 5D 8F 5D 99 91 97 B8 5F 5F B6
+ DD DE 61 DF E0 E1 E2 AD E3 E4 E5 61 E6 E7 67 6F
+ 6F 6D E8 E9 6D 77 EA EB 7B 81 82 7F 7F 7D 7D 8B
+ 8B 87 EC ED 87 89 EE EF F0 F1 F2 89 93 97 93 95
+ F3 F4 F5 F6 F7 F8 F9 FA 95 A1 A1 A3 FB FC A3 A5
+ A5 A9 FD FE A9 AB AB 43 B0 B0 71 CC CC BC 8F 99
+ BC B4 B8 B4 BA BA BE BE C8 C8 77 D0 7B D0 67 FF
+
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 4B 4D 53 57 63 65 69 6B 73 75 79 83 85 8D
+ 9B 9D 9F A7 AE B2 C0 C2 C4 C6 CA D2 D3 D4 D5 D6
+ D7 42 4C 4E 54 58 64 66 6A 6C 74 76 7A 84 86 8E
+ 9C 9E A0 A8 AF B3 C1 C3 C5 C7 CB D8 D9 DA DB DC
+ 45 47 48 59 49 91 B6 44 4A 4F 46 50 51 52 5A CE
+ CF 55 72 56 5B 5C 5D 90 5E 9A 92 98 B8 5F 60 B7
+ DD DE 61 DF E0 E1 E2 AD E3 E4 E5 62 E6 E7 68 6F
+ 70 6D E8 E9 6E 77 EA EB 7C 81 82 7F 80 7D 7E 8B
+ 8C 87 EC ED 88 89 EE EF F0 F1 F2 8A 93 97 94 95
+ F3 F4 F5 F6 F7 F8 F9 FA 96 A1 A2 A3 FB FC A4 A5
+ A6 A9 FD FE AA AB AC 43 B0 B1 71 CC CD BC 8F 99
+ BD B4 B9 B5 BA BB BE BF C8 C9 78 D0 7B D1 67 FF
+
+
+
+
+
+
+
diff --git a/sql/share/charsets/macroman.xml b/sql/share/charsets/macroman.xml
new file mode 100644
index 00000000..711a3914
--- /dev/null
+++ b/sql/share/charsets/macroman.xml
@@ -0,0 +1,200 @@
+
+
+
+
+
+ Copyright (C) 2003 MySQL AB
+   Use is subject to license terms
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
+
+
+
+
+
+
+ 00
+ 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20
+ 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
+ 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
+ 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10
+ 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01
+ 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10
+ 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02
+ 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 10
+ 20 01 01 01 01 01 01 02 02 02 02 02 02 02 02 02
+ 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02
+ 00 00 00 00 00 00 00 02 00 00 00 00 00 00 01 01
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 02 02
+ 00 00 00 00 02 00 00 00 00 00 00 20 01 01 00 00
+ 00 00 00 00 00 00 00 00 02 01 00 00 00 00 00 00
+ 00 00 00 00 00 20 01 01 01 01 01 01 01 01 01 01
+ 00 01 01 01 01 02 00 00 00 00 00 00 00 00 00 00
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F
+ 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F
+ 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F
+ 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F
+ 8A 8C 8D 8E 96 9A 9F 87 88 89 8A 8B 8C 8D 8E 8F
+ 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F
+ A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD BE BF
+ B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF
+ C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA 88 8B 9B CE CF
+ D0 D1 D2 D3 D4 D5 D6 D7 D8 D8 DA DB DC DD DE DF
+ E0 E1 E2 E3 E4 89 90 87 91 8F 92 94 95 93 97 99
+ F0 98 9C 9E 9D F5 F6 F7 F8 F9 FA FB FC FD FE FF
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F
+ 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
+ 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F
+ 80 81 82 83 84 85 86 E7 CB E5 80 CC 81 82 83 E9
+ E6 E8 EA ED EB EC 84 EE F1 EF 85 CD F2 F4 F3 86
+ A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF
+ B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD AE AF
+ C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
+ D0 D1 D2 D3 D4 D5 D6 D7 D9 D9 DA DB DC DD DE DF
+ E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
+ F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF
+
+
+
+
+
+
+ 0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F
+ 0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F
+ 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F
+ 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F
+ 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F
+ 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F
+ 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F
+ 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F
+ 00C4 00C5 00C7 00C9 00D1 00D6 00DC 00E1 00E0 00E2 00E4 00E3 00E5 00E7 00E9 00E8
+ 00EA 00EB 00ED 00EC 00EE 00EF 00F1 00F3 00F2 00F4 00F6 00F5 00FA 00F9 00FB 00FC
+ 2020 00B0 00A2 00A3 00A7 2022 00B6 00DF 00AE 00A9 2122 00B4 00A8 2260 00C6 00D8
+ 221E 00B1 2264 2265 00A5 00B5 2202 2211 220F 03C0 222B 00AA 00BA 03A9 00E6 00F8
+ 00BF 00A1 00AC 221A 0192 2248 2206 00AB 00BB 2026 00A0 00C0 00C3 00D5 0152 0153
+ 2013 2014 201C 201D 2018 2019 00F7 25CA 00FF 0178 2044 20AC 2039 203A FB01 FB02
+ 2021 00B7 201A 201E 2030 00C2 00CA 00C1 00CB 00C8 00CD 00CE 00CF 00CC 00D3 00D4
+ F8FF 00D2 00DA 00DB 00D9 0131 02C6 02DC 00AF 02D8 02D9 02DA 00B8 02DD 02DB 02C7
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 49 50 52 53 57 59 60 61 67 68 69 70 71 72
+ 79 80 81 82 84 85 90 91 92 93 95 A0 A1 A2 A3 A4
+ A5 41 49 50 52 53 57 59 60 61 67 68 69 70 71 72
+ 79 80 81 82 84 85 90 91 92 93 95 A6 A7 A8 A9 AA
+ 41 41 50 53 71 72 85 41 41 41 41 41 41 50 53 53
+ 53 53 61 61 61 61 71 72 72 72 72 72 85 85 85 85
+ AB AC AD AE AF B0 B1 82 B2 B3 B4 B5 B6 B7 48 72
+ B8 B9 BA BB BC BD BE BF C0 C1 C2 C3 C4 C5 48 72
+ C6 C7 C8 C9 57 CA CB CC CD CE CF 41 41 72 D0 D1
+ D2 D3 D4 D5 D6 D7 D8 D9 93 93 DA DB DC DD DE DF
+ E0 E1 E2 E3 E4 41 53 41 53 53 61 61 61 61 72 72
+ F0 72 85 85 85 61 F6 F7 F8 F9 FA FB FC FD FE FF
+
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 51 53 57 59 63 66 68 6A 75 77 79 7B 7D 81
+ 91 93 95 97 9A 9C A6 A8 AA AC B0 B2 B3 B4 B5 B6
+ B7 41 51 53 57 59 63 66 68 6A 75 77 79 7B 7D 81
+ 91 93 95 97 9A 9C A6 A8 AA AC B0 B8 B9 BA BB BC
+ 41 41 53 59 7D 81 9C 41 41 41 41 41 41 53 59 59
+ 59 59 6A 6A 6A 6A 7D 81 81 81 81 81 9C 9C 9C 9C
+ BD BE BF C0 C1 C2 C3 97 C4 C5 C6 C7 C8 C9 41 81
+ CA CB CC CD CE CF D0 D1 D2 D3 D4 D5 D6 D7 41 81
+ D8 D9 DA DB 63 DC DD DE DF E0 E1 41 41 81 81 81
+ E2 E3 E4 E5 E6 E7 E8 E9 AC AC EA EB EC ED EE EF
+ F0 F1 F2 F3 F4 41 59 41 59 59 6A 6A 6A 6A 81 81
+ F0 81 9C 9C 9C 6A F6 F7 F8 F9 FA FB FC FD FE FF
+
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 51 53 57 59 63 66 68 6A 75 77 79 7B 7D 81
+ 91 93 95 97 9A 9C A6 A8 AA AC B0 B2 B3 B4 B5 B6
+ B7 41 51 53 57 59 63 66 68 6A 75 77 79 7B 7D 81
+ 91 93 95 97 9A 9C A6 A8 AA AC B0 B8 B9 BA BB BC
+ 4B 4D 55 5D 7F 8B A4 45 43 47 4B 49 4D 55 5D 5B
+ 5F 61 6E 6C 70 72 7F 85 83 87 8B 89 A0 9E A2 A4
+ BD BE BF C0 C1 C2 C3 99 C4 C5 C6 C7 C8 C9 4F 8D
+ CA CB CC CD CE CF D0 D1 D2 D3 D4 D5 D6 D7 4F 8D
+ D8 D9 DA DB 65 DC DD DE DF E0 E1 43 49 89 8F 8F
+ E2 E3 E4 E5 E6 E7 E8 E9 AE AE EA EB EC ED EE EF
+ F0 F1 F2 F3 F4 47 5F 45 61 5B 6E 70 70 6C 85 87
+ F0 83 A0 A2 9E 72 F6 F7 F8 F9 FA FB FC FD FE FF
+
+
+
+
+
+ 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
+ 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
+ 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
+ 40 41 51 53 57 59 63 66 68 6A 75 77 79 7B 7D 81
+ 91 93 95 97 9A 9C A6 A8 AA AC B0 B2 B3 B4 B5 B6
+ B7 42 52 54 58 5A 64 67 69 6B 76 78 7A 7C 7E 82
+ 92 94 96 98 9B 9D A7 A9 AB AD B1 B8 B9 BA BB BC
+ 4B 4D 55 5D 7F 8B A4 46 44 48 4C 4A 4E 56 5E 5C
+ 60 62 6F 6D 71 73 80 86 84 88 8C 8A A1 9F A3 A5
+ BD BE BF C0 C1 C2 C3 99 C4 C5 C6 C7 C8 C9 4F 8D
+ CA CB CC CD CE CF D0 D1 D2 D3 D4 D5 D6 D7 50 8E
+ D8 D9 DA DB 65 DC DD DE DF E0 E1 43 49 89 8F 90
+ E2 E3 E4 E5 E6 E7 E8 E9 AF AE EA EB EC ED EE EF
+ F0 F1 F2 F3 F4 47 5F 45 61 5B 6E 70 72 6C 85 87
+ F0 83 A0 A2 9E 74 F6 F7 F8 F9 FA FB FC FD FE FF
+
+
+
+
+
+
diff --git a/sql/share/charsets/swe7.xml b/sql/share/charsets/swe7.xml
new file mode 100644
index 00000000..513c3eeb
--- /dev/null
+++ b/sql/share/charsets/swe7.xml
@@ -0,0 +1,141 @@
+
+
+
+
+
+ Copyright (C) 2003 MySQL AB
+   Use is subject to license terms
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
+
+
+
+
+
+
+  00
+  20  20  20  20  20  20  20  20  20  28  28  28  28  28  20  20
+  20  20  20  20  20  20  20  20  20  20  20  20  20  20  20  20
+  48  10  10  10  10  10  10  10  10  10  10  10  10  10  10  10
+  84  84  84  84  84  84  84  84  84  84  10  10  10  10  10  10
+  01  81  81  81  81  81  81  01  01  01  01  01  01  01  01  01
+  01  01  01  01  01  01  01  01  01  01  01  01  01  01  01  10
+  01  82  82  82  82  82  82  02  02  02  02  02  02  02  02  02
+  02  02  02  02  02  02  02  02  02  02  02  02  02  02  02  20
+  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00
+  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00
+  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00
+  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00
+  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00
+  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00
+  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00
+  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00  00
+
+
+
+
+
+
+  00  01  02  03  04  05  06  07  08  09  0A  0B  0C  0D  0E  0F
+  10  11  12  13  14  15  16  17  18  19  1A  1B  1C  1D  1E  1F
+  20  21  22  23  24  25  26  27  28  29  2A  2B  2C  2D  2E  2F
+  30  31  32  33  34  35  36  37  38  39  3A  3B  3C  3D  3E  3F
+  60  61  62  63  64  65  66  67  68  69  6A  6B  6C  6D  6E  6F
+  70  71  72  73  74  75  76  77  78  79  7A  7B  7C  7D  7E  5F
+  60  61  62  63  64  65  66  67  68  69  6A  6B  6C  6D  6E  6F
+  70  71  72  73  74  75  76  77  78  79  7A  7B  7C  7D  7E  7F
+  80  81  82  83  84  85  86  87  88  89  8A  8B  8C  8D  8E  8F
+  90  91  92  93  94  95  96  97  98  99  9A  9B  9C  9D  9E  9F
+  A0  A1  A2  A3  A4  A5  A6  A7  A8  A9  AA  AB  AC  AD  AE  AF
+  B0  B1  B2  B3  B4  B5  B6  B7  B8  B9  BA  BB  BC  BD  BE  BF
+  C0  C1  C2  C3  C4  C5  C6  C7  C8  C9  CA  CB  CC  CD  CE  CF
+  D0  D1  D2  D3  D4  D5  D6  D7  D8  D9  DA  DB  DC  DD  DE  DF
+  E0  E1  E2  E3  E4  E5  E6  E7  E8  E9  EA  EB  EC  ED  EE  EF
+  F0  F1  F2  F3  F4  F5  F6  F7  F8  F9  FA  FB  FC  FD  FE  FF
+
+
+
+
+
+
+  00  01  02  03  04  05  06  07  08  09  0A  0B  0C  0D  0E  0F
+  10  11  12  13  14  15  16  17  18  19  1A  1B  1C  1D  1E  1F
+  20  21  22  23  24  25  26  27  28  29  2A  2B  2C  2D  2E  2F
+  30  31  32  33  34  35  36  37  38  39  3A  3B  3C  3D  3E  3F
+  40  41  42  43  44  45  46  47  48  49  4A  4B  4C  4D  4E  4F
+  50  51  52  53  54  55  56  57  58  59  5A  5B  5C  5D  5E  5F
+  40  41  42  43  44  45  46  47  48  49  4A  4B  4C  4D  4E  4F
+  50  51  52  53  54  55  56  57  58  59  5A  5B  5C  5D  5E  7F
+  80  81  82  83  84  85  86  87  88  89  8A  8B  8C  8D  8E  8F
+  90  91  92  93  94  95  96  97  98  99  9A  9B  9C  9D  9E  9F
+  A0  A1  A2  A3  A4  A5  A6  A7  A8  A9  AA  AB  AC  AD  AE  AF
+  B0  B1  B2  B3  B4  B5  B6  B7  B8  B9  BA  BB  BC  BD  BE  BF
+  C0  C1  C2  C3  C4  C5  C6  C7  C8  C9  CA  CB  CC  CD  CE  CF
+  D0  D1  D2  D3  D4  D5  D6  D7  D8  D9  DA  DB  DC  DD  DE  DF
+  E0  E1  E2  E3  E4  E5  E6  E7  E8  E9  EA  EB  EC  ED  EE  EF
+  F0  F1  F2  F3  F4  F5  F6  F7  F8  F9  FA  FB  FC  FD  FE  FF
+
+
+
+
+
+
+0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F
+0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F
+0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F
+0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F
+00C9 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F
+0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 00C4 00D6 00C5 00DC 005F
+00E9 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F
+0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 00E4 00F6 00E5 00FC 0000
+0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
+0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
+0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
+0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
+0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
+0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
+0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
+0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
+
+
+
+
+
+
+  00  01  02  03  04  05  06  07  08  09  0A  0B  0C  0D  0E  0F
+  10  11  12  13  14  15  16  17  18  19  1A  1B  1C  1D  1E  1F
+  20  21  22  23  24  25  26  27  28  29  2A  2B  2C  2D  2E  2F
+  30  31  32  33  34  35  36  37  38  39  3A  3B  3C  3D  3E  3F
+  45  41  42  43  44  45  46  47  48  49  4A  4B  4C  4D  4E  4F
+  50  51  52  53  54  55  56  57  58  59  5A  5C  5D  5B  59  5F
+  45  41  42  43  44  45  46  47  48  49  4A  4B  4C  4D  4E  4F
+  50  51  52  53  54  55  56  57  58  59  5A  5C  5D  5B  59  7F
+  80  81  82  83  84  85  86  87  88  89  8A  8B  8C  8D  8E  8F
+  90  91  92  93  94  95  96  97  98  99  9A  9B  9C  9D  9E  9F
+  A0  A1  A2  A3  A4  A5  A6  A7  A8  A9  AA  AB  AC  AD  AE  AF
+  B0  B1  B2  B3  B4  B5  B6  B7  B8  B9  BA  BB  BC  BD  BE  BF
+  C0  C1  C2  C3  C4  C5  C6  C7  C8  C9  CA  CB  CC  CD  CE  CF
+  D0  D1  D2  D3  D4  D5  D6  D7  D8  D9  DA  DB  DC  DD  DE  DF
+  E0  E1  E2  E3  E4  E5  E6  E7  E8  E9  EA  EB  EC  ED  EE  EF
+  F0  F1  F2  F3  F4  F5  F6  F7  F8  F9  FA  FB  FC  FD  FE  FF
+
+
+
+
+
+
+
+
+
+
diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt
new file mode 100644
index 00000000..0922beba
--- /dev/null
+++ b/sql/share/errmsg-utf8.txt
@@ -0,0 +1,10761 @@
+languages bulgarian=bgn, chinese=chi, czech=cze, danish=dan, dutch=nla, english=eng, estonian=est, french=fre, georgian=geo, german=ger, greek=greek, hindi=hindi, hungarian=hun, italian=ita, japanese=jpn, korean=kor, norwegian-ny=norwegian-ny, norwegian=nor, polish=pol, portuguese=por, romanian=rum, russian=rus, serbian=serbian, slovak=slo, spanish=spa, swedish=swe, ukrainian=ukr;
+
+default-language eng
+
+start-error-number 1000
+
+ER_HASHCHK  
+        eng "hashchk"
+ER_NISAMCHK  
+        eng "isamchk"
+ER_NO  
+        chi "不"
+        cze "NE"
+        dan "NEJ"
+        eng "NO"
+        est "EI"
+        fre "NON"
+        ger "Nein"
+        geo "არა"
+        greek "ΟΧΙ"
+        hindi "नहीं"
+        hun "NEM"
+        kor "아니오"
+        nla "NEE"
+        nor "NEI"
+        norwegian-ny "NEI"
+        pol "NIE"
+        por "NÃO"
+        rum "NU"
+        rus "НЕТ"
+        serbian "NE"
+        slo "NIE"
+        ukr "НІ"
+ER_YES  
+        chi "是的"
+        cze "ANO"
+        dan "JA"
+        eng "YES"
+        est "JAH"
+        fre "OUI"
+        ger "Ja"
+        geo "დიახ"
+        greek "ΝΑΙ"
+        hindi "हाँ"
+        hun "IGEN"
+        ita "SI"
+        kor "예"
+        nla "JA"
+        nor "JA"
+        norwegian-ny "JA"
+        pol "TAK"
+        por "SIM"
+        rum "DA"
+        rus "ДА"
+        serbian "DA"
+        slo "Áno"
+        spa "SÍ"
+        ukr "ТАК"
+ER_CANT_CREATE_FILE  
+        chi "无法创建文件'%-.200s'(错误号码:%M)"
+        cze "Nemohu vytvořit soubor '%-.200s' (chybový kód: %M)"
+        dan "Kan ikke oprette filen '%-.200s' (Fejlkode: %M)"
+        eng "Can't create file '%-.200s' (errno: %M)"
+        est "Ei suuda luua faili '%-.200s' (veakood: %M)"
+        fre "Ne peut créer le fichier '%-.200s' (Errcode: %M)"
+        ger "Kann Datei '%-.200s' nicht erzeugen (Fehler: %M)"
+        geo "ფაილის '%-.200s' შექმნა შეუძლებელია (შეცდომა: %M)"
+        greek "Αδύνατη η δημιουργία του αρχείου '%-.200s' (κωδικός λάθους: %M)"
+        hindi "फ़ाइल '%-.200s' नहीं बन सका (errno: %M)"
+        hun "A '%-.200s' file nem hozhato letre (hibakod: %M)"
+        ita "Impossibile creare il file '%-.200s' (errno: %M)"
+        jpn "ファイル '%-.200s' を作成できません。(エラー番号: %M)"
+        kor "화일 '%-.200s'를 만들지 못했습니다. (에러번호: %M)"
+        nla "Kan file '%-.200s' niet aanmaken (Errcode: %M)"
+        nor "Kan ikke opprette fila '%-.200s' (Feilkode: %M)"
+        norwegian-ny "Kan ikkje opprette fila '%-.200s' (Feilkode: %M)"
+        pol "Nie można stworzyć pliku '%-.200s' (Kod błędu: %M)"
+        por "Não pode criar o arquivo '%-.200s' (erro no. %M)"
+        rum "Nu pot sa creez fisierul '%-.200s' (Eroare: %M)"
+        rus "Невозможно создать файл '%-.200s' (ошибка: %M)"
+        serbian "Ne mogu da kreiram file '%-.200s' (errno: %M)"
+        slo "Nemôžem vytvoriť súbor '%-.200s' (chybový kód: %M)"
+        spa "No puedo crear el fichero/archivo '%-.200s' (Error: %M)"
+        swe "Kan inte skapa filen '%-.200s' (Felkod: %M)"
+        ukr "Не можу створити файл '%-.200s' (помилка: %M)"
+ER_CANT_CREATE_TABLE  
+        chi "无法创建表%`s.%`s(错误号码:%M)"
+        cze "Nemohu vytvořit tabulku %`s.%`s (chybový kód: %M)"
+        dan "Kan ikke oprette tabellen %`s.%`s (Fejlkode: %M)"
+        eng "Can't create table %`s.%`s (errno: %M)"
+        est "Ei suuda luua tabelit %`s.%`s (veakood: %M)"
+        fre "Ne peut créer la table %`s.%`s (Errcode: %M)"
+        ger "Kann Tabelle %`s.%`s nicht erzeugen (Fehler: %M)"
+        geo "შეცდომა ცხრილის (%`s.%`s) შექმნისას (შეცდ. კოდი: %M)"
+        greek "Αδύνατη η δημιουργία του πίνακα %`s.%`s (κωδικός λάθους: %M)"
+        hindi "टेबल '%`s.%`s' नहीं बन सका (errno: %M)"
+        hun "A %`s.%`s tabla nem hozhato letre (hibakod: %M)"
+        ita "Impossibile creare la tabella %`s.%`s (errno: %M)"
+        jpn "%`s.%`s テーブルが作れません.(errno: %M)"
+        kor "테이블 %`s.%`s를 만들지 못했습니다. (에러번호: %M)"
+        nla "Kan tabel %`s.%`s niet aanmaken (Errcode: %M)"
+        nor "Kan ikke opprette tabellen %`s.%`s (Feilkode: %M)"
+        norwegian-ny "Kan ikkje opprette tabellen %`s.%`s (Feilkode: %M)"
+        pol "Nie można stworzyć tabeli %`s.%`s (Kod błędu: %M)"
+        por "Não pode criar a tabela %`s.%`s (erro no. %M)"
+        rum "Nu pot sa creez tabla %`s.%`s (Eroare: %M)"
+        rus "Невозможно создать таблицу %`s.%`s (ошибка: %M)"
+        serbian "Ne mogu da kreiram tabelu %`s.%`s (errno: %M)"
+        slo "Nemôžem vytvoriť tabuľku %`s.%`s (chybový kód: %M)"
+        spa "No puedo crear la tabla %`s.%`s (Error: %M)"
+        swe "Kan inte skapa tabellen %`s.%`s (Felkod: %M)"
+        ukr "Не можу створити таблицю %`s.%`s (помилка: %M)"
+ER_CANT_CREATE_DB  
+        chi "无法创建数据库'%-.192s'(错误号码:%M)"
+        cze "Nemohu vytvořit databázi '%-.192s' (chybový kód: %M)"
+        dan "Kan ikke oprette databasen '%-.192s' (Fejlkode: %M)"
+        eng "Can't create database '%-.192s' (errno: %M)"
+        est "Ei suuda luua andmebaasi '%-.192s' (veakood: %M)"
+        fre "Ne peut créer la base '%-.192s' (Erreur %M)"
+        ger "Kann Datenbank '%-.192s' nicht erzeugen (Fehler: %M)"
+        geo "მონაცემთა ბაზის ('%-.192s') შექმნა შეუძლებელია (შეცდომა: %M)"
+        greek "Αδύνατη η δημιουργία της βάσης δεδομένων '%-.192s' (κωδικός λάθους: %M)"
+        hindi "डेटाबेस '%-.192s' नहीं बन सका (errno: %M)"
+        hun "Az '%-.192s' adatbazis nem hozhato letre (hibakod: %M)"
+        ita "Impossibile creare il database '%-.192s' (errno: %M)"
+        jpn "データベース '%-.192s' を作成できません。(エラー番号: %M)"
+        kor "데이타베이스 '%-.192s'를 만들지 못했습니다.. (에러번호: %M)"
+        nla "Kan database '%-.192s' niet aanmaken (Errcode: %M)"
+        nor "Kan ikke opprette databasen '%-.192s' (Feilkode: %M)"
+        norwegian-ny "Kan ikkje opprette databasen '%-.192s' (Feilkode: %M)"
+        pol "Nie można stworzyć bazy danych '%-.192s' (Kod błędu: %M)"
+        por "Não pode criar o banco de dados '%-.192s' (erro no. %M)"
+        rum "Nu pot sa creez baza de date '%-.192s' (Eroare: %M)"
+        rus "Невозможно создать базу данных '%-.192s' (ошибка: %M)"
+        serbian "Ne mogu da kreiram bazu '%-.192s' (errno: %M)"
+        slo "Nemôžem vytvoriť databázu '%-.192s' (chybový kód: %M)"
+        spa "No puedo crear la base de datos '%-.192s' (Error: %M)"
+        swe "Kan inte skapa databasen '%-.192s' (Felkod: %M)"
+        ukr "Не можу створити базу данних '%-.192s' (помилка: %M)"
+ER_DB_CREATE_EXISTS  
+        chi "无法创建数据库'%-.192s';已经存在"
+        cze "Nemohu vytvořit databázi '%-.192s'; databáze již existuje"
+        dan "Kan ikke oprette databasen '%-.192s'; databasen eksisterer"
+        eng "Can't create database '%-.192s'; database exists"
+        est "Ei suuda luua andmebaasi '%-.192s': andmebaas juba eksisteerib"
+        fre "Ne peut créer la base '%-.192s'; elle existe déjà"
+        ger "Kann Datenbank '%-.192s' nicht erzeugen. Datenbank existiert bereits"
+        geo "მონაცემთა ბაზის ('%-.192s') შექმნა შეუძლებელია. მონაცემთა ბაზა უკვე არსებობს"
+        greek "Αδύνατη η δημιουργία της βάσης δεδομένων '%-.192s'; Η βάση δεδομένων υπάρχει ήδη"
+        hindi "डेटाबेस '%-.192s' नहीं बन सकता है; यह डेटाबेस पहले से ही मौजूद है"
+        hun "Az '%-.192s' adatbazis nem hozhato letre Az adatbazis mar letezik"
+        ita "Impossibile creare il database '%-.192s'; il database esiste"
+        jpn "データベース '%-.192s' を作成できません。データベースはすでに存在します。"
+        kor "데이타베이스 '%-.192s'를 만들지 못했습니다.. 데이타베이스가 존재함"
+        nla "Kan database '%-.192s' niet aanmaken; database bestaat reeds"
+        nor "Kan ikke opprette databasen '%-.192s'; databasen eksisterer"
+        norwegian-ny "Kan ikkje opprette databasen '%-.192s'; databasen eksisterer"
+        pol "Nie można stworzyć bazy danych '%-.192s'; baza danych już istnieje"
+        por "Não pode criar o banco de dados '%-.192s'; este banco de dados já existe"
+        rum "Nu pot sa creez baza de date '%-.192s'; baza de date exista deja"
+        rus "Невозможно создать базу данных '%-.192s'. База данных уже существует"
+        serbian "Ne mogu da kreiram bazu '%-.192s'; baza već postoji"
+        slo "Nemôžem vytvoriť databázu '%-.192s'; databáza existuje"
+        spa "No puedo crear la base de datos '%-.192s'; la base de datos ya existe"
+        swe "Databasen '%-.192s' existerar redan"
+        ukr "Не можу створити базу данних '%-.192s'. База данних існує"
+ER_DB_DROP_EXISTS  
+        chi "无法删除数据库'%-.192s';数据库不存在"
+        cze "Nemohu zrušit databázi '%-.192s', databáze neexistuje"
+        dan "Kan ikke slette (droppe) '%-.192s'; databasen eksisterer ikke"
+        eng "Can't drop database '%-.192s'; database doesn't exist"
+        est "Ei suuda kustutada andmebaasi '%-.192s': andmebaasi ei eksisteeri"
+        fre "Ne peut effacer la base '%-.192s'; elle n'existe pas"
+        ger "Kann Datenbank '%-.192s' nicht löschen; Datenbank nicht vorhanden"
+        geo "მონაცემთა ბაზის ('%-.192s') წაშლა შეუძლებელია. ასეთი მონაცემთა ბაზა არ არსებობს"
+        greek "Αδύνατη η διαγραφή της βάσης δεδομένων '%-.192s'. Η βάση δεδομένων δεν υπάρχει"
+        hindi "डेटाबेस '%-.192s' ड्रॉप नहीं कर सकते हैं; यह डेटाबेस मौजूद नहीं है"
+        hun "A(z) '%-.192s' adatbazis nem szuntetheto meg. Az adatbazis nem letezik"
+        ita "Impossibile cancellare '%-.192s'; il database non esiste"
+        jpn "データベース '%-.192s' を削除できません。データベースは存在しません。"
+        kor "데이타베이스 '%-.192s'를 제거하지 못했습니다. 데이타베이스가 존재하지 않음 "
+        nla "Kan database '%-.192s' niet verwijderen; database bestaat niet"
+        nor "Kan ikke fjerne (drop) '%-.192s'; databasen eksisterer ikke"
+        norwegian-ny "Kan ikkje fjerne (drop) '%-.192s'; databasen eksisterer ikkje"
+        pol "Nie można usun?ć bazy danych '%-.192s'; baza danych nie istnieje"
+        por "Não pode eliminar o banco de dados '%-.192s'; este banco de dados não existe"
+        rum "Nu pot sa drop baza de date '%-.192s'; baza da date este inexistenta"
+        rus "Невозможно удалить базу данных '%-.192s'. Такой базы данных нет"
+        serbian "Ne mogu da izbrišem bazu '%-.192s'; baza ne postoji"
+        slo "Nemôžem zmazať databázu '%-.192s'; databáza neexistuje"
+        spa "No puedo eliminar la base de datos '%-.192s'; la base de datos no existe"
+        swe "Kan inte radera databasen '%-.192s'; databasen finns inte"
+        ukr "Не можу видалити базу данних '%-.192s'. База данних не існує"
+ER_DB_DROP_DELETE  
+        chi "删除数据库错误(无法删除'%-.192s',错误号码:%M)"
+        cze "Chyba při rušení databáze (nemohu vymazat '%-.192s', chyba %M)"
+        dan "Fejl ved sletning (drop) af databasen (kan ikke slette '%-.192s', Fejlkode %M)"
+        eng "Error dropping database (can't delete '%-.192s', errno: %M)"
+        est "Viga andmebaasi kustutamisel (ei suuda kustutada faili '%-.192s', veakood: %M)"
+        fre "Ne peut effacer la base '%-.192s' (erreur %M)"
+        ger "Fehler beim Löschen der Datenbank ('%-.192s' kann nicht gelöscht werden, Fehler: %M)"
+        geo "შეცდომა მონაცემთა ბაზის წაშლისას ('%-.192s'-ის წაშლა შეუძლებელია. შეცდ. კოდი: %M)"
+        greek "Παρουσιάστηκε πρόβλημα κατά τη διαγραφή της βάσης δεδομένων (αδύνατη η διαγραφή '%-.192s', κωδικός λάθους: %M)"
+        hindi "डेटाबेस ड्रॉप में त्रुटि हुई ('%-.192s' हटा नहीं सकते, errno: %M)"
+        hun "Adatbazis megszuntetesi hiba ('%-.192s' nem torolheto, hibakod: %M)"
+        ita "Errore durante la cancellazione del database (impossibile cancellare '%-.192s', errno: %M)"
+        jpn "データベース削除エラー ('%-.192s' を削除できません。エラー番号: %M)"
+        kor "데이타베이스 제거 에러('%-.192s'를 삭제할 수 없습니다, 에러번호: %M)"
+        nla "Fout bij verwijderen database (kan '%-.192s' niet verwijderen, Errcode: %M)"
+        nor "Feil ved fjerning (drop) av databasen (kan ikke slette '%-.192s', feil %M)"
+        norwegian-ny "Feil ved fjerning (drop) av databasen (kan ikkje slette '%-.192s', feil %M)"
+        pol "Bł?d podczas usuwania bazy danych (nie można usun?ć '%-.192s', bł?d %M)"
+        por "Erro ao eliminar banco de dados (não pode eliminar '%-.192s' - erro no. %M)"
+        rum "Eroare dropuind baza de date (nu pot sa sterg '%-.192s', Eroare: %M)"
+        rus "Ошибка при удалении базы данных (невозможно удалить '%-.192s', ошибка: %M)"
+        serbian "Ne mogu da izbrišem bazu (ne mogu da izbrišem '%-.192s', errno: %M)"
+        slo "Chyba pri mazaní databázy (nemôžem zmazať '%-.192s', chybový kód: %M)"
+        spa "Error eliminando la base de datos (no puedo borrar '%-.192s', error %M)"
+        swe "Fel vid radering av databasen (Kan inte radera '%-.192s'. Felkod: %M)"
+        ukr "Не можу видалити базу данних (Не можу видалити '%-.192s', помилка: %M)"
+ER_DB_DROP_RMDIR  
+        chi "删除数据库错误(无法rmdir '%-.192s',错误号码:%M)"
+        cze "Chyba při rušení databáze (nemohu vymazat adresář '%-.192s', chyba %M)"
+        dan "Fejl ved sletting af database (kan ikke slette folderen '%-.192s', Fejlkode %M)"
+        eng "Error dropping database (can't rmdir '%-.192s', errno: %M)"
+        est "Viga andmebaasi kustutamisel (ei suuda kustutada kataloogi '%-.192s', veakood: %M)"
+        fre "Erreur en effaçant la base (rmdir '%-.192s', erreur %M)"
+        ger "Fehler beim Löschen der Datenbank (Verzeichnis '%-.192s' kann nicht gelöscht werden, Fehler: %M)"
+        geo "მონაცემთა ბაზის წაშლა შეუძლებელია. (ვერ წავშალე საქაღალდე '%-.192s'. შეცდ. კოდი: %M) "
+        greek "Παρουσιάστηκε πρόβλημα κατά τη διαγραφή της βάσης δεδομένων (αδύνατη η διαγραφή του φακέλλου '%-.192s', κωδικός λάθους: %M)"
+        hindi "डेटाबेस ड्रॉप में त्रुटि हुई ('%-.192s' rmdir नहीं कर सकते, errno: %M)"
+        hun "Adatbazis megszuntetesi hiba ('%-.192s' nem szuntetheto meg, hibakod: %M)"
+        ita "Errore durante la cancellazione del database (impossibile rmdir '%-.192s', errno: %M)"
+        jpn "データベース削除エラー (ディレクトリ '%-.192s' を削除できません。エラー番号: %M)"
+        kor "데이타베이스 제거 에러(rmdir '%-.192s'를 할 수 없습니다, 에러번호: %M)"
+        nla "Fout bij verwijderen database (kan rmdir '%-.192s' niet uitvoeren, Errcode: %M)"
+        nor "Feil ved sletting av database (kan ikke slette katalogen '%-.192s', feil %M)"
+        norwegian-ny "Feil ved sletting av database (kan ikkje slette katalogen '%-.192s', feil %M)"
+        pol "Bł?d podczas usuwania bazy danych (nie można wykonać rmdir '%-.192s', bł?d %M)"
+        por "Erro ao eliminar banco de dados (não pode remover diretório '%-.192s' - erro no. %M)"
+        rum "Eroare dropuind baza de date (nu pot sa rmdir '%-.192s', Eroare: %M)"
+        rus "Невозможно удалить базу данных (невозможно удалить каталог '%-.192s', ошибка: %M)"
+        serbian "Ne mogu da izbrišem bazu (ne mogu da izbrišem direktorijum '%-.192s', errno: %M)"
+        slo "Chyba pri mazaní databázy (nemôžem vymazať adresár '%-.192s', chybový kód: %M)"
+        spa "Error eliminando la base de datos (No puedo borrar el directorio '%-.192s', error %M)"
+        swe "Fel vid radering av databasen (Kan inte radera biblioteket '%-.192s'. Felkod: %M)"
+        ukr "Не можу видалити базу данних (Не можу видалити теку '%-.192s', помилка: %M)"
+ER_CANT_DELETE_FILE  
+        chi "删除'%-.192s'出错(错误号码:%M)"
+        cze "Chyba při výmazu '%-.192s' (chybový kód: %M)"
+        dan "Fejl ved sletning af '%-.192s' (Fejlkode: %M)"
+        eng "Error on delete of '%-.192s' (errno: %M)"
+        est "Viga '%-.192s' kustutamisel (veakood: %M)"
+        fre "Erreur en effaçant '%-.192s' (Errcode: %M)"
+        ger "Fehler beim Löschen von '%-.192s' (Fehler: %M)"
+        geo "'%-.192s'-ის წაშლა შეუძლებელია (შეცდ. კოდი: %M)"
+        greek "Παρουσιάστηκε πρόβλημα κατά τη διαγραφή '%-.192s' (κωδικός λάθους: %M)"
+        hindi "'%-.192s' के हटाने पर त्रुटि हुई (errno: %M)"
+        hun "Torlesi hiba: '%-.192s' (hibakod: %M)"
+        ita "Errore durante la cancellazione di '%-.192s' (errno: %M)"
+        jpn "ファイル '%-.192s' の削除エラー (エラー番号: %M)"
+        kor "'%-.192s' 삭제 중 에러 (에러번호: %M)"
+        nla "Fout bij het verwijderen van '%-.192s' (Errcode: %M)"
+        nor "Feil ved sletting av '%-.192s' (Feilkode: %M)"
+        norwegian-ny "Feil ved sletting av '%-.192s' (Feilkode: %M)"
+        pol "Błąd podczas usuwania '%-.192s' (Kod błędu: %M)"
+        por "Erro na remoção de '%-.192s' (erro no. %M)"
+        rum "Eroare incercind sa delete '%-.192s' (Eroare: %M)"
+        rus "Ошибка при удалении '%-.192s' (ошибка: %M)"
+        serbian "Greška pri brisanju '%-.192s' (errno: %M)"
+        slo "Chyba pri mazaní '%-.192s' (chybový kód: %M)"
+        spa "Error en el borrado de '%-.192s' (Error: %M)"
+        swe "Kan inte radera filen '%-.192s' (Felkod: %M)"
+        ukr "Не можу видалити '%-.192s' (помилка: %M)"
+ER_CANT_FIND_SYSTEM_REC  
+        chi "无法在系统表中读取记录"
+        cze "Nemohu číst záznam v systémové tabulce"
+        dan "Kan ikke læse posten i systemfolderen"
+        eng "Can't read record in system table"
+        est "Ei suuda lugeda kirjet süsteemsest tabelist"
+        fre "Ne peut lire un enregistrement de la table 'system'"
+        ger "Datensatz in der Systemtabelle nicht lesbar"
+        geo "სისტემური ცხრილიდან ჩანაწერის წაკითხვა შეუძლებელია"
+        greek "Αδύνατη η ανάγνωση εγγραφής από πίνακα του συστήματος"
+        hindi "सिस्टम टेबल से रिकॉर्ड नहीं पढ़ सके"
+        hun "Nem olvashato rekord a rendszertablaban"
+        ita "Impossibile leggere il record dalla tabella di sistema"
+        jpn "システム表のレコードを読み込めません。"
+        kor "system 테이블에서 레코드를 읽을 수 없습니다."
+        nla "Kan record niet lezen in de systeem tabel"
+        nor "Kan ikke lese posten i systemkatalogen"
+        norwegian-ny "Kan ikkje lese posten i systemkatalogen"
+        pol "Nie można odczytać rekordu z tabeli systemowej"
+        por "Não pode ler um registro numa tabela do sistema"
+        rum "Nu pot sa citesc cimpurile in tabla de system (system table)"
+        rus "Невозможно прочитать запись в системной таблице"
+        serbian "Ne mogu da pročitam slog iz sistemske tabele"
+        slo "Nemôžem čítať záznam v systémovej tabuľke"
+        spa "No puedo leer el registro en la tabla del sistema"
+        swe "Hittar inte posten i systemregistret"
+        ukr "Не можу зчитати запис з системної таблиці"
+ER_CANT_GET_STAT  
+        chi "无法获得'%-.200s'的状态(错误号码:%M)"
+        cze "Nemohu získat stav '%-.200s' (chybový kód: %M)"
+        dan "Kan ikke læse status af '%-.200s' (Fejlkode: %M)"
+        eng "Can't get status of '%-.200s' (errno: %M)"
+        est "Ei suuda lugeda '%-.200s' olekut (veakood: %M)"
+        fre "Ne peut obtenir le status de '%-.200s' (Errcode: %M)"
+        ger "Kann Status von '%-.200s' nicht ermitteln (Fehler: %M)"
+        geo "'%-.200s'-ის მდგომარეობის მიღება შეუძლებელია (შეცდ.კოდი: %M)"
+        greek "Αδύνατη η λήψη πληροφοριών για την κατάσταση του '%-.200s' (κωδικός λάθους: %M)"
+        hindi "'%-.200s' की अवस्था प्राप्त नहीं कर सके (errno: %M)"
+        hun "A(z) '%-.200s' statusza nem allapithato meg (hibakod: %M)"
+        ita "Impossibile leggere lo stato di '%-.200s' (errno: %M)"
+        jpn "'%-.200s' の状態を取得できません。(エラー番号: %M)"
+        kor "'%-.200s'의 상태를 얻지 못했습니다. (에러번호: %M)"
+        nla "Kan de status niet krijgen van '%-.200s' (Errcode: %M)"
+        nor "Kan ikke lese statusen til '%-.200s' (Feilkode: %M)"
+        norwegian-ny "Kan ikkje lese statusen til '%-.200s' (Feilkode: %M)"
+        pol "Nie można otrzymać statusu '%-.200s' (Kod błędu: %M)"
+        por "Não pode obter o status de '%-.200s' (erro no. %M)"
+        rum "Nu pot sa obtin statusul lui '%-.200s' (Eroare: %M)"
+        rus "Невозможно получить статусную информацию о '%-.200s' (ошибка: %M)"
+        serbian "Ne mogu da dobijem stanje file-a '%-.200s' (errno: %M)"
+        slo "Nemôžem zistiť stav '%-.200s' (chybový kód: %M)"
+        spa "No puedo obtener el estado de '%-.200s' (Error: %M)"
+        swe "Kan inte läsa filinformationen (stat) från '%-.200s' (Felkod: %M)"
+        ukr "Не можу отримати статус '%-.200s' (помилка: %M)"
+ER_CANT_GET_WD  
+        chi "无法获取工作目录(错误号码:%M)"
+        cze "Chyba při zjišťování pracovní adresář (chybový kód: %M)"
+        dan "Kan ikke læse aktive folder (Fejlkode: %M)"
+        eng "Can't get working directory (errno: %M)"
+        est "Ei suuda identifitseerida jooksvat kataloogi (veakood: %M)"
+        fre "Ne peut obtenir le répertoire de travail (Errcode: %M)"
+        ger "Kann Arbeitsverzeichnis nicht ermitteln (Fehler: %M)"
+        geo "სამუშაო საქაღალდის მიღების შეცდომა (შეცდ. კოდი: %M)"
+        greek "Ο φάκελλος εργασίας δεν βρέθηκε (κωδικός λάθους: %M)"
+        hindi "Working डाइरेक्टरी प्राप्त नहीं कर सके (errno: %M)"
+        hun "A munkakonyvtar nem allapithato meg (hibakod: %M)"
+        ita "Impossibile leggere la directory di lavoro (errno: %M)"
+        jpn "作業ディレクトリを取得できません。(エラー番号: %M)"
+        kor "수행 디렉토리를 찾지 못했습니다. (에러번호: %M)"
+        nla "Kan de werkdirectory niet krijgen (Errcode: %M)"
+        nor "Kan ikke lese aktiv katalog(Feilkode: %M)"
+        norwegian-ny "Kan ikkje lese aktiv katalog(Feilkode: %M)"
+        pol "Nie można rozpoznać aktualnego katalogu (Kod błędu: %M)"
+        por "Não pode obter o diretório corrente (erro no. %M)"
+        rum "Nu pot sa obtin directorul current (working directory) (Eroare: %M)"
+        rus "Невозможно определить рабочий каталог (ошибка: %M)"
+        serbian "Ne mogu da dobijem trenutni direktorijum (errno: %M)"
+        slo "Nemôžem zistiť pracovný adresár (chybový kód: %M)"
+        spa "No puedo obtener directorio de trabajo (Error: %M)"
+        swe "Kan inte läsa aktivt bibliotek. (Felkod: %M)"
+        ukr "Не можу визначити робочу теку (помилка: %M)"
+ER_CANT_LOCK  
+        chi "无法锁定文件(错误号码:%M)"
+        cze "Nemohu uzamknout soubor (chybový kód: %M)"
+        dan "Kan ikke låse fil (Fejlkode: %M)"
+        eng "Can't lock file (errno: %M)"
+        est "Ei suuda lukustada faili (veakood: %M)"
+        fre "Ne peut verrouiller le fichier (Errcode: %M)"
+        ger "Datei kann nicht gesperrt werden (Fehler: %M)"
+        geo "ფაილის დაბლოკვა შეუძლებელია (შეცდ. კოდი: %M)"
+        greek "Το αρχείο δεν μπορεί να κλειδωθεί (κωδικός λάθους: %M)"
+        hindi "फ़ाइल लॉक नहीं कर सके (errno: %M)"
+        hun "A file nem zarolhato. (hibakod: %M)"
+        ita "Impossibile il locking il file (errno: %M)"
+        jpn "ファイルをロックできません。(エラー番号: %M)"
+        kor "화일을 잠그지(lock) 못했습니다. (에러번호: %M)"
+        nla "Kan de file niet blokeren (Errcode: %M)"
+        nor "Kan ikke låse fila (Feilkode: %M)"
+        norwegian-ny "Kan ikkje låse fila (Feilkode: %M)"
+        pol "Nie można zablokować pliku (Kod błędu: %M)"
+        por "Não pode travar o arquivo (erro no. %M)"
+        rum "Nu pot sa lock fisierul (Eroare: %M)"
+        rus "Невозможно поставить блокировку на файле (ошибка: %M)"
+        serbian "Ne mogu da zaključam file (errno: %M)"
+        slo "Nemôžem zamknúť súbor (chybový kód: %M)"
+        spa "No puedo bloquear fichero/archivo: (Error: %M)"
+        swe "Kan inte låsa filen. (Felkod: %M)"
+        ukr "Не можу заблокувати файл (помилка: %M)"
+ER_CANT_OPEN_FILE  
+        chi "无法打开文件:'%-.200s'(错误号码:%M)"
+        cze "Nemohu otevřít soubor '%-.200s' (chybový kód: %M)"
+        dan "Kan ikke åbne fil: '%-.200s' (Fejlkode: %M)"
+        eng "Can't open file: '%-.200s' (errno: %M)"
+        est "Ei suuda avada faili '%-.200s' (veakood: %M)"
+        fre "Ne peut ouvrir le fichier: '%-.200s' (Errcode: %M)"
+        ger "Kann Datei '%-.200s' nicht öffnen (Fehler: %M)"
+        geo "ფაილი ვერ გავხსენი: '%-.200s' (შეცდ. კოდი: %M)"
+        greek "Δεν είναι δυνατό να ανοιχτεί το αρχείο: '%-.200s' (κωδικός λάθους: %M)"
+        hindi "फ़ाइल '%-.200s' नहीं खोल सकते (errno: %M)"
+        hun "A '%-.200s' file nem nyithato meg (hibakod: %M)"
+        ita "Impossibile aprire il file: '%-.200s' (errno: %M)"
+        jpn "ファイル '%-.200s' をオープンできません。(エラー番号: %M)"
+        kor "화일을 열지 못했습니다.: '%-.200s' (에러번호: %M)"
+        nla "Kan de file '%-.200s' niet openen (Errcode: %M)"
+        nor "Kan ikke åpne fila: '%-.200s' (Feilkode: %M)"
+        norwegian-ny "Kan ikkje åpne fila: '%-.200s' (Feilkode: %M)"
+        pol "Nie można otworzyć pliku: '%-.200s' (Kod błędu: %M)"
+        por "Não pode abrir o arquivo '%-.200s' (erro no. %M)"
+        rum "Nu pot sa deschid fisierul: '%-.200s' (Eroare: %M)"
+        rus "Невозможно открыть файл: '%-.200s' (ошибка: %M)"
+        serbian "Ne mogu da otvorim file: '%-.200s' (errno: %M)"
+        slo "Nemôžem otvoriť súbor: '%-.200s' (chybový kód: %M)"
+        spa "No puedo abrir el fichero/archivo: '%-.200s' (Error: %M)"
+        swe "Kan inte använda '%-.200s' (Felkod: %M)"
+        ukr "Не можу відкрити файл: '%-.200s' (помилка: %M)"
+ER_FILE_NOT_FOUND  
+        chi "找不到文件:'%-.200s'(错误号码:%M)"
+        cze "Nemohu najít soubor '%-.200s' (chybový kód: %M)"
+        dan "Kan ikke finde fila: '%-.200s' (Fejlkode: %M)"
+        eng "Can't find file: '%-.200s' (errno: %M)"
+        est "Ei suuda leida faili '%-.200s' (veakood: %M)"
+        fre "Ne peut trouver le fichier: '%-.200s' (Errcode: %M)"
+        ger "Kann Datei '%-.200s' nicht finden (Fehler: %M)"
+        geo "ფაილი ვერ ვიპოვე: '%-.200s' (შეცდ. კოდი: %M)"
+        greek "Δεν βρέθηκε το αρχείο: '%-.200s' (κωδικός λάθους: %M)"
+        hindi "फ़ाइल '%-.200s' नहीं मिला (errno: %M)"
+        hun "A(z) '%-.200s' file nem talalhato (hibakod: %M)"
+        ita "Impossibile trovare il file: '%-.200s' (errno: %M)"
+        jpn "ファイル '%-.200s' が見つかりません。(エラー番号: %M)"
+        kor "화일을 찾지 못했습니다.: '%-.200s' (에러번호: %M)"
+        nla "Kan de file: '%-.200s' niet vinden (Errcode: %M)"
+        nor "Kan ikke finne fila: '%-.200s' (Feilkode: %M)"
+        norwegian-ny "Kan ikkje finne fila: '%-.200s' (Feilkode: %M)"
+        pol "Nie można znaleźć pliku: '%-.200s' (Kod błędu: %M)"
+        por "Não pode encontrar o arquivo '%-.200s' (erro no. %M)"
+        rum "Nu pot sa gasesc fisierul: '%-.200s' (Eroare: %M)"
+        rus "Невозможно найти файл: '%-.200s' (ошибка: %M)"
+        serbian "Ne mogu da pronađem file: '%-.200s' (errno: %M)"
+        slo "Nemôžem nájsť súbor: '%-.200s' (chybový kód: %M)"
+        spa "No puedo hallar el fichero/archivo: '%-.200s' (Error: %M)"
+        swe "Hittar inte filen '%-.200s' (Felkod: %M)"
+        ukr "Не можу знайти файл: '%-.200s' (помилка: %M)"
+ER_CANT_READ_DIR  
+        chi "无法读取'%-.192s'的文件夹(错误号码:%M)"
+        cze "Nemohu číst adresář '%-.192s' (chybový kód: %M)"
+        dan "Kan ikke læse folder '%-.192s' (Fejlkode: %M)"
+        eng "Can't read dir of '%-.192s' (errno: %M)"
+        est "Ei suuda lugeda kataloogi '%-.192s' (veakood: %M)"
+        fre "Ne peut lire le répertoire de '%-.192s' (Errcode: %M)"
+        ger "Verzeichnis von '%-.192s' nicht lesbar (Fehler: %M)"
+        geo "საქაღალდის ('%-.192s') წაკითხვა შეუძლებელია. (შეცდ. კოდი: %M)"
+        greek "Δεν είναι δυνατό να διαβαστεί ο φάκελλος του '%-.192s' (κωδικός λάθους: %M)"
+        hindi "'%-.192s' की डायरेक्टरी नहीं पढ़ सके (errno: %M)"
+        hun "A(z) '%-.192s' konyvtar nem olvashato. (hibakod: %M)"
+        ita "Impossibile leggere la directory di '%-.192s' (errno: %M)"
+        jpn "ディレクトリ '%-.192s' を読み込めません。(エラー番号: %M)"
+        kor "'%-.192s'디렉토리를 읽지 못했습니다. (에러번호: %M)"
+        nla "Kan de directory niet lezen van '%-.192s' (Errcode: %M)"
+        nor "Kan ikke lese katalogen '%-.192s' (Feilkode: %M)"
+        norwegian-ny "Kan ikkje lese katalogen '%-.192s' (Feilkode: %M)"
+        pol "Nie można odczytać katalogu '%-.192s' (Kod błędu: %M)"
+        por "Não pode ler o diretório de '%-.192s' (erro no. %M)"
+        rum "Nu pot sa citesc directorul '%-.192s' (Eroare: %M)"
+        rus "Невозможно прочитать каталог '%-.192s' (ошибка: %M)"
+        serbian "Ne mogu da pročitam direktorijum '%-.192s' (errno: %M)"
+        slo "Nemôžem čítať adresár '%-.192s' (chybový kód: %M)"
+        spa "No puedo leer el directorio de '%-.192s' (Error: %M)"
+        swe "Kan inte läsa från bibliotek '%-.192s' (Felkod: %M)"
+        ukr "Не можу прочитати теку '%-.192s' (помилка: %M)"
+ER_CANT_SET_WD  
+        chi "无法将dir更改为'%-.192s'(错误号码:%M)"
+        cze "Nemohu změnit adresář na '%-.192s' (chybový kód: %M)"
+        dan "Kan ikke skifte folder til '%-.192s' (Fejlkode: %M)"
+        eng "Can't change dir to '%-.192s' (errno: %M)"
+        est "Ei suuda siseneda kataloogi '%-.192s' (veakood: %M)"
+        fre "Ne peut changer le répertoire pour '%-.192s' (Errcode: %M)"
+        ger "Kann nicht in das Verzeichnis '%-.192s' wechseln (Fehler: %M)"
+        geo "საქაღალდის '%-.192s'-ზე შეცვლა შეუძლებელია (შეცდ. კოდი: %M)"
+        greek "Αδύνατη η αλλαγή του τρέχοντος καταλόγου σε '%-.192s' (κωδικός λάθους: %M)"
+        hindi "'%-.192s' डायरेक्टरी में नहीं बदल सके (errno: %M)"
+        hun "Konyvtarvaltas nem lehetseges a(z) '%-.192s'-ba. (hibakod: %M)"
+        ita "Impossibile cambiare la directory in '%-.192s' (errno: %M)"
+        jpn "ディレクトリ '%-.192s' に移動できません。(エラー番号: %M)"
+        kor "'%-.192s'디렉토리로 이동할 수 없었습니다. (에러번호: %M)"
+        nla "Kan de directory niet veranderen naar '%-.192s' (Errcode: %M)"
+        nor "Kan ikke skifte katalog til '%-.192s' (Feilkode: %M)"
+        norwegian-ny "Kan ikkje skifte katalog til '%-.192s' (Feilkode: %M)"
+        pol "Nie można zmienić katalogu na '%-.192s' (Kod błędu: %M)"
+        por "Não pode mudar para o diretório '%-.192s' (erro no. %M)"
+        rum "Nu pot sa schimb directorul '%-.192s' (Eroare: %M)"
+        rus "Невозможно перейти в каталог '%-.192s' (ошибка: %M)"
+        serbian "Ne mogu da promenim direktorijum na '%-.192s' (errno: %M)"
+        slo "Nemôžem vojsť do adresára '%-.192s' (chybový kód: %M)"
+        spa "No puedo cambiar al directorio a '%-.192s' (Error: %M)"
+        swe "Kan inte byta till '%-.192s' (Felkod: %M)"
+        ukr "Не можу перейти у теку '%-.192s' (помилка: %M)"
+ER_CHECKREAD  
+        chi "这个表自上次读后数据有变化'%-.192s'"
+        cze "Záznam byl změněn od posledního čtení v tabulce '%-.192s'"
+        dan "Posten er ændret siden sidste læsning '%-.192s'"
+        eng "Record has changed since last read in table '%-.192s'"
+        est "Kirje tabelis '%-.192s' on muutunud viimasest lugemisest saadik"
+        fre "Enregistrement modifié depuis sa dernière lecture dans la table '%-.192s'"
+        ger "Datensatz hat sich seit dem letzten Zugriff auf Tabelle '%-.192s' geändert"
+        geo "ცხრილში '%-.192s' ჩანაწერი შეიცვალა ბოლო წაკითხვის შემდეგ"
+        greek "Η εγγραφή έχει αλλάξει από την τελευταία φορά που ανασύρθηκε από τον πίνακα '%-.192s'"
+        hindi "रिकॉर्ड टेबल '%-.192s' पिछली बार पढ़े जाने के बाद से बदल गया है"
+        hun "A(z) '%-.192s' tablaban talalhato rekord megvaltozott az utolso olvasas ota"
+        ita "Il record e` cambiato dall'ultima lettura della tabella '%-.192s'"
+        jpn "表 '%-.192s' の最後の読み込み時点から、レコードが変化しました。"
+        kor "테이블 '%-.192s'에서 마지막으로 읽은 후 Record가 변경되었습니다."
+        nla "Record is veranderd sinds de laatste lees activiteit in de tabel '%-.192s'"
+        nor "Posten har blitt endret siden den ble lest '%-.192s'"
+        norwegian-ny "Posten har vorte endra sidan den sist vart lesen '%-.192s'"
+        pol "Rekord został zmieniony od ostatniego odczytania z tabeli '%-.192s'"
+        por "Registro alterado desde a última leitura da tabela '%-.192s'"
+        rum "Cimpul a fost schimbat de la ultima citire a tabelei '%-.192s'"
+        rus "Запись изменилась с момента последней выборки в таблице '%-.192s'"
+        serbian "Slog je promenjen od zadnjeg čitanja tabele '%-.192s'"
+        slo "Záznam bol zmenený od posledného čítania v tabuľke '%-.192s'"
+        spa "El registro ha cambiado desde la ultima lectura de la tabla '%-.192s'"
+        swe "Posten har förändrats sedan den lästes i register '%-.192s'"
+        ukr "Запис було змінено з часу останнього читання з таблиці '%-.192s'"
+ER_DISK_FULL
+        chi "磁盘已满(%s);等待释放一些空间...(错误号码:%M)"
+        cze "Disk je plný (%s), čekám na uvolnění nějakého místa ... (chybový kód: %M)"
+        dan "Ikke mere diskplads (%s). Venter på at få frigjort plads... (Fejlkode: %M)"
+        eng "Disk full (%s); waiting for someone to free some space... (errno: %M)"
+        est "Ketas täis (%s). Ootame kuni tekib vaba ruumi... (veakood: %M)"
+        fre "Disque plein (%s). J'attend que quelqu'un libère de l'espace... (Errcode: %M)"
+        ger "Festplatte voll (%s). Warte, bis jemand Platz schafft ... (Fehler: %M)"
+        geo "დისკი სავსეა (%s). ველოდები, სანამ ვინმე ადგილის გაათავისუფლებს... (შეცდ. კოდი: %M)"
+        greek "Δεν υπάρχει χώρος στο δίσκο (%s). Παρακαλώ, περιμένετε να ελευθερωθεί χώρος... (κωδικός λάθους: %M)"
+        hindi "डिस्क पूरी तरह से भरा हुआ है (%s); कुछ स्थान खाली करें (errno: %M)"
+        hun "A lemez megtelt (%s). (hibakod: %M)"
+        ita "Disco pieno (%s). In attesa che qualcuno liberi un po' di spazio... (errno: %M)"
+        jpn "ディスク領域不足です(%s)。(エラー番号: %M)"
+        kor "Disk full (%s). 다른 사람이 지울때까지 기다립니다... (에러번호: %M)"
+        nla "Schijf vol (%s). Aan het wachten totdat er ruimte vrij wordt gemaakt... (Errcode: %M)"
+        nor "Ikke mer diskplass (%s). Venter på å få frigjort plass... (Feilkode: %M)"
+        norwegian-ny "Ikkje meir diskplass (%s). Ventar på å få frigjort plass... (Feilkode: %M)"
+        pol "Dysk pełny (%s). Oczekiwanie na zwolnienie miejsca... (Kod błędu: %M)"
+        por "Disco cheio (%s). Aguardando alguém liberar algum espaço... (erro no. %M)"
+        rum "Hard-disk-ul este plin (%s). Astept sa se elibereze ceva spatiu... (Eroare: %M)"
+        rus "Диск заполнен. (%s). Ожидаем, пока кто-то не уберет после себя мусор... (ошибка: %M)"
+        serbian "Disk je pun (%s). Čekam nekoga da dođe i oslobodi nešto mesta... (errno: %M)"
+        slo "Disk je plný (%s), čakám na uvoľnenie miesta... (chybový kód: %M)"
+        spa "Disco lleno (%s). Esperando a que alguien libere algo de espacio... (Error: %M)"
+        swe "Disken är full (%s). Väntar tills det finns ledigt utrymme... (Felkod: %M)"
+        ukr "Диск заповнений (%s). Вичикую, доки звільниться трохи місця... (помилка: %M)"
+ER_DUP_KEY 23000 
+        chi "不能写;表'%-.192s'中有重复索引"
+        cze "Nemohu zapsat, zdvojený klíč v tabulce '%-.192s'"
+        dan "Kan ikke skrive, flere ens nøgler i tabellen '%-.192s'"
+        eng "Can't write; duplicate key in table '%-.192s'"
+        est "Ei saa kirjutada, korduv võti tabelis '%-.192s'"
+        fre "Ecriture impossible, doublon dans une clé de la table '%-.192s'"
+        ger "Kann nicht speichern, Grund: doppelter Schlüssel in Tabelle '%-.192s'"
+        geo "ჩაწერის შეცდომა. დუბლირებული გასაღები ცხრილში '%-.192s'"
+        greek "Δεν είναι δυνατή η καταχώρηση, η τιμή υπάρχει ήδη στον πίνακα '%-.192s'"
+        hindi "टेबल '%-.192s' में DUPLICATE KEY मौजूद होने के कारण नहीं लिख सके"
+        hun "Irasi hiba, duplikalt kulcs a '%-.192s' tablaban"
+        ita "Scrittura impossibile: chiave duplicata nella tabella '%-.192s'"
+        jpn "書き込めません。表 '%-.192s' に重複するキーがあります。"
+        kor "기록할 수 없습니다., 테이블 '%-.192s'에서 중복 키"
+        nla "Kan niet schrijven, dubbele zoeksleutel in tabel '%-.192s'"
+        nor "Kan ikke skrive, flere like nøkler i tabellen '%-.192s'"
+        norwegian-ny "Kan ikkje skrive, flere like nyklar i tabellen '%-.192s'"
+        pol "Nie można zapisać, powtórzone klucze w tabeli '%-.192s'"
+        por "Não pode gravar. Chave duplicada na tabela '%-.192s'"
+        rum "Nu pot sa scriu (can't write), cheie duplicata in tabela '%-.192s'"
+        rus "Невозможно произвести запись, дублирующийся ключ в таблице '%-.192s'"
+        serbian "Ne mogu da pišem pošto postoji duplirani ključ u tabeli '%-.192s'"
+        slo "Nemôžem zapísať, duplikát kľúča v tabuľke '%-.192s'"
+        spa "No puedo escribir, clave duplicada en la tabla '%-.192s'"
+        swe "Kan inte skriva, dubbel söknyckel i register '%-.192s'"
+        ukr "Не можу записати, дублюючийся ключ в таблиці '%-.192s'"
+ER_ERROR_ON_CLOSE  
+        chi "'%-.192s'关闭时出错(错误号码:%M)"
+        cze "Chyba při zavírání '%-.192s' (chybový kód: %M)"
+        dan "Fejl ved lukning af '%-.192s' (Fejlkode: %M)"
+        eng "Error on close of '%-.192s' (errno: %M)"
+        est "Viga faili '%-.192s' sulgemisel (veakood: %M)"
+        fre "Erreur a la fermeture de '%-.192s' (Errcode: %M)"
+        ger "Fehler beim Schließen von '%-.192s' (Fehler: %M)"
+        geo "შეცდომა '%-.192s'-ის დახურვიას (შეცდ. კოდი: %M)"
+        greek "Παρουσιάστηκε πρόβλημα κλείνοντας το '%-.192s' (κωδικός λάθους: %M)"
+        hindi "'%-.192s' के बंद पर त्रुटि हुई (errno: %M)"
+        hun "Hiba a(z) '%-.192s' zarasakor. (hibakod: %M)"
+        ita "Errore durante la chiusura di '%-.192s' (errno: %M)"
+        jpn "'%-.192s' のクローズ時エラー (エラー番号: %M)"
+        kor "'%-.192s'닫는 중 에러 (에러번호: %M)"
+        nla "Fout bij het sluiten van '%-.192s' (Errcode: %M)"
+        nor "Feil ved lukking av '%-.192s' (Feilkode: %M)"
+        norwegian-ny "Feil ved lukking av '%-.192s' (Feilkode: %M)"
+        pol "Błąd podczas zamykania '%-.192s' (Kod błędu: %M)"
+        por "Erro ao fechar '%-.192s' (erro no. %M)"
+        rum "Eroare inchizind '%-.192s' (errno: %M)"
+        rus "Ошибка при закрытии '%-.192s' (ошибка: %M)"
+        serbian "Greška pri zatvaranju '%-.192s' (errno: %M)"
+        slo "Chyba pri zatváraní '%-.192s' (chybový kód: %M)"
+        spa "Error en el cierre de '%-.192s' (Error: %M)"
+        swe "Fick fel vid stängning av '%-.192s' (Felkod: %M)"
+        ukr "Не можу закрити '%-.192s' (помилка: %M)"
+ER_ERROR_ON_READ  
+        chi "读取文件'%-.200s'错误(错误号码:%M)"
+        cze "Chyba při čtení souboru '%-.200s' (chybový kód: %M)"
+        dan "Fejl ved læsning af '%-.200s' (Fejlkode: %M)"
+        eng "Error reading file '%-.200s' (errno: %M)"
+        est "Viga faili '%-.200s' lugemisel (veakood: %M)"
+        fre "Erreur en lecture du fichier '%-.200s' (Errcode: %M)"
+        ger "Fehler beim Lesen der Datei '%-.200s' (Fehler: %M)"
+        geo "შეცდომა ფაილის ('%-.200s') წაკითხვისას (შეცდ. კოდი: %M)"
+        greek "Πρόβλημα κατά την ανάγνωση του αρχείου '%-.200s' (κωδικός λάθους: %M)"
+        hindi "फ़ाइल '%-.200s' पढ़ने में त्रुटि हुई (errno: %M)"
+        hun "Hiba a '%-.200s'file olvasasakor. (hibakod: %M)"
+        ita "Errore durante la lettura del file '%-.200s' (errno: %M)"
+        jpn "ファイル '%-.200s' の読み込みエラー (エラー番号: %M)"
+        kor "'%-.200s'화일 읽기 에러 (에러번호: %M)"
+        nla "Fout bij het lezen van file '%-.200s' (Errcode: %M)"
+        nor "Feil ved lesing av '%-.200s' (Feilkode: %M)"
+        norwegian-ny "Feil ved lesing av '%-.200s' (Feilkode: %M)"
+        pol "Błąd podczas odczytu pliku '%-.200s' (Kod błędu: %M)"
+        por "Erro ao ler arquivo '%-.200s' (erro no. %M)"
+        rum "Eroare citind fisierul '%-.200s' (errno: %M)"
+        rus "Ошибка чтения файла '%-.200s' (ошибка: %M)"
+        serbian "Greška pri čitanju file-a '%-.200s' (errno: %M)"
+        slo "Chyba pri čítaní súboru '%-.200s' (chybový kód: %M)"
+        spa "Error leyendo el fichero/archivo '%-.200s' (Error: %M)"
+        swe "Fick fel vid läsning av '%-.200s' (Felkod %M)"
+        ukr "Не можу прочитати файл '%-.200s' (помилка: %M)"
+ER_ERROR_ON_RENAME  
+        chi "重命名'%-.210s'到'%-.210s'错误(错误号码:%M)"
+        cze "Chyba při přejmenování '%-.210s' na '%-.210s' (chybový kód: %M)"
+        dan "Fejl ved omdøbning af '%-.210s' til '%-.210s' (Fejlkode: %M)"
+        eng "Error on rename of '%-.210s' to '%-.210s' (errno: %M)"
+        est "Viga faili '%-.210s' ümbernimetamisel '%-.210s'-ks (veakood: %M)"
+        fre "Erreur en renommant '%-.210s' en '%-.210s' (Errcode: %M)"
+        ger "Fehler beim Umbenennen von '%-.210s' in '%-.210s' (Fehler: %M)"
+        geo "შეცდომა '%-.210s'-ის სახელის '%-.210s'-ზე გადარქმევისას (შეცდ. კოდი: %M)"
+        greek "Πρόβλημα κατά την μετονομασία του αρχείου '%-.210s' to '%-.210s' (κωδικός λάθους: %M)"
+        hindi "'%-.210s' का नाम '%-.210s' बदलने पर त्रुटि हुई (errno: %M)"
+        hun "Hiba a '%-.210s' file atnevezesekor '%-.210s'. (hibakod: %M)"
+        ita "Errore durante la rinominazione da '%-.210s' a '%-.210s' (errno: %M)"
+        jpn "'%-.210s' の名前を '%-.210s' に変更できません (エラー番号: %M)"
+        kor "'%-.210s'를 '%-.210s'로 이름 변경중 에러 (에러번호: %M)"
+        nla "Fout bij het hernoemen van '%-.210s' naar '%-.210s' (Errcode: %M)"
+        nor "Feil ved omdøping av '%-.210s' til '%-.210s' (Feilkode: %M)"
+        norwegian-ny "Feil ved omdøyping av '%-.210s' til '%-.210s' (Feilkode: %M)"
+        pol "Błąd podczas zmieniania nazwy '%-.210s' na '%-.210s' (Kod błędu: %M)"
+        por "Erro ao renomear '%-.210s' para '%-.210s' (erro no. %M)"
+        rum "Eroare incercind sa renumesc '%-.210s' in '%-.210s' (errno: %M)"
+        rus "Ошибка при переименовании '%-.210s' в '%-.210s' (ошибка: %M)"
+        serbian "Greška pri promeni imena '%-.210s' na '%-.210s' (errno: %M)"
+        slo "Chyba pri premenovávaní '%-.210s' na '%-.210s' (chybový kód: %M)"
+        spa "Error en el renombrado de '%-.210s' a '%-.210s' (Error: %M)"
+        swe "Kan inte byta namn från '%-.210s' till '%-.210s' (Felkod: %M)"
+        ukr "Не можу перейменувати '%-.210s' у '%-.210s' (помилка: %M)"
+ER_ERROR_ON_WRITE  
+        chi "写文件'%-.200s'错误(错误号码:%M)"
+        cze "Chyba při zápisu do souboru '%-.200s' (chybový kód: %M)"
+        dan "Fejl ved skriving av filen '%-.200s' (Fejlkode: %M)"
+        eng "Error writing file '%-.200s' (errno: %M)"
+        est "Viga faili '%-.200s' kirjutamisel (veakood: %M)"
+        fre "Erreur d'écriture du fichier '%-.200s' (Errcode: %M)"
+        ger "Fehler beim Speichern der Datei '%-.200s' (Fehler: %M)"
+        geo "ფაილში ('%-.200s') ჩაწერის შეცდომა. (შეცდ. კოდი: %M)"
+        greek "Πρόβλημα κατά την αποθήκευση του αρχείου '%-.200s' (κωδικός λάθους: %M)"
+        hindi "फ़ाइल '%-.200s' लिखने में त्रुटि हुई (errno: %M)"
+        hun "Hiba a '%-.200s' file irasakor. (hibakod: %M)"
+        ita "Errore durante la scrittura del file '%-.200s' (errno: %M)"
+        jpn "ファイル '%-.200s' の書き込みエラー (エラー番号: %M)"
+        kor "'%-.200s'화일 기록 중 에러 (에러번호: %M)"
+        nla "Fout bij het wegschrijven van file '%-.200s' (Errcode: %M)"
+        nor "Feil ved skriving av fila '%-.200s' (Feilkode: %M)"
+        norwegian-ny "Feil ved skriving av fila '%-.200s' (Feilkode: %M)"
+        pol "Błąd podczas zapisywania pliku '%-.200s' (Kod błędu: %M)"
+        por "Erro ao gravar arquivo '%-.200s' (erro no. %M)"
+        rum "Eroare scriind fisierul '%-.200s' (errno: %M)"
+        rus "Ошибка записи в файл '%-.200s' (ошибка: %M)"
+        serbian "Greška pri upisu '%-.200s' (errno: %M)"
+        slo "Chyba pri zápise do súboru '%-.200s' (chybový kód: %M)"
+        spa "Error escribiendo el fichero/archivo '%-.200s' (Error: %M)"
+        swe "Fick fel vid skrivning till '%-.200s' (Felkod %M)"
+        ukr "Не можу записати файл '%-.200s' (помилка: %M)"
+ER_FILE_USED  
+        chi "'%-.192s'被锁定,不能改变"
+        cze "'%-.192s' je zamčen proti změnám"
+        dan "'%-.192s' er låst mod opdateringer"
+        eng "'%-.192s' is locked against change"
+        est "'%-.192s' on lukustatud muudatuste vastu"
+        fre "'%-.192s' est verrouillé contre les modifications"
+        ger "'%-.192s' ist für Änderungen gesperrt"
+        geo "'%-.192s' ცვლილებებისთვის დაბლოკილია"
+        greek "'%-.192s' δεν επιτρέπονται αλλαγές"
+        hindi "फ़ाइल '%-.192s' में कोई बदलाव नहीं कर सकते"
+        hun "'%-.192s' a valtoztatas ellen zarolva"
+        ita "'%-.192s' e` soggetto a lock contro i cambiamenti"
+        jpn "'%-.192s' はロックされています。"
+        kor "'%-.192s'가 변경할 수 없도록 잠겨있습니다."
+        nla "'%-.192s' is geblokeerd tegen veranderingen"
+        nor "'%-.192s' er låst mot oppdateringer"
+        norwegian-ny "'%-.192s' er låst mot oppdateringar"
+        pol "'%-.192s' jest zablokowany na wypadek zmian"
+        por "'%-.192s' está com travamento contra alterações"
+        rum "'%-.192s' este blocat pentru schimbari (locked against change)"
+        rus "'%-.192s' заблокирован для изменений"
+        serbian "'%-.192s' je zaključan za upis"
+        slo "'%-.192s' je zamknutý proti zmenám"
+        spa "'%-.192s' esta bloqueado contra cambios"
+        swe "'%-.192s' är låst mot användning"
+        ukr "'%-.192s' заблокований на внесення змін"
+ER_FILSORT_ABORT  
+        chi "排序中止"
+        cze "Třídění přerušeno"
+        dan "Sortering afbrudt"
+        eng "Sort aborted"
+        est "Sorteerimine katkestatud"
+        fre "Tri alphabétique abandonné"
+        ger "Sortiervorgang abgebrochen"
+        geo "დალაგება გაუქმდა"
+        greek "Η διαδικασία ταξινόμισης ακυρώθηκε"
+        hindi "SORT निरस्त"
+        hun "Sikertelen rendezes"
+        ita "Operazione di ordinamento abbandonata"
+        jpn "ソート処理を中断しました。"
+        kor "소트가 중단되었습니다."
+        nla "Sorteren afgebroken"
+        nor "Sortering avbrutt"
+        norwegian-ny "Sortering avbrote"
+        pol "Sortowanie przerwane"
+        por "Ordenação abortada"
+        rum "Sortare intrerupta"
+        rus "Сортировка прервана"
+        serbian "Sortiranje je prekinuto"
+        slo "Triedenie prerušené"
+        spa "Ordenación cancelada"
+        swe "Sorteringen avbruten"
+        ukr "Сортування перервано"
+ER_FORM_NOT_FOUND  
+        chi "视图'%-.192s'不存在'%-.192s'"
+        cze "Pohled '%-.192s' pro '%-.192s' neexistuje"
+        dan "View '%-.192s' eksisterer ikke for '%-.192s'"
+        eng "View '%-.192s' doesn't exist for '%-.192s'"
+        est "Vaade '%-.192s' ei eksisteeri '%-.192s' jaoks"
+        fre "La vue (View) '%-.192s' n'existe pas pour '%-.192s'"
+        ger "View '%-.192s' existiert für '%-.192s' nicht"
+        geo "ხედი '%-.192s'-ი '%-.192s'-სთვის არ არსებობს"
+        greek "Το View '%-.192s' δεν υπάρχει για '%-.192s'"
+        hindi "VIEW '%-.192s', '%-.192s' के लिए मौजूद नहीं है"
+        hun "A(z) '%-.192s' nezet nem letezik a(z) '%-.192s'-hoz"
+        ita "La view '%-.192s' non esiste per '%-.192s'"
+        jpn "ビュー '%-.192s' は '%-.192s' に存在しません。"
+        kor "뷰 '%-.192s'가 '%-.192s'에서는 존재하지 않습니다."
+        nla "View '%-.192s' bestaat niet voor '%-.192s'"
+        nor "View '%-.192s' eksisterer ikke for '%-.192s'"
+        norwegian-ny "View '%-.192s' eksisterar ikkje for '%-.192s'"
+        pol "Widok '%-.192s' nie istnieje dla '%-.192s'"
+        por "Visão '%-.192s' não existe para '%-.192s'"
+        rum "View '%-.192s' nu exista pentru '%-.192s'"
+        rus "Представление '%-.192s' не существует для '%-.192s'"
+        serbian "View '%-.192s' ne postoji za '%-.192s'"
+        slo "Pohľad '%-.192s' neexistuje pre '%-.192s'"
+        spa "La vista '%-.192s' no existe para '%-.192s'"
+        swe "Formulär '%-.192s' finns inte i '%-.192s'"
+        ukr "Вигляд '%-.192s' не існує для '%-.192s'"
+ER_GET_ERRNO  
+        chi "错误 %M, 来自存储引擎 %s"
+        eng "Got error %M from storage engine %s"
+        fre "Reçu l'erreur %M du handler de la table %s"
+        ger "Fehler %M von Speicher-Engine %s"
+        geo "მივიღე შეცდომა %M საცავის ძრავიდან %s"
+        greek "Ελήφθη μήνυμα λάθους %M από τον χειριστή πίνακα (table handler) %s"
+        hindi "%M त्रुटि %s स्टोरेज इंजन से"
+        ita "Rilevato l'errore %M dal gestore delle tabelle %s"
+        nla "Fout %M van tabel handler %s"
+        nor "Mottok feil %M fra tabell håndterer %s"
+        norwegian-ny "Mottok feil %M fra tabell handterar %s"
+        pol "Otrzymano błąd %M z obsługi tabeli %s"
+        por "Obteve erro %M no manipulador de tabelas %s"
+        rum "Eroarea %M obtinuta din handlerul tabelei %s"
+        rus "Получена ошибка %M от обработчика таблиц %s"
+        spa "Obtenido error %M desde el motor de almacenaje %s"
+        swe "Fick felkod %M från databashanteraren %s"
+        ukr "Отримано помилку %M від дескриптора таблиці %s"
+ER_ILLEGAL_HA  
+        chi "存储引擎%s %`s.%`s 表没有该选项"
+        eng "Storage engine %s of the table %`s.%`s doesn't have this option"
+        ger "Diese Option gibt es nicht in Speicher-Engine %s für %`s.%`s"
+        geo "საცავის ძრავს %s ცხრილისთვის %`s.%`s ამ პარამეტრის მხარდაჭერა არ გააჩნია"
+        hindi "स्टोरेज इंजन %s में यह विकल्प उपलब्ध नहीं है (टेबल: %`s.%`s)"
+        rus "Обработчик %s таблицы %`s.%`s не поддерживает эту возможность"
+        spa "El motor de almacenaje %s de la tabla %`s.%`s no contiene esta opción"
+        ukr "Дескриптор %s таблиці %`s.%`s не має цієї властивості"
+ER_KEY_NOT_FOUND  
+        chi "无法在'%-.192s'中找到记录"
+        cze "Nemohu najít záznam v '%-.192s'"
+        dan "Kan ikke finde posten i '%-.192s'"
+        eng "Can't find record in '%-.192s'"
+        est "Ei suuda leida kirjet '%-.192s'-s"
+        fre "Ne peut trouver l'enregistrement dans '%-.192s'"
+        ger "Kann Datensatz in '%-.192s' nicht finden"
+        geo "'%-.192s'-ში ჩანაწერი ვერ ვიპოვე"
+        greek "Αδύνατη η ανεύρεση εγγραφής στο '%-.192s'"
+        hindi "'%-.192s' में रिकॉर्ड नहीं मिला"
+        hun "Nem talalhato a rekord '%-.192s'-ben"
+        ita "Impossibile trovare il record in '%-.192s'"
+        jpn "'%-.192s' にレコードが見つかりません。"
+        kor "'%-.192s'에서 레코드를 찾을 수 없습니다."
+        nla "Kan record niet vinden in '%-.192s'"
+        nor "Kan ikke finne posten i '%-.192s'"
+        norwegian-ny "Kan ikkje finne posten i '%-.192s'"
+        pol "Nie można znaleĽć rekordu w '%-.192s'"
+        por "Não pode encontrar registro em '%-.192s'"
+        rum "Nu pot sa gasesc recordul in '%-.192s'"
+        rus "Невозможно найти запись в '%-.192s'"
+        serbian "Ne mogu da pronađem slog u '%-.192s'"
+        slo "Nemôžem nájsť záznam v '%-.192s'"
+        spa "No puedo encontrar el registro en '%-.192s'"
+        swe "Hittar inte posten '%-.192s'"
+        ukr "Не можу знайти запис у '%-.192s'"
+ER_NOT_FORM_FILE  
+        chi "文件中的信息不正确:'%-.200s'"
+        cze "Nesprávná informace v souboru '%-.200s'"
+        dan "Forkert indhold i: '%-.200s'"
+        eng "Incorrect information in file: '%-.200s'"
+        est "Vigane informatsioon failis '%-.200s'"
+        fre "Information erronnée dans le fichier: '%-.200s'"
+        ger "Falsche Information in Datei '%-.200s'"
+        geo "არასწორი ინფორმაცია ფაილში: '%-.200s'"
+        greek "Λάθος πληροφορίες στο αρχείο: '%-.200s'"
+        hindi "फ़ाइल '%-.200s' में गलत जानकारी है"
+        hun "Ervenytelen info a file-ban: '%-.200s'"
+        ita "Informazione errata nel file: '%-.200s'"
+        jpn "ファイル '%-.200s' 内の情報が不正です。"
+        kor "화일의 부정확한 정보: '%-.200s'"
+        nla "Verkeerde info in file: '%-.200s'"
+        nor "Feil informasjon i filen: '%-.200s'"
+        norwegian-ny "Feil informasjon i fila: '%-.200s'"
+        pol "Niewła?ciwa informacja w pliku: '%-.200s'"
+        por "Informação incorreta no arquivo '%-.200s'"
+        rum "Informatie incorecta in fisierul: '%-.200s'"
+        rus "Некорректная информация в файле '%-.200s'"
+        serbian "Pogrešna informacija u file-u: '%-.200s'"
+        slo "Nesprávna informácia v súbore: '%-.200s'"
+        spa "Información incorrecta en el fichero/archivo: '%-.200s'"
+        swe "Felaktig fil: '%-.200s'"
+        ukr "Хибна інформація у файлі: '%-.200s'"
+ER_NOT_KEYFILE  
+        chi "表'%-.200s'的索引损坏;试着修复"
+        cze "Nesprávný klíč pro tabulku '%-.200s'; pokuste se ho opravit"
+        dan "Fejl i indeksfilen til tabellen '%-.200s'; prøv at reparere den"
+        eng "Index for table '%-.200s' is corrupt; try to repair it"
+        est "Tabeli '%-.200s' võtmefail on vigane; proovi seda parandada"
+        fre "Index corrompu dans la table: '%-.200s'; essayez de le réparer"
+        ger "Fehlerhafte Index-Datei für Tabelle '%-.200s'; versuche zu reparieren"
+        geo "ინდექსი ცხრილისთვის '%-.200s' დაზიანებულია. სცადეთ, შეაკეთოთ ის"
+        greek "Λάθος αρχείο ταξινόμισης (key file) για τον πίνακα: '%-.200s'; Παρακαλώ, διορθώστε το!"
+        hindi "टेबल '%-.200s' का इंडेक्स CORRUPT हो गया है; इसे REPAIR करने की कोशिश करें"
+        hun "Ervenytelen kulcsfile a tablahoz: '%-.200s'; probalja kijavitani!"
+        ita "File chiave errato per la tabella : '%-.200s'; prova a riparalo"
+        jpn "表 '%-.200s' の索引ファイル(key file)の内容が不正です。修復を試行してください。"
+        kor "'%-.200s' 테이블의 부정확한 키 존재. 수정하시오!"
+        nla "Verkeerde zoeksleutel file voor tabel: '%-.200s'; probeer het te repareren"
+        nor "Tabellen '%-.200s' har feil i nøkkelfilen; forsøk å reparer den"
+        norwegian-ny "Tabellen '%-.200s' har feil i nykkelfila; prøv å reparere den"
+        pol "Niewła?ciwy plik kluczy dla tabeli: '%-.200s'; spróbuj go naprawić"
+        por "Arquivo de índice incorreto para tabela '%-.200s'; tente repará-lo"
+        rum "Cheia fisierului incorecta pentru tabela: '%-.200s'; incearca s-o repari"
+        rus "Некорректный индексный файл для таблицы: '%-.200s'. Попробуйте восстановить его"
+        serbian "Pogrešan key file za tabelu: '%-.200s'; probajte da ga ispravite"
+        slo "Nesprávny kľúč pre tabuľku '%-.200s'; pokúste sa ho opraviť"
+        spa "El índice para la tabla: '%-.200s' está corrupto; intente repararlo"
+        swe "Fatalt fel vid hantering av register '%-.200s'; kör en reparation"
+        ukr "Хибний файл ключей для таблиці: '%-.200s'; Спробуйте його відновити"
+ER_OLD_KEYFILE  
+        chi "表'%-.192s'的老索引文件损坏; 修复!"
+        cze "Starý klíčový soubor pro '%-.192s'; opravte ho"
+        dan "Gammel indeksfil for tabellen '%-.192s'; reparer den"
+        eng "Old key file for table '%-.192s'; repair it!"
+        est "Tabeli '%-.192s' võtmefail on aegunud; paranda see!"
+        fre "Vieux fichier d'index pour la table '%-.192s'; réparez le!"
+        ger "Alte Index-Datei für Tabelle '%-.192s'. Bitte reparieren"
+        geo "ინდექსის ფაილი ცხრილისთვის '%-.192s' ძველია. შეაკეთეთ ის!"
+        greek "Παλαιό αρχείο ταξινόμισης (key file) για τον πίνακα '%-.192s'; Παρακαλώ, διορθώστε το!"
+        hindi "टेबल '%-.192s' के लिए पुरानी KEY फ़ाइल; इसे REPAIR करने की कोशिश करें"
+        hun "Regi kulcsfile a '%-.192s'tablahoz; probalja kijavitani!"
+        ita "File chiave vecchio per la tabella '%-.192s'; riparalo!"
+        jpn "表 '%-.192s' の索引ファイル(key file)は古い形式です。修復してください。"
+        kor "'%-.192s' 테이블의 이전버젼의 키 존재. 수정하시오!"
+        nla "Oude zoeksleutel file voor tabel '%-.192s'; repareer het!"
+        nor "Gammel nøkkelfil for tabellen '%-.192s'; reparer den!"
+        norwegian-ny "Gammel nykkelfil for tabellen '%-.192s'; reparer den!"
+        pol "Plik kluczy dla tabeli '%-.192s' jest starego typu; napraw go!"
+        por "Arquivo de índice desatualizado para tabela '%-.192s'; repare-o!"
+        rum "Cheia fisierului e veche pentru tabela '%-.192s'; repar-o!"
+        rus "Старый индексный файл для таблицы '%-.192s'; отремонтируйте его!"
+        serbian "Zastareo key file za tabelu '%-.192s'; ispravite ga"
+        slo "Starý kľúčový súbor pre '%-.192s'; opravte ho!"
+        spa "Clave antigua de fichero/archivo para la tabla '%-.192s'; ¡repárela!"
+        swe "Gammal nyckelfil '%-.192s'; reparera registret"
+        ukr "Старий файл ключей для таблиці '%-.192s'; Відновіть його!"
+ER_OPEN_AS_READONLY  
+        chi "表'%-.192s'只可读"
+        cze "'%-.192s' je jen pro čtení"
+        dan "'%-.192s' er skrivebeskyttet"
+        eng "Table '%-.192s' is read only"
+        est "Tabel '%-.192s' on ainult lugemiseks"
+        fre "'%-.192s' est en lecture seulement"
+        ger "Tabelle '%-.192s' ist nur lesbar"
+        geo "ცხრილი '%-.192s' მხოლოდ კითხვადია"
+        greek "'%-.192s' επιτρέπεται μόνο η ανάγνωση"
+        hindi "टेबल '%-.192s' READ-ONLY है"
+        hun "'%-.192s' irasvedett"
+        ita "'%-.192s' e` di sola lettura"
+        jpn "表 '%-.192s' は読み込み専用です。"
+        kor "테이블 '%-.192s'는 읽기전용 입니다."
+        nla "'%-.192s' is alleen leesbaar"
+        nor "'%-.192s' er skrivebeskyttet"
+        norwegian-ny "'%-.192s' er skrivetryggja"
+        pol "'%-.192s' jest tylko do odczytu"
+        por "Tabela '%-.192s' é somente para leitura"
+        rum "Tabela '%-.192s' e read-only"
+        rus "Таблица '%-.192s' предназначена только для чтения"
+        serbian "Tabelu '%-.192s' je dozvoljeno samo čitati"
+        slo "'%-.192s' is čítať only"
+        spa "La tabla '%-.192s' es de sólo lectura"
+        swe "'%-.192s' är skyddad mot förändring"
+        ukr "Таблиця '%-.192s' тільки для читання"
+ER_OUTOFMEMORY HY001 S1001
+        chi "内存不足; 重启后重试(需要 %d bytes)"
+        cze "Málo paměti. Přestartujte daemona a zkuste znovu (je potřeba %d bytů)"
+        dan "Ikke mere hukommelse. Genstart serveren og prøv igen (mangler %d bytes)"
+        eng "Out of memory; restart server and try again (needed %d bytes)"
+        est "Mälu  sai otsa. Proovi MariaDB uuesti käivitada (puudu jäi %d baiti)"
+        fre "Manque de mémoire. Redémarrez le démon et ré-essayez (%d octets nécessaires)"
+        ger "Kein Speicher vorhanden (%d Bytes benötigt). Bitte Server neu starten"
+        geo "არასაკმარისი მეხსიერება. გადატვირთეთ სერვერი და კიდევ სცადეთ (მჭირდებოდა %d ბაიტი)"
+        greek "Δεν υπάρχει διαθέσιμη μνήμη. Προσπαθήστε πάλι, επανεκινώντας τη διαδικασία (demon) (χρειάζονται %d bytes)"
+        hun "Nincs eleg memoria. Inditsa ujra a demont, es probalja ismet. (%d byte szukseges.)"
+        ita "Memoria esaurita. Fai ripartire il demone e riprova (richiesti %d bytes)"
+        jpn "メモリが不足しています。サーバーを再起動してみてください。(%d バイトの割り当てに失敗)"
+        kor "Out of memory. 데몬을 재 실행 후 다시 시작하시오 (needed %d bytes)"
+        nla "Geen geheugen meer. Herstart server en probeer opnieuw (%d bytes nodig)"
+        nor "Ikke mer minne. Star på nytt tjenesten og prøv igjen (trengte %d byter)"
+        norwegian-ny "Ikkje meir minne. Start på nytt tenesten og prøv igjen (trengte %d bytar)"
+        pol "Zbyt mało pamięci. Uruchom ponownie demona i spróbuj ponownie (potrzeba %d bajtów)"
+        por "Sem memória. Reinicie o programa e tente novamente (necessita de %d bytes)"
+        rum "Out of memory. Porneste daemon-ul din nou si incearca inca o data (e nevoie de %d bytes)"
+        rus "Недостаточно памяти. Перезапустите сервер и попробуйте еще раз (нужно %d байт)"
+        serbian "Nema memorije. Restartujte MariaDB server i probajte ponovo (potrebno je %d byte-ova)"
+        slo "Málo pamäti. Reštartujte daemona a skúste znova (je potrebných %d bytov)"
+        spa "Memoria insuficiente. Reinicie el servidor e inténtelo otra vez (necesita %d bytes)"
+        swe "Oväntat slut på minnet, starta om programmet och försök på nytt (Behövde %d bytes)"
+        ukr "Брак пам'яті. Рестартуйте сервер та спробуйте знову (потрібно %d байтів)"
+ER_OUT_OF_SORTMEMORY HY001 S1001
+        chi "排序内存不足,可以考虑增加实例排序缓存量"
+        cze "Málo paměti pro třídění. Zvyšte velikost třídícího bufferu"
+        dan "Ikke mere sorteringshukommelse. Øg sorteringshukommelse (sort buffer size) for serveren"
+        eng "Out of sort memory, consider increasing server sort buffer size"
+        est "Mälu sai sorteerimisel otsa. Suurenda MariaDB-i sorteerimispuhvrit"
+        fre "Manque de mémoire pour le tri. Augmentez-la"
+        ger "Kein Speicher zum Sortieren vorhanden. sort_buffer_size sollte im Server erhöht werden"
+        geo "არასაკმარისი მეხსიერება. გადახედეთ სერვერის დალაგების ბუფერის ზომის პარამეტრებს"
+        greek "Δεν υπάρχει διαθέσιμη μνήμη για ταξινόμιση. Αυξήστε το sort buffer size για τη διαδικασία (demon)"
+        hun "Nincs eleg memoria a rendezeshez. Novelje a rendezo demon puffermeretet"
+        ita "Memoria per gli ordinamenti esaurita. Incrementare il 'sort_buffer' al demone"
+        jpn "ソートメモリが不足しています。ソートバッファサイズ(sort buffer size)の増加を検討してください。"
+        kor "Out of sort memory. daemon sort buffer의 크기를 증가시키세요"
+        nla "Geen geheugen om te sorteren. Verhoog de server sort buffer size"
+        nor "Ikke mer sorteringsminne. Vurder å øke sorteringsminnet (sort buffer size) for tjenesten"
+        norwegian-ny "Ikkje meir sorteringsminne. Vurder å auke sorteringsminnet (sorteringsbuffer storleik) for tenesten"
+        pol "Zbyt mało pamięci dla sortowania. Zwiększ wielko?ć bufora demona dla sortowania"
+        por "Não há memória suficiente para ordenação. Considere aumentar o tamanho do retentor (buffer) de ordenação"
+        rum "Out of memory pentru sortare. Largeste marimea buffer-ului pentru sortare in daemon (sort buffer size)"
+        rus "Недостаточно памяти для сортировки. Увеличьте размер буфера сортировки на сервере"
+        serbian "Nema memorije za sortiranje. Povećajte veličinu sort buffer-a MariaDB server-u"
+        slo "Málo pamäti pre triedenie, zvýšte veľkosť triediaceho bufferu"
+        spa "Memoria de ordenación insuficiente. Considere el incrementar el tamaño del búfer de ordenación del servidor"
+        swe "Sorteringsbufferten räcker inte till. Kontrollera startparametrarna"
+        ukr "Брак пам'яті для сортування. Треба збільшити розмір буфера сортування у сервера"
+ER_UNEXPECTED_EOF  
+        chi "阅读文件'%-.192s'时出现意外的EOF(错误号码:%M)"
+        cze "Neočekávaný konec souboru při čtení '%-.192s' (chybový kód: %M)"
+        dan "Uventet afslutning på fil (eof) ved læsning af filen '%-.192s' (Fejlkode: %M)"
+        eng "Unexpected EOF found when reading file '%-.192s' (errno: %M)"
+        est "Ootamatu faililõpumärgend faili '%-.192s' lugemisel (veakood: %M)"
+        fre "Fin de fichier inattendue en lisant '%-.192s' (Errcode: %M)"
+        ger "Unerwartetes Ende beim Lesen der Datei '%-.192s' (Fehler: %M)"
+        geo "მოულოდნელი EOF ფაილის ('%-.192s') წაკითხვისას (შეცდ. კოდი: %M)"
+        greek "Κατά τη διάρκεια της ανάγνωσης, βρέθηκε απροσδόκητα το τέλος του αρχείου '%-.192s' (κωδικός λάθους: %M)"
+        hun "Varatlan filevege-jel a '%-.192s'olvasasakor. (hibakod: %M)"
+        ita "Fine del file inaspettata durante la lettura del file '%-.192s' (errno: %M)"
+        jpn "ファイル '%-.192s' を読み込み中に予期せずファイルの終端に達しました。(エラー番号: %M)"
+        kor "'%-.192s' 화일을 읽는 도중 잘못된 eof을 발견 (에러번호: %M)"
+        nla "Onverwachte eof gevonden tijdens het lezen van file '%-.192s' (Errcode: %M)"
+        nor "Uventet slutt på fil (eof) ved lesing av filen '%-.192s' (Feilkode: %M)"
+        norwegian-ny "Uventa slutt på fil (eof) ved lesing av fila '%-.192s' (Feilkode: %M)"
+        pol "Nieoczekiwany 'eof' napotkany podczas czytania z pliku '%-.192s' (Kod błędu: %M)"
+        por "Encontrado fim de arquivo inesperado ao ler arquivo '%-.192s' (erro no. %M)"
+        rum "Sfirsit de fisier neasteptat in citirea fisierului '%-.192s' (errno: %M)"
+        rus "Неожиданный конец файла '%-.192s' (ошибка: %M)"
+        serbian "Neočekivani kraj pri čitanju file-a '%-.192s' (errno: %M)"
+        slo "Neočakávaný koniec súboru pri čítaní '%-.192s' (chybový kód: %M)"
+        spa "Inesperado fin de fichero/archivo mientras leíamos el fichero/archivo '%-.192s' (Error: %M)"
+        swe "Oväntat filslut vid läsning från '%-.192s' (Felkod: %M)"
+        ukr "Хибний кінець файлу '%-.192s' (помилка: %M)"
+ER_CON_COUNT_ERROR 08004 
+        chi "太多连接"
+        cze "Příliš mnoho spojení"
+        dan "For mange forbindelser (connections)"
+        eng "Too many connections"
+        est "Liiga palju samaaegseid ühendusi"
+        fre "Trop de connexions"
+        ger "Zu viele Verbindungen"
+        geo "მეტისმეტად ბევრი მიერთება"
+        greek "Υπάρχουν πολλές συνδέσεις..."
+        hindi "अत्यधिक कनेक्शन"
+        hun "Tul sok kapcsolat"
+        ita "Troppe connessioni"
+        jpn "接続が多すぎます。"
+        kor "너무 많은 연결... max_connection을 증가 시키시오..."
+        nla "Te veel verbindingen"
+        nor "For mange tilkoblinger (connections)"
+        norwegian-ny "For mange tilkoplingar (connections)"
+        pol "Zbyt wiele poł?czeń"
+        por "Excesso de conexões"
+        rum "Prea multe conectiuni"
+        rus "Слишком много соединений"
+        serbian "Previše konekcija"
+        slo "Príliš mnoho spojení"
+        spa "Demasiadas conexiones"
+        swe "För många anslutningar"
+        ukr "Забагато з'єднань"
+ER_OUT_OF_RESOURCES  
+        chi "内存不足."
+        cze "Málo prostoru/paměti pro thread"
+        dan "Udgået for tråde/hukommelse"
+        eng "Out of memory."
+        est "Mälu sai otsa. Võimalik, et aitab swap-i lisamine või käsu 'ulimit' abil MariaDB-le rohkema mälu kasutamise lubamine"
+        fre "Manque de 'threads'/mémoire"
+        ger "Kein Speicher mehr vorhanden."
+        geo "არასაკმარისი მეხსიერება."
+        greek "Πρόβλημα με τη διαθέσιμη μνήμη (Out of thread space/memory)"
+        hun "Elfogyott a thread-memoria"
+        ita "Fine dello spazio/memoria per i thread"
+        jpn "メモリが不足しています。mariadbd やその他のプロセスがメモリーを使い切っていないか確認して下さい。メモリーを使い切っていない場合、'ulimit'の設定等で mariadbd のメモリー使用最大量を多くするか、スワップ領域を増やす必要があるかもしれません。"
+# This message failed to convert from euc-kr, skipped
+        nla "Geen thread geheugen meer; controleer of mariadbd of andere processen al het beschikbare geheugen gebruikt. Zo niet, dan moet u wellicht 'ulimit' gebruiken om mariadbd toe te laten meer geheugen te benutten, of u kunt extra swap ruimte toevoegen"
+        nor "Tomt for tråd plass/minne"
+        norwegian-ny "Tomt for tråd plass/minne"
+        pol "Zbyt mało miejsca/pamięci dla w?tku"
+        por "Sem memória."
+        rum "Out of memory."
+        rus "Недостаточно памяти."
+        serbian "Nema memorije."
+        slo "Málo miesta-pamäti pre vlákno"
+        spa "Memoria agotada"
+        swe "Fick slut på minnet."
+        ukr "Брак пам'яті."
+ER_BAD_HOST_ERROR 08S01 
+        chi "不能从你的地址获取主机名称"
+        cze "Nemohu zjistit jméno stroje pro Vaši adresu"
+        dan "Kan ikke få værtsnavn for din adresse"
+        eng "Can't get hostname for your address"
+        est "Ei suuda lahendada IP aadressi masina nimeks"
+        fre "Ne peut obtenir de hostname pour votre adresse"
+        ger "Kann Hostnamen für diese Adresse nicht erhalten"
+        geo "თქვენი მისამართისთვის ჰოსტის სახელის მიღება შეუძლებელია"
+        greek "Δεν έγινε γνωστό το hostname για την address σας"
+        hindi "आपके I.P. ऐड्रेस के लिए होस्टनेम प्राप्त करने में विफल रहे"
+        hun "A gepnev nem allapithato meg a cimbol"
+        ita "Impossibile risalire al nome dell'host dall'indirizzo (risoluzione inversa)"
+        jpn "IPアドレスからホスト名を解決できません。"
+        kor "당신의 컴퓨터의 호스트이름을 얻을 수 없습니다."
+        nla "Kan de hostname niet krijgen van uw adres"
+        nor "Kan ikke få tak i vertsnavn for din adresse"
+        norwegian-ny "Kan ikkje få tak i vertsnavn for di adresse"
+        pol "Nie można otrzymać nazwy hosta dla twojego adresu"
+        por "Não pode obter nome do 'host' para seu endereço"
+        rum "Nu pot sa obtin hostname-ul adresei tale"
+        rus "Невозможно получить имя хоста для вашего адреса"
+        serbian "Ne mogu da dobijem ime host-a za vašu IP adresu"
+        slo "Nemôžem zistiť meno hostiteľa pre vašu adresu"
+        spa "No puedo obtener el nombre de equipo de la dirección de vd."
+        swe "Kan inte hitta 'hostname' för din adress"
+        ukr "Не можу визначити ім'я хосту для вашої адреси"
+ER_HANDSHAKE_ERROR 08S01 
+        chi "坏握手"
+        cze "Chyba při ustavování spojení"
+        dan "Forkert håndtryk (handshake)"
+        eng "Bad handshake"
+        est "Väär handshake"
+        fre "Mauvais 'handshake'"
+        ger "Ungültiger Handshake"
+        geo "არასწორი მისასალმებელი შეტყობინება"
+        greek "Η αναγνώριση (handshake) δεν έγινε σωστά"
+        hindi "संपर्क स्थापित करते समय त्रुटि हुई (BAD HANDSHAKE)"
+        hun "A kapcsolatfelvetel nem sikerult (Bad handshake)"
+        ita "Negoziazione impossibile"
+        jpn "ハンドシェイクエラー"
+        nla "Verkeerde handshake"
+        nor "Feil håndtrykk (handshake)"
+        norwegian-ny "Feil handtrykk (handshake)"
+        pol "Zły uchwyt(handshake)"
+        por "Negociação de acesso falhou"
+        rum "Prost inceput de conectie (bad handshake)"
+        rus "Некорректное приветствие"
+        serbian "Loš početak komunikacije (handshake)"
+        slo "Chyba pri nadväzovaní spojenia"
+        spa "Mal apretón de manos (handshake)"
+        swe "Fel vid initiering av kommunikationen med klienten"
+        ukr "Невірна установка зв'язку"
+ER_DBACCESS_DENIED_ERROR 42000 
+        chi "用户'%s'@'%s'无权访问数据库'%-.192s'"
+        cze "Přístup pro uživatele '%s'@'%s' k databázi '%-.192s' není povolen"
+        dan "Adgang nægtet bruger: '%s'@'%s' til databasen '%-.192s'"
+        eng "Access denied for user '%s'@'%s' to database '%-.192s'"
+        est "Ligipääs keelatud kasutajale '%s'@'%s' andmebaasile '%-.192s'"
+        fre "Accès refusé pour l'utilisateur: '%s'@'%s'. Base '%-.192s'"
+        ger "Benutzer '%s'@'%s' hat keine Zugriffsberechtigung für Datenbank '%-.192s'"
+        geo "მომხმარებლისთვის '%s'@'%s' ბაზაზე '%-.192s' წვდომა აკრძალულია"
+        greek "Δεν επιτέρεται η πρόσβαση στο χρήστη: '%s'@'%s' στη βάση δεδομένων '%-.192s'"
+        hindi "यूज़र '%s'@'%s' को डेटाबेस '%-.192s' की अनुमति नहीं है"
+        hun "A(z) '%s'@'%s' felhasznalo szamara tiltott eleres az '%-.192s' adabazishoz"
+        ita "Accesso non consentito per l'utente: '%s'@'%s' al database '%-.192s'"
+        jpn "ユーザー '%s'@'%s' の '%-.192s' データベースへのアクセスを拒否します"
+        kor "'%s'@'%s' 사용자는 '%-.192s' 데이타베이스에 접근이 거부 되었습니다."
+        nla "Toegang geweigerd voor gebruiker: '%s'@'%s' naar database '%-.192s'"
+        nor "Tilgang nektet for bruker: '%s'@'%s' til databasen '%-.192s' nektet"
+        norwegian-ny "Tilgang ikkje tillate for brukar: '%s'@'%s' til databasen '%-.192s' nekta"
+        por "Acesso negado para o usuário '%s'@'%s' ao banco de dados '%-.192s'"
+        rum "Acces interzis pentru utilizatorul: '%s'@'%s' la baza de date '%-.192s'"
+        rus "Для пользователя '%s'@'%s' доступ к базе данных '%-.192s' закрыт"
+        serbian "Pristup je zabranjen korisniku '%s'@'%s' za bazu '%-.192s'"
+        slo "Zakázaný prístup pre užívateľa: '%s'@'%s' k databázi '%-.192s'"
+        spa "Acceso denegado para usuario: '%s'@'%s' a la base de datos '%-.192s'"
+        swe "Användare '%s'@'%s' är ej berättigad att använda databasen %-.192s"
+        ukr "Доступ заборонено для користувача: '%s'@'%s' до бази данних '%-.192s'"
+ER_ACCESS_DENIED_ERROR 28000 
+        chi "'%s'@'%s' 用户无权访问 (使用密码: %s)"
+        cze "Přístup pro uživatele '%s'@'%s' (s heslem %s)"
+        dan "Adgang nægtet bruger: '%s'@'%s' (Bruger adgangskode: %s)"
+        eng "Access denied for user '%s'@'%s' (using password: %s)"
+        est "Ligipääs keelatud kasutajale '%s'@'%s' (kasutab parooli: %s)"
+        fre "Accès refusé pour l'utilisateur: '%s'@'%s' (mot de passe: %s)"
+        ger "Benutzer '%s'@'%s' hat keine Zugriffsberechtigung (verwendetes Passwort: %s)"
+        geo "მომხმარებლისთვის '%s'@'%s' წვდომა აკრძალულია (პაროლის გამოყენებით: %s)"
+        greek "Δεν επιτέρεται η πρόσβαση στο χρήστη: '%s'@'%s' (χρήση password: %s)"
+        hindi "यूज़र '%s'@'%s' को अनुमति नहीं है (पासवर्ड का उपयोग: %s)"
+        hun "A(z) '%s'@'%s' felhasznalo szamara tiltott eleres. (Hasznalja a jelszot: %s)"
+        ita "Accesso non consentito per l'utente: '%s'@'%s' (Password: %s)"
+        jpn "ユーザー '%s'@'%s' を拒否します。(using password: %s)"
+        kor "'%s'@'%s' 사용자는 접근이 거부 되었습니다. (using password: %s)"
+        nla "Toegang geweigerd voor gebruiker: '%s'@'%s' (Wachtwoord gebruikt: %s)"
+        nor "Tilgang nektet for bruker: '%s'@'%s' (Bruker passord: %s)"
+        norwegian-ny "Tilgang ikke tillate for brukar: '%s'@'%s' (Brukar passord: %s)"
+        por "Acesso negado para o usuário '%s'@'%s' (senha usada: %s)"
+        rum "Acces interzis pentru utilizatorul: '%s'@'%s' (Folosind parola: %s)"
+        rus "Доступ закрыт для пользователя '%s'@'%s' (был использован пароль: %s)"
+        serbian "Pristup je zabranjen korisniku '%s'@'%s' (koristi lozinku: '%s')"
+        slo "Zakázaný prístup pre užívateľa: '%s'@'%s' (použitie hesla: %s)"
+        spa "Acceso denegado para usuario: '%s'@'%s' (Usando contraseña: %s)"
+        swe "Användare '%s'@'%s' är ej berättigad att logga in (Använder lösen: %s)"
+        ukr "Доступ заборонено для користувача: '%s'@'%s' (Використано пароль: %s)"
+ER_NO_DB_ERROR 3D000 
+        chi "没有选择数据库"
+        cze "Nebyla vybrána žádná databáze"
+        dan "Ingen database valgt"
+        eng "No database selected"
+        est "Andmebaasi ei ole valitud"
+        fre "Aucune base n'a été sélectionnée"
+        ger "Keine Datenbank ausgewählt"
+        geo "მონაცემთა ბაზა არჩეული არაა"
+        greek "Δεν επιλέχθηκε βάση δεδομένων"
+        hindi "किसी भी डेटाबेस का चयन नहीं किया गया है"
+        hun "Nincs kivalasztott adatbazis"
+        ita "Nessun database selezionato"
+        jpn "データベースが選択されていません。"
+        kor "선택된 데이타베이스가 없습니다."
+        nla "Geen database geselecteerd"
+        nor "Ingen database valgt"
+        norwegian-ny "Ingen database vald"
+        pol "Nie wybrano żadnej bazy danych"
+        por "Nenhum banco de dados foi selecionado"
+        rum "Nici o baza de data nu a fost selectata inca"
+        rus "База данных не выбрана"
+        serbian "Ni jedna baza nije selektovana"
+        slo "Nebola vybraná databáza"
+        spa "Base de datos no seleccionada"
+        swe "Ingen databas i användning"
+        ukr "Базу данних не вибрано"
+ER_UNKNOWN_COM_ERROR 08S01 
+        chi "未知的命令"
+        cze "Neznámý příkaz"
+        dan "Ukendt kommando"
+        eng "Unknown command"
+        est "Tundmatu käsk"
+        fre "Commande inconnue"
+        ger "Unbekannter Befehl"
+        geo "უცნობი ბრძანება"
+        greek "Αγνωστη εντολή"
+        hindi "अज्ञात आदेश"
+        hun "Ervenytelen parancs"
+        ita "Comando sconosciuto"
+        jpn "不明なコマンドです。"
+        kor "명령어가 뭔지 모르겠어요..."
+        nla "Onbekend commando"
+        nor "Ukjent kommando"
+        norwegian-ny "Ukjent kommando"
+        pol "Nieznana komenda"
+        por "Comando desconhecido"
+        rum "Comanda invalida"
+        rus "Неизвестная команда коммуникационного протокола"
+        serbian "Nepoznata komanda"
+        slo "Neznámy príkaz"
+        spa "Comando desconocido"
+        swe "Okänt kommando"
+        ukr "Невідома команда"
+ER_BAD_NULL_ERROR 23000 
+        chi "列'%-.192s'不能为NULL"
+        cze "Sloupec '%-.192s' nemůže být null"
+        dan "Kolonne '%-.192s' kan ikke være NULL"
+        eng "Column '%-.192s' cannot be null"
+        est "Tulp '%-.192s' ei saa omada nullväärtust"
+        fre "Le champ '%-.192s' ne peut être vide (null)"
+        ger "Feld '%-.192s' darf nicht NULL sein"
+        geo "სვეტს '%-.192s' მნიშვნელობა NULL ვერ ექნება"
+        greek "Το πεδίο '%-.192s' δεν μπορεί να είναι κενό (null)"
+        hindi "काँलम '%-.192s' NULL नहीं हो सकता"
+        hun "A(z) '%-.192s' oszlop erteke nem lehet nulla"
+        ita "La colonna '%-.192s' non puo` essere nulla"
+        jpn "列 '%-.192s' は null にできません。"
+        kor "칼럼 '%-.192s'는 널(Null)이 되면 안됩니다. "
+        nla "Kolom '%-.192s' kan niet null zijn"
+        nor "Kolonne '%-.192s' kan ikke vere null"
+        norwegian-ny "Kolonne '%-.192s' kan ikkje vere null"
+        pol "Kolumna '%-.192s' nie może być null"
+        por "Coluna '%-.192s' não pode ser vazia"
+        rum "Coloana '%-.192s' nu poate sa fie null"
+        rus "Столбец '%-.192s' не может принимать величину NULL"
+        serbian "Kolona '%-.192s' ne može biti NULL"
+        slo "Pole '%-.192s' nemôže byť null"
+        spa "La columna '%-.192s' no puede ser nula"
+        swe "Kolumn '%-.192s' får inte vara NULL"
+        ukr "Стовбець '%-.192s' не може бути нульовим"
+ER_BAD_DB_ERROR 42000 
+        chi "未知数据库'%-.192s'"
+        cze "Neznámá databáze '%-.192s'"
+        dan "Ukendt database '%-.192s'"
+        eng "Unknown database '%-.192s'"
+        est "Tundmatu andmebaas '%-.192s'"
+        fre "Base '%-.192s' inconnue"
+        ger "Unbekannte Datenbank '%-.192s'"
+        geo "უცნობი მონაცემთა ბაზა '%-.192s'"
+        greek "Αγνωστη βάση δεδομένων '%-.192s'"
+        hindi "अज्ञात डाटाबेस '%-.192s'"
+        hun "Ervenytelen adatbazis: '%-.192s'"
+        ita "Database '%-.192s' sconosciuto"
+        jpn "'%-.192s' は不明なデータベースです。"
+        kor "데이타베이스 '%-.192s'는 알수 없음"
+        nla "Onbekende database '%-.192s'"
+        nor "Ukjent database '%-.192s'"
+        norwegian-ny "Ukjent database '%-.192s'"
+        pol "Nieznana baza danych '%-.192s'"
+        por "Banco de dados '%-.192s' desconhecido"
+        rum "Baza de data invalida '%-.192s'"
+        rus "Неизвестная база данных '%-.192s'"
+        serbian "Nepoznata baza '%-.192s'"
+        slo "Neznáma databáza '%-.192s'"
+        spa "Base de datos '%-.192s' desconocida"
+        swe "Okänd databas: '%-.192s'"
+        ukr "Невідома база данних '%-.192s'"
+ER_TABLE_EXISTS_ERROR 42S01 
+        chi "表'%-.192s'已经存在"
+        cze "Tabulka '%-.192s' již existuje"
+        dan "Tabellen '%-.192s' findes allerede"
+        eng "Table '%-.192s' already exists"
+        est "Tabel '%-.192s' juba eksisteerib"
+        fre "La table '%-.192s' existe déjà"
+        ger "Tabelle '%-.192s' bereits vorhanden"
+        geo "ცხრილი '%-.192s' უკვე არსებობს"
+        greek "Ο πίνακας '%-.192s' υπάρχει ήδη"
+        hindi "टेबल '%-.192s' पहले से ही मौजूद है"
+        hun "A(z) '%-.192s' tabla mar letezik"
+        ita "La tabella '%-.192s' esiste gia`"
+        jpn "表 '%-.192s' はすでに存在します。"
+        kor "테이블 '%-.192s'는 이미 존재함"
+        nla "Tabel '%-.192s' bestaat al"
+        nor "Tabellen '%-.192s' eksisterer allerede"
+        norwegian-ny "Tabellen '%-.192s' eksisterar allereide"
+        pol "Tabela '%-.192s' już istnieje"
+        por "Tabela '%-.192s' já existe"
+        rum "Tabela '%-.192s' exista deja"
+        rus "Таблица '%-.192s' уже существует"
+        serbian "Tabela '%-.192s' već postoji"
+        slo "Tabuľka '%-.192s' už existuje"
+        spa "La tabla '%-.192s' ya existe"
+        swe "Tabellen '%-.192s' finns redan"
+        ukr "Таблиця '%-.192s' вже існує"
+ER_BAD_TABLE_ERROR 42S02 
+        chi "未知表'%-.100T'"
+        cze "Neznámá tabulka '%-.100T'"
+        dan "Ukendt tabel '%-.100T'"
+        eng "Unknown table '%-.100T'"
+        est "Tundmatu tabel '%-.100T'"
+        fre "Table '%-.100T' inconnue"
+        ger "Unbekannte Tabelle '%-.100T'"
+        geo "უცნობი ცხრილი '%-.100T'"
+        greek "Αγνωστος πίνακας '%-.100T'"
+        hindi "अज्ञात टेबल '%-.100T'"
+        hun "Ervenytelen tabla: '%-.100T'"
+        ita "Tabella '%-.100T' sconosciuta"
+        jpn "'%-.100T' は不明な表です。"
+        kor "테이블 '%-.100T'는 알수 없음"
+        nla "Onbekende tabel '%-.100T'"
+        nor "Ukjent tabell '%-.100T'"
+        norwegian-ny "Ukjent tabell '%-.100T'"
+        pol "Nieznana tabela '%-.100T'"
+        por "Tabela '%-.100T' desconhecida"
+        rum "Tabela '%-.100T' este invalida"
+        rus "Неизвестная таблица '%-.100T'"
+        serbian "Nepoznata tabela '%-.100T'"
+        slo "Neznáma tabuľka '%-.100T'"
+        spa "Tabla '%-.100T' no reconocida"
+        swe "Okänd tabell '%-.100T'"
+        ukr "Невідома таблиця '%-.100T'"
+ER_NON_UNIQ_ERROR 23000 
+        chi "列名 '%-.192s' 在 %-.192s 定义模糊"
+        cze "Sloupec '%-.192s' v %-.192s není zcela jasný"
+        dan "Felt: '%-.192s' i tabel %-.192s er ikke entydigt"
+        eng "Column '%-.192s' in %-.192s is ambiguous"
+        est "Väli '%-.192s' %-.192s-s ei ole ühene"
+        fre "Champ: '%-.192s' dans %-.192s est ambigu"
+        ger "Feld '%-.192s' in %-.192s ist nicht eindeutig"
+        geo "სვეტი '%-.192s' %-.192s-ში ბუნდოვანია"
+        greek "Το πεδίο: '%-.192s' σε %-.192s δεν έχει καθοριστεί"
+        hindi "काँलम '%-.192s' अस्पष्ट है (टेबल: %-.192s)"
+        hun "A(z) '%-.192s' oszlop %-.192s-ben ketertelmu"
+        ita "Colonna: '%-.192s' di %-.192s e` ambigua"
+        jpn "列 '%-.192s' は %-.192s 内で曖昧です。"
+        kor "칼럼: '%-.192s' in '%-.192s' 이 모호함"
+        nla "Kolom: '%-.192s' in %-.192s is niet eenduidig"
+        nor "Felt: '%-.192s' i tabell %-.192s er ikke entydig"
+        norwegian-ny "Kolonne: '%-.192s' i tabell %-.192s er ikkje eintydig"
+        pol "Kolumna: '%-.192s' w  %-.192s jest dwuznaczna"
+        por "Coluna '%-.192s' em '%-.192s' é ambígua"
+        rum "Coloana: '%-.192s' in %-.192s este ambigua"
+        rus "Столбец '%-.192s' в %-.192s задан неоднозначно"
+        serbian "Kolona '%-.192s' u %-.192s nije jedinstvena u kontekstu"
+        slo "Pole: '%-.192s' v %-.192s je nejasné"
+        spa "La columna: '%-.192s' en %-.192s es ambigua"
+        swe "Kolumn '%-.192s' i %-.192s är inte unik"
+        ukr "Стовбець '%-.192s' у %-.192s визначений неоднозначно"
+ER_SERVER_SHUTDOWN 08S01 
+        chi "服务器正在关闭"
+        cze "Probíhá ukončování práce serveru"
+        dan "Database nedlukning er i gang"
+        eng "Server shutdown in progress"
+        est "Serveri seiskamine käib"
+        fre "Arrêt du serveur en cours"
+        ger "Der Server wird heruntergefahren"
+        geo "მიმდინარეობს სერვერის გამორთვა"
+        greek "Εναρξη διαδικασίας αποσύνδεσης του εξυπηρετητή (server shutdown)"
+        hindi "सर्वर बंद हो रहा है"
+        hun "A szerver leallitasa folyamatban"
+        ita "Shutdown del server in corso"
+        jpn "サーバーをシャットダウン中です。"
+        kor "Server가 셧다운 중입니다."
+        nla "Bezig met het stoppen van de server"
+        nor "Database nedkobling er i gang"
+        norwegian-ny "Tenar nedkopling er i gang"
+        pol "Trwa kończenie działania serwera"
+        por "'Shutdown' do servidor em andamento"
+        rum "Terminarea serverului este in desfasurare"
+        rus "Сервер находится в процессе остановки"
+        serbian "Gašenje servera je u toku"
+        slo "Prebieha ukončovanie práce servera"
+        spa "Desconexión de servidor en proceso"
+        swe "Servern går nu ned"
+        ukr "Завершується робота сервера"
+ER_BAD_FIELD_ERROR 42S22 S0022
+        chi "未知列'%-.192s'在'%-.192s'"
+        cze "Neznámý sloupec '%-.192s' v %-.192s"
+        dan "Ukendt kolonne '%-.192s' i tabel %-.192s"
+        eng "Unknown column '%-.192s' in '%-.192s'"
+        est "Tundmatu tulp '%-.192s' '%-.192s'-s"
+        fre "Champ '%-.192s' inconnu dans %-.192s"
+        ger "Unbekanntes Tabellenfeld '%-.192s' in %-.192s"
+        geo "უცნობი სვეტი '%-.192s' '%-.192s'-ში"
+        greek "Αγνωστο πεδίο '%-.192s' σε '%-.192s'"
+        hindi "अज्ञात काँलम '%-.192s'(टेबल: '%-.192s')"
+        hun "A(z) '%-.192s' oszlop ervenytelen '%-.192s'-ben"
+        ita "Colonna sconosciuta '%-.192s' in '%-.192s'"
+        jpn "列 '%-.192s' は '%-.192s' にはありません。"
+        kor "Unknown 칼럼 '%-.192s' in '%-.192s'"
+        nla "Onbekende kolom '%-.192s' in %-.192s"
+        nor "Ukjent kolonne '%-.192s' i tabell %-.192s"
+        norwegian-ny "Ukjent felt '%-.192s' i tabell %-.192s"
+        pol "Nieznana kolumna '%-.192s' w  %-.192s"
+        por "Coluna '%-.192s' desconhecida em '%-.192s'"
+        rum "Coloana invalida '%-.192s' in '%-.192s'"
+        rus "Неизвестный столбец '%-.192s' в '%-.192s'"
+        serbian "Nepoznata kolona '%-.192s' u '%-.192s'"
+        slo "Neznáme pole '%-.192s' v '%-.192s'"
+        spa "No se reconoce la columna '%-.192s' en %-.192s"
+        swe "Okänd kolumn '%-.192s' i %-.192s"
+        ukr "Невідомий стовбець '%-.192s' у '%-.192s'"
+ER_WRONG_FIELD_WITH_GROUP 42000 S1009
+        chi "'%-.192s' 不在 GROUP BY"
+        cze "Použité '%-.192s' nebylo v group by"
+        dan "Brugte '%-.192s' som ikke var i group by"
+        eng "'%-.192s' isn't in GROUP BY"
+        est "'%-.192s' puudub GROUP BY klauslis"
+        fre "'%-.192s' n'est pas dans 'group by'"
+        ger "'%-.192s' ist nicht in GROUP BY vorhanden"
+        geo "'%-.192s' GROUP BY-ში არაა"
+        greek "Χρησιμοποιήθηκε '%-.192s' που δεν υπήρχε στο group by"
+        hindi "'%-.192s' GROUP BY में नहीं है"
+        hun "Used '%-.192s' with wasn't in group by"
+        ita "Usato '%-.192s' che non e` nel GROUP BY"
+        jpn "'%-.192s' はGROUP BY句で指定されていません。"
+        kor "'%-.192s'은 GROUP BY속에 없음"
+        nla "Opdracht gebruikt '%-.192s' dat niet in de GROUP BY voorkomt"
+        nor "Brukte '%-.192s' som ikke var i group by"
+        norwegian-ny "Brukte '%-.192s' som ikkje var i group by"
+        pol "Użyto '%-.192s' bez umieszczenia w group by"
+        por "'%-.192s' não está em 'GROUP BY'"
+        rum "'%-.192s' nu exista in clauza GROUP BY"
+        rus "'%-.192s' не присутствует в GROUP BY"
+        serbian "Entitet '%-.192s' nije naveden u komandi 'GROUP BY'"
+        slo "Použité '%-.192s' nebolo v 'group by'"
+        spa "'%-.192s' no se encuentra en GROUP BY"
+        swe "'%-.192s' finns inte i GROUP BY"
+        ukr "'%-.192s' не є у GROUP BY"
+ER_WRONG_GROUP_FIELD 42000 S1009
+        chi "不能在'%-.192s'上分组"
+        cze "Nemohu použít group na '%-.192s'"
+        dan "Kan ikke gruppere på '%-.192s'"
+        eng "Can't group on '%-.192s'"
+        est "Ei saa grupeerida '%-.192s' järgi"
+        fre "Ne peut regrouper '%-.192s'"
+        ger "Gruppierung über '%-.192s' nicht möglich"
+        geo "'%-.192s'-ის მიხედვით დაჯგუფების შეცდომა"
+        greek "Αδύνατη η ομαδοποίηση (group on) '%-.192s'"
+        hindi "'%-.192s' पर GROUP नहीं कर सकते"
+        hun "A group nem hasznalhato: '%-.192s'"
+        ita "Impossibile raggruppare per '%-.192s'"
+        jpn "'%-.192s' でのグループ化はできません。"
+        kor "'%-.192s'를 그룹할 수 없음"
+        nla "Kan '%-.192s' niet groeperen"
+        nor "Kan ikke gruppere på '%-.192s'"
+        norwegian-ny "Kan ikkje gruppere på '%-.192s'"
+        pol "Nie można grupować po '%-.192s'"
+        por "Não pode agrupar em '%-.192s'"
+        rum "Nu pot sa grupez pe (group on) '%-.192s'"
+        rus "Невозможно произвести группировку по '%-.192s'"
+        serbian "Ne mogu da grupišem po '%-.192s'"
+        slo "Nemôžem použiť 'group' na '%-.192s'"
+        spa "No puedo agrupar por '%-.192s'"
+        swe "Kan inte använda GROUP BY med '%-.192s'"
+        ukr "Не можу групувати по '%-.192s'"
+ER_WRONG_SUM_SELECT 42000 S1009
+        chi "语句在同一语句里有求和函数和列"
+        cze "Příkaz obsahuje zároveň funkci sum a sloupce"
+        dan "Udtrykket har summer (sum) funktioner og kolonner i samme udtryk"
+        eng "Statement has sum functions and columns in same statement"
+        est "Lauses on korraga nii tulbad kui summeerimisfunktsioonid"
+        fre "Vous demandez la fonction sum() et des champs dans la même commande"
+        ger "Die Verwendung von Summierungsfunktionen und Spalten im selben Befehl ist nicht erlaubt"
+        geo "გამოსახულება შეჯამების ფუნქციებს და სვეტებს ერთდროულად შეიცავს"
+        greek "Η διατύπωση περιέχει sum functions και columns στην ίδια διατύπωση"
+        ita "Il comando ha una funzione SUM e una colonna non specificata nella GROUP BY"
+        jpn "集計関数と通常の列が同時に指定されています。"
+        kor "Statement 가 sum기능을 동작중이고 칼럼도 동일한 statement입니다."
+        nla "Opdracht heeft totaliseer functies en kolommen in dezelfde opdracht"
+        nor "Uttrykket har summer (sum) funksjoner og kolonner i samme uttrykk"
+        norwegian-ny "Uttrykket har summer (sum) funksjoner og kolonner i same uttrykk"
+        pol "Zapytanie ma funkcje sumuj?ce i kolumny w tym samym zapytaniu"
+        por "Cláusula contém funções de soma e colunas juntas"
+        rum "Comanda are functii suma si coloane in aceeasi comanda"
+        rus "Выражение содержит групповые функции и столбцы, но не включает GROUP BY. А как вы умудрились получить это сообщение об ошибке?"
+        serbian "Izraz ima 'SUM' agregatnu funkciju i kolone u isto vreme"
+        slo "Príkaz obsahuje zároveň funkciu 'sum' a poľa"
+        spa "La sentencia tiene funciones de suma y columnas en la misma sentencia"
+        swe "Kommandot har både sum functions och enkla funktioner"
+        ukr "У виразі використано підсумовуючі функції поряд з іменами стовбців"
+ER_WRONG_VALUE_COUNT 21S01 
+        chi "列数与值数不匹配"
+        cze "Počet sloupců neodpovídá zadané hodnotě"
+        dan "Kolonne tæller stemmer ikke med antallet af værdier"
+        eng "Column count doesn't match value count"
+        est "Tulpade arv erineb väärtuste arvust"
+        ger "Die Anzahl der Spalten entspricht nicht der Anzahl der Werte"
+        geo "სვეტების რაოდენობა მნიშვნელობების რაოდენობას არ ემთხვევა"
+        greek "Το Column count δεν ταιριάζει με το value count"
+        hindi "कॉलम की गिनती मूल्य की गिनती के समान नही है"
+        hun "Az oszlopban levo ertek nem egyezik meg a szamitott ertekkel"
+        ita "Il numero delle colonne non e` uguale al numero dei valori"
+        jpn "列数が値の個数と一致しません。"
+        kor "칼럼의 카운트가 값의 카운트와 일치하지 않습니다."
+        nla "Het aantal kolommen komt niet overeen met het aantal opgegeven waardes"
+        nor "Felt telling stemmer verdi telling"
+        norwegian-ny "Kolonne telling stemmer verdi telling"
+        pol "Liczba kolumn nie odpowiada liczbie warto?ci"
+        por "Contagem de colunas não confere com a contagem de valores"
+        rum "Numarul de coloane nu este acelasi cu numarul valoarei"
+        rus "Количество столбцов не совпадает с количеством значений"
+        serbian "Broj kolona ne odgovara broju vrednosti"
+        slo "Počet polí nezodpovedá zadanej hodnote"
+        spa "El contador de columnas no coincide con el contador de valores"
+        swe "Antalet kolumner motsvarar inte antalet värden"
+        ukr "Кількість стовбців не співпадає з кількістю значень"
+ER_TOO_LONG_IDENT 42000 S1009
+        chi "标识符'%-.100T'太长"
+        cze "Jméno identifikátoru '%-.100T' je příliš dlouhé"
+        dan "Navnet '%-.100T' er for langt"
+        eng "Identifier name '%-.100T' is too long"
+        est "Identifikaatori '%-.100T' nimi on liiga pikk"
+        fre "Le nom de l'identificateur '%-.100T' est trop long"
+        ger "Name des Bezeichners '%-.100T' ist zu lang"
+        geo "იდენტიფიკატორის სახელი ('%-.100T') ძალიან გრძელია"
+        greek "Το identifier name '%-.100T' είναι πολύ μεγάλο"
+        hindi "पहचानकर्ता का नाम '%-.100T' बहुत लंबा है"
+        hun "A(z) '%-.100T' azonositonev tul hosszu"
+        ita "Il nome dell'identificatore '%-.100T' e` troppo lungo"
+        jpn "識別子名 '%-.100T' は長すぎます。"
+        kor "Identifier '%-.100T'는 너무 길군요."
+        nla "Naam voor herkenning '%-.100T' is te lang"
+        nor "Identifikator '%-.100T' er for lang"
+        norwegian-ny "Identifikator '%-.100T' er for lang"
+        pol "Nazwa identyfikatora '%-.100T' jest zbyt długa"
+        por "Nome identificador '%-.100T' é longo demais"
+        rum "Numele indentificatorului '%-.100T' este prea lung"
+        rus "Слишком длинный идентификатор '%-.100T'"
+        serbian "Ime '%-.100T' je predugačko"
+        slo "Meno identifikátora '%-.100T' je príliš dlhé"
+        spa "El nombre del identificador '%-.100T' es demasiado grande"
+        swe "Kolumnnamn '%-.100T' är för långt"
+        ukr "Ім'я ідентифікатора '%-.100T' задовге"
+ER_DUP_FIELDNAME 42S21 S1009
+        chi "列名重复 '%-.192s'"
+        cze "Zdvojené jméno sloupce '%-.192s'"
+        dan "Feltnavnet '%-.192s' findes allerede"
+        eng "Duplicate column name '%-.192s'"
+        est "Kattuv tulba nimi '%-.192s'"
+        fre "Nom du champ '%-.192s' déjà utilisé"
+        ger "Doppelter Spaltenname: '%-.192s'"
+        geo "სვეტის დუბლირებული სახელი '%-.192s'"
+        greek "Επανάληψη column name '%-.192s'"
+        hindi "समान कॉलम '%-.192s' मौजूद है"
+        hun "Duplikalt oszlopazonosito: '%-.192s'"
+        ita "Nome colonna duplicato '%-.192s'"
+        jpn "列名 '%-.192s' は重複してます。"
+        kor "중복된 칼럼 이름: '%-.192s'"
+        nla "Dubbele kolom naam '%-.192s'"
+        nor "Feltnavnet '%-.192s' eksisterte fra før"
+        norwegian-ny "Feltnamnet '%-.192s' eksisterte frå før"
+        pol "Powtórzona nazwa kolumny '%-.192s'"
+        por "Nome da coluna '%-.192s' duplicado"
+        rum "Numele coloanei '%-.192s' e duplicat"
+        rus "Дублирующееся имя столбца '%-.192s'"
+        serbian "Duplirano ime kolone '%-.192s'"
+        slo "Opakované meno poľa '%-.192s'"
+        spa "Nombre duplicado de columna '%-.192s'"
+        swe "Kolumnnamn '%-.192s' finns flera gånger"
+        ukr "Дублююче ім'я стовбця '%-.192s'"
+ER_DUP_KEYNAME 42000 S1009
+        chi "索引名重复 '%-.192s'"
+        cze "Zdvojené jméno klíče '%-.192s'"
+        dan "Indeksnavnet '%-.192s' findes allerede"
+        eng "Duplicate key name '%-.192s'"
+        est "Kattuv võtme nimi '%-.192s'"
+        fre "Nom de clef '%-.192s' déjà utilisé"
+        ger "Doppelter Name für Schlüssel vorhanden: '%-.192s'"
+        geo "გასაღების დუბლირებული სახელი '%-.192s'"
+        greek "Επανάληψη key name '%-.192s'"
+        hindi "समान KEY '%-.192s' मौजूद है"
+        hun "Duplikalt kulcsazonosito: '%-.192s'"
+        ita "Nome chiave duplicato '%-.192s'"
+        jpn "索引名 '%-.192s' は重複しています。"
+        kor "중복된 키 이름 : '%-.192s'"
+        nla "Dubbele zoeksleutel naam '%-.192s'"
+        nor "Nøkkelnavnet '%-.192s' eksisterte fra før"
+        norwegian-ny "Nøkkelnamnet '%-.192s' eksisterte frå før"
+        pol "Powtórzony nazwa klucza '%-.192s'"
+        por "Nome da chave '%-.192s' duplicado"
+        rum "Numele cheiei '%-.192s' e duplicat"
+        rus "Дублирующееся имя ключа '%-.192s'"
+        serbian "Duplirano ime ključa '%-.192s'"
+        slo "Opakované meno kľúča '%-.192s'"
+        spa "Nombre duplicado de clave '%-.192s'"
+        swe "Nyckelnamn '%-.192s' finns flera gånger"
+        ukr "Дублююче ім'я ключа '%-.192s'"
+# When using this error code, please use ER(ER_DUP_ENTRY_WITH_KEY_NAME)
+# for the message string.  See, for example, code in handler.cc.
+ER_DUP_ENTRY 23000 S1009
+        chi "重复条目'%-.192T'在索引%d"
+        cze "Zdvojený klíč '%-.192T' (číslo klíče %d)"
+        dan "Ens værdier '%-.192T' for indeks %d"
+        eng "Duplicate entry '%-.192T' for key %d"
+        est "Kattuv väärtus '%-.192T' võtmele %d"
+        fre "Duplicata du champ '%-.192T' pour la clef %d"
+        ger "Doppelter Eintrag '%-.192T' für Schlüssel %d"
+        geo "დუბლირებული ჩანაწერი '%-.192T' გასაღებისთვის %d"
+        greek "Διπλή εγγραφή '%-.192T' για το κλειδί %d"
+        hindi "सामान प्रवेश '%-.192T' KEY %d के लिए"
+        hun "Duplikalt bejegyzes '%-.192T' a %d kulcs szerint"
+        ita "Valore duplicato '%-.192T' per la chiave %d"
+        jpn "'%-.192T' は索引 %d で重複しています。"
+        kor "중복된 입력 값 '%-.192T': key %d"
+        nla "Dubbele ingang '%-.192T' voor zoeksleutel %d"
+        nor "Like verdier '%-.192T' for nøkkel %d"
+        norwegian-ny "Like verdiar '%-.192T' for nykkel %d"
+        pol "Powtórzone wystąpienie '%-.192T' dla klucza %d"
+        por "Entrada '%-.192T' duplicada para a chave %d"
+        rum "Cimpul '%-.192T' e duplicat pentru cheia %d"
+        rus "Дублирующаяся запись '%-.192T' по ключу %d"
+        serbian "Dupliran unos '%-.192T' za ključ '%d'"
+        slo "Opakovaný kľúč '%-.192T' (číslo kľúča %d)"
+        spa "Entrada duplicada '%-.192T' para la clave %d"
+        swe "Dublett '%-.192T' för nyckel %d"
+        ukr "Дублюючий запис '%-.192T' для ключа %d"
+ER_WRONG_FIELD_SPEC 42000 S1009
+        chi "列的说明符不对 '%-.192s'"
+        cze "Chybná specifikace sloupce '%-.192s'"
+        dan "Forkert kolonnespecifikaton for felt '%-.192s'"
+        eng "Incorrect column specifier for column '%-.192s'"
+        est "Vigane tulba kirjeldus tulbale '%-.192s'"
+        fre "Mauvais paramètre de champ pour le champ '%-.192s'"
+        geo "სვეტის არასწორი მსაზღვრავი სვეტისთვის '%-.192s'"
+        ger "Falsche Spezifikation für Feld '%-.192s'"
+        greek "Εσφαλμένο column specifier για το πεδίο '%-.192s'"
+        hindi "कॉलम '%-.192s' के लिए गलत कॉलम विनिर्देशक"
+        hun "Rossz oszlopazonosito: '%-.192s'"
+        ita "Specifica errata per la colonna '%-.192s'"
+        jpn "列 '%-.192s' の定義が不正です。"
+        kor "칼럼 '%-.192s'의 부정확한 칼럼 정의자"
+        nla "Verkeerde kolom specificatie voor kolom '%-.192s'"
+        nor "Feil kolonne spesifikator for felt '%-.192s'"
+        norwegian-ny "Feil kolonne spesifikator for kolonne '%-.192s'"
+        pol "Błędna specyfikacja kolumny dla kolumny '%-.192s'"
+        por "Especificador de coluna incorreto para a coluna '%-.192s'"
+        rum "Specificandul coloanei '%-.192s' este incorect"
+        rus "Некорректный определитель столбца для столбца '%-.192s'"
+        serbian "Pogrešan naziv kolone za kolonu '%-.192s'"
+        slo "Chyba v špecifikácii poľa '%-.192s'"
+        spa "Especificador de columna incorrecto para la columna '%-.192s'"
+        swe "Felaktigt kolumntyp för kolumn '%-.192s'"
+        ukr "Невірний специфікатор стовбця '%-.192s'"
+ER_PARSE_ERROR 42000 s1009
+        chi "%s 附近'%-.80T'在第%d行"
+        cze "%s blízko '%-.80T' na řádku %d"
+        dan "%s nær '%-.80T' på linje %d"
+        eng "%s near '%-.80T' at line %d"
+        est "%s '%-.80T' ligidal real %d"
+        fre "%s près de '%-.80T' à la ligne %d"
+        ger "%s bei '%-.80T' in Zeile %d"
+        geo "%s '%-.80T'-სთან ახლოს ხაზზე %d"
+        greek "%s πλησίον '%-.80T' στη γραμμή %d"
+        hindi "%s के पास '%-.80T' लाइन %d में"
+        hun "A %s a '%-.80T'-hez kozeli a %d sorban"
+        ita "%s vicino a '%-.80T' linea %d"
+        jpn "%s : '%-.80T' 付近 %d 行目"
+        kor "'%s' 에러 같습니다. ('%-.80T' 명령어 라인 %d)"
+        nla "%s bij '%-.80T' in regel %d"
+        nor "%s nær '%-.80T' på linje %d"
+        norwegian-ny "%s attmed '%-.80T' på line %d"
+        pol "%s obok '%-.80T' w linii %d"
+        por "%s próximo a '%-.80T' na linha %d"
+        rum "%s linga '%-.80T' pe linia %d"
+        rus "%s около '%-.80T' на строке %d"
+        serbian "'%s' u iskazu '%-.80T' na liniji %d"
+        slo "%s blízko '%-.80T' na riadku %d"
+        spa "%s cerca de '%-.80T' en la línea %d"
+        swe "%s nära '%-.80T' på rad %d"
+        ukr "%s біля '%-.80T' в строці %d"
+ER_EMPTY_QUERY 42000  
+        chi "查询为空"
+        cze "Výsledek dotazu je prázdný"
+        dan "Forespørgsel var tom"
+        eng "Query was empty"
+        est "Tühi päring"
+        fre "Query est vide"
+        ger "Leere Abfrage"
+        geo "მოთხოვნა ცარიელია"
+        greek "Το ερώτημα (query) που θέσατε ήταν κενό"
+        hindi "क्वेरी खली थी"
+        hun "Ures lekerdezes"
+        ita "La query e` vuota"
+        jpn "クエリが空です。"
+        kor "쿼리결과가 없습니다."
+        nla "Query was leeg"
+        nor "Forespørsel var tom"
+        norwegian-ny "Førespurnad var tom"
+        pol "Zapytanie było puste"
+        por "Consulta (query) estava vazia"
+        rum "Query-ul a fost gol"
+        rus "Запрос оказался пустым"
+        serbian "Upit je bio prazan"
+        slo "Výsledok požiadavky bol prázdny"
+        spa "La consulta (query) estaba vacia"
+        swe "Frågan var tom"
+        ukr "Пустий запит"
+ER_NONUNIQ_TABLE 42000 S1009
+        chi "表或别名不唯一:'%-.192s'"
+        cze "Nejednoznačná tabulka/alias: '%-.192s'"
+        dan "Tabellen/aliaset: '%-.192s' er ikke unikt"
+        eng "Not unique table/alias: '%-.192s'"
+        est "Ei ole unikaalne tabel/alias '%-.192s'"
+        fre "Table/alias: '%-.192s' non unique"
+        ger "Tabellenname/Alias '%-.192s' nicht eindeutig"
+        geo "ცხრილი/ფსევდონიმი უნიკალური არაა: '%-.192s'"
+        greek "Αδύνατη η ανεύρεση unique table/alias: '%-.192s'"
+        hindi "टेबल या उसका उपनाम '%-.192s' अद्वितीय नहीं है"
+        hun "Nem egyedi tabla/alias: '%-.192s'"
+        ita "Tabella/alias non unico: '%-.192s'"
+        jpn "表名/別名 '%-.192s' は一意ではありません。"
+        kor "Unique 하지 않은 테이블/alias: '%-.192s'"
+        nla "Niet unieke waarde tabel/alias: '%-.192s'"
+        nor "Ikke unikt tabell/alias: '%-.192s'"
+        norwegian-ny "Ikkje unikt tabell/alias: '%-.192s'"
+        pol "Tabela/alias nie s? unikalne: '%-.192s'"
+        por "Tabela/alias '%-.192s' não única"
+        rum "Tabela/alias: '%-.192s' nu este unic"
+        rus "Повторяющаяся таблица/псевдоним '%-.192s'"
+        serbian "Tabela ili alias nisu bili jedinstveni: '%-.192s'"
+        slo "Nie jednoznačná tabuľka/alias: '%-.192s'"
+        spa "La tabla/alias: '%-.192s' no es única"
+        swe "Icke unikt tabell/alias: '%-.192s'"
+        ukr "Неунікальна таблиця/псевдонім: '%-.192s'"
+ER_INVALID_DEFAULT 42000 S1009
+        chi "'%-.192s'的默认值无效"
+        cze "Chybná defaultní hodnota pro '%-.192s'"
+        dan "Ugyldig standardværdi for '%-.192s'"
+        eng "Invalid default value for '%-.192s'"
+        est "Vigane vaikeväärtus '%-.192s' jaoks"
+        fre "Valeur par défaut invalide pour '%-.192s'"
+        ger "Fehlerhafter Vorgabewert (DEFAULT) für '%-.192s'"
+        geo "არასწორი ნაგულისხმები მნიშვნელობა '%-.192s'-სთვის"
+        greek "Εσφαλμένη προκαθορισμένη τιμή (default value) για '%-.192s'"
+        hindi "'%-.192s' के लिए अवैध डिफ़ॉल्ट मान"
+        hun "Ervenytelen ertek: '%-.192s'"
+        ita "Valore di default non valido per '%-.192s'"
+        jpn "'%-.192s' へのデフォルト値が無効です。"
+        kor "'%-.192s'의 유효하지 못한 디폴트 값을 사용하셨습니다."
+        nla "Foutieve standaard waarde voor '%-.192s'"
+        nor "Ugyldig standardverdi for '%-.192s'"
+        norwegian-ny "Ugyldig standardverdi for '%-.192s'"
+        pol "Niewła?ciwa warto?ć domy?lna dla '%-.192s'"
+        por "Valor padrão (default) inválido para '%-.192s'"
+        rum "Valoarea de default este invalida pentru '%-.192s'"
+        rus "Некорректное значение по умолчанию для '%-.192s'"
+        serbian "Loša default vrednost za '%-.192s'"
+        slo "Chybná implicitná hodnota pre '%-.192s'"
+        spa "Valor por defecto inválido para '%-.192s'"
+        swe "Ogiltigt DEFAULT värde för '%-.192s'"
+        ukr "Невірне значення по замовчуванню для '%-.192s'"
+ER_MULTIPLE_PRI_KEY 42000 S1009
+        chi "定义了多个主键"
+        cze "Definováno více primárních klíčů"
+        dan "Flere primærnøgler specificeret"
+        eng "Multiple primary key defined"
+        est "Mitut primaarset võtit ei saa olla"
+        fre "Plusieurs clefs primaires définies"
+        ger "Mehrere Primärschlüssel (PRIMARY KEY) definiert"
+        geo "აღწერილია ერთზე მეტი ძირითადი გასაღები (PRIMARY KEY)"
+        greek "Περισσότερα από ένα primary key ορίστηκαν"
+        hindi "कई PRIMARY KEY परिभाषित"
+        hun "Tobbszoros elsodleges kulcs definialas"
+        ita "Definite piu` chiave primarie"
+        jpn "PRIMARY KEY が複数定義されています。"
+        kor "Multiple primary key가 정의되어 있슴"
+        nla "Meerdere primaire zoeksleutels gedefinieerd"
+        nor "Fleire primærnøkle spesifisert"
+        norwegian-ny "Fleire primærnyklar spesifisert"
+        pol "Zdefiniowano wiele kluczy podstawowych"
+        por "Definida mais de uma chave primária"
+        rum "Chei primare definite de mai multe ori"
+        rus "Указано несколько первичных ключей"
+        serbian "Definisani višestruki primarni ključevi"
+        slo "Zadefinovaných viac primárnych kľúčov"
+        spa "Múltiples claves primarias definidas"
+        swe "Flera PRIMARY KEY använda"
+        ukr "Первинного ключа визначено неодноразово"
+ER_TOO_MANY_KEYS 42000 S1009
+        chi "定义的索引太多; 最多允许%d 个索引"
+        cze "Zadáno příliš mnoho klíčů, je povoleno nejvíce %d klíčů"
+        dan "For mange nøgler specificeret. Kun %d nøgler må bruges"
+        eng "Too many keys specified; max %d keys allowed"
+        est "Liiga palju võtmeid. Maksimaalselt võib olla %d võtit"
+        fre "Trop de clefs sont définies. Maximum de %d clefs alloué"
+        ger "Zu viele Schlüssel definiert. Maximal %d Schlüssel erlaubt"
+        geo "მითითებულია მეტისმეტად ბევრი გასაღები. დაშვებულია მაქსიმუმ %d"
+        greek "Πάρα πολλά key ορίσθηκαν. Το πολύ %d επιτρέπονται"
+        hindi "बहुत सारी KEYS निर्दिष्ट हैं; अधिकतम %d KEYS की अनुमति है"
+        hun "Tul sok kulcs. Maximum %d kulcs engedelyezett"
+        ita "Troppe chiavi. Sono ammesse max %d chiavi"
+        jpn "索引の数が多すぎます。最大 %d 個までです。"
+        kor "너무 많은 키가 정의되어 있습니다.. 최대 %d의 키가 가능함"
+        nla "Teveel zoeksleutels gedefinieerd. Maximaal zijn %d zoeksleutels toegestaan"
+        nor "For mange nøkler spesifisert. Maks %d nøkler tillatt"
+        norwegian-ny "For mange nykler spesifisert. Maks %d nyklar tillatt"
+        pol "Okre?lono zbyt wiele kluczy. Dostępnych jest maksymalnie %d kluczy"
+        por "Especificadas chaves demais. O máximo permitido são %d chaves"
+        rum "Prea multe chei. Numarul de chei maxim este %d"
+        rus "Указано слишком много ключей. Разрешается указывать не более %d ключей"
+        serbian "Navedeno je previše ključeva. Maksimum %d ključeva je dozvoljeno"
+        slo "Zadaných príliš veľa kľúčov. Najviac %d kľúčov je povolených"
+        spa "Demasiadas claves primarias declaradas. Se permite un máximo de %d claves"
+        swe "För många nycklar använda. Man får ha högst %d nycklar"
+        ukr "Забагато ключів зазначено. Дозволено не більше %d ключів"
+ER_TOO_MANY_KEY_PARTS 42000 S1009
+        chi "指定的索引部分过多; 最多允许%d个部分"
+        cze "Zadáno příliš mnoho část klíčů, je povoleno nejvíce %d částí"
+        dan "For mange nøgledele specificeret. Kun %d dele må bruges"
+        eng "Too many key parts specified; max %d parts allowed"
+        est "Võti koosneb liiga paljudest osadest. Maksimaalselt võib olla %d osa"
+        fre "Trop de parties specifiées dans la clef. Maximum de %d parties"
+        ger "Zu viele Teilschlüssel definiert. Maximal %d Teilschlüssel erlaubt"
+        geo "მითითებულია მეტისმეტად ბევრი ნაწილი. დაშვებულია მაქსიმუმ %d"
+        greek "Πάρα πολλά key parts ορίσθηκαν. Το πολύ %d επιτρέπονται"
+        hindi "बहुत सारे KEY के भाग निर्दिष्ट हैं; अधिकतम %d भागों की अनुमति है"
+        hun "Tul sok kulcsdarabot definialt. Maximum %d resz engedelyezett"
+        ita "Troppe parti di chiave specificate. Sono ammesse max %d parti"
+        jpn "索引のキー列指定が多すぎます。最大 %d 個までです。"
+        kor "너무 많은 키 부분(parts)들이 정의되어 있습니다.. 최대 %d 부분이 가능함"
+        nla "Teveel zoeksleutel onderdelen gespecificeerd. Maximaal %d onderdelen toegestaan"
+        nor "For mange nøkkeldeler spesifisert. Maks %d deler tillatt"
+        norwegian-ny "For mange nykkeldelar spesifisert. Maks %d delar tillatt"
+        pol "Okre?lono zbyt wiele czę?ci klucza. Dostępnych jest maksymalnie %d czę?ci"
+        por "Especificadas partes de chave demais. O máximo permitido são %d partes"
+        rum "Prea multe chei. Numarul de chei maxim este %d"
+        rus "Указано слишком много частей составного ключа. Разрешается указывать не более %d частей"
+        serbian "Navedeno je previše delova ključa. Maksimum %d delova je dozvoljeno"
+        slo "Zadaných príliš veľa častí kľúčov. Je povolených najviac %d častí"
+        spa "Demasiadas partes de clave declaradas. Se permite un máximo de %d partes"
+        swe "För många nyckeldelar använda. Man får ha högst %d nyckeldelar"
+        ukr "Забагато частин ключа зазначено. Дозволено не більше %d частин"
+ER_TOO_LONG_KEY 42000 S1009
+        chi "指定索引太长; 最大索引长度是 %d字节"
+        cze "Zadaný klíč byl příliš dlouhý, největší délka klíče je %d"
+        dan "Specificeret nøgle var for lang. Maksimal nøglelængde er %d"
+        eng "Specified key was too long; max key length is %d bytes"
+        est "Võti on liiga pikk. Maksimaalne võtmepikkus on %d"
+        fre "La clé est trop longue. Longueur maximale: %d"
+        ger "Schlüssel ist zu lang. Die maximale Schlüssellänge beträgt %d"
+        geo "მითითებული გასაღები მეტისმეტად გრძელია. დასაშვები მაქსიმალური სიგრძე: %d"
+        greek "Το κλειδί που ορίσθηκε είναι πολύ μεγάλο. Το μέγιστο μήκος είναι %d"
+        hindi "निर्दिष्ट KEY बहुत लंबी थी; KEY की अधिकतम लंबाई %d बाइट है"
+        hun "A megadott kulcs tul hosszu. Maximalis kulcshosszusag: %d"
+        ita "La chiave specificata e` troppo lunga. La max lunghezza della chiave e` %d"
+        jpn "索引のキーが長すぎます。最大 %d バイトまでです。"
+        kor "정의된 키가 너무 깁니다. 최대 키의 길이는 %d입니다."
+        nla "Gespecificeerde zoeksleutel was te lang. De maximale lengte is %d"
+        nor "Spesifisert nøkkel var for lang. Maks nøkkellengde er is %d"
+        norwegian-ny "Spesifisert nykkel var for lang. Maks nykkellengde er %d"
+        pol "Zdefinowany klucz jest zbyt długi. Maksymaln? długo?ci? klucza jest %d"
+        por "Chave especificada longa demais. O comprimento de chave máximo permitido é %d"
+        rum "Cheia specificata este prea lunga. Marimea maxima a unei chei este de %d"
+        rus "Указан слишком длинный ключ. Максимальная длина ключа составляет %d байт"
+        serbian "Navedeni ključ je predug. Maksimalna dužina ključa je %d"
+        slo "Zadaný kľúč je príliš dlhý, najväčšia dĺžka kľúča je %d"
+        spa "Declaración de clave demasiado larga. La máxima longitud de clave es de %d"
+        swe "För lång nyckel. Högsta tillåtna nyckellängd är %d"
+        ukr "Зазначений ключ задовгий. Найбільша довжина ключа %d байтів"
+ER_KEY_COLUMN_DOES_NOT_EXIST 42000 S1009
+        chi "索引列'%-.192s'不在表里"
+        cze "Klíčový sloupec '%-.192s' v tabulce neexistuje"
+        dan "Nøglefeltet '%-.192s' eksisterer ikke i tabellen"
+        eng "Key column '%-.192s' doesn't exist in table"
+        est "Võtme tulp '%-.192s' puudub tabelis"
+        fre "La clé '%-.192s' n'existe pas dans la table"
+        ger "In der Tabelle gibt es kein Schlüsselfeld '%-.192s'"
+        geo "გასაღების სვეტი '%-.192s' ცხრილში არ არსებობს"
+        greek "Το πεδίο κλειδί '%-.192s' δεν υπάρχει στον πίνακα"
+        hindi "KEY कॉलम '%-.192s' टेबल में मौजूद नहीं है"
+        hun "A(z) '%-.192s'kulcsoszlop nem letezik a tablaban"
+        ita "La colonna chiave '%-.192s' non esiste nella tabella"
+        jpn "キー列 '%-.192s' は表にありません。"
+        kor "Key 칼럼 '%-.192s'는 테이블에 존재하지 않습니다."
+        nla "Zoeksleutel kolom '%-.192s' bestaat niet in tabel"
+        nor "Nøkkel felt '%-.192s' eksiterer ikke i tabellen"
+        norwegian-ny "Nykkel kolonne '%-.192s' eksiterar ikkje i tabellen"
+        pol "Kolumna '%-.192s' zdefiniowana w kluczu nie istnieje w tabeli"
+        por "Coluna chave '%-.192s' não existe na tabela"
+        rum "Coloana cheie '%-.192s' nu exista in tabela"
+        rus "Ключевой столбец '%-.192s' в таблице не существует"
+        serbian "Ključna kolona '%-.192s' ne postoji u tabeli"
+        slo "Kľúčový stĺpec '%-.192s' v tabuľke neexistuje"
+        spa "La columna de clave '%-.192s' no existe en la tabla"
+        swe "Nyckelkolumn '%-.192s' finns inte"
+        ukr "Ключовий стовбець '%-.192s' не існує у таблиці"
+ER_BLOB_USED_AS_KEY 42000 S1009
+        chi "索引里不能含有BLOB列%`s表%s"
+        eng "BLOB column %`s can't be used in key specification in the %s table"
+        ger "BLOB-Feld %`s kann beim %s Tabellen nicht als Schlüssel verwendet werden"
+        geo "BLOB-ის ტიპის სვეტი %`s %s ცხრილში გასაღების მნიშვნელობა ვერ იქნება"
+        hindi "BLOB कॉलम %`s टेबल %s में KEY विनिर्देश में इस्तेमाल नहीं किया जा सकता"
+        rus "Столбец типа BLOB %`s не может быть использован как значение ключа в %s таблице"
+        spa "La columna BLOB %`s no se puede usar en la especificación de clave en la tabla %s"
+        ukr "BLOB стовбець %`s не може бути використаний у визначенні ключа в %s таблиці"
+ER_TOO_BIG_FIELDLENGTH 42000 S1009
+        chi "数据太长超过列容量 '%-.192s' (最长 = %lu); 用 BLOB 或 TEXT 替代"
+        cze "Příliš velká délka sloupce '%-.192s' (nejvíce %lu). Použijte BLOB"
+        dan "For stor feltlængde for kolonne '%-.192s' (maks = %lu). Brug BLOB i stedet"
+        eng "Column length too big for column '%-.192s' (max = %lu); use BLOB or TEXT instead"
+        est "Tulba '%-.192s' pikkus on liiga pikk (maksimaalne pikkus: %lu). Kasuta BLOB väljatüüpi"
+        fre "Champ '%-.192s' trop long (max = %lu). Utilisez un BLOB"
+        ger "Feldlänge für Feld '%-.192s' zu groß (maximal %lu). BLOB- oder TEXT-Spaltentyp verwenden!"
+        geo "სვეტის სიგრძე სვეტისთვის '%-.192s' მეტისმეტად დიდია (მაქს = %lu). სცადეთ, ამის მაგიერ BLOB ან TEXT გამოიყენოთ"
+        greek "Πολύ μεγάλο μήκος για το πεδίο '%-.192s' (max = %lu). Παρακαλώ χρησιμοποιείστε τον τύπο BLOB"
+        hindi "कॉलम की लंबाई कॉलम '%-.192s' के लिए बड़ी है (अधिकतम = %lu); BLOB या TEXT का उपयोग करें"
+        hun "A(z) '%-.192s' oszlop tul hosszu. (maximum = %lu). Hasznaljon BLOB tipust inkabb"
+        ita "La colonna '%-.192s' e` troppo grande (max=%lu). Utilizza un BLOB"
+        jpn "列 '%-.192s' のサイズ定義が大きすぎます (最大 %lu まで)。代わりに BLOB または TEXT を使用してください。"
+        kor "칼럼 '%-.192s'의 칼럼 길이가 너무 깁니다 (최대 = %lu). 대신에 BLOB를 사용하세요."
+        nla "Te grote kolomlengte voor '%-.192s' (max = %lu). Maak hiervoor gebruik van het type BLOB"
+        nor "For stor nøkkellengde for kolonne '%-.192s' (maks = %lu). Bruk BLOB istedenfor"
+        norwegian-ny "For stor nykkellengde for felt '%-.192s' (maks = %lu). Bruk BLOB istadenfor"
+        pol "Zbyt duża długo?ć kolumny '%-.192s' (maks. = %lu). W zamian użyj typu BLOB"
+        por "Comprimento da coluna '%-.192s' grande demais (max = %lu); use BLOB em seu lugar"
+        rum "Lungimea coloanei '%-.192s' este prea lunga (maximum = %lu). Foloseste BLOB mai bine"
+        rus "Слишком большая длина столбца '%-.192s' (максимум = %lu). Используйте тип BLOB или TEXT вместо текущего"
+        serbian "Previše podataka za kolonu '%-.192s' (maksimum je %lu). Upotrebite BLOB polje"
+        slo "Príliš veľká dĺžka pre pole '%-.192s' (maximum = %lu). Použite BLOB"
+        spa "Longitud de columna demasiado grande para la columna '%-.192s' (máximo = %lu). Use BLOB o TEXT en su lugar"
+        swe "För stor kolumnlängd angiven för '%-.192s' (max= %lu). Använd en BLOB instället"
+        ukr "Задовга довжина стовбця '%-.192s' (max = %lu). Використайте тип BLOB"
+ER_WRONG_AUTO_KEY 42000 S1009
+        chi "表定义不正确;只能有一个自动列,并且必须将其定义为索引"
+        cze "Můžete mít pouze jedno AUTO pole a to musí být definováno jako klíč"
+        dan "Der kan kun specificeres eet AUTO_INCREMENT-felt, og det skal være indekseret"
+        eng "Incorrect table definition; there can be only one auto column and it must be defined as a key"
+        est "Vigane tabelikirjeldus; Tabelis tohib olla üks auto_increment tüüpi tulp ning see peab olema defineeritud võtmena"
+        fre "Un seul champ automatique est permis et il doit être indexé"
+        ger "Falsche Tabellendefinition. Es darf nur eine AUTO_INCREMENT-Spalte geben, und diese muss als Schlüssel definiert werden"
+        geo "ცხრილის არასწორი აღწერა. შეიძლება არსებობდეს მხოლოდ ერთი თვითზრდადი სვეტი და ის როგორც გასაღები, ისე უნდა აღწეროთ"
+        greek "Μπορεί να υπάρχει μόνο ένα auto field και πρέπει να έχει ορισθεί σαν key"
+        hindi "गलत टेबल परिभाषा; टेबल में केवल एक AUTO_INCREMENT कॉलम हो सकता है और इसे एक KEY के रूप में परिभाषित किया जाना चाहिए"
+        hun "Csak egy auto mezo lehetseges, es azt kulcskent kell definialni"
+        ita "Puo` esserci solo un campo AUTO e deve essere definito come chiave"
+        jpn "不正な表定義です。AUTO_INCREMENT列は1個までで、索引を定義する必要があります。"
+        kor "부정확한 테이블 정의; 테이블은 하나의 auto 칼럼이 존재하고 키로 정의되어져야 합니다."
+        nla "Er kan slechts 1 autofield zijn en deze moet als zoeksleutel worden gedefinieerd"
+        nor "Bare ett auto felt kan være definert som nøkkel"
+        norwegian-ny "Bare eitt auto felt kan være definert som nøkkel"
+        pol "W tabeli może być tylko jedno pole auto i musi ono być zdefiniowane jako klucz"
+        por "Definição incorreta de tabela. Somente é permitido um único campo auto-incrementado e ele tem que ser definido como chave"
+        rum "Definitia tabelei este incorecta; Nu pot fi mai mult de o singura coloana de tip auto si aceasta trebuie definita ca cheie"
+        rus "Некорректное определение таблицы: может существовать только один автоинкрементный столбец, и он должен быть определен как ключ"
+        serbian "Pogrešna definicija tabele; U tabeli može postojati samo jedna 'AUTO' kolona i ona mora biti istovremeno definisana kao kolona ključa"
+        slo "Môžete mať iba jedno AUTO pole a to musí byť definované ako kľúč"
+        spa "Definición incorrecta de tabla; sólo puede haber una columna automática y ésta debe de ser definida como una clave"
+        swe "Det får finnas endast ett AUTO_INCREMENT-fält och detta måste vara en nyckel"
+        ukr "Хибне визначення таблиці; Може бути лише один автоматичний стовбець, що повинен бути визначений як ключ"
+ER_BINLOG_CANT_DELETE_GTID_DOMAIN
+        chi "无法删除gtid域. 原因: %s."
+        eng "Could not delete gtid domain. Reason: %s."
+        geo "შეცდომა gtid დომენის წაშლისას. მიზეზი: %s."
+        spa "No pude borrar el dominio gtid. Motivo: %s."
+        ukr "Не можу видалити домен gtid. Причина: %s."
+ER_NORMAL_SHUTDOWN  
+        chi "%s(%s):正常关闭"
+        cze "%s (%s): normální ukončení"
+        dan "%s (%s): Normal nedlukning"
+        eng "%s (initiated by: %s): Normal shutdown"
+        est "%s (%s): MariaDB lõpetas"
+        fre "%s (%s): Arrêt normal du serveur"
+        ger "%s (%s): Normal heruntergefahren"
+        geo "%s (%s): მუშაობის ნორმალური დასრულება"
+        greek "%s (%s): Φυσιολογική διαδικασία shutdown"
+        hindi "%s (%s): सामान्य शटडाउन"
+        hun "%s (%s): Normal leallitas"
+        ita "%s (%s): Shutdown normale"
+        jpn "%s (%s): 通常シャットダウン"
+        kor "%s (%s): 정상적인 shutdown"
+        nla "%s (%s): Normaal afgesloten "
+        nor "%s (%s): Normal avslutning"
+        norwegian-ny "%s (%s): Normal nedkopling"
+        pol "%s (%s): Standardowe zakończenie działania"
+        por "%s (%s): 'Shutdown' normal"
+        rum "%s (%s): Terminare normala"
+        rus "%s (инициирована пользователем: %s): Корректная остановка"
+        serbian "%s (%s): Normalno gašenje"
+        slo "%s (%s): normálne ukončenie"
+        spa "%s (iniciado por: %s): Apagado normal"
+        swe "%s (%s): Normal avslutning"
+        ukr "%s (%s): Нормальне завершення"
+ER_GOT_SIGNAL  
+        chi "%s: 收到信号 %d. 强行中止!\n"
+        cze "%s: přijat signal %d, končím\n"
+        dan "%s: Fangede signal %d. Afslutter!!\n"
+        eng "%s: Got signal %d. Aborting!\n"
+        est "%s: sain signaali %d. Lõpetan!\n"
+        fre "%s: Reçu le signal %d. Abandonne!\n"
+        ger "%s: Signal %d erhalten. Abbruch!\n"
+        geo "%s: მიღებულია სიგნალი %d. გაუქმება! \n"
+        greek "%s: Ελήφθη το μήνυμα %d. Η διαδικασία εγκαταλείπεται!\n"
+        hindi "%s: सिग्नल %d मिलने के कारण सिस्टम बंद किया जा रहा है!\n"
+        hun "%s: %d jelzes. Megszakitva!\n"
+        ita "%s: Ricevuto segnale %d. Interruzione!\n"
+        jpn "%s: シグナル %d を受信しました。強制終了します!\n"
+        kor "%s: %d 신호가 들어왔음. 중지!\n"
+        nla "%s: Signaal %d. Systeem breekt af!\n"
+        nor "%s: Oppdaget signal %d. Avslutter!\n"
+        norwegian-ny "%s: Oppdaga signal %d. Avsluttar!\n"
+        pol "%s: Otrzymano sygnał %d. Kończenie działania!\n"
+        por "%s: Obteve sinal %d. Abortando!\n"
+        rum "%s: Semnal %d obtinut. Aborting!\n"
+        rus "%s: Получен сигнал %d. Прекращаем!\n"
+        serbian "%s: Dobio signal %d. Prekidam!\n"
+        slo "%s: prijatý signál %d, ukončenie (Abort)!\n"
+        spa "%s: Obtenida señal %d. ¡Abortando!\n"
+        swe "%s: Fick signal %d. Avslutar!\n"
+        ukr "%s: Отримано сигнал %d. Перериваюсь!\n"
+ER_SHUTDOWN_COMPLETE  
+        chi "%s:关闭完成\n"
+        cze "%s: ukončení práce hotovo\n"
+        dan "%s: Server lukket\n"
+        eng "%s: Shutdown complete\n"
+        est "%s: Lõpp\n"
+        fre "%s: Arrêt du serveur terminé\n"
+        ger "%s: Herunterfahren beendet\n"
+        geo "%s: მუშაობა წარმატებით დასრულდა\n"
+        greek "%s: Η διαδικασία Shutdown ολοκληρώθηκε\n"
+        hindi "%s: शटडाउन पूर्ण\n"
+        hun "%s: A leallitas kesz\n"
+        ita "%s: Shutdown completato\n"
+        jpn "%s: シャットダウン完了\n"
+        kor "%s: Shutdown 이 완료됨!\n"
+        nla "%s: Afsluiten afgerond\n"
+        nor "%s: Avslutning komplett\n"
+        norwegian-ny "%s: Nedkopling komplett\n"
+        pol "%s: Zakończenie działania wykonane\n"
+        por "%s: 'Shutdown' completo\n"
+        rum "%s: Terminare completa\n"
+        rus "%s: Остановка завершена\n"
+        serbian "%s: Gašenje završeno\n"
+        slo "%s: práca ukončená\n"
+        spa "%s: Apagado completado\n"
+        swe "%s: Avslutning klar\n"
+        ukr "%s: Роботу завершено\n"
+ER_FORCING_CLOSE 08S01 
+        chi "%s: 强行关闭线程 %ld  用户: '%-.48s'\n"
+        cze "%s: násilné uzavření threadu %ld uživatele '%-.48s'\n"
+        dan "%s: Forceret nedlukning af tråd: %ld  bruger: '%-.48s'\n"
+        eng "%s: Forcing close of thread %ld  user: '%-.48s'\n"
+        est "%s: Sulgen jõuga lõime %ld  kasutaja: '%-.48s'\n"
+        fre "%s: Arrêt forcé de la tâche (thread) %ld  utilisateur: '%-.48s'\n"
+        ger "%s: Thread %ld zwangsweise beendet. Benutzer: '%-.48s'\n"
+        geo "%s: ნაძალადევი დახურვა: ნაკადი: %ld, მომხმარებელი: '%-.48s'\n"
+        greek "%s: Το thread θα κλείσει %ld  user: '%-.48s'\n"
+        hindi "%s: %ld थ्रेड बंद किया जा रहा है (यूज़र: '%-.48s')\n" 
+        hun "%s: A(z) %ld thread kenyszeritett zarasa. Felhasznalo: '%-.48s'\n"
+        ita "%s: Forzata la chiusura del thread %ld utente: '%-.48s'\n"
+        jpn "%s: スレッド %ld を強制終了します (ユーザー: '%-.48s')\n"
+        kor "%s: thread %ld의 강제 종료 user: '%-.48s'\n"
+        nla "%s: Afsluiten afgedwongen van thread %ld  gebruiker: '%-.48s'\n"
+        nor "%s: Påtvinget avslutning av tråd %ld  bruker: '%-.48s'\n"
+        norwegian-ny "%s: Påtvinga avslutning av tråd %ld  brukar: '%-.48s'\n"
+        pol "%s: Wymuszenie zamknięcia w?tku %ld  użytkownik: '%-.48s'\n"
+        por "%s: Forçando finalização da 'thread' %ld - usuário '%-.48s'\n"
+        rum "%s: Terminare fortata a thread-ului %ld  utilizatorului: '%-.48s'\n"
+        rus "%s: Принудительно закрываем поток %ld  пользователя: '%-.48s'\n"
+        serbian "%s: Usiljeno gašenje thread-a %ld koji pripada korisniku: '%-.48s'\n"
+        slo "%s: násilné ukončenie vlákna %ld užívateľa '%-.48s'\n"
+        spa "%s: Forzando a cerrar el hilo (thread) %ld usuario: '%-.48s'\n"
+        swe "%s: Stänger av tråd %ld; användare: '%-.48s'\n"
+        ukr "%s: Прискорюю закриття гілки %ld користувача: '%-.48s'\n"
+ER_IPSOCK_ERROR 08S01 
+        chi "无法创建IP插口"
+        cze "Nemohu vytvořit IP socket"
+        dan "Kan ikke oprette IP socket"
+        eng "Can't create IP socket"
+        est "Ei suuda luua IP socketit"
+        fre "Ne peut créer la connexion IP (socket)"
+        ger "Kann IP-Socket nicht erzeugen"
+        geo "IP სოკეტის შექმნა შეუძლებელია"
+        greek "Δεν είναι δυνατή η δημιουργία IP socket"
+        hindi "IP SOCKET नहीं बना सकते"
+        hun "Az IP socket nem hozhato letre"
+        ita "Impossibile creare il socket IP"
+        jpn "IPソケットを作成できません。"
+        kor "IP 소켓을 만들지 못했습니다."
+        nla "Kan IP-socket niet openen"
+        nor "Kan ikke opprette IP socket"
+        norwegian-ny "Kan ikkje opprette IP socket"
+        pol "Nie można stworzyć socket'u IP"
+        por "Não pode criar o soquete IP"
+        rum "Nu pot crea IP socket"
+        rus "Невозможно создать IP-сокет"
+        serbian "Ne mogu da kreiram IP socket"
+        slo "Nemôžem vytvoriť IP socket"
+        spa "No puedo crear IP socket"
+        swe "Kan inte skapa IP-socket"
+        ukr "Не можу створити IP роз'єм"
+ER_NO_SUCH_INDEX 42S12 S1009
+        chi "表 '%-.192s' 没有像 CREATE INDEX 中使用的索引;重新创建表"
+        cze "Tabulka '%-.192s' nemá index odpovídající CREATE INDEX. Vytvořte tabulku znovu"
+        dan "Tabellen '%-.192s' har ikke den nøgle, som blev brugt i CREATE INDEX. Genopret tabellen"
+        eng "Table '%-.192s' has no index like the one used in CREATE INDEX; recreate the table"
+        est "Tabelil '%-.192s' puuduvad võtmed. Loo tabel uuesti"
+        fre "La table '%-.192s' n'a pas d'index comme celle utilisée dans CREATE INDEX. Recréez la table"
+        ger "Tabelle '%-.192s' besitzt keinen wie den in CREATE INDEX verwendeten Index. Tabelle neu anlegen"
+        geo "ცხრილს '%-.192s' ინდექსი, როგორიც CREATE INDEX-ში იყო გამოყენებული, არ გააჩნია. ცხრილი თავიდან შექმენით"
+        greek "Ο πίνακας '%-.192s' δεν έχει ευρετήριο (index) σαν αυτό που χρησιμοποιείτε στην CREATE INDEX. Παρακαλώ, ξαναδημιουργήστε τον πίνακα"
+        hindi "CREATE INDEX में इस्तेमाल की गयी सूचि टेबल '%-.192s' में उपलब्ध नहीं है; टेबल को पुनः बनायें"
+        hun "A(z) '%-.192s' tablahoz nincs meg a CREATE INDEX altal hasznalt index. Alakitsa at a tablat"
+        ita "La tabella '%-.192s' non ha nessun indice come quello specificatato dalla CREATE INDEX. Ricrea la tabella"
+        jpn "表 '%-.192s' に以前CREATE INDEXで作成された索引がありません。表を作り直してください。"
+        kor "테이블 '%-.192s'는 인덱스를 만들지 않았습니다. alter 테이블명령을 이용하여 테이블을 수정하세요..."
+        nla "Tabel '%-.192s' heeft geen INDEX zoals deze gemaakt worden met CREATE INDEX. Maak de tabel opnieuw"
+        nor "Tabellen '%-.192s' har ingen index som den som er brukt i CREATE INDEX. Gjenopprett tabellen"
+        norwegian-ny "Tabellen '%-.192s' har ingen index som den som er brukt i CREATE INDEX. Oprett tabellen på nytt"
+        pol "Tabela '%-.192s' nie ma indeksu takiego jak w CREATE INDEX. Stwórz tabelę"
+        por "Tabela '%-.192s' não possui um índice como o usado em CREATE INDEX. Recrie a tabela"
+        rum "Tabela '%-.192s' nu are un index ca acela folosit in CREATE INDEX. Re-creeaza tabela"
+        rus "В таблице '%-.192s' нет такого индекса, как в CREATE INDEX. Создайте таблицу заново"
+        serbian "Tabela '%-.192s' nema isti indeks kao onaj upotrebljen pri komandi 'CREATE INDEX'. Napravite tabelu ponovo"
+        slo "Tabuľka '%-.192s' nemá index zodpovedajúci CREATE INDEX. Vytvorte tabulku znova"
+        spa "La tabla '%-.192s' no tiene un índice como el usado en CREATE INDEX. Crea de nuevo la tabla"
+        swe "Tabellen '%-.192s' har inget index som motsvarar det angivna i CREATE INDEX. Skapa om tabellen"
+        ukr "Таблиця '%-.192s' має індекс, що не співпадає з вказанним у CREATE INDEX. Створіть таблицю знову"
+ER_WRONG_FIELD_TERMINATORS 42000 S1009
+        chi "字段分隔符参数不合预期;查看文档"
+        cze "Argument separátoru položek nebyl očekáván. Přečtěte si manuál"
+        dan "Felt adskiller er ikke som forventet, se dokumentationen"
+        eng "Field separator argument is not what is expected; check the manual"
+        est "Väljade eraldaja erineb oodatust. Tutvu kasutajajuhendiga"
+        fre "Séparateur de champs inconnu.  Vérifiez dans le manuel"
+        ger "Feldbegrenzer-Argument ist nicht in der erwarteten Form. Bitte im Handbuch nachlesen"
+        geo "ველის გამყოფის არგუმენტი მოსალოდნელს არ ემთხვევა. გადაამოწმეთ დოკუმენტაციაში"
+        greek "Ο διαχωριστής πεδίων δεν είναι αυτός που αναμενόταν. Παρακαλώ ανατρέξτε στο manual"
+        hindi "फील्ड विभाजक आर्गुमेंट गलत है; मैनुअल की जाँच करें"
+        hun "A mezoelvalaszto argumentumok nem egyeznek meg a varttal. Nezze meg a kezikonyvben!"
+        ita "L'argomento 'Field separator' non e` quello atteso. Controlla il manuale"
+        jpn "フィールド区切り文字が予期せぬ使われ方をしています。マニュアルを確認して下さい。"
+        kor "필드 구분자 인수들이 완전하지 않습니다. 메뉴얼을 찾아 보세요."
+        nla "De argumenten om velden te scheiden zijn anders dan verwacht. Raadpleeg de handleiding"
+        nor "Felt skiller argumentene er ikke som forventet, se dokumentasjonen"
+        norwegian-ny "Felt skiljer argumenta er ikkje som venta, sjå dokumentasjonen"
+        pol "Nie oczekiwano separatora. SprawdĽ podręcznik"
+        por "Argumento separador de campos não é o esperado. Cheque o manual"
+        rum "Argumentul pentru separatorul de cimpuri este diferit de ce ma asteptam. Verifica manualul"
+        rus "Аргумент разделителя полей - не тот, который ожидался. Обращайтесь к документации"
+        serbian "Argument separatora polja nije ono što se očekivalo. Proverite uputstvo MariaDB server-a"
+        slo "Argument oddeľovač polí nezodpovedá požiadavkám. Skontrolujte v manuáli"
+        spa "Los separadores de argumentos del campo no son los especificados. Compruebe el manual"
+        swe "Fältseparatorerna är inte vad som förväntades. Kontrollera mot manualen"
+        ukr "Хибний розділювач полів. Почитайте документацію"
+ER_BLOBS_AND_NO_TERMINATED 42000 S1009
+        chi "您不能对 BLOB 使用固定的行长度;请使用 'fields terminated by'"
+        cze "Není možné použít pevný rowlength s BLOBem. Použijte 'fields terminated by'"
+        dan "Man kan ikke bruge faste feltlængder med BLOB. Brug i stedet 'fields terminated by'"
+        eng "You can't use fixed rowlength with BLOBs; please use 'fields terminated by'"
+        est "BLOB-tüüpi väljade olemasolul ei saa kasutada fikseeritud väljapikkust. Vajalik 'fields terminated by' määrang"
+        fre "Vous ne pouvez utiliser des lignes de longueur fixe avec des BLOBs. Utiliser 'fields terminated by'"
+        ger "Eine feste Zeilenlänge kann für BLOB-Felder nicht verwendet werden. Bitte 'fields terminated by' verwenden"
+        geo "ფიქსირებულზომიანი მწკრივის სიგრძეს BLOB-თან ერთად ვერ გამოიყენებთ. გამოიყენეთ 'fields terminated by'"
+        greek "Δεν μπορείτε να χρησιμοποιήσετε fixed rowlength σε BLOBs. Παρακαλώ χρησιμοποιείστε 'fields terminated by'"
+        hindi "BLOBs को निश्चित लंबाई की पंक्ति के साथ प्रयोग नहीं किया जा सकता है; 'FIELDS TERMINATED BY' का इस्तेमाल करें"
+        hun "Fix hosszusagu BLOB-ok nem hasznalhatok. Hasznalja a 'mezoelvalaszto jelet' "
+        ita "Non possono essere usate righe a lunghezza fissa con i BLOB. Usa 'FIELDS TERMINATED BY'"
+        jpn "BLOBには固定長レコードが使用できません。'FIELDS TERMINATED BY'句を使用して下さい。"
+        kor "BLOB로는 고정길이의 lowlength를 사용할 수 없습니다. 'fields terminated by'를 사용하세요."
+        nla "Bij het gebruik van BLOBs is het niet mogelijk om vaste rijlengte te gebruiken. Maak s.v.p. gebruik van 'fields terminated by'"
+        nor "En kan ikke bruke faste feltlengder med BLOB. Vennlisgt bruk 'fields terminated by'"
+        norwegian-ny "Ein kan ikkje bruke faste feltlengder med BLOB. Vennlisgt bruk 'fields terminated by'"
+        pol "Nie można użyć stałej długo?ci wiersza z polami typu BLOB. Użyj 'fields terminated by'"
+        por "Você não pode usar comprimento de linha fixo com BLOBs. Por favor, use campos com comprimento limitado"
+        rum "Nu poti folosi lungime de cimp fix pentru BLOB-uri. Foloseste 'fields terminated by'"
+        rus "Фиксированный размер записи с полями типа BLOB использовать нельзя, применяйте 'fields terminated by'"
+        serbian "Ne možete koristiti fiksnu veličinu sloga kada imate BLOB polja. Molim koristite 'fields terminated by' opciju"
+        slo "Nie je možné použiť fixnú dĺžku s BLOBom. Použite 'fields terminated by'"
+        spa "No se pueden usar longitudes fijas de filas con BLOBs. Por favor, use 'campos terminados por'"
+        swe "Man kan inte använda fast radlängd med blobs. Använd 'fields terminated by'"
+        ukr "Не можна використовувати сталу довжину строки з BLOB. Зкористайтеся 'fields terminated by'"
+ER_TEXTFILE_NOT_READABLE  
+        chi "文件'%-.128s'必须位于数据库目录中,或所有人都可以读取"
+        cze "Soubor '%-.128s' musí být v adresáři databáze nebo čitelný pro všechny"
+        dan "Filen '%-.128s' skal være i database-folderen, eller kunne læses af alle"
+        eng "The file '%-.128s' must be in the database directory or be readable by all"
+        est "Fail '%-.128s' peab asuma andmebaasi kataloogis või olema kõigile loetav"
+        fre "Le fichier '%-.128s' doit être dans le répertoire de la base et lisible par tous"
+        ger "Datei '%-.128s' muss im Datenbank-Verzeichnis vorhanden oder lesbar für alle sein"
+        geo "ფაილი '%-.128s' მონაცემთა ბაზის საქაღალდეში უნდა მდებარეობდეს, ან ყველას მიერ უნდა იყოს წაკითხვადი"
+        greek "Το αρχείο '%-.128s' πρέπει να υπάρχει στο database directory ή να μπορεί να διαβαστεί από όλους"
+        hindi "फ़ाइल '%-.128s' डेटाबेस डायरेक्टरी में या सभी के द्वारा पठनीय होना चाहिए"
+        hun "A(z) '%-.128s'-nak az adatbazis konyvtarban kell lennie, vagy mindenki szamara olvashatonak"
+        ita "Il file '%-.128s' deve essere nella directory del database e deve essere leggibile da tutti"
+        jpn "ファイル '%-.128s' はデータベースディレクトリにあるか、全てのユーザーから読める必要があります。"
+        kor "'%-.128s' 화일는 데이타베이스 디렉토리에 존재하거나 모두에게 읽기 가능하여야 합니다."
+        nla "Het bestand '%-.128s' dient in de database directory voor the komen of leesbaar voor iedereen te zijn"
+        nor "Filen '%-.128s' må være i database-katalogen for å være lesbar for alle"
+        norwegian-ny "Filen '%-.128s' må være i database-katalogen for å være lesbar for alle"
+        pol "Plik '%-.128s' musi znajdować sie w katalogu bazy danych lub mieć prawa czytania przez wszystkich"
+        por "Arquivo '%-.128s' tem que estar no diretório do banco de dados ou ter leitura possível para todos"
+        rum "Fisierul '%-.128s' trebuie sa fie in directorul bazei de data sau trebuie sa poata sa fie citit de catre toata lumea (verifica permisiile)"
+        rus "Файл '%-.128s' должен находиться в том же каталоге, что и база данных, или быть общедоступным для чтения"
+        serbian "File '%-.128s' mora biti u direktorijumu gde su file-ovi baze i mora imati odgovarajuća prava pristupa"
+        slo "Súbor '%-.128s' musí byť v adresári databázy, alebo čitateľný pre všetkých"
+        spa "El fichero/archivo '%-.128s' debe de estar en el directorio de la base de datos o debe de ser legible por todos"
+        swe "Textfilen '%-.128s' måste finnas i databasbiblioteket eller vara läsbar för alla"
+        ukr "Файл '%-.128s' повинен бути у теці бази данних або мати встановлене право на читання для усіх"
+ER_FILE_EXISTS_ERROR  
+        chi "文件'%-.200s'已经存在"
+        cze "Soubor '%-.200s' již existuje"
+        dan "Filen '%-.200s' eksisterer allerede"
+        eng "File '%-.200s' already exists"
+        est "Fail '%-.200s' juba eksisteerib"
+        fre "Le fichier '%-.200s' existe déjà"
+        ger "Datei '%-.200s' bereits vorhanden"
+        geo "ფაილი '%-.200s' უკვე არსებობს"
+        greek "Το αρχείο '%-.200s' υπάρχει ήδη"
+        hindi "फ़ाइल '%-.200s' पहले से मौजूद है"
+        hun "A '%-.200s' file mar letezik"
+        ita "Il file '%-.200s' esiste gia`"
+        jpn "ファイル '%-.200s' はすでに存在します。"
+        kor "'%-.200s' 화일은 이미 존재합니다."
+        nla "Het bestand '%-.200s' bestaat reeds"
+        nor "Filen '%-.200s' eksisterte allerede"
+        norwegian-ny "Filen '%-.200s' eksisterte allereide"
+        pol "Plik '%-.200s' już istnieje"
+        por "Arquivo '%-.200s' já existe"
+        rum "Fisierul '%-.200s' exista deja"
+        rus "Файл '%-.200s' уже существует"
+        serbian "File '%-.200s' već postoji"
+        slo "Súbor '%-.200s' už existuje"
+        spa "El fichero/archivo '%-.200s' ya existe"
+        swe "Filen '%-.200s' existerar redan"
+        ukr "Файл '%-.200s' вже існує"
+ER_LOAD_INFO  
+        chi "记录:%ld删除:%ld跳过:%ld警告:%ld"
+        cze "Záznamů: %ld  Vymazáno: %ld  Přeskočeno: %ld  Varování: %ld"
+        dan "Poster: %ld  Fjernet: %ld  Sprunget over: %ld  Advarsler: %ld"
+        eng "Records: %ld  Deleted: %ld  Skipped: %ld  Warnings: %ld"
+        est "Kirjeid: %ld  Kustutatud: %ld  Vahele jäetud: %ld  Hoiatusi: %ld"
+        fre "Enregistrements: %ld  Effacés: %ld  Non traités: %ld  Avertissements: %ld"
+        ger "Datensätze: %ld  Gelöscht: %ld  Ausgelassen: %ld  Warnungen: %ld"
+        geo "ჩანაწერები: %ld წაშლილია: %ld გამოტოვებულია: %ld გაფრთხილებები: %ld"
+        greek "Εγγραφές: %ld  Διαγραφές: %ld  Παρεκάμφθησαν: %ld  Προειδοποιήσεις: %ld"
+        hindi "रिकॉर्ड: %ld हटाए गए: %ld छोड़ दिए गए: %ld चेतावनी: %ld"
+        hun "Rekordok: %ld  Torolve: %ld  Skipped: %ld  Warnings: %ld"
+        ita "Records: %ld  Cancellati: %ld  Saltati: %ld  Avvertimenti: %ld"
+        jpn "レコード数: %ld  削除: %ld  スキップ: %ld  警告: %ld"
+        kor "레코드: %ld개  삭제: %ld개  스킵: %ld개  경고: %ld개"
+        nla "Records: %ld  Verwijderd: %ld  Overgeslagen: %ld  Waarschuwingen: %ld"
+        nor "Poster: %ld  Fjernet: %ld  Hoppet over: %ld  Advarsler: %ld"
+        norwegian-ny "Poster: %ld  Fjerna: %ld  Hoppa over: %ld  Åtvaringar: %ld"
+        pol "Recordów: %ld  Usuniętych: %ld  Pominiętych: %ld  Ostrzeżeń: %ld"
+        por "Registros: %ld - Deletados: %ld - Ignorados: %ld - Avisos: %ld"
+        rum "Recorduri: %ld  Sterse: %ld  Sarite (skipped): %ld  Atentionari (warnings): %ld"
+        rus "Записей: %ld  Удалено: %ld  Пропущено: %ld  Предупреждений: %ld"
+        serbian "Slogova: %ld  Izbrisano: %ld  Preskočeno: %ld  Upozorenja: %ld"
+        slo "Záznamov: %ld  Zmazaných: %ld  Preskočených: %ld  Varovania: %ld"
+        spa "Registros: %ld Borrados: %ld Saltados: %ld Avisos: %ld"
+        swe "Rader: %ld  Bortagna: %ld  Dubletter: %ld  Varningar: %ld"
+        ukr "Записів: %ld  Видалено: %ld  Пропущено: %ld  Застережень: %ld"
+ER_ALTER_INFO  
+        chi "记录: %ld  重复: %ld"
+        cze "Záznamů: %ld  Zdvojených: %ld"
+        dan "Poster: %ld  Ens: %ld"
+        eng "Records: %ld  Duplicates: %ld"
+        est "Kirjeid: %ld  Kattuvaid: %ld"
+        fre "Enregistrements: %ld  Doublons: %ld"
+        ger "Datensätze: %ld  Duplikate: %ld"
+        geo "ჩანაწერები: %ld დუბლიკატები: %ld"
+        greek "Εγγραφές: %ld  Επαναλήψεις: %ld"
+        hindi "रिकॉर्ड: %ld डुप्लिकेट: %ld"
+        hun "Rekordok: %ld  Duplikalva: %ld"
+        ita "Records: %ld  Duplicati: %ld"
+        jpn "レコード数: %ld  重複: %ld"
+        kor "레코드: %ld개  중복: %ld개"
+        nla "Records: %ld  Dubbel: %ld"
+        nor "Poster: %ld  Like: %ld"
+        norwegian-ny "Poster: %ld  Like: %ld"
+        pol "Rekordów: %ld  Duplikatów: %ld"
+        por "Registros: %ld - Duplicados: %ld"
+        rum "Recorduri: %ld  Duplicate: %ld"
+        rus "Записей: %ld  Дубликатов: %ld"
+        serbian "Slogova: %ld  Duplikata: %ld"
+        slo "Záznamov: %ld  Opakovaných: %ld"
+        spa "Registros: %ld  Duplicados: %ld"
+        swe "Rader: %ld  Dubletter: %ld"
+        ukr "Записів: %ld  Дублікатів: %ld"
+ER_WRONG_SUB_KEY  
+        chi "前缀索引不正确;使用的索引部分不是字符串,使用的长度比索引部分长,或者存储引擎不支持独特前缀索引"
+        cze "Chybná podčást klíče -- není to řetězec nebo je delší než délka části klíče"
+        dan "Forkert indeksdel. Den anvendte nøgledel er ikke en streng eller længden er større end nøglelængden"
+        eng "Incorrect prefix key; the used key part isn't a string, the used length is longer than the key part, or the storage engine doesn't support unique prefix keys"
+        est "Vigane võtme osa. Kasutatud võtmeosa ei ole string tüüpi, määratud pikkus on pikem kui võtmeosa või tabelihandler ei toeta seda tüüpi võtmeid"
+        fre "Mauvaise sous-clef. Ce n'est pas un 'string' ou la longueur dépasse celle définie dans la clef"
+        ger "Falscher Unterteilschlüssel. Der verwendete Schlüsselteil ist entweder kein String, die verwendete Länge ist länger als der Teilschlüssel oder die Speicher-Engine unterstützt keine Unterteilschlüssel"
+        geo "პრეფიქსის არასწორი გასაღები. გასაღების გამოყენებადი ნაწილი სტრიქონს არ წარმოადგენს. მითითებული სიგრძე უფრო დიდია, ვიდრე გასაღების ნაწილის სიგრძე, ან საცავის ძრავას უნიკალური პრეფიქსის გასაღებების მხარდაჭერა არ გააჩნია"
+        greek "Εσφαλμένο sub part key. Το χρησιμοποιούμενο key part δεν είναι string ή το μήκος του είναι μεγαλύτερο"
+        hun "Rossz alkulcs. A hasznalt kulcsresz nem karaktersorozat vagy hosszabb, mint a kulcsresz"
+        ita "Sotto-parte della chiave errata. La parte di chiave utilizzata non e` una stringa o la lunghezza e` maggiore della parte di chiave"
+        jpn "キーのプレフィックスが不正です。キーが文字列ではないか、プレフィックス長がキーよりも長いか、ストレージエンジンが一意索引のプレフィックス指定をサポートしていません。"
+        kor "부정확한 서버 파트 키. 사용된 키 파트가 스트링이 아니거나 키 파트의 길이가 너무 깁니다."
+        nla "Foutief sub-gedeelte van de zoeksleutel. De gebruikte zoeksleutel is geen onderdeel van een string of of de gebruikte lengte is langer dan de zoeksleutel"
+        nor "Feil delnøkkel. Den brukte delnøkkelen er ikke en streng eller den oppgitte lengde er lengre enn nøkkel lengden"
+        norwegian-ny "Feil delnykkel. Den brukte delnykkelen er ikkje ein streng eller den oppgitte lengda er lengre enn nykkellengden"
+        pol "Błędna podczę?ć klucza. Użyta czę?ć klucza nie jest łańcuchem lub użyta długo?ć  jest większa niż czę?ć klucza"
+        por "Sub parte da chave incorreta. A parte da chave usada não é uma 'string' ou o comprimento usado é maior que parte da chave ou o manipulador de tabelas não suporta sub chaves únicas"
+        rum "Componentul cheii este incorrect. Componentul folosit al cheii nu este un sir sau lungimea folosita este mai lunga decit lungimea cheii"
+        rus "Некорректная часть ключа. Используемая часть ключа не является строкой, указанная длина больше, чем длина части ключа, или обработчик таблицы не поддерживает уникальные части ключа"
+        serbian "Pogrešan pod-ključ dela ključa. Upotrebljeni deo ključa nije string, upotrebljena dužina je veća od dela ključa ili handler tabela ne podržava jedinstvene pod-ključeve"
+        slo "Incorrect prefix key; the used key part isn't a string or the used length is longer than the key part"
+        spa "Prefijo incorrecto de clave; la parte utilizada de la clave no es una cadena, la longitud usada es mayor que la parte de la clave o el motor de almacenaje no soporta claves con prefijo único"
+        swe "Felaktig delnyckel. Nyckeldelen är inte en sträng eller den angivna längden är längre än kolumnlängden"
+        ukr "Невірна частина ключа. Використана частина ключа не є строкою, задовга або вказівник таблиці не підтримує унікальних частин ключей"
+ER_CANT_REMOVE_ALL_FIELDS 42000 
+        chi "您不能使用 ALTER TABLE 删除所有列;改用 DROP TABLE"
+        cze "Není možné vymazat všechny položky s ALTER TABLE. Použijte DROP TABLE"
+        dan "Man kan ikke slette alle felter med ALTER TABLE. Brug DROP TABLE i stedet"
+        eng "You can't delete all columns with ALTER TABLE; use DROP TABLE instead"
+        est "ALTER TABLE kasutades ei saa kustutada kõiki tulpasid. Kustuta tabel DROP TABLE abil"
+        fre "Vous ne pouvez effacer tous les champs avec ALTER TABLE. Utilisez DROP TABLE"
+        ger "Mit ALTER TABLE können nicht alle Felder auf einmal gelöscht werden. Dafür DROP TABLE verwenden"
+        geo "ALTER TABLE-ით ყველა სვეტს ვერ წაშლით. გამოიყენეთ DROP TABLE"
+        greek "Δεν είναι δυνατή η διαγραφή όλων των πεδίων με ALTER TABLE. Παρακαλώ χρησιμοποιείστε DROP TABLE"
+        hindi "ALTER TABLE का इस्तेमाल कर सभी कॉलम्स को हटाया नहीं जा सकता; DROP TABLE का इस्तेमाल करें"
+        hun "Az osszes mezo nem torolheto az ALTER TABLE-lel. Hasznalja a DROP TABLE-t helyette"
+        ita "Non si possono cancellare tutti i campi con una ALTER TABLE. Utilizzare DROP TABLE"
+        jpn "ALTER TABLE では全ての列の削除はできません。DROP TABLE を使用してください。"
+        kor "ALTER TABLE 명령으로는 모든 칼럼을 지울 수 없습니다. DROP TABLE 명령을 이용하세요."
+        nla "Het is niet mogelijk alle velden te verwijderen met ALTER TABLE. Gebruik a.u.b. DROP TABLE hiervoor!"
+        nor "En kan ikke slette alle felt med ALTER TABLE. Bruk DROP TABLE isteden"
+        norwegian-ny "Ein kan ikkje slette alle felt med ALTER TABLE. Bruk DROP TABLE istadenfor"
+        pol "Nie można usun?ć wszystkich pól wykorzystuj?c ALTER TABLE. W zamian użyj DROP TABLE"
+        por "Você não pode deletar todas as colunas com ALTER TABLE; use DROP TABLE em seu lugar"
+        rum "Nu poti sterge toate coloanele cu ALTER TABLE. Foloseste DROP TABLE in schimb"
+        rus "Нельзя удалить все столбцы с помощью ALTER TABLE. Используйте  DROP TABLE"
+        serbian "Ne možete da izbrišete sve kolone pomoću komande 'ALTER TABLE'. Upotrebite komandu 'DROP TABLE' ako želite to da uradite"
+        slo "Nemôžem zmazať všetky polia pomocou ALTER TABLE; použite DROP TABLE"
+        spa "No se pueden borrar todas las columnas con ALTER TABLE. Use DROP TABLE para hacerlo"
+        swe "Man kan inte radera alla fält med ALTER TABLE. Använd DROP TABLE istället"
+        ukr "Не можливо видалити всі стовбці за допомогою ALTER TABLE. Для цього скористайтеся DROP TABLE"
+ER_CANT_DROP_FIELD_OR_KEY 42000 
+        chi "不能 DROP %s %`-.192s; 检查它是否存在"
+        cze "Nemohu zrušit (DROP %s) %`-.192s. Zkontrolujte, zda neexistují záznamy/klíče"
+        dan "Kan ikke udføre DROP %s %`-.192s. Undersøg om feltet/nøglen eksisterer"
+        eng "Can't DROP %s %`-.192s; check that it exists"
+        est "Ei suuda kustutada (DROP %s) %`-.192s. Kontrolli kas tulp/võti eksisteerib"
+        fre "Ne peut effacer (DROP %s) %`-.192s. Vérifiez s'il existe"
+        ger "DROP %s: Kann %`-.192s nicht löschen. Existiert es?"
+        geo "DROP %s %`-.192s შეუძლებელია. შეამოწმეთ, არსებობს თუ არა ის"
+        greek "Αδύνατη η διαγραφή (DROP %s) %`-.192s. Παρακαλώ ελέγξτε αν το πεδίο/κλειδί υπάρχει"
+        hindi "%s %`-.192s को ड्रॉप नहीं कर सकते हैं; कृपया जाँच करें कि यह मौजूद है"
+        hun "A DROP %s %`-.192s nem lehetseges. Ellenorizze, hogy a mezo/kulcs letezik-e"
+        ita "Impossibile cancellare (DROP %s) %`-.192s. Controllare che il campo chiave esista"
+        nla "DROP %s: Kan %`-.192s niet weggooien. Controleer of het veld of de zoeksleutel daadwerkelijk bestaat"
+        nor "Kan ikke DROP %s %`-.192s. Undersøk om felt/nøkkel eksisterer"
+        norwegian-ny "Kan ikkje DROP %s %`-.192s. Undersøk om felt/nøkkel eksisterar"
+        pol "Nie można wykonać operacji DROP %s %`-.192s. Sprawdź, czy to pole/klucz istnieje"
+        por "Não se pode fazer DROP %s %`-.192s. Confira se esta coluna/chave existe"
+        rum "Nu pot sa DROP %s %`-.192s. Verifica daca coloana/cheia exista"
+        rus "Невозможно удалить (DROP %s) %`-.192s. Убедитесь что он действительно существует"
+        serbian "Ne mogu da izvršim komandu drop 'DROP %s' na %`-.192s. Proverite da li ta kolona (odnosno ključ) postoji"
+        slo "Nemôžem zrušiť (DROP %s) %`-.192s. Skontrolujte, či neexistujú záznamy/kľúče"
+        spa "No puedo eliminar %s %`-.192s; compruebe que ya existe"
+        swe "Kan inte ta bort (DROP %s) %`-.192s. Kontrollera att begränsningen/fältet/nyckel finns"
+        ukr "Не можу DROP %s %`-.192s. Перевірте, чи він існує"
+ER_INSERT_INFO  
+        chi "记录: %ld  重复: %ld  警告: %ld"
+        cze "Záznamů: %ld  Zdvojených: %ld  Varování: %ld"
+        dan "Poster: %ld  Ens: %ld  Advarsler: %ld"
+        eng "Records: %ld  Duplicates: %ld  Warnings: %ld"
+        est "Kirjeid: %ld  Kattuvaid: %ld  Hoiatusi: %ld"
+        fre "Enregistrements: %ld  Doublons: %ld  Avertissements: %ld"
+        ger "Datensätze: %ld  Duplikate: %ld  Warnungen: %ld"
+        geo "ჩანაწერები: %ld დუბლიკატები: %ld გაფრთხილებები: %ld"
+        greek "Εγγραφές: %ld  Επαναλήψεις: %ld  Προειδοποιήσεις: %ld"
+        hindi "रिकॉर्ड: %ld डुप्लिकेट: %ld चेतावनी: %ld"
+        hun "Rekordok: %ld  Duplikalva: %ld  Warnings: %ld"
+        ita "Records: %ld  Duplicati: %ld  Avvertimenti: %ld"
+        jpn "レコード数: %ld  重複数: %ld  警告: %ld"
+        kor "레코드: %ld개  중복: %ld개  경고: %ld개"
+        nla "Records: %ld  Dubbel: %ld  Waarschuwing: %ld"
+        nor "Poster: %ld  Like: %ld  Advarsler: %ld"
+        norwegian-ny "Postar: %ld  Like: %ld  Åtvaringar: %ld"
+        pol "Rekordów: %ld  Duplikatów: %ld  Ostrzeżeń: %ld"
+        por "Registros: %ld - Duplicados: %ld - Avisos: %ld"
+        rum "Recorduri: %ld  Duplicate: %ld  Atentionari (warnings): %ld"
+        rus "Записей: %ld  Дубликатов: %ld  Предупреждений: %ld"
+        serbian "Slogova: %ld  Duplikata: %ld  Upozorenja: %ld"
+        slo "Záznamov: %ld  Opakovaných: %ld  Varovania: %ld"
+        spa "Registros: %ld  Duplicados: %ld  Avisos: %ld"
+        swe "Rader: %ld  Dubletter: %ld  Varningar: %ld"
+        ukr "Записів: %ld  Дублікатів: %ld  Застережень: %ld"
+ER_UPDATE_TABLE_USED
+        chi "表 '%-.192s' 被指定了两次, 即作为 '%s' 的目标,又作为数据的独立源"
+        eng "Table '%-.192s' is specified twice, both as a target for '%s' and as a separate source for data"
+        geo "ცხრილი '%-.192s' ორჯერაა მითითებული, ორივეჯერ როგორც '%s'-ის სამიზნე და როგორც მონაცემების ცალკე წყარო"
+        swe "Table '%-.192s' är använd två gånger. Både för '%s' och för att hämta data"
+        ukr "Таблиця '%-.192s' вказується двічі, як цільова для '%s', так і як окреме джерело даних"
+ER_NO_SUCH_THREAD  
+        chi "未知线程ID:%lu"
+        cze "Neznámá identifikace threadu: %lu"
+        dan "Ukendt tråd id: %lu"
+        eng "Unknown thread id: %lu"
+        est "Tundmatu lõim: %lu"
+        fre "Numéro de tâche inconnu: %lu"
+        ger "Unbekannte Thread-ID: %lu"
+        geo "უცნობი ნაკადის ID: %lu"
+        greek "Αγνωστο thread id: %lu"
+        hindi "अज्ञात थ्रेड ID: %lu"
+        hun "Ervenytelen szal (thread) id: %lu"
+        ita "Thread id: %lu sconosciuto"
+        jpn "不明なスレッドIDです: %lu"
+        kor "알수 없는 쓰레드 id: %lu"
+        nla "Onbekend thread id: %lu"
+        nor "Ukjent tråd id: %lu"
+        norwegian-ny "Ukjent tråd id: %lu"
+        pol "Nieznany identyfikator wątku: %lu"
+        por "'Id' de 'thread' %lu desconhecido"
+        rum "Id-ul: %lu thread-ului este necunoscut"
+        rus "Неизвестный номер потока: %lu"
+        serbian "Nepoznat thread identifikator: %lu"
+        slo "Neznáma identifikácia vlákna: %lu"
+        spa "Identificador del hilo (thread): %lu desconocido"
+        swe "Finns ingen tråd med id %lu"
+        ukr "Невідомий ідентифікатор гілки: %lu"
+ER_KILL_DENIED_ERROR  
+        chi "你不是线程%lld的所有者"
+        cze "Nejste vlastníkem threadu %lld"
+        dan "Du er ikke ejer af tråden %lld"
+        eng "You are not owner of thread %lld"
+        est "Ei ole lõime %lld omanik"
+        fre "Vous n'êtes pas propriétaire de la tâche no: %lld"
+        ger "Sie sind nicht Eigentümer von Thread %lld"
+        geo "თქვენ ნაკადის %lld მფლობელი არ ბრძანდებით"
+        greek "Δεν είσθε owner του thread %lld"
+        hindi "आप थ्रेड %lld के OWNER नहीं हैं"
+        hun "A %lld thread-nek mas a tulajdonosa"
+        ita "Utente non proprietario del thread %lld"
+        jpn "スレッド %lld のオーナーではありません。"
+        kor "쓰레드(Thread) %lld의 소유자가 아닙니다."
+        nla "U bent geen bezitter van thread %lld"
+        nor "Du er ikke eier av tråden %lld"
+        norwegian-ny "Du er ikkje eigar av tråd %lld"
+        pol "Nie jesteś właścicielem wątku %lld"
+        por "Você não é proprietário da 'thread' %lld"
+        rum "Nu sinteti proprietarul threadului %lld"
+        rus "Вы не являетесь владельцем потока %lld"
+        serbian "Vi niste vlasnik thread-a %lld"
+        slo "Nie ste vlastníkom vlákna %lld"
+        spa "No eres el propietario del hilo (thread) %lld"
+        swe "Du är inte ägare till tråd %lld"
+        ukr "Ви не володар гілки %lld"
+ER_NO_TABLES_USED  
+        chi "没有使用表"
+        cze "Nejsou použity žádné tabulky"
+        dan "Ingen tabeller i brug"
+        eng "No tables used"
+        est "Ühtegi tabelit pole kasutusel"
+        fre "Aucune table utilisée"
+        ger "Keine Tabellen verwendet"
+        geo "ცხრილები გამოყენებული არაა"
+        greek "Δεν χρησιμοποιήθηκαν πίνακες"
+        hindi "कोई टेबल का इस्तेमाल नहीं हुआ"
+        hun "Nincs hasznalt tabla"
+        ita "Nessuna tabella usata"
+        jpn "表が指定されていません。"
+        kor "어떤 테이블도 사용되지 않았습니다."
+        nla "Geen tabellen gebruikt"
+        nor "Ingen tabeller i bruk"
+        norwegian-ny "Ingen tabellar i bruk"
+        pol "Nie ma żadnej użytej tabeli"
+        por "Nenhuma tabela usada"
+        rum "Nici o tabela folosita"
+        rus "Никакие таблицы не использованы"
+        serbian "Nema upotrebljenih tabela"
+        slo "Nie je použitá žiadna tabuľka"
+        spa "No se están usando tablas"
+        swe "Inga tabeller angivna"
+        ukr "Не використано таблиць"
+ER_TOO_BIG_SET  
+        chi "列 %-.192s 和SET的字符串过多"
+        cze "Příliš mnoho řetězců pro sloupec %-.192s a SET"
+        dan "For mange tekststrenge til specifikationen af SET i kolonne %-.192s"
+        eng "Too many strings for column %-.192s and SET"
+        est "Liiga palju string tulbale %-.192s tüübile SET"
+        fre "Trop de chaînes dans la colonne %-.192s avec SET"
+        ger "Zu viele Strings für Feld %-.192s und SET angegeben"
+        geo "მეტისმეტად ბევრი სტრიქონი სვეტისთვის %-.192s და SET"
+        greek "Πάρα πολλά strings για το πεδίο %-.192s και SET"
+        hun "Tul sok karakter: %-.192s es SET"
+        ita "Troppe stringhe per la colonna %-.192s e la SET"
+        jpn "SET型の列 '%-.192s' のメンバーの数が多すぎます。"
+        kor "칼럼 %-.192s와 SET에서 스트링이 너무 많습니다."
+        nla "Teveel strings voor kolom %-.192s en SET"
+        nor "For mange tekststrenger kolonne %-.192s og SET"
+        norwegian-ny "For mange tekststrengar felt %-.192s og SET"
+        pol "Zbyt wiele łańcuchów dla kolumny %-.192s i polecenia SET"
+        por "'Strings' demais para coluna '%-.192s' e SET"
+        rum "Prea multe siruri pentru coloana %-.192s si SET"
+        rus "Слишком много значений для столбца %-.192s в SET"
+        serbian "Previše string-ova za kolonu '%-.192s' i komandu 'SET'"
+        slo "Príliš mnoho reťazcov pre pole %-.192s a SET"
+        spa "Demasiadas cadenas para la columna %-.192s y SET"
+        swe "För många alternativ till kolumn %-.192s för SET"
+        ukr "Забагато строк для стовбця %-.192s та SET"
+ER_NO_UNIQUE_LOGFILE  
+        chi "无法生成唯一的log-filename %-.200s.(1-999)\n"
+        cze "Nemohu vytvořit jednoznačné jméno logovacího souboru %-.200s.(1-999)\n"
+        dan "Kan ikke lave unikt log-filnavn %-.200s.(1-999)\n"
+        eng "Can't generate a unique log-filename %-.200s.(1-999)\n"
+        est "Ei suuda luua unikaalset logifaili nime %-.200s.(1-999)\n"
+        fre "Ne peut générer un unique nom de journal %-.200s.(1-999)\n"
+        ger "Kann keinen eindeutigen Dateinamen für die Logdatei %-.200s(1-999) erzeugen\n"
+        geo "უნიკალური ჟურნალის ფაილის სახელის %-.200s.(1-999) შექმნის შეცდომა\n"
+        greek "Αδύνατη η δημιουργία unique log-filename %-.200s.(1-999)\n"
+        hindi "एक अनूठा लॉग-फ़ाइल नाम %-.200s.(1-999) उत्पन्न नहीं कर सके\n"
+        hun "Egyedi log-filenev nem generalhato: %-.200s.(1-999)\n"
+        ita "Impossibile generare un nome del file log unico %-.200s.(1-999)\n"
+        jpn "一意なログファイル名 %-.200s.(1-999) を生成できません。\n"
+        kor "Unique 로그화일 '%-.200s'를 만들수 없습니다.(1-999)\n"
+        nla "Het is niet mogelijk een unieke naam te maken voor de logfile %-.200s.(1-999)\n"
+        nor "Kan ikke lage unikt loggfilnavn %-.200s.(1-999)\n"
+        norwegian-ny "Kan ikkje lage unikt loggfilnavn %-.200s.(1-999)\n"
+        pol "Nie można stworzyć unikalnej nazwy pliku z logiem %-.200s.(1-999)\n"
+        por "Não pode gerar um nome de arquivo de 'log' único '%-.200s'.(1-999)\n"
+        rum "Nu pot sa generez un nume de log unic %-.200s.(1-999)\n"
+        rus "Невозможно создать уникальное имя файла журнала %-.200s.(1-999)\n"
+        serbian "Ne mogu da generišem jedinstveno ime log-file-a: '%-.200s.(1-999)'\n"
+        slo "Nemôžem vytvoriť unikátne meno log-súboru %-.200s.(1-999)\n"
+        spa "No puedo generar un único fichero/archivo de historial (log) llamado %-.200s.(1-999)\n"
+        swe "Kan inte generera ett unikt filnamn %-.200s.(1-999)\n"
+        ukr "Не можу згенерувати унікальне ім'я log-файлу %-.200s.(1-999)\n"
+ER_TABLE_NOT_LOCKED_FOR_WRITE  
+        chi "表 '%-.192s' 有 READ 锁,无法更新"
+        cze "Tabulka '%-.192s' byla zamčena s READ a nemůže být změněna"
+        dan "Tabellen '%-.192s' var låst med READ lås og kan ikke opdateres"
+        eng "Table '%-.192s' was locked with a READ lock and can't be updated"
+        est "Tabel '%-.192s' on lukustatud READ lukuga ning ei ole muudetav"
+        fre "Table '%-.192s' verrouillée lecture (READ): modification impossible"
+        ger "Tabelle '%-.192s' ist mit Lesesperre versehen und kann nicht aktualisiert werden"
+        geo "ცხრილი '%-.192s' დაბლოკილია READ ბლოკით და მისი განახლება შეუძლებელია"
+        greek "Ο πίνακας '%-.192s' έχει κλειδωθεί με READ lock και δεν επιτρέπονται αλλαγές"
+        hindi "टेबल '%-.192s' READ लॉक से बंद है और उसे बदल नहीं सकते"
+        hun "A(z) '%-.192s' tabla zarolva lett (READ lock) es nem lehet frissiteni"
+        ita "La tabella '%-.192s' e` soggetta a lock in lettura e non puo` essere aggiornata"
+        jpn "表 '%-.192s' はREADロックされていて、更新できません。"
+        kor "테이블 '%-.192s'는 READ 락이 잠겨있어서 갱신할 수 없습니다."
+        nla "Tabel '%-.192s' was gelocked met een lock om te lezen. Derhalve kunnen geen wijzigingen worden opgeslagen"
+        nor "Tabellen '%-.192s' var låst med READ lås og kan ikke oppdateres"
+        norwegian-ny "Tabellen '%-.192s' var låst med READ lås og kan ikkje oppdaterast"
+        pol "Tabela '%-.192s' została zablokowana przez READ i nie może zostać zaktualizowana"
+        por "Tabela '%-.192s' foi travada com trava de leitura e não pode ser atualizada"
+        rum "Tabela '%-.192s' a fost locked cu un READ lock si nu poate fi actualizata"
+        rus "Таблица '%-.192s' заблокирована уровнем READ lock и не может быть изменена"
+        serbian "Tabela '%-.192s' je zaključana READ lock-om; iz nje se može samo čitati ali u nju se ne može pisati"
+        slo "Tabuľka '%-.192s' bola zamknutá s READ a nemôže byť zmenená"
+        spa "La tabla '%-.192s' ha sido bloqueada con un READ lock y no puede ser actualizada"
+        swe "Tabell '%-.192s' kan inte uppdateras emedan den är låst för läsning"
+        ukr "Таблицю '%-.192s' заблоковано тільки для читання, тому її не можна оновити"
+ER_TABLE_NOT_LOCKED  
+        chi "表 '%-.192s' 未使用 LOCK TABLES 锁定"
+        cze "Tabulka '%-.192s' nebyla zamčena s LOCK TABLES"
+        dan "Tabellen '%-.192s' var ikke låst med LOCK TABLES"
+        eng "Table '%-.192s' was not locked with LOCK TABLES"
+        est "Tabel '%-.192s' ei ole lukustatud käsuga LOCK TABLES"
+        fre "Table '%-.192s' non verrouillée: utilisez LOCK TABLES"
+        ger "Tabelle '%-.192s' wurde nicht mit LOCK TABLES gesperrt"
+        geo "ცხრილი '%-.192s' LOCK TABLES-ით დაბლოკილი არაა"
+        greek "Ο πίνακας '%-.192s' δεν έχει κλειδωθεί με LOCK TABLES"
+        hindi "टेबल '%-.192s' LOCK TABLES से बंद नहीं है"
+        hun "A(z) '%-.192s' tabla nincs zarolva a LOCK TABLES-szel"
+        ita "Non e` stato impostato il lock per la tabella '%-.192s' con LOCK TABLES"
+        jpn "表 '%-.192s' は LOCK TABLES でロックされていません。"
+        kor "테이블 '%-.192s'는 LOCK TABLES 명령으로 잠기지 않았습니다."
+        nla "Tabel '%-.192s' was niet gelocked met LOCK TABLES"
+        nor "Tabellen '%-.192s' var ikke låst med LOCK TABLES"
+        norwegian-ny "Tabellen '%-.192s' var ikkje låst med LOCK TABLES"
+        pol "Tabela '%-.192s' nie została zablokowana poleceniem LOCK TABLES"
+        por "Tabela '%-.192s' não foi travada com LOCK TABLES"
+        rum "Tabela '%-.192s' nu a fost locked cu LOCK TABLES"
+        rus "Таблица '%-.192s' не была заблокирована с помощью LOCK TABLES"
+        serbian "Tabela '%-.192s' nije bila zaključana komandom 'LOCK TABLES'"
+        slo "Tabuľka '%-.192s' nebola zamknutá s LOCK TABLES"
+        spa "La tabla '%-.192s' no fue bloqueada con LOCK TABLES"
+        swe "Tabell '%-.192s' är inte låst med LOCK TABLES"
+        ukr "Таблицю '%-.192s' не було блоковано з LOCK TABLES"
+ER_UNUSED_17
+        eng "You should never see it"
+        geo "ის ვერასდროს უნდა დაგენახათ"
+        spa "Nunca debería vd de ver esto"
+ER_WRONG_DB_NAME 42000 
+        chi "数据库名称不正确'%-.100T'"
+        cze "Nepřípustné jméno databáze '%-.100T'"
+        dan "Ugyldigt database navn '%-.100T'"
+        eng "Incorrect database name '%-.100T'"
+        est "Vigane andmebaasi nimi '%-.100T'"
+        fre "Nom de base de donnée illégal: '%-.100T'"
+        ger "Unerlaubter Datenbankname '%-.100T'"
+        geo "მონაცემთა ბაზის არასწორი სახელი '%-.100T'"
+        greek "Λάθος όνομα βάσης δεδομένων '%-.100T'"
+        hindi "डेटाबेस नाम '%-.100T' गलत है"
+        hun "Hibas adatbazisnev: '%-.100T'"
+        ita "Nome database errato '%-.100T'"
+        jpn "データベース名 '%-.100T' は不正です。"
+        kor "'%-.100T' 데이타베이스의 이름이 부정확합니다."
+        nla "Databasenaam '%-.100T' is niet getoegestaan"
+        nor "Ugyldig database navn '%-.100T'"
+        norwegian-ny "Ugyldig database namn '%-.100T'"
+        pol "Niedozwolona nazwa bazy danych '%-.100T'"
+        por "Nome de banco de dados '%-.100T' incorreto"
+        rum "Numele bazei de date este incorect '%-.100T'"
+        rus "Некорректное имя базы данных '%-.100T'"
+        serbian "Pogrešno ime baze '%-.100T'"
+        slo "Neprípustné meno databázy '%-.100T'"
+        spa "Nombre incorrecto de base de datos '%-.100T'"
+        swe "Felaktigt databasnamn '%-.100T'"
+        ukr "Невірне ім'я бази данних '%-.100T'"
+ER_WRONG_TABLE_NAME 42000 
+        chi "表名不正确'%-.100s'"
+        cze "Nepřípustné jméno tabulky '%-.100s'"
+        dan "Ugyldigt tabel navn '%-.100s'"
+        eng "Incorrect table name '%-.100s'"
+        est "Vigane tabeli nimi '%-.100s'"
+        fre "Nom de table illégal: '%-.100s'"
+        ger "Unerlaubter Tabellenname '%-.100s'"
+        geo "არასწორი ცხრილის სახელი '%-.100s'"
+        greek "Λάθος όνομα πίνακα '%-.100s'"
+        hindi "टेबल  नाम '%-.100s' गलत है"
+        hun "Hibas tablanev: '%-.100s'"
+        ita "Nome tabella errato '%-.100s'"
+        jpn "表名 '%-.100s' は不正です。"
+        kor "'%-.100s' 테이블 이름이 부정확합니다."
+        nla "Niet toegestane tabelnaam '%-.100s'"
+        nor "Ugyldig tabell navn '%-.100s'"
+        norwegian-ny "Ugyldig tabell namn '%-.100s'"
+        pol "Niedozwolona nazwa tabeli '%-.100s'..."
+        por "Nome de tabela '%-.100s' incorreto"
+        rum "Numele tabelei este incorect '%-.100s'"
+        rus "Некорректное имя таблицы '%-.100s'"
+        serbian "Pogrešno ime tabele '%-.100s'"
+        slo "Neprípustné meno tabuľky '%-.100s'"
+        spa "Nombre incorrecto de tabla '%-.100s'"
+        swe "Felaktigt tabellnamn '%-.100s'"
+        ukr "Невірне ім'я таблиці '%-.100s'"
+ER_TOO_BIG_SELECT 42000 
+        chi "SELECT 将检查超过 MAX_JOIN_SIZE 行;检查您的 WHERE 并使用 SET SQL_BIG_SELECTS=1 或 SET MAX_JOIN_SIZE=# 如果 SELECT 没问题"
+        cze "Zadaný SELECT by procházel příliš mnoho záznamů a trval velmi dlouho. Zkontrolujte tvar WHERE a je-li SELECT v pořádku, použijte SET SQL_BIG_SELECTS=1"
+        dan "SELECT ville undersøge for mange poster og ville sandsynligvis tage meget lang tid. Undersøg WHERE delen og brug SET SQL_BIG_SELECTS=1 hvis udtrykket er korrekt"
+        eng "The SELECT would examine more than MAX_JOIN_SIZE rows; check your WHERE and use SET SQL_BIG_SELECTS=1 or SET MAX_JOIN_SIZE=# if the SELECT is okay"
+        est "SELECT lause peab läbi vaatama suure hulga kirjeid ja võtaks tõenäoliselt liiga kaua aega. Tasub kontrollida WHERE klauslit ja vajadusel kasutada käsku SET SQL_BIG_SELECTS=1"
+        fre "SELECT va devoir examiner beaucoup d'enregistrements ce qui va prendre du temps. Vérifiez la clause WHERE et utilisez SET SQL_BIG_SELECTS=1 si SELECT se passe bien"
+        ger "Die Ausführung des SELECT würde zu viele Datensätze untersuchen und wahrscheinlich sehr lange dauern. Bitte WHERE-Klausel überprüfen und gegebenenfalls SET SQL_BIG_SELECTS=1 oder SET MAX_JOIN_SIZE=# verwenden"
+        geo "SELECT-ი MAX_JOIN_SIZE-ის მნიშვნელობაზე მეტ ჩანაწერს დაამუშავებდა. შეამოწმეთ თქვენი WHERE და გამოიყენეთ SET SQL_BIG_SELECTS=1 ან SET MAX_JOIN_SIZE=#, თუ SELECT-ი კარგადაა დაწერილი"
+        greek "Το SELECT θα εξετάσει μεγάλο αριθμό εγγραφών και πιθανώς θα καθυστερήσει. Παρακαλώ εξετάστε τις παραμέτρους του WHERE και χρησιμοποιείστε SET SQL_BIG_SELECTS=1 αν το SELECT είναι σωστό"
+        hindi "SELECT कमांड MAX_JOIN_SIZE पंक्तियों से भी ज्यादा की जांच करेगा; कृपया WHERE क्लॉज़ को जाचें अथवा SET SQL_BIG_SELECTS=1 या SET MAX_JOIN_SIZE=# का इस्तेमाल करें"
+        hun "A SELECT tul sok rekordot fog megvizsgalni es nagyon sokaig fog tartani. Ellenorizze a WHERE-t es hasznalja a SET SQL_BIG_SELECTS=1 beallitast, ha a SELECT okay"
+        ita "La SELECT dovrebbe esaminare troppi record e usare troppo tempo. Controllare la WHERE e usa SET SQL_BIG_SELECTS=1 se e` tutto a posto"
+        jpn "SELECTがMAX_JOIN_SIZEを超える行数を処理しました。WHERE句を確認し、SELECT文に問題がなければ、 SET SQL_BIG_SELECTS=1 または SET MAX_JOIN_SIZE=# を使用して下さい。"
+        kor "SELECT 명령에서 너무 많은 레코드를 찾기 때문에 많은 시간이 소요됩니다. 따라서 WHERE 문을 점검하거나, 만약 SELECT가 ok되면  SET SQL_BIG_SELECTS=1 옵션을 사용하세요."
+        nla "Het SELECT-statement zou te veel records analyseren en dus veel tijd in beslag nemen. Kijk het WHERE-gedeelte van de query na en kies SET SQL_BIG_SELECTS=1 als het statement in orde is"
+        nor "SELECT ville undersøke for mange poster og ville sannsynligvis ta veldig lang tid. Undersøk WHERE klausulen og bruk SET SQL_BIG_SELECTS=1 om SELECTen er korrekt"
+        norwegian-ny "SELECT ville undersøkje for mange postar og ville sannsynligvis ta veldig lang tid. Undersøk WHERE klausulen og bruk SET SQL_BIG_SELECTS=1 om SELECTen er korrekt"
+        pol "Operacja SELECT będzie dotyczyła zbyt wielu rekordów i prawdopodobnie zajmie bardzo dużo czasu. Sprawdź warunek WHERE i użyj SQL_OPTION BIG_SELECTS=1 jeśli operacja SELECT jest poprawna"
+        por "O SELECT examinaria registros demais e provavelmente levaria muito tempo. Cheque sua cláusula WHERE e use SET SQL_BIG_SELECTS=1, se o SELECT estiver correto"
+        rum "SELECT-ul ar examina prea multe cimpuri si probabil ar lua prea mult timp; verifica clauza WHERE si foloseste SET SQL_BIG_SELECTS=1 daca SELECT-ul e okay"
+        rus "Для такой выборки SELECT должен будет просмотреть слишком много записей и, видимо, это займет очень много времени. Проверьте ваше указание WHERE, и, если в нем все в порядке, укажите SET SQL_BIG_SELECTS=1"
+        serbian "Komanda 'SELECT' će ispitati previše slogova i potrošiti previše vremena. Proverite vaš 'WHERE' filter i upotrebite 'SET OPTION SQL_BIG_SELECTS=1' ako želite baš ovakvu komandu"
+        slo "Zadaná požiadavka SELECT by prechádzala príliš mnoho záznamov a trvala by príliš dlho. Skontrolujte tvar WHERE a ak je v poriadku, použite SET SQL_BIG_SELECTS=1"
+        spa "El SELECT debería de examinar más de MAX_JOIN_SIZE filas; revise su WHERE y utilice SET SQL_BIG_SELECTS=1 o SET MAX_JOIN_SIZE=# si el SELECT está ok"
+        swe "Den angivna frågan skulle läsa mer än MAX_JOIN_SIZE rader.  Kontrollera din WHERE och använd SET SQL_BIG_SELECTS=1 eller SET MAX_JOIN_SIZE=# ifall du vill hantera stora joins"
+        ukr "Запиту SELECT потрібно обробити багато записів, що, певне, займе дуже багато часу. Перевірте ваше WHERE та використовуйте SET SQL_BIG_SELECTS=1, якщо цей запит SELECT є вірним"
+ER_UNKNOWN_ERROR  
+        chi "未知错误"
+        cze "Neznámá chyba"
+        dan "Ukendt fejl"
+        eng "Unknown error"
+        est "Tundmatu viga"
+        fre "Erreur inconnue"
+        ger "Unbekannter Fehler"
+        geo "უცნობი შეცდომა"
+        greek "Προέκυψε άγνωστο λάθος"
+        hindi "अज्ञात त्रुटि हुई"
+        hun "Ismeretlen hiba"
+        ita "Errore sconosciuto"
+        jpn "不明なエラー"
+        kor "알수 없는 에러입니다."
+        nla "Onbekende Fout"
+        nor "Ukjent feil"
+        norwegian-ny "Ukjend feil"
+        por "Erro desconhecido"
+        rum "Eroare unknown"
+        rus "Неизвестная ошибка"
+        serbian "Nepoznata greška"
+        slo "Neznámá chyba"
+        spa "Error desconocido"
+        swe "Okänt fel"
+        ukr "Невідома помилка"
+ER_UNKNOWN_PROCEDURE 42000 
+        chi "未知存储过程 '%-.192s'"
+        cze "Neznámá procedura %-.192s"
+        dan "Ukendt procedure %-.192s"
+        eng "Unknown procedure '%-.192s'"
+        est "Tundmatu protseduur '%-.192s'"
+        fre "Procédure %-.192s inconnue"
+        ger "Unbekannte Prozedur '%-.192s'"
+        geo "უცნობი პროცედურა '%-.192s'"
+        greek "Αγνωστη διαδικασία '%-.192s'"
+        hindi "अज्ञात प्रोसीजर '%-.192s'"
+        hun "Ismeretlen eljaras: '%-.192s'"
+        ita "Procedura '%-.192s' sconosciuta"
+        jpn "'%-.192s' は不明なプロシージャです。"
+        kor "알수 없는 수행문 : '%-.192s'"
+        nla "Onbekende procedure %-.192s"
+        nor "Ukjent prosedyre %-.192s"
+        norwegian-ny "Ukjend prosedyre %-.192s"
+        pol "Unknown procedure %-.192s"
+        por "'Procedure' '%-.192s' desconhecida"
+        rum "Procedura unknown '%-.192s'"
+        rus "Неизвестная процедура '%-.192s'"
+        serbian "Nepoznata procedura '%-.192s'"
+        slo "Neznámá procedúra '%-.192s'"
+        spa "Procedimiento desconocido %-.192s"
+        swe "Okänd procedur: %-.192s"
+        ukr "Невідома процедура '%-.192s'"
+ER_WRONG_PARAMCOUNT_TO_PROCEDURE 42000 
+        chi "存储过程 '%-.192s' 需要的参数和提供的参数不吻合"
+        cze "Chybný počet parametrů procedury %-.192s"
+        dan "Forkert antal  parametre til proceduren %-.192s"
+        eng "Incorrect parameter count to procedure '%-.192s'"
+        est "Vale parameetrite hulk protseduurile '%-.192s'"
+        fre "Mauvais nombre de paramètres pour la procedure %-.192s"
+        ger "Falsche Parameterzahl für Prozedur '%-.192s'"
+        geo "არასწორი პარამეტრების რაოდენობა პროცედურისთვის '%-.192s'"
+        greek "Λάθος αριθμός παραμέτρων στη διαδικασία '%-.192s'"
+        hindi "प्रोसीजर '%-.192s' के लिए पैरामीटर की संख्या गलत है"
+        hun "Rossz parameter a(z) '%-.192s'eljaras szamitasanal"
+        ita "Numero di parametri errato per la procedura '%-.192s'"
+        jpn "プロシージャ '%-.192s' へのパラメータ数が不正です。"
+        kor "'%-.192s' 수행문에 대한 부정확한 파라메터"
+        nla "Foutief aantal parameters doorgegeven aan procedure %-.192s"
+        nor "Feil parameter antall til prosedyren %-.192s"
+        norwegian-ny "Feil parameter tal til prosedyra %-.192s"
+        pol "Incorrect parameter count to procedure %-.192s"
+        por "Número de parâmetros incorreto para a 'procedure' '%-.192s'"
+        rum "Procedura '%-.192s' are un numar incorect de parametri"
+        rus "Некорректное количество параметров для процедуры '%-.192s'"
+        serbian "Pogrešan broj parametara za proceduru '%-.192s'"
+        slo "Chybný počet parametrov procedúry '%-.192s'"
+        spa "Contador de parámetros incorrecto para procedimiento %-.192s"
+        swe "Felaktigt antal parametrar till procedur %-.192s"
+        ukr "Хибна кількість параметрів процедури '%-.192s'"
+ER_WRONG_PARAMETERS_TO_PROCEDURE  
+        chi "存储过程 '%-.192s' 的参数不对"
+        cze "Chybné parametry procedury %-.192s"
+        dan "Forkert(e) parametre til proceduren %-.192s"
+        eng "Incorrect parameters to procedure '%-.192s'"
+        est "Vigased parameetrid protseduurile '%-.192s'"
+        fre "Paramètre erroné pour la procedure %-.192s"
+        ger "Falsche Parameter für Prozedur '%-.192s'"
+        geo "არასწორი პარამეტრები პროცედურისთვის '%-.192s'"
+        greek "Λάθος παράμετροι στην διαδικασία '%-.192s'"
+        hindi "प्रोसीजर '%-.192s' के लिए पैरामीटर्स गलत हैं"
+        hun "Rossz parameter a(z) '%-.192s' eljarasban"
+        ita "Parametri errati per la procedura '%-.192s'"
+        jpn "プロシージャ '%-.192s' へのパラメータが不正です。"
+        kor "'%-.192s' 수행문에 대한 부정확한 파라메터"
+        nla "Foutieve parameters voor procedure %-.192s"
+        nor "Feil parametre til prosedyren %-.192s"
+        norwegian-ny "Feil parameter til prosedyra %-.192s"
+        pol "Incorrect parameters to procedure %-.192s"
+        por "Parâmetros incorretos para a 'procedure' '%-.192s'"
+        rum "Procedura '%-.192s' are parametrii incorecti"
+        rus "Некорректные параметры для процедуры '%-.192s'"
+        serbian "Pogrešni parametri prosleđeni proceduri '%-.192s'"
+        slo "Chybné parametre procedúry '%-.192s'"
+        spa "Parámetros incorrectos para procedimiento %-.192s"
+        swe "Felaktiga parametrar till procedur %-.192s"
+        ukr "Хибний параметер процедури '%-.192s'"
+ER_UNKNOWN_TABLE 42S02 
+        chi "未知表名 '%-.192s' 在 %-.32s"
+        cze "Neznámá tabulka '%-.192s' v %-.32s"
+        dan "Ukendt tabel '%-.192s' i %-.32s"
+        eng "Unknown table '%-.192s' in %-.32s"
+        est "Tundmatu tabel '%-.192s' %-.32s-s"
+        fre "Table inconnue '%-.192s' dans %-.32s"
+        ger "Unbekannte Tabelle '%-.192s' in '%-.32s'"
+        geo "უცნობი ცხრილი '%-.192s' %-.32s-ში"
+        greek "Αγνωστος πίνακας '%-.192s' σε %-.32s"
+        hindi "टेबल '%-.192s', %-.32s में नहीं मिला"
+        hun "Ismeretlen tabla: '%-.192s' %-.32s-ban"
+        ita "Tabella '%-.192s' sconosciuta in %-.32s"
+        jpn "'%-.192s' は %-.32s では不明な表です。"
+        kor "알수 없는 테이블 '%-.192s' (데이타베이스 %-.32s)"
+        nla "Onbekende tabel '%-.192s' in %-.32s"
+        nor "Ukjent tabell '%-.192s' i %-.32s"
+        norwegian-ny "Ukjend tabell '%-.192s' i %-.32s"
+        pol "Unknown table '%-.192s' in %-.32s"
+        por "Tabela '%-.192s' desconhecida em '%-.32s'"
+        rum "Tabla '%-.192s' invalida in %-.32s"
+        rus "Неизвестная таблица '%-.192s' в %-.32s"
+        serbian "Nepoznata tabela '%-.192s' u '%-.32s'"
+        slo "Neznáma tabuľka '%-.192s' v %-.32s"
+        spa "Tabla desconocida '%-.192s' en %-.32s"
+        swe "Okänd tabell '%-.192s' i '%-.32s'"
+        ukr "Невідома таблиця '%-.192s' у %-.32s"
+ER_FIELD_SPECIFIED_TWICE 42000 
+        chi "列 '%-.192s' 被指定了两次"
+        cze "Položka '%-.192s' je zadána dvakrát"
+        dan "Feltet '%-.192s' er anvendt to gange"
+        eng "Column '%-.192s' specified twice"
+        est "Tulp '%-.192s' on määratletud topelt"
+        fre "Champ '%-.192s' spécifié deux fois"
+        ger "Feld '%-.192s' wurde zweimal angegeben"
+        geo "სვეტი '%-.192s' ორჯერაა მითითებული"
+        greek "Το πεδίο '%-.192s' έχει ορισθεί δύο φορές"
+        hindi "कॉलम '%-.192s' दो बार निर्दिष्ट किया गया है"
+        hun "A(z) '%-.192s' mezot ketszer definialta"
+        ita "Campo '%-.192s' specificato 2 volte"
+        jpn "列 '%-.192s' は2回指定されています。"
+        kor "칼럼 '%-.192s'는 두번 정의되어 있습니다."
+        nla "Veld '%-.192s' is dubbel gespecificeerd"
+        nor "Feltet '%-.192s' er spesifisert to ganger"
+        norwegian-ny "Feltet '%-.192s' er spesifisert to gangar"
+        pol "Field '%-.192s' specified twice"
+        por "Coluna '%-.192s' especificada duas vezes"
+        rum "Coloana '%-.192s' specificata de doua ori"
+        rus "Столбец '%-.192s' указан дважды"
+        serbian "Kolona '%-.192s' je navedena dva puta"
+        slo "Pole '%-.192s' je zadané dvakrát"
+        spa "Columna '%-.192s' especificada dos veces"
+        swe "Fält '%-.192s' är redan använt"
+        ukr "Стовбець '%-.192s' зазначено двічі"
+ER_INVALID_GROUP_FUNC_USE  
+        chi "组函数使用无效"
+        cze "Nesprávné použití funkce group"
+        dan "Forkert brug af grupperings-funktion"
+        eng "Invalid use of group function"
+        est "Vigane grupeerimisfunktsiooni kasutus"
+        fre "Utilisation invalide de la clause GROUP"
+        ger "Falsche Verwendung einer Gruppierungsfunktion"
+        geo "GROUP ფუნქციის არასწორი გამოყენება"
+        greek "Εσφαλμένη χρήση της group function"
+        hindi "ग्रुप फंक्शन का अवैध उपयोग"
+        hun "A group funkcio ervenytelen hasznalata"
+        ita "Uso non valido di una funzione di raggruppamento"
+        jpn "集計関数の使用方法が不正です。"
+        kor "잘못된 그룹 함수를 사용하였습니다."
+        nla "Ongeldig gebruik van GROUP-functie"
+        por "Uso inválido de função de agrupamento (GROUP)"
+        rum "Folosire incorecta a functiei group"
+        rus "Неправильное использование групповых функций"
+        serbian "Pogrešna upotreba 'GROUP' funkcije"
+        slo "Nesprávne použitie funkcie GROUP"
+        spa "Inválido uso de función de grupo"
+        swe "Felaktig användning av SQL grupp function"
+        ukr "Хибне використання функції групування"
+ER_UNSUPPORTED_EXTENSION 42000 
+        chi "表'%-.192s'使用此MariaDB版本不存在的扩展"
+        cze "Tabulka '%-.192s' používá rozšíření, které v této verzi MariaDB není"
+        dan "Tabellen '%-.192s' bruger et filtypenavn som ikke findes i denne MariaDB version"
+        eng "Table '%-.192s' uses an extension that doesn't exist in this MariaDB version"
+        est "Tabel '%-.192s' kasutab laiendust, mis ei eksisteeri antud MariaDB versioonis"
+        fre "Table '%-.192s' : utilise une extension invalide pour cette version de MariaDB"
+        ger "Tabelle '%-.192s' verwendet eine Erweiterung, die in dieser MariaDB-Version nicht verfügbar ist"
+        geo "ცხრილი '%-.192s' იყენებს გაფართოებას, რომელიც MariaDB-ის ამ ვერსიაში არ არსებობს"
+        greek "Ο πίνακας '%-.192s' χρησιμοποιεί κάποιο extension που δεν υπάρχει στην έκδοση αυτή της MariaDB"
+        hindi "टेबल '%-.192s' जिस इक्स्टेन्शन का उपयोग कर रहा है, वह इस MariaDB संस्करण में उपलब्ध नहीं है"
+        hun "A(z) '%-.192s' tabla olyan bovitest hasznal, amely nem letezik ebben a MariaDB versioban"
+        ita "La tabella '%-.192s' usa un'estensione che non esiste in questa versione di MariaDB"
+        jpn "表 '%-.192s' は、このMariaDBバージョンには無い機能を使用しています。"
+        kor "테이블 '%-.192s'는 확장명령을 이용하지만 현재의 MariaDB 버젼에서는 존재하지 않습니다."
+        nla "Tabel '%-.192s' gebruikt een extensie, die niet in deze MariaDB-versie voorkomt"
+        nor "Table '%-.192s' uses an extension that doesn't exist in this MariaDB version"
+        norwegian-ny "Table '%-.192s' uses an extension that doesn't exist in this MariaDB version"
+        pol "Table '%-.192s' uses an extension that doesn't exist in this MariaDB version"
+        por "Tabela '%-.192s' usa uma extensão que não existe nesta versão do MariaDB"
+        rum "Tabela '%-.192s' foloseste o extensie inexistenta in versiunea curenta de MariaDB"
+        rus "В таблице '%-.192s' используются возможности, не поддерживаемые в этой версии MariaDB"
+        serbian "Tabela '%-.192s' koristi ekstenziju koje ne postoji u ovoj verziji MariaDB-a"
+        slo "Tabuľka '%-.192s' používa rozšírenie, ktoré v tejto verzii MariaDB nie je"
+        spa "La tabla '%-.192s' usa una extensión que no existe en esta versión de MariaDB"
+        swe "Tabell '%-.192s' har en extension som inte finns i denna version av MariaDB"
+        ukr "Таблиця '%-.192s' використовує розширення, що не існує у цій версії MariaDB"
+ER_TABLE_MUST_HAVE_COLUMNS 42000 
+        chi "表必须至少有1列"
+        cze "Tabulka musí mít alespoň jeden sloupec"
+        dan "En tabel skal have mindst een kolonne"
+        eng "A table must have at least 1 column"
+        est "Tabelis peab olema vähemalt üks tulp"
+        fre "Une table doit comporter au moins une colonne"
+        ger "Eine Tabelle muss mindestens eine Spalte besitzen"
+        geo "ცხრილს აუცილებლად უნდა ჰქონდეს 1 სვეტი მაინც"
+        greek "Ενας πίνακας πρέπει να έχει τουλάχιστον ένα πεδίο"
+        hindi "एक टेबल में कम से कम एक कॉलम होना चाहिए"
+        hun "A tablanak legalabb egy oszlopot tartalmazni kell"
+        ita "Una tabella deve avere almeno 1 colonna"
+        jpn "表には最低でも1個の列が必要です。"
+        kor "하나의 테이블에서는 적어도 하나의 칼럼이 존재하여야 합니다."
+        nla "Een tabel moet minstens 1 kolom bevatten"
+        por "Uma tabela tem que ter pelo menos uma (1) coluna"
+        rum "O tabela trebuie sa aiba cel putin o coloana"
+        rus "В таблице должен быть как минимум один столбец"
+        serbian "Tabela mora imati najmanje jednu kolonu"
+        slo "Tabuľka musí mať aspoň 1 pole"
+        spa "Una tabla debe de tener al menos 1 columna"
+        swe "Tabeller måste ha minst 1 kolumn"
+        ukr "Таблиця повинна мати хочаб один стовбець"
+ER_RECORD_FILE_FULL  
+        chi "表'%-.192s'已满"
+        cze "Tabulka '%-.192s' je plná"
+        dan "Tabellen '%-.192s' er fuld"
+        eng "The table '%-.192s' is full"
+        est "Tabel '%-.192s' on täis"
+        fre "La table '%-.192s' est pleine"
+        ger "Tabelle '%-.192s' ist voll"
+        geo "ცხრილი '%-.192s' სავსეა"
+        greek "Ο πίνακας '%-.192s' είναι γεμάτος"
+        hindi "टेबल '%-.192s' पूरा भरा है"
+        hun "A '%-.192s' tabla megtelt"
+        ita "La tabella '%-.192s' e` piena"
+        jpn "表 '%-.192s' は満杯です。"
+        kor "테이블 '%-.192s'가 full났습니다. "
+        nla "De tabel '%-.192s' is vol"
+        por "Tabela '%-.192s' está cheia"
+        rum "Tabela '%-.192s' e plina"
+        rus "Таблица '%-.192s' переполнена"
+        serbian "Tabela '%-.192s' je popunjena do kraja"
+        slo "Tabuľka '%-.192s' je plná"
+        spa "La tabla '%-.192s' está llena"
+        swe "Tabellen '%-.192s' är full"
+        ukr "Таблиця '%-.192s' заповнена"
+ER_UNKNOWN_CHARACTER_SET 42000 
+        chi "未知字符集:'%-.64s'"
+        cze "Neznámá znaková sada: '%-.64s'"
+        dan "Ukendt tegnsæt: '%-.64s'"
+        eng "Unknown character set: '%-.64s'"
+        est "Vigane kooditabel '%-.64s'"
+        fre "Jeu de caractères inconnu: '%-.64s'"
+        ger "Unbekannter Zeichensatz: '%-.64s'"
+        geo "უცნობი სიმბოლოების სეტი: '%-.64s'"
+        greek "Αγνωστο character set: '%-.64s'"
+        hindi "अज्ञात CHARACTER SET: '%-.64s'"
+        hun "Ervenytelen karakterkeszlet: '%-.64s'"
+        ita "Set di caratteri '%-.64s' sconosciuto"
+        jpn "不明な文字コードセット: '%-.64s'"
+        kor "알수없는 언어 Set: '%-.64s'"
+        nla "Onbekende character set: '%-.64s'"
+        por "Conjunto de caracteres '%-.64s' desconhecido"
+        rum "Set de caractere invalid: '%-.64s'"
+        rus "Неизвестная кодировка '%-.64s'"
+        serbian "Nepoznati karakter-set: '%-.64s'"
+        slo "Neznáma znaková sada: '%-.64s'"
+        spa "Juego desconocido de caracteres: '%-.64s'"
+        swe "Okänd teckenuppsättning: '%-.64s'"
+        ukr "Невідома кодова таблиця: '%-.64s'"
+ER_TOO_MANY_TABLES  
+        chi "表太多; MariaDB 只能在join中使用 %d 个表"
+        cze "Příliš mnoho tabulek, MariaDB jich může mít v joinu jen %d"
+        dan "For mange tabeller. MariaDB kan kun bruge %d tabeller i et join"
+        eng "Too many tables; MariaDB can only use %d tables in a join"
+        est "Liiga palju tabeleid. MariaDB suudab JOINiga ühendada kuni %d tabelit"
+        fre "Trop de tables. MariaDB ne peut utiliser que %d tables dans un JOIN"
+        ger "Zu viele Tabellen. MariaDB kann in einem Join maximal %d Tabellen verwenden"
+        geo "მეტისმეტად ბევრი ცხრილი. MariaDB-ს JOIN-ში მხოლოდ %d ცხრილის გამოყენება შეუძლია"
+        greek "Πολύ μεγάλος αριθμός πινάκων. Η MariaDB μπορεί να χρησιμοποιήσει %d πίνακες σε διαδικασία join"
+        hindi "बहुत अधिक टेबल्स, MariaDB एक JOIN में केवल %d टेबल्स का उपयोग कर सकता है"
+        hun "Tul sok tabla. A MariaDB csak %d tablat tud kezelni osszefuzeskor"
+        ita "Troppe tabelle. MariaDB puo` usare solo %d tabelle in una join"
+        jpn "表が多すぎます。MariaDBがJOINできる表は %d 個までです。"
+        kor "너무 많은 테이블이 Join되었습니다. MariaDB에서는 JOIN시 %d개의 테이블만 사용할 수 있습니다."
+        nla "Teveel tabellen. MariaDB kan slechts %d tabellen in een join bevatten"
+        por "Tabelas demais. O MariaDB pode usar somente %d tabelas em uma junção (JOIN)"
+        rum "Prea multe tabele. MariaDB nu poate folosi mai mult de %d tabele intr-un join"
+        rus "Слишком много таблиц. MariaDB может использовать только %d таблиц в соединении"
+        serbian "Previše tabela. MariaDB može upotrebiti maksimum %d tabela pri 'JOIN' operaciji"
+        slo "Príliš mnoho tabuliek. MariaDB môže použiť len %d v JOIN-e"
+        spa "Demasiadas tablas. MariaDB solamente puede usar %d tablas en un join"
+        swe "För många tabeller. MariaDB can ha högst %d tabeller i en och samma join"
+        ukr "Забагато таблиць. MariaDB може використовувати лише %d таблиць у об'єднанні"
+ER_TOO_MANY_FIELDS  
+        chi "太多列"
+        cze "Příliš mnoho položek"
+        dan "For mange felter"
+        eng "Too many columns"
+        est "Liiga palju tulpasid"
+        fre "Trop de champs"
+        ger "Zu viele Felder"
+        geo "მეტისმეტად ბევრი სვეტი"
+        greek "Πολύ μεγάλος αριθμός πεδίων"
+        hindi "बहुत अधिक कॉलम्स"
+        hun "Tul sok mezo"
+        ita "Troppi campi"
+        jpn "列が多すぎます。"
+        kor "칼럼이 너무 많습니다."
+        nla "Te veel velden"
+        por "Colunas demais"
+        rum "Prea multe coloane"
+        rus "Слишком много столбцов"
+        serbian "Previše kolona"
+        slo "Príliš mnoho polí"
+        spa "Demasiadas columnas"
+        swe "För många fält"
+        ukr "Забагато стовбців"
+ER_TOO_BIG_ROWSIZE 42000 
+        chi "行尺寸太大. 不包括BLOB,表的最大的行大小是 %ld. 这包括存储开销,请查看文档。您必须将某些列更改为 TEXT 或 BLOB"
+        cze "Řádek je příliš velký. Maximální velikost řádku, nepočítaje položky blob, je %ld. Musíte změnit některé položky na blob"
+        dan "For store poster. Max post størrelse, uden BLOB's, er %ld. Du må lave nogle felter til BLOB's"
+        eng "Row size too large. The maximum row size for the used table type, not counting BLOBs, is %ld. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs"
+        est "Liiga pikk kirje. Kirje maksimumpikkus arvestamata BLOB-tüüpi välju on %ld. Muuda mõned väljad BLOB-tüüpi väljadeks"
+        fre "Ligne trop grande. Le taille maximale d'une ligne, sauf les BLOBs, est %ld. Changez le type de quelques colonnes en BLOB"
+        ger "Zeilenlänge zu groß. Die maximale Zeilenlänge für den verwendeten Tabellentyp (ohne BLOB-Felder) beträgt %ld. Einige Felder müssen in BLOB oder TEXT umgewandelt werden"
+        geo "მწკრივის ზომა მეტისმეტად დიდია. მწკრივის მაქსიმალური ზომა გამოყენებული ცხრილის ტიპისთვის, BLOB-ების არჩათვლით, %ld-ს წარმოადგენს. ეს შეიცავს საცავის დამატებით ინფორმაციას. მეტი დეტალებისთვის გადაამოწმეთ დოკუმენტაცია. ზოგიერთ სვეტის ტიპი TEXT-ზე ან BLOB-ზე უნდა შეცვალოთ"
+        greek "Πολύ μεγάλο μέγεθος εγγραφής. Το μέγιστο μέγεθος εγγραφής, χωρίς να υπολογίζονται τα blobs, είναι %ld. Πρέπει να ορίσετε κάποια πεδία σαν blobs"
+        hun "Tul nagy sormeret. A maximalis sormeret (nem szamolva a blob objektumokat) %ld. Nehany mezot meg kell valtoztatnia"
+        ita "Riga troppo grande. La massima grandezza di una riga, non contando i BLOB, e` %ld. Devi cambiare alcuni campi in BLOB"
+        jpn "行サイズが大きすぎます。この表の最大行サイズは BLOB を含まずに %ld です。格納時のオーバーヘッドも含まれます(マニュアルを確認してください)。列をTEXTまたはBLOBに変更する必要があります。"
+        kor "너무 큰 row 사이즈입니다. BLOB를 계산하지 않고 최대 row 사이즈는 %ld입니다. 일부열을 BLOB 또는 TEXT로 변경해야 합니다."
+        nla "Rij-grootte is groter dan toegestaan. Maximale rij grootte, blobs niet meegeteld, is %ld. U dient sommige velden in blobs te veranderen"
+        por "Tamanho de linha grande demais. O máximo tamanho de linha, não contando BLOBs, é %ld. Você tem que mudar alguns campos para BLOBs"
+        rum "Marimea liniei (row) prea mare. Marimea maxima a liniei, excluzind BLOB-urile este de %ld. Trebuie sa schimbati unele cimpuri in BLOB-uri"
+        rus "Слишком большой размер записи. Максимальный размер строки, исключая поля BLOB, - %ld. Возможно, вам следует изменить тип некоторых полей на BLOB"
+        serbian "Prevelik slog. Maksimalna veličina sloga, ne računajući BLOB polja, je %ld. Trebali bi da promenite tip nekih polja u BLOB"
+        slo "Riadok je príliš veľký. Maximálna veľkosť riadku, okrem 'BLOB', je %ld. Musíte zmeniť niektoré položky na BLOB"
+        spa "Tamaño de fila muy grande. El máximo tamaño de fila para el tipo de tabla usada, sin contar BLOBs, es de %ld. Esto incluye sobrecarga de almacenaje, revise el manual. Tiene que cambiar algunas columnas a TEXT o BLOBs"
+        swe "För stor total radlängd. Den högst tillåtna radlängden, förutom BLOBs, är %ld. Ändra några av dina fält till BLOB"
+        ukr "Задовга строка. Найбільшою довжиною строки, не рахуючи BLOB, є %ld. Вам потрібно привести деякі стовбці до типу BLOB"
+ER_STACK_OVERRUN  
+        chi "线程栈溢出:  已使用了: %ld 堆栈 %ld.  使用 'mariadbd --thread_stack=#' 指定更大的堆栈"
+        cze "Přetečení zásobníku threadu: použito %ld z %ld. Použijte 'mariadbd --thread_stack=#' k zadání většího zásobníku"
+        dan "Thread stack brugt:  Brugt: %ld af en %ld stak.  Brug 'mariadbd --thread_stack=#' for at allokere en større stak om nødvendigt"
+        eng "Thread stack overrun:  Used: %ld of a %ld stack.  Use 'mariadbd --thread_stack=#' to specify a bigger stack if needed"
+        fre "Débordement de la pile des tâches (Thread stack). Utilisées: %ld pour une pile de %ld.  Essayez 'mariadbd --thread_stack=#' pour indiquer une plus grande valeur"
+        ger "Thread-Stack-Überlauf. Benutzt: %ld von %ld Stack. 'mariadbd --thread_stack=#' verwenden, um bei Bedarf einen größeren Stack anzulegen"
+        geo "ნაკადის სტეკის გადავსება: გამოყენებულია %ld ბაიტი %ld სტეკიდან. უფრო დიდი მნიშვნელობის მისათითებლად, როცა ეს აუცილებელია, გამოიყენეთ 'mariadbd --thread_stack=#'"
+        greek "Stack overrun στο thread:  Used: %ld of a %ld stack.  Παρακαλώ χρησιμοποιείστε 'mariadbd --thread_stack=#' για να ορίσετε ένα μεγαλύτερο stack αν χρειάζεται"
+        hun "Thread verem tullepes:  Used: %ld of a %ld stack. Hasznalja a 'mariadbd --thread_stack=#' nagyobb verem definialasahoz"
+        ita "Thread stack overrun:  Usati: %ld di uno stack di %ld.  Usa 'mariadbd --thread_stack=#' per specificare uno stack piu` grande"
+        jpn "スレッドスタック不足です(使用: %ld ; サイズ: %ld)。必要に応じて、より大きい値で 'mariadbd --thread_stack=#' の指定をしてください。"
+        kor "쓰레드 스택이 넘쳤습니다.  사용: %ld개 스택: %ld개.  만약 필요시 더큰 스택을 원할때에는 'mariadbd --thread_stack=#' 를 정의하세요"
+        nla "Thread stapel overrun:  Gebruikte: %ld van een %ld stack. Gebruik 'mariadbd --thread_stack=#' om een grotere stapel te definieren (indien noodzakelijk)"
+        por "Estouro da pilha do 'thread'. Usados %ld de uma pilha de %ld. Use 'mariadbd --thread_stack=#' para especificar uma pilha maior, se necessário"
+        rum "Stack-ul thread-ului a fost depasit (prea mic):  Folositi: %ld intr-un stack de %ld.  Folositi 'mariadbd --thread_stack=#' ca sa specifici un stack mai mare"
+        rus "Стек потоков переполнен:  использовано: %ld из %ld стека.  Применяйте 'mariadbd --thread_stack=#' для указания большего размера стека, если необходимо"
+        serbian "Prepisivanje thread stack-a:  Upotrebljeno: %ld od %ld stack memorije.  Upotrebite 'mariadbd --thread_stack=#' da navedete veći stack ako je potrebno"
+        slo "Pretečenie zásobníku vlákna:  použité: %ld z %ld.  Použite 'mariadbd --thread_stack=#' k zadaniu väčšieho zásobníka"
+        spa "Desbordamiento de la pila de hilos (threads): Usado: %ld de una pila de %ld. Considere el incrementar la variable de sistema thread_stack"
+        swe "Trådstacken tog slut:  Har använt %ld av %ld bytes.  Använd 'mariadbd --thread_stack=#' ifall du behöver en större stack"
+        ukr "Стек гілок переповнено:  Використано: %ld з %ld. Використовуйте 'mariadbd --thread_stack=#' аби зазначити більший стек, якщо необхідно"
+ER_WRONG_OUTER_JOIN 42000 
+        chi "在 OUTER JOIN 中发现交叉依赖;检查您的 ON 条件"
+        cze "V OUTER JOIN byl nalezen křížový odkaz. Prověřte ON podmínky"
+        dan "Krydsreferencer fundet i OUTER JOIN; check dine ON conditions"
+        eng "Cross dependency found in OUTER JOIN; examine your ON conditions"
+        est "Ristsõltuvus OUTER JOIN klauslis. Kontrolli oma ON tingimusi"
+        fre "Dépendance croisée dans une clause OUTER JOIN. Vérifiez la condition ON"
+        ger "OUTER JOIN enthält fehlerhafte Abhängigkeiten. In ON verwendete Bedingungen überprüfen"
+        geo "OUTER JOIN-ში აღმოჩენილია ჯვარედინი დამოკიდებულებები. შეამოწმეთ თქვენი ON-ის პირობები"
+        greek "Cross dependency βρέθηκε σε OUTER JOIN.  Παρακαλώ εξετάστε τις συνθήκες που θέσατε στο ON"
+        hun "Keresztfuggoseg van az OUTER JOIN-ban. Ellenorizze az ON felteteleket"
+        ita "Trovata una dipendenza incrociata nella OUTER JOIN. Controlla le condizioni ON"
+        jpn "OUTER JOINに相互依存が見つかりました。ON句の条件を確認して下さい。"
+        nla "Gekruiste afhankelijkheid gevonden in OUTER JOIN. Controleer uw ON-conditions"
+        por "Dependência cruzada encontrada em junção externa (OUTER JOIN); examine as condições utilizadas nas cláusulas 'ON'"
+        rum "Dependinta incrucisata (cross dependency) gasita in OUTER JOIN.  Examinati conditiile ON"
+        rus "В OUTER JOIN обнаружена перекрестная зависимость. Внимательно проанализируйте свои условия ON"
+        serbian "Unakrsna zavisnost pronađena u komandi 'OUTER JOIN'. Istražite vaše 'ON' uslove"
+        slo "V OUTER JOIN bol nájdený krížový odkaz.  Skontrolujte podmienky ON"
+        spa "Dependencia cruzada hallada en OUTER JOIN. Examina tus condiciones ON"
+        swe "Felaktigt referens i OUTER JOIN.  Kontrollera ON-uttrycket"
+        ukr "Перехресна залежність у OUTER JOIN. Перевірте умову ON"
+ER_NULL_COLUMN_IN_INDEX 42000 
+        chi "表处理程序不支持给定索引中的 NULL. 请将列 '%-.192s' 改为 NOT NULL 或使用其他处理程序"
+        eng "Table handler doesn't support NULL in given index. Please change column '%-.192s' to be NOT NULL or use another handler"
+        geo "ცხრილის დამმუშავებელს მითითებულ ინდექში NULL-ის მხარადაჭერა არ გააჩნია. შეცვალეთ სვეტი '%-.192s', რომ გახდეს NOT NULL ტიპის, ან სხვა დამმუშავებელი გამოიყენეთ"
+        spa "El manejador de tabla no soporta NULL en índice suministrado. Por favor, cambie la columna '%-.192s' para que sea NOT NULL o utilice otro manejador"
+        swe "Tabell hanteraren kan inte indexera NULL kolumner för den givna index typen. Ändra '%-.192s' till NOT NULL eller använd en annan hanterare"
+        ukr "Вказівник таблиці не підтримує NULL у зазначенному індексі. Будь ласка, зменіть стовпчик '%-.192s' на NOT NULL або використайте інший вказівник таблиці."
+ER_CANT_FIND_UDF  
+        chi "无法加载函数'%-.192s'"
+        cze "Nemohu načíst funkci '%-.192s'"
+        dan "Kan ikke læse funktionen '%-.192s'"
+        eng "Can't load function '%-.192s'"
+        est "Ei suuda avada funktsiooni '%-.192s'"
+        fre "Impossible de charger la fonction '%-.192s'"
+        ger "Kann Funktion '%-.192s' nicht laden"
+        geo "ფუნქციის ('%-.192s') ჩატვირთვა შეუძლებელია"
+        greek "Δεν είναι δυνατή η διαδικασία load για τη συνάρτηση '%-.192s'"
+        hindi "फंक्शन '%-.192s' लोड नहीं किया जा सका"
+        hun "A(z) '%-.192s' fuggveny nem toltheto be"
+        ita "Impossibile caricare la funzione '%-.192s'"
+        jpn "関数 '%-.192s' をロードできません。"
+        kor "'%-.192s' 함수를 로드하지 못했습니다."
+        nla "Kan functie '%-.192s' niet laden"
+        por "Não pode carregar a função '%-.192s'"
+        rum "Nu pot incarca functia '%-.192s'"
+        rus "Невозможно загрузить функцию '%-.192s'"
+        serbian "Ne mogu da učitam funkciju '%-.192s'"
+        slo "Nemôžem načítať funkciu '%-.192s'"
+        spa "No puedo cargar la función '%-.192s'"
+        swe "Kan inte ladda funktionen '%-.192s'"
+        ukr "Не можу завантажити функцію '%-.192s'"
+ER_CANT_INITIALIZE_UDF  
+        chi "无法初始化函数 '%-.192s'; %-.80s"
+        cze "Nemohu inicializovat funkci '%-.192s'; %-.80s"
+        dan "Kan ikke starte funktionen '%-.192s'; %-.80s"
+        eng "Can't initialize function '%-.192s'; %-.80s"
+        est "Ei suuda algväärtustada funktsiooni '%-.192s'; %-.80s"
+        fre "Impossible d'initialiser la fonction '%-.192s'; %-.80s"
+        ger "Kann Funktion '%-.192s' nicht initialisieren: %-.80s"
+        geo "ფუნქციის ('%-.192s') ინიციალიზაციის შეცდომა.'%-.80s'"
+        greek "Δεν είναι δυνατή η έναρξη της συνάρτησης '%-.192s'; %-.80s"
+        hindi "फंक्शन '%-.192s' को प्रारंभ नहीं किया जा सका; %-.80s"
+        hun "A(z) '%-.192s' fuggveny nem inicializalhato; %-.80s"
+        ita "Impossibile inizializzare la funzione '%-.192s'; %-.80s"
+        jpn "関数 '%-.192s' を初期化できません。; %-.80s"
+        kor "'%-.192s' 함수를 초기화 하지 못했습니다.; %-.80s"
+        nla "Kan functie '%-.192s' niet initialiseren; %-.80s"
+        por "Não pode inicializar a função '%-.192s' - '%-.80s'"
+        rum "Nu pot initializa functia '%-.192s'; %-.80s"
+        rus "Невозможно инициализировать функцию '%-.192s'; %-.80s"
+        serbian "Ne mogu da inicijalizujem funkciju '%-.192s'; %-.80s"
+        slo "Nemôžem inicializovať funkciu '%-.192s'; %-.80s"
+        spa "No puedo inicializar la función '%-.192s'; %-.80s"
+        swe "Kan inte initialisera funktionen '%-.192s'; '%-.80s'"
+        ukr "Не можу ініціалізувати функцію '%-.192s'; %-.80s"
+ER_UDF_NO_PATHS  
+        chi "共享库不允许使用路径"
+        cze "Pro sdílenou knihovnu nejsou povoleny cesty"
+        dan "Angivelse af sti ikke tilladt for delt bibliotek"
+        eng "No paths allowed for shared library"
+        est "Teegi nimes ei tohi olla kataloogi"
+        fre "Chemin interdit pour les bibliothèques partagées"
+        ger "Keine Pfade gestattet für Shared Library"
+        geo "გაზიარებული ბიბლიოთეკისთვის ბილიკის მითითება დაუშვებელია"
+        greek "Δεν βρέθηκαν paths για την shared library"
+        hun "Nincs ut a megosztott konyvtarakhoz (shared library)"
+        ita "Non sono ammessi path per le librerie condivisa"
+        jpn "共有ライブラリにはパスを指定できません。"
+        kor "공유 라이버러리를 위한 패스가 정의되어 있지 않습니다."
+        nla "Geen pad toegestaan voor shared library"
+        por "Não há caminhos (paths) permitidos para biblioteca compartilhada"
+        rum "Nici un paths nu e permis pentru o librarie shared"
+        rus "Недопустимо указывать пути для динамических библиотек"
+        serbian "Ne postoje dozvoljene putanje do share-ovane biblioteke"
+        slo "Neprípustné žiadne cesty k zdieľanej knižnici"
+        spa "No existen rutas autorizadas para biblioteca compartida"
+        swe "Man får inte ange sökväg för dynamiska bibliotek"
+        ukr "Не дозволено використовувати путі для розділюваних бібліотек"
+ER_UDF_EXISTS  
+        chi "函数 '%-.192s' 已经存在"
+        cze "Funkce '%-.192s' již existuje"
+        dan "Funktionen '%-.192s' findes allerede"
+        eng "Function '%-.192s' already exists"
+        est "Funktsioon '%-.192s' juba eksisteerib"
+        fre "La fonction '%-.192s' existe déjà"
+        ger "Funktion '%-.192s' existiert schon"
+        geo "ფუნქცია '%-.192s' უკვე არსებობს"
+        greek "Η συνάρτηση '%-.192s' υπάρχει ήδη"
+        hindi "फंक्शन '%-.192s' पहले से मौजूद है"
+        hun "A '%-.192s' fuggveny mar letezik"
+        ita "La funzione '%-.192s' esiste gia`"
+        jpn "関数 '%-.192s' はすでに定義されています。"
+        kor "'%-.192s' 함수는 이미 존재합니다."
+        nla "Functie '%-.192s' bestaat reeds"
+        por "Função '%-.192s' já existe"
+        rum "Functia '%-.192s' exista deja"
+        rus "Функция '%-.192s' уже существует"
+        serbian "Funkcija '%-.192s' već postoji"
+        slo "Funkcia '%-.192s' už existuje"
+        spa "La función '%-.192s' ya existe"
+        swe "Funktionen '%-.192s' finns redan"
+        ukr "Функція '%-.192s' вже існує"
+ER_CANT_OPEN_LIBRARY  
+        chi "不能打开共享库 '%-.192s' (错误号码: %d, %-.128s)"
+        cze "Nemohu otevřít sdílenou knihovnu '%-.192s' (errno: %d, %-.128s)"
+        dan "Kan ikke åbne delt bibliotek '%-.192s' (errno: %d, %-.128s)"
+        eng "Can't open shared library '%-.192s' (errno: %d, %-.128s)"
+        est "Ei suuda avada jagatud teeki '%-.192s' (veakood: %d, %-.128s)"
+        fre "Impossible d'ouvrir la bibliothèque partagée '%-.192s' (errno: %d, %-.128s)"
+        ger "Kann Shared Library '%-.192s' nicht öffnen (Fehler: %d, %-.128s)"
+        geo "გაზიარებული ბიბლიოთეკის ('%-.192s') გახსნის შეცდომა. (შეცდ. კოდი: %d, '%-.128s')"
+        greek "Δεν είναι δυνατή η ανάγνωση της shared library '%-.192s' (κωδικός λάθους: %d, %-.128s)"
+        hun "A(z) '%-.192s' megosztott konyvtar nem hasznalhato (hibakod: %d, %-.128s)"
+        ita "Impossibile aprire la libreria condivisa '%-.192s' (errno: %d, %-.128s)"
+        jpn "共有ライブラリ '%-.192s' を開く事ができません。(エラー番号: %d, %-.128s)"
+        kor "'%-.192s' 공유 라이버러리를 열수 없습니다.(에러번호: %d, %-.128s)"
+        nla "Kan shared library '%-.192s' niet openen (Errcode: %d, %-.128s)"
+        nor "Can't open shared library '%-.192s' (errno: %d, %-.128s)"
+        norwegian-ny "Can't open shared library '%-.192s' (errno: %d, %-.128s)"
+        pol "Can't open shared library '%-.192s' (errno: %d, %-.128s)"
+        por "Não pode abrir biblioteca compartilhada '%-.192s' (erro no. %d, %-.128s)"
+        rum "Nu pot deschide libraria shared '%-.192s' (Eroare: %d, %-.128s)"
+        rus "Невозможно открыть динамическую библиотеку '%-.192s' (ошибка: %d, %-.128s)"
+        serbian "Ne mogu da otvorim share-ovanu biblioteku '%-.192s' (errno: %d, %-.128s)"
+        slo "Nemôžem otvoriť zdieľanú knižnicu '%-.192s' (chybový kód: %d, %-.128s)"
+        spa "No puedo abrir la biblioteca compartida '%-.192s' (error: %d, %-.128s)"
+        swe "Kan inte öppna det dynamiska biblioteket '%-.192s' (Felkod: %d, %-.128s)"
+        ukr "Не можу відкрити розділювану бібліотеку '%-.192s' (помилка: %d, %-.128s)"
+ER_CANT_FIND_DL_ENTRY
+        chi "在库中找不到符号 '%-.128s'"
+        cze "Nemohu najít funkci '%-.128s' v knihovně"
+        dan "Kan ikke finde funktionen '%-.128s' i bibliotek"
+        eng "Can't find symbol '%-.128s' in library"
+        est "Ei leia funktsiooni '%-.128s' antud teegis"
+        fre "Impossible de trouver la fonction '%-.128s' dans la bibliothèque"
+        ger "Kann Funktion '%-.128s' in der Library nicht finden"
+        geo "ბიბლიოთეკაში სიმბოლო '%-.128s' ვერ ვიპოვე"
+        greek "Δεν είναι δυνατή η ανεύρεση της συνάρτησης '%-.128s' στην βιβλιοθήκη"
+        hun "A(z) '%-.128s' fuggveny nem talalhato a konyvtarban"
+        ita "Impossibile trovare la funzione '%-.128s' nella libreria"
+        jpn "関数 '%-.128s' は共有ライブラリー中にありません。"
+        kor "라이버러리에서 '%-.128s' 함수를 찾을 수 없습니다."
+        nla "Kan functie '%-.128s' niet in library vinden"
+        por "Não pode encontrar a função '%-.128s' na biblioteca"
+        rum "Nu pot gasi functia '%-.128s' in libraria"
+        rus "Невозможно отыскать символ '%-.128s' в библиотеке"
+        serbian "Ne mogu da pronadjem funkciju '%-.128s' u biblioteci"
+        slo "Nemôžem nájsť funkciu '%-.128s' v knižnici"
+        spa "No puedo encontrar el símbolo '%-.128s' en biblioteca"
+        swe "Hittar inte funktionen '%-.128s' in det dynamiska biblioteket"
+        ukr "Не можу знайти функцію '%-.128s' у бібліотеці"
+ER_FUNCTION_NOT_DEFINED  
+        chi "未定义函数 '%-.192s'"
+        cze "Funkce '%-.192s' není definována"
+        dan "Funktionen '%-.192s' er ikke defineret"
+        eng "Function '%-.192s' is not defined"
+        est "Funktsioon '%-.192s' ei ole defineeritud"
+        fre "La fonction '%-.192s' n'est pas définie"
+        ger "Funktion '%-.192s' ist nicht definiert"
+        geo "ფუნქცია '%-.192s' აღწერილი არაა"
+        greek "Η συνάρτηση '%-.192s' δεν έχει ορισθεί"
+        hindi "फंक्शन '%-.192s' की परिभाषा नहीं मिली"
+        hun "A '%-.192s' fuggveny nem definialt"
+        ita "La funzione '%-.192s' non e` definita"
+        jpn "関数 '%-.192s' は定義されていません。"
+        kor "'%-.192s' 함수가 정의되어 있지 않습니다."
+        nla "Functie '%-.192s' is niet gedefinieerd"
+        por "Função '%-.192s' não está definida"
+        rum "Functia '%-.192s' nu e definita"
+        rus "Функция '%-.192s' не определена"
+        serbian "Funkcija '%-.192s' nije definisana"
+        slo "Funkcia '%-.192s' nie je definovaná"
+        spa "La función '%-.192s' no está definida"
+        swe "Funktionen '%-.192s' är inte definierad"
+        ukr "Функцію '%-.192s' не визначено"
+ER_HOST_IS_BLOCKED  
+        chi "主机 '%-.64s' 由于许多连接错误而被阻止;使用 'mariadb-admin flush-hosts' 解除阻塞"
+        cze "Stroj '%-.64s' je zablokován kvůli mnoha chybám při připojování. Odblokujete použitím 'mariadb-admin flush-hosts'"
+        dan "Værten '%-.64s' er blokeret på grund af mange fejlforespørgsler. Lås op med 'mariadb-admin flush-hosts'"
+        eng "Host '%-.64s' is blocked because of many connection errors; unblock with 'mariadb-admin flush-hosts'"
+        est "Masin '%-.64s' on blokeeritud hulgaliste ühendusvigade tõttu. Blokeeringu saab tühistada 'mariadb-admin flush-hosts' käsuga"
+        fre "L'hôte '%-.64s' est bloqué à cause d'un trop grand nombre d'erreur de connexion. Débloquer le par 'mariadb-admin flush-hosts'"
+        ger "Host '%-.64s' blockiert wegen zu vieler Verbindungsfehler. Aufheben der Blockierung mit 'mariadb-admin flush-hosts'"
+        geo "ჰოსტი '%-.64s' დაიბლოკა მაკავშირებისას მეტისმეტად ბევრი შეცდომის გამო. განბლოკეთ ს ბრძანებით 'mariadb-admin flush-hosts'"
+        greek "Ο υπολογιστής '%-.64s' έχει αποκλεισθεί λόγω πολλαπλών λαθών σύνδεσης. Προσπαθήστε να διορθώσετε με 'mariadb-admin flush-hosts'"
+        hindi "होस्ट '%-.64s' को कई कनेक्शन में त्रुटियों के कारण ब्लॉक कर दिया गया है; 'mariadb-admin flush-hosts' का इस्तेमाल कर अनब्लॉक करें"
+        hun "A '%-.64s' host blokkolodott, tul sok kapcsolodasi hiba miatt. Hasznalja a 'mariadb-admin flush-hosts' parancsot"
+        ita "Sistema '%-.64s' bloccato a causa di troppi errori di connessione. Per sbloccarlo: 'mariadb-admin flush-hosts'"
+        jpn "接続エラーが多いため、ホスト '%-.64s' は拒否されました。'mariadb-admin flush-hosts' で解除できます。"
+        kor "너무 많은 연결오류로 인하여 호스트 '%-.64s'는 블락되었습니다. 'mariadb-admin flush-hosts'를 이용하여 블락을 해제하세요"
+        nla "Host '%-.64s' is geblokkeeerd vanwege te veel verbindings fouten. Deblokkeer met 'mariadb-admin flush-hosts'"
+        por "'Host' '%-.64s' está bloqueado devido a muitos erros de conexão. Desbloqueie com 'mariadb-admin flush-hosts'"
+        rum "Host-ul '%-.64s' e blocat din cauza multelor erori de conectie. Poti deploca folosind 'mariadb-admin flush-hosts'"
+        rus "Хост '%-.64s' заблокирован из-за слишком большого количества ошибок соединения. Разблокировать его можно с помощью 'mariadb-admin flush-hosts'"
+        serbian "Host '%-.64s' je blokiran zbog previše grešaka u konekciji.  Možete ga odblokirati pomoću komande 'mariadb-admin flush-hosts'"
+        spa "El equipo '%-.64s' está bloqueado debido a muchos errores de conexión; desbloquea con 'mariadb-admin flush-hosts'"
+        swe "Denna dator, '%-.64s', är blockerad pga många felaktig paket. Gör 'mariadb-admin flush-hosts' för att ta bort alla blockeringarna"
+        ukr "Хост '%-.64s' заблоковано з причини великої кількості помилок з'єднання. Для розблокування використовуйте 'mariadb-admin flush-hosts'"
+ER_HOST_NOT_PRIVILEGED  
+        chi "Host'%-.64s'不允许连接到此MariaDB服务器"
+        cze "Stroj '%-.64s' nemá povoleno se k tomuto MariaDB serveru připojit"
+        dan "Værten '%-.64s' kan ikke tilkoble denne MariaDB-server"
+        eng "Host '%-.64s' is not allowed to connect to this MariaDB server"
+        est "Masinal '%-.64s' puudub ligipääs sellele MariaDB serverile"
+        fre "Le hôte '%-.64s' n'est pas authorisé à se connecter à ce serveur MariaDB"
+        ger "Host '%-.64s' hat keine Berechtigung, sich mit diesem MariaDB-Server zu verbinden"
+        geo "ჰოსტს '%-.64s' MariaDB-ის ამ სერვერთან მიერთება აკრძალული აქვს"
+        greek "Ο υπολογιστής '%-.64s' δεν έχει δικαίωμα σύνδεσης με τον MariaDB server"
+        hindi "होस्ट '%-.64s' को इस MariaDB सर्वर से कनेक्ट करने के लिए अनुमति नहीं है"
+        hun "A '%-.64s' host szamara nem engedelyezett a kapcsolodas ehhez a MariaDB szerverhez"
+        ita "Al sistema '%-.64s' non e` consentita la connessione a questo server MariaDB"
+        jpn "ホスト '%-.64s' からのこの MariaDB server への接続は許可されていません。"
+        kor "'%-.64s' 호스트는 이 MariaDB서버에 접속할 허가를 받지 못했습니다."
+        nla "Het is host '%-.64s' is niet toegestaan verbinding te maken met deze MariaDB server"
+        por "'Host' '%-.64s' não tem permissão para se conectar com este servidor MariaDB"
+        rum "Host-ul '%-.64s' nu este permis a se conecta la aceste server MariaDB"
+        rus "Хосту '%-.64s' не разрешается подключаться к этому серверу MariaDB"
+        serbian "Host-u '%-.64s' nije dozvoljeno da se konektuje na ovaj MariaDB server"
+        spa "El equipo '%-.64s' no está autorizado a conectar con este servidor MariaDB"
+        swe "Denna dator, '%-.64s', har inte privileger att använda denna MariaDB server"
+        ukr "Хосту '%-.64s' не доволено зв'язуватись з цим сервером MariaDB"
+ER_PASSWORD_ANONYMOUS_USER 42000 
+        chi "您正在以匿名用户身份使用 MariaDB,匿名用户不能修改用户设置"
+        cze "Používáte MariaDB jako anonymní uživatel a anonymní uživatelé nemají povoleno měnit hesla"
+        dan "Du bruger MariaDB som anonym bruger. Anonyme brugere må ikke ændre adgangskoder"
+        eng "You are using MariaDB as an anonymous user and anonymous users are not allowed to modify user settings"
+        est "Te kasutate MariaDB-i anonüümse kasutajana, kelledel pole parooli muutmise õigust"
+        fre "Vous utilisez un utilisateur anonyme et les utilisateurs anonymes ne sont pas autorisés à changer les mots de passe"
+        ger "Sie benutzen MariaDB als anonymer Benutzer und dürfen daher keine Passwörter ändern"
+        geo "MariaDB-ს ანონიმური მომხმარებლით იყენებთ და ანონიმურ მომხმარებლებს მომხმარებლების პარამეტრები შეცვლის უფლება არ გააჩნიათ"
+        greek "Χρησιμοποιείτε την MariaDB σαν anonymous user και έτσι δεν μπορείτε να αλλάξετε τα passwords άλλων χρηστών"
+        hindi "आप MariaDB का उपयोग एक बेनाम यूज़र की तरह कर रहे हैं; बेनाम यूज़र्स को 'यूज़र सेटिंग्स' बदलने की अनुमति नहीं है"
+        hun "Nevtelen (anonymous) felhasznalokent nem engedelyezett a jelszovaltoztatas"
+        ita "Impossibile cambiare la password usando MariaDB come utente anonimo"
+        jpn "MariaDB を匿名ユーザーで使用しているので、パスワードの変更はできません。"
+        kor "당신은 MariaDB서버에 익명의 사용자로 접속을 하셨습니다.익명의 사용자는 암호를 변경할 수 없습니다."
+        nla "U gebruikt MariaDB als anonieme gebruiker en deze mogen geen wachtwoorden wijzigen"
+        por "Você está usando o MariaDB como usuário anônimo e usuários anônimos não têm permissão para mudar senhas"
+        rum "Dumneavoastra folositi MariaDB ca un utilizator anonim si utilizatorii anonimi nu au voie sa schimbe setarile utilizatorilor"
+        rus "Вы используете MariaDB от имени анонимного пользователя, а анонимным пользователям не разрешается менять пароли"
+        serbian "Vi koristite MariaDB kao anonimni korisnik a anonimnim korisnicima nije dozvoljeno da menjaju lozinke"
+        spa "Está usando MariaDB como un usuario anónimo y lo usuarios anónimos no tienen permiso para cambiar las propiedades de usuario"
+        swe "Du använder MariaDB som en anonym användare och som sådan får du inte ändra ditt lösenord"
+        ukr "Ви використовуєте MariaDB як анонімний користувач, тому вам не дозволено змінювати паролі"
+ER_PASSWORD_NOT_ALLOWED 42000 
+        chi "您必须具有更新 MariaDB 数据库中的表的权限才能更改其他人的密码"
+        cze "Na změnu hesel ostatním musíte mít právo provést update tabulek v databázi mysql"
+        dan "Du skal have tilladelse til at opdatere tabeller i MariaDB databasen for at ændre andres adgangskoder"
+        eng "You must have privileges to update tables in the mysql database to be able to change passwords for others"
+        est "Teiste paroolide muutmiseks on nõutav tabelite muutmisõigus 'mysql' andmebaasis"
+        fre "Vous devez avoir le privilège update sur les tables de la base de donnée mysql pour pouvoir changer les mots de passe des autres"
+        ger "Sie benötigen die Berechtigung zum Aktualisieren von Tabellen in der Datenbank 'mysql', um die Passwörter anderer Benutzer ändern zu können"
+        geo "სხვებისთვის პაროლების შესაცვლელდ mysql მონაცემთა ბაზაში ცხრილების განახლების უფლება უნდა გქონდეთ"
+        greek "Πρέπει να έχετε δικαίωμα διόρθωσης πινάκων (update) στη βάση δεδομένων mysql για να μπορείτε να αλλάξετε τα passwords άλλων χρηστών"
+        hun "Onnek tabla-update joggal kell rendelkeznie a mysql adatbazisban masok jelszavanak megvaltoztatasahoz"
+        ita "E` necessario il privilegio di update sulle tabelle del database mysql per cambiare le password per gli altri utenti"
+        jpn "他のユーザーのパスワードを変更するためには、mysqlデータベースの表を更新する権限が必要です。"
+        kor "당신은 다른사용자들의 암호를 변경할 수 있도록 데이타베이스 변경권한을 가져야 합니다."
+        nla "U moet tabel update priveleges hebben in de MariaDB database om wachtwoorden voor anderen te mogen wijzigen"
+        por "Você deve ter privilégios para atualizar tabelas no banco de dados mysql para ser capaz de mudar a senha de outros"
+        rum "Trebuie sa aveti privilegii sa actualizati tabelele in bazele de date mysql ca sa puteti sa schimati parolele altora"
+        rus "Для того чтобы изменять пароли других пользователей, у вас должны быть привилегии на изменение таблиц в базе данных mysql"
+        serbian "Morate imati privilegije da možete da update-ujete određene tabele ako želite da menjate lozinke za druge korisnike"
+        spa "Vd debe de tener privilegios para actualizar tablas en la base de datos mysql para poder cambiar las contraseñas de otros"
+        swe "För att ändra lösenord för andra måste du ha rättigheter att uppdatera mysql-databasen"
+        ukr "Ви повині мати право на оновлення таблиць у базі данних mysql, аби мати можливість змінювати пароль іншим"
+ER_PASSWORD_NO_MATCH 28000 
+        chi "在用户表中找不到任何匹配的行"
+        cze "V tabulce user není žádný odpovídající řádek"
+        dan "Kan ikke finde nogen tilsvarende poster i bruger tabellen"
+        eng "Can't find any matching row in the user table"
+        est "Ei leia vastavat kirjet kasutajate tabelis"
+        fre "Impossible de trouver un enregistrement correspondant dans la table user"
+        ger "Kann keinen passenden Datensatz in Tabelle 'user' finden"
+        geo "მომხმარებლის ცხრილში შესატყვისი მწკრივი ვერ ვიპოვე"
+        greek "Δεν είναι δυνατή η ανεύρεση της αντίστοιχης εγγραφής στον πίνακα των χρηστών"
+        hindi "यूज़र टेबल में रिकॉर्ड नहीं मिला"
+        hun "Nincs megegyezo sor a user tablaban"
+        ita "Impossibile trovare la riga corrispondente nella tabella user"
+        jpn "ユーザーテーブルに該当するレコードが見つかりません。"
+        kor "사용자 테이블에서 일치하는 것을 찾을 수 없습니다."
+        nla "Kan geen enkele passende rij vinden in de gebruikers tabel"
+        por "Não pode encontrar nenhuma linha que combine na tabela usuário (user table)"
+        rum "Nu pot gasi nici o linie corespunzatoare in tabela utilizatorului"
+        rus "Невозможно отыскать подходящую запись в таблице пользователей"
+        serbian "Ne mogu da pronađem odgovarajući slog u 'user' tabeli"
+        spa "No puedo encontrar una fila coincidente en la tabla de usuario"
+        swe "Hittade inte användaren i 'user'-tabellen"
+        ukr "Не можу знайти відповідних записів у таблиці користувача"
+ER_UPDATE_INFO  
+        chi "匹配行:%ld已更改:%ld警告:%ld"
+        cze "Nalezených řádků: %ld  Změněno: %ld  Varování: %ld"
+        dan "Poster fundet: %ld  Ændret: %ld  Advarsler: %ld"
+        eng "Rows matched: %ld  Changed: %ld  Warnings: %ld"
+        est "Sobinud kirjeid: %ld  Muudetud: %ld  Hoiatusi: %ld"
+        fre "Enregistrements correspondants: %ld  Modifiés: %ld  Warnings: %ld"
+        ger "Datensätze gefunden: %ld  Geändert: %ld  Warnungen: %ld"
+        geo "ემთხვევა მწკრივები: %ld შეიცვალა: %ld გაფრთხილებები: %ld"
+        hun "Megegyezo sorok szama: %ld  Valtozott: %ld  Warnings: %ld"
+        ita "Rows riconosciute: %ld  Cambiate: %ld  Warnings: %ld"
+        jpn "該当した行: %ld  変更: %ld  警告: %ld"
+        kor "일치하는 Rows : %ld개 변경됨: %ld개  경고: %ld개"
+        nla "Passende rijen: %ld  Gewijzigd: %ld  Waarschuwingen: %ld"
+        por "Linhas que combinaram: %ld - Alteradas: %ld - Avisos: %ld"
+        rum "Linii identificate (matched): %ld  Schimbate: %ld  Atentionari (warnings): %ld"
+        rus "Совпало записей: %ld  Изменено: %ld  Предупреждений: %ld"
+        serbian "Odgovarajućih slogova: %ld  Promenjeno: %ld  Upozorenja: %ld"
+        spa "Líneas coincidentes: %ld Cambiadas: %ld Avisos: %ld"
+        swe "Rader: %ld  Uppdaterade: %ld  Varningar: %ld"
+        ukr "Записів відповідає: %ld  Змінено: %ld  Застережень: %ld"
+ER_CANT_CREATE_THREAD  
+        chi "无法创建新线程 (错误号码 %M); 如果您没有用完剩余内存,您可以查阅文档以了解可能与操作系统相关的错误"
+        cze "Nemohu vytvořit nový thread (errno %M). Pokud je ještě nějaká volná paměť, podívejte se do manuálu na část o chybách specifických pro jednotlivé operační systémy"
+        dan "Kan ikke danne en ny tråd (fejl nr. %M). Hvis computeren ikke er løbet tør for hukommelse, kan du se i brugervejledningen for en mulig operativ-system - afhængig fejl"
+        eng "Can't create a new thread (errno %M); if you are not out of available memory, you can consult the manual for a possible OS-dependent bug"
+        est "Ei suuda luua uut lõime (veakood %M). Kui mälu ei ole otsas, on tõenäoliselt tegemist operatsioonisüsteemispetsiifilise veaga"
+        fre "Impossible de créer une nouvelle tâche (errno %M). S'il reste de la mémoire libre, consultez le manual pour trouver un éventuel bug dépendant de l'OS"
+        ger "Kann keinen neuen Thread erzeugen (Fehler: %M). Sollte noch Speicher verfügbar sein, bitte im Handbuch wegen möglicher Fehler im Betriebssystem nachschlagen"
+        geo "ახალი ნაკადის შექმნა შეუძლებელია (შეცდ. კოდი %M). თუ მეხსიერება ნამდვილად საკმარისია, გადაავლეთ თვალი დოკუმენტაციას, გადაამოწმეთ ოს-ზე დამოკიდებული შეცდომის არსებობა"
+        hun "Uj thread letrehozasa nem lehetseges (Hibakod: %M). Amenyiben van meg szabad memoria, olvassa el a kezikonyv operacios rendszerfuggo hibalehetosegekrol szolo reszet"
+        ita "Impossibile creare un nuovo thread (errno %M). Se non ci sono problemi di memoria disponibile puoi consultare il manuale per controllare possibili problemi dipendenti dal SO"
+        jpn "新規にスレッドを作成できません。(エラー番号 %M) もしも使用可能メモリーの不足でなければ、OS依存のバグである可能性があります。"
+        kor "새로운 쓰레드를 만들 수 없습니다.(에러번호 %M). 만약 여유메모리가 있다면 OS-dependent버그 의 메뉴얼 부분을 찾아보시오."
+        nla "Kan geen nieuwe thread aanmaken (Errcode: %M). Indien er geen tekort aan geheugen is kunt u de handleiding consulteren over een mogelijke OS afhankelijke fout"
+        nor "Can't create a new thread (errno %M); if you are not out of available memory you can consult the manual for any possible OS dependent bug"
+        norwegian-ny "Can't create a new thread (errno %M); if you are not out of available memory you can consult the manual for any possible OS dependent bug"
+        pol "Can't create a new thread (errno %M); if you are not out of available memory you can consult the manual for any possible OS dependent bug"
+        por "Não pode criar uma nova 'thread' (erro no. %M). Se você não estiver sem memória disponível, você pode consultar o manual sobre um possível 'bug' dependente do sistema operacional"
+        rum "Nu pot crea un thread nou (Eroare %M). Daca mai aveti memorie disponibila in sistem, puteti consulta manualul - ar putea exista un potential bug in legatura cu sistemul de operare"
+        rus "Невозможно создать новый поток (ошибка %M). Если это не ситуация, связанная с нехваткой памяти, то вам следует изучить документацию на предмет описания возможной ошибки работы в конкретной ОС"
+        serbian "Ne mogu da kreiram novi thread (errno %M). Ako imate još slobodne memorije, trebali biste da pogledate u priručniku da li je ovo specifična greška vašeg operativnog sistema"
+        spa "No puedo crear un nuevo hilo (thread) (error %M). Si no está falto de memoria disponible, vd puede consultar el manual para un posible error dependiente del SO"
+        swe "Kan inte skapa en ny tråd (errno %M)"
+        ukr "Не можу створити нову гілку (помилка %M). Якщо ви не використали усю пам'ять, то прочитайте документацію до вашої ОС - можливо це помилка ОС"
+ER_WRONG_VALUE_COUNT_ON_ROW 21S01 
+        chi "列计数与行%lu的值计数不匹配"
+        cze "Počet sloupců neodpovídá počtu hodnot na řádku %lu"
+        dan "Kolonne antallet stemmer ikke overens med antallet af værdier i post %lu"
+        eng "Column count doesn't match value count at row %lu"
+        est "Tulpade hulk erineb väärtuste hulgast real %lu"
+        ger "Anzahl der Felder stimmt nicht mit der Anzahl der Werte in Zeile %lu überein"
+        geo "სვეტების რაოდენობა ჩანაწერში %lu მითითებულ რაოდენობას არ ემთხვევა"
+        hun "Az oszlopban talalhato ertek nem egyezik meg a %lu sorban szamitott ertekkel"
+        ita "Il numero delle colonne non corrisponde al conteggio alla riga %lu"
+        jpn "%lu 行目で、列の数が値の数と一致しません。"
+        kor "Row %lu에서 칼럼 카운트와 value 카운터와 일치하지 않습니다."
+        nla "Kolom aantal komt niet overeen met waarde aantal in rij %lu"
+        por "Contagem de colunas não confere com a contagem de valores na linha %lu"
+        rum "Numarul de coloane nu corespunde cu numarul de valori la linia %lu"
+        rus "Количество столбцов не совпадает с количеством значений в записи %lu"
+        serbian "Broj kolona ne odgovara broju vrednosti u slogu %lu"
+        spa "El número de columnas no se corresponde con el número de valores en la línea %lu"
+        swe "Antalet kolumner motsvarar inte antalet värden på rad: %lu"
+        ukr "Кількість стовбців не співпадає з кількістю значень у строці %lu"
+ER_CANT_REOPEN_TABLE  
+        chi "无法重新打开表:'%-.192s'"
+        cze "Nemohu znovuotevřít tabulku: '%-.192s'"
+        dan "Kan ikke genåbne tabel '%-.192s'"
+        eng "Can't reopen table: '%-.192s'"
+        est "Ei suuda taasavada tabelit '%-.192s'"
+        fre "Impossible de réouvrir la table: '%-.192s'"
+        ger "Kann Tabelle '%-.192s' nicht erneut öffnen"
+        geo "ცხრილის თავიდან გახსნის შეცდომა: '%-.192s'"
+        hindi "टेबल '%-.192s' फिर से खोल नहीं सकते"
+        hun "Nem lehet ujra-megnyitni a tablat: '%-.192s'"
+        ita "Impossibile riaprire la tabella: '%-.192s'"
+        jpn "表を再オープンできません。: '%-.192s'"
+        kor "테이블을 다시 열수 없군요: '%-.192s'"
+        nla "Kan tabel niet opnieuw openen: '%-.192s'"
+        nor "Can't reopen table: '%-.192s'"
+        norwegian-ny "Can't reopen table: '%-.192s'"
+        pol "Can't reopen table: '%-.192s'"
+        por "Não pode reabrir a tabela '%-.192s'"
+        rum "Nu pot redeschide tabela: '%-.192s'"
+        rus "Невозможно заново открыть таблицу '%-.192s'"
+        serbian "Ne mogu da ponovo otvorim tabelu '%-.192s'"
+        slo "Can't reopen table: '%-.192s'"
+        spa "No puedo reabrir la tabla: '%-.192s'"
+        swe "Kunde inte stänga och öppna tabell '%-.192s'"
+        ukr "Не можу перевідкрити таблицю: '%-.192s'"
+ER_INVALID_USE_OF_NULL 22004 
+        chi "无效使用 NULL 值"
+        cze "Neplatné užití hodnoty NULL"
+        dan "Forkert brug af nulværdi (NULL)"
+        eng "Invalid use of NULL value"
+        est "NULL väärtuse väärkasutus"
+        fre "Utilisation incorrecte de la valeur NULL"
+        ger "Unerlaubte Verwendung eines NULL-Werts"
+        geo "NULL მნიშვნელობის არასწორი გამოყენება"
+        hindi "NULL मान का अवैध उपयोग"
+        hun "A NULL ervenytelen hasznalata"
+        ita "Uso scorretto del valore NULL"
+        jpn "NULL 値の使用方法が不適切です。"
+        kor "NULL 값을 잘못 사용하셨군요..."
+        nla "Foutief gebruik van de NULL waarde"
+        por "Uso inválido do valor NULL"
+        rum "Folosirea unei value NULL e invalida"
+        rus "Неправильное использование величины NULL"
+        serbian "Pogrešna upotreba vrednosti NULL"
+        spa "Uso inválido del valor NULL"
+        swe "Felaktig använding av NULL"
+        ukr "Хибне використання значення NULL"
+ER_REGEXP_ERROR 42000 
+        chi "正则表达错误 '%s'"
+        cze "Regulární výraz vrátil chybu: %s"
+        dan "Fik fejl '%s' fra regexp"
+        eng "Regex error '%s'"
+        est "regexp tagastas vea: %s"
+        fre "Erreur '%s' provenant de regexp"
+        ger "Regexp Fehler %s"
+        geo "რეგულარული გამოსახულების შეცდომა '%s'"
+        hindi "regexp में '%s' त्रुटि हुई"
+        hun "'%s' hiba a regularis kifejezes hasznalata soran (regexp)"
+        ita "Errore '%s' da regexp"
+        jpn "regexp がエラー '%s' を返しました。"
+        kor "regexp에서 '%s'가 났습니다."
+        nla "Fout '%s' ontvangen van regexp"
+        por "Obteve erro '%s' em regexp"
+        rum "Eroarea '%s' obtinuta din expresia regulara (regexp)"
+        rus "Ошибка регулярного выражения: %s"
+        serbian "Funkcija regexp je vratila grešku: %s"
+        spa "Obtenido error '%s' de regexp"
+        swe "Fick fel '%s' från REGEXP"
+        ukr "Помилка регулярного виразу: %s"
+ER_MIX_OF_GROUP_FUNC_AND_FIELDS 42000 
+        chi "如果没有 GROUP BY 子句,不能混合没有 GROUP 列的 GROUP 列 (MIN(),MAX(),COUNT(),...)"
+        cze "Pokud není žádná GROUP BY klauzule, není dovoleno současné použití GROUP položek (MIN(),MAX(),COUNT()...) s ne GROUP položkami"
+        dan "Sammenblanding af GROUP kolonner (MIN(),MAX(),COUNT()...) uden GROUP kolonner er ikke tilladt, hvis der ikke er noget GROUP BY prædikat"
+        eng "Mixing of GROUP columns (MIN(),MAX(),COUNT(),...) with no GROUP columns is illegal if there is no GROUP BY clause"
+        est "GROUP tulpade (MIN(),MAX(),COUNT()...) kooskasutamine tavaliste tulpadega ilma GROUP BY klauslita ei ole lubatud"
+        fre "Mélanger les colonnes GROUP (MIN(),MAX(),COUNT()...) avec des colonnes normales est interdit s'il n'y a pas de clause GROUP BY"
+        ger "Das Vermischen von GROUP-Feldern (MIN(),MAX(),COUNT()...) mit Nicht-GROUP-Feldern ist nicht zulässig, wenn keine GROUP-BY-Klausel vorhanden ist"
+        geo "GROUP-ის სვეტების (MIN(),MAX(),COUNT(),...) შერევა არა-GROUP-ის სვეტებთან დაუშვებელია, თუ გამოსახულებაში არსებობს GROUP BY"
+        hun "A GROUP mezok (MIN(),MAX(),COUNT()...) kevert hasznalata nem lehetseges GROUP BY hivatkozas nelkul"
+        ita "Il mescolare funzioni di aggregazione (MIN(),MAX(),COUNT()...) e non e` illegale se non c'e` una clausula GROUP BY"
+        jpn "GROUP BY句が無い場合、集計関数(MIN(),MAX(),COUNT(),...)と通常の列を同時に使用できません。"
+        kor "GROUP BY 절 없이 혼합된 GROUP 함수 (MIN(),MAX(),COUNT(),...) 를 사용할 수 없습니다."
+        nla "Het mixen van GROUP kolommen (MIN(),MAX(),COUNT()...) met no-GROUP kolommen is foutief indien er geen GROUP BY clausule is"
+        por "Mistura de colunas agrupadas (com MIN(), MAX(), COUNT(), ...) com colunas não agrupadas é ilegal, se não existir uma cláusula de agrupamento (cláusula GROUP BY)"
+        rum "Amestecarea de coloane GROUP (MIN(),MAX(),COUNT()...) fara coloane GROUP este ilegala daca nu exista o clauza GROUP BY"
+        rus "Одновременное использование сгруппированных (GROUP) столбцов (MIN(),MAX(),COUNT(),...) с несгруппированными столбцами является некорректным, если в выражении есть GROUP BY"
+        serbian "Upotreba agregatnih funkcija (MIN(),MAX(),COUNT()...) bez 'GROUP' kolona je pogrešna ako ne postoji 'GROUP BY' iskaz"
+        spa "La mezcla de columnas GROUP (MIN(),MAX(),COUNT()...) con columnas no GROUP es ilegal si no existe la cláusula GROUP BY"
+        swe "Man får inte ha både GROUP-kolumner (MIN(),MAX(),COUNT()...) och fält i en fråga om man inte har en GROUP BY-del"
+        ukr "Змішування GROUP стовбців (MIN(),MAX(),COUNT()...) з не GROUP стовбцями є забороненим, якщо не має GROUP BY"
+ER_NONEXISTING_GRANT 42000 
+        chi "用户 '%-.48s' 来自主机 '%-.64s'没有此类授权"
+        cze "Neexistuje odpovídající grant pro uživatele '%-.48s' na stroji '%-.64s'"
+        dan "Denne tilladelse findes ikke for brugeren '%-.48s' på vært '%-.64s'"
+        eng "There is no such grant defined for user '%-.48s' on host '%-.64s'"
+        est "Sellist õigust ei ole defineeritud kasutajale '%-.48s' masinast '%-.64s'"
+        fre "Un tel droit n'est pas défini pour l'utilisateur '%-.48s' sur l'hôte '%-.64s'"
+        ger "Für Benutzer '%-.48s' auf Host '%-.64s' gibt es keine solche Berechtigung"
+        geo "მომხმარებლისთვის '%-.48s' ჰოსტზე '%-.64s' ასეთი უფლებები აღწერილი არაა"
+        hun "A '%-.48s' felhasznalonak nincs ilyen joga a '%-.64s' host-on"
+        ita "GRANT non definita per l'utente '%-.48s' dalla macchina '%-.64s'"
+        jpn "ユーザー '%-.48s' (ホスト '%-.64s' 上) は許可されていません。"
+        kor "사용자 '%-.48s' (호스트 '%-.64s')를 위하여 정의된 그런 승인은 없습니다."
+        nla "Deze toegang (GRANT) is niet toegekend voor gebruiker '%-.48s' op host '%-.64s'"
+        por "Não existe tal permissão (grant) definida para o usuário '%-.48s' no 'host' '%-.64s'"
+        rum "Nu exista un astfel de grant definit pentru utilzatorul '%-.48s' de pe host-ul '%-.64s'"
+        rus "Такие права не определены для пользователя '%-.48s' на хосте '%-.64s'"
+        serbian "Ne postoji odobrenje za pristup korisniku '%-.48s' na host-u '%-.64s'"
+        spa "No existe tal concesión (grant) definida para usuario '%-.48s' en el equipo '%-.64s'"
+        swe "Det finns inget privilegium definierat för användare '%-.48s' på '%-.64s'"
+        ukr "Повноважень не визначено для користувача '%-.48s' з хосту '%-.64s'"
+ER_TABLEACCESS_DENIED_ERROR 42000 
+        chi "%-.100T 命令的权限拒绝用户 '%s'@'%s' 用在表 %`s.%`s"
+        cze "%-.100T příkaz nepřístupný pro uživatele: '%s'@'%s' pro tabulku %`s.%`s"
+        dan "%-.100T-kommandoen er ikke tilladt for brugeren '%s'@'%s' for tabellen %`s.%`s"
+        nla "%-.100T commando geweigerd voor gebruiker: '%s'@'%s' voor tabel %`s.%`s"
+        eng "%-.100T command denied to user '%s'@'%s' for table %`s.%`s"
+        est "%-.100T käsk ei ole lubatud kasutajale '%s'@'%s' tabelis %`s.%`s"
+        fre "La commande '%-.100T' est interdite à l'utilisateur: '%s'@'%s' sur la table %`s.%`s"
+        ger "%-.100T Befehl nicht erlaubt für Benutzer '%s'@'%s' auf Tabelle %`s.%`s"
+        geo "%-.100T ბრძანება უარყოფილია მომხმარებლისთვის '%s'@'%s' ცხრილისთვის %`s.%`s"
+        hun "%-.100T parancs a '%s'@'%s' felhasznalo szamara nem engedelyezett a %`s.%`s tablaban"
+        ita "Comando %-.100T negato per l'utente: '%s'@'%s' sulla tabella %`s.%`s"
+        jpn "コマンド %-.100T は ユーザー '%s'@'%s' ,テーブル %`s.%`s に対して許可されていません"
+        kor "'%-.100T' 명령은 다음 사용자에게 거부되었습니다. : '%s'@'%s' for 테이블 %`s.%`s"
+        por "Comando '%-.100T' negado para o usuário '%s'@'%s' na tabela %`s.%`s"
+        rum "Comanda %-.100T interzisa utilizatorului: '%s'@'%s' pentru tabela %`s.%`s"
+        rus "Команда %-.100T запрещена пользователю '%s'@'%s' для таблицы %`s.%`s"
+        serbian "%-.100T komanda zabranjena za korisnika '%s'@'%s' za tabelu %`s.%`s"
+        spa "%-.100T comando denegado a usuario '%s'@'%s' para la tabla %`s.%`s"
+        swe "%-.100T ej tillåtet för '%s'@'%s' för tabell %`s.%`s"
+        ukr "%-.100T команда заборонена користувачу: '%s'@'%s' у таблиці %`s.%`s"
+ER_COLUMNACCESS_DENIED_ERROR 42000 
+        chi "%-.32s 命令的权限拒绝用户 '%s'@'%s' 用在列 '%-.192s' 在表 '%-.192s'"
+        cze "%-.32s příkaz nepřístupný pro uživatele: '%s'@'%s' pro sloupec '%-.192s' v tabulce '%-.192s'"
+        dan "%-.32s-kommandoen er ikke tilladt for brugeren '%s'@'%s' for kolonne '%-.192s' in tabellen '%-.192s'"
+        eng "%-.32s command denied to user '%s'@'%s' for column '%-.192s' in table '%-.192s'"
+        est "%-.32s käsk ei ole lubatud kasutajale '%s'@'%s' tulbale '%-.192s' tabelis '%-.192s'"
+        fre "La commande '%-.32s' est interdite à l'utilisateur: '%s'@'%s' sur la colonne '%-.192s' de la table '%-.192s'"
+        ger "%-.32s Befehl nicht erlaubt für Benutzer '%s'@'%s' und Feld '%-.192s' in Tabelle '%-.192s'"
+        geo "ბრძანება %-.32s უარყოფილია მომხმარებლისთვის '%s'@'%s' სვეტისთვის '%-.192s' ცხრილში '%-.192s'"
+        hun "%-.32s parancs a '%s'@'%s' felhasznalo szamara nem engedelyezett a '%-.192s' mezo eseten a '%-.192s' tablaban"
+        ita "Comando %-.32s negato per l'utente: '%s'@'%s' sulla colonna '%-.192s' della tabella '%-.192s'"
+        jpn "コマンド %-.32s は ユーザー '%s'@'%s'\n カラム '%-.192s' テーブル '%-.192s' に対して許可されていません"
+        kor "'%-.32s' 명령은 다음 사용자에게 거부되었습니다. : '%s'@'%s' for 칼럼 '%-.192s' in 테이블 '%-.192s'"
+        nla "%-.32s commando geweigerd voor gebruiker: '%s'@'%s' voor kolom '%-.192s' in tabel '%-.192s'"
+        por "Comando '%-.32s' negado para o usuário '%s'@'%s' na coluna '%-.192s', na tabela '%-.192s'"
+        rum "Comanda %-.32s interzisa utilizatorului: '%s'@'%s' pentru coloana '%-.192s' in tabela '%-.192s'"
+        rus "Команда %-.32s запрещена пользователю '%s'@'%s' для столбца '%-.192s' в таблице '%-.192s'"
+        serbian "%-.32s komanda zabranjena za korisnika '%s'@'%s' za kolonu '%-.192s' iz tabele '%-.192s'"
+        spa "%-.32s comando denegado a el usuario '%s'@'%s' para la columna '%-.192s' en la tabla '%-.192s'"
+        swe "%-.32s ej tillåtet för '%s'@'%s' för kolumn '%-.192s' i tabell '%-.192s'"
+        ukr "%-.32s команда заборонена користувачу: '%s'@'%s' для стовбця '%-.192s' у таблиці '%-.192s'"
+ER_ILLEGAL_GRANT_FOR_TABLE 42000 
+        chi "非法的 GRANT/REVOKE 命令;请查阅文档查看可以使用哪些权限"
+        cze "Neplatný příkaz GRANT/REVOKE. Prosím, přečtěte si v manuálu, jaká privilegia je možné použít"
+        dan "Forkert GRANT/REVOKE kommando. Se i brugervejledningen hvilke privilegier der kan specificeres"
+        eng "Illegal GRANT/REVOKE command; please consult the manual to see which privileges can be used"
+        est "Vigane GRANT/REVOKE käsk. Tutvu kasutajajuhendiga"
+        fre "Commande GRANT/REVOKE incorrecte. Consultez le manuel"
+        ger "Unzulässiger GRANT- oder REVOKE-Befehl. Verfügbare Berechtigungen sind im Handbuch aufgeführt"
+        geo "არასწორი GRANT/REVOKE ბრძანება. გამოყენებადი პრივილეგიების სიის მოსაძებნად სახელმძღვანელოს მიმართეთ"
+        greek "Illegal GRANT/REVOKE command; please consult the manual to see which privileges can be used"
+        hun "Ervenytelen GRANT/REVOKE parancs. Kerem, nezze meg a kezikonyvben, milyen jogok lehetsegesek"
+        ita "Comando GRANT/REVOKE illegale. Prego consultare il manuale per sapere quali privilegi possono essere usati"
+        jpn "不正な GRANT/REVOKE コマンドです。どの権限で利用可能かはマニュアルを参照して下さい。"
+        kor "잘못된 GRANT/REVOKE 명령. 어떤 권리와 승인이 사용되어 질 수 있는지 메뉴얼을 보시오."
+        nla "Foutief GRANT/REVOKE commando. Raadpleeg de handleiding welke priveleges gebruikt kunnen worden"
+        nor "Illegal GRANT/REVOKE command; please consult the manual to see which privileges can be used"
+        norwegian-ny "Illegal GRANT/REVOKE command; please consult the manual to see which privileges can be used"
+        pol "Illegal GRANT/REVOKE command; please consult the manual to see which privileges can be used"
+        por "Comando GRANT/REVOKE ilegal. Por favor consulte no manual quais privilégios podem ser usados"
+        rum "Comanda GRANT/REVOKE ilegala. Consultati manualul in privinta privilegiilor ce pot fi folosite"
+        rus "Неверная команда GRANT или REVOKE. Обратитесь к документации, чтобы выяснить, какие привилегии можно использовать"
+        serbian "Pogrešna 'GRANT' odnosno 'REVOKE' komanda. Molim Vas pogledajte u priručniku koje vrednosti mogu biti upotrebljene"
+        slo "Illegal GRANT/REVOKE command; please consult the manual to see which privileges can be used"
+        spa "Comando GRANT/REVOKE ilegal; por favor, consulte el manual para ver los permisos que se pueden usar"
+        swe "Felaktigt GRANT-privilegium använt"
+        ukr "Хибна GRANT/REVOKE команда; прочитайте документацію стосовно того, які права можна використовувати"
+ER_GRANT_WRONG_HOST_OR_USER 42000 
+        chi "GRANT 语句的主机或用户参数太长"
+        cze "Argument příkazu GRANT uživatel nebo stroj je příliš dlouhý"
+        dan "Værts- eller brugernavn for langt til GRANT"
+        eng "The host or user argument to GRANT is too long"
+        est "Masina või kasutaja nimi GRANT lauses on liiga pikk"
+        fre "L'hôte ou l'utilisateur donné en argument à GRANT est trop long"
+        ger "Das Host- oder User-Argument für GRANT ist zu lang"
+        geo "ჰოსტის ან მომხმარებლის არგუმენტები GRANT-სთვის ძალიან გრძელია"
+        hindi "GRANT के लिए होस्ट या यूज़र आर्गुमेंट बहुत लंबा है"
+        hun "A host vagy felhasznalo argumentuma tul hosszu a GRANT parancsban"
+        ita "L'argomento host o utente per la GRANT e` troppo lungo"
+        jpn "GRANTコマンドへの、ホスト名やユーザー名が長すぎます。"
+        kor "승인(GRANT)을 위하여 사용한 사용자나 호스트의 값들이 너무 깁니다."
+        nla "De host of gebruiker parameter voor GRANT is te lang"
+        por "Argumento de 'host' ou de usuário para o GRANT é longo demais"
+        rum "Argumentul host-ului sau utilizatorului pentru GRANT e prea lung"
+        rus "Слишком длинное имя пользователя/хоста для GRANT"
+        serbian "Argument 'host' ili 'korisnik' prosleđen komandi 'GRANT' je predugačak"
+        spa "El argumento de GRANT para el equipo o usuario es demasiado grande"
+        swe "Felaktigt maskinnamn eller användarnamn använt med GRANT"
+        ukr "Аргумент host або user для GRANT задовгий"
+ER_NO_SUCH_TABLE 42S02 
+        chi "表 '%-.192s.%-.192s' 不存在"
+        cze "Tabulka '%-.192s.%-.192s' neexistuje"
+        dan "Tabellen '%-.192s.%-.192s' eksisterer ikke"
+        eng "Table '%-.192s.%-.192s' doesn't exist"
+        est "Tabelit '%-.192s.%-.192s' ei eksisteeri"
+        fre "La table '%-.192s.%-.192s' n'existe pas"
+        ger "Tabelle '%-.192s.%-.192s' existiert nicht"
+        geo "ცხრილი '%-.192s.%-.192s' არ არსებობს"
+        hindi "टेबल '%-.192s.%-.192s' मौजूद नहीं है"
+        hun "A '%-.192s.%-.192s' tabla nem letezik"
+        ita "La tabella '%-.192s.%-.192s' non esiste"
+        jpn "表 '%-.192s.%-.192s' は存在しません。"
+        kor "테이블 '%-.192s.%-.192s' 는 존재하지 않습니다."
+        nla "Tabel '%-.192s.%-.192s' bestaat niet"
+        nor "Table '%-.192s.%-.192s' doesn't exist"
+        norwegian-ny "Table '%-.192s.%-.192s' doesn't exist"
+        pol "Table '%-.192s.%-.192s' doesn't exist"
+        por "Tabela '%-.192s.%-.192s' não existe"
+        rum "Tabela '%-.192s.%-.192s' nu exista"
+        rus "Таблица '%-.192s.%-.192s' не существует"
+        serbian "Tabela '%-.192s.%-.192s' ne postoji"
+        slo "Table '%-.192s.%-.192s' doesn't exist"
+        spa "La tabla '%-.192s.%-.192s' no existe"
+        swe "Det finns ingen tabell som heter '%-.192s.%-.192s'"
+        ukr "Таблиця '%-.192s.%-.192s' не існує"
+ER_NONEXISTING_TABLE_GRANT 42000 
+        chi "没有为用户 '%-.48s' 来自主机 '%-.64s' 在表 '%-.192s' 上授权"
+        cze "Neexistuje odpovídající grant pro uživatele '%-.48s' na stroji '%-.64s' pro tabulku '%-.192s'"
+        dan "Denne tilladelse eksisterer ikke for brugeren '%-.48s' på vært '%-.64s' for tabellen '%-.192s'"
+        eng "There is no such grant defined for user '%-.48s' on host '%-.64s' on table '%-.192s'"
+        est "Sellist õigust ei ole defineeritud kasutajale '%-.48s' masinast '%-.64s' tabelile '%-.192s'"
+        fre "Un tel droit n'est pas défini pour l'utilisateur '%-.48s' sur l'hôte '%-.64s' sur la table '%-.192s'"
+        ger "Eine solche Berechtigung ist für User '%-.48s' auf Host '%-.64s' an Tabelle '%-.192s' nicht definiert"
+        geo "ასეთი უფლებები აღწერილი არაა მომხმარებლისთვის '%-.48s' ჰოსტზე '%-.64s' ცხრილზე '%-.192s'"
+        hun "A '%-.48s' felhasznalo szamara a '%-.64s' host '%-.192s' tablajaban ez a parancs nem engedelyezett"
+        ita "GRANT non definita per l'utente '%-.48s' dalla macchina '%-.64s' sulla tabella '%-.192s'"
+        jpn "ユーザー '%-.48s' (ホスト '%-.64s' 上) の表 '%-.192s' への権限は定義されていません。"
+        kor "사용자 '%-.48s'(호스트 '%-.64s')는 테이블 '%-.192s'를 사용하기 위하여 정의된 승인은 없습니다. "
+        nla "Deze toegang (GRANT) is niet toegekend voor gebruiker '%-.48s' op host '%-.64s' op tabel '%-.192s'"
+        por "Não existe tal permissão (grant) definido para o usuário '%-.48s' no 'host' '%-.64s', na tabela '%-.192s'"
+        rum "Nu exista un astfel de privilegiu (grant) definit pentru utilizatorul '%-.48s' de pe host-ul '%-.64s' pentru tabela '%-.192s'"
+        rus "Такие права не определены для пользователя '%-.48s' на компьютере '%-.64s' для таблицы '%-.192s'"
+        serbian "Ne postoji odobrenje za pristup korisniku '%-.48s' na host-u '%-.64s' tabeli '%-.192s'"
+        spa "No existe tal concesión (grant) definida para el usuario '%-.48s' en el equipo '%-.64s' en la tabla '%-.192s'"
+        swe "Det finns inget privilegium definierat för användare '%-.48s' på '%-.64s' för tabell '%-.192s'"
+        ukr "Повноважень не визначено для користувача '%-.48s' з хосту '%-.64s' для таблиці '%-.192s'"
+ER_NOT_ALLOWED_COMMAND 42000 
+        chi "本 MariaDB 版本不允许使用这个命令"
+        cze "Použitý příkaz není v této verzi MariaDB povolen"
+        dan "Den brugte kommando er ikke tilladt med denne udgave af MariaDB"
+        eng "The used command is not allowed with this MariaDB version"
+        est "Antud käsk ei ole lubatud käesolevas MariaDB versioonis"
+        fre "Cette commande n'existe pas dans cette version de MariaDB"
+        ger "Der verwendete Befehl ist in dieser MariaDB-Version nicht zulässig"
+        geo "გამოყენებული ბრძანება MariaDB-ის ამ ვერსიაში დაუსვებელია"
+        hindi "यह कमांड इस MariaDB संस्करण के साथ इस्तेमाल नहीं किया जा सकता है"
+        hun "A hasznalt parancs nem engedelyezett ebben a MariaDB verzioban"
+        ita "Il comando utilizzato non e` supportato in questa versione di MariaDB"
+        jpn "このMariaDBバージョンでは利用できないコマンドです。"
+        kor "사용된 명령은 현재의 MariaDB 버젼에서는 이용되지 않습니다."
+        nla "Het used commando is niet toegestaan in deze MariaDB versie"
+        por "Comando usado não é permitido para esta versão do MariaDB"
+        rum "Comanda folosita nu este permisa pentru aceasta versiune de MariaDB"
+        rus "Эта команда не допускается в данной  версии MariaDB"
+        serbian "Upotrebljena komanda nije dozvoljena sa ovom verzijom MariaDB servera"
+        spa "El comando usado no está permitido con esta versión de MariaDB"
+        swe "Du kan inte använda detta kommando med denna MariaDB version"
+        ukr "Використовувана команда не дозволена у цій версії MariaDB"
+ER_SYNTAX_ERROR 42000 
+        chi "您的 SQL 语法有错误;请查看相关文档"
+        cze "Vaše syntaxe je nějaká divná"
+        dan "Der er en fejl i SQL syntaksen"
+        eng "You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use"
+        est "Viga SQL süntaksis"
+        fre "Erreur de syntaxe"
+        ger "Fehler in der SQL-Syntax. Bitte die korrekte Syntax im Handbuch nachschlagen"
+        geo "SQL-ის სინტაქსის შეცდომა. სწორი სინტაქსის შესასწავლად თქვენი MariaDB-ის სერვერის ვერსიის შესაბამისი სახელმძღვანელო გამოიყენეთ"
+        greek "You have an error in your SQL syntax"
+        hindi "आपके SQL सिंटेक्स मैं गलती है; सही सिंटेक्स के लिए अपने MariaDB सर्वर संस्करण के मैन्युअल की सहायता लें"
+        hun "Szintaktikai hiba"
+        ita "Errore di sintassi nella query SQL"
+        jpn "SQL構文エラーです。バージョンに対応するマニュアルを参照して正しい構文を確認してください。"
+        kor "SQL 구문에 오류가 있습니다."
+        nla "Er is iets fout in de gebruikte syntax"
+        nor "Something is wrong in your syntax"
+        norwegian-ny "Something is wrong in your syntax"
+        pol "Something is wrong in your syntax"
+        por "Você tem um erro de sintaxe no seu SQL"
+        rum "Aveti o eroare in sintaxa SQL"
+        rus "У вас ошибка в запросе. Изучите документацию по используемой версии MariaDB на предмет корректного синтаксиса"
+        serbian "Imate grešku u vašoj SQL sintaksi"
+        slo "Something is wrong in your syntax"
+        spa "Existe un error en su sintaxis SQL; revise el manual que se corresponde con su versión del servidor MariaDB para averiguar la sintaxis correcta a utilizar"
+        swe "Du har något fel i din syntax"
+        ukr "У вас помилка у синтаксисі SQL"
+ER_DELAYED_CANT_CHANGE_LOCK  
+        chi "延迟插入线程无法获得 %-.192s 的请求锁定"
+        cze "Zpožděný insert threadu nebyl schopen získat požadovaný zámek pro tabulku %-.192s"
+        dan "Forsinket indsættelse tråden (delayed insert thread) kunne ikke opnå lås på tabellen %-.192s"
+        eng "Delayed insert thread couldn't get requested lock for table %-.192s"
+        est "INSERT DELAYED lõim ei suutnud saada soovitud lukku tabelile %-.192s"
+        fre "La tâche 'delayed insert' n'a pas pu obtenir le verrou demandé sur la table %-.192s"
+        ger "Verzögerter (DELAYED) Einfüge-Thread konnte die angeforderte Sperre für Tabelle '%-.192s' nicht erhalten"
+        geo "დაყოვნებული ჩამატების ნაკადმა მოთხოვნილი ბლოკი ცხრილზე %-.192s ვერ მიიღო"
+        hun "A kesleltetett beillesztes (delayed insert) thread nem kapott zatolast a %-.192s tablahoz"
+        ita "Il thread di inserimento ritardato non riesce ad ottenere il lock per la tabella %-.192s"
+        jpn "'Delayed insert'スレッドが表 '%-.192s' のロックを取得できませんでした。"
+        kor "지연된 insert 쓰레드가 테이블 %-.192s의 요구된 락킹을 처리할 수 없었습니다."
+        nla "'Delayed insert' thread kon de aangevraagde 'lock' niet krijgen voor tabel %-.192s"
+        por "'Thread' de inserção retardada (atrasada) pois não conseguiu obter a trava solicitada para tabela '%-.192s'"
+        rum "Thread-ul pentru inserarea aminata nu a putut obtine lacatul (lock) pentru tabela %-.192s"
+        rus "Поток, обслуживающий отложенную вставку (delayed insert), не смог получить запрашиваемую блокировку на таблицу %-.192s"
+        serbian "Prolongirani 'INSERT' thread nije mogao da dobije traženo zaključavanje tabele '%-.192s'"
+        spa "El hilo (thread) de inserción retardada no pudo obtener bloqueo requerido para la tabla %-.192s"
+        swe "DELAYED INSERT-tråden kunde inte låsa tabell '%-.192s'"
+        ukr "Гілка для INSERT DELAYED не може отримати блокування для таблиці %-.192s"
+ER_TOO_MANY_DELAYED_THREADS  
+        chi "使用中的延迟线程太多"
+        cze "Příliš mnoho zpožděných threadů"
+        dan "For mange slettede tråde (threads) i brug"
+        eng "Too many delayed threads in use"
+        est "Liiga palju DELAYED lõimesid kasutusel"
+        fre "Trop de tâche 'delayed' en cours"
+        geo "გამოიყენება დაყოვნებული ჩამატების მეტისმეტად ბევრი ნაკადი"
+        ger "Zu viele verzögerte (DELAYED) Threads in Verwendung"
+        hindi "बहुत से DELAYED थ्रेड्स उपयोग में हैं"
+        hun "Tul sok kesletetett thread (delayed)"
+        ita "Troppi threads ritardati in uso"
+        jpn "'Delayed insert'スレッドが多すぎます。"
+        kor "너무 많은 지연 쓰레드를 사용하고 있습니다."
+        nla "Te veel 'delayed' threads in gebruik"
+        por "Excesso de 'threads' retardadas (atrasadas) em uso"
+        rum "Prea multe threaduri aminate care sint in uz"
+        rus "Слишком много потоков, обслуживающих отложенную вставку (delayed insert)"
+        serbian "Previše prolongiranih thread-ova je u upotrebi"
+        spa "Demasiados hilos (threads) retardados en uso"
+        swe "Det finns redan 'max_delayed_threads' trådar i användning"
+        ukr "Забагато затриманих гілок використовується"
+ER_ABORTING_CONNECTION 08S01 
+        chi "终止连线 %ld 数据库: '%-.192s' 用户: '%-.48s' (%-.64s)"
+        cze "Zrušeno spojení %ld do databáze: '%-.192s' uživatel: '%-.48s' (%-.64s)"
+        dan "Afbrudt forbindelse %ld til database: '%-.192s' bruger: '%-.48s' (%-.64s)"
+        eng "Aborted connection %ld to db: '%-.192s' user: '%-.48s' (%-.64s)"
+        est "Ühendus katkestatud %ld andmebaasile: '%-.192s' kasutajale: '%-.48s' (%-.64s)"
+        fre "Connection %ld avortée vers la bd: '%-.192s' utilisateur: '%-.48s' (%-.64s)"
+        ger "Abbruch der Verbindung %ld zur Datenbank '%-.192s'. Benutzer: '%-.48s' (%-.64s)"
+        geo "გაუქმდა კავშირი %ld ბაზამდე: '%-.192s' მომხმარებელი: '%-.48s' (%-.64s)"
+        hun "Megszakitott kapcsolat %ld db: '%-.192s' adatbazishoz, felhasznalo: '%-.48s' (%-.64s)"
+        ita "Interrotta la connessione %ld al db: '%-.192s' utente: '%-.48s' (%-.64s)"
+        jpn "接続 %ld が中断されました。データベース: '%-.192s' ユーザー: '%-.48s' (%-.64s)"
+        kor "데이타베이스 접속을 위한 연결 %ld가 중단됨 : '%-.192s' 사용자: '%-.48s' (%-.64s)"
+        nla "Afgebroken verbinding %ld naar db: '%-.192s' gebruiker: '%-.48s' (%-.64s)"
+        nor "Aborted connection %ld to db: '%-.192s' user: '%-.48s' (%-.64s)"
+        norwegian-ny "Aborted connection %ld to db: '%-.192s' user: '%-.48s' (%-.64s)"
+        pol "Aborted connection %ld to db: '%-.192s' user: '%-.48s' (%-.64s)"
+        por "Conexão %ld abortou para o banco de dados '%-.192s' - usuário '%-.48s' (%-.64s)"
+        rum "Conectie terminata %ld la baza de date: '%-.192s' utilizator: '%-.48s' (%-.64s)"
+        rus "Прервано соединение %ld к базе данных '%-.192s' пользователя '%-.48s' (%-.64s)"
+        serbian "Prekinuta konekcija broj %ld ka bazi: '%-.192s' korisnik je bio: '%-.48s' (%-.64s)"
+        slo "Aborted connection %ld to db: '%-.192s' user: '%-.48s' (%-.64s)"
+        spa "Conexión %ld abortada para la base de datos: '%-.192s' usuario: '%-.48s' (%-.64s)"
+        swe "Avbröt länken för tråd %ld till db '%-.192s', användare '%-.48s' (%-.64s)"
+        ukr "Перервано з'єднання %ld до бази данних: '%-.192s' користувача: '%-.48s' (%-.64s)"
+ER_NET_PACKET_TOO_LARGE 08S01 
+        chi "收到的数据包大于 'max_allowed_packet' 字节"
+        cze "Zjištěn příchozí packet delší než 'max_allowed_packet'"
+        dan "Modtog en datapakke som var større end 'max_allowed_packet'"
+        eng "Got a packet bigger than 'max_allowed_packet' bytes"
+        est "Saabus suurem pakett kui lubatud 'max_allowed_packet' muutujaga"
+        fre "Paquet plus grand que 'max_allowed_packet' reçu"
+        ger "Empfangenes Paket ist größer als 'max_allowed_packet' Bytes"
+        geo "მიღებული პაკეტი 'max_allowed_packet'-ზე დიდია"
+        hindi "'max_allowed_packet' से भी बड़ा एक पैकेट मिला"
+        hun "A kapott csomag nagyobb, mint a maximalisan engedelyezett: 'max_allowed_packet'"
+        ita "Ricevuto un pacchetto piu` grande di 'max_allowed_packet'"
+        jpn "'max_allowed_packet'よりも大きなパケットを受信しました。"
+        kor "'max_allowed_packet'보다 더큰 패킷을 받았습니다."
+        nla "Groter pakket ontvangen dan 'max_allowed_packet'"
+        por "Obteve um pacote maior do que a taxa máxima de pacotes definida (max_allowed_packet)"
+        rum "Un packet mai mare decit 'max_allowed_packet' a fost primit"
+        rus "Полученный пакет больше, чем 'max_allowed_packet'"
+        serbian "Primio sam mrežni paket veći od definisane vrednosti 'max_allowed_packet'"
+        spa "Obtenido un paquete mayor de 'max_allowed_packet' bytes"
+        swe "Kommunkationspaketet är större än 'max_allowed_packet'"
+        ukr "Отримано пакет більший ніж max_allowed_packet"
+ER_NET_READ_ERROR_FROM_PIPE 08S01 
+        chi "连接管道读取错误"
+        cze "Zjištěna chyba při čtení z roury spojení"
+        dan "Fik læsefejl fra forbindelse (connection pipe)"
+        eng "Got a read error from the connection pipe"
+        est "Viga ühendustoru lugemisel"
+        fre "Erreur de lecture reçue du pipe de connexion"
+        ger "Lese-Fehler bei einer Verbindungs-Pipe"
+        geo "კავშირის ნაკადიდან (connection pipe) მონაცემების წაკითხვის შეცდომა"
+        hindi "कनेक्शन पाइप से एक READ त्रुटि हुई"
+        hun "Olvasasi hiba a kapcsolat soran"
+        ita "Rilevato un errore di lettura dalla pipe di connessione"
+        jpn "接続パイプの読み込みエラーです。"
+        kor "연결 파이프로부터 에러가 발생하였습니다."
+        nla "Kreeg leesfout van de verbindings pipe"
+        por "Obteve um erro de leitura no 'pipe' da conexão"
+        rum "Eroare la citire din cauza lui 'connection pipe'"
+        rus "Получена ошибка чтения от потока соединения (connection pipe)"
+        serbian "Greška pri čitanju podataka sa pipe-a"
+        spa "Obtenido un error de lectura desde la tubería de la conexión"
+        swe "Fick läsfel från klienten vid läsning från 'PIPE'"
+        ukr "Отримано помилку читання з комунікаційного каналу"
+ER_NET_FCNTL_ERROR 08S01 
+        chi "fcntl()的错误"
+        cze "Zjištěna chyba fcntl()"
+        dan "Fik fejlmeddelelse fra fcntl()"
+        eng "Got an error from fcntl()"
+        est "fcntl() tagastas vea"
+        fre "Erreur reçue de fcntl() "
+        ger "fcntl() lieferte einen Fehler"
+        geo "მიღებულია შეცდომა ფუნქციიდან fnctl()"
+        hindi "fcntl() से एक त्रुटि हुई"
+        hun "Hiba a fcntl() fuggvenyben"
+        ita "Rilevato un errore da fcntl()"
+        jpn "fcntl()がエラーを返しました。"
+        kor "fcntl() 함수로부터 에러가 발생하였습니다."
+        nla "Kreeg fout van fcntl()"
+        por "Obteve um erro em fcntl()"
+        rum "Eroare obtinuta de la fcntl()"
+        rus "Получена ошибка от fcntl()"
+        serbian "Greška pri izvršavanju funkcije fcntl()"
+        spa "Obtenido un error de fcntl()"
+        swe "Fick fatalt fel från 'fcntl()'"
+        ukr "Отримано помилку від fcntl()"
+ER_NET_PACKETS_OUT_OF_ORDER 08S01 
+        chi "数据包乱序"
+        cze "Příchozí packety v chybném pořadí"
+        dan "Modtog ikke datapakker i korrekt rækkefølge"
+        eng "Got packets out of order"
+        est "Paketid saabusid vales järjekorras"
+        fre "Paquets reçus dans le désordre"
+        ger "Pakete nicht in der richtigen Reihenfolge empfangen"
+        geo "პაკეტების მიღების მიმდევრობა არასწორია"
+        hindi "पैकेट्स क्रम में नहीं प्राप्त हुए"
+        hun "Helytelen sorrendben erkezett adatcsomagok"
+        ita "Ricevuti pacchetti non in ordine"
+        jpn "不正な順序のパケットを受信しました。"
+        kor "순서가 맞지않는 패킷을 받았습니다."
+        nla "Pakketten in verkeerde volgorde ontvangen"
+        por "Obteve pacotes fora de ordem"
+        rum "Packets care nu sint ordonati au fost gasiti"
+        rus "Пакеты получены в неверном порядке"
+        serbian "Primio sam mrežne pakete van reda"
+        spa "Obtenidos paquetes desordenados"
+        swe "Kommunikationspaketen kom i fel ordning"
+        ukr "Отримано пакети у неналежному порядку"
+ER_NET_UNCOMPRESS_ERROR 08S01 
+        chi "无法解压缩通信数据包"
+        cze "Nemohu rozkomprimovat komunikační packet"
+        dan "Kunne ikke dekomprimere kommunikations-pakke (communication packet)"
+        eng "Couldn't uncompress communication packet"
+        est "Viga andmepaketi lahtipakkimisel"
+        fre "Impossible de décompresser le paquet reçu"
+        ger "Kommunikationspaket lässt sich nicht entpacken"
+        geo "კომუნიკაციის პაკეტის გაშლის შეცდომა"
+        hindi "संचार पैकेट UNCOMPRESS नहीं कर सके"
+        hun "A kommunikacios adatcsomagok nem tomorithetok ki"
+        ita "Impossibile scompattare i pacchetti di comunicazione"
+        jpn "圧縮パケットの展開ができませんでした。"
+        kor "통신 패킷의 압축해제를 할 수 없었습니다."
+        nla "Communicatiepakket kon niet worden gedecomprimeerd"
+        por "Não conseguiu descomprimir pacote de comunicação"
+        rum "Nu s-a putut decompresa pachetul de comunicatie (communication packet)"
+        rus "Невозможно распаковать пакет, полученный через коммуникационный протокол"
+        serbian "Ne mogu da dekompresujem mrežne pakete"
+        spa "No pude descomprimir paquete de comunicación"
+        swe "Kunde inte packa up kommunikationspaketet"
+        ukr "Не можу декомпресувати комунікаційний пакет"
+ER_NET_READ_ERROR 08S01 
+        chi "读取通信数据包出错"
+        cze "Zjištěna chyba při čtení komunikačního packetu"
+        dan "Fik fejlmeddelelse ved læsning af kommunikations-pakker (communication packets)"
+        eng "Got an error reading communication packets"
+        est "Viga andmepaketi lugemisel"
+        fre "Erreur de lecture des paquets reçus"
+        ger "Fehler beim Lesen eines Kommunikationspakets"
+        geo "შეცდომა კომუნიკაციის პაკეტების წაკითხვისას"
+        hindi "संचार पैकेट्स पढ़ते समय एक त्रुटि हुई"
+        hun "HIba a kommunikacios adatcsomagok olvasasa soran"
+        ita "Rilevato un errore ricevendo i pacchetti di comunicazione"
+        jpn "パケットの受信でエラーが発生しました。"
+        kor "통신 패킷을 읽는 중 오류가 발생하였습니다."
+        nla "Fout bij het lezen van communicatiepakketten"
+        por "Obteve um erro na leitura de pacotes de comunicação"
+        rum "Eroare obtinuta citind pachetele de comunicatie (communication packets)"
+        rus "Получена ошибка в процессе получения пакета через коммуникационный протокол "
+        serbian "Greška pri primanju mrežnih paketa"
+        spa "Obtenido un error leyendo paquetes de comunicación"
+        swe "Fick ett fel vid läsning från klienten"
+        ukr "Отримано помилку читання комунікаційних пакетів"
+ER_NET_READ_INTERRUPTED 08S01 
+        chi "读取通信包超时"
+        cze "Zjištěn timeout při čtení komunikačního packetu"
+        dan "Timeout-fejl ved læsning af kommunukations-pakker (communication packets)"
+        eng "Got timeout reading communication packets"
+        est "Kontrollaja ületamine andmepakettide lugemisel"
+        fre "Timeout en lecture des paquets reçus"
+        ger "Zeitüberschreitung beim Lesen eines Kommunikationspakets"
+        geo "კომუნიკაციის პაკეტების წაკითხვის მოლოდინის ვადა გავიდა"
+        hindi "संचार पैकेट्स पढ़ने के दौरान टाइमआउट"
+        hun "Idotullepes a kommunikacios adatcsomagok olvasasa soran"
+        ita "Rilevato un timeout ricevendo i pacchetti di comunicazione"
+        jpn "パケットの受信でタイムアウトが発生しました。"
+        kor "통신 패킷을 읽는 중 timeout이 발생하였습니다."
+        nla "Timeout bij het lezen van communicatiepakketten"
+        por "Obteve expiração de tempo (timeout) na leitura de pacotes de comunicação"
+        rum "Timeout obtinut citind pachetele de comunicatie (communication packets)"
+        rus "Получен таймаут ожидания пакета через коммуникационный протокол "
+        serbian "Vremenski limit za čitanje mrežnih paketa je istekao"
+        spa "Obtenido tiempo agotado (timeout) leyendo paquetes de comunicación"
+        swe "Fick 'timeout' vid läsning från klienten"
+        ukr "Отримано затримку читання комунікаційних пакетів"
+ER_NET_ERROR_ON_WRITE 08S01 
+        chi "写入通信包时出错"
+        cze "Zjištěna chyba při zápisu komunikačního packetu"
+        dan "Fik fejlmeddelelse ved skrivning af kommunukations-pakker (communication packets)"
+        eng "Got an error writing communication packets"
+        est "Viga andmepaketi kirjutamisel"
+        fre "Erreur d'écriture des paquets envoyés"
+        ger "Fehler beim Schreiben eines Kommunikationspakets"
+        geo "შეცდომა კომუნიკაციის პაკეტების ჩაწერისას"
+        hindi "संचार पैकेट्स लिखते समय एक त्रुटि हुई"
+        hun "Hiba a kommunikacios csomagok irasa soran"
+        ita "Rilevato un errore inviando i pacchetti di comunicazione"
+        jpn "パケットの送信でエラーが発生しました。"
+        kor "통신 패킷을 기록하는 중 오류가 발생하였습니다."
+        nla "Fout bij het schrijven van communicatiepakketten"
+        por "Obteve um erro na escrita de pacotes de comunicação"
+        rum "Eroare in scrierea pachetelor de comunicatie (communication packets)"
+        rus "Получена ошибка при передаче пакета через коммуникационный протокол "
+        serbian "Greška pri slanju mrežnih paketa"
+        spa "Obtenido un error escribiendo paquetes de comunicación"
+        swe "Fick ett fel vid skrivning till klienten"
+        ukr "Отримано помилку запису комунікаційних пакетів"
+ER_NET_WRITE_INTERRUPTED 08S01 
+        chi "写入通信包超时"
+        cze "Zjištěn timeout při zápisu komunikačního packetu"
+        dan "Timeout-fejl ved skrivning af kommunukations-pakker (communication packets)"
+        eng "Got timeout writing communication packets"
+        est "Kontrollaja ületamine andmepakettide kirjutamisel"
+        fre "Timeout d'écriture des paquets envoyés"
+        ger "Zeitüberschreitung beim Schreiben eines Kommunikationspakets"
+        geo "კომუნიკაციის პაკეტების ჩაწერის მოლოდინის ვადა ამოიწურა"
+        hindi "संचार पैकेट्स लिखने के दौरान टाइमआउट"
+        hun "Idotullepes a kommunikacios csomagok irasa soran"
+        ita "Rilevato un timeout inviando i pacchetti di comunicazione"
+        jpn "パケットの送信でタイムアウトが発生しました。"
+        kor "통신 패킷을 기록하는 중 timeout이 발생하였습니다."
+        nla "Timeout bij het schrijven van communicatiepakketten"
+        por "Obteve expiração de tempo ('timeout') na escrita de pacotes de comunicação"
+        rum "Timeout obtinut scriind pachetele de comunicatie (communication packets)"
+        rus "Получен таймаут в процессе передачи пакета через коммуникационный протокол "
+        serbian "Vremenski limit za slanje mrežnih paketa je istekao"
+        spa "Obtenido tiempo agotado (timeout) escribiendo paquetes de comunicación"
+        swe "Fick 'timeout' vid skrivning till klienten"
+        ukr "Отримано затримку запису комунікаційних пакетів"
+ER_TOO_LONG_STRING 42000 
+        chi "结果字符串长于'max_allowed_packet'字节"
+        cze "Výsledný řetězec je delší než 'max_allowed_packet'"
+        dan "Strengen med resultater er større end 'max_allowed_packet'"
+        eng "Result string is longer than 'max_allowed_packet' bytes"
+        est "Tulemus on pikem kui lubatud 'max_allowed_packet' muutujaga"
+        fre "La chaîne résultat est plus grande que 'max_allowed_packet'"
+        ger "Ergebnis-String ist länger als 'max_allowed_packet' Bytes"
+        geo "შედეგად მიღებული სტრიქონი 'max_allowed_packet' ბაიტზე გრძელია"
+        hindi "रिजल्ट स्ट्रिंग 'max_allowed_packet' से लंबा है"
+        hun "Ez eredmeny sztring nagyobb, mint a lehetseges maximum: 'max_allowed_packet'"
+        ita "La stringa di risposta e` piu` lunga di 'max_allowed_packet'"
+        jpn "結果の文字列が 'max_allowed_packet' よりも大きいです。"
+        kor "결과 문자열이 설정된 max_allowed_packet 값보다 큽니다."
+        nla "Resultaat string is langer dan 'max_allowed_packet'"
+        por "'String' resultante é mais longa do que 'max_allowed_packet'"
+        rum "Sirul rezultat este mai lung decit 'max_allowed_packet'"
+        rus "Результирующая строка больше, чем 'max_allowed_packet'"
+        serbian "Rezultujuči string je duži nego što to dozvoljava parametar servera 'max_allowed_packet'"
+        spa "La cadena resultante es mayor de max_allowed_packet bytes"
+        swe "Resultatsträngen är längre än max_allowed_packet"
+        ukr "Строка результату довша ніж max_allowed_packet"
+ER_TABLE_CANT_HANDLE_BLOB 42000 
+        chi "存储引擎%s不支持blob / text列"
+        cze "Typ použité tabulky (%s) nepodporuje BLOB/TEXT sloupce"
+        dan "Denne tabeltype (%s) understøtter ikke brug af BLOB og TEXT kolonner"
+        eng "Storage engine %s doesn't support BLOB/TEXT columns"
+        est "Valitud tabelitüüp (%s) ei toeta BLOB/TEXT tüüpi välju"
+        fre "Ce type de table (%s) ne supporte pas les colonnes BLOB/TEXT"
+        ger "Der verwendete Tabellentyp (%s) unterstützt keine BLOB- und TEXT-Felder"
+        geo "საცავის ძრავას %s BLOB/TEXT ტიპის სვეტების მხარდაჭერა არ გააჩნია"
+        hindi "स्टोरेज इंजन %s BLOB/TEXT कॉलम्स को सपोर्ट नहीं करता"
+        hun "A hasznalt tabla tipus (%s) nem tamogatja a BLOB/TEXT mezoket"
+        ita "Il tipo di tabella usata (%s) non supporta colonne di tipo BLOB/TEXT"
+        kor "스토리지 엔진 (%s)는 BLOB/TEXT 컬럼을 지원하지 않습니다."
+        nla "Het gebruikte tabel type (%s) ondersteunt geen BLOB/TEXT kolommen"
+        por "Tipo de tabela usado (%s) não permite colunas BLOB/TEXT"
+        rum "Tipul de tabela folosit (%s) nu suporta coloane de tip BLOB/TEXT"
+        rus "%s таблицы не поддерживают типы BLOB/TEXT"
+        serbian "Iskorišteni tip tabele (%s) ne podržava kolone tipa 'BLOB' odnosno 'TEXT'"
+        spa "El motor de almacenaje %s no soporta columnas de tipo BLOB/TEXT"
+        swe "Den använda tabelltypen (%s) kan inte hantera BLOB/TEXT-kolumner"
+        ukr "%s таблиці не підтримують BLOB/TEXT стовбці"
+ER_TABLE_CANT_HANDLE_AUTO_INCREMENT 42000 
+        chi "存储引擎%s不支持auto_increment列"
+        cze "Typ použité tabulky (%s) nepodporuje AUTO_INCREMENT sloupce"
+        dan "Denne tabeltype understøtter (%s) ikke brug af AUTO_INCREMENT kolonner"
+        eng "Storage engine %s doesn't support AUTO_INCREMENT columns"
+        est "Valitud tabelitüüp (%s) ei toeta AUTO_INCREMENT tüüpi välju"
+        fre "Ce type de table (%s) ne supporte pas les colonnes AUTO_INCREMENT"
+        ger "Der verwendete Tabellentyp (%s) unterstützt keine AUTO_INCREMENT-Felder"
+        geo "საცავის ძრავას (%s) AUTO_INCREMENT ტიპის სვეტების მხარდაჭერა არ გააჩნია"
+        hindi "स्टोरेज इंजन %s AUTO_INCREMENT कॉलम्स को सपोर्ट नहीं करता"
+        hun "A hasznalt tabla tipus (%s) nem tamogatja az AUTO_INCREMENT tipusu mezoket"
+        ita "Il tipo di tabella usata (%s) non supporta colonne di tipo AUTO_INCREMENT"
+        kor "스토리지 엔진 (%s)는 AUTO_INCREMENT를 지원하지 않습니다."
+        nla "Het gebruikte tabel type (%s) ondersteunt geen AUTO_INCREMENT kolommen"
+        por "Tipo de tabela usado (%s) não permite colunas AUTO_INCREMENT"
+        rum "Tipul de tabela folosit (%s) nu suporta coloane de tip AUTO_INCREMENT"
+        rus "%s таблицы не поддерживают автоинкрементные столбцы"
+        serbian "Iskorišteni tip tabele (%s) ne podržava kolone tipa 'AUTO_INCREMENT'"
+        spa "El motor de almacenaje %s no soporta columnas AUTO_INCREMENT"
+        swe "Den använda tabelltypen (%s) kan inte hantera AUTO_INCREMENT-kolumner"
+        ukr "%s таблиці не підтримують AUTO_INCREMENT стовбці"
+ER_DELAYED_INSERT_TABLE_LOCKED  
+        chi "INSERT DELAYED 不能用在表 '%-.192s' 因为它被LOCK TABLES 锁定"
+        cze "INSERT DELAYED není možno s tabulkou '%-.192s' použít, protože je zamčená pomocí LOCK TABLES"
+        dan "INSERT DELAYED kan ikke bruges med tabellen '%-.192s', fordi tabellen er låst med LOCK TABLES"
+        eng "INSERT DELAYED can't be used with table '%-.192s' because it is locked with LOCK TABLES"
+        est "INSERT DELAYED ei saa kasutada tabeli '%-.192s' peal, kuna see on lukustatud LOCK TABLES käsuga"
+        fre "INSERT DELAYED ne peut être utilisé avec la table '%-.192s', car elle est verrouillée avec LOCK TABLES"
+        ger "INSERT DELAYED kann für Tabelle '%-.192s' nicht verwendet werden, da sie mit LOCK TABLES gesperrt ist"
+        geo "INSERT DELAYED-ს ცხრილზე '%-.192s' ვერ გამოიყენებთ, რადგან ის LOCK TABLES-ითაა დაბლოკილი"
+        greek "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
+        hun "Az INSERT DELAYED nem hasznalhato a '%-.192s' tablahoz, mert a tabla zarolt (LOCK TABLES)"
+        ita "L'inserimento ritardato (INSERT DELAYED) non puo` essere usato con la tabella '%-.192s', perche` soggetta a lock da 'LOCK TABLES'"
+        jpn "表 '%-.192s' はLOCK TABLESでロックされているため、INSERT DELAYEDを使用できません。"
+        kor "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
+        nla "INSERT DELAYED kan niet worden gebruikt bij table '%-.192s', vanwege een 'lock met LOCK TABLES"
+        nor "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
+        norwegian-ny "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
+        pol "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
+        por "INSERT DELAYED não pode ser usado com a tabela '%-.192s', porque ela está travada com LOCK TABLES"
+        rum "INSERT DELAYED nu poate fi folosit cu tabela '%-.192s', deoarece este locked folosing LOCK TABLES"
+        rus "Нельзя использовать INSERT DELAYED для таблицы '%-.192s', потому что она заблокирована с помощью LOCK TABLES"
+        serbian "Komanda 'INSERT DELAYED' ne može biti iskorištena u tabeli '%-.192s', zbog toga što je zaključana komandom 'LOCK TABLES'"
+        slo "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
+        spa "INSERT DELAYED no puede ser usado con la tabla '%-.192s' porque esta bloqueada con LOCK TABLES"
+        swe "INSERT DELAYED kan inte användas med tabell '%-.192s', emedan den är låst med LOCK TABLES"
+        ukr "INSERT DELAYED не може бути використано з таблицею '%-.192s', тому що її заблоковано з LOCK TABLES"
+ER_WRONG_COLUMN_NAME 42000 
+        chi "错误列名 '%-.100s'"
+        cze "Nesprávné jméno sloupce '%-.100s'"
+        dan "Forkert kolonnenavn '%-.100s'"
+        eng "Incorrect column name '%-.100s'"
+        est "Vigane tulba nimi '%-.100s'"
+        fre "Nom de colonne '%-.100s' incorrect"
+        ger "Falscher Spaltenname '%-.100s'"
+        geo "სვეტის ('%-.100s') სახელი არასწორია"
+        hindi "कॉलम नाम '%-.100s' गलत है"
+        hun "Ervenytelen mezonev: '%-.100s'"
+        ita "Nome colonna '%-.100s' non corretto"
+        jpn "列名 '%-.100s' は不正です。"
+        nla "Incorrecte kolom naam '%-.100s'"
+        por "Nome de coluna '%-.100s' incorreto"
+        rum "Nume incorect de coloana '%-.100s'"
+        rus "Неверное имя столбца '%-.100s'"
+        serbian "Pogrešno ime kolone '%-.100s'"
+        spa "Nombre Incorrecto de columna '%-.100s'"
+        swe "Felaktigt kolumnnamn '%-.100s'"
+        ukr "Невірне ім'я стовбця '%-.100s'"
+ER_WRONG_KEY_COLUMN 42000 
+        chi "存储引擎 %s 不能给 %`s 列建索引"
+        eng "The storage engine %s can't index column %`s"
+        ger "Die Speicher-Engine %s kann die Spalte %`s nicht indizieren"
+        geo "საცავის ძრავას (%s) სვეტზე (%`s) ინდექსის დადება არ შეუძლია"
+        hindi "स्टोरेज इंजन %s, कॉलम %`s को इंडेक्स नहीं कर सकता"
+        rus "Обработчик таблиц %s не может проиндексировать столбец %`s"
+        spa "El motor de almacenaje %s no puede indizar la columna %`s"
+        ukr "Вказівник таблиц %s не може індексувати стовбець %`s"
+ER_WRONG_MRG_TABLE  
+        chi "无法打开定义不同或非 MyISAM 类型或不存在的表"
+        cze "Všechny tabulky v MERGE tabulce nejsou definovány stejně"
+        dan "Tabellerne i MERGE er ikke defineret ens"
+        eng "Unable to open underlying table which is differently defined or of non-MyISAM type or doesn't exist"
+        est "Kõik tabelid MERGE tabeli määratluses ei ole identsed"
+        fre "Toutes les tables de la table de type MERGE n'ont pas la même définition"
+        ger "Nicht alle Tabellen in der MERGE-Tabelle sind gleich definiert"
+        geo "MERGE-ში ყველა ცხრილი ერთნაირად არაა აღწერილი"
+        hun "A MERGE tablaban talalhato tablak definicioja nem azonos"
+        ita "Non tutte le tabelle nella tabella di MERGE sono definite in maniera identica"
+        jpn "MERGE表の構成表がオープンできません。列定義が異なるか、MyISAM表ではないか、存在しません。"
+        kor "All tables in the MERGE table are not defined identically"
+        nla "Niet alle tabellen in de MERGE tabel hebben identieke gedefinities"
+        nor "All tables in the MERGE table are not defined identically"
+        norwegian-ny "All tables in the MERGE table are not defined identically"
+        pol "All tables in the MERGE table are not defined identically"
+        por "Todas as tabelas contidas na tabela fundida (MERGE) não estão definidas identicamente"
+        rum "Toate tabelele din tabela MERGE nu sint definite identic"
+        rus "Не все таблицы в MERGE определены одинаково"
+        serbian "Tabele iskorištene u 'MERGE' tabeli nisu definisane na isti način"
+        slo "All tables in the MERGE table are not defined identically"
+        spa "Incapaz de abrir la tabla subyacente por estar definida de forma diferente o por no ser del tipo no-MyISAM o por no existir"
+        swe "Tabellerna i MERGE-tabellen är inte identiskt definierade"
+        ukr "Таблиці у MERGE TABLE мають різну структуру"
+ER_DUP_UNIQUE 23000 
+        chi "因为表'%-.192s' 唯一约束,无法写入"
+        cze "Kvůli unique constraintu nemozu zapsat do tabulky '%-.192s'"
+        dan "Kan ikke skrive til tabellen '%-.192s' fordi det vil bryde CONSTRAINT regler"
+        eng "Can't write, because of unique constraint, to table '%-.192s'"
+        est "Ei suuda kirjutada tabelisse '%-.192s', kuna see rikub ühesuse kitsendust"
+        fre "Écriture impossible à cause d'un index UNIQUE sur la table '%-.192s'"
+        ger "Schreiben in Tabelle '%-.192s' nicht möglich wegen einer Eindeutigkeitsbeschränkung (unique constraint)"
+        geo "ცხრილში '%-.192s' უნიკალური გასაღების შეზღუდვის გამო ჩაწერა შეუძლებელია"
+        hun "A '%-.192s' nem irhato, az egyedi mezok miatt"
+        ita "Impossibile scrivere nella tabella '%-.192s' per limitazione di unicita`"
+        jpn "一意性制約違反のため、表 '%-.192s' に書き込めません。"
+        nla "Kan niet opslaan naar table '%-.192s' vanwege 'unique' beperking"
+        por "Não pode gravar, devido à restrição UNIQUE, na tabela '%-.192s'"
+        rum "Nu pot scrie pe hard-drive, din cauza constraintului unic (unique constraint) pentru tabela '%-.192s'"
+        rus "Невозможно записать в таблицу '%-.192s' из-за ограничений уникального ключа"
+        serbian "Zbog provere jedinstvenosti ne mogu da upišem podatke u tabelu '%-.192s'"
+        spa "No puedo grabar, debido a restricción única, en la tabla '%-.192s'"
+        swe "Kan inte skriva till tabell '%-.192s'; UNIQUE-test"
+        ukr "Не можу записати до таблиці '%-.192s', з причини вимог унікальності"
+ER_BLOB_KEY_WITHOUT_LENGTH 42000 
+        chi "BLOB/TEXT 列 '%-.192s' 在没有索引长度的索引规范中使用"
+        cze "BLOB sloupec '%-.192s' je použit ve specifikaci klíče bez délky"
+        dan "BLOB kolonnen '%-.192s' brugt i nøglespecifikation uden nøglelængde"
+        eng "BLOB/TEXT column '%-.192s' used in key specification without a key length"
+        est "BLOB-tüüpi tulp '%-.192s' on kasutusel võtmes ilma pikkust määratlemata"
+        fre "La colonne '%-.192s' de type BLOB est utilisée dans une définition d'index sans longueur d'index"
+        ger "BLOB- oder TEXT-Spalte '%-.192s' wird in der Schlüsseldefinition ohne Schlüssellängenangabe verwendet"
+        geo "სვეტი ტიპით BLOB '%-.192s' მითითებულია გასაღების აღწერაში გასაღების სიგრძის მითითების გარეშე"
+        greek "BLOB column '%-.192s' used in key specification without a key length"
+        hun "BLOB mezo '%-.192s' hasznalt a mezo specifikacioban, a mezohossz megadasa nelkul"
+        ita "La colonna '%-.192s' di tipo BLOB e` usata in una chiave senza specificarne la lunghezza"
+        jpn "BLOB列 '%-.192s' をキーに使用するには長さ指定が必要です。"
+        kor "BLOB column '%-.192s' used in key specification without a key length"
+        nla "BLOB kolom '%-.192s' gebruikt in zoeksleutel specificatie zonder zoeksleutel lengte"
+        nor "BLOB column '%-.192s' used in key specification without a key length"
+        norwegian-ny "BLOB column '%-.192s' used in key specification without a key length"
+        pol "BLOB column '%-.192s' used in key specification without a key length"
+        por "Coluna BLOB '%-.192s' usada na especificação de chave sem o comprimento da chave"
+        rum "Coloana BLOB '%-.192s' este folosita in specificarea unei chei fara ca o lungime de cheie sa fie folosita"
+        rus "Столбец типа BLOB '%-.192s' был указан в определении ключа без указания длины ключа"
+        serbian "BLOB kolona '%-.192s' je upotrebljena u specifikaciji ključa bez navođenja dužine ključa"
+        slo "BLOB column '%-.192s' used in key specification without a key length"
+        spa "Columna BLOB/TEXT '%-.192s', usada en especificación de clave, sin tamaño"
+        swe "Du har inte angett någon nyckellängd för BLOB '%-.192s'"
+        ukr "Стовбець BLOB '%-.192s' використано у визначенні ключа без вказання довжини ключа"
+ER_PRIMARY_CANT_HAVE_NULL 42000 
+        chi "PRIMARY KEY 的所有部分都不能为NULL;如果您需要在键中使用 NULL,请改用 UNIQUE"
+        cze "Všechny části primárního klíče musejí být NOT NULL; pokud potřebujete NULL, použijte UNIQUE"
+        dan "Alle dele af en PRIMARY KEY skal være NOT NULL;  Hvis du skal bruge NULL i nøglen, brug UNIQUE istedet"
+        eng "All parts of a PRIMARY KEY must be NOT NULL; if you need NULL in a key, use UNIQUE instead"
+        est "Kõik PRIMARY KEY peavad olema määratletud NOT NULL piiranguga; vajadusel kasuta UNIQUE tüüpi võtit"
+        fre "Toutes les parties d'un index PRIMARY KEY doivent être NOT NULL; Si vous avez besoin d'un NULL dans l'index, utilisez un index UNIQUE"
+        ger "Alle Teile eines PRIMARY KEY müssen als NOT NULL definiert sein. Wenn NULL in einem Schlüssel benötigt wird, muss ein UNIQUE-Schlüssel verwendet werden"
+        geo "PRIMARY KEY-ის ყველა ნაწილი NOT NULL პარამეტრით უნდა იყოს აღწერილი. თუ გასაღებში NULL გჭირდებათ, UNIQUE გამოიყენეთ"
+        hindi "PRIMARY KEY के सभी भागों को NOT NULL होना चाहिए; यदि आपको एक KEY में NULL की जरूरत है, तो UNIQUE का उपयोग करें"
+        hun "Az elsodleges kulcs teljes egeszeben csak NOT NULL tipusu lehet; Ha NULL mezot szeretne a kulcskent, hasznalja inkabb a UNIQUE-ot"
+        ita "Tutte le parti di una chiave primaria devono essere dichiarate NOT NULL; se necessitano valori NULL nelle chiavi utilizzare UNIQUE"
+        jpn "PRIMARY KEYの列は全てNOT NULLでなければいけません。UNIQUE索引であればNULLを含むことが可能です。"
+        nla "Alle delen van een PRIMARY KEY moeten NOT NULL zijn; Indien u NULL in een zoeksleutel nodig heeft kunt u UNIQUE gebruiken"
+        por "Todas as partes de uma chave primária devem ser não-nulas. Se você precisou usar um valor nulo (NULL) em uma chave, use a cláusula UNIQUE em seu lugar"
+        rum "Toate partile unei chei primare (PRIMARY KEY) trebuie sa fie NOT NULL; Daca aveti nevoie de NULL in vreo cheie, folositi UNIQUE in schimb"
+        rus "Все части первичного ключа (PRIMARY KEY) должны быть определены как NOT NULL; Если вам нужна поддержка величин NULL в ключе, воспользуйтесь индексом UNIQUE"
+        serbian "Svi delovi primarnog ključa moraju biti različiti od NULL;  Ako Vam ipak treba NULL vrednost u ključu, upotrebite 'UNIQUE'"
+        spa "Todas las partes de una PRIMARY KEY deben de ser NOT NULL; si necesita NULL en una clave, use UNIQUE en su lugar"
+        swe "Alla delar av en PRIMARY KEY måste vara NOT NULL;  Om du vill ha en nyckel med NULL, använd UNIQUE istället"
+        ukr "Усі частини PRIMARY KEY повинні бути NOT NULL; Якщо ви потребуєте NULL у ключі, скористайтеся UNIQUE"
+ER_TOO_MANY_ROWS 42000 
+        chi "结果多于一行"
+        cze "Výsledek obsahuje více než jeden řádek"
+        dan "Resultatet bestod af mere end een række"
+        eng "Result consisted of more than one row"
+        est "Tulemis oli rohkem kui üks kirje"
+        fre "Le résultat contient plus d'un enregistrement"
+        ger "Ergebnis besteht aus mehr als einer Zeile"
+        geo "შედეგი ერთ სტრიქონზე მეტს შეიცავს"
+        hindi "परिणाम एक से अधिक पंक्ति का है"
+        hun "Az eredmeny tobb, mint egy sort tartalmaz"
+        ita "Il risultato consiste di piu` di una riga"
+        jpn "結果が2行以上です。"
+        nla "Resultaat bevatte meer dan een rij"
+        por "O resultado consistiu em mais do que uma linha"
+        rum "Rezultatul consista din mai multe linii"
+        rus "В результате возвращена более чем одна строка"
+        serbian "Rezultat je sačinjen od više slogova"
+        spa "Resultado compuesto de más de una fila"
+        swe "Resultet bestod av mera än en rad"
+        ukr "Результат знаходиться у більше ніж одній строці"
+ER_REQUIRES_PRIMARY_KEY 42000 
+        chi "此表类型需要主索引"
+        cze "Tento typ tabulky vyžaduje primární klíč"
+        dan "Denne tabeltype kræver en primærnøgle"
+        eng "This table type requires a primary key"
+        est "Antud tabelitüüp nõuab primaarset võtit"
+        fre "Ce type de table nécessite une clé primaire (PRIMARY KEY)"
+        ger "Dieser Tabellentyp benötigt einen Primärschlüssel (PRIMARY KEY)"
+        geo "ცხრილის ეს ტიპი ძირითად გასაღებს (PRIMARY KEY) მოითხოვს"
+        hindi "इस प्रकार के टेबल को एक PRIMARY KEY की आवश्यकता है"
+        hun "Az adott tablatipushoz elsodleges kulcs hasznalata kotelezo"
+        ita "Questo tipo di tabella richiede una chiave primaria"
+        jpn "使用のストレージエンジンでは、PRIMARY KEYが必要です。"
+        nla "Dit tabel type heeft een primaire zoeksleutel nodig"
+        por "Este tipo de tabela requer uma chave primária"
+        rum "Aceast tip de tabela are nevoie de o cheie primara"
+        rus "Этот тип таблицы требует определения первичного ключа"
+        serbian "Ovaj tip tabele zahteva da imate definisan primarni ključ"
+        spa "Este tipo de tabla necesita de una clave primaria"
+        swe "Denna tabelltyp kräver en PRIMARY KEY"
+        ukr "Цей тип таблиці потребує первинного ключа"
+ER_NO_RAID_COMPILED  
+        chi "这个版本的 MariaDB 编译时不支持 RAID"
+        cze "Tato verze MariaDB není zkompilována s podporou RAID"
+        dan "Denne udgave af MariaDB er ikke oversat med understøttelse af RAID"
+        eng "This version of MariaDB is not compiled with RAID support"
+        est "Antud MariaDB versioon on kompileeritud ilma RAID toeta"
+        fre "Cette version de MariaDB n'est pas compilée avec le support RAID"
+        ger "Diese MariaDB-Version ist nicht mit RAID-Unterstützung kompiliert"
+        geo "MariaDB-ის ეს ვერსია RAID-ის მხარდაჭერით არაა აგებული"
+        hindi "MariaDB का यह संस्करण RAID सपोर्ट के साथ कॉम्पाईल्ड नहीं है"
+        hun "Ezen leforditott MariaDB verzio nem tartalmaz RAID support-ot"
+        ita "Questa versione di MariaDB non e` compilata con il supporto RAID"
+        jpn "このバージョンのMariaDBはRAIDサポートを含めてコンパイルされていません。"
+        nla "Deze versie van MariaDB is niet gecompileerd met RAID ondersteuning"
+        por "Esta versão do MariaDB não foi compilada com suporte a RAID"
+        rum "Aceasta versiune de MariaDB, nu a fost compilata cu suport pentru RAID"
+        rus "Эта версия MariaDB скомпилирована без поддержки RAID"
+        serbian "Ova verzija MariaDB servera nije kompajlirana sa podrškom za RAID uređaje"
+        spa "Esta versión de MariaDB no ha sido compilada con soporte para RAID"
+        swe "Denna version av MariaDB är inte kompilerad med RAID"
+        ukr "Ця версія MariaDB не зкомпільована з підтримкою RAID"
+ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE  
+        chi "您正在使用安全更新模式,同时您尝试更新时没有使用含有 KEY 列的 WHERE 语句"
+        cze "Update tabulky bez WHERE s klíčem není v módu bezpečných update dovoleno"
+        dan "Du bruger sikker opdaterings modus ('safe update mode') og du forsøgte at opdatere en tabel uden en WHERE klausul, der gør brug af et KEY felt"
+        eng "You are using safe update mode and you tried to update a table without a WHERE that uses a KEY column"
+        est "Katse muuta tabelit turvalises rezhiimis ilma WHERE klauslita"
+        fre "Vous êtes en mode 'safe update' et vous essayez de faire un UPDATE sans clause WHERE utilisant un index"
+        ger "MariaDB läuft im sicheren Aktualisierungsmodus (safe update mode). Sie haben versucht, eine Tabelle zu aktualisieren, ohne in der WHERE-Klausel ein KEY-Feld anzugeben"
+        geo "იყენებთ უსაფრთხო განახლებების რეჟიმს და სცადეთ ცხრილი WHERE-ის, რომელიც KEY სვეტს შეიცავს, გარეშე გაგეახლებინათ"
+        hun "On a biztonsagos update modot hasznalja, es ugy probalt egy tablat frissiteni, hogy a WHERE feltetel nem hasznal KEY oszlopot"
+        ita "In modalita` 'safe update' si e` cercato di aggiornare una tabella senza clausola WHERE su una chiave"
+        jpn "'safe update mode'で、索引を利用するWHERE句の無い更新処理を実行しようとしました。"
+        nla "U gebruikt 'safe update mode' en u probeerde een tabel te updaten zonder een WHERE met een KEY kolom"
+        por "Você está usando modo de atualização seguro e tentou atualizar uma tabela sem uma cláusula WHERE que use uma coluna chave"
+        rus "Вы работаете в режиме безопасных обновлений (safe update mode) и попробовали изменить таблицу без использования ключевого столбца в части WHERE"
+        serbian "Vi koristite safe update mod servera, a probali ste da promenite podatke bez 'WHERE' komande koja koristi kolonu ključa"
+        spa "Está usando modo de actualización segura y ha intentado actualizar una tabla sin un WHERE que use una columna KEY"
+        swe "Du använder 'säker uppdateringsmod' och försökte uppdatera en tabell utan en WHERE-sats som använder sig av en nyckel"
+        ukr "Ви у режимі безпечного оновлення та намагаєтесь оновити таблицю без оператора WHERE, що використовує KEY стовбець"
+ER_KEY_DOES_NOT_EXISTS 42000 S1009
+        chi "索引 '%-.192s' 在表 '%-.192s' 里不存在"
+        cze "Klíč '%-.192s' v tabulce '%-.192s' neexistuje"
+        dan "Nøglen '%-.192s' eksisterer ikke i tabellen '%-.192s'"
+        eng "Key '%-.192s' doesn't exist in table '%-.192s'"
+        est "Võti '%-.192s' ei eksisteeri tabelis '%-.192s'"
+        fre "L'index '%-.192s' n'existe pas sur la table '%-.192s'"
+        ger "Schlüssel '%-.192s' existiert in der Tabelle '%-.192s' nicht"
+        geo "გასაღები '%-.192s' არ არსებობს ცხრილში '%-.192s'"
+        hindi "KEY '%-.192s', टेबल '%-.192s' में मौजूद नहीं है"
+        hun "A '%-.192s' kulcs nem letezik a '%-.192s' tablaban"
+        ita "La chiave '%-.192s' non esiste nella tabella '%-.192s'"
+        jpn "索引 '%-.192s' は表 '%-.192s' には存在しません。"
+        nla "Zoeksleutel '%-.192s' bestaat niet in tabel '%-.192s'"
+        por "Chave '%-.192s' não existe na tabela '%-.192s'"
+        rus "Ключ '%-.192s' не существует в таблице '%-.192s'"
+        serbian "Ključ '%-.192s' ne postoji u tabeli '%-.192s'"
+        spa "La clave '%-.192s' no existe en la tabla '%-.192s'"
+        swe "Nyckel '%-.192s' finns inte in tabell '%-.192s'"
+        ukr "Ключ '%-.192s' не існує в таблиці '%-.192s'"
+ER_CHECK_NO_SUCH_TABLE 42000 
+        chi "无法打开表"
+        cze "Nemohu otevřít tabulku"
+        dan "Kan ikke åbne tabellen"
+        eng "Can't open table"
+        est "Ei suuda avada tabelit"
+        fre "Impossible d'ouvrir la table"
+        ger "Kann Tabelle nicht öffnen"
+        geo "ცხრილის გახსნის შეცდომა"
+        hindi "टेबल नहीं खुल सकता है"
+        hun "Nem tudom megnyitni a tablat"
+        ita "Impossibile aprire la tabella"
+        jpn "表をオープンできません。"
+        nla "Kan tabel niet openen"
+        por "Não pode abrir a tabela"
+        rus "Невозможно открыть таблицу"
+        serbian "Ne mogu da otvorim tabelu"
+        spa "No puedo abrir tabla"
+        swe "Kan inte öppna tabellen"
+        ukr "Не можу відкрити таблицю"
+ER_CHECK_NOT_IMPLEMENTED 42000 
+        chi "该表的存储引擎不支持%s"
+        cze "Handler tabulky nepodporuje %s"
+        dan "Denne tabeltype understøtter ikke %s"
+        eng "The storage engine for the table doesn't support %s"
+        est "Antud tabelitüüp ei toeta %s käske"
+        fre "Ce type de table ne supporte pas les %s"
+        ger "Die Speicher-Engine für diese Tabelle unterstützt kein %s"
+        geo "ცხრილის საცავის ზრავს %s-ის მხარდაჭერა არ გააჩნია"
+        greek "The handler for the table doesn't support %s"
+        hindi "इस टेबल का स्टोरेज इंजन '%s' को सपोर्ट नहीं करता"
+        hun "A tabla kezeloje (handler) nem tamogatja az %s"
+        ita "Il gestore per la tabella non supporta il %s"
+        jpn "この表のストレージエンジンは '%s' を利用できません。"
+        kor "The handler for the table doesn't support %s"
+        nla "De 'handler' voor de tabel ondersteund geen %s"
+        nor "The handler for the table doesn't support %s"
+        norwegian-ny "The handler for the table doesn't support %s"
+        pol "The handler for the table doesn't support %s"
+        por "O manipulador de tabela não suporta %s"
+        rum "The handler for the table doesn't support %s"
+        rus "Обработчик таблицы не поддерживает этого: %s"
+        serbian "Handler za ovu tabelu ne dozvoljava %s komande"
+        slo "The handler for the table doesn't support %s"
+        spa "El motor de almacenaje para la tabla no soporta %s"
+        swe "Tabellhanteraren för denna tabell kan inte göra %s"
+        ukr "Вказівник таблиці не підтримуе %s"
+ER_CANT_DO_THIS_DURING_AN_TRANSACTION 25000 
+        chi "不允许在事务中执行此命令"
+        cze "Provedení tohoto příkazu není v transakci dovoleno"
+        dan "Du må ikke bruge denne kommando i en transaktion"
+        eng "You are not allowed to execute this command in a transaction"
+        est "Seda käsku ei saa kasutada transaktsiooni sees"
+        fre "Vous n'êtes pas autorisé à exécuter cette commande dans une transaction"
+        ger "Sie dürfen diesen Befehl nicht in einer Transaktion ausführen"
+        geo "ტრანზაქციაში ამ ბრძანების შესრულების უფლება არ გაქვთ"
+        hun "Az On szamara nem engedelyezett a parancs vegrehajtasa a tranzakcioban"
+        ita "Non puoi eseguire questo comando in una transazione"
+        jpn "このコマンドはトランザクション内で実行できません。"
+        nla "Het is u niet toegestaan dit commando uit te voeren binnen een transactie"
+        por "Não lhe é permitido executar este comando em uma transação"
+        rus "Вам не разрешено выполнять эту команду в транзакции"
+        serbian "Nije Vam dozvoljeno da izvršite ovu komandu u transakciji"
+        spa "No tiene el permiso para ejecutar este comando en una transacción"
+        swe "Du får inte utföra detta kommando i en transaktion"
+        ukr "Вам не дозволено виконувати цю команду в транзакції"
+ER_ERROR_DURING_COMMIT  
+        chi "COMMIT时发生错误 %M "
+        cze "Chyba %M při COMMIT"
+        dan "Modtog fejl %M mens kommandoen COMMIT blev udført"
+        eng "Got error %M during COMMIT"
+        est "Viga %M käsu COMMIT täitmisel"
+        fre "Erreur %M lors du COMMIT"
+        ger "Fehler %M beim COMMIT"
+        geo "შეცდომა %M COMMIT-ის დროს"
+        hindi "COMMIT के दौरान %M त्रुटि हुई"
+        hun "%M hiba a COMMIT vegrehajtasa soran"
+        ita "Rilevato l'errore %M durante il COMMIT"
+        jpn "COMMIT中にエラー %M が発生しました。"
+        nla "Kreeg fout %M tijdens COMMIT"
+        por "Obteve erro %M durante COMMIT"
+        rus "Получена ошибка %M в процессе COMMIT"
+        serbian "Greška %M za vreme izvršavanja komande 'COMMIT'"
+        spa "Obtenido error %M durante COMMIT"
+        swe "Fick fel %M vid COMMIT"
+        ukr "Отримано помилку %M під час COMMIT"
+ER_ERROR_DURING_ROLLBACK  
+        chi "回滚时出错%M"
+        cze "Chyba %M při ROLLBACK"
+        dan "Modtog fejl %M mens kommandoen ROLLBACK blev udført"
+        eng "Got error %M during ROLLBACK"
+        est "Viga %M käsu ROLLBACK täitmisel"
+        fre "Erreur %M lors du ROLLBACK"
+        ger "Fehler %M beim ROLLBACK"
+        geo "შეცდომა %M ROLLBACK-ის დროს"
+        hindi "ROLLBACK के दौरान %M त्रुटि हुई"
+        hun "%M hiba a ROLLBACK vegrehajtasa soran"
+        ita "Rilevato l'errore %M durante il ROLLBACK"
+        jpn "ROLLBACK中にエラー %M が発生しました。"
+        nla "Kreeg fout %M tijdens ROLLBACK"
+        por "Obteve erro %M durante ROLLBACK"
+        rus "Получена ошибка %M в процессе ROLLBACK"
+        serbian "Greška %M za vreme izvršavanja komande 'ROLLBACK'"
+        spa "Obtenido error %M durante ROLLBACK"
+        swe "Fick fel %M vid ROLLBACK"
+        ukr "Отримано помилку %M під час ROLLBACK"
+ER_ERROR_DURING_FLUSH_LOGS  
+        chi "flush_logs时出错%M"
+        cze "Chyba %M při FLUSH_LOGS"
+        dan "Modtog fejl %M mens kommandoen FLUSH_LOGS blev udført"
+        eng "Got error %M during FLUSH_LOGS"
+        est "Viga %M käsu FLUSH_LOGS täitmisel"
+        fre "Erreur %M lors du FLUSH_LOGS"
+        ger "Fehler %M bei FLUSH_LOGS"
+        geo "შეცდომა %M FLUSH LOGS-ის დროს"
+        hindi "FLUSH_LOGS के दौरान %M त्रुटि हुई"
+        hun "%M hiba a FLUSH_LOGS vegrehajtasa soran"
+        ita "Rilevato l'errore %M durante il FLUSH_LOGS"
+        jpn "FLUSH_LOGS中にエラー %M が発生しました。"
+        nla "Kreeg fout %M tijdens FLUSH_LOGS"
+        por "Obteve erro %M durante FLUSH_LOGS"
+        rus "Получена ошибка %M в процессе FLUSH_LOGS"
+        serbian "Greška %M za vreme izvršavanja komande 'FLUSH_LOGS'"
+        spa "Obtenido error %M durante FLUSH_LOGS"
+        swe "Fick fel %M vid FLUSH_LOGS"
+        ukr "Отримано помилку %M під час FLUSH_LOGS"
+ER_ERROR_DURING_CHECKPOINT  
+        chi "CHECKPOINT时出错%M "
+        cze "Chyba %M při CHECKPOINT"
+        dan "Modtog fejl %M mens kommandoen CHECKPOINT blev udført"
+        eng "Got error %M during CHECKPOINT"
+        est "Viga %M käsu CHECKPOINT täitmisel"
+        fre "Erreur %M lors du CHECKPOINT"
+        ger "Fehler %M bei CHECKPOINT"
+        geo "შეცდომა %M CHECKPOINT-ის დროს"
+        hindi "CHECKPOINT के दौरान %M त्रुटि हुई"
+        hun "%M hiba a CHECKPOINT vegrehajtasa soran"
+        ita "Rilevato l'errore %M durante il CHECKPOINT"
+        jpn "CHECKPOINT中にエラー %M が発生しました。"
+        nla "Kreeg fout %M tijdens CHECKPOINT"
+        por "Obteve erro %M durante CHECKPOINT"
+        rus "Получена ошибка %M в процессе CHECKPOINT"
+        serbian "Greška %M za vreme izvršavanja komande 'CHECKPOINT'"
+        spa "Obtenido error %M durante CHECKPOINT"
+        swe "Fick fel %M vid CHECKPOINT"
+        ukr "Отримано помилку %M під час CHECKPOINT"
+ER_NEW_ABORTING_CONNECTION 08S01 
+        chi "终止的连接 %lld 到数据库: '%-.192s' 用户: '%-.48s' 主机: '%-.64s' (%-.64s)"
+        cze "Spojení %lld do databáze: '%-.192s' uživatel: '%-.48s' stroj: '%-.64s' (%-.64s) bylo přerušeno"
+        dan "Afbrød forbindelsen %lld til databasen '%-.192s' bruger: '%-.48s' vært: '%-.64s' (%-.64s)"
+        eng "Aborted connection %lld to db: '%-.192s' user: '%-.48s' host: '%-.64s' (%-.64s)"
+        est "Ühendus katkestatud %lld andmebaas: '%-.192s' kasutaja: '%-.48s' masin: '%-.64s' (%-.64s)"
+        fre "Connection %lld avortée vers la bd: '%-.192s' utilisateur: '%-.48s' hôte: '%-.64s' (%-.64s)"
+        ger "Abbruch der Verbindung %lld zur Datenbank '%-.192s'. Benutzer: '%-.48s', Host: '%-.64s' (%-.64s)"
+        geo "შეწყდა კავშირი %lld ბაზამდე: '%-.192s' მომხმარებელი: '%-.48s' ჰოსტი: '%-.64s' (%-.64s)"
+        ita "Interrotta la connessione %lld al db: '%-.192s' utente: '%-.48s' host: '%-.64s' (%-.64s)"
+        jpn "接続 %lld が中断されました。データベース: '%-.192s' ユーザー: '%-.48s' ホスト: '%-.64s' (%-.64s)"
+        nla "Afgebroken verbinding %lld naar db: '%-.192s' gebruiker: '%-.48s' host: '%-.64s' (%-.64s)"
+        por "Conexão %lld abortada para banco de dados '%-.192s' - usuário '%-.48s' - 'host' '%-.64s' ('%-.64s')"
+        rus "Прервано соединение %lld к базе данных '%-.192s' пользователя '%-.48s' с хоста '%-.64s' (%-.64s)"
+        serbian "Prekinuta konekcija broj %lld ka bazi: '%-.192s' korisnik je bio: '%-.48s' a host: '%-.64s' (%-.64s)"
+        spa "Abortada conexión %lld a la base de datos: '%-.192s' usuario: '%-.48s' equipo: '%-.64s' (%-.64s)"
+        swe "Avbröt länken för tråd %lld till db '%-.192s', användare '%-.48s', host '%-.64s' (%-.64s)"
+        ukr "Перервано з'єднання %lld до бази данних: '%-.192s' користувач: '%-.48s' хост: '%-.64s' (%-.64s)"
+ER_UNUSED_10
+        eng "You should never see it"
+        geo "ეს ვერასდროს უნდა დაინახოთ"
+        spa "Nunca lo debería vd de ver"
+ER_FLUSH_MASTER_BINLOG_CLOSED  
+        chi "Binlog 已关闭, 不能 RESET MASTER"
+        eng "Binlog closed, cannot RESET MASTER"
+        ger "Binlog geschlossen. Kann RESET MASTER nicht ausführen"
+        geo "Binlog-ი დახურულია. RESET MASTER-ის გაშვება შეუძლებელია"
+        jpn "バイナリログがクローズされています。RESET MASTER を実行できません。"
+        por "Binlog fechado. Não pode fazer RESET MASTER"
+        rus "Двоичный журнал обновления закрыт, невозможно выполнить RESET MASTER"
+        serbian "Binarni log file zatvoren, ne mogu da izvršim komandu 'RESET MASTER'"
+        spa "Binlog cerrado, no puedo hacer RESET MASTER"
+        ukr "Реплікаційний лог закрито, не можу виконати RESET MASTER"
+ER_INDEX_REBUILD  
+        chi "重建 dumped table '%-.192s' 的索引失败"
+        cze "Přebudování indexu dumpnuté tabulky '%-.192s' nebylo úspěšné"
+        dan "Kunne ikke genopbygge indekset for den dumpede tabel '%-.192s'"
+        eng "Failed rebuilding the index of dumped table '%-.192s'"
+        fre "La reconstruction de l'index de la table copiée '%-.192s' a échoué"
+        ger "Neuerstellung des Index der Dump-Tabelle '%-.192s' fehlgeschlagen"
+        geo "შეცდომა შენახული ცხრილის ('%-.192s') ინდექსის თავიდან აგებისას"
+        greek "Failed rebuilding the index of dumped table '%-.192s'"
+        hun "Failed rebuilding the index of dumped table '%-.192s'"
+        ita "Fallita la ricostruzione dell'indice della tabella copiata '%-.192s'"
+        jpn "ダンプ表 '%-.192s' の索引再構築に失敗しました。"
+        nla "Gefaald tijdens heropbouw index van gedumpte tabel '%-.192s'"
+        por "Falhou na reconstrução do índice da tabela 'dumped' '%-.192s'"
+        rus "Ошибка перестройки индекса сохраненной таблицы '%-.192s'"
+        serbian "Izgradnja indeksa dump-ovane tabele '%-.192s' nije uspela"
+        spa "Fallo reconstruyendo el índice del volcado de la tabla '%-.192s'"
+        ukr "Невдале відновлення індекса переданої таблиці '%-.192s'"
+ER_MASTER  
+        chi "Master错误:'%-.64s'"
+        cze "Chyba masteru: '%-.64s'"
+        dan "Fejl fra master: '%-.64s'"
+        eng "Error from master: '%-.64s'"
+        fre "Erreur reçue du maître: '%-.64s'"
+        ger "Fehler vom Master: '%-.64s'"
+        geo "შეცდომა მთავარი სერვერიდან: '%-.64s'"
+        ita "Errore dal master: '%-.64s'"
+        jpn "マスターでエラーが発生: '%-.64s'"
+        nla "Fout van master: '%-.64s'"
+        por "Erro no 'master' '%-.64s'"
+        rus "Ошибка от головного сервера: '%-.64s'"
+        serbian "Greška iz glavnog servera '%-.64s' u klasteru"
+        spa "Error del maestro (master): '%-.64s'"
+        swe "Fel från master: '%-.64s'"
+        ukr "Помилка від головного: '%-.64s'"
+ER_MASTER_NET_READ 08S01 
+        chi "读master时有网络错误"
+        cze "Síťová chyba při čtení z masteru"
+        dan "Netværksfejl ved læsning fra master"
+        eng "Net error reading from master"
+        fre "Erreur de lecture réseau reçue du maître"
+        ger "Netzfehler beim Lesen vom Master"
+        geo "ქსელის შეცდომა მთავარი სერვერიდან წაკითხვისას"
+        ita "Errore di rete durante la ricezione dal master"
+        jpn "マスターからのデータ受信中のネットワークエラー"
+        nla "Net fout tijdens lezen van master"
+        por "Erro de rede lendo do 'master'"
+        rus "Возникла ошибка чтения в процессе коммуникации с головным сервером"
+        serbian "Greška u primanju mrežnih paketa sa glavnog servera u klasteru"
+        spa "Error de red leyendo del maestro (master)"
+        swe "Fick nätverksfel vid läsning från master"
+        ukr "Мережева помилка читання від головного"
+ER_MASTER_NET_WRITE 08S01 
+        chi "写master时有网络错误"
+        cze "Síťová chyba při zápisu na master"
+        dan "Netværksfejl ved skrivning til master"
+        eng "Net error writing to master"
+        fre "Erreur d'écriture réseau reçue du maître"
+        ger "Netzfehler beim Schreiben zum Master"
+        geo "ქსელის შეცდომა მთავარ სერვერზე ჩაწერისას"
+        ita "Errore di rete durante l'invio al master"
+        jpn "マスターへのデータ送信中のネットワークエラー"
+        nla "Net fout tijdens schrijven naar master"
+        por "Erro de rede gravando no 'master'"
+        rus "Возникла ошибка записи в процессе коммуникации с головным сервером"
+        serbian "Greška u slanju mrežnih paketa na glavni server u klasteru"
+        spa "Error de red grabando en maestro (master)"
+        swe "Fick nätverksfel vid skrivning till master"
+        ukr "Мережева помилка запису до головного"
+ER_FT_MATCHING_KEY_NOT_FOUND  
+        chi "找不到与列列表匹配的全文索引"
+        cze "Žádný sloupec nemá vytvořen fulltextový index"
+        dan "Kan ikke finde en FULLTEXT nøgle som svarer til kolonne listen"
+        eng "Can't find FULLTEXT index matching the column list"
+        est "Ei suutnud leida FULLTEXT indeksit, mis kattuks kasutatud tulpadega"
+        fre "Impossible de trouver un index FULLTEXT correspondant à cette liste de colonnes"
+        ger "Kann keinen FULLTEXT-Index finden, der der Feldliste entspricht"
+        geo "სვეტების სიის შესაბამისი FULLTEXT ინდექსი ვერ ვიპოვე"
+        ita "Impossibile trovare un indice FULLTEXT che corrisponda all'elenco delle colonne"
+        jpn "列リストに対応する全文索引(FULLTEXT)が見つかりません。"
+        nla "Kan geen FULLTEXT index vinden passend bij de kolom lijst"
+        por "Não pode encontrar um índice para o texto todo que combine com a lista de colunas"
+        rus "Невозможно отыскать полнотекстовый (FULLTEXT) индекс, соответствующий списку столбцов"
+        serbian "Ne mogu da pronađem 'FULLTEXT' indeks koli odgovara listi kolona"
+        spa "No puedo encontrar índice FULLTEXT coincidente con la lista de columnas"
+        swe "Hittar inte ett FULLTEXT-index i kolumnlistan"
+        ukr "Не можу знайти FULLTEXT індекс, що відповідає переліку стовбців"
+ER_LOCK_OR_ACTIVE_TRANSACTION  
+        chi "无法执行给定的命令,因为表上有锁或有活动事务"
+        cze "Nemohu provést zadaný příkaz, protože existují aktivní zamčené tabulky nebo aktivní transakce"
+        dan "Kan ikke udføre den givne kommando fordi der findes aktive, låste tabeller eller fordi der udføres en transaktion"
+        eng "Can't execute the given command because you have active locked tables or an active transaction"
+        est "Ei suuda täita antud käsku kuna on aktiivseid lukke või käimasolev transaktsioon"
+        fre "Impossible d'exécuter la commande car vous avez des tables verrouillées ou une transaction active"
+        ger "Kann den angegebenen Befehl wegen einer aktiven Tabellensperre oder einer aktiven Transaktion nicht ausführen"
+        geo "მითითებული ბრძანების გაშვება აქტიური დაბლოკილი ცხრილების ან აქტიური ტრანზაქციის არსებობის გამო შეუძლებელია"
+        ita "Impossibile eseguire il comando richiesto: tabelle sotto lock o transazione in atto"
+        jpn "すでにアクティブな表ロックやトランザクションがあるため、コマンドを実行できません。"
+        nla "Kan het gegeven commando niet uitvoeren, want u heeft actieve gelockte tabellen of een actieve transactie"
+        por "Não pode executar o comando dado porque você tem tabelas ativas travadas ou uma transação ativa"
+        rus "Невозможно выполнить указанную команду, поскольку у вас присутствуют активно заблокированные таблица или открытая транзакция"
+        serbian "Ne mogu da izvršim datu komandu zbog toga što su tabele zaključane ili je transakcija u toku"
+        spa "No puedo ejecutar el comando dado porque tiene tablas activas bloqueadas o una transacción activa"
+        swe "Kan inte utföra kommandot emedan du har en låst tabell eller an aktiv transaktion"
+        ukr "Не можу виконати подану команду тому, що таблиця заблокована або виконується транзакція"
+ER_UNKNOWN_SYSTEM_VARIABLE  
+        chi "未知系统变量 '%-.*s'"
+        cze "Neznámá systémová proměnná '%-.*s'"
+        dan "Ukendt systemvariabel '%-.*s'"
+        eng "Unknown system variable '%-.*s'"
+        est "Tundmatu süsteemne muutuja '%-.*s'"
+        fre "Variable système '%-.*s' inconnue"
+        ger "Unbekannte Systemvariable '%-.*s'"
+        geo "უცნობი სისტემური ცვლადი '%-.*s'"
+        hindi "अज्ञात सिस्टम वैरिएबल '%-.*s'"
+        ita "Variabile di sistema '%-.*s' sconosciuta"
+        jpn "'%-.*s' は不明なシステム変数です。"
+        nla "Onbekende systeem variabele '%-.*s'"
+        por "Variável de sistema '%-.*s' desconhecida"
+        rus "Неизвестная системная переменная '%-.*s'"
+        serbian "Nepoznata sistemska promenljiva '%-.*s'"
+        spa "Variable de sistema '%-.*s' desconocida"
+        swe "Okänd systemvariabel: '%-.*s'"
+        ukr "Невідома системна змінна '%-.*s'"
+ER_CRASHED_ON_USAGE  
+        chi "表'%-.192s'标记为崩溃,应该修复"
+        cze "Tabulka '%-.192s' je označena jako porušená a měla by být opravena"
+        dan "Tabellen '%-.192s' er markeret med fejl og bør repareres"
+        eng "Table '%-.192s' is marked as crashed and should be repaired"
+        est "Tabel '%-.192s' on märgitud vigaseks ja tuleb parandada"
+        fre "La table '%-.192s' est marquée 'crashed' et devrait être réparée"
+        ger "Tabelle '%-.192s' ist als defekt markiert und sollte repariert werden"
+        geo "ცხრილი '%-.192s' მონიშნულია, როგორც ავარიული და შეკეთებას საჭიროებს"
+        ita "La tabella '%-.192s' e` segnalata come corrotta e deve essere riparata"
+        jpn "表 '%-.192s' は壊れています。修復が必要です。"
+        nla "Tabel '%-.192s' staat als gecrashed gemarkeerd en dient te worden gerepareerd"
+        por "Tabela '%-.192s' está marcada como danificada e deve ser reparada"
+        rus "Таблица '%-.192s' помечена как испорченная и должна пройти проверку и ремонт"
+        serbian "Tabela '%-.192s' je markirana kao oštećena i trebala bi biti popravljena"
+        spa "La tabla '%-.192s' está marcada como estropeada y debe de ser reparada"
+        swe "Tabell '%-.192s' är trasig och bör repareras med REPAIR TABLE"
+        ukr "Таблицю '%-.192s' марковано як зіпсовану та її потрібно відновити"
+ER_CRASHED_ON_REPAIR  
+        chi "表 '%-.192s' 被标记为崩溃,上一次的修复(自动?)失败"
+        cze "Tabulka '%-.192s' je označena jako porušená a poslední (automatická?) oprava se nezdařila"
+        dan "Tabellen '%-.192s' er markeret med fejl og sidste (automatiske?) REPAIR fejlede"
+        eng "Table '%-.192s' is marked as crashed and last (automatic?) repair failed"
+        est "Tabel '%-.192s' on märgitud vigaseks ja viimane (automaatne?) parandus ebaõnnestus"
+        fre "La table '%-.192s' est marquée 'crashed' et le dernier 'repair' a échoué"
+        ger "Tabelle '%-.192s' ist als defekt markiert und der letzte (automatische?) Reparaturversuch schlug fehl"
+        geo "ცხრილი '%-.192s' მონიშნულია, როგორც ავარიული და ბოლო (ავტომატური?) შეკეთება წარუმატებლად დასრულდა"
+        ita "La tabella '%-.192s' e` segnalata come corrotta e l'ultima ricostruzione (automatica?) e` fallita"
+        jpn "表 '%-.192s' は壊れています。修復(自動?)にも失敗しています。"
+        nla "Tabel '%-.192s' staat als gecrashed gemarkeerd en de laatste (automatische?) reparatie poging mislukte"
+        por "Tabela '%-.192s' está marcada como danificada e a última reparação (automática?) falhou"
+        rus "Таблица '%-.192s' помечена как испорченная и последний (автоматический?) ремонт не был успешным"
+        serbian "Tabela '%-.192s' je markirana kao oštećena, a zadnja (automatska?) popravka je bila neuspela"
+        spa "La tabla '%-.192s' está marcada como estropeada y la última reparación (¿automática?) falló"
+        swe "Tabell '%-.192s' är trasig och senast (automatiska?) reparation misslyckades"
+        ukr "Таблицю '%-.192s' марковано як зіпсовану та останнє (автоматичне?) відновлення не вдалося"
+ER_WARNING_NOT_COMPLETE_ROLLBACK  
+        chi "某些非事务性更改的表无法回滚"
+        dan "Advarsel: Visse data i tabeller der ikke understøtter transaktioner kunne ikke tilbagestilles"
+        eng "Some non-transactional changed tables couldn't be rolled back"
+        est "Hoiatus: mõnesid transaktsioone mittetoetavaid tabeleid ei suudetud tagasi kerida"
+        fre "Attention: certaines tables ne supportant pas les transactions ont été changées et elles ne pourront pas être restituées"
+        ger "Änderungen an einigen nicht transaktionalen Tabellen konnten nicht zurückgerollt werden"
+        geo "გაფრთხილება: ზოგიერთი არატრანზაქციულად შეცვლილი ცხრილების დაბრუნება შეუძლებელია"
+        ita "Attenzione: Alcune delle modifiche alle tabelle non transazionali non possono essere ripristinate (roll back impossibile)"
+        jpn "トランザクション対応ではない表への変更はロールバックされません。"
+        nla "Waarschuwing: Roll back mislukt voor sommige buiten transacties gewijzigde tabellen"
+        por "Aviso: Algumas tabelas não-transacionais alteradas não puderam ser reconstituídas (rolled back)"
+        rus "Внимание: по некоторым измененным нетранзакционным таблицам невозможно будет произвести откат транзакции"
+        serbian "Upozorenje: Neke izmenjene tabele ne podržavaju komandu 'ROLLBACK'"
+        spa "Algunas tablas no transaccionales ya cambiadas no pudieron ser retrocedidas (rolled back)"
+        swe "Warning:  Några icke transaktionella tabeller kunde inte återställas vid ROLLBACK"
+        ukr "Застереження: Деякі нетранзакційні зміни таблиць не можна буде повернути"
+ER_TRANS_CACHE_FULL  
+        chi "多语句事务需要超过 'max_binlog_cache_size' 字节的存储空间;增加这个 mariadbd 变量后再试一次"
+        dan "Fler-udtryks transaktion krævede mere plads en 'max_binlog_cache_size' bytes. Forhøj værdien af denne variabel og prøv igen"
+        eng "Multi-statement transaction required more than 'max_binlog_cache_size' bytes of storage; increase this mariadbd variable and try again"
+        est "Mitme lausendiga transaktsioon nõudis rohkem ruumi kui lubatud 'max_binlog_cache_size' muutujaga. Suurenda muutuja väärtust ja proovi uuesti"
+        fre "Cette transaction à commandes multiples nécessite plus de 'max_binlog_cache_size' octets de stockage, augmentez cette variable de mariadbd et réessayez"
+        ger "Transaktionen, die aus mehreren Befehlen bestehen, benötigten mehr als 'max_binlog_cache_size' Bytes an Speicher. Bitte vergrössern Sie diese Server-Variable und versuchen Sie es noch einmal"
+        geo "ტრანზაქციებს, რომლებიც ბევრ ბრძანებას შეიცავენ, max_binlog_cache_size ბაიტზე მეტი დასჭირდა. გაზარდეთ ცვლადის მნიშვნელობა ან თავიდან სცადეთ"
+        ita "La transazione a comandi multipli (multi-statement) ha richiesto piu` di 'max_binlog_cache_size' bytes di disco: aumentare questa variabile di mariadbd e riprovare"
+        jpn "複数ステートメントから成るトランザクションが 'max_binlog_cache_size' 以上の容量を必要としました。このシステム変数を増加して、再試行してください。"
+        nla "Multi-statement transactie vereist meer dan 'max_binlog_cache_size' bytes opslag. Verhoog deze mariadbd variabele en probeer opnieuw"
+        por "Transações multi-declaradas (multi-statement transactions) requeriram mais do que o valor limite (max_binlog_cache_size) de bytes para armazenagem. Aumente o valor desta variável do mariadbd e tente novamente"
+        rus "Транзакции, включающей большое количество команд, потребовалось более чем 'max_binlog_cache_size' байт. Увеличьте эту переменную сервера mariadbd и попробуйте еще раз"
+        spa "Transacción multi-sentencia requirió de más de 'max_binlog_cache_size' bytes de almacenamiento"
+        swe "Transaktionen krävde mera än 'max_binlog_cache_size' minne. Öka denna mariadbd-variabel och försök på nytt"
+        ukr "Транзакція з багатьма виразами вимагає більше ніж 'max_binlog_cache_size' байтів для зберігання. Збільште цю змінну mariadbd та спробуйте знову"
+ER_SLAVE_MUST_STOP  
+        chi "这个操作不能执行,因为你有个正在运行的 slave '%2$*1$s'; 先运行 STOP SLAVE '%2$*1$s' 后再试"
+        dan "Denne handling kunne ikke udføres med kørende slave '%2$*1$s', brug først kommandoen STOP SLAVE '%2$*1$s'"
+        eng "This operation cannot be performed as you have a running slave '%2$*1$s'; run STOP SLAVE '%2$*1$s' first"
+        fre "Cette opération ne peut être réalisée avec un esclave '%2$*1$s' actif, faites STOP SLAVE '%2$*1$s' d'abord"
+        ger "Diese Operation kann bei einem aktiven Slave '%2$*1$s' nicht durchgeführt werden. Bitte zuerst STOP SLAVE '%2$*1$s' ausführen"
+        geo "დამორჩილებულ სერვერზე ('%2$*1$s') ამ ოპერაციის განხორციელება შეუძლებელია. ჯერ გაუშვით STOP SLAVE '%2$*1$s'"
+        ita "Questa operazione non puo' essere eseguita con un database 'slave' '%2$*1$s' che gira, lanciare prima STOP SLAVE '%2$*1$s'"
+        nla "Deze operatie kan niet worden uitgevoerd met een actieve slave '%2$*1$s', doe eerst STOP SLAVE '%2$*1$s'"
+        por "Esta operação não pode ser realizada com um 'slave' '%2$*1$s' em execução. Execute STOP SLAVE '%2$*1$s' primeiro"
+        rus "Эту операцию невозможно выполнить при работающем потоке подчиненного сервера %2$*1$s. Сначала выполните STOP SLAVE '%2$*1$s'"
+        serbian "Ova operacija ne može biti izvršena dok je aktivan podređeni '%2$*1$s' server. Zadajte prvo komandu 'STOP SLAVE '%2$*1$s'' da zaustavite podređeni server"
+        spa "Esta operación no puede ser realizada con el esclavo '%2$*1$s' en marcha; primero ejecute STOP SLAVE '%2$*1$s'"
+        swe "Denna operation kan inte göras under replikering; Du har en aktiv förbindelse till '%2$*1$s'. Gör STOP SLAVE '%2$*1$s' först"
+        ukr "Операція не може бути виконана з запущеним підлеглим '%2$*1$s', спочатку виконайте STOP SLAVE '%2$*1$s'"
+ER_SLAVE_NOT_RUNNING  
+        chi "这个操作需要一个正在运行的slave;配置从机并执行 START SLAVE"
+        dan "Denne handling kræver en kørende slave. Konfigurer en slave og brug kommandoen START SLAVE"
+        eng "This operation requires a running slave; configure slave and do START SLAVE"
+        fre "Cette opération nécessite un esclave actif, configurez les esclaves et faites START SLAVE"
+        ger "Diese Operation benötigt einen aktiven Slave. Bitte Slave konfigurieren und mittels START SLAVE aktivieren"
+        geo "ამ ოპერაციას გაშვებული დამორჩილებული სერვერი ესაჭიროება. ჯერ გაუშვით START SLAVE"
+        ita "Questa operazione richiede un database 'slave', configurarlo ed eseguire START SLAVE"
+        jpn "この処理は、稼働中のスレーブでなければ実行できません。スレーブの設定をしてSTART SLAVEコマンドを実行してください。"
+        nla "Deze operatie vereist een actieve slave, configureer slave en doe dan START SLAVE"
+        por "Esta operação requer um 'slave' em execução. Configure  o 'slave' e execute START SLAVE"
+        rus "Для этой операции требуется работающий подчиненный сервер. Сначала выполните START SLAVE"
+        serbian "Ova operacija zahteva da je aktivan podređeni server. Konfigurišite prvo podređeni server i onda izvršite komandu 'START SLAVE'"
+        spa "Esta operación requiere de un esclavo funcionando; configure el esclavo y haga el START SLAVE"
+        swe "Denna operation kan endast göras under replikering; Konfigurera slaven och gör START SLAVE"
+        ukr "Операція вимагає запущеного підлеглого, зконфігуруйте підлеглого та виконайте START SLAVE"
+ER_BAD_SLAVE  
+        chi "服务器未配置为从站;修复配置文件或使用CHANGE MASTER TO"
+        dan "Denne server er ikke konfigureret som slave. Ret in config-filen eller brug kommandoen CHANGE MASTER TO"
+        eng "The server is not configured as slave; fix in config file or with CHANGE MASTER TO"
+        fre "Le server n'est pas configuré comme un esclave, changez le fichier de configuration ou utilisez CHANGE MASTER TO"
+        ger "Der Server ist nicht als Slave konfiguriert. Bitte in der Konfigurationsdatei oder mittels CHANGE MASTER TO beheben"
+        geo "სერვერი დამორჩილებული როლისთვის მორგებული არაა. ჩაასწორეთ კონფიგურაციის ფაილი ან გაუშვით ბრძანება CHANGE MASTER TO"
+        ita "Il server non e' configurato come 'slave', correggere il file di configurazione cambiando CHANGE MASTER TO"
+        jpn "このサーバーはスレーブとして設定されていません。コンフィグファイルかCHANGE MASTER TOコマンドで設定して下さい。"
+        nla "De server is niet geconfigureerd als slave, fix in configuratie bestand of met CHANGE MASTER TO"
+        por "O servidor não está configurado como 'slave'. Acerte o arquivo de configuração ou use CHANGE MASTER TO"
+        rus "Этот сервер не настроен как подчиненный. Внесите исправления в конфигурационном файле или с помощью CHANGE MASTER TO"
+        serbian "Server nije konfigurisan kao podređeni server, ispravite konfiguracioni file ili na njemu izvršite komandu 'CHANGE MASTER TO'"
+        spa "El servidor no está configurado como esclavo; arréglelo en el fichero/archivo de configuración o con CHANGE MASTER TO"
+        swe "Servern är inte konfigurerade som en replikationsslav. Ändra konfigurationsfilen eller gör CHANGE MASTER TO"
+        ukr "Сервер не зконфігуровано як підлеглий, виправте це у файлі конфігурації або з CHANGE MASTER TO"
+ER_MASTER_INFO  
+        chi "无法初始化'%.*s'的master info structure;MariaDB错误日志中可以找到更多错误消息"
+        eng "Could not initialize master info structure for '%.*s'; more error messages can be found in the MariaDB error log"
+        fre "Impossible d'initialiser les structures d'information de maître '%.*s', vous trouverez des messages d'erreur supplémentaires dans le journal des erreurs de MariaDB"
+        ger "Konnte Master-Info-Struktur '%.*s' nicht initialisieren. Weitere Fehlermeldungen können im MariaDB-Error-Log eingesehen werden"
+        geo "'%.*s'-სთვის მთავარი სერვერის ინფორმაციის სტრუქტურის ინიციალიზაცია შეუძლებელია. მეტი ინფორმაციისთვის MariaDB-ის შეცდომების ჟურნალი იხილეთ"
+        jpn "'master info '%.*s''構造体の初期化ができませんでした。MariaDBエラーログでエラーメッセージを確認してください。"
+        serbian "Nisam mogao da inicijalizujem informacionu strukturu glavnog servera, proverite da li imam privilegije potrebne za pristup file-u 'master.info' '%.*s'"
+        spa "No pude inicializar estructura info de maestro (master) para '%.*s'; se pueden ver más mensajes de error en el historial (log) de errores de MariaDB"
+        swe "Kunde inte initialisera replikationsstrukturerna för '%.*s'. See MariaDB fel fil för mera information"
+        ukr "Інформаційна структура з'єднання головного і підлеглого (master.info) для '%.*s' не може бути ініціалізована"
+ER_SLAVE_THREAD
+        chi "无法创建slave线程;检查系统资源"
+        dan "Kunne ikke danne en slave-tråd; check systemressourcerne"
+        eng "Could not create slave thread; check system resources"
+        fre "Impossible de créer une tâche esclave, vérifiez les ressources système"
+        ger "Konnte Slave-Thread nicht starten. Bitte System-Ressourcen überprüfen"
+        geo "დამორჩილებული სერვერის ნაკადის შექმნის შეცდომა. გადაამოწმეთ სისტემური რესურსები"
+        ita "Impossibile creare il thread 'slave', controllare le risorse di sistema"
+        jpn "スレーブスレッドを作成できません。システムリソースを確認してください。"
+        nla "Kon slave thread niet aanmaken, controleer systeem resources"
+        por "Não conseguiu criar 'thread' de 'slave'. Verifique os recursos do sistema"
+        rus "Невозможно создать поток подчиненного сервера. Проверьте системные ресурсы"
+        serbian "Nisam mogao da startujem thread za podređeni server, proverite sistemske resurse"
+        spa "No puedo crear el hilo (thread) esclavo; verifique recursos del sistema"
+        swe "Kunde inte starta en tråd för replikering"
+        ukr "Не можу створити підлеглу гілку, перевірте системні ресурси"
+ER_TOO_MANY_USER_CONNECTIONS 42000 
+        chi "用户%-.64s已经拥有超过“max_user_connections”的活连接"
+        dan "Brugeren %-.64s har allerede mere end 'max_user_connections' aktive forbindelser"
+        eng "User %-.64s already has more than 'max_user_connections' active connections"
+        est "Kasutajal %-.64s on juba rohkem ühendusi kui lubatud 'max_user_connections' muutujaga"
+        fre "L'utilisateur %-.64s possède déjà plus de 'max_user_connections' connexions actives"
+        ger "Benutzer '%-.64s' hat mehr als 'max_user_connections' aktive Verbindungen"
+        geo "მომხმარებელს %-.64s უკვე 'max_user_connections'-ზე მეტი აქტიური კავშირი გააჩნია"
+        hindi "यूज़र %-.64s के पहले से ही 'max_user_connections' से अधिक सक्रिय कनेक्शन्स हैं"
+        ita "L'utente %-.64s ha gia' piu' di 'max_user_connections' connessioni attive"
+        jpn "ユーザー '%-.64s' はすでに 'max_user_connections' 以上のアクティブな接続を行っています。"
+        nla "Gebruiker %-.64s heeft reeds meer dan 'max_user_connections' actieve verbindingen"
+        por "Usuário '%-.64s' já possui mais que o valor máximo de conexões (max_user_connections) ativas"
+        rus "У пользователя %-.64s уже больше чем 'max_user_connections' активных соединений"
+        serbian "Korisnik %-.64s već ima više aktivnih konekcija nego što je to određeno 'max_user_connections' promenljivom"
+        spa "El usuario %-.64s ya tiene más de 'max_user_connections' conexiones activas"
+        swe "Användare '%-.64s' har redan 'max_user_connections' aktiva inloggningar"
+        ukr "Користувач %-.64s вже має більше ніж 'max_user_connections' активних з'єднань"
+ER_SET_CONSTANTS_ONLY  
+        chi "您只能在此语句中使用常量表达式"
+        dan "Du må kun bruge konstantudtryk med SET"
+        eng "You may only use constant expressions in this statement"
+        est "Ainult konstantsed suurused on lubatud SET klauslis"
+        fre "Seules les expressions constantes sont autorisées avec SET"
+        ger "Bei diesem Befehl dürfen nur konstante Ausdrücke verwendet werden"
+        geo "ამ ბრძანებასთან ერთად მხოლოდ კონსტანტური გამოსახულებების გამოყენება შეგიძლიათ"
+        hindi "इस स्टेटमेंट में आप केवल CONSTANT EXPRESSIONS का उपयोग कर सकते हैं"
+        ita "Si possono usare solo espressioni costanti con SET"
+        jpn "SET処理が失敗しました。"
+        nla "U mag alleen constante expressies gebruiken bij SET"
+        por "Você pode usar apenas expressões constantes com SET"
+        rus "С этой командой вы можете использовать только константные выражения"
+        serbian "Možete upotrebiti samo konstantan iskaz sa komandom 'SET'"
+        spa "Sólo puede usar expresiones constantes en esta sentencia"
+        swe "Man kan endast använda konstantuttryck med SET"
+        ukr "Можна використовувати лише вирази зі сталими у SET"
+ER_LOCK_WAIT_TIMEOUT  
+        chi "锁等待超时;尝试重新启动事务"
+        dan "Lock wait timeout overskredet"
+        eng "Lock wait timeout exceeded; try restarting transaction"
+        est "Kontrollaeg ületatud luku järel ootamisel; Proovi transaktsiooni otsast alata"
+        fre "Timeout sur l'obtention du verrou"
+        ger "Beim Warten auf eine Sperre wurde die zulässige Wartezeit überschritten. Bitte versuchen Sie, die Transaktion neu zu starten"
+        geo "ბლოკის მოლოდინის ვადა ამოიწურა. სცადეთ, ტრანზაქცია თავიდან გაუშვათ"
+        ita "E' scaduto il timeout per l'attesa del lock"
+        jpn "ロック待ちがタイムアウトしました。トランザクションを再試行してください。"
+        nla "Lock wacht tijd overschreden"
+        por "Tempo de espera (timeout) de travamento excedido. Tente reiniciar a transação"
+        rus "Таймаут ожидания блокировки истек; попробуйте перезапустить транзакцию"
+        serbian "Vremenski limit za zaključavanje tabele je istekao; Probajte da ponovo startujete transakciju"
+        spa "Tiempo de espera de bloqueo excedido; intente rearrancar la transacción"
+        swe "Fick inte ett lås i tid ; Försök att starta om transaktionen"
+        ukr "Затримку очікування блокування вичерпано"
+ER_LOCK_TABLE_FULL  
+        chi "锁的总数超过锁定表大小"
+        dan "Det totale antal låse overstiger størrelsen på låse-tabellen"
+        eng "The total number of locks exceeds the lock table size"
+        est "Lukkude koguarv ületab lukutabeli suuruse"
+        fre "Le nombre total de verrou dépasse la taille de la table des verrous"
+        ger "Die Gesamtzahl der Sperren überschreitet die Größe der Sperrtabelle"
+        geo "ბლოკების ჯამური რიცხვი ბლოკების ცხრილის ზომას აჭარბებს"
+        hindi "लॉक्स की कुल संख्या लॉक टेबल के साइज से अधिक है"
+        ita "Il numero totale di lock e' maggiore della grandezza della tabella di lock"
+        jpn "ロックの数が多すぎます。"
+        nla "Het totale aantal locks overschrijdt de lock tabel grootte"
+        por "O número total de travamentos excede o tamanho da tabela de travamentos"
+        rus "Общее количество блокировок превысило размеры таблицы блокировок"
+        serbian "Broj totalnih zaključavanja tabele premašuje veličinu tabele zaključavanja"
+        spa "El número total de bloqueos excede el tamaño de bloqueo de la tabla"
+        swe "Antal lås överskrider antalet reserverade lås"
+        ukr "Загальна кількість блокувань перевищила розмір блокувань для таблиці"
+ER_READ_ONLY_TRANSACTION 25000 
+        chi "在READ UNCOMMITTED事务期间无法获取更新锁定"
+        dan "Update lås kan ikke opnås under en READ UNCOMMITTED transaktion"
+        eng "Update locks cannot be acquired during a READ UNCOMMITTED transaction"
+        est "Uuenduslukke ei saa kasutada READ UNCOMMITTED transaktsiooni käigus"
+        fre "Un verrou en update ne peut être acquis pendant une transaction READ UNCOMMITTED"
+        ger "Während einer READ-UNCOMMITTED-Transaktion können keine UPDATE-Sperren angefordert werden"
+        geo "განახლების დაბლოკვების მიღება შეუძლებელია READ UNCOMMITTED ტრანზაქციის დროს"
+        ita "I lock di aggiornamento non possono essere acquisiti durante una transazione 'READ UNCOMMITTED'"
+        jpn "読み込み専用トランザクションです。"
+        nla "Update locks kunnen niet worden verkregen tijdens een READ UNCOMMITTED transactie"
+        por "Travamentos de atualização não podem ser obtidos durante uma transação de tipo READ UNCOMMITTED"
+        rus "Блокировки обновлений нельзя получить в процессе чтения не принятой (в режиме READ UNCOMMITTED) транзакции"
+        serbian "Zaključavanja izmena ne mogu biti realizovana sve dok traje 'READ UNCOMMITTED' transakcija"
+        spa "No se pueden adquirir bloqueos de actualización durante una transacción READ UNCOMMITTED"
+        swe "Updateringslås kan inte göras när man använder READ UNCOMMITTED"
+        ukr "Оновити блокування не можливо на протязі транзакції READ UNCOMMITTED"
+ER_DROP_DB_WITH_READ_LOCK  
+        chi "线程持有全局读锁时,不允许删除数据库"
+        dan "DROP DATABASE er ikke tilladt mens en tråd holder på globalt read lock"
+        eng "DROP DATABASE not allowed while thread is holding global read lock"
+        est "DROP DATABASE ei ole lubatud kui lõim omab globaalset READ lukku"
+        fre "DROP DATABASE n'est pas autorisée pendant qu'une tâche possède un verrou global en lecture"
+        ger "DROP DATABASE ist nicht erlaubt, solange der Thread eine globale Lesesperre hält"
+        geo "DROP DATABASE დაუშვებელია, სანამ ნაკადი კითხვის გლობალური ბლოკს შეიცავს"
+        ita "DROP DATABASE non e' permesso mentre il thread ha un lock globale di lettura"
+        jpn "グローバルリードロックを保持している間は、DROP DATABASE を実行できません。"
+        nla "DROP DATABASE niet toegestaan terwijl thread een globale 'read lock' bezit"
+        por "DROP DATABASE não permitido enquanto uma 'thread' está mantendo um travamento global de leitura"
+        rus "Не допускается DROP DATABASE, пока поток держит глобальную блокировку чтения"
+        serbian "Komanda 'DROP DATABASE' nije dozvoljena dok thread globalno zaključava čitanje podataka"
+        spa "DROP DATABASE no permitido mientras un hilo (thread) está ejerciendo un bloqueo de lectura global"
+        swe "DROP DATABASE är inte tillåtet när man har ett globalt läslås"
+        ukr "DROP DATABASE не дозволено доки гілка перебуває під загальним блокуванням читання"
+ER_CREATE_DB_WITH_READ_LOCK  
+        chi "线程持有全局读锁时,不允许创建数据库"
+        dan "CREATE DATABASE er ikke tilladt mens en tråd holder på globalt read lock"
+        eng "CREATE DATABASE not allowed while thread is holding global read lock"
+        est "CREATE DATABASE ei ole lubatud kui lõim omab globaalset READ lukku"
+        fre "CREATE DATABASE n'est pas autorisée pendant qu'une tâche possède un verrou global en lecture"
+        ger "CREATE DATABASE ist nicht erlaubt, solange der Thread eine globale Lesesperre hält"
+        geo "CREATE DATABASE დაუშვებელია, სანამ ნაკადი კითხვის გლობალურ ბლოკს შეიცავს"
+        ita "CREATE DATABASE non e' permesso mentre il thread ha un lock globale di lettura"
+        jpn "グローバルリードロックを保持している間は、CREATE DATABASE を実行できません。"
+        nla "CREATE DATABASE niet toegestaan terwijl thread een globale 'read lock' bezit"
+        por "CREATE DATABASE não permitido enquanto uma 'thread' está mantendo um travamento global de leitura"
+        rus "Не допускается CREATE DATABASE, пока поток держит глобальную блокировку чтения"
+        serbian "Komanda 'CREATE DATABASE' nije dozvoljena dok thread globalno zaključava čitanje podataka"
+        spa "CREATE DATABASE no permitido mientras un hilo (thread) está manteniendo un bloqueo de lectura global"
+        swe "CREATE DATABASE är inte tillåtet när man har ett globalt läslås"
+        ukr "CREATE DATABASE не дозволено доки гілка перебуває під загальним блокуванням читання"
+ER_WRONG_ARGUMENTS  
+        chi "%s的参数不正确"
+        eng "Incorrect arguments to %s"
+        est "Vigased parameetrid %s-le"
+        fre "Mauvais arguments à %s"
+        ger "Falsche Argumente für %s"
+        geo "%s-ის არგუმენტები არასწორია"
+        hindi "%s को गलत आर्ग्यूमेंट्स"
+        ita "Argomenti errati a %s"
+        jpn "%s の引数が不正です"
+        nla "Foutieve parameters voor %s"
+        por "Argumentos errados para %s"
+        rus "Неверные параметры для %s"
+        serbian "Pogrešni argumenti prosleđeni na %s"
+        spa "Argumentos incorrectos para %s"
+        swe "Felaktiga argument till %s"
+        ukr "Хибний аргумент для %s"
+ER_NO_PERMISSION_TO_CREATE_USER 42000 
+        chi "'%s'@'%s'不允许创建新用户"
+        eng "'%s'@'%s' is not allowed to create new users"
+        est "Kasutajal '%s'@'%s' ei ole lubatud luua uusi kasutajaid"
+        fre "'%s'@'%s' n'est pas autorisé à créer de nouveaux utilisateurs"
+        ger "'%s'@'%s' ist nicht berechtigt, neue Benutzer hinzuzufügen"
+        geo "'%s'@'%s'-ს ახალი მომხმარებლების შექმნის უფლება არ აქვს"
+        hindi "'%s'@'%s' को नए यूज़र्स बनाने की अनुमति नहीं है"
+        ita "A '%s'@'%s' non e' permesso creare nuovi utenti"
+        nla "'%s'@'%s' mag geen nieuwe gebruikers creeren"
+        por "Não é permitido a '%s'@'%s' criar novos usuários"
+        rus "'%s'@'%s' не разрешается создавать новых пользователей"
+        serbian "Korisniku '%s'@'%s' nije dozvoljeno da kreira nove korisnike"
+        spa "'%s'@'%s' no está permitido para crear nuevos usuarios"
+        swe "'%s'@'%s' har inte rättighet att skapa nya användare"
+        ukr "Користувачу '%s'@'%s' не дозволено створювати нових користувачів"
+ER_UNION_TABLES_IN_DIFFERENT_DIR  
+        chi "表定义不正确;所有合并表必须在同一数据库中"
+        eng "Incorrect table definition; all MERGE tables must be in the same database"
+        est "Vigane tabelimääratlus; kõik MERGE tabeli liikmed peavad asuma samas andmebaasis"
+        fre "Définition de table incorrecte; toutes les tables MERGE doivent être dans la même base de donnée"
+        ger "Falsche Tabellendefinition. Alle MERGE-Tabellen müssen sich in derselben Datenbank befinden"
+        geo "ცხრილის არასწორი აღწერა. ყველა MERGE ცხრილი ერთი და იგივე მონაცემთა ბაზაში უნდა იყოს"
+        ita "Definizione della tabella errata; tutte le tabelle di tipo MERGE devono essere nello stesso database"
+        jpn "不正な表定義です。MERGE表の構成表はすべて同じデータベース内になければなりません。"
+        nla "Incorrecte tabel definitie; alle MERGE tabellen moeten tot dezelfde database behoren"
+        por "Definição incorreta da tabela. Todas as tabelas contidas na junção devem estar no mesmo banco de dados"
+        rus "Неверное определение таблицы; Все таблицы в MERGE должны принадлежать одной и той же базе данных"
+        serbian "Pogrešna definicija tabele; sve 'MERGE' tabele moraju biti u istoj bazi podataka"
+        spa "Definición incorrecta de la tabla; todas las tablas MERGE deben de estar en la misma base de datos"
+        swe "Felaktig tabelldefinition; alla tabeller i en MERGE-tabell måste vara i samma databas"
+        ukr "Хибне визначення таблиці; всі MERGE-таблиці повинні належити до однієї бази даних."
+ER_LOCK_DEADLOCK 40001 
+        chi "试图锁定时发现僵局;尝试重新启动事务"
+        eng "Deadlock found when trying to get lock; try restarting transaction"
+        est "Lukustamisel tekkis tupik (deadlock); alusta transaktsiooni otsast"
+        fre "Deadlock découvert en essayant d'obtenir les verrous : essayez de redémarrer la transaction"
+        ger "Beim Versuch, eine Sperre anzufordern, ist ein Deadlock aufgetreten. Versuchen Sie, die Transaktion neu zu starten"
+        geo "ჩიხი ბლოკის მიღების მცდელობისას. სცადეთ, ტრანზაქცია თავიდან გაუშვათ"
+        ita "Trovato deadlock durante il lock; Provare a far ripartire la transazione"
+        jpn "ロック取得中にデッドロックが検出されました。トランザクションを再試行してください。"
+        nla "Deadlock gevonden tijdens lock-aanvraag poging; Probeer herstart van de transactie"
+        por "Encontrado um travamento fatal (deadlock) quando tentava obter uma trava. Tente reiniciar a transação"
+        rus "Возникла тупиковая ситуация в процессе получения блокировки; Попробуйте перезапустить транзакцию"
+        serbian "Unakrsno zaključavanje pronađeno kada sam pokušao da dobijem pravo na zaključavanje; Probajte da restartujete transakciju"
+        spa "Encontrado estancamiento (deadlock) al intentar obtener el bloqueo; intente volver a comenzar la transacción"
+        swe "Fick 'DEADLOCK' vid låsförsök av block/rad. Försök att starta om transaktionen"
+        ukr "Взаємне блокування знайдено під час спроби отримати блокування; спробуйте перезапустити транзакцію."
+ER_TABLE_CANT_HANDLE_FT  
+        chi "存储引擎%s不支持fulltext索引"
+        eng "The storage engine %s doesn't support FULLTEXT indexes"
+        est "Antud tabelitüüp (%s) ei toeta FULLTEXT indekseid"
+        fre "Le type de table utilisé (%s) ne supporte pas les index FULLTEXT"
+        ger "Der verwendete Tabellentyp (%s) unterstützt keine FULLTEXT-Indizes"
+        geo "საცავის ძრავს (%s) FULLTEXT ინდექსების მხარდაჭერა არ გააჩნია"
+        hindi "स्टोरेज इंजन '%s' FULLTEXT इन्डेक्सेस को सपोर्ट नहीं करता"
+        ita "La tabella usata (%s) non supporta gli indici FULLTEXT"
+        nla "Het gebruikte tabel type (%s) ondersteunt geen FULLTEXT indexen"
+        por "O tipo de tabela utilizado (%s) não suporta índices de texto completo (fulltext indexes)"
+        rus "Используемый тип таблиц (%s) не поддерживает полнотекстовых индексов"
+        serbian "Upotrebljeni tip tabele (%s) ne podržava 'FULLTEXT' indekse"
+        spa "El motor de almacenaje %s no soporta índices FULLTEXT"
+        swe "Tabelltypen (%s) har inte hantering av FULLTEXT-index"
+        ukr "Використаний тип таблиці (%s) не підтримує FULLTEXT індексів"
+ER_CANNOT_ADD_FOREIGN
+        chi "不能为`%s`添加外键约束"
+        eng "Cannot add foreign key constraint for `%s`"
+        fre "Impossible d'ajouter des contraintes d'index externe à `%s`"
+        ger "Fremdschlüssel-Beschränkung kann nicht hinzugefügt werden für `%s`"
+        geo "'%s'-სთვის გარე გასაღების შეზღუდვის დამატება შეუძლებელია"
+        ita "Impossibile aggiungere il vincolo di integrita' referenziale (foreign key constraint) a `%s`"
+        jpn "`%s` 外部キー制約を追加できません。"
+        nla "Kan foreign key beperking niet toevoegen vor `%s`"
+        por "Não pode acrescentar uma restrição de chave estrangeira para `%s`"
+        rus "Невозможно добавить ограничения внешнего ключа для `%s`"
+        serbian "Ne mogu da dodam proveru spoljnog ključa na `%s`"
+        spa "No puedo añadir restricción de clave foránea para `%s`"
+        swe "Kan inte lägga till 'FOREIGN KEY constraint' för `%s`"
+        ukr "Не можу додати обмеження зовнішнього ключа на `%s`"
+ER_NO_REFERENCED_ROW 23000 
+        chi "无法添加或更新子行:外键约束失败"
+        eng "Cannot add or update a child row: a foreign key constraint fails"
+        fre "Impossible d'ajouter un enregistrement fils : une contrainte externe l'empêche"
+        ger "Hinzufügen oder Aktualisieren eines Kind-Datensatzes schlug aufgrund einer Fremdschlüssel-Beschränkung fehl"
+        geo "შვილი მწკრივის დამატება ან განახლება შეუძლებელია. გარე გასაღების შეზღუდვა არ მოწმდება"
+        greek "Cannot add a child row: a foreign key constraint fails"
+        hun "Cannot add a child row: a foreign key constraint fails"
+        ita "Impossibile aggiungere la riga: un vincolo d'integrita' referenziale non e' soddisfatto"
+        jpn "親キーがありません。外部キー制約違反です。"
+        nla "Kan onderliggende rij niet toevoegen: foreign key beperking gefaald"
+        norwegian-ny "Cannot add a child row: a foreign key constraint fails"
+        por "Não pode acrescentar uma linha filha: uma restrição de chave estrangeira falhou"
+        rus "Невозможно добавить или обновить дочернюю строку: проверка ограничений внешнего ключа не выполняется"
+        spa "No puedo añadir o actualizar una fila hija: ha fallado una restricción de clave foránea"
+        swe "FOREIGN KEY-konflikt:  Kan inte skriva barn"
+        ukr "Не вдається додати або оновити дочірній рядок: невдала перевірка обмеження зовнішнього ключа"
+ER_ROW_IS_REFERENCED 23000 
+        chi "无法删除或更新父行:外键约束失败"
+        eng "Cannot delete or update a parent row: a foreign key constraint fails"
+        fre "Impossible de supprimer un enregistrement père : une contrainte externe l'empêche"
+        ger "Löschen oder Aktualisieren eines Eltern-Datensatzes schlug aufgrund einer Fremdschlüssel-Beschränkung fehl"
+        geo "მშობელი მწკრივის წაშლა ან განახლება შეუძლებელია. გარე გასაღების შეზღუდვა არ მოწმდება"
+        greek "Cannot delete a parent row: a foreign key constraint fails"
+        hun "Cannot delete a parent row: a foreign key constraint fails"
+        ita "Impossibile cancellare la riga: un vincolo d'integrita' referenziale non e' soddisfatto"
+        jpn "子レコードがあります。外部キー制約違反です。"
+        por "Não pode apagar uma linha pai: uma restrição de chave estrangeira falhou"
+        rus "Невозможно удалить или обновить родительскую строку: проверка ограничений внешнего ключа не выполняется"
+        serbian "Ne mogu da izbrišem roditeljski slog: provera spoljnog ključa je neuspela"
+        spa "No puedo borrar o actualizar una fila padre: ha fallado una restricción de clave foránea"
+        swe "FOREIGN KEY-konflikt:  Kan inte radera fader"
+ER_CONNECT_TO_MASTER 08S01 
+        chi "连接master时出错:%-.128s"
+        eng "Error connecting to master: %-.128s"
+        ger "Fehler bei der Verbindung zum Master: %-.128s"
+        geo "მთავარ სერვერთან მიერთების შეცდომა: %-.128s"
+        ita "Errore durante la connessione al master: %-.128s"
+        jpn "マスターへの接続エラー: %-.128s"
+        nla "Fout bij opbouwen verbinding naar master: %-.128s"
+        por "Erro conectando com o master: %-.128s"
+        rus "Ошибка соединения с головным сервером: %-.128s"
+        spa "Error conectando al maestro (master): %-.128s"
+        swe "Fick fel vid anslutning till master: %-.128s"
+ER_QUERY_ON_MASTER  
+        chi "在Master上运行查询时出错:%-.128s"
+        eng "Error running query on master: %-.128s"
+        ger "Beim Ausführen einer Abfrage auf dem Master trat ein Fehler auf: %-.128s"
+        geo "მთავარ სერვერზე მოთხოვნის გაშვების შეცდომა: %-.128s"
+        ita "Errore eseguendo una query sul master: %-.128s"
+        jpn "マスターでのクエリ実行エラー: %-.128s"
+        nla "Fout bij uitvoeren query op master: %-.128s"
+        por "Erro rodando consulta no master: %-.128s"
+        rus "Ошибка выполнения запроса на головном сервере: %-.128s"
+        spa "Error ejecutando consulta (query) en maestro (master): %-.128s"
+        swe "Fick fel vid utförande av command på mastern: %-.128s"
+ER_ERROR_WHEN_EXECUTING_COMMAND  
+        chi "执行命令%s时出错:%-.128s"
+        eng "Error when executing command %s: %-.128s"
+        est "Viga käsu %s täitmisel: %-.128s"
+        ger "Fehler beim Ausführen des Befehls %s: %-.128s"
+        geo "შეცდომა ბრძანების (%s) შესრულებისას: %-.128s"
+        ita "Errore durante l'esecuzione del comando %s: %-.128s"
+        jpn "%s コマンドの実行エラー: %-.128s"
+        nla "Fout tijdens uitvoeren van commando %s: %-.128s"
+        por "Erro quando executando comando %s: %-.128s"
+        rus "Ошибка при выполнении команды %s: %-.128s"
+        serbian "Greška pri izvršavanju komande %s: %-.128s"
+        spa "Error al ejecutar comando %s: %-.128s"
+        swe "Fick fel vid utförande av %s: %-.128s"
+ER_WRONG_USAGE  
+        chi "%s和%s使用不正确"
+        eng "Incorrect usage of %s and %s"
+        est "Vigane %s ja %s kasutus"
+        ger "Falsche Verwendung von %s und %s"
+        geo "%s-ის და %s-ის არასწორი გამოყენება"
+        ita "Uso errato di %s e %s"
+        jpn "%s の %s に関する不正な使用法です。"
+        nla "Foutief gebruik van %s en %s"
+        por "Uso errado de %s e %s"
+        rus "Неверное использование %s и %s"
+        serbian "Pogrešna upotreba %s i %s"
+        spa "Uso incorrecto de %s y %s"
+        swe "Felaktig använding av %s and %s"
+        ukr "Wrong usage of %s and %s"
+ER_WRONG_NUMBER_OF_COLUMNS_IN_SELECT 21000 
+        chi "使用的SELECT语句具有不同数量的列"
+        eng "The used SELECT statements have a different number of columns"
+        est "Tulpade arv kasutatud SELECT lausetes ei kattu"
+        ger "Die verwendeten SELECT-Befehle liefern unterschiedliche Anzahlen von Feldern zurück"
+        geo "SELECT-ის გამოყენებულ ოპერატორებს სხვადასხვა რაოდენობის სვეტები გააჩნიათ"
+        ita "La SELECT utilizzata ha un numero di colonne differente"
+        jpn "使用のSELECT文が返す列数が違います。"
+        nla "De gebruikte SELECT commando's hebben een verschillend aantal kolommen"
+        por "Os comandos SELECT usados têm diferente número de colunas"
+        rus "Использованные операторы выборки (SELECT) дают разное количество столбцов"
+        serbian "Upotrebljene 'SELECT' komande adresiraju različit broj kolona"
+        spa "Las sentencias SELECT usadas tienen un número diferente de columnas"
+        swe "SELECT-kommandona har olika antal kolumner"
+ER_CANT_UPDATE_WITH_READLOCK  
+        chi "无法执行查询,因为您有冲突的读锁"
+        eng "Can't execute the query because you have a conflicting read lock"
+        est "Ei suuda täita päringut konfliktse luku tõttu"
+        ger "Augrund eines READ-LOCK-Konflikts kann die Abfrage nicht ausgeführt werden"
+        geo "მოთხოვნის შესრულება შეუძლებელია, რადგან თქვენი წაკითხვის ბლოკები კონფლიქტურია"
+        ita "Impossibile eseguire la query perche' c'e' un conflitto con in lock di lettura"
+        jpn "競合するリードロックを保持しているので、クエリを実行できません。"
+        nla "Kan de query niet uitvoeren vanwege een conflicterende read lock"
+        por "Não posso executar a consulta porque você tem um conflito de travamento de leitura"
+        rus "Невозможно исполнить запрос, поскольку у вас установлены конфликтующие блокировки чтения"
+        serbian "Ne mogu da izvršim upit zbog toga što imate zaključavanja čitanja podataka u konfliktu"
+        spa "No puedo ejecutar la consulta (query) porque vd tiene un conflicto de bloqueo de lectura"
+        swe "Kan inte utföra kommandot emedan du har ett READ-lås"
+ER_MIXING_NOT_ALLOWED  
+        chi "事务和非事务表的混合被禁用"
+        eng "Mixing of transactional and non-transactional tables is disabled"
+        est "Transaktsioone toetavate ning mittetoetavate tabelite kooskasutamine ei ole lubatud"
+        ger "Die gleichzeitige Verwendung von Tabellen mit und ohne Transaktionsunterstützung ist deaktiviert"
+        geo "ტრანზაქციური და არატრანზაქციური ცხრილების შერევა აკრძალულია"
+        ita "E' disabilitata la possibilita' di mischiare tabelle transazionali e non-transazionali"
+        jpn "トランザクション対応の表と非対応の表の同時使用は無効化されています。"
+        nla "Het combineren van transactionele en niet-transactionele tabellen is uitgeschakeld"
+        por "Mistura de tabelas transacional e não-transacional está desabilitada"
+        rus "Использование транзакционных таблиц наряду с нетранзакционными запрещено"
+        serbian "Mešanje tabela koje podržavaju transakcije i onih koje ne podržavaju transakcije je isključeno"
+        spa "Desactivada la mezcla de tablas transaccionales y no transaccionales"
+        swe "Blandning av transaktionella och icke-transaktionella tabeller är inaktiverat"
+ER_DUP_ARGUMENT  
+        chi "选项'%s'在语句中使用两次"
+        eng "Option '%s' used twice in statement"
+        est "Määrangut '%s' on lauses kasutatud topelt"
+        ger "Option '%s' wird im Befehl zweimal verwendet"
+        geo "გამოსახულებაში პარამეტრი '%s' ორჯერ გამოიყენება"
+        ita "L'opzione '%s' e' stata usata due volte nel comando"
+        jpn "オプション '%s' が2度使用されています。"
+        nla "Optie '%s' tweemaal gebruikt in opdracht"
+        por "Opção '%s' usada duas vezes no comando"
+        rus "Опция '%s' дважды использована в выражении"
+        spa "Opción '%s' usada dos veces en la sentencia"
+        swe "Option '%s' användes två gånger"
+ER_USER_LIMIT_REACHED 42000 
+        chi "用户'%-.64s'已超过'%s'资源(当前值:%ld)"
+        eng "User '%-.64s' has exceeded the '%s' resource (current value: %ld)"
+        ger "Benutzer '%-.64s' hat die Ressourcenbeschränkung '%s' überschritten (aktueller Wert: %ld)"
+        geo "მომხმარებელი '%-.64s' აჭარბებს '%s' რესურსის გამოყენებას (მიმდინარე მნიშვნელობაა %ld)"
+        ita "L'utente '%-.64s' ha ecceduto la risorsa '%s' (valore corrente: %ld)"
+        jpn "ユーザー '%-.64s' はリソースの上限 '%s' に達しました。(現在値: %ld)"
+        nla "Gebruiker '%-.64s' heeft het maximale gebruik van de '%s' faciliteit overschreden (huidige waarde: %ld)"
+        por "Usuário '%-.64s' tem excedido o '%s' recurso (atual valor: %ld)"
+        rus "Пользователь '%-.64s' превысил использование ресурса '%s' (текущее значение: %ld)"
+        spa "El usuario '%-.64s' ha excedido el recurso '%s' (valor actual: %ld)"
+        swe "Användare '%-.64s' har överskridit '%s' (nuvarande värde: %ld)"
+ER_SPECIFIC_ACCESS_DENIED_ERROR 42000 
+        chi "拒绝访问;您需要(至少一个)%-.128s特权用于此操作"
+        eng "Access denied; you need (at least one of) the %-.128s privilege(s) for this operation"
+        ger "Kein Zugriff. Hierfür wird die Berechtigung %-.128s benötigt"
+        geo "წვდომა აკრძალულია. ამ ოპერაციისთვის საჭიროა %-.128s პრივილეგია"
+        ita "Accesso non consentito. Serve il privilegio %-.128s per questa operazione"
+        jpn "アクセスは拒否されました。この操作には %-.128s 権限が(複数の場合はどれか1つ)必要です。"
+        nla "Toegang geweigerd. U moet het %-.128s privilege hebben voor deze operatie"
+        por "Acesso negado. Você precisa o privilégio %-.128s para essa operação"
+        rus "В доступе отказано. Вам нужны привилегии %-.128s для этой операции"
+        spa "Acceso denegado. Usted necesita (al menos un(os)) privilegio(s) %-.128s para esta operación"
+        swe "Du har inte privlegiet '%-.128s' som behövs för denna operation"
+        ukr "Access denied. You need the %-.128s privilege for this operation"
+ER_LOCAL_VARIABLE  
+        chi "变量'%-.64s'是一个SESSION变量,不能与Set Global一起使用"
+        eng "Variable '%-.64s' is a SESSION variable and can't be used with SET GLOBAL"
+        ger "Variable '%-.64s' ist eine lokale Variable und kann nicht mit SET GLOBAL verändert werden"
+        geo "ცვლადი %-.64s SESSION-ის ცვლადია და SET GLOBAL-ის საშუალებით ვერ შეიცვლება"
+        ita "La variabile '%-.64s' e' una variabile locale ( SESSION ) e non puo' essere cambiata usando SET GLOBAL"
+        jpn "変数 '%-.64s' はセッション変数です。SET GLOBALでは使用できません。"
+        nla "Variabele '%-.64s' is SESSION en kan niet worden gebruikt met SET GLOBAL"
+        por "Variável '%-.64s' é uma SESSION variável e não pode ser usada com SET GLOBAL"
+        rus "Переменная '%-.64s' является потоковой (SESSION) переменной и не может быть изменена с помощью SET GLOBAL"
+        spa "La variable '%-.64s' es una variable de SESSION y no puede ser usada con SET GLOBAL"
+        swe "Variabel '%-.64s' är en SESSION variabel och kan inte ändrad med SET GLOBAL"
+ER_GLOBAL_VARIABLE  
+        chi "变量'%-.64s'是全局变量,应该用SET GLOBAL设置"
+        eng "Variable '%-.64s' is a GLOBAL variable and should be set with SET GLOBAL"
+        ger "Variable '%-.64s' ist eine globale Variable und muss mit SET GLOBAL verändert werden"
+        geo "ცვლადი %-.64s გლობალური (GLOBAL) ცვლადია და SET GLOBAL-ით უნდა შეიცვალოს"
+        ita "La variabile '%-.64s' e' una variabile globale ( GLOBAL ) e deve essere cambiata usando SET GLOBAL"
+        jpn "変数 '%-.64s' はグローバル変数です。SET GLOBALを使用してください。"
+        nla "Variabele '%-.64s' is GLOBAL en dient te worden gewijzigd met SET GLOBAL"
+        por "Variável '%-.64s' é uma GLOBAL variável e deve ser configurada com SET GLOBAL"
+        rus "Переменная '%-.64s' является глобальной (GLOBAL) переменной, и ее следует изменять с помощью SET GLOBAL"
+        spa "La variable '%-.64s' es una variable GLOBAL y debería de ser configurada con SET GLOBAL"
+        swe "Variabel '%-.64s' är en GLOBAL variabel och bör sättas med SET GLOBAL"
+ER_NO_DEFAULT 42000 
+        chi "变量'%-.64s'没有默认值"
+        eng "Variable '%-.64s' doesn't have a default value"
+        ger "Variable '%-.64s' hat keinen Vorgabewert"
+        geo "ცვლადს %-.64s ნაგულისხმები მნიშვნელობა არ გააჩნია"
+        ita "La variabile '%-.64s' non ha un valore di default"
+        jpn "変数 '%-.64s' にはデフォルト値がありません。"
+        nla "Variabele '%-.64s' heeft geen standaard waarde"
+        por "Variável '%-.64s' não tem um valor padrão"
+        rus "Переменная '%-.64s' не имеет значения по умолчанию"
+        spa "La variable '%-.64s' no tiene un valor por defecto"
+        swe "Variabel '%-.64s' har inte ett DEFAULT-värde"
+ER_WRONG_VALUE_FOR_VAR 42000 
+        chi "变量'%-.64s'无法设置为'%-.200T'的值"
+        eng "Variable '%-.64s' can't be set to the value of '%-.200T'"
+        ger "Variable '%-.64s' kann nicht auf '%-.200T' gesetzt werden"
+        geo "ცვლადს '%-.64s' მნიშვნელობას '%-.200T' ვერ მიანიჭებთ"
+        ita "Alla variabile '%-.64s' non puo' essere assegato il valore '%-.200T'"
+        jpn "変数 '%-.64s' に値 '%-.200T' を設定できません。"
+        nla "Variabele '%-.64s' kan niet worden gewijzigd naar de waarde '%-.200T'"
+        por "Variável '%-.64s' não pode ser configurada para o valor de '%-.200T'"
+        rus "Переменная '%-.64s' не может быть установлена в значение '%-.200T'"
+        spa "La variable '%-.64s' no puede ser configurada para el valor de '%-.200T'"
+        swe "Variabel '%-.64s' kan inte sättas till '%-.200T'"
+ER_WRONG_TYPE_FOR_VAR 42000 
+        chi "变量'%-.64s'的参数类型不正确"
+        eng "Incorrect argument type to variable '%-.64s'"
+        ger "Falscher Argumenttyp für Variable '%-.64s'"
+        geo "არასწორი არგუმენტის ტიპი ცვლადისთვის '%-.64s'"
+        ita "Tipo di valore errato per la variabile '%-.64s'"
+        jpn "変数 '%-.64s' への値の型が不正です。"
+        nla "Foutief argumenttype voor variabele '%-.64s'"
+        por "Tipo errado de argumento para variável '%-.64s'"
+        rus "Неверный тип аргумента для переменной '%-.64s'"
+        spa "Tipo de argumento incorrecto para variable '%-.64s'"
+        swe "Fel typ av argument till variabel '%-.64s'"
+ER_VAR_CANT_BE_READ  
+        chi "变量'%-.64s'只能设置,不能读"
+        eng "Variable '%-.64s' can only be set, not read"
+        ger "Variable '%-.64s' kann nur verändert, nicht gelesen werden"
+        geo "ცვლადი '%-.64s' შეგიძლიათ მხოლოდ დააყენოთ, მისი წაკითხვა შეუძლებელია"
+        ita "Alla variabile '%-.64s' e' di sola scrittura quindi puo' essere solo assegnato un valore, non letto"
+        jpn "変数 '%-.64s' は書き込み専用です。読み込みはできません。"
+        nla "Variabele '%-.64s' kan alleen worden gewijzigd, niet gelezen"
+        por "Variável '%-.64s' somente pode ser configurada, não lida"
+        rus "Переменная '%-.64s' может быть только установлена, но не считана"
+        spa "La variable '%-.64s' solamente puede ser configurada, no leída"
+        swe "Variabeln '%-.64s' kan endast sättas, inte läsas"
+ER_CANT_USE_OPTION_HERE 42000 
+        chi "'%s'的使用/放置不正确"
+        eng "Incorrect usage/placement of '%s'"
+        ger "Falsche Verwendung oder Platzierung von '%s'"
+        geo "'%s' არასწორად გამოიყენება ან არასწორ ადგილასაა მითითებული"
+        ita "Uso/posizione di '%s' sbagliato"
+        jpn "'%s' の使用法または場所が不正です。"
+        nla "Foutieve toepassing/plaatsing van '%s'"
+        por "Errado uso/colocação de '%s'"
+        rus "Неверное использование или в неверном месте указан '%s'"
+        spa "Incorrecto uso/colocación de '%s'"
+        swe "Fel använding/placering av '%s'"
+ER_NOT_SUPPORTED_YET 42000 
+        chi "此版本的MariaDB尚未支持'%s'"
+        eng "This version of MariaDB doesn't yet support '%s'"
+        ger "Diese MariaDB-Version unterstützt '%s' nicht"
+        geo "MariaDB-ის ამ ვერსიას '%s'-ის მხარდაჭერა ჯერ არ გააჩნია"
+        ita "Questa versione di MariaDB non supporta ancora '%s'"
+        jpn "このバージョンのMariaDBでは、まだ '%s' を利用できません。"
+        nla "Deze versie van MariaDB ondersteunt nog geen '%s'"
+        por "Esta versão de MariaDB não suporta ainda '%s'"
+        rus "Эта версия MariaDB пока еще не поддерживает '%s'"
+        spa "Esta versión de MariaDB no soporta todavía '%s'"
+        swe "Denna version av MariaDB kan ännu inte utföra '%s'"
+ER_MASTER_FATAL_ERROR_READING_BINLOG  
+        chi "从二进制日志读取数据时,从master遇到致命错误%d:'%-.320s'"
+        eng "Got fatal error %d from master when reading data from binary log: '%-.320s'"
+        ger "Schwerer Fehler %d: '%-.320s vom Master beim Lesen des binären Logs"
+        geo "ფატალური შეცდომა %d მთავარი სერვერიდან ბინარული ჟურნალიდან მონაცემების კითხვისას: '%-.320s'"
+        ita "Errore fatale %d: '%-.320s' dal master leggendo i dati dal log binario"
+        jpn "致命的なエラー %d: '%-.320s' がマスターでバイナリログ読み込み中に発生しました。"
+        nla "Kreeg fatale fout %d: '%-.320s' van master tijdens lezen van data uit binaire log"
+        por "Obteve fatal erro %d: '%-.320s' do master quando lendo dados do binary log"
+        rus "Получена неисправимая ошибка %d: '%-.320s' от головного сервера в процессе выборки данных из двоичного журнала"
+        spa "Obtenido error fatal %d del maestro (master) al leer datos del historial (log) binario: '%-.320s'"
+        swe "Fick fatalt fel %d: '%-.320s' från master vid läsning av binärloggen"
+ER_SLAVE_IGNORED_TABLE  
+        chi "由于复制replicate-*-table规则,Slave SQL线程忽略了查询"
+        eng "Slave SQL thread ignored the query because of replicate-*-table rules"
+        ger "Slave-SQL-Thread hat die Abfrage aufgrund von replicate-*-table-Regeln ignoriert"
+        geo "დამორჩილებულმა SQL ნაკადმა replicate-*-table წესების გამო მოთხოვნა გამოტოვა"
+        jpn "replicate-*-table ルールに従って、スレーブSQLスレッドはクエリを無視しました。"
+        nla "Slave SQL thread negeerde de query vanwege replicate-*-table opties"
+        por "Slave SQL thread ignorado a consulta devido às normas de replicação-*-tabela"
+        spa "El hilo (thread) SQL esclavo ha ignorado la consulta (query) debido a las reglas de replicar-*-tabla"
+        swe "Slav SQL tråden ignorerade frågan pga en replicate-*-table regel"
+ER_INCORRECT_GLOBAL_LOCAL_VAR  
+        chi "变量'%-.192s'是一个%s变量"
+        eng "Variable '%-.192s' is a %s variable"
+        ger "Variable '%-.192s' ist eine %s-Variable"
+        geo "ცვლადი '%-.192s' %s ცვლადია"
+        jpn "変数 '%-.192s' は %s 変数です。"
+        nla "Variabele '%-.192s' is geen %s variabele"
+        serbian "Promenljiva '%-.192s' je %s promenljiva"
+        spa "La variable '%-.192s' es una variable %s"
+        swe "Variabel '%-.192s' är av typ %s"
+ER_WRONG_FK_DEF 42000 
+        chi "'%-.192s'的外键定义不正确:%s"
+        eng "Incorrect foreign key definition for '%-.192s': %s"
+        ger "Falsche Fremdschlüssel-Definition für '%-.192s': %s"
+        geo "არასწორი გარე გასაღების აღწერა '%-.192s'-სთვის: %s"
+        jpn "外部キー '%-.192s' の定義の不正: %s"
+        nla "Incorrecte foreign key definitie voor '%-.192s': %s"
+        por "Definição errada da chave estrangeira para '%-.192s': %s"
+        spa "Definición de clave foránea incorrecta para '%-.192s': %s"
+        swe "Felaktig FOREIGN KEY-definition för '%-.192s': %s"
+ER_KEY_REF_DO_NOT_MATCH_TABLE_REF  
+        chi "索引参考和表参考不匹配"
+        eng "Key reference and table reference don't match"
+        ger "Schlüssel- und Tabellenverweis passen nicht zusammen"
+        geo "გასაღების მიმართვა და ცხრილის მიმართვა ერთმანეთს არ ემთხვევა"
+        jpn "外部キーの参照表と定義が一致しません。"
+        nla "Sleutel- en tabelreferentie komen niet overeen"
+        por "Referência da chave e referência da tabela não coincidem"
+        spa "La referencia de clave y la referencia de tabla no coinciden"
+        swe "Nyckelreferensen och tabellreferensen stämmer inte överens"
+ER_OPERAND_COLUMNS 21000 
+        chi "操作数应包含%d列"
+        eng "Operand should contain %d column(s)"
+        ger "Operand sollte %d Spalte(n) enthalten"
+        geo "ოპერანდი %d სვეტს უნდა შეიცავდეს"
+        jpn "オペランドに %d 個の列が必要です。"
+        nla "Operand behoort %d kolommen te bevatten"
+        rus "Операнд должен содержать %d колонок"
+        spa "El operando debería de contener %d columna(s)"
+        ukr "Операнд має складатися з %d стовбців"
+ER_SUBQUERY_NO_1_ROW 21000 
+        chi "子查询返回超过1行"
+        eng "Subquery returns more than 1 row"
+        ger "Unterabfrage lieferte mehr als einen Datensatz zurück"
+        geo "ქვემოთხოვნა ერთ ჩანაწერზე მეტს აბრუნებს"
+        jpn "サブクエリが2行以上の結果を返します。"
+        nla "Subquery retourneert meer dan 1 rij"
+        por "Subconsulta retorna mais que 1 registro"
+        rus "Подзапрос возвращает более одной записи"
+        spa "La subconsulta (subquery) devuelve más de 1 fila"
+        swe "Subquery returnerade mer än 1 rad"
+        ukr "Підзапит повертає більш нiж 1 запис"
+ER_UNKNOWN_STMT_HANDLER  
+        chi "未知prepared statement处理程序(%.*s)给予%s"
+        dan "Unknown prepared statement handler (%.*s) given to %s"
+        eng "Unknown prepared statement handler (%.*s) given to %s"
+        ger "Unbekannter Prepared-Statement-Handler (%.*s) für %s angegeben"
+        geo "უცნობი მომზადებული გამოსახულების დამმუშავებელი (%.*s) გადაცემულია %s-სთვის"
+        jpn "'%.*s' はプリペアードステートメントの不明なハンドルです。(%s で指定されました)"
+        nla "Onebekende prepared statement handler (%.*s) voor %s aangegeven"
+        por "Desconhecido manipulador de declaração preparado (%.*s) determinado para %s"
+        spa "Manejador desconocido de sentencia preparada (%.*s) dado para %s"
+        swe "Okänd PREPARED STATEMENT id (%.*s) var given till %s"
+        ukr "Unknown prepared statement handler (%.*s) given to %s"
+ER_CORRUPT_HELP_DB  
+        chi "帮助数据库已损坏或不存在"
+        eng "Help database is corrupt or does not exist"
+        ger "Die Hilfe-Datenbank ist beschädigt oder existiert nicht"
+        geo "დახმარების ბაზა დაზიანებულია ან არ არსებობს"
+        jpn "ヘルプデータベースは壊れているか存在しません。"
+        nla "Help database is beschadigd of bestaat niet"
+        por "Banco de dado de ajuda corrupto ou não existente"
+        spa "O la Base de datos de Ayuda está corrupta o no existe"
+        swe "Hjälpdatabasen finns inte eller är skadad"
+ER_CYCLIC_REFERENCE  
+        chi "亚查询的死环参考"
+        eng "Cyclic reference on subqueries"
+        ger "Zyklischer Verweis in Unterabfragen"
+        geo "ციკლური მიმართვა ქვემოთხოვნაზე"
+        jpn "サブクエリの参照がループしています。"
+        nla "Cyclische verwijzing in subqueries"
+        por "Referência cíclica em subconsultas"
+        rus "Циклическая ссылка на подзапрос"
+        spa "Referencia cíclica en subconsultas (subqueries)"
+        swe "Cyklisk referens i subqueries"
+        ukr "Циклічне посилання на підзапит"
+ER_AUTO_CONVERT  
+        chi "将列'%s'从%s转换为%s"
+        eng "Converting column '%s' from %s to %s"
+        ger "Feld '%s' wird von %s nach %s umgewandelt"
+        geo "'%s' სვეტის გარდაქმნა '%s'-დან '%s'-მდე"
+        jpn "列 '%s' を %s から %s へ変換します。"
+        nla "Veld '%s' wordt van %s naar %s geconverteerd"
+        por "Convertendo coluna '%s' de %s para %s"
+        rus "Преобразование поля '%s' из %s в %s"
+        spa "Convirtiendo la columna '%s' de %s a %s"
+        swe "Konvertar kolumn '%s' från %s till %s"
+        ukr "Перетворення стовбца '%s' з %s у %s"
+ER_ILLEGAL_REFERENCE 42S22 
+        chi "参考'%-.64s'不支持(%s)"
+        eng "Reference '%-.64s' not supported (%s)"
+        ger "Verweis '%-.64s' wird nicht unterstützt (%s)"
+        geo "მიმართვა '%-.64s' მხარდაუჭერელია (%s)"
+        jpn "'%-.64s' の参照はできません。(%s)"
+        nla "Verwijzing '%-.64s' niet ondersteund (%s)"
+        por "Referência '%-.64s' não suportada (%s)"
+        rus "Ссылка '%-.64s' не поддерживается (%s)"
+        spa "Referencia '%-.64s' no soportada (%s)"
+        swe "Referens '%-.64s' stöds inte (%s)"
+        ukr "Посилання '%-.64s' не пiдтримуется (%s)"
+ER_DERIVED_MUST_HAVE_ALIAS 42000 
+        chi "每个派生的表必须有自己的别名"
+        eng "Every derived table must have its own alias"
+        ger "Für jede abgeleitete Tabelle muss ein eigener Alias angegeben werden"
+        geo "ყოველ მიღებულ ცხრილს საკუთარი ფსევდონიმი უნდა ჰქონდეს"
+        jpn "導出表には別名が必須です。"
+        nla "Voor elke afgeleide tabel moet een unieke alias worden gebruikt"
+        por "Cada tabela derivada deve ter seu próprio alias"
+        spa "Cada tabla derivada debe de tener su propio alias"
+        swe "Varje 'derived table' måste ha sitt eget alias"
+ER_SELECT_REDUCED 01000 
+        chi "SELECT %u在优化期间被减"
+        eng "Select %u was reduced during optimization"
+        ger "Select %u wurde während der Optimierung reduziert"
+        geo "Select %u ოპტიმიზაციის პროცესში შემცირდა"
+        jpn "Select %u は最適化によって減らされました。"
+        nla "Select %u werd geredureerd tijdens optimtalisatie"
+        por "Select %u foi reduzido durante otimização"
+        rus "Select %u был упразднен в процессе оптимизации"
+        spa "La selección %u fué reducida durante optimización"
+        swe "Select %u reducerades vid optimiering"
+        ukr "Select %u was скасовано при оптимiзацii"
+ER_TABLENAME_NOT_ALLOWED_HERE 42000 
+        chi "表'%-.192s'从其中一个SELECT中不能用于%-.32s"
+        eng "Table '%-.192s' from one of the SELECTs cannot be used in %-.32s"
+        ger "Tabelle '%-.192s', die in einem der SELECT-Befehle verwendet wurde, kann nicht in %-.32s verwendet werden"
+        geo "ცხრილი '%-.192s' ერთ-ერთი SELECT-დან არ შეიძლება %-.32s-ში გამოიყენოთ"
+        jpn "特定のSELECTのみで使用の表 '%-.192s' は %-.32s では使用できません。"
+        nla "Tabel '%-.192s' uit een van de SELECTS kan niet in %-.32s gebruikt worden"
+        por "Tabela '%-.192s' de um dos SELECTs não pode ser usada em %-.32s"
+        spa "La tabla '%-.192s' de uno de los SELECT no puede ser usada en %-.32s"
+        swe "Tabell '%-.192s' från en SELECT kan inte användas i %-.32s"
+ER_NOT_SUPPORTED_AUTH_MODE 08004 
+        chi "客户端不支持服务器请求的身份验证协议;考虑升级MariaDB客户端"
+        eng "Client does not support authentication protocol requested by server; consider upgrading MariaDB client"
+        ger "Client unterstützt das vom Server erwartete Authentifizierungsprotokoll nicht. Bitte aktualisieren Sie Ihren MariaDB-Client"
+        geo "კლიენტს სერვერის მიერ მოთხოვნილი ავთენტიკაციის პროტოკოლის მხარდაჭერა არ გააჩნია. სცადეთ, განაახლოთ MariaDB-ის კლიენტი"
+        jpn "クライアントはサーバーが要求する認証プロトコルに対応できません。MariaDBクライアントのアップグレードを検討してください。"
+        nla "Client ondersteunt het door de server verwachtte authenticatieprotocol niet. Overweeg een nieuwere MariaDB client te gebruiken"
+        por "Cliente não suporta o protocolo de autenticação exigido pelo servidor; considere a atualização do cliente MariaDB"
+        spa "El cliente no soporta protocolo de autenticación requerido por el servidor; considere mejorar el cliente MariaDB"
+        swe "Klienten stöder inte autentiseringsprotokollet som begärts av servern; överväg uppgradering av klientprogrammet"
+ER_SPATIAL_CANT_HAVE_NULL 42000 
+        chi "SPATIAL索引的所有部分必须不为null"
+        eng "All parts of a SPATIAL index must be NOT NULL"
+        ger "Alle Teile eines SPATIAL-Index müssen als NOT NULL deklariert sein"
+        geo "SPATIAL ინდექსის ყველა ნაწილი NOT NULL უნდა იყოს"
+        jpn "空間索引のキー列は NOT NULL でなければいけません。"
+        nla "Alle delete van een  SPATIAL index dienen als NOT NULL gedeclareerd te worden"
+        por "Todas as partes de uma SPATIAL index devem ser NOT NULL"
+        spa "Todas las partes de un índice SPATIAL deben de ser NOT NULL"
+        swe "Alla delar av en SPATIAL index måste vara NOT NULL"
+ER_COLLATION_CHARSET_MISMATCH 42000 
+        chi "COLLATION'%s'无效地用于字符集'%s'"
+        eng "COLLATION '%s' is not valid for CHARACTER SET '%s'"
+        ger "COLLATION '%s' ist für CHARACTER SET '%s' ungültig"
+        geo "COLLATION '%s' არასწორია CHARACTER SET '%s'-სთვის"
+        jpn "COLLATION '%s' は CHARACTER SET '%s' に適用できません。"
+        nla "COLLATION '%s' is niet geldig voor CHARACTER SET '%s'"
+        por "COLLATION '%s' não é válida para CHARACTER SET '%s'"
+        spa "El COTEJO (COLLATION) '%s' no es válido para CHARACTER SET '%s'"
+        swe "COLLATION '%s' är inte tillåtet för CHARACTER SET '%s'"
+ER_SLAVE_WAS_RUNNING  
+        chi "Slave已经在运行"
+        eng "Slave is already running"
+        ger "Slave läuft bereits"
+        geo "დამორჩილებული ნაკადი უკვე გაშვებულია"
+        jpn "スレーブはすでに稼働中です。"
+        nla "Slave is reeds actief"
+        por "O slave já está rodando"
+        spa "El esclavo ya está funcionando"
+        swe "Slaven har redan startat"
+ER_SLAVE_WAS_NOT_RUNNING  
+        chi "slave已经停止了"
+        eng "Slave already has been stopped"
+        ger "Slave wurde bereits angehalten"
+        geo "დამორჩილებული ნაკადი უკვე გაჩერებულია"
+        jpn "スレーブはすでに停止しています。"
+        nla "Slave is reeds gestopt"
+        por "O slave já está parado"
+        spa "El esclavo ya fué parado"
+        swe "Slaven har redan stoppat"
+ER_TOO_BIG_FOR_UNCOMPRESS  
+        chi "未压缩的数据量太大;最大量为%d(可能未压缩数据的长度已损坏)"
+        eng "Uncompressed data size too large; the maximum size is %d (probably, length of uncompressed data was corrupted)"
+        ger "Unkomprimierte Daten sind zu groß. Die maximale Größe beträgt %d (wahrscheinlich wurde die Länge der unkomprimierten Daten beschädigt)"
+        geo "შეუკუმშავი მონაცემები ძალიან დიდია. მაქსიმალური ზომაა %d (ალბათ გაშლილი მონაცემების ზომა დაზიანებული იყო)"
+        jpn "展開後のデータが大きすぎます。最大サイズは %d です。(展開後データの長さ情報が壊れている可能性もあります。)"
+        nla "Ongecomprimeerder data is te groot; de maximum lengte is %d (waarschijnlijk, de lengte van de gecomprimeerde data was beschadigd)"
+        por "Tamanho muito grande dos dados des comprimidos. O máximo tamanho é %d. (provavelmente, o comprimento dos dados descomprimidos está corrupto)"
+        spa "Tamaño demasiado grande para datos descomprimidos; el máximo tamaño es %d. (probablemente, el tamaño de datos descomprimidos fué corrompido)"
+ER_ZLIB_Z_MEM_ERROR  
+        chi "ZLIB:内存不足"
+        eng "ZLIB: Not enough memory"
+        ger "ZLIB: Nicht genug Speicher"
+        geo "ZLIB: არასაკმარისი მეხსიერება"
+        jpn "ZLIB: メモリ不足です。"
+        nla "ZLIB: Onvoldoende geheugen"
+        por "ZLIB: Não suficiente memória disponível"
+        spa "ZLIB: No hay suficiente memoria"
+ER_ZLIB_Z_BUF_ERROR  
+        chi "ZLIB:输出缓冲区中没有足够的空间(可能未压缩数据的长度已损坏)"
+        eng "ZLIB: Not enough room in the output buffer (probably, length of uncompressed data was corrupted)"
+        ger "ZLIB: Im Ausgabepuffer ist nicht genug Platz vorhanden (wahrscheinlich wurde die Länge der unkomprimierten Daten beschädigt)"
+        geo "ZLIB: გამოტანის ბაფერში საკმარისი ადგილი არაა (ალბათ გაშლილი მონაცემების ზომა დაზიანებული იყო)"
+        jpn "ZLIB: 出力バッファに十分な空きがありません。(展開後データの長さ情報が壊れている可能性もあります。)"
+        nla "ZLIB: Onvoldoende ruimte in uitgaande buffer (waarschijnlijk, de lengte van de ongecomprimeerde data was beschadigd)"
+        por "ZLIB: Não suficiente espaço no buffer emissor (provavelmente, o comprimento dos dados descomprimidos está corrupto)"
+        spa "ZLIB: No hay suficiente espacio en el búfer de salida (probablemente, el tamaño de datos descomprimidos fué corrompido)"
+ER_ZLIB_Z_DATA_ERROR  
+        chi "ZLIB:输入数据已损坏"
+        eng "ZLIB: Input data corrupted"
+        ger "ZLIB: Eingabedaten beschädigt"
+        geo "ZLIB: შეყვანილი მონაცემების დაზიანებულია"
+        jpn "ZLIB: 入力データが壊れています。"
+        nla "ZLIB: Invoer data beschadigd"
+        por "ZLIB: Dados de entrada está corrupto"
+        spa "ZLIB: Dato de entrada corrupto"
+ER_CUT_VALUE_GROUP_CONCAT  
+        chi "group_concat()削减了行%u. %s"
+        eng "Row %u was cut by %s)"
+        geo "მწკრივი %u გაჭრილია %s-ის მიერ)"
+        spa "La fila %u ha sido cortada por %s)"
+ER_WARN_TOO_FEW_RECORDS 01000 
+        chi "行%lu不包含所有列的数据"
+        eng "Row %lu doesn't contain data for all columns"
+        ger "Zeile %lu enthält nicht für alle Felder Daten"
+        geo "მწკრივი %lu ყველა სვეტისთვის მონაცემებს არ შეიცავს"
+        jpn "行 %lu はすべての列へのデータを含んでいません。"
+        nla "Rij %lu bevat niet de data voor alle kolommen"
+        por "Conta de registro é menor que a conta de coluna na linha %lu"
+        spa "La fila %lu no contiene datos para todas las columnas"
+ER_WARN_TOO_MANY_RECORDS 01000 
+        chi "行%lu被截断;它包含的数据比输入列更多"
+        eng "Row %lu was truncated; it contained more data than there were input columns"
+        ger "Zeile %lu gekürzt, die Zeile enthielt mehr Daten, als es Eingabefelder gibt"
+        geo "მწკრივი %lu წაკვეთილია. როცა შეყვანის სვეტები არსებობდა, ის მეტ მონაცემებს შეიცავდა"
+        jpn "行 %lu はデータを切り捨てられました。列よりも多いデータを含んでいました。"
+        nla "Regel %lu ingekort, bevatte meer data dan invoer kolommen"
+        por "Conta de registro é maior que a conta de coluna na linha %lu"
+        spa "La fila %lu fué truncada; contenía más datos que columnas de entrada"
+ER_WARN_NULL_TO_NOTNULL 22004 
+        chi "列设置为默认值; NULL在行'%s'中提供给了NOT NULL列%lu"
+        eng "Column set to default value; NULL supplied to NOT NULL column '%s' at row %lu"
+        ger "Feld auf Vorgabewert gesetzt, da NULL für NOT-NULL-Feld '%s' in Zeile %lu angegeben"
+        geo "ველი ნაგულისხმებ მნიშვნელობას უდრის. მიწოდებულია NULL, როცა სვეტი %s' მწკრივში %lu მონიშნულია, როგორც NOT NULL"
+        jpn "列にデフォルト値が設定されました。NOT NULLの列 '%s' に 行 %lu で NULL が与えられました。"
+        por "Dado truncado, NULL fornecido para NOT NULL coluna '%s' na linha %lu"
+        spa "Columna puesta a valor por defecto; NULL suministrado para columna NOT NULL '%s' en la fila %lu"
+ER_WARN_DATA_OUT_OF_RANGE 22003 
+        chi "列'%s'行%lu的值超出范围"
+        eng "Out of range value for column '%s' at row %lu"
+        geo "სვეტის '%s' მწკრვში %lu მნიშვნელობა დიაპაზონს გარეთაა"
+        spa "Valor fuera de rango para la columna '%s' en la fila %lu"
+WARN_DATA_TRUNCATED 01000 
+        chi "数据被截断,在列'%s', 行%lu"
+        eng "Data truncated for column '%s' at row %lu"
+        ger "Daten abgeschnitten für Feld '%s' in Zeile %lu"
+        geo "სვეტისთვის '%s' მწკრივში '%lu მონაცემები წაკვეთილია"
+        jpn "列 '%s' の 行 %lu でデータが切り捨てられました。"
+        por "Dado truncado para coluna '%s' na linha %lu"
+        spa "Datos truncados para la columna '%s' en la fila %lu"
+ER_WARN_USING_OTHER_HANDLER  
+        chi "使用存储引擎%s 表格'%s'"
+        eng "Using storage engine %s for table '%s'"
+        ger "Speicher-Engine %s wird für Tabelle '%s' benutzt"
+        geo "გამოიყენება საცავის ძრავა %s ცხრილისთვის '%s'"
+        hindi "स्टोरेज इंजन %s का इस्तेमाल टेबल '%s' के लिए  किया जा रहा है"
+        jpn "ストレージエンジン %s が表 '%s' に利用されています。"
+        por "Usando engine de armazenamento %s para tabela '%s'"
+        spa "Usando motor de almacenaje %s para la tabla '%s'"
+        swe "Använder handler %s för tabell '%s'"
+ER_CANT_AGGREGATE_2COLLATIONS
+        chi "非法混合collations(%s,%s)和(%s,%s),用于操作'%s'"
+        eng "Illegal mix of collations (%s,%s) and (%s,%s) for operation '%s'"
+        ger "Unerlaubte Mischung von Sortierreihenfolgen (%s, %s) und (%s, %s) für Operation '%s'"
+        geo "დაუშვებელია კოლაციების (%s,%s) და (%s,%s) შერევა ოპერაციისთვის '%s'"
+        jpn "照合順序 (%s,%s) と (%s,%s) の混在は操作 '%s' では不正です。"
+        por "Combinação ilegal de collations (%s,%s) e (%s,%s) para operação '%s'"
+        spa "Mezcla ilegal de cotejos (collations) (%s,%s) y (%s,%s) para la operación '%s'"
+ER_DROP_USER  
+        chi "无法删除一个或多个请求的用户"
+        eng "Cannot drop one or more of the requested users"
+        ger "Kann einen oder mehrere der angegebenen Benutzer nicht löschen"
+        geo "ერთი ან მეტი მოთხოვნილი მომხმარებელის წაშლის შეცდომა"
+        spa "No puedo eliminar uno o más de los usuarios solicitados"
+ER_REVOKE_GRANTS  
+        chi "无法为一个或多个请求的用户撤消所有权限"
+        eng "Can't revoke all privileges for one or more of the requested users"
+        ger "Kann nicht alle Berechtigungen widerrufen, die für einen oder mehrere Benutzer gewährt wurden"
+        geo "ერთი ან მეტი მოთხოვნილი მომხმარებლისთვის პრივილეგიების ჩამორთმევის შეცდომა"
+        jpn "指定されたユーザーから指定された全ての権限を剥奪することができませんでした。"
+        por "Não pode revocar todos os privilégios, grant para um ou mais dos usuários pedidos"
+        spa "No puedo revocar todos los privilegios para uno o más de los usuarios solicitados"
+ER_CANT_AGGREGATE_3COLLATIONS  
+        chi "非法混合collations(%s,%s),(%s,%s)和(%s,%s),用于操作'%s'"
+        eng "Illegal mix of collations (%s,%s), (%s,%s), (%s,%s) for operation '%s'"
+        ger "Unerlaubte Mischung von Sortierreihenfolgen (%s, %s), (%s, %s), (%s, %s) für Operation '%s'"
+        geo "კოლაციების (%s,%s), (%s,%s), (%s,%s) დაუშვებელი შერევა ოპერაციისთვის '%s'"
+        jpn "照合順序 (%s,%s), (%s,%s), (%s,%s) の混在は操作 '%s' では不正です。"
+        por "Ilegal combinação de collations (%s,%s), (%s,%s), (%s,%s) para operação '%s'"
+        spa "Mezcla ilegal de cotejos (collations) (%s,%s), (%s,%s), (%s,%s) para la operación '%s'"
+ER_CANT_AGGREGATE_NCOLLATIONS  
+        chi "非法混合collations操作'%s'"
+        eng "Illegal mix of collations for operation '%s'"
+        ger "Unerlaubte Mischung von Sortierreihenfolgen für Operation '%s'"
+        geo "კოლაციების დაუშვებელი შერევა ოპერაციისთვის '%s'"
+        jpn "操作 '%s' では不正な照合順序の混在です。"
+        por "Ilegal combinação de collations para operação '%s'"
+        spa "Mezcla ilegal de cotejos (collations) para la operación '%s'"
+ER_VARIABLE_IS_NOT_STRUCT  
+        chi "变量'%-.64s'不是可变组件(不能用作xxxx.variable_name)"
+        eng "Variable '%-.64s' is not a variable component (can't be used as XXXX.variable_name)"
+        ger "Variable '%-.64s' ist keine Variablen-Komponente (kann nicht als XXXX.variablen_name verwendet werden)"
+        geo "ცვლადი '%-.64s' ცვლადის კომპონენტი არაა (xxxx.variable_name სახით ვერ გამოიყენებთ)"
+        jpn "変数 '%-.64s' は構造変数の構成要素ではありません。(XXXX.変数名 という指定はできません。)"
+        por "Variável '%-.64s' não é uma variável componente (Não pode ser usada como XXXX.variável_nome)"
+        spa "La variable '%-.64s' no es un componente variable (No puede ser usada como XXXX.variable_name)"
+ER_UNKNOWN_COLLATION  
+        chi "未知的collation:'%-.64s'"
+        eng "Unknown collation: '%-.64s'"
+        ger "Unbekannte Sortierreihenfolge: '%-.64s'"
+        geo "უცნობი კოლაცია: '%-.64s'"
+        jpn "不明な照合順序: '%-.64s'"
+        por "Collation desconhecida: '%-.64s'"
+        spa "Cotejo (Collation) desconocido: '%-.64s'"
+ER_SLAVE_IGNORED_SSL_PARAMS  
+        chi "CHANGE MASTER中的SSL参数被忽略,因为此MariaDB从站是在没有SSL支持的情况下编译的;如果启动了支持SSL的MariaDB从站,则以后可以使用它们"
+        eng "SSL parameters in CHANGE MASTER are ignored because this MariaDB slave was compiled without SSL support; they can be used later if MariaDB slave with SSL is started"
+        geo "ბრძანებაში 'CHANGE MASTER' SSL-ის პარამეტრები იგნორირებულია, რადგან დამორჩილებული 'MariaDB' SSL-ის მხარდაჭერის გარეშეა აგებული. ისინი მოგვიანებით შეგიძლიათ გამოიყენოთ, თუ დამოკიდებულ MariaDB-ს SSL-ის მხარდაჭერა ექნება"
+        ger "SSL-Parameter in CHANGE MASTER werden ignoriert, weil dieser MariaDB-Slave ohne SSL-Unterstützung kompiliert wurde. Sie können aber später verwendet werden, wenn ein MariaDB-Slave mit SSL gestartet wird"
+        jpn "このMariaDBスレーブはSSLサポートを含めてコンパイルされていないので、CHANGE MASTER のSSLパラメータは無視されました。今後SSLサポートを持つMariaDBスレーブを起動する際に利用されます。"
+        por "SSL parâmetros em CHANGE MASTER são ignorados porque este escravo MariaDB foi compilado sem o SSL suporte. Os mesmos podem ser usados mais tarde quando o escravo MariaDB com SSL seja iniciado."
+        spa "Los parámetros SSL en CHANGE MASTER son ignorados porque este esclavo MariaDB fue compilado sin soporte SSL; pueden ser usados después cuando el esclavo MariaDB con SSL sea arrancado"
+ER_SERVER_IS_IN_SECURE_AUTH_MODE  
+        chi "服务器在--secure-auth模式下运行,但'%s'@'%s'具有旧格式的密码;请将密码更改为新格式"
+        eng "Server is running in --secure-auth mode, but '%s'@'%s' has a password in the old format; please change the password to the new format"
+        ger "Server läuft im Modus --secure-auth, aber '%s'@'%s' hat ein Passwort im alten Format. Bitte Passwort ins neue Format ändern"
+        geo "სერვერი --secure-auth რეჟიმშია გაშვებული, მაგრამ '%s'@'%s'-ის პაროლი ძველ ფორმატშია. შეცვალეთ პაროლი ახალი ფორმატით"
+        jpn "サーバーは --secure-auth モードで稼働しています。しかし '%s'@'%s' は古い形式のパスワードを使用しています。新しい形式のパスワードに変更してください。"
+        por "Servidor está rodando em --secure-auth modo, porêm '%s'@'%s' tem senha no formato antigo; por favor troque a senha para o novo formato"
+        rus "Сервер запущен в режиме --secure-auth (безопасной авторизации), но для пользователя '%s'@'%s' пароль сохранён в старом формате; необходимо обновить формат пароля"
+        spa "El servidor se está ejecutando en modo --secure-auth, pero '%s'@'%s' tiene una contraseña con formato antiguo; por favor cambie la contraseña al nuevo formato"
+ER_WARN_FIELD_RESOLVED  
+        chi "列或参考'%-.192s%s%-.192s%s%-.192s' 在SELECT #%d 中, 在SELECT #%d中得到解决"
+        eng "Field or reference '%-.192s%s%-.192s%s%-.192s' of SELECT #%d was resolved in SELECT #%d"
+        ger "Feld oder Verweis '%-.192s%s%-.192s%s%-.192s' im SELECT-Befehl Nr. %d wurde im SELECT-Befehl Nr. %d aufgelöst"
+        geo "ველი ან მიმართვა '%-.192s%s%-.192s%s%-.192s' SELECT-დან #%d ამოხსნილია SELECT-ში #%d"
+        jpn "フィールドまたは参照 '%-.192s%s%-.192s%s%-.192s' は SELECT #%d ではなく、SELECT #%d で解決されました。"
+        por "Campo ou referência '%-.192s%s%-.192s%s%-.192s' de SELECT #%d foi resolvido em SELECT #%d"
+        rus "Поле или ссылка '%-.192s%s%-.192s%s%-.192s' из SELECTа #%d была найдена в SELECTе #%d"
+        spa "El campo o la referencia '%-.192s%s%-.192s%s%-.192s' de SELECT #%d se resolvió en SELECT #%d"
+        ukr "Стовбець або посилання '%-.192s%s%-.192s%s%-.192s' із SELECTу #%d було знайдене у SELECTі #%d"
+ER_BAD_SLAVE_UNTIL_COND  
+        chi "START SLAVE UNTIL的参数或参数的组合不正确"
+        eng "Incorrect parameter or combination of parameters for START SLAVE UNTIL"
+        ger "Falscher Parameter oder falsche Kombination von Parametern für START SLAVE UNTIL"
+        geo "არასწორი პარამეტრი ან პარამეტრების კომბინაცია START SLAVE UNTIL-სთვის"
+        jpn "START SLAVE UNTIL へのパラメータまたはその組み合わせが不正です。"
+        por "Parâmetro ou combinação de parâmetros errado para START SLAVE UNTIL"
+        spa "Parámetro incorrecto o combinación de parámetros para START SLAVE UNTIL"
+ER_MISSING_SKIP_SLAVE  
+        chi "START SLAVE UNTIL进行逐步复制时建议使用--skip-slave-start;否则,如果有意外的Slave的mariadbd重启,可能有问题"
+        eng "It is recommended to use --skip-slave-start when doing step-by-step replication with START SLAVE UNTIL; otherwise, you will get problems if you get an unexpected slave's mariadbd restart"
+        ger "Es wird empfohlen, mit --skip-slave-start zu starten, wenn mit START SLAVE UNTIL eine Schritt-für-Schritt-Replikation ausgeführt wird. Ansonsten gibt es Probleme, wenn ein Slave-Server unerwartet neu startet"
+        geo "START SLAVE UNTIL-ით ნაბიჯ-ნაბიჯ რეპლიკაციისთვის რეკომენდებულია გამოიყენოთ --skip-slave-start, რადგან დამორჩილებული mariadbd სერვერის გადატვირთვისას შეიძლება პრობლემები შეგექმნათ"
+        jpn "START SLAVE UNTIL で段階的にレプリケーションを行う際には、--skip-slave-start オプションを使うことを推奨します。使わない場合、スレーブのmariadbdが不慮の再起動をすると問題が発生します。"
+        por "É recomendado para rodar com --skip-slave-start quando fazendo replicação passo-por-passo com START SLAVE UNTIL, de outra forma você não está seguro em caso de inesperada reinicialição do mariadbd escravo"
+        spa "Se recomienda usar --skip-slave-start al hacer réplica paso a paso con START SLAVE UNTIL; en caso contrario, obtendrá problemas si tiene lugar un rearranque inesperado del esclavo mariadbd"
+ER_UNTIL_COND_IGNORED  
+        chi "不能启动SQL线程所以UNTIL选项被忽略"
+        eng "SQL thread is not to be started so UNTIL options are ignored"
+        ger "SQL-Thread soll nicht gestartet werden. Daher werden UNTIL-Optionen ignoriert"
+        geo "SQL ნაკადი არ გაშვებულა, ასე რომ UNTIL პარამეტრები იგნორირებულია"
+        jpn "スレーブSQLスレッドが開始されないため、UNTILオプションは無視されました。"
+        por "Thread SQL não pode ser inicializado tal que opções UNTIL são ignoradas"
+        spa "El hilo (thread) SQL no va a ser arrancado, por lo que las opciones UNTIL son ignoradas"
+ER_WRONG_NAME_FOR_INDEX 42000 
+        chi "索引名称'%-.100s'不正确"
+        eng "Incorrect index name '%-.100s'"
+        ger "Falscher Indexname '%-.100s'"
+        geo "ინდექსის სახელი ('%-.100s') არასწორია"
+        jpn "索引名 '%-.100s' は不正です。"
+        por "Incorreto nome de índice '%-.100s'"
+        spa "Nombre de índice incorrecto '%-.100s'"
+        swe "Felaktigt index namn '%-.100s'"
+ER_WRONG_NAME_FOR_CATALOG 42000 
+        chi "目录名称'%-.100s'不正确"
+        eng "Incorrect catalog name '%-.100s'"
+        ger "Falscher Katalogname '%-.100s'"
+        geo "კატალოგის სახელი ('%-.100s') არასწორია"
+        jpn "カタログ名 '%-.100s' は不正です。"
+        por "Incorreto nome de catálogo '%-.100s'"
+        spa "Nombre de catálogo incorrecto '%-.100s'"
+        swe "Felaktigt katalog namn '%-.100s'"
+ER_WARN_QC_RESIZE  
+        chi "设置查询缓存值%llu失败;新查询缓存值为%lu"
+        eng "Query cache failed to set size %llu; new query cache size is %lu"
+        ger "Änderung der Query-Cache-Größe auf %llu fehlgeschlagen; neue Query-Cache-Größe ist %lu"
+        geo "შეცდომა მოთხოვნების კეშის ზომის %llu-ზე დაყენებისას. ახალი მოთხოვნების კეშის ზომაა %lu"
+        por "Falha em Query cache para configurar tamanho %llu, novo tamanho de query cache é %lu"
+        rus "Кеш запросов не может установить размер %llu, новый размер кеша зпросов - %lu"
+        spa "La caché de consulta (query) ha fallado al poner el tamaño %llu; el nuevo tamaño de caché de consulta (query) es %lu"
+        swe "Storleken av 'Query cache' kunde inte sättas till %llu, ny storlek är %lu"
+        ukr "Кеш запитів неспроможен встановити розмір %llu, новий розмір кеша запитів - %lu"
+ER_BAD_FT_COLUMN  
+        chi "列'%-.192s'不能成为全文索引的一部分"
+        eng "Column '%-.192s' cannot be part of FULLTEXT index"
+        ger "Feld '%-.192s' kann nicht Teil eines FULLTEXT-Index sein"
+        geo "'%-.192s' სვეტი FULLTEXT ინდექსის ნაწილი ვერ იქნება"
+        jpn "列 '%-.192s' は全文索引のキーにはできません。"
+        por "Coluna '%-.192s' não pode ser parte de índice FULLTEXT"
+        spa "La columna '%-.192s' no puede formar parte de índice FULLTEXT"
+        swe "Kolumn '%-.192s' kan inte vara del av ett FULLTEXT index"
+ER_UNKNOWN_KEY_CACHE  
+        chi "未知索引缓存'%-.100s'"
+        eng "Unknown key cache '%-.100s'"
+        ger "Unbekannter Schlüssel-Cache '%-.100s'"
+        geo "უცნობი გასაღების კეში '%-.100s'"
+        jpn "'%-.100s' は不明なキーキャッシュです。"
+        por "Key cache desconhecida '%-.100s'"
+        spa "Caché de clave desconocida '%-.100s'"
+        swe "Okänd nyckel cache '%-.100s'"
+ER_WARN_HOSTNAME_WONT_WORK  
+        chi "MariaDB以--skip-name-resolve模式启动;想用grant,您必须重新启动,不用这个选项"
+        eng "MariaDB is started in --skip-name-resolve mode; you must restart it without this switch for this grant to work"
+        ger "MariaDB wurde mit --skip-name-resolve gestartet. Diese Option darf nicht verwendet werden, damit diese Rechtevergabe möglich ist"
+        geo "MariaDB გაშვებულია რეჟიმში --skip-name-resolve. ამ GRANT-მა რომ იმუშაოს, სერვერი ამ პარამეტრის გარეშე უნდა იყოს გაშვებული"
+        jpn "MariaDBは --skip-name-resolve モードで起動しています。このオプションを外して再起動しなければ、この権限操作は機能しません。"
+        por "MariaDB foi inicializado em modo --skip-name-resolve. Você necesita reincializá-lo sem esta opção para este grant funcionar"
+        spa "MariaDB ha sido arrancada en modo --skip-name-resolve; vd necesita reinicializarla sin esta opción para que esta concesión funcione"
+ER_UNKNOWN_STORAGE_ENGINE 42000 
+        chi "未知的存储引擎'%s'"
+        eng "Unknown storage engine '%s'"
+        ger "Unbekannte Speicher-Engine '%s'"
+        geo "უცნობი საცავის ძრავა '%s'"
+        hindi "अज्ञात स्टोरेज इंजन '%s'"
+        jpn "'%s' は不明なストレージエンジンです。"
+        por "Motor de tabela desconhecido '%s'"
+        spa "Motor de almacenaje '%s' desconocido"
+ER_WARN_DEPRECATED_SYNTAX  
+        chi "弃用'%s',将在将来的版本中删除。请使用%s"
+        eng "'%s' is deprecated and will be removed in a future release. Please use %s instead"
+        ger "'%s' ist veraltet. Bitte benutzen Sie '%s'"
+        geo "'%s' მოძველებულია და მომავალში ამოღებული იქნება. გამოიყენეთ %s"
+        jpn "'%s' は将来のリリースで廃止予定です。代わりに %s を使用してください。"
+        por "'%s' é desatualizado. Use '%s' em seu lugar"
+        spa "'%s' está obsoleto y será quitado en una entrega futura, use '%s' en su lugar"
+ER_NON_UPDATABLE_TABLE  
+        chi "目标表%-.100s多个%s不可更新"
+        eng "The target table %-.100s of the %s is not updatable"
+        ger "Die Zieltabelle %-.100s von %s ist nicht aktualisierbar"
+        geo "სამიზნე ცხრილი %-.100s, რომელიც %s-ის ნაწილია, განახლებადი არაა"
+        jpn "対象表 %-.100s は更新可能ではないので、%s を行えません。"
+        por "A tabela destino %-.100s do %s não é atualizável"
+        rus "Таблица %-.100s в %s не может изменятся"
+        spa "La tabla destino %-.100s del %s no es actualizable"
+        swe "Tabell %-.100s använd med '%s' är inte uppdateringsbar"
+        ukr "Таблиця %-.100s у %s не може оновлюватись"
+ER_FEATURE_DISABLED  
+        chi "'%s'功能被禁用;您需要MariaDB以'%s'构建以使其工作"
+        eng "The '%s' feature is disabled; you need MariaDB built with '%s' to have it working"
+        ger "Das Feature '%s' ist ausgeschaltet, Sie müssen MariaDB mit '%s' übersetzen, damit es verfügbar ist"
+        geo "ფუნქცია '%s' გამორთულია. ასამუშავებლად MariaDB '%s'-ის მხარდაჭერით უნდა ააგოთ"
+        jpn "機能 '%s' は無効です。利用するためには '%s' を含めてビルドしたMariaDBが必要です。"
+        por "O recurso '%s' foi desativado; você necessita MariaDB construído com '%s' para ter isto funcionando"
+        spa "La característica '%s' fue deshabilitada; usted necesita construir MariaDB con '%s' para tener eso funcionando"
+        swe "'%s' är inte aktiverad; För att aktivera detta måste du bygga om MariaDB med '%s' definierad"
+ER_OPTION_PREVENTS_STATEMENT  
+        chi "MariaDB服务器使用%s选项运行,因此无法执行此语句"
+        eng "The MariaDB server is running with the %s option so it cannot execute this statement"
+        ger "Der MariaDB-Server läuft mit der Option %s und kann diese Anweisung deswegen nicht ausführen"
+        geo "MariaDB სერვერი %s პარამეტრითაა გაშვებული და ამ გამოსახულების შესრულება არ შეუძლია"
+        jpn "MariaDBサーバーが %s オプションで実行されているので、このステートメントは実行できません。"
+        por "O servidor MariaDB está rodando com a opção %s razão pela qual não pode executar esse commando"
+        spa "El servidor MariaDB se está ejecutando con la opción %s por lo que no se puede ejecutar esta sentencia"
+        swe "MariaDB är startad med %s. Pga av detta kan du inte använda detta kommando"
+ER_DUPLICATED_VALUE_IN_TYPE  
+        chi "列'%-.100s'有重复的值'%-.64s'在%s"
+        eng "Column '%-.100s' has duplicated value '%-.64s' in %s"
+        ger "Feld '%-.100s' hat doppelten Wert '%-.64s' in %s"
+        geo "სვეტს '%-.100s' გააჩნია დუბლირებული მნიშვნელობა '%-.64s' %s-ში"
+        jpn "列 '%-.100s' で、重複する値 '%-.64s' が %s に指定されています。"
+        por "Coluna '%-.100s' tem valor duplicado '%-.64s' em %s"
+        spa "La columna '%-.100s' tiene valor duplicado '%-.64s' en %s"
+ER_TRUNCATED_WRONG_VALUE 22007 
+        chi "截断的不正确%-.32T值:'%-.128T'"
+        eng "Truncated incorrect %-.32T value: '%-.128T'"
+        ger "Falscher %-.32T-Wert gekürzt: '%-.128T'"
+        geo "წაკვეთილია არასწორი %-.32T მნიშვნელობა: '%-.128T'"
+        jpn "不正な %-.32T の値が切り捨てられました。: '%-.128T'"
+        por "Truncado errado %-.32T valor: '%-.128T'"
+        spa "Truncado incorrecto %-.32T valor: '%-.128T'"
+ER_TOO_MUCH_AUTO_TIMESTAMP_COLS  
+        chi "表定义不正确;默认或ON UPDATE中只能有一个带有CURRENT_TIMESTAMP的TIMESTAMP列"
+        eng "Incorrect table definition; there can be only one TIMESTAMP column with CURRENT_TIMESTAMP in DEFAULT or ON UPDATE clause"
+        ger "Fehlerhafte Tabellendefinition. Es kann nur eine einzige TIMESTAMP-Spalte mit CURRENT_TIMESTAMP als DEFAULT oder in einer ON-UPDATE-Klausel geben"
+        geo "ცხრილის არასწორი აღწერა. DEFAULT ან ON UPDATE პირობაში CURRENT TIMESTAMP-ით მხოლოდ ერთი TIMESTAMP სვეტი შეიძლება არსებობდეს"
+        jpn "不正な表定義です。DEFAULT句またはON UPDATE句に CURRENT_TIMESTAMP をともなうTIMESTAMP型の列は1つまでです。"
+        por "Incorreta definição de tabela; Pode ter somente uma coluna TIMESTAMP com CURRENT_TIMESTAMP em DEFAULT ou ON UPDATE cláusula"
+        spa "Definición incorrecta de tabla; solamente puede haber una columna TIMESTAMP con CURRENT_TIMESTAMP en DEFAULT o en cláusula ON UPDATE"
+ER_INVALID_ON_UPDATE  
+        chi "在'%-.192s'列的ON UPDATE子句上无效"
+        eng "Invalid ON UPDATE clause for '%-.192s' column"
+        ger "Ungültige ON-UPDATE-Klausel für Spalte '%-.192s'"
+        geo "არასწორი ON UPDATE პირობა '%-.192s' სვეტისთვის"
+        jpn "列 '%-.192s' に ON UPDATE句は無効です。"
+        por "Inválida cláusula ON UPDATE para campo '%-.192s'"
+        spa "Cláusula ON UPDATE inválida para la columna '%-.192s'"
+ER_UNSUPPORTED_PS  
+        chi "尚未在prepared statement协议中支持此命令"
+        eng "This command is not supported in the prepared statement protocol yet"
+        ger "Dieser Befehl wird im Protokoll für vorbereitete Anweisungen noch nicht unterstützt"
+        geo "ეს ბრძანება მომზადებული გამოსახულების პროტოკოლში ჯერ მხარდაჭერილი არაა"
+        spa "Este comando no se encuentra soportado para protocolo de sentencia preparada, aún"
+ER_GET_ERRMSG  
+        chi "出错%d '%-.200s'来自%s"
+        dan "Modtog fejl %d '%-.200s' fra %s"
+        eng "Got error %d '%-.200s' from %s"
+        ger "Fehler %d '%-.200s' von %s"
+        geo "მიღებულია შეცდომა %d '%-.200s' %s-დან"
+        jpn "エラー %d '%-.200s' が %s から返されました。"
+        nor "Mottok feil %d '%-.200s' fa %s"
+        norwegian-ny "Mottok feil %d '%-.200s' fra %s"
+        spa "Obtenido error %d '%-.200s' desde %s"
+ER_GET_TEMPORARY_ERRMSG  
+        chi "出临时错误%d '%-.200s'来自%s"
+        dan "Modtog temporary fejl %d '%-.200s' fra %s"
+        eng "Got temporary error %d '%-.200s' from %s"
+        ger "Temporärer Fehler %d '%-.200s' von %s"
+        geo "მიღებულია დროებითი შეცდომა %d '%-.200s' %s-დან"
+        jpn "一時エラー %d '%-.200s' が %s から返されました。"
+        nor "Mottok temporary feil %d '%-.200s' fra %s"
+        norwegian-ny "Mottok temporary feil %d '%-.200s' fra %s"
+        spa "Obtenido error temporal %d '%-.200s' desde %s"
+ER_UNKNOWN_TIME_ZONE  
+        chi "未知或不正确的时区:'%-.64s'"
+        eng "Unknown or incorrect time zone: '%-.64s'"
+        ger "Unbekannte oder falsche Zeitzone: '%-.64s'"
+        geo "უცნობი ან არასწორი დროის სარტყელი: '%-.64s'"
+        spa "Zona temporal desconocida o incorrecta: '%-.64s'"
+ER_WARN_INVALID_TIMESTAMP  
+        chi "无效TIMESTAMP值:列'%s' 行'%lu'"
+        eng "Invalid TIMESTAMP value in column '%s' at row %lu"
+        ger "Ungültiger TIMESTAMP-Wert in Feld '%s', Zeile %lu"
+        geo "TIMESTAMP-ის არასწორი მნიშვნელობა სვეტში '%s' მწკრივში %lu"
+        spa "Valor inválido de SELLO TEMPORAL (TIMESTAMP) en la columna '%s' de la fila %lu"
+ER_INVALID_CHARACTER_STRING  
+        chi "无效的%s字符串:'%.64T'"
+        eng "Invalid %s character string: '%.64T'"
+        ger "Ungültiger %s-Zeichen-String: '%.64T'"
+        geo "არასწორი %s სტრიქონი: '%.64T'"
+        spa "Cadena de carácter %s inválida: '%.64T'"
+ER_WARN_ALLOWED_PACKET_OVERFLOWED  
+        chi "%s()的结果大于max_allowed_packet(%ld) - 截断"
+        eng "Result of %s() was larger than max_allowed_packet (%ld) - truncated"
+        ger "Ergebnis von %s() war größer als max_allowed_packet (%ld) Bytes und wurde deshalb gekürzt"
+        geo "%s()-ის პასუხი max_allowed_packet-ზე (%ld) დიდია - წაკვეთა"
+        spa "El resultado de %s() ha sido mayor que max_allowed_packet (%ld) - truncado"
+ER_CONFLICTING_DECLARATIONS  
+        chi "矛盾语句:'%s%s'和'%s%s'"
+        eng "Conflicting declarations: '%s%s' and '%s%s'"
+        ger "Widersprüchliche Deklarationen: '%s%s' und '%s%s'"
+        geo "კონფლიქტური აღწერები: '%s%s' და '%s%s'"
+        spa "Declaraciones conflictivas: '%s%s' y '%s%s'"
+ER_SP_NO_RECURSIVE_CREATE 2F003 
+        chi "无法从另一个存储过程中创建%s"
+        eng "Can't create a %s from within another stored routine"
+        ger "Kann kein %s innerhalb einer anderen gespeicherten Routine erzeugen"
+        geo "%s-ის სხვა დამახსოვრებულ ქვეპროგრამიდან შექმნა შეუძლებელია"
+        spa "No puedo crear una %s desde dentro de otra rutina almacenada"
+ER_SP_ALREADY_EXISTS 42000 
+        chi "%s%s已经存在"
+        eng "%s %s already exists"
+        ger "%s %s existiert bereits"
+        geo "%s %s უკვე არსებობს"
+        hindi "%s %s पहले से ही मौजूद है"
+        spa "%s %s ya existe"
+ER_SP_DOES_NOT_EXIST 42000 
+        chi "%s%s不存在"
+        eng "%s %s does not exist"
+        ger "%s %s existiert nicht"
+        geo "%s %s არ არსებობს"
+        hindi "%s %s मौजूद नहीं है"
+        spa "%s %s no existe"
+ER_SP_DROP_FAILED  
+        chi "未能DROP%s%s"
+        eng "Failed to DROP %s %s"
+        ger "DROP %s %s ist fehlgeschlagen"
+        geo "%s %s-ის DROP შეუძლებელია"
+        hindi "%s %s को ड्रॉप करने में असफल रहे"
+        spa "No pude ELIMINAR (DROP) %s %s"
+ER_SP_STORE_FAILED  
+        chi "无法创建%s%s"
+        eng "Failed to CREATE %s %s"
+        ger "CREATE %s %s ist fehlgeschlagen"
+        geo "%s %s-ის CREATE შეუძლებელია"
+        hindi "%s %s को बनाने में असफल रहे"
+        spa "No pude CREAR %s %s"
+ER_SP_LILABEL_MISMATCH 42000 
+        chi "%s,没有匹配标签:%s"
+        eng "%s with no matching label: %s"
+        ger "%s ohne passende Marke: %s"
+        geo "%s-ს ჭდე არ ემთხვევა: %s"
+        spa "%s sin etiqueta coincidente: %s"
+ER_SP_LABEL_REDEFINE 42000 
+        chi "重新定义标签%s"
+        eng "Redefining label %s"
+        ger "Neudefinition der Marke %s"
+        geo "ჭდე %s თავიდან აღიწერა"
+        spa "Redefiniendo etiqueta %s"
+ER_SP_LABEL_MISMATCH 42000 
+        chi "没有匹配的最终标签%s"
+        eng "End-label %s without match"
+        ger "Ende-Marke %s ohne zugehörigen Anfang"
+        geo "ბოლო-ჭდე %s დამთხვევის გარეშე"
+        spa "Etiqueta-Final %s sin coincidencia"
+ER_SP_UNINIT_VAR 01000 
+        chi "参考未初始化的变量%s"
+        eng "Referring to uninitialized variable %s"
+        ger "Zugriff auf nichtinitialisierte Variable %s"
+        geo "მიმართვა არაინიციალიზებულ ცვლადზე %s"
+        spa "Refiriéndose a variable %s sin inicializar"
+ER_SP_BADSELECT 0A000 
+        chi "PROCEDURE%s不能返回给定上下文中的结果集"
+        eng "PROCEDURE %s can't return a result set in the given context"
+        ger "PROCEDURE %s kann im gegebenen Kontext keine Ergebnismenge zurückgeben"
+        geo "PROCEDURE %s-ს მითითებულ კონტექსტში პასუხის დაბრუნება არ შეუძლია"
+        spa "El PROCEDIMIENTO (PROCEDURE) %s no puede devolver un conjunto de resultados en el contexto dado"
+ER_SP_BADRETURN 42000 
+        chi "RETURN仅允许在函数中"
+        eng "RETURN is only allowed in a FUNCTION"
+        ger "RETURN ist nur innerhalb einer FUNCTION erlaubt"
+        geo "RETURN მხოლოდ FUNCTION-შია დაშვებული"
+        hindi "RETURN को केवल FUNCTION में इस्तेमाल किया जा सकता है"
+        spa "RETURN sólo se permite dentro de una FUNCIÓN"
+ER_SP_BADSTATEMENT 0A000 
+        chi "%s不允许在存储过程中"
+        eng "%s is not allowed in stored procedures"
+        ger "%s ist in gespeicherten Prozeduren nicht erlaubt"
+        geo "%s დამახსოვრებულ პროცედურებში დაშვებული არაა"
+        hindi "%s को STORED PROCEDURE में इस्तेमाल नहीं किया जा सकता है"
+        spa "%s no permitido en procedimientos almacenados"
+ER_UPDATE_LOG_DEPRECATED_IGNORED 42000 
+        chi "更新日志被弃用并由二进制日志替换;SET SQL_LOG_UPDATE已被忽略。此选项将在MariaDB 5.6中删除"
+        eng "The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored. This option will be removed in MariaDB 5.6"
+        ger "Das Update-Log ist veraltet und wurde durch das Binär-Log ersetzt. SET SQL_LOG_UPDATE wird ignoriert. Diese Option wird in MariaDB 5.6 entfernt"
+        geo "Update-Log მოძველებულია და ხდება მისი ჩანაცვლება Binary-Log-ით. SET SQL LOG UPDATE იგნორირებულია. ეს პარამეტრი წაიშლება MariaDB-ის ვერსიაში 5.6"
+        spa "El historial (log) de actualización se encuentra obsoleto y reemplazado por el historial (log) binario; SET SQL_LOG_UPDATE ha sido ignorado. Esta opción será quitada en MariaDB 5.6"
+ER_UPDATE_LOG_DEPRECATED_TRANSLATED 42000 
+        chi "更新日志被弃用并由二进制日志替换;SET SQL_LOG_UPDATE已被转换为设置SQL_LOG_BIN。此选项将在MariaDB 5.6中删除"
+        eng "The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN. This option will be removed in MariaDB 5.6"
+        ger "Das Update-Log ist veraltet und wurde durch das Binär-Log ersetzt. SET SQL_LOG_UPDATE wurde in SET SQL_LOG_BIN übersetzt. Diese Option wird in MariaDB 5.6 entfernt"
+        geo "Update-Log მოძველებულია და ხდება მისი ჩანაცვლება Binary-Log-ით. SET SQL_LOG_UPDATE გარდაიქმნა SET SQL_LOG_BIN პარამეტრად. ეს პარამეტრი წაიშლება MariaDB-ის ვერსიაში 5.6"
+        spa "El historial (log) de actualización se encuentra obsoleto y reemplazado por el historial binario; SET SQL_LOG_UPDATE ha sido traducido a SET SQL_LOG_BIN. Esta opción será quitada en MariaDB 5.6"
+ER_QUERY_INTERRUPTED 70100 
+        chi "查询执行中断"
+        eng "Query execution was interrupted"
+        ger "Ausführung der Abfrage wurde unterbrochen"
+        geo "მოთხოვნის შესრულება შეწყვეტილია"
+        spa "Se ha interrumpido la ejecución de la consulta (query)"
+ER_SP_WRONG_NO_OF_ARGS 42000 
+        chi "%s%s的参数数量不正确;预期%u,得到%u"
+        eng "Incorrect number of arguments for %s %s; expected %u, got %u"
+        ger "Falsche Anzahl von Argumenten für %s %s; erwarte %u, erhalte %u"
+        geo "%s %s-ის არგუმენტების არასწორი რაოდენობა. მოველოდი %u-ს, მივიღე %u"
+        spa "Número incorrecto de argumentos para %s %s; se esperaba %u, se obtuvo %u"
+ER_SP_COND_MISMATCH 42000 
+        chi "未定义的CONDITION:%s"
+        eng "Undefined CONDITION: %s"
+        ger "Undefinierte CONDITION: %s"
+        geo "განუსაზღვრელი CONDITION: %s"
+        spa "CONDICIÓN no definida: %s"
+ER_SP_NORETURN 42000 
+        chi "FUNCTION%s中没有RETURN"
+        eng "No RETURN found in FUNCTION %s"
+        ger "Kein RETURN in FUNCTION %s gefunden"
+        geo "ფუნქციაში %s RETURN ვერ ვიპოვე"
+        hindi "FUNCTION %s में कोई RETURN नहीं है"
+        spa "No se ha hallado RETURN en FUNCIÓN %s"
+ER_SP_NORETURNEND 2F005 
+        chi "FUNCTION%s结束但无RETURN"
+        eng "FUNCTION %s ended without RETURN"
+        ger "FUNCTION %s endete ohne RETURN"
+        geo "FUNCTION %s დასრულდა RETURN-ის გარეშე"
+        hindi "FUNCTION %s RETURN के बिना समाप्त हो गया"
+        spa "La FUNCIÓN %s termina sin RETURN"
+ER_SP_BAD_CURSOR_QUERY 42000 
+        chi "Cursor语句必须是选择"
+        eng "Cursor statement must be a SELECT"
+        ger "Cursor-Anweisung muss ein SELECT sein"
+        geo "კურსორის გამოსახულება SELECT-ს უნდა წარმოადგენდეს"
+        spa "La sentencia de cursor debe de ser un SELECT"
+ER_SP_BAD_CURSOR_SELECT 42000 
+        chi "Cursor SELECT不能有INTO"
+        eng "Cursor SELECT must not have INTO"
+        ger "Cursor-SELECT darf kein INTO haben"
+        geo "Cursor-Select-ი არ შეიძლება, INTO-ს შეიცავდეს"
+        spa "El SELECT de cursor no debe de tener INTO"
+ER_SP_CURSOR_MISMATCH 42000 
+        chi "未定义的CURSOR:%s"
+        eng "Undefined CURSOR: %s"
+        ger "Undefinierter CURSOR: %s"
+        geo "აღუწერელი CURSOR: %s"
+        hindi "CURSOR %s अपरिभाषित है"
+        spa "CURSOR indefinido: %s"
+ER_SP_CURSOR_ALREADY_OPEN 24000 
+        chi "Cursor已经打开"
+        eng "Cursor is already open"
+        ger "Cursor ist schon geöffnet"
+        geo "CURSOR-ი უკვე ღიაა"
+        hindi "CURSOR पहले से ही खुला है"
+        spa "Cursor ya abierto"
+ER_SP_CURSOR_NOT_OPEN 24000 
+        chi "Cursor未打开"
+        eng "Cursor is not open"
+        ger "Cursor ist nicht geöffnet"
+        geo "Cursor-ი ღია არაა"
+        spa "Cursor no abierto"
+ER_SP_UNDECLARED_VAR 42000 
+        chi "未定义的变量:%s"
+        eng "Undeclared variable: %s"
+        ger "Nicht deklarierte Variable: %s"
+        geo "ცვლადი აღწერილი არაა: %s"
+        spa "Variable sin declarar: %s"
+ER_SP_WRONG_NO_OF_FETCH_ARGS  
+        chi "FETCH变量数不正确"
+        eng "Incorrect number of FETCH variables"
+        ger "Falsche Anzahl von FETCH-Variablen"
+        geo "FETCH-ის ცვლადების არასწორი რაოდენობა"
+        spa "Incorrecto número de variables FETCH"
+ER_SP_FETCH_NO_DATA 02000 
+        chi "没有数据 - 零行被选择或处理"
+        eng "No data - zero rows fetched, selected, or processed"
+        ger "Keine Daten - null Zeilen geholt (fetch), ausgewählt oder verarbeitet"
+        geo "მონაცემების გარეშე - მიღებულია, არჩეულია ან დამუშავებულია 0 ჩანაწერი"
+        spa "No hay datos - cero filas logradas, seleccionadas o procesadas"
+ER_SP_DUP_PARAM 42000 
+        chi "重复参数:%s"
+        eng "Duplicate parameter: %s"
+        ger "Doppelter Parameter: %s"
+        geo "დუბლირებული პარამეტრი: %s"
+        spa "Parámetro duplicado: %s"
+ER_SP_DUP_VAR 42000 
+        chi "重复变量:%s"
+        eng "Duplicate variable: %s"
+        ger "Doppelte Variable: %s"
+        geo "დუბლირებული ცვლადი: %s"
+        spa "Variable duplicada: %s"
+ER_SP_DUP_COND 42000 
+        chi "重复条件:%s"
+        eng "Duplicate condition: %s"
+        ger "Doppelte Bedingung: %s"
+        geo "დუბლირებული პირობა: %s"
+        spa "Condición duplicada: %s"
+ER_SP_DUP_CURS 42000 
+        chi "重复Cursor:%s"
+        eng "Duplicate cursor: %s"
+        ger "Doppelter Cursor: %s"
+        geo "დუბლირებული კურსორი: %s"
+        spa "Cursor duplicado: %s"
+ER_SP_CANT_ALTER  
+        chi "未能ALTER %s%s"
+        eng "Failed to ALTER %s %s"
+        ger "ALTER %s %s fehlgeschlagen"
+        geo "%s %s-ის ALTER-ის შეცდომა"
+        hindi "%s %s को ALTER करने में असफल रहे"
+        spa "Fallo en ALTER %s %s"
+ER_SP_SUBSELECT_NYI 0A000 
+        chi "不支持子查询值"
+        eng "Subquery value not supported"
+        ger "Subquery-Wert wird nicht unterstützt"
+        geo "ქვემოთხოვნის მნიშვნელობა მხარდაუჭერელია"
+        spa "Valor de Subconsulta (subquery) no soportado"
+ER_STMT_NOT_ALLOWED_IN_SF_OR_TRG 0A000
+        chi "在存储的函数或触发中不允许%s"
+        eng "%s is not allowed in stored function or trigger"
+        ger "%s ist in gespeicherten Funktionen und in Triggern nicht erlaubt"
+        geo "%s დამახსოვრებულ ფუნქციაში და ტრიგერში დაშვებული არაა"
+        spa "%s no permitido en función almacenada o en disparador"
+ER_SP_VARCOND_AFTER_CURSHNDLR 42000 
+        chi "变量或条件声明在cursor或处理程序定义之后"
+        eng "Variable or condition declaration after cursor or handler declaration"
+        ger "Deklaration einer Variablen oder einer Bedingung nach der Deklaration eines Cursors oder eines Handlers"
+        geo "ცვლადის ან პირობის აღწერა კურსორის ან დამმუშავებლის აღწერის შემდეგ"
+        spa "Declaración de variable o condición tras declaración de cursor o manejador"
+ER_SP_CURSOR_AFTER_HANDLER 42000 
+        chi "处理程序声明后的cursor声明"
+        eng "Cursor declaration after handler declaration"
+        ger "Deklaration eines Cursors nach der Deklaration eines Handlers"
+        geo "კურსორის აღწერა დამმუშავებლის აღწერის შემდეგ"
+        spa "Declaración de cursor tras declaración de manejador"
+ER_SP_CASE_NOT_FOUND 20000 
+        chi "未能在CASE语句找到Case"
+        eng "Case not found for CASE statement"
+        ger "Fall für CASE-Anweisung nicht gefunden"
+        geo "CASE გამოსახულებისთვის პირობა ვერ ვიპოვე"
+        spa "Caso no hallado para sentencia CASE"
+ER_FPARSER_TOO_BIG_FILE  
+        chi "配置文件'%-.192s'太大了"
+        eng "Configuration file '%-.192s' is too big"
+        ger "Konfigurationsdatei '%-.192s' ist zu groß"
+        geo "კონფიგურაციის ფაილი '%-.192s' ძალიან დიდია"
+        rus "Слишком большой конфигурационный файл '%-.192s'"
+        spa "El fichero/archivo de configuración '%-.192s' es demasiado grande"
+        ukr "Занадто великий конфігураційний файл '%-.192s'"
+ER_FPARSER_BAD_HEADER  
+        chi "文件'%-.192s'中的文件类型格式有问题"
+        eng "Malformed file type header in file '%-.192s'"
+        ger "Nicht wohlgeformter Dateityp-Header in Datei '%-.192s'"
+        geo "ფაილის ტიპის არასწორი თავსართი ფაილში '%-.192s'"
+        rus "Неверный заголовок типа файла '%-.192s'"
+        spa "Cabecera de tipo de fichero/archivo malformada en fichero/archivo '%-.192s'"
+        ukr "Невірний заголовок типу у файлі '%-.192s'"
+ER_FPARSER_EOF_IN_COMMENT  
+        chi "解析评论'%-.200s'时意外碰到EOF"
+        eng "Unexpected end of file while parsing comment '%-.200s'"
+        ger "Unerwartetes Dateiende beim Parsen des Kommentars '%-.200s'"
+        geo "ფაილის მოულოდნელი დასასრული კომენტარის დამუშავებისას '%-.200s'"
+        rus "Неожиданный конец файла в коментарии '%-.200s'"
+        spa "Inesperado fin de fichero/archivo mientras se analizaba comentario '%-.200s'"
+        ukr "Несподіванний кінець файлу у коментарі '%-.200s'"
+ER_FPARSER_ERROR_IN_PARAMETER  
+        chi "解析参数'%-.192s'时出错(行:'%-.192s')"
+        eng "Error while parsing parameter '%-.192s' (line: '%-.192s')"
+        ger "Fehler beim Parsen des Parameters '%-.192s' (Zeile: '%-.192s')"
+        geo "შეცდომა პარამეტრის ('%-.192s') დამუშავებისას (ხაზი: '%-.192s')"
+        rus "Ошибка при распознавании параметра '%-.192s' (строка: '%-.192s')"
+        spa "Error mientras se analizaba parámetro '%-.192s' (línea: '%-.192s')"
+        ukr "Помилка в роспізнаванні параметру '%-.192s' (рядок: '%-.192s')"
+ER_FPARSER_EOF_IN_UNKNOWN_PARAMETER  
+        chi "跳过未知参数'%-.192s'时意外碰到EOF"
+        eng "Unexpected end of file while skipping unknown parameter '%-.192s'"
+        ger "Unerwartetes Dateiende beim Überspringen des unbekannten Parameters '%-.192s'"
+        geo "ფაილის მოულოდნელი დასასრული უცნობი პარამეტრის ('%-.192s') დამუშავებისას"
+        rus "Неожиданный конец файла при пропуске неизвестного параметра '%-.192s'"
+        spa "Inesperado fin de fichero/archivo mientras se saltaba parámetro desconocido '%-.192s'"
+        ukr "Несподіванний кінець файлу у спробі проминути невідомий параметр '%-.192s'"
+ER_VIEW_NO_EXPLAIN  
+        chi "ANALYZE/EXPLAIN/SHOW无法进行;缺乏底层表的特权"
+        eng "ANALYZE/EXPLAIN/SHOW can not be issued; lacking privileges for underlying table"
+        ger "ANALYZE/EXPLAIN/SHOW kann nicht verlangt werden. Rechte für zugrunde liegende Tabelle fehlen"
+        geo "ANALYZE/EXPLAIN/SHOW-ის გაშვება შეუძლებელია. მოთხოვნილ ცხრილზე საკმარისი წვდომები არ გაგაჩნიათ"
+        rus "ANALYZE/EXPLAIN/SHOW не может быть выполнено; недостаточно прав на таблицы запроса"
+        spa "ANALYZE/EXPLAIN/SHOW no puede ser emitido; privilegios insuficientes para tabla subyacente"
+        ukr "ANALYZE/EXPLAIN/SHOW не може бути виконано; немає прав на таблиці запиту"
+ER_FRM_UNKNOWN_TYPE  
+        chi "文件'%-.192s'在其标题中有未知的'%-.64s'"
+        eng "File '%-.192s' has unknown type '%-.64s' in its header"
+        ger "Datei '%-.192s' hat unbekannten Typ '%-.64s' im Header"
+        geo "ფაილის '%-.192s' უცნობი ტიპი '%-.64s' მის თავსართში"
+        rus "Файл '%-.192s' содержит неизвестный тип '%-.64s' в заголовке"
+        spa "El fichero/archivo '%-.192s' es de un tipo desconocido '%-.64s' en su cabecera"
+        ukr "Файл '%-.192s' має невідомий тип '%-.64s' у заголовку"
+ER_WRONG_OBJECT  
+        chi "'%-.192s.%-.192s'不是'%s'类"
+        eng "'%-.192s.%-.192s' is not of type '%s'"
+        ger "'%-.192s.%-.192s' ist nicht %s"
+        geo "'%-.192s.%-.192s'-ი '%s'-ის ტიპს არ წარმოადგენს"
+        rus "'%-.192s.%-.192s' - не %s"
+        spa "'%-.192s.%-.192s' no es del tipo '%s'"
+        ukr "'%-.192s.%-.192s' не є %s"
+ER_NONUPDATEABLE_COLUMN  
+        chi "列'%-.192s'不可更新"
+        eng "Column '%-.192s' is not updatable"
+        ger "Feld '%-.192s' ist nicht aktualisierbar"
+        geo "სვეტი '%-.192s' განახლებადი არაა"
+        rus "Столбец '%-.192s' не обновляемый"
+        spa "La columna '%-.192s' no es actualizable"
+        ukr "Стовбець '%-.192s' не може бути зминений"
+ER_VIEW_SELECT_DERIVED  
+        chi "View的Select的FROM包含子查询"
+        eng "View's SELECT contains a subquery in the FROM clause"
+        ger "SELECT der View enthält eine Subquery in der FROM-Klausel"
+        geo "View-ის SELECT-ი FROM პირობაში ქვემოთხოვნას შეიცავს"
+        rus "View SELECT содержит подзапрос в конструкции FROM"
+        spa "El SELECT de la vista contiene una subconsulta (subquery) en la cláusula FROM"
+        ukr "View SELECT має підзапит у конструкції FROM"
+
+# Not used any more, syntax error is returned instead
+ER_VIEW_SELECT_CLAUSE  
+        chi "View的Select包含“%s”子句"
+        eng "View's SELECT contains a '%s' clause"
+        ger "SELECT der View enthält eine '%s'-Klausel"
+        geo "View-ის SELECT შეიცავს პირობას '%s'"
+        rus "View SELECT содержит конструкцию '%s'"
+        spa "El SELECT de la vista contiene una cláusula '%s'"
+        ukr "View SELECT має конструкцію '%s'"
+ER_VIEW_SELECT_VARIABLE  
+        chi "View的选择包含变量或参数"
+        eng "View's SELECT contains a variable or parameter"
+        ger "SELECT der View enthält eine Variable oder einen Parameter"
+        geo "View-ის SELECT-ი ცვლადს ან პარამეტრს შეიცავს"
+        rus "View SELECT содержит переменную или параметр"
+        spa "El SELECT de la vista contiene una variable o un parámetro"
+        ukr "View SELECT має зминну або параметер"
+ER_VIEW_SELECT_TMPTABLE  
+        chi "View的SELECT指的是临时表'%-.192s'"
+        eng "View's SELECT refers to a temporary table '%-.192s'"
+        ger "SELECT der View verweist auf eine temporäre Tabelle '%-.192s'"
+        geo "View-ის SELECT-ი დროებით ცხრილზე '%-.192s' მიმართვას შეიცავს"
+        rus "View SELECT содержит ссылку на временную таблицу '%-.192s'"
+        spa "El SELECT de la vista se refiere a una tabla temporal '%-.192s'"
+        ukr "View SELECT використовує тимчасову таблицю '%-.192s'"
+ER_VIEW_WRONG_LIST  
+        chi "View的选择和VIEW的字段列表具有不同的列计数"
+        eng "View's SELECT and view's field list have different column counts"
+        ger "SELECT- und Feldliste der Views haben unterschiedliche Anzahlen von Spalten"
+        geo "View-ის SELECT-ი და View-ის ველების სია განსხვავებულ სვეტების რაოდენობას შეიცავენ"
+        rus "View SELECT и список полей view имеют разное количество столбцов"
+        spa "El SELECT de la vista y la lista de campos de la vista tienen un contador diferente de columnas"
+        ukr "View SELECT і перелік стовбців view мають різну кількість сковбців"
+ER_WARN_VIEW_MERGE  
+        chi "View合并算法目前不能使用(假设未定义的算法)"
+        eng "View merge algorithm can't be used here for now (assumed undefined algorithm)"
+        ger "View-Merge-Algorithmus kann hier momentan nicht verwendet werden (undefinierter Algorithmus wird angenommen)"
+        geo "View-ის შერწყმის ალგორითმს ახლა ვერ გამოიყენებთ (ალგორითმი აღუწერელი იქნება)"
+        rus "Алгоритм слияния view не может быть использован сейчас (алгоритм будет неопеределенным)"
+        spa "El algoritmo de fusión de la vista no se puede usar aquí por ahora (se asume algoritmo indefinido)"
+        ukr "Алгоритм зливання view не може бути використаний зараз (алгоритм буде невизначений)"
+ER_WARN_VIEW_WITHOUT_KEY  
+        chi "更新的视图没有底层表的完整键"
+        eng "View being updated does not have complete key of underlying table in it"
+        ger "Die aktualisierte View enthält nicht den vollständigen Schlüssel der zugrunde liegenden Tabelle"
+        geo "განახლებადი View მასში გამოყენებული ცხრილის სრულ გასაღებს არ შეიცავს"
+        rus "Обновляемый view не содержит ключа использованных(ой) в нем таблиц(ы)"
+        spa "La vista que se está actualizando no tiene clave completa de la tabla subyacente que contiene"
+        ukr "View, що оновлюеться, не містить повного ключа таблиці(ь), що викорістана в ньюому"
+ER_VIEW_INVALID  
+        chi "View'%-.192s.%-.192s'引用无效的表、列、函数、或者函数或View缺乏使用权"
+        eng "View '%-.192s.%-.192s' references invalid table(s) or column(s) or function(s) or definer/invoker of view lack rights to use them"
+        geo "View '%-.192s.%-.192s' მიმართავს არასწორ ცხრილებს ან სვეტებს ან ფუნქციებს ან View-ის აღმწერს/ჩამწოდებელს მისი გამოყენების უფლებები არ გააჩნია"
+        spa "La vista '%-.192s.%-.192s' hace referencia a tabla(s) o columna(s) o función(es) inválida(s) o al definidor/invocador de vista le faltan derechos para usarlos"
+ER_SP_NO_DROP_SP  
+        chi "无法从另一个存储的例程中删除或更改%s"
+        eng "Can't drop or alter a %s from within another stored routine"
+        ger "Kann eine %s nicht von innerhalb einer anderen gespeicherten Routine löschen oder ändern"
+        geo "%s-ის წაშლა ან შეცვლა სხვა დამახსოვრებული ქვეპროგრამიდან შეუძლებელია"
+        spa "No puedo eliminar o alterar una %s desde dentro de otra rutina almacenada"
+ER_SP_GOTO_IN_HNDLR  
+        chi "在存储过程处理程序中不允许GOTO"
+        eng "GOTO is not allowed in a stored procedure handler"
+        ger "GOTO ist im Handler einer gespeicherten Prozedur nicht erlaubt"
+        geo "GOTO დამახსოვრებული პროცედურის დამმუშავებელში დაუშვებელია"
+        spa "GOTO no permitido en manejador de procedimiento almacenado"
+ER_TRG_ALREADY_EXISTS  
+        chi "触发'%s'已经存在"
+        eng "Trigger '%s' already exists"
+        ger "Trigger '%s' existiert bereits"
+        geo "ტრიგერი '%s' უკვე არსებობს"
+        hindi "TRIGGER '%s' पहले से मौजूद है"
+        spa "Ya existe el disparador `%s`"
+ER_TRG_DOES_NOT_EXIST  
+        chi "触发不存在"
+        eng "Trigger does not exist"
+        ger "Trigger existiert nicht"
+        geo "ტრიგერი არ არსებობს"
+        hindi "TRIGGER मौजूद नहीं है"
+        spa "El disparador no existe"
+ER_TRG_ON_VIEW_OR_TEMP_TABLE  
+        chi "触发器的'%-.192s'是视图或临时表"
+        eng "Trigger's '%-.192s' is a view, temporary table or sequence"
+        ger "'%-.192s' des Triggers ist ein View, temporäre Tabelle oder Sequence"
+        geo "ტრიგერის '%-.192s' View-ს, დროებით ცხრილს ან მიმდევრობას წარმოადგენს"
+        spa "El disparador '%-.192s' es una vista, tabla temporal o secuencia"
+        hindi "Trigger का '%-.192s' एक व्यू, टेम्पररी टेबल या सीक्वेंस है"
+ER_TRG_CANT_CHANGE_ROW  
+        chi "更新%s行在%s触发器中不允许"
+        eng "Updating of %s row is not allowed in %strigger"
+        ger "Aktualisieren einer %s-Zeile ist in einem %s-Trigger nicht erlaubt"
+        geo "%s მწკრივის განახლება %s ტრიგერში შეუძლებელია"
+        spa "Actualizar la fila %s no está permitido en disparador %s"
+ER_TRG_NO_SUCH_ROW_IN_TRG  
+        chi "没有%s行,触发%s"
+        eng "There is no %s row in %s trigger"
+        ger "Es gibt keine %s-Zeile im %s-Trigger"
+        geo "%s მწრივი %s ტრიგერში არ არსებობს"
+        spa "No hay fila %s en disparador %s"
+ER_NO_DEFAULT_FOR_FIELD  
+        chi "字段'%-.192s'没有默认值"
+        eng "Field '%-.192s' doesn't have a default value"
+        ger "Feld '%-.192s' hat keinen Vorgabewert"
+        geo "ველს '%-.192s' ნაგულისხმები მნიშვნელობა არ გააჩნია"
+        spa "El campo '%-.192s' no tiene un valor por defecto"
+ER_DIVISION_BY_ZERO 22012 
+        chi "除0错误"
+        eng "Division by 0"
+        ger "Division durch 0"
+        geo "ნულზე გაყოფა"
+        hindi "0 से विभाजन"
+        spa "División por 0"
+ER_TRUNCATED_WRONG_VALUE_FOR_FIELD  22007
+        chi "不正确的%-.32s值:'%-.128T'用于列`%.192s`.`%.192s`.`%.192s`在%lu行"
+        eng "Incorrect %-.32s value: '%-.128T' for column `%.192s`.`%.192s`.`%.192s` at row %lu"
+        ger "Falscher %-.32s-Wert: '%-.128T' für Feld `%.192s`.`%.192s`.`%.192s` in Zeile %lu"
+        geo "%-.32s-ის არასწორი მნიშვნელობა '%-.128T' სვეტისთვის `%.192s`.`%.192s`.`%.192s` მწკრივთან %lu"
+        spa "Incorrecto %-.32s valor: '%-.128T' para columna `%.192s`.`%.192s`.`%.192s` en la fila %lu"
+ER_ILLEGAL_VALUE_FOR_TYPE 22007 
+        chi "在解析期间发现的非法%s '%-.192T'值"
+        eng "Illegal %s '%-.192T' value found during parsing"
+        ger "Nicht zulässiger %s-Wert '%-.192T' beim Parsen gefunden"
+        geo "დამუშავებისას აღმოჩენილია %s '%-.192T'-ის დაუშვებელი მნიშვნელობა"
+        spa "Hallado valor ilegal %s '%-.192T' durante el análisis"
+ER_VIEW_NONUPD_CHECK  
+        chi "在不可更新的视图%`-.192s.%`-.192s上CHECK OPTION"
+        eng "CHECK OPTION on non-updatable view %`-.192s.%`-.192s"
+        ger "CHECK OPTION auf nicht-aktualisierbarem View %`-.192s.%`-.192s"
+        geo "CHECK OPTION განუახლებელი VIEW-სთვის %`-.192s.%`-.192s"
+        rus "CHECK OPTION для необновляемого VIEW %`-.192s.%`-.192s"
+        spa "CHECK OPTION en vista no actualizable %`-.192s.%`-.192s"
+        ukr "CHECK OPTION для VIEW %`-.192s.%`-.192s що не може бути оновленним"
+ER_VIEW_CHECK_FAILED 44000
+        chi "CHECK OPTION失败%`-.192s.%`-.192s"
+        eng "CHECK OPTION failed %`-.192s.%`-.192s"
+        ger "CHECK OPTION fehlgeschlagen: %`-.192s.%`-.192s"
+        geo "CHECK OPTION-ის შეცდომა %`-.192s.%`-.192s"
+        rus "Проверка CHECK OPTION для VIEW %`-.192s.%`-.192s провалилась"
+        spa "CHECK OPTION falló %`-.192s.%`-.192s"
+        ukr "Перевірка CHECK OPTION для VIEW %`-.192s.%`-.192s не пройшла"
+ER_PROCACCESS_DENIED_ERROR 42000 
+        chi "%-.32s命令被拒绝。用户为'%s'@'%s' 例程'%-.192s'"
+        eng "%-.32s command denied to user '%s'@'%s' for routine '%-.192s'"
+        ger "Befehl %-.32s nicht zulässig für Benutzer '%s'@'%s' in Routine '%-.192s'"
+        geo "%-.32s ბრძანება აკრძალულია მომხმარებლისთვის '%s'@'%s' ქვეპროგრამისთვის '%-.192s'"
+        spa "Comando %-.32s denegado para el usuario '%s'@'%s' para rutina '%-.192s'"
+ER_RELAY_LOG_FAIL  
+        chi "清除旧继relay日志失败:%s"
+        eng "Failed purging old relay logs: %s"
+        ger "Bereinigen alter Relais-Logs fehlgeschlagen: %s"
+        geo "ძველი Relay-Log-ის წაშლის შეცდომა: %s"
+        spa "Falló la purga de viejos historiales (logs) de reenvío: %s"
+ER_PASSWD_LENGTH  
+        chi "密码哈希应该是一个%d-digit十六进制数"
+        eng "Password hash should be a %d-digit hexadecimal number"
+        ger "Passwort-Hash sollte eine Hexadezimalzahl mit %d Stellen sein"
+        geo "პაროლი %d-ციფრიან თექვსმეტობით რიცხვს უნდა წარმოადგენდეს"
+        spa "El valor calculado de la contraseña debería de ser un número hexadecimal de %d-dígitos"
+ER_UNKNOWN_TARGET_BINLOG  
+        chi "在Binlog索引中找不到目标日志"
+        eng "Target log not found in binlog index"
+        ger "Ziel-Log im Binlog-Index nicht gefunden"
+        geo "სამიზნე ჟურნალი binlog-ის ინდექსში აღმოჩენილი არაა"
+        spa "Historial (log) de destino no hallado en índice binlog"
+ER_IO_ERR_LOG_INDEX_READ  
+        chi "读取日志索引文件时I/O错误"
+        eng "I/O error reading log index file"
+        ger "Fehler beim Lesen der Log-Index-Datei"
+        geo "შეტ/გამოტ შეცდომა ჟურნალის ინდექსის ფაილის კითხვისას"
+        spa "Error de E/S leyendo fichero/archivo índice de historial (log)"
+ER_BINLOG_PURGE_PROHIBITED  
+        chi "服务器配置不允许Binlog清除"
+        eng "Server configuration does not permit binlog purge"
+        ger "Server-Konfiguration erlaubt keine Binlog-Bereinigung"
+        geo "სერვერის კონფიგურაცია Binlog ჟურნალის წაშლის უფლებას არ იძლევა"
+        spa "La configuración del servidor no permite purgar binlog"
+ER_FSEEK_FAIL  
+        chi "fseek()失败"
+        eng "Failed on fseek()"
+        ger "fseek() fehlgeschlagen"
+        geo "fseek()-ის შეცდომა"
+        hindi "fseek() विफल रहा"
+        spa "Fallo en fseek()"
+ER_BINLOG_PURGE_FATAL_ERR  
+        chi "日志清除期间的致命错误"
+        eng "Fatal error during log purge"
+        ger "Schwerwiegender Fehler bei der Log-Bereinigung"
+        geo "ფატალური შეცდომა ჟურნალის წაშლისას"
+        spa "Error fatal durante la purga del historial (log)"
+ER_LOG_IN_USE  
+        chi "日志在用,不会清除"
+        eng "A purgeable log is in use, will not purge"
+        ger "Ein zu bereinigendes Log wird gerade benutzt, daher keine Bereinigung"
+        geo "წასაშლელი ჟურნალი უკვე გამოიყენება. არ წაიშლება"
+        spa "Se encuentra en uso un historial purgable, no lo purgaré"
+ER_LOG_PURGE_UNKNOWN_ERR  
+        chi "日志清除期间未知错误"
+        eng "Unknown error during log purge"
+        ger "Unbekannter Fehler bei Log-Bereinigung"
+        geo "უცნობი შეცდომა ჟურნალის წაშლისას"
+        spa "Error desconocido durante la purga del historial (log)"
+ER_RELAY_LOG_INIT  
+        chi "初始化relay日志失败。位置:%s"
+        eng "Failed initializing relay log position: %s"
+        ger "Initialisierung der Relais-Log-Position fehlgeschlagen: %s"
+        geo "Relay-Log-ის მდებარეობის ინიციალიზაციის შეცდომა: %s"
+        spa "Fallo inicializando la posición del historial de reenvío: %s"
+ER_NO_BINARY_LOGGING  
+        chi "您不使用二进制日志记录"
+        eng "You are not using binary logging"
+        ger "Sie verwenden keine Binärlogs"
+        geo "თქვენ Binary-Log-ს არ იყენებთ"
+        spa "No está usando historial (log) binario"
+ER_RESERVED_SYNTAX  
+        chi "'%-.64s'语法保留用于MariaDB服务器内部"
+        eng "The '%-.64s' syntax is reserved for purposes internal to the MariaDB server"
+        ger "Die Schreibweise '%-.64s' ist für interne Zwecke des MariaDB-Servers reserviert"
+        geo "'%-.64s' სინტაქსი MariaDB სერვერის შიდა მოხმარებისთვის გამოიყენება"
+        spa "La sintaxis '%-.64s' está reservada para propósitos internos del servidor MariaDB"
+ER_WSAS_FAILED  
+        chi "WSAStartup失败了"
+        eng "WSAStartup Failed"
+        ger "WSAStartup fehlgeschlagen"
+        geo "WSAStartup-ის შეცდომა"
+        spa "Falló WSAStartup"
+ER_DIFF_GROUPS_PROC  
+        chi "无法处理具有不同组的过程"
+        eng "Can't handle procedures with different groups yet"
+        ger "Kann Prozeduren mit unterschiedlichen Gruppen noch nicht verarbeiten"
+        geo "სხვაადსხვა ჯგუფების მქონე პროცედურების დამუშავება ჯერ შეუძლებელია"
+        spa "No puedo manejar procedimientos con grupos diferentes, aún"
+ER_NO_GROUP_FOR_PROC  
+        chi "SELECT必须具有此过程的组"
+        eng "Select must have a group with this procedure"
+        ger "SELECT muss bei dieser Prozedur ein GROUP BY haben"
+        geo "ამ პროცედურაში SELECT-ს ჯგუფი უნდა ჰქონდეს"
+        spa "La selección debe de tener un grupo con este procedimiento"
+ER_ORDER_WITH_PROC  
+        chi "无法在此存储过程使用ORDER子句"
+        eng "Can't use ORDER clause with this procedure"
+        ger "Kann bei dieser Prozedur keine ORDER-BY-Klausel verwenden"
+        geo "ამ პროცედურაში ORDER პირობას ვერ გამოიყენებთ"
+        spa "No puedo usar la cláusula ORDER con este procedimiento"
+ER_LOGGING_PROHIBIT_CHANGING_OF  
+        chi "二进制日志记录和复制禁止更改全局服务器%s"
+        eng "Binary logging and replication forbid changing the global server %s"
+        ger "Binärlogs und Replikation verhindern Wechsel des globalen Servers %s"
+        geo "Binary-Log-ი და რეპლიკაცია კრძალავს გლობალური სერვერის %s ცვლილებას"
+        spa "El historial (log) binario y la réplica prohibe cambiar el servidor global %s"
+ER_NO_FILE_MAPPING  
+        chi "无法映射文件:%-.200s,错误号码:%M"
+        eng "Can't map file: %-.200s, errno: %M"
+        ger "Kann Datei nicht abbilden: %-.200s, Fehler: %M"
+        geo "ფაილის მიბმის შეცდომა %-.200s, შეცდ. კოდი: %M"
+        spa "No puedo mapear fichero/archivo: %-.200s, error: %M"
+ER_WRONG_MAGIC  
+        chi "魔法错误%-.64s"
+        eng "Wrong magic in %-.64s"
+        ger "Falsche magische Zahlen in %-.64s"
+        geo "არასწორი მაგიური რიცხვი %-.64s-ში"
+        spa "Magia equivocada en %-.64s"
+ER_PS_MANY_PARAM  
+        chi "Prepared statement包含太多占位符"
+        eng "Prepared statement contains too many placeholders"
+        ger "Vorbereitete Anweisung enthält zu viele Platzhalter"
+        geo "მომზდადებული გამოსახულება მეტისმეტად ბევრ ადგილმჭერს შეიცავს"
+        spa "Sentencia preparada contiene demasiados marcadores de posición"
+ER_KEY_PART_0  
+        chi "索引部分'%-.192s'长度不能为0"
+        eng "Key part '%-.192s' length cannot be 0"
+        ger "Länge des Schlüsselteils '%-.192s' kann nicht 0 sein"
+        geo "გასაღების ნაწილის '%-.192s' სიგრძე ნულის ტოლი ვერ იქნება"
+        spa "El tamaño de trozo de clave '%-.192s' no puede ser 0"
+ER_VIEW_CHECKSUM  
+        chi "查看文本checksum失败"
+        eng "View text checksum failed"
+        ger "View-Text-Prüfsumme fehlgeschlagen"
+        geo "View-Text-ის საკონტროლო ჯამის პრობლემა"
+        rus "Проверка контрольной суммы текста VIEW провалилась"
+        spa "Ha fallado la suma de comprobación del texto de la vista"
+        ukr "Перевірка контрольної суми тексту VIEW не пройшла"
+ER_VIEW_MULTIUPDATE  
+        chi "无法通过JOIN视图'%-.192s.%-.192s'修改多个基础表。"
+        eng "Can not modify more than one base table through a join view '%-.192s.%-.192s'"
+        ger "Kann nicht mehr als eine Basistabelle über Join-View '%-.192s.%-.192s' ändern"
+        geo "JOIN VIEW '%-.192s.%-.192s'-ით ერთი ან მეტი საბაზისო ცხრილის ცვლილება შეუძლებელია"
+        rus "Нельзя изменить больше чем одну базовую таблицу используя многотабличный VIEW '%-.192s.%-.192s'"
+        spa "No puedo modificar más de una tabla base a través de una vista de unión '%-.192s.%-.192s'"
+        ukr "Неможливо оновити більш ниж одну базову таблицю выкористовуючи VIEW '%-.192s.%-.192s', що містіть декілька таблиць"
+ER_VIEW_NO_INSERT_FIELD_LIST  
+        chi "无法写入JOIN视图'%-.192s.%-.192s'没有字段列表"
+        eng "Can not insert into join view '%-.192s.%-.192s' without fields list"
+        ger "Kann nicht ohne Feldliste in Join-View '%-.192s.%-.192s' einfügen"
+        geo "join view '%-.192s.%-.192s'-ში ველების სიის გარეშე ჩანაწერეების დამატება შეუძლებელია"
+        rus "Нельзя вставлять записи в многотабличный VIEW '%-.192s.%-.192s' без списка полей"
+        spa "No puedo insertar dentro de vista de unión '%-.192s.%-.192s' sin lista de campos"
+        ukr "Неможливо уставити рядки у VIEW '%-.192s.%-.192s', що містить декілька таблиць, без списку стовбців"
+ER_VIEW_DELETE_MERGE_VIEW  
+        chi "无法从JOIN视图'%-.192s.%-.192s'删除"
+        eng "Can not delete from join view '%-.192s.%-.192s'"
+        ger "Kann nicht aus Join-View '%-.192s.%-.192s' löschen"
+        geo "JOIN VIEW '%-.192s.%-.192s'-დან წაშლა შეუძლებელია"
+        rus "Нельзя удалять из многотабличного VIEW '%-.192s.%-.192s'"
+        spa "No puedo borrar desde vista de unión '%-.192s.%-.192s'"
+        ukr "Неможливо видалити рядки у VIEW '%-.192s.%-.192s', що містить декілька таблиць"
+ER_CANNOT_USER  
+        chi "操作%s失败%.256s"
+        eng "Operation %s failed for %.256s"
+        ger "Operation %s schlug fehl für %.256s"
+        geo "%s ოპერაციის შეცდომა %.256s-სთვის"
+        norwegian-ny "Operation %s failed for '%.256s'"
+        spa "Ha fallado la operación %s para %.256s"
+ER_XAER_NOTA XAE04
+        chi "XAER_NOTA:未知的XID"
+        eng "XAER_NOTA: Unknown XID"
+        ger "XAER_NOTA: Unbekannte XID"
+        geo "XAER_NOTA: უცნობი XID"
+        spa "XAER_NOTA: XID desconocido"
+ER_XAER_INVAL XAE05
+        chi "XAER_INVAL:无效的参数(或不支持的命令)"
+        eng "XAER_INVAL: Invalid arguments (or unsupported command)"
+        ger "XAER_INVAL: Ungültige Argumente (oder nicht unterstützter Befehl)"
+        geo "XAER_INVAL: არასწორი არგუმენტები (ან მხარდაუჭერელი ბრძანება)"
+        spa "XAER_INVAL: Argumentos inválidos (o comando no soportado)"
+ER_XAER_RMFAIL XAE07
+        chi "XAER_RMFAIL:当全局事务处于%.64s状态时,无法执行该命令"
+        eng "XAER_RMFAIL: The command cannot be executed when global transaction is in the %.64s state"
+        ger "XAER_RMFAIL: DEr Befehl kann nicht ausgeführt werden, wenn die globale Transaktion im Zustand %.64s ist"
+        geo "XAER_RMFAIL: ბრძანების შესრულება შეუძლებელია, სანამ გლობალური ტრანზაქცია %.64s მდგომარეობაშია"
+        rus "XAER_RMFAIL: эту команду нельзя выполнять когда глобальная транзакция находится в состоянии '%.64s'"
+        spa "XAER_RMFAIL: El comando no se puede ejecutar cuando la transacción global se encuentra en estado %.64s"
+ER_XAER_OUTSIDE XAE09
+        chi "XAER_OUTSIDE:一些工作是在全局交易之外完成的"
+        eng "XAER_OUTSIDE: Some work is done outside global transaction"
+        ger "XAER_OUTSIDE: Einige Arbeiten werden außerhalb der globalen Transaktion verrichtet"
+        geo "XAER_OUTSIDE: ზოგიერთი სამუსაო გლობალური ტრანზაქცის გარეთ სრულდება"
+        spa "XAER_OUTSIDE: Algún trabajo se ha realizado fuera de la transacción global"
+ER_XAER_RMERR XAE03
+        chi "XAER_RMERR:事务分支中发生致命错误 - 检查您的数据以获得一致性"
+        eng "XAER_RMERR: Fatal error occurred in the transaction branch - check your data for consistency"
+        ger "XAER_RMERR: Schwerwiegender Fehler im Transaktionszweig - prüfen Sie Ihre Daten auf Konsistenz"
+        geo "XAER_RMERR: ტრანზაქციის ბრენჩის ფატალური შეცდომა - შეამოწმეთ თქვენი მონაცემების სიმთელე"
+        spa "XAER_RMERR: Ha ocurrido un error fatal en la rama de la transacción - revise la consistencia de sus datos"
+ER_XA_RBROLLBACK XA100
+        chi "XA_RBROLLBACK:交易分支回滚"
+        eng "XA_RBROLLBACK: Transaction branch was rolled back"
+        ger "XA_RBROLLBACK: Transaktionszweig wurde zurückgerollt"
+        geo "XA_RBROLLBACK: ტრნზაქციის ბრენჩი დაბრუნდა"
+        spa "XA_RBROLLBACK: La rama de la transacción ha sido retrocedida (rolled back)"
+ER_NONEXISTING_PROC_GRANT 42000 
+        chi "无授权:用户'%-.48s'主机'%-.64s'ROUTINE'%-.192s'"
+        eng "There is no such grant defined for user '%-.48s' on host '%-.64s' on routine '%-.192s'"
+        ger "Es gibt diese Berechtigung für Benutzer '%-.48s' auf Host '%-.64s' für Routine '%-.192s' nicht"
+        geo "მომხმარებლისთვის '%-.48s' ჰოსტზე '%-.64s' ქვეპროგრამაზე '%-.192s' უფლება აღწერილი არაა"
+        spa "No existe tal concesión definida para el usuario '%-.48s' en equipo '%-.64s' en rutina '%-.192s'"
+ER_PROC_AUTO_GRANT_FAIL
+        chi "无法授予EXECUTE和ALTER ROUTINE权限"
+        eng "Failed to grant EXECUTE and ALTER ROUTINE privileges"
+        ger "Gewährung von EXECUTE- und ALTER-ROUTINE-Rechten fehlgeschlagen"
+        geo "EXECUTE და ALTER ROUTINE პრივილეგიების მინიჭების შეცდომა"
+        spa "Fallo al conceder privilegios de EXECUTE y ALTER ROUTINE"
+ER_PROC_AUTO_REVOKE_FAIL
+        chi "无法撤消所有权限以删除例程"
+        eng "Failed to revoke all privileges to dropped routine"
+        ger "Rücknahme aller Rechte für die gelöschte Routine fehlgeschlagen"
+        geo "მოცილებული პროცედურიდან ყველა პრივილეგიის გაუქმების შეცდომა"
+        spa "Fallo al rescindir todos los privilegios de la rutina anulada"
+ER_DATA_TOO_LONG 22001
+        chi "列'%s'行%lu数据太长"
+        eng "Data too long for column '%s' at row %lu"
+        ger "Daten zu lang für Feld '%s' in Zeile %lu"
+        geo "მონაცემი ძალიან გრძელია სვეტისთვის '%s' მწკრივზე %lu"
+        spa "Datos demasiado largos para la columna '%s' en la fila %lu"
+ER_SP_BAD_SQLSTATE 42000
+        chi "坏SQLSTATE:'%s'"
+        eng "Bad SQLSTATE: '%s'"
+        ger "Ungültiger SQLSTATE: '%s'"
+        geo "ცუდი SQLSTATE: '%s'"
+        spa "Mal SQLSTATE: '%s'"
+ER_STARTUP
+        chi "%s:已经准备好接受连接\nVersion:'%s'套接字:'%s'端口:%d %s"
+        eng "%s: ready for connections.\nVersion: '%s'  socket: '%s'  port: %d  %s"
+        ger "%s: bereit für Verbindungen.\nVersion: '%s'  Socket: '%s'  Port: %d  %s"
+        geo "%s: დასაკავშირებლად მზადაა.\nვერსია: '%s'  სოკეტი: '%s'  პორტი: %d  %s"
+        spa "%s: preparada para conexiones.\nVersión: '%s' conector: '%s' puerto: %d  %s"
+ER_LOAD_FROM_FIXED_SIZE_ROWS_TO_VAR
+        chi "无法从带有固定大小行的文件中加载值到变量"
+        eng "Can't load value from file with fixed size rows to variable"
+        ger "Kann Wert aus Datei mit Zeilen fester Größe nicht in Variable laden"
+        geo "ფიქსირებული ზომის მწკრივების მქონე ფაილიდან ცვლადში მნისვნელობის ჩატვირთვის შეცდომა"
+        spa "No puedo cargar valor desde fichero/archivo con filas de tamaño fijo en variable"
+ER_CANT_CREATE_USER_WITH_GRANT 42000
+        chi "您不允许使用创建用户时给予GRANT"
+        eng "You are not allowed to create a user with GRANT"
+        ger "Sie dürfen keinen Benutzer mit GRANT anlegen"
+        geo "GRANT-ით მომხმარებლის შექმნის უფლება არ გაქვთ"
+        spa "No está autorizado a crear un usuario con GRANT"
+ER_WRONG_VALUE_FOR_TYPE  
+        chi "不正确的%-.32s值:'%-.128T' 函数:%-.32s"
+        eng "Incorrect %-.32s value: '%-.128T' for function %-.32s"
+        ger "Falscher %-.32s-Wert: '%-.128T' für Funktion %-.32s"
+        geo "არასწორი %-.32s მნიშვნელობა: '%-.128T' ფუნქციისთვის %-.32s"
+        spa "Incorrecto valor %-.32s: '%-.128T' para la función %-.32s"
+ER_TABLE_DEF_CHANGED
+        chi "表定义已更改,请重试"
+        eng "Table definition has changed, please retry transaction"
+        ger "Tabellendefinition wurde geändert, bitte starten Sie die Transaktion neu"
+        geo "ცხრილის აღწერა შეიცვალა. გთხოვთ, თავიდან გაუშვათ ტრანზაქცია"
+        spa "Ha cambiado la definición de la tabla, por favor reintente la transacción"
+ER_SP_DUP_HANDLER 42000
+        chi "在同一块中声明的处理程序重复"
+        eng "Duplicate handler declared in the same block"
+        ger "Doppelter Handler im selben Block deklariert"
+        geo "იგივე ბლოკში დუბლირებული დამმუშავებელია აღწერილი"
+        spa "Manejador duplicado declarado en mismo bloque"
+ER_SP_NOT_VAR_ARG 42000
+        chi "OUT或INOUT参数%d 例程 %s的不是BEFORE触发器里的变量或新伪变量"
+        eng "OUT or INOUT argument %d for routine %s is not a variable or NEW pseudo-variable in BEFORE trigger"
+        ger "OUT- oder INOUT-Argument %d für Routine %s ist keine Variable"
+        geo "OUT და INOUT არგუმენტი %d ქვეპროგრამისთვის %s ცვლადს არ წარმოადგენს, ან NEW ფსევდოცვლადი BEFORE ტრიგერში"
+        spa "El argumento %d OUT o INOUT para la rutina %s no es una variable o pseudo-variable NEW en disparador BEFORE"
+ER_SP_NO_RETSET 0A000
+        chi "不允许从%s返回结果集"
+        eng "Not allowed to return a result set from a %s"
+        ger "Rückgabe einer Ergebnismenge aus einer %s ist nicht erlaubt"
+        geo "%s-დან შედეგების ნაკრების დაბრუნება დაუშვებელია"
+        spa "No autorizado a devolver un conjunto de resultados desde un %s"
+ER_CANT_CREATE_GEOMETRY_OBJECT 22003 
+        chi "无法从发送到几何字段的数据中获取几何对象"
+        eng "Cannot get geometry object from data you send to the GEOMETRY field"
+        ger "Kann kein Geometrieobjekt aus den Daten machen, die Sie dem GEOMETRY-Feld übergeben haben"
+        geo "GEOMETRY ველში გაგზავნილი მონაცემებიდან გეომეტრიული ობიექტის მიღება შეუძლებელია"
+        spa "No puedo obtener objeto de geometría desde los datos que vd envía al campo GEOMETRY"
+ER_FAILED_ROUTINE_BREAK_BINLOG
+        chi "ROUTINE失败,定义中既没有NO SQL也没有READ SQL DAT。启用二进制日志记录;如果更新非事务性表,则二进制日志将会错过其更改"
+        eng "A routine failed and has neither NO SQL nor READS SQL DATA in its declaration and binary logging is enabled; if non-transactional tables were updated, the binary log will miss their changes"
+        ger "Eine Routine, die weder NO SQL noch READS SQL DATA in der Deklaration hat, schlug fehl und Binärlogging ist aktiv. Wenn Nicht-Transaktions-Tabellen aktualisiert wurden, enthält das Binärlog ihre Änderungen nicht"
+        geo "ქვეპროგრამა ავარიულად დასრულდა და გააჩნია არც NO SQL, არც READS SQL DATA მის აღწერაში და ბინარული ჟურნალი ჩართულია. თუ განახლებულია არატრანზაქციული ცხრილები, ბინარული ჟურნალი მათ ცვლილებებს არ შეიცავს"
+        spa "Ha fallado una rutina y no tiene ni NO SQL ni READS SQL DATA en su declaración y el historial (log) binario se encuentra activado; si han sido actualizadas tablas no transaccionales, el fichero/archivo binario de historial (log) perderá sus cambios"
+ER_BINLOG_UNSAFE_ROUTINE
+        chi "此函数定义中没有DETERMINISTIC,NO SQL,或者READS SQL DATA,并且已启用二进制日志记录(您*可能*希望使用较少的安全性的log_bin_trust_function_creators变量)"
+        eng "This function has none of DETERMINISTIC, NO SQL, or READS SQL DATA in its declaration and binary logging is enabled (you *might* want to use the less safe log_bin_trust_function_creators variable)"
+        ger "Diese Routine hat weder DETERMINISTIC, NO SQL noch READS SQL DATA in der Deklaration und Binärlogging ist aktiv (*vielleicht* sollten Sie die weniger sichere Variable log_bin_trust_function_creators verwenden)"
+        geo "ამ ფუნქციას არ გააჩნია DETERMINISTIC, NO SQL, ან READS SQL DATA მის აღწერაში და ბინარული ჟურნალი ჩართულია (შეიძლება თქვენ ნაკლებად უსაფრთხო log_bin_trust_function_creators ცვლადის გამოყენება გჭირდებათ)"
+        spa "Esta función no tiene ninguno de DETERMINISTIC, NO SQL o READS SQL DATA en su declaración y está activado el historial binario (vd *podría* querer usar la variable menos segura log_bin_trust_function_creators)"
+ER_BINLOG_CREATE_ROUTINE_NEED_SUPER
+        chi "您没有超级特权和二进制日志记录已启用(您*可能*想要使用较少的安全log_bin_trust_function_creators变量)"
+        eng "You do not have the SUPER privilege and binary logging is enabled (you *might* want to use the less safe log_bin_trust_function_creators variable)"
+        ger "Sie haben keine SUPER-Berechtigung und Binärlogging ist aktiv (*vielleicht* sollten Sie die weniger sichere Variable log_bin_trust_function_creators verwenden)"
+        geo "თქვენ SUPER პრივილეგია არ გაგაჩნიათ და ბინარული ჟურნალი ჩართულია (შეიძლება თქვენ ნაკლებად უსაფრთხო log_bin_trust_function_creators ცვლადის გამოყენება გჭირდებათ)"
+        spa "No tiene el privilegio SUPER y está activado el historial binario (*podría* querer usar la variable menos segura log_bin_trust_function_creators)"
+ER_EXEC_STMT_WITH_OPEN_CURSOR
+        chi "您无法执行具有与之关联的打开Cursor的prepared statement。重置语句以重新执行它"
+        eng "You can't execute a prepared statement which has an open cursor associated with it. Reset the statement to re-execute it"
+        ger "Sie können keine vorbereitete Anweisung ausführen, die mit einem geöffneten Cursor verknüpft ist. Setzen Sie die Anweisung zurück, um sie neu auszuführen"
+        geo "თქვენ არ შეგიძლიათ გაუშვათ მომზადებული გამოსახულება, რომელსაც მასთან ასოცირებული ღია კურსორი გააჩნია. გამოსახულების თავიდან შესასრულებლად მისი საწყის მნიშვნელობებზე დაბრუნებაა საჭირო"
+        spa "No puede ejecutar una sentencia preparada que tiene abierto un cursor asociado con ella. Renueve la sentencia para re-ejecutarla"
+ER_STMT_HAS_NO_OPEN_CURSOR
+        chi "语句(%lu)没有开放的Cursor"
+        eng "The statement (%lu) has no open cursor"
+        ger "Die Anweisung (%lu) hat keinen geöffneten Cursor"
+        geo "გამოსახულებას (%lu) ღია კურსორი არ გააჩნია"
+        spa "La sentencia (%lu) no tiene cursor abierto"
+ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG
+        chi "在存储的函数或触发器中不允许显式或隐式提交"
+        eng "Explicit or implicit commit is not allowed in stored function or trigger"
+        ger "Explizites oder implizites Commit ist in gespeicherten Funktionen und in Triggern nicht erlaubt"
+        spa "Comisión (commit) implícita o explícita no permitida en funciones almacenadas o en disparadores"
+ER_NO_DEFAULT_FOR_VIEW_FIELD
+        chi "VIEW的列'%-.192s.%-.192s'底层表没有默认值"
+        eng "Field of view '%-.192s.%-.192s' underlying table doesn't have a default value"
+        ger "Ein Feld der dem View '%-.192s.%-.192s' zugrundeliegenden Tabelle hat keinen Vorgabewert"
+        geo "ხედის ველის '%-.192s.%-.192s' ქვედა ცხრილს ნაგულისხმები მნიშვნელობა არ გააჩნია"
+        spa "El campo de tabla subyacente de vista '%-.192s.%-.192s' no tiene valor por defecto"
+ER_SP_NO_RECURSION
+        chi "不允许递归存储功能和触发器"
+        eng "Recursive stored functions and triggers are not allowed"
+        ger "Rekursive gespeicherte Routinen und Triggers sind nicht erlaubt"
+        geo "რეკურსიული დამახსოვრებული ფუნქციები და ტრიგერები დაშვებული არაა"
+        spa "No autorizadas funciones almacenadas recursivas ni disparadores"
+ER_TOO_BIG_SCALE 42000 S1009
+        chi "为'%-.192s'指定的标度(scale)太大。最大是%u"
+        eng "Too big scale specified for '%-.192s'. Maximum is %u"
+        ger "Zu großer Skalierungsfaktor für '%-.192s' angegeben. Maximum ist %u"
+        geo "'%-.192s'-სთვის მითითებული მასშტაბი მეტისმეტად დიდია. მაქსიმუმია %u"
+        spa "Escala demasiado grande especificada para '%-.192s'. El máximo es de %u"
+ER_TOO_BIG_PRECISION 42000 S1009
+        chi "指定的精度太大 '%-.192s'。最大是%u"
+        eng "Too big precision specified for '%-.192s'. Maximum is %u"
+        ger "Zu große Genauigkeit für '%-.192s' angegeben. Maximum ist %u"
+        geo "'%-.192s'-სთვის მითითებული სიზუსთე მეტისმეტად დიდია. მაქსიმუმია %u"
+        spa "Precisión demasiado grande especificada para '%-.192s'. El máximo es de %u"
+ER_M_BIGGER_THAN_D 42000 S1009
+        chi "对于FLOAT(M,D),DOUBLE(M,D)或DECIMAL(M,D),M必须>=D(列'%-.192s')"
+        eng "For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column '%-.192s')"
+        ger "Für FLOAT(M,D), DOUBLE(M,D) oder DECIMAL(M,D) muss M >= D sein (Feld '%-.192s')"
+        geo "float(M,D), double(M,D) და decimal(M,D)-სთვის M >= D უნდა იყოს (სვეტი '%-.192s')"
+        spa "Para flotante(M,D), doble(M,D) o decimal(M,D), M debe de ser >= D (columna '%-.192s')"
+ER_WRONG_LOCK_OF_SYSTEM_TABLE
+        chi "您无法将系统表的写入锁定与其他表或锁定类型相结合"
+        eng "You can't combine write-locking of system tables with other tables or lock types"
+        ger "Sie können Schreibsperren auf der Systemtabelle nicht mit anderen Tabellen kombinieren"
+        geo "სისტემური ცხრილების ჩაწერის დაბლოკვის სხვა ცხრილების ან დაბლოკვის ტიპებთან შერევა შეუძლებელია"
+        spa "No puede combinar bloqueo de escritura de tablas de sistema con otras tablas o tipos de bloqueo"
+ER_CONNECT_TO_FOREIGN_DATA_SOURCE
+        chi "无法连接到外数据源:%.64s"
+        eng "Unable to connect to foreign data source: %.64s"
+        ger "Kann nicht mit Fremddatenquelle verbinden: %.64s"
+        geo "გარე მონაცემების წყაროსთან მიერთების შეცდომა: %.64s"
+        spa "No puedo conectar a fuente foránea de datos: %.64s"
+ER_QUERY_ON_FOREIGN_DATA_SOURCE
+        chi "处理对外数据源上的查询时出现问题。数据源错误:%-.64s"
+        eng "There was a problem processing the query on the foreign data source. Data source error: %-.64s"
+        ger "Bei der Verarbeitung der Abfrage ist in der Fremddatenquelle ein Problem aufgetreten. Datenquellenfehlermeldung: %-.64s"
+        geo "გარე მონაცემების წყაროზე მოთხოვნის დამუშავების პრობლემა. მონაცემების წყაროს შეცდომა: %-.64s"
+        spa "Hubo un problema procesando la consulta (query) en la fuente foránea de datos. Error de fuente de datos: %-.64s"
+ER_FOREIGN_DATA_SOURCE_DOESNT_EXIST
+        chi "您尝试引用的外数据源不存在。数据源错误:%-.64s"
+        eng "The foreign data source you are trying to reference does not exist. Data source error:  %-.64s"
+        ger "Die Fremddatenquelle, auf die Sie zugreifen wollen, existiert nicht. Datenquellenfehlermeldung:  %-.64s"
+        geo "მონაცემების გარე წყარო, რომელსაც ცდილობთ მიმართოთ, არ არსებობს. მონაცემების წყაროს შეცდომა: %-.64s"
+        spa "La fuente foránea de datos que intenta referenciar no existe. Error en fuente de datos:  %-.64s"
+ER_FOREIGN_DATA_STRING_INVALID_CANT_CREATE
+        chi "无法创建联合表。数据源连接字符串'%-.64s'不是正确的格式"
+        eng "Can't create federated table. The data source connection string '%-.64s' is not in the correct format"
+        ger "Kann föderierte Tabelle nicht erzeugen. Der Datenquellen-Verbindungsstring '%-.64s' hat kein korrektes Format"
+        spa "No puedo crear tabla federada. La cadena de conexión de la fuente de datos '%-.64s' no tiene el formato correcto"
+ER_FOREIGN_DATA_STRING_INVALID
+        chi "数据源连接字符串'%-.64s'不是正确的格式"
+        eng "The data source connection string '%-.64s' is not in the correct format"
+        ger "Der Datenquellen-Verbindungsstring '%-.64s' hat kein korrektes Format"
+        spa "La cadena de conexión de la fuente de datos '%-.64s' no tiene el formato correcto"
+ER_CANT_CREATE_FEDERATED_TABLE  
+        chi "无法创建联合表。外数据SRC错误:%-.64s"
+        eng "Can't create federated table. Foreign data src error:  %-.64s"
+        ger "Kann föderierte Tabelle nicht erzeugen. Fremddatenquellenfehlermeldung:  %-.64s"
+        spa "No puedo crear tabla federada. Error en fuente de datos foráneos:  %-.64s"
+ER_TRG_IN_WRONG_SCHEMA  
+        chi "触发在错的SCHEMA"
+        eng "Trigger in wrong schema"
+        ger "Trigger im falschen Schema"
+        geo "ტრიგერი არასწორ სქემაშია"
+        spa "Disparador en esquema equivocado"
+ER_STACK_OVERRUN_NEED_MORE
+        chi "线程堆栈溢出:%ld字节堆栈的%ld字节,以及所需的%ld字节。使用'mariadbd --thread_stack =#'指定更大的堆栈"
+        eng "Thread stack overrun:  %ld bytes used of a %ld byte stack, and %ld bytes needed. Consider increasing the thread_stack system variable."
+        ger "Thread-Stack-Überlauf: %ld Bytes eines %ld-Byte-Stacks in Verwendung, und %ld Bytes benötigt. Verwenden Sie 'mariadbd --thread_stack=#', um einen größeren Stack anzugeben"
+        geo "ნაკადის სტეკის გადავსება: გამოყენებულია %ld ბაიტი %ld ბაიტიანი სტეკიდან და საჭიროა %ld ბაიტი. გაზარდეთ სისტემის ცვლადი thread_stack."
+        jpn "スレッドスタック不足です(使用: %ld ; サイズ: %ld ; 要求: %ld)。より大きい値で 'mariadbd --thread_stack=#' の指定をしてください。"
+        spa "Desbordamiento en pila de hilos (threads): %ld bytes usados de una pila de %ld y son necesarios %ld bytes. Considere el incrementar la variable de sistema --thread_stack=#."
+ER_TOO_LONG_BODY 42000 S1009
+        chi "'%-.100s'的ROUTINE太长了"
+        eng "Routine body for '%-.100s' is too long"
+        ger "Routinen-Body für '%-.100s' ist zu lang"
+        geo "ქვეპროგრამის სხეული '%-.100s' ძალიან დიდია"
+        spa "El cuerpo de rutina para '%-.100s' es demasiado largo"
+ER_WARN_CANT_DROP_DEFAULT_KEYCACHE
+        chi "无法删除默认索引缓存"
+        eng "Cannot drop default keycache"
+        ger "Der vorgabemäßige Schlüssel-Cache kann nicht gelöscht werden"
+        geo "ნაგულისხმები გასაღების კეშის წაშლა შეუძლებელია"
+        spa "No puedo eliminar keycache por defecto"
+ER_TOO_BIG_DISPLAYWIDTH 42000 S1009
+        chi "显示宽度超过'%-.192s'的范围(max =%lu)"
+        eng "Display width out of range for '%-.192s' (max = %lu)"
+        ger "Anzeigebreite außerhalb des zulässigen Bereichs für '%-.192s' (Maximum = %lu)"
+        geo "'%-.192s'-ის ჩვენების სიგანე დიაპაზონს გარეთაა (მაქს. = %lu)"
+        spa "Ancho a mostrar fuera de rango para '%-.192s' (máx = %lu)"
+ER_XAER_DUPID XAE08
+        chi "XAER_DUPID:xid已存在"
+        eng "XAER_DUPID: The XID already exists"
+        ger "XAER_DUPID: Die XID existiert bereits"
+        geo "XAER_DUPID: XID-ი უკვე არსებობს"
+        spa "XAER_DUPID: El XID ya existe"
+ER_DATETIME_FUNCTION_OVERFLOW 22008
+        chi "DateTime函数:%-.32s字段溢出"
+        eng "Datetime function: %-.32s field overflow"
+        ger "Datetime-Funktion: %-.32s Feldüberlauf"
+        geo "დრო-თარიღის ფუნქცია: %-.32s ველი გადავსებულია"
+        spa "Función Datetime: %-.32s desbordamiento de campo"
+ER_CANT_UPDATE_USED_TABLE_IN_SF_OR_TRG
+        chi "在存储的函数/触发器中无法更新表'%-.192s',因为它已被调用此存储的函数/触发器调用的语句"
+        eng "Can't update table '%-.192s' in stored function/trigger because it is already used by statement which invoked this stored function/trigger"
+        ger "Kann Tabelle '%-.192s' in gespeicherter Funktion oder Trigger nicht aktualisieren, weil sie bereits von der Anweisung verwendet wird, die diese gespeicherte Funktion oder den Trigger aufrief"
+        geo "ცხრილის ('%-.192s') შენახულ პროცედურაში/ტრიგერში განახლება შეუძლებელია, რადგან ის უკვე გამოიყენება გამოსახულების მიერ, რომელსაც ეს შენახული ფუნქცია/ტრიგერი იყენებს"
+        spa "No puedo actualizar tabla '%-.192s' en función almacenada/disparador porque ya está siendo usada por la sentencia que invocó esta función almacenada/disparador"
+ER_VIEW_PREVENT_UPDATE
+        chi "表'%-.192s'的定义可防止在表'%-.192s'上的操作'%-.192s'"
+        eng "The definition of table '%-.192s' prevents operation %-.192s on table '%-.192s'"
+        ger "Die Definition der Tabelle '%-.192s' verhindert die Operation %-.192s auf Tabelle '%-.192s'"
+        geo "ცხრილის ('%-.192s') აღწერა ხელს უშლის ოპერაციას ('%-.192s') ცხრილზე '%-.192s'"
+        spa "La definición de la tabla '%-.192s' previene la operación %-.192s en la tabla '%-.192s'"
+ER_PS_NO_RECURSION
+        chi "prepared statement包含一个有关该语句的存储例程调用。它不允许以这种递归方式执行prepared statement"
+        eng "The prepared statement contains a stored routine call that refers to that same statement. It's not allowed to execute a prepared statement in such a recursive manner"
+        ger "Die vorbereitete Anweisung enthält einen Aufruf einer gespeicherten Routine, die auf eben dieselbe Anweisung verweist. Es ist nicht erlaubt, eine vorbereitete Anweisung in solch rekursiver Weise auszuführen"
+        geo "მომზადებული გამოსახულება შეიცავს დამახსოვრებულ ქვეპროგრამის გამოძახებას, რომელიც იგივე გამოსახულებას მიმართავს. შენახული გამოსახულების რეკურსიულად გაშვება დაშვებული არაა"
+        spa "La sentencia preparada contiene una llamada a rutina almacenada que se refiere a esa misma sentencia. No está permitido ejecutar una sentencia preparada de esta manera recursiva"
+ER_SP_CANT_SET_AUTOCOMMIT
+        chi "不允许从存储的函数或触发器设置自动提交(autocommit)"
+        eng "Not allowed to set autocommit from a stored function or trigger"
+        ger "Es ist nicht erlaubt, innerhalb einer gespeicherten Funktion oder eines Triggers AUTOCOMMIT zu setzen"
+        geo "შენახული ფუნქციიდან და ტრიგერიდან ავტოკომიტის დაყენება დაშვებული არაა"
+        spa "No permitido usar auto acometida (autocommit) desde una función almacenada o disparador"
+ER_MALFORMED_DEFINER 0L000
+        chi "无效的定义"
+        eng "Invalid definer"
+        geo "არასწორი აღმწერი"
+        spa "Definidor inválido"
+ER_VIEW_FRM_NO_USER
+        chi "视图'%-.192s'.'%-.192s'没有定义者信息(旧表格式)。当前用户被用作定义者。请重新创建视图!"
+        eng "View '%-.192s'.'%-.192s' has no definer information (old table format). Current user is used as definer. Please recreate the view!"
+        ger "View '%-.192s'.'%-.192s' hat keine Definierer-Information (altes Tabellenformat). Der aktuelle Benutzer wird als Definierer verwendet. Bitte erstellen Sie den View neu"
+        geo "ხედს '%-.192s'.'%-.192s' აღმწერის ინფორმაცია არ გააჩნია (ცხრილის ფორმატი ძველია). აღმწერად მიმდინარე მომხმარებელი გამოიყენება. გთხოვთ, შექმენით ეს ხედი თავიდან!"
+        spa "La vista '%-.192s'.'%-.192s' no tiene información de definidor (formato viejo de tabla). Se usa el usuario actual como definidor. Por favor, ¡recrea la vista!"
+ER_VIEW_OTHER_USER
+        chi "您需要使用'%-.192s'@'%-.192s'的创建视图的超级特权"
+        eng "You need the SUPER privilege for creation view with '%-.192s'@'%-.192s' definer"
+        ger "Sie brauchen die SUPER-Berechtigung, um einen View mit dem Definierer '%-.192s'@'%-.192s' zu erzeugen"
+        spa "Vd necesita el privilegio SUPER para la creación de la vista con definidor '%-.192s'@'%-.192s'"
+ER_NO_SUCH_USER
+        chi "指定为定义的用户('%-.64s'@'%-.64s')不存在"
+        eng "The user specified as a definer ('%-.64s'@'%-.64s') does not exist"
+        ger "Der als Definierer angegebene Benutzer ('%-.64s'@'%-.64s') existiert nicht"
+        geo "('%-.64s'@'%-.64s')-ის აღმწერად მითითებული მომხმარებელი არ არსებობს"
+        spa "El usuario especificado como definidor ('%-.64s'@'%-.64s') no existe"
+ER_FORBID_SCHEMA_CHANGE
+        chi "不允许从'%-.192s'到'%-.192s'的SCHEMA更改"
+        eng "Changing schema from '%-.192s' to '%-.192s' is not allowed"
+        ger "Wechsel des Schemas von '%-.192s' auf '%-.192s' ist nicht erlaubt"
+        geo "სქემის '%-.192s'-დან '%-.192s'-ზე შეცვლა დაშვებული არაა"
+        spa "Vd no está autorizado a cambiar el esquema de '%-.192s' a '%-.192s'"
+ER_ROW_IS_REFERENCED_2 23000
+        chi "无法删除或更新父行:外键约束失败(%.192s)"
+        eng "Cannot delete or update a parent row: a foreign key constraint fails (%.192s)"
+        ger "Kann Eltern-Zeile nicht löschen oder aktualisieren: eine Fremdschlüsselbedingung schlägt fehl (%.192s)"
+        geo "მშობელი მწკრივის წაშლა ან განახლება შეუძლებელია: გარე გასაღების შეზღუდვა შეცდომას შეიცავს (%.192s)"
+        spa "No puedo borrar o actualizar una fila padre: falla una restricción de clave foránea (%.192s)"
+ER_NO_REFERENCED_ROW_2 23000
+        chi "无法添加或更新子行:外键约束失败(%.192s)"
+        eng "Cannot add or update a child row: a foreign key constraint fails (%.192s)"
+        ger "Kann Kind-Zeile nicht hinzufügen oder aktualisieren: eine Fremdschlüsselbedingung schlägt fehl (%.192s)"
+        geo "შვილი მწკრივის წაშლა ან განახლება შეუძლებელია: გარე გასაღების შეზღუდვა შეცდომას შეიცავს (%.192s)"
+        spa "No puedo añadir o actualizar una fila hija: falla una restricción de clave foránea (%.192s)"
+ER_SP_BAD_VAR_SHADOW 42000
+        chi "变量'%-.64s'必须用`...`,或重命名"
+        eng "Variable '%-.64s' must be quoted with `...`, or renamed"
+        ger "Variable '%-.64s' muss mit `...` geschützt oder aber umbenannt werden"
+        geo "ცვლადი '%-.64s' `...` ბრჭყალებში უნდა ჩასვათ, ან სახელი გადაარქვათ"
+        spa "La variable '%-.64s' debe de ser entrecomillada con `...` o renombrada"
+ER_TRG_NO_DEFINER
+        chi "触发器'%-.192s'.'%-.192s'没有定义者属性。触发器将在调用者的授权下激活,该权限可能不足。请重新创建触发器"
+        eng "No definer attribute for trigger '%-.192s'.'%-.192s'. The trigger will be activated under the authorization of the invoker, which may have insufficient privileges. Please recreate the trigger"
+        ger "Kein Definierer-Attribut für Trigger '%-.192s'.'%-.192s'. Der Trigger wird mit der Autorisierung des Aufrufers aktiviert, der möglicherweise keine zureichenden Berechtigungen hat. Bitte legen Sie den Trigger neu an"
+        geo "ტრიგერს '%-.192s'.'%-.192s' აღმწერის ატრიბუტი არ გააჩნია. ტრიგერი გამშვების ავტორიზაციის ქვეშ გაეშვება, რომლის პრივილეგიებიც შეიძლება საკმარისი არ აღმოჩნდეს. თავიდან შექმენით ტრიგერი"
+        spa "No hay atributo de definidor para disparador '%-.192s'.'%-.192s'. El disparador será activado bajo la autorización del invocador, el cual puede tener insuficientes privilegios. Por favor, vuelva a crear el disparador"
+ER_OLD_FILE_FORMAT
+        chi "'%-.192s'具有旧格式,您应该重新创建'%s'对象"
+        eng "'%-.192s' has an old format, you should re-create the '%s' object(s)"
+        ger "'%-.192s' hat altes Format, Sie sollten die '%s'-Objekt(e) neu erzeugen"
+        geo "'%-.192s'-ის ფორმატი მოძველებულია. საჭიროა '%s' ობიექტის თავიდან შექმნა"
+        spa "'%-.192s' tiene un formato viejo, debería vd de volver a crear el/los objeto(s) '%s'"
+ER_SP_RECURSION_LIMIT
+        chi "递归限制%d(由max_sp_recursion_depth变量设置)已被例程%.192s超过"
+        eng "Recursive limit %d (as set by the max_sp_recursion_depth variable) was exceeded for routine %.192s"
+        ger "Rekursionsgrenze %d (durch Variable max_sp_recursion_depth gegeben) wurde für Routine %.192s überschritten"
+        geo "რეკურსიულობის ლიმიტი %d (დაყენებულია ცვლადით max_sp_recursion_depth) გადაცილებულია ქვეპროგრამისთვის  %.192s"
+        spa "El límite recursivo %d (según se indica mediante la variable max_sp_recursion_depth) se ha excedido para la rutina %.192s"
+ER_SP_PROC_TABLE_CORRUPT
+        chi "无法加载常规%-.192s(内部代码%d)。有关更多详细信息,请运行SHOW WARNINGS"
+        eng "Failed to load routine %-.192s (internal code %d). For more details, run SHOW WARNINGS"
+        ger "Routine %-.192s (interner Code %d) konnte nicht geladen werden. Weitere Einzelheiten erhalten Sie, wenn Sie SHOW WARNINGS ausführen"
+        geo "ქვეპროგრამის (%-.192s) ჩატვირთვის შეცდომა (შიდა კოდი %d). მეტი დეტალებისთვის გაუშვით SHOW WARNINGS"
+        ukr "Невдала спроба завантажити процедуру %-.192s (внутрішний код %d). Для отримання детальної інформації використовуйте SHOW WARNINGS"
+        spa "No pude cargar la rutina %-.192s (código interno %d). Para más detalles, ejecute SHOW WARNINGS"
+ER_SP_WRONG_NAME 42000
+        chi "常规名称错误不正确'%-.192s'"
+        eng "Incorrect routine name '%-.192s'"
+        ger "Ungültiger Routinenname '%-.192s'"
+        geo "ქვეპროგრამის არასწორი სახელი '%-.192s'"
+        spa "Nombre incorrecto de rutina '%-.192s'"
+ER_TABLE_NEEDS_UPGRADE
+        chi "需要升级。请执行\"REPAIR %s %`s\"或转储/重新加载以修复!"
+        eng "Upgrade required. Please do \"REPAIR %s %`s\" or dump/reload to fix it!"
+        ger "Aktualisierung erforderlich. Bitte zum Reparieren \"REPAIR %s %`s\" eingeben!"
+        geo "აუცილებელია განახლება. გასასწორებლად გაუშვით \"REPAIR %s %`s\" ან დამპი აიღეთ და თავიდან ჩატვირთეთ!"
+        spa "Es necesaria una mejora. Por favor, ¡haga \"REPAIR %s %`s\" o vuelque/recargue para arreglarlo!"
+ER_SP_NO_AGGREGATE 42000
+        chi "存储函数不支持聚合"
+        eng "AGGREGATE is not supported for stored functions"
+        ger "AGGREGATE wird bei gespeicherten Funktionen nicht unterstützt"
+        geo "დამახსოვრებული ფუნქციებისთვის AGGREGATE მხარდაჭერილი არაა"
+        spa "AGGREGATE no está soportado en funciones almacenadas"
+ER_MAX_PREPARED_STMT_COUNT_REACHED 42000
+        chi "无法创建超过max_prepared_stmt_count语句(当前值:%u)"
+        eng "Can't create more than max_prepared_stmt_count statements (current value: %u)"
+        ger "Kann nicht mehr Anweisungen als max_prepared_stmt_count erzeugen (aktueller Wert: %u)"
+        geo "max_prepared_stmt_count-ზე მეტი გამოსახულების შექმნა შეუძლებელია (მიმდინარე მნიშვნელობაა: %u)"
+        spa "No puedo crear más de max_prepared_stmt_count sentencias (valor en curso: %u)"
+ER_VIEW_RECURSIVE
+        chi "%`s.%`s包含视图递归"
+        eng "%`s.%`s contains view recursion"
+        ger "%`s.%`s enthält View-Rekursion"
+        geo "%`s.%`s ხედის რეკურსიას შეიცავს"
+        spa "`%-.192s`.`%-.192s` contiene recursividad de vista"
+ER_NON_GROUPING_FIELD_USED 42000
+        chi "非分组字段'%-.192s'用于%-.64s条款"
+        eng "Non-grouping field '%-.192s' is used in %-.64s clause"
+        ger "Die Nicht-Gruppierungsspalte '%-.192s' wird in der %-.64s-Klausel verwendet"
+        geo "არა-დაჯგუფებადი ველი '%-.192s' გამოიყენება პირობაში %-.64s"
+        spa "No hay campo agrupado '%-.192s' usado en cláusula %-.64s"
+ER_TABLE_CANT_HANDLE_SPKEYS
+        chi "存储引擎%s不支持SPATIAL索引"
+        eng "The storage engine %s doesn't support SPATIAL indexes"
+        ger "Der verwendete Tabellentyp (%s) unterstützt keine SPATIAL-Indizes"
+        geo "საცავის ძრავას \"%s\" SPATIAL ტიპის ინდექსების მხარდაჭერა არ გააჩნია"
+        spa "El motor de almacenaje %s no soporta índices SPATIAL"
+ER_NO_TRIGGERS_ON_SYSTEM_SCHEMA
+        chi "无法在系统表上创建触发器"
+        eng "Triggers can not be created on system tables"
+        ger "Trigger können nicht auf Systemtabellen erzeugt werden"
+        geo "სისტემურ ცხრილებზე ტრიგერებს ვერ შექმნით"
+        spa "Los disparadores no pueden ser creados en las tablas del sistema"
+ER_REMOVED_SPACES
+        chi "前面的空格从名称'%s'删除"
+        eng "Leading spaces are removed from name '%s'"
+        ger "Führende Leerzeichen werden aus dem Namen '%s' entfernt"
+        geo "დასაწყისში გამოტოვებები წაშლილია სახელიდან '%s'"
+        spa "Se quitan los espacios iniciales del nombre '%s'"
+ER_AUTOINC_READ_FAILED
+        chi "无法从存储引擎读取自动增量值"
+        eng "Failed to read auto-increment value from storage engine"
+        ger "Lesen des Autoincrement-Werts von der Speicher-Engine fehlgeschlagen"
+        geo "საცავის ძრავიდან ავტომატური გაზრდის მნიშვნელობის წაკითხვის შეცდომა"
+        hindi "स्टोरेज इंजन से auto-increment का मान पढ़ने में असफल रहे"
+        spa "No pude leer valor de auto-incremento del motor de almacenaje"
+ER_USERNAME
+        chi "用户名"
+        eng "user name"
+        ger "Benutzername"
+        geo "მომხმარებლის სახელი"
+        hindi "यूज़र का नाम"
+        spa "nombre de usuario"
+ER_HOSTNAME
+        chi "主机名"
+        eng "host name"
+        ger "Hostname"
+        geo "ჰოსტის სახელი"
+        hindi "होस्ट का नाम"
+        spa "nombre de equipo"
+ER_WRONG_STRING_LENGTH
+        chi "字符串'%-.70T'对于%s(应不超过%d)太长"
+        eng "String '%-.70T' is too long for %s (should be no longer than %d)"
+        ger "String '%-.70T' ist zu lang für %s (sollte nicht länger sein als %d)"
+        geo "სტრიქონი '%-.70T' %s-სთვის ძალიან გრძელია (სიგრძე %d-ზე მეტს არ უნდა უდრიდეს)"
+        spa "La cadena '%-.70T' es demasiado larga para %s (no debería de ser mayor de %d)"
+ER_NON_INSERTABLE_TABLE  
+        chi "目标表%-.100s %s不可插入"
+        eng "The target table %-.100s of the %s is not insertable-into"
+        ger "Die Zieltabelle %-.100s von %s ist nicht einfügbar"
+        geo "სამიზნე ცხრილი %-.100s %s-დან ჩამატებადი არაა"
+        jpn "対象表 %-.100s は挿入可能ではないので、%s を行えません。"
+        spa "La tabla destino %-.100s de la %s no es insertable-dentro"
+ER_ADMIN_WRONG_MRG_TABLE
+        chi "表'%-.64s'不同定义、或非myisam类型、或不存在"
+        eng "Table '%-.64s' is differently defined or of non-MyISAM type or doesn't exist"
+        ger "Tabelle '%-.64s' ist unterschiedlich definiert, nicht vom Typ MyISAM oder existiert nicht"
+        geo "ცხრილი '%-.64s' სხვანაირადაა აღწერილი, არაა MyISAM-ის ტიპის, ან არ არსებობს"
+        spa "La tabla '%-.64s' está definida de forma diferente o es del tipo no-MyISAM o no existe"
+ER_TOO_HIGH_LEVEL_OF_NESTING_FOR_SELECT
+        chi "太高的嵌套SELECT"
+        eng "Too high level of nesting for select"
+        ger "Zu tief verschachtelte SELECT-Anweisungen"
+        geo "SELECT-ის მეტისმეტად მაღალი ან ჩალაგების დონე"
+        spa "Demasiado alto el nivel de anidamiento para la selección"
+ER_NAME_BECOMES_EMPTY
+        chi "名'%-.64s'已成为''"
+        eng "Name '%-.64s' has become ''"
+        ger "Name '%-.64s' wurde zu ''"
+        geo "სახელი '%-.64s' გადაიქცა ''"
+        spa "El nombre '%-.64s' ha pasado a ser ''"
+ER_AMBIGUOUS_FIELD_TERM
+        chi "FIELDS TERMINATED字符串的第一个字符是模棱两可的;请使用非空字段FIELDS ENCLOSED BY"
+        eng "First character of the FIELDS TERMINATED string is ambiguous; please use non-optional and non-empty FIELDS ENCLOSED BY"
+        ger "Das erste Zeichen der Zeichenkette FIELDS TERMINATED ist mehrdeutig; bitte benutzen Sie nicht optionale und nicht leere FIELDS ENCLOSED BY"
+        geo "FIELDS TERMINATED სტრიქონის პირველი სიმბოლო ბუნდოვანია. გამოიყენეთ სავალდებულო და არაცარიელი FIELDS ENCLOSED BY"
+        spa "El primer carácter de la cadena de los FIELDS TERMINATED es ambiguo; por favor, use FIELDS ENCLOSED BY no opcionales y no vacíos"
+ER_FOREIGN_SERVER_EXISTS
+        chi "无法创建外部服务器'%s',因为它已经存在"
+        eng "Cannot create foreign server '%s' as it already exists"
+        fin "Vieraata palvelinta '%s' ei voida luoda, koska se on jo olemassa"
+        fre "Impossible de créer le serveur étranger '%s' car il existe déjà"
+        ger "Der auswärtige Server '%s' kann nicht erstellt werden, da er bereits vorhanden ist"
+        geo "გარე სერვერის '%s' შექმნა შეუძლებელია. ის უკვე არსებობს"
+        greek "Δεν είναι δυνατή η δημιουργία ξένου διακομιστή '%s' επειδή υπάρχει ήδη"
+        ita "Impossibile creare il server esterno '%s' poiché esiste già"
+        jpn "外部サーバー '%s'は既に存在するため、作成できません"
+        nla "Kan geen externe server '%s' maken omdat deze al bestaat"
+        nor "Kan ikke opprette utenlandsk server '%s' fordi den allerede eksisterer"
+        pol "Nie można utworzyć obcego serwera '%s', ponieważ już istnieje"
+        por "Não foi possível criar o servidor externo '%s' porque ele já existe"
+        rus "Невозможно создать сторонний сервер '%s', так как он уже существует"
+        spa "No se puede crear el servidor externo '%s' porque ya existe"
+        swe "Det gick inte att skapa främmande server '%s' eftersom den redan finns"
+ER_FOREIGN_SERVER_DOESNT_EXIST
+        chi "您尝试引用的外部服务器名称不存在。数据源错误:%-.64s"
+        eng "The foreign server name you are trying to reference does not exist. Data source error:  %-.64s"
+        ger "Die externe Verbindung, auf die Sie zugreifen wollen, existiert nicht. Datenquellenfehlermeldung:  %-.64s"
+        geo "გარე სერვერის სახელი, რომელსაც ცდილობთ, მიმართოთ, არ არსებობს. მონაცემების წყაროს შეცდომა:  %-.64s"
+        spa "El nombre del servidor foráneo que intenta referenciar no existe. Error en fuentes de datos:  %-.64s"
+ER_ILLEGAL_HA_CREATE_OPTION
+        chi "表存储引擎'%-.64s'不支持创建选项'%.64s'"
+        eng "Table storage engine '%-.64s' does not support the create option '%.64s'"
+        ger "Speicher-Engine '%-.64s' der Tabelle unterstützt die Option '%.64s' nicht"
+        geo "ცხრილის საცავის ძრავა '%-.64s' შექმნის პარამეტრის '%-.64s' მხარდაჭერა არ გააჩნია"
+        spa "El motor de almacenaje de la tabla '%-.64s' no soporta la opción de creación '%.64s'"
+ER_PARTITION_REQUIRES_VALUES_ERROR
+        chi "语法错误:%-.64s PARTITIONING需要定义给每个分区VALUES %-.64s"
+        eng "Syntax error: %-.64s PARTITIONING requires definition of VALUES %-.64s for each partition"
+        ger "Fehler in der SQL-Syntax: %-.64s-PARTITIONierung erfordert Definition von VALUES %-.64s für jede Partition"
+        geo "სინტაქსის შეცდომა: %-.64s. PARTITIONING-ს VALUES %-.64s-ის დაყენება სჭირდება თითოეული დანაყოფისთვის"
+        spa "Error de sintaxis: %-.64s PARTITIONING requiere de la definición de VALUES %-.64s para cada partición"
+        swe "Syntaxfel: %-.64s PARTITIONering kräver definition av VALUES %-.64s för varje partition"
+ER_PARTITION_WRONG_VALUES_ERROR
+        chi "只有%-.64s PARTITIONING可以使用VALUES %-.64s在分区定义中"
+        eng "Only %-.64s PARTITIONING can use VALUES %-.64s in partition definition"
+        ger "Nur %-.64s-PARTITIONierung kann VALUES %-.64s in der Partitionsdefinition verwenden"
+        geo "მხოლოდ %-.64s PARTITIONING-ს შეუძლია გამოიყენოს VALUES %-.64s დანაყოფის აღწერისას"
+        spa "Sólo %-.64s PARTITIONING puede usar VALUES %-.64s en la definición de la partición"
+        swe "Endast %-.64s partitionering kan använda VALUES %-.64s i definition av partitionen" 
+ER_PARTITION_MAXVALUE_ERROR
+        chi "MAXVALUE只能在最后一个分区定义中使用"
+        eng "MAXVALUE can only be used in last partition definition"
+        ger "MAXVALUE kann nur für die Definition der letzten Partition verwendet werden"
+        geo "MAXVALUE-ის გამოყენება მხოლოდ ბოლო დანაყოფის აღწერისას შეგიძლიათ"
+        spa "MAXVALUE sólo se puede usar en la definición de la última partición"
+        swe "MAXVALUE kan bara användas i definitionen av den sista partitionen"
+ER_PARTITION_SUBPARTITION_ERROR
+        chi "子分区只能是哈希(hash)分区和按键(key)分区"
+        eng "Subpartitions can only be hash partitions and by key"
+        ger "Unterpartitionen dürfen nur HASH- oder KEY-Partitionen sein"
+        geo "ქვედაყანოფები მხოლოდ HASH და KEY დანაყოფებს შეიძლება წარმოადგენდეს"
+        spa "Las subparticiones sólo pueden ser particiones dispersas (hash) y mediante clave"
+        swe "Subpartitioner kan bara vara hash och key partitioner"
+ER_PARTITION_SUBPART_MIX_ERROR
+        chi "如果在一个分区上,必须在所有分区上定义子组分"
+        eng "Must define subpartitions on all partitions if on one partition"
+        ger "Wenn Sie Unterpartitionen auf einer Partition definieren, müssen Sie das für alle Partitionen tun"
+        geo "თუ ქვედანაყოფს ერთ დანაყოფზე მაინც აღწერთ, ისინი ყველა დანაყოფზე უნდა აღწეროთ"
+        spa "Se deben de definir subparticiones en todas las particiones si se está en una partición"
+        swe "Subpartitioner måste definieras på alla partitioner om på en"
+ER_PARTITION_WRONG_NO_PART_ERROR
+        chi "定义了错误的分区数,与以前的设置不匹配"
+        eng "Wrong number of partitions defined, mismatch with previous setting"
+        ger "Falsche Anzahl von Partitionen definiert, stimmt nicht mit vorherigen Einstellungen überein"
+        geo "აღწერილია დანაყოფების არასწორი რაოდენობა. ის წინა პარამეტრს არ ემთხვევა"
+        spa "Definido un número equivocado de particiones, no coincide con configuración previa"
+        swe "Antal partitioner definierade och antal partitioner är inte lika"
+ER_PARTITION_WRONG_NO_SUBPART_ERROR
+        chi "错误的子组分数定义,与以前的设置不匹配"
+        eng "Wrong number of subpartitions defined, mismatch with previous setting"
+        ger "Falsche Anzahl von Unterpartitionen definiert, stimmt nicht mit vorherigen Einstellungen überein"
+        geo "აღწერილია ქვედაყანოფეის არასწორი რაოდენობა. ის წინა პარამეტრს არ ემთხვევა"
+        spa "Definido un número equivocado de subparticiones, no coincide con configuración previa"
+        swe "Antal subpartitioner definierade och antal subpartitioner är inte lika"
+ER_WRONG_EXPR_IN_PARTITION_FUNC_ERROR
+        chi "不允许(子)分区功能中的常量,随机或时区依赖表达式"
+        eng "Constant, random or timezone-dependent expressions in (sub)partitioning function are not allowed"
+        ger "Konstante oder Random-Ausdrücke in (Unter-)Partitionsfunktionen sind nicht erlaubt"
+        geo "(ქვე)დაყოფის ფუნქციაში მუდმივი, შემთხვევითი და დროის სარტყელზე დამოკიდებული გამოსახულებების გამოყენება დაშვებული არაა"
+        spa "Las expresiones constantes, al azar o dependientes de zona en función de (sub)particionado no están permitidas"
+        swe "Konstanta uttryck eller slumpmässiga uttryck är inte tillåtna (sub)partitioneringsfunktioner"
+ER_NOT_CONSTANT_EXPRESSION
+        chi "%s中的表达必须是恒定的"
+        eng "Expression in %s must be constant"
+        ger "Ausdrücke in %s müssen konstant sein"
+        geo "%s-ში გამოსახულება მუდმივი უნდა იყოს"
+        spa "Las expresiones incluidas en %s deben de ser constantes"
+        swe "Uttryck i %s måste vara ett konstant uttryck"
+ER_FIELD_NOT_FOUND_PART_ERROR
+        chi "在表中找不到分区功能的字段列表中的字段"
+        eng "Field in list of fields for partition function not found in table"
+        ger "Felder in der Feldliste der Partitionierungsfunktion wurden in der Tabelle nicht gefunden"
+        geo "ველი დაყოფის ფუნქციისთვის ცხრილის ველების სიაში აღმოჩენილი არაა"
+        spa "Campo en la lista de campos para función de partición no hallado en tabla"
+        swe "Fält i listan av fält för partitionering med key inte funnen i tabellen"
+ER_LIST_OF_FIELDS_ONLY_IN_HASH_ERROR
+        chi "只允许在索引分区中允许字段列表"
+        eng "List of fields is only allowed in KEY partitions"
+        ger "Eine Feldliste ist nur in KEY-Partitionen erlaubt"
+        geo "ველების სია მხოლოდ KEY დანაყოფებზეა დაშვებული"
+        spa "Lista de campos sólo se permite en particiones KEY"
+        swe "En lista av fält är endast tillåtet för KEY partitioner"
+ER_INCONSISTENT_PARTITION_INFO_ERROR
+        chi "FRM文件中的分区信息不与可以写入FRM文件的内容一致"
+        eng "The partition info in the frm file is not consistent with what can be written into the frm file"
+        ger "Die Partitionierungsinformationen in der frm-Datei stimmen nicht mit dem überein, was in die frm-Datei geschrieben werden kann"
+        geo "დანაყოფების შესახებ ინფორმაცია frm ფაილში არ ემთხვევა იმას, რაც frm ფაილში წერია"
+        spa "La información de partición en el fichero/archivo frm no es consistente con lo que se puede grabar en un fichero/archivo frm"
+        swe "Partitioneringsinformationen i frm-filen är inte konsistent med vad som kan skrivas i frm-filen"
+ER_PARTITION_FUNC_NOT_ALLOWED_ERROR
+        chi "%-.192s函数返回错误的类型"
+        eng "The %-.192s function returns the wrong type"
+        ger "Die %-.192s-Funktion gibt einen falschen Typ zurück"
+        geo "ფუნქცია %-.192s არასწორ ტიპს აბრუნებს"
+        spa "La función %-.192s devueve un tipo equivocado"
+        swe "%-.192s-funktionen returnerar felaktig typ"
+ER_PARTITIONS_MUST_BE_DEFINED_ERROR
+        chi "对于%-.64s分区必须定义每个分区"
+        eng "For %-.64s partitions each partition must be defined"
+        ger "Für %-.64s-Partitionen muss jede Partition definiert sein"
+        geo "%-.64s დანაყოფებისთვის ყველა დანაყოფი უნდა აღიწეროს"
+        spa "Para las particiones %-.64s, se debe de definir cada partición"
+        swe "För %-.64s partitionering så måste varje partition definieras"
+ER_RANGE_NOT_INCREASING_ERROR
+        chi "每个分区的VALUES LESS THAN的值必须严格增加"
+        eng "VALUES LESS THAN value must be strictly increasing for each partition"
+        ger "Werte in VALUES LESS THAN müssen für jede Partition strikt aufsteigend sein"
+        geo "VALUES LESS THAN მნიშვნელობები თითოეული დანაყოფისთვის მხოლოდ უნდა იზრდებოდეს"
+        spa "El valor VALUES LESS THAN debe de ser estrictamente incremental para cada partición"
+        swe "Värden i VALUES LESS THAN måste vara strikt växande för varje partition"
+ER_INCONSISTENT_TYPE_OF_FUNCTIONS_ERROR
+        chi "VALUES值必须与分区函数相同"
+        eng "VALUES value must be of same type as partition function"
+        ger "VALUES-Werte müssen vom selben Typ wie die Partitionierungsfunktion sein"
+        geo "VALUES მნიშვნელობა დანაყოფის ფუნქციის ტიპის უნდა იყოს"
+        spa "El valor VALUES debe de ser del mismo tipo que la función de partición"
+        swe "Värden i VALUES måste vara av samma typ som partitioneringsfunktionen"
+ER_MULTIPLE_DEF_CONST_IN_LIST_PART_ERROR
+        chi "列表分区中相同常量的多个定义"
+        eng "Multiple definition of same constant in list partitioning"
+        ger "Mehrfachdefinition derselben Konstante bei Listen-Partitionierung"
+        geo "სიის დანაყოფებში აღმოჩენილია მუდმივის ერთზე მეტი აღწერა"
+        spa "Definición múltiple de la misma constante en el particionamiento de la lista"
+        swe "Multipel definition av samma konstant i list partitionering"
+ER_PARTITION_ENTRY_ERROR
+        chi "分区不能在查询中独立使用"
+        eng "Partitioning can not be used stand-alone in query"
+        ger "Partitionierung kann in einer Abfrage nicht alleinstehend benutzt werden"
+        geo "დაყოფა არ შეიძლება, მოთხოვნაში ცალკე გამოიყენოთ"
+        spa "El particionado no puede ser usado de forma autónoma en consulta (query)"
+        swe "Partitioneringssyntax kan inte användas på egen hand i en SQL-fråga"
+ER_MIX_HANDLER_ERROR
+        chi "此版本的MariaDB中不允许分区中的处理程序混合"
+        eng "The mix of handlers in the partitions is not allowed in this version of MariaDB"
+        ger "Das Vermischen von Handlern in Partitionen ist in dieser Version von MariaDB nicht erlaubt"
+        geo "MariaDB-ის ამ ვერსიაში დანაყოფებში დამმუშავებლების შერევა დაშვებული არაა"
+        spa "La mezcla de manejadores en las particiones no está autorizada en esta versión de MariaDB"
+        swe "Denna mix av lagringsmotorer är inte tillåten i denna version av MariaDB"
+ER_PARTITION_NOT_DEFINED_ERROR
+        chi "对于分区引擎,需要定义所有%-.64s"
+        eng "For the partitioned engine it is necessary to define all %-.64s"
+        ger "Für die partitionierte Engine müssen alle %-.64s definiert sein"
+        geo "დაყოფის ძრავისთვის ყველა %-.64s-ის აღწერა აუცილებელია"
+        spa "Para el motor de particionado es necesario definir todas %-.64s"
+        swe "För partitioneringsmotorn så är det nödvändigt att definiera alla %-.64s"
+ER_TOO_MANY_PARTITIONS_ERROR
+        chi "定义了太多分区(包括子组分)"
+        eng "Too many partitions (including subpartitions) were defined"
+        ger "Es wurden zu vielen Partitionen (einschließlich Unterpartitionen) definiert"
+        geo "აღწერილია მეტისმეტად ბევრი დანაყოფი (ქვედანაყოფების ჩათვლით)"
+        spa "Definidas demasiadas particiones (incluyendo las subparticiones)"
+        swe "För många partitioner (inkluderande subpartitioner) definierades"
+ER_SUBPARTITION_ERROR
+        chi "只有在子节分节的HASH/KEY分区中可以混合RANGE/LIST分区"
+        eng "It is only possible to mix RANGE/LIST partitioning with HASH/KEY partitioning for subpartitioning"
+        ger "RANGE/LIST-Partitionierung kann bei Unterpartitionen nur zusammen mit HASH/KEY-Partitionierung verwendet werden"
+        spa "Sólo es posible mezclar particionado RANGE/LIST con particionado HASH/KEY para el subparticionado"
+        swe "Det är endast möjligt att blanda RANGE/LIST partitionering med HASH/KEY partitionering för subpartitionering"
+ER_CANT_CREATE_HANDLER_FILE
+        chi "无法创建特定的处理程序文件"
+        eng "Failed to create specific handler file"
+        ger "Erzeugen einer spezifischen Handler-Datei fehlgeschlagen"
+        geo "მითითებული დამმუშავებლის ფაილის შექმნის შეცდომა"
+        spa "No pude crear fichero/archivo de manejador específico"
+        swe "Misslyckades med att skapa specifik fil i lagringsmotor"
+ER_BLOB_FIELD_IN_PART_FUNC_ERROR
+        chi "分区功能中不允许BLOB字段"
+        eng "A BLOB field is not allowed in partition function"
+        ger "In der Partitionierungsfunktion sind BLOB-Spalten nicht erlaubt"
+        geo "დანაყოფის ფუნქციაში BLOB ველი დაშვებული არაა"
+        spa "No se autoriza campo BLOB en la función de partición"
+        swe "Ett BLOB-fält är inte tillåtet i partitioneringsfunktioner"
+ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF
+        chi "A%-.192s必须包含表的分区功能中的所有列"
+        eng "A %-.192s must include all columns in the table's partitioning function"
+        geo "%-.192s ცხრილის დაყოფის ფუნქციაში ყველა სვეტს უნდა შეიცავდეს"
+        spa "Un %-.192s debe de incluir todas las columnas en la función de particionado de la tabla"
+ER_NO_PARTS_ERROR
+        chi "不允许%-.64s = 0"
+        eng "Number of %-.64s = 0 is not an allowed value"
+        ger "Eine Anzahl von %-.64s = 0 ist kein erlaubter Wert"
+        geo "%-.64s = 0 რაოდენობა დაშვებული არაა"
+        spa "El número de %-.64s = 0 no es un valor autorizado"
+        swe "Antal %-.64s = 0 är inte ett tillåten värde"
+ER_PARTITION_MGMT_ON_NONPARTITIONED
+        chi "不分区表上的分区管理是不可能的"
+        eng "Partition management on a not partitioned table is not possible"
+        ger "Partitionsverwaltung einer nicht partitionierten Tabelle ist nicht möglich"
+        geo "დანაყოფების მართვა ცხრილზე, რომელიც დაყოფილი არაა, შეუძლებელია"
+        spa "La gestión de particiones en una tabla no particionada no es posible"
+        swe "Partitioneringskommando på en opartitionerad tabell är inte möjligt"
+ER_FEATURE_NOT_SUPPORTED_WITH_PARTITIONING
+        eng "Partitioned tables do not support %s"
+        geo "დაყოფილ ცხრილებს %s-ის მხარდაჭერა არ გააჩნიათ"
+        spa "Las tablas particionadas no soportan %s"
+ER_PARTITION_DOES_NOT_EXIST
+        chi "分区名称或分区列表错误"
+        eng "Wrong partition name or partition list"
+        ger "Falscher Name einer Partition oder Fehler in der Partitionsliste"
+        geo "დანაყოფის არასწორი სახელი ან დანაყოფების სია"
+        spa "Error en lista de particiones"
+        swe "Fel namn av en partition eller fel i listan av partitioner"
+ER_DROP_LAST_PARTITION
+        chi "无法删除所有分区,请使用删除表"
+        eng "Cannot remove all partitions, use DROP TABLE instead"
+        ger "Es lassen sich nicht sämtliche Partitionen löschen, benutzen Sie statt dessen DROP TABLE"
+        geo "ყველა დანაყოფის წაშლა შეუძლებელია. ამისთვის DROP TABLE გამოიყენეთ"
+        spa "No puedo quitar todas las particiones, use DROP TABLE en su lugar"
+        swe "Det är inte tillåtet att ta bort alla partitioner, använd DROP TABLE istället"
+ER_COALESCE_ONLY_ON_HASH_PARTITION
+        chi "COALESCE分区只能用于哈希/索引分区"
+        eng "COALESCE PARTITION can only be used on HASH/KEY partitions"
+        ger "COALESCE PARTITION kann nur auf HASH- oder KEY-Partitionen benutzt werden"
+        geo "COALESCE PARTITION-ის გამოყენება მხოლოდ HASH/KEY დანაყოფებზეა შესაძლებელი"
+        spa "COALESCE PARTITION sólo se puede usar en particiones HASH/KEY"
+        swe "COALESCE PARTITION kan bara användas på HASH/KEY partitioner"
+ER_REORG_HASH_ONLY_ON_SAME_NO
+        chi "REORGANIZE PARTITION只能用于重新组织不改变他们的数字的分区"
+        eng "REORGANIZE PARTITION can only be used to reorganize partitions not to change their numbers"
+        ger "REORGANIZE PARTITION kann nur zur Reorganisation von Partitionen verwendet werden, nicht, um ihre Nummern zu ändern"
+        geo "REORGANIZE PARTITION-ის გამოყენება მხოლოდ დანაყოფების გადასალაგებლადაა შესაძლებელი და არა მათი ნომრების შესაცვლელად"
+        spa "REORGANIZE PARTITION sólo se puede usar para reorganizar particiones no para cambiar sus números"
+        swe "REORGANIZE PARTITION kan bara användas för att omorganisera partitioner, inte för att ändra deras antal"
+ER_REORG_NO_PARAM_ERROR
+        chi "没有参数的REORGANIZE PARTITION只能用于HASH PARTITION的自动分区表"
+        eng "REORGANIZE PARTITION without parameters can only be used on auto-partitioned tables using HASH PARTITIONs"
+        ger "REORGANIZE PARTITION ohne Parameter kann nur für auto-partitionierte Tabellen verwendet werden, die HASH-Partitionierung benutzen"
+        geo "REORGANIZE PARTITION-ის პარამეტრების გარეშე გამოყენება HASH PARTITION-ის გამოყენებით ავტომატურად დაყოფილ ცხრილებზეა შესაძლებელი"
+        spa "REORGANIZE PARTITION sin parámetros sólo se puede usar en tablas auto-particionadas usando HASH PARTITIONs"
+        swe "REORGANIZE PARTITION utan parametrar kan bara användas på auto-partitionerade tabeller som använder HASH partitionering"
+ER_ONLY_ON_RANGE_LIST_PARTITION
+        chi "%-.64s分区只能用于RANGE/LIST分区"
+        eng "%-.64s PARTITION can only be used on RANGE/LIST partitions"
+        ger "%-.64s PARTITION kann nur für RANGE- oder LIST-Partitionen verwendet werden"
+        geo "%-.64s PARTITION-ის გამოყენება მხოლოდ RANGE/LIST ტიპის დანაყოფებზეა შესაძლებელი"
+        spa "%-.64s PARTITION sólo puede ser usada en particiones RANGE/LIST"
+        swe "%-.64s PARTITION kan bara användas på RANGE/LIST-partitioner"
+ER_ADD_PARTITION_SUBPART_ERROR
+        chi "尝试用错误数量的子分区添加分区"
+        eng "Trying to Add partition(s) with wrong number of subpartitions"
+        ger "Es wurde versucht, eine oder mehrere Partitionen mit der falschen Anzahl von Unterpartitionen hinzuzufügen"
+        geo "არასწორი ქვედანაყოფების რაოდენობის მქონე დანაყოფების დამატების მცდელობა"
+        spa "Intentando añadir particion(es) usando un número equivocado de subparticiones"
+        swe "ADD PARTITION med fel antal subpartitioner"
+ER_ADD_PARTITION_NO_NEW_PARTITION
+        chi "必须添加至少一个分区"
+        eng "At least one partition must be added"
+        ger "Es muss zumindest eine Partition hinzugefügt werden"
+        geo "აუცილებელია, ერთი დანაყოფი მაინც დაამატოთ"
+        spa "Se debe de añadir una partición, al menos"
+        swe "Åtminstone en partition måste läggas till vid ADD PARTITION"
+ER_COALESCE_PARTITION_NO_PARTITION
+        chi "至少一个分区必须合并"
+        eng "At least one partition must be coalesced"
+        ger "Zumindest eine Partition muss mit COALESCE PARTITION zusammengefügt werden"
+        spa "Se debe de fusionar una partición, al menos"
+        swe "Åtminstone en partition måste slås ihop vid COALESCE PARTITION"
+ER_REORG_PARTITION_NOT_EXIST
+        chi "分区重组量超过而不是分区量"
+        eng "More partitions to reorganize than there are partitions"
+        ger "Es wurde versucht, mehr Partitionen als vorhanden zu reorganisieren"
+        spa "Hay más particiones a reorganizar que las que existen"
+        swe "Fler partitioner att reorganisera än det finns partitioner"
+ER_SAME_NAME_PARTITION
+        chi "重复分区名称%-.192s"
+        eng "Duplicate partition name %-.192s"
+        ger "Doppelter Partitionsname: %-.192s"
+        geo "დუბლირებული დანაყოფის სახელი %-.192s"
+        spa "Nombre de partición duplicado %-.192s"
+        swe "Duplicerat partitionsnamn %-.192s"
+ER_NO_BINLOG_ERROR
+        chi "在此命令上不允许关闭binlog"
+        eng "It is not allowed to shut off binlog on this command"
+        ger "Es es nicht erlaubt, bei diesem Befehl binlog abzuschalten"
+        geo "ამ ბრძანებაზე binlog-ის გამორთვა აკრძალულია"
+        spa "No se autoriza a apagar binlog con este comando"
+        swe "Det är inte tillåtet att stänga av binlog på detta kommando"
+ER_CONSECUTIVE_REORG_PARTITIONS
+        chi "在重新组织一组分区时,它们必须按照次序"
+        eng "When reorganizing a set of partitions they must be in consecutive order"
+        ger "Bei der Reorganisation eines Satzes von Partitionen müssen diese in geordneter Reihenfolge vorliegen"
+        spa "Para reorganizar un conjunto de particiones, éstas deben de estar ordenadas consecutivamente"
+        swe "När ett antal partitioner omorganiseras måste de vara i konsekutiv ordning"
+ER_REORG_OUTSIDE_RANGE
+        chi "重组范围分区无法更改除最后分区之外的总范围,无法扩展范围"
+        eng "Reorganize of range partitions cannot change total ranges except for last partition where it can extend the range"
+        ger "Die Reorganisation von RANGE-Partitionen kann Gesamtbereiche nicht verändern, mit Ausnahme der letzten Partition, die den Bereich erweitern kann"
+        spa "El reorganizar un rango de particiones no puede cambiar los rangos totales excepto para la última partición donde se puede extender el rango"
+        swe "Reorganisering av rangepartitioner kan inte ändra den totala intervallet utom för den sista partitionen där intervallet kan utökas"
+ER_PARTITION_FUNCTION_FAILURE
+        chi "此版不支持此处理程序的分区功能"
+        eng "Partition function not supported in this version for this handler"
+        ger "Partitionsfunktion in dieser Version dieses Handlers nicht unterstützt"
+        spa "Función de partición no soportada en esta versión para este manejador"
+ER_PART_STATE_ERROR
+        chi "无法从CREATE/ALTER表中定义分区状态"
+        eng "Partition state cannot be defined from CREATE/ALTER TABLE"
+        ger "Partitionszustand kann nicht von CREATE oder ALTER TABLE aus definiert werden"
+        spa "El estado de una partición no se puede definir desde CREATE/ALTER TABLE"
+        swe "Partition state kan inte definieras från CREATE/ALTER TABLE"
+ER_LIMITED_PART_RANGE
+        chi "%-.64s处理程序仅支持32-bit整数"
+        eng "The %-.64s handler only supports 32 bit integers in VALUES"
+        ger "Der Handler %-.64s unterstützt in VALUES nur 32-Bit-Integers"
+        geo "'%-.64s'-ის დამმუშავებელს VALUES-ში მხოლოდ 32 ბიტიანი მთელი რიცხვების მხარდაჭერა გააჩნია"
+        spa "El manejador %-.64s sólo soporta enteros de 32 bit en VALUES"
+        swe "%-.64s stödjer endast 32 bitar i integers i VALUES"
+ER_PLUGIN_IS_NOT_LOADED
+        chi "插件'%-.192s'未加载"
+        eng "Plugin '%-.192s' is not loaded"
+        ger "Plugin '%-.192s' ist nicht geladen"
+        geo "დამატება '%-.192s' ჩატვირთული არაა"
+        spa "Enchufe (plugin) '%-.192s' no cargado"
+ER_WRONG_VALUE
+        chi "错误%-.32s值:'%-.128T'"
+        eng "Incorrect %-.32s value: '%-.128T'"
+        ger "Falscher %-.32s-Wert: '%-.128T'"
+        geo "%-.32s-ის არასწორი მნიშვნელობა: '%-.128T'"
+        spa "Incorrecto %-.32s valor: '%-.128T'"
+ER_NO_PARTITION_FOR_GIVEN_VALUE
+        chi "表没有%-.64s的分区"
+        eng "Table has no partition for value %-.64s"
+        ger "Tabelle hat für den Wert %-.64s keine Partition"
+        geo "ცხრილს მნიშვნელობისთვის %-.64s დანაყოფი არ გააჩნია"
+        spa "La tabla no tiene partición para valor %-.64s"
+ER_FILEGROUP_OPTION_ONLY_ONCE
+        chi "设置%s不能超过一次"
+        eng "It is not allowed to specify %s more than once"
+        ger "%s darf nicht mehr als einmal angegegeben werden"
+        geo "%s-ის მითითება მხოლოდ ერთხელ შეგიძლიათ"
+        spa "No se permite especificar %s más de una vez"
+ER_CREATE_FILEGROUP_FAILED
+        chi "无法创建%s"
+        eng "Failed to create %s"
+        ger "Anlegen von %s fehlgeschlagen"
+        geo "%s-ის შექმნის შეცდომა"
+        hindi "%s को बनाने में असफल रहे"
+        spa "No pude crear %s"
+ER_DROP_FILEGROUP_FAILED
+        chi "未能DROP%s"
+        eng "Failed to drop %s"
+        ger "Löschen von %s fehlgeschlagen"
+        geo "%s-ის წაშლის შეცდომა"
+        hindi "%s को हटाने में असफल रहे"
+        spa "No pude eliminar %s"
+ER_TABLESPACE_AUTO_EXTEND_ERROR
+        chi "处理程序不支持表空间的自动扩展名"
+        eng "The handler doesn't support autoextend of tablespaces"
+        ger "Der Handler unterstützt keine automatische Erweiterung (Autoextend) von Tablespaces"
+        geo "დამმუშავებელს ცხრილის სივრცეების ავტომატური გაფართოების მხარდაჭერა არ გააჩნია"
+        spa "El manejador no soporta autoextensión de espacios de tabla"
+ER_WRONG_SIZE_NUMBER
+        chi "尺寸参数被错误地指定,编号或表单10M"
+        eng "A size parameter was incorrectly specified, either number or on the form 10M"
+        ger "Ein Größen-Parameter wurde unkorrekt angegeben, muss entweder Zahl sein oder im Format 10M"
+        geo "ზომის პარამეტრი არასწორადაა მითითებული, ან რიცხვით, ან ფორმით '1M'"
+        spa "Se ha especificado de forma incorrecta un parámetro de medida o el número o en la forma 10M"
+ER_SIZE_OVERFLOW_ERROR
+        chi "尺寸编号是正确的,但我们不允许数字部分超过20亿"
+        eng "The size number was correct but we don't allow the digit part to be more than 2 billion"
+        ger "Die Zahl für die Größe war korrekt, aber der Zahlanteil darf nicht größer als 2 Milliarden sein"
+        geo "ზომის სიდიდე სწორია, მაგრამ ამჟამად ჩვენ 2 მილიარდზე მეტი ნაწილის მხარდაჭერა არ გაგვაჩნია"
+        spa "El número de medida es correcto pero no permitimos que la parte del dígito tenga más de 2 billones"
+ER_ALTER_FILEGROUP_FAILED
+        chi "未能改变:%s"
+        eng "Failed to alter: %s"
+        ger "Änderung von %s fehlgeschlagen"
+        geo "ცვლილების შეცდომა: %s"
+        hindi "%s को ALTER करने में असफल रहे"
+        spa "No pude alterar: %s"
+ER_BINLOG_ROW_LOGGING_FAILED
+        chi "将一行写入基于行的二进制日志失败"
+        eng "Writing one row to the row-based binary log failed"
+        ger "Schreiben einer Zeilen ins zeilenbasierte Binärlog fehlgeschlagen"
+        geo "მწკრივებზე დაფუძლებულ ბინარულ ჟურნალში ერთი მწკრივის ჩაწერის შეცდომა"
+        spa "Ha fallado el grabar una fila en historial (log) binario basado en fila"
+ER_BINLOG_ROW_WRONG_TABLE_DEF
+        chi "表定义主机和从站不匹配:%s"
+        eng "Table definition on master and slave does not match: %s"
+        ger "Tabellendefinition auf Master und Slave stimmt nicht überein: %s"
+        geo "მთავარ და დამორჩილებულ სერვერებზე ცხრილის აღწერა ერთმანეთს არ ემთხვევა: %s"
+        spa "La definición de tabla en maestro (master) y esclavo no coincide: %s"
+ER_BINLOG_ROW_RBR_TO_SBR
+        chi "使用--log-slave-updates的从站必须使用基于行的二进制日志记录,以便能够复制基于行的二进制日志事件"
+        eng "Slave running with --log-slave-updates must use row-based binary logging to be able to replicate row-based binary log events"
+        ger "Slave, die mit --log-slave-updates laufen, müssen zeilenbasiertes Loggen verwenden, um zeilenbasierte Binärlog-Ereignisse loggen zu können"
+        geo "დამორჩილებული, რომელიც --log-slave-updates პარამეტრითაა გაშვებული, მწკრივებზე დაფუძნებული ბინარული ჟურნალის მოვლენების რეპლიკაციისთვის მწკრივებზე-დამოკიდებულ ბინარულ ჟურნალს უნდა იყენებდეს"
+        spa "La ejecución esclava con --log-slave-updates debe de usar un historial (log) binario basado en fila para que pueda replicar eventos de historial (log) binario basados en fila"
+ER_EVENT_ALREADY_EXISTS
+        chi "事件'%-.192s'已经存在"
+        eng "Event '%-.192s' already exists"
+        ger "Event '%-.192s' existiert bereits"
+        geo "მოვლენა '%-.192s' უკვე არსებობს"
+        spa "El evento '%-.192s' ya existe"
+ER_EVENT_STORE_FAILED
+        chi "无法存储事件%s。错误代码%M来自存储引擎"
+        eng "Failed to store event %s. Error code %M from storage engine"
+        ger "Speichern von Event %s fehlgeschlagen. Fehlercode der Speicher-Engine: %M"
+        geo "%s მოვლენის დამახსოვრების შეცდომა. საცავის ძრავიდან მიღებული შეცდომის კოდია %M"
+        spa "No pude almacenar evento %s. Código de error %M desde motor de almacenaje"
+ER_EVENT_DOES_NOT_EXIST
+        chi "未知事件'%-.192s'"
+        eng "Unknown event '%-.192s'"
+        ger "Unbekanntes Event '%-.192s'"
+        geo "უცნობი მოვლენა '%-.192s'"
+        spa "Evento desconocido '%-.192s'"
+ER_EVENT_CANT_ALTER
+        chi "无法改变事件'%-.192s'"
+        eng "Failed to alter event '%-.192s'"
+        ger "Ändern des Events '%-.192s' fehlgeschlagen"
+        geo "მოვლენის ('%-.192s') ჩასწორების შეცდომა"
+        hindi "'%-.192s' EVENT को ALTER करने में असफल रहे"
+        spa "No pude alterar evento '%-.192s'"
+ER_EVENT_DROP_FAILED
+        chi "未能DROP%s"
+        eng "Failed to drop %s"
+        ger "Löschen von %s fehlgeschlagen"
+        geo "%s-ის წაშლის შეცდომა"
+        hindi "%s को हटाने में असफल रहे"
+        spa "No pude eliminar %s"
+ER_EVENT_INTERVAL_NOT_POSITIVE_OR_TOO_BIG
+        chi "INTERVAL为负或太大"
+        eng "INTERVAL is either not positive or too big"
+        ger "INTERVAL ist entweder nicht positiv oder zu groß"
+        geo "INTERVAL-ის მნიშვნელობა დადებითი არაა ან ძალიან დიდია"
+        spa "O INTERVAL no es positivo o es demasiado grande"
+ER_EVENT_ENDS_BEFORE_STARTS
+        chi "ENDS无效的或在STARTS之前"
+        eng "ENDS is either invalid or before STARTS"
+        ger "ENDS ist entweder ungültig oder liegt vor STARTS"
+        geo "ENDS ან არასწორია, ან STARTS-ის წინაა"
+        spa "O ENDS es inválido o anterior a STARTS"
+ER_EVENT_EXEC_TIME_IN_THE_PAST
+        chi "事件执行时间在过去。事件已被禁用"
+        eng "Event execution time is in the past. Event has been disabled"
+        ger "Ausführungszeit des Events liegt in der Vergangenheit. Event wurde deaktiviert"
+        geo "მოვლენის შესრულების დრო წარსულშია. მოვლენა გაითიშა"
+        spa "El tiempo de ejecución de evento se encuentra en el pasado. El evento ha sido desactivado"
+ER_EVENT_OPEN_TABLE_FAILED
+        chi "无法打开mysql.event"
+        eng "Failed to open mysql.event"
+        ger "Öffnen von mysql.event fehlgeschlagen"
+        geo "mysql.event-ის გახსნის შეცდომა"
+        hindi "mysql.event को खोलने में असफल रहे"
+        spa "No puede abrir mysql.event"
+ER_EVENT_NEITHER_M_EXPR_NOR_M_AT
+        chi "没有提供DateTime表达式"
+        eng "No datetime expression provided"
+        ger "Kein DATETIME-Ausdruck angegeben"
+        geo "დრო/თარიღის გამოსახულება მითითებული არაა"
+        spa "No se ha suministrado expresión datetime"
+ER_UNUSED_2
+        eng "You should never see it"
+        geo "ამას წესით ვერ უნდა ხედავდეთ"
+        spa "No lo debería vd de ver nunca"
+ER_UNUSED_3
+        eng "You should never see it"
+        geo "ამას წესით ვერასდროს უნდა ხედავდეთ"
+        spa "No lo debería vd  de ver nunca"
+ER_EVENT_CANNOT_DELETE
+        chi "无法从mysql.event删除该事件"
+        eng "Failed to delete the event from mysql.event"
+        ger "Löschen des Events aus mysql.event fehlgeschlagen"
+        geo "mysql.event-დან მოვლენის წაშლის შეცდომა"
+        hindi "EVENT को mysql.event से हटाने मैं असफल रहे"
+        spa "No pude borrar el evento desde mysql.event"
+ER_EVENT_COMPILE_ERROR
+        chi "在汇编事件的主体时出错"
+        eng "Error during compilation of event's body"
+        ger "Fehler beim Kompilieren des Event-Bodys"
+        geo "მოვლენის ტანის აგების შეცდომა"
+        spa "Error durante compilación de cuerpo de evento"
+ER_EVENT_SAME_NAME
+        chi "相同的旧活动名称"
+        eng "Same old and new event name"
+        ger "Alter und neuer Event-Name sind gleich"
+        geo "ძველი და ახალი მოვლენის სახელების იგივეა"
+        spa "Mismo nombre de evento viejo y nuevo"
+ER_EVENT_DATA_TOO_LONG
+        chi "列'%s'数据太长"
+        eng "Data for column '%s' too long"
+        ger "Daten der Spalte '%s' zu lang"
+        geo "მონაცემები სვეტისთვის '%s' ძალიან გრძელია"
+        spa "Datos demasiado largos para la columna '%s'"
+ER_DROP_INDEX_FK
+        chi "无法删除索引'%-.192s':外部索引约束中需要它"
+        eng "Cannot drop index '%-.192s': needed in a foreign key constraint"
+        ger "Kann Index '%-.192s' nicht löschen: wird für eine Fremdschlüsselbeschränkung benötigt"
+        spa "No puedo eliminar índice '%-.192s': necesario en una restricción de clave foránea"
+# When using this error message, use the ER_WARN_DEPRECATED_SYNTAX error
+# code.
+ER_WARN_DEPRECATED_SYNTAX_WITH_VER  
+        chi "语法'%s'被弃用,将在Mariadb%s中删除。请使用%s"
+        eng  "The syntax '%s' is deprecated and will be removed in MariaDB %s. Please use %s instead"
+        ger "Die Syntax '%s' ist veraltet und wird in MariaDB %s entfernt. Bitte benutzen Sie statt dessen %s"
+        geo "სინტაქსი '%s' მოძველებულია და MariaDB %s-ში წაიშლება. გამოიყენეთ %s"
+        spa  "La sintaxis '%s' está obsoleta y será quitada en MariaDB %s. Por favor, use %s en su lugar"
+ER_CANT_WRITE_LOCK_LOG_TABLE
+        chi "您无法获得日志表的写锁。只有读访问是可能的"
+        eng "You can't write-lock a log table. Only read access is possible"
+        ger "Eine Log-Tabelle kann nicht schreibgesperrt werden. Es ist ohnehin nur Lesezugriff möglich"
+        geo "ჟურნალის ცხრილის ჩაწერაზე დაბლოკვა შეუძლებელია. შესაძლებელია მხოლოდ წაკითხვა"
+        spa "No puede hacer bloqueo de escritura en una tabla de historial (log). Sólo es posible acceso de lectura"
+ER_CANT_LOCK_LOG_TABLE
+        chi "您无法使用带日志表的锁"
+        eng "You can't use locks with log tables"
+        ger "Log-Tabellen können nicht gesperrt werden"
+        geo "ჟურნალის ცხრილის დაბლოკვა შეუძლებელია"
+        spa "No puede usar bloqueos con tablas de historial (log)"
+ER_UNUSED_4
+        eng "You should never see it"
+        geo "ამას ვერ უნდა ხედავდეთ"
+        spa "Nunca debería vd de ver esto"
+ER_COL_COUNT_DOESNT_MATCH_PLEASE_UPDATE
+        chi "mysql的列计数。%s是错误的。预期%d,找到%d。使用MariaDB%d创建,现在运行%d。请使用mariadb-upgrade来修复此错误"
+        eng "Column count of mysql.%s is wrong. Expected %d, found %d. Created with MariaDB %d, now running %d. Please use mariadb-upgrade to fix this error"
+        ger "Spaltenanzahl von mysql.%s falsch. %d erwartet, aber %d erhalten. Erzeugt mit MariaDB %d, jetzt unter %d. Bitte benutzen Sie mariadb-upgrade, um den Fehler zu beheben"
+        geo "mysql.%s-ის სვეტების რაოდენობა არასწორია. მოველოდი: %d, მივიღე: %d. შექმნილია MariaDB-ის ვერსიით %d, ახლა კი გაქვთ %d. ამ შეცდომის გასასწორებლად mariadb-upgrade გაუშვით"
+        spa "El contador de columnas de mysql.%s está equivocado. Se esperaba %d, hallado %d. Creado con MariaDB %d, ahora ejecuando %d. Por favor, use mariadb-upgrade para solucionar este error"
+ER_TEMP_TABLE_PREVENTS_SWITCH_OUT_OF_RBR
+        chi "当会话打开临时表时,无法切换出基于行的二进制日志格式"
+        eng "Cannot switch out of the row-based binary log format when the session has open temporary tables"
+        ger "Kann nicht aus dem zeilenbasierten Binärlog-Format herauswechseln, wenn die Sitzung offene temporäre Tabellen hat"
+        geo "როცა სესიას ღია დროებითი ცხრილები გააჩნია, მწკრივებზე დაფუძნებული ბინარული ჟურნალის ფორმატის გადართვა შეუძლებელია"
+        spa "No puedo conmutar fuera del formato de historial (log) binario basado en fila cuando la sesión ha abierto tablas temporales"
+ER_STORED_FUNCTION_PREVENTS_SWITCH_BINLOG_FORMAT
+        chi "无法更改存储函数或触发器内的二进制记录格式"
+        eng "Cannot change the binary logging format inside a stored function or trigger"
+        ger "Das Binärlog-Format kann innerhalb einer gespeicherten Funktion oder eines Triggers nicht geändert werden"
+        geo "შენახული ფუნქციის და ტრიგერის შიგნიდან ბინარული ჟურნალის ფორმატის შეცვლა შეუძლებელია"
+        spa "No puedo cambiar el formato de historial (log) binario dentro de funciones almacenadas o disparadores"
+ER_UNUSED_13
+        eng "You should never see it"
+        geo "მას ვერ უნდა ხედავდეთ"
+        spa "Nunca debería vd de ver esto"
+ER_PARTITION_NO_TEMPORARY
+        chi "无法使用分区创建临时表"
+        eng "Cannot create temporary table with partitions"
+        ger "Anlegen temporärer Tabellen mit Partitionen nicht möglich"
+        geo "დანაყოფების მქონე დროებითი ცხრილის შექმნა შეუძლებელია"
+        hindi "अस्थाई टेबल को पार्टिशन्स के साथ नहीं बनाया जा सकता"
+        spa "No puedo crear tabla temporal con particiones"
+ER_PARTITION_CONST_DOMAIN_ERROR
+        chi "分区常量超出分区功能域"
+        eng "Partition constant is out of partition function domain"
+        ger "Partitionskonstante liegt außerhalb der Partitionsfunktionsdomäne"
+        geo "დანაყოფის მუდმივა დანაყოფის ფუნქციის დომენის გარეთაა"
+        spa "La constante de partición está fuera del dominio de función de partición"
+        swe "Partitionskonstanten är utanför partitioneringsfunktionens domän"
+ER_PARTITION_FUNCTION_IS_NOT_ALLOWED
+        chi "不允许此分区功能"
+        eng "This partition function is not allowed"
+        ger "Diese Partitionierungsfunktion ist nicht erlaubt"
+        geo "დაყოფის ეს ფუნქცია დაშვებული არაა"
+        spa "Esta función de partición no está permitida"
+        swe "Denna partitioneringsfunktion är inte tillåten"
+ER_DDL_LOG_ERROR
+        chi "DDL日志中的错误"
+        eng "Error in DDL log"
+        ger "Fehler im DDL-Log"
+        geo "შეცდომა DDL-ის ჟურნალში"
+        hindi "DDL लॉग में त्रुटि हुई"
+        spa "Error en historial (log) DDL"
+ER_NULL_IN_VALUES_LESS_THAN
+        chi "VALUES LESS THAN不允许使用NULL"
+        eng "Not allowed to use NULL value in VALUES LESS THAN"
+        ger "In VALUES LESS THAN dürfen keine NULL-Werte verwendet werden"
+        geo "VALUES LESS THAN-ში NULL მნიშვნელობა დაუშვებელია"
+        spa "No autorizado a usar valor NULL en VALUES LESS THAN"
+        swe "Det är inte tillåtet att använda NULL-värden i VALUES LESS THAN"
+ER_WRONG_PARTITION_NAME
+        chi "分区名称不正确"
+        eng "Incorrect partition name"
+        ger "Falscher Partitionsname"
+        geo "დანაყოფის არასწორი სახელი"
+        hindi "पार्टीशन का नाम गलत है"
+        spa "Nombre incorrecto de partición"
+        swe "Felaktigt partitionsnamn"
+ER_CANT_CHANGE_TX_CHARACTERISTICS 25001
+        chi "交易正在进行,无法更改事务特性"
+        eng "Transaction characteristics can't be changed while a transaction is in progress"
+        geo "ტრანზაქციის მიმდინარეობისას მისი მახასიათებლების შეცვლა შეუძლებელია"
+        spa "No se pueden cambiar las características de transacción mientras que una transacción se encuentre en proceso"
+ER_DUP_ENTRY_AUTOINCREMENT_CASE
+        chi "ALTER TABLE表会导致AUTO_INCREMENT重建,导致重复的条目'%-.192T'用于索引'%-.192s'"
+        eng "ALTER TABLE causes auto_increment resequencing, resulting in duplicate entry '%-.192T' for key '%-.192s'"
+        ger "ALTER TABLE führt zur Neusequenzierung von auto_increment, wodurch der doppelte Eintrag '%-.192T' für Schlüssel '%-.192s' auftritt"
+        spa "ALTER TABLE causa resecuenciado de auto_incremento resultando en entrada duplicada '%-.192T' para la clave '%-.192s'"
+ER_EVENT_MODIFY_QUEUE_ERROR
+        chi "内部调度器错误%d"
+        eng "Internal scheduler error %d"
+        ger "Interner Scheduler-Fehler %d"
+        geo "შიდა დამგეგმავის შეცდომა %d"
+        spa "Error en organizador interno %d"
+ER_EVENT_SET_VAR_ERROR
+        chi "在开始/停止调度程序期间出错。错误代码%M"
+        eng "Error during starting/stopping of the scheduler. Error code %M"
+        ger "Fehler während des Startens oder Anhalten des Schedulers. Fehlercode %M"
+        geo "შეცდომა დამგეგმავის გაშვება/გაჩერებისას. შეცდომის კოდია %M"
+        spa "Error durante arranque/parada del organizador. Código de error %M"
+ER_PARTITION_MERGE_ERROR
+        chi "引擎不能用于分区表"
+        eng "Engine cannot be used in partitioned tables"
+        ger "Engine kann in partitionierten Tabellen nicht verwendet werden"
+        geo "ძრავას დაყოფილ ცხრილებში ვერ გამოიყენებთ"
+        spa "No se puede usar el motor en tablas particionadas"
+        swe "Engine kan inte användas i en partitionerad tabell"
+ER_CANT_ACTIVATE_LOG
+        chi "无法激活'%-.64s'日志"
+        eng "Cannot activate '%-.64s' log"
+        ger "Kann Logdatei '%-.64s' nicht aktivieren"
+        geo "'%-.64s' ჟურნალის აქტივაცია შეუძლებელია"
+        spa "No puedo activar historial (log) '%-.64s'"
+ER_RBR_NOT_AVAILABLE
+        chi "服务器不是基于行的复制构建的"
+        eng "The server was not built with row-based replication"
+        ger "Der Server wurde nicht mit zeilenbasierter Replikation gebaut"
+        geo "სერვერის აგების დროს მწკრივებზე დაფუძნებული რეპლიკაცია ჩართული არ იყო"
+        spa "El servidor no ha sido construido con réplica basada en fila"
+ER_BASE64_DECODE_ERROR
+        chi "Base64字符串的解码失败"
+        eng "Decoding of base64 string failed"
+        ger "Dekodierung des base64-Strings fehlgeschlagen"
+        geo "base64 სტრიქონის გაშიფვრის შეცდომა"
+        spa "Ha fallado la decodificación de cadena base64"
+        swe "Avkodning av base64 sträng misslyckades"
+ER_EVENT_RECURSION_FORBIDDEN
+        chi "EVENT主体存在时EVENT DDL语句递归被禁止"
+        eng "Recursion of EVENT DDL statements is forbidden when body is present"
+        ger "Rekursivität von EVENT-DDL-Anweisungen ist unzulässig wenn ein Hauptteil (Body) existiert"
+        geo "როცა სხეული არსებობს, EVENT DDL გამოსახულებების რეკურსია დაშვებული არაა"
+        spa "Se prohiben sentencias de EVENT DDL cuando se encuentra presente el cuerpo"
+ER_EVENTS_DB_ERROR
+        chi "无法继续,因为事件调度程序已禁用"
+        eng "Cannot proceed, because event scheduler is disabled"
+        ger "Die Operation kann nicht fortgesetzt werden, da Event Scheduler deaktiviert ist."
+        geo "გაგრძელება შეუძლებელია, რადგან მოვლენების დამგეგმავი გამორთულია"
+        spa "No puedo proceder porque el organizador de eventos está desactivado"
+ER_ONLY_INTEGERS_ALLOWED
+        chi "这里只允许整数作为数字"
+        eng "Only integers allowed as number here"
+        ger "An dieser Stelle sind nur Ganzzahlen zulässig"
+        geo "აქ მხოლოდ მთელი რიცხვის გამოყენება შეგიძლიათ"
+        spa "Sólo se permiten enteros como número aquí"
+ER_UNSUPORTED_LOG_ENGINE
+        chi "存储引擎%s不能用于日志表"
+        eng "Storage engine %s cannot be used for log tables"
+        ger "Speicher-Engine %s kann für Logtabellen nicht verwendet werden"
+        geo "ჟურნალის ცხრილებისთვის საცავის ძრავას %s ვერ გამოიყენებთ"
+        hindi "स्टोरेज इंजन %s को लॉग टेबल्स के लिए इस्तेमाल नहीं किया जा सकता है"
+        spa "No se puede usar el motor de almacenaje %s para tablas de historial (log)"
+ER_BAD_LOG_STATEMENT
+        chi "如果启用日志记录,则无法'%s'日志表"
+        eng "You cannot '%s' a log table if logging is enabled"
+        ger "Sie können eine Logtabelle nicht '%s', wenn Loggen angeschaltet ist"
+        geo "თუ ჟურნალი ჩართულია, მის ცხრილზე '%s' ოპერაციას ვერ შეასრულებთ"
+        spa "No puede '%s' una tabla de historial (log) cuando se encuentra activado el llevar historial (log)"
+ER_CANT_RENAME_LOG_TABLE
+        chi "无法重命名'%s'。启用日志记录时,重命名日志表必须重命名两个表:日志表到存档表,另一个表返回'%s'"
+        eng "Cannot rename '%s'. When logging enabled, rename to/from log table must rename two tables: the log table to an archive table and another table back to '%s'"
+        ger "Kann '%s' nicht umbenennen. Wenn Loggen angeschaltet ist, müssen zwei Tabellen umbenannt werden: die Logtabelle zu einer Archivtabelle, und eine weitere Tabelle zu '%s'"
+        spa "No puedo renombrar '%s'. Si se encuentra activo el llevar historial (log), el renombrar a/desde tabla de historial (log) debe de renombrar dos tablas: la tabla de historial (log) a una tabla archivo y otra tabla de vuelta a '%s'"
+ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT 42000
+        chi "对本机函数的呼叫中的参数计数不正确'%-.192s'"
+        eng "Incorrect parameter count in the call to native function '%-.192s'"
+        ger "Falsche Anzahl von Parametern beim Aufruf der nativen Funktion '%-.192s'"
+        spa "Contador de parámetro incorrecto en la llamada a función nativa '%-.192s'"
+ER_WRONG_PARAMETERS_TO_NATIVE_FCT 42000
+        chi "对本机函数'%-.192s'呼叫中的参数不正确"
+        eng "Incorrect parameters in the call to native function '%-.192s'"
+        ger "Falscher Parameter beim Aufruf der nativen Funktion '%-.192s'"
+        spa "Parámetros incorrectos en la llamada a función nativa '%-.192s'"
+ER_WRONG_PARAMETERS_TO_STORED_FCT 42000  
+        chi "呼叫中的参数不正确为存储函数'%-.192s'"
+        eng "Incorrect parameters in the call to stored function '%-.192s'"
+        ger "Falsche Parameter beim Aufruf der gespeicherten Funktion '%-.192s'"
+        geo "არასწორი პარამეტრები დამახსოვრებული ფუნქციის გამოძახებისას '%-.192s'"
+        spa "Parámetros incorrectos en la llamada a función almacenada '%-.192s'"
+ER_NATIVE_FCT_NAME_COLLISION
+        chi "此功能'%-.192s'具有与本机函数相同的名称"
+        eng "This function '%-.192s' has the same name as a native function"
+        ger "Die Funktion '%-.192s' hat denselben Namen wie eine native Funktion"
+        spa "Esta función '%-.192s' tiene el mismo nombre que una función nativa"
+# When using this error message, use the ER_DUP_ENTRY error code.  See, for
+# example, code in handler.cc.
+ER_DUP_ENTRY_WITH_KEY_NAME 23000 S1009
+        chi "重复条目'%-.64T'键'%-.192s'"
+        cze "Zvojený klíč '%-.64T' (číslo klíče '%-.192s')"
+        dan "Ens værdier '%-.64T' for indeks '%-.192s'"
+        eng "Duplicate entry '%-.64T' for key '%-.192s'"
+        est "Kattuv väärtus '%-.64T' võtmele '%-.192s'"
+        fre "Duplicata du champ '%-.64T' pour la clef '%-.192s'"
+        ger "Doppelter Eintrag '%-.64T' für Schlüssel '%-.192s'"
+        geo "დუბლირებული ჩანაწერი '%-.64T' გასაღებისთვის '%-.192s'"
+        greek "Διπλή εγγραφή '%-.64T' για το κλειδί '%-.192s'"
+        hun "Duplikalt bejegyzes '%-.64T' a '%-.192s' kulcs szerint"
+        ita "Valore duplicato '%-.64T' per la chiave '%-.192s'"
+        jpn "'%-.64T' は索引 '%-.192s' で重複しています。"
+        kor "중복된 입력 값 '%-.64T': key '%-.192s'"
+        nla "Dubbele ingang '%-.64T' voor zoeksleutel '%-.192s'"
+        nor "Like verdier '%-.64T' for nøkkel '%-.192s'"
+        norwegian-ny "Like verdiar '%-.64T' for nykkel '%-.192s'"
+        pol "Powtórzone wystąpienie '%-.64T' dla klucza '%-.192s'"
+        por "Entrada '%-.64T' duplicada para a chave '%-.192s'"
+        rum "Cimpul '%-.64T' e duplicat pentru cheia '%-.192s'"
+        rus "Дублирующаяся запись '%-.64T' по ключу '%-.192s'"
+        serbian "Dupliran unos '%-.64T' za ključ '%-.192s'"
+        slo "Opakovaný kľúč '%-.64T' (číslo kľúča '%-.192s')"
+        spa "Entrada duplicada '%-.64T' para la clave '%-.192s'"
+        swe "Dublett '%-.64T' för nyckel '%-.192s'"
+        ukr "Дублюючий запис '%-.64T' для ключа '%-.192s'"
+ER_BINLOG_PURGE_EMFILE
+        chi "打开太多文件,请再次执行命令"
+        eng "Too many files opened, please execute the command again"
+        ger "Zu viele offene Dateien, bitte führen Sie den Befehl noch einmal aus"
+        geo "გახსნილია მეტისმეტად ბევრი ფაილი. ბრძანება თავიდან გაუშვით"
+        spa "Demasiados ficheros/archivos abiertos. Por favor, ejecute el comando otra vez"
+ER_EVENT_CANNOT_CREATE_IN_THE_PAST
+        chi "事件执行时间在过去,并ON COMPLETION NOT PRESERVE。创建后,事件立即丢弃"
+        eng "Event execution time is in the past and ON COMPLETION NOT PRESERVE is set. The event was dropped immediately after creation"
+        ger "Ausführungszeit des Events liegt in der Vergangenheit, und es wurde ON COMPLETION NOT PRESERVE gesetzt. Das Event wurde unmittelbar nach Erzeugung gelöscht"
+        spa "El tiempo de ejecución del evento se encuentra en el pasado y está configurado ON COMPLETION NOT PRESERVE. El evento fue eliminado inmediatamente tras su creación"
+ER_EVENT_CANNOT_ALTER_IN_THE_PAST
+        chi "事件执行时间在过去,并ON COMPLETION NOT PRESERVE。事件没有改变。指定将来的时间"
+        eng "Event execution time is in the past and ON COMPLETION NOT PRESERVE is set. The event was not changed. Specify a time in the future"
+        ger "Die Ausführungszeit des Events liegt in der Vergangenheit, und ON COMPLETION NOT PRESERVE ist gesetzt. Das Event wurde nicht geändert. Geben Sie einen Zeitpunkt in der Zukunft an"
+        spa "El tiempo de ejecución del evento se encuentra en el pasado y está configurado ON COMPLETION NOT PRESERVE. El evento no fue cambiado. Especifique un tiempo del futuro"
+ER_SLAVE_INCIDENT
+        chi "事件%s发生在master上。消息:%-.64s"
+        eng "The incident %s occurred on the master. Message: %-.64s"
+        ger "Der Vorfall %s passierte auf dem Master. Meldung: %-.64s"
+        geo "მთავარი სერვერის ინციდენტი %s. შეტყობინება: %-.64s"
+        spa "Ha ocurrido un incidente %s en el maestro (master). Mensaje: %-.64s"
+ER_NO_PARTITION_FOR_GIVEN_VALUE_SILENT
+        chi "表对某些现有值没有分区"
+        eng "Table has no partition for some existing values"
+        ger "Tabelle hat für einige bestehende Werte keine Partition"
+        geo "ზოგიერთი არსებული მნიშვნელობისთვის ცხრილს დანაყოფი არ გააჩნია"
+        spa "La tabla no tiene partición para algunos valores existentes"
+ER_BINLOG_UNSAFE_STATEMENT
+        chi "自从BINLOG_FORMAT =STATEMENT以来,使用语句格式写入二进制日志的不安全语句。%s."
+        eng "Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. %s"
+        ger "Unsichere Anweisung ins Binärlog geschrieben, weil Anweisungsformat BINLOG_FORMAT = STATEMENT. %s"
+        spa "Se ha grabado una sentencia no segura en historial (log) binario usando el formato de sentencia desde BINLOG_FORMAT = STATEMENT. %s"
+        swe "Detta är inte säkert att logga i statement-format, för BINLOG_FORMAT = STATEMENT. %s"
+ER_SLAVE_FATAL_ERROR
+        chi "致命错误:%s"
+        eng "Fatal error: %s"
+        ger "Fataler Fehler: %s"
+        geo "ფატალური შეცდომა: %s"
+        spa "Error fatal: %s"
+ER_SLAVE_RELAY_LOG_READ_FAILURE
+        chi "relay日志读取失败:%s"
+        eng "Relay log read failure: %s"
+        ger "Relaylog-Lesefehler: %s"
+        geo "RelayLog-ის წაკითხვის შეცდომა: %s"
+        spa "Fallo de lectura en historial (log) de reenvío: %s"
+ER_SLAVE_RELAY_LOG_WRITE_FAILURE
+        chi "relay日志写入失败:%s"
+        eng "Relay log write failure: %s"
+        ger "Relaylog-Schreibfehler: %s"
+        geo "RelayLog-ში ჩაწერის შეცდომა: %s"
+        spa "Fallo de grabación en historial (log) de reenvío: %s"
+ER_SLAVE_CREATE_EVENT_FAILURE
+        chi "无法创建%s"
+        eng "Failed to create %s"
+        ger "Erzeugen von %s fehlgeschlagen"
+        geo "%s-ის შექმნის შეცდომა"
+        hindi "%s को बनाने मैं असफल रहे"
+        spa "Fallo al crear %s"
+ER_SLAVE_MASTER_COM_FAILURE
+        chi "Master命令%s失败:%s"
+        eng "Master command %s failed: %s"
+        ger "Master-Befehl %s fehlgeschlagen: %s"
+        geo "მთავარი სერვერის ბრძანება %s ავარიულად დასრულდა: %s"
+        spa "El comando maestro (master) %s ha fallado: %s"
+ER_BINLOG_LOGGING_IMPOSSIBLE
+        chi "二进制记录不可能。消息:%s"
+        eng "Binary logging not possible. Message: %s"
+        ger "Binärlogging nicht möglich. Meldung: %s"
+        geo "ბინარული ჟურნალი შეუძლებელია. შეტყობინება: %s"
+        spa "No es posible llevar historial (log) binario. Mensaje: %s"
+ER_VIEW_NO_CREATION_CTX
+        chi "View%`s.%`s没有创建上下文"
+        eng "View %`s.%`s has no creation context"
+        ger "View %`s.%`s hat keinen Erzeugungskontext"
+        geo "ხედს %`s.%`s შექმნის კონტექსტი არ გააჩნია"
+        spa "La vista %`s.%`s no tiene contexto de creación"
+ER_VIEW_INVALID_CREATION_CTX
+        chi "Creation View%`s.%`s的上下文无效"
+        eng "Creation context of view %`s.%`s is invalid"
+        ger "Erzeugungskontext des Views%`s.%`s ist ungültig"
+        geo "ხედის %`s.%`s შექნის კონტექსტი არასწორია"
+        spa "El contexto de creación de la vista %`s.%`s es inválido"
+ER_SR_INVALID_CREATION_CTX
+        chi "存储例程%`s.%`s的创建上下文无效"
+        eng "Creation context of stored routine %`s.%`s is invalid"
+        ger "Erzeugungskontext der gespeicherten Routine%`s.%`s ist ungültig"
+        geo "დამახსოვრებული ქვეპროგრამის %`s.%`s შექმნის კონტექსტი არასწორია"
+        spa "El contexto de creación de la rutina almacenada %`s.%`s es inválido"
+ER_TRG_CORRUPTED_FILE
+        chi "表的trg文件损坏了。%`s.%`s"
+        eng "Corrupted TRG file for table %`s.%`s"
+        ger "Beschädigte TRG-Datei für Tabelle %`s.%`s"
+        geo "დაზიანებული TRG ფაილი ცხრილისთვის %`s.%`s"
+        spa "Fichero/archivo TRG estropeado para la tabla %`s.%`s"
+ER_TRG_NO_CREATION_CTX
+        chi "表%`s.%`s的触发器没有创建上下文"
+        eng "Triggers for table %`s.%`s have no creation context"
+        ger "Trigger für Tabelle %`s.%`s haben keinen Erzeugungskontext"
+        geo "ტრიგერებს ცხრილისთვის %`s.%`s შექმნის კონტექსტი არ გააჩნიათ"
+        spa "Los disparadores para la tabla %`s.%`s no tienen contexto de creación"
+ER_TRG_INVALID_CREATION_CTX
+        chi "触发表%`s.%`s的创建上下文无效"
+        eng "Trigger creation context of table %`s.%`s is invalid"
+        ger "Trigger-Erzeugungskontext der Tabelle %`s.%`s ist ungültig"
+        geo "ტრიგერის შექმნის კონტექსტი ცხრილისთვის  %`s.%`s  არასწორია"
+        spa "El contexto de creación del disparador de la tabla %`s.%`s es inválido"
+ER_EVENT_INVALID_CREATION_CTX
+        chi "事件%`s.%`s的创建上下文无效"
+        eng "Creation context of event %`s.%`s is invalid"
+        ger "Erzeugungskontext des Events %`s.%`s ist ungültig"
+        geo "მოვლენის  %`s.%`s შექმნის კონტექსტი არასწორია"
+        spa "El contexto de creación del evento %`s.%`s es inválido"
+ER_TRG_CANT_OPEN_TABLE
+        chi "无法打开触发%`s.%`s的表"
+        eng "Cannot open table for trigger %`s.%`s"
+        ger "Kann Tabelle für den Trigger %`s.%`s nicht öffnen"
+        geo "ცხრილის გახსნის შეცდომა ტრიგერისთვის %`s.%`s"
+        spa "No puedo abrir tabla para disparador %`s.%`s"
+ER_CANT_CREATE_SROUTINE
+        chi "无法创建存储过程%`s。检查警告"
+        eng "Cannot create stored routine %`s. Check warnings"
+        ger "Kann gespeicherte Routine %`s nicht erzeugen. Beachten Sie die Warnungen"
+        geo "შენახული ქვეპროგრამის %`s შექმნის შეცდომა. შეამოწმეთ გაფრთხილებები"
+        spa "No puedo crear rutina almacenada %`s. Revise los avisos"
+ER_UNUSED_11
+        eng "You should never see it"
+        geo "ამას ვერ უნდა ხედავდეთ"
+        spa "Nunca debería vd de ver esto"
+ER_NO_FORMAT_DESCRIPTION_EVENT_BEFORE_BINLOG_STATEMENT
+        chi "类型%s的Binlog语句未在格式描述binlog语句之前"
+        eng "The BINLOG statement of type %s was not preceded by a format description BINLOG statement"
+        ger "Der BINLOG-Anweisung vom Typ %s ging keine BINLOG-Anweisung zur Formatbeschreibung voran"
+        spa "La sentencia BINLOG del tipo `%s` no ha sido precedida por una sentencia de descripción de formato BINLOG"
+ER_SLAVE_CORRUPT_EVENT
+        chi "检测到损坏的复制事件"
+        eng "Corrupted replication event was detected"
+        ger "Beschädigtes Replikationsereignis entdeckt"
+        geo "აღმოჩენილია დაზიანებული რეპლიკაციის მოვლენა"
+        spa "Se ha detectado un evento de réplica estropeado"
+ER_LOAD_DATA_INVALID_COLUMN
+        chi "LOAD DATA中的列引用(%-.64s)无效"
+        eng "Invalid column reference (%-.64s) in LOAD DATA"
+        ger "Ungültige Spaltenreferenz (%-.64s) bei LOAD DATA"
+        geo "არასწორი სვეტის მიმართვა (%-.64s) LOAD DATA-ში"
+        spa "Referencia inválida a columna (%-.64s) en LOAD DATA"
+ER_LOG_PURGE_NO_FILE
+        chi "未找到清除的log%s"
+        eng "Being purged log %s was not found"
+        ger "Zu bereinigende Logdatei %s wurde nicht gefunden"
+        geo "წასაშლელი ჟურნალი %s ვერ ვიპოვე"
+        spa "No ha sido hallado historial (log) %s siendo purgado"
+ER_XA_RBTIMEOUT XA106
+        chi "XA_RBTIMEOUT:交易分支回滚:花了太久了"
+        eng "XA_RBTIMEOUT: Transaction branch was rolled back: took too long"
+        ger "XA_RBTIMEOUT: Transaktionszweig wurde zurückgerollt: Zeitüberschreitung"
+        geo "XA_RBTIMEOUT: ტრანზაქციის ბრენჩი დაბრუნდა: მეტისმეტად დიდი დრო დასჭირდა"
+        spa "XA_RBTIMEOUT: Rama de transacción ha sido retrocedida (rolled back): transcurrido demasiado tiempo"
+ER_XA_RBDEADLOCK XA102
+        chi "XA_RBDEADLOCK:交易分支回滚:检测到死锁"
+        eng "XA_RBDEADLOCK: Transaction branch was rolled back: deadlock was detected"
+        ger "XA_RBDEADLOCK: Transaktionszweig wurde zurückgerollt: Deadlock entdeckt"
+        geo "XA_RBDEADLOCK: ტრანაქციის ბრენჩი დაბრუნდა: აღმოჩენილია უსასრულო ციკლი"
+        spa "XA_RBDEADLOCK: Rama de transacción ha sido retrocedida (rolled back): se ha detectado estancamiento (deadlock)"
+ER_NEED_REPREPARE
+        chi "prepared statement需要重新准备"
+        eng "Prepared statement needs to be re-prepared"
+        ger "Vorbereitete Anweisungen müssen noch einmal vorbereitet werden"
+        geo "საჭიროა მომზადებული გამოსახულება თავიდან მოამზადოთ"
+        spa "Sentencia preparada necesita volver a ser preparada"
+ER_DELAYED_NOT_SUPPORTED
+        chi "表'%-.192s'不支持延迟选项"
+        eng "DELAYED option not supported for table '%-.192s'"
+        ger "Die DELAYED-Option wird für Tabelle '%-.192s' nicht unterstützt"
+        geo "ცხრილისთვის '%-.192s' პარამეტრი DELAYED მხარდაჭერილი არაა"
+        spa "Opción DELAYED no soportada para la tabla '%-.192s'"
+WARN_NO_MASTER_INFO
+        eng "There is no master connection '%.*s'"
+        ger "Die Master-Info-Struktur existiert nicht '%.*s'"
+        geo "მთავარ სერვერთან მიერთება '%.*s' არ არსებობს"
+        spa "No existe conexión maestra '%.*s'"
+WARN_OPTION_IGNORED
+        eng "<%-.64s> option ignored"
+        ger "Option <%-.64s> ignoriert"
+        geo "პარამეტრი <%-.64s> იგნორირებულია"
+        spa "Opción <%-.64s> ignorada"
+ER_PLUGIN_DELETE_BUILTIN
+        chi "内置插件无法删除"
+        eng "Built-in plugins cannot be deleted"
+        ger "Eingebaute Plugins können nicht gelöscht werden"
+        geo "ჩაშენებული დამატებების წაშლა შეუძლებელია"
+        spa "No se pueden borrar los enchufes (plugins) internos"
+WARN_PLUGIN_BUSY
+        chi "插件很忙,将在关机时卸载"
+        eng "Plugin is busy and will be uninstalled on shutdown"
+        ger "Plugin wird verwendet und wird erst beim Herunterfahren deinstalliert"
+        geo "დამატება დაკავებულია და გამორთვისას წაიშლება"
+        spa "El enchufe (plugin) está ocupado y será desinstalado cuando se apague"
+ER_VARIABLE_IS_READONLY
+        chi "%s变量'%s'是只读的。使用set%s付值"
+        eng "%s variable '%s' is read-only. Use SET %s to assign the value"
+        ger "%s Variable '%s' ist nur lesbar. Benutzen Sie SET %s, um einen Wert zuzuweisen"
+        geo "%s ცვლადი '%s' მხოლოდ წაკითხვადია. მნიშვნელობის მისანიჭებლად გამოიყენეთ ბრძანება SET %s"
+        spa "%s variable '%s' es de sólo lectura. Use SET %s para asignar el valor"
+ER_WARN_ENGINE_TRANSACTION_ROLLBACK
+        chi "存储引擎%s不支持此语句的回滚。交易回滚并必须重新启动"
+        eng "Storage engine %s does not support rollback for this statement. Transaction rolled back and must be restarted"
+        ger "Speicher-Engine %s unterstützt für diese Anweisung kein Rollback. Transaktion wurde zurückgerollt und muss neu gestartet werden"
+        spa "El motor de almacenaje %s no soporta retroceso (rollback) para esta sentencia. Transacción retrocedida (rolled back) y debe de ser rearrancada"
+ER_SLAVE_HEARTBEAT_FAILURE
+        chi "意外的master心跳数据:%s"
+        eng "Unexpected master's heartbeat data: %s"
+        ger "Unerwartete Daten vom Heartbeat des Masters: %s"
+        geo "მთავარი სერვერის გულისცემის მოულოდნელი მონაცემები: %s"
+        spa "Datos inesperados de latido (heartbeat) de maestro (master): %s"
+ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE
+        chi "心跳周期的请求值是负的或超过允许的最大值(%u秒)"
+        eng "The requested value for the heartbeat period is either negative or exceeds the maximum allowed (%u seconds)"
+        geo "გულისცემის პერიოდის მოთოხვნილი მნიშვნელობა ან უარყოფითა, ან მაქსიმალურ დაშვებულ მნიშვნელობას (%u წამი) აჭარბებს"
+        spa "El valor requerido para el período de latido o es negativo o excede al máximo permitido (%u segundos)"
+ER_UNUSED_14
+        eng "You should never see it"
+        geo "ამას ვერ უნდა ხედავდეთ"
+        spa "Nunca debería vd de ver esto"
+ER_CONFLICT_FN_PARSE_ERROR
+        chi "解析冲突功能时出错。消息:%-.64s"
+        eng "Error in parsing conflict function. Message: %-.64s"
+        ger "Fehler beim Parsen einer Konflikt-Funktion. Meldung: %-.64s"
+        geo "კონფლიქტური ფუნქციის დამუშავების შეცდომა. შეტყობინება: %-.64s"
+        spa "Error al analizar función de conflicto. Mensaje: %-.64s"
+ER_EXCEPTIONS_WRITE_ERROR
+        chi "写入异常表失败。消息:%-.128s"
+        eng "Write to exceptions table failed. Message: %-.128s"
+        ger "Schreiben in Ausnahme-Tabelle fehlgeschlagen. Meldung: %-.128s"
+        geo "გამონაკლისების ცხრილში ჩაწერის შეცდომა. შეტყობინება: %-.128s"
+        spa "Ha fallado el grabar en tabla de excepciones. Mensaje: %-.128s"
+ER_TOO_LONG_TABLE_COMMENT
+        chi "表格备注'%-.64s'太长(max =%u)"
+        eng "Comment for table '%-.64s' is too long (max = %u)"
+        ger "Kommentar für Tabelle '%-.64s' ist zu lang (max = %u)"
+        geo "კომენტარი ცხრილისთვის '%-.64s' ძალიან გრძელია (მაქს. = %u)"
+        por "Comentário para a tabela '%-.64s' é longo demais (max = %u)"
+        spa "El comentario para tabla '%-.64s' es demasiado largo (máx = %u)"
+ER_TOO_LONG_FIELD_COMMENT
+        chi "字段'%-.64s'太长(max =%u)"
+        eng "Comment for field '%-.64s' is too long (max = %u)"
+        ger "Kommentar für Feld '%-.64s' ist zu lang (max = %u)"
+        geo "კომენტარი ველისთვის '%-.64s' ძალიან გრძელია (მაქს. = %u)"
+        por "Comentário para o campo '%-.64s' é longo demais (max = %u)"
+        spa "El comentario para el campo '%-.64s' es demasiado largo (máx = %u)"
+ER_FUNC_INEXISTENT_NAME_COLLISION 42000 
+        chi "FUNCTION %s不存在。在参考手册中查看“函数名称解析”部分"
+        eng "FUNCTION %s does not exist. Check the 'Function Name Parsing and Resolution' section in the Reference Manual"
+        ger "FUNCTION %s existiert nicht. Erläuterungen im Abschnitt 'Function Name Parsing and Resolution' im Referenzhandbuch"
+        geo "FUNCTION %s არ არსებობს. სახელმძღვანელოში 'ფუნქციის სახელის დამუშავების და გადაჭრის' სექცია მოძებნეთ"
+        spa "La FUNCTION %s no existe. Revise la sección de 'Function Name Parsing and Resolution' en el Manual de Referencia"
+# When updating these, please update EXPLAIN_FILENAME_MAX_EXTRA_LENGTH in
+# sql_table.h with the new maximal additional length for explain_filename.
+ER_DATABASE_NAME
+        chi "数据库"
+        eng "Database"
+        ger "Datenbank"
+        geo "მონაცემთა ბაზა"
+        hindi "डेटाबेस"
+        spa "Base de datos"
+        swe "Databas"
+ER_TABLE_NAME
+        chi "表"
+        eng "Table"
+        ger "Tabelle"
+        geo "ცხრილი"
+        hindi "टेबल"
+        spa "Tabla"
+        swe "Tabell"
+ER_PARTITION_NAME
+        chi "分区"
+        eng "Partition"
+        ger "Partition"
+        geo "დანაყოფი"
+        hindi "पार्टीशन"
+        spa "Partición"
+        swe "Partition"
+ER_SUBPARTITION_NAME
+        chi "下分区"
+        eng "Subpartition"
+        ger "Unterpartition"
+        geo "ქვედანაყოფი"
+        hindi "सब-पार्टीशन"
+        spa "Subpartición"
+        swe "Subpartition"
+ER_TEMPORARY_NAME
+        chi "暂时的"
+        eng "Temporary"
+        ger "Temporär"
+        geo "დროებითი"
+        hindi "अस्थायी"
+        spa "Temporaria"
+        swe "Temporär"
+ER_RENAMED_NAME
+        chi "重命名"
+        eng "Renamed"
+        ger "Umbenannt"
+        geo "გადარქმეულია"
+        spa "Renombrado"
+        swe "Namnändrad"
+ER_TOO_MANY_CONCURRENT_TRXS
+        chi "太多并发交易"
+        eng "Too many active concurrent transactions"
+        ger "Zu viele aktive simultane Transaktionen"
+        geo "გაშვებულია მეტისმეტად ბევრი აქტიური ერთდროული ტრანზაქცია"
+        spa "Demasiadas transacciones concurrentes activas"
+WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED
+        chi "非ASCII分隔符参数不完全支持"
+        eng "Non-ASCII separator arguments are not fully supported"
+        ger "Nicht-ASCII-Trennargumente werden nicht vollständig unterstützt"
+        geo "არა-ASCII გამყოფი არგუმენტები სრულად მხარდაჭერილი არაა"
+        spa "Los argumentos con separador No ASCII no están completamente soportados"
+ER_DEBUG_SYNC_TIMEOUT
+        chi "调试同步点等待超时"
+        eng "debug sync point wait timed out"
+        ger "Debug Sync Point Wartezeit überschritten"
+        geo "გამართვის სინქრონიზაციის წერტილის მოლოდინის დრო ამოიწურა"
+        spa "agotado tiempo de espera de punto de sincronización de depuración"
+ER_DEBUG_SYNC_HIT_LIMIT
+        chi "调试同步点限制达到"
+        eng "debug sync point hit limit reached"
+        ger "Debug Sync Point Hit Limit erreicht"
+        geo "მიღწეულია სინქრონიზაციის წერტილის ზღვარი"
+        spa "alcanzado límite de punto de sincronización de depuración"
+ER_DUP_SIGNAL_SET 42000
+        chi "重复条件信息项'%s'"
+        eng "Duplicate condition information item '%s'"
+        ger "Informationselement '%s' für Duplikatbedingung"
+        geo "დუბლირებული პირობის ინფორმაცია ჩანაწერზე '%s'"
+        spa "Duplicado elemento de información de condición '%s'"
+# Note that the SQLSTATE is not 01000, it is provided by SIGNAL/RESIGNAL
+ER_SIGNAL_WARN 01000
+        chi "未处理用户定义的警告条件"
+        eng "Unhandled user-defined warning condition"
+        ger "Unbehandelte benutzerdefinierte Warnbedingung"
+        geo "დაუმუშავებელი მომხმარებლის მიერ აღწერილი გაფრთხილების პირობა"
+        spa "Condición de aviso definida por usuario sin manejar"
+# Note that the SQLSTATE is not 02000, it is provided by SIGNAL/RESIGNAL
+ER_SIGNAL_NOT_FOUND 02000
+        chi "未找到的用户定义未找到条件"
+        eng "Unhandled user-defined not found condition"
+        ger "Unbehandelte benutzerdefinierte Nicht-gefunden-Bedingung"
+        spa "Condición de no hallado definida por usuario sin manejar"
+# Note that the SQLSTATE is not HY000, it is provided by SIGNAL/RESIGNAL
+ER_SIGNAL_EXCEPTION HY000
+        chi "未处理用户定义的异常条件"
+        eng "Unhandled user-defined exception condition"
+        ger "Unbehandelte benutzerdefinierte Ausnahmebedingung"
+        spa "Condición de excepción definida por usuario sin manejar"
+ER_RESIGNAL_WITHOUT_ACTIVE_HANDLER 0K000
+        chi "RESIGNAL处理程序不活跃"
+        eng "RESIGNAL when handler not active"
+        ger "RESIGNAL bei nicht aktivem Handler"
+        geo "RESIGNAL როცა დამმუშავებელი აქტიური არაა"
+        spa "RESIGNAL cuando el manejador no está activo"
+ER_SIGNAL_BAD_CONDITION_TYPE
+        chi "SIGNAL/RESIGNAL只能使用SQLState定义的条件"
+        eng "SIGNAL/RESIGNAL can only use a CONDITION defined with SQLSTATE"
+        ger "SIGNAL/RESIGNAL kann nur mit einer Bedingung (CONDITION) benutzt werden, die bei SQLSTATE definiert wurde"
+        geo "SIGNAL/RESIGNAL-ს მხოლოდ SQLSTATE-ით აღწერილი CONDITION-ის გამოყენება შეუძლია"
+        spa "SIGNAL/RESIGNAL sólo pueden usar una CONDITION definida mediante SQLSTATE"
+WARN_COND_ITEM_TRUNCATED
+        chi "数据被截断为条件项目'%s'"
+        eng "Data truncated for condition item '%s'"
+        ger "Daten gekürzt für Bedingungselement '%s'"
+        geo "პირობის ჩანაწერისთვის '%s' მონაცემები წაკვეთილია"
+        spa "Datos truncados para elemento de condición '%s'"
+ER_COND_ITEM_TOO_LONG
+        chi "条件项目'%s'的数据太长"
+        eng "Data too long for condition item '%s'"
+        ger "Daten zu lang für Bedingungselement '%s'"
+        geo "პირობის ჩანაწერისთვის '%s' მონაცემები ძალიან გრძელია"
+        spa "Datos demasiado largos para elemento de condición '%s'"
+ER_UNKNOWN_LOCALE
+        chi "未知区域设置:'%-.64s'"
+        eng "Unknown locale: '%-.64s'"
+        ger "Unbekannte Locale: '%-.64s'"
+        geo "უცნობი ლოკალი: '%-.64s'"
+        spa "Localización (locale) desconocida: '%-.64s'"
+ER_SLAVE_IGNORE_SERVER_IDS
+        chi "请求的服务器ID%d与SLAVE启动选项--replicate-same-server-id冲突"
+        eng "The requested server id %d clashes with the slave startup option --replicate-same-server-id"
+        ger "Die angeforderte Server-ID %d steht im Konflikt mit der Startoption --replicate-same-server-id für den Slave"
+        spa "La id %d del servidor requerido choca con la opción de arranque del esclavo --replicate-same-server-id"
+ER_QUERY_CACHE_DISABLED
+        chi "查询缓存已禁用;将query_cache_type设置为ON或DEMAND启用它"
+        eng "Query cache is disabled; set query_cache_type to ON or DEMAND to enable it"
+        spa "Caché de consulta (query) desactivada; configura query_cache_type a ON o DEMAND para activarla"
+ER_SAME_NAME_PARTITION_FIELD
+        chi "重复分区字段名称'%-.192s'"
+        eng "Duplicate partition field name '%-.192s'"
+        ger "Partitionsfeld '%-.192s' ist ein Duplikat"
+        spa "Nombre de campo de partición duplicado '%-.192s'"
+ER_PARTITION_COLUMN_LIST_ERROR
+        chi "分区用的列和列表使用不一致"
+        eng "Inconsistency in usage of column lists for partitioning"
+        ger "Inkonsistenz bei der Benutzung von Spaltenlisten für Partitionierung"
+        spa "Inconsistencia en uso de listas de columna para particionar"
+ER_WRONG_TYPE_COLUMN_VALUE_ERROR
+        chi "不正确类型的分区列值"
+        eng "Partition column values of incorrect type"
+        ger "Partitionsspaltenwerte sind vom falschen Typ"
+        spa "Valores de columna de partición de tipo incorrecto"
+ER_TOO_MANY_PARTITION_FUNC_FIELDS_ERROR
+        chi "'%-.192s'中的太多字段"
+        eng "Too many fields in '%-.192s'"
+        ger "Zu viele Felder in '%-.192s'"
+        geo "მეტისმეტად ბევრი ველი '%-.192s'-ში"
+        spa "Demasiados campos en '%-.192s'"
+ER_MAXVALUE_IN_VALUES_IN
+        chi "不能在VALUES IN使用MAXVALUE"
+        eng "Cannot use MAXVALUE as value in VALUES IN"
+        ger "MAXVALUE kann nicht als Wert in VALUES IN verwendet werden"
+        geo "MAXVALUE-ის გამოყენება VALUES IN-ში შეუძლებელია"
+        spa "No puedo usar MAXVALUE como valor en VALUES IN"
+ER_TOO_MANY_VALUES_ERROR
+        chi "这种类型不能有多个值%-.64s 分区"
+        eng "Cannot have more than one value for this type of %-.64s partitioning"
+        ger "Für den Partionierungstyp %-.64s darf es nicht mehr als einen Wert geben"
+        geo "%-.64s ტიპის დაყოფისთვის ერთზე მეტი მნიშვნელობა ვერ გექნებათ"
+        spa "No puedo tener más de un valor para este tipo de particionamiento %-.64s"
+ER_ROW_SINGLE_PARTITION_FIELD_ERROR
+        chi "仅允许的多字段列分区的VALUES IN的行表达式"
+        eng "Row expressions in VALUES IN only allowed for multi-field column partitioning"
+        ger "Zeilenausdrücke in VALUES IN sind nur für Mehrfeld-Spaltenpartionierung erlaubt"
+        spa "Expresiones de fila en VALUES IN sólo permitidas para particionamiento de columna multi-campo"
+ER_FIELD_TYPE_NOT_ALLOWED_AS_PARTITION_FIELD
+        chi "字段'%-.192s'类型不允许为此类型的分区类型"
+        eng "Field '%-.192s' is of a not allowed type for this type of partitioning"
+        ger "Feld '%-.192s' ist für diese Art von Partitionierung von einem nicht zulässigen Typ"
+        geo "ველი %-.192s ამ ტიპის დაყოფისთვის დაუშვებელია"
+        spa "El campo '%-.192s' es de un tipo no permitido para este tipo de particionado"
+ER_PARTITION_FIELDS_TOO_LONG
+        chi "分区字段的总长度太大"
+        eng "The total length of the partitioning fields is too large"
+        ger "Die Gesamtlänge der Partitionsfelder ist zu groß"
+        geo "დაყოფის ველების ჯამური სიგრძე მეტისმეტად დიდია"
+        spa "El tamaño total de los campos de particionado es demasiado grande"
+ER_BINLOG_ROW_ENGINE_AND_STMT_ENGINE
+        chi "无法执行语句:由于引擎不能支持行和语句,因此无法写入二进制日志"
+        eng "Cannot execute statement: impossible to write to binary log since both row-incapable engines and statement-incapable engines are involved"
+        geo "გამოსახულების შესრულება შეუძლებელია: ბინარულ რეჟიმში ჩაწერა შეუძლებელია, რადგან ჩარეულია ორივე, მწკრივობის შეუძლებელი და გამოსახულების შეუძლებელი ძრავები"
+        spa "No puedo ejecutar sentencia: imposible grabar historial (log) binario ya que ambos motores de fila-incapaz y de sentencia-incapaz están involucrados"
+ER_BINLOG_ROW_MODE_AND_STMT_ENGINE
+        chi "无法执行语句:由于BINLOG_FORMAT =ROW和至少一个表使用存储引擎限制为基于语句的日志记录,因此无法写入二进制日志"
+        eng "Cannot execute statement: impossible to write to binary log since BINLOG_FORMAT = ROW and at least one table uses a storage engine limited to statement-based logging"
+        geo "გამოსახულების შესრულება შეუძლებელია: ბინარულ ჟურნალში ჩაწერა შეუძლებელია, რადგან BINLOG_FORMAT = ROW და სულ ცოტა ერთი ცხრილი იყენებს შენახვის ძრავას, რომელიც გამოსახულების-ჟურნალითაა შეზღუდული"
+        spa "No puedo ejecutar sentencia: imposible grabar historial (log) binario ya que BINLOG_FORMAT = ROW y al menos una tabla utiliza motor de almacenaje limitado a historiales basados en sentencia"
+ER_BINLOG_UNSAFE_AND_STMT_ENGINE
+        chi "无法执行语句:由于语句不安全,无法写入二进制日志,存储引擎仅限于基于语句的日志记录,而BINLOG_FORMAT = MIXED。%s."
+        eng "Cannot execute statement: impossible to write to binary log since statement is unsafe, storage engine is limited to statement-based logging, and BINLOG_FORMAT = MIXED. %s"
+        geo "გამოსახულების შესრულება შეუძლებელია: ბინარულ ჟურნალში ჩაწერა შეუძლებელია, რადგან გამოსახულება უსაფრთხო არაა, საცავის ძრავა შეზღუდულია გამოსახულებებზე-დაფუძნებულ ჟურნალზე და BINLOG_FORMAT = MIXED. %s"
+        spa "No puedo ejecutar sentencia: imposible grabar historial (log) binario ya que la sentencia no es segura, el motor de almacenaje está limitado a historial basado en sentencia y BINLOG_FORMAT = MIXED. %s"
+ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE
+        chi "无法执行语句:由于语句以行格式,至少一个表使用基于语句的日志记录的存储引擎,因此无法写入二进制日志。"
+        eng "Cannot execute statement: impossible to write to binary log since statement is in row format and at least one table uses a storage engine limited to statement-based logging"
+        geo "გამოსახულების შესრულება შეუძლებელია: ბინარულ ჟურნალში ჩაწერა შეუძლებელია, რადგან გამოსახულება მწკრივის ფორმატშია და სულ ცოტა ერთი ცხრილი იყენებს საცავის ძრავას, რომელიც გამოსახულებაზე ბაზირებული ჟურნალითაა შეზღუდული"
+        spa "No puedo ejecutar sentencia: imposible grabar historial (log) binario ya que la sentencia está en un formato de fila y al menos una tabla utiliza un motor de almacenaje limitado a historial basado en sentencia"
+ER_BINLOG_STMT_MODE_AND_ROW_ENGINE
+        chi "无法执行语句:由于BINLOG_FORMAT = STATEMENT,并且至少一个表使用存储引擎限制为基于行的日志记录,因此无法写入二进制日志。%s"
+        eng "Cannot execute statement: impossible to write to binary log since BINLOG_FORMAT = STATEMENT and at least one table uses a storage engine limited to row-based logging.%s"
+        geo "გამოსახულების შესრულება შეუძლებელია: ბინარულ ჟურნალში ჩაწერა შეუძლებელია, რადგან BINLOG_FORMAT = STATEMENT და სულ ცოტა ერთი ცხრილი იყენებს საცავის ძრავას, რომელიც მწკრივებზე ბაზირებული ჟურნალითაა შეზღუდული. %s"
+        spa "No puedo ejecutar sentencia: imposible grabar historial (log) binario ya que BINLOG_FORMAT = STATEMENT y al menos una tabla utilizan un motor de almacenaje limitado a historial basado en fila. %s"
+ER_BINLOG_ROW_INJECTION_AND_STMT_MODE
+        chi "无法执行语句:由于语句的正常格式和BINLOG_FORMAT = STATEMENT,因此无法写入二进制日志"
+        eng "Cannot execute statement: impossible to write to binary log since statement is in row format and BINLOG_FORMAT = STATEMENT"
+        geo "გამოსახულების შესრულება შეუძლებელია: ბინარულ ჟურნალში ჩაწერა შეუძლებელია, რადგან  გამოსახულება მწკრივის ფორმატშია და BINLOG_FORMAT = STATEMENT"
+        spa "No puedo ejecutar sentencia: imposible grabar historial (log) binario ya que la sentencia está en formato de fila y BINLOG_FORMAT = STATEMENT"
+ER_BINLOG_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE
+        chi "无法执行语句:由于涉及多个引擎并且至少有一个引擎是自记录的,因此无法写入二进制日志。"
+        eng "Cannot execute statement: impossible to write to binary log since more than one engine is involved and at least one engine is self-logging"
+        geo "გამოსახულების შესრულება შეუძლებელია: ბინარულ ჟურნალში ჩაწერა შეუძლებელია, რადგან ჩარეულია ერთზე მეტი ძრავა და სულ ცოტა ერთი ძრავა ჟურნალს თავისთან ინახავს"
+        spa "No puedo ejecutar sentencia: imposible grabar historial (log) binario ya que hay más de un motor involucrado y al menos un motor usa auto-historial"
+ER_BINLOG_UNSAFE_LIMIT
+        chi "该语句不安全,因为它使用限制子句。这不安全,因为所包含的一组行无法预测"
+        eng "The statement is unsafe because it uses a LIMIT clause. This is unsafe because the set of rows included cannot be predicted"
+        geo "გამოსახულება უსაფრთხო არაა, რადგან ის LIMIT პირობას იყენებს. ეს უსაფრთხო არაა, რადგან მწკრივების ნაკრები, რომელიც ჩასმული იქნება, წინასწარ უცნობია"
+        spa "La sentencia no es segura debido a usar una cláusula LIMIT. No es segura porque el conjunto incluido de filas no se puede predecir"
+ER_BINLOG_UNSAFE_INSERT_DELAYED
+        chi "该声明不安全,因为它使用插入延迟。这是不安全的,因为无法预测插入行的时间"
+        eng "The statement is unsafe because it uses INSERT DELAYED. This is unsafe because the times when rows are inserted cannot be predicted"
+        geo "გამოსახულება უსაფრთხო არაა, რადგან ის INSERT DELAYED-ს იყენებს. ეს უსაფრთხო არაა, რადგან  წინასწარ უცნობია, რამდენჯერ მოხდება მწკრივების ჩასმა"
+        spa "La sentencia no es segura debido a usar una INSERT DELAYED. No es segura porque los momentos en que las filas han de insertarse no se pueden predecir"
+ER_BINLOG_UNSAFE_SYSTEM_TABLE
+        chi "该声明不安全,因为它使用常规日志,慢查询日志或performance_schema表。这是不安全的,因为系统表可能在slave上不同"
+        eng "The statement is unsafe because it uses the general log, slow query log, or performance_schema table(s). This is unsafe because system tables may differ on slaves"
+        spa "La sentencia no es segura debido a usar el historial (log) general, un historial (log) lento o tabla(s) de performance_schema. No es segura porque las tablas de sistema pueden diferir de las esclavas"
+ER_BINLOG_UNSAFE_AUTOINC_COLUMNS
+        chi "语句不安全,因为它调用了插入AUTO_INCREMENT列的触发器或存储函数。插入的值无法正确记录"
+        eng "Statement is unsafe because it invokes a trigger or a stored function that inserts into an AUTO_INCREMENT column. Inserted values cannot be logged correctly"
+        geo "გამოსახულება უსაფრთხო არაა, რადგან ის უშვებს ტრიგერს ან დამახსოვრებულ ფუნქციას, რომელიც AUTO_INCREMENT სვეტში ამატებს. ჩასული მნიშვნელობების ჟურნალში სწორად ჩაწერა შეუძლებელია"
+        spa "La sentencia no es segura debido a invocar un disparador o una función almacenada que inserta una columna de AUTO_INCREMENT. No se puede llevar historial correcto de los valores insertados."
+ER_BINLOG_UNSAFE_UDF
+        chi "语句不安全,因为它使用了一个可能在从设备上返回相同值的UDF"
+        eng "Statement is unsafe because it uses a UDF which may not return the same value on the slave"
+        geo "გამოსახულება უსაფრთხო არაა, რადგან ის იყენებს UDF-ს, რომელმაც შეიძლება დამორჩილებულ სერვერებზე იგივე მნიშვნელობა არ დააბრუნოს"
+        spa "La sentencia no es segura porque usa un UDF que puede no devolver el mismo valor en el esclavo"
+ER_BINLOG_UNSAFE_SYSTEM_VARIABLE
+        chi "语句不安全,因为它使用的系统变量可能在从站上具有不同的值"
+        eng "Statement is unsafe because it uses a system variable that may have a different value on the slave"
+        spa "La sentencia no es segura porque usa una variable de sistema que puede tener un valor diferente en el esclavo"
+ER_BINLOG_UNSAFE_SYSTEM_FUNCTION
+        chi "语句不安全,因为它使用系统函数可能在从站上返回不同的值"
+        eng "Statement is unsafe because it uses a system function that may return a different value on the slave"
+        geo "გამოსახულება უსაფრთხო არაა, რადგან ის იყენებს სისტემურ ფუნქციას, რომელმაც შეიძლება დამორჩილებულ სერვერებზე იგივე მნიშვნელობა არ დააბრუნოს"
+        spa "La sentencia no es segura porque usa una función de sistema que puede devolver un valor diferente en el esclavo"
+ER_BINLOG_UNSAFE_NONTRANS_AFTER_TRANS
+        chi "语句不安全,因为它在访问同一事务中访问事务表后访问非事务性表"
+        eng "Statement is unsafe because it accesses a non-transactional table after accessing a transactional table within the same transaction"
+        geo "გამოსახულება უსაფრთხო არაა, რადგან ის ერთი ტრანზაქციიდან არატრანზაქციულ და ტრანზაქციულ ცხრილებს ერთდროულად მიმართავს"
+        spa "La sentencia no es segura porque accede a una tabla no transaccional tras acceder a una transaccional dentro de la misma transacción"
+ER_MESSAGE_AND_STATEMENT
+  chi "%s.语句:%s"
+  eng "%s. Statement: %s"
+  geo "%s. გამოსახულება: %s"
+  spa "%s. Sentencia: %s"
+
+ER_SLAVE_CONVERSION_FAILED
+        chi "列%d表'%-.192s.%-.192s'无法从'%-.50s'类型为'%-.50s'"
+        eng "Column %d of table '%-.192s.%-.192s' cannot be converted from type '%-.50s' to type '%-.50s'"
+        geo "სვეტის %d ცხრილიდან '%-.192s.%-.192s' გადაყვანა ტიპიდან  '%-.50s' ტიპში  '%-.50s' შეუძლებელია"
+        spa "La columna %d de la tabla '%-.192s.%-.192s' no puede ser convertida desde el tipo '%-.50s' al tipo '%-.50s'"
+ER_SLAVE_CANT_CREATE_CONVERSION
+        chi "无法为表创建转换表'%-.192s.%-.192s'"
+        eng "Can't create conversion table for table '%-.192s.%-.192s'"
+        geo "ცხრილისთვის '%-.192s.%-.192s' გადაყვანის ცხრილის შექმნა შეუძლებელია"
+        spa "No puedo crear tabla de conversión para la tabla '%-.192s.%-.192s'"
+ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_BINLOG_FORMAT
+        chi "无法在事务中修改@@session.binlog_format"
+        eng "Cannot modify @@session.binlog_format inside a transaction"
+        geo "@@session.binlog_format-ის შეცვლა ტრანზაქციის შიგნიდან შეუძლებელია"
+        spa "No puedo modificar @@session.binlog_format dentro de una transacción"
+ER_PATH_LENGTH
+        chi "指定%.64T的路径太长了"
+        eng "The path specified for %.64T is too long"
+        geo "%.64T-სთვის მითითებული ბილიკი მეტისმეტად გრძელია"
+        hindi "%.64T के लिए निर्दिष्ट पथ बहुत लंबा है"
+        spa "La ruta especificada para %.64T es demasiado larga"
+ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT
+        chi "'%s'被弃用,将在将来的版本中删除"
+        eng "'%s' is deprecated and will be removed in a future release"
+        ger "'%s' ist veraltet und wird in einer zukünftigen Version entfernt werden"
+        geo "'%s' მოძველებულია და მომავალ ვერსიებში წაიშლება"
+        spa "'%s' está obsoleto y será quitada en una entrega futura"
+ER_WRONG_NATIVE_TABLE_STRUCTURE
+        chi "本机表'%-.64s'。'%-.64s'具有错误的结构"
+        eng "Native table '%-.64s'.'%-.64s' has the wrong structure"
+        spa "La tabla nativa '%-.64s'.'%-.64s' tiene una estructura equivocada"
+ER_WRONG_PERFSCHEMA_USAGE
+        chi "performance_schema使用无效"
+        eng "Invalid performance_schema usage"
+        geo "performance_schema-ის არასწორი გამოყენება"
+        hindi "performance_schema का अवैध उपयोग"
+        spa "Uso inválido de performance_schema"
+ER_WARN_I_S_SKIPPED_TABLE
+        chi "表'%s'.'%s'由于并发DDL语句正在修改其定义,因此跳过"
+        eng "Table '%s'.'%s' was skipped since its definition is being modified by concurrent DDL statement"
+        geo "ცხრილი '%s'.'%s' გამოტოვებულია, რადგან მისი აღწერა ერთდროული DDL გამოსახულების მიერ იცვლება"
+        spa "La tabla '%s'.'%s' fue saltada ya que su definición está siendo modificada por la sentencia DDL concurrente"
+ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_BINLOG_DIRECT
+        chi "无法在交易事务中修改@@session.binlog_direct_non_transactional_updates"
+        eng "Cannot modify @@session.binlog_direct_non_transactional_updates inside a transaction"
+        geo "@@session.binlog_direct_non_transactional_updates-ის ტრანზაქციის შიგნიდან შეცვლა შეუძლებელია"
+        spa "No puedo modificar @@session.binlog_direct_non_transactional_updates dentro de una transacción"
+ER_STORED_FUNCTION_PREVENTS_SWITCH_BINLOG_DIRECT
+        chi "无法在存储的函数或触发器内更改Binlog Direct标志"
+        eng "Cannot change the binlog direct flag inside a stored function or trigger"
+        geo "binlog-ის direct ალმის დამახსოვრებული ფუნქციის ან ტრიგერის შიგნიდან შეცვლა შეუძლებელია"
+        spa "No puedo cambiar la bandera directa de binlog dentro de una función almacenada o de un disparador"
+ER_SPATIAL_MUST_HAVE_GEOM_COL 42000
+        chi "空间索引可以仅包含几何类型列"
+        eng "A SPATIAL index may only contain a geometrical type column"
+        ger "Ein raumbezogener Index (SPATIAL) darf nur Spalten geometrischen Typs enthalten"
+        geo "SPATIAL ტიპის ინდექსი მხოლოდ გეომეტრიული ტიპის სვეტს შეიძლება შეიცავდეს"
+        spa "Un índice SPATIAL sólo puede contener una columna de tipo geométrico"
+ER_TOO_LONG_INDEX_COMMENT
+        chi "索引评论'%-.64s'太长(max =%lu)"
+        eng "Comment for index '%-.64s' is too long (max = %lu)"
+        geo "კომენტარი ინდექსისთვის '%-.64s' ძალიან გრძელია (მაქს. = %lu)"
+        spa "El comentario para el índice '%-.64s' es demasiado largo (máx = %lu)"
+ER_LOCK_ABORTED
+        chi "由于待处理的独家锁,等待锁被中止"
+        eng "Wait on a lock was aborted due to a pending exclusive lock"
+        spa "Se ha abortado la espera por un bloqueo debido a bloqueo exclusivo pendiente"
+ER_DATA_OUT_OF_RANGE 22003
+        chi "%s值超出'%s'范围"
+        eng "%s value is out of range in '%s'"
+        geo "%s-ის მნიშვნელობა '%s'-ში დიაპაზონს გარეთაა"
+        spa "%s valor se encuentra fuera de rango en '%s'"
+ER_WRONG_SPVAR_TYPE_IN_LIMIT
+        chi "基于非整数类型的基于LIMIT子句的变量"
+        eng "A variable of a non-integer based type in LIMIT clause"
+        geo "LIMIT პირობაში ცვლადი არამთელი ტიპისაა"
+        spa "Una variable de tipo basado en no entero en cláusula LIMIT"
+ER_BINLOG_UNSAFE_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE
+        chi "混合声明中的自记录和非自动记录引擎是不安全的"
+        eng "Mixing self-logging and non-self-logging engines in a statement is unsafe"
+        spa "No es segura la mezcla de motores de auto-historial (log) y de no auto-historial en una sentencia"
+ER_BINLOG_UNSAFE_MIXED_STATEMENT
+        chi "语句访问非致突变表以及事务性或临时表,并写入其中任何一个"
+        eng "Statement accesses nontransactional table as well as transactional or temporary table, and writes to any of them"
+        spa "La sentencia accede a tabla no transaccional así como transaccional o tabla temporal y graba en cualquiera de ellas"
+ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_SQL_LOG_BIN
+        chi "无法修改事务中的@@sessient.sql_log_bin"
+        eng "Cannot modify @@session.sql_log_bin inside a transaction"
+        geo "@@session.sql_log_bin-ის ტრანზაქციის შიგნით შეცვლა შეუძლებელია"
+        spa "No puedo modificar @@session.sql_log_bin dentro de una transacción"
+ER_STORED_FUNCTION_PREVENTS_SWITCH_SQL_LOG_BIN
+        chi "无法在存储的函数或触发器内更改SQL_LOG_BIN"
+        eng "Cannot change the sql_log_bin inside a stored function or trigger"
+        geo "sql_log_bin-ის შეცვლა დამახსოვრებული ფუნქციის და ტრიგერის შიგნით შეუძლებელია"
+        spa "No puedo cambiar sql_log_bin dentro de una función almacenada o disparador"
+ER_FAILED_READ_FROM_PAR_FILE
+        chi "无法从.par文件中读取"
+        eng "Failed to read from the .par file"
+        geo ".par ფაილიდან წაკითხვის შეცდომა"
+        hindi ".par फ़ाइल से पढ़ने में असफल रहे"
+        spa "No pude leer desde fichero/archivo .par"
+        swe "Misslyckades läsa från .par filen"
+ER_VALUES_IS_NOT_INT_TYPE_ERROR
+        chi "分区的值'%-.64s'必须具有类型INT"
+        eng "VALUES value for partition '%-.64s' must have type INT"
+        geo "VALUES-ის მნიშვნელობა დანაყოფისთვის '%-.64s' INT ტიპის უნდა იყოს"
+        spa "El valor VALUES para la partición '%-.64s' debe de tener el tipo INT"
+        swe "Värden i VALUES för partition '%-.64s' måste ha typen INT"
+ER_ACCESS_DENIED_NO_PASSWORD_ERROR 28000 
+        chi "拒绝用户'%s'@'%s'"
+        cze "Přístup pro uživatele '%s'@'%s'"
+        dan "Adgang nægtet bruger: '%s'@'%s'"
+        eng "Access denied for user '%s'@'%s'"
+        est "Ligipääs keelatud kasutajale '%s'@'%s'"
+        fre "Accès refusé pour l'utilisateur: '%s'@'%s'"
+        ger "Benutzer '%s'@'%s' hat keine Zugriffsberechtigung"
+        geo "მომხმარებლისთვის '%s'@'%s' წვდომა აკრძალულია"
+        greek "Δεν επιτέρεται η πρόσβαση στο χρήστη: '%s'@'%s'"
+        hindi "यूज़र '%s'@'%s' को अनुमति नहीं है"
+        hun "A(z) '%s'@'%s' felhasznalo szamara tiltott eleres"
+        ita "Accesso non consentito per l'utente: '%s'@'%s'"
+        kor "'%s'@'%s' 사용자는 접근이 거부 되었습니다."
+        nla "Toegang geweigerd voor gebruiker: '%s'@'%s'"
+        nor "Tilgang nektet for bruker: '%s'@'%s'"
+        norwegian-ny "Tilgang ikke tillate for brukar: '%s'@'%s'"
+        por "Acesso negado para o usuário '%s'@'%s'"
+        rum "Acces interzis pentru utilizatorul: '%s'@'%s'"
+        rus "Доступ закрыт для пользователя '%s'@'%s'"
+        serbian "Pristup je zabranjen korisniku '%s'@'%s'"
+        slo "Zakázaný prístup pre užívateľa: '%s'@'%s'"
+        spa "Acceso denegado para usuario: '%s'@'%s'"
+        swe "Användare '%s'@'%s' är ej berättigad att logga in"
+        ukr "Доступ заборонено для користувача: '%s'@'%s'"
+
+ER_SET_PASSWORD_AUTH_PLUGIN
+        chi "通过%s插件验证的用户忽略SET PASSWORD"
+        eng "SET PASSWORD is ignored for users authenticating via %s plugin"
+        geo "მომხმარებლებისთვის, რომლებიც ავთენტიკაციას %s დამატებით გადიან, SET PASSWORD იგნორირებულია"
+        spa "SET PASSWORD no tiene significado para usuarios que se autentican vía enchufe (plugin)  %s"
+
+ER_GRANT_PLUGIN_USER_EXISTS
+        chi "由于用户%-.*s已经存在,GRANT IDENTIFIED WITH授权是非法的"
+        eng "GRANT with IDENTIFIED WITH is illegal because the user %-.*s already exists"
+        geo "ბრძანება GRANT პარამეტრით IDENTIFIED WITH დაუშვებელია, რადგან მოხმარებელი %-.*s უკვე არსებობს"
+        spa "GRANT con IDENTIFIED WITH es ilegal porque el usuario %-.*s ya existe"
+
+ER_TRUNCATE_ILLEGAL_FK 42000
+        chi "无法截断外键约束中引用的表(%.192s)"
+        eng "Cannot truncate a table referenced in a foreign key constraint (%.192s)"
+        spa "No puedo truncar una tabla referenciada en una restricción de clave foránea (%.192s)"
+
+ER_PLUGIN_IS_PERMANENT
+        chi "插件'%s'是force_plus_permanent,无法卸载"
+        eng "Plugin '%s' is force_plus_permanent and can not be unloaded"
+        geo "დამატება '%s'-ი force_plus_permanent ტიპისაა და მისი გამოტვირთვა შეუძლებელია"
+        spa "El enchufe (plugin) '%s' está force_plus_permanent y no puede ser descargado"
+
+ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE_MIN
+        chi "心跳期的要求值小于1毫秒。该值重置为0,这意味着心跳将有效地禁用"
+        eng "The requested value for the heartbeat period is less than 1 millisecond. The value is reset to 0, meaning that heartbeating will effectively be disabled"
+        geo "გულისცემის პერიოდის მოთხოვნილი მნიშვნელობა 1 მილიწამზე ნაკლებია. მნიშვნელობა ნულს გაუტოლდა, რაც ნიშნავს, რომ გულისცემა გამორთულია"
+        spa "El valor de requerimiento para el período de latido es menor de 1 milisegundo. El valor se vuelve a poner a 0, indicando que el latido será efectivamente desactivado"
+
+ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE_MAX
+        chi "心跳期的请求值超出了“slave_net_timeout”秒的值。该期间的明智价值应小于超时"
+        eng "The requested value for the heartbeat period exceeds the value of `slave_net_timeout' seconds. A sensible value for the period should be less than the timeout"
+        spa "El valor de requerimiento para el período de latido excede el valor de `slave_net_timeout` segundos. Un valor sensible para el período debería de ser menor que el 'tiempo agotado'"
+
+ER_STMT_CACHE_FULL
+        chi "需要多行语句超过“max_binlog_stmt_cache_size”字节的存储;增加这个mariadbd变量,然后重试"
+        eng "Multi-row statements required more than 'max_binlog_stmt_cache_size' bytes of storage."
+        spa "Las sentencias Multi-fila requieren de más de 'max_binlog_stmt_cache_size' bytes para almacenaje."
+
+ER_MULTI_UPDATE_KEY_CONFLICT
+        chi "由于表格被更新为'%-.192s'和'%-.192s',因此不允许允许主键/分区索引更新。"
+        eng "Primary key/partition key update is not allowed since the table is updated both as '%-.192s' and '%-.192s'"
+        spa "La actualización de clave primaria de o de partición no está permitida por estar la tabla actualizada como '%-.192s' y '%-.192s'"
+# When translating this error message make sure to include "ALTER TABLE" in the
+# message as mariadb-check parses the error message looking for ALTER TABLE.
+ER_TABLE_NEEDS_REBUILD
+        chi "表需重建。请做“ALTER TABLE %`s FORCE”或转储/重新加载以修复它!"
+        eng "Table rebuild required. Please do \"ALTER TABLE %`s FORCE\" or dump/reload to fix it!"
+        spa "Se requiere reconstrucción de la tabla. Por favor, ¡haga un \"ALTER TABLE %`s FORCE\" o volcado/recarga para solucionarlo!"
+
+WARN_OPTION_BELOW_LIMIT
+        chi "'%s'的值应该不小于'%s'的值"
+        eng "The value of '%s' should be no less than the value of '%s'"
+        geo "'%s'-ის მნიშვნელობა '%s'-ის მნიშვნელობაზე ნაკლები არ უნდა იყოს"
+        spa "El valor de '%s' debería de ser no menor que el valor de '%s'"
+
+ER_INDEX_COLUMN_TOO_LONG
+        chi "索引列太大。最大列大小为%lu字节"
+        eng "Index column size too large. The maximum column size is %lu bytes"
+        geo "ინდექსის სვეტის ზომა ძალიან დიდია. სვეტის მაქსიმალური ზომაა %lu ბაიტი"
+        spa "El tamaño de índice de columna es demasiado grande. El tamaño máximo de columna es de %lu bytes"
+
+ER_ERROR_IN_TRIGGER_BODY
+        chi "触发器'%-.64s'内存在错误:'%-.256s'"
+        eng "Trigger '%-.64s' has an error in its body: '%-.256s'"
+        geo "შეცდომა ტრიგერის ('%-.64s') სხეულში: '%-.256s'"
+        spa "El disparador '%-.64s' tiene una error en su cuerpo: '%-.256s'"
+
+ER_ERROR_IN_UNKNOWN_TRIGGER_BODY
+        chi "未知触发器内存在错误:'%-.256s'"
+        eng "Unknown trigger has an error in its body: '%-.256s'"
+        geo "შეცდომა უცნობი ტრიგერის სხეულში: '%-.256s'"
+        spa "El disparador desconocido tiene un error en su cuerpo: '%-.256s'"
+
+ER_INDEX_CORRUPT
+        chi "索引%s已损坏"
+        eng "Index %s is corrupted"
+        geo "ინდექსი %s დაზიანებულია"
+        spa "El índice %s está estropeado"
+
+ER_UNDO_RECORD_TOO_BIG
+        chi "撤消日志记录太大"
+        eng "Undo log record is too big"
+        geo "დაბრუნების ჟურნალის ჩანაწერი მეტისმეტად დიდია"
+        spa "El registro de historial (log) para deshacer es demasiado grande"
+
+ER_BINLOG_UNSAFE_INSERT_IGNORE_SELECT
+        chi "INSERT IGNORE...SELECT不安全,因为选择由select检索行的顺序确定哪个(如果有)行被忽略。无法预测此顺序,并且在master和slave方面可能有所不同" 
+        eng "INSERT IGNORE... SELECT is unsafe because the order in which rows are retrieved by the SELECT determines which (if any) rows are ignored. This order cannot be predicted and may differ on master and the slave"
+        spa "INSERT IGNORE... SELECT es no seguro porque el orden en el que las filas se recuperan por el SELECT determina qué filas (si hay alguna) son ignoradas. Este orden no puede ser predicho y puede diferir entre maestro (master) y esclavo"
+
+ER_BINLOG_UNSAFE_INSERT_SELECT_UPDATE
+        chi "INSERT... SELECT... ON DUPLICATE KEY UPDATE是不安全的,因为SELECT检索行的顺序确定哪个(如果有的话)是更新的。无法预测此顺序,并且在master和slave方面可能有所不同"
+        eng "INSERT... SELECT... ON DUPLICATE KEY UPDATE is unsafe because the order in which rows are retrieved by the SELECT determines which (if any) rows are updated. This order cannot be predicted and may differ on master and the slave"
+        spa "INSERT... SELECT... ON DUPLICATE KEY UPDATE no es seguro porque el orden en el que las filas se recuperan por el SELECT determina qué filas (si hay alguna) son actualizadas. Este orden no puede ser predicho y puede diferir entre maestro (master) y esclavo"
+
+ER_BINLOG_UNSAFE_REPLACE_SELECT
+        chi "REPLACE... SELECT 不安全,因为选择由select检索行的顺序确定哪个(如果有)行被替换。无法预测此顺序,并且在master和slave方面可能有所不同"
+        eng "REPLACE... SELECT is unsafe because the order in which rows are retrieved by the SELECT determines which (if any) rows are replaced. This order cannot be predicted and may differ on master and the slave"
+        spa "REPLACE... SELECT no es seguro porque el orden en el que las filas se recuperan por el SELECT determina qué filas (si hay alguna) son sustituidas. Este orden no puede ser predicho y puede diferir entre maestro (master) y esclavo"
+
+ER_BINLOG_UNSAFE_CREATE_IGNORE_SELECT
+        chi "CREATE... IGNORE SELECT是不安全,因为选择由SELECT检索行的顺序确定哪个(如果有)行被忽略。无法预测此顺序,并且在master和slave方面可能有所不同"
+        eng "CREATE... IGNORE SELECT is unsafe because the order in which rows are retrieved by the SELECT determines which (if any) rows are ignored. This order cannot be predicted and may differ on master and the slave"
+        spa "CREATE... IGNORE SELECT no es seguro porque el orden en el que las filas se recuperan por el SELECT determina qué filas (si hay alguna) son ignoradas. Este orden no puede ser predicho y puede diferir entre maestro (master) y esclavo"
+
+ER_BINLOG_UNSAFE_CREATE_REPLACE_SELECT
+        chi "CREATE... REPLACE SELECT不安全,因为选择由SELECT检索行的顺序确定哪个(如果有)是替换哪个(如果有的话)。无法预测此顺序,并且在master和slave方面可能有所不同"
+        eng "CREATE... REPLACE SELECT is unsafe because the order in which rows are retrieved by the SELECT determines which (if any) rows are replaced. This order cannot be predicted and may differ on master and the slave"
+        spa "CREATE... REPLACE SELECT no es seguro porque el orden en el que las filas se recuperan por el SELECT determina qué filas (si hay alguna) son sustituidas. Este orden no puede ser predicho y puede diferir entre maestro (master) y esclavo"
+
+ER_BINLOG_UNSAFE_UPDATE_IGNORE
+        chi "UPDATE IGNORE不安全,因为更新行的顺序确定了哪个(如果有)行被忽略。无法预测此顺序,并且在master和slave方面可能有所不同"
+        eng "UPDATE IGNORE is unsafe because the order in which rows are updated determines which (if any) rows are ignored. This order cannot be predicted and may differ on master and the slave"
+        spa "UPDATE IGNORE no es seguro porque el orden en el que las filas son actualizadas determina qué filas (si hay alguna) son ignoradas. Este orden no puede ser predicho y puede diferir entre maestro (master) y esclavo"
+
+ER_UNUSED_15
+        eng "You should never see it"
+        geo "ამას ვერასდროს უნდა ხედავდეთ"
+        spa "Nunca debería vd de ver esto"
+
+ER_UNUSED_16
+        eng "You should never see it"
+        geo "ამას ვერასდროს უნდა ხედავდეთ"
+        spa "Nunca debería vd de ver esto"
+
+ER_BINLOG_UNSAFE_WRITE_AUTOINC_SELECT
+        chi "从另一个表选择后,使用自动增量列的表格写入的语句是不安全的,因为检索行的顺序确定将写入哪些(如果有)行。无法预测此顺序,并且在主站和slave方面可能有所不同"
+        eng "Statements writing to a table with an auto-increment column after selecting from another table are unsafe because the order in which rows are retrieved determines what (if any) rows will be written. This order cannot be predicted and may differ on master and the slave"
+        spa "Las sentencias que graban en una tabla con columna de auto-incremento tras seleccionar desde otra tabla no son seguras porque el orden en el que las filas son recuperadas determina qué filas (si hay alguna) serán grabadas. Este orden no puede ser predicho y puede diferir entre maestro (master) y esclavo"
+
+ER_BINLOG_UNSAFE_CREATE_SELECT_AUTOINC
+        chi "创建表...在具有自动增量列的表上选择...不安全,因为选择的顺序是由select检索行的顺序,确定插入哪个(如果有)行。无法预测此订单,并且在主站和slave方面可能有所不同"
+        eng "CREATE TABLE... SELECT...  on a table with an auto-increment column is unsafe because the order in which rows are retrieved by the SELECT determines which (if any) rows are inserted. This order cannot be predicted and may differ on master and the slave"
+        spa "CREATE TABLE... SELECT... en una tabla con columna de auto-incremento no es segura porque el orden en el que las filas son recuperadas por el SELECT determina qué filas (si hay alguna) serán insertadas. Este orden no puede ser predicho y puede diferir entre maestro (master) y esclavo"
+
+ER_BINLOG_UNSAFE_INSERT_TWO_KEYS
+        chi "在具有多个唯一键的表上INSERT... ON DUPLICATE KEY UPDATE的重复索引更新是不安全的"
+        eng "INSERT... ON DUPLICATE KEY UPDATE  on a table with more than one UNIQUE KEY is unsafe"
+        spa "INSERT... ON DUPLICATE KEY UPDATE en una tabla con más de una UNIQUE KEY no es segura"
+
+ER_UNUSED_28
+        chi "你永远不应该看到它"
+        eng "You should never see it"
+        geo "ამას ვერასდროს უნდა ხედავდეთ"
+        spa "Nunca debería vd de ver esto"
+
+ER_VERS_NOT_ALLOWED
+        chi "系统版本的表%`s.%`s不允许"
+        eng "Not allowed for system-versioned table %`s.%`s"
+
+ER_BINLOG_UNSAFE_AUTOINC_NOT_FIRST
+        chi "插入AutoIncrement字段,该字段不是组成的主键中的第一部分是不安全的"
+        eng "INSERT into autoincrement field which is not the first part in the composed primary key is unsafe"
+        spa "INSERT dentro de campo con autoincremento que no es la primera parte en la clave primaria compuesta no es seguro"
+#
+#  End of 5.5 error messages.
+#
+
+ER_CANNOT_LOAD_FROM_TABLE_V2
+        chi "不能从加载%s.%s。表可能损坏了"
+        eng "Cannot load from %s.%s. The table is probably corrupted"
+        ger "Kann %s.%s nicht einlesen. Tabelle ist wahrscheinlich beschädigt"
+        geo "%s.%s-დან ჩატვირთვის პრობლემა. ალბათ ცხრილი დაზიანებულია"
+        spa "No puedo cargar desde %s.%s. La tabla está probablemente estropeada"
+
+ER_MASTER_DELAY_VALUE_OUT_OF_RANGE
+        chi "主延迟的所需值%lu超过最大%lu"
+        eng "The requested value %lu for the master delay exceeds the maximum %lu"
+        spa "El valor requerido %lu para retraso en maestro (master) excede el máximo de %lu"
+ER_ONLY_FD_AND_RBR_EVENTS_ALLOWED_IN_BINLOG_STATEMENT
+        chi "在Binlog语句中只允许Format_Description_Log_Event和行事件(但是提供了%s)"
+        eng "Only Format_description_log_event and row events are allowed in BINLOG statements (but %s was provided)"
+        spa "Sólo se permiten Format_description_log_event y eventos de fila en sentencias BINLOG (pero %s fue suministrado)"
+
+ER_PARTITION_EXCHANGE_DIFFERENT_OPTION
+        chi "分区和表之间的非匹配属性'%-.64s'"
+        eng "Non matching attribute '%-.64s' between partition and table"
+        spa "Atributo no coincidente '%-.64s' entre la partición y la tabla"
+        swe "Attributet '%-.64s' är olika mellan partition och tabell"
+ER_PARTITION_EXCHANGE_PART_TABLE
+        chi "用分区交换的表是分区:'%-.64s'"
+        eng "Table to exchange with partition is partitioned: '%-.64s'"
+        spa "La tabla para intercambiar con la partición está particionada: '%-.64s'"
+        swe "Tabellen att byta ut mot partition är partitionerad: '%-.64s'"
+ER_PARTITION_EXCHANGE_TEMP_TABLE
+        chi "与分区交换的表是临时的:'%-.64s'"
+        eng "Table to exchange with partition is temporary: '%-.64s'"
+        spa "La tabla para intercambiar con la partición es temporal: '%-.64s'"
+        swe "Tabellen att byta ut mot partition är temporär: '%-.64s'"
+ER_PARTITION_INSTEAD_OF_SUBPARTITION
+        chi "子分区表,使用子分区代替分区"
+        eng "Subpartitioned table, use subpartition instead of partition"
+        spa "La tabla subparticionada utiliza subpartición en lugar de partición"
+        swe "Subpartitionerad tabell, använd subpartition istället för partition"
+ER_UNKNOWN_PARTITION
+        chi "未知分区'%-.64s'在表'%-.64s'"
+        eng "Unknown partition '%-.64s' in table '%-.64s'"
+        spa "Partición desconocida '%-.64s' en la tabla '%-.64s'"
+        swe "Okänd partition '%-.64s' i tabell '%-.64s'"
+ER_TABLES_DIFFERENT_METADATA
+        chi "表有不同的定义"
+        eng "Tables have different definitions"
+        spa "Las tablas tienen diferentes definiciones"
+        swe "Tabellerna har olika definitioner"
+ER_ROW_DOES_NOT_MATCH_PARTITION
+        chi "找到了与分区不匹配的行"
+        eng "Found a row that does not match the partition"
+        spa "Hallada una fila que no coincide con la partición"
+        swe "Hittade en rad som inte passar i partitionen"
+ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX
+        chi "选项binlog_cache_size(%lu)大于max_binlog_cache_size(%lu);设置binlog_cache_size等于max_binlog_cache_size"
+        eng "Option binlog_cache_size (%lu) is greater than max_binlog_cache_size (%lu); setting binlog_cache_size equal to max_binlog_cache_size"
+        spa "La opción binlog_cache_size (%lu) es mayor que max_binlog_cache_size (%lu); configurando binlog_cache_size igual a max_binlog_cache_size"
+ER_WARN_INDEX_NOT_APPLICABLE
+        chi "不能使用%-.64s在索引'%-.64s'上的访问,由于字段'%-.64s'的类型或排序规则转换"
+        eng "Cannot use %-.64s access on index '%-.64s' due to type or collation conversion on field '%-.64s'"
+        spa "No puedo usar acceso %-.64s en índice '%-.64s' debido al tipo o conversión de cotejo en campo '%-.64s'"
+
+ER_PARTITION_EXCHANGE_FOREIGN_KEY
+        chi "与分区交换的表具有外键参考:'%-.64s'"
+        eng "Table to exchange with partition has foreign key references: '%-.64s'"
+        spa "La tabla para intercambiar con la partición tiene referencias a clave foránea: '%-.64s'"
+        swe "Tabellen att byta ut mot partition har foreign key referenser: '%-.64s'"
+ER_NO_SUCH_KEY_VALUE
+        chi "键值'%-.192s'在表'%-.192s.%-.192s'不存在"
+        eng "Key value '%-.192s' was not found in table '%-.192s.%-.192s'"
+        spa "Valor de clave '%-.192s' no hallado en la tabla '%-.192s.%-.192s'"
+ER_VALUE_TOO_LONG
+        chi "'%s'的价值太长了"
+        eng "Too long value for '%s'"
+        spa "Valor demasiado largo para '%s'"
+ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE
+        chi "从网络读取时,复制事件校验和验证失败"
+        eng "Replication event checksum verification failed while reading from network"
+        spa "Ha fallado la verificación de la suma de revisión de evento de réplica mientras se leía desde la red"
+ER_BINLOG_READ_EVENT_CHECKSUM_FAILURE
+        chi "从日志文件读取时复制事件校验和验证失败"
+        eng "Replication event checksum verification failed while reading from a log file"
+        spa "Ha fallado la verificación de la suma de revisión de evento de réplica mientras se leía desde fichero/archivo de historial (log)"
+
+ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX
+        chi "选项binlog_stmt_cache_size(%lu)大于max_binlog_stmt_cache_size(%lu);设置binlog_stmt_cache_size等于max_binlog_stmt_cache_size"
+        eng "Option binlog_stmt_cache_size (%lu) is greater than max_binlog_stmt_cache_size (%lu); setting binlog_stmt_cache_size equal to max_binlog_stmt_cache_size"
+        spa "La opción binlog_stmt_cache_size (%lu) es mayor que max_binlog_stmt_cache_size (%lu); configurando binlog_stmt_cache_size igual a max_binlog_stmt_cache_size"
+ER_CANT_UPDATE_TABLE_IN_CREATE_TABLE_SELECT
+        chi "无法更新表'%-.192s'正在创建'%-.192s'"
+        eng "Can't update table '%-.192s' while '%-.192s' is being created"
+        spa "No puedo actualizar tabla '%-.192s' mientras '%-.192s' está siendo creada"
+
+ER_PARTITION_CLAUSE_ON_NONPARTITIONED
+        chi "非分区表上的PARTITION()子句"
+        eng "PARTITION () clause on non partitioned table"
+        spa "Cláusula PARTITION () en tabla no particionada"
+        swe "PARTITION () klausul för en icke partitionerad tabell"
+ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET
+        chi "发现不匹配给定分区集的行"
+        eng "Found a row not matching the given partition set"
+        spa "Hallada una fila que no coincide con el conjunto dado de partición"
+        swe "Hittade en rad som inte passar i någon given partition"
+
+ER_UNUSED_5
+        eng "You should never see it"
+        spa "Nunca debería vd de ver esto"
+
+ER_CHANGE_RPL_INFO_REPOSITORY_FAILURE
+        chi "更改Replication存储库类型时失败:%s"
+        eng "Failure while changing the type of replication repository: %s"
+        spa "Fallo mientras cambiaba el tipo de repositorio de réplica: %s"
+
+ER_WARNING_NOT_COMPLETE_ROLLBACK_WITH_CREATED_TEMP_TABLE
+        chi "无法回滚一些临时表的创建"
+        eng "The creation of some temporary tables could not be rolled back"
+        spa "La creación de algunas tablas temporales no se pudo retroceder (ROLLBACK)"
+ER_WARNING_NOT_COMPLETE_ROLLBACK_WITH_DROPPED_TEMP_TABLE
+        chi "一些临时表被删除,但这些操作无法回滚"
+        eng "Some temporary tables were dropped, but these operations could not be rolled back"
+        spa "Algunas tablas temporales fueron eliminadas, pero estas operaciones no se pudieron retroceder (ROLLBACK)"
+
+ER_MTS_FEATURE_IS_NOT_SUPPORTED
+        chi "%s不支持多线程从模式。%s."
+        eng "%s is not supported in multi-threaded slave mode. %s"
+        geo "დამორჩილებული სერვერის მრავალკანადიან რეჟიმში %s მხარდაჭერილი არაა. %s"
+        spa "%s no se soporta en modo esclavo multi-hilo (thread). %s"
+ER_MTS_UPDATED_DBS_GREATER_MAX
+        chi "修改的数据库的数量超过了最大%d;数据库名称不会包含在Replication事件元数据中"
+        eng "The number of modified databases exceeds the maximum %d; the database names will not be included in the replication event metadata"
+        spa "El número de bases de datos modificadas excede el máximo de %d; los nombres de base de datos no serán incluidos en los metadatos de eventos de réplica"
+ER_MTS_CANT_PARALLEL
+        chi "无法以并行模式执行当前事件组。遇到事件%s,中继日志名称%s,position%s,它防止并行模式执行此事件组。原因:%s"
+        eng "Cannot execute the current event group in the parallel mode. Encountered event %s, relay-log name %s, position %s which prevents execution of this event group in parallel mode. Reason: %s"
+        spa "No puedo ejecutar el grupo de eventos actual en modo paralelo. Encontrado evento %s, nombre de historial (log) de reenvío %s, posición %s que previene la ejecución de este grupo de eventos en modo paralelo. Motivo: %s"
+ER_MTS_INCONSISTENT_DATA
+        eng "%s"
+        geo "%s"
+
+ER_FULLTEXT_NOT_SUPPORTED_WITH_PARTITIONING
+        chi "分区表不支持FullText索引"
+        eng "FULLTEXT index is not supported for partitioned tables"
+        geo "დაყოფილ ცხრილებზე FULLTEXT ინდექსი მხარდაჭერილი არაა"
+        spa "El índice FULLTEXT no está soportado para tablas particionadas"
+        swe "FULLTEXT index stöds ej för partitionerade tabeller"
+
+ER_DA_INVALID_CONDITION_NUMBER 35000
+        chi "无效条件号"
+        eng "Invalid condition number"
+        geo "პირობის არასწორი ნომერი"
+        por "Número de condição inválido"
+        spa "Número inválido de condición"
+
+ER_INSECURE_PLAIN_TEXT
+        chi "在没有SSL/TLS的纯文本中发送密码非常不安全"
+        eng "Sending passwords in plain text without SSL/TLS is extremely insecure"
+        spa "Enviar contraseñas en texto plano sin SSL/TLS es extremadamente inseguro"
+
+ER_INSECURE_CHANGE_MASTER
+        chi "在Master.Info存储库中存储MariaDB用户名或密码信息不安全,因此不建议使用。有关此问题和可能的替代方案,请参阅MariaDB手册"
+        eng "Storing MariaDB user name or password information in the master.info repository is not secure and is therefore not recommended. Please see the MariaDB Manual for more about this issue and possible alternatives"
+        spa "Almacenar nombre de usuario de MariaDB o información de contraseña en el repositorio master.info no es seguro y por ello no se recomienda. Por favor, mira el manual de MariaDB para saber más acerca de este asunto y sus posibles alternativas"
+
+ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO 23000 S1009
+        chi "表'%.192s'的外键约束,记录'%-.192s'会导致表'%.192s'中的重复条目,键'%.192s'"
+        eng "Foreign key constraint for table '%.192s', record '%-.192s' would lead to a duplicate entry in table '%.192s', key '%.192s'"
+        ger "Fremdschlüssel-Beschränkung für Tabelle '%.192s', Datensatz '%-.192s' würde zu einem doppelten Eintrag in Tabelle '%.192s', Schlüssel '%.192s' führen"
+        spa "La restricción de clave foránea para tabla '%.192s', registro '%-.192s' llevaría a una entrada duplicada en la tabla '%.192s', clave '%.192s'"
+        swe "FOREIGN KEY constraint för tabell '%.192s', posten '%-.192s' kan inte uppdatera barntabell '%.192s' på grund av nyckel '%.192s'"
+
+ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO 23000 S1009
+        chi "表'%.192s'的外键约束,记录'%-.192s'会导致子表中的重复条目"
+        eng "Foreign key constraint for table '%.192s', record '%-.192s' would lead to a duplicate entry in a child table"
+        ger "Fremdschlüssel-Beschränkung für Tabelle '%.192s', Datensatz '%-.192s' würde zu einem doppelten Eintrag in einer Kind-Tabelle führen"
+        spa "La restricción de clave foránea para tabla '%.192s', registro '%-.192s' llevaría a una entrada duplicada en una tabla hija"
+        swe "FOREIGN KEY constraint för tabell '%.192s', posten '%-.192s' kan inte uppdatera en barntabell på grund av UNIQUE-test"
+
+ER_SQLTHREAD_WITH_SECURE_SLAVE
+        chi "仅在启动从SQL线程时无法设置身份验证选项"
+        eng "Setting authentication options is not possible when only the Slave SQL Thread is being started"
+        spa "Configurar opciones de autenticación no es posible si sólo se ha arrancado el Hilo (thread) SQL Esclavo"
+
+ER_TABLE_HAS_NO_FT
+        chi "该表没有全文索引来支持此查询"
+        eng "The table does not have FULLTEXT index to support this query"
+        geo "ამ მოთხოვნის მხარდასაჭერად ცხრილს FULLTEXT ინდექსი არ გააჩნია"
+        spa "La tabla no tiene índice FULLTEXT que soporte esta consulta (query)"
+
+ER_VARIABLE_NOT_SETTABLE_IN_SF_OR_TRIGGER
+        chi "无法在存储的函数或触发器中设置系统变量%.200s"
+        eng "The system variable %.200s cannot be set in stored functions or triggers"
+        geo "სისტემური ცვლადის %.200s მნიშვნელობას დამახსოვრებული ფუნქციებიდან და ტრიგერებიდან ვერ დააყენებთ"
+        spa "La variable de sistema %.200s no se puede poner en funciones almacenadas o disparadores"
+
+ER_VARIABLE_NOT_SETTABLE_IN_TRANSACTION
+        chi "持续交易时,无法设置系统变量%.200s"
+        eng "The system variable %.200s cannot be set when there is an ongoing transaction"
+        spa "La variable de sistema %.200s no se puede poner si hay una transacción en curso"
+
+ER_GTID_NEXT_IS_NOT_IN_GTID_NEXT_LIST
+        chi "系统变量@@session.gtid_next具有值%.200s,该值未在@@session.gtid_next_list中列出"
+        eng "The system variable @@SESSION.GTID_NEXT has the value %.200s, which is not listed in @@SESSION.GTID_NEXT_LIST"
+        spa "La variable de sistema @@SESSION.GTID_NEXT tiene el valor %.200s, el cual no está listado en @@SESSION.GTID_NEXT_LIST"
+
+ER_CANT_CHANGE_GTID_NEXT_IN_TRANSACTION_WHEN_GTID_NEXT_LIST_IS_NULL
+        chi "当@@session.gtid_next_list == null时,系统变量@@session.gtid_next无法在事务内更改"
+        eng "When @@SESSION.GTID_NEXT_LIST == NULL, the system variable @@SESSION.GTID_NEXT cannot change inside a transaction"
+        spa "Cuando @@SESSION.GTID_NEXT_LIST == NULL, la variable de sistema @@SESSION.GTID_NEXT no puede cambiar dentro de una transacción"
+
+ER_SET_STATEMENT_CANNOT_INVOKE_FUNCTION
+        chi "语句'SET %.200s'无法调用存储的函数"
+        eng "The statement 'SET %.200s' cannot invoke a stored function"
+        spa "La sentencia 'SET %.200s' no puede invocar una función almacenada"
+
+ER_GTID_NEXT_CANT_BE_AUTOMATIC_IF_GTID_NEXT_LIST_IS_NON_NULL
+        chi "系统变量@@session.gtid_next不能是'自动'@@session.gtid_next_list非null时"
+        eng "The system variable @@SESSION.GTID_NEXT cannot be 'AUTOMATIC' when @@SESSION.GTID_NEXT_LIST is non-NULL"
+        spa "La variable de sistema @@SESSION.GTID_NEXT no puede ser 'AUTOMATIC' si la @@SESSION.GTID_NEXT_LIST es no-NULL"
+
+ER_SKIPPING_LOGGED_TRANSACTION
+        chi "跳过事务%.200s,因为它已经被执行和记录"
+        eng "Skipping transaction %.200s because it has already been executed and logged"
+        spa "Saltando transacción %.200s porque ya ha sido ejecutada y puesta en historial (log)"
+
+ER_MALFORMED_GTID_SET_SPECIFICATION
+        chi "畸形GTID设置规范'%.200s'"
+        eng "Malformed GTID set specification '%.200s'"
+        spa "Especificación de conjunto GTID malformada '%.200s'"
+
+ER_MALFORMED_GTID_SET_ENCODING
+        chi "格式错误的GTID集编码"
+        eng "Malformed GTID set encoding"
+        spa "Codificación de conjunto GTID malformada"
+
+ER_MALFORMED_GTID_SPECIFICATION
+        chi "畸形GTID规范'%.200s'"
+        eng "Malformed GTID specification '%.200s'"
+        geo "GTID-ის არასწორი სპეციფიკაცია '%.200s'"
+        spa "Especificación GTID malformada '%.200s'"
+
+ER_GNO_EXHAUSTED
+        chi "无法生成全局事务标识符:整数组件达到了最大值。用新server_uuId重新启动服务器"
+        eng "Impossible to generate Global Transaction Identifier: the integer component reached the maximal value. Restart the server with a new server_uuid"
+        spa "Imposible generar Identificador Global de Transacción: el componente entero alcanzó el máximo valor. Rearranque el servidor con un nuevo server_uuid"
+
+ER_BAD_SLAVE_AUTO_POSITION
+        chi "当MASTER_AUTO_POSITION处于活动状态时,无法设置参数MASTER_LOG_FILE,MASTER_LOG_POS,RELAY_LOG_FILE和RELAY_LOG_POS"
+        eng "Parameters MASTER_LOG_FILE, MASTER_LOG_POS, RELAY_LOG_FILE and RELAY_LOG_POS cannot be set when MASTER_AUTO_POSITION is active"
+        spa "Los parámetros MASTER_LOG_FILE, MASTER_LOG_POS, RELAY_LOG_FILE y RELAY_LOG_POS no pueden ser puestos cuando MASTER_AUTO_POSITION esté activo"
+
+ER_AUTO_POSITION_REQUIRES_GTID_MODE_ON
+        chi "CHANGE MASTER TO MASTER_AUTO_POSITION = 1只能在GTID_MODE = ON上执行"
+        eng "CHANGE MASTER TO MASTER_AUTO_POSITION = 1 can only be executed when GTID_MODE = ON"
+        spa "CHANGE MASTER TO MASTER_AUTO_POSITION = 1 sólo se puede ejecutar cuando GTID_MODE = ON"
+
+ER_CANT_DO_IMPLICIT_COMMIT_IN_TRX_WHEN_GTID_NEXT_IS_SET
+        chi "当GTID_Next != AUTOMATIC 或GTID_NEXT_LIST != NULL时,无法在事务中执行语句"
+        eng "Cannot execute statements with implicit commit inside a transaction when GTID_NEXT != AUTOMATIC or GTID_NEXT_LIST != NULL"
+        spa "No puedo ejecutar sentencias con acometidas (commit) implícitas dentro de una transacción cuando GTID_NEXT != AUTOMATIC o GTID_NEXT_LIST != NULL"
+
+ER_GTID_MODE_2_OR_3_REQUIRES_ENFORCE_GTID_CONSISTENCY_ON
+        chi "GTID_MODE = ON或GTID_MODE = UPGRADE_STEP_2需要ENFORCE_GTID_CONSISTENCY = 1"
+        eng "GTID_MODE = ON or GTID_MODE = UPGRADE_STEP_2 requires ENFORCE_GTID_CONSISTENCY = 1"
+        spa "GTID_MODE = ON o GTID_MODE = UPGRADE_STEP_2 requiere que ENFORCE_GTID_CONSISTENCY = 1"
+
+ER_GTID_MODE_REQUIRES_BINLOG
+        chi "GTID_MODE = ON或UPGRADE_STEP_1或UPGRADE_STEP_2需要--log-bin和-log-slave-updates"
+        eng "GTID_MODE = ON or UPGRADE_STEP_1 or UPGRADE_STEP_2 requires --log-bin and --log-slave-updates"
+        spa "GTID_MODE = ON o UPGRADE_STEP_1 o UPGRADE_STEP_2 requiere --log-bin y --log-slave-updates"
+
+ER_CANT_SET_GTID_NEXT_TO_GTID_WHEN_GTID_MODE_IS_OFF
+        chi "GTID_NEXT无法设置为UUID:NUMBER 当GTID_MODE = OFF"
+        eng "GTID_NEXT cannot be set to UUID:NUMBER when GTID_MODE = OFF"
+        spa "GTID_NEXT no se puede poner a UUID:NUMBER cuando GTID_MODE = OFF"
+
+ER_CANT_SET_GTID_NEXT_TO_ANONYMOUS_WHEN_GTID_MODE_IS_ON
+        chi "GTID_NEXT无法在当GTID_MODE = ON上时设置为ANONYMOUS"
+        eng "GTID_NEXT cannot be set to ANONYMOUS when GTID_MODE = ON"
+        spa "GTID_NEXT no se puede poner como ANONYMOUS cuando GTID_MODE = ON"
+
+ER_CANT_SET_GTID_NEXT_LIST_TO_NON_NULL_WHEN_GTID_MODE_IS_OFF
+        chi "GTID_NEXT_LIST无法设置为非空值当GTID_MODE = OFF"
+        eng "GTID_NEXT_LIST cannot be set to a non-NULL value when GTID_MODE = OFF"
+        spa "GTID_NEXT_LIST no se puede poner como valor no-NULL cuando GTID_MODE = OFF"
+
+ER_FOUND_GTID_EVENT_WHEN_GTID_MODE_IS_OFF
+        chi "找到一个Gtid_log_event或Previous_gtids_log_event,当gtid_mode = OFF时"
+        eng "Found a Gtid_log_event or Previous_gtids_log_event when GTID_MODE = OFF"
+        spa "Hallado un Gtid_log_event o Previous_gtids_log_event cuando GTID_MODE = OFF"
+
+ER_GTID_UNSAFE_NON_TRANSACTIONAL_TABLE
+        chi "当Enforce_gtid_consistenty = 1时,对非事务性表的更新只能在Autocomated语句或单一语句事务中完成,而不是在与事务表中的更新相同的语句中"
+        eng "When ENFORCE_GTID_CONSISTENCY = 1, updates to non-transactional tables can only be done in either autocommitted statements or single-statement transactions, and never in the same statement as updates to transactional tables"
+        spa "Cuando ENFORCE_GTID_CONSISTENCY = 1, las actualiaciones a tablas no transaccionales sólo se puede hacer o bien en sentencias autoacometidas o en transacciones de sentencias simples y nunca dentro de la misma sentencia como actualizaciones a tablas transaccionales"
+
+ER_GTID_UNSAFE_CREATE_SELECT
+        chi "CREATE TABLE...SELECT在ENFORCE_GTID_CONSISTENCY = 1时被禁止"
+        eng "CREATE TABLE ... SELECT is forbidden when ENFORCE_GTID_CONSISTENCY = 1"
+        spa "CREATE TABLE ... SELECT está prohibido si ENFORCE_GTID_CONSISTENCY = 1"
+
+ER_GTID_UNSAFE_CREATE_DROP_TEMPORARY_TABLE_IN_TRANSACTION
+        chi "当ENFORCE_GTID_CONSISTENCY = 1时,语句CREATE TEMPORARY TABLE和DROP TEMPORARY TABLE,只能在非事务性上下文中执行,并且要求autocommit = 1"
+        eng "When ENFORCE_GTID_CONSISTENCY = 1, the statements CREATE TEMPORARY TABLE and DROP TEMPORARY TABLE can be executed in a non-transactional context only, and require that AUTOCOMMIT = 1"
+        spa "Cuando ENFORCE_GTID_CONSISTENCY = 1, las sentencias CREATE TEMPORARY TABLE y DROP TEMPORARY TABLE pueden ser ejecutadas sólo en contextos no-transaccionales y requieren que AUTOCOMMIT = 1"
+
+ER_GTID_MODE_CAN_ONLY_CHANGE_ONE_STEP_AT_A_TIME
+        chi "GTID_MODE的值只能一次更改一步:OFF <-> UPGRADE_STEP_1 <-> UPGRADE_STEP_2 <-> ON。另请注意,此值必须在所有服务器上同时上升或下降;有关说明,请参阅手册。"
+        eng "The value of GTID_MODE can only change one step at a time: OFF <-> UPGRADE_STEP_1 <-> UPGRADE_STEP_2 <-> ON. Also note that this value must be stepped up or down simultaneously on all servers; see the Manual for instructions." 
+        spa "El valor de GTID_MODE sólo puede cambiar un paso a la vez: OFF <-> UPGRADE_STEP_1 <-> UPGRADE_STEP_2 <-> ON. También ten en cuenta que este valor debe de ser aumentado y disminuido simultáneamente en todos los servidores; mira el Manual para instrucciones."
+
+ER_MASTER_HAS_PURGED_REQUIRED_GTIDS
+        chi "从机更改主站到Master_Auto_Position = 1,因此主站连接,但主设备已清除了slave需要的GTID的二进制日志"
+        eng "The slave is connecting using CHANGE MASTER TO MASTER_AUTO_POSITION = 1, but the master has purged binary logs containing GTIDs that the slave requires"
+        spa "El esclavo se está conectando usando CHANGE MASTER TO MASTER_AUTO_POSITION = 1, pero el maestro (master) ha purgado los historiales (logs) binarios que contienen GTIDs requeridos por el esclavo"
+
+ER_CANT_SET_GTID_NEXT_WHEN_OWNING_GTID
+        chi "无法由拥有GTID的客户端更改GTID_NEXT。客户拥有%s。所有权在提交或回滚上发布"
+        eng "GTID_NEXT cannot be changed by a client that owns a GTID. The client owns %s. Ownership is released on COMMIT or ROLLBACK"
+        spa "GTID_NEXT no se puede cambiar por un cliente que posee un GTID. El cliente posee %s. La propiedad se libera con COMMIT o ROLLBACK"
+
+ER_UNKNOWN_EXPLAIN_FORMAT
+        chi "未知%s格式名称:'%s'"
+        eng "Unknown %s format name: '%s'"
+        geo "უცნობი ფორმატის სახელი ბრძანებისთვის %s: '%s'"
+        rus "Неизвестное имя формата команды %s: '%s'"
+        spa "Nombre de formato %s desconocido: '%s'"
+
+ER_CANT_EXECUTE_IN_READ_ONLY_TRANSACTION 25006
+        chi "无法在只读事务中执行语句"
+        eng "Cannot execute statement in a READ ONLY transaction"
+        spa "No puedo ejecutar sentencia en una transacción READ ONLY"
+
+ER_TOO_LONG_TABLE_PARTITION_COMMENT
+        chi "表分区的评论'%-.64s'太长(max =%lu)"
+        eng "Comment for table partition '%-.64s' is too long (max = %lu)"
+        spa "El comentario para partición de tabla '%-.64s' es demasiado largo (máx = %lu)"
+
+ER_SLAVE_CONFIGURATION
+        chi "从站未配置或未能正确初始化。您必须至少set --server-id以启用主站或从站。可以在MariaDB错误日志中找到其他错误消息"
+        eng "Slave is not configured or failed to initialize properly. You must at least set --server-id to enable either a master or a slave. Additional error messages can be found in the MariaDB error log"
+        spa "El esclavo no está configurado o falló al inicializarse de forma adecuada. Vd debe de poner al menos --server-id para activar o bien un maestro (master) o un esclavo. Mensajes de error adicionales pueden ser hallados en historial (log) de errores de MariaDB"
+
+ER_INNODB_FT_LIMIT
+        chi "InnoDB目前一次支持一个全文索引创建"
+        eng "InnoDB presently supports one FULLTEXT index creation at a time"
+        spa "Actualmente InnoDB soporta la creación de un índice FULLTEXT a la vez"
+
+ER_INNODB_NO_FT_TEMP_TABLE
+        chi "无法在临时InnoDB表上创建FullText索引"
+        eng "Cannot create FULLTEXT index on temporary InnoDB table"
+        spa "No puedo crear índice FULLTEXT en tabla temporaria InnoDB"
+
+ER_INNODB_FT_WRONG_DOCID_COLUMN
+        chi "列'%-.192s'是innodb fulltext索引的错误类型"
+        eng "Column '%-.192s' is of wrong type for an InnoDB FULLTEXT index"
+        spa "La columna '%-.192s' es de tipo equivocado para un índice InnoDB FULLTEXT"
+
+ER_INNODB_FT_WRONG_DOCID_INDEX
+        chi "InnoDB全文索引的索引'%-.192s'是错误的类型错误"
+        eng "Index '%-.192s' is of wrong type for an InnoDB FULLTEXT index"
+        spa "El índice '%-.192s' es de tipo equivocado para un índice InnoDB FULLTEXT"
+
+ER_INNODB_ONLINE_LOG_TOO_BIG
+        chi "创建索引'%-.192s'所需的多于'innodb_online_alter_log_max_size'字节的修改日志。请再试一次"
+        eng "Creating index '%-.192s' required more than 'innodb_online_alter_log_max_size' bytes of modification log. Please try again"
+        spa "La creación de índice '%-.192s' requirió más de 'innodb_online_alter_log_max_size' bytes de historial (log) de modificaciones. Por favor, pruebe otra vez"
+
+ER_UNKNOWN_ALTER_ALGORITHM
+        chi "未知算法'%s'"
+        eng "Unknown ALGORITHM '%s'"
+        geo "უცნობი ALGORITHM '%s'"
+        spa "ALGORITHM desconocido '%s'"
+
+ER_UNKNOWN_ALTER_LOCK
+        chi "未知锁定类型'%s'"
+        eng "Unknown LOCK type '%s'"
+        geo "უცნობი LOCK-ის ტიპი '%s'"
+        spa "Tipo de LOCK desconocido '%s'"
+
+ER_MTS_CHANGE_MASTER_CANT_RUN_WITH_GAPS
+        chi "当从站因为错误停止或以MTS模式终止时,不能执行CHANGE MASTER。考虑使用RESET SLAVE或START SLAVE UNTIL"
+        eng "CHANGE MASTER cannot be executed when the slave was stopped with an error or killed in MTS mode. Consider using RESET SLAVE or START SLAVE UNTIL"
+        spa "CHANGE MASTER no se puede ejecutar cuando se ha parado el esclavo con un error o matado en modo MTS. Considere el usar RESET SLAVE o START SLAVE UNTIL"
+
+ER_MTS_RECOVERY_FAILURE
+        chi "从并行执行模式下的从站错误后无法恢复。可以在MariaDB错误日志中找到其他错误消息"
+        eng "Cannot recover after SLAVE errored out in parallel execution mode. Additional error messages can be found in the MariaDB error log"
+        spa "No puedo recuperar después de que SLAVE diera error en modo paralelo de ejecución. Mensajes de error adicionales se pueden hallar en el historial (log) de error de MariaDB"
+
+ER_MTS_RESET_WORKERS
+        chi "无法清理工作者信息表。可以在MariaDB错误日志中找到其他错误消息"
+        eng "Cannot clean up worker info tables. Additional error messages can be found in the MariaDB error log"
+        spa "No puedo limpiar tablas de información de trabajador. Mensajes de error adicionales se pueden hallar en el historial (log) de error de MariaDB"
+
+ER_COL_COUNT_DOESNT_MATCH_CORRUPTED_V2
+        chi "列数为%s.%s是错误的。预期的%d,找到%d。表可能损坏了"
+        eng "Column count of %s.%s is wrong. Expected %d, found %d. The table is probably corrupted"
+        ger "Spaltenanzahl von %s.%s falsch. %d erwartet, aber %d gefunden. Tabelle ist wahrscheinlich beschädigt"
+        spa "El contador de columnas %s.%s está equivocado. Se esperaba %d, hallado %d. La tabla está probablemente estropeada"
+
+ER_SLAVE_SILENT_RETRY_TRANSACTION
+        chi "从站必须静默地重试当前事务"
+        eng "Slave must silently retry current transaction"
+        spa "El esclavo debe de reintentar silenciosamente la transacción en curso"
+
+ER_UNUSED_22
+        eng "You should never see it"
+        geo "ეს არასდროს უნდა დაინახოთ"
+        spa "Nunca debería vd de ver esto"
+
+ER_TABLE_SCHEMA_MISMATCH
+        chi "架构不匹配(%s)"
+        eng "Schema mismatch (%s)"
+        geo "სქემა არ ემთხვევა (%s)"
+        spa "Esquema no coincide (%s)"
+
+ER_TABLE_IN_SYSTEM_TABLESPACE
+        chi "表%-.192s在系统表空间中"
+        eng "Table %-.192s in system tablespace"
+        spa "Tabla %-.192s en espacio de tablas del sistema"
+
+ER_IO_READ_ERROR
+        chi "IO读取错误:(%lu,%s)%s"
+        eng "IO Read error: (%lu, %s) %s"
+        geo "IO წაკითხვის შეცდომა: (%lu, %s) %s"
+        spa "Error de Lectura de E/S: (%lu, %s) %s"
+
+ER_IO_WRITE_ERROR
+        chi "IO写入错误:(%lu,%s)%s"
+        eng "IO Write error: (%lu, %s) %s"
+        geo "IO ჩაწერის შეცდომა: (%lu, %s) %s"
+        spa "Error de Escritura de E/S: (%lu, %s) %s"
+
+ER_TABLESPACE_MISSING
+        chi "表空间缺少表'%-.192s'"
+        eng "Tablespace is missing for table '%-.192s'"
+        spa "Espacio de tabla falta para la tabla '%-.192s'"
+
+ER_TABLESPACE_EXISTS
+        chi "表格'%-.192s'的表空间。请在导入之前丢弃表空间"
+        eng "Tablespace for table '%-.192s' exists. Please DISCARD the tablespace before IMPORT"
+        spa "Existe Espacio de tabla para la tabla '%-.192s'. Por favor, haga DISCARD del espacio de tabla antes de hacer IMPORT"
+
+ER_TABLESPACE_DISCARDED
+        chi "表空间已被丢弃为表%`s"
+        eng "Tablespace has been discarded for table %`s"
+        spa "El espacio de tabla ha sido descartado para la tabla %`s"
+
+ER_INTERNAL_ERROR
+        chi "内部错误:%-.192s"
+        eng "Internal error: %-.192s"
+        geo "შიდა შეცდომა: %-.192s"
+        spa "Error interno: %-.192s"
+
+ER_INNODB_IMPORT_ERROR
+        chi "ALTER TABLE '%-.192s' IMPORT TABLESPACE 失败,错误%lu:'%s'"
+        eng "ALTER TABLE '%-.192s' IMPORT TABLESPACE failed with error %lu : '%s'"
+        geo "ALTER TABLE '%-.192s' IMPORT TABLESPACE დასრულდა შეცდომით %lu : '%s'"
+        spa "ALTER TABLE '%-.192s' IMPORT TABLESPACE ha fallado con error %lu : '%s'"
+
+ER_INNODB_INDEX_CORRUPT
+        chi "索引损坏:%s"
+        eng "Index corrupt: %s"
+        geo "ინდექსი დაზიანებულია: %s"
+        spa "Índice corrupto: %s"
+
+ER_INVALID_YEAR_COLUMN_LENGTH
+        chi "已弃用YEAR(%lu)列类型。创建YEAR(4)列代替"
+        eng "YEAR(%lu) column type is deprecated. Creating YEAR(4) column instead"
+        rus "Тип YEAR(%lu) более не поддерживается, вместо него будет создана колонка с типом YEAR(4)"
+        spa "El tipo de columna YEAR(%lu) está obsoleto. Creando columna YEAR(4) en su lugar"
+
+ER_NOT_VALID_PASSWORD
+        eng "Your password does not satisfy the current policy requirements (%s)"
+        geo "თქვენი პაროლი მიმდინარე პოლიტიკის მოთხოვნებს არ აკმაყოფილებს (%s)"
+        ukr "Ваш пароль не відповідає поточним правилам (%s)"
+
+ER_MUST_CHANGE_PASSWORD
+        bgn "Трябва първо да си смените паролата със SET PASSWORD за да можете да изпълните тази команда"
+        chi "您必须在执行此语句之前设置密码"
+        eng "You must SET PASSWORD before executing this statement"
+        geo "ამ გამოსახულების გაშვებამდე SET PASSWORD უნდა გაუშვათ"
+        rum "Trebuie sa iti schimbi parola folosind SET PASSWORD inainte de a executa aceasta comanda"
+        spa "Debe vd de poner SET PASSWORD antes de ejecutar esta sentencia"
+
+ER_FK_NO_INDEX_CHILD
+        chi "无法添加外键约束。外表'%s'中的约束'%s'缺少索引"
+        eng "Failed to add the foreign key constraint. Missing index for constraint '%s' in the foreign table '%s'"
+        spa "No pude añadir la restricción de clave foránea. Falta índice para restricción '%s' en la tabla foránea '%s'"
+
+ER_FK_NO_INDEX_PARENT
+        chi "无法添加外键约束。引用的表'%s'中的约束'%s'缺少索引"
+        eng "Failed to add the foreign key constraint. Missing index for constraint '%s' in the referenced table '%s'"
+        spa "No pude añadir la restricción de clave foránea. Falta índice para restricción '%s' en la tabla referenciada '%s'"
+
+ER_FK_FAIL_ADD_SYSTEM
+        chi "无法将外键约束'%s'添加到系统表"
+        eng "Failed to add the foreign key constraint '%s' to system tables"
+        spa "No pude añadir la restricción de clave foránea '%s' a las tablas del sistema"
+
+ER_FK_CANNOT_OPEN_PARENT
+        chi "无法打开引用的表'%s'"
+        eng "Failed to open the referenced table '%s'"
+        spa "No pude abrir la tabla referenciada '%s'"
+
+ER_FK_INCORRECT_OPTION
+        chi "无法在表'%s'上添加外键约束。外键约束'%s'中的选项不正确"
+        eng "Failed to add the foreign key constraint on table '%s'. Incorrect options in FOREIGN KEY constraint '%s'"
+        spa "No pude añadir restricción de clave foránea en la tabla '%s'. Opciones incorrectas en restricción FOREIGN KEY '%s'"
+
+ER_DUP_CONSTRAINT_NAME
+        chi "重复%s约束名称'%s'"
+        eng "Duplicate %s constraint name '%s'"
+        spa "Duplicada restricción %s llamada '%s'"
+
+ER_PASSWORD_FORMAT
+        chi "密码哈希没有预期的格式。检查密码()函数是否使用正确的密码算法"
+        eng "The password hash doesn't have the expected format. Check if the correct password algorithm is being used with the PASSWORD() function"
+        spa "El cálculo de contraseña no tiene el formato esperado. Revise si se está usando el algoritmo correcto de contraseña con la función PASSWORD()"
+
+ER_FK_COLUMN_CANNOT_DROP
+        chi "无法删除'%-.192s'列:在外部索引约束'%-.192s'中需要"
+        eng "Cannot drop column '%-.192s': needed in a foreign key constraint '%-.192s'"
+        ger "Kann Spalte '%-.192s' nicht löschen: wird für eine Fremdschlüsselbeschränkung '%-.192s' benötigt"
+        spa "No puedo eliminar la columna '%-.192s': necesaria en restricción de clave foránea '%-.192s'"
+
+ER_FK_COLUMN_CANNOT_DROP_CHILD
+        chi "无法删除列'%-.192s':在外键约束'%-.192s'中需要,表%-.192s"
+        eng "Cannot drop column '%-.192s': needed in a foreign key constraint '%-.192s' of table %-.192s"
+        ger "Kann Spalte '%-.192s' nicht löschen: wird für eine Fremdschlüsselbeschränkung '%-.192s' der Tabelle %-.192s benötigt"
+        spa "No puedo eliminar la columna '%-.192s': necesaria en restriccón de clave foránea '%-.192s' de la tabla %-.192s"
+
+ER_FK_COLUMN_NOT_NULL
+        chi "列'%-.192s'不能没有null:在外键约束'%-.192s'设置为null"
+        eng "Column '%-.192s' cannot be NOT NULL: needed in a foreign key constraint '%-.192s' SET NULL"
+        ger "Spalte '%-.192s' kann nicht NOT NULL sein: wird für eine Fremdschlüsselbeschränkung '%-.192s' SET NULL benötigt"
+        spa "La columna '%-.192s' no puede ser NOT NULL: necesaria en restricción de clave foránea '%-.192s' SET NULL"
+
+ER_DUP_INDEX
+        chi "重复索引%`s。这已弃用,将在未来的版本中不允许"
+        eng "Duplicate index %`s. This is deprecated and will be disallowed in a future release"
+        spa "Índice duplicado %`s. Éste está obsoleto y será quitado en entregas futuras"
+
+ER_FK_COLUMN_CANNOT_CHANGE
+        chi "无法更改列'%-.192s':用于外部键约束'%-.192s'"
+        eng "Cannot change column '%-.192s': used in a foreign key constraint '%-.192s'"
+        spa "No puedo cambiar la columna '%-.192s': usada en una restricción de clave foránea '%-.192s'"
+
+ER_FK_COLUMN_CANNOT_CHANGE_CHILD
+        chi "无法更改列'%-.192s':用于在外部键约束'%-.192s'的表'%-.192s'"
+        eng "Cannot change column '%-.192s': used in a foreign key constraint '%-.192s' of table '%-.192s'"
+        spa "No puedo cambiar la columna '%-.192s': usada en restricción de clave foránea '%-.192s' de la tabla '%-.192s'"
+
+ER_FK_CANNOT_DELETE_PARENT
+        chi "无法从表中删除来自表中的父级的表中的行'%-.192s”表'%-.192s'"
+        eng "Cannot delete rows from table which is parent in a foreign key constraint '%-.192s' of table '%-.192s'"
+        spa "No puedo borrar filas de la tabla que es padre en restricción de clave foránea '%-.192s' de la tabla '%-.192s'"
+
+ER_MALFORMED_PACKET
+        chi "畸形通信包"
+        eng "Malformed communication packet"
+        geo "კომუნიკაციის არასწორი პაკეტი"
+        spa "Paquete de comunicación malformado"
+
+ER_READ_ONLY_MODE
+        chi "以只读模式运行"
+        eng "Running in read-only mode"
+        geo "გაშვებულია მხოლოდ-წაკითხვის რეჟიმში"
+        spa "Ejecutando en modo sólo-lectura"
+
+ER_GTID_NEXT_TYPE_UNDEFINED_GROUP
+        chi "当GTID_NEXT设置为GTID时,必须在提交或回滚后立即将其再次设置。如果在从SQL线程中看到此错误消息,则表示当前事务中的表是在主站和从站上的非交易的事务性。在客户端连接中,它意味着您在事务之前执行SET GTID_NEXT并忘记将GTID_NEXT设置为不同的标识符或在提交或回滚后“自动”。当前gtid_next是'%s'"
+        eng "When GTID_NEXT is set to a GTID, you must explicitly set it again after a COMMIT or ROLLBACK. If you see this error message in the slave SQL thread, it means that a table in the current transaction is transactional on the master and non-transactional on the slave. In a client connection, it means that you executed SET GTID_NEXT before a transaction and forgot to set GTID_NEXT to a different identifier or to 'AUTOMATIC' after COMMIT or ROLLBACK. Current GTID_NEXT is '%s'"
+        spa "Cuando GTID_NEXT se pone a GTID, debe vd de ponerlo de nuevo de forma explícita tras un COMMIT o ROLLBACK. Si vd ve este mensaje de error en el hilo (thread) de SQL esclavo, indica que una tabla de la transacción en curso es transaccional en el maestro (master) y no transaccional en el esclavo. En una conexión cliente, indica que has ejecutado SET GTID_NEXT antes de una transacción y has olvidado poner GTID_NEXT a un identificador diferente o a 'AUTOMATIC' tras COMMIT o ROLLBACK. El GTID_NEXT actual es '%s'"
+
+ER_VARIABLE_NOT_SETTABLE_IN_SP
+        chi "无法在存储过程中设置系统变量%.200s"
+        eng "The system variable %.200s cannot be set in stored procedures"
+        spa "La variable de sistema %.200s no se puede poner en procedimentos almacenados"
+
+ER_CANT_SET_GTID_PURGED_WHEN_GTID_MODE_IS_OFF
+        chi "只能在GTID_MODE = ON设置GTID_PURGED"
+        eng "GTID_PURGED can only be set when GTID_MODE = ON"
+        spa "GTID_PURGED sólo se puede usar cuando GTID_MODE = ON"
+
+ER_CANT_SET_GTID_PURGED_WHEN_GTID_EXECUTED_IS_NOT_EMPTY
+        chi "只有在GTID_EXECUTED为空时才可以设置GTID_PURGED"
+        eng "GTID_PURGED can only be set when GTID_EXECUTED is empty"
+        spa "GTID_PURGED sólo se puede poner cuando GTID_EXECUTED está vacío"
+
+ER_CANT_SET_GTID_PURGED_WHEN_OWNED_GTIDS_IS_NOT_EMPTY
+        chi "只有在没有持续的事务时才可以设置GTID_PURGED(即使在其他客户端中不)"
+        eng "GTID_PURGED can only be set when there are no ongoing transactions (not even in other clients)"
+        spa "GTID_PURGED sólo se puede poner cuando no hay trasacciones en curso (ni incluso en otros clientes)"
+
+ER_GTID_PURGED_WAS_CHANGED
+        chi "GTID_PURGED从'%s'更改为'%s'"
+        eng "GTID_PURGED was changed from '%s' to '%s'"
+        geo "GTID_PURGED შეიცვალა '%s'-დან '%s'-ზე"
+        spa "GTID_PURGED se cambió de '%s' a '%s'"
+
+ER_GTID_EXECUTED_WAS_CHANGED
+        chi "GTID_EXECUTE从'%s'更改为'%s'"
+        eng "GTID_EXECUTED was changed from '%s' to '%s'"
+        geo "GTID_EXECUTED შეიცვალა '%s'-დან '%s'-ზე"
+        spa "GTID_EXECUTED se cambió de '%s' a '%s'"
+
+ER_BINLOG_STMT_MODE_AND_NO_REPL_TABLES
+        chi "无法执行语句:由于BINLOG_FORMAT = STATEMENT,因此无法写入二进制日志,并将复制和非复制表写入"
+        eng "Cannot execute statement: impossible to write to binary log since BINLOG_FORMAT = STATEMENT, and both replicated and non replicated tables are written to"
+        spa "No puedo ejecutar sentencia: imposible escribir en historial (log) binario desde BINLOG_FORMAT = STATEMENT y en tablas replicadas y no replicadas"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED 0A000
+        chi "此操作不支持%s。试试%s"
+        eng "%s is not supported for this operation. Try %s"
+        spa "%s no está soportado para esta operación. Pruebe %s"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON 0A000
+        chi "不支持%s。原因:%s。试试%s"
+        eng "%s is not supported. Reason: %s. Try %s"
+        spa "%s no está soportado. Motivo: %s. Pruebe %s"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_COPY
+        chi "复制算法需要锁定"
+        eng "COPY algorithm requires a lock"
+        spa "El algoritmo de COPY requiere de un bloqueo"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_PARTITION
+        chi "分区特定操作尚不支持锁定/算法"
+        eng "Partition specific operations do not yet support LOCK/ALGORITHM"
+        spa "Las operaciones específicas de partición aún no soportan LOCK/ALGORITHM"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_RENAME
+        chi "参与外键的列被更名"
+        eng "Columns participating in a foreign key are renamed"
+        spa "Las columnas que participan en una clave foránea son renombradas"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_COLUMN_TYPE
+        chi "无法更改列类型"
+        eng "Cannot change column type"
+        spa "No puedo cambiar el tipo de la columna"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_CHECK
+        chi "添加外键需要foreign_key_checks = OFF"
+        eng "Adding foreign keys needs foreign_key_checks=OFF"
+        spa "El añadir claves foráneas necesita de foreign_key_checks=OFF"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_IGNORE
+        chi "使用忽略创建唯一索引需要复制算法删除重复行"
+        eng "Creating unique indexes with IGNORE requires COPY algorithm to remove duplicate rows"
+        spa "El crear índices únicos con IGNORE requiere del algoritmo COPY para quitar filas duplicadas"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOPK
+        chi "不允许删除主键,而不添加新的主键"
+        eng "Dropping a primary key is not allowed without also adding a new primary key"
+        spa "Eliminar una clave primaria no está permitido sin añadir también una nueva clave primaria"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_AUTOINC
+        chi "添加自动增量列需要锁定"
+        eng "Adding an auto-increment column requires a lock"
+        spa "Para añadir una columna auto-incrementable se requiere de un bloqueo"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_HIDDEN_FTS
+        chi "无法使用用户可见的替换隐藏的FTS_DOC_ID"
+        eng "Cannot replace hidden FTS_DOC_ID with a user-visible one"
+        spa "No puedo reemplazar FTS_DOC_ID oculta con una visible-por-usuario"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_CHANGE_FTS
+        chi "无法删除或重命名FTS_DOC_ID"
+        eng "Cannot drop or rename FTS_DOC_ID"
+        spa "No puedo eliminar o renombrar FTS_DOC_ID"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS
+        chi "fulltext索引创建需要锁定"
+        eng "Fulltext index creation requires a lock"
+        spa "La creación de un índice Fulltext requiere de un bloqueo"
+
+ER_SQL_SLAVE_SKIP_COUNTER_NOT_SETTABLE_IN_GTID_MODE
+        chi "使用GTID_Mode = ON运行时,无法设置SQL_SLAVE_SKIP_COUNTER。相反,对于要跳过的每个事务,使用与事务相同的GTID生成空事务"
+        eng "sql_slave_skip_counter can not be set when the server is running with GTID_MODE = ON. Instead, for each transaction that you want to skip, generate an empty transaction with the same GTID as the transaction"
+        spa "sql_slave_skip_counter no se puede poner cuando el servidor se ejecuta con GTID_MODE = ON. En su lugar, para cada transacción que desees saltar, genera una transacción vacía con el mismo GTID que la transacción"
+
+ER_DUP_UNKNOWN_IN_INDEX 23000
+        chi "索引的重复条目'%-.192s'"
+        cze "Zdvojený klíč (číslo klíče '%-.192s')"
+        dan "Flere ens nøgler for indeks '%-.192s'"
+        eng "Duplicate entry for key '%-.192s'"
+        est "Kattuv väärtus võtmele '%-.192s'"
+        fre "Duplicata du champ pour la clef '%-.192s'"
+        ger "Doppelter Eintrag für Schlüssel '%-.192s'"
+        geo "დუბლირებული ჩანაწერი გასაღებისთვის '%-.192s'"
+        greek "Διπλή εγγραφή για το κλειδί '%-.192s'"
+        hun "Duplikalt bejegyzes a '%-.192s' kulcs szerint"
+        ita "Valore duplicato per la chiave '%-.192s'"
+        jpn "は索引 '%-.192s' で重複しています。"
+        kor "중복된 입력 값: key '%-.192s'"
+        nla "Dubbele ingang voor zoeksleutel '%-.192s'"
+        nor "Like verdier for nøkkel '%-.192s'"
+        norwegian-ny "Like verdiar for nykkel '%-.192s'"
+        pol "Powtórzone wystąpienie dla klucza '%-.192s'"
+        por "Entrada duplicada para a chave '%-.192s'"
+        rum "Cimpul e duplicat pentru cheia '%-.192s'"
+        rus "Дублирующаяся запись по ключу '%-.192s'"
+        serbian "Dupliran unos za ključ '%-.192s'"
+        slo "Opakovaný kľúč (číslo kľúča '%-.192s')"
+        spa "Entrada duplicada para la clave '%-.192s'"
+        swe "Dublett för nyckel '%-.192s'"
+        ukr "Дублюючий запис для ключа '%-.192s'"
+
+ER_IDENT_CAUSES_TOO_LONG_PATH
+        chi "对象的长数据库名称和标识符导致路径长度超过%d字符。路径:'%s'"
+        eng "Long database name and identifier for object resulted in path length exceeding %d characters. Path: '%s'"
+        spa "Nombre largo de base de datos e identificador para objeto resultó en que el tamaño de la ruta excedió de %d caracteres. Ruta: '%s'"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL
+        chi "无法将null转换为非常量默认值"
+        eng "cannot convert NULL to non-constant DEFAULT"
+        spa "no puedo convertir NULL a DEFAULT no-constante"
+
+ER_MUST_CHANGE_PASSWORD_LOGIN
+        bgn "Паролата ви е изтекла. За да влезете трябва да я смените използвайки клиент който поддрържа такива пароли"
+        chi "您的密码已过期。要登录您必须使用支持过期密码的客户端更改它"
+        eng "Your password has expired. To log in you must change it using a client that supports expired passwords"
+        rum "Parola ta a expirat. Pentru a te loga, trebuie sa o schimbi folosind un client ce suporta parole expirate"
+        spa "Su contraseña ha expirado. Para ingresar, vd debe de cambiarla usando un cliente que soporte contraseñas expiradas"
+
+ER_ROW_IN_WRONG_PARTITION
+        chi "在错误分区%s中找到了一行"
+        eng "Found a row in wrong partition %s"
+        spa "Hallada una fila en partición equivocada %s"
+        swe "Hittade en rad i fel partition %s"
+
+ER_MTS_EVENT_BIGGER_PENDING_JOBS_SIZE_MAX
+        chi "无法安排事件%s,中继日志名称%s,position%s对工作线程,因为它的大小%lu超过了slave_pending_jobs_size_max (%lu)"
+        eng "Cannot schedule event %s, relay-log name %s, position %s to Worker thread because its size %lu exceeds %lu of slave_pending_jobs_size_max"
+        spa "No puedo organizar evento %s, nombre de historial (log)-reenvío %s, posiciona %s a hilo (thread) de Trabajador porque su tamaño %lu excede %lu de slave_pending_jobs_size_max"
+
+ER_INNODB_NO_FT_USES_PARSER
+        chi "无法在InnoDB表上CREATE FULLTEXT INDEX WITH PARSER"
+        eng "Cannot CREATE FULLTEXT INDEX WITH PARSER on InnoDB table"
+        spa "No puedo CREATE FULLTEXT INDEX WITH PARSER en tabla InnoDB"
+ER_BINLOG_LOGICAL_CORRUPTION
+        chi "二进制日志文件'%s'逻辑损坏:%s"
+        eng "The binary log file '%s' is logically corrupted: %s"
+        spa "El fichero/archivo de historial (log) binario '%s' está lógicamente corrupto: %s"
+
+ER_WARN_PURGE_LOG_IN_USE
+        chi "未清除文件%s,因为它被%d线程读取,只清除%d文件中的%d"
+        eng "file %s was not purged because it was being read by %d thread(s), purged only %d out of %d files"
+        spa "el fichero %s no se ha purgado porque estaba siendo leído por hilo(s) (thread) %d, purgado sólo %d de %d ficheros/archivos"
+
+ER_WARN_PURGE_LOG_IS_ACTIVE
+        chi "文件%s未清除,因为它是活动日志文件"
+        eng "file %s was not purged because it is the active log file"
+        spa "el fichero/archivo %s no fue purgado porque es el fichero/archivo activo de historial (log)"
+
+ER_AUTO_INCREMENT_CONFLICT
+        chi "更新中的自动增量值与内部生成的值冲突"
+        eng "Auto-increment value in UPDATE conflicts with internally generated values"
+        spa "Valor de Auto-incremento en UPDATE está en conflicto con valores generados internamente"
+WARN_ON_BLOCKHOLE_IN_RBR
+        chi "未记录行事件的%s语句,该语句以行格式修改BlackHole表。表:'%-.192s'"
+        eng "Row events are not logged for %s statements that modify BLACKHOLE tables in row format. Table(s): '%-.192s'"
+        spa "Los eventos de fila no son puestos en historial (log) para sentencias %s que modifican tablas BLACKHOLE en formato de fila. Tabla(s): '%-.192s'"
+
+ER_SLAVE_MI_INIT_REPOSITORY
+        chi "从设备无法从存储库初始化主信息结构"
+        eng "Slave failed to initialize master info structure from the repository"
+        spa "El esclavo falló al inicializar información de estructura del maestro (master) desde el repositorio"
+
+ER_SLAVE_RLI_INIT_REPOSITORY
+        chi "从站无法从存储库初始化中继日志信息结构"
+        eng "Slave failed to initialize relay log info structure from the repository"
+        spa "El esclavo falló al inicializar estructura de información de historial (log) de reenvío desde el repositorio"
+
+ER_ACCESS_DENIED_CHANGE_USER_ERROR 28000 
+        bgn "Отказан достъп при опит за смяна към потребител '%-.48s'@'%-.64s' (използвана парола: %s). Затваряне на връзката"
+        chi "访问拒绝尝试更改为用户'%-.48s'@'%-.64s'(使用密码:%s)。断开连接"
+        eng "Access denied trying to change to user '%-.48s'@'%-.64s' (using password: %s). Disconnecting"
+        spa "Acceso denegado intentando cambiar a usuario '%-.48s'@'%-.64s' (usando contraseña: %s). Desconectando"
+
+ER_INNODB_READ_ONLY
+        chi "innodb是只读模式"
+        eng "InnoDB is in read only mode"
+        hindi "InnoDB केवल READ-ONLY मोड में है"
+        spa "InnoDB está en modo de sólo lectura"
+
+ER_STOP_SLAVE_SQL_THREAD_TIMEOUT
+        chi "STOP SLAVE命令执行不完整:从SQL线程获得停止信号,线程正忙,一旦当前任务完成后,SQL线程将停止"
+        eng "STOP SLAVE command execution is incomplete: Slave SQL thread got the stop signal, thread is busy, SQL thread will stop once the current task is complete"
+        spa "La ejecución del comando STOP SLAVE está incompleta: El hilo (thread) de SQL esclavo recibió la señal de parada, hilo (thread) ocupado, el hilo (thread) SQL se parará una vez que se complete la tarea actual"
+
+ER_STOP_SLAVE_IO_THREAD_TIMEOUT
+        chi "STOP SLAVE命令执行不完整:从机动程线程得到停止信号,线程很忙,一旦当前任务完成后,IO线程将停止。"
+        eng "STOP SLAVE command execution is incomplete: Slave IO thread got the stop signal, thread is busy, IO thread will stop once the current task is complete"
+        spa "La ejecución del comando STOP SLAVE está incompleta: El hilo (thread) de E/S esclavo recibió la señal de parada, hilo (thread) ocupado, el hilo (thread) de E/S se parará una vez que se complete la tarea actual"
+
+ER_TABLE_CORRUPT
+        chi "无法执行操作。表'%-.64s。%-.64s'丢失,损坏或包含不良数据"
+        eng "Operation cannot be performed. The table '%-.64s.%-.64s' is missing, corrupt or contains bad data"
+        spa "La operación no se puede realizar. Falta la tabla '%-.64s.%-.64s', está corrupta o contiene datos malos"
+
+ER_TEMP_FILE_WRITE_FAILURE
+        chi "临时文件写入失败"
+        eng "Temporary file write failure"
+        spa "Fallo al escribir fichero/archivo temporal"
+
+ER_INNODB_FT_AUX_NOT_HEX_ID
+        chi "升级索引名称失败,请使用创建索引(ALTER TABLE)算法复制来重建索引"
+        eng "Upgrade index name failed, please use create index(alter table) algorithm copy to rebuild index"
+        spa "Falló la mejora de nombre de índice. Por favor, use una copia del algoritmo de create index(alter table) para reconstruir el índice"
+#
+#
+# MariaDB error messages section starts here
+
+
+# The following is here to allow us to detect if there was missing
+# error messages in the errmsg.sys file
+
+ER_LAST_MYSQL_ERROR_MESSAGE
+        eng ""
+
+# MariaDB error numbers starts from 1900
+start-error-number 1900
+
+ER_UNUSED_18
+        eng "You should never see it"
+ER_GENERATED_COLUMN_FUNCTION_IS_NOT_ALLOWED
+        chi "函数或表达式'%s'不能用于%s的%`s"
+        eng "Function or expression '%s' cannot be used in the %s clause of %`s"
+        spa "La Función o expresión '%s' no se puede usar en la cláusula %s de %`s"
+ER_UNUSED_19
+        eng "You should never see it"
+ER_PRIMARY_KEY_BASED_ON_GENERATED_COLUMN
+        chi "主键无法在生成的列上定义"
+        eng "Primary key cannot be defined upon a generated column"
+        spa "La clave primaria no se puede definir sobre una columna generada"
+ER_KEY_BASED_ON_GENERATED_VIRTUAL_COLUMN
+        chi "无法在虚拟生成的列上定义键/索引"
+        eng "Key/Index cannot be defined on a virtual generated column"
+        spa "Key/Index no se puede definir en una columna virtual generada"
+ER_WRONG_FK_OPTION_FOR_GENERATED_COLUMN
+        chi "无法在生成的列上定义%s子句的外键"
+        eng "Cannot define foreign key with %s clause on a generated column"
+        spa "No puedo definir clave foránea con cláusula %s en una columna generada"
+ER_WARNING_NON_DEFAULT_VALUE_FOR_GENERATED_COLUMN
+        chi "忽略了表'%s'中为生成的列'%s'指定的值已被忽略"
+        eng "The value specified for generated column '%s' in table '%s' has been ignored"
+        spa "El valor especificado para columna generada '%s' en la tabla '%s' ha sido ignorado"
+ER_UNSUPPORTED_ACTION_ON_GENERATED_COLUMN
+        chi "生成的列尚未支持这一点"
+        eng "This is not yet supported for generated columns"
+        spa "Esto no está aún soportado para columnas generadas"
+ER_UNUSED_20
+        eng "You should never see it"
+ER_UNUSED_21
+        eng "You should never see it"
+ER_UNSUPPORTED_ENGINE_FOR_GENERATED_COLUMNS
+        chi "%s存储引擎不支持生成的列"
+        eng "%s storage engine does not support generated columns"
+        hindi "स्टोरेज इंजन %s COMPUTED कॉलम्स को सपोर्ट नहीं करता"
+        spa "El motor de almacenaje %s no soporta columnas generadas"
+ER_UNKNOWN_OPTION
+        chi "未知选项'%-.64s'"
+        eng "Unknown option '%-.64s'"
+        hindi "अज्ञात विकल्प '%-.64s'"
+        spa "Opción desconocida '%-.64s'"
+ER_BAD_OPTION_VALUE
+        chi "值不正确'%-.64T'选项'%-.64s'"
+        eng "Incorrect value '%-.64T' for option '%-.64s'"
+        hindi "गलत मान '%-.64T' विकल्प '%-.64s' के लिए"
+        spa "Valor incorrecto '%-.64T' para opción '%-.64s'"
+ER_UNUSED_6
+        eng "You should never see it"
+        spa "Nunca debería vd de ver esto"
+ER_UNUSED_7
+        eng "You should never see it"
+        spa "Nunca debería vd de ver esto"
+ER_UNUSED_8
+        eng "You should never see it"
+        spa "Nunca debería vd de ver esto"
+ER_DATA_OVERFLOW 22003
+        chi "转换'%-.128s'到%-.32s时溢出。值截断"
+        eng "Got overflow when converting '%-.128s' to %-.32s. Value truncated"
+        spa "Obtenido desbordamiento al convertir '%-.128s' a %-.32s. Valor truncado"
+ER_DATA_TRUNCATED 22003
+        chi "转换'%-.128s'到%-.32s时值截断"
+        eng "Truncated value '%-.128s' when converting to %-.32s"
+        spa "Valor truncado '%-.128s' al convertir a %-.32s"
+ER_BAD_DATA 22007
+        chi "非法值'%-.128s',在转换%-.32s时遇到"
+        eng "Encountered illegal value '%-.128s' when converting to %-.32s"
+        spa "Encontrado valor ilegal '%-.128s' al convertir a %-.32s"
+ER_DYN_COL_WRONG_FORMAT
+        chi "遇到非法格式的动态列字符串"
+        eng "Encountered illegal format of dynamic column string"
+        spa "Encontrado formato ilegal de cadena en columna dinámica"
+ER_DYN_COL_IMPLEMENTATION_LIMIT
+        chi "达到动态列实现限制"
+        eng "Dynamic column implementation limit reached"
+        spa "Alcanzado límite de implementación de columna dinámica"
+ER_DYN_COL_DATA 22007
+        chi "非法值用作动态列函数的参数"
+        eng "Illegal value used as argument of dynamic column function"
+        spa "Valor ilegal usado como argumento de función de columna dinámica"
+ER_DYN_COL_WRONG_CHARSET
+        chi "动态列包含未知字符集"
+        eng "Dynamic column contains unknown character set"
+        spa "Columna dinámica contiene conjunto desconocido de caracteres"
+ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES
+        chi "“in_to_exists”或'materialization'optimizer_switch标志中的至少一个必须是'开启'"
+        eng "At least one of the 'in_to_exists' or 'materialization' optimizer_switch flags must be 'on'"
+        hindi "कम से कम 'in_to_exists' या 'materialization' optimizer_switch फ्लैग 'ON' होना चाहिए"
+        spa "Al menos una de las banderas de optimizer_switch 'in_to_exists' o 'materialization' debe de estar a 'on'"
+ER_QUERY_CACHE_IS_DISABLED
+        chi "查询缓存已禁用(调整大小或类似命令正在进行中);稍后重复此命令"
+        eng "Query cache is disabled (resize or similar command in progress); repeat this command later"
+        spa "Caché de consulta (query) desactivada (el comando ajustar o similar está en proceso); repite este comando más tarde"
+ER_QUERY_CACHE_IS_GLOBALY_DISABLED
+        chi "查询缓存全局禁用,您无法为此会话启用它"
+        eng "Query cache is globally disabled and you can't enable it only for this session"
+        hindi "क्वेरी कैश ग्लोबल स्तर पर DISABLED है और आप इसे केवल सत्र के लिए ENABLE नहीं कर सकते"
+        spa "La caché de consulta (query) está desactivada de forma global y no puede activarla sólo para esta sesión"
+ER_VIEW_ORDERBY_IGNORED
+        chi "查看'%-.192s'.'%-.192s's ORDER BY子句被忽略,因为还有其他ORDER BY子句"
+        eng "View '%-.192s'.'%-.192s' ORDER BY clause ignored because there is other ORDER BY clause already"
+        spa "Cláusula de vista '%-.192s'.'%-.192s' ORDER BY ignorada porque ya hay otra clásula ORDER BY"
+ER_CONNECTION_KILLED 70100 
+        chi "连接被杀死"
+        eng "Connection was killed"
+        hindi "कनेक्शन को समाप्त कर दिया गया है"
+        spa "La conexión fue matada"
+ER_UNUSED_12
+        eng "You should never see it"
+        spa "Nunca debería vd de ver esto"
+ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_SKIP_REPLICATION
+        chi "无法修改事务中的@@session.skip_replication"
+        eng "Cannot modify @@session.skip_replication inside a transaction"
+        spa "No puedo modificar @@session.skip_replication dentro de una transacción"
+ER_STORED_FUNCTION_PREVENTS_SWITCH_SKIP_REPLICATION
+        chi "无法修改存储函数或触发器内的@@session.skip_replication"
+        eng "Cannot modify @@session.skip_replication inside a stored function or trigger"
+        spa "No puedo modificar @@session.skip_replication dentro de una función almacenada o disparador"
+ER_QUERY_RESULT_INCOMPLETE
+        eng "Query execution was interrupted. The query exceeded %s %llu. The query result may be incomplete"
+ER_NO_SUCH_TABLE_IN_ENGINE 42S02 
+        chi "表'%-.192s.%-.192s'在引擎中不存在"
+        eng "Table '%-.192s.%-.192s' doesn't exist in engine"
+        hindi "टेबल '%-.192s.%-.192s' इंजन में मौजूद नहीं है"
+        spa "La tabla '%-.192s.%-.192s' no existe en el motor"
+        swe "Det finns ingen tabell som heter '%-.192s.%-.192s' i handlern"
+ER_TARGET_NOT_EXPLAINABLE
+        eng "Target is not executing an operation with a query plan"
+        chi "目标未运行可解释的命令"
+        spa "El objetivo no está ejecutando una operación con un plan de consulta (query)"
+ER_CONNECTION_ALREADY_EXISTS
+        chi "连接'%.*s'与现有连接'%.*s'冲突"
+        eng "Connection '%.*s' conflicts with existing connection '%.*s'"
+        spa "La conexión '%.*s' está en conflicto con la conexión existente '%.*s'"
+ER_MASTER_LOG_PREFIX
+        chi "Master'%.*s':"
+        eng "Master '%.*s': "
+        geo "მთავარი სერვერი '%.*s': "
+        spa "Maestro (master) '%.*s': "
+ER_CANT_START_STOP_SLAVE
+        chi "不能%sSLAVE'%.*s'"
+        eng "Can't %s SLAVE '%.*s'"
+        spa "No puedo %s ESCLAVO '%.*s'"
+ER_SLAVE_STARTED
+        chi "SLAVE '%.*s'开始了"
+        eng "SLAVE '%.*s' started"
+        geo "SLAVE '%.*s' გაეშვა"
+        spa "ESCLAVO '%.*s' arrancado"
+ER_SLAVE_STOPPED
+        chi "slave'%.*s'停止了"
+        eng "SLAVE '%.*s' stopped"
+        geo "SLAVE '%.*s' გაჩერდა"
+        spa "ESCLAVO '%.*s' parado"
+ER_SQL_DISCOVER_ERROR
+        chi "引擎%s无法发现表%`-.192s.%`-.192s,'%s'"
+        eng "Engine %s failed to discover table %`-.192s.%`-.192s with '%s'"
+        spa "El motor %s no pudo descubrir la tabla %`-.192s.%`-.192s con '%s'"
+ER_FAILED_GTID_STATE_INIT
+        chi "初始化复制GTID状态失败"
+        eng "Failed initializing replication GTID state"
+        spa "Fallo inicializando estado de réplica GTID"
+ER_INCORRECT_GTID_STATE
+        chi "无法解析GTID列表"
+        eng "Could not parse GTID list"
+        spa "No pude analizar la lista GTID"
+ER_CANNOT_UPDATE_GTID_STATE
+        chi "无法更新Replication Slave GTID状态"
+        eng "Could not update replication slave gtid state"
+        spa "No pude actualizar estado gtid de esclavo de réplica"
+ER_DUPLICATE_GTID_DOMAIN
+        chi "GTID %u-%u-%llu和%u-%u-%llu冲突(重复域ID%u)"
+        eng "GTID %u-%u-%llu and %u-%u-%llu conflict (duplicate domain id %u)"
+        spa "Conflicto GTID %u-%u-%llu y %u-%u-%llu (duplicado id de dominio %u)"
+ER_GTID_OPEN_TABLE_FAILED
+        chi "未能打开%s.%s"
+        eng "Failed to open %s.%s"
+        ger "Öffnen von %s.%s fehlgeschlagen"
+        geo "%s.%s-ის გახსნის შეცდომა"
+        spa "No pude abrir %s.%s"
+ER_GTID_POSITION_NOT_FOUND_IN_BINLOG
+        chi "连接从站请求从GTID%u-%u-%llu开始,这不在Master的Binlog中"
+        eng "Connecting slave requested to start from GTID %u-%u-%llu, which is not in the master's binlog"
+	spa "Se ha requerido que conectar esclavo arranque desde GTID %u-%u-%llu, el cual no está en el binlog del maestro (master)"
+ER_CANNOT_LOAD_SLAVE_GTID_STATE
+        chi "无法从表%s中加载Replication Slave GTID位置。%s"
+        eng "Failed to load replication slave GTID position from table %s.%s"
+	spa "No pude cargar posición GTID de esclavo de réplica desde la tabla %s.%s"
+ER_MASTER_GTID_POS_CONFLICTS_WITH_BINLOG
+        chi "指定的GTID%u-%u-%llu与二进制日志冲突,其中包含更新的GTID%u-%u-%llu。如果使用master_gtid_pos = current_pos,则Binlog位置将覆盖@@gtid_slave_pos的新值"
+        eng "Specified GTID %u-%u-%llu conflicts with the binary log which contains a more recent GTID %u-%u-%llu. If MASTER_GTID_POS=CURRENT_POS is used, the binlog position will override the new value of @@gtid_slave_pos"
+	spa "El especificado GTID %u-%u-%llu está en conflicto con el historial (log) binario el cual contiene un más reciente GTID %u-%u-%llu. Si se usa MASTER_GTID_POS=CURRENT_POS, la posición de binlog sobreescribirá el nuevo valor de @@gtid_slave_pos"
+ER_MASTER_GTID_POS_MISSING_DOMAIN
+        chi "指定值为@@gtid_slave_pos不包含复制域%u的值。这与二进制日志冲突,其中包含gtid%u-%u-%llu。如果使用master_gtid_pos = current_pos,则Binlog位置将覆盖@@ gtid_slave_pos的新值"
+        eng "Specified value for @@gtid_slave_pos contains no value for replication domain %u. This conflicts with the binary log which contains GTID %u-%u-%llu. If MASTER_GTID_POS=CURRENT_POS is used, the binlog position will override the new value of @@gtid_slave_pos"
+	spa "El valor especificado para @@gtid_slave_pos no contiene valor para dominio de réplica %u. Esto está en conflicto con el historial (log) binario el cual contiene un GTID %u-%u-%llu. Si se usa MASTER_GTID_POS=CURRENT_POS, la posición binlog sobreescribirá el nuevo valor de @@gtid_slave_pos"
+ER_UNTIL_REQUIRES_USING_GTID
+        chi "启动从站,直到master_gtid_pos要求从站使用gtid"
+        eng "START SLAVE UNTIL master_gtid_pos requires that slave is using GTID"
+	spa "START SLAVE UNTIL master_gtid_pos requiere que esclavo esté usando GTID"
+ER_GTID_STRICT_OUT_OF_ORDER
+        chi "尝试对Binlog GTID%u-%u-%llu进行,这将创建具有现有GTID%u-%u-%llu的订单无序序列号,并且启用了GTID严格模式"
+        eng "An attempt was made to binlog GTID %u-%u-%llu which would create an out-of-order sequence number with existing GTID %u-%u-%llu, and gtid strict mode is enabled"
+	spa "Se ha intentado hacer binlog de GTID %u-%u-%llu lo cual crearía un número de secuencia fuera de orden con el existente GTID %u-%u-%llu y está activado gtid en modo estricto"
+ER_GTID_START_FROM_BINLOG_HOLE
+        chi "主机上的Binlog缺少从站所需请求的GTID%u-%u-%llu(即使存在后续的序列号),并启用GTID严格模式"
+        eng "The binlog on the master is missing the GTID %u-%u-%llu requested by the slave (even though a subsequent sequence number does exist), and GTID strict mode is enabled"
+	spa "Al binlog del maestro (master) le falta el GTID %u-%u-%llu requerido por el esclavo (incluso aunque existe un número posterior de secuencia) y está activado GTID en modo estricto"
+ER_SLAVE_UNEXPECTED_MASTER_SWITCH
+        chi "重新连接后,从master收到意外的GTID。这通常表示在不重新启动从线程的情况下替换主服务器。%s."
+        eng "Unexpected GTID received from master after reconnect. This normally indicates that the master server was replaced without restarting the slave threads. %s"
+	spa "Se ha recibido un GTID inesperado desde el maestro (master) tras reconectar. Esto indica normalmente que el servidor maestro (master) ha sido reemplazado sin rearrancar los hilos (threads) del esclavo. %s"
+ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO
+        chi "无法修改@@sessient.gtid_domain_id或@@session.gtid_seq_no"
+        eng "Cannot modify @@session.gtid_domain_id or @@session.gtid_seq_no inside a transaction"
+        spa "No puedo modificar @@session.gtid_domain_id o @@session.gtid_seq_no dentro de una transacción"
+ER_STORED_FUNCTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO
+        chi "无法修改@@sessient.gtid_domain_id或@@session.gtid_seq_no或触发器"
+        eng "Cannot modify @@session.gtid_domain_id or @@session.gtid_seq_no inside a stored function or trigger"
+        spa "No puedo modificar @@session.gtid_domain_id o @@session.gtid_seq_no dentro de una función almacenada o de un disparador"
+ER_GTID_POSITION_NOT_FOUND_IN_BINLOG2
+        chi "连接从站请求从GTID%u-%u-%llu开始,这不在Master的Binlog中。由于Master的Binlog包含具有更高序列号的GTID,因此它可能意味着由于执行额外错误的交易,因此slave已经分歧"
+        eng "Connecting slave requested to start from GTID %u-%u-%llu, which is not in the master's binlog. Since the master's binlog contains GTIDs with higher sequence numbers, it probably means that the slave has diverged due to executing extra erroneous transactions"
+	spa "Se ha requerido arrancar la conexión a esclavo desde GTID %u-%u-%llu, el cual no está en el binlog del maestro (master). Ya que el binlog del maestro (master) contiene GTIDs con números mayores de secuencia, es probable que indique que el esclavo diverge debido a ejecutar transacciones extra erróneas"
+ER_BINLOG_MUST_BE_EMPTY
+        chi "如果已将任何GTID记录到二进制日志,则不允许此操作。首先运行RESET MASTER擦除日志"
+        eng "This operation is not allowed if any GTID has been logged to the binary log. Run RESET MASTER first to erase the log"
+	spa "Esta operación no está permitida si cualquier GTID ha sido puesto en historial (log) binario. Ejecuta primero RESET MASTER para borrar el historial (log)"
+ER_NO_SUCH_QUERY
+        chi "未知查询ID:%lld"
+        eng "Unknown query id: %lld"
+        ger "Unbekannte Abfrage-ID: %lld"
+        geo "უცნობი მოთხოვნის id: %lld"
+        hindi "अज्ञात क्वेरी ID: %lld"
+        rus "Неизвестный номер запроса: %lld"
+        spa "Id desconocido de consulta (query): %lld"
+ER_BAD_BASE64_DATA
+        chi "错误Base64数据作为位置%u"
+        eng "Bad base64 data as position %u"
+	spa "Datos base64 malos en posición %u"
+ER_INVALID_ROLE OP000
+        chi "无效的角色规范%`s"
+        eng "Invalid role specification %`s"
+        geo "არასწორი როლის აღწერა %`s"
+        hindi "अमान्य रोल विनिर्देश %`s"
+        rum "Rolul %`s este invalid"
+        spa "Especificación inválida de rol %`s"
+ER_INVALID_CURRENT_USER 0L000
+        chi "当前用户无效"
+        eng "The current user is invalid"
+        hindi "वर्तमान यूज़र अमान्य है"
+        rum "Utilizatorul curent este invalid"
+	spa "El usuario en curso no es válido"
+ER_CANNOT_GRANT_ROLE
+        chi "无法将角色'%s'授予:%s"
+        eng "Cannot grant role '%s' to: %s"
+        hindi "रोल '%s', %s को प्रदान नहीं कर सकते"
+        rum "Rolul '%s' nu poate fi acordat catre: %s"
+	spa "No puedo conceder rol '%s' a: %s"
+ER_CANNOT_REVOKE_ROLE
+        chi "无法撤消来自:%s的角色'%s'"
+        eng "Cannot revoke role '%s' from: %s"
+        hindi "रोल '%s', %s से हटाया नहीं जा सका"
+        rum "Rolul '%s' nu poate fi revocat de la: %s"
+	spa "No puedo revocar rol '%s' desde: %s"
+ER_CHANGE_SLAVE_PARALLEL_THREADS_ACTIVE
+        chi "无法更改@@slave_parallel_threads,而另一个更改正在进行中"
+        eng "Cannot change @@slave_parallel_threads while another change is in progress"
+	spa "No puedo cambiar @@slave_parallel_threads mientras otro cambio esté en proceso"
+ER_PRIOR_COMMIT_FAILED
+        chi "由于早期提交的失败取决于依赖于哪个依赖性,提交失败"
+        eng "Commit failed due to failure of an earlier commit on which this one depends"
+	spa "Ha fallado la acometida (commit) debido a un fallo previo en acometida (commit) de la que depende ésta"
+ER_IT_IS_A_VIEW 42S02
+        chi "'%-.192s'是一个VIEW"
+        eng "'%-.192s' is a view"
+        hindi "'%-.192s' एक VIEW है"
+        spa "'%-.192s' es una vista"
+ER_SLAVE_SKIP_NOT_IN_GTID
+        chi "使用并行复制和带有多个复制域的GTID时,无法使用@@SQL_SLAVE_SKIP_COUNTER。相反,可以使用明确设置@@gtid_slave_pos以在给定的gtid位置之后跳到"
+        eng "When using parallel replication and GTID with multiple replication domains, @@sql_slave_skip_counter can not be used. Instead, setting @@gtid_slave_pos explicitly can be used to skip to after a given GTID position"
+	spa "Al usar réplica paralela y GTID con múltiples dominios de réplica, no se puede usar @@sql_slave_skip_counter. En su lugar, poner @@gtid_slave_pos de forma explícita se puede usar para saltar tras una posición GTID dada"
+ER_TABLE_DEFINITION_TOO_BIG
+        chi "表%`s的定义太大了"
+        eng "The definition for table %`s is too big"
+        hindi "टेबल %`s की परिभाषा बहुत बड़ी है"
+        spa "La definición para la tabla %`s es demasiado larga"
+ER_PLUGIN_INSTALLED
+        chi "插件'%-.192s'已安装"
+        eng "Plugin '%-.192s' already installed"
+        hindi "प्लग-इन '%-.192s' पहले से ही इन्स्टॉल्ड है"
+        rus "Плагин '%-.192s' уже установлен"
+        spa "Ya instalado el enchufe (plugin) '%-.192s'"
+ER_STATEMENT_TIMEOUT 70100
+        chi "查询执行中断(超出MAX_STATEMENT_TIME)"
+        eng "Query execution was interrupted (max_statement_time exceeded)"
+        spa "Se ha interrumpido la ejecución de una consulta (query) (excedido max_statement_time)"
+ER_SUBQUERIES_NOT_SUPPORTED 42000
+        chi "%s不支持子查询或存储的函数"
+        eng "%s does not support subqueries or stored functions"
+        spa "%s no soporta subconsultas (subqueries) o funciones almacenadas"
+ER_SET_STATEMENT_NOT_SUPPORTED 42000
+        chi "系统变量%.200s无法在set语句中设置。"
+        eng "The system variable %.200s cannot be set in SET STATEMENT." 
+        spa "La variable del sistema %.200s no se puede poner en SET STATEMENT." 
+ER_UNUSED_9
+        eng "You should never see it"
+        spa "Nunca debería vd de ver esto"
+ER_USER_CREATE_EXISTS
+        chi "无法创建用户'%-.64s'@'%-.64s';它已经存在"
+        eng "Can't create user '%-.64s'@'%-.64s'; it already exists"
+        hindi "यूज़र '%-.64s'@'%-.64s' को नहीं बना सकते; यह पहले से ही मौजूद है"
+        spa "No puedo crear usuario '%-.64s'@'%-.64s'; ya existe"
+ER_USER_DROP_EXISTS
+        chi "无法删除用户'%-.64s'@'%-.64s';它不存在"
+        eng "Can't drop user '%-.64s'@'%-.64s'; it doesn't exist"
+        hindi "यूज़र '%-.64s'@'%-.64s' को ड्रॉप नहीं कर सकते; यह मौजूद नहीं है"
+        spa "No puedo eliminar usuario '%-.64s'@'%-.64s'; no existe"
+ER_ROLE_CREATE_EXISTS
+        chi "无法创建角色'%-.64s';它已经存在"
+        eng "Can't create role '%-.64s'; it already exists"
+        hindi "रोल '%-.64s' को नहीं बना सकते; यह पहले से ही मौजूद है"
+        spa "No puedo crear rol '%-.64s'; ya existe"
+ER_ROLE_DROP_EXISTS
+        chi "无法删除'%-.64s'。它不存在"
+        eng "Can't drop role '%-.64s'; it doesn't exist"
+        hindi "रोल '%-.64s' को ड्रॉप नहीं कर सकते; यह मौजूद नहीं है"
+        spa "No puedo eliminar rol '%-.64s'; no existe"
+ER_CANNOT_CONVERT_CHARACTER
+        chi "无法将'%s'字符0x转换为0x%-.64s到'%s'"
+        eng "Cannot convert '%s' character 0x%-.64s to '%s'"
+        spa "No puedo convertir '%s' carácter 0x%-.64s a '%s'"
+ER_INVALID_DEFAULT_VALUE_FOR_FIELD  22007
+        chi "列的默认值不正确'%-.128T' '%.192s'"
+        eng "Incorrect default value '%-.128T' for column '%.192s'"
+        hindi "गलत डिफ़ॉल्ट मान '%-.128T' कॉलम '%.192s' के लिए"
+        spa "Valor por defecto incorrecto '%-.128T' para columna '%.192s'"
+ER_KILL_QUERY_DENIED_ERROR
+        chi "你不是查询%lld的所有者"
+        eng "You are not owner of query %lld"
+        ger "Sie sind nicht Eigentümer von Abfrage %lld"
+        hindi "आप क्वेरी %lld के OWNER नहीं हैं"
+        rus "Вы не являетесь владельцем запроса %lld"
+        spa "No eres el propietario de la consulta (query) %lld"
+ER_NO_EIS_FOR_FIELD
+        chi "没有收集无关的统计信息列'%s'"
+        eng "Engine-independent statistics are not collected for column '%s'"
+        hindi "Engine-independent सांख्यिकी कॉलम '%s' के लिए एकत्रित नहीं किया जा रहा है"
+        spa "No se han recolectado estadísticas independientes del motor para la columna '%s'"
+        ukr "Незалежна від типу таблиці статистика не збирається для стовбця '%s'"
+ER_WARN_AGGFUNC_DEPENDENCE
+        chi "聚合函数'%-.192s)'SELECT#%d的属于选择#%d"
+        eng "Aggregate function '%-.192s)' of SELECT #%d belongs to SELECT #%d"
+        spa "La función de agregación '%-.192s)' del SELECT #%d pertenece a SELECT #%d"
+        ukr "Агрегатна функція '%-.192s)' з SELECTу #%d належить до SELECTу #%d"
+WARN_INNODB_PARTITION_OPTION_IGNORED
+        chi "<%-.64s> innodb分区忽略的选项"
+        eng "<%-.64s> option ignored for InnoDB partition"
+        spa "<%-.64s> opción ignorada para partición InnoDB"
+
+#
+# Internal errors, not used
+#
+skip-to-error-number 2000
+
+# MySQL 5.7 error numbers starts here
+skip-to-error-number 3000
+
+ER_FILE_CORRUPT
+        chi "文件%s已损坏"
+        eng "File %s is corrupted"
+        spa "El fichero/archivo %s está corrupto"
+
+ER_ERROR_ON_MASTER
+        chi "查询在主设备上部分完成(主设备:%d)并中止。你的master在这一点上有可能不一致。如果您确定您的主站是可以的,请在从站上手动运行此查询,然后使用SET GLOBAL SQL_SLAVE_SKIP_COUNTER = 1; START SLAVE ;查询:'%s'"
+        eng "Query partially completed on the master (error on master: %d) and was aborted. There is a chance that your master is inconsistent at this point. If you are sure that your master is ok, run this query manually on the slave and then restart the slave with SET GLOBAL SQL_SLAVE_SKIP_COUNTER=1; START SLAVE;. Query:'%s'"
+        spa "Consulta (query) completada de forma parcial en el maestro (master) (error en maestro (master): %d) y se ha abortado. Existe una posibilidad de que su maestro (master) esté inconsitente en este punto. Si está seguro de que su maestro (master) está ok, ejecute esta consulta (query) de forma manual en el esclavo y luego rearranque el esclavo mediante SET GLOBAL SQL_SLAVE_SKIP_COUNTER=1; START SLAVE;. Consulta (query):'%s'"
+
+ER_INCONSISTENT_ERROR
+        chi "查询在主站和从站上引起了不同的错误。主站错误:消息(格式)='%s'错误代码=%d;从站错误:实际消息='%s',错误代码=%d。默认数据库:'%s'。查询:'%s'"
+        eng "Query caused different errors on master and slave. Error on master: message (format)='%s' error code=%d; Error on slave:actual message='%s', error code=%d. Default database:'%s'. Query:'%s'"
+        spa "La consulta (query) ha ocasionado diferentes errores en el maestro (master) y en el esclavo. Error en maestro (master): mensaje (formato)='%s' código de error=%d; Error en esclavo: mensaje actual='%s', código de error=%d. Base de datos por defecto:'%s'. Consulta (query):'%s'"
+
+ER_STORAGE_ENGINE_NOT_LOADED
+        chi "表'%s'的存储引擎'%s'没有加载。"
+        eng "Storage engine for table '%s'.'%s' is not loaded."
+        spa "El motor de almacenaje para la tabla '%s'.'%s' no ha sido cargado."
+
+ER_GET_STACKED_DA_WITHOUT_ACTIVE_HANDLER 0Z002
+        chi "处理程序未激活时GET STACKED DIAGNOSTICS"
+        eng "GET STACKED DIAGNOSTICS when handler not active"
+        spa "GET STACKED DIAGNOSTICS cuando el manejador no está activo"
+
+ER_WARN_LEGACY_SYNTAX_CONVERTED
+        chi "不再支持%s。该语句被转换为%s。"
+        eng "%s is no longer supported. The statement was converted to %s."
+        spa "%s ya no está soportada. Se ha convertido la sentencia a %s."
+
+ER_BINLOG_UNSAFE_FULLTEXT_PLUGIN
+        chi "语句不安全,因为它使用全文解析器插件,它可能不会在从站上返回相同的值。"
+        eng "Statement is unsafe because it uses a fulltext parser plugin which may not return the same value on the slave."
+        spa "La sentencia no es segura porque usa un enchufe (plugin) analizador de fulltext que puede que no devuelva el mismo valor en el esclavo."
+
+ER_CANNOT_DISCARD_TEMPORARY_TABLE
+        chi "无法丢弃与临时表相关联的/导入表空间"
+        eng "Cannot DISCARD/IMPORT tablespace associated with temporary table"
+        spa "No puedo DISCARD/IMPORT espacio de tabla asociado con tabla temporal"
+
+ER_FK_DEPTH_EXCEEDED
+        chi "外键级联删除/更新超出了%d的最大深度。"
+        eng "Foreign key cascade delete/update exceeds max depth of %d."
+        spa "La cascada borrar/actualizar en clave foránea excede la máxima profundidad de %d."
+
+ER_COL_COUNT_DOESNT_MATCH_PLEASE_UPDATE_V2
+        chi "列数为%s.%s是错误的。预期的%d,找到%d。使用MariaDB%d创建,现在运行%d。请使用mariadb-upgrade来修复此错误。"
+        eng "Column count of %s.%s is wrong. Expected %d, found %d. Created with MariaDB %d, now running %d. Please use mariadb-upgrade to fix this error."
+        ger "Spaltenanzahl von %s.%s falsch. %d erwartet, aber %d erhalten. Erzeugt mit MariaDB %d, jetzt unter %d. Bitte benutzen Sie mariadb-upgrade, um den Fehler zu beheben"
+        spa "El contador de columna de %s.%s está equivocado. Se esperaba %d, hallado %d, Creado con MariaDB %d, ahora ejecutando %d. Por favor, use mariadb-upgrade para arreglar este error."
+
+ER_WARN_TRIGGER_DOESNT_HAVE_CREATED
+        chi "触发器%s.%s.%s没有CREATE属性。"
+        eng "Trigger %s.%s.%s does not have CREATED attribute."
+        spa "El disparador %s.%s.%s no tiene el atributo CREATED."
+
+ER_REFERENCED_TRG_DOES_NOT_EXIST_MYSQL
+        chi "引用的触发器'%s'用于给定的动作时间和事件类型不存在。"
+        eng "Referenced trigger '%s' for the given action time and event type does not exist."
+        spa "El disparador referenciado '%s' para el momento dado de acción y el tipo de evento no existe."
+
+ER_EXPLAIN_NOT_SUPPORTED
+        chi "EXPLAIN FOR CONNECTION仅支持SELECT/UPDATE/INSERT/DELETE/REPLACE"
+        eng "EXPLAIN FOR CONNECTION command is supported only for SELECT/UPDATE/INSERT/DELETE/REPLACE"
+        spa "El comando EXPLAIN FOR CONNECTION sólo se soporta para SELECT/UPDATE/INSERT/DELETE/REPLACE"
+ER_INVALID_FIELD_SIZE
+        chi "列'%-.192s'的大小无效。"
+        eng "Invalid size for column '%-.192s'."
+        spa "Tamaño inválido para columna '%-.192s'."
+
+ER_MISSING_HA_CREATE_OPTION
+        chi "表存储引擎'%-.64s'所找到的必备创建选项丢失"
+        eng "Table storage engine '%-.64s' found required create option missing"
+        spa "El motor hallado de almacenaje de tabla '%-.64s' requería de la opción de crear que falta"
+
+ER_ENGINE_OUT_OF_MEMORY
+        chi "存储引擎'%-.64s'中的内存不足。"
+        eng "Out of memory in storage engine '%-.64s'."
+        spa "Memoria agotada en motor de almacenaje '%-.64s'."
+
+ER_PASSWORD_EXPIRE_ANONYMOUS_USER
+        chi "匿名用户的密码不能过期。"
+        eng "The password for anonymous user cannot be expired."
+        spa "La contraseña para usuario anónimo no puede expirar."
+
+ER_SLAVE_SQL_THREAD_MUST_STOP
+        chi "无法使用正在运行的从SQL线程执行此操作;首先运行STOP SLAVE SQL_THREAD"
+        eng "This operation cannot be performed with a running slave sql thread; run STOP SLAVE SQL_THREAD first"
+        spa "Esta operación no se puede realizar con hilo (thread) sql esclavo en ejecución; ejecuta STOP SLAVE SQL_THREAD primero"
+
+ER_NO_FT_MATERIALIZED_SUBQUERY
+        chi "无法在物化的子查询上创建FullText索引"
+        eng "Cannot create FULLTEXT index on materialized subquery"
+        spa "No puedo crear índice FULLTEXT en subconsulta (subquery) materializada"
+
+ER_INNODB_UNDO_LOG_FULL
+        chi "撤消日志错误:%s"
+        eng "Undo Log error: %s"
+        spa "Error de Historial (log) de Deshacer: %s"
+
+ER_INVALID_ARGUMENT_FOR_LOGARITHM 2201E
+        chi "对数的参数无效"
+        eng "Invalid argument for logarithm"
+        spa "Argumento inválido para logaritmo"
+
+ER_SLAVE_CHANNEL_IO_THREAD_MUST_STOP
+        chi "无法使用正在运行的slave IO线程执行此操作;首先运行STOP SLAVE IO_THREAD FOR CHANNEL'%s'。"
+        eng "This operation cannot be performed with a running slave io thread; run STOP SLAVE IO_THREAD FOR CHANNEL '%s' first."
+        spa "Esta operación no se puede realizar con un hilo (thread) de e/s de esclavo en ejecución; ejecuta STOP SLAVE IO_THREAD FOR CHANNEL '%s' primero."
+
+ER_WARN_OPEN_TEMP_TABLES_MUST_BE_ZERO
+        chi "当从站具有临时表时,此操作可能不安全。表将保持打开,直到服务器重新启动或通过任何复制的DROP语句删除表。建议等到Slave_open_temp_tables = 0。"
+        eng "This operation may not be safe when the slave has temporary tables. The tables will be kept open until the server restarts or until the tables are deleted by any replicated DROP statement. Suggest to wait until slave_open_temp_tables = 0."
+        spa "Esta operación puede no ser segura cuando el esclavo tenga tablas temporales. Las tablas serán mantenidas abiertas hasta que el servidor rearranque o hasta que las tablas sean borradas por cualquier sentencia DROP replicada. Se sugiere esperar hasta slave_open_temp_tables = 0."
+
+ER_WARN_ONLY_MASTER_LOG_FILE_NO_POS
+        chi "使用CHANGE MASTER TO master_log_file子句更改master,但没有master_log_pos子句可能不安全。旧位置值可能对新的二进制日志文件无效。"
+        eng "CHANGE MASTER TO with a MASTER_LOG_FILE clause but no MASTER_LOG_POS clause may not be safe. The old position value may not be valid for the new binary log file."
+        spa "CHANGE MASTER TO mediante una cláusula MASTER_LOG_FILE pero sin existir cláusula MASTER_LOG_POS puede no ser seguro. El valor viejo de la posición puede no ser válido para el nuevo fichero/archivo binario de historial (log)."
+
+ER_UNUSED_1
+        eng "You should never see it"
+
+ER_NON_RO_SELECT_DISABLE_TIMER
+        chi "SELECT不是只读语句,禁用计时器"
+        eng "Select is not a read only statement, disabling timer"
+        spa "Select no es una sentencia de sólo lectura, desactivando cronómetro"
+
+ER_DUP_LIST_ENTRY
+        chi "重复条目'%-.192s'。"
+        eng "Duplicate entry '%-.192s'."
+        spa "Entrada duplicada '%-.192s'."
+
+ER_SQL_MODE_NO_EFFECT
+        chi "'%s'模式不再有任何效果。使用STRICT_ALL_TABLES或STRICT_TRANS_TABLES。"
+        eng "'%s' mode no longer has any effect. Use STRICT_ALL_TABLES or STRICT_TRANS_TABLES instead."
+        spa "El modo '%s' ya no tiene efecto alguno. Use STRICT_ALL_TABLES o STRICT_TRANS_TABLES en su lugar"
+
+ER_AGGREGATE_ORDER_FOR_UNION
+        chi "表达式#%u ORDER BY包含聚合函数并适用于UNION"
+        eng "Expression #%u of ORDER BY contains aggregate function and applies to a UNION"
+        spa "La expresión #%u de ORDER BY contiene función de agregación y se aplica a UNION"
+
+ER_AGGREGATE_ORDER_NON_AGG_QUERY
+        chi "表达式#%u通过包含聚合函数,并适用于非聚合查询的结果"
+        eng "Expression #%u of ORDER BY contains aggregate function and applies to the result of a non-aggregated query"
+        spa "La expresión #%u de ORDER BY contiene función de agregación y se aplica al resultado de una consulta (query) no agregada"
+
+ER_SLAVE_WORKER_STOPPED_PREVIOUS_THD_ERROR
+        chi "在启用了slave保存提交次序时至少有一个以前的工人遇到错误后,slave工作者已停止。要保留提交次序,此线程执行的最后一项事务尚未提交。在修复任何故障线程后重新启动从站时,您也应该修复此工作人。"
+        eng "Slave worker has stopped after at least one previous worker encountered an error when slave-preserve-commit-order was enabled. To preserve commit order, the last transaction executed by this thread has not been committed. When restarting the slave after fixing any failed threads, you should fix this worker as well."
+        spa "El trabajador esclavo se ha parado tras al menos encontrar un error en trabajador previo cuando slave-preserve-commit-order fue activado. Para preserver el orden de acometida (commit), la última transacción ejecutada por este hilo (thread) no se ha acometido (commit). Al rearrancar el esclavo tras arreglar cualquier hilo (thread) fallido, vd debería de arreglar este trabajador también"
+
+ER_DONT_SUPPORT_SLAVE_PRESERVE_COMMIT_ORDER
+        chi "slave_preerve_commit_order不支持%s。"
+        eng "slave_preserve_commit_order is not supported %s."
+        spa "slave_preserve_commit_order no está soportado %s."
+
+ER_SERVER_OFFLINE_MODE
+        chi "服务器目前处于离线模式"
+        eng "The server is currently in offline mode"
+        spa "El servidor se encuentra actualmente en modo fuera de línea"
+
+ER_GIS_DIFFERENT_SRIDS
+        chi "二进制几何函数%s给定两个不同SRID的几何形状:%u和%u,应该是相同的。"
+        eng "Binary geometry function %s given two geometries of different srids: %u and %u, which should have been identical."
+        spa "La función binaria de geomertía %s ha dado dos geometrías de diferente srids: %u y %u, que deberían de haber sido idénticas"
+
+ER_GIS_UNSUPPORTED_ARGUMENT
+        chi "调用几何函数%s与不受支持类型的参数。"
+        eng "Calling geometry function %s with unsupported types of arguments."
+        spa "Llamando a función de geometría %s con tipos de argumento no soportados."
+
+ER_GIS_UNKNOWN_ERROR
+        chi "未知的GIS错误发生在功能%s中。"
+        eng "Unknown GIS error occurred in function %s."
+        spa "Ha ocurrido un error GIS desconocido en función %s."
+
+ER_GIS_UNKNOWN_EXCEPTION
+        chi "在GIS功能%s中捕获的未知异常。"
+        eng "Unknown exception caught in GIS function %s."
+        spa "Excepción desconocida capturada en función GIS %s."
+
+ER_GIS_INVALID_DATA 22023
+        chi "提供给功能%s的GIS数据无效。"
+        eng "Invalid GIS data provided to function %s."
+        spa "Suministrados datos GIS inválidos a función %s."
+
+ER_BOOST_GEOMETRY_EMPTY_INPUT_EXCEPTION
+        chi "几何形状在功能%s中没有数据。"
+        eng "The geometry has no data in function %s."
+        spa "La geometría no tiene datos en función %s."
+
+ER_BOOST_GEOMETRY_CENTROID_EXCEPTION
+        chi "无法计算质心,因为在功能%s中几何为空。"
+        eng "Unable to calculate centroid because geometry is empty in function %s."
+        spa "Imposible calcular centroid porque la geometría está vacía en la función %s."
+
+ER_BOOST_GEOMETRY_OVERLAY_INVALID_INPUT_EXCEPTION
+        chi "几何叠加计算错误:几何数据在功能%s中无效。"
+        eng "Geometry overlay calculation error: geometry data is invalid in function %s."
+        spa "Error de cálculo de superposición de geometría: el dato de geometría es inválido en la función %s."
+
+ER_BOOST_GEOMETRY_TURN_INFO_EXCEPTION
+        chi "几何旋转信息计算错误:几何数据在功能%s中无效。"
+        eng "Geometry turn info calculation error: geometry data is invalid in function %s."
+        spa "Error de cálculo de información devuelto: los datos de geometría son inválidos en la función %s."
+
+ER_BOOST_GEOMETRY_SELF_INTERSECTION_POINT_EXCEPTION
+        chi "在功能%s中出乎意料地中断交叉点的分析程序。"
+        eng "Analysis procedures of intersection points interrupted unexpectedly in function %s."
+        spa "Los procedimientos de análisis de puntos de intersección se interrumpieron inesperadamente en la función %s."
+
+ER_BOOST_GEOMETRY_UNKNOWN_EXCEPTION
+        chi "在功能%s中抛出的未知异常。"
+        eng "Unknown exception thrown in function %s."
+        spa "Excepción desconocida lanzada en la función %s."
+
+ER_STD_BAD_ALLOC_ERROR
+        chi "内存分配错误:%-.256s。函数%s。"
+        eng "Memory allocation error: %-.256s in function %s."
+        spa "Error en adjudicación de memoria: %-.256s en la función %s."
+
+ER_STD_DOMAIN_ERROR
+        chi "域名错误:%-.256s. 函数%s"
+        eng "Domain error: %-.256s in function %s."
+        spa "Error en dominio: %-.256s en función %s."
+
+ER_STD_LENGTH_ERROR
+        chi "长度误差:%-.256s函数%s。"
+        eng "Length error: %-.256s in function %s."
+        spa "Error de tamaño: %-.256s en función %s."
+
+ER_STD_INVALID_ARGUMENT
+        chi "无效的参数错误:%-.256s函数%s。"
+        eng "Invalid argument error: %-.256s in function %s."
+        spa "Error de argumento inválido: %-.256s en función %s."
+
+ER_STD_OUT_OF_RANGE_ERROR
+        chi "超出范围错误:%-.256s 函数%s。"
+        eng "Out of range error: %-.256s in function %s."
+        spa "Error de fuera de rango: %-.256s en función %s."
+
+ER_STD_OVERFLOW_ERROR
+        chi "溢出错误:%-.256s。功能%s。"
+        eng "Overflow error: %-.256s in function %s."
+        spa "Error de desbordamiento: %-.256s en función %s."
+
+ER_STD_RANGE_ERROR
+        chi "范围错误:%-.256s函数%s。"
+        eng "Range error: %-.256s in function %s."
+        spa "Error de rango: %-.256s en función %s."
+
+ER_STD_UNDERFLOW_ERROR
+        chi "下溢错误:%-.256s函数%s。"
+        eng "Underflow error: %-.256s in function %s."
+        spa "Error de refreno (underflow): %-.256s en la función %s."
+
+ER_STD_LOGIC_ERROR
+        chi "逻辑错误:%-.256s 函数%s。"
+        eng "Logic error: %-.256s in function %s."
+        spa "Error lógico: %-.256s en la función %s."
+
+ER_STD_RUNTIME_ERROR
+        chi "运行时错误:%-.256s函数%s。"
+        eng "Runtime error: %-.256s in function %s."
+        spa "Error en tiempo de ejecución: %-.256s en la función %s."
+
+ER_STD_UNKNOWN_EXCEPTION
+        chi "未知例外:%-.384s在函数%s中。"
+        eng "Unknown exception: %-.384s in function %s."
+        spa "Excepción desconocida: %-.384s en la función %s."
+
+ER_GIS_DATA_WRONG_ENDIANESS
+        chi "几何字节字符串必须是小endian。"
+        eng "Geometry byte string must be little endian."
+        spa "La cadena de byte en Geometría debe de ser 'little endian'."
+
+ER_CHANGE_MASTER_PASSWORD_LENGTH
+        chi "为Replication User提供的密码超过32个字符的最大长度"
+        eng "The password provided for the replication user exceeds the maximum length of 32 characters"
+        spa "La contraseña suministrada para el usuario de réplica excede el tamaño máximo de 32 caracteres"
+
+ER_USER_LOCK_WRONG_NAME 42000
+        chi "用户级锁名名称'%-.192s'不正确。"
+        eng "Incorrect user-level lock name '%-.192s'."
+        spa "Nombre de bloqueo incorrecto a nivel de usuario '%-.192s'."
+
+# Should be different from ER_LOCK_DEADLOCK since it doesn't cause implicit
+# rollback. Should not be mapped to SQLSTATE 40001 for the same reason.
+ER_USER_LOCK_DEADLOCK
+        chi "在尝试获得用户级锁时发现死锁;尝试回滚交易/释放锁定并重新启动锁定采集。"
+        eng "Deadlock found when trying to get user-level lock; try rolling back transaction/releasing locks and restarting lock acquisition."
+        spa "Hallado estancamiento (deadlock) al intentar obtener bloqueo a nivel de usuario; intente retroceder (roll back) bloqueos de transacción/entrega y rearranque la adquisición de bloqueo."
+
+ER_REPLACE_INACCESSIBLE_ROWS
+        chi "无法执行REPLACE,因为它需要删除不在视图中的行"
+        eng "REPLACE cannot be executed as it requires deleting rows that are not in the view"
+        spa "REPLACE no se puede ejecutar ya que requiere borrar filas que no están en la vista"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_GIS
+        chi "不要支持使用GIS索引的表中的在线操作"
+        eng "Do not support online operation on table with GIS index"
+        spa "No soporta operación en línea en tabla con índice GIS"
+# MariaDB extra error numbers starts from 4000
+skip-to-error-number 4000
+
+ER_UNUSED_26 0A000
+        eng "This error never happens"
+        spa "Este error nunca ocurre"
+ER_UNUSED_27
+        eng "This error never happens"
+        spa "Este error nunca ocurre"
+ER_WITH_COL_WRONG_LIST
+        chi "使用列列表并选择字段列表具有不同的列计数"
+        eng "WITH column list and SELECT field list have different column counts"
+        spa "La lista de columnas de WITH y lista de campos de SELECT tienen diferentes contadores de columna"
+ER_TOO_MANY_DEFINITIONS_IN_WITH_CLAUSE
+        chi "WITH条款中的元素太多了"
+        eng "Too many WITH elements in WITH clause"
+        spa "Demasiados elementos WITH en cláusua WITH"
+ER_DUP_QUERY_NAME
+        chi "WITH子句重复查询名称%`-.64s"
+        eng "Duplicate query name %`-.64s in WITH clause"
+        spa "Nombre de consulta (query) %`-.64s duplicada en cláusula WITH"
+ER_RECURSIVE_WITHOUT_ANCHORS
+        chi "没有元素'%s'递归的锚点"
+        eng "No anchors for recursive WITH element '%s'"
+        spa "No hay anclajes para elemento WITH recursivo '%s'"
+ER_UNACCEPTABLE_MUTUAL_RECURSION
+        chi "锚定表'%s'不可接受的相互递归"
+        eng "Unacceptable mutual recursion with anchored table '%s'"
+        spa "Recursión mutua inaceptable con tabla anclada '%s'"
+ER_REF_TO_RECURSIVE_WITH_TABLE_IN_DERIVED
+        chi "物质化的衍生参考指向递归的WITH 表'%s'"
+        eng "Reference to recursive WITH table '%s' in materialized derived"
+        spa "Referencia recursiva con WITH tabla '%s' en derivada materializada"
+ER_NOT_STANDARD_COMPLIANT_RECURSIVE
+        chi "表'%s'违反了递归定义的限制"
+        eng "Restrictions imposed on recursive definitions are violated for table '%s'"
+ER_WRONG_WINDOW_SPEC_NAME
+        chi "没有定义名称'%s'的窗口规范"
+        eng "Window specification with name '%s' is not defined"
+        spa "Especificación de ventana con nombre '%s' no definida"
+ER_DUP_WINDOW_NAME
+        chi "具有相同名称'%s'的多个窗口规范"
+        eng "Multiple window specifications with the same name '%s'"
+        spa "Múltiples especificaciones de ventana con el mismo nombre '%s'"
+ER_PARTITION_LIST_IN_REFERENCING_WINDOW_SPEC
+        chi "窗口规范引用另一个'%s'不能包含分区列表"
+        eng "Window specification referencing another one '%s' cannot contain partition list"
+        spa "La especificación de ventana que referencia a otra '%s' no puede contener una lista de partición"
+ER_ORDER_LIST_IN_REFERENCING_WINDOW_SPEC
+        chi "引用的窗口规范'%s'已包含次序列表"
+        eng "Referenced window specification '%s' already contains order list"
+        spa "La especificación de ventana referenciada '%s' ya contiene lista de orden"
+ER_WINDOW_FRAME_IN_REFERENCED_WINDOW_SPEC
+        chi "引用的窗口规范'%s'不能包含窗口框架"
+        eng "Referenced window specification '%s' cannot contain window frame"
+        spa "La especificación referenciada de ventana '%s' no puede contener marco de ventana"
+ER_BAD_COMBINATION_OF_WINDOW_FRAME_BOUND_SPECS
+        chi "窗框绑定规格的不可接受的组合"
+        eng "Unacceptable combination of window frame bound specifications"
+        spa "Combinación inaceptable de especificaciones ligadas a marco de ventana"
+ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION
+        chi "窗口函数仅在SELECT列表和ORDER BY子句中允许"
+        eng "Window function is allowed only in SELECT list and ORDER BY clause"
+        spa "La función de ventana sólo se permite en lista SELECT y en cláusula ORDER BY"
+ER_WINDOW_FUNCTION_IN_WINDOW_SPEC
+        chi "窗口规范中不允许窗口功能"
+        eng "Window function is not allowed in window specification"
+        spa "La función de ventana no está permitida en especificación de ventana"
+ER_NOT_ALLOWED_WINDOW_FRAME
+        chi "窗框不允许使用'%s'"
+        eng "Window frame is not allowed with '%s'"
+        spa "El marco de ventana no está permitido con '%s'"
+ER_NO_ORDER_LIST_IN_WINDOW_SPEC
+        chi "在“%s”的窗口规范中没有订单列表"
+        eng "No order list in window specification for '%s'"
+        spa "No exite lista de orden en especificación de ventana para '%s'"
+ER_RANGE_FRAME_NEEDS_SIMPLE_ORDERBY
+        chi "范围型框架需要单个排序键订购逐个条款"
+        eng "RANGE-type frame requires ORDER BY clause with single sort key"
+        spa "El marco tipo-RANGE requiere de la cláusula ORDER BY con clave única de clasificación"
+ER_WRONG_TYPE_FOR_ROWS_FRAME
+        chi "行类型框架需要整数"
+        eng "Integer is required for ROWS-type frame"
+        spa "Se requiere de un entero para marco tipo-ROWS"
+ER_WRONG_TYPE_FOR_RANGE_FRAME
+        chi "范围类型框架需要数字数据类型"
+        eng "Numeric datatype is required for RANGE-type frame"
+        spa "Se requiere de tipo de dato numérico para marco tipo-RANGE"
+ER_FRAME_EXCLUSION_NOT_SUPPORTED
+        chi "帧排除尚不支持"
+        eng "Frame exclusion is not supported yet"
+        spa "No se seporta aún la exclusión del marco"
+ER_WINDOW_FUNCTION_DONT_HAVE_FRAME
+        chi "此窗口功能可能没有窗口框架"
+        eng "This window function may not have a window frame"
+        spa "Esta función de ventana puede no tener un marco de ventana"
+ER_INVALID_NTILE_ARGUMENT
+        chi "NTILE的参数必须大于0"
+        eng "Argument of NTILE must be greater than 0"
+        spa "El argumento de NTILE debe de ser mayor de 0"
+ER_CONSTRAINT_FAILED 23000
+        chi "CONSTRAINT %`s失败的%`-.192s。%`-.192s"
+        eng "CONSTRAINT %`s failed for %`-.192s.%`-.192s"
+        ger "CONSTRAINT %`s fehlgeschlagen: %`-.192s.%`-.192s"
+        rus "проверка CONSTRAINT %`s для %`-.192s.%`-.192s провалилась"
+        spa "No se cumple la RESTRICCIÓN %`s para %`-.192s.%`-.192s"
+        ukr "Перевірка CONSTRAINT %`s для %`-.192s.%`-.192s не пройшла"
+ER_EXPRESSION_IS_TOO_BIG
+        chi "%s条款中的表达太大了"
+        eng "Expression in the %s clause is too big"
+        spa "La expresión en la cláusula %s es demasiado grande"
+ER_ERROR_EVALUATING_EXPRESSION
+        chi "获得了一个错误评估存储的表达式%s"
+        eng "Got an error evaluating stored expression %s"
+        spa "Obtenido error evaluando expresión almacenada %s"
+ER_CALCULATING_DEFAULT_VALUE
+        chi "计算默认值为%`s时出错"
+        eng "Got an error when calculating default value for %`s"
+        spa "Obtenido un error al calcular valor por defecto para %`s"
+ER_EXPRESSION_REFERS_TO_UNINIT_FIELD 01000
+        chi "字段%`-.64s的表达指的是未初始化的字段%`s"
+        eng "Expression for field %`-.64s is referring to uninitialized field %`s"
+        spa "La expresión para campo %`-.64s se refiere a un campo sin inicializar %`s"
+ER_PARTITION_DEFAULT_ERROR
+        chi "只允许一个默认分区"
+        eng "Only one DEFAULT partition allowed"
+        spa "Sólo se permite una partición DEFAULT"
+        ukr "Припустимо мати тільки один DEFAULT розділ" 
+ER_REFERENCED_TRG_DOES_NOT_EXIST
+        chi "给定动作时间和事件类型的引用触发'%s'不存在"
+        eng "Referenced trigger '%s' for the given action time and event type does not exist"
+        spa "No existe disparador referenciado '%s' para el momento dado de acción y para el tipo de evento"
+ER_INVALID_DEFAULT_PARAM
+        chi "此类参数使用不支持默认/忽略值"
+        eng "Default/ignore value is not supported for such parameter usage"
+        spa "El valor por defecto/ignorado no está soportado para tal utilización de parámetro"
+        ukr "Значення за замовчуванням або ігнороване значення не підтримано для цього випадку використання параьетра"
+ER_BINLOG_NON_SUPPORTED_BULK
+        chi "仅支持基于行的复制,支持批量操作"
+        eng "Only row based replication supported for bulk operations"
+        spa "Sólo la réplica basada en fila es soportada para operaciones enormes"
+ER_BINLOG_UNCOMPRESS_ERROR
+        chi "解压压缩的binlog失败"
+        eng "Uncompress the compressed binlog failed"
+        spa "Ha fallado la descompresión del binlog comprimido"
+ER_JSON_BAD_CHR
+        chi "坏JSON,参数%d 函数'%s' 位置%d"
+        eng "Broken JSON string in argument %d to function '%s' at position %d"
+        spa "Cadena JSON rota en argumento %d para función '%s' en posición %d"
+ER_JSON_NOT_JSON_CHR
+        chi "变量%d出现禁止字符,函数'%s'在%d处"
+        eng "Character disallowed in JSON in argument %d to function '%s' at position %d"
+        spa "Carácter no permitido en JSON en argumento %d para función '%s' en la posición %d"
+ER_JSON_EOS
+        chi "JSON文本中的意外结尾,参数%d 函数'%s'"
+        eng "Unexpected end of JSON text in argument %d to function '%s'"
+        spa "Fin inesperado de texto JSON en argumento %d a función '%s'"
+ER_JSON_SYNTAX
+        chi "JSON文本语法错误 参数%d 函数'%s' 位置%d"
+        eng "Syntax error in JSON text in argument %d to function '%s' at position %d"
+        spa "Error de sintaxis en texto JSON en argumento %d a función '%s' en la posición %d"
+ER_JSON_ESCAPING
+        chi "JSON文本中逸出不正确 参数%d 函数'%s' 位置%d"
+        eng "Incorrect escaping in JSON text in argument %d to function '%s' at position %d"
+        spa "Incorrecta escapatoria en texto JSON en argumento %d a función '%s' en la posicón %d"
+ER_JSON_DEPTH
+        chi "超过JSON嵌套深度的%d限制 参数%d 函数'%s' 位置%d的"
+        eng "Limit of %d on JSON nested structures depth is reached in argument %d to function '%s' at position %d"
+        spa "El límite de %d en profundidad de estructuras JSON anidadas se ha alcanzado en argumento %d a función '%s' en la posición %d"
+ER_JSON_PATH_EOS
+        chi "JSON文本路径错误 参数%d 函数'%s'"
+        eng "Unexpected end of JSON path in argument %d to function '%s'"
+        spa "Fin inesperado de ruta JSON en argumento %d a función '%s'"
+ER_JSON_PATH_SYNTAX
+        chi "JSON路径语法错误 参数%d 函数'%s' 位置%d"
+        eng "Syntax error in JSON path in argument %d to function '%s' at position %d"
+        spa "Error de sintaxis en ruta JSON en argumento %d a función '%s' en la posición %d"
+ER_JSON_PATH_DEPTH
+        chi "JSON路径深度上限达到:%d 参数%d 函数'%s' 位置%d"
+        eng "Limit of %d on JSON path depth is reached in argument %d to function '%s' at position %d"
+        spa "El límite de %d en profundidad de ruta JSON se ha alcanzado en argumento %d a función '%s' en la posición %d"
+ER_JSON_PATH_NO_WILDCARD
+        chi "JSON路径中的通配符不允许 参数%d 函数'%s'"
+        eng "Wildcards or range in JSON path not allowed in argument %d to function '%s'"
+        spa "Comodines en ruta JSON no permitidos en argumento %d a función '%s'"
+ER_JSON_PATH_ARRAY
+        chi "JSON路径应当以排列为终 参数%d 函数'%s'"
+        eng "JSON path should end with an array identifier in argument %d to function '%s'"
+        spa "La ruta JSON debería de terminar con identificador de arreglo en argumento %d a función '%s'"
+ER_JSON_ONE_OR_ALL
+        chi "函数'%s'的第二个参数必须是'一个'或'全部'"
+        eng "Argument 2 to function '%s' must be "one" or "all"."
+        spa "El argumento 2 a función '%s' debe de ser "one" o "all"."
+ER_UNSUPPORTED_COMPRESSED_TABLE
+        chi "CREATE TEMPORARY TABLE 不允许用ROW_FORMAT=COMPRESSED或KEY_BLOCK_SIZE"
+	eng "InnoDB refuses to write tables with ROW_FORMAT=COMPRESSED or KEY_BLOCK_SIZE."
+	spa "InnoDB rechaza grabar en tablas con ROW_FORMAT=COMPRESSED o KEY_BLOCK_SIZE."
+ER_GEOJSON_INCORRECT
+        chi "为st_geomfromgeojson函数指定了不正确的GeoJSON格式。"
+        eng "Incorrect GeoJSON format specified for st_geomfromgeojson function."
+        spa "Especificado formato GeoJSON incorrecto para función st_geomfromgeojson."
+ER_GEOJSON_TOO_FEW_POINTS
+        chi "Geojson格式不正确 -  Linestring指定的太少点。"
+        eng "Incorrect GeoJSON format - too few points for linestring specified."
+        spa "Formato GeoJSON incorrecto - demasiados pocos puntos especificados para linestring."
+ER_GEOJSON_NOT_CLOSED
+        chi "Geojson格式不正确 - 多边形未关闭。"
+        eng "Incorrect GeoJSON format - polygon not closed."
+        spa "Formato GeoJSON incorrecto - polígono no cerrado."
+ER_JSON_PATH_EMPTY
+        chi "path表达式'$'不允许在参数%d中允许运行'%s'。"
+        eng "Path expression '$' is not allowed in argument %d to function '%s'."
+        spa "La expresión de ruta '$' no está permitida en argumento %d a función '%s'."
+ER_SLAVE_SAME_ID
+        chi "与此从站相同的server_uuId / server_id的从站已连接到主设备"
+        eng "A slave with the same server_uuid/server_id as this slave has connected to the master"
+        spa "Un esclavo con el mismo server_uuid/server_id que este esclavo se ha conectado al maestro (master)"
+ER_FLASHBACK_NOT_SUPPORTED
+        chi "闪回不支持%s%s"
+        eng "Flashback does not support %s %s"
+        spa "Retrospectiva no soporta %s %s"
+
+#
+# MyRocks error messages
+#
+ER_KEYS_OUT_OF_ORDER
+        chi "钥匙在散装负载期间出现订单"
+        eng "Keys are out of order during bulk load"
+        spa "Claves desordenadas durante carga enorme"
+
+ER_OVERLAPPING_KEYS
+        chi "批量负载行重叠现有行"
+        eng "Bulk load rows overlap existing rows"
+        spa "La carga enorme de filas se superpone con filas existentes"
+
+ER_REQUIRE_ROW_BINLOG_FORMAT
+        chi "binlog_format != ROW时无法在master上执行更新"
+        eng "Can't execute updates on master with binlog_format != ROW."
+        spa "No puedo ejecutar actualizaciones en maestro (master) con binlog_format != ROW."
+
+ER_ISOLATION_MODE_NOT_SUPPORTED
+        chi "MyRocks仅支持读取承诺和可重复读取隔离级别。请从当前隔离级别的%s改变"
+        eng "MyRocks supports only READ COMMITTED and REPEATABLE READ isolation levels. Please change from current isolation level %s"
+        spa "MyRocks soporta sólo niveles de aislamiento READ COMMITTED y REPEATABLE READ. Por favor, cambie desde nivel de aislamiento actual %s"
+
+ER_ON_DUPLICATE_DISABLED
+        chi "当在MyRocks禁用唯一检查时,INSERT,UPDATE, LOAD,使用Clauses更新或替换索引的子句(即,在重复的重复键更新,替换)中,不允许使用。查询:%s"
+        eng "When unique checking is disabled in MyRocks, INSERT,UPDATE,LOAD statements with clauses that update or replace the key (i.e. INSERT ON DUPLICATE KEY UPDATE, REPLACE) are not allowed. Query: %s"
+        spa "Al desactivar chequeo de único en MyRocks, las sentencias INSERT, UPDATE, LOAD con cláusulas que actualizan o reemplazan la clave (p.ej. INSERT ON DUPLICATE KEY UPDATE, REPLACE) no se permiten. Consulte (query): %s"
+
+ER_UPDATES_WITH_CONSISTENT_SNAPSHOT
+        chi "START TRANSACTION WITH CONSISTENT [ROCKSDB] SNAPSHOT时,无法执行更新。"
+        eng "Can't execute updates when you started a transaction with START TRANSACTION WITH CONSISTENT [ROCKSDB] SNAPSHOT."
+        spa "No puedo ejecutar actualizaciones cuando has iniciado una transacción mediante START TRANSACTION WITH CONSISTENT [ROCKSDB] SNAPSHOT."
+
+ER_ROLLBACK_ONLY
+        chi "此交易回滚并无法承诺。只支持支持的操作是滚动,因此将丢弃所有待处理的更改。请重新启动其他事务。"
+        eng "This transaction was rolled back and cannot be committed. Only supported operation is to roll it back, so all pending changes will be discarded. Please restart another transaction."
+        spa "Esta transacción se ha retrocedido (rolled back) y no puede ser acometida (commit). La única operación soportada es retroceder (roll back), de tal forma que se descartarán todos los cambios pendientes. Por favor, rearranque otra transacción."
+
+ER_ROLLBACK_TO_SAVEPOINT
+        chi "如果修改行,MyRocks目前不支持保存点的回滚。"
+        eng "MyRocks currently does not support ROLLBACK TO SAVEPOINT if modifying rows."
+        spa "MyRocks en este momento no soporta ROLLBACK TO SAVEPOINT si se están modificando filas."
+
+ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
+        chi "在RockSDB存储引擎中,START TRANSACTION WITH CONSISTENT SNAPSHOT 只支持REPEATABLE READ隔离"
+        eng "Only REPEATABLE READ isolation level is supported for START TRANSACTION WITH CONSISTENT SNAPSHOT in RocksDB Storage Engine."
+        spa "Sólo el nivel de aislamiento REPEATABLE READ se soporta para START TRANSACTION WITH CONSISTENT SNAPSHOT en Motor de Almacenaje RocksDB."
+
+ER_UNSUPPORTED_COLLATION
+        chi "字符串索引列%s的不受支持的归类。%s使用二进制校构(%s)。"
+        eng "Unsupported collation on string indexed column %s.%s Use binary collation (%s)."
+        spa "Cotejo (collation) no soportado en columna indizada de cadena %s.%s Use cotejo binario (%s)."
+
+ER_METADATA_INCONSISTENCY
+        chi "表'%s'不存在,但MyRocks内存存在元数据信息。这是数据不一致的标志。请检查是否存在'%s.frm',并尝试恢复如果它不存在。"
+        eng "Table '%s' does not exist, but metadata information exists inside MyRocks. This is a sign of data inconsistency. Please check if '%s.frm' exists, and try to restore it if it does not exist."
+        spa "La tabla '%s' no existe, pero existe información de metadatos dentro de MyRocks. Esto es una señal de datos inconsistentes. Por favor, revise si existe '%s.frm' e intente restaurarla si no existe."
+
+ER_CF_DIFFERENT
+        chi "列族('%s')标志(%d)与现有标志(%d)不同。分配新的CF标志,或者不要更改现有的CF标志。"
+        eng "Column family ('%s') flag (%d) is different from an existing flag (%d). Assign a new CF flag, or do not change existing CF flag."
+        spa "La familia de columna ('%s') bandera (%d) es diferente de una bandera existente (%d). Asigne una nueva bandera CF o no cambie la bandera CF."
+
+ER_RDB_TTL_DURATION_FORMAT
+        chi "Myrocks中的TTL持续时间(%s)必须是无符号非空64位整数。"
+        eng "TTL duration (%s) in MyRocks must be an unsigned non-null 64-bit integer."
+        spa "La duración de TTL (%s) en MyRocks debe de ser un entero sin signo no-null de 64-bit."
+
+ER_RDB_STATUS_GENERAL
+        chi "状态误差%d从RockSDB收到:%s"
+        eng "Status error %d received from RocksDB: %s"
+        spa "Recibido error de estado %d desde RocksDB: %s"
+
+ER_RDB_STATUS_MSG
+        chi "%s,状态误差%d从rocksdb收到:%s"
+        eng "%s, Status error %d received from RocksDB: %s"
+        spa "%s, Recibido error de estado %d desde RocksDB: %s"
+
+ER_RDB_TTL_UNSUPPORTED
+        chi "当表有隐藏的PK时,目前禁用TTL支持。"
+        eng "TTL support is currently disabled when table has a hidden PK."
+        spa "El soporte TTL está desactivado en este momento cuando la tabla tiene una PK oculta."
+
+ER_RDB_TTL_COL_FORMAT
+        chi "Myrocks中的TTL列(%s)必须是一个无符号的非空64位整数,存在于表内,并具有伴随的TTL持续时间。"
+        eng "TTL column (%s) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration."
+        spa "La columna TTL (%s) en MyRocks debe de ser un entero sin signo no-null de 64-bit, debe de existir dentro de la tabla y debe de tener una duración ttl acompañante."
+
+ER_PER_INDEX_CF_DEPRECATED
+        chi "已弃用每个索引列族选项"
+        eng "The per-index column family option has been deprecated"
+        spa "La opción de familia de columna por-índice está obsoleta"
+
+ER_KEY_CREATE_DURING_ALTER
+        chi "MyRocks在Alter期间创建新的索引定义失败。"
+        eng "MyRocks failed creating new key definitions during alter."
+        spa "MyRocks no pudo crear nuevas definiciones de clave durante 'alter'."
+
+ER_SK_POPULATE_DURING_ALTER
+        chi "MyRocks在Alter期间失败填充次要索引。"
+        eng "MyRocks failed populating secondary key during alter."
+        spa "MyRocks falló al poblar clave secundaria durante el 'alter'."
+# MyRocks messages end
+
+ER_SUM_FUNC_WITH_WINDOW_FUNC_AS_ARG
+        chi "窗口函数不能用作组函数的参数。"
+        eng "Window functions can not be used as arguments to group functions."
+        spa "Las funciones de ventana no se pueden usar como argumentos para agrupar funciones."
+
+ER_NET_OK_PACKET_TOO_LARGE
+        chi "好的包太大了"
+        eng "OK packet too large"
+        spa "Paquete OK demasiado grande"
+
+ER_GEOJSON_EMPTY_COORDINATES
+        chi "Geojson格式不正确 - 空的'coordinates'阵列。"
+        eng "Incorrect GeoJSON format - empty 'coordinates' array."
+        spa "Formato GeoJSON incorrecto - arreglo vacío de coordenadas."
+
+ER_MYROCKS_CANT_NOPAD_COLLATION
+        chi "MyRocks目前不支持与“No Pad \”属性的归类。"
+        eng "MyRocks doesn't currently support collations with \"No pad\" attribute."
+        spa "MyRocks no soporta en la actualidad cotejos con atributo \"No pad\"."
+
+ER_ILLEGAL_PARAMETER_DATA_TYPES2_FOR_OPERATION
+        chi "非法参数数据类型%s和%s为操作'%s'"
+        eng "Illegal parameter data types %s and %s for operation '%s'"
+        spa "Tipos de datos de parámetro ilegales %s y %s para operación '%s'"
+ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
+        chi "非法参数数据类型%s用于操作'%s'"
+        eng "Illegal parameter data type %s for operation '%s'"
+        spa "Tipo de dato %s de parámetro ilegal para operación '%s'"
+ER_WRONG_PARAMCOUNT_TO_CURSOR 42000
+        chi "对Cursor的参数计数不正确'%-.192s'"
+        eng "Incorrect parameter count to cursor '%-.192s'"
+        spa "Contador incorrecto de parámetro para cursor '%-.192s'"
+        rus "Некорректное количество параметров для курсора '%-.192s'"
+ER_UNKNOWN_STRUCTURED_VARIABLE
+        chi "未知的结构系统变量或行程变量'%-.*s'"
+        eng "Unknown structured system variable or ROW routine variable '%-.*s'"
+        spa "Variable de sistema con estructura desconocida o variable de rutina ROW '%-.*s'"
+ER_ROW_VARIABLE_DOES_NOT_HAVE_FIELD
+        chi "行变量'%-.192s'没有字段'%-.192s'"
+        eng "Row variable '%-.192s' does not have a field '%-.192s'"
+        spa "La variable de fila '%-.192s' no tiene un campo '%-.192s'"
+ER_END_IDENTIFIER_DOES_NOT_MATCH
+        chi "结束标识符'%-.192s'不匹配'%-.192s'"
+        eng "END identifier '%-.192s' does not match '%-.192s'"
+        spa "Identificador END '%-.192s' no coincide con '%-.192s'"
+ER_SEQUENCE_RUN_OUT
+        chi "序列'%-.64s。%-.64s'已经用完了"
+        eng "Sequence '%-.64s.%-.64s' has run out"
+        spa "La secuencia '%-.64s.%-.64s' se ha agotado"
+ER_SEQUENCE_INVALID_DATA
+        chi "序列'%-.64s。%-.64s的值冲突"
+        eng "Sequence '%-.64s.%-.64s' has out of range value for options"
+        spa "La secuencia '%-.64s.%-.64s' tiene un valor fuera de rango para las opciones"
+ER_SEQUENCE_INVALID_TABLE_STRUCTURE
+        chi "序列'%-.64s。%-.64s'表结构无效(%s)"
+        eng "Sequence '%-.64s.%-.64s' table structure is invalid (%s)"
+        spa "La estructura de tabla de secuencia '%-.64s.%-.64s' es inválida (%s)"
+ER_SEQUENCE_ACCESS_ERROR
+        chi "序列'%-.64s。%-.64s的访问错误"
+        eng "Sequence '%-.64s.%-.64s' access error"
+        spa "Error en acceso a secuencia '%-.64s.%-.64s'"
+ER_SEQUENCE_BINLOG_FORMAT
+          eng "Sequences requires binlog_format mixed or row"
+          spa "Las secuencias requieren binlog_format mixto o fila"
+ER_NOT_SEQUENCE 42S02
+        chi "'%-.64s。%-.64s'不是序列"
+        eng "'%-.64s.%-.64s' is not a SEQUENCE"
+        spa "'%-.64s.%-.64s' no es una SECUENCIA"
+ER_NOT_SEQUENCE2 42S02
+        chi "'%-.192s'不是序列"
+        eng "'%-.192s' is not a SEQUENCE"
+        spa "'%-.192s' no es una SECUENCIA"
+ER_UNKNOWN_SEQUENCES 42S02
+        chi "未知序列:'%-.300s'"
+        eng "Unknown SEQUENCE: '%-.300s'"
+        spa "SECUENCIA desconocida: '%-.300s'"
+ER_UNKNOWN_VIEW 42S02
+        chi "未知视图:'%-.300s'"
+        eng "Unknown VIEW: '%-.300s'"
+        spa "VISTA desconocida: '%-.300s'"
+ER_WRONG_INSERT_INTO_SEQUENCE
+        chi "错误插入序列。人们只能将单表插入到序列对象(与mariadb-dump)中进行。如果要更改序列,请使用更改序列。"
+        eng "Wrong INSERT into a SEQUENCE. One can only do single table INSERT into a sequence object (like with mariadb-dump).  If you want to change the SEQUENCE, use ALTER SEQUENCE instead."
+        spa "INSERT equivocado dentro de SEQUENCE. Uno sólo puede hacer INSERT único en tabla dentro de un objeto de secuencia (como con volcado-mariadb). Si desea cambiar la SECUENCIA, use ALTER SEQUENCE en su lugar."
+ER_SP_STACK_TRACE
+        chi "在%u中以%s"
+        eng "At line %u in %s"
+        spa "En la línea %u en %s"
+ER_PACKAGE_ROUTINE_IN_SPEC_NOT_DEFINED_IN_BODY
+        chi "在包规范中声明子程序'%-.192s',但未在包主体中定义"
+        eng "Subroutine '%-.192s' is declared in the package specification but is not defined in the package body"
+        spa "La subrutina '%-.192s' está declarada en la especificación del paquete pero no está definida en el cuerpo del paquete"
+ER_PACKAGE_ROUTINE_FORWARD_DECLARATION_NOT_DEFINED
+        chi "子程序'%-.192s'具有前向声明但未定义"
+        eng "Subroutine '%-.192s' has a forward declaration but is not defined"
+        spa "La subrutina '%-.192s' tiene una declaración adelantada pero no está definida"
+ER_COMPRESSED_COLUMN_USED_AS_KEY
+        chi "压缩列'%-.192s'不能用于索引规范"
+        eng "Compressed column '%-.192s' can't be used in key specification"
+        spa "Una columna comprimida '%-.192s' no se puede usar en especificación de clave"
+ER_UNKNOWN_COMPRESSION_METHOD
+        chi "未知压缩方法:%s"
+        eng "Unknown compression method: %s"
+        spa "Método de compresión desconocido: %s"
+ER_WRONG_NUMBER_OF_VALUES_IN_TVC
+        chi "使用的表值构造函数具有不同数量的值"
+        eng "The used table value constructor has a different number of values"
+        spa "El constructor del valor de tabla usado tiene un número diferente de valores"
+ER_FIELD_REFERENCE_IN_TVC
+        chi "字段参考'%-.192s'不能用于表值构造函数"
+        eng "Field reference '%-.192s' can't be used in table value constructor"
+	spa "La referencia a campo '%-.192s' no se puede usar en constructor de valor de tabla"
+ER_WRONG_TYPE_FOR_PERCENTILE_FUNC
+        chi "%s函数需要数字数据类型"
+        eng "Numeric datatype is required for %s function"
+        spa "Se requiere de tipo de dato numérico para función %s"
+ER_ARGUMENT_NOT_CONSTANT
+        chi "%s函数的参数不是分区的常量"
+        eng "Argument to the %s function is not a constant for a partition"
+        spa "El argumento de la función %s no es una constante para una partición"
+ER_ARGUMENT_OUT_OF_RANGE
+        chi "%s函数的参数不属于范围[0,1]"
+        eng "Argument to the %s function does not belong to the range [0,1]"
+        spa "El argumento de la función %s no pertenece al rango [0,1]"
+ER_WRONG_TYPE_OF_ARGUMENT
+        chi "%s函数仅接受可以转换为数字类型的参数"
+        eng "%s function only accepts arguments that can be converted to numerical types"
+        spa "La función %s sólo acepta argumentos que se puedan convertir a tipos numéricos"
+ER_NOT_AGGREGATE_FUNCTION
+        chi "在错误的上下文中使用的聚合特定指令(fetch组下一行)"
+        eng "Aggregate specific instruction (FETCH GROUP NEXT ROW) used in a wrong context"
+        spa "Instrucción específica de agregación (FETCH GROUP NEXT ROW) usada en contexto equivocado"
+ER_INVALID_AGGREGATE_FUNCTION
+        chi "聚合函数丢失的聚合特定指令(fetch组下一行)"
+        eng "Aggregate specific instruction(FETCH GROUP NEXT ROW) missing from the aggregate function"
+        spa "Falta instrucción específica de agregación (FETCH GROUP NEXT ROW) de la función de agregación"
+ER_INVALID_VALUE_TO_LIMIT
+        chi "限制仅接受整数值"
+        eng "Limit only accepts integer values"
+        spa "El límite sólo acepta valores enteros"
+ER_INVISIBLE_NOT_NULL_WITHOUT_DEFAULT
+        chi "隐形列%`s必须具有默认值"
+        eng "Invisible column %`s must have a default value"
+        spa "Una columna invisible %`s debe de tener valor por defecto"
+
+
+# MariaDB error numbers related to System Versioning
+
+
+ER_UPDATE_INFO_WITH_SYSTEM_VERSIONING
+        chi "匹配的行:%ld已更改:%ld插入:%ld警告:%ld"
+        eng "Rows matched: %ld  Changed: %ld  Inserted: %ld  Warnings: %ld"
+        spa "Filas coincidentes: %ld Cambiadas: %ld Insertadas: %ld Avisos: %ld"
+
+ER_VERS_FIELD_WRONG_TYPE
+        chi "%`s必须为系统版本为表%s的类型%`s"
+        eng "%`s must be of type %s for system-versioned table %`s"
+        spa "%`s debe de ser del tipo %s para tabla versionada del sistema %`s"
+
+ER_VERS_ENGINE_UNSUPPORTED
+        chi "Transaction-Precise系统版本控制%`s不受支持"
+        eng "Transaction-precise system versioning for %`s is not supported"
+        spa "No se soporta versionado de sistema de transacción precisa para %`s"
+
+ER_UNUSED_23
+        eng "You should never see it"
+        spa "Nunca debería vd de ver esto"
+
+ER_PARTITION_WRONG_TYPE
+        chi "错误分区类型%`s,应当是%`s"
+        eng "Wrong partition type %`s for partitioning by %`s"
+        spa "Tipo de partición equivocada %`s para particionado mediante %`s"
+
+WARN_VERS_PART_FULL
+        chi "版本化表%`s.%`s:partition%`s已满,添加更多历史分区(out of %s)"
+        eng "Versioned table %`s.%`s: last HISTORY partition (%`s) is out of %s, need more HISTORY partitions"
+        spa "Tabla versionada %`s.%`s: última partición HISTORY (%`s) fuera de %s, necesita de más particiones HISTORY"
+
+WARN_VERS_PARAMETERS
+        chi "也许缺少参数:%s"
+        eng "Maybe missing parameters: %s"
+        spa "Parámetros que quizás faltan: %s"
+
+ER_VERS_DROP_PARTITION_INTERVAL
+        chi "只能在旋转间隔时丢弃最旧的分区"
+        eng "Can only drop oldest partitions when rotating by INTERVAL"
+        spa "Sólo se pueden eliminar viejas particiones al rotar mediante INTERVAL"
+
+ER_UNUSED_25
+        eng "You should never see it"
+        spa "Nunca debería vd de ver esto"
+
+WARN_VERS_PART_NON_HISTORICAL
+        chi "分区%`s包含非历史数据"
+        eng "Partition %`s contains non-historical data"
+        spa "La partición %`s contiene datos no históricos"
+
+ER_VERS_ALTER_NOT_ALLOWED
+        chi "系统版本为%`s.%`s不允许。更改@@system_versioning_alter_history用ALTER。"
+        eng "Not allowed for system-versioned %`s.%`s. Change @@system_versioning_alter_history to proceed with ALTER."
+        spa "No permitido para versionado del sistema %`s.%`s. Cambie @@system_versioning_alter_history para proceder con ALTER."
+
+ER_VERS_ALTER_ENGINE_PROHIBITED
+        chi "不允许系统版本为%`s.%`s。不支持更改返回/来自本机系统版本传输引擎。"
+        eng "Not allowed for system-versioned %`s.%`s. Change to/from native system versioning engine is not supported."
+        spa "No permitido para versionado del sistema %`s.%`s. Cambio a/desde motor de versionado nativo no soportado."
+
+ER_VERS_RANGE_PROHIBITED
+        chi "不允许使用SYSTEM_TIME范围选择器"
+        eng "SYSTEM_TIME range selector is not allowed"
+        spa "Selector de rango SYSTEM_TIME no permitido"
+
+ER_CONFLICTING_FOR_SYSTEM_TIME
+        chi "与递归的System_time子句相冲突"
+        eng "Conflicting FOR SYSTEM_TIME clauses in WITH RECURSIVE"
+        spa "Cláusulas conflictivas FOR SYSTEM_TIME en WITH RECURSIVE"
+
+ER_VERS_TABLE_MUST_HAVE_COLUMNS
+        chi "表%`s必须至少有一个版本后的列"
+        eng "Table %`s must have at least one versioned column"
+        spa "La tabla %`s debe de tener al menos una columna versionada"
+
+ER_VERS_NOT_VERSIONED
+        chi "表%`s不是系统版本的"
+        eng "Table %`s is not system-versioned"
+        spa "La tabla %`s no es versionada del sistema"
+
+ER_MISSING
+        chi "%`s的错误参数:缺少'%s'"
+        eng "Wrong parameters for %`s: missing '%s'"
+        spa "Parámetros equivocados para %`s: falta '%s'"
+
+ER_VERS_PERIOD_COLUMNS
+        chi "system_time的时期必须使用列%`s和%`s"
+        eng "PERIOD FOR SYSTEM_TIME must use columns %`s and %`s"
+        spa "PERIOD FOR SYSTEM_TIME debe de usar columnas %`s y %`s"
+
+ER_PART_WRONG_VALUE
+        chi "用于分区%`s的错误参数:'%s'的错误值"
+        eng "Wrong parameters for partitioned %`s: wrong value for '%s'"
+        spa "Parámetros equivocados para particionado %`s: valor equivocado para '%s'"
+
+ER_VERS_WRONG_PARTS
+        chi "%`s的错误分区:必须至少有一个HISTORY,只能有一个CURRENT"
+        eng "Wrong partitions for %`s: must have at least one HISTORY and exactly one last CURRENT"
+        spa "Particiones equivocadas para %`s: debe de tener al menos una HISTORY y exactamente un último CURRENT"
+
+ER_VERS_NO_TRX_ID
+        chi "TRX_ID%llu在`mysql.transaction_registry`中找不到"
+        eng "TRX_ID %llu not found in `mysql.transaction_registry`"
+        spa "TRX_ID %llu no hallado en `mysql.transaction_registry`"
+
+ER_VERS_ALTER_SYSTEM_FIELD
+        chi "无法更改系统版本配置字段%`s"
+        eng "Can not change system versioning field %`s"
+        spa "No puedo cambiar campo de versionado de sistema %`s"
+
+ER_DROP_VERSIONING_SYSTEM_TIME_PARTITION
+        chi "无法删除由SYSTEM_TIME分区的表%`s的系统版本"
+        eng "Can not DROP SYSTEM VERSIONING for table %`s partitioned BY SYSTEM_TIME"
+        spa "No puedo DROP SYSTEM VERSIONING para la tabla %`s particionada BY SYSTEM_TIME"
+
+ER_VERS_DB_NOT_SUPPORTED
+        chi "不支持%`s数据库中的系统版本化表"
+        eng "System-versioned tables in the %`s database are not supported"
+        spa "No se soportan las tablas versionadas del sistema en la base de datos %`s"
+
+ER_VERS_TRT_IS_DISABLED
+        chi "事务注册表已禁用"
+        eng "Transaction registry is disabled"
+        spa "El registro de transacciones está desactivado"
+
+ER_VERS_DUPLICATE_ROW_START_END
+        chi "重复行%s列%`s"
+        eng "Duplicate ROW %s column %`s"
+	spa "Duplicada FILA %s columna %`s"
+
+ER_VERS_ALREADY_VERSIONED
+        chi "表%`s已经是系统版本的"
+        eng "Table %`s is already system-versioned"
+        spa "La tabla %`s ya es versionada del sistema"
+
+ER_UNUSED_24
+        eng "You should never see it"
+        spa "Nunca debería vd de ver esto"
+
+ER_VERS_NOT_SUPPORTED
+        chi "系统版本的表不支持%s"
+        eng "System-versioned tables do not support %s"
+	spa "Las tablas versionadas del sistema no soportan %s"
+
+ER_VERS_TRX_PART_HISTORIC_ROW_NOT_SUPPORTED
+        chi "事务 - 精确的系统 - 版本的表不支持按行开始或行末端分区"
+        eng "Transaction-precise system-versioned tables do not support partitioning by ROW START or ROW END"
+        spa "Las tablas versionadas del sistema de transacción precisa no soportan particionado mediante ROW START o ROW END"
+ER_INDEX_FILE_FULL
+        chi "表'%-.192s'的索引文件已满"
+        eng "The index file for table '%-.192s' is full"
+        spa "El fichero/archivo índice para la tabla '%-.192s' está lleno"
+ER_UPDATED_COLUMN_ONLY_ONCE
+        chi "列%`s.%`s在单个更新语句中不能更换一次"
+        eng "The column %`s.%`s cannot be changed more than once in a single UPDATE statement"
+        spa "La columna %`s.%`s no se puede cambiar más de una vez en una sentencia UPDATE única"
+ER_EMPTY_ROW_IN_TVC
+        chi "在此上下文中,表值构造函数不允许在没有元素的行"
+        eng "Row with no elements is not allowed in table value constructor in this context"
+        spa "Fila sin elementos no se permite en constructor de valor de tabla en este contexto"
+ER_VERS_QUERY_IN_PARTITION
+        chi "表%`s的SYSTEM_TIME分区不支持历史查询"
+        eng "SYSTEM_TIME partitions in table %`s does not support historical query"
+        spa "Las particiones SYSTEM_TIME en la tabla %`s no soportan consulta (query) histórica"
+ER_KEY_DOESNT_SUPPORT
+        chi "%s索引%`s不支持此操作"
+        eng "%s index %`s does not support this operation"
+        spa "%s índice %`s no soporta esta operación"
+ER_ALTER_OPERATION_TABLE_OPTIONS_NEED_REBUILD
+        chi "更改表选项需要将要重建的表格重建"
+        eng "Changing table options requires the table to be rebuilt"
+	spa "Cambiar las opciones de tabla requiere que la tabla sea reconstruida"
+ER_BACKUP_LOCK_IS_ACTIVE
+        chi "由于您在运行BACKUP STAGE,无法执行命令"
+        eng "Can't execute the command as you have a BACKUP STAGE active"
+        spa "No puedo ejecutar el comando cuando vd tiene activo un BACKUP STAGE"
+ER_BACKUP_NOT_RUNNING
+        chi "您必须启动备份“备份阶段开始”"
+        eng "You must start backup with \"BACKUP STAGE START\""
+        spa "Vd debe de arrancar respaldo mediante \"BACKUP STAGE START\""
+ER_BACKUP_WRONG_STAGE
+        chi "备份阶段'%s'相同或在当前备份阶段'%s'之前"
+        eng "Backup stage '%s' is same or before current backup stage '%s'"
+        spa "La fase de respaldo '%s' es la misma o anterior a la fase de respaldo actual '%s'"
+ER_BACKUP_STAGE_FAILED
+        chi "备份阶段'%s'失败"
+        eng "Backup stage '%s' failed"
+        spa "La fase de respaldo '%s' ha fallado"
+ER_BACKUP_UNKNOWN_STAGE
+        chi "未知备份阶段:'%s'。阶段应该是START,FLUSH,BLOCK_DDL,BLOCK_COMIT或END之一"
+        eng "Unknown backup stage: '%s'. Stage should be one of START, FLUSH, BLOCK_DDL, BLOCK_COMMIT or END"
+        spa "Fase de respaldo desconocida: '%s'. La fase debería de ser una de START, FLUSH, BLOCK_DDL, BLOCK_COMMIT o END"
+ER_USER_IS_BLOCKED
+        chi "由于凭证错误太多,用户被阻止;用'FLUSH PRIVILEGES'解锁"
+        eng "User is blocked because of too many credential errors; unblock with 'FLUSH PRIVILEGES'"
+        spa "El usuario está bloqueado a causa de demasiados errores de credenciales; desbloquee mediante 'FLUSH PRIVILEGES'"
+ER_ACCOUNT_HAS_BEEN_LOCKED
+        chi "访问拒绝,此帐户已锁定"
+        eng "Access denied, this account is locked"
+        rum "Acces refuzat, acest cont este blocat"
+        spa "Acceso denegado, esta cuenta está bloqueada"
+ER_PERIOD_TEMPORARY_NOT_ALLOWED
+        chi "应用程序时间段表不能临时"
+        eng "Application-time period table cannot be temporary"
+        spa "Una tabla de período de momento-de-aplicación no puede ser temporal"
+ER_PERIOD_TYPES_MISMATCH
+        chi "%`s的期间的字段有不同的类型"
+        eng "Fields of PERIOD FOR %`s have different types"
+        spa "Los campos de PERIOD FOR %`s tienen tipos diferentes"
+ER_MORE_THAN_ONE_PERIOD
+        chi "无法指定多个应用程序时间段"
+        eng "Cannot specify more than one application-time period"
+        spa "No se puede especificar más de un período de momento de aplicación"
+ER_PERIOD_FIELD_WRONG_ATTRIBUTES
+        chi "期间字段%`s不能是%s"
+        eng "Period field %`s cannot be %s"
+        spa "El campo de período %`s no puede ser %s"
+ER_PERIOD_NOT_FOUND
+        chi "期间%`s未在表中找到"
+        eng "Period %`s is not found in table"
+        spa "El período %`s no se ha hallado en la tabla"
+ER_PERIOD_COLUMNS_UPDATED
+        chi "列%`s在更新集列表中指定的周期%`s中使用"
+        eng "Column %`s used in period %`s specified in update SET list"
+        spa "La columna %`s usada en período %`s especificado en lista de actualizar SET"
+ER_PERIOD_CONSTRAINT_DROP
+        chi "无法DROP CONSTRAINT `%s`。使用DROP PERIOD `%s`"
+        eng "Can't DROP CONSTRAINT `%s`. Use DROP PERIOD `%s` for this"
+        spa "No puedo DROP CONSTRAINT `%s`. Use DROP PERIOD `%s` para esto"
+ER_TOO_LONG_KEYPART 42000 S1009
+        chi "指定的索引部分太长;最大索引部分长度为 %u 个字节"
+        eng "Specified key part was too long; max key part length is %u bytes"
+        spa "La parte de clave especificada es demasiado larga; el tamaño máximo de la parte de clave es de %u bytes"
+ER_TOO_LONG_DATABASE_COMMENT
+        eng "Comment for database '%-.64s' is too long (max = %u)"
+        spa "El comentario para la base de datos '%-.64s' es demasiado largo (máx = %u)"
+ER_UNKNOWN_DATA_TYPE
+        eng "Unknown data type: '%-.64s'"
+        spa "Tipo de datos desconocido: '%-.64s'"
+ER_UNKNOWN_OPERATOR
+        eng "Operator does not exist: '%-.128s'"
+        spa "El operador no existe: '%-.128s'"
+ER_UNUSED_29
+	eng "You should never see it"
+ER_PART_STARTS_BEYOND_INTERVAL
+        eng "%`s: STARTS is later than query time, first history partition may exceed INTERVAL value"
+        spa "%`s: STARTS es posterior al momento de consulta (query), la primera partición de historia puede exceder el valor INTERVAL"
+ER_GALERA_REPLICATION_NOT_SUPPORTED
+        eng "Galera replication not supported"
+        spa "La replicación en Galera no está soportada"
+ER_LOAD_INFILE_CAPABILITY_DISABLED
+        eng "The used command is not allowed because the MariaDB server or client has disabled the local infile capability"
+        rum "Comanda folosită nu este permisă deoarece clientul sau serverul MariaDB a dezactivat această capabilitate"
+        spa "El comando usado no está permitido porque el servidor MariaDB o el cliente han desactivado la capacidad 'local infile'"
+ER_NO_SECURE_TRANSPORTS_CONFIGURED
+        eng "No secure transports are configured, unable to set --require_secure_transport=ON"
+        spa "No se han configurado transportes seguros, imposible poner --require_secure_transport=ON"
+ER_SLAVE_IGNORED_SHARED_TABLE
+        eng "Slave SQL thread ignored the '%s' because table is shared"
+        ger "Slave-SQL-Thread hat die Abfrage '%s' ignoriert"
+        nla "Slave SQL thread negeerde de query '%s'"
+        por "Slave SQL thread ignorado a consulta devido '%s'"
+        spa "Hilo (thread) SQL esclavo ignoró la '%s' porque la tabla está compartida"
+        swe "Slav SQL tråden ignorerade '%s' pga tabellen är delad"
+ER_NO_AUTOINCREMENT_WITH_UNIQUE
+        eng  "AUTO_INCREMENT column %`s cannot be used in the UNIQUE index %`s"
+        spa  "La columna %`s con AUTO_INCREMENT no se puede usar en índice UNIQUE %`s"
+ER_KEY_CONTAINS_PERIOD_FIELDS
+        eng "Key %`s cannot explicitly include column %`s"
+        spa "La clave %`s no puede incluir de forma explícita la columna %`s"
+ER_KEY_CANT_HAVE_WITHOUT_OVERLAPS
+        eng "Key %`s cannot have WITHOUT OVERLAPS"
+        spa "La clave %`s no puede tener WITHOUT OVERLAPS"
+ER_NOT_ALLOWED_IN_THIS_CONTEXT
+        eng "'%-.128s' is not allowed in this context"
+        spa "'%-.128s' no está permitido en este contexto"
+ER_DATA_WAS_COMMITED_UNDER_ROLLBACK
+        eng "Engine %s does not support rollback. Changes were committed during rollback call"
+        spa "El motor %s no soporta retroceso (rollback). Los cambios se acometieron (commit) durante la llamada a retroceso (rollback)"
+ER_PK_INDEX_CANT_BE_IGNORED
+        eng "A primary key cannot be marked as IGNORE"
+        spa "Una clave primaria no se puede marcar como IGNORE"
+ER_BINLOG_UNSAFE_SKIP_LOCKED
+	eng "SKIP LOCKED makes this statement unsafe"
+	spa "SKIP LOCKED hace que esta sentencia sea no segura"
+ER_JSON_TABLE_ERROR_ON_FIELD
+        eng "Field '%s' can't be set for JSON_TABLE '%s'."
+        spa "El campo '%s' no se puede poner para JSON_TABLE '%s'."
+ER_JSON_TABLE_ALIAS_REQUIRED
+        eng "Every table function must have an alias."
+        spa "Cada función de tabla debe de tener un alias."
+ER_JSON_TABLE_SCALAR_EXPECTED
+        eng "Can't store an array or an object in the scalar column '%s' of JSON_TABLE '%s'."
+        spa "No puedo guardar un arreglo o un objeto en la columna escalar '%s' de JSON_TABLE '%s'."
+ER_JSON_TABLE_MULTIPLE_MATCHES
+        eng "Can't store multiple matches of the path in the column '%s' of JSON_TABLE '%s'."
+        spa "No puedo guardar múltiples coincidencias de la ruta en la columna '%s' de JSON_TABLE '%s'."
+ER_WITH_TIES_NEEDS_ORDER
+        eng "FETCH ... WITH TIES requires ORDER BY clause to be present"
+        spa "FETCH ... WITH TIES requiere que esté presente la cláusula ORDER BY"
+ER_REMOVED_ORPHAN_TRIGGER
+        eng "Dropped orphan trigger '%-.64s', originally created for table: '%-.192s'"
+        spa "Eliminado disparador huérfano '%-.64s', creado originálmente para la tabla: '%-.192s'"
+ER_STORAGE_ENGINE_DISABLED
+        eng "Storage engine %s is disabled"
+        spa "El motor de almacenaje %s está desactivado"
+WARN_SFORMAT_ERROR
+        eng "SFORMAT error: %s"
+ER_PARTITION_CONVERT_SUBPARTITIONED
+        eng "Convert partition is not supported for subpartitioned table."
+ER_PROVIDER_NOT_LOADED
+        eng "MariaDB tried to use the %s, but its provider plugin is not loaded"
+ER_JSON_HISTOGRAM_PARSE_FAILED
+        eng "Failed to parse histogram for table %s.%s: %s at offset %d."
+ER_SF_OUT_INOUT_ARG_NOT_ALLOWED
+	eng "OUT or INOUT argument %d for function %s is not allowed here"
+ER_INCONSISTENT_SLAVE_TEMP_TABLE
+        eng "Replicated query '%s' table `%s.%s` can not be temporary"
+ER_VERS_HIST_PART_FAILED
+        eng "Versioned table %`s.%`s: adding HISTORY partition(s) failed"
+WARN_OPTION_CHANGING
+        eng "%s is implicitly changing the value of '%s' from '%s' to '%s'"
+ER_CM_OPTION_MISSING_REQUIREMENT
+        eng "CHANGE MASTER TO option '%s=%s' is missing requirement %s"
+ER_SLAVE_STATEMENT_TIMEOUT 70100
+        eng "Slave log event execution was interrupted (slave_max_statement_time exceeded)"
diff --git a/sql/signal_handler.cc b/sql/signal_handler.cc
new file mode 100644
index 00000000..534a9a1c
--- /dev/null
+++ b/sql/signal_handler.cc
@@ -0,0 +1,373 @@
+/* Copyright (c) 2011, 2012, Oracle and/or its affiliates.
+   Copyright (c) 2011, 2021, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1335  USA */
+
+#include "mariadb.h"
+#include "my_dbug.h"
+#include <signal.h>
+
+//#include "sys_vars.h"
+#include <keycaches.h>
+#include "mysqld.h"
+#include "sql_class.h"
+#include "my_stacktrace.h"
+#include <source_revision.h>
+
+#ifdef _WIN32
+#include <crtdbg.h>
+#include <direct.h>
+#define SIGNAL_FMT "exception 0x%x"
+#else
+#define SIGNAL_FMT "signal %d"
+#endif
+
+
+#if defined(__APPLE__) || defined(__FreeBSD__)
+#include <sys/sysctl.h>
+#endif
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+/*
+  We are handling signals/exceptions in this file.
+  Any global variables we read should be 'volatile sig_atomic_t'
+  to guarantee that we read some consistent value.
+ */
+static volatile sig_atomic_t segfaulted= 0;
+extern ulong max_used_connections;
+extern volatile sig_atomic_t calling_initgroups;
+
+extern const char *optimizer_switch_names[];
+
+static inline void output_core_info()
+{
+  /* proc is optional on some BSDs so it can't hurt to look */
+#if defined(HAVE_READLINK) && !defined(__APPLE__) && !defined(__FreeBSD__)
+  char buff[PATH_MAX];
+  ssize_t len;
+  int fd;
+  if ((len= readlink("/proc/self/cwd", buff, sizeof(buff)-1)) >= 0)
+  {
+    buff[len]= 0;
+    my_safe_printf_stderr("Writing a core file...\nWorking directory at %.*s\n",
+                          (int) len, buff);
+  }
+#ifdef __FreeBSD__
+  if ((fd= open("/proc/curproc/rlimit", O_RDONLY)) >= 0)
+#else
+  if ((fd= open("/proc/self/limits", O_RDONLY)) >= 0)
+#endif
+  {
+    my_safe_printf_stderr("Resource Limits:\n");
+    while ((len= read(fd, (uchar*)buff, sizeof(buff))) > 0)
+    {
+      my_write_stderr(buff, len);
+    }
+    close(fd);
+  }
+#ifdef __linux__
+  if ((fd= open("/proc/sys/kernel/core_pattern", O_RDONLY)) >= 0)
+  {
+    len= read(fd, (uchar*)buff, sizeof(buff));
+    my_safe_printf_stderr("Core pattern: %.*s\n", (int) len, buff);
+    close(fd);
+  }
+  if ((fd= open("/proc/version", O_RDONLY)) >= 0)
+  {
+    len= read(fd, (uchar*)buff, sizeof(buff));
+    my_safe_printf_stderr("Kernel version: %.*s\n", (int) len, buff);
+    close(fd);
+  }
+#endif
+#elif defined(__APPLE__) || defined(__FreeBSD__)
+  char buff[PATH_MAX];
+  size_t len = sizeof(buff);
+  if (sysctlbyname("kern.corefile", buff, &len, NULL, 0) == 0)
+  {
+    my_safe_printf_stderr("Core pattern: %.*s\n", (int) len, buff);
+  }
+  if (sysctlbyname("kern.version", buff, &len, NULL, 0) == 0)
+  {
+    my_safe_printf_stderr("Kernel version: %.*s\n", (int) len, buff);
+  }
+#elif defined(HAVE_GETCWD)
+  char buff[80];
+
+  if (getcwd(buff, sizeof(buff)))
+  {
+    my_safe_printf_stderr("Writing a core file at %.*s\n", (int) sizeof(buff), buff);
+    fflush(stderr);
+  }
+#endif
+}
+
+/**
+ * Handler for fatal signals on POSIX, exception handler on Windows.
+ *
+ * Fatal events (seg.fault, bus error etc.) will trigger
+ * this signal handler.  The handler will try to dump relevant
+ * debugging information to stderr and dump a core image.
+ *
+ * POSIX : Signal handlers should, if possible, only use a set of 'safe' system 
+ * calls and library functions.  A list of safe calls in POSIX systems
+ * are available at:
+ *  http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html
+ *
+ * @param sig Signal number /Exception code
+*/
+extern "C" sig_handler handle_fatal_signal(int sig)
+{
+  time_t curr_time;
+  struct tm tm;
+#ifdef HAVE_STACKTRACE
+  THD *thd;
+  /*
+     This flag remembers if the query pointer was found invalid.
+     We will try and print the query at the end of the signal handler, in case
+     we're wrong.
+  */
+  bool print_invalid_query_pointer= false;
+#endif
+
+  if (segfaulted)
+  {
+    my_safe_printf_stderr("Fatal " SIGNAL_FMT " while backtracing\n", sig);
+    goto end;
+  }
+  segfaulted = 1;
+  DBUG_PRINT("error", ("handling fatal signal"));
+
+  curr_time= my_time(0);
+  localtime_r(&curr_time, &tm);
+
+  my_safe_printf_stderr("%02d%02d%02d %2d:%02d:%02d ",
+                        tm.tm_year % 100, tm.tm_mon+1, tm.tm_mday,
+                        tm.tm_hour, tm.tm_min, tm.tm_sec);
+  if (opt_expect_abort
+#ifdef _WIN32
+    && sig == (int)EXCEPTION_BREAKPOINT /* __debugbreak in my_sigabrt_hander() */
+#else
+    && sig == SIGABRT
+#endif
+    )
+  {
+    fprintf(stderr,"[Note] mysqld did an expected abort\n");
+    goto end;
+  }
+
+  my_safe_printf_stderr("[ERROR] mysqld got " SIGNAL_FMT " ;\n",sig);
+
+  my_safe_printf_stderr("%s",
+                        "Sorry, we probably made a mistake, and this is a bug.\n\n"
+                        "Your assistance in bug reporting will enable us to fix this for the next release.\n"
+                        "To report this bug, see https://mariadb.com/kb/en/reporting-bugs\n\n");
+
+  my_safe_printf_stderr("%s",
+    "We will try our best to scrape up some info that will hopefully help\n"
+    "diagnose the problem, but since we have already crashed, \n"
+    "something is definitely wrong and this may fail.\n\n");
+
+  set_server_version(server_version, sizeof(server_version));
+  my_safe_printf_stderr("Server version: %s source revision: %s\n",
+		        server_version, SOURCE_REVISION);
+
+  if (dflt_key_cache)
+    my_safe_printf_stderr("key_buffer_size=%zu\n",
+                          dflt_key_cache->key_cache_mem_size);
+
+  my_safe_printf_stderr("read_buffer_size=%lu\n",
+                        global_system_variables.read_buff_size);
+
+  my_safe_printf_stderr("max_used_connections=%lu\n",
+                        max_used_connections);
+
+  if (thread_scheduler)
+    my_safe_printf_stderr("max_threads=%lu\n",
+                          thread_scheduler->max_threads +
+                          extra_max_connections);
+
+  my_safe_printf_stderr("thread_count=%u\n", THD_count::value());
+
+  if (dflt_key_cache && thread_scheduler)
+  {
+    size_t used_mem=
+        (dflt_key_cache->key_cache_mem_size +
+         (global_system_variables.read_buff_size +
+          (size_t) global_system_variables.sortbuff_size) *
+             (thread_scheduler->max_threads + extra_max_connections) +
+         (max_connections + extra_max_connections) * sizeof(THD)) / 1024;
+
+    my_safe_printf_stderr("It is possible that mysqld could use up to \n"
+                          "key_buffer_size + "
+                          "(read_buffer_size + sort_buffer_size)*max_threads = "
+                          "%zu K  bytes of memory\n", used_mem);
+
+    my_safe_printf_stderr("%s",
+                          "Hope that's ok; if not, decrease some variables in "
+                          "the equation.\n\n");
+  }
+
+#ifdef HAVE_STACKTRACE
+  thd= current_thd;
+
+  if (opt_stack_trace)
+  {
+    my_safe_printf_stderr("Thread pointer: %p\n", thd);
+    my_safe_printf_stderr("%s",
+      "Attempting backtrace. You can use the following "
+      "information to find out\n"
+      "where mysqld died. If you see no messages after this, something went\n"
+      "terribly wrong...\n");
+    my_print_stacktrace(thd ? (uchar*) thd->thread_stack : NULL,
+                        (ulong)my_thread_stack_size, 0);
+  }
+  if (thd)
+  {
+    const char *kreason= "UNKNOWN";
+    switch (thd->killed) {
+    case NOT_KILLED:
+    case KILL_HARD_BIT:
+      kreason= "NOT_KILLED";
+      break;
+    case KILL_BAD_DATA:
+    case KILL_BAD_DATA_HARD:
+      kreason= "KILL_BAD_DATA";
+      break;
+    case KILL_CONNECTION:
+    case KILL_CONNECTION_HARD:
+      kreason= "KILL_CONNECTION";
+      break;
+    case KILL_QUERY:
+    case KILL_QUERY_HARD:
+      kreason= "KILL_QUERY";
+      break;
+    case KILL_TIMEOUT:
+    case KILL_TIMEOUT_HARD:
+      kreason= "KILL_TIMEOUT";
+      break;
+    case KILL_SYSTEM_THREAD:
+    case KILL_SYSTEM_THREAD_HARD:
+      kreason= "KILL_SYSTEM_THREAD";
+      break;
+    case KILL_SERVER:
+    case KILL_SERVER_HARD:
+      kreason= "KILL_SERVER";
+      break;
+    case ABORT_QUERY:
+    case ABORT_QUERY_HARD:
+      kreason= "ABORT_QUERY";
+      break;
+    case KILL_SLAVE_SAME_ID:
+      kreason= "KILL_SLAVE_SAME_ID";
+      break;
+    case KILL_WAIT_TIMEOUT:
+    case KILL_WAIT_TIMEOUT_HARD:
+      kreason= "KILL_WAIT_TIMEOUT";
+      break;
+    }
+    my_safe_printf_stderr("%s", "\n"
+      "Trying to get some variables.\n"
+      "Some pointers may be invalid and cause the dump to abort.\n");
+
+    my_safe_printf_stderr("Query (%p): ", thd->query());
+    if (my_safe_print_str(thd->query(), MY_MIN(65536U, thd->query_length())))
+    {
+      // Query was found invalid. We will try to print it at the end.
+      print_invalid_query_pointer= true;
+    }
+
+    my_safe_printf_stderr("\nConnection ID (thread ID): %lu\n",
+                          (ulong) thd->thread_id);
+    my_safe_printf_stderr("Status: %s\n\n", kreason);
+    my_safe_printf_stderr("%s", "Optimizer switch: ");
+    ulonglong optsw= thd->variables.optimizer_switch;
+    for (uint i= 0; optimizer_switch_names[i+1]; i++, optsw >>= 1)
+    {
+      if (i)
+        my_safe_printf_stderr("%s", ",");
+      my_safe_printf_stderr("%s=%s",
+              optimizer_switch_names[i], optsw & 1 ? "on" : "off");
+    }
+    my_safe_printf_stderr("%s", "\n\n");
+  }
+  my_safe_printf_stderr("%s",
+    "The manual page at "
+    "https://mariadb.com/kb/en/how-to-produce-a-full-stack-trace-for-mariadbd/ contains\n"
+    "information that should help you find out what is causing the crash.\n");
+
+#endif /* HAVE_STACKTRACE */
+
+#ifdef HAVE_INITGROUPS
+  if (calling_initgroups)
+  {
+    my_safe_printf_stderr("%s", "\n"
+      "This crash occurred while the server was calling initgroups(). This is\n"
+      "often due to the use of a mysqld that is statically linked against \n"
+      "glibc and configured to use LDAP in /etc/nsswitch.conf.\n"
+      "You will need to either upgrade to a version of glibc that does not\n"
+      "have this problem (2.3.4 or later when used with nscd),\n"
+      "disable LDAP in your nsswitch.conf, or use a "
+      "mysqld that is not statically linked.\n");
+  }
+#endif
+
+  if (locked_in_memory)
+  {
+    my_safe_printf_stderr("%s", "\n"
+      "The \"--memlock\" argument, which was enabled, "
+      "uses system calls that are\n"
+      "unreliable and unstable on some operating systems and "
+      "operating-system versions (notably, some versions of Linux).\n"
+      "This crash could be due to use of those buggy OS calls.\n"
+      "You should consider whether you really need the "
+      "\"--memlock\" parameter and/or consult the OS distributer about "
+      "\"mlockall\" bugs.\n");
+  }
+
+#ifdef HAVE_STACKTRACE
+  if (print_invalid_query_pointer)
+  {
+    my_safe_printf_stderr(
+        "\nWe think the query pointer is invalid, but we will try "
+        "to print it anyway. \n"
+        "Query: ");
+    my_write_stderr(thd->query(), MY_MIN(65536U, thd->query_length()));
+    my_safe_printf_stderr("\n\n");
+  }
+#endif
+
+  output_core_info();
+#ifdef HAVE_WRITE_CORE
+  if (test_flags & TEST_CORE_ON_SIGNAL)
+  {
+    my_write_core(sig);
+  }
+#endif
+
+end:
+#ifndef _WIN32
+  /*
+     Quit, without running destructors (etc.)
+     Use a signal, because the parent (systemd) can check that with WIFSIGNALED
+     On Windows, do not terminate, but pass control to exception filter.
+  */
+  signal(sig, SIG_DFL);
+  kill(getpid(), sig);
+#else
+  return;
+#endif
+}
diff --git a/sql/slave.cc b/sql/slave.cc
new file mode 100644
index 00000000..e781c461
--- /dev/null
+++ b/sql/slave.cc
@@ -0,0 +1,8505 @@
+/* Copyright (c) 2000, 2017, Oracle and/or its affiliates.
+   Copyright (c) 2009, 2022, MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+
+/**
+  @addtogroup Replication
+  @{
+
+  @file
+
+  @brief Code to run the io thread and the sql thread on the
+  replication slave.
+*/
+
+#include "mariadb.h"
+#include "sql_priv.h"
+#include "slave.h"
+#include "sql_parse.h"                         // execute_init_command
+#include "sql_table.h"                         // mysql_rm_table
+#include "rpl_mi.h"
+#include "rpl_rli.h"
+#include "sql_repl.h"
+#include "rpl_filter.h"
+#include "repl_failsafe.h"
+#include "transaction.h"
+#include <thr_alarm.h>
+#include <my_dir.h>
+#include <sql_common.h>
+#include <errmsg.h>
+#include <ssl_compat.h>
+#include "unireg.h"
+#include <mysys_err.h>
+#include <signal.h>
+#include <mysql.h>
+#include <myisam.h>
+
+#include "sql_base.h"                           // close_thread_tables
+#include "tztime.h"                             // struct Time_zone
+#include "log_event.h"                          // Rotate_log_event,
+                                                // Create_file_log_event,
+                                                // Format_description_log_event
+#include "wsrep_mysqld.h"
+#ifdef WITH_WSREP
+#include "wsrep_trans_observer.h"
+#endif
+
+class Master_info_index;
+Master_info_index *master_info_index;
+
+#ifdef HAVE_REPLICATION
+
+#include "rpl_tblmap.h"
+#include "debug_sync.h"
+#include "rpl_parallel.h"
+#include "sql_show.h"
+#include "semisync_slave.h"
+#include "sql_manager.h"
+
+#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
+
+#define MAX_SLAVE_RETRY_PAUSE 5
+/*
+  a parameter of sql_slave_killed() to defer the killed status
+*/
+#define SLAVE_WAIT_GROUP_DONE 60
+bool use_slave_mask = 0;
+MY_BITMAP slave_error_mask;
+char slave_skip_error_names[SHOW_VAR_FUNC_BUFF_SIZE];
+uint *slave_transaction_retry_errors;
+uint slave_transaction_retry_error_length= 0;
+char slave_transaction_retry_error_names[SHOW_VAR_FUNC_BUFF_SIZE];
+
+char* slave_load_tmpdir = 0;
+Master_info *active_mi= 0;
+my_bool replicate_same_server_id;
+ulonglong relay_log_space_limit = 0;
+ulonglong opt_read_binlog_speed_limit = 0;
+
+const char *relay_log_index= 0;
+const char *relay_log_basename= 0;
+
+LEX_CSTRING default_master_connection_name= { (char*) "", 0 };
+
+/*
+  When slave thread exits, we need to remember the temporary tables so we
+  can re-use them on slave start.
+
+  TODO: move the vars below under Master_info
+*/
+
+int disconnect_slave_event_count = 0, abort_slave_event_count = 0;
+
+static pthread_key(Master_info*, RPL_MASTER_INFO);
+
+enum enum_slave_reconnect_actions
+{
+  SLAVE_RECON_ACT_REG= 0,
+  SLAVE_RECON_ACT_DUMP= 1,
+  SLAVE_RECON_ACT_EVENT= 2,
+  SLAVE_RECON_ACT_MAX
+};
+
+enum enum_slave_reconnect_messages
+{
+  SLAVE_RECON_MSG_WAIT= 0,
+  SLAVE_RECON_MSG_KILLED_WAITING= 1,
+  SLAVE_RECON_MSG_AFTER= 2,
+  SLAVE_RECON_MSG_FAILED= 3,
+  SLAVE_RECON_MSG_COMMAND= 4,
+  SLAVE_RECON_MSG_KILLED_AFTER= 5,
+  SLAVE_RECON_MSG_MAX
+};
+
+static const char *reconnect_messages[SLAVE_RECON_ACT_MAX][SLAVE_RECON_MSG_MAX]=
+{
+  {
+    "Waiting to reconnect after a failed registration on master",
+    "Slave I/O thread killed while waiting to reconnect after a failed \
+registration on master",
+    "Reconnecting after a failed registration on master",
+    "failed registering on master, reconnecting to try again, \
+log '%s' at position %llu%s",
+    "COM_REGISTER_SLAVE",
+    "Slave I/O thread killed during or after reconnect"
+  },
+  {
+    "Waiting to reconnect after a failed binlog dump request",
+    "Slave I/O thread killed while retrying master dump",
+    "Reconnecting after a failed binlog dump request",
+    "failed dump request, reconnecting to try again, log '%s' at position %llu%s",
+    "COM_BINLOG_DUMP",
+    "Slave I/O thread killed during or after reconnect"
+  },
+  {
+    "Waiting to reconnect after a failed master event read",
+    "Slave I/O thread killed while waiting to reconnect after a failed read",
+    "Reconnecting after a failed master event read",
+    "Slave I/O thread: Failed reading log event, reconnecting to retry, \
+log '%s' at position %llu%s",
+    "",
+    "Slave I/O thread killed during or after a reconnect done to recover from \
+failed read"
+  }
+};
+ 
+
+typedef enum { SLAVE_THD_IO, SLAVE_THD_SQL} SLAVE_THD_TYPE;
+
+static int process_io_rotate(Master_info* mi, Rotate_log_event* rev);
+static int process_io_create_file(Master_info* mi, Create_file_log_event* cev);
+static bool wait_for_relay_log_space(Relay_log_info* rli);
+static bool io_slave_killed(Master_info* mi);
+static bool sql_slave_killed(rpl_group_info *rgi);
+static int init_slave_thread(THD*, Master_info *, SLAVE_THD_TYPE);
+static void make_slave_skip_errors_printable(void);
+static void make_slave_transaction_retry_errors_printable(void);
+static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi);
+static int safe_reconnect(THD*, MYSQL*, Master_info*, bool);
+static int connect_to_master(THD*, MYSQL*, Master_info*, bool, bool);
+static Log_event* next_event(rpl_group_info* rgi, ulonglong *event_size);
+static int queue_event(Master_info *mi,const uchar *buf, ulong event_len);
+static int terminate_slave_thread(THD *, mysql_mutex_t *, mysql_cond_t *,
+                                  volatile uint *, bool);
+static bool check_io_slave_killed(Master_info *mi, const char *info);
+static bool send_show_master_info_data(THD *, Master_info *, bool, String *);
+/*
+  Function to set the slave's max_allowed_packet based on the value
+  of slave_max_allowed_packet.
+
+    @in_param    thd    Thread handler for slave
+    @in_param    mysql  MySQL connection handle
+*/
+
+static void set_slave_max_allowed_packet(THD *thd, MYSQL *mysql)
+{
+  DBUG_ENTER("set_slave_max_allowed_packet");
+  // thd and mysql must be valid
+  DBUG_ASSERT(thd && mysql);
+
+  thd->variables.max_allowed_packet= slave_max_allowed_packet;
+  thd->net.max_packet_size= slave_max_allowed_packet;
+  /*
+    Adding MAX_LOG_EVENT_HEADER_LEN to the max_packet_size on the I/O
+    thread and the mysql->option max_allowed_packet, since a
+    replication event can become this much  larger than
+    the corresponding packet (query) sent from client to master.
+  */
+  thd->net.max_packet_size+= MAX_LOG_EVENT_HEADER;
+  /*
+    Skipping the setting of mysql->net.max_packet size to slave
+    max_allowed_packet since this is done during mysql_real_connect.
+  */
+  mysql->options.max_allowed_packet=
+    slave_max_allowed_packet+MAX_LOG_EVENT_HEADER;
+  DBUG_VOID_RETURN;
+}
+
+/*
+  Find out which replications threads are running
+
+  SYNOPSIS
+    init_thread_mask()
+    mask                Return value here
+    mi                  master_info for slave
+    inverse             If set, returns which threads are not running
+
+  IMPLEMENTATION
+    Get a bit mask for which threads are running so that we can later restart
+    these threads.
+
+  RETURN
+    mask        If inverse == 0, running threads
+                If inverse == 1, stopped threads
+*/
+
+void init_thread_mask(int* mask,Master_info* mi,bool inverse)
+{
+  bool set_io = mi->slave_running, set_sql = mi->rli.slave_running;
+  int tmp_mask=0;
+  DBUG_ENTER("init_thread_mask");
+
+  if (set_io)
+    tmp_mask |= SLAVE_IO;
+  if (set_sql)
+    tmp_mask |= SLAVE_SQL;
+  if (inverse)
+    tmp_mask^= (SLAVE_IO | SLAVE_SQL);
+  *mask = tmp_mask;
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  lock_slave_threads() against other threads doing STOP, START or RESET SLAVE
+
+*/
+
+void Master_info::lock_slave_threads()
+{
+  DBUG_ENTER("lock_slave_threads");
+  mysql_mutex_lock(&start_stop_lock);
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  unlock_slave_threads()
+*/
+
+void Master_info::unlock_slave_threads()
+{
+  DBUG_ENTER("unlock_slave_threads");
+  mysql_mutex_unlock(&start_stop_lock);
+  DBUG_VOID_RETURN;
+}
+
+#ifdef HAVE_PSI_INTERFACE
+static PSI_thread_key key_thread_slave_io, key_thread_slave_sql;
+
+static PSI_thread_info all_slave_threads[]=
+{
+  { &key_thread_slave_io, "slave_io", PSI_FLAG_GLOBAL},
+  { &key_thread_slave_sql, "slave_sql", PSI_FLAG_GLOBAL}
+};
+
+static void init_slave_psi_keys(void)
+{
+  const char* category= "sql";
+  int count;
+
+  if (PSI_server == NULL)
+    return;
+
+  count= array_elements(all_slave_threads);
+  PSI_server->register_thread(category, all_slave_threads, count);
+}
+#endif /* HAVE_PSI_INTERFACE */
+
+
+/*
+  Note: This definition needs to be kept in sync with the one in
+  mysql_system_tables.sql which is used by mysql_create_db.
+*/
+static const char gtid_pos_table_definition1[]=
+  "CREATE TABLE ";
+static const char gtid_pos_table_definition2[]=
+  " (domain_id INT UNSIGNED NOT NULL, "
+  "sub_id BIGINT UNSIGNED NOT NULL, "
+  "server_id INT UNSIGNED NOT NULL, "
+  "seq_no BIGINT UNSIGNED NOT NULL, "
+  "PRIMARY KEY (domain_id, sub_id)) CHARSET=latin1 "
+  "COMMENT='Replication slave GTID position' "
+  "ENGINE=";
+
+/*
+  Build a query string
+    CREATE TABLE mysql.gtid_slave_pos_ ... ENGINE=
+*/
+static bool
+build_gtid_pos_create_query(THD *thd, String *query,
+                            LEX_CSTRING *table_name,
+                            LEX_CSTRING *engine_name)
+{
+  bool err= false;
+  err|= query->append(gtid_pos_table_definition1,
+                      sizeof(gtid_pos_table_definition1)-1);
+  err|= append_identifier(thd, query, table_name);
+  err|= query->append(gtid_pos_table_definition2,
+                      sizeof(gtid_pos_table_definition2)-1);
+  err|= append_identifier(thd, query, engine_name);
+  return err;
+}
+
+
+static int
+gtid_pos_table_creation(THD *thd, plugin_ref engine, LEX_CSTRING *table_name)
+{
+  int err;
+  StringBuffer<sizeof(gtid_pos_table_definition1) + sizeof(gtid_pos_table_definition2) + 2*FN_REFLEN> query;
+
+  if (build_gtid_pos_create_query(thd, &query, table_name, plugin_name(engine)))
+  {
+    my_error(ER_OUT_OF_RESOURCES, MYF(0));
+    return 1;
+  }
+
+  thd->set_db(&MYSQL_SCHEMA_NAME);
+  thd->clear_error();
+  ulonglong thd_saved_option= thd->variables.option_bits;
+  /* This query should not be binlogged. */
+  thd->variables.option_bits&= ~(ulonglong)OPTION_BIN_LOG;
+  thd->set_query_and_id(query.c_ptr(), query.length(), thd->charset(),
+                        next_query_id());
+  Parser_state parser_state;
+  err= parser_state.init(thd, thd->query(), thd->query_length());
+  if (err)
+    goto end;
+  mysql_parse(thd, thd->query(), thd->query_length(), &parser_state);
+  if (unlikely(thd->is_error()))
+    err= 1;
+  /* The warning is relevant to 10.3 and earlier. */
+  sql_print_warning("The automatically created table '%s' name may not be "
+                    "entirely in lowercase. The table name will be converted "
+                    "to lowercase to any future upgrade to 10.4.0 and later "
+                    "version where it will be auto-created at once "
+                    "in lowercase.",
+                    table_name->str);
+end:
+  thd->variables.option_bits= thd_saved_option;
+  thd->reset_query();
+  return err;
+}
+
+static THD *new_bg_THD()
+{
+  THD *thd= new THD(next_thread_id());
+  thd->thread_stack= (char*) &thd;
+  thd->store_globals();
+  thd->system_thread = SYSTEM_THREAD_SLAVE_BACKGROUND;
+  thd->security_ctx->skip_grants();
+  thd->set_command(COM_DAEMON);
+  thd->variables.wsrep_on= 0;
+  return thd;
+}
+
+static void bg_gtid_delete_pending(void *)
+{
+  THD *thd= new_bg_THD();
+
+  rpl_slave_state::list_element *list;
+  list= rpl_global_gtid_slave_state->gtid_grab_pending_delete_list();
+  rpl_global_gtid_slave_state->gtid_delete_pending(thd, &list);
+  if (list)
+    rpl_global_gtid_slave_state->put_back_list(list);
+  delete thd;
+}
+
+static void bg_gtid_pos_auto_create(void *hton)
+{
+  THD *thd= NULL;
+  int UNINIT_VAR(err);
+  plugin_ref engine= NULL, *auto_engines;
+  rpl_slave_state::gtid_pos_table *entry;
+  StringBuffer<FN_REFLEN> loc_table_name;
+  LEX_CSTRING table_name;
+
+  /*
+    Check that the plugin is still in @@gtid_pos_auto_engines, and lock
+    it.
+  */
+  mysql_mutex_lock(&LOCK_global_system_variables);
+  for (auto_engines= opt_gtid_pos_auto_plugins;
+       auto_engines && *auto_engines;
+       ++auto_engines)
+  {
+    if (plugin_hton(*auto_engines) == hton)
+    {
+      engine= my_plugin_lock(NULL, *auto_engines);
+      break;
+    }
+  }
+  mysql_mutex_unlock(&LOCK_global_system_variables);
+  if (!engine)
+  {
+    /* The engine is gone from @@gtid_pos_auto_engines, so no action. */
+    goto end;
+  }
+
+  /* Find the entry for the table to auto-create. */
+  mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
+  entry= rpl_global_gtid_slave_state->
+         gtid_pos_tables.load(std::memory_order_relaxed);
+  while (entry)
+  {
+    if (entry->table_hton == hton &&
+        entry->state == rpl_slave_state::GTID_POS_CREATE_REQUESTED)
+      break;
+    entry= entry->next;
+  }
+  if (entry)
+  {
+    entry->state = rpl_slave_state::GTID_POS_CREATE_IN_PROGRESS;
+    err= loc_table_name.append(entry->table_name.str, entry->table_name.length);
+  }
+  mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
+  if (!entry)
+    goto end;
+  if (err)
+  {
+    sql_print_error("Out of memory while trying to auto-create GTID position table");
+    goto end;
+  }
+  table_name.str= loc_table_name.c_ptr_safe();
+  table_name.length= loc_table_name.length();
+
+  thd= new_bg_THD();
+  err= gtid_pos_table_creation(thd, engine, &table_name);
+  if (err)
+  {
+    sql_print_error("Error auto-creating GTID position table `mysql.%s`: %s Error_code: %d",
+                    table_name.str, thd->get_stmt_da()->message(),
+                    thd->get_stmt_da()->sql_errno());
+    thd->clear_error();
+    goto end;
+  }
+
+  /* Now enable the entry for the auto-created table. */
+  mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
+  entry= rpl_global_gtid_slave_state->
+         gtid_pos_tables.load(std::memory_order_relaxed);
+  while (entry)
+  {
+    if (entry->table_hton == hton &&
+        entry->state == rpl_slave_state::GTID_POS_CREATE_IN_PROGRESS)
+    {
+      entry->state= rpl_slave_state::GTID_POS_AVAILABLE;
+      break;
+    }
+    entry= entry->next;
+  }
+  mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
+
+end:
+  delete thd;
+  if (engine)
+    plugin_unlock(NULL, engine);
+}
+
+static bool slave_background_thread_gtid_loaded;
+
+static void bg_rpl_load_gtid_slave_state(void *)
+{
+  THD *thd= new_bg_THD();
+  thd->set_psi(PSI_CALL_get_thread());
+  thd_proc_info(thd, "Loading slave GTID position from table");
+  if (rpl_load_gtid_slave_state(thd))
+    sql_print_warning("Failed to load slave replication state from table "
+                      "%s.%s: %u: %s", "mysql",
+                      rpl_gtid_slave_state_table_name.str,
+                      thd->get_stmt_da()->sql_errno(),
+                      thd->get_stmt_da()->message());
+
+  // hijacking global_rpl_thread_pool cond here - it's only once on startup
+  mysql_mutex_lock(&global_rpl_thread_pool.LOCK_rpl_thread_pool);
+  slave_background_thread_gtid_loaded= true;
+  mysql_cond_signal(&global_rpl_thread_pool.COND_rpl_thread_pool);
+  mysql_mutex_unlock(&global_rpl_thread_pool.LOCK_rpl_thread_pool);
+  delete thd;
+}
+
+static void bg_slave_kill(void *victim)
+{
+  THD *to_kill= (THD *)victim;
+  DBUG_EXECUTE_IF("rpl_delay_deadlock_kill", my_sleep(1500000););
+  to_kill->awake(KILL_CONNECTION);
+  mysql_mutex_lock(&to_kill->LOCK_wakeup_ready);
+  to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED;
+  mysql_cond_broadcast(&to_kill->COND_wakeup_ready);
+  mysql_mutex_unlock(&to_kill->LOCK_wakeup_ready);
+}
+
+void slave_background_kill_request(THD *to_kill)
+{
+  if (to_kill->rgi_slave->killed_for_retry)
+    return;                                     // Already deadlock killed.
+  to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_PENDING;
+  mysql_manager_submit(bg_slave_kill, to_kill);
+}
+
+/*
+  This function must only be called from a slave SQL thread (or worker thread),
+  to ensure that the table_entry will not go away before we can lock the
+  LOCK_slave_state.
+*/
+void slave_background_gtid_pos_create_request(
+        rpl_slave_state::gtid_pos_table *table_entry)
+{
+  if (table_entry->state != rpl_slave_state::GTID_POS_AUTO_CREATE)
+    return;
+  mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
+  if (table_entry->state != rpl_slave_state::GTID_POS_AUTO_CREATE)
+  {
+    mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
+    return;
+  }
+  table_entry->state= rpl_slave_state::GTID_POS_CREATE_REQUESTED;
+  mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
+
+  mysql_manager_submit(bg_gtid_pos_auto_create, table_entry->table_hton);
+}
+
+
+/*
+  Request the manager thread to delete no longer used rows from the
+  mysql.gtid_slave_pos* tables.
+*/
+void slave_background_gtid_pending_delete_request(void)
+{
+  mysql_manager_submit(bg_gtid_delete_pending, NULL);
+}
+
+
+/* Initialize slave structures */
+
+int init_slave()
+{
+  DBUG_ENTER("init_slave");
+  int error= 0;
+
+#ifdef HAVE_PSI_INTERFACE
+  init_slave_psi_keys();
+#endif
+
+  if (global_rpl_thread_pool.init(opt_slave_parallel_threads))
+    return 1;
+
+  slave_background_thread_gtid_loaded= false;
+  mysql_manager_submit(bg_rpl_load_gtid_slave_state, NULL);
+
+  // hijacking global_rpl_thread_pool cond here - it's only once on startup
+  mysql_mutex_lock(&global_rpl_thread_pool.LOCK_rpl_thread_pool);
+  while (!slave_background_thread_gtid_loaded)
+    mysql_cond_wait(&global_rpl_thread_pool.COND_rpl_thread_pool,
+                    &global_rpl_thread_pool.LOCK_rpl_thread_pool);
+  mysql_mutex_unlock(&global_rpl_thread_pool.LOCK_rpl_thread_pool);
+
+  /*
+    This is called when mysqld starts. Before client connections are
+    accepted. However bootstrap may conflict with us if it does START SLAVE.
+    So it's safer to take the lock.
+  */
+
+  if (pthread_key_create(&RPL_MASTER_INFO, NULL))
+    goto err;
+
+  master_info_index= new Master_info_index;
+  if (!master_info_index || master_info_index->init_all_master_info())
+  {
+    sql_print_error("Failed to initialize multi master structures");
+    DBUG_RETURN(1);
+  }
+  if (!(active_mi= new Master_info(&default_master_connection_name,
+                                   relay_log_recovery)) ||
+      active_mi->error())
+  {
+    delete active_mi;
+    active_mi= 0;
+    sql_print_error("Failed to allocate memory for the Master Info structure");
+    goto err;
+  }
+
+  if (master_info_index->add_master_info(active_mi, FALSE))
+  {
+    delete active_mi;
+    active_mi= 0;
+    goto err;
+  }
+
+  /*
+    If master_host is not specified, try to read it from the master_info file.
+    If master_host is specified, create the master_info file if it doesn't
+    exists.
+  */
+
+  if (init_master_info(active_mi,master_info_file,relay_log_info_file,
+                       1, (SLAVE_IO | SLAVE_SQL)))
+  {
+    sql_print_error("Failed to initialize the master info structure");
+    goto err;
+  }
+
+  /* If server id is not set, start_slave_thread() will say it */
+
+  if (active_mi->host[0] && !opt_skip_slave_start)
+  {
+    int error;
+    THD *thd= new THD(next_thread_id());
+    thd->thread_stack= (char*) &thd;
+    thd->store_globals();
+
+    error= start_slave_threads(0, /* No active thd */
+                               1 /* need mutex */,
+                               1 /* wait for start*/,
+                               active_mi,
+                               master_info_file,
+                               relay_log_info_file,
+                               SLAVE_IO | SLAVE_SQL);
+
+    thd->reset_globals();
+    delete thd;
+    if (unlikely(error))
+    {
+      sql_print_error("Failed to create slave threads");
+      goto err;
+    }
+  }
+
+end:
+  DBUG_RETURN(error);
+
+err:
+  error= 1;
+  goto end;
+}
+
+/*
+  Restore the master coordinates from the relay log info and discard the
+  relay logs previously retrieved by the IO thread, which then resumes
+  fetching based on group_master_log_pos and group_master_log_name.  The
+  old relay logs are eventually removed by the normal purge mechanism.
+
+  In the future this routine should be improved to avoid throwing away
+  relay logs that are safely stored on disk.  Note also that this recovery
+  routine relies on the correctness of relay-log.info and only tolerates
+  coordinate problems in master.info.
+
+  No mutex is needed in this function as the caller (i.e. init_slave)
+  already has one acquired.
+
+  Specifically, the following structures are updated:
+
+  1 - mi->master_log_pos  <-- rli->group_master_log_pos
+  2 - mi->master_log_name <-- rli->group_master_log_name
+  3 - It moves the relay log to the new relay log file, by
+      rli->group_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
+      rli->event_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
+      rli->group_relay_log_name <-- rli->relay_log.get_log_fname();
+      rli->event_relay_log_name <-- rli->relay_log.get_log_fname();
+
+   If there is an error, it returns (1), otherwise returns (0).
+ */
+int init_recovery(Master_info* mi, const char** errmsg)
+{
+  DBUG_ENTER("init_recovery");
+
+  Relay_log_info *rli= &mi->rli;
+  /* Nothing to recover unless a master position was stored in relay info. */
+  if (rli->group_master_log_name[0] != '\0')
+  {
+    const char *relay_fname= rli->relay_log.get_log_fname();
+
+    mi->master_log_pos= MY_MAX(BIN_LOG_HEADER_SIZE,
+                               rli->group_master_log_pos);
+    strmake_buf(mi->master_log_name, rli->group_master_log_name);
+
+    sql_print_warning("Recovery from master pos %ld and file %s.",
+                      (ulong) mi->master_log_pos, mi->master_log_name);
+
+    /* Restart relay logging from the head of the current relay log file. */
+    strmake_buf(rli->group_relay_log_name, relay_fname);
+    strmake_buf(rli->event_relay_log_name, relay_fname);
+    rli->group_relay_log_pos= BIN_LOG_HEADER_SIZE;
+    rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
+  }
+
+  DBUG_RETURN(0);
+}
+
+
+/**
+  Convert the slave skip errors bitmap (slave_error_mask) into the printable
+  string slave_skip_error_names and point @@slave_skip_errors at it.
+
+  Produces "OFF" when no mask is active, "ALL" when every error is set,
+  and otherwise a comma-separated list of error numbers, ending in "..."
+  if not all of them fitted into the buffer.
+*/
+
+static void make_slave_skip_errors_printable(void)
+{
+  /*
+    To be safe, we want 10 characters of room in the buffer for a number
+    plus terminators. Also, we need some space for constant strings.
+    10 characters must be sufficient for a number plus {',' | '...'}
+    plus a NUL terminator. That is a max 6 digit number.
+  */
+  const size_t MIN_ROOM= 10;
+  DBUG_ENTER("make_slave_skip_errors_printable");
+  DBUG_ASSERT(sizeof(slave_skip_error_names) > MIN_ROOM);
+  DBUG_ASSERT(MAX_SLAVE_ERROR <= 999999); // 6 digits
+
+  /* Make @@slave_skip_errors show the nice human-readable value.  */
+  opt_slave_skip_errors= slave_skip_error_names;
+
+  if (!use_slave_mask || bitmap_is_clear_all(&slave_error_mask))
+  {
+    /* purecov: begin tested */
+    memcpy(slave_skip_error_names, STRING_WITH_LEN("OFF"));
+    /* purecov: end */
+  }
+  else if (bitmap_is_set_all(&slave_error_mask))
+  {
+    /* purecov: begin tested */
+    memcpy(slave_skip_error_names, STRING_WITH_LEN("ALL"));
+    /* purecov: end */
+  }
+  else
+  {
+    char *buff= slave_skip_error_names;
+    /* Stop writing MIN_ROOM bytes before the end so one more number fits. */
+    char *bend= buff + sizeof(slave_skip_error_names) - MIN_ROOM;
+    int  errnum;
+
+    /* Append each set error number followed by ','. */
+    for (errnum= 0; errnum < MAX_SLAVE_ERROR; errnum++)
+    {
+      if (bitmap_is_set(&slave_error_mask, errnum))
+      {
+        if (buff >= bend)
+          break; /* purecov: tested */
+        buff= int10_to_str(errnum, buff, 10);
+        *buff++= ',';
+      }
+    }
+    if (buff != slave_skip_error_names)
+      buff--; // Remove last ','
+    if (errnum < MAX_SLAVE_ERROR)
+    {
+      /* Couldn't show all errors */
+      buff= strmov(buff, "..."); /* purecov: tested */
+    }
+    *buff=0;
+  }
+  DBUG_PRINT("init", ("error_names: '%s'", slave_skip_error_names));
+  DBUG_VOID_RETURN;
+}
+
+/*
+  Init function to set up the bitmap of errors that should be skipped
+  by the slave
+
+  SYNOPSIS
+    init_slave_skip_errors()
+    arg         List of error numbers to skip, separated with ','.
+                The word "all" (after optional leading spaces) selects
+                every error.
+
+  NOTES
+    Called from get_options() in mysqld.cc on start-up
+
+  RETURN
+    0  ok
+    1  could not initialize the bitmap
+*/
+
+bool init_slave_skip_errors(const char* arg)
+{
+  const char *cur;
+  DBUG_ENTER("init_slave_skip_errors");
+
+  if (!arg || !*arg)                            // No errors defined
+    goto end;
+
+  if (my_bitmap_init(&slave_error_mask, 0, MAX_SLAVE_ERROR))
+    DBUG_RETURN(1);
+
+  use_slave_mask= 1;
+  while (my_isspace(system_charset_info, *arg))
+    arg++;
+  if (!system_charset_info->strnncoll((uchar*) arg, 4, (const uchar*) "all", 4))
+  {
+    /* "all" requested: skip every possible error. */
+    bitmap_set_all(&slave_error_mask);
+    goto end;
+  }
+  /* Parse decimal numbers separated by any run of non-digit characters. */
+  for (cur= arg; *cur; )
+  {
+    long err_code;
+    if (!(cur= str2int(cur, 10, 0, LONG_MAX, &err_code)))
+      break;
+    if (err_code < MAX_SLAVE_ERROR)
+      bitmap_set_bit(&slave_error_mask, (uint) err_code);
+    while (*cur && !my_isdigit(system_charset_info, *cur))
+      cur++;
+  }
+
+end:
+  make_slave_skip_errors_printable();
+  DBUG_RETURN(0);
+}
+
+/**
+  Make a printable version of slave_transaction_retry_errors.
+  The result is never empty, as at least ER_LOCK_DEADLOCK and
+  ER_LOCK_WAIT_TIMEOUT are always in the list.
+*/
+
+static void make_slave_transaction_retry_errors_printable(void)
+{
+  /*
+    Keep MIN_ROOM (10) bytes of headroom in the buffer: sufficient for
+    one max-6-digit error number plus {',' | '...'} plus a NUL
+    terminator.
+  */
+  const size_t MIN_ROOM= 10;
+  char *pos= slave_transaction_retry_error_names;
+  char *stop= pos + sizeof(slave_transaction_retry_error_names) - MIN_ROOM;
+  uint idx;
+  DBUG_ENTER("make_slave_transaction_retry_errors_printable");
+  DBUG_ASSERT(sizeof(slave_transaction_retry_error_names) > MIN_ROOM);
+
+  /* Make @@slave_transaction_retry_errors show a human-readable value */
+  opt_slave_transaction_retry_errors= slave_transaction_retry_error_names;
+
+  /* Append each error number followed by ',' while room remains. */
+  for (idx= 0; idx < slave_transaction_retry_error_length && pos < stop; idx++)
+  {
+    pos= int10_to_str(slave_transaction_retry_errors[idx], pos, 10);
+    *pos++= ',';
+  }
+  if (pos != slave_transaction_retry_error_names)
+    pos--;                                      // Remove last ','
+  if (idx < slave_transaction_retry_error_length)
+  {
+    /* Couldn't show all errors */
+    pos= strmov(pos, "..."); /* purecov: tested */
+  }
+  *pos= 0;
+  DBUG_PRINT("exit", ("error_names: '%s'",
+                      slave_transaction_retry_error_names));
+  DBUG_VOID_RETURN;
+}
+
+
+#define DEFAULT_SLAVE_RETRY_ERRORS 9
+
+/**
+  Initialize the list of error numbers after which the slave retries a
+  transaction.
+
+  The list always begins with DEFAULT_SLAVE_RETRY_ERRORS built-in
+  temporary errors (network errors, lock wait timeout, deadlock, etc.);
+  any numbers supplied in @c arg are appended after them.
+
+  @param arg  List of extra error numbers separated by non-digit
+              characters; may be NULL or empty.
+
+  @retval 0  ok
+  @retval 1  out of memory
+*/
+
+bool init_slave_transaction_retry_errors(const char* arg)
+{
+  const char *p;
+  long err_code;
+  uint i;
+  DBUG_ENTER("init_slave_transaction_retry_errors");
+
+  /* Handle empty strings */
+  if (!arg)
+    arg= "";
+
+  /* First pass: count the user-supplied numbers to size the array. */
+  slave_transaction_retry_error_length= DEFAULT_SLAVE_RETRY_ERRORS;
+  for (;my_isspace(system_charset_info,*arg);++arg)
+    /* empty */;
+  for (p= arg; *p; )
+  {
+    if (!(p= str2int(p, 10, 0, LONG_MAX, &err_code)))
+      break;
+    slave_transaction_retry_error_length++;
+    while (!my_isdigit(system_charset_info,*p) && *p)
+      p++;
+  }
+
+  /*
+    Size the allocation from the element type (uint), not 'int', so it
+    stays correct if the array's type is ever changed.
+  */
+  if (unlikely(!(slave_transaction_retry_errors=
+                 (uint *) my_once_alloc(sizeof(uint) *
+                                        slave_transaction_retry_error_length,
+                                        MYF(MY_WME)))))
+    DBUG_RETURN(1);
+
+  /*
+    Built-in temporary error codes:
+    currently, InnoDB deadlock detected by InnoDB or lock
+    wait timeout (innodb_lock_wait_timeout exceeded), plus network
+    errors and lost connections to remote servers.
+  */
+  slave_transaction_retry_errors[0]= ER_NET_READ_ERROR;
+  slave_transaction_retry_errors[1]= ER_NET_READ_INTERRUPTED;
+  slave_transaction_retry_errors[2]= ER_NET_ERROR_ON_WRITE;
+  slave_transaction_retry_errors[3]= ER_NET_WRITE_INTERRUPTED;
+  slave_transaction_retry_errors[4]= ER_LOCK_WAIT_TIMEOUT;
+  slave_transaction_retry_errors[5]= ER_LOCK_DEADLOCK;
+  slave_transaction_retry_errors[6]= ER_CONNECT_TO_FOREIGN_DATA_SOURCE;
+  slave_transaction_retry_errors[7]= 2013; /* CR_SERVER_LOST */
+  slave_transaction_retry_errors[8]= 12701; /* ER_SPIDER_REMOTE_SERVER_GONE_AWAY_NUM */
+
+  /* Add user codes after this */
+  for (p= arg, i= DEFAULT_SLAVE_RETRY_ERRORS; *p; )
+  {
+    if (!(p= str2int(p, 10, 0, LONG_MAX, &err_code)))
+      break;
+    if (err_code > 0)
+      slave_transaction_retry_errors[i++]= (uint) err_code;
+    while (!my_isdigit(system_charset_info,*p) && *p)
+      p++;
+  }
+  /* Trim to the number actually stored (zeros were counted but skipped). */
+  slave_transaction_retry_error_length= i;
+
+  make_slave_transaction_retry_errors_printable();
+  DBUG_RETURN(0);
+}
+
+
+/**
+  Stop the slave threads of connection @c mi and flush their position files.
+
+  @param mi           Master connection whose threads should be stopped
+  @param thread_mask  SLAVE_SQL and/or SLAVE_IO, optionally OR:ed with
+                      SLAVE_FORCE_ALL to keep stopping the remaining
+                      threads even after a failure
+  @param skip_lock    Passed to terminate_slave_thread(): true means the
+                      caller already owns the relevant run_lock mutexes
+
+  @return 0 on success, otherwise an error from terminate_slave_thread()
+          or ER_ERROR_DURING_FLUSH_LOGS if flushing/syncing failed
+*/
+
+int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock)
+{
+  DBUG_ENTER("terminate_slave_threads");
+
+  if (!mi->inited)
+    DBUG_RETURN(0); /* successfully do nothing */
+  int error,force_all = (thread_mask & SLAVE_FORCE_ALL);
+  int retval= 0;
+  mysql_mutex_t *sql_lock = &mi->rli.run_lock, *io_lock = &mi->run_lock;
+  mysql_mutex_t *log_lock= mi->rli.relay_log.get_log_lock();
+
+  if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL))
+  {
+    DBUG_PRINT("info",("Terminating SQL thread"));
+    if (mi->using_parallel() && mi->rli.abort_slave && mi->rli.stop_for_until)
+    {
+      /*
+        A stop was requested while an UNTIL condition is active in
+        parallel mode; let the parallel applier stop at that point.
+      */
+      mi->rli.stop_for_until= false;
+      mi->rli.parallel.stop_during_until();
+    }
+    else
+      mi->rli.abort_slave=1;
+    if (unlikely((error= terminate_slave_thread(mi->rli.sql_driver_thd,
+                                                sql_lock,
+                                                &mi->rli.stop_cond,
+                                                &mi->rli.slave_running,
+                                                skip_lock))) &&
+                 !force_all)
+      DBUG_RETURN(error);
+    retval= error;
+
+    mysql_mutex_lock(log_lock);
+
+    DBUG_PRINT("info",("Flushing relay-log info file."));
+    if (current_thd)
+      THD_STAGE_INFO(current_thd, stage_flushing_relay_log_info_file);
+    /* Persist the SQL thread position so a restart resumes correctly. */
+    if (mi->rli.flush() || my_sync(mi->rli.info_fd, MYF(MY_WME)))
+      retval= ER_ERROR_DURING_FLUSH_LOGS;
+
+    mysql_mutex_unlock(log_lock);
+  }
+  if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
+  {
+    DBUG_PRINT("info",("Terminating IO thread"));
+    mi->abort_slave=1;
+    if (unlikely((error= terminate_slave_thread(mi->io_thd, io_lock,
+                                                &mi->stop_cond,
+                                                &mi->slave_running,
+                                                skip_lock))) &&
+                 !force_all)
+      DBUG_RETURN(error);
+    /* Keep the first error seen; don't overwrite it with a later one. */
+    if (!retval)
+      retval= error;
+
+    mysql_mutex_lock(log_lock);
+
+    DBUG_PRINT("info",("Flushing relay log and master info file."));
+    if (current_thd)
+      THD_STAGE_INFO(current_thd, stage_flushing_relay_log_and_master_info_repository);
+    if (likely(mi->fd >= 0))
+    {
+      /* Persist master.info and sync it to disk. */
+      if (flush_master_info(mi, TRUE, FALSE) || my_sync(mi->fd, MYF(MY_WME)))
+        retval= ER_ERROR_DURING_FLUSH_LOGS;
+    }
+    if (mi->rli.relay_log.is_open() &&
+        my_sync(mi->rli.relay_log.get_log_file()->file, MYF(MY_WME)))
+      retval= ER_ERROR_DURING_FLUSH_LOGS;
+
+    mysql_mutex_unlock(log_lock);
+  }
+  DBUG_RETURN(retval);
+}
+
+
+/**
+   Wait for a slave thread to terminate.
+
+   This function is called after requesting the thread to terminate
+   (by setting @c abort_slave member of @c Relay_log_info or @c
+   Master_info structure to 1). Termination of the thread is
+   controlled with the predicate *slave_running.
+
+   Function will acquire @c term_lock before waiting on the condition
+   unless @c skip_lock is true in which case the mutex should be owned
+   by the caller of this function and will remain acquired after
+   return from the function.
+
+   @param term_lock
+          Associated lock to use when waiting for @c term_cond
+
+   @param term_cond
+          Condition that is signalled when the thread has terminated
+
+   @param slave_running
+          Pointer to predicate to check for slave thread termination
+
+   @param skip_lock
+          If @c true the lock will not be acquired before waiting on
+          the condition. In this case, it is assumed that the calling
+          function acquires the lock before calling this function.
+
+   @retval 0 All OK ER_SLAVE_NOT_RUNNING otherwise.
+
+   @note  If the executing thread has to acquire term_lock (skip_lock
+          is false), the negative running status does not represent
+          any issue therefore no error is reported.
+
+ */
+static int
+terminate_slave_thread(THD *thd,
+                       mysql_mutex_t *term_lock,
+                       mysql_cond_t *term_cond,
+                       volatile uint *slave_running,
+                       bool skip_lock)
+{
+  DBUG_ENTER("terminate_slave_thread");
+  if (!skip_lock)
+  {
+    mysql_mutex_lock(term_lock);
+  }
+  else
+  {
+    mysql_mutex_assert_owner(term_lock);
+  }
+  if (!*slave_running)
+  {
+    if (!skip_lock)
+    {
+      /*
+        If run_lock (term_lock) was acquired locally, an already-stopped
+        slave is fine: there is nothing to do, so report success.
+      */
+      mysql_mutex_unlock(term_lock);
+      DBUG_RETURN(0);
+    }
+    else
+    {
+      DBUG_RETURN(ER_SLAVE_NOT_RUNNING);
+    }
+  }
+  DBUG_ASSERT(thd != 0);
+  THD_CHECK_SENTRY(thd);
+
+  /*
+    It is critical to test if the slave is running. Otherwise, we might
+    be referencing freed memory trying to kick it
+  */
+
+  while (*slave_running)                        // Should always be true
+  {
+    int error __attribute__((unused));
+    DBUG_PRINT("loop", ("killing slave thread"));
+
+    mysql_mutex_lock(&thd->LOCK_thd_kill);
+    mysql_mutex_lock(&thd->LOCK_thd_data);
+#ifndef DONT_USE_THR_ALARM
+    /*
+      Error codes from pthread_kill are:
+      EINVAL: invalid signal number (can't happen)
+      ESRCH: thread already killed (can happen, should be ignored)
+    */
+    int err __attribute__((unused))= pthread_kill(thd->real_id, thr_client_alarm);
+    DBUG_ASSERT(err != EINVAL);
+#endif
+    /* Wake the thread from any blocking wait; NOT_KILLED keeps it alive. */
+    thd->awake_no_mutex(NOT_KILLED);
+
+    mysql_mutex_unlock(&thd->LOCK_thd_kill);
+    mysql_mutex_unlock(&thd->LOCK_thd_data);
+
+    /*
+      There is a small chance that slave thread might miss the first
+      alarm. To protect against it, resend the signal until it reacts
+    */
+    struct timespec abstime;
+    set_timespec(abstime,2);
+    error= mysql_cond_timedwait(term_cond, term_lock, &abstime);
+    DBUG_ASSERT(error == ETIMEDOUT || error == 0);
+  }
+
+  DBUG_ASSERT(*slave_running == 0);
+
+  if (!skip_lock)
+    mysql_mutex_unlock(term_lock);
+  DBUG_RETURN(0);
+}
+
+
+/**
+  Start a single slave thread (IO or SQL).
+
+  @param thread_key     Performance-schema instrumentation key
+  @param h_func         Thread entry function
+  @param start_lock     If set, held while starting; unlocked on all exits
+  @param cond_lock      Lock used together with start_cond; assumed to be
+                        held by the caller
+  @param start_cond     If set, wait on it until the thread has started
+  @param slave_running  Running-state flag of the thread to be started
+  @param slave_run_id   Run counter; the wait loop below detects startup by
+                        this value changing from its pre-create snapshot
+  @param mi             Master connection the thread will serve
+
+  @retval 0                   ok
+  @retval ER_BAD_SLAVE        server_id not set
+  @retval ER_SLAVE_MUST_STOP  the thread is already running
+  @retval ER_SLAVE_THREAD     could not create the thread
+*/
+
+int start_slave_thread(
+#ifdef HAVE_PSI_INTERFACE
+                       PSI_thread_key thread_key,
+#endif
+                       pthread_handler h_func, mysql_mutex_t *start_lock,
+                       mysql_mutex_t *cond_lock,
+                       mysql_cond_t *start_cond,
+                       volatile uint *slave_running,
+                       volatile ulong *slave_run_id,
+                       Master_info* mi)
+{
+  pthread_t th;
+  ulong start_id;
+  int error;
+  DBUG_ENTER("start_slave_thread");
+
+  DBUG_ASSERT(mi->inited);
+
+  if (start_lock)
+    mysql_mutex_lock(start_lock);
+  if (!global_system_variables.server_id)
+  {
+    /* Wake any waiters before bailing out so they don't block forever. */
+    if (start_cond)
+      mysql_cond_broadcast(start_cond);
+    if (start_lock)
+      mysql_mutex_unlock(start_lock);
+    sql_print_error("Server id not set, will not start slave");
+    DBUG_RETURN(ER_BAD_SLAVE);
+  }
+
+  if (*slave_running)
+  {
+    if (start_cond)
+      mysql_cond_broadcast(start_cond);
+    if (start_lock)
+      mysql_mutex_unlock(start_lock);
+    DBUG_RETURN(ER_SLAVE_MUST_STOP);
+  }
+  /* Snapshot the run id; it changes once the new thread is running. */
+  start_id= *slave_run_id;
+  DBUG_PRINT("info",("Creating new slave thread"));
+  if (unlikely((error= mysql_thread_create(thread_key,
+                                           &th, &connection_attrib, h_func,
+                                           (void*)mi))))
+  {
+    sql_print_error("Can't create slave thread (errno= %d).", error);
+    if (start_lock)
+      mysql_mutex_unlock(start_lock);
+    DBUG_RETURN(ER_SLAVE_THREAD);
+  }
+
+  /*
+    In the following loop we can't check for thd->killed as we have to
+    wait until THD structures for the slave thread are created
+    before we can return.
+    This should be ok as there is no major work done in the slave
+    threads before they signal that we can stop waiting.
+  */
+
+  if (start_cond && cond_lock) // caller has cond_lock
+  {
+    THD* thd = current_thd;
+    while (start_id == *slave_run_id)
+    {
+      DBUG_PRINT("sleep",("Waiting for slave thread to start"));
+      PSI_stage_info saved_stage= {0, "", 0};
+      thd->ENTER_COND(start_cond, cond_lock,
+                      & stage_waiting_for_slave_thread_to_start,
+                      & saved_stage);
+      /*
+        It is not sufficient to test this at loop bottom. We must test
+        it after registering the mutex in enter_cond(). If the kill
+        happens after testing of thd->killed and before the mutex is
+        registered, we could otherwise go waiting though thd->killed is
+        set.
+      */
+      mysql_cond_wait(start_cond, cond_lock);
+      thd->EXIT_COND(& saved_stage);
+      mysql_mutex_lock(cond_lock); // re-acquire it as exit_cond() released
+    }
+  }
+  if (start_lock)
+    mysql_mutex_unlock(start_lock);
+  DBUG_RETURN(0);
+}
+
+
+/*
+  start_slave_threads()
+
+  Start the IO and/or SQL slave threads for connection `mi' as selected
+  by `thread_mask'.  If a GTID mode is active and both threads are
+  stopped, the relay logs are purged first (they only buffer between the
+  IO and SQL threads and are not needed across a GTID restart).
+
+  NOTES
+    SLAVE_FORCE_ALL is not implemented here on purpose since it does not make
+    sense to do that for starting a slave--we always care if it actually
+    started the threads that were not previously running
+*/
+
+int start_slave_threads(THD *thd,
+                        bool need_slave_mutex, bool wait_for_start,
+                        Master_info* mi, const char* master_info_fname,
+                        const char* slave_info_fname, int thread_mask)
+{
+  mysql_mutex_t *lock_io=0, *lock_sql=0, *lock_cond_io=0, *lock_cond_sql=0;
+  mysql_cond_t* cond_io=0, *cond_sql=0;
+  int error=0;
+  const char *errmsg;
+  DBUG_ENTER("start_slave_threads");
+
+  if (need_slave_mutex)
+  {
+    lock_io = &mi->run_lock;
+    lock_sql = &mi->rli.run_lock;
+  }
+  if (wait_for_start)
+  {
+    cond_io = &mi->start_cond;
+    cond_sql = &mi->rli.start_cond;
+    lock_cond_io = &mi->run_lock;
+    lock_cond_sql = &mi->rli.run_lock;
+  }
+
+  /*
+    If we are using GTID and both SQL and IO threads are stopped, then get
+    rid of all relay logs.
+
+    Relay logs are not very useful when using GTID, except as a buffer
+    between the fetch in the IO thread and the apply in SQL thread. However
+    while one of the threads is running, they are in use and cannot be
+    removed.
+  */
+  if (mi->using_gtid != Master_info::USE_GTID_NO &&
+      !mi->slave_running && !mi->rli.slave_running)
+  {
+    /*
+      purge_relay_logs() clears the mi->rli.group_master_log_pos.
+      So save and restore them, like we do in CHANGE MASTER.
+      (We are not going to use them for GTID, but it might be worth to
+      keep them in case connection with GTID fails and user wants to go
+      back and continue with previous old-style replication coordinates).
+    */
+    mi->master_log_pos = MY_MAX(BIN_LOG_HEADER_SIZE,
+                                mi->rli.group_master_log_pos);
+    strmake(mi->master_log_name, mi->rli.group_master_log_name,
+            sizeof(mi->master_log_name)-1);
+    /*
+      NOTE(review): the return value of purge_relay_logs() and errmsg are
+      ignored here — confirm this best-effort behaviour is intentional.
+    */
+    purge_relay_logs(&mi->rli, thd, 0, &errmsg);
+    mi->rli.group_master_log_pos= mi->master_log_pos;
+    strmake(mi->rli.group_master_log_name, mi->master_log_name,
+            sizeof(mi->rli.group_master_log_name)-1);
+
+    /* Reload the GTID position the slave should connect from. */
+    error= rpl_load_gtid_state(&mi->gtid_current_pos, mi->using_gtid ==
+                                             Master_info::USE_GTID_CURRENT_POS);
+    mi->events_queued_since_last_gtid= 0;
+    mi->gtid_reconnect_event_skip_count= 0;
+
+    mi->rli.restart_gtid_pos.reset();
+  }
+
+  if (likely(!error) && likely((thread_mask & SLAVE_IO)))
+    error= start_slave_thread(
+#ifdef HAVE_PSI_INTERFACE
+                              key_thread_slave_io,
+#endif
+                              handle_slave_io, lock_io, lock_cond_io,
+                              cond_io,
+                              &mi->slave_running, &mi->slave_run_id,
+                              mi);
+  if (likely(!error) && likely(thread_mask & SLAVE_SQL))
+  {
+    error= start_slave_thread(
+#ifdef HAVE_PSI_INTERFACE
+                              key_thread_slave_sql,
+#endif
+                              handle_slave_sql, lock_sql, lock_cond_sql,
+                              cond_sql,
+                              &mi->rli.slave_running, &mi->rli.slave_run_id,
+                              mi);
+    /* If the SQL thread failed to start, don't leave the IO thread alone. */
+    if (unlikely(error))
+      terminate_slave_threads(mi, thread_mask & SLAVE_IO, !need_slave_mutex);
+  }
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Kill slaves preparing for shutdown
+*/
+
+void slave_prepare_for_shutdown()
+{
+  mysql_mutex_lock(&LOCK_active_mi);
+  /* Stop the slave threads of every configured master connection. */
+  master_info_index->free_connections();
+  mysql_mutex_unlock(&LOCK_active_mi);
+  // It's safe to destruct worker pool now when
+  // all driver threads are gone.
+  global_rpl_thread_pool.deactivate();
+}
+
+/*
+  Release slave threads at time of executing shutdown.
+*/
+
+void end_slave()
+{
+  DBUG_ENTER("end_slave");
+
+  /*
+    This is called when the server terminates, in close_connections().
+    It terminates slave threads. However, some CHANGE MASTER etc may still be
+    running presently. If a START SLAVE was in progress, the mutex lock below
+    will make us wait until slave threads have started, and START SLAVE
+    returns, then we terminate them here.
+
+    We can also be called by cleanup(), which only happens if some
+    startup parameter to the server was wrong.
+  */
+  mysql_mutex_lock(&LOCK_active_mi);
+  /*
+    master_info_index should not have any threads anymore as they were
+    killed as part of slave_prepare_for_shutdown()
+  */
+  delete master_info_index;
+  master_info_index= 0;
+  active_mi= 0;
+  mysql_mutex_unlock(&LOCK_active_mi);
+
+  /* Tear down the parallel replication worker pool and filter settings. */
+  global_rpl_thread_pool.destroy();
+  free_all_rpl_filters();
+  DBUG_VOID_RETURN;
+}
+
+/* Check whether the IO thread of connection @c mi was asked to stop. */
+static bool io_slave_killed(Master_info* mi)
+{
+  DBUG_ENTER("io_slave_killed");
+
+  DBUG_ASSERT(mi->slave_running); // tracking buffer overrun
+  bool killed= mi->abort_slave || mi->io_thd->killed;
+  DBUG_RETURN(killed);
+}
+
+/**
+   The function analyzes a possible killed status and makes
+   a decision whether to accept it or not.
+   Normally upon accepting the sql thread goes to shutdown.
+   In the event of deferring the decision, the @c rgi->last_event_start_time
+   waiting timer is set to force the killed status be accepted upon its
+   expiration.
+
+   @param rgi   pointer to the rpl_group_info of the group being applied
+                (carries the THD and Relay_log_info used below)
+
+   @return TRUE the killed status is recognized, FALSE a possible killed
+           status is deferred.
+*/
+static bool sql_slave_killed(rpl_group_info *rgi)
+{
+  bool ret= FALSE;
+  Relay_log_info *rli= rgi->rli;
+  THD *thd= rgi->thd;
+  DBUG_ENTER("sql_slave_killed");
+
+  DBUG_ASSERT(rli->sql_driver_thd == thd);
+  DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun
+  if (rli->sql_driver_thd->killed || rli->abort_slave)
+  {
+    /*
+      The transaction should always be binlogged if OPTION_BINLOG_THIS_TRX is
+      set (it implies that something can not be rolled back). And such
+      case should be regarded similarly as modifying a
+      non-transactional table because retrying of the transaction will
+      lead to an error or inconsistency as well.
+
+      Example: OPTION_BINLOG_THIS_TRX is set if a temporary table is created
+      or dropped.
+
+      Note that transaction.all.modified_non_trans_table may be 1
+      if last statement was a single row transaction without begin/end.
+      Testing this flag must always be done in connection with
+      rli->is_in_group().
+    */
+
+    if ((thd->transaction->all.modified_non_trans_table ||
+         (thd->variables.option_bits & OPTION_BINLOG_THIS_TRX)) &&
+        rli->is_in_group())
+    {
+      char msg_stopped[]=
+        "... Slave SQL Thread stopped with incomplete event group "
+        "having non-transactional changes. "
+        "If the group consists solely of row-based events, you can try "
+        "to restart the slave with --slave-exec-mode=IDEMPOTENT, which "
+        "ignores duplicate key, key not found, and similar errors (see "
+        "documentation for details).";
+
+      DBUG_PRINT("info", ("modified_non_trans_table: %d  OPTION_BEGIN: %d  "
+                          "OPTION_BINLOG_THIS_TRX: %d  is_in_group: %d",
+                          thd->transaction->all.modified_non_trans_table,
+                          MY_TEST(thd->variables.option_bits & OPTION_BEGIN),
+                          MY_TEST(thd->variables.option_bits & OPTION_BINLOG_THIS_TRX),
+                          rli->is_in_group()));
+
+      if (rli->abort_slave)
+      {
+        DBUG_PRINT("info",
+                   ("Request to stop slave SQL Thread received while "
+                    "applying a group that has non-transactional "
+                    "changes; waiting for completion of the group ... "));
+
+        /*
+          Slave sql thread shutdown in face of unfinished group
+          modified Non-trans table is handled via a timer. The slave
+          may eventually give up waiting to complete the current group and in
+          that case there might be issues at consequent slave restart,
+          see the error message.  WL#2975 offers a robust solution
+          requiring to store the last executed event's coordinates
+          along with the group's coordinates instead of waiting with
+          @c last_event_start_time the timer.
+        */
+
+        /* Start the grace-period timer on the first call for this group. */
+        if (rgi->last_event_start_time == 0)
+          rgi->last_event_start_time= my_time(0);
+        /* Accept the kill only once SLAVE_WAIT_GROUP_DONE seconds passed. */
+        ret= difftime(my_time(0), rgi->last_event_start_time) <=
+          SLAVE_WAIT_GROUP_DONE ? FALSE : TRUE;
+
+        DBUG_EXECUTE_IF("stop_slave_middle_group", 
+                        DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
+                                        ret= TRUE;);); // time is over
+
+        if (ret == 0)
+        {
+          rli->report(WARNING_LEVEL, 0, rgi->gtid_info(),
+                      "Request to stop slave SQL Thread received while "
+                      "applying a group that has non-transactional "
+                      "changes; waiting for completion of the group ... ");
+        }
+        else
+        {
+          rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, rgi->gtid_info(),
+                      ER_THD(thd, ER_SLAVE_FATAL_ERROR), msg_stopped);
+        }
+      }
+      else
+      {
+        ret= TRUE;
+        rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, rgi->gtid_info(),
+                    ER_THD(thd, ER_SLAVE_FATAL_ERROR),
+                    msg_stopped);
+      }
+    }
+    else
+    {
+      /* Nothing non-transactional in flight: safe to accept the kill. */
+      ret= TRUE;
+    }
+  }
+  /* Once the kill is accepted, reset the timer for the next group. */
+  if (ret)
+    rgi->last_event_start_time= 0;
+  
+  DBUG_RETURN(ret);
+}
+
+
+/*
+  skip_load_data_infile()
+
+  NOTES
+    This is used to tell a 3.23 master to break send_file()
+*/
+
+void skip_load_data_infile(NET *net)
+{
+  DBUG_ENTER("skip_load_data_infile");
+
+  /* Request /dev/null so the master effectively sends no data. */
+  (void)net_request_file(net, "/dev/null");
+  (void)my_net_read(net);                               // discard response
+  (void)net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0); // ok
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Send command 251 (file transfer request) with the given file name;
+  the result of net_write_command() is returned unchanged.
+*/
+bool net_request_file(NET* net, const char* fname)
+{
+  DBUG_ENTER("net_request_file");
+  DBUG_RETURN(net_write_command(net, 251, (uchar*) fname, strlen(fname),
+                                (uchar*) "", 0));
+}
+
+/*
+  From other comments and tests in code, it looks like
+  sometimes Query_log_event and Load_log_event can have db == 0
+  (see rewrite_db() above for example)
+  (cases where this happens are unclear; it may be when the master is 3.23).
+*/
+
+const char *print_slave_db_safe(const char* db)
+{
+  DBUG_ENTER("*print_slave_db_safe");
+
+  /* Map a missing database name to the empty string for printing. */
+  const char *safe_db= db ? db : "";
+  DBUG_RETURN(safe_db);
+}
+
+#endif /* HAVE_REPLICATION */
+
+/**
+  Execute SHOW SLAVE STATUS / SHOW ALL SLAVES STATUS.
+
+  Without replication compiled in, this is a no-op that returns OK.
+  Otherwise it requires PRIV_STMT_SHOW_SLAVE_STATUS and shows either
+  all master connections or the single named one.
+
+  @return false on success, true on error
+*/
+bool Sql_cmd_show_slave_status::execute(THD *thd)
+{
+#ifndef HAVE_REPLICATION
+  my_ok(thd);
+  return false;
+#else
+  DBUG_ENTER("Sql_cmd_show_slave_status::execute");
+  bool res= true;
+
+  /* Accept one of two privileges */
+  if (check_global_access(thd, PRIV_STMT_SHOW_SLAVE_STATUS))
+    goto error;
+  if (is_show_all_slaves_stat())
+  {
+    /* SHOW ALL SLAVES STATUS: list every configured master connection. */
+    mysql_mutex_lock(&LOCK_active_mi);
+    res= show_all_master_info(thd);
+    mysql_mutex_unlock(&LOCK_active_mi);
+  }
+  else
+  {
+    LEX_MASTER_INFO *lex_mi= &thd->lex->mi;
+    Master_info *mi;
+    /* Look up the named connection; WARN_LEVEL_ERROR reports a failure. */
+    if ((mi= get_master_info(&lex_mi->connection_name,
+                             Sql_condition::WARN_LEVEL_ERROR)))
+    {
+      res= show_master_info(thd, mi, 0);
+      mi->release();            // drop the reference from get_master_info()
+    }
+  }
+error:
+  DBUG_RETURN(res);
+#endif
+}
+
+/*
+  Read one line from f into var (at most max_size bytes) and strip the
+  trailing newline; if the line was longer, skip the rest of it in the
+  file.  When nothing can be read, fall back to default_val if given.
+  Returns 0 on success, 1 when there is neither input nor a default.
+*/
+int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
+                                 const char *default_val)
+{
+  size_t length;
+  DBUG_ENTER("init_strvar_from_file");
+
+  if (!(length= my_b_gets(f, var, max_size)))
+  {
+    /* Nothing read: apply the default if the caller supplied one. */
+    if (!default_val)
+      DBUG_RETURN(1);
+    strmake(var, default_val, max_size - 1);
+    DBUG_RETURN(0);
+  }
+
+  char *tail= var + length - 1;
+  if (*tail == '\n')
+    *tail= 0;                          // strip the trailing newline
+  else
+  {
+    /*
+      The line was truncated or ended exactly at the buffer boundary:
+      consume the remainder of the line up to and including the newline.
+    */
+    int c;
+    do
+    {
+      c= my_b_get(f);
+    } while (c != '\n' && c != my_b_EOF);
+  }
+  DBUG_RETURN(0);
+}
+
+/*
+  when moving these functions to mysys, don't forget to
+  remove slave.cc from libmysqld/CMakeLists.txt
+*/
+/*
+  Read one line from f and parse it as an int into *var.  When the file
+  is exhausted, fall back to default_val; note a default of 0 cannot be
+  applied and yields failure (historical behaviour).
+  Returns 0 on success, 1 otherwise.
+*/
+int init_intvar_from_file(int* var, IO_CACHE* f, int default_val)
+{
+  char buf[32];
+  DBUG_ENTER("init_intvar_from_file");
+
+  if (!my_b_gets(f, buf, sizeof(buf)))
+  {
+    /* Nothing left to read: use the default when one is given. */
+    if (!default_val)
+      DBUG_RETURN(1);
+    *var= default_val;
+    DBUG_RETURN(0);
+  }
+  *var= atoi(buf);
+  DBUG_RETURN(0);
+}
+
+/*
+  Read one line from f and parse it as a float into *var.  When the file
+  is exhausted, fall back to default_val; a 0.0 default cannot be applied
+  and yields failure (historical behaviour).
+  Returns 0 on success, 1 on parse failure or missing value.
+*/
+int init_floatvar_from_file(float* var, IO_CACHE* f, float default_val)
+{
+  char buf[16];
+  DBUG_ENTER("init_floatvar_from_file");
+
+  if (my_b_gets(f, buf, sizeof(buf)))
+    DBUG_RETURN(sscanf(buf, "%f", var) == 1 ? 0 : 1);
+  if (default_val != 0.0)
+  {
+    *var= default_val;
+    DBUG_RETURN(0);
+  }
+  DBUG_RETURN(1);
+}
+
+
+/**
+   A master info read method
+
+   This function is called from @c init_master_info() along with
+   relatives to restore some of @c active_mi members.
+   Particularly, this function is responsible for restoring
+   IGNORE_SERVER_IDS list of servers whose events the slave is
+   going to ignore (to not log them in the relay log).
+   Items being read are supposed to be decimal output of values of a
+   type shorter or equal of @c long and separated by the single space.
+   It is also used to restore DO_DOMAIN_IDS & IGNORE_DOMAIN_IDS lists.
+
+   @param arr         @c DYNAMIC_ARRAY pointer to storage for servers id
+   @param f           @c IO_CACHE pointer to the source file
+
+   @retval 0         All OK
+   @retval non-zero  An error
+*/
+
+int init_dynarray_intvar_from_file(DYNAMIC_ARRAY* arr, IO_CACHE* f)
+{
+  int ret= 0;
+  char buf[16 * (sizeof(long)*4 + 1)]; // static buffer to use most of times
+  char *buf_act= buf; // actual buffer can be dynamic if static is short
+  char *token, *last;
+  uint num_items;     // number of items of `arr'
+  size_t read_size;
+  DBUG_ENTER("init_dynarray_intvar_from_file");
+
+  if ((read_size= my_b_gets(f, buf_act, sizeof(buf))) == 0)
+  {
+    DBUG_RETURN(0);                             // no line in master.info
+  }
+  if (read_size + 1 == sizeof(buf) && buf[sizeof(buf) - 2] != '\n')
+  {
+    /*
+      short read happened; allocate sufficient memory and make the 2nd read
+    */
+    char buf_work[(sizeof(long)*3 + 1)*16];
+    memcpy(buf_work, buf, sizeof(buf_work));
+    /*
+      buf_work is smaller than buf, so the copy may have cut the string
+      before its NUL terminator; terminate explicitly so strtok_r cannot
+      scan past the end of buf_work.
+    */
+    buf_work[sizeof(buf_work) - 1]= '\0';
+    token= strtok_r(buf_work, " ", &last);
+    if (token == NULL)
+    {
+      DBUG_RETURN(1);                 // malformed line: no leading item count
+    }
+    num_items= atoi(token);
+    size_t snd_size;
+    /*
+      Lower-bound estimate of the needed buffer size, based on the formula:
+      (the items number + items themselves) * 
+          (decimal size + space) - 1 + `\n' + '\0'
+    */
+    size_t max_size= (1 + num_items) * (sizeof(long)*3 + 1) + 1;
+    buf_act= (char*) my_malloc(key_memory_Rpl_info_file_buffer, max_size,
+                               MYF(MY_WME));
+    if (buf_act == NULL)
+    {
+      DBUG_RETURN(1);                 // OOM; MY_WME has already reported it
+    }
+    memcpy(buf_act, buf, read_size);
+    snd_size= my_b_gets(f, buf_act + read_size, max_size - read_size);
+    if (snd_size == 0 ||
+        ((snd_size + 1 == max_size - read_size) &&  buf_act[max_size - 2] != '\n'))
+    {
+      /*
+        failure to make the 2nd read or short read again
+      */
+      ret= 1;
+      goto err;
+    }
+  }
+  token= strtok_r(buf_act, " ", &last);
+  if (token == NULL)
+  {
+    ret= 1;
+    goto err;
+  }
+  num_items= atoi(token);
+  for (uint i=0; i < num_items; i++)
+  {
+    token= strtok_r(NULL, " ", &last);
+    if (token == NULL)
+    {
+      ret= 1;
+      goto err;
+    }
+    else
+    {
+      ulong val= atol(token);
+      insert_dynamic(arr, (uchar *) &val);
+    }
+  }
+err:
+  if (buf_act != buf)
+    my_free(buf_act);
+  DBUG_RETURN(ret);
+}
+
+#ifdef HAVE_REPLICATION
+
+/*
+  Classify an error code as network-related.
+
+  @param[in]   errorno   Number of the error.
+  RETURNS:
+  TRUE         network error
+  FALSE        not network error
+*/
+
+bool is_network_error(uint errorno)
+{
+  switch (errorno) {
+  case CR_CONNECTION_ERROR:
+  case CR_CONN_HOST_ERROR:
+  case CR_SERVER_GONE_ERROR:
+  case CR_SERVER_LOST:
+  case ER_CON_COUNT_ERROR:
+  case ER_CONNECTION_KILLED:
+  case ER_NEW_ABORTING_CONNECTION:
+  case ER_NET_READ_INTERRUPTED:
+  case ER_SERVER_SHUTDOWN:
+#ifdef WITH_WSREP
+  case ER_UNKNOWN_COM_ERROR:
+#endif
+    return TRUE;
+  default:
+    return FALSE;
+  }
+}
+
+
+/*
+  Note that we rely on the master's version (3.23, 4.0.14 etc) instead of
+  relying on the binlog's version. This is not perfect: imagine an upgrade
+  of the master without waiting that all slaves are in sync with the master;
+  then a slave could be fooled about the binlog's format. This is what happens
+  when people upgrade a 3.23 master to 4.0 without doing RESET MASTER: 4.0
+  slaves are fooled. So we do this only to distinguish between 3.23 and more
+  recent masters (it's too late to change things for 3.23).
+
+  RETURNS
+  0       ok
+  1       error
+  2       transient network problem, the caller should try to reconnect
+*/
+
+static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi)
+{
+  char err_buff[MAX_SLAVE_ERRMSG], err_buff2[MAX_SLAVE_ERRMSG];
+  const char* errmsg= 0;
+  int err_code= 0;
+  MYSQL_RES *master_res= 0;
+  MYSQL_ROW master_row;
+  uint version= mysql_get_server_version(mysql) / 10000;
+  DBUG_ENTER("get_master_version_and_clock");
+
+  /*
+    Free old description_event_for_queue (that is needed if we are in
+    a reconnection).
+  */
+  delete mi->rli.relay_log.description_event_for_queue;
+  mi->rli.relay_log.description_event_for_queue= 0;
+
+  if (!my_isdigit(&my_charset_bin,*mysql->server_version))
+  {
+    errmsg= err_buff2;
+    snprintf(err_buff2, sizeof(err_buff2),
+             "Master reported unrecognized MariaDB version: %s",
+             mysql->server_version);
+    err_code= ER_SLAVE_FATAL_ERROR;
+    sprintf(err_buff, ER_DEFAULT(err_code), err_buff2);
+  }
+  else
+  {
+    DBUG_EXECUTE_IF("mock_mariadb_primary_v5_in_get_master_version",
+                    version= 5;);
+
+    /*
+      Note the following switch will bug when we have MySQL branch 30 ;)
+    */
+    switch (version) {
+    case 0:
+    case 1:
+    case 2:
+      errmsg= err_buff2;
+      snprintf(err_buff2, sizeof(err_buff2),
+               "Master reported unrecognized MariaDB version: %s",
+               mysql->server_version);
+      err_code= ER_SLAVE_FATAL_ERROR;
+      sprintf(err_buff, ER_DEFAULT(err_code), err_buff2);
+      break;
+    case 3:
+      mi->rli.relay_log.description_event_for_queue= new
+        Format_description_log_event(1, mysql->server_version);
+      break;
+    case 4:
+      mi->rli.relay_log.description_event_for_queue= new
+        Format_description_log_event(3, mysql->server_version);
+      break;
+    default:
+      /*
+        Master is MySQL >=5.0. Give a default Format_desc event, so that we can
+        take the early steps (like tests for "is this a 3.23 master") which we
+        have to take before we receive the real master's Format_desc which will
+        override this one. Note that the Format_desc we create below is garbage
+        (it has the format of the *slave*); it's only good to help know if the
+        master is 3.23, 4.0, etc.
+      */
+      mi->rli.relay_log.description_event_for_queue= new
+        Format_description_log_event(4, mysql->server_version);
+      break;
+    }
+  }
+
+  /*
+     This does not mean that a 5.0 slave will be able to read a 6.0 master; but
+     as we don't know yet, we don't want to forbid this for now. If a 5.0 slave
+     can't read a 6.0 master, this will show up when the slave can't read some
+     events sent by the master, and there will be error messages.
+  */
+
+  if (errmsg)
+    goto err;
+
+  /* as we are here, we tried to allocate the event */
+  if (!mi->rli.relay_log.description_event_for_queue)
+  {
+    errmsg= "default Format_description_log_event";
+    err_code= ER_SLAVE_CREATE_EVENT_FAILURE;
+    sprintf(err_buff, ER_DEFAULT(err_code), errmsg);
+    goto err;
+  }
+
+  /*
+    FD_q's (A) is set initially from RL's (A): FD_q.(A) := RL.(A).
+    It's necessary to adjust FD_q.(A) at this point because in the following
+    course FD_q is going to be dumped to RL.
+    Generally FD_q is derived from a received FD_m (roughly FD_q := FD_m) 
+    in queue_event and the master's (A) is installed.
+    At one step with the assignment the Relay-Log's checksum alg is set to 
+    a new value: RL.(A) := FD_q.(A). If the slave service is stopped
+    the last time assigned RL.(A) will be passed over to the restarting
+    service (to the current execution point).
+    RL.A is a "codec" to verify checksum in queue_event() almost all the time
+    the first fake Rotate event.
+    Starting from this point IO thread will executes the following checksum
+    warmup sequence  of actions:
+
+    FD_q.A := RL.A,
+    A_m^0 := master.@@global.binlog_checksum,
+    {queue_event(R_f): verifies(R_f, A_m^0)},
+    {queue_event(FD_m): verifies(FD_m, FD_m.A), dump(FD_q), rotate(RL),
+                        FD_q := FD_m, RL.A := FD_q.A)}
+
+    See legends definition on MYSQL_BIN_LOG::relay_log_checksum_alg
+    docs lines (binlog.h).
+    In above A_m^0 - the value of master's
+    @@binlog_checksum determined in the upcoming handshake (stored in
+    mi->checksum_alg_before_fd).
+
+
+    After the warm-up sequence IO gets to "normal" checksum verification mode
+    to use RL.A in 
+    
+    {queue_event(E_m): verifies(E_m, RL.A)}
+
+    until it has received a new FD_m.
+  */
+  mi->rli.relay_log.description_event_for_queue->checksum_alg=
+    mi->rli.relay_log.relay_log_checksum_alg;
+
+  DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg !=
+              BINLOG_CHECKSUM_ALG_UNDEF);
+  DBUG_ASSERT(mi->rli.relay_log.relay_log_checksum_alg !=
+              BINLOG_CHECKSUM_ALG_UNDEF); 
+  /*
+    Compare the master and slave's clock. Do not die if master's clock is
+    unavailable (very old master not supporting UNIX_TIMESTAMP()?).
+  */
+
+#ifdef ENABLED_DEBUG_SYNC
+  DBUG_EXECUTE_IF("dbug.before_get_UNIX_TIMESTAMP",
+                  {
+                    const char act[]=
+                      "now "
+                      "wait_for signal.get_unix_timestamp";
+                    DBUG_ASSERT(debug_sync_service);
+                    DBUG_ASSERT(!debug_sync_set_action(current_thd,
+                                                       STRING_WITH_LEN(act)));
+                  };);
+#endif
+
+  master_res= NULL;
+  if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT UNIX_TIMESTAMP()")) &&
+      (master_res= mysql_store_result(mysql)) &&
+      (master_row= mysql_fetch_row(master_res)))
+  {
+    mysql_mutex_lock(&mi->data_lock);
+    mi->clock_diff_with_master=
+      (DBUG_IF("negate_clock_diff_with_master") ?
+       0:
+       (long) (time((time_t *) 0) - strtoul(master_row[0], 0, 10)));
+
+    mysql_mutex_unlock(&mi->data_lock);
+  }
+  else if (check_io_slave_killed(mi, NULL))
+    goto slave_killed_err;
+  else if (is_network_error(mysql_errno(mysql)))
+  {
+    mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
+               "Get master clock failed with error: %s", mysql_error(mysql));
+    goto network_err;
+  }
+  else 
+  {
+    mysql_mutex_lock(&mi->data_lock);
+    mi->clock_diff_with_master= 0; /* The "most sensible" value */
+    mysql_mutex_unlock(&mi->data_lock);
+    sql_print_warning("\"SELECT UNIX_TIMESTAMP()\" failed on master, "
+                      "do not trust column Seconds_Behind_Master of SHOW "
+                      "SLAVE STATUS. Error: %s (%d)",
+                      mysql_error(mysql), mysql_errno(mysql));
+  }
+  if (master_res)
+  {
+    mysql_free_result(master_res);
+    master_res= NULL;
+  }
+
+  /*
+    Check that the master's server id and ours are different. Because if they
+    are equal (which can result from a simple copy of master's datadir to slave,
+    thus copying some my.cnf), replication will work but all events will be
+    skipped.
+    Do not die if SHOW VARIABLES LIKE 'SERVER_ID' fails on master (very old
+    master?).
+    Note: we could have put a @@SERVER_ID in the previous SELECT
+    UNIX_TIMESTAMP() instead, but this would not have worked on 3.23 masters.
+  */
+#ifdef ENABLED_DEBUG_SYNC
+  DBUG_EXECUTE_IF("dbug.before_get_SERVER_ID",
+                  {
+                    const char act[]=
+                      "now "
+                      "wait_for signal.get_server_id";
+                    DBUG_ASSERT(debug_sync_service);
+                    DBUG_ASSERT(!debug_sync_set_action(current_thd, 
+                                                       STRING_WITH_LEN(act)));
+                  };);
+#endif
+  master_res= NULL;
+  master_row= NULL;
+  if (!mysql_real_query(mysql,
+                        STRING_WITH_LEN("SHOW VARIABLES LIKE 'SERVER_ID'")) &&
+      (master_res= mysql_store_result(mysql)) &&
+      (master_row= mysql_fetch_row(master_res)))
+  {
+    if ((global_system_variables.server_id ==
+             (mi->master_id= strtoul(master_row[1], 0, 10))) &&
+        !mi->rli.replicate_same_server_id)
+    {
+      errmsg= "The slave I/O thread stops because master and slave have equal \
+MariaDB server ids; these ids must be different for replication to work (or \
+the --replicate-same-server-id option must be used on slave but this does \
+not always make sense; please check the manual before using it).";
+      err_code= ER_SLAVE_FATAL_ERROR;
+      sprintf(err_buff, ER_DEFAULT(err_code), errmsg);
+      goto err;
+    }
+  }
+  else if (mysql_errno(mysql))
+  {
+    if (check_io_slave_killed(mi, NULL))
+      goto slave_killed_err;
+    else if (is_network_error(mysql_errno(mysql)))
+    {
+      mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
+                 "Get master SERVER_ID failed with error: %s", mysql_error(mysql));
+      goto network_err;
+    }
+    /* Fatal error */
+    errmsg= "The slave I/O thread stops because a fatal error is encountered \
+when it try to get the value of SERVER_ID variable from master.";
+    err_code= mysql_errno(mysql);
+    sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+    goto err;
+  }
+  else if (!master_row && master_res)
+  {
+    mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE, NULL,
+               "Unknown system variable 'SERVER_ID' on master, \
+maybe it is a *VERY OLD MASTER*.");
+  }
+  if (master_res)
+  {
+    mysql_free_result(master_res);
+    master_res= NULL;
+  }
+  if (mi->master_id == 0 && mi->ignore_server_ids.elements > 0)
+  {
+    errmsg= "Slave configured with server id filtering could not detect the master server id.";
+    err_code= ER_SLAVE_FATAL_ERROR;
+    sprintf(err_buff, ER_DEFAULT(err_code), errmsg);
+    goto err;
+  }
+
+  /*
+    Check that the master's global character_set_server and ours are the same.
+    Not fatal if query fails (old master?).
+    Note that we don't check for equality of global character_set_client and
+    collation_connection (neither do we prevent their setting in
+    set_var.cc). That's because from what I (Guilhem) have tested, the global
+    values of these 2 are never used (new connections don't use them).
+    We don't test equality of global collation_database either as it's is
+    going to be deprecated (made read-only) in 4.1 very soon.
+    The test is only relevant if master < 5.0.3 (we'll test only if it's older
+    than the 5 branch; < 5.0.3 was alpha...), as >= 5.0.3 master stores
+    charset info in each binlog event.
+    We don't do it for 3.23 because masters <3.23.50 hang on
+    SELECT @@unknown_var (BUG#7965 - see changelog of 3.23.50). So finally we
+    test only if master is 4.x.
+  */
+
+  /* redundant with rest of code but safer against later additions */
+  if (version == 3)
+    goto err;
+
+  if (version == 4)
+  {
+    master_res= NULL;
+    if (!mysql_real_query(mysql,
+                          STRING_WITH_LEN("SELECT @@GLOBAL.COLLATION_SERVER")) &&
+        (master_res= mysql_store_result(mysql)) &&
+        (master_row= mysql_fetch_row(master_res)))
+    {
+      if (strcmp(master_row[0],
+                 global_system_variables.collation_server->coll_name.str))
+      {
+        errmsg= "The slave I/O thread stops because master and slave have \
+different values for the COLLATION_SERVER global variable. The values must \
+be equal for the Statement-format replication to work";
+        err_code= ER_SLAVE_FATAL_ERROR;
+        sprintf(err_buff, ER_DEFAULT(err_code), errmsg);
+        goto err;
+      }
+    }
+    else if (check_io_slave_killed(mi, NULL))
+      goto slave_killed_err;
+    else if (is_network_error(mysql_errno(mysql)))
+    {
+      mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
+                 "Get master COLLATION_SERVER failed with error: %s", mysql_error(mysql));
+      goto network_err;
+    }
+    else if (mysql_errno(mysql) != ER_UNKNOWN_SYSTEM_VARIABLE)
+    {
+      /* Fatal error */
+      errmsg= "The slave I/O thread stops because a fatal error is encountered \
+when it try to get the value of COLLATION_SERVER global variable from master.";
+      err_code= mysql_errno(mysql);
+      sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+      goto err;
+    }
+    else
+      mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE, NULL,
+                 "Unknown system variable 'COLLATION_SERVER' on master, \
+maybe it is a *VERY OLD MASTER*. *NOTE*: slave may experience \
+inconsistency if replicated data deals with collation.");
+
+    if (master_res)
+    {
+      mysql_free_result(master_res);
+      master_res= NULL;
+    }
+  }
+
+  /*
+    Perform analogous check for time zone. Theoretically we also should
+    perform check here to verify that SYSTEM time zones are the same on
+    slave and master, but we can't rely on value of @@system_time_zone
+    variable (it is time zone abbreviation) since it determined at start
+    time and so could differ for slave and master even if they are really
+    in the same system time zone. So we are omitting this check and just
+    relying on documentation. Also according to Monty there are many users
+    who are using replication between servers in various time zones. Hence
+    such a check would break everything for them. (And now everything will
+    work for them because by default both their master and slave will have
+    'SYSTEM' time zone).
+    This check is only necessary for 4.x masters (and < 5.0.4 masters but
+    those were alpha).
+  */
+  if (version == 4)
+  {
+    master_res= NULL;
+    if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT @@GLOBAL.TIME_ZONE")) &&
+        (master_res= mysql_store_result(mysql)) &&
+        (master_row= mysql_fetch_row(master_res)))
+    {
+      if (strcmp(master_row[0],
+                 global_system_variables.time_zone->get_name()->ptr()))
+      {
+        errmsg= "The slave I/O thread stops because master and slave have \
+different values for the TIME_ZONE global variable. The values must \
+be equal for the Statement-format replication to work";
+        err_code= ER_SLAVE_FATAL_ERROR;
+        sprintf(err_buff, ER_DEFAULT(err_code), errmsg);
+        goto err;
+      }
+    }
+    else if (check_io_slave_killed(mi, NULL))
+      goto slave_killed_err;
+    else if (is_network_error(err_code= mysql_errno(mysql)))
+    {
+      mi->report(ERROR_LEVEL, err_code, NULL,
+                 "Get master TIME_ZONE failed with error: %s",
+                 mysql_error(mysql));
+      goto network_err;
+    }
+    else if (err_code == ER_UNKNOWN_SYSTEM_VARIABLE)
+    {
+      /* We use ERROR_LEVEL to get the error logged to file */
+      mi->report(ERROR_LEVEL, err_code, NULL,
+
+                 "MariaDB master doesn't have a TIME_ZONE variable. Note that"
+                 "if your timezone is not same between master and slave, your "
+                 "slave may get wrong data into timestamp columns");
+    }
+    else
+    {
+      /* Fatal error */
+      errmsg= "The slave I/O thread stops because a fatal error is encountered \
+when it try to get the value of TIME_ZONE global variable from master.";
+      sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+      goto err;
+    }
+    if (master_res)
+    {
+      mysql_free_result(master_res);
+      master_res= NULL;
+    }
+  }
+
+  if (mi->heartbeat_period != 0.0)
+  {
+    const char query_format[]= "SET @master_heartbeat_period= %llu";
+    char query[sizeof(query_format) + 32];
+    /* 
+       the period is an ulonglong of nano-secs. 
+    */
+    my_snprintf(query, sizeof(query), query_format,
+                (ulonglong) (mi->heartbeat_period*1000000000UL));
+
+    DBUG_EXECUTE_IF("simulate_slave_heartbeat_network_error",
+                    { static ulong dbug_count= 0;
+                      if (++dbug_count < 3)
+                        goto heartbeat_network_error;
+                    });
+    if (mysql_real_query(mysql, query, (ulong)strlen(query)))
+    {
+      if (check_io_slave_killed(mi, NULL))
+        goto slave_killed_err;
+
+      if (is_network_error(mysql_errno(mysql)))
+      {
+      IF_DBUG(heartbeat_network_error: , )
+        mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
+                   "SET @master_heartbeat_period to master failed with error: %s",
+                   mysql_error(mysql));
+        mysql_free_result(mysql_store_result(mysql));
+        goto network_err;
+      }
+      else
+      {
+        /* Fatal error */
+        errmsg= "The slave I/O thread stops because a fatal error is encountered "
+          "when it tries to SET @master_heartbeat_period on master.";
+        err_code= ER_SLAVE_FATAL_ERROR;
+        sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+        mysql_free_result(mysql_store_result(mysql));
+        goto err;
+      }
+    }
+    mysql_free_result(mysql_store_result(mysql));
+  }
+ 
+  /*
+    Querying if master is capable to checksum and notifying it about own
+    CRC-awareness. The master's side instant value of @@global.binlog_checksum 
+    is stored in the dump thread's uservar area as well as cached locally
+    to become known in consensus by master and slave.
+  */
+  DBUG_EXECUTE_IF("simulate_slave_unaware_checksum",
+                  mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_OFF;
+                  goto past_checksum;);
+  {
+    int rc;
+    const char query[]= "SET @master_binlog_checksum= @@global.binlog_checksum";
+    master_res= NULL;
+    mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF; //initially undefined
+    /*
+      @c checksum_alg_before_fd is queried from master in this block.
+      If master is old checksum-unaware the value stays undefined.
+      Once the first FD will be received its alg descriptor will replace
+      the being queried one.
+    */
+    rc= mysql_real_query(mysql, query,(ulong)strlen(query));
+    if (rc != 0)
+    {
+      if (check_io_slave_killed(mi, NULL))
+        goto slave_killed_err;
+
+      if (mysql_errno(mysql) == ER_UNKNOWN_SYSTEM_VARIABLE)
+      {
+        /* Ignore this expected error if not a high error level */
+        if (global_system_variables.log_warnings > 1)
+        {
+          // this is tolerable as OM -> NS is supported
+          mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
+                     "Notifying master by %s failed with "
+                     "error: %s", query, mysql_error(mysql));
+        }
+      }
+      else
+      {
+        if (is_network_error(mysql_errno(mysql)))
+        {
+          mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
+                     "Notifying master by %s failed with "
+                     "error: %s", query, mysql_error(mysql));
+          mysql_free_result(mysql_store_result(mysql));
+          goto network_err;
+        }
+        else
+        {
+          errmsg= "The slave I/O thread stops because a fatal error is encountered "
+            "when it tried to SET @master_binlog_checksum on master.";
+          err_code= ER_SLAVE_FATAL_ERROR;
+          sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+          mysql_free_result(mysql_store_result(mysql));
+          goto err;
+        }
+      }
+    }
+    else
+    {
+      mysql_free_result(mysql_store_result(mysql));
+      if (!mysql_real_query(mysql,
+                            STRING_WITH_LEN("SELECT @master_binlog_checksum")) &&
+          (master_res= mysql_store_result(mysql)) &&
+          (master_row= mysql_fetch_row(master_res)) &&
+          (master_row[0] != NULL))
+      {
+        mi->checksum_alg_before_fd= (enum_binlog_checksum_alg)
+          (find_type(master_row[0], &binlog_checksum_typelib, 1) - 1);
+        // valid outcome is either of
+        DBUG_ASSERT(mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_OFF ||
+                    mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_CRC32);
+      }
+      else if (check_io_slave_killed(mi, NULL))
+        goto slave_killed_err;
+      else if (is_network_error(mysql_errno(mysql)))
+      {
+        mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
+                   "Get master BINLOG_CHECKSUM failed with error: %s", mysql_error(mysql));
+        goto network_err;
+      }
+      else
+      {
+        errmsg= "The slave I/O thread stops because a fatal error is encountered "
+          "when it tried to SELECT @master_binlog_checksum.";
+        err_code= ER_SLAVE_FATAL_ERROR;
+        sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+        mysql_free_result(mysql_store_result(mysql));
+        goto err;
+      }
+    }
+    if (master_res)
+    {
+      mysql_free_result(master_res);
+      master_res= NULL;
+    }
+  }
+
+#ifndef DBUG_OFF
+past_checksum:
+#endif
+
+  /*
+    Request the master to filter away events with the @@skip_replication flag
+    set, if we are running with
+    --replicate-events-marked-for-skip=FILTER_ON_MASTER.
+  */
+  if (opt_replicate_events_marked_for_skip == RPL_SKIP_FILTER_ON_MASTER)
+  {
+    if (unlikely(mysql_real_query(mysql,
+                                  STRING_WITH_LEN("SET skip_replication=1"))))
+    {
+      err_code= mysql_errno(mysql);
+      if (is_network_error(err_code))
+      {
+        mi->report(ERROR_LEVEL, err_code, NULL,
+                   "Setting master-side filtering of @@skip_replication failed "
+                   "with error: %s", mysql_error(mysql));
+        goto network_err;
+      }
+      else if (err_code == ER_UNKNOWN_SYSTEM_VARIABLE)
+      {
+        /*
+          The master is older than the slave and does not support the
+          @@skip_replication feature.
+          This is not a problem, as such master will not generate events with
+          the @@skip_replication flag set in the first place. We will still
+          do slave-side filtering of such events though, to handle the (rare)
+          case of downgrading a master and receiving old events generated from
+          before the downgrade with the @@skip_replication flag set.
+        */
+        DBUG_PRINT("info", ("Old master does not support master-side filtering "
+                            "of @@skip_replication events."));
+      }
+      else
+      {
+        /* Fatal error */
+        errmsg= "The slave I/O thread stops because a fatal error is "
+          "encountered when it tries to request filtering of events marked "
+          "with the @@skip_replication flag.";
+        sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+        goto err;
+      }
+    }
+  }
+
+  /* Announce MariaDB slave capabilities. */
+  DBUG_EXECUTE_IF("simulate_slave_capability_none", goto after_set_capability;);
+  {
+    int rc= DBUG_IF("simulate_slave_capability_old_53") ?
+        mysql_real_query(mysql, STRING_WITH_LEN("SET @mariadb_slave_capability="
+                         STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_ANNOTATE))) :
+        mysql_real_query(mysql, STRING_WITH_LEN("SET @mariadb_slave_capability="
+                         STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_MINE)));
+    if (unlikely(rc))
+    {
+      err_code= mysql_errno(mysql);
+      if (is_network_error(err_code))
+      {
+        mi->report(ERROR_LEVEL, err_code, NULL,
+                   "Setting @mariadb_slave_capability failed with error: %s",
+                   mysql_error(mysql));
+        goto network_err;
+      }
+      else
+      {
+        /* Fatal error */
+        errmsg= "The slave I/O thread stops because a fatal error is "
+          "encountered when it tries to set @mariadb_slave_capability.";
+        sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+        goto err;
+      }
+    }
+  }
+#ifndef DBUG_OFF
+after_set_capability:
+#endif
+
+  if (!(mi->master_supports_gtid= version >= 10))
+  {
+    sql_print_information(
+        "Slave I/O thread: Falling back to Using_Gtid=No because "
+        "master does not support GTIDs");
+    mi->using_gtid= Master_info::USE_GTID_NO;
+  }
+
+  if (mi->using_gtid != Master_info::USE_GTID_NO)
+  {
+    /* Request dump to start from slave replication GTID state. */
+    int rc;
+    char str_buf[256];
+    String query_str(str_buf, sizeof(str_buf), system_charset_info);
+    query_str.length(0);
+
+    /*
+      Read the master @@GLOBAL.gtid_domain_id variable.
+      This is mostly to check that master is GTID aware, but we could later
+      perhaps use it to check that different multi-source masters are correctly
+      configured with distinct domain_id.
+    */
+    if (mysql_real_query(mysql,
+                         STRING_WITH_LEN("SELECT @@GLOBAL.gtid_domain_id")) ||
+        !(master_res= mysql_store_result(mysql)) ||
+        !(master_row= mysql_fetch_row(master_res)))
+    {
+      err_code= mysql_errno(mysql);
+      if (is_network_error(err_code))
+      {
+        mi->report(ERROR_LEVEL, err_code, NULL,
+                   "Get master @@GLOBAL.gtid_domain_id failed with error: %s",
+                   mysql_error(mysql));
+        goto network_err;
+      }
+      else
+      {
+        errmsg= "The slave I/O thread stops because master does not support "
+          "MariaDB global transaction id. A fatal error is encountered when "
+          "it tries to SELECT @@GLOBAL.gtid_domain_id.";
+        sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+        goto err;
+      }
+    }
+    mysql_free_result(master_res);
+    master_res= NULL;
+
+    query_str.append(STRING_WITH_LEN("SET @slave_connect_state='"),
+                     system_charset_info);
+    if (mi->gtid_current_pos.append_to_string(&query_str))
+    {
+      err_code= ER_OUTOFMEMORY;
+      errmsg= "The slave I/O thread stops because a fatal out-of-memory "
+        "error is encountered when it tries to compute @slave_connect_state.";
+      sprintf(err_buff, "%s Error: Out of memory", errmsg);
+      goto err;
+    }
+    query_str.append(STRING_WITH_LEN("'"), system_charset_info);
+
+    rc= mysql_real_query(mysql, query_str.ptr(), query_str.length());
+    if (unlikely(rc))
+    {
+      err_code= mysql_errno(mysql);
+      if (is_network_error(err_code))
+      {
+        mi->report(ERROR_LEVEL, err_code, NULL,
+                   "Setting @slave_connect_state failed with error: %s",
+                   mysql_error(mysql));
+        goto network_err;
+      }
+      else
+      {
+        /* Fatal error */
+        errmsg= "The slave I/O thread stops because a fatal error is "
+          "encountered when it tries to set @slave_connect_state.";
+        sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+        goto err;
+      }
+    }
+
+    query_str.length(0);
+    if (query_str.append(STRING_WITH_LEN("SET @slave_gtid_strict_mode="),
+                         system_charset_info) ||
+        query_str.append_ulonglong(opt_gtid_strict_mode != false))
+    {
+      err_code= ER_OUTOFMEMORY;
+      errmsg= "The slave I/O thread stops because a fatal out-of-memory "
+        "error is encountered when it tries to set @slave_gtid_strict_mode.";
+      sprintf(err_buff, "%s Error: Out of memory", errmsg);
+      goto err;
+    }
+
+    rc= mysql_real_query(mysql, query_str.ptr(), query_str.length());
+    if (unlikely(rc))
+    {
+      err_code= mysql_errno(mysql);
+      if (is_network_error(err_code))
+      {
+        mi->report(ERROR_LEVEL, err_code, NULL,
+                   "Setting @slave_gtid_strict_mode failed with error: %s",
+                   mysql_error(mysql));
+        goto network_err;
+      }
+      else
+      {
+        /* Fatal error */
+        errmsg= "The slave I/O thread stops because a fatal error is "
+          "encountered when it tries to set @slave_gtid_strict_mode.";
+        sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+        goto err;
+      }
+    }
+
+    query_str.length(0);
+    if (query_str.append(STRING_WITH_LEN("SET @slave_gtid_ignore_duplicates="),
+                         system_charset_info) ||
+        query_str.append_ulonglong(opt_gtid_ignore_duplicates != false))
+    {
+      err_code= ER_OUTOFMEMORY;
+      errmsg= "The slave I/O thread stops because a fatal out-of-memory error "
+        "is encountered when it tries to set @slave_gtid_ignore_duplicates.";
+      sprintf(err_buff, "%s Error: Out of memory", errmsg);
+      goto err;
+    }
+
+    rc= mysql_real_query(mysql, query_str.ptr(), query_str.length());
+    if (unlikely(rc))
+    {
+      err_code= mysql_errno(mysql);
+      if (is_network_error(err_code))
+      {
+        mi->report(ERROR_LEVEL, err_code, NULL,
+                   "Setting @slave_gtid_ignore_duplicates failed with "
+                   "error: %s", mysql_error(mysql));
+        goto network_err;
+      }
+      else
+      {
+        /* Fatal error */
+        errmsg= "The slave I/O thread stops because a fatal error is "
+          "encountered when it tries to set @slave_gtid_ignore_duplicates.";
+        sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+        goto err;
+      }
+    }
+
+    if (mi->rli.until_condition == Relay_log_info::UNTIL_GTID)
+    {
+      query_str.length(0);
+      query_str.append(STRING_WITH_LEN("SET @slave_until_gtid='"),
+                       system_charset_info);
+      if (mi->rli.until_gtid_pos.append_to_string(&query_str))
+      {
+        err_code= ER_OUTOFMEMORY;
+        errmsg= "The slave I/O thread stops because a fatal out-of-memory "
+          "error is encountered when it tries to compute @slave_until_gtid.";
+        sprintf(err_buff, "%s Error: Out of memory", errmsg);
+        goto err;
+      }
+      query_str.append(STRING_WITH_LEN("'"), system_charset_info);
+
+      rc= mysql_real_query(mysql, query_str.ptr(), query_str.length());
+      if (unlikely(rc))
+      {
+        err_code= mysql_errno(mysql);
+        if (is_network_error(err_code))
+        {
+          mi->report(ERROR_LEVEL, err_code, NULL,
+                     "Setting @slave_until_gtid failed with error: %s",
+                     mysql_error(mysql));
+          goto network_err;
+        }
+        else
+        {
+          /* Fatal error */
+          errmsg= "The slave I/O thread stops because a fatal error is "
+            "encountered when it tries to set @slave_until_gtid.";
+          sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+          goto err;
+        }
+      }
+    }
+  }
+  else
+  {
+    /*
+      If we are not using GTID to connect this time, then instead request
+      the corresponding GTID position from the master, so that the user
+      can reconnect the next time using MASTER_GTID_POS=AUTO.
+    */
+    char quote_buf[2*sizeof(mi->master_log_name)+1];
+    char str_buf[28+2*sizeof(mi->master_log_name)+10];
+    String query(str_buf, sizeof(str_buf), system_charset_info);
+    size_t quote_length;
+    my_bool overflow;
+    query.length(0);
+
+    query.append(STRING_WITH_LEN("SELECT binlog_gtid_pos('"));
+    quote_length= escape_quotes_for_mysql(&my_charset_bin, quote_buf,
+                                          sizeof(quote_buf),
+                                          mi->master_log_name,
+                                          strlen(mi->master_log_name),
+                                          &overflow);
+    query.append(quote_buf, quote_length);
+    query.append(STRING_WITH_LEN("',"));
+    query.append_ulonglong(mi->master_log_pos);
+    query.append(')');
+
+    if (!mysql_real_query(mysql, query.c_ptr_safe(), query.length()) &&
+        (master_res= mysql_store_result(mysql)) &&
+        (master_row= mysql_fetch_row(master_res)) &&
+        (master_row[0] != NULL))
+    {
+      rpl_global_gtid_slave_state->load(mi->io_thd, master_row[0],
+                                        strlen(master_row[0]), false, false);
+    }
+    else if (check_io_slave_killed(mi, NULL))
+      goto slave_killed_err;
+    else if (is_network_error(mysql_errno(mysql)))
+    {
+      mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
+                 "Get master GTID position failed with error: %s", mysql_error(mysql));
+      goto network_err;
+    }
+    else
+    {
+      /*
+        ToDo: If the master does not have the binlog_gtid_pos() function, it
+        just means that it is an old master with no GTID support, so we should
+        do nothing.
+
+        However, if binlog_gtid_pos() exists, but fails or returns NULL, then
+        it means that the requested position is not valid. We could use this
+        to catch attempts to replicate from within the middle of an event,
+        avoiding strange failures or possible corruption.
+      */
+    }
+    if (master_res)
+    {
+      mysql_free_result(master_res);
+      master_res= NULL;
+    }
+  }
+
+err:
+  if (errmsg)
+  {
+    if (master_res)
+      mysql_free_result(master_res);
+    DBUG_ASSERT(err_code != 0);
+    mi->report(ERROR_LEVEL, err_code, NULL, "%s", err_buff);
+    DBUG_RETURN(1);
+  }
+
+  DBUG_RETURN(0);
+
+network_err:
+  if (master_res)
+    mysql_free_result(master_res);
+  DBUG_RETURN(2);
+
+slave_killed_err:
+  if (master_res)
+    mysql_free_result(master_res);
+  DBUG_RETURN(2);
+}
+
+
+/*
+  Pause the slave I/O thread until relay log space is available again.
+
+  Waits on rli->log_space_cond while the accumulated relay log size
+  (rli->log_space_total) exceeds the configured limit (rli->log_space_limit),
+  unless the I/O thread is killed or the SQL thread has asked us to ignore
+  the limit (rli->ignore_log_space_limit).
+
+  @param rli  Relay log info of the connection whose I/O thread waits
+
+  @return true if the I/O thread was killed while waiting, false otherwise
+*/
+static bool wait_for_relay_log_space(Relay_log_info* rli)
+{
+  bool slave_killed=0;
+  bool ignore_log_space_limit;
+  Master_info* mi = rli->mi;
+  PSI_stage_info old_stage;
+  THD* thd = mi->io_thd;
+  DBUG_ENTER("wait_for_relay_log_space");
+
+  mysql_mutex_lock(&rli->log_space_lock);
+  thd->ENTER_COND(&rli->log_space_cond,
+                  &rli->log_space_lock,
+                  &stage_waiting_for_relay_log_space,
+                  &old_stage);
+  while (rli->log_space_limit < rli->log_space_total &&
+         !(slave_killed=io_slave_killed(mi)) &&
+         !rli->ignore_log_space_limit)
+    mysql_cond_wait(&rli->log_space_cond, &rli->log_space_lock);
+
+  /* Sample and clear the flag while still holding log_space_lock. */
+  ignore_log_space_limit= rli->ignore_log_space_limit;
+  rli->ignore_log_space_limit= 0;
+
+  thd->EXIT_COND(&old_stage);
+
+  /* 
+    Makes the IO thread read only one event at a time
+    until the SQL thread is able to purge the relay 
+    logs, freeing some space.
+
+    Therefore, once the SQL thread processes this next 
+    event, it goes to sleep (no more events in the queue),
+    sets ignore_log_space_limit=true and wakes the IO thread. 
+    However, this event may have been enough already for 
+    the SQL thread to purge some log files, freeing 
+    rli->log_space_total .
+
+    This guarantees that the SQL and IO thread move
+    forward only one event at a time (to avoid deadlocks), 
+    when the relay space limit is reached. It also 
+    guarantees that when the SQL thread is prepared to
+    rotate (to be able to purge some logs), the IO thread
+    will know about it and will rotate.
+
+    NOTE: The ignore_log_space_limit is only set when the SQL
+          thread sleeps waiting for events.
+
+   */
+
+  if (ignore_log_space_limit)
+  {
+#ifndef DBUG_OFF
+    {
+      DBUG_PRINT("info", ("log_space_limit=%llu log_space_total=%llu "
+                          "ignore_log_space_limit=%d "
+                          "sql_force_rotate_relay=%d", 
+                        rli->log_space_limit, uint64(rli->log_space_total),
+                        (int) rli->ignore_log_space_limit,
+                        (int) rli->sql_force_rotate_relay));
+    }
+#endif
+    /* Rotate now if the SQL thread requested it so it can purge old logs. */
+    if (rli->sql_force_rotate_relay)
+    {
+      mysql_mutex_lock(&mi->data_lock);
+      rotate_relay_log(rli->mi);
+      mysql_mutex_unlock(&mi->data_lock);
+      rli->sql_force_rotate_relay= false;
+    }
+  }
+
+  DBUG_RETURN(slave_killed);
+}
+
+
+/*
+  Builds a Rotate from the ignored events' info and writes it to relay log.
+
+  SYNOPSIS
+  write_ignored_events_info_to_relay_log()
+    thd             pointer to I/O thread's thd
+    mi
+
+  DESCRIPTION
+    Slave I/O thread, going to die, must leave a durable trace of the
+    ignored events' end position for the use of the slave SQL thread, by
+    calling this function. Only that thread can call it (see assertion).
+ */
+static void write_ignored_events_info_to_relay_log(THD *thd, Master_info *mi)
+{
+  Relay_log_info *rli= &mi->rli;
+  mysql_mutex_t *log_lock= rli->relay_log.get_log_lock();
+  DBUG_ENTER("write_ignored_events_info_to_relay_log");
+
+  /* Only the I/O thread may call this function (see function comment). */
+  DBUG_ASSERT(thd == mi->io_thd);
+  mysql_mutex_lock(log_lock);
+  if (rli->ign_master_log_name_end[0] || rli->ign_gtids.count())
+  {
+    Rotate_log_event *rev= NULL;
+    Gtid_list_log_event *glev= NULL;
+    if (rli->ign_master_log_name_end[0])
+    {
+      rev= new Rotate_log_event(rli->ign_master_log_name_end,
+                                0, rli->ign_master_log_pos_end,
+                                Rotate_log_event::DUP_NAME);
+      rli->ign_master_log_name_end[0]= 0;
+      /* NULL here presumably means out-of-memory (see message below). */
+      if (unlikely(!(bool)rev))
+        mi->report(ERROR_LEVEL, ER_SLAVE_CREATE_EVENT_FAILURE, NULL,
+                   ER_THD(thd, ER_SLAVE_CREATE_EVENT_FAILURE),
+                   "Rotate_event (out of memory?),"
+                   " SHOW SLAVE STATUS may be inaccurate");
+    }
+    if (rli->ign_gtids.count())
+    {
+      DBUG_ASSERT(!rli->is_in_group());         // Ensure no active transaction
+      glev= new Gtid_list_log_event(&rli->ign_gtids,
+                                    Gtid_list_log_event::FLAG_IGN_GTIDS);
+      rli->ign_gtids.reset();
+      if (unlikely(!(bool)glev))
+        mi->report(ERROR_LEVEL, ER_SLAVE_CREATE_EVENT_FAILURE, NULL,
+                   ER_THD(thd, ER_SLAVE_CREATE_EVENT_FAILURE),
+                   "Gtid_list_event (out of memory?),"
+                   " gtid_slave_pos may be inaccurate");
+    }
+
+    /* Can unlock before writing as slave SQL thd will soon see our event. */
+    mysql_mutex_unlock(log_lock);
+    if (rev)
+    {
+      DBUG_PRINT("info",("writing a Rotate event to track down ignored events"));
+      rev->server_id= 0; // don't be ignored by slave SQL thread
+      if (unlikely(rli->relay_log.append(rev)))
+        mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
+                   ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
+                   "failed to write a Rotate event"
+                   " to the relay log, SHOW SLAVE STATUS may be"
+                   " inaccurate");
+      delete rev;
+    }
+    if (glev)
+    {
+      DBUG_PRINT("info",("writing a Gtid_list event to track down ignored events"));
+      glev->server_id= 0; // don't be ignored by slave SQL thread
+      glev->set_artificial_event(); // Don't mess up Exec_Master_Log_Pos
+      if (unlikely(rli->relay_log.append(glev)))
+        mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
+                   ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
+                   "failed to write a Gtid_list event to the relay log, "
+                   "gtid_slave_pos may be inaccurate");
+      delete glev;
+    }
+    /* If anything was appended, account for the new bytes and persist state. */
+    if (likely (rev || glev))
+    {
+      rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+      if (flush_master_info(mi, TRUE, TRUE))
+        sql_print_error("Failed to flush master info file");
+    }
+  }
+  else
+    mysql_mutex_unlock(log_lock);
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Register this slave on the master with COM_REGISTER_SLAVE.
+
+  Builds the registration packet (server_id, report_host/user/password,
+  report_port) and sends it. Over-long report_* values are not an error:
+  a warning is logged and registration is silently skipped.
+
+  @param mysql              Established connection to the master
+  @param mi                 Master_info of this connection
+  @param[out] suppress_warnings  Set to TRUE when the command was interrupted
+                            (ER_NET_READ_INTERRUPTED) so the caller should
+                            suppress the reconnect warning
+
+  @retval 0  success (or registration skipped)
+  @retval 1  failure
+*/
+int register_slave_on_master(MYSQL* mysql, Master_info *mi,
+                             bool *suppress_warnings)
+{
+  uchar buf[1024], *pos= buf;
+  size_t report_host_len=0, report_user_len=0, report_password_len=0;
+  DBUG_ENTER("register_slave_on_master");
+
+  *suppress_warnings= FALSE;
+  if (report_host)
+    report_host_len= strlen(report_host);
+  if (report_host_len > HOSTNAME_LENGTH)
+  {
+    sql_print_warning("The length of report_host is %zu. "
+                      "It is larger than the max length(%d), so this "
+                      "slave cannot be registered to the master.",
+                      report_host_len, HOSTNAME_LENGTH);
+    DBUG_RETURN(0);
+  }
+
+  if (report_user)
+    report_user_len= strlen(report_user);
+  if (report_user_len > USERNAME_LENGTH)
+  {
+    sql_print_warning("The length of report_user is %zu. "
+                      "It is larger than the max length(%d), so this "
+                      "slave cannot be registered to the master.",
+                      report_user_len, USERNAME_LENGTH);
+    DBUG_RETURN(0);
+  }
+
+  if (report_password)
+    report_password_len= strlen(report_password);
+  if (report_password_len > MAX_PASSWORD_LENGTH)
+  {
+    sql_print_warning("The length of report_password is %zu. "
+                      "It is larger than the max length(%d), so this "
+                      "slave cannot be registered to the master.",
+                      report_password_len, MAX_PASSWORD_LENGTH);
+    DBUG_RETURN(0);
+  }
+
+  int4store(pos, global_system_variables.server_id); pos+= 4;
+  pos= net_store_data(pos, (uchar*) report_host, report_host_len);
+  pos= net_store_data(pos, (uchar*) report_user, report_user_len);
+  pos= net_store_data(pos, (uchar*) report_password, report_password_len);
+  int2store(pos, (uint16) report_port); pos+= 2;
+  /* 
+    Fake rpl_recovery_rank, which was removed in BUG#13963,
+    so that this server can register itself on old servers,
+    see BUG#49259.
+   */
+  int4store(pos, /* rpl_recovery_rank */ 0);    pos+= 4;
+  /* The master will fill in master_id */
+  int4store(pos, 0);                    pos+= 4;
+
+  if (simple_command(mysql, COM_REGISTER_SLAVE, buf, (ulong) (pos- buf), 0))
+  {
+    if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
+    {
+      *suppress_warnings= TRUE;                 // Suppress reconnect warning
+    }
+    else if (!check_io_slave_killed(mi, NULL))
+    {
+      /* Renamed from 'buf': avoid shadowing the packet buffer above. */
+      char err_buf[256];
+      my_snprintf(err_buf, sizeof(err_buf), "%s (Errno: %d)",
+                  mysql_error(mysql), mysql_errno(mysql));
+      mi->report(ERROR_LEVEL, ER_SLAVE_MASTER_COM_FAILURE, NULL,
+                 ER(ER_SLAVE_MASTER_COM_FAILURE), "COM_REGISTER_SLAVE",
+                 err_buf);
+    }
+    DBUG_RETURN(1);
+  }
+  DBUG_RETURN(0);
+}
+
+
+/**
+  Execute a SHOW SLAVE STATUS statement.
+
+  @param thd Pointer to THD object for the client thread executing the
+  statement.
+
+  @param mi Pointer to Master_info object for the IO thread.
+
+  @param full TRUE for SHOW ALL SLAVES STATUS (adds the extra columns).
+
+  @retval FALSE success
+  @retval TRUE failure
+*/
+
+bool show_master_info(THD *thd, Master_info *mi, bool full)
+{
+  DBUG_ENTER("show_master_info");
+  String gtid_pos;
+  List<Item> field_list;
+
+  /* For the full variant, compute the current gtid_slave_pos up front. */
+  if (full && rpl_global_gtid_slave_state->tostring(&gtid_pos, NULL, 0))
+    DBUG_RETURN(TRUE);
+  show_master_info_get_fields(thd, &field_list, full, gtid_pos.length());
+  if (thd->protocol->send_result_set_metadata(&field_list,
+                       Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
+    DBUG_RETURN(TRUE);
+  if (send_show_master_info_data(thd, mi, full, &gtid_pos))
+    DBUG_RETURN(TRUE);
+  my_eof(thd);
+  DBUG_RETURN(FALSE);
+}
+
+/**
+  Build the column list for SHOW SLAVE STATUS / SHOW ALL SLAVES STATUS.
+
+  @param thd             Client thread (items are allocated on thd->mem_root)
+  @param[out] field_list Receives one Item per result-set column
+  @param full            TRUE for SHOW ALL SLAVES STATUS (extra columns at
+                         the front and the end)
+  @param gtid_pos_length Display width for the Gtid_Slave_Pos column
+*/
+void show_master_info_get_fields(THD *thd, List<Item> *field_list,
+                                 bool full, size_t gtid_pos_length)
+{
+  /* 'mi' is never dereferenced; it only provides sizeof() for column widths. */
+  Master_info *mi;
+  MEM_ROOT *mem_root= thd->mem_root;
+  DBUG_ENTER("show_master_info_get_fields");
+
+  if (full)
+  {
+    field_list->push_back(new (mem_root)
+                          Item_empty_string(thd, "Connection_name",
+                                            MAX_CONNECTION_NAME),
+                          mem_root);
+    field_list->push_back(new (mem_root)
+                          Item_empty_string(thd, "Slave_SQL_State", 30),
+                          mem_root);
+  }
+
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Slave_IO_State", 30),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Master_Host", sizeof(mi->host)),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Master_User", sizeof(mi->user)),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_return_int(thd, "Master_Port", 7, MYSQL_TYPE_LONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_return_int(thd, "Connect_Retry", 10,
+                                        MYSQL_TYPE_LONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Master_Log_File", FN_REFLEN),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_return_int(thd, "Read_Master_Log_Pos", 10,
+                                        MYSQL_TYPE_LONGLONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Relay_Log_File", FN_REFLEN),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_return_int(thd, "Relay_Log_Pos", 10,
+                                        MYSQL_TYPE_LONGLONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Relay_Master_Log_File",
+                                          FN_REFLEN),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Slave_IO_Running", 3),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Slave_SQL_Running", 3),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Replicate_Rewrite_DB", 23),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Replicate_Do_DB", 20),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Replicate_Ignore_DB", 20),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Replicate_Do_Table", 20),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Replicate_Ignore_Table", 23),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Replicate_Wild_Do_Table", 24),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Replicate_Wild_Ignore_Table",
+                                          28),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_return_int(thd, "Last_Errno", 4, MYSQL_TYPE_LONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Last_Error", 20),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_return_int(thd, "Skip_Counter", 10,
+                                        MYSQL_TYPE_LONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_return_int(thd, "Exec_Master_Log_Pos", 10,
+                                        MYSQL_TYPE_LONGLONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_return_int(thd, "Relay_Log_Space", 10,
+                                        MYSQL_TYPE_LONGLONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Until_Condition", 6),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Until_Log_File", FN_REFLEN),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_return_int(thd, "Until_Log_Pos", 10,
+                                        MYSQL_TYPE_LONGLONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Master_SSL_Allowed", 7),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Master_SSL_CA_File",
+                                          sizeof(mi->ssl_ca)),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Master_SSL_CA_Path",
+                                          sizeof(mi->ssl_capath)),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Master_SSL_Cert",
+                                          sizeof(mi->ssl_cert)),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Master_SSL_Cipher",
+                                          sizeof(mi->ssl_cipher)),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Master_SSL_Key",
+                                          sizeof(mi->ssl_key)),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_return_int(thd, "Seconds_Behind_Master", 10,
+                                        MYSQL_TYPE_LONGLONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Master_SSL_Verify_Server_Cert",
+                                          3),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_return_int(thd, "Last_IO_Errno", 4,
+                                        MYSQL_TYPE_LONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Last_IO_Error", 20),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_return_int(thd, "Last_SQL_Errno", 4,
+                                        MYSQL_TYPE_LONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Last_SQL_Error", 20),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Replicate_Ignore_Server_Ids",
+                                          FN_REFLEN),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_return_int(thd, "Master_Server_Id", sizeof(ulong),
+                                            MYSQL_TYPE_LONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Master_SSL_Crl",
+                                          sizeof(mi->ssl_crl)),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Master_SSL_Crlpath",
+                                          sizeof(mi->ssl_crlpath)),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Using_Gtid",
+                                          sizeof("Current_Pos")-1),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Gtid_IO_Pos", 30),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Replicate_Do_Domain_Ids",
+                                          FN_REFLEN),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Replicate_Ignore_Domain_Ids",
+                                          FN_REFLEN),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Parallel_Mode",
+                                          sizeof("conservative")-1),
+                        mem_root);
+  /* mem_root argument added to the next three calls for consistency with
+     every other push_back in this function (same allocation root). */
+  field_list->push_back(new (mem_root)
+                        Item_return_int(thd, "SQL_Delay", 10,
+                                        MYSQL_TYPE_LONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_return_int(thd, "SQL_Remaining_Delay", 8,
+                                        MYSQL_TYPE_LONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                        Item_empty_string(thd, "Slave_SQL_Running_State",
+                                          20),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                       Item_return_int(thd, "Slave_DDL_Groups", 20,
+                                       MYSQL_TYPE_LONGLONG),
+                       mem_root);
+  field_list->push_back(new (mem_root)
+                       Item_return_int(thd, "Slave_Non_Transactional_Groups", 20,
+                                       MYSQL_TYPE_LONGLONG),
+                        mem_root);
+  field_list->push_back(new (mem_root)
+                       Item_return_int(thd, "Slave_Transactional_Groups", 20,
+                                       MYSQL_TYPE_LONGLONG),
+                        mem_root);
+
+  if (full)
+  {
+    field_list->push_back(new (mem_root)
+                          Item_return_int(thd, "Retried_transactions", 10,
+                                          MYSQL_TYPE_LONG),
+                          mem_root);
+    field_list->push_back(new (mem_root)
+                          Item_return_int(thd, "Max_relay_log_size", 10,
+                                          MYSQL_TYPE_LONGLONG),
+                          mem_root);
+    field_list->push_back(new (mem_root)
+                          Item_return_int(thd, "Executed_log_entries", 10,
+                                          MYSQL_TYPE_LONG),
+                          mem_root);
+    field_list->push_back(new (mem_root)
+                          Item_return_int(thd, "Slave_received_heartbeats", 10,
+                                          MYSQL_TYPE_LONG),
+                          mem_root);
+    field_list->push_back(new (mem_root)
+                          Item_float(thd, "Slave_heartbeat_period", 0.0, 3, 10),
+                          mem_root);
+    field_list->push_back(new (mem_root)
+                          Item_empty_string(thd, "Gtid_Slave_Pos",
+                                            (uint)gtid_pos_length),
+                          mem_root);
+  }
+  DBUG_VOID_RETURN;
+}
+
+/* Text for Slave_IO_Running, indexed by mi->slave_running's state value. */
+static const LEX_CSTRING slave_running[]=
+{
+  { STRING_WITH_LEN("No") },
+  { STRING_WITH_LEN("Connecting") },
+  { STRING_WITH_LEN("Preparing") },
+  { STRING_WITH_LEN("Yes") }
+};
+
+/* Shared Yes/No literals for boolean status columns. */
+static const LEX_CSTRING msg_yes= { STRING_WITH_LEN("Yes") };
+static const LEX_CSTRING msg_no=  { STRING_WITH_LEN("No") };
+#ifndef HAVE_OPENSSL
+/* Without OpenSSL, a requested SSL setting is reported as "Ignored". */
+static const LEX_CSTRING msg_ignored=  { STRING_WITH_LEN("Ignored") };
+#endif
+
+
+static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full,
+                                       String *gtid_pos)
+{
+  DBUG_ENTER("send_show_master_info_data");
+
+  if (mi->host[0])
+  {
+    DBUG_PRINT("info",("host is set: '%s'", mi->host));
+    String *packet= &thd->packet;
+    Protocol *protocol= thd->protocol;
+    Rpl_filter *rpl_filter= mi->rpl_filter;
+    StringBuffer<256> tmp;
+    const char *msg;
+
+    protocol->prepare_for_resend();
+
+    /*
+      slave_running can be accessed without run_lock but not other
+      non-volotile members like mi->io_thd, which is guarded by the mutex.
+    */
+    if (full)
+      protocol->store(mi->connection_name.str, mi->connection_name.length,
+                      &my_charset_bin);
+    mysql_mutex_lock(&mi->run_lock);
+    if (full)
+    {
+      /*
+        Show what the sql driver replication thread is doing
+        This is only meaningful if there is only one slave thread.
+      */
+      msg= (mi->rli.sql_driver_thd ?
+            mi->rli.sql_driver_thd->get_proc_info() : "");
+      protocol->store_string_or_null(msg, &my_charset_bin);
+    }
+    msg= mi->io_thd ? mi->io_thd->get_proc_info() : "";
+    protocol->store_string_or_null(msg, &my_charset_bin);
+
+    mysql_mutex_unlock(&mi->run_lock);
+
+    mysql_mutex_lock(&mi->data_lock);
+    mysql_mutex_lock(&mi->rli.data_lock);
+    /* err_lock is to protect mi->last_error() */
+    mysql_mutex_lock(&mi->err_lock);
+    /* err_lock is to protect mi->rli.last_error() */
+    mysql_mutex_lock(&mi->rli.err_lock);
+    protocol->store_string_or_null(mi->host, &my_charset_bin);
+    protocol->store_string_or_null(mi->user, &my_charset_bin);
+    protocol->store((uint32) mi->port);
+    protocol->store((uint32) mi->connect_retry);
+    protocol->store(mi->master_log_name, strlen(mi->master_log_name),
+                    &my_charset_bin);
+    protocol->store((ulonglong) mi->master_log_pos);
+    msg= (mi->rli.group_relay_log_name +
+          dirname_length(mi->rli.group_relay_log_name));
+    protocol->store(msg, strlen(msg), &my_charset_bin);
+    protocol->store((ulonglong) mi->rli.group_relay_log_pos);
+    protocol->store(mi->rli.group_master_log_name,
+                    strlen(mi->rli.group_master_log_name),
+                    &my_charset_bin);
+    protocol->store(&slave_running[mi->slave_running], &my_charset_bin);
+    protocol->store(mi->rli.slave_running ? &msg_yes : &msg_no, &my_charset_bin);
+    protocol->store(rpl_filter->get_rewrite_db());
+    protocol->store(rpl_filter->get_do_db());
+    protocol->store(rpl_filter->get_ignore_db());
+
+    rpl_filter->get_do_table(&tmp);
+    protocol->store(&tmp);
+    rpl_filter->get_ignore_table(&tmp);
+    protocol->store(&tmp);
+    rpl_filter->get_wild_do_table(&tmp);
+    protocol->store(&tmp);
+    rpl_filter->get_wild_ignore_table(&tmp);
+    protocol->store(&tmp);
+
+    protocol->store(mi->rli.last_error().number);
+    protocol->store_string_or_null(mi->rli.last_error().message,
+                                   &my_charset_bin);
+    protocol->store((uint32) mi->rli.slave_skip_counter);
+    protocol->store((ulonglong) mi->rli.group_master_log_pos);
+    protocol->store((ulonglong) mi->rli.log_space_total);
+
+    msg= (mi->rli.until_condition==Relay_log_info::UNTIL_NONE ? "None" :
+          (mi->rli.until_condition==Relay_log_info::UNTIL_MASTER_POS? "Master":
+          (mi->rli.until_condition==Relay_log_info::UNTIL_RELAY_POS? "Relay":
+           "Gtid")));
+    protocol->store(msg, strlen(msg), &my_charset_bin);
+    protocol->store_string_or_null(mi->rli.until_log_name, &my_charset_bin);
+    protocol->store((ulonglong) mi->rli.until_log_pos);
+
+#ifdef HAVE_OPENSSL
+    protocol->store(mi->ssl ? &msg_yes : &msg_no, &my_charset_bin);
+#else
+    protocol->store(mi->ssl ? &msg_ignored: &msg_no, &my_charset_bin);
+#endif
+    protocol->store_string_or_null(mi->ssl_ca, &my_charset_bin);
+    protocol->store_string_or_null(mi->ssl_capath, &my_charset_bin);
+    protocol->store_string_or_null(mi->ssl_cert, &my_charset_bin);
+    protocol->store_string_or_null(mi->ssl_cipher, &my_charset_bin);
+    protocol->store_string_or_null(mi->ssl_key, &my_charset_bin);
+
+    /*
+      Seconds_Behind_Master: if SQL thread is running and I/O thread is
+      connected, we can compute it otherwise show NULL (i.e. unknown).
+    */
+    if ((mi->slave_running == MYSQL_SLAVE_RUN_READING) &&
+        mi->rli.slave_running)
+    {
+      long time_diff;
+      bool idle;
+      time_t stamp= mi->rli.last_master_timestamp;
+
+      if (!stamp)
+        idle= true;
+      else
+      {
+        idle= mi->rli.sql_thread_caught_up;
+
+        /*
+          The idleness of the SQL thread is needed for the parallel slave
+          because events can be ignored before distribution to a worker thread.
+          That is, Seconds_Behind_Master should still be calculated and visible
+          while the slave is processing ignored events, such as those skipped
+          due to slave_skip_counter.
+        */
+        if (mi->using_parallel() && idle && !mi->rli.parallel.workers_idle())
+          idle= false;
+      }
+      if (idle)
+        time_diff= 0;
+      else
+      {
+        time_diff= ((long)(time(0) - stamp) - mi->clock_diff_with_master);
+      /*
+        Apparently on some systems time_diff can be <0. Here are possible
+        reasons related to MySQL:
+        - the master is itself a slave of another master whose time is ahead.
+        - somebody used an explicit SET TIMESTAMP on the master.
+        Possible reason related to granularity-to-second of time functions
+        (nothing to do with MySQL), which can explain a value of -1:
+        assume the master's and slave's time are perfectly synchronized, and
+        that at slave's connection time, when the master's timestamp is read,
+        it is at the very end of second 1, and (a very short time later) when
+        the slave's timestamp is read it is at the very beginning of second
+        2. Then the recorded value for master is 1 and the recorded value for
+        slave is 2. At SHOW SLAVE STATUS time, assume that the difference
+        between timestamp of slave and rli->last_master_timestamp is 0
+        (i.e. they are in the same second), then we get 0-(2-1)=-1 as a result.
+        This confuses users, so we don't go below 0.
+
+        last_master_timestamp == 0 (an "impossible" timestamp 1970) is a
+        special marker to say "consider we have caught up".
+      */
+        if (time_diff < 0)
+          time_diff= 0;
+      }
+      protocol->store((longlong)time_diff);
+    }
+    else
+    {
+      protocol->store_null();
+    }
+    protocol->store(mi->ssl_verify_server_cert? &msg_yes : &msg_no,
+                    &my_charset_bin);
+
+    // Last_IO_Errno
+    protocol->store(mi->last_error().number);
+    // Last_IO_Error
+    protocol->store_string_or_null(mi->last_error().message, &my_charset_bin);
+    // Last_SQL_Errno
+    protocol->store(mi->rli.last_error().number);
+    // Last_SQL_Error
+    protocol->store_string_or_null(mi->rli.last_error().message,
+                                   &my_charset_bin);
+    // Replicate_Ignore_Server_Ids
+    prot_store_ids(thd, &mi->ignore_server_ids);
+    // Master_Server_id
+    protocol->store((uint32) mi->master_id);
+    // SQL_Delay
+    // Master_Ssl_Crl
+    protocol->store_string_or_null(mi->ssl_crl, &my_charset_bin);
+    // Master_Ssl_Crlpath
+    protocol->store_string_or_null(mi->ssl_crlpath, &my_charset_bin);
+    // Using_Gtid
+    protocol->store_string_or_null(mi->using_gtid_astext(mi->using_gtid),
+                                   &my_charset_bin);
+    // Gtid_IO_Pos
+    {
+      mi->gtid_current_pos.to_string(&tmp);
+      protocol->store(tmp.ptr(), tmp.length(), &my_charset_bin);
+    }
+
+    // Replicate_Do_Domain_Ids & Replicate_Ignore_Domain_Ids
+    mi->domain_id_filter.store_ids(thd);
+
+    // Parallel_Mode
+    {
+      const char *mode_name= get_type(&slave_parallel_mode_typelib,
+                                      mi->parallel_mode);
+      protocol->store(mode_name, strlen(mode_name), &my_charset_bin);
+    }
+
+    protocol->store((uint32) mi->rli.get_sql_delay());
+    // SQL_Remaining_Delay
+    // THD::proc_info is not protected by any lock, so we read it once
+    // to ensure that we use the same value throughout this function.
+    const char *slave_sql_running_state=
+      mi->rli.sql_driver_thd ? mi->rli.sql_driver_thd->proc_info : "";
+    if (slave_sql_running_state == Relay_log_info::state_delaying_string)
+    {
+      time_t t= my_time(0), sql_delay_end= mi->rli.get_sql_delay_end();
+      protocol->store((uint32)(t < sql_delay_end ? sql_delay_end - t : 0));
+    }
+    else
+      protocol->store_null();
+    // Slave_SQL_Running_State
+    protocol->store_string_or_null(slave_sql_running_state, &my_charset_bin);
+
+    protocol->store(mi->total_ddl_groups);
+    protocol->store(mi->total_non_trans_groups);
+    protocol->store(mi->total_trans_groups);
+
+    if (full)
+    {
+      protocol->store((uint32)    mi->rli.retried_trans);
+      protocol->store((ulonglong) mi->rli.max_relay_log_size);
+      protocol->store(mi->rli.executed_entries);
+      protocol->store((uint32)    mi->received_heartbeats);
+      protocol->store_double(mi->heartbeat_period, 3);
+      protocol->store(gtid_pos->ptr(), gtid_pos->length(), &my_charset_bin);
+    }
+
+    mysql_mutex_unlock(&mi->rli.err_lock);
+    mysql_mutex_unlock(&mi->err_lock);
+    mysql_mutex_unlock(&mi->rli.data_lock);
+    mysql_mutex_unlock(&mi->data_lock);
+
+    if (my_net_write(&thd->net, (uchar*) thd->packet.ptr(), packet->length()))
+      DBUG_RETURN(TRUE);
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+/* Used to sort connections by name */
+
+static int cmp_mi_by_name(const Master_info **arg1,
+                          const Master_info **arg2)
+{
+  return my_strcasecmp(system_charset_info, (*arg1)->connection_name.str,
+                       (*arg2)->connection_name.str);
+}
+
+
+/**
+  Execute a SHOW FULL SLAVE STATUS statement.
+
+  @param thd Pointer to THD object for the client thread executing the
+  statement.
+
+  Elements are sorted according to the original connection_name.
+
+  @retval FALSE success
+  @retval TRUE failure
+
+  @note
+  master_info_index is protected by LOCK_active_mi.
+*/
+
+bool show_all_master_info(THD* thd)
+{
+  uint i, elements;
+  String gtid_pos;
+  Master_info **tmp;
+  List field_list;
+  DBUG_ENTER("show_all_master_info");
+  mysql_mutex_assert_owner(&LOCK_active_mi);
+
+  gtid_pos.length(0);
+  if (rpl_append_gtid_state(>id_pos, true))
+  {
+    my_error(ER_OUT_OF_RESOURCES, MYF(0));
+    DBUG_RETURN(TRUE);
+  }
+
+  show_master_info_get_fields(thd, &field_list, 1, gtid_pos.length());
+  if (thd->protocol->send_result_set_metadata(&field_list,
+                       Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
+    DBUG_RETURN(TRUE);
+
+  if (!master_info_index ||
+      !(elements= master_info_index->master_info_hash.records))
+    goto end;
+
+  /*
+    Sort lines to get them into a predicted order
+    (needed for test cases and to not confuse users)
+  */
+  if (!(tmp= (Master_info**) thd->alloc(sizeof(Master_info*) * elements)))
+    DBUG_RETURN(TRUE);
+
+  for (i= 0; i < elements; i++)
+  {
+    tmp[i]= (Master_info *) my_hash_element(&master_info_index->
+                                            master_info_hash, i);
+  }
+  my_qsort(tmp, elements, sizeof(Master_info*), (qsort_cmp) cmp_mi_by_name);
+
+  for (i= 0; i < elements; i++)
+  {
+    if (send_show_master_info_data(thd, tmp[i], 1, >id_pos))
+      DBUG_RETURN(TRUE);
+  }
+
+end:
+  my_eof(thd);
+  DBUG_RETURN(FALSE);
+}
+
+
+void set_slave_thread_options(THD* thd)
+{
+  DBUG_ENTER("set_slave_thread_options");
+  /*
+     It's nonsense to constrain the slave threads with max_join_size; if a
+     query succeeded on master, we HAVE to execute it. So set
+     OPTION_BIG_SELECTS. Setting max_join_size to HA_POS_ERROR is not enough
+     (and it's not needed if we have OPTION_BIG_SELECTS) because an INSERT
+     SELECT examining more than 4 billion rows would still fail (yes, because
+     when max_join_size is 4G, OPTION_BIG_SELECTS is automatically set, but
+     only for client threads.
+  */
+  ulonglong options= (thd->variables.option_bits |
+                      OPTION_BIG_SELECTS | OPTION_BIN_LOG);
+  if (!opt_log_slave_updates)
+    options&= ~OPTION_BIN_LOG;
+  /* For easier test in LOGGER::log_command */
+  if (thd->variables.log_disabled_statements & LOG_DISABLE_SLAVE)
+    options|= OPTION_LOG_OFF;
+  thd->variables.option_bits= options;
+
+  thd->variables.completion_type= 0;
+  thd->variables.sql_log_slow=
+    !MY_TEST(thd->variables.log_slow_disabled_statements &
+             LOG_SLOW_DISABLE_SLAVE);
+  DBUG_VOID_RETURN;
+}
+
+void set_slave_thread_default_charset(THD* thd, rpl_group_info *rgi)
+{
+  DBUG_ENTER("set_slave_thread_default_charset");
+
+  thd->variables.collation_server=
+    global_system_variables.collation_server;
+  thd->update_charset(global_system_variables.character_set_client,
+                      global_system_variables.collation_connection);
+
+  thd->system_thread_info.rpl_sql_info->cached_charset_invalidate();
+  DBUG_VOID_RETURN;
+}
+
/*
  init_slave_thread()

  Initialize a THD for use by a slave IO or SQL thread: mark it as a
  system thread, set up thread-local storage and the network buffer,
  skip privilege checks, and apply slave-specific option bits.

  @param thd       Freshly created THD to initialize
  @param mi        Master connection this thread will serve
  @param thd_type  SLAVE_THD_IO or SLAVE_THD_SQL

  @return 0 on success, -1 on failure (thd->cleanup() already called)
*/

static int init_slave_thread(THD* thd, Master_info *mi,
                             SLAVE_THD_TYPE thd_type)
{
  DBUG_ENTER("init_slave_thread");
  int simulate_error __attribute__((unused))= 0;
  /* Debug hooks: force an init failure for the IO or SQL thread type. */
  DBUG_EXECUTE_IF("simulate_io_slave_error_on_init",
                  simulate_error|= (1 << SLAVE_THD_IO););
  DBUG_EXECUTE_IF("simulate_sql_slave_error_on_init",
                  simulate_error|= (1 << SLAVE_THD_SQL););

  thd->system_thread = (thd_type == SLAVE_THD_SQL) ?
    SYSTEM_THREAD_SLAVE_SQL : SYSTEM_THREAD_SLAVE_IO;

  if (init_thr_lock())
  {
    thd->cleanup();
    DBUG_RETURN(-1);
  }

  /* We must call store_globals() before doing my_net_init() */
  thd->store_globals();

  if (my_net_init(&thd->net, 0, thd, MYF(MY_THREAD_SPECIFIC)) ||
      IF_DBUG(simulate_error & (1<< thd_type), 0))
  {
    thd->cleanup();
    DBUG_RETURN(-1);
  }

  /* Replicated events are executed with full privileges. */
  thd->security_ctx->skip_grants();
  thd->slave_thread= 1;
  thd->connection_name= mi->connection_name;
  /* NOTE(review): set_slave_thread_options() below assigns sql_log_slow
     with the identical expression — this line looks redundant; confirm. */
  thd->variables.sql_log_slow= !MY_TEST(thd->variables.log_slow_disabled_statements & LOG_SLOW_DISABLE_SLAVE);
  set_slave_thread_options(thd);

  if (thd_type == SLAVE_THD_SQL)
    THD_STAGE_INFO(thd, stage_waiting_for_the_next_event_in_relay_log);
  else
    THD_STAGE_INFO(thd, stage_waiting_for_master_update);
  thd->set_time();
  /* Do not use user-supplied timeout value for system threads. */
  thd->variables.lock_wait_timeout= LONG_TIMEOUT;
  DBUG_RETURN(0);
}
+
+/*
+  Sleep for a given amount of time or until killed.
+
+  @param thd        Thread context of the current thread.
+  @param seconds    The number of seconds to sleep.
+  @param func       Function object to check if the thread has been killed.
+  @param info       The Rpl_info object associated with this sleep.
+
+  @retval True if the thread has been killed, false otherwise.
+*/
+template 
+static bool slave_sleep(THD *thd, time_t seconds,
+                        killed_func func, rpl_info info)
+{
+
+  bool ret;
+  struct timespec abstime;
+
+  mysql_mutex_t *lock= &info->sleep_lock;
+  mysql_cond_t *cond= &info->sleep_cond;
+
+  /* Absolute system time at which the sleep time expires. */
+  set_timespec(abstime, seconds);
+  mysql_mutex_lock(lock);
+  thd->ENTER_COND(cond, lock, NULL, NULL);
+
+  while (! (ret= func(info)))
+  {
+    int error= mysql_cond_timedwait(cond, lock, &abstime);
+    if (error == ETIMEDOUT || error == ETIME)
+      break;
+  }
+  /* Implicitly unlocks the mutex. */
+  thd->EXIT_COND(NULL);
+  return ret;
+}
+
+
/*
  Send COM_BINLOG_DUMP to the master, asking it to start streaming binlog
  events from mi->master_log_name / mi->master_log_pos.

  @param thd                Current thread (unused here beyond context)
  @param mysql              Open connection to the master
  @param mi                 Master connection info (log name/pos, retry time)
  @param suppress_warnings  Set to TRUE when the failure is a normal
                            reconnect situation that should not be logged

  @return 0 on success, 1 on error (semisync rejection or network failure)
*/
static int request_dump(THD *thd, MYSQL* mysql, Master_info* mi,
			bool *suppress_warnings)
{
  uchar buf[FN_REFLEN + 10];
  int len;
  ushort binlog_flags = 0; // for now
  char* logname = mi->master_log_name;
  DBUG_ENTER("request_dump");
  
  *suppress_warnings= FALSE;

  if (opt_log_slave_updates && opt_replicate_annotate_row_events)
    binlog_flags|= BINLOG_SEND_ANNOTATE_ROWS_EVENT;

  if (repl_semisync_slave.request_transmit(mi))
    DBUG_RETURN(1);

  // TODO if big log files: Change next to int8store()
  /* COM_BINLOG_DUMP payload: pos(4), flags(2), server_id(4), log name. */
  int4store(buf, (ulong) mi->master_log_pos);
  int2store(buf + 4, binlog_flags);
  int4store(buf + 6, global_system_variables.server_id);
  len = (uint) strlen(logname);
  memcpy(buf + 10, logname,len);
  if (simple_command(mysql, COM_BINLOG_DUMP, buf, len + 10, 1))
  {
    /*
      Something went wrong, so we will just reconnect and retry later
      in the future, we should do a better error analysis, but for
      now we just fill up the error log :-)
    */
    if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED ||
        mysql_errno(mysql) == ER_NET_ERROR_ON_WRITE)
      *suppress_warnings= TRUE;                 // Suppress reconnect warning
    else
      sql_print_error("Error on COM_BINLOG_DUMP: %d  %s, will retry in %d secs",
                      mysql_errno(mysql), mysql_error(mysql),
                      mi->connect_retry);
    DBUG_RETURN(1);
  }

  DBUG_RETURN(0);
}
+
+
/*
  Read one event from the master

  SYNOPSIS
    read_event()
    mysql               MySQL connection
    mi                  Master connection information
    suppress_warnings   TRUE when a normal net read timeout has caused us to
                        try a reconnect.  We do not want to print anything to
                        the error log in this case because this is a normal
                        event on an idle server.
    network_read_len    get the real network read length in VIO, especially
                        when using the compressed protocol

    RETURN VALUES
    'packet_error'      Error
    number              Length of packet
*/

static ulong read_event(MYSQL* mysql, Master_info *mi, bool* suppress_warnings, 
                        ulong* network_read_len)
{
  ulong len;
  DBUG_ENTER("read_event");

  *suppress_warnings= FALSE;
  /*
    my_real_read() will time us out
    We check if we were told to die, and if not, try reading again
  */
#ifndef DBUG_OFF
  /* Debug-only: simulate a disconnect every N events. */
  if (disconnect_slave_event_count && !(mi->events_till_disconnect--))
    DBUG_RETURN(packet_error);
#endif

  len = cli_safe_read_reallen(mysql, network_read_len);
  if (unlikely(len == packet_error || (long) len < 1))
  {
    if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
    {
      /*
        We are trying a normal reconnect after a read timeout;
        we suppress prints to .err file as long as the reconnect
        happens without problems
      */
      *suppress_warnings=
        global_system_variables.log_warnings < 2 ? TRUE : FALSE;
    }
    else
    {
      if (!mi->rli.abort_slave)
      {
        sql_print_error("Error reading packet from server: %s (server_errno=%d)",
                        mysql_error(mysql), mysql_errno(mysql));
      }
    }
    DBUG_RETURN(packet_error);
  }

  /* Check if eof packet (first byte 254, short length) — master shutdown */
  if (len < 8 && mysql->net.read_pos[0] == 254)
  {
    sql_print_information("Slave: received end packet from server, apparent "
                          "master shutdown: %s",
                     mysql_error(mysql));
     DBUG_RETURN(packet_error);
  }

  DBUG_PRINT("exit", ("len: %lu  net->read_pos[4]: %d",
                      len, mysql->net.read_pos[4]));
  /* len includes the network packet header byte; return event length only. */
  DBUG_RETURN(len - 1);
}
+
+
+/**
+  Check if the current error is of temporary nature of not.
+  Some errors are temporary in nature, such as
+  ER_LOCK_DEADLOCK and ER_LOCK_WAIT_TIMEOUT.
+
+  @retval 0 if fatal error
+  @retval 1 temporary error, do retry
+*/
+
+int
+has_temporary_error(THD *thd)
+{
+  uint current_errno;
+  DBUG_ENTER("has_temporary_error");
+
+  DBUG_EXECUTE_IF("all_errors_are_temporary_errors",
+                  if (thd->get_stmt_da()->is_error())
+                  {
+                    thd->clear_error();
+                    my_error(ER_LOCK_DEADLOCK, MYF(0));
+                  });
+
+  /*
+    If there is no message in THD, we can't say if it's a temporary
+    error or not. This is currently the case for Incident_log_event,
+    which sets no message. Return FALSE.
+  */
+  if (!likely(thd->is_error()))
+    DBUG_RETURN(0);
+
+  current_errno= thd->get_stmt_da()->sql_errno();
+  for (uint i= 0; i < slave_transaction_retry_error_length; i++)
+  {
+    if (current_errno == slave_transaction_retry_errors[i])
+      DBUG_RETURN(1);
+  }
+
+  DBUG_RETURN(0);
+}
+
+
/**
  If this is a lagging slave (specified with CHANGE MASTER TO
  MASTER_DELAY = X), delays accordingly. Also unlocks rli->data_lock.

  Design note: this is the place to unlock rli->data_lock. The lock
  must be held when reading delay info from rli, but it should not be
  held while sleeping.

  @param ev Event that is about to be executed.

  @param thd The sql thread's THD object.

  @param rli The sql thread's Relay_log_info structure.

  @retval 0 If the delay timed out and the event shall be executed.

  @retval nonzero If the delay was interrupted and the event shall be skipped.
*/
int
sql_delay_event(Log_event *ev, THD *thd, rpl_group_info *rgi)
{
  Relay_log_info* rli= rgi->rli;
  long sql_delay= rli->get_sql_delay();

  DBUG_ENTER("sql_delay_event");
  mysql_mutex_assert_owner(&rli->data_lock);
  DBUG_ASSERT(!rli->belongs_to_client());

  /* Administrative events (rotate/FD/start) are never delayed. */
  int type= ev->get_type_code();
  if (sql_delay && type != ROTATE_EVENT &&
      type != FORMAT_DESCRIPTION_EVENT && type != START_EVENT_V3)
  {
    // The time when we should execute the event.
    time_t sql_delay_end=
      ev->when + rli->mi->clock_diff_with_master + sql_delay;
    // The current time.
    time_t now= my_time(0);
    // The time we will have to sleep before executing the event.
    unsigned long nap_time= 0;
    if (sql_delay_end > now)
      nap_time= (ulong)(sql_delay_end - now);

    DBUG_PRINT("info", ("sql_delay= %lu "
                        "ev->when= %lu "
                        "rli->mi->clock_diff_with_master= %lu "
                        "now= %ld "
                        "sql_delay_end= %llu "
                        "nap_time= %ld",
                        sql_delay, (long)ev->when,
                        rli->mi->clock_diff_with_master,
                        (long)now, (ulonglong)sql_delay_end, (long)nap_time));

    if (sql_delay_end > now)
    {
      DBUG_PRINT("info", ("delaying replication event %lu secs",
                          nap_time));
      rli->start_sql_delay(sql_delay_end);
      /* Release the lock before sleeping (see design note above). */
      mysql_mutex_unlock(&rli->data_lock);
      DBUG_RETURN(slave_sleep(thd, nap_time, sql_slave_killed, rgi));
    }
  }

  mysql_mutex_unlock(&rli->data_lock);

  DBUG_RETURN(0);
}
+
+
/*
  First half of apply_event_and_update_pos(), see below.
  Setup some THD variables for applying the event.

  Split out so that it can run with rli->data_lock held in non-parallel
  replication, but without the mutex held in the parallel case.

  @return the skip decision from ev->shall_skip(): EVENT_SKIP_NOT,
          EVENT_SKIP_IGNORE or EVENT_SKIP_COUNT.
*/
static int
apply_event_and_update_pos_setup(Log_event* ev, THD* thd, rpl_group_info *rgi)
{
  DBUG_ENTER("apply_event_and_update_pos_setup");

  DBUG_PRINT("exec_event",("%s(type_code: %d; server_id: %d)",
                           ev->get_type_str(), ev->get_type_code(),
                           ev->server_id));
  DBUG_PRINT("info", ("thd->options: '%s%s%s'  rgi->last_event_start_time: %lu",
                      FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
                      FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
                      FLAGSTR(thd->variables.option_bits, OPTION_GTID_BEGIN),
                      (ulong) rgi->last_event_start_time));

  /*
    Execute the event to change the database and update the binary
    log coordinates, but first we set some data that is needed for
    the thread.

    The event will be executed unless it is supposed to be skipped.

    Queries originating from this server must be skipped.  Low-level
    events (Format_description_log_event, Rotate_log_event,
    Stop_log_event) from this server must also be skipped. But for
    those we don't want to modify 'group_master_log_pos', because
    these events did not exist on the master.
    Format_description_log_event is not completely skipped.

    Skip queries specified by the user in 'slave_skip_counter'.  We
    can't however skip events that has something to do with the log
    files themselves.

    Filtering on own server id is extremely important, to ignore
    execution of events created by the creation/rotation of the relay
    log (remember that now the relay log starts with its Format_desc,
    has a Rotate etc).
  */

  /* Use the original server id for logging. */
  thd->variables.server_id = ev->server_id;
  thd->set_time();                            // time the query
  thd->lex->current_select= 0;
  /* Propagate the event's skip-replication flag into the session. */
  thd->variables.option_bits=
    (thd->variables.option_bits & ~OPTION_SKIP_REPLICATION) |
    (ev->flags & LOG_EVENT_SKIP_REPLICATION_F ? OPTION_SKIP_REPLICATION : 0);
  ev->thd = thd; // because up to this point, ev->thd == 0

  DBUG_RETURN(ev->shall_skip(rgi));
}
+
+
/*
  Second half of apply_event_and_update_pos(), see below.

  Do the actual event apply (or skip), and position update.

  @param reason  Skip decision from apply_event_and_update_pos_setup().

  @retval 0  event applied (or skipped) and position updated
  @retval 1  ev->apply_event() failed
  @retval 2  apply succeeded but ev->update_pos() failed
 */
static int
apply_event_and_update_pos_apply(Log_event* ev, THD* thd, rpl_group_info *rgi,
                                 int reason)
{
  int exec_res= 0;
  Relay_log_info* rli= rgi->rli;

  DBUG_ENTER("apply_event_and_update_pos_apply");
#ifdef ENABLED_DEBUG_SYNC
  DBUG_EXECUTE_IF("inject_slave_sql_before_apply_event",
    {
      DBUG_ASSERT(!debug_sync_set_action
                  (thd, STRING_WITH_LEN("now WAIT_FOR continue")));
      DBUG_SET_INITIAL("-d,inject_slave_sql_before_apply_event");
    };);
#endif
  /* Only actually execute the event when it was not marked for skipping. */
  if (reason == Log_event::EVENT_SKIP_NOT)
    exec_res= ev->apply_event(rgi);

#ifdef WITH_WSREP
  if (WSREP(thd)) {

    if (exec_res) {
      mysql_mutex_lock(&thd->LOCK_thd_data);
      switch(thd->wsrep_trx().state()) {
      case wsrep::transaction::s_must_replay:
        /* this transaction will be replayed,
           so not raising slave error here */
        WSREP_DEBUG("SQL apply failed for MUST_REPLAY, res %d", exec_res);
	exec_res = 0;
        break;
      default:
          WSREP_DEBUG("SQL apply failed, res %d conflict state: %s",
                      exec_res, wsrep_thd_transaction_state_str(thd));
          /*
            async replication thread should be stopped, if failure was
            not due to optimistic parallel applying or if node
            has dropped from cluster
           */
          if (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL &&
              ((rli->mi->using_parallel() &&
                rli->mi->parallel_mode <= SLAVE_PARALLEL_CONSERVATIVE) ||
               wsrep_ready == 0)) {
            rli->abort_slave= 1;
            rli->report(ERROR_LEVEL, ER_UNKNOWN_COM_ERROR, rgi->gtid_info(),
                        "Node has dropped from cluster");
          }
          break;
      }
      mysql_mutex_unlock(&thd->LOCK_thd_data);
    }
  }
#endif

#ifdef DBUG_TRACE
  /*
    This only prints information to the debug trace.

    TODO: Print an informational message to the error log?
  */
  static const char *const explain[] = {
    // EVENT_SKIP_NOT,
    "not skipped",
    // EVENT_SKIP_IGNORE,
    "skipped because event should be ignored",
    // EVENT_SKIP_COUNT
    "skipped because event skip counter was non-zero"
  };
  DBUG_PRINT("info", ("OPTION_BEGIN: %d  IN_STMT: %d  IN_TRANSACTION: %d",
                      MY_TEST(thd->variables.option_bits & OPTION_BEGIN),
                      rli->get_flag(Relay_log_info::IN_STMT),
                      rli->get_flag(Relay_log_info::IN_TRANSACTION)));
  DBUG_PRINT("skip_event", ("%s event was %s",
                            ev->get_type_str(), explain[reason]));
#endif

  DBUG_PRINT("info", ("apply_event error = %d", exec_res));
  if (exec_res == 0)
  {
    /* Start-Alter whose group was already finished: position is done. */
    if (thd->rgi_slave && (thd->rgi_slave->gtid_ev_flags_extra &
                           Gtid_log_event::FL_START_ALTER_E1) &&
        thd->rgi_slave->get_finish_event_group_called())
      DBUG_RETURN(exec_res ? 1 : 0);
    int error= ev->update_pos(rgi);
#ifdef DBUG_TRACE
    DBUG_PRINT("info", ("update_pos error = %d", error));
    if (!rli->belongs_to_client())
    {
      DBUG_PRINT("info", ("group %llu %s", rli->group_relay_log_pos,
                          rli->group_relay_log_name));
      DBUG_PRINT("info", ("event %llu %s", rli->event_relay_log_pos,
                          rli->event_relay_log_name));
    }
#endif
    /*
      The update should not fail, so print an error message and
      return an error code.

      TODO: Replace this with a decent error message when merged
      with BUG#24954 (which adds several new error message).
    */
    if (unlikely(error))
    {
      rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR, rgi->gtid_info(),
                  "It was not possible to update the positions"
                  " of the relay log information: the slave may"
                  " be in an inconsistent state."
                  " Stopped in %s position %llu",
                  rli->group_relay_log_name, rli->group_relay_log_pos);
      DBUG_RETURN(2);
    }
  }
  else
  {
    /*
      Make sure we do not erroneously update gtid_slave_pos with a lingering
      GTID from this failed event group (MDEV-4906).
    */
    rgi->gtid_pending= false;
  }

  DBUG_RETURN(exec_res ? 1 : 0);
}
+
+
+/**
+  Applies the given event and advances the relay log position.
+
+  This is needed by the sql thread to execute events from the binlog,
+  and by clients executing BINLOG statements.  Conceptually, this
+  function does:
+
+  @code
+    ev->apply_event(rli);
+    ev->update_pos(rli);
+  @endcode
+
+  It also does the following maintainance:
+
+   - Initializes the thread's server_id and time; and the event's
+     thread.
+
+   - If !rli->belongs_to_client() (i.e., if it belongs to the slave
+     sql thread instead of being used for executing BINLOG
+     statements), it does the following things: (1) skips events if it
+     is needed according to the server id or slave_skip_counter; (2)
+     unlocks rli->data_lock; (3) sleeps if required by 'CHANGE MASTER
+     TO MASTER_DELAY=X'; (4) maintains the running state of the sql
+     thread (rli->thread_state).
+
+   - Reports errors as needed.
+
+  @param ev The event to apply.
+
+  @param thd The client thread that executes the event (i.e., the
+  slave sql thread if called from a replication slave, or the client
+  thread if called to execute a BINLOG statement).
+
+  @param rli The relay log info (i.e., the slave's rli if called from
+  a replication slave, or the client's thd->rli_fake if called to
+  execute a BINLOG statement).
+
+  @retval 0 OK.
+
+  @retval 1 Error calling ev->apply_event().
+
+  @retval 2 No error calling ev->apply_event(), but error calling
+  ev->update_pos().
+
+  This function is only used in non-parallel replication, where it is called
+  with rli->data_lock held; this lock is released during this function.
+*/
+int
+apply_event_and_update_pos(Log_event* ev, THD* thd, rpl_group_info *rgi)
+{
+  Relay_log_info* rli= rgi->rli;
+  mysql_mutex_assert_owner(&rli->data_lock);
+  int reason= apply_event_and_update_pos_setup(ev, thd, rgi);
+  if (reason == Log_event::EVENT_SKIP_COUNT)
+  {
+    DBUG_ASSERT(rli->slave_skip_counter > 0);
+    rli->slave_skip_counter--;
+  }
+
+  if (reason == Log_event::EVENT_SKIP_NOT)
+  {
+    // Sleeps if needed, and unlocks rli->data_lock.
+    if (sql_delay_event(ev, thd, rgi))
+      return 0;
+  }
+  else
+    mysql_mutex_unlock(&rli->data_lock);
+
+  return apply_event_and_update_pos_apply(ev, thd, rgi, reason);
+}
+
+
+/*
+  The version of above apply_event_and_update_pos() used in parallel
+  replication. Unlike the non-parallel case, this function is called without
+  rli->data_lock held.
+*/
+int
+apply_event_and_update_pos_for_parallel(Log_event* ev, THD* thd,
+                                        rpl_group_info *rgi)
+{
+  int rc= 0;
+  ulong retries= 0;
+  bool  is_sa= rgi->gtid_ev_flags_extra == Gtid_log_event::FL_START_ALTER_E1;
+  bool  is_sa_temp_err= false;
+
+  mysql_mutex_assert_not_owner(&rgi->rli->data_lock);
+  int reason= apply_event_and_update_pos_setup(ev, thd, rgi);
+  /*
+    In parallel replication, sql_slave_skip_counter is handled in the SQL
+    driver thread, so 23 should never see EVENT_SKIP_COUNT here.
+  */
+  DBUG_ASSERT(reason != Log_event::EVENT_SKIP_COUNT);
+  /*
+    Calling sql_delay_event() was handled in the SQL driver thread when
+    doing parallel replication.
+  */
+  do
+  {
+    rc= apply_event_and_update_pos_apply(ev, thd, rgi, reason);
+    if (rc && is_sa)
+    {
+      is_sa_temp_err=
+        is_parallel_retry_error(rgi, thd->get_stmt_da()->sql_errno());
+    }
+  }
+  while(is_sa_temp_err && retries++ < slave_trans_retries);
+
+  if (is_sa_temp_err)
+  {
+    Master_info *mi= rgi->rli->mi;
+    mysql_mutex_lock(&mi->start_alter_lock);
+
+    DBUG_ASSERT(!rgi->sa_info->direct_commit_alter);
+    /*
+      Give up retrying to hand the whole ALTER execution over to
+      the "Complete" ALTER.
+    */
+    rgi->sa_info->direct_commit_alter= true;
+    rgi->sa_info->state= start_alter_state::COMPLETED;
+    mysql_cond_broadcast(&rgi->sa_info->start_alter_cond);
+    mysql_mutex_unlock(&mi->start_alter_lock);
+    if (global_system_variables.log_warnings > 2)
+    {
+      rpl_gtid *gtid= &rgi->current_gtid;
+      sql_print_information("Start Alter Query '%s' "
+                            "GTID %u-%u-%llu having a temporary error %d code "
+                            "has been unsuccessfully retried %lu times; its "
+                            "parallel optimistic execution now proceeds in "
+                            "legacy mode",
+                            static_cast(ev)->query,
+                            gtid->domain_id, gtid->server_id, gtid->seq_no,
+                            thd->get_stmt_da()->sql_errno(), retries - 1);
+    }
+    thd->clear_error();
+    thd->reset_killed();
+    rgi->killed_for_retry = rpl_group_info::RETRY_KILL_NONE;
+
+    rc= false;
+  }
+
+  return rc;
+}
+
+
/**
   Keep the relay log transaction state up to date.

   The state reflects how things are after the given event, that has just been
   read from the relay log, is executed.

   This is only needed to ensure we:
   - Don't abort the sql driver thread in the middle of an event group.
   - Don't rotate the io thread in the middle of a statement or transaction.
     The mechanism is that the io thread, when it needs to rotate the relay
     log, will wait until the sql driver has read all the cached events
     and then continue reading events one by one from the master until
     the sql threads signals that log doesn't have an active group anymore.

     There are two possible cases. We keep them as 2 separate flags mainly
     to make debugging easier.

     - IN_STMT is set when we have read an event that should be used
       together with the next event.  This is for example setting a
       variable that is used when executing the next statement.
     - IN_TRANSACTION is set when we are inside a BEGIN...COMMIT group

     To test the state one should use the is_in_group() function.
*/

inline void update_state_of_relay_log(Relay_log_info *rli, Log_event *ev)
{
  Log_event_type typ= ev->get_type_code();

  /* check if we are in a multi part event */
  if (ev->is_part_of_group())
    rli->set_flag(Relay_log_info::IN_STMT);
  else if (Log_event::is_group_event(typ))
  {
    /*
      If it was not a is_part_of_group() and not a group event (like
      rotate) then we can reset the IN_STMT flag.  We have the above
      if only to allow us to have a rotate element anywhere.
    */
    rli->clear_flag(Relay_log_info::IN_STMT);
  }

  /* Check for an event that starts or stops a transaction */
  if (LOG_EVENT_IS_QUERY(typ))
  {
    Query_log_event *qev= (Query_log_event*) ev;
    /*
      Trivial optimization to avoid the following somewhat expensive
      checks.
    */
    if (qev->q_len <= sizeof("ROLLBACK"))
    {
      if (qev->is_begin())
        rli->set_flag(Relay_log_info::IN_TRANSACTION);
      if (qev->is_commit() || qev->is_rollback())
        rli->clear_flag(Relay_log_info::IN_TRANSACTION);
    }
  }
  /* XID / XA PREPARE terminate the current transaction. */
  if (typ == XID_EVENT || typ == XA_PREPARE_LOG_EVENT)
    rli->clear_flag(Relay_log_info::IN_TRANSACTION);
  if (typ == GTID_EVENT &&
      !(((Gtid_log_event*) ev)->flags2 & Gtid_log_event::FL_STANDALONE))
  {
    /* This GTID_EVENT will generate a BEGIN event */
    rli->set_flag(Relay_log_info::IN_TRANSACTION);
  }

  DBUG_PRINT("info", ("event: %u  IN_STMT: %d  IN_TRANSACTION: %d",
                      (uint) typ,
                      rli->get_flag(Relay_log_info::IN_STMT),
                      rli->get_flag(Relay_log_info::IN_TRANSACTION)));
}
+
+
+/**
+  Top-level function for executing the next event in the relay log.
+  This is called from the SQL thread.
+
+  This function reads the event from the relay log, executes it, and
+  advances the relay log position.  It also handles errors, etc.
+
+  This function may fail to apply the event for the following reasons:
+
+   - The position specified by the UNTIL condition of the START SLAVE
+     command is reached.
+
+   - It was not possible to read the event from the log.
+
+   - The slave is killed.
+
+   - An error occurred when applying the event, and the event has been
+     tried slave_trans_retries times.  If the event has been retried
+     fewer times, 0 is returned.
+
+   - init_master_info or init_relay_log_pos failed. (These are called
+     if a failure occurs when applying the event.)
+
+   - An error occurred when updating the binlog position.
+
+  @retval 0 The event was applied.
+
+  @retval 1 The event was not applied.
+*/
+
+static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
+                                rpl_group_info *serial_rgi)
+{
+  ulonglong event_size;
+  DBUG_ENTER("exec_relay_log_event");
+
+  /*
+    We acquire this mutex since we need it for all operations except
+    event execution. But we will release it in places where we will
+    wait for something for example inside of next_event().
+  */
+  mysql_mutex_lock(&rli->data_lock);
+
+  Log_event *ev= next_event(serial_rgi, &event_size);
+
+  if (sql_slave_killed(serial_rgi))
+  {
+    mysql_mutex_unlock(&rli->data_lock);
+    delete ev;
+    DBUG_RETURN(1);
+  }
+  if (ev)
+  {
+#ifdef WITH_WSREP
+    if (wsrep_before_statement(thd))
+    {
+      mysql_mutex_unlock(&rli->data_lock);
+      delete ev;
+      WSREP_INFO("Wsrep before statement error");
+      DBUG_RETURN(1);
+    }
+#endif /* WITH_WSREP */
+    int exec_res;
+    Log_event_type typ= ev->get_type_code();
+
+    DBUG_EXECUTE_IF(
+        "pause_sql_thread_on_next_event",
+        {
+          /*
+            Temporarily unlock data_lock so we can check-in with the IO thread
+          */
+          mysql_mutex_unlock(&rli->data_lock);
+          DBUG_ASSERT(!debug_sync_set_action(
+              thd,
+              STRING_WITH_LEN(
+                  "now SIGNAL paused_on_event WAIT_FOR sql_thread_continue")));
+          mysql_mutex_lock(&rli->data_lock);
+        });
+
+    /*
+      Even if we don't execute this event, we keep the master timestamp,
+      so that seconds behind master shows correct delta (there are events
+      that are not replayed, so we keep falling behind).
+
+      If it is an artificial event, or a relay log event (IO thread generated
+      event) or ev->when is set to 0, we don't update the
+      last_master_timestamp.
+
+      In parallel replication, we might queue a large number of events, and
+      the user might be surprised to see a claim that the slave is up to date
+      long before those queued events are actually executed.
+     */
+    if ((!rli->mi->using_parallel()) &&
+        event_can_update_last_master_timestamp(ev))
+    {
+      rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
+      rli->sql_thread_caught_up= false;
+      DBUG_ASSERT(rli->last_master_timestamp >= 0);
+    }
+
+    /*
+      This tests if the position of the beginning of the current event
+      hits the UNTIL barrier.
+    */
+    if ((rli->until_condition == Relay_log_info::UNTIL_MASTER_POS ||
+         rli->until_condition == Relay_log_info::UNTIL_RELAY_POS) &&
+        (ev->server_id != global_system_variables.server_id ||
+         rli->replicate_same_server_id) &&
+        rli->is_until_satisfied(ev))
+    {
+      /*
+        Setting abort_slave flag because we do not want additional
+        message about error in query execution to be printed.
+      */
+      rli->abort_slave= 1;
+      rli->stop_for_until= true;
+      mysql_mutex_unlock(&rli->data_lock);
+#ifdef WITH_WSREP
+      wsrep_after_statement(thd);
+#endif /* WITH_WSREP */
+      delete ev;
+      DBUG_RETURN(1);
+    }
+
+    { /**
+         The following failure injecion works in cooperation with tests 
+         setting @@global.debug= 'd,incomplete_group_in_relay_log'.
+         Xid or Commit events are not executed to force the slave sql
+         read hanging if the realy log does not have any more events.
+      */
+      DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
+                      if ((typ == XID_EVENT) ||
+                          (LOG_EVENT_IS_QUERY(typ) &&
+                           strcmp("COMMIT", ((Query_log_event *) ev)->query) == 0))
+                      {
+                        DBUG_ASSERT(thd->transaction->all.modified_non_trans_table);
+                        rli->abort_slave= 1;
+                        mysql_mutex_unlock(&rli->data_lock);
+                        delete ev;
+                        serial_rgi->inc_event_relay_log_pos();
+                        DBUG_RETURN(0);
+                      };);
+    }
+
+    update_state_of_relay_log(rli, ev);
+
+    if (rli->mi->using_parallel())
+    {
+      /*
+        rli->sql_thread_caught_up is checked and negated here to ensure that
+        the value of Seconds_Behind_Master in SHOW SLAVE STATUS is consistent
+        with the update of last_master_timestamp. It was previously unset
+        immediately after reading an event from the relay log; however, for the
+        duration between that unset and the time that LMT would be updated
+        could lead to spikes in SBM.
+
+        The check for queued_count == dequeued_count ensures the worker threads
+        are all idle (i.e. all events have been executed).
+      */
+      if ((unlikely(rli->last_master_timestamp == 0) ||
+           (rli->sql_thread_caught_up &&
+            (rli->last_inuse_relaylog->queued_count ==
+             rli->last_inuse_relaylog->dequeued_count))) &&
+          event_can_update_last_master_timestamp(ev))
+      {
+        if (rli->last_master_timestamp < ev->when)
+        {
+          rli->last_master_timestamp= ev->when;
+        }
+        rli->sql_thread_caught_up= false;
+      }
+
+      int res= rli->parallel.do_event(serial_rgi, ev, event_size);
+      /*
+        In parallel replication, we need to update the relay log position
+        immediately so that it will be the correct position from which to
+        read the next event.
+      */
+      if (res == 0)
+        rli->event_relay_log_pos= rli->future_event_relay_log_pos;
+      if (res >= 0)
+      {
+#ifdef WITH_WSREP
+	wsrep_after_statement(thd);
+#endif /* WITH_WSREP */
+        DBUG_RETURN(res);
+      }
+      /*
+        Else we proceed to execute the event non-parallel.
+        This is the case for pre-10.0 events without GTID, and for handling
+        slave_skip_counter.
+      */
+      if (event_can_update_last_master_timestamp(ev))
+      {
+        /*
+          Ignore FD's timestamp as it does not reflect the slave execution
+          state but likely to reflect a deep past. Consequently when the first
+          data modification event execution last long all this time
+          Seconds_Behind_Master is zero.
+        */
+        if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT &&
+            rli->last_master_timestamp < ev->when)
+          rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
+
+        DBUG_ASSERT(rli->last_master_timestamp >= 0);
+      }
+    }
+
+    if (typ == GTID_EVENT)
+    {
+      Gtid_log_event *gev= static_cast(ev);
+
+      /*
+        For GTID, allocate a new sub_id for the given domain_id.
+        The sub_id must be allocated in increasing order of binlog order.
+      */
+      if (event_group_new_gtid(serial_rgi, gev))
+      {
+        sql_print_error("Error reading relay log event: %s", "slave SQL thread "
+                        "aborted because of out-of-memory error");
+        mysql_mutex_unlock(&rli->data_lock);
+        delete ev;
+#ifdef WITH_WSREP
+	  wsrep_after_statement(thd);
+#endif /* WITH_WSREP */
+        DBUG_RETURN(1);
+      }
+
+      rli->last_seen_gtid= serial_rgi->current_gtid;
+      rli->last_trans_retry_count= serial_rgi->trans_retries;
+      if (opt_gtid_ignore_duplicates &&
+          rli->mi->using_gtid != Master_info::USE_GTID_NO)
+      {
+        int res= rpl_global_gtid_slave_state->check_duplicate_gtid
+          (&serial_rgi->current_gtid, serial_rgi);
+        if (res < 0)
+        {
+          sql_print_error("Error processing GTID event: %s", "slave SQL "
+                          "thread aborted because of out-of-memory error");
+          mysql_mutex_unlock(&rli->data_lock);
+          delete ev;
+#ifdef WITH_WSREP
+          wsrep_after_statement(thd);
+#endif /* WITH_WSREP */
+          DBUG_RETURN(1);
+        }
+        /*
+          If we need to skip this event group (because the GTID was already
+          applied), then do it using the code for slave_skip_counter, which
+          is able to handle skipping until the end of the event group.
+        */
+        if (!res)
+          rli->slave_skip_counter= 1;
+      }
+    }
+
+    serial_rgi->future_event_relay_log_pos= rli->future_event_relay_log_pos;
+    serial_rgi->event_relay_log_name= rli->event_relay_log_name;
+    serial_rgi->event_relay_log_pos= rli->event_relay_log_pos;
+    exec_res= apply_event_and_update_pos(ev, thd, serial_rgi);
+
+#ifdef WITH_WSREP
+    WSREP_DEBUG("apply_event_and_update_pos() result: %d", exec_res);
+#endif /* WITH_WSREP */
+
+    delete_or_keep_event_post_apply(serial_rgi, typ, ev);
+
+    /*
+      update_log_pos failed: this should not happen, so we don't
+      retry.
+    */
+    if (unlikely(exec_res == 2))
+    {
+#ifdef WITH_WSREP
+      wsrep_after_statement(thd);
+#endif /* WITH_WSREP */
+      DBUG_RETURN(1);
+    }
+#ifdef WITH_WSREP
+    mysql_mutex_lock(&thd->LOCK_thd_data);
+    enum wsrep::client_error wsrep_error= thd->wsrep_cs().current_error();
+    mysql_mutex_unlock(&thd->LOCK_thd_data);
+    if (wsrep_error == wsrep::e_success)
+#endif /* WITH_WSREP */
+    if (slave_trans_retries)
+    {
+      int UNINIT_VAR(temp_err);
+      if (unlikely(exec_res) && (temp_err= has_temporary_error(thd)))
+      {
+        const char *errmsg;
+        rli->clear_error();
+        /*
+          We were in a transaction which has been rolled back because of a
+          temporary error;
+          let's seek back to BEGIN log event and retry it all again.
+          Note, if lock wait timeout (innodb_lock_wait_timeout exceeded)
+          there is no rollback since 5.0.13 (ref: manual).
+          We have to not only seek but also
+
+          a) init_master_info(), to seek back to hot relay log's start
+          for later (for when we will come back to this hot log after
+          re-processing the possibly existing old logs where BEGIN is:
+          check_binlog_magic() will then need the cache to be at
+          position 0 (see comments at beginning of
+          init_master_info()).
+          b) init_relay_log_pos(), because the BEGIN may be an older relay log.
+        */
+        if (serial_rgi->trans_retries < slave_trans_retries)
+        {
+          if (init_master_info(rli->mi, 0, 0, 0, SLAVE_SQL))
+            sql_print_error("Failed to initialize the master info structure");
+          else if (init_relay_log_pos(rli,
+                                      rli->group_relay_log_name,
+                                      rli->group_relay_log_pos,
+                                      1, &errmsg, 1))
+            sql_print_error("Error initializing relay log position: %s",
+                            errmsg);
+          else
+          {
+            exec_res= 0;
+            serial_rgi->cleanup_context(thd, 1);
+            /* chance for concurrent connection to get more locks */
+            slave_sleep(thd, MY_MAX(MY_MIN(serial_rgi->trans_retries,
+                                    MAX_SLAVE_RETRY_PAUSE),
+                                    slave_trans_retry_interval),
+                       sql_slave_killed, serial_rgi);
+            serial_rgi->trans_retries++;
+            mysql_mutex_lock(&rli->data_lock); // because of SHOW STATUS
+            rli->retried_trans++;
+            statistic_increment(slave_retried_transactions, LOCK_status);
+            mysql_mutex_unlock(&rli->data_lock);
+            DBUG_PRINT("info", ("Slave retries transaction "
+                                "rgi->trans_retries: %lu",
+                                serial_rgi->trans_retries));
+          }
+        }
+        else
+          sql_print_error("Slave SQL thread retried transaction %lu time(s) "
+                          "in vain, giving up. Consider raising the value of "
+                          "the slave_transaction_retries variable.",
+                          slave_trans_retries);
+      }
+      else if ((exec_res && !temp_err) ||
+               (opt_using_transactions &&
+                rli->group_relay_log_pos == rli->event_relay_log_pos))
+      {
+        /*
+          Only reset the retry counter if the entire group succeeded
+          or failed with a non-transient error.  On a successful
+          event, the execution will proceed as usual; in the case of a
+          non-transient error, the slave will stop with an error.
+         */
+        serial_rgi->trans_retries= 0; // restart from fresh
+        DBUG_PRINT("info", ("Resetting retry counter, rgi->trans_retries: %lu",
+                            serial_rgi->trans_retries));
+      }
+    }
+
+    rli->executed_entries++;
+#ifdef WITH_WSREP
+    wsrep_after_statement(thd);
+#endif /* WITH_WSREP */
+#ifdef ENABLED_DEBUG_SYNC
+    DBUG_EXECUTE_IF(
+        "pause_sql_thread_on_fde",
+        if (ev && typ == FORMAT_DESCRIPTION_EVENT) {
+          DBUG_ASSERT(!debug_sync_set_action(
+              thd,
+              STRING_WITH_LEN(
+                  "now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue")));
+        });
+#endif
+
+    DBUG_RETURN(exec_res);
+  }
+  mysql_mutex_unlock(&rli->data_lock);
+  rli->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_READ_FAILURE, NULL,
+              ER_THD(thd, ER_SLAVE_RELAY_LOG_READ_FAILURE), "\
+Could not parse relay log event entry. The possible reasons are: the master's \
+binary log is corrupted (you can check this by running 'mysqlbinlog' on the \
+binary log), the slave's relay log is corrupted (you can check this by running \
+'mysqlbinlog' on the relay log), a network problem, or a bug in the master's \
+or slave's MariaDB code. If you want to check the master's binary log or slave's \
+relay log, you will be able to know their names by issuing 'SHOW SLAVE STATUS' \
+on this slave.\
+");
+  DBUG_RETURN(1);
+}
+
+
+static bool check_io_slave_killed(Master_info *mi, const char *info)
+{
+  if (io_slave_killed(mi))
+  {
+    if (info && global_system_variables.log_warnings)
+      sql_print_information("%s", info);
+    return TRUE;
+  }
+  return FALSE;
+}
+
+/**
+  @brief Try to reconnect slave IO thread.
+
+  @details Terminates current connection to master, sleeps for
+  @c mi->connect_retry msecs and initiates new connection with
+  @c safe_reconnect(). Variable pointed by @c retry_count is increased -
+  if it exceeds @c master_retry_count then connection is not re-established
+  and function signals error.
+  Unless @c suppress_warnings is TRUE, a warning is put in the server error log
+  when reconnecting. The warning message and messages used to report errors
+  are taken from @c messages array. In case @c master_retry_count is exceeded,
+  no messages are added to the log.
+
+  @param[in]     thd                 Thread context.
+  @param[in]     mysql               MySQL connection.
+  @param[in]     mi                  Master connection information.
+  @param[in,out] retry_count         Number of attempts to reconnect.
+  @param[in]     suppress_warnings   TRUE when a normal net read timeout
+                                     has caused to reconnecting.
+  @param[in]     messages            Messages to print/log, see 
+                                     reconnect_messages[] array.
+
+  @retval        0                   OK.
+  @retval        1                   There was an error.
+*/
+
+static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi,
+                            uint *retry_count, bool suppress_warnings,
+                            const char *messages[SLAVE_RECON_MSG_MAX])
+{
+  mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
+  thd->proc_info= messages[SLAVE_RECON_MSG_WAIT];
+#ifdef SIGNAL_WITH_VIO_CLOSE  
+  thd->clear_active_vio();
+#endif
+  end_server(mysql);
+  if ((*retry_count)++)
+  {
+    if (*retry_count > master_retry_count)
+      return 1;                             // Don't retry forever
+    slave_sleep(thd, mi->connect_retry, io_slave_killed, mi);
+  }
+  if (check_io_slave_killed(mi, messages[SLAVE_RECON_MSG_KILLED_WAITING]))
+    return 1;
+  thd->proc_info = messages[SLAVE_RECON_MSG_AFTER];
+  if (!suppress_warnings) 
+  {
+    char buf[256];
+    StringBuffer<100> tmp;
+    if (mi->using_gtid != Master_info::USE_GTID_NO)
+    {
+      tmp.append(STRING_WITH_LEN("; GTID position '"));
+      mi->gtid_current_pos.append_to_string(&tmp);
+      if (mi->events_queued_since_last_gtid == 0)
+        tmp.append(STRING_WITH_LEN("'"));
+      else
+      {
+        tmp.append(STRING_WITH_LEN("', GTID event skip "));
+        tmp.append_ulonglong((ulonglong)mi->events_queued_since_last_gtid);
+      }
+    }
+    my_snprintf(buf, sizeof(buf), messages[SLAVE_RECON_MSG_FAILED], 
+                IO_RPL_LOG_NAME, mi->master_log_pos,
+                tmp.c_ptr_safe());
+    /* 
+      Raise a warining during registering on master/requesting dump.
+      Log a message reading event.
+    */
+    if (messages[SLAVE_RECON_MSG_COMMAND][0])
+    {
+      mi->report(WARNING_LEVEL, ER_SLAVE_MASTER_COM_FAILURE, NULL,
+                 ER_THD(thd, ER_SLAVE_MASTER_COM_FAILURE), 
+                 messages[SLAVE_RECON_MSG_COMMAND], buf);
+    }
+    else
+    {
+      sql_print_information("%s", buf);
+    }
+  }
+  if (safe_reconnect(thd, mysql, mi, 1) || io_slave_killed(mi))
+  {
+    if (global_system_variables.log_warnings)
+      sql_print_information("%s", messages[SLAVE_RECON_MSG_KILLED_AFTER]);
+    return 1;
+  }
+  return 0;
+}
+
+
+/**
+  Slave IO thread entry point.
+
+  @param arg Pointer to Master_info struct that holds information for
+  the IO thread.
+
+  @return Always 0.
+*/
+pthread_handler_t handle_slave_io(void *arg)
+{
+  THD *thd; // needs to be first for thread_stack
+  MYSQL *mysql;
+  Master_info *mi = (Master_info*)arg;
+  Relay_log_info *rli= &mi->rli;
+  uint retry_count;
+  bool suppress_warnings;
+  int ret;
+  rpl_io_thread_info io_info;
+#ifndef DBUG_OFF
+  mi->dbug_do_disconnect= false;
+#endif
+  // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
+  my_thread_init();
+  DBUG_ENTER("handle_slave_io");
+
+  DBUG_ASSERT(mi->inited);
+  mysql= NULL ;
+  retry_count= 0;
+
+  thd= new THD(next_thread_id()); // note that constructor of THD uses DBUG_ !
+
+  mysql_mutex_lock(&mi->run_lock);
+  /* Inform waiting threads that slave has started */
+  mi->slave_run_id++;
+
+#ifndef DBUG_OFF
+  mi->events_till_disconnect = disconnect_slave_event_count;
+#endif
+
+  THD_CHECK_SENTRY(thd);
+  mi->io_thd = thd;
+
+  thd->set_psi(PSI_CALL_get_thread());
+
+  pthread_detach_this_thread();
+  thd->thread_stack= (char*) &thd; // remember where our stack is
+  mi->clear_error();
+  if (init_slave_thread(thd, mi, SLAVE_THD_IO))
+  {
+    mysql_cond_broadcast(&mi->start_cond);
+    sql_print_error("Failed during slave I/O thread initialization");
+    goto err_during_init;
+  }
+  thd->system_thread_info.rpl_io_info= &io_info;
+  server_threads.insert(thd);
+  mi->slave_running = MYSQL_SLAVE_RUN_NOT_CONNECT;
+  mi->abort_slave = 0;
+  mysql_mutex_unlock(&mi->run_lock);
+  mysql_cond_broadcast(&mi->start_cond);
+  mi->rows_event_tracker.reset();
+
+  DBUG_PRINT("master_info",("log_file_name: '%s'  position: %llu",
+                            mi->master_log_name, mi->master_log_pos));
+
+  /* This must be called before run any binlog_relay_io hooks */
+  my_pthread_setspecific_ptr(RPL_MASTER_INFO, mi);
+
+  /* Load the set of seen GTIDs, if we did not already. */
+  if (rpl_load_gtid_slave_state(thd))
+  {
+    mi->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
+                "Unable to load replication GTID slave state from mysql.%s: %s",
+                rpl_gtid_slave_state_table_name.str,
+                thd->get_stmt_da()->message());
+    /*
+      If we are using old-style replication, we can continue, even though we
+      then will not be able to record the GTIDs we receive. But if using GTID,
+      we must give up.
+    */
+    if (mi->using_gtid != Master_info::USE_GTID_NO || opt_gtid_strict_mode)
+      goto err;
+  }
+
+  thd->variables.wsrep_on= 0;
+  if (DBUG_IF("failed_slave_start")
+      || repl_semisync_slave.slave_start(mi))
+  {
+    mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
+               ER_THD(thd, ER_SLAVE_FATAL_ERROR),
+               "Failed to run 'thread_start' hook");
+    goto err;
+  }
+
+  if (!(mi->mysql = mysql = mysql_init(NULL)))
+  {
+    mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
+               ER_THD(thd, ER_SLAVE_FATAL_ERROR), "error in mysql_init()");
+    goto err;
+  }
+
+  THD_STAGE_INFO(thd, stage_connecting_to_master);
+  // we can get killed during safe_connect
+  if (!safe_connect(thd, mysql, mi))
+  {
+    if (mi->using_gtid == Master_info::USE_GTID_NO)
+      sql_print_information("Slave I/O thread: connected to master '%s@%s:%d',"
+                            "replication started in log '%s' at position %llu",
+                            mi->user, mi->host, mi->port,
+                            IO_RPL_LOG_NAME, mi->master_log_pos);
+    else
+    {
+      StringBuffer<100> tmp;
+      mi->gtid_current_pos.to_string(&tmp);
+      sql_print_information("Slave I/O thread: connected to master '%s@%s:%d',"
+                            "replication starts at GTID position '%s'",
+                            mi->user, mi->host, mi->port, tmp.c_ptr_safe());
+    }
+  }
+  else
+  {
+    sql_print_information("Slave I/O thread killed while connecting to master");
+    goto err;
+  }
+
+connected:
+
+  if (mi->using_gtid != Master_info::USE_GTID_NO)
+  {
+    /*
+      When the IO thread (re)connects to the master using GTID, it will
+      connect at the start of an event group. But the IO thread may have
+      previously logged part of the following event group to the relay
+      log.
+
+      When the IO and SQL thread are started together, we erase any previous
+      relay logs, but this is not possible/desirable while the SQL thread is
+      running. To avoid duplicating partial event groups in the relay logs in
+      this case, we remember the count of events in any partially logged event
+      group before the reconnect, and then here at connect we set up a counter
+      to skip the already-logged part of the group.
+    */
+    mi->gtid_reconnect_event_skip_count= mi->events_queued_since_last_gtid;
+    mi->gtid_event_seen= false;
+    /*
+      Reset stale state of the rows-event group tracker at reconnect.
+    */
+    mi->rows_event_tracker.reset();
+  }
+
+#ifdef ENABLED_DEBUG_SYNC
+    DBUG_EXECUTE_IF("dbug.before_get_running_status_yes",
+                    {
+                      const char act[]=
+                        "now "
+                        "wait_for signal.io_thread_let_running";
+                      DBUG_ASSERT(debug_sync_service);
+                      DBUG_ASSERT(!debug_sync_set_action(thd, 
+                                                         STRING_WITH_LEN(act)));
+                    };);
+#endif
+
+  mysql_mutex_lock(&mi->run_lock);
+  mi->slave_running= MYSQL_SLAVE_RUN_CONNECT;
+  mysql_mutex_unlock(&mi->run_lock);
+
+  thd->slave_net = &mysql->net;
+  THD_STAGE_INFO(thd, stage_checking_master_version);
+  ret= get_master_version_and_clock(mysql, mi);
+  if (ret == 1)
+    /* Fatal error */
+    goto err;
+
+  if (ret == 2) 
+  { 
+    if (check_io_slave_killed(mi, "Slave I/O thread killed "
+                              "while calling get_master_version_and_clock(...)"))
+      goto err;
+    suppress_warnings= FALSE;
+    /*
+      Try to reconnect because the error was caused by a transient network
+      problem
+    */
+    if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
+                             reconnect_messages[SLAVE_RECON_ACT_REG]))
+      goto err;
+    goto connected;
+  } 
+
+  if (mi->rli.relay_log.description_event_for_queue->binlog_version > 1)
+  {
+    /*
+      Register ourselves with the master.
+    */
+    THD_STAGE_INFO(thd, stage_registering_slave_on_master);
+    if (register_slave_on_master(mysql, mi, &suppress_warnings))
+    {
+      if (!check_io_slave_killed(mi, "Slave I/O thread killed "
+                                "while registering slave on master"))
+      {
+        sql_print_error("Slave I/O thread couldn't register on master");
+        if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
+                             reconnect_messages[SLAVE_RECON_ACT_REG]))
+          goto err;
+      }
+      else
+        goto err;
+      goto connected;
+    }
+    DBUG_EXECUTE_IF("fail_com_register_slave", goto err;);
+  }
+
+  DBUG_PRINT("info",("Starting reading binary log from master"));
+  thd->set_command(COM_SLAVE_IO);
+  while (!io_slave_killed(mi))
+  {
+    const uchar *event_buf;
+
+    THD_STAGE_INFO(thd, stage_requesting_binlog_dump);
+    if (request_dump(thd, mysql, mi, &suppress_warnings))
+    {
+      sql_print_error("Failed on request_dump()");
+      if (check_io_slave_killed(mi, NullS) ||
+        try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
+                         reconnect_messages[SLAVE_RECON_ACT_DUMP]))
+        goto err;
+      goto connected;
+    }
+
+    mi->slave_running= MYSQL_SLAVE_RUN_READING;
+    DBUG_ASSERT(mi->last_error().number == 0);
+    ulonglong lastchecktime = my_hrtime().val;
+    ulonglong tokenamount   = opt_read_binlog_speed_limit*1024;
+    while (!io_slave_killed(mi))
+    {
+      ulong event_len, network_read_len = 0;
+      /*
+         We say "waiting" because read_event() will wait if there's nothing to
+         read. But if there's something to read, it will not wait. The
+         important thing is to not confuse users by saying "reading" whereas
+         we're in fact receiving nothing.
+      */
+      THD_STAGE_INFO(thd, stage_waiting_for_master_to_send_event);
+      event_len= read_event(mysql, mi, &suppress_warnings, &network_read_len);
+      if (check_io_slave_killed(mi, NullS))
+        goto err;
+
+      if (unlikely(event_len == packet_error))
+      {
+        uint mysql_error_number= mysql_errno(mysql);
+        switch (mysql_error_number) {
+        case CR_NET_PACKET_TOO_LARGE:
+          sql_print_error("\
+Log entry on master is longer than slave_max_allowed_packet (%lu) on \
+slave. If the entry is correct, restart the server with a higher value of \
+slave_max_allowed_packet",
+                         slave_max_allowed_packet);
+          mi->report(ERROR_LEVEL, ER_NET_PACKET_TOO_LARGE, NULL,
+                     "%s", "Got a packet bigger than 'slave_max_allowed_packet' bytes");
+          goto err;
+        case ER_MASTER_FATAL_ERROR_READING_BINLOG:
+          mi->report(ERROR_LEVEL, ER_MASTER_FATAL_ERROR_READING_BINLOG, NULL,
+                     ER_THD(thd, ER_MASTER_FATAL_ERROR_READING_BINLOG),
+                     mysql_error_number, mysql_error(mysql));
+          goto err;
+        case ER_OUT_OF_RESOURCES:
+          sql_print_error("\
+Stopping slave I/O thread due to out-of-memory error from master");
+          mi->report(ERROR_LEVEL, ER_OUT_OF_RESOURCES, NULL,
+                     "%s", ER_THD(thd, ER_OUT_OF_RESOURCES));
+          goto err;
+        }
+        if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
+                             reconnect_messages[SLAVE_RECON_ACT_EVENT]))
+          goto err;
+        goto connected;
+      } // if (event_len == packet_error)
+
+      retry_count=0;                    // ok event, reset retry counter
+      THD_STAGE_INFO(thd, stage_queueing_master_event_to_the_relay_log);
+      event_buf= mysql->net.read_pos + 1;
+      mi->semi_ack= 0;
+      if (repl_semisync_slave.
+          slave_read_sync_header((const uchar*) mysql->net.read_pos + 1,
+                                 event_len,
+                                 &(mi->semi_ack), &event_buf, &event_len))
+      {
+        mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
+                   ER_THD(thd, ER_SLAVE_FATAL_ERROR),
+                   "Failed to run 'after_read_event' hook");
+        goto err;
+      }
+
+      /* Control the binlog read speed of master 
+         when read_binlog_speed_limit is non-zero
+      */
+      ulonglong speed_limit_in_bytes = opt_read_binlog_speed_limit * 1024;
+      if (speed_limit_in_bytes) 
+      {
+        /* Prevent tokenamount from growing into a very large value,
+           e.g. when the IO thread has been idle for a long time
+        */
+        if (tokenamount > speed_limit_in_bytes * 2) 
+        {
+          lastchecktime = my_hrtime().val;
+          tokenamount = speed_limit_in_bytes * 2;
+        }
+
+        do
+        {
+          ulonglong currenttime = my_hrtime().val;
+          tokenamount += (currenttime - lastchecktime) * speed_limit_in_bytes / (1000*1000);
+          lastchecktime = currenttime;
+          if(tokenamount < network_read_len)
+          {
+            ulonglong duration =1000ULL*1000 * (network_read_len - tokenamount) / speed_limit_in_bytes;
+            time_t second_time = (time_t)(duration / (1000 * 1000));
+            uint micro_time = duration % (1000 * 1000);
+
+            // at least sleep 1000 micro second
+            my_sleep(MY_MAX(micro_time,1000));
+
+            /*
+              If it sleep more than one second, 
+              it should use slave_sleep() to avoid the STOP SLAVE hang.   
+            */
+            if (second_time)
+              slave_sleep(thd, second_time, io_slave_killed, mi);
+
+          }
+        }while(tokenamount < network_read_len);
+        tokenamount -= network_read_len;
+      }
+
+      if (queue_event(mi, event_buf, event_len))
+      {
+        mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
+                   ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
+                   "could not queue event from master");
+        goto err;
+      }
+
+      if (rpl_semi_sync_slave_status && (mi->semi_ack & SEMI_SYNC_NEED_ACK))
+      {
+        /*
+          We deliberately ignore the error in slave_reply, such error should
+          not cause the slave IO thread to stop, and the error messages are
+          already reported.
+        */
+        DBUG_EXECUTE_IF("simulate_delay_semisync_slave_reply", my_sleep(800000););
+        (void)repl_semisync_slave.slave_reply(mi);
+      }
+
+      if (mi->using_gtid == Master_info::USE_GTID_NO &&
+          /*
+            If rpl_semi_sync_slave_delay_master is enabled, we will flush
+            master info only when ack is needed. This may lead to at least one
+            group transaction delay but affords better performance improvement.
+          */
+          (!repl_semisync_slave.get_slave_enabled() ||
+           (!(mi->semi_ack & SEMI_SYNC_SLAVE_DELAY_SYNC) ||
+            (mi->semi_ack & (SEMI_SYNC_NEED_ACK)))) &&
+          (DBUG_IF("failed_flush_master_info") ||
+           flush_master_info(mi, TRUE, TRUE)))
+      {
+        sql_print_error("Failed to flush master info file");
+        goto err;
+      }
+      /*
+        See if the relay logs take too much space.
+        We don't lock mi->rli.log_space_lock here; this dirty read saves time
+        and does not introduce any problem:
+        - if mi->rli.ignore_log_space_limit is 1 but becomes 0 just after (so
+        the clean value is 0), then we are reading only one more event as we
+        should, and we'll block only at the next event. No big deal.
+        - if mi->rli.ignore_log_space_limit is 0 but becomes 1 just
+        after (so the clean value is 1), then we are going into
+        wait_for_relay_log_space() for no reason, but this function
+        will do a clean read, notice the clean value and exit
+        immediately.
+      */
+#ifndef DBUG_OFF
+      {
+        DBUG_PRINT("info", ("log_space_limit=%llu log_space_total=%llu "
+                            "ignore_log_space_limit=%d",
+                            rli->log_space_limit, uint64(rli->log_space_total),
+                            (int) rli->ignore_log_space_limit));
+      }
+#endif
+
+      if (rli->log_space_limit && rli->log_space_limit <
+          rli->log_space_total &&
+          !rli->ignore_log_space_limit)
+        if (wait_for_relay_log_space(rli))
+        {
+          sql_print_error("Slave I/O thread aborted while waiting for relay \
+log space");
+          goto err;
+        }
+    }
+  }
+
+  // error = 0;
+err:
+  // print the current replication position
+  if (mi->using_gtid == Master_info::USE_GTID_NO)
+    sql_print_information("Slave I/O thread exiting, read up to log '%s', "
+                          "position %llu, master %s:%d", IO_RPL_LOG_NAME, mi->master_log_pos,
+                           mi->host, mi->port);
+  else
+  {
+    StringBuffer<100> tmp;
+    mi->gtid_current_pos.to_string(&tmp);
+    sql_print_information("Slave I/O thread exiting, read up to log '%s', "
+                          "position %llu; GTID position %s, master %s:%d",
+                          IO_RPL_LOG_NAME, mi->master_log_pos,
+                          tmp.c_ptr_safe(), mi->host, mi->port);
+  }
+  repl_semisync_slave.slave_stop(mi);
+  thd->reset_query();
+  thd->reset_db(&null_clex_str);
+  if (mysql)
+  {
+    /*
+      Here we need to clear the active VIO before closing the
+      connection with the master.  The reason is that THD::awake()
+      might be called from terminate_slave_thread() because somebody
+      issued a STOP SLAVE.  If that happends, the close_active_vio()
+      can be called in the middle of closing the VIO associated with
+      the 'mysql' object, causing a crash.
+    */
+#ifdef SIGNAL_WITH_VIO_CLOSE
+    thd->clear_active_vio();
+#endif
+    mysql_close(mysql);
+    mi->mysql=0;
+  }
+  write_ignored_events_info_to_relay_log(thd, mi);
+  if (mi->using_gtid != Master_info::USE_GTID_NO)
+    flush_master_info(mi, TRUE, TRUE);
+  THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit);
+  thd->add_status_to_global();
+  server_threads.erase(thd);
+  mysql_mutex_lock(&mi->run_lock);
+
+err_during_init:
+  /* Forget the relay log's format */
+  delete mi->rli.relay_log.description_event_for_queue;
+  mi->rli.relay_log.description_event_for_queue= 0;
+  // TODO: make rpl_status part of Master_info
+  change_rpl_status(RPL_ACTIVE_SLAVE,RPL_IDLE_SLAVE);
+
+  thd->assert_not_linked();
+  delete thd;
+
+  mi->abort_slave= 0;
+  mi->slave_running= MYSQL_SLAVE_NOT_RUN;
+  mi->io_thd= 0;
+  mi->do_accept_own_server_id= false;
+  /*
+    Note: the order of the two following calls (first broadcast, then unlock)
+    is important. Otherwise a killer_thread can execute between the calls and
+    delete the mi structure leading to a crash! (see BUG#25306 for details)
+   */ 
+  mysql_cond_broadcast(&mi->stop_cond);       // tell the world we are done
+  DBUG_EXECUTE_IF("simulate_slave_delay_at_terminate_bug38694", sleep(5););
+  mysql_mutex_unlock(&mi->run_lock);
+
+  DBUG_LEAVE;                                   // Must match DBUG_ENTER()
+  my_thread_end();
+  ERR_remove_state(0);
+  return nullptr;
+}
+
+/*
+  Check the temporary directory used by commands like
+  LOAD DATA INFILE.
+
+  As the directory never changes during a mysqld run, we only
+  test this once and cache the result. This also resolve a race condition
+  when this can be run by multiple threads at the same time.
+ */
+
+static bool check_temp_dir_run= 0;
+static int check_temp_dir_result= 0;
+
+static
+int check_temp_dir(char* tmp_file)
+{
+  File file;
+  MY_DIR *dir_info;
+  int err= 1;                                   // Assume failure
+  char dir_name[FN_REFLEN];
+  size_t dir_name_length;
+  DBUG_ENTER("check_temp_dir");
+
+  /* This lock is safe to use as this function is only called once */
+  mysql_mutex_lock(&LOCK_start_thread);
+  if (check_temp_dir_run)
+  {
+    /* Already checked once; replay the cached verdict (and error, if any) */
+    if ((err= check_temp_dir_result))
+      my_message(err, tmp_file, MYF(0));
+    goto end;
+  }
+  check_temp_dir_run= 1;
+
+  /*
+    Extract the directory part from the temporary file name.
+  */
+  dirname_part(dir_name, tmp_file, &dir_name_length);
+
+  /*
+    Verify that the directory exists.
+   */
+  if (!(dir_info= my_dir(dir_name, MYF(MY_WME))))
+    goto end;
+  my_dirend(dir_info);
+
+  /*
+    Verify that we have permission to create a file there. O_TRUNC makes
+    this work even if an old file happens to be lying around.
+   */
+  if ((file= mysql_file_create(key_file_misc,
+                               tmp_file, CREATE_MODE,
+                               O_WRONLY | O_BINARY | O_TRUNC | O_NOFOLLOW,
+                               MYF(MY_WME))) < 0)
+    goto end;
+
+  err= 0;                                       // Directory name ok
+  /*
+    Clean up after ourselves.
+   */
+  mysql_file_close(file, MYF(0));
+  mysql_file_delete(key_file_misc, tmp_file, MYF(0));
+
+end:
+  mysql_mutex_unlock(&LOCK_start_thread);
+  DBUG_RETURN(err);
+}
+
+
+void
+slave_output_error_info(rpl_group_info *rgi, THD *thd)
+{
+  /*
+    Collect as much information as possible from the thd — error codes
+    and warnings — and print it to the error log, so that the user can
+    locate the failure.
+  */
+  Relay_log_info *rli= rgi->rli;
+  uint32 const last_errno= rli->last_error().number;
+  bool udf_error= false;        // Set if a user-defined library failed to load
+
+  if (unlikely(thd->is_error()))
+  {
+    char const *const errmsg= thd->get_stmt_da()->message();
+
+    DBUG_PRINT("info",
+               ("thd->get_stmt_da()->sql_errno()=%d; rli->last_error.number=%d",
+                thd->get_stmt_da()->sql_errno(), last_errno));
+    if (last_errno == 0)
+    {
+      /*
+        Report an error that was not reported while executing
+        exec_relay_log_event().
+      */
+      rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(),
+                  rgi->gtid_info(), "%s", errmsg);
+    }
+    else if (last_errno != thd->get_stmt_da()->sql_errno())
+    {
+      /*
+        An error was reported while executing exec_relay_log_event(),
+        but its code differs from what is in the thread. Print the extra
+        information to help find what caused the problem.
+      */
+      sql_print_error("Slave (additional info): %s Error_code: %d",
+                      errmsg, thd->get_stmt_da()->sql_errno());
+    }
+  }
+
+  /* Print any warnings issued */
+  Diagnostics_area::Sql_condition_iterator it=
+    thd->get_stmt_da()->sql_conditions();
+  const Sql_condition *cond;
+  /*
+    Controlled slave thread cancel for replication of user-defined
+    variables: detect a failure to open a UDF library.
+  */
+  while ((cond= it++))
+  {
+    if (cond->get_sql_errno() == ER_CANT_OPEN_LIBRARY)
+      udf_error= true;
+    sql_print_warning("Slave: %s Error_code: %d", cond->get_message_text(),
+                      cond->get_sql_errno());
+  }
+
+  /* Both final messages end with the current GTID position, when in use */
+  StringBuffer<100> gtid_pos;
+  if (rli->mi->using_gtid != Master_info::USE_GTID_NO)
+  {
+    gtid_pos.append(STRING_WITH_LEN("; GTID position '"));
+    rpl_append_gtid_state(&gtid_pos, false);
+    gtid_pos.append(STRING_WITH_LEN("'"));
+  }
+  if (unlikely(udf_error))
+    sql_print_error("Error loading user-defined library, slave SQL "
+      "thread aborted. Install the missing library, and restart the "
+      "slave SQL thread with \"SLAVE START\". We stopped at log '%s' "
+      "position %llu%s", RPL_LOG_NAME, rli->group_master_log_pos,
+      gtid_pos.c_ptr_safe());
+  else
+    sql_print_error("Error running query, slave SQL thread aborted. "
+                    "Fix the problem, and restart the slave SQL thread "
+                    "with \"SLAVE START\". We stopped at log '%s' position "
+                    "%llu%s", RPL_LOG_NAME, rli->group_master_log_pos,
+                    gtid_pos.c_ptr_safe());
+}
+
+
+/**
+  Slave SQL thread entry point.
+
+  @param arg Pointer to Relay_log_info object that holds information
+  for the SQL thread.
+
+  @return Always 0.
+*/
+pthread_handler_t handle_slave_sql(void *arg)
+{
+  THD *thd;                     /* needs to be first for thread_stack */
+  char saved_log_name[FN_REFLEN];
+  char saved_master_log_name[FN_REFLEN];
+  my_off_t UNINIT_VAR(saved_log_pos);
+  my_off_t UNINIT_VAR(saved_master_log_pos);
+  String saved_skip_gtid_pos;
+  my_off_t saved_skip= 0;
+  Master_info *mi= ((Master_info*)arg);
+  Relay_log_info* rli = &mi->rli;
+  my_bool wsrep_node_dropped __attribute__((unused)) = FALSE;
+  const char *errmsg;
+  rpl_group_info *serial_rgi;
+  rpl_sql_thread_info sql_info(mi->rpl_filter);
+
+  // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
+  my_thread_init();
+  DBUG_ENTER("handle_slave_sql");
+
+#ifdef WITH_WSREP
+  /* On wsrep auto-restart we re-enter here with a fresh THD and rgi */
+ wsrep_restart_point:
+#endif
+
+  serial_rgi= new rpl_group_info(rli);
+  thd = new THD(next_thread_id()); // note that constructor of THD uses DBUG_ !
+  thd->thread_stack = (char*)&thd; // remember where our stack is
+  thd->system_thread_info.rpl_sql_info= &sql_info;
+
+  DBUG_ASSERT(rli->inited);
+  DBUG_ASSERT(rli->mi == mi);
+
+  /*
+    Reset errors for a clean start (otherwise, if the master is idle, the SQL
+    thread may execute no Query_log_event, so the error will remain even
+    though there's no problem anymore). Do not reset the master timestamp
+    (imagine the slave has caught everything, the STOP SLAVE and START SLAVE:
+    as we are not sure that we are going to receive a query, we want to
+    remember the last master timestamp (to say how many seconds behind we are
+    now.
+    But the master timestamp is reset by RESET SLAVE & CHANGE MASTER.
+  */
+  rli->clear_error();
+
+  mysql_mutex_lock(&rli->run_lock);
+  DBUG_ASSERT(!rli->slave_running);
+  errmsg= 0;
+#ifndef DBUG_OFF
+  rli->events_till_abort = abort_slave_event_count;
+#endif
+
+  /*
+    THD for the sql driver thd. In parallel replication this is the thread
+    that reads things from the relay log and calls rpl_parallel::do_event()
+    to execute queries.
+
+    In single thread replication this is the THD for the thread that is
+    executing SQL queries too.
+  */
+  serial_rgi->thd= rli->sql_driver_thd= thd;
+
+  thd->set_psi(PSI_CALL_get_thread());
+
+  /* Inform waiting threads that slave has started */
+  rli->slave_run_id++;
+  rli->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
+
+  pthread_detach_this_thread();
+
+  if (opt_slave_parallel_threads > 0 &&
+      rpl_parallel_activate_pool(&global_rpl_thread_pool))
+  {
+    mysql_cond_broadcast(&rli->start_cond);
+    rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
+                "Failed during parallel slave pool activation");
+    goto err_during_init;
+  }
+
+  if (init_slave_thread(thd, mi, SLAVE_THD_SQL))
+  {
+    /*
+      TODO: this is currently broken - slave start and change master
+      will be stuck if we fail here
+    */
+    mysql_cond_broadcast(&rli->start_cond);
+    rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
+                "Failed during slave thread initialization");
+    goto err_during_init;
+  }
+  thd->init_for_queries();
+  thd->rgi_slave= serial_rgi;
+  if ((serial_rgi->deferred_events_collecting= mi->rpl_filter->is_on()))
+  {
+    serial_rgi->deferred_events= new Deferred_log_events(rli);
+  }
+
+  /*
+    binlog_annotate_row_events must be TRUE only after an Annotate_rows event
+    has been received and only till the last corresponding rbr event has been
+    applied. In all other cases it must be FALSE.
+  */
+  thd->variables.binlog_annotate_row_events= 0;
+
+  /* Ensure that slave can execute any alter table it gets from master */
+  thd->variables.alter_algorithm= (ulong) Alter_info::ALTER_TABLE_ALGORITHM_DEFAULT;
+
+  server_threads.insert(thd);
+  /*
+    We are going to set slave_running to 1. Assuming slave I/O thread is
+    alive and connected, this is going to make Seconds_Behind_Master be 0
+    i.e. "caught up". Even if we're just at start of thread. Well it's ok, at
+    the moment we start we can think we are caught up, and the next second we
+    start receiving data so we realize we are not caught up and
+    Seconds_Behind_Master grows. No big deal.
+  */
+  rli->abort_slave = 0;
+  rli->stop_for_until= false;
+  mysql_mutex_unlock(&rli->run_lock);
+  mysql_cond_broadcast(&rli->start_cond);
+
+#ifdef ENABLED_DEBUG_SYNC
+  DBUG_EXECUTE_IF("delay_sql_thread_after_release_run_lock", {
+    const char act[]= "now "
+                      "signal sql_thread_run_lock_released "
+                      "wait_for sql_thread_continue";
+    DBUG_ASSERT(debug_sync_service);
+    DBUG_ASSERT(!debug_sync_set_action(current_thd, STRING_WITH_LEN(act)));
+  };);
+#endif
+
+  rli->parallel.reset();
+
+  //tell the I/O thread to take relay_log_space_limit into account from now on
+  rli->ignore_log_space_limit= 0;
+
+  serial_rgi->gtid_sub_id= 0;
+  serial_rgi->gtid_pending= false;
+  rli->last_seen_gtid= serial_rgi->current_gtid;
+  if (mi->using_gtid != Master_info::USE_GTID_NO && mi->using_parallel() &&
+      rli->restart_gtid_pos.count() > 0)
+  {
+    /*
+      With parallel replication in GTID mode, if we have a multi-domain GTID
+      position, we need to start some way back in the relay log and skip any
+      GTID that was already applied before. Since event groups can be split
+      across multiple relay logs, this earlier starting point may be in the
+      middle of an already applied event group, so we also need to skip any
+      remaining part of such group.
+    */
+    rli->gtid_skip_flag = GTID_SKIP_TRANSACTION;
+  }
+  else
+    rli->gtid_skip_flag = GTID_SKIP_NOT;
+  if (init_relay_log_pos(rli,
+                         rli->group_relay_log_name,
+                         rli->group_relay_log_pos,
+                         1 /*need data lock*/, &errmsg,
+                         1 /*look for a description_event*/))
+  {
+    rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
+                "Error initializing relay log position: %s", errmsg);
+    goto err_before_start;
+  }
+  rli->reset_inuse_relaylog();
+  if (rli->alloc_inuse_relaylog(rli->group_relay_log_name))
+    goto err_before_start;
+
+  strcpy(rli->future_event_master_log_name, rli->group_master_log_name);
+  THD_CHECK_SENTRY(thd);
+#ifndef DBUG_OFF
+  {
+    DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%llu "
+                        "rli->event_relay_log_pos=%llu",
+                        my_b_tell(rli->cur_log), rli->event_relay_log_pos));
+    DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE);
+    /*
+      Wonder if this is correct. I (Guilhem) wonder if my_b_tell() returns the
+      correct position when it's called just after my_b_seek() (the questionable
+      stuff is those "seek is done on next read" comments in the my_b_seek()
+      source code).
+      The crude reality is that this assertion randomly fails whereas
+      replication seems to work fine. And there is no easy explanation why it
+      fails (as we my_b_seek(rli->event_relay_log_pos) at the very end of
+      init_relay_log_pos() called above). Maybe the assertion would be
+      meaningful if we held rli->data_lock between the my_b_seek() and the
+      DBUG_ASSERT().
+    */
+#ifdef SHOULD_BE_CHECKED
+    DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos);
+#endif
+  }
+#endif
+
+  DBUG_PRINT("master_info",("log_file_name: %s  position: %llu",
+                            rli->group_master_log_name,
+                            rli->group_master_log_pos));
+  if (global_system_variables.log_warnings)
+  {
+    StringBuffer<100> tmp;
+    if (mi->using_gtid != Master_info::USE_GTID_NO)
+    {
+      tmp.append(STRING_WITH_LEN("; GTID position '"));
+      rpl_append_gtid_state(&tmp,
+                            mi->using_gtid==Master_info::USE_GTID_CURRENT_POS);
+      tmp.append(STRING_WITH_LEN("'"));
+    }
+    sql_print_information("Slave SQL thread initialized, starting replication "
+                          "in log '%s' at position %llu, relay log '%s' "
+                          "position: %llu%s", RPL_LOG_NAME,
+                    rli->group_master_log_pos, rli->group_relay_log_name,
+                    rli->group_relay_log_pos, tmp.c_ptr_safe());
+  }
+
+  /* Verify (once per server run) that the LOAD DATA tmpdir is usable */
+  if (check_temp_dir(rli->slave_patternload_file))
+  {
+    check_temp_dir_result= thd->get_stmt_da()->sql_errno();
+    rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
+                "Unable to use slave's temporary directory %s - %s",
+                slave_load_tmpdir, thd->get_stmt_da()->message());
+    goto err;
+  }
+  else
+    check_temp_dir_result= 0;
+
+  /* Load the set of seen GTIDs, if we did not already. */
+  if (rpl_load_gtid_slave_state(thd))
+  {
+    rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
+                "Unable to load replication GTID slave state from mysql.%s: %s",
+                rpl_gtid_slave_state_table_name.str,
+                thd->get_stmt_da()->message());
+    /*
+      If we are using old-style replication, we can continue, even though we
+      then will not be able to record the GTIDs we receive. But if using GTID,
+      we must give up.
+    */
+    if (mi->using_gtid != Master_info::USE_GTID_NO || opt_gtid_strict_mode)
+      goto err;
+  }
+  /* Re-load the set of mysql.gtid_slave_posXXX tables available. */
+  if (find_gtid_slave_pos_tables(thd))
+  {
+    rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
+                "Error processing replication GTID position tables: %s",
+                thd->get_stmt_da()->message());
+    goto err;
+  }
+
+  /* execute init_slave variable */
+  if (opt_init_slave.length)
+  {
+    execute_init_command(thd, &opt_init_slave, &LOCK_sys_init_slave);
+    if (unlikely(thd->is_slave_error))
+    {
+      rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
+                  "Slave SQL thread aborted. Can't execute init_slave query");
+      goto err;
+    }
+  }
+
+  /*
+    First check until condition - probably there is nothing to execute. We
+    do not want to wait for next event in this case.
+  */
+  mysql_mutex_lock(&rli->data_lock);
+  if (rli->slave_skip_counter)
+  {
+    /* Save the start position so we can report what the skip covered */
+    strmake_buf(saved_log_name, rli->group_relay_log_name);
+    strmake_buf(saved_master_log_name, rli->group_master_log_name);
+    saved_log_pos= rli->group_relay_log_pos;
+    saved_master_log_pos= rli->group_master_log_pos;
+    if (mi->using_gtid != Master_info::USE_GTID_NO)
+    {
+      saved_skip_gtid_pos.append(STRING_WITH_LEN(", GTID '"));
+      rpl_append_gtid_state(&saved_skip_gtid_pos, false);
+      saved_skip_gtid_pos.append(STRING_WITH_LEN("'; "));
+    }
+    saved_skip= rli->slave_skip_counter;
+  }
+  if ((rli->until_condition == Relay_log_info::UNTIL_MASTER_POS ||
+       rli->until_condition == Relay_log_info::UNTIL_RELAY_POS) &&
+      rli->is_until_satisfied(NULL))
+  {
+    sql_print_information("Slave SQL thread stopped because it reached its"
+                          " UNTIL position %llu in %s %s file",
+                          rli->until_pos(), rli->until_name(),
+                          rli->until_condition ==
+                          Relay_log_info::UNTIL_MASTER_POS ?
+                          "binlog" : "relaylog");
+    mysql_mutex_unlock(&rli->data_lock);
+    goto err;
+  }
+  mysql_mutex_unlock(&rli->data_lock);
+#ifdef WITH_WSREP
+  wsrep_open(thd);
+  if (WSREP_ON_)
+    wsrep_wait_ready(thd);
+  if (wsrep_before_command(thd))
+  {
+    WSREP_WARN("Slave SQL wsrep_before_command() failed");
+    goto err;
+  }
+#endif /* WITH_WSREP */
+  /* Read queries from the IO/THREAD until this thread is killed */
+
+  thd->set_command(COM_SLAVE_SQL);
+  while (!sql_slave_killed(serial_rgi))
+  {
+    THD_STAGE_INFO(thd, stage_reading_event_from_the_relay_log);
+    THD_CHECK_SENTRY(thd);
+
+    /* Report completion of SQL_SLAVE_SKIP_COUNTER once it has counted down */
+    if (saved_skip && rli->slave_skip_counter == 0)
+    {
+      StringBuffer<100> tmp;
+      if (mi->using_gtid != Master_info::USE_GTID_NO)
+      {
+        tmp.append(STRING_WITH_LEN(", GTID '"));
+        rpl_append_gtid_state(&tmp, false);
+        tmp.append(STRING_WITH_LEN("'; "));
+      }
+
+      sql_print_information("'SQL_SLAVE_SKIP_COUNTER=%ld' executed at "
+        "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
+        "master_log_pos='%ld'%s and new position at "
+        "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
+        "master_log_pos='%ld'%s ",
+        (ulong) saved_skip, saved_log_name, (ulong) saved_log_pos,
+        saved_master_log_name, (ulong) saved_master_log_pos,
+        saved_skip_gtid_pos.c_ptr_safe(),
+        rli->group_relay_log_name, (ulong) rli->group_relay_log_pos,
+        rli->group_master_log_name, (ulong) rli->group_master_log_pos,
+        tmp.c_ptr_safe());
+      saved_skip= 0;
+      saved_skip_gtid_pos.free();
+    }
+
+    if (exec_relay_log_event(thd, rli, serial_rgi))
+    {
+#ifdef WITH_WSREP
+      if (WSREP(thd))
+      {
+        mysql_mutex_lock(&thd->LOCK_thd_data);
+
+        if (thd->wsrep_cs().current_error())
+        {
+          wsrep_node_dropped = TRUE;
+          rli->abort_slave   = TRUE;
+        }
+        mysql_mutex_unlock(&thd->LOCK_thd_data);
+      }
+#endif /* WITH_WSREP */
+
+      DBUG_PRINT("info", ("exec_relay_log_event() failed"));
+      // do not scare the user if SQL thread was simply killed or stopped
+      if (!sql_slave_killed(serial_rgi))
+      {
+        slave_output_error_info(serial_rgi, thd);
+        if (WSREP(thd) && rli->last_error().number == ER_UNKNOWN_COM_ERROR)
+        {
+          wsrep_node_dropped= TRUE;
+        }
+      }
+      goto err;
+    }
+  }
+
+  /* Common exit: wait for parallel workers, log the stop position, clean up */
+ err:
+  if (mi->using_parallel())
+  {
+    rli->parallel.wait_for_done(thd, rli);
+  };
+ /* Gtid_list_log_event::do_apply_event has already reported the GTID until */
+  if (rli->stop_for_until && rli->until_condition != Relay_log_info::UNTIL_GTID)
+  {
+    if (global_system_variables.log_warnings > 2)
+      sql_print_information("Slave SQL thread UNTIL stop was requested at position "
+                            "%llu in %s %s file",
+                            rli->until_log_pos, rli->until_log_name,
+                            rli->until_condition ==
+                            Relay_log_info::UNTIL_MASTER_POS ?
+                            "binlog" : "relaylog");
+    sql_print_information("Slave SQL thread stopped because it reached its"
+                          " UNTIL position %llu in %s %s file",
+                          rli->until_pos(), rli->until_name(),
+                          rli->until_condition ==
+                          Relay_log_info::UNTIL_MASTER_POS ?
+                          "binlog" : "relaylog");
+
+  };
+  /* Thread stopped. Print the current replication position to the log */
+  {
+    StringBuffer<100> tmp;
+    if (mi->using_gtid != Master_info::USE_GTID_NO)
+    {
+      tmp.append(STRING_WITH_LEN("; GTID position '"));
+      rpl_append_gtid_state(&tmp, false);
+      tmp.append(STRING_WITH_LEN("'"));
+    }
+    sql_print_information("Slave SQL thread exiting, replication stopped in "
+                          "log '%s' at position %llu%s, master: %s:%d", RPL_LOG_NAME,
+                          rli->group_master_log_pos, tmp.c_ptr_safe(),
+                          mi->host, mi->port);
+  }
+#ifdef WITH_WSREP
+  wsrep_after_command_before_result(thd);
+  wsrep_after_command_after_result(thd);
+#endif /* WITH_WSREP */
+
+ err_before_start:
+
+  /*
+    Some events set some playgrounds, which won't be cleared because thread
+    stops. Stopping of this thread may not be known to these events ("stop"
+    request is detected only by the present function, not by events), so we
+    must "proactively" clear playgrounds:
+  */
+  thd->clear_error();
+  serial_rgi->cleanup_context(thd, 1);
+  /*
+    Some extra safety, which should not been needed (normally, event deletion
+    should already have done these assignments (each event which sets these
+    variables is supposed to set them to 0 before terminating)).
+  */
+  thd->catalog= 0;
+  thd->reset_query();
+  thd->reset_db(&null_clex_str);
+  if (rli->mi->using_gtid != Master_info::USE_GTID_NO)
+  {
+    ulong domain_count;
+    my_bool save_log_all_errors= thd->log_all_errors;
+
+    /*
+      We don't need to check return value for rli->flush()
+      as any errors should be logged to stderr
+    */
+    thd->log_all_errors= 1;
+    rli->flush();
+    thd->log_all_errors= save_log_all_errors;
+    if (mi->using_parallel())
+    {
+      /*
+        In parallel replication GTID mode, we may stop with different domains
+        at different positions in the relay log.
+
+        To handle this when we restart the SQL thread, mark the current
+        per-domain position in the Relay_log_info.
+      */
+      mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
+      domain_count= rpl_global_gtid_slave_state->count();
+      mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
+      if (domain_count > 1)
+      {
+        inuse_relaylog *ir;
+
+        /*
+          Load the starting GTID position, so that we can skip already applied
+          GTIDs when we restart the SQL thread. And set the start position in
+          the relay log back to a known safe place to start (prior to any not
+          yet applied transaction in any domain).
+        */
+        rli->restart_gtid_pos.load(rpl_global_gtid_slave_state, NULL, 0);
+        if ((ir= rli->inuse_relaylog_list))
+        {
+          rpl_gtid *gtid= ir->relay_log_state;
+          uint32 count= ir->relay_log_state_count;
+          while (count > 0)
+          {
+            process_gtid_for_restart_pos(rli, gtid);
+            ++gtid;
+            --count;
+          }
+          strmake_buf(rli->group_relay_log_name, ir->name);
+          rli->group_relay_log_pos= BIN_LOG_HEADER_SIZE;
+          rli->relay_log_state.load(ir->relay_log_state, ir->relay_log_state_count);
+        }
+      }
+    }
+  }
+  THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit);
+  thd->add_status_to_global();
+  server_threads.erase(thd);
+  mysql_mutex_lock(&rli->run_lock);
+
+err_during_init:
+  /* We need data_lock, at least to wake up any waiting master_pos_wait() */
+  mysql_mutex_lock(&rli->data_lock);
+  DBUG_ASSERT(rli->slave_running == MYSQL_SLAVE_RUN_NOT_CONNECT); // tracking buffer overrun
+  /* When master_pos_wait() wakes up it will check this and terminate */
+  rli->slave_running= MYSQL_SLAVE_NOT_RUN;
+  /* Forget the relay log's format */
+  delete rli->relay_log.description_event_for_exec;
+  rli->relay_log.description_event_for_exec= 0;
+  rli->reset_inuse_relaylog();
+  /* Wake up master_pos_wait() */
+  mysql_mutex_unlock(&rli->data_lock);
+  DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions"));
+  mysql_cond_broadcast(&rli->data_cond);
+  rli->ignore_log_space_limit= 0; /* don't need any lock */
+  /* we die so won't remember charset - re-update them on next thread start */
+  thd->system_thread_info.rpl_sql_info->cached_charset_invalidate();
+
+  /*
+    TODO: see if we can do this conditionally in next_event() instead
+    to avoid unneeded position re-init
+
+    We only reset THD::temporary_tables to 0 here and not free it, as this
+    could be used by slave through Relay_log_info::save_temporary_tables.
+  */
+  thd->temporary_tables= 0;
+  rli->sql_driver_thd= 0;
+  thd->rgi_fake= thd->rgi_slave= NULL;
+
+#ifdef WITH_WSREP
+  /*
+    If slave stopped due to node going non primary, we set global flag to
+    trigger automatic restart of slave when node joins back to cluster.
+  */
+  if (WSREP(thd) && wsrep_node_dropped && wsrep_restart_slave)
+  {
+    if (wsrep_ready_get())
+    {
+      WSREP_INFO("Slave error due to node temporarily non-primary"
+                 "SQL slave will continue");
+      wsrep_node_dropped= FALSE;
+      mysql_mutex_unlock(&rli->run_lock);
+      goto wsrep_restart_point;
+    }
+    else
+    {
+      WSREP_INFO("Slave error due to node going non-primary");
+      WSREP_INFO("wsrep_restart_slave was set and therefore slave will be "
+                 "automatically restarted when node joins back to cluster");
+      wsrep_restart_slave_activated= TRUE;
+    }
+  }
+  wsrep_close(thd);
+#endif /* WITH_WSREP */
+
+ /*
+   Note: the order of the broadcast and unlock calls below (first
+   broadcast, then unlock) is important. Otherwise a killer_thread can
+   execute between the calls and delete the mi structure leading to a
+   crash! (see BUG#25306 for details)
+ */
+  mysql_cond_broadcast(&rli->stop_cond);
+  DBUG_EXECUTE_IF("simulate_slave_delay_at_terminate_bug38694", sleep(5););
+  mysql_mutex_unlock(&rli->run_lock);  // tell the world we are done
+
+  rpl_parallel_resize_pool_if_no_slaves();
+
+  delete serial_rgi;
+  delete thd;
+
+  DBUG_LEAVE;                                   // Must match DBUG_ENTER()
+  my_thread_end();
+  ERR_remove_state(0);
+  return nullptr;
+}
+
+
+/*
+  process_io_create_file()
+*/
+
+static int process_io_create_file(Master_info* mi, Create_file_log_event* cev)
+{
+  int error = 1;
+  ulong num_bytes;
+  bool cev_not_written;
+  THD *thd = mi->io_thd;
+  NET *net = &mi->mysql->net;
+  DBUG_ENTER("process_io_create_file");
+
+  if (unlikely(!cev->is_valid()))
+    DBUG_RETURN(1);
+
+  if (!mi->rpl_filter->db_ok(cev->db))
+  {
+    skip_load_data_infile(net);
+    DBUG_RETURN(0);
+  }
+  DBUG_ASSERT(cev->inited_from_old);
+  thd->file_id = cev->file_id = mi->file_id++;
+  thd->variables.server_id = cev->server_id;
+  cev_not_written = 1;
+
+  if (unlikely(net_request_file(net,cev->fname)))
+  {
+    sql_print_error("Slave I/O: failed requesting download of '%s'",
+                    cev->fname);
+    goto err;
+  }
+
+  /*
+    This dummy block is so we could instantiate Append_block_log_event
+    once and then modify it slightly instead of doing it multiple times
+    in the loop
+  */
+  {
+    Append_block_log_event aev(thd,0,0,0,0);
+
+    for (;;)
+    {
+      if (unlikely((num_bytes=my_net_read(net)) == packet_error))
+      {
+        sql_print_error("Network read error downloading '%s' from master",
+                        cev->fname);
+        goto err;
+      }
+      if (unlikely(!num_bytes)) /* eof */
+      {
+	/* 3.23 master wants it */
+        net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0);
+        /*
+          If we wrote Create_file_log_event, then we need to write
+          Execute_load_log_event. If we did not write Create_file_log_event,
+          then this is an empty file and we can just do as if the LOAD DATA
+          INFILE had not existed, i.e. write nothing.
+        */
+        if (unlikely(cev_not_written))
+          break;
+        Execute_load_log_event xev(thd,0,0);
+        xev.log_pos = cev->log_pos;
+        if (unlikely(mi->rli.relay_log.append(&xev)))
+        {
+          mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
+                     ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
+                     "error writing Exec_load event to relay log");
+          goto err;
+        }
+        mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total);
+        break;
+      }
+      if (unlikely(cev_not_written))
+      {
+        cev->block = net->read_pos;
+        cev->block_len = num_bytes;
+        if (unlikely(mi->rli.relay_log.append(cev)))
+        {
+          mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
+                     ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
+                     "error writing Create_file event to relay log");
+          goto err;
+        }
+        cev_not_written=0;
+        mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total);
+      }
+      else
+      {
+        aev.block = net->read_pos;
+        aev.block_len = num_bytes;
+        aev.log_pos = cev->log_pos;
+        if (unlikely(mi->rli.relay_log.append(&aev)))
+        {
+          mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
+                     ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
+                     "error writing Append_block event to relay log");
+          goto err;
+        }
+        mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total) ;
+      }
+    }
+  }
+  error=0;
+err:
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Start using a new binary log on the master
+
+  SYNOPSIS
+    process_io_rotate()
+    mi                  master_info for the slave
+    rev                 The rotate log event read from the binary log
+
+  DESCRIPTION
+    Updates the master info with the place in the next binary
+    log where we should start reading.
+    Rotate the relay log to avoid mixed-format relay logs.
+
+  NOTES
+    We assume we already locked mi->data_lock
+
+  RETURN VALUES
+    0           ok
+    1           Log event is illegal
+
+*/
+
static int process_io_rotate(Master_info *mi, Rotate_log_event *rev)
{
  DBUG_ENTER("process_io_rotate");
  /* Caller (queue_event / queue_old_event) must hold mi->data_lock */
  mysql_mutex_assert_owner(&mi->data_lock);

  if (unlikely(!rev->is_valid()))
    DBUG_RETURN(1);

  /* Safe copy as 'rev' has been "sanitized" in Rotate_log_event's ctor */
  memcpy(mi->master_log_name, rev->new_log_ident, rev->ident_len+1);
  mi->master_log_pos= rev->pos;
  DBUG_PRINT("info", ("master_log_pos: '%s' %lu",
                      mi->master_log_name, (ulong) mi->master_log_pos));
#ifndef DBUG_OFF
  /*
    If we do not do this, we will be getting the first
    rotate event forever, so we need to not disconnect after one.
  */
  if (disconnect_slave_event_count)
    mi->events_till_disconnect++;
#endif

  /*
    If description_event_for_queue is format <4, there is conversion in the
    relay log to the slave's format (4). And Rotate can mean upgrade or
    nothing. If upgrade, it's to 5.0 or newer, so we will get a Format_desc, so
    no need to reset description_event_for_queue now. And if it's nothing (same
    master version as before), no need (still using the slave's format).
  */
  if (mi->rli.relay_log.description_event_for_queue->binlog_version >= 4)
  {
    DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg ==
                mi->rli.relay_log.relay_log_checksum_alg);

    delete mi->rli.relay_log.description_event_for_queue;
    /* start from format 3 (MySQL 4.0) again */
    mi->rli.relay_log.description_event_for_queue= new
      Format_description_log_event(3);
    /* The replacement descriptor keeps the relay log's checksum setting */
    mi->rli.relay_log.description_event_for_queue->checksum_alg=
      mi->rli.relay_log.relay_log_checksum_alg;
  }
  /*
    Rotate the relay log makes binlog format detection easier (at next slave
    start or mysqlbinlog)
  */
  DBUG_RETURN(rotate_relay_log(mi) /* will take the right mutexes */);
}
+
/*
  Reads a 3.23 (binlog format 1) event and converts it to the slave's format.
  This code was copied from MySQL 4.0.

  RETURN VALUES
    0   ok
    1   error (memory allocation failure, unparsable event, rotate failure,
        or relay log write failure)
*/
static int queue_binlog_ver_1_event(Master_info *mi, const uchar *buf,
                                    ulong event_len)
{
  const char *errmsg = 0;
  ulong inc_pos;
  bool ignore_event= 0;
  uchar *tmp_buf = 0;
  Relay_log_info *rli= &mi->rli;
  DBUG_ENTER("queue_binlog_ver_1_event");

  /*
    If we get Load event, we need to pass a non-reusable buffer
    to read_log_event, so we do a trick
  */
  if ((uchar)buf[EVENT_TYPE_OFFSET] == LOAD_EVENT)
  {
    if (unlikely(!(tmp_buf= (uchar*) my_malloc(key_memory_binlog_ver_1_event,
                                               event_len+1, MYF(MY_WME)))))
    {
      mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
                 ER(ER_SLAVE_FATAL_ERROR), "Memory allocation failed");
      DBUG_RETURN(1);
    }
    memcpy(tmp_buf,buf,event_len);
    /*
      Create_file constructor wants a 0 as last char of buffer, this 0 will
      serve as the string-termination char for the file's name (which is at the
      end of the buffer)
      We must increment event_len, otherwise the event constructor will not see
      this end 0, which leads to segfault.
    */
    tmp_buf[event_len++]=0;
    int4store(tmp_buf+EVENT_LEN_OFFSET, event_len);
    buf= tmp_buf;
  }
  /*
    This will transform LOAD_EVENT into CREATE_FILE_EVENT, ask the master to
    send the loaded file, and write it to the relay log in the form of
    Append_block/Exec_load (the SQL thread needs the data, as that thread is not
    connected to the master).
  */
  Log_event *ev=
    Log_event::read_log_event(buf, event_len, &errmsg,
                              mi->rli.relay_log.description_event_for_queue, 0);
  if (unlikely(!ev))
  {
    sql_print_error("Read invalid event from master: '%s',\
 master could be corrupt but a more likely cause of this is a bug",
                    errmsg);
    my_free(tmp_buf);
    DBUG_RETURN(1);
  }

  mysql_mutex_lock(&mi->data_lock);
  ev->log_pos= mi->master_log_pos; /* 3.23 events don't contain log_pos */
  switch (ev->get_type_code()) {
  case STOP_EVENT:
    /* Not written to the relay log; only the master position advances */
    ignore_event= 1;
    inc_pos= event_len;
    break;
  case ROTATE_EVENT:
    if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
    {
      delete ev;
      mysql_mutex_unlock(&mi->data_lock);
      DBUG_RETURN(1);
    }
    /* process_io_rotate() already set mi->master_log_name/pos itself */
    inc_pos= 0;
    break;
  case CREATE_FILE_EVENT:
    /*
      Yes it's possible to have CREATE_FILE_EVENT here, even if we're in
      queue_old_event() which is for 3.23 events which don't comprise
      CREATE_FILE_EVENT. This is because read_log_event() above has just
      transformed LOAD_EVENT into CREATE_FILE_EVENT.
    */
  {
    /* We come here when and only when tmp_buf != 0 */
    DBUG_ASSERT(tmp_buf != 0);
    inc_pos=event_len;
    ev->log_pos+= inc_pos;
    int error = process_io_create_file(mi,(Create_file_log_event*)ev);
    delete ev;
    mi->master_log_pos += inc_pos;
    DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
    mysql_mutex_unlock(&mi->data_lock);
    my_free(tmp_buf);
    DBUG_RETURN(error);
  }
  default:
    inc_pos= event_len;
    break;
  }
  if (likely(!ignore_event))
  {
    if (ev->log_pos)
      /*
        Don't do it for fake Rotate events (see comment in
        Log_event::Log_event(const char* buf...) in log_event.cc).
      */
      ev->log_pos+= event_len; /* make log_pos be the pos of the end of the event */
    if (unlikely(rli->relay_log.append(ev)))
    {
      delete ev;
      mysql_mutex_unlock(&mi->data_lock);
      DBUG_RETURN(1);
    }
    rli->relay_log.harvest_bytes_written(&rli->log_space_total);
  }
  delete ev;
  mi->master_log_pos+= inc_pos;
  DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
  mysql_mutex_unlock(&mi->data_lock);
  DBUG_RETURN(0);
}
+
+/*
+  Reads a 4.0 event and converts it to the slave's format. This code was copied
+  from queue_binlog_ver_1_event(), with some affordable simplifications.
+*/
+static int queue_binlog_ver_3_event(Master_info *mi, const uchar *buf,
+                                    ulong event_len)
+{
+  const char *errmsg = 0;
+  ulong inc_pos;
+  char *tmp_buf = 0;
+  Relay_log_info *rli= &mi->rli;
+  DBUG_ENTER("queue_binlog_ver_3_event");
+
+  /* read_log_event() will adjust log_pos to be end_log_pos */
+  Log_event *ev=
+    Log_event::read_log_event(buf, event_len, &errmsg,
+                              mi->rli.relay_log.description_event_for_queue, 0);
+  if (unlikely(!ev))
+  {
+    sql_print_error("Read invalid event from master: '%s',\
+ master could be corrupt but a more likely cause of this is a bug",
+                    errmsg);
+    my_free(tmp_buf);
+    DBUG_RETURN(1);
+  }
+  mysql_mutex_lock(&mi->data_lock);
+  switch (ev->get_type_code()) {
+  case STOP_EVENT:
+    goto err;
+  case ROTATE_EVENT:
+    if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
+    {
+      delete ev;
+      mysql_mutex_unlock(&mi->data_lock);
+      DBUG_RETURN(1);
+    }
+    inc_pos= 0;
+    break;
+  default:
+    inc_pos= event_len;
+    break;
+  }
+
+  if (unlikely(rli->relay_log.append(ev)))
+  {
+    delete ev;
+    mysql_mutex_unlock(&mi->data_lock);
+    DBUG_RETURN(1);
+  }
+  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+  delete ev;
+  mi->master_log_pos+= inc_pos;
+err:
+  DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
+  mysql_mutex_unlock(&mi->data_lock);
+  DBUG_RETURN(0);
+}
+
+/*
+  queue_old_event()
+
+  Writes a 3.23 or 4.0 event to the relay log, after converting it to the 5.0
+  (exactly, slave's) format. To do the conversion, we create a 5.0 event from
+  the 3.23/4.0 bytes, then write this event to the relay log.
+
+  TODO:
+    Test this code before release - it has to be tested on a separate
+    setup with 3.23 master or 4.0 master
+*/
+
+static int queue_old_event(Master_info *mi, const uchar *buf, ulong event_len)
+{
+  DBUG_ENTER("queue_old_event");
+
+  switch (mi->rli.relay_log.description_event_for_queue->binlog_version) {
+  case 1:
+      DBUG_RETURN(queue_binlog_ver_1_event(mi,buf,event_len));
+  case 3:
+      DBUG_RETURN(queue_binlog_ver_3_event(mi,buf,event_len));
+  default: /* unsupported format; eg version 2 */
+    DBUG_PRINT("info",("unsupported binlog format %d in queue_old_event()",
+                       mi->rli.relay_log.description_event_for_queue->binlog_version));
+    DBUG_RETURN(1);
+  }
+}
+
+/*
+  queue_event()
+
+  If the event is 3.23/4.0, passes it to queue_old_event() which will convert
+  it. Otherwise, writes a 5.0 (or newer) event to the relay log. Then there is
+  no format conversion, it's pure read/write of bytes.
+  So a 5.0.0 slave's relay log can contain events in the slave's format or in
+  any >=5.0.0 format.
+*/
+
+static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
+{
+  int error= 0;
+  StringBuffer<1024> error_msg;
+  ulonglong inc_pos= 0;
+  ulonglong event_pos;
+  Relay_log_info *rli= &mi->rli;
+  mysql_mutex_t *log_lock= rli->relay_log.get_log_lock();
+  ulong s_id;
+  bool unlock_data_lock= TRUE;
+  bool gtid_skip_enqueue= false;
+  bool got_gtid_event= false;
+  rpl_gtid event_gtid;
+  static uint dbug_rows_event_count __attribute__((unused))= 0;
+  bool is_compress_event = false;
+  uchar *new_buf = NULL;
+  uchar new_buf_arr[4096];
+  bool is_malloc = false;
+  bool is_rows_event= false;
+  /*
+    FD_q must have been prepared for the first R_a event
+    inside get_master_version_and_clock()
+    Show-up of FD:s affects checksum_alg at once because
+    that changes FD_queue.
+  */
+  enum enum_binlog_checksum_alg checksum_alg=
+    mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF ?
+    mi->checksum_alg_before_fd : mi->rli.relay_log.relay_log_checksum_alg;
+
+  const uchar *save_buf= NULL; // needed for checksumming the fake Rotate event
+  uchar rot_buf[LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN + FN_REFLEN];
+
+  DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_OFF || 
+              checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF || 
+              checksum_alg == BINLOG_CHECKSUM_ALG_CRC32); 
+
+  DBUG_ENTER("queue_event");
+  /*
+    FD_queue checksum alg description does not apply in a case of
+    FD itself. The one carries both parts of the checksum data.
+  */
+  if (buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT)
+  {
+    checksum_alg= get_checksum_alg(buf, event_len);
+  }
+  else if (buf[EVENT_TYPE_OFFSET] == START_EVENT_V3)
+  {
+    // checksum behaviour is similar to the pre-checksum FD handling
+    mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
+    mi->rli.relay_log.description_event_for_queue->checksum_alg=
+      mi->rli.relay_log.relay_log_checksum_alg= checksum_alg=
+      BINLOG_CHECKSUM_ALG_OFF;
+  }
+
+  // does not hold always because of old binlog can work with NM 
+  // DBUG_ASSERT(checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
+
+  // should hold unless manipulations with RL. Tests that do that
+  // will have to refine the clause.
+  DBUG_ASSERT(mi->rli.relay_log.relay_log_checksum_alg !=
+              BINLOG_CHECKSUM_ALG_UNDEF);
+              
+  // Emulate the network corruption
+  DBUG_EXECUTE_IF("corrupt_queue_event",
+    if (buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT)
+    {
+      uchar *debug_event_buf_c= const_cast<uchar*>(buf);
+      int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
+      debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
+      DBUG_PRINT("info", ("Corrupt the event at queue_event: byte on position %d", debug_cor_pos));
+      DBUG_SET("-d,corrupt_queue_event");
+    }
+  );
+                                              
+  if (event_checksum_test((uchar*) buf, event_len, checksum_alg))
+  {
+    error= ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE;
+    unlock_data_lock= FALSE;
+    goto err;
+  }
+  DBUG_ASSERT(((uchar) buf[FLAGS_OFFSET] & LOG_EVENT_ACCEPT_OWN_F) == 0);
+
+  if (mi->rli.relay_log.description_event_for_queue->binlog_version<4 &&
+      buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
+    DBUG_RETURN(queue_old_event(mi,buf,event_len));
+
+#ifdef ENABLED_DEBUG_SYNC
+  /*
+    A (+d,dbug.rows_events_to_delay_relay_logging)-test is supposed to
+    create a few Write_log_events and after receiving the 1st of them
+    the IO thread signals to launch the SQL thread, and sets itself to
+    wait for a release signal.
+  */
+  DBUG_EXECUTE_IF("dbug.rows_events_to_delay_relay_logging",
+                  if ((buf[EVENT_TYPE_OFFSET] == WRITE_ROWS_EVENT_V1 ||
+                       buf[EVENT_TYPE_OFFSET] == WRITE_ROWS_EVENT) &&
+                      ++dbug_rows_event_count == 2)
+                  {
+                    const char act[]=
+                      "now SIGNAL start_sql_thread "
+                      "WAIT_FOR go_on_relay_logging";
+                    DBUG_ASSERT(debug_sync_service);
+                    DBUG_ASSERT(!debug_sync_set_action(current_thd,
+                                                       STRING_WITH_LEN(act)));
+                    dbug_rows_event_count = 0;
+                  };);
+#endif
+  s_id= uint4korr(buf + SERVER_ID_OFFSET);
+
+  mysql_mutex_lock(&mi->data_lock);
+
+  switch (buf[EVENT_TYPE_OFFSET]) {
+  case STOP_EVENT:
+    /*
+      We needn't write this event to the relay log. Indeed, it just indicates a
+      master server shutdown. The only thing this does is cleaning. But
+      cleaning is already done on a per-master-thread basis (as the master
+      server is shutting down cleanly, it has written all DROP TEMPORARY TABLE
+      prepared statements' deletion are TODO only when we binlog prep stmts).
+
+      We don't even increment mi->master_log_pos, because we may be just after
+      a Rotate event. Btw, in a few milliseconds we are going to have a Start
+      event from the next binlog (unless the master is presently running
+      without --log-bin).
+    */
+    goto err;
+  case ROTATE_EVENT:
+  {
+    Rotate_log_event rev(buf, checksum_alg != BINLOG_CHECKSUM_ALG_OFF ?
+                         event_len - BINLOG_CHECKSUM_LEN : event_len,
+                         mi->rli.relay_log.description_event_for_queue);
+    bool master_changed= false;
+    bool maybe_crashed= false;
+    // Exclude server start scenario
+    if ((mi->prev_master_id && mi->master_id) &&
+        (mi->prev_master_id != mi->master_id))
+      master_changed= true;
+    if ((mi->master_log_name[0]!='\0') &&
+        (strcmp(rev.new_log_ident, mi->master_log_name) != 0))
+      maybe_crashed= true;
+
+    if (unlikely((mi->gtid_reconnect_event_skip_count && master_changed) ||
+                 maybe_crashed) &&
+        unlikely(!mi->gtid_event_seen) && rev.is_artificial_event())
+    {
+      /*
+        Artificial Rotate_log_event is the first event we receive at the start
+        of each master binlog file. It gives the name of the new binlog file.
+
+        Normally, we already have this name from the real rotate event at the
+        end of the previous binlog file (unless we are making a new connection
+        using GTID). But if the master server restarted/crashed, there is no
+        rotate event at the end of the prior binlog file, so the name is new.
+
+        We use this fact to handle a special case of master crashing. If the
+        master crashed while writing the binlog, it might end with a partial
+        event group lacking the COMMIT/XID event, which must be rolled
+        back. If the slave IO thread happens to get a disconnect in the middle
+        of exactly this event group, it will try to reconnect at the same GTID
+        and skip already fetched events. However, that GTID did not commit on
+        the master before the crash, so it does not really exist, and the
+        master will connect the slave at the next following GTID starting in
+        the next binlog. This could confuse the slave and make it mix the
+        start of one event group with the end of another.
+
+        But we detect this case here, by noticing the change of binlog name
+        which detects the missing rotate event at the end of the previous
+        binlog file. In this case, we reset the counters to make us not skip
+        the next event group, and queue an artificial Format Description
+        event. The previously fetched incomplete event group will then be
+        rolled back when the Format Description event is executed by the SQL
+        thread.
+
+        A similar case is if the reconnect somehow connects to a different
+        master server (like due to a network proxy or IP address takeover).
+        We detect this case by noticing a change of server_id and in this
+        case likewise rollback the partially received event group.
+      */
+      Format_description_log_event fdle(4);
+      fdle.checksum_alg= checksum_alg;
+
+      /*
+        Possible crash is flagged in being created FD' common header
+        to conduct any necessary cleanup by the slave applier.
+      */
+      if (maybe_crashed)
+        fdle.flags |= LOG_EVENT_BINLOG_IN_USE_F;
+
+
+      if (mi->gtid_reconnect_event_skip_count)
+      {
+        if (master_changed)
+          sql_print_warning("The server_id of master server changed in the "
+                            "middle of GTID %u-%u-%llu. Assuming a change of "
+                            "master server, so rolling back the previously "
+                            "received partial transaction. Expected: %lu, "
+                            "received: %lu", mi->last_queued_gtid.domain_id,
+                            mi->last_queued_gtid.server_id,
+                            mi->last_queued_gtid.seq_no,
+                            mi->prev_master_id, mi->master_id);
+        else
+          sql_print_warning("Unexpected change of master binlog file name in "
+                            "the middle of GTID %u-%u-%llu, assuming that "
+                            "master has crashed and rolling back the "
+                            "transaction. Expected: '%s', received: '%s'",
+                            mi->last_queued_gtid.domain_id,
+                            mi->last_queued_gtid.server_id,
+                            mi->last_queued_gtid.seq_no, mi->master_log_name,
+                            rev.new_log_ident);
+      }
+      mysql_mutex_lock(log_lock);
+      if (likely(!rli->relay_log.write_event(&fdle) &&
+                 !rli->relay_log.flush_and_sync(NULL)))
+      {
+        rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+      }
+      else
+      {
+        error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+        mysql_mutex_unlock(log_lock);
+        goto err;
+      }
+      rli->relay_log.signal_relay_log_update();
+      mysql_mutex_unlock(log_lock);
+
+      mi->gtid_reconnect_event_skip_count= 0;
+      mi->events_queued_since_last_gtid= 0;
+    }
+    mi->prev_master_id= mi->master_id;
+
+    if (unlikely(process_io_rotate(mi, &rev)))
+    {
+      error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+      goto err;
+    }
+    /* 
+       Checksum special cases for the fake Rotate (R_f) event caused by the protocol
+       of events generation and serialization in RL where Rotate of master is 
+       queued right next to FD of slave.
+       Since it's only FD that carries the alg desc of FD_s has to apply to R_m.
+       Two special rules apply only to the first R_f which comes in before any FD_m.
+       The 2nd R_f should be compatible with the FD_s that must have taken over
+       the last seen FD_m's (A).
+       
+       RSC_1: If OM \and fake Rotate \and slave is configured to
+              to compute checksum for its first FD event for RL
+              the fake Rotate gets checksummed here.
+    */
+    if (uint4korr(&buf[0]) == 0 && checksum_alg == BINLOG_CHECKSUM_ALG_OFF &&
+        mi->rli.relay_log.relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_OFF)
+    {
+      ha_checksum rot_crc= 0;
+      event_len += BINLOG_CHECKSUM_LEN;
+      memcpy(rot_buf, buf, event_len - BINLOG_CHECKSUM_LEN);
+      int4store(&rot_buf[EVENT_LEN_OFFSET],
+                uint4korr(&rot_buf[EVENT_LEN_OFFSET]) + BINLOG_CHECKSUM_LEN);
+      rot_crc= my_checksum(rot_crc, (const uchar *) rot_buf,
+                           event_len - BINLOG_CHECKSUM_LEN);
+      int4store(&rot_buf[event_len - BINLOG_CHECKSUM_LEN], rot_crc);
+      DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
+      DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg ==
+                  mi->rli.relay_log.relay_log_checksum_alg);
+      /* the first one */
+      DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
+      save_buf= buf;
+      buf= rot_buf;
+    }
+    else
+      /*
+        RSC_2: If NM \and fake Rotate \and slave does not compute checksum
+        the fake Rotate's checksum is stripped off before relay-logging.
+      */
+      if (uint4korr(&buf[0]) == 0 && checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
+          mi->rli.relay_log.relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_OFF)
+      {
+        event_len -= BINLOG_CHECKSUM_LEN;
+        memcpy(rot_buf, buf, event_len);
+        int4store(&rot_buf[EVENT_LEN_OFFSET],
+                  uint4korr(&rot_buf[EVENT_LEN_OFFSET]) - BINLOG_CHECKSUM_LEN);
+        DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
+        DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg ==
+                    mi->rli.relay_log.relay_log_checksum_alg);
+        /* the first one */
+        DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
+        save_buf= buf;
+        buf= rot_buf;
+      }
+    /*
+      Now the I/O thread has just changed its mi->master_log_name, so
+      incrementing mi->master_log_pos is nonsense.
+    */
+    inc_pos= 0;
+    break;
+  }
+  case FORMAT_DESCRIPTION_EVENT:
+  {
+    /*
+      Create an event, and save it (when we rotate the relay log, we will have
+      to write this event again).
+    */
+    /*
+      We are the only thread which reads/writes description_event_for_queue.
+      The relay_log struct does not move (though some members of it can
+      change), so we needn't any lock (no rli->data_lock, no log lock).
+    */
+    Format_description_log_event* tmp;
+    const char* errmsg;
+    // mark it as undefined that is irrelevant anymore
+    mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
+    if (!(tmp= (Format_description_log_event*)
+          Log_event::read_log_event(buf, event_len, &errmsg,
+                                    mi->rli.relay_log.description_event_for_queue,
+                                    1)))
+    {
+      error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+      goto err;
+    }
+    tmp->copy_crypto_data(mi->rli.relay_log.description_event_for_queue);
+    delete mi->rli.relay_log.description_event_for_queue;
+    mi->rli.relay_log.description_event_for_queue= tmp;
+    if (tmp->checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
+      tmp->checksum_alg= BINLOG_CHECKSUM_ALG_OFF;
+
+    /* installing new value of checksum Alg for relay log */
+    mi->rli.relay_log.relay_log_checksum_alg= tmp->checksum_alg;
+
+    /*
+      Do not queue any format description event that we receive after a
+      reconnect where we are skipping over a partial event group received
+      before the reconnect.
+
+      (If we queued such an event, and it was the first format_description
+      event after master restart, the slave SQL thread would think that
+      the partial event group before it in the relay log was from a
+      previous master crash and should be rolled back).
+    */
+    if (unlikely(mi->gtid_reconnect_event_skip_count && !mi->gtid_event_seen))
+        gtid_skip_enqueue= true;
+
+    /*
+       Though this does some conversion to the slave's format, this will
+       preserve the master's binlog format version, and number of event types.
+    */
+    /*
+       If the event was not requested by the slave (the slave did not ask for
+       it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos
+    */
+    inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
+    DBUG_PRINT("info",("binlog format is now %d",
+                       mi->rli.relay_log.description_event_for_queue->binlog_version));
+
+  }
+  break;
+
+  case HEARTBEAT_LOG_EVENT:
+  {
+    /*
+      HB (heartbeat) cannot come before RL (Relay)
+    */
+    Heartbeat_log_event hb(buf,
+                           mi->rli.relay_log.relay_log_checksum_alg
+                           != BINLOG_CHECKSUM_ALG_OFF ?
+                           event_len - BINLOG_CHECKSUM_LEN : event_len,
+                           mi->rli.relay_log.description_event_for_queue);
+    if (!hb.is_valid())
+    {
+      error= ER_SLAVE_HEARTBEAT_FAILURE;
+      error_msg.append(STRING_WITH_LEN("inconsistent heartbeat event content;"));
+      error_msg.append(STRING_WITH_LEN("the event's data: log_file_name "));
+      error_msg.append((char*) hb.get_log_ident(), (uint) hb.get_ident_len());
+      error_msg.append(STRING_WITH_LEN(" log_pos "));
+      error_msg.append_ulonglong(hb.log_pos);
+      goto err;
+    }
+    mi->received_heartbeats++;
+    /* 
+       compare local and event's versions of log_file, log_pos.
+       
+       Heartbeat is sent only after an event corresponding to the corrdinates
+       the heartbeat carries.
+       Slave can not have a higher coordinate except in the only
+       special case when mi->master_log_name, master_log_pos have never
+       been updated by Rotate event i.e when slave does not have any history
+       with the master (and thereafter mi->master_log_pos is NULL).
+
+       Slave can have lower coordinates, if some event from master was omitted.
+
+       TODO: handling `when' for SHOW SLAVE STATUS' snds behind
+    */
+    if (memcmp(mi->master_log_name, hb.get_log_ident(), hb.get_ident_len()) ||
+        mi->master_log_pos > hb.log_pos) {
+      /* missed events of heartbeat from the past */
+      error= ER_SLAVE_HEARTBEAT_FAILURE;
+      error_msg.append(STRING_WITH_LEN("heartbeat is not compatible with local info;"));
+      error_msg.append(STRING_WITH_LEN("the event's data: log_file_name "));
+      error_msg.append((char*) hb.get_log_ident(), (uint) hb.get_ident_len());
+      error_msg.append(STRING_WITH_LEN(" log_pos "));
+      error_msg.append_ulonglong(hb.log_pos);
+      goto err;
+    }
+
+    /*
+      Heartbeat events doesn't count in the binlog size, so we don't have to
+      increment mi->master_log_pos
+    */
+    goto skip_relay_logging;
+  }
+  break;
+
+  case GTID_LIST_EVENT:
+  {
+    const char *errmsg;
+    Gtid_list_log_event *glev;
+    Log_event *tmp;
+    uint32 flags;
+
+    if (!(tmp= Log_event::read_log_event(buf, event_len, &errmsg,
+           mi->rli.relay_log.description_event_for_queue,
+           opt_slave_sql_verify_checksum)))
+    {
+      error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+      goto err;
+    }
+    glev= static_cast<Gtid_list_log_event*>(tmp);
+    event_pos= glev->log_pos;
+    flags= glev->gl_flags;
+    delete glev;
+
+    /*
+      We use fake Gtid_list events to update the old-style position (among
+      other things).
+
+      Early code created fake Gtid_list events with zero log_pos, those should
+      not modify old-style position.
+    */
+    if (event_pos == 0 || event_pos <= mi->master_log_pos)
+      inc_pos= 0;
+    else
+      inc_pos= event_pos - mi->master_log_pos;
+
+    if (mi->rli.until_condition == Relay_log_info::UNTIL_GTID &&
+        flags & Gtid_list_log_event::FLAG_UNTIL_REACHED)
+    {
+      char str_buf[128];
+      String str(str_buf, sizeof(str_buf), system_charset_info);
+      mi->rli.until_gtid_pos.to_string(&str);
+      sql_print_information("Slave I/O thread stops because it reached its"
+                            " UNTIL master_gtid_pos %s", str.c_ptr_safe());
+      mi->abort_slave= true;
+    }
+  }
+  break;
+
+  case GTID_EVENT:
+  {
+    DBUG_EXECUTE_IF("kill_slave_io_after_2_events",
+                    {
+                      mi->dbug_do_disconnect= true;
+                      mi->dbug_event_counter= 2;
+                    };);
+
+    uchar gtid_flag;
+
+    if (Gtid_log_event::peek(buf, event_len, checksum_alg,
+                             &event_gtid.domain_id, &event_gtid.server_id,
+                             &event_gtid.seq_no, &gtid_flag,
+                             rli->relay_log.description_event_for_queue))
+    {
+      error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+      goto err;
+    }
+    got_gtid_event= true;
+    if (mi->using_gtid == Master_info::USE_GTID_NO)
+      goto default_action;
+    if (unlikely(mi->gtid_reconnect_event_skip_count))
+    {
+      if (likely(!mi->gtid_event_seen))
+      {
+        mi->gtid_event_seen= true;
+        /*
+          If we are reconnecting, and we need to skip a partial event group
+          already queued to the relay log before the reconnect, then we check
+          that we actually get the same event group (same GTID) as before, so
+          we do not end up with half of one group and half another.
+
+          The only way we should be able to receive a different GTID than what
+          we expect is if the binlog on the master (or more likely the whole
+          master server) was replaced with a different one, on the same IP
+          address, _and_ the new master happens to have domains in a different
+          order so we get the GTID from a different domain first. Still, it is
+          best to protect against this case.
+        */
+        if (event_gtid.domain_id != mi->last_queued_gtid.domain_id ||
+            event_gtid.server_id != mi->last_queued_gtid.server_id ||
+            event_gtid.seq_no != mi->last_queued_gtid.seq_no)
+        {
+          bool first;
+          error= ER_SLAVE_UNEXPECTED_MASTER_SWITCH;
+          error_msg.append(STRING_WITH_LEN("Expected: "));
+          first= true;
+          rpl_slave_state_tostring_helper(&error_msg, &mi->last_queued_gtid,
+                                          &first);
+          error_msg.append(STRING_WITH_LEN(", received: "));
+          first= true;
+          rpl_slave_state_tostring_helper(&error_msg, &event_gtid, &first);
+          goto err;
+        }
+        if (global_system_variables.log_warnings > 1)
+        {
+          bool first= true;
+          StringBuffer<1024> gtid_text;
+          rpl_slave_state_tostring_helper(>id_text, &mi->last_queued_gtid,
+                                          &first);
+          sql_print_information("Slave IO thread is reconnected to "
+                                "receive Gtid_log_event %s. It is to skip %llu "
+                                "already received events including the gtid one",
+                                gtid_text.ptr(),
+                                mi->events_queued_since_last_gtid);
+        }
+        goto default_action;
+      }
+      else
+      {
+        bool first;
+        StringBuffer<1024> gtid_text;
+
+        gtid_text.append(STRING_WITH_LEN("Last received gtid: "));
+        first= true;
+        rpl_slave_state_tostring_helper(>id_text, &mi->last_queued_gtid,
+                                          &first);
+        gtid_text.append(STRING_WITH_LEN(", currently received: "));
+        first= true;
+        rpl_slave_state_tostring_helper(>id_text, &event_gtid, &first);
+
+        error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+        sql_print_error("Slave IO thread has received a new Gtid_log_event "
+                        "while skipping already logged events "
+                        "after reconnect. %s. %llu remains to be skipped. "
+                        "The number of originally read events was %llu",
+                        gtid_text.ptr(),
+                        mi->gtid_reconnect_event_skip_count,
+                        mi->events_queued_since_last_gtid);
+        goto err;
+      }
+    }
+    mi->gtid_event_seen= true;
+
+    /*
+      Unless the previous group is malformed,
+      we have successfully queued to relay log everything before this GTID, so
+      in case of reconnect we can start from after any previous GTID.
+      (We must have updated gtid_current_pos earlier at the end of
+      the previous event group. Unless ...)
+    */
+    if (unlikely(mi->events_queued_since_last_gtid > 0))
+    {
+      /*
+        ...unless the last group has not been completed. An assert below
+        can be satisfied only with the strict mode that ensures
+        against "genuine" gtid duplicates.
+      */
+      IF_DBUG(rpl_gtid *gtid_in_slave_state=
+              mi->gtid_current_pos.find(mi->last_queued_gtid.domain_id),);
+
+      // Slave gtid state must not have updated yet to the last received gtid.
+      DBUG_ASSERT((mi->using_gtid == Master_info::USE_GTID_NO ||
+                   !opt_gtid_strict_mode) ||
+                  (!gtid_in_slave_state ||
+                   !(*gtid_in_slave_state == mi->last_queued_gtid)));
+
+      DBUG_EXECUTE_IF("slave_discard_xid_for_gtid_0_x_1000",
+      {
+        /* Inject an event group that is missing its XID commit event. */
+        if ((mi->last_queued_gtid.domain_id == 0 &&
+             mi->last_queued_gtid.seq_no == 1000) ||
+            (mi->last_queued_gtid.domain_id == 1 &&
+             mi->last_queued_gtid.seq_no == 32))
+        {
+          sql_print_warning(
+            "Unexpected break of being relay-logged GTID %u-%u-%llu "
+            "event group by the current GTID event %u-%u-%llu",
+            PARAM_GTID(mi->last_queued_gtid),PARAM_GTID(event_gtid));
+          DBUG_SET("-d,slave_discard_xid_for_gtid_0_x_1000");
+          goto dbug_gtid_accept;
+        }
+      });
+      error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+      sql_print_error("Unexpected break of being relay-logged GTID %u-%u-%llu "
+                      "event group by the current GTID event %u-%u-%llu",
+                      PARAM_GTID(mi->last_queued_gtid),PARAM_GTID(event_gtid));
+      goto err;
+    }
+    else if (unlikely(mi->gtid_reconnect_event_skip_count > 0))
+    {
+      if (mi->gtid_reconnect_event_skip_count ==
+          mi->events_queued_since_last_gtid)
+      {
+        DBUG_ASSERT(event_gtid == mi->last_queued_gtid);
+
+        goto default_action;
+      }
+
+      DBUG_ASSERT(0);
+    }
+    // else_likely{...
+#ifndef DBUG_OFF
+dbug_gtid_accept:
+    DBUG_EXECUTE_IF("slave_discard_gtid_0_x_1002",
+    {
+      if (mi->last_queued_gtid.server_id == 27697 &&
+          mi->last_queued_gtid.seq_no == 1002)
+      {
+        DBUG_SET("-d,slave_discard_gtid_0_x_1002");
+        goto skip_relay_logging;
+      }
+    });
+#endif
+    mi->last_queued_gtid= event_gtid;
+    mi->last_queued_gtid_standalone=
+      (gtid_flag & Gtid_log_event::FL_STANDALONE) != 0;
+
+    /* Should filter all the subsequent events in the current GTID group? */
+    mi->domain_id_filter.do_filter(event_gtid.domain_id);
+
+    ++mi->events_queued_since_last_gtid;
+    inc_pos= event_len;
+
+    /*
+      To compute `true` is normal for this *now* semisync slave server when
+      it has passed its crash-recovery as a former master.
+    */
+    mi->do_accept_own_server_id=
+      (s_id == global_system_variables.server_id &&
+       rpl_semi_sync_slave_enabled && opt_gtid_strict_mode &&
+       mi->using_gtid != Master_info::USE_GTID_NO &&
+        !mysql_bin_log.check_strict_gtid_sequence(event_gtid.domain_id,
+                                                  event_gtid.server_id,
+                                                  event_gtid.seq_no,
+                                                  true));
+    // ...} eof else_likely
+  }
+  break;
+  /*
+    Binlog compressed event should uncompress in IO thread
+  */
+  case QUERY_COMPRESSED_EVENT:
+    inc_pos= event_len;
+    if (query_event_uncompress(rli->relay_log.description_event_for_queue,
+                               checksum_alg == BINLOG_CHECKSUM_ALG_CRC32,
+                               buf, event_len, new_buf_arr, sizeof(new_buf_arr),
+                               &is_malloc, &new_buf, &event_len))
+    {
+      char  llbuf[22];
+      error = ER_BINLOG_UNCOMPRESS_ERROR;
+      error_msg.append(STRING_WITH_LEN("binlog uncompress error, master log_pos: "));
+      llstr(mi->master_log_pos, llbuf);
+      error_msg.append(llbuf, strlen(llbuf));
+      goto err;
+    }
+    buf = new_buf;
+    is_compress_event = true;
+    goto default_action;
+
+  case WRITE_ROWS_COMPRESSED_EVENT:
+  case UPDATE_ROWS_COMPRESSED_EVENT:
+  case DELETE_ROWS_COMPRESSED_EVENT:
+  case WRITE_ROWS_COMPRESSED_EVENT_V1:
+  case UPDATE_ROWS_COMPRESSED_EVENT_V1:
+  case DELETE_ROWS_COMPRESSED_EVENT_V1:
+    inc_pos = event_len;
+    {
+      if (row_log_event_uncompress(rli->relay_log.description_event_for_queue,
+                                   checksum_alg == BINLOG_CHECKSUM_ALG_CRC32,
+                                   buf, event_len, new_buf_arr,
+                                   sizeof(new_buf_arr),
+                                   &is_malloc, &new_buf, &event_len))
+      {
+        char  llbuf[22];
+        error = ER_BINLOG_UNCOMPRESS_ERROR;
+        error_msg.append(STRING_WITH_LEN("binlog uncompress error, master log_pos: "));
+        llstr(mi->master_log_pos, llbuf);
+        error_msg.append(llbuf, strlen(llbuf));
+        goto err;
+      }
+    }
+    is_compress_event = true;
+    buf = new_buf;
+    /*
+      As we are uncertain about compressed V2 rows events, we don't track
+      them
+    */
+    if (LOG_EVENT_IS_ROW_V2((Log_event_type) buf[EVENT_TYPE_OFFSET]))
+      goto default_action;
+    /* fall through */
+  case WRITE_ROWS_EVENT_V1:
+  case UPDATE_ROWS_EVENT_V1:
+  case DELETE_ROWS_EVENT_V1:
+  case WRITE_ROWS_EVENT:
+  case UPDATE_ROWS_EVENT:
+  case DELETE_ROWS_EVENT:
+    {
+      is_rows_event= true;
+      mi->rows_event_tracker.update(mi->master_log_name,
+                                    mi->master_log_pos,
+                                    buf,
+                                    mi->rli.relay_log.
+                                    description_event_for_queue);
+
+      DBUG_EXECUTE_IF("simulate_stmt_end_rows_event_loss",
+                      {
+                        mi->rows_event_tracker.stmt_end_seen= false;
+                      });
+    }
+    goto default_action;
+
+#ifndef DBUG_OFF
+  case XID_EVENT:
+    DBUG_EXECUTE_IF("slave_discard_xid_for_gtid_0_x_1000",
+    {
+      if (mi->last_queued_gtid.server_id == 27697 &&
+          mi->last_queued_gtid.seq_no == 1000)
+      {
+        DBUG_SET("-d,slave_discard_xid_for_gtid_0_x_1000");
+        goto skip_relay_logging;
+      }
+      /* Inject an event group that is missing its XID commit event. */
+      if (mi->last_queued_gtid.domain_id == 0 &&
+          mi->last_queued_gtid.seq_no == 1000)
+        goto skip_relay_logging;
+    });
+    goto default_action;
+#endif
+  case START_ENCRYPTION_EVENT:
+    if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
+    {
+      /*
+         If the event was not requested by the slave (the slave did not ask for
+         it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos
+      */
+      inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
+      break;
+    }
+    /* fall through */
+  default:
+  default_action:
+    DBUG_EXECUTE_IF("kill_slave_io_after_2_events",
+                    {
+                      if (mi->dbug_do_disconnect &&
+                          (LOG_EVENT_IS_QUERY((Log_event_type)(uchar)buf[EVENT_TYPE_OFFSET]) ||
+                           ((uchar)buf[EVENT_TYPE_OFFSET] == TABLE_MAP_EVENT))
+                          && (--mi->dbug_event_counter == 0))
+                      {
+                        error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+                        mi->dbug_do_disconnect= false;  /* Safety */
+                        goto err;
+                      }
+                    };);
+
+    DBUG_EXECUTE_IF("kill_slave_io_before_commit",
+                    {
+                      if ((uchar)buf[EVENT_TYPE_OFFSET] == XID_EVENT ||
+                          ((uchar)buf[EVENT_TYPE_OFFSET] == QUERY_EVENT &&    /* QUERY_COMPRESSED_EVENT would never be commmit or rollback */
+                           Query_log_event::peek_is_commit_rollback(buf,
+                                                                    event_len,
+                                                                    checksum_alg)))
+                      {
+                        error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+                        goto err;
+                      }
+                    };);
+
+    if (mi->using_gtid != Master_info::USE_GTID_NO)
+    {
+      if (likely(mi->gtid_event_seen))
+      {
+        if (unlikely(mi->gtid_reconnect_event_skip_count))
+        {
+          if (!got_gtid_event &&
+              mi->gtid_reconnect_event_skip_count ==
+              mi->events_queued_since_last_gtid)
+            goto gtid_not_start; // the 1st re-sent must be gtid
+
+          --mi->gtid_reconnect_event_skip_count;
+          gtid_skip_enqueue= true;
+        }
+        else if (likely(mi->events_queued_since_last_gtid))
+        {
+          DBUG_ASSERT(!got_gtid_event);
+
+          ++mi->events_queued_since_last_gtid;
+        }
+        else if (Log_event::is_group_event((Log_event_type) (uchar)
+                                           buf[EVENT_TYPE_OFFSET]))
+        {
+          goto gtid_not_start; // no first gtid event in this group
+        }
+      }
+      else if (Log_event::is_group_event((Log_event_type) (uchar)
+                                           buf[EVENT_TYPE_OFFSET]))
+      {
+      gtid_not_start:
+
+        DBUG_ASSERT(!got_gtid_event);
+
+        error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+        sql_print_error("The current group of events starts with "
+                        "a non-GTID %s event; "
+                        "the last seen GTID is %u-%u-%llu",
+                        Log_event::get_type_str((Log_event_type) (uchar)
+                                                buf[EVENT_TYPE_OFFSET]),
+                        mi->last_queued_gtid);
+        goto err;
+      }
+    }
+
+    if (!is_compress_event)
+      inc_pos= event_len;
+
+    break;
+  }
+
+  /*
+    Integrity of Rows- event group check.
+    A sequence of Rows- events must end with STMT_END_F flagged one.
+    Even when Heartbeat event interrupts Rows- events flow this must indicate a
+    malfunction e.g logging on the master.
+  */
+  if (((uchar) buf[EVENT_TYPE_OFFSET] != HEARTBEAT_LOG_EVENT) &&
+      !is_rows_event &&
+      mi->rows_event_tracker.check_and_report(mi->master_log_name,
+                                              mi->master_log_pos))
+  {
+    error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+    goto err;
+  }
+
+  /*
+    If we filter events master-side (eg. @@skip_replication), we will see holes
+    in the event positions from the master. If we see such a hole, adjust
+    mi->master_log_pos accordingly so we maintain the correct position (for
+    reconnect, MASTER_POS_WAIT(), etc.)
+  */
+  if (inc_pos > 0 &&
+      event_len >= LOG_POS_OFFSET+4 &&
+      (event_pos= uint4korr(buf+LOG_POS_OFFSET)) > mi->master_log_pos + inc_pos)
+  {
+    inc_pos= event_pos - mi->master_log_pos;
+    DBUG_PRINT("info", ("Adjust master_log_pos %llu->%llu to account for "
+                        "master-side filtering",
+                        mi->master_log_pos + inc_pos, event_pos));
+  }
+
+  /*
+     If this event is originating from this server, don't queue it.
+     We don't check this for 3.23 events because it's simpler like this; 3.23
+     will be filtered anyway by the SQL slave thread which also tests the
+     server id (we must also keep this test in the SQL thread, in case somebody
+     upgrades a 4.0 slave which has a not-filtered relay log).
+
+     ANY event coming from ourselves can be ignored: it is obvious for queries;
+     for STOP_EVENT/ROTATE_EVENT/START_EVENT: these cannot come from ourselves
+     (--log-slave-updates would not log that) unless this slave is also its
+     direct master (an unsupported, useless setup!).
+  */
+
+  mysql_mutex_lock(log_lock);
+  /*
+    Write the event to the relay log, unless we reconnected in the middle
+    of an event group and now need to skip the initial part of the group that
+    we already wrote before reconnecting.
+  */
+  if (unlikely(gtid_skip_enqueue))
+  {
+    mi->master_log_pos+= inc_pos;
+    if ((uchar)buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT &&
+        s_id == mi->master_id)
+    {
+      /*
+        If we write this master's description event in the middle of an event
+        group due to GTID reconnect, SQL thread will think that master crashed
+        in the middle of the group and roll back the first half, so we must not.
+
+        But we still have to write an artificial copy of the masters description
+        event, to override the initial slave-version description event so that
+        SQL thread has the right information for parsing the events it reads.
+      */
+      rli->relay_log.description_event_for_queue->created= 0;
+      rli->relay_log.description_event_for_queue->set_artificial_event();
+      if (rli->relay_log.append_no_lock
+          (rli->relay_log.description_event_for_queue))
+        error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+      else
+        rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+    }
+    else if (mi->gtid_reconnect_event_skip_count == 0)
+    {
+      /*
+        Add a fake rotate event so that SQL thread can see the old-style
+        position where we re-connected in the middle of a GTID event group.
+      */
+      Rotate_log_event fake_rev(mi->master_log_name, 0, mi->master_log_pos, 0);
+      fake_rev.server_id= mi->master_id;
+      if (rli->relay_log.append_no_lock(&fake_rev))
+        error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+      else
+        rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+    }
+  }
+  else
+  if ((s_id == global_system_variables.server_id &&
+       !(mi->rli.replicate_same_server_id ||
+         mi->do_accept_own_server_id)) ||
+      event_that_should_be_ignored(buf) ||
+      /*
+        the following conjunction deals with IGNORE_SERVER_IDS, if set
+        If the master is on the ignore list, execution of
+        format description log events and rotate events is necessary.
+      */
+      (mi->ignore_server_ids.elements > 0 &&
+       mi->shall_ignore_server_id(s_id) &&
+       /* everything is filtered out from non-master */
+       (s_id != mi->master_id ||
+        /* for the master meta information is necessary */
+        ((uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT &&
+         (uchar)buf[EVENT_TYPE_OFFSET] != ROTATE_EVENT))) ||
+
+      /*
+        Check whether it needs to be filtered based on domain_id
+        (DO_DOMAIN_IDS/IGNORE_DOMAIN_IDS).
+      */
+      (mi->domain_id_filter.is_group_filtered() &&
+       Log_event::is_group_event((Log_event_type)(uchar)
+                                 buf[EVENT_TYPE_OFFSET])))
+  {
+    /*
+      Do not write it to the relay log.
+      a) We still want to increment mi->master_log_pos, so that we won't
+      re-read this event from the master if the slave IO thread is now
+      stopped/restarted (more efficient if the events we are ignoring are big
+      LOAD DATA INFILE).
+      b) We want to record that we are skipping events, for the information of
+      the slave SQL thread, otherwise that thread may let
+      rli->group_relay_log_pos stay too small if the last binlog's event is
+      ignored.
+      But events which were generated by this slave and which do not exist in
+      the master's binlog (i.e. Format_desc, Rotate & Stop) should not increment
+      mi->master_log_pos.
+      If the event is originated remotely and is being filtered out by
+      IGNORE_SERVER_IDS it increments mi->master_log_pos
+      as well as rli->group_relay_log_pos.
+    */
+    if (!(s_id == global_system_variables.server_id &&
+          !mi->rli.replicate_same_server_id) ||
+        ((uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT &&
+         (uchar)buf[EVENT_TYPE_OFFSET] != ROTATE_EVENT &&
+         (uchar)buf[EVENT_TYPE_OFFSET] != STOP_EVENT))
+    {
+      mi->master_log_pos+= inc_pos;
+      memcpy(rli->ign_master_log_name_end, mi->master_log_name, FN_REFLEN);
+      DBUG_ASSERT(rli->ign_master_log_name_end[0]);
+      rli->ign_master_log_pos_end= mi->master_log_pos;
+      if (got_gtid_event)
+        rli->ign_gtids.update(&event_gtid);
+    }
+    // the slave SQL thread needs to re-check
+    rli->relay_log.signal_relay_log_update();
+    DBUG_PRINT("info", ("master_log_pos: %lu, event originating from %u server, ignored",
+                        (ulong) mi->master_log_pos, uint4korr(buf + SERVER_ID_OFFSET)));
+  }
+  else
+  {
+    if (mi->do_accept_own_server_id)
+    {
+      int2store(const_cast(buf + FLAGS_OFFSET),
+                uint2korr(buf + FLAGS_OFFSET) | LOG_EVENT_ACCEPT_OWN_F);
+      if (checksum_alg != BINLOG_CHECKSUM_ALG_OFF)
+      {
+        ha_checksum crc= 0;
+
+        crc= my_checksum(crc, (const uchar *) buf,
+                         event_len - BINLOG_CHECKSUM_LEN);
+        int4store(&buf[event_len - BINLOG_CHECKSUM_LEN], crc);
+      }
+    }
+    if (likely(!rli->relay_log.write_event_buffer((uchar*)buf, event_len)))
+    {
+      mi->master_log_pos+= inc_pos;
+      DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
+      rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+    }
+    else
+    {
+      error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+    }
+    rli->ign_master_log_name_end[0]= 0; // last event is not ignored
+    if (got_gtid_event)
+      rli->ign_gtids.remove_if_present(&event_gtid);
+    if (save_buf != NULL)
+      buf= save_buf;
+  }
+  mysql_mutex_unlock(log_lock);
+
+  if (likely(!error) &&
+      mi->using_gtid != Master_info::USE_GTID_NO &&
+      mi->events_queued_since_last_gtid > 0 &&
+      ( (mi->last_queued_gtid_standalone &&
+         (LOG_EVENT_IS_QUERY((Log_event_type)(uchar)
+                             buf[EVENT_TYPE_OFFSET]) ||
+          (uchar)buf[EVENT_TYPE_OFFSET] == INCIDENT_EVENT)) ||
+        (!mi->last_queued_gtid_standalone &&
+         ((uchar)buf[EVENT_TYPE_OFFSET] == XID_EVENT ||
+          (uchar)buf[EVENT_TYPE_OFFSET] == XA_PREPARE_LOG_EVENT ||
+          ((uchar)buf[EVENT_TYPE_OFFSET] == QUERY_EVENT &&    /* QUERY_COMPRESSED_EVENT would never be commmit or rollback */
+           Query_log_event::peek_is_commit_rollback(buf, event_len,
+                                                    checksum_alg))))))
+    {
+      /*
+        The whole of the current event group is queued. So in case of
+        reconnect we can start from after the current GTID.
+      */
+      if (gtid_skip_enqueue)
+      {
+        bool first= true;
+        StringBuffer<1024> gtid_text;
+
+        DBUG_ASSERT(mi->events_queued_since_last_gtid > 1);
+
+        rpl_slave_state_tostring_helper(>id_text, &mi->last_queued_gtid,
+                                        &first);
+        sql_print_error("Slave IO thread received a terminal event from "
+                        "group %s whose retrieval was interrupted "
+                        "with reconnect. We still had %llu events to read. "
+                        "The number of originally read events was %llu",
+                        gtid_text.ptr(),
+                        mi->gtid_reconnect_event_skip_count,
+                        mi->events_queued_since_last_gtid);
+        error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+        goto err;
+      }
+      mi->gtid_current_pos.update(&mi->last_queued_gtid);
+      mi->events_queued_since_last_gtid= 0;
+
+      /* Reset the domain_id_filter flag. */
+      mi->domain_id_filter.reset_filter();
+    }
+
+skip_relay_logging:
+
+err:
+  if (unlock_data_lock)
+    mysql_mutex_unlock(&mi->data_lock);
+  DBUG_PRINT("info", ("error: %d", error));
+
+  /*
+    Do not print ER_SLAVE_RELAY_LOG_WRITE_FAILURE error here, as the caller
+    handle_slave_io() prints it on return.
+  */
+  if (unlikely(error) && error != ER_SLAVE_RELAY_LOG_WRITE_FAILURE)
+    mi->report(ERROR_LEVEL, error, NULL, ER_DEFAULT(error),
+               error_msg.ptr());
+
+  if (unlikely(is_malloc))
+    my_free((void *)new_buf);
+
+  DBUG_RETURN(error);
+}
+
+
+/**
+  Close and free everything associated with a Relay_log_info:
+  the relay-log.info file, the relay log file currently being read,
+  the relay log itself, and the slave's temporary tables.
+
+  A no-op if rli was never initialized.
+
+  @param rli  Relay log state to tear down
+*/
+void end_relay_log_info(Relay_log_info* rli)
+{
+  mysql_mutex_t *log_lock;
+  DBUG_ENTER("end_relay_log_info");
+
+  rli->error_on_rli_init_info= false;
+  if (!rli->inited)
+    DBUG_VOID_RETURN;
+  /* Close the relay-log.info file, if it is open. */
+  if (rli->info_fd >= 0)
+  {
+    end_io_cache(&rli->info_file);
+    mysql_file_close(rli->info_fd, MYF(MY_WME));
+    rli->info_fd = -1;
+  }
+  /* Close the relay log file currently being read, if any. */
+  if (rli->cur_log_fd >= 0)
+  {
+    end_io_cache(&rli->cache_buf);
+    mysql_file_close(rli->cur_log_fd, MYF(MY_WME));
+    rli->cur_log_fd = -1;
+  }
+  rli->inited = 0;
+  /* Closing the relay log itself must be done under its log lock. */
+  log_lock= rli->relay_log.get_log_lock();
+  mysql_mutex_lock(log_lock);
+  rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT);
+  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+  mysql_mutex_unlock(log_lock);
+  /*
+    Delete the slave's temporary tables from memory.
+    In the future there will be other actions than this, to ensure persistence
+    of slave's temp tables after shutdown.
+  */
+  rli->close_temporary_tables();
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Hook to detach the active VIO before closing a connection handle.
+
+  The client API might close the connection (and associated data)
+  in case it encounters an unrecoverable (network) error. This hook
+  is called from the client code before the VIO handle is deleted,
+  and allows the thread to detach the active vio so it does not point
+  to freed memory.
+
+  Other calls to THD::clear_active_vio throughout this module are
+  redundant due to the hook but are left in place for illustrative
+  purposes.
+*/
+
+extern "C" void slave_io_thread_detach_vio()
+{
+#ifdef SIGNAL_WITH_VIO_CLOSE
+  /* Only slave threads have an active vio worth detaching. */
+  THD *current= current_thd;
+  if (!current)
+    return;
+  if (current->slave_thread)
+    current->clear_active_vio();
+#endif
+}
+
+
+/*
+  Try to connect until successful or slave killed
+
+  SYNOPSIS
+    safe_connect()
+    thd                 Thread handler for slave
+    mysql               MySQL connection handle
+    mi                  Replication handle
+
+  RETURN
+    0   ok
+    #   Error
+*/
+
+static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi)
+{
+  DBUG_ENTER("safe_connect");
+
+  /* A first-time connect is just connect_to_master() without reconnect. */
+  int result= connect_to_master(thd, mysql, mi, 0, 0);
+  DBUG_RETURN(result);
+}
+
+
+/*
+  SYNOPSIS
+    connect_to_master()
+    thd                 Thread handler for slave I/O thread
+    mysql               MySQL connection handle to (re)connect
+    mi                  Replication handle with connection parameters
+    reconnect           If true use mysql_reconnect(), else a fresh
+                        mysql_real_connect()
+    suppress_warnings   If true, do not log the "replication resumed"
+                        message after a successful reconnect
+
+  IMPLEMENTATION
+    Try to connect until successful or slave killed or we have retried
+    master_retry_count times
+
+  RETURN
+    0   connected
+    1   slave was killed, or master_retry_count attempts failed
+*/
+
+static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
+                             bool reconnect, bool suppress_warnings)
+{
+  int slave_was_killed;
+  int last_errno= -2;                           // impossible error
+  ulong err_count=0;
+  my_bool my_true= 1;
+  DBUG_ENTER("connect_to_master");
+  set_slave_max_allowed_packet(thd, mysql);
+#ifndef DBUG_OFF
+  /* Debug builds can simulate a disconnect after N events. */
+  mi->events_till_disconnect = disconnect_slave_event_count;
+#endif
+  ulong client_flag= CLIENT_REMEMBER_OPTIONS;
+  if (opt_slave_compressed_protocol)
+    client_flag|= CLIENT_COMPRESS;                /* We will use compression */
+
+  mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, (char *) &slave_net_timeout);
+  mysql_options(mysql, MYSQL_OPT_READ_TIMEOUT, (char *) &slave_net_timeout);
+  mysql_options(mysql, MYSQL_OPT_USE_THREAD_SPECIFIC_MEMORY,
+                (char*) &my_true);
+
+#ifdef HAVE_OPENSSL
+  /* Apply TLS settings configured via CHANGE MASTER ... MASTER_SSL_*. */
+  if (mi->ssl)
+  {
+    mysql_ssl_set(mysql,
+                  mi->ssl_key[0]?mi->ssl_key:0,
+                  mi->ssl_cert[0]?mi->ssl_cert:0,
+                  mi->ssl_ca[0]?mi->ssl_ca:0,
+                  mi->ssl_capath[0]?mi->ssl_capath:0,
+                  mi->ssl_cipher[0]?mi->ssl_cipher:0);
+    mysql_options(mysql, MYSQL_OPT_SSL_CRL,
+                  mi->ssl_crl[0] ? mi->ssl_crl : 0);
+    mysql_options(mysql, MYSQL_OPT_SSL_CRLPATH,
+                  mi->ssl_crlpath[0] ? mi->ssl_crlpath : 0);
+    mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT,
+                  &mi->ssl_verify_server_cert);
+  }
+#endif
+
+  /*
+    If server's default charset is not supported (like utf16, utf32) as client
+    charset, then set client charset to 'latin1' (default client charset).
+  */
+  if (is_supported_parser_charset(default_charset_info))
+    mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset_info->cs_name.str);
+  else
+  {
+    sql_print_information("'%s' can not be used as client character set. "
+                          "'%s' will be used as default client character set "
+                          "while connecting to master.",
+                          default_charset_info->cs_name.str,
+                          default_client_charset_info->cs_name.str);
+    mysql_options(mysql, MYSQL_SET_CHARSET_NAME,
+                  default_client_charset_info->cs_name.str);
+  }
+
+  /* This one is not strictly needed but we have it here for completeness */
+  mysql_options(mysql, MYSQL_SET_CHARSET_DIR, (char *) charsets_dir);
+
+  /* Set MYSQL_PLUGIN_DIR in case master asks for an external authentication plugin */
+  if (opt_plugin_dir_ptr && *opt_plugin_dir_ptr)
+    mysql_options(mysql, MYSQL_PLUGIN_DIR, opt_plugin_dir_ptr);
+
+  /* we disallow empty users */
+  if (mi->user[0] == 0)
+  {
+    mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
+               ER_THD(thd, ER_SLAVE_FATAL_ERROR),
+               "Invalid (empty) username when attempting to "
+               "connect to the master server. Connection attempt "
+               "terminated.");
+    DBUG_RETURN(1);
+  }
+  /* Retry loop: exits on success, kill, or exhausted retry count. */
+  while (!(slave_was_killed = io_slave_killed(mi)) &&
+         (reconnect ? mysql_reconnect(mysql) != 0 :
+          mysql_real_connect(mysql, mi->host, mi->user, mi->password, 0,
+                             mi->port, 0, client_flag) == 0))
+  {
+    /* Don't repeat last error */
+    if ((int)mysql_errno(mysql) != last_errno)
+    {
+      last_errno=mysql_errno(mysql);
+      /* A new error was seen; stop suppressing log output from now on. */
+      suppress_warnings= 0;
+      mi->report(ERROR_LEVEL, last_errno, NULL,
+                 "error %s to master '%s@%s:%d'"
+                 " - retry-time: %d  maximum-retries: %lu  message: %s",
+                 (reconnect ? "reconnecting" : "connecting"),
+                 mi->user, mi->host, mi->port,
+                 mi->connect_retry, master_retry_count,
+                 mysql_error(mysql));
+    }
+    /*
+      By default we try forever. The reason is that failure will trigger
+      master election, so if the user did not set master_retry_count we
+      do not want to have election triggered on the first failure to
+      connect
+    */
+    if (++err_count == master_retry_count)
+    {
+      slave_was_killed=1;
+      if (reconnect)
+        change_rpl_status(RPL_ACTIVE_SLAVE,RPL_LOST_SOLDIER);
+      break;
+    }
+    /* Wait connect_retry seconds (interruptible by kill) before retrying. */
+    slave_sleep(thd,mi->connect_retry,io_slave_killed, mi);
+  }
+
+  if (!slave_was_killed)
+  {
+    mi->clear_error(); // clear possible left over reconnect error
+    if (reconnect)
+    {
+      if (!suppress_warnings && global_system_variables.log_warnings)
+        sql_print_information("Slave: connected to master '%s@%s:%d',"
+                              "replication resumed in log '%s' at "
+                              "position %llu", mi->user, mi->host, mi->port,
+                              IO_RPL_LOG_NAME, mi->master_log_pos);
+    }
+    else
+    {
+      change_rpl_status(RPL_IDLE_SLAVE,RPL_ACTIVE_SLAVE);
+      general_log_print(thd, COM_CONNECT_OUT, "%s@%s:%d",
+                        mi->user, mi->host, mi->port);
+    }
+#ifdef SIGNAL_WITH_VIO_CLOSE
+    thd->set_active_vio(mysql->net.vio);
+#endif
+  }
+  /* Enable client-library auto-reconnect for subsequent calls. */
+  mysql->reconnect= 1;
+  DBUG_PRINT("exit",("slave_was_killed: %d", slave_was_killed));
+  DBUG_RETURN(slave_was_killed);
+}
+
+
+/*
+  safe_reconnect()
+
+  IMPLEMENTATION
+    Try to connect until successful or slave killed or we have retried
+    master_retry_count times
+*/
+
+static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
+                          bool suppress_warnings)
+{
+  DBUG_ENTER("safe_reconnect");
+
+  /* A reconnect is connect_to_master() with the reconnect flag set. */
+  int result= connect_to_master(thd, mysql, mi, 1, suppress_warnings);
+  DBUG_RETURN(result);
+}
+
+
+#ifdef NOT_USED
+/**
+  Create (or reuse) a client connection to the master.
+
+  Must be called in slave I/O thread context: the Master_info is fetched
+  from thread-specific storage (RPL_MASTER_INFO).
+
+  @param mysql  existing handle to connect, or NULL to allocate a new one
+
+  @return the connected handle, or NULL on failure.  A handle allocated
+          by this function is freed again before returning NULL.
+*/
+MYSQL *rpl_connect_master(MYSQL *mysql)
+{
+  Master_info *mi= my_pthread_getspecific_ptr(Master_info*, RPL_MASTER_INFO);
+  bool allocated= false;
+  my_bool my_true= 1;
+  THD *thd;
+
+  if (!mi)
+  {
+    sql_print_error("'rpl_connect_master' must be called in slave I/O thread context.");
+    return NULL;
+  }
+  thd= mi->io_thd;
+  if (!mysql)
+  {
+    if(!(mysql= mysql_init(NULL)))
+    {
+      sql_print_error("rpl_connect_master: failed in mysql_init()");
+      return NULL;
+    }
+    allocated= true;
+  }
+
+  /*
+    XXX: copied from connect_to_master, this function should not
+    change the slave status, so we cannot use connect_to_master
+    directly
+
+    TODO: make this part a separate function to eliminate duplication
+  */
+  mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, (char *) &slave_net_timeout);
+  mysql_options(mysql, MYSQL_OPT_READ_TIMEOUT, (char *) &slave_net_timeout);
+  mysql_options(mysql, MYSQL_OPT_USE_THREAD_SPECIFIC_MEMORY,
+                (char*) &my_true);
+
+#ifdef HAVE_OPENSSL
+  /* Apply TLS settings configured via CHANGE MASTER ... MASTER_SSL_*. */
+  if (mi->ssl)
+  {
+    mysql_ssl_set(mysql,
+                  mi->ssl_key[0]?mi->ssl_key:0,
+                  mi->ssl_cert[0]?mi->ssl_cert:0,
+                  mi->ssl_ca[0]?mi->ssl_ca:0,
+                  mi->ssl_capath[0]?mi->ssl_capath:0,
+                  mi->ssl_cipher[0]?mi->ssl_cipher:0);
+    mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT,
+                  &mi->ssl_verify_server_cert);
+  }
+#endif
+
+  mysql_options(mysql, MYSQL_SET_CHARSET_NAME,
+                default_charset_info->cs_name.str);
+  /* This one is not strictly needed but we have it here for completeness */
+  mysql_options(mysql, MYSQL_SET_CHARSET_DIR, (char *) charsets_dir);
+
+  /* Refuse empty users, honor a pending kill, then attempt the connect. */
+  if (mi->user == NULL
+      || mi->user[0] == 0
+      || io_slave_killed( mi)
+      || !mysql_real_connect(mysql, mi->host, mi->user, mi->password, 0,
+                             mi->port, 0, 0))
+  {
+    if (!io_slave_killed( mi))
+      sql_print_error("rpl_connect_master: error connecting to master: %s (server_error: %d)",
+                      mysql_error(mysql), mysql_errno(mysql));
+
+    if (allocated)
+      mysql_close(mysql);                       // this will free the object
+    return NULL;
+  }
+  return mysql;
+}
+#endif
+
+
+/*
+  Called when we notice that the current "hot" log got rotated under our feet.
+
+  Reopens the (now cold) relay log file through rli->cache_buf and seeks back
+  to the position we were at.  Returns the reopened IO_CACHE, or 0 on error
+  (with *errmsg set by open_binlog()).
+*/
+
+static IO_CACHE *reopen_relay_log(Relay_log_info *rli, const char **errmsg)
+{
+  DBUG_ENTER("reopen_relay_log");
+  /* We must currently be reading the hot log through a foreign descriptor */
+  DBUG_ASSERT(rli->cur_log != &rli->cache_buf);
+  DBUG_ASSERT(rli->cur_log_fd == -1);
+
+  IO_CACHE *cur_log = rli->cur_log=&rli->cache_buf;
+  if ((rli->cur_log_fd=open_binlog(cur_log,rli->event_relay_log_name,
+                                   errmsg)) <0)
+    DBUG_RETURN(0);
+  /*
+    We want to start exactly where we were before:
+    relay_log_pos       Current log pos
+    pending             Number of bytes already processed from the event
+  */
+  rli->event_relay_log_pos= MY_MAX(rli->event_relay_log_pos, BIN_LOG_HEADER_SIZE);
+  my_b_seek(cur_log,rli->event_relay_log_pos);
+  DBUG_RETURN(cur_log);
+}
+
+
+/**
+  Reads next event from the relay log.  Called from the slave SQL driver
+  thread (see the assertion on rgi->thd == rli->sql_driver_thd below), with
+  rli->data_lock held by the caller; the lock may be released temporarily
+  inside (e.g. while waiting for the relay log to grow) but is re-acquired
+  before returning.
+
+  @param rgi rpl_group_info of the executing SQL (driver) thread.
+  @param event_size [out] Size of the read event in bytes (0 on error).
+
+  @return The event read, or NULL on error.  If an error occurs, the
+  error is reported through the sql_print_information() or
+  sql_print_error() functions.
+*/
+static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size)
+{
+  Log_event* ev;
+  Relay_log_info *rli= rgi->rli;
+  IO_CACHE* cur_log = rli->cur_log;
+  mysql_mutex_t *log_lock = rli->relay_log.get_log_lock();
+  const char* errmsg=0;
+  DBUG_ENTER("next_event");
+
+  DBUG_ASSERT(rgi->thd != 0 && rgi->thd == rli->sql_driver_thd);
+  *event_size= 0;
+
+#ifndef DBUG_OFF
+  if (abort_slave_event_count && !rli->events_till_abort--)
+    DBUG_RETURN(0);
+#endif
+
+  /*
+    For most operations we need to protect rli members with data_lock,
+    so we assume calling function acquired this mutex for us and we will
+    hold it for the most of the loop below However, we will release it
+    whenever it is worth the hassle,  and in the cases when we go into a
+    mysql_cond_wait() with the non-data_lock mutex
+  */
+  mysql_mutex_assert_owner(&rli->data_lock);
+
+  while (!sql_slave_killed(rgi))
+  {
+    /*
+      We can have two kinds of log reading:
+      hot_log:
+        rli->cur_log points at the IO_CACHE of relay_log, which
+        is actively being updated by the I/O thread. We need to be careful
+        in this case and make sure that we are not looking at a stale log that
+        has already been rotated. If it has been, we reopen the log.
+
+      The other case is much simpler:
+        We just have a read only log that nobody else will be updating.
+    */
+    ulonglong old_pos;
+    bool hot_log;
+    if ((hot_log = (cur_log != &rli->cache_buf)))
+    {
+      DBUG_ASSERT(rli->cur_log_fd == -1); // foreign descriptor
+      mysql_mutex_lock(log_lock);
+
+      /*
+        Reading xxx_file_id is safe because the log will only
+        be rotated when we hold relay_log.LOCK_log
+      */
+      if (rli->relay_log.get_open_count() != rli->cur_log_old_open_count)
+      {
+        // The master has switched to a new log file; Reopen the old log file
+        cur_log=reopen_relay_log(rli, &errmsg);
+        mysql_mutex_unlock(log_lock);
+        if (!cur_log)                           // No more log files
+          goto err;
+        hot_log=0;                              // Using old binary log
+      }
+    }
+    /* 
+      As there is no guarantee that the relay is open (for example, an I/O
+      error during a write by the slave I/O thread may have closed it), we
+      have to test it.
+    */
+    if (!my_b_inited(cur_log))
+      goto err;
+#ifndef DBUG_OFF
+    {
+      /* This is an assertion which sometimes fails, let's try to track it */
+      DBUG_PRINT("info", ("my_b_tell(cur_log)=%llu rli->event_relay_log_pos=%llu",
+                          my_b_tell(cur_log), rli->event_relay_log_pos));
+      DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE);
+      DBUG_ASSERT(rli->mi->using_parallel() ||
+                  my_b_tell(cur_log) == rli->event_relay_log_pos);
+    }
+#endif
+    /*
+      Relay log is always in new format - if the master is 3.23, the
+      I/O thread will convert the format for us.
+      A problem: the description event may be in a previous relay log. So if
+      the slave has been shutdown meanwhile, we would have to look in old relay
+      logs, which may even have been deleted. So we need to write this
+      description event at the beginning of the relay log.
+      When the relay log is created when the I/O thread starts, easy: the
+      master will send the description event and we will queue it.
+      But if the relay log is created by new_file(): then the solution is:
+      MYSQL_BIN_LOG::open() will write the buffered description event.
+    */
+    old_pos= rli->event_relay_log_pos;
+    if ((ev= Log_event::read_log_event(cur_log,
+                                       rli->relay_log.description_event_for_exec,
+                                       opt_slave_sql_verify_checksum)))
+
+    {
+      /*
+        read it while we have a lock, to avoid a mutex lock in
+        inc_event_relay_log_pos()
+      */
+      rli->future_event_relay_log_pos= my_b_tell(cur_log);
+      *event_size= rli->future_event_relay_log_pos - old_pos;
+
+      if (hot_log)
+        mysql_mutex_unlock(log_lock);
+      DBUG_RETURN(ev);
+    }
+    if (opt_reckless_slave)                     // For mysql-test
+      cur_log->error = 0;
+    if (unlikely(cur_log->error < 0))
+    {
+      errmsg = "slave SQL thread aborted because of I/O error";
+      if (hot_log)
+        mysql_mutex_unlock(log_lock);
+      goto err;
+    }
+    if (!cur_log->error) /* EOF */
+    {
+      /*
+        On a hot log, EOF means that there are no more updates to
+        process and we must block until I/O thread adds some and
+        signals us to continue
+      */
+      if (hot_log)
+      {
+        /*
+          We say in Seconds_Behind_Master that we have "caught up". Note that
+          for example if network link is broken but I/O slave thread hasn't
+          noticed it (slave_net_timeout not elapsed), then we'll say "caught
+          up" whereas we're not really caught up. Fixing that would require
+          internally cutting timeout in smaller pieces in network read, no
+          thanks. Another example: SQL has caught up on I/O, now I/O has read
+          a new event and is queuing it; the false "0" will exist until SQL
+          finishes executing the new event; it will be look abnormal only if
+          the events have old timestamps (then you get "many", 0, "many").
+
+          Transient phases like this can be fixed with implementing
+          Heartbeat event which provides the slave the status of the
+          master at time the master does not have any new update to send.
+          Seconds_Behind_Master would be zero only when master has no
+          more updates in binlog for slave. The heartbeat can be sent
+          in a (small) fraction of slave_net_timeout. Until it's done
+          rli->sql_thread_caught_up is temporarily (for time of waiting for
+          the following event) set whenever EOF is reached.
+        */
+        rli->sql_thread_caught_up= true;
+
+        DBUG_ASSERT(rli->relay_log.get_open_count() ==
+                    rli->cur_log_old_open_count);
+
+        if (rli->ign_master_log_name_end[0])
+        {
+          /* We generate and return a Rotate, to make our positions advance */
+          DBUG_PRINT("info",("seeing an ignored end segment"));
+          ev= new Rotate_log_event(rli->ign_master_log_name_end,
+                                   0, rli->ign_master_log_pos_end,
+                                   Rotate_log_event::DUP_NAME);
+          rli->ign_master_log_name_end[0]= 0;
+          mysql_mutex_unlock(log_lock);
+          if (unlikely(!ev))
+          {
+            errmsg= "Slave SQL thread failed to create a Rotate event "
+              "(out of memory?), SHOW SLAVE STATUS may be inaccurate";
+            goto err;
+          }
+          ev->server_id= 0; // don't be ignored by slave SQL thread
+          DBUG_RETURN(ev);
+        }
+
+        if (rli->ign_gtids.count() && !rli->is_in_group())
+        {
+          /*
+            We generate and return a Gtid_list, to update gtid_slave_pos,
+            unless being in the middle of a group.
+          */
+          DBUG_PRINT("info",("seeing ignored end gtids"));
+          ev= new Gtid_list_log_event(&rli->ign_gtids,
+                                      Gtid_list_log_event::FLAG_IGN_GTIDS);
+          rli->ign_gtids.reset();
+          mysql_mutex_unlock(log_lock);
+          if (unlikely(!ev))
+          {
+            errmsg= "Slave SQL thread failed to create a Gtid_list event "
+              "(out of memory?), gtid_slave_pos may be inaccurate";
+            goto err;
+          }
+          ev->server_id= 0; // don't be ignored by slave SQL thread
+          ev->set_artificial_event(); // Don't mess up Exec_Master_Log_Pos
+          DBUG_RETURN(ev);
+        }
+
+        /*
+          We have to check sql_slave_killed() here an extra time.
+          Otherwise we may miss a wakeup, since last check was done
+          without holding LOCK_log.
+        */
+        if (sql_slave_killed(rgi))
+        {
+          mysql_mutex_unlock(log_lock);
+          break;
+        }
+
+        /*
+          We can, and should release data_lock while we are waiting for
+          update. If we do not, show slave status will block
+        */
+        mysql_mutex_unlock(&rli->data_lock);
+
+        /*
+          Possible deadlock :
+          - the I/O thread has reached log_space_limit
+          - the SQL thread has read all relay logs, but cannot purge for some
+          reason:
+            * it has already purged all logs except the current one
+            * there are other logs than the current one but they're involved in
+            a transaction that finishes in the current one (or is not finished)
+          Solution :
+          Wake up the possibly waiting I/O thread, and set a boolean asking
+          the I/O thread to temporarily ignore the log_space_limit
+          constraint, because we do not want the I/O thread to block because of
+          space (it's ok if it blocks for any other reason (e.g. because the
+          master does not send anything). Then the I/O thread stops waiting
+          and reads one more event and starts honoring log_space_limit again.
+
+          If the SQL thread needs more events to be able to rotate the log (it
+          might need to finish the current group first), then it can ask for
+          one more at a time. Thus we don't outgrow the relay log indefinitely,
+          but rather in a controlled manner, until the next rotate.
+
+          When the SQL thread starts it sets ignore_log_space_limit to false. 
+          We should also reset ignore_log_space_limit to 0 when the user does 
+          RESET SLAVE, but in fact, no need as RESET SLAVE requires that the
+          slave be stopped, and the SQL thread sets ignore_log_space_limit
+          to 0 when
+          it stops.
+        */
+        mysql_mutex_lock(&rli->log_space_lock);
+
+        /* 
+          If we have reached the limit of the relay space and we
+          are going to sleep, waiting for more events:
+
+          1. If outside a group, SQL thread asks the IO thread 
+             to force a rotation so that the SQL thread purges 
+             logs next time it processes an event (thus space is
+             freed).
+
+          2. If in a group, SQL thread asks the IO thread to 
+             ignore the limit and queues yet one more event 
+             so that the SQL thread finishes the group and 
+             is able to rotate and purge sometime soon.
+         */
+        if (rli->log_space_limit && 
+            rli->log_space_limit < rli->log_space_total)
+        {
+          /* force rotation if not in an unfinished group */
+          rli->sql_force_rotate_relay= !rli->is_in_group();
+
+          /* ask for one more event */
+          rli->ignore_log_space_limit= true;
+        }
+
+        mysql_cond_broadcast(&rli->log_space_cond);
+        mysql_mutex_unlock(&rli->log_space_lock);
+        // Note that wait_for_update_relay_log unlocks lock_log !
+        rli->relay_log.wait_for_update_relay_log(rli->sql_driver_thd);
+        // re-acquire data lock since we released it earlier
+        mysql_mutex_lock(&rli->data_lock);
+        continue;
+      }
+      /*
+        If the log was not hot, we need to move to the next log in
+        sequence. The next log could be hot or cold, we deal with both
+        cases separately after doing some common initialization
+      */
+      end_io_cache(cur_log);
+      DBUG_ASSERT(rli->cur_log_fd >= 0);
+      mysql_file_close(rli->cur_log_fd, MYF(MY_WME));
+      rli->cur_log_fd = -1;
+      rli->last_inuse_relaylog->completed= true;
+      rli->relay_log.description_event_for_exec->reset_crypto();
+
+      if (relay_log_purge)
+      {
+        /*
+          purge_first_log will properly set up relay log coordinates in rli.
+          If the group's coordinates are equal to the event's coordinates
+          (i.e. the relay log was not rotated in the middle of a group),
+          we can purge this relay log too.
+          We do ulonglong and string comparisons, this may be slow but
+          - purging the last relay log is nice (it can save 1GB of disk), so we
+          like to detect the case where we can do it, and given this,
+          - I see no better detection method
+          - purge_first_log is not called that often
+        */
+        if (rli->relay_log.purge_first_log
+            (rli,
+             rli->group_relay_log_pos == rli->event_relay_log_pos
+             && !strcmp(rli->group_relay_log_name,rli->event_relay_log_name)))
+        {
+          errmsg = "Error purging processed logs";
+          goto err;
+        }
+      }
+      else
+      {
+        /*
+          If hot_log is set, then we already have a lock on
+          LOCK_log.  If not, we have to get the lock.
+
+          According to Sasha, the only time this code will ever be executed
+          is if we are recovering from a bug.
+        */
+        if (rli->relay_log.find_next_log(&rli->linfo, !hot_log))
+        {
+          errmsg = "error switching to the next log";
+          goto err;
+        }
+        rli->event_relay_log_pos = BIN_LOG_HEADER_SIZE;
+        strmake_buf(rli->event_relay_log_name,rli->linfo.log_file_name);
+        if (rli->flush())
+        {
+          errmsg= "error flushing relay log";
+          goto err;
+        }
+      }
+      /*
+        Now we want to open this next log. To know if it's a hot log (the one
+        being written by the I/O thread now) or a cold log, we can use
+        is_active(); if it is hot, we use the I/O cache; if it's cold we open
+        the file normally. But if is_active() reports that the log is hot, this
+        may change between the test and the consequence of the test. So we may
+        open the I/O cache whereas the log is now cold, which is nonsense.
+        To guard against this, we need to have LOCK_log.
+      */
+
+      DBUG_PRINT("info",("hot_log: %d",hot_log));
+      if (!hot_log) /* if hot_log, we already have this mutex */
+        mysql_mutex_lock(log_lock);
+      if (rli->relay_log.is_active(rli->linfo.log_file_name))
+      {
+        rli->cur_log= cur_log= rli->relay_log.get_log_file();
+        rli->cur_log_old_open_count= rli->relay_log.get_open_count();
+        DBUG_ASSERT(rli->cur_log_fd == -1);
+
+        /*
+           When the SQL thread is [stopped and] (re)started the
+           following may happen:
+
+           1. Log was hot at stop time and remains hot at restart
+
+              SQL thread reads again from hot_log (SQL thread was
+              reading from the active log when it was stopped and the
+              very same log is still active on SQL thread restart).
+
+              In this case, my_b_seek is performed on cur_log, while
+              cur_log points to relay_log.get_log_file();
+
+           2. Log was hot at stop time but got cold before restart
+
+              The log was hot when SQL thread stopped, but it is not
+              anymore when the SQL thread restarts.
+
+              In this case, the SQL thread reopens the log, using
+              cache_buf, ie, cur_log points to &cache_buf, and thence
+              its coordinates are reset.
+
+           3. Log was already cold at stop time
+
+              The log was not hot when the SQL thread stopped, and, of
+              course, it will not be hot when it restarts.
+
+              In this case, the SQL thread opens the cold log again,
+              using cache_buf, ie, cur_log points to &cache_buf, and
+              thence its coordinates are reset.
+
+           4. Log was hot at stop time, DBA changes to previous cold
+              log and restarts SQL thread
+
+              The log was hot when the SQL thread was stopped, but the
+              user changed the coordinates of the SQL thread to
+              restart from a previous cold log.
+
+              In this case, at start time, cur_log points to a cold
+              log, opened using &cache_buf as cache, and coordinates
+              are reset. However, as it moves on to the next logs, it
+              will eventually reach the hot log. If the hot log is the
+              same at the time the SQL thread was stopped, then
+              coordinates were not reset - the cur_log will point to
+              relay_log.get_log_file(), and not a freshly opened
+              IO_CACHE through cache_buf. For this reason we need to
+              deploy a my_b_seek before calling check_binlog_magic at
+              this point of the code (see: BUG#55263 for more
+              details).
+          
+          NOTES: 
+            - We must keep the LOCK_log to read the 4 first bytes, as
+              this is a hot log (same as when we call read_log_event()
+              above: for a hot log we take the mutex).
+
+            - Because of scenario #4 above, we need to have a
+              my_b_seek here. Otherwise, we might hit the assertion
+              inside check_binlog_magic.
+        */
+
+        my_b_seek(cur_log, (my_off_t) 0);
+        if (check_binlog_magic(cur_log,&errmsg))
+        {
+          if (!hot_log)
+            mysql_mutex_unlock(log_lock);
+          goto err;
+        }
+        if (rli->alloc_inuse_relaylog(rli->linfo.log_file_name))
+        {
+          if (!hot_log)
+            mysql_mutex_unlock(log_lock);
+          goto err;
+        }
+        if (!hot_log)
+          mysql_mutex_unlock(log_lock);
+        continue;
+      }
+      if (!hot_log)
+        mysql_mutex_unlock(log_lock);
+      /*
+        if we get here, the log was not hot, so we will have to open it
+        ourselves. We are sure that the log is still not hot now (a log can get
+        from hot to cold, but not from cold to hot). No need for LOCK_log.
+      */
+      // open_binlog() will check the magic header
+      if ((rli->cur_log_fd=open_binlog(cur_log,rli->linfo.log_file_name,
+                                       &errmsg)) <0)
+        goto err;
+      if (rli->alloc_inuse_relaylog(rli->linfo.log_file_name))
+        goto err;
+    }
+    else
+    {
+      /*
+        Read failed with a non-EOF error.
+        TODO: come up with something better to handle this error
+      */
+      if (hot_log)
+        mysql_mutex_unlock(log_lock);
+      sql_print_error("Slave SQL thread: I/O error reading \
+event(errno: %d  cur_log->error: %d)",
+                      my_errno,cur_log->error);
+      // set read position to the beginning of the event
+      my_b_seek(cur_log,rli->event_relay_log_pos);
+      /* otherwise, we have had a partial read */
+      errmsg = "Aborting slave SQL thread because of partial event read";
+      break;                                    // To end of function
+    }
+  }
+  /* Reaching here via 'break' or loop exit means the slave was killed */
+  if (!errmsg && global_system_variables.log_warnings)
+  {
+    sql_print_information("Error reading relay log event: %s",
+                          "slave SQL thread was killed");
+    goto end;
+  }
+
+err:
+  if (errmsg)
+    sql_print_error("Error reading relay log event: %s", errmsg);
+
+end:
+  /*
+    Set that we are not caught up so if there is a hang/problem on restart,
+    Seconds_Behind_Master will still grow.
+  */
+  rli->sql_thread_caught_up= false;
+  DBUG_RETURN(0);
+}
+#ifdef WITH_WSREP
+/*
+  Peek at the type of the next "interesting" event in the relay log without
+  consuming it (the name is spelled "peak" historically).
+
+  Under rli->data_lock: temporarily advances the relay-log read position,
+  reads events via next_event() skipping ANNOTATE_ROWS and XID events, then
+  restores the original positions so normal execution is unaffected.
+
+  Returns the type code of the first non-skipped event, or UNKNOWN_EVENT
+  when next_event() returned NULL.
+*/
+enum Log_event_type wsrep_peak_event(rpl_group_info *rgi, ulonglong* event_size)
+{
+  enum Log_event_type ev_type;
+
+  mysql_mutex_lock(&rgi->rli->data_lock);
+
+  /* Remember positions so they can be restored after the scan */
+  unsigned long long event_pos= rgi->event_relay_log_pos;
+  unsigned long long orig_future_pos= rgi->future_event_relay_log_pos;
+  unsigned long long future_pos= rgi->future_event_relay_log_pos;
+
+  /* scan the log to read next event and we skip
+     annotate events. */
+  do {
+    my_b_seek(rgi->rli->cur_log, future_pos);
+    rgi->rli->event_relay_log_pos= future_pos;
+    rgi->event_relay_log_pos= future_pos;
+    Log_event* ev= next_event(rgi, event_size);
+    ev_type= (ev) ? ev->get_type_code() : UNKNOWN_EVENT;
+    delete ev;                            // only the type is needed
+    future_pos+= *event_size;
+  } while (ev_type == ANNOTATE_ROWS_EVENT || ev_type == XID_EVENT);
+
+  /* scan the log back and re-set the positions to original values */
+  rgi->rli->event_relay_log_pos= event_pos;
+  rgi->event_relay_log_pos= event_pos;
+  my_b_seek(rgi->rli->cur_log, orig_future_pos);
+
+  mysql_mutex_unlock(&rgi->rli->data_lock);
+
+  return ev_type;
+}
+#endif /* WITH_WSREP */
+/*
+  Rotate a relay log (this is used only by FLUSH LOGS; the automatic rotation
+  because of size is simpler because when we do it we already have all relevant
+  locks; here we don't, so this function is mainly taking locks).
+
+  Returns 0 on success (also when the slave is not initialized), or the
+  non-zero error code from MYSQL_BIN_LOG::new_file() on failure.
+  Caller must hold mi->data_lock (asserted below).
+*/
+
+int rotate_relay_log(Master_info* mi)
+{
+  DBUG_ENTER("rotate_relay_log");
+  Relay_log_info* rli= &mi->rli;
+  int error= 0;
+
+  DBUG_EXECUTE_IF("crash_before_rotate_relaylog", DBUG_SUICIDE(););
+
+  /*
+     We need to test inited because otherwise, new_file() will attempt to lock
+     LOCK_log, which may not be inited (if we're not a slave).
+  */
+  if (!rli->inited)
+  {
+    DBUG_PRINT("info", ("rli->inited == 0"));
+    goto end;
+  }
+
+  /* If the relay log is closed, new_file() will do nothing. */
+  if ((error= rli->relay_log.new_file()))
+    goto end;
+
+  /*
+    We harvest now, because otherwise BIN_LOG_HEADER_SIZE will not immediately
+    be counted, so imagine a succession of FLUSH LOGS  and assume the slave
+    threads are started:
+    relay_log_space decreases by the size of the deleted relay log, but does
+    not increase, so flush-after-flush we may become negative, which is wrong.
+    Even if this will be corrected as soon as a query is replicated on the
+    slave (because the I/O thread will then call harvest_bytes_written() which
+    will harvest all these BIN_LOG_HEADER_SIZE we forgot), it may give strange
+    output in SHOW SLAVE STATUS meanwhile. So we harvest now.
+    If the log is closed, then this will just harvest the last writes, probably
+    0 as they probably have been harvested.
+
+    Note that it needs to be protected by mi->data_lock.
+  */
+  mysql_mutex_assert_owner(&mi->data_lock);
+  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+end:
+  DBUG_RETURN(error);
+}
+
+
+/**
+   Detects, based on master's version (as found in the relay log), if master
+   has a certain bug.
+   @param rli Relay_log_info which tells the master's version
+   @param bug_id Number of the bug as found in bugs.mysql.com
+   @param report bool report error message, default TRUE
+
+   @param pred Predicate function that will be called with @c param to
+   check for the bug. If the function return @c true, the bug is present,
+   otherwise, it is not.
+
+   @param param  State passed to @c pred function.
+
+   @param maria_master  If true, check against the MariaDB (MDEV) bug table;
+   otherwise against the MySQL (bugs.mysql.com) table.
+
+   @return TRUE if master has the bug, FALSE if it does not.
+*/
+bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report,
+                        bool (*pred)(const void *), const void *param,
+                        bool maria_master)
+{
+  struct st_version_range_for_one_bug {
+    uint        bug_id;
+    Version introduced_in; // first version with bug
+    Version fixed_in;      // first version with fix
+  };
+  static struct st_version_range_for_one_bug versions_for_their_bugs[]=
+  {
+    {24432, { 5, 0, 24 }, { 5, 0, 38 } },
+    {24432, { 5, 1, 12 }, { 5, 1, 17 } },
+    {33029, { 5, 0,  0 }, { 5, 0, 58 } },
+    {33029, { 5, 1,  0 }, { 5, 1, 12 } },
+    {37426, { 5, 1,  0 }, { 5, 1, 26 } },
+  };
+  static struct st_version_range_for_one_bug versions_for_our_bugs[]=
+  {
+    {29621, { 10, 3, 36 }, { 10, 3, 39 } },
+    {29621, { 10, 4, 26 }, { 10, 4, 29 } },
+    {29621, { 10, 5, 17 }, { 10, 5, 20 } },
+    {29621, { 10, 6, 9  }, { 10, 6, 13 } },
+    {29621, { 10, 7, 5  }, { 10, 7, 9 } },
+    {29621, { 10, 8, 4  }, { 10, 8, 8  } },
+    {29621, { 10, 9, 2  }, { 10, 9, 6  } },
+    {29621, { 10, 10,1  }, { 10, 10,4  } },
+    {29621, { 10, 11,1  }, { 10, 11,3  } },
+  };
+  const Version &master_ver=
+    rli->relay_log.description_event_for_exec->server_version_split;
+  /* Select which bug table to scan based on the master's flavour */
+  struct st_version_range_for_one_bug* versions_for_all_bugs= maria_master ?
+    versions_for_our_bugs : versions_for_their_bugs;
+  uint all_size= maria_master ?
+    sizeof(versions_for_our_bugs)/sizeof(*versions_for_our_bugs) :
+    sizeof(versions_for_their_bugs)/sizeof(*versions_for_their_bugs);
+
+  for (uint i= 0; i < all_size; i++)
+  {
+    const Version &introduced_in= versions_for_all_bugs[i].introduced_in;
+    const Version &fixed_in= versions_for_all_bugs[i].fixed_in;
+    /* Bug applies when master version is in [introduced_in, fixed_in) */
+    if ((versions_for_all_bugs[i].bug_id == bug_id) &&
+        introduced_in <= master_ver &&
+        fixed_in > master_ver &&
+        (pred == NULL || (*pred)(param)))
+    {
+      const char *bug_source= maria_master ?
+        "https://jira.mariadb.org/browse/MDEV-" :
+        "http://bugs.mysql.com/bug.php?id=";
+      if (!report)
+	return TRUE;
+      // a short message for SHOW SLAVE STATUS (message length constraints)
+      my_printf_error(ER_UNKNOWN_ERROR, "master may suffer from"
+                      " %s%u"
+                      " so slave stops; check error log on slave"
+                      " for more info", MYF(0), bug_source, bug_id);
+      // a verbose message for the error log
+      rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR, NULL,
+                  "According to the master's version ('%s'),"
+                  " it is probable that master suffers from this bug:"
+                      " %s%u"
+                      " and thus replicating the current binary log event"
+                      " may make the slave's data become different from the"
+                      " master's data."
+                      " To take no risk, slave refuses to replicate"
+                      " this event and stops."
+                      " We recommend that all updates be stopped on the"
+                      " master and slave, that the data of both be"
+                      " manually synchronized,"
+                      " that master's binary logs be deleted,"
+                      " that master be upgraded to a version at least"
+                      " equal to '%d.%d.%d'. Then replication can be"
+                      " restarted.",
+                      rli->relay_log.description_event_for_exec->server_version,
+                      bug_source,
+                      bug_id,
+                      fixed_in[0], fixed_in[1], fixed_in[2]);
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+/**
+   BUG#33029: in MySQL 5.0 before 5.0.58 and 5.1 before 5.1.12, once one
+   statement inside a stored program generated an AUTO_INCREMENT value on
+   behalf of the top statement, every following statement was treated the
+   same way, so a bogus INSERT_ID could be attached to them, causing
+   duplicate-entry errors that stop the slave.
+
+   Returns TRUE when the current master is detected as having that bug
+   (only meaningful in a slave execution context).
+ */
+bool rpl_master_erroneous_autoinc(THD *thd)
+{
+  if (!thd->rgi_slave)
+    return FALSE;                     // not executing as a slave
+  DBUG_EXECUTE_IF("simulate_bug33029", return TRUE;);
+  return rpl_master_has_bug(thd->rgi_slave->rli, 33029, FALSE, NULL, NULL);
+}
+
+
+/*
+  Decode the STMT_END_F flag from a raw Rows-event buffer, using the
+  format description event to locate the flags word.
+*/
+static bool get_row_event_stmt_end(const uchar *buf,
+                                   const Format_description_log_event *fdle)
+{
+  Log_event_type type_code= (Log_event_type)(uchar)buf[EVENT_TYPE_OFFSET];
+  uint8 const header_len= fdle->common_header_len;
+  uint8 const post_len= fdle->post_header_len[type_code-1];
+
+  /*
+    The term 4 below signifies that master is of 'an intermediate source', see
+    Rows_log_event::Rows_log_event.
+  */
+  const uchar *flags_pos= buf + header_len + RW_MAPID_OFFSET +
+    ((post_len == 6) ? 4 : RW_FLAGS_OFFSET);
+
+  return (uint2korr(flags_pos) & Rows_log_event::STMT_END_F) != 0;
+}
+
+
+/*
+  Clear all tracked state: forget the binlog file name, the first/last
+  seen positions and the end-of-statement flag.
+*/
+
+void Rows_event_tracker::reset()
+{
+  first_seen= 0;
+  last_seen= 0;
+  stmt_end_seen= false;
+  binlog_file_name[0]= '\0';
+}
+
+
+/*
+  Update log event tracking data.
+
+  The first- and last- seen event binlog position get memorized, as
+  well as the end-of-statement status of the last one.
+
+  @param file_name  Binlog file name of the event (recorded on first call
+                    after reset()).
+  @param pos        End position of the event in the binlog.
+  @param buf        Raw event buffer (used to decode STMT_END_F).
+  @param fdle       Format description event used for decoding.
+*/
+
+void Rows_event_tracker::update(const char *file_name, my_off_t pos,
+                                const uchar *buf,
+                                const Format_description_log_event *fdle)
+{
+  DBUG_ENTER("Rows_event_tracker::update");
+  if (!first_seen)
+  {
+    first_seen= pos;
+    strmake(binlog_file_name, file_name, sizeof(binlog_file_name) - 1);
+  }
+  last_seen= pos;
+  DBUG_ASSERT(stmt_end_seen == 0);              // We can only have one
+  stmt_end_seen= get_row_event_stmt_end(buf, fdle);
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  The function is called at next event reading
+  after a sequence of Rows- log-events. It checks the end-of-statement status
+  of the past sequence to report on any issue.
+  In the positive case the tracker gets reset.
+
+  @param file_name  Binlog file name of the event currently being read.
+  @param pos        Position of that event.
+
+  @return true  when the Rows- event group integrity found compromised,
+                false otherwise.
+*/
+bool Rows_event_tracker::check_and_report(const char* file_name,
+                                          my_off_t pos)
+{
+  if (last_seen)
+  {
+    // there was at least one "block" event previously
+    if (!stmt_end_seen)
+    {
+        // The previous Rows-event sequence never ended: report and keep
+        // the tracker state for diagnostics (no reset on the error path)
+        sql_print_error("Slave IO thread did not receive an expected "
+                        "Rows-log end-of-statement for event starting "
+                        "at log '%s' position %llu "
+                        "whose last block was seen at log '%s' position %llu. "
+                        "The end-of-statement should have been delivered "
+                        "before the current one at log '%s' position %llu",
+                        binlog_file_name, first_seen,
+                        binlog_file_name, last_seen, file_name, pos);
+        return true;
+    }
+    reset();
+  }
+
+  return false;
+}
+
+/**
+  @} (end of group Replication)
+*/
+
+#endif /* HAVE_REPLICATION */
diff --git a/sql/slave.h b/sql/slave.h
new file mode 100644
index 00000000..02de9135
--- /dev/null
+++ b/sql/slave.h
@@ -0,0 +1,321 @@
+/* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
+   Copyright (c) 2009, 2016, MariaDB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#ifndef SLAVE_H
+#define SLAVE_H
+
+/**
+  MASTER_DELAY can be at most (1 << 31) - 1.
+*/
+#define MASTER_DELAY_MAX (0x7FFFFFFF)
+#if INT_MAX < 0x7FFFFFFF
+#error "don't support platforms where INT_MAX < 0x7FFFFFFF"
+#endif
+
+/**
+  @defgroup Replication Replication
+  @{
+
+  @file
+*/
+
+/** 
+   Some of the defines are needed in the parser even though replication is
+   not compiled in (embedded).
+*/
+
+/**
+   The maximum is defined as (ULONG_MAX/1000) with 4 bytes ulong
+*/
+#define SLAVE_MAX_HEARTBEAT_PERIOD 4294967
+
+#ifdef HAVE_REPLICATION
+
+#include "log.h"
+#include "my_list.h"
+#include "rpl_filter.h"
+#include "rpl_tblmap.h"
+#include "rpl_gtid.h"
+#include "log_event.h"
+
+#define SLAVE_NET_TIMEOUT  60
+
+#define MAX_SLAVE_ERROR    ER_ERROR_LAST+1
+
+#define MAX_REPLICATION_THREAD 64
+
+// Forward declarations
+class Relay_log_info;
+class Master_info;
+class Master_info_index;
+struct rpl_group_info;
+struct rpl_parallel_thread;
+
+int init_intvar_from_file(int* var, IO_CACHE* f, int default_val);
+int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
+                          const char *default_val);
+int init_floatvar_from_file(float* var, IO_CACHE* f, float default_val);
+int init_dynarray_intvar_from_file(DYNAMIC_ARRAY* arr, IO_CACHE* f);
+
+/*****************************************************************************
+
+  MySQL Replication
+
+  Replication is implemented via two types of threads:
+
+    I/O Thread - One of these threads is started for each master server.
+                 They maintain a connection to their master server, read log
+                 events from the master as they arrive, and queues them into
+                 a single, shared relay log file.  A Master_info 
+                 represents each of these threads.
+
+    SQL Thread - One of these threads is started and reads from the relay log
+                 file, executing each event.  A Relay_log_info 
+                 represents this thread.
+
+  Buffering in the relay log file makes it unnecessary to reread events from
+  a master server across a slave restart.  It also decouples the slave from
+  the master where long-running updates and event logging are concerned--ie
+  it can continue to log new events while a slow query executes on the slave.
+
+*****************************************************************************/
+
+/*
+  MUTEXES in replication:
+
+  LOCK_active_mi: [note: this was originally meant for multimaster, to switch
+  from a master to another, to protect active_mi] It is used to SERIALIZE ALL
+  administrative commands of replication: START SLAVE, STOP SLAVE, CHANGE
+  MASTER, RESET SLAVE, end_slave() (when mysqld stops) [init_slave() does not
+  need it; it's called early]. Any of these commands holds the mutex from the
+  start till the end. This thus protects us against a handful of deadlocks
+  (consider start_slave_thread() which, when starting the I/O thread, releases
+  mi->run_lock, keeps rli->run_lock, and tries to re-acquire mi->run_lock).
+
+  Currently active_mi never moves (it's created at startup and deleted at
+  shutdown, and not changed: it always points to the same Master_info struct),
+  because we don't have multimaster. So for the moment, mi does not move, and
+  mi->rli does not either.
+
+  In Master_info: run_lock, data_lock
+  run_lock protects all information about the run state: slave_running, thd
+  and the existence of the I/O thread (to stop/start it, you need this mutex).
+  data_lock protects some moving members of the struct: counters (log name,
+  position) and relay log (MYSQL_BIN_LOG object).
+
+  In Relay_log_info: run_lock, data_lock
+  see Master_info
+  However, note that run_lock does not protect
+  Relay_log_info.run_state; that is protected by data_lock.
+  
+  Order of acquisition: if you want to have LOCK_active_mi and a run_lock, you
+  must acquire LOCK_active_mi first.
+
+  In MYSQL_BIN_LOG: LOCK_log, LOCK_index of the binlog and the relay log
+  LOCK_log: when you write to it. LOCK_index: when you create/delete a binlog
+  (so that you have to update the .index file).
+*/
+
+extern ulong master_retry_count;
+extern MY_BITMAP slave_error_mask;
+extern char slave_skip_error_names[];
+extern bool use_slave_mask;
+extern char slave_transaction_retry_error_names[];
+extern uint *slave_transaction_retry_errors;
+extern uint slave_transaction_retry_error_length;
+extern char *slave_load_tmpdir;
+extern char *master_info_file;
+extern MYSQL_PLUGIN_IMPORT char *relay_log_info_file;
+extern char *opt_relay_logname, *opt_relaylog_index_name;
+extern my_bool opt_skip_slave_start, opt_reckless_slave;
+extern my_bool opt_log_slave_updates;
+extern char *opt_slave_skip_errors;
+extern char *opt_slave_transaction_retry_errors;
+extern my_bool opt_replicate_annotate_row_events;
+extern ulonglong relay_log_space_limit;
+extern ulonglong opt_read_binlog_speed_limit;
+extern ulonglong slave_skipped_errors;
+extern const char *relay_log_index;
+extern const char *relay_log_basename;
+
+/*
+  4 possible values for Master_info::slave_running and
+  Relay_log_info::slave_running.
+  The values 0,1,2,3 are very important: to keep the diff small, I didn't
+  substitute places where we use 0/1 with the newly defined symbols.
+  So don't change these values.
+  The same way, code is assuming that in Relay_log_info we use only values
+  0/1.
+  I started with using an enum, but
+  enum_variable=1; is not legal so would have required many line changes.
+*/
+#define MYSQL_SLAVE_NOT_RUN         0
+#define MYSQL_SLAVE_RUN_NOT_CONNECT 1
+#define MYSQL_SLAVE_RUN_CONNECT     2
+#define MYSQL_SLAVE_RUN_READING     3
+
+#define RPL_LOG_NAME (rli->group_master_log_name[0] ? rli->group_master_log_name :\
+ "FIRST")
+#define IO_RPL_LOG_NAME (mi->master_log_name[0] ? mi->master_log_name :\
+ "FIRST")
+
+/*
+  If the following is set, if first gives an error, second will be
+  tried. Otherwise, if first fails, we fail.
+*/
+#define SLAVE_FORCE_ALL 4
+
+/*
+  Values for the option --replicate-events-marked-for-skip.
+  Must match the names in replicate_events_marked_for_skip_names in sys_vars.cc
+*/
+#define RPL_SKIP_REPLICATE 0
+#define RPL_SKIP_FILTER_ON_SLAVE 1
+#define RPL_SKIP_FILTER_ON_MASTER 2
+
+
+int init_slave();
+int init_recovery(Master_info* mi, const char** errmsg);
+bool init_slave_skip_errors(const char* arg);
+bool init_slave_transaction_retry_errors(const char* arg);
+int register_slave_on_master(MYSQL* mysql);
+int terminate_slave_threads(Master_info* mi, int thread_mask,
+			     bool skip_lock = 0);
+int start_slave_threads(THD *thd,
+                        bool need_slave_mutex, bool wait_for_start,
+			Master_info* mi, const char* master_info_fname,
+			const char* slave_info_fname, int thread_mask);
+/*
+  cond_lock is usually same as start_lock. It is needed for the case when
+  start_lock is 0 which happens if start_slave_thread() is called already
+  inside the start_lock section, but at the same time we want a
+  mysql_cond_wait() on start_cond, start_lock
+*/
+int start_slave_thread(
+#ifdef HAVE_PSI_INTERFACE
+                       PSI_thread_key thread_key,
+#endif
+                       pthread_handler h_func,
+                       mysql_mutex_t *start_lock,
+                       mysql_mutex_t *cond_lock,
+                       mysql_cond_t *start_cond,
+                       volatile uint *slave_running,
+                       volatile ulong *slave_run_id,
+                       Master_info *mi);
+
+/* If fd is -1, dump to NET */
+int mysql_table_dump(THD* thd, const char* db,
+		     const char* tbl_name, int fd = -1);
+
+/* retrieve table from master and copy to slave*/
+int fetch_master_table(THD* thd, const char* db_name, const char* table_name,
+		       Master_info* mi, MYSQL* mysql, bool overwrite);
+
+void show_master_info_get_fields(THD *thd, List<Item> *field_list,
+                                     bool full, size_t gtid_pos_length);
+bool show_master_info(THD* thd, Master_info* mi, bool full);
+bool show_all_master_info(THD* thd);
+void show_binlog_info_get_fields(THD *thd, List<Item> *field_list);
+bool show_binlog_info(THD* thd);
+bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report,
+                        bool (*pred)(const void *), const void *param,
+                        bool maria_master= false);
+bool rpl_master_erroneous_autoinc(THD* thd);
+
+const char *print_slave_db_safe(const char *db);
+void skip_load_data_infile(NET* net);
+
+void slave_prepare_for_shutdown();
+void end_slave(); /* release slave threads */
+void close_active_mi(); /* clean up slave threads data */
+void clear_until_condition(Relay_log_info* rli);
+void clear_slave_error(Relay_log_info* rli);
+void end_relay_log_info(Relay_log_info* rli);
+void init_thread_mask(int* mask,Master_info* mi,bool inverse);
+Format_description_log_event *
+read_relay_log_description_event(IO_CACHE *cur_log, ulonglong start_pos,
+                                 const char **errmsg);
+
+int init_relay_log_pos(Relay_log_info* rli,const char* log,ulonglong pos,
+		       bool need_data_lock, const char** errmsg,
+                       bool look_for_description_event);
+
+int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset,
+		     const char** errmsg);
+void set_slave_thread_options(THD* thd);
+void set_slave_thread_default_charset(THD *thd, rpl_group_info *rgi);
+int rotate_relay_log(Master_info* mi);
+int has_temporary_error(THD *thd);
+int sql_delay_event(Log_event *ev, THD *thd, rpl_group_info *rgi);
+int apply_event_and_update_pos(Log_event* ev, THD* thd,
+                               struct rpl_group_info *rgi);
+int apply_event_and_update_pos_for_parallel(Log_event* ev, THD* thd,
+                                            struct rpl_group_info *rgi);
+
+int init_intvar_from_file(int* var, IO_CACHE* f, int default_val);
+int init_floatvar_from_file(float* var, IO_CACHE* f, float default_val);
+int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
+                          const char *default_val);
+int init_dynarray_intvar_from_file(DYNAMIC_ARRAY* arr, IO_CACHE* f);
+
+pthread_handler_t handle_slave_io(void *arg);
+void slave_output_error_info(rpl_group_info *rgi, THD *thd);
+pthread_handler_t handle_slave_sql(void *arg);
+bool net_request_file(NET* net, const char* fname);
+void slave_background_kill_request(THD *to_kill);
+void slave_background_gtid_pos_create_request
+        (rpl_slave_state::gtid_pos_table *table_entry);
+void slave_background_gtid_pending_delete_request(void);
+
+extern Master_info *active_mi; /* active_mi for multi-master */
+extern Master_info *default_master_info; /* To replace active_mi */
+extern Master_info_index *master_info_index;
+extern LEX_CSTRING default_master_connection_name;
+extern my_bool replicate_same_server_id;
+
+extern int disconnect_slave_event_count, abort_slave_event_count ;
+
+/* the master variables are defaults read from my.cnf or command line */
+extern uint report_port;
+extern char *master_info_file, *report_user;
+extern char *report_host, *report_password;
+
+extern I_List<THD> threads;
+
+/*
+  Check that a binlog event (read from the relay log) is valid to update
+  last_master_timestamp. That is, a valid event is one with a consistent
+  timestamp which originated from a primary server.
+*/
+static inline bool event_can_update_last_master_timestamp(Log_event *ev)
+{
+  if (!ev)
+    return false;
+  /*
+    Artificial and relay-log-local events did not originate on the
+    primary, and a zero timestamp carries no usable time information.
+  */
+  return !ev->is_artificial_event() && !ev->is_relay_log_event() &&
+         ev->when != 0;
+}
+
+#else
+#define close_active_mi() /* no-op */
+#endif /* HAVE_REPLICATION */
+
+/* masks for start/stop operations on io and sql slave threads */
+#define SLAVE_IO  1
+#define SLAVE_SQL 2
+
+/**
+  @} (end of group Replication)
+*/
+
+#endif
diff --git a/sql/sp.cc b/sql/sp.cc
new file mode 100644
index 00000000..f51851d1
--- /dev/null
+++ b/sql/sp.cc
@@ -0,0 +1,3097 @@
+/*
+   Copyright (c) 2002, 2018, Oracle and/or its affiliates.
+   Copyright (c) 2009, 2022, MariaDB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#include "mariadb.h"
+#include "sql_priv.h"
+#include "unireg.h"
+#include "sp.h"
+#include "sql_base.h"                           // close_thread_tables
+#include "sql_lex.h"                            // empty_clex_str
+#include "sql_parse.h"                          // parse_sql
+#include "key.h"                                // key_copy
+#include "sql_show.h"             // append_definer, append_identifier
+#include "sql_db.h" // get_default_db_collation, mysql_opt_change_db,
+                    // mysql_change_db, check_db_dir_existence,
+                    // load_db_opt_by_name
+#include "sql_table.h"                          // write_bin_log
+#include "sp_head.h"
+#include "sp_cache.h"
+#include "transaction.h"
+#include "lock.h"                               // lock_object_name
+
+#include <my_user.h>
+#include "mysql/psi/mysql_sp.h"
+
+/* Stored procedures are looked up in the per-connection procedure cache. */
+sp_cache **Sp_handler_procedure::get_cache(THD *thd) const
+{
+  return &thd->sp_proc_cache;
+}
+
+/* Stored functions have their own per-connection cache. */
+sp_cache **Sp_handler_function::get_cache(THD *thd) const
+{
+  return &thd->sp_func_cache;
+}
+
+/* Package specifications use a dedicated cache. */
+sp_cache **Sp_handler_package_spec::get_cache(THD *thd) const
+{
+  return &thd->sp_package_spec_cache;
+}
+
+/* Package bodies use a dedicated cache. */
+sp_cache **Sp_handler_package_body::get_cache(THD *thd) const
+{
+  return &thd->sp_package_body_cache;
+}
+
+
+/*
+  Maximum allowed recursion depth for stored procedures, taken from the
+  session variable max_sp_recursion_depth.
+*/
+ulong Sp_handler_procedure::recursion_depth(THD *thd) const
+{
+  return thd->variables.max_sp_recursion_depth;
+}
+
+
+/*
+  Base-class behaviour: RETURN <expr> is rejected with ER_SP_BADRETURN;
+  only the function handler overrides this to accept it.
+*/
+bool Sp_handler::add_instr_freturn(THD *thd, sp_head *sp,
+                                   sp_pcontext *spcont,
+                                   Item *item, LEX *lex) const
+{
+  my_error(ER_SP_BADRETURN, MYF(0));
+  return true;
+}
+
+
+/*
+  Base-class behaviour: a value-less RETURN is reported as a parse error;
+  only the procedure handler overrides this to accept it.
+*/
+bool Sp_handler::add_instr_preturn(THD *thd, sp_head *sp,
+                                   sp_pcontext *spcont) const
+{
+  thd->parse_error();
+  return true;
+}
+
+
+/* Functions accept RETURN <expr>: delegate instruction creation to sp_head. */
+bool Sp_handler_function::add_instr_freturn(THD *thd, sp_head *sp,
+                                            sp_pcontext *spcont,
+                                            Item *item, LEX *lex) const
+{
+  return sp->add_instr_freturn(thd, spcont, item, lex);
+}
+
+
+/* Procedures accept a value-less RETURN: delegate to sp_head. */
+bool Sp_handler_procedure::add_instr_preturn(THD *thd, sp_head *sp,
+                                             sp_pcontext *spcont) const
+{
+  return sp->add_instr_preturn(thd, spcont);
+}
+
+
+Sp_handler_procedure sp_handler_procedure;
+Sp_handler_function sp_handler_function;
+Sp_handler_package_spec sp_handler_package_spec;
+Sp_handler_package_body sp_handler_package_body;
+Sp_handler_trigger sp_handler_trigger;
+Sp_handler_package_procedure sp_handler_package_procedure;
+Sp_handler_package_function sp_handler_package_function;
+
+
+const Sp_handler *Sp_handler_procedure::package_routine_handler() const
+{
+  return &sp_handler_package_procedure;
+}
+
+
+const Sp_handler *Sp_handler_function::package_routine_handler() const
+{
+  return &sp_handler_package_function;
+}
+
+
+static const
+TABLE_FIELD_TYPE proc_table_fields[MYSQL_PROC_FIELD_COUNT] =
+{
+  {
+    { STRING_WITH_LEN("db") },
+    { STRING_WITH_LEN("char(64)") },
+    { STRING_WITH_LEN("utf8mb") }
+  },
+  {
+    { STRING_WITH_LEN("name") },
+    { STRING_WITH_LEN("char(64)") },
+    { STRING_WITH_LEN("utf8mb") }
+  },
+  {
+    { STRING_WITH_LEN("type") },
+    { STRING_WITH_LEN("enum('FUNCTION','PROCEDURE')") },
+    { NULL, 0 }
+  },
+  {
+    { STRING_WITH_LEN("specific_name") },
+    { STRING_WITH_LEN("char(64)") },
+    { STRING_WITH_LEN("utf8mb") }
+  },
+  {
+    { STRING_WITH_LEN("language") },
+    { STRING_WITH_LEN("enum('SQL')") },
+    { NULL, 0 }
+  },
+  {
+    { STRING_WITH_LEN("sql_data_access") },
+    { STRING_WITH_LEN("enum('CONTAINS_SQL','NO_SQL','READS_SQL_DATA','MODIFIES_SQL_DATA')") },
+    { NULL, 0 }
+  },
+  {
+    { STRING_WITH_LEN("is_deterministic") },
+    { STRING_WITH_LEN("enum('YES','NO')") },
+    { NULL, 0 }
+  },
+  {
+    { STRING_WITH_LEN("security_type") },
+    { STRING_WITH_LEN("enum('INVOKER','DEFINER')") },
+    { NULL, 0 }
+  },
+  {
+    { STRING_WITH_LEN("param_list") },
+    { STRING_WITH_LEN("blob") },
+    { NULL, 0 }
+  },
+
+  {
+    { STRING_WITH_LEN("returns") },
+    { STRING_WITH_LEN("longblob") },
+    { NULL, 0 }
+  },
+  {
+    { STRING_WITH_LEN("body") },
+    { STRING_WITH_LEN("longblob") },
+    { NULL, 0 }
+  },
+  {
+    { STRING_WITH_LEN("definer") },
+    { STRING_WITH_LEN("varchar(") },
+    { STRING_WITH_LEN("utf8mb") }
+  },
+  {
+    { STRING_WITH_LEN("created") },
+    { STRING_WITH_LEN("timestamp") },
+    { NULL, 0 }
+  },
+  {
+    { STRING_WITH_LEN("modified") },
+    { STRING_WITH_LEN("timestamp") },
+    { NULL, 0 }
+  },
+  {
+    { STRING_WITH_LEN("sql_mode") },
+    { STRING_WITH_LEN("set('REAL_AS_FLOAT','PIPES_AS_CONCAT','ANSI_QUOTES',"
+    "'IGNORE_SPACE','IGNORE_BAD_TABLE_OPTIONS','ONLY_FULL_GROUP_BY',"
+    "'NO_UNSIGNED_SUBTRACTION',"
+    "'NO_DIR_IN_CREATE','POSTGRESQL','ORACLE','MSSQL','DB2','MAXDB',"
+    "'NO_KEY_OPTIONS','NO_TABLE_OPTIONS','NO_FIELD_OPTIONS','MYSQL323','MYSQL40',"
+    "'ANSI','NO_AUTO_VALUE_ON_ZERO','NO_BACKSLASH_ESCAPES','STRICT_TRANS_TABLES',"
+    "'STRICT_ALL_TABLES','NO_ZERO_IN_DATE','NO_ZERO_DATE','INVALID_DATES',"
+    "'ERROR_FOR_DIVISION_BY_ZERO','TRADITIONAL','NO_AUTO_CREATE_USER',"
+    "'HIGH_NOT_PRECEDENCE','NO_ENGINE_SUBSTITUTION','PAD_CHAR_TO_FULL_LENGTH',"
+    "'EMPTY_STRING_IS_NULL','SIMULTANEOUS_ASSIGNMENT',"
+    "'TIME_ROUND_FRACTIONAL')") },
+    { NULL, 0 }
+  },
+  {
+    { STRING_WITH_LEN("comment") },
+    { STRING_WITH_LEN("text") },
+    { STRING_WITH_LEN("utf8mb") }
+  },
+  {
+    { STRING_WITH_LEN("character_set_client") },
+    { STRING_WITH_LEN("char(32)") },
+    { STRING_WITH_LEN("utf8mb") }
+  },
+  {
+    { STRING_WITH_LEN("collation_connection") },
+    { STRING_WITH_LEN("char(") },
+    { STRING_WITH_LEN("utf8mb") }
+  },
+  {
+    { STRING_WITH_LEN("db_collation") },
+    { STRING_WITH_LEN("char(") },
+    { STRING_WITH_LEN("utf8mb") }
+  },
+  {
+    { STRING_WITH_LEN("body_utf8") },
+    { STRING_WITH_LEN("longblob") },
+    { NULL, 0 }
+  },
+  {
+    { STRING_WITH_LEN("aggregate") },
+    { STRING_WITH_LEN("enum('NONE','GROUP')") },
+    { NULL, 0 }
+  }
+};
+
+static const TABLE_FIELD_DEF
+proc_table_def= {MYSQL_PROC_FIELD_COUNT, proc_table_fields, 0, (uint*) 0 };
+
+/*************************************************************************/
+
+/**
+  Stored_routine_creation_ctx -- creation context of stored routines
+  (stored procedures and functions).
+*/
+
+class Stored_routine_creation_ctx : public Stored_program_creation_ctx,
+                                    public Sql_alloc
+{
+public:
+  static Stored_routine_creation_ctx *
+  load_from_db(THD *thd, const Database_qualified_name *name, TABLE *proc_tbl);
+
+public:
+  virtual Stored_program_creation_ctx *clone(MEM_ROOT *mem_root)
+  {
+    return new (mem_root) Stored_routine_creation_ctx(m_client_cs,
+                                                      m_connection_cl,
+                                                      m_db_cl);
+  }
+
+protected:
+  virtual Object_creation_ctx *create_backup_ctx(THD *thd) const
+  {
+    DBUG_ENTER("Stored_routine_creation_ctx::create_backup_ctx");
+    DBUG_RETURN(new Stored_routine_creation_ctx(thd));
+  }
+
+private:
+  Stored_routine_creation_ctx(THD *thd)
+    : Stored_program_creation_ctx(thd)
+  { }
+
+  Stored_routine_creation_ctx(CHARSET_INFO *client_cs,
+                              CHARSET_INFO *connection_cl,
+                              CHARSET_INFO *db_cl)
+    : Stored_program_creation_ctx(client_cs, connection_cl, db_cl)
+  { }
+};
+
+/**************************************************************************
+  Stored_routine_creation_ctx implementation.
+**************************************************************************/
+
+/*
+  Read a character-set name from a mysql.proc column and resolve it.
+  On a NULL/unreadable column or an unknown character set, fall back to
+  dflt_cs and return TRUE; otherwise store the resolved charset and
+  return FALSE.
+*/
+bool load_charset(THD *thd,
+                  MEM_ROOT *mem_root,
+                  Field *field,
+                  CHARSET_INFO *dflt_cs,
+                  CHARSET_INFO **cs)
+{
+  LEX_CSTRING name;
+
+  if (!field->val_str_nopad(mem_root, &name))
+  {
+    DBUG_ASSERT(name.str[name.length] == 0);
+    *cs= get_charset_by_csname(name.str, MY_CS_PRIMARY,
+                               MYF(thd->get_utf8_flag()));
+    if (*cs != NULL)
+      return FALSE;
+  }
+
+  /* Column was NULL or the name did not resolve: use the default. */
+  *cs= dflt_cs;
+  return TRUE;
+}
+
+/*************************************************************************/
+
+/*
+  Read a collation name from a mysql.proc column and resolve it.
+  On a NULL/unreadable column or an unknown collation, fall back to
+  dflt_cl and return TRUE; otherwise store the resolved collation and
+  return FALSE.
+*/
+bool load_collation(THD *thd, MEM_ROOT *mem_root,
+                    Field *field,
+                    CHARSET_INFO *dflt_cl,
+                    CHARSET_INFO **cl)
+{
+  LEX_CSTRING name;
+
+  if (!field->val_str_nopad(mem_root, &name))
+  {
+    DBUG_ASSERT(name.str[name.length] == 0);
+    *cl= get_charset_by_name(name.str, MYF(thd->get_utf8_flag()));
+    if (*cl != NULL)
+      return FALSE;
+  }
+
+  /* Column was NULL or the name did not resolve: use the default. */
+  *cl= dflt_cl;
+  return TRUE;
+}
+
+/*************************************************************************/
+
+Stored_routine_creation_ctx *
+Stored_routine_creation_ctx::load_from_db(THD *thd,
+                                          const Database_qualified_name *name,
+                                          TABLE *proc_tbl)
+{
+  /* Load character set/collation attributes. */
+
+  CHARSET_INFO *client_cs;
+  CHARSET_INFO *connection_cl;
+  CHARSET_INFO *db_cl;
+
+  const char *db_name= thd->strmake(name->m_db.str, name->m_db.length);
+  const char *sr_name= thd->strmake(name->m_name.str, name->m_name.length);
+
+  bool invalid_creation_ctx= FALSE;
+
+  if (load_charset(thd, thd->mem_root,
+                   proc_tbl->field[MYSQL_PROC_FIELD_CHARACTER_SET_CLIENT],
+                   thd->variables.character_set_client,
+                   &client_cs))
+  {
+    sql_print_warning("Stored routine '%s'.'%s': invalid value "
+                      "in column mysql.proc.character_set_client.",
+                      (const char *) db_name,
+                      (const char *) sr_name);
+
+    invalid_creation_ctx= TRUE;
+  }
+
+  if (load_collation(thd,thd->mem_root,
+                     proc_tbl->field[MYSQL_PROC_FIELD_COLLATION_CONNECTION],
+                     thd->variables.collation_connection,
+                     &connection_cl))
+  {
+    sql_print_warning("Stored routine '%s'.'%s': invalid value "
+                      "in column mysql.proc.collation_connection.",
+                      (const char *) db_name,
+                      (const char *) sr_name);
+
+    invalid_creation_ctx= TRUE;
+  }
+
+  if (load_collation(thd,thd->mem_root,
+                     proc_tbl->field[MYSQL_PROC_FIELD_DB_COLLATION],
+                     NULL,
+                     &db_cl))
+  {
+    sql_print_warning("Stored routine '%s'.'%s': invalid value "
+                      "in column mysql.proc.db_collation.",
+                      (const char *) db_name,
+                      (const char *) sr_name);
+
+    invalid_creation_ctx= TRUE;
+  }
+
+  if (invalid_creation_ctx)
+  {
+    push_warning_printf(thd,
+                        Sql_condition::WARN_LEVEL_WARN,
+                        ER_SR_INVALID_CREATION_CTX,
+                        ER_THD(thd, ER_SR_INVALID_CREATION_CTX),
+                        (const char *) db_name,
+                        (const char *) sr_name);
+  }
+
+  /*
+    If we failed to retrieve the database collation, load the default one
+    from the disk.
+  */
+
+  if (!db_cl)
+    db_cl= get_default_db_collation(thd, name->m_db.str);
+
+  /* Create the context. */
+
+  return new Stored_routine_creation_ctx(client_cs, connection_cl, db_cl);
+}
+
+/*************************************************************************/
+
+class Proc_table_intact : public Table_check_intact
+{
+private:
+  bool m_print_once;
+
+public:
+  Proc_table_intact() : m_print_once(TRUE) { has_keys= TRUE; }
+
+protected:
+  void report_error(uint code, const char *fmt, ...);
+};
+
+
+/**
+  Report failure to validate the mysql.proc table definition.
+  Print a message to the error log only once.
+*/
+
+void Proc_table_intact::report_error(uint code, const char *fmt, ...)
+{
+  va_list args;
+  char buf[512];
+
+  va_start(args, fmt);
+  my_vsnprintf(buf, sizeof(buf), fmt, args);
+  va_end(args);
+
+  /* Always report the failure to the client... */
+  if (code)
+    my_message(code, buf, MYF(0));
+  else
+    my_error(ER_CANNOT_LOAD_FROM_TABLE_V2, MYF(0), "mysql", "proc");
+
+  /* ...but write it to the error log only once per server run. */
+  if (m_print_once)
+  {
+    m_print_once= FALSE;
+    sql_print_error("%s", buf);
+  }
+}
+
+
+/** Single instance used to control printing to the error log. */
+static Proc_table_intact proc_table_intact;
+
+
+/**
+  Open the mysql.proc table for read.
+
+  @param thd     Thread context
+  @param backup  Pointer to Open_tables_state instance where information about
+                 currently open tables will be saved, and from which will be
+                 restored when we will end work with mysql.proc.
+
+  NOTES
+    On must have a start_new_trans object active when calling this function
+
+  @retval
+    0	Error
+  @retval
+    \#	Pointer to TABLE object of mysql.proc
+*/
+
+TABLE *open_proc_table_for_read(THD *thd)
+{
+  TABLE_LIST table;
+  DBUG_ENTER("open_proc_table_for_read");
+
+  DBUG_ASSERT(thd->internal_transaction());
+
+  table.init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_PROC_NAME, NULL, TL_READ);
+
+  if (open_system_tables_for_read(thd, &table))
+    DBUG_RETURN(NULL);
+
+  if (!proc_table_intact.check(table.table, &proc_table_def))
+    DBUG_RETURN(table.table);
+
+  thd->commit_whole_transaction_and_close_tables();
+
+  DBUG_RETURN(NULL);
+}
+
+
+/**
+  Open the mysql.proc table for update.
+
+  @param thd  Thread context
+
+  @note
+    Table opened with this call should closed using close_thread_tables().
+
+    We don't need to use the start_new_transaction object when calling this
+    as there can't be any active transactions when we create or alter
+    stored procedures
+
+  @retval
+    0	Error
+  @retval
+    \#	Pointer to TABLE object of mysql.proc
+*/
+
+static TABLE *open_proc_table_for_update(THD *thd)
+{
+  TABLE_LIST table_list;
+  TABLE *table;
+  MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint();
+  DBUG_ENTER("open_proc_table_for_update");
+
+  DBUG_ASSERT(!thd->internal_transaction());
+
+  table_list.init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_PROC_NAME, NULL,
+                            TL_WRITE);
+
+  if (!(table= open_system_table_for_update(thd, &table_list)))
+    DBUG_RETURN(NULL);
+
+  if (!proc_table_intact.check(table, &proc_table_def))
+    DBUG_RETURN(table);
+
+  thd->commit_whole_transaction_and_close_tables();
+  thd->mdl_context.rollback_to_savepoint(mdl_savepoint);
+
+  DBUG_RETURN(NULL);
+}
+
+
+/**
+  Find row in open mysql.proc table representing stored routine.
+
+  @param thd    Thread context
+  @param name   Name of routine
+  @param table  TABLE object for open mysql.proc table.
+
+  @retval
+    SP_OK             Routine found
+  @retval
+    SP_KEY_NOT_FOUND  No routine with given name
+*/
+
+int
+Sp_handler::db_find_routine_aux(THD *thd,
+                                const Database_qualified_name *name,
+                                TABLE *table) const
+{
+  uchar key[MAX_KEY_LENGTH];	// db, name, optional key length type
+  DBUG_ENTER("db_find_routine_aux");
+  DBUG_PRINT("enter", ("type: %s  name: %.*s",
+		       type_str(),
+		       (int) name->m_name.length, name->m_name.str));
+
+  /*
+    Create key to find row. We have to use field->store() to be able to
+    handle VARCHAR and CHAR fields.
+    Assumption here is that the three first fields in the table are
+    'db', 'name' and 'type' and the first key is the primary key over the
+    same fields.
+  */
+  /* A name longer than the 'name' column cannot exist in the table. */
+  if (name->m_name.length > table->field[1]->field_length)
+    DBUG_RETURN(SP_KEY_NOT_FOUND);
+  table->field[0]->store(name->m_db, &my_charset_bin);
+  table->field[1]->store(name->m_name, &my_charset_bin);
+  table->field[2]->store((longlong) type(), true);
+  key_copy(key, table->record[0], table->key_info,
+           table->key_info->key_length);
+
+  /* Exact lookup on the primary key (db, name, type). */
+  if (table->file->ha_index_read_idx_map(table->record[0], 0, key,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
+    DBUG_RETURN(SP_KEY_NOT_FOUND);
+
+  DBUG_RETURN(SP_OK);
+}
+
+
+bool st_sp_chistics::read_from_mysql_proc_row(THD *thd, TABLE *table)
+{
+  LEX_CSTRING str;
+
+  if (table->field[MYSQL_PROC_FIELD_ACCESS]->val_str_nopad(thd->mem_root,
+                                                           &str))
+    return true;
+
+  switch (str.str[0]) {
+  case 'N':
+    daccess= SP_NO_SQL;
+    break;
+  case 'C':
+    daccess= SP_CONTAINS_SQL;
+    break;
+  case 'R':
+    daccess= SP_READS_SQL_DATA;
+    break;
+  case 'M':
+    daccess= SP_MODIFIES_SQL_DATA;
+    break;
+  default:
+    daccess= SP_DEFAULT_ACCESS_MAPPING;
+  }
+
+  if (table->field[MYSQL_PROC_FIELD_DETERMINISTIC]->val_str_nopad(thd->mem_root,
+                                                                  &str))
+    return true;
+  detistic= str.str[0] == 'N' ? false : true;
+
+  if (table->field[MYSQL_PROC_FIELD_SECURITY_TYPE]->val_str_nopad(thd->mem_root,
+                                                                  &str))
+    return true;
+  suid= str.str[0] == 'I' ? SP_IS_NOT_SUID : SP_IS_SUID;
+
+  if (table->field[MYSQL_PROC_FIELD_AGGREGATE]->val_str_nopad(thd->mem_root,
+                                                              &str))
+    return true;
+
+  switch (str.str[0]) {
+  case 'N':
+    agg_type= NOT_AGGREGATE;
+    break;
+  case 'G':
+    agg_type= GROUP_AGGREGATE;
+    break;
+  default:
+    agg_type= DEFAULT_AGGREGATE;
+  }
+
+
+  if (table->field[MYSQL_PROC_FIELD_COMMENT]->val_str_nopad(thd->mem_root,
+                                                            &comment))
+    return true;
+
+  return false;
+}
+
+
+/**
+  Read the routine definer from the current mysql.proc row and parse it
+  into this AUTHID's components.
+
+  @return true if the definer column could not be read, false on success.
+*/
+bool AUTHID::read_from_mysql_proc_row(THD *thd, TABLE *table)
+{
+  LEX_CSTRING str;
+  if (table->field[MYSQL_PROC_FIELD_DEFINER]->val_str_nopad(thd->mem_root,
+                                                            &str))
+    return true;
+  parse(str.str, str.length);
+  if (user.str[user.length])
+    ((char *) user.str)[user.length]= '\0'; // 0-terminate if was truncated
+  return false;
+}
+
+
+/**
+  Find routine definition in mysql.proc table and create corresponding
+  sp_head object for it.
+
+  @param thd   Thread context
+  @param name  Name of routine
+  @param sphp  Out parameter in which pointer to created sp_head
+               object is returned (0 in case of error).
+
+  @note
+    This function may damage current LEX during execution, so it is good
+    idea to create temporary LEX and make it active before calling it.
+
+  @retval
+    0       Success
+  @retval
+    non-0   Error (may be one of special codes like SP_KEY_NOT_FOUND)
+*/
+
+int
+Sp_handler::db_find_routine(THD *thd, const Database_qualified_name *name,
+                            sp_head **sphp) const
+{
+  TABLE *table;
+  LEX_CSTRING params, returns, body;
+  int ret;
+  longlong created;
+  longlong modified;
+  Sp_chistics chistics;
+  THD::used_t saved_time_zone_used= thd->used & THD::TIME_ZONE_USED;
+  bool trans_commited= 0;
+  sql_mode_t sql_mode;
+  Stored_program_creation_ctx *creation_ctx;
+  AUTHID definer;
+  DBUG_ENTER("db_find_routine");
+  DBUG_PRINT("enter", ("type: %s name: %.*s",
+		       type_str(),
+		       (int) name->m_name.length, name->m_name.str));
+
+  *sphp= 0;                                     // In case of errors
+
+  /* Read the mysql.proc row in a separate transaction, sql_mode=0. */
+  start_new_trans new_trans(thd);
+  Sql_mode_instant_set sms(thd, 0);
+
+  if (!(table= open_proc_table_for_read(thd)))
+  {
+    ret= SP_OPEN_TABLE_FAILED;
+    goto done;
+  }
+
+  if ((ret= db_find_routine_aux(thd, name, table)) != SP_OK)
+    goto done;
+
+  if (table->s->fields < MYSQL_PROC_FIELD_COUNT)
+  {
+    ret= SP_GET_FIELD_FAILED;
+    goto done;
+  }
+
+  if (chistics.read_from_mysql_proc_row(thd, table) ||
+      definer.read_from_mysql_proc_row(thd, table))
+  {
+    ret= SP_GET_FIELD_FAILED;
+    goto done;
+  }
+
+  /* BUGFIX: "&params" was garbled to "¶ms" (HTML-entity corruption). */
+  table->field[MYSQL_PROC_FIELD_PARAM_LIST]->val_str_nopad(thd->mem_root,
+                                                           &params);
+  if (type() != SP_TYPE_FUNCTION)
+    returns= empty_clex_str;
+  else if (table->field[MYSQL_PROC_FIELD_RETURNS]->val_str_nopad(thd->mem_root,
+                                                                 &returns))
+  {
+    ret= SP_GET_FIELD_FAILED;
+    goto done;
+  }
+
+  if (table->field[MYSQL_PROC_FIELD_BODY]->val_str_nopad(thd->mem_root,
+                                                         &body))
+  {
+    ret= SP_GET_FIELD_FAILED;
+    goto done;
+  }
+
+  // Get additional information
+  modified= table->field[MYSQL_PROC_FIELD_MODIFIED]->val_int();
+  created= table->field[MYSQL_PROC_FIELD_CREATED]->val_int();
+  sql_mode= (sql_mode_t) table->field[MYSQL_PROC_FIELD_SQL_MODE]->val_int();
+
+  creation_ctx= Stored_routine_creation_ctx::load_from_db(thd, name, table);
+
+  /* End the reading transaction before compiling the routine body. */
+  trans_commited= 1;
+  thd->commit_whole_transaction_and_close_tables();
+  new_trans.restore_old_transaction();
+
+  ret= db_load_routine(thd, name, sphp, sql_mode, params, returns, body,
+                      chistics, definer, created, modified, NULL, creation_ctx);
+ done:
+  /*
+    Restore the time zone flag as the timezone usage in proc table
+    does not affect replication.
+  */
+  thd->used= (thd->used & ~THD::TIME_ZONE_USED) | saved_time_zone_used;
+  if (!trans_commited)
+  {
+    if (table)
+      thd->commit_whole_transaction_and_close_tables();
+    new_trans.restore_old_transaction();
+  }
+  DBUG_RETURN(ret);
+}
+
+
+/**
+  Look up a routine definition in mysql.proc and, on success, insert the
+  freshly built sp_head into this handler's routine cache.
+
+  @return the result of db_find_routine() unchanged.
+*/
+int
+Sp_handler::db_find_and_cache_routine(THD *thd,
+                                      const Database_qualified_name *name,
+                                      sp_head **sp) const
+{
+  const int rc= db_find_routine(thd, name, sp);
+  if (rc != SP_OK)
+    return rc;
+  sp_cache_insert(get_cache(thd), *sp);
+  DBUG_PRINT("info", ("added new: %p, level: %lu, flags %x",
+                      sp[0], sp[0]->m_recursion_level,
+                      sp[0]->m_flags));
+  return rc;
+}
+
+
+/**
+  Silence DEPRECATED SYNTAX warnings when loading a stored procedure
+  into the cache.
+*/
+
+struct Silence_deprecated_warning : public Internal_error_handler
+{
+public:
+  // Returns true (condition handled/suppressed) only for
+  // ER_WARN_DEPRECATED_SYNTAX at warning level; everything else
+  // falls through to the default handling. See definition below.
+  virtual bool handle_condition(THD *thd,
+                                uint sql_errno,
+                                const char* sqlstate,
+                                Sql_condition::enum_warning_level *level,
+                                const char* msg,
+                                Sql_condition ** cond_hdl);
+};
+
+bool
+Silence_deprecated_warning::handle_condition(
+  THD *,
+  uint sql_errno,
+  const char*,
+  Sql_condition::enum_warning_level *level,
+  const char*,
+  Sql_condition ** cond_hdl)
+{
+  *cond_hdl= NULL;
+  /* Swallow deprecation warnings only; let everything else through. */
+  const bool is_deprecation_warning=
+    sql_errno == ER_WARN_DEPRECATED_SYNTAX &&
+    *level == Sql_condition::WARN_LEVEL_WARN;
+  return is_deprecation_warning;
+}
+
+
+/**
+  @brief    The function parses input strings and returns an SP structure.
+
+  @param[in]      thd               Thread handler
+  @param[in]      defstr            CREATE... string
+  @param[in]      sql_mode          SQL mode
+  @param[in]      parent            The owner package for package routines,
+                                    or NULL for standalone routines.
+  @param[in]      creation_ctx      Creation context of stored routines
+                                    
+  @return     Pointer to an sp_head struct
+    @retval   #                     Pointer to the parsed sp_head struct
+    @retval   0                     error
+*/
+
+static sp_head *sp_compile(THD *thd, String *defstr, sql_mode_t sql_mode,
+                           sp_package *parent,
+                           Stored_program_creation_ctx *creation_ctx)
+{
+  sp_head *sp;
+  /* Save caller state that parsing below is going to clobber. */
+  sql_mode_t old_sql_mode= thd->variables.sql_mode;
+  ha_rows old_select_limit= thd->variables.select_limit;
+  sp_rcontext *old_spcont= thd->spcont;
+  Silence_deprecated_warning warning_handler;
+  Parser_state parser_state;
+
+  /* Parse the definition under the routine's own sql_mode, no LIMIT. */
+  thd->variables.sql_mode= sql_mode;
+  thd->variables.select_limit= HA_POS_ERROR;
+
+  if (parser_state.init(thd, defstr->c_ptr_safe(), defstr->length()))
+  {
+    // Restore what we changed before bailing out.
+    thd->variables.sql_mode= old_sql_mode;
+    thd->variables.select_limit= old_select_limit;
+    return NULL;
+  }
+
+  lex_start(thd);
+  thd->lex->sphead= parent;
+  // Suppress DEPRECATED SYNTAX warnings while re-parsing a stored routine.
+  thd->push_internal_handler(&warning_handler);
+  thd->spcont= 0;
+
+  if (parse_sql(thd, & parser_state, creation_ctx) || thd->lex == NULL)
+  {
+    sp= thd->lex->sphead;
+    sp_head::destroy(sp);   // Free the partially built routine on error
+    sp= 0;
+  }
+  else
+  {
+    sp= thd->lex->sphead;
+  }
+
+  /* Restore the caller's parser/session state. */
+  thd->pop_internal_handler();
+  thd->spcont= old_spcont;
+  thd->variables.sql_mode= old_sql_mode;
+  thd->variables.select_limit= old_select_limit;
+  if (sp != NULL)
+    sp->init_psi_share();
+  return sp;
+}
+
+
+/**
+  Traps ER_BAD_DB_ERROR (e.g. while switching to a routine's database),
+  so the caller can detect the missing database and report it itself.
+*/
+class Bad_db_error_handler : public Internal_error_handler
+{
+public:
+  Bad_db_error_handler()
+    :m_error_caught(false)
+  {}
+
+  virtual bool handle_condition(THD *thd,
+                                uint sql_errno,
+                                const char* sqlstate,
+                                Sql_condition::enum_warning_level *level,
+                                const char* message,
+                                Sql_condition ** cond_hdl);
+
+  // True if ER_BAD_DB_ERROR was raised (and suppressed) while installed.
+  bool error_caught() const { return m_error_caught; }
+
+private:
+  bool m_error_caught;   // set by handle_condition() on ER_BAD_DB_ERROR
+};
+
+bool
+Bad_db_error_handler::handle_condition(THD *thd,
+                                       uint sql_errno,
+                                       const char* sqlstate,
+                                       Sql_condition::enum_warning_level
+                                       *level,
+                                       const char* message,
+                                       Sql_condition ** cond_hdl)
+{
+  /* Only ER_BAD_DB_ERROR is trapped; everything else is left alone. */
+  if (sql_errno != ER_BAD_DB_ERROR)
+    return false;
+  m_error_caught= true;
+  return true;
+}
+
+
+/**
+  Create an sp_head object for a routine from its on-disk representation.
+
+  Builds the full CREATE-statement text (with DEFINER clause and routine
+  characteristics) from the pieces read from mysql.proc, switches to the
+  routine's database, compiles the statement and fills in the routine
+  attributes.
+
+  @param thd           Thread context
+  @param name          Qualified routine name
+  @param[out] sphp     Returns the compiled routine
+  @param sql_mode      sql_mode the routine was created under
+  @param params        Parameter list text
+  @param returns       RETURNS clause text (functions only)
+  @param body          Routine body text
+  @param chistics      Routine characteristics
+  @param definer       Routine definer
+  @param created       Creation timestamp
+  @param modified      Last-modification timestamp
+  @param parent        Owner package for package routines, or NULL
+  @param creation_ctx  Creation context (character sets etc.)
+
+  @return 0 on success, or an SP_xxx error code.
+*/
+int
+Sp_handler::db_load_routine(THD *thd, const Database_qualified_name *name,
+                            sp_head **sphp,
+                            sql_mode_t sql_mode,
+                            const LEX_CSTRING &params,
+                            const LEX_CSTRING &returns,
+                            const LEX_CSTRING &body,
+                            const st_sp_chistics &chistics,
+                            const AUTHID &definer,
+                            longlong created, longlong modified,
+                            sp_package *parent,
+                            Stored_program_creation_ctx *creation_ctx) const
+{
+  LEX *old_lex= thd->lex, newlex;
+  String defstr;
+  char saved_cur_db_name_buf[SAFE_NAME_LEN+1];
+  LEX_STRING saved_cur_db_name=
+    { saved_cur_db_name_buf, sizeof(saved_cur_db_name_buf) };
+  bool cur_db_changed;
+  Bad_db_error_handler db_not_exists_handler;
+
+  int ret= 0;
+
+  thd->lex= &newlex;
+  newlex.current_select= NULL;
+
+  defstr.set_charset(creation_ctx->get_client_cs());
+  defstr.set_thread_specific();
+
+  /*
+    We have to add DEFINER clause and provide proper routine characteristics
+    in the routine definition statement that we build here to be able to use
+    this definition for SHOW CREATE PROCEDURE later.
+   */
+
+  if (show_create_sp(thd, &defstr,
+                     null_clex_str, name->m_name,
+                     params, returns, body,
+                     chistics, definer, DDL_options(), sql_mode))
+  {
+    ret= SP_INTERNAL_ERROR;
+    goto end;
+  }
+
+  thd->push_internal_handler(&db_not_exists_handler);
+  /*
+    Change the current database (if needed).
+
+    TODO: why do we force switch here?
+  */
+
+  if (mysql_opt_change_db(thd, &name->m_db, &saved_cur_db_name, TRUE,
+                          &cur_db_changed))
+  {
+    ret= SP_INTERNAL_ERROR;
+    thd->pop_internal_handler();
+    goto end;
+  }
+  thd->pop_internal_handler();
+  if (db_not_exists_handler.error_caught())
+  {
+    ret= SP_INTERNAL_ERROR;
+    my_error(ER_BAD_DB_ERROR, MYF(0), name->m_db.str);
+
+    goto end;
+  }
+
+  {
+    *sphp= sp_compile(thd, &defstr, sql_mode, parent, creation_ctx);
+    /*
+      Force switching back to the saved current database (if changed),
+      because it may be NULL. In this case, mysql_change_db() would
+      generate an error.
+    */
+
+    if (cur_db_changed && mysql_change_db(thd,
+                                          (LEX_CSTRING*) &saved_cur_db_name,
+                                          TRUE))
+    {
+      ret= SP_INTERNAL_ERROR;
+      goto end;
+    }
+
+    if (!*sphp)
+    {
+      ret= SP_PARSE_ERROR;
+      goto end;
+    }
+
+    (*sphp)->set_definer(&definer.user, &definer.host);
+    (*sphp)->set_info(created, modified, chistics, sql_mode);
+    (*sphp)->set_creation_ctx(creation_ctx);
+    (*sphp)->optimize();
+
+    if (type() == SP_TYPE_PACKAGE_BODY)
+    {
+      /* Propagate definer/attributes to every routine in the package. */
+      sp_package *package= (*sphp)->get_package();
+      /* BUGFIX: template argument <LEX> was lost during extraction. */
+      List_iterator<LEX> it(package->m_routine_implementations);
+      for (LEX *lex; (lex= it++); )
+      {
+        DBUG_ASSERT(lex->sphead);
+        lex->sphead->set_definer(&definer.user, &definer.host);
+        lex->sphead->set_suid(package->suid());
+        lex->sphead->m_sql_mode= sql_mode;
+        lex->sphead->set_creation_ctx(creation_ctx);
+        lex->sphead->optimize();
+      }
+    }
+
+    /*
+      Not strictly necessary to invoke this method here, since we know
+      that we've parsed CREATE PROCEDURE/FUNCTION and not an
+      UPDATE/DELETE/INSERT/REPLACE/LOAD/CREATE TABLE, but we try to
+      maintain the invariant that this method is called for each
+      distinct statement, in case its logic is extended with other
+      types of analyses in future.
+    */
+    newlex.set_trg_event_type_for_tables();
+  }
+
+end:
+  thd->lex->sphead= NULL;
+  lex_end(thd->lex);
+  thd->lex= old_lex;
+  return ret;
+}
+
+
+/**
+  Render the RETURNS type of a stored function into "result", appending
+  CHARSET/COLLATE clauses for character types.
+
+  Uses a zeroed dummy TABLE/TABLE_SHARE pair purely as a context object
+  for create_result_field(); the temporary Field is deleted before return.
+*/
+void
+sp_returns_type(THD *thd, String &result, const sp_head *sp)
+{
+  TABLE dummy_table;
+  TABLE_SHARE dummy_share;
+  bzero((char*) &dummy_table, sizeof(dummy_table));
+  bzero((char*) &dummy_share, sizeof(dummy_share));
+  dummy_table.in_use= thd;
+  dummy_table.s= &dummy_share;
+
+  Field *field= sp->create_result_field(0, 0, &dummy_table);
+  field->sql_type(result);
+
+  if (field->has_charset())
+  {
+    CHARSET_INFO *cs= field->charset();
+    result.append(STRING_WITH_LEN(" CHARSET "));
+    result.append(cs->cs_name);
+    if (Charset(cs).can_have_collate_clause())
+    {
+      result.append(STRING_WITH_LEN(" COLLATE "));
+      result.append(cs->coll_name);
+    }
+  }
+
+  delete field;
+}
+
+
+/**
+  Delete the record for the stored routine object from mysql.proc,
+  which is already opened, locked, and positioned to the record with the
+  record to be deleted.
+
+  The operation deletes the record for the current record in "table"
+  and invalidates the stored-routine cache.
+
+  @param thd    Thread context.
+  @param name   Stored routine name.
+  @param table  A pointer to the opened mysql.proc table
+
+  @return       Error code: SP_OK on success, or SP_DELETE_ROW_FAILED
+                if the row could not be deleted.
+*/
+
+int
+Sp_handler::sp_drop_routine_internal(THD *thd,
+                                     const Database_qualified_name *name,
+                                     TABLE *table) const
+{
+  DBUG_ENTER("sp_drop_routine_internal");
+
+  if (table->file->ha_delete_row(table->record[0]))
+    DBUG_RETURN(SP_DELETE_ROW_FAILED);
+
+  /* Make change permanent and avoid 'table is marked as crashed' errors */
+  table->file->extra(HA_EXTRA_FLUSH);
+
+  sp_cache_invalidate();
+  /*
+    A lame workaround for lack of cache flush:
+    make sure the routine is at least gone from the
+    local cache.
+  */
+  sp_head *sp;
+  sp_cache **spc= get_cache(thd);
+  DBUG_ASSERT(spc);
+  if ((sp= sp_cache_lookup(spc, name)))
+    sp_cache_flush_obsolete(spc, &sp);
+  /* Drop statistics for this stored program from performance schema. */
+  /* BUGFIX: <uint> template arguments were lost during extraction. */
+  MYSQL_DROP_SP(type(), name->m_db.str,
+                static_cast<uint>(name->m_db.length),
+                name->m_name.str,
+                static_cast<uint>(name->m_name.length));
+  DBUG_RETURN(SP_OK);
+}
+
+
+/**
+  Locate the routine's row in the (already opened and locked) mysql.proc
+  table and delete it.
+
+  @return SP_OK on success, or the lookup/delete error code.
+*/
+int
+Sp_handler::sp_find_and_drop_routine(THD *thd, TABLE *table,
+                                     const Database_qualified_name *name) const
+{
+  const int find_rc= db_find_routine_aux(thd, name, table);
+  return find_rc == SP_OK ? sp_drop_routine_internal(thd, name, table)
+                          : find_rc;
+}
+
+
+int
+Sp_handler_package_spec::
+  sp_find_and_drop_routine(THD *thd, TABLE *table,
+                           const Database_qualified_name *name) const
+{
+  int ret;
+  if ((ret= db_find_routine_aux(thd, name, table)) != SP_OK)
+    return ret;
+  /*
+    When we do "DROP PACKAGE pkg", we should also perform
+    "DROP PACKAGE BODY pkg" automatically.
+  */
+  ret= sp_handler_package_body.sp_find_and_drop_routine(thd, table, name);
+  if (ret != SP_KEY_NOT_FOUND && ret != SP_OK)
+  {
+    /*
+      - SP_KEY_NOT_FOUND means that "CREATE PACKAGE pkg" did not
+        have a corresponding "CREATE PACKAGE BODY pkg" yet.
+      - SP_OK means that "CREATE PACKAGE pkg" had a corresponding
+        "CREATE PACKAGE BODY pkg", which was successfully dropped.
+    */
+    return ret; // Other codes mean an unexpected error
+  }
+  return Sp_handler::sp_find_and_drop_routine(thd, table, name);
+}
+
+
+/**
+  Write stored-routine object into mysql.proc.
+
+  This operation stores attributes of the stored procedure/function into
+  the mysql.proc.
+
+  @param thd  Thread context.
+  @param sp   Stored routine object to store.
+
+  @note Opens and closes the thread tables. Therefore assumes
+  that there are no locked tables in this thread at the time of
+  invocation.
+  Unlike some other DDL statements, *does* close the tables
+  in the end, since the call to this function is normally
+  followed by an implicit grant (sp_grant_privileges())
+  and this subsequent call opens and closes mysql.procs_priv.
+
+  @return Error status.
+    @retval FALSE on success
+    @retval TRUE on error
+*/
+
+bool
+Sp_handler::sp_create_routine(THD *thd, const sp_head *sp) const
+{
+  LEX *lex= thd->lex;
+  bool ret= TRUE;
+  TABLE *table;
+  char definer_buf[USER_HOST_BUFF_SIZE];
+  LEX_CSTRING definer;
+  // Saved so they can be restored at "done:" after the dictionary write.
+  sql_mode_t org_sql_mode= thd->variables.sql_mode;
+  enum_check_fields org_count_cuted_fields= thd->count_cuted_fields;
+  CHARSET_INFO *db_cs= get_default_db_collation(thd, sp->m_db.str);
+  bool store_failed= FALSE;
+  DBUG_ENTER("sp_create_routine");
+  DBUG_PRINT("enter", ("type: %s  name: %.*s",
+                       type_str(),
+                       (int) sp->m_name.length,
+                       sp->m_name.str));
+  MDL_key::enum_mdl_namespace mdl_type= get_mdl_type();
+  LEX_CSTRING returns= empty_clex_str;
+  String retstr(64);
+  retstr.set_charset(system_charset_info);
+
+  /* Grab an exclusive MDL lock. */
+  if (lock_object_name(thd, mdl_type, sp->m_db.str, sp->m_name.str))
+  {
+    my_error(ER_BAD_DB_ERROR, MYF(0), sp->m_db.str);
+    DBUG_RETURN(TRUE);
+  }
+
+  /*
+    Check that a database directory with this name
+    exists. Design note: This won't work on virtual databases
+    like information_schema.
+  */
+  if (check_db_dir_existence(sp->m_db.str))
+  {
+    my_error(ER_BAD_DB_ERROR, MYF(0), sp->m_db.str);
+    DBUG_RETURN(TRUE);
+  }
+
+
+  /* Reset sql_mode during data dictionary operations. */
+  thd->variables.sql_mode= 0;
+  thd->count_cuted_fields= CHECK_FIELD_WARN;
+
+  if (!(table= open_proc_table_for_update(thd)))
+  {
+    my_error(ER_SP_STORE_FAILED, MYF(0), type_str(), sp->m_name.str);
+    goto done;
+  }
+  else
+  {
+    /* Checking if the routine already exists */
+    if (db_find_routine_aux(thd, sp, table) == SP_OK)
+    {
+      /* Duplicate found: apply OR REPLACE / IF NOT EXISTS semantics. */
+      if (lex->create_info.or_replace())
+      {
+        switch (type()) {
+        case SP_TYPE_PACKAGE:
+          // Drop together with its PACKAGE BODY mysql.proc record
+          if (sp_handler_package_spec.sp_find_and_drop_routine(thd, table, sp))
+            goto done;
+          break;
+        case SP_TYPE_PACKAGE_BODY:
+        case SP_TYPE_FUNCTION:
+        case SP_TYPE_PROCEDURE:
+          if (sp_drop_routine_internal(thd, sp, table))
+            goto done;
+          break;
+        case SP_TYPE_TRIGGER:
+        case SP_TYPE_EVENT:
+          // Triggers/events are not stored via this path.
+          DBUG_ASSERT(0);
+          ret= SP_OK;
+        }
+      }
+      else if (lex->create_info.if_not_exists())
+      {
+        push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
+                            ER_SP_ALREADY_EXISTS,
+                            ER_THD(thd, ER_SP_ALREADY_EXISTS),
+                            type_str(), sp->m_name.str);
+
+        ret= FALSE;
+
+        // Setting retstr as it is used for logging.
+        if (type() == SP_TYPE_FUNCTION)
+        {
+          sp_returns_type(thd, retstr, sp);
+          retstr.get_value(&returns);
+        }
+        goto log;
+      }
+      else
+      {
+        my_error(ER_SP_ALREADY_EXISTS, MYF(0), type_str(), sp->m_name.str);
+        goto done;
+      }
+    }
+
+    restore_record(table, s->default_values); // Get default values for fields
+
+    /* NOTE: all needed privilege checks have been already done. */
+    thd->lex->definer->set_lex_string(&definer, definer_buf);
+
+    if (table->s->fields < MYSQL_PROC_FIELD_COUNT)
+    {
+      my_error(ER_SP_STORE_FAILED, MYF(0), type_str(), sp->m_name.str);
+      goto done;
+    }
+
+    /* Reject names/bodies that do not fit into the mysql.proc columns. */
+    if (system_charset_info->numchars(sp->m_name.str,
+                                      sp->m_name.str + sp->m_name.length) >
+        table->field[MYSQL_PROC_FIELD_NAME]->char_length())
+    {
+      my_error(ER_TOO_LONG_IDENT, MYF(0), sp->m_name.str);
+      goto done;
+    }
+    if (sp->m_body.length > table->field[MYSQL_PROC_FIELD_BODY]->field_length)
+    {
+      my_error(ER_TOO_LONG_BODY, MYF(0), sp->m_name.str);
+      goto done;
+    }
+
+    /* Fill the row; store_failed accumulates any column-store error. */
+    store_failed=
+      table->field[MYSQL_PROC_FIELD_DB]->
+        store(sp->m_db, system_charset_info);
+
+    store_failed= store_failed ||
+      table->field[MYSQL_PROC_FIELD_NAME]->
+        store(sp->m_name, system_charset_info);
+
+    if (sp->agg_type() != DEFAULT_AGGREGATE)
+    {
+      store_failed= store_failed ||
+        table->field[MYSQL_PROC_FIELD_AGGREGATE]->
+          store((longlong)sp->agg_type(),TRUE);
+    }
+
+    store_failed= store_failed ||
+      table->field[MYSQL_PROC_MYSQL_TYPE]->
+        store((longlong) type(), true);
+
+    store_failed= store_failed ||
+      table->field[MYSQL_PROC_FIELD_SPECIFIC_NAME]->
+        store(sp->m_name, system_charset_info);
+
+    if (sp->daccess() != SP_DEFAULT_ACCESS)
+    {
+      store_failed= store_failed ||
+        table->field[MYSQL_PROC_FIELD_ACCESS]->
+          store((longlong)sp->daccess(), TRUE);
+    }
+
+    store_failed= store_failed ||
+      table->field[MYSQL_PROC_FIELD_DETERMINISTIC]->
+        store((longlong)(sp->detistic() ? 1 : 2), TRUE);
+
+    if (sp->suid() != SP_IS_DEFAULT_SUID)
+    {
+      store_failed= store_failed ||
+        table->field[MYSQL_PROC_FIELD_SECURITY_TYPE]->
+          store((longlong)sp->suid(), TRUE);
+    }
+
+    store_failed= store_failed ||
+      table->field[MYSQL_PROC_FIELD_PARAM_LIST]->
+        store(sp->m_params, system_charset_info);
+
+    if (type() == SP_TYPE_FUNCTION)
+    {
+      sp_returns_type(thd, retstr, sp);
+      retstr.get_value(&returns);
+
+      store_failed= store_failed ||
+        table->field[MYSQL_PROC_FIELD_RETURNS]->
+          store(retstr.ptr(), retstr.length(), system_charset_info);
+    }
+
+    store_failed= store_failed ||
+      table->field[MYSQL_PROC_FIELD_BODY]->
+        store(sp->m_body, system_charset_info);
+
+    store_failed= store_failed ||
+      table->field[MYSQL_PROC_FIELD_DEFINER]->
+        store(definer, system_charset_info);
+
+    table->field[MYSQL_PROC_FIELD_CREATED]->set_time();
+    table->field[MYSQL_PROC_FIELD_MODIFIED]->set_time();
+
+    store_failed= store_failed ||
+      table->field[MYSQL_PROC_FIELD_SQL_MODE]->
+        store((longlong) org_sql_mode, TRUE);
+
+    if (sp->comment().str)
+    {
+      store_failed= store_failed ||
+        table->field[MYSQL_PROC_FIELD_COMMENT]->
+          store(sp->comment(), system_charset_info);
+    }
+
+    /* Binlog-safety checks for functions when creators are not trusted. */
+    if (type() == SP_TYPE_FUNCTION &&
+        !trust_function_creators && mysql_bin_log.is_open())
+    {
+      if (!sp->detistic())
+      {
+	/*
+	  Note that this test is not perfect; one could use
+	  a non-deterministic read-only function in an update statement.
+	*/
+	enum enum_sp_data_access access=
+	  (sp->daccess() == SP_DEFAULT_ACCESS) ?
+	  SP_DEFAULT_ACCESS_MAPPING : sp->daccess();
+	if (access == SP_CONTAINS_SQL ||
+	    access == SP_MODIFIES_SQL_DATA)
+	{
+          my_error(ER_BINLOG_UNSAFE_ROUTINE, MYF(0));
+	  goto done;
+	}
+      }
+      if (!(thd->security_ctx->master_access & PRIV_LOG_BIN_TRUSTED_SP_CREATOR))
+      {
+        my_error(ER_BINLOG_CREATE_ROUTINE_NEED_SUPER,MYF(0));
+	goto done;
+      }
+    }
+
+    table->field[MYSQL_PROC_FIELD_CHARACTER_SET_CLIENT]->set_notnull();
+    store_failed= store_failed ||
+      table->field[MYSQL_PROC_FIELD_CHARACTER_SET_CLIENT]->
+      store(&thd->charset()->cs_name, system_charset_info);
+
+    table->field[MYSQL_PROC_FIELD_COLLATION_CONNECTION]->set_notnull();
+    store_failed= store_failed ||
+      table->field[MYSQL_PROC_FIELD_COLLATION_CONNECTION]->
+      store(&thd->variables.collation_connection->coll_name,
+            system_charset_info);
+
+    table->field[MYSQL_PROC_FIELD_DB_COLLATION]->set_notnull();
+    store_failed= store_failed ||
+      table->field[MYSQL_PROC_FIELD_DB_COLLATION]->
+      store(&db_cs->coll_name, system_charset_info);
+
+    table->field[MYSQL_PROC_FIELD_BODY_UTF8]->set_notnull();
+    store_failed= store_failed ||
+      table->field[MYSQL_PROC_FIELD_BODY_UTF8]->store(
+        sp->m_body_utf8, system_charset_info);
+
+    if (store_failed)
+    {
+      my_error(ER_CANT_CREATE_SROUTINE, MYF(0), sp->m_name.str);
+      goto done;
+    }
+
+    if (table->file->ha_write_row(table->record[0]))
+    {
+      my_error(ER_SP_ALREADY_EXISTS, MYF(0), type_str(), sp->m_name.str);
+      goto done;
+    }
+    /* Make change permanent and avoid 'table is marked as crashed' errors */
+    table->file->extra(HA_EXTRA_FLUSH);
+
+    sp_cache_invalidate();
+  }
+
+log:
+  /* Rebuild the full CREATE statement and write it to the binary log. */
+  if (mysql_bin_log.is_open())
+  {
+    thd->clear_error();
+
+    StringBuffer<128> log_query(thd->variables.character_set_client);
+    DBUG_ASSERT(log_query.charset()->mbminlen == 1);
+
+    if (show_create_sp(thd, &log_query,
+                       sp->m_explicit_name ? sp->m_db : null_clex_str,
+                       sp->m_name,
+                       sp->m_params, returns, sp->m_body,
+                       sp->chistics(),
+                       thd->lex->definer[0],
+                       thd->lex->create_info,
+                       org_sql_mode))
+    {
+      my_error(ER_OUT_OF_RESOURCES, MYF(0));
+      goto done;
+    }
+    /* restore sql_mode when binloging */
+    thd->variables.sql_mode= org_sql_mode;
+    /* Such a statement can always go directly to binlog, no trans cache */
+    if (thd->binlog_query(THD::STMT_QUERY_TYPE,
+                          log_query.ptr(), log_query.length(),
+                          FALSE, FALSE, FALSE, 0) > 0)
+    {
+      my_error(ER_ERROR_ON_WRITE, MYF(0), "binary log", -1);
+      goto done;
+    }
+  }
+  ret= FALSE;
+
+done:
+  /* Restore session state saved at the top of this function. */
+  thd->variables.sql_mode= org_sql_mode;
+  thd->count_cuted_fields= org_count_cuted_fields;
+  DBUG_ASSERT(!thd->is_current_stmt_binlog_format_row());
+  DBUG_RETURN(ret);
+}
+
+
+/* Append "SQL SECURITY INVOKER" when the routine is explicitly non-SUID.
+   Returns true only if the append itself failed. */
+static bool
+append_suid(String *buf, enum_sp_suid_behaviour suid)
+{
+  if (suid != SP_IS_NOT_SUID)
+    return false;
+  return buf->append(STRING_WITH_LEN("    SQL SECURITY INVOKER\n"));
+}
+
+
+/* Append a "COMMENT '...'" clause (quoted/escaped) if one is set.
+   Returns true on append failure, false otherwise. */
+static bool
+append_comment(String *buf, const LEX_CSTRING &comment)
+{
+  if (comment.length == 0)
+    return false;
+  if (buf->append(STRING_WITH_LEN("    COMMENT ")))
+    return true;
+  append_unescaped(buf, comment.str, comment.length);
+  return buf->append('\n');
+}
+
+
+/* Append the package characteristics (SQL SECURITY, COMMENT) to "buf". */
+static bool
+append_package_chistics(String *buf, const st_sp_chistics &chistics)
+{
+  if (append_suid(buf, chistics.suid))
+    return true;
+  return append_comment(buf, chistics.comment);
+}
+
+
+/**
+  Print a CREATE PACKAGE [BODY] statement into "buf" under the given
+  sql_mode (used for SHOW CREATE and binary logging).
+
+  @note "params" and "returns" are unused for packages but kept for
+        signature compatibility with the generic Sp_handler interface.
+  @return true on out-of-memory while appending, false on success.
+*/
+bool
+Sp_handler_package::show_create_sp(THD *thd, String *buf,
+                                   const LEX_CSTRING &db,
+                                   const LEX_CSTRING &name,
+                                   const LEX_CSTRING &params,
+                                   const LEX_CSTRING &returns,
+                                   const LEX_CSTRING &body,
+                                   const st_sp_chistics &chistics,
+                                   const AUTHID &definer,
+                                   const DDL_options_st ddl_options,
+                                   sql_mode_t sql_mode) const
+{
+  Sql_mode_instant_set sms(thd, sql_mode);
+  bool rc=
+    buf->append(STRING_WITH_LEN("CREATE ")) ||
+    (ddl_options.or_replace() &&
+     buf->append(STRING_WITH_LEN("OR REPLACE "))) ||
+    append_definer(thd, buf, &definer.user, &definer.host) ||
+    buf->append(type_lex_cstring()) ||
+    buf->append(' ') ||
+    (ddl_options.if_not_exists() &&
+     buf->append(STRING_WITH_LEN("IF NOT EXISTS "))) ||
+    (db.length > 0 &&
+     (append_identifier(thd, buf, db.str, db.length) ||
+      buf->append('.'))) ||
+    append_identifier(thd, buf, name.str, name.length) ||
+    append_package_chistics(buf, chistics) ||
+    buf->append(' ') ||
+    buf->append(body.str, body.length);
+  return rc;
+}
+
+
+/**
+  Delete the record for the stored routine object from mysql.proc
+  and do binary logging.
+
+  The operation deletes the record for the stored routine specified by name
+  from the mysql.proc table and invalidates the stored-routine cache.
+
+  @param thd  Thread context.
+  @param name Stored routine name.
+
+  @return Error code. SP_OK is returned on success. Other SP_ constants are
+  used to indicate about errors.
+*/
+
+int
+Sp_handler::sp_drop_routine(THD *thd,
+                            const Database_qualified_name *name) const
+{
+  TABLE *table;
+  int ret;
+  DBUG_ENTER("sp_drop_routine");
+  DBUG_PRINT("enter", ("type: %s  name: %.*s",
+		       type_str(),
+		       (int) name->m_name.length, name->m_name.str));
+  MDL_key::enum_mdl_namespace mdl_type= get_mdl_type();
+
+  /* Grab an exclusive MDL lock. */
+  if (lock_object_name(thd, mdl_type, name->m_db.str, name->m_name.str))
+    DBUG_RETURN(SP_DELETE_ROW_FAILED);
+
+  if (!(table= open_proc_table_for_update(thd)))
+    DBUG_RETURN(SP_OPEN_TABLE_FAILED);
+
+  // Delete the row (virtual: package spec also drops its body), then
+  // binlog the original DROP statement on success.
+  if ((ret= sp_find_and_drop_routine(thd, table, name)) == SP_OK &&
+      write_bin_log(thd, TRUE, thd->query(), thd->query_length()))
+    ret= SP_INTERNAL_ERROR;
+  /*
+    This statement will be replicated as a statement, even when using
+    row-based replication.  The flag will be reset at the end of the
+    statement.
+  */
+  DBUG_ASSERT(!thd->is_current_stmt_binlog_format_row());
+  DBUG_RETURN(ret);
+}
+
+
+/**
+  Find and update the record for the stored routine object in mysql.proc.
+
+  The operation finds the record for the stored routine specified by name
+  in the mysql.proc table and updates it with new attributes. After
+  successful update, the cache is invalidated.
+
+  @param thd      Thread context.
+  @param name     Stored routine name.
+  @param chistics New values of stored routine attributes to write.
+
+  @return Error code. SP_OK is returned on success. Other SP_ constants are
+  used to indicate about errors.
+*/
+
+int
+Sp_handler::sp_update_routine(THD *thd, const Database_qualified_name *name,
+                              const st_sp_chistics *chistics) const
+{
+  TABLE *table;
+  int ret;
+  DBUG_ENTER("sp_update_routine");
+  DBUG_PRINT("enter", ("type: %s  name: %.*s",
+                       type_str(),
+                       (int) name->m_name.length, name->m_name.str));
+  MDL_key::enum_mdl_namespace mdl_type= get_mdl_type();
+
+  /* Grab an exclusive MDL lock. */
+  if (lock_object_name(thd, mdl_type, name->m_db.str, name->m_name.str))
+    DBUG_RETURN(SP_OPEN_TABLE_FAILED);
+
+  if (!(table= open_proc_table_for_update(thd)))
+    DBUG_RETURN(SP_OPEN_TABLE_FAILED);
+
+  if ((ret= db_find_routine_aux(thd, name, table)) == SP_OK)
+  {
+    /*
+      With binary logging enabled and function creators not trusted,
+      refuse to mark a non-deterministic function as CONTAINS SQL or
+      MODIFIES SQL DATA (ER_BINLOG_UNSAFE_ROUTINE).
+    */
+    if (type() == SP_TYPE_FUNCTION && ! trust_function_creators &&
+        mysql_bin_log.is_open() &&
+        (chistics->daccess == SP_CONTAINS_SQL ||
+         chistics->daccess == SP_MODIFIES_SQL_DATA))
+    {
+      char *ptr;
+      bool is_deterministic;
+      ptr= get_field(thd->mem_root,
+                     table->field[MYSQL_PROC_FIELD_DETERMINISTIC]);
+      if (ptr == NULL)
+      {
+        ret= SP_INTERNAL_ERROR;
+        goto err;
+      }
+      is_deterministic= ptr[0] == 'N' ? FALSE : TRUE;
+      if (!is_deterministic)
+      {
+        my_message(ER_BINLOG_UNSAFE_ROUTINE,
+                   ER_THD(thd, ER_BINLOG_UNSAFE_ROUTINE), MYF(0));
+        ret= SP_INTERNAL_ERROR;
+        goto err;
+      }
+    }
+
+    /* Update only the attributes that were explicitly specified. */
+    store_record(table,record[1]);
+    table->field[MYSQL_PROC_FIELD_MODIFIED]->set_time();
+    if (chistics->suid != SP_IS_DEFAULT_SUID)
+      table->field[MYSQL_PROC_FIELD_SECURITY_TYPE]->
+	store((longlong)chistics->suid, TRUE);
+    if (chistics->daccess != SP_DEFAULT_ACCESS)
+      table->field[MYSQL_PROC_FIELD_ACCESS]->
+	store((longlong)chistics->daccess, TRUE);
+    if (chistics->comment.str)
+      table->field[MYSQL_PROC_FIELD_COMMENT]->store(chistics->comment,
+						    system_charset_info);
+    if (chistics->agg_type != DEFAULT_AGGREGATE)
+      table->field[MYSQL_PROC_FIELD_AGGREGATE]->
+         store((longlong)chistics->agg_type, TRUE);
+    // HA_ERR_RECORD_IS_THE_SAME (nothing changed) is not an error.
+    if ((ret= table->file->ha_update_row(table->record[1],table->record[0])) &&
+        ret != HA_ERR_RECORD_IS_THE_SAME)
+      ret= SP_WRITE_ROW_FAILED;
+    else
+      ret= 0;
+    /* Make change permanent and avoid 'table is marked as crashed' errors */
+    table->file->extra(HA_EXTRA_FLUSH);
+  }
+
+  if (ret == SP_OK)
+  {
+    /* Binlog the ALTER statement and invalidate cached copies. */
+    if (write_bin_log(thd, TRUE, thd->query(), thd->query_length()))
+      ret= SP_INTERNAL_ERROR;
+    sp_cache_invalidate();
+  }
+err:
+  DBUG_ASSERT(!thd->is_current_stmt_binlog_format_row());
+  DBUG_RETURN(ret);
+}
+
+
+/**
+  This internal handler is used to trap errors from opening mysql.proc.
+*/
+
+class Lock_db_routines_error_handler : public Internal_error_handler
+{
+public:
+  bool handle_condition(THD *thd,
+                        uint sql_errno,
+                        const char* sqlstate,
+                        Sql_condition::enum_warning_level *level,
+                        const char* msg,
+                        Sql_condition ** cond_hdl)
+  {
+    /*
+      Swallow exactly the errors that mean mysql.proc is missing or
+      outdated; everything else propagates to the caller.
+    */
+    switch (sql_errno) {
+    case ER_NO_SUCH_TABLE:
+    case ER_NO_SUCH_TABLE_IN_ENGINE:
+    case ER_CANNOT_LOAD_FROM_TABLE_V2:
+    case ER_COL_COUNT_DOESNT_MATCH_PLEASE_UPDATE:
+    case ER_COL_COUNT_DOESNT_MATCH_CORRUPTED_V2:
+      return true;
+    default:
+      return false;
+    }
+  }
+};
+
+
+/**
+   Acquires exclusive metadata lock on all stored routines in the
+   given database.
+
+   @note Will also return false (=success) if mysql.proc can't be opened
+         or is outdated. This allows DROP DATABASE to continue in these
+         cases.
+ */
+
+bool lock_db_routines(THD *thd, const char *db)
+{
+  TABLE *table;
+  uint key_len;
+  MDL_request_list mdl_requests;
+  Lock_db_routines_error_handler err_handler;
+  uchar keybuf[MAX_KEY_LENGTH];
+  DBUG_ENTER("lock_db_routines");
+
+  DBUG_SLOW_ASSERT(ok_for_lower_case_names(db));
+
+  start_new_trans new_trans(thd);
+
+  /*
+    mysql.proc will be re-opened during deletion, so we can ignore
+    errors when opening the table here. The error handler is
+    used to avoid getting the same warning twice.
+  */
+  thd->push_internal_handler(&err_handler);
+  table= open_proc_table_for_read(thd);
+  thd->pop_internal_handler();
+  if (!table)
+  {
+    /*
+      DROP DATABASE should not fail even if mysql.proc does not exist
+      or is outdated. We therefore only abort mysql_rm_db() if we
+      have errors not handled by the error handler.
+    */
+    new_trans.restore_old_transaction();
+    DBUG_RETURN(thd->is_error() || thd->killed);
+  }
+
+  /* Build a key image for the 'db' prefix of mysql.proc's primary key. */
+  table->field[MYSQL_PROC_FIELD_DB]->store(db, strlen(db), system_charset_info);
+  key_len= table->key_info->key_part[0].store_length;
+  table->field[MYSQL_PROC_FIELD_DB]->get_key_image(keybuf, key_len, Field::itRAW);
+  int nxtres= table->file->ha_index_init(0, 1);
+  if (nxtres)
+  {
+    table->file->print_error(nxtres, MYF(0));
+    goto error;
+  }
+
+  if (!table->file->ha_index_read_map(table->record[0], keybuf, (key_part_map)1,
+                                       HA_READ_KEY_EXACT))
+  {
+    do
+    {
+      char *sp_name= get_field(thd->mem_root,
+                               table->field[MYSQL_PROC_FIELD_NAME]);
+      if (sp_name == NULL) // skip invalid sp names (hand-edited mysql.proc?)
+        continue;
+
+      /*
+        Collect an exclusive MDL request for every routine of this database.
+        Unknown routine types fall back to the procedure handler.
+      */
+      longlong sp_type= table->field[MYSQL_PROC_MYSQL_TYPE]->val_int();
+      MDL_request *mdl_request= new (thd->mem_root) MDL_request;
+      const Sp_handler *sph= Sp_handler::handler((enum_sp_type)
+                                                 sp_type);
+      if (!sph)
+        sph= &sp_handler_procedure;
+      MDL_REQUEST_INIT(mdl_request, sph->get_mdl_type(), db, sp_name,
+                        MDL_EXCLUSIVE, MDL_TRANSACTION);
+      mdl_requests.push_front(mdl_request);
+    } while (! (nxtres= table->file->ha_index_next_same(table->record[0], keybuf, key_len)));
+  }
+  table->file->ha_index_end();
+  if (nxtres != 0 && nxtres != HA_ERR_END_OF_FILE)
+  {
+    table->file->print_error(nxtres, MYF(0));
+    goto error;
+  }
+  thd->commit_whole_transaction_and_close_tables();
+  new_trans.restore_old_transaction();
+
+  /* We should already hold a global IX lock and a schema X lock. */
+  DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::BACKUP, "", "",
+                                             MDL_BACKUP_DDL) &&
+              thd->mdl_context.is_lock_owner(MDL_key::SCHEMA, db, "",
+                                             MDL_EXCLUSIVE));
+  /* Acquire all collected routine locks using the session wait timeout. */
+  DBUG_RETURN(thd->mdl_context.acquire_locks(&mdl_requests,
+                                             thd->variables.lock_wait_timeout));
+error:
+  thd->commit_whole_transaction_and_close_tables();
+  new_trans.restore_old_transaction();
+  DBUG_RETURN(true);
+}
+
+
+/**
+  Drop all routines in database 'db'
+
+  @note Close the thread tables, the calling code might want to
+  delete from other system tables afterwards.
+*/
+
+int
+sp_drop_db_routines(THD *thd, const char *db)
+{
+  TABLE *table;
+  int ret;
+  uint key_len;
+  MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint();
+  uchar keybuf[MAX_KEY_LENGTH];
+  size_t db_length= strlen(db);
+  Sql_mode_instant_remove smir(thd, MODE_PAD_CHAR_TO_FULL_LENGTH); // see below
+  DBUG_ENTER("sp_drop_db_routines");
+  DBUG_PRINT("enter", ("db: %s", db));
+
+  ret= SP_OPEN_TABLE_FAILED;
+  if (!(table= open_proc_table_for_update(thd)))
+    goto err;
+
+  /* Position an index scan on the 'db' prefix of mysql.proc's primary key. */
+  table->field[MYSQL_PROC_FIELD_DB]->store(db, db_length, system_charset_info);
+  key_len= table->key_info->key_part[0].store_length;
+  table->field[MYSQL_PROC_FIELD_DB]->get_key_image(keybuf, key_len, Field::itRAW);
+
+  ret= SP_OK;
+  if (table->file->ha_index_init(0, 1))
+  {
+    ret= SP_KEY_NOT_FOUND;
+    goto err_idx_init;
+  }
+  if (!table->file->ha_index_read_map(table->record[0], keybuf, (key_part_map)1,
+                                      HA_READ_KEY_EXACT))
+  {
+    int nxtres;
+    bool deleted= FALSE;
+
+    do
+    {
+      if (! table->file->ha_delete_row(table->record[0]))
+      {
+        deleted= TRUE;          /* We deleted something */
+#ifdef HAVE_PSI_SP_INTERFACE
+        String buf;
+        // the following assumes MODE_PAD_CHAR_TO_FULL_LENGTH being *unset*
+        String *name= table->field[MYSQL_PROC_FIELD_NAME]->val_str(&buf);
+
+        enum_sp_type sp_type= (enum_sp_type) table->field[MYSQL_PROC_MYSQL_TYPE]->ptr[0];
+        /* Drop statistics for this stored program from performance schema. */
+        /* Fixed: the static_cast template argument had been lost (<uint>). */
+        MYSQL_DROP_SP(sp_type, db, static_cast<uint>(db_length),
+                      name->ptr(), name->length());
+#endif
+      }
+      else
+      {
+        ret= SP_DELETE_ROW_FAILED;
+        nxtres= 0;
+        break;
+      }
+    } while (!(nxtres= table->file->ha_index_next_same(table->record[0],
+                                                       keybuf, key_len)));
+    if (nxtres != HA_ERR_END_OF_FILE)
+      ret= SP_KEY_NOT_FOUND;
+    if (deleted)
+    {
+      /* Routines were removed: invalidate SP caches on all connections. */
+      sp_cache_invalidate();
+      /* Make change permanent and avoid 'table is marked as crashed' errors */
+      table->file->extra(HA_EXTRA_FLUSH);
+    }
+  }
+  table->file->ha_index_end();
+
+err_idx_init:
+  trans_commit_stmt(thd);
+  close_thread_tables(thd);
+  /*
+    Make sure to only release the MDL lock on mysql.proc, not other
+    metadata locks DROP DATABASE might have acquired.
+  */
+  thd->mdl_context.rollback_to_savepoint(mdl_savepoint);
+
+err:
+  DBUG_RETURN(ret);
+}
+
+
+/**
+  Implement SHOW CREATE statement for stored routines.
+
+  The operation finds the stored routine object specified by name and then
+  calls sp_head::show_create_routine() for the object.
+
+  @param thd  Thread context.
+  @param name Stored routine name.
+
+  @return Error status.
+    @retval FALSE on success
+    @retval TRUE on error
+*/
+
+bool
+Sp_handler::sp_show_create_routine(THD *thd,
+                                   const Database_qualified_name *name) const
+{
+  DBUG_ENTER("sp_show_create_routine")
+  DBUG_PRINT("enter", ("type: %s name: %.*s",
+                       type_str(),
+                       (int) name->m_name.length,
+                       name->m_name.str));
+  /*
+    @todo: Consider using prelocking for this code as well. Currently
+    SHOW CREATE PROCEDURE/FUNCTION is a dirty read of the data
+    dictionary, i.e. takes no metadata locks.
+    It is "safe" to do as long as it doesn't affect the results
+    of the binary log or the query cache, which currently it does not.
+  */
+  sp_head *sp= 0;
+
+  DBUG_EXECUTE_IF("cache_sp_in_show_create",
+    /* Some tests need just need a way to cache SP without other side-effects.*/
+    sp_cache_routine(thd, name, false, &sp);
+    sp->show_create_routine(thd, this);
+    DBUG_RETURN(false);
+  );
+
+  /* Load from mysql.proc; free_sp tracks whether we own this sp_head copy. */
+  bool free_sp= db_find_routine(thd, name, &sp) == SP_OK;
+  bool ret= !sp || sp->show_create_routine(thd, this);
+  if (ret)
+  {
+    /*
+      If we have insufficient privileges, pretend the routine
+      does not exist.
+    */
+    my_error(ER_SP_DOES_NOT_EXIST, MYF(0), type_str(), name->m_name.str);
+  }
+  if (free_sp)
+    sp_head::destroy(sp);
+  DBUG_RETURN(ret);
+}
+
+
+/*
+  A helper class to split package name from a dot-qualified name
+  and return it as a 0-terminated string
+    'pkg.name' -> 'pkg\0'
+*/
+
+class Prefix_name_buf: public LEX_CSTRING
+{
+  char m_buf[SAFE_NAME_LEN + 1];
+public:
+  /*
+    Copy the part of 'name' before its last '.' into a 0-terminated local
+    buffer (truncated to SAFE_NAME_LEN); if there is no dot, the base
+    LEX_CSTRING is set to null_clex_str ({NULL, 0}).
+  */
+  Prefix_name_buf(const THD *thd, const LEX_CSTRING &name)
+  {
+    const char *end;
+    if (!(end= strrchr(name.str, '.')))
+    {
+      /* Fixed: the static_cast template argument had been lost. */
+      static_cast<LEX_CSTRING*>(this)[0]= null_clex_str;
+    }
+    else
+    {
+      str= m_buf;
+      length= end - name.str;
+      set_if_smaller(length, sizeof(m_buf) - 1);
+      memcpy(m_buf, name.str, length);
+      m_buf[length]= '\0';
+    }
+  }
+};
+
+
+/*
+  In case of recursions, we create multiple copies of the same SP.
+  This methods checks the current recursion depth.
+  In case if the recursion limit exceeded, it throws an error
+  and returns NULL.
+  Otherwise, depending on the current recursion level, it:
+  - either returns the original SP,
+  - or makes and returns a new clone of SP
+*/
+
+sp_head *
+Sp_handler::sp_clone_and_link_routine(THD *thd,
+                                      const Database_qualified_name *name,
+                                      sp_head *sp) const
+{
+  DBUG_ENTER("sp_link_routine");
+  // NOTE(review): DBUG tag "sp_link_routine" does not match the function
+  // name — possibly stale; verify against the debug-trace conventions.
+  int rc;
+  ulong level;
+  sp_head *new_sp;
+  LEX_CSTRING returns= empty_clex_str;
+  Database_qualified_name lname(name->m_db, name->m_name);
+#ifndef DBUG_OFF
+  /* Snapshot the package's subroutine count to verify parsing adds none. */
+  uint parent_subroutine_count=
+    !sp->m_parent ? 0 :
+     sp->m_parent->m_routine_declarations.elements +
+     sp->m_parent->m_routine_implementations.elements;
+#endif
+
+  /*
+    String buffer for RETURNS data type must have system charset;
+    64 -- size of "returns" column of mysql.proc.
+  */
+  String retstr(64);
+  retstr.set_charset(sp->get_creation_ctx()->get_client_cs());
+
+  DBUG_PRINT("info", ("found: %p", sp));
+  /* Reuse an already-cloned free instance if one exists at an allowed depth. */
+  if (sp->m_first_free_instance)
+  {
+    DBUG_PRINT("info", ("first free: %p  level: %lu  flags %x",
+                        sp->m_first_free_instance,
+                        sp->m_first_free_instance->m_recursion_level,
+                        sp->m_first_free_instance->m_flags));
+    DBUG_ASSERT(!(sp->m_first_free_instance->m_flags & sp_head::IS_INVOKED));
+    if (sp->m_first_free_instance->m_recursion_level > recursion_depth(thd))
+    {
+      recursion_level_error(thd, sp);
+      DBUG_RETURN(0);
+    }
+    DBUG_RETURN(sp->m_first_free_instance);
+  }
+  /*
+    Actually depth could be +1 than the actual value in case a SP calls
+    SHOW CREATE PROCEDURE. Hence, the linked list could hold up to one more
+    instance.
+  */
+
+  level= sp->m_last_cached_sp->m_recursion_level + 1;
+  if (level > recursion_depth(thd))
+  {
+    recursion_level_error(thd, sp);
+    DBUG_RETURN(0);
+  }
+
+  /* Functions need the RETURNS clause to rebuild the CREATE statement. */
+  if (type() == SP_TYPE_FUNCTION)
+  {
+    sp_returns_type(thd, retstr, sp);
+    retstr.get_value(&returns);
+  }
+
+  if (sp->m_parent)
+  {
+    /*
+      If we're cloning a recursively called package routine,
+      we need to take some special measures:
+      1. Cut the package name prefix from the routine name: 'pkg1.p1' -> 'p1',
+         to have db_load_routine() generate and parse a query like this:
+           CREATE PROCEDURE p1 ...;
+         rather than:
+           CREATE PROCEDURE pkg1.p1 ...;
+         The latter would be misinterpreted by the parser as a standalone
+         routine 'p1' in the database 'pkg1', which is not what we need.
+      2. We pass m_parent to db_load_routine() to have it set
+         thd->lex->sphead to sp->m_parent before calling parse_sql().
+      These two measures allow to parse a package subroutine using
+      the grammar for standalone routines, e.g.:
+        CREATE PROCEDURE p1 ... END;
+      instead of going through a more complex query, e.g.:
+        CREATE PACKAGE BODY pkg1 AS
+          PROCEDURE p1 ... END;
+        END;
+    */
+    size_t prefix_length= sp->m_parent->m_name.length + 1;
+    DBUG_ASSERT(prefix_length < lname.m_name.length);
+    DBUG_ASSERT(lname.m_name.str[sp->m_parent->m_name.length] == '.');
+    lname.m_name.str+= prefix_length;
+    lname.m_name.length-= prefix_length;
+    sp->m_parent->m_is_cloning_routine= true;
+  }
+
+
+  /* Re-parse the stored body to produce a fresh clone of the routine. */
+  rc= db_load_routine(thd, &lname, &new_sp,
+                      sp->m_sql_mode, sp->m_params, returns,
+                      sp->m_body, sp->chistics(),
+                      sp->m_definer,
+                      sp->m_created, sp->m_modified,
+                      sp->m_parent,
+                      sp->get_creation_ctx());
+  if (sp->m_parent)
+    sp->m_parent->m_is_cloning_routine= false;
+
+  if (rc == SP_OK)
+  {
+#ifndef DBUG_OFF
+    /*
+      We've just called the parser to clone the routine.
+      In case of a package routine, make sure that the parser
+      has not added any new subroutines directly to the parent package.
+      The cloned subroutine instances get linked below to the first instance,
+      they must have no direct links from the parent package.
+    */
+    DBUG_ASSERT(!sp->m_parent ||
+                parent_subroutine_count ==
+                sp->m_parent->m_routine_declarations.elements +
+                sp->m_parent->m_routine_implementations.elements);
+#endif
+    /* Link the clone at the tail of the first instance's recursion chain. */
+    sp->m_last_cached_sp->m_next_cached_sp= new_sp;
+    new_sp->m_recursion_level= level;
+    new_sp->m_first_instance= sp;
+    sp->m_last_cached_sp= sp->m_first_free_instance= new_sp;
+    DBUG_PRINT("info", ("added level: %p, level: %lu, flags %x",
+                        new_sp, new_sp->m_recursion_level,
+                        new_sp->m_flags));
+    DBUG_RETURN(new_sp);
+  }
+  DBUG_RETURN(0);
+}
+
+
+/**
+  Obtain object representing stored procedure/function by its name from
+  stored procedures cache and looking into mysql.proc if needed.
+
+  @param thd          thread context
+  @param name         name of procedure
+  @param cp           hash to look routine in
+  @param cache_only   if true perform cache-only lookup
+                      (Don't look in mysql.proc).
+
+  @retval
+    NonNULL pointer to sp_head object for the procedure
+  @retval
+    NULL    in case of error.
+*/
+
+sp_head *
+Sp_handler::sp_find_routine(THD *thd, const Database_qualified_name *name,
+                            bool cache_only) const
+{
+  DBUG_ENTER("Sp_handler::sp_find_routine");
+  DBUG_PRINT("enter", ("name:  %.*s.%.*s  type: %s  cache only %d",
+                       (int) name->m_db.length, name->m_db.str,
+                       (int) name->m_name.length, name->m_name.str,
+                       type_str(), cache_only));
+  /* First consult the per-connection routine cache. */
+  sp_cache **cache= get_cache(thd);
+  sp_head *sp= sp_cache_lookup(cache, name);
+  if (sp)
+    DBUG_RETURN(sp_clone_and_link_routine(thd, name, sp));
+  /* Cache miss: optionally load and cache the routine from mysql.proc. */
+  if (!cache_only)
+    db_find_and_cache_routine(thd, name, &sp);
+  DBUG_RETURN(sp);
+}
+
+
+/**
+  Find a package routine.
+  See sp_cache_routine() for more information on parameters and return value.
+
+  @param thd         - current THD
+  @param pkgname_str - package name
+  @param name        - a mixed qualified name, with:
+                       * name->m_db set to the database, e.g. "dbname"
+                       * name->m_name set to a package-qualified name,
+                         e.g. "pkgname.spname".
+  @param cache_only  - don't load mysql.proc if not cached
+  @retval non-NULL   - a pointer to an sp_head object
+  @retval NULL       - an error happened.
+*/
+
+sp_head *
+Sp_handler::sp_find_package_routine(THD *thd,
+                                    const LEX_CSTRING pkgname_str,
+                                    const Database_qualified_name *name,
+                                    bool cache_only) const
+{
+  DBUG_ENTER("sp_find_package_routine");
+  /* Locate the package body, loading it from mysql.proc unless cache_only. */
+  Database_qualified_name pkgname(&name->m_db, &pkgname_str);
+  sp_head *ph= sp_cache_lookup(&thd->sp_package_body_cache, &pkgname);
+  if (!ph && !cache_only)
+    sp_handler_package_body.db_find_and_cache_routine(thd, &pkgname, &ph);
+  if (ph)
+  {
+    /* Strip the "pkgname." prefix to get the bare routine name. */
+    LEX_CSTRING tmp= name->m_name;
+    const char *dot= strrchr(tmp.str, '.');
+    size_t prefix_length= dot ? dot - tmp.str + 1 : 0;
+    sp_package *pkg= ph->get_package();
+    tmp.str+= prefix_length;
+    tmp.length-= prefix_length;
+    /* Look the routine up among the package body's implementations. */
+    LEX *plex= pkg ? pkg->m_routine_implementations.find(tmp, type()) : NULL;
+    sp_head *sp= plex ? plex->sphead : NULL;
+    if (sp)
+      DBUG_RETURN(sp_clone_and_link_routine(thd, name, sp));
+  }
+  DBUG_RETURN(NULL);
+}
+
+
+/**
+  Find a package routine.
+  See sp_cache_routine() for more information on parameters and return value.
+
+  @param thd        - current THD
+  @param name       - Qualified name with the following format:
+                      * name->m_db is set to the database name, e.g. "dbname"
+                      * name->m_name is set to a package-qualified name,
+                        e.g. "pkgname.spname", as a single string with a
+                        dot character as a separator.
+  @param cache_only - don't load mysql.proc if not cached
+  @retval non-NULL  - a pointer to an sp_head object
+  @retval NULL      - an error happened
+*/
+
+sp_head *
+Sp_handler::sp_find_package_routine(THD *thd,
+                                    const Database_qualified_name *name,
+                                    bool cache_only) const
+{
+  DBUG_ENTER("Sp_handler::sp_find_package_routine");
+  /* Split "pkgname.spname" and delegate to the package-aware overload. */
+  const Prefix_name_buf pkg(thd, name->m_name);
+  DBUG_ASSERT(pkg.length);
+  DBUG_RETURN(sp_find_package_routine(thd, pkg, name, cache_only));
+}
+
+
+/**
+  This is used by sql_acl.cc:mysql_routine_grant() and is used to find
+  the routines in 'routines'.
+
+  @param thd Thread handler
+  @param routines List of needles in the hay stack
+
+  @return
+    @retval FALSE Found.
+    @retval TRUE  Not found
+*/
+
+bool
+Sp_handler::sp_exist_routines(THD *thd, TABLE_LIST *routines) const
+{
+  TABLE_LIST *routine;
+  bool sp_object_found;
+  DBUG_ENTER("sp_exists_routine");
+  // NOTE(review): DBUG tag "sp_exists_routine" differs from the function
+  // name sp_exist_routines — possibly stale.
+  for (routine= routines; routine; routine= routine->next_global)
+  {
+    sp_name *name;
+    LEX_CSTRING lex_db;
+    LEX_CSTRING lex_name;
+    /* Build a qualified sp_name from the TABLE_LIST's db/name pair. */
+    thd->make_lex_string(&lex_db, routine->db.str, routine->db.length);
+    thd->make_lex_string(&lex_name, routine->table_name.str,
+                         routine->table_name.length);
+    name= new sp_name(&lex_db, &lex_name, true);
+    sp_object_found= sp_find_routine(thd, name, false) != NULL;
+    /* Discard warnings produced while probing for the routine. */
+    thd->get_stmt_da()->clear_warning_info(thd->query_id);
+    if (! sp_object_found)
+    {
+      my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "FUNCTION or PROCEDURE",
+               routine->table_name.str);
+      DBUG_RETURN(TRUE);
+    }
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+extern "C" uchar* sp_sroutine_key(const uchar *ptr, size_t *plen,
+                                  my_bool first)
+{
+  /* Hash callback: an Sroutine_hash_entry is keyed by its MDL key bytes. */
+  Sroutine_hash_entry *entry= (Sroutine_hash_entry *) ptr;
+  *plen= entry->mdl_request.key.length();
+  return (uchar *) entry->mdl_request.key.ptr();
+}
+
+
+/**
+  Auxilary function that adds new element to the set of stored routines
+  used by statement.
+
+  In case when statement uses stored routines but does not need
+  prelocking (i.e. it does not use any tables) we will access the
+  elements of Query_tables_list::sroutines set on prepared statement
+  re-execution. Because of this we have to allocate memory for both
+  hash element and copy of its key in persistent arena.
+
+  @param prelocking_ctx  Prelocking context of the statement
+  @param arena           Arena in which memory for new element will be
+                         allocated
+  @param key             Key for the hash representing set
+  @param belong_to_view  Uppermost view which uses this routine
+                         (0 if routine is not used by view)
+
+  @note
+    Will also add element to end of 'Query_tables_list::sroutines_list' list.
+
+  @todo
+    When we will got rid of these accesses on re-executions we will be
+    able to allocate memory for hash elements in non-persitent arena
+    and directly use key values from sp_head::m_sroutines sets instead
+    of making their copies.
+
+  @retval
+    TRUE   new element was added.
+  @retval
+    FALSE  element was not added (because it is already present in
+    the set).
+*/
+
+bool sp_add_used_routine(Query_tables_list *prelocking_ctx, Query_arena *arena,
+                         const MDL_key *key,
+                         const Sp_handler *handler,
+                         TABLE_LIST *belong_to_view)
+{
+  /* Lazily create the sroutines hash on first use. */
+  my_hash_init_opt(PSI_INSTRUMENT_ME, &prelocking_ctx->sroutines, system_charset_info,
+                   Query_tables_list::START_SROUTINES_HASH_SIZE,
+                   0, 0, sp_sroutine_key, 0, 0);
+
+  if (!my_hash_search(&prelocking_ctx->sroutines, key->ptr(), key->length()))
+  {
+    /* Allocate from the (persistent) arena; see function comment above. */
+    Sroutine_hash_entry *rn=
+      (Sroutine_hash_entry *)arena->alloc(sizeof(Sroutine_hash_entry));
+    if (unlikely(!rn)) // OOM. Error will be reported using fatal_error().
+      return FALSE;
+    MDL_REQUEST_INIT_BY_KEY(&rn->mdl_request, key, MDL_SHARED, MDL_TRANSACTION);
+    if (my_hash_insert(&prelocking_ctx->sroutines, (uchar *)rn))
+      return FALSE;
+    prelocking_ctx->sroutines_list.link_in_list(rn, &rn->next);
+    rn->belong_to_view= belong_to_view;
+    rn->m_handler= handler;
+    rn->m_sp_cache_version= 0;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Find and cache a routine in a parser-safe reentrant mode.
+
+  If sp_head is not in the cache,
+  its loaded from mysql.proc, parsed using parse_sql(), and cached.
+  Note, as it is called from inside parse_sql() itself,
+  we need to preserve and restore the parser state.
+
+  It's used during parsing of CREATE PACKAGE BODY,
+  to load the corresponding CREATE PACKAGE.
+*/
+
+int
+Sp_handler::sp_cache_routine_reentrant(THD *thd,
+                                       const Database_qualified_name *name,
+                                       sp_head **sp) const
+{
+  /*
+    Detach the active parser state for the duration of the nested
+    sp_cache_routine() call (which may itself invoke parse_sql()),
+    then restore it.
+  */
+  Parser_state *saved_state= thd->m_parser_state;
+  thd->m_parser_state= NULL;
+  int ret= sp_cache_routine(thd, name, false, sp);
+  thd->m_parser_state= saved_state;
+  return ret;
+}
+
+
+/**
+  Check if a routine has a declaration in the CREATE PACKAGE statement,
+  by looking up in thd->sp_package_spec_cache, and by loading from mysql.proc
+  if needed.
+
+    @param thd      current thd
+    @param db       the database name
+    @param package  the package name
+    @param name     the routine name
+    @param type     the routine type
+    @retval         true, if the routine has a declaration
+    @retval         false, if the routine does not have a declaration
+
+  This function can be called in arbitrary context:
+  - inside a package routine
+  - inside a standalone routine
+  - inside a anonymous block
+  - outside of any routines
+
+  The state of the package specification (i.e. the CREATE PACKAGE statement)
+  for "package" before the call of this function is not known:
+   it can be cached, or not cached.
+  After the call of this function, the package specification is always cached,
+  unless a fatal error happens.
+*/
+
+static bool
+is_package_public_routine(THD *thd,
+                          const LEX_CSTRING &db,
+                          const LEX_CSTRING &package,
+                          const LEX_CSTRING &routine,
+                          enum_sp_type type)
+{
+  /* Make sure the package specification is cached, then search it. */
+  sp_head *sp= NULL;
+  Database_qualified_name pkgname(db, package);
+  if (sp_handler_package_spec.sp_cache_routine_reentrant(thd, &pkgname, &sp) ||
+      !sp)
+    return false;
+  sp_package *spec= sp->get_package();
+  return spec && spec->m_routine_declarations.find(routine, type);
+}
+
+
+/**
+  Check if a routine has a declaration in the CREATE PACKAGE statement
+  by looking up in sp_package_spec_cache.
+
+    @param thd      current thd
+    @param db       the database name
+    @param pkgname  the package name
+    @param name     the routine name
+    @param type     the routine type
+    @retval         true, if the routine has a declaration
+    @retval         false, if the routine does not have a declaration
+
+  This function is called in the middle of CREATE PACKAGE BODY parsing,
+  to lookup the current package routines.
+  The package specification (i.e. the CREATE PACKAGE statement) for
+  the current package body must already be loaded and cached at this point.
+*/
+
+static bool
+is_package_public_routine_quick(THD *thd,
+                                const LEX_CSTRING &db,
+                                const LEX_CSTRING &pkgname,
+                                const LEX_CSTRING &name,
+                                enum_sp_type type)
+{
+  /* Cache-only lookup: the spec is expected to be cached by this point. */
+  Database_qualified_name qname(db, pkgname);
+  sp_head *sp= sp_cache_lookup(&thd->sp_package_spec_cache, &qname);
+  sp_package *pkg= sp ? sp->get_package() : NULL;
+  DBUG_ASSERT(pkg); // Must already be cached
+  return pkg && pkg->m_routine_declarations.find(name, type);
+}
+
+
+/**
+  Check if a qualified name, e.g. "CALL name1.name2",
+  refers to a known routine in the package body "pkg".
+*/
+
+static bool
+is_package_body_routine(THD *thd, sp_package *pkg,
+                        const LEX_CSTRING &name1,
+                        const LEX_CSTRING &name2,
+                        enum_sp_type type)
+{
+  /* name1 must denote the package itself ... */
+  if (!Sp_handler::eq_routine_name(pkg->m_name, name1))
+    return false;
+  /* ... and name2 a routine declared or implemented inside it. */
+  return pkg->m_routine_declarations.find(name2, type) ||
+         pkg->m_routine_implementations.find(name2, type);
+}
+
+
+/**
+  Resolve a qualified routine reference xxx.yyy(), between:
+  - A standalone routine: xxx.yyy
+  - A package routine:    current_database.xxx.yyy
+*/
+
+bool Sp_handler::
+  sp_resolve_package_routine_explicit(THD *thd,
+                                      sp_head *caller,
+                                      sp_name *name,
+                                      const Sp_handler **pkg_routine_handler,
+                                      Database_qualified_name *pkgname) const
+{
+  sp_package *pkg;
+
+  /*
+    If a qualified routine name was used, e.g. xxx.yyy(),
+    we possibly have a call to a package routine.
+    Rewrite name if name->m_db (xxx) is a known package,
+    and name->m_name (yyy) is a known routine in this package.
+  */
+  LEX_CSTRING tmpdb= thd->db;
+  if (is_package_public_routine(thd, tmpdb, name->m_db, name->m_name, type()) ||
+      // Check if a package routine calls a private routine
+      (caller && caller->m_parent &&
+       is_package_body_routine(thd, caller->m_parent,
+                               name->m_db, name->m_name, type())) ||
+      // Check if a package initialization sections calls a private routine
+      (caller && (pkg= caller->get_package()) &&
+       is_package_body_routine(thd, pkg, name->m_db, name->m_name, type())))
+  {
+    /* Rewrite xxx.yyy -> current_db.xxx.yyy and report the package name. */
+    pkgname->m_db= tmpdb;
+    pkgname->m_name= name->m_db;
+    *pkg_routine_handler= package_routine_handler();
+    return name->make_package_routine_name(thd->mem_root, tmpdb,
+                                           name->m_db, name->m_name);
+  }
+  /* Not a package routine: leave 'name' untouched (standalone routine). */
+  return false;
+}
+
+
+/**
+  Resolve a non-qualified routine reference yyy(), between:
+  - A standalone routine: current_database.yyy
+  - A package routine:    current_database.current_package.yyy
+*/
+
+bool Sp_handler::
+  sp_resolve_package_routine_implicit(THD *thd,
+                                      sp_head *caller,
+                                      sp_name *name,
+                                      const Sp_handler **pkg_routine_handler,
+                                      Database_qualified_name *pkgname) const
+{
+  sp_package *pkg;
+
+  if (!caller || !caller->m_name.length)
+  {
+    /*
+      We are either in a an anonymous block,
+      or not in a routine at all.
+    */
+    return false; // A standalone routine is called
+  }
+
+  if (caller->m_parent)
+  {
+    // A package routine calls a non-qualified routine
+    int ret= SP_OK;
+    Prefix_name_buf pkgstr(thd, caller->m_name);
+    DBUG_ASSERT(pkgstr.length);
+    LEX_CSTRING tmpname; // Non-qualified m_name
+    tmpname.str= caller->m_name.str + pkgstr.length + 1;
+    tmpname.length= caller->m_name.length - pkgstr.length - 1;
+
+    /*
+      We're here if a package routine calls another non-qualified
+      function or procedure, e.g. yyy().
+      We need to distinguish two cases:
+      - yyy() is another routine from the same package
+      - yyy() is a standalone routine from the same database
+      To detect if yyy() is a package (rather than a standalone) routine,
+      we check if:
+      - yyy() recursively calls itself
+      - yyy() is earlier implemented in the current CREATE PACKAGE BODY
+      - yyy() has a forward declaration
+      - yyy() is declared in the corresponding CREATE PACKAGE
+    */
+    if (eq_routine_name(tmpname, name->m_name) ||
+        caller->m_parent->m_routine_implementations.find(name->m_name, type()) ||
+        caller->m_parent->m_routine_declarations.find(name->m_name, type()) ||
+        is_package_public_routine_quick(thd, caller->m_db,
+                                        pkgstr, name->m_name, type()))
+    {
+      DBUG_ASSERT(ret == SP_OK);
+      /* Rewrite yyy -> pkg.yyy and report the package-routine handler. */
+      pkgname->copy(thd->mem_root, caller->m_db, pkgstr);
+      *pkg_routine_handler= package_routine_handler();
+      if (name->make_package_routine_name(thd->mem_root, pkgstr, name->m_name))
+        return true;
+    }
+    return ret != SP_OK;
+  }
+
+  if ((pkg= caller->get_package()) &&
+       pkg->m_routine_implementations.find(name->m_name, type()))
+  {
+    pkgname->m_db= caller->m_db;
+    pkgname->m_name= caller->m_name;
+    // Package initialization section is calling a non-qualified routine
+    *pkg_routine_handler= package_routine_handler();
+    return name->make_package_routine_name(thd->mem_root,
+                                           caller->m_name, name->m_name);
+  }
+
+  return false; // A standalone routine is called
+
+}
+
+
+/**
+  Detect cases when a package routine (rather than a standalone routine)
+  is called, and rewrite sp_name accordingly.
+
+  @param thd              Current thd
+  @param caller           The caller routine (or NULL if outside of a routine)
+  @param [IN/OUT] name    The called routine name
+  @param [OUT]    pkgname If the routine is found to be a package routine,
+                          pkgname is populated with the package name.
+                          Otherwise, it's not touched.
+  @retval         false   on success
+  @retval         true    on error (e.g. EOM, could not read CREATE PACKAGE)
+*/
+
+bool
+Sp_handler::sp_resolve_package_routine(THD *thd,
+                                       sp_head *caller,
+                                       sp_name *name,
+                                       const Sp_handler **pkg_routine_handler,
+                                       Database_qualified_name *pkgname) const
+{
+  /* Package-routine resolution applies only in ORACLE mode with a
+     current database selected. */
+  if (!thd->db.length || !(thd->variables.sql_mode & MODE_ORACLE))
+    return false;
+
+  if (name->m_explicit_name)
+    return sp_resolve_package_routine_explicit(thd, caller, name,
+                                               pkg_routine_handler, pkgname);
+  return sp_resolve_package_routine_implicit(thd, caller, name,
+                                             pkg_routine_handler, pkgname);
+}
+
+
+/**
+  Add routine which is explicitly used by statement to the set of stored
+  routines used by this statement.
+
+  To be friendly towards prepared statements one should pass
+  persistent arena as second argument.
+
+  @param prelocking_ctx  Prelocking context of the statement
+  @param arena           Arena in which memory for new element of the set
+                         will be allocated
+  @param rt              Routine name
+
+  @note
+    Will also add element to end of 'Query_tables_list::sroutines_list' list
+    (and will take into account that this is an explicitly used routine).
+*/
+
+void Sp_handler::add_used_routine(Query_tables_list *prelocking_ctx,
+                                  Query_arena *arena,
+                                  const Database_qualified_name *rt) const
+{
+  /* Build the MDL key from this handler's namespace and the routine name. */
+  MDL_key key(get_mdl_type(), rt->m_db.str, rt->m_name.str);
+  (void) sp_add_used_routine(prelocking_ctx, arena, &key, this, 0);
+  /*
+    Remember the current end of sroutines_list and its size so that
+    indirectly used routines appended later can be told apart from
+    explicitly used ones (see sp_remove_not_own_routines()).
+  */
+  prelocking_ctx->sroutines_list_own_last= prelocking_ctx->sroutines_list.next;
+  prelocking_ctx->sroutines_list_own_elements=
+                    prelocking_ctx->sroutines_list.elements;
+}
+
+
+/**
+  Remove routines which are only indirectly used by statement from
+  the set of routines used by this statement.
+
+  @param prelocking_ctx  Prelocking context of the statement
+*/
+
+void sp_remove_not_own_routines(Query_tables_list *prelocking_ctx)
+{
+  Sroutine_hash_entry *not_own_rt, *next_rt;
+  /*
+    Walk the tail of sroutines_list that starts right after the last
+    explicitly used routine (position recorded in sroutines_list_own_last
+    by Sp_handler::add_used_routine()) and drop each entry from the hash.
+  */
+  for (not_own_rt= *prelocking_ctx->sroutines_list_own_last;
+       not_own_rt; not_own_rt= next_rt)
+  {
+    /*
+      It is safe to obtain not_own_rt->next after calling hash_delete() now
+      but we want to be more future-proof.
+    */
+    next_rt= not_own_rt->next;
+    my_hash_delete(&prelocking_ctx->sroutines, (uchar *)not_own_rt);
+  }
+
+  /* Truncate the list and restore the element count of the "own" prefix. */
+  *prelocking_ctx->sroutines_list_own_last= NULL;
+  prelocking_ctx->sroutines_list.next= prelocking_ctx->sroutines_list_own_last;
+  prelocking_ctx->sroutines_list.elements= 
+                    prelocking_ctx->sroutines_list_own_elements;
+}
+
+
+/**
+  Merge contents of two hashes representing sets of routines used
+  by statements or by other routines.
+
+  @param dst   hash to which elements should be added
+  @param src   hash from which elements merged
+
+  @note
+    This procedure won't create new Sroutine_hash_entry objects,
+    instead it will simply add elements from source to destination
+    hash. Thus the lifetime of elements in the destination hash becomes
+    dependent on the lifetime of elements from the source hash. It also
+    won't touch lists linking elements in source and destination
+    hashes.
+
+  @returns
+    @return TRUE Failure
+    @return FALSE Success
+*/
+
+bool sp_update_sp_used_routines(HASH *dst, HASH *src)
+{
+  /*
+    Share every routine entry from src with dst, skipping entries whose
+    MDL key is already present.  No new Sroutine_hash_entry objects are
+    created; dst simply references the same elements as src.
+  */
+  for (uint idx= 0; idx < src->records; idx++)
+  {
+    Sroutine_hash_entry *entry=
+      (Sroutine_hash_entry *) my_hash_element(src, idx);
+    const uchar *key= (const uchar *) entry->mdl_request.key.ptr();
+    size_t key_length= entry->mdl_request.key.length();
+    if (my_hash_search(dst, key, key_length))
+      continue;                           /* Already present, nothing to do */
+    if (my_hash_insert(dst, (uchar *) entry))
+      return TRUE;                        /* Out of memory */
+  }
+  return FALSE;
+}
+
+
+/**
+  Add contents of hash representing set of routines to the set of
+  routines used by statement.
+
+  @param thd             Thread context
+  @param prelocking_ctx  Prelocking context of the statement
+  @param src             Hash representing set from which routines will
+                         be added
+  @param belong_to_view  Uppermost view which uses these routines, 0 if none
+
+  @note It will also add elements to end of
+        'Query_tables_list::sroutines_list' list.
+*/
+
+void
+sp_update_stmt_used_routines(THD *thd, Query_tables_list *prelocking_ctx,
+                             HASH *src, TABLE_LIST *belong_to_view)
+{
+  /* Add every routine found in src to the statement's prelocking set. */
+  for (uint idx= 0; idx < src->records; idx++)
+  {
+    Sroutine_hash_entry *entry=
+      (Sroutine_hash_entry *) my_hash_element(src, idx);
+    (void) sp_add_used_routine(prelocking_ctx, thd->stmt_arena,
+                               &entry->mdl_request.key, entry->m_handler,
+                               belong_to_view);
+  }
+}
+
+
+/**
+  Add contents of list representing set of routines to the set of
+  routines used by statement.
+
+  @param thd             Thread context
+  @param prelocking_ctx  Prelocking context of the statement
+  @param src             List representing set from which routines will
+                         be added
+  @param belong_to_view  Uppermost view which uses these routines, 0 if none
+
+  @note It will also add elements to end of
+        'Query_tables_list::sroutines_list' list.
+*/
+
+void sp_update_stmt_used_routines(THD *thd, Query_tables_list *prelocking_ctx,
+                                  SQL_I_List<Sroutine_hash_entry> *src,
+                                  TABLE_LIST *belong_to_view)
+{
+  /*
+    Walk the intrusive singly-linked list and add each routine to the
+    statement's prelocking set.
+    Note: the template argument list of SQL_I_List had been lost
+    (likely HTML-entity stripping of "<Sroutine_hash_entry>"); restored,
+    as a bare "SQL_I_List *" does not compile for a class template.
+  */
+  for (Sroutine_hash_entry *rt= src->first; rt; rt= rt->next)
+    (void)sp_add_used_routine(prelocking_ctx, thd->stmt_arena,
+                              &rt->mdl_request.key, rt->m_handler,
+                              belong_to_view);
+}
+
+
+/**
+  A helper wrapper around sp_cache_routine() to use from
+  prelocking until 'sp_name' is eradicated as a class.
+*/
+
+int Sroutine_hash_entry::sp_cache_routine(THD *thd,
+                                          bool lookup_only,
+                                          sp_head **sp) const
+{
+  /* Room for "db" + "." + "name" + terminating zero. */
+  char qname_buff[NAME_LEN*2+1+1];
+  /* Reconstruct the routine name from the MDL key. */
+  sp_name name(&mdl_request.key, qname_buff);
+  /*
+    Check that we have an MDL lock on this routine, unless it's a top-level
+    CALL. The assert below should be unambiguous: the first element
+    in sroutines_list has an MDL lock unless it's a top-level call, or a
+    trigger, but triggers can't occur here (see the preceding assert).
+  */
+  DBUG_ASSERT(mdl_request.ticket || this == thd->lex->sroutines_list.first);
+
+  return m_handler->sp_cache_routine(thd, &name, lookup_only, sp);
+}
+
+
+/**
+  Ensure that routine is present in cache by loading it from the mysql.proc
+  table if needed. If the routine is present but old, reload it.
+  Emit an appropriate error if there was a problem during
+  loading.
+
+  @param[in]  thd   Thread context.
+  @param[in]  name  Name of routine.
+  @param[in]  lookup_only Only check that the routine is in the cache.
+                    If it's not, don't try to load. If it is present,
+                    but old, don't try to reload.
+  @param[out] sp    Pointer to sp_head object for routine, NULL if routine was
+                    not found.
+
+  @retval 0      Either routine is found and was successfully loaded into cache
+                 or it does not exist.
+  @retval non-0  Error while loading routine from the mysql.proc table.
+*/
+
+int Sp_handler::sp_cache_routine(THD *thd,
+                                 const Database_qualified_name *name,
+                                 bool lookup_only,
+                                 sp_head **sp) const
+{
+  int ret= 0;
+  sp_cache **spc= get_cache(thd);
+
+  DBUG_ENTER("Sp_handler::sp_cache_routine");
+
+  DBUG_ASSERT(spc);
+
+  *sp= sp_cache_lookup(spc, name);
+
+  /* In lookup-only mode a cache miss is not an error: *sp stays NULL. */
+  if (lookup_only)
+    DBUG_RETURN(SP_OK);
+
+  if (*sp)
+  {
+    /* Drop a stale cached copy; if it survives the flush we are done. */
+    sp_cache_flush_obsolete(spc, sp);
+    if (*sp)
+      DBUG_RETURN(SP_OK);
+  }
+
+  /* Cache miss (or flushed): load the routine from mysql.proc. */
+  switch ((ret= db_find_and_cache_routine(thd, name, sp)))
+  {
+    case SP_OK:
+      break;
+    case SP_KEY_NOT_FOUND:
+      /* A missing routine is not an error for this function. */
+      ret= SP_OK;
+      break;
+    default:
+      /* Query might have been killed, don't set error. */
+      if (thd->killed)
+        break;
+      /*
+        Any error when loading an existing routine is either some problem
+        with the mysql.proc table, or a parse error because the contents
+        has been tampered with (in which case we clear that error).
+      */
+      if (ret == SP_PARSE_ERROR)
+        thd->clear_error();
+      /*
+        If we cleared the parse error, or when db_find_routine() flagged
+        an error with it's return value without calling my_error(), we
+        set the generic "mysql.proc table corrupt" error here.
+      */
+      if (!thd->is_error())
+      {
+        my_error(ER_SP_PROC_TABLE_CORRUPT, MYF(0),
+                 ErrConvDQName(name).ptr(), ret);
+      }
+      break;
+  }
+  DBUG_RETURN(ret);
+}
+
+
+/**
+  Cache a package routine using its package name and a qualified name.
+  See sp_cache_routine() for more information on parameters and return values.
+
+  @param thd         - current THD
+  @param pkgname_str - package name, e.g. "pkgname"
+  @param name        - name with the following format:
+                       * name->m_db is a database name, e.g. "dbname"
+                       * name->m_name is a package-qualified name,
+                         e.g. "pkgname.spname"
+  @param lookup_only - don't load mysql.proc if not cached
+  @param [OUT] sp    - the result is returned here.
+  @retval 0          - loaded successfully, or the routine does not exist
+  @retval non-0      - error while loading mysql.proc
+*/
+
+int
+Sp_handler::sp_cache_package_routine(THD *thd,
+                                     const LEX_CSTRING &pkgname_cstr,
+                                     const Database_qualified_name *name,
+                                     bool lookup_only, sp_head **sp) const
+{
+  DBUG_ENTER("sp_cache_package_routine");
+  /* Only functions and procedures can live inside a package. */
+  DBUG_ASSERT(type() == SP_TYPE_FUNCTION || type() == SP_TYPE_PROCEDURE);
+  /* First make sure the owning PACKAGE BODY itself is cached. */
+  sp_name pkgname(&name->m_db, &pkgname_cstr, false);
+  sp_head *ph= NULL;
+  int ret= sp_handler_package_body.sp_cache_routine(thd, &pkgname,
+                                                    lookup_only,
+                                                    &ph);
+  if (!ret)
+  {
+    sp_package *pkg= ph ? ph->get_package() : NULL;
+    /*
+      Strip the package prefix from the qualified routine name,
+      e.g. "pkgname.spname" -> "spname".
+    */
+    LEX_CSTRING tmp= name->m_name;
+    const char *dot= strrchr(tmp.str, '.');
+    size_t prefix_length= dot ? dot - tmp.str + 1 : 0;
+    tmp.str+= prefix_length;
+    tmp.length-= prefix_length;
+    /* Find the routine among the package body's implementations. */
+    LEX *rlex= pkg ? pkg->m_routine_implementations.find(tmp, type()) : NULL;
+    *sp= rlex ? rlex->sphead : NULL;
+  }
+
+  DBUG_RETURN(ret);
+}
+
+
+/**
+  Cache a package routine by its fully qualified name.
+  See sp_cache_routine() for more information on parameters and return values.
+
+  @param thd       - current THD
+  @param name      - name with the following format:
+                     * name->m_db is a database name, e.g. "dbname"
+                     * name->m_name is a package-qualified name,
+                       e.g. "pkgname.spname"
+  @param lookup_only - don't load mysql.proc if not cached
+  @param [OUT] sp    -  the result is returned here
+  @retval 0          - loaded successfully, or the routine does not exist
+  @retval non-0      - error while loading mysql.proc
+*/
+
+int Sp_handler::sp_cache_package_routine(THD *thd,
+                                         const Database_qualified_name *name,
+                                         bool lookup_only, sp_head **sp) const
+{
+  DBUG_ENTER("Sp_handler::sp_cache_package_routine");
+  /* Extract the "pkgname" prefix out of the "pkgname.spname" name. */
+  Prefix_name_buf pkgname(thd, name->m_name);
+  DBUG_ASSERT(pkgname.length);
+  int ret= sp_cache_package_routine(thd, pkgname, name, lookup_only, sp);
+  DBUG_RETURN(ret);
+}
+
+
+/**
+  Generates the CREATE... string from the table information.
+
+  @return
+    Returns false on success, true on (alloc) failure.
+*/
+
+bool
+Sp_handler::show_create_sp(THD *thd, String *buf,
+                           const LEX_CSTRING &db,
+                           const LEX_CSTRING &name,
+                           const LEX_CSTRING &params,
+                           const LEX_CSTRING &returns,
+                           const LEX_CSTRING &body,
+                           const st_sp_chistics &chistics,
+                           const AUTHID &definer,
+                           const DDL_options_st ddl_options,
+                           sql_mode_t sql_mode) const
+{
+  /*
+    NOTE: the "&params" parameter and the "buf->append(&params)" call below
+    had been garbled to "¶ms" (HTML-entity corruption of "&para");
+    restored to "&params".
+  */
+  /* Extra room for the optional "AGGREGATE " keyword (10 chars). */
+  size_t agglen= (chistics.agg_type == GROUP_AGGREGATE)? 10 : 0;
+  LEX_CSTRING tmp;
+
+  /* Make some room to begin with */
+  if (buf->alloc(100 + db.length + 1 + name.length +
+                 params.length + returns.length +
+                 chistics.comment.length + 10 /* length of " DEFINER= "*/ +
+                 agglen + USER_HOST_BUFF_SIZE))
+    return true;
+
+  /* Render the statement using the sql_mode the routine was created with. */
+  Sql_mode_instant_set sms(thd, sql_mode);
+  buf->append(STRING_WITH_LEN("CREATE "));
+  if (ddl_options.or_replace())
+    buf->append(STRING_WITH_LEN("OR REPLACE "));
+  append_definer(thd, buf, &definer.user, &definer.host);
+  if (chistics.agg_type == GROUP_AGGREGATE)
+    buf->append(STRING_WITH_LEN("AGGREGATE "));
+  tmp= type_lex_cstring();
+  buf->append(&tmp);
+  buf->append(STRING_WITH_LEN(" "));
+  if (ddl_options.if_not_exists())
+    buf->append(STRING_WITH_LEN("IF NOT EXISTS "));
+
+  if (db.length > 0)
+  {
+    append_identifier(thd, buf, &db);
+    buf->append('.');
+  }
+  append_identifier(thd, buf, &name);
+  buf->append('(');
+  buf->append(&params);
+  buf->append(')');
+  if (type() == SP_TYPE_FUNCTION)
+  {
+    /* Oracle mode spells the clause RETURN, standard mode RETURNS. */
+    if (sql_mode & MODE_ORACLE)
+      buf->append(STRING_WITH_LEN(" RETURN "));
+    else
+      buf->append(STRING_WITH_LEN(" RETURNS "));
+    buf->append(returns.str, returns.length);   // Not \0 terminated
+  }
+  buf->append('\n');
+  switch (chistics.daccess) {
+  case SP_NO_SQL:
+    buf->append(STRING_WITH_LEN("    NO SQL\n"));
+    break;
+  case SP_READS_SQL_DATA:
+    buf->append(STRING_WITH_LEN("    READS SQL DATA\n"));
+    break;
+  case SP_MODIFIES_SQL_DATA:
+    buf->append(STRING_WITH_LEN("    MODIFIES SQL DATA\n"));
+    break;
+  case SP_DEFAULT_ACCESS:
+  case SP_CONTAINS_SQL:
+    /* Do nothing */
+    break;
+  }
+  if (chistics.detistic)
+    buf->append(STRING_WITH_LEN("    DETERMINISTIC\n"));
+  append_suid(buf, chistics.suid);
+  append_comment(buf, chistics.comment);
+  buf->append(body.str, body.length);           // Not \0 terminated
+  return false;
+}
+
+
+/**
+  @brief    The function loads sp_head struct for information schema purposes
+            (used for I_S ROUTINES & PARAMETERS tables).
+
+  @param[in]      thd               thread handler
+  @param[in]      proc_table        mysql.proc table structure
+  @param[in]      db                database name
+  @param[in]      name              sp name
+  @param[in]      sql_mode          SQL mode
+  @param[in]      type              Routine type
+  @param[in]      returns           'returns' string
+  @param[in]      params            parameters definition string
+  @param[out]     free_sp_head      returns 1 if we need to free sp_head struct
+                                    otherwise returns 0
+                                    
+  @return     Pointer on sp_head struct
+    @retval   #                     Pointer on sp_head struct
+    @retval   0                     error
+*/
+
+sp_head *
+Sp_handler::sp_load_for_information_schema(THD *thd, TABLE *proc_table,
+                                           const LEX_CSTRING &db,
+                                           const LEX_CSTRING &name,
+                                           const LEX_CSTRING &params,
+                                           const LEX_CSTRING &returns,
+                                           sql_mode_t sql_mode,
+                                           bool *free_sp_head) const
+{
+  /*
+    NOTE: the "&params" parameter had been garbled to "¶ms"
+    (HTML-entity corruption); restored.
+  */
+  String defstr;
+  const AUTHID definer= {{STRING_WITH_LEN("")}, {STRING_WITH_LEN("")}};
+  sp_head *sp;
+  sp_cache **spc= get_cache(thd);
+  sp_name sp_name_obj(&db, &name, true); // This can change "name"
+  *free_sp_head= 0;
+  sp= sp_cache_lookup(spc, &sp_name_obj);
+
+  /* Reuse the cached sp_head if it is not older than the global version. */
+  if (sp && !(sp->sp_cache_version() < sp_cache_version()))
+  {
+    return sp;
+  }
+
+  /*
+    Not cached (or stale): rebuild a CREATE statement with an empty body
+    and compile it into a temporary sp_head the caller must free.
+  */
+  LEX *old_lex= thd->lex, newlex;
+  Stored_program_creation_ctx *creation_ctx=
+    Stored_routine_creation_ctx::load_from_db(thd, &sp_name_obj, proc_table);
+  defstr.set_charset(creation_ctx->get_client_cs());
+  if (show_create_sp(thd, &defstr,
+                     sp_name_obj.m_db, sp_name_obj.m_name,
+                     params, returns, empty_body_lex_cstring(sql_mode),
+                     Sp_chistics(), definer, DDL_options(), sql_mode))
+    return 0;
+
+  thd->lex= &newlex;
+  newlex.current_select= NULL;
+  sp= sp_compile(thd, &defstr, sql_mode, NULL, creation_ctx);
+  *free_sp_head= 1;
+  thd->lex->sphead= NULL;
+  lex_end(thd->lex);
+  thd->lex= old_lex;
+  return sp;
+}
+
+
+LEX_CSTRING Sp_handler_procedure::empty_body_lex_cstring(sql_mode_t mode) const
+{
+  /* Smallest syntactically valid procedure body per SQL dialect. */
+  static LEX_CSTRING m_empty_body_std= {STRING_WITH_LEN("BEGIN END")};
+  static LEX_CSTRING m_empty_body_ora= {STRING_WITH_LEN("AS BEGIN NULL; END")};
+  if (mode & MODE_ORACLE)
+    return m_empty_body_ora;
+  return m_empty_body_std;
+}
+
+
+LEX_CSTRING Sp_handler_function::empty_body_lex_cstring(sql_mode_t mode) const
+{
+  /* Smallest syntactically valid function body per SQL dialect. */
+  static LEX_CSTRING m_empty_body_std= {STRING_WITH_LEN("RETURN NULL")};
+  static LEX_CSTRING m_empty_body_ora= {STRING_WITH_LEN("AS BEGIN RETURN NULL; END")};
+  if (mode & MODE_ORACLE)
+    return m_empty_body_ora;
+  return m_empty_body_std;
+}
diff --git a/sql/sp.h b/sql/sp.h
new file mode 100644
index 00000000..c73ff287
--- /dev/null
+++ b/sql/sp.h
@@ -0,0 +1,675 @@
+/* -*- C++ -*- */
+/* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2009, 2020, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#ifndef _SP_H_
+#define _SP_H_
+
+#include "my_global.h"                          /* NO_EMBEDDED_ACCESS_CHECKS */
+#include "sql_string.h"                         // LEX_STRING
+#include "sql_cmd.h"
+#include "mdl.h"
+
+class Field;
+class Open_tables_backup;
+class Open_tables_state;
+class Query_arena;
+class Query_tables_list;
+class Sroutine_hash_entry;
+class THD;
+class sp_cache;
+class sp_head;
+class sp_package;
+class sp_pcontext;
+class sp_name;
+class Database_qualified_name;
+struct st_sp_chistics;
+class Stored_program_creation_ctx;
+struct LEX;
+struct TABLE;
+struct TABLE_LIST;
+typedef struct st_hash HASH;
+/* Template parameter list restored: it had been stripped to
+   "template  class SQL_I_List;", which is invalid C++. */
+template <typename T> class SQL_I_List;
+
+/*
+  Values for the type enum. This reflects the order of the enum declaration
+  in the CREATE TABLE command.
+  See also storage/perfschema/my_thread.h
+*/
+enum enum_sp_type
+{
+  /* Values are 1-based, matching the enum column order (see note above). */
+  SP_TYPE_FUNCTION=1,
+  SP_TYPE_PROCEDURE=2,
+  SP_TYPE_PACKAGE=3,
+  SP_TYPE_PACKAGE_BODY=4,
+  SP_TYPE_TRIGGER=5,
+  SP_TYPE_EVENT=6,
+};
+
+/*
+  Base class describing one kind of stored program (procedure, function,
+  package, package body, trigger).  Subclasses customize type, MDL
+  namespace, caching and privilege lookup.
+  NOTE: three "&params" parameters below had been garbled to "¶ms"
+  (HTML-entity corruption); restored.
+*/
+class Sp_handler
+{
+  bool sp_resolve_package_routine_explicit(THD *thd,
+                                           sp_head *caller,
+                                           sp_name *name,
+                                           const Sp_handler **pkg_routine_hndlr,
+                                           Database_qualified_name *pkgname)
+                                           const;
+  bool sp_resolve_package_routine_implicit(THD *thd,
+                                           sp_head *caller,
+                                           sp_name *name,
+                                           const Sp_handler **pkg_routine_hndlr,
+                                           Database_qualified_name *pkgname)
+                                           const;
+protected:
+  int db_find_routine_aux(THD *thd, const Database_qualified_name *name,
+                          TABLE *table) const;
+  int db_find_routine(THD *thd, const Database_qualified_name *name,
+                      sp_head **sphp) const;
+  int db_find_and_cache_routine(THD *thd,
+                                const Database_qualified_name *name,
+                                sp_head **sp) const;
+  int db_load_routine(THD *thd, const Database_qualified_name *name,
+                      sp_head **sphp,
+                      sql_mode_t sql_mode,
+                      const LEX_CSTRING &params,
+                      const LEX_CSTRING &returns,
+                      const LEX_CSTRING &body,
+                      const st_sp_chistics &chistics,
+                      const AUTHID &definer,
+                      longlong created, longlong modified,
+                      sp_package *parent,
+                      Stored_program_creation_ctx *creation_ctx) const;
+  int sp_drop_routine_internal(THD *thd,
+                               const Database_qualified_name *name,
+                               TABLE *table) const;
+
+  sp_head *sp_clone_and_link_routine(THD *thd,
+                                     const Database_qualified_name *name,
+                                     sp_head *sp) const;
+  int sp_cache_package_routine(THD *thd,
+                               const LEX_CSTRING &pkgname_cstr,
+                               const Database_qualified_name *name,
+                               bool lookup_only, sp_head **sp) const;
+  int sp_cache_package_routine(THD *thd,
+                               const Database_qualified_name *name,
+                               bool lookup_only, sp_head **sp) const;
+  sp_head *sp_find_package_routine(THD *thd,
+                                   const LEX_CSTRING pkgname_str,
+                                   const Database_qualified_name *name,
+                                   bool cache_only) const;
+  sp_head *sp_find_package_routine(THD *thd,
+                                   const Database_qualified_name *name,
+                                   bool cache_only) const;
+public: // TODO: make it private or protected
+  virtual int sp_find_and_drop_routine(THD *thd, TABLE *table,
+                                       const Database_qualified_name *name)
+                                       const;
+
+public:
+  virtual ~Sp_handler() = default;
+  static const Sp_handler *handler(enum enum_sql_command cmd);
+  static const Sp_handler *handler(enum_sp_type type);
+  static const Sp_handler *handler(MDL_key::enum_mdl_namespace ns);
+  /*
+    Return a handler only for those SP objects that store
+    definitions in the mysql.proc system table
+  */
+  static const Sp_handler *handler_mysql_proc(enum_sp_type type)
+  {
+    const Sp_handler *sph= handler(type);
+    return sph ? sph->sp_handler_mysql_proc() : NULL;
+  }
+
+  static bool eq_routine_name(const LEX_CSTRING &name1,
+                              const LEX_CSTRING &name2)
+  {
+    return system_charset_info->strnncoll(name1.str, name1.length,
+                                          name2.str, name2.length) == 0;
+  }
+  const char *type_str() const { return type_lex_cstring().str; }
+  virtual const char *show_create_routine_col1_caption() const
+  {
+    DBUG_ASSERT(0);
+    return "";
+  }
+  virtual const char *show_create_routine_col3_caption() const
+  {
+    DBUG_ASSERT(0);
+    return "";
+  }
+  virtual const Sp_handler *package_routine_handler() const
+  {
+    return this;
+  }
+  virtual enum_sp_type type() const= 0;
+  virtual LEX_CSTRING type_lex_cstring() const= 0;
+  virtual LEX_CSTRING empty_body_lex_cstring(sql_mode_t mode) const
+  {
+    static LEX_CSTRING m_empty_body= {STRING_WITH_LEN("???")};
+    DBUG_ASSERT(0);
+    return m_empty_body;
+  }
+  virtual MDL_key::enum_mdl_namespace get_mdl_type() const= 0;
+  virtual const Sp_handler *sp_handler_mysql_proc() const { return this; }
+  virtual sp_cache **get_cache(THD *) const { return NULL; }
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  virtual HASH *get_priv_hash() const { return NULL; }
+#endif
+  virtual ulong recursion_depth(THD *thd) const { return 0; }
+  /**
+    Return appropriate error about recursion limit reaching
+
+    @param thd  Thread handle
+    @param sp   SP routine
+
+    @remark For functions and triggers we return error about
+            prohibited recursion. For stored procedures we
+            return about reaching recursion limit.
+  */
+  virtual void recursion_level_error(THD *thd, const sp_head *sp) const
+  {
+    my_error(ER_SP_NO_RECURSION, MYF(0));
+  }
+  virtual bool add_instr_freturn(THD *thd, sp_head *sp,
+                                 sp_pcontext *spcont,
+                                 Item *item, LEX *lex) const;
+  virtual bool add_instr_preturn(THD *thd, sp_head *sp,
+                                 sp_pcontext *spcont) const;
+
+  void add_used_routine(Query_tables_list *prelocking_ctx,
+                        Query_arena *arena,
+                        const Database_qualified_name *name) const;
+
+  bool sp_resolve_package_routine(THD *thd,
+                                  sp_head *caller,
+                                  sp_name *name,
+                                  const Sp_handler **pkg_routine_handler,
+                                  Database_qualified_name *pkgname) const;
+  virtual sp_head *sp_find_routine(THD *thd,
+                                   const Database_qualified_name *name,
+                                   bool cache_only) const;
+  virtual int sp_cache_routine(THD *thd, const Database_qualified_name *name,
+                               bool lookup_only, sp_head **sp) const;
+
+  int sp_cache_routine_reentrant(THD *thd,
+                                 const Database_qualified_name *nm,
+                                 sp_head **sp) const;
+
+  bool sp_exist_routines(THD *thd, TABLE_LIST *procs) const;
+  bool sp_show_create_routine(THD *thd,
+                              const Database_qualified_name *name) const;
+
+  bool sp_create_routine(THD *thd, const sp_head *sp) const;
+
+  int sp_update_routine(THD *thd, const Database_qualified_name *name,
+                        const st_sp_chistics *chistics) const;
+
+  int sp_drop_routine(THD *thd, const Database_qualified_name *name) const;
+
+  sp_head *sp_load_for_information_schema(THD *thd, TABLE *proc_table,
+                                          const LEX_CSTRING &db,
+                                          const LEX_CSTRING &name,
+                                          const LEX_CSTRING &params,
+                                          const LEX_CSTRING &returns,
+                                          sql_mode_t sql_mode,
+                                          bool *free_sp_head) const;
+
+  /*
+    Make a SHOW CREATE statement.
+      @retval   true on error
+      @retval   false on success
+  */
+  virtual bool show_create_sp(THD *thd, String *buf,
+                              const LEX_CSTRING &db,
+                              const LEX_CSTRING &name,
+                              const LEX_CSTRING &params,
+                              const LEX_CSTRING &returns,
+                              const LEX_CSTRING &body,
+                              const st_sp_chistics &chistics,
+                              const AUTHID &definer,
+                              const DDL_options_st ddl_options,
+                              sql_mode_t sql_mode) const;
+
+};
+
+
+/* Sp_handler implementation for stored PROCEDUREs. */
+class Sp_handler_procedure: public Sp_handler
+{
+public:
+  enum_sp_type type() const { return SP_TYPE_PROCEDURE; }
+  LEX_CSTRING type_lex_cstring() const
+  {
+    static LEX_CSTRING m_type_str= { STRING_WITH_LEN("PROCEDURE")};
+    return m_type_str;
+  }
+  LEX_CSTRING empty_body_lex_cstring(sql_mode_t mode) const;
+  const char *show_create_routine_col1_caption() const
+  {
+    return "Procedure";
+  }
+  const char *show_create_routine_col3_caption() const
+  {
+    return "Create Procedure";
+  }
+  MDL_key::enum_mdl_namespace get_mdl_type() const
+  {
+    return MDL_key::PROCEDURE;
+  }
+  const Sp_handler *package_routine_handler() const;
+  sp_cache **get_cache(THD *) const;
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  HASH *get_priv_hash() const;
+#endif
+  ulong recursion_depth(THD *thd) const;
+  void recursion_level_error(THD *thd, const sp_head *sp) const;
+  bool add_instr_preturn(THD *thd, sp_head *sp, sp_pcontext *spcont) const;
+};
+
+
+/*
+  Procedure handler variant for procedures that live inside a package:
+  caching and lookup are delegated to the package routine machinery.
+*/
+class Sp_handler_package_procedure: public Sp_handler_procedure
+{
+public:
+  int sp_cache_routine(THD *thd, const Database_qualified_name *name,
+                       bool lookup_only, sp_head **sp) const
+  {
+    return sp_cache_package_routine(thd, name, lookup_only, sp);
+  }
+  sp_head *sp_find_routine(THD *thd,
+                           const Database_qualified_name *name,
+                           bool cache_only) const
+  {
+    return sp_find_package_routine(thd, name, cache_only);
+  }
+};
+
+
+/* Sp_handler implementation for stored FUNCTIONs. */
+class Sp_handler_function: public Sp_handler
+{
+public:
+  enum_sp_type type() const { return SP_TYPE_FUNCTION; }
+  LEX_CSTRING type_lex_cstring() const
+  {
+    static LEX_CSTRING m_type_str= { STRING_WITH_LEN("FUNCTION")};
+    return m_type_str;
+  }
+  LEX_CSTRING empty_body_lex_cstring(sql_mode_t mode) const;
+  const char *show_create_routine_col1_caption() const
+  {
+    return "Function";
+  }
+  const char *show_create_routine_col3_caption() const
+  {
+    return "Create Function";
+  }
+  MDL_key::enum_mdl_namespace get_mdl_type() const
+  {
+    return MDL_key::FUNCTION;
+  }
+  const Sp_handler *package_routine_handler() const;
+  sp_cache **get_cache(THD *) const;
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  HASH *get_priv_hash() const;
+#endif
+  bool add_instr_freturn(THD *thd, sp_head *sp, sp_pcontext *spcont,
+                         Item *item, LEX *lex) const;
+};
+
+
+/*
+  Function handler variant for functions that live inside a package:
+  caching and lookup are delegated to the package routine machinery.
+*/
+class Sp_handler_package_function: public Sp_handler_function
+{
+public:
+  int sp_cache_routine(THD *thd, const Database_qualified_name *name,
+                       bool lookup_only, sp_head **sp) const
+  {
+    return sp_cache_package_routine(thd, name, lookup_only, sp);
+  }
+  sp_head *sp_find_routine(THD *thd,
+                           const Database_qualified_name *name,
+                           bool cache_only) const
+  {
+    return sp_find_package_routine(thd, name, cache_only);
+  }
+};
+
+
+/*
+  Common base for package specifications and package bodies; overrides
+  SHOW CREATE generation.
+  NOTE: the "&params" parameter had been garbled to "¶ms"
+  (HTML-entity corruption); restored.
+*/
+class Sp_handler_package: public Sp_handler
+{
+public:
+  bool show_create_sp(THD *thd, String *buf,
+                      const LEX_CSTRING &db,
+                      const LEX_CSTRING &name,
+                      const LEX_CSTRING &params,
+                      const LEX_CSTRING &returns,
+                      const LEX_CSTRING &body,
+                      const st_sp_chistics &chistics,
+                      const AUTHID &definer,
+                      const DDL_options_st ddl_options,
+                      sql_mode_t sql_mode) const;
+};
+
+
+/* Handler for CREATE PACKAGE (the package specification). */
+class Sp_handler_package_spec: public Sp_handler_package
+{
+public: // TODO: make it private or protected
+  int sp_find_and_drop_routine(THD *thd, TABLE *table,
+                               const Database_qualified_name *name)
+                               const;
+public:
+  enum_sp_type type() const { return SP_TYPE_PACKAGE; }
+  LEX_CSTRING type_lex_cstring() const
+  {
+    static LEX_CSTRING m_type_str= {STRING_WITH_LEN("PACKAGE")};
+    return m_type_str;
+  }
+  LEX_CSTRING empty_body_lex_cstring(sql_mode_t mode) const
+  {
+    static LEX_CSTRING m_empty_body= {STRING_WITH_LEN("BEGIN END")};
+    return m_empty_body;
+  }
+  const char *show_create_routine_col1_caption() const
+  {
+    return "Package";
+  }
+  const char *show_create_routine_col3_caption() const
+  {
+    return "Create Package";
+  }
+  MDL_key::enum_mdl_namespace get_mdl_type() const
+  {
+    /* Shares the PACKAGE_BODY MDL namespace with Sp_handler_package_body. */
+    return MDL_key::PACKAGE_BODY;
+  }
+  sp_cache **get_cache(THD *) const;
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  HASH *get_priv_hash() const;
+#endif
+};
+
+
+/* Handler for CREATE PACKAGE BODY (the package implementation). */
+class Sp_handler_package_body: public Sp_handler_package
+{
+public:
+  enum_sp_type type() const { return SP_TYPE_PACKAGE_BODY; }
+  LEX_CSTRING type_lex_cstring() const
+  {
+    static LEX_CSTRING m_type_str= {STRING_WITH_LEN("PACKAGE BODY")};
+    return m_type_str;
+  }
+  LEX_CSTRING empty_body_lex_cstring(sql_mode_t mode) const
+  {
+    static LEX_CSTRING m_empty_body= {STRING_WITH_LEN("BEGIN END")};
+    return m_empty_body;
+  }
+  const char *show_create_routine_col1_caption() const
+  {
+    return "Package body";
+  }
+  const char *show_create_routine_col3_caption() const
+  {
+    return "Create Package Body";
+  }
+  MDL_key::enum_mdl_namespace get_mdl_type() const
+  {
+    return MDL_key::PACKAGE_BODY;
+  }
+  sp_cache **get_cache(THD *) const;
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  HASH *get_priv_hash() const;
+#endif
+};
+
+
+/**
+  Sp_handler implementation for triggers.  Triggers are not stored in
+  mysql.proc, hence sp_handler_mysql_proc() returns NULL.
+*/
+class Sp_handler_trigger: public Sp_handler
+{
+public:
+  enum_sp_type type() const { return SP_TYPE_TRIGGER; }
+  LEX_CSTRING type_lex_cstring() const
+  {
+    static LEX_CSTRING m_type_str= { STRING_WITH_LEN("TRIGGER")};
+    return m_type_str;
+  }
+  MDL_key::enum_mdl_namespace get_mdl_type() const
+  {
+    /* Not expected to be called for triggers */
+    DBUG_ASSERT(0);
+    return MDL_key::TRIGGER;
+  }
+  /* Triggers have no representation in mysql.proc */
+  const Sp_handler *sp_handler_mysql_proc() const { return NULL; }
+};
+
+
+extern MYSQL_PLUGIN_IMPORT Sp_handler_function sp_handler_function;
+extern MYSQL_PLUGIN_IMPORT Sp_handler_procedure sp_handler_procedure;
+extern MYSQL_PLUGIN_IMPORT Sp_handler_package_spec sp_handler_package_spec;
+extern MYSQL_PLUGIN_IMPORT Sp_handler_package_body sp_handler_package_body;
+extern MYSQL_PLUGIN_IMPORT Sp_handler_package_function sp_handler_package_function;
+extern MYSQL_PLUGIN_IMPORT Sp_handler_package_procedure sp_handler_package_procedure;
+extern MYSQL_PLUGIN_IMPORT Sp_handler_trigger sp_handler_trigger;
+
+
+/**
+  Return the Sp_handler that serves the given SQL command,
+  or NULL when the command is not a stored-routine command.
+*/
+inline const Sp_handler *Sp_handler::handler(enum_sql_command cmd)
+{
+  switch (cmd) {
+  case SQLCOM_CREATE_PROCEDURE:
+  case SQLCOM_ALTER_PROCEDURE:
+  case SQLCOM_DROP_PROCEDURE:
+  case SQLCOM_SHOW_PROC_CODE:
+  case SQLCOM_SHOW_CREATE_PROC:
+  case SQLCOM_SHOW_STATUS_PROC:
+    return &sp_handler_procedure;
+  case SQLCOM_CREATE_SPFUNCTION:
+  case SQLCOM_ALTER_FUNCTION:
+  case SQLCOM_DROP_FUNCTION:
+  case SQLCOM_SHOW_FUNC_CODE:
+  case SQLCOM_SHOW_CREATE_FUNC:
+  case SQLCOM_SHOW_STATUS_FUNC:
+    return &sp_handler_function;
+  case SQLCOM_CREATE_PACKAGE:
+  case SQLCOM_DROP_PACKAGE:
+  case SQLCOM_SHOW_CREATE_PACKAGE:
+  case SQLCOM_SHOW_STATUS_PACKAGE:
+    return &sp_handler_package_spec;
+  case SQLCOM_CREATE_PACKAGE_BODY:
+  case SQLCOM_DROP_PACKAGE_BODY:
+  case SQLCOM_SHOW_CREATE_PACKAGE_BODY:
+  case SQLCOM_SHOW_STATUS_PACKAGE_BODY:
+  case SQLCOM_SHOW_PACKAGE_BODY_CODE:
+    return &sp_handler_package_body;
+  default:
+    break;
+  }
+  return NULL;
+}
+
+
+/**
+  Return the Sp_handler for a routine type.
+  SP_TYPE_EVENT yields NULL: events are not served by an Sp_handler here.
+*/
+inline const Sp_handler *Sp_handler::handler(enum_sp_type type)
+{
+  switch (type) {
+  case SP_TYPE_PROCEDURE:
+    return &sp_handler_procedure;
+  case SP_TYPE_FUNCTION:
+    return &sp_handler_function;
+  case SP_TYPE_PACKAGE:
+    return &sp_handler_package_spec;
+  case SP_TYPE_PACKAGE_BODY:
+    return &sp_handler_package_body;
+  case SP_TYPE_TRIGGER:
+    return &sp_handler_trigger;
+  case SP_TYPE_EVENT:
+    break;
+  }
+  return NULL;
+}
+
+
+/**
+  Return the Sp_handler for an MDL namespace, or NULL for namespaces
+  that do not correspond to a stored routine (tables, schemas, etc.).
+  PACKAGE_BODY maps to the package-body handler; package specifications
+  share that namespace (see Sp_handler_package_spec::get_mdl_type()).
+*/
+inline const Sp_handler *Sp_handler::handler(MDL_key::enum_mdl_namespace type)
+{
+  switch (type) {
+  case MDL_key::FUNCTION:
+    return &sp_handler_function;
+  case MDL_key::PROCEDURE:
+    return &sp_handler_procedure;
+  case MDL_key::PACKAGE_BODY:
+    return &sp_handler_package_body;
+  case MDL_key::BACKUP:
+  case MDL_key::SCHEMA:
+  case MDL_key::TABLE:
+  case MDL_key::TRIGGER:
+  case MDL_key::EVENT:
+  case MDL_key::USER_LOCK:
+  case MDL_key::NAMESPACE_END:
+    break;
+  }
+  return NULL;
+}
+
+
+/* Tells what SP_DEFAULT_ACCESS should be mapped to */
+#define SP_DEFAULT_ACCESS_MAPPING SP_CONTAINS_SQL
+
+// Return codes from sp_create_*, sp_drop_*, and sp_show_*:
+#define SP_OK                 0
+#define SP_KEY_NOT_FOUND     -1
+#define SP_OPEN_TABLE_FAILED -2
+#define SP_WRITE_ROW_FAILED  -3
+#define SP_DELETE_ROW_FAILED -4
+#define SP_GET_FIELD_FAILED  -5
+#define SP_PARSE_ERROR       -6
+#define SP_INTERNAL_ERROR    -7
+#define SP_NO_DB_ERROR       -8
+#define SP_BAD_IDENTIFIER    -9
+#define SP_BODY_TOO_LONG    -10
+#define SP_FLD_STORE_FAILED -11
+
+/*
+  DB storage of Stored PROCEDUREs and FUNCTIONs.
+  Field ordinals used when reading/writing rows of the mysql.proc
+  system table; the order below mirrors the table's column order.
+*/
+enum
+{
+  MYSQL_PROC_FIELD_DB = 0,
+  MYSQL_PROC_FIELD_NAME,
+  MYSQL_PROC_MYSQL_TYPE,
+  MYSQL_PROC_FIELD_SPECIFIC_NAME,
+  MYSQL_PROC_FIELD_LANGUAGE,
+  MYSQL_PROC_FIELD_ACCESS,
+  MYSQL_PROC_FIELD_DETERMINISTIC,
+  MYSQL_PROC_FIELD_SECURITY_TYPE,
+  MYSQL_PROC_FIELD_PARAM_LIST,
+  MYSQL_PROC_FIELD_RETURNS,
+  MYSQL_PROC_FIELD_BODY,
+  MYSQL_PROC_FIELD_DEFINER,
+  MYSQL_PROC_FIELD_CREATED,
+  MYSQL_PROC_FIELD_MODIFIED,
+  MYSQL_PROC_FIELD_SQL_MODE,
+  MYSQL_PROC_FIELD_COMMENT,
+  MYSQL_PROC_FIELD_CHARACTER_SET_CLIENT,
+  MYSQL_PROC_FIELD_COLLATION_CONNECTION,
+  MYSQL_PROC_FIELD_DB_COLLATION,
+  MYSQL_PROC_FIELD_BODY_UTF8,
+  MYSQL_PROC_FIELD_AGGREGATE,
+  MYSQL_PROC_FIELD_COUNT
+};
+
+/* Drop all routines in database 'db' */
+int
+sp_drop_db_routines(THD *thd, const char *db);
+
+/**
+   Acquires exclusive metadata lock on all stored routines in the
+   given database.
+
+   @param  thd  Thread handler
+   @param  db   Database name
+
+   @retval  false  Success
+   @retval  true   Failure
+ */
+bool lock_db_routines(THD *thd, const char *db);
+
+/**
+  Structure that represents element in the set of stored routines
+  used by statement or routine.
+*/
+
+class Sroutine_hash_entry
+{
+public:
+  /**
+    Metadata lock request for routine.
+    MDL_key in this request is also used as a key for set.
+  */
+  MDL_request mdl_request;
+  /**
+    Next element in list linking all routines in set. See also comments
+    for LEX::sroutine/sroutine_list and sp_head::m_sroutines.
+  */
+  Sroutine_hash_entry *next;
+  /**
+    Uppermost view which directly or indirectly uses this routine.
+    0 if routine is not used in view. Note that it also can be 0 if
+    statement uses routine both via view and directly.
+  */
+  TABLE_LIST *belong_to_view;
+  /**
+    This is for prepared statement validation purposes.
+    A statement looks up and pre-loads all its stored functions
+    at prepare. Later on, if a function is gone from the cache,
+    execute may fail.
+    Remember the version of sp_head at prepare to be able to
+    invalidate the prepared statement at execute if it
+    changes.
+  */
+  ulong m_sp_cache_version;
+
+  /* Handler for this routine's type (procedure, function, ...) */
+  const Sp_handler *m_handler;
+
+  /*
+    Find (and, unless lookup_only, load) this routine in the thread's
+    SP cache.  NOTE(review): semantics inferred from the signature;
+    confirm against the implementation in sp.cc.
+  */
+  int sp_cache_routine(THD *thd, bool lookup_only, sp_head **sp) const;
+};
+
+
+/* Add a routine (identified by its MDL key) to the set of routines
+   used by the current statement */
+bool sp_add_used_routine(Query_tables_list *prelocking_ctx, Query_arena *arena,
+                         const MDL_key *key,
+                         const Sp_handler *handler,
+                         TABLE_LIST *belong_to_view);
+void sp_remove_not_own_routines(Query_tables_list *prelocking_ctx);
+bool sp_update_sp_used_routines(HASH *dst, HASH *src);
+void sp_update_stmt_used_routines(THD *thd, Query_tables_list *prelocking_ctx,
+                                  HASH *src, TABLE_LIST *belong_to_view);
+/*
+  Fix: the extraction lost the template argument here; SQL_I_List is a
+  class template and cannot be used bare as a parameter type.
+*/
+void sp_update_stmt_used_routines(THD *thd, Query_tables_list *prelocking_ctx,
+                                  SQL_I_List<Sroutine_hash_entry> *src,
+                                  TABLE_LIST *belong_to_view);
+
+/* HASH key callback for sets of Sroutine_hash_entry elements */
+extern "C" uchar* sp_sroutine_key(const uchar *ptr, size_t *plen,
+                                  my_bool first);
+
+/*
+  Routines which allow open/lock and close mysql.proc table even when
+  we already have some tables open and locked.
+*/
+TABLE *open_proc_table_for_read(THD *thd);
+
+bool load_charset(THD *thd,
+                  MEM_ROOT *mem_root,
+                  Field *field,
+                  CHARSET_INFO *dflt_cs,
+                  CHARSET_INFO **cs);
+
+bool load_collation(THD *thd,MEM_ROOT *mem_root,
+                    Field *field,
+                    CHARSET_INFO *dflt_cl,
+                    CHARSET_INFO **cl);
+
+void sp_returns_type(THD *thd,
+                     String &result,
+                     const sp_head *sp);
+
+#endif /* _SP_H_ */
diff --git a/sql/sp_cache.cc b/sql/sp_cache.cc
new file mode 100644
index 00000000..36ad3710
--- /dev/null
+++ b/sql/sp_cache.cc
@@ -0,0 +1,323 @@
+/* Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#include "mariadb.h"
+#include "sql_priv.h"
+#include "unireg.h"
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation
+#endif
+#include "sp_cache.h"
+#include "sp_head.h"
+
+static mysql_mutex_t Cversion_lock;
+static ulong volatile Cversion= 1;
+
+
+/*
+  Cache of stored routines.
+*/
+
+class sp_cache
+{
+public:
+  sp_cache();
+  ~sp_cache();
+
+  /**
+   Inserts a sp_head object into a hash table.
+
+   @returns Success status
+     @return TRUE Failure
+     @return FALSE Success
+  */
+  inline bool insert(sp_head *sp)
+  {
+    return my_hash_insert(&m_hashtable, (const uchar *)sp);
+  }
+
+  /* Look up a routine by its hash key (see hash_get_key_for_sp_head()) */
+  inline sp_head *lookup(char *name, size_t namelen)
+  {
+    return (sp_head *) my_hash_search(&m_hashtable, (const uchar *)name,
+                                      namelen);
+  }
+
+  /* Remove one routine; the hash's free hook destroys the sp_head */
+  inline void remove(sp_head *sp)
+  {
+    my_hash_delete(&m_hashtable, (uchar *)sp);
+  }
+
+  /**
+    Remove all elements from a stored routine cache if the current
+    number of elements exceeds the argument value.
+
+    @param[in] upper_limit_for_elements  Soft upper limit of elements that
+                                         can be stored in the cache.
+  */
+  void enforce_limit(ulong upper_limit_for_elements)
+  {
+    if (m_hashtable.records > upper_limit_for_elements)
+      my_hash_reset(&m_hashtable);
+  }
+
+private:
+  void init();
+  void cleanup();
+
+  /* All routines in this cache */
+  HASH m_hashtable;
+}; // class sp_cache
+
+#ifdef HAVE_PSI_INTERFACE
+static PSI_mutex_key key_Cversion_lock;
+
+static PSI_mutex_info all_sp_cache_mutexes[]=
+{
+  { &key_Cversion_lock, "Cversion_lock", PSI_FLAG_GLOBAL}
+};
+
+/* Register sp_cache mutex instrumentation with the performance schema.
+   No-op when the PSI server is unavailable. */
+static void init_sp_cache_psi_keys(void)
+{
+  const char* category= "sql";
+  int count;
+
+  if (PSI_server == NULL)
+    return;
+
+  count= array_elements(all_sp_cache_mutexes);
+  PSI_server->register_mutex(category, all_sp_cache_mutexes, count);
+}
+#endif
+
+/* Initialize the SP caching once at startup */
+
+void sp_cache_init()
+{
+#ifdef HAVE_PSI_INTERFACE
+  init_sp_cache_psi_keys();
+#endif
+
+  /* Protects thread_safe_increment() of Cversion in sp_cache_invalidate() */
+  mysql_mutex_init(key_Cversion_lock, &Cversion_lock, MY_MUTEX_INIT_FAST);
+}
+
+
+/*
+  Clear the cache *cp and set *cp to NULL.
+
+  SYNOPSIS
+    sp_cache_clear()
+    cp  Pointer to cache to clear
+
+  NOTE
+    This function doesn't invalidate other caches.
+*/
+
+void sp_cache_clear(sp_cache **cp)
+{
+  sp_cache *c= *cp;
+
+  if (c)
+  {
+    /* ~sp_cache() frees every cached sp_head via the hash free hook */
+    delete c;
+    *cp= NULL;
+  }
+}
+
+
+/* Process shutdown counterpart of sp_cache_init() */
+void sp_cache_end()
+{
+  mysql_mutex_destroy(&Cversion_lock);
+}
+
+
+/*
+  Insert a routine into the cache.
+
+  SYNOPSIS
+    sp_cache_insert()
+     cp  The cache to put routine into
+     sp  Routine to insert.
+
+  TODO: Perhaps it will be more straightforward if in case we returned an
+        error from this function when we couldn't allocate sp_cache. (right
+        now failure to put routine into cache will cause a 'SP not found'
+        error to be reported at some later time)
+*/
+
+void sp_cache_insert(sp_cache **cp, sp_head *sp)
+{
+  sp_cache *c;
+
+  if (!(c= *cp))
+  {
+    /* Lazily create the per-thread cache on first insert */
+    if (!(c= new sp_cache()))
+      return;                                   // End of memory error
+  }
+  /* Reading a ulong variable with no lock. */
+  sp->set_sp_cache_version(Cversion);
+  DBUG_PRINT("info",("sp_cache: inserting: %s", ErrConvDQName(sp).ptr()));
+  c->insert(sp);                /* failure tolerated; see TODO above */
+  *cp= c;                                       // Update *cp if it was NULL
+}
+
+
+/*
+  Look up a routine in the cache.
+  SYNOPSIS
+    sp_cache_lookup()
+      cp    Cache to look into
+      name  Name of routine to find
+
+  NOTE
+    An obsolete routine may be returned (but only one that became
+    obsolete after the last sp_cache_flush_obsolete() call).
+
+  RETURN
+    The routine or
+    NULL if the routine not found.
+*/
+
+sp_head *sp_cache_lookup(sp_cache **cp, const Database_qualified_name *name)
+{
+  char buf[NAME_LEN * 2 + 2];          /* "db" + '.' + "name" + '\0' */
+  sp_cache *c= *cp;
+  if (! c)
+    return NULL;
+  /* make_qname() writes the qualified name into buf and returns its length */
+  return c->lookup(buf, name->make_qname(buf, sizeof(buf)));
+}
+
+
+/*
+  Invalidate all routines in all caches.
+
+  SYNOPSIS
+    sp_cache_invalidate()
+
+  NOTE
+    This is called when a VIEW definition is created or modified (and in some
+    other contexts). We can't destroy sp_head objects here as one may modify
+    VIEW definitions from prelocking-free SPs.
+*/
+
+void sp_cache_invalidate()
+{
+  DBUG_PRINT("info",("sp_cache: invalidating"));
+  /* Bump the global version; cached routines become obsolete lazily */
+  thread_safe_increment(Cversion, &Cversion_lock);
+}
+
+
+/**
+  Remove an out-of-date SP from the cache.
+
+  @param[in] cp  Cache to flush
+  @param[in] sp  SP to remove.
+
+  @note This invalidates pointers to sp_head objects this thread
+  uses. In practice that means don't call this function when
+  inside an SP.
+*/
+
+void sp_cache_flush_obsolete(sp_cache **cp, sp_head **sp)
+{
+  /* Only routines that are not currently being executed may be freed */
+  if ((*sp)->sp_cache_version() < Cversion && !(*sp)->is_invoked())
+  {
+    (*cp)->remove(*sp);
+    *sp= NULL;
+  }
+}
+
+/**
+  Return the current global version of the cache.
+*/
+
+ulong sp_cache_version()
+{
+  /* Dirty read: a plain ulong load is relied upon to be atomic here */
+  return Cversion;
+}
+
+
+/**
+  Enforce that the current number of elements in the cache don't exceed
+  the argument value by flushing the cache if necessary.
+
+  @param[in] c  Cache to check
+  @param[in] upper_limit_for_elements  Soft upper limit for number of sp_head
+                                       objects that can be stored in the cache.
+*/
+void
+sp_cache_enforce_limit(sp_cache *c, ulong upper_limit_for_elements)
+{
+ /* A NULL cache means nothing has been cached yet */
+ if (c)
+   c->enforce_limit(upper_limit_for_elements);
+}
+
+/*************************************************************************
+  Internal functions
+ *************************************************************************/
+
+extern "C" uchar *hash_get_key_for_sp_head(const uchar *ptr, size_t *plen,
+                                           my_bool first);
+extern "C" void hash_free_sp_head(void *p);
+
+/* HASH key callback: an sp_head is keyed by its qualified name (m_qname) */
+uchar *hash_get_key_for_sp_head(const uchar *ptr, size_t *plen,
+                                my_bool first)
+{
+  sp_head *sp= (sp_head *)ptr;
+  *plen= sp->m_qname.length;
+  return (uchar*) sp->m_qname.str;
+}
+
+
+/* HASH free callback: destroy a cached sp_head when it leaves the hash */
+void hash_free_sp_head(void *p)
+{
+  sp_head *sp= (sp_head *)p;
+  sp_head::destroy(sp);
+}
+
+
+sp_cache::sp_cache()
+{
+  init();                                       /* sets up m_hashtable */
+}
+
+
+sp_cache::~sp_cache()
+{
+  /* Frees every cached routine via hash_free_sp_head() */
+  my_hash_free(&m_hashtable);
+}
+
+
+void
+sp_cache::init()
+{
+  /* Element cleanup is owned by hash_free_sp_head() */
+  my_hash_init(key_memory_sp_cache, &m_hashtable, system_charset_info, 0, 0, 0,
+               hash_get_key_for_sp_head, hash_free_sp_head, 0);
+}
+
+
+void
+sp_cache::cleanup()
+{
+  /* Same teardown as the destructor: frees the hash and its elements */
+  my_hash_free(&m_hashtable);
+}
+
+
+/* Drop all four per-connection routine caches: procedures, functions,
+   package specifications and package bodies */
+void Sp_caches::sp_caches_clear()
+{
+  sp_cache_clear(&sp_proc_cache);
+  sp_cache_clear(&sp_func_cache);
+  sp_cache_clear(&sp_package_spec_cache);
+  sp_cache_clear(&sp_package_body_cache);
+}
diff --git a/sql/sp_cache.h b/sql/sp_cache.h
new file mode 100644
index 00000000..7506edff
--- /dev/null
+++ b/sql/sp_cache.h
@@ -0,0 +1,66 @@
+/* -*- C++ -*- */
+/* Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#ifndef _SP_CACHE_H_
+#define _SP_CACHE_H_
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface			/* gcc class implementation */
+#endif
+
+/*
+  Stored procedures/functions cache. This is used as follows:
+   * Each thread has its own cache.
+   * Each sp_head object is put into its thread cache before it is used, and
+     then remains in the cache until deleted.
+*/
+
+class sp_head;
+class sp_cache;
+class Database_qualified_name;
+
+/*
+  Cache usage scenarios:
+  1. Application-wide init:
+    sp_cache_init();
+
+  2. SP execution in thread:
+  2.1 While holding sp_head* pointers:
+  
+    // look up a routine in the cache (no checks if it is up to date or not)
+    sp_cache_lookup(); 
+    
+    sp_cache_insert();
+    sp_cache_invalidate();
+  
+  2.2 When not holding any sp_head* pointers:
+    sp_cache_flush_obsolete();
+  
+  3. Before thread exit:
+    sp_cache_clear();
+*/
+
+void sp_cache_init();
+void sp_cache_end();
+void sp_cache_clear(sp_cache **cp);
+void sp_cache_insert(sp_cache **cp, sp_head *sp);
+sp_head *sp_cache_lookup(sp_cache **cp, const Database_qualified_name *name);
+void sp_cache_invalidate();
+void sp_cache_flush_obsolete(sp_cache **cp, sp_head **sp);
+ulong sp_cache_version();
+void sp_cache_enforce_limit(sp_cache *cp, ulong upper_limit_for_elements);
+
+#endif /* _SP_CACHE_H_ */
diff --git a/sql/sp_head.cc b/sql/sp_head.cc
new file mode 100644
index 00000000..5fd6ab52
--- /dev/null
+++ b/sql/sp_head.cc
@@ -0,0 +1,5607 @@
+/*
+   Copyright (c) 2002, 2016, Oracle and/or its affiliates.
+   Copyright (c) 2011, 2020, MariaDB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#include "mariadb.h"                          /* NO_EMBEDDED_ACCESS_CHECKS */
+#include "sql_priv.h"
+#include "unireg.h"
+#include "sql_prepare.h"
+#include "sql_cache.h"                          // query_cache_*
+#include "probes_mysql.h"
+#include "sql_show.h"                           // append_identifier
+#include "sql_db.h"            // mysql_opt_change_db, mysql_change_db
+#include "sql_array.h"         // Dynamic_array
+#include "log_event.h"         // Query_log_event
+#include "sql_derived.h"       // mysql_handle_derived
+#include "sql_cte.h"
+#include "sql_select.h"        // Virtual_tmp_table
+#include "opt_trace.h"
+#include "my_json_writer.h"
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation
+#endif
+#include "sp_head.h"
+#include "sp.h"
+#include "sp_pcontext.h"
+#include "sp_rcontext.h"
+#include "sp_cache.h"
+#include "set_var.h"
+#include "sql_parse.h"                          // cleanup_items
+#include "sql_base.h"                           // close_thread_tables
+#include "transaction.h"       // trans_commit_stmt
+#include "sql_audit.h"
+#include "debug_sync.h"
+#ifdef WITH_WSREP
+#include "wsrep.h"
+#include "wsrep_trans_observer.h"
+#endif /* WITH_WSREP */
+
+/*
+  Sufficient max length of printed destinations and frame offsets (all uints).
+*/
+#define SP_INSTR_UINT_MAXLEN  8
+#define SP_STMT_PRINT_MAXLEN 40
+
+#include 
+#include "mysql/psi/mysql_statement.h"
+#include "mysql/psi/mysql_sp.h"
+
+#ifdef HAVE_PSI_INTERFACE
+/* Register all sp_instr_* statement classes with the performance schema */
+void init_sp_psi_keys()
+{
+  const char *category= "sp";
+  /*
+    Line-counting trick: "num" is chosen so that (__LINE__ - num) at the
+    DBUG_ASSERT below equals the number of register_statement() calls.
+    Do NOT add or remove lines between here and the assert.
+  */
+  const int num __attribute__((unused)) = __LINE__ + 3;
+
+  PSI_server->register_statement(category, & sp_instr_stmt::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_set::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_set_trigger_field::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_jump::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_jump_if_not::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_freturn::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_preturn::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_hpush_jump::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_hpop::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_hreturn::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_cpush::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_cpop::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_copen::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_cclose::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_cfetch::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_agg_cfetch::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_cursor_copy_struct::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_error::psi_info, 1);
+  PSI_server->register_statement(category, & sp_instr_set_case_expr::psi_info, 1);
+
+  DBUG_ASSERT(SP_PSI_STATEMENT_INFO_COUNT == __LINE__ - num);
+}
+#endif
+
+#ifdef HAVE_PSI_SP_INTERFACE
+#define MYSQL_RUN_SP(SP,CODE)                                           \
+  do {                                                                  \
+       PSI_sp_locker_state psi_state;                                   \
+       PSI_sp_locker *locker= MYSQL_START_SP(&psi_state, (SP)->m_sp_share); \
+       CODE;                                                            \
+       MYSQL_END_SP(locker);                                            \
+  } while(0)
+#else
+#define MYSQL_RUN_SP(SP, CODE) do { CODE; } while(0)
+#endif
+
+extern "C" uchar *sp_table_key(const uchar *ptr, size_t *plen, my_bool first);
+
+/**
+  Helper function which operates on a THD object to set the query start_time to
+  the current time.
+
+  @param[in, out] thd The session object
+
+*/
+
+static void reset_start_time_for_sp(THD *thd)
+{
+  /* Sub-statements keep the start time of the statement that invoked them */
+  if (!thd->in_sub_stmt)
+    thd->set_start_time();
+}
+
+
+/**
+  Append this SP local variable to "str" in the form
+  NAME_CONST('name', value), so the logged statement text evaluates to
+  the same value when replayed.
+*/
+bool Item_splocal::append_for_log(THD *thd, String *str)
+{
+  if (fix_fields_if_needed(thd, NULL))
+    return true;
+
+  /* LIMIT parameters are printed as plain unsigned numbers --
+     presumably NAME_CONST() is not accepted in a LIMIT clause; confirm */
+  if (limit_clause_param)
+    return str->append_ulonglong(val_uint());
+
+  /*
+    ROW variables are currently not allowed in select_list, e.g.:
+      SELECT row_variable;
+    ROW variables can appear in query parts where name is not important, e.g.:
+      SELECT ROW(1,2)=row_variable FROM t1;
+    So we can skip using NAME_CONST() and use ROW() constants directly.
+  */
+  if (type_handler() == &type_handler_row)
+    return append_value_for_log(thd, str);
+
+  if (str->append(STRING_WITH_LEN(" NAME_CONST('")) ||
+      str->append(&m_name) ||
+      str->append(STRING_WITH_LEN("',")))
+    return true;
+  return append_value_for_log(thd, str) || str->append(')');
+}
+
+
+/**
+  Append this variable's current value to "str" as an SQL literal,
+  or the literal NULL when no printable value is available.
+
+  @return false on success, true on append failure
+*/
+bool Item_splocal::append_value_for_log(THD *thd, String *str)
+{
+  /*
+    Fix: the template argument was lost in extraction; StringBuffer is a
+    size-parameterized template and cannot be used bare.
+  */
+  StringBuffer<STRING_BUFFER_USUAL_SIZE> str_value_holder(&my_charset_latin1);
+  Item *item= this_item();
+  String *str_value= item->type_handler()->print_item_value(thd, item,
+                                                            &str_value_holder);
+  return (str_value ?
+          str->append(*str_value) :
+          str->append(NULL_clex_str));
+}
+
+
+/**
+  Like Item_splocal::append_for_log(), but for one field of a ROW
+  variable: prints NAME_CONST('var.field', value).
+*/
+bool Item_splocal_row_field::append_for_log(THD *thd, String *str)
+{
+  if (fix_fields_if_needed(thd, NULL))
+    return true;
+
+  if (limit_clause_param)
+    return str->append_ulonglong(val_uint());
+
+  if (str->append(STRING_WITH_LEN(" NAME_CONST('")) ||
+      str->append(&m_name) ||
+      str->append('.') ||
+      str->append(&m_field_name) ||
+      str->append(STRING_WITH_LEN("',")))
+    return true;
+  return append_value_for_log(thd, str) || str->append(')');
+}
+
+
+/**
+   Returns a combination of:
+   - sp_head::MULTI_RESULTS: added if the 'cmd' is a command that might
+     result in multiple result sets being sent back.
+   - sp_head::CONTAINS_DYNAMIC_SQL: added if 'cmd' is one of PREPARE,
+     EXECUTE, DEALLOCATE.
+*/
+
+uint
+sp_get_flags_for_command(LEX *lex)
+{
+  uint flags;
+
+  switch (lex->sql_command) {
+  case SQLCOM_SELECT:
+    if (lex->result && !lex->analyze_stmt)
+    {
+      flags= 0;                      /* This is a SELECT with INTO clause */
+      break;
+    }
+    /* fallthrough */
+  case SQLCOM_ANALYZE:
+  case SQLCOM_OPTIMIZE:
+  case SQLCOM_PRELOAD_KEYS:
+  case SQLCOM_ASSIGN_TO_KEYCACHE:
+  case SQLCOM_CHECKSUM:
+  case SQLCOM_CHECK:
+  case SQLCOM_HA_READ:
+  case SQLCOM_SHOW_AUTHORS:
+  case SQLCOM_SHOW_BINLOGS:
+  case SQLCOM_SHOW_BINLOG_EVENTS:
+  case SQLCOM_SHOW_RELAYLOG_EVENTS:
+  case SQLCOM_SHOW_CHARSETS:
+  case SQLCOM_SHOW_COLLATIONS:
+  case SQLCOM_SHOW_CONTRIBUTORS:
+  case SQLCOM_SHOW_CREATE:
+  case SQLCOM_SHOW_CREATE_DB:
+  case SQLCOM_SHOW_CREATE_FUNC:
+  case SQLCOM_SHOW_CREATE_PROC:
+  case SQLCOM_SHOW_CREATE_PACKAGE:
+  case SQLCOM_SHOW_CREATE_PACKAGE_BODY:
+  case SQLCOM_SHOW_CREATE_EVENT:
+  case SQLCOM_SHOW_CREATE_TRIGGER:
+  case SQLCOM_SHOW_CREATE_USER:
+  case SQLCOM_SHOW_DATABASES:
+  case SQLCOM_SHOW_ERRORS:
+  case SQLCOM_SHOW_EXPLAIN:
+  case SQLCOM_SHOW_ANALYZE:
+  case SQLCOM_SHOW_FIELDS:
+  case SQLCOM_SHOW_FUNC_CODE:
+  case SQLCOM_SHOW_GENERIC:
+  case SQLCOM_SHOW_GRANTS:
+  case SQLCOM_SHOW_ENGINE_STATUS:
+  case SQLCOM_SHOW_ENGINE_LOGS:
+  case SQLCOM_SHOW_ENGINE_MUTEX:
+  case SQLCOM_SHOW_EVENTS:
+  case SQLCOM_SHOW_KEYS:
+  case SQLCOM_SHOW_BINLOG_STAT:
+  case SQLCOM_SHOW_OPEN_TABLES:
+  case SQLCOM_SHOW_PRIVILEGES:
+  case SQLCOM_SHOW_PROCESSLIST:
+  case SQLCOM_SHOW_PROC_CODE:
+  case SQLCOM_SHOW_PACKAGE_BODY_CODE:
+  case SQLCOM_SHOW_SLAVE_HOSTS:
+  case SQLCOM_SHOW_SLAVE_STAT:
+  case SQLCOM_SHOW_STATUS:
+  case SQLCOM_SHOW_STATUS_FUNC:
+  case SQLCOM_SHOW_STATUS_PROC:
+  case SQLCOM_SHOW_STATUS_PACKAGE:
+  case SQLCOM_SHOW_STATUS_PACKAGE_BODY:
+  case SQLCOM_SHOW_STORAGE_ENGINES:
+  case SQLCOM_SHOW_TABLES:
+  case SQLCOM_SHOW_TABLE_STATUS:
+  case SQLCOM_SHOW_VARIABLES:
+  case SQLCOM_SHOW_WARNS:
+  case SQLCOM_REPAIR:
+    /* All of the above send a result set to the client */
+    flags= sp_head::MULTI_RESULTS;
+    break;
+  /*
+    EXECUTE statement may return a result set, but doesn't have to.
+    We can't, however, know it in advance, and therefore must add
+    this statement here. This is ok, as is equivalent to a result-set
+    statement within an IF condition.
+  */
+  case SQLCOM_EXECUTE:
+  case SQLCOM_EXECUTE_IMMEDIATE:
+    flags= sp_head::MULTI_RESULTS | sp_head::CONTAINS_DYNAMIC_SQL;
+    break;
+  case SQLCOM_PREPARE:
+  case SQLCOM_DEALLOCATE_PREPARE:
+    flags= sp_head::CONTAINS_DYNAMIC_SQL;
+    break;
+  /* CREATE/DROP of a base table commits; temporary tables do not */
+  case SQLCOM_CREATE_TABLE:
+  case SQLCOM_CREATE_SEQUENCE:
+    if (lex->tmp_table())
+      flags= 0;
+    else
+      flags= sp_head::HAS_COMMIT_OR_ROLLBACK;
+    break;
+  case SQLCOM_DROP_TABLE:
+  case SQLCOM_DROP_SEQUENCE:
+    if (lex->tmp_table())
+      flags= 0;
+    else
+      flags= sp_head::HAS_COMMIT_OR_ROLLBACK;
+    break;
+  case SQLCOM_FLUSH:
+    flags= sp_head::HAS_SQLCOM_FLUSH;
+    break;
+  case SQLCOM_RESET:
+    flags= sp_head::HAS_SQLCOM_RESET;
+    break;
+  case SQLCOM_CREATE_INDEX:
+  case SQLCOM_CREATE_DB:
+  case SQLCOM_CREATE_PACKAGE:
+  case SQLCOM_CREATE_PACKAGE_BODY:
+  case SQLCOM_CREATE_VIEW:
+  case SQLCOM_CREATE_TRIGGER:
+  case SQLCOM_CREATE_USER:
+  case SQLCOM_CREATE_ROLE:
+  case SQLCOM_ALTER_TABLE:
+  case SQLCOM_ALTER_SEQUENCE:
+  case SQLCOM_ALTER_USER:
+  case SQLCOM_GRANT:
+  case SQLCOM_GRANT_ROLE:
+  case SQLCOM_REVOKE:
+  case SQLCOM_REVOKE_ROLE:
+  case SQLCOM_BEGIN:
+  case SQLCOM_RENAME_TABLE:
+  case SQLCOM_RENAME_USER:
+  case SQLCOM_DROP_INDEX:
+  case SQLCOM_DROP_DB:
+  case SQLCOM_DROP_PACKAGE:
+  case SQLCOM_DROP_PACKAGE_BODY:
+  case SQLCOM_REVOKE_ALL:
+  case SQLCOM_DROP_USER:
+  case SQLCOM_DROP_ROLE:
+  case SQLCOM_DROP_VIEW:
+  case SQLCOM_DROP_TRIGGER:
+  case SQLCOM_TRUNCATE:
+  case SQLCOM_COMMIT:
+  case SQLCOM_ROLLBACK:
+  case SQLCOM_LOAD:
+  case SQLCOM_LOCK_TABLES:
+  case SQLCOM_CREATE_PROCEDURE:
+  case SQLCOM_CREATE_SPFUNCTION:
+  case SQLCOM_ALTER_PROCEDURE:
+  case SQLCOM_ALTER_FUNCTION:
+  case SQLCOM_DROP_PROCEDURE:
+  case SQLCOM_DROP_FUNCTION:
+  case SQLCOM_CREATE_EVENT:
+  case SQLCOM_ALTER_EVENT:
+  case SQLCOM_DROP_EVENT:
+  case SQLCOM_INSTALL_PLUGIN:
+  case SQLCOM_UNINSTALL_PLUGIN:
+    /* DDL/DCL/transaction statements: explicit or implicit commit/rollback */
+    flags= sp_head::HAS_COMMIT_OR_ROLLBACK;
+    break;
+  case SQLCOM_DELETE:
+  case SQLCOM_DELETE_MULTI:
+  case SQLCOM_INSERT:
+  case SQLCOM_REPLACE:
+  case SQLCOM_REPLACE_SELECT:
+  case SQLCOM_INSERT_SELECT:
+  {
+    /* 
+      DELETE normally doesn't return resultset, but there are 3 exceptions:
+       - DELETE ... RETURNING
+       - EXPLAIN DELETE ...
+       - ANALYZE DELETE ...
+    */
+    if (!lex->has_returning() && !lex->describe && !lex->analyze_stmt)
+      flags= 0;
+    else
+      flags= sp_head::MULTI_RESULTS; 
+    break;
+  }
+  case SQLCOM_UPDATE:
+  case SQLCOM_UPDATE_MULTI:
+  {
+    /* UPDATE returns a result set only for EXPLAIN/ANALYZE variants */
+    if (!lex->describe && !lex->analyze_stmt)
+      flags= 0;
+    else
+      flags= sp_head::MULTI_RESULTS; 
+    break;
+  }
+  default:
+    flags= 0;
+    break;
+  }
+  return flags;
+}
+
+/**
+  Prepare an Item for evaluation (call of fix_fields).
+
+  @param it_addr   pointer on item refernce
+  @param cols      expected number of elements (1 for scalar, >=1 for ROWs)
+
+  @retval
+    NULL      error
+  @retval
+    non-NULL  prepared item
+*/
+
+Item *THD::sp_prepare_func_item(Item **it_addr, uint cols)
+{
+  DBUG_ENTER("THD::sp_prepare_func_item");
+  Item *res= sp_fix_func_item(it_addr);
+  /* check_cols() rejects items whose arity differs from "cols" */
+  if (res && res->check_cols(cols))
+    DBUG_RETURN(NULL);
+  DBUG_RETURN(res);
+}
+
+
+/**
+  Fix an Item for evaluation for SP.
+*/
+
+Item *THD::sp_fix_func_item(Item **it_addr)
+{
+  DBUG_ENTER("THD::sp_fix_func_item");
+  /* First pass: fix the item as written */
+  if ((*it_addr)->fix_fields_if_needed(this, it_addr))
+  {
+    DBUG_PRINT("info", ("fix_fields() failed"));
+    DBUG_RETURN(NULL);
+  }
+  /*
+    this_item_addr() may redirect to an underlying item (for SP variables,
+    presumably the item holding the current value -- confirm); that
+    redirected item needs its own fix pass.
+  */
+  it_addr= (*it_addr)->this_item_addr(this, it_addr);
+
+  if ((*it_addr)->fix_fields_if_needed(this, it_addr))
+  {
+    DBUG_PRINT("info", ("fix_fields() failed"));
+    DBUG_RETURN(NULL);
+  }
+  DBUG_RETURN(*it_addr);
+}
+
+
+/**
+  Prepare an Item for evaluation as an assignment source,
+  for assignment to the given target.
+
+  @param to        - the assignment target
+  @param it_addr   - a pointer on item refernce
+
+  @retval          -  NULL on error
+  @retval          -  a prepared item pointer on success
+*/
+Item *THD::sp_fix_func_item_for_assignment(const Field *to, Item **it_addr)
+{
+  DBUG_ENTER("THD::sp_fix_func_item_for_assignment");
+  Item *res= sp_fix_func_item(it_addr);
+  /* Note: check_assignability_to() returns false when assignment is OK */
+  if (res && (!res->check_assignability_to(to, false)))
+    DBUG_RETURN(res);
+  DBUG_RETURN(NULL);
+}
+
+
+/**
+  Evaluate an expression and store the result in the field.
+
+  @param result_field           the field to store the result
+  @param expr_item_ptr          the root item of the expression
+
+  @retval
+    FALSE  on success
+  @retval
+    TRUE   on error
+*/
+
+bool THD::sp_eval_expr(Field *result_field, Item **expr_item_ptr)
+{
+  DBUG_ENTER("THD::sp_eval_expr");
+  DBUG_ASSERT(*expr_item_ptr);
+  /*
+    NOTE(review): Sp_eval_expr_state looks like an RAII guard that
+    saves/restores THD state around SP expression evaluation -- confirm.
+  */
+  Sp_eval_expr_state state(this);
+  /* Save the value in the field. Convert the value if needed. */
+  DBUG_RETURN(result_field->sp_prepare_and_store_item(this, expr_item_ptr));
+}
+
+
+/**
+  Create temporary sp_name object from MDL key.
+
+  @note The lifetime of this object is bound to the lifetime of the MDL_key.
+        This should be fine as sp_name objects created by this constructor
+        are mainly used for SP-cache lookups.
+
+  @param key         MDL key containing database and routine name.
+  @param qname_buff  Buffer to be used for storing quoted routine name
+                     (should be at least 2*NAME_LEN+1+1 bytes).
+*/
+
+sp_name::sp_name(const MDL_key *key, char *qname_buff)
+ :Database_qualified_name(key->db_name(), key->db_name_length(),
+                          key->name(),  key->name_length()),
+  m_explicit_name(false)
+{
+  if (m_db.length)
+    strxmov(qname_buff, m_db.str, ".", m_name.str, NullS);
+  else
+    strmov(qname_buff, m_name.str);
+}
+
+
+/**
+  Check that the name 'ident' is ok.  It's assumed to be an 'ident'
+  from the parser, so we only have to check length and trailing spaces.
+  The former is a standard requirement (and 'show status' assumes a
+  non-empty name), the latter is a mysql:ism as trailing spaces are
+  removed by get_field().
+
+  @retval
+    TRUE    bad name
+  @retval
+    FALSE   name is ok
+*/
+
+bool
+check_routine_name(const LEX_CSTRING *ident)
+{
+  DBUG_ASSERT(ident);
+  DBUG_ASSERT(ident->str);
+
+  if (!ident->str[0] || ident->str[ident->length-1] == ' ')
+  {
+    my_error(ER_SP_WRONG_NAME, MYF(0), ident->str);
+    return TRUE;
+  }
+  if (check_ident_length(ident))
+    return TRUE;
+
+  return FALSE;
+}
+
+
+/*
+ *
+ *  sp_head
+ *
+ */
+ 
+sp_head *sp_head::create(sp_package *parent, const Sp_handler *handler,
+                         enum_sp_aggregate_type agg_type)
+{
+  MEM_ROOT own_root;
+  init_sql_alloc(key_memory_sp_head_main_root, &own_root, MEM_ROOT_BLOCK_SIZE,
+                 MEM_ROOT_PREALLOC, MYF(0));
+  sp_head *sp;
+  if (!(sp= new (&own_root) sp_head(&own_root, parent, handler, agg_type)))
+    free_root(&own_root, MYF(0));
+
+  return sp;
+}
+
+
+void sp_head::destroy(sp_head *sp)
+{
+  if (sp)
+  {
+    /* Make a copy of main_mem_root as free_root will free the sp */
+    MEM_ROOT own_root= sp->main_mem_root;
+    DBUG_PRINT("info", ("mem_root %p moved to %p",
+                        &sp->mem_root, &own_root));
+    delete sp;
+
+ 
+    free_root(&own_root, MYF(0));
+  }
+}
+
+/*
+ *
+ *  sp_head
+ *
+ */
+
/**
  sp_head constructor.

  @param mem_root_arg  Memory root the object was allocated on; its value is
                       copied into main_mem_root, which this object owns from
                       now on (released by sp_head::destroy()).
  @param parent        Enclosing package, or NULL for a standalone routine.
  @param sph           Handler describing the routine type.
  @param agg_type      Aggregate type (relevant for stored aggregate functions).
*/
sp_head::sp_head(MEM_ROOT *mem_root_arg, sp_package *parent,
                 const Sp_handler *sph, enum_sp_aggregate_type agg_type)
  :Query_arena(NULL, STMT_INITIALIZED_FOR_SP),
   Database_qualified_name(&null_clex_str, &null_clex_str),
   main_mem_root(*mem_root_arg),
#ifdef PROTECT_STATEMENT_MEMROOT
   executed_counter(0),
#endif
   m_parent(parent),
   m_handler(sph),
   m_flags(0),
   m_tmp_query(NULL),
   m_explicit_name(false),
   /*
     FIXME: the only use case when name is NULL is events, and it should
     be rewritten soon. Remove the else part and replace 'if' with
     an assert when this is done.
   */
   m_qname(null_clex_str),
   m_params(null_clex_str),
   m_body(null_clex_str),
   m_body_utf8(null_clex_str),
   m_defstr(null_clex_str),
   m_sp_cache_version(0),
   m_creation_ctx(0),
   unsafe_flags(0),
   m_created(0),
   m_modified(0),
   m_recursion_level(0),
   m_next_cached_sp(0),
   m_param_begin(NULL),
   m_param_end(NULL),
   m_cpp_body_begin(NULL),
   m_thd_root(NULL),
   m_thd(NULL),
   m_pcont(new (&main_mem_root) sp_pcontext()),
   m_cont_level(0)
{
  /* From now on, arena allocations go to the routine's own root. */
  mem_root= &main_mem_root;

  set_chistics_agg_type(agg_type);
  /* A fresh sp_head is the only element of its recursion-instance chain. */
  m_first_instance= this;
  m_first_free_instance= this;
  m_last_cached_sp= this;

  m_return_field_def.charset = NULL;

  DBUG_ENTER("sp_head::sp_head");

  m_security_ctx.init();
  /* Backpatch lists for forward jumps, and the stack of parsing LEXes. */
  m_backpatch.empty();
  m_backpatch_goto.empty();
  m_cont_backpatch.empty();
  m_lex.empty();
  /* Instruction array plus hashes keyed by sp_table_key / sp_sroutine_key. */
  my_init_dynamic_array(key_memory_sp_head_main_root, &m_instr,
                        sizeof(sp_instr *), 16, 8, MYF(0));
  my_hash_init(key_memory_sp_head_main_root, &m_sptabs, system_charset_info, 0,
               0, 0, sp_table_key, 0, 0);
  my_hash_init(key_memory_sp_head_main_root, &m_sroutines, system_charset_info,
               0, 0, 0, sp_sroutine_key, 0, 0);

  DBUG_VOID_RETURN;
}
+
+
+sp_package *sp_package::create(LEX *top_level_lex, const sp_name *name,
+                               const Sp_handler *sph)
+{
+  MEM_ROOT own_root;
+  init_sql_alloc(key_memory_sp_head_main_root, &own_root, MEM_ROOT_BLOCK_SIZE,
+                 MEM_ROOT_PREALLOC, MYF(0));
+  sp_package *sp;
+  if (!(sp= new (&own_root) sp_package(&own_root, top_level_lex, name, sph)))
+    free_root(&own_root, MYF(0));
+
+  return sp;
+}
+
+
/**
  sp_package constructor.

  @param mem_root_arg   Memory root; ownership semantics as for sp_head.
  @param top_level_lex  The LEX of the CREATE PACKAGE [BODY] statement.
  @param name           Qualified package name; copied via init_sp_name().
  @param sph            Handler describing the package type.
*/
sp_package::sp_package(MEM_ROOT *mem_root_arg,
                       LEX *top_level_lex,
                       const sp_name *name,
                       const Sp_handler *sph)
 :sp_head(mem_root_arg, NULL, sph, DEFAULT_AGGREGATE),
  m_current_routine(NULL),
  m_top_level_lex(top_level_lex),
  m_rcontext(NULL),
  m_invoked_subroutine_count(0),
  m_is_instantiated(false),
  m_is_cloning_routine(false)
{
  init_sp_name(name);
}
+
+
/**
  Destructor: release routine LEX lists, the routine being compiled
  (if any) and the package runtime context.
*/
sp_package::~sp_package()
{
  m_routine_implementations.cleanup();
  m_routine_declarations.cleanup();
  m_body= null_clex_str;
  /*
    NOTE(review): m_current_routine's sphead appears to be owned by the
    package while it is being compiled — destroyed here.
  */
  if (m_current_routine)
    sp_head::destroy(m_current_routine->sphead);
  delete m_rcontext;
}
+
+
+/*
+  Test if two routines have equal specifications
+*/
+
+bool sp_head::eq_routine_spec(const sp_head *sp) const
+{
+  // TODO: Add tests for equal return data types (in case of FUNCTION)
+  // TODO: Add tests for equal argument data types
+  return
+    m_handler->type() == sp->m_handler->type() &&
+    m_pcont->context_var_count() == sp->m_pcont->context_var_count();
+}
+
+
+bool sp_package::validate_after_parser(THD *thd)
+{
+  if (m_handler->type() != SP_TYPE_PACKAGE_BODY)
+    return false;
+  sp_head *sp= sp_cache_lookup(&thd->sp_package_spec_cache, this);
+  sp_package *spec= sp ? sp->get_package() : NULL;
+  DBUG_ASSERT(spec); // CREATE PACKAGE must already be cached
+  return validate_public_routines(thd, spec) ||
+         validate_private_routines(thd);
+}
+
+
+bool sp_package::validate_public_routines(THD *thd, sp_package *spec)
+{
+  /*
+    Check that all routines declared in CREATE PACKAGE
+    have implementations in CREATE PACKAGE BODY.
+  */
+  List_iterator it(spec->m_routine_declarations);
+  for (LEX *lex; (lex= it++); )
+  {
+    bool found= false;
+    DBUG_ASSERT(lex->sphead);
+    List_iterator it2(m_routine_implementations);
+    for (LEX *lex2; (lex2= it2++); )
+    {
+      DBUG_ASSERT(lex2->sphead);
+      if (Sp_handler::eq_routine_name(lex2->sphead->m_name,
+                                      lex->sphead->m_name) &&
+          lex2->sphead->eq_routine_spec(lex->sphead))
+      {
+        found= true;
+        break;
+      }
+    }
+    if (!found)
+    {
+      my_error(ER_PACKAGE_ROUTINE_IN_SPEC_NOT_DEFINED_IN_BODY, MYF(0),
+               ErrConvDQName(lex->sphead).ptr());
+      return true;
+    }
+  }
+  return false;
+}
+
+
+bool sp_package::validate_private_routines(THD *thd)
+{
+  /*
+    Check that all forwad declarations in
+    CREATE PACKAGE BODY have implementations.
+  */
+  List_iterator it(m_routine_declarations);
+  for (LEX *lex; (lex= it++); )
+  {
+    bool found= false;
+    DBUG_ASSERT(lex->sphead);
+    List_iterator it2(m_routine_implementations);
+    for (LEX *lex2; (lex2= it2++); )
+    {
+      DBUG_ASSERT(lex2->sphead);
+      if (Sp_handler::eq_routine_name(lex2->sphead->m_name,
+                                      lex->sphead->m_name) &&
+          lex2->sphead->eq_routine_spec(lex->sphead))
+      {
+        found= true;
+        break;
+      }
+    }
+    if (!found)
+    {
+      my_error(ER_PACKAGE_ROUTINE_FORWARD_DECLARATION_NOT_DEFINED, MYF(0),
+               ErrConvDQName(lex->sphead).ptr());
+      return true;
+    }
+  }
+  return false;
+}
+
+
+LEX *sp_package::LexList::find(const LEX_CSTRING &name,
+                               enum_sp_type type)
+{
+  List_iterator it(*this);
+  for (LEX *lex; (lex= it++); )
+  {
+    DBUG_ASSERT(lex->sphead);
+    const char *dot;
+    if (lex->sphead->m_handler->type() == type &&
+        (dot= strrchr(lex->sphead->m_name.str, '.')))
+    {
+      size_t ofs= dot + 1 - lex->sphead->m_name.str;
+      LEX_CSTRING non_qualified_sphead_name= lex->sphead->m_name;
+      non_qualified_sphead_name.str+= ofs;
+      non_qualified_sphead_name.length-= ofs;
+      if (Sp_handler::eq_routine_name(non_qualified_sphead_name, name))
+        return lex;
+    }
+  }
+  return NULL;
+}
+
+
+LEX *sp_package::LexList::find_qualified(const LEX_CSTRING &name,
+                                         enum_sp_type type)
+{
+  List_iterator it(*this);
+  for (LEX *lex; (lex= it++); )
+  {
+    DBUG_ASSERT(lex->sphead);
+    if (lex->sphead->m_handler->type() == type &&
+        Sp_handler::eq_routine_name(lex->sphead->m_name, name))
+      return lex;
+  }
+  return NULL;
+}
+
+
+void sp_package::init_psi_share()
+{
+  List_iterator it(m_routine_implementations);
+  for (LEX *lex; (lex= it++); )
+  {
+    DBUG_ASSERT(lex->sphead);
+    lex->sphead->init_psi_share();
+  }
+  sp_head::init_psi_share();
+}
+
/**
  Attach this routine's parse context to a LEX and reset per-LEX state
  used during parsing.

  @param lex  The LEX being initialized for this routine.
*/
void
sp_head::init(LEX *lex)
{
  DBUG_ENTER("sp_head::init");

  lex->spcont= m_pcont;

  if (!lex->spcont)
    DBUG_VOID_RETURN;

  /*
    Although trg_table_fields list is used only in triggers we init for all
    types of stored procedures to simplify reset_lex()/restore_lex() code.
  */
  lex->trg_table_fields.empty();

#ifdef PROTECT_STATEMENT_MEMROOT
  executed_counter= 0;
#endif

  DBUG_VOID_RETURN;
}
+
+
+void
+sp_head::init_sp_name(const sp_name *spname)
+{
+  DBUG_ENTER("sp_head::init_sp_name");
+
+  /* Must be initialized in the parser. */
+
+  DBUG_ASSERT(spname && spname->m_db.str && spname->m_db.length);
+
+  /* We have to copy strings to get them into the right memroot. */
+  Database_qualified_name::copy(&main_mem_root, spname->m_db, spname->m_name);
+  m_explicit_name= spname->m_explicit_name;
+  DBUG_VOID_RETURN;
+}
+
+void
+sp_head::init_psi_share()
+{
+  m_sp_share= MYSQL_GET_SP_SHARE(m_handler->type(), m_db.str, static_cast(m_db.length),
+                                 m_name.str, static_cast(m_name.length));
+}
+
+
+void
+sp_head::set_body_start(THD *thd, const char *cpp_body_start)
+{
+  m_cpp_body_begin= cpp_body_start;
+  if (!m_parent)
+    thd->m_parser_state->m_lip.body_utf8_start(thd, cpp_body_start);
+}
+
+
/**
  Record the end of the routine definition: build the parameter string,
  the body (original charset and UTF8) and the full definition string
  from the parser's preprocessed buffer.

  @param thd           Thread handle.
  @param cpp_body_end  End of the body in the preprocessed query buffer.
*/
void
sp_head::set_stmt_end(THD *thd, const char *cpp_body_end)
{
  Lex_input_stream *lip= & thd->m_parser_state->m_lip; /* shortcut */

  /* Make the string of parameters. */

  if (m_param_begin && m_param_end)
  {
    m_params.length= m_param_end - m_param_begin;
    m_params.str= thd->strmake(m_param_begin, m_params.length);
  }

  /* Remember end pointer for further dumping of whole statement. */

  thd->lex->stmt_definition_end= cpp_body_end;

  /* Make the string of body (in the original character set). */

  m_body= thd->strmake_lex_cstring_trim_whitespace(
                 Lex_cstring(m_cpp_body_begin, cpp_body_end));

  /* Make the string of UTF-body. */

  lip->body_utf8_append(cpp_body_end);

  /* Only top-level routines store their own UTF8 body copy; see the
     matching !m_parent check in set_body_start(). */
  if (!m_parent)
    m_body_utf8= thd->strmake_lex_cstring_trim_whitespace(lip->body_utf8());

  /*
    Make the string of whole stored-program-definition query (in the
    original character set).
  */

  m_defstr= thd->strmake_lex_cstring_trim_whitespace(
                   Lex_cstring(lip->get_cpp_buf(), cpp_body_end));
}
+
+
/**
  Destructor: free instructions, the parse context, items, any leftover
  parsing LEXes, internal hashes and the chain of cached recursion
  instances.
*/
sp_head::~sp_head()
{
  LEX *lex;
  sp_instr *i;
  DBUG_ENTER("sp_head::~sp_head");

  /* sp_head::restore_thd_mem_root() must already have been called. */
  DBUG_ASSERT(m_thd == NULL);

  /* Delete every instruction, then the array that held them. */
  for (uint ip = 0 ; (i = get_instr(ip)) ; ip++)
    delete i;
  delete_dynamic(&m_instr);
  delete m_pcont;
  free_items();

  /*
    If we have non-empty LEX stack then we just came out of parser with
    error. Now we should delete all auxiliary LEXes and restore original
    THD::lex. It is safe to not update LEX::ptr because further query
    string parsing and execution will be stopped anyway.
  */
  while ((lex= (LEX *)m_lex.pop()))
  {
    THD *thd= lex->thd;
    thd->lex->sphead= NULL;
    lex_end(thd->lex);
    delete thd->lex;
    thd->lex= lex;
  }

  my_hash_free(&m_sptabs);
  my_hash_free(&m_sroutines);

  /* Free the next recursion instance in the cache chain (recursively). */
  sp_head::destroy(m_next_cached_sp);

  DBUG_VOID_RETURN;
}
+
+
+void sp_package::LexList::cleanup()
+{
+  List_iterator it(*this);
+  for (LEX *lex; (lex= it++); )
+  {
+    lex_end(lex);
+    delete lex;
+  }
+}
+
+
+/**
+  This is only used for result fields from functions (both during
+  fix_length_and_dec() and evaluation).
+*/
+
+Field *
+sp_head::create_result_field(uint field_max_length, const LEX_CSTRING *field_name,
+                             TABLE *table) const
+{
+  Field *field;
+  LEX_CSTRING name;
+
+  DBUG_ENTER("sp_head::create_result_field");
+
+  /*
+    m_return_field_def.length is always set to the field length calculated
+    by the parser, according to the RETURNS clause. See prepare_create_field()
+    in sql_table.cc. Value examples, depending on data type:
+    - 11 for INT                          (character representation length)
+    - 20 for BIGINT                       (character representation length)
+    - 22 for DOUBLE                       (character representation length)
+    - N for CHAR(N) CHARACTER SET latin1  (octet length)
+    - 3*N for CHAR(N) CHARACTER SET utf8  (octet length)
+    - 8 for blob-alike data types         (packed length !!!)
+
+    field_max_length is also set according to the data type in the RETURNS
+    clause but can have different values depending on the execution stage:
+
+    1. During direct execution:
+    field_max_length is 0, because Item_func_sp::fix_length_and_dec() has
+    not been called yet, so Item_func_sp::max_length is 0 by default.
+
+    2a. During PREPARE:
+    field_max_length is 0, because Item_func_sp::fix_length_and_dec()
+    has not been called yet. It's called after create_result_field().
+
+    2b. During EXEC:
+    field_max_length is set to the maximum possible octet length of the
+    RETURNS data type.
+    - N for CHAR(N) CHARACTER SET latin1  (octet length)
+    - 3*N for CHAR(N) CHARACTER SET utf8  (octet length)
+    - 255 for TINYBLOB                    (octet length, not packed length !!!)
+
+    Perhaps we should refactor prepare_create_field() to set
+    Create_field::length to maximum octet length for BLOBs,
+    instead of packed length).
+
+    Note, for integer data types, field_max_length can be bigger
+    than the user specified length, e.g. a field of the INT(1) data type
+    is translated to the item with max_length=11.
+  */
+  DBUG_ASSERT(field_max_length <= m_return_field_def.length ||
+              m_return_field_def.type_handler()->cmp_type() == INT_RESULT ||
+              (current_thd->stmt_arena->is_stmt_execute() &&
+               m_return_field_def.length == 8 &&
+               (m_return_field_def.pack_flag &
+                (FIELDFLAG_BLOB|FIELDFLAG_GEOM))));
+
+  if (field_name)
+    name= *field_name;
+  else
+    name= m_name;
+  field= m_return_field_def.make_field(table->s, /* TABLE_SHARE ptr */
+                                       table->in_use->mem_root,
+                                       &name);
+
+  field->vcol_info= m_return_field_def.vcol_info;
+  if (field)
+    field->init(table);
+
+  DBUG_RETURN(field);
+}
+
+
+int cmp_rqp_locations(Rewritable_query_parameter * const *a,
+                      Rewritable_query_parameter * const *b)
+{
+  return (int)((*a)->pos_in_query - (*b)->pos_in_query);
+}
+
+
+/*
+  StoredRoutinesBinlogging
+  This paragraph applies only to statement-based binlogging. Row-based
+  binlogging does not need anything special like this.
+
+  Top-down overview:
+
+  1. Statements
+
+  Statements that have is_update_query(stmt) == TRUE are written into the
+  binary log verbatim.
+  Examples:
+    UPDATE tbl SET tbl.x = spfunc_w_side_effects()
+    UPDATE tbl SET tbl.x=1 WHERE spfunc_w_side_effect_that_returns_false(tbl.y)
+
+  Statements that have is_update_query(stmt) == FALSE (e.g. SELECTs) are not
+  written into binary log. Instead we catch function calls the statement
+  makes and write it into binary log separately (see #3).
+
+  2. PROCEDURE calls
+
+  CALL statements are not written into binary log. Instead
+  * Any FUNCTION invocation (in SET, IF, WHILE, OPEN CURSOR and other SP
+    instructions) is written into binlog separately.
+
+  * Each statement executed in SP is binlogged separately, according to rules
+    in #1, with the exception that we modify query string: we replace uses
    of SP local variables with NAME_CONST('spvar_name', <value>) calls.
+    This substitution is done in subst_spvars().
+
+  3. FUNCTION calls
+
+  In sp_head::execute_function(), we check
+   * If this function invocation is done from a statement that is written
+     into the binary log.
+   * If there were any attempts to write events to the binary log during
+     function execution (grep for start_union_events and stop_union_events)
+
+   If the answers are No and Yes, we write the function call into the binary
   log as "SELECT spfunc(<param1>, <param2>, ...)"
+
+
+  4. Miscellaneous issues.
+
+  4.1 User variables.
+
+  When we call mysql_bin_log.write() for an SP statement, thd->user_var_events
+  must hold set<{var_name, value}> pairs for all user variables used during
+  the statement execution.
+  This set is produced by tracking user variable reads during statement
+  execution.
+
+  For SPs, this has the following implications:
+  1) thd->user_var_events may contain events from several SP statements and
     needs to be valid after execution of these statements has finished. In
+     order to achieve that, we
+     * Allocate user_var_events array elements on appropriate mem_root (grep
+       for user_var_events_alloc).
+     * Use is_query_in_union() to determine if user_var_event is created.
+
+  2) We need to empty thd->user_var_events after we have wrote a function
+     call. This is currently done by making
+     reset_dynamic(&thd->user_var_events);
     calls in several different places. (TODO: consider moving this into
+     mysql_bin_log.write() function)
+
+  4.2 Auto_increment storage in binlog
+
+  As we may write two statements to binlog from one single logical statement
+  (case of "SELECT func1(),func2()": it is binlogged as "SELECT func1()" and
+  then "SELECT func2()"), we need to reset auto_increment binlog variables
+  after each binlogged SELECT. Otherwise, the auto_increment value of the
+  first SELECT would be used for the second too.
+*/
+
+
+/**
+  Replace thd->query{_length} with a string that one can write to
+  the binlog.
+
+  The binlog-suitable string is produced by replacing references to SP local
+  variables with NAME_CONST('sp_var_name', value) calls.
+
+  @param thd        Current thread.
+  @param instr      Instruction (we look for Item_splocal instances in
+                    instr->free_list)
+  @param query_str  Original query string
+
+  @return
+    - FALSE  on success.
+    thd->query{_length} either has been appropriately replaced or there
+    is no need for replacements.
+    - TRUE   out of memory error.
+*/
+
+static bool
+subst_spvars(THD *thd, sp_instr *instr, LEX_STRING *query_str)
+{
+  DBUG_ENTER("subst_spvars");
+
+  Dynamic_array rewritables(PSI_INSTRUMENT_MEM);
+  char *pbuf;
+  StringBuffer<512> qbuf;
+  Copy_query_with_rewrite acc(thd, query_str->str, query_str->length, &qbuf);
+
+  /* Find rewritable Items used in this statement */
+  for (Item *item= instr->free_list; item; item= item->next)
+  {
+    Rewritable_query_parameter *rqp= item->get_rewritable_query_parameter();
+    if (rqp && rqp->pos_in_query)
+      rewritables.append(rqp);
+  }
+  if (!rewritables.elements())
+    DBUG_RETURN(FALSE);
+
+  rewritables.sort(cmp_rqp_locations);
+
+  thd->query_name_consts= (uint)rewritables.elements();
+
+  for (Rewritable_query_parameter **rqp= rewritables.front();
+       rqp <= rewritables.back(); rqp++)
+  {
+    if (acc.append(*rqp))
+      DBUG_RETURN(TRUE);
+  }
+  if (acc.finalize())
+    DBUG_RETURN(TRUE);
+
+  /*
+    Allocate additional space at the end of the new query string for the
+    query_cache_send_result_to_client function.
+
+    The query buffer layout is:
+       buffer :==
+               The input statement(s)
+            '\0'          Terminating null char
+                  Length of following current database name 2
+                 Name of current database
+                   Flags struct
+  */
+  size_t buf_len= (qbuf.length() + 1 + QUERY_CACHE_DB_LENGTH_SIZE +
+                thd->db.length + QUERY_CACHE_FLAGS_SIZE + 1);
+  if ((pbuf= (char *) alloc_root(thd->mem_root, buf_len)))
+  {
+    char *ptr= pbuf + qbuf.length();
+    memcpy(pbuf, qbuf.ptr(), qbuf.length());
+    *ptr= 0;
+    int2store(ptr+1, thd->db.length);
+  }
+  else
+    DBUG_RETURN(TRUE);
+
+  thd->set_query(pbuf, qbuf.length());
+
+  DBUG_RETURN(FALSE);
+}
+
+
+void Sp_handler_procedure::recursion_level_error(THD *thd,
+                                                 const sp_head *sp) const
+{
+  my_error(ER_SP_RECURSION_LIMIT, MYF(0),
+           static_cast(thd->variables.max_sp_recursion_depth),
+           sp->m_name.str);
+}
+
+
+/**
+  Execute the routine. The main instruction jump loop is there.
+  Assume the parameters already set.
+
+  @param thd                  Thread context.
+  @param merge_da_on_success  Flag specifying if Warning Info should be
+                              propagated to the caller on Completion
+                              Condition or not.
+
+  @todo
+    - Will write this SP statement into binlog separately
+    (TODO: consider changing the condition to "not inside event union")
+
+  @return Error status.
+  @retval
+    FALSE  on success
+  @retval
+    TRUE   on error
+*/
+
+bool
+sp_head::execute(THD *thd, bool merge_da_on_success)
+{
+  DBUG_ENTER("sp_head::execute");
+  char saved_cur_db_name_buf[SAFE_NAME_LEN+1];
+  LEX_STRING saved_cur_db_name=
+    { saved_cur_db_name_buf, sizeof(saved_cur_db_name_buf) };
+  bool cur_db_changed= FALSE;
+  sp_rcontext *ctx= thd->spcont;
+  bool err_status= FALSE;
+  uint ip= 0;
+  sql_mode_t save_sql_mode;
+
+  // TODO(cvicentiu) See if you can drop this bit. This is used to resume
+  // execution from where we left off.
+  if (m_chistics.agg_type == GROUP_AGGREGATE)
+    ip= thd->spcont->instr_ptr;
+
+  bool save_abort_on_warning;
+  Query_arena *old_arena;
+  /* per-instruction arena */
+  MEM_ROOT execute_mem_root;
+  Query_arena execute_arena(&execute_mem_root, STMT_INITIALIZED_FOR_SP),
+              backup_arena;
+  query_id_t old_query_id;
+  CSET_STRING old_query;
+  TABLE *old_derived_tables;
+  TABLE *old_rec_tables;
+  LEX *old_lex;
+  Item_change_list old_change_list;
+  String old_packet;
+  uint old_server_status;
+  const uint status_backup_mask= SERVER_STATUS_CURSOR_EXISTS |
+                                 SERVER_STATUS_LAST_ROW_SENT;
+  MEM_ROOT *user_var_events_alloc_saved= 0;
+  Reprepare_observer *save_reprepare_observer= thd->m_reprepare_observer;
+  Object_creation_ctx *UNINIT_VAR(saved_creation_ctx);
+  Diagnostics_area *da= thd->get_stmt_da();
+  Warning_info sp_wi(da->warning_info_id(), false, true);
+
+  /* this 7*STACK_MIN_SIZE is a complex matter with a long history (see it!) */
+  if (check_stack_overrun(thd, 7 * STACK_MIN_SIZE, (uchar*)&old_packet))
+    DBUG_RETURN(TRUE);
+
+  opt_trace_disable_if_no_security_context_access(thd);
+
+  /* init per-instruction memroot */
+  init_sql_alloc(key_memory_sp_head_execute_root, &execute_mem_root,
+                 MEM_ROOT_BLOCK_SIZE, 0, MYF(0));
+
+  DBUG_ASSERT(!(m_flags & IS_INVOKED));
+  m_flags|= IS_INVOKED;
+  if (m_parent)
+    m_parent->m_invoked_subroutine_count++;
+  m_first_instance->m_first_free_instance= m_next_cached_sp;
+  if (m_next_cached_sp)
+  {
+    DBUG_PRINT("info",
+               ("first free for %p ++: %p->%p  level: %lu  flags %x",
+               m_first_instance, this,
+                m_next_cached_sp,
+                m_next_cached_sp->m_recursion_level,
+                m_next_cached_sp->m_flags));
+  }
+  /*
+    Check that if there are not any instances after this one then
+    pointer to the last instance points on this instance or if there are
+    some instances after this one then recursion level of next instance
+    greater then recursion level of current instance on 1
+  */
+  DBUG_ASSERT((m_next_cached_sp == 0 &&
+               m_first_instance->m_last_cached_sp == this) ||
+              (m_recursion_level + 1 == m_next_cached_sp->m_recursion_level));
+
+  /*
+    NOTE: The SQL Standard does not specify the context that should be
+    preserved for stored routines. However, at SAP/Walldorf meeting it was
+    decided that current database should be preserved.
+  */
+
+  if (m_db.length &&
+      (err_status= mysql_opt_change_db(thd, &m_db, &saved_cur_db_name, FALSE,
+                                       &cur_db_changed)))
+  {
+    goto done;
+  }
+
+  thd->is_slave_error= 0;
+  old_arena= thd->stmt_arena;
+
+  /* Push a new warning information area. */
+  da->copy_sql_conditions_to_wi(thd, &sp_wi);
+  da->push_warning_info(&sp_wi);
+
+  /*
+    Switch query context. This has to be done early as this is sometimes
+    allocated on THD::mem_root
+  */
+  if (m_creation_ctx)
+    saved_creation_ctx= m_creation_ctx->set_n_backup(thd);
+
+  /*
+    We have to save/restore this info when we are changing call level to
+    be able properly do close_thread_tables() in instructions.
+  */
+  old_query_id= thd->query_id;
+  old_query= thd->query_string;
+  old_derived_tables= thd->derived_tables;
+  thd->derived_tables= 0;
+  old_rec_tables= thd->rec_tables;
+  thd->rec_tables= 0;
+  save_sql_mode= thd->variables.sql_mode;
+  thd->variables.sql_mode= m_sql_mode;
+  save_abort_on_warning= thd->abort_on_warning;
+  thd->abort_on_warning= 0;
+  /**
+    When inside a substatement (a stored function or trigger
+    statement), clear the metadata observer in THD, if any.
+    Remember the value of the observer here, to be able
+    to restore it when leaving the substatement.
+
+    We reset the observer to suppress errors when a substatement
+    uses temporary tables. If a temporary table does not exist
+    at start of the main statement, it's not prelocked
+    and thus is not validated with other prelocked tables.
+
+    Later on, when the temporary table is opened, metadata
+    versions mismatch, expectedly.
+
+    The proper solution for the problem is to re-validate tables
+    of substatements (Bug#12257, Bug#27011, Bug#32868, Bug#33000),
+    but it's not implemented yet.
+  */
+  thd->m_reprepare_observer= 0;
+
+  /*
+    It is also more efficient to save/restore current thd->lex once when
+    do it in each instruction
+  */
+  old_lex= thd->lex;
+  /*
+    We should also save Item tree change list to avoid rollback something
+    too early in the calling query.
+  */
+  thd->Item_change_list::move_elements_to(&old_change_list);
+  /*
+    Cursors will use thd->packet, so they may corrupt data which was prepared
+    for sending by upper level. OTOH cursors in the same routine can share this
+    buffer safely so let use routine-local packet instead of having own
+    packet buffer for each cursor.
+
+    It is probably safe to use same thd->convert_buff everywhere.
+  */
+  old_packet.swap(thd->packet);
+  old_server_status= thd->server_status & status_backup_mask;
+
+  /*
+    Switch to per-instruction arena here. We can do it since we cleanup
+    arena after every instruction.
+  */
+  thd->set_n_backup_active_arena(&execute_arena, &backup_arena);
+
+  /*
+    Save callers arena in order to store instruction results and out
+    parameters in it later during sp_eval_func_item()
+  */
+  thd->spcont->callers_arena= &backup_arena;
+
+#if defined(ENABLED_PROFILING)
+  /* Discard the initial part of executing routines. */
+  thd->profiling.discard_current_query();
+#endif
+  sp_instr *i;
+  DEBUG_SYNC(thd, "sp_head_execute_before_loop");
+  do
+  {
+#if defined(ENABLED_PROFILING)
+    /*
+     Treat each "instr" of a routine as discrete unit that could be profiled.
+     Profiling only records information for segments of code that set the
+     source of the query, and almost all kinds of instructions in s-p do not.
+    */
+    thd->profiling.finish_current_query();
+    thd->profiling.start_new_query("continuing inside routine");
+#endif
+
+    /* get_instr returns NULL when we're done. */
+    i = get_instr(ip);
+    if (i == NULL)
+    {
+#if defined(ENABLED_PROFILING)
+      thd->profiling.discard_current_query();
+#endif
+      thd->spcont->quit_func= TRUE;
+      break;
+    }
+
+    /* Reset number of warnings for this query. */
+    thd->get_stmt_da()->reset_for_next_command();
+
+    DBUG_PRINT("execute", ("Instruction %u", ip));
+
+    /*
+      We need to reset start_time to allow for time to flow inside a stored
+      procedure. This is only done for SP since time is suppose to be constant
+      during execution of triggers and functions.
+    */
+    reset_start_time_for_sp(thd);
+
+    /*
+      We have to set thd->stmt_arena before executing the instruction
+      to store in the instruction free_list all new items, created
+      during the first execution (for example expanding of '*' or the
+      items made during other permanent subquery transformations).
+    */
+    thd->stmt_arena= i;
+
+    /*
+      Will write this SP statement into binlog separately.
+      TODO: consider changing the condition to "not inside event union".
+    */
+    if (thd->locked_tables_mode <= LTM_LOCK_TABLES)
+    {
+      user_var_events_alloc_saved= thd->user_var_events_alloc;
+      thd->user_var_events_alloc= thd->mem_root;
+    }
+
+    sql_digest_state *parent_digest= thd->m_digest;
+    thd->m_digest= NULL;
+
+#ifdef WITH_WSREP
+    if (WSREP(thd) && thd->wsrep_next_trx_id() == WSREP_UNDEFINED_TRX_ID)
+    {
+      thd->set_wsrep_next_trx_id(thd->query_id);
+      WSREP_DEBUG("assigned new next trx ID for SP,  trx id: %" PRIu64, thd->wsrep_next_trx_id());
+    }
+#endif /* WITH_WSREP */
+
+#ifdef HAVE_PSI_STATEMENT_INTERFACE
+    PSI_statement_locker_state state;
+    PSI_statement_locker *parent_locker;
+    PSI_statement_info *psi_info = i->get_psi_info();
+
+    parent_locker= thd->m_statement_psi;
+    thd->m_statement_psi= MYSQL_START_STATEMENT(& state, psi_info->m_key,
+      thd->db.str, thd->db.length, thd->charset(), m_sp_share);
+#endif
+
+    err_status= i->execute(thd, &ip);
+
+#ifdef PROTECT_STATEMENT_MEMROOT
+    if (!err_status)
+      i->mark_as_run();
+#endif
+
+#ifdef HAVE_PSI_STATEMENT_INTERFACE
+    MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da());
+    thd->m_statement_psi= parent_locker;
+#endif
+
+#ifdef WITH_WSREP
+    if (WSREP(thd))
+    {
+      if (((thd->wsrep_trx().state() == wsrep::transaction::s_executing || thd->in_sub_stmt) &&
+           (thd->is_fatal_error || thd->killed)))
+      {
+        WSREP_DEBUG("SP abort err status %d in sub %d trx state %d",
+                    err_status, thd->in_sub_stmt, thd->wsrep_trx().state());
+        err_status= 1;
+        thd->is_fatal_error= 1;
+        /*
+          SP was killed, and it is not due to a wsrep conflict.
+          We skip after_command hook at this point because
+          otherwise it clears the error, and cleans up the
+          whole transaction. For now we just return and finish
+          our handling once we are back to mysql_parse.
+
+          Same applies to a SP execution, which was aborted due
+          to wsrep related conflict, but which is executing as sub statement.
+          SP in sub statement level should not commit not rollback,
+          we have to call for rollback is up-most SP level.
+        */
+        WSREP_DEBUG("Skipping after_command hook for killed SP");
+      }
+      else
+      {
+        const bool must_replay= wsrep_must_replay(thd);
+        if (must_replay)
+        {
+          WSREP_DEBUG("MUST_REPLAY set after SP, err_status %d trx state: %d",
+                      err_status, thd->wsrep_trx().state());
+        }
+
+        if (wsrep_thd_is_local(thd))
+          (void) wsrep_after_statement(thd);
+
+        /*
+          Reset the return code to zero if the transaction was
+          replayed successfully.
+        */
+        if (must_replay && !wsrep_current_error(thd))
+        {
+          err_status= 0;
+          thd->get_stmt_da()->reset_diagnostics_area();
+        }
+        /*
+          Final wsrep error status for statement is known only after
+          wsrep_after_statement() call. If the error is set, override
+          error in thd diagnostics area and reset wsrep client_state error
+          so that the error does not get propagated via client-server protocol.
+        */
+        if (wsrep_current_error(thd))
+        {
+          wsrep_override_error(thd, wsrep_current_error(thd),
+                               wsrep_current_error_status(thd));
+          thd->wsrep_cs().reset_error();
+          /* Reset also thd->killed if it has been set during BF abort. */
+          if (killed_mask_hard(thd->killed) == KILL_QUERY)
+            thd->killed= NOT_KILLED;
+          /* if failed transaction was not replayed, must return with error from here */
+          if (!must_replay) err_status = 1;
+        }
+      }
+    }
+#endif /* WITH_WSREP */
+    thd->m_digest= parent_digest;
+
+    if (i->free_list)
+      cleanup_items(i->free_list);
+
+    /*
+      If we've set thd->user_var_events_alloc to mem_root of this SP
+      statement, clean all the events allocated in it.
+    */
+    if (thd->locked_tables_mode <= LTM_LOCK_TABLES)
+    {
+      reset_dynamic(&thd->user_var_events);
+      thd->user_var_events_alloc= user_var_events_alloc_saved;
+    }
+
+    /* we should cleanup free_list and memroot, used by instruction */
+    thd->cleanup_after_query();
+    free_root(&execute_mem_root, MYF(0));
+
+    /*
+      Find and process SQL handlers unless it is a fatal error (fatal
+      errors are not catchable by SQL handlers) or the connection has been
+      killed during execution.
+    */
+    if (likely(!thd->is_fatal_error) && likely(!thd->killed_errno()) &&
+        ctx->handle_sql_condition(thd, &ip, i))
+    {
+      err_status= FALSE;
+    }
+
+    /* Reset sp_rcontext::end_partial_result_set flag. */
+    ctx->end_partial_result_set= FALSE;
+
+#ifdef PROTECT_STATEMENT_MEMROOT
+    if (thd->is_error())
+    {
+      // Don't count a call ended with an error as normal run
+      executed_counter= 0;
+      main_mem_root.read_only= 0;
+      reset_instrs_executed_counter();
+    }
+#endif
+
+  } while (!err_status && likely(!thd->killed) &&
+           likely(!thd->is_fatal_error) &&
+           !thd->spcont->pause_state);
+
+#if defined(ENABLED_PROFILING)
+  thd->profiling.finish_current_query();
+  thd->profiling.start_new_query("tail end of routine");
+#endif
+
+  /* Restore query context. */
+
+  if (m_creation_ctx)
+    m_creation_ctx->restore_env(thd, saved_creation_ctx);
+
+  /* Restore arena. */
+
+  thd->restore_active_arena(&execute_arena, &backup_arena);
+
+  /* Only pop cursors when we're done with group aggregate running. */
+  if (m_chistics.agg_type != GROUP_AGGREGATE ||
+      (m_chistics.agg_type == GROUP_AGGREGATE && thd->spcont->quit_func))
+    thd->spcont->pop_all_cursors(thd); // To avoid memory leaks after an error
+
+  /* Restore all saved */
+  if (m_chistics.agg_type == GROUP_AGGREGATE)
+    thd->spcont->instr_ptr= ip;
+  thd->server_status= (thd->server_status & ~status_backup_mask) | old_server_status;
+  old_packet.swap(thd->packet);
+  DBUG_ASSERT(thd->Item_change_list::is_empty());
+  old_change_list.move_elements_to(thd);
+  thd->lex= old_lex;
+  thd->set_query_id(old_query_id);
+  thd->set_query_inner(old_query);
+  DBUG_ASSERT(!thd->derived_tables);
+  thd->derived_tables= old_derived_tables;
+  thd->rec_tables= old_rec_tables;
+  thd->variables.sql_mode= save_sql_mode;
+  thd->abort_on_warning= save_abort_on_warning;
+  thd->m_reprepare_observer= save_reprepare_observer;
+
+  thd->stmt_arena= old_arena;
+  state= STMT_EXECUTED;
+
+  /*
+    Restore the caller's original warning information area:
+      - warnings generated during trigger execution should not be
+        propagated to the caller on success;
+      - if there was an exception during execution, warning info should be
+        propagated to the caller in any case.
+  */
+  da->pop_warning_info();
+
+  if (err_status || merge_da_on_success)
+  {
+    /*
+      If a routine body is empty or if a routine did not generate any warnings,
+      do not duplicate our own contents by appending the contents of the called
+      routine. We know that the called routine did not change its warning info.
+
+      On the other hand, if the routine body is not empty and some statement in
+      the routine generates a warning or uses tables, warning info is guaranteed
+      to have changed. In this case we know that the routine warning info
+      contains only new warnings, and thus we perform a copy.
+    */
+    if (da->warning_info_changed(&sp_wi))
+    {
+      /*
+        If the invocation of the routine was a standalone statement,
+        rather than a sub-statement, in other words, if it's a CALL
+        of a procedure, rather than invocation of a function or a
+        trigger, we need to clear the current contents of the caller's
+        warning info.
+
+        This is per MySQL rules: if a statement generates a warning,
+        warnings from the previous statement are flushed.  Normally
+        it's done in push_warning(). However, here we don't use
+        push_warning() to avoid invocation of condition handlers or
+        escalation of warnings to errors.
+      */
+      da->opt_clear_warning_info(thd->query_id);
+      da->copy_sql_conditions_from_wi(thd, &sp_wi);
+      da->remove_marked_sql_conditions();
+      if (i != NULL)
+        push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
+                            ER_SP_STACK_TRACE,
+                            ER_THD(thd, ER_SP_STACK_TRACE),
+                            i->m_lineno,
+                            m_qname.str != NULL ? m_qname.str :
+                                                  "anonymous block");
+    }
+  }
+
+ done:
+  DBUG_PRINT("info", ("err_status: %d  killed: %d  is_slave_error: %d  report_error: %d",
+                      err_status, thd->killed, thd->is_slave_error,
+                      thd->is_error()));
+
+  if (thd->killed)
+    err_status= TRUE;
+  /*
+    If the DB has changed, the pointer has changed too, but the
+    original thd->db will then have been freed
+  */
+  if (cur_db_changed && thd->killed != KILL_CONNECTION)
+  {
+    /*
+      Force switching back to the saved current database, because it may be
+      NULL. In this case, mysql_change_db() would generate an error.
+    */
+
+    err_status|= mysql_change_db(thd, (LEX_CSTRING*)&saved_cur_db_name, TRUE) != 0;
+  }
+
+#ifdef PROTECT_STATEMENT_MEMROOT
+  if (!err_status)
+  {
+    if (!main_mem_root.read_only &&
+        has_all_instrs_executed())
+    {
+      main_mem_root.read_only= 1;
+    }
+    ++executed_counter;
+    DBUG_PRINT("info", ("execute counter: %lu", executed_counter));
+  }
+#endif
+
+  m_flags&= ~IS_INVOKED;
+  if (m_parent)
+    m_parent->m_invoked_subroutine_count--;
+  DBUG_PRINT("info",
+             ("first free for %p --: %p->%p, level: %lu, flags %x",
+              m_first_instance,
+              m_first_instance->m_first_free_instance,
+              this, m_recursion_level, m_flags));
+  /*
+    Check that we have one of following:
+
+    1) there are not free instances which means that this instance is last
+    in the list of instances (pointer to the last instance point on it and
+    ther are not other instances after this one in the list)
+
+    2) There are some free instances which mean that first free instance
+    should go just after this one and recursion level of that free instance
+    should be on 1 more then recursion level of this instance.
+  */
+  DBUG_ASSERT((m_first_instance->m_first_free_instance == 0 &&
+               this == m_first_instance->m_last_cached_sp &&
+               m_next_cached_sp == 0) ||
+              (m_first_instance->m_first_free_instance != 0 &&
+               m_first_instance->m_first_free_instance == m_next_cached_sp &&
+               m_first_instance->m_first_free_instance->m_recursion_level ==
+               m_recursion_level + 1));
+  m_first_instance->m_first_free_instance= this;
+
+  DBUG_RETURN(err_status);
+}
+
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+/**
+  Switch to the routine's definer security context (for SUID routines)
+  and verify that the routine may still be executed in that context.
+
+  If the context was switched but the EXECUTE re-check fails (e.g. the
+  privilege was revoked after the routine was cached), the original
+  context is restored before returning.
+
+  @param thd         thread handle
+  @param sp          stored routine to change the context for
+  @param save_ctx    [out] previous security context (0 if not switched)
+
+  @todo
+    Cache if the definer has the right to use the object on the
+    first usage and only reset the cache if someone does a GRANT
+    statement that 'may' affect this.
+
+  @retval
+    TRUE   if there was a error, and the context wasn't changed.
+  @retval
+    FALSE  if the context was changed.
+*/
+
+bool
+set_routine_security_ctx(THD *thd, sp_head *sp, Security_context **save_ctx)
+{
+  *save_ctx= 0;
+
+  /* Only SUID routines run under the definer's credentials. */
+  if (sp->suid() != SP_IS_NOT_SUID)
+  {
+    if (sp->m_security_ctx.change_security_context(thd, &sp->m_definer.user,
+                                                   &sp->m_definer.host,
+                                                   &sp->m_db, save_ctx))
+      return TRUE;
+  }
+
+  /*
+    If no switch happened, or the new context still has EXECUTE access,
+    we are done.  Someone may have revoked the right to use the routine
+    from this user in the meantime, hence the re-check.
+  */
+  if (*save_ctx == NULL || !sp->check_execute_access(thd))
+    return FALSE;
+
+  /* Access denied under the definer context: roll the switch back. */
+  sp->m_security_ctx.restore_security_context(thd, *save_ctx);
+  *save_ctx= 0;
+  return TRUE;
+}
+#endif // ! NO_EMBEDDED_ACCESS_CHECKS
+
+
+bool sp_head::check_execute_access(THD *thd) const
+{
+  /* Package members delegate the access check to the enclosing package. */
+  if (m_parent)
+    return m_parent->check_execute_access(thd);
+  return check_routine_access(thd, EXECUTE_ACL, &m_db, &m_name,
+                              m_handler, false);
+}
+
+
+/**
+  Create rcontext optionally using the routine security.
+  This is important for sql_mode=ORACLE to make sure that the invoker has
+  access to the tables mentioned in the %TYPE references.
+
+  In non-Oracle sql_modes we do not need access to any tables,
+  so we can omit the security context switch for performance purposes.
+
+  @param thd                  Thread handle
+  @param ret_value            Return-value field (functions), or NULL
+  @param defs                 Variable/parameter definitions to instantiate
+  @param switch_security_ctx  Resolve %TYPE references under the routine's
+                              security context if true
+  @retval           NULL - error (access denied or EOM)
+  @retval          !NULL - success (the invoker has rights to all %TYPE tables)
+*/
+
+sp_rcontext *sp_head::rcontext_create(THD *thd, Field *ret_value,
+                                      Row_definition_list *defs,
+                                      bool switch_security_ctx)
+{
+  /* Fast path: no %TYPE references means no table access is needed. */
+  if (!(m_flags & HAS_COLUMN_TYPE_REFS))
+    return sp_rcontext::create(thd, this, m_pcont, ret_value, *defs);
+
+  sp_rcontext *rctx= NULL;
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  Security_context *saved_ctx;
+  if (switch_security_ctx &&
+      set_routine_security_ctx(thd, this, &saved_ctx))
+    return NULL;
+#endif
+  /* Resolve %TYPE references first; build the frame only on success. */
+  if (defs->resolve_type_refs(thd) == false)
+    rctx= sp_rcontext::create(thd, this, m_pcont, ret_value, *defs);
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  if (switch_security_ctx)
+    m_security_ctx.restore_security_context(thd, saved_ctx);
+#endif
+  return rctx;
+}
+
+
+/**
+  Create a runtime context for a routine invoked with a list of Item
+  arguments (e.g. CALL).
+
+  NOTE(review): the extracted patch had lost the template argument of
+  List (angle brackets stripped as markup); restored to List<Item> to
+  match the sibling overloads and the declaration site.
+
+  @param thd        Thread handle
+  @param ret_value  Return-value field, or NULL
+  @param args       Actual arguments; must be non-NULL
+  @retval NULL   error
+  @retval !NULL  the new runtime context
+*/
+
+sp_rcontext *sp_head::rcontext_create(THD *thd, Field *ret_value,
+                                      List<Item> *args)
+{
+  DBUG_ASSERT(args);
+  Row_definition_list defs;
+  m_pcont->retrieve_field_definitions(&defs);
+  if (defs.adjust_formal_params_to_actual_params(thd, args))
+    return NULL;
+  return rcontext_create(thd, ret_value, &defs, true);
+}
+
+
+sp_rcontext *sp_head::rcontext_create(THD *thd, Field *ret_value,
+                                      Item **args, uint arg_count)
+{
+  /*
+    Collect the formal parameter definitions, adapt them to the actual
+    arguments, then build the runtime frame under the routine security.
+  */
+  Row_definition_list field_defs;
+  m_pcont->retrieve_field_definitions(&field_defs);
+  if (field_defs.adjust_formal_params_to_actual_params(thd, args, arg_count))
+    return NULL;
+  return rcontext_create(thd, ret_value, &field_defs, true);
+}
+
+
+/**
+  Execute trigger stored program.
+
+  - changes security context for triggers
+  - switch to new memroot
+  - call sp_head::execute
+  - restore old memroot
+  - restores security context
+
+  @param thd               Thread handle
+  @param db_name           database name of the subject table
+  @param table_name        name of the subject table
+  @param grant_info        GRANT_INFO structure to be filled with
+                           information about definer's privileges
+                           on subject table
+
+  @todo
+    - TODO: we should create sp_rcontext once per command and reuse it
+    on subsequent executions of a trigger.
+
+  @retval
+    FALSE  on success
+  @retval
+    TRUE   on error
+*/
+
+bool
+sp_head::execute_trigger(THD *thd,
+                         const LEX_CSTRING *db_name,
+                         const LEX_CSTRING *table_name,
+                         GRANT_INFO *grant_info)
+{
+  /* Save the caller's runtime context; restored before returning. */
+  sp_rcontext *octx = thd->spcont;
+  sp_rcontext *nctx = NULL;
+  bool err_status= FALSE;
+  MEM_ROOT call_mem_root;
+  Query_arena call_arena(&call_mem_root, Query_arena::STMT_INITIALIZED_FOR_SP);
+  Query_arena backup_arena;
+  DBUG_ENTER("sp_head::execute_trigger");
+  DBUG_PRINT("info", ("trigger %s", m_name.str));
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  Security_context *save_ctx= NULL;
+
+
+  /* Triggers always run as the definer when SUID. */
+  if (suid() != SP_IS_NOT_SUID &&
+      m_security_ctx.change_security_context(thd,
+                                             &m_definer.user,
+                                             &m_definer.host,
+                                             &m_db,
+                                             &save_ctx))
+    DBUG_RETURN(TRUE);
+
+  /*
+    Fetch information about table-level privileges for subject table into
+    GRANT_INFO instance. The access check itself will happen in
+    Item_trigger_field, where this information will be used along with
+    information about column-level privileges.
+  */
+
+  fill_effective_table_privileges(thd,
+                                  grant_info,
+                                  db_name->str,
+                                  table_name->str);
+
+  /* Check that the definer has TRIGGER privilege on the subject table. */
+
+  if (!(grant_info->privilege & TRIGGER_ACL))
+  {
+    char priv_desc[128];
+    get_privilege_desc(priv_desc, sizeof(priv_desc), TRIGGER_ACL);
+
+    my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0), priv_desc,
+             thd->security_ctx->priv_user, thd->security_ctx->host_or_ip,
+             db_name->str, table_name->str);
+
+    m_security_ctx.restore_security_context(thd, save_ctx);
+    DBUG_RETURN(TRUE);
+  }
+#endif // NO_EMBEDDED_ACCESS_CHECKS
+
+  /*
+    Prepare arena and memroot for objects which lifetime is whole
+    duration of trigger call (sp_rcontext, it's tables and items,
+    sp_cursor and Item_cache holders for case expressions).  We can't
+    use caller's arena/memroot for those objects because in this case
+    some fixed amount of memory will be consumed for each trigger
+    invocation and so statements which involve lot of them will hog
+    memory.
+
+    TODO: we should create sp_rcontext once per command and reuse it
+    on subsequent executions of a trigger.
+  */
+  init_sql_alloc(key_memory_sp_head_call_root,
+                 &call_mem_root, MEM_ROOT_BLOCK_SIZE, 0, MYF(0));
+  thd->set_n_backup_active_arena(&call_arena, &backup_arena);
+
+  Row_definition_list defs;
+  m_pcont->retrieve_field_definitions(&defs);
+  /* No security-context switch here: it was already done above. */
+  if (!(nctx= rcontext_create(thd, NULL, &defs, false)))
+  {
+    err_status= TRUE;
+    goto err_with_cleanup;
+  }
+
+  thd->spcont= nctx;
+
+  /* Run the trigger body under the new runtime frame. */
+  MYSQL_RUN_SP(this, err_status= execute(thd, FALSE));
+
+err_with_cleanup:
+  thd->restore_active_arena(&call_arena, &backup_arena);
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  m_security_ctx.restore_security_context(thd, save_ctx);
+#endif // NO_EMBEDDED_ACCESS_CHECKS
+
+  delete nctx;
+  call_arena.free_items();
+  free_root(&call_mem_root, MYF(0));
+  thd->spcont= octx;
+
+  if (thd->killed)
+    thd->send_kill_message();
+
+  DBUG_RETURN(err_status);
+}
+
+
+/*
+  Execute the package initialization section.
+
+  NOTE(review): the extracted patch had lost the template argument of
+  List (angle brackets stripped as markup); restored to List<Item> to
+  match execute_procedure(THD*, List<Item>*).
+
+  @param thd  Thread handle
+  @retval false  success (or already instantiated)
+  @retval true   error; m_is_instantiated is reset so a retry is possible
+*/
+
+bool sp_package::instantiate_if_needed(THD *thd)
+{
+  List<Item> args;
+  if (m_is_instantiated)
+    return false;
+  /*
+    Set m_is_instantiated to true early, to avoid recursion in case if
+    the package initialization section calls routines from the same package.
+  */
+  m_is_instantiated= true;
+  /*
+    Check that the initialization section doesn't contain Dynamic SQL
+    and doesn't return result sets: such stored procedures can't
+    be called from a function or trigger.
+  */
+  if (thd->in_sub_stmt)
+  {
+    const char *where= (thd->in_sub_stmt & SUB_STMT_TRIGGER ?
+                        "trigger" : "function");
+    if (is_not_allowed_in_function(where))
+      goto err;
+  }
+
+  /* The initialization section runs as a procedure with no arguments. */
+  args.elements= 0;
+  if (execute_procedure(thd, &args))
+    goto err;
+  return false;
+err:
+  /* Undo the early flag so instantiation can be retried. */
+  m_is_instantiated= false;
+  return true;
+}
+
+
+/**
+  Execute a function.
+
+   - evaluate parameters
+   - changes security context for SUID routines
+   - switch to new memroot
+   - call sp_head::execute
+   - restore old memroot
+   - evaluate the return value
+   - restores security context
+
+  @param thd               Thread handle
+  @param argp              Passed arguments (these are items from containing
+                           statement?)
+  @param argcount          Number of passed arguments. We need to check if
+                           this is correct.
+  @param return_value_fld  Save result here.
+  @param func_ctx          [in/out] Cached runtime context; created here on
+                           the first call and reused by the caller afterwards.
+  @param call_arena        Arena owning objects whose lifetime spans the
+                           whole function call.
+
+  @todo
+    We should create sp_rcontext once per command and reuse
+    it on subsequent executions of a function/trigger.
+
+  @todo
+    In future we should associate call arena/mem_root with
+    sp_rcontext and allocate all these objects (and sp_rcontext
+    itself) on it directly rather than juggle with arenas.
+
+  @retval
+    FALSE  on success
+  @retval
+    TRUE   on error
+*/
+
+bool
+sp_head::execute_function(THD *thd, Item **argp, uint argcount,
+                          Field *return_value_fld, sp_rcontext **func_ctx,
+                          Query_arena *call_arena)
+{
+  ulonglong UNINIT_VAR(binlog_save_options);
+  bool need_binlog_call= FALSE;
+  uint arg_no;
+  /* Caller's runtime frame; restored on exit. */
+  sp_rcontext *octx = thd->spcont;
+  char buf[STRING_BUFFER_USUAL_SIZE];
+  String binlog_buf(buf, sizeof(buf), &my_charset_bin);
+  bool err_status= FALSE;
+  Query_arena backup_arena;
+  DBUG_ENTER("sp_head::execute_function");
+  DBUG_PRINT("info", ("function %s", m_name.str));
+
+  /* Run the package initialization section first, if any. */
+  if (m_parent && m_parent->instantiate_if_needed(thd))
+    DBUG_RETURN(true);
+
+  /*
+    Check that the function is called with all specified arguments.
+
+    If it is not, use my_error() to report an error, or it will not terminate
+    the invoking query properly.
+  */
+  if (argcount != m_pcont->context_var_count())
+  {
+    /*
+      Need to use my_error here, or it will not terminate the
+      invoking query properly.
+    */
+    my_error(ER_SP_WRONG_NO_OF_ARGS, MYF(0),
+             "FUNCTION", ErrConvDQName(this).ptr(),
+             m_pcont->context_var_count(), argcount);
+    DBUG_RETURN(TRUE);
+  }
+  /*
+    Prepare arena and memroot for objects which lifetime is whole
+    duration of function call (sp_rcontext, it's tables and items,
+    sp_cursor and Item_cache holders for case expressions).
+    We can't use caller's arena/memroot for those objects because
+    in this case some fixed amount of memory will be consumed for
+    each function/trigger invocation and so statements which involve
+    lot of them will hog memory.
+    TODO: we should create sp_rcontext once per command and reuse
+    it on subsequent executions of a function/trigger.
+  */
+  if (!(*func_ctx))
+  {
+    thd->set_n_backup_active_arena(call_arena, &backup_arena);
+
+    if (!(*func_ctx= rcontext_create(thd, return_value_fld, argp, argcount)))
+    {
+      thd->restore_active_arena(call_arena, &backup_arena);
+      err_status= TRUE;
+      goto err_with_cleanup;
+    }
+
+    /*
+      We have to switch temporarily back to callers arena/memroot.
+      Function arguments belong to the caller and so the may reference
+      memory which they will allocate during calculation long after
+      this function call will be finished (e.g. in Item::cleanup()).
+    */
+    thd->restore_active_arena(call_arena, &backup_arena);
+  }
+
+  /* Pass arguments. */
+  for (arg_no= 0; arg_no < argcount; arg_no++)
+  {
+    /* Arguments must be fixed in Item_func_sp::fix_fields */
+    DBUG_ASSERT(argp[arg_no]->fixed());
+
+    err_status= bind_input_param(thd, argp[arg_no], arg_no, *func_ctx, TRUE);
+    if (err_status)
+      goto err_with_cleanup;
+  }
+
+  /*
+    If row-based binlogging, we don't need to binlog the function's call, let
+    each substatement be binlogged its way.
+  */
+  need_binlog_call= mysql_bin_log.is_open() &&
+                    (thd->variables.option_bits & OPTION_BIN_LOG) &&
+                    !thd->is_current_stmt_binlog_format_row();
+
+  /*
+    Remember the original arguments for unrolled replication of functions
+    before they are changed by execution.
+  */
+  if (need_binlog_call)
+  {
+    /* Build "SELECT db.func(arg1,...)" for statement-based binlogging. */
+    binlog_buf.length(0);
+    binlog_buf.append(STRING_WITH_LEN("SELECT "));
+    append_identifier(thd, &binlog_buf, &m_db);
+    binlog_buf.append('.');
+    append_identifier(thd, &binlog_buf, &m_name);
+    binlog_buf.append('(');
+    for (arg_no= 0; arg_no < argcount; arg_no++)
+    {
+      String str_value_holder;
+      String *str_value;
+
+      if (arg_no)
+        binlog_buf.append(',');
+
+      Item_field *item= (*func_ctx)->get_parameter(arg_no);
+      str_value= item->type_handler()->print_item_value(thd, item,
+                                                        &str_value_holder);
+      if (str_value)
+        binlog_buf.append(*str_value);
+      else
+        binlog_buf.append(NULL_clex_str);
+    }
+    binlog_buf.append(')');
+  }
+  /* Install the function's runtime frame. */
+  thd->spcont= *func_ctx;
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  Security_context *save_security_ctx;
+  if (set_routine_security_ctx(thd, this, &save_security_ctx))
+  {
+    err_status= TRUE;
+    goto err_with_cleanup;
+  }
+#endif
+
+  if (need_binlog_call)
+  {
+    query_id_t q;
+    reset_dynamic(&thd->user_var_events);
+    /*
+      In case of artificially constructed events for function calls
+      we have separate union for each such event and hence can't use
+      query_id of real calling statement as the start of all these
+      unions (this will break logic of replication of user-defined
+      variables). So we use artifical value which is guaranteed to
+      be greater than all query_id's of all statements belonging
+      to previous events/unions.
+      Possible alternative to this is logging of all function invocations
+      as one select and not resetting THD::user_var_events before
+      each invocation.
+    */
+    q= get_query_id();
+    mysql_bin_log.start_union_events(thd, q + 1);
+    binlog_save_options= thd->variables.option_bits;
+    thd->variables.option_bits&= ~OPTION_BIN_LOG;
+  }
+
+  opt_trace_disable_if_no_stored_proc_func_access(thd, this);
+  /*
+    Switch to call arena/mem_root so objects like sp_cursor or
+    Item_cache holders for case expressions can be allocated on it.
+
+    TODO: In future we should associate call arena/mem_root with
+          sp_rcontext and allocate all these objects (and sp_rcontext
+          itself) on it directly rather than juggle with arenas.
+  */
+  thd->set_n_backup_active_arena(call_arena, &backup_arena);
+
+  MYSQL_RUN_SP(this, err_status= execute(thd, TRUE));
+
+  thd->restore_active_arena(call_arena, &backup_arena);
+
+  if (need_binlog_call)
+  {
+    mysql_bin_log.stop_union_events(thd);
+    thd->variables.option_bits= binlog_save_options;
+    if (thd->binlog_evt_union.unioned_events)
+    {
+      int errcode = query_error_code(thd, thd->killed == NOT_KILLED);
+      Query_log_event qinfo(thd, binlog_buf.ptr(), binlog_buf.length(),
+                            thd->binlog_evt_union.unioned_events_trans, FALSE, FALSE, errcode);
+      if (mysql_bin_log.write(&qinfo) &&
+          thd->binlog_evt_union.unioned_events_trans)
+      {
+        push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
+                     "Invoked ROUTINE modified a transactional table but MySQL "
+                     "failed to reflect this change in the binary log");
+        err_status= TRUE;
+      }
+      reset_dynamic(&thd->user_var_events);
+      /* Forget those values, in case more function calls are binlogged: */
+      thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt= 0;
+      thd->auto_inc_intervals_in_cur_stmt_for_binlog.empty();
+    }
+  }
+
+  if (!err_status && thd->spcont->quit_func)
+  {
+    /* We need result only in function but not in trigger */
+
+    if (!(*func_ctx)->is_return_value_set())
+    {
+      my_error(ER_SP_NORETURNEND, MYF(0), m_name.str);
+      err_status= TRUE;
+    }
+    else
+    {
+      /*
+        Copy back all OUT or INOUT values to the previous frame, or
+        set global user variables
+      */
+      for (arg_no= 0; arg_no < argcount; arg_no++)
+      {
+        err_status= bind_output_param(thd, argp[arg_no], arg_no, octx, *func_ctx);
+        if (err_status)
+          break;
+      }
+    }
+  }
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  m_security_ctx.restore_security_context(thd, save_security_ctx);
+#endif
+
+err_with_cleanup:
+  thd->spcont= octx;
+
+  /*
+    Issue unsafe-statement warnings now if we are back at top level
+    (not inside another routine) and not inside a binlog union group.
+  */
+  if (need_binlog_call && 
+      thd->spcont == NULL && !thd->binlog_evt_union.do_union)
+    thd->issue_unsafe_warnings();
+
+  DBUG_RETURN(err_status);
+}
+
+
+/**
+  Execute a procedure.
+
+  The function does the following steps:
+   - Set all parameters
+   - changes security context for SUID routines
+   - call sp_head::execute
+   - copy back values of INOUT and OUT parameters
+   - restores security context
+
+  @param thd    Thread handle
+  @param args   List of values passed as arguments.
+
+  @retval
+    FALSE  on success
+  @retval
+    TRUE   on error
+*/
+
+bool
+sp_head::execute_procedure(THD *thd, List *args)
+{
+  bool err_status= FALSE;
+  uint params = m_pcont->context_var_count();
+  /* Query start time may be reset in a multi-stmt SP; keep this for later. */
+  ulonglong utime_before_sp_exec= thd->utime_after_lock;
+  sp_rcontext *save_spcont, *octx;
+  sp_rcontext *nctx = NULL;
+  bool save_enable_slow_log;
+  bool save_log_general= false;
+  sp_package *pkg= get_package();
+  DBUG_ENTER("sp_head::execute_procedure");
+  DBUG_PRINT("info", ("procedure %s", m_name.str));
+
+  if (m_parent && m_parent->instantiate_if_needed(thd))
+    DBUG_RETURN(true);
+
+  if (args->elements != params)
+  {
+    my_error(ER_SP_WRONG_NO_OF_ARGS, MYF(0), "PROCEDURE",
+             ErrConvDQName(this).ptr(), params, args->elements);
+    DBUG_RETURN(TRUE);
+  }
+
+  save_spcont= octx= thd->spcont;
+  if (! octx)
+  {
+    /* Create a temporary old context. */
+    if (!(octx= rcontext_create(thd, NULL, args)))
+    {
+      DBUG_PRINT("error", ("Could not create octx"));
+      DBUG_RETURN(TRUE);
+    }
+
+    thd->spcont= octx;
+
+    /* set callers_arena to thd, for upper-level function to work */
+    thd->spcont->callers_arena= thd;
+  }
+
+  if (!pkg)
+  {
+    if (!(nctx= rcontext_create(thd, NULL, args)))
+    {
+      delete nctx; /* Delete nctx if it was init() that failed. */
+      thd->spcont= save_spcont;
+      DBUG_RETURN(TRUE);
+    }
+  }
+  else
+  {
+    if (!pkg->m_rcontext)
+    {
+      Query_arena backup_arena;
+      thd->set_n_backup_active_arena(this, &backup_arena);
+      nctx= pkg->rcontext_create(thd, NULL, args);
+      thd->restore_active_arena(this, &backup_arena);
+      if (!nctx)
+      {
+        thd->spcont= save_spcont;
+        DBUG_RETURN(TRUE);
+      }
+      pkg->m_rcontext= nctx;
+    }
+    else
+      nctx= pkg->m_rcontext;
+  }
+
+  if (params > 0)
+  {
+    List_iterator it_args(*args);
+
+    DBUG_PRINT("info",(" %.*s: eval args", (int) m_name.length, m_name.str));
+
+    for (uint i= 0 ; i < params ; i++)
+    {
+      Item *arg_item= it_args++;
+
+      if (!arg_item)
+        break;
+
+      err_status= bind_input_param(thd, arg_item, i, nctx, FALSE);
+      if (err_status)
+        break;
+    }
+
+    /*
+      Okay, got values for all arguments. Close tables that might be used by
+      arguments evaluation. If arguments evaluation required prelocking mode,
+      we'll leave it here.
+    */
+    thd->lex->unit.cleanup();
+
+    if (!thd->in_sub_stmt)
+    {
+      thd->get_stmt_da()->set_overwrite_status(true);
+      thd->is_error() ? trans_rollback_stmt(thd) : trans_commit_stmt(thd);
+      thd->get_stmt_da()->set_overwrite_status(false);
+    }
+
+    close_thread_tables(thd);
+    thd_proc_info(thd, 0);
+
+    if (! thd->in_sub_stmt)
+    {
+      if (thd->transaction_rollback_request)
+      {
+        trans_rollback_implicit(thd);
+        thd->release_transactional_locks();
+      }
+      else if (! thd->in_multi_stmt_transaction_mode())
+        thd->release_transactional_locks();
+      else
+        thd->mdl_context.release_statement_locks();
+    }
+
+    thd->rollback_item_tree_changes();
+
+    DBUG_PRINT("info",(" %.*s: eval args done", (int) m_name.length, 
+                       m_name.str));
+  }
+
+  save_enable_slow_log= thd->enable_slow_log;
+
+  /*
+    Disable slow log if:
+    - Slow logging is enabled (no change needed)
+    - This is a normal SP (not event log)
+    - If we have not explicitly disabled logging of SP
+  */
+  if (save_enable_slow_log &&
+      ((!(m_flags & LOG_SLOW_STATEMENTS) &&
+        (thd->variables.log_slow_disabled_statements & LOG_SLOW_DISABLE_SP))))
+  {
+    DBUG_PRINT("info", ("Disabling slow log for the execution"));
+    thd->enable_slow_log= FALSE;
+  }
+
+  /*
+    Disable general log if:
+    - If general log is enabled (no change needed)
+    - This is a normal SP (not event log)
+    - If we have not explicitly disabled logging of SP
+  */
+  if (!(thd->variables.option_bits & OPTION_LOG_OFF) &&
+      (!(m_flags & LOG_GENERAL_LOG) &&
+       (thd->variables.log_disabled_statements & LOG_DISABLE_SP)))
+  {
+    DBUG_PRINT("info", ("Disabling general log for the execution"));
+    save_log_general= true;
+    /* disable this bit */
+    thd->variables.option_bits |= OPTION_LOG_OFF;
+  }
+  thd->spcont= nctx;
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  Security_context *save_security_ctx= 0;
+  if (!err_status)
+    err_status= set_routine_security_ctx(thd, this, &save_security_ctx);
+#endif
+
+  opt_trace_disable_if_no_stored_proc_func_access(thd, this);
+
+  if (!err_status)
+    MYSQL_RUN_SP(this, err_status= execute(thd, TRUE));
+
+  if (save_log_general)
+    thd->variables.option_bits &= ~OPTION_LOG_OFF;
+  thd->enable_slow_log= save_enable_slow_log;
+
+  /*
+    In the case when we weren't able to employ reuse mechanism for
+    OUT/INOUT paranmeters, we should reallocate memory. This
+    allocation should be done on the arena which will live through
+    all execution of calling routine.
+  */
+  thd->spcont->callers_arena= octx->callers_arena;
+
+  if (!err_status && params > 0)
+  {
+    List_iterator it_args(*args);
+
+    /*
+      Copy back all OUT or INOUT values to the previous frame, or
+      set global user variables
+    */
+    for (uint i= 0 ; i < params ; i++)
+    {
+      Item *arg_item= it_args++;
+
+      if (!arg_item)
+        break;
+
+      err_status= bind_output_param(thd, arg_item, i, octx, nctx);
+      if (err_status)
+        break;
+    }
+  }
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  if (save_security_ctx)
+    m_security_ctx.restore_security_context(thd, save_security_ctx);
+#endif
+
+  if (!save_spcont)
+    delete octx;
+
+  if (!pkg)
+    delete nctx;
+  thd->spcont= save_spcont;
+  thd->utime_after_lock= utime_before_sp_exec;
+
+  /*
+    If not inside a procedure or a function, print warning
+    messages.
+  */
+  bool need_binlog_call= mysql_bin_log.is_open() &&
+                         (thd->variables.option_bits & OPTION_BIN_LOG) &&
+                         !thd->is_current_stmt_binlog_format_row();
+  if (need_binlog_call && thd->spcont == NULL &&
+      !thd->binlog_evt_union.do_union)
+    thd->issue_unsafe_warnings();
+
+  DBUG_RETURN(err_status);
+}
+
+bool
+sp_head::bind_input_param(THD *thd,
+                          Item *arg_item,
+                          uint arg_no,
+                          sp_rcontext *nctx,
+                          bool is_function)
+{
+  DBUG_ENTER("sp_head::bind_input_param");
+
+  sp_variable *param= m_pcont->find_variable(arg_no);
+  if (param == NULL)
+    DBUG_RETURN(FALSE);                         // No such parameter: no-op
+
+  if (param->mode != sp_variable::MODE_IN)
+  {
+    /* An OUT/INOUT argument must be something we can write back to. */
+    Settable_routine_parameter *settable=
+      arg_item->get_settable_routine_parameter();
+
+    if (settable == NULL)
+    {
+      my_error(ER_SP_NOT_VAR_ARG, MYF(0), arg_no+1, ErrConvDQName(this).ptr());
+      DBUG_RETURN(TRUE);
+    }
+
+    if (is_function)
+    {
+      /*
+        A stored function invoked from a SELECT/INSERT/INSERT..SELECT/
+        UPDATE/DELETE statement must not have OUT or INOUT parameters:
+        reject the call.
+      */
+      switch (thd->lex->sql_command) {
+      case SQLCOM_SELECT:
+      case SQLCOM_INSERT:
+      case SQLCOM_INSERT_SELECT:
+      case SQLCOM_UPDATE:
+      case SQLCOM_DELETE:
+        my_error(ER_SF_OUT_INOUT_ARG_NOT_ALLOWED, MYF(0), arg_no+1,
+                 m_name.str);
+        DBUG_RETURN(TRUE);
+      default:
+        break;
+      }
+    }
+
+    /* INOUT needs read privilege on the argument as well. */
+    settable->set_required_privilege(param->mode == sp_variable::MODE_INOUT);
+  }
+
+  if (param->mode == sp_variable::MODE_OUT)
+  {
+    /* An OUT parameter starts out as NULL inside the routine. */
+    Item_null *init_null= new (thd->mem_root) Item_null(thd);
+    Item *init_item= init_null;
+
+    if (init_null == NULL ||
+        nctx->set_parameter(thd, arg_no, &init_item))
+    {
+      DBUG_PRINT("error", ("set variable failed"));
+      DBUG_RETURN(TRUE);
+    }
+  }
+  else if (nctx->set_parameter(thd, arg_no, &arg_item))
+  {
+    /* IN / INOUT: copy the caller's value into the routine frame. */
+    DBUG_PRINT("error", ("set variable 2 failed"));
+    DBUG_RETURN(TRUE);
+  }
+
+  TRANSACT_TRACKER(add_trx_state_from_thd(thd));
+
+  DBUG_RETURN(FALSE);
+}
+
+/**
+  Copy one OUT/INOUT parameter value back from the routine frame (nctx)
+  to the caller's frame (octx), and attach metadata describing the
+  out-parameter to the settable item.
+
+  @param thd       Thread handle
+  @param arg_item  Caller-side argument item
+  @param arg_no    0-based parameter position
+  @param octx      Caller's runtime context (destination)
+  @param nctx      Routine's runtime context (source)
+
+  @retval FALSE success (also for IN parameters, which are skipped)
+  @retval TRUE  failure
+*/
+
+bool
+sp_head::bind_output_param(THD *thd,
+                           Item *arg_item,
+                           uint arg_no,
+                           sp_rcontext *octx,
+                           sp_rcontext *nctx)
+{
+  DBUG_ENTER("sp_head::bind_output_param");
+
+  sp_variable *spvar= m_pcont->find_variable(arg_no);
+  /* IN parameters are never copied back to the caller. */
+  if (spvar->mode == sp_variable::MODE_IN)
+    DBUG_RETURN(FALSE);
+
+  Settable_routine_parameter *srp=
+    arg_item->get_settable_routine_parameter();
+
+  /* bind_input_param() already rejected non-settable OUT/INOUT args. */
+  DBUG_ASSERT(srp);
+
+  if (srp->set_value(thd, octx, nctx->get_variable_addr(arg_no)))
+  {
+    DBUG_PRINT("error", ("set value failed"));
+    DBUG_RETURN(TRUE);
+  }
+
+  Send_field *out_param_info=
+    new (thd->mem_root) Send_field(thd, nctx->get_parameter(arg_no));
+  if (out_param_info == NULL)
+    DBUG_RETURN(TRUE);         /* Bug fix: handle OOM instead of crashing */
+  out_param_info->db_name= m_db;
+  out_param_info->table_name= m_name;
+  out_param_info->org_table_name= m_name;
+  out_param_info->col_name= spvar->name;
+  out_param_info->org_col_name= spvar->name;
+
+  srp->set_out_param_info(out_param_info);
+
+  DBUG_RETURN(FALSE);
+}
+
+/**
+  Reset lex during parsing, before we parse a sub statement.
+
+  @param thd    Thread handler.
+  @param sublex The LEX to use for the sub statement.
+
+  @return Error state
+    @retval true An error occurred.
+    @retval false Success.
+*/
+
+bool
+sp_head::reset_lex(THD *thd, sp_lex_local *sublex)
+{
+  DBUG_ENTER("sp_head::reset_lex");
+  LEX *outer_lex= thd->lex;        // Remember the enclosing LEX
+
+  /* Make the sub-statement LEX the active one. */
+  thd->set_local_lex(sublex);
+
+  /* Stack the outer LEX so it can be restored after the sub statement. */
+  DBUG_RETURN(m_lex.push_front(outer_lex));
+}
+
+
+bool
+sp_head::reset_lex(THD *thd)
+{
+  DBUG_ENTER("sp_head::reset_lex");
+  /* Allocate a fresh sub-statement LEX on the statement mem_root. */
+  sp_lex_local *sublex= new (thd->mem_root) sp_lex_local(thd, thd->lex);
+  if (sublex == NULL)
+    DBUG_RETURN(true);                            // OOM
+  DBUG_RETURN(reset_lex(thd, sublex));
+}
+
+
+/**
+  Merge a parsed sub statement's LEX state into its parent LEX, after the
+  sub statement has been parsed.
+
+  Propagates trigger table fields, binlog-unsafety flags, used routines,
+  MODIFIES_DATA, used tables and PS parameters from sublex to this
+  routine / the parent lex.
+
+  @param thd Thread handle
+  @param oldlex The upper level (parent) lex being merged into
+  @param sublex The local lex of the just-parsed sub statement
+
+  @return
+    @retval TRUE failure
+    @retval FALSE success
+*/
+
+bool
+sp_head::merge_lex(THD *thd, LEX *oldlex, LEX *sublex)
+{
+  DBUG_ENTER("sp_head::merge_lex");
+
+  sublex->set_trg_event_type_for_tables();
+
+  /* Hand the sub statement's trigger field list up to the parent. */
+  oldlex->trg_table_fields.push_back(&sublex->trg_table_fields);
+
+  /* If this substatement is unsafe, the entire routine is too. */
+  DBUG_PRINT("info", ("sublex->get_stmt_unsafe_flags: 0x%x",
+                      sublex->get_stmt_unsafe_flags()));
+  unsafe_flags|= sublex->get_stmt_unsafe_flags();
+
+  /*
+    Add routines which are used by statement to respective set for
+    this routine.
+  */
+  if (sp_update_sp_used_routines(&m_sroutines, &sublex->sroutines))
+    DBUG_RETURN(TRUE);
+
+  /* If this substatement is an update query, then mark MODIFIES_DATA */
+  if (is_update_query(sublex->sql_command))
+    m_flags|= MODIFIES_DATA;
+
+  /*
+    Merge tables used by this statement (but not by its functions or
+    procedures) to multiset of tables used by this routine.
+  */
+  merge_table_list(thd, sublex->query_tables, sublex);
+  /* Merge lists of PS parameters. */
+  oldlex->param_list.append(&sublex->param_list);
+
+  DBUG_RETURN(FALSE);
+}
+
+/**
+  Put the instruction on the backpatch list, associated with the label.
+
+  @param thd   Thread handle (used for allocation)
+  @param i     Instruction whose destination will be patched later
+  @param lab   Label the instruction refers to
+  @param list  Backpatch list to push onto
+  @param itype Kind of entry (GOTO/CPOP/HPOP)
+
+  @return 0 on success, non-zero on allocation failure
+*/
+
+int
+sp_head::push_backpatch(THD *thd, sp_instr *i, sp_label *lab,
+                        List<bp_t> *list, backpatch_instr_type itype)
+{
+  bp_t *bp= (bp_t *) thd->alloc(sizeof(bp_t));
+
+  if (!bp)
+    return 1;                                   /* OOM */
+  bp->lab= lab;
+  bp->instr= i;
+  bp->instr_type= itype;
+  return list->push_front(bp);
+}
+
+/** Shorthand: register a GOTO entry on the default backpatch list. */
+int
+sp_head::push_backpatch(THD *thd, sp_instr *i, sp_label *lab)
+{
+  return push_backpatch(thd, i, lab, &m_backpatch, GOTO);
+}
+
+/**
+  Emit the instruction sequence for a forward GOTO whose target label is
+  not yet known: hpop + cpop + jump, each registered on the goto
+  backpatch list so backpatch_goto() can fix them up (or turn them into
+  no-ops) once the label's position is known.
+
+  NOTE(review): declared int but returns true/false; callers appear to
+  treat it as a boolean error flag.
+
+  @return false on success, true on allocation/emit failure
+*/
+int
+sp_head::push_backpatch_goto(THD *thd, sp_pcontext *ctx, sp_label *lab)
+{
+  uint ip= instructions();
+
+  /*
+    Add cpop/hpop : they will be removed or updated later if target is in
+    the same block or not
+  */
+  sp_instr_hpop *hpop= new (thd->mem_root) sp_instr_hpop(ip++, ctx, 0);
+  if (hpop == NULL || add_instr(hpop))
+    return true;
+  if (push_backpatch(thd, hpop, lab, &m_backpatch_goto, HPOP))
+    return true;
+
+  sp_instr_cpop *cpop= new (thd->mem_root) sp_instr_cpop(ip++, ctx, 0);
+  if (cpop == NULL || add_instr(cpop))
+    return true;
+  if (push_backpatch(thd, cpop, lab, &m_backpatch_goto, CPOP))
+    return true;
+
+  // Add jump with ip=0. IP will be updated when label is found.
+  sp_instr_jump *i= new (thd->mem_root) sp_instr_jump(ip, ctx);
+  if (i == NULL || add_instr(i))
+    return true;
+  if (push_backpatch(thd, i, lab, &m_backpatch_goto, GOTO))
+    return true;
+
+  return false;
+}
+
+/**
+  Update all instructions with this label in the backpatch list to
+  the current position (the next instruction to be emitted).
+
+  @param lab  Label whose pending jumps should be resolved
+*/
+
+void
+sp_head::backpatch(sp_label *lab)
+{
+  bp_t *bp;
+  uint dest= instructions();
+  List_iterator_fast<bp_t> li(m_backpatch);
+
+  DBUG_ENTER("sp_head::backpatch");
+  while ((bp= li++))
+  {
+    if (bp->lab == lab)
+    {
+      DBUG_PRINT("info", ("backpatch: (m_ip %d, label %p <%s>) to dest %d",
+                          bp->instr->m_ip, lab, lab->name.str, dest));
+      bp->instr->backpatch(dest, lab->ctx);
+    }
+  }
+  DBUG_VOID_RETURN;
+}
+
+/**
+  Resolve pending GOTO entries for label 'lab', restricted to the
+  instruction range of the block that starts at 'lab_begin_block'.
+  GOTO jumps are patched to the current position; the hpop/cpop
+  instructions emitted by push_backpatch_goto() are either removed
+  (replaced by no-ops) or updated with the real handler/cursor counts.
+*/
+void
+sp_head::backpatch_goto(THD *thd, sp_label *lab, sp_label *lab_begin_block)
+{
+  bp_t *bp;
+  uint dest= instructions();
+  List_iterator<bp_t> li(m_backpatch_goto);
+
+  DBUG_ENTER("sp_head::backpatch_goto");
+  while ((bp= li++))
+  {
+    if (bp->instr->m_ip < lab_begin_block->ip || bp->instr->m_ip > lab->ip)
+    {
+      /*
+        Update only jump target from the beginning of the block where the
+        label is defined.
+      */
+      continue;
+    }
+    if (lex_string_cmp(system_charset_info, &bp->lab->name, &lab->name) == 0)
+    {
+      if (bp->instr_type == GOTO)
+      {
+        DBUG_PRINT("info",
+                   ("backpatch_goto: (m_ip %d, label %p <%s>) to dest %d",
+                    bp->instr->m_ip, lab, lab->name.str, dest));
+        bp->instr->backpatch(dest, lab->ctx);
+        /* Jump resolved, remove from the list */
+        li.remove();
+        continue;
+      }
+      if (bp->instr_type == CPOP)
+      {
+        uint n= bp->instr->m_ctx->diff_cursors(lab_begin_block->ctx, true);
+        if (n == 0)
+        {
+          /* No cursors to pop: replace the cpop with a no-op */
+          replace_instr_to_nop(thd, bp->instr->m_ip);
+        }
+        else
+        {
+          /* Update the number of cursors the cpop has to pop */
+          static_cast<sp_instr_cpop*>(bp->instr)->update_count(n);
+        }
+        li.remove();
+        continue;
+      }
+      if (bp->instr_type == HPOP)
+      {
+        uint n= bp->instr->m_ctx->diff_handlers(lab_begin_block->ctx, true);
+        if (n == 0)
+        {
+          /* No handlers to pop: replace the hpop with a no-op */
+          replace_instr_to_nop(thd, bp->instr->m_ip);
+        }
+        else
+        {
+          /* Update the number of handlers the hpop has to pop */
+          static_cast<sp_instr_hpop*>(bp->instr)->update_count(n);
+        }
+        li.remove();
+        continue;
+      }
+    }
+  }
+  DBUG_VOID_RETURN;
+}
+
+/**
+  Report an error for every GOTO whose label was never defined.
+
+  @retval true  at least one unresolved GOTO label (error raised)
+  @retval false all GOTOs resolved
+*/
+bool
+sp_head::check_unresolved_goto()
+{
+  DBUG_ENTER("sp_head::check_unresolved_goto");
+  bool has_unresolved_label= false;
+  if (m_backpatch_goto.elements > 0)
+  {
+    List_iterator_fast<bp_t> li(m_backpatch_goto);
+    while (bp_t *bp= li++)
+    {
+      if (bp->instr_type == GOTO)
+      {
+        my_error(ER_SP_LILABEL_MISMATCH, MYF(0), "GOTO", bp->lab->name.str);
+        has_unresolved_label= true;
+      }
+    }
+  }
+  DBUG_RETURN(has_unresolved_label);
+}
+
+/** Enter a new continuation-backpatch level, optionally registering i. */
+int
+sp_head::new_cont_backpatch(sp_instr_opt_meta *i)
+{
+  ++m_cont_level;
+  if (i == NULL)
+    return 0;
+  /* Use the cont. destination slot to store the level */
+  i->m_cont_dest= m_cont_level;
+  return m_cont_backpatch.push_front(i) ? 1 : 0;
+}
+
+/**
+  Register an instruction on the continuation backpatch list at the
+  current level; do_cont_backpatch() will set its real destination.
+*/
+int
+sp_head::add_cont_backpatch(sp_instr_opt_meta *i)
+{
+  i->m_cont_dest= m_cont_level;
+  return m_cont_backpatch.push_front(i);
+}
+
+/** Resolve all continuation backpatches of the level being left. */
+void
+sp_head::do_cont_backpatch()
+{
+  uint dest= instructions();     // Pending jumps resolve to "here"
+  uint lev= m_cont_level--;
+  sp_instr_opt_meta *instr;
+
+  /* Pop every instruction that was registered at this level. */
+  for (;;)
+  {
+    instr= m_cont_backpatch.head();
+    if (instr == NULL || instr->m_cont_dest != lev)
+      break;
+    instr->m_cont_dest= dest;
+    (void) m_cont_backpatch.pop();
+  }
+}
+
+
+/** Emit one cpush instruction per cursor declared in this parse frame. */
+bool
+sp_head::sp_add_instr_cpush_for_cursors(THD *thd, sp_pcontext *pcontext)
+{
+  uint count= pcontext->frame_cursor_count();
+  for (uint idx= 0; idx < count; ++idx)
+  {
+    const sp_pcursor *cursor= pcontext->get_cursor_by_local_frame_offset(idx);
+    sp_instr_cpush *push= new (thd->mem_root)
+      sp_instr_cpush(instructions(), pcontext, cursor->lex(),
+                     pcontext->cursor_offset() + idx);
+    if (push == NULL || add_instr(push))
+      return true;                          // OOM or add failure
+  }
+  return false;
+}
+
+
+/** Copy routine characteristics, duplicating the comment on our mem_root. */
+void
+sp_head::set_chistics(const st_sp_chistics &chistics)
+{
+  m_chistics.set(chistics);
+  /* Own the comment text: clear it, or copy it onto this SP's mem_root. */
+  m_chistics.comment.str=
+    m_chistics.comment.length == 0
+      ? 0
+      : strmake_root(mem_root, m_chistics.comment.str,
+                     m_chistics.comment.length);
+}
+
+
+/** Set routine characteristics while keeping the aggregate type intact. */
+void
+sp_head::set_c_chistics(const st_sp_chistics &chistics)
+{
+  // Set all chistics but preserve agg_type.
+  enum_sp_aggregate_type save_agg_type= agg_type();
+  set_chistics(chistics);
+  set_chistics_agg_type(save_agg_type);
+}
+
+
+/**
+  Store creation/modification timestamps, characteristics and sql_mode
+  on this sp_head.
+*/
+void
+sp_head::set_info(longlong created, longlong modified,
+                  const st_sp_chistics &chistics, sql_mode_t sql_mode)
+{
+  m_created= created;
+  m_modified= modified;
+  set_chistics(chistics);
+  m_sql_mode= sql_mode;
+}
+
+
+/**
+  Make the THD allocate on this routine's own mem_root (and a fresh free
+  list) while the routine is parsed, saving the THD's current mem_root and
+  free list.  The inverse operation is restore_thd_mem_root().
+*/
+void
+sp_head::reset_thd_mem_root(THD *thd)
+{
+  DBUG_ENTER("sp_head::reset_thd_mem_root");
+  m_thd_root= thd->mem_root;        // Save the caller's mem_root
+  thd->mem_root= &main_mem_root;    // Allocate on the SP's root from now on
+  DBUG_PRINT("info", ("mem_root %p moved to thd mem root %p",
+                      &mem_root, &thd->mem_root));
+  free_list= thd->free_list; // Keep the old list
+  thd->free_list= NULL; // Start a new one
+  m_thd= thd;                       // Marks the swap as active
+  DBUG_VOID_RETURN;
+}
+
+/**
+  Restore the THD's original mem_root and free list after parsing this
+  routine.  Inverse of reset_thd_mem_root(); idempotent — a second call
+  is a no-op because m_thd is cleared below.
+*/
+void
+sp_head::restore_thd_mem_root(THD *thd)
+{
+  DBUG_ENTER("sp_head::restore_thd_mem_root");
+
+  /*
+   In some cases our parser detects a syntax error and calls
+   LEX::cleanup_lex_after_parse_error() method only after
+   finishing parsing the whole routine. In such a situation
+   sp_head::restore_thd_mem_root() will be called twice - the
+   first time as part of normal parsing process and the second
+   time by cleanup_lex_after_parse_error().
+   To avoid ruining active arena/mem_root state in this case we
+   skip restoration of old arena/mem_root if this method has been
+   already called for this routine.
+  */
+  if (!m_thd)
+    DBUG_VOID_RETURN;
+
+  Item *flist= free_list;	// The old list
+  set_query_arena(thd);         // Get new free_list and mem_root
+  state= STMT_INITIALIZED_FOR_SP;
+
+  DBUG_PRINT("info", ("mem_root %p returned from thd mem root %p",
+                      &mem_root, &thd->mem_root));
+  thd->free_list= flist;        // Restore the old one
+  thd->mem_root= m_thd_root;
+  m_thd= NULL;                  // Mark restoration as done (see above)
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Check if a user has access right to a routine.
+
+  @param thd          Thread handler
+  @param sp           SP
+  @param full_access  Set to 1 if the user has SELECT right to the
+                      'mysql.proc' table or is the owner of the routine
+  @retval
+    false ok
+  @retval
+    true  error
+*/
+
+bool check_show_routine_access(THD *thd, sp_head *sp, bool *full_access)
+{
+  TABLE_LIST tables;
+  bzero((char*) &tables,sizeof(tables));
+  tables.db= MYSQL_SCHEMA_NAME;
+  tables.table_name= MYSQL_PROC_NAME;
+  tables.alias= MYSQL_PROC_NAME;
+
+  /* Full access: SELECT on mysql.proc, ownership, or an owning role. */
+  *full_access= ((!check_table_access(thd, SELECT_ACL, &tables, FALSE,
+                                     1, TRUE) &&
+                  (tables.grant.privilege & SELECT_ACL) != NO_ACL) ||
+                 /* Check if user owns the routine. */
+                 (!strcmp(sp->m_definer.user.str,
+                          thd->security_ctx->priv_user) &&
+                  !strcmp(sp->m_definer.host.str,
+                          thd->security_ctx->priv_host)) ||
+                 /* Check if current role or any of the sub-granted roles
+                    own the routine. */
+                 (sp->m_definer.host.length == 0 &&
+                  (!strcmp(sp->m_definer.user.str,
+                           thd->security_ctx->priv_role) ||
+                   check_role_is_granted(thd->security_ctx->priv_role, NULL,
+                                         sp->m_definer.user.str))));
+  /* Without full access, fall back to the routine-level privilege check. */
+  if (!*full_access)
+    return check_some_routine_access(thd, sp->m_db.str, sp->m_name.str,
+                                     sp->m_handler);
+  return 0;
+}
+
+
+/**
+  Collect metadata for SHOW CREATE statement for stored routines.
+
+  @param thd          Thread context.
+  @param sph          Stored routine handler
+  @param fields       Item list to populate
+*/
+
+void
+sp_head::show_create_routine_get_fields(THD *thd, const Sp_handler *sph,
+                                                  List<Item> *fields)
+{
+  const char *col1_caption= sph->show_create_routine_col1_caption();
+  const char *col3_caption= sph->show_create_routine_col3_caption();
+
+  MEM_ROOT *mem_root= thd->mem_root;
+
+  /* Send header. */
+
+  fields->push_back(new (mem_root)
+                    Item_empty_string(thd, col1_caption, NAME_CHAR_LEN),
+                    mem_root);
+  fields->push_back(new (mem_root)
+                    Item_empty_string(thd, "sql_mode", 256),
+                    mem_root);
+
+  {
+    /*
+      NOTE: SQL statement field must be not less than 1024 in order not to
+      confuse old clients.
+    */
+
+    Item_empty_string *stmt_fld=
+      new (mem_root) Item_empty_string(thd, col3_caption, 1024);
+    if (stmt_fld)                   /* Bug fix: don't dereference on OOM */
+    {
+      stmt_fld->set_maybe_null();
+      fields->push_back(stmt_fld, mem_root);
+    }
+  }
+
+  fields->push_back(new (mem_root)
+                   Item_empty_string(thd, "character_set_client",
+                                     MY_CS_CHARACTER_SET_NAME_SIZE),
+                   mem_root);
+
+  fields->push_back(new (mem_root)
+                   Item_empty_string(thd, "collation_connection",
+                                     MY_CS_COLLATION_NAME_SIZE),
+                   mem_root);
+
+  fields->push_back(new (mem_root)
+                   Item_empty_string(thd, "Database Collation",
+                                     MY_CS_COLLATION_NAME_SIZE),
+                   mem_root);
+}
+
+
+/**
+  Implement SHOW CREATE statement for stored routines.
+
+  Sends a single result-set row with the routine's name, sql_mode,
+  definition (NULL if the user lacks full access) and creation-context
+  character sets/collations.
+
+  @param thd  Thread context.
+  @param sph  Stored routine handler
+
+  @return Error status.
+    @retval FALSE on success
+    @retval TRUE on error
+*/
+
+bool
+sp_head::show_create_routine(THD *thd, const Sp_handler *sph)
+{
+  const char *col1_caption= sph->show_create_routine_col1_caption();
+  const char *col3_caption= sph->show_create_routine_col3_caption();
+
+  bool err_status;
+
+  Protocol *protocol= thd->protocol;
+  List<Item> fields;
+
+  LEX_CSTRING sql_mode;
+
+  bool full_access;
+  MEM_ROOT *mem_root= thd->mem_root;
+
+  DBUG_ENTER("sp_head::show_create_routine");
+  DBUG_PRINT("info", ("routine %s", m_name.str));
+
+  if (check_show_routine_access(thd, this, &full_access))
+    DBUG_RETURN(TRUE);
+
+  sql_mode_string_representation(thd, m_sql_mode, &sql_mode);
+
+  /* Send header. */
+
+  fields.push_back(new (mem_root)
+                   Item_empty_string(thd, col1_caption, NAME_CHAR_LEN),
+                   thd->mem_root);
+  fields.push_back(new (mem_root)
+                   Item_empty_string(thd, "sql_mode", (uint)sql_mode.length),
+                   thd->mem_root);
+
+  {
+    /*
+      NOTE: SQL statement field must be not less than 1024 in order not to
+      confuse old clients.
+    */
+
+    Item_empty_string *stmt_fld=
+      new (mem_root) Item_empty_string(thd, col3_caption,
+                            (uint)MY_MAX(m_defstr.length, 1024));
+
+    if (stmt_fld == NULL)           /* Bug fix: don't dereference on OOM */
+      DBUG_RETURN(TRUE);
+    stmt_fld->set_maybe_null();
+
+    fields.push_back(stmt_fld, thd->mem_root);
+  }
+
+  fields.push_back(new (mem_root)
+                   Item_empty_string(thd, "character_set_client",
+                                     MY_CS_CHARACTER_SET_NAME_SIZE),
+                   thd->mem_root);
+
+  fields.push_back(new (mem_root)
+                   Item_empty_string(thd, "collation_connection",
+                                     MY_CS_COLLATION_NAME_SIZE),
+                   thd->mem_root);
+
+  /*
+    Consistency fix: this column holds a collation name, so size it like
+    show_create_routine_get_fields() does (was MY_CS_CHARACTER_SET_NAME_SIZE).
+  */
+  fields.push_back(new (mem_root)
+                   Item_empty_string(thd, "Database Collation",
+                                     MY_CS_COLLATION_NAME_SIZE),
+                   thd->mem_root);
+
+  if (protocol->send_result_set_metadata(&fields,
+                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
+  {
+    DBUG_RETURN(TRUE);
+  }
+
+  /* Send data. */
+
+  protocol->prepare_for_resend();
+
+  protocol->store(m_name.str, m_name.length, system_charset_info);
+  protocol->store(sql_mode.str, sql_mode.length, system_charset_info);
+
+  /* The routine body is only shown to users with full access. */
+  if (full_access)
+    protocol->store(m_defstr.str, m_defstr.length,
+                    m_creation_ctx->get_client_cs());
+  else
+    protocol->store_null();
+
+
+  protocol->store(&m_creation_ctx->get_client_cs()->cs_name,
+                  system_charset_info);
+  protocol->store(&m_creation_ctx->get_connection_cl()->coll_name,
+                  system_charset_info);
+  protocol->store(&m_creation_ctx->get_db_cl()->coll_name,
+                  system_charset_info);
+
+  err_status= protocol->write();
+
+  if (!err_status)
+    my_eof(thd);
+
+  DBUG_RETURN(err_status);
+}
+
+
+/**
+  Add instruction to SP.
+
+  @param instr   Instruction
+
+  @return 0 on success, non-zero on insert failure (see insert_dynamic)
+*/
+
+int sp_head::add_instr(sp_instr *instr)
+{
+  /* Transfer ownership of items parsed for this instruction to it. */
+  instr->free_list= m_thd->free_list;
+  m_thd->free_list= 0;
+  /*
+    Memory root of every instruction is designated for permanent
+    transformations (optimizations) made on the parsed tree during
+    the first execution. It points to the memory root of the
+    entire stored procedure, as their life span is equal.
+  */
+  instr->mem_root= &main_mem_root;
+  /* Remember the source line for error messages / SHOW PROCEDURE CODE. */
+  instr->m_lineno= m_thd->m_parser_state->m_lip.yylineno;
+  return insert_dynamic(&m_instr, (uchar*)&instr);
+}
+
+
+/** Append an unconditional jump; its destination is patched later. */
+bool sp_head::add_instr_jump(THD *thd, sp_pcontext *spcont)
+{
+  sp_instr_jump *jump= new (thd->mem_root)
+    sp_instr_jump(instructions(), spcont);
+  if (jump == NULL)
+    return true;                              // OOM
+  return add_instr(jump) != 0;
+}
+
+
+/** Append an unconditional jump to a known destination. */
+bool sp_head::add_instr_jump(THD *thd, sp_pcontext *spcont, uint dest)
+{
+  sp_instr_jump *jump= new (thd->mem_root)
+    sp_instr_jump(instructions(), spcont, dest);
+  if (jump == NULL)
+    return true;                              // OOM
+  return add_instr(jump) != 0;
+}
+
+
+/**
+  Append a forward jump and register it for backpatching against 'lab'.
+
+  @retval false success
+  @retval true  allocation or registration failure
+*/
+bool sp_head::add_instr_jump_forward_with_backpatch(THD *thd,
+                                                    sp_pcontext *spcont,
+                                                    sp_label *lab)
+{
+  sp_instr_jump  *i= new (thd->mem_root) sp_instr_jump(instructions(), spcont);
+  if (i == NULL || add_instr(i))
+    return true;
+  /* Bug fix: push_backpatch() can fail on OOM; don't ignore its result. */
+  return push_backpatch(thd, i, lab) != 0;
+}
+
+
+/** Append a function RETURN instruction for 'item'. */
+bool sp_head::add_instr_freturn(THD *thd, sp_pcontext *spcont,
+                                Item *item, LEX *lex)
+{
+  sp_instr_freturn *ret= new (thd->mem_root)
+    sp_instr_freturn(instructions(), spcont, item,
+                     m_return_field_def.type_handler(), lex);
+  if (ret == NULL || add_instr(ret))
+    return true;
+  m_flags|= sp_head::HAS_RETURN;   // The function body now contains a RETURN
+  return false;
+}
+
+
+/** Append a procedure RETURN instruction. */
+bool sp_head::add_instr_preturn(THD *thd, sp_pcontext *spcont)
+{
+  sp_instr_preturn *ret= new (thd->mem_root)
+    sp_instr_preturn(instructions(), spcont);
+  return ret == NULL || add_instr(ret) != 0;
+}
+
+
+/*
+  Replace an instruction at position to "no operation".
+
+  @param thd - use mem_root of this THD for "new".
+  @param ip  - position of the operation
+  @returns   - true on error, false on success
+
+  When we need to remove an instruction that during compilation
+  appeared to be useless (typically as useless jump), we replace
+  it to a jump to exactly the next instruction.
+  Such jumps are later removed during sp_head::optimize().
+
+  QQ: Perhaps we need a dedicated sp_instr_nop for this purpose.
+*/
+
+bool sp_head::replace_instr_to_nop(THD *thd, uint ip)
+{
+  sp_instr *instr= get_instr(ip);
+  if (instr == NULL)        /* Bug fix: guard against an invalid position */
+    return true;
+  sp_instr_jump *nop= new (thd->mem_root) sp_instr_jump(instr->m_ip,
+                                                        instr->m_ctx,
+                                                        instr->m_ip + 1);
+  if (!nop)
+    return true;
+  delete instr;
+  set_dynamic(&m_instr, (uchar *) &nop, ip);
+  return false;
+}
+
+
+/**
+  Do some minimal optimization of the code:
+    -# Mark used instructions
+    -# While doing this, shortcut jumps to jump instructions
+    -# Compact the code, removing unused instructions.
+
+  This is the main mark and move loop; it relies on the following methods
+  in sp_instr and its subclasses:
+
+    - opt_mark()         :  Mark instruction as reachable
+    - opt_shortcut_jump():  Shortcut jumps to the final destination;
+                           used by opt_mark().
+    - opt_move()         :  Update moved instruction
+    - set_destination()  :  Set the new destination (jump instructions only)
+*/
+
+void sp_head::optimize()
+{
+  List<sp_instr> bp;
+  sp_instr *i;
+  uint src, dst;
+
+  DBUG_EXECUTE_IF("sp_head_optimize_disable", return; );
+
+  opt_mark();
+
+  bp.empty();
+  src= dst= 0;
+  while ((i= get_instr(src)))
+  {
+    if (! i->marked)
+    {
+      /* Unreachable instruction: drop it and compact the array. */
+      delete i;
+      src+= 1;
+    }
+    else
+    {
+      if (src != dst)
+      {
+        /* Move the instruction and update prev. jumps */
+        sp_instr *ibp;
+        List_iterator_fast<sp_instr> li(bp);
+
+        set_dynamic(&m_instr, (uchar*)&i, dst);
+        while ((ibp= li++))
+        {
+          sp_instr_opt_meta *im= static_cast<sp_instr_opt_meta *>(ibp);
+          im->set_destination(src, dst);
+        }
+      }
+      i->opt_move(dst, &bp);
+      src+= 1;
+      dst+= 1;
+    }
+  }
+  m_instr.elements= dst;
+  bp.empty();
+}
+
+/** Queue instruction at 'ip' as a new lead for opt_mark(), if unmarked. */
+void sp_head::add_mark_lead(uint ip, List<sp_instr> *leads)
+{
+  sp_instr *i= get_instr(ip);
+
+  if (i && ! i->marked)
+    leads->push_front(i);
+}
+
+#ifdef PROTECT_STATEMENT_MEMROOT
+
+/** Return non-zero iff every instruction has been run at least once. */
+int sp_head::has_all_instrs_executed()
+{
+  for (uint idx= 0; idx < m_instr.elements; ++idx)
+  {
+    sp_instr *instr;
+    get_dynamic(&m_instr, (uchar*) &instr, idx);
+    if (!instr->has_been_run())
+      return 0;                     // Found an instruction never executed
+  }
+  return 1;                         // All executed (trivially true if empty)
+}
+
+
+/** Clear the "has been run" flag on every instruction. */
+void sp_head::reset_instrs_executed_counter()
+{
+  for (uint idx= 0; idx < m_instr.elements; ++idx)
+  {
+    sp_instr *instr;
+    get_dynamic(&m_instr, (uchar*) &instr, idx);
+    instr->mark_as_not_run();
+  }
+}
+
+#endif
+
+/**
+  Mark all instructions reachable from the entry point; used by
+  sp_head::optimize() to find dead code.
+*/
+void
+sp_head::opt_mark()
+{
+  uint ip;
+  sp_instr *i;
+  List<sp_instr> leads;
+
+  /*
+    Forward flow analysis algorithm in the instruction graph:
+    - first, add the entry point in the graph (the first instruction) to the
+      'leads' list of paths to explore.
+    - while there are still leads to explore:
+      - pick one lead, and follow the path forward. Mark instruction reached.
+        Stop only if the end of the routine is reached, or the path converge
+        to code already explored (marked).
+      - while following a path, collect in the 'leads' list any fork to
+        another path (caused by conditional jumps instructions), so that these
+        paths can be explored as well.
+  */
+
+  /* Add the entry point */
+  i= get_instr(0);
+  leads.push_front(i);
+
+  /* For each path of code ... */
+  while (leads.elements != 0)
+  {
+    i= leads.pop();
+
+    /* Mark the entire path, collecting new leads. */
+    while (i && ! i->marked)
+    {
+      ip= i->opt_mark(this, & leads);
+      i= get_instr(ip);
+    }
+  }
+}
+
+
+#ifndef DBUG_OFF
+/**
+  Return the routine instructions as a result set.
+  @return
+    0 if ok, !=0 on error.
+*/
+
+int
+sp_head::show_routine_code(THD *thd)
+{
+  Protocol *protocol= thd->protocol;
+  char buff[2048];
+  String buffer(buff, sizeof(buff), system_charset_info);
+  List<Item> field_list;
+  sp_instr *i;
+  bool full_access;
+  int res= 0;
+  uint ip;
+  DBUG_ENTER("sp_head::show_routine_code");
+  DBUG_PRINT("info", ("procedure: %s", m_name.str));
+
+  if (check_show_routine_access(thd, this, &full_access) || !full_access)
+    DBUG_RETURN(1);
+
+  field_list.push_back(new (thd->mem_root) Item_uint(thd, "Pos", 9),
+                       thd->mem_root);
+  // 1024 is for not to confuse old clients
+  field_list.push_back(new (thd->mem_root)
+                       Item_empty_string(thd, "Instruction",
+                                         MY_MAX(buffer.length(), 1024)),
+                       thd->mem_root);
+  if (protocol->send_result_set_metadata(&field_list, Protocol::SEND_NUM_ROWS |
+                                         Protocol::SEND_EOF))
+    DBUG_RETURN(1);
+
+  for (ip= 0; (i = get_instr(ip)) ; ip++)
+  {
+    /*
+      Consistency check. If these are different something went wrong
+      during optimization.
+    */
+    if (ip != i->m_ip)
+    {
+      /*
+        Bug fix: 'format' must be an array, not a pointer, so that
+        sizeof(format) yields the string size rather than the pointer
+        size; otherwise 'tmp' is undersized and the message truncated.
+      */
+      static const char format[]= "Instruction at position %u has m_ip=%u";
+      char tmp[sizeof(format) + 2*SP_INSTR_UINT_MAXLEN + 1];
+
+      my_snprintf(tmp, sizeof(tmp), format, ip, i->m_ip);
+      /*
+        Since this is for debugging purposes only, we don't bother to
+        introduce a special error code for it.
+      */
+      push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, tmp);
+    }
+    protocol->prepare_for_resend();
+    protocol->store_long(ip);
+
+    buffer.set("", 0, system_charset_info);
+    i->print(&buffer);
+    protocol->store(buffer.ptr(), buffer.length(), system_charset_info);
+    if ((res= protocol->write()))
+      break;
+  }
+
+  if (!res)
+    my_eof(thd);
+
+  DBUG_RETURN(res);
+}
+#endif // ifndef DBUG_OFF
+
+
+/**
+  Prepare LEX and thread for execution of instruction, if requested open
+  and lock LEX's tables, execute instruction's core function, perform
+  cleanup afterwards.
+
+  @param thd           thread context
+  @param nextp         out - next instruction
+  @param open_tables   if TRUE then check read access to tables in LEX's table
+                       list and open and lock them (used in instructions which
+                       need to calculate some expression and don't execute
+                       complete statement).
+  @param sp_instr      instruction for which we prepare context, and which core
+                       function execute by calling its exec_core() method.
+
+  @note
+    We are not saving/restoring some parts of THD which may need this because
+    we do this once for whole routine execution in sp_head::execute().
+
+  @return
+    0/non-0 - Success/Failure
+*/
+
+int
+sp_lex_keeper::reset_lex_and_exec_core(THD *thd, uint *nextp,
+                                       bool open_tables, sp_instr* instr)
+{
+  int res= 0;
+  DBUG_ENTER("reset_lex_and_exec_core");
+
+  /*
+    The flag is saved at the entry to the following substatement.
+    It's reset further in the common code part.
+    It's merged with the saved parent's value at the exit of this func.
+  */
+  bool parent_modified_non_trans_table=
+    thd->transaction->stmt.modified_non_trans_table;
+  unsigned int parent_unsafe_rollback_flags=
+    thd->transaction->stmt.m_unsafe_rollback_flags;
+  thd->transaction->stmt.modified_non_trans_table= FALSE;
+  thd->transaction->stmt.m_unsafe_rollback_flags= 0;
+
+  DBUG_ASSERT(!thd->derived_tables);
+  DBUG_ASSERT(thd->Item_change_list::is_empty());
+  /*
+    Use our own lex.
+    We should not save old value since it is saved/restored in
+    sp_head::execute() when we are entering/leaving routine.
+  */
+  thd->lex= m_lex;
+
+  thd->set_query_id(next_query_id());
+
+  if (thd->locked_tables_mode <= LTM_LOCK_TABLES)
+  {
+    /*
+      This statement will enter/leave prelocked mode on its own.
+      Entering prelocked mode changes table list and related members
+      of LEX, so we'll need to restore them.
+    */
+    if (lex_query_tables_own_last)
+    {
+      /*
+        We've already entered/left prelocked mode with this statement.
+        Attach the list of tables that need to be prelocked and mark m_lex
+        as having such list attached.
+      */
+      *lex_query_tables_own_last= prelocking_tables;
+      m_lex->mark_as_requiring_prelocking(lex_query_tables_own_last);
+    }
+  }
+
+  reinit_stmt_before_use(thd, m_lex);
+
+#ifndef EMBEDDED_LIBRARY
+  /*
+    If there was instruction which changed tracking state,
+    the result of changed tracking state send to client in OK packed.
+    So it changes result sent to client and probably can be different
+    independent on query text. So we can't cache such results.
+  */
+  if ((thd->client_capabilities & CLIENT_SESSION_TRACK) &&
+      (thd->server_status & SERVER_SESSION_STATE_CHANGED))
+    thd->lex->safe_to_cache_query= 0;
+#endif
+
+  Opt_trace_start ots(thd);
+  ots.init(thd, m_lex->query_tables, SQLCOM_SELECT, &m_lex->var_list,
+           NULL, 0, thd->variables.character_set_client);
+
+  Json_writer_object trace_command(thd);
+  Json_writer_array trace_command_steps(thd, "steps");
+  if (open_tables)
+    res= instr->exec_open_and_lock_tables(thd, m_lex->query_tables);
+
+  if (likely(!res))
+  {
+    res= instr->exec_core(thd, nextp);
+    DBUG_PRINT("info",("exec_core returned: %d", res));
+  }
+
+  /*
+    Call after unit->cleanup() to close open table
+    key read.
+  */
+  if (open_tables)
+  {
+    m_lex->unit.cleanup();
+    /* Here we also commit or rollback the current statement. */
+    if (! thd->in_sub_stmt)
+    {
+      thd->get_stmt_da()->set_overwrite_status(true);
+      thd->is_error() ? trans_rollback_stmt(thd) : trans_commit_stmt(thd);
+      thd->get_stmt_da()->set_overwrite_status(false);
+    }
+    close_thread_tables(thd);
+    thd_proc_info(thd, 0);
+
+    if (! thd->in_sub_stmt)
+    {
+      if (thd->transaction_rollback_request)
+      {
+        trans_rollback_implicit(thd);
+        thd->release_transactional_locks();
+      }
+      else if (! thd->in_multi_stmt_transaction_mode())
+        thd->release_transactional_locks();
+      else
+        thd->mdl_context.release_statement_locks();
+    }
+  }
+  //TODO: why is this here if log_slow_query is in sp_instr_stmt::execute?
+  delete_explain_query(m_lex);
+
+  if (m_lex->query_tables_own_last)
+  {
+    /*
+      We've entered and left prelocking mode when executing statement
+      stored in m_lex. 
+      m_lex->query_tables(->next_global)* list now has a 'tail' - a list
+      of tables that are added for prelocking. (If this is the first
+      execution, the 'tail' was added by open_tables(), otherwise we've
+      attached it above in this function).
+      Now we'll save the 'tail', and detach it.
+    */
+    lex_query_tables_own_last= m_lex->query_tables_own_last;
+    prelocking_tables= *lex_query_tables_own_last;
+    *lex_query_tables_own_last= NULL;
+    m_lex->query_tables_last= m_lex->query_tables_own_last;
+    m_lex->mark_as_requiring_prelocking(NULL);
+  }
+  thd->rollback_item_tree_changes();
+  /*
+    Update the state of the active arena if no errors on
+    open_tables stage.
+  */
+  if (likely(!res) || likely(!thd->is_error()))
+    thd->stmt_arena->state= Query_arena::STMT_EXECUTED;
+
+  /*
+    Merge here with the saved parent's values
+    what is needed from the substatement gained
+  */
+  thd->transaction->stmt.modified_non_trans_table |= parent_modified_non_trans_table;
+  thd->transaction->stmt.m_unsafe_rollback_flags |= parent_unsafe_rollback_flags;
+
+  TRANSACT_TRACKER(add_trx_state_from_thd(thd));
+
+  /*
+    Unlike for PS we should not call Item's destructors for newly created
+    items after execution of each instruction in stored routine. This is
+    because SP often create Item (like Item_int, Item_string etc...) when
+    they want to store some value in local variable, pass return value and
+    etc... So their life time should be longer than one instruction.
+
+    cleanup_items() is called in sp_head::execute()
+  */
+  thd->lex->restore_set_statement_var();
+  DBUG_RETURN(res || thd->is_error());
+}
+
+
+/*
+  Execute a cursor-related instruction under the cursor statement's own
+  Query_arena, so any Items created by the operation go onto that arena's
+  free_list and can be cleaned up immediately afterwards.
+*/
+int sp_lex_keeper::cursor_reset_lex_and_exec_core(THD *thd, uint *nextp,
+                                                  bool open_tables,
+                                                  sp_instr *instr)
+{
+  Query_arena *old_arena= thd->stmt_arena;
+  /*
+    Get the Query_arena from the cursor statement LEX, which contains
+    the free_list of the query, so new items (if any) are stored in
+    the right free_list, and we can cleanup after each cursor operation,
+    e.g. open or cursor_copy_struct (for cursor%ROWTYPE variables).
+  */
+  thd->stmt_arena= m_lex->query_arena();
+  int res= reset_lex_and_exec_core(thd, nextp, open_tables, instr);
+  cleanup_items(thd->stmt_arena->free_list);   // free Items created during the call
+  thd->stmt_arena= old_arena;                  // restore the caller's arena
+  return res;
+}
+
+
+/*
+  sp_instr class functions
+*/
+
+/*
+  Check access, open temporary and base tables, and prepare derived
+  tables/views before an instruction's core function runs.
+  Returns 0 on success, -1 on any failure (error already reported).
+*/
+int sp_instr::exec_open_and_lock_tables(THD *thd, TABLE_LIST *tables)
+{
+  int result;
+
+  /*
+    Check whenever we have access to tables for this statement
+    and open and lock them before executing instructions core function.
+  */
+  if (thd->open_temporary_tables(tables) ||
+      check_table_access(thd, SELECT_ACL, tables, FALSE, UINT_MAX, FALSE)
+      || open_and_lock_tables(thd, tables, TRUE, 0))
+    result= -1;
+  else
+    result= 0;
+  /* Prepare all derived tables/views to catch possible errors. */
+  if (!result)
+    result= mysql_handle_derived(thd->lex, DT_PREPARE) ? -1 : 0;
+
+  return result;
+}
+
+/* Default continuation destination: the next instruction. */
+uint sp_instr::get_cont_dest() const
+{
+  return (m_ip+1);
+}
+
+
+/* Base-class stub: must be overridden by instructions that have a core. */
+int sp_instr::exec_core(THD *thd, uint *nextp)
+{
+  DBUG_ASSERT(0);
+  return 0;
+}
+
+/*
+  sp_instr_stmt class functions
+*/
+
+PSI_statement_info sp_instr_stmt::psi_info=
+{ 0, "stmt", 0};
+
+/*
+  Execute one ordinary SQL statement inside a stored routine:
+  substitute SP variable values into the query text, consult the query
+  cache, run the statement via reset_lex_and_exec_core(), and maintain
+  general/slow logging and audit notification around it.
+*/
+int
+sp_instr_stmt::execute(THD *thd, uint *nextp)
+{
+  int res;
+  bool save_enable_slow_log;
+  const CSET_STRING query_backup= thd->query_string;
+  Sub_statement_state backup_state;
+  DBUG_ENTER("sp_instr_stmt::execute");
+  DBUG_PRINT("info", ("command: %d", m_lex_keeper.sql_command()));
+
+  /* NOTE(review): "static_cast(m_query.length)" looks like extraction
+     garbling of a templated cast (e.g. static_cast<uint>) — verify
+     against upstream before building. */
+  MYSQL_SET_STATEMENT_TEXT(thd->m_statement_psi, m_query.str, static_cast(m_query.length));
+
+#if defined(ENABLED_PROFILING)
+  /* This s-p instr is profilable and will be captured. */
+  thd->profiling.set_query_source(m_query.str, m_query.length);
+#endif
+
+  save_enable_slow_log= thd->enable_slow_log;
+  thd->store_slow_query_state(&backup_state);
+
+  if (!(res= alloc_query(thd, m_query.str, m_query.length)) &&
+      !(res=subst_spvars(thd, this, &m_query)))
+  {
+    /*
+      (the order of query cache and subst_spvars calls is irrelevant because
+      queries with SP vars can't be cached)
+    */
+    general_log_write(thd, COM_QUERY, thd->query(), thd->query_length());
+
+    if (query_cache_send_result_to_client(thd, thd->query(),
+                                          thd->query_length()) <= 0)
+    {
+      /* Query-cache miss: actually execute the statement. */
+      thd->reset_slow_query_state();
+      res= m_lex_keeper.reset_lex_and_exec_core(thd, nextp, FALSE, this);
+      bool log_slow= !res && thd->enable_slow_log;
+
+      /* Finalize server status flags after executing a statement. */
+      if (log_slow || thd->get_stmt_da()->is_eof())
+        thd->update_server_status();
+
+      if (thd->get_stmt_da()->is_eof())
+        thd->protocol->end_statement();
+
+      query_cache_end_of_result(thd);
+
+      mysql_audit_general(thd, MYSQL_AUDIT_GENERAL_STATUS,
+                          thd->get_stmt_da()->is_error() ?
+                                 thd->get_stmt_da()->sql_errno() : 0,
+                          command_name[COM_QUERY].str);
+
+      if (log_slow)
+        log_slow_statement(thd);
+
+      /*
+        Restore enable_slow_log, that can be changed by a admin or call
+        command
+      */
+      thd->enable_slow_log= save_enable_slow_log;
+
+      /* Add the number of rows to thd for the 'call' statistics */
+      thd->add_slow_query_state(&backup_state);
+    }
+    else
+    {
+      /* Query-cache hit: result already sent; just update statistics. */
+      /* change statistics */
+      enum_sql_command save_sql_command= thd->lex->sql_command;
+      thd->lex->sql_command= SQLCOM_SELECT;
+      status_var_increment(thd->status_var.com_stat[SQLCOM_SELECT]);
+      thd->update_stats();
+      thd->lex->sql_command= save_sql_command;
+      *nextp= m_ip+1;
+    }
+    thd->set_query(query_backup);
+    thd->query_name_consts= 0;
+
+    /* A non-zero res with no THD error means the condition was handled
+       by an SP handler: clear it and reset the diagnostics area. */
+    if (likely(!thd->is_error()))
+    {
+      res= 0;
+      thd->get_stmt_da()->reset_diagnostics_area();
+    }
+  }
+
+  DBUG_RETURN(res || thd->is_error());
+}
+
+
+/*
+  Append a one-line representation of this instruction to 'str':
+  "stmt <command-number> \"<truncated query text>\"", with newlines
+  replaced by spaces.
+*/
+void
+sp_instr_stmt::print(String *str)
+{
+  size_t i, len;
+
+  /* stmt CMD "..." */
+  if (str->reserve(SP_STMT_PRINT_MAXLEN+SP_INSTR_UINT_MAXLEN+8))
+    return;
+  str->qs_append(STRING_WITH_LEN("stmt "));
+  str->qs_append((uint)m_lex_keeper.sql_command());
+  str->qs_append(STRING_WITH_LEN(" \""));
+  len= m_query.length;
+  /*
+    Print the query string (but not too much of it), just to indicate which
+    statement it is.
+  */
+  if (len > SP_STMT_PRINT_MAXLEN)
+    len= SP_STMT_PRINT_MAXLEN-3;   // leave room for the "..." marker
+  /* Copy the query string and replace '\n' with ' ' in the process */
+  for (i= 0 ; i < len ; i++)
+  {
+    char c= m_query.str[i];
+    if (c == '\n')
+      c= ' ';
+    str->qs_append(c);
+  }
+  if (m_query.length > SP_STMT_PRINT_MAXLEN)
+    str->qs_append(STRING_WITH_LEN("...")); /* Indicate truncated string */
+  str->qs_append('"');
+}
+
+
+/*
+  Core of sp_instr_stmt: run the statement through mysql_execute_command()
+  (wrapped in DTrace/probe markers) and advance to the next instruction.
+*/
+int
+sp_instr_stmt::exec_core(THD *thd, uint *nextp)
+{
+  MYSQL_QUERY_EXEC_START(thd->query(),
+                         thd->thread_id,
+                         thd->get_db(),
+                         &thd->security_ctx->priv_user[0],
+                         (char *)thd->security_ctx->host_or_ip,
+                         3);
+  int res= mysql_execute_command(thd);
+  MYSQL_QUERY_EXEC_DONE(res);
+  *nextp= m_ip+1;
+  return res;
+}
+
+
+/*
+  sp_instr_set class functions
+*/
+
+PSI_statement_info sp_instr_set::psi_info=
+{ 0, "set", 0};
+
+/* Execute a "set local variable" instruction through the lex keeper. */
+int
+sp_instr_set::execute(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_set::execute");
+  DBUG_PRINT("info", ("offset: %u", m_offset));
+
+  DBUG_RETURN(m_lex_keeper.reset_lex_and_exec_core(thd, nextp, TRUE, this));
+}
+
+
+/* Resolve the runtime context the target variable lives in
+   (e.g. local vs. package body — determined by m_rcontext_handler). */
+sp_rcontext *sp_instr_set::get_rcontext(THD *thd) const
+{
+  return m_rcontext_handler->get_rcontext(thd->spcont);
+}
+
+
+/* Core: evaluate m_value and store it into variable #m_offset. */
+int
+sp_instr_set::exec_core(THD *thd, uint *nextp)
+{
+  int res= get_rcontext(thd)->set_variable(thd, m_offset, &m_value);
+  *nextp = m_ip+1;
+  return res;
+}
+
+/* Append "set <prefix><name>@<offset> <value-expr>" to 'str'. */
+void
+sp_instr_set::print(String *str)
+{
+  /* set name@offset ... */
+  size_t rsrv = SP_INSTR_UINT_MAXLEN+6;
+  sp_variable *var = m_ctx->find_variable(m_offset);
+  const LEX_CSTRING *prefix= m_rcontext_handler->get_name_prefix();
+
+  /* 'var' should always be non-null, but just in case... */
+  if (var)
+    rsrv+= var->name.length + prefix->length;
+  if (str->reserve(rsrv))
+    return;
+  str->qs_append(STRING_WITH_LEN("set "));
+  str->qs_append(prefix->str, prefix->length);
+  if (var)
+  {
+    str->qs_append(&var->name);
+    str->qs_append('@');
+  }
+  str->qs_append(m_offset);
+  str->qs_append(' ');
+  m_value->print(str, enum_query_type(QT_ORDINARY |
+                                      QT_ITEM_ORIGINAL_FUNC_NULLIF));
+}
+
+
+/*
+  sp_instr_set_field class functions
+*/
+
+/* Core: assign m_value to field #m_field_offset of ROW variable #m_offset. */
+int
+sp_instr_set_row_field::exec_core(THD *thd, uint *nextp)
+{
+  int res= get_rcontext(thd)->set_variable_row_field(thd, m_offset,
+                                                     m_field_offset,
+                                                     &m_value);
+  *nextp= m_ip + 1;
+  return res;
+}
+
+
+/* Append "set <prefix><var>.<field>@<offset>[<field_offset>] <expr>". */
+void
+sp_instr_set_row_field::print(String *str)
+{
+  /* set name@offset[field_offset] ... */
+  size_t rsrv= SP_INSTR_UINT_MAXLEN + 6 + 6 + 3;
+  sp_variable *var= m_ctx->find_variable(m_offset);
+  const LEX_CSTRING *prefix= m_rcontext_handler->get_name_prefix();
+  DBUG_ASSERT(var);
+  DBUG_ASSERT(var->field_def.is_row());
+  const Column_definition *def=
+    var->field_def.row_field_definitions()->elem(m_field_offset);
+  DBUG_ASSERT(def);
+
+  rsrv+= var->name.length + def->field_name.length + prefix->length;
+  if (str->reserve(rsrv))
+    return;
+  str->qs_append(STRING_WITH_LEN("set "));
+  str->qs_append(prefix);
+  str->qs_append(&var->name);
+  str->qs_append('.');
+  str->qs_append(&def->field_name);
+  str->qs_append('@');
+  str->qs_append(m_offset);
+  str->qs_append('[');
+  str->qs_append(m_field_offset);
+  str->qs_append(']');
+  str->qs_append(' ');
+  m_value->print(str, enum_query_type(QT_ORDINARY |
+                                      QT_ITEM_ORIGINAL_FUNC_NULLIF));
+}
+
+
+/*
+  sp_instr_set_field_by_name class functions
+*/
+
+/* Core: assign m_value to the named field of a table%ROWTYPE or
+   cursor%ROWTYPE variable, resolving the field by name at runtime. */
+int
+sp_instr_set_row_field_by_name::exec_core(THD *thd, uint *nextp)
+{
+  int res= get_rcontext(thd)->set_variable_row_field_by_name(thd, m_offset,
+                                                             m_field_name,
+                                                             &m_value);
+  *nextp= m_ip + 1;
+  return res;
+}
+
+
+/* Append "set <prefix><var>.<field>@<offset>[\"<field>\"] <expr>". */
+void
+sp_instr_set_row_field_by_name::print(String *str)
+{
+  /* set name.field@offset["field"] ... */
+  size_t rsrv= SP_INSTR_UINT_MAXLEN + 6 + 6 + 3 + 2;
+  sp_variable *var= m_ctx->find_variable(m_offset);
+  const LEX_CSTRING *prefix= m_rcontext_handler->get_name_prefix();
+  DBUG_ASSERT(var);
+  DBUG_ASSERT(var->field_def.is_table_rowtype_ref() ||
+              var->field_def.is_cursor_rowtype_ref());
+
+  /* m_field_name appears twice in the output, hence 2 * its length. */
+  rsrv+= var->name.length + 2 * m_field_name.length + prefix->length;
+  if (str->reserve(rsrv))
+    return;
+  str->qs_append(STRING_WITH_LEN("set "));
+  str->qs_append(prefix);
+  str->qs_append(&var->name);
+  str->qs_append('.');
+  str->qs_append(&m_field_name);
+  str->qs_append('@');
+  str->qs_append(m_offset);
+  str->qs_append("[\"",2);
+  str->qs_append(&m_field_name);
+  str->qs_append("\"]",2);
+  str->qs_append(' ');
+  m_value->print(str, enum_query_type(QT_ORDINARY |
+                                      QT_ITEM_ORIGINAL_FUNC_NULLIF));
+}
+
+
+/*
+  sp_instr_set_trigger_field class functions
+*/
+
+PSI_statement_info sp_instr_set_trigger_field::psi_info=
+{ 0, "set_trigger_field", 0};
+
+/* Execute "SET NEW.<field> := <expr>" inside a trigger. */
+int
+sp_instr_set_trigger_field::execute(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_set_trigger_field::execute");
+  thd->count_cuted_fields= CHECK_FIELD_ERROR_FOR_NULL;
+  DBUG_RETURN(m_lex_keeper.reset_lex_and_exec_core(thd, nextp, TRUE, this));
+}
+
+
+/* Core: evaluate 'value' and store it into the trigger's NEW-row field;
+   warnings become errors in strict mode (unless IGNORE is in effect). */
+int
+sp_instr_set_trigger_field::exec_core(THD *thd, uint *nextp)
+{
+  Abort_on_warning_instant_set aws(thd, thd->is_strict_mode() && !thd->lex->ignore);
+  const int res= (trigger_field->set_value(thd, &value) ? -1 : 0);
+  *nextp = m_ip+1;
+  return res;
+}
+
+/* Append "set_trigger_field <field>:=<value-expr>" to 'str'. */
+void
+sp_instr_set_trigger_field::print(String *str)
+{
+  str->append(STRING_WITH_LEN("set_trigger_field "));
+  trigger_field->print(str, enum_query_type(QT_ORDINARY |
+                                            QT_ITEM_ORIGINAL_FUNC_NULLIF));
+  str->append(STRING_WITH_LEN(":="));
+  value->print(str, enum_query_type(QT_ORDINARY |
+                                    QT_ITEM_ORIGINAL_FUNC_NULLIF));
+}
+
+/*
+  sp_instr_opt_meta
+*/
+
+/* Override: continuation destination is the stored m_cont_dest,
+   not simply the next instruction as in the base class. */
+uint sp_instr_opt_meta::get_cont_dest() const
+{
+  return m_cont_dest;
+}
+
+
+/*
+ sp_instr_jump class functions
+*/
+
+PSI_statement_info sp_instr_jump::psi_info=
+{ 0, "jump", 0};
+
+/* Unconditional jump: simply set *nextp to the stored destination. */
+int
+sp_instr_jump::execute(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_jump::execute");
+  DBUG_PRINT("info", ("destination: %u", m_dest));
+
+  *nextp= m_dest;
+  DBUG_RETURN(0);
+}
+
+/* Append "jump <dest>" to 'str'. */
+void
+sp_instr_jump::print(String *str)
+{
+  /* jump dest */
+  if (str->reserve(SP_INSTR_UINT_MAXLEN+5))
+    return;
+  str->qs_append(STRING_WITH_LEN("jump "));
+  str->qs_append(m_dest);
+}
+
+/*
+  Optimizer mark phase: shortcut chains of jumps, remember the resolved
+  destination instruction, and mark this instruction reachable unless it
+  just falls through to the next one.
+  NOTE(review): the parameter "List *leads" looks like extraction garbling
+  of a template parameter (e.g. List<sp_instr> *leads) — verify upstream.
+*/
+uint
+sp_instr_jump::opt_mark(sp_head *sp, List *leads)
+{
+  m_dest= opt_shortcut_jump(sp, this);
+  if (m_dest != m_ip+1)   /* Jumping to following instruction? */
+    marked= 1;
+  m_optdest= sp->get_instr(m_dest);
+  return m_dest;
+}
+
+/*
+  Follow a chain of jump instructions to its final destination,
+  stopping on self-reference or on reaching 'start' (cycle guards).
+*/
+uint
+sp_instr_jump::opt_shortcut_jump(sp_head *sp, sp_instr *start)
+{
+  uint dest= m_dest;
+  sp_instr *i;
+
+  while ((i= sp->get_instr(dest)))
+  {
+    uint ndest;
+
+    if (start == i || this == i)
+      break;
+    ndest= i->opt_shortcut_jump(sp, start);
+    if (ndest == dest)
+      break;
+    dest= ndest;
+  }
+  return dest;
+}
+
+/*
+  Optimizer move phase: relocate this instruction to address 'dst';
+  forward jumps are queued in 'bp' for later backpatching, backward
+  jumps are retargeted immediately via the saved destination pointer.
+*/
+void
+sp_instr_jump::opt_move(uint dst, List *bp)
+{
+  if (m_dest > m_ip)
+    bp->push_back(this);      // Forward
+  else if (m_optdest)
+    m_dest= m_optdest->m_ip;  // Backward
+  m_ip= dst;
+}
+
+
+/*
+  sp_instr_jump_if_not class functions
+*/
+
+PSI_statement_info sp_instr_jump_if_not::psi_info=
+{ 0, "jump_if_not", 0};
+
+/* Conditional jump: evaluate the expression via the lex keeper. */
+int
+sp_instr_jump_if_not::execute(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_jump_if_not::execute");
+  DBUG_PRINT("info", ("destination: %u", m_dest));
+  DBUG_RETURN(m_lex_keeper.reset_lex_and_exec_core(thd, nextp, TRUE, this));
+}
+
+
+/*
+  Core: evaluate m_expr; jump to m_dest when it is false, otherwise
+  fall through to the next instruction. Returns -1 if the expression
+  could not be prepared/evaluated.
+*/
+int
+sp_instr_jump_if_not::exec_core(THD *thd, uint *nextp)
+{
+  Item *it;
+  int res;
+
+  it= thd->sp_prepare_func_item(&m_expr, 1);
+  if (! it)
+  {
+    res= -1;
+  }
+  else
+  {
+    res= 0;
+    if (! it->val_bool())
+      *nextp = m_dest;
+    else
+      *nextp = m_ip+1;
+  }
+
+  return res;
+}
+
+
+/* Append "jump_if_not <dest>(<cont_dest>) <expr>" to 'str'. */
+void
+sp_instr_jump_if_not::print(String *str)
+{
+  /* jump_if_not dest(cont) ... */
+  if (str->reserve(2*SP_INSTR_UINT_MAXLEN+14+32)) // Add some for the expr. too
+    return;
+  str->qs_append(STRING_WITH_LEN("jump_if_not "));
+  str->qs_append(m_dest);
+  str->qs_append('(');
+  str->qs_append(m_cont_dest);
+  str->qs_append(STRING_WITH_LEN(") "));
+  m_expr->print(str, enum_query_type(QT_ORDINARY |
+                                     QT_ITEM_ORIGINAL_FUNC_NULLIF));
+}
+
+
+/*
+  Optimizer mark phase: shortcut both the jump destination and the
+  continuation destination, and register each as a lead to mark from.
+  NOTE(review): "List *leads" appears to have lost its template argument
+  during extraction — verify against upstream.
+*/
+uint
+sp_instr_jump_if_not::opt_mark(sp_head *sp, List *leads)
+{
+  sp_instr *i;
+
+  marked= 1;
+  if ((i= sp->get_instr(m_dest)))
+  {
+    m_dest= i->opt_shortcut_jump(sp, this);
+    m_optdest= sp->get_instr(m_dest);
+  }
+  sp->add_mark_lead(m_dest, leads);
+  if ((i= sp->get_instr(m_cont_dest)))
+  {
+    m_cont_dest= i->opt_shortcut_jump(sp, this);
+    m_cont_optdest= sp->get_instr(m_cont_dest);
+  }
+  sp->add_mark_lead(m_cont_dest, leads);
+  return m_ip+1;
+}
+
+/* Optimizer move phase: handle the continuation destination here, then
+   delegate m_dest/m_ip handling to sp_instr_jump::opt_move(). */
+void
+sp_instr_jump_if_not::opt_move(uint dst, List *bp)
+{
+  /*
+    cont. destinations may point backwards after shortcutting jumps
+    during the mark phase. If it's still pointing forwards, only
+    push this for backpatching if sp_instr_jump::opt_move() will not
+    do it (i.e. if the m_dest points backwards).
+   */
+  if (m_cont_dest > m_ip)
+  {                             // Forward
+    if (m_dest < m_ip)
+      bp->push_back(this);
+  }
+  else if (m_cont_optdest)
+    m_cont_dest= m_cont_optdest->m_ip; // Backward
+  /* This will take care of m_dest and m_ip */
+  sp_instr_jump::opt_move(dst, bp);
+}
+
+
+/*
+  sp_instr_freturn class functions
+*/
+
+PSI_statement_info sp_instr_freturn::psi_info=
+{ 0, "freturn", 0};
+
+/* RETURN from a stored function: evaluate via the lex keeper. */
+int
+sp_instr_freturn::execute(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_freturn::execute");
+  DBUG_RETURN(m_lex_keeper.reset_lex_and_exec_core(thd, nextp, TRUE, this));
+}
+
+
+/*
+  Core: clear the warning info (except in Oracle mode), terminate the
+  routine by setting *nextp to UINT_MAX, and store the evaluated return
+  value into the function's runtime context.
+*/
+int
+sp_instr_freturn::exec_core(THD *thd, uint *nextp)
+{
+  /*
+    RETURN is a "procedure statement" (in terms of the SQL standard).
+    That means, Diagnostics Area should be clean before its execution.
+  */
+
+  if (!(thd->variables.sql_mode & MODE_ORACLE))
+  {
+    /*
+      Don't clean warnings in ORACLE mode,
+      as they are needed for SQLCODE and SQLERRM:
+        BEGIN
+          SELECT a INTO a FROM t1;
+          RETURN 'No exception ' || SQLCODE || ' ' || SQLERRM;
+        EXCEPTION WHEN NO_DATA_FOUND THEN
+          RETURN 'Exception ' || SQLCODE || ' ' || SQLERRM;
+        END;
+    */
+    Diagnostics_area *da= thd->get_stmt_da();
+    da->clear_warning_info(da->warning_info_id());
+  }
+
+  /*
+    Change , so that this will be the last
+    instruction in the stored function.
+  */
+
+  *nextp= UINT_MAX;
+
+  /*
+    Evaluate the value of return expression and store it in current runtime
+    context.
+
+    NOTE: It's necessary to evaluate result item right here, because we must
+    do it in scope of execution the current context/block.
+  */
+
+  return thd->spcont->set_return_value(thd, &m_value);
+}
+
+/* Append "freturn <type-name> <value-expr>" to 'str'. */
+void
+sp_instr_freturn::print(String *str)
+{
+  /* freturn type expr... */
+  if (str->reserve(1024+8+32)) // Add some for the expr. too
+    return;
+  str->qs_append(STRING_WITH_LEN("freturn "));
+  LEX_CSTRING name= m_type_handler->name().lex_cstring();
+  str->qs_append(&name);
+  str->qs_append(' ');
+  m_value->print(str, enum_query_type(QT_ORDINARY |
+                                      QT_ITEM_ORIGINAL_FUNC_NULLIF));
+}
+
+/*
+  sp_instr_preturn class functions
+*/
+
+PSI_statement_info sp_instr_preturn::psi_info=
+{ 0, "preturn", 0};
+
+/* RETURN from a stored procedure: end execution (UINT_MAX = past the
+   last instruction); there is no return value to evaluate. */
+int
+sp_instr_preturn::execute(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_preturn::execute");
+  *nextp= UINT_MAX;
+  DBUG_RETURN(0);
+}
+
+/* Append "preturn" to 'str'. */
+void
+sp_instr_preturn::print(String *str)
+{
+  str->append(STRING_WITH_LEN("preturn"));
+}
+
+/*
+  sp_instr_hpush_jump class functions
+*/
+
+PSI_statement_info sp_instr_hpush_jump::psi_info=
+{ 0, "hpush_jump", 0};
+
+/* Push this condition handler onto the handler stack and jump over
+   the handler body to m_dest. */
+int
+sp_instr_hpush_jump::execute(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_hpush_jump::execute");
+
+  int ret= thd->spcont->push_handler(this);
+
+  *nextp= m_dest;
+
+  DBUG_RETURN(ret);
+}
+
+
+/* Append "hpush_jump <dest> <frame> EXIT|CONTINUE" to 'str'. */
+void
+sp_instr_hpush_jump::print(String *str)
+{
+  /* hpush_jump dest fsize type */
+  if (str->reserve(SP_INSTR_UINT_MAXLEN*2 + 21))
+    return;
+
+  str->qs_append(STRING_WITH_LEN("hpush_jump "));
+  str->qs_append(m_dest);
+  str->qs_append(' ');
+  str->qs_append(m_frame);
+
+  switch (m_handler->type) {
+  case sp_handler::EXIT:
+    str->qs_append(STRING_WITH_LEN(" EXIT"));
+    break;
+  case sp_handler::CONTINUE:
+    str->qs_append(STRING_WITH_LEN(" CONTINUE"));
+    break;
+  default:
+    // The handler type must be either CONTINUE or EXIT.
+    DBUG_ASSERT(0);
+  }
+}
+
+
+/*
+  Optimizer mark phase: shortcut/mark the jump destination, and for
+  CONTINUE handlers additionally mark every instruction in the handler
+  scope as a lead (see the in-body comment).
+  NOTE(review): "List *leads" appears to have lost its template argument
+  during extraction — verify against upstream.
+*/
+uint
+sp_instr_hpush_jump::opt_mark(sp_head *sp, List *leads)
+{
+  sp_instr *i;
+
+  marked= 1;
+  if ((i= sp->get_instr(m_dest)))
+  {
+    m_dest= i->opt_shortcut_jump(sp, this);
+    m_optdest= sp->get_instr(m_dest);
+  }
+  sp->add_mark_lead(m_dest, leads);
+
+  /*
+    For continue handlers, all instructions in the scope of the handler
+    are possible leads. For example, the instruction after freturn might
+    be executed if the freturn triggers the condition handled by the
+    continue handler.
+
+    m_dest marks the start of the handler scope. It's added as a lead
+    above, so we start on m_dest+1 here.
+    m_opt_hpop is the hpop marking the end of the handler scope.
+  */
+  if (m_handler->type == sp_handler::CONTINUE)
+  {
+    for (uint scope_ip= m_dest+1; scope_ip <= m_opt_hpop; scope_ip++)
+      sp->add_mark_lead(scope_ip, leads);
+  }
+
+  return m_ip+1;
+}
+
+
+/*
+  sp_instr_hpop class functions
+*/
+
+PSI_statement_info sp_instr_hpop::psi_info=
+{ 0, "hpop", 0};
+
+/* Pop m_count condition handlers off the handler stack. */
+int
+sp_instr_hpop::execute(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_hpop::execute");
+  thd->spcont->pop_handlers(m_count);
+  *nextp= m_ip+1;
+  DBUG_RETURN(0);
+}
+
+/* Append "hpop <count>" to 'str'. */
+void
+sp_instr_hpop::print(String *str)
+{
+  /* hpop count */
+  if (str->reserve(SP_INSTR_UINT_MAXLEN+5))
+    return;
+  str->qs_append(STRING_WITH_LEN("hpop "));
+  str->qs_append(m_count);
+}
+
+
+/*
+  sp_instr_hreturn class functions
+*/
+
+PSI_statement_info sp_instr_hreturn::psi_info=
+{ 0, "hreturn", 0};
+
+/* Return from a condition handler: EXIT handlers jump to m_dest,
+   CONTINUE handlers resume at the ip saved on the handler stack. */
+int
+sp_instr_hreturn::execute(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_hreturn::execute");
+
+  uint continue_ip= thd->spcont->exit_handler(thd->get_stmt_da());
+
+  *nextp= m_dest ? m_dest : continue_ip;
+
+  DBUG_RETURN(0);
+}
+
+
+/* Append "hreturn 0 <dest>" (EXIT) or "hreturn <frame>" (CONTINUE). */
+void
+sp_instr_hreturn::print(String *str)
+{
+  /* hreturn framesize dest */
+  if (str->reserve(SP_INSTR_UINT_MAXLEN*2 + 9))
+    return;
+  str->qs_append(STRING_WITH_LEN("hreturn "));
+  if (m_dest)
+  {
+    // NOTE: this is legacy: hreturn instruction for EXIT handler
+    // should print out 0 as frame index.
+    str->qs_append(STRING_WITH_LEN("0 "));
+    str->qs_append(m_dest);
+  }
+  else
+  {
+    str->qs_append(m_frame);
+  }
+}
+
+
+/* Optimizer mark phase: EXIT handlers continue at m_dest; for CONTINUE
+   handlers the next ip comes from the handler stack at runtime, so
+   UINT_MAX ("no static successor") is returned.
+   NOTE(review): "List *leads" appears to have lost its template argument
+   during extraction — verify against upstream. */
+uint
+sp_instr_hreturn::opt_mark(sp_head *sp, List *leads)
+{
+  marked= 1;
+
+  if (m_dest)
+  {
+    /*
+      This is an EXIT handler; next instruction step is in m_dest.
+     */
+    return m_dest;
+  }
+
+  /*
+    This is a CONTINUE handler; next instruction step will come from
+    the handler stack and not from opt_mark.
+   */
+  return UINT_MAX;
+}
+
+
+/*
+  sp_instr_cpush class functions
+*/
+
+PSI_statement_info sp_instr_cpush::psi_info=
+{ 0, "cpush", 0};
+
+/* Declare a cursor: reset its state, disable the query cache for its
+   statement, and push it onto the runtime cursor stack. */
+int
+sp_instr_cpush::execute(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_cpush::execute");
+
+  sp_cursor::reset(thd, &m_lex_keeper);
+  m_lex_keeper.disable_query_cache();
+  thd->spcont->push_cursor(this);
+
+  *nextp= m_ip+1;
+
+  DBUG_RETURN(false);
+}
+
+
+/* Append "cpush <name>@<offset>" to 'str'. */
+void
+sp_instr_cpush::print(String *str)
+{
+  const LEX_CSTRING *cursor_name= m_ctx->find_cursor(m_cursor);
+
+  /* cpush name@offset */
+  size_t rsrv= SP_INSTR_UINT_MAXLEN+7;
+
+  if (cursor_name)
+    rsrv+= cursor_name->length;
+  if (str->reserve(rsrv))
+    return;
+  str->qs_append(STRING_WITH_LEN("cpush "));
+  if (cursor_name)
+  {
+    str->qs_append(cursor_name->str, cursor_name->length);
+    str->qs_append('@');
+  }
+  str->qs_append(m_cursor);
+}
+
+
+/*
+  sp_instr_cpop class functions
+*/
+
+PSI_statement_info sp_instr_cpop::psi_info=
+{ 0, "cpop", 0};
+
+/* Pop m_count cursors off the runtime cursor stack. */
+int
+sp_instr_cpop::execute(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_cpop::execute");
+  thd->spcont->pop_cursors(thd, m_count);
+  *nextp= m_ip+1;
+  DBUG_RETURN(0);
+}
+
+
+/* Append "cpop <count>" to 'str'. */
+void
+sp_instr_cpop::print(String *str)
+{
+  /* cpop count */
+  if (str->reserve(SP_INSTR_UINT_MAXLEN+5))
+    return;
+  str->qs_append(STRING_WITH_LEN("cpop "));
+  str->qs_append(m_count);
+}
+
+
+/*
+  sp_instr_copen class functions
+*/
+
+/**
+  @todo
+    Assert that we either have an error or a cursor
+*/
+
+PSI_statement_info sp_instr_copen::psi_info=
+{ 0, "copen", 0};
+
+/* OPEN a declared cursor: look it up in the runtime context and run
+   its statement under the cursor's own arena. */
+int
+sp_instr_copen::execute(THD *thd, uint *nextp)
+{
+  /*
+    We don't store a pointer to the cursor in the instruction to be
+    able to reuse the same instruction among different threads in future.
+  */
+  sp_cursor *c= thd->spcont->get_cursor(m_cursor);
+  int res;
+  DBUG_ENTER("sp_instr_copen::execute");
+
+  if (! c)
+    res= -1;
+  else
+  {
+    sp_lex_keeper *lex_keeper= c->get_lex_keeper();
+    res= lex_keeper->cursor_reset_lex_and_exec_core(thd, nextp, FALSE, this);
+    /* TODO: Assert here that we either have an error or a cursor */
+  }
+  DBUG_RETURN(res);
+}
+
+
+/* Core: actually open the cursor. */
+int
+sp_instr_copen::exec_core(THD *thd, uint *nextp)
+{
+  sp_cursor *c= thd->spcont->get_cursor(m_cursor);
+  int res= c->open(thd);
+  *nextp= m_ip+1;
+  return res;
+}
+
+/* Append "copen <name>@<offset>" to 'str'. */
+void
+sp_instr_copen::print(String *str)
+{
+  const LEX_CSTRING *cursor_name= m_ctx->find_cursor(m_cursor);
+
+  /* copen name@offset */
+  size_t rsrv= SP_INSTR_UINT_MAXLEN+7;
+
+  if (cursor_name)
+    rsrv+= cursor_name->length;
+  if (str->reserve(rsrv))
+    return;
+  str->qs_append(STRING_WITH_LEN("copen "));
+  if (cursor_name)
+  {
+    str->qs_append(cursor_name->str, cursor_name->length);
+    str->qs_append('@');
+  }
+  str->qs_append(m_cursor);
+}
+
+
+/*
+  sp_instr_cclose class functions
+*/
+
+PSI_statement_info sp_instr_cclose::psi_info=
+{ 0, "cclose", 0};
+
+/* CLOSE a cursor; -1 if the cursor slot does not exist. */
+int
+sp_instr_cclose::execute(THD *thd, uint *nextp)
+{
+  sp_cursor *c= thd->spcont->get_cursor(m_cursor);
+  int res;
+  DBUG_ENTER("sp_instr_cclose::execute");
+
+  if (! c)
+    res= -1;
+  else
+    res= c->close(thd);
+  *nextp= m_ip+1;
+  DBUG_RETURN(res);
+}
+
+
+/* Append "cclose <name>@<offset>" to 'str'. */
+void
+sp_instr_cclose::print(String *str)
+{
+  const LEX_CSTRING *cursor_name= m_ctx->find_cursor(m_cursor);
+
+  /* cclose name@offset */
+  size_t rsrv= SP_INSTR_UINT_MAXLEN+8;
+
+  if (cursor_name)
+    rsrv+= cursor_name->length;
+  if (str->reserve(rsrv))
+    return;
+  str->qs_append(STRING_WITH_LEN("cclose "));
+  if (cursor_name)
+  {
+    str->qs_append(cursor_name->str, cursor_name->length);
+    str->qs_append('@');
+  }
+  str->qs_append(m_cursor);
+}
+
+
+/*
+  sp_instr_cfetch class functions
+*/
+
+PSI_statement_info sp_instr_cfetch::psi_info=
+{ 0, "cfetch", 0};
+
+/* FETCH from a cursor into the variables of m_varlist;
+   -1 if the cursor slot does not exist. */
+int
+sp_instr_cfetch::execute(THD *thd, uint *nextp)
+{
+  sp_cursor *c= thd->spcont->get_cursor(m_cursor);
+  int res;
+  Query_arena backup_arena;
+  DBUG_ENTER("sp_instr_cfetch::execute");
+
+  res= c ? c->fetch(thd, &m_varlist, m_error_on_no_data) : -1;
+
+  *nextp= m_ip+1;
+  DBUG_RETURN(res);
+}
+
+
+/* Append "cfetch <name>@<offset> <var>@<offset>..." to 'str'.
+   NOTE(review): "List_iterator_fast li(m_varlist)" looks like extraction
+   garbling of a templated declaration (e.g. List_iterator_fast<sp_variable>)
+   — verify against upstream. */
+void
+sp_instr_cfetch::print(String *str)
+{
+  List_iterator_fast li(m_varlist);
+  sp_variable *pv;
+  const LEX_CSTRING *cursor_name= m_ctx->find_cursor(m_cursor);
+
+  /* cfetch name@offset vars... */
+  size_t rsrv= SP_INSTR_UINT_MAXLEN+8;
+
+  if (cursor_name)
+    rsrv+= cursor_name->length;
+  if (str->reserve(rsrv))
+    return;
+  str->qs_append(STRING_WITH_LEN("cfetch "));
+  if (cursor_name)
+  {
+    str->qs_append(cursor_name->str, cursor_name->length);
+    str->qs_append('@');
+  }
+  str->qs_append(m_cursor);
+  while ((pv= li++))
+  {
+    if (str->reserve(pv->name.length+SP_INSTR_UINT_MAXLEN+2))
+      return;
+    str->qs_append(' ');
+    str->qs_append(&pv->name);
+    str->qs_append('@');
+    str->qs_append(pv->offset);
+  }
+}
+
+/*
+  sp_instr_agg_cfetch class functions
+*/
+
+PSI_statement_info sp_instr_agg_cfetch::psi_info=
+{ 0, "agg_cfetch", 0};
+
+/*
+  FETCH GROUP NEXT ROW inside an aggregate stored function: drives the
+  pause/resume protocol between the aggregate function and its caller
+  via spcont->instr_ptr and spcont->pause_state. Raises
+  ER_SP_FETCH_NO_DATA when resumed after the last row was already sent.
+*/
+int
+sp_instr_agg_cfetch::execute(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_agg_cfetch::execute");
+  int res= 0;
+  if (!thd->spcont->instr_ptr)
+  {
+    /* First entry: just record where to resume. */
+    *nextp= m_ip+1;
+    thd->spcont->instr_ptr= m_ip + 1;
+  }
+  else if (!thd->spcont->pause_state)
+    thd->spcont->pause_state= TRUE;   // yield back to the caller
+  else
+  {
+    /* Resumed by the caller with (possibly) a new row. */
+    thd->spcont->pause_state= FALSE;
+    if (thd->server_status & SERVER_STATUS_LAST_ROW_SENT)
+    {
+      my_message(ER_SP_FETCH_NO_DATA,
+                 ER_THD(thd, ER_SP_FETCH_NO_DATA), MYF(0));
+      res= -1;
+      thd->spcont->quit_func= TRUE;
+    }
+    else
+      *nextp= m_ip + 1;
+  }
+  DBUG_RETURN(res);
+}
+
+/* Append "agg_cfetch" to 'str'. */
+void
+sp_instr_agg_cfetch::print(String *str)
+{
+
+  uint rsrv= SP_INSTR_UINT_MAXLEN+11;
+
+  if (str->reserve(rsrv))
+    return;
+  str->qs_append(STRING_WITH_LEN("agg_cfetch"));
+}
+
+/*
+  sp_instr_cursor_copy_struct class functions
+*/
+
+/**
+  This methods processes cursor %ROWTYPE declarations, e.g.:
+    CURSOR cur IS SELECT * FROM t1;
+    rec cur%ROWTYPE;
+  and does the following:
+  - opens the cursor without copying data (materialization).
+  - copies the cursor structure to the associated %ROWTYPE variable.
+*/
+
+PSI_statement_info sp_instr_cursor_copy_struct::psi_info=
+{ 0, "cursor_copy_struct", 0};
+
+/* Core: populate the ROW variable's element list from the cursor's
+   result structure — done only on the first execution (see comment). */
+int
+sp_instr_cursor_copy_struct::exec_core(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_cursor_copy_struct::exec_core");
+  int ret= 0;
+  Item_field_row *row= (Item_field_row*) thd->spcont->get_variable(m_var);
+  DBUG_ASSERT(row->type_handler() == &type_handler_row);
+
+  /*
+    Copy structure only once. If the cursor%ROWTYPE variable is declared
+    inside a LOOP block, it gets its structure on the first loop interation
+    and remembers the structure for all consequent loop iterations.
+    It we recreated the structure on every iteration, we would get
+    potential memory leaks, and it would be less efficient.
+  */
+  if (!row->arguments())
+  {
+    sp_cursor tmp(thd, &m_lex_keeper, true);
+    // Open the cursor without copying data
+    if (!(ret= tmp.open(thd)))
+    {
+      Row_definition_list defs;
+      /*
+        Create row elements on the caller arena.
+        It's the same arena that was used during sp_rcontext::create().
+        This puts cursor%ROWTYPE elements on the same mem_root
+        where explicit ROW elements and table%ROWTYPE reside:
+        - tmp.export_structure() allocates new Spvar_definition instances
+          and their components (such as TYPELIBs).
+        - row->row_create_items() creates new Item_field instances.
+        They all are created on the same mem_root.
+      */
+      /* NOTE(review): "¤t_arena" on the two lines below looks like a
+         mis-encoding of "&current_arena" — verify against upstream. */
+      Query_arena current_arena;
+      thd->set_n_backup_active_arena(thd->spcont->callers_arena, ¤t_arena);
+      if (!(ret= tmp.export_structure(thd, &defs)))
+        row->row_create_items(thd, &defs);
+      thd->restore_active_arena(thd->spcont->callers_arena, ¤t_arena);
+      tmp.close(thd);
+    }
+  }
+  *nextp= m_ip + 1;
+  DBUG_RETURN(ret);
+}
+
+
+/* Execute via the cursor arena wrapper (no tables opened here). */
+int
+sp_instr_cursor_copy_struct::execute(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_cursor_copy_struct::execute");
+  int ret= m_lex_keeper.cursor_reset_lex_and_exec_core(thd, nextp, FALSE, this);
+  DBUG_RETURN(ret);
+}
+
+
+/* Append "cursor_copy_struct <cursor> <var>@<offset>" to 'str'. */
+void
+sp_instr_cursor_copy_struct::print(String *str)
+{
+  sp_variable *var= m_ctx->find_variable(m_var);
+  const LEX_CSTRING *name= m_ctx->find_cursor(m_cursor);
+  str->append(STRING_WITH_LEN("cursor_copy_struct "));
+  str->append(name);
+  str->append(' ');
+  str->append(&var->name);
+  str->append('@');
+  str->append_ulonglong(m_var);
+}
+
+
+/*
+  sp_instr_error class functions
+*/
+
+/* Performance-schema instrumentation descriptor for the "error" instruction. */
+PSI_statement_info sp_instr_error::psi_info=
+{ 0, "error", 0};
+
+int
+sp_instr_error::execute(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_error::execute");
+  /* Raise the error condition that was compiled into this instruction. */
+  my_message(m_errcode, ER_THD(thd, m_errcode), MYF(0));
+  WSREP_DEBUG("sp_instr_error: %s %d", ER_THD(thd, m_errcode), thd->is_error());
+  /* Advance the instruction pointer; -1 signals the raised error upwards. */
+  *nextp= m_ip+1;
+  DBUG_RETURN(-1);
+}
+
+
+void
+sp_instr_error::print(String *str)
+{
+  /* error code */
+  /* Pre-reserve to make the unchecked qs_append() calls below safe. */
+  if (str->reserve(SP_INSTR_UINT_MAXLEN+6))
+    return;
+  str->qs_append(STRING_WITH_LEN("error "));
+  str->qs_append(m_errcode);
+}
+
+
+/**************************************************************************
+  sp_instr_set_case_expr class implementation
+**************************************************************************/
+
+/* Performance-schema instrumentation descriptor for "set_case_expr". */
+PSI_statement_info sp_instr_set_case_expr::psi_info=
+{ 0, "set_case_expr", 0};
+
+int
+sp_instr_set_case_expr::execute(THD *thd, uint *nextp)
+{
+  DBUG_ENTER("sp_instr_set_case_expr::execute");
+
+  /* Re-prepare the expression's LEX, then evaluate it in exec_core(). */
+  DBUG_RETURN(m_lex_keeper.reset_lex_and_exec_core(thd, nextp, TRUE, this));
+}
+
+
+int
+sp_instr_set_case_expr::exec_core(THD *thd, uint *nextp)
+{
+  /* Evaluate the CASE expression and store it in the runtime context. */
+  int res= thd->spcont->set_case_expr(thd, m_case_expr_id, &m_case_expr);
+
+  if (res && !thd->spcont->get_case_expr(m_case_expr_id))
+  {
+    /*
+      Failed to evaluate the value, the case expression is still not
+      initialized. Set to NULL so we can continue.
+    */
+
+    Item *null_item= new (thd->mem_root) Item_null(thd);
+
+    if (!null_item ||
+        thd->spcont->set_case_expr(thd, m_case_expr_id, &null_item))
+    {
+      /* If this also failed, we have to abort. */
+      my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATAL));
+    }
+  }
+  else
+    /*
+      Success (or failure with an already-initialized expression):
+      advance to the next instruction. On the branch above *nextp is left
+      unchanged -- presumably the continuation destination (m_cont_dest)
+      takes over then; TODO confirm against the execute() caller.
+    */
+    *nextp= m_ip+1;
+
+  return res;
+}
+
+
+void
+sp_instr_set_case_expr::print(String *str)
+{
+  /* set_case_expr (cont) id ... */
+  str->reserve(2*SP_INSTR_UINT_MAXLEN+18+32); // Add some extra for expr too
+  str->qs_append(STRING_WITH_LEN("set_case_expr ("));
+  str->qs_append(m_cont_dest);
+  str->qs_append(STRING_WITH_LEN(") "));
+  str->qs_append(m_case_expr_id);
+  str->qs_append(' ');
+  /* Print the expression itself, keeping original NULLIF() spelling. */
+  m_case_expr->print(str, enum_query_type(QT_ORDINARY |
+                                          QT_ITEM_ORIGINAL_FUNC_NULLIF));
+}
+
+/*
+  Mark this instruction as reachable and shortcut the continuation jump
+  destination through any intermediate jump instructions.
+  Note: the original text lost the template argument of List during
+  extraction ("List *leads"); restored to List<sp_instr> here.
+*/
+uint
+sp_instr_set_case_expr::opt_mark(sp_head *sp, List<sp_instr> *leads)
+{
+  sp_instr *i;
+
+  marked= 1;
+  if ((i= sp->get_instr(m_cont_dest)))
+  {
+    m_cont_dest= i->opt_shortcut_jump(sp, this);
+    m_cont_optdest= sp->get_instr(m_cont_dest);
+  }
+  sp->add_mark_lead(m_cont_dest, leads);
+  return m_ip+1;
+}
+
+/*
+  Relocate this instruction to position 'dst' during dead-code removal,
+  fixing up the continuation destination. Backpatch list entries ('bp')
+  handle forward references. Restored the stripped template argument
+  ("List *bp" -> List<sp_instr> *bp).
+*/
+void
+sp_instr_set_case_expr::opt_move(uint dst, List<sp_instr> *bp)
+{
+  if (m_cont_dest > m_ip)
+    bp->push_back(this);        // Forward
+  else if (m_cont_optdest)
+    m_cont_dest= m_cont_optdest->m_ip; // Backward
+  m_ip= dst;
+}
+
+
+/* ------------------------------------------------------------------ */
+
+
+/*
+  Structure that represent all instances of one table
+  in optimized multi-set of tables used by routine.
+*/
+
+typedef struct st_sp_table
+{
+  /*
+    Multi-set key:
+      db_name\0table_name\0alias\0 - for normal tables
+      db_name\0table_name\0        - for temporary tables
+  */
+  LEX_STRING qname;
+  size_t db_length, table_name_length;
+  bool temp;               /* true if corresponds to a temporary table */
+  thr_lock_type lock_type; /* lock type used for prelocking */
+  uint lock_count;         /* max locks needed simultaneously (over queries) */
+  uint query_lock_count;   /* locks needed by the query being merged */
+  uint8 trg_event_map;     /* OR of TABLE_LIST::trg_event_map over all uses */
+  my_bool for_insert_data; /* OR of TABLE_LIST::for_insert_data over all uses */
+} SP_TABLE;
+
+
+/* HASH key-extraction callback: return the qname key of an SP_TABLE. */
+uchar *sp_table_key(const uchar *ptr, size_t *plen, my_bool first)
+{
+  SP_TABLE *tab= (SP_TABLE *)ptr;
+  *plen= tab->qname.length;
+  return (uchar *)tab->qname.str;
+}
+
+
+/**
+  Merge the list of tables used by some query into the multi-set of
+  tables used by routine.
+
+  @param thd                 thread context
+  @param table               table list
+  @param lex_for_tmp_check   LEX of the query for which we are merging
+                             table list.
+
+  @note
+    This method will use LEX provided to check whenever we are creating
+    temporary table and mark it as such in target multi-set.
+
+  @retval
+    TRUE    Success
+  @retval
+    FALSE   Error
+*/
+
+bool
+sp_head::merge_table_list(THD *thd, TABLE_LIST *table, LEX *lex_for_tmp_check)
+{
+  SP_TABLE *tab;
+
+  /* DROP of a temporary table needs no prelocking entry: succeed at once. */
+  if ((lex_for_tmp_check->sql_command == SQLCOM_DROP_TABLE ||
+      lex_for_tmp_check->sql_command == SQLCOM_DROP_SEQUENCE) &&
+      lex_for_tmp_check->tmp_table())
+    return TRUE;
+
+  /* Reset per-query lock counters before merging this query's tables. */
+  for (uint i= 0 ; i < m_sptabs.records ; i++)
+  {
+    tab= (SP_TABLE*) my_hash_element(&m_sptabs, i);
+    tab->query_lock_count= 0;
+  }
+
+  for (; table ; table= table->next_global)
+    if (!table->derived && !table->schema_table && !table->table_function)
+    {
+      /*
+        Structure of key for the multi-set is "db\0table\0alias\0".
+        Since "alias" part can have arbitrary length we use String
+        object to construct the key. By default String will use
+        buffer allocated on stack with NAME_LEN bytes reserved for
+        alias, since in most cases it is going to be smaller than
+        NAME_LEN bytes.
+      */
+      char tname_buff[(SAFE_NAME_LEN + 1) * 3];
+      String tname(tname_buff, sizeof(tname_buff), &my_charset_bin);
+      uint temp_table_key_length;
+
+      tname.length(0);
+      tname.append(&table->db);
+      tname.append('\0');
+      tname.append(&table->table_name);
+      tname.append('\0');
+      temp_table_key_length= tname.length();
+      tname.append(&table->alias);
+      tname.append('\0');
+
+      /*
+        Upgrade the lock type because this table list will be used
+        only in pre-locked mode, in which DELAYED inserts are always
+        converted to normal inserts.
+      */
+      if (table->lock_type == TL_WRITE_DELAYED)
+        table->lock_type= TL_WRITE;
+
+      /*
+        We ignore alias when we check if table was already marked as temporary
+        (and therefore should not be prelocked). Otherwise we will erroneously
+        treat table with same name but with different alias as non-temporary.
+      */
+      if ((tab= (SP_TABLE*) my_hash_search(&m_sptabs, (uchar *)tname.ptr(),
+                                           tname.length())) ||
+          ((tab= (SP_TABLE*) my_hash_search(&m_sptabs, (uchar *)tname.ptr(),
+                                            temp_table_key_length)) &&
+           tab->temp))
+      {
+        /* Known table: merge lock info into the existing multi-set entry. */
+        if (tab->lock_type < table->lock_type)
+          tab->lock_type= table->lock_type; // Use the table with the highest lock type
+        tab->query_lock_count++;
+        if (tab->query_lock_count > tab->lock_count)
+          tab->lock_count++;
+        tab->trg_event_map|= table->trg_event_map;
+        tab->for_insert_data|= table->for_insert_data;
+      }
+      else
+      {
+        /* New table: create a fresh SP_TABLE entry on the THD mem_root. */
+        if (!(tab= (SP_TABLE *)thd->calloc(sizeof(SP_TABLE))))
+          return FALSE;
+        if ((lex_for_tmp_check->sql_command == SQLCOM_CREATE_TABLE ||
+             lex_for_tmp_check->sql_command == SQLCOM_CREATE_SEQUENCE) &&
+            lex_for_tmp_check->query_tables == table &&
+            lex_for_tmp_check->tmp_table())
+        {
+          tab->temp= TRUE;
+          tab->qname.length= temp_table_key_length;
+        }
+        else
+          tab->qname.length= tname.length();
+        tab->qname.str= (char*) thd->memdup(tname.ptr(), tab->qname.length);
+        if (!tab->qname.str)
+          return FALSE;
+        tab->table_name_length= table->table_name.length;
+        tab->db_length= table->db.length;
+        tab->lock_type= table->lock_type;
+        tab->lock_count= tab->query_lock_count= 1;
+        tab->trg_event_map= table->trg_event_map;
+        tab->for_insert_data= table->for_insert_data;
+        if (my_hash_insert(&m_sptabs, (uchar *)tab))
+          return FALSE;
+      }
+    }
+  return TRUE;
+}
+
+
+/**
+  Add tables used by routine to the table list.
+
+    Converts multi-set of tables used by this routine to table list and adds
+    this list to the end of table list specified by 'query_tables_last_ptr'.
+
+    Elements of list will be allocated in PS memroot, so this list will be
+    persistent between PS executions.
+
+  @param[in] thd                        Thread context
+  @param[in,out] query_tables_last_ptr  Pointer to the next_global member of
+    last element of the list where tables
+    will be added (or to its root).
+  @param[in] belong_to_view             Uppermost view which uses this routine,
+    0 if none.
+
+  @retval
+    TRUE    if some elements were added
+  @retval
+    FALSE   otherwise.
+*/
+
+bool
+sp_head::add_used_tables_to_table_list(THD *thd,
+                                       TABLE_LIST ***query_tables_last_ptr,
+                                       TABLE_LIST *belong_to_view)
+{
+  uint i;
+  Query_arena *arena, backup;
+  bool result= FALSE;
+  DBUG_ENTER("sp_head::add_used_tables_to_table_list");
+
+  /*
+    Use persistent arena for table list allocation to be PS/SP friendly.
+    Note that we also have to copy database/table names and alias to PS/SP
+    memory since current instance of sp_head object can pass away before
+    next execution of PS/SP for which tables are added to prelocking list.
+    This will be fixed by introducing of proper invalidation mechanism
+    once new TDC is ready.
+  */
+  arena= thd->activate_stmt_arena_if_needed(&backup);
+
+  for (i=0 ; i < m_sptabs.records ; i++)
+  {
+    char *tab_buff, *key_buff;
+    SP_TABLE *stab= (SP_TABLE*) my_hash_element(&m_sptabs, i);
+    LEX_CSTRING db_name;
+    if (stab->temp)
+      continue;                      // Temporary tables are not prelocked
+
+    if (!(tab_buff= (char *)thd->alloc(ALIGN_SIZE(sizeof(TABLE_LIST)) *
+                                        stab->lock_count)) ||
+        !(key_buff= (char*)thd->memdup(stab->qname.str,
+                                       stab->qname.length)))
+    {
+      /*
+        Fix: restore the statement arena before bailing out on OOM.
+        The original returned directly, leaving the activated arena in
+        place on the THD.
+      */
+      if (arena)
+        thd->restore_active_arena(arena, &backup);
+      DBUG_RETURN(FALSE);
+    }
+
+    /* The copied key has layout "db\0table\0alias\0"; slice it up. */
+    db_name.str=    key_buff;
+    db_name.length= stab->db_length;
+
+
+    /* One TABLE_LIST element per simultaneously needed lock. */
+    for (uint j= 0; j < stab->lock_count; j++)
+    {
+      TABLE_LIST *table= (TABLE_LIST *)tab_buff;
+      LEX_CSTRING table_name= { key_buff + stab->db_length + 1,
+                                stab->table_name_length };
+      LEX_CSTRING alias= { table_name.str + table_name.length + 1,
+                           strlen(table_name.str + table_name.length + 1) };
+
+      table->init_one_table_for_prelocking(&db_name,
+                                           &table_name,
+                                           &alias,
+                                           stab->lock_type,
+                                           TABLE_LIST::PRELOCK_ROUTINE,
+                                           belong_to_view,
+                                           stab->trg_event_map,
+                                           query_tables_last_ptr,
+                                           stab->for_insert_data);
+      tab_buff+= ALIGN_SIZE(sizeof(TABLE_LIST));
+      result= TRUE;
+    }
+  }
+
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+
+  DBUG_RETURN(result);
+}
+
+
+/**
+  Simple function for adding an explicitly named (systems) table to
+  the global table list, e.g. "mysql", "proc".
+*/
+
+TABLE_LIST *
+sp_add_to_query_tables(THD *thd, LEX *lex,
+		       const LEX_CSTRING *db, const LEX_CSTRING *name,
+                       thr_lock_type locktype,
+                       enum_mdl_type mdl_type)
+{
+  TABLE_LIST *table;
+
+  /* All allocations live on the THD mem_root; NULL means OOM. */
+  if (!(table= (TABLE_LIST *)thd->calloc(sizeof(TABLE_LIST))))
+    return NULL;
+  if (!thd->make_lex_string(&table->db, db->str, db->length) ||
+      !thd->make_lex_string(&table->table_name, name->str, name->length) ||
+      !thd->make_lex_string(&table->alias, name->str, name->length))
+    return NULL;
+
+  table->lock_type= locktype;
+  table->select_lex= lex->current_select;
+  table->cacheable_table= 1;
+  /* Request table-level MDL with transaction-long duration. */
+  MDL_REQUEST_INIT(&table->mdl_request, MDL_key::TABLE, table->db.str,
+                   table->table_name.str, mdl_type, MDL_TRANSACTION);
+
+  lex->add_to_query_tables(table);
+  return table;
+}
+
+
+/*
+  Pick the assignment source for a variable: the explicit value if given,
+  otherwise the fallback (e.g. the variable's DEFAULT), otherwise a freshly
+  allocated NULL item. Returns NULL only on allocation failure.
+*/
+Item *sp_head::adjust_assignment_source(THD *thd, Item *val, Item *val2)
+{
+  if (val)
+    return val;
+  if (val2)
+    return val2;
+  return new (thd->mem_root) Item_null(thd);
+}
+
+/**
+  Helper action for a SET statement.
+  Used to push a SP local variable into the assignment list.
+
+  @param var_type the SP local variable
+  @param val      the value being assigned to the variable
+
+  @return TRUE if error, FALSE otherwise.
+*/
+
+bool
+sp_head::set_local_variable(THD *thd, sp_pcontext *spcont,
+                            const Sp_rcontext_handler *rh,
+                            sp_variable *spv, Item *val, LEX *lex,
+                            bool responsible_to_free_lex)
+{
+  /* Fall back to the variable's DEFAULT, or to NULL, when val is not given. */
+  if (!(val= adjust_assignment_source(thd, val, spv->default_value)))
+    return true;
+
+  /* Reject values referring to SP variables unknown in this context. */
+  if (val->walk(&Item::unknown_splocal_processor, false, NULL))
+    return true;
+
+  sp_instr_set *sp_set= new (thd->mem_root)
+                        sp_instr_set(instructions(), spcont, rh,
+                                     spv->offset, val, lex,
+                                     responsible_to_free_lex);
+
+  return sp_set == NULL || add_instr(sp_set);
+}
+
+
+/**
+  Similar to set_local_variable(), but for ROW variable fields.
+*/
+
+bool
+sp_head::set_local_variable_row_field(THD *thd, sp_pcontext *spcont,
+                                      const Sp_rcontext_handler *rh,
+                                      sp_variable *spv, uint field_idx,
+                                      Item *val, LEX *lex)
+{
+  /* No DEFAULT exists for a single ROW field; fall back to NULL only. */
+  if (!(val= adjust_assignment_source(thd, val, NULL)))
+    return true;
+
+  sp_instr_set_row_field *sp_set= new (thd->mem_root)
+                                  sp_instr_set_row_field(instructions(),
+                                                         spcont, rh,
+                                                         spv->offset,
+                                                         field_idx, val,
+                                                         lex, true);
+  return sp_set == NULL || add_instr(sp_set);
+}
+
+
+/*
+  Like set_local_variable_row_field(), but the field is addressed by name;
+  the name is resolved to an index at execution time.
+*/
+bool
+sp_head::set_local_variable_row_field_by_name(THD *thd, sp_pcontext *spcont,
+                                              const Sp_rcontext_handler *rh,
+                                              sp_variable *spv,
+                                              const LEX_CSTRING *field_name,
+                                              Item *val, LEX *lex)
+{
+  if (!(val= adjust_assignment_source(thd, val, NULL)))
+    return true;
+
+  sp_instr_set_row_field_by_name *sp_set=
+    new (thd->mem_root) sp_instr_set_row_field_by_name(instructions(),
+                                                       spcont, rh,
+                                                       spv->offset,
+                                                       *field_name,
+                                                       val,
+                                                       lex, true);
+  return sp_set == NULL || add_instr(sp_set);
+}
+
+
+/*
+  Emit the instructions for OPEN cursor: first the parameter assignments
+  (if any), then the sp_instr_copen itself.
+  Note: the extraction lost the template argument of List in the last
+  parameter ("List *parameters"); restored to List<sp_assignment_lex>.
+*/
+bool sp_head::add_open_cursor(THD *thd, sp_pcontext *spcont, uint offset,
+                              sp_pcontext *param_spcont,
+                              List<sp_assignment_lex> *parameters)
+{
+  /*
+    The caller must make sure that the number of formal parameters matches
+    the number of actual parameters.
+  */
+  DBUG_ASSERT((param_spcont ? param_spcont->context_var_count() :  0) ==
+              (parameters ? parameters->elements : 0));
+
+  if (parameters &&
+      add_set_cursor_param_variables(thd, param_spcont, parameters))
+    return true;
+
+  sp_instr_copen *i= new (thd->mem_root)
+                     sp_instr_copen(instructions(), spcont, offset);
+  return i == NULL || add_instr(i);
+}
+
+
+/*
+  Emit the instruction sequence that starts a FOR loop over a cursor:
+  parameter assignments (if any), structure copy into the index variable,
+  OPEN, and the first FETCH.
+*/
+bool sp_head::add_for_loop_open_cursor(THD *thd, sp_pcontext *spcont,
+                                       sp_variable *index,
+                                       const sp_pcursor *pcursor, uint coffset,
+                                       sp_assignment_lex *param_lex,
+                                       Item_args *parameters)
+{
+  if (parameters &&
+      add_set_for_loop_cursor_param_variables(thd, pcursor->param_context(),
+                                              param_lex, parameters))
+    return true;
+
+  /* Give the index variable the cursor's row structure (cursor%ROWTYPE). */
+  sp_instr *instr_copy_struct=
+    new (thd->mem_root) sp_instr_cursor_copy_struct(instructions(),
+                                                    spcont, coffset,
+                                                    pcursor->lex(),
+                                                    index->offset);
+  if (instr_copy_struct == NULL || add_instr(instr_copy_struct))
+    return true;
+
+  sp_instr_copen *instr_copen=
+    new (thd->mem_root) sp_instr_copen(instructions(), spcont, coffset);
+  if (instr_copen == NULL || add_instr(instr_copen))
+    return true;
+
+  /* Initial FETCH into the index variable. */
+  sp_instr_cfetch *instr_cfetch=
+    new (thd->mem_root) sp_instr_cfetch(instructions(),
+                                        spcont, coffset, false);
+  if (instr_cfetch == NULL || add_instr(instr_cfetch))
+    return true;
+  instr_cfetch->add_to_varlist(index);
+  return false;
+}
+
+
+/*
+  Emit one assignment instruction per actual cursor parameter of a
+  FOR loop, binding it to the corresponding formal parameter variable.
+*/
+bool
+sp_head::add_set_for_loop_cursor_param_variables(THD *thd,
+                                                 sp_pcontext *param_spcont,
+                                                 sp_assignment_lex *param_lex,
+                                                 Item_args *parameters)
+{
+  DBUG_ASSERT(param_spcont->context_var_count() == parameters->argument_count());
+  for (uint idx= 0; idx < parameters->argument_count(); idx ++)
+  {
+    /*
+      param_lex is shared between multiple items (cursor parameters).
+      Only the last sp_instr_set is responsible for freeing param_lex.
+      See more comments in LEX::sp_for_loop_cursor_declarations in sql_lex.cc.
+    */
+    bool last= idx + 1 == parameters->argument_count();
+    sp_variable *spvar= param_spcont->get_context_variable(idx);
+    if (set_local_variable(thd, param_spcont,
+                           &sp_rcontext_handler_local,
+                           spvar, parameters->arguments()[idx],
+                           param_lex, last))
+      return true;
+  }
+  return false;
+}
+
+
+/*
+  Turn 'spvar' into a ROW variable with the given field definitions,
+  completing both the row definition and each field's definition.
+*/
+bool sp_head::spvar_fill_row(THD *thd,
+                             sp_variable *spvar,
+                             Row_definition_list *defs)
+{
+  spvar->field_def.set_row_field_definitions(defs);
+  spvar->field_def.field_name= spvar->name;
+  if (fill_spvar_definition(thd, &spvar->field_def))
+    return true;
+  row_fill_field_definitions(thd, defs);
+  return false;
+}
+
+
+/*
+  Make 'spvar' a column type reference (DECLARE v table.column%TYPE)
+  without an explicit database qualifier.
+*/
+bool sp_head::spvar_fill_type_reference(THD *thd,
+                                        sp_variable *spvar,
+                                        const LEX_CSTRING &table,
+                                        const LEX_CSTRING &col)
+{
+  Qualified_column_ident *ref;
+  if (!(ref= new (thd->mem_root) Qualified_column_ident(&table, &col)))
+    return true;
+  fill_spvar_using_type_reference(spvar, ref);
+  return false;
+}
+
+
+/*
+  Make 'spvar' a column type reference (DECLARE v db.table.column%TYPE)
+  with an explicit database qualifier.
+*/
+bool sp_head::spvar_fill_type_reference(THD *thd,
+                                        sp_variable *spvar,
+                                        const LEX_CSTRING &db,
+                                        const LEX_CSTRING &table,
+                                        const LEX_CSTRING &col)
+{
+  Qualified_column_ident *ref;
+  if (!(ref= new (thd->mem_root) Qualified_column_ident(thd, &db, &table, &col)))
+    return true;
+  fill_spvar_using_type_reference(spvar, ref);
+  return false;
+}
+
+
+/* Make 'spvar' a table row type reference (DECLARE v table%ROWTYPE). */
+bool sp_head::spvar_fill_table_rowtype_reference(THD *thd,
+                                                 sp_variable *spvar,
+                                                 const LEX_CSTRING &table)
+{
+  Table_ident *ref;
+  if (!(ref= new (thd->mem_root) Table_ident(&table)))
+    return true;
+  fill_spvar_using_table_rowtype_reference(thd, spvar, ref);
+  return false;
+}
+
+
+/* Same as above, but with an explicit database (DECLARE v db.table%ROWTYPE). */
+bool sp_head::spvar_fill_table_rowtype_reference(THD *thd,
+                                                 sp_variable *spvar,
+                                                 const LEX_CSTRING &db,
+                                                 const LEX_CSTRING &table)
+{
+  Table_ident *ref;
+  if (!(ref= new (thd->mem_root) Table_ident(thd, &db, &table, false)))
+    return true;
+  fill_spvar_using_table_rowtype_reference(thd, spvar, ref);
+  return false;
+}
+
+
+/*
+  A non-AGGREGATE routine must not contain aggregate-only instructions
+  (FETCH GROUP NEXT ROW). Raise ER_NOT_AGGREGATE_FUNCTION if it does.
+*/
+bool sp_head::check_group_aggregate_instructions_forbid() const
+{
+  if (likely(!(m_flags & sp_head::HAS_AGGREGATE_INSTR)))
+    return false;
+  my_error(ER_NOT_AGGREGATE_FUNCTION, MYF(0));
+  return true;
+}
+
+
+/*
+  An AGGREGATE function must contain FETCH GROUP NEXT ROW at least once.
+  Raise ER_INVALID_AGGREGATE_FUNCTION otherwise.
+*/
+bool sp_head::check_group_aggregate_instructions_require() const
+{
+  if (likely(m_flags & HAS_AGGREGATE_INSTR))
+    return false;
+  my_error(ER_INVALID_AGGREGATE_FUNCTION, MYF(0));
+  return true;
+}
+
+
+/* Dispatch the aggregate-instruction check on the declared routine type. */
+bool sp_head::check_group_aggregate_instructions_function() const
+{
+  if (agg_type() == GROUP_AGGREGATE)
+    return check_group_aggregate_instructions_require();
+  return check_group_aggregate_instructions_forbid();
+}
+
+
+/*
+  In Oracle mode stored routines have an optional name
+  at the end of a declaration:
+    PROCEDURE p1 AS
+    BEGIN
+      NULL
+    END p1;
+  Check that the first p1 and the last p1 match.
+*/
+
+bool sp_head::check_package_routine_end_name(const LEX_CSTRING &end_name) const
+{
+  LEX_CSTRING non_qualified_name= m_name;
+  const char *errpos;
+  size_t ofs;
+  if (!end_name.length)
+    return false; // No end name
+  /* m_name is "package.routine"; find the part after the last dot. */
+  if (!(errpos= strrchr(m_name.str, '.')))
+  {
+    errpos= m_name.str;
+    goto err;
+  }
+  errpos++;
+  ofs= errpos - m_name.str;
+  non_qualified_name.str+= ofs;
+  non_qualified_name.length-= ofs;
+  if (Sp_handler::eq_routine_name(end_name, non_qualified_name))
+    return false;
+err:
+  my_error(ER_END_IDENTIFIER_DOES_NOT_MATCH, MYF(0), end_name.str, errpos);
+  return true;
+}
+
+
+/*
+  Verify that the optional END name of a standalone routine matches its
+  declared name (Oracle mode). A missing end name always matches.
+*/
+bool
+sp_head::check_standalone_routine_end_name(const sp_name *end_name) const
+{
+  if (end_name && !end_name->eq(this))
+  {
+    my_error(ER_END_IDENTIFIER_DOES_NOT_MATCH, MYF(0),
+             ErrConvDQName(end_name).ptr(), ErrConvDQName(this).ptr());
+    return true;
+  }
+  return false;
+}
+
+
+/* Package members share the cache version of their owning package. */
+ulong sp_head::sp_cache_version() const
+{
+  if (m_parent)
+    return m_parent->sp_cache_version();
+  return m_sp_cache_version;
+}
diff --git a/sql/sp_head.h b/sql/sp_head.h
new file mode 100644
index 00000000..da4e5763
--- /dev/null
+++ b/sql/sp_head.h
@@ -0,0 +1,2188 @@
+/* -*- C++ -*- */
+/*
+   Copyright (c) 2002, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2020, 2022, MariaDB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#ifndef _SP_HEAD_H_
+#define _SP_HEAD_H_
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface			/* gcc class implementation */
+#endif
+
+/*
+  It is necessary to include set_var.h instead of item.h because there
+  are dependencies on include order for set_var.h and item.h. This
+  will be resolved later.
+*/
+#include "sql_class.h"                          // THD, set_var.h: THD
+#include "set_var.h"                            // Item
+#include "sp_pcontext.h"                        // sp_pcontext
+#include <stddef.h>
+#include "sp.h"
+
+/**
+  @defgroup Stored_Routines Stored Routines
+  @ingroup Runtime_Environment
+  @{
+*/
+
+uint
+sp_get_flags_for_command(LEX *lex);
+
+class sp_instr;
+class sp_instr_opt_meta;
+class sp_instr_jump_if_not;
+
+/**
+  Number of PSI_statement_info instruments
+  for internal stored programs statements.
+*/
+#ifdef HAVE_PSI_INTERFACE
+void init_sp_psi_keys(void);
+#endif
+
+/*************************************************************************/
+
+/**
+  Stored_program_creation_ctx -- base class for creation context of stored
+  programs (stored routines, triggers, events).
+*/
+
+class Stored_program_creation_ctx :public Default_object_creation_ctx
+{
+public:
+  /** Return the database collation captured when the context was created. */
+  CHARSET_INFO *get_db_cl()
+  {
+    return m_db_cl;
+  }
+
+public:
+  /** Deep-copy this context onto the given mem_root. */
+  virtual Stored_program_creation_ctx *clone(MEM_ROOT *mem_root) = 0;
+
+protected:
+  Stored_program_creation_ctx(THD *thd)
+    : Default_object_creation_ctx(thd),
+      m_db_cl(thd->variables.collation_database)
+  { }
+
+  Stored_program_creation_ctx(CHARSET_INFO *client_cs,
+                              CHARSET_INFO *connection_cl,
+                              CHARSET_INFO *db_cl)
+    : Default_object_creation_ctx(client_cs, connection_cl),
+      m_db_cl(db_cl)
+  { }
+
+protected:
+  /** Install the stored collation into the THD, then the base-class env. */
+  virtual void change_env(THD *thd) const
+  {
+    thd->variables.collation_database= m_db_cl;
+
+    Default_object_creation_ctx::change_env(thd);
+  }
+
+protected:
+  /**
+    db_cl stores the value of the database collation. Both character set
+    and collation attributes are used.
+
+    Database collation is included into the context because it defines the
+    default collation for stored-program variables.
+  */
+  CHARSET_INFO *m_db_cl;
+};
+
+/*************************************************************************/
+
+class sp_name : public Sql_alloc,
+                public Database_qualified_name
+{
+public:
+  bool       m_explicit_name;                   /**< Prepend the db name? */
+
+  sp_name(const LEX_CSTRING *db, const LEX_CSTRING *name,
+          bool use_explicit_name)
+    : Database_qualified_name(db, name), m_explicit_name(use_explicit_name)
+  {
+    /* Normalize the db part in-place when the server is case-insensitive. */
+    if (lower_case_table_names && m_db.length)
+      m_db.length= my_casedn_str(files_charset_info, (char*) m_db.str);
+  }
+
+  /** Create temporary sp_name object from MDL key. Store in qname_buff */
+  sp_name(const MDL_key *key, char *qname_buff);
+
+  ~sp_name() = default;
+};
+
+
+bool
+check_routine_name(const LEX_CSTRING *ident);
+
+class sp_head :private Query_arena,
+               public Database_qualified_name,
+               public Sql_alloc
+{
+  sp_head(const sp_head &)= delete;
+  void operator=(sp_head &)= delete;
+
+protected:
+  MEM_ROOT main_mem_root;
+#ifdef PROTECT_STATEMENT_MEMROOT
+  /*
+    The following data member is wholly for debugging purpose.
+    It can be used for possible crash analysis to determine how many times
+    the stored routine was executed before the mem_root marked read_only
+    was requested for a memory chunk. Additionally, a value of this data
+    member is output to the log with DBUG_PRINT.
+  */
+  ulong executed_counter;
+#endif
+public:
+  /** Possible values of m_flags */
+  enum {
+    HAS_RETURN= 1,              // For FUNCTIONs only: is set if has RETURN
+    MULTI_RESULTS= 8,           // Is set if a procedure with SELECT(s)
+    CONTAINS_DYNAMIC_SQL= 16,   // Is set if a procedure with PREPARE/EXECUTE
+    IS_INVOKED= 32,             // Is set if this sp_head is being used
+    HAS_SET_AUTOCOMMIT_STMT= 64,// Is set if a procedure with 'set autocommit'
+    /* Is set if a procedure with COMMIT (implicit or explicit) | ROLLBACK */
+    HAS_COMMIT_OR_ROLLBACK= 128,
+    LOG_SLOW_STATEMENTS= 256,   // Used by events
+    LOG_GENERAL_LOG= 512,        // Used by events
+    HAS_SQLCOM_RESET= 1024,
+    HAS_SQLCOM_FLUSH= 2048,
+
+    /**
+      Marks routines that directly (i.e. not by calling other routines)
+      change tables. Note that this flag is set automatically based on
+      type of statements used in the stored routine and is different
+      from routine characteristic provided by user in a form of CONTAINS
+      SQL, READS SQL DATA, MODIFIES SQL DATA clauses. The latter are
+      accepted by parser but pretty much ignored after that.
+      We don't rely on them:
+      a) for compatibility reasons.
+      b) because in CONTAINS SQL case they don't provide enough
+      information anyway.
+     */
+    MODIFIES_DATA= 4096,
+    /*
+      Marks routines that have column type references: DECLARE a t1.a%TYPE;
+    */
+    HAS_COLUMN_TYPE_REFS= 8192,
+    /* Set if has FETCH GROUP NEXT ROW instr. Used to ensure that only
+       functions with AGGREGATE keyword use the instr. */
+    HAS_AGGREGATE_INSTR= 16384
+  };
+
+  sp_package *m_parent;
+  const Sp_handler *m_handler;
+  uint m_flags;                 // Boolean attributes of a stored routine
+
+  /**
+    Instrumentation interface for SP.
+  */
+  PSI_sp_share *m_sp_share;
+
+  Column_definition m_return_field_def; /**< This is used for FUNCTIONs only. */
+
+  const char *m_tmp_query;	///< Temporary pointer to sub query string
+private:
+  /*
+    Private to guarantee that m_chistics.comment is properly set to:
+    - a string which is alloced on this->mem_root
+    - or (NULL,0)
+    set_chistics() makes sure this.
+  */
+  Sp_chistics m_chistics;
+  void set_chistics(const st_sp_chistics &chistics);
+  inline void set_chistics_agg_type(enum enum_sp_aggregate_type type)
+  {
+    m_chistics.agg_type= type;
+  }
+public:
+  sql_mode_t m_sql_mode;		///< For SHOW CREATE and execution
+  bool       m_explicit_name;                   /**< Prepend the db name? */
+  LEX_CSTRING m_qname;		///< db.name
+  LEX_CSTRING m_params;
+  LEX_CSTRING m_body;
+  LEX_CSTRING m_body_utf8;
+  LEX_CSTRING m_defstr;
+  AUTHID      m_definer;
+
+  const st_sp_chistics &chistics() const { return m_chistics; }
+  const LEX_CSTRING &comment() const { return m_chistics.comment; }
+  void set_suid(enum_sp_suid_behaviour suid) { m_chistics.suid= suid; }
+  enum_sp_suid_behaviour suid() const { return m_chistics.suid; }
+  bool detistic() const { return m_chistics.detistic; }
+  enum_sp_data_access daccess() const { return m_chistics.daccess; }
+  enum_sp_aggregate_type agg_type() const { return m_chistics.agg_type; }
+  /**
+    Is this routine being executed?
+  */
+  virtual bool is_invoked() const { return m_flags & IS_INVOKED; }
+
+  /**
+    Get the value of the SP cache version, as remembered
+    when the routine was inserted into the cache.
+  */
+  ulong sp_cache_version() const;
+
+  /** Set the value of the SP cache version.  */
+  void set_sp_cache_version(ulong version_arg) const
+  {
+    m_sp_cache_version= version_arg;
+  }
+
+  sp_rcontext *rcontext_create(THD *thd, Field *retval, List<Item> *args);
+  sp_rcontext *rcontext_create(THD *thd, Field *retval,
+                               Item **args, uint arg_count);
+  sp_rcontext *rcontext_create(THD *thd, Field *retval,
+                               Row_definition_list *list,
+                               bool switch_security_ctx);
+  bool eq_routine_spec(const sp_head *) const;
+private:
+  /**
+    Version of the stored routine cache at the moment when the
+    routine was added to it. Is used only for functions and
+    procedures, not used for triggers or events.  When sp_head is
+    created, its version is 0. When it's added to the cache, the
+    version is assigned the global value 'Cversion'.
+    If later on Cversion is incremented, we know that the routine
+    is obsolete and should not be used --
+    sp_cache_flush_obsolete() will purge it.
+  */
+  mutable ulong m_sp_cache_version;
+  Stored_program_creation_ctx *m_creation_ctx;
+  /**
+    Boolean combination of (1<<flag), where flag is a member of
+    LEX::enum_binlog_stmt_unsafe.
+  */
+  uint32 unsafe_flags;
+
+public:
+  inline Stored_program_creation_ctx *get_creation_ctx()
+  {
+    return m_creation_ctx;
+  }
+
+  inline void set_creation_ctx(Stored_program_creation_ctx *creation_ctx)
+  {
+    m_creation_ctx= creation_ctx->clone(mem_root);
+  }
+  }
+
+  longlong m_created;
+  longlong m_modified;
+  /** Recursion level of the current SP instance. The levels are numbered from 0 */
+  ulong m_recursion_level;
+  /**
+    A list of diferent recursion level instances for the same procedure.
+    For every recursion level we have a sp_head instance. This instances
+    connected in the list. The list ordered by increasing recursion level
+    (m_recursion_level).
+  */
+  sp_head *m_next_cached_sp;
+  /**
+    Pointer to the first element of the above list
+  */
+  sp_head *m_first_instance;
+  /**
+    Pointer to the first free (non-INVOKED) routine in the list of
+    cached instances for this SP. This pointer is set only for the first
+    SP in the list of instences (see above m_first_cached_sp pointer).
+    The pointer equal to 0 if we have no free instances.
+    For non-first instance value of this pointer meanless (point to itself);
+  */
+  sp_head *m_first_free_instance;
+  /**
+    Pointer to the last element in the list of instances of the SP.
+    For non-first instance value of this pointer meanless (point to itself);
+  */
+  sp_head *m_last_cached_sp;
+  /**
+    Set containing names of stored routines used by this routine.
+    Note that unlike elements of similar set for statement elements of this
+    set are not linked in one list. Because of this we are able save memory
+    by using for this set same objects that are used in 'sroutines' sets
+    for statements of which this stored routine consists.
+  */
+  HASH m_sroutines;
+  // Pointers set during parsing
+  const char *m_param_begin;
+  const char *m_param_end;
+
+private:
+  /*
+    A pointer to the body start inside the cpp buffer.
+    Used only during parsing. Should be removed eventually.
+    The affected functions/methods should be fixed to get the cpp body start
+    as a parameter, rather than through this member.
+  */
+  const char *m_cpp_body_begin;
+
+public:
+  /*
+    Security context for stored routine which should be run under
+    definer privileges.
+  */
+  Security_context m_security_ctx;
+
+  /**
+    List of all items (Item_trigger_field objects) representing fields in
+    old/new version of row in trigger. We use this list for checking whenever
+    all such fields are valid at trigger creation time and for binding these
+    fields to TABLE object at table open (although for latter pointer to table
+    being opened is probably enough).
+  */
+  SQL_I_List m_trg_table_fields;
+
+protected:
+  /* Construction/destruction go through create()/destroy() only. */
+  sp_head(MEM_ROOT *mem_root, sp_package *parent, const Sp_handler *handler,
+          enum_sp_aggregate_type agg_type);
+  virtual ~sp_head();
+public:
+  static void destroy(sp_head *sp);
+  static sp_head *create(sp_package *parent, const Sp_handler *handler,
+                         enum_sp_aggregate_type agg_type);
+
+  /// Initialize after we have reset mem_root
+  void
+  init(LEX *lex);
+
+  /** Copy sp name from parser. */
+  void
+  init_sp_name(const sp_name *spname);
+
+  /** Set the body-definition start position. */
+  void
+  set_body_start(THD *thd, const char *cpp_body_start);
+
+  /** Set the statement-definition (body-definition) end position. */
+  void
+  set_stmt_end(THD *thd, const char *cpp_body_end);
+
+  /** Execute this routine as a trigger body. */
+  bool
+  execute_trigger(THD *thd,
+                  const LEX_CSTRING *db_name,
+                  const LEX_CSTRING *table_name,
+                  GRANT_INFO *grant_info);
+
+  /** Execute this routine as a stored function. */
+  bool
+  execute_function(THD *thd, Item **args, uint argcount, Field *return_fld,
+                   sp_rcontext **nctx, Query_arena *call_arena);
+
+  /** Execute this routine as a stored procedure. */
+  bool
+  execute_procedure(THD *thd, List *args);
+
+  static void
+  show_create_routine_get_fields(THD *thd, const Sp_handler *sph,
+                                 List *fields);
+
+  bool
+  show_create_routine(THD *thd, const Sp_handler *sph);
+
+  MEM_ROOT *get_main_mem_root() { return &main_mem_root; }
+
+  /* Append an instruction to m_instr. */
+  int
+  add_instr(sp_instr *instr);
+
+  bool
+  add_instr_jump(THD *thd, sp_pcontext *spcont);
+
+  bool
+  add_instr_jump(THD *thd, sp_pcontext *spcont, uint dest);
+
+  bool
+  add_instr_jump_forward_with_backpatch(THD *thd, sp_pcontext *spcont,
+                                        sp_label *lab);
+  /* Same as above, but uses the last label of the given context. */
+  bool
+  add_instr_jump_forward_with_backpatch(THD *thd, sp_pcontext *spcont)
+  {
+    return add_instr_jump_forward_with_backpatch(thd, spcont,
+                                                 spcont->last_label());
+  }
+
+  bool
+  add_instr_freturn(THD *thd, sp_pcontext *spcont, Item *item, LEX *lex);
+
+  bool
+  add_instr_preturn(THD *thd, sp_pcontext *spcont);
+
+  Item *adjust_assignment_source(THD *thd, Item *val, Item *val2);
+  /**
+    @param thd                     - the current thd
+    @param spcont                  - the current parse context
+    @param spv                     - the SP variable
+    @param val                     - the value to be assigned to the variable
+    @param lex                     - the LEX that was used to create "val"
+    @param responsible_to_free_lex - if the generated sp_instr_set should
+                                     free "lex".
+    @retval true                   - on error
+    @retval false                  - on success
+  */
+  bool set_local_variable(THD *thd, sp_pcontext *spcont,
+                          const Sp_rcontext_handler *rh,
+                          sp_variable *spv, Item *val, LEX *lex,
+                          bool responsible_to_free_lex);
+  bool set_local_variable_row_field(THD *thd, sp_pcontext *spcont,
+                                    const Sp_rcontext_handler *rh,
+                                    sp_variable *spv, uint field_idx,
+                                    Item *val, LEX *lex);
+  bool set_local_variable_row_field_by_name(THD *thd, sp_pcontext *spcont,
+                                            const Sp_rcontext_handler *rh,
+                                            sp_variable *spv,
+                                            const LEX_CSTRING *field_name,
+                                            Item *val, LEX *lex);
+  bool check_package_routine_end_name(const LEX_CSTRING &end_name) const;
+  bool check_standalone_routine_end_name(const sp_name *end_name) const;
+  bool check_group_aggregate_instructions_function() const;
+  bool check_group_aggregate_instructions_forbid() const;
+  bool check_group_aggregate_instructions_require() const;
+private:
+  /**
+    Generate a code to set a single cursor parameter variable.
+    @param thd          - current thd, for mem_root allocations.
+    @param param_spcont - the context of the parameter block
+    @param idx          - the index of the parameter
+    @param prm          - the actual parameter (contains information about
+                          the assignment source expression Item,
+                          its free list, and its LEX)
+  */
+  bool add_set_cursor_param_variable(THD *thd,
+                                     sp_pcontext *param_spcont, uint idx,
+                                     sp_assignment_lex *prm)
+  {
+    DBUG_ASSERT(idx < param_spcont->context_var_count());
+    sp_variable *spvar= param_spcont->get_context_variable(idx);
+    /*
+      add_instr() gets free_list from m_thd->free_list.
+      Initialize it before the set_local_variable() call.
+    */
+    DBUG_ASSERT(m_thd->free_list == NULL);
+    m_thd->free_list= prm->get_free_list();
+    if (set_local_variable(thd, param_spcont,
+                           &sp_rcontext_handler_local,
+                           spvar, prm->get_item(), prm, true))
+      return true;
+    /*
+      Safety:
+      The item and its free_list are now fully owned by the sp_instr_set
+      instance, created by set_local_variable(). The sp_instr_set instance
+      is now responsible for freeing the item and the free_list.
+      Reset the "item" and the "free_list" members of "prm",
+      to avoid double pointers to the same objects from "prm" and
+      from the sp_instr_set instance.
+    */
+    prm->set_item_and_free_list(NULL, NULL);
+    return false;
+  }
+
+  /**
+    Generate a code to set all cursor parameter variables.
+    This method is called only when parameters exist,
+    and the number of formal parameters matches the number of actual
+    parameters. See also comments to add_open_cursor().
+  */
+  bool add_set_cursor_param_variables(THD *thd, sp_pcontext *param_spcont,
+                                      List *parameters)
+  {
+    DBUG_ASSERT(param_spcont->context_var_count() == parameters->elements);
+    sp_assignment_lex *prm;
+    List_iterator li(*parameters);
+    for (uint idx= 0; (prm= li++); idx++)
+    {
+      if (add_set_cursor_param_variable(thd, param_spcont, idx, prm))
+        return true;
+    }
+    return false;
+  }
+
+  /**
+    Generate a code to set all cursor parameter variables for a FOR LOOP, e.g.:
+      FOR index IN cursor(1,2,3)
+    @param thd          - current thd, for mem_root allocations
+    @param param_spcont - the context of the cursor parameter block
+    @param param_lex    - the LEX that owns the Items in "parameters"
+    @param parameters   - the actual cursor parameters
+  */
+  bool add_set_for_loop_cursor_param_variables(THD *thd,
+                                               sp_pcontext *param_spcont,
+                                               sp_assignment_lex *param_lex,
+                                               Item_args *parameters);
+
+  bool bind_input_param(THD *thd,
+                        Item *arg_item,
+                        uint arg_no,
+                        sp_rcontext *nctx,
+                        bool is_function);
+
+  bool bind_output_param(THD *thd,
+                         Item *arg_item,
+                         uint arg_no,
+                         sp_rcontext *octx,
+                         sp_rcontext *nctx);
+
+public:
+  /**
+    Generate a code for an "OPEN cursor" statement.
+    @param thd          - current thd, for mem_root allocations
+    @param spcont       - the context of the cursor
+    @param offset       - the offset of the cursor
+    @param param_spcont - the context of the cursor parameter block
+    @param parameters   - the list of the OPEN actual parameters
+
+    The caller must make sure that the number of local variables
+    in "param_spcont" (formal parameters) matches the number of list elements
+    in "parameters" (actual parameters).
+    NULL in either of them means 0 parameters.
+  */
+  bool add_open_cursor(THD *thd, sp_pcontext *spcont,
+                       uint offset,
+                       sp_pcontext *param_spcont,
+                       List *parameters);
+
+  /**
+    Generate an initiation code for a CURSOR FOR LOOP, e.g.:
+      FOR index IN cursor         -- cursor without parameters
+      FOR index IN cursor(1,2,3)  -- cursor with parameters
+
+    The code generated by this method does the following during SP run-time:
+    - Sets all cursor parameter variables from "parameters"
+    - Initializes the index ROW-type variable from the cursor
+      (the structure is copied from the cursor to the index variable)
+    - The cursor gets opened
+    - The first record is fetched from the cursor to the variable "index".
+
+    @param thd        - the current thread (for mem_root and error reporting)
+    @param spcont     - the current parse context
+    @param index      - the loop "index" ROW-type variable
+    @param pcursor    - the cursor
+    @param coffset    - the cursor offset
+    @param param_lex  - the LEX that owns Items in "parameters"
+    @param parameters - the cursor parameters Item array
+    @retval true      - on error (EOM)
+    @retval false     - on success
+  */
+  bool add_for_loop_open_cursor(THD *thd, sp_pcontext *spcont,
+                                sp_variable *index,
+                                const sp_pcursor *pcursor, uint coffset,
+                                sp_assignment_lex *param_lex,
+                                Item_args *parameters);
+  /**
+    Returns true if any substatement in the routine directly
+    (not through another routine) modifies data/changes table.
+
+    @sa Comment for MODIFIES_DATA flag.
+  */
+  bool modifies_data() const
+  { return m_flags & MODIFIES_DATA; }
+
+  /** Number of instructions currently added to this routine. */
+  inline uint instructions()
+  { return (uint)m_instr.elements; }
+
+  /**
+    Return the last added instruction.
+    Assumes at least one instruction has been added
+    (m_instr.elements > 0); otherwise the index underflows.
+  */
+  inline sp_instr *
+  last_instruction()
+  {
+    sp_instr *i;
+
+    get_dynamic(&m_instr, (uchar*)&i, m_instr.elements-1);
+    return i;
+  }
+
+  /* Replace the instruction at position "ip" with a no-op instruction. */
+  bool replace_instr_to_nop(THD *thd, uint ip);
+
+  /*
+    Resets lex in 'thd' and keeps a copy of the old one.
+
+    @todo Conflicting comment in sp_head.cc
+  */
+  bool
+  reset_lex(THD *thd);
+
+  bool
+  reset_lex(THD *thd, sp_lex_local *sublex);
+
+  /**
+    Merge two LEX instances.
+    @param oldlex - the upper level LEX we're going to restore to.
+    @param sublex - the local lex that have just parsed some substatement.
+    @returns      - false on success, true on error (e.g. failed to
+                    merge the routine list or the table list).
+    This method is shared by:
+    - restore_lex(), when the old LEX is popped by sp_head::m_lex.pop()
+    - THD::restore_from_local_lex_to_old_lex(), when the old LEX
+      is stored in the caller's local variable.
+  */
+  bool
+  merge_lex(THD *thd, LEX *oldlex, LEX *sublex);
+
+  /**
+    Restores lex in 'thd' from our copy, but keeps some status from the
+    one in 'thd', like ptr, tables, fields, etc.
+
+    @todo Conflicting comment in sp_head.cc
+  */
+  bool
+  restore_lex(THD *thd)
+  {
+    DBUG_ENTER("sp_head::restore_lex");
+    /*
+      There is no need to free the current thd->lex here.
+      - In the majority of the cases restore_lex() is called
+        on success and thd->lex does not need to be deleted.
+      - In cases when restore_lex() is called on error,
+        e.g. from sp_create_assignment_instr(), thd->lex is
+        already linked to some sp_instr_xxx (using sp_lex_keeper).
+
+      Note, we don't get to here in case of a syntax error
+      when the current thd->lex is not yet completely
+      initialized and linked. It gets automatically deleted
+      by the Bison %destructor in sql_yacc.yy.
+    */
+    LEX *oldlex= (LEX *) m_lex.pop();
+    if (!oldlex)
+      DBUG_RETURN(false); // Nothing to restore
+    // This restores thd->lex and thd->stmt_lex
+    DBUG_RETURN(thd->restore_from_local_lex_to_old_lex(oldlex));
+  }
+
+  /**
+    Walk the LEX stack from the top (the newest entry) down to the bottom
+    (the oldest entry) and return the first non-zero spname found.
+    @returns - the address of spname, or NULL if no spname was found.
+  */
+  const sp_name *find_spname_recursive()
+  {
+    for (uint pos= m_lex.elements; pos > 0; pos--)
+    {
+      const LEX *lex= m_lex.elem(pos - 1);
+      if (lex->spname)
+        return lex->spname;
+    }
+    return NULL;
+  }
+
+  /// Put the instruction on the backpatch list, associated with the label.
+  int
+  push_backpatch(THD *thd, sp_instr *, sp_label *);
+  int
+  push_backpatch_goto(THD *thd, sp_pcontext *ctx, sp_label *lab);
+
+  /// Update all instruction with this label in the backpatch list to
+  /// the current position.
+  void
+  backpatch(sp_label *);
+  void
+  backpatch_goto(THD *thd, sp_label *, sp_label *);
+
+  /// Check for unresolved goto label
+  bool
+  check_unresolved_goto();
+
+  /// Start a new cont. backpatch level. If 'i' is NULL, the level is just incr.
+  int
+  new_cont_backpatch(sp_instr_opt_meta *i);
+
+  /// Add an instruction to the current level
+  int
+  add_cont_backpatch(sp_instr_opt_meta *i);
+
+  /// Backpatch (and pop) the current level to the current position.
+  void
+  do_cont_backpatch();
+
+  /// Add cpush instructions for all cursors declared in the current frame
+  bool sp_add_instr_cpush_for_cursors(THD *thd, sp_pcontext *pcontext);
+
+  /// Return the routine name.
+  const LEX_CSTRING *name() const
+  { return &m_name; }
+
+  char *create_string(THD *thd, ulong *lenp);
+
+  Field *create_result_field(uint field_max_length, const LEX_CSTRING *field_name,
+                             TABLE *table) const;
+
+
+  /**
+    Check and prepare an instance of Column_definition for field creation
+    (fill all necessary attributes), for variables, parameters and
+    function return values.
+
+    @param[in]  thd          Thread handle
+    @param[out] field_def    An instance of create_field to be filled
+
+    @retval false on success
+    @retval true  on error
+  */
+  bool fill_field_definition(THD *thd, Column_definition *field_def)
+  {
+    const Type_handler *h= field_def->type_handler();
+    return h->Column_definition_fix_attributes(field_def) ||
+           field_def->sp_prepare_create_field(thd, mem_root);
+  }
+  bool row_fill_field_definitions(THD *thd, Row_definition_list *row)
+  {
+    /*
+      Prepare all row fields. This will (among other things)
+      - convert VARCHAR lengths from character length to octet length
+      - calculate interval lengths for SET and ENUM
+    */
+    List_iterator it(*row);
+    for (Spvar_definition *def= it++; def; def= it++)
+    {
+      if (fill_spvar_definition(thd, def))
+        return true;
+    }
+    return false;
+  }
+  /**
+    Check and prepare a Column_definition for a variable or a parameter.
+    On success, force the MAYBE_NULL pack flag: SP variables are nullable.
+  */
+  bool fill_spvar_definition(THD *thd, Column_definition *def)
+  {
+    bool failed= fill_field_definition(thd, def);
+    if (!failed)
+      def->pack_flag|= FIELDFLAG_MAYBE_NULL;
+    return failed;
+  }
+  /** Same as above, but assigns the field name before preparing. */
+  bool fill_spvar_definition(THD *thd, Column_definition *def,
+                             LEX_CSTRING *name)
+  {
+    def->field_name= *name;
+    return fill_spvar_definition(thd, def);
+  }
+
+private:
+  /**
+    Set a column type reference for a parameter definition,
+    and mark this routine as containing column type references.
+  */
+  void fill_spvar_using_type_reference(sp_variable *spvar,
+                                       Qualified_column_ident *ref)
+  {
+    spvar->field_def.set_column_type_ref(ref);
+    spvar->field_def.field_name= spvar->name;
+    m_flags|= sp_head::HAS_COLUMN_TYPE_REFS;
+  }
+
+  /**
+    Set a table rowtype reference for a variable definition,
+    prepare the definition, and mark the routine accordingly.
+  */
+  void fill_spvar_using_table_rowtype_reference(THD *thd,
+                                                sp_variable *spvar,
+                                                Table_ident *ref)
+  {
+    spvar->field_def.set_table_rowtype_ref(ref);
+    spvar->field_def.field_name= spvar->name;
+    fill_spvar_definition(thd, &spvar->field_def);
+    m_flags|= sp_head::HAS_COLUMN_TYPE_REFS;
+  }
+
+public:
+  bool spvar_fill_row(THD *thd, sp_variable *spvar, Row_definition_list *def);
+  bool spvar_fill_type_reference(THD *thd, sp_variable *spvar,
+                                 const LEX_CSTRING &table,
+                                 const LEX_CSTRING &column);
+  bool spvar_fill_type_reference(THD *thd, sp_variable *spvar,
+                                 const LEX_CSTRING &db,
+                                 const LEX_CSTRING &table,
+                                 const LEX_CSTRING &column);
+  bool spvar_fill_table_rowtype_reference(THD *thd, sp_variable *spvar,
+                                          const LEX_CSTRING &table);
+  bool spvar_fill_table_rowtype_reference(THD *thd, sp_variable *spvar,
+                                          const LEX_CSTRING &db,
+                                          const LEX_CSTRING &table);
+
+  void set_c_chistics(const st_sp_chistics &chistics);
+  void set_info(longlong created, longlong modified,
+		const st_sp_chistics &chistics, sql_mode_t sql_mode);
+
+  /** Parse a definer string into user/host and store it on mem_root. */
+  void set_definer(const char *definer, size_t definerlen)
+  {
+    AUTHID tmp;
+    tmp.parse(definer, definerlen);
+    m_definer.copy(mem_root, &tmp.user, &tmp.host);
+  }
+  void set_definer(const LEX_CSTRING *user_name, const LEX_CSTRING *host_name)
+  {
+    m_definer.copy(mem_root, user_name, host_name);
+  }
+
+  void reset_thd_mem_root(THD *thd);
+
+  void restore_thd_mem_root(THD *thd);
+
+  /**
+    Optimize the code.
+  */
+  void optimize();
+
+  /**
+    Helper used during flow analysis during code optimization.
+    See the implementation of opt_mark().
+    @param ip the instruction to add to the leads list
+    @param leads the list of remaining paths to explore in the graph that
+    represents the code, during flow analysis.
+  */
+  void add_mark_lead(uint ip, List *leads);
+  /** Return the instruction at position "i", or NULL if out of range. */
+  inline sp_instr *
+  get_instr(uint i)
+  {
+    sp_instr *result= NULL;
+
+    if (i < m_instr.elements)
+      get_dynamic(&m_instr, (uchar*)&result, i);
+    return result;
+  }
+
+#ifdef PROTECT_STATEMENT_MEMROOT
+  int has_all_instrs_executed();
+  void reset_instrs_executed_counter();
+#endif
+
+  /* Add tables used by routine to the table list. */
+  bool add_used_tables_to_table_list(THD *thd,
+                                     TABLE_LIST ***query_tables_last_ptr,
+                                     TABLE_LIST *belong_to_view);
+
+  /**
+    Check if this stored routine contains statements disallowed
+    in a stored function or trigger, and set an appropriate error message
+    if this is the case.
+  */
+  bool is_not_allowed_in_function(const char *where)
+  {
+    /* Only the first applicable error is reported. */
+    if (m_flags & CONTAINS_DYNAMIC_SQL)
+      my_error(ER_STMT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0), "Dynamic SQL");
+    else if (m_flags & MULTI_RESULTS)
+      my_error(ER_SP_NO_RETSET, MYF(0), where);
+    else if (m_flags & HAS_SET_AUTOCOMMIT_STMT)
+      my_error(ER_SP_CANT_SET_AUTOCOMMIT, MYF(0));
+    else if (m_flags & HAS_COMMIT_OR_ROLLBACK)
+      my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
+    else if (m_flags & HAS_SQLCOM_RESET)
+      my_error(ER_STMT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0), "RESET");
+    else if (m_flags & HAS_SQLCOM_FLUSH)
+      my_error(ER_STMT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0), "FLUSH");
+
+    return MY_TEST(m_flags &
+                  (CONTAINS_DYNAMIC_SQL | MULTI_RESULTS |
+                   HAS_SET_AUTOCOMMIT_STMT | HAS_COMMIT_OR_ROLLBACK |
+                   HAS_SQLCOM_RESET | HAS_SQLCOM_FLUSH));
+  }
+
+#ifndef DBUG_OFF
+  int show_routine_code(THD *thd);
+#endif
+
+  /*
+    This method is intended for attributes of a routine which need
+    to propagate upwards to the Query_tables_list of the caller (when
+    a property of a sp_head needs to "taint" the calling statement).
+  */
+  void propagate_attributes(Query_tables_list *prelocking_ctx)
+  {
+    DBUG_ENTER("sp_head::propagate_attributes");
+    /*
+      If this routine needs row-based binary logging, the entire top statement
+      too (we cannot switch from statement-based to row-based only for this
+      routine, as in statement-based the top-statement may be binlogged and
+      the substatements not).
+    */
+    DBUG_PRINT("info", ("lex->get_stmt_unsafe_flags(): 0x%x",
+                        prelocking_ctx->get_stmt_unsafe_flags()));
+    DBUG_PRINT("info", ("sp_head(%p=%s)->unsafe_flags: 0x%x",
+                        this, name()->str, unsafe_flags));
+    prelocking_ctx->set_stmt_unsafe_flags(unsafe_flags);
+    DBUG_VOID_RETURN;
+  }
+
+  sp_pcontext *get_parse_context() { return m_pcont; }
+
+  /*
+    Check EXECUTE access:
+    - in case of a standalone routine, for the routine itself
+    - in case of a package routine, for the owner package body
+  */
+  bool check_execute_access(THD *thd) const;
+
+  /* Overridden by sp_package; a plain routine has no package. */
+  virtual sp_package *get_package()
+  {
+    return NULL;
+  }
+
+  virtual void init_psi_share();
+
+protected:
+
+  MEM_ROOT *m_thd_root;		///< Temp. store for thd's mem_root
+  THD *m_thd;			///< Set if we have reset mem_root
+
+  sp_pcontext *m_pcont;		///< Parse context
+  List m_lex;		///< Temp. store for the other lex
+  DYNAMIC_ARRAY m_instr;	///< The "instructions"
+
+  enum backpatch_instr_type { GOTO, CPOP, HPOP };
+  typedef struct
+  {
+    sp_label *lab;
+    sp_instr *instr;
+    backpatch_instr_type instr_type;
+  } bp_t;
+  List m_backpatch;	///< Instructions needing backpatching
+  List m_backpatch_goto; // Instructions needing backpatching (for goto)
+
+  /**
+    We need a special list for backpatching of instructions with a continue
+    destination (in the case of a continue handler catching an error in
+    the test), since it would otherwise interfere with the normal backpatch
+    mechanism - e.g. jump_if_not instructions have two different destinations
+    which are to be patched differently.
+    Since these occur in a more restricted way (always the same "level" in
+    the code), we don't need the label.
+  */
+  List m_cont_backpatch;
+  uint m_cont_level;            // The current cont. backpatch level
+
+  /**
+    Multi-set representing optimized list of tables to be locked by this
+    routine. Does not include tables which are used by invoked routines.
+
+    @note
+    For prelocking-free SPs this multiset is constructed too.
+    We do so because the same instance of sp_head may be called both
+    in prelocked mode and in non-prelocked mode.
+  */
+  HASH m_sptabs;
+
+  bool
+  execute(THD *thd, bool merge_da_on_success);
+
+  /**
+    Perform a forward flow analysis in the generated code.
+    Mark reachable instructions, for the optimizer.
+  */
+  void opt_mark();
+
+  /**
+    Merge the list of tables used by query into the multi-set of tables used
+    by routine.
+  */
+  bool merge_table_list(THD *thd, TABLE_LIST *table, LEX *lex_for_tmp_check);
+
+  /// Put the instruction on the given backpatch list, associated with the label.
+  int
+  push_backpatch(THD *thd, sp_instr *, sp_label *, List *list,
+                 backpatch_instr_type itype);
+
+}; // class sp_head : public Sql_alloc
+
+
+class sp_package: public sp_head
+{
+  bool validate_public_routines(THD *thd, sp_package *spec);
+  bool validate_private_routines(THD *thd);
+public:
+  /** A list of LEXes for package routines, with lookup/duplicate helpers. */
+  class LexList: public List
+  {
+  public:
+    LexList() { elements= 0; }
+    // Find a package routine by a non qualified name
+    LEX *find(const LEX_CSTRING &name, enum_sp_type type);
+    // Find a package routine by a package-qualified name, e.g. 'pkg.proc'
+    LEX *find_qualified(const LEX_CSTRING &name, enum_sp_type type);
+    // Check if a routine with the given qualified name already exists
+    bool check_dup_qualified(const LEX_CSTRING &name, const Sp_handler *sph)
+    {
+      if (!find_qualified(name, sph->type()))
+        return false;
+      my_error(ER_SP_ALREADY_EXISTS, MYF(0), sph->type_str(), name.str);
+      return true;
+    }
+    bool check_dup_qualified(const sp_head *sp)
+    {
+      return check_dup_qualified(sp->m_name, sp->m_handler);
+    }
+    void cleanup();
+  };
+  /*
+    The LEX for a new package subroutine is initially assigned to
+    m_current_routine. After scanning parameters, return type and chistics,
+    the parser detects if we have a declaration or a definition, e.g.:
+         PROCEDURE p1(a INT);
+      vs
+         PROCEDURE p1(a INT) AS BEGIN NULL; END;
+    (i.e. either semicolon or the "AS" keyword)
+    m_current_routine is then added either to m_routine_implementations,
+    or m_routine_declarations, and then m_current_routine is set to NULL.
+  */
+  LEX *m_current_routine;
+  LexList m_routine_implementations;
+  LexList m_routine_declarations;
+
+  LEX *m_top_level_lex;
+  sp_rcontext *m_rcontext;
+  // Number of package subroutines currently executing (see is_invoked()).
+  uint m_invoked_subroutine_count;
+  bool m_is_instantiated;
+  bool m_is_cloning_routine;
+
+private:
+  sp_package(MEM_ROOT *mem_root,
+             LEX *top_level_lex,
+             const sp_name *name,
+             const Sp_handler *sph);
+  ~sp_package();
+public:
+  static sp_package *create(LEX *top_level_lex, const sp_name *name,
+                            const Sp_handler *sph);
+
+  /** Add a routine declaration, rejecting qualified-name duplicates. */
+  bool add_routine_declaration(LEX *lex)
+  {
+    return m_routine_declarations.check_dup_qualified(lex->sphead) ||
+           m_routine_declarations.push_back(lex, &main_mem_root);
+  }
+  /** Add a routine implementation, rejecting qualified-name duplicates. */
+  bool add_routine_implementation(LEX *lex)
+  {
+    return m_routine_implementations.check_dup_qualified(lex->sphead) ||
+           m_routine_implementations.push_back(lex, &main_mem_root);
+  }
+  sp_package *get_package() { return this; }
+  void init_psi_share();
+  bool is_invoked() const
+  {
+    /*
+      Cannot flush a package out of the SP cache when:
+      - its initialization block is running
+      - one of its subroutines is running
+    */
+    return sp_head::is_invoked() || m_invoked_subroutine_count > 0;
+  }
+  sp_variable *find_package_variable(const LEX_CSTRING *name) const
+  {
+    /*
+      sp_head::m_pcont is a special level for routine parameters.
+      Variables declared inside CREATE PACKAGE BODY reside in m_children.at(0).
+    */
+    sp_pcontext *ctx= m_pcont->child_context(0);
+    return ctx ? ctx->find_variable(name, true) : NULL;
+  }
+  bool validate_after_parser(THD *thd);
+  bool instantiate_if_needed(THD *thd);
+};
+
+
+class sp_lex_cursor: public sp_lex_local, public Query_arena
+{
+public:
+  sp_lex_cursor(THD *thd, const LEX *oldlex, MEM_ROOT *mem_root_arg)
+   :sp_lex_local(thd, oldlex),
+    Query_arena(mem_root_arg, STMT_INITIALIZED_FOR_SP)
+  { }
+  sp_lex_cursor(THD *thd, const LEX *oldlex)
+   :sp_lex_local(thd, oldlex),
+    Query_arena(thd->lex->sphead->get_main_mem_root(), STMT_INITIALIZED_FOR_SP)
+  { }
+  ~sp_lex_cursor() { free_items(); }
+  virtual bool cleanup_stmt(bool /*restore_set_statement_vars*/) override
+  { return false; }
+  Query_arena *query_arena() override { return this; }
+  /**
+    Reject a cursor SELECT that already has a result sink set
+    ("result" member); reports ER_SP_BAD_CURSOR_SELECT in that case.
+  */
+  bool validate()
+  {
+    DBUG_ASSERT(sql_command == SQLCOM_SELECT);
+    if (result)
+    {
+      my_error(ER_SP_BAD_CURSOR_SELECT, MYF(0));
+      return true;
+    }
+    return false;
+  }
+  /**
+    Validate the statement and take ownership of thd->free_list,
+    so the parsed items survive with this cursor LEX.
+  */
+  bool stmt_finalize(THD *thd)
+  {
+    if (validate())
+      return true;
+    sp_lex_in_use= true;
+    free_list= thd->free_list;
+    thd->free_list= NULL;
+    return false;
+  }
+};
+
+
+//
+// "Instructions"...
+//
+
+class sp_instr :public Query_arena, public Sql_alloc
+{
+  sp_instr(const sp_instr &);	/**< Prevent use of these */
+  void operator=(sp_instr &);
+
+public:
+
+  uint marked;
+  uint m_ip;			///< My index
+  sp_pcontext *m_ctx;		///< My parse context
+  uint m_lineno;
+
+  /// Should give each a name or type code for debugging purposes?
+  sp_instr(uint ip, sp_pcontext *ctx)
+    :Query_arena(0, STMT_INITIALIZED_FOR_SP), marked(0), m_ip(ip), m_ctx(ctx)
+#ifdef PROTECT_STATEMENT_MEMROOT
+  , m_has_been_run(false)
+#endif
+  {}
+
+  virtual ~sp_instr()
+  { free_items(); }
+
+
+  /**
+    Execute this instruction
+
+   
+    @param thd         Thread handle
+    @param[out] nextp  index of the next instruction to execute. (For most
+                       instructions this will be the instruction following this
+                       one). Note that this parameter is undefined in case of
+                       errors, use get_cont_dest() to find the continuation
+                       instruction for CONTINUE error handlers.
+   
+    @retval 0      on success, 
+    @retval other  if some error occurred
+  */
+
+  virtual int execute(THD *thd, uint *nextp) = 0;
+
+  /**
+    Execute open_and_lock_tables() for this statement.
+    Open and lock the tables used by this statement, as a pre-requisite
+    to execute the core logic of this instruction with
+    exec_core().
+    @param thd the current thread
+    @param tables the list of tables to open and lock
+    @return zero on success, non zero on failure.
+  */
+  int exec_open_and_lock_tables(THD *thd, TABLE_LIST *tables);
+
+  /**
+    Get the continuation destination of this instruction.
+    @return the continuation destination
+  */
+  virtual uint get_cont_dest() const;
+
+  /*
+    Execute core function of instruction after all preparations (e.g.
+    setting of proper LEX, saving part of the thread context have been
+    done).
+
+    Should be implemented for instructions using expressions or whole
+    statements (thus having to have own LEX). Used in concert with
+    sp_lex_keeper class and its descendants (there are none currently).
+  */
+  virtual int exec_core(THD *thd, uint *nextp);
+
+  virtual void print(String *str) = 0;
+
+  virtual void backpatch(uint dest, sp_pcontext *dst_ctx)
+  {}
+
+  /**
+    Mark this instruction as reachable during optimization and return the
+    index to the next instruction. Jump instruction will add their
+    destination to the leads list.
+  */
+  virtual uint opt_mark(sp_head *sp, List *leads)
+  {
+    marked= 1;
+    return m_ip+1;
+  }
+
+  /**
+    Short-cut jumps to jumps during optimization. This is used by the
+    jump instructions' opt_mark() methods. 'start' is the starting point,
+    used to prevent the mark sweep from looping for ever. Return the
+    end destination.
+  */
+  virtual uint opt_shortcut_jump(sp_head *sp, sp_instr *start)
+  {
+    return m_ip;
+  }
+
+  /**
+    Inform the instruction that it has been moved during optimization.
+    Most instructions will simply update its index, but jump instructions
+    must also take care of their destination pointers. Forward jumps get
+    pushed to the backpatch list 'ibp'.
+  */
+  virtual void opt_move(uint dst, List<sp_instr> *ibp)
+  {
+    m_ip= dst;
+  }
+  virtual PSI_statement_info* get_psi_info() = 0;
+
+#ifdef PROTECT_STATEMENT_MEMROOT
+  bool has_been_run() const
+  {
+    return m_has_been_run;
+  }
+
+  void mark_as_run()
+  {
+    m_has_been_run= true;
+  }
+
+  void mark_as_not_run()
+  {
+    m_has_been_run= false;
+  }
+
+private:
+  bool m_has_been_run;
+#endif
+}; // class sp_instr : public Sql_alloc
+
+
+/**
+  Auxiliary class to which instructions delegate responsibility
+  for handling LEX and preparations before executing statement
+  or calculating complex expression.
+
+  Exist mainly to avoid having double hierarchy between instruction
+  classes.
+
+  @todo
+    Add ability to not store LEX and do any preparations if
+    expression used is simple.
+*/
+
+class sp_lex_keeper
+{
+  /** Prevent use of these */
+  sp_lex_keeper(const sp_lex_keeper &);
+  void operator=(sp_lex_keeper &);
+public:
+
+  sp_lex_keeper(LEX *lex, bool lex_resp)
+    : m_lex(lex), m_lex_resp(lex_resp), 
+      lex_query_tables_own_last(NULL)
+  {
+    lex->sp_lex_in_use= TRUE;
+  }
+  virtual ~sp_lex_keeper()
+  {
+    if (m_lex_resp)
+    {
+      /* Prevent endless recursion. */
+      m_lex->sphead= NULL;
+      lex_end(m_lex);
+      delete m_lex;
+    }
+  }
+
+  /**
+    Prepare execution of instruction using LEX, if requested check whether
+    we have read access to tables used and open/lock them, call instruction's
+    exec_core() method, perform cleanup afterwards.
+   
+    @todo Conflicting comment in sp_head.cc
+  */
+  int reset_lex_and_exec_core(THD *thd, uint *nextp, bool open_tables,
+                              sp_instr* instr);
+
+  int cursor_reset_lex_and_exec_core(THD *thd, uint *nextp, bool open_tables,
+                                     sp_instr *instr);
+
+  inline uint sql_command() const
+  {
+    return (uint)m_lex->sql_command;
+  }
+
+  void disable_query_cache()
+  {
+    m_lex->safe_to_cache_query= 0;
+  }
+
+private:
+
+  LEX *m_lex;
+  /**
+    Indicates whether this sp_lex_keeper instance is responsible
+    for LEX deletion.
+  */
+  bool m_lex_resp;
+
+  /*
+    Support for being able to execute this statement in two modes:
+    a) inside prelocked mode set by the calling procedure or its ancestor.
+    b) outside of prelocked mode, when this statement enters/leaves
+       prelocked mode itself.
+  */
+  
+  /**
+    List of additional tables this statement needs to lock when it
+    enters/leaves prelocked mode on its own.
+  */
+  TABLE_LIST *prelocking_tables;
+
+  /**
+    The value m_lex->query_tables_own_last should be set to this when the
+    statement enters/leaves prelocked mode on its own.
+  */
+  TABLE_LIST **lex_query_tables_own_last;
+};
+
+
+/**
+  Call out to some prepared SQL statement.
+*/
+class sp_instr_stmt : public sp_instr
+{
+  sp_instr_stmt(const sp_instr_stmt &);	/**< Prevent use of these */
+  void operator=(sp_instr_stmt &);
+
+public:
+
+  LEX_STRING m_query;		///< For thd->query
+
+  sp_instr_stmt(uint ip, sp_pcontext *ctx, LEX *lex)
+    : sp_instr(ip, ctx), m_lex_keeper(lex, TRUE)
+  {
+    m_query.str= 0;
+    m_query.length= 0;
+  }
+
+  virtual ~sp_instr_stmt() = default;
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual int exec_core(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+private:
+
+  sp_lex_keeper m_lex_keeper;
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+
+}; // class sp_instr_stmt : public sp_instr
+
+
+class sp_instr_set : public sp_instr
+{
+  sp_instr_set(const sp_instr_set &);	/**< Prevent use of these */
+  void operator=(sp_instr_set &);
+
+public:
+
+  sp_instr_set(uint ip, sp_pcontext *ctx,
+               const Sp_rcontext_handler *rh,
+	       uint offset, Item *val,
+               LEX *lex, bool lex_resp)
+    : sp_instr(ip, ctx),
+      m_rcontext_handler(rh), m_offset(offset), m_value(val),
+      m_lex_keeper(lex, lex_resp)
+  {}
+
+  virtual ~sp_instr_set() = default;
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual int exec_core(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+protected:
+  sp_rcontext *get_rcontext(THD *thd) const;
+  const Sp_rcontext_handler *m_rcontext_handler;
+  uint m_offset;		///< Frame offset
+  Item *m_value;
+  sp_lex_keeper m_lex_keeper;
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_set : public sp_instr
+
+
+/*
+  This class handles assignments of a ROW fields:
+    DECLARE rec ROW (a INT,b INT);
+    SET rec.a= 10;
+*/
+class sp_instr_set_row_field : public sp_instr_set
+{
+  sp_instr_set_row_field(const sp_instr_set_row_field &); // Prevent use of this
+  void operator=(sp_instr_set_row_field &);
+  uint m_field_offset;
+
+public:
+
+  sp_instr_set_row_field(uint ip, sp_pcontext *ctx,
+                         const Sp_rcontext_handler *rh,
+                         uint offset, uint field_offset,
+                         Item *val,
+                         LEX *lex, bool lex_resp)
+    : sp_instr_set(ip, ctx, rh, offset, val, lex, lex_resp),
+      m_field_offset(field_offset)
+  {}
+
+  virtual ~sp_instr_set_row_field() = default;
+
+  virtual int exec_core(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+}; // class sp_instr_set_field : public sp_instr_set
+
+
+/**
+  This class handles assignment instructions like this:
+  DECLARE
+    CURSOR cur IS SELECT * FROM t1;
+    rec cur%ROWTYPE;
+  BEGIN
+    rec.column1:= 10; -- This instruction
+  END;
+
+  The idea is that during sp_rcontext::create() we do not know the exact
+  structure of "rec". It gets resolved at run time, during the corresponding
+  sp_instr_cursor_copy_struct::exec_core().
+
+  So sp_instr_set_row_field_by_name searches for ROW fields by name,
+  while sp_instr_set_row_field (see above) searches for ROW fields by index.
+*/
+class sp_instr_set_row_field_by_name : public sp_instr_set
+{
+  // Prevent use of this
+  sp_instr_set_row_field_by_name(const sp_instr_set_row_field &);
+  void operator=(sp_instr_set_row_field_by_name &);
+  const LEX_CSTRING m_field_name;
+
+public:
+
+  sp_instr_set_row_field_by_name(uint ip, sp_pcontext *ctx,
+                                 const Sp_rcontext_handler *rh,
+                                 uint offset, const LEX_CSTRING &field_name,
+                                 Item *val,
+                                 LEX *lex, bool lex_resp)
+    : sp_instr_set(ip, ctx, rh, offset, val, lex, lex_resp),
+      m_field_name(field_name)
+  {}
+
+  virtual ~sp_instr_set_row_field_by_name() = default;
+
+  virtual int exec_core(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+}; // class sp_instr_set_field_by_name : public sp_instr_set
+
+
+/**
+  Set NEW/OLD row field value instruction. Used in triggers.
+*/
+class sp_instr_set_trigger_field : public sp_instr
+{
+  sp_instr_set_trigger_field(const sp_instr_set_trigger_field &);
+  void operator=(sp_instr_set_trigger_field &);
+
+public:
+
+  sp_instr_set_trigger_field(uint ip, sp_pcontext *ctx,
+                             Item_trigger_field *trg_fld,
+                             Item *val, LEX *lex)
+    : sp_instr(ip, ctx),
+      trigger_field(trg_fld),
+      value(val), m_lex_keeper(lex, TRUE)
+  {}
+
+  virtual ~sp_instr_set_trigger_field() = default;
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual int exec_core(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+private:
+  Item_trigger_field *trigger_field;
+  Item *value;
+  sp_lex_keeper m_lex_keeper;
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_trigger_field : public sp_instr
+
+
+/**
+  An abstract class for all instructions with destinations that
+  needs to be updated by the optimizer.
+
+  Even if not all subclasses will use both the normal destination and
+  the continuation destination, we put them both here for simplicity.
+*/
+class sp_instr_opt_meta : public sp_instr
+{
+public:
+
+  uint m_dest;			///< Where we will go
+  uint m_cont_dest;             ///< Where continue handlers will go
+
+  sp_instr_opt_meta(uint ip, sp_pcontext *ctx)
+    : sp_instr(ip, ctx),
+      m_dest(0), m_cont_dest(0), m_optdest(0), m_cont_optdest(0)
+  {}
+
+  sp_instr_opt_meta(uint ip, sp_pcontext *ctx, uint dest)
+    : sp_instr(ip, ctx),
+      m_dest(dest), m_cont_dest(0), m_optdest(0), m_cont_optdest(0)
+  {}
+
+  virtual ~sp_instr_opt_meta() = default;
+
+  virtual void set_destination(uint old_dest, uint new_dest)
+    = 0;
+
+  virtual uint get_cont_dest() const;
+
+protected:
+
+  sp_instr *m_optdest;		///< Used during optimization
+  sp_instr *m_cont_optdest;     ///< Used during optimization
+
+}; // class sp_instr_opt_meta : public sp_instr
+
+class sp_instr_jump : public sp_instr_opt_meta
+{
+  sp_instr_jump(const sp_instr_jump &);	/**< Prevent use of these */
+  void operator=(sp_instr_jump &);
+
+public:
+
+  sp_instr_jump(uint ip, sp_pcontext *ctx)
+    : sp_instr_opt_meta(ip, ctx)
+  {}
+
+  sp_instr_jump(uint ip, sp_pcontext *ctx, uint dest)
+    : sp_instr_opt_meta(ip, ctx, dest)
+  {}
+
+  virtual ~sp_instr_jump() = default;
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+  virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads);
+
+  virtual uint opt_shortcut_jump(sp_head *sp, sp_instr *start);
+
+  virtual void opt_move(uint dst, List<sp_instr> *ibp);
+
+  virtual void backpatch(uint dest, sp_pcontext *dst_ctx)
+  {
+    /* Calling backpatch twice is a logic flaw in jump resolution. */
+    DBUG_ASSERT(m_dest == 0);
+    m_dest= dest;
+  }
+
+  /**
+    Update the destination; used by the optimizer.
+  */
+  virtual void set_destination(uint old_dest, uint new_dest)
+  {
+    if (m_dest == old_dest)
+      m_dest= new_dest;
+  }
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_jump : public sp_instr_opt_meta
+
+
+class sp_instr_jump_if_not : public sp_instr_jump
+{
+  sp_instr_jump_if_not(const sp_instr_jump_if_not &); /**< Prevent use of these */
+  void operator=(sp_instr_jump_if_not &);
+
+public:
+
+  sp_instr_jump_if_not(uint ip, sp_pcontext *ctx, Item *i, LEX *lex)
+    : sp_instr_jump(ip, ctx), m_expr(i),
+      m_lex_keeper(lex, TRUE)
+  {}
+
+  sp_instr_jump_if_not(uint ip, sp_pcontext *ctx, Item *i, uint dest, LEX *lex)
+    : sp_instr_jump(ip, ctx, dest), m_expr(i),
+      m_lex_keeper(lex, TRUE)
+  {}
+
+  virtual ~sp_instr_jump_if_not() = default;
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual int exec_core(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+  virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads);
+
+  /** Override sp_instr_jump's shortcut; we stop here */
+  virtual uint opt_shortcut_jump(sp_head *sp, sp_instr *start)
+  {
+    return m_ip;
+  }
+
+  virtual void opt_move(uint dst, List<sp_instr> *ibp);
+
+  virtual void set_destination(uint old_dest, uint new_dest)
+  {
+    sp_instr_jump::set_destination(old_dest, new_dest);
+    if (m_cont_dest == old_dest)
+      m_cont_dest= new_dest;
+  }
+
+private:
+
+  Item *m_expr;			///< The condition
+  sp_lex_keeper m_lex_keeper;
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_jump_if_not : public sp_instr_jump
+
+
+class sp_instr_preturn : public sp_instr
+{
+  sp_instr_preturn(const sp_instr_preturn &);	/**< Prevent use of these */
+  void operator=(sp_instr_preturn &);
+
+public:
+
+  sp_instr_preturn(uint ip, sp_pcontext *ctx)
+    : sp_instr(ip, ctx)
+  {}
+
+  virtual ~sp_instr_preturn() = default;
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+  virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads)
+  {
+    marked= 1;
+    return UINT_MAX;
+  }
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_preturn : public sp_instr
+
+
+class sp_instr_freturn : public sp_instr
+{
+  sp_instr_freturn(const sp_instr_freturn &);	/**< Prevent use of these */
+  void operator=(sp_instr_freturn &);
+
+public:
+
+  sp_instr_freturn(uint ip, sp_pcontext *ctx,
+		   Item *val, const Type_handler *handler, LEX *lex)
+    : sp_instr(ip, ctx), m_value(val), m_type_handler(handler),
+      m_lex_keeper(lex, TRUE)
+  {}
+
+  virtual ~sp_instr_freturn() = default;
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual int exec_core(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+  virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads)
+  {
+    marked= 1;
+    return UINT_MAX;
+  }
+
+protected:
+
+  Item *m_value;
+  const Type_handler *m_type_handler;
+  sp_lex_keeper m_lex_keeper;
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_freturn : public sp_instr
+
+
+class sp_instr_hpush_jump : public sp_instr_jump
+{
+  sp_instr_hpush_jump(const sp_instr_hpush_jump &); /**< Prevent use of these */
+  void operator=(sp_instr_hpush_jump &);
+
+public:
+
+  sp_instr_hpush_jump(uint ip,
+                      sp_pcontext *ctx,
+                      sp_handler *handler)
+   :sp_instr_jump(ip, ctx),
+    m_handler(handler),
+    m_opt_hpop(0),
+    m_frame(ctx->current_var_count())
+  {
+    DBUG_ASSERT(m_handler->condition_values.elements == 0);
+  }
+
+  virtual ~sp_instr_hpush_jump()
+  {
+    m_handler->condition_values.empty();
+    m_handler= NULL;
+  }
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+  virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads);
+
+  /** Override sp_instr_jump's shortcut; we stop here. */
+  virtual uint opt_shortcut_jump(sp_head *sp, sp_instr *start)
+  {
+    return m_ip;
+  }
+
+  virtual void backpatch(uint dest, sp_pcontext *dst_ctx)
+  {
+    DBUG_ASSERT(!m_dest || !m_opt_hpop);
+    if (!m_dest)
+      m_dest= dest;
+    else
+      m_opt_hpop= dest;
+  }
+
+  void add_condition(sp_condition_value *condition_value)
+  { m_handler->condition_values.push_back(condition_value); }
+
+  sp_handler *get_handler()
+  { return m_handler; }
+
+private:
+  /// Handler.
+  sp_handler *m_handler;
+
+  /// hpop marking end of handler scope.
+  uint m_opt_hpop;
+
+  // This attribute is needed for SHOW PROCEDURE CODE only (i.e. it's needed in
+  // debug version only). It's used in print().
+  uint m_frame;
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_hpush_jump : public sp_instr_jump
+
+
+class sp_instr_hpop : public sp_instr
+{
+  sp_instr_hpop(const sp_instr_hpop &);	/**< Prevent use of these */
+  void operator=(sp_instr_hpop &);
+
+public:
+
+  sp_instr_hpop(uint ip, sp_pcontext *ctx, uint count)
+    : sp_instr(ip, ctx), m_count(count)
+  {}
+
+  virtual ~sp_instr_hpop() = default;
+
+  void update_count(uint count)
+  {
+    m_count= count;
+  }
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+private:
+
+  uint m_count;
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_hpop : public sp_instr
+
+
+class sp_instr_hreturn : public sp_instr_jump
+{
+  sp_instr_hreturn(const sp_instr_hreturn &);	/**< Prevent use of these */
+  void operator=(sp_instr_hreturn &);
+
+public:
+
+  sp_instr_hreturn(uint ip, sp_pcontext *ctx)
+   :sp_instr_jump(ip, ctx),
+    m_frame(ctx->current_var_count())
+  {}
+
+  virtual ~sp_instr_hreturn() = default;
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+  /* This instruction will not be short cut optimized. */
+  virtual uint opt_shortcut_jump(sp_head *sp, sp_instr *start)
+  {
+    return m_ip;
+  }
+
+  virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads);
+
+private:
+
+  uint m_frame;
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_hreturn : public sp_instr_jump
+
+
+/** This is DECLARE CURSOR */
+class sp_instr_cpush : public sp_instr, public sp_cursor
+{
+  sp_instr_cpush(const sp_instr_cpush &); /**< Prevent use of these */
+  void operator=(sp_instr_cpush &);
+
+public:
+
+  sp_instr_cpush(uint ip, sp_pcontext *ctx, LEX *lex, uint offset)
+    : sp_instr(ip, ctx), m_lex_keeper(lex, TRUE), m_cursor(offset)
+  {}
+
+  virtual ~sp_instr_cpush() = default;
+
+  int execute(THD *thd, uint *nextp) override;
+
+  void print(String *str) override;
+
+  /**
+    This call is used to cleanup the instruction when a sensitive
+    cursor is closed. For now stored procedures always use materialized
+    cursors and the call is not used.
+  */
+  virtual bool cleanup_stmt(bool /*restore_set_statement_vars*/) override
+  { return false; }
+private:
+
+  sp_lex_keeper m_lex_keeper;
+  uint m_cursor;                /**< Frame offset (for debugging) */
+
+public:
+  PSI_statement_info* get_psi_info() override { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_cpush : public sp_instr
+
+
+class sp_instr_cpop : public sp_instr
+{
+  sp_instr_cpop(const sp_instr_cpop &); /**< Prevent use of these */
+  void operator=(sp_instr_cpop &);
+
+public:
+
+  sp_instr_cpop(uint ip, sp_pcontext *ctx, uint count)
+    : sp_instr(ip, ctx), m_count(count)
+  {}
+
+  virtual ~sp_instr_cpop() = default;
+
+  void update_count(uint count)
+  {
+    m_count= count;
+  }
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+private:
+
+  uint m_count;
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_cpop : public sp_instr
+
+
+class sp_instr_copen : public sp_instr
+{
+  sp_instr_copen(const sp_instr_copen &); /**< Prevent use of these */
+  void operator=(sp_instr_copen &);
+
+public:
+
+  sp_instr_copen(uint ip, sp_pcontext *ctx, uint c)
+    : sp_instr(ip, ctx), m_cursor(c)
+  {}
+
+  virtual ~sp_instr_copen() = default;
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual int exec_core(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+private:
+
+  uint m_cursor;		///< Stack index
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_copen : public sp_instr_stmt
+
+
+/**
+  Initialize the structure of a cursor%ROWTYPE variable
+  from the LEX containing the cursor SELECT statement.
+*/
+class sp_instr_cursor_copy_struct: public sp_instr
+{
+  /**< Prevent use of these */
+  sp_instr_cursor_copy_struct(const sp_instr_cursor_copy_struct &);
+  void operator=(sp_instr_cursor_copy_struct &);
+  sp_lex_keeper m_lex_keeper;
+  uint m_cursor;
+  uint m_var;
+public:
+  sp_instr_cursor_copy_struct(uint ip, sp_pcontext *ctx, uint coffs,
+                              sp_lex_cursor *lex, uint voffs)
+    : sp_instr(ip, ctx), m_lex_keeper(lex, FALSE),
+      m_cursor(coffs),
+      m_var(voffs)
+  {}
+  virtual ~sp_instr_cursor_copy_struct() = default;
+  virtual int execute(THD *thd, uint *nextp);
+  virtual int exec_core(THD *thd, uint *nextp);
+  virtual void print(String *str);
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+};
+
+
+class sp_instr_cclose : public sp_instr
+{
+  sp_instr_cclose(const sp_instr_cclose &); /**< Prevent use of these */
+  void operator=(sp_instr_cclose &);
+
+public:
+
+  sp_instr_cclose(uint ip, sp_pcontext *ctx, uint c)
+    : sp_instr(ip, ctx), m_cursor(c)
+  {}
+
+  virtual ~sp_instr_cclose() = default;
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+private:
+
+  uint m_cursor;
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_cclose : public sp_instr
+
+
+class sp_instr_cfetch : public sp_instr
+{
+  sp_instr_cfetch(const sp_instr_cfetch &); /**< Prevent use of these */
+  void operator=(sp_instr_cfetch &);
+
+public:
+
+  sp_instr_cfetch(uint ip, sp_pcontext *ctx, uint c, bool error_on_no_data)
+    : sp_instr(ip, ctx), m_cursor(c), m_error_on_no_data(error_on_no_data)
+  {
+    m_varlist.empty();
+  }
+
+  virtual ~sp_instr_cfetch() = default;
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+  void add_to_varlist(sp_variable *var)
+  {
+    m_varlist.push_back(var);
+  }
+
+private:
+
+  uint m_cursor;
+  List<sp_variable> m_varlist;
+  bool m_error_on_no_data;
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_cfetch : public sp_instr
+
+/*
+This class is created for the special fetch instruction
+FETCH GROUP NEXT ROW, used in the user-defined aggregate
+functions
+*/
+
+class sp_instr_agg_cfetch : public sp_instr
+{
+  sp_instr_agg_cfetch(const sp_instr_cfetch &); /**< Prevent use of these */
+  void operator=(sp_instr_cfetch &);
+
+public:
+
+  sp_instr_agg_cfetch(uint ip, sp_pcontext *ctx)
+    : sp_instr(ip, ctx){}
+
+  virtual ~sp_instr_agg_cfetch() = default;
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_agg_cfetch : public sp_instr
+
+
+
+
+class sp_instr_error : public sp_instr
+{
+  sp_instr_error(const sp_instr_error &); /**< Prevent use of these */
+  void operator=(sp_instr_error &);
+
+public:
+
+  sp_instr_error(uint ip, sp_pcontext *ctx, int errcode)
+    : sp_instr(ip, ctx), m_errcode(errcode)
+  {}
+
+  virtual ~sp_instr_error() = default;
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+  virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads)
+  {
+    marked= 1;
+    return UINT_MAX;
+  }
+
+private:
+
+  int m_errcode;
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_error : public sp_instr
+
+
+class sp_instr_set_case_expr : public sp_instr_opt_meta
+{
+public:
+
+  sp_instr_set_case_expr(uint ip, sp_pcontext *ctx, uint case_expr_id,
+                         Item *case_expr, LEX *lex)
+    : sp_instr_opt_meta(ip, ctx),
+      m_case_expr_id(case_expr_id), m_case_expr(case_expr),
+      m_lex_keeper(lex, TRUE)
+  {}
+
+  virtual ~sp_instr_set_case_expr() = default;
+
+  virtual int execute(THD *thd, uint *nextp);
+
+  virtual int exec_core(THD *thd, uint *nextp);
+
+  virtual void print(String *str);
+
+  virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads);
+
+  virtual void opt_move(uint dst, List<sp_instr> *ibp);
+
+  virtual void set_destination(uint old_dest, uint new_dest)
+  {
+    if (m_cont_dest == old_dest)
+      m_cont_dest= new_dest;
+  }
+
+private:
+
+  uint m_case_expr_id;
+  Item *m_case_expr;
+  sp_lex_keeper m_lex_keeper;
+
+public:
+  virtual PSI_statement_info* get_psi_info() { return & psi_info; }
+  static PSI_statement_info psi_info;
+}; // class sp_instr_set_case_expr : public sp_instr_opt_meta
+
+bool check_show_routine_access(THD *thd, sp_head *sp, bool *full_access);
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+bool
+sp_change_security_context(THD *thd, sp_head *sp,
+                           Security_context **backup);
+void
+sp_restore_security_context(THD *thd, Security_context *backup);
+
+bool
+set_routine_security_ctx(THD *thd, sp_head *sp, Security_context **save_ctx);
+#endif /* NO_EMBEDDED_ACCESS_CHECKS */
+
+TABLE_LIST *
+sp_add_to_query_tables(THD *thd, LEX *lex,
+		       const LEX_CSTRING *db, const LEX_CSTRING *name,
+                       thr_lock_type locktype,
+                       enum_mdl_type mdl_type);
+
+/**
+  @} (end of group Stored_Routines)
+*/
+
+#endif /* _SP_HEAD_H_ */
diff --git a/sql/sp_pcontext.cc b/sql/sp_pcontext.cc
new file mode 100644
index 00000000..848d1f0c
--- /dev/null
+++ b/sql/sp_pcontext.cc
@@ -0,0 +1,742 @@
+/* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2009, 2020, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#include "mariadb.h"
+#include "sql_priv.h"
+#include "unireg.h"
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation
+#endif
+
+#include "sp_pcontext.h"
+#include "sp_head.h"
+
+bool sp_condition_value::equals(const sp_condition_value *cv) const
+{
+  DBUG_ASSERT(cv);
+
+  /*
+    The following test disallows duplicate handlers,
+    including user defined exceptions with the same WHEN clause:
+      DECLARE
+        a EXCEPTION;
+        b EXCEPTION;
+      BEGIN
+        RAISE a;
+      EXCEPTION
+        WHEN a THEN RETURN 'a0';
+        WHEN a THEN RETURN 'a1';
+      END
+  */
+  if (this == cv)
+    return true;
+
+  /*
+    The test below considers two conditions of the same type as equal
+    (except for the user defined exceptions) to avoid declaring duplicate
+    handlers.
+
+    All user defined conditions have type==SQLSTATE
+    with the same SQL state and error code.
+    It's OK to have multiple user defined conditions:
+    DECLARE
+      a EXCEPTION;
+      b EXCEPTION;
+    BEGIN
+      RAISE a;
+    EXCEPTION
+      WHEN a THEN RETURN 'a';
+      WHEN b THEN RETURN 'b';
+    END;
+  */
+  if (type != cv->type || m_is_user_defined || cv->m_is_user_defined)
+    return false;
+
+  switch (type)
+  {
+  case sp_condition_value::ERROR_CODE:
+    return (get_sql_errno() == cv->get_sql_errno());
+
+  case sp_condition_value::SQLSTATE:
+    return Sql_state::eq(cv);
+
+  default:
+    return true;
+  }
+}
+
+
+void sp_pcontext::init(uint var_offset,
+                       uint cursor_offset,
+                       int num_case_expressions)
+{
+  m_var_offset= var_offset;
+  m_cursor_offset= cursor_offset;
+  m_num_case_exprs= num_case_expressions;
+
+  m_labels.empty();
+  m_goto_labels.empty();
+}
+
+
+sp_pcontext::sp_pcontext()
+  : Sql_alloc(),
+  m_max_var_index(0), m_max_cursor_index(0),
+  m_parent(NULL), m_pboundary(0),
+  m_vars(PSI_INSTRUMENT_MEM), m_case_expr_ids(PSI_INSTRUMENT_MEM),
+  m_conditions(PSI_INSTRUMENT_MEM), m_cursors(PSI_INSTRUMENT_MEM),
+  m_handlers(PSI_INSTRUMENT_MEM), m_children(PSI_INSTRUMENT_MEM),
+  m_scope(REGULAR_SCOPE)
+{
+  init(0, 0, 0);
+}
+
+
+sp_pcontext::sp_pcontext(sp_pcontext *prev, sp_pcontext::enum_scope scope)
+  : Sql_alloc(),
+  m_max_var_index(0), m_max_cursor_index(0),
+  m_parent(prev), m_pboundary(0),
+  m_vars(PSI_INSTRUMENT_MEM), m_case_expr_ids(PSI_INSTRUMENT_MEM),
+  m_conditions(PSI_INSTRUMENT_MEM), m_cursors(PSI_INSTRUMENT_MEM),
+  m_handlers(PSI_INSTRUMENT_MEM), m_children(PSI_INSTRUMENT_MEM),
+  m_scope(scope)
+{
+  init(prev->m_var_offset + prev->m_max_var_index,
+       prev->current_cursor_count(),
+       prev->get_num_case_exprs());
+}
+
+
+sp_pcontext::~sp_pcontext()
+{
+  for (size_t i= 0; i < m_children.elements(); ++i)
+    delete m_children.at(i);
+}
+
+
+sp_pcontext *sp_pcontext::push_context(THD *thd, sp_pcontext::enum_scope scope)
+{
+  sp_pcontext *child= new (thd->mem_root) sp_pcontext(this, scope);
+
+  if (child)
+    m_children.append(child);
+  return child;
+}
+
+
+bool cmp_labels(sp_label *a, sp_label *b)
+{
+  return (lex_string_cmp(system_charset_info, &a->name, &b->name) == 0 &&
+          a->type == b->type);
+}
+
+sp_pcontext *sp_pcontext::pop_context()
+{
+  m_parent->m_max_var_index+= m_max_var_index;
+
+  uint submax= max_cursor_index();
+  if (submax > m_parent->m_max_cursor_index)
+    m_parent->m_max_cursor_index= submax;
+
+  if (m_num_case_exprs > m_parent->m_num_case_exprs)
+    m_parent->m_num_case_exprs= m_num_case_exprs;
+
+  /*
+  ** Push unresolved goto label to parent context
+  */
+  sp_label *label;
+  List_iterator_fast<sp_label> li(m_goto_labels);
+  while ((label= li++))
+  {
+    if (label->ip == 0)
+    {
+      m_parent->m_goto_labels.add_unique(label, &cmp_labels);
+    }
+  }
+  return m_parent;
+}
+
+
+uint sp_pcontext::diff_handlers(const sp_pcontext *ctx, bool exclusive) const
+{
+  uint n= 0;
+  const sp_pcontext *pctx= this;
+  const sp_pcontext *last_ctx= NULL;
+
+  while (pctx && pctx != ctx)
+  {
+    n+= (uint)pctx->m_handlers.elements();
+    last_ctx= pctx;
+    pctx= pctx->parent_context();
+  }
+  if (pctx)
+    return (exclusive && last_ctx ? n -(uint) last_ctx->m_handlers.elements() : n);
+  return 0;			// Didn't find ctx
+}
+
+
+uint sp_pcontext::diff_cursors(const sp_pcontext *ctx, bool exclusive) const
+{
+  uint n= 0;
+  const sp_pcontext *pctx= this;
+  const sp_pcontext *last_ctx= NULL;
+
+  while (pctx && pctx != ctx)
+  {
+    n+= (uint)pctx->m_cursors.elements();
+    last_ctx= pctx;
+    pctx= pctx->parent_context();
+  }
+  if (pctx)
+    return  (exclusive && last_ctx ? (uint)(n - last_ctx->m_cursors.elements()) : n);
+  return 0;			// Didn't find ctx
+}
+
+
+sp_variable *sp_pcontext::find_variable(const LEX_CSTRING *name,
+                                        bool current_scope_only) const
+{
+  size_t i= m_vars.elements() - m_pboundary;
+
+  while (i--)
+  {
+    sp_variable *p= m_vars.at(i);
+
+    if (system_charset_info->strnncoll(name->str, name->length,
+		                       p->name.str, p->name.length) == 0)
+    {
+      return p;
+    }
+  }
+
+  return (!current_scope_only && m_parent) ?
+    m_parent->find_variable(name, false) :
+    NULL;
+}
+
+
+/*
+  Find a variable by its run-time offset.
+  If the variable with a desired run-time offset is not found in this
+  context frame, it's recursively searched on parent context frames.
+
+  Note, context frames can have holes:
+    CREATE PROCEDURE p1() AS
+      x0 INT:=100;
+      CURSOR cur(p0 INT, p1 INT) IS SELECT p0, p1;
+      x1 INT:=101;
+    BEGIN
+      ...
+    END;
+  The variables (x0 and x1) and the cursor parameters (p0 and p1)
+  reside in separate parse context frames.
+
+  The variables reside on the top level parse context frame:
+  - x0 has frame offset 0 and run-time offset 0
+  - x1 has frame offset 1 and run-time offset 3
+
+  The cursor parameters reside on the second level parse context frame:
+  - p0 has frame offset 0 and run-time offset 1
+  - p1 has frame offset 1 and run-time offset 2
+
+  Run-time offsets on a frame can have holes, but offsets monotonically grow,
+  so run-time offsets of all variables are not greater than the run-time offset
+  of the very last variable in this frame.
+*/
+sp_variable *sp_pcontext::find_variable(uint offset) const
+{
+  if (m_var_offset <= offset &&
+      m_vars.elements() &&
+      offset <= get_last_context_variable()->offset)
+  {
+    for (uint i= 0; i < m_vars.elements(); i++)
+    {
+      if (m_vars.at(i)->offset == offset)
+        return m_vars.at(i); // This frame
+    }
+  }
+
+  return m_parent ?
+         m_parent->find_variable(offset) :    // Some previous frame
+         NULL;                                // Index out of bounds
+}
+
+
+sp_variable *sp_pcontext::add_variable(THD *thd, const LEX_CSTRING *name)
+{
+  sp_variable *p=
+    new (thd->mem_root) sp_variable(name, m_var_offset + m_max_var_index);
+
+  if (!p)
+    return NULL;
+
+  ++m_max_var_index;
+
+  return m_vars.append(p) ? NULL : p;
+}
+
+sp_label *sp_pcontext::push_label(THD *thd, const LEX_CSTRING *name, uint ip,
+                                  sp_label::enum_type type,
+                                  List *list)
+{
+  sp_label *label=
+    new (thd->mem_root) sp_label(name, ip, type, this);
+
+  if (!label)
+    return NULL;
+
+  list->push_front(label, thd->mem_root);
+
+  return label;
+}
+
+sp_label *sp_pcontext::find_goto_label(const LEX_CSTRING *name, bool recusive)
+{
+  List_iterator_fast li(m_goto_labels);
+  sp_label *lab;
+
+  while ((lab= li++))
+  {
+    if (lex_string_cmp(system_charset_info, name, &lab->name) == 0)
+      return lab;
+  }
+
+  if (!recusive)
+    return NULL;
+
+  /*
+    Note about exception handlers.
+    See SQL:2003 SQL/PSM (ISO/IEC 9075-4:2003),
+    section 13.1 ,
+    syntax rule 4.
+    In short, a DECLARE HANDLER block can not refer
+    to labels from the parent context, as they are out of scope.
+  */
+  if (m_scope == HANDLER_SCOPE && m_parent)
+  {
+    if (m_parent->m_parent)
+    {
+      // Skip the parent context
+      return m_parent->m_parent->find_goto_label(name);
+    }
+  }
+
+  return m_parent && (m_scope == REGULAR_SCOPE) ?
+         m_parent->find_goto_label(name) :
+         NULL;
+}
+
+
+sp_label *sp_pcontext::find_label(const LEX_CSTRING *name)
+{
+  List_iterator_fast li(m_labels);
+  sp_label *lab;
+
+  while ((lab= li++))
+  {
+    if (lex_string_cmp(system_charset_info, name, &lab->name) == 0)
+      return lab;
+  }
+
+  /*
+    Note about exception handlers.
+    See SQL:2003 SQL/PSM (ISO/IEC 9075-4:2003),
+    section 13.1 ,
+    syntax rule 4.
+    In short, a DECLARE HANDLER block can not refer
+    to labels from the parent context, as they are out of scope.
+  */
+  return (m_parent && (m_scope == REGULAR_SCOPE)) ?
+         m_parent->find_label(name) :
+         NULL;
+}
+
+
+sp_label *sp_pcontext::find_label_current_loop_start()
+{
+  List_iterator_fast li(m_labels);
+  sp_label *lab;
+
+  while ((lab= li++))
+  {
+    if (lab->type == sp_label::ITERATION)
+      return lab;
+  }
+  // See a comment in sp_pcontext::find_label()
+  return (m_parent && (m_scope == REGULAR_SCOPE)) ?
+         m_parent->find_label_current_loop_start() :
+         NULL;
+}
+
+
+bool sp_pcontext::add_condition(THD *thd,
+                                const LEX_CSTRING *name,
+                                sp_condition_value *value)
+{
+  sp_condition *p= new (thd->mem_root) sp_condition(name, value);
+
+  if (p == NULL)
+    return true;
+
+  return m_conditions.append(p);
+}
+
+
+sp_condition_value *sp_pcontext::find_condition(const LEX_CSTRING *name,
+                                                bool current_scope_only) const
+{
+  size_t i= m_conditions.elements();
+
+  while (i--)
+  {
+    sp_condition *p= m_conditions.at(i);
+
+    if (p->eq_name(name))
+    {
+      return p->value;
+    }
+  }
+
+  return (!current_scope_only && m_parent) ?
+    m_parent->find_condition(name, false) :
+    NULL;
+}
+
+sp_condition_value *
+sp_pcontext::find_declared_or_predefined_condition(THD *thd,
+                                                   const LEX_CSTRING *name)
+                                                   const
+{
+  sp_condition_value *p= find_condition(name, false);
+  if (p)
+    return p;
+  if (thd->variables.sql_mode & MODE_ORACLE)
+    return find_predefined_condition(name);
+  return NULL;
+}
+
+
/*
  Predefined condition values referenced by the sp_predefined_conditions
  table below. Statically allocated (not on a MEM_ROOT): they live for
  the whole server lifetime.
*/
static sp_condition_value
  // Warnings
  cond_no_data_found(ER_SP_FETCH_NO_DATA, "01000"),
  // Errors
  cond_invalid_cursor(ER_SP_CURSOR_NOT_OPEN, "24000"),
  cond_dup_val_on_index(ER_DUP_ENTRY, "23000"),
  cond_dup_val_on_index2(ER_DUP_ENTRY_WITH_KEY_NAME, "23000"),
  cond_too_many_rows(ER_TOO_MANY_ROWS, "42000");


/*
  Name -> value map of predefined conditions (used in sql_mode=ORACLE,
  see find_declared_or_predefined_condition()).
  Note: DUP_VAL_ON_INDEX appears twice on purpose - the name maps to two
  distinct error codes (ER_DUP_ENTRY and ER_DUP_ENTRY_WITH_KEY_NAME);
  find_predefined_condition() returns the first match.
*/
static sp_condition sp_predefined_conditions[]=
{
  // Warnings
  sp_condition(STRING_WITH_LEN("NO_DATA_FOUND"), &cond_no_data_found),
  // Errors
  sp_condition(STRING_WITH_LEN("INVALID_CURSOR"), &cond_invalid_cursor),
  sp_condition(STRING_WITH_LEN("DUP_VAL_ON_INDEX"), &cond_dup_val_on_index),
  sp_condition(STRING_WITH_LEN("DUP_VAL_ON_INDEX"), &cond_dup_val_on_index2),
  sp_condition(STRING_WITH_LEN("TOO_MANY_ROWS"), &cond_too_many_rows)
};
+
+
+sp_condition_value *
+sp_pcontext::find_predefined_condition(const LEX_CSTRING *name) const
+{
+  for (uint i= 0; i < array_elements(sp_predefined_conditions) ; i++)
+  {
+    if (sp_predefined_conditions[i].eq_name(name))
+      return sp_predefined_conditions[i].value;
+  }
+  return NULL;
+}
+
+
+sp_handler *sp_pcontext::add_handler(THD *thd,
+                                     sp_handler::enum_type type)
+{
+  sp_handler *h= new (thd->mem_root) sp_handler(type);
+
+  if (!h)
+    return NULL;
+
+  return m_handlers.append(h) ? NULL : h;
+}
+
+
+bool sp_pcontext::check_duplicate_handler(
+  const sp_condition_value *cond_value) const
+{
+  for (size_t i= 0; i < m_handlers.elements(); ++i)
+  {
+    sp_handler *h= m_handlers.at(i);
+
+    List_iterator_fast li(h->condition_values);
+    sp_condition_value *cv;
+
+    while ((cv= li++))
+    {
+      if (cond_value->equals(cv))
+        return true;
+    }
+  }
+
+  return false;
+}
+
+
/*
  Check whether this condition value matches the given SQL condition
  identity, and makes a stronger match than a previously found candidate.

  The switch cases go from the most specific type (ERROR_CODE) to the
  least specific (EXCEPTION); the "found_cv->type > ..." comparisons rely
  on the numeric order of enum_type.
*/
bool sp_condition_value::matches(const Sql_condition_identity &value,
                                 const sp_condition_value *found_cv) const
{
  /*
    A user-defined condition matches only itself; when the identity has no
    user condition value, this check is a no-op (always true).
  */
  bool user_value_matched= !value.get_user_condition_value() ||
                           this == value.get_user_condition_value();

  switch (type)
  {
  case sp_condition_value::ERROR_CODE:
    return user_value_matched &&
           value.get_sql_errno() == get_sql_errno() &&
           (!found_cv || found_cv->type > sp_condition_value::ERROR_CODE);

  case sp_condition_value::SQLSTATE:
    return user_value_matched &&
           Sql_state::eq(&value) &&
           (!found_cv || found_cv->type > sp_condition_value::SQLSTATE);

  case sp_condition_value::WARNING:
    return user_value_matched &&
           (value.Sql_state::is_warning() ||
            value.get_level() == Sql_condition::WARN_LEVEL_WARN) &&
           !found_cv;

  case sp_condition_value::NOT_FOUND:
    return user_value_matched &&
           value.Sql_state::is_not_found() &&
           !found_cv;

  case sp_condition_value::EXCEPTION:
    /*
      In sql_mode=ORACLE this construct should catch both errors and warnings:
        EXCEPTION
          WHEN OTHERS THEN ...;
      E.g. NO_DATA_FOUND is more like a warning than an error,
      and it should be caught.

      We don't check user_value_matched here.
      "WHEN OTHERS" catches all user defined exception.
    */
    return (((current_thd->variables.sql_mode & MODE_ORACLE) ||
           (value.Sql_state::is_exception() &&
            value.get_level() == Sql_condition::WARN_LEVEL_ERROR)) &&
           !found_cv);
  }
  return false;
}
+
+
+sp_handler*
+sp_pcontext::find_handler(const Sql_condition_identity &value) const
+{
+  sp_handler *found_handler= NULL;
+  sp_condition_value *found_cv= NULL;
+
+  for (size_t i= 0; i < m_handlers.elements(); ++i)
+  {
+    sp_handler *h= m_handlers.at(i);
+
+    List_iterator_fast li(h->condition_values);
+    sp_condition_value *cv;
+
+    while ((cv= li++))
+    {
+      if (cv->matches(value, found_cv))
+      {
+        found_cv= cv;
+        found_handler= h;
+      }
+    }
+  }
+
+  if (found_handler)
+    return found_handler;
+
+
+  // There is no appropriate handler in this parsing context. We need to look up
+  // in parent contexts. There might be two cases here:
+  //
+  // 1. The current context has REGULAR_SCOPE. That means, it's a simple
+  // BEGIN..END block:
+  //     ...
+  //     BEGIN
+  //       ... # We're here.
+  //     END
+  //     ...
+  // In this case we simply call find_handler() on parent's context recursively.
+  //
+  // 2. The current context has HANDLER_SCOPE. That means, we're inside an
+  // SQL-handler block:
+  //   ...
+  //   DECLARE ... HANDLER FOR ...
+  //   BEGIN
+  //     ... # We're here.
+  //   END
+  //   ...
+  // In this case we can not just call parent's find_handler(), because
+  // parent's handler don't catch conditions from this scope. Instead, we should
+  // try to find first parent context (we might have nested handler
+  // declarations), which has REGULAR_SCOPE (i.e. which is regular BEGIN..END
+  // block).
+
+  const sp_pcontext *p= this;
+
+  while (p && p->m_scope == HANDLER_SCOPE)
+    p= p->m_parent;
+
+  if (!p || !p->m_parent)
+    return NULL;
+
+  return p->m_parent->find_handler(value);
+}
+
+
+bool sp_pcontext::add_cursor(const LEX_CSTRING *name, sp_pcontext *param_ctx,
+                             sp_lex_cursor *lex)
+{
+  if (m_cursors.elements() == m_max_cursor_index)
+    ++m_max_cursor_index;
+
+  return m_cursors.append(sp_pcursor(name, param_ctx, lex));
+}
+
+
+const sp_pcursor *sp_pcontext::find_cursor(const LEX_CSTRING *name,
+                                           uint *poff,
+                                           bool current_scope_only) const
+{
+  uint i= (uint)m_cursors.elements();
+
+  while (i--)
+  {
+    LEX_CSTRING n= m_cursors.at(i);
+
+    if (system_charset_info->strnncoll(name->str, name->length,
+		                       n.str, n.length) == 0)
+    {
+      *poff= m_cursor_offset + i;
+      return &m_cursors.at(i);
+    }
+  }
+
+  return (!current_scope_only && m_parent) ?
+    m_parent->find_cursor(name, poff, false) :
+    NULL;
+}
+
+
+void sp_pcontext::retrieve_field_definitions(
+  List *field_def_lst) const
+{
+  /* Put local/context fields in the result list. */
+
+  size_t next_child= 0;
+  for (size_t i= 0; i < m_vars.elements(); ++i)
+  {
+    sp_variable *var_def= m_vars.at(i);
+
+    /*
+      The context can have holes in run-time offsets,
+      the missing offsets reside on the children contexts in such cases.
+      Example:
+        CREATE PROCEDURE p1() AS
+          x0 INT:=100;        -- context 0, position 0, run-time 0
+          CURSOR cur(
+            p0 INT,           -- context 1, position 0, run-time 1
+            p1 INT            -- context 1, position 1, run-time 2
+          ) IS SELECT p0, p1;
+          x1 INT:=101;        -- context 0, position 1, run-time 3
+        BEGIN
+          ...
+        END;
+      See more comments in sp_pcontext::find_variable().
+      We must retrieve the definitions in the order of their run-time offsets.
+      Check that there are children that should go before the current variable.
+    */
+    for ( ; next_child < m_children.elements(); next_child++)
+    {
+      sp_pcontext *child= m_children.at(next_child);
+      if (!child->context_var_count() ||
+          child->get_context_variable(0)->offset > var_def->offset)
+        break;
+      /*
+        All variables on the embedded context (that fills holes of the parent)
+        should have the run-time offset strictly less than var_def.
+      */
+      DBUG_ASSERT(child->get_context_variable(0)->offset < var_def->offset);
+      DBUG_ASSERT(child->get_last_context_variable()->offset < var_def->offset);
+      child->retrieve_field_definitions(field_def_lst);
+    }
+    field_def_lst->push_back(&var_def->field_def);
+  }
+
+  /* Put the fields of the remaining enclosed contexts in the result list. */
+
+  for (size_t i= next_child; i < m_children.elements(); ++i)
+    m_children.at(i)->retrieve_field_definitions(field_def_lst);
+}
+
+
+const sp_pcursor *sp_pcontext::find_cursor(uint offset) const
+{
+  if (m_cursor_offset <= offset &&
+      offset < m_cursor_offset + m_cursors.elements())
+  {
+    return &m_cursors.at(offset - m_cursor_offset);   // This frame
+  }
+
+  return m_parent ?
+         m_parent->find_cursor(offset) :  // Some previous frame
+         NULL;                            // Index out of bounds
+}
+
+
/*
  Verify that a cursor invocation supplies exactly as many actual
  parameters as the cursor declares formal ones (zero when there is no
  parameter context).

  @param param_count  Number of actual parameters supplied.
  @return false on success, true (with my_error() raised) on mismatch.
*/
bool sp_pcursor::check_param_count_with_error(uint param_count) const
{
  if (param_count != (m_param_context ?
                      m_param_context->context_var_count() : 0))
  {
    /* LEX_CSTRING::str is the cursor name (sp_pcursor derives from it). */
    my_error(ER_WRONG_PARAMCOUNT_TO_CURSOR, MYF(0), LEX_CSTRING::str);
    return true;
  }
  return false;
}
+
+
+const Spvar_definition *
+sp_variable::find_row_field(const LEX_CSTRING *var_name,
+                            const LEX_CSTRING *field_name,
+                            uint *row_field_offset)
+{
+  if (!field_def.is_row())
+  {
+    my_printf_error(ER_UNKNOWN_ERROR,
+                    "'%s' is not a row variable", MYF(0), var_name->str);
+    return NULL;
+  }
+  const Spvar_definition *def;
+  if ((def= field_def.find_row_field_by_name(field_name, row_field_offset)))
+    return def;
+  my_error(ER_ROW_VARIABLE_DOES_NOT_HAVE_FIELD, MYF(0),
+           var_name->str, field_name->str);
+  return NULL;
+}
diff --git a/sql/sp_pcontext.h b/sql/sp_pcontext.h
new file mode 100644
index 00000000..71846ad4
--- /dev/null
+++ b/sql/sp_pcontext.h
@@ -0,0 +1,808 @@
+/* -*- C++ -*- */
+/* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2009, 2020, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#ifndef _SP_PCONTEXT_H_
+#define _SP_PCONTEXT_H_
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface			/* gcc class implementation */
+#endif
+
+#include "sql_string.h"                         // LEX_STRING
+#include "field.h"                              // Create_field
+#include "sql_array.h"                          // Dynamic_array
+
+
+/// This class represents a stored program variable or a parameter
+/// (also referenced as 'SP-variable').
+
class sp_variable : public Sql_alloc
{
public:
  /// Parameter mode of a routine parameter (SQL IN/OUT/INOUT).
  enum enum_mode
  {
    MODE_IN,
    MODE_OUT,
    MODE_INOUT
  };

  /// Name of the SP-variable.
  LEX_CSTRING name;

  /// Mode of the SP-variable.
  enum_mode mode;

  /// The index to the variable's value in the runtime frame.
  ///
  /// It is calculated during parsing and used when creating sp_instr_set
  /// instructions and Item_splocal items. I.e. values are set/referred by
  /// array indexing in runtime.
  uint offset;

  /// Default value of the SP-variable (if any).
  Item *default_value;

  /// Full type information (field meta-data) of the SP-variable.
  Spvar_definition field_def;

  /// Field-type of the SP-variable.
  const Type_handler *type_handler() const
    { return field_def.type_handler(); }

public:
  /// Construct an SP-variable with the given name and run-time offset.
  /// The mode defaults to MODE_IN and no default value is set.
  sp_variable(const LEX_CSTRING *name_arg, uint offset_arg)
   :Sql_alloc(),
    name(*name_arg),
    mode(MODE_IN),
    offset(offset_arg),
    default_value(NULL)
  { }
  /*
    Find a ROW field by its qualified name.
    @param      var_name - the name of the variable
    @param      field_name - the name of the variable field
    @param[OUT] row_field_offset - the index of the field

    @retval  NULL if the variable with the given name was not found,
             or it is not a row variable, or it does not have a field
             with the given name, or a non-null pointer otherwise.
             row_field_offset[0] is set only when the method returns !NULL.
  */
  const Spvar_definition *find_row_field(const LEX_CSTRING *var_name,
                                         const LEX_CSTRING *field_name,
                                         uint *row_field_offset);
};
+
+///////////////////////////////////////////////////////////////////////////
+
+/// This class represents an SQL/PSM label. Can refer to the identifier
+/// used with the "label_name:" construct which may precede some SQL/PSM
+/// statements, or to an implicit implementation-dependent identifier which
+/// the parser inserts before a high-level flow control statement such as
+/// IF/WHILE/REPEAT/LOOP, when such statement is rewritten into a
+/// combination of low-level jump/jump_if instructions and labels.
+
+
class sp_label : public Sql_alloc
{
public:
  /// Kinds of labels; see the class comment above for how implicit
  /// labels are generated by the parser.
  enum enum_type
  {
    /// Implicit label generated by parser.
    IMPLICIT,

    /// Label at BEGIN.
    BEGIN,

    /// Label at iteration control
    ITERATION,

    /// Label for jump
    GOTO
  };

  /// Name of the label.
  LEX_CSTRING name;

  /// Instruction pointer of the label.
  uint ip;

  /// Type of the label.
  enum_type type;

  /// Scope of the label.
  class sp_pcontext *ctx;

public:
  /// Construct a label belonging to the parse context _ctx.
  sp_label(const LEX_CSTRING *_name,
           uint _ip, enum_type _type, sp_pcontext *_ctx)
   :Sql_alloc(),
    name(*_name),
    ip(_ip),
    type(_type),
    ctx(_ctx)
  { }
};
+
+
+///////////////////////////////////////////////////////////////////////////
+
+/// This class represents condition-value term in DECLARE CONDITION or
+/// DECLARE HANDLER statements. sp_condition_value has little to do with
+/// SQL-conditions.
+///
+/// In some sense, this class is a union -- a set of filled attributes
+/// depends on the sp_condition_value::type value.
+
class sp_condition_value : public Sql_alloc, public Sql_state_errno
{
  /// True for conditions declared by the user (SIGNAL with a user
  /// SQLSTATE); see get_user_condition_identity().
  bool m_is_user_defined;
public:
  /// Condition type, ordered from the most specific (ERROR_CODE) to the
  /// least specific (EXCEPTION). matches() relies on this numeric order
  /// when comparing candidate handler conditions.
  enum enum_type
  {
    ERROR_CODE,
    SQLSTATE,
    WARNING,
    NOT_FOUND,
    EXCEPTION
  };

  /// Type of the condition value.
  enum_type type;

public:
  /// Condition matching a specific MySQL error code (default SQLSTATE).
  sp_condition_value(uint _mysqlerr)
   :Sql_alloc(),
    Sql_state_errno(_mysqlerr),
    m_is_user_defined(false),
    type(ERROR_CODE)
  { }

  /// Condition matching a specific MySQL error code with an explicit
  /// SQLSTATE (used for the predefined Oracle-style conditions).
  sp_condition_value(uint _mysqlerr, const char *_sql_state)
   :Sql_alloc(),
    Sql_state_errno(_mysqlerr, _sql_state),
    m_is_user_defined(false),
    type(ERROR_CODE)
  { }

  /// Condition matching any error with the given SQLSTATE.
  sp_condition_value(const char *_sql_state, bool is_user_defined= false)
   :Sql_alloc(),
    Sql_state_errno(0, _sql_state),
    m_is_user_defined(is_user_defined),
    type(SQLSTATE)
  { }

  /// Class condition: WARNING, NOT_FOUND or EXCEPTION (SQLWARNING,
  /// NOT FOUND, SQLEXCEPTION in DECLARE HANDLER syntax).
  sp_condition_value(enum_type _type)
   :Sql_alloc(),
    m_is_user_defined(false),
    type(_type)
  {
    DBUG_ASSERT(type != ERROR_CODE && type != SQLSTATE);
  }

  /// Check if two instances of sp_condition_value are equal or not.
  ///
  /// @param cv another instance of sp_condition_value to check.
  ///
  /// @return true if the instances are equal, false otherwise.
  bool equals(const sp_condition_value *cv) const;


  /**
    Checks if this condition is OK for search.
    See also sp_context::find_handler().

    @param identity - The condition identity
    @param found_cv - A previously found matching condition or NULL.
    @return true    - If the current value matches identity and
                      makes a stronger match than the previously
                      found condition found_cv.
    @return false   - If the current value does not match identity,
                      or the current value makes a weaker match than found_cv.
  */
  bool matches(const Sql_condition_identity &identity,
               const sp_condition_value *found_cv) const;

  /// Wrap this condition as a user-condition identity: non-NULL only
  /// when the condition was user defined.
  Sql_user_condition_identity get_user_condition_identity() const
  {
    return Sql_user_condition_identity(m_is_user_defined ? this : NULL);
  }
};
+
+
/// A user-defined condition: an SQLSTATE-type condition value with the
/// generic user SQLSTATE '45000' and the user-defined flag set.
class sp_condition_value_user_defined: public sp_condition_value
{
public:
  sp_condition_value_user_defined()
   :sp_condition_value("45000", true)
  { }
};
+
+
+///////////////////////////////////////////////////////////////////////////
+
+/// This class represents 'DECLARE CONDITION' statement.
+/// sp_condition has little to do with SQL-conditions.
+
class sp_condition : public Sql_alloc
{
public:
  /// Name of the condition.
  LEX_CSTRING name;

  /// Value of the condition.
  sp_condition_value *value;

public:
  /// Construct a named condition (DECLARE ... CONDITION FOR ...).
  sp_condition(const LEX_CSTRING *name_arg, sp_condition_value *value_arg)
   :Sql_alloc(),
    name(*name_arg),
    value(value_arg)
  { }
  /// Constructor taking the name as pointer + length; used together with
  /// STRING_WITH_LEN for the static table of predefined conditions.
  sp_condition(const char *name_arg, size_t name_length_arg,
               sp_condition_value *value_arg)
   :value(value_arg)
  {
    name.str=    name_arg;
    name.length= name_length_arg;
  }
  /// Case-insensitive name comparison using the system character set.
  bool eq_name(const LEX_CSTRING *str) const
  {
    return system_charset_info->strnncoll(name.str, name.length,
                                          str->str, str->length) == 0;
  }
};
+
+
+///////////////////////////////////////////////////////////////////////////
+
+/**
+  class sp_pcursor.
+  Stores information about a cursor:
+  - Cursor's name in LEX_STRING.
+  - Cursor's formal parameter descriptions.
+
+    Formal parameter descriptions reside in a separate context block,
+    pointed by the "m_param_context" member.
+
+    m_param_context can be NULL. This means a cursor with no parameters.
+    Otherwise, the number of variables in m_param_context means
+    the number of cursor's formal parameters.
+
+    Note, m_param_context can be not NULL, but have no variables.
    This also means a cursor with no parameters (similar to NULL).
+*/
/// Derives from LEX_CSTRING: the base sub-object is the cursor name.
class sp_pcursor: public LEX_CSTRING
{
  class sp_pcontext *m_param_context; // Formal parameters
  class sp_lex_cursor *m_lex;         // The cursor statement LEX
public:
  /// Construct a cursor description; param_ctx may be NULL for a cursor
  /// without formal parameters (see the class comment above).
  sp_pcursor(const LEX_CSTRING *name, class sp_pcontext *param_ctx,
             class sp_lex_cursor *lex)
   :LEX_CSTRING(*name), m_param_context(param_ctx), m_lex(lex)
  { }
  /// Parse context holding the formal parameters, or NULL.
  class sp_pcontext *param_context() const { return m_param_context; }
  /// LEX of the cursor's SELECT statement.
  class sp_lex_cursor *lex() const { return m_lex; }
  /// Report an error unless param_count equals the number of formal
  /// parameters; see the definition in sp_pcontext.cc.
  bool check_param_count_with_error(uint param_count) const;
};
+
+
+///////////////////////////////////////////////////////////////////////////
+
+/// This class represents 'DECLARE HANDLER' statement.
+
+class sp_handler : public Sql_alloc
+{
+public:
+  /// Enumeration of possible handler types.
+  /// Note: UNDO handlers are not (and have never been) supported.
+  enum enum_type
+  {
+    EXIT,
+    CONTINUE
+  };
+
+  /// Handler type.
+  enum_type type;
+
+  /// Conditions caught by this handler.
+  List condition_values;
+
+public:
+  /// The constructor.
+  ///
+  /// @param _type SQL-handler type.
+  sp_handler(enum_type _type)
+   :Sql_alloc(),
+    type(_type)
+  { }
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+/// The class represents parse-time context, which keeps track of declared
+/// variables/parameters, conditions, handlers, cursors and labels.
+///
+/// sp_pcontext objects are organized in a tree according to the following
+/// rules:
///   - one sp_pcontext object corresponds to each BEGIN..END block;
+///   - one sp_pcontext object corresponds for each exception handler;
+///   - one additional sp_pcontext object is created to contain
+///     Stored Program parameters.
+///
+/// sp_pcontext objects are used both at parse-time and at runtime.
+///
+/// During the parsing stage sp_pcontext objects are used:
+///   - to look up defined names (e.g. declared variables and visible
+///     labels);
+///   - to check for duplicates;
+///   - for error checking;
+///   - to calculate offsets to be used at runtime.
+///
+/// During the runtime phase, a tree of sp_pcontext objects is used:
+///   - for error checking (e.g. to check correct number of parameters);
+///   - to resolve SQL-handlers.
+
+class sp_pcontext : public Sql_alloc
+{
+public:
+  enum enum_scope
+  {
+    /// REGULAR_SCOPE designates regular BEGIN ... END blocks.
+    REGULAR_SCOPE,
+
+    /// HANDLER_SCOPE designates SQL-handler blocks.
+    HANDLER_SCOPE
+  };
+
  class Lex_for_loop: public Lex_for_loop_st
  {
  public:
    /*
      The label pointing to the body start,
      either explicit or automatically generated.
      Used during generation of "ITERATE loop_label"
      to check if "loop_label" is a FOR loop label.
      - In case of a FOR loop, some additional code
        (cursor fetch or integer increment) is generated before
        the backward jump to the beginning of the loop body.
      - In case of other loop types (WHILE, REPEAT)
        only the jump is generated.
    */
    const sp_label *m_start_label;

    /* An empty loop descriptor with no start label. */
    Lex_for_loop()
     :m_start_label(NULL)
    { Lex_for_loop_st::init(); }

    /* Copy an existing descriptor and attach its start label. */
    Lex_for_loop(const Lex_for_loop_st &for_loop, const sp_label *start)
     :m_start_label(start)
    {
      Lex_for_loop_st::operator=(for_loop);
    }
  };
+
+public:
+  sp_pcontext();
+  ~sp_pcontext();
+
+
+  /// Create and push a new context in the tree.
+
+  /// @param thd   thread context.
+  /// @param scope scope of the new parsing context.
+  /// @return the node created.
+  sp_pcontext *push_context(THD *thd, enum_scope scope);
+
+  /// Pop a node from the parsing context tree.
+  /// @return the parent node.
+  sp_pcontext *pop_context();
+
+  sp_pcontext *parent_context() const
+  { return m_parent; }
+
+  sp_pcontext *child_context(uint i) const
+  { return i < m_children.elements() ? m_children.at(i) : NULL; }
+
+  /// Calculate and return the number of handlers to pop between the given
+  /// context and this one.
+  ///
+  /// @param ctx       the other parsing context.
+  /// @param exclusive specifies if the last scope should be excluded.
+  ///
+  /// @return the number of handlers to pop between the given context and
+  /// this one.  If 'exclusive' is true, don't count the last scope we are
+  /// leaving; this is used for LEAVE where we will jump to the hpop
+  /// instructions.
+  uint diff_handlers(const sp_pcontext *ctx, bool exclusive) const;
+
+  /// Calculate and return the number of cursors to pop between the given
+  /// context and this one.
+  ///
+  /// @param ctx       the other parsing context.
+  /// @param exclusive specifies if the last scope should be excluded.
+  ///
+  /// @return the number of cursors to pop between the given context and
+  /// this one.  If 'exclusive' is true, don't count the last scope we are
+  /// leaving; this is used for LEAVE where we will jump to the cpop
+  /// instructions.
+  uint diff_cursors(const sp_pcontext *ctx, bool exclusive) const;
+
+  /////////////////////////////////////////////////////////////////////////
+  // SP-variables (parameters and variables).
+  /////////////////////////////////////////////////////////////////////////
+
+  /// @return the maximum number of variables used in this and all child
+  /// contexts. For the root parsing context, this gives us the number of
+  /// slots needed for variables during the runtime phase.
+  uint max_var_index() const
+  { return m_max_var_index; }
+
+  /// @return the current number of variables used in the parent contexts
+  /// (from the root), including this context.
+  uint current_var_count() const
+  { return m_var_offset + (uint)m_vars.elements(); }
+
+  /// @return the number of variables in this context alone.
+  uint context_var_count() const
+  { return (uint)m_vars.elements(); }
+
+  /// return the i-th variable on the current context
+  sp_variable *get_context_variable(uint i) const
+  {
+    DBUG_ASSERT(i < m_vars.elements());
+    return m_vars.at(i);
+  }
+
+  /*
+    Return the i-th last context variable.
+    If i is 0, then return the very last variable in m_vars.
+  */
+  sp_variable *get_last_context_variable(uint i= 0) const
+  {
+    DBUG_ASSERT(i < m_vars.elements());
+    return m_vars.at(m_vars.elements() - i - 1);
+  }
+
+  /// Add SP-variable to the parsing context.
+  ///
+  /// @param thd  Thread context.
+  /// @param name Name of the SP-variable.
+  ///
+  /// @return instance of newly added SP-variable.
+  sp_variable *add_variable(THD *thd, const LEX_CSTRING *name);
+
+  /// Retrieve full type information about SP-variables in this parsing
+  /// context and its children.
+  ///
+  /// @param field_def_lst[out] Container to store type information.
+  void retrieve_field_definitions(List *field_def_lst) const;
+
+  /// Find SP-variable by name.
+  ///
+  /// The function does a linear search (from newer to older variables,
+  /// in case we have shadowed names).
+  ///
+  /// The function is called only at parsing time.
+  ///
+  /// @param name               Variable name.
+  /// @param current_scope_only A flag if we search only in current scope.
+  ///
+  /// @return instance of found SP-variable, or NULL if not found.
+  sp_variable *find_variable(const LEX_CSTRING *name, bool current_scope_only) const;
+
+  /// Find SP-variable by the offset in the root parsing context.
+  ///
+  /// The function is used for two things:
+  /// - When evaluating parameters at the beginning, and setting out parameters
+  ///   at the end, of invocation. (Top frame only, so no recursion then.)
+  /// - For printing of sp_instr_set. (Debug mode only.)
+  ///
+  /// @param offset Variable offset in the root parsing context.
+  ///
+  /// @return instance of found SP-variable, or NULL if not found.
+  sp_variable *find_variable(uint offset) const;
+
+  /// Set the current scope boundary (for default values).
+  ///
+  /// @param n The number of variables to skip.
+  void declare_var_boundary(uint n)
+  { m_pboundary= n; }
+
+  /////////////////////////////////////////////////////////////////////////
+  // CASE expressions.
+  /////////////////////////////////////////////////////////////////////////
+
+  int register_case_expr()
+  { return m_num_case_exprs++; }
+
+  int get_num_case_exprs() const
+  { return m_num_case_exprs; }
+
+  bool push_case_expr_id(int case_expr_id)
+  { return m_case_expr_ids.append(case_expr_id); }
+
+  void pop_case_expr_id()
+  { m_case_expr_ids.pop(); }
+
+  int get_current_case_expr_id() const
+  { return *m_case_expr_ids.back(); }
+
+  /////////////////////////////////////////////////////////////////////////
+  // Labels.
+  /////////////////////////////////////////////////////////////////////////
+
+  sp_label *push_label(THD *thd, const LEX_CSTRING *name, uint ip,
+                       sp_label::enum_type type, List * list);
+
+  sp_label *push_label(THD *thd, const LEX_CSTRING *name, uint ip,
+                       sp_label::enum_type type)
+  { return push_label(thd, name, ip, type, &m_labels); }
+
+  sp_label *push_goto_label(THD *thd, const LEX_CSTRING *name, uint ip,
+                            sp_label::enum_type type)
+  { return push_label(thd, name, ip, type, &m_goto_labels); }
+
+  sp_label *push_label(THD *thd, const LEX_CSTRING *name, uint ip)
+  { return push_label(thd, name, ip, sp_label::IMPLICIT); }
+
+  sp_label *push_goto_label(THD *thd, const LEX_CSTRING *name, uint ip)
+  { return push_goto_label(thd, name, ip, sp_label::GOTO); }
+
+  sp_label *find_label(const LEX_CSTRING *name);
+
+  sp_label *find_goto_label(const LEX_CSTRING *name, bool recusive);
+
+  sp_label *find_goto_label(const LEX_CSTRING *name)
+  { return find_goto_label(name, true); }
+
+  sp_label *find_label_current_loop_start();
+
+  /// @return the most recently pushed block label in this parsing context,
+  /// or the nearest label found walking up the parent-context chain;
+  /// NULL when no enclosing context holds any label.
+  sp_label *last_label()
+  {
+    sp_label *label= m_labels.head();
+
+    // Fall back to the parent context: a label can enclose a nested block.
+    if (!label && m_parent)
+      label= m_parent->last_label();
+
+    return label;
+  }
+
+  sp_label *last_goto_label()
+  {
+    return m_goto_labels.head();
+  }
+
+  sp_label *pop_label()
+  { return m_labels.pop(); }
+
+  /// Parse-time duplicate check for a block label declaration.
+  /// Raises ER_SP_LABEL_REDEFINE when @a label is already visible.
+  /// @return true on duplicate (error reported), false otherwise.
+  bool block_label_declare(LEX_CSTRING *label)
+  {
+    sp_label *lab= find_label(label);
+    if (lab)
+    {
+      my_error(ER_SP_LABEL_REDEFINE, MYF(0), label->str);
+      return true;
+    }
+    return false;
+  }
+
+  /////////////////////////////////////////////////////////////////////////
+  // Conditions.
+  /////////////////////////////////////////////////////////////////////////
+
+  bool add_condition(THD *thd, const LEX_CSTRING *name,
+                               sp_condition_value *value);
+
+  /// See comment for find_variable() above.
+  sp_condition_value *find_condition(const LEX_CSTRING *name,
+                                     bool current_scope_only) const;
+
+  sp_condition_value *
+  find_declared_or_predefined_condition(THD *thd, const LEX_CSTRING *name) const;
+
+  /// Declare a named SQL condition in the current scope.
+  /// Duplicates are detected in the current scope only (second argument
+  /// of find_condition() is true), per DECLARE CONDITION scoping rules.
+  /// @return true on error (duplicate name or OOM), false on success.
+  bool declare_condition(THD *thd, const LEX_CSTRING *name,
+                                   sp_condition_value *val)
+  {
+    if (find_condition(name, true))
+    {
+      my_error(ER_SP_DUP_COND, MYF(0), name->str);
+      return true;
+    }
+    return add_condition(thd, name, val);
+  }
+
+  /////////////////////////////////////////////////////////////////////////
+  // Handlers.
+  /////////////////////////////////////////////////////////////////////////
+
+  sp_handler *add_handler(THD* thd, sp_handler::enum_type type);
+
+  /// This is an auxiliary parsing-time function to check if an SQL-handler
+  /// exists in the current parsing context (current scope) for the given
+  /// SQL-condition. This function is used to check for duplicates during
+  /// the parsing phase.
+  ///
+  /// This function can not be used during the runtime phase to check
+  /// SQL-handler existence because it searches for the SQL-handler in the
+  /// current scope only (during runtime, current and parent scopes
+  /// should be checked according to the SQL-handler resolution rules).
+  ///
+  /// @param condition_value the handler condition value
+  ///                        (not SQL-condition!).
+  ///
+  /// @retval true if such SQL-handler exists.
+  /// @retval false otherwise.
+  bool check_duplicate_handler(const sp_condition_value *cond_value) const;
+
+  /// Find an SQL handler for the given SQL condition according to the
+  /// SQL-handler resolution rules. This function is used at runtime.
+  ///
+  /// @param value            The error code and the SQL state
+  /// @param level            The SQL condition level
+  ///
+  /// @return a pointer to the found SQL-handler or NULL.
+  sp_handler *find_handler(const Sql_condition_identity &identity) const;
+
+  /////////////////////////////////////////////////////////////////////////
+  // Cursors.
+  /////////////////////////////////////////////////////////////////////////
+
+  bool add_cursor(const LEX_CSTRING *name, sp_pcontext *param_ctx,
+                  class sp_lex_cursor *lex);
+
+  /// See comment for find_variable() above.
+  const sp_pcursor *find_cursor(const LEX_CSTRING *name,
+                                uint *poff, bool current_scope_only) const;
+
+  /// Like find_cursor(), but reports ER_SP_CURSOR_MISMATCH when the
+  /// cursor is not found.
+  /// @return the found cursor, or NULL (with the error already raised).
+  const sp_pcursor *find_cursor_with_error(const LEX_CSTRING *name,
+                                           uint *poff,
+                                           bool current_scope_only) const
+  {
+    const sp_pcursor *pcursor= find_cursor(name, poff, current_scope_only);
+    if (!pcursor)
+    {
+      my_error(ER_SP_CURSOR_MISMATCH, MYF(0), name->str);
+      return NULL;
+    }
+    return pcursor;
+  }
+  /// Find cursor by offset (for SHOW {PROCEDURE|FUNCTION} CODE only).
+  const sp_pcursor *find_cursor(uint offset) const;
+
+  const sp_pcursor *get_cursor_by_local_frame_offset(uint offset) const
+  { return &m_cursors.at(offset); }
+
+  uint cursor_offset() const
+  { return m_cursor_offset; }
+
+  uint frame_cursor_count() const
+  { return (uint)m_cursors.elements(); }
+
+  uint max_cursor_index() const
+  { return m_max_cursor_index + (uint)m_cursors.elements(); }
+
+  uint current_cursor_count() const
+  { return m_cursor_offset + (uint)m_cursors.elements(); }
+
+  void set_for_loop(const Lex_for_loop_st &for_loop)
+  {
+    m_for_loop= Lex_for_loop(for_loop, last_label());
+  }
+  const Lex_for_loop &for_loop()
+  {
+    return m_for_loop;
+  }
+
+private:
+  /// Constructor for a tree node.
+  /// @param prev the parent parsing context
+  /// @param scope scope of this parsing context
+  sp_pcontext(sp_pcontext *prev, enum_scope scope);
+
+  void init(uint var_offset, uint cursor_offset, int num_case_expressions);
+
+  /* Prevent use of these */
+  sp_pcontext(const sp_pcontext &);
+  void operator=(sp_pcontext &);
+
+  sp_condition_value *find_predefined_condition(const LEX_CSTRING *name) const;
+
+private:
+  /// m_max_var_index -- number of variables (including all types of arguments)
+  /// in this context including all children contexts.
+  ///
+  /// m_max_var_index >= m_vars.elements().
+  ///
+  /// m_max_var_index of the root parsing context contains number of all
+  /// variables (including arguments) in all enclosed contexts.
+  uint m_max_var_index;
+
+  /// The maximum sub context's framesizes.
+  uint m_max_cursor_index;
+
+  /// Parent context.
+  sp_pcontext *m_parent;
+
+  /// An index of the first SP-variable in this parsing context. The index
+  /// belongs to a runtime table of SP-variables.
+  ///
+  /// Note:
+  ///   - m_var_offset is 0 for root parsing context;
+  ///   - m_var_offset is different for all nested parsing contexts.
+  uint m_var_offset;
+
+  /// Cursor offset for this context.
+  uint m_cursor_offset;
+
+  /// Boundary for finding variables in this context. This is the number of
+  /// variables currently "invisible" to default clauses. This is normally 0,
+  /// but will be larger during parsing of DECLARE ... DEFAULT, to get the
+  /// scope right for DEFAULT values.
+  uint m_pboundary;
+
+  int m_num_case_exprs;
+
+  /// SP parameters/variables.
+  Dynamic_array m_vars;
+
+  /// Stack of CASE expression ids.
+  Dynamic_array m_case_expr_ids;
+
+  /// Stack of SQL-conditions.
+  Dynamic_array m_conditions;
+
+  /// Stack of cursors.
+  Dynamic_array m_cursors;
+
+  /// Stack of SQL-handlers.
+  Dynamic_array m_handlers;
+
+  /*
+   In the below example the label <<lab>> has two meanings:
+   - GOTO lab : must go before the beginning of the loop
+   - CONTINUE lab : must go to the beginning of the loop
+   We solve this by storing block labels and goto labels into separate lists.
+
+   BEGIN
+     <<lab>>
+     FOR i IN a..10 LOOP
+       ...
+       GOTO lab;
+       ...
+       CONTINUE lab;
+       ...
+     END LOOP;
+   END;
+  */
+  /// List of block labels
+  List m_labels;
+  /// List of goto labels
+  List m_goto_labels;
+
+  /// Children contexts, used for destruction.
+  Dynamic_array m_children;
+
+  /// Scope of this parsing context.
+  enum_scope m_scope;
+
+  /// FOR LOOP characteristics
+  Lex_for_loop m_for_loop;
+}; // class sp_pcontext : public Sql_alloc
+
+
+#endif /* _SP_PCONTEXT_H_ */
diff --git a/sql/sp_rcontext.cc b/sql/sp_rcontext.cc
new file mode 100644
index 00000000..d2fe53a2
--- /dev/null
+++ b/sql/sp_rcontext.cc
@@ -0,0 +1,909 @@
+/* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#include "mariadb.h"
+#include "sql_priv.h"
+#include "unireg.h"
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation
+#endif
+
+#include "mysql.h"
+#include "sp_head.h"
+#include "sql_cursor.h"
+#include "sp_rcontext.h"
+#include "sp_pcontext.h"
+#include "sql_select.h"                     // create_virtual_tmp_table
+#include "sql_base.h"                       // open_tables_only_view_structure
+#include "sql_acl.h"                        // SELECT_ACL
+#include "sql_parse.h"                      // check_table_access
+
+
+Sp_rcontext_handler_local sp_rcontext_handler_local;
+Sp_rcontext_handler_package_body sp_rcontext_handler_package_body;
+
+/// A local SP variable lives in the current routine's own runtime context.
+sp_rcontext *Sp_rcontext_handler_local::get_rcontext(sp_rcontext *ctx) const
+{
+  return ctx;
+}
+
+/// A package-body variable lives in the runtime context of the owning
+/// package (the routine's parent sp_head), not in the routine's own context.
+sp_rcontext *Sp_rcontext_handler_package_body::get_rcontext(sp_rcontext *ctx) const
+{
+  return ctx->m_sp->m_parent->m_rcontext;
+}
+
+/// Local variables are printed (e.g. in SHOW PROCEDURE CODE) without a prefix.
+const LEX_CSTRING *Sp_rcontext_handler_local::get_name_prefix() const
+{
+  return &empty_clex_str;
+}
+
+/// Package-body variables are printed with a "PACKAGE_BODY." prefix.
+const LEX_CSTRING *Sp_rcontext_handler_package_body::get_name_prefix() const
+{
+  static const LEX_CSTRING sp_package_body_variable_prefix_clex_str=
+                           {STRING_WITH_LEN("PACKAGE_BODY.")};
+  return &sp_package_body_variable_prefix_clex_str;
+}
+
+
+///////////////////////////////////////////////////////////////////////////
+// sp_rcontext implementation.
+///////////////////////////////////////////////////////////////////////////
+
+
+/// Construct a runtime context; members requiring allocation are set up
+/// later by create() via alloc_arrays()/init_var_table()/init_var_items().
+/// @param owner             The sp_head this context executes.
+/// @param root_parsing_ctx  Root parse-time context (frame layout source).
+/// @param return_value_fld  Field for a function's RETURN value (or NULL).
+/// @param in_sub_stmt       True when executing inside a sub-statement
+///                          (trigger or stored function).
+sp_rcontext::sp_rcontext(const sp_head *owner,
+                         const sp_pcontext *root_parsing_ctx,
+                         Field *return_value_fld,
+                         bool in_sub_stmt)
+  :end_partial_result_set(false),
+   pause_state(false), quit_func(false), instr_ptr(0),
+   m_sp(owner),
+   m_root_parsing_ctx(root_parsing_ctx),
+   m_var_table(NULL),
+   m_return_value_fld(return_value_fld),
+   m_return_value_set(false),
+   m_in_sub_stmt(in_sub_stmt),
+   m_handlers(PSI_INSTRUMENT_MEM), m_handler_call_stack(PSI_INSTRUMENT_MEM),
+   m_ccount(0)
+{
+}
+
+
+sp_rcontext::~sp_rcontext()
+{
+  delete m_var_table;
+  // Leave m_handlers, m_handler_call_stack, m_var_items, m_cstack
+  // and m_case_expr_holders untouched.
+  // They are allocated in mem roots and will be freed accordingly.
+}
+
+
+/// Factory method: allocate an sp_rcontext on the THD mem_root and fully
+/// initialize it (cursor/case-expr arrays, variable table, variable Items).
+/// @return the initialized context, or NULL on failure (OOM etc.).
+sp_rcontext *sp_rcontext::create(THD *thd,
+                                 const sp_head *owner,
+                                 const sp_pcontext *root_parsing_ctx,
+                                 Field *return_value_fld,
+                                 Row_definition_list &field_def_lst)
+{
+  SELECT_LEX *save_current_select;
+  sp_rcontext *ctx= new (thd->mem_root) sp_rcontext(owner,
+                                                    root_parsing_ctx,
+                                                    return_value_fld,
+                                                    thd->in_sub_stmt);
+  if (!ctx)
+    return NULL;
+
+  /* Reset current_select as it's checked in Item_ident::Item_ident */
+  save_current_select= thd->lex->current_select;
+  thd->lex->current_select= 0;
+
+  if (ctx->alloc_arrays(thd) ||
+      ctx->init_var_table(thd, field_def_lst) ||
+      ctx->init_var_items(thd, field_def_lst))
+  {
+    // Partial initialization failed: destroy and report failure to caller.
+    delete ctx;
+    ctx= 0;
+  }
+
+  thd->lex->current_select= save_current_select;
+  return ctx;
+}
+
+
+/// Append a variable definition to the list, rejecting duplicate names.
+/// Raises ER_DUP_FIELDNAME when @a var's name already appears in the list.
+/// @return true on error (duplicate or OOM), false on success.
+bool Row_definition_list::append_uniq(MEM_ROOT *mem_root, Spvar_definition *var)
+{
+  DBUG_ASSERT(elements);
+  uint unused;
+  if (unlikely(find_row_field_by_name(&var->field_name, &unused)))
+  {
+    my_error(ER_DUP_FIELDNAME, MYF(0), var->field_name.str);
+    return true;
+  }
+  return push_back(var, mem_root);
+}
+
+
+/// Adjust formal parameter definitions to the actual argument Items,
+/// letting each parameter's type handler tune its type (e.g. anchored
+/// %TYPE parameters). Extra formal parameters (defaults) are left as-is.
+/// @return true on error, false on success.
+bool Row_definition_list::
+       adjust_formal_params_to_actual_params(THD *thd, List<Item> *args)
+{
+  // NOTE(review): template arguments restored; the extracted source had
+  // them stripped (List / List_iterator without type arguments).
+  List_iterator<Spvar_definition> it(*this);
+  List_iterator<Item> it_args(*args);
+  DBUG_ASSERT(elements >= args->elements);
+  Spvar_definition *def;
+  Item *arg;
+  while ((def= it++) && (arg= it_args++))
+  {
+    if (def->type_handler()->adjust_spparam_type(def, arg))
+      return true;
+  }
+  return false;
+}
+
+
+/// Array-based overload of adjust_formal_params_to_actual_params():
+/// adjusts the first @a arg_count formal parameters to the Items in @a args.
+/// @return true on error, false on success.
+bool Row_definition_list::
+       adjust_formal_params_to_actual_params(THD *thd,
+                                             Item **args, uint arg_count)
+{
+  // NOTE(review): iterator template argument restored (was stripped).
+  List_iterator<Spvar_definition> it(*this);
+  DBUG_ASSERT(elements >= arg_count);
+  Spvar_definition *def;
+  for (uint i= 0; (def= it++) && (i < arg_count) ; i++)
+  {
+    if (def->type_handler()->adjust_spparam_type(def, args[i]))
+      return true;
+  }
+  return false;
+}
+
+
+/// Allocate the cursor stack and the CASE-expression holder array on the
+/// THD mem_root, sized from the root parsing context.
+/// @return true on allocation failure, false on success.
+bool sp_rcontext::alloc_arrays(THD *thd)
+{
+  {
+    size_t n= m_root_parsing_ctx->max_cursor_index();
+    // NOTE(review): static_cast target types restored; the extracted
+    // source had the <...> template/cast arguments stripped.
+    m_cstack.reset(
+      static_cast<sp_cursor **> (
+        thd->alloc(n * sizeof (sp_cursor*))),
+      n);
+  }
+
+  {
+    size_t n= m_root_parsing_ctx->get_num_case_exprs();
+    // calloc: holders are created lazily, so start NULL-initialized.
+    m_case_expr_holders.reset(
+      static_cast<Item_cache **> (
+        thd->calloc(n * sizeof (Item_cache*))),
+      n);
+  }
+
+  return !m_cstack.array() || !m_case_expr_holders.array();
+}
+
+
+/// Create the virtual tmp table that backs all SP variables of this
+/// context. A routine with no variables needs no table.
+/// @return true on failure, false on success.
+bool sp_rcontext::init_var_table(THD *thd,
+                                 List<Spvar_definition> &field_def_lst)
+{
+  // NOTE(review): List element type restored (was stripped to "List &").
+  if (!m_root_parsing_ctx->max_var_index())
+    return false;
+
+  DBUG_ASSERT(field_def_lst.elements == m_root_parsing_ctx->max_var_index());
+
+  if (!(m_var_table= create_virtual_tmp_table(thd, field_def_lst)))
+    return true;
+
+  return false;
+}
+
+
+/**
+  Check if we have access to use a column as a %TYPE reference.
+  @return false - OK
+  @return true  - access denied
+*/
+/**
+  Check if we have access to use a column as a %TYPE reference.
+  In embedded builds (NO_EMBEDDED_ACCESS_CHECKS) privilege checks are
+  compiled out and access is always granted.
+  @return false - OK
+  @return true  - access denied
+*/
+static inline bool
+check_column_grant_for_type_ref(THD *thd, TABLE_LIST *table_list,
+                                const char *str, size_t length,
+                                Field *fld)
+{
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  table_list->table->grant.want_privilege= SELECT_ACL;
+  return check_column_grant_in_table_ref(thd, table_list, str, length, fld);
+#else
+  return false;
+#endif
+}
+
+
+/**
+  This method implementation is very close to fill_schema_table_by_open().
+*/
+/**
+  This method implementation is very close to fill_schema_table_by_open().
+
+  Resolve a "var tbl.col%TYPE" reference: open the table (structure only),
+  find the column, and copy its definition into *def with the NOT NULL
+  flag cleared (SP variables are always nullable).
+  @return false on success, true on error (error already reported).
+*/
+bool Qualified_column_ident::resolve_type_ref(THD *thd, Column_definition *def)
+{
+  Open_tables_backup open_tables_state_backup;
+  thd->reset_n_backup_open_tables_state(&open_tables_state_backup);
+
+  TABLE_LIST *table_list;
+  Field *src;
+  LEX *save_lex= thd->lex;
+  bool rc= true;
+
+  // Temporary LEX so recursively opened views don't pollute the current LEX.
+  sp_lex_local lex(thd, thd->lex);
+  thd->lex= &lex;
+
+  lex.context_analysis_only= CONTEXT_ANALYSIS_ONLY_VIEW;
+  // Make %TYPE variables see temporary tables that shadow permanent tables
+  thd->temporary_tables= open_tables_state_backup.temporary_tables;
+
+  if ((table_list=
+         lex.first_select_lex()->add_table_to_list(thd, this, NULL, 0,
+                                                   TL_READ_NO_INSERT,
+                                                   MDL_SHARED_READ)) &&
+      !check_table_access(thd, SELECT_ACL, table_list, TRUE, UINT_MAX, FALSE) &&
+      !open_tables_only_view_structure(thd, table_list,
+                                       thd->mdl_context.has_locks()))
+  {
+    if (likely((src= lex.query_tables->table->find_field_by_name(&m_column))))
+    {
+      if (!(rc= check_column_grant_for_type_ref(thd, table_list,
+                                                m_column.str,
+                                                m_column.length, src)))
+      {
+        *def= Column_definition(thd, src, NULL/*No defaults,no constraints*/);
+        def->flags&= (uint) ~NOT_NULL_FLAG;
+        rc= def->sp_prepare_create_field(thd, thd->mem_root);
+      }
+    }
+    else
+      my_error(ER_BAD_FIELD_ERROR, MYF(0), m_column.str, table.str);
+  }
+
+  // Cleanup order matters: unit cleanup, detach temp tables, close tables,
+  // then restore the saved LEX and open-tables state.
+  lex.unit.cleanup();
+  thd->temporary_tables= NULL; // Avoid closing temporary tables
+  close_thread_tables(thd);
+  thd->lex= save_lex;
+  thd->restore_backup_open_tables_state(&open_tables_state_backup);
+  return rc;
+}
+
+
+/**
+  This method resolves the structure of a variable declared as:
+     rec t1%ROWTYPE;
+  It opens the table "t1" and copies its structure to %ROWTYPE variable.
+*/
+/**
+  This method resolves the structure of a variable declared as:
+     rec t1%ROWTYPE;
+  It opens the table "t1" and copies its structure to %ROWTYPE variable.
+  One Spvar_definition is appended to @a defs per table column, with the
+  NOT NULL flag cleared (ROW fields are nullable).
+  @return false on success, true on error (error already reported).
+*/
+bool Table_ident::resolve_table_rowtype_ref(THD *thd,
+                                            Row_definition_list &defs)
+{
+  Open_tables_backup open_tables_state_backup;
+  thd->reset_n_backup_open_tables_state(&open_tables_state_backup);
+
+  TABLE_LIST *table_list;
+  LEX *save_lex= thd->lex;
+  bool rc= true;
+
+  /*
+    Create a temporary LEX on stack and switch to it.
+    In case of VIEW, open_tables_only_view_structure() will open more
+    tables/views recursively. We want to avoid them to stick to the current LEX.
+  */
+  sp_lex_local lex(thd, thd->lex);
+  thd->lex= &lex;
+
+  lex.context_analysis_only= CONTEXT_ANALYSIS_ONLY_VIEW;
+  // Make %ROWTYPE variables see temporary tables that shadow permanent tables
+  thd->temporary_tables= open_tables_state_backup.temporary_tables;
+
+  if ((table_list=
+         lex.first_select_lex()->add_table_to_list(thd, this, NULL, 0,
+                                                   TL_READ_NO_INSERT,
+                                                   MDL_SHARED_READ)) &&
+      !check_table_access(thd, SELECT_ACL, table_list, TRUE, UINT_MAX, FALSE) &&
+      !open_tables_only_view_structure(thd, table_list,
+                                       thd->mdl_context.has_locks()))
+  {
+    for (Field **src= lex.query_tables->table->field; *src; src++)
+    {
+      /*
+         Make field names on the THD memory root,
+         as the table will be closed and freed soon,
+         in the end of this method.
+      */
+      LEX_CSTRING tmp= src[0]->field_name;
+      Spvar_definition *def;
+      if ((rc= check_column_grant_for_type_ref(thd, table_list,
+                                               tmp.str, tmp.length,src[0])) ||
+          (rc= !(src[0]->field_name.str= thd->strmake(tmp.str, tmp.length))) ||
+          (rc= !(def= new (thd->mem_root) Spvar_definition(thd, *src))))
+        break;
+      src[0]->field_name.str= tmp.str; // Restore field name, just in case.
+      def->flags&= (uint) ~NOT_NULL_FLAG;
+      if ((rc= def->sp_prepare_create_field(thd, thd->mem_root)))
+        break;
+      defs.push_back(def, thd->mem_root);
+    }
+  }
+
+  lex.unit.cleanup();
+  thd->temporary_tables= NULL; // Avoid closing temporary tables
+  close_thread_tables(thd);
+  thd->lex= save_lex;
+  thd->restore_backup_open_tables_state(&open_tables_state_backup);
+  return rc;
+}
+
+
+/// Resolve all tbl.col%TYPE references found in this definition list.
+/// @return true as soon as one reference fails to resolve, false otherwise.
+bool Row_definition_list::resolve_type_refs(THD *thd)
+{
+  // NOTE(review): iterator template argument restored (was stripped);
+  // also dropped a stray ';' that followed the closing brace.
+  List_iterator<Spvar_definition> it(*this);
+  Spvar_definition *def;
+  while ((def= it++))
+  {
+    if (def->is_column_type_ref() &&
+        def->column_type_ref()->resolve_type_ref(thd, def))
+      return true;
+  }
+  return false;
+}
+
+
+/// Create one Item per SP variable, wrapping the corresponding Field of
+/// m_var_table. ROW-typed variables (%ROWTYPE, cursor%ROWTYPE, explicit ROW)
+/// get an Item_field_row; scalar variables get a plain Item_field.
+/// @return true on failure, false on success.
+bool sp_rcontext::init_var_items(THD *thd,
+                                 List<Spvar_definition> &field_def_lst)
+{
+  // NOTE(review): List/static_cast/iterator template arguments restored;
+  // the extracted source had all <...> arguments stripped.
+  uint num_vars= m_root_parsing_ctx->max_var_index();
+
+  m_var_items.reset(
+    static_cast<Item **> (
+      thd->alloc(num_vars * sizeof (Item *))),
+    num_vars);
+
+  if (!m_var_items.array())
+    return true;
+
+  DBUG_ASSERT(field_def_lst.elements == num_vars);
+  List_iterator<Spvar_definition> it(field_def_lst);
+  Spvar_definition *def= it++;
+
+  for (uint idx= 0; idx < num_vars; ++idx, def= it++)
+  {
+    Field *field= m_var_table->field[idx];
+    if (def->is_table_rowtype_ref())
+    {
+      // tbl%ROWTYPE: open the table now to discover the row structure.
+      Row_definition_list defs;
+      Item_field_row *item= new (thd->mem_root) Item_field_row(thd, field);
+      if (!(m_var_items[idx]= item) ||
+          def->table_rowtype_ref()->resolve_table_rowtype_ref(thd, defs) ||
+          item->row_create_items(thd, &defs))
+        return true;
+    }
+    else if (def->is_cursor_rowtype_ref())
+    {
+      // cur%ROWTYPE: structure is only known when the cursor is opened,
+      // so the row Items are created later.
+      Row_definition_list defs;
+      Item_field_row *item= new (thd->mem_root) Item_field_row(thd, field);
+      if (!(m_var_items[idx]= item))
+        return true;
+    }
+    else if (def->is_row())
+    {
+      // Explicit ROW(...) type: field definitions are already known.
+      Item_field_row *item= new (thd->mem_root) Item_field_row(thd, field);
+      if (!(m_var_items[idx]= item) ||
+          item->row_create_items(thd, def->row_field_definitions()))
+        return true;
+    }
+    else
+    {
+      // Scalar variable.
+      if (!(m_var_items[idx]= new (thd->mem_root) Item_field(thd, field)))
+        return true;
+    }
+  }
+  return false;
+}
+
+
+/// Materialize the fields of a ROW variable: create a virtual tmp table
+/// for the row's fields and wrap each of its Fields into an Item_field
+/// argument of this Item_field_row.
+/// @return true on failure, false on success.
+bool Item_field_row::row_create_items(THD *thd, List<Spvar_definition> *list)
+{
+  // NOTE(review): List/iterator template arguments restored (were stripped).
+  DBUG_ASSERT(list);
+  DBUG_ASSERT(field);
+  Virtual_tmp_table **ptable= field->virtual_tmp_table_addr();
+  DBUG_ASSERT(ptable);
+  if (!(ptable[0]= create_virtual_tmp_table(thd, *list)))
+    return true;
+
+  if (alloc_arguments(thd, list->elements))
+    return true;
+
+  List_iterator<Spvar_definition> it(*list);
+  Spvar_definition *def;
+  for (arg_count= 0; (def= it++); arg_count++)
+  {
+    if (!(args[arg_count]= new (thd->mem_root)
+                           Item_field(thd, ptable[0]->field[arg_count])))
+      return true;
+  }
+  return false;
+}
+
+
+/// Evaluate a function's RETURN expression into the return value Field
+/// and remember that a return value has been produced.
+/// @return true on evaluation error, false on success.
+bool sp_rcontext::set_return_value(THD *thd, Item **return_value_item)
+{
+  DBUG_ASSERT(m_return_value_fld);
+
+  m_return_value_set = true;
+
+  return thd->sp_eval_expr(m_return_value_fld, return_value_item);
+}
+
+
+/// Push a cursor onto the cursor stack (capacity was pre-allocated
+/// in alloc_arrays(), so no bounds check is needed here).
+void sp_rcontext::push_cursor(sp_cursor *c)
+{
+  m_cstack[m_ccount++]= c;
+}
+
+
+/// Pop the top cursor, closing it first if it is still open.
+void sp_rcontext::pop_cursor(THD *thd)
+{
+  DBUG_ASSERT(m_ccount > 0);
+  if (m_cstack[m_ccount - 1]->is_open())
+    m_cstack[m_ccount - 1]->close(thd);
+  m_ccount--;
+}
+
+
+/// Pop @a count cursors (each one is closed if open).
+void sp_rcontext::pop_cursors(THD *thd, size_t count)
+{
+  DBUG_ASSERT(m_ccount >= count);
+  while (count--)
+    pop_cursor(thd);
+}
+
+
+/// Register an SQL-handler entry (its hpush_jump instruction).
+/// @return true on out-of-memory, false on success.
+bool sp_rcontext::push_handler(sp_instr_hpush_jump *entry)
+{
+  return m_handlers.append(entry);
+}
+
+
+/// Unregister the @a count most recently pushed SQL-handlers.
+void sp_rcontext::pop_handlers(size_t count)
+{
+  DBUG_ASSERT(m_handlers.elements() >= count);
+
+  for (size_t i= 0; i < count; ++i)
+    m_handlers.pop();
+}
+
+
+/// Try to locate and activate an SQL-handler for the current SQL condition
+/// (pending error, or most recent warning/note).
+///
+/// On success the error state is cleared, a frame is pushed onto the
+/// handler call stack, and *ip is set to the handler's first instruction.
+///
+/// @param[out] ip      Continuation instruction pointer.
+/// @param      cur_spi The instruction that raised the condition.
+/// @return true if a handler was activated, false otherwise.
+bool sp_rcontext::handle_sql_condition(THD *thd,
+                                       uint *ip,
+                                       const sp_instr *cur_spi)
+{
+  DBUG_ENTER("sp_rcontext::handle_sql_condition");
+
+  /*
+    If this is a fatal sub-statement error, and this runtime
+    context corresponds to a sub-statement, no CONTINUE/EXIT
+    handlers from this context are applicable: try to locate one
+    in the outer scope.
+  */
+  if (unlikely(thd->is_fatal_sub_stmt_error) && m_in_sub_stmt)
+    DBUG_RETURN(false);
+
+  Diagnostics_area *da= thd->get_stmt_da();
+  const sp_handler *found_handler= NULL;
+  const Sql_condition *found_condition= NULL;
+
+  if (unlikely(thd->is_error()))
+  {
+    // Errors take precedence: resolve against the raising instruction's
+    // parse context (handler scoping is lexical).
+    found_handler=
+      cur_spi->m_ctx->find_handler(da->get_error_condition_identity());
+
+    if (found_handler)
+      found_condition= da->get_error_condition();
+
+    /*
+      Found condition can be NULL if the diagnostics area was full
+      when the error was raised. It can also be NULL if
+      Diagnostics_area::set_error_status(uint sql_error) was used.
+      In these cases, make a temporary Sql_condition here so the
+      error can be handled.
+    */
+    if (!found_condition)
+    {
+      found_condition=
+        new (callers_arena->mem_root) Sql_condition(callers_arena->mem_root,
+                                                    da->get_error_condition_identity(),
+                                                    da->message(),
+                                                    da->current_row_for_warning());
+    }
+  }
+  else if (da->current_statement_warn_count())
+  {
+    Diagnostics_area::Sql_condition_iterator it= da->sql_conditions();
+    const Sql_condition *c;
+
+    // Here we need to find the last warning/note from the stack.
+    // In MySQL most substantial warning is the last one.
+    // (We could have used a reverse iterator here if one existed)
+
+    while ((c= it++))
+    {
+      if (c->get_level() == Sql_condition::WARN_LEVEL_WARN ||
+          c->get_level() == Sql_condition::WARN_LEVEL_NOTE)
+      {
+        const sp_handler *handler= cur_spi->m_ctx->find_handler(*c);
+        if (handler)
+        {
+          found_handler= handler;
+          found_condition= c;
+        }
+      }
+    }
+  }
+
+  if (!found_handler)
+    DBUG_RETURN(false);
+
+  // At this point, we know that:
+  //  - there is a pending SQL-condition (error or warning);
+  //  - there is an SQL-handler for it.
+
+  DBUG_ASSERT(found_condition);
+
+  // Map the parse-time handler back to its runtime hpush_jump entry.
+  sp_instr_hpush_jump *handler_entry= NULL;
+  for (size_t i= 0; i < m_handlers.elements(); ++i)
+  {
+    sp_instr_hpush_jump *h= m_handlers.at(i);
+
+    if (h->get_handler() == found_handler)
+    {
+      handler_entry= h;
+      break;
+    }
+  }
+
+  /*
+    handler_entry usually should not be NULL here, as that indicates
+    that the parser context thinks a HANDLER should be activated,
+    but the runtime context cannot find it.
+
+    However, this can happen (and this is in line with the Standard)
+    if SQL-condition has been raised before DECLARE HANDLER instruction
+    is processed.
+
+    For example:
+    CREATE PROCEDURE p()
+    BEGIN
+      DECLARE v INT DEFAULT 'get'; -- raises SQL-warning here
+      DECLARE EXIT HANDLER ...     -- this handler does not catch the warning
+    END
+  */
+  if (!handler_entry)
+    DBUG_RETURN(false);
+
+  // Mark active conditions so that they can be deleted when the handler exits.
+  da->mark_sql_conditions_for_removal();
+
+  // Only CONTINUE handlers resume after the raising instruction.
+  uint continue_ip= handler_entry->get_handler()->type == sp_handler::CONTINUE ?
+    cur_spi->get_cont_dest() : 0;
+
+  /* End aborted result set. */
+  if (end_partial_result_set)
+    thd->protocol->end_partial_result_set(thd);
+
+  /* Reset error state. */
+  thd->clear_error();
+  thd->reset_killed();      // Some errors set thd->killed, (e.g. "bad data").
+
+  /* Add a frame to handler-call-stack. */
+  Sql_condition_info *cond_info=
+    new (callers_arena->mem_root) Sql_condition_info(found_condition,
+                                                     callers_arena);
+  Handler_call_frame *frame=
+    new (callers_arena->mem_root) Handler_call_frame(cond_info, continue_ip);
+  m_handler_call_stack.append(frame);
+
+  // Jump to the first instruction of the handler body.
+  *ip= handler_entry->m_ip + 1;
+
+  DBUG_RETURN(true);
+}
+
+
+/// Leave the currently executing SQL-handler: pop its call frame and
+/// purge the conditions that triggered it from the diagnostics area.
+/// @return the CONTINUE handler's resume instruction pointer
+///         (0 for EXIT handlers).
+uint sp_rcontext::exit_handler(Diagnostics_area *da)
+{
+  DBUG_ENTER("sp_rcontext::exit_handler");
+  DBUG_ASSERT(m_handler_call_stack.elements() > 0);
+
+  Handler_call_frame *f= m_handler_call_stack.pop();
+
+  /*
+    Remove the SQL conditions that were present in DA when the
+    handler was activated.
+  */
+  da->remove_marked_sql_conditions();
+
+  uint continue_ip= f->continue_ip;
+
+  DBUG_RETURN(continue_ip);
+}
+
+
+/// Assign *value to SP variable @a idx (evaluated into its backing Field).
+/// @return 0 on success, non-zero on evaluation error.
+int sp_rcontext::set_variable(THD *thd, uint idx, Item **value)
+{
+  DBUG_ENTER("sp_rcontext::set_variable");
+  DBUG_ASSERT(value);
+  DBUG_RETURN(thd->sp_eval_expr(m_var_table->field[idx], value));
+}
+
+
+/// Assign *value to field @a field_idx of the ROW variable @a var_idx.
+/// @return 0 on success, non-zero on evaluation error.
+int sp_rcontext::set_variable_row_field(THD *thd, uint var_idx, uint field_idx,
+                                        Item **value)
+{
+  DBUG_ENTER("sp_rcontext::set_variable_row_field");
+  DBUG_ASSERT(value);
+  Virtual_tmp_table *vtable= virtual_tmp_table_for_row(var_idx);
+  DBUG_RETURN(thd->sp_eval_expr(vtable->field[field_idx], value));
+}
+
+
+/// Name-based variant of set_variable_row_field(): resolve the row field
+/// by @a field_name first (raising an error if not found), then assign.
+/// @return 0 on success, 1 on unknown field or evaluation error.
+int sp_rcontext::set_variable_row_field_by_name(THD *thd, uint var_idx,
+                                                const LEX_CSTRING &field_name,
+                                                Item **value)
+{
+  DBUG_ENTER("sp_rcontext::set_variable_row_field_by_name");
+  uint field_idx;
+  if (find_row_field_by_name_or_error(&field_idx, var_idx, field_name))
+    DBUG_RETURN(1);
+  DBUG_RETURN(set_variable_row_field(thd, var_idx, field_idx, value));
+}
+
+
+/// Assign a full list of Items to the fields of ROW variable @a var_idx.
+/// The caller guarantees that the item count matches the row's arity.
+/// @return 0 on success, non-zero on evaluation error.
+int sp_rcontext::set_variable_row(THD *thd, uint var_idx, List<Item> &items)
+{
+  // NOTE(review): List element type restored (was stripped to "List &").
+  DBUG_ENTER("sp_rcontext::set_variable_row");
+  DBUG_ASSERT(get_variable(var_idx)->cols() == items.elements);
+  Virtual_tmp_table *vtable= virtual_tmp_table_for_row(var_idx);
+  // Scoped guard adjusting THD state for SP expression evaluation —
+  // presumably RAII over abort_on_warning etc.; see its declaration.
+  Sp_eval_expr_state state(thd);
+  DBUG_RETURN(vtable->sp_set_all_fields_from_item_list(thd, items));
+}
+
+
+/// Return the virtual tmp table that stores the fields of the ROW
+/// variable @a var_idx. The variable must be a ROW (asserted).
+Virtual_tmp_table *sp_rcontext::virtual_tmp_table_for_row(uint var_idx)
+{
+  DBUG_ASSERT(get_variable(var_idx)->type() == Item::FIELD_ITEM);
+  DBUG_ASSERT(get_variable(var_idx)->cmp_type() == ROW_RESULT);
+  Field *field= m_var_table->field[var_idx];
+  Virtual_tmp_table **ptable= field->virtual_tmp_table_addr();
+  DBUG_ASSERT(ptable);
+  DBUG_ASSERT(ptable[0]);
+  return ptable[0];
+}
+
+
+/// Resolve @a field_name inside ROW variable @a var_idx into *field_idx,
+/// reporting an error (with the row's name) when the field does not exist.
+/// @return true on error, false on success.
+bool sp_rcontext::find_row_field_by_name_or_error(uint *field_idx,
+                                                  uint var_idx,
+                                                  const LEX_CSTRING &field_name)
+{
+  Virtual_tmp_table *vtable= virtual_tmp_table_for_row(var_idx);
+  Field *row= m_var_table->field[var_idx];
+  return vtable->sp_find_field_by_name_or_error(field_idx,
+                                                row->field_name, field_name);
+}
+
+
+/// Create an Item_cache to hold a CASE expression value. The cache is
+/// allocated on the caller's arena so it survives across executions of
+/// the stored routine.
+/// @return the new holder (NULL on OOM, per get_cache()).
+Item_cache *sp_rcontext::create_case_expr_holder(THD *thd,
+                                                 const Item *item) const
+{
+  Item_cache *holder;
+  Query_arena current_arena;
+
+  // NOTE(review): "&current_arena" restored; the extracted source had it
+  // mangled by HTML-entity decoding ("&curren" -> U+00A4) into "¤t_arena".
+  thd->set_n_backup_active_arena(thd->spcont->callers_arena, &current_arena);
+
+  holder= item->get_cache(thd);
+
+  thd->restore_active_arena(thd->spcont->callers_arena, &current_arena);
+
+  return holder;
+}
+
+
+/// Evaluate and cache the value of CASE expression @a case_expr_id.
+/// A holder is (re)created lazily — also when the expression's result
+/// type differs from the existing holder's type.
+/// @return true on evaluation failure, false on success.
+bool sp_rcontext::set_case_expr(THD *thd, int case_expr_id,
+                                Item **case_expr_item_ptr)
+{
+  Item *case_expr_item= thd->sp_prepare_func_item(case_expr_item_ptr, 1);
+  if (!case_expr_item)
+    return true;
+
+  if (!m_case_expr_holders[case_expr_id] ||
+      m_case_expr_holders[case_expr_id]->result_type() !=
+        case_expr_item->result_type())
+  {
+    m_case_expr_holders[case_expr_id]=
+      create_case_expr_holder(thd, case_expr_item);
+  }
+
+  m_case_expr_holders[case_expr_id]->store(case_expr_item);
+  m_case_expr_holders[case_expr_id]->cache_value();
+  return false;
+}
+
+
+///////////////////////////////////////////////////////////////////////////
+// sp_cursor implementation.
+///////////////////////////////////////////////////////////////////////////
+
+
+/*
+  Open an SP cursor
+
+  SYNOPSIS
+    open()
+    THD		         Thread handler
+
+
+  RETURN
+   0 in case of success, -1 otherwise
+*/
+
+int sp_cursor::open(THD *thd)
+{
+  // Opening an already-open cursor is an error per the SQL standard.
+  if (server_side_cursor)
+  {
+    my_message(ER_SP_CURSOR_ALREADY_OPEN,
+               ER_THD(thd, ER_SP_CURSOR_ALREADY_OPEN),
+               MYF(0));
+    return -1;
+  }
+  if (mysql_open_cursor(thd, &result, &server_side_cursor))
+    return -1;
+  return 0;
+}
+
+
+/// Close the cursor: error if it is not open, otherwise reset the
+/// fetch statistics and destroy the server-side cursor.
+/// @return 0 on success, -1 on error (error reported).
+int sp_cursor::close(THD *thd)
+{
+  if (! server_side_cursor)
+  {
+    my_message(ER_SP_CURSOR_NOT_OPEN, ER_THD(thd, ER_SP_CURSOR_NOT_OPEN),
+               MYF(0));
+    return -1;
+  }
+  sp_cursor_statistics::reset();
+  destroy();
+  return 0;
+}
+
+
+/// Free the server-side cursor and mark this cursor as closed
+/// (NULL server_side_cursor is how "closed" is represented).
+void sp_cursor::destroy()
+{
+  delete server_side_cursor;
+  server_side_cursor= NULL;
+}
+
+
+/// Fetch the next row from an open cursor into SP variables.
+///
+/// @param vars             Target variables; either one per result column,
+///                         or a single ROW variable whose arity matches
+///                         the result's column count.
+/// @param error_on_no_data Raise ER_SP_FETCH_NO_DATA when past the last row.
+/// @return 0 on success (or clean no-data), -1 on error (error reported).
+int sp_cursor::fetch(THD *thd, List<sp_variable> *vars, bool error_on_no_data)
+{
+  // NOTE(review): List element type restored (was stripped to "List *").
+  if (! server_side_cursor)
+  {
+    my_message(ER_SP_CURSOR_NOT_OPEN, ER_THD(thd, ER_SP_CURSOR_NOT_OPEN),
+               MYF(0));
+    return -1;
+  }
+  if (vars->elements != result.get_field_count() &&
+      (vars->elements != 1 ||
+       result.get_field_count() !=
+       thd->spcont->get_variable(vars->head()->offset)->cols()))
+  {
+    my_message(ER_SP_WRONG_NO_OF_FETCH_ARGS,
+               ER_THD(thd, ER_SP_WRONG_NO_OF_FETCH_ARGS), MYF(0));
+    return -1;
+  }
+
+  m_fetch_count++;
+  DBUG_EXECUTE_IF("bug23032_emit_warning",
+                  push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
+                               ER_UNKNOWN_ERROR,
+                               ER_THD(thd, ER_UNKNOWN_ERROR)););
+
+  result.set_spvar_list(vars);
+
+  DBUG_ASSERT(!thd->is_error());
+
+  /* Attempt to fetch one row */
+  if (server_side_cursor->is_open())
+  {
+    server_side_cursor->fetch(1);
+    if (thd->is_error())
+      return -1; // e.g. data type conversion failed
+  }
+
+  /*
+    If the cursor was pointing after the last row, the fetch will
+    close it instead of sending any rows.
+  */
+  if (! server_side_cursor->is_open())
+  {
+    m_found= false;
+    if (!error_on_no_data)
+      return 0;
+    my_message(ER_SP_FETCH_NO_DATA, ER_THD(thd, ER_SP_FETCH_NO_DATA), MYF(0));
+    return -1;
+  }
+
+  m_found= true;
+  m_row_count++;
+  return 0;
+}
+
+
+// Export the column structure of the cursor's result set into 'list'.
+// NOTE(review): server_side_cursor is dereferenced unconditionally —
+// callers apparently guarantee the cursor is open; confirm at call sites.
+bool sp_cursor::export_structure(THD *thd, Row_definition_list *list)
+{
+  return server_side_cursor->export_structure(thd, list);
+}
+
+///////////////////////////////////////////////////////////////////////////
+// sp_cursor::Select_fetch_into_spvars implementation.
+///////////////////////////////////////////////////////////////////////////
+
+
+// Prepare the interceptor for a result set: remember the column count
+// (used by sp_cursor::fetch() to validate the FETCH argument list), then
+// delegate to the base class.
+int sp_cursor::Select_fetch_into_spvars::prepare(List &fields,
+                                                 SELECT_LEX_UNIT *u)
+{
+  /*
+    Cache the number of columns in the result set in order to easily
+    return an error if column count does not match value count.
+  */
+  field_count= fields.elements;
+  return select_result_interceptor::prepare(fields, u);
+}
+
+
+// Assign each item of the fetched row to the corresponding SP variable,
+// pairwise in list order. Returns true on assignment error.
+bool sp_cursor::Select_fetch_into_spvars::
+       send_data_to_variable_list(List &vars, List &items)
+{
+  List_iterator_fast spvar_iter(vars);
+  List_iterator_fast item_iter(items);
+  sp_variable *spvar;
+  Item *item;
+
+  /* Must be ensured by the caller */
+  DBUG_ASSERT(vars.elements == items.elements);
+
+  /*
+    Assign the row fetched from a server side cursor to stored
+    procedure variables.
+  */
+  // Loop condition intentionally uses assignment: both iterators advance
+  // in lock-step and the loop ends when either list is exhausted.
+  for (; spvar= spvar_iter++, item= item_iter++; )
+  {
+    if (thd->spcont->set_variable(thd, spvar->offset, &item))
+      return true;
+  }
+  return false;
+}
+
+
+// Receive one row of the cursor's result set and store it into the SP
+// variables registered via set_spvar_list(). Returns non-zero on error.
+int sp_cursor::Select_fetch_into_spvars::send_data(List &items)
+{
+  Item *item;
+  /*
+    If we have only one variable in spvar_list, and this is a ROW variable,
+    and the number of fields in the ROW variable matches the number of
+    fields in the query result, we fetch to this ROW variable.
+
+    If there is one variable, and it is a ROW variable, but its number
+    of fields does not match the number of fields in the query result,
+    we go through send_data_to_variable_list(). It will report an error
+    on attempt to assign a scalar value to a ROW variable.
+  */
+  return spvar_list->elements == 1 &&
+         (item= thd->spcont->get_variable(spvar_list->head()->offset)) &&
+         item->type_handler() == &type_handler_row &&
+         item->cols() == items.elements ?
+    thd->spcont->set_variable_row(thd, spvar_list->head()->offset, items) :
+    send_data_to_variable_list(*spvar_list, items);
+}
diff --git a/sql/sp_rcontext.h b/sql/sp_rcontext.h
new file mode 100644
index 00000000..ea669b2d
--- /dev/null
+++ b/sql/sp_rcontext.h
@@ -0,0 +1,410 @@
+/* -*- C++ -*- */
+/* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#ifndef _SP_RCONTEXT_H_
+#define _SP_RCONTEXT_H_
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface			/* gcc class implementation */
+#endif
+
+#include "sql_class.h"                    // select_result_interceptor
+#include "sp_pcontext.h"                  // sp_condition_value
+
+///////////////////////////////////////////////////////////////////////////
+// sp_rcontext declaration.
+///////////////////////////////////////////////////////////////////////////
+
+class sp_cursor;
+class sp_lex_keeper;
+class sp_instr_cpush;
+class sp_instr_hpush_jump;
+class Query_arena;
+class sp_head;
+class Item_cache;
+class Virtual_tmp_table;
+
+
+/*
+  This class is a runtime context of a Stored Routine. It is used in an
+  execution and is intended to contain all dynamic objects (i.e.  objects, which
+  can be changed during execution), such as:
+    - stored routine variables;
+    - cursors;
+    - handlers;
+
+  Runtime context is used with sp_head class. sp_head class is intended to
+  contain all static things, related to the stored routines (code, for example).
+  sp_head instance creates runtime context for the execution of a stored
+  routine.
+
+  There is a parsing context (an instance of sp_pcontext class), which is used
+  on parsing stage. However, now it contains some necessary for an execution
+  things, such as definition of used stored routine variables. That's why
+  runtime context needs a reference to the parsing context.
+*/
+
+class sp_rcontext : public Sql_alloc
+{
+public:
+  /// Construct and properly initialize a new sp_rcontext instance. The static
+  /// create-function is needed because we need a way to return an error from
+  /// the constructor.
+  ///
+  /// @param thd              Thread handle.
+  /// @param root_parsing_ctx Top-level parsing context for this stored program.
+  /// @param return_value_fld Field object to store the return value
+  ///                         (for stored functions only).
+  ///
+  /// @return valid sp_rcontext object or NULL in case of OOM-error.
+  static sp_rcontext *create(THD *thd,
+                             const sp_head *owner,
+                             const sp_pcontext *root_parsing_ctx,
+                             Field *return_value_fld,
+                             Row_definition_list &defs);
+
+  ~sp_rcontext();
+
+private:
+  sp_rcontext(const sp_head *owner,
+              const sp_pcontext *root_parsing_ctx,
+              Field *return_value_fld,
+              bool in_sub_stmt);
+
+  // Prevent use of copying constructor and operator.
+  sp_rcontext(const sp_rcontext &);
+  void operator=(sp_rcontext &);
+
+public:
+  /// This class stores basic information about SQL-condition, such as:
+  ///   - SQL error code;
+  ///   - error level;
+  ///   - SQLSTATE;
+  ///   - text message.
+  ///
+  /// It's used to organize runtime SQL-handler call stack.
+  ///
+  /// Standard Sql_condition class can not be used, because we don't always have
+  /// an Sql_condition object for an SQL-condition in Diagnostics_area.
+  ///
+  /// Eventually, this class should be moved to sql_error.h, and be a part of
+  /// standard SQL-condition processing (Diagnostics_area should contain an
+  /// object for active SQL-condition, not just information stored in DA's
+  /// fields).
+  class Sql_condition_info : public Sql_alloc,
+                             public Sql_condition_identity
+  {
+  public:
+    /// Text message.
+    char *message;
+
+    /** Row number where the condition has happened */
+    ulong m_row_number;
+
+    /// The constructor.
+    ///
+    /// @param _sql_condition  The SQL condition.
+    /// @param arena           Query arena for SP
+    Sql_condition_info(const Sql_condition *_sql_condition, Query_arena *arena)
+      :Sql_condition_identity(*_sql_condition)
+    {
+      // The message text is copied onto the SP arena: the originating
+      // Sql_condition may be freed before this handler frame is popped.
+      message= strdup_root(arena->mem_root, _sql_condition->get_message_text());
+      m_row_number= _sql_condition->m_row_number;
+    }
+  };
+
+private:
+  /// This class represents a call frame of SQL-handler (one invocation of a
+  /// handler). Basically, it's needed to store continue instruction pointer for
+  /// CONTINUE SQL-handlers.
+  class Handler_call_frame : public Sql_alloc
+  {
+  public:
+    /// SQL-condition, triggered handler activation.
+    const Sql_condition_info *sql_condition;
+
+    /// Continue-instruction-pointer for CONTINUE-handlers.
+    /// The attribute contains 0 for EXIT-handlers.
+    uint continue_ip;
+
+    /// The constructor.
+    ///
+    /// @param _sql_condition SQL-condition, triggered handler activation.
+    /// @param _continue_ip   Continue instruction pointer.
+    Handler_call_frame(const Sql_condition_info *_sql_condition,
+                       uint _continue_ip)
+     :sql_condition(_sql_condition),
+      continue_ip(_continue_ip)
+    { }
+ };
+
+public:
+  /// Arena used to (re) allocate items on. E.g. reallocate INOUT/OUT
+  /// SP-variables when they don't fit into prealloced items. This is common
+  /// situation with String items. It is used mainly in sp_eval_func_item().
+  Query_arena *callers_arena;
+
+  /// Flag to end an open result set before start executing an SQL-handler
+  /// (if one is found). Otherwise the client will hang due to a violation
+  /// of the client/server protocol.
+  bool end_partial_result_set;
+  /// NOTE(review): pause_state/quit_func/instr_ptr appear to support
+  /// suspending and resuming the instruction-execution loop, with
+  /// instr_ptr being the current instruction index — confirm against
+  /// sp_head::execute().
+  bool pause_state;
+  bool quit_func;
+  uint instr_ptr;
+
+  /// The stored program for which this runtime context is created. Used for
+  /// checking if correct runtime context is used for variable handling,
+  /// and to access the package run-time context.
+  /// Also used by slow log.
+  const sp_head *m_sp;
+
+  /////////////////////////////////////////////////////////////////////////
+  // SP-variables.
+  /////////////////////////////////////////////////////////////////////////
+
+  /// Number of arguments (context variables) of the root parsing context.
+  uint argument_count() const
+  {
+    return m_root_parsing_ctx->context_var_count();
+  }
+
+  /// Assign a value to the SP variable at index var_idx.
+  int set_variable(THD *thd, uint var_idx, Item **value);
+  /// Assign a value to one field (by index) of a ROW-type SP variable.
+  int set_variable_row_field(THD *thd, uint var_idx, uint field_idx,
+                             Item **value);
+  /// Assign a value to one field (by name) of a ROW-type SP variable.
+  int set_variable_row_field_by_name(THD *thd, uint var_idx,
+                                     const LEX_CSTRING &field_name,
+                                     Item **value);
+  /// Assign a whole row of values to a ROW-type SP variable.
+  int set_variable_row(THD *thd, uint var_idx, List &items);
+
+  /// Like set_variable(), but asserts the index refers to a routine argument.
+  int set_parameter(THD *thd, uint var_idx, Item **value)
+  {
+    DBUG_ASSERT(var_idx < argument_count());
+    return set_variable(thd, var_idx, value);
+  }
+
+  /// Item_field proxy for the SP variable at var_idx (see m_var_items).
+  Item_field *get_variable(uint var_idx) const
+  { return m_var_items[var_idx]; }
+
+  /// Address of the Item* slot for var_idx inside m_var_items.
+  Item **get_variable_addr(uint var_idx) const
+  { return ((Item **) m_var_items.array()) + var_idx; }
+
+  /// Like get_variable(), but asserts the index refers to a routine argument.
+  Item_field *get_parameter(uint var_idx) const
+  {
+    DBUG_ASSERT(var_idx < argument_count());
+    return get_variable(var_idx);
+  }
+
+  /// Resolve a ROW field name to its index; raises an error if not found.
+  bool find_row_field_by_name_or_error(uint *field_idx, uint var_idx,
+                                       const LEX_CSTRING &field_name);
+
+  /// Store the RETURN value of a stored function (see m_return_value_fld).
+  bool set_return_value(THD *thd, Item **return_value_item);
+
+  bool is_return_value_set() const
+  { return m_return_value_set; }
+
+  /////////////////////////////////////////////////////////////////////////
+  // SQL-handlers.
+  /////////////////////////////////////////////////////////////////////////
+
+  /// Push an sp_instr_hpush_jump instance to the handler call stack.
+  ///
+  /// @param entry    The condition handler entry
+  ///
+  /// @return error flag.
+  /// @retval false on success.
+  /// @retval true on error.
+  bool push_handler(sp_instr_hpush_jump *entry);
+
+  /// Pop and delete given number of instances from the handler
+  /// call stack.
+  ///
+  /// @param count Number of handler entries to pop & delete.
+  void pop_handlers(size_t count);
+
+  /// The SQL condition of the innermost currently-executing handler,
+  /// or NULL when no handler is active.
+  const Sql_condition_info *raised_condition() const
+  {
+    return m_handler_call_stack.elements() ?
+      (*m_handler_call_stack.back())->sql_condition : NULL;
+  }
+
+  /// Handle current SQL condition (if any).
+  ///
+  /// This is the public-interface function to handle SQL conditions in
+  /// stored routines.
+  ///
+  /// @param thd            Thread handle.
+  /// @param ip[out]        Instruction pointer to the first handler
+  ///                       instruction.
+  /// @param cur_spi        Current SP instruction.
+  ///
+  /// @retval true if an SQL-handler has been activated. That means, all of
+  /// the following conditions are satisfied:
+  ///   - the SP-instruction raised SQL-condition(s),
+  ///   - and there is an SQL-handler to process at least one of those
+  ///     SQL-conditions,
+  ///   - and that SQL-handler has been activated.
+  /// Note, that the return value has nothing to do with "error flag"
+  /// semantics.
+  ///
+  /// @retval false otherwise.
+  bool handle_sql_condition(THD *thd,
+                            uint *ip,
+                            const sp_instr *cur_spi);
+
+  /// Remove latest call frame from the handler call stack.
+  ///
+  /// @param da Diagnostics area containing handled conditions.
+  ///
+  /// @return continue instruction pointer of the removed handler.
+  uint exit_handler(Diagnostics_area *da);
+
+  /////////////////////////////////////////////////////////////////////////
+  // Cursors.
+  /////////////////////////////////////////////////////////////////////////
+
+  /// Push a cursor to the cursor stack.
+  ///
+  /// @param cursor The cursor
+  ///
+  void push_cursor(sp_cursor *cur);
+
+  void pop_cursor(THD *thd);
+  /// Pop and delete given number of sp_cursor instance from the cursor stack.
+  ///
+  /// @param count Number of cursors to pop & delete.
+  void pop_cursors(THD *thd, size_t count);
+
+  void pop_all_cursors(THD *thd)
+  { pop_cursors(thd, m_ccount); }
+
+  sp_cursor *get_cursor(uint i) const
+  { return m_cstack[i]; }
+
+  /////////////////////////////////////////////////////////////////////////
+  // CASE expressions.
+  /////////////////////////////////////////////////////////////////////////
+
+  /// Set CASE expression to the specified value.
+  ///
+  /// @param thd             Thread handler.
+  /// @param case_expr_id    The CASE expression identifier.
+  /// @param case_expr_item  The CASE expression value
+  ///
+  /// @return error flag.
+  /// @retval false on success.
+  /// @retval true on error.
+  ///
+  /// @note The idea is to reuse Item_cache for the expression of the one
+  /// CASE statement. This optimization takes place when there is CASE
+  /// statement inside of a loop. So, in other words, we will use the same
+  /// object on each iteration instead of creating a new one for each
+  /// iteration.
+  ///
+  /// TODO
+  ///   Hypothetically, a type of CASE expression can be different for each
+  ///   iteration. For instance, this can happen if the expression contains
+  ///   a session variable (something like @@VAR) and its type is changed
+  ///   from one iteration to another.
+  ///
+  ///   In order to cope with this problem, we check type each time, when we
+  ///   use already created object. If the type does not match, we re-create
+  ///   Item.  This also can (should?) be optimized.
+  bool set_case_expr(THD *thd, int case_expr_id, Item **case_expr_item_ptr);
+
+  Item *get_case_expr(int case_expr_id) const
+  { return m_case_expr_holders[case_expr_id]; }
+
+  Item ** get_case_expr_addr(int case_expr_id) const
+  { return (Item**) m_case_expr_holders.array() + case_expr_id; }
+
+private:
+  /// Internal function to allocate memory for arrays.
+  ///
+  /// @param thd Thread handle.
+  ///
+  /// @return error flag: false on success, true in case of failure.
+  bool alloc_arrays(THD *thd);
+
+  /// Create and initialize a table to store SP-variables.
+  ///
+  /// @param thd Thread handle.
+  ///
+  /// @return error flag.
+  /// @retval false on success.
+  /// @retval true on error.
+  bool init_var_table(THD *thd, List &defs);
+
+  /// Create and initialize an Item-adapter (Item_field) for each SP-var field.
+  ///
+  /// @param thd Thread handle.
+  ///
+  /// @return error flag.
+  /// @retval false on success.
+  /// @retval true on error.
+  bool init_var_items(THD *thd, List &defs);
+
+  /// Create an instance of appropriate Item_cache class depending on the
+  /// specified type in the callers arena.
+  ///
+  /// @note We should create cache items in the callers arena, as they are
+  /// used across several instructions.
+  ///
+  /// @param thd   Thread handler.
+  /// @param item  Item to get the expression type.
+  ///
+  /// @return Pointer to valid object on success, or NULL in case of error.
+  Item_cache *create_case_expr_holder(THD *thd, const Item *item) const;
+
+  /// Virtual table backing the ROW-type variable at index idx.
+  Virtual_tmp_table *virtual_tmp_table_for_row(uint idx);
+
+private:
+  /// Top-level (root) parsing context for this runtime context.
+  const sp_pcontext *m_root_parsing_ctx;
+
+  /// Virtual table for storing SP-variables.
+  Virtual_tmp_table *m_var_table;
+
+  /// Collection of Item_field proxies, each of them points to the
+  /// corresponding field in m_var_table.
+  Bounds_checked_array m_var_items;
+
+  /// This is a pointer to a field, which should contain return value for
+  /// stored functions (only). For stored procedures, this pointer is NULL.
+  Field *m_return_value_fld;
+
+  /// Indicates whether the return value (in m_return_value_fld) has been
+  /// set during execution.
+  bool m_return_value_set;
+
+  /// Flag to tell if the runtime context is created for a sub-statement.
+  bool m_in_sub_stmt;
+
+  /// Stack of visible handlers.
+  Dynamic_array m_handlers;
+
+  /// Stack of caught SQL conditions.
+  Dynamic_array m_handler_call_stack;
+
+  /// Stack of cursors.
+  Bounds_checked_array m_cstack;
+
+  /// Current number of cursors in m_cstack.
+  uint m_ccount;
+
+  /// Array of CASE expression holders.
+  Bounds_checked_array m_case_expr_holders;
+}; // class sp_rcontext : public Sql_alloc
+
+#endif /* _SP_RCONTEXT_H_ */
diff --git a/sql/spatial.cc b/sql/spatial.cc
new file mode 100644
index 00000000..9bf6110c
--- /dev/null
+++ b/sql/spatial.cc
@@ -0,0 +1,3804 @@
+/*
+   Copyright (c) 2002, 2013, Oracle and/or its affiliates.
+   Copyright (c) 2011, 2021, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#include "mariadb.h"
+#include "sql_priv.h"
+#include "spatial.h"
+#include "gstream.h"                            // Gis_read_stream
+#include "sql_string.h"                         // String
+
+/* This is from item_func.h. Didn't want to #include the whole file. */
+double my_double_round(double value, longlong dec, bool dec_unsigned,
+                       bool truncate);
+
+#ifdef HAVE_SPATIAL
+
+/* 
+  exponential notation :
+  1   sign
+  1   number before the decimal point
+  1   decimal point
+  14  number of significant digits (see String::qs_append(double))
+  1   'e' sign
+  1   exponent sign
+  3   exponent digits
+  ==
+  22
+
+  "f" notation :
+  1   optional 0
+  1   sign
+  14  number significant digits (see String::qs_append(double) )
+  1   decimal point
+  ==
+  17
+*/
+
+#define MAX_DIGITS_IN_DOUBLE MY_GCVT_MAX_FIELD_WIDTH
+
+// Return non-zero if this MBR lies strictly within 'mbr', treating
+// degenerate rectangles (points and line segments) specially.
+int MBR::within(const MBR *mbr)
+{
+  /*
+    We have to take into account the 'dimension' of
+    the MBR, where the dimension of a single point is 0,
+    the dimension of a vertical or horizontal line is 1,
+    and finally the dimension of the solid rectangle is 2.
+  */
+
+  int dim1= dimension();
+  int dim2= mbr->dimension();
+
+  DBUG_ASSERT(dim1 >= 0 && dim1 <= 2 && dim2 >= 0 && dim2 <= 2);
+
+  /*
+    Either/both of the two operands can degrade to a point or a
+    horizontal/vertical line segment, and we have to treat such cases
+    separately.
+   */
+  switch (dim1)
+  {
+  case 0:
+    // 'this' is a point.
+    DBUG_ASSERT(xmin == xmax && ymin == ymax);
+    switch (dim2)
+    {
+    case 0:
+      // point within point: only when they coincide.
+      DBUG_ASSERT(mbr->xmin == mbr->xmax && mbr->ymin == mbr->ymax);
+      return equals(mbr);
+      break;
+    case 1:
+      // point within segment: strictly inside along the segment's axis,
+      // equal on the perpendicular axis.
+      DBUG_ASSERT((mbr->xmin == mbr->xmax && mbr->ymin != mbr->ymax) ||
+                  (mbr->ymin == mbr->ymax && mbr->xmin != mbr->xmax));
+      return ((xmin > mbr->xmin && xmin < mbr->xmax && ymin == mbr->ymin) ||
+              (ymin > mbr->ymin && ymin < mbr->ymax && xmin == mbr->xmin));
+      break;
+    case 2:
+      // point within rectangle: strictly inside on both axes.
+      DBUG_ASSERT(mbr->xmin != mbr->xmax && mbr->ymin != mbr->ymax);
+      return (xmin > mbr->xmin && xmax < mbr->xmax &&
+              ymin > mbr->ymin && ymax < mbr->ymax);
+      break;
+    }
+    break;
+  case 1:
+    // 'this' is a horizontal or vertical line segment.
+    DBUG_ASSERT((xmin == xmax && ymin != ymax) ||
+                (ymin == ymax && xmin != xmax));
+    switch (dim2)
+    {
+    case 0:
+      // A segment can never lie within a point.
+      DBUG_ASSERT(mbr->xmin == mbr->xmax && mbr->ymin == mbr->ymax);
+      return 0;
+      break;
+    case 1:
+      // segment within segment: collinear on the same axis and covered.
+      DBUG_ASSERT((mbr->xmin == mbr->xmax && mbr->ymin != mbr->ymax) ||
+                  (mbr->ymin == mbr->ymax && mbr->xmin != mbr->xmax));
+      return ((xmin == xmax && mbr->xmin == mbr->xmax && mbr->xmin == xmin &&
+               mbr->ymin <= ymin && mbr->ymax >= ymax) ||
+              (ymin == ymax && mbr->ymin == mbr->ymax && mbr->ymin == ymin &&
+               mbr->xmin <= xmin && mbr->xmax >= xmax));
+      break;
+    case 2:
+      // segment within rectangle: strictly inside across the degenerate
+      // axis, covered (inclusive) along the segment's extent.
+      DBUG_ASSERT(mbr->xmin != mbr->xmax && mbr->ymin != mbr->ymax);
+      return ((xmin == xmax && xmin > mbr->xmin && xmax < mbr->xmax &&
+               ymin >= mbr->ymin && ymax <= mbr->ymax) ||
+              (ymin == ymax && ymin > mbr->ymin && ymax < mbr->ymax &&
+               xmin >= mbr->xmin && xmax <= mbr->xmax));
+      break;
+    }
+    break;
+  case 2:
+    // 'this' is a solid rectangle.
+    DBUG_ASSERT(xmin != xmax && ymin != ymax);
+    switch (dim2)
+    {
+    case 0:
+    case 1:
+      // A rectangle cannot be within a point or a segment.
+      return 0;
+      break;
+    case 2:
+      // rectangle within rectangle: inclusive containment on both axes.
+      DBUG_ASSERT(mbr->xmin != mbr->xmax && mbr->ymin != mbr->ymax);
+      return ((mbr->xmin <= xmin) && (mbr->ymin <= ymin) &&
+              (mbr->xmax >= xmax) && (mbr->ymax >= ymax));
+      break;
+
+    }
+    break;
+  }
+
+  // Never reached.
+  DBUG_ASSERT(false);
+  return 0;
+}
+
+
+/***************************** Gis_class_info *******************************/
+
+// Registry of geometry classes, indexed by WKB type id. Slots are filled
+// at static-initialization time by the Class_info constructor below.
+Geometry::Class_info *Geometry::ci_collection[Geometry::wkb_last+1]=
+{
+  NULL, NULL, NULL, NULL, NULL, NULL, NULL
+};
+
+// One-past-the-end pointer used when iterating the registry (find_class).
+static Geometry::Class_info **ci_collection_end=
+                                Geometry::ci_collection+Geometry::wkb_last + 1;
+
+// Register a geometry class: record its WKT name, GeoJSON name and
+// factory function, and insert it into ci_collection under its type id.
+Geometry::Class_info::Class_info(const char *name, const char *geojson_name,
+                                 int type_id, create_geom_t create_func):
+  m_type_id(type_id), m_create_func(create_func)
+{
+  m_name.str= (char *) name;
+  m_name.length= strlen(name);
+  m_geojson_name.str= (char *) geojson_name;
+  m_geojson_name.length= strlen(geojson_name);
+
+  ci_collection[type_id]= this;
+}
+
+// Placement-new factory functions, one per concrete geometry class.
+// 'buffer' points into a Geometry_buffer; no heap allocation occurs.
+static Geometry *create_point(char *buffer)
+{
+  return new (buffer) Gis_point;
+}
+
+static Geometry *create_linestring(char *buffer)
+{
+  return new (buffer) Gis_line_string;
+}
+
+static Geometry *create_polygon(char *buffer)
+{
+  return new (buffer) Gis_polygon;
+}
+
+static Geometry *create_multipoint(char *buffer)
+{
+  return new (buffer) Gis_multi_point;
+}
+
+static Geometry *create_multipolygon(char *buffer)
+{
+  return new (buffer) Gis_multi_polygon;
+}
+
+static Geometry *create_multilinestring(char *buffer)
+{
+  return new (buffer) Gis_multi_line_string;
+}
+
+static Geometry *create_geometrycollection(char *buffer)
+{
+  return new (buffer) Gis_geometry_collection;
+}
+
+
+
+// Static registration of every geometry class; each constructor stores
+// itself into Geometry::ci_collection under its WKB type id.
+static Geometry::Class_info point_class("POINT", "Point",
+					Geometry::wkb_point, create_point);
+
+static Geometry::Class_info linestring_class("LINESTRING", "LineString",
+					     Geometry::wkb_linestring,
+					     create_linestring);
+static Geometry::Class_info polygon_class("POLYGON", "Polygon",
+					      Geometry::wkb_polygon,
+					      create_polygon);
+static Geometry::Class_info multipoint_class("MULTIPOINT", "MultiPoint",
+						 Geometry::wkb_multipoint,
+						 create_multipoint);
+static Geometry::Class_info 
+multilinestring_class("MULTILINESTRING", "MultiLineString",
+		      Geometry::wkb_multilinestring, create_multilinestring);
+static Geometry::Class_info multipolygon_class("MULTIPOLYGON", "MultiPolygon",
+						   Geometry::wkb_multipolygon,
+						   create_multipolygon);
+static Geometry::Class_info 
+geometrycollection_class("GEOMETRYCOLLECTION", "GeometryCollection",
+                         Geometry::wkb_geometrycollection,
+			 create_geometrycollection);
+
+// Decode an (x, y) coordinate pair from two consecutively stored doubles.
+static void get_point(double *x, double *y, const char *data)
+{
+  float8get(*x, data);
+  float8get(*y, data + SIZEOF_STORED_DOUBLE);
+}
+
+/***************************** Geometry *******************************/
+
+// Look up a geometry class by its WKT name (case-insensitive latin1
+// comparison). Returns NULL when no registered class matches.
+Geometry::Class_info *Geometry::find_class(const char *name, size_t len)
+{
+  for (Class_info **cur_rt= ci_collection;
+       cur_rt < ci_collection_end; cur_rt++)
+  {
+    // Skip empty registry slots (index 0 is unused).
+    if (*cur_rt &&
+	((*cur_rt)->m_name.length == len) &&
+	(my_charset_latin1.strnncoll((*cur_rt)->m_name.str, len,
+		                     name, len) == 0))
+      return *cur_rt;
+  }
+  return 0;
+}
+
+
+// Construct (placement-new into 'buffer') the geometry class registered
+// for 'type_id'. Returns NULL for an unknown type id.
+Geometry *Geometry::create_by_typeid(Geometry_buffer *buffer, int type_id)
+{
+  Class_info *ci;
+  if (!(ci= find_class(type_id)))
+    return NULL;
+  return (*ci->m_create_func)(buffer->data);
+}
+
+
+// Construct a Geometry from the internal storage format:
+// 4-byte SRID, then a WKB header (1 byte order + 4-byte type), then data.
+// Returns NULL if the buffer is too short or the type id is unknown.
+Geometry *Geometry::construct(Geometry_buffer *buffer,
+                              const char *data, uint32 data_len)
+{
+  uint32 geom_type;
+  Geometry *result;
+
+  if (data_len < SRID_SIZE + WKB_HEADER_SIZE)   // < 4 + (1 + 4)
+    return NULL;
+  /* + 1 to skip the byte order (stored in position SRID_SIZE). */
+  // NOTE(review): the byte-order byte itself is not inspected here —
+  // presumably internal storage is always NDR; confirm with the writers.
+  geom_type= uint4korr(data + SRID_SIZE + 1);
+  if (!(result= create_by_typeid(buffer, (int) geom_type)))
+    return NULL;
+  result->m_data= data+ SRID_SIZE + WKB_HEADER_SIZE;
+  result->m_data_end= data + data_len;
+  return result;
+}
+
+
+// Build a 32-byte key image (xmin, xmax, ymin, ymax as stored doubles)
+// from a geometry value; writes all zeroes when the value is too short
+// or cannot be parsed. Always returns the fixed image length.
+uint Geometry::get_key_image_itMBR(LEX_CSTRING &src, uchar *buff, uint length)
+{
+  const char *dummy;
+  MBR mbr;
+  Geometry_buffer buffer;
+  Geometry *gobj;
+  const uint image_length= SIZEOF_STORED_DOUBLE*4;
+
+  if (src.length < SRID_SIZE)
+  {
+    bzero(buff, image_length);
+    return image_length;
+  }
+  gobj= Geometry::construct(&buffer, (char*) src.str, (uint32) src.length);
+  if (!gobj || gobj->get_mbr(&mbr, &dummy))
+    bzero(buff, image_length);
+  else
+  {
+    float8store(buff,    mbr.xmin);
+    float8store(buff+8,  mbr.xmax);
+    float8store(buff+16, mbr.ymin);
+    float8store(buff+24, mbr.ymax);
+  }
+  return image_length;
+}
+
+
+/**
+  Create a geometry object from its WKT (Well-Known Text) representation.
+
+  Reads the geometry class name from the input stream, appends a WKB
+  header (byte order + type id) to 'wkt', then lets the concrete class
+  parse its body between the surrounding parentheses.
+
+  @param buffer       Placement buffer for the Geometry object.
+  @param trs          WKT input stream; error message is set on it.
+  @param wkt          Output string accumulating the WKB encoding.
+  @param init_stream  If true, point the created object at the WKB data
+                      just written (skipping the WKB header).
+
+  @return the constructed Geometry, or NULL on parse/OOM error.
+*/
+Geometry *Geometry::create_from_wkt(Geometry_buffer *buffer,
+				    Gis_read_stream *trs, String *wkt,
+				    bool init_stream)
+{
+  LEX_STRING name;
+  Class_info *ci;
+  char next_sym;
+
+  if (trs->get_next_word(&name))
+  {
+    trs->set_error_msg("Geometry name expected");
+    return NULL;
+  }
+  if (!(ci= find_class(name.str, name.length)) ||
+      wkt->reserve(1 + 4, 512))
+    return NULL;
+  Geometry *result= (*ci->m_create_func)(buffer->data);
+  wkt->q_append((char) wkb_ndr);
+  wkt->q_append((uint32) result->get_class_info()->m_type_id);
+  /*
+    Peek at the next symbol once. (The previous code repeated this call;
+    next_symbol() does not consume input, so the duplicate was redundant
+    and has been removed.)
+  */
+  if (!(next_sym= trs->next_symbol()))
+    return NULL;
+  if ((next_sym == '(' && trs->check_next_symbol('(')) ||
+      result->init_from_wkt(trs, wkt) ||
+      (next_sym == '(' && trs->check_next_symbol(')')))
+    return NULL;
+  if (init_stream)
+  {
+    result->set_data_ptr(wkt->ptr(), wkt->length());
+    result->shift_wkb_header();
+  }
+  return result;
+}
+
+
+// Append this geometry's WKT representation to 'wkt'; '*end' receives
+// the data pointer past the consumed WKB. Geometry collections print
+// their own surrounding parentheses, so they are skipped here.
+// Returns 1 on error (OOM / malformed data), 0 on success.
+int Geometry::as_wkt(String *wkt, const char **end)
+{
+  uint32 len= (uint) get_class_info()->m_name.length;
+  if (wkt->reserve(len + 2, 512))
+    return 1;
+  wkt->qs_append(get_class_info()->m_name.str, len);
+  if (get_class_info() != &geometrycollection_class)
+    wkt->qs_append('(');
+  if (get_data_as_wkt(wkt, end))
+    return 1;
+  if (get_class_info() != &geometrycollection_class)
+    wkt->qs_append(')');
+  return 0;
+}
+
+
+// GeoJSON key and type names (lowercase) used when generating and
+// parsing GeoJSON documents.
+static const uchar type_keyname[]= "type";
+static const uint type_keyname_len= 4;
+static const uchar coord_keyname[]= "coordinates";
+static const uint coord_keyname_len= 11;
+static const uchar geometries_keyname[]= "geometries";
+static const uint geometries_keyname_len= 10;
+static const uchar features_keyname[]= "features";
+static const uint features_keyname_len= 8;
+static const uchar geometry_keyname[]= "geometry";
+static const uint geometry_keyname_len= 8;
+
+static const uint max_keyname_len= 11; /*'coordinates' keyname is the longest.*/
+
+static const uchar feature_type[]= "feature";
+static const int feature_type_len= 7;
+static const uchar feature_coll_type[]= "featurecollection";
+static const int feature_coll_type_len= 17;
+static const uchar bbox_keyname[]= "bbox";
+static const int bbox_keyname_len= 4;
+
+
+// Append this geometry's GeoJSON body to 'wkt':
+//   "type": "<Name>", "coordinates": ...   (or "geometries" for
+// geometry collections). Returns 1 on error, 0 on success.
+int Geometry::as_json(String *wkt, uint max_dec_digits, const char **end)
+{
+  uint32 len= (uint) get_class_info()->m_geojson_name.length;
+  if (wkt->reserve(4 + type_keyname_len + 2 + len + 2 + 2 +
+                   coord_keyname_len + 4, 512))
+    return 1;
+  wkt->qs_append('"');
+  wkt->qs_append((const char *) type_keyname, type_keyname_len);
+  wkt->qs_append("\": \"", 4);
+  wkt->qs_append(get_class_info()->m_geojson_name.str, len);
+  wkt->qs_append("\", \"", 4);
+  if (get_class_info() == &geometrycollection_class)
+    wkt->qs_append((const char *) geometries_keyname, geometries_keyname_len);
+  else
+    wkt->qs_append((const char *) coord_keyname, coord_keyname_len);
+
+  wkt->qs_append("\": ", 3);
+  if (get_data_as_json(wkt, max_dec_digits, end))
+    return 1;
+
+  return 0;
+}
+
+
+// Append a GeoJSON "bbox" member ("bbox": [xmin, ymin, xmax, ymax])
+// computed from this geometry's MBR. Returns 1 on error, 0 on success.
+int Geometry::bbox_as_json(String *wkt)
+{
+  MBR mbr;
+  const char *end;
+  if (wkt->reserve(5 + bbox_keyname_len + (FLOATING_POINT_DECIMALS+2)*4, 512))
+    return 1;
+  wkt->qs_append('"');
+  wkt->qs_append((const char *) bbox_keyname, bbox_keyname_len);
+  wkt->qs_append("\": [", 4);
+
+  if (get_mbr(&mbr, &end))
+    return 1;
+
+  wkt->qs_append(mbr.xmin);
+  wkt->qs_append(", ", 2);
+  wkt->qs_append(mbr.ymin);
+  wkt->qs_append(", ", 2);
+  wkt->qs_append(mbr.xmax);
+  wkt->qs_append(", ", 2);
+  wkt->qs_append(mbr.ymax);
+  wkt->qs_append(']');
+
+  return 0;
+}
+
+
+// Decode a WKB double honoring its byte order: for wkb_xdr (big-endian)
+// input the 8 bytes are reversed into a temporary before decoding.
+static double wkb_get_double(const char *ptr, Geometry::wkbByteOrder bo)
+{
+  double res;
+  if (bo != Geometry::wkb_xdr)
+  {
+    float8get(res, ptr);
+  }
+  else
+  {
+    // Byte-swap into a local buffer, then decode as usual.
+    char inv_array[8];
+    inv_array[0]= ptr[7];
+    inv_array[1]= ptr[6];
+    inv_array[2]= ptr[5];
+    inv_array[3]= ptr[4];
+    inv_array[4]= ptr[3];
+    inv_array[5]= ptr[2];
+    inv_array[6]= ptr[1];
+    inv_array[7]= ptr[0];
+    float8get(res, inv_array);
+  }
+  return res;
+}
+
+
+// Decode a WKB uint32 honoring its byte order: for wkb_xdr (big-endian)
+// input the 4 bytes are reversed before uint4korr.
+static uint32 wkb_get_uint(const char *ptr, Geometry::wkbByteOrder bo)
+{
+  if (bo != Geometry::wkb_xdr)
+    return uint4korr(ptr);
+  /* else */
+  {
+    char inv_array[4];
+    inv_array[0]= ptr[3];
+    inv_array[1]= ptr[2];
+    inv_array[2]= ptr[1];
+    inv_array[3]= ptr[0];
+    return uint4korr(inv_array);
+  }
+}
+
+
+// Create a geometry from raw WKB input (1-byte order + 4-byte type +
+// payload), normalizing the copy appended to 'res' to NDR byte order.
+// Returns NULL on short input, unknown type, OOM, or payload parse error.
+Geometry *Geometry::create_from_wkb(Geometry_buffer *buffer,
+                                    const char *wkb, uint32 len, String *res)
+{
+  uint32 geom_type;
+  Geometry *geom;
+
+  if (len < WKB_HEADER_SIZE)
+    return NULL;
+  // The type id is decoded according to the input's own byte-order byte.
+  geom_type= wkb_get_uint(wkb+1, (wkbByteOrder)wkb[0]);
+  if (!(geom= create_by_typeid(buffer, (int) geom_type)) ||
+      res->reserve(WKB_HEADER_SIZE, 512))
+    return NULL;
+
+  res->q_append((char) wkb_ndr);
+  res->q_append(geom_type);
+
+  // init_from_wkb validates and converts the payload; failure yields NULL.
+  return geom->init_from_wkb(wkb + WKB_HEADER_SIZE, len - WKB_HEADER_SIZE,
+                             (wkbByteOrder) wkb[0], res) ? geom : NULL;
+}
+
+
/*
  Parse one GeoJSON object and build the corresponding geometry, appending
  its WKB representation (always NDR) to 'res'.

  Handles plain geometries ("type" + "coordinates"/"geometries"), the
  "FeatureCollection"/"features" construction, and "Feature"/"geometry"
  (handled by recursing on the nested object). Keys may appear in any
  order, so positions of interesting values are remembered until the
  matching "type" is known.

  RETURN
    geometry object on success, NULL on error (je->s.error is set).
*/
Geometry *Geometry::create_from_json(Geometry_buffer *buffer,
                      json_engine_t *je, bool er_on_3D, String *res)
{
  Class_info *ci= NULL;
  /* Remembered value positions; which one is used depends on "type". */
  const uchar *coord_start= NULL, *geom_start= NULL,
              *features_start= NULL, *geometry_start= NULL;
  Geometry *result;
  uchar key_buf[max_keyname_len];
  uint key_len;
  int fcoll_type_found= 0, feature_type_found= 0;


  if (json_read_value(je))
    goto err_return;
  
  if (je->value_type != JSON_VALUE_OBJECT)
  {
    je->s.error= GEOJ_INCORRECT_GEOJSON;
    goto err_return;
  }

  while (json_scan_next(je) == 0 && je->state != JST_OBJ_END)
  {
    DBUG_ASSERT(je->state == JST_KEY);

    /* Collect the key name, lowercased, to compare against known keys. */
    key_len=0;
    while (json_read_keyname_chr(je) == 0)
    {
      if (je->s.c_next > 127 || key_len >= max_keyname_len)
      {
        /* Symbol out of range, or keyname too long. No need to compare.. */
        key_len=0;
        break;
      }
      key_buf[key_len++]= (uchar)je->s.c_next | 0x20; /* make it lowercase. */
    }

    if (unlikely(je->s.error))
      goto err_return;

    if (key_len == type_keyname_len &&
        memcmp(key_buf, type_keyname, type_keyname_len) == 0)
    {
      /*
         Found the "type" key. Let's check it's a string and remember
         the feature's type.
      */
      if (json_read_value(je))
        goto err_return;

      if (je->value_type == JSON_VALUE_STRING)
      {
        if ((ci= find_class((const char *) je->value, je->value_len)))
        {
          /* Coordinates already seen earlier -> we can build right away. */
          if ((coord_start=
                (ci == &geometrycollection_class) ? geom_start : coord_start))
            goto create_geom;
        }
        else if (je->value_len == feature_coll_type_len &&
            my_charset_latin1.strnncoll(je->value, je->value_len,
		                        feature_coll_type, feature_coll_type_len) == 0)
        {
          /*
            'FeatureCollection' type found. Handle the 'Featurecollection'/'features'
            GeoJSON construction.
          */
          if (features_start)
            goto handle_feature_collection;
          fcoll_type_found= 1;
        }
        else if (je->value_len == feature_type_len &&
                 my_charset_latin1.strnncoll(je->value, je->value_len,
		                             feature_type, feature_type_len) == 0)
        {
          if (geometry_start)
            goto handle_geometry_key;
          feature_type_found= 1;
        }
        else /* can't understand the type. */
          break;
      }
      else /* The "type" value can only be string. */
        break;
    }
    else if (key_len == coord_keyname_len &&
             memcmp(key_buf, coord_keyname, coord_keyname_len) == 0)
    {
      /*
        Found the "coordinates" key. Let's check it's an array
        and remember where it starts.
      */
      if (json_read_value(je))
        goto err_return;

      if (je->value_type == JSON_VALUE_ARRAY)
      {
        coord_start= je->value_begin;
        if (ci && ci != &geometrycollection_class)
          goto create_geom;
        /* "type" not seen yet: skip over the array and keep scanning. */
        if (json_skip_level(je))
          goto err_return;
      }
    }
    else if (key_len == geometries_keyname_len &&
             memcmp(key_buf, geometries_keyname, geometries_keyname_len) == 0)
    {
      /*
        Found the "geometries" key. Let's check it's an array
        and remember where it starts.
      */
      if (json_read_value(je))
        goto err_return;

      if (je->value_type == JSON_VALUE_ARRAY)
      {
        geom_start= je->value_begin;
        if (ci == &geometrycollection_class)
        {
          coord_start= geom_start;
          goto create_geom;
        }
      }
    }
    else if (key_len == features_keyname_len &&
             memcmp(key_buf, features_keyname, features_keyname_len) == 0)
    {
      /*
        'features' key found. Handle the 'Featurecollection'/'features'
        GeoJSON construction.
      */
      if (json_read_value(je))
        goto err_return;
      if (je->value_type == JSON_VALUE_ARRAY)
      {
        features_start= je->value_begin;
        if (fcoll_type_found)
          goto handle_feature_collection;
      }
    }
    else if (key_len == geometry_keyname_len &&
             memcmp(key_buf, geometry_keyname, geometry_keyname_len) == 0)
    {
      if (json_read_value(je))
        goto err_return;
      if (je->value_type == JSON_VALUE_OBJECT)
      {
        geometry_start= je->value_begin;
        if (feature_type_found)
          goto handle_geometry_key;
      }
      else
        goto err_return;
    }
    else
    {
      /* Unknown key: skip its value entirely. */
      if (json_skip_key(je))
        goto err_return;
    }
  }

  if (je->s.error == 0)
  {
    /*
      We didn't find all the required keys. That are "type" and "coordinates"
      or "geometries" for GeometryCollection.
    */
    je->s.error= GEOJ_INCORRECT_GEOJSON;
  }
  goto err_return;

handle_feature_collection:
  /* A FeatureCollection is materialized as a GeometryCollection. */
  ci= &geometrycollection_class;
  coord_start= features_start;

create_geom:

  /* Re-scan from the remembered coordinates and emit WKB header + body. */
  json_scan_start(je, je->s.cs, coord_start, je->s.str_end);

  if (res->reserve(1 + 4, 512))
    goto err_return;

  result= (*ci->m_create_func)(buffer->data);
  res->q_append((char) wkb_ndr);
  res->q_append((uint32) result->get_class_info()->m_type_id);
  if (result->init_from_json(je, er_on_3D, res))
    goto err_return;

  return result;

handle_geometry_key:
  /* "Feature": recurse into its nested "geometry" object. */
  json_scan_start(je, je->s.cs, geometry_start, je->s.str_end);
  return create_from_json(buffer, je, er_on_3D, res);

err_return:
  return NULL;
}
+
+
+Geometry *Geometry::create_from_opresult(Geometry_buffer *g_buf,
+                                   String *res, Gcalc_result_receiver &rr)
+{
+  uint32 geom_type= rr.get_result_typeid();
+  Geometry *obj= create_by_typeid(g_buf, geom_type);
+
+  if (!obj || res->reserve(WKB_HEADER_SIZE, 512))
+    return NULL;
+
+  res->q_append((char) wkb_ndr);
+  res->q_append(geom_type);
+  return obj->init_from_opresult(res, rr.result(), rr.length()) ? obj : NULL;
+}
+
+
+bool Geometry::envelope(String *result) const
+{
+  MBR mbr;
+  const char *end;
+
+  if (get_mbr(&mbr, &end))
+    return 1;
+
+  if (!mbr.valid())
+  {
+    /* Empty geometry */
+    if (result->reserve(1 + 4*2))
+      return 1;
+    result->q_append((char) wkb_ndr);
+    result->q_append((uint32) wkb_geometrycollection);
+    result->q_append((uint32) 0);
+    return 0;
+  }
+  if (result->reserve(1 + 4 * 3 + SIZEOF_STORED_DOUBLE * 10))
+    return 1;
+
+  result->q_append((char) wkb_ndr);
+  result->q_append((uint32) wkb_polygon);
+  result->q_append((uint32) 1);
+  result->q_append((uint32) 5);
+  result->q_append(mbr.xmin);
+  result->q_append(mbr.ymin);
+  result->q_append(mbr.xmax);
+  result->q_append(mbr.ymin);
+  result->q_append(mbr.xmax);
+  result->q_append(mbr.ymax);
+  result->q_append(mbr.xmin);
+  result->q_append(mbr.ymax);
+  result->q_append(mbr.xmin);
+  result->q_append(mbr.ymin);
+
+  return 0;
+}
+
+
+/*
+  Create a point from data.
+
+  SYNPOSIS
+    create_point()
+    result		Put result here
+    data		Data for point is here.
+
+  RETURN
+    0	ok
+    1	Can't reallocate 'result'
+*/
+
+bool Geometry::create_point(String *result, const char *data) const
+{
+  if (no_data(data, POINT_DATA_SIZE) ||
+      result->reserve(1 + 4 + POINT_DATA_SIZE))
+    return 1;
+  result->q_append((char) wkb_ndr);
+  result->q_append((uint32) wkb_point);
+  /* Copy two double in same format */
+  result->q_append(data, POINT_DATA_SIZE);
+  return 0;
+}
+
+/*
+  Create a point from coordinates.
+
+  SYNPOSIS
+    create_point()
+    result		Put result here
+    x			x coordinate for point
+    y			y coordinate for point
+
+  RETURN
+    0	ok
+    1	Can't reallocate 'result'
+*/
+
+bool Geometry::create_point(String *result, double x, double y)
+{
+  if (result->reserve(1 + 4 + POINT_DATA_SIZE))
+    return 1;
+
+  result->q_append((char) wkb_ndr);
+  result->q_append((uint32) wkb_point);
+  result->q_append(x);
+  result->q_append(y);
+  return 0;
+}
+
+/*
+  Append N points from packed format to text
+
+  SYNOPSIS
+    append_points()
+    txt			Append points here
+    n_points		Number of points
+    data		Packed data
+    offset		Offset between points
+
+  RETURN
+    # end of data
+*/
+
+const char *Geometry::append_points(String *txt, uint32 n_points,
+				    const char *data, uint32 offset) const
+{			     
+  while (n_points--)
+  {
+    double x,y;
+    data+= offset;
+    get_point(&x, &y, data);
+    data+= POINT_DATA_SIZE;
+    txt->qs_append(x);
+    txt->qs_append(' ');
+    txt->qs_append(y);
+    txt->qs_append(',');
+  }
+  return data;
+}
+
+
+static void append_json_point(String *txt, uint max_dec, const char *data)
+{
+  double x,y;
+  get_point(&x, &y, data);
+  if (max_dec < FLOATING_POINT_DECIMALS)
+  {
+    x= my_double_round(x, max_dec, FALSE, FALSE);
+    y= my_double_round(y, max_dec, FALSE, FALSE);
+  }
+  txt->qs_append('[');
+  txt->qs_append(x);
+  txt->qs_append(", ", 2);
+  txt->qs_append(y);
+  txt->qs_append(']');
+}
+
+
+/*
+  Append N points from packed format to json
+
+  SYNOPSIS
+    append_json_points()
+    txt			Append points here
+    n_points		Number of points
+    data		Packed data
+    offset		Offset between points
+
+  RETURN
+    # end of data
+*/
+
+static const char *append_json_points(String *txt, uint max_dec,
+    uint32 n_points, const char *data, uint32 offset)
+{			     
+  txt->qs_append('[');
+  while (n_points--)
+  {
+    data+= offset;
+    append_json_point(txt, max_dec, data);
+    data+= POINT_DATA_SIZE;
+    txt->qs_append(", ", 2);
+  }
+  txt->length(txt->length() - 2);// Remove ending ', '
+  txt->qs_append(']');
+  return data;
+}
+/*
+  Get most bounding rectangle (mbr) for X points
+
+  SYNOPSIS
+    get_mbr_for_points()
+    mbr			MBR (store rectangle here)
+    points		Number of points
+    data		Packed data
+    offset		Offset between points
+
+  RETURN
+    0	Wrong data
+    #	end of data
+*/
+
+const char *Geometry::get_mbr_for_points(MBR *mbr, const char *data,
+					 uint offset) const
+{
+  uint32 points;
+  /* read number of points */
+  if (no_data(data, 4))
+    return 0;
+  points= uint4korr(data);
+  data+= 4;
+
+  if (not_enough_points(data, points, offset))
+    return 0;
+
+  /* Calculate MBR for points */
+  while (points--)
+  {
+    data+= offset;
+    mbr->add_xy(data, data + SIZEOF_STORED_DOUBLE);
+    data+= POINT_DATA_SIZE;
+  }
+  return data;
+}
+
+
+/***************************** Point *******************************/
+
/* A point's payload is always two stored doubles. */
uint32 Gis_point::get_data_size() const
{
  return POINT_DATA_SIZE;
}
+
+
+bool Gis_point::init_from_wkt(Gis_read_stream *trs, String *wkb)
+{
+  double x, y;
+  if (trs->get_next_number(&x) || trs->get_next_number(&y) ||
+      wkb->reserve(POINT_DATA_SIZE, 512))
+    return 1;
+  wkb->q_append(x);
+  wkb->q_append(y);
+  return 0;
+}
+
+
+uint Gis_point::init_from_wkb(const char *wkb, uint len,
+                              wkbByteOrder bo, String *res)
+{
+  double x, y;
+  if (len < POINT_DATA_SIZE || res->reserve(POINT_DATA_SIZE))
+    return 0;
+  x= wkb_get_double(wkb, bo);
+  y= wkb_get_double(wkb + SIZEOF_STORED_DOUBLE, bo);
+  res->q_append(x);
+  res->q_append(y);
+  return POINT_DATA_SIZE;
+}
+
+
+static int read_point_from_json(json_engine_t *je, bool er_on_3D,
+                                double *x, double *y)
+{
+  int n_coord= 0, err;
+  double tmp, *d;
+  char *endptr;
+
+  while (json_scan_next(je) == 0 && je->state != JST_ARRAY_END)
+  {
+    DBUG_ASSERT(je->state == JST_VALUE);
+    if (json_read_value(je))
+      return 1;
+
+    if (je->value_type != JSON_VALUE_NUMBER)
+      goto bad_coordinates;
+
+    d= (n_coord == 0) ? x : ((n_coord == 1) ? y : &tmp);
+    *d= je->s.cs->strntod((char *) je->value, je->value_len, &endptr, &err);
+    if (err)
+      goto bad_coordinates;
+    n_coord++;
+  }
+
+  if (n_coord <= 2 || !er_on_3D)
+    return 0;
+  je->s.error= Geometry::GEOJ_DIMENSION_NOT_SUPPORTED;
+  return 1;
+bad_coordinates:
+  je->s.error= Geometry::GEOJ_INCORRECT_GEOJSON;
+  return 1;
+}
+
+
+bool Gis_point::init_from_json(json_engine_t *je, bool er_on_3D, String *wkb)
+{
+  double x, y;
+  if (json_read_value(je))
+    return TRUE;
+
+  if (je->value_type != JSON_VALUE_ARRAY)
+  {
+    je->s.error= GEOJ_INCORRECT_GEOJSON;
+    return TRUE;
+  }
+
+  if (read_point_from_json(je, er_on_3D, &x, &y) ||
+      wkb->reserve(POINT_DATA_SIZE))
+    return TRUE;
+
+  wkb->q_append(x);
+  wkb->q_append(y);
+  return FALSE;
+}
+
+
+bool Gis_point::get_data_as_wkt(String *txt, const char **end) const
+{
+  double x, y;
+  if (get_xy(&x, &y))
+    return 1;
+  if (txt->reserve(MAX_DIGITS_IN_DOUBLE * 2 + 1))
+    return 1;
+  txt->qs_append(x);
+  txt->qs_append(' ');
+  txt->qs_append(y);
+  *end= m_data+ POINT_DATA_SIZE;
+  return 0;
+}
+
+
/* Render the point as GeoJSON coordinates: "[x, y]". Sets *end past the data. */
bool Gis_point::get_data_as_json(String *txt, uint max_dec_digits,
                                 const char **end) const
{
  if (txt->reserve(MAX_DIGITS_IN_DOUBLE * 2 + 4))
    return 1;
  append_json_point(txt, max_dec_digits, m_data);
  *end= m_data+ POINT_DATA_SIZE;
  return 0;
}
+
+
/* A point's MBR is the point itself; fold it into 'mbr'. */
bool Gis_point::get_mbr(MBR *mbr, const char **end) const
{
  double x, y;
  if (get_xy(&x, &y))
    return 1;
  mbr->add_xy(x, y);
  *end= m_data+ POINT_DATA_SIZE;
  return 0;
}
+
+
/* A point has zero area. */
int Gis_point::area(double *ar, const char **end) const
{
  *ar= 0;
  *end= m_data+ POINT_DATA_SIZE;
  return 0;
}
+
+
/* A point has zero length. */
int Gis_point::geom_length(double *len, const char **end) const
{
  *len= 0;
  *end= m_data+ POINT_DATA_SIZE;
  return 0;
}
+
+
/* Hand this point to the Gcalc shape transporter. Non-zero on error. */
int Gis_point::store_shapes(Gcalc_shape_transporter *trn) const
{
  double x, y;

  return get_xy(&x, &y) || trn->single_point(x, y);
}
+
+
/* Run-time type descriptor for POINT. */
const Geometry::Class_info *Gis_point::get_class_info() const
{
  return &point_class;
}
+
+
+/**
+  Function to calculate haversine.
+  Taking as arguments Point and Multipoint geometries.
+  Multipoint geometry has to be single point only.
+  It is up to caller to ensure valid input.
+
+  @param    g      pointer to the Geometry
+  @param    r      sphere radius
+  @param    error  pointer describing the error in case of the boundary conditions
+
+  @return distance in case without error, it is caclulcated distance (non-negative),
+                   in case error exist, negative value.
+*/
+double Gis_point::calculate_haversine(const Geometry *g,
+                                      const double sphere_radius,
+                                      int *error)
+{
+  DBUG_ASSERT(sphere_radius > 0);
+  double x1r, x2r, y1r, y2r;
+
+  // This check is done only for optimization purposes where we know it will
+  // be one and only one point in Multipoint
+  if (g->get_class_info()->m_type_id == Geometry::wkb_multipoint)
+  {
+    const char point_size= 4 + WKB_HEADER_SIZE + POINT_DATA_SIZE+1; //1 for the type
+    char point_temp[point_size];
+    memset(point_temp+4, Geometry::wkb_point, 1);
+    memcpy(point_temp+5, static_cast(g)->get_data_ptr()+5, 4);
+    memcpy(point_temp+4+WKB_HEADER_SIZE, g->get_data_ptr()+4+WKB_HEADER_SIZE,
+           POINT_DATA_SIZE);
+    point_temp[point_size-1]= '\0';
+    Geometry_buffer gbuff;
+    Geometry *gg= Geometry::construct(&gbuff, point_temp, point_size-1);
+    if (!gg || static_cast(gg)->get_xy_radian(&x2r, &y2r))
+    {
+      *error= 2;
+      return -1;
+    }
+  }
+  else
+  {
+    if (static_cast(g)->get_xy_radian(&x2r, &y2r))
+    {
+      *error= 2;
+      return -1;
+    }
+  }
+  if (this->get_xy_radian(&x1r, &y1r))
+  {
+    *error= 2;
+    return -1;
+  }
+  //
+  // Check boundary conditions: longitude[-180,180]
+  if (!((x2r >= -M_PI && x2r <= M_PI) && (x1r >= -M_PI && x1r <= M_PI)))
+  {
+    *error=1;
+    return -1;
+  }
+  // Check boundary conditions: latitude[-90,90]
+  if (!((y2r >= -M_PI/2 && y2r <= M_PI/2) && (y1r >= -M_PI/2 && y1r <= M_PI/2)))
+  {
+    *error=-1;
+    return -1;
+  }
+  double dlat= sin((y2r - y1r)/2)*sin((y2r - y1r)/2);
+  double dlong= sin((x2r - x1r)/2)*sin((x2r - x1r)/2);
+  return 2*sphere_radius*asin((sqrt(dlat + cos(y1r)*cos(y2r)*dlong)));
+}
+
+
/**
  Function that calculate spherical distance of Point from Multipoint geometries.
  In case there is single point in Multipoint geometries calculate_haversine()
  can handle such case. Otherwise, new geometry (Point) has to be constructed.

  @param    g pointer to the Geometry
  @param    r sphere radius
  @param    result pointer to the result
  @param    err    pointer to the error obtained from calculate_haversin()

  @return state
  @retval TRUE  failed
  @retval FALSE success
*/
int Gis_point::spherical_distance_multipoints(Geometry *g, const double r,
                                              double *result, int *err)
{  
  uint32 num_of_points2;
    // To find the minimum radius it cannot be greater than Earth radius
  double res= 6370986.0;
  double temp_res= 0.0;
  const uint32 len= 4 + WKB_HEADER_SIZE + POINT_DATA_SIZE + 1;
  char s[len];
  g->num_geometries(&num_of_points2);
  DBUG_ASSERT(num_of_points2 >= 1);
  if (num_of_points2 == 1)
  {
    // Single-point multipoint: calculate_haversine() handles it directly.
    *result= this->calculate_haversine(g, r, err);
    return 0;
  }
  // Build each member point as a stand-alone POINT value on the stack and
  // keep the smallest spherical distance seen.
  for (uint32 i=1; i <= num_of_points2; i++)
  {
    Geometry_buffer buff_temp;
    Geometry *temp;
    // i-th point payload: skip the count (4 bytes), i WKB headers and the
    // i-1 preceding point payloads.
    const char *pt_ptr= g->get_data_ptr()+
      4+WKB_HEADER_SIZE*i + POINT_DATA_SIZE*(i-1);

    // First 4 bytes are handled already, make sure to create a Point
    memset(s + 4, Geometry::wkb_point, 1);
    if (g->no_data(pt_ptr, POINT_DATA_SIZE))
      return 1;

    memcpy(s + 5, g->get_data_ptr() + 5, 4);
    memcpy(s + 4 + WKB_HEADER_SIZE, pt_ptr, POINT_DATA_SIZE);
    s[len-1]= '\0';
    temp= Geometry::construct(&buff_temp, s, len);
    if (!temp)
      return 1;
    // NOTE(review): a calculate_haversine() error yields a negative
    // temp_res which then wins the minimum; callers are expected to
    // check *err — confirm.
    temp_res= this->calculate_haversine(temp, r, err);
    if (res > temp_res)
      res= temp_res;
  }
  *result= res;
  return 0;
}
+/***************************** LineString *******************************/
+
+uint32 Gis_line_string::get_data_size() const 
+{
+  uint32 n_points;
+  if (no_data(m_data, 4))
+    return GET_SIZE_ERROR;
+
+  n_points= uint4korr(m_data);
+
+  if (not_enough_points(m_data + 4, n_points))
+    return GET_SIZE_ERROR;
+
+  return 4 + n_points * POINT_DATA_SIZE;
+}
+
+
+bool Gis_line_string::init_from_wkt(Gis_read_stream *trs, String *wkb)
+{
+  uint32 n_points= 0;
+  uint32 np_pos= wkb->length();
+  Gis_point p;
+
+  if (wkb->reserve(4, 512))
+    return 1;
+  wkb->length(wkb->length()+4);			// Reserve space for points  
+
+  for (;;)
+  {
+    if (p.init_from_wkt(trs, wkb))
+      return 1;
+    n_points++;
+    if (trs->skip_char(','))			// Didn't find ','
+      break;
+  }
+  if (n_points < 1)
+  {
+    trs->set_error_msg("Too few points in LINESTRING");
+    return 1;
+  }
+  wkb->write_at_position(np_pos, n_points);
+  return 0;
+}
+
+
+uint Gis_line_string::init_from_wkb(const char *wkb, uint len,
+                                    wkbByteOrder bo, String *res)
+{
+  uint32 n_points, proper_length;
+  const char *wkb_end;
+  Gis_point p;
+
+  if (len < 4 || (n_points= wkb_get_uint(wkb, bo)) < 1 ||
+      ((len - 4) / POINT_DATA_SIZE) < n_points)
+    return 0;
+  proper_length= 4 + n_points * POINT_DATA_SIZE;
+
+  if (len < proper_length || res->reserve(proper_length))
+    return 0;
+
+  res->q_append(n_points);
+  wkb_end= wkb + proper_length;
+  for (wkb+= 4; wkblength();
+  Gis_point p;
+
+  if (json_read_value(je))
+    return TRUE;
+
+  if (je->value_type != JSON_VALUE_ARRAY)
+  {
+    je->s.error= GEOJ_INCORRECT_GEOJSON;
+    return TRUE;
+  }
+
+  if (wkb->reserve(4, 512))
+    return TRUE;
+  wkb->length(wkb->length()+4);	// Reserve space for n_points  
+
+  while (json_scan_next(je) == 0 && je->state != JST_ARRAY_END)
+  {
+    DBUG_ASSERT(je->state == JST_VALUE);
+
+    if (p.init_from_json(je, er_on_3D, wkb))
+      return TRUE;
+    n_points++;
+  }
+  if (n_points < 1)
+  {
+    je->s.error= Geometry::GEOJ_TOO_FEW_POINTS;
+    return TRUE;
+  }
+  wkb->write_at_position(np_pos, n_points);
+  return FALSE;
+}
+
+
/* Render the linestring as WKT coordinates: "x y,x y,...". Sets *end. */
bool Gis_line_string::get_data_as_wkt(String *txt, const char **end) const
{
  uint32 n_points;
  const char *data= m_data;

  if (no_data(data, 4))
    return 1;
  n_points= uint4korr(data);
  data += 4;

  if (n_points < 1 ||
      not_enough_points(data, n_points) ||
      txt->reserve(((MAX_DIGITS_IN_DOUBLE + 1)*2 + 1) * n_points))
    return 1;

  while (n_points--)
  {
    double x, y;
    get_point(&x, &y, data);
    data+= POINT_DATA_SIZE;
    txt->qs_append(x);
    txt->qs_append(' ');
    txt->qs_append(y);
    txt->qs_append(',');
  }
  txt->length(txt->length() - 1);		// Remove end ','
  *end= data;
  return 0;
}
+
+
+bool Gis_line_string::get_data_as_json(String *txt, uint max_dec_digits,
+                                       const char **end) const
+{
+  uint32 n_points;
+  const char *data= m_data;
+
+  if (no_data(data, 4))
+    return 1;
+  n_points= uint4korr(data);
+  data += 4;
+
+  if (n_points < 1 ||
+      not_enough_points(data, n_points) ||
+      txt->reserve((MAX_DIGITS_IN_DOUBLE*2 + 6) * n_points + 2))
+    return 1;
+
+  *end= append_json_points(txt, max_dec_digits, n_points, data, 0);
+
+  return 0;
+}
+
+
/* MBR over all points; returns 1 (error) when the data is truncated. */
bool Gis_line_string::get_mbr(MBR *mbr, const char **end) const
{
  return (*end=get_mbr_for_points(mbr, m_data, 0)) == 0;
}
+
+
/* Sum of Euclidean segment lengths over consecutive points. Sets *end. */
int Gis_line_string::geom_length(double *len, const char **end) const
{
  uint32 n_points;
  double prev_x, prev_y;
  const char *data= m_data;

  *len= 0;					// In case of errors
  if (no_data(data, 4))
    return 1;
  n_points= uint4korr(data);
  data+= 4;
  if (n_points < 1 || not_enough_points(data, n_points))
    return 1;

  get_point(&prev_x, &prev_y, data);
  data+= POINT_DATA_SIZE;
  while (--n_points)
  {
    double x, y;
    get_point(&x, &y, data);
    data+= POINT_DATA_SIZE;
    /* Segment length: sqrt(dx^2 + dy^2). */
    *len+= sqrt(pow(prev_x-x,2)+pow(prev_y-y,2));
    prev_x= x;
    prev_y= y;
  }
  *end= data;
  return 0;
}
+
+
/* A linestring has zero area; *end is set past its computed extent. */
int Gis_line_string::area(double *ar, const char **end) const
{
  uint32 n_points;
  *ar= 0.0;

  /* read number of points */
  if (no_data(m_data, 4))
    return 1;
  n_points= uint4korr(m_data);
  /* NOTE(review): the point payload is not verified here, so *end may
     point past the buffer for truncated values — confirm callers
     re-check with no_data(). */
  *end= m_data + 4 + POINT_DATA_SIZE * n_points;
  return 0;
}
+
+
+int Gis_line_string::is_closed(int *closed) const
+{
+  uint32 n_points;
+  double x1, y1, x2, y2;
+  const char *data= m_data;
+
+  if (no_data(data, 4))
+    return 1;
+  n_points= uint4korr(data);
+  if (n_points == 1)
+  {
+    *closed=1;
+    return 0;
+  }
+  data+= 4;
+  if (n_points == 0 || not_enough_points(data, n_points))
+    return 1;
+
+  /* Get first point */
+  get_point(&x1, &y1, data);
+
+  /* get last point */
+  data+= POINT_DATA_SIZE + (n_points-2)*POINT_DATA_SIZE;
+  get_point(&x2, &y2, data);
+
+  *closed= (x1==x2) && (y1==y2);
+  return 0;
+}
+
+
+int Gis_line_string::num_points(uint32 *n_points) const
+{
+  *n_points= uint4korr(m_data);
+  return 0;
+}
+
+
/* First point of the linestring as a WKB POINT in 'result'. */
int Gis_line_string::start_point(String *result) const
{
  /* +4 is for skipping over number of points */
  return create_point(result, m_data + 4);
}
+
+
+int Gis_line_string::end_point(String *result) const
+{
+  uint32 n_points;
+  if (no_data(m_data, 4))
+    return 1;
+  n_points= uint4korr(m_data);
+  if (n_points == 0 || not_enough_points(m_data+4, n_points))
+    return 1;
+  return create_point(result, m_data + 4 + (n_points - 1) * POINT_DATA_SIZE);
+}
+
+
+int Gis_line_string::point_n(uint32 num, String *result) const
+{
+  uint32 n_points;
+  if (no_data(m_data, 4))
+    return 1;
+  num--;
+  n_points= uint4korr(m_data);
+  if (num >= n_points || not_enough_points(m_data+4, n_points))
+    return 1;
+
+  return create_point(result, m_data + 4 + num*POINT_DATA_SIZE);
+}
+
+
/*
  Feed the linestring's points to the Gcalc shape transporter,
  collapsing consecutive duplicate points. Non-zero on error.
*/
int Gis_line_string::store_shapes(Gcalc_shape_transporter *trn) const
{
  uint32 n_points;
  double x, y;
  double UNINIT_VAR(prev_x), UNINIT_VAR(prev_y);
  int first_point= 1;
  const char *data= m_data;

  if (no_data(m_data, 4))
    return 1;
  n_points= uint4korr(data);
  data+= 4;
  if (n_points < 1 || not_enough_points(data, n_points))
    return 1;

  trn->start_line();

  while (n_points--)
  {
    get_point(&x, &y, data);
    data+= POINT_DATA_SIZE;
    /* Skip a point identical to its predecessor. */
    if (!first_point && x == prev_x && y == prev_y)
      continue;
    if (trn->add_point(x, y))
      return 1;
    first_point= 0;
    prev_x= x;
    prev_y= y;
  }

  return trn->complete_line();
}
+
/* Run-time type descriptor for LINESTRING. */
const Geometry::Class_info *Gis_line_string::get_class_info() const
{
  return &linestring_class;
}
+
+
+/***************************** Polygon *******************************/
+
+uint32 Gis_polygon::get_data_size() const 
+{
+  uint32 n_linear_rings;
+  uint32 n_points;
+  const char *data= m_data;
+
+  if (no_data(data, 4))
+    return GET_SIZE_ERROR;
+  n_linear_rings= uint4korr(data);
+  data+= 4;
+
+  while (n_linear_rings--)
+  {
+    if (no_data(data, 4) ||
+        not_enough_points(data+4, n_points= uint4korr(data)))
+      return GET_SIZE_ERROR;
+    data+= 4 + n_points*POINT_DATA_SIZE;
+  }
+  if (no_data(data, 0))
+    return GET_SIZE_ERROR;
+  return (uint32) (data - m_data);
+}
+
+
/*
  Parse "(ring),(ring),..." from a WKT stream and append the packed
  polygon (ring count + rings) to 'wkb'. Every ring must be closed.
*/
bool Gis_polygon::init_from_wkt(Gis_read_stream *trs, String *wkb)
{
  uint32 n_linear_rings= 0;
  uint32 lr_pos= wkb->length();
  int closed;

  if (wkb->reserve(4, 512))
    return 1;
  wkb->length(wkb->length()+4);	// Reserve space for n_rings
  for (;;)  
  {
    Gis_line_string ls;
    uint32 ls_pos=wkb->length();
    if (trs->check_next_symbol('(') ||
	ls.init_from_wkt(trs, wkb) ||
	trs->check_next_symbol(')'))
      return 1;

    /* Re-read the ring just written to verify that it is closed. */
    ls.set_data_ptr(wkb->ptr() + ls_pos, wkb->length() - ls_pos);
    if (ls.is_closed(&closed) || !closed)
    {
      trs->set_error_msg("POLYGON's linear ring isn't closed");
      return 1;
    }
    n_linear_rings++;
    if (trs->skip_char(','))			// Didn't find ','
      break;
  }
  wkb->write_at_position(lr_pos, n_linear_rings);
  return 0;
}
+
+
+uint Gis_polygon::init_from_opresult(String *bin,
+                                     const char *opres, uint res_len)
+{
+  const char *opres_orig= opres;
+  uint32 position= bin->length();
+  uint32 poly_shapes= 0;
+
+  if (bin->reserve(4, 512))
+    return 0;
+  bin->q_append(poly_shapes);
+
+  while (opres_orig + res_len > opres)
+  {
+    uint32 n_points, proper_length;
+    const char *op_end, *p1_position;
+    Gis_point p;
+    Gcalc_function::shape_type st;
+
+    st= (Gcalc_function::shape_type) uint4korr(opres);
+    if (poly_shapes && st != Gcalc_function::shape_hole)
+      break;
+    poly_shapes++;
+    n_points= uint4korr(opres + 4) + 1; /* skip shape type id */
+    proper_length= 4 + n_points * POINT_DATA_SIZE;
+
+    if (bin->reserve(proper_length, 512))
+      return 0;
+
+    bin->q_append(n_points);
+    op_end= opres + 8 + (n_points-1) * 8 * 2;
+    p1_position= (opres+= 8);
+    for (; opreswrite_at_position(position, poly_shapes);
+
+  return (uint) (opres - opres_orig);
+}
+
+
+uint Gis_polygon::init_from_wkb(const char *wkb, uint len, wkbByteOrder bo,
+                                String *res)
+{
+  uint32 n_linear_rings;
+  const char *wkb_orig= wkb;
+
+  if (len < 4)
+    return 0;
+
+  if (!(n_linear_rings= wkb_get_uint(wkb, bo)))
+    return 0;
+
+  if (res->reserve(4, 512))
+    return 0;
+  wkb+= 4;
+  len-= 4;
+  res->q_append(n_linear_rings);
+
+  while (n_linear_rings--)
+  {
+    Gis_line_string ls;
+    uint32 ls_pos= res->length();
+    int ls_len;
+    int closed;
+
+    if (!(ls_len= ls.init_from_wkb(wkb, len, bo, res)))
+      return 0;
+
+    ls.set_data_ptr(res->ptr() + ls_pos, res->length() - ls_pos);
+
+    if (ls.is_closed(&closed) || !closed)
+      return 0;
+    wkb+= ls_len;
+  }
+
+  return (uint) (wkb - wkb_orig);
+}
+
+
/*
  Parse a GeoJSON array of rings and append the packed polygon
  (ring count + rings) to 'wkb'. Every ring must be closed.
*/
bool Gis_polygon::init_from_json(json_engine_t *je, bool er_on_3D, String *wkb)
{
  uint32 n_linear_rings= 0;
  uint32 lr_pos= wkb->length();
  int closed;

  if (json_read_value(je))
    return TRUE;

  if (je->value_type != JSON_VALUE_ARRAY)
  {
    je->s.error= GEOJ_INCORRECT_GEOJSON;
    return TRUE;
  }

  if (wkb->reserve(4, 512))
    return TRUE;
  wkb->length(wkb->length()+4);	// Reserve space for n_rings

  while (json_scan_next(je) == 0 && je->state != JST_ARRAY_END)
  {
    Gis_line_string ls;
    DBUG_ASSERT(je->state == JST_VALUE);

    uint32 ls_pos=wkb->length();
    if (ls.init_from_json(je, er_on_3D, wkb))
      return TRUE;
    /* Re-read the ring just written to verify that it is closed. */
    ls.set_data_ptr(wkb->ptr() + ls_pos, wkb->length() - ls_pos);
    if (ls.is_closed(&closed) || !closed)
    {
      je->s.error= GEOJ_POLYGON_NOT_CLOSED;
      return TRUE;
    }
    n_linear_rings++;
  }

  if (je->s.error)
    return TRUE;

  if (n_linear_rings == 0)
  {
    je->s.error= Geometry::GEOJ_EMPTY_COORDINATES;
    return TRUE;
  }
  wkb->write_at_position(lr_pos, n_linear_rings);
  return FALSE;
}
+
+
/* Render the polygon as WKT coordinates: "(x y,...),(x y,...)". Sets *end. */
bool Gis_polygon::get_data_as_wkt(String *txt, const char **end) const
{
  uint32 n_linear_rings;
  const char *data= m_data;

  if (no_data(data, 4))
    return 1;

  n_linear_rings= uint4korr(data);
  data+= 4;

  while (n_linear_rings--)
  {
    uint32 n_points;
    if (no_data(data, 4))
      return 1;
    n_points= uint4korr(data);
    data+= 4;
    if (not_enough_points(data, n_points) ||
	txt->reserve(2 + ((MAX_DIGITS_IN_DOUBLE + 1) * 2 + 1) * n_points))
      return 1;
    txt->qs_append('(');
    data= append_points(txt, n_points, data, 0);
    /* append_points() leaves a trailing ','; turn it into the ')'. */
    (*txt) [txt->length() - 1]= ')';		// Replace end ','
    txt->qs_append(',');
  }
  txt->length(txt->length() - 1);		// Remove end ','
  *end= data;
  return 0;
}
+
+
+bool Gis_polygon::get_data_as_json(String *txt, uint max_dec_digits,
+                                   const char **end) const
+{
+  uint32 n_linear_rings;
+  const char *data= m_data;
+
+  if (no_data(data, 4) || txt->reserve(1, 512))
+    return 1;
+
+  n_linear_rings= uint4korr(data);
+  data+= 4;
+
+  txt->qs_append('[');
+  while (n_linear_rings--)
+  {
+    uint32 n_points;
+    if (no_data(data, 4))
+      return 1;
+    n_points= uint4korr(data);
+    data+= 4;
+    if (not_enough_points(data, n_points) ||
+	txt->reserve(4 + (MAX_DIGITS_IN_DOUBLE * 2 + 6) * n_points))
+      return 1;
+    data= append_json_points(txt, max_dec_digits, n_points, data, 0);
+    txt->qs_append(", ", 2);
+  }
+  txt->length(txt->length() - 2);// Remove ending ', '
+  txt->qs_append(']');
+  *end= data;
+  return 0;
+}
+
+
+bool Gis_polygon::get_mbr(MBR *mbr, const char **end) const
+{
+  uint32 n_linear_rings;
+  const char *data= m_data;
+
+  if (no_data(data, 4))
+    return 1;
+  n_linear_rings= uint4korr(data);
+  data+= 4;
+
+  while (n_linear_rings--)
+  {
+    if (!(data= get_mbr_for_points(mbr, data, 0)))
+      return 1;
+  }
+  *end= data;
+  return 0;
+}
+
+
/*
  Polygon area via the shoelace formula: the first ring's area minus the
  areas of the remaining (hole) rings.
*/
int Gis_polygon::area(double *ar, const char **end_of_data) const
{
  uint32 n_linear_rings;
  double result= -1.0;   /* sentinel: "no ring processed yet" */
  const char *data= m_data;

  if (no_data(data, 4))
    return 1;
  n_linear_rings= uint4korr(data);
  data+= 4;

  while (n_linear_rings--)
  {
    double prev_x, prev_y;
    double lr_area= 0;
    uint32 n_points;

    if (no_data(data, 4))
      return 1;
    n_points= uint4korr(data);
    if (n_points == 0 ||
        not_enough_points(data, n_points))
      return 1;
    get_point(&prev_x, &prev_y, data+4);
    data+= (4+POINT_DATA_SIZE);

    while (--n_points)				// One point is already read
    {
      double x, y;
      get_point(&x, &y, data);
      data+= POINT_DATA_SIZE;
      /* Shoelace term; sign depends on orientation, fixed up by fabs(). */
      lr_area+= (prev_x + x)* (prev_y - y);
      prev_x= x;
      prev_y= y;
    }
    lr_area= fabs(lr_area)/2;
    if (result == -1.0)
      result= lr_area;       /* outer ring */
    else
      result-= lr_area;      /* subtract hole */
  }
  *ar= fabs(result);
  *end_of_data= data;
  return 0;
}
+
+
+/*
+  Copy the first (exterior) ring into result as a standalone WKB
+  LineString: byte-order byte, type, point count, then the raw points.
+  Returns 1 on malformed data or allocation failure.
+*/
+int Gis_polygon::exterior_ring(String *result) const
+{
+  uint32 n_points, length;
+  const char *data= m_data + 4; // skip n_linerings
+
+  if (no_data(data, 4))
+    return 1;
+  n_points= uint4korr(data);
+  data+= 4;
+  length= n_points * POINT_DATA_SIZE;
+  /* 1 (byte order) + 4 (type) + 4 (count) + the point payload. */
+  if (not_enough_points(data, n_points) || result->reserve(1+4+4+ length))
+    return 1;
+
+  result->q_append((char) wkb_ndr);
+  result->q_append((uint32) wkb_linestring);
+  result->q_append(n_points);
+  result->q_append(data, n_points * POINT_DATA_SIZE); 
+  return 0;
+}
+
+
+/* Interior-ring count = total ring count minus the exterior ring. */
+int Gis_polygon::num_interior_ring(uint32 *n_int_rings) const
+{
+  if (no_data(m_data, 4))
+    return 1;
+  uint32 total_rings= uint4korr(m_data);
+  *n_int_rings= total_rings - 1;
+  return 0;
+}
+
+
+/*
+  Extract interior ring number num (1-based; ring 0 is the exterior and is
+  not addressable here) into result as a WKB LineString.
+  Returns 1 when num is out of range or the data is malformed.
+*/
+int Gis_polygon::interior_ring_n(uint32 num, String *result) const
+{
+  const char *data= m_data;
+  uint32 n_linear_rings;
+  uint32 n_points;
+  uint32 points_size;
+
+  if (no_data(data, 4))
+    return 1;
+  n_linear_rings= uint4korr(data);
+  data+= 4;
+
+  /* Valid interior rings are 1 .. n_linear_rings-1. */
+  if (num >= n_linear_rings || num < 1)
+    return 1;
+
+  /* Skip the exterior ring plus the num-1 rings before the target. */
+  while (num--)
+  {
+    if (no_data(data, 4))
+      return 1;
+    data+= 4 + uint4korr(data) * POINT_DATA_SIZE;
+  }
+  if (no_data(data, 4))
+    return 1;
+  n_points= uint4korr(data);
+  points_size= n_points * POINT_DATA_SIZE;
+  data+= 4;
+  if (not_enough_points(data, n_points) || result->reserve(1+4+4+ points_size))
+    return 1;
+
+  result->q_append((char) wkb_ndr);
+  result->q_append((uint32) wkb_linestring);
+  result->q_append(n_points);
+  result->q_append(data, points_size); 
+
+  return 0;
+}
+
+
+/*
+  Compute the polygon centroid (*x, *y).  Each ring's centroid is computed
+  with the standard signed-area formula; near-degenerate rings fall back to
+  a plain vertex average.  The first ring is the exterior; later rings are
+  treated as holes and removed by area-weighted subtraction.
+  Returns 1 on malformed data or a zero ring count.
+*/
+int Gis_polygon::centroid_xy(double *x, double *y) const
+{
+  uint32 n_linear_rings;
+  double UNINIT_VAR(res_area);
+  double UNINIT_VAR(res_cx), UNINIT_VAR(res_cy);
+  const char *data= m_data;
+  bool first_loop= 1;
+
+  if (no_data(data, 4) ||
+      (n_linear_rings= uint4korr(data)) == 0)
+    return 1;
+  data+= 4;
+
+  while (n_linear_rings--)
+  {
+    uint32 n_points, org_n_points;
+    double prev_x, prev_y;
+    double cur_area= 0;
+    double cur_cx= 0, cur_cy= 0;
+    double sum_cx= 0, sum_cy= 0;
+
+    if (no_data(data, 4))
+      return 1;
+    org_n_points= n_points= uint4korr(data);
+    data+= 4;
+    if (n_points == 0 || not_enough_points(data, n_points))
+      return 1;
+    get_point(&prev_x, &prev_y, data);
+    data+= POINT_DATA_SIZE;
+
+    while (--n_points)				// One point is already read
+    {
+      double tmp_x, tmp_y;
+      double loc_area;
+      get_point(&tmp_x, &tmp_y, data);
+      data+= POINT_DATA_SIZE;
+      /* Cross product of consecutive vertices: shoelace term (2x area). */
+      loc_area= prev_x * tmp_y - tmp_x * prev_y;
+      cur_area+= loc_area;
+      cur_cx+= tmp_x;
+      cur_cy+= tmp_y;
+      sum_cx+= (prev_x + tmp_x) * loc_area;
+      sum_cy+= (prev_y + tmp_y) * loc_area;
+
+      prev_x= tmp_x;
+      prev_y= tmp_y;
+    }
+
+    if (fabs(cur_area) > 1e-10)
+    {
+      /* Standard polygon centroid: sum((p_i + p_i+1) * cross) / (3 * 2A). */
+      cur_cx= sum_cx / cur_area / 3.0;
+      cur_cy= sum_cy / cur_area / 3.0;
+    }
+    else
+    {
+      /* Degenerate ring: average the vertices that were accumulated. */
+      cur_cx= cur_cx / (org_n_points - 1);
+      cur_cy= cur_cy / (org_n_points - 1);
+    }
+
+    cur_area= fabs(cur_area);
+
+    if (!first_loop)
+    {
+      /* Subtract the hole's contribution, weighted by its area.
+         NOTE(review): d_area can be 0 when hole and remaining areas are
+         equal, which would divide by zero — inherited upstream behavior. */
+      double d_area= fabs(res_area - cur_area);
+      res_cx= (res_area * res_cx - cur_area * cur_cx) / d_area;
+      res_cy= (res_area * res_cy - cur_area * cur_cy) / d_area;
+    }
+    else
+    {
+      first_loop= 0;
+      res_area= cur_area;
+      res_cx= cur_cx;
+      res_cy= cur_cy;
+    }
+  }
+
+  *x= res_cx;
+  *y= res_cy;
+  return 0;
+}
+
+
+/* Serialize the polygon's centroid as a Point into result. */
+int Gis_polygon::centroid(String *result) const
+{
+  double cx, cy;
+  return centroid_xy(&cx, &cy) ? 1 : create_point(result, cx, cy);
+}
+
+
+/*
+  Feed the polygon's rings to the Gcalc shape transporter.  While walking a
+  ring, consecutive duplicate points are dropped, and a point equal to the
+  ring's first point is only emitted if more distinct points follow — this
+  suppresses the conventional closing point.  Returns 1 on any error.
+*/
+int Gis_polygon::store_shapes(Gcalc_shape_transporter *trn) const
+{
+  uint32 n_linear_rings;
+  const char *data= m_data;
+  double first_x, first_y;
+  double prev_x, prev_y;
+  int was_equal_first= 0;
+
+  if (trn->start_poly())
+    return 1;
+
+  if (no_data(data, 4))
+    return 1;
+  n_linear_rings= uint4korr(data);
+  data+= 4;
+
+  while (n_linear_rings--)
+  {
+    uint32 n_points;
+
+    if (no_data(data, 4))
+      return 1;
+    n_points= uint4korr(data);
+    data+= 4;
+    if (!n_points || not_enough_points(data, n_points))
+      return 1;
+
+    trn->start_ring();
+    get_point(&first_x, &first_y, data);
+    data+= POINT_DATA_SIZE;
+
+    prev_x= first_x;
+    prev_y= first_y;
+    if (trn->add_point(first_x, first_y))
+      return 1;
+
+    if (--n_points == 0)
+      goto single_point_ring;
+
+    /* Loop over all but the last point; the last one is skipped below. */
+    while (--n_points)
+    {
+      double x, y;
+      get_point(&x, &y, data);
+      data+= POINT_DATA_SIZE;
+      if (x == prev_x && y == prev_y)
+        continue;                       // drop consecutive duplicates
+      prev_x= x;
+      prev_y= y;
+      if (was_equal_first)
+      {
+        /* A point equal to the first turned out not to be the closing
+           point after all — emit the deferred copy now. */
+        if (trn->add_point(first_x, first_y))
+          return 1;
+        was_equal_first= 0;
+      }
+      if (x == first_x && y == first_y)
+      {
+        was_equal_first= 1;             // defer: may be the ring closure
+        continue;
+      }
+      if (trn->add_point(x, y))
+        return 1;
+    }
+    /* Skip the final point without emitting it (ring closure). */
+    data+= POINT_DATA_SIZE;
+
+single_point_ring:
+    trn->complete_ring();
+  }
+
+  trn->complete_poly();
+  return 0;
+}
+
+
+/* Class descriptor used by the geometry type registry. */
+const Geometry::Class_info *Gis_polygon::get_class_info() const
+{
+  return &polygon_class;
+}
+
+
+/***************************** MultiPoint *******************************/
+
+/*
+  Total stored size of the MultiPoint: 4-byte count plus, per member, a WKB
+  header and the point payload.  GET_SIZE_ERROR if the buffer is too short.
+*/
+uint32 Gis_multi_point::get_data_size() const 
+{
+  uint32 n_points;
+
+  if (no_data(m_data, 4) ||
+      not_enough_points(m_data+4, (n_points= uint4korr(m_data)),
+        WKB_HEADER_SIZE))
+     return GET_SIZE_ERROR;
+  return  4 + n_points * (POINT_DATA_SIZE + WKB_HEADER_SIZE);
+}
+
+
+/*
+  Parse a comma-separated WKT point list into wkb.  The point count is not
+  known up front, so 4 bytes are reserved first and patched at the end.
+  Returns 1 (true) on parse or allocation failure.
+*/
+bool Gis_multi_point::init_from_wkt(Gis_read_stream *trs, String *wkb)
+{
+  uint32 n_points= 0;
+  uint32 np_pos= wkb->length();
+  Gis_point p;
+
+  if (wkb->reserve(4, 512))
+    return 1;
+  wkb->length(wkb->length()+4);			// Reserve space for points
+
+  for (;;)
+  {
+    if (wkb->reserve(1 + 4, 512))
+      return 1;
+    wkb->q_append((char) wkb_ndr);
+    wkb->q_append((uint32) wkb_point);
+    if (p.init_from_wkt(trs, wkb))
+      return 1;
+    n_points++;
+    if (trs->skip_char(','))			// Didn't find ','
+      break;
+  }
+  wkb->write_at_position(np_pos, n_points);	// Store number of found points
+  return 0;
+}
+
+
+/*
+  Build the native MultiPoint image from a geometry-operation result.  The
+  opres stream is a plain sequence of (4 bytes + two doubles) records, so
+  the point count is derived from res_len.  Returns the number of opres
+  bytes consumed, or 0 on failure.
+*/
+uint Gis_multi_point::init_from_opresult(String *bin,
+                                         const char *opres, uint res_len)
+{
+  uint bin_size, n_points;
+  Gis_point p;
+  const char *opres_end;
+
+  n_points= res_len/(4+8*2);
+  bin_size= n_points * (WKB_HEADER_SIZE + POINT_DATA_SIZE) + 4;
+ 
+  if (bin->reserve(bin_size, 512))
+    return 0;
+    
+  bin->q_append(n_points);
+  opres_end= opres + res_len;
+  for (; opres < opres_end; opres+= (4 + 8*2))
+  {
+    bin->q_append((char)wkb_ndr);
+    bin->q_append((uint32)wkb_point);
+    /* Skip the 4-byte record prefix; copy only the coordinates. */
+    if (!p.init_from_wkb(opres + 4, POINT_DATA_SIZE, wkb_ndr, bin))
+      return 0;
+  }
+  return res_len;
+}
+
+
+/*
+  Parse a WKB MultiPoint body (count + header-prefixed points) into res.
+  Each member point declares its own byte order in wkb[0].  Returns the
+  number of bytes consumed, or 0 on malformed/oversized input.
+*/
+uint Gis_multi_point::init_from_wkb(const char *wkb, uint len, wkbByteOrder bo,
+                                    String *res)
+{
+  uint32 n_points;
+  uint proper_size;
+  Gis_point p;
+  const char *wkb_end;
+
+  if (len < 4 ||
+      (n_points= wkb_get_uint(wkb, bo)) > max_n_points)
+    return 0;
+  proper_size= 4 + n_points * (WKB_HEADER_SIZE + POINT_DATA_SIZE);
+ 
+  if (len < proper_size || res->reserve(proper_size))
+    return 0;
+    
+  res->q_append(n_points);
+  wkb_end= wkb + proper_size;
+  for (wkb+=4; wkb < wkb_end; wkb+= (WKB_HEADER_SIZE + POINT_DATA_SIZE))
+  {
+    res->q_append((char)wkb_ndr);
+    res->q_append((uint32)wkb_point);
+    if (!p.init_from_wkb(wkb + WKB_HEADER_SIZE,
+                         POINT_DATA_SIZE, (wkbByteOrder) wkb[0], res))
+      return 0;
+  }
+  return proper_size;
+}
+
+
+/*
+  Build the MultiPoint from a GeoJSON coordinate array.  The member count
+  is back-patched once the array has been scanned; an empty array is an
+  error (GEOJ_EMPTY_COORDINATES).  Returns TRUE on failure.
+*/
+bool Gis_multi_point::init_from_json(json_engine_t *je, bool er_on_3D,
+                                     String *wkb)
+{
+  uint32 n_points= 0;
+  uint32 np_pos= wkb->length();
+  Gis_point p;
+
+  if (json_read_value(je))
+    return TRUE;
+
+  if (je->value_type != JSON_VALUE_ARRAY)
+  {
+    je->s.error= GEOJ_INCORRECT_GEOJSON;
+    return TRUE;
+  }
+
+  if (wkb->reserve(4, 512))
+    return TRUE;
+  wkb->length(wkb->length()+4);	// Reserve space for n_points  
+
+  while (json_scan_next(je) == 0 && je->state != JST_ARRAY_END)
+  {
+    DBUG_ASSERT(je->state == JST_VALUE);
+
+    if (wkb->reserve(1 + 4, 512))
+      return TRUE;
+    wkb->q_append((char) wkb_ndr);
+    wkb->q_append((uint32) wkb_point);
+
+    if (p.init_from_json(je, er_on_3D, wkb))
+      return TRUE;
+    n_points++;
+  }
+
+  if (je->s.error)
+    return TRUE;
+
+  if (n_points == 0)
+  {
+    je->s.error= Geometry::GEOJ_EMPTY_COORDINATES;
+    return TRUE;
+  }
+
+  wkb->write_at_position(np_pos, n_points);
+  return FALSE;
+}
+
+
+/*
+  Append the point list as WKT text to txt; *end is set past the consumed
+  bytes.  Returns 1 on malformed data or allocation failure.
+*/
+bool Gis_multi_point::get_data_as_wkt(String *txt, const char **end) const
+{
+  uint32 n_points;
+  if (no_data(m_data, 4))
+    return 1;
+
+  n_points= uint4korr(m_data);
+  if (n_points > max_n_points ||
+      not_enough_points(m_data+4, n_points, WKB_HEADER_SIZE) ||
+      txt->reserve(((MAX_DIGITS_IN_DOUBLE + 1) * 2 + 1) * n_points))
+    return 1;
+  *end= append_points(txt, n_points, m_data+4, WKB_HEADER_SIZE);
+  txt->length(txt->length()-1);			// Remove end ','
+  return 0;
+}
+
+
+/*
+  Append the point list as a GeoJSON coordinate array to txt; *end is set
+  past the consumed bytes.  Returns 1 on malformed data or OOM.
+*/
+bool Gis_multi_point::get_data_as_json(String *txt, uint max_dec_digits,
+                                       const char **end) const
+{
+  uint32 n_points;
+  if (no_data(m_data, 4))
+    return 1;
+
+  n_points= uint4korr(m_data);
+  if (n_points > max_n_points ||
+      not_enough_points(m_data+4, n_points, WKB_HEADER_SIZE) ||
+      txt->reserve((MAX_DIGITS_IN_DOUBLE * 2 + 6) * n_points + 2))
+    return 1;
+  *end= append_json_points(txt, max_dec_digits, n_points, m_data+4,
+                           WKB_HEADER_SIZE);
+  return 0;
+}
+
+
+/* MBR over all member points; failure is signalled by a NULL *end. */
+bool Gis_multi_point::get_mbr(MBR *mbr, const char **end) const
+{
+  *end= get_mbr_for_points(mbr, m_data, WKB_HEADER_SIZE);
+  return *end == NULL;
+}
+
+
+/* The member count is the leading uint32 of the stored data. */
+int Gis_multi_point::num_geometries(uint32 *num) const
+{
+  const char *ptr= m_data;
+  *num= uint4korr(ptr);
+  return 0;
+}
+
+
+/*
+  Copy member number num (1-based) into result, including its WKB header.
+  Members are fixed-size, so the target is addressed directly.
+  Returns 1 when num is out of range or the data is truncated.
+*/
+int Gis_multi_point::geometry_n(uint32 num, String *result) const
+{
+  const char *data= m_data;
+  uint32 n_points;
+
+  if (no_data(data, 4))
+    return 1;
+  n_points= uint4korr(data);
+  /* Jump straight to the requested member; validity is checked below. */
+  data+= 4+ (num - 1) * (WKB_HEADER_SIZE + POINT_DATA_SIZE);
+
+  if (num > n_points || num < 1 ||
+      no_data(data, WKB_HEADER_SIZE + POINT_DATA_SIZE) ||
+      result->reserve(WKB_HEADER_SIZE + POINT_DATA_SIZE))
+    return 1;
+
+  result->q_append(data, WKB_HEADER_SIZE + POINT_DATA_SIZE);
+  return 0;
+}
+
+
+/* Feed every member point of the collection to the shape transporter. */
+int Gis_multi_point::store_shapes(Gcalc_shape_transporter *trn) const
+{
+  const char *ptr= m_data;
+  Gis_point member;
+
+  if (no_data(ptr, 4))
+    return 1;
+  uint32 count= uint4korr(ptr);
+  ptr+= 4;
+
+  if (trn->start_collection(count))
+    return 1;
+
+  for (uint32 i= 0; i < count; i++)
+  {
+    /* Each member carries its own WKB header before the coordinates. */
+    if (no_data(ptr, WKB_HEADER_SIZE))
+      return 1;
+    ptr+= WKB_HEADER_SIZE;
+    member.set_data_ptr(ptr, (uint32) (m_data_end - ptr));
+    if (member.store_shapes(trn))
+      return 1;
+    ptr+= member.get_data_size();
+  }
+  return 0;
+}
+
+
+/* Class descriptor used by the geometry type registry. */
+const Geometry::Class_info *Gis_multi_point::get_class_info() const
+{
+  return &multipoint_class;
+}
+
+
+/**
+  Function that calculates the spherical distance between Multipoint
+  geometries.  In case there is a single point in the Multipoint geometries,
+  calculate_haversine() can handle such a case.  Otherwise, a new geometry
+  (Point) has to be constructed.
+
+  @param    g      pointer to the Geometry
+  @param    r      sphere radius
+  @param    result pointer to the result
+  @param    err    pointer to the error obtained from calculate_haversine()
+
+  @return state
+  @retval TRUE  failed
+  @retval FALSE success
+*/
+int Gis_multi_point::spherical_distance_multipoints(Geometry *g, const double r,
+                                                    double *result, int *err)
+{
+  /* Room for one synthetic Point: count + WKB header + coords + NUL. */
+  const uint32 len= 4 + WKB_HEADER_SIZE + POINT_DATA_SIZE + 1;
+  // Check how many points are stored in Multipoints
+  uint32 num_of_points1, num_of_points2;
+  // To find the minimum radius it cannot be greater than Earth radius
+  double res= 6370986.0;
+
+  /* From Item_func_sphere_distance::spherical_distance_points,
+     we are sure that there will be multiple points and we have to construct
+     Point geometry and return the smallest result.
+  */
+  num_geometries(&num_of_points1);
+  DBUG_ASSERT(num_of_points1 >= 1);
+  g->num_geometries(&num_of_points2);
+  DBUG_ASSERT(num_of_points2 >= 1);
+
+  for (uint32 i=1; i <= num_of_points1; i++)
+  {
+    Geometry_buffer buff_temp;
+    Geometry *temp;
+    double temp_res= 0.0;
+    char s[len];
+    /* Address of the i-th member's coordinates inside this MultiPoint. */
+    const char *pt_ptr= get_data_ptr()+
+      4+WKB_HEADER_SIZE*i + POINT_DATA_SIZE*(i-1);
+    // First 4 bytes are handled already, make sure to create a Point
+    memset(s + 4, Geometry::wkb_point, 1);
+    if (no_data(pt_ptr, POINT_DATA_SIZE))
+      return 1;
+    memcpy(s + 5, this->get_data_ptr() + 5, 4);
+    memcpy(s + 4 + WKB_HEADER_SIZE, pt_ptr, POINT_DATA_SIZE);
+    s[len-1]= '\0';
+    temp= Geometry::construct(&buff_temp, s, len);
+    if (!temp)
+      return 1;
+    // Optimization for single Multipoint
+    if (num_of_points2 == 1)
+    {
+      /* Fixed: static_cast was missing its target type (compile error). */
+      *result= static_cast<Gis_point *>(temp)->calculate_haversine(g, r, err);
+      return 0;
+    }
+    for (uint32 j=1; j<= num_of_points2; j++)
+    {
+      Geometry_buffer buff_temp2;
+      Geometry *temp2;
+      char s2[len];
+      /* Renamed from pt_ptr to avoid shadowing the outer-loop pointer. */
+      const char *pt_ptr2= g->get_data_ptr()+
+        4+WKB_HEADER_SIZE*j + POINT_DATA_SIZE*(j-1);
+      // First 4 bytes are handled already, make sure to create a Point
+      memset(s2 + 4, Geometry::wkb_point, 1);
+      if (g->no_data(pt_ptr2, POINT_DATA_SIZE))
+        return 1;
+      memcpy(s2 + 5, g->get_data_ptr() + 5, 4);
+      memcpy(s2 + 4 + WKB_HEADER_SIZE, pt_ptr2, POINT_DATA_SIZE);
+      s2[len-1]= '\0';
+      temp2= Geometry::construct(&buff_temp2, s2, len);
+      if (!temp2)
+        return 1;
+      temp_res= static_cast<Gis_point *>(temp)->calculate_haversine(temp2, r, err);
+      if (res > temp_res)
+        res= temp_res;
+    }
+  }
+  *result= res;
+  return 0;
+}
+
+
+/***************************** MultiLineString *******************************/
+
+/*
+  Total stored size of the MultiLineString: count plus, per component, a
+  WKB header, a point count and the points.  GET_SIZE_ERROR on truncation.
+*/
+uint32 Gis_multi_line_string::get_data_size() const 
+{
+  uint32 n_line_strings;
+  uint32 n_points;
+  const char *data= m_data;
+
+  if (no_data(data, 4))
+    return GET_SIZE_ERROR;
+  n_line_strings= uint4korr(data);
+  data+= 4;
+
+  while (n_line_strings--)
+  {
+    if (no_data(data, WKB_HEADER_SIZE + 4) ||
+        not_enough_points(data + WKB_HEADER_SIZE+4,
+                          (n_points= uint4korr(data + WKB_HEADER_SIZE))))
+      return GET_SIZE_ERROR;
+    data+= (WKB_HEADER_SIZE + 4 + n_points*POINT_DATA_SIZE);
+  }
+  /* Final check that the computed end does not pass the buffer end. */
+  if (no_data(data, 0))
+    return GET_SIZE_ERROR;
+  return (uint32) (data - m_data);
+}
+
+
+/*
+  Parse a comma-separated WKT list of parenthesised linestrings into wkb;
+  the component count is back-patched after the scan.  Returns 1 on error.
+*/
+bool Gis_multi_line_string::init_from_wkt(Gis_read_stream *trs, String *wkb)
+{
+  uint32 n_line_strings= 0;
+  uint32 ls_pos= wkb->length();
+
+  if (wkb->reserve(4, 512))
+    return 1;
+  wkb->length(wkb->length()+4);			// Reserve space for points
+  
+  for (;;)
+  {
+    Gis_line_string ls;
+
+    if (wkb->reserve(1 + 4, 512))
+      return 1;
+    wkb->q_append((char) wkb_ndr); wkb->q_append((uint32) wkb_linestring);
+
+    if (trs->check_next_symbol('(') ||
+	ls.init_from_wkt(trs, wkb) ||
+	trs->check_next_symbol(')'))
+      return 1;
+    n_line_strings++;
+    if (trs->skip_char(','))			// Didn't find ','
+      break;
+  }
+  wkb->write_at_position(ls_pos, n_line_strings);
+  return 0;
+}
+
+
+/*
+  Build the native MultiLineString image from a geometry-operation result
+  by repeatedly delegating to Gis_line_string::init_from_opresult until the
+  input is exhausted.  Returns the bytes consumed, or 0 on failure.
+*/
+uint Gis_multi_line_string::init_from_opresult(String *bin,
+                                               const char *opres, uint res_len)
+{
+  const char *opres_orig= opres;
+  int ns_pos= bin->length();
+  uint n_linestring= 0;
+
+  if (bin->reserve(4, 512))
+    return 0;
+  bin->q_append(n_linestring);
+  
+  while (res_len)
+  {
+    Gis_line_string ls;
+    int ls_len;
+
+    if (bin->reserve(WKB_HEADER_SIZE, 512))
+      return 0;
+
+    bin->q_append((char) wkb_ndr);
+    bin->q_append((uint32) wkb_linestring);
+
+    if (!(ls_len= ls.init_from_opresult(bin, opres, res_len)))
+      return 0;
+    opres+= ls_len;
+    res_len-= ls_len;
+    n_linestring++;
+  }
+  /* Patch the real component count over the placeholder written above. */
+  bin->write_at_position(ns_pos, n_linestring);
+  return (uint) (opres - opres_orig);
+}
+
+
+/*
+  Parse a WKB MultiLineString body (component count followed by
+  header-prefixed linestrings) into the native format in res.
+  Returns the number of source bytes consumed, 0 on any error.
+*/
+uint Gis_multi_line_string::init_from_wkb(const char *wkb, uint len,
+                                          wkbByteOrder bo, String *res)
+{
+  uint32 n_line_strings;
+  const char *wkb_orig= wkb;
+
+  if (len < 4 ||
+      (n_line_strings= wkb_get_uint(wkb, bo))< 1)
+    return 0;
+
+  if (res->reserve(4, 512))
+    return 0;
+  res->q_append(n_line_strings);
+  
+  wkb+= 4;
+  while (n_line_strings--)
+  {
+    Gis_line_string ls;
+    int ls_len;
+
+    if ((len < WKB_HEADER_SIZE) ||
+        res->reserve(WKB_HEADER_SIZE, 512))
+      return 0;
+
+    /* Components are re-emitted in NDR order in the native format. */
+    res->q_append((char) wkb_ndr);
+    res->q_append((uint32) wkb_linestring);
+
+    /* Each source component declares its own byte order in wkb[0]. */
+    if (!(ls_len= ls.init_from_wkb(wkb + WKB_HEADER_SIZE, len,
+                                   (wkbByteOrder) wkb[0], res)))
+      return 0;
+    ls_len+= WKB_HEADER_SIZE;  /* fixed: stray second semicolon removed */
+    wkb+= ls_len;
+    len-= ls_len;
+  }
+  return (uint) (wkb - wkb_orig);
+}
+
+
+/*
+  Build the MultiLineString from a GeoJSON array of linestring coordinate
+  arrays; the component count is back-patched after the scan.  An empty
+  array is an error (GEOJ_EMPTY_COORDINATES).  Returns TRUE on failure.
+*/
+bool Gis_multi_line_string::init_from_json(json_engine_t *je, bool er_on_3D,
+                                           String *wkb)
+{
+  uint32 n_line_strings= 0;
+  uint32 ls_pos= wkb->length();
+
+  if (json_read_value(je))
+    return TRUE;
+
+  if (je->value_type != JSON_VALUE_ARRAY)
+  {
+    je->s.error= GEOJ_INCORRECT_GEOJSON;
+    return TRUE;
+  }
+
+  if (wkb->reserve(4, 512))
+    return TRUE;
+  wkb->length(wkb->length()+4);	// Reserve space for n_rings
+
+  while (json_scan_next(je) == 0 && je->state != JST_ARRAY_END)
+  {
+    Gis_line_string ls;
+    DBUG_ASSERT(je->state == JST_VALUE);
+
+    if (wkb->reserve(1 + 4, 512))
+      return TRUE;
+    wkb->q_append((char) wkb_ndr);
+    wkb->q_append((uint32) wkb_linestring);
+
+    if (ls.init_from_json(je, er_on_3D, wkb))
+      return TRUE;
+
+    n_line_strings++;
+  }
+
+  if (je->s.error)
+    return TRUE;
+
+  if (n_line_strings == 0)
+  {
+    je->s.error= Geometry::GEOJ_EMPTY_COORDINATES;
+    return TRUE;
+  }
+
+  wkb->write_at_position(ls_pos, n_line_strings);
+  return FALSE;
+}
+
+
+/*
+  Append the components as WKT "(...),(...)" text to txt; *end is set past
+  the consumed bytes.  Returns 1 on malformed data or OOM.
+*/
+bool Gis_multi_line_string::get_data_as_wkt(String *txt, 
+					     const char **end) const
+{
+  uint32 n_line_strings;
+  const char *data= m_data;
+
+  if (no_data(data, 4))
+    return 1;
+  n_line_strings= uint4korr(data);
+  data+= 4;
+
+  while (n_line_strings--)
+  {
+    uint32 n_points;
+    if (no_data(data, (WKB_HEADER_SIZE + 4)))
+      return 1;
+    n_points= uint4korr(data + WKB_HEADER_SIZE);
+    data+= WKB_HEADER_SIZE + 4;
+    if (not_enough_points(data, n_points) ||
+	txt->reserve(2 + ((MAX_DIGITS_IN_DOUBLE + 1) * 2 + 1) * n_points))
+      return 1;
+    txt->qs_append('(');
+    data= append_points(txt, n_points, data, 0);
+    /* Overwrite the trailing ',' left by append_points with ')'. */
+    (*txt) [txt->length() - 1]= ')';
+    txt->qs_append(',');
+  }
+  txt->length(txt->length() - 1);       // Remove trailing ','
+  *end= data;
+  return 0;
+}
+
+
+/*
+  Append the components as a GeoJSON array of coordinate arrays to txt;
+  *end is set past the consumed bytes.  Returns 1 on error.
+*/
+bool Gis_multi_line_string::get_data_as_json(String *txt, uint max_dec_digits,
+                                             const char **end) const
+{
+  uint32 n_line_strings;
+  const char *data= m_data;
+
+  if (no_data(data, 4) || txt->reserve(1, 512))
+    return 1;
+  n_line_strings= uint4korr(data);
+  data+= 4;
+
+  txt->qs_append('[');
+  while (n_line_strings--)
+  {
+    uint32 n_points;
+    if (no_data(data, (WKB_HEADER_SIZE + 4)))
+      return 1;
+    n_points= uint4korr(data + WKB_HEADER_SIZE);
+    data+= WKB_HEADER_SIZE + 4;
+    if (not_enough_points(data, n_points) ||
+	txt->reserve(2 + (MAX_DIGITS_IN_DOUBLE * 2 + 6) * n_points))
+      return 1;
+    data= append_json_points(txt, max_dec_digits, n_points, data, 0);
+    txt->qs_append(", ", 2);
+  }
+  txt->length(txt->length() - 2);       // Remove trailing ", "
+  txt->qs_append(']');
+  *end= data;
+  return 0;
+}
+
+
+/* MBR over every component linestring; *end is set just past the data. */
+bool Gis_multi_line_string::get_mbr(MBR *mbr, const char **end) const
+{
+  const char *ptr= m_data;
+
+  if (no_data(ptr, 4))
+    return 1;
+  uint32 n_lines= uint4korr(ptr);
+  ptr+= 4;
+
+  for (; n_lines != 0; n_lines--)
+  {
+    ptr+= WKB_HEADER_SIZE;              // skip the component's WKB header
+    ptr= get_mbr_for_points(mbr, ptr, 0);
+    if (ptr == NULL)
+      return 1;
+  }
+  *end= ptr;
+  return 0;
+}
+
+
+/* The component count is the leading uint32 of the stored data. */
+int Gis_multi_line_string::num_geometries(uint32 *num) const
+{
+  const char *ptr= m_data;
+  *num= uint4korr(ptr);
+  return 0;
+}
+
+
+/*
+  Append component number num (1-based), including its WKB header, to
+  result.  Components are variable-sized, so the preceding ones must be
+  walked.  Returns non-zero on range/format errors.
+*/
+int Gis_multi_line_string::geometry_n(uint32 num, String *result) const
+{
+  uint32 n_line_strings, n_points, length;
+  const char *data= m_data;
+
+  if (no_data(data, 4))
+    return 1;
+  n_line_strings= uint4korr(data);
+  data+= 4;
+
+  if ((num > n_line_strings) || (num < 1))
+    return 1;
+ 
+  for (;;)
+  {
+    if (no_data(data, WKB_HEADER_SIZE + 4))
+      return 1;
+    n_points= uint4korr(data + WKB_HEADER_SIZE);
+    length= WKB_HEADER_SIZE + 4+ POINT_DATA_SIZE * n_points;
+    if (not_enough_points(data+WKB_HEADER_SIZE+4, n_points))
+      return 1;
+    if (!--num)
+      break;                            // data now points at the target
+    data+= length;
+  }
+  return result->append(data, length, (uint32) 0);
+}
+
+
+/*
+  Sum the lengths of all component linestrings into *len; *end is set past
+  the consumed bytes.  Returns 1 on malformed data.
+*/
+int Gis_multi_line_string::geom_length(double *len, const char **end) const
+{
+  uint32 n_line_strings;
+  const char *data= m_data;
+  const char *line_end;
+
+  if (no_data(data, 4))
+    return 1;
+  n_line_strings= uint4korr(data);
+  data+= 4;
+
+  *len=0;
+  while (n_line_strings--)
+  {
+    double ls_len;
+    Gis_line_string ls;
+    data+= WKB_HEADER_SIZE;
+    ls.set_data_ptr(data, (uint32) (m_data_end - data));
+    if (ls.geom_length(&ls_len, &line_end))
+      return 1;
+    *len+= ls_len;
+    /*
+      We know here that ls was ok, so we can call the trivial function
+      Gis_line_string::get_data_size without error checking
+    */
+    data+= ls.get_data_size();
+  }
+  *end= data;
+  return 0;
+}
+
+
+/*
+  *closed is 1 only if every component linestring is closed; the scan stops
+  early at the first open one.  Returns 1 on malformed data.
+*/
+int Gis_multi_line_string::is_closed(int *closed) const
+{
+  uint32 n_line_strings;
+  const char *data= m_data;
+
+  /* Skip the count and the first component's WKB header up front. */
+  if (no_data(data, 4 + WKB_HEADER_SIZE))
+    return 1;
+  n_line_strings= uint4korr(data);
+  data+= 4 + WKB_HEADER_SIZE;
+
+  while (n_line_strings--)
+  {
+    Gis_line_string ls;
+    if (no_data(data, 0))
+      return 1;
+    ls.set_data_ptr(data, (uint32) (m_data_end - data));
+    if (ls.is_closed(closed))
+      return 1;
+    if (!*closed)
+      return 0;                 // one open component decides the answer
+    /*
+      We know here that ls was ok, so we can call the trivial function
+      Gis_line_string::get_data_size without error checking
+    */
+    data+= ls.get_data_size() + WKB_HEADER_SIZE;
+  }
+  return 0;
+}
+
+
+/* Feed every component linestring to the shape transporter. */
+int Gis_multi_line_string::store_shapes(Gcalc_shape_transporter *trn) const
+{
+  const char *ptr= m_data;
+  Gis_line_string component;
+
+  if (no_data(ptr, 4))
+    return 1;
+  uint32 count= uint4korr(ptr);
+  ptr+= 4;
+
+  if (trn->start_collection(count))
+    return 1;
+
+  for (uint32 i= 0; i < count; i++)
+  {
+    /* Each component carries its own WKB header before its body. */
+    if (no_data(ptr, WKB_HEADER_SIZE))
+      return 1;
+    ptr+= WKB_HEADER_SIZE;
+    component.set_data_ptr(ptr, (uint32) (m_data_end - ptr));
+    if (component.store_shapes(trn))
+      return 1;
+    ptr+= component.get_data_size();
+  }
+  return 0;
+}
+
+
+/* Class descriptor used by the geometry type registry. */
+const Geometry::Class_info *Gis_multi_line_string::get_class_info() const
+{
+  return &multilinestring_class;
+}
+
+
+/***************************** MultiPolygon *******************************/
+
+/*
+  Total stored size of the MultiPolygon: for each polygon a WKB header, a
+  ring count, and per ring a point count plus points.  GET_SIZE_ERROR on
+  truncation.
+*/
+uint32 Gis_multi_polygon::get_data_size() const 
+{
+  uint32 n_polygons;
+  uint32 n_points;
+  const char *data= m_data;
+
+  if (no_data(data, 4))
+    return GET_SIZE_ERROR;
+  n_polygons= uint4korr(data);
+  data+= 4;
+
+  while (n_polygons--)
+  {
+    uint32 n_linear_rings;
+    if (no_data(data, 4 + WKB_HEADER_SIZE))
+      return GET_SIZE_ERROR;
+
+    n_linear_rings= uint4korr(data + WKB_HEADER_SIZE);
+    data+= 4 + WKB_HEADER_SIZE;
+
+    while (n_linear_rings--)
+    {
+      if (no_data(data, 4) ||
+          not_enough_points(data+4, (n_points= uint4korr(data))))
+	return GET_SIZE_ERROR;
+      data+= 4 + n_points * POINT_DATA_SIZE;
+    }
+  }
+  /* Final check that the computed end does not pass the buffer end. */
+  if (no_data(data, 0))
+    return GET_SIZE_ERROR;
+  return (uint32) (data - m_data);
+}
+
+
+/*
+  Parse a comma-separated WKT list of parenthesised polygons into wkb; the
+  polygon count is back-patched after the scan.  Returns 1 on error.
+*/
+bool Gis_multi_polygon::init_from_wkt(Gis_read_stream *trs, String *wkb)
+{
+  uint32 n_polygons= 0;
+  int np_pos= wkb->length();
+  Gis_polygon p;
+
+  if (wkb->reserve(4, 512))
+    return 1;
+  wkb->length(wkb->length()+4);			// Reserve space for points
+
+  for (;;)  
+  {
+    if (wkb->reserve(1 + 4, 512))
+      return 1;
+    wkb->q_append((char) wkb_ndr);
+    wkb->q_append((uint32) wkb_polygon);
+
+    if (trs->check_next_symbol('(') ||
+	p.init_from_wkt(trs, wkb) ||
+	trs->check_next_symbol(')'))
+      return 1;
+    n_polygons++;
+    if (trs->skip_char(','))			// Didn't find ','
+      break;
+  }
+  wkb->write_at_position(np_pos, n_polygons);
+  return 0;
+}
+
+
+/*
+  Parse a WKB MultiPolygon body (polygon count followed by header-prefixed
+  polygons) into res.  Each source polygon declares its own byte order in
+  wkb[0].  Returns the bytes consumed, or 0 on error.
+*/
+uint Gis_multi_polygon::init_from_wkb(const char *wkb, uint len,
+                                      wkbByteOrder bo, String *res)
+{
+  uint32 n_poly;
+  const char *wkb_orig= wkb;
+
+  if (len < 4)
+    return 0;
+  n_poly= wkb_get_uint(wkb, bo);
+
+  if (res->reserve(4, 512))
+    return 0;
+  res->q_append(n_poly);
+  
+  wkb+=4;
+  while (n_poly--)
+  {
+    Gis_polygon p;
+    int p_len;
+
+    if (len < WKB_HEADER_SIZE ||
+        res->reserve(WKB_HEADER_SIZE, 512))
+      return 0;
+    res->q_append((char) wkb_ndr);
+    res->q_append((uint32) wkb_polygon);
+
+    if (!(p_len= p.init_from_wkb(wkb + WKB_HEADER_SIZE, len,
+                                 (wkbByteOrder) wkb[0], res)))
+      return 0;
+    p_len+= WKB_HEADER_SIZE;
+    wkb+= p_len;
+    len-= p_len;
+  }
+  return (uint) (wkb - wkb_orig);
+}
+
+
+/*
+  Build the native MultiPolygon image from a geometry-operation result by
+  repeatedly delegating to Gis_polygon::init_from_opresult until the input
+  is exhausted.  Returns the bytes consumed, or 0 on failure.
+*/
+uint Gis_multi_polygon::init_from_opresult(String *bin,
+                                           const char *opres, uint res_len)
+{
+  Gis_polygon p;
+  const char *opres_orig= opres;
+  uint p_len;
+  uint32 n_poly= 0;
+  uint32 np_pos= bin->length();
+
+  if (bin->reserve(4, 512))
+    return 0;
+    
+  bin->q_append(n_poly);
+  while (res_len)
+  {
+    if (bin->reserve(1 + 4, 512))
+      return 0;
+    bin->q_append((char)wkb_ndr);
+    bin->q_append((uint32)wkb_polygon);
+    if (!(p_len= p.init_from_opresult(bin, opres, res_len)))
+      return 0;
+    opres+= p_len;
+    res_len-= p_len;
+    n_poly++;
+  }
+  /* Patch the real polygon count over the placeholder written above. */
+  bin->write_at_position(np_pos, n_poly);
+  return (uint)(opres - opres_orig);
+}
+
+
+/*
+  Build the MultiPolygon from a GeoJSON array of polygon coordinate arrays;
+  the polygon count is back-patched after the scan.  An empty array is an
+  error (GEOJ_EMPTY_COORDINATES).  Returns TRUE on failure.
+*/
+bool Gis_multi_polygon::init_from_json(json_engine_t *je, bool er_on_3D,
+                                       String *wkb)
+{
+  uint32 n_polygons= 0;
+  int np_pos= wkb->length();
+  Gis_polygon p;
+
+  if (json_read_value(je))
+    return TRUE;
+
+  if (je->value_type != JSON_VALUE_ARRAY)
+  {
+    je->s.error= GEOJ_INCORRECT_GEOJSON;
+    return TRUE;
+  }
+
+  if (wkb->reserve(4, 512))
+    return TRUE;
+  wkb->length(wkb->length()+4);	// Reserve space for n_rings
+
+  while (json_scan_next(je) == 0 && je->state != JST_ARRAY_END)
+  {
+    DBUG_ASSERT(je->state == JST_VALUE);
+
+    if (wkb->reserve(1 + 4, 512))
+      return TRUE;
+    wkb->q_append((char) wkb_ndr);
+    wkb->q_append((uint32) wkb_polygon);
+
+    if (p.init_from_json(je, er_on_3D, wkb))
+      return TRUE;
+
+    n_polygons++;
+  }
+
+  if (je->s.error)
+    return TRUE;
+
+  if (n_polygons == 0)
+  {
+    je->s.error= Geometry::GEOJ_EMPTY_COORDINATES;
+    return TRUE;
+  }
+  wkb->write_at_position(np_pos, n_polygons);
+  return FALSE;
+}
+
+
+/*
+  Append the polygons as nested WKT "((...)),((...))" text to txt; *end is
+  set past the consumed bytes.  Returns 1 on malformed data or OOM.
+*/
+bool Gis_multi_polygon::get_data_as_wkt(String *txt, const char **end) const
+{
+  uint32 n_polygons;
+  const char *data= m_data;
+
+  if (no_data(data, 4))
+    return 1;
+  n_polygons= uint4korr(data);
+  data+= 4;
+
+  while (n_polygons--)
+  {
+    uint32 n_linear_rings;
+    if (no_data(data, 4 + WKB_HEADER_SIZE) ||
+	txt->reserve(1, 512))
+      return 1;
+    n_linear_rings= uint4korr(data+WKB_HEADER_SIZE);
+    data+= 4 + WKB_HEADER_SIZE;
+    txt->q_append('(');
+
+    while (n_linear_rings--)
+    {
+      if (no_data(data, 4))
+        return 1;
+      uint32 n_points= uint4korr(data);
+      data+= 4;
+      if (not_enough_points(data, n_points) ||
+	  txt->reserve(2 + ((MAX_DIGITS_IN_DOUBLE + 1) * 2 + 1) * n_points,
+		       512))
+	return 1;
+      txt->qs_append('(');
+      data= append_points(txt, n_points, data, 0);
+      /* Overwrite the trailing ',' left by append_points with ')'. */
+      (*txt) [txt->length() - 1]= ')';
+      txt->qs_append(',');
+    }
+    /* Close the polygon the same way: replace the last ',' with ')'. */
+    (*txt) [txt->length() - 1]= ')';
+    txt->qs_append(',');
+  }
+  txt->length(txt->length() - 1);       // Remove trailing ','
+  *end= data;
+  return 0;
+}
+
+
+/*
+  Append the polygons as a GeoJSON array of ring arrays to txt; *end is
+  set past the consumed bytes.  Returns 1 on error.
+*/
+bool Gis_multi_polygon::get_data_as_json(String *txt, uint max_dec_digits,
+                                         const char **end) const
+{
+  uint32 n_polygons;
+  const char *data= m_data;
+
+  if (no_data(data, 4) || txt->reserve(1, 512))
+    return 1;
+  n_polygons= uint4korr(data);
+  data+= 4;
+
+  txt->q_append('[');
+  while (n_polygons--)
+  {
+    uint32 n_linear_rings;
+    if (no_data(data, 4 + WKB_HEADER_SIZE) ||
+	txt->reserve(1, 512))
+      return 1;
+    n_linear_rings= uint4korr(data+WKB_HEADER_SIZE);
+    data+= 4 + WKB_HEADER_SIZE;
+    txt->q_append('[');
+
+    while (n_linear_rings--)
+    {
+      if (no_data(data, 4))
+        return 1;
+      uint32 n_points= uint4korr(data);
+      data+= 4;
+      if (not_enough_points(data, n_points) ||
+	  txt->reserve(2 + (MAX_DIGITS_IN_DOUBLE * 2 + 6) * n_points,
+		       512))
+	return 1;
+      data= append_json_points(txt, max_dec_digits, n_points, data, 0);
+      txt->qs_append(", ", 2);
+    }
+    txt->length(txt->length() - 2);     // Remove trailing ", "
+    txt->qs_append("], ", 3);
+  }
+  txt->length(txt->length() - 2);       // Remove trailing ", "
+  txt->q_append(']');
+  *end= data;
+  return 0;
+}
+
+
+/* MBR over every ring of every polygon; *end is set just past the data. */
+bool Gis_multi_polygon::get_mbr(MBR *mbr, const char **end) const
+{
+  const char *ptr= m_data;
+
+  if (no_data(ptr, 4))
+    return 1;
+  uint32 polygons= uint4korr(ptr);
+  ptr+= 4;
+
+  for (; polygons != 0; polygons--)
+  {
+    if (no_data(ptr, 4+WKB_HEADER_SIZE))
+      return 1;
+    uint32 rings= uint4korr(ptr + WKB_HEADER_SIZE);
+    ptr+= WKB_HEADER_SIZE + 4;
+
+    for (; rings != 0; rings--)
+    {
+      ptr= get_mbr_for_points(mbr, ptr, 0);
+      if (ptr == NULL)
+        return 1;
+    }
+  }
+  *end= ptr;
+  return 0;
+}
+
+
+/* The polygon count is the leading uint32 of the stored data. */
+int Gis_multi_polygon::num_geometries(uint32 *num) const
+{
+  const char *ptr= m_data;
+  *num= uint4korr(ptr);
+  return 0;
+}
+
+
+/*
+  Append polygon number num (1-based), including its WKB header, to
+  result.  Polygons are variable-sized, so the preceding ones must be
+  walked.  Returns -1 for an out-of-range num, 1 on malformed data.
+*/
+int Gis_multi_polygon::geometry_n(uint32 num, String *result) const
+{
+  uint32 n_polygons;
+  const char *data= m_data, *start_of_polygon;
+
+  if (no_data(data, 4))
+    return 1;
+  n_polygons= uint4korr(data);
+  data+= 4;
+
+  if (num > n_polygons || num < 1)
+    return -1;
+
+  do
+  {
+    uint32 n_linear_rings;
+    start_of_polygon= data;             // remember the current candidate
+
+    if (no_data(data, WKB_HEADER_SIZE + 4))
+      return 1;
+    n_linear_rings= uint4korr(data + WKB_HEADER_SIZE);
+    data+= WKB_HEADER_SIZE + 4;
+
+    while (n_linear_rings--)
+    {
+      uint32 n_points;
+      if (no_data(data, 4))
+	return 1;
+      n_points= uint4korr(data);
+      if (not_enough_points(data + 4, n_points))
+        return 1;
+      data+= 4 + POINT_DATA_SIZE * n_points;
+    }
+  } while (--num);
+  if (no_data(data, 0))				// We must check last segment
+    return 1;
+  return result->append(start_of_polygon, (uint32) (data - start_of_polygon),
+			(uint32) 0);
+}
+
+
+/* Total area = sum of component polygon areas; *end_of_data ends up past
+   the consumed bytes. */
+int Gis_multi_polygon::area(double *ar,  const char **end_of_data) const
+{
+  const char *ptr= m_data;
+  double total= 0;
+
+  if (no_data(ptr, 4))
+    return 1;
+  uint32 polygons= uint4korr(ptr);
+  ptr+= 4;
+
+  for (; polygons != 0; polygons--)
+  {
+    Gis_polygon poly;
+    double poly_area;
+
+    ptr+= WKB_HEADER_SIZE;              // skip the component's WKB header
+    poly.set_data_ptr(ptr, (uint32) (m_data_end - ptr));
+    if (poly.area(&poly_area, &ptr))    // also advances ptr past the polygon
+      return 1;
+    total+= poly_area;
+  }
+  *ar= total;
+  *end_of_data= ptr;
+  return 0;
+}
+
+
+/*
+  Centroid of the whole MultiPolygon: area-weighted average of the
+  component polygon centroids, serialized as a Point into result.
+  NOTE(review): if every component has zero area, the final divisions are
+  by zero — inherited upstream behavior, not handled here.
+*/
+int Gis_multi_polygon::centroid(String *result) const
+{
+  uint32 n_polygons;
+  Gis_polygon p;
+  double res_area= 0.0, res_cx= 0.0, res_cy= 0.0;
+  double cur_area, cur_cx, cur_cy;
+  const char *data= m_data;
+
+  if (no_data(data, 4))
+    return 1;
+  n_polygons= uint4korr(data);
+  data+= 4;
+
+  while (n_polygons--)
+  {
+    data+= WKB_HEADER_SIZE;
+    p.set_data_ptr(data, (uint32) (m_data_end - data));
+    /* p.area() also advances data past this polygon's bytes. */
+    if (p.area(&cur_area, &data) ||
+	p.centroid_xy(&cur_cx, &cur_cy))
+      return 1;
+
+    res_area+= cur_area;
+    res_cx+= cur_area * cur_cx;
+    res_cy+= cur_area * cur_cy;
+  }
+   
+  res_cx/= res_area;
+  res_cy/= res_area;
+
+  return create_point(result, res_cx, res_cy);
+}
+
+
+/*
+  Feed every polygon of this MULTIPOLYGON to the Gcalc shape transporter
+  as one collection.
+
+  RETURN  0 ok, 1 on malformed data or transporter error.
+*/
+int Gis_multi_polygon::store_shapes(Gcalc_shape_transporter *trn) const
+{
+  uint32 n_polygons;
+  Gis_polygon p;
+  const char *data= m_data;
+
+  if (no_data(data, 4))
+    return 1;
+  n_polygons= uint4korr(data);
+  data+= 4;
+
+  if (trn->start_collection(n_polygons))
+    return 1;
+
+  while (n_polygons--)
+  {
+    if (no_data(data, WKB_HEADER_SIZE))
+      return 1;
+    data+= WKB_HEADER_SIZE;
+    p.set_data_ptr(data, (uint32) (m_data_end - data));
+    if (p.store_shapes(trn))
+      return 1;
+    data+= p.get_data_size();			// Advance past the stored polygon
+  }
+  return 0;
+}
+
+
+/* Return the static class descriptor (name, type id, factory) for MULTIPOLYGON. */
+const Geometry::Class_info *Gis_multi_polygon::get_class_info() const
+{
+  return &multipolygon_class;
+}
+
+
+/************************* GeometryCollection ****************************/
+
+/*
+  Return the byte length of this GEOMETRYCOLLECTION's data, by walking
+  every contained object (each dispatched through its own class'
+  get_data_size()).
+
+  RETURN  byte count, or GET_SIZE_ERROR on malformed data / unknown type.
+*/
+uint32 Gis_geometry_collection::get_data_size() const 
+{
+  uint32 n_objects;
+  const char *data= m_data;
+  Geometry_buffer buffer;
+  Geometry *geom;
+
+  if (no_data(data, 4))
+    return GET_SIZE_ERROR;
+  n_objects= uint4korr(data);
+  data+= 4;
+
+  while (n_objects--)
+  {
+    uint32 wkb_type,object_size;
+
+    if (no_data(data, WKB_HEADER_SIZE))
+      return GET_SIZE_ERROR;
+    wkb_type= uint4korr(data + 1);		// Type follows the 1-byte byte-order mark
+    data+= WKB_HEADER_SIZE;
+
+    if (!(geom= create_by_typeid(&buffer, wkb_type)))
+      return GET_SIZE_ERROR;
+    geom->set_data_ptr(data, (uint) (m_data_end - data));
+    if ((object_size= geom->get_data_size()) == GET_SIZE_ERROR)
+      return GET_SIZE_ERROR;
+    data+= object_size;
+  }
+  return (uint32) (data - m_data);
+}
+
+
+/*
+  Parse a GEOMETRYCOLLECTION from WKT text in 'trs' and append its binary
+  form to 'wkb'.
+
+  Accepts both "GEOMETRYCOLLECTION()" and the "EMPTY" keyword form; nested
+  geometry collections are rejected.  The 4-byte object count is reserved
+  up front and patched in at the end via write_at_position().
+
+  RETURN  0 ok, 1 on parse error or out of memory.
+*/
+bool Gis_geometry_collection::init_from_wkt(Gis_read_stream *trs, String *wkb)
+{
+  uint32 n_objects= 0;
+  uint32 no_pos= wkb->length();			// Where the count will be patched in
+  Geometry_buffer buffer;
+  Geometry *g;
+  char next_sym;
+
+  if (wkb->reserve(4, 512))
+    return 1;
+  wkb->length(wkb->length()+4);			// Reserve space for points
+
+  if (!(next_sym= trs->next_symbol()))
+    return 1;
+
+  if (next_sym != ')')
+  {
+    LEX_STRING next_word;
+    if (trs->lookup_next_word(&next_word))
+      return 1;
+
+    /* "GEOMETRYCOLLECTION EMPTY" parses as zero objects */
+    if (next_word.length != 5 ||
+	(my_charset_latin1.strnncoll("empty", 5, next_word.str, 5) != 0))
+    {
+      for (;;)
+      {
+        if (!(g= create_from_wkt(&buffer, trs, wkb)))
+          return 1;
+
+        if (g->get_class_info()->m_type_id == wkb_geometrycollection)
+        {
+          trs->set_error_msg("Unexpected GEOMETRYCOLLECTION");
+          return 1;
+        }
+        n_objects++;
+        if (trs->skip_char(','))			// Didn't find ','
+          break;
+      }
+    }
+  }
+
+  wkb->write_at_position(no_pos, n_objects);	// Backpatch object count
+  return 0;
+}
+
+
+/*
+  Build GEOMETRYCOLLECTION WKB in 'bin' from a Gcalc operation result
+  buffer 'opres' of length 'res_len'.
+
+  Each Gcalc shape type is mapped to the corresponding WKB type and the
+  shape's own init_from_opresult() consumes its bytes.  res_len == 0
+  denotes GEOMETRYCOLLECTION EMPTY.
+
+  RETURN  number of opres bytes consumed, or 0 on error.
+*/
+uint Gis_geometry_collection::init_from_opresult(String *bin,
+                                                 const char *opres,
+                                                 uint res_len)
+{
+  const char *opres_orig= opres;
+  Geometry_buffer buffer;
+  Geometry *geom;
+  int g_len;
+  uint32 wkb_type;
+  int no_pos= bin->length();			// Where the count will be patched in
+  uint32 n_objects= 0;
+
+  if (bin->reserve(4, 512))
+    return 0;
+  bin->q_append(n_objects);
+  
+  if (res_len == 0)
+  {
+    /* Special case of GEOMETRYCOLLECTION EMPTY. */
+    opres+= 1;
+    goto empty_geom;
+  }
+  
+  while (res_len)
+  {
+    switch ((Gcalc_function::shape_type) uint4korr(opres))
+    {
+      case Gcalc_function::shape_point:   wkb_type= wkb_point; break;
+      case Gcalc_function::shape_line:    wkb_type= wkb_linestring; break;
+      case Gcalc_function::shape_polygon: wkb_type= wkb_polygon; break;
+      default: wkb_type= 0; DBUG_ASSERT(FALSE);
+    };
+
+    if (bin->reserve(WKB_HEADER_SIZE, 512))
+      return 0;
+
+    bin->q_append((char) wkb_ndr);
+    bin->q_append(wkb_type);
+
+    if (!(geom= create_by_typeid(&buffer, wkb_type)) ||
+        !(g_len= geom->init_from_opresult(bin, opres, res_len)))
+      return 0;
+    opres+= g_len;
+    res_len-= g_len;
+    n_objects++;
+  }
+empty_geom:
+  bin->write_at_position(no_pos, n_objects);	// Backpatch object count
+  return (uint) (opres - opres_orig);
+}
+
+
+/*
+  Convert external WKB (arbitrary byte order 'bo') of a GEOMETRYCOLLECTION
+  into internal little-endian form appended to 'res'.
+
+  Each member's byte order is taken from its own header byte (wkb[0]);
+  the output header is always written as wkb_ndr.
+
+  NOTE(review): 'len' is not reduced by the leading 4 count bytes before
+  the loop -- members are bounded by the per-member checks instead; confirm
+  this matches upstream intent.
+
+  RETURN  number of input bytes consumed, or 0 on error.
+*/
+uint Gis_geometry_collection::init_from_wkb(const char *wkb, uint len,
+                                            wkbByteOrder bo, String *res)
+{
+  uint32 n_geom;
+  const char *wkb_orig= wkb;
+
+  if (len < 4)
+    return 0;
+  n_geom= wkb_get_uint(wkb, bo);
+
+  if (res->reserve(4, 512))
+    return 0;
+  res->q_append(n_geom);
+  
+  wkb+= 4;
+  while (n_geom--)
+  {
+    Geometry_buffer buffer;
+    Geometry *geom;
+    int g_len;
+    uint32 wkb_type;
+
+    if (len < WKB_HEADER_SIZE ||
+        res->reserve(WKB_HEADER_SIZE, 512))
+      return 0;
+
+    res->q_append((char) wkb_ndr);
+    wkb_type= wkb_get_uint(wkb+1, (wkbByteOrder) wkb[0]);
+    res->q_append(wkb_type);
+
+    if (!(geom= create_by_typeid(&buffer, wkb_type)) ||
+        !(g_len= geom->init_from_wkb(wkb + WKB_HEADER_SIZE, len,
+                                     (wkbByteOrder)  wkb[0], res)))
+      return 0;
+    g_len+= WKB_HEADER_SIZE;
+    wkb+= g_len;
+    len-= g_len;
+  }
+  return (uint) (wkb - wkb_orig);
+}
+
+
+/*
+  Parse the GeoJSON "geometries" array from 'je' and append the collection's
+  binary form to 'wkb'.
+
+  The JSON engine state is saved before each member parse and restored
+  afterwards so the array can be skipped item by item; the object count is
+  backpatched at the end.
+
+  RETURN  FALSE ok, TRUE on invalid JSON / unsupported geometry.
+*/
+bool Gis_geometry_collection::init_from_json(json_engine_t *je, bool er_on_3D,
+                                             String *wkb)
+{
+  uint32 n_objects= 0;
+  uint32 no_pos= wkb->length();			// Where the count will be patched in
+  Geometry_buffer buffer;
+  Geometry *g;
+
+  if (json_read_value(je))
+    return TRUE;
+
+  if (je->value_type != JSON_VALUE_ARRAY)
+  {
+    je->s.error= GEOJ_INCORRECT_GEOJSON;
+    return TRUE;
+  }
+
+  if (wkb->reserve(4, 512))
+    return TRUE;
+  wkb->length(wkb->length()+4);	// Reserve space for n_objects
+
+  while (json_scan_next(je) == 0 && je->state != JST_ARRAY_END)
+  {
+    json_engine_t sav_je= *je;			// Save position before parsing member
+
+    DBUG_ASSERT(je->state == JST_VALUE);
+
+    if (!(g= create_from_json(&buffer, je, er_on_3D, wkb)))
+      return TRUE;
+
+    *je= sav_je;				// Restore, then skip the array item
+    if (json_skip_array_item(je))
+      return TRUE;
+
+    n_objects++;
+  }
+
+  wkb->write_at_position(no_pos, n_objects);
+  return FALSE;
+}
+
+
+/*
+  Append the WKT text of this GEOMETRYCOLLECTION to 'txt'; on success
+  *end points just past the consumed binary data.
+
+  An empty collection is printed as " EMPTY" (leading space, matching the
+  type-name prefix written by the caller).
+
+  RETURN  0 ok, 1 on malformed data or OOM.
+*/
+bool Gis_geometry_collection::get_data_as_wkt(String *txt,
+					     const char **end) const
+{
+  uint32 n_objects;
+  Geometry_buffer buffer;
+  Geometry *geom;
+  const char *data= m_data;
+
+  if (no_data(data, 4))
+    return 1;
+  n_objects= uint4korr(data);
+  data+= 4;
+
+  if (n_objects == 0)
+  {
+    /* NOTE(review): append() result deliberately ignored here -- confirm */
+    txt->append(STRING_WITH_LEN(" EMPTY"), 512);
+    goto exit;
+  }
+
+  txt->qs_append('(');
+  while (n_objects--)
+  {
+    uint32 wkb_type;
+
+    if (no_data(data, WKB_HEADER_SIZE))
+      return 1;
+    wkb_type= uint4korr(data + 1);
+    data+= WKB_HEADER_SIZE;
+
+    if (!(geom= create_by_typeid(&buffer, wkb_type)))
+      return 1;
+    geom->set_data_ptr(data, (uint) (m_data_end - data));
+    if (geom->as_wkt(txt, &data))
+      return 1;
+    if (n_objects && txt->append(STRING_WITH_LEN(","), 512))
+      return 1;					// Comma between members only
+  }
+  txt->qs_append(')');
+exit:
+  *end= data;
+  return 0;
+}
+
+
+/*
+  Append the GeoJSON "geometries" array for this collection to 'txt';
+  on success *end points just past the consumed binary data.
+
+  Members are emitted as "{...}, " and the final trailing ", " is trimmed
+  by shrinking the string by two bytes before the closing ']'.
+
+  NOTE(review): the length()-2 trim assumes at least one member was
+  written; an empty collection would eat the '[' -- confirm callers
+  never pass an empty collection here.
+
+  RETURN  0 ok, 1 on malformed data or OOM.
+*/
+bool Gis_geometry_collection::get_data_as_json(String *txt, uint max_dec_digits,
+                                               const char **end) const
+{
+  uint32 n_objects;
+  Geometry_buffer buffer;
+  Geometry *geom;
+  const char *data= m_data;
+
+  if (no_data(data, 4) || txt->reserve(1, 512))
+    return 1;
+  n_objects= uint4korr(data);
+  data+= 4;
+
+  txt->qs_append('[');
+  while (n_objects--)
+  {
+    uint32 wkb_type;
+
+    if (no_data(data, WKB_HEADER_SIZE))
+      return 1;
+    wkb_type= uint4korr(data + 1);
+    data+= WKB_HEADER_SIZE;
+
+    if (!(geom= create_by_typeid(&buffer, wkb_type)))
+      return 1;
+    geom->set_data_ptr(data, (uint) (m_data_end - data));
+    if (txt->append('{') ||
+        geom->as_json(txt, max_dec_digits, &data) ||
+        txt->append(STRING_WITH_LEN("}, "), 512))
+      return 1;
+  }
+  txt->length(txt->length() - 2);		// Drop trailing ", "
+  if (txt->append(']'))
+    return 1;
+
+  *end= data;
+  return 0;
+}
+
+
+/*
+  Grow *mbr to cover every member of the collection; on success *end
+  points just past the consumed binary data.  An empty collection leaves
+  *mbr untouched.
+
+  RETURN  0 ok, 1 on malformed data.
+*/
+bool Gis_geometry_collection::get_mbr(MBR *mbr, const char **end) const
+{
+  uint32 n_objects;
+  const char *data= m_data;
+  Geometry_buffer buffer;
+  Geometry *geom;
+
+  if (no_data(data, 4))
+    return 1;
+  n_objects= uint4korr(data);
+  data+= 4;
+  if (n_objects == 0)
+    goto exit;
+
+  while (n_objects--)
+  {
+    uint32 wkb_type;
+
+    if (no_data(data, WKB_HEADER_SIZE))
+      return 1;
+    wkb_type= uint4korr(data + 1);
+    data+= WKB_HEADER_SIZE;
+
+    if (!(geom= create_by_typeid(&buffer, wkb_type)))
+      return 1;
+    geom->set_data_ptr(data, (uint32) (m_data_end - data));
+    if (geom->get_mbr(mbr, &data))		// Also advances 'data'
+      return 1;
+  }
+exit:
+  *end= data;
+  return 0;
+}
+
+
+/*
+  Sum the areas of all collection members (members without an area
+  contribute through their own area() implementation).
+
+  RETURN  0 ok (*ar = total, *end past consumed data), 1 on malformed data.
+*/
+int Gis_geometry_collection::area(double *ar,  const char **end) const
+{
+  uint32 n_objects;
+  const char *data= m_data;
+  Geometry_buffer buffer;
+  Geometry *geom;
+  double result;
+
+  if (no_data(data, 4))
+    return 1;
+  n_objects= uint4korr(data);
+  data+= 4;
+
+  result= 0.0;
+  if (n_objects == 0)
+    goto exit;
+
+  while (n_objects--)
+  {
+    uint32 wkb_type;
+
+    if (no_data(data, WKB_HEADER_SIZE))
+      return 1;
+    wkb_type= uint4korr(data + 1);
+    data+= WKB_HEADER_SIZE;
+
+    if (!(geom= create_by_typeid(&buffer, wkb_type)))
+      return 1;
+    geom->set_data_ptr(data, (uint32) (m_data_end - data));
+    if (geom->area(ar, &data))			// *ar used as scratch per member
+      return 1;
+    result+= *ar;
+  }
+exit:
+  *end= data;
+  *ar= result;
+  return 0;
+}
+
+
+/*
+  Sum the lengths of all collection members (each member's geom_length()
+  advances 'data' past its binary representation).
+
+  RETURN  0 ok (*len = total, *end past consumed data), 1 on malformed data.
+*/
+int Gis_geometry_collection::geom_length(double *len, const char **end) const
+{
+  uint32 n_objects;
+  const char *data= m_data;
+  Geometry_buffer buffer;
+  Geometry *geom;
+  double result;
+
+  if (no_data(data, 4))
+    return 1;
+  n_objects= uint4korr(data);
+  data+= 4;
+  result= 0.0;
+
+  if (n_objects == 0)
+    goto exit;
+
+  while (n_objects--)
+  {
+    uint32 wkb_type;
+
+    if (no_data(data, WKB_HEADER_SIZE))
+      return 1;
+    wkb_type= uint4korr(data + 1);
+    data+= WKB_HEADER_SIZE;
+
+    if (!(geom= create_by_typeid(&buffer, wkb_type)))
+      return 1;
+    geom->set_data_ptr(data, (uint32) (m_data_end - data));
+    if (geom->geom_length(len, &data))		// *len used as scratch per member
+      return 1;
+    result+= *len;
+  }
+
+exit:
+  *end= data;
+  *len= result;
+  return 0;
+}
+
+
+/*
+  Return the number of objects in this GEOMETRYCOLLECTION.
+  RETURN  0 ok, 1 if the 4-byte count is missing.
+*/
+int Gis_geometry_collection::num_geometries(uint32 *num) const
+{
+  if (no_data(m_data, 4))
+    return 1;
+  *num= uint4korr(m_data);
+  return 0;
+}
+
+
+/*
+  Copy object number 'num' (1-based) of the collection into 'result',
+  rewriting its header as little-endian (wkb_ndr).
+
+  NOTE(review): out-of-range 'num' returns 1 here, while the
+  Gis_multi_polygon variant returns -1 for that case -- confirm callers
+  don't distinguish.
+
+  RETURN  0 ok, 1 on error or out-of-range.
+*/
+int Gis_geometry_collection::geometry_n(uint32 num, String *result) const
+{
+  uint32 n_objects, wkb_type, length;
+  const char *data= m_data;
+  Geometry_buffer buffer;
+  Geometry *geom;
+
+  if (no_data(data, 4))
+    return 1;
+  n_objects= uint4korr(data);
+  data+= 4;
+  if (num > n_objects || num < 1)
+    return 1;
+
+  do
+  {
+    if (no_data(data, WKB_HEADER_SIZE))
+      return 1;
+    wkb_type= uint4korr(data + 1);
+    data+= WKB_HEADER_SIZE;
+
+    if (!(geom= create_by_typeid(&buffer, wkb_type)))
+      return 1;
+    geom->set_data_ptr(data, (uint) (m_data_end - data));
+    if ((length= geom->get_data_size()) == GET_SIZE_ERROR)
+      return 1;
+    data+= length;
+  } while (--num);
+
+  /* Copy found object to result */
+  if (result->reserve(1 + 4 + length))
+    return 1;
+  result->q_append((char) wkb_ndr);
+  result->q_append((uint32) wkb_type);
+  result->q_append(data-length, length);	// data-length = start_of_data
+  return 0;
+}
+
+
+/*
+  Return dimension for object
+
+  SYNOPSIS
+    dimension()
+    res_dim		Result dimension
+    end			End of object will be stored here. May be 0 for
+			simple objects!
+  RETURN
+    0	ok
+    1	error
+*/
+
+/*
+  Collection dimension is the maximum dimension over all members.
+  Members that report a NULL end pointer ("simple" objects with a fixed
+  dimension) are advanced by get_data_size() instead.
+*/
+bool Gis_geometry_collection::dimension(uint32 *res_dim, const char **end) const
+{
+  uint32 n_objects;
+  const char *data= m_data;
+  Geometry_buffer buffer;
+  Geometry *geom;
+
+  if (no_data(data, 4))
+    return 1;
+  n_objects= uint4korr(data);
+  data+= 4;
+
+  *res_dim= 0;
+  while (n_objects--)
+  {
+    uint32 wkb_type, length, dim;
+    const char *end_data;
+
+    if (no_data(data, WKB_HEADER_SIZE))
+      return 1;
+    wkb_type= uint4korr(data + 1);
+    data+= WKB_HEADER_SIZE;
+    if (!(geom= create_by_typeid(&buffer, wkb_type)))
+      return 1;
+    geom->set_data_ptr(data, (uint32) (m_data_end - data));
+    if (geom->dimension(&dim, &end_data))
+      return 1;
+    set_if_bigger(*res_dim, dim);
+    if (end_data)				// Complex object
+      data= end_data;
+    else if ((length= geom->get_data_size()) == GET_SIZE_ERROR)
+      return 1;
+    else
+      data+= length;
+  }
+  *end= data;
+  return 0;
+}
+
+
+/*
+  Feed every member of the collection to the Gcalc shape transporter.
+  An empty collection is reported via trn->empty_shape().
+
+  RETURN  0 ok, 1 on malformed data or transporter error.
+*/
+int Gis_geometry_collection::store_shapes(Gcalc_shape_transporter *trn) const
+{
+  uint32 n_objects;
+  const char *data= m_data;
+  Geometry_buffer buffer;
+  Geometry *geom;
+
+  if (no_data(data, 4))
+    return 1;
+  n_objects= uint4korr(data);
+  data+= 4;
+
+  if (!n_objects)
+  {
+    trn->empty_shape();
+    return 0;
+  }
+
+  if (trn->start_collection(n_objects))
+    return 1;
+
+  while (n_objects--)
+  {
+    uint32 wkb_type;
+
+    if (no_data(data, WKB_HEADER_SIZE))
+      return 1;
+    wkb_type= uint4korr(data + 1);
+    data+= WKB_HEADER_SIZE;
+    if (!(geom= create_by_typeid(&buffer, wkb_type)))
+      return 1;
+    geom->set_data_ptr(data, (uint32) (m_data_end - data));
+    if (geom->store_shapes(trn))
+      return 1;
+
+    data+= geom->get_data_size();		// Advance past the stored member
+  }
+  return 0;
+}
+
+
+/* Return the static class descriptor for GEOMETRYCOLLECTION. */
+const Geometry::Class_info *Gis_geometry_collection::get_class_info() const
+{
+  return &geometrycollection_class;
+}
+
+#endif /*HAVE_SPATIAL*/
diff --git a/sql/spatial.h b/sql/spatial.h
new file mode 100644
index 00000000..d85bdc86
--- /dev/null
+++ b/sql/spatial.h
@@ -0,0 +1,655 @@
+/*
+   Copyright (c) 2002, 2013, Oracle and/or its affiliates.
+   Copyright (c) 2009, 2013, Monty Program Ab.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#ifndef _spatial_h
+#define _spatial_h
+
+#include "sql_string.h"                         /* String, LEX_STRING */
+#include <my_compare.h>   /* NOTE(review): the two include targets on these lines
+                             were lost in extraction (angle-bracket contents
+                             stripped); <my_compare.h> and <json_lib.h> match the
+                             names used below (float8get, json_engine_t) --
+                             verify against the upstream sql/spatial.h. */
+#include <json_lib.h>
+
+#ifdef HAVE_SPATIAL
+
+class Gis_read_stream;
+
+#include "gcalc_tools.h"
+
+const uint SRID_SIZE= 4;
+const uint SIZEOF_STORED_DOUBLE= 8;
+const uint POINT_DATA_SIZE= (SIZEOF_STORED_DOUBLE * 2); 
+const uint WKB_HEADER_SIZE= 1+4;
+const uint32 GET_SIZE_ERROR= ((uint32) -1);
+
+/* Memory layout of one 2D point inside a WKB value: two stored doubles. */
+struct st_point_2d
+{
+  double x;
+  double y;
+};
+
+/* Memory layout of a linear ring: point count followed by its points. */
+struct st_linear_ring
+{
+  uint32 n_points;
+  st_point_2d points;				// First point; the rest follow in memory
+};
+
+/***************************** MBR *******************************/
+
+
+/*
+  It's ok that a lot of the functions are inline as these are only used once
+  in MySQL
+*/
+
+/*
+  Minimum bounding rectangle of a geometry.
+
+  A default-constructed MBR is "inverted" (xmin > xmax) so that the first
+  add_xy()/add_mbr() establishes the real bounds; dimension() reports such
+  an MBR as -1 (invalid).
+
+  FIX(review): inner_point() was garbled by text extraction
+  ("(xminx) && (yminy)" -- the comparison operators were stripped as if
+  they were markup tags); restored to the strict-inequality form.
+*/
+struct MBR
+{
+  double xmin, ymin, xmax, ymax;
+
+  MBR()
+  {
+    xmin= ymin= DBL_MAX;
+    xmax= ymax= -DBL_MAX;
+  }
+
+  MBR(const double xmin_arg, const double ymin_arg,
+      const double xmax_arg, const double ymax_arg)
+    :xmin(xmin_arg), ymin(ymin_arg), xmax(xmax_arg), ymax(ymax_arg)
+  {}
+
+  MBR(const st_point_2d &min, const st_point_2d &max)
+    :xmin(min.x), ymin(min.y), xmax(max.x), ymax(max.y)
+  {}
+
+  MBR(const MBR &mbr1, const MBR &mbr2)
+    :xmin(mbr1.xmin), ymin(mbr1.ymin), xmax(mbr1.xmax), ymax(mbr1.ymax)
+  { add_mbr(&mbr2); }
+
+  /* Extend the rectangle to cover point (x, y). */
+  inline void add_xy(double x, double y)
+  {
+    /* Not using "else" for proper one point MBR calculation */
+    if (x < xmin)
+      xmin= x;
+    if (x > xmax)
+      xmax= x;
+    if (y < ymin)
+      ymin= y;
+    if (y > ymax)
+      ymax= y;
+  }
+  /* Extend the rectangle by a point stored as two little-endian doubles. */
+  void add_xy(const char *px, const char *py)
+  {
+    double x, y;
+    float8get(x, px);
+    float8get(y, py);
+    add_xy(x,y);
+  }
+  /* Extend the rectangle to cover another rectangle. */
+  void add_mbr(const MBR *mbr)
+  {
+    if (mbr->xmin < xmin)
+      xmin= mbr->xmin;
+    if (mbr->xmax > xmax)
+      xmax= mbr->xmax;
+    if (mbr->ymin < ymin)
+      ymin= mbr->ymin;
+    if (mbr->ymax > ymax)
+      ymax= mbr->ymax;
+  }
+  /* Grow the rectangle by distance 'd' in all four directions. */
+  void buffer(double d)
+  {
+    xmin-= d;
+    ymin-= d;
+    xmax+= d;
+    ymax+= d;
+  }
+
+  int equals(const MBR *mbr)
+  {
+    /* The following should be safe, even if we compare doubles */
+    return ((mbr->xmin == xmin) && (mbr->ymin == ymin) &&
+	    (mbr->xmax == xmax) && (mbr->ymax == ymax));
+  }
+
+  int disjoint(const MBR *mbr)
+  {
+    /* The following should be safe, even if we compare doubles */
+    return ((mbr->xmin > xmax) || (mbr->ymin > ymax) ||
+	    (mbr->xmax < xmin) || (mbr->ymax < ymin));
+  }
+
+  int intersects(const MBR *mbr)
+  {
+    return !disjoint(mbr);
+  }
+
+  int touches(const MBR *mbr)
+  {
+    /* The following should be safe, even if we compare doubles */
+    return ((mbr->xmin == xmax || mbr->xmax == xmin) &&
+            ((mbr->ymin >= ymin && mbr->ymin <= ymax) ||
+             (mbr->ymax >= ymin && mbr->ymax <= ymax))) ||
+           ((mbr->ymin == ymax || mbr->ymax == ymin) &&
+            ((mbr->xmin >= xmin && mbr->xmin <= xmax) ||
+             (mbr->xmax >= xmin && mbr->xmax <= xmax)));
+  }
+
+  int within(const MBR *mbr);
+
+  int contains(const MBR *mbr)
+  {
+    /* The following should be safe, even if we compare doubles */
+    return ((mbr->xmin >= xmin) && (mbr->ymin >= ymin) &&
+	    (mbr->xmax <= xmax) && (mbr->ymax <= ymax));
+  }
+
+  /* True if (x, y) lies strictly inside the rectangle (not on the border). */
+  bool inner_point(double x, double y) const
+  {
+    /* The following should be safe, even if we compare doubles */
+    return (xmin<x) && (xmax>x) && (ymin<y) && (ymax>y);
+  }
+
+  /**
+    The dimension maps to an integer as:
+    - Polygon -> 2
+    - Horizontal or vertical line -> 1
+    - Point -> 0
+    - Invalid MBR -> -1
+  */
+  int dimension() const
+  {
+    int d= 0;
+
+    if (xmin > xmax)
+      return -1;
+    else if (xmin < xmax)
+      d++;
+
+    if (ymin > ymax)
+      return -1;
+    else if (ymin < ymax)
+      d++;
+
+    return d;
+  }
+
+  int overlaps(const MBR *mbr)
+  {
+    /*
+      overlaps() requires that some point inside *this is also inside
+      *mbr, and that both geometries and their intersection are of the
+      same dimension.
+    */
+    int d = dimension();
+
+    if (d != mbr->dimension() || d <= 0 || contains(mbr) || within(mbr))
+      return 0;
+
+    MBR intersection(MY_MAX(xmin, mbr->xmin), MY_MAX(ymin, mbr->ymin),
+                     MY_MIN(xmax, mbr->xmax), MY_MIN(ymax, mbr->ymax));
+
+    return (d == intersection.dimension());
+  }
+
+  int valid() const
+  { return xmin <= xmax && ymin <= ymax; }
+};
+
+
+/***************************** Geometry *******************************/
+
+struct Geometry_buffer;
+
+/*
+  Abstract base class for all spatial types.
+
+  Concrete subclasses are constructed by placement-new into a caller-owned
+  Geometry_buffer (see create_by_typeid()); the class therefore overrides
+  operator new/delete to be no-ops on that buffer.  A Geometry does not own
+  its data: m_data/m_data_end point into an externally owned WKB buffer.
+*/
+class Geometry
+{
+public:
+  Geometry() = default;                              /* Remove gcc warning */
+  virtual ~Geometry() = default;                     /* Remove gcc warning */
+  static void *operator new(size_t size, void *buffer)
+  {
+    return buffer;				// Placement new into Geometry_buffer
+  }
+
+  static void operator delete(void *ptr, void *buffer)
+  {}
+
+  static void operator delete(void *buffer)
+  {}
+
+  /* OpenGIS WKB geometry type codes */
+  enum wkbType
+  {
+    wkb_point= 1,
+    wkb_linestring= 2,
+    wkb_polygon= 3,
+    wkb_multipoint= 4,
+    wkb_multilinestring= 5,
+    wkb_multipolygon= 6,
+    wkb_geometrycollection= 7,
+    wkb_last=7
+  };
+  enum wkbByteOrder
+  {
+    wkb_xdr= 0,    /* Big Endian */
+    wkb_ndr= 1     /* Little Endian */
+  };
+  /* Error codes produced while parsing GeoJSON input */
+  enum geojson_errors
+  {
+    GEOJ_INCORRECT_GEOJSON= 1,
+    GEOJ_TOO_FEW_POINTS= 2,
+    GEOJ_POLYGON_NOT_CLOSED= 3,
+    GEOJ_DIMENSION_NOT_SUPPORTED= 4,
+    GEOJ_EMPTY_COORDINATES= 5,
+  };
+
+
+  /** Callback which creates Geometry objects on top of a given placement. */
+  typedef Geometry *(*create_geom_t)(char *);
+
+  /* Per-type registry entry: names, WKB type id and placement factory. */
+  class Class_info
+  {
+  public:
+    LEX_STRING m_name;
+    LEX_STRING m_geojson_name;
+    int m_type_id;
+    create_geom_t m_create_func;
+    Class_info(const char *name, const char *gejson_name,
+               int type_id, create_geom_t create_func);
+  };
+
+  virtual const Class_info *get_class_info() const=0;
+  virtual uint32 get_data_size() const=0;
+  virtual bool init_from_wkt(Gis_read_stream *trs, String *wkb)=0;
+  /* returns the length of the wkb that was read */
+  virtual uint init_from_wkb(const char *wkb, uint len, wkbByteOrder bo,
+                             String *res)=0;
+  virtual uint init_from_opresult(String *bin,
+                                  const char *opres, uint res_len)
+  { return init_from_wkb(opres + 4, UINT_MAX32, wkb_ndr, bin) + 4; }
+  virtual bool init_from_json(json_engine_t *je, bool er_on_3D, String *wkb)
+  { return true; }
+
+  virtual bool get_data_as_wkt(String *txt, const char **end) const=0;
+  virtual bool get_data_as_json(String *txt, uint max_dec_digits,
+                                const char **end) const=0;
+  virtual bool get_mbr(MBR *mbr, const char **end) const=0;
+  virtual bool dimension(uint32 *dim, const char **end) const=0;
+  /* The following return -1 when the operation is not applicable to the type */
+  virtual int get_x(double *x) const { return -1; }
+  virtual int get_y(double *y) const { return -1; }
+  virtual int geom_length(double *len, const char **end) const  { return -1; }
+  virtual int area(double *ar, const char **end) const { return -1;}
+  virtual int is_closed(int *closed) const { return -1; }
+  virtual int num_interior_ring(uint32 *n_int_rings) const { return -1; }
+  virtual int num_points(uint32 *n_points) const { return -1; }
+  virtual int num_geometries(uint32 *num) const { return -1; }
+  virtual int start_point(String *point) const { return -1; }
+  virtual int end_point(String *point) const { return -1; }
+  virtual int exterior_ring(String *ring) const { return -1; }
+  virtual int centroid(String *point) const { return -1; }
+  virtual int point_n(uint32 num, String *result) const { return -1; }
+  virtual int interior_ring_n(uint32 num, String *result) const { return -1; }
+  virtual int geometry_n(uint32 num, String *result) const { return -1; }
+  virtual int store_shapes(Gcalc_shape_transporter *trn) const=0;
+
+public:
+  /* Factories: build a Geometry of the right subclass inside 'buffer' */
+  static Geometry *create_by_typeid(Geometry_buffer *buffer, int type_id);
+
+  static Geometry *construct(Geometry_buffer *buffer,
+                             const char *data, uint32 data_len);
+  static Geometry *create_from_wkt(Geometry_buffer *buffer,
+				   Gis_read_stream *trs, String *wkt,
+				   bool init_stream=1);
+  static Geometry *create_from_wkb(Geometry_buffer *buffer,
+                                   const char *wkb, uint32 len, String *res);
+  static Geometry *create_from_json(Geometry_buffer *buffer, json_engine_t *je,
+                                    bool er_on_3D, String *res);
+  static Geometry *create_from_opresult(Geometry_buffer *g_buf,
+                                  String *res, Gcalc_result_receiver &rr);
+  static uint get_key_image_itMBR(LEX_CSTRING &src, uchar *buff, uint length);
+  int as_wkt(String *wkt, const char **end);
+  int as_json(String *wkt, uint max_dec_digits, const char **end);
+  int bbox_as_json(String *wkt);
+
+  /* Point this object at externally owned WKB data (no copy) */
+  inline void set_data_ptr(const char *data, uint32 data_len)
+  {
+    m_data= data;
+    m_data_end= data + data_len;
+  }
+
+  inline void shift_wkb_header()
+  {
+    m_data+= WKB_HEADER_SIZE;
+  }
+
+  const char *get_data_ptr() const
+  {
+    return m_data;
+  }
+
+  bool envelope(String *result) const;
+  static Class_info *ci_collection[wkb_last+1];
+
+  static bool create_point(String *result, double x, double y);
+protected:
+  static Class_info *find_class(int type_id)
+  {
+    return ((type_id < wkb_point) || (type_id > wkb_last)) ?
+      NULL : ci_collection[type_id];
+  }  
+  static Class_info *find_class(const char *name, size_t len);
+  const char *append_points(String *txt, uint32 n_points,
+			    const char *data, uint32 offset) const;
+  bool create_point(String *result, const char *data) const;
+  const char *get_mbr_for_points(MBR *mbr, const char *data, uint offset)
+    const;
+
+public:
+  /**
+     Check if there're enough data remaining as requested
+
+     @arg cur_data     pointer to the position in the binary form
+     @arg data_amount  number of points expected
+     @return           true if not enough data
+  */
+  inline bool no_data(const char *cur_data, size_t data_amount) const
+  {
+    return (cur_data + data_amount > m_data_end);
+  }
+
+  /**
+     Check if there're enough points remaining as requested
+
+     Need to perform the calculation in logical units, since multiplication
+     can overflow the size data type.
+
+     @arg data              pointer to the beginning of the points array
+     @arg expected_points   number of points expected
+     @arg extra_point_space extra space for each point element in the array
+     @return               true if there are not enough points
+  */
+  inline bool not_enough_points(const char *data, uint32 expected_points,
+                                uint32 extra_point_space = 0) const
+  {
+    return (m_data_end < data ||
+            (expected_points > ((m_data_end - data) /
+                                (POINT_DATA_SIZE + extra_point_space))));
+  }
+protected:
+  const char *m_data;				// Start of WKB data (not owned)
+  const char *m_data_end;			// One past the last valid byte
+};
+
+
+/***************************** Point *******************************/
+ 
+/* POINT: a single coordinate pair stored as two little-endian doubles. */
+class Gis_point: public Geometry
+{
+public:
+  Gis_point() = default;                              /* Remove gcc warning */
+  virtual ~Gis_point() = default;                     /* Remove gcc warning */
+  uint32 get_data_size() const;
+  bool init_from_wkt(Gis_read_stream *trs, String *wkb);
+  uint init_from_wkb(const char *wkb, uint len, wkbByteOrder bo, String *res);
+  bool init_from_json(json_engine_t *je, bool er_on_3D, String *wkb);
+  bool get_data_as_wkt(String *txt, const char **end) const;
+  bool get_data_as_json(String *txt, uint max_dec_digits,
+                        const char **end) const;
+  bool get_mbr(MBR *mbr, const char **end) const;
+  
+  /* Read both coordinates; returns 1 if the data is truncated. */
+  int get_xy(double *x, double *y) const
+  {
+    const char *data= m_data;
+    if (no_data(data, SIZEOF_STORED_DOUBLE * 2))
+      return 1;
+    float8get(*x, data);
+    float8get(*y, data + SIZEOF_STORED_DOUBLE);
+    return 0;
+  }
+
+  /* Read both coordinates converted from degrees to radians. */
+  int get_xy_radian(double *x, double *y) const
+  {
+    if (!get_xy(x, y))
+    {
+      *x= (*x)*M_PI/180;
+      *y= (*y)*M_PI/180;
+      return 0;
+    }
+    return 1;
+  }
+
+  int get_x(double *x) const
+  {
+    if (no_data(m_data, SIZEOF_STORED_DOUBLE))
+      return 1;
+    float8get(*x, m_data);
+    return 0;
+  }
+
+  int get_y(double *y) const
+  {
+    const char *data= m_data;
+    if (no_data(data, SIZEOF_STORED_DOUBLE * 2)) return 1;
+    float8get(*y, data + SIZEOF_STORED_DOUBLE);
+    return 0;
+  }
+
+  int geom_length(double *len, const char **end) const;
+  int area(double *ar, const char **end) const;
+  bool dimension(uint32 *dim, const char **end) const
+  {
+    *dim= 0;
+    *end= 0;					/* No default end */
+    return 0;
+  }
+  int store_shapes(Gcalc_shape_transporter *trn) const;
+  const Class_info *get_class_info() const;
+  double calculate_haversine(const Geometry *g, const double sphere_radius,
+                             int *error);
+  int spherical_distance_multipoints(Geometry *g, const double r, double *result,
+                                     int *error);
+};
+
+
+/***************************** LineString *******************************/
+
+/* LINESTRING: an ordered sequence of points (dimension 1). */
+class Gis_line_string: public Geometry
+{
+public:
+  Gis_line_string() = default;                        /* Remove gcc warning */
+  virtual ~Gis_line_string() = default;               /* Remove gcc warning */
+  uint32 get_data_size() const;
+  bool init_from_wkt(Gis_read_stream *trs, String *wkb);
+  uint init_from_wkb(const char *wkb, uint len, wkbByteOrder bo, String *res);
+  bool init_from_json(json_engine_t *je, bool er_on_3D, String *wkb);
+  bool get_data_as_wkt(String *txt, const char **end) const;
+  bool get_data_as_json(String *txt, uint max_dec_digits,
+                        const char **end) const;
+  bool get_mbr(MBR *mbr, const char **end) const;
+  int geom_length(double *len, const char **end) const;
+  int area(double *ar, const char **end) const;
+  int is_closed(int *closed) const;
+  int num_points(uint32 *n_points) const;
+  int start_point(String *point) const;
+  int end_point(String *point) const;
+  int point_n(uint32 n, String *result) const;
+  bool dimension(uint32 *dim, const char **end) const
+  {
+    *dim= 1;
+    *end= 0;					/* No default end */
+    return 0;
+  }
+  int store_shapes(Gcalc_shape_transporter *trn) const;
+  const Class_info *get_class_info() const;
+};
+
+
+/***************************** Polygon *******************************/
+
+/* POLYGON: an exterior ring plus zero or more interior rings (dimension 2). */
+class Gis_polygon: public Geometry
+{
+public:
+  Gis_polygon() = default;                            /* Remove gcc warning */
+  virtual ~Gis_polygon() = default;                   /* Remove gcc warning */
+  uint32 get_data_size() const;
+  bool init_from_wkt(Gis_read_stream *trs, String *wkb);
+  uint init_from_wkb(const char *wkb, uint len, wkbByteOrder bo, String *res);
+  uint init_from_opresult(String *bin, const char *opres, uint res_len);
+  bool init_from_json(json_engine_t *je, bool er_on_3D, String *wkb);
+  bool get_data_as_wkt(String *txt, const char **end) const;
+  bool get_data_as_json(String *txt, uint max_dec_digits,
+                        const char **end) const;
+  bool get_mbr(MBR *mbr, const char **end) const;
+  int area(double *ar, const char **end) const;
+  int exterior_ring(String *result) const;
+  int num_interior_ring(uint32 *n_int_rings) const;
+  int interior_ring_n(uint32 num, String *result) const;
+  int centroid_xy(double *x, double *y) const;
+  int centroid(String *result) const;
+  bool dimension(uint32 *dim, const char **end) const
+  {
+    *dim= 2;
+    *end= 0;					/* No default end */
+    return 0;
+  }
+  int store_shapes(Gcalc_shape_transporter *trn) const;
+  const Class_info *get_class_info() const;
+};
+
+
+/***************************** MultiPoint *******************************/
+
+/* MULTIPOINT: a collection of points, each with its own WKB header. */
+class Gis_multi_point: public Geometry
+{
+  // Maximum number of points in MultiPoint that can fit into String
+  static const uint32 max_n_points=
+    (uint32) (UINT_MAX32 - WKB_HEADER_SIZE - 4 /* n_points */) /
+    (WKB_HEADER_SIZE + POINT_DATA_SIZE);
+public:
+  Gis_multi_point() = default;                        /* Remove gcc warning */
+  virtual ~Gis_multi_point() = default;               /* Remove gcc warning */
+  uint32 get_data_size() const;
+  bool init_from_wkt(Gis_read_stream *trs, String *wkb);
+  uint init_from_wkb(const char *wkb, uint len, wkbByteOrder bo, String *res);
+  uint init_from_opresult(String *bin, const char *opres, uint res_len);
+  bool init_from_json(json_engine_t *je, bool er_on_3D, String *wkb);
+  bool get_data_as_wkt(String *txt, const char **end) const;
+  bool get_data_as_json(String *txt, uint max_dec_digits,
+                        const char **end) const;
+  bool get_mbr(MBR *mbr, const char **end) const;
+  int num_geometries(uint32 *num) const;
+  int geometry_n(uint32 num, String *result) const;
+  bool dimension(uint32 *dim, const char **end) const
+  {
+    *dim= 0;
+    *end= 0;					/* No default end */
+    return 0;
+  }
+  int store_shapes(Gcalc_shape_transporter *trn) const;
+  const Class_info *get_class_info() const;
+  int spherical_distance_multipoints(Geometry *g, const double r, double *res,
+                                     int *error);
+};
+
+
+/***************************** MultiLineString *******************************/
+
+/* MULTILINESTRING: a collection of linestrings (dimension 1). */
+class Gis_multi_line_string: public Geometry
+{
+public:
+  Gis_multi_line_string() = default;                  /* Remove gcc warning */
+  virtual ~Gis_multi_line_string() = default;         /* Remove gcc warning */
+  uint32 get_data_size() const;
+  bool init_from_wkt(Gis_read_stream *trs, String *wkb);
+  uint init_from_wkb(const char *wkb, uint len, wkbByteOrder bo, String *res);
+  uint init_from_opresult(String *bin, const char *opres, uint res_len);
+  bool init_from_json(json_engine_t *je, bool er_on_3D, String *wkb);
+  bool get_data_as_wkt(String *txt, const char **end) const;
+  bool get_data_as_json(String *txt, uint max_dec_digits,
+                        const char **end) const;
+  bool get_mbr(MBR *mbr, const char **end) const;
+  int num_geometries(uint32 *num) const;
+  int geometry_n(uint32 num, String *result) const;
+  int geom_length(double *len, const char **end) const;
+  int is_closed(int *closed) const;
+  bool dimension(uint32 *dim, const char **end) const
+  {
+    *dim= 1;
+    *end= 0;					/* No default end */
+    return 0;
+  }
+  int store_shapes(Gcalc_shape_transporter *trn) const;
+  const Class_info *get_class_info() const;
+};
+
+
+/***************************** MultiPolygon *******************************/
+
+/* MultiPolygon: a collection of Polygon values */
+class Gis_multi_polygon: public Geometry
+{
+public:
+  Gis_multi_polygon() = default;                      /* Remove gcc warning */
+  virtual ~Gis_multi_polygon() = default;             /* Remove gcc warning */
+  uint32 get_data_size() const;
+  bool init_from_wkt(Gis_read_stream *trs, String *wkb);
+  uint init_from_wkb(const char *wkb, uint len, wkbByteOrder bo, String *res);
+  bool init_from_json(json_engine_t *je, bool er_on_3D, String *wkb);
+  bool get_data_as_wkt(String *txt, const char **end) const;
+  bool get_data_as_json(String *txt, uint max_dec_digits,
+                        const char **end) const;
+  bool get_mbr(MBR *mbr, const char **end) const;
+  int num_geometries(uint32 *num) const;
+  int geometry_n(uint32 num, String *result) const;
+  int area(double *ar, const char **end) const;
+  int centroid(String *result) const;
+  /* Polygons are 2-dimensional geometries */
+  bool dimension(uint32 *dim, const char **end) const
+  {
+    *dim= 2;
+    *end= 0;					/* No default end */
+    return 0;
+  }
+  int store_shapes(Gcalc_shape_transporter *trn) const;
+  const Class_info *get_class_info() const;
+  uint init_from_opresult(String *bin, const char *opres, uint res_len);
+};
+
+
+/*********************** GeometryCollection *******************************/
+
+/*
+  GeometryCollection: a heterogeneous collection of geometries.
+  dimension() is out-of-line here (unlike the siblings above) since the
+  result depends on the contained geometries.
+*/
+class Gis_geometry_collection: public Geometry
+{
+public:
+  Gis_geometry_collection() = default;                /* Remove gcc warning */
+  virtual ~Gis_geometry_collection() = default;       /* Remove gcc warning */
+  uint32 get_data_size() const;
+  bool init_from_wkt(Gis_read_stream *trs, String *wkb);
+  uint init_from_wkb(const char *wkb, uint len, wkbByteOrder bo, String *res);
+  uint init_from_opresult(String *bin, const char *opres, uint res_len);
+  bool init_from_json(json_engine_t *je, bool er_on_3D, String *wkb);
+  bool get_data_as_wkt(String *txt, const char **end) const;
+  bool get_data_as_json(String *txt, uint max_dec_digits,
+                        const char **end) const;
+  bool get_mbr(MBR *mbr, const char **end) const;
+  int area(double *ar, const char **end) const;
+  int geom_length(double *len, const char **end) const;
+  int num_geometries(uint32 *num) const;
+  int geometry_n(uint32 num, String *result) const;
+  bool dimension(uint32 *dim, const char **end) const;
+  int store_shapes(Gcalc_shape_transporter *trn) const;
+  const Class_info *get_class_info() const;
+};
+
+/*
+  Raw, properly aligned storage in which any Geometry subclass can be
+  constructed in place (placement new).
+  NOTE(review): the template arguments of my_aligned_storage were lost
+  during text extraction; restored to the upstream form sized/aligned on
+  Gis_point — confirm against sql/spatial.h in the source tree.
+*/
+struct Geometry_buffer : public
+  my_aligned_storage<sizeof(Gis_point), MY_ALIGNOF(Gis_point)> {};
+
+#endif /*HAVE_SPATIAL*/
+#endif
diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc
new file mode 100644
index 00000000..029b12ad
--- /dev/null
+++ b/sql/sql_acl.cc
@@ -0,0 +1,15107 @@
+/* Copyright (c) 2000, 2018, Oracle and/or its affiliates.
+   Copyright (c) 2009, 2023, MariaDB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA */
+
+
+/*
+  The privileges are saved in the following tables:
+  mysql/user	 ; super user who are allowed to do almost anything
+  mysql/host	 ; host privileges. This is used if host is empty in mysql/db.
+  mysql/db	 ; database privileges / user
+
+  Data in the tables is sorted according to how many non-wildcard characters
+  there are in the relevant fields. Empty strings come last.
+*/
+
+#include "mariadb.h"                          /* NO_EMBEDDED_ACCESS_CHECKS */
+#include "sql_priv.h"
+#include "sql_acl.h"         // MYSQL_DB_FIELD_COUNT, ACL_ACCESS
+#include "sql_base.h"                           // close_mysql_tables
+#include "key.h"             // key_copy, key_cmp_if_same, key_restore
+#include "sql_show.h"        // append_identifier
+#include "sql_table.h"                         // write_bin_log
+#include "hash_filo.h"
+#include "sql_parse.h"                          // check_access
+#include "sql_view.h"                           // VIEW_ANY_ACL
+#include "records.h"              // READ_RECORD, read_record_info,
+                                  // init_read_record, end_read_record
+#include "rpl_filter.h"           // rpl_filter
+#include "rpl_rli.h"
+#include <m_ctype.h>       /* NOTE(review): header name restored — was lost in extraction */
+#include <stdarg.h>        /* NOTE(review): header name restored — was lost in extraction */
+#include "sp_head.h"
+#include "sp.h"
+#include "transaction.h"
+#include "lock.h"                               // MYSQL_LOCK_IGNORE_TIMEOUT
+#include <sql_common.h>                   /* NOTE(review): restored; verify */
+#include <mysql/plugin_auth.h>            /* NOTE(review): restored; verify */
+#include <mysql/plugin_password_validation.h> /* NOTE(review): restored; verify */
+#include "sql_connect.h"
+#include "hostname.h"
+#include "sql_db.h"
+#include "sql_array.h"
+#include "sql_hset.h"
+#include "password.h"
+
+#include "sql_plugin_compat.h"
+#include "wsrep_mysqld.h"
+
+#define MAX_SCRAMBLE_LENGTH 1024
+
+/* True when mysql.user passwords use the short pre-4.1 hash format */
+bool mysql_user_table_is_in_short_password_format= false;
+/* presumably: privileges come from mysql.global_priv rather than the
+   legacy user table — TODO confirm against acl_load */
+bool using_global_priv_table= true;
+
+// set that from field length in acl_load?
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+const uint max_hostname_length= HOSTNAME_LENGTH;
+const uint max_dbname_length= NAME_CHAR_LEN;
+#endif
+
+/*
+  Return a printable name for the transport type of the given Vio.
+  In the embedded server a NULL vio denotes an internal connection.
+*/
+const char *safe_vio_type_name(Vio *vio)
+{
+  size_t ignored_len;
+#ifdef EMBEDDED_LIBRARY
+  if (vio == NULL)
+    return "Internal";
+#endif
+  return vio_type_name(vio_type(vio), &ignored_len);
+}
+
+#include "sql_acl_getsort.ic"
+
+/* Built-in authentication plugin names; compared by pointer identity below */
+static LEX_CSTRING native_password_plugin_name= {
+  STRING_WITH_LEN("mysql_native_password")
+};
+
+static LEX_CSTRING old_password_plugin_name= {
+  STRING_WITH_LEN("mysql_old_password")
+};
+
+/// @todo make it configurable
+LEX_CSTRING *default_auth_plugin_name= &native_password_plugin_name;
+
+/*
+  Wildcard host, matches any hostname
+*/
+LEX_CSTRING host_not_specified= { STRING_WITH_LEN("%") };
+
+/*
+  Constants, used in the SHOW GRANTS command.
+  Their actual string values are irrelevant, they're always compared
+  as pointers to these string constants.
+*/
+LEX_CSTRING current_user= { STRING_WITH_LEN("*current_user") };
+LEX_CSTRING current_role= { STRING_WITH_LEN("*current_role") };
+LEX_CSTRING current_user_and_current_role=
+ { STRING_WITH_LEN("*current_user_and_current_role") };
+LEX_CSTRING none= {STRING_WITH_LEN("NONE") };
+LEX_CSTRING public_name= {STRING_WITH_LEN("PUBLIC") };
+
+/* plugin_ref handles for the two built-in auth plugins */
+static plugin_ref old_password_plugin;
+static plugin_ref native_password_plugin;
+
+/*
+  Resolve an authentication plugin by name.
+
+  The two built-in plugins are recognized by pointer identity with their
+  interned names and need no locking. Anything else is locked through the
+  plugin subsystem; *locked is set so the caller knows to unlock it.
+*/
+static plugin_ref get_auth_plugin(THD *thd, const LEX_CSTRING &name, bool *locked)
+{
+  if (name.str == native_password_plugin_name.str)
+    return native_password_plugin;
+  if (name.str == old_password_plugin_name.str)
+    return old_password_plugin;
+  *locked= true;
+  return my_plugin_lock_by_name(thd, &name, MYSQL_AUTHENTICATION_PLUGIN);
+}
+
+/* Classes */
+
+/* Host pattern: a hostname/wildcard string, or a masked numeric IP */
+struct acl_host_and_ip
+{
+  char *hostname;
+  long ip, ip_mask;                      // Used with masked ip:s
+};
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+static bool compare_hostname(const acl_host_and_ip *, const char *, const char *);
+/* PUBLIC is recognized by pointer identity with public_name.str */
+static inline bool is_public(const char *l) { return l == public_name.str; }
+static inline bool is_public(const LEX_CSTRING *l) { return is_public(l->str); }
+static inline bool is_public(const LEX_USER *l) { return is_public(&l->user); }
+
+#else
+#define compare_hostname(X,Y,Z) 0
+#endif
+
+/* Base for all ACL entries: a sort weight plus a set of privilege bits */
+class ACL_ACCESS {
+public:
+  ulonglong sort;        // specificity weight used to order matching entries
+  privilege_t access;
+  ACL_ACCESS()
+   :sort(0), access(NO_ACL)
+  { }
+};
+
+/* ACL_HOST is used if no host is specified */
+
+/* Entry loaded from the mysql.host table (see file header comment) */
+class ACL_HOST :public ACL_ACCESS
+{
+public:
+  acl_host_and_ip host;
+  char *db;
+};
+
+/* State shared by users and roles: name, flags and granted roles */
+class ACL_USER_BASE :public ACL_ACCESS, public Sql_alloc
+{
+
+public:
+  ACL_USER_BASE()
+   :flags(0), user(null_clex_str)
+  {
+    bzero(&role_grants, sizeof(role_grants));
+  }
+  uchar flags;           // field used to store various state information
+  LEX_CSTRING user;
+  /* list to hold references to granted roles (ACL_ROLE instances) */
+  DYNAMIC_ARRAY role_grants;
+  const char *get_username() { return user.str; }
+};
+
+/* Per-account attributes (resources, SSL requirements, auth methods) */
+class ACL_USER_PARAM
+{
+public:
+  ACL_USER_PARAM()
+  {
+    /* NOTE(review): bzero of *this relies on this class staying trivial
+       (POD members only) — keep it that way */
+    bzero(this, sizeof(*this));
+  }
+  acl_host_and_ip host;
+  size_t hostname_length;
+  USER_RESOURCES user_resource;
+  enum SSL_type ssl_type;
+  uint password_errors;
+  const char *ssl_cipher, *x509_issuer, *x509_subject;
+  LEX_CSTRING default_rolename;
+  /* array of nauth authentication methods (plugin + auth string + salt) */
+  struct AUTH { LEX_CSTRING plugin, auth_string, salt; } *auth;
+  uint nauth;
+  bool account_locked;
+  bool password_expired;
+  my_time_t password_last_changed;
+  longlong password_lifetime;
+
+  /* Allocate n AUTH slots on root; returns true on out-of-memory */
+  bool alloc_auth(MEM_ROOT *root, uint n)
+  {
+    return !(auth= (AUTH*) alloc_root(root, (nauth= n)*sizeof(AUTH)));
+  }
+};
+
+
+/* A user account: base identity plus per-account parameters */
+class ACL_USER :public ACL_USER_BASE,
+                public ACL_USER_PARAM
+{
+public:
+
+  ACL_USER() = default;
+  ACL_USER(THD *thd, const LEX_USER &combo,
+           const Account_options &options,
+           const privilege_t privileges);
+
+  /*
+    Deep-copy this user onto 'root'; returns NULL on out-of-memory.
+    role_grants is zeroed in the copy — the caller must rebuild it.
+  */
+  ACL_USER *copy(MEM_ROOT *root)
+  {
+    ACL_USER *dst;
+    AUTH *dauth;
+    if (!multi_alloc_root(root, &dst, sizeof(ACL_USER),
+                                &dauth, sizeof(AUTH)*nauth, NULL))
+      return 0;
+    *dst= *this;
+    dst->user= safe_lexcstrdup_root(root, user);
+    dst->ssl_cipher= safe_strdup_root(root, ssl_cipher);
+    dst->x509_issuer= safe_strdup_root(root, x509_issuer);
+    dst->x509_subject= safe_strdup_root(root, x509_subject);
+    dst->auth= dauth;
+    for (uint i=0; i < nauth; i++, dauth++)
+    {
+      /* built-in plugin names are interned constants — share the pointer */
+      if (auth[i].plugin.str == native_password_plugin_name.str ||
+          auth[i].plugin.str == old_password_plugin_name.str)
+        dauth->plugin= auth[i].plugin;
+      else
+        dauth->plugin= safe_lexcstrdup_root(root, auth[i].plugin);
+      dauth->auth_string= safe_lexcstrdup_root(root, auth[i].auth_string);
+      if (auth[i].salt.length == 0)
+        dauth->salt= auth[i].salt;
+      else
+        dauth->salt= safe_lexcstrdup_root(root, auth[i].salt);
+    }
+    dst->host.hostname= safe_strdup_root(root, host.hostname);
+    dst->default_rolename= safe_lexcstrdup_root(root, default_rolename);
+    bzero(&dst->role_grants, sizeof(role_grants));
+    return dst;
+  }
+
+  /* Order by user name (case-sensitive), then host (case-insensitive) */
+  int cmp(const char *user2, const char *host2)
+  {
+    CHARSET_INFO *cs= system_charset_info;
+    int res;
+    res= strcmp(user.str, user2);
+    if (!res)
+      res= my_strcasecmp(cs, host.hostname, host2);
+    return res;
+  }
+
+  bool eq(const char *user2, const char *host2) { return !cmp(user2, host2); }
+
+  /* Exact user-name match combined with wildcard/mask host match */
+  bool wild_eq(const char *user2, const char *host2, const char *ip2)
+  {
+    if (strcmp(user.str, user2))
+      return false;
+
+    return compare_hostname(&host, host2, ip2 ? ip2 : host2);
+  }
+};
+
+/* A role: a named privilege bundle grantable to users and other roles */
+class ACL_ROLE :public ACL_USER_BASE
+{
+public:
+  /*
+    In case of granting a role to a role, the access bits are merged together
+    via a bit OR operation and placed in the ACL_USER::access field.
+
+    When rebuilding role_grants via the rebuild_role_grant function,
+    the ACL_USER::access field needs to be reset first. The field
+    initial_role_access holds initial grants, as granted directly to the role
+  */
+  privilege_t initial_role_access;
+  /*
+    In subgraph traversal, when we need to traverse only a part of the graph
+    (e.g. all direct and indirect grantees of a role X), the counter holds the
+    number of affected neighbour nodes.
+    See also propagate_role_grants()
+  */
+  uint  counter;
+  DYNAMIC_ARRAY parent_grantee; // array of backlinks to elements granted
+
+  ACL_ROLE(ACL_USER *user);
+  ACL_ROLE(const char *rolename, privilege_t privileges, MEM_ROOT *mem);
+
+};
+
+/* Database-level privileges: one entry per (host, user, db) */
+class ACL_DB :public ACL_ACCESS
+{
+public:
+  ACL_DB() :initial_access(NO_ACL) { }
+  acl_host_and_ip host;
+  const char *user,*db;
+  privilege_t initial_access; /* access bits present in the table */
+
+  const char *get_username() { return user; }
+};
+
+#ifndef DBUG_OFF
+/* status variables, only visible in SHOW STATUS after -#d,role_merge_stats */
+ulong role_global_merges= 0, role_db_merges= 0, role_table_merges= 0,
+      role_column_merges= 0, role_routine_merges= 0;
+#endif
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+static void update_hostname(acl_host_and_ip *host, const char *hostname);
+/* Helpers producing SHOW GRANTS output; defined later in this file */
+static bool show_proxy_grants (THD *, const char *, const char *,
+                               char *, size_t);
+static bool show_role_grants(THD *, const char *,
+                             ACL_USER_BASE *, char *, size_t);
+static bool show_default_role(THD *, ACL_USER *, char *, size_t);
+static bool show_global_privileges(THD *, ACL_USER_BASE *,
+                                   bool, char *, size_t);
+static bool show_database_privileges(THD *, const char *, const char *,
+                                     char *, size_t);
+static bool show_table_and_column_privileges(THD *, const char *, const char *,
+                                             char *, size_t);
+static int show_routine_grants(THD *, const char *, const char *,
+                               const Sp_handler *sph, char *, int);
+
+/* The PUBLIC pseudo-role, if one has been created; NULL otherwise */
+static ACL_ROLE *acl_public= NULL;
+
+/* Privileges granted to PUBLIC (empty when no PUBLIC role exists) */
+inline privilege_t public_access()
+{
+  return (acl_public ? acl_public->access : NO_ACL);
+}
+
+class Grant_tables;
+class User_table;
+class Proxies_priv_table;
+
+/*
+  One row of mysql.proxies_priv: permission for 'user'@'host' to act on
+  behalf of 'proxied_user'@'proxied_host', optionally WITH GRANT OPTION.
+*/
+class ACL_PROXY_USER :public ACL_ACCESS
+{
+  acl_host_and_ip host;
+  const char *user;
+  acl_host_and_ip proxied_host;
+  const char *proxied_user;
+  bool with_grant;
+
+  /* Column positions in the mysql.proxies_priv table */
+  typedef enum {
+    MYSQL_PROXIES_PRIV_HOST,
+    MYSQL_PROXIES_PRIV_USER,
+    MYSQL_PROXIES_PRIV_PROXIED_HOST,
+    MYSQL_PROXIES_PRIV_PROXIED_USER,
+    MYSQL_PROXIES_PRIV_WITH_GRANT,
+    MYSQL_PROXIES_PRIV_GRANTOR,
+    MYSQL_PROXIES_PRIV_TIMESTAMP } proxy_table_fields;
+public:
+  ACL_PROXY_USER () = default;
+
+  /* Initialize from borrowed strings; empty host strings become NULL */
+  void init(const char *host_arg, const char *user_arg,
+       const char *proxied_host_arg, const char *proxied_user_arg,
+       bool with_grant_arg)
+  {
+    user= user_arg;
+    update_hostname (&host, (host_arg && *host_arg) ? host_arg : NULL);
+    proxied_user= proxied_user_arg;
+    update_hostname (&proxied_host,
+                     (proxied_host_arg && *proxied_host_arg) ?
+                     proxied_host_arg : NULL);
+    with_grant= with_grant_arg;
+    /* "huhu" is the sort-pattern argument — see sql_acl_getsort.ic */
+    sort= get_magic_sort("huhu", host.hostname, user, proxied_host.hostname,
+                         proxied_user);
+  }
+
+  /* Same as above but duplicates all strings onto 'mem' first */
+  void init(MEM_ROOT *mem, const char *host_arg, const char *user_arg,
+       const char *proxied_host_arg, const char *proxied_user_arg,
+       bool with_grant_arg)
+  {
+    init ((host_arg && *host_arg) ? strdup_root (mem, host_arg) : NULL,
+          strdup_root (mem, user_arg),
+          (proxied_host_arg && *proxied_host_arg) ?
+            strdup_root (mem, proxied_host_arg) : NULL,
+          strdup_root (mem, proxied_user_arg),
+          with_grant_arg);
+  }
+
+  void init(const Proxies_priv_table& proxies_priv_table, MEM_ROOT *mem);
+
+  bool get_with_grant() { return with_grant; }
+  const char *get_user() { return user; }
+  const char *get_host() { return host.hostname; }
+  const char *get_proxied_user() { return proxied_user; }
+  const char *get_proxied_host() { return proxied_host.hostname; }
+  void set_user(MEM_ROOT *mem, const char *user_arg)
+  {
+    user= *user_arg ? strdup_root(mem, user_arg) : "";
+  }
+  void set_host(MEM_ROOT *mem, const char *host_arg)
+  {
+    update_hostname(&host, safe_strdup_root(mem, host_arg));
+  }
+
+  /* Returns TRUE (and warns) when the entry is unusable under
+     --skip-name-resolve because a hostname would need resolving */
+  bool check_validity()
+  {
+    if (opt_skip_name_resolve &&
+        (hostname_requires_resolving(host.hostname) ||
+         hostname_requires_resolving(proxied_host.hostname)))
+    {
+      sql_print_warning("'proxies_priv' entry '%s@%s %s@%s' "
+                        "ignored in --skip-name-resolve mode.",
+                        proxied_user,
+                        safe_str(proxied_host.hostname), user,
+                        safe_str(host.hostname));
+      return TRUE;
+    }
+    return FALSE;
+  }
+
+  /* Wildcard match of a connecting user against this entry; empty
+     user/proxied_user fields in the entry act as wildcards */
+  bool matches(const char *host_arg, const char *user_arg, const char *ip_arg,
+                const char *proxied_user_arg)
+  {
+    DBUG_ENTER("ACL_PROXY_USER::matches");
+    DBUG_PRINT("info", ("compare_hostname(%s,%s,%s) &&"
+                        "compare_hostname(%s,%s,%s) &&"
+                        "wild_compare (%s,%s) &&"
+                        "wild_compare (%s,%s)",
+                        host.hostname, host_arg, ip_arg, proxied_host.hostname,
+                        host_arg, ip_arg, user_arg, user,
+                        proxied_user_arg, proxied_user));
+    DBUG_RETURN(compare_hostname(&host, host_arg, ip_arg) &&
+                compare_hostname(&proxied_host, host_arg, ip_arg) &&
+                (!*user || !strcmp(user_arg, user)) &&
+                (!*proxied_user || !strcmp(proxied_user_arg, proxied_user)));
+  }
+
+
+  /* NULL-safe string equality */
+  inline static bool auth_element_equals(const char *a, const char *b)
+  {
+    return (a == b || (a != NULL && b != NULL && !strcmp(a,b)));
+  }
+
+
+  /* Exact primary-key equality (all four name components) */
+  bool pk_equals(ACL_PROXY_USER *grant)
+  {
+    DBUG_ENTER("pk_equals");
+    DBUG_PRINT("info", ("strcmp(%s,%s) &&"
+                        "strcmp(%s,%s) &&"
+                        "wild_compare (%s,%s) &&"
+                        "wild_compare (%s,%s)",
+                        user, grant->user, proxied_user, grant->proxied_user,
+                        host.hostname, grant->host.hostname,
+                        proxied_host.hostname, grant->proxied_host.hostname));
+
+    bool res= auth_element_equals(user, grant->user) &&
+              auth_element_equals(proxied_user, grant->proxied_user) &&
+              auth_element_equals(host.hostname, grant->host.hostname) &&
+              auth_element_equals(proxied_host.hostname,
+                                  grant->proxied_host.hostname);
+    DBUG_RETURN(res);
+  }
+
+
+  /* True when this entry was granted to exactly user_arg@host_arg
+     (NULL/empty hosts compare equal) */
+  bool granted_on(const char *host_arg, const char *user_arg)
+  {
+    return (!strcmp(user, user_arg) &&
+            ((!host.hostname && (!host_arg || !host_arg[0])) ||
+             (host.hostname && host_arg && !strcmp(host.hostname, host_arg))));
+  }
+
+
+  /* Append the SHOW GRANTS form of this entry to str */
+  void print_grant(String *str)
+  {
+    str->append(STRING_WITH_LEN("GRANT PROXY ON '"));
+    str->append(proxied_user, strlen(proxied_user));
+    str->append(STRING_WITH_LEN("'@'"));
+    if (proxied_host.hostname)
+      str->append(proxied_host.hostname, strlen(proxied_host.hostname));
+    str->append(STRING_WITH_LEN("' TO '"));
+    str->append(user, strlen(user));
+    str->append(STRING_WITH_LEN("'@'"));
+    if (host.hostname)
+      str->append(host.hostname, strlen(host.hostname));
+    str->append(STRING_WITH_LEN("'"));
+    if (with_grant)
+      str->append(STRING_WITH_LEN(" WITH GRANT OPTION"));
+  }
+
+  void set_data(ACL_PROXY_USER *grant)
+  {
+    with_grant= grant->with_grant;
+  }
+
+  /* Store the four key columns into the proxies_priv table record;
+     returns TRUE on conversion/store error */
+  static int store_pk(TABLE *table,
+                      const LEX_CSTRING *host,
+                      const LEX_CSTRING *user,
+                      const LEX_CSTRING *proxied_host,
+                      const LEX_CSTRING *proxied_user)
+  {
+    DBUG_ENTER("ACL_PROXY_USER::store_pk");
+    DBUG_PRINT("info", ("host=%s, user=%s, proxied_host=%s, proxied_user=%s",
+                        host->str, user->str,
+                        proxied_host->str, proxied_user->str));
+    if (table->field[MYSQL_PROXIES_PRIV_HOST]->store(host->str,
+                                                   host->length,
+                                                   system_charset_info))
+      DBUG_RETURN(TRUE);
+    if (table->field[MYSQL_PROXIES_PRIV_USER]->store(user->str,
+                                                   user->length,
+                                                   system_charset_info))
+      DBUG_RETURN(TRUE);
+    if (table->field[MYSQL_PROXIES_PRIV_PROXIED_HOST]->store(proxied_host->str,
+                                                           proxied_host->length,
+                                                           system_charset_info))
+      DBUG_RETURN(TRUE);
+    if (table->field[MYSQL_PROXIES_PRIV_PROXIED_USER]->store(proxied_user->str,
+                                                           proxied_user->length,
+                                                           system_charset_info))
+      DBUG_RETURN(TRUE);
+
+    DBUG_RETURN(FALSE);
+  }
+
+  /* Store a full row (keys + with_grant + grantor) into the table record */
+  static int store_data_record(TABLE *table,
+                               const LEX_CSTRING *host,
+                               const LEX_CSTRING *user,
+                               const LEX_CSTRING *proxied_host,
+                               const LEX_CSTRING *proxied_user,
+                               bool with_grant,
+                               const char *grantor)
+  {
+    DBUG_ENTER("ACL_PROXY_USER::store_pk");
+    if (store_pk(table,  host, user, proxied_host, proxied_user))
+      DBUG_RETURN(TRUE);
+    DBUG_PRINT("info", ("with_grant=%s", with_grant ? "TRUE" : "FALSE"));
+    if (table->field[MYSQL_PROXIES_PRIV_WITH_GRANT]->store(with_grant ? 1 : 0,
+                                                           TRUE))
+      DBUG_RETURN(TRUE);
+    if (table->field[MYSQL_PROXIES_PRIV_GRANTOR]->store(grantor,
+                                                        strlen(grantor),
+                                                        system_charset_info))
+      DBUG_RETURN(TRUE);
+
+    DBUG_RETURN(FALSE);
+  }
+};
+
+#define FIRST_NON_YN_FIELD 26
+
+/* Cached privilege lookup stored in acl_cache; the lookup key is stored
+   inline after the fixed members (old-style [1] flexible array) */
+class acl_entry :public hash_filo_element
+{
+public:
+  privilege_t access;
+  uint16 length;
+  char key[1];					// Key will be stored here
+};
+
+
+/* HASH callback: the key bytes live inline at the tail of acl_entry. */
+static uchar* acl_entry_get_key(acl_entry *entry, size_t *length,
+                                my_bool not_used __attribute__((unused)))
+{
+  uchar *key_bytes= (uchar*) entry->key;
+  *length= (uint) entry->length;
+  return key_bytes;
+}
+
+/* HASH callback: roles are keyed by their role name. */
+static uchar* acl_role_get_key(ACL_ROLE *entry, size_t *length,
+                               my_bool not_used __attribute__((unused)))
+{
+  const LEX_CSTRING &name= entry->user;
+  *length= (uint) name.length;
+  return (uchar*) name.str;
+}
+
+/* One user<->role grant; the three name pointers reference segments of
+   the packed buffer that also serves as hashkey (see init) */
+struct ROLE_GRANT_PAIR : public Sql_alloc
+{
+  char *u_uname;
+  char *u_hname;
+  char *r_uname;
+  LEX_STRING hashkey;
+  bool with_admin;
+
+  /* Returns true on out-of-memory */
+  bool init(MEM_ROOT *mem, const char *username, const char *hostname,
+            const char *rolename, bool with_admin_option);
+};
+
+/* HASH callback: user<->role mappings are keyed by the packed hashkey. */
+static uchar* acl_role_map_get_key(ROLE_GRANT_PAIR *entry, size_t *length,
+                                  my_bool not_used __attribute__((unused)))
+{
+  const LEX_STRING &hk= entry->hashkey;
+  *length= (uint) hk.length;
+  return (uchar*) hk.str;
+}
+
+/*
+  Initialize a user<->role mapping entry.
+
+  The three strings are packed, NUL-terminated, back to back into a single
+  buffer allocated on 'mem'; the whole buffer doubles as the hash key.
+  Returns true on out-of-memory.
+*/
+bool ROLE_GRANT_PAIR::init(MEM_ROOT *mem, const char *username,
+                           const char *hostname, const char *rolename,
+                           bool with_admin_option)
+{
+  size_t ulen= safe_strlen(username);
+  size_t hlen= safe_strlen(hostname);
+  size_t rlen= safe_strlen(rolename);
+  size_t bufflen= ulen + hlen + rlen + 3;    /* room for three '\0' */
+  char *buff= (char *) alloc_root(mem, bufflen);
+  if (!buff)
+    return true;
+
+  /* Lay the strings out consecutively inside the buffer */
+  u_uname= buff;
+  u_hname= buff + ulen + 1;
+  r_uname= buff + ulen + hlen + 2;
+
+  if (username)                  /* memcpy from NULL is undefined */
+    memcpy(u_uname, username, ulen);
+  u_uname[ulen]= '\0';
+
+  if (hostname)
+    memcpy(u_hname, hostname, hlen);
+  u_hname[hlen]= '\0';
+
+  if (rolename)
+    memcpy(r_uname, rolename, rlen);
+  r_uname[rlen]= '\0';
+
+  hashkey.str= buff;
+  hashkey.length= bufflen;
+  with_admin= with_admin_option;
+  return false;
+}
+
+/* Max chars in a dotted-quad IPv4 address: "255.255.255.255" */
+#define IP_ADDR_STRLEN (3 + 1 + 3 + 1 + 3 + 1 + 3)
+#define ACL_KEY_LENGTH (IP_ADDR_STRLEN + 1 + NAME_LEN + \
+                        1 + USERNAME_LENGTH + 1)
+
+#if defined(HAVE_OPENSSL)
+/*
+  Without SSL the handshake consists of one packet. This packet
+  has both client capabilities and scrambled password.
+  With SSL the handshake might consist of two packets. If the first
+  packet (client capabilities) has CLIENT_SSL flag set, we have to
+  switch to SSL and read the second packet. The scrambled password
+  is in the second packet and client_capabilities field will be ignored.
+  Maybe it is better to accept flags other than CLIENT_SSL from the
+  second packet?
+*/
+#define SSL_HANDSHAKE_SIZE      2
+#define MIN_HANDSHAKE_SIZE      2
+#else
+#define MIN_HANDSHAKE_SIZE      6
+#endif /* HAVE_OPENSSL */
+#define NORMAL_HANDSHAKE_SIZE   6
+
+/* Column indexes in the privilege table layout */
+#define ROLE_ASSIGN_COLUMN_IDX  44
+#define DEFAULT_ROLE_COLUMN_IDX 45
+#define MAX_STATEMENT_TIME_COLUMN_IDX 46
+
+/* various flags valid for ACL_USER */
+#define IS_ROLE                 (1L << 0)
+/* Flag to mark that a ROLE is on the recursive DEPTH_FIRST_SEARCH stack */
+#define ROLE_ON_STACK            (1L << 1)
+/*
+  Flag to mark that a ROLE and all its neighbours have
+  been visited
+*/
+#define ROLE_EXPLORED           (1L << 2)
+/* Flag to mark that on_node was already called for this role */
+#define ROLE_OPENED             (1L << 3)
+
+/*
+  In-memory privilege caches, populated by acl_load.
+  NOTE(review): the template arguments of Dynamic_array and Hash_filo were
+  lost in extraction; restored below — confirm against upstream sql_acl.cc.
+*/
+static DYNAMIC_ARRAY acl_hosts, acl_users, acl_proxy_users;
+static Dynamic_array<ACL_DB> acl_dbs(PSI_INSTRUMENT_MEM, 0, 50);
+typedef Dynamic_array<ACL_DB>::CMP_FUNC acl_dbs_cmp;
+static HASH acl_roles;
+/*
+  A hash containing mappings user <--> role
+
+  A hash is used so as to make updates quick.
+  The hashkey used represents all the entries combined
+*/
+static HASH acl_roles_mappings;
+static MEM_ROOT acl_memroot, grant_memroot;
+static bool initialized=0;
+static bool allow_all_hosts=1;
+static HASH acl_check_hosts, column_priv_hash, proc_priv_hash, func_priv_hash;
+static HASH package_spec_priv_hash, package_body_priv_hash;
+static DYNAMIC_ARRAY acl_wild_hosts;
+static Hash_filo<acl_entry> *acl_cache;
+static uint grant_version=0; /* Version of priv tables. incremented by acl_load */
+static privilege_t get_access(TABLE *form, uint fieldnr, uint *next_field=0);
+static int acl_compare(const ACL_ACCESS *a, const ACL_ACCESS *b);
+static int acl_user_compare(const ACL_USER *a, const ACL_USER *b);
+static void rebuild_acl_users();
+static int acl_db_compare(const ACL_DB *a, const ACL_DB *b);
+static void rebuild_acl_dbs();
+static void init_check_host(void);
+static void rebuild_check_host(void);
+static void rebuild_role_grants(void);
+static ACL_USER *find_user_exact(const char *host, const char *user);
+static ACL_USER *find_user_wild(const char *host, const char *user, const char *ip= 0);
+static ACL_ROLE *find_acl_role(const char *user, bool allow_public);
+static ROLE_GRANT_PAIR *find_role_grant_pair(const LEX_CSTRING *u, const LEX_CSTRING *h, const LEX_CSTRING *r);
+static ACL_USER_BASE *find_acl_user_base(const char *user, const char *host);
+static bool update_user_table_password(THD *, const User_table&, const ACL_USER&);
+static bool acl_load(THD *thd, const Grant_tables& grant_tables);
+static inline void get_grantor(THD *thd, char* grantor);
+static bool add_role_user_mapping(const char *uname, const char *hname, const char *rname);
+static bool get_YN_as_bool(Field *field);
+
+#define ROLE_CYCLE_FOUND 2
+static int traverse_role_graph_up(ACL_ROLE *, void *,
+                                  int (*) (ACL_ROLE *, void *),
+                                  int (*) (ACL_ROLE *, ACL_ROLE *, void *));
+
+static int traverse_role_graph_down(ACL_USER_BASE *, void *,
+                             int (*) (ACL_USER_BASE *, void *),
+                             int (*) (ACL_USER_BASE *, ACL_ROLE *, void *));
+
+
+/* Privilege hash used for stored procedures */
+HASH *Sp_handler_procedure::get_priv_hash() const
+{
+  return &proc_priv_hash;
+}
+
+
+/* Privilege hash used for stored functions */
+HASH *Sp_handler_function::get_priv_hash() const
+{
+  return &func_priv_hash;
+}
+
+
+/* Privilege hash used for package specifications */
+HASH *Sp_handler_package_spec::get_priv_hash() const
+{
+  return &package_spec_priv_hash;
+}
+
+
+/* Privilege hash used for package bodies */
+HASH *Sp_handler_package_body::get_priv_hash() const
+{
+  return &package_body_priv_hash;
+}
+
+
+/*
+ Enumeration of ACL/GRANT tables in the mysql database
+*/
+enum enum_acl_tables
+{
+  DB_TABLE,
+  TABLES_PRIV_TABLE,
+  COLUMNS_PRIV_TABLE,
+#define FIRST_OPTIONAL_TABLE HOST_TABLE
+  HOST_TABLE,
+  PROCS_PRIV_TABLE,
+  PROXIES_PRIV_TABLE,
+  ROLES_MAPPING_TABLE,
+  USER_TABLE // <== always the last
+};
+
+/* Bit masks saying which grant tables an operation needs to open */
+static const int Table_user= 1 << USER_TABLE;
+static const int Table_db= 1 << DB_TABLE;
+static const int Table_tables_priv= 1 << TABLES_PRIV_TABLE;
+static const int Table_columns_priv= 1 << COLUMNS_PRIV_TABLE;
+static const int Table_host= 1 << HOST_TABLE;
+static const int Table_procs_priv= 1 << PROCS_PRIV_TABLE;
+static const int Table_proxies_priv= 1 << PROXIES_PRIV_TABLE;
+static const int Table_roles_mapping= 1 << ROLES_MAPPING_TABLE;
+
+/* Table names indexed by enum_acl_tables; USER_TABLE maps to global_priv */
+static LEX_CSTRING MYSQL_TABLE_NAME[USER_TABLE+1]= {
+  {STRING_WITH_LEN("db")},
+  {STRING_WITH_LEN("tables_priv")},
+  {STRING_WITH_LEN("columns_priv")},
+  {STRING_WITH_LEN("host")},
+  {STRING_WITH_LEN("procs_priv")},
+  {STRING_WITH_LEN("proxies_priv")},
+  {STRING_WITH_LEN("roles_mapping")},
+  {STRING_WITH_LEN("global_priv")}
+};
+static LEX_CSTRING MYSQL_TABLE_NAME_USER={STRING_WITH_LEN("user")};
+
+/**
+  Choose from either native or old password plugins when assigning a password
+*/
+
+/*
+  Pick the auth plugin matching a given password: the old plugin when
+  old_passwords=1 or the scramble has the pre-4.1 length, otherwise native.
+*/
+static LEX_CSTRING &guess_auth_plugin(THD *thd, size_t password_len)
+{
+  bool prefer_old= thd->variables.old_passwords == 1 ||
+                   password_len == SCRAMBLED_PASSWORD_CHAR_LENGTH_323;
+  return prefer_old ? old_password_plugin_name : native_password_plugin_name;
+}
+
+/**
+  Base class representing a generic grant table from the mysql database.
+
+  The potential tables that this class can represent are:
+  user, db, columns_priv, tables_priv, host, procs_priv, proxies_priv,
+  roles_mapping
+
+  Objects belonging to this parent class can only be constructed by the
+  Grants_table class. This ensures the correct initialization of the objects.
+*/
+class Grant_table_base
+{
+ public:
+  /* Number of fields for this Grant Table. */
+  uint num_fields() const { return m_table->s->fields; }
+  /* Check if the table exists after an attempt to open it was made.
+     Some tables, such as the host table in MySQL 5.6.7+ are missing. */
+  bool table_exists() const { return m_table; };
+  /* Initializes the READ_RECORD structure provided as a parameter
+     to read through the whole table, with all columns available. Cleaning up
+     is the caller's job. */
+  bool init_read_record(READ_RECORD* info) const
+  {
+    DBUG_ASSERT(m_table);
+
+    /* Reject tables with fewer columns than any supported layout */
+    if (num_fields() < min_columns)
+    {
+      my_printf_error(ER_UNKNOWN_ERROR, "Fatal error: mysql.%s table is "
+                      "damaged or in unsupported 3.20 format",
+                      MYF(ME_ERROR_LOG), m_table->s->table_name.str);
+      return 1;
+    }
+
+    bool result= ::init_read_record(info, m_table->in_use, m_table,
+                                    NULL, NULL, 1, true, false);
+    if (!result)
+      m_table->use_all_columns();
+    return result;
+  }
+
+  /* Return the underlying TABLE handle. */
+  TABLE* table() const { return m_table; }
+
+  privilege_t get_access() const
+  {
+    ulonglong access_bits= 0, bit= 1;
+    for (uint i = start_priv_columns; i < end_priv_columns; i++, bit<<=1)
+    {
+      if (get_YN_as_bool(m_table->field[i]))
+        access_bits|= bit;
+    }
+    return ALL_KNOWN_ACL & access_bits;
+  }
+
+ protected:
+  friend class Grant_tables;
+
+  Grant_table_base() : min_columns(3), start_priv_columns(0), end_priv_columns(0), m_table(0)
+  { }
+
+  /* Compute how many privilege columns this table has. This method
+     can only be called after the table has been opened.
+
+     IMPLEMENTATION
+     A privilege column is of type enum('Y', 'N'). Privilege columns are
+     expected to be one after another.
+  */
+  void set_table(TABLE *table)
+  {
+    if (!(m_table= table)) // Table does not exist or not opened.
+      return;
+
+    for (end_priv_columns= 0; end_priv_columns < num_fields(); end_priv_columns++)
+    {
+      Field *field= m_table->field[end_priv_columns];
+      if (field->real_type() == MYSQL_TYPE_ENUM &&
+          static_cast(field)->typelib->count == 2)
+      {
+        if (!start_priv_columns)
+          start_priv_columns= end_priv_columns;
+      }
+      else if (start_priv_columns)
+          break;
+    }
+  }
+
+
+  /* the min number of columns a table should have */
+  uint min_columns;
+  /* The index at which privilege columns start. */
+  uint start_priv_columns;
+  /* The index after the last privilege column */
+  uint end_priv_columns;
+
+  TABLE *m_table;
+};
+
+/*
+  Abstract interface to the user-account table, implemented both by the
+  tabular mysql.user table (User_table_tabular) and the JSON-based
+  mysql.global_priv table (User_table_json).
+*/
+class User_table: public Grant_table_base
+{
+ public:
+  /* Read-record init plus per-implementation sysvar setup
+     (e.g. username_char_length, password-format flags). */
+  bool init_read_record(READ_RECORD* info) const
+  {
+    return Grant_table_base::init_read_record(info) || setup_sysvars();
+  }
+
+  virtual LEX_CSTRING& name() const = 0;
+  virtual int get_auth(THD *, MEM_ROOT *, ACL_USER *u) const= 0;
+  virtual bool set_auth(const ACL_USER &u) const = 0;
+  virtual privilege_t get_access() const = 0;
+  virtual void set_access(const privilege_t rights, bool revoke) const = 0;
+
+  /* Host and User are always the first two columns, in both layouts. */
+  char *get_host(MEM_ROOT *root) const
+  { return ::get_field(root, m_table->field[0]); }
+  int set_host(const char *s, size_t l) const
+  { return m_table->field[0]->store(s, l, system_charset_info); };
+  char *get_user(MEM_ROOT *root) const
+  { return ::get_field(root, m_table->field[1]); }
+  int set_user(const char *s, size_t l) const
+  { return m_table->field[1]->store(s, l, system_charset_info); };
+
+  /* Per-account attributes; getters return 0/NULL when the underlying
+     column (or JSON key) is absent, setters return non-zero on failure. */
+  virtual SSL_type get_ssl_type () const = 0;
+  virtual int set_ssl_type (SSL_type x) const = 0;
+  virtual const char* get_ssl_cipher (MEM_ROOT *root) const = 0;
+  virtual int set_ssl_cipher (const char *s, size_t l) const = 0;
+  virtual const char* get_x509_issuer (MEM_ROOT *root) const = 0;
+  virtual int set_x509_issuer (const char *s, size_t l) const = 0;
+  virtual const char* get_x509_subject (MEM_ROOT *root) const = 0;
+  virtual int set_x509_subject (const char *s, size_t l) const = 0;
+  virtual longlong get_max_questions () const = 0;
+  virtual int set_max_questions (longlong x) const = 0;
+  virtual longlong get_max_updates () const = 0;
+  virtual int set_max_updates (longlong x) const = 0;
+  virtual longlong get_max_connections () const = 0;
+  virtual int set_max_connections (longlong x) const = 0;
+  virtual longlong get_max_user_connections () const = 0;
+  virtual int set_max_user_connections (longlong x) const = 0;
+  virtual double get_max_statement_time () const = 0;
+  virtual int set_max_statement_time (double x) const = 0;
+  virtual bool get_is_role () const = 0;
+  virtual int set_is_role (bool x) const = 0;
+  virtual const char* get_default_role (MEM_ROOT *root) const = 0;
+  virtual int set_default_role (const char *s, size_t l) const = 0;
+  virtual bool get_account_locked () const = 0;
+  virtual int set_account_locked (bool x) const = 0;
+  virtual bool get_password_expired () const = 0;
+  virtual int set_password_expired (bool x) const = 0;
+  virtual my_time_t get_password_last_changed () const = 0;
+  virtual int set_password_last_changed (my_time_t x) const = 0;
+  virtual longlong get_password_lifetime () const = 0;
+  virtual int set_password_lifetime (longlong x) const = 0;
+
+  virtual ~User_table() = default;
+ private:
+  friend class Grant_tables;
+  virtual int setup_sysvars() const = 0;
+};
+
+/* MySQL-3.23 to MariaDB 10.3 `user` table */
+class User_table_tabular: public User_table
+{
+ public:
+
+  LEX_CSTRING& name() const { return MYSQL_TABLE_NAME_USER; }
+
+  /* Read one user's authentication data: the Password column is used
+     unless a plugin is explicitly set, in which case the plugin's
+     auth string wins (a warning is logged if both are set and differ). */
+  int get_auth(THD *thd, MEM_ROOT *root, ACL_USER *u) const
+  {
+    u->alloc_auth(root, 1);
+    if (have_password())
+    {
+      const char *as= safe_str(::get_field(&acl_memroot, password()));
+      u->auth->auth_string.str= as;
+      u->auth->auth_string.length= strlen(as);
+      u->auth->plugin= guess_auth_plugin(thd, u->auth->auth_string.length);
+    }
+    else
+    {
+      u->auth->plugin= native_password_plugin_name;
+      u->auth->auth_string= empty_clex_str;
+    }
+    if (plugin() && authstr())
+    {
+      char *tmpstr= ::get_field(&acl_memroot, plugin());
+      if (tmpstr)
+      {
+        const char *pw= u->auth->auth_string.str;
+        const char *as= safe_str(::get_field(&acl_memroot, authstr()));
+        if (*pw)
+        {
+          if (*as && strcmp(as, pw))
+          {
+            sql_print_warning("'user' entry '%s@%s' has both a password and an "
+              "authentication plugin specified. The password will be ignored.",
+              safe_str(get_user(thd->mem_root)), safe_str(get_host(thd->mem_root)));
+          }
+          else
+            as= pw;
+        }
+        u->auth->plugin.str= tmpstr;
+        u->auth->plugin.length= strlen(tmpstr);
+        u->auth->auth_string.str= as;
+        u->auth->auth_string.length= strlen(as);
+      }
+    }
+    return 0;
+  }
+
+  /* Write one auth entry back; only a single auth is representable in the
+     tabular layout (returns 1 for u.nauth != 1). Without plugin columns,
+     only the two built-in password plugins can be stored. */
+  bool set_auth(const ACL_USER &u) const
+  {
+    if (u.nauth != 1)
+      return 1;
+    if (plugin())
+    {
+      if (have_password())
+        password()->reset();
+      plugin()->store(u.auth->plugin.str, u.auth->plugin.length, system_charset_info);
+      authstr()->store(u.auth->auth_string.str, u.auth->auth_string.length, system_charset_info);
+    }
+    else
+    {
+      if (u.auth->plugin.str != native_password_plugin_name.str &&
+          u.auth->plugin.str != old_password_plugin_name.str)
+        return 1;
+      password()->store(u.auth->auth_string.str, u.auth->auth_string.length, system_charset_info);
+    }
+    return 0;
+  }
+
+  /* Map the stored privilege bits to current privileges, implying newer
+     privileges from older ones depending on the table's column count
+     (i.e. which server version created it). */
+  privilege_t get_access() const
+  {
+    privilege_t access(Grant_table_base::get_access());
+    if ((num_fields() <= 13) && (access & CREATE_ACL))
+      access|=REFERENCES_ACL | INDEX_ACL | ALTER_ACL;
+
+    if (num_fields() <= 18)
+    {
+      access|= LOCK_TABLES_ACL | CREATE_TMP_ACL | SHOW_DB_ACL;
+      if (access & FILE_ACL)
+        access|= BINLOG_MONITOR_ACL | REPL_SLAVE_ACL | BINLOG_ADMIN_ACL |
+                 BINLOG_REPLAY_ACL;
+      if (access & PROCESS_ACL)
+        access|= SUPER_ACL | EXECUTE_ACL;
+    }
+
+    if (num_fields() <= 31 && (access & CREATE_ACL))
+      access|= (CREATE_VIEW_ACL | SHOW_VIEW_ACL);
+
+    if (num_fields() <= 33)
+    {
+      if (access & CREATE_ACL)
+        access|= CREATE_PROC_ACL;
+      if (access & ALTER_ACL)
+        access|= ALTER_PROC_ACL;
+    }
+
+    if (num_fields() <= 36 && (access & GRANT_ACL))
+      access|= CREATE_USER_ACL;
+
+    if (num_fields() <= 37 && (access & SUPER_ACL))
+      access|= EVENT_ACL;
+
+    if (num_fields() <= 38 && (access & SUPER_ACL))
+      access|= TRIGGER_ACL;
+
+    if (num_fields() <= 46 && (access & DELETE_ACL))
+      access|= DELETE_HISTORY_ACL;
+
+    if (access & SUPER_ACL)
+      access|= GLOBAL_SUPER_ADDED_SINCE_USER_TABLE_ACLS;
+
+    /*
+      The SHOW SLAVE HOSTS statement :
+      - required REPLICATION SLAVE privilege prior to 10.5.2
+      - requires REPLICATION MASTER ADMIN privilege since 10.5.2
+      There is no a way to GRANT MASTER ADMIN with User_table_tabular.
+      So let's automatically add REPLICATION MASTER ADMIN for all users
+      that had REPLICATION SLAVE. This will allow to do SHOW SLAVE HOSTS.
+    */
+    if (access & REPL_SLAVE_ACL)
+      access|= REPL_MASTER_ADMIN_ACL;
+
+    if (access & REPL_SLAVE_ACL)
+      access|= SLAVE_MONITOR_ACL;
+
+    return access & GLOBAL_ACLS;
+  }
+
+  /* Store 'Y' (grant) or 'N' (revoke) into every privilege column whose
+     bit is set in 'rights'; bit order follows the column order. */
+  void set_access(const privilege_t rights, bool revoke) const
+  {
+    ulonglong priv(SELECT_ACL);
+    for (uint i= start_priv_columns; i < end_priv_columns; i++, priv <<= 1)
+    {
+      if (priv & rights)
+        m_table->field[i]->store(1 + !revoke, 0);
+    }
+  }
+
+  /* The attribute columns below are addressed by fixed offsets from the
+     last privilege column; get_field() returns NULL (and the getters a
+     default) when the column is absent or has an unexpected type. */
+  SSL_type get_ssl_type () const
+  {
+    Field *f= get_field(end_priv_columns, MYSQL_TYPE_ENUM);
+    return (SSL_type)(f ? f->val_int()-1 : 0);
+  }
+  int set_ssl_type (SSL_type x) const
+  {
+    if (Field *f= get_field(end_priv_columns, MYSQL_TYPE_ENUM))
+      return f->store(x+1, 0);
+    else
+      return 1;
+  }
+  const char* get_ssl_cipher (MEM_ROOT *root) const
+  {
+    Field *f= get_field(end_priv_columns + 1, MYSQL_TYPE_BLOB);
+    return f ? ::get_field(root,f) : 0;
+  }
+  int set_ssl_cipher (const char *s, size_t l) const
+  {
+    if (Field *f= get_field(end_priv_columns + 1, MYSQL_TYPE_BLOB))
+      return f->store(s, l, &my_charset_latin1);
+    else
+      return 1;
+  }
+  const char* get_x509_issuer (MEM_ROOT *root) const
+  {
+    Field *f= get_field(end_priv_columns + 2, MYSQL_TYPE_BLOB);
+    return f ? ::get_field(root,f) : 0;
+  }
+  int set_x509_issuer (const char *s, size_t l) const
+  {
+    if (Field *f= get_field(end_priv_columns + 2, MYSQL_TYPE_BLOB))
+      return f->store(s, l, &my_charset_latin1);
+    else
+      return 1;
+  }
+  const char* get_x509_subject (MEM_ROOT *root) const
+  {
+    Field *f= get_field(end_priv_columns + 3, MYSQL_TYPE_BLOB);
+    return f ? ::get_field(root,f) : 0;
+  }
+  int set_x509_subject (const char *s, size_t l) const
+  {
+    if (Field *f= get_field(end_priv_columns + 3, MYSQL_TYPE_BLOB))
+      return f->store(s, l, &my_charset_latin1);
+    else
+      return 1;
+  }
+  longlong get_max_questions () const
+  {
+    Field *f= get_field(end_priv_columns + 4, MYSQL_TYPE_LONG);
+    return f ? f->val_int() : 0;
+  }
+  int set_max_questions (longlong x) const
+  {
+    if (Field *f= get_field(end_priv_columns + 4, MYSQL_TYPE_LONG))
+      return f->store(x, 0);
+    else
+      return 1;
+  }
+  longlong get_max_updates () const
+  {
+    Field *f= get_field(end_priv_columns + 5, MYSQL_TYPE_LONG);
+    return f ? f->val_int() : 0;
+  }
+  int set_max_updates (longlong x) const
+  {
+    if (Field *f= get_field(end_priv_columns + 5, MYSQL_TYPE_LONG))
+      return f->store(x, 0);
+    else
+      return 1;
+  }
+  longlong get_max_connections () const
+  {
+    Field *f= get_field(end_priv_columns + 6, MYSQL_TYPE_LONG);
+    return f ? f->val_int() : 0;
+  }
+  int set_max_connections (longlong x) const
+  {
+    if (Field *f= get_field(end_priv_columns + 6, MYSQL_TYPE_LONG))
+      return f->store(x, 0);
+    else
+      return 1;
+  }
+  longlong get_max_user_connections () const
+  {
+    Field *f= get_field(end_priv_columns + 7, MYSQL_TYPE_LONG);
+    return f ? f->val_int() : 0;
+  }
+  int set_max_user_connections (longlong x) const
+  {
+    if (Field *f= get_field(end_priv_columns + 7, MYSQL_TYPE_LONG))
+      return f->store(x, 0);
+    else
+      return 1;
+  }
+  double get_max_statement_time () const
+  {
+    Field *f= get_field(end_priv_columns + 13, MYSQL_TYPE_NEWDECIMAL);
+    return f ? f->val_real() : 0;
+  }
+  int set_max_statement_time (double x) const
+  {
+    if (Field *f= get_field(end_priv_columns + 13, MYSQL_TYPE_NEWDECIMAL))
+      return f->store(x);
+    else
+      return 1;
+  }
+  bool get_is_role () const
+  {
+    Field *f= get_field(end_priv_columns + 11, MYSQL_TYPE_ENUM);
+    return f ? f->val_int()-1 : 0;
+  }
+  int set_is_role (bool x) const
+  {
+    if (Field *f= get_field(end_priv_columns + 11, MYSQL_TYPE_ENUM))
+      return f->store(x+1, 0);
+    else
+      return 1;
+  }
+  const char* get_default_role (MEM_ROOT *root) const
+  {
+    Field *f= get_field(end_priv_columns + 12, MYSQL_TYPE_STRING);
+    return f ? ::get_field(root,f) : 0;
+  }
+  int set_default_role (const char *s, size_t l) const
+  {
+    if (Field *f= get_field(end_priv_columns + 12, MYSQL_TYPE_STRING))
+      return f->store(s, l, system_charset_info);
+    else
+      return 1;
+  }
+  /* On a MariaDB 10.3 user table, the account locking accessors will try to
+     get the content of the max_statement_time column, but they will fail due
+     to the typecheck in get_field. */
+  bool get_account_locked () const
+  {
+    Field *f= get_field(end_priv_columns + 13, MYSQL_TYPE_ENUM);
+    return f ? f->val_int()-1 : 0;
+  }
+  int set_account_locked (bool x) const
+  {
+    if (Field *f= get_field(end_priv_columns + 13, MYSQL_TYPE_ENUM))
+      return f->store(x+1, 0);
+
+    return 1;
+  }
+
+  bool get_password_expired () const
+  {
+    uint field_num= end_priv_columns + 10;
+
+    Field *f= get_field(field_num, MYSQL_TYPE_ENUM);
+    return f ? f->val_int()-1 : 0;
+  }
+  int set_password_expired (bool x) const
+  {
+    uint field_num= end_priv_columns + 10;
+
+    if (Field *f= get_field(field_num, MYSQL_TYPE_ENUM))
+      return f->store(x+1, 0);
+    return 1;
+  }
+  my_time_t get_password_last_changed () const
+  {
+    ulong unused_dec;
+    if (Field *f= get_field(end_priv_columns + 11, MYSQL_TYPE_TIMESTAMP2))
+      return f->get_timestamp(&unused_dec);
+    return 0;
+  }
+  int set_password_last_changed (my_time_t x) const
+  {
+    if (Field *f= get_field(end_priv_columns + 11, MYSQL_TYPE_TIMESTAMP2))
+    {
+      f->set_notnull();
+      return f->store_timestamp(x, 0);
+    }
+    return 1;
+  }
+  longlong get_password_lifetime () const
+  {
+    if (Field *f= get_field(end_priv_columns + 12, MYSQL_TYPE_SHORT))
+    {
+      /* NULL means "use the server default" and is reported as -1. */
+      if (f->is_null())
+        return -1;
+      return f->val_int();
+    }
+    return 0;
+  }
+  int set_password_lifetime (longlong x) const
+  {
+    if (Field *f= get_field(end_priv_columns + 12, MYSQL_TYPE_SHORT))
+    {
+      if (x < 0)
+      {
+        f->set_null();
+        return 0;
+      }
+      f->set_notnull();
+      return f->store(x, 0);
+    }
+    return 1;
+  }
+
+  virtual ~User_table_tabular() = default;
+ private:
+  friend class Grant_tables;
+
+  /* Only Grant_tables can instantiate this class. */
+  User_table_tabular() { min_columns= 13; /* As in 3.20.13 */ }
+
+  /* The user table is a bit different compared to the other Grant tables.
+     Usually, we only add columns to the grant tables when adding functionality.
+     This makes it easy to test which version of the table we are using, by
+     just looking at the number of fields present in the table.
+
+     In MySQL 5.7.6 the Password column was removed. We need to guard for that.
+     The field-fetching methods for the User table return NULL if the field
+     doesn't exist. This simplifies checking of table "version", as we don't
+     have to make use of num_fields() any more.
+  */
+  inline Field* get_field(uint field_num, enum enum_field_types type) const
+  {
+    if (field_num >= num_fields())
+      return NULL;
+    Field *f= m_table->field[field_num];
+    return f->real_type() == type ? f : NULL;
+  }
+
+  /* Derive global password-format settings from the table's layout;
+     returns 1 on a fatally incompatible (too short) Password column. */
+  int setup_sysvars() const
+  {
+    username_char_length= MY_MIN(m_table->field[1]->char_length(),
+                                 USERNAME_CHAR_LENGTH);
+    using_global_priv_table= false;
+
+    if (have_password()) // Password column might be missing. (MySQL 5.7.6+)
+    {
+      int password_length= password()->field_length /
+                           password()->charset()->mbmaxlen;
+      if (password_length < SCRAMBLED_PASSWORD_CHAR_LENGTH_323)
+      {
+        sql_print_error("Fatal error: mysql.user table is damaged or in "
+                        "unsupported 3.20 format.");
+        return 1;
+      }
+
+      mysql_mutex_lock(&LOCK_global_system_variables);
+      if (password_length < SCRAMBLED_PASSWORD_CHAR_LENGTH)
+      {
+        if (opt_secure_auth)
+        {
+          mysql_mutex_unlock(&LOCK_global_system_variables);
+          sql_print_error("Fatal error: mysql.user table is in old format, "
+                          "but server started with --secure-auth option.");
+          return 1;
+        }
+        mysql_user_table_is_in_short_password_format= true;
+        if (global_system_variables.old_passwords)
+          mysql_mutex_unlock(&LOCK_global_system_variables);
+        else
+        {
+          extern sys_var *Sys_old_passwords_ptr;
+          Sys_old_passwords_ptr->value_origin= sys_var::AUTO;
+          global_system_variables.old_passwords= 1;
+          mysql_mutex_unlock(&LOCK_global_system_variables);
+          sql_print_warning("mysql.user table is not updated to new password format; "
+                            "Disabling new password usage until "
+                            "mysql_fix_privilege_tables is run");
+        }
+        m_table->in_use->variables.old_passwords= 1;
+      }
+      else
+      {
+        mysql_user_table_is_in_short_password_format= false;
+        mysql_mutex_unlock(&LOCK_global_system_variables);
+      }
+    }
+    return 0;
+  }
+
+  /* Normally password column is the third column in the table. If privileges
+     start on the third column instead, we are missing the password column.
+     This means we are using a MySQL 5.7.6+ data directory. */
+  bool have_password() const { return start_priv_columns == 3; }
+
+  Field* password() const { return m_table->field[2]; }
+  Field* plugin() const   { return get_field(end_priv_columns + 8, MYSQL_TYPE_STRING); }
+  Field* authstr() const  { return get_field(end_priv_columns + 9, MYSQL_TYPE_BLOB); }
+};
+
+/*
+  MariaDB 10.4 and up `global_priv` table
+
+  TODO possible optimizations:
+  * update json in-place if the new value can fit
+  * don't repeat get_value for every key, but use a streaming parser
+    to convert json into in-memory object (ACL_USER?) in one json scan.
+    - this makes sense for acl_load(), but hardly for GRANT
+  * similarly, pack ACL_USER (?) into json in one go.
+    - doesn't make sense? GRANT rarely updates more than one field.
+*/
+class User_table_json: public User_table
+{
+  LEX_CSTRING& name() const { return MYSQL_TABLE_NAME[USER_TABLE]; }
+
+  /* Load the authentication chain for one user from the JSON column.
+     If there is no "auth_or" array, a single auth is read from the
+     top-level "plugin"/"authentication_string" keys (get_auth1).
+     Otherwise each array element supplies one auth; an element without
+     a "plugin" key falls back to the top-level keys. Returns 1 on
+     malformed JSON. */
+  int get_auth(THD *thd, MEM_ROOT *root, ACL_USER *u) const
+  {
+    size_t array_len;
+    const char *array;
+    int vl;
+    const char *v;
+
+    if (get_value("auth_or", JSV_ARRAY, &array, &array_len))
+    {
+      u->alloc_auth(root, 1);
+      return get_auth1(thd, root, u, 0);
+    }
+
+    /* Probing one past the end must yield JSV_NOTHING and leaves the
+       element count in vl, which sizes the auth array. */
+    if (json_get_array_item(array, array + array_len, (int)array_len,
+                            &v, &vl) != JSV_NOTHING)
+      return 1;
+    u->alloc_auth(root, vl);
+    for (uint i=0; i < u->nauth; i++)
+    {
+      if (json_get_array_item(array, array + array_len, i, &v, &vl) != JSV_OBJECT)
+        return 1;
+
+      const char *p, *a;
+      int pl, al;
+      switch (json_get_object_key(v, v + vl, "plugin", &p, &pl)) {
+      case JSV_STRING: u->auth[i].plugin.str= strmake_root(root, p, pl);
+                       u->auth[i].plugin.length= pl;
+                       break;
+      case JSV_NOTHING: if (get_auth1(thd, root, u, i))
+                          return 1;
+                        else
+                          continue;
+      default: return 1;
+      }
+      switch (json_get_object_key(v, v + vl, "authentication_string", &a, &al)) {
+      case JSV_NOTHING: u->auth[i].auth_string= empty_clex_str;
+                        break;
+      case JSV_STRING: u->auth[i].auth_string.str= strmake_root(root, a, al);
+                       u->auth[i].auth_string.length= al;
+                       break;
+      default: return 1;
+      }
+    }
+    return 0;
+  }
+
+  /* Fill auth slot n from the top-level "plugin" and
+     "authentication_string" JSON keys; an empty plugin string maps to
+     the native password plugin. Returns 1 if either key is unreadable. */
+  int get_auth1(THD *thd, MEM_ROOT *root, ACL_USER *u, uint n) const
+  {
+    const char *authstr= get_str_value(root, "authentication_string");
+    const char *plugin= get_str_value(root, "plugin");
+    if (plugin && authstr)
+    {
+      if (plugin && *plugin)
+      {
+        u->auth[n].plugin.str= plugin;
+        u->auth[n].plugin.length= strlen(plugin);
+      }
+      else
+        u->auth[n].plugin= native_password_plugin_name;
+      u->auth[n].auth_string.str= authstr;
+      u->auth[n].auth_string.length= strlen(authstr);
+      return 0;
+    }
+    return 1;
+  }
+
+  /* Append str to 'to' as a double-quoted JSON string, escaping as
+     needed; returns 1 if json_escape fails. */
+  bool append_str_value(String *to, const LEX_CSTRING &str) const
+  {
+    to->append('"');
+    to->reserve(str.length*2);
+    int len= json_escape(system_charset_info, (uchar*)str.str, (uchar*)str.str + str.length,
+                         to->charset(), (uchar*)to->end(), (uchar*)to->end() + str.length*2);
+    if (len < 0)
+      return 1;
+    to->length(to->length() + len);
+    to->append('"');
+    return 0;
+  }
+
+  /*
+    Store the authentication chain of 'u' into the JSON column.
+
+    A single auth with no pre-existing "auth_or" array is written via the
+    plain top-level keys (set_auth1). Otherwise an "auth_or" JSON array is
+    built; at most one element (a built-in password plugin, or failing
+    that the last element) is additionally stored at the top level.
+    Returns 1 (true) on failure.
+  */
+  bool set_auth(const ACL_USER &u) const
+  {
+    size_t array_len;
+    const char *array;
+    if (u.nauth == 1 && get_value("auth_or", JSV_ARRAY, &array, &array_len))
+      return set_auth1(u, 0);
+
+    StringBuffer<JSON_SIZE> json(m_table->field[2]->charset());
+    bool top_done = false;
+    json.append('[');
+    for (uint i=0; i < u.nauth; i++)
+    {
+      ACL_USER::AUTH * const auth= u.auth + i;
+      if (i)
+        json.append(',');
+      json.append('{');
+      if (!top_done &&
+          (auth->plugin.str == native_password_plugin_name.str ||
+           auth->plugin.str == old_password_plugin_name.str ||
+           i == u.nauth - 1))
+      {
+        /* This element is represented by the top-level keys instead;
+           its slot in the array stays an empty object. */
+        if (set_auth1(u, i))
+          return 1;
+        top_done= true;
+      }
+      else
+      {
+        json.append(STRING_WITH_LEN("\"plugin\":"));
+        if (append_str_value(&json, auth->plugin))
+          return 1;
+        if (auth->auth_string.length)
+        {
+          json.append(STRING_WITH_LEN(",\"authentication_string\":"));
+          if (append_str_value(&json, auth->auth_string))
+            return 1;
+        }
+      }
+      json.append('}');
+    }
+    json.append(']');
+    return set_value("auth_or", json.ptr(), json.length(), false) == JSV_BAD_JSON;
+  }
+  /* Store auth slot i into the top-level plugin/authentication_string keys. */
+  bool set_auth1(const ACL_USER &u, uint i) const
+  {
+    return set_str_value("plugin",
+                         u.auth[i].plugin.str, u.auth[i].plugin.length) ||
+            set_str_value("authentication_string",
+                         u.auth[i].auth_string.str, u.auth[i].auth_string.length);
+  }
+
+  /* Log a warning for an out-of-range "version_id" value in the current row. */
+  void print_warning_bad_version_id(ulonglong version_id) const
+  {
+    sql_print_warning("'user' entry '%s@%s' has a wrong 'version_id' value %lld",
+                      safe_str(get_user(current_thd->mem_root)),
+                      safe_str(get_host(current_thd->mem_root)),
+                      version_id);
+  }
+
+  /* Log a warning for an "access" value containing bits outside the
+     privilege mask valid for the row's version_id. */
+  void print_warning_bad_access(ulonglong version_id,
+                                privilege_t mask,
+                                ulonglong access) const
+  {
+    sql_print_warning("'user' entry '%s@%s' "
+                      "has a wrong 'access' value 0x%llx "
+                      "(allowed mask is 0x%llx, version_id=%lld)",
+                      safe_str(get_user(current_thd->mem_root)),
+                      safe_str(get_host(current_thd->mem_root)),
+                      access, mask, version_id);
+  }
+
+  /* Upgrade a stored access bitmask written by server 'version_id' to the
+     current privilege set, implying privileges that were split off or
+     added since. Bits outside the mask valid for that version make the
+     row invalid: a warning is logged and NO_ACL returned. */
+  privilege_t adjust_access(ulonglong version_id, ulonglong access) const
+  {
+    privilege_t mask= ALL_KNOWN_ACL_100304;
+    ulonglong orig_access= access;
+    if (version_id < 101100)
+    {
+      if (access & SUPER_ACL)
+        access|= READ_ONLY_ADMIN_ACL;
+    }
+    if (version_id >= 100509)
+    {
+      mask= ALL_KNOWN_ACL_100509;
+    }
+    else if (version_id >= 100502)
+    {
+      if (version_id >= 100508)
+        mask= ALL_KNOWN_ACL_100508;
+      else
+        mask= ALL_KNOWN_ACL_100502;
+      if (access & REPL_SLAVE_ADMIN_ACL)
+        access|= SLAVE_MONITOR_ACL;
+    }
+    else // 100501 or earlier
+    {
+      /*
+        Address changes in SUPER and REPLICATION SLAVE made in 10.5.2.
+        This also covers a special case: if the user had ALL PRIVILEGES before
+        the upgrade, it gets ALL PRIVILEGES after the upgrade.
+      */
+      if (access & SUPER_ACL)
+      {
+        if (access & REPL_SLAVE_ACL)
+        {
+          /*
+            The user could do both before the upgrade:
+            - set global variables       (because of SUPER_ACL)
+            - execute "SHOW SLAVE HOSTS" (because of REPL_SLAVE_ACL)
+            Grant all new privileges that were splitted from SUPER (in 10.5.2),
+            and REPLICATION MASTER ADMIN, so it still can do "SHOW SLAVE HOSTS".
+          */
+          access|= REPL_MASTER_ADMIN_ACL;
+        }
+        access|= GLOBAL_SUPER_ADDED_SINCE_USER_TABLE_ACLS;
+      }
+      /*
+        REPLICATION_CLIENT(BINLOG_MONITOR_ACL) should allow SHOW SLAVE STATUS
+        REPLICATION SLAVE should allow SHOW RELAYLOG EVENTS
+      */
+      if (access & BINLOG_MONITOR_ACL || access & REPL_SLAVE_ACL)
+        access|= SLAVE_MONITOR_ACL;
+    }
+
+    if (orig_access & ~mask)
+    {
+      print_warning_bad_access(version_id, mask, orig_access);
+      return NO_ACL;
+    }
+    return access & ALL_KNOWN_ACL;
+  }
+
+  /* Read "access"/"version_id" from the JSON column and return the
+     version-adjusted global privilege set; NO_ACL for invalid rows. */
+  privilege_t get_access() const
+  {
+    ulonglong version_id= (ulonglong) get_int_value("version_id");
+    ulonglong access= (ulonglong) get_int_value("access");
+
+    /*
+      Special case:
+      mysql_system_tables_data.sql populates "ALL PRIVILEGES"
+      for the super user this way:
+            {"access":18446744073709551615}
+    */
+    if (access == (ulonglong) ~0)
+      return GLOBAL_ACLS;
+
+    /*
+      Reject obviously bad (negative and too large) version_id values.
+      Also reject versions before 10.4.0 (when JSON table was added).
+    */
+    if ((longlong) version_id < 0 || version_id > 999999 ||
+        (version_id > 0 && version_id < 100400))
+    {
+      print_warning_bad_version_id(version_id);
+      return NO_ACL;
+    }
+    return adjust_access(version_id, access) & GLOBAL_ACLS;
+  }
+
+  /* Apply a grant/revoke to the stored "access" bits and stamp the row
+     with the current server version. */
+  void set_access(const privilege_t rights, bool revoke) const
+  {
+    privilege_t access= get_access();
+    if (revoke)
+      access&= ~rights;
+    else
+      access|= rights;
+    set_int_value("access", (longlong) (access & GLOBAL_ACLS));
+    set_int_value("version_id", (longlong) MYSQL_VERSION_ID);
+  }
+  /* Map an empty string (the get_str_value "missing key" result) to NULL. */
+  const char *unsafe_str(const char *s) const
+  { return s[0] ? s : NULL; }
+
+  /* Per-account attributes, each stored under its own JSON key. */
+  SSL_type get_ssl_type () const
+  { return (SSL_type)get_int_value("ssl_type"); }
+  int set_ssl_type (SSL_type x) const
+  { return set_int_value("ssl_type", x); }
+  const char* get_ssl_cipher (MEM_ROOT *root) const
+  { return unsafe_str(get_str_value(root, "ssl_cipher")); }
+  int set_ssl_cipher (const char *s, size_t l) const
+  { return set_str_value("ssl_cipher", s, l); }
+  const char* get_x509_issuer (MEM_ROOT *root) const
+  { return unsafe_str(get_str_value(root, "x509_issuer")); }
+  int set_x509_issuer (const char *s, size_t l) const
+  { return set_str_value("x509_issuer", s, l); }
+  const char* get_x509_subject (MEM_ROOT *root) const
+  { return unsafe_str(get_str_value(root, "x509_subject")); }
+  int set_x509_subject (const char *s, size_t l) const
+  { return set_str_value("x509_subject", s, l); }
+  longlong get_max_questions () const
+  { return get_int_value("max_questions"); }
+  int set_max_questions (longlong x) const
+  { return set_int_value("max_questions", x); }
+  longlong get_max_updates () const
+  { return get_int_value("max_updates"); }
+  int set_max_updates (longlong x) const
+  { return set_int_value("max_updates", x); }
+  longlong get_max_connections () const
+  { return get_int_value("max_connections"); }
+  int set_max_connections (longlong x) const
+  { return set_int_value("max_connections", x); }
+  longlong get_max_user_connections () const
+  { return get_int_value("max_user_connections"); }
+  int set_max_user_connections (longlong x) const
+  { return set_int_value("max_user_connections", x); }
+  double get_max_statement_time () const
+  { return get_double_value("max_statement_time"); }
+  int set_max_statement_time (double x) const
+  { return set_double_value("max_statement_time", x); }
+  bool get_is_role () const
+  { return get_bool_value("is_role"); }
+  int set_is_role (bool x) const
+  { return set_bool_value("is_role", x); }
+  const char* get_default_role (MEM_ROOT *root) const
+  { return get_str_value(root, "default_role"); }
+  int set_default_role (const char *s, size_t l) const
+  { return set_str_value("default_role", s, l); }
+  bool get_account_locked () const
+  { return get_bool_value("account_locked"); }
+  int set_account_locked (bool x) const
+  { return set_bool_value("account_locked", x); }
+  my_time_t get_password_last_changed () const
+  { return static_cast<my_time_t>(get_int_value("password_last_changed")); }
+  int set_password_last_changed (my_time_t x) const
+  { return set_int_value("password_last_changed", static_cast<longlong>(x)); }
+  int set_password_lifetime (longlong x) const
+  { return set_int_value("password_lifetime", x); }
+  longlong get_password_lifetime () const
+  { return get_int_value("password_lifetime", -1); }
+  /*
+     password_last_changed=0 means the password is manually expired.
+     In MySQL 5.7+ this state is described using the password_expired column
+     in mysql.user
+  */
+  bool get_password_expired () const
+  { return get_int_value("password_last_changed", -1) == 0; }
+  int set_password_expired (bool x) const
+  { return x ? set_password_last_changed(0) : 0; }
+
+  ~User_table_json() = default;
+ private:
+  friend class Grant_tables;
+  /* Buffer size used when (re)building the JSON document. */
+  static const uint JSON_SIZE=1024;
+  /* Record that the JSON global_priv table is in use and derive
+     username_char_length from the User column; always succeeds. */
+  int setup_sysvars() const
+  {
+    using_global_priv_table= true;
+    username_char_length= MY_MIN(m_table->field[1]->char_length(),
+                                 USERNAME_CHAR_LENGTH);
+    return 0;
+  }
+  bool get_value(const char *key, 
+                 enum json_types vt, const char **v, size_t *vl) const
+  {
+    enum json_types value_type;
+    int int_vl;
+    String str, *res= m_table->field[2]->val_str(&str);
+    if (!res ||
+        (value_type= json_get_object_key(res->ptr(), res->end(), key,
+                                             v, &int_vl)) == JSV_BAD_JSON)
+      return 1; // invalid
+    *vl= int_vl;
+    return value_type != vt;
+  }
+  const char *get_str_value(MEM_ROOT *root, const char *key) const
+  {
+    size_t value_len;
+    const char *value_start;
+    if (get_value(key, JSV_STRING, &value_start, &value_len))
+      return "";
+    char *ptr= (char*)alloca(value_len);
+    int len= json_unescape(m_table->field[2]->charset(),
+                           (const uchar*)value_start,
+                           (const uchar*)value_start + value_len,
+                           system_charset_info,
+                           (uchar*)ptr, (uchar*)ptr + value_len);
+    if (len < 0)
+      return NULL;
+    return strmake_root(root, ptr, len);
+  }
+  longlong get_int_value(const char *key, longlong def_val= 0) const
+  {
+    int err;
+    size_t value_len;
+    const char *value_start;
+    if (get_value(key, JSV_NUMBER, &value_start, &value_len))
+      return def_val;
+    const char *value_end= value_start + value_len;
+    return my_strtoll10(value_start, (char**)&value_end, &err);
+  }
+  /* Floating-point value stored under @key, or 0 if absent/non-numeric. */
+  double get_double_value(const char *key) const
+  {
+    const char *start;
+    size_t len;
+    if (get_value(key, JSV_NUMBER, &start, &len))
+      return 0;
+    int err;
+    const char *end= start + len;
+    return my_strtod(start, (char**)&end, &err);
+  }
+  /* True iff @key exists and holds the JSON literal true. */
+  bool get_bool_value(const char *key) const
+  {
+    const char *start;
+    size_t len;
+    return !get_value(key, JSV_TRUE, &start, &len);
+  }
+  /*
+    Insert or overwrite @key in the JSON document kept in field[2].
+
+    @param val    replacement value, already escaped/formatted for JSON
+    @param vlen   length of @val in bytes
+    @param string if true, the value is written inside double quotes
+
+    @return JSV_BAD_JSON if the current document is corrupt, otherwise
+            the previous type of the key (JSV_NOTHING for a new key).
+
+    Note: StringBuffer is a size-templated class; the template argument
+    <JSON_SIZE> (lost in the original line) is required for it to compile.
+  */
+  enum json_types set_value(const char *key,
+                            const char *val, size_t vlen, bool string) const
+  {
+    int value_len;
+    const char *value_start;
+    enum json_types value_type;
+    String str, *res= m_table->field[2]->val_str(&str);
+    if (!res || !res->length())
+      (res= &str)->set(STRING_WITH_LEN("{}"), m_table->field[2]->charset());
+    value_type= json_get_object_key(res->ptr(), res->end(), key,
+                                    &value_start, &value_len);
+    if (value_type == JSV_BAD_JSON)
+      return value_type; // invalid
+    StringBuffer<JSON_SIZE> json(res->charset());
+    /* copy everything up to where the (old or new) value goes */
+    json.copy(res->ptr(), value_start - res->ptr(), res->charset());
+    if (value_type == JSV_NOTHING)
+    {
+      /* new key: emit  ,"key":  — comma only if the object is non-empty */
+      if (value_len)
+        json.append(',');
+      json.append('"');
+      json.append(key, strlen(key));
+      json.append(STRING_WITH_LEN("\":"));
+      if (string)
+        json.append('"');
+    }
+    else
+      value_start+= value_len; // skip the old value, it gets replaced
+    json.append(val, vlen);
+    if (!value_type && string) // JSV_NOTHING == 0: close the quote we opened
+      json.append('"');
+    json.append(value_start, res->end() - value_start);
+    DBUG_ASSERT(json_valid(json.ptr(), json.length(), json.charset()));
+    m_table->field[2]->store(json.ptr(), json.length(), json.charset());
+    return value_type;
+  }
+  /* JSON-escape @val and store it under @key as a quoted string.
+     Returns true on failure (value too long/unconvertible or bad JSON). */
+  bool set_str_value(const char *key, const char *val, size_t vlen) const
+  {
+    char escaped[JSON_SIZE];
+    int elen= json_escape(system_charset_info,
+                          (const uchar*)val, (const uchar*)val + vlen,
+                          m_table->field[2]->charset(),
+                          (uchar*)escaped, (uchar*)escaped + sizeof(escaped));
+    if (elen < 0)
+      return 1;
+    return set_value(key, escaped, (size_t)elen, true) == JSV_BAD_JSON;
+  }
+  /* Store @val under @key as a signed decimal JSON number. */
+  bool set_int_value(const char *key, longlong val) const
+  {
+    char buf[MY_INT64_NUM_DECIMAL_DIGITS+1];
+    char *end= longlong10_to_str(val, buf, -10); // -10: signed, base 10
+    return set_value(key, buf, (size_t)(end - buf), false) == JSV_BAD_JSON;
+  }
+  /* Store @val under @key as a JSON number with fixed decimal precision. */
+  bool set_double_value(const char *key, double val) const
+  {
+    char buf[FLOATING_POINT_BUFFER+1];
+    size_t len= my_fcvt(val, TIME_SECOND_PART_DIGITS, buf, NULL);
+    return set_value(key, buf, len, false) == JSV_BAD_JSON;
+  }
+  /* Store @val under @key as the unquoted JSON literal true/false. */
+  bool set_bool_value(const char *key, bool val) const
+  {
+    const char *literal= val ? "true" : "false";
+    return set_value(key, literal, strlen(literal), false) == JSV_BAD_JSON;
+  }
+};
+
+/* Typed accessors for the columns of mysql.db (per-database privileges). */
+class Db_table: public Grant_table_base
+{
+ public:
+  Field* host() const { return m_table->field[0]; }
+  Field* db() const { return m_table->field[1]; }
+  Field* user() const { return m_table->field[2]; }
+
+ private:
+  friend class Grant_tables;
+
+  Db_table() { min_columns= 9; /* as in 3.20.13 */ }
+};
+
+/* Typed accessors for the columns of mysql.tables_priv
+   (table- and column-level privilege grants). */
+class Tables_priv_table: public Grant_table_base
+{
+ public:
+  Field* host() const { return m_table->field[0]; }
+  Field* db() const { return m_table->field[1]; }
+  Field* user() const { return m_table->field[2]; }
+  Field* table_name() const { return m_table->field[3]; }
+  Field* grantor() const { return m_table->field[4]; }
+  Field* timestamp() const { return m_table->field[5]; }
+  Field* table_priv() const { return m_table->field[6]; }
+  Field* column_priv() const { return m_table->field[7]; }
+
+ private:
+  friend class Grant_tables;
+
+  Tables_priv_table() { min_columns= 8; /* as in 3.22.26a */ }
+};
+
+/* Typed accessors for the columns of mysql.columns_priv
+   (column-level privilege grants). */
+class Columns_priv_table: public Grant_table_base
+{
+ public:
+  Field* host() const { return m_table->field[0]; }
+  Field* db() const { return m_table->field[1]; }
+  Field* user() const { return m_table->field[2]; }
+  Field* table_name() const { return m_table->field[3]; }
+  Field* column_name() const { return m_table->field[4]; }
+  Field* timestamp() const { return m_table->field[5]; }
+  Field* column_priv() const { return m_table->field[6]; }
+
+ private:
+  friend class Grant_tables;
+
+  Columns_priv_table() { min_columns= 7; /* as in 3.22.26a */ }
+};
+
+/* Typed accessors for the columns of the legacy mysql.host table.
+   The table may be absent in newer installations (see acl_load()). */
+class Host_table: public Grant_table_base
+{
+ public:
+  Field* host() const { return m_table->field[0]; }
+  Field* db() const { return m_table->field[1]; }
+
+ private:
+  friend class Grant_tables;
+
+  Host_table() { min_columns= 8; /* as in 3.20.13 */ }
+};
+
+/* Typed accessors for the columns of mysql.procs_priv
+   (stored routine privilege grants). */
+class Procs_priv_table: public Grant_table_base
+{
+ public:
+  Field* host() const { return m_table->field[0]; }
+  Field* db() const { return m_table->field[1]; }
+  Field* user() const { return m_table->field[2]; }
+  Field* routine_name() const { return m_table->field[3]; }
+  Field* routine_type() const { return m_table->field[4]; }
+  Field* grantor() const { return m_table->field[5]; }
+  Field* proc_priv() const { return m_table->field[6]; }
+  Field* timestamp() const { return m_table->field[7]; }
+
+ private:
+  friend class Grant_tables;
+
+  Procs_priv_table() { min_columns=8; }
+};
+
+/* Typed accessors for the columns of mysql.proxies_priv
+   (proxy-user grants). */
+class Proxies_priv_table: public Grant_table_base
+{
+ public:
+  Field* host() const { return m_table->field[0]; }
+  Field* user() const { return m_table->field[1]; }
+  Field* proxied_host() const { return m_table->field[2]; }
+  Field* proxied_user() const { return m_table->field[3]; }
+  Field* with_grant() const { return m_table->field[4]; }
+  Field* grantor() const { return m_table->field[5]; }
+  Field* timestamp() const { return m_table->field[6]; }
+
+ private:
+  friend class Grant_tables;
+
+  Proxies_priv_table() { min_columns= 7; }
+};
+
+/* Typed accessors for the columns of mysql.roles_mapping
+   (role grants, with optional WITH ADMIN OPTION). */
+class Roles_mapping_table: public Grant_table_base
+{
+ public:
+  Field* host() const { return m_table->field[0]; }
+  Field* user() const { return m_table->field[1]; }
+  Field* role() const { return m_table->field[2]; }
+  Field* admin_option() const { return m_table->field[3]; }
+
+ private:
+  friend class Grant_tables;
+
+  Roles_mapping_table() { min_columns= 4; }
+};
+
+/**
+  Class that represents a collection of grant tables.
+
+  Hides which flavor of user table is in use: p_user_table points either
+  at the JSON-based table (default) or, after the fallback in
+  open_and_lock(), at the legacy tabular mysql.user.
+*/
+class Grant_tables
+{
+ public:
+  Grant_tables() : p_user_table(&m_user_table_json) { }
+
+  /**
+    An auxiliary to build a list of involved tables.
+
+    @retval  0 Success
+    @retval -1 A my_error reported error
+   */
+  int build_table_list(THD *thd, TABLE_LIST** ptr_first,
+                       int which_tables, enum thr_lock_type lock_type,
+                       TABLE_LIST *tables)
+  {
+    DBUG_ENTER("Grant_tables::build_table_list");
+
+    DBUG_ASSERT(which_tables); /* At least one table must be opened. */
+    /*
+       We can read privilege tables even when !initialized.
+       This can be acl_load() - server startup or FLUSH PRIVILEGES
+       */
+    if (lock_type >= TL_FIRST_WRITE && !initialized)
+    {
+      my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--skip-grant-tables");
+      DBUG_RETURN(-1);
+    }
+
+    /* Iterate backwards and prepend each selected table, so the final
+       linked list is ordered by ascending table index. */
+    for (int i=USER_TABLE; i >=0; i--)
+    {
+      TABLE_LIST *tl= tables + i;
+      if (which_tables & (1 << i))
+      {
+        tl->init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_TABLE_NAME[i],
+                           NULL, lock_type);
+        tl->open_type= OT_BASE_ONLY;
+        tl->i_s_requested_object= OPEN_TABLE_ONLY;
+        tl->updating= lock_type >= TL_FIRST_WRITE;
+        /* optional tables may legitimately be missing from the datadir */
+        if (i >= FIRST_OPTIONAL_TABLE)
+          tl->open_strategy= TABLE_LIST::OPEN_IF_EXISTS;
+        tl->next_global= tl->next_local= *ptr_first;
+        *ptr_first= tl;
+      }
+      else
+        tl->table= NULL;
+    }
+    DBUG_RETURN(0);
+  }
+
+  /**
+    Open and lock the requested grant tables.
+
+    Falls back from the JSON user table to the legacy tabular mysql.user
+    when the former was not found.
+
+    @retval  0 success
+    @retval  1 skipped because of replication filters (see really_open())
+    @retval -1 error
+  */
+  int open_and_lock(THD *thd, int which_tables, enum thr_lock_type lock_type)
+  {
+    DBUG_ENTER("Grant_tables::open_and_lock");
+
+    TABLE_LIST tables[USER_TABLE+1], *first= NULL;
+
+    if (build_table_list(thd, &first, which_tables, lock_type, tables))
+      DBUG_RETURN(-1);
+
+    uint counter;
+    int res= really_open(thd, first, &counter);
+
+    /* if User_table_json wasn't found, let's try User_table_tabular */
+    if (!res && (which_tables & Table_user) && !tables[USER_TABLE].table)
+    {
+      uint unused;
+      TABLE_LIST *tl= tables + USER_TABLE;
+      /* temporarily detach THD's open-table list, so the retry below
+         opens mysql.user in isolation */
+      TABLE *backup_open_tables= thd->open_tables;
+      thd->set_open_tables(NULL);
+
+      tl->init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_TABLE_NAME_USER,
+                         NULL, lock_type);
+      tl->open_type= OT_BASE_ONLY;
+      tl->i_s_requested_object= OPEN_TABLE_ONLY;
+      tl->updating= lock_type >= TL_FIRST_WRITE;
+      p_user_table= &m_user_table_tabular;
+      counter++;
+      res= really_open(thd, tl, &unused);
+      thd->set_open_tables(backup_open_tables);
+      /* splice the newly-opened table back into THD's open-table list */
+      if (tables[USER_TABLE].table)
+      {
+        tables[USER_TABLE].table->next= backup_open_tables;
+        thd->set_open_tables(tables[USER_TABLE].table);
+      }
+    }
+    if (res)
+      DBUG_RETURN(res);
+
+    if (lock_tables(thd, first, counter,
+                    MYSQL_LOCK_IGNORE_TIMEOUT |
+                    MYSQL_OPEN_IGNORE_LOGGING_FORMAT))
+      DBUG_RETURN(-1);
+
+    /* hand each opened TABLE to its wrapper (NULL if not requested) */
+    p_user_table->set_table(tables[USER_TABLE].table);
+    m_db_table.set_table(tables[DB_TABLE].table);
+    m_tables_priv_table.set_table(tables[TABLES_PRIV_TABLE].table);
+    m_columns_priv_table.set_table(tables[COLUMNS_PRIV_TABLE].table);
+    m_host_table.set_table(tables[HOST_TABLE].table);
+    m_procs_priv_table.set_table(tables[PROCS_PRIV_TABLE].table);
+    m_proxies_priv_table.set_table(tables[PROXIES_PRIV_TABLE].table);
+    m_roles_mapping_table.set_table(tables[ROLES_MAPPING_TABLE].table);
+    DBUG_RETURN(0);
+  }
+
+  inline const User_table& user_table() const
+  { return *p_user_table; }
+
+  inline const Db_table& db_table() const
+  { return m_db_table; }
+
+  inline const Tables_priv_table& tables_priv_table() const
+  { return m_tables_priv_table; }
+
+  inline const Columns_priv_table& columns_priv_table() const
+  { return m_columns_priv_table; }
+
+  inline const Host_table& host_table() const
+  { return m_host_table; }
+
+  inline const Procs_priv_table& procs_priv_table() const
+  { return m_procs_priv_table; }
+
+  inline const Proxies_priv_table& proxies_priv_table() const
+  { return m_proxies_priv_table; }
+
+  inline const Roles_mapping_table& roles_mapping_table() const
+  { return m_roles_mapping_table; }
+
+#ifdef HAVE_REPLICATION
+  /**
+    Checks if the tables targeted by a grant command should be ignored because
+    of the configured replication filters
+
+    @retval 1 Tables are excluded for replication
+    @retval 0 tables are included for replication
+  */
+  int rpl_ignore_tables(THD *thd, TABLE_LIST* tables, int which_tables= 0,
+                        enum thr_lock_type lock_type= TL_IGNORE)
+  {
+    DBUG_ENTER("Grant_tables::rpl_ignore_tables");
+
+    /* only applies to slave SQL threads executing top-level statements */
+    if (!(thd->slave_thread && !thd->spcont))
+      DBUG_RETURN(0);
+
+    TABLE_LIST all_tables[USER_TABLE+1];
+
+    if (!tables)
+    {
+      int rc __attribute__((unused))=
+        build_table_list(thd, &tables, which_tables, lock_type, all_tables);
+
+      DBUG_ASSERT(!rc);  // Grant_tables must be already initialized
+      DBUG_ASSERT(tables);
+    }
+
+    if (tables->lock_type >= TL_FIRST_WRITE)
+    {
+      /*
+        GRANT and REVOKE are subject to the slave's replication
+        in/exclusion filter rules, since they are effectively updates
+        to the mysql.% tables.
+      */
+      Rpl_filter *rpl_filter= thd->system_thread_info.rpl_sql_info->rpl_filter;
+      if (rpl_filter->is_on() && !rpl_filter->tables_ok(0, tables))
+      {
+        thd->slave_expected_error= 0;
+        DBUG_RETURN(1);
+      }
+    }
+    DBUG_RETURN(0);
+  }
+#endif
+
+ private:
+
+  /* Before any operation is possible on grant tables, they must be opened.
+
+     @retval  1 replication filters matched. Abort the operation,
+                but return OK (!)
+     @retval  0 tables were opened successfully
+     @retval -1 error, tables could not be opened
+  */
+  int really_open(THD *thd, TABLE_LIST* tables, uint *counter)
+  {
+    DBUG_ENTER("Grant_tables::really_open:");
+#ifdef HAVE_REPLICATION
+    if (rpl_ignore_tables(thd, tables))
+    {
+      DBUG_RETURN(1);
+    }
+#endif
+    if (open_tables(thd, &tables, counter, MYSQL_LOCK_IGNORE_TIMEOUT))
+      DBUG_RETURN(-1);
+    DBUG_RETURN(0);
+  }
+
+  /* points at whichever user-table flavor was actually opened */
+  User_table *p_user_table;
+  User_table_json m_user_table_json;
+  User_table_tabular m_user_table_tabular;
+  Db_table m_db_table;
+  Tables_priv_table m_tables_priv_table;
+  Columns_priv_table m_columns_priv_table;
+  Host_table m_host_table;
+  Procs_priv_table m_procs_priv_table;
+  Proxies_priv_table m_proxies_priv_table;
+  Roles_mapping_table m_roles_mapping_table;
+};
+
+
+/* Populate an ACL_PROXY_USER from one row of the open proxies_priv table,
+   copying the string columns onto @mem. */
+void ACL_PROXY_USER::init(const Proxies_priv_table& proxies_priv_table,
+                          MEM_ROOT *mem)
+{
+  const char *host_arg= get_field(mem, proxies_priv_table.host());
+  const char *user_arg=
+    safe_str(get_field(mem, proxies_priv_table.user()));
+  const char *proxied_host_arg=
+    get_field(mem, proxies_priv_table.proxied_host());
+  const char *proxied_user_arg=
+    safe_str(get_field(mem, proxies_priv_table.proxied_user()));
+  bool with_grant_arg= proxies_priv_table.with_grant()->val_int() != 0;
+  init(host_arg, user_arg, proxied_host_arg, proxied_user_arg,
+       with_grant_arg);
+}
+
+
+/*
+ Enumeration of the various ACL lists and hashes used in
+ handle_grant_struct()
+*/
+enum enum_acl_lists
+{
+  USER_ACL= 0,
+  ROLE_ACL,
+  DB_ACL,
+  COLUMN_PRIVILEGES_HASH,
+  PROC_PRIVILEGES_HASH,
+  FUNC_PRIVILEGES_HASH,
+  PACKAGE_SPEC_PRIVILEGES_HASH,
+  PACKAGE_BODY_PRIVILEGES_HASH,
+  PROXY_USERS_ACL,
+  ROLES_MAPPINGS_HASH
+};
+
+/*
+  Construct a role from a parsed user-table entry (used by acl_load()
+  when a row is flagged as a role). The role inherits the row's access
+  bits and shares the user-name storage of @user.
+*/
+ACL_ROLE::ACL_ROLE(ACL_USER *user)
+ :
+  /* set initial role access the same as the table row privileges */
+  initial_role_access(user->access),
+  counter(0)
+{
+  access= user->access;
+  this->user= user->user;
+  bzero(&parent_grantee, sizeof(parent_grantee));
+  flags= IS_ROLE;
+}
+
+/*
+  Construct a role by name with the given initial privileges.
+  The name is duplicated onto @root, except for the shared PUBLIC
+  name constant which is referenced directly.
+*/
+ACL_ROLE::ACL_ROLE(const char *rolename, privilege_t privileges, MEM_ROOT *root)
+  : initial_role_access(privileges), counter(0)
+{
+  this->access= initial_role_access;
+  if (is_public(rolename))
+    this->user= public_name;
+  else
+  {
+    this->user.str= safe_strdup_root(root, rolename);
+    this->user.length= strlen(rolename);
+  }
+  bzero(&parent_grantee, sizeof(parent_grantee));
+  flags= IS_ROLE;
+}
+
+/* Outcome of check_role_name() below. */
+enum role_name_check_result
+{
+  ROLE_NAME_OK= 0,
+  ROLE_NAME_PUBLIC,
+  ROLE_NAME_INVALID
+};
+
+/*
+  Validate a role name.
+
+  A name matching PUBLIC is normalized to the canonical spelling and
+  accepted only when @public_is_ok. The reserved name NONE and empty
+  names are always rejected (with ER_INVALID_ROLE reported).
+*/
+static role_name_check_result check_role_name(LEX_CSTRING *str,
+                                              bool public_is_ok)
+{
+  if (str->length)
+  {
+    if (str->length == public_name.length &&
+        strcasecmp(str->str, public_name.str) == 0)
+    {
+      *str= public_name; // normalize spelling
+      if (public_is_ok)
+        return ROLE_NAME_PUBLIC;
+      /* PUBLIC not allowed here: fall through to the error below */
+    }
+    else if (str->length != none.length ||
+             strcasecmp(str->str, none.str) != 0)
+      return ROLE_NAME_OK;
+  }
+
+  my_error(ER_INVALID_ROLE, MYF(0), str->str);
+  return ROLE_NAME_INVALID;
+}
+
+
+/* Release the user's dynamic array of granted roles. */
+static void free_acl_user(ACL_USER *user)
+{
+  delete_dynamic(&(user->role_grants));
+}
+
+/* Release the role's dynamic arrays of granted roles and grantees. */
+static void free_acl_role(ACL_ROLE *role)
+{
+  delete_dynamic(&(role->role_grants));
+  delete_dynamic(&(role->parent_grantee));
+}
+
+/* plugin_foreach() callback: returning TRUE on the first plugin makes
+   the iteration report "at least one plugin exists". */
+static my_bool check_if_exists(THD *, plugin_ref, void *)
+{
+  return TRUE;
+}
+
+/* True if at least one password-validation plugin is loaded. */
+static bool has_validation_plugins()
+{
+  return plugin_foreach(NULL, check_if_exists,
+                        MariaDB_PASSWORD_VALIDATION_PLUGIN, NULL);
+}
+
+/* Argument bundle passed through plugin_foreach() to do_validate(). */
+struct validation_data { const LEX_CSTRING *user, *password, *host; };
+
+/* plugin_foreach() callback: run one password-validation plugin.
+   Returns true (stopping the iteration) if the plugin rejects the
+   password; ER_NOT_VALID_PASSWORD is reported with the plugin's name. */
+static my_bool do_validate(THD *, plugin_ref plugin, void *arg)
+{
+  struct validation_data *data= (struct validation_data *)arg;
+  struct st_mariadb_password_validation *handler=
+    (st_mariadb_password_validation *)plugin_decl(plugin)->info;
+  if (handler->validate_password(data->user, data->password, data->host))
+  {
+    my_error(ER_NOT_VALID_PASSWORD, MYF(0), plugin_ref_to_int(plugin)->name.str);
+    return true;
+  }
+  return false;
+}
+
+
+/*
+  Run all password-validation plugins against a plain-text password.
+
+  When only a hash is available (no plain text), the password cannot be
+  validated; with --strict-password-validation and at least one
+  validation plugin loaded this is an error, except for slave threads
+  (and, with WSREP, applier threads).
+
+  @return true on failure (error already reported)
+*/
+static bool validate_password(THD *thd, const LEX_CSTRING &user,
+                              const LEX_CSTRING &host,
+                              const LEX_CSTRING &pwtext, bool has_hash)
+{
+  if (pwtext.length || !has_hash)
+  {
+    struct validation_data data= { &user,
+                                   pwtext.str ? &pwtext : &empty_clex_str,
+                                   &host };
+    if (plugin_foreach(NULL, do_validate,
+                       MariaDB_PASSWORD_VALIDATION_PLUGIN, &data))
+    {
+      return true;
+    }
+  }
+  else
+  {
+    if (!thd->slave_thread &&
+        strict_password_validation && has_validation_plugins()
+#ifdef WITH_WSREP
+        && !thd->wsrep_applier
+#endif
+       )
+    {
+      my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--strict-password-validation");
+      return true;
+    }
+  }
+  return false;
+}
+
+/*
+  Compute AUTH::salt from AUTH::auth_string.
+
+  Plugins with interface_version >= 0x0202 may supply preprocess_hash()
+  to convert the textual hash into its binary salt form; otherwise the
+  salt is simply a copy of the auth string. Results are allocated on
+  the global acl_memroot.
+
+  @return 1 if preprocess_hash() rejects the auth string, 0 otherwise
+*/
+static int set_user_salt(ACL_USER::AUTH *auth, plugin_ref plugin)
+{
+  st_mysql_auth *info= (st_mysql_auth *) plugin_decl(plugin)->info;
+  if (info->interface_version >= 0x0202 && info->preprocess_hash &&
+      auth->auth_string.length)
+  {
+    uchar buf[MAX_SCRAMBLE_LENGTH];
+    size_t len= sizeof(buf);
+    if (info->preprocess_hash(auth->auth_string.str,
+                              auth->auth_string.length, buf, &len))
+      return 1;
+    auth->salt.str= (char*)memdup_root(&acl_memroot, buf, len);
+    auth->salt.length= len;
+  }
+  else
+    auth->salt= safe_lexcstrdup_root(&acl_memroot, auth->auth_string);
+
+  return 0;
+}
+
+/**
+  Fills in ACL_USER::auth_string and ACL_USER::salt fields, as needed
+
+  hashes the plain-text password (if provided) to auth_string,
+  converts auth_string to salt.
+
+  Fails if the plain-text password fails validation, if the plugin is
+  not loaded, if the auth_string is invalid, if the password is not applicable
+
+  @param user    user name (for validation plugins)
+  @param host    host name (for validation plugins)
+  @param auth    the auth entry to fill in; auth->plugin selects the plugin
+  @param pwtext  plain-text password, may be empty
+
+  @return 0 on success, otherwise an ER_xxx error code (a warning or
+          error has been reported where applicable)
+*/
+static int set_user_auth(THD *thd, const LEX_CSTRING &user,
+                         const LEX_CSTRING &host,
+                         ACL_USER::AUTH *auth, const LEX_CSTRING &pwtext)
+{
+  const char *plugin_name= auth->plugin.str;
+  bool unlock_plugin= false;
+  plugin_ref plugin= get_auth_plugin(thd, auth->plugin, &unlock_plugin);
+  int res= 1;
+
+  if (!plugin)
+  {
+    push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+                        ER_PLUGIN_IS_NOT_LOADED,
+                        ER_THD(thd, ER_PLUGIN_IS_NOT_LOADED), plugin_name);
+    return ER_PLUGIN_IS_NOT_LOADED;
+  }
+
+  /* provisional salt; recomputed by set_user_salt() below if possible */
+  auth->salt= auth->auth_string;
+
+  st_mysql_auth *info= (st_mysql_auth *) plugin_decl(plugin)->info;
+  if (info->interface_version < 0x0202)
+  {
+    /* old plugin API: cannot hash; only fail if a password was given */
+    res= pwtext.length ? ER_SET_PASSWORD_AUTH_PLUGIN : 0;
+    goto end;
+  }
+
+  if (thd->lex->sql_command == SQLCOM_SET_OPTION && !info->hash_password)
+  {
+    res= ER_SET_PASSWORD_AUTH_PLUGIN;
+    goto end;
+  }
+
+  if (info->hash_password &&
+      validate_password(thd, user, host, pwtext, auth->auth_string.length))
+  {
+    res= ER_NOT_VALID_PASSWORD;
+    goto end;
+  }
+  if (pwtext.length)
+  {
+    if (info->hash_password)
+    {
+      char buf[MAX_SCRAMBLE_LENGTH];
+      size_t len= sizeof(buf) - 1;
+      if (info->hash_password(pwtext.str, pwtext.length, buf, &len))
+      {
+        res= ER_OUTOFMEMORY;
+        goto end;
+      }
+      buf[len] = 0;
+      auth->auth_string.str= (char*)memdup_root(&acl_memroot, buf, len+1);
+      auth->auth_string.length= len;
+    }
+    else
+    {
+      res= ER_SET_PASSWORD_AUTH_PLUGIN;
+      goto end;
+    }
+  }
+  if (set_user_salt(auth, plugin))
+  {
+    res= ER_PASSWD_LENGTH;
+    goto end;
+  }
+
+  res= 0;
+end:
+  if (unlock_plugin)
+    plugin_unlock(thd, plugin);
+  return res;
+}
+
+
+/**
+  Lazily computes user's salt from the password hash
+
+  Works on a copy of the ACL_USER taken without the lock: computes the
+  salt, then re-acquires acl_cache->lock and copies the salt back only
+  if the cached entry still has the identical plugin and auth string —
+  i.e. the user was not altered or dropped in the meantime.
+
+  @return 1 if the auth string cannot be converted into a salt
+*/
+static bool set_user_salt_if_needed(ACL_USER *user_copy, int curr_auth,
+                                    plugin_ref plugin)
+{
+  ACL_USER::AUTH *auth_copy= user_copy->auth + curr_auth;
+  DBUG_ASSERT(!strcasecmp(auth_copy->plugin.str, plugin_name(plugin)->str));
+
+  if (auth_copy->salt.str)
+    return 0; // already done
+
+  if (set_user_salt(auth_copy, plugin))
+    return 1;
+
+  mysql_mutex_lock(&acl_cache->lock);
+  ACL_USER *user= find_user_exact(user_copy->host.hostname, user_copy->user.str);
+  // make sure the user wasn't altered or dropped meanwhile
+  if (user)
+  {
+    ACL_USER::AUTH *auth= user->auth + curr_auth;
+    if (!auth->salt.str && auth->plugin.length == auth_copy->plugin.length &&
+        auth->auth_string.length == auth_copy->auth_string.length &&
+        !memcmp(auth->plugin.str, auth_copy->plugin.str, auth->plugin.length) &&
+        !memcmp(auth->auth_string.str, auth_copy->auth_string.str, auth->auth_string.length))
+      auth->salt= auth_copy->salt;
+  }
+  mysql_mutex_unlock(&acl_cache->lock);
+  return 0;
+}
+
+
+/**
+  Replace AUTH::plugin with the canonical built-in name constant.
+
+  At authentication time the built-in plugins are recognized by pointer
+  comparison rather than string comparison, so a built-in's plugin name
+  must reference the hard-coded constant, not an allocated copy.
+
+  @retval false the pointer now references a built-in name constant
+  @retval true  the plugin is not a built-in
+*/
+static bool fix_user_plugin_ptr(ACL_USER::AUTH *auth)
+{
+  if (lex_string_eq(&auth->plugin, &native_password_plugin_name))
+  {
+    auth->plugin= native_password_plugin_name;
+    return false;
+  }
+  if (lex_string_eq(&auth->plugin, &old_password_plugin_name))
+  {
+    auth->plugin= old_password_plugin_name;
+    return false;
+  }
+  return true;
+}
+
+
+/* Decode an enum('N','Y')-style column: a value starting with
+   'Y' or 'y' means true. */
+static bool get_YN_as_bool(Field *field)
+{
+  char buff[2];
+  String str(buff, sizeof(buff), &my_charset_latin1);
+  field->val_str(&str);
+  const char first= str[0];
+  return first == 'Y' || first == 'y';
+}
+
+
+/*
+  Initialize structures responsible for user/db-level privilege checking and
+  load privilege information for them from tables in the 'mysql' database.
+
+  SYNOPSIS
+    acl_init()
+      dont_read_acl_tables  TRUE if we want to skip loading data from
+                            privilege tables and disable privilege checking.
+
+  NOTES
+    This function is mostly responsible for preparatory steps, main work
+    on initialization and grants loading is done in acl_reload().
+
+  RETURN VALUES
+    0	ok
+    1	Could not initialize grant's
+*/
+
+bool acl_init(bool dont_read_acl_tables)
+{
+  THD  *thd;
+  bool return_val;
+  DBUG_ENTER("acl_init");
+
+  /* FIFO cache of ACL lookups, keyed by acl_entry_get_key() */
+  acl_cache= new Hash_filo(key_memory_acl_cache, ACL_CACHE_SIZE, 0, 0,
+                           (my_hash_get_key) acl_entry_get_key,
+                           (my_hash_free_key) my_free,
+                           &my_charset_utf8mb3_bin);
+
+  /*
+    cache built-in native authentication plugins,
+    to avoid hash searches and a global mutex lock on every connect
+  */
+  native_password_plugin= my_plugin_lock_by_name(0,
+           &native_password_plugin_name, MYSQL_AUTHENTICATION_PLUGIN);
+  old_password_plugin= my_plugin_lock_by_name(0,
+           &old_password_plugin_name, MYSQL_AUTHENTICATION_PLUGIN);
+
+  if (!native_password_plugin || !old_password_plugin)
+    DBUG_RETURN(1);
+
+  if (dont_read_acl_tables)
+  {
+    DBUG_RETURN(0); /* purecov: tested */
+  }
+
+  /*
+    To be able to run this from boot, we allocate a temporary THD
+  */
+  if (!(thd=new THD(0)))
+    DBUG_RETURN(1); /* purecov: inspected */
+  thd->thread_stack= (char*) &thd;
+  thd->store_globals();
+  /*
+    It is safe to call acl_reload() since acl_* arrays and hashes which
+    will be freed there are global static objects and thus are initialized
+    by zeros at startup.
+  */
+  return_val= acl_reload(thd);
+  delete thd;
+  DBUG_RETURN(return_val);
+}
+
+/* Append a parsed user entry to acl_users; a host pattern of '%' (or an
+   empty host) means connections are accepted from any host. */
+static void push_new_user(const ACL_USER &user)
+{
+  push_dynamic(&acl_users, &user);
+  const char *hostname= user.host.hostname;
+  if (!hostname || (hostname[0] == wild_many && !hostname[1]))
+    allow_all_hosts=1;                  // Anyone can connect
+}
+
+
+/*
+  Initialize structures responsible for user/db-level privilege checking
+  and load information about grants from open privilege tables.
+
+  SYNOPSIS
+    acl_load()
+      thd     Current thread
+      tables  List containing open "mysql.host", "mysql.user",
+              "mysql.db", "mysql.proxies_priv" and "mysql.roles_mapping"
+              tables.
+
+  RETURN VALUES
+    FALSE  Success
+    TRUE   Error
+*/
+
+static bool acl_load(THD *thd, const Grant_tables& tables)
+{
+  READ_RECORD read_record_info;
+  char tmp_name[SAFE_NAME_LEN+1];
+  Sql_mode_save old_mode_save(thd);
+  DBUG_ENTER("acl_load");
+
+  thd->variables.sql_mode&= ~MODE_PAD_CHAR_TO_FULL_LENGTH;
+
+  grant_version++; /* Privileges updated */
+
+  const Host_table& host_table= tables.host_table();
+  init_sql_alloc(key_memory_acl_mem, &acl_memroot, ACL_ALLOC_BLOCK_SIZE, 0, MYF(0));
+  if (host_table.table_exists()) // "host" table may not exist (e.g. in MySQL 5.6.7+)
+  {
+    if (host_table.init_read_record(&read_record_info))
+      DBUG_RETURN(true);
+    while (!(read_record_info.read_record()))
+    {
+      ACL_HOST host;
+      update_hostname(&host.host, get_field(&acl_memroot, host_table.host()));
+      host.db= get_field(&acl_memroot, host_table.db());
+      if (lower_case_table_names && host.db)
+      {
+        /*
+          convert db to lower case and give a warning if the db wasn't
+          already in lower case
+        */
+        char *end = strnmov(tmp_name, host.db, sizeof(tmp_name));
+        if (end >= tmp_name + sizeof(tmp_name))
+        {
+          sql_print_warning(ER_THD(thd, ER_WRONG_DB_NAME), host.db);
+          continue;
+        }
+        my_casedn_str(files_charset_info, host.db);
+        if (strcmp(host.db, tmp_name) != 0)
+          sql_print_warning("'host' entry '%s|%s' had database in mixed "
+                            "case that has been forced to lowercase because "
+                            "lower_case_table_names is set. It will not be "
+                            "possible to remove this privilege using REVOKE.",
+                            host.host.hostname, host.db);
+      }
+      else if (!host.db)
+        host.db= const_cast(host_not_specified.str);
+      host.access= host_table.get_access();
+      host.access= fix_rights_for_db(host.access);
+      host.sort= get_magic_sort("hd", host.host.hostname, host.db);
+      if (opt_skip_name_resolve &&
+          hostname_requires_resolving(host.host.hostname))
+      {
+        sql_print_warning("'host' entry '%s|%s' "
+                        "ignored in --skip-name-resolve mode.",
+                         host.host.hostname, host.db);
+        continue;
+      }
+#ifndef TO_BE_REMOVED
+      if (host_table.num_fields() == 8)
+      {						// Without grant
+        if (host.access & CREATE_ACL)
+          host.access|=REFERENCES_ACL | INDEX_ACL | ALTER_ACL | CREATE_TMP_ACL;
+      }
+#endif
+      (void) push_dynamic(&acl_hosts,(uchar*) &host);
+    }
+    my_qsort((uchar*) dynamic_element(&acl_hosts, 0, ACL_HOST*),
+             acl_hosts.elements, sizeof(ACL_HOST),(qsort_cmp) acl_compare);
+    end_read_record(&read_record_info);
+  }
+  freeze_size(&acl_hosts);
+
+  const User_table& user_table= tables.user_table();
+  if (user_table.init_read_record(&read_record_info))
+    DBUG_RETURN(true);
+
+  allow_all_hosts=0;
+  while (!(read_record_info.read_record()))
+  {
+    ACL_USER user;
+    bool is_role= FALSE;
+    update_hostname(&user.host, user_table.get_host(&acl_memroot));
+    char *username= safe_str(user_table.get_user(&acl_memroot));
+    user.user.str= username;
+    user.user.length= strlen(username);
+
+    is_role= user_table.get_is_role();
+
+    user.access= user_table.get_access();
+
+    user.sort= get_magic_sort("hu", user.host.hostname, user.user.str);
+    user.hostname_length= safe_strlen(user.host.hostname);
+
+    my_init_dynamic_array(key_memory_acl_mem, &user.role_grants,
+                          sizeof(ACL_ROLE *), 0, 8, MYF(0));
+
+    user.account_locked= user_table.get_account_locked();
+
+    user.password_expired= user_table.get_password_expired();
+    user.password_last_changed= user_table.get_password_last_changed();
+    user.password_lifetime= user_table.get_password_lifetime();
+
+    if (is_role)
+    {
+      role_name_check_result result= check_role_name(&user.user, true);
+      if (result == ROLE_NAME_INVALID)
+      {
+        thd->clear_error(); // the warning is still issued
+        continue;
+      }
+
+      ACL_ROLE *entry= new (&acl_memroot) ACL_ROLE(&user);
+      entry->role_grants = user.role_grants;
+      my_init_dynamic_array(key_memory_acl_mem, &entry->parent_grantee,
+                            sizeof(ACL_USER_BASE *), 0, 8, MYF(0));
+      if (result == ROLE_NAME_PUBLIC)
+        acl_public= entry;
+
+      my_hash_insert(&acl_roles, (uchar *)entry);
+
+      continue;
+    }
+    else
+    {
+      if (opt_skip_name_resolve &&
+          hostname_requires_resolving(user.host.hostname))
+      {
+        sql_print_warning("'user' entry '%s@%s' "
+                          "ignored in --skip-name-resolve mode.", user.user.str,
+                          safe_str(user.host.hostname));
+        continue;
+      }
+
+      if (user_table.get_auth(thd, &acl_memroot, &user))
+        continue;
+      for (uint i= 0; i < user.nauth; i++)
+      {
+        ACL_USER::AUTH *auth= user.auth + i;
+        auth->salt= null_clex_str;
+        fix_user_plugin_ptr(auth);
+      }
+
+      user.ssl_type=     user_table.get_ssl_type();
+      user.ssl_cipher=   user_table.get_ssl_cipher(&acl_memroot);
+      user.x509_issuer=  safe_str(user_table.get_x509_issuer(&acl_memroot));
+      user.x509_subject= safe_str(user_table.get_x509_subject(&acl_memroot));
+      user.user_resource.questions= (uint)user_table.get_max_questions();
+      user.user_resource.updates= (uint)user_table.get_max_updates();
+      user.user_resource.conn_per_hour= (uint)user_table.get_max_connections();
+      if (user.user_resource.questions || user.user_resource.updates ||
+          user.user_resource.conn_per_hour)
+        mqh_used=1;
+
+      user.user_resource.user_conn= (int)user_table.get_max_user_connections();
+      user.user_resource.max_statement_time= user_table.get_max_statement_time();
+
+      user.default_rolename.str= user_table.get_default_role(&acl_memroot);
+      user.default_rolename.length= safe_strlen(user.default_rolename.str);
+    }
+    push_new_user(user);
+  }
+  rebuild_acl_users();
+  end_read_record(&read_record_info);
+  freeze_size(&acl_users);
+
+  const Db_table& db_table= tables.db_table();
+  if (db_table.init_read_record(&read_record_info))
+    DBUG_RETURN(TRUE);
+  while (!(read_record_info.read_record()))
+  {
+    ACL_DB db;
+    char *db_name;
+    db.user=safe_str(get_field(&acl_memroot, db_table.user()));
+    const char *hostname= get_field(&acl_memroot, db_table.host());
+    if (!hostname && find_acl_role(db.user, true))
+      hostname= "";
+    update_hostname(&db.host, hostname);
+    db.db= db_name= get_field(&acl_memroot, db_table.db());
+    if (!db.db)
+    {
+      sql_print_warning("Found an entry in the 'db' table with empty database name; Skipped");
+      continue;
+    }
+    if (opt_skip_name_resolve && hostname_requires_resolving(db.host.hostname))
+    {
+      sql_print_warning("'db' entry '%s %s@%s' "
+                        "ignored in --skip-name-resolve mode.",
+		        db.db, db.user, safe_str(db.host.hostname));
+      continue;
+    }
+    db.access= db_table.get_access();
+    db.access=fix_rights_for_db(db.access);
+    db.initial_access= db.access;
+    if (lower_case_table_names)
+    {
+      /*
+        convert db to lower case and give a warning if the db wasn't
+        already in lower case
+      */
+      char *end = strnmov(tmp_name, db.db, sizeof(tmp_name));
+      if (end >= tmp_name + sizeof(tmp_name))
+      {
+        sql_print_warning(ER_THD(thd, ER_WRONG_DB_NAME), db.db);
+        continue;
+      }
+      my_casedn_str(files_charset_info, db_name);
+      if (strcmp(db_name, tmp_name) != 0)
+      {
+        sql_print_warning("'db' entry '%s %s@%s' had database in mixed "
+                          "case that has been forced to lowercase because "
+                          "lower_case_table_names is set. It will not be "
+                          "possible to remove this privilege using REVOKE.",
+		          db.db, db.user, safe_str(db.host.hostname));
+      }
+    }
+    db.sort=get_magic_sort("hdu", db.host.hostname, db.db, db.user);
+#ifndef TO_BE_REMOVED
+    if (db_table.num_fields() <=  9)
+    {						// Without grant
+      if (db.access & CREATE_ACL)
+	db.access|=REFERENCES_ACL | INDEX_ACL | ALTER_ACL;
+    }
+#endif
+    acl_dbs.push(db);
+  }
+  end_read_record(&read_record_info);
+  rebuild_acl_dbs();
+  acl_dbs.freeze();
+
+  const Proxies_priv_table& proxies_priv_table= tables.proxies_priv_table();
+  if (proxies_priv_table.table_exists())
+  {
+    if (proxies_priv_table.init_read_record(&read_record_info))
+      DBUG_RETURN(TRUE);
+    while (!(read_record_info.read_record()))
+    {
+      ACL_PROXY_USER proxy;
+      proxy.init(proxies_priv_table, &acl_memroot);
+      if (proxy.check_validity())
+        continue;
+      if (push_dynamic(&acl_proxy_users, (uchar*) &proxy))
+        DBUG_RETURN(TRUE);
+    }
+    my_qsort((uchar*) dynamic_element(&acl_proxy_users, 0, ACL_PROXY_USER*),
+             acl_proxy_users.elements,
+             sizeof(ACL_PROXY_USER), (qsort_cmp) acl_compare);
+    end_read_record(&read_record_info);
+  }
+  else
+  {
+    sql_print_error("Missing system table mysql.proxies_priv; "
+                    "please run mysql_upgrade to create it");
+  }
+  freeze_size(&acl_proxy_users);
+
+  const Roles_mapping_table& roles_mapping_table= tables.roles_mapping_table();
+  if (roles_mapping_table.table_exists())
+  {
+    if (roles_mapping_table.init_read_record(&read_record_info))
+      DBUG_RETURN(TRUE);
+
+    MEM_ROOT temp_root;
+    init_alloc_root(key_memory_acl_mem, &temp_root, ACL_ALLOC_BLOCK_SIZE, 0, MYF(0));
+    while (!(read_record_info.read_record()))
+    {
+      char *hostname= safe_str(get_field(&temp_root, roles_mapping_table.host()));
+      char *username= safe_str(get_field(&temp_root, roles_mapping_table.user()));
+      char *rolename= safe_str(get_field(&temp_root, roles_mapping_table.role()));
+      bool with_grant_option= get_YN_as_bool(roles_mapping_table.admin_option());
+
+      if (add_role_user_mapping(username, hostname, rolename)) {
+        sql_print_error("Invalid roles_mapping table entry user:'%s@%s', rolename:'%s'",
+                        username, hostname, rolename);
+        continue;
+      }
+
+      ROLE_GRANT_PAIR *mapping= new (&acl_memroot) ROLE_GRANT_PAIR;
+
+      if (mapping->init(&acl_memroot, username, hostname, rolename, with_grant_option))
+        continue;
+
+      my_hash_insert(&acl_roles_mappings, (uchar*) mapping);
+    }
+
+    free_root(&temp_root, MYF(0));
+    end_read_record(&read_record_info);
+  }
+  else
+  {
+    sql_print_error("Missing system table mysql.roles_mapping; "
+                    "please run mysql_upgrade to create it");
+  }
+
+  init_check_host();
+
+  thd->bootstrap= !initialized; // keep FLUSH PRIVILEGES connection special
+  initialized=1;
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Free all in-memory ACL structures.
+
+  @param end  true on server shutdown: also release the password-plugin
+              references and destroy the acl_cache object itself;
+              false on reload/rollback: keep acl_cache allocated, only
+              clear its contents.
+*/
+void acl_free(bool end)
+{
+  my_hash_free(&acl_roles);
+  acl_public= NULL;
+  free_root(&acl_memroot,MYF(0));
+  delete_dynamic(&acl_hosts);
+  delete_dynamic_with_callback(&acl_users, (FREE_FUNC) free_acl_user);
+  acl_dbs.free_memory();
+  delete_dynamic(&acl_wild_hosts);
+  my_hash_free(&acl_check_hosts);
+  my_hash_free(&acl_roles_mappings);
+  if (!end)
+    acl_cache->clear(1); /* purecov: inspected */
+  else
+  {
+    /* Shutdown: drop the plugin pins taken at init and delete the cache */
+    plugin_unlock(0, native_password_plugin);
+    plugin_unlock(0, old_password_plugin);
+    delete acl_cache;
+    acl_cache=0;
+  }
+}
+
+
+/*
+  Forget current user/db-level privileges and read new privileges
+  from the privilege tables.
+
+  SYNOPSIS
+    acl_reload()
+      thd  Current thread
+
+  NOTE
+    All tables of calling thread which were open and locked by LOCK TABLES
+    statement will be unlocked and closed.
+    This function is also used for initialization of structures responsible
+    for user/db-level privilege checking.
+
+  RETURN VALUE
+    FALSE  Success
+    TRUE   Failure
+*/
+
+bool acl_reload(THD *thd)
+{
+  /* Saved copies of the live caches, restored if the fresh load fails */
+  DYNAMIC_ARRAY old_acl_hosts, old_acl_users, old_acl_proxy_users;
+  Dynamic_array<ACL_DB> old_acl_dbs(PSI_INSTRUMENT_MEM, 0, 0);
+  HASH old_acl_roles, old_acl_roles_mappings;
+  ACL_ROLE *old_acl_public;
+  MEM_ROOT old_mem;
+  int result;
+  DBUG_ENTER("acl_reload");
+
+
+  Grant_tables tables;
+  /*
+    To avoid deadlocks we should obtain table locks before
+    obtaining acl_cache->lock mutex.
+  */
+  const uint tables_to_open= Table_host | Table_user | Table_db |
+                             Table_proxies_priv | Table_roles_mapping;
+  if ((result= tables.open_and_lock(thd, tables_to_open, TL_READ)))
+  {
+    DBUG_ASSERT(result <= 0);
+    /*
+      Execution might have been interrupted; only print the error message
+      if an error condition has been raised.
+    */
+    if (thd->get_stmt_da()->is_error())
+      sql_print_error("Fatal error: Can't open and lock privilege tables: %s",
+                      thd->get_stmt_da()->message());
+    goto end;
+  }
+
+  acl_cache->clear(0);
+  mysql_mutex_lock(&acl_cache->lock);
+
+  /* Stash the current structures so a failed load can be rolled back */
+  old_acl_hosts= acl_hosts;
+  old_acl_users= acl_users;
+  old_acl_roles= acl_roles;
+  old_acl_public= acl_public;
+  old_acl_roles_mappings= acl_roles_mappings;
+  old_acl_proxy_users= acl_proxy_users;
+  old_acl_dbs= acl_dbs;
+  /* Re-initialize the globals as empty containers for the fresh load */
+  my_init_dynamic_array(key_memory_acl_mem, &acl_hosts, sizeof(ACL_HOST), 20, 50, MYF(0));
+  my_init_dynamic_array(key_memory_acl_mem, &acl_users, sizeof(ACL_USER), 50, 100, MYF(0));
+  acl_dbs.init(key_memory_acl_mem, 50, 100);
+  my_init_dynamic_array(key_memory_acl_mem, &acl_proxy_users, sizeof(ACL_PROXY_USER), 50, 100, MYF(0));
+  my_hash_init2(key_memory_acl_mem, &acl_roles,50, &my_charset_utf8mb3_bin,
+                0, 0, 0, (my_hash_get_key) acl_role_get_key, 0,
+                (void (*)(void *))free_acl_role, 0);
+  my_hash_init2(key_memory_acl_mem, &acl_roles_mappings, 50,
+                &my_charset_utf8mb3_bin, 0, 0, 0, (my_hash_get_key)
+                acl_role_map_get_key, 0, 0, 0);
+  old_mem= acl_memroot;
+  delete_dynamic(&acl_wild_hosts);
+  my_hash_free(&acl_check_hosts);
+  acl_public= NULL;
+
+  if ((result= acl_load(thd, tables)))
+  {					// Error. Revert to old list
+    DBUG_PRINT("error",("Reverting to old privileges"));
+    acl_free();				/* purecov: inspected */
+    acl_hosts= old_acl_hosts;
+    acl_users= old_acl_users;
+    acl_roles= old_acl_roles;
+    acl_public= old_acl_public;
+    acl_roles_mappings= old_acl_roles_mappings;
+    acl_proxy_users= old_acl_proxy_users;
+    acl_dbs= old_acl_dbs;
+    old_acl_dbs.init(0,0);
+    acl_memroot= old_mem;
+    init_check_host();
+  }
+  else
+  {
+    /* Load succeeded: dispose of the saved, now stale, structures */
+    my_hash_free(&old_acl_roles);
+    free_root(&old_mem,MYF(0));
+    delete_dynamic(&old_acl_hosts);
+    delete_dynamic_with_callback(&old_acl_users, (FREE_FUNC) free_acl_user);
+    delete_dynamic(&old_acl_proxy_users);
+    my_hash_free(&old_acl_roles_mappings);
+  }
+  mysql_mutex_unlock(&acl_cache->lock);
+end:
+  close_mysql_tables(thd);
+  DBUG_RETURN(result);
+}
+
+/*
+  Get all access bits from table after fieldnr
+
+  IMPLEMENTATION
+  We know that the access privileges ends when there is no more fields
+  or the field is not an enum with two elements.
+
+  SYNOPSIS
+    get_access()
+    form        an open table to read privileges from.
+                The record should be already read in table->record[0]
+    fieldnr     number of the first privilege (that is ENUM('N','Y') field
+    next_field  on return - number of the field next to the last ENUM
+                (unless next_field == 0)
+
+  RETURN VALUE
+    privilege mask
+*/
+
+static privilege_t get_access(TABLE *form, uint fieldnr, uint *next_field)
+{
+  char buff[2];
+  String res(buff,sizeof(buff),&my_charset_latin1);
+  ulonglong access_bits= 0;
+  ulonglong mask= 1;
+  Field **field_ptr= form->field + fieldnr;
+
+  /*
+    The privilege columns form a consecutive run of two-element
+    ENUM('N','Y') fields; stop at the first field that is not one.
+  */
+  while (*field_ptr != NULL &&
+         (*field_ptr)->real_type() == MYSQL_TYPE_ENUM &&
+         ((Field_enum*) (*field_ptr))->typelib->count == 2)
+  {
+    if (get_YN_as_bool(*field_ptr))
+      access_bits|= mask;
+    mask<<= 1;
+    field_ptr++;
+    fieldnr++;
+  }
+  if (next_field)
+    *next_field= fieldnr;
+  /* Mask off bits beyond the privileges known to this server version */
+  return ALL_KNOWN_ACL & access_bits;
+}
+
+
+/* qsort comparator: order ACL entries by descending specificity ('sort') */
+static int acl_compare(const ACL_ACCESS *a, const ACL_ACCESS *b)
+{
+  if (a->sort == b->sort)
+    return 0;
+  return (a->sort > b->sort) ? -1 : 1;
+}
+
+/* Order acl_users by user name, then specificity, then host name */
+static int acl_user_compare(const ACL_USER *a, const ACL_USER *b)
+{
+  int cmp= strcmp(a->user.str, b->user.str);
+  if (cmp == 0)
+    cmp= acl_compare(a, b);
+  if (cmp != 0)
+    return cmp;
+
+  /*
+    For more deterministic results, resolve ambiguity between
+    "localhost" and "127.0.0.1"/"::1" by sorting "localhost" before
+    loopback addresses.
+    Test suite (on Windows) expects "root@localhost", even if
+    root@::1 would also match.
+  */
+  return -strcmp(a->host.hostname, b->host.hostname);
+}
+
+/* Order acl_dbs by user name, then by descending specificity */
+static int acl_db_compare(const ACL_DB *a, const ACL_DB *b)
+{
+  int cmp= strcmp(a->user, b->user);
+  return cmp ? cmp : acl_compare(a, b);
+}
+
+/* Re-sort acl_users by (name, specificity) after loading or changes */
+static void rebuild_acl_users()
+{
+  uchar *base= (uchar*) dynamic_element(&acl_users, 0, ACL_USER*);
+  my_qsort(base, acl_users.elements, sizeof(ACL_USER),
+           (qsort_cmp) acl_user_compare);
+}
+
+/* Re-sort acl_dbs by (user name, specificity) after load or modification */
+static void rebuild_acl_dbs()
+{
+  acl_dbs.sort(acl_db_compare);
+}
+
+
+/*
+  Return index of the first entry with given user in the array,
+  or SIZE_T_MAX if not found.
+
+  Assumes the array is sorted by get_username
+*/
+template<typename T> size_t find_first_user(T* arr, size_t len, const char *user)
+{
+  size_t low= 0;
+  size_t high= len;
+  size_t mid;
+
+  bool found= false;
+  if(!len)
+    return  SIZE_T_MAX;
+
+#ifndef DBUG_OFF
+  /* Debug builds: verify the sorted-by-username precondition */
+  for (uint i = 0; i < len - 1; i++)
+    DBUG_ASSERT(strcmp(arr[i].get_username(), arr[i + 1].get_username()) <= 0);
+#endif
+  /* Binary search for the leftmost entry whose username equals 'user' */
+  while (low < high)
+  {
+    mid= low + (high - low) / 2;
+    int cmp= strcmp(arr[mid].get_username(),user);
+    if (cmp == 0)
+      found= true;
+
+    if (cmp >= 0 )
+      high= mid;
+    else
+      low= mid + 1;
+  }
+  return  (!found || low == len || strcmp(arr[low].get_username(), user)!=0 )?SIZE_T_MAX:low;
+}
+
+/* Index of the first acl_users entry with this user name, or SIZE_T_MAX */
+static size_t acl_find_user_by_name(const char *user)
+{
+  ACL_USER *users= (ACL_USER *) acl_users.buffer;
+  return find_first_user(users, acl_users.elements, user);
+}
+
+/* Index of the first acl_dbs entry with this user name, or SIZE_T_MAX */
+static size_t acl_find_db_by_username(const char *user)
+{
+  ACL_DB *dbs= acl_dbs.front();
+  return find_first_user(dbs, acl_dbs.elements(), user);
+}
+
+/*
+  Does the entry's db pattern cover the given database?
+  An entry with a NULL db matches any database.
+*/
+static bool match_db(ACL_DB *acl_db, const char *db, my_bool db_is_pattern)
+{
+  if (!acl_db->db)
+    return true;
+  return db != NULL && wild_compare(db, acl_db->db, db_is_pattern) == 0;
+}
+
+
+/*
+  Lookup in the acl_users or acl_dbs for the best matching entry corresponding to
+  given user, host and ip parameters (also db, in case of ACL_DB)
+
+  Historical note:
+
+  In the past, both arrays were sorted just by ACL_ENTRY::sort field and were
+  searched linearly, until the first match of (username,host) pair was found.
+
+  This function uses optimizations (binary search by username), yet preserves the
+  historical behavior, i.e the returns a match with highest ACL_ENTRY::sort.
+*/
+template<typename T> T* find_by_username_or_anon(T* arr, size_t len, const char *user,
+  const char *host, const char *ip,
+  const char *db, my_bool db_is_pattern, bool (*match_db_func)(T*,const char *,my_bool))
+{
+  size_t i;
+  T *ret = NULL;
+
+  // Check  entries matching user name.
+  size_t start = find_first_user(arr, len, user);
+  for (i= start; i < len; i++)
+  {
+    T *entry= &arr[i];
+    if (i > start && strcmp(user, entry->get_username()))
+      break;
+
+    if (compare_hostname(&entry->host, host, ip) && (!match_db_func || match_db_func(entry, db, db_is_pattern)))
+    {
+      ret= entry;
+      break;
+    }
+  }
+
+  // Look also for anonymous user (username is empty string)
+  // Due to sort by name, entries for anonymous user start at the start of array.
+  // An anonymous entry only replaces the named match when it is more
+  // specific (acl_compare orders by descending ACL_ENTRY::sort).
+  for (i= 0; i < len; i++)
+  {
+    T *entry = &arr[i];
+    if (*entry->get_username() || (ret && acl_compare(entry, ret) >= 0))
+      break;
+    if (compare_hostname(&entry->host, host, ip) && (!match_db_func || match_db_func(entry, db, db_is_pattern)))
+    {
+      ret= entry;
+      break;
+    }
+  }
+  return ret;
+}
+
+/* Best-matching acl_dbs entry for (user, host, ip, db), or NULL */
+static ACL_DB *acl_db_find(const char *db, const char *user, const char *host, const char *ip, my_bool db_is_pattern)
+{
+  ACL_DB *start= acl_dbs.front();
+  size_t count= acl_dbs.elements();
+  return find_by_username_or_anon(start, count, user, host, ip,
+                                  db, db_is_pattern, match_db);
+}
+
+
+/*
+  Gets user credentials without authentication and resource limit checks.
+
+  SYNOPSIS
+    acl_getroot()
+      sctx               Context which should be initialized
+      user               user name
+      host               host name
+      ip                 IP
+      db                 current data base name
+
+  RETURN
+    FALSE  OK
+    TRUE   Error
+*/
+
+bool acl_getroot(Security_context *sctx, const char *user, const char *host,
+                 const char *ip, const char *db)
+{
+  int res= 1;
+  ACL_USER *acl_user= 0;
+  DBUG_ENTER("acl_getroot");
+
+  DBUG_PRINT("enter", ("Host: '%s', Ip: '%s', User: '%s', db: '%s'",
+                       host, ip, user, db));
+  sctx->init();
+  sctx->user= *user ? user : NULL;
+  sctx->host= host;
+  sctx->ip= ip;
+  sctx->host_or_ip= host ? host : (safe_str(ip));
+
+  if (!initialized)
+  {
+    /*
+      here if mysqld's been started with --skip-grant-tables option.
+    */
+    sctx->skip_grants();
+    DBUG_RETURN(FALSE);
+  }
+
+  mysql_mutex_lock(&acl_cache->lock);
+
+  sctx->db_access= NO_ACL;
+
+  /* An empty host name marks a role context rather than a user context */
+  if (host[0]) // User, not Role
+  {
+    acl_user= find_user_wild(host, user, ip);
+
+    if (acl_user)
+    {
+      res= 0;
+      /* Seed db access from the best-matching mysql.db entry, if any */
+      if (ACL_DB *acl_db= acl_db_find(db, user, host, ip, FALSE))
+        sctx->db_access= acl_db->access;
+
+      sctx->master_access= acl_user->access;
+
+      strmake_buf(sctx->priv_user, user);
+
+      if (acl_user->host.hostname)
+        strmake_buf(sctx->priv_host, acl_user->host.hostname);
+    }
+  }
+  else // Role, not User
+  {
+    ACL_ROLE *acl_role= find_acl_role(user, false);
+    if (acl_role)
+    {
+      res= 0;
+      if (ACL_DB *acl_db= acl_db_find(db, user, "", "", FALSE))
+        sctx->db_access = acl_db->access;
+
+      sctx->master_access= acl_role->access;
+
+      strmake_buf(sctx->priv_role, user);
+    }
+  }
+
+  /* Everyone additionally gets whatever was granted to PUBLIC */
+  if (acl_public)
+  {
+    if (ACL_DB *acl_db= acl_db_find(db, public_name.str, "", "", FALSE))
+      sctx->db_access|= acl_db->access;
+
+    sctx->master_access|= acl_public->access;
+  }
+
+  mysql_mutex_unlock(&acl_cache->lock);
+  DBUG_RETURN(res);
+}
+
+/*
+  Callback for traverse_role_graph_down(): stop the traversal (return -1)
+  as soon as a grantee's name equals the role name passed in 'data'.
+*/
+static int check_role_is_granted_callback(ACL_USER_BASE *grantee, void *data)
+{
+  LEX_CSTRING *rolename= static_cast<LEX_CSTRING *>(data);
+  if (rolename->length == grantee->user.length &&
+      !strcmp(rolename->str, grantee->user.str))
+    return -1; // End search, we've found our role.
+
+  /* Keep looking, we haven't found our role yet. */
+  return 0;
+}
+
+/*
+  unlike find_user_exact and find_user_wild,
+  this function finds anonymous users too, it's when a
+  user is not empty, but priv_user (acl_user->user) is empty.
+*/
+/* Find a user entry by (host, user, ip), matching anonymous users too */
+static ACL_USER *find_user_or_anon(const char *host, const char *user, const char *ip)
+{
+  return find_by_username_or_anon
+    (reinterpret_cast<ACL_USER*>(acl_users.buffer), acl_users.elements,
+     user, host, ip, NULL, FALSE, NULL);
+}
+
+
+/*
+  Check whether (user, host, ip) is allowed to activate 'rolename'
+  with SET ROLE.
+
+  On success, *access (when non-NULL) receives the merged privileges of
+  the user and the role, plus PUBLIC's privileges if that role exists.
+  Returns 0 on success or an error code; the produced error message
+  deliberately does not reveal whether the role exists unless the
+  invoker could discover that anyway (see the switch at the end).
+*/
+static int check_user_can_set_role(THD *thd, const char *user,
+                                   const char *host, const char *ip,
+                                   const char *rolename, privilege_t *access)
+{
+  ACL_ROLE *role;
+  ACL_USER_BASE *acl_user_base;
+  ACL_USER *UNINIT_VAR(acl_user);
+  bool is_granted= FALSE;
+  int result= 0;
+
+  /* clear role privileges */
+  mysql_mutex_lock(&acl_cache->lock);
+
+  if (!strcasecmp(rolename, none.str))
+  {
+    /* have to clear the privileges */
+    /* get the current user */
+    acl_user= find_user_wild(host, user, ip);
+    if (acl_user == NULL)
+      result= ER_INVALID_CURRENT_USER;
+    else if (access)
+      *access= acl_user->access;
+
+    goto end;
+  }
+
+  role= find_acl_role(rolename, false);
+
+  /* According to SQL standard, the same error message must be presented */
+  if (role == NULL)
+  {
+    result= ER_INVALID_ROLE;
+    goto end;
+  }
+
+  /* Walk the role's grantees looking for a user entry matching the invoker */
+  for (uint i=0 ; i < role->parent_grantee.elements ; i++)
+  {
+    acl_user_base= *(dynamic_element(&role->parent_grantee, i, ACL_USER_BASE**));
+    if (acl_user_base->flags & IS_ROLE)
+      continue;
+
+    acl_user= (ACL_USER *)acl_user_base;
+    if (acl_user->wild_eq(user, host, ip))
+    {
+      is_granted= TRUE;
+      break;
+    }
+  }
+
+  /* According to SQL standard, the same error message must be presented */
+  if (!is_granted)
+  {
+    result= 1;
+    goto end;
+  }
+
+  if (access)
+    *access = acl_user->access | role->access;
+
+end:
+  /* Privileges granted to PUBLIC apply to every authenticated user */
+  if (acl_public && access)
+    *access|= acl_public->access;
+
+  mysql_mutex_unlock(&acl_cache->lock);
+
+  /* We present different error messages depending if the user has sufficient
+     privileges to know if the INVALID_ROLE exists. */
+  switch (result)
+  {
+    case ER_INVALID_CURRENT_USER:
+      my_error(ER_INVALID_CURRENT_USER, MYF(0), rolename);
+      break;
+    case ER_INVALID_ROLE:
+      /* Role doesn't exist at all */
+      my_error(ER_INVALID_ROLE, MYF(0), rolename);
+      break;
+    case 1:
+      LEX_CSTRING role_lex;
+      /* First, check if current user can see mysql database. */
+      bool read_access= !check_access(thd, SELECT_ACL, "mysql", NULL, NULL, 1, 1);
+
+      role_lex.str= rolename;
+      role_lex.length= strlen(rolename);
+      mysql_mutex_lock(&acl_cache->lock);
+      ACL_USER *cur_user= find_user_or_anon(thd->security_ctx->priv_host,
+                                            thd->security_ctx->priv_user,
+                                            thd->security_ctx->ip);
+
+      /* If the current user does not have select priv to mysql database,
+         see if the current user can discover the role if it was granted to him.
+      */
+      if (cur_user && (read_access ||
+                       traverse_role_graph_down(cur_user, &role_lex,
+                                                check_role_is_granted_callback,
+                                                NULL) == -1))
+      {
+        /* Role is not granted but current user can see the role */
+        my_printf_error(ER_INVALID_ROLE, "User %`s@%`s has not been granted role %`s",
+                        MYF(0), thd->security_ctx->priv_user,
+                        thd->security_ctx->priv_host, rolename);
+      }
+      else
+      {
+        /* Role is not granted and current user cannot see the role */
+        my_error(ER_INVALID_ROLE, MYF(0), rolename);
+      }
+      mysql_mutex_unlock(&acl_cache->lock);
+      break;
+  }
+
+  return result;
+}
+
+
+/*
+  Entry point for SET ROLE permission checking for the current session
+  user.  Fails immediately when the server runs without grant tables.
+*/
+int acl_check_setrole(THD *thd, const char *rolename, privilege_t *access)
+{
+  if (initialized)
+    return check_user_can_set_role(thd, thd->security_ctx->priv_user,
+                                   thd->security_ctx->host,
+                                   thd->security_ctx->ip, rolename, access);
+
+  my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--skip-grant-tables");
+  return 1;
+}
+
+
+/*
+  Activate a role in the current security context: install the merged
+  access bits, remember (or clear) the role name, and recompute db-level
+  access for the current database if one is selected.
+*/
+int acl_setrole(THD *thd, const char *rolename, privilege_t access)
+{
+  Security_context *sctx= thd->security_ctx;
+
+  /* merge the privileges */
+  sctx->master_access= access;
+  if (strcasecmp(rolename, none.str) == 0)
+    sctx->priv_role[0]= 0;                 /* NONE: drop the active role */
+  else
+    strmake_buf(sctx->priv_role, rolename); /* mark the current role */
+
+  if (thd->db.str)
+    sctx->db_access= acl_get_all3(sctx, thd->db.str, FALSE);
+
+  return 0;
+}
+
+/* HASH key extractor for acl_check_hosts: the key is the host name */
+static uchar* check_get_key(ACL_USER *buff, size_t *length,
+                            my_bool not_used __attribute__((unused)))
+{
+  *length=buff->hostname_length;
+  return (uchar*) buff->host.hostname;
+}
+
+
+/*
+  Refresh an existing cached role's privileges from a freshly read grant
+  row.  No-op when the role is not present in the cache.
+*/
+static void acl_update_role(const char *rolename, const privilege_t privileges)
+{
+  ACL_ROLE *role= find_acl_role(rolename, true);
+  if (!role)
+    return;
+  role->initial_role_access= role->access= privileges;
+  DBUG_ASSERT(strcasecmp(rolename, public_name.str) || acl_public == role);
+}
+
+
+/*
+  Build a new in-memory user entry from a parsed CREATE/ALTER USER clause.
+  User and host strings are copied into the ACL memroot so the entry owns
+  its storage.
+  NOTE(review): 'options' and 'privileges' are not referenced in this body;
+  presumably they are applied afterwards via acl_user_update() — confirm
+  at the call sites.
+*/
+ACL_USER::ACL_USER(THD *thd, const LEX_USER &combo,
+                   const Account_options &options,
+                   const privilege_t privileges)
+{
+  user= safe_lexcstrdup_root(&acl_memroot, combo.user);
+  update_hostname(&host, safe_strdup_root(&acl_memroot, combo.host.str));
+  hostname_length= combo.host.length;
+  sort= get_magic_sort("hu", host.hostname, user.str);
+  password_last_changed= thd->query_start();
+  password_lifetime= -1;
+  my_init_dynamic_array(PSI_INSTRUMENT_ME, &role_grants, sizeof(ACL_USER *), 0, 8, MYF(0));
+}
+
+
+/*
+  Apply CREATE/ALTER USER attributes to a working copy of an ACL_USER.
+
+  @param acl_user  a copy of the cache entry; the caller commits it only
+                   on success, so returning 1 here leaves the acl cache
+                   unchanged
+  @param nauth     number of authentication methods in combo.auth
+  @return 0 on success, 1 on error (OOM or invalid auth data)
+*/
+static int acl_user_update(THD *thd, ACL_USER *acl_user, uint nauth,
+                           const LEX_USER &combo,
+                           const Account_options &options,
+                           const privilege_t privileges)
+{
+  ACL_USER_PARAM::AUTH *work_copy= NULL;
+  if (nauth)
+  {
+    /* Prepare the new auth methods in a scratch array first; they are
+       copied into acl_user only after all fallible steps succeeded */
+    if (!(work_copy= (ACL_USER_PARAM::AUTH*)
+            alloc_root(thd->mem_root, nauth * sizeof(ACL_USER_PARAM::AUTH))))
+      return 1;
+
+    USER_AUTH *auth= combo.auth;
+    for (uint i= 0; i < nauth; i++, auth= auth->next)
+    {
+      work_copy[i].plugin= auth->plugin;
+      work_copy[i].auth_string= safe_lexcstrdup_root(&acl_memroot,
+                                                     auth->auth_str);
+      if (fix_user_plugin_ptr(work_copy + i))
+        work_copy[i].plugin= safe_lexcstrdup_root(&acl_memroot, auth->plugin);
+      if (set_user_auth(thd, acl_user->user,
+                        {acl_user->host.hostname, acl_user->hostname_length},
+                        work_copy + i, auth->pwtext))
+        return 1;
+    }
+  }
+
+  acl_user->access= privileges;
+  /* Only overwrite the resource limits the statement explicitly set */
+  if (options.specified_limits & USER_RESOURCES::QUERIES_PER_HOUR)
+    acl_user->user_resource.questions= options.questions;
+  if (options.specified_limits & USER_RESOURCES::UPDATES_PER_HOUR)
+    acl_user->user_resource.updates= options.updates;
+  if (options.specified_limits & USER_RESOURCES::CONNECTIONS_PER_HOUR)
+    acl_user->user_resource.conn_per_hour= options.conn_per_hour;
+  if (options.specified_limits & USER_RESOURCES::USER_CONNECTIONS)
+    acl_user->user_resource.user_conn= options.user_conn;
+  if (options.specified_limits & USER_RESOURCES::MAX_STATEMENT_TIME)
+    acl_user->user_resource.max_statement_time= options.max_statement_time;
+  if (options.ssl_type != SSL_TYPE_NOT_SPECIFIED)
+  {
+    acl_user->ssl_type= options.ssl_type;
+    acl_user->ssl_cipher= safe_strdup_root(&acl_memroot, options.ssl_cipher.str);
+    acl_user->x509_issuer= safe_strdup_root(&acl_memroot,
+                                            safe_str(options.x509_issuer.str));
+    acl_user->x509_subject= safe_strdup_root(&acl_memroot,
+                                             safe_str(options.x509_subject.str));
+  }
+  if (options.account_locked != ACCOUNTLOCK_UNSPECIFIED)
+    acl_user->account_locked= options.account_locked == ACCOUNTLOCK_LOCKED;
+
+  if (thd->is_error())
+  {
+    // If something went wrong (including OOM) we will not spoil acl cache
+    return 1;
+  }
+  /* Unexpire the user password and copy AUTH (when no more errors possible)*/
+  if (nauth)
+  {
+    acl_user->password_expired= false;
+    acl_user->password_last_changed= thd->query_start();
+
+    if (acl_user->nauth >= nauth)
+    {
+      /* Existing auth array is large enough; just shrink the count */
+      acl_user->nauth= nauth;
+    }
+    else
+    {
+      if (acl_user->alloc_auth(&acl_memroot, nauth))
+      {
+        /*
+          acl_user is a copy, so NULL assigned in case of an error do not
+          change the acl cache
+        */
+        return 1;
+      }
+    }
+    DBUG_ASSERT(work_copy); // allocated under the same condition
+    memcpy(acl_user->auth, work_copy,  nauth * sizeof(ACL_USER_PARAM::AUTH));
+  }
+
+  switch (options.password_expire) {
+  case PASSWORD_EXPIRE_UNSPECIFIED:
+    break;
+  case PASSWORD_EXPIRE_NOW:
+    acl_user->password_expired= true;
+    break;
+  case PASSWORD_EXPIRE_NEVER:
+    acl_user->password_lifetime= 0;
+    break;
+  case PASSWORD_EXPIRE_DEFAULT:
+    acl_user->password_lifetime= -1;
+    break;
+  case PASSWORD_EXPIRE_INTERVAL:
+    acl_user->password_lifetime= options.num_expiration_days;
+    break;
+  }
+
+  return 0;
+}
+
+
+/*
+  Insert a new role into the in-memory role hash.
+  acl_cache->lock must be held.  If the role is the PUBLIC pseudo-role,
+  the global acl_public shortcut is updated as well.
+*/
+static void acl_insert_role(const char *rolename, privilege_t privileges)
+{
+  ACL_ROLE *entry;
+  DBUG_ENTER("acl_insert_role");
+  DBUG_PRINT("enter", ("Role: '%s'", rolename));
+
+  mysql_mutex_assert_owner(&acl_cache->lock);
+  entry= new (&acl_memroot) ACL_ROLE(rolename, privileges, &acl_memroot);
+  my_init_dynamic_array(key_memory_acl_mem, &entry->parent_grantee,
+                        sizeof(ACL_USER_BASE *), 0, 8, MYF(0));
+  my_init_dynamic_array(key_memory_acl_mem, &entry->role_grants,
+                        sizeof(ACL_ROLE *), 0, 8, MYF(0));
+
+  my_hash_insert(&acl_roles, (uchar *)entry);
+  DBUG_ASSERT(strcasecmp(rolename, public_name.str) || is_public(rolename));
+  if (is_public(rolename))
+    acl_public= entry;
+
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Update or remove the exact (user, host, db) entry in acl_dbs.
+
+  A non-empty privilege set replaces the entry's access bits; an empty
+  set deletes the entry.  Returns true if any entry was changed.
+  acl_cache->lock must be held.
+*/
+static bool acl_update_db(const char *user, const char *host, const char *db,
+                          privilege_t privileges)
+{
+  mysql_mutex_assert_owner(&acl_cache->lock);
+
+  bool updated= false;
+
+  /* acl_dbs is sorted by user name, so scanning can stop at the first
+     entry belonging to a different user */
+  for (size_t i= acl_find_db_by_username(user); i < acl_dbs.elements(); i++)
+  {
+    ACL_DB *acl_db= &acl_dbs.at(i);
+    if (!strcmp(user,acl_db->user))
+    {
+      if ((!acl_db->host.hostname && !host[0]) ||
+          (acl_db->host.hostname && !strcmp(host, acl_db->host.hostname)))
+      {
+        if ((!acl_db->db && !db[0]) ||
+            (acl_db->db && !strcmp(db,acl_db->db)))
+
+        {
+          if (privileges)
+          {
+            acl_db->access= privileges;
+            acl_db->initial_access= acl_db->access;
+          }
+          else
+            /* NOTE(review): del(i) shifts the tail down while i still
+               advances, so the element moved into slot i is skipped —
+               presumably harmless because at most one exact
+               (user,host,db) entry exists; confirm. */
+            acl_dbs.del(i);
+          updated= true;
+        }
+      }
+    }
+    else
+     break;
+  }
+
+  return updated;
+}
+
+
+/*
+  Insert a user/db/host combination into the global acl_cache
+
+  SYNOPSIS
+    acl_insert_db()
+    user		User name
+    host		Host name
+    db			Database name
+    privileges		Bitmap of privileges
+
+  NOTES
+    acl_cache->lock must be locked when calling this
+*/
+
+static void acl_insert_db(const char *user, const char *host, const char *db,
+                          const privilege_t privileges)
+{
+  ACL_DB acl_db;
+  mysql_mutex_assert_owner(&acl_cache->lock);
+  /* Copy all strings into the ACL memroot: the entry outlives the caller */
+  acl_db.user= strdup_root(&acl_memroot, user);
+  update_hostname(&acl_db.host, safe_strdup_root(&acl_memroot, host));
+  acl_db.db= strdup_root(&acl_memroot, db);
+  acl_db.initial_access= acl_db.access= privileges;
+  acl_db.sort= get_magic_sort("hdu", acl_db.host.hostname, acl_db.db,
+                              acl_db.user);
+  acl_dbs.push(acl_db);
+  rebuild_acl_dbs();
+}
+
+
+/*
+  Get privilege for a host, user and db combination
+
+  as db_is_pattern changes the semantics of comparison,
+  acl_cache is not used if db_is_pattern is set.
+*/
+
+privilege_t acl_get(const char *host, const char *ip,
+                    const char *user, const char *db, my_bool db_is_pattern)
+{
+  privilege_t host_access(ALL_KNOWN_ACL), db_access(NO_ACL);
+  uint i;
+  size_t key_length;
+  char key[ACL_KEY_LENGTH],*tmp_db,*end;
+  acl_entry *entry;
+  DBUG_ENTER("acl_get");
+
+  /* Cache key layout: "<ip>\0<user>\0<db>" */
+  tmp_db= strmov(strmov(key, safe_str(ip)) + 1, user) + 1;
+  end= strnmov(tmp_db, db, key + sizeof(key) - tmp_db);
+
+  if (end >= key + sizeof(key)) // db name was truncated
+    DBUG_RETURN(NO_ACL);        // no privileges for an invalid db name
+
+  if (lower_case_table_names)
+  {
+    my_casedn_str(files_charset_info, tmp_db);
+    db=tmp_db;
+  }
+  key_length= (size_t) (end-key);
+
+  mysql_mutex_lock(&acl_cache->lock);
+  /* Exact-db lookups are cached; pattern lookups bypass the cache because
+     db_is_pattern changes the comparison semantics */
+  if (!db_is_pattern && (entry=acl_cache->search((uchar*) key, key_length)))
+  {
+    db_access=entry->access;
+    mysql_mutex_unlock(&acl_cache->lock);
+    DBUG_PRINT("exit", ("access: 0x%llx",  (longlong) db_access));
+    DBUG_RETURN(db_access);
+  }
+
+  /*
+    Check if there are some access rights for database and user
+  */
+  if (ACL_DB *acl_db= acl_db_find(db,user, host, ip, db_is_pattern))
+  {
+    db_access= acl_db->access;
+    if (acl_db->host.hostname)
+      goto exit; // Fully specified. Take it
+    /* the host table is not used for roles */
+    if ((!host || !host[0]) && !acl_db->host.hostname &&
+        find_acl_role(user, false))
+      goto exit;
+  }
+
+  if (!db_access)
+    goto exit;					// Can't be better
+
+  /*
+    No host specified for user. Get hostdata from host table
+  */
+  host_access= NO_ACL;                          // Host must be found
+  for (i=0 ; i < acl_hosts.elements ; i++)
+  {
+    ACL_HOST *acl_host=dynamic_element(&acl_hosts,i,ACL_HOST*);
+    if (compare_hostname(&acl_host->host,host,ip))
+    {
+      if (!wild_compare(db, acl_host->db, db_is_pattern))
+      {
+	host_access=acl_host->access;		// Fully specified. Take it
+	break;
+      }
+    }
+  }
+exit:
+  /* Save entry in cache for quick retrieval */
+  if (!db_is_pattern &&
+      (entry= (acl_entry*) my_malloc(key_memory_acl_cache,
+                                     sizeof(acl_entry)+key_length, MYF(MY_WME))))
+  {
+    entry->access=(db_access & host_access);
+    DBUG_ASSERT(key_length < 0xffff);
+    entry->length=(uint16)key_length;
+    memcpy((uchar*) entry->key,key,key_length);
+    acl_cache->add(entry);
+  }
+  mysql_mutex_unlock(&acl_cache->lock);
+  DBUG_PRINT("exit", ("access: 0x%llx", (longlong) (db_access & host_access)));
+  DBUG_RETURN(db_access & host_access);
+}
+
+/*
+  Check if there is access for the host/user, role, public on the database
+*/
+
+privilege_t acl_get_all3(Security_context *sctx, const char *db,
+                         bool db_is_patern)
+{
+  privilege_t access= acl_get(sctx->host, sctx->ip, sctx->priv_user,
+                              db, db_is_patern);
+  /* Add privileges granted to the currently active role, if any */
+  if (sctx->priv_role[0] != '\0')
+    access|= acl_get("", "", sctx->priv_role, db, db_is_patern);
+  /* Add privileges granted to PUBLIC, if that pseudo-role exists */
+  if (acl_public != NULL)
+    access|= acl_get("", "", public_name.str, db, db_is_patern);
+  return access;
+}
+
+
+/*
+  Check if there are any possible matching entries for this host
+
+  NOTES
+    All host names without wild cards are stored in a hash table,
+    entries with wildcards are stored in a dynamic array
+*/
+
+static void init_check_host(void)
+{
+  DBUG_ENTER("init_check_host");
+  (void) my_init_dynamic_array(key_memory_acl_mem, &acl_wild_hosts,
+                               sizeof(struct acl_host_and_ip),
+                               acl_users.elements, 1, MYF(0))
+  (void) my_hash_init(key_memory_acl_mem, &acl_check_hosts,system_charset_info,
+                      acl_users.elements, 0, 0,
+                      (my_hash_get_key) check_get_key, 0, 0);
+  if (!allow_all_hosts)
+  {
+    for (size_t i=0 ; i < acl_users.elements ; i++)
+    {
+      ACL_USER *acl_user=dynamic_element(&acl_users,i,ACL_USER*);
+      /* Hosts with wildcards or an IP mask go to the linear-scan array,
+         everything else to the hash */
+      if (strchr(acl_user->host.hostname,wild_many) ||
+	  strchr(acl_user->host.hostname,wild_one) ||
+	  acl_user->host.ip_mask)
+      {						// Has wildcard
+	size_t j;
+	for (j=0 ; j < acl_wild_hosts.elements ; j++)
+	{					// Check if host already exists
+	  acl_host_and_ip *acl=dynamic_element(&acl_wild_hosts,j,
+					       acl_host_and_ip *);
+	  if (!my_strcasecmp(system_charset_info,
+                             acl_user->host.hostname, acl->hostname))
+	    break;				// already stored
+	}
+	if (j == acl_wild_hosts.elements)	// If new
+	  (void) push_dynamic(&acl_wild_hosts,(uchar*) &acl_user->host);
+      }
+      else if (!my_hash_search(&acl_check_hosts,(uchar*)
+                               acl_user->host.hostname,
+                               strlen(acl_user->host.hostname)))
+      {
+	if (my_hash_insert(&acl_check_hosts,(uchar*) acl_user))
+	{					// End of memory
+	  allow_all_hosts=1;			// Should never happen
+	  DBUG_VOID_RETURN;
+	}
+      }
+    }
+  }
+  freeze_size(&acl_wild_hosts);
+  freeze_size(&acl_check_hosts.array);
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Rebuild lists used for checking of allowed hosts
+
+  We need to rebuild 'acl_check_hosts' and 'acl_wild_hosts' after adding,
+  dropping or renaming user, since they contain pointers to elements of
+  'acl_user' array, which are invalidated by drop operation, and use
+  ACL_USER::host::hostname as a key, which is changed by rename.
+*/
+static void rebuild_check_host(void)
+{
+  delete_dynamic(&acl_wild_hosts);
+  my_hash_free(&acl_check_hosts);
+  init_check_host();
+}
+
+/*
+  Reset a role role_grants dynamic array.
+  Also, the role's access bits are reset to the ones present in the table.
+*/
+static my_bool acl_role_reset_role_arrays(void *ptr,
+                                    void * not_used __attribute__((unused)))
+{
+  ACL_ROLE *role= (ACL_ROLE *)ptr;
+  reset_dynamic(&role->role_grants);
+  reset_dynamic(&role->parent_grantee);
+  role->counter= 0;
+  return 0;
+}
+
+/*
+   Add a the coresponding pointers present in the mapping to the entries in
+   acl_users and acl_roles
+*/
+static bool add_role_user_mapping(ACL_USER_BASE *grantee, ACL_ROLE *role)
+{
+  return push_dynamic(&grantee->role_grants, (uchar*) &role)
+      || push_dynamic(&role->parent_grantee, (uchar*) &grantee);
+
+}
+
+/*
+  Revert the last add_role_user_mapping() action
+*/
+static void undo_add_role_user_mapping(ACL_USER_BASE *grantee, ACL_ROLE *role)
+{
+  void *pop __attribute__((unused));
+
+  pop= pop_dynamic(&grantee->role_grants);
+  DBUG_ASSERT(role == *(ACL_ROLE**)pop);
+
+  pop= pop_dynamic(&role->parent_grantee);
+  DBUG_ASSERT(grantee == *(ACL_USER_BASE**)pop);
+}
+
+/*
+  this helper is used when building role_grants and parent_grantee arrays
+  from scratch.
+
+  this happens either on initial loading of data from tables, in acl_load().
+  or in rebuild_role_grants after acl_role_reset_role_arrays().
+*/
+static bool add_role_user_mapping(const char *uname, const char *hname,
+                                  const char *rname)
+{
+  ACL_USER_BASE *grantee= find_acl_user_base(uname, hname);
+  ACL_ROLE *role= find_acl_role(rname, false);
+
+  if (grantee == NULL || role == NULL)
+    return 1;
+
+  /*
+    because all arrays are rebuilt completely, and counters were also reset,
+    we can increment them here, and after the rebuild all counters will
+    have correct values (equal to the number of roles granted).
+  */
+  if (grantee->flags & IS_ROLE)
+    ((ACL_ROLE*)grantee)->counter++;
+  return add_role_user_mapping(grantee, role);
+}
+
+/*
+  This helper function is used to removes roles and grantees
+  from the corresponding cross-reference arrays. see remove_role_user_mapping().
+  as such, it asserts that an element to delete is present in the array,
+  and is present only once.
+*/
+static void remove_ptr_from_dynarray(DYNAMIC_ARRAY *array, void *ptr)
+{
+  bool found __attribute__((unused))= false;
+  for (size_t i= 0; i < array->elements; i++)
+  {
+    if (ptr == *dynamic_element(array, i, void**))
+    {
+      DBUG_ASSERT(!found);
+      delete_dynamic_element(array, i);
+      IF_DBUG_ASSERT(found= true, break);
+    }
+  }
+  DBUG_ASSERT(found);
+}
+
+static void remove_role_user_mapping(ACL_USER_BASE *grantee, ACL_ROLE *role,
+                                     int grantee_idx=-1, int role_idx=-1)
+{
+  remove_ptr_from_dynarray(&grantee->role_grants, role);
+  remove_ptr_from_dynarray(&role->parent_grantee, grantee);
+}
+
+
+static my_bool add_role_user_mapping_action(void *ptr, void *unused __attribute__((unused)))
+{
+  ROLE_GRANT_PAIR *pair= (ROLE_GRANT_PAIR*)ptr;
+  bool status __attribute__((unused));
+  status= add_role_user_mapping(pair->u_uname, pair->u_hname, pair->r_uname);
+  /*
+     The invariant chosen is that acl_roles_mappings should _always_
+     only contain valid entries, referencing correct user and role grants.
+     If add_role_user_mapping detects an invalid entry, it will not add
+     the mapping into the ACL_USER::role_grants array.
+  */
+  DBUG_ASSERT(status == 0);
+  return 0;
+}
+
+
+/*
+  Rebuild the role grants every time the acl_users is modified
+
+  The role grants in the ACL_USER class need to be rebuilt, as they contain
+  pointers to elements of the acl_users array.
+*/
+
+static void rebuild_role_grants(void)
+{
+  DBUG_ENTER("rebuild_role_grants");
+  /*
+    Reset every user's and role's role_grants array
+  */
+  for (size_t i=0; i < acl_users.elements; i++) {
+    ACL_USER *user= dynamic_element(&acl_users, i, ACL_USER *);
+    reset_dynamic(&user->role_grants);
+  }
+  my_hash_iterate(&acl_roles, acl_role_reset_role_arrays, NULL);
+
+  /* Rebuild the direct links between users and roles in ACL_USER::role_grants */
+  my_hash_iterate(&acl_roles_mappings, add_role_user_mapping_action, NULL);
+
+  DBUG_VOID_RETURN;
+}
+
+
+/* Return true if there is no users that can match the given host */
+bool acl_check_host(const char *host, const char *ip)
+{
+  if (allow_all_hosts)
+    return 0;
+  mysql_mutex_lock(&acl_cache->lock);
+
+  if ((host && my_hash_search(&acl_check_hosts,(uchar*) host,strlen(host))) ||
+      (ip && my_hash_search(&acl_check_hosts,(uchar*) ip, strlen(ip))))
+  {
+    mysql_mutex_unlock(&acl_cache->lock);
+    return 0;					// Found host
+  }
+  for (size_t i=0 ; i < acl_wild_hosts.elements ; i++)
+  {
+    acl_host_and_ip *acl=dynamic_element(&acl_wild_hosts,i,acl_host_and_ip*);
+    if (compare_hostname(acl, host, ip))
+    {
+      mysql_mutex_unlock(&acl_cache->lock);
+      return 0;					// Host ok
+    }
+  }
+  mysql_mutex_unlock(&acl_cache->lock);
+  if (ip != NULL)
+  {
+    /* Increment HOST_CACHE.COUNT_HOST_ACL_ERRORS. */
+    Host_errors errors;
+    errors.m_host_acl= 1;
+    inc_host_errors(ip, &errors);
+  }
+  return 1;					// Host is not allowed
+}
+
+/**
+  Check if the user is allowed to alter the mysql.user table
+
+ @param thd              THD
+ @param host             Hostname for the user
+ @param user             User name
+
+ @return Error status
+   @retval 0 OK
+   @retval 1 Error
+*/
+
+static int check_alter_user(THD *thd, const char *host, const char *user)
+{
+  int error = 1;
+  if (!initialized)
+  {
+    my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--skip-grant-tables");
+    goto end;
+  }
+
+  if (IF_WSREP((!WSREP(thd) || !thd->wsrep_applier), 1) &&
+      !thd->slave_thread && !thd->security_ctx->priv_user[0] &&
+      !thd->bootstrap)
+  {
+    my_message(ER_PASSWORD_ANONYMOUS_USER,
+               ER_THD(thd, ER_PASSWORD_ANONYMOUS_USER),
+               MYF(0));
+    goto end;
+  }
+  if (!host) // Role
+  {
+    my_error(ER_PASSWORD_NO_MATCH, MYF(0));
+    goto end;
+  }
+
+  if (!thd->slave_thread &&
+      IF_WSREP((!WSREP(thd) || !thd->wsrep_applier),1) &&
+      !thd->security_ctx->is_priv_user(user, host))
+  {
+    if (thd->security_ctx->password_expired)
+    {
+      my_error(ER_MUST_CHANGE_PASSWORD, MYF(0));
+      goto end;
+    }
+    if (check_access(thd, UPDATE_ACL, "mysql", NULL, NULL, 1, 0))
+      goto end;
+  }
+
+  error = 0;
+
+end:
+  return error;
+}
+/**
+  Check if the user is allowed to change password
+
+ @param thd              THD
+ @param user             User, hostname, new password or password hash
+
+ @return Error status
+   @retval 0 OK
+   @retval 1 ERROR; In this case the error is sent to the client.
+*/
+
+bool check_change_password(THD *thd, LEX_USER *user)
+{
+  LEX_USER *real_user= get_current_user(thd, user);
+  user->user= real_user->user;
+  user->host= real_user->host;
+  return check_alter_user(thd, user->host.str, user->user.str);
+}
+
+
+/**
+  Change a password for a user.
+
+  @param thd            THD
+  @param user           User, hostname, new password hash
+ 
+  @return Error code
+   @retval 0 ok
+   @retval 1 ERROR; In this case the error is sent to the client.
+*/
+bool change_password(THD *thd, LEX_USER *user)
+{
+  Grant_tables tables;
+  /* Buffer should be extended when password length is extended. */
+  char buff[512];
+  ulong query_length= 0;
+  enum_binlog_format save_binlog_format;
+  bool result= false, acl_cache_is_locked= false;
+  ACL_USER *acl_user;
+  ACL_USER::AUTH auth;
+  const char *password_plugin= 0;
+  const CSET_STRING query_save __attribute__((unused)) = thd->query_string;
+  DBUG_ENTER("change_password");
+  DBUG_PRINT("enter",("host: '%s'  user: '%s'  new_password: '%s'",
+		      user->host.str, user->user.str, user->auth->auth_str.str));
+  DBUG_ASSERT(user->host.str != 0);                     // Ensured by caller
+
+  /*
+    This statement will be replicated as a statement, even when using
+    row-based replication.  The flag will be reset at the end of the
+    statement.
+    This has to be handled here as it's called by set_var.cc, which is
+    not automaticly handled by sql_parse.cc
+  */
+  save_binlog_format= thd->set_current_stmt_binlog_format_stmt();
+
+  if (WSREP(thd) && !IF_WSREP(thd->wsrep_applier, 0))
+    WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL);
+
+  if ((result= tables.open_and_lock(thd, Table_user, TL_WRITE)))
+    DBUG_RETURN(result != 1);
+
+  acl_cache_is_locked= 1;
+  mysql_mutex_lock(&acl_cache->lock);
+
+  if (!(acl_user= find_user_exact(user->host.str, user->user.str)))
+  {
+    my_error(ER_PASSWORD_NO_MATCH, MYF(0));
+    goto end;
+  }
+
+  if (acl_user->nauth == 1 &&
+      (acl_user->auth[0].plugin.str == native_password_plugin_name.str ||
+       acl_user->auth[0].plugin.str == old_password_plugin_name.str))
+  {
+    /* historical hack of auto-changing the plugin */
+    acl_user->auth[0].plugin= guess_auth_plugin(thd, user->auth->auth_str.length);
+  }
+
+  for (uint i=0; i < acl_user->nauth; i++)
+  {
+    auth= acl_user->auth[i];
+    auth.auth_string= safe_lexcstrdup_root(&acl_memroot, user->auth->auth_str);
+    int r= set_user_auth(thd, user->user, user->host,
+                         &auth, user->auth->pwtext);
+    if (r == ER_SET_PASSWORD_AUTH_PLUGIN)
+      password_plugin= auth.plugin.str;
+    else if (r)
+      goto end;
+    else
+    {
+      acl_user->auth[i]= auth;
+      password_plugin= 0;
+      break;
+    }
+  }
+  if (password_plugin)
+  {
+    my_error(ER_SET_PASSWORD_AUTH_PLUGIN, MYF(0), password_plugin);
+    goto end;
+  }
+
+  /* Update the acl password expired state of user */
+  acl_user->password_last_changed= thd->query_start();
+  acl_user->password_expired= false;
+
+  /* If user is the connected user, reset the password expired field on sctx
+     and allow the user to exit sandbox mode */
+  if (thd->security_ctx->is_priv_user(user->user.str, user->host.str))
+    thd->security_ctx->password_expired= false;
+
+  if (update_user_table_password(thd, tables.user_table(), *acl_user))
+    goto end;
+
+  hostname_cache_refresh();                    // Clear locked hostname cache
+  mysql_mutex_unlock(&acl_cache->lock);
+  result= acl_cache_is_locked= 0;
+  if (mysql_bin_log.is_open())
+  {
+    query_length= sprintf(buff, "SET PASSWORD FOR '%-.120s'@'%-.120s'='%-.120s'",
+           user->user.str, safe_str(user->host.str), auth.auth_string.str);
+    DBUG_ASSERT(query_length);
+    thd->clear_error();
+    result= thd->binlog_query(THD::STMT_QUERY_TYPE, buff, query_length,
+                              FALSE, FALSE, FALSE, 0) > 0;
+  }
+end:
+  if (acl_cache_is_locked)
+    mysql_mutex_unlock(&acl_cache->lock);
+  close_mysql_tables(thd);
+
+#ifdef WITH_WSREP
+wsrep_error_label:
+  if (WSREP(thd))
+  {
+    wsrep_to_isolation_end(thd);
+    thd->set_query(query_save);
+  }
+#endif /* WITH_WSREP */
+  thd->restore_stmt_binlog_format(save_binlog_format);
+
+  DBUG_RETURN(result);
+}
+
/**
  Check that the invoker is allowed to set 'role' as the default role
  for user@host: requires the same privileges as altering the account
  plus the right to set that role for that user.
*/
int acl_check_set_default_role(THD *thd, const char *host, const char *user,
                               const char *role)
{
  DBUG_ENTER("acl_check_set_default_role");
#ifdef HAVE_REPLICATION
  /*
    If the roles_mapping table is excluded by the replication filter, we return
    successful without validating the user/role data because the command will
    be ignored in a later call to `acl_set_default_role()` for a graceful exit.
  */
  Grant_tables tables;
  TABLE_LIST* first= NULL;
  if (tables.rpl_ignore_tables(thd, first, Table_roles_mapping, TL_WRITE))
    DBUG_RETURN(0);
#endif
  DBUG_RETURN(check_alter_user(thd, host, user) ||
              check_user_can_set_role(thd, user, host, NULL, role, NULL));
}
+
/**
  Set (or clear) the default role for user@host: updates the ACL cache
  and the mysql.user table, and logs the statement to the binlog.

  @return 0 ok, 1 error (the error is sent to the client)
*/
int acl_set_default_role(THD *thd, const char *host, const char *user,
                         const char *rolename)
{
  Grant_tables tables;
  char user_key[MAX_KEY_LENGTH];
  int result= 1;
  int error;
  ulong query_length= 0;
  bool clear_role= FALSE;
  char buff[512];
  enum_binlog_format save_binlog_format= thd->get_current_stmt_binlog_format();
  const CSET_STRING query_save __attribute__((unused)) = thd->query_string;

  DBUG_ENTER("acl_set_default_role");
  DBUG_PRINT("enter",("host: '%s'  user: '%s'  rolename: '%s'",
                      user, safe_str(host), safe_str(rolename)));

  /* SET DEFAULT ROLE NONE clears the stored default role */
  if (!strcasecmp(rolename, none.str))
    clear_role= TRUE;

  /* Pre-build the statement text; it is needed both for the binlog and
     for wsrep replication below */
  if (mysql_bin_log.is_open() ||
      (WSREP(thd) && !IF_WSREP(thd->wsrep_applier, 0)))
  {
    query_length=
      sprintf(buff,"SET DEFAULT ROLE '%-.120s' FOR '%-.120s'@'%-.120s'",
              safe_str(rolename), user, safe_str(host));
  }

  /*
    This statement will be replicated as a statement, even when using
    row-based replication.  The flag will be reset at the end of the
    statement.
    This has to be handled here as it's called by set_var.cc, which is
    not automaticly handled by sql_parse.cc
  */
  save_binlog_format= thd->set_current_stmt_binlog_format_stmt();

  if (WSREP(thd) && !IF_WSREP(thd->wsrep_applier, 0))
  {
    thd->set_query(buff, query_length, system_charset_info);
    // Attention!!! here is implicit goto error;
    WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL);
  }

  /*
    Extra block due to WSREP_TO_ISOLATION_BEGIN using goto.
    TODO(cvicentiu) Should move  this block out in a new function.
  */
  {
    if ((result= tables.open_and_lock(thd, Table_user, TL_WRITE)))
      DBUG_RETURN(result != 1);

    const User_table& user_table= tables.user_table();
    TABLE *table= user_table.table();

    result= 1;

    mysql_mutex_lock(&acl_cache->lock);
    ACL_USER *acl_user;
    if (!(acl_user= find_user_exact(host, user)))
    {
      mysql_mutex_unlock(&acl_cache->lock);
      my_message(ER_PASSWORD_NO_MATCH, ER_THD(thd, ER_PASSWORD_NO_MATCH),
                 MYF(0));
      goto end;
    }

    /* First update the in-memory ACL entry... */
    if (!clear_role)
    {
      /* set new default_rolename */
      acl_user->default_rolename.str= safe_strdup_root(&acl_memroot, rolename);
      acl_user->default_rolename.length= strlen(rolename);
    }
    else
    {
      /* clear the default_rolename */
      acl_user->default_rolename.str = NULL;
      acl_user->default_rolename.length = 0;
    }

    /* update the mysql.user table with the new default role */
    tables.user_table().table()->use_all_columns();
    user_table.set_host(host, strlen(host));
    user_table.set_user(user, strlen(user));
    key_copy((uchar *) user_key, table->record[0], table->key_info,
             table->key_info->key_length);

    if (table->file->ha_index_read_idx_map(table->record[0], 0,
                                           (uchar *) user_key, HA_WHOLE_KEY,
                                           HA_READ_KEY_EXACT))
    {
      mysql_mutex_unlock(&acl_cache->lock);
      my_message(ER_PASSWORD_NO_MATCH, ER_THD(thd, ER_PASSWORD_NO_MATCH),
                 MYF(0));
      goto end;
    }
    store_record(table, record[1]);
    user_table.set_default_role(acl_user->default_rolename.str,
                                acl_user->default_rolename.length);
    if (unlikely(error= table->file->ha_update_row(table->record[1],
                                                   table->record[0])) &&
        error != HA_ERR_RECORD_IS_THE_SAME)
    {
      mysql_mutex_unlock(&acl_cache->lock);
      table->file->print_error(error,MYF(0));	/* purecov: deadcode */
      goto end;
    }

    mysql_mutex_unlock(&acl_cache->lock);
    result= 0;
    /* Log the statement-based form so replicas apply the same change */
    if (mysql_bin_log.is_open())
    {
      DBUG_ASSERT(query_length);
      thd->clear_error();
      result= thd->binlog_query(THD::STMT_QUERY_TYPE, buff, query_length,
                                FALSE, FALSE, FALSE, 0) > 0;
    }
  end:
    close_mysql_tables(thd);
  }

#ifdef WITH_WSREP
wsrep_error_label:
  if (WSREP(thd))
  {
    wsrep_to_isolation_end(thd);
    thd->set_query(query_save);
  }
#endif /* WITH_WSREP */

  thd->restore_stmt_binlog_format(save_binlog_format);

  DBUG_RETURN(result);
}
+
+
+/*
+  Find user in ACL. Uses to check a definer
+
+  SYNOPSIS
+    is_acl_user()
+    host                 host name
+    user                 user name
+
+  RETURN
+   FALSE  definer not fond
+   TRUE   there is such definer
+*/
+
+bool is_acl_user(const char *host, const char *user)
+{
+  bool res;
+
+  /* --skip-grants */
+  if (!initialized)
+    return TRUE;
+
+  mysql_mutex_lock(&acl_cache->lock);
+
+  if (*host) // User
+    res= find_user_exact(host, user) != NULL;
+  else // Role
+    res= find_acl_role(user, false) != NULL;
+
+  mysql_mutex_unlock(&acl_cache->lock);
+  return res;
+}
+
+
+/*
+  Find first entry that matches the specified user@host pair
+*/
+static ACL_USER *find_user_exact(const char *host, const char *user)
+{
+  mysql_mutex_assert_owner(&acl_cache->lock);
+  size_t start= acl_find_user_by_name(user);
+
+  for (size_t i= start; i < acl_users.elements; i++)
+  {
+    ACL_USER *acl_user= dynamic_element(&acl_users, i, ACL_USER*);
+    if (i > start && strcmp(acl_user->user.str, user))
+      return 0;
+
+    if (!my_strcasecmp(system_charset_info, acl_user->host.hostname, host))
+      return acl_user;
+  }
+  return 0;
+}
+
+/*
+  Find first entry that matches the specified user@host pair
+*/
+static ACL_USER * find_user_wild(const char *host, const char *user, const char *ip)
+{
+  mysql_mutex_assert_owner(&acl_cache->lock);
+
+  size_t start = acl_find_user_by_name(user);
+
+  for (size_t i= start; i < acl_users.elements; i++)
+  {
+    ACL_USER *acl_user=dynamic_element(&acl_users,i,ACL_USER*);
+    if (i > start && strcmp(acl_user->user.str, user))
+      break;
+    if (compare_hostname(&acl_user->host, host, ip ? ip : host))
+      return acl_user;
+  }
+  return 0;
+}
+
+/*
+  Find a role with the specified name
+*/
+static ACL_ROLE *find_acl_role(const char *role, bool allow_public)
+{
+  size_t length= strlen(role);
+  DBUG_ENTER("find_acl_role");
+  DBUG_PRINT("enter",("role: '%s'", role));
+  DBUG_PRINT("info", ("Hash elements: %ld", acl_roles.records));
+
+  mysql_mutex_assert_owner(&acl_cache->lock);
+
+  if (!length || (!allow_public && strcasecmp(role, public_name.str) == 0))
+    DBUG_RETURN(NULL);
+
+  ACL_ROLE *r= (ACL_ROLE *)my_hash_search(&acl_roles, (uchar *)role, length);
+  DBUG_RETURN(r);
+}
+
+/*
+  Finds a grantee - something that privileges or roles can be granted to.
+*/
+static ACL_USER_BASE *find_acl_user_base(const char *user, const char *host)
+{
+  if (*host)
+    return find_user_exact(host, user);
+
+  return find_acl_role(user, true);
+}
+
+
+/*
+  Comparing of hostnames
+
+  NOTES
+  A hostname may be of type:
+  hostname   (May include wildcards);   monty.pp.sci.fi
+  ip	   (May include wildcards);   192.168.0.0
+  ip/netmask			      192.168.0.0/255.255.255.0
+
+  A net mask of 0.0.0.0 is not allowed.
+*/
+
+static const char *calc_ip(const char *ip, long *val, char end)
+{
+  long ip_val,tmp;
+  if (!(ip=str2int(ip,10,0,255,&ip_val)) || *ip != '.')
+    return 0;
+  ip_val<<=24;
+  if (!(ip=str2int(ip+1,10,0,255,&tmp)) || *ip != '.')
+    return 0;
+  ip_val+=tmp<<16;
+  if (!(ip=str2int(ip+1,10,0,255,&tmp)) || *ip != '.')
+    return 0;
+  ip_val+=tmp<<8;
+  if (!(ip=str2int(ip+1,10,0,255,&tmp)) || *ip != end)
+    return 0;
+  *val=ip_val+tmp;
+  return ip;
+}
+
+
+static void update_hostname(acl_host_and_ip *host, const char *hostname)
+{
+  // fix historical undocumented convention that empty host is the same as '%'
+  hostname=const_cast(hostname ? hostname : host_not_specified.str);
+  host->hostname=(char*) hostname;             // This will not be modified!
+  if (!(hostname= calc_ip(hostname,&host->ip,'/')) ||
+      !(hostname= calc_ip(hostname+1,&host->ip_mask,'\0')))
+  {
+    host->ip= host->ip_mask=0;			// Not a masked ip
+  }
+}
+
+
+static bool compare_hostname(const acl_host_and_ip *host, const char *hostname,
+			     const char *ip)
+{
+  long tmp;
+  if (host->ip_mask && ip && calc_ip(ip,&tmp,'\0'))
+  {
+    return (tmp & host->ip_mask) == host->ip;
+  }
+  return (!host->hostname ||
+	  (hostname && !wild_case_compare(system_charset_info,
+                                          hostname, host->hostname)) ||
+	  (ip && !wild_compare(ip, host->hostname, 0)));
+}
+
+/**
+  Check if the given host name needs to be resolved or not.
+  Host name has to be resolved if it actually contains *name*.
+
+  For example:
+    192.168.1.1               --> FALSE
+    192.168.1.0/255.255.255.0 --> FALSE
+    %                         --> FALSE
+    192.168.1.%               --> FALSE
+    AB%                       --> FALSE
+
+    AAAAFFFF                  --> TRUE (Hostname)
+    AAAA:FFFF:1234:5678       --> FALSE
+    ::1                       --> FALSE
+
+  This function does not check if the given string is a valid host name or
+  not. It assumes that the argument is a valid host name.
+
+  @param hostname   the string to check.
+
+  @return a flag telling if the argument needs to be resolved or not.
+  @retval TRUE the argument is a host name and needs to be resolved.
+  @retval FALSE the argument is either an IP address, or a patter and
+          should not be resolved.
+*/
+
+bool hostname_requires_resolving(const char *hostname)
+{
+  if (!hostname)
+    return FALSE;
+
+  /* Check if hostname is the localhost. */
+
+  size_t hostname_len= strlen(hostname);
+  size_t localhost_len= strlen(my_localhost);
+
+  if (hostname == my_localhost ||
+      (hostname_len == localhost_len &&
+       !system_charset_info->strnncoll(
+                     (const uchar *) hostname,  hostname_len,
+                     (const uchar *) my_localhost, strlen(my_localhost))))
+  {
+    return FALSE;
+  }
+
+  /*
+    If the string contains any of {':', '%', '_', '/'}, it is definitely
+    not a host name:
+      - ':' means that the string is an IPv6 address;
+      - '%' or '_' means that the string is a pattern;
+      - '/' means that the string is an IPv4 network address;
+  */
+
+  for (const char *p= hostname; *p; ++p)
+  {
+    switch (*p) {
+      case ':':
+      case '%':
+      case '_':
+      case '/':
+        return FALSE;
+    }
+  }
+
+  /*
+    Now we have to tell a host name (ab.cd, 12.ab) from an IPv4 address
+    (12.34.56.78). The assumption is that if the string contains only
+    digits and dots, it is an IPv4 address. Otherwise -- a host name.
+  */
+
+  for (const char *p= hostname; *p; ++p)
+  {
+    if (*p != '.' && !my_isdigit(&my_charset_latin1, *p))
+      return TRUE; /* a "letter" has been found. */
+  }
+
+  return FALSE; /* all characters are either dots or digits. */
+}
+
+
+/**
+  Update record for user in mysql.user privilege table with new password.
+
+  @see change_password
+*/
+
+static bool update_user_table_password(THD *thd, const User_table& user_table,
+                                       const ACL_USER &user)
+{
+  char user_key[MAX_KEY_LENGTH];
+  int error;
+  DBUG_ENTER("update_user_table_password");
+
+  TABLE *table= user_table.table();
+  table->use_all_columns();
+  user_table.set_host(user.host.hostname, user.hostname_length);
+  user_table.set_user(user.user.str, user.user.length);
+  key_copy((uchar *) user_key, table->record[0], table->key_info,
+           table->key_info->key_length);
+
+  if (table->file->ha_index_read_idx_map(table->record[0], 0,
+                                         (uchar *) user_key, HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
+  {
+    my_message(ER_PASSWORD_NO_MATCH, ER_THD(thd, ER_PASSWORD_NO_MATCH),
+               MYF(0));
+    DBUG_RETURN(1);
+  }
+  store_record(table, record[1]);
+
+  if (user_table.set_auth(user))
+  {
+    my_error(ER_COL_COUNT_DOESNT_MATCH_PLEASE_UPDATE, MYF(0),
+             user_table.name().str, 3, user_table.num_fields(),
+             static_cast(table->s->mysql_version), MYSQL_VERSION_ID);
+    DBUG_RETURN(1);
+  }
+
+  user_table.set_password_expired(user.password_expired);
+  user_table.set_password_last_changed(user.password_last_changed);
+
+  if (unlikely(error= table->file->ha_update_row(table->record[1],
+                                                 table->record[0])) &&
+      error != HA_ERR_RECORD_IS_THE_SAME)
+  {
+    table->file->print_error(error,MYF(0));
+    DBUG_RETURN(1);
+  }
+
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Return 1 if we are allowed to create new users
+  the logic here is: INSERT_ACL is sufficient.
+  It's also a requirement in opt_safe_user_create,
+  otherwise CREATE_USER_ACL is enough.
+*/
+
+static bool test_if_create_new_users(THD *thd)
+{
+  Security_context *sctx= thd->security_ctx;
+  bool create_new_users= MY_TEST(sctx->master_access & INSERT_ACL) ||
+                         (!opt_safe_user_create &&
+                          MY_TEST(sctx->master_access & CREATE_USER_ACL));
+  if (!create_new_users)
+  {
+    TABLE_LIST tl;
+    privilege_t db_access(NO_ACL);
+    tl.init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_TABLE_NAME[USER_TABLE],
+                      NULL, TL_WRITE);
+    create_new_users= 1;
+
+    db_access= acl_get_all3(sctx, tl.db.str, FALSE);
+    if (!(db_access & INSERT_ACL))
+    {
+      if (check_grant(thd, INSERT_ACL, &tl, FALSE, UINT_MAX, TRUE))
+        create_new_users=0;
+    }
+  }
+  return create_new_users;
+}
+
+
+/****************************************************************************
+  Handle GRANT commands
+****************************************************************************/
+static USER_AUTH auth_no_password;
+
+static int replace_user_table(THD *thd, const User_table &user_table,
+                              LEX_USER * const combo, privilege_t rights,
+                              const bool revoke_grant,
+                              const bool can_create_user,
+                              const bool no_auto_create)
+{
+  int error = -1;
+  uint nauth= 0;
+  bool old_row_exists=0;
+  uchar user_key[MAX_KEY_LENGTH];
+  bool handle_as_role= combo->is_role();
+  LEX *lex= thd->lex;
+  TABLE *table= user_table.table();
+  ACL_USER new_acl_user, *old_acl_user= 0;
+  DBUG_ENTER("replace_user_table");
+
+  mysql_mutex_assert_owner(&acl_cache->lock);
+
+  table->use_all_columns();
+  user_table.set_host(combo->host.str,combo->host.length);
+  user_table.set_user(combo->user.str,combo->user.length);
+  key_copy(user_key, table->record[0], table->key_info,
+           table->key_info->key_length);
+
+  if (table->file->ha_index_read_idx_map(table->record[0], 0, user_key,
+                                         HA_WHOLE_KEY, HA_READ_KEY_EXACT))
+  {
+    if (revoke_grant)
+    {
+      if (combo->host.length)
+        my_error(ER_NONEXISTING_GRANT, MYF(0), combo->user.str,
+                 combo->host.str);
+      else
+        my_error(ER_INVALID_ROLE, MYF(0), combo->user.str);
+      goto end;
+    }
+    /*
+      There are four options which affect the process of creation of
+      a new user (mysqld option --safe-create-user, 'insert' privilege
+      on 'mysql.user' table, using 'GRANT' with 'IDENTIFIED BY' and
+      SQL_MODE flag NO_AUTO_CREATE_USER). Below is the simplified rule
+      how it should work.
+      if (safe-user-create && ! INSERT_priv) => reject
+      else if (identified_by) => create
+      else if (no_auto_create_user) => reject
+      else create
+
+      see also test_if_create_new_users()
+    */
+    else if (!combo->has_auth() && no_auto_create)
+    {
+      my_error(ER_PASSWORD_NO_MATCH, MYF(0));
+      goto end;
+    }
+    else if (!can_create_user)
+    {
+      my_error(ER_CANT_CREATE_USER_WITH_GRANT, MYF(0));
+      goto end;
+    }
+
+    if (!combo->auth)
+      combo->auth= &auth_no_password;
+
+    old_row_exists = 0;
+    restore_record(table, s->default_values);
+    user_table.set_host(combo->host.str, combo->host.length);
+    user_table.set_user(combo->user.str, combo->user.length);
+  }
+  else
+  {
+    old_row_exists = 1;
+    store_record(table,record[1]);			// Save copy for update
+  }
+
+  for (USER_AUTH *auth= combo->auth; auth; auth= auth->next)
+  {
+    nauth++;
+    if (auth->plugin.length)
+    {
+      if (!plugin_is_ready(&auth->plugin, MYSQL_AUTHENTICATION_PLUGIN))
+      {
+        my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), auth->plugin.str);
+        goto end;
+      }
+    }
+    else
+      auth->plugin= guess_auth_plugin(thd, auth->auth_str.length);
+  }
+
+  /* Update table columns with new privileges */
+  user_table.set_access(rights, revoke_grant);
+  rights= user_table.get_access();
+
+  if (handle_as_role)
+  {
+    if (old_row_exists && !user_table.get_is_role())
+    {
+      goto end;
+    }
+    if (user_table.set_is_role(true))
+    {
+      my_error(ER_COL_COUNT_DOESNT_MATCH_PLEASE_UPDATE, MYF(0),
+               user_table.name().str,
+               ROLE_ASSIGN_COLUMN_IDX + 1, user_table.num_fields(),
+               static_cast(table->s->mysql_version), MYSQL_VERSION_ID);
+      goto end;
+    }
+  }
+  else
+  {
+    old_acl_user= find_user_exact(combo->host.str, combo->user.str);
+    if ((old_acl_user != NULL) != old_row_exists)
+    {
+      my_error(ER_PASSWORD_NO_MATCH, MYF(0));
+      goto end;
+    }
+    new_acl_user= old_row_exists ? *old_acl_user :
+                  ACL_USER(thd, *combo, lex->account_options, rights);
+    if (acl_user_update(thd, &new_acl_user, nauth,
+                        *combo, lex->account_options, rights))
+      goto end;
+
+    if (user_table.set_auth(new_acl_user))
+    {
+      my_error(ER_COL_COUNT_DOESNT_MATCH_PLEASE_UPDATE, MYF(0),
+               user_table.name().str, 3, user_table.num_fields(),
+               static_cast(table->s->mysql_version), MYSQL_VERSION_ID);
+      DBUG_RETURN(1);
+    }
+
+    switch (lex->account_options.ssl_type) {
+    case SSL_TYPE_NOT_SPECIFIED:
+      break;
+    case SSL_TYPE_NONE:
+    case SSL_TYPE_ANY:
+    case SSL_TYPE_X509:
+      user_table.set_ssl_type(lex->account_options.ssl_type);
+      user_table.set_ssl_cipher("", 0);
+      user_table.set_x509_issuer("", 0);
+      user_table.set_x509_subject("", 0);
+      break;
+    case SSL_TYPE_SPECIFIED:
+      user_table.set_ssl_type(lex->account_options.ssl_type);
+      if (lex->account_options.ssl_cipher.str)
+        user_table.set_ssl_cipher(lex->account_options.ssl_cipher.str,
+                                  lex->account_options.ssl_cipher.length);
+      else
+        user_table.set_ssl_cipher("", 0);
+      if (lex->account_options.x509_issuer.str)
+        user_table.set_x509_issuer(lex->account_options.x509_issuer.str,
+                                   lex->account_options.x509_issuer.length);
+      else
+        user_table.set_x509_issuer("", 0);
+      if (lex->account_options.x509_subject.str)
+        user_table.set_x509_subject(lex->account_options.x509_subject.str,
+                                    lex->account_options.x509_subject.length);
+      else
+        user_table.set_x509_subject("", 0);
+      break;
+    }
+
+    if (lex->account_options.specified_limits & USER_RESOURCES::QUERIES_PER_HOUR)
+      user_table.set_max_questions(lex->account_options.questions);
+    if (lex->account_options.specified_limits & USER_RESOURCES::UPDATES_PER_HOUR)
+      user_table.set_max_updates(lex->account_options.updates);
+    if (lex->account_options.specified_limits & USER_RESOURCES::CONNECTIONS_PER_HOUR)
+      user_table.set_max_connections(lex->account_options.conn_per_hour);
+    if (lex->account_options.specified_limits & USER_RESOURCES::USER_CONNECTIONS)
+      user_table.set_max_user_connections(lex->account_options.user_conn);
+    if (lex->account_options.specified_limits & USER_RESOURCES::MAX_STATEMENT_TIME)
+      user_table.set_max_statement_time(lex->account_options.max_statement_time);
+
+    mqh_used= (mqh_used || lex->account_options.questions || lex->account_options.updates ||
+               lex->account_options.conn_per_hour || lex->account_options.user_conn ||
+               lex->account_options.max_statement_time != 0.0);
+
+    if (lex->account_options.account_locked != ACCOUNTLOCK_UNSPECIFIED)
+      user_table.set_account_locked(new_acl_user.account_locked);
+
+    if (nauth)
+      user_table.set_password_last_changed(new_acl_user.password_last_changed);
+    if (lex->account_options.password_expire != PASSWORD_EXPIRE_UNSPECIFIED)
+    {
+      user_table.set_password_lifetime(new_acl_user.password_lifetime);
+      user_table.set_password_expired(new_acl_user.password_expired);
+    }
+  }
+
+  if (old_row_exists)
+  {
+    /*
+      We should NEVER delete from the user table, as a user can still
+      use mysqld even if he doesn't have any privileges in the user table!
+    */
+    if (cmp_record(table, record[1]))
+    {
+      if (unlikely(error= table->file->ha_update_row(table->record[1],
+                                                     table->record[0])) &&
+          error != HA_ERR_RECORD_IS_THE_SAME)
+      {                                         // This should never happen
+        table->file->print_error(error,MYF(0)); /* purecov: deadcode */
+        error= -1;                              /* purecov: deadcode */
+        goto end;                               /* purecov: deadcode */
+      }
+      else
+        error= 0;
+    }
+  }
+  else if (unlikely(error=table->file->ha_write_row(table->record[0])))
+  {
+    // This should never happen
+    if (table->file->is_fatal_error(error, HA_CHECK_DUP))
+    {
+      table->file->print_error(error,MYF(0));	/* purecov: deadcode */
+      error= -1;				/* purecov: deadcode */
+      goto end;					/* purecov: deadcode */
+    }
+  }
+  error=0;					// Privileges granted / revoked
+
+end:
+  if (likely(!error))
+  {
+    acl_cache->clear(1);			// Clear privilege cache
+    if (handle_as_role)
+    {
+      if (old_row_exists)
+        acl_update_role(combo->user.str, rights);
+      else
+        acl_insert_role(combo->user.str, rights);
+    }
+    else
+    {
+      if (old_acl_user)
+        *old_acl_user= new_acl_user;
+      else
+      {
+        push_new_user(new_acl_user);
+        rebuild_acl_users();
+
+        /* Rebuild 'acl_check_hosts' since 'acl_users' has been modified */
+        rebuild_check_host();
+
+        /*
+          Rebuild every user's role_grants since 'acl_users' has been sorted
+          and old pointers to ACL_USER elements are no longer valid
+        */
+        rebuild_role_grants();
+      }
+    }
+  }
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Change grants in the mysql.db table: insert, update or delete the row
+  for (host, db, user) to reflect a GRANT or REVOKE of database-level
+  privileges, then bring the in-memory ACL cache up to date.
+
+  @param table         open mysql.db table
+  @param db            database the privileges apply to
+  @param combo         grantee (user or role) name with host
+  @param rights        privilege bits being granted or revoked
+  @param revoke_grant  true for REVOKE, false for GRANT
+
+  @retval  0  success
+  @retval -1  error (an error message has been reported)
+*/
+
+static int replace_db_table(TABLE *table, const char *db,
+			    const LEX_USER &combo,
+			    privilege_t rights, const bool revoke_grant)
+{
+  uint i;
+  ulonglong priv;
+  privilege_t store_rights(NO_ACL);
+  bool old_row_exists=0;
+  int error;
+  char what= revoke_grant ? 'N' : 'Y';
+  uchar user_key[MAX_KEY_LENGTH];
+  DBUG_ENTER("replace_db_table");
+
+  /* Check if there is such a user in user table in memory? */
+  if (!find_user_wild(combo.host.str,combo.user.str))
+  {
+    /* The user could be a role, check if the user is registered as a role */
+    if (!combo.host.length && !find_acl_role(combo.user.str, true))
+    {
+      my_message(ER_PASSWORD_NO_MATCH, ER_THD(table->in_use,
+                                              ER_PASSWORD_NO_MATCH), MYF(0));
+      DBUG_RETURN(-1);
+    }
+  }
+
+  /* Build the (host, db, user) key in record[0] and look the row up */
+  table->use_all_columns();
+  table->field[0]->store(combo.host.str,combo.host.length,
+                         system_charset_info);
+  table->field[1]->store(db,(uint) strlen(db), system_charset_info);
+  table->field[2]->store(combo.user.str,combo.user.length,
+                         system_charset_info);
+  key_copy(user_key, table->record[0], table->key_info,
+           table->key_info->key_length);
+
+  if (table->file->ha_index_read_idx_map(table->record[0],0, user_key,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
+  {
+    if (revoke_grant)
+    { // no row, no revoke
+      my_error(ER_NONEXISTING_GRANT, MYF(0), combo.user.str, combo.host.str);
+      goto abort;
+    }
+    old_row_exists = 0;
+    restore_record(table, s->default_values);
+    table->field[0]->store(combo.host.str,combo.host.length,
+                           system_charset_info);
+    table->field[1]->store(db,(uint) strlen(db), system_charset_info);
+    table->field[2]->store(combo.user.str,combo.user.length,
+                           system_charset_info);
+  }
+  else
+  {
+    old_row_exists = 1;
+    store_record(table,record[1]);
+  }
+
+  /* Set the requested privilege columns (field 3 and up) to 'Y' or 'N' */
+  store_rights=get_rights_for_db(rights);
+  for (i= 3, priv= 1; i < table->s->fields; i++, priv <<= 1)
+  {
+    if (priv & store_rights)			// do it if priv is chosen
+      table->field [i]->store(&what,1, &my_charset_latin1);// set requested privileges
+  }
+  /* Re-read the complete privilege set now present in the row */
+  rights=get_access(table,3);
+  rights=fix_rights_for_db(rights);
+
+  if (old_row_exists)
+  {
+    /* update old existing row */
+    if (rights)
+    {
+      if (unlikely((error= table->file->ha_update_row(table->record[1],
+                                                      table->record[0]))) &&
+          error != HA_ERR_RECORD_IS_THE_SAME)
+	goto table_error;			/* purecov: deadcode */
+    }
+    else	/* must have been a revoke of all privileges */
+    {
+      if (unlikely((error= table->file->ha_delete_row(table->record[1]))))
+	goto table_error;			/* purecov: deadcode */
+    }
+  }
+  else if (rights &&
+           (unlikely(error= table->file->ha_write_row(table->record[0]))))
+  {
+    if (table->file->is_fatal_error(error, HA_CHECK_DUP_KEY))
+      goto table_error; /* purecov: deadcode */
+  }
+
+  acl_cache->clear(1);				// Clear privilege cache
+  if (old_row_exists)
+    acl_update_db(combo.user.str,combo.host.str,db,rights);
+  else if (rights)
+  {
+    /*
+       If we did not have an already existing row, for users, we must always
+       insert an ACL_DB entry. For roles however, it is possible that one was
+       already created when DB privileges were propagated from other granted
+       roles onto the current role. For this case, first try to update the
+       existing entry, otherwise insert a new one.
+    */
+    if (!combo.is_role() ||
+        !acl_update_db(combo.user.str, combo.host.str, db, rights))
+    {
+      acl_insert_db(combo.user.str,combo.host.str,db,rights);
+    }
+  }
+  DBUG_RETURN(0);
+
+  /* This could only happen if the grant tables got corrupted */
+table_error:
+  table->file->print_error(error,MYF(0));	/* purecov: deadcode */
+
+abort:
+  DBUG_RETURN(-1);
+}
+
+/**
+  Updates the mysql.roles_mapping table
+
+  @param table          TABLE to update
+  @param user           user name of the grantee
+  @param host           host name of the grantee
+  @param role           role name to grant
+  @param with_admin     WITH ADMIN OPTION flag
+  @param existing       the entry in the acl_roles_mappings hash or NULL.
+                        it is never NULL if revoke_grant is true.
+                        it is NULL when a new pair is added, it's not NULL
+                        when an existing pair is updated.
+  @param revoke_grant   true for REVOKE, false for GRANT
+*/
+static int
+replace_roles_mapping_table(TABLE *table, LEX_CSTRING *user, LEX_CSTRING *host,
+                            LEX_CSTRING *role, bool with_admin,
+                            ROLE_GRANT_PAIR *existing, bool revoke_grant)
+{
+  DBUG_ENTER("replace_roles_mapping_table");
+
+  uchar row_key[MAX_KEY_LENGTH];
+  int error;
+  /* Fill record[0] with the (host, user, role) key of the mapping */
+  table->use_all_columns();
+  restore_record(table, s->default_values);
+  table->field[0]->store(host->str, host->length, system_charset_info);
+  table->field[1]->store(user->str, user->length, system_charset_info);
+  table->field[2]->store(role->str, role->length, system_charset_info);
+
+  DBUG_ASSERT(!revoke_grant || existing);
+
+  if (existing) // delete or update
+  {
+    key_copy(row_key, table->record[0], table->key_info,
+             table->key_info->key_length);
+    if (table->file->ha_index_read_idx_map(table->record[1], 0, row_key,
+                                           HA_WHOLE_KEY, HA_READ_KEY_EXACT))
+    {
+      /* No match */
+      DBUG_RETURN(1);
+    }
+    if (revoke_grant && !with_admin) 
+    {
+      /* plain REVOKE removes the whole mapping row */
+      if (unlikely((error= table->file->ha_delete_row(table->record[1]))))
+      {
+        DBUG_PRINT("info", ("error deleting row '%s' '%s' '%s'",
+                            host->str, user->str, role->str));
+        goto table_error;
+      }
+    }
+    else if (with_admin)
+    {
+      /* store 1-based enum index: 1 when revoking admin, 2 when granting it */
+      table->field[3]->store(!revoke_grant + 1);
+
+      if (unlikely((error= table->file->ha_update_row(table->record[1],
+                                                      table->record[0]))))
+      {
+        DBUG_PRINT("info", ("error updating row '%s' '%s' '%s'",
+                            host->str, user->str, role->str));
+        goto table_error;
+      }
+    }
+    DBUG_RETURN(0);
+  }
+
+  /* new mapping: 1-based enum index derived from the with_admin flag */
+  table->field[3]->store(with_admin + 1);
+
+  if (unlikely((error= table->file->ha_write_row(table->record[0]))))
+  {
+    DBUG_PRINT("info", ("error inserting row '%s' '%s' '%s'",
+                        host->str, user->str, role->str));
+    goto table_error;
+  }
+
+  /* all ok */
+  DBUG_RETURN(0);
+
+table_error:
+  DBUG_PRINT("info", ("table error"));
+  table->file->print_error(error, MYF(0));
+  DBUG_RETURN(1);
+}
+
+
+/**
+  Updates the acl_roles_mappings hash
+
+  @param user           user name of the grantee
+  @param host           host name of the grantee
+  @param role           role name to grant
+  @param with_admin     WITH ADMIN OPTION flag
+  @param existing       the entry in the acl_roles_mappings hash or NULL.
+                        it is never NULL if revoke_grant is true.
+                        it is NULL when a new pair is added, it's not NULL
+                        when an existing pair is updated.
+  @param revoke_grant   true for REVOKE, false for GRANT
+*/
+static int
+update_role_mapping(LEX_CSTRING *user, LEX_CSTRING *host, LEX_CSTRING *role,
+                    bool with_admin, ROLE_GRANT_PAIR *existing, bool revoke_grant)
+{
+  if (revoke_grant)
+  {
+    if (with_admin)
+    {
+      /* REVOKE ADMIN OPTION only: keep the grant, clear the flag */
+      existing->with_admin= false;
+      return 0;
+    }
+    /* plain REVOKE: remove the pair from the hash entirely */
+    return my_hash_delete(&acl_roles_mappings, (uchar*)existing);
+  }
+
+  if (existing)
+  {
+    /* re-GRANT of an existing pair: the admin flag can only be raised */
+    existing->with_admin|= with_admin;
+    return 0;
+  }
+
+  /* allocate a new entry that will go in the hash */
+  ROLE_GRANT_PAIR *hash_entry= new (&acl_memroot) ROLE_GRANT_PAIR;
+  if (hash_entry->init(&acl_memroot, user->str, host->str,
+                       role->str, with_admin))
+    return 1;
+  return my_hash_insert(&acl_roles_mappings, (uchar*) hash_entry);
+}
+
+/*
+  Find the acl_proxy_users element with the same primary key as new_value
+  and either remove it (is_revoke) or overwrite its data part.
+  Caller must hold acl_cache->lock.
+*/
+static void
+acl_update_proxy_user(ACL_PROXY_USER *new_value, bool is_revoke)
+{
+  mysql_mutex_assert_owner(&acl_cache->lock);
+
+  DBUG_ENTER("acl_update_proxy_user");
+  for (size_t i= 0; i < acl_proxy_users.elements; i++)
+  {
+    ACL_PROXY_USER *acl_user=
+      dynamic_element(&acl_proxy_users, i, ACL_PROXY_USER *);
+
+    if (acl_user->pk_equals(new_value))
+    {
+      if (is_revoke)
+      {
+        DBUG_PRINT("info", ("deleting ACL_PROXY_USER"));
+        delete_dynamic_element(&acl_proxy_users, i);
+      }
+      else
+      {
+        DBUG_PRINT("info", ("updating ACL_PROXY_USER"));
+        acl_user->set_data(new_value);
+      }
+      /* primary keys are unique: at most one element can match */
+      break;
+    }
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Append a new element to the acl_proxy_users array and re-sort it so
+  lookups see entries in acl_compare order.
+  Caller must hold acl_cache->lock.
+*/
+static void
+acl_insert_proxy_user(ACL_PROXY_USER *new_value)
+{
+  DBUG_ENTER("acl_insert_proxy_user");
+  mysql_mutex_assert_owner(&acl_cache->lock);
+  (void) push_dynamic(&acl_proxy_users, (uchar *) new_value);
+  my_qsort((uchar*) dynamic_element(&acl_proxy_users, 0, ACL_PROXY_USER *),
+           acl_proxy_users.elements,
+           sizeof(ACL_PROXY_USER), (qsort_cmp) acl_compare);
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Insert, update or delete a row in mysql.proxies_priv for a GRANT or
+  REVOKE of PROXY privileges, and update the in-memory acl_proxy_users
+  array accordingly.
+
+  @param thd             connection handler
+  @param table           open mysql.proxies_priv table, or NULL
+  @param user            grantee (the proxy user)
+  @param proxied_user    the user being proxied
+  @param with_grant_arg  WITH GRANT OPTION flag
+  @param revoke_grant    true for REVOKE, false for GRANT
+
+  @retval  0  success
+  @retval -1  error (an error message has been reported)
+*/
+static int
+replace_proxies_priv_table(THD *thd, TABLE *table, const LEX_USER *user,
+                         const LEX_USER *proxied_user, bool with_grant_arg,
+                         bool revoke_grant)
+{
+  bool old_row_exists= 0;
+  int error;
+  uchar user_key[MAX_KEY_LENGTH];
+  ACL_PROXY_USER new_grant;
+  char grantor[USER_HOST_BUFF_SIZE];
+
+  DBUG_ENTER("replace_proxies_priv_table");
+
+  if (!table)
+  {
+    my_error(ER_NO_SUCH_TABLE, MYF(0), MYSQL_SCHEMA_NAME.str,
+             MYSQL_TABLE_NAME[PROXIES_PRIV_TABLE].str);
+    DBUG_RETURN(-1);
+  }
+
+  /* Check if there is such a user in user table in memory? */
+  if (!find_user_wild(user->host.str,user->user.str))
+  {
+    my_message(ER_PASSWORD_NO_MATCH,
+               ER_THD(thd, ER_PASSWORD_NO_MATCH), MYF(0));
+    DBUG_RETURN(-1);
+  }
+
+  /* Build the primary key (user, proxied user) in record[0] */
+  table->use_all_columns();
+  ACL_PROXY_USER::store_pk (table, &user->host, &user->user,
+                            &proxied_user->host, &proxied_user->user);
+
+  key_copy(user_key, table->record[0], table->key_info,
+           table->key_info->key_length);
+
+  get_grantor(thd, grantor);
+
+  if (unlikely((error= table->file->ha_index_init(0, 1))))
+  {
+    table->file->print_error(error, MYF(0));
+    DBUG_PRINT("info", ("ha_index_init error"));
+    DBUG_RETURN(-1);
+  }
+
+  if (table->file->ha_index_read_map(table->record[0], user_key,
+                                     HA_WHOLE_KEY,
+                                     HA_READ_KEY_EXACT))
+  {
+    DBUG_PRINT ("info", ("Row not found"));
+    if (revoke_grant)
+    { // no row, no revoke
+      my_error(ER_NONEXISTING_GRANT, MYF(0), user->user.str, user->host.str);
+      goto abort;
+    }
+    old_row_exists= 0;
+    restore_record(table, s->default_values);
+    ACL_PROXY_USER::store_data_record(table, &user->host, &user->user,
+                                      &proxied_user->host,
+                                      &proxied_user->user,
+                                      with_grant_arg,
+                                      grantor);
+  }
+  else
+  {
+    DBUG_PRINT("info", ("Row found"));
+    old_row_exists= 1;
+    store_record(table, record[1]);
+  }
+
+  if (old_row_exists)
+  {
+    /* update old existing row */
+    if (!revoke_grant)
+    {
+      if (unlikely(error= table->file->ha_update_row(table->record[1],
+                                                     table->record[0])) &&
+          error != HA_ERR_RECORD_IS_THE_SAME)
+	goto table_error;			/* purecov: inspected */
+    }
+    else
+    {
+      if (unlikely((error= table->file->ha_delete_row(table->record[1]))))
+	goto table_error;			/* purecov: inspected */
+    }
+  }
+  else if (unlikely((error= table->file->ha_write_row(table->record[0]))))
+  {
+    DBUG_PRINT("info", ("error inserting the row"));
+    if (table->file->is_fatal_error(error, HA_CHECK_DUP_KEY))
+      goto table_error; /* purecov: inspected */
+  }
+
+  /* Table updated: now mirror the change into the in-memory cache */
+  acl_cache->clear(1);				// Clear privilege cache
+  if (old_row_exists)
+  {
+    new_grant.init(user->host.str, user->user.str,
+                   proxied_user->host.str, proxied_user->user.str,
+                   with_grant_arg);
+    acl_update_proxy_user(&new_grant, revoke_grant);
+  }
+  else
+  {
+    new_grant.init(&acl_memroot, user->host.str, user->user.str,
+                   proxied_user->host.str, proxied_user->user.str,
+                   with_grant_arg);
+    acl_insert_proxy_user(&new_grant);
+  }
+
+  table->file->ha_index_end();
+  DBUG_RETURN(0);
+
+  /* This could only happen if the grant tables got corrupted */
+table_error:
+  DBUG_PRINT("info", ("table error"));
+  table->file->print_error(error, MYF(0));	/* purecov: inspected */
+
+abort:
+  DBUG_PRINT("info", ("aborting replace_proxies_priv_table"));
+  table->file->ha_index_end();
+  DBUG_RETURN(-1);
+}
+
+
+/*
+  In-memory column-level privilege entry, stored in a GRANT_TABLE's
+  hash_columns hash keyed by column name.
+*/
+class GRANT_COLUMN :public Sql_alloc
+{
+public:
+  char *column;              // column name, copied into grant_memroot
+  privilege_t rights;        // effective privileges (may change via role inheritance)
+  privilege_t init_rights;   // privileges as read from the physical table
+  uint key_length;           // length of 'column'; used as the hash key length
+  GRANT_COLUMN(String &c, privilege_t y) :rights (y), init_rights(y)
+  {
+    column= (char*) memdup_root(&grant_memroot,c.ptr(), key_length=c.length());
+  }
+
+  /* this constructor assumes that source->column is allocated in grant_memroot */
+  GRANT_COLUMN(GRANT_COLUMN *source) : column(source->column),
+    rights (source->rights), init_rights(NO_ACL), key_length(source->key_length) { }
+};
+
+
+/* HASH callback: a GRANT_COLUMN entry is keyed by its column name. */
+static uchar* get_key_column(GRANT_COLUMN *col, size_t *length,
+			    my_bool not_used __attribute__((unused)))
+{
+  *length= col->key_length;
+  return (uchar*) col->column;
+}
+
+/*
+  In-memory grant entry identifying a privilege holder for a database
+  object; hashed by the concatenated "user\0db\0tname" string (hash_key).
+*/
+class GRANT_NAME :public Sql_alloc
+{
+public:
+  acl_host_and_ip host;
+  char *db, *user, *tname, *hash_key;   // hash_key holds "user\0db\0tname"
+  privilege_t privs;
+  privilege_t init_privs; /* privileges found in physical table */
+  ulonglong sort;                       // host/db/user specificity (get_magic_sort)
+  size_t key_length;                    // total length of hash_key
+  GRANT_NAME(const char *h, const char *d,const char *u,
+             const char *t, privilege_t p, bool is_routine);
+  GRANT_NAME (TABLE *form, bool is_routine);
+  virtual ~GRANT_NAME() = default;
+  /* entry is useful only while it still carries some privilege bits */
+  virtual bool ok() { return privs != NO_ACL; }
+  void set_user_details(const char *h, const char *d,
+                        const char *u, const char *t,
+                        bool is_routine);
+};
+
+
+/*
+  Read a privilege bitmap from an integer grant-table column, keeping
+  only the bits this server version knows about.
+*/
+static privilege_t get_access_value_from_val_int(Field *field)
+{
+  ulonglong raw= (ulonglong) field->val_int();
+  return privilege_t(raw & ALL_KNOWN_ACL);
+}
+
+
+/*
+  Table-level grant: extends GRANT_NAME with column-privilege bits and a
+  hash of per-column GRANT_COLUMN entries.
+*/
+class GRANT_TABLE :public GRANT_NAME
+{
+public:
+  privilege_t cols;
+  privilege_t init_cols; /* privileges found in physical table */
+  HASH hash_columns;     // GRANT_COLUMN entries, keyed by column name
+
+  GRANT_TABLE(const char *h, const char *d,const char *u,
+              const char *t, privilege_t p, privilege_t c);
+  GRANT_TABLE (TABLE *form, TABLE *col_privs);
+  ~GRANT_TABLE();
+  /* useful if it carries either table-level or column-level privileges */
+  bool ok() { return privs != NO_ACL || cols != NO_ACL; }
+  void init_hash()
+  {
+    my_hash_init2(key_memory_acl_memex, &hash_columns, 4, system_charset_info,
+                  0, 0, 0, (my_hash_get_key) get_key_column, 0, 0, 0);
+  }
+};
+
+
+/*
+  Union of the column-level privilege bits from the user grant, the role
+  grant and the PUBLIC grant, plus the cached table-level privileges.
+*/
+privilege_t GRANT_INFO::all_privilege()
+{
+  privilege_t user_cols=   grant_table_user ? grant_table_user->cols : NO_ACL;
+  privilege_t role_cols=   grant_table_role ? grant_table_role->cols : NO_ACL;
+  privilege_t public_cols= grant_public    ?  grant_public->cols    : NO_ACL;
+  return user_cols | role_cols | public_cols | privilege;
+}
+
+
+/*
+  (Re)initialize the identifying fields of a grant entry.  All strings
+  are copied into grant_memroot; db names are lowercased when the server
+  is case-insensitive, table/routine names also for routines.
+*/
+void GRANT_NAME::set_user_details(const char *h, const char *d,
+                                  const char *u, const char *t,
+                                  bool is_routine)
+{
+  /* Host given by user */
+  update_hostname(&host, strdup_root(&grant_memroot, h));
+  if (db != d)
+  {
+    db= strdup_root(&grant_memroot, d);
+    if (lower_case_table_names)
+      my_casedn_str(files_charset_info, db);
+  }
+  user = strdup_root(&grant_memroot,u);
+  sort=  get_magic_sort("hdu", host.hostname, db, user);
+  if (tname != t)
+  {
+    tname= strdup_root(&grant_memroot, t);
+    if (lower_case_table_names || is_routine)
+      my_casedn_str(files_charset_info, tname);
+  }
+  key_length= strlen(d) + strlen(u)+ strlen(t)+3;    // +3 for NUL separators
+  hash_key=   (char*) alloc_root(&grant_memroot,key_length);
+  strmov(strmov(strmov(hash_key,user)+1,db)+1,tname);  // "user\0db\0tname\0"
+}
+
+/* Construct a grant entry from explicit host/db/user/table names */
+GRANT_NAME::GRANT_NAME(const char *h, const char *d,const char *u,
+                       const char *t, privilege_t p, bool is_routine)
+  :db(0), tname(0), privs(p), init_privs(p)
+{
+  set_user_details(h, d, u, t, is_routine);
+}
+
+/* Construct a table grant from explicit names and privilege bits */
+GRANT_TABLE::GRANT_TABLE(const char *h, const char *d,const char *u,
+                         const char *t, privilege_t p, privilege_t c)
+  :GRANT_NAME(h,d,u,t,p, FALSE), cols(c), init_cols(c)
+{
+  init_hash();
+}
+
+/*
+  create a new GRANT_TABLE entry for role inheritance. init_* fields are set
+  to 0
+*/
+/* Construct a grant entry from a privilege-table row (tables_priv etc.) */
+GRANT_NAME::GRANT_NAME(TABLE *form, bool is_routine)
+ :privs(NO_ACL), init_privs(NO_ACL)
+{
+  user= safe_str(get_field(&grant_memroot,form->field[2]));
+
+  /* roles are stored with no host; use "" if the name is a known role */
+  const char *hostname= get_field(&grant_memroot, form->field[0]);
+  mysql_mutex_lock(&acl_cache->lock);
+  if (!hostname && find_acl_role(user, true))
+    hostname= "";
+  mysql_mutex_unlock(&acl_cache->lock);
+  update_hostname(&host, hostname);
+
+  db=    get_field(&grant_memroot,form->field[1]);
+  tname= get_field(&grant_memroot,form->field[3]);
+  if (!db || !tname)
+  {
+    /* Wrong table row; Ignore it */
+    return;					/* purecov: inspected */
+  }
+  sort=  get_magic_sort("hdu", host.hostname, db, user);
+  if (lower_case_table_names)
+  {
+    my_casedn_str(files_charset_info, db);
+  }
+  if (lower_case_table_names || is_routine)
+  {
+    my_casedn_str(files_charset_info, tname);
+  }
+  /* build the "user\0db\0tname\0" hash key (+3 for the NUL separators) */
+  key_length= (strlen(db) + strlen(user) + strlen(tname) + 3);
+  hash_key=   (char*) alloc_root(&grant_memroot, key_length);
+  strmov(strmov(strmov(hash_key,user)+1,db)+1,tname);
+  privs = get_access_value_from_val_int(form->field[6]);
+  privs = fix_rights_for_table(privs);
+  init_privs= privs;
+}
+
+
+/*
+  Load a table grant from a mysql.tables_priv row and pull in all of its
+  column grants from mysql.columns_priv (matched on host/db/user/table).
+*/
+GRANT_TABLE::GRANT_TABLE(TABLE *form, TABLE *col_privs)
+  :GRANT_NAME(form, FALSE), cols(NO_ACL), init_cols(NO_ACL)
+{
+  uchar key[MAX_KEY_LENGTH];
+
+  if (!db || !tname)
+  {
+    /* Wrong table row; Ignore it */
+    my_hash_clear(&hash_columns);               /* allow for destruction */
+    cols= NO_ACL;
+    return;
+  }
+  cols= get_access_value_from_val_int(form->field[7]);
+  cols= fix_rights_for_column(cols);
+  /*
+    Initial columns privileges are the same as column privileges on creation.
+    In case of roles, the cols privilege bits can get inherited and thus
+    cause the cols field to change. The init_cols field is always the same
+    as the physical table entry
+  */
+  init_cols= cols;
+
+  init_hash();
+
+  if (cols)
+  {
+    /* Build a prefix key on (host, db, user, table) in columns_priv */
+    uint key_prefix_len;
+    KEY_PART_INFO *key_part= col_privs->key_info->key_part;
+    col_privs->field[0]->store(host.hostname,
+                               (uint) safe_strlen(host.hostname),
+                               system_charset_info);
+    col_privs->field[1]->store(db,(uint) strlen(db), system_charset_info);
+    col_privs->field[2]->store(user,(uint) strlen(user), system_charset_info);
+    col_privs->field[3]->store(tname,(uint) strlen(tname), system_charset_info);
+
+    key_prefix_len= (key_part[0].store_length +
+                     key_part[1].store_length +
+                     key_part[2].store_length +
+                     key_part[3].store_length);
+    key_copy(key, col_privs->record[0], col_privs->key_info, key_prefix_len);
+    col_privs->field[4]->store("",0, &my_charset_latin1);
+
+    if (col_privs->file->ha_index_init(0, 1))
+    {
+      cols= NO_ACL;
+      init_cols= NO_ACL;
+      return;
+    }
+
+    if (col_privs->file->ha_index_read_map(col_privs->record[0], (uchar*) key,
+                                           (key_part_map)15,
+                                           HA_READ_KEY_EXACT))
+    {
+      cols= NO_ACL; /* purecov: deadcode */
+      init_cols= NO_ACL;
+      col_privs->file->ha_index_end();
+      return;
+    }
+    /* Collect every columns_priv row sharing the same 4-part prefix */
+    do
+    {
+      String *res,column_name;
+      GRANT_COLUMN *mem_check;
+      /* As column name is a string, we don't have to supply a buffer */
+      res=col_privs->field[4]->val_str(&column_name);
+      privilege_t priv= get_access_value_from_val_int(col_privs->field[6]);
+      if (!(mem_check = new GRANT_COLUMN(*res,
+                                         fix_rights_for_column(priv))))
+      {
+        /* Don't use this entry */
+        privs= cols= init_privs= init_cols= NO_ACL;   /* purecov: deadcode */
+        return;				/* purecov: deadcode */
+      }
+      if (my_hash_insert(&hash_columns, (uchar *) mem_check))
+      {
+        /* Invalidate this entry */
+        privs= cols= init_privs= init_cols= NO_ACL;
+        return;
+      }
+    } while (!col_privs->file->ha_index_next(col_privs->record[0]) &&
+             !key_cmp_if_same(col_privs,key,0,key_prefix_len));
+    col_privs->file->ha_index_end();
+  }
+}
+
+
+/* Free only the column hash; name strings live in grant_memroot */
+GRANT_TABLE::~GRANT_TABLE()
+{
+  my_hash_free(&hash_columns);
+}
+
+
+/* HASH callback: a GRANT_NAME is keyed by its "user\0db\0tname" string. */
+static uchar* get_grant_table(GRANT_NAME *entry, size_t *length,
+			     my_bool not_used __attribute__((unused)))
+{
+  *length= entry->key_length;
+  return (uchar*) entry->hash_key;
+}
+
+
+/*
+  Hash free callback: only run the destructor (frees hash_columns); the
+  object's memory itself belongs to a memroot and is not freed here.
+*/
+static void free_grant_table(GRANT_TABLE *grant_table)
+{
+  grant_table->~GRANT_TABLE();
+}
+
+
+/*
+  Search after a matching grant. Prefer exact grants before not exact ones.
+
+  The hash key is "user\0db\0tname"; every entry with that key is then
+  filtered by host/ip.  With exact=true only an entry whose hostname is
+  unset or matches host/ip verbatim is returned; otherwise the wildcard
+  match with the highest sort value wins.  name_tolower folds the table
+  name for case-insensitive lookups.
+*/
+
+static GRANT_NAME *name_hash_search(HASH *name_hash,
+                                    const char *host,const char* ip,
+                                    const char *db,
+                                    const char *user, const char *tname,
+                                    bool exact, bool name_tolower)
+{
+  char helping[SAFE_NAME_LEN*2+USERNAME_LENGTH+3];
+  char *hend = helping + sizeof(helping);
+  uint len;
+  GRANT_NAME *grant_name,*found=0;
+  HASH_SEARCH_STATE state;
+
+  /* assemble the lookup key "user\0db\0tname\0", bounds-checked */
+  char *db_ptr= strmov(helping, user) + 1;
+  char *tname_ptr= strnmov(db_ptr, db, hend - db_ptr) + 1;
+  if (tname_ptr > hend)
+    return 0; // invalid name = not found
+  char *end= strnmov(tname_ptr, tname, hend - tname_ptr) + 1;
+  if (end > hend)
+    return 0; // invalid name = not found
+
+  len  = (uint) (end - helping);
+  if (name_tolower)
+    my_casedn_str(files_charset_info, tname_ptr);
+  for (grant_name= (GRANT_NAME*) my_hash_first(name_hash, (uchar*) helping,
+                                               len, &state);
+       grant_name ;
+       grant_name= (GRANT_NAME*) my_hash_next(name_hash,(uchar*) helping,
+                                              len, &state))
+  {
+    if (exact)
+    {
+      if (!grant_name->host.hostname ||
+          (host &&
+	   !my_strcasecmp(system_charset_info, host,
+                          grant_name->host.hostname)) ||
+	  (ip && !strcmp(ip, grant_name->host.hostname)))
+	return grant_name;
+    }
+    else
+    {
+      if (compare_hostname(&grant_name->host, host, ip) &&
+          (!found || found->sort < grant_name->sort))
+	found=grant_name;					// Host ok
+    }
+  }
+  return found;
+}
+
+
+/* Look up a routine-level grant in the privilege hash of the given SP type. */
+static GRANT_NAME *
+routine_hash_search(const char *host, const char *ip, const char *db,
+                    const char *user, const char *tname, const Sp_handler *sph,
+                    bool exact)
+{
+  HASH *priv_hash= sph->get_priv_hash();
+  /* routine names are always folded to lower case (name_tolower=TRUE) */
+  return (GRANT_NAME*) name_hash_search(priv_hash, host, ip, db, user,
+                                        tname, exact, TRUE);
+}
+
+
+/* Look up a table-level grant in column_priv_hash. */
+static GRANT_TABLE *
+table_hash_search(const char *host, const char *ip, const char *db,
+		  const char *user, const char *tname, bool exact)
+{
+  /* fold the table name only when the server is case-insensitive */
+  bool fold_name= lower_case_table_names > 0;
+  return (GRANT_TABLE*) name_hash_search(&column_priv_hash, host, ip, db,
+                                         user, tname, exact, fold_name);
+}
+
+/* Register a table grant in column_priv_hash (non-zero result = failure). */
+static bool column_priv_insert(GRANT_TABLE *grant)
+{
+  return my_hash_insert(&column_priv_hash,(uchar*) grant);
+}
+
+/*
+  Find the per-column grant entry for column cname (length bytes) in a
+  table grant; NULL if the column hash is uninitialized or has no match.
+*/
+static GRANT_COLUMN *
+column_hash_search(GRANT_TABLE *t, const char *cname, size_t length)
+{
+  if (!my_hash_inited(&t->hash_columns))
+    return NULL;
+  return (GRANT_COLUMN*) my_hash_search(&t->hash_columns, (uchar*) cname,
+                                        length);
+}
+
+
+static int replace_column_table(GRANT_TABLE *g_t,
+				TABLE *table, const LEX_USER &combo,
+				List  &columns,
+				const char *db, const char *table_name,
+				privilege_t rights, bool revoke_grant)
+{
+  int result=0;
+  uchar key[MAX_KEY_LENGTH];
+  uint key_prefix_length;
+  KEY_PART_INFO *key_part= table->key_info->key_part;
+  DBUG_ENTER("replace_column_table");
+
+  table->use_all_columns();
+  table->field[0]->store(combo.host.str,combo.host.length,
+                         system_charset_info);
+  table->field[1]->store(db,(uint) strlen(db),
+                         system_charset_info);
+  table->field[2]->store(combo.user.str,combo.user.length,
+                         system_charset_info);
+  table->field[3]->store(table_name,(uint) strlen(table_name),
+                         system_charset_info);
+
+  /* Get length of 4 first key parts */
+  key_prefix_length= (key_part[0].store_length + key_part[1].store_length +
+                      key_part[2].store_length + key_part[3].store_length);
+  key_copy(key, table->record[0], table->key_info, key_prefix_length);
+
+  rights&= COL_ACLS;				// Only ACL for columns
+
+  /* first fix privileges for all columns in column list */
+
+  List_iterator  iter(columns);
+  class LEX_COLUMN *column;
+
+  int error= table->file->ha_index_init(0, 1);
+  if (unlikely(error))
+  {
+    table->file->print_error(error, MYF(0));
+    DBUG_RETURN(-1);
+  }
+
+  while ((column= iter++))
+  {
+    privilege_t privileges= column->rights;
+    bool old_row_exists=0;
+    uchar user_key[MAX_KEY_LENGTH];
+
+    key_restore(table->record[0],key,table->key_info,
+                key_prefix_length);
+    table->field[4]->store(column->column.ptr(), column->column.length(),
+                           system_charset_info);
+    /* Get key for the first 4 columns */
+    key_copy(user_key, table->record[0], table->key_info,
+             table->key_info->key_length);
+
+    if (table->file->ha_index_read_map(table->record[0], user_key,
+                                       HA_WHOLE_KEY, HA_READ_KEY_EXACT))
+    {
+      if (revoke_grant)
+      {
+	my_error(ER_NONEXISTING_TABLE_GRANT, MYF(0),
+                 combo.user.str, combo.host.str,
+                 table_name);                   /* purecov: inspected */
+	result= -1;                             /* purecov: inspected */
+	continue;                               /* purecov: inspected */
+      }
+      old_row_exists = 0;
+      restore_record(table, s->default_values);		// Get empty record
+      key_restore(table->record[0],key,table->key_info,
+                  key_prefix_length);
+      table->field[4]->store(column->column.ptr(),column->column.length(),
+                             system_charset_info);
+    }
+    else
+    {
+      privilege_t tmp= get_access_value_from_val_int(table->field[6]);
+      tmp=fix_rights_for_column(tmp);
+
+      if (revoke_grant)
+	privileges = tmp & ~(privileges | rights);
+      else
+	privileges |= tmp;
+      old_row_exists = 1;
+      store_record(table,record[1]);			// copy original row
+    }
+
+    table->field[6]->store((longlong) get_rights_for_column(privileges), TRUE);
+
+    if (old_row_exists)
+    {
+      GRANT_COLUMN *grant_column;
+      if (privileges)
+	error=table->file->ha_update_row(table->record[1],table->record[0]);
+      else
+	error=table->file->ha_delete_row(table->record[1]);
+      if (unlikely(error) && error != HA_ERR_RECORD_IS_THE_SAME)
+      {
+	table->file->print_error(error,MYF(0)); /* purecov: inspected */
+	result= -1;				/* purecov: inspected */
+	goto end;				/* purecov: inspected */
+      }
+      else
+        error= 0;
+      grant_column= column_hash_search(g_t, column->column.ptr(),
+                                       column->column.length());
+      if (grant_column)  // Should always be true
+      {
+        grant_column->rights= privileges;	// Update hash
+        grant_column->init_rights= privileges;
+      }
+    }
+    else					// new grant
+    {
+      GRANT_COLUMN *grant_column;
+      if (unlikely((error=table->file->ha_write_row(table->record[0]))))
+      {
+	table->file->print_error(error,MYF(0)); /* purecov: inspected */
+	result= -1;				/* purecov: inspected */
+	goto end;				/* purecov: inspected */
+      }
+      grant_column= new GRANT_COLUMN(column->column,privileges);
+      if (my_hash_insert(&g_t->hash_columns,(uchar*) grant_column))
+      {
+        result= -1;
+        goto end;
+      }
+    }
+  }
+
+  /*
+    If revoke of privileges on the table level, remove all such privileges
+    for all columns
+  */
+
+  if (revoke_grant)
+  {
+    uchar user_key[MAX_KEY_LENGTH];
+    key_copy(user_key, table->record[0], table->key_info,
+             key_prefix_length);
+
+    if (table->file->ha_index_read_map(table->record[0], user_key,
+                                       (key_part_map)15,
+                                       HA_READ_KEY_EXACT))
+      goto end;
+
+    /* Scan through all rows with the same host,db,user and table */
+    do
+    {
+      privilege_t privileges = get_access_value_from_val_int(table->field[6]);
+      privileges=fix_rights_for_column(privileges);
+      store_record(table,record[1]);
+
+      if (privileges & rights)	// is in this record the priv to be revoked ??
+      {
+	GRANT_COLUMN *grant_column = NULL;
+	char  colum_name_buf[HOSTNAME_LENGTH+1];
+	String column_name(colum_name_buf,sizeof(colum_name_buf),
+                           system_charset_info);
+
+	privileges&= ~rights;
+	table->field[6]->store((longlong)
+	                       get_rights_for_column(privileges), TRUE);
+	table->field[4]->val_str(&column_name);
+	grant_column = column_hash_search(g_t,
+					  column_name.ptr(),
+					  column_name.length());
+	if (privileges)
+	{
+	  int tmp_error;
+	  if (unlikely(tmp_error=
+                       table->file->ha_update_row(table->record[1],
+                                                  table->record[0])) &&
+              tmp_error != HA_ERR_RECORD_IS_THE_SAME)
+	  {					/* purecov: deadcode */
+	    table->file->print_error(tmp_error,MYF(0)); /* purecov: deadcode */
+	    result= -1;				/* purecov: deadcode */
+	    goto end;				/* purecov: deadcode */
+	  }
+	  if (grant_column)
+          {
+            grant_column->rights  = privileges; // Update hash
+            grant_column->init_rights = privileges;
+          }
+	}
+	else
+	{
+	  int tmp_error;
+	  if (unlikely((tmp_error=
+                        table->file->ha_delete_row(table->record[1]))))
+	  {					/* purecov: deadcode */
+	    table->file->print_error(tmp_error,MYF(0)); /* purecov: deadcode */
+	    result= -1;				/* purecov: deadcode */
+	    goto end;				/* purecov: deadcode */
+	  }
+	  if (grant_column)
+	    my_hash_delete(&g_t->hash_columns,(uchar*) grant_column);
+	}
+      }
+    } while (!table->file->ha_index_next(table->record[0]) &&
+	     !key_cmp_if_same(table, key, 0, key_prefix_length));
+  }
+
+end:
+  table->file->ha_index_end();
+  DBUG_RETURN(result);
+}
+
+static inline void get_grantor(THD *thd, char *grantor)
+{
+  const char *user= thd->security_ctx->user;
+  const char *host= thd->security_ctx->host_or_ip;
+
+#if defined(HAVE_REPLICATION)
+  if (thd->slave_thread && thd->has_invoker())
+  {
+    user= thd->get_invoker_user().str;
+    host= thd->get_invoker_host().str;
+  }
+#endif
+  strxmov(grantor, user, "@", host, NullS);
+}
+
+
+/**
+   Revoke rights from a grant table entry.
+
+   @return 0  ok
+   @return 1  fatal error (error given)
+   @return -1 grant table was revoked
+*/
+
+static int replace_table_table(THD *thd, GRANT_TABLE *grant_table,
+			       TABLE *table, const LEX_USER &combo,
+			       const char *db, const char *table_name,
+			       privilege_t rights, privilege_t col_rights,
+			       bool revoke_grant)
+{
+  char grantor[USER_HOST_BUFF_SIZE];
+  int old_row_exists = 1;
+  int error=0;
+  privilege_t store_table_rights(NO_ACL), store_col_rights(NO_ACL);
+  uchar user_key[MAX_KEY_LENGTH];
+  DBUG_ENTER("replace_table_table");
+  DBUG_PRINT("enter", ("User: '%s'  Host: '%s'  Revoke:'%d'",
+                        combo.user.str, combo.host.str, (int) revoke_grant));
+
+  get_grantor(thd, grantor);
+  /*
+    The following should always succeed as new users are created before
+    this function is called!
+  */
+  if (!find_user_wild(combo.host.str,combo.user.str))
+  {
+    if (!combo.host.length && !find_acl_role(combo.user.str, true))
+    {
+      my_message(ER_PASSWORD_NO_MATCH, ER_THD(thd, ER_PASSWORD_NO_MATCH),
+                 MYF(0)); /* purecov: deadcode */
+      DBUG_RETURN(1);                            /* purecov: deadcode */
+    }
+  }
+
+  table->use_all_columns();
+  restore_record(table, s->default_values);     // Get empty record
+  table->field[0]->store(combo.host.str,combo.host.length,
+                         system_charset_info);
+  table->field[1]->store(db,(uint) strlen(db), system_charset_info);
+  table->field[2]->store(combo.user.str,combo.user.length,
+                         system_charset_info);
+  table->field[3]->store(table_name,(uint) strlen(table_name),
+                         system_charset_info);
+  store_record(table,record[1]);			// store at pos 1
+  key_copy(user_key, table->record[0], table->key_info,
+           table->key_info->key_length);
+
+  if (table->file->ha_index_read_idx_map(table->record[0], 0, user_key,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
+  {
+    /*
+      The following should never happen as we first check the in memory
+      grant tables for the user.  There is however always a small change that
+      the user has modified the grant tables directly.
+    */
+    if (revoke_grant)
+    { // no row, no revoke
+      my_error(ER_NONEXISTING_TABLE_GRANT, MYF(0),
+               combo.user.str, combo.host.str,
+               table_name);		        /* purecov: deadcode */
+      DBUG_RETURN(1);				/* purecov: deadcode */
+    }
+    old_row_exists = 0;
+    restore_record(table,record[1]);			// Get saved record
+  }
+
+  store_table_rights= get_rights_for_table(rights);
+  store_col_rights=   get_rights_for_column(col_rights);
+  if (old_row_exists)
+  {
+    store_record(table,record[1]);
+    privilege_t j= get_access_value_from_val_int(table->field[6]);
+    privilege_t k= get_access_value_from_val_int(table->field[7]);
+
+    if (revoke_grant)
+    {
+      /* column rights are already fixed in mysql_table_grant */
+      store_table_rights=j & ~store_table_rights;
+    }
+    else
+    {
+      store_table_rights|= j;
+      store_col_rights|=   k;
+    }
+  }
+
+  table->field[4]->store(grantor,(uint) strlen(grantor), system_charset_info);
+  table->field[6]->store((longlong) store_table_rights, TRUE);
+  table->field[7]->store((longlong) store_col_rights, TRUE);
+  rights=fix_rights_for_table(store_table_rights);
+  col_rights=fix_rights_for_column(store_col_rights);
+
+  if (old_row_exists)
+  {
+    if (store_table_rights || store_col_rights)
+    {
+      if (unlikely(error=table->file->ha_update_row(table->record[1],
+                                                    table->record[0])) &&
+          error != HA_ERR_RECORD_IS_THE_SAME)
+	goto table_error;			/* purecov: deadcode */
+    }
+    else if (unlikely((error = table->file->ha_delete_row(table->record[1]))))
+      goto table_error;				/* purecov: deadcode */
+  }
+  else
+  {
+    error=table->file->ha_write_row(table->record[0]);
+    if (unlikely(table->file->is_fatal_error(error, HA_CHECK_DUP_KEY)))
+      goto table_error;				/* purecov: deadcode */
+  }
+
+  if (rights | col_rights)
+  {
+    grant_table->init_privs= rights;
+    grant_table->init_cols=  col_rights;
+
+    grant_table->privs= rights;
+    grant_table->cols=	col_rights;
+  }
+  else
+  {
+    my_hash_delete(&column_priv_hash,(uchar*) grant_table);
+    DBUG_RETURN(-1);                            // Entry revoked
+  }
+  DBUG_RETURN(0);
+
+  /* This should never happen */
+table_error:
+  table->file->print_error(error,MYF(0)); /* purecov: deadcode */
+  DBUG_RETURN(1); /* purecov: deadcode */
+}
+
+
+/**
+  @retval       0  success
+  @retval      -1  error
+*/
+static int replace_routine_table(THD *thd, GRANT_NAME *grant_name,
+			      TABLE *table, const LEX_USER &combo,
+			      const char *db, const char *routine_name,
+			      const Sp_handler *sph,
+			      privilege_t rights, bool revoke_grant)
+{
+  char grantor[USER_HOST_BUFF_SIZE];
+  int old_row_exists= 1;
+  int error=0;
+  HASH *hash= sph->get_priv_hash();
+  DBUG_ENTER("replace_routine_table");
+
+  if (!table)
+  {
+    my_error(ER_NO_SUCH_TABLE, MYF(0), MYSQL_SCHEMA_NAME.str,
+             MYSQL_TABLE_NAME[PROCS_PRIV_TABLE].str);
+    DBUG_RETURN(-1);
+  }
+
+  if (revoke_grant && !grant_name->init_privs) // only inherited role privs
+  {
+    my_hash_delete(hash, (uchar*) grant_name);
+    DBUG_RETURN(0);
+  }
+
+  get_grantor(thd, grantor);
+  /*
+    New users are created before this function is called.
+
+    There may be some cases where a routine's definer is removed but the
+    routine remains.
+  */
+
+  table->use_all_columns();
+  restore_record(table, s->default_values);            // Get empty record
+  table->field[0]->store(combo.host.str,combo.host.length, &my_charset_latin1);
+  table->field[1]->store(db,(uint) strlen(db), &my_charset_latin1);
+  table->field[2]->store(combo.user.str,combo.user.length, &my_charset_latin1);
+  table->field[3]->store(routine_name,(uint) strlen(routine_name),
+                         &my_charset_latin1);
+  table->field[4]->store((longlong) sph->type(), true);
+  store_record(table,record[1]);			// store at pos 1
+
+  if (table->file->ha_index_read_idx_map(table->record[0], 0,
+                                         (uchar*) table->field[0]->ptr,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
+  {
+    /*
+      The following should never happen as we first check the in memory
+      grant tables for the user.  There is however always a small change that
+      the user has modified the grant tables directly.
+
+      Also, there is also a second posibility that this routine entry
+      is created for a role by being inherited from a granted role.
+    */
+    if (revoke_grant)
+    { // no row, no revoke
+      my_error(ER_NONEXISTING_PROC_GRANT, MYF(0),
+               combo.user.str, combo.host.str, routine_name);
+      DBUG_RETURN(-1);
+    }
+    old_row_exists= 0;
+    restore_record(table,record[1]);			// Get saved record
+  }
+
+  privilege_t store_proc_rights= get_rights_for_procedure(rights);
+  if (old_row_exists)
+  {
+    store_record(table,record[1]);
+    privilege_t j= get_access_value_from_val_int(table->field[6]);
+
+    if (revoke_grant)
+    {
+      /* column rights are already fixed in mysql_table_grant */
+      store_proc_rights=j & ~store_proc_rights;
+    }
+    else
+    {
+      store_proc_rights|= j;
+    }
+  }
+
+  table->field[5]->store(grantor,(uint) strlen(grantor), &my_charset_latin1);
+  table->field[6]->store((longlong) store_proc_rights, TRUE);
+  rights=fix_rights_for_procedure(store_proc_rights);
+
+  if (old_row_exists)
+  {
+    if (store_proc_rights)
+    {
+      if (unlikely(error=table->file->ha_update_row(table->record[1],
+                                                    table->record[0])) &&
+                   error != HA_ERR_RECORD_IS_THE_SAME)
+	goto table_error;
+    }
+    else if (unlikely((error= table->file->ha_delete_row(table->record[1]))))
+      goto table_error;
+  }
+  else
+  {
+    error=table->file->ha_write_row(table->record[0]);
+    if (unlikely(table->file->is_fatal_error(error, HA_CHECK_DUP_KEY)))
+      goto table_error;
+  }
+
+  if (rights)
+  {
+    grant_name->init_privs= rights;
+    grant_name->privs= rights;
+  }
+  else
+  {
+    my_hash_delete(hash, (uchar*) grant_name);
+  }
+  DBUG_RETURN(0);
+
+  /* This should never happen */
+table_error:
+  table->file->print_error(error,MYF(0));
+  DBUG_RETURN(-1);
+}
+
+
+/*****************************************************************
+  Role privilege propagation and graph traversal functionality
+
+  According to the SQL standard, a role can be granted to a role,
+  thus role grants can create an arbitrarily complex directed acyclic
+  graph (the standard explicitly specifies that cycles are not allowed).
+
+  When a privilege is granted to a role, it becomes available to all grantees.
+  The code below recursively traverses a DAG of role grants, propagating
+  privilege changes.
+
+  The traversal function can work both ways, from roles to grantees or
+  from grantees to roles. The first is used for privilege propagation,
+  the second - for SHOW GRANTS and I_S.APPLICABLE_ROLES
+
+  The role propagation code is smart enough to propagate only privilege
+  changes to one specific database, table, or routine, if only they
+  were changed (like in GRANT ... ON ... TO ...) or it can propagate
+  everything (on startup or after FLUSH PRIVILEGES).
+
+  It traverses only a subgraph that's accessible from the modified role,
+  only visiting roles that can be possibly affected by the GRANT statement.
+
+  Additionally, it stops traversal early, if this particular GRANT statement
+  didn't result in any changes of privileges (e.g. both role1 and role2
+  are granted to the role3, both role1 and role2 have SELECT privilege.
+  if SELECT is revoked from role1 it won't change role3 privileges,
+  so we won't traverse from role3 to its grantees).
+******************************************************************/
/*
  Describes the scope of a privilege change to re-merge: the kind of
  privileges affected (everything, global, db-level, table/column-level,
  or one kind of stored routine), optionally narrowed by db/name.
  A NULL db/name acts as a wildcard — see the filtering in
  merge_role_db_privileges() and friends.
*/
struct PRIVS_TO_MERGE
{
  enum what
  {
    ALL, GLOBAL, DB, TABLE_COLUMN, PROC, FUNC, PACKAGE_SPEC, PACKAGE_BODY
  } what;
  const char *db, *name;  /* optional scope filters; NULL = no filter */
};
+
+
+static enum PRIVS_TO_MERGE::what sp_privs_to_merge(enum_sp_type type)
+{
+  switch (type) {
+  case SP_TYPE_FUNCTION:
+    return PRIVS_TO_MERGE::FUNC;
+  case SP_TYPE_PROCEDURE:
+    return PRIVS_TO_MERGE::PROC;
+  case SP_TYPE_PACKAGE:
+    return PRIVS_TO_MERGE::PACKAGE_SPEC;
+  case SP_TYPE_PACKAGE_BODY:
+    return PRIVS_TO_MERGE::PACKAGE_BODY;
+  case SP_TYPE_EVENT:
+  case SP_TYPE_TRIGGER:
+    break;
+  }
+  DBUG_ASSERT(0);
+  return PRIVS_TO_MERGE::PROC;
+}
+
+
/*
  on_node callback for the counting traversal in propagate_role_grants():
  reset the role's pending-parents counter before counting starts.
  Always returns 0 (never aborts the traversal).
*/
static int init_role_for_merging(ACL_ROLE *role, void *context)
{
  role->counter= 0;
  return 0;
}
+
+static int count_subgraph_nodes(ACL_ROLE *role, ACL_ROLE *grantee, void *context)
+{
+  grantee->counter++;
+  return 0;
+}
+
+static int merge_role_privileges(ACL_ROLE *, ACL_ROLE *, void *);
+static bool merge_one_role_privileges(ACL_ROLE *grantee, PRIVS_TO_MERGE what);
+
+/**
+  rebuild privileges of all affected roles
+
+  entry point into role privilege propagation. after privileges of the
+  'role' were changed, this function rebuilds privileges of all affected roles
+  as necessary.
+*/
+static void propagate_role_grants(ACL_ROLE *role,
+                                  enum PRIVS_TO_MERGE::what what,
+                                  const char *db= 0, const char *name= 0)
+{
+  if (!role)
+    return;
+
+  mysql_mutex_assert_owner(&acl_cache->lock);
+  PRIVS_TO_MERGE data= { what, db, name };
+
+  /*
+     Before updating grants to roles that inherit from this role, ensure that
+     the effective grants on this role are up-to-date from *its* granted roles.
+  */
+  merge_one_role_privileges(role, data);
+  /*
+     Changing privileges of a role causes all other roles that had
+     this role granted to them to have their rights invalidated.
+
+     We need to rebuild all roles' related access bits.
+
+     This cannot be a simple depth-first search, instead we have to merge
+     privieges for all roles granted to a specific grantee, *before*
+     merging privileges for this grantee. In other words, we must visit all
+     parent nodes of a specific node, before descencing into this node.
+
+     For example, if role1 is granted to role2 and role3, and role3 is
+     granted to role2, after "GRANT ... role1", we cannot merge privileges
+     for role2, until role3 is merged.  The counter will be 0 for role1, 2
+     for role2, 1 for role3. Traversal will start from role1, go to role2,
+     decrement the counter, backtrack, go to role3, merge it, go to role2
+     again, merge it.
+
+     And the counter is not just "all parent nodes", but only parent nodes
+     that are part of the subgraph we're interested in. For example, if
+     both roleA and roleB are granted to roleC, then roleC has two parent
+     nodes. But when granting a privilege to roleA, we're only looking at a
+     subgraph that includes roleA and roleC (roleB cannot be possibly
+     affected by that grant statement). In this subgraph roleC has only one
+     parent.
+
+     (on the other hand, in acl_load we want to update all roles, and
+     the counter is exactly equal to the number of all parent nodes)
+
+     Thus, we do two graph traversals here. First we only count parents
+     that are part of the subgraph. On the second traversal we decrement
+     the counter and actually merge privileges for a node when a counter
+     drops to zero.
+  */
+  traverse_role_graph_up(role, &data, init_role_for_merging, count_subgraph_nodes);
+  traverse_role_graph_up(role, &data, NULL, merge_role_privileges);
+}
+
+
// State of a node during a Depth First Search exploration.
// One NODE_STATE per stack level replaces the recursive call frame
// in traverse_role_graph_impl().
struct NODE_STATE
{
  ACL_USER_BASE *node_data; /* pointer to the node data */
  uint neigh_idx;           /* the neighbour that needs to be evaluated next */
};
+
+/**
+  Traverse the role grant graph and invoke callbacks at the specified points. 
+  
+  @param user           user or role to start traversal from
+  @param context        opaque parameter to pass to callbacks
+  @param offset         offset to ACL_ROLE::parent_grantee or to
+                        ACL_USER_BASE::role_grants. Depending on this value,
+                        traversal will go from roles to grantees or from
+                        grantees to roles.
+  @param on_node        called when a node is visited for the first time.
+                        Returning a value <0 will abort the traversal.
+  @param on_edge        called for every edge in the graph, when traversal
+                        goes from a node to a neighbour node.
+                        Returning <0 will abort the traversal. Returning >0
+                        will make the traversal not to follow this edge.
+
+  @note
+  The traverse method is a DEPTH FIRST SEARCH, but callbacks can influence
+  that (on_edge returning >0 value).
+
+  @note
+  This function should not be called directly, use
+  traverse_role_graph_up() and traverse_role_graph_down() instead.
+
+  @retval 0                 traversal finished successfully
+  @retval ROLE_CYCLE_FOUND  traversal aborted, cycle detected
+  @retval <0                traversal was aborted, because a callback returned
+                            this error code
+*/
+static int traverse_role_graph_impl(ACL_USER_BASE *user, void *context,
+       off_t offset,
+       int (*on_node) (ACL_USER_BASE *role, void *context),
+       int (*on_edge) (ACL_USER_BASE *current, ACL_ROLE *neighbour, void *context))
+{
+  DBUG_ENTER("traverse_role_graph_impl");
+  DBUG_ASSERT(user);
+  DBUG_PRINT("enter",("role: '%s'", user->user.str));
+  /*
+     The search operation should always leave the ROLE_ON_STACK and
+     ROLE_EXPLORED flags clean for all nodes involved in the search
+  */
+  DBUG_ASSERT(!(user->flags & ROLE_ON_STACK));
+  DBUG_ASSERT(!(user->flags & ROLE_EXPLORED));
+  mysql_mutex_assert_owner(&acl_cache->lock);
+
+  /*
+     Stack used to simulate the recursive calls of DFS.
+     It uses a Dynamic_array to reduce the number of
+     malloc calls to a minimum
+  */
+  Dynamic_array stack(PSI_INSTRUMENT_MEM, 20,50);
+  Dynamic_array to_clear(PSI_INSTRUMENT_MEM, 20, 50);
+  NODE_STATE state;     /* variable used to insert elements in the stack */
+  int result= 0;
+
+  state.neigh_idx= 0;
+  state.node_data= user;
+  user->flags|= ROLE_ON_STACK;
+
+  stack.push(state);
+  to_clear.push(user);
+
+  user->flags|= ROLE_OPENED;
+  if (on_node && ((result= on_node(user, context)) < 0))
+    goto end;
+
+  while (stack.elements())
+  {
+    NODE_STATE *curr_state= stack.back();
+
+    DBUG_ASSERT(curr_state->node_data->flags & ROLE_ON_STACK);
+
+    ACL_USER_BASE *current= curr_state->node_data;
+    ACL_USER_BASE *neighbour= NULL;
+    DBUG_PRINT("info", ("Examining role %s", current->user.str));
+    /*
+      Iterate through the neighbours until a first valid jump-to
+      neighbour is found
+    */
+    bool found= FALSE;
+    uint i;
+    DYNAMIC_ARRAY *array= (DYNAMIC_ARRAY *)(((char*)current) + offset);
+
+    DBUG_ASSERT(array == ¤t->role_grants || current->flags & IS_ROLE);
+    for (i= curr_state->neigh_idx; i < array->elements; i++)
+    {
+      neighbour= *(dynamic_element(array, i, ACL_ROLE**));
+      if (!(neighbour->flags & IS_ROLE))
+        continue;
+
+      DBUG_PRINT("info", ("Examining neighbour role %s", neighbour->user.str));
+
+      /* check if it forms a cycle */
+      if (neighbour->flags & ROLE_ON_STACK)
+      {
+        DBUG_PRINT("info", ("Found cycle"));
+        result= ROLE_CYCLE_FOUND;
+        goto end;
+      }
+
+      if (!(neighbour->flags & ROLE_OPENED))
+      {
+        neighbour->flags|= ROLE_OPENED;
+        to_clear.push(neighbour);
+        if (on_node && ((result= on_node(neighbour, context)) < 0))
+          goto end;
+      }
+
+      if (on_edge)
+      {
+        result= on_edge(current, (ACL_ROLE*)neighbour, context);
+        if (result < 0)
+          goto end;
+        if (result > 0)
+          continue;
+      }
+
+      /* Check if it was already explored, in that case, move on */
+      if (neighbour->flags & ROLE_EXPLORED)
+        continue;
+
+      found= TRUE;
+      break;
+    }
+
+    /* found states that we have found a node to jump next into */
+    if (found)
+    {
+      curr_state->neigh_idx= i + 1;
+
+      /* some sanity checks */
+      DBUG_ASSERT(!(neighbour->flags & ROLE_ON_STACK));
+
+      /* add the neighbour on the stack */
+      neighbour->flags|= ROLE_ON_STACK;
+      state.neigh_idx= 0;
+      state.node_data= neighbour;
+      stack.push(state);
+    }
+    else
+    {
+      /* Make sure we got a correct node */
+      DBUG_ASSERT(curr_state->node_data->flags & ROLE_ON_STACK);
+      /* Finished with exploring the current node, pop it off the stack */
+      curr_state= &stack.pop();
+      curr_state->node_data->flags&= ~ROLE_ON_STACK; /* clear the on-stack bit */
+      curr_state->node_data->flags|= ROLE_EXPLORED;
+    }
+  }
+
+end:
+  /* Cleanup */
+  for (size_t i= 0; i < to_clear.elements(); i++)
+  {
+    ACL_USER_BASE *current= to_clear.at(i);
+    DBUG_ASSERT(current->flags & (ROLE_EXPLORED | ROLE_ON_STACK | ROLE_OPENED));
+    current->flags&= ~(ROLE_EXPLORED | ROLE_ON_STACK | ROLE_OPENED);
+  }
+  DBUG_RETURN(result);
+}
+
+/**
+  Traverse the role grant graph, going from a role to its grantees.
+
+  This is used to propagate changes in privileges, for example,
+  when GRANT or REVOKE is issued for a role.
+*/
+
+static int traverse_role_graph_up(ACL_ROLE *role, void *context,
+       int (*on_node) (ACL_ROLE *role, void *context),
+       int (*on_edge) (ACL_ROLE *current, ACL_ROLE *neighbour, void *context))
+{
+  return traverse_role_graph_impl(role, context,
+                    my_offsetof(ACL_ROLE, parent_grantee),
+                    (int (*)(ACL_USER_BASE *, void *))on_node,
+                    (int (*)(ACL_USER_BASE *, ACL_ROLE *, void *))on_edge);
+}
+
+/**
+  Traverse the role grant graph, going from a user or a role to granted roles.
+
+  This is used, for example, to print all grants available to a user or a role
+  (as in SHOW GRANTS).
+*/
+
+static int traverse_role_graph_down(ACL_USER_BASE *user, void *context,
+       int (*on_node) (ACL_USER_BASE *role, void *context),
+       int (*on_edge) (ACL_USER_BASE *current, ACL_ROLE *neighbour, void *context))
+{
+  return traverse_role_graph_impl(user, context,
+                             my_offsetof(ACL_USER_BASE, role_grants),
+                             on_node, on_edge);
+}
+
+/*
+  To find all db/table/routine privilege for a specific role
+  we need to scan the array of privileges. It can be big.
+  But the set of privileges granted to a role in question (or
+  to roles directly granted to the role in question) is supposedly
+  much smaller.
+
+  We put a role and all roles directly granted to it in a hash, and iterate
+  the (supposedly long) array of privileges, filtering out "interesting"
+  entries using the role hash. We put all these "interesting"
+  entries in a (supposedly small) dynamic array and then use it for merging.
+*/
+static uchar* role_key(const ACL_ROLE *role, size_t *klen, my_bool)
+{
+  *klen= role->user.length;
+  return (uchar*) role->user.str;
+}
+typedef Hash_set role_hash_t;
+
+static bool merge_role_global_privileges(ACL_ROLE *grantee)
+{
+  privilege_t old= grantee->access;
+  grantee->access= grantee->initial_role_access;
+
+  DBUG_EXECUTE_IF("role_merge_stats", role_global_merges++;);
+
+  for (size_t i= 0; i < grantee->role_grants.elements; i++)
+  {
+    ACL_ROLE *r= *dynamic_element(&grantee->role_grants, i, ACL_ROLE**);
+    grantee->access|= r->access;
+  }
+  return old != grantee->access;
+}
+
+static int db_name_sort(const int *db1, const int *db2)
+{
+  return strcmp(acl_dbs.at(*db1).db, acl_dbs.at(*db2).db);
+}
+
+/**
+  update ACL_DB for given database and a given role with merged privileges
+
+  @param merged ACL_DB of the role in question (or -1 if it wasn't found)
+  @param first  first ACL_DB in an array for the database in question
+  @param access new privileges for the given role on the gived database
+  @param role   the name of the given role
+
+  @return a bitmap of
+          1 - privileges were changed
+          2 - ACL_DB was added
+          4 - ACL_DB was deleted
+*/
+static int update_role_db(int merged, int first, privilege_t access,
+                          const char *role)
+{
+  if (first < 0)
+    return 0;
+
+  DBUG_EXECUTE_IF("role_merge_stats", role_db_merges++;);
+
+  if (merged < 0)
+  {
+    /*
+      there's no ACL_DB for this role (all db grants come from granted roles)
+      we need to create it
+
+      Note that we cannot use acl_insert_db() now:
+      1. it'll sort elements in the acl_dbs, so the pointers will become invalid
+      2. we may need many of them, no need to sort every time
+    */
+    DBUG_ASSERT(access);
+    ACL_DB acl_db;
+    acl_db.user= role;
+    acl_db.host.hostname= const_cast("");
+    acl_db.host.ip= acl_db.host.ip_mask= 0;
+    acl_db.db= acl_dbs.at(first).db;
+    acl_db.access= access;
+    acl_db.initial_access= NO_ACL;
+    acl_db.sort= get_magic_sort("hdu", "", acl_db.db, role);
+    acl_dbs.push(acl_db);
+    return 2;
+  }
+  else if (access == NO_ACL)
+  {
+    /*
+      there is ACL_DB but the role has no db privileges granted
+      (all privileges were coming from granted roles, and now those roles
+      were dropped or had their privileges revoked).
+      we need to remove this ACL_DB entry
+
+      Note, that we cannot delete now:
+      1. it'll shift elements in the acl_dbs, so the pointers will become invalid
+      2. it's O(N) operation, and we may need many of them
+      so we only mark elements deleted and will delete later.
+    */
+    acl_dbs.at(merged).sort= 0; // lower than any valid ACL_DB sort value, will be sorted last
+    return 4;
+  }
+  else if (acl_dbs.at(merged).access != access)
+  {
+    /* this is easy */
+    acl_dbs.at(merged).access= access;
+    return 1;
+  }
+  return 0;
+}
+
+/**
+  merges db privileges from roles granted to the role 'grantee'.
+
+  @return true if database privileges of the 'grantee' were changed
+
+*/
+static bool merge_role_db_privileges(ACL_ROLE *grantee, const char *dbname,
+                                     role_hash_t *rhash)
+{
+  Dynamic_array dbs(PSI_INSTRUMENT_MEM);
+
+  /*
+    Supposedly acl_dbs can be huge, but only a handful of db grants
+    apply to grantee or roles directly granted to grantee.
+
+    Collect these applicable db grants.
+  */
+  for (uint i=0 ; i < acl_dbs.elements() ; i++)
+  {
+    ACL_DB *db= &acl_dbs.at(i);
+    if (db->host.hostname[0])
+      continue;
+    if (dbname && strcmp(db->db, dbname))
+      continue;
+    ACL_ROLE *r= rhash->find(db->user, strlen(db->user));
+    if (!r)
+      continue;
+    dbs.append(i);
+  }
+  dbs.sort(db_name_sort);
+
+  /*
+    Because dbs array is sorted by the db name, all grants for the same db
+    (that should be merged) are sorted together. The grantee's ACL_DB element
+    is not necessarily the first and may be not present at all.
+  */
+  int first= -1, merged= -1;
+  privilege_t access(NO_ACL);
+  ulong update_flags= 0;
+  for (int *p= dbs.front(); p <= dbs.back(); p++)
+  {
+    if (first<0 || (!dbname && strcmp(acl_dbs.at(p[0]).db, acl_dbs.at(p[-1]).db)))
+    { // new db name series
+      update_flags|= update_role_db(merged, first, access, grantee->user.str);
+      merged= -1;
+      access= NO_ACL;
+      first= *p;
+    }
+    if (strcmp(acl_dbs.at(*p).user, grantee->user.str) == 0)
+      access|= acl_dbs.at(merged= *p).initial_access;
+    else
+      access|= acl_dbs.at(*p).access;
+  }
+  update_flags|= update_role_db(merged, first, access, grantee->user.str);
+
+  if (update_flags & 4)
+  {
+    // Remove elements marked for deletion.
+    size_t count= 0;
+    for(size_t i= 0; i < acl_dbs.elements(); i++)
+    {
+      ACL_DB *acl_db= &acl_dbs.at(i);
+      if (acl_db->sort)
+      {
+        if (i > count)
+          acl_dbs.set(count, *acl_db);
+        count++;
+      }
+    }
+    acl_dbs.elements(count);
+  }
+
+
+  if (update_flags & 2)
+  { // inserted, need to sort
+    rebuild_acl_dbs();
+  }
+
+  return update_flags;
+}
+
+static int table_name_sort(GRANT_TABLE * const *tbl1, GRANT_TABLE * const *tbl2)
+{
+  int res = strcmp((*tbl1)->db, (*tbl2)->db);
+  if (res) return res;
+  return strcmp((*tbl1)->tname, (*tbl2)->tname);
+}
+
+/**
+  merges column privileges for the entry 'merged'
+
+  @param merged GRANT_TABLE to merge the privileges into
+  @param cur    first entry in the array of GRANT_TABLE's for a given table
+  @param last   last entry in the array of GRANT_TABLE's for a given table,
+                all entries between cur and last correspond to the *same* table
+
+  @return 1 if the _set of columns_ in 'merged' was changed
+          (not if the _set of privileges_ was changed).
+*/
+static int update_role_columns(GRANT_TABLE *merged,
+                               GRANT_TABLE **cur, GRANT_TABLE **last)
+{
+  privilege_t rights __attribute__((unused)) (NO_ACL);
+  int changed= 0;
+  if (!merged->cols)
+  {
+    changed= merged->hash_columns.records > 0;
+    my_hash_reset(&merged->hash_columns);
+    return changed;
+  }
+
+  DBUG_EXECUTE_IF("role_merge_stats", role_column_merges++;);
+
+  HASH *mh= &merged->hash_columns;
+  for (uint i=0 ; i < mh->records ; i++)
+  {
+    GRANT_COLUMN *col = (GRANT_COLUMN *)my_hash_element(mh, i);
+    col->rights= col->init_rights;
+  }
+
+  for (; cur < last; cur++)
+  {
+    if (*cur == merged)
+      continue;
+    HASH *ch= &cur[0]->hash_columns;
+    for (uint i=0 ; i < ch->records ; i++)
+    {
+      GRANT_COLUMN *ccol = (GRANT_COLUMN *)my_hash_element(ch, i);
+      GRANT_COLUMN *mcol = (GRANT_COLUMN *)my_hash_search(mh,
+                                  (uchar *)ccol->column, ccol->key_length);
+      if (mcol)
+        mcol->rights|= ccol->rights;
+      else
+      {
+        changed= 1;
+        my_hash_insert(mh, (uchar*)new (&grant_memroot) GRANT_COLUMN(ccol));
+      }
+    }
+  }
+
+restart:
+  for (uint i=0 ; i < mh->records ; i++)
+  {
+    GRANT_COLUMN *col = (GRANT_COLUMN *)my_hash_element(mh, i);
+    rights|= col->rights;
+    if (!col->rights)
+    {
+      changed= 1;
+      my_hash_delete(mh, (uchar*)col);
+      goto restart;
+    }
+  }
+  DBUG_ASSERT(rights == merged->cols);
+  return changed;
+}
+
+/**
+  update GRANT_TABLE for a given table and a given role with merged privileges
+
+  @param merged GRANT_TABLE of the role in question (or NULL if it wasn't found)
+  @param first  first GRANT_TABLE in an array for the table in question
+  @param last   last entry in the array of GRANT_TABLE's for a given table,
+                all entries between first and last correspond to the *same* table
+  @param privs  new table-level privileges for 'merged'
+  @param cols   new OR-ed column-level privileges for 'merged'
+  @param role   the name of the given role
+
+  @return a bitmap of
+          1 - privileges were changed
+          2 - GRANT_TABLE was added
+          4 - GRANT_TABLE was deleted
+*/
+static int update_role_table_columns(GRANT_TABLE *merged,
+                                     GRANT_TABLE **first, GRANT_TABLE **last,
+                                     privilege_t privs, privilege_t cols,
+                                     const char *role)
+{
+  if (!first)
+    return 0; // no entries for this table at all — nothing to merge
+
+  DBUG_EXECUTE_IF("role_merge_stats", role_table_merges++;);
+
+  if (merged == NULL)
+  {
+    /*
+      there's no GRANT_TABLE for this role (all table grants come from granted
+      roles) we need to create it
+    */
+    DBUG_ASSERT(privs | cols);
+    merged= new (&grant_memroot) GRANT_TABLE("", first[0]->db, role, first[0]->tname,
+                                     privs, cols);
+    merged->init_privs= merged->init_cols= NO_ACL; // everything is inherited
+    update_role_columns(merged, first, last);
+    column_priv_insert(merged);
+    return 2;
+  }
+  else if ((privs | cols) == NO_ACL)
+  {
+    /*
+      there is GRANT_TABLE object but the role has no table or column
+      privileges granted (all privileges were coming from granted roles, and
+      now those roles were dropped or had their privileges revoked).
+      we need to remove this GRANT_TABLE
+    */
+    DBUG_EXECUTE_IF("role_merge_stats",
+                    role_column_merges+= MY_TEST(merged->cols););
+    my_hash_delete(&column_priv_hash,(uchar*) merged);
+    return 4;
+  }
+  else
+  {
+    /* update the existing entry in place */
+    bool changed= merged->cols != cols || merged->privs != privs;
+    merged->cols= cols;
+    merged->privs= privs;
+    if (update_role_columns(merged, first, last))
+      changed= true;
+    return changed;
+  }
+}
+
+/**
+  merges table privileges from roles granted to the role 'grantee'.
+
+  @param grantee the role whose table/column privileges are recalculated
+  @param db      table's db name, or NULL to process all tables
+  @param tname   table name, or NULL to process all tables
+  @param rhash   hash of the names of the roles whose grants are considered
+
+  @return true if table privileges of the 'grantee' were changed
+*/
+static bool merge_role_table_and_column_privileges(ACL_ROLE *grantee,
+                        const char *db, const char *tname, role_hash_t *rhash)
+{
+  /* restored template argument (was lost in extraction): array of grants */
+  Dynamic_array<GRANT_TABLE *> grants(PSI_INSTRUMENT_MEM);
+  DBUG_ASSERT(MY_TEST(db) == MY_TEST(tname)); // both must be set, or neither
+
+  /*
+    first, collect table/column privileges granted to
+    roles in question.
+  */
+  for (uint i=0 ; i < column_priv_hash.records ; i++)
+  {
+    GRANT_TABLE *grant= (GRANT_TABLE *) my_hash_element(&column_priv_hash, i);
+    if (grant->host.hostname[0])
+      continue; // role grants have an empty hostname — skip user grants
+    if (tname && (strcmp(grant->db, db) || strcmp(grant->tname, tname)))
+      continue;
+    ACL_ROLE *r= rhash->find(grant->user, strlen(grant->user));
+    if (!r)
+      continue;
+    grants.append(grant);
+  }
+  grants.sort(table_name_sort);
+
+  /* now merge the grants, one (db, tname) run of the sorted array at a time */
+  GRANT_TABLE **first= NULL, *merged= NULL, **cur;
+  privilege_t privs(NO_ACL), cols(NO_ACL);
+  ulong update_flags= 0;
+  for (cur= grants.front(); cur <= grants.back(); cur++)
+  {
+    if (!first ||
+        (!tname && (strcmp(cur[0]->db, cur[-1]->db) ||
+                   strcmp(cur[0]->tname, cur[-1]->tname))))
+    { // new db.tname series
+      update_flags|= update_role_table_columns(merged, first, cur,
+                                               privs, cols, grantee->user.str);
+      merged= NULL;
+      privs= cols= NO_ACL;
+      first= cur;
+    }
+    if (strcmp(cur[0]->user, grantee->user.str) == 0)
+    {
+      /* the grantee's own entry: start from its directly-granted privileges */
+      merged= cur[0];
+      cols|= cur[0]->init_cols;
+      privs|= cur[0]->init_privs;
+    }
+    else
+    {
+      /* an entry of a granted role: contribute its merged privileges */
+      cols|= cur[0]->cols;
+      privs|= cur[0]->privs;
+    }
+  }
+  /* flush the final (db, tname) series */
+  update_flags|= update_role_table_columns(merged, first, cur,
+                                           privs, cols, grantee->user.str);
+
+  return update_flags;
+}
+
+/* qsort-style comparator: order GRANT_NAME entries by (db, tname) */
+static int routine_name_sort(GRANT_NAME * const *r1, GRANT_NAME * const *r2)
+{
+  if (int cmp= strcmp((*r1)->db, (*r2)->db))
+    return cmp;
+  return strcmp((*r1)->tname, (*r2)->tname);
+}
+
+/**
+  update GRANT_NAME for a given routine and a given role with merged privileges
+
+  @param merged GRANT_NAME of the role in question (or NULL if it wasn't found)
+  @param first  first GRANT_NAME in an array for the routine in question
+  @param privs  new routine-level privileges for 'merged'
+  @param role   the name of the given role
+  @param hash   proc_priv_hash or func_priv_hash
+
+  @return a bitmap of
+          1 - privileges were changed
+          2 - GRANT_NAME was added
+          4 - GRANT_NAME was deleted
+*/
+static int update_role_routines(GRANT_NAME *merged, GRANT_NAME **first,
+                                privilege_t privs, const char *role, HASH *hash)
+{
+  if (!first)
+    return 0; // no entries for this routine at all — nothing to merge
+
+  DBUG_EXECUTE_IF("role_merge_stats", role_routine_merges++;);
+
+  if (merged == NULL)
+  {
+    /*
+      there's no GRANT_NAME for this role (all routine grants come from granted
+      roles) we need to create it
+    */
+    DBUG_ASSERT(privs);
+    merged= new (&grant_memroot) GRANT_NAME("", first[0]->db, role, first[0]->tname,
+                                    privs, true);
+    merged->init_privs= NO_ACL; // all privs are inherited
+    my_hash_insert(hash, (uchar *)merged);
+    return 2;
+  }
+  else if (privs == NO_ACL)
+  {
+    /*
+      there is GRANT_NAME but the role has no privileges granted
+      (all privileges were coming from granted roles, and now those roles
+      were dropped or had their privileges revoked).
+      we need to remove this entry
+    */
+    my_hash_delete(hash, (uchar*)merged);
+    return 4;
+  }
+  else if (merged->privs != privs)
+  {
+    /* this is easy */
+    merged->privs= privs;
+    return 1;
+  }
+  return 0; // nothing changed
+}
+
+/**
+  merges routine privileges from roles granted to the role 'grantee'.
+
+  @param grantee the role whose routine privileges are recalculated
+  @param db      routine's db name, or NULL to process all routines
+  @param tname   routine name, or NULL to process all routines
+  @param rhash   hash of the names of the roles whose grants are considered
+  @param hash    proc/func/package_spec/package_body privilege hash
+
+  @return true if routine privileges of the 'grantee' were changed
+*/
+static bool merge_role_routine_grant_privileges(ACL_ROLE *grantee,
+            const char *db, const char *tname, role_hash_t *rhash, HASH *hash)
+{
+  ulong update_flags= 0;
+
+  DBUG_ASSERT(MY_TEST(db) == MY_TEST(tname)); // both must be set, or neither
+
+  /* restored template argument (was lost in extraction): array of grants */
+  Dynamic_array<GRANT_NAME *> grants(PSI_INSTRUMENT_MEM);
+
+  /* first, collect routine privileges granted to roles in question */
+  for (uint i=0 ; i < hash->records ; i++)
+  {
+    GRANT_NAME *grant= (GRANT_NAME *) my_hash_element(hash, i);
+    if (grant->host.hostname[0])
+      continue; // role grants have an empty hostname — skip user grants
+    if (tname && (strcmp(grant->db, db) || strcmp(grant->tname, tname)))
+      continue;
+    ACL_ROLE *r= rhash->find(grant->user, strlen(grant->user));
+    if (!r)
+      continue;
+    grants.append(grant);
+  }
+  grants.sort(routine_name_sort);
+
+  /* now merge the grants, one (db, tname) run of the sorted array at a time */
+  GRANT_NAME **first= NULL, *merged= NULL;
+  privilege_t privs(NO_ACL);
+  for (GRANT_NAME **cur= grants.front(); cur <= grants.back(); cur++)
+  {
+    if (!first ||
+        (!tname && (strcmp(cur[0]->db, cur[-1]->db) ||
+                    strcmp(cur[0]->tname, cur[-1]->tname))))
+    { // new db.tname series
+      update_flags|= update_role_routines(merged, first, privs,
+                                          grantee->user.str, hash);
+      merged= NULL;
+      privs= NO_ACL;
+      first= cur;
+    }
+    if (strcmp(cur[0]->user, grantee->user.str) == 0)
+    {
+      /* the grantee's own entry: start from its directly-granted privileges */
+      merged= cur[0];
+      privs|= cur[0]->init_privs;
+    }
+    else
+    {
+      /* an entry of a granted role: contribute its merged privileges */
+      privs|= cur[0]->privs;
+    }
+  }
+  /* flush the final (db, tname) series */
+  update_flags|= update_role_routines(merged, first, privs,
+                                      grantee->user.str, hash);
+  return update_flags;
+}
+
+/**
+  update privileges of the 'grantee' from all roles, granted to it
+
+  NOTE(review): the signature matches a role-graph traversal callback
+  ('role' is unused here); it is also called directly from
+  merge_one_role_privileges() — confirm the caller set.
+
+  @return 1 to skip the grantee's subgraph (not all in-edges visited yet,
+            or nothing changed), 0 to continue the traversal into it
+*/
+static int merge_role_privileges(ACL_ROLE *role __attribute__((unused)),
+                                 ACL_ROLE *grantee, void *context)
+{
+  PRIVS_TO_MERGE *data= (PRIVS_TO_MERGE *)context;
+
+  /* only merge once all edges into the grantee have been counted down */
+  DBUG_ASSERT(grantee->counter > 0);
+  if (--grantee->counter)
+    return 1; // don't recurse into grantee just yet
+
+  grantee->counter= 1; // Mark the grantee as merged.
+
+  /* if we'll do db/table/routine privileges, create a hash of role names */
+  role_hash_t role_hash(PSI_INSTRUMENT_MEM, role_key);
+  if (data->what != PRIVS_TO_MERGE::GLOBAL)
+  {
+    /* the grantee itself plus every role directly granted to it */
+    role_hash.insert(grantee);
+    for (size_t i= 0; i < grantee->role_grants.elements; i++)
+      role_hash.insert(*dynamic_element(&grantee->role_grants, i, ACL_ROLE**));
+  }
+
+  /* merge whichever privilege kinds were requested */
+  bool all= data->what == PRIVS_TO_MERGE::ALL;
+  bool changed= false;
+  if (all || data->what == PRIVS_TO_MERGE::GLOBAL)
+    changed|= merge_role_global_privileges(grantee);
+  if (all || data->what == PRIVS_TO_MERGE::DB)
+    changed|= merge_role_db_privileges(grantee, data->db, &role_hash);
+  if (all || data->what == PRIVS_TO_MERGE::TABLE_COLUMN)
+    changed|= merge_role_table_and_column_privileges(grantee,
+                                             data->db, data->name, &role_hash);
+  if (all || data->what == PRIVS_TO_MERGE::PROC)
+    changed|= merge_role_routine_grant_privileges(grantee,
+                            data->db, data->name, &role_hash, &proc_priv_hash);
+  if (all || data->what == PRIVS_TO_MERGE::FUNC)
+    changed|= merge_role_routine_grant_privileges(grantee,
+                            data->db, data->name, &role_hash, &func_priv_hash);
+  if (all || data->what == PRIVS_TO_MERGE::PACKAGE_SPEC)
+    changed|= merge_role_routine_grant_privileges(grantee,
+                            data->db, data->name, &role_hash,
+                            &package_spec_priv_hash);
+  if (all || data->what == PRIVS_TO_MERGE::PACKAGE_BODY)
+    changed|= merge_role_routine_grant_privileges(grantee,
+                            data->db, data->name, &role_hash,
+                            &package_body_priv_hash);
+  return !changed; // don't recurse into the subgraph if privs didn't change
+}
+
+/*
+  Merge privileges for a single role without graph traversal: reset the
+  counter so merge_role_privileges() processes 'grantee' unconditionally.
+*/
+static
+bool merge_one_role_privileges(ACL_ROLE *grantee,
+                               PRIVS_TO_MERGE what)
+{
+  grantee->counter= 1;
+  return merge_role_privileges(NULL, grantee, &what);
+}
+
+/*****************************************************************
+  End of the role privilege propagation and graph traversal code
+******************************************************************/
+
+/*
+  Does the statement specify any authentication-related attribute for
+  'user' — credentials, SSL/X509 requirements, or resource limits?
+*/
+static bool has_auth(LEX_USER *user, LEX *lex)
+{
+  const auto &opts= lex->account_options;
+  if (user->has_auth())
+    return true;
+  if (opts.ssl_type != SSL_TYPE_NOT_SPECIFIED)
+    return true;
+  return opts.ssl_cipher.str || opts.x509_issuer.str ||
+         opts.x509_subject.str || opts.specified_limits;
+}
+
+/**
+  Copy the authentication clause from 'from' to 'to', and when the statement
+  changes authentication of an already-existing user, verify via
+  check_alter_user() that the current user may alter that account.
+
+  @return result of check_alter_user() (true = denied), or false when no
+          existing user's authentication is being changed
+
+  NOTE(review): acl_cache->lock is released around check_alter_user() —
+  presumably because it acquires the lock itself; confirm.
+*/
+static bool copy_and_check_auth(LEX_USER *to, LEX_USER *from, THD *thd)
+{
+  to->auth= from->auth;
+
+  // if changing auth for an existing user
+  if (has_auth(to, thd->lex) && find_user_exact(to->host.str, to->user.str))
+  {
+    mysql_mutex_unlock(&acl_cache->lock);
+    bool res= check_alter_user(thd, to->host.str, to->user.str);
+    mysql_mutex_lock(&acl_cache->lock);
+    return res;
+  }
+
+  return false;
+}
+
+
+/*
+  Store table level and column level grants in the privilege tables
+
+  SYNOPSIS
+    mysql_table_grant()
+    thd			Thread handle
+    table_list		List of tables to give grant
+    user_list		List of users to give grant
+    columns		List of columns to give grant
+    rights		Table level grant
+    revoke_grant	Set to 1 if this is a REVOKE command
+
+  RETURN
+    FALSE ok
+    TRUE  error
+*/
+
+int mysql_table_grant(THD *thd, TABLE_LIST *table_list,
+		      List<LEX_USER> &user_list,
+		      List<LEX_COLUMN> &columns, privilege_t rights,
+		      bool revoke_grant)
+{
+  privilege_t column_priv(NO_ACL);
+  int result, res;
+  List_iterator<LEX_USER> str_list (user_list);
+  LEX_USER *Str, *tmp_Str;
+  bool create_new_users=0;
+  const char *db_name, *table_name;
+  DBUG_ENTER("mysql_table_grant");
+
+  if (rights & ~TABLE_ACLS)
+  {
+    my_message(ER_ILLEGAL_GRANT_FOR_TABLE,
+               ER_THD(thd, ER_ILLEGAL_GRANT_FOR_TABLE),
+               MYF(0));
+    DBUG_RETURN(TRUE);
+  }
+
+  if (!revoke_grant)
+  {
+    if (columns.elements)
+    {
+      class LEX_COLUMN *column;
+      List_iterator<LEX_COLUMN> column_iter(columns);
+
+      if (open_normal_and_derived_tables(thd, table_list, 0, DT_PREPARE))
+        DBUG_RETURN(TRUE);
+
+      /* resolve every named column and accumulate column-level privileges */
+      while ((column = column_iter++))
+      {
+        field_index_t unused_field_idx= NO_CACHED_FIELD_INDEX;
+        TABLE_LIST *dummy;
+        Field *f=find_field_in_table_ref(thd, table_list, column->column.ptr(),
+                                         column->column.length(),
+                                         column->column.ptr(), NULL, NULL,
+                                         ignored_tables_list_t(NULL), NULL,
+                                         TRUE, FALSE, &unused_field_idx, FALSE,
+                                         &dummy);
+        if (unlikely(f == (Field*)0))
+        {
+          my_error(ER_BAD_FIELD_ERROR, MYF(0),
+                   column->column.c_ptr(), table_list->alias.str);
+          DBUG_RETURN(TRUE);
+        }
+        if (unlikely(f == (Field *)-1))
+          DBUG_RETURN(TRUE);
+        column_priv|= column->rights;
+      }
+      close_mysql_tables(thd);
+    }
+    else
+    {
+      /* table-level grant: unless CREATE is granted, the table must exist */
+      if (!(rights & CREATE_ACL))
+      {
+        if (!ha_table_exists(thd, &table_list->db, &table_list->table_name))
+        {
+          my_error(ER_NO_SUCH_TABLE, MYF(0), table_list->db.str,
+                   table_list->alias.str);
+          DBUG_RETURN(TRUE);
+        }
+      }
+      if (table_list->grant.want_privilege)
+      {
+        char command[128];
+        get_privilege_desc(command, sizeof(command),
+                           table_list->grant.want_privilege);
+        my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0),
+                 command, thd->security_ctx->priv_user,
+                 thd->security_ctx->host_or_ip, table_list->db.str,
+                 table_list->alias.str);
+        DBUG_RETURN(-1);
+      }
+    }
+  }
+
+  /*
+    Open the mysql.user and mysql.tables_priv tables.
+    Don't open column table if we don't need it !
+  */
+  int tables_to_open= Table_user | Table_tables_priv;
+  if (column_priv ||
+      (revoke_grant && ((rights & COL_ACLS) || columns.elements)))
+    tables_to_open|= Table_columns_priv;
+
+  /*
+    The lock api is depending on the thd->lex variable which needs to be
+    re-initialized.
+  */
+  Query_tables_list backup;
+  thd->lex->reset_n_backup_query_tables_list(&backup);
+  /*
+    Restore Query_tables_list::sql_command value, which was reset
+    above, as the code writing query to the binary log assumes that
+    this value corresponds to the statement being executed.
+  */
+  thd->lex->sql_command= backup.sql_command;
+
+  Grant_tables tables;
+  if ((result= tables.open_and_lock(thd, tables_to_open, TL_WRITE)))
+  {
+    thd->lex->restore_backup_query_tables_list(&backup);
+    DBUG_RETURN(result != 1);
+  }
+
+  if (!revoke_grant)
+    create_new_users= test_if_create_new_users(thd);
+  mysql_rwlock_wrlock(&LOCK_grant);
+  mysql_mutex_lock(&acl_cache->lock);
+  MEM_ROOT *old_root= thd->mem_root;
+  thd->mem_root= &grant_memroot;
+  grant_version++;
+
+  while ((tmp_Str = str_list++))
+  {
+    int error;
+    GRANT_TABLE *grant_table;
+    if (!(Str= get_current_user(thd, tmp_Str, false)))
+    {
+      result= TRUE;
+      continue;
+    }
+    /* Create user if needed */
+    error= copy_and_check_auth(Str, Str, thd) ||
+           replace_user_table(thd, tables.user_table(), Str,
+                               NO_ACL, revoke_grant, create_new_users,
+                               MY_TEST(!is_public(Str) &&
+                                       (thd->variables.sql_mode &
+                                        MODE_NO_AUTO_CREATE_USER)));
+    if (unlikely(error))
+    {
+      result= TRUE;				// Remember error
+      continue;					// Add next user
+    }
+
+    db_name= table_list->get_db_name();
+    table_name= table_list->get_table_name();
+
+    /* Find/create cached table grant */
+    grant_table= table_hash_search(Str->host.str, NullS, db_name,
+                                   Str->user.str, table_name, 1);
+    if (!grant_table)
+    {
+      if (revoke_grant)
+      {
+        my_error(ER_NONEXISTING_TABLE_GRANT, MYF(0),
+                 Str->user.str, Str->host.str, table_list->table_name.str);
+        result= TRUE;
+        continue;
+      }
+      grant_table= new (&grant_memroot) GRANT_TABLE(Str->host.str, db_name,
+                                                    Str->user.str, table_name,
+                                                    rights,
+                                                    column_priv);
+      if (!grant_table ||
+          column_priv_insert(grant_table))
+      {
+        result= TRUE;				/* purecov: deadcode */
+        continue;				/* purecov: deadcode */
+      }
+    }
+
+    /* If revoke_grant, calculate the new column privilege for tables_priv */
+    if (revoke_grant)
+    {
+      class LEX_COLUMN *column;
+      List_iterator<LEX_COLUMN> column_iter(columns);
+      GRANT_COLUMN *grant_column;
+
+      /* Fix old grants */
+      while ((column = column_iter++))
+      {
+        grant_column = column_hash_search(grant_table,
+                                          column->column.ptr(),
+                                          column->column.length());
+        if (grant_column)
+        {
+          grant_column->init_rights&= ~(column->rights | rights);
+          // If this is a role, rights will need to be reconstructed.
+          grant_column->rights= grant_column->init_rights;
+        }
+      }
+      /* scan trough all columns to get new column grant */
+      column_priv= NO_ACL;
+      for (uint idx=0 ; idx < grant_table->hash_columns.records ; idx++)
+      {
+        grant_column= (GRANT_COLUMN*)
+          my_hash_element(&grant_table->hash_columns, idx);
+        grant_column->init_rights&= ~rights;  // Fix other columns
+        grant_column->rights= grant_column->init_rights;
+        column_priv|= grant_column->init_rights;
+      }
+    }
+    else
+    {
+      column_priv|= grant_table->init_cols;
+    }
+
+
+    /* update table and columns */
+
+    /* TODO(cvicentiu) refactor replace_table_table to use Tables_priv_table
+       instead of TABLE directly. */
+    if (tables.columns_priv_table().table_exists())
+    {
+      /* TODO(cvicentiu) refactor replace_column_table to use Columns_priv_table
+         instead of TABLE directly. */
+      if (replace_column_table(grant_table, tables.columns_priv_table().table(),
+                               *Str, columns, db_name, table_name, rights,
+                               revoke_grant))
+	result= TRUE;
+    }
+    if ((res= replace_table_table(thd, grant_table,
+                                  tables.tables_priv_table().table(),
+                                  *Str, db_name, table_name,
+                                  rights, column_priv, revoke_grant)))
+    {
+      if (res > 0)
+      {
+        /* Should only happen if table is crashed */
+        result= TRUE;			       /* purecov: deadcode */
+      }
+    }
+    if (Str->is_role())
+      propagate_role_grants(find_acl_role(Str->user.str, true),
+                            PRIVS_TO_MERGE::TABLE_COLUMN, db_name, table_name);
+  }
+
+  thd->mem_root= old_root;
+  mysql_mutex_unlock(&acl_cache->lock);
+
+  if (!result) /* success */
+    result= write_bin_log(thd, TRUE, thd->query(), thd->query_length());
+
+  mysql_rwlock_unlock(&LOCK_grant);
+
+  if (!result) /* success */
+    my_ok(thd);
+
+  thd->lex->restore_backup_query_tables_list(&backup);
+  DBUG_RETURN(result);
+}
+
+
+/**
+  Store routine level grants in the privilege tables
+
+  @param thd Thread handle
+  @param table_list List of routines to give grant
+  @param sph SP handler
+  @param user_list List of users to give grant
+  @param rights Table level grant
+  @param revoke_grant Is this a REVOKE command?
+  @param write_to_binlog Whether to log the statement to the binary log
+
+  @return
+    @retval FALSE Success.
+    @retval TRUE An error occurred.
+*/
+
+bool mysql_routine_grant(THD *thd, TABLE_LIST *table_list,
+                         const Sp_handler *sph,
+			 List<LEX_USER> &user_list, privilege_t rights,
+			 bool revoke_grant, bool write_to_binlog)
+{
+  List_iterator<LEX_USER> str_list (user_list);
+  LEX_USER *Str, *tmp_Str;
+  bool create_new_users= 0;
+  int result;
+  const char *db_name, *table_name;
+  DBUG_ENTER("mysql_routine_grant");
+
+  if (rights & ~PROC_ACLS)
+  {
+    my_message(ER_ILLEGAL_GRANT_FOR_TABLE,
+               ER_THD(thd, ER_ILLEGAL_GRANT_FOR_TABLE),
+               MYF(0));
+    DBUG_RETURN(TRUE);
+  }
+
+  if (!revoke_grant)
+  {
+    if (sph->sp_exist_routines(thd, table_list))
+      DBUG_RETURN(TRUE);
+  }
+
+  Grant_tables tables;
+  if ((result= tables.open_and_lock(thd, Table_user | Table_procs_priv, TL_WRITE)))
+    DBUG_RETURN(result != 1);
+
+  DBUG_ASSERT(!thd->is_current_stmt_binlog_format_row());
+
+  if (!revoke_grant)
+    create_new_users= test_if_create_new_users(thd);
+  mysql_rwlock_wrlock(&LOCK_grant);
+  mysql_mutex_lock(&acl_cache->lock);
+  MEM_ROOT *old_root= thd->mem_root;
+  thd->mem_root= &grant_memroot;
+
+  DBUG_PRINT("info",("now time to iterate and add users"));
+
+  while ((tmp_Str= str_list++))
+  {
+    GRANT_NAME *grant_name;
+    if (!(Str= get_current_user(thd, tmp_Str, false)))
+    {
+      result= TRUE;
+      continue;
+    }
+    /* Create user if needed */
+    if (copy_and_check_auth(Str, tmp_Str, thd) ||
+        replace_user_table(thd, tables.user_table(), Str,
+			   NO_ACL, revoke_grant, create_new_users,
+                           !is_public(Str) && (thd->variables.sql_mode &
+                                     MODE_NO_AUTO_CREATE_USER)))
+    {
+      result= TRUE;
+      continue;
+    }
+
+    db_name= table_list->db.str;
+    table_name= table_list->table_name.str;
+    grant_name= routine_hash_search(Str->host.str, NullS, db_name,
+                                    Str->user.str, table_name, sph, 1);
+    if (revoke_grant && (!grant_name || !grant_name->init_privs))
+    {
+      my_error(ER_NONEXISTING_PROC_GRANT, MYF(0),
+               Str->user.str, Str->host.str, table_name);
+      result= TRUE;
+      continue;
+    }
+    if (!grant_name)
+    {
+      DBUG_ASSERT(!revoke_grant);
+      grant_name= new GRANT_NAME(Str->host.str, db_name,
+                                 Str->user.str, table_name,
+                                 rights, TRUE);
+      if (!grant_name ||
+          my_hash_insert(sph->get_priv_hash(), (uchar*) grant_name))
+      {
+        result= TRUE;
+        continue;
+      }
+    }
+
+    if (replace_routine_table(thd, grant_name, tables.procs_priv_table().table(),
+          *Str, db_name, table_name, sph, rights, revoke_grant) != 0)
+    {
+      result= TRUE;
+      continue;
+    }
+    if (Str->is_role())
+      propagate_role_grants(find_acl_role(Str->user.str, true),
+                            sp_privs_to_merge(sph->type()),
+                            db_name, table_name);
+  }
+  thd->mem_root= old_root;
+  mysql_mutex_unlock(&acl_cache->lock);
+
+  if (write_to_binlog)
+  {
+    if (write_bin_log(thd, FALSE, thd->query(), thd->query_length()))
+      result= TRUE;
+  }
+
+  mysql_rwlock_unlock(&LOCK_grant);
+
+  /* Tables are automatically closed */
+  DBUG_RETURN(result);
+}
+
+/**
+  Append a quoted user (or role) name to 'str', separating successive
+  entries with commas; the buffer is later used in an error message.
+*/
+static void append_user(THD *thd, String *str,
+                        const LEX_CSTRING *u, const LEX_CSTRING *h)
+{
+  const bool no_backslash=
+    (thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES) != 0;
+
+  if (str->length())
+    str->append(',');
+  append_query_string(system_charset_info, str, u->str, u->length,
+                      no_backslash);
+  /* hostname part is not relevant for roles, it is always empty */
+  if (u->length == 0 || h->length != 0)
+  {
+    str->append('@');
+    append_query_string(system_charset_info, str, h->str, h->length,
+                        no_backslash);
+  }
+}
+
+/* convenience overload taking a LEX_USER */
+static void append_user(THD *thd, String *str, LEX_USER *user)
+{
+  append_user(thd, str, &user->user, &user->host);
+}
+
+/**
+  Append a raw string to the error-message buffer, comma-separated.
+
+  @note
+  The string is a keyword such as CURRENT_USER, CURRENT_ROLE or NONE and
+  must be appended verbatim: neither quoted nor escaped.
+*/
+static void append_str(String *str, const char *s, size_t l)
+{
+  if (str->length() != 0)
+    str->append(',');
+  str->append(s, l);
+}
+
+/**
+  Role-graph traversal callback: abort the traversal as soon as the role
+  passed via 'data' is found granted to 'grantee' WITH ADMIN OPTION.
+
+  @retval  0 keep searching
+  @retval -1 abort the traversal (WITH ADMIN grant found)
+*/
+static int can_grant_role_callback(ACL_USER_BASE *grantee,
+                                   ACL_ROLE *role, void *data)
+{
+  ROLE_GRANT_PAIR *pair;
+
+  if (role != (ACL_ROLE*)data)
+    return 0; // keep searching
+
+  /* roles are looked up with an empty hostname, users with their real one */
+  if (grantee->flags & IS_ROLE)
+    pair= find_role_grant_pair(&grantee->user, &empty_clex_str, &role->user);
+  else
+  {
+    ACL_USER *user= (ACL_USER *)grantee;
+    LEX_CSTRING host= { user->host.hostname, user->hostname_length };
+    pair= find_role_grant_pair(&user->user, &host, &role->user);
+  }
+  /* NOTE(review): 'pair' is dereferenced unchecked — presumably the
+     traversal only follows existing grant edges, so it cannot be NULL;
+     confirm against find_role_grant_pair() */
+  if (!pair->with_admin)
+    return 0; // keep searching
+
+  return -1; // abort the traversal
+}
+
+
+/*
+  One can only grant a role if SELECT * FROM I_S.APPLICABLE_ROLES shows this
+  role as grantable.
+
+  What this really means - we need to traverse role graph for the current user
+  looking for our role being granted with the admin option.
+*/
+static bool can_grant_role(THD *thd, ACL_ROLE *role)
+{
+  Security_context *sctx= thd->security_ctx;
+
+  if (sctx->user == NULL)
+    return true; // replication
+
+  ACL_USER *self= find_user_exact(sctx->priv_host, sctx->priv_user);
+  if (self == NULL)
+    return false;
+
+  int res= traverse_role_graph_down(self, role, NULL,
+                                    can_grant_role_callback);
+  return res == -1; // -1: traversal stopped on a WITH ADMIN OPTION grant
+}
+
+
+bool mysql_grant_role(THD *thd, List  &list, bool revoke)
+{
+  DBUG_ENTER("mysql_grant_role");
+  /*
+     The first entry in the list is the granted role. Need at least two
+     entries for the command to be valid
+   */
+  DBUG_ASSERT(list.elements >= 2);
+  int result;
+  bool create_new_user, no_auto_create_user;
+  String wrong_users;
+  LEX_USER *user, *granted_role;
+  LEX_CSTRING rolename;
+  LEX_CSTRING username;
+  LEX_CSTRING hostname;
+  ACL_ROLE *role, *role_as_user;
+
+  List_iterator  user_list(list);
+  granted_role= user_list++;
+  if (!(granted_role= get_current_user(thd, granted_role)))
+    DBUG_RETURN(TRUE);
+
+  DBUG_ASSERT(granted_role->is_role());
+  rolename= granted_role->user;
+
+  create_new_user= test_if_create_new_users(thd);
+  no_auto_create_user= thd->variables.sql_mode & MODE_NO_AUTO_CREATE_USER;
+
+  Grant_tables tables;
+  if ((result= tables.open_and_lock(thd, Table_user | Table_roles_mapping, TL_WRITE)))
+    DBUG_RETURN(result != 1);
+
+  mysql_rwlock_wrlock(&LOCK_grant);
+  mysql_mutex_lock(&acl_cache->lock);
+  if (!(role= find_acl_role(rolename.str, false)))
+  {
+    mysql_mutex_unlock(&acl_cache->lock);
+    mysql_rwlock_unlock(&LOCK_grant);
+    my_error(ER_INVALID_ROLE, MYF(0), rolename.str);
+    DBUG_RETURN(TRUE);
+  }
+
+  if (!can_grant_role(thd, role))
+  {
+    mysql_mutex_unlock(&acl_cache->lock);
+    mysql_rwlock_unlock(&LOCK_grant);
+    my_error(ER_ACCESS_DENIED_NO_PASSWORD_ERROR, MYF(0),
+             thd->security_ctx->priv_user, thd->security_ctx->priv_host);
+    DBUG_RETURN(TRUE);
+  }
+
+  while ((user= user_list++))
+  {
+    role_as_user= NULL;
+    /* current_role is treated slightly different */
+    if (user->user.str == current_role.str)
+    {
+      /* current_role is NONE */
+      if (!thd->security_ctx->priv_role[0])
+      {
+        my_error(ER_INVALID_ROLE, MYF(0), none.str);
+        append_str(&wrong_users, none.str, none.length);
+        result= 1;
+        continue;
+      }
+      if (!(role_as_user= find_acl_role(thd->security_ctx->priv_role, true)))
+      {
+        LEX_CSTRING ls= { thd->security_ctx->priv_role,
+                          strlen(thd->security_ctx->priv_role) };
+        append_user(thd, &wrong_users, &ls, &empty_clex_str);
+        result= 1;
+        continue;
+      }
+
+      /* can not grant current_role to current_role */
+      if (granted_role->user.str == current_role.str)
+      {
+        append_user(thd, &wrong_users, &role_as_user->user, &empty_clex_str);
+        result= 1;
+        continue;
+      }
+      username.str= thd->security_ctx->priv_role;
+      username.length= strlen(username.str);
+      hostname= empty_clex_str;
+    }
+    else if (user->user.str == current_user.str)
+    {
+      username.str= thd->security_ctx->priv_user;
+      username.length= strlen(username.str);
+      hostname.str= thd->security_ctx->priv_host;
+      hostname.length= strlen(hostname.str);
+    }
+    else
+    {
+      if (user->host.str)
+        hostname= user->host;
+      else
+      switch (check_role_name(&user->user, true)) {
+      case ROLE_NAME_INVALID:
+        append_user(thd, &wrong_users, &user->user, &empty_clex_str);
+        result= 1;
+        continue;
+      case ROLE_NAME_PUBLIC:
+        user->host= hostname= empty_clex_str;
+        role_as_user= acl_public;
+        break;
+      case ROLE_NAME_OK:
+        if ((role_as_user= find_acl_role(user->user.str, false)))
+          hostname= empty_clex_str;
+        else
+          hostname= host_not_specified;
+        break;
+      }
+      username= user->user;
+    }
+
+    ROLE_GRANT_PAIR *hash_entry= find_role_grant_pair(&username, &hostname,
+                                                      &rolename);
+    ACL_USER_BASE *grantee= role_as_user;
+
+    if (has_auth(user, thd->lex))
+      DBUG_ASSERT(!grantee);
+    else if (!grantee && !is_public(user))
+      grantee= find_user_exact(hostname.str, username.str);
+
+    if (!grantee && !revoke)
+    {
+      LEX_USER user_combo = *user;
+      user_combo.user = username;
+      user_combo.host = hostname;
+
+      if (copy_and_check_auth(&user_combo, &user_combo, thd) ||
+          replace_user_table(thd, tables.user_table(), &user_combo, NO_ACL,
+                             false, create_new_user,
+                             (!is_public(&user_combo) && no_auto_create_user)))
+      {
+        append_user(thd, &wrong_users, &username, &hostname);
+        result= 1;
+        continue;
+      }
+      if (!is_public(&user_combo))
+        grantee= find_user_exact(hostname.str, username.str);
+      else
+        grantee= role_as_user= acl_public;
+
+      /* either replace_user_table failed, or we've added the user */
+      DBUG_ASSERT(grantee);
+    }
+
+    if (!grantee)
+    {
+      append_user(thd, &wrong_users, &username, &hostname);
+      result= 1;
+      continue;
+    }
+
+    if (!revoke)
+    {
+      if (hash_entry)
+      {
+        // perhaps, updating an existing grant, adding WITH ADMIN OPTION
+      }
+      else
+      {
+        add_role_user_mapping(grantee, role);
+
+        /*
+          Check if this grant would cause a cycle. It only needs to be run
+          if we're granting a role to a role
+        */
+        if (role_as_user &&
+            traverse_role_graph_down(role, 0, 0, 0) == ROLE_CYCLE_FOUND)
+        {
+          append_user(thd, &wrong_users, &username, &empty_clex_str);
+          result= 1;
+          undo_add_role_user_mapping(grantee, role);
+          continue;
+        }
+      }
+    }
+    else
+    {
+      /* grant was already removed or never existed */
+      if (!hash_entry)
+      {
+        append_user(thd, &wrong_users, &username, &hostname);
+        result= 1;
+        continue;
+      }
+      if (thd->lex->with_admin_option)
+      {
+        // only revoking an admin option, not the complete grant
+      }
+      else
+      {
+        /* revoke a role grant */
+        remove_role_user_mapping(grantee, role);
+      }
+    }
+
+    /* write into the roles_mapping table */
+    /* TODO(cvicentiu) refactor replace_roles_mapping_table to use
+       Roles_mapping_table instead of TABLE directly. */
+    if (replace_roles_mapping_table(tables.roles_mapping_table().table(),
+                                    &username, &hostname, &rolename,
+                                    thd->lex->with_admin_option,
+                                    hash_entry, revoke))
+    {
+      append_user(thd, &wrong_users, &username, &empty_clex_str);
+      result= 1;
+      if (!revoke)
+      {
+        /* need to remove the mapping added previously */
+        undo_add_role_user_mapping(grantee, role);
+      }
+      else
+      {
+        /* need to restore the mapping deleted previously */
+        add_role_user_mapping(grantee, role);
+      }
+      continue;
+    }
+    update_role_mapping(&username, &hostname, &rolename,
+                        thd->lex->with_admin_option, hash_entry, revoke);
+
+    /*
+       Only need to propagate grants when granting/revoking a role to/from
+       a role
+    */
+    if (role_as_user)
+    {
+      propagate_role_grants(role_as_user, PRIVS_TO_MERGE::ALL);
+      acl_cache->clear(1);
+    }
+  }
+
+  mysql_mutex_unlock(&acl_cache->lock);
+
+  if (result)
+    my_error(revoke ? ER_CANNOT_REVOKE_ROLE : ER_CANNOT_GRANT_ROLE, MYF(0),
+             rolename.str, wrong_users.c_ptr_safe());
+  else
+    result= write_bin_log(thd, TRUE, thd->query(), thd->query_length());
+
+  mysql_rwlock_unlock(&LOCK_grant);
+
+  DBUG_RETURN(result);
+}
+
+
+bool mysql_grant(THD *thd, const char *db, List  &list,
+                 privilege_t rights, bool revoke_grant, bool is_proxy)
+{
+  List_iterator  str_list (list);
+  LEX_USER *Str, *tmp_Str, *proxied_user= NULL;
+  char tmp_db[SAFE_NAME_LEN+1];
+  bool create_new_users=0;
+  int result;
+  DBUG_ENTER("mysql_grant");
+
+  if (lower_case_table_names && db)
+  {
+    char *end= strnmov(tmp_db,db, sizeof(tmp_db));
+    if (end >= tmp_db + sizeof(tmp_db))
+    {
+      my_error(ER_WRONG_DB_NAME ,MYF(0), db);
+      DBUG_RETURN(TRUE);
+    }
+    my_casedn_str(files_charset_info, tmp_db);
+    db=tmp_db;
+  }
+
+  if (is_proxy)
+  {
+    DBUG_ASSERT(!db);
+    proxied_user= str_list++;
+  }
+
+  const uint tables_to_open= Table_user | (is_proxy ? Table_proxies_priv : Table_db);
+  Grant_tables tables;
+  if ((result= tables.open_and_lock(thd, tables_to_open, TL_WRITE)))
+    DBUG_RETURN(result != 1);
+
+  DBUG_ASSERT(!thd->is_current_stmt_binlog_format_row());
+
+  if (!revoke_grant)
+    create_new_users= test_if_create_new_users(thd);
+
+  /* go through users in user_list */
+  mysql_rwlock_wrlock(&LOCK_grant);
+  mysql_mutex_lock(&acl_cache->lock);
+  grant_version++;
+
+  if (proxied_user)
+  {
+    if (!(proxied_user= get_current_user(thd, proxied_user, false)))
+      DBUG_RETURN(TRUE);
+    DBUG_ASSERT(proxied_user->host.length); // not a Role
+  }
+
+  while ((tmp_Str = str_list++))
+  {
+    if (!(Str= get_current_user(thd, tmp_Str, false)))
+    {
+      result= true;
+      continue;
+    }
+
+    if (copy_and_check_auth(Str, tmp_Str, thd) ||
+        replace_user_table(thd, tables.user_table(), Str,
+                           (!db ? rights : NO_ACL),
+                           revoke_grant, create_new_users,
+                           MY_TEST(!is_public(Str) &&
+                                   (thd->variables.sql_mode &
+                                    MODE_NO_AUTO_CREATE_USER))))
+      result= true;
+    else if (db)
+    {
+      privilege_t db_rights(rights & DB_ACLS);
+      if (db_rights  == rights)
+      {
+	if (replace_db_table(tables.db_table().table(), db, *Str, db_rights,
+			     revoke_grant))
+	  result= true;
+      }
+      else
+      {
+	my_error(ER_WRONG_USAGE, MYF(0), "DB GRANT", "GLOBAL PRIVILEGES");
+	result= true;
+      }
+    }
+    else if (is_proxy)
+    {
+      if (replace_proxies_priv_table(thd, tables.proxies_priv_table().table(),
+            Str, proxied_user, rights & GRANT_ACL ? TRUE : FALSE, revoke_grant))
+        result= true;
+    }
+    if (Str->is_role())
+      propagate_role_grants(find_acl_role(Str->user.str, true),
+                            db ? PRIVS_TO_MERGE::DB : PRIVS_TO_MERGE::GLOBAL,
+                            db);
+  }
+  mysql_mutex_unlock(&acl_cache->lock);
+
+  if (!result)
+  {
+    result= write_bin_log(thd, TRUE, thd->query(), thd->query_length());
+  }
+
+  mysql_rwlock_unlock(&LOCK_grant);
+
+  if (!result)
+    my_ok(thd);
+
+  DBUG_RETURN(result);
+}
+
+
/* Free grant array if possible */

void  grant_free(void)
{
  DBUG_ENTER("grant_free");
  /* Drop every table/column/routine-level privilege hash. */
  my_hash_free(&column_priv_hash);
  my_hash_free(&proc_priv_hash);
  my_hash_free(&func_priv_hash);
  my_hash_free(&package_spec_priv_hash);
  my_hash_free(&package_body_priv_hash);
  /*
    The hash elements were allocated on grant_memroot (see grant_load()),
    so the memroot must be released after the hashes, never before.
  */
  free_root(&grant_memroot,MYF(0));
  DBUG_VOID_RETURN;
}
+
+
+/**
+  @brief Initialize structures responsible for table/column-level privilege
+   checking and load information for them from tables in the 'mysql' database.
+
+  @return Error status
+    @retval 0 OK
+    @retval 1 Could not initialize grant subsystem.
+*/
+
+bool grant_init()
+{
+  THD  *thd;
+  bool return_val;
+  DBUG_ENTER("grant_init");
+
+  if (!(thd= new THD(0)))
+    DBUG_RETURN(1);				/* purecov: deadcode */
+  thd->thread_stack= (char*) &thd;
+  thd->store_globals();
+  return_val=  grant_reload(thd);
+  delete thd;
+  DBUG_RETURN(return_val);
+}
+
+
+/**
+  @brief Initialize structures responsible for table/column-level privilege
+    checking and load information about grants from open privilege tables.
+
+  @param thd Current thread
+  @param tables List containing open "mysql.tables_priv" and
+    "mysql.columns_priv" tables.
+
+  @see grant_reload
+
+  @return Error state
+    @retval FALSE Success
+    @retval TRUE Error
+*/
+
static bool grant_load(THD *thd,
                       const Tables_priv_table& tables_priv,
                       const Columns_priv_table& columns_priv,
                       const Procs_priv_table& procs_priv)
{
  bool return_val= 1;                     // pessimistic: flipped to 0 on success
  TABLE *t_table, *c_table, *p_table;
  MEM_ROOT *save_mem_root= thd->mem_root;
  DBUG_ENTER("grant_load");

  /* PAD_CHAR_TO_FULL_LENGTH would corrupt CHAR column values read below. */
  Sql_mode_instant_remove sms(thd, MODE_PAD_CHAR_TO_FULL_LENGTH);

  /* (Re-)create all privilege hashes; elements live on grant_memroot. */
  (void) my_hash_init(key_memory_acl_memex, &column_priv_hash,
                      &my_charset_utf8mb3_bin, 0,0,0, (my_hash_get_key)
                      get_grant_table, (my_hash_free_key) free_grant_table, 0);
  (void) my_hash_init(key_memory_acl_memex, &proc_priv_hash,
                      &my_charset_utf8mb3_bin, 0,0,0, (my_hash_get_key)
                      get_grant_table, 0,0);
  (void) my_hash_init(key_memory_acl_memex, &func_priv_hash,
                      &my_charset_utf8mb3_bin, 0,0,0, (my_hash_get_key)
                      get_grant_table, 0,0);
  (void) my_hash_init(key_memory_acl_memex, &package_spec_priv_hash,
                      &my_charset_utf8mb3_bin, 0,0,0, (my_hash_get_key)
                      get_grant_table, 0,0);
  (void) my_hash_init(key_memory_acl_memex, &package_body_priv_hash,
                      &my_charset_utf8mb3_bin, 0,0,0, (my_hash_get_key)
                      get_grant_table, 0,0);
  init_sql_alloc(key_memory_acl_mem, &grant_memroot, ACL_ALLOC_BLOCK_SIZE, 0, MYF(0));

  t_table= tables_priv.table();
  c_table= columns_priv.table();
  p_table= procs_priv.table(); // this can be NULL

  if (t_table->file->ha_index_init(0, 1))
    goto end_index_init;

  t_table->use_all_columns();
  c_table->use_all_columns();

  /* Allocate all GRANT_TABLE/GRANT_NAME objects on grant_memroot. */
  thd->mem_root= &grant_memroot;

  /* Pass 1: full index scan of tables_priv (plus matching columns_priv). */
  if (!t_table->file->ha_index_first(t_table->record[0]))
  {
    do
    {
      GRANT_TABLE *mem_check;
      /* TODO(cvicentiu) convert this to use tables_priv and columns_priv. */
      if (!(mem_check= new (&grant_memroot) GRANT_TABLE(t_table, c_table)))
      {
	/* This could only happen if we are out memory */
	goto end_unlock;
      }

      if (opt_skip_name_resolve)
      {
	if (hostname_requires_resolving(mem_check->host.hostname))
	{
          sql_print_warning("'tables_priv' entry '%s %s@%s' "
                            "ignored in --skip-name-resolve mode.",
                            mem_check->tname, mem_check->user,
                            safe_str(mem_check->host.hostname));
	  continue;
	}
      }

      /* ok() is false for rows with no usable privilege bits. */
      if (! mem_check->ok())
	delete mem_check;
      else if (column_priv_insert(mem_check))
      {
	delete mem_check;
	goto end_unlock;
      }
    }
    while (!t_table->file->ha_index_next(t_table->record[0]));
  }

  /* The mandatory tables loaded fine; procs_priv below is best-effort. */
  return_val= 0;

  /* Pass 2: optional procs_priv scan (routines/functions/packages). */
  if (p_table)
  {
    if (p_table->file->ha_index_init(0, 1))
      goto end_unlock;

    p_table->use_all_columns();

    if (!p_table->file->ha_index_first(p_table->record[0]))
    {
      do
      {
        GRANT_NAME *mem_check;
        HASH *hash;
        if (!(mem_check= new (&grant_memroot) GRANT_NAME(p_table, TRUE)))
        {
          /* This could only happen if we are out memory */
          goto end_unlock_p;
        }

        if (opt_skip_name_resolve)
        {
          if (hostname_requires_resolving(mem_check->host.hostname))
          {
            sql_print_warning("'procs_priv' entry '%s %s@%s' "
                              "ignored in --skip-name-resolve mode.",
                              mem_check->tname, mem_check->user,
                              safe_str(mem_check->host.hostname));
            continue;
          }
        }
        /* Route the row into the hash matching its routine type. */
        enum_sp_type type= (enum_sp_type)procs_priv.routine_type()->val_int();
        const Sp_handler *sph= Sp_handler::handler(type);
        if (!sph || !(hash= sph->get_priv_hash()))
        {
          sql_print_warning("'procs_priv' entry '%s' "
                            "ignored, bad routine type",
                            mem_check->tname);
          continue;
        }

        mem_check->privs= fix_rights_for_procedure(mem_check->privs);
        mem_check->init_privs= mem_check->privs;
        if (! mem_check->ok())
          delete mem_check;
        else if (my_hash_insert(hash, (uchar*) mem_check))
        {
          delete mem_check;
          goto end_unlock_p;
        }
      }
      while (!p_table->file->ha_index_next(p_table->record[0]));
    }
  }

  /* Cleanup: close scans and restore the caller's mem_root. */
end_unlock_p:
  if (p_table)
    p_table->file->ha_index_end();
end_unlock:
  t_table->file->ha_index_end();
  thd->mem_root= save_mem_root;
end_index_init:
  DBUG_RETURN(return_val);
}
+
+static my_bool propagate_role_grants_action(void *role_ptr,
+                                            void *ptr __attribute__((unused)))
+{
+  ACL_ROLE *role= static_cast(role_ptr);
+  if (role->counter)
+    return 0;
+
+  mysql_mutex_assert_owner(&acl_cache->lock);
+  PRIVS_TO_MERGE data= { PRIVS_TO_MERGE::ALL, 0, 0 };
+  traverse_role_graph_up(role, &data, NULL, merge_role_privileges);
+  return 0;
+}
+
+
+/**
+  @brief Reload information about table and column level privileges if possible
+
+  @param thd Current thread
+
+  Locked tables are checked by acl_reload() and doesn't have to be checked
+  in this call.
+  This function is also used for initialization of structures responsible
+  for table/column-level privilege checking.
+
+  @return Error state
+    @retval FALSE Success
+    @retval TRUE  Error
+*/
+
bool grant_reload(THD *thd)
{
  /* Saved copies of the live hashes/memroot so a failed load can be undone. */
  HASH old_column_priv_hash, old_proc_priv_hash, old_func_priv_hash;
  HASH old_package_spec_priv_hash, old_package_body_priv_hash;
  MEM_ROOT old_mem;
  int result;
  DBUG_ENTER("grant_reload");

  /*
    To avoid deadlocks we should obtain table locks before
    obtaining LOCK_grant rwlock.
  */

  Grant_tables tables;
  const uint tables_to_open= Table_tables_priv | Table_columns_priv| Table_procs_priv;
  if ((result= tables.open_and_lock(thd, tables_to_open, TL_READ)))
    DBUG_RETURN(result != 1);

  mysql_rwlock_wrlock(&LOCK_grant);
  grant_version++;
  old_column_priv_hash= column_priv_hash;
  old_proc_priv_hash= proc_priv_hash;
  old_func_priv_hash= func_priv_hash;
  old_package_spec_priv_hash= package_spec_priv_hash;
  old_package_body_priv_hash= package_body_priv_hash;

  /*
    Create a new memory pool but save the current memory pool to make an undo
    operation possible in case of failure.
  */
  old_mem= grant_memroot;

  /* grant_load() re-initializes all the global hashes and grant_memroot. */
  if ((result= grant_load(thd,
                          tables.tables_priv_table(),
                          tables.columns_priv_table(),
                          tables.procs_priv_table())))
  {						// Error. Revert to old hash
    DBUG_PRINT("error",("Reverting to old privileges"));
    grant_free();				/* purecov: deadcode */
    column_priv_hash= old_column_priv_hash;	/* purecov: deadcode */
    proc_priv_hash= old_proc_priv_hash;
    func_priv_hash= old_func_priv_hash;
    package_spec_priv_hash= old_package_spec_priv_hash;
    package_body_priv_hash= old_package_body_priv_hash;
    grant_memroot= old_mem;                     /* purecov: deadcode */
  }
  else
  {
    /* Load succeeded: the saved copies are no longer needed. */
    my_hash_free(&old_column_priv_hash);
    my_hash_free(&old_proc_priv_hash);
    my_hash_free(&old_func_priv_hash);
    my_hash_free(&old_package_spec_priv_hash);
    my_hash_free(&old_package_body_priv_hash);
    free_root(&old_mem,MYF(0));
  }

  /* Re-merge role privileges since table/column grants may have changed. */
  mysql_mutex_lock(&acl_cache->lock);
  my_hash_iterate(&acl_roles, propagate_role_grants_action, NULL);
  mysql_mutex_unlock(&acl_cache->lock);

  mysql_rwlock_unlock(&LOCK_grant);

  close_mysql_tables(thd);

  DBUG_RETURN(result);
}
+
+
+/**
+  @brief Check table level grants
+
+  @param thd          Thread handler
+  @param want_access  Bits of privileges user needs to have.
+  @param tables       List of tables to check. The user should have
+                      'want_access' to all tables in list.
+  @param any_combination_will_do TRUE if it's enough to have any privilege for
+    any combination of the table columns.
+  @param number       Check at most this number of tables.
+  @param no_errors    TRUE if no error should be sent directly to the client.
+
+  If table->grant.want_privilege != 0 then the requested privileges where
+  in the set of COL_ACLS but access was not granted on the table level. As
+  a consequence an extra check of column privileges is required.
+
+  Specifically if this function returns FALSE the user has some kind of
+  privilege on a combination of columns in each table.
+
  This function is usually preceded by check_access, which establishes the
  User-, Db- and Host access rights.
+
+  @see check_access
+  @see check_table_access
+
+  @note
+     This function assumes that either number of tables to be inspected
+     by it is limited explicitly (i.e. is not UINT_MAX) or table list
+     used and thd->lex->query_tables_own_last value correspond to each
+     other (the latter should be either 0 or point to next_global member
+     of one of elements of this table list).
+
+     We delay locking of LOCK_grant until we really need it as we assume that
+     most privileges be resolved with user or db level accesses.
+
+   @return Access status
+     @retval FALSE Access granted; But column privileges might need to be
+      checked.
+     @retval TRUE The user did not have the requested privileges on any of the
+      tables.
+
+*/
+
bool check_grant(THD *thd, privilege_t want_access, TABLE_LIST *tables,
                 bool any_combination_will_do, uint number, bool no_errors)
{
  TABLE_LIST *tl;
  TABLE_LIST *first_not_own_table= thd->lex->first_not_own_table();
  Security_context *sctx= thd->security_ctx;
  uint i;
  privilege_t original_want_access(want_access);
  bool locked= 0;              // LOCK_grant is taken lazily, only if needed
  DBUG_ENTER("check_grant");
  DBUG_ASSERT(number > 0);

  /*
    Walk through the list of tables that belong to the query and save the
    requested access (orig_want_privilege) to be able to use it when
    checking access rights to the underlying tables of a view. Our grant
    system gradually eliminates checked bits from want_privilege and thus
    after all checks are done we can no longer use it.
    The check that first_not_own_table is not reached is for the case when
    the given table list refers to the list for prelocking (contains tables
    of other queries). For simple queries first_not_own_table is 0.
  */
  for (i= 0, tl= tables;
       i < number  && tl != first_not_own_table;
       tl= tl->next_global, i++)
  {
    /*
      Save a copy of the privileges without the SHOW_VIEW_ACL attribute.
      It will be checked during making view.
    */
    tl->grant.orig_want_privilege= (want_access & ~SHOW_VIEW_ACL);
  }
  number= i;

  /* Main pass: check each of the (possibly clamped) `number` tables. */
  for (tl= tables; number-- ; tl= tl->next_global)
  {
    /* For merged views/derived tables, check the underlying table. */
    TABLE_LIST *const t_ref=
      tl->correspondent_table ? tl->correspondent_table : tl;
    sctx= t_ref->security_ctx ? t_ref->security_ctx : thd->security_ctx;
    privilege_t orig_want_access(original_want_access);

    /*
      If sequence is used as part of NEXT VALUE, PREVIOUS VALUE or SELECT,
      we need to modify the requested access rights depending on how the
      sequence is used.
    */
    if (t_ref->sequence &&
        !(want_access & ~(SELECT_ACL | INSERT_ACL | UPDATE_ACL | DELETE_ACL)))
    {
      /*
        We want to have either SELECT or INSERT rights to sequences depending
        on how they are accessed
      */
      orig_want_access= ((t_ref->lock_type >= TL_FIRST_WRITE) ?
                         INSERT_ACL : SELECT_ACL);
    }

    /* CTEs and tables without a database need no privilege check. */
    if (tl->with || !tl->db.str ||
        (tl->select_lex &&
         (tl->with= tl->select_lex->find_table_def_in_with_clauses(tl))))
      continue;

    /* ACL rules for system schemas (e.g. information_schema) come first. */
    const ACL_internal_table_access *access=
      get_cached_table_access(&t_ref->grant.m_internal,
                              t_ref->get_db_name(),
                              t_ref->get_table_name());

    if (access)
    {
      switch(access->check(orig_want_access, &t_ref->grant.privilege))
      {
      case ACL_INTERNAL_ACCESS_GRANTED:
        t_ref->grant.privilege|= orig_want_access;
        t_ref->grant.want_privilege= NO_ACL;
        continue;
      case ACL_INTERNAL_ACCESS_DENIED:
        goto err;
      case ACL_INTERNAL_ACCESS_CHECK_GRANT:
        break;
      }
    }

    /* Remove bits already satisfied by global (user-level) privileges. */
    want_access= orig_want_access;
    want_access&= ~sctx->master_access;
    if (!want_access)
      continue;                                 // ok

    if (!(~t_ref->grant.privilege & want_access) ||
        t_ref->is_anonymous_derived_table() || t_ref->schema_table ||
        t_ref->table_function)
    {
      /*
        It is subquery in the FROM clause. VIEW set t_ref->derived after
        table opening, but this function always called before table opening.

        NOTE: is_derived() can't be used here because subquery in this case
        the FROM clase (derived tables) can be not be marked yet.
      */
      if (t_ref->is_anonymous_derived_table() || t_ref->schema_table ||
          t_ref->table_function)
      {
        /*
          If it's a temporary table created for a subquery in the FROM
          clause, or an INFORMATION_SCHEMA table, drop the request for
          a privilege.
        */
        t_ref->grant.want_privilege= NO_ACL;
      }
      continue;
    }

    if (is_temporary_table(t_ref))
    {
      /*
        If this table list element corresponds to a pre-opened temporary
        table skip checking of all relevant table-level privileges for it.
        Note that during creation of temporary table we still need to check
        if user has CREATE_TMP_ACL.
      */
      t_ref->grant.privilege|= TMP_TABLE_ACLS;
      t_ref->grant.want_privilege= NO_ACL;
      continue;
    }

    /* Table-level grants must be consulted; take the read lock once. */
    if (!locked)
    {
      locked= 1;
      mysql_rwlock_rdlock(&LOCK_grant);
    }

    t_ref->grant.read(sctx, t_ref->get_db_name(), t_ref->get_table_name());

    if (!t_ref->grant.grant_table_user &&
        !t_ref->grant.grant_table_role &&
        !t_ref->grant.grant_public)
    {
      want_access&= ~t_ref->grant.privilege;
      goto err;					// No grants
    }

    /*
      For SHOW COLUMNS, SHOW INDEX it is enough to have some
      privileges on any column combination on the table.
    */
    if (any_combination_will_do)
      continue;

    t_ref->grant.privilege|= t_ref->grant.aggregate_privs();
    /* Bits covered only by column-level grants are checked later. */
    t_ref->grant.want_privilege= ((want_access & COL_ACLS) & ~t_ref->grant.privilege);

    if (!(~t_ref->grant.privilege & want_access))
      continue;

    if ((want_access&= ~t_ref->grant.all_privilege()))
    {
      goto err;                                 // impossible
    }
  }
  if (locked)
    mysql_rwlock_unlock(&LOCK_grant);
  DBUG_RETURN(FALSE);

err:
  if (locked)
    mysql_rwlock_unlock(&LOCK_grant);
  if (!no_errors)				// Not a silent skip of table
  {
    char command[128];
    get_privilege_desc(command, sizeof(command), want_access);
    status_var_increment(thd->status_var.access_denied_errors);

    my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0),
             command,
             sctx->priv_user,
             sctx->host_or_ip, tl ? tl->db.str : "unknown",
             tl ? tl->get_table_name() : "unknown");
  }
  DBUG_RETURN(TRUE);
}
+
+
+static void check_grant_column_int(GRANT_TABLE *grant_table, const char *name,
+                                   uint length, privilege_t *want_access)
+{
+  if (grant_table)
+  {
+    *want_access&= ~grant_table->privs;
+    if (*want_access & grant_table->cols)
+    {
+      GRANT_COLUMN *grant_column= column_hash_search(grant_table, name, length);
+      if (grant_column)
+        *want_access&= ~grant_column->rights;
+    }
+  }
+}
+
+inline privilege_t GRANT_INFO::aggregate_privs()
+{
+  return (grant_table_user ? grant_table_user->privs : NO_ACL) |
+         (grant_table_role ?  grant_table_role->privs : NO_ACL) |
+         (grant_public ?  grant_public->privs : NO_ACL);
+}
+
+inline privilege_t GRANT_INFO::aggregate_cols()
+{
+  return (grant_table_user ? grant_table_user->cols : NO_ACL) |
+         (grant_table_role ?  grant_table_role->cols : NO_ACL) |
+         (grant_public ?  grant_public->cols : NO_ACL);
+}
+
+
+void GRANT_INFO::refresh(const Security_context *sctx,
+                         const char *db, const char *table)
+{
+  if (version != grant_version)
+    read(sctx, db, table);
+}
+
+void GRANT_INFO::read(const Security_context *sctx,
+                         const char *db, const char *table)
+{
+#ifdef EMBEDDED_LIBRARY
+  grant_table_user= grant_table_role= grant_public= NULL;
+#else
+  grant_table_user=
+    table_hash_search(sctx->host, sctx->ip, db, sctx->priv_user,
+                      table, FALSE);         /* purecov: inspected */
+  grant_table_role=
+    sctx->priv_role[0] ? table_hash_search("", NULL, db, sctx->priv_role,
+                                           table, TRUE) : NULL;
+  grant_public=
+    acl_public ? table_hash_search("", NULL, db, public_name.str,
+                                   table, TRUE) : NULL;
+#endif
+  version= grant_version;		/* purecov: inspected */
+}
+
+/*
+  Check column rights in given security context
+
+  SYNOPSIS
+    check_grant_column()
+    thd                  thread handler
+    grant                grant information structure
+    db_name              db name
+    table_name           table  name
+    name                 column name
+    length               column name length
+    sctx                 security context
+
+  RETURN
+    FALSE OK
+    TRUE  access denied
+*/
+
+bool check_grant_column(THD *thd, GRANT_INFO *grant,
+			const char *db_name, const char *table_name,
+			const char *name, size_t length,  Security_context *sctx)
+{
+  privilege_t want_access(grant->want_privilege & ~grant->privilege);
+  DBUG_ENTER("check_grant_column");
+  DBUG_PRINT("enter", ("table: %s  want_access: %llx",
+                       table_name, (longlong) want_access));
+
+  if (!want_access)
+    DBUG_RETURN(0);				// Already checked
+
+  mysql_rwlock_rdlock(&LOCK_grant);
+
+  /* reload table if someone has modified any grants */
+  grant->refresh(sctx, db_name, table_name);
+
+  check_grant_column_int(grant->grant_table_user, name, (uint)length,
+                         &want_access);
+  check_grant_column_int(grant->grant_table_role, name, (uint)length,
+                         &want_access);
+  check_grant_column_int(grant->grant_public, name, (uint)length,
+                         &want_access);
+
+  mysql_rwlock_unlock(&LOCK_grant);
+  if (!want_access)
+    DBUG_RETURN(0);
+
+  char command[128];
+  get_privilege_desc(command, sizeof(command), want_access);
+  /* TODO perhaps error should print current rolename aswell */
+  my_error(ER_COLUMNACCESS_DENIED_ERROR, MYF(0), command, sctx->priv_user,
+           sctx->host_or_ip, name, table_name);
+  DBUG_RETURN(1);
+}
+
+
+/*
+  Check the access right to a column depending on the type of table.
+
+  SYNOPSIS
+    check_column_grant_in_table_ref()
+    thd              thread handler
+    table_ref        table reference where to check the field
+    name             name of field to check
+    length           length of name
+    fld              use fld object to check invisibility when it is
+                     not 0, not_found_field, view_ref_found
+
+  DESCRIPTION
+    Check the access rights to a column depending on the type of table
+    reference where the column is checked. The function provides a
+    generic interface to check column access rights that hides the
+    heterogeneity of the column representation - whether it is a view
    or a stored table column.
+
+  RETURN
+    FALSE OK
+    TRUE  access denied
+*/
+
+bool check_column_grant_in_table_ref(THD *thd, TABLE_LIST * table_ref,
+                                     const char *name, size_t length,
+                                     Field *fld)
+{
+  GRANT_INFO *grant;
+  const char *db_name;
+  const char *table_name;
+  Security_context *sctx= table_ref->security_ctx ?
+                          table_ref->security_ctx : thd->security_ctx;
+  if (fld && fld != not_found_field && fld != view_ref_found
+          && fld->invisible >= INVISIBLE_SYSTEM)
+      return false;
+
+  if (table_ref->view || table_ref->field_translation)
+  {
+    /* View or derived information schema table. */
+    privilege_t view_privs(NO_ACL);
+    grant= &(table_ref->grant);
+    db_name= table_ref->view_db.str;
+    table_name= table_ref->view_name.str;
+    if (table_ref->belong_to_view &&
+        thd->lex->sql_command == SQLCOM_SHOW_FIELDS)
+    {
+      view_privs= get_column_grant(thd, grant, db_name, table_name, name);
+      if (view_privs & VIEW_ANY_ACL)
+      {
+        table_ref->belong_to_view->allowed_show= TRUE;
+        return FALSE;
+      }
+      table_ref->belong_to_view->allowed_show= FALSE;
+      my_message(ER_VIEW_NO_EXPLAIN, ER_THD(thd, ER_VIEW_NO_EXPLAIN), MYF(0));
+      return TRUE;
+    }
+  }
+  else
+  {
+    /* Normal or temporary table. */
+    TABLE *table= table_ref->table;
+    grant= &(table->grant);
+    db_name= table->s->db.str;
+    table_name= table->s->table_name.str;
+  }
+
+  if (grant->want_privilege)
+    return check_grant_column(thd, grant, db_name, table_name, name,
+                              length, sctx);
+  else
+    return FALSE;
+
+}
+
+
+/**
+  @brief check if a query can access a set of columns
+
+  @param  thd  the current thread
+  @param  want_access_arg  the privileges requested
+  @param  fields an iterator over the fields of a table reference.
+  @return Operation status
+    @retval 0 Success
    @retval 1 Failure
+  @details This function walks over the columns of a table reference
+   The columns may originate from different tables, depending on the kind of
+   table reference, e.g. join, view.
+   For each table it will retrieve the grant information and will use it
+   to check the required access privileges for the fields requested from it.
+*/
bool check_grant_all_columns(THD *thd, privilege_t want_access_arg,
                             Field_iterator_table_ref *fields)
{
  Security_context *sctx= thd->security_ctx;
  privilege_t want_access(NO_ACL);
  /* Track the current table; grant info is refreshed when it changes. */
  const char *table_name= NULL;
  const char* db_name= NULL;
  GRANT_INFO *grant;
  /* Valid only while want_access != 0 for the current table. */
  GRANT_TABLE *UNINIT_VAR(grant_table);
  GRANT_TABLE *UNINIT_VAR(grant_table_role);
  GRANT_TABLE *UNINIT_VAR(grant_public);
  /*
     Flag that gets set if privilege checking has to be performed on column
     level.
  */
  bool using_column_privileges= FALSE;

  mysql_rwlock_rdlock(&LOCK_grant);

  for (; !fields->end_of_fields(); fields->next())
  {
    /* System-invisible columns are exempt from column privileges. */
    if (fields->field() &&
        fields->field()->invisible >= INVISIBLE_SYSTEM)
      continue;
    LEX_CSTRING *field_name= fields->name();

    if (table_name != fields->get_table_name())
    {
      /* Entered a new table: recompute which bits still need checking. */
      table_name= fields->get_table_name();
      db_name= fields->get_db_name();
      grant= fields->grant();
      /* get a fresh one for each table */
      want_access= want_access_arg & ~grant->privilege;
      if (want_access)
      {
        /* reload table if someone has modified any grants */
        grant->refresh(sctx, db_name, table_name);

        grant_table= grant->grant_table_user;
        grant_table_role= grant->grant_table_role;
        grant_public= grant->grant_public;
        /* No grant entry at all: access is denied immediately. */
        if (!grant_table && !grant_table_role && !grant_public)
          goto err;
      }
    }

    if (want_access)
    {
      /* Union of column-level rights from user, role and PUBLIC grants. */
      privilege_t have_access(NO_ACL);
      if (grant_table)
      {
        GRANT_COLUMN *grant_column=
          column_hash_search(grant_table, field_name->str, field_name->length);
        if (grant_column)
          have_access= grant_column->rights;
      }
      if (grant_table_role)
      {
        GRANT_COLUMN *grant_column=
          column_hash_search(grant_table_role, field_name->str,
                             field_name->length);
        if (grant_column)
          have_access|= grant_column->rights;
      }
      if (grant_public)
      {
        GRANT_COLUMN *grant_column=
          column_hash_search(grant_public, field_name->str,
                             field_name->length);
        if (grant_column)
          have_access|= grant_column->rights;

      }

      if (have_access)
        using_column_privileges= TRUE;
      /* Every wanted bit must be covered by the column grants. */
      if (want_access & ~have_access)
        goto err;
    }
  }
  mysql_rwlock_unlock(&LOCK_grant);
  return 0;

err:
  mysql_rwlock_unlock(&LOCK_grant);

  char command[128];
  get_privilege_desc(command, sizeof(command), want_access);
  /*
    Do not give an error message listing a column name unless the user has
    privilege to see all columns.
  */
  if (using_column_privileges)
    my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0),
             command, sctx->priv_user,
             sctx->host_or_ip, db_name, table_name);
  else
    my_error(ER_COLUMNACCESS_DENIED_ERROR, MYF(0),
             command,
             sctx->priv_user,
             sctx->host_or_ip,
             fields->name()->str,
             table_name);
  return 1;
}
+
+
+/*
+  Check whether 'hash' (one of the routine grant hashes) contains any
+  grant on a routine in database 'db' that applies to the current user
+  or to the currently active role.
+
+  RETURN
+    FALSE  a matching grant exists (access allowed)
+    TRUE   no matching grant found
+*/
+static bool check_grant_db_routine(THD *thd, const char *db, HASH *hash)
+{
+  Security_context *sctx= thd->security_ctx;
+
+  for (uint i= 0; i < hash->records; i++)
+  {
+    GRANT_NAME *grant= (GRANT_NAME*) my_hash_element(hash, i);
+
+    /* Entries for other databases cannot match */
+    if (strcmp(grant->db, db) != 0)
+      continue;
+
+    /* Grant given directly to the user: name and host pattern must match */
+    if (!strcmp(grant->user, sctx->priv_user) &&
+        compare_hostname(&grant->host, sctx->host, sctx->ip))
+      return FALSE;
+
+    /* Grant given to the active role (role entries have an empty host) */
+    if (sctx->priv_role[0] &&
+        !strcmp(grant->user, sctx->priv_role) &&
+        (!grant->host.hostname || !grant->host.hostname[0]))
+      return FALSE;
+  }
+
+  return TRUE;
+}
+
+
+/*
+  Check if a user has the right to access a database.
+
+  Access is accepted if the user (or the active role) has a grant for any
+  table in the database, or — failing that — for any routine (procedure,
+  function, package spec or package body) in the database.
+
+  RETURN
+    0  access allowed
+    1  access denied
+*/
+
+bool check_grant_db(THD *thd, const char *db)
+{
+  Security_context *sctx= thd->security_ctx;
+  char helping [SAFE_NAME_LEN + USERNAME_LENGTH+2], *end;
+  char helping2 [SAFE_NAME_LEN + USERNAME_LENGTH+2], *tmp_db;
+  uint len, UNINIT_VAR(len2);
+  bool error= TRUE;
+
+  /* Build the "<user>\0<db>\0" prefix matching column_priv_hash keys */
+  tmp_db= strmov(helping, sctx->priv_user) + 1;
+  end= strnmov(tmp_db, db, helping + sizeof(helping) - tmp_db);
+
+  if (end >= helping + sizeof(helping)) // db name was truncated
+    return 1;                           // no privileges for an invalid db name
+
+  if (lower_case_table_names)
+  {
+    /* Grants are stored lower-cased in this mode; compare accordingly */
+    end = tmp_db + my_casedn_str(files_charset_info, tmp_db);
+    db=tmp_db;
+  }
+
+  len= (uint) (end - helping) + 1;
+
+  /*
+     If a role is set, we need to check for privileges here as well.
+     helping2 holds the "<role>\0<db>\0" prefix; len2 is only meaningful
+     when sctx->priv_role[0] is set (hence the UNINIT_VAR above).
+  */
+  if (sctx->priv_role[0])
+  {
+    end= strmov(helping2, sctx->priv_role) + 1;
+    end= strnmov(end, db, helping2 + sizeof(helping2) - end);
+    len2= (uint) (end - helping2) + 1;
+  }
+
+
+  mysql_rwlock_rdlock(&LOCK_grant);
+
+  for (uint idx=0 ; idx < column_priv_hash.records ; idx++)
+  {
+    GRANT_TABLE *grant_table= (GRANT_TABLE*) my_hash_element(&column_priv_hash,
+                                                             idx);
+    /* Prefix compare: any table grant in this db is enough */
+    if (len < grant_table->key_length &&
+        !memcmp(grant_table->hash_key, helping, len) &&
+        compare_hostname(&grant_table->host, sctx->host, sctx->ip))
+    {
+      error= FALSE; /* Found match. */
+      break;
+    }
+    if (sctx->priv_role[0] &&
+        len2 < grant_table->key_length &&
+        !memcmp(grant_table->hash_key, helping2, len2) &&
+        (!grant_table->host.hostname || !grant_table->host.hostname[0]))
+    {
+      error= FALSE; /* Found role match */
+      break;
+    }
+  }
+
+  /* No table grant found: accept if any routine in the db is granted */
+  if (error)
+    error= check_grant_db_routine(thd, db, &proc_priv_hash) &&
+           check_grant_db_routine(thd, db, &func_priv_hash) &&
+           check_grant_db_routine(thd, db, &package_spec_priv_hash) &&
+           check_grant_db_routine(thd, db, &package_body_priv_hash);
+
+  mysql_rwlock_unlock(&LOCK_grant);
+
+  return error;
+}
+
+
+/****************************************************************************
+  Check routine level grants
+
+  SYNOPSIS
+   bool check_grant_routine()
+   thd		Thread handler
+   want_access  Bits of privileges the user needs to have
+   procs	List of routines to check. The user should have 'want_access'
+                on every routine in the list
+   sph          SP handler (procedure/function/package spec/package body)
+   no_errors	If 0 then we write an error. The error is sent directly to
+		the client
+
+   RETURN
+     0  ok
+     1  Error: User did not have the requested privileges
+
+   NOTE
+     Privileges already held globally (sctx->master_access) are subtracted
+     first; the remaining bits must be covered by routine-level grants of
+     the user, the active role, or PUBLIC.
+****************************************************************************/
+
+bool check_grant_routine(THD *thd, privilege_t want_access,
+			 TABLE_LIST *procs, const Sp_handler *sph,
+			 bool no_errors)
+{
+  TABLE_LIST *table;
+  Security_context *sctx= thd->security_ctx;
+  char *user= sctx->priv_user;
+  char *host= sctx->priv_host;
+  char *role= sctx->priv_role;
+  DBUG_ENTER("check_grant_routine");
+
+  want_access&= ~sctx->master_access;
+  if (!want_access)
+    DBUG_RETURN(0);                             // ok
+
+  mysql_rwlock_rdlock(&LOCK_grant);
+  for (table= procs; table; table= table->next_global)
+  {
+    GRANT_NAME *grant_proc;
+    /* Privileges granted directly to the user */
+    if ((grant_proc= routine_hash_search(host, sctx->ip, table->db.str, user,
+                                         table->table_name.str, sph, 0)))
+      table->grant.privilege|= grant_proc->privs;
+    if (role[0]) /* current role set check */
+    {
+      if ((grant_proc= routine_hash_search("", NULL, table->db.str, role,
+                                           table->table_name.str, sph, 0)))
+      table->grant.privilege|= grant_proc->privs;
+    }
+    /* Privileges granted to PUBLIC */
+    if (acl_public)
+    {
+      if ((grant_proc= routine_hash_search("", NULL, table->db.str,
+                                           public_name.str,
+                                           table->table_name.str, sph, 0)))
+      table->grant.privilege|= grant_proc->privs;
+    }
+
+    if (want_access & ~table->grant.privilege)
+    {
+      /* Keep only the missing bits for the error message below */
+      want_access &= ~table->grant.privilege;
+      goto err;
+    }
+  }
+  mysql_rwlock_unlock(&LOCK_grant);
+  DBUG_RETURN(0);
+err:
+  mysql_rwlock_unlock(&LOCK_grant);
+  if (!no_errors)
+  {
+    char buff[1024];
+    const char *command="";
+    if (table)
+      strxmov(buff, table->db.str, ".", table->table_name.str, NullS);
+    if (want_access & EXECUTE_ACL)
+      command= "execute";
+    else if (want_access & ALTER_PROC_ACL)
+      command= "alter routine";
+    else if (want_access & GRANT_ACL)
+      command= "grant";
+    my_error(ER_PROCACCESS_DENIED_ERROR, MYF(0),
+             command, user, host, table ? buff : "unknown");
+  }
+  DBUG_RETURN(1);
+}
+
+
+/*
+  Check if the user (or the currently active role) has any routine-level
+  grant on the given routine.
+
+  SYNOPSIS
+   bool    check_routine_level_acl()
+   thd	        Thread handler
+   db           Database name
+   name         Routine name
+
+  RETURN
+   0            at least one routine-level privilege exists
+   1            no routine-level privileges
+*/
+
+bool check_routine_level_acl(THD *thd, const char *db, const char *name,
+                             const Sp_handler *sph)
+{
+  bool denied= TRUE;
+  GRANT_NAME *proc_grant;
+  Security_context *sctx= thd->security_ctx;
+
+  mysql_rwlock_rdlock(&LOCK_grant);
+
+  /* First look for a grant given directly to the user */
+  proc_grant= routine_hash_search(sctx->priv_host, sctx->ip, db,
+                                  sctx->priv_user, name, sph, 0);
+  if (proc_grant)
+    denied= !(proc_grant->privs & SHOW_PROC_ACLS);
+
+  /* Still denied? Try the active role (role entries have an empty host) */
+  if (denied && sctx->priv_role[0])
+  {
+    proc_grant= routine_hash_search("", NULL, db, sctx->priv_role,
+                                    name, sph, 0);
+    if (proc_grant)
+      denied= !(proc_grant->privs & SHOW_PROC_ACLS);
+  }
+
+  mysql_rwlock_unlock(&LOCK_grant);
+  return denied;
+}
+
+
+/*****************************************************************************
+  Functions to retrieve the grant for a table/column  (for SHOW functions)
+*****************************************************************************/
+
+/* Return the aggregated table-level privileges for 'table' (SHOW helper) */
+
+privilege_t get_table_grant(THD *thd, TABLE_LIST *table)
+{
+  privilege_t access(NO_ACL);
+  Security_context *sctx= thd->security_ctx;
+  const char *db= table->db.str ? table->db.str : thd->db.str;
+
+  mysql_rwlock_rdlock(&LOCK_grant);
+  /* Refresh grant info and fold in user/role/public table privileges */
+  table->grant.read(sctx, db, table->table_name.str);
+  table->grant.privilege|= table->grant.aggregate_privs();
+  access= table->grant.privilege;
+  mysql_rwlock_unlock(&LOCK_grant);
+  return access;
+}
+
+
+/*
+  Determine the access privileges for a field.
+
+  SYNOPSIS
+    get_column_grant()
+    thd         thread handler
+    grant       grants table descriptor
+    db_name     name of database that the field belongs to
+    table_name  name of table that the field belongs to
+    field_name  name of field
+
+  DESCRIPTION
+    Folds together the table-level privileges of the user, the active role
+    and PUBLIC, plus any column-level rights on 'field_name'.
+    The procedure may also modify: grant->grant_table and grant->version.
+
+  RETURN
+    The access privileges for the field db_name.table_name.field_name
+*/
+
+privilege_t get_column_grant(THD *thd, GRANT_INFO *grant,
+                        const char *db_name, const char *table_name,
+                        const char *field_name)
+{
+  privilege_t priv(NO_ACL);
+  size_t field_length= strlen(field_name);
+
+  mysql_rwlock_rdlock(&LOCK_grant);
+  /* reload table if someone has modified any grants */
+  grant->refresh(thd->security_ctx, db_name, table_name);
+
+  /* user-, role- and PUBLIC-level table grants, handled uniformly below */
+  GRANT_TABLE *tabs[3]= { grant->grant_table_user,
+                          grant->grant_table_role,
+                          grant->grant_public };
+
+  if (!tabs[0] && !tabs[1] && !tabs[2])
+    priv= grant->privilege;             // no table-level grants at all
+  else
+  {
+    for (uint i= 0; i < 3; i++)
+    {
+      if (!tabs[i])
+        continue;
+      GRANT_COLUMN *col= column_hash_search(tabs[i], field_name,
+                                            (uint) field_length);
+      priv|= grant->privilege | tabs[i]->privs;
+      if (col)
+        priv|= col->rights;
+    }
+  }
+  mysql_rwlock_unlock(&LOCK_grant);
+  return priv;
+}
+
+
+/* Help function for mysql_show_grants: append " NAME value" unless 0 */
+
+static void add_user_option(String *grant, long value, const char *name,
+                            bool is_signed)
+{
+  if (!value)
+    return;
+  char buff[22], *end; // just as in int2str
+  grant->append(' ');
+  grant->append(name, strlen(name));
+  grant->append(' ');
+  end= int10_to_str(value, buff, is_signed ? -10 : 10);
+  grant->append(buff, end - buff);
+}
+
+
+/* As above, for floating-point resource limits (e.g. MAX_STATEMENT_TIME) */
+static void add_user_option(String *grant, double value, const char *name)
+{
+  if (value == 0.0)
+    return;
+  char buff[FLOATING_POINT_BUFFER];
+  grant->append(' ');
+  grant->append(name, strlen(name));
+  grant->append(' ');
+  size_t len= my_fcvt(value, 6, buff, NULL);
+  grant->append(buff, len);
+}
+
+/*
+  Append the user-definition tail used by SHOW CREATE USER / SHOW GRANTS:
+  "@host", the IDENTIFIED BY/VIA clause, the REQUIRE (SSL) clause and the
+  WITH resource-limit options.  'with_grant' additionally emits
+  WITH GRANT OPTION inside the WITH clause.
+*/
+static void add_user_parameters(THD *thd, String *result, ACL_USER* acl_user,
+                                bool with_grant)
+{
+  result->append('@');
+  append_identifier(thd, result, acl_user->host.hostname,
+                    acl_user->hostname_length);
+
+  /*
+    A single built-in password plugin is printed with the legacy
+    IDENTIFIED BY PASSWORD syntax; anything else uses IDENTIFIED VIA.
+    Plugin names are interned, so pointer comparison is sufficient here.
+  */
+  if (acl_user->nauth == 1 &&
+      (acl_user->auth->plugin.str == native_password_plugin_name.str ||
+       acl_user->auth->plugin.str == old_password_plugin_name.str))
+  {
+    if (acl_user->auth->auth_string.length)
+    {
+      result->append(STRING_WITH_LEN(" IDENTIFIED BY PASSWORD '"));
+      result->append(&acl_user->auth->auth_string);
+      result->append('\'');
+    }
+  }
+  else
+  {
+    result->append(STRING_WITH_LEN(" IDENTIFIED VIA "));
+    for (uint i=0; i < acl_user->nauth; i++)
+    {
+      if (i)
+        result->append(STRING_WITH_LEN(" OR "));
+      result->append(&acl_user->auth[i].plugin);
+      if (acl_user->auth[i].auth_string.length)
+      {
+        result->append(STRING_WITH_LEN(" USING '"));
+        result->append(&acl_user->auth[i].auth_string);
+        result->append('\'');
+      }
+    }
+  }
+  /* "show grants" SSL related stuff */
+  if (acl_user->ssl_type == SSL_TYPE_ANY)
+    result->append(STRING_WITH_LEN(" REQUIRE SSL"));
+  else if (acl_user->ssl_type == SSL_TYPE_X509)
+    result->append(STRING_WITH_LEN(" REQUIRE X509"));
+  else if (acl_user->ssl_type == SSL_TYPE_SPECIFIED)
+  {
+    /* ssl_options counts printed items so later ones get a separator */
+    int ssl_options = 0;
+    result->append(STRING_WITH_LEN(" REQUIRE "));
+    if (acl_user->x509_issuer[0])
+    {
+      ssl_options++;
+      result->append(STRING_WITH_LEN("ISSUER \'"));
+      result->append(acl_user->x509_issuer,strlen(acl_user->x509_issuer));
+      result->append('\'');
+    }
+    if (acl_user->x509_subject[0])
+    {
+      if (ssl_options++)
+        result->append(' ');
+      result->append(STRING_WITH_LEN("SUBJECT \'"));
+      result->append(acl_user->x509_subject,strlen(acl_user->x509_subject),
+                    system_charset_info);
+      result->append('\'');
+    }
+    if (acl_user->ssl_cipher)
+    {
+      if (ssl_options++)
+        result->append(' ');
+      result->append(STRING_WITH_LEN("CIPHER '"));
+      result->append(acl_user->ssl_cipher,strlen(acl_user->ssl_cipher),
+                    system_charset_info);
+      result->append('\'');
+    }
+  }
+  /* WITH clause: grant option and/or any non-default resource limits */
+  if (with_grant ||
+      (acl_user->user_resource.questions ||
+       acl_user->user_resource.updates ||
+       acl_user->user_resource.conn_per_hour ||
+       acl_user->user_resource.user_conn ||
+       acl_user->user_resource.max_statement_time != 0.0))
+  {
+    result->append(STRING_WITH_LEN(" WITH"));
+    if (with_grant)
+      result->append(STRING_WITH_LEN(" GRANT OPTION"));
+    add_user_option(result, acl_user->user_resource.questions,
+                    "MAX_QUERIES_PER_HOUR", false);
+    add_user_option(result, acl_user->user_resource.updates,
+                    "MAX_UPDATES_PER_HOUR", false);
+    add_user_option(result, acl_user->user_resource.conn_per_hour,
+                    "MAX_CONNECTIONS_PER_HOUR", false);
+    add_user_option(result, acl_user->user_resource.user_conn,
+                    "MAX_USER_CONNECTIONS", true);
+    add_user_option(result, acl_user->user_resource.max_statement_time,
+                    "MAX_STATEMENT_TIME");
+  }
+}
+
+/*
+  Printable name of each global privilege bit, in privilege_t bit order
+  (SELECT_ACL first).  Used when composing GRANT statements for SHOW GRANTS.
+*/
+static const char *command_array[]=
+{
+  "SELECT", "INSERT", "UPDATE", "DELETE", "CREATE", "DROP", "RELOAD",
+  "SHUTDOWN", "PROCESS","FILE", "GRANT", "REFERENCES", "INDEX",
+  "ALTER", "SHOW DATABASES", "SUPER", "CREATE TEMPORARY TABLES",
+  "LOCK TABLES", "EXECUTE", "REPLICATION SLAVE", "BINLOG MONITOR",
+  "CREATE VIEW", "SHOW VIEW", "CREATE ROUTINE", "ALTER ROUTINE",
+  "CREATE USER", "EVENT", "TRIGGER", "CREATE TABLESPACE", "DELETE HISTORY",
+  "SET USER", "FEDERATED ADMIN", "CONNECTION ADMIN", "READ_ONLY ADMIN",
+  "REPLICATION SLAVE ADMIN", "REPLICATION MASTER ADMIN", "BINLOG ADMIN",
+  "BINLOG REPLAY", "SLAVE MONITOR"
+};
+
+/*
+  strlen() of the corresponding command_array entry (avoids recomputing
+  the length for every printed privilege).  Kept in sync with the array
+  above; the static_asserts below check the element counts.
+*/
+static uint command_lengths[]=
+{
+  6, 6, 6, 6, 6, 4, 6,
+  8, 7, 4, 5, 10, 5,
+  5, 14, 5, 23,
+  11, 7, 17, 14,
+  11, 9, 14, 13,
+  11, 5, 7, 17, 14,
+  8, 15, 16, 15,
+  23, 24, 12,
+  13, 13
+};
+
+
+static_assert(array_elements(command_array) == PRIVILEGE_T_MAX_BIT + 1,
+              "The definition of command_array does not match privilege_t");
+static_assert(array_elements(command_lengths) == PRIVILEGE_T_MAX_BIT + 1,
+              "The definition of command_lengths does not match privilege_t");
+
+
+/*
+  Send all grant-like strings describing 'role' to the client:
+  granted roles, global, database, table/column and routine privileges.
+  Returns TRUE on protocol error.
+*/
+static bool print_grants_for_role(THD *thd, ACL_ROLE * role)
+{
+  char buff[1024];
+  /* The four routine namespaces are printed with identical calls */
+  const Sp_handler *handlers[]= { &sp_handler_procedure,
+                                  &sp_handler_function,
+                                  &sp_handler_package_spec,
+                                  &sp_handler_package_body };
+
+  if (show_role_grants(thd, "", role, buff, sizeof(buff)) ||
+      show_global_privileges(thd, role, TRUE, buff, sizeof(buff)) ||
+      show_database_privileges(thd, role->user.str, "", buff, sizeof(buff)) ||
+      show_table_and_column_privileges(thd, role->user.str, "", buff,
+                                       sizeof(buff)))
+    return TRUE;
+
+  for (uint i= 0; i < array_elements(handlers); i++)
+  {
+    if (show_routine_grants(thd, role->user.str, "", handlers[i],
+                            buff, sizeof(buff)))
+      return TRUE;
+  }
+
+  return FALSE;
+}
+
+/*
+  Append the automatic password-expiration clause for acl_user to r.
+  0 means "never expire", a positive value an explicit interval in days;
+  a negative value means "follow the server default" and prints nothing.
+*/
+static void append_auto_expiration_policy(ACL_USER *acl_user, String *r)
+{
+  if (acl_user->password_lifetime > 0)
+  {
+    r->append(STRING_WITH_LEN(" PASSWORD EXPIRE INTERVAL "));
+    r->append_longlong(acl_user->password_lifetime);
+    r->append(STRING_WITH_LEN(" DAY"));
+  }
+  else if (!acl_user->password_lifetime)
+    r->append(STRING_WITH_LEN(" PASSWORD EXPIRE NEVER"));
+}
+
+/*
+  SHOW CREATE USER: send a CREATE USER statement (and, when needed, a
+  follow-up ALTER USER for the auto-expiration policy) describing the
+  given account.
+
+  RETURN
+    FALSE  ok, result sent
+    TRUE   error (message already reported)
+*/
+bool mysql_show_create_user(THD *thd, LEX_USER *lex_user)
+{
+  const char *username= NULL, *hostname= NULL;
+  char buff[1024]; //Show create user should not take more than 1024 bytes.
+  Protocol *protocol= thd->protocol;
+  bool error= false;
+  ACL_USER *acl_user;
+  uint head_length;
+  DBUG_ENTER("mysql_show_create_user");
+
+  if (!initialized)
+  {
+    my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--skip-grant-tables");
+    DBUG_RETURN(TRUE);
+  }
+  if (get_show_user(thd, lex_user, &username, &hostname, NULL))
+    DBUG_RETURN(TRUE);
+
+  /* The template argument was lost in transit; metadata is a list of Item */
+  List<Item> field_list;
+  head_length= (uint) (strxmov(buff, "CREATE USER for ", username, "@",
+                               hostname, NullS) - buff);
+  Item_string *field = new (thd->mem_root) Item_string_ascii(thd, "", 0);
+  if (!field)
+    DBUG_RETURN(true);                          // Error given by my_alloc()
+
+  field->name.str= buff;
+  field->name.length= head_length;
+  field->max_length= sizeof(buff);
+  field_list.push_back(field, thd->mem_root);
+  if (protocol->send_result_set_metadata(&field_list,
+                                         Protocol::SEND_NUM_ROWS |
+                                         Protocol::SEND_EOF))
+    DBUG_RETURN(true);
+
+  String result(buff, sizeof(buff), system_charset_info);
+  result.length(0);
+  mysql_rwlock_rdlock(&LOCK_grant);
+  mysql_mutex_lock(&acl_cache->lock);
+
+  acl_user= find_user_exact(hostname, username);
+
+  // User not found in the internal data structures.
+  if (!acl_user)
+  {
+    my_error(ER_PASSWORD_NO_MATCH, MYF(0));
+    error= true;
+    goto end;
+  }
+
+  result.append(STRING_WITH_LEN("CREATE USER "));
+  append_identifier(thd, &result, username, strlen(username));
+  add_user_parameters(thd, &result, acl_user, false);
+
+  if (acl_user->account_locked)
+    result.append(STRING_WITH_LEN(" ACCOUNT LOCK"));
+
+  if (acl_user->password_expired)
+    result.append(STRING_WITH_LEN(" PASSWORD EXPIRE"));
+  else
+    append_auto_expiration_policy(acl_user, &result);
+
+  protocol->prepare_for_resend();
+  protocol->store(result.ptr(), result.length(), result.charset());
+  if (protocol->write())
+  {
+    error= true;
+  }
+
+  /* MDEV-24114 - PASSWORD EXPIRE and PASSWORD EXPIRE [NEVER | INTERVAL X DAY]
+   are two different mechanisms. To make sure a tool can restore the state
+   of a user account, including both the manual expiration state of the
+   account and the automatic expiration policy attached to it, we should
+   print two statements here, a CREATE USER (printed above) and an ALTER USER */
+  if (acl_user->password_expired && acl_user->password_lifetime > -1)
+  {
+    result.length(0);
+    result.append(STRING_WITH_LEN("ALTER USER "));
+    append_identifier(thd, &result, username, strlen(username));
+    result.append('@');
+    append_identifier(thd, &result, acl_user->host.hostname,
+                      acl_user->hostname_length);
+    append_auto_expiration_policy(acl_user, &result);
+    protocol->prepare_for_resend();
+    protocol->store(result.ptr(), result.length(), result.charset());
+    if (protocol->write())
+    {
+      error= true;
+    }
+  }
+
+  my_eof(thd);
+
+end:
+  mysql_rwlock_unlock(&LOCK_grant);
+  mysql_mutex_unlock(&acl_cache->lock);
+
+  DBUG_RETURN(error);
+}
+
+
+/* Role-graph traversal callback: print grants of each visited role */
+static int show_grants_callback(ACL_USER_BASE *role, void *data)
+{
+  THD *thd= static_cast<THD*>(data);
+  DBUG_ASSERT(role->flags & IS_ROLE);
+  return print_grants_for_role(thd, (ACL_ROLE *)role) ? -1 : 0;
+}
+
+/*
+  Create the single metadata column ("Grants for ...") of a SHOW GRANTS
+  result set and push it onto 'fields'.  The column name is set explicitly
+  so the header text is not subject to character set conversion.
+*/
+void mysql_show_grants_get_fields(THD *thd, List<Item> *fields,
+                                  const char *name, size_t length)
+{
+  Item_string *field=new (thd->mem_root) Item_string_ascii(thd, "", 0);
+  /* Set name explicit to avoid character set conversions */
+  field->name.str= name;
+  field->name.length= length;
+  field->max_length=1024;
+  fields->push_back(field, thd->mem_root);
+}
+
+/** checks privileges for SHOW GRANTS and SHOW CREATE USER
+
+  Resolves the CURRENT_USER / CURRENT_ROLE / CURRENT_USER+ROLE sentinels
+  to the invoker's own credentials (no access check needed then); any
+  other grantee requires SELECT on the mysql database.
+
+  @note that in case of SHOW CREATE USER the parser guarantees
+  that a role can never happen here, so *rolename will never
+  be assigned to
+
+  @retval 0 ok, outputs assigned  @retval 1 error
+*/
+bool get_show_user(THD *thd, LEX_USER *lex_user, const char **username,
+                   const char **hostname, const char **rolename)
+{
+  /* Sentinels are interned strings: compare by pointer, not contents */
+  if (lex_user->user.str == current_user.str)
+  {
+    *username= thd->security_ctx->priv_user;
+    *hostname= thd->security_ctx->priv_host;
+    return 0;
+  }
+  if (lex_user->user.str == current_role.str)
+  {
+    *rolename= thd->security_ctx->priv_role;
+    return 0;
+  }
+  if (lex_user->user.str == current_user_and_current_role.str)
+  {
+    *username= thd->security_ctx->priv_user;
+    *hostname= thd->security_ctx->priv_host;
+    *rolename= thd->security_ctx->priv_role;
+    return 0;
+  }
+
+  Security_context *sctx= thd->security_ctx;
+  bool do_check_access;
+
+  if (!(lex_user= get_current_user(thd, lex_user)))
+    return 1;
+
+  if (lex_user->is_role())
+  {
+    *rolename= lex_user->user.str;
+    /* Showing one's own active role (or PUBLIC) needs no extra access */
+    do_check_access= !is_public(lex_user) && strcmp(*rolename, sctx->priv_role);
+  }
+  else
+  {
+    *username= lex_user->user.str;
+    *hostname= lex_user->host.str;
+    /* Showing one's own account needs no extra access */
+    do_check_access= strcmp(*username, sctx->priv_user) ||
+                     strcmp(*hostname, sctx->priv_host);
+  }
+
+  if (do_check_access && check_access(thd, SELECT_ACL, "mysql", 0, 0, 1, 0))
+    return 1;
+  return 0;
+}
+
+/*
+  SHOW GRANTS;  Send grants for a user and/or role to the client
+
+  IMPLEMENTATION
+   Send to client grant-like strings depicting user@host privileges.
+   When both a user and a role are resolved (CURRENT_USER + CURRENT_ROLE),
+   PUBLIC grants are shown as well.
+
+  RETURN
+    FALSE  ok
+    TRUE   error (message already reported)
+*/
+
+bool mysql_show_grants(THD *thd, LEX_USER *lex_user)
+{
+  int  error = -1;
+  ACL_USER *UNINIT_VAR(acl_user);
+  ACL_ROLE *acl_role= NULL;
+  char buff[1024];
+  Protocol *protocol= thd->protocol;
+  const char *username= NULL, *hostname= NULL, *rolename= NULL, *end;
+  DBUG_ENTER("mysql_show_grants");
+
+  if (!initialized)
+  {
+    my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--skip-grant-tables");
+    DBUG_RETURN(TRUE);
+  }
+
+  if (get_show_user(thd, lex_user, &username, &hostname, &rolename))
+    DBUG_RETURN(TRUE);
+
+  DBUG_ASSERT(rolename || username);
+
+  /* The template argument was lost in transit; metadata is a list of Item */
+  List<Item> field_list;
+  if (username)
+    end= strxmov(buff,"Grants for ",username,"@",hostname, NullS);
+  else
+    end= strxmov(buff,"Grants for ",rolename, NullS);
+
+  mysql_show_grants_get_fields(thd, &field_list, buff, (uint) (end-buff));
+
+  if (protocol->send_result_set_metadata(&field_list,
+                               Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
+    DBUG_RETURN(TRUE);
+
+  mysql_rwlock_rdlock(&LOCK_grant);
+  mysql_mutex_lock(&acl_cache->lock);
+
+  if (username)
+  {
+    acl_user= find_user_exact(hostname, username);
+    if (!acl_user)
+    {
+      mysql_mutex_unlock(&acl_cache->lock);
+      mysql_rwlock_unlock(&LOCK_grant);
+
+      my_error(ER_NONEXISTING_GRANT, MYF(0),
+               username, hostname);
+      DBUG_RETURN(TRUE);
+    }
+
+    /* Show granted roles to acl_user */
+    if (show_role_grants(thd, hostname, acl_user, buff, sizeof(buff)))
+      goto end;
+
+    /* Add first global access grants */
+    if (show_global_privileges(thd, acl_user, FALSE, buff, sizeof(buff)))
+      goto end;
+
+    /* Add database access */
+    if (show_database_privileges(thd, username, hostname, buff, sizeof(buff)))
+      goto end;
+
+    /* Add table & column access */
+    if (show_table_and_column_privileges(thd, username, hostname, buff, sizeof(buff)))
+      goto end;
+
+    if (show_routine_grants(thd, username, hostname, &sp_handler_procedure,
+                            buff, sizeof(buff)))
+      goto end;
+
+    if (show_routine_grants(thd, username, hostname, &sp_handler_function,
+                            buff, sizeof(buff)))
+      goto end;
+
+    if (show_routine_grants(thd, username, hostname, &sp_handler_package_spec,
+                            buff, sizeof(buff)))
+      goto end;
+
+    if (show_routine_grants(thd, username, hostname, &sp_handler_package_body,
+                            buff, sizeof(buff)))
+      goto end;
+
+    if (show_proxy_grants(thd, username, hostname, buff, sizeof(buff)))
+      goto end;
+  }
+
+  if (rolename)
+  {
+    acl_role= find_acl_role(rolename, true);
+    if (acl_role)
+    {
+      /* get a list of all inherited roles */
+      traverse_role_graph_down(acl_role, thd, show_grants_callback, NULL);
+    }
+    else
+    {
+      /* An unset CURRENT_ROLE is an error; an unknown named role is not */
+      if (lex_user->user.str == current_role.str)
+      {
+        mysql_mutex_unlock(&acl_cache->lock);
+        mysql_rwlock_unlock(&LOCK_grant);
+        my_error(ER_NONEXISTING_GRANT, MYF(0),
+                 thd->security_ctx->priv_user,
+                 thd->security_ctx->priv_host);
+        DBUG_RETURN(TRUE);
+      }
+    }
+  }
+
+  if (username && rolename) // show everything, incl. PUBLIC
+  {
+    if (acl_public)
+      traverse_role_graph_down(acl_public, thd, show_grants_callback, NULL);
+  }
+
+  if (username)
+  {
+    /* Show default role to acl_user */
+    if (show_default_role(thd, acl_user, buff, sizeof(buff)))
+      goto end;
+  }
+
+
+  error= 0;
+end:
+  mysql_mutex_unlock(&acl_cache->lock);
+  mysql_rwlock_unlock(&LOCK_grant);
+
+  my_eof(thd);
+  DBUG_RETURN(error);
+}
+
+/*
+  Look up the user->role mapping entry in acl_roles_mappings.
+  The hash key is "user\0host\0role".  Returns NULL when no mapping exists.
+*/
+static ROLE_GRANT_PAIR *find_role_grant_pair(const LEX_CSTRING *u,
+                                             const LEX_CSTRING *h,
+                                             const LEX_CSTRING *r)
+{
+  char buf[1024];
+  String pair_key(buf, sizeof(buf), &my_charset_bin);
+
+  size_t key_length= u->length + h->length + r->length + 3;
+  pair_key.alloc(key_length);
+
+  /*
+    Build "u\0h\0r" in the key buffer; safe_str() guards against a NULL
+    user name.  The template argument of const_cast was lost in transit.
+  */
+  strmov(strmov(strmov(const_cast<char*>(pair_key.ptr()),
+                       safe_str(u->str)) + 1, h->str) + 1, r->str);
+
+  return (ROLE_GRANT_PAIR *)
+    my_hash_search(&acl_roles_mappings, (uchar*)pair_key.ptr(), key_length);
+}
+
+/*
+  Send "SET DEFAULT ROLE <role> FOR <user>@<host>" for acl_entry,
+  if a default role is set.  Returns TRUE on protocol error.
+*/
+static bool show_default_role(THD *thd, ACL_USER *acl_entry,
+                              char *buff, size_t buffsize)
+{
+  Protocol *protocol= thd->protocol;
+  LEX_CSTRING rolename= acl_entry->default_rolename;
+
+  if (!rolename.length)
+    return FALSE;                       // nothing to show
+
+  /* Only users (never roles) can have a default role */
+  DBUG_ASSERT(!(acl_entry->flags & IS_ROLE));
+
+  String stmt(buff, buffsize, system_charset_info);
+  stmt.length(0);
+  stmt.append(STRING_WITH_LEN("SET DEFAULT ROLE "));
+  append_identifier(thd, &stmt, rolename.str, rolename.length);
+  stmt.append(STRING_WITH_LEN(" FOR "));
+  append_identifier(thd, &stmt, acl_entry->user.str, acl_entry->user.length);
+  stmt.append('@');
+  append_identifier(thd, &stmt, acl_entry->host.hostname,
+                    acl_entry->hostname_length);
+
+  protocol->prepare_for_resend();
+  protocol->store(stmt.ptr(), stmt.length(), stmt.charset());
+  return protocol->write() ? TRUE : FALSE;
+}
+
+/*
+  Print "GRANT <role> TO <grantee> [WITH ADMIN OPTION]" for every role
+  granted to acl_entry (a user or a role).  Returns TRUE on protocol error.
+*/
+static bool show_role_grants(THD *thd, const char *hostname,
+                             ACL_USER_BASE *acl_entry,
+                             char *buff, size_t buffsize)
+{
+  size_t counter;
+  Protocol *protocol= thd->protocol;
+  /* The template argument of const_cast was lost in transit */
+  LEX_CSTRING host= {const_cast<char*>(hostname), strlen(hostname)};
+
+  String grant(buff, buffsize, system_charset_info);
+  for (counter= 0; counter < acl_entry->role_grants.elements; counter++)
+  {
+    grant.length(0);
+    grant.append(STRING_WITH_LEN("GRANT "));
+    ACL_ROLE *acl_role= *(dynamic_element(&acl_entry->role_grants, counter,
+                                          ACL_ROLE**));
+    append_identifier(thd, &grant, acl_role->user.str, acl_role->user.length);
+    grant.append(STRING_WITH_LEN(" TO "));
+    if (acl_entry == acl_public)
+      grant.append(public_name);
+    else
+      append_identifier(thd, &grant, acl_entry->user.str, acl_entry->user.length);
+    /* Roles have no host part in the grantee */
+    if (!(acl_entry->flags & IS_ROLE))
+    {
+      grant.append('@');
+      append_identifier(thd, &grant, host.str, host.length);
+    }
+
+    ROLE_GRANT_PAIR *pair=
+      find_role_grant_pair(&acl_entry->user, &host, &acl_role->user);
+    DBUG_ASSERT(pair);
+
+    if (pair->with_admin)
+      grant.append(STRING_WITH_LEN(" WITH ADMIN OPTION"));
+
+    protocol->prepare_for_resend();
+    protocol->store(grant.ptr(),grant.length(),grant.charset());
+    if (protocol->write())
+    {
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+/*
+  Print the global ("ON *.*") privileges of a user or role as a GRANT
+  statement.  For roles only the privileges granted directly to the role
+  (initial_role_access) are printed, not the inherited ones.
+  Returns TRUE on protocol error.
+*/
+static bool show_global_privileges(THD *thd, ACL_USER_BASE *acl_entry,
+                                   bool handle_as_role,
+                                   char *buff, size_t buffsize)
+{
+  uint counter;
+  privilege_t want_access(NO_ACL);
+  Protocol *protocol= thd->protocol;
+
+  String global(buff, buffsize, system_charset_info);
+  global.length(0);
+  global.append(STRING_WITH_LEN("GRANT "));
+
+  if (handle_as_role)
+    want_access= ((ACL_ROLE *)acl_entry)->initial_role_access;
+  else
+    want_access= acl_entry->access;
+
+  // suppress "GRANT USAGE ON *.* TO `PUBLIC`"
+  if (!(want_access & ~GRANT_ACL) && acl_entry == acl_public)
+    return FALSE;
+
+  if (test_all_bits(want_access, (GLOBAL_ACLS & ~ GRANT_ACL)))
+    global.append(STRING_WITH_LEN("ALL PRIVILEGES"));
+  else if (!(want_access & ~GRANT_ACL))
+    global.append(STRING_WITH_LEN("USAGE"));
+  else
+  {
+    bool found=0;
+    ulonglong j;
+    privilege_t test_access(want_access & ~GRANT_ACL);
+    /* Walk privilege bits; command_array uses the same bit order */
+    for (counter=0, j = SELECT_ACL;j <= GLOBAL_ACLS;counter++,j <<= 1)
+    {
+      if (test_access & j)
+      {
+        if (found)
+          global.append(STRING_WITH_LEN(", "));
+        found=1;
+        global.append(command_array[counter],command_lengths[counter]);
+      }
+    }
+  }
+  global.append (STRING_WITH_LEN(" ON *.* TO "));
+  if (acl_entry == acl_public)
+    global.append(public_name);
+  else
+    append_identifier(thd, &global, acl_entry->user.str, acl_entry->user.length);
+
+  /* Users get IDENTIFIED/REQUIRE/WITH clauses; roles only GRANT OPTION */
+  if (!handle_as_role)
+    add_user_parameters(thd, &global, (ACL_USER *)acl_entry,
+                        (want_access & GRANT_ACL));
+
+  else if (want_access & GRANT_ACL)
+    global.append(STRING_WITH_LEN(" WITH GRANT OPTION"));
+  protocol->prepare_for_resend();
+  protocol->store(global.ptr(),global.length(),global.charset());
+  if (protocol->write())
+    return TRUE;
+
+  return FALSE;
+
+}
+
+
+/* Append " TO <user>[@<host>]" (or " TO PUBLIC") to a GRANT statement */
+static void add_to_user(THD *thd, String *result, const char *user,
+                        bool is_user, const char *host)
+{
+  result->append(STRING_WITH_LEN(" TO "));
+  if (!is_public(user))
+    append_identifier(thd, result, user, strlen(user));
+  else
+    result->append(public_name);
+  if (!is_user)
+    return;
+  result->append('@');
+  // host and lex_user->host are equal except for case
+  append_identifier(thd, result, host, strlen(host));
+}
+
+
+/*
+  Print the database-level privileges of a user (hostname non-empty) or
+  role (hostname == "") as GRANT ... ON db.* statements, one per database.
+  Returns TRUE on protocol error.
+*/
+static bool show_database_privileges(THD *thd, const char *username,
+                                     const char *hostname,
+                                     char *buff, size_t buffsize)
+{
+  privilege_t want_access(NO_ACL);
+  Protocol *protocol= thd->protocol;
+
+  for (size_t i=0 ; i < acl_dbs.elements() ; i++)
+  {
+    const char *user, *host;
+
+    ACL_DB *acl_db= &acl_dbs.at(i);
+    user= acl_db->user;
+    host=acl_db->host.hostname;
+
+    /*
+      We do not make SHOW GRANTS case-sensitive here (like REVOKE),
+      but make it case-insensitive because that's the way they are
+      actually applied, and showing fewer privileges than are applied
+      would be wrong from a security point of view.
+    */
+
+    if (!strcmp(username, user) &&
+        !my_strcasecmp(system_charset_info, hostname, host))
+    {
+      /*
+        do not print inherited access bits for roles,
+        the role bits present in the table are what matters
+      */
+      if (*hostname) // User
+        want_access=acl_db->access;
+      else // Role
+        want_access=acl_db->initial_access;
+      if (want_access)
+      {
+        String db(buff, buffsize, system_charset_info);
+        db.length(0);
+        db.append(STRING_WITH_LEN("GRANT "));
+
+        if (test_all_bits(want_access,(DB_ACLS & ~GRANT_ACL)))
+          db.append(STRING_WITH_LEN("ALL PRIVILEGES"));
+        else if (!(want_access & ~GRANT_ACL))
+          db.append(STRING_WITH_LEN("USAGE"));
+        else
+        {
+          int found=0, cnt;
+          ulonglong j;
+          privilege_t test_access(want_access & ~GRANT_ACL);
+          /* Walk privilege bits; command_array uses the same bit order */
+          for (cnt=0, j = SELECT_ACL; j <= DB_ACLS; cnt++,j <<= 1)
+          {
+            if (test_access & j)
+            {
+              if (found)
+                db.append(STRING_WITH_LEN(", "));
+              found = 1;
+              db.append(command_array[cnt],command_lengths[cnt]);
+            }
+          }
+        }
+        db.append (STRING_WITH_LEN(" ON "));
+        append_identifier(thd, &db, acl_db->db, strlen(acl_db->db));
+        db.append (STRING_WITH_LEN(".*"));
+        /* (*hostname) distinguishes user grantees from role grantees */
+        add_to_user(thd, &db, username, (*hostname), host);
+        if (want_access & GRANT_ACL)
+          db.append(STRING_WITH_LEN(" WITH GRANT OPTION"));
+        protocol->prepare_for_resend();
+        protocol->store(db.ptr(),db.length(),db.charset());
+        if (protocol->write())
+        {
+          return TRUE;
+        }
+      }
+    }
+  }
+  return FALSE;
+
+}
+
+/*
+  Send one "GRANT ... ON db.table TO ..." result row for every entry in
+  column_priv_hash that matches the given user and host, including
+  column-level privilege lists in parentheses.
+
+  username/hostname  the grantee to report on; an empty hostname means
+                     the grantee is a role
+  buff/buffsize      caller-provided scratch buffer used to build each row
+
+  Returns TRUE on protocol write failure, FALSE otherwise.
+*/
+static bool show_table_and_column_privileges(THD *thd, const char *username,
+                                             const char *hostname,
+                                             char *buff, size_t buffsize)
+{
+  uint counter, index;
+  Protocol *protocol= thd->protocol;
+
+  for (index=0 ; index < column_priv_hash.records ; index++)
+  {
+    const char *user, *host;
+    GRANT_TABLE *grant_table= (GRANT_TABLE*)
+      my_hash_element(&column_priv_hash, index);
+
+    user= grant_table->user;
+    host= grant_table->host.hostname;
+
+    /*
+      We do not make SHOW GRANTS case-sensitive here (like REVOKE),
+      but make it case-insensitive because that's the way they are
+      actually applied, and showing fewer privileges than are applied
+      would be wrong from a security point of view.
+    */
+
+    if (!strcmp(username,user) &&
+        !my_strcasecmp(system_charset_info, hostname, host))
+    {
+      privilege_t table_access(NO_ACL);
+      privilege_t cols_access(NO_ACL);
+      if (*hostname) // User
+      {
+        table_access= grant_table->privs;
+        cols_access= grant_table->cols;
+      }
+      else // Role
+      {
+        table_access= grant_table->init_privs;
+        cols_access= grant_table->init_cols;
+      }
+
+      if ((table_access | cols_access) != NO_ACL)
+      {
+        /*
+          Use the full caller-supplied buffer size; 'buff' is a pointer
+          parameter, so sizeof(buff) would only be the pointer size and
+          defeat the purpose of passing buffsize (the sibling functions
+          show_database_privileges/show_routine_grants already use it).
+        */
+        String global(buff, buffsize, system_charset_info);
+        privilege_t test_access= (table_access | cols_access) & ~GRANT_ACL;
+
+        global.length(0);
+        global.append(STRING_WITH_LEN("GRANT "));
+
+        if (test_all_bits(table_access, (TABLE_ACLS & ~GRANT_ACL)))
+          global.append(STRING_WITH_LEN("ALL PRIVILEGES"));
+        else if (!test_access)
+          global.append(STRING_WITH_LEN("USAGE"));
+        else
+        {
+          /* Add specific column access */
+          int found= 0;
+          ulonglong j;
+
+          for (counter= 0, j= SELECT_ACL; j <= TABLE_ACLS; counter++, j<<= 1)
+          {
+            if (test_access & j)
+            {
+              if (found)
+                global.append(STRING_WITH_LEN(", "));
+              found= 1;
+              global.append(command_array[counter],command_lengths[counter]);
+
+              if (grant_table->cols)
+              {
+                uint found_col= 0;
+                HASH *hash_columns;
+                hash_columns= &grant_table->hash_columns;
+
+                for (uint col_index=0 ;
+                     col_index < hash_columns->records ;
+                     col_index++)
+                {
+                  GRANT_COLUMN *grant_column = (GRANT_COLUMN*)
+                    my_hash_element(hash_columns,col_index);
+                  if (j & (*hostname ? grant_column->rights         // User
+                                     : grant_column->init_rights))  // Role
+                  {
+                    if (!found_col)
+                    {
+                      found_col= 1;
+                      /*
+                        If we have a duplicated table level privilege, we
+                        must write the access privilege name again.
+                      */
+                      if (table_access & j)
+                      {
+                        global.append(STRING_WITH_LEN(", "));
+                        global.append(command_array[counter],
+                                      command_lengths[counter]);
+                      }
+                      global.append(STRING_WITH_LEN(" ("));
+                    }
+                    else
+                      global.append(STRING_WITH_LEN(", "));
+                    append_identifier(thd, &global, grant_column->column,
+                                      grant_column->key_length);
+                  }
+                }
+                if (found_col)
+                  global.append(')');
+              }
+            }
+          }
+        }
+        global.append(STRING_WITH_LEN(" ON "));
+        append_identifier(thd, &global, grant_table->db,
+                          strlen(grant_table->db));
+        global.append('.');
+        append_identifier(thd, &global, grant_table->tname,
+                          strlen(grant_table->tname));
+        add_to_user(thd, &global, username, (*hostname), host);
+        if (table_access & GRANT_ACL)
+          global.append(STRING_WITH_LEN(" WITH GRANT OPTION"));
+        protocol->prepare_for_resend();
+        protocol->store(global.ptr(),global.length(),global.charset());
+        if (protocol->write())
+        {
+          return TRUE;
+        }
+      }
+    }
+  }
+  return FALSE;
+
+}
+
+/*
+  Send one "GRANT ... ON <routine type> db.name TO ..." result row for
+  every entry in the routine privilege hash of 'sph' (procedure,
+  function, or package spec/body) that matches the given user and host.
+
+  buff/buffsize  caller-provided scratch buffer used to build each row.
+
+  Returns 0 on success, -1 on protocol write failure.
+*/
+static int show_routine_grants(THD* thd, const char *username,
+                               const char *hostname, const Sp_handler *sph,
+                               char *buff, int buffsize)
+{
+  uint counter, index;
+  int error= 0;
+  Protocol *protocol= thd->protocol;
+  HASH *hash= sph->get_priv_hash();
+  /* Add routine access */
+  for (index=0 ; index < hash->records ; index++)
+  {
+    const char *user, *host;
+    GRANT_NAME *grant_proc= (GRANT_NAME*) my_hash_element(hash, index);
+
+    user= grant_proc->user;
+    host= grant_proc->host.hostname;
+
+    /*
+      We do not make SHOW GRANTS case-sensitive here (like REVOKE),
+      but make it case-insensitive because that's the way they are
+      actually applied, and showing fewer privileges than are applied
+      would be wrong from a security point of view.
+    */
+
+    if (!strcmp(username, user) &&
+        !my_strcasecmp(system_charset_info, hostname, host))
+    {
+      /* An empty hostname means the grantee is a role */
+      privilege_t proc_access(NO_ACL);
+      if (*hostname) // User
+        proc_access= grant_proc->privs;
+      else // Role
+        proc_access= grant_proc->init_privs;
+
+      if (proc_access != NO_ACL)
+      {
+	String global(buff, buffsize, system_charset_info);
+	privilege_t test_access(proc_access & ~GRANT_ACL);
+
+	global.length(0);
+	global.append(STRING_WITH_LEN("GRANT "));
+
+	if (!test_access)
+ 	  global.append(STRING_WITH_LEN("USAGE"));
+	else
+	{
+          /* Add specific procedure access */
+	  int found= 0;
+	  ulonglong j;
+
+	  for (counter= 0, j= SELECT_ACL; j <= PROC_ACLS; counter++, j<<= 1)
+	  {
+	    if (test_access & j)
+	    {
+	      if (found)
+		global.append(STRING_WITH_LEN(", "));
+	      found= 1;
+	      global.append(command_array[counter],command_lengths[counter]);
+	    }
+	  }
+	}
+	global.append(STRING_WITH_LEN(" ON "));
+        LEX_CSTRING tmp= sph->type_lex_cstring();
+        global.append(&tmp);
+        global.append(' ');
+	append_identifier(thd, &global, grant_proc->db,
+			  strlen(grant_proc->db));
+	global.append('.');
+	append_identifier(thd, &global, grant_proc->tname,
+			  strlen(grant_proc->tname));
+        add_to_user(thd, &global, username, (*hostname), host);
+	if (proc_access & GRANT_ACL)
+	  global.append(STRING_WITH_LEN(" WITH GRANT OPTION"));
+	protocol->prepare_for_resend();
+	protocol->store(global.ptr(),global.length(),global.charset());
+	if (protocol->write())
+	{
+	  error= -1;
+	  break;
+	}
+      }
+    }
+  }
+  return error;
+}
+
+
+/*
+  Make a clear-text, comma-separated version of the requested privilege
+  bits in 'to' (e.g. "SELECT, INSERT"). Privilege names that would not
+  fit within max_length are silently skipped. The result is always
+  zero-terminated.
+*/
+
+void get_privilege_desc(char *to, uint max_length, privilege_t access_arg)
+{
+  uint pos;
+  char *start=to;
+  DBUG_ASSERT(max_length >= 30);                // For end ', ' removal
+
+  if (ulonglong access= access_arg)
+  {
+    max_length--;				// Reserve place for end-zero
+    // Walk the bits from lowest to highest; command_array is indexed by bit
+    for (pos=0 ; access ; pos++, access>>=1)
+    {
+      if ((access & 1) &&
+	  command_lengths[pos] + (uint) (to-start) < max_length)
+      {
+	to= strmov(to, command_array[pos]);
+        *to++= ',';
+        *to++= ' ';
+      }
+    }
+    to--;                                       // Remove end ' '
+    to--;					// Remove end ','
+  }
+  *to=0;
+}
+
+
+/*
+  Copy the per-user resource limits for user@host into
+  uc->user_resources. Falls back to zeroed limits when the ACL subsystem
+  is not initialized or no matching user entry exists. Takes
+  acl_cache->lock around the lookup.
+*/
+void get_mqh(const char *user, const char *host, USER_CONN *uc)
+{
+  ACL_USER *acl_user;
+
+  mysql_mutex_lock(&acl_cache->lock);
+
+  // find_user_wild allows wildcard host entries to match this host
+  if (initialized && (acl_user= find_user_wild(host,user)))
+    uc->user_resources= acl_user->user_resource;
+  else
+    bzero((char*) &uc->user_resources, sizeof(uc->user_resources));
+
+  mysql_mutex_unlock(&acl_cache->lock);
+}
+
+/*
+  Modify a privilege table.
+
+  SYNOPSIS
+    modify_grant_table()
+    table                       The table to modify.
+    host_field                  The host name field.
+    user_field                  The user name field.
+    user_to                     The new name for the user if to be renamed,
+                                NULL otherwise.
+
+  DESCRIPTION
+  Update user/host in the current record if user_to is not NULL.
+  Delete the current record if user_to is NULL.
+
+  RETURN
+    0           OK.
+    != 0        Error.
+*/
+
+static int modify_grant_table(TABLE *table, Field *host_field,
+                              Field *user_field, LEX_USER *user_to)
+{
+  int error;
+  DBUG_ENTER("modify_grant_table");
+
+  if (user_to)
+  {
+    /* rename */
+    store_record(table, record[1]);
+    host_field->store(user_to->host.str, user_to->host.length,
+                      system_charset_info);
+    user_field->store(user_to->user.str, user_to->user.length,
+                      system_charset_info);
+    /*
+      HA_ERR_RECORD_IS_THE_SAME only means nothing changed, which is
+      not an error for a rename.
+    */
+    if (unlikely(error= table->file->ha_update_row(table->record[1],
+                                                   table->record[0])) &&
+        error != HA_ERR_RECORD_IS_THE_SAME)
+      table->file->print_error(error, MYF(0));
+    else
+      error= 0;
+  }
+  else
+  {
+    /* delete */
+    if (unlikely((error=table->file->ha_delete_row(table->record[0]))))
+      table->file->print_error(error, MYF(0));
+  }
+
+  DBUG_RETURN(error);
+}
+
+/*
+  Handle the roles_mapping privilege table: scan all rows and
+  drop/rename/find entries that match user_from, either on the grantee
+  (Host, User) side or — when user_from is a role — on the Role side.
+
+  Returns 1 if at least one row matched, 0 if none matched, -1 on error.
+*/
+static int handle_roles_mappings_table(TABLE *table, bool drop,
+                                       LEX_USER *user_from, LEX_USER *user_to)
+{
+  /*
+    All entries (Host, User) that match user_from will be renamed,
+    as well as all Role entries that match if user_from.host.str == ""
+
+    Otherwise, only matching (Host, User) will be renamed.
+  */
+  DBUG_ENTER("handle_roles_mappings_table");
+
+  int error;
+  int result= 0;
+  THD *thd= table->in_use;
+  const char *host, *user, *role;
+  Field *host_field= table->field[0];
+  Field *user_field= table->field[1];
+  Field *role_field= table->field[2];
+
+  DBUG_PRINT("info", ("Rewriting entry in roles_mapping table: %s@%s",
+                      user_from->user.str, user_from->host.str));
+  table->use_all_columns();
+
+  if (unlikely(table->file->ha_rnd_init_with_error(1)))
+    result= -1;
+  else
+  {
+    while((error= table->file->ha_rnd_next(table->record[0])) !=
+          HA_ERR_END_OF_FILE)
+    {
+      if (error)
+      {
+        // most probably a deleted record; skip it and keep scanning
+        DBUG_PRINT("info", ("scan error: %d", error));
+        continue;
+      }
+
+      host= safe_str(get_field(thd->mem_root, host_field));
+      user= safe_str(get_field(thd->mem_root, user_field));
+
+      if (!(strcmp(user_from->user.str, user) ||
+            my_strcasecmp(system_charset_info, user_from->host.str, host)))
+        result= ((drop || user_to) &&
+                 modify_grant_table(table, host_field, user_field, user_to)) ?
+          -1 : result ? result : 1; /* Error or keep result or found. */
+      else
+      {
+        role= safe_str(get_field(thd->mem_root, role_field));
+
+        if (!user_from->is_role() || strcmp(user_from->user.str, role))
+          continue;
+
+        error= 0;
+
+        if (drop) /* drop if requested */
+        {
+          if (unlikely((error= table->file->ha_delete_row(table->record[0]))))
+            table->file->print_error(error, MYF(0));
+        }
+        else if (user_to)
+        {
+          // rename the Role column only; grantee columns are untouched
+          store_record(table, record[1]);
+          role_field->store(user_to->user.str, user_to->user.length,
+                            system_charset_info);
+          if (unlikely(error= table->file->ha_update_row(table->record[1],
+                                                         table->record[0])) &&
+              error != HA_ERR_RECORD_IS_THE_SAME)
+            table->file->print_error(error, MYF(0));
+        }
+
+        /* Error or keep result or found. */
+        result= error ? -1 : result ? result : 1;
+      }
+    }
+    table->file->ha_rnd_end();
+  }
+  DBUG_RETURN(result);
+}
+
+/*
+  Handle a privilege table.
+
+  SYNOPSIS
+    handle_grant_table()
+    grant_table                 An open grant table handle.
+    which_table                 Which grant table to handle.
+    drop                        If user_from is to be dropped.
+    user_from                   The user to be searched/dropped/renamed.
+    user_to                     The new name for the user if to be renamed,
+                                NULL otherwise.
+
+  DESCRIPTION
+    Scan through all records in a grant table and apply the requested
+    operation. For the "user" table, a single index access is sufficient,
+    since there is an unique index on (host, user).
+    Delete from grant table if drop is true.
+    Update in grant table if drop is false and user_to is not NULL.
+    Search in grant table if drop is false and user_to is NULL.
+
+  RETURN
+    > 0         At least one record matched.
+    0           OK, but no record matched.
+    < 0         Error.
+
+   TODO(cvicentiu) refactor handle_grant_table to use
+   Grant_table_base instead of TABLE directly.
+*/
+
+static int handle_grant_table(THD *thd, const Grant_table_base& grant_table,
+                              enum enum_acl_tables which_table, bool drop,
+                              LEX_USER *user_from, LEX_USER *user_to)
+{
+  int result= 0;
+  int error;
+  TABLE *table= grant_table.table();
+  Field *host_field= table->field[0];
+  /* user/role name column position differs between the grant tables */
+  Field *user_field= table->field[which_table == USER_TABLE ||
+                                  which_table == PROXIES_PRIV_TABLE ? 1 : 2];
+  const char *host_str= user_from->host.str;
+  const char *user_str= user_from->user.str;
+  const char *host;
+  const char *user;
+  uchar user_key[MAX_KEY_LENGTH];
+  uint key_prefix_length;
+  DBUG_ENTER("handle_grant_table");
+
+  if (which_table == ROLES_MAPPING_TABLE)
+  {
+    result= handle_roles_mappings_table(table, drop, user_from, user_to);
+    DBUG_RETURN(result);
+  }
+
+  table->use_all_columns();
+  if (which_table == USER_TABLE) // mysql.user table
+  {
+    /*
+      The 'user' table has an unique index on (host, user).
+      Thus, we can handle everything with a single index access.
+      The host- and user fields are consecutive in the user table records.
+      So we set host- and user fields of table->record[0] and use the
+      pointer to the host field as key.
+      index_read_idx() will replace table->record[0] (its first argument)
+      by the searched record, if it exists.
+    */
+    DBUG_PRINT("info",("read table: '%s'  search: '%s'@'%s'",
+                       table->s->table_name.str, user_str, host_str));
+    host_field->store(host_str, user_from->host.length, system_charset_info);
+    user_field->store(user_str, user_from->user.length, system_charset_info);
+
+    key_prefix_length= (table->key_info->key_part[0].store_length +
+                        table->key_info->key_part[1].store_length);
+    key_copy(user_key, table->record[0], table->key_info, key_prefix_length);
+
+    error= table->file->ha_index_read_idx_map(table->record[0], 0,
+                                              user_key, (key_part_map)3,
+                                              HA_READ_KEY_EXACT);
+    if (!unlikely(error) && !*host_str)
+    {
+      /*
+        verify that we got a role or a user, as needed
+        (fix: the static_cast below had lost its <const User_table&>
+        template argument, which cannot compile)
+      */
+      if (static_cast<const User_table&>(grant_table).get_is_role() !=
+          user_from->is_role())
+        error= HA_ERR_KEY_NOT_FOUND;
+    }
+    if (unlikely(error))
+    {
+      if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
+      {
+        table->file->print_error(error, MYF(0));
+        result= -1;
+      }
+    }
+    else
+    {
+      /* If requested, delete or update the record. */
+      result= ((drop || user_to) &&
+               modify_grant_table(table, host_field, user_field, user_to)) ?
+        -1 : 1; /* Error or found. */
+    }
+    DBUG_PRINT("info",("read result: %d", result));
+  }
+  else
+  {
+    /*
+      The non-'user' table do not have indexes on (host, user).
+      And their host- and user fields are not consecutive.
+      Thus, we need to do a table scan to find all matching records.
+    */
+    if (unlikely(table->file->ha_rnd_init_with_error(1)))
+      result= -1;
+    else
+    {
+#ifdef EXTRA_DEBUG
+      DBUG_PRINT("info",("scan table: '%s'  search: '%s'@'%s'",
+                         table->s->table_name.str, user_str, host_str));
+#endif
+      while ((error= table->file->ha_rnd_next(table->record[0])) !=
+             HA_ERR_END_OF_FILE)
+      {
+        if (error)
+        {
+          /* Most probable 'deleted record'. */
+          DBUG_PRINT("info",("scan error: %d", error));
+          continue;
+        }
+        host= safe_str(get_field(thd->mem_root, host_field));
+        user= safe_str(get_field(thd->mem_root, user_field));
+
+#ifdef EXTRA_DEBUG
+        if (which_table != PROXIES_PRIV_TABLE)
+        {
+          DBUG_PRINT("loop",("scan fields: '%s'@'%s' '%s' '%s' '%s'",
+                             user, host,
+                             get_field(thd->mem_root, table->field[1]) /*db*/,
+                             get_field(thd->mem_root, table->field[3]) /*table*/,
+                             get_field(thd->mem_root,
+                                       table->field[4]) /*column*/));
+        }
+#endif
+        if (strcmp(user_str, user) ||
+            my_strcasecmp(system_charset_info, host_str, host))
+          continue;
+
+        /* If requested, delete or update the record. */
+        result= ((drop || user_to) &&
+                 modify_grant_table(table, host_field, user_field, user_to)) ?
+          -1 : result ? result : 1; /* Error or keep result or found. */
+        /* If search is requested, we do not need to search further. */
+        if (! drop && ! user_to)
+          break ;
+      }
+      (void) table->file->ha_rnd_end();
+      DBUG_PRINT("info",("scan result: %d", result));
+    }
+  }
+
+  DBUG_RETURN(result);
+}
+
+
+/**
+  Handle an in-memory privilege structure.
+
+  @param struct_no  The number of the structure to handle (0..6).
+  @param drop       If user_from is to be dropped.
+  @param user_from  The user to be searched/dropped/renamed.
+  @param user_to    The new name for the user if to be renamed, NULL otherwise.
+
+  @note
+    Scan through all elements in an in-memory grant structure and apply
+    the requested operation.
+    Delete from grant structure if drop is true.
+    Update in grant structure if drop is false and user_to is not NULL.
+    Search in grant structure if drop is false and user_to is NULL.
+
+  @retval > 0  At least one element matched.
+  @retval 0    OK, but no element matched.
+*/
+
+static int handle_grant_struct(enum enum_acl_lists struct_no, bool drop,
+                               LEX_USER *user_from, LEX_USER *user_to)
+{
+  int result= 0;
+  int elements;
+  bool restart;
+  const char *UNINIT_VAR(user);
+  const char *UNINIT_VAR(host);
+  ACL_USER *acl_user= NULL;
+  ACL_ROLE *acl_role= NULL;
+  ACL_DB *acl_db= NULL;
+  ACL_PROXY_USER *acl_proxy_user= NULL;
+  GRANT_NAME *grant_name= NULL;
+  ROLE_GRANT_PAIR *UNINIT_VAR(role_grant_pair);
+  HASH *grant_name_hash= NULL;
+  HASH *roles_mappings_hash= NULL;
+  DBUG_ENTER("handle_grant_struct");
+  DBUG_PRINT("info",("scan struct: %u  search: '%s'@'%s'",
+                     struct_no, user_from->user.str, user_from->host.str));
+
+  mysql_mutex_assert_owner(&acl_cache->lock);
+
+  /* No point in querying ROLE ACL if user_from is not a role */
+  if (struct_no == ROLE_ACL && user_from->host.length)
+    DBUG_RETURN(0);
+
+  /* same. no roles in PROXY_USERS_ACL */
+  if (struct_no == PROXY_USERS_ACL && user_from->is_role())
+    DBUG_RETURN(0);
+
+  if (struct_no == ROLE_ACL) //no need to scan the structures in this case
+  {
+    acl_role= find_acl_role(user_from->user.str, true);
+    if (!acl_role)
+      DBUG_RETURN(0);
+
+    if (!drop && !user_to) //role was found
+      DBUG_RETURN(1);
+
+    /* this calls for a role update */
+    const char *old_key= acl_role->user.str;
+    size_t old_key_length= acl_role->user.length;
+    if (drop)
+    {
+      // delete the role from cross-reference arrays
+      for (size_t i=0; i < acl_role->role_grants.elements; i++)
+      {
+        ACL_ROLE *grant= *dynamic_element(&acl_role->role_grants,
+                                          i, ACL_ROLE**);
+        remove_ptr_from_dynarray(&grant->parent_grantee, acl_role);
+      }
+
+      for (size_t i=0; i < acl_role->parent_grantee.elements; i++)
+      {
+        ACL_USER_BASE *grantee= *dynamic_element(&acl_role->parent_grantee,
+                                                 i, ACL_USER_BASE**);
+        remove_ptr_from_dynarray(&grantee->role_grants, acl_role);
+      }
+
+      /* Remove all of the role_grants from this role. */
+      delete_dynamic(&acl_role->role_grants);
+
+      /* all grants must be revoked from this role by now. propagate this */
+      propagate_role_grants(acl_role, PRIVS_TO_MERGE::ALL);
+
+      my_hash_delete(&acl_roles, (uchar*) acl_role);
+      DBUG_RETURN(1);
+    }
+    // rename: the role name is the hash key, so re-position the entry
+    acl_role->user= safe_lexcstrdup_root(&acl_memroot, user_to->user);
+
+    my_hash_update(&acl_roles, (uchar*) acl_role, (uchar*) old_key,
+                   old_key_length);
+    DBUG_RETURN(1);
+
+  }
+
+  /* Get the number of elements in the in-memory structure. */
+  switch (struct_no) {
+  case USER_ACL:
+    elements= int(acl_users.elements);
+    break;
+  case DB_ACL:
+    elements= int(acl_dbs.elements());
+    break;
+  case COLUMN_PRIVILEGES_HASH:
+    grant_name_hash= &column_priv_hash;
+    elements= grant_name_hash->records;
+    break;
+  case PROC_PRIVILEGES_HASH:
+    grant_name_hash= &proc_priv_hash;
+    elements= grant_name_hash->records;
+    break;
+  case FUNC_PRIVILEGES_HASH:
+    grant_name_hash= &func_priv_hash;
+    elements= grant_name_hash->records;
+    break;
+  case PACKAGE_SPEC_PRIVILEGES_HASH:
+    grant_name_hash= &package_spec_priv_hash;
+    elements= grant_name_hash->records;
+    break;
+  case PACKAGE_BODY_PRIVILEGES_HASH:
+    grant_name_hash= &package_body_priv_hash;
+    elements= grant_name_hash->records;
+    break;
+  case PROXY_USERS_ACL:
+    elements= int(acl_proxy_users.elements);
+    break;
+  case ROLES_MAPPINGS_HASH:
+    roles_mappings_hash= &acl_roles_mappings;
+    elements= roles_mappings_hash->records;
+    break;
+  default:
+    DBUG_ASSERT(0);
+    DBUG_RETURN(-1);
+  }
+
+
+#ifdef EXTRA_DEBUG
+    DBUG_PRINT("loop",("scan struct: %u  search    user: '%s'  host: '%s'",
+                       struct_no, user_from->user.str, user_from->host.str));
+#endif
+  /* Loop over elements backwards as it may reduce the number of mem-moves
+     for dynamic arrays.
+
+     We restart the loop, if we deleted or updated anything in a hash table
+     because calling my_hash_delete or my_hash_update shuffles elements indices
+     and we can miss some if we do only one scan.
+  */
+  do {
+    restart= false;
+    for (int idx= elements - 1; idx >= 0; idx--)
+    {
+      /*
+        Get a pointer to the element.
+      */
+      switch (struct_no) {
+      case USER_ACL:
+        acl_user= dynamic_element(&acl_users, idx, ACL_USER*);
+        user= acl_user->user.str;
+        host= acl_user->host.hostname;
+      break;
+
+      case DB_ACL:
+        acl_db= &acl_dbs.at(idx);
+        user= acl_db->user;
+        host= acl_db->host.hostname;
+        break;
+
+      case COLUMN_PRIVILEGES_HASH:
+      case PROC_PRIVILEGES_HASH:
+      case FUNC_PRIVILEGES_HASH:
+      case PACKAGE_SPEC_PRIVILEGES_HASH:
+      case PACKAGE_BODY_PRIVILEGES_HASH:
+        grant_name= (GRANT_NAME*) my_hash_element(grant_name_hash, idx);
+        user= grant_name->user;
+        host= grant_name->host.hostname;
+        break;
+
+      case PROXY_USERS_ACL:
+        acl_proxy_user= dynamic_element(&acl_proxy_users, idx, ACL_PROXY_USER*);
+        user= acl_proxy_user->get_user();
+        host= acl_proxy_user->get_host();
+        break;
+
+      case ROLES_MAPPINGS_HASH:
+        role_grant_pair= (ROLE_GRANT_PAIR *) my_hash_element(roles_mappings_hash, idx);
+        user= role_grant_pair->u_uname;
+        host= role_grant_pair->u_hname;
+        break;
+
+      default:
+        DBUG_ASSERT(0);
+      }
+      // normalize a NULL hostname to "" before the string comparisons below
+      if (! host)
+        host= "";
+
+#ifdef EXTRA_DEBUG
+      DBUG_PRINT("loop",("scan struct: %u  index: %u  user: '%s'  host: '%s'",
+                         struct_no, idx, user, host));
+#endif
+
+      if (struct_no == ROLES_MAPPINGS_HASH)
+      {
+        const char* role= role_grant_pair->r_uname? role_grant_pair->r_uname: "";
+        if (user_from->is_role())
+        {
+          /* When searching for roles within the ROLES_MAPPINGS_HASH, we have
+             to check both the user field as well as the role field for a match.
+
+             It is possible to have a role granted to a role. If we are going
+             to modify the mapping entry, it needs to be done on either on the
+             "user" end (here represented by a role) or the "role" end. At least
+             one part must match.
+
+             If the "user" end has a not-empty host string, it can never match
+             as we are searching for a role here. A role always has an empty host
+             string.
+          */
+          if ((*host || strcmp(user_from->user.str, user)) &&
+              strcmp(user_from->user.str, role))
+            continue;
+        }
+        else
+        {
+          if (strcmp(user_from->user.str, user) ||
+              my_strcasecmp(system_charset_info, user_from->host.str, host))
+            continue;
+        }
+      }
+      else
+      {
+        if (strcmp(user_from->user.str, user) ||
+            my_strcasecmp(system_charset_info, user_from->host.str, host))
+          continue;
+      }
+
+      result= 1; /* At least one element found. */
+      if ( drop )
+      {
+        elements--;
+        switch ( struct_no ) {
+        case USER_ACL:
+          free_acl_user(dynamic_element(&acl_users, idx, ACL_USER*));
+          delete_dynamic_element(&acl_users, idx);
+          break;
+
+        case DB_ACL:
+          acl_dbs.del(idx);
+          break;
+
+        case COLUMN_PRIVILEGES_HASH:
+        case PROC_PRIVILEGES_HASH:
+        case FUNC_PRIVILEGES_HASH:
+        case PACKAGE_SPEC_PRIVILEGES_HASH:
+        case PACKAGE_BODY_PRIVILEGES_HASH:
+          my_hash_delete(grant_name_hash, (uchar*) grant_name);
+          restart= true;
+          break;
+
+        case PROXY_USERS_ACL:
+          delete_dynamic_element(&acl_proxy_users, idx);
+          break;
+
+        case ROLES_MAPPINGS_HASH:
+          my_hash_delete(roles_mappings_hash, (uchar*) role_grant_pair);
+          restart= true;
+          break;
+
+        default:
+          DBUG_ASSERT(0);
+          break;
+        }
+      }
+      else if ( user_to )
+      {
+        switch ( struct_no ) {
+        case USER_ACL:
+          acl_user->user= safe_lexcstrdup_root(&acl_memroot, user_to->user);
+          update_hostname(&acl_user->host, strdup_root(&acl_memroot, user_to->host.str));
+          acl_user->hostname_length= strlen(acl_user->host.hostname);
+          break;
+
+        case DB_ACL:
+          acl_db->user= strdup_root(&acl_memroot, user_to->user.str);
+          update_hostname(&acl_db->host, strdup_root(&acl_memroot, user_to->host.str));
+          break;
+
+        case COLUMN_PRIVILEGES_HASH:
+        case PROC_PRIVILEGES_HASH:
+        case FUNC_PRIVILEGES_HASH:
+        case PACKAGE_SPEC_PRIVILEGES_HASH:
+        case PACKAGE_BODY_PRIVILEGES_HASH:
+          {
+            /*
+              Save old hash key and its length to be able to properly update
+              element position in hash.
+            */
+            char *old_key= grant_name->hash_key;
+            size_t old_key_length= grant_name->key_length;
+
+            /*
+              Update the grant structure with the new user name and host name.
+            */
+            grant_name->set_user_details(user_to->host.str, grant_name->db,
+                                         user_to->user.str, grant_name->tname,
+                                         TRUE);
+
+            /*
+              Since username is part of the hash key, when the user name
+              is renamed, the hash key is changed. Update the hash to
+              ensure that the position matches the new hash key value
+            */
+            my_hash_update(grant_name_hash, (uchar*) grant_name, (uchar*) old_key,
+                           old_key_length);
+            restart= true;
+            break;
+          }
+
+        case PROXY_USERS_ACL:
+          acl_proxy_user->set_user (&acl_memroot, user_to->user.str);
+          acl_proxy_user->set_host (&acl_memroot, user_to->host.str);
+          break;
+
+        case ROLES_MAPPINGS_HASH:
+          {
+            /*
+              Save old hash key and its length to be able to properly update
+              element position in hash.
+            */
+            char *old_key= role_grant_pair->hashkey.str;
+            size_t old_key_length= role_grant_pair->hashkey.length;
+            bool oom;
+
+            if (user_to->is_role())
+              oom= role_grant_pair->init(&acl_memroot, role_grant_pair->u_uname,
+                                         role_grant_pair->u_hname,
+                                         user_to->user.str, false);
+            else
+              oom= role_grant_pair->init(&acl_memroot, user_to->user.str,
+                                         user_to->host.str,
+                                         role_grant_pair->r_uname, false);
+            if (oom)
+              DBUG_RETURN(-1);
+
+            my_hash_update(roles_mappings_hash, (uchar*) role_grant_pair,
+                           (uchar*) old_key, old_key_length);
+            restart= true;
+            break;
+          }
+
+        default:
+          DBUG_ASSERT(0);
+          break;
+        }
+
+      }
+      else
+      {
+        /* If search is requested, we do not need to search further. */
+        break;
+      }
+    }
+  } while (restart);
+#ifdef EXTRA_DEBUG
+  DBUG_PRINT("loop",("scan struct: %u  result %d", struct_no, result));
+#endif
+
+  DBUG_RETURN(result);
+}
+
+
+/*
+  Handle all privilege tables and in-memory privilege structures.
+
+  SYNOPSIS
+    handle_grant_data()
+    tables                      The array with the four open tables.
+    drop                        If user_from is to be dropped.
+    user_from                   The user to be searched/dropped/renamed.
+    user_to                     The new name for the user if to be renamed,
+                                NULL otherwise.
+
+  DESCRIPTION
+    Go through all grant tables and in-memory grant structures and apply
+    the requested operation.
+    Delete from grant data if drop is true.
+    Update in grant data if drop is false and user_to is not NULL.
+    Search in grant data if drop is false and user_to is NULL.
+
+  RETURN
+    > 0         At least one element matched.
+    0           OK, but no element matched.
+    < 0         Error.
+*/
+
+static int handle_grant_data(THD *thd, Grant_tables& tables, bool drop,
+                             LEX_USER *user_from, LEX_USER *user_to)
+{
+  /* result: 1 = at least one match, 0 = no match, -1 = error (sticky). */
+  int result= 0;
+  int found;
+  bool handle_as_role= user_from->is_role();
+  /* Neither dropping nor renaming was requested: a pure existence search. */
+  bool search_only= !drop && !user_to;
+  DBUG_ENTER("handle_grant_data");
+
+  if (user_to)
+    DBUG_ASSERT(handle_as_role == user_to->is_role());
+
+  if (search_only)
+  {
+    /* quickly search in-memory structures first */
+    if (handle_as_role && find_acl_role(user_from->user.str, true))
+      DBUG_RETURN(1); // found
+
+    if (!handle_as_role && find_user_exact(user_from->host.str, user_from->user.str))
+      DBUG_RETURN(1); // found
+  }
+
+  /* Handle db table. */
+  if ((found= handle_grant_table(thd, tables.db_table(),
+                                 DB_TABLE, drop, user_from,
+                                 user_to)) < 0)
+  {
+    /* Handle of table failed, don't touch the in-memory array. */
+    result= -1;
+  }
+  else
+  {
+    /* Handle db array. */
+    if ((handle_grant_struct(DB_ACL, drop, user_from, user_to) || found)
+        && ! result)
+    {
+      result= 1; /* At least one record/element found. */
+      /* If search is requested, we do not need to search further. */
+      if (search_only)
+        goto end;
+      acl_cache->clear(1);
+    }
+  }
+
+  /* Handle stored routines table. */
+  if ((found= handle_grant_table(thd, tables.procs_priv_table(),
+                                 PROCS_PRIV_TABLE, drop,
+                                 user_from, user_to)) < 0)
+  {
+    /* Handle of table failed, don't touch in-memory array. */
+    result= -1;
+  }
+  else
+  {
+    /* Handle procs array. */
+    if ((handle_grant_struct(PROC_PRIVILEGES_HASH, drop, user_from, user_to) || found)
+        && ! result)
+    {
+      result= 1; /* At least one record/element found. */
+      /* If search is requested, we do not need to search further. */
+      if (search_only)
+        goto end;
+    }
+    /* Handle funcs array. */
+    if ((handle_grant_struct(FUNC_PRIVILEGES_HASH, drop, user_from, user_to) || found)
+        && ! result)
+    {
+      result= 1; /* At least one record/element found. */
+      /* If search is requested, we do not need to search further. */
+      if (search_only)
+        goto end;
+    }
+    /* Handle package spec array. */
+    if ((handle_grant_struct(PACKAGE_SPEC_PRIVILEGES_HASH,
+                             drop, user_from, user_to) || found)
+        && ! result)
+    {
+      result= 1; /* At least one record/element found. */
+      /* If search is requested, we do not need to search further. */
+      if (search_only)
+        goto end;
+    }
+    /* Handle package body array. */
+    if ((handle_grant_struct(PACKAGE_BODY_PRIVILEGES_HASH,
+                             drop, user_from, user_to) || found)
+        && ! result)
+    {
+      result= 1; /* At least one record/element found. */
+      /* If search is requested, we do not need to search further. */
+      if (search_only)
+        goto end;
+    }
+  }
+
+  /* Handle tables table. */
+  if ((found= handle_grant_table(thd, tables.tables_priv_table(),
+                                 TABLES_PRIV_TABLE, drop,
+                                 user_from, user_to)) < 0)
+  {
+    /* Handle of table failed, don't touch columns and in-memory array. */
+    result= -1;
+  }
+  else
+  {
+    if (found && ! result)
+    {
+      result= 1; /* At least one record found. */
+      /* If search is requested, we do not need to search further. */
+      if (search_only)
+        goto end;
+    }
+
+    /* Handle columns table. */
+    if ((found= handle_grant_table(thd, tables.columns_priv_table(),
+                                   COLUMNS_PRIV_TABLE, drop,
+                                   user_from, user_to)) < 0)
+    {
+      /* Handle of table failed, don't touch the in-memory array. */
+      result= -1;
+    }
+    else
+    {
+      /* Handle columns hash. */
+      if ((handle_grant_struct(COLUMN_PRIVILEGES_HASH, drop, user_from, user_to) || found)
+          && ! result)
+        result= 1; /* At least one record/element found. */
+      /* NOTE(review): unlike the sections above, this early exit is not
+         guarded by "result became 1" — it fires for any search. Confirm
+         this asymmetry is intended. */
+      if (search_only)
+        goto end;
+    }
+  }
+
+  /* Handle proxies_priv table. */
+  if (tables.proxies_priv_table().table_exists())
+  {
+    if ((found= handle_grant_table(thd, tables.proxies_priv_table(),
+                                   PROXIES_PRIV_TABLE, drop,
+                                   user_from, user_to)) < 0)
+    {
+      /* Handle of table failed, don't touch the in-memory array. */
+      result= -1;
+    }
+    else
+    {
+      /* Handle proxies_priv array. */
+      if ((handle_grant_struct(PROXY_USERS_ACL, drop, user_from, user_to) || found)
+          && ! result)
+        result= 1; /* At least one record/element found. */
+      if (search_only)
+        goto end;
+    }
+  }
+
+  /* Handle roles_mapping table. */
+  if (tables.roles_mapping_table().table_exists() &&
+      (found= handle_grant_table(thd, tables.roles_mapping_table(),
+                         ROLES_MAPPING_TABLE, drop, user_from, user_to)) < 0)
+  {
+    /* Handle of table failed, don't touch the in-memory array. */
+    result= -1;
+  }
+  /* NOTE(review): when the roles_mapping table does NOT exist, this else
+     branch still runs with 'found' holding the value from the previous
+     section — verify this short-circuit is intended. */
+  else
+  {
+    /* Handle acl_roles_mappings array */
+    if ((handle_grant_struct(ROLES_MAPPINGS_HASH, drop, user_from, user_to) || found)
+        && ! result)
+      result= 1; /* At least one record/element found */
+    if (search_only)
+      goto end;
+  }
+
+  /* Handle user table. */
+  if ((found= handle_grant_table(thd, tables.user_table(), USER_TABLE,
+                                 drop, user_from, user_to)) < 0)
+  {
+    /* Handle of table failed, don't touch the in-memory array. */
+    result= -1;
+  }
+  else
+  {
+    /* The user table is last; a search-only call never reaches it with a
+       match still pending (it would have jumped to 'end' above). */
+    enum enum_acl_lists what= handle_as_role ? ROLE_ACL : USER_ACL;
+    if (((handle_grant_struct(what, drop, user_from, user_to)) || found) && !result)
+    {
+      result= 1; /* At least one record/element found. */
+      DBUG_ASSERT(! search_only);
+    }
+  }
+
+end:
+  DBUG_RETURN(result);
+}
+
+/*
+  Create a list of users.
+
+  SYNOPSIS
+    mysql_create_user()
+    thd                         The current thread.
+    list                        The users to create.
+    handle_as_role              Handle the user list as roles if true
+
+  RETURN
+    FALSE       OK.
+    TRUE        Error.
+*/
+
+bool mysql_create_user(THD *thd, List <LEX_USER> &list, bool handle_as_role)
+{
+  int result;
+  String wrong_users;
+  LEX_USER *user_name;
+  List_iterator <LEX_USER> user_list(list);
+  bool binlog= false;
+  bool some_users_dropped= false;
+  DBUG_ENTER("mysql_create_user");
+  DBUG_PRINT("entry", ("Handle as %s", handle_as_role ? "role" : "user"));
+
+  if (handle_as_role && sp_process_definer(thd))
+    DBUG_RETURN(TRUE);
+
+  /* CREATE USER may be skipped on replication client. */
+  Grant_tables tables;
+  const uint tables_to_open= Table_user | Table_db | Table_tables_priv |
+                             Table_columns_priv | Table_procs_priv |
+                             Table_proxies_priv | Table_roles_mapping;
+  if ((result= tables.open_and_lock(thd, tables_to_open, TL_WRITE)))
+    DBUG_RETURN(result != 1);
+
+  mysql_rwlock_wrlock(&LOCK_grant);
+  mysql_mutex_lock(&acl_cache->lock);
+
+  while ((user_name= user_list++))
+  {
+    /*
+      Pointer (not string) comparison: CURRENT_USER / CURRENT_ROLE are
+      represented by shared sentinel strings.
+    */
+    if (user_name->user.str == current_user.str)
+    {
+      append_str(&wrong_users, STRING_WITH_LEN("CURRENT_USER"));
+      result= TRUE;
+      continue;
+    }
+
+    if (user_name->user.str == current_role.str)
+    {
+      append_str(&wrong_users, STRING_WITH_LEN("CURRENT_ROLE"));
+      result= TRUE;
+      continue;
+    }
+
+    if (handle_as_role &&
+        (check_role_name(&user_name->user, false) == ROLE_NAME_INVALID))
+    {
+      append_user(thd, &wrong_users, user_name);
+      result= TRUE;
+      continue;
+    }
+
+    if (!user_name->host.str)
+      user_name->host= host_not_specified;
+
+    /*
+      Search all in-memory structures and grant tables
+      for a mention of the new user/role name.
+    */
+    if (handle_grant_data(thd, tables, 0, user_name, NULL))
+    {
+      if (thd->lex->create_info.or_replace())
+      {
+        // Drop the existing user
+        if (handle_grant_data(thd, tables, 1, user_name, NULL) <= 0)
+        {
+          // DROP failed
+          append_user(thd, &wrong_users, user_name);
+          result= true;
+          continue;
+        }
+        else
+          some_users_dropped= true;
+        // Proceed with the creation
+      }
+      else if (thd->lex->create_info.if_not_exists())
+      {
+        // Existing user + IF NOT EXISTS: warn, binlog, but do not fail.
+        binlog= true;
+        if (handle_as_role)
+          push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
+                              ER_ROLE_CREATE_EXISTS,
+                              ER_THD(thd, ER_ROLE_CREATE_EXISTS),
+                              user_name->user.str);
+        else
+          push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
+                              ER_USER_CREATE_EXISTS,
+                              ER_THD(thd, ER_USER_CREATE_EXISTS),
+                              user_name->user.str, user_name->host.str);
+        continue;
+      }
+      else
+      {
+        // "CREATE USER user1" for an existing user
+        append_user(thd, &wrong_users, user_name);
+        result= true;
+        continue;
+      }
+    }
+
+    if (replace_user_table(thd, tables.user_table(), user_name,
+                           NO_ACL, 0, 1, 0))
+    {
+      append_user(thd, &wrong_users, user_name);
+      result= TRUE;
+      continue;
+    }
+    binlog= true;
+
+    // every created role is automatically granted to its creator-admin
+    if (handle_as_role)
+    {
+      ACL_USER_BASE *grantee= find_acl_user_base(thd->lex->definer->user.str,
+                                                 thd->lex->definer->host.str);
+      ACL_ROLE *role= find_acl_role(user_name->user.str, false);
+
+      /*
+        just like with routines, views, triggers, and events we allow
+        non-existant definers here with a warning (see sp_process_definer())
+      */
+      if (grantee)
+        add_role_user_mapping(grantee, role);
+
+      /* TODO(cvicentiu) refactor replace_roles_mapping_table to use
+         Roles_mapping_table instead of TABLE directly. */
+      if (replace_roles_mapping_table(tables.roles_mapping_table().table(),
+                                      &thd->lex->definer->user,
+                                      &thd->lex->definer->host,
+                                      &user_name->user, true,
+                                      NULL, false))
+      {
+        append_user(thd, &wrong_users, user_name);
+        if (grantee)
+          undo_add_role_user_mapping(grantee, role);
+        result= TRUE;
+      }
+      else if (grantee)
+             update_role_mapping(&thd->lex->definer->user,
+                                 &thd->lex->definer->host,
+                                 &user_name->user, true, NULL, false);
+    }
+  }
+
+  if (result && some_users_dropped && !handle_as_role)
+  {
+    /* Rebuild in-memory structs, since 'acl_users' has been modified */
+    rebuild_check_host();
+    rebuild_role_grants();
+  }
+
+  mysql_mutex_unlock(&acl_cache->lock);
+
+  if (result)
+  {
+    my_error(ER_CANNOT_USER, MYF(0),
+             (handle_as_role) ? "CREATE ROLE" : "CREATE USER",
+             wrong_users.c_ptr_safe());
+  }
+
+  /* Binlog even on partial success so replicas stay consistent. */
+  if (binlog)
+    result |= write_bin_log(thd, FALSE, thd->query(), thd->query_length());
+
+  mysql_rwlock_unlock(&LOCK_grant);
+  DBUG_RETURN(result);
+}
+
+/*
+  Drop a list of users and all their privileges.
+
+  SYNOPSIS
+    mysql_drop_user()
+    thd                         The current thread.
+    list                        The users to drop.
+
+  RETURN
+    FALSE       OK.
+    TRUE        Error.
+*/
+
+bool mysql_drop_user(THD *thd, List <LEX_USER> &list, bool handle_as_role)
+{
+  int result;
+  String wrong_users;
+  LEX_USER *user_name, *tmp_user_name;
+  List_iterator <LEX_USER> user_list(list);
+  bool binlog= false;
+  DBUG_ENTER("mysql_drop_user");
+  DBUG_PRINT("entry", ("Handle as %s", handle_as_role ? "role" : "user"));
+
+  /* DROP USER may be skipped on replication client. */
+  Grant_tables tables;
+  const uint tables_to_open= Table_user | Table_db | Table_tables_priv |
+                             Table_columns_priv | Table_procs_priv |
+                             Table_proxies_priv | Table_roles_mapping;
+  if ((result= tables.open_and_lock(thd, tables_to_open, TL_WRITE)))
+    DBUG_RETURN(result != 1);
+
+  Sql_mode_instant_remove sms(thd, MODE_PAD_CHAR_TO_FULL_LENGTH);
+
+  mysql_rwlock_wrlock(&LOCK_grant);
+  mysql_mutex_lock(&acl_cache->lock);
+
+  while ((tmp_user_name= user_list++))
+  {
+    int rc;
+    user_name= get_current_user(thd, tmp_user_name, false);
+    /* NULL means CURRENT_ROLE with no role active; PUBLIC can't be dropped. */
+    if (!user_name || (handle_as_role && is_public(user_name)))
+    {
+      thd->clear_error();
+      if (!user_name)
+        append_str(&wrong_users, STRING_WITH_LEN("CURRENT_ROLE"));
+      else
+        append_str(&wrong_users, public_name.str, public_name.length);
+      result= TRUE;
+      continue;
+    }
+
+    /* DROP USER on a role (or DROP ROLE on a user) is an error. */
+    if (handle_as_role != user_name->is_role())
+    {
+      append_user(thd, &wrong_users, user_name);
+      result= TRUE;
+      continue;
+    }
+
+    if ((rc= handle_grant_data(thd, tables, 1, user_name, NULL)) > 0)
+    {
+      // The user or role was successfully deleted
+      binlog= true;
+      continue;
+    }
+
+    if (rc == 0 && thd->lex->if_exists())
+    {
+      // "DROP USER IF EXISTS user1" for a non-existing user or role
+      if (handle_as_role)
+        push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
+                            ER_ROLE_DROP_EXISTS,
+                            ER_THD(thd, ER_ROLE_DROP_EXISTS),
+                            user_name->user.str);
+      else
+        push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
+                            ER_USER_DROP_EXISTS,
+                            ER_THD(thd, ER_USER_DROP_EXISTS),
+                            user_name->user.str, user_name->host.str);
+      binlog= true;
+      continue;
+    }
+    // Internal error, or "DROP USER user1" for a non-existing user
+    append_user(thd, &wrong_users, user_name);
+    result= TRUE;
+  }
+
+  if (!handle_as_role)
+  {
+    /* Rebuild 'acl_check_hosts' since 'acl_users' has been modified */
+    rebuild_check_host();
+
+    /*
+      Rebuild every user's role_grants since 'acl_users' has been sorted
+      and old pointers to ACL_USER elements are no longer valid
+    */
+    rebuild_role_grants();
+  }
+
+  mysql_mutex_unlock(&acl_cache->lock);
+
+  if (result)
+    my_error(ER_CANNOT_USER, MYF(0),
+             (handle_as_role) ? "DROP ROLE" : "DROP USER",
+             wrong_users.c_ptr_safe());
+
+  /* Binlog even on partial success so replicas stay consistent. */
+  if (binlog)
+    result |= write_bin_log(thd, FALSE, thd->query(), thd->query_length());
+
+  mysql_rwlock_unlock(&LOCK_grant);
+  DBUG_RETURN(result);
+}
+
+/*
+  Rename a user.
+
+  SYNOPSIS
+    mysql_rename_user()
+    thd                         The current thread.
+    list                        The user name pairs: (from, to).
+
+  RETURN
+    FALSE       OK.
+    TRUE        Error.
+*/
+
+bool mysql_rename_user(THD *thd, List <LEX_USER> &list)
+{
+  int result;
+  String wrong_users;
+  LEX_USER *user_from, *tmp_user_from;
+  LEX_USER *user_to, *tmp_user_to;
+  List_iterator <LEX_USER> user_list(list);
+  bool some_users_renamed= FALSE;
+  DBUG_ENTER("mysql_rename_user");
+
+  /* RENAME USER may be skipped on replication client. */
+  Grant_tables tables;
+  const uint tables_to_open= Table_user | Table_db | Table_tables_priv |
+                             Table_columns_priv | Table_procs_priv |
+                             Table_proxies_priv | Table_roles_mapping;
+  if ((result= tables.open_and_lock(thd, tables_to_open, TL_WRITE)))
+    DBUG_RETURN(result != 1);
+
+  DBUG_ASSERT(!thd->is_current_stmt_binlog_format_row());
+
+  mysql_rwlock_wrlock(&LOCK_grant);
+  mysql_mutex_lock(&acl_cache->lock);
+
+  /* The list holds (from, to) pairs; consume two entries per iteration. */
+  while ((tmp_user_from= user_list++))
+  {
+    tmp_user_to= user_list++;
+    if (!(user_from= get_current_user(thd, tmp_user_from, false)))
+    {
+      /* NOTE(review): user_from is NULL here; confirm append_user accepts
+         a NULL user (upstream passes it as-is). */
+      append_user(thd, &wrong_users, user_from);
+      result= TRUE;
+      continue;
+    }
+    if (!(user_to= get_current_user(thd, tmp_user_to, false)))
+    {
+      append_user(thd, &wrong_users, user_to);
+      result= TRUE;
+      continue;
+    }
+    DBUG_ASSERT(!user_from->is_role());
+    DBUG_ASSERT(!user_to->is_role());
+
+    /*
+      Search all in-memory structures and grant tables
+      for a mention of the new user name.
+    */
+    if (handle_grant_data(thd, tables, 0, user_to, NULL) ||
+        handle_grant_data(thd, tables, 0, user_from, user_to) <= 0)
+    {
+      /* NOTE TODO renaming roles is not yet implemented */
+      append_user(thd, &wrong_users, user_from);
+      result= TRUE;
+      continue;
+    }
+    some_users_renamed= TRUE;
+    rebuild_acl_users();
+  }
+
+  /* Rebuild 'acl_dbs' since 'acl_users' has been modified */
+  rebuild_acl_dbs();
+
+  /* Rebuild 'acl_check_hosts' since 'acl_users' has been modified */
+  rebuild_check_host();
+
+  /*
+    Rebuild every user's role_grants since 'acl_users' has been sorted
+    and old pointers to ACL_USER elements are no longer valid
+  */
+  rebuild_role_grants();
+
+  mysql_mutex_unlock(&acl_cache->lock);
+
+  if (result)
+    my_error(ER_CANNOT_USER, MYF(0), "RENAME USER", wrong_users.c_ptr_safe());
+
+  if (some_users_renamed && mysql_bin_log.is_open())
+    result |= write_bin_log(thd, FALSE, thd->query(), thd->query_length());
+
+  mysql_rwlock_unlock(&LOCK_grant);
+  DBUG_RETURN(result);
+}
+
+/*
+  Alter a user's connection and resource settings.
+
+  SYNOPSIS
+    mysql_alter_user()
+    thd                         The current thread.
+    list                        The users to alter.
+
+  RETURN
+    > 0         Error. Error message already sent.
+    0           OK.
+*/
+int mysql_alter_user(THD* thd, List <LEX_USER> &users_list)
+{
+  DBUG_ENTER("mysql_alter_user");
+  int result= 0;
+  String wrong_users;
+  bool some_users_altered= false;
+
+  /* The only table we're altering is the user table. */
+  Grant_tables tables;
+  if ((result= tables.open_and_lock(thd, Table_user, TL_WRITE)))
+    DBUG_RETURN(result != 1);
+
+  /* Lock ACL data structures until we finish altering all users. */
+  mysql_rwlock_wrlock(&LOCK_grant);
+  mysql_mutex_lock(&acl_cache->lock);
+
+  LEX_USER *tmp_lex_user;
+  List_iterator <LEX_USER> users_list_iterator(users_list);
+
+  while ((tmp_lex_user= users_list_iterator++))
+  {
+    LEX_USER* lex_user= get_current_user(thd, tmp_lex_user, false);
+    if (!lex_user ||
+        replace_user_table(thd, tables.user_table(), lex_user, NO_ACL,
+                           false, false, true))
+    {
+      thd->clear_error();
+      append_user(thd, &wrong_users, tmp_lex_user);
+      result= TRUE;
+      continue;
+    }
+    some_users_altered= true;
+  }
+
+  /* Unlock ACL data structures. */
+  mysql_mutex_unlock(&acl_cache->lock);
+  mysql_rwlock_unlock(&LOCK_grant);
+
+  if (result)
+  {
+    /* 'if exists' flag leads to warnings instead of errors. */
+    if (thd->lex->create_info.if_exists())
+    {
+      push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
+                          ER_CANNOT_USER,
+                          ER_THD(thd, ER_CANNOT_USER),
+                          "ALTER USER", wrong_users.c_ptr_safe());
+      result= FALSE;
+    }
+    else
+    {
+      my_error(ER_CANNOT_USER, MYF(0),
+               "ALTER USER",
+               wrong_users.c_ptr_safe());
+    }
+  }
+
+  /* Binlog even on partial success so replicas stay consistent. */
+  if (some_users_altered)
+    result|= write_bin_log(thd, FALSE, thd->query(), thd->query_length());
+  DBUG_RETURN(result);
+}
+
+
+/*
+  Revoke every privilege the given user holds on stored routines of the
+  kind described by 'sph'. Returns true if any revocation failed.
+*/
+static bool
+mysql_revoke_sp_privs(THD *thd, Grant_tables *tables, const Sp_handler *sph,
+                      const LEX_USER *lex_user)
+{
+  bool failed= false;
+  uint idx, dropped_this_pass;
+
+  /*
+    Deleting an element may reorganize the hash, so rescan from the start
+    until a full pass removes nothing.
+  */
+  do
+  {
+    HASH *hash= sph->get_priv_hash();
+    for (idx= 0, dropped_this_pass= 0 ; idx < hash->records ; )
+    {
+      GRANT_NAME *grant_proc= (GRANT_NAME*) my_hash_element(hash, idx);
+      const char *user= grant_proc->user;
+      const char *host= safe_str(grant_proc->host.hostname);
+
+      if (strcmp(lex_user->user.str, user) ||
+          strcmp(lex_user->host.str, host))
+      {
+        idx++;                           /* not this user's grant, skip */
+        continue;
+      }
+
+      if (replace_routine_table(thd, grant_proc,
+                                tables->procs_priv_table().table(),
+                                *lex_user,
+                                grant_proc->db, grant_proc->tname,
+                                sph, ALL_KNOWN_ACL, 1) == 0)
+      {
+        /* Entry deleted; stay at the same index and re-examine it. */
+        dropped_this_pass= 1;
+        continue;
+      }
+      failed= true;                      /* revocation failed; keep going */
+      idx++;
+    }
+  } while (dropped_this_pass);
+  return failed;
+}
+
+
+/*
+  Revoke all privileges from a list of users.
+
+  SYNOPSIS
+    mysql_revoke_all()
+    thd                         The current thread.
+    list                        The users to revoke all privileges from.
+
+  RETURN
+    != 0        Error. Error message already sent.
+    0           OK.
+*/
+
+bool mysql_revoke_all(THD *thd,  List <LEX_USER> &list)
+{
+  uint counter, revoked;
+  int result, res;
+  ACL_DB *acl_db;
+  DBUG_ENTER("mysql_revoke_all");
+
+  Grant_tables tables;
+  const uint tables_to_open= Table_user | Table_db | Table_tables_priv |
+                             Table_columns_priv | Table_procs_priv |
+                             Table_proxies_priv | Table_roles_mapping;
+  if ((result= tables.open_and_lock(thd, tables_to_open, TL_WRITE)))
+    DBUG_RETURN(result != 1);
+
+  DBUG_ASSERT(!thd->is_current_stmt_binlog_format_row());
+
+  mysql_rwlock_wrlock(&LOCK_grant);
+  mysql_mutex_lock(&acl_cache->lock);
+
+  LEX_USER *lex_user, *tmp_lex_user;
+  List_iterator <LEX_USER> user_list(list);
+  while ((tmp_lex_user= user_list++))
+  {
+    if (!(lex_user= get_current_user(thd, tmp_lex_user, false)))
+    {
+      result= -1;
+      continue;
+    }
+
+    /* This is not a role and the user could not be found */
+    if (!lex_user->is_role() &&
+        !find_user_exact(lex_user->host.str, lex_user->user.str))
+    {
+      result= -1;
+      continue;
+    }
+
+    /* Clear global privileges in the user table first. */
+    if (replace_user_table(thd, tables.user_table(), lex_user,
+                           ALL_KNOWN_ACL, 1, 0, 0))
+    {
+      result= -1;
+      continue;
+    }
+
+    /* Remove db access privileges */
+    /*
+      Because acl_dbs and column_priv_hash shrink and may re-order
+      as privileges are removed, removal occurs in a repeated loop
+      until no more privileges are revoked.
+     */
+    do
+    {
+      for (counter= 0, revoked= 0 ; counter < acl_dbs.elements() ; )
+      {
+        const char *user, *host;
+
+        acl_db= &acl_dbs.at(counter);
+
+        user= acl_db->user;
+        host= safe_str(acl_db->host.hostname);
+
+        if (!strcmp(lex_user->user.str, user) &&
+            !strcmp(lex_user->host.str, host))
+        {
+          /* TODO(cvicentiu) refactor replace_db_table to use
+             Db_table instead of TABLE directly. */
+          if (!replace_db_table(tables.db_table().table(), acl_db->db,
+                                *lex_user, ALL_KNOWN_ACL, 1))
+          {
+            /*
+              Don't increment counter as replace_db_table deleted the
+              current element in acl_dbs.
+             */
+            revoked= 1;
+            continue;
+          }
+          result= -1; // Something went wrong
+        }
+        counter++;
+      }
+    } while (revoked);
+
+    /* Remove column access */
+    do
+    {
+      for (counter= 0, revoked= 0 ; counter < column_priv_hash.records ; )
+      {
+        const char *user,*host;
+        GRANT_TABLE *grant_table= ((GRANT_TABLE*)
+                                   my_hash_element(&column_priv_hash, counter));
+
+        user= grant_table->user;
+        host= safe_str(grant_table->host.hostname);
+
+        if (!strcmp(lex_user->user.str,user) &&
+            !strcmp(lex_user->host.str, host))
+        {
+          /* Empty column list revokes all column-level privileges. */
+          List <LEX_COLUMN> columns;
+          /* TODO(cvicentiu) refactor replace_db_table to use
+             Db_table instead of TABLE directly. */
+          if (replace_column_table(grant_table,
+                                   tables.columns_priv_table().table(),
+                                   *lex_user, columns, grant_table->db,
+                                   grant_table->tname, ALL_KNOWN_ACL, 1))
+            result= -1;
+
+          /* TODO(cvicentiu) refactor replace_db_table to use
+             Db_table instead of TABLE directly. */
+          if ((res= replace_table_table(thd, grant_table,
+                                        tables.tables_priv_table().table(),
+                                        *lex_user, grant_table->db,
+                                        grant_table->tname, ALL_KNOWN_ACL,
+                                        NO_ACL, 1)))
+          {
+            if (res > 0)
+              result= -1;
+            else
+            {
+              /*
+                Entry was deleted. We have to retry the loop as the
+                hash table has probably been reorganized.
+              */
+              revoked= 1;
+              continue;
+            }
+          }
+        }
+        counter++;
+      }
+    } while (revoked);
+
+    /* Remove procedure access */
+    if (mysql_revoke_sp_privs(thd, &tables, &sp_handler_function, lex_user) ||
+        mysql_revoke_sp_privs(thd, &tables, &sp_handler_procedure, lex_user) ||
+        mysql_revoke_sp_privs(thd, &tables, &sp_handler_package_spec, lex_user) ||
+        mysql_revoke_sp_privs(thd, &tables, &sp_handler_package_body, lex_user))
+      result= -1;
+
+    ACL_USER_BASE *user_or_role;
+    /* remove role grants */
+    if (lex_user->is_role())
+    {
+      /* this can not fail due to get_current_user already having searched for it */
+      user_or_role= find_acl_role(lex_user->user.str, true);
+    }
+    else
+    {
+      user_or_role= find_user_exact(lex_user->host.str, lex_user->user.str);
+    }
+    /*
+      Find every role grant pair matching the role_grants array and remove it,
+      both from the acl_roles_mappings and the roles_mapping table
+    */
+    for (counter= 0; counter < user_or_role->role_grants.elements; counter++)
+    {
+      ACL_ROLE *role_grant= *dynamic_element(&user_or_role->role_grants,
+                                             counter, ACL_ROLE**);
+      ROLE_GRANT_PAIR *pair = find_role_grant_pair(&lex_user->user,
+                                                   &lex_user->host,
+                                                   &role_grant->user);
+      /* TODO(cvicentiu) refactor replace_roles_mapping_table to use
+         Roles_mapping_table instead of TABLE directly. */
+      if (replace_roles_mapping_table(tables.roles_mapping_table().table(),
+                                      &lex_user->user, &lex_user->host,
+                                      &role_grant->user, false, pair, true))
+      {
+        result= -1; //Something went wrong
+      }
+      update_role_mapping(&lex_user->user, &lex_user->host,
+                          &role_grant->user, false, pair, true);
+      /*
+        Delete from the parent_grantee array of the roles granted,
+        the entry pointing to this user_or_role
+      */
+      remove_ptr_from_dynarray(&role_grant->parent_grantee, user_or_role);
+    }
+    /* TODO
+       How to handle an error in the replace_roles_mapping_table, in
+       regards to the privileges held in memory
+    */
+
+    /* Finally, clear the role_grants array */
+    if (counter == user_or_role->role_grants.elements)
+    {
+      reset_dynamic(&user_or_role->role_grants);
+    }
+    /*
+      If we are revoking from a role, we need to update all the parent grantees
+    */
+    if (lex_user->is_role())
+    {
+      propagate_role_grants((ACL_ROLE *)user_or_role, PRIVS_TO_MERGE::ALL);
+    }
+  }
+
+  mysql_mutex_unlock(&acl_cache->lock);
+
+  if (result)
+    my_message(ER_REVOKE_GRANTS, ER_THD(thd, ER_REVOKE_GRANTS), MYF(0));
+
+  /* Always binlog: partial revokes must still reach replicas. */
+  result= result |
+    write_bin_log(thd, FALSE, thd->query(), thd->query_length());
+
+  mysql_rwlock_unlock(&LOCK_grant);
+
+  DBUG_RETURN(result);
+}
+
+
+
+
+/**
+  If the defining user for a routine does not exist, then the ACL lookup
+  code should raise two errors which we should intercept.  We convert the more
+  descriptive error into a warning, and consume the other.
+
+  If any other errors are raised, then we set a flag that should indicate
+  that there was some failure we should complain at a higher level.
+*/
+class Silence_routine_definer_errors : public Internal_error_handler
+{
+public:
+  Silence_routine_definer_errors() : is_grave(FALSE) {}
+
+  virtual ~Silence_routine_definer_errors() = default;
+
+  virtual bool handle_condition(THD *thd,
+                                uint sql_errno,
+                                const char* sqlstate,
+                                Sql_condition::enum_warning_level *level,
+                                const char* msg,
+                                Sql_condition ** cond_hdl);
+
+  /* True when some error other than a missing-grant error was raised. */
+  bool has_errors() { return is_grave; }
+
+private:
+  bool is_grave;   // set once a non-ignorable error has been seen
+};
+
+bool
+Silence_routine_definer_errors::handle_condition(
+  THD *thd,
+  uint sql_errno,
+  const char*,
+  Sql_condition::enum_warning_level *level,
+  const char* msg,
+  Sql_condition ** cond_hdl)
+{
+  *cond_hdl= NULL;
+
+  /* Only error-level conditions are of interest; pass the rest through. */
+  if (*level != Sql_condition::WARN_LEVEL_ERROR)
+    return FALSE;
+
+  if (sql_errno == ER_NONEXISTING_PROC_GRANT)
+  {
+    /* Missing grant for a routine definer: downgrade to a warning and
+       report the condition as handled. */
+    push_warning(thd, Sql_condition::WARN_LEVEL_WARN, sql_errno, msg);
+    return TRUE;
+  }
+
+  /* Any other error is a genuine failure; remember it and let it through. */
+  is_grave= TRUE;
+  return FALSE;
+}
+
+
+/**
+  Revoke privileges for all users on a stored procedure.  Use an error handler
+  that converts errors about missing grants into warnings.
+
+  @param
+    thd                         The current thread.
+  @param
+    db				DB of the stored procedure
+  @param
+    name			Name of the stored procedure
+
+  @retval
+    FALSE       OK.
+  @retval
+    TRUE        Error. Error message not yet sent.
+*/
+
+bool sp_revoke_privileges(THD *thd, const char *sp_db, const char *sp_name,
+                          const Sp_handler *sph)
+{
+  uint idx, removed;
+  int result;
+  HASH *hash= sph->get_priv_hash();
+  Silence_routine_definer_errors error_handler;
+  DBUG_ENTER("sp_revoke_privileges");
+
+  Grant_tables tables;
+  const uint tables_to_open= Table_user | Table_db | Table_tables_priv |
+                             Table_columns_priv | Table_procs_priv |
+                             Table_proxies_priv | Table_roles_mapping;
+  if ((result= tables.open_and_lock(thd, tables_to_open, TL_WRITE)))
+    DBUG_RETURN(result != 1);
+
+  DBUG_ASSERT(!thd->is_current_stmt_binlog_format_row());
+
+  /* Be sure to pop this before exiting this scope! */
+  thd->push_internal_handler(&error_handler);
+
+  mysql_rwlock_wrlock(&LOCK_grant);
+  mysql_mutex_lock(&acl_cache->lock);
+
+  /*
+    Remove procedure access. Deleting an entry may reorganize the hash,
+    so rescan until one full pass removes nothing.
+  */
+  do
+  {
+    for (idx= 0, removed= 0 ; idx < hash->records ; )
+    {
+      GRANT_NAME *grant_proc= (GRANT_NAME*) my_hash_element(hash, idx);
+      if (my_strcasecmp(&my_charset_utf8mb3_bin, grant_proc->db, sp_db) ||
+          my_strcasecmp(system_charset_info, grant_proc->tname, sp_name))
+      {
+        idx++;                           /* different routine, skip */
+        continue;
+      }
+
+      /* Build a LEX_USER naming the grantee of this hash entry. */
+      LEX_USER lex_user;
+      lex_user.user.str= grant_proc->user;
+      lex_user.user.length= strlen(grant_proc->user);
+      lex_user.host.str= safe_str(grant_proc->host.hostname);
+      lex_user.host.length= strlen(lex_user.host.str);
+
+      if (replace_routine_table(thd, grant_proc,
+                                tables.procs_priv_table().table(), lex_user,
+                                grant_proc->db, grant_proc->tname,
+                                sph, ALL_KNOWN_ACL, 1) == 0)
+      {
+        /* Entry deleted; re-examine the same index. */
+        removed= 1;
+        continue;
+      }
+      idx++;
+    }
+  } while (removed);
+
+  mysql_mutex_unlock(&acl_cache->lock);
+  mysql_rwlock_unlock(&LOCK_grant);
+
+  thd->pop_internal_handler();
+
+  DBUG_RETURN(error_handler.has_errors());
+}
+
+
+/**
+  Grant EXECUTE,ALTER privilege for a stored procedure
+
+  @param thd The current thread.
+  @param sp_db
+  @param sp_name
+  @param sph
+
+  @return
+    @retval FALSE Success
+    @retval TRUE An error occurred. Error message not yet sent.
+*/
+
+bool sp_grant_privileges(THD *thd, const char *sp_db, const char *sp_name,
+                         const Sp_handler *sph)
+{
+  Security_context *sctx= thd->security_ctx;
+  LEX_USER *combo;
+  TABLE_LIST tables[1];
+  List user_list;
+  bool result;
+  ACL_USER *au;
+  Dummy_error_handler error_handler;
+  DBUG_ENTER("sp_grant_privileges");
+
+  if (!(combo=(LEX_USER*) thd->alloc(sizeof(LEX_USER))))
+    DBUG_RETURN(TRUE);
+
+  combo->user.str= (char *) sctx->priv_user;
+
+  mysql_mutex_lock(&acl_cache->lock);
+  if ((au= find_user_exact(combo->host.str= (char *) sctx->priv_host,
+                           combo->user.str)))
+    goto found_acl;
+
+  mysql_mutex_unlock(&acl_cache->lock);
+  DBUG_RETURN(TRUE);
+
+ found_acl:
+  mysql_mutex_unlock(&acl_cache->lock);
+
+  bzero((char*)tables, sizeof(TABLE_LIST));
+  user_list.empty();
+
+  tables->db.str= sp_db;
+  tables->db.length= sp_db ? strlen(sp_db) : 0;
+  tables->table_name.str= tables->alias.str= sp_name;
+  tables->table_name.length= tables->alias.length= sp_name ? strlen(sp_name) : 0;
+
+  thd->make_lex_string(&combo->user, combo->user.str, strlen(combo->user.str));
+  thd->make_lex_string(&combo->host, combo->host.str, strlen(combo->host.str));
+
+  combo->auth= NULL;
+
+  if (user_list.push_back(combo, thd->mem_root))
+    DBUG_RETURN(TRUE);
+
+  thd->lex->account_options.reset();
+
+  /*
+    Only care about whether the operation failed or succeeded
+    as all errors will be handled later.
+  */
+  thd->push_internal_handler(&error_handler);
+  result= mysql_routine_grant(thd, tables, sph, user_list,
+                              DEFAULT_CREATE_PROC_ACLS, FALSE, FALSE);
+  thd->pop_internal_handler();
+  DBUG_RETURN(result);
+}
+
+
+/**
+  Validate if a user can proxy as another user
+
  @param thd               current thread
+  @param user              the logged in user (proxy user)
+  @param authenticated_as  the effective user a plugin is trying to
+                           impersonate as (proxied user)
+  @return                  proxy user definition
+    @retval NULL           proxy user definition not found or not applicable
+    @retval non-null       the proxy user data
+*/
+
+static ACL_PROXY_USER *
+acl_find_proxy_user(const char *user, const char *host, const char *ip,
+                    const char *authenticated_as, bool *proxy_used)
+{
+  uint i;
+  /* if the proxied and proxy user are the same return OK */
+  DBUG_ENTER("acl_find_proxy_user");
+  DBUG_PRINT("info", ("user=%s host=%s ip=%s authenticated_as=%s",
+                      user, host, ip, authenticated_as));
+
+  if (!strcmp(authenticated_as, user))
+  {
+    DBUG_PRINT ("info", ("user is the same as authenticated_as"));
+    DBUG_RETURN (NULL);
+  }
+
+  *proxy_used= TRUE;
+  for (i=0; i < acl_proxy_users.elements; i++)
+  {
+    ACL_PROXY_USER *proxy= dynamic_element(&acl_proxy_users, i,
+                                           ACL_PROXY_USER *);
+    if (proxy->matches(host, user, ip, authenticated_as))
+      DBUG_RETURN(proxy);
+  }
+
+  DBUG_RETURN(NULL);
+}
+
+
/*
  Check whether the current user may GRANT/REVOKE PROXY on user@host.

  Allowed when: the server runs with --skip-grant-tables is NOT the case
  (otherwise error), the caller is a replication slave thread, the target
  equals the caller's own priv_user/priv_host (one can grant proxy for self),
  or the caller holds a matching WITH GRANT OPTION proxy privilege.

  @retval FALSE  access granted
  @retval TRUE   access denied (error already reported)
*/
bool
acl_check_proxy_grant_access(THD *thd, const char *host, const char *user,
                             bool with_grant)
{
  DBUG_ENTER("acl_check_proxy_grant_access");
  DBUG_PRINT("info", ("user=%s host=%s with_grant=%d", user, host,
                      (int) with_grant));
  if (!initialized)
  {
    my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--skip-grant-tables");
    DBUG_RETURN(1);
  }

  /* replication slave thread can do anything */
  if (thd->slave_thread)
  {
    DBUG_PRINT("info", ("replication slave"));
    DBUG_RETURN(FALSE);
  }

  /*
    one can grant proxy for self to others.
    Security context in THD contains two pairs of (user,host):
    1. (user,host) pair referring to inbound connection.
    2. (priv_user,priv_host) pair obtained from mysql.user table after doing
        authentication of incoming connection.
    Privileges should be checked wrt (priv_user, priv_host) tuple, because
    (user,host) pair obtained from inbound connection may have different
    values than what is actually stored in mysql.user table and while granting
    or revoking proxy privilege, user is expected to provide entries mentioned
    in mysql.user table.
  */
  if (thd->security_ctx->is_priv_user(user, host))
  {
    DBUG_PRINT("info", ("strcmp (%s, %s) my_casestrcmp (%s, %s) equal",
                        thd->security_ctx->priv_user, user,
                        host, thd->security_ctx->priv_host));
    DBUG_RETURN(FALSE);
  }

  mysql_mutex_lock(&acl_cache->lock);

  /* check for matching WITH PROXY rights */
  for (uint i=0; i < acl_proxy_users.elements; i++)
  {
    ACL_PROXY_USER *proxy= dynamic_element(&acl_proxy_users, i,
                                           ACL_PROXY_USER *);
    /* Match against the inbound connection identity, not priv_user. */
    if (proxy->matches(thd->security_ctx->host,
                       thd->security_ctx->user,
                       thd->security_ctx->ip,
                       user) &&
        proxy->get_with_grant())
    {
      DBUG_PRINT("info", ("found"));
      mysql_mutex_unlock(&acl_cache->lock);
      DBUG_RETURN(FALSE);
    }
  }

  mysql_mutex_unlock(&acl_cache->lock);
  my_error(ER_ACCESS_DENIED_NO_PASSWORD_ERROR, MYF(0),
           thd->security_ctx->user,
           thd->security_ctx->host_or_ip);
  DBUG_RETURN(TRUE);
}
+
+
/*
  Send one "GRANT PROXY ..." line to the client for every proxy privilege
  granted to username@hostname (used by SHOW GRANTS).

  @param buff/buffsize  scratch buffer used to format each grant line
  @retval false  success
  @retval true   network write to the client failed
*/
static bool
show_proxy_grants(THD *thd, const char *username, const char *hostname,
                  char *buff, size_t buffsize)
{
  Protocol *protocol= thd->protocol;
  /* int accumulator (-1 on failure); implicitly converted to bool on return */
  int error= 0;

  for (uint i=0; i < acl_proxy_users.elements; i++)
  {
    ACL_PROXY_USER *proxy= dynamic_element(&acl_proxy_users, i,
                                           ACL_PROXY_USER *);
    if (proxy->granted_on(hostname, username))
    {
      String global(buff, buffsize, system_charset_info);
      global.length(0);
      proxy->print_grant(&global);
      protocol->prepare_for_resend();
      protocol->store(global.ptr(), global.length(), global.charset());
      if (protocol->write())
      {
        error= -1;
        break;
      }
    }
  }
  return error;
}
+
+static int enabled_roles_insert(ACL_USER_BASE *role, void *context_data)
+{
+  TABLE *table= (TABLE*) context_data;
+  DBUG_ASSERT(role->flags & IS_ROLE);
+
+  restore_record(table, s->default_values);
+  table->field[0]->set_notnull();
+  table->field[0]->store(role->user.str, role->user.length,
+                         system_charset_info);
+  if (schema_table_store_record(table->in_use, table))
+    return -1;
+  return 0;
+}
+
/*
  Context passed to applicable_roles_insert(): the output schema table plus
  the starting grantee (a user) with its host and "user@host" strings
  precomputed by the caller.
*/
struct APPLICABLE_ROLES_DATA
{
  TABLE *table;
  const LEX_CSTRING host;
  const LEX_CSTRING user_and_host;
  ACL_USER *user;
};
+
/*
  Role-graph traversal callback: emit one APPLICABLE_ROLES row for the edge
  grantee -> role.  When the grantee is the starting user, the GRANTEE column
  shows "user@host"; when it is an intermediate role, the role name is used
  and the host is empty.

  @return 0 on success, -1 if storing the row failed.
*/
static int
applicable_roles_insert(ACL_USER_BASE *grantee, ACL_ROLE *role, void *ptr)
{
  APPLICABLE_ROLES_DATA *data= (APPLICABLE_ROLES_DATA *)ptr;
  CHARSET_INFO *cs= system_charset_info;
  TABLE *table= data->table;
  /* The first node of the traversal is the user itself; all others are roles. */
  bool is_role= grantee != data->user;
  const LEX_CSTRING *user_and_host= is_role ? &grantee->user
                                           : &data->user_and_host;
  const LEX_CSTRING *host= is_role ? &empty_clex_str : &data->host;

  restore_record(table, s->default_values);
  table->field[0]->store(user_and_host->str, user_and_host->length, cs);
  table->field[1]->store(role->user.str, role->user.length, cs);

  /* The mapping must exist since the traversal followed this very edge. */
  ROLE_GRANT_PAIR *pair=
    find_role_grant_pair(&grantee->user, host, &role->user);
  DBUG_ASSERT(pair);

  if (pair->with_admin)
    table->field[2]->store(STRING_WITH_LEN("YES"), cs);
  else
    table->field[2]->store(STRING_WITH_LEN("NO"), cs);

  /* Default role is only valid when looking at a role granted to a user. */
  if (!is_role)
  {
    if (data->user->default_rolename.length &&
        lex_string_eq(&data->user->default_rolename, &role->user))
      table->field[3]->store(STRING_WITH_LEN("YES"), cs);
    else
      table->field[3]->store(STRING_WITH_LEN("NO"), cs);
    table->field[3]->set_notnull();
  }

  if (schema_table_store_record(table->in_use, table))
    return -1;
  return 0;
}
+
+/**
+  Hash iterate function to count the number of total column privileges granted.
+*/
+static my_bool count_column_grants(void *grant_table,
+                                       void *current_count)
+{
+  HASH hash_columns = ((GRANT_TABLE *)grant_table)->hash_columns;
+  *(ulong *)current_count+= hash_columns.records;
+  return 0;
+}
+
+/**
+  SHOW function that computes the number of column grants.
+
+  This must be performed under the mutex in order to make sure the
+  iteration does not fail.
+*/
static int show_column_grants(THD *thd, SHOW_VAR *var, char *buff,
                              enum enum_var_type scope)
{
  var->type= SHOW_ULONG;
  var->value= buff;
  /* Default 0 when the ACL subsystem is disabled (--skip-grant-tables). */
  *(ulong *)buff= 0;
  if (initialized)
  {
    /* Iterate under both grant locks so the hash cannot change mid-scan. */
    mysql_rwlock_rdlock(&LOCK_grant);
    mysql_mutex_lock(&acl_cache->lock);
    my_hash_iterate(&column_priv_hash, count_column_grants, buff);
    mysql_mutex_unlock(&acl_cache->lock);
    mysql_rwlock_unlock(&LOCK_grant);
  }
  return 0;
}
+
+static int show_database_grants(THD *thd, SHOW_VAR *var, char *buff,
+                                enum enum_var_type scope)
+{
+  var->type= SHOW_UINT;
+  var->value= buff;
+  *(uint *)buff= uint(acl_dbs.elements());
+  return 0;
+}
+
+#else
/* Embedded build: access checking is compiled out; these stubs always allow. */
static bool set_user_salt_if_needed(ACL_USER *, int, plugin_ref)
{ return 0; }
bool check_grant(THD *, privilege_t, TABLE_LIST *, bool, uint, bool)
{ return 0; }
inline privilege_t public_access()
{ return NO_ACL; }
+#endif /*NO_EMBEDDED_ACCESS_CHECKS */
+
+
+#ifdef NO_EMBEDDED_ACCESS_CHECKS
+
/* Embedded build: GRANT/REVOKE PROXY is a no-op that reports success. */
bool Sql_cmd_grant_proxy::execute(THD *thd)
{
  my_ok(thd);
  return false;
}

/* Embedded build: table-level GRANT/REVOKE is a no-op that reports success. */
bool Sql_cmd_grant_table::execute(THD *thd)
{
  my_ok(thd);
  return false;
}


/* Embedded build: routine GRANT/REVOKE is a no-op that reports success. */
bool Sql_cmd_grant_sp::execute(THD *thd)
{
  my_ok(thd);
  return false;
}
+
+#else // not NO_EMBEDDED_ACCESS_CHECKS
+
+
+void Sql_cmd_grant::warn_hostname_requires_resolving(THD *thd,
+                                                     List &users)
+{
+  LEX_USER *user;
+  List_iterator  it(users);
+  while ((user= it++))
+  {
+    if (opt_skip_name_resolve && hostname_requires_resolving(user->host.str))
+      push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+                          ER_WARN_HOSTNAME_WONT_WORK,
+                          ER_THD(thd, ER_WARN_HOSTNAME_WONT_WORK));
+  }
+}
+
+
/*
  Common preamble for GRANT/REVOKE commands: record the invoker for
  binlogging and warn about unresolvable grantee hostnames.
*/
void Sql_cmd_grant::grant_stage0(THD *thd)
{
  thd->binlog_invoker(false);   // Replicate current user as grantor
  if (thd->security_ctx->user)  // If not replication
    warn_hostname_requires_resolving(thd, thd->lex->users_list);
}
+
+
+bool Sql_cmd_grant::user_list_reset_mqh(THD *thd, List &users)
+{
+  List_iterator  it(users);
+  LEX_USER *user, *tmp_user;
+  while ((tmp_user= it++))
+  {
+    if (!(user= get_current_user(thd, tmp_user)))
+      return true;
+    reset_mqh(user, 0);
+  }
+  return false;
+}
+
+
+bool Sql_cmd_grant_proxy::check_access_proxy(THD *thd, List &users)
+{
+  LEX_USER *user;
+  List_iterator  it(users);
+  if ((user= it++))
+  {
+    // GRANT/REVOKE PROXY has the target user as a first entry in the list
+    if (!(user= get_current_user(thd, user)) || !user->host.str)
+      return true;
+    if (acl_check_proxy_grant_access(thd, user->host.str, user->user.str,
+                                     m_grant_option & GRANT_ACL))
+      return true;
+  }
+  return false;
+}
+
+
/*
  Execute GRANT/REVOKE PROXY.  Verifies proxy-grant access, runs the grant
  under wsrep total-order isolation, and on a successful GRANT resets the
  per-hour resource counters of the affected users.
*/
bool Sql_cmd_grant_proxy::execute(THD *thd)
{
  LEX  *lex= thd->lex;

  DBUG_ASSERT(lex->first_select_lex()->table_list.first == NULL);
  DBUG_ASSERT((m_grant_option & ~GRANT_ACL) == NO_ACL); // only WITH GRANT OPTION

  grant_stage0(thd);

  if (thd->security_ctx->user /* If not replication */ &&
      check_access_proxy(thd, lex->users_list))
    return true;

  WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL);
  /* Conditionally writes to binlog */
  if (mysql_grant(thd, NULL/*db*/, lex->users_list, m_grant_option,
                  is_revoke(), true/*proxy*/))
    return true;

  return !is_revoke() && user_list_reset_mqh(thd, lex->users_list);

#ifdef WITH_WSREP
wsrep_error_label:
  return true;
#endif // WITH_WSREP
}
+
+
/*
  Access check + common preamble for GRANT/REVOKE on a named object
  (a specific table or routine).

  @retval false access granted, stage0 done
  @retval true  access denied (error already reported)
*/
bool Sql_cmd_grant_object::grant_stage0_exact_object(THD *thd,
                                                     TABLE_LIST *table)
{
  privilege_t priv= m_object_privilege | m_column_privilege_total | GRANT_ACL;
  if (check_access(thd, priv, table->db.str,
                   &table->grant.privilege, &table->grant.m_internal,
                   0, 0))
    return true;
  grant_stage0(thd);
  return false;
}
+
+
/*
  Execute GRANT/REVOKE on an explicitly named table.  Performs access checks,
  then applies the grant under wsrep total-order isolation.
*/
bool Sql_cmd_grant_table::execute_exact_table(THD *thd, TABLE_LIST *table)
{
  LEX  *lex= thd->lex;
  if (grant_stage0_exact_object(thd, table) ||
      check_grant(thd, m_object_privilege | m_column_privilege_total | GRANT_ACL,
                  lex->query_tables, FALSE, UINT_MAX, FALSE))
    return true;
  /* Conditionally writes to binlog */
  WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL);
  return mysql_table_grant(thd, lex->query_tables, lex->users_list,
                           m_columns, m_object_privilege,
                           is_revoke());
#ifdef WITH_WSREP
wsrep_error_label:
  return true;
#endif // WITH_WSREP
}
+
+
/*
  Execute GRANT/REVOKE on a stored routine.  With ALL PRIVILEGES the grant
  set becomes all routine privileges (minus GRANT OPTION, which is carried
  over from the statement separately).
*/
bool Sql_cmd_grant_sp::execute(THD *thd)
{
  /* Column privileges make no sense for routines. */
  DBUG_ASSERT(!m_columns.elements);
  DBUG_ASSERT(!m_column_privilege_total);
  LEX  *lex= thd->lex;
  TABLE_LIST *table= lex->first_select_lex()->table_list.first;
  privilege_t grants= m_all_privileges
               ? (PROC_ACLS & ~GRANT_ACL) | (m_object_privilege & GRANT_ACL)
               : m_object_privilege;

  if (!table) // e.g: GRANT EXECUTE ON PROCEDURE *.*
  {
    my_message(ER_ILLEGAL_GRANT_FOR_TABLE, ER_THD(thd, ER_ILLEGAL_GRANT_FOR_TABLE),
               MYF(0));
    return true;
  }

  if (grant_stage0_exact_object(thd, table) ||
      check_grant_routine(thd, grants|GRANT_ACL, lex->query_tables, &m_sph, 0))
    return true;

  /* Conditionally writes to binlog */
  WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL);
  if (mysql_routine_grant(thd, lex->query_tables, &m_sph,
                          lex->users_list, grants,
                          is_revoke(), true))
    return true;
  my_ok(thd);
  return false;
#ifdef WITH_WSREP
wsrep_error_label:
  return true;
#endif // WITH_WSREP
}
+
+
/*
  Execute GRANT/REVOKE on a database mask (e.g. GRANT ... ON db.* or *.*),
  i.e. when no exact table was named.  Column privileges are rejected here.
*/
bool Sql_cmd_grant_table::execute_table_mask(THD *thd)
{
  LEX  *lex= thd->lex;
  DBUG_ASSERT(lex->first_select_lex()->table_list.first == NULL);

  if (check_access(thd, m_object_privilege | m_column_privilege_total | GRANT_ACL,
                   m_db.str, NULL, NULL, 1, 0))
    return true;

  grant_stage0(thd);

  if (m_columns.elements) // e.g. GRANT SELECT (a) ON *.*
  {
    my_message(ER_ILLEGAL_GRANT_FOR_TABLE, ER_THD(thd, ER_ILLEGAL_GRANT_FOR_TABLE),
               MYF(0));
    return true;
  }

  WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL);
  /* Conditionally writes to binlog */
  if (mysql_grant(thd, m_db.str, lex->users_list, m_object_privilege,
                  is_revoke(), false/*not proxy*/))
    return true;

  /* On GRANT, also reset the per-hour resource counters of the grantees. */
  return !is_revoke() && user_list_reset_mqh(thd, lex->users_list);

#ifdef WITH_WSREP
wsrep_error_label:
  return true;
#endif // WITH_WSREP
}
+
+
+bool Sql_cmd_grant_table::execute(THD *thd)
+{
+  TABLE_LIST *table= thd->lex->first_select_lex()->table_list.first;
+  return table ? execute_exact_table(thd, table) :
+                 execute_table_mask(thd);
+}
+
+
+#endif // NO_EMBEDDED_ACCESS_CHECKS
+
+
+
/*
  Status variables describing the in-memory ACL caches (SHOW STATUS).
  Most entries read a counter directly; the two SHOW_SIMPLE_FUNC entries
  compute their value under the appropriate locks.
*/
SHOW_VAR acl_statistics[] = {
#ifndef NO_EMBEDDED_ACCESS_CHECKS
  {"column_grants",    (char*)show_column_grants,          SHOW_SIMPLE_FUNC},
  {"database_grants",  (char*)show_database_grants,        SHOW_SIMPLE_FUNC},
  {"function_grants",  (char*)&func_priv_hash.records,     SHOW_ULONG},
  {"procedure_grants", (char*)&proc_priv_hash.records,     SHOW_ULONG},
  {"package_spec_grants", (char*)&package_spec_priv_hash.records, SHOW_ULONG},
  {"package_body_grants", (char*)&package_body_priv_hash.records, SHOW_ULONG},
  {"proxy_users",      (char*)&acl_proxy_users.elements,   SHOW_SIZE_T},
  {"role_grants",      (char*)&acl_roles_mappings.records, SHOW_ULONG},
  {"roles",            (char*)&acl_roles.records,          SHOW_ULONG},
  {"table_grants",     (char*)&column_priv_hash.records,   SHOW_ULONG},
  {"users",            (char*)&acl_users.elements,         SHOW_SIZE_T},
#endif
  {NullS, NullS, SHOW_LONG},
};
+
+/* Check if a role is granted to a user/role. We traverse the role graph
+   and return true if we find a match.
+
+   hostname == NULL means we are looking for a role as a starting point,
+   otherwise a user.
+*/
bool check_role_is_granted(const char *username, const char *hostname,
                           const char *rolename)
{
  DBUG_ENTER("check_role_is_granted");
  bool result= false;
#ifndef NO_EMBEDDED_ACCESS_CHECKS
  ACL_USER_BASE *root;
  mysql_mutex_lock(&acl_cache->lock);
  /* hostname != NULL: start from a user; otherwise start from a role. */
  if (hostname)
    root= find_user_exact(hostname, username);
  else
    root= find_acl_role(username, false);

  LEX_CSTRING role_lex;
  role_lex.str= rolename;
  role_lex.length= strlen(rolename);

  /* The callback makes the traversal return -1 once the role is found. */
  if (root && /* No grantee, nothing to search. */
      traverse_role_graph_down(root, &role_lex, check_role_is_granted_callback,
                               NULL) == -1)
  {
    /* We have found the role during our search. */
    result= true;
  }

  /* We haven't found the role or we had no initial grantee to start from. */
  mysql_mutex_unlock(&acl_cache->lock);
#endif
  DBUG_RETURN(result);
}
+
/*
  Fill INFORMATION_SCHEMA.ENABLED_ROLES: the current role and, recursively,
  all roles granted to it.  When no role is set (or the role cannot be
  found), a single NULL row is emitted.
*/
int fill_schema_enabled_roles(THD *thd, TABLE_LIST *tables, COND *cond)
{
  TABLE *table= tables->table;
#ifndef NO_EMBEDDED_ACCESS_CHECKS
  if (thd->security_ctx->priv_role[0])
  {
    mysql_rwlock_rdlock(&LOCK_grant);
    mysql_mutex_lock(&acl_cache->lock);
    ACL_ROLE *acl_role= find_acl_role(thd->security_ctx->priv_role, false);
    if (acl_role)
      traverse_role_graph_down(acl_role, table, enabled_roles_insert, NULL);
    mysql_mutex_unlock(&acl_cache->lock);
    mysql_rwlock_unlock(&LOCK_grant);
    if (acl_role)
      return 0;
  }
#endif

  /* No current role (or role not found): emit one NULL row. */
  restore_record(table, s->default_values);
  table->field[0]->set_null();
  return schema_table_store_record(table->in_use, table);
}
+
+
+/*
+  This shows all roles granted to current user
+  and recursively all roles granted to those roles
+*/
int fill_schema_applicable_roles(THD *thd, TABLE_LIST *tables, COND *cond)
{
  int res= 0;
#ifndef NO_EMBEDDED_ACCESS_CHECKS
  if (initialized)
  {
    TABLE *table= tables->table;
    Security_context *sctx= thd->security_ctx;
    mysql_rwlock_rdlock(&LOCK_grant);
    mysql_mutex_lock(&acl_cache->lock);
    ACL_USER *user= find_user_exact(sctx->priv_host, sctx->priv_user);
    if (user)
    {
      /* Precompute "user@host" once; rows are emitted by the callback. */
      char buff[USER_HOST_BUFF_SIZE+10];
      DBUG_ASSERT(user->user.length + user->hostname_length +2 < sizeof(buff));
      char *end= strxmov(buff, user->user.str, "@", user->host.hostname, NULL);
      APPLICABLE_ROLES_DATA data= { table,
        { user->host.hostname, user->hostname_length },
        { buff, (size_t)(end - buff) }, user
      };

      res= traverse_role_graph_down(user, &data, 0, applicable_roles_insert);
    }

    mysql_mutex_unlock(&acl_cache->lock);
    mysql_rwlock_unlock(&LOCK_grant);
  }
#endif

  return res;
}
+
+
/*
  Case-insensitive wildcard match of str against wildstr, where wild_one
  matches exactly one character, wild_many matches any sequence, and
  wild_prefix escapes the following character.

  @return 0 on match, non-zero on mismatch.
*/
int wild_case_compare(CHARSET_INFO *cs, const char *str,const char *wildstr)
{
  int flag;
  DBUG_ENTER("wild_case_compare");
  DBUG_PRINT("enter",("str: '%s'  wildstr: '%s'",str,wildstr));
  while (*wildstr)
  {
    /* Literal segment: compare character by character (case-folded). */
    while (*wildstr && *wildstr != wild_many && *wildstr != wild_one)
    {
      if (*wildstr == wild_prefix && wildstr[1])
	wildstr++;
      if (my_toupper(cs, *wildstr++) !=
          my_toupper(cs, *str++)) DBUG_RETURN(1);
    }
    if (! *wildstr ) DBUG_RETURN (*str != 0);
    if (*wildstr++ == wild_one)
    {
      if (! *str++) DBUG_RETURN (1);	/* One char; skip */
    }
    else
    {						/* Found '*' */
      if (!*wildstr) DBUG_RETURN(0);		/* '*' as last char: OK */
      flag=(*wildstr != wild_many && *wildstr != wild_one);
      /* Try every suffix of str as a continuation point (recursively). */
      do
      {
	if (flag)
	{
	  char cmp;
	  if ((cmp= *wildstr) == wild_prefix && wildstr[1])
	    cmp=wildstr[1];
	  cmp=my_toupper(cs, cmp);
	  /* Fast-forward to the next possible anchor character. */
	  while (*str && my_toupper(cs, *str) != cmp)
	    str++;
	  if (!*str) DBUG_RETURN (1);
	}
	if (wild_case_compare(cs, str,wildstr) == 0) DBUG_RETURN (0);
      } while (*str++);
      DBUG_RETURN(1);
    }
  }
  DBUG_RETURN (*str != '\0');
}
+
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
/*
  Store one row in an information_schema privilege table.  Columns are:
  GRANTEE, catalog ("def"), then optionally db, table, column (skipped when
  the corresponding argument is NULL), PRIVILEGE_TYPE, IS_GRANTABLE.

  @retval false  row stored
  @retval true   storing failed
*/
static bool update_schema_privilege(THD *thd, TABLE *table, const char *buff,
                                    const char* db, const char* t_name,
                                    const char* column, uint col_length,
                                    const char *priv, uint priv_length,
                                    const char* is_grantable)
{
  int i= 2;  /* first two fields are always GRANTEE and catalog */
  CHARSET_INFO *cs= system_charset_info;
  restore_record(table, s->default_values);
  table->field[0]->store(buff, (uint) strlen(buff), cs);
  table->field[1]->store(STRING_WITH_LEN("def"), cs);
  if (db)
    table->field[i++]->store(db, (uint) strlen(db), cs);
  if (t_name)
    table->field[i++]->store(t_name, (uint) strlen(t_name), cs);
  if (column)
    table->field[i++]->store(column, col_length, cs);
  table->field[i++]->store(priv, priv_length, cs);
  table->field[i]->store(is_grantable, strlen(is_grantable), cs);
  return schema_table_store_record(thd, table);
}
+
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
/*
  Small helper that formats "'user'@'host'" into an internal buffer;
  used as the GRANTEE value for information_schema privilege tables.
*/
class Grantee_str
{
  char m_buff[USER_HOST_BUFF_SIZE + 6 /* 4 quotes, @, '\0' */];
public:
  Grantee_str(const char *user, const char *host)
  {
    DBUG_ASSERT(strlen(user) + strlen(host) + 6 < sizeof(m_buff));
    strxmov(m_buff, "'", user, "'@'", host, "'", NullS);
  }
  /* Implicit conversion so the object can be passed as a C string. */
  operator const char *() const { return m_buff; }
};
+#endif
+
+
/*
  Fill INFORMATION_SCHEMA.USER_PRIVILEGES from the in-memory user ACLs.
  Without SELECT on mysql.*, only the caller's own entries are shown.
  A user with no global privileges gets a single USAGE row.
*/
int fill_schema_user_privileges(THD *thd, TABLE_LIST *tables, COND *cond)
{
#ifndef NO_EMBEDDED_ACCESS_CHECKS
  int error= 0;
  uint counter;
  ACL_USER *acl_user;
  TABLE *table= tables->table;
  bool no_global_access= check_access(thd, SELECT_ACL, "mysql",
                                      NULL, NULL, 1, 1);
  DBUG_ENTER("fill_schema_user_privileges");

  if (!initialized)
    DBUG_RETURN(0);
  mysql_mutex_lock(&acl_cache->lock);

  for (counter=0 ; counter < acl_users.elements ; counter++)
  {
    const char *user,*host, *is_grantable="YES";
    acl_user=dynamic_element(&acl_users,counter,ACL_USER*);
    user= acl_user->user.str;
    host= safe_str(acl_user->host.hostname);

    /* Restricted view: only the caller's own account. */
    if (no_global_access &&
        !thd->security_ctx->is_priv_user(user, host))
      continue;

    privilege_t want_access(acl_user->access);
    if (!(want_access & GRANT_ACL))
      is_grantable= "NO";

    Grantee_str grantee(user, host);
    if (!(want_access & ~GRANT_ACL))
    {
      /* Nothing besides (possibly) GRANT OPTION: emit a USAGE row. */
      if (update_schema_privilege(thd, table, grantee, 0, 0, 0, 0,
                                  STRING_WITH_LEN("USAGE"), is_grantable))
      {
        error= 1;
        goto err;
      }
    }
    else
    {
      /* One row per privilege bit set (GRANT OPTION excluded). */
      uint priv_id;
      ulonglong j;
      privilege_t test_access(want_access & ~GRANT_ACL);
      for (priv_id=0, j = SELECT_ACL;j <= GLOBAL_ACLS; priv_id++,j <<= 1)
      {
        if (test_access & j)
        {
          if (update_schema_privilege(thd, table, grantee, 0, 0, 0, 0,
                                      command_array[priv_id],
                                      command_lengths[priv_id], is_grantable))
          {
            error= 1;
            goto err;
          }
        }
      }
    }
  }
err:
  mysql_mutex_unlock(&acl_cache->lock);

  DBUG_RETURN(error);
#else
  return(0);
#endif
}
+
+
/*
  Fill INFORMATION_SCHEMA.SCHEMA_PRIVILEGES from the in-memory database ACLs.
  Without SELECT on mysql.*, only the caller's own entries are shown.
  A db entry with only GRANT OPTION gets a single USAGE row.
*/
int fill_schema_schema_privileges(THD *thd, TABLE_LIST *tables, COND *cond)
{
#ifndef NO_EMBEDDED_ACCESS_CHECKS
  int error= 0;
  uint counter;
  ACL_DB *acl_db;
  TABLE *table= tables->table;
  bool no_global_access= check_access(thd, SELECT_ACL, "mysql",
                                      NULL, NULL, 1, 1);
  DBUG_ENTER("fill_schema_schema_privileges");

  if (!initialized)
    DBUG_RETURN(0);
  mysql_mutex_lock(&acl_cache->lock);

  for (counter=0 ; counter < acl_dbs.elements() ; counter++)
  {
    const char *user, *host, *is_grantable="YES";

    acl_db=&acl_dbs.at(counter);
    user= acl_db->user;
    host= safe_str(acl_db->host.hostname);

    /* Restricted view: only the caller's own account. */
    if (no_global_access &&
        !thd->security_ctx->is_priv_user(user, host))
      continue;

    privilege_t want_access(acl_db->access);
    if (want_access)
    {
      if (!(want_access & GRANT_ACL))
      {
        is_grantable= "NO";
      }
      Grantee_str grantee(user, host);
      if (!(want_access & ~GRANT_ACL))
      {
        /* Only GRANT OPTION set: emit a USAGE row. */
        if (update_schema_privilege(thd, table, grantee, acl_db->db, 0, 0,
                                    0, STRING_WITH_LEN("USAGE"), is_grantable))
        {
          error= 1;
          goto err;
        }
      }
      else
      {
        /* One row per privilege bit set (GRANT OPTION excluded). */
        int cnt;
        ulonglong j;
        privilege_t test_access(want_access & ~GRANT_ACL);
        for (cnt=0, j = SELECT_ACL; j <= DB_ACLS; cnt++,j <<= 1)
          if (test_access & j)
          {
            if (update_schema_privilege(thd, table,
                                        grantee, acl_db->db, 0, 0, 0,
                                        command_array[cnt], command_lengths[cnt],
                                        is_grantable))
            {
              error= 1;
              goto err;
            }
          }
      }
    }
  }
err:
  mysql_mutex_unlock(&acl_cache->lock);

  DBUG_RETURN(error);
#else
  return (0);
#endif
}
+
+
/*
  Fill INFORMATION_SCHEMA.TABLE_PRIVILEGES from the table grant hash.
  Without SELECT on mysql.*, only the caller's own entries are shown.
  Tables whose grants are purely column-level are skipped here (they appear
  in COLUMN_PRIVILEGES instead).
*/
int fill_schema_table_privileges(THD *thd, TABLE_LIST *tables, COND *cond)
{
#ifndef NO_EMBEDDED_ACCESS_CHECKS
  int error= 0;
  uint index;
  TABLE *table= tables->table;
  bool no_global_access= check_access(thd, SELECT_ACL, "mysql",
                                      NULL, NULL, 1, 1);
  DBUG_ENTER("fill_schema_table_privileges");

  mysql_rwlock_rdlock(&LOCK_grant);

  for (index=0 ; index < column_priv_hash.records ; index++)
  {
    const char *user, *host, *is_grantable= "YES";
    GRANT_TABLE *grant_table= (GRANT_TABLE*) my_hash_element(&column_priv_hash,
                                                             index);
    user= grant_table->user;
    host= safe_str(grant_table->host.hostname);

    /* Restricted view: only the caller's own account. */
    if (no_global_access &&
        !thd->security_ctx->is_priv_user(user, host))
      continue;

    privilege_t table_access(grant_table->privs);
    if (table_access)
    {
      privilege_t test_access(table_access & ~GRANT_ACL);
      /*
        We should skip 'usage' privilege on table if
        we have any privileges on column(s) of this table
      */
      if (!test_access && grant_table->cols)
        continue;
      if (!(table_access & GRANT_ACL))
        is_grantable= "NO";

      Grantee_str grantee(user, host);
      if (!test_access)
      {
        /* Only GRANT OPTION set: emit a USAGE row. */
        if (update_schema_privilege(thd, table,
                                    grantee, grant_table->db,
                                    grant_table->tname, 0, 0,
                                    STRING_WITH_LEN("USAGE"), is_grantable))
        {
          error= 1;
          goto err;
        }
      }
      else
      {
        /* One row per privilege bit set (GRANT OPTION excluded). */
        ulonglong j;
        int cnt;
        for (cnt= 0, j= SELECT_ACL; j <= TABLE_ACLS; cnt++, j<<= 1)
        {
          if (test_access & j)
          {
            if (update_schema_privilege(thd, table,
                                        grantee, grant_table->db,
                                        grant_table->tname, 0, 0,
                                        command_array[cnt],
                                        command_lengths[cnt], is_grantable))
            {
              error= 1;
              goto err;
            }
          }
        }
      }
    }
  }
err:
  mysql_rwlock_unlock(&LOCK_grant);

  DBUG_RETURN(error);
#else
  return (0);
#endif
}
+
+
+/*
+  Fill the INFORMATION_SCHEMA.COLUMN_PRIVILEGES table.
+
+  Walks the global column_priv_hash and emits one row per
+  (grantee, table, column, privilege) combination the current user is
+  allowed to see.
+
+  @return 0 on success, 1 if a row could not be stored.
+*/
+int fill_schema_column_privileges(THD *thd, TABLE_LIST *tables, COND *cond)
+{
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  int error= 0;
+  uint index;
+  TABLE *table= tables->table;
+  /* Without global SELECT on mysql.* only the user's own grants are shown */
+  bool no_global_access= check_access(thd, SELECT_ACL, "mysql",
+                                      NULL, NULL, 1, 1);
+  DBUG_ENTER("fill_schema_column_privileges");
+
+  mysql_rwlock_rdlock(&LOCK_grant);
+
+  for (index=0 ; index < column_priv_hash.records ; index++)
+  {
+    const char *user, *host, *is_grantable= "YES";
+    GRANT_TABLE *grant_table= (GRANT_TABLE*) my_hash_element(&column_priv_hash,
+                                                          index);
+    user= grant_table->user;
+    host= safe_str(grant_table->host.hostname);
+
+    if (no_global_access &&
+        !thd->security_ctx->is_priv_user(user, host))
+      continue;
+
+    privilege_t table_access(grant_table->cols);
+    if (table_access != NO_ACL)
+    {
+      if (!(grant_table->privs & GRANT_ACL))
+        is_grantable= "NO";
+
+      privilege_t test_access(table_access & ~GRANT_ACL);
+      Grantee_str grantee(user, host);
+      if (!test_access)
+        continue;
+      else
+      {
+        ulonglong j;
+        int cnt;
+        /* One iteration per table-level privilege bit */
+        for (cnt= 0, j= SELECT_ACL; j <= TABLE_ACLS; cnt++, j<<= 1)
+        {
+          if (test_access & j)
+          {
+            for (uint col_index=0 ;
+                 col_index < grant_table->hash_columns.records ;
+                 col_index++)
+            {
+              GRANT_COLUMN *grant_column = (GRANT_COLUMN*)
+                my_hash_element(&grant_table->hash_columns,col_index);
+              if ((grant_column->rights & j) && (table_access & j))
+              {
+                if (update_schema_privilege(thd, table,
+                                            grantee,
+                                            grant_table->db,
+                                            grant_table->tname,
+                                            grant_column->column,
+                                            grant_column->key_length,
+                                            command_array[cnt],
+                                            command_lengths[cnt], is_grantable))
+                {
+                  error= 1;
+                  goto err;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+err:
+  mysql_rwlock_unlock(&LOCK_grant);
+
+  DBUG_RETURN(error);
+#else
+  return (0);
+#endif
+}
+
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+/*
+  fill effective privileges for table
+
+  SYNOPSIS
+    fill_effective_table_privileges()
+    thd     thread handler
+    grant   grants table descriptor
+    db      db name
+    table   table name
+*/
+
+void fill_effective_table_privileges(THD *thd, GRANT_INFO *grant,
+                                     const char *db, const char *table)
+{
+  Security_context *sctx= thd->security_ctx;
+  DBUG_ENTER("fill_effective_table_privileges");
+  DBUG_PRINT("enter", ("Host: '%s', Ip: '%s', User: '%s', table: `%s`.`%s`",
+                       sctx->priv_host, sctx->ip, sctx->priv_user, db, table));
+  /* --skip-grants */
+  if (!initialized)
+  {
+    DBUG_PRINT("info", ("skip grants"));
+    grant->privilege= ALL_KNOWN_ACL;             // everything is allowed
+    DBUG_PRINT("info", ("privilege 0x%llx", (longlong) grant->privilege));
+    DBUG_VOID_RETURN;
+  }
+
+  /* global privileges */
+  grant->privilege= sctx->master_access;
+
+  if (!thd->db.str || strcmp(db, thd->db.str))
+  {
+    /* Not the connection's current database: look up db-level access */
+    grant->privilege|= acl_get_all3(sctx, db, FALSE);
+  }
+  else
+  {
+    /* Current database: db-level access is already cached in the context */
+    grant->privilege|= sctx->db_access;
+  }
+
+  /* table privileges */
+  mysql_rwlock_rdlock(&LOCK_grant);
+  grant->refresh(sctx, db, table);
+
+  /* OR in the table-level grants for the user, its role, and public */
+  if (grant->grant_table_user != 0)
+  {
+    grant->privilege|= grant->grant_table_user->privs;
+  }
+  if (grant->grant_table_role != 0)
+  {
+    grant->privilege|= grant->grant_table_role->privs;
+  }
+  if (grant->grant_public != 0)
+  {
+    grant->privilege|= grant->grant_public->privs;
+  }
+  mysql_rwlock_unlock(&LOCK_grant);
+
+  DBUG_PRINT("info", ("privilege 0x%llx", (longlong) grant->privilege));
+  DBUG_VOID_RETURN;
+}
+
+#else /* NO_EMBEDDED_ACCESS_CHECKS */
+
+/****************************************************************************
+ Dummy wrappers when we don't have any access checks
+****************************************************************************/
+
+/* With access checks compiled out, routine-level ACL checks always pass. */
+bool check_routine_level_acl(THD *thd, const char *db, const char *name,
+                             const Sp_handler *sph)
+{
+  return FALSE;
+}
+
+#endif
+
+/**
+  Return information about user or current user.
+
+  @param[in] thd          thread handler
+  @param[in] user         user
+  @param[in] lock         whether &acl_cache->lock mutex needs to be locked
+
+  @return
+    - On success, return a valid pointer to initialized
+    LEX_USER, which contains user information.
+    - On error, return 0.
+*/
+
+LEX_USER *get_current_user(THD *thd, LEX_USER *user, bool lock)
+{
+  if (user->user.str == current_user.str)  // current_user
+    return create_default_definer(thd, false);
+
+  if (user->user.str == current_role.str)  // current_role
+    return create_default_definer(thd, true);
+
+  if (user->host.str == NULL) // Possibly a role
+  {
+    // to be reexecution friendly we have to make a copy
+    LEX_USER *dup= (LEX_USER*) thd->memdup(user, sizeof(*user));
+    if (!dup)
+      return 0;
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+    if (has_auth(user, thd->lex))
+    {
+      // The statement carries authentication data, so it cannot be a role
+      dup->host= host_not_specified;
+      return dup;
+    }
+
+    role_name_check_result result= check_role_name(&dup->user, true);
+    if (result == ROLE_NAME_INVALID)
+      return 0;
+    if (result == ROLE_NAME_PUBLIC)
+    {
+      // An empty host string marks the entry as a role
+      dup->host= empty_clex_str;
+      return dup;
+    }
+
+    /*
+      Name is ambiguous: treat it as a role when such a role exists,
+      otherwise as a user with an unspecified host.
+    */
+    if (lock)
+      mysql_mutex_lock(&acl_cache->lock);
+    if (find_acl_role(dup->user.str, false))
+      dup->host= empty_clex_str;
+    else
+      dup->host= host_not_specified;
+    if (lock)
+      mysql_mutex_unlock(&acl_cache->lock);
+#endif
+
+    return dup;
+  }
+
+  return user;
+}
+
+/* One registered internal schema: its name and its ACL rules. */
+struct ACL_internal_schema_registry_entry
+{
+  const LEX_CSTRING *m_name;                   ///< schema name
+  const ACL_internal_schema_access *m_access;  ///< per-schema ACL rules
+};
+
+/**
+  Internal schemas registered.
+  Currently, this is only:
+  - performance_schema
+  - information_schema
+  This can be reused later for:
+  - mysql
+*/
+static ACL_internal_schema_registry_entry registry_array[2];
+static uint m_registry_array_size= 0;   // number of used entries above
+
+/**
+  Add an internal schema to the registry.
+  @param name the schema name
+  @param access the schema ACL specific rules
+*/
+/**
+  Add an internal schema to the registry.
+  @param name the schema name
+  @param access the schema ACL specific rules
+*/
+void ACL_internal_schema_registry::register_schema
+  (const LEX_CSTRING *name, const ACL_internal_schema_access *access)
+{
+  DBUG_ASSERT(m_registry_array_size < array_elements(registry_array));
+
+  /* Not thread safe, and does not need to be. */
+  ACL_internal_schema_registry_entry *entry=
+    &registry_array[m_registry_array_size++];
+  entry->m_name= name;
+  entry->m_access= access;
+}
+
+/**
+  Search per internal schema ACL by name.
+  @param name a schema name
+  @return per schema rules, or NULL
+*/
+/**
+  Search per internal schema ACL by name.
+  Linear scan of the (tiny) registry, comparing names case-insensitively.
+  @param name a schema name
+  @return per schema rules, or NULL
+*/
+const ACL_internal_schema_access *
+ACL_internal_schema_registry::lookup(const char *name)
+{
+  DBUG_ASSERT(name != NULL);
+
+  uint i;
+
+  for (i= 0; i < m_registry_array_size; i++)
+  {
+    if (my_strcasecmp(system_charset_info,
+                      registry_array[i].m_name->str,
+                      name) == 0)
+      return registry_array[i].m_access;
+  }
+  return NULL;
+}
+
+/**
+  Get a cached internal schema access.
+  @param grant_internal_info the cache
+  @param schema_name the name of the internal schema
+*/
+/**
+  Get a cached internal schema access.
+  Without a cache structure the registry is consulted directly; with one,
+  the first lookup result is memoized in it.
+  @param grant_internal_info the cache (may be NULL)
+  @param schema_name the name of the internal schema
+*/
+const ACL_internal_schema_access *
+get_cached_schema_access(GRANT_INTERNAL_INFO *grant_internal_info,
+                         const char *schema_name)
+{
+  if (!grant_internal_info)
+    return ACL_internal_schema_registry::lookup(schema_name);
+
+  if (!grant_internal_info->m_schema_lookup_done)
+  {
+    grant_internal_info->m_schema_access=
+      ACL_internal_schema_registry::lookup(schema_name);
+    grant_internal_info->m_schema_lookup_done= TRUE;
+  }
+  return grant_internal_info->m_schema_access;
+}
+
+/**
+  Get a cached internal table access.
+  @param grant_internal_info the cache
+  @param schema_name the name of the internal schema
+  @param table_name the name of the internal table
+*/
+/**
+  Get a cached internal table access.
+  The first call resolves the schema and table rules and memoizes the
+  result in the cache; later calls return the memoized value.
+  @param grant_internal_info the cache (must be non-NULL)
+  @param schema_name the name of the internal schema
+  @param table_name the name of the internal table
+*/
+const ACL_internal_table_access *
+get_cached_table_access(GRANT_INTERNAL_INFO *grant_internal_info,
+                        const char *schema_name,
+                        const char *table_name)
+{
+  DBUG_ASSERT(grant_internal_info);
+  if (grant_internal_info->m_table_lookup_done)
+    return grant_internal_info->m_table_access;
+
+  const ACL_internal_schema_access *schema_access=
+    get_cached_schema_access(grant_internal_info, schema_name);
+  if (schema_access != NULL)
+    grant_internal_info->m_table_access= schema_access->lookup(table_name);
+  grant_internal_info->m_table_lookup_done= TRUE;
+  return grant_internal_info->m_table_access;
+}
+
+
+/****************************************************************************
+   AUTHENTICATION CODE
+   including initial connect handshake, invoking appropriate plugins,
+   client-server plugin negotiation, COM_CHANGE_USER, and native
+   MySQL authentication plugins.
+****************************************************************************/
+
+/* few defines to have less ifdef's in the code below */
+#ifdef EMBEDDED_LIBRARY
+#undef HAVE_OPENSSL
+#ifdef NO_EMBEDDED_ACCESS_CHECKS
+/* In embedded builds without ACL these checks compile away to constants. */
+#define initialized 0
+#define check_for_max_user_connections(X,Y)   0
+#define get_or_create_user_conn(A,B,C,D) 0
+#endif
+#endif
+#ifndef HAVE_OPENSSL
+/* Without OpenSSL there is no acceptor fd and sslaccept() always fails. */
+#define ssl_acceptor_fd 0
+#define sslaccept(A,B,C,D) 1
+#endif
+
+/**
+  The internal version of what plugins know as MYSQL_PLUGIN_VIO,
+  basically the context of the authentication session
+*/
+struct MPVIO_EXT :public MYSQL_PLUGIN_VIO
+{
+  MYSQL_SERVER_AUTH_INFO auth_info;
+  ACL_USER *acl_user;       ///< a copy, independent from acl_users array
+  plugin_ref plugin;        ///< what plugin we're under
+  LEX_CSTRING db;           ///< db name from the handshake packet
+  /** when restarting a plugin this caches the last client reply */
+  struct {
+    const char *plugin;     ///< plugin name the client replied with
+    char *pkt;              ///< pointer into NET::buff
+    uint pkt_len;
+  } cached_client_reply;
+  /** this caches the first plugin packet for restart request on the client */
+  struct {
+    char *pkt;
+    uint pkt_len;
+  } cached_server_packet;
+  uint curr_auth;                    ///< an index in acl_user->auth[]
+  int packets_read, packets_written; ///< counters for send/received packets
+  bool make_it_fail;                 ///< force failure even if auth succeeds
+  /** when plugin returns a failure this tells us what really happened */
+  enum { SUCCESS, FAILURE, RESTART } status;
+};
+
+/**
+  a helper function to report an access denied error in most proper places
+*/
+/**
+  a helper function to report an access denied error in most proper places
+*/
+static void login_failed_error(THD *thd)
+{
+  Security_context *ctx= &thd->main_security_ctx;
+  const int err_code= access_denied_error_code(thd->password);
+  const char *using_password= thd->password ? ER_THD(thd, ER_YES)
+                                            : ER_THD(thd, ER_NO);
+
+  my_error(err_code, MYF(0), ctx->user, ctx->host_or_ip, using_password);
+  general_log_print(thd, COM_CONNECT, ER_THD(thd, err_code),
+                    ctx->user, ctx->host_or_ip, using_password);
+  status_var_increment(thd->status_var.access_denied_errors);
+  /*
+    Log access denied messages to the error log when log-warnings = 2
+    so that the overhead of the general query log is not required to track
+    failed connections.
+  */
+  if (global_system_variables.log_warnings > 1)
+  {
+    sql_print_warning(ER_THD(thd, err_code),
+                      ctx->user, ctx->host_or_ip, using_password);
+  }
+}
+
+/**
+  sends a server handshake initialization packet, the very first packet
+  after the connection was established
+
+  Packet format:
+
+    Bytes       Content
+    -----       ----
+    1           protocol version (always 10)
+    n           server version string, \0-terminated
+    4           thread id
+    8           first 8 bytes of the plugin provided data (scramble)
+    1           \0 byte, terminating the first part of a scramble
+    2           server capabilities (two lower bytes)
+    1           server character set
+    2           server status
+    2           server capabilities (two upper bytes)
+    1           length of the scramble
+    10          reserved, always 0
+    n           rest of the plugin provided data (at least 12 bytes)
+    1           \0 byte, terminating the second part of a scramble
+
+  @retval 0 ok
+  @retval 1 error
+*/
+static bool send_server_handshake_packet(MPVIO_EXT *mpvio,
+                                         const char *data, uint data_len)
+{
+  DBUG_ASSERT(mpvio->status == MPVIO_EXT::RESTART);
+  DBUG_ASSERT(data_len <= 255);
+
+  THD *thd= mpvio->auth_info.thd;
+  /* version string + scramble + fixed-size fields; freed via my_afree */
+  char *buff= (char *) my_alloca(1 + SERVER_VERSION_LENGTH + 1 + data_len + 64);
+  char scramble_buf[SCRAMBLE_LENGTH];
+  char *end= buff;
+  DBUG_ENTER("send_server_handshake_packet");
+
+  *end++= protocol_version;
+
+  thd->client_capabilities= CLIENT_BASIC_FLAGS;
+
+  if (opt_using_transactions)
+    thd->client_capabilities|= CLIENT_TRANSACTIONS;
+
+  thd->client_capabilities|= CAN_CLIENT_COMPRESS;
+
+  if (ssl_acceptor_fd)
+  {
+    thd->client_capabilities |= CLIENT_SSL;
+  }
+
+  /* keep the plugin data so it can be replayed on a RESTART request */
+  if (data_len)
+  {
+    mpvio->cached_server_packet.pkt= (char*)thd->memdup(data, data_len);
+    mpvio->cached_server_packet.pkt_len= data_len;
+  }
+
+  if (data_len < SCRAMBLE_LENGTH)
+  {
+    if (data_len)
+    {
+      /*
+        the first packet *must* have at least 20 bytes of a scramble.
+        if a plugin provided less, we pad it to 20 with zeros
+      */
+      memcpy(scramble_buf, data, data_len);
+      bzero(scramble_buf + data_len, SCRAMBLE_LENGTH - data_len);
+      data= scramble_buf;
+    }
+    else
+    {
+      /*
+        if the default plugin does not provide the data for the scramble at
+        all, we generate a scramble internally anyway, just in case the
+        user account (that will be known only later) uses a
+        native_password_plugin (which needs a scramble). If we don't send a
+        scramble now - wasting 20 bytes in the packet -
+        native_password_plugin will have to send it in a separate packet,
+        adding one more round trip.
+      */
+      thd_create_random_password(thd, thd->scramble, SCRAMBLE_LENGTH);
+      data= thd->scramble;
+    }
+    data_len= SCRAMBLE_LENGTH;
+  }
+
+  /* When server version is specified in config file, don't include
+     the replication hack prefix. */
+  if (using_custom_server_version)
+    end= strnmov(end, server_version, SERVER_VERSION_LENGTH) + 1;
+  else
+    end= strxnmov(end, SERVER_VERSION_LENGTH, RPL_VERSION_HACK, server_version, NullS) + 1;
+
+  int4store((uchar*) end, mpvio->auth_info.thd->thread_id);
+  end+= 4;
+
+  /*
+    Old clients does not understand long scrambles, but can ignore packet
+    tail: that's why first part of the scramble is placed here, and second
+    part at the end of packet.
+  */
+  end= (char*) memcpy(end, data, SCRAMBLE_LENGTH_323);
+  end+= SCRAMBLE_LENGTH_323;
+  *end++= 0;
+
+  int2store(end, thd->client_capabilities);
+  /* write server characteristics: up to 16 bytes allowed */
+  end[2]= (char) default_charset_info->number;
+  int2store(end+3, mpvio->auth_info.thd->server_status);
+  int2store(end+5, thd->client_capabilities >> 16);
+  end[7]= data_len;
+  DBUG_EXECUTE_IF("poison_srv_handshake_scramble_len", end[7]= -100;);
+  DBUG_EXECUTE_IF("increase_srv_handshake_scramble_len", end[7]= 50;);
+  bzero(end + 8, 6);
+  int4store(end + 14, thd->client_capabilities >> 32);
+  end+= 18;
+  /* write scramble tail */
+  end= (char*) memcpy(end, data + SCRAMBLE_LENGTH_323,
+                      data_len - SCRAMBLE_LENGTH_323);
+  end+= data_len - SCRAMBLE_LENGTH_323;
+  /* and finally the name of the default auth plugin, \0-terminated */
+  end= strmake(end, plugin_name(mpvio->plugin)->str,
+                    plugin_name(mpvio->plugin)->length);
+
+  int res= my_net_write(&mpvio->auth_info.thd->net, (uchar*) buff,
+                        (size_t) (end - buff + 1)) ||
+           net_flush(&mpvio->auth_info.thd->net);
+  my_afree(buff);
+  DBUG_RETURN (res);
+}
+
+/*
+  Reject short (pre-4.1) scrambles when --secure-auth is enabled.
+
+  @retval 0  secure auth is off, nothing to enforce
+  @retval 1  rejected; an error has been reported
+*/
+static bool secure_auth(THD *thd)
+{
+  if (!opt_secure_auth)
+    return 0;
+
+  /*
+    If the server is running in secure auth mode, short scrambles are
+    forbidden. Extra juggling to report the same error as the old code.
+  */
+  if (!(thd->client_capabilities & CLIENT_PROTOCOL_41))
+  {
+    my_error(ER_NOT_SUPPORTED_AUTH_MODE, MYF(0));
+    general_log_print(thd, COM_CONNECT,
+                      ER_THD(thd, ER_NOT_SUPPORTED_AUTH_MODE));
+    return 1;
+  }
+
+  my_error(ER_SERVER_IS_IN_SECURE_AUTH_MODE, MYF(0),
+           thd->security_ctx->user,
+           thd->security_ctx->host_or_ip);
+  general_log_print(thd, COM_CONNECT,
+                    ER_THD(thd, ER_SERVER_IS_IN_SECURE_AUTH_MODE),
+                    thd->security_ctx->user,
+                    thd->security_ctx->host_or_ip);
+  return 1;
+}
+
+/**
+  sends a "change plugin" packet, requesting a client to restart authentication
+  using a different authentication plugin
+
+  Packet format:
+
+    Bytes       Content
+    -----       ----
+    1           byte with the value 254
+    n           client plugin to use, \0-terminated
+    n           plugin provided data
+
+  In a special case of switching from native_password_plugin to
+  old_password_plugin, the packet contains only one - the first - byte,
+  plugin name is omitted, plugin data aren't needed as the scramble was
+  already sent. This one-byte packet is identical to the "use the short
+  scramble" packet in the protocol before plugins were introduced.
+
+  @retval 0 ok
+  @retval 1 error
+*/
+static bool send_plugin_request_packet(MPVIO_EXT *mpvio,
+                                       const uchar *data, uint data_len)
+{
+  NET *net= &mpvio->auth_info.thd->net;
+  static uchar switch_plugin_request_buf[]= { 254 };
+  DBUG_ENTER("send_plugin_request_packet");
+
+  const char *client_auth_plugin=
+    ((st_mysql_auth *) (plugin_decl(mpvio->plugin)->info))->client_auth_plugin;
+
+  DBUG_EXECUTE_IF("auth_disconnect", { DBUG_RETURN(1); });
+  DBUG_EXECUTE_IF("auth_invalid_plugin", client_auth_plugin="foo/bar"; );
+  DBUG_ASSERT(client_auth_plugin);
+
+  /*
+    we send an old "short 4.0 scramble request", if we need to request a
+    client to use 4.0 auth plugin (short scramble) and the scramble was
+    already sent to the client
+
+    below, cached_client_reply.plugin is the plugin name that client has used,
+    client_auth_plugin is derived from mysql.user table, for the given
+    user account, it's the plugin that the client need to use to login.
+  */
+  bool switch_from_long_to_short_scramble=
+    client_auth_plugin == old_password_plugin_name.str &&
+    my_strcasecmp(system_charset_info, mpvio->cached_client_reply.plugin,
+                  native_password_plugin_name.str) == 0;
+
+  /* one-byte packet; only allowed when --secure-auth permits it */
+  if (switch_from_long_to_short_scramble)
+    DBUG_RETURN (secure_auth(mpvio->auth_info.thd) ||
+                 my_net_write(net, switch_plugin_request_buf, 1) ||
+                 net_flush(net));
+
+  /*
+    We never request a client to switch from a short to long scramble.
+    Plugin-aware clients can do that, but traditionally it meant to
+    ask an old 4.0 client to use the new 4.1 authentication protocol.
+  */
+  bool switch_from_short_to_long_scramble=
+    client_auth_plugin == native_password_plugin_name.str &&
+    my_strcasecmp(system_charset_info, mpvio->cached_client_reply.plugin,
+                  old_password_plugin_name.str) == 0;
+
+  if (switch_from_short_to_long_scramble)
+  {
+    my_error(ER_NOT_SUPPORTED_AUTH_MODE, MYF(0));
+    general_log_print(mpvio->auth_info.thd, COM_CONNECT,
+                      ER_THD(mpvio->auth_info.thd, ER_NOT_SUPPORTED_AUTH_MODE));
+    DBUG_RETURN (1);
+  }
+
+  DBUG_PRINT("info", ("requesting client to use the %s plugin",
+                      client_auth_plugin));
+  /* full switch packet: 254, plugin name, plugin-provided data */
+  DBUG_RETURN(net_write_command(net, switch_plugin_request_buf[0],
+                                (uchar*) client_auth_plugin,
+                                strlen(client_auth_plugin) + 1,
+                                (uchar*) data, data_len));
+}
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+
+/**
+  Safeguard to avoid blocking the root, when max_password_errors
+  limit is reached.
+
+  Currently, we allow password errors for superuser on localhost.
+
+  @return true, if password errors should be ignored, and user should not be locked.
+*/
+/*
+  Safeguard to avoid blocking the root, when max_password_errors
+  limit is reached: password errors are tolerated for accounts holding
+  PRIV_IGNORE_MAX_PASSWORD_ERRORS that connect from localhost.
+
+  @return true if password errors should be ignored for this account.
+*/
+static bool ignore_max_password_errors(const ACL_USER *acl_user)
+{
+  if (!(acl_user->access & PRIV_IGNORE_MAX_PASSWORD_ERRORS))
+    return false;
+
+  const char *host= acl_user->host.hostname;
+  return !strcasecmp(host, "localhost") ||
+         !strcmp(host, "127.0.0.1") ||
+         !strcmp(host, "::1");
+}
+/**
+   Finds acl entry in user database for authentication purposes.
+
+   Finds a user and copies it into mpvio. Creates a fake user
+   if no matching user account is found.
+
+   @retval 0    found
+   @retval 1    error
+*/
+static bool find_mpvio_user(MPVIO_EXT *mpvio)
+{
+  Security_context *sctx= mpvio->auth_info.thd->security_ctx;
+  DBUG_ENTER("find_mpvio_user");
+  DBUG_ASSERT(mpvio->acl_user == 0);
+
+  mysql_mutex_lock(&acl_cache->lock);
+
+  ACL_USER *user= find_user_or_anon(sctx->host, sctx->user, sctx->ip);
+
+  /* copy so we can drop the lock before talking to the client */
+  if (user)
+    mpvio->acl_user= user->copy(mpvio->auth_info.thd->mem_root);
+
+  mysql_mutex_unlock(&acl_cache->lock);
+
+  if (!mpvio->acl_user)
+  {
+    /*
+      A matching user was not found. Fake it. Take any user, make the
+      authentication fail later.
+      This way we get a realistically looking failure, with occasional
+      "change auth plugin" requests even for nonexistent users. The ratio
+      of "change auth plugin" request will be the same for real and
+      nonexistent users.
+      Note, that we cannot pick any user at random, it must always be
+      the same user account for the incoming sctx->user name.
+    */
+    ulong nr1=1, nr2=4;
+    CHARSET_INFO *cs= &my_charset_latin1;
+    cs->hash_sort((uchar*) sctx->user, strlen(sctx->user), &nr1, &nr2);
+
+    mysql_mutex_lock(&acl_cache->lock);
+    if (!acl_users.elements)
+    {
+      mysql_mutex_unlock(&acl_cache->lock);
+      login_failed_error(mpvio->auth_info.thd);
+      DBUG_RETURN(1);
+    }
+    /* deterministic pick: hash of the user name modulo the user count */
+    uint i= nr1 % acl_users.elements;
+    ACL_USER *acl_user_tmp= dynamic_element(&acl_users, i, ACL_USER*);
+    mpvio->acl_user= acl_user_tmp->copy(mpvio->auth_info.thd->mem_root);
+    mysql_mutex_unlock(&acl_cache->lock);
+
+    mpvio->make_it_fail= true;
+  }
+
+  /* account locked out by too many consecutive password failures? */
+  if (mpvio->acl_user->password_errors >= max_password_errors &&
+      !ignore_max_password_errors(mpvio->acl_user))
+  {
+    my_error(ER_USER_IS_BLOCKED, MYF(0));
+    general_log_print(mpvio->auth_info.thd, COM_CONNECT,
+      ER_THD(mpvio->auth_info.thd, ER_USER_IS_BLOCKED));
+    DBUG_RETURN(1);
+  }
+
+  /* user account requires non-default plugin and the client is too old */
+  if (mpvio->acl_user->auth->plugin.str != native_password_plugin_name.str &&
+      mpvio->acl_user->auth->plugin.str != old_password_plugin_name.str &&
+      !(mpvio->auth_info.thd->client_capabilities & CLIENT_PLUGIN_AUTH))
+  {
+    DBUG_ASSERT(my_strcasecmp(system_charset_info,
+      mpvio->acl_user->auth->plugin.str, native_password_plugin_name.str));
+    DBUG_ASSERT(my_strcasecmp(system_charset_info,
+      mpvio->acl_user->auth->plugin.str, old_password_plugin_name.str));
+    my_error(ER_NOT_SUPPORTED_AUTH_MODE, MYF(0));
+    general_log_print(mpvio->auth_info.thd, COM_CONNECT,
+                      ER_THD(mpvio->auth_info.thd, ER_NOT_SUPPORTED_AUTH_MODE));
+    DBUG_RETURN (1);
+  }
+  DBUG_RETURN(0);
+}
+
+/*
+  Read the length-prefixed connection-attributes blob from the packet and
+  hand it to the performance schema.
+
+  @param[in,out] ptr      advanced past the length prefix on success
+  @param         end      one past the last byte of the packet
+  @param         from_cs  character set the attributes are encoded in
+  @return true on malformed input, false otherwise
+*/
+static bool
+read_client_connect_attrs(char **ptr, char *end, CHARSET_INFO *from_cs)
+{
+  char *start= *ptr;
+
+  /* nothing left in the packet to hold the length */
+  if (start >= end)
+    return true;
+
+  ulonglong length= safe_net_field_length_ll((uchar **) ptr, end - start);
+
+  /*
+    Reject when the length could not be decoded, when it exceeds the
+    artificial 64k cap, or when it claims more data than the packet holds.
+  */
+  if (*ptr == NULL || length > 65535 || *ptr + length > end)
+    return true;
+
+  if (PSI_CALL_set_thread_connect_attrs(*ptr, (uint)length, from_cs) &&
+      current_thd->variables.log_warnings)
+    sql_print_warning("Connection attributes of length %llu were truncated",
+                      length);
+  return false;
+}
+
+#endif
+
+/* the packet format is described in send_change_user_packet() */
+/*
+  Parse a COM_CHANGE_USER packet: user, password, database, charset,
+  optional client plugin name and connection attributes.
+  Returns 1 (and reports an error) on malformed input, 0 on success.
+*/
+static bool parse_com_change_user_packet(MPVIO_EXT *mpvio, uint packet_length)
+{
+  THD *thd= mpvio->auth_info.thd;
+  NET *net= &thd->net;
+  Security_context *sctx= thd->security_ctx;
+
+  char *user= (char*) net->read_pos;
+  char *end= user + packet_length;
+  /* Safe because there is always a trailing \0 at the end of the packet */
+  char *passwd= strend(user) + 1;
+  uint user_len= (uint)(passwd - user - 1);
+  char *db= passwd;
+  char db_buff[SAFE_NAME_LEN + 1];            // buffer to store db in utf8
+  char user_buff[USERNAME_LENGTH + 1];	      // buffer to store user in utf8
+  uint dummy_errors;
+  DBUG_ENTER ("parse_com_change_user_packet");
+
+  if (passwd >= end)
+  {
+    my_message(ER_UNKNOWN_COM_ERROR, ER_THD(thd, ER_UNKNOWN_COM_ERROR),
+               MYF(0));
+    DBUG_RETURN (1);
+  }
+
+  /*
+    Old clients send null-terminated string as password; new clients send
+    the size (1 byte) + string (not null-terminated). Hence in case of empty
+    password both send '\0'.
+
+    This strlen() can't be easily deleted without changing protocol.
+
+    Cast *passwd to an unsigned char, so that it doesn't extend the sign for
+    *passwd > 127 and become 2**32-127+ after casting to uint.
+  */
+  uint passwd_len= (thd->client_capabilities & CLIENT_SECURE_CONNECTION ?
+                    (uchar) (*passwd++) : (uint)strlen(passwd));
+
+  db+= passwd_len + 1;
+  /*
+    Database name is always NUL-terminated, so in case of empty database
+    the packet must contain at least the trailing '\0'.
+  */
+  if (db >= end)
+  {
+    my_message(ER_UNKNOWN_COM_ERROR, ER_THD(thd, ER_UNKNOWN_COM_ERROR),
+               MYF(0));
+    DBUG_RETURN (1);
+  }
+
+  size_t db_len= strlen(db);
+
+  char *next_field= db + db_len + 1;
+
+  /* optional 2-byte client character set number */
+  if (next_field + 1 < end)
+  {
+    if (thd_init_client_charset(thd, uint2korr(next_field)))
+      DBUG_RETURN(1);
+    next_field+= 2;
+  }
+
+  /* Convert database and user names to utf8 */
+  db_len= copy_and_convert(db_buff, sizeof(db_buff) - 1, system_charset_info,
+                           db, db_len, thd->charset(), &dummy_errors);
+
+  user_len= copy_and_convert(user_buff, sizeof(user_buff) - 1,
+                             system_charset_info, user, user_len,
+                             thd->charset(), &dummy_errors);
+
+  if (!(sctx->user= my_strndup(key_memory_MPVIO_EXT_auth_info, user_buff,
+                               user_len, MYF(MY_WME))))
+    DBUG_RETURN(1);
+
+  /* Clear variables that are allocated */
+  thd->user_connect= 0;
+  strmake_buf(sctx->priv_user, sctx->user);
+
+  if (thd->make_lex_string(&mpvio->db, db_buff, db_len) == 0)
+    DBUG_RETURN(1); /* The error is set by make_lex_string(). */
+
+  /*
+    Clear thd->db as it points to something, that will be freed when
+    connection is closed. We don't want to accidentally free a wrong
+    pointer if connect failed.
+  */
+  thd->reset_db(&null_clex_str);
+
+  if (!initialized)
+  {
+    // if mysqld's been started with --skip-grant-tables option
+    mpvio->status= MPVIO_EXT::SUCCESS;
+    DBUG_RETURN(0);
+  }
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  thd->password= passwd_len > 0;
+  if (find_mpvio_user(mpvio))
+    DBUG_RETURN(1);
+
+  /* determine which client-side plugin produced the password data */
+  const char *client_plugin;
+  if (thd->client_capabilities & CLIENT_PLUGIN_AUTH)
+  {
+    if (next_field >= end)
+    {
+      my_message(ER_UNKNOWN_COM_ERROR, ER_THD(thd, ER_UNKNOWN_COM_ERROR),
+                 MYF(0));
+      DBUG_RETURN(1);
+    }
+    client_plugin= next_field;
+    next_field+= strlen(next_field) + 1;
+  }
+  else
+  {
+    if (thd->client_capabilities & CLIENT_SECURE_CONNECTION)
+      client_plugin= native_password_plugin_name.str;
+    else
+    {
+      /*
+        Normally old clients use old_password_plugin, but for
+        a passwordless accounts we use native_password_plugin.
+        See guess_auth_plugin().
+      */
+      client_plugin= passwd_len ? old_password_plugin_name.str
+                                : native_password_plugin_name.str;
+    }
+  }
+
+  if ((thd->client_capabilities & CLIENT_CONNECT_ATTRS) &&
+      read_client_connect_attrs(&next_field, end, thd->charset()))
+  {
+    my_message(ER_UNKNOWN_COM_ERROR, ER_THD(thd, ER_UNKNOWN_COM_ERROR),
+               MYF(0));
+    DBUG_RETURN(1);
+  }
+
+  DBUG_PRINT("info", ("client_plugin=%s, restart", client_plugin));
+  /*
+    Remember the data part of the packet, to present it to plugin in
+    read_packet()
+  */
+  mpvio->cached_client_reply.pkt= passwd;
+  mpvio->cached_client_reply.pkt_len= passwd_len;
+  mpvio->cached_client_reply.plugin= client_plugin;
+  mpvio->status= MPVIO_EXT::RESTART;
+#endif
+
+  DBUG_RETURN (0);
+}
+
+
+/* the packet format is described in send_client_reply_packet() */
+static ulong parse_client_handshake_packet(MPVIO_EXT *mpvio,
+                                           uchar **buff, ulong pkt_len)
+{
+#ifndef EMBEDDED_LIBRARY
+  THD *thd= mpvio->auth_info.thd;
+  NET *net= &thd->net;
+  char *end;
+  DBUG_ASSERT(mpvio->status == MPVIO_EXT::FAILURE);
+
+  if (pkt_len < MIN_HANDSHAKE_SIZE)
+    return packet_error;
+
+  /*
+    Protocol buffer is guaranteed to always end with \0. (see my_net_read())
+    As the code below depends on this, lets check that.
+  */
+  DBUG_ASSERT(net->read_pos[pkt_len] == 0);
+
+  ulonglong client_capabilities= uint2korr(net->read_pos);
+  compile_time_assert(sizeof(client_capabilities) >= 8);
+  if (client_capabilities & CLIENT_PROTOCOL_41)
+  {
+    if (pkt_len < 32)
+      return packet_error;
+    client_capabilities|= ((ulong) uint2korr(net->read_pos+2)) << 16;
+    if (!(client_capabilities & CLIENT_MYSQL))
+    {
+      // it is client with mariadb extensions
+      ulonglong ext_client_capabilities=
+        (((ulonglong)uint4korr(net->read_pos + 28)) << 32);
+      client_capabilities|= ext_client_capabilities;
+    }
+  }
+
+  /* Disable those bits which are not supported by the client. */
+  compile_time_assert(sizeof(thd->client_capabilities) >= 8);
+  thd->client_capabilities&= client_capabilities;
+
+  DBUG_PRINT("info", ("client capabilities: %llu", thd->client_capabilities));
+  if (thd->client_capabilities & CLIENT_SSL)
+  {
+    unsigned long errptr __attribute__((unused));
+
+    /* Do the SSL layering. */
+    if (!ssl_acceptor_fd)
+      return packet_error;
+
+    DBUG_PRINT("info", ("IO layer change in progress..."));
+    mysql_rwlock_rdlock(&LOCK_ssl_refresh);
+    int ssl_ret = sslaccept(ssl_acceptor_fd, net->vio, net->read_timeout, &errptr);
+    mysql_rwlock_unlock(&LOCK_ssl_refresh);
+    ssl_acceptor_stats_update(ssl_ret);
+
+    if(ssl_ret)
+    {
+      DBUG_PRINT("error", ("Failed to accept new SSL connection"));
+      return packet_error;
+    }
+
+    DBUG_PRINT("info", ("Reading user information over SSL layer"));
+    pkt_len= my_net_read(net);
+    if (unlikely(pkt_len == packet_error || pkt_len < NORMAL_HANDSHAKE_SIZE))
+    {
+      DBUG_PRINT("error", ("Failed to read user information (pkt_len= %lu)",
+			   pkt_len));
+      return packet_error;
+    }
+  }
+
+  if (client_capabilities & CLIENT_PROTOCOL_41)
+  {
+    thd->max_client_packet_length= uint4korr(net->read_pos+4);
+    DBUG_PRINT("info", ("client_character_set: %d", (uint) net->read_pos[8]));
+    if (thd_init_client_charset(thd, (uint) net->read_pos[8]))
+      return packet_error;
+    end= (char*) net->read_pos+32;
+  }
+  else
+  {
+    if (pkt_len < 5)
+      return packet_error;
+    thd->max_client_packet_length= uint3korr(net->read_pos+2);
+    end= (char*) net->read_pos+5;
+  }
+
+  if (end >= (char*) net->read_pos+ pkt_len +2)
+    return packet_error;
+
+  if (thd->client_capabilities & CLIENT_IGNORE_SPACE)
+    thd->variables.sql_mode|= MODE_IGNORE_SPACE;
+  if (thd->client_capabilities & CLIENT_INTERACTIVE)
+    thd->variables.net_wait_timeout= thd->variables.net_interactive_timeout;
+
+  if (end >= (char*) net->read_pos+ pkt_len +2)
+    return packet_error;
+
+  if ((thd->client_capabilities & CLIENT_TRANSACTIONS) &&
+      opt_using_transactions)
+    net->return_status= &thd->server_status;
+
+  char *user= end;
+  char *passwd= strend(user)+1;
+  size_t user_len= (size_t)(passwd - user - 1), db_len;
+  char *db= passwd;
+  char user_buff[USERNAME_LENGTH + 1];	// buffer to store user in utf8
+  uint dummy_errors;
+
+  /*
+    Old clients send null-terminated string as password; new clients send
+    the size (1 byte) + string (not null-terminated). Hence in case of empty
+    password both send '\0'.
+
+    This strlen() can't be easily deleted without changing protocol.
+
+    Cast *passwd to an unsigned char, so that it doesn't extend the sign for
+    *passwd > 127 and become 2**32-127+ after casting to uint.
+  */
+  ulonglong len;
+  size_t passwd_len;
+
+  if (!(thd->client_capabilities & CLIENT_SECURE_CONNECTION))
+    len= strlen(passwd);
+  else if (!(thd->client_capabilities & CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA))
+    len= (uchar)(*passwd++);
+  else
+  {
+    len= safe_net_field_length_ll((uchar**)&passwd,
+                                      net->read_pos + pkt_len - (uchar*)passwd);
+    if (len > pkt_len)
+      return packet_error;
+  }
+
+  passwd_len= (size_t)len;
+  db= thd->client_capabilities & CLIENT_CONNECT_WITH_DB ?
+    db + passwd_len + 1 : 0;
+
+  if (passwd == NULL ||
+      passwd + passwd_len + MY_TEST(db) > (char*) net->read_pos + pkt_len)
+    return packet_error;
+
+  /* strlen() can't be easily deleted without changing protocol */
+  db_len= safe_strlen(db);
+
+  char *next_field;
+  const char *client_plugin= next_field= passwd + passwd_len + (db ? db_len + 1 : 0);
+
+  /*
+    Since 4.1 all database names are stored in utf8
+    The cast is ok as copy_with_error will create a new area for db
+  */
+  if (unlikely(thd->copy_with_error(system_charset_info,
+                                    (LEX_STRING*) &mpvio->db,
+                                    thd->charset(), db, db_len)))
+    return packet_error;
+
+  user_len= copy_and_convert(user_buff, sizeof(user_buff) - 1,
+                             system_charset_info, user, user_len,
+                             thd->charset(), &dummy_errors);
+  user= user_buff;
+
+  /* If username starts and ends in "'", chop them off */
+  if (user_len > 1 && user[0] == '\'' && user[user_len - 1] == '\'')
+  {
+    user++;
+    user_len-= 2;
+  }
+
+  /*
+    Clip username to allowed length in characters (not bytes).  This is
+    mostly for backward compatibility (to truncate long usernames, as
+    old 5.1 did)
+  */
+  user_len= Well_formed_prefix(system_charset_info, user, user_len,
+                               username_char_length).length();
+  user[user_len]= '\0';
+
+  Security_context *sctx= thd->security_ctx;
+
+  my_free(const_cast(sctx->user));
+  if (!(sctx->user= my_strndup(key_memory_MPVIO_EXT_auth_info, user, user_len, MYF(MY_WME))))
+    return packet_error; /* The error is set by my_strdup(). */
+
+
+  /*
+    Clear thd->db as it points to something, that will be freed when
+    connection is closed. We don't want to accidentally free a wrong
+    pointer if connect failed.
+  */
+  thd->reset_db(&null_clex_str);
+
+  if (!initialized)
+  {
+    // if mysqld's been started with --skip-grant-tables option
+    mpvio->status= MPVIO_EXT::SUCCESS;
+    return packet_error;
+  }
+
+  thd->password= passwd_len > 0;
+  if (find_mpvio_user(mpvio))
+    return packet_error;
+
+  if ((thd->client_capabilities & CLIENT_PLUGIN_AUTH) &&
+      (client_plugin < (char *)net->read_pos + pkt_len))
+  {
+    next_field+= strlen(next_field) + 1;
+  }
+  else
+  {
+    /* Some clients lie. Sad, but true */
+    thd->client_capabilities &= ~CLIENT_PLUGIN_AUTH;
+
+    if (thd->client_capabilities & CLIENT_SECURE_CONNECTION)
+      client_plugin= native_password_plugin_name.str;
+    else
+    {
+      /*
+        Normally old clients use old_password_plugin, but for
+        a passwordless accounts we use native_password_plugin.
+        See guess_auth_plugin().
+      */
+      client_plugin= passwd_len ? old_password_plugin_name.str
+                                : native_password_plugin_name.str;
+    }
+  }
+
+  if ((thd->client_capabilities & CLIENT_CONNECT_ATTRS) &&
+      read_client_connect_attrs(&next_field, ((char *)net->read_pos) + pkt_len,
+                                mpvio->auth_info.thd->charset()))
+    return packet_error;
+
+  /*
+    if the acl_user needs a different plugin to authenticate
+    (specified in GRANT ... AUTHENTICATED VIA plugin_name ..)
+    we need to restart the authentication in the server.
+    But perhaps the client has already used the correct plugin -
+    in that case the authentication on the client may not need to be
+    restarted and a server auth plugin will read the data that the client
+    has just send. Cache them to return in the next server_mpvio_read_packet().
+  */
+  if (!lex_string_eq(&mpvio->acl_user->auth->plugin, plugin_name(mpvio->plugin)))
+  {
+    mpvio->cached_client_reply.pkt= passwd;
+    mpvio->cached_client_reply.pkt_len= (uint)passwd_len;
+    mpvio->cached_client_reply.plugin= client_plugin;
+    mpvio->status= MPVIO_EXT::RESTART;
+    return packet_error;
+  }
+
+  /*
+    ok, we don't need to restart the authentication on the server.
+    but if the client used the wrong plugin, we need to restart
+    the authentication on the client. Do it here, the server plugin
+    doesn't need to know.
+  */
+  const char *client_auth_plugin=
+    ((st_mysql_auth *) (plugin_decl(mpvio->plugin)->info))->client_auth_plugin;
+
+  if (client_auth_plugin &&
+      my_strcasecmp(system_charset_info, client_plugin, client_auth_plugin))
+  {
+    mpvio->cached_client_reply.plugin= client_plugin;
+    if (send_plugin_request_packet(mpvio,
+                                   (uchar*) mpvio->cached_server_packet.pkt,
+                                   mpvio->cached_server_packet.pkt_len))
+      return packet_error;
+
+    passwd_len= my_net_read(&thd->net);
+    passwd= (char*)thd->net.read_pos;
+  }
+
+  *buff= (uchar*) passwd;
+  return (ulong)passwd_len;
+#else
+  return 0;
+#endif
+}
+
+
+/**
+  vio->write_packet() callback method for server authentication plugins
+
+  This function is called by a server authentication plugin, when it wants
+  to send data to the client.
+
+  It transparently wraps the data into a handshake packet,
+  and handles plugin negotiation with the client. If necessary,
+  it escapes the plugin data, if it starts with a mysql protocol packet byte.
+*/
+static int server_mpvio_write_packet(MYSQL_PLUGIN_VIO *param,
+                                   const uchar *packet, int packet_len)
+{
+  MPVIO_EXT *mpvio= (MPVIO_EXT *) param;
+  int res;
+  DBUG_ENTER("server_mpvio_write_packet");
+
+  /* reset cached_client_reply */
+  mpvio->cached_client_reply.pkt= 0;
+
+  /* for the 1st packet we wrap plugin data into the handshake packet */
+  if (mpvio->packets_written == 0)
+    res= send_server_handshake_packet(mpvio, (char*) packet, packet_len);
+  else if (mpvio->status == MPVIO_EXT::RESTART)
+    res= send_plugin_request_packet(mpvio, packet, packet_len);
+  else if (packet_len > 0 && (*packet == 1 || *packet == 255 || *packet == 254))
+  {
+    /*
+      we cannot allow plugin data packet to start from 255 or 254 -
+      as the client will treat it as an error or "change plugin" packet.
+      We'll escape these bytes with \1. Consequently, we
+      have to escape \1 byte too.
+    */
+    res= net_write_command(&mpvio->auth_info.thd->net, 1, (uchar*)"", 0,
+                           packet, packet_len);
+  }
+  else
+  {
+    res= my_net_write(&mpvio->auth_info.thd->net, packet, packet_len) ||
+         net_flush(&mpvio->auth_info.thd->net);
+  }
+  mpvio->status= MPVIO_EXT::FAILURE; // the status is no longer RESTART
+  mpvio->packets_written++;
+  DBUG_RETURN(res);
+}
+
+/**
+  vio->read_packet() callback method for server authentication plugins
+
+  This function is called by a server authentication plugin, when it wants
+  to read data from the client.
+
+  It transparently extracts the client plugin data, if embedded into
+  a client authentication handshake packet, and handles plugin negotiation
+  with the client, if necessary.
+*/
static int server_mpvio_read_packet(MYSQL_PLUGIN_VIO *param, uchar **buf)
{
  MPVIO_EXT * const mpvio= (MPVIO_EXT *) param;
  MYSQL_SERVER_AUTH_INFO * const ai= &mpvio->auth_info;
  ulong pkt_len;
  DBUG_ENTER("server_mpvio_read_packet");
  if (mpvio->status == MPVIO_EXT::RESTART)
  {
    const char *client_auth_plugin=
      ((st_mysql_auth *) (plugin_decl(mpvio->plugin)->info))->client_auth_plugin;
    if (client_auth_plugin == 0)
    {
      /* the plugin has no client-side part: there is nothing to read */
      mpvio->status= MPVIO_EXT::FAILURE;
      pkt_len= 0;
      *buf= 0;
      goto done;
    }

    if (mpvio->cached_client_reply.pkt)
    {
      DBUG_ASSERT(mpvio->packets_read > 0);
      /*
        if we have the data cached from the last server_mpvio_read_packet
        (which can be the case if it's a restarted authentication)
        and a client has used the correct plugin, then we can return the
        cached data straight away and avoid one round trip.
      */
      if (my_strcasecmp(system_charset_info, mpvio->cached_client_reply.plugin,
                        client_auth_plugin) == 0)
      {
        mpvio->status= MPVIO_EXT::FAILURE;
        pkt_len= mpvio->cached_client_reply.pkt_len;
        *buf= (uchar*) mpvio->cached_client_reply.pkt;
        mpvio->packets_read++;
        goto done;
      }
    }

    /*
      plugin wants to read the data without sending anything first.
      send an empty packet to force a server handshake packet to be sent
    */
    if (server_mpvio_write_packet(mpvio, 0, 0))
      pkt_len= packet_error;
    else
      pkt_len= my_net_read(&ai->thd->net);
  }
  else
    pkt_len= my_net_read(&ai->thd->net);

  if (unlikely(pkt_len == packet_error))
    goto err;

  mpvio->packets_read++;

  /*
    the 1st packet has the plugin data wrapped into the client authentication
    handshake packet
  */
  if (mpvio->packets_read == 1)
  {
    pkt_len= parse_client_handshake_packet(mpvio, buf, pkt_len);
    if (unlikely(pkt_len == packet_error))
      goto err;
  }
  else
    *buf= ai->thd->net.read_pos;

done:
  /* (re)compute the user's password salt for the current auth, if required */
  if (set_user_salt_if_needed(mpvio->acl_user, mpvio->curr_auth, mpvio->plugin))
  {
    ai->thd->clear_error(); // authenticating user should not see these errors
    my_error(ER_ACCESS_DENIED_ERROR, MYF(0), ai->thd->security_ctx->user,
             ai->thd->security_ctx->host_or_ip, ER_THD(ai->thd, ER_YES));
    goto err;
  }

  /* expose the resolved account data to the authentication plugin */
  ai->user_name= ai->thd->security_ctx->user;
  ai->user_name_length= (uint) strlen(ai->user_name);
  ai->auth_string= mpvio->acl_user->auth[mpvio->curr_auth].salt.str;
  ai->auth_string_length= (ulong) mpvio->acl_user->auth[mpvio->curr_auth].salt.length;
  strmake_buf(ai->authenticated_as, mpvio->acl_user->user.str);

  DBUG_RETURN((int)pkt_len);

err:
  /* only report a generic handshake error if no specific error is set yet */
  if (mpvio->status == MPVIO_EXT::FAILURE)
  {
    if (!ai->thd->is_error())
      my_error(ER_HANDSHAKE_ERROR, MYF(0));
  }
  DBUG_RETURN(-1);
}
+
+/**
+  fills MYSQL_PLUGIN_VIO_INFO structure with the information about the
+  connection
+*/
+static void server_mpvio_info(MYSQL_PLUGIN_VIO *vio,
+                              MYSQL_PLUGIN_VIO_INFO *info)
+{
+  MPVIO_EXT *mpvio= (MPVIO_EXT *) vio;
+  mpvio_info(mpvio->auth_info.thd->net.vio, info);
+}
+
/*
  Check the connection's transport against the account's SSL requirements.
  Returns 0 (false) when the connection satisfies the requirements,
  1 (true) when it must be rejected.
*/
static bool acl_check_ssl(THD *thd, const ACL_USER *acl_user)
{
  Vio *vio= thd->net.vio;
#ifdef HAVE_OPENSSL
  SSL *ssl= (SSL *) vio->ssl_arg;
  X509 *cert;
#endif

  /*
    At this point we know that user is allowed to connect
    from given host by given username/password pair. Now
    we check if SSL is required, if user is using SSL and
    if X509 certificate attributes are OK
  */
  switch (acl_user->ssl_type) {
  case SSL_TYPE_NOT_SPECIFIED:                  // Impossible
  case SSL_TYPE_NONE:                           // SSL is not required
    /*
      Even when the account itself does not require SSL, the global
      --require-secure-transport option may forbid insecure transports
      (TCP without TLS). Unix sockets / named pipes count as secure.
    */
    if (opt_require_secure_transport)
    {
      enum enum_vio_type type= vio_type(vio);
#ifdef HAVE_OPENSSL
      return type != VIO_TYPE_SSL &&
#ifndef _WIN32
             type != VIO_TYPE_SOCKET;
#else
             type != VIO_TYPE_NAMEDPIPE;
#endif
#else
#ifndef _WIN32
      return type != VIO_TYPE_SOCKET;
#else
      return type != VIO_TYPE_NAMEDPIPE;
#endif
#endif
    }
    return 0;
#ifdef HAVE_OPENSSL
  case SSL_TYPE_ANY:                            // Any kind of SSL is ok
    return vio_type(vio) != VIO_TYPE_SSL;
  case SSL_TYPE_X509: /* Client should have any valid certificate. */
    /*
      Connections with non-valid certificates are dropped already
      in sslaccept() anyway, so we do not check validity here.

      We need to check for absence of SSL because without SSL
      we should reject connection.
    */
    if (vio_type(vio) == VIO_TYPE_SSL &&
        SSL_get_verify_result(ssl) == X509_V_OK &&
        (cert= SSL_get_peer_certificate(ssl)))
    {
      /* SSL_get_peer_certificate() bumps the refcount; release it */
      X509_free(cert);
      return 0;
    }
    return 1;
  case SSL_TYPE_SPECIFIED: /* Client should have specified attrib */
    /* If a cipher name is specified, we compare it to actual cipher in use. */
    if (vio_type(vio) != VIO_TYPE_SSL ||
        SSL_get_verify_result(ssl) != X509_V_OK)
      return 1;
    if (acl_user->ssl_cipher)
    {
      const char *ssl_cipher= SSL_get_cipher(ssl);
      DBUG_PRINT("info", ("comparing ciphers: '%s' and '%s'",
                         acl_user->ssl_cipher, ssl_cipher));
      if (strcmp(acl_user->ssl_cipher, ssl_cipher))
      {
        if (global_system_variables.log_warnings)
          sql_print_information("X509 ciphers mismatch: should be '%s' but is '%s'",
                            acl_user->ssl_cipher, ssl_cipher);
        return 1;
      }
    }
    /* no issuer/subject restrictions: the cipher check above was enough */
    if (!acl_user->x509_issuer[0] && !acl_user->x509_subject[0])
      return 0; // all done

    /* Prepare certificate (if exists) */
    if (!(cert= SSL_get_peer_certificate(ssl)))
      return 1;
    /* If X509 issuer is specified, we check it... */
    if (acl_user->x509_issuer[0])
    {
      /* X509_NAME_oneline() allocates; must be OPENSSL_free()d on all paths */
      char *ptr= X509_NAME_oneline(X509_get_issuer_name(cert), 0, 0);
      DBUG_PRINT("info", ("comparing issuers: '%s' and '%s'",
                         acl_user->x509_issuer, ptr));
      if (strcmp(acl_user->x509_issuer, ptr))
      {
        if (global_system_variables.log_warnings)
          sql_print_information("X509 issuer mismatch: should be '%s' "
                            "but is '%s'", acl_user->x509_issuer, ptr);
        OPENSSL_free(ptr);
        X509_free(cert);
        return 1;
      }
      OPENSSL_free(ptr);
    }
    /* X509 subject is specified, we check it .. */
    if (acl_user->x509_subject[0])
    {
      char *ptr= X509_NAME_oneline(X509_get_subject_name(cert), 0, 0);
      DBUG_PRINT("info", ("comparing subjects: '%s' and '%s'",
                         acl_user->x509_subject, ptr));
      if (strcmp(acl_user->x509_subject, ptr))
      {
        if (global_system_variables.log_warnings)
          sql_print_information("X509 subject mismatch: should be '%s' but is '%s'",
                          acl_user->x509_subject, ptr);
        OPENSSL_free(ptr);
        X509_free(cert);
        return 1;
      }
      OPENSSL_free(ptr);
    }
    X509_free(cert);
    return 0;
#else  /* HAVE_OPENSSL */
  default:
    /*
      If we don't have SSL but SSL is required for this user the
      authentication should fail.
    */
    return 1;
#endif /* HAVE_OPENSSL */
  }
  return 1;
}
+
+
+static int do_auth_once(THD *thd, const LEX_CSTRING *auth_plugin_name,
+                        MPVIO_EXT *mpvio)
+{
+  int res= CR_OK;
+  bool unlock_plugin= false;
+  plugin_ref plugin= get_auth_plugin(thd, *auth_plugin_name, &unlock_plugin);
+
+  mpvio->plugin= plugin;
+  mpvio->auth_info.user_name= NULL;
+
+  if (plugin)
+  {
+    st_mysql_auth *info= (st_mysql_auth *) plugin_decl(plugin)->info;
+    switch (info->interface_version >> 8) {
+    case 0x02:
+      res= info->authenticate_user(mpvio, &mpvio->auth_info);
+      break;
+    case 0x01:
+      {
+        MYSQL_SERVER_AUTH_INFO_0x0100 compat;
+        compat.downgrade(&mpvio->auth_info);
+        res= info->authenticate_user(mpvio, (MYSQL_SERVER_AUTH_INFO *)&compat);
+        compat.upgrade(&mpvio->auth_info);
+      }
+      break;
+    default: DBUG_ASSERT(0);
+    }
+
+    if (unlock_plugin)
+      plugin_unlock(thd, plugin);
+  }
+  else
+  {
+    /* Server cannot load the required plugin. */
+    Host_errors errors;
+    errors.m_no_auth_plugin= 1;
+    inc_host_errors(mpvio->auth_info.thd->security_ctx->ip, &errors);
+    my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), auth_plugin_name->str);
+    res= CR_ERROR;
+  }
+
+  return res;
+}
+
/* What handle_password_errors() should do with a user's failure counter */
enum PASSWD_ERROR_ACTION
{
  PASSWD_ERROR_CLEAR,      // reset the counter (after a successful login)
  PASSWD_ERROR_INCREMENT   // record one more failed password attempt
};
+
+/* Increment, or clear password errors for a user. */
+static void handle_password_errors(const char *user, const char *hostname, PASSWD_ERROR_ACTION action)
+{
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  mysql_mutex_assert_not_owner(&acl_cache->lock);
+  mysql_mutex_lock(&acl_cache->lock);
+  ACL_USER *u = find_user_exact(hostname, user);
+  if (u)
+  {
+    switch(action)
+    {
+      case PASSWD_ERROR_INCREMENT:
+        u->password_errors++;
+        break;
+      case PASSWD_ERROR_CLEAR:
+        u->password_errors= 0;
+        break;
+      default:
+        DBUG_ASSERT(0);
+        break;
+    }
+  }
+  mysql_mutex_unlock(&acl_cache->lock);
+#endif
+}
+
+static bool check_password_lifetime(THD *thd, const ACL_USER &acl_user)
+{
+  /* the password should never expire */
+  if (!acl_user.password_lifetime)
+    return false;
+
+  longlong interval= acl_user.password_lifetime;
+  if (interval < 0)
+  {
+    interval= default_password_lifetime;
+
+    /* default global policy applies, and that is password never expires */
+    if (!interval)
+      return false;
+  }
+
+  thd->set_time();
+
+  if ((thd->query_start() - acl_user.password_last_changed)/3600/24 >= interval)
+    return true;
+
+  return false;
+}
+
+/**
+  Perform the handshake, authorize the client and update thd sctx variables.
+
+  @param thd                     thread handle
+  @param com_change_user_pkt_len size of the COM_CHANGE_USER packet
+                                 (without the first, command, byte) or 0
+                                 if it's not a COM_CHANGE_USER (that is, if
+                                 it's a new connection)
+
+  @retval 0  success, thd is updated.
+  @retval 1  error
+*/
+bool acl_authenticate(THD *thd, uint com_change_user_pkt_len)
+{
+  int res= CR_OK;
+  MPVIO_EXT mpvio;
+  enum  enum_server_command command= com_change_user_pkt_len ? COM_CHANGE_USER
+                                                             : COM_CONNECT;
+  DBUG_ENTER("acl_authenticate");
+
+  bzero(&mpvio, sizeof(mpvio));
+  mpvio.read_packet= server_mpvio_read_packet;
+  mpvio.write_packet= server_mpvio_write_packet;
+  mpvio.cached_client_reply.plugin= "";
+  mpvio.info= server_mpvio_info;
+  mpvio.status= MPVIO_EXT::RESTART;
+  mpvio.auth_info.thd= thd;
+  mpvio.auth_info.host_or_ip= thd->security_ctx->host_or_ip;
+  mpvio.auth_info.host_or_ip_length=
+    (unsigned int) strlen(thd->security_ctx->host_or_ip);
+
+  DBUG_PRINT("info", ("com_change_user_pkt_len=%u", com_change_user_pkt_len));
+
+  if (command == COM_CHANGE_USER)
+  {
+    mpvio.packets_written++; // pretend that a server handshake packet was sent
+    mpvio.packets_read++;    // take COM_CHANGE_USER packet into account
+
+    if (parse_com_change_user_packet(&mpvio, com_change_user_pkt_len))
+      DBUG_RETURN(1);
+
+    res= mpvio.status ==  MPVIO_EXT::SUCCESS ? CR_OK : CR_ERROR;
+
+    DBUG_ASSERT(mpvio.status == MPVIO_EXT::RESTART ||
+                mpvio.status == MPVIO_EXT::SUCCESS);
+  }
+  else
+  {
+    /* mark the thd as having no scramble yet */
+    thd->scramble[SCRAMBLE_LENGTH]= 1;
+
+    /*
+      perform the first authentication attempt, with the default plugin.
+      This sends the server handshake packet, reads the client reply
+      with a user name, and performs the authentication if everyone has used
+      the correct plugin.
+    */
+
+    res= do_auth_once(thd, default_auth_plugin_name, &mpvio);
+  }
+
+  PSI_CALL_set_connection_type(vio_type(thd->net.vio));
+
+  Security_context * const sctx= thd->security_ctx;
+  const ACL_USER * acl_user= mpvio.acl_user;
+  if (!acl_user)
+    statistic_increment(aborted_connects_preauth, &LOCK_status);
+
+  if (acl_user)
+  {
+    /*
+      retry the authentication with curr_auth==0 if after receiving the user
+      name we found that we need to switch to a non-default plugin
+    */
+    for (mpvio.curr_auth= mpvio.status != MPVIO_EXT::RESTART;
+         res != CR_OK && mpvio.curr_auth < acl_user->nauth;
+         mpvio.curr_auth++)
+    {
+      thd->clear_error();
+      mpvio.status= MPVIO_EXT::RESTART;
+      res= do_auth_once(thd, &acl_user->auth[mpvio.curr_auth].plugin, &mpvio);
+    }
+  }
+
+  if (mpvio.make_it_fail && res == CR_OK)
+  {
+    mpvio.status= MPVIO_EXT::FAILURE;
+    res= CR_ERROR;
+  }
+
+  thd->password= mpvio.auth_info.password_used;  // remember for error messages
+
+  /*
+    Log the command here so that the user can check the log
+    for the tried logins and also to detect break-in attempts.
+
+    if sctx->user is unset it's protocol failure, bad packet.
+  */
+  if (sctx->user)
+  {
+    general_log_print(thd, command, (char*) "%s@%s on %s using %s",
+                      sctx->user, sctx->host_or_ip,
+                      safe_str(mpvio.db.str), safe_vio_type_name(thd->net.vio));
+  }
+
+  if (res > CR_OK && mpvio.status != MPVIO_EXT::SUCCESS)
+  {
+    Host_errors errors;
+    switch (res)
+    {
+    case CR_AUTH_PLUGIN_ERROR:
+      errors.m_auth_plugin= 1;
+      break;
+    case CR_AUTH_HANDSHAKE:
+      errors.m_handshake= 1;
+      break;
+    case CR_AUTH_USER_CREDENTIALS:
+      errors.m_authentication= 1;
+      if (thd->password && !mpvio.make_it_fail)
+        handle_password_errors(acl_user->user.str, acl_user->host.hostname, PASSWD_ERROR_INCREMENT);
+      break;
+    case CR_ERROR:
+    default:
+      /* Unknown of unspecified auth plugin error. */
+      errors.m_auth_plugin= 1;
+      break;
+    }
+    inc_host_errors(mpvio.auth_info.thd->security_ctx->ip, &errors);
+    if (!thd->is_error())
+      login_failed_error(thd);
+    DBUG_RETURN(1);
+  }
+
+  sctx->proxy_user[0]= 0;
+  if (thd->password && acl_user->password_errors)
+  {
+    /* Login succeeded, clear password errors.*/
+    handle_password_errors(acl_user->user.str, acl_user->host.hostname, PASSWD_ERROR_CLEAR);
+  }
+
+  if (initialized) // if not --skip-grant-tables
+  {
+    /*
+      OK. Let's check the SSL. Historically it was checked after the password,
+      as an additional layer, not instead of the password
+      (in which case it would've been a plugin too).
+    */
+    if (acl_check_ssl(thd, acl_user))
+    {
+      Host_errors errors;
+      errors.m_ssl= 1;
+      inc_host_errors(mpvio.auth_info.thd->security_ctx->ip, &errors);
+      login_failed_error(thd);
+      DBUG_RETURN(1);
+    }
+
+    if (acl_user->account_locked) {
+      status_var_increment(denied_connections);
+      my_error(ER_ACCOUNT_HAS_BEEN_LOCKED, MYF(0));
+      DBUG_RETURN(1);
+    }
+
+    bool client_can_handle_exp_pass= thd->client_capabilities &
+                                     CLIENT_CAN_HANDLE_EXPIRED_PASSWORDS;
+    bool password_expired= thd->password != PASSWORD_USED_NO_MENTION
+                           && (acl_user->password_expired ||
+                               check_password_lifetime(thd, *acl_user));
+
+    if (!client_can_handle_exp_pass && disconnect_on_expired_password &&
+        password_expired)
+    {
+      status_var_increment(denied_connections);
+      my_error(ER_MUST_CHANGE_PASSWORD_LOGIN, MYF(0));
+      DBUG_RETURN(1);
+    }
+
+    sctx->password_expired= password_expired;
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+    if (!password_expired)
+    {
+      bool is_proxy_user= FALSE;
+      const char *auth_user = acl_user->user.str;
+      ACL_PROXY_USER *proxy_user;
+      /* check if the user is allowed to proxy as another user */
+      proxy_user= acl_find_proxy_user(auth_user, sctx->host, sctx->ip,
+                                      mpvio.auth_info.authenticated_as,
+                                            &is_proxy_user);
+      if (is_proxy_user)
+      {
+        ACL_USER *acl_proxy_user;
+
+        /* we need to find the proxy user, but there was none */
+        if (!proxy_user)
+        {
+          Host_errors errors;
+          errors.m_proxy_user= 1;
+          inc_host_errors(mpvio.auth_info.thd->security_ctx->ip, &errors);
+          if (!thd->is_error())
+            login_failed_error(thd);
+          DBUG_RETURN(1);
+        }
+
+        my_snprintf(sctx->proxy_user, sizeof(sctx->proxy_user) - 1,
+                    "'%s'@'%s'", auth_user,
+                    safe_str(acl_user->host.hostname));
+
+        /* we're proxying : find the proxy user definition */
+        mysql_mutex_lock(&acl_cache->lock);
+        acl_proxy_user= find_user_exact(safe_str(proxy_user->get_proxied_host()),
+                                       mpvio.auth_info.authenticated_as);
+        if (!acl_proxy_user)
+        {
+          mysql_mutex_unlock(&acl_cache->lock);
+
+          Host_errors errors;
+          errors.m_proxy_user_acl= 1;
+          inc_host_errors(mpvio.auth_info.thd->security_ctx->ip, &errors);
+          if (!thd->is_error())
+            login_failed_error(thd);
+          DBUG_RETURN(1);
+        }
+        acl_user= acl_proxy_user->copy(thd->mem_root);
+        mysql_mutex_unlock(&acl_cache->lock);
+      }
+    }
+#endif
+
+    sctx->master_access= (acl_user->access | public_access());
+    strmake_buf(sctx->priv_user, acl_user->user.str);
+
+    if (acl_user->host.hostname)
+      strmake_buf(sctx->priv_host, acl_user->host.hostname);
+    else
+      *sctx->priv_host= 0;
+
+
+    /*
+      Don't allow the user to connect if he has done too many queries.
+      As we are testing max_user_connections == 0 here, it means that we
+      can't let the user change max_user_connections from 0 in the server
+      without a restart as it would lead to wrong connect counting.
+    */
+    if ((acl_user->user_resource.questions ||
+         acl_user->user_resource.updates ||
+         acl_user->user_resource.conn_per_hour ||
+         acl_user->user_resource.user_conn ||
+         acl_user->user_resource.max_statement_time != 0.0 ||
+         max_user_connections_checking) &&
+         get_or_create_user_conn(thd,
+           (opt_old_style_user_limits ? sctx->user : sctx->priv_user),
+           (opt_old_style_user_limits ? sctx->host_or_ip : sctx->priv_host),
+           &acl_user->user_resource))
+      DBUG_RETURN(1); // The error is set by get_or_create_user_conn()
+
+    if (acl_user->user_resource.max_statement_time != 0.0)
+    {
+      thd->variables.max_statement_time_double=
+        acl_user->user_resource.max_statement_time;
+      thd->variables.max_statement_time=
+        (ulonglong) (thd->variables.max_statement_time_double * 1e6 + 0.1);
+    }
+  }
+  else
+    sctx->skip_grants();
+
+  if (thd->user_connect &&
+      (thd->user_connect->user_resources.conn_per_hour ||
+       thd->user_connect->user_resources.user_conn ||
+       max_user_connections_checking) &&
+       check_for_max_user_connections(thd, thd->user_connect))
+  {
+    /* Ensure we don't decrement thd->user_connections->connections twice */
+    thd->user_connect= 0;
+    status_var_increment(denied_connections);
+    DBUG_RETURN(1); // The error is set in check_for_max_user_connections()
+  }
+
+  DBUG_PRINT("info",
+             ("Capabilities: %llu  packet_length: %ld  Host: '%s'  "
+              "Login user: '%s' Priv_user: '%s'  Using password: %s "
+              "Access: %llx  db: '%s'",
+              thd->client_capabilities, thd->max_client_packet_length,
+              sctx->host_or_ip, sctx->user, sctx->priv_user,
+              thd->password ? "yes": "no",
+              (longlong) sctx->master_access, mpvio.db.str));
+
+  if (command == COM_CONNECT &&
+      !(thd->main_security_ctx.master_access & PRIV_IGNORE_MAX_CONNECTIONS))
+  {
+    if (*thd->scheduler->connection_count > *thd->scheduler->max_connections)
+    {                                         // too many connections
+      my_error(ER_CON_COUNT_ERROR, MYF(0));
+      DBUG_RETURN(1);
+    }
+  }
+
+  /*
+    This is the default access rights for the current database.  It's
+    set to 0 here because we don't have an active database yet (and we
+    may not have an active database to set.
+  */
+  sctx->db_access= NO_ACL;
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  /*
+    In case the user has a default role set, attempt to set that role
+  */
+  if (initialized && acl_user->default_rolename.length) {
+    privilege_t access(NO_ACL);
+    int result;
+    result= acl_check_setrole(thd, acl_user->default_rolename.str, &access);
+    if (!result)
+      result= acl_setrole(thd, acl_user->default_rolename.str, access);
+    if (result)
+      thd->clear_error(); // even if the default role was not granted, do not
+                          // close the connection
+  }
+#endif
+
+  /* Change a database if necessary */
+  if (mpvio.db.length)
+  {
+    uint err = mysql_change_db(thd, &mpvio.db, FALSE);
+    if(err)
+    {
+      if (err == ER_DBACCESS_DENIED_ERROR)
+      {
+        /*
+          Got an "access denied" error, which must be handled
+          other access denied errors (see login_failed_error()).
+          mysql_change_db() already sent error to client, and
+          wrote to general log, we only need to increment the counter
+          and maybe write a warning to error log.
+        */
+        status_var_increment(thd->status_var.access_denied_errors);
+        if (global_system_variables.log_warnings > 1)
+        {
+          Security_context* sctx = thd->security_ctx;
+          sql_print_warning(ER_THD(thd, err),
+            sctx->priv_user, sctx->priv_host, mpvio.db.str);
+        }
+      }
+      DBUG_RETURN(1);
+    }
+  }
+
+  thd->net.net_skip_rest_factor= 2;  // skip at most 2*max_packet_size
+
+  if (mpvio.auth_info.external_user[0])
+    sctx->external_user= my_strdup(key_memory_MPVIO_EXT_auth_info,
+                                   mpvio.auth_info.external_user, MYF(0));
+
+  if (res == CR_OK_HANDSHAKE_COMPLETE)
+    thd->get_stmt_da()->disable_status();
+  else
+    my_ok(thd);
+
+  PSI_CALL_set_thread_account
+    (thd->main_security_ctx.user, static_cast(strlen(thd->main_security_ctx.user)),
+    thd->main_security_ctx.host_or_ip, static_cast(strlen(thd->main_security_ctx.host_or_ip)));
+
+  /* Ready to handle queries */
+  DBUG_RETURN(0);
+}
+
+/**
+  MySQL Server Password Authentication Plugin
+
+  In the MySQL authentication protocol:
+  1. the server sends the random scramble to the client
+  2. client sends the encrypted password back to the server
+  3. the server checks the password.
+*/
static int native_password_authenticate(MYSQL_PLUGIN_VIO *vio,
                                        MYSQL_SERVER_AUTH_INFO *info)
{
  uchar *pkt;
  int pkt_len;
  MPVIO_EXT *mpvio= (MPVIO_EXT *) vio;
  THD *thd=info->thd;
  DBUG_ENTER("native_password_authenticate");

  /*
    generate the scramble, or reuse the old one
    (the sentinel byte at scramble[SCRAMBLE_LENGTH] is non-zero when no
    scramble has been generated yet for this connection)
  */
  if (thd->scramble[SCRAMBLE_LENGTH])
    thd_create_random_password(thd, thd->scramble, SCRAMBLE_LENGTH);

  /* and send it to the client */
  if (mpvio->write_packet(mpvio, (uchar*)thd->scramble, SCRAMBLE_LENGTH + 1))
    DBUG_RETURN(CR_AUTH_HANDSHAKE);

  /* reply and authenticate */

  /*
    Digression: this is more complex than it looks.

    The plugin (we) may be called right after the client was connected -
    and will need to send a scramble, read reply, authenticate.

    Or the plugin may be called after another plugin has sent a scramble,
    and read the reply. If the client has used the correct client-plugin,
    we won't need to read anything here from the client, the client
    has already sent a reply with everything we need for authentication.

    Or the plugin may be called after another plugin has sent a scramble,
    and read the reply, but the client has used the wrong client-plugin.
    We'll need to sent a "switch to another plugin" packet to the
    client and read the reply. "Use the short scramble" packet is a special
    case of "switch to another plugin" packet.

    Or, perhaps, the plugin may be called after another plugin has
    done the handshake but did not send a useful scramble. We'll need
    to send a scramble (and perhaps a "switch to another plugin" packet)
    and read the reply.

    Besides, a client may be an old one, that doesn't understand plugins.
    Or doesn't even understand 4.0 scramble.

    And we want to keep the same protocol on the wire  unless non-native
    plugins are involved.

    Anyway, it still looks simple from a plugin point of view:
    "send the scramble, read the reply and authenticate".
    All the magic is transparently handled by the server
    (see server_mpvio_read_packet / server_mpvio_write_packet).
  */

  /* read the reply with the encrypted password */
  if ((pkt_len= mpvio->read_packet(mpvio, &pkt)) < 0)
    DBUG_RETURN(CR_AUTH_HANDSHAKE);
  DBUG_PRINT("info", ("reply read : pkt_len=%d", pkt_len));

#ifdef NO_EMBEDDED_ACCESS_CHECKS
  DBUG_RETURN(CR_OK);
#endif

  DBUG_EXECUTE_IF("native_password_bad_reply", { pkt_len= 12; });

  /* empty reply: succeed only for accounts with an empty password hash */
  if (pkt_len == 0) /* no password */
    DBUG_RETURN(info->auth_string_length != 0
                ? CR_AUTH_USER_CREDENTIALS : CR_OK);

  info->password_used= PASSWORD_USED_YES;
  if (pkt_len == SCRAMBLE_LENGTH)
  {
    /* both sides must agree on the scramble length for the check to work */
    if (info->auth_string_length != SCRAMBLE_LENGTH)
      DBUG_RETURN(CR_AUTH_USER_CREDENTIALS);

    if (check_scramble(pkt, thd->scramble, (uchar*)info->auth_string))
      DBUG_RETURN(CR_AUTH_USER_CREDENTIALS);
    else
      DBUG_RETURN(CR_OK);
  }

  /* any other reply length is a malformed packet */
  my_error(ER_HANDSHAKE_ERROR, MYF(0));
  DBUG_RETURN(CR_AUTH_HANDSHAKE);
}
+
+/**
+  Produce the storable mysql_native_password hash for a cleartext password.
+
+  @param password         cleartext password
+  @param password_length  its length in bytes
+  @param[out] hash        receives the scrambled hash
+  @param[in,out] hash_length  in: buffer capacity (asserted to be at least
+                          SCRAMBLED_PASSWORD_CHAR_LENGTH);
+                          out: 0 for an empty password, else
+                          SCRAMBLED_PASSWORD_CHAR_LENGTH
+  @return 0 (never fails)
+*/
+static int native_password_make_scramble(const char *password,
+                      size_t password_length, char *hash, size_t *hash_length)
+{
+  DBUG_ASSERT(*hash_length >= SCRAMBLED_PASSWORD_CHAR_LENGTH);
+  if (password_length == 0)
+    *hash_length= 0;
+  else
+  {
+    *hash_length= SCRAMBLED_PASSWORD_CHAR_LENGTH;
+    my_make_scrambled_password(hash, password, password_length);
+  }
+  return 0;
+}
+
+/*
+  Marker for unusable passwords: deliberately NOT a valid native-password
+  hash, and longer than SCRAMBLE_LENGTH (see the DBUG_ASSERTs below).
+*/
+static const char invalid_password[] = "*THISISNOTAVALIDPASSWORDTHATCANBEUSEDHERE";
+
+/**
+  Convert a stored mysql_native_password hash into the binary salt form
+  used by check_scramble().
+
+  Accepted inputs:
+  - empty hash                      -> empty salt (*out_length= 0)
+  - the literal string "invalid"    -> the invalid_password marker is copied
+  - a SCRAMBLED_PASSWORD_CHAR_LENGTH hash -> decoded with
+    get_salt_from_password(); if it contains non-hex characters it is
+    mapped to the invalid_password marker instead
+
+  @return 0 on success, 1 (with ER_PASSWD_LENGTH raised) on a bad length
+*/
+static int native_password_get_salt(const char *hash, size_t hash_length,
+                                    unsigned char *out, size_t *out_length)
+{
+  DBUG_ASSERT(sizeof(invalid_password) > SCRAMBLE_LENGTH);
+  DBUG_ASSERT(*out_length >= SCRAMBLE_LENGTH);
+  DBUG_ASSERT(*out_length >= sizeof(invalid_password));
+  if (hash_length == 0)
+  {
+    *out_length= 0;
+    return 0;
+  }
+
+  if (hash_length != SCRAMBLED_PASSWORD_CHAR_LENGTH)
+  {
+    if (hash_length == 7 && strcmp(hash, "invalid") == 0)
+    {
+      memcpy(out, invalid_password, sizeof(invalid_password));
+      *out_length= sizeof(invalid_password);
+      return 0;
+    }
+    my_error(ER_PASSWD_LENGTH, MYF(0), SCRAMBLED_PASSWORD_CHAR_LENGTH);
+    return 1;
+  }
+
+  /* skip the first byte (presumably the '*' prefix) and verify hex digits */
+  for (const char *c= hash + 1; c < (hash + hash_length); c++)
+  {
+    /* If any non-hex characters are found, mark the password as invalid. */
+    if (!(*c >= '0' && *c <= '9') &&
+        !(*c >= 'A' && *c <= 'F') &&
+        !(*c >= 'a' && *c <= 'f'))
+    {
+      memcpy(out, invalid_password, sizeof(invalid_password));
+      *out_length= sizeof(invalid_password);
+      return 0;
+    }
+  }
+
+  *out_length= SCRAMBLE_LENGTH;
+  get_salt_from_password(out, hash);
+  return 0;
+}
+
+/**
+  Authenticate using the pre-4.1 short-scramble ("old password") protocol.
+
+  Same flow as native_password_authenticate(): send the scramble, read the
+  reply, verify via check_scramble_323(). Refused when secure_auth() is in
+  effect.
+*/
+static int old_password_authenticate(MYSQL_PLUGIN_VIO *vio,
+                                     MYSQL_SERVER_AUTH_INFO *info)
+{
+  uchar *pkt;
+  int pkt_len;
+  MPVIO_EXT *mpvio= (MPVIO_EXT *) vio;
+  THD *thd=info->thd;
+
+  /* generate the scramble, or reuse the old one */
+  if (thd->scramble[SCRAMBLE_LENGTH])
+    thd_create_random_password(thd, thd->scramble, SCRAMBLE_LENGTH);
+  /* and send it to the client */
+  if (mpvio->write_packet(mpvio, (uchar*)thd->scramble, SCRAMBLE_LENGTH + 1))
+    return CR_AUTH_HANDSHAKE;
+
+  /* read the reply and authenticate */
+  if ((pkt_len= mpvio->read_packet(mpvio, &pkt)) < 0)
+    return CR_AUTH_HANDSHAKE;
+
+#ifdef NO_EMBEDDED_ACCESS_CHECKS
+  /* embedded server skips credential checking entirely */
+  return CR_OK;
+#endif
+
+  /*
+    legacy: if switch_from_long_to_short_scramble,
+    the password is sent \0-terminated, the pkt_len is always 9 bytes.
+    We need to figure out the correct scramble length here.
+  */
+  if (pkt_len == SCRAMBLE_LENGTH_323 + 1)
+    pkt_len= (int)strnlen((char*)pkt, pkt_len);
+
+  if (pkt_len == 0) /* no password */
+    return info->auth_string_length ? CR_AUTH_USER_CREDENTIALS : CR_OK;
+
+  /* old-style replies are rejected outright under secure_auth */
+  if (secure_auth(thd))
+    return CR_AUTH_HANDSHAKE;
+
+  info->password_used= PASSWORD_USED_YES;
+
+  if (pkt_len == SCRAMBLE_LENGTH_323)
+  {
+    if (!info->auth_string_length)
+      return CR_AUTH_USER_CREDENTIALS;
+
+    return check_scramble_323(pkt, thd->scramble, (ulong *) info->auth_string)
+             ? CR_AUTH_USER_CREDENTIALS : CR_OK;
+  }
+
+  /* a reply of any other length is a protocol violation */
+  my_error(ER_HANDSHAKE_ERROR, MYF(0));
+  return CR_AUTH_HANDSHAKE;
+}
+
+/**
+  Produce the storable pre-4.1 password hash.
+  Same contract as native_password_make_scramble(), but emits the short
+  SCRAMBLED_PASSWORD_CHAR_LENGTH_323 hash.
+  @return 0 (never fails)
+*/
+static int old_password_make_scramble(const char *password,
+                      size_t password_length, char *hash, size_t *hash_length)
+{
+  DBUG_ASSERT(*hash_length >= SCRAMBLED_PASSWORD_CHAR_LENGTH_323);
+  if (password_length == 0)
+    *hash_length= 0;
+  else
+  {
+    *hash_length= SCRAMBLED_PASSWORD_CHAR_LENGTH_323;
+    my_make_scrambled_password_323(hash, password, password_length);
+  }
+  return 0;
+}
+
+/* The 4.0-format salt: the password hash decoded into two ulongs */
+#define SALT_LENGTH_323 (sizeof(ulong)*2)
+/**
+  Decode a stored 4.0-format ("old") password hash into its binary salt.
+  @return 0 on success, 1 (with ER_PASSWD_LENGTH raised) on a bad length
+*/
+static int old_password_get_salt(const char *hash, size_t hash_length,
+                                 unsigned char *out, size_t *out_length)
+{
+  DBUG_ASSERT(*out_length >= SALT_LENGTH_323);
+
+  if (hash_length != SCRAMBLED_PASSWORD_CHAR_LENGTH_323)
+  {
+    my_error(ER_PASSWD_LENGTH, MYF(0), SCRAMBLED_PASSWORD_CHAR_LENGTH_323);
+    return 1;
+  }
+
+  *out_length= SALT_LENGTH_323;
+  get_salt_from_password_323((ulong*)out, hash);
+  return 0;
+}
+
+/* Server-side descriptor of the mysql_native_password auth method */
+static struct st_mysql_auth native_password_handler=
+{
+  MYSQL_AUTHENTICATION_INTERFACE_VERSION,
+  native_password_plugin_name.str,
+  native_password_authenticate,
+  native_password_make_scramble,
+  native_password_get_salt
+};
+
+/* Server-side descriptor of the pre-4.1 mysql_old_password auth method */
+static struct st_mysql_auth old_password_handler=
+{
+  MYSQL_AUTHENTICATION_INTERFACE_VERSION,
+  old_password_plugin_name.str,
+  old_password_authenticate,
+  old_password_make_scramble,
+  old_password_get_salt
+};
+
+/* Register both built-in password plugins with the plugin registry */
+maria_declare_plugin(mysql_password)
+{
+  MYSQL_AUTHENTICATION_PLUGIN,                  /* type constant    */
+  &native_password_handler,                     /* type descriptor  */
+  native_password_plugin_name.str,              /* Name             */
+  "R.J.Silk, Sergei Golubchik",                 /* Author           */
+  "Native MySQL authentication",                /* Description      */
+  PLUGIN_LICENSE_GPL,                           /* License          */
+  NULL,                                         /* Init function    */
+  NULL,                                         /* Deinit function  */
+  0x0100,                                       /* Version (1.0)    */
+  NULL,                                         /* status variables */
+  NULL,                                         /* system variables */
+  "1.0",                                        /* String version   */
+  MariaDB_PLUGIN_MATURITY_STABLE                /* Maturity         */
+},
+{
+  MYSQL_AUTHENTICATION_PLUGIN,                  /* type constant    */
+  &old_password_handler,                        /* type descriptor  */
+  old_password_plugin_name.str,                 /* Name             */
+  "R.J.Silk, Sergei Golubchik",                 /* Author           */
+  "Old MySQL-4.0 authentication",               /* Description      */
+  PLUGIN_LICENSE_GPL,                           /* License          */
+  NULL,                                         /* Init function    */
+  NULL,                                         /* Deinit function  */
+  0x0100,                                       /* Version (1.0)    */
+  NULL,                                         /* status variables */
+  NULL,                                         /* system variables */
+  "1.0",                                        /* String version   */
+  MariaDB_PLUGIN_MATURITY_STABLE                /* Maturity         */
+}
+maria_declare_plugin_end;
+
+
+/*
+  Exporting functions that allow plugins to do server-style
+  host/user matching. Used in server_audit2 plugin.
+*/
+
+/**
+  Match a connection's host name / IP against a wildcard host pattern.
+  Thin wrapper over compare_hostname(); in embedded builds
+  (NO_EMBEDDED_ACCESS_CHECKS) it always returns 0.
+*/
+extern "C" int maria_compare_hostname(
+                  const char *wild_host, long wild_ip, long ip_mask,
+                  const char *host, const char *ip)
+{
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  acl_host_and_ip h;
+  h.hostname= (char *) wild_host;
+  h.ip= wild_ip;
+  h.ip_mask= ip_mask;
+
+  return compare_hostname(&h, host, ip);
+#else
+  return 0;
+#endif
+}
+
+
+/**
+  Parse a host pattern into its (hostname, ip, ip_mask) triple using
+  update_hostname(), returning the parts through the out parameters.
+  NOTE(review): in embedded builds the out parameters are left
+  unmodified -- callers should pre-initialize them.
+*/
+extern "C" void maria_update_hostname(
+                  const char **wild_host, long *wild_ip, long *ip_mask,
+                  const char *host)
+{
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  acl_host_and_ip h;
+  update_hostname(&h, host);
+  *wild_host= h.hostname;
+  *wild_ip= h.ip;
+  *ip_mask= h.ip_mask;
+#endif
+}
diff --git a/sql/sql_acl.h b/sql/sql_acl.h
new file mode 100644
index 00000000..fb8dd2c3
--- /dev/null
+++ b/sql/sql_acl.h
@@ -0,0 +1,374 @@
+#ifndef SQL_ACL_INCLUDED
+#define SQL_ACL_INCLUDED
+
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2017, 2020, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#include "violite.h"                            /* SSL_type */
+#include "sql_class.h"                          /* LEX_COLUMN */
+#include "grant.h"
+#include "sql_cmd.h"                            /* Sql_cmd */
+
+
+/*
+  Field ordinals of the mysql.db privilege table
+  (NOTE(review): presumably must stay in sync with mysql_db_table_def --
+  verify there when changing).
+*/
+enum mysql_db_table_field
+{
+  MYSQL_DB_FIELD_HOST = 0,
+  MYSQL_DB_FIELD_DB,
+  MYSQL_DB_FIELD_USER,
+  MYSQL_DB_FIELD_SELECT_PRIV,
+  MYSQL_DB_FIELD_INSERT_PRIV,
+  MYSQL_DB_FIELD_UPDATE_PRIV,
+  MYSQL_DB_FIELD_DELETE_PRIV,
+  MYSQL_DB_FIELD_CREATE_PRIV,
+  MYSQL_DB_FIELD_DROP_PRIV,
+  MYSQL_DB_FIELD_GRANT_PRIV,
+  MYSQL_DB_FIELD_REFERENCES_PRIV,
+  MYSQL_DB_FIELD_INDEX_PRIV,
+  MYSQL_DB_FIELD_ALTER_PRIV,
+  MYSQL_DB_FIELD_CREATE_TMP_TABLE_PRIV,
+  MYSQL_DB_FIELD_LOCK_TABLES_PRIV,
+  MYSQL_DB_FIELD_CREATE_VIEW_PRIV,
+  MYSQL_DB_FIELD_SHOW_VIEW_PRIV,
+  MYSQL_DB_FIELD_CREATE_ROUTINE_PRIV,
+  MYSQL_DB_FIELD_ALTER_ROUTINE_PRIV,
+  MYSQL_DB_FIELD_EXECUTE_PRIV,
+  MYSQL_DB_FIELD_EVENT_PRIV,
+  MYSQL_DB_FIELD_TRIGGER_PRIV,
+  MYSQL_DB_FIELD_DELETE_VERSIONING_ROWS_PRIV,
+  MYSQL_DB_FIELD_COUNT
+};
+
+extern const TABLE_FIELD_DEF mysql_db_table_def;
+extern bool mysql_user_table_is_in_short_password_format;
+
+/* well-known special user/host/role name constants */
+extern LEX_CSTRING host_not_specified;
+extern LEX_CSTRING current_user;
+extern LEX_CSTRING current_role;
+extern LEX_CSTRING current_user_and_current_role;
+extern LEX_CSTRING none;
+extern LEX_CSTRING public_name;
+
+
+/*
+  Map "how the password was used" to the proper access-denied error code.
+  passwd_used == 2 selects the variant without the "(using password: ...)"
+  part (NOTE(review): 2 presumably corresponds to a PASSWORD_USED_*
+  enumerator -- confirm against its definition).
+*/
+static inline int access_denied_error_code(int passwd_used)
+{
+#ifdef mysqld_error_find_printf_error_used
+  return 0;
+#else
+  return passwd_used == 2 ? ER_ACCESS_DENIED_NO_PASSWORD_ERROR
+                          : ER_ACCESS_DENIED_ERROR;
+#endif
+}
+
+/* prototypes */
+
+bool hostname_requires_resolving(const char *hostname);
+bool  acl_init(bool dont_read_acl_tables);
+bool acl_reload(THD *thd);
+void acl_free(bool end=0);
+privilege_t acl_get_all3(Security_context *sctx, const char *db,
+                         bool db_is_patern);
+bool acl_authenticate(THD *thd, uint com_change_user_pkt_len);
+bool acl_getroot(Security_context *sctx, const char *user, const char *host,
+                 const char *ip, const char *db);
+bool acl_check_host(const char *host, const char *ip);
+bool check_change_password(THD *thd, LEX_USER *user);
+bool change_password(THD *thd, LEX_USER *user);
+
+/*
+  NOTE: the List<> template arguments below were restored; the original
+  text had them stripped out (List<LEX_USER>, List<LEX_COLUMN>, List<Item>)
+  which made these declarations uncompilable.
+*/
+bool mysql_grant_role(THD *thd, List<LEX_USER> &user_list, bool revoke);
+bool mysql_grant(THD *thd, const char *db, List <LEX_USER> &user_list,
+                 privilege_t rights, bool revoke, bool is_proxy);
+int mysql_table_grant(THD *thd, TABLE_LIST *table, List <LEX_USER> &user_list,
+                       List <LEX_COLUMN> &column_list, privilege_t rights,
+                       bool revoke);
+bool mysql_routine_grant(THD *thd, TABLE_LIST *table, const Sp_handler *sph,
+                         List <LEX_USER> &user_list, privilege_t rights,
+                         bool revoke, bool write_to_binlog);
+bool grant_init();
+void grant_free(void);
+bool grant_reload(THD *thd);
+bool check_grant(THD *thd, privilege_t want_access, TABLE_LIST *tables,
+                 bool any_combination_will_do, uint number, bool no_errors);
+bool check_grant_column (THD *thd, GRANT_INFO *grant,
+                         const char *db_name, const char *table_name,
+                         const char *name, size_t length, Security_context *sctx);
+bool check_column_grant_in_table_ref(THD *thd, TABLE_LIST * table_ref,
+                                     const char *name, size_t length, Field *fld);
+bool check_grant_all_columns(THD *thd, privilege_t want_access,
+                             Field_iterator_table_ref *fields);
+bool check_grant_routine(THD *thd, privilege_t want_access,
+                         TABLE_LIST *procs, const Sp_handler *sph,
+                         bool no_error);
+bool check_grant_db(THD *thd,const char *db);
+bool check_global_access(THD *thd, const privilege_t want_access, bool no_errors= false);
+bool check_access(THD *thd, privilege_t want_access,
+                  const char *db, privilege_t *save_priv,
+                  GRANT_INTERNAL_INFO *grant_internal_info,
+                  bool dont_check_global_grants, bool no_errors);
+privilege_t get_table_grant(THD *thd, TABLE_LIST *table);
+privilege_t get_column_grant(THD *thd, GRANT_INFO *grant,
+                             const char *db_name, const char *table_name,
+                             const char *field_name);
+bool get_show_user(THD *thd, LEX_USER *lex_user, const char **username,
+                   const char **hostname, const char **rolename);
+void mysql_show_grants_get_fields(THD *thd, List<Item> *fields,
+                                  const char *name, size_t length);
+bool mysql_show_grants(THD *thd, LEX_USER *user);
+bool mysql_show_create_user(THD *thd, LEX_USER *user);
+int fill_schema_enabled_roles(THD *thd, TABLE_LIST *tables, COND *cond);
+int fill_schema_applicable_roles(THD *thd, TABLE_LIST *tables, COND *cond);
+void get_privilege_desc(char *to, uint max_length, privilege_t access);
+void get_mqh(const char *user, const char *host, USER_CONN *uc);
+bool mysql_create_user(THD *thd, List <LEX_USER> &list, bool handle_as_role);
+bool mysql_drop_user(THD *thd, List <LEX_USER> &list, bool handle_as_role);
+bool mysql_rename_user(THD *thd, List <LEX_USER> &list);
+int mysql_alter_user(THD *thd, List <LEX_USER> &list);
+bool mysql_revoke_all(THD *thd, List <LEX_USER> &list);
+void fill_effective_table_privileges(THD *thd, GRANT_INFO *grant,
+                                     const char *db, const char *table);
+bool sp_revoke_privileges(THD *thd, const char *sp_db, const char *sp_name,
+                          const Sp_handler *sph);
+bool sp_grant_privileges(THD *thd, const char *sp_db, const char *sp_name,
+                         const Sp_handler *sph);
+bool check_routine_level_acl(THD *thd, const char *db, const char *name,
+                             const Sp_handler *sph);
+bool is_acl_user(const char *host, const char *user);
+int fill_schema_user_privileges(THD *thd, TABLE_LIST *tables, COND *cond);
+int fill_schema_schema_privileges(THD *thd, TABLE_LIST *tables, COND *cond);
+int fill_schema_table_privileges(THD *thd, TABLE_LIST *tables, COND *cond);
+int fill_schema_column_privileges(THD *thd, TABLE_LIST *tables, COND *cond);
+int wild_case_compare(CHARSET_INFO *cs, const char *str,const char *wildstr);
+
+/**
+  Result of an access check for an internal schema or table.
+  Internal ACL checks are always performed *before* using
+  the grant tables.
+  This mechanism enforces that the server implementation has full
+  control on its internal tables.
+  Depending on the internal check result, the server implementation
+  can choose to:
+  - always allow access,
+  - always deny access,
+  - delegate the decision to the database administrator,
+  by using the grant tables.
+  @sa ACL_internal_schema_access, ACL_internal_table_access
+*/
+enum ACL_internal_access_result
+{
+  /**
+    Access granted for all the requested privileges,
+    do not use the grant tables.
+  */
+  ACL_INTERNAL_ACCESS_GRANTED,
+  /** Access denied, do not use the grant tables. */
+  ACL_INTERNAL_ACCESS_DENIED,
+  /** No decision yet, use the grant tables. */
+  ACL_INTERNAL_ACCESS_CHECK_GRANT
+};
+
+/**
+  Per internal table ACL access rules.
+  This class is an interface.
+  Per table(s) specific access rule should be implemented in a subclass.
+  @sa ACL_internal_schema_access
+*/
+class ACL_internal_table_access
+{
+public:
+  ACL_internal_table_access() = default;
+
+  virtual ~ACL_internal_table_access() = default;
+
+  /**
+    Check access to an internal table.
+    When a privilege is granted, this method adds the requested privilege
+    to save_priv.
+    @param want_access the privileges requested
+    @param [in, out] save_priv the privileges granted
+    @return
+      @retval ACL_INTERNAL_ACCESS_GRANTED All the requested privileges
+      are granted, and saved in save_priv.
+      @retval ACL_INTERNAL_ACCESS_DENIED At least one of the requested
+      privileges was denied.
+      @retval ACL_INTERNAL_ACCESS_CHECK_GRANT No requested privilege
+      was denied, and grant should be checked for at least one
+      privilege. Requested privileges that are granted, if any, are saved
+      in save_priv.
+  */
+  virtual ACL_internal_access_result check(privilege_t want_access,
+                                           privilege_t *save_priv) const= 0;
+};
+
+/**
+  Per internal schema ACL access rules.
+  This class is an interface.
+  Each per schema specific access rule should be implemented
+  in a different subclass, and registered.
+  Per schema access rules can control:
+  - every schema privileges on schema.*
+  - every table privileges on schema.table
+  @sa ACL_internal_schema_registry
+  @sa ACL_internal_table_access
+*/
+class ACL_internal_schema_access
+{
+public:
+  ACL_internal_schema_access() = default;
+
+  virtual ~ACL_internal_schema_access() = default;
+
+  /**
+    Check access to an internal schema.
+    @param want_access the privileges requested
+    @param [in, out] save_priv the privileges granted
+    @return
+      @retval ACL_INTERNAL_ACCESS_GRANTED All the requested privileges
+      are granted, and saved in save_priv.
+      @retval ACL_INTERNAL_ACCESS_DENIED At least one of the requested
+      privileges was denied.
+      @retval ACL_INTERNAL_ACCESS_CHECK_GRANT No requested privilege
+      was denied, and grant should be checked for at least one
+      privilege. Requested privileges that are granted, if any, are saved
+      in save_priv.
+  */
+  virtual ACL_internal_access_result check(privilege_t want_access,
+                                           privilege_t *save_priv) const= 0;
+
+  /**
+    Search for per table ACL access rules by table name.
+    @param name the table name
+    @return per table access rules, or NULL if the schema has no
+    table-specific rules for this name
+  */
+  virtual const ACL_internal_table_access *lookup(const char *name) const= 0;
+};
+
+/**
+  A registry for per internal schema ACL.
+  An 'internal schema' is a database schema maintained by the
+  server implementation, such as 'performance_schema' and 'INFORMATION_SCHEMA'.
+*/
+class ACL_internal_schema_registry
+{
+public:
+  /* Register the access rules for one internal schema */
+  static void register_schema(const LEX_CSTRING *name,
+                              const ACL_internal_schema_access *access);
+  /* Find the registered access rules for a schema name */
+  static const ACL_internal_schema_access *lookup(const char *name);
+};
+
+/* Per-schema internal ACL lookup, cached in grant_internal_info */
+const ACL_internal_schema_access *
+get_cached_schema_access(GRANT_INTERNAL_INFO *grant_internal_info,
+                         const char *schema_name);
+
+/* Per-table internal ACL lookup, cached in grant_internal_info */
+const ACL_internal_table_access *
+get_cached_table_access(GRANT_INTERNAL_INFO *grant_internal_info,
+                        const char *schema_name,
+                        const char *table_name);
+
+/* GRANT PROXY access check (semantics: see definition in sql_acl.cc) */
+bool acl_check_proxy_grant_access (THD *thd, const char *host, const char *user,
+                                   bool with_grant);
+/* SET ROLE / SET DEFAULT ROLE helpers (definitions in sql_acl.cc) */
+int acl_setrole(THD *thd, const char *rolename, privilege_t access);
+int acl_check_setrole(THD *thd, const char *rolename, privilege_t *access);
+int acl_check_set_default_role(THD *thd, const char *host, const char *user,
+                               const char *role);
+int acl_set_default_role(THD *thd, const char *host, const char *user,
+                         const char *rolename);
+
+/* SHOW STATUS variables exported by the ACL subsystem */
+extern SHOW_VAR acl_statistics[];
+
+/* Check if a role is granted to a user/role.
+
+   If hostname == NULL, search for a role as the starting grantee.
+*/
+bool check_role_is_granted(const char *username,
+                           const char *hostname,
+                           const char *rolename);
+
+#ifndef DBUG_OFF
+/* debug-only counters of role-graph merge operations */
+extern ulong role_global_merges, role_db_merges, role_table_merges,
+             role_column_merges, role_routine_merges;
+#endif
+
+
+/* Base class for GRANT/REVOKE statement execution */
+class Sql_cmd_grant: public Sql_cmd
+{
+protected:
+  enum_sql_command m_command;
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  /* List<LEX_USER> template arguments restored (lost to markup stripping) */
+  void warn_hostname_requires_resolving(THD *thd, List<LEX_USER> &list);
+  bool user_list_reset_mqh(THD *thd, List<LEX_USER> &list);
+  void grant_stage0(THD *thd);
+#endif
+public:
+  Sql_cmd_grant(enum_sql_command command)
+   :m_command(command)
+  { }
+  bool is_revoke() const { return m_command == SQLCOM_REVOKE; }
+  enum_sql_command sql_command_code() const { return m_command; }
+};
+
+
+/* GRANT/REVOKE PROXY execution */
+class Sql_cmd_grant_proxy: public Sql_cmd_grant
+{
+  privilege_t m_grant_option;
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  /* List<LEX_USER> template argument restored (lost to markup stripping) */
+  bool check_access_proxy(THD *thd, List<LEX_USER> &list);
+#endif
+public:
+  Sql_cmd_grant_proxy(enum_sql_command command, privilege_t grant_option)
+   :Sql_cmd_grant(command), m_grant_option(grant_option)
+  { }
+  bool execute(THD *thd);
+};
+
+
+/* GRANT/REVOKE on a specific object; carries the parsed Grant_privilege */
+class Sql_cmd_grant_object: public Sql_cmd_grant, public Grant_privilege
+{
+protected:
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  bool grant_stage0_exact_object(THD *thd, TABLE_LIST *table);
+#endif
+public:
+  Sql_cmd_grant_object(enum_sql_command command, const Grant_privilege &grant)
+   :Sql_cmd_grant(command), Grant_privilege(grant)
+  { }
+};
+
+
+/*
+  GRANT/REVOKE on tables; handles both table-mask and exact-table targets
+  (cf. execute_table_mask / execute_exact_table)
+*/
+class Sql_cmd_grant_table: public Sql_cmd_grant_object
+{
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  bool execute_table_mask(THD *thd);
+  bool execute_exact_table(THD *thd, TABLE_LIST *table);
+#endif
+public:
+  Sql_cmd_grant_table(enum_sql_command command, const Grant_privilege &grant)
+   :Sql_cmd_grant_object(command, grant)
+  { }
+  bool execute(THD *thd);
+};
+
+
+
+/* GRANT/REVOKE on stored routines; m_sph selects the routine type */
+class Sql_cmd_grant_sp: public Sql_cmd_grant_object
+{
+  const Sp_handler &m_sph;
+public:
+  Sql_cmd_grant_sp(enum_sql_command command, const Grant_privilege &grant,
+                   const Sp_handler &sph)
+   :Sql_cmd_grant_object(command, grant),
+    m_sph(sph)
+  { }
+  bool execute(THD *thd);
+};
+
+#endif /* SQL_ACL_INCLUDED */
diff --git a/sql/sql_acl_getsort.ic b/sql/sql_acl_getsort.ic
new file mode 100644
index 00000000..046b412d
--- /dev/null
+++ b/sql/sql_acl_getsort.ic
@@ -0,0 +1,212 @@
+/* Copyright (c) 2019, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA */
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+
+#define magic_bits 30
+/*
+  Returns a number which, if sorted in descending order, magically puts
+  patterns in the order from most specific (e.g. no wildcards) to most generic
+  (e.g. "%"). That is, the larger the number, the more specific the pattern is.
+
+  Takes a template that lists types of following patterns (by the first letter
+  of _h_ostname, _d_bname, _u_sername) and up to four patterns.
+  No more than two can be of 'h' or 'd' type (because one magic value takes
+  magic_bits bits, see below).
+
+  ========================================================================
+
+  Here's how the magic is created:
+
+  Let's look at one iteration of the for() loop. That's one pattern.  With
+  wildcards (usernames aren't interesting).
+
+  By definition a pattern A is "more specific" than pattern B if the set of
+  strings that match the pattern A is smaller than the set of strings that
+  match the pattern B. Strings are taken from the big superset of all valid
+  utf8 strings up to the maxlen.
+
+  Strings are matched character by character. For every non-wildcard
+  character there can be only one matching character in the matched string.
+
+  For a wild_one character ('_') any valid utf8 character will do. Below
+  numchars would mean a total number of vaid utf8 characters. It's a huge
+  number. A number of matching strings for wild_one will be numchars.
+
+  For a wild_many character ('%') any number of valid utf8 characters will do.
+  How many string will match it depends on the amount of non-wild_many
+  characters.  Say, if a number of non-wildcard characters is N, and a number
+  of wild_one characters is M, and the number of wild_many characters is K,
+  then for K=1 its wild_many character will match any number of valid utf8
+  characters from 0 to L=maxlen-N-M. The number of matching strings will be
+
+     1 + numchars + numchars^2 + numchars^3 + ... + numchars^L
+
+  Intermediate result: if M=K=0, the pattern will match only one string,
+  if M>0, K=0, the pattern will match numchars^M strings, if K=1, the
+  pattern will match
+
+     numchars^M + 1 + numchars + numchars^2 + ... + numchars^L
+
+  For a more visual notation, let's write these huge numbers not as
+  decimal or binary, but base numchars. Then the last number will be
+  a sum of two numbers: the first is one followed by M zeros, the second
+  constists of L+1 ones:
+
+    1000{...M...}000 + 111{...L+1...}1111
+
+  This could produce any of the following
+
+    111...112111...1111       if L > M, K = 1
+    100...001111...1111       if M > L, K = 1
+    2111111...111111111       if M = L, K = 1
+    1111111...111111111       if M = 0, K = 1
+    1000000...000000000       if K = 0, M > 0
+
+  There are two complications caused by multiple wild_many characters.
+  For, say, two wild_many characters, either can accept any number of utf8
+  characters, as long the the total amount of them is less then or equal to L.
+  Same logic applies to any number of non-consequent wild_many characters
+  (consequent wild_many characters count as one). This gives the number of
+  matching strings of
+
+    1 + F(K,1)*numchars + F(K,2)*numchars^2 + ... + F(K,L)*numchars^L
+
+  where F(K,R) is the "number of ways one can put R balls into K boxes",
+  that is C^{K-1}_{R+K-1}.
+
+  In the "base numchars" notation, it means that besides 0, 1, and 2,
+  an R-th digit can be F(K,R). For the purpose of comparison, we only need
+  to know the most significant digit, F(K, L).
+  While it can be huge, we don't need the exact value, it's a
+  a monotonously increasing function of K, so if K1>K2, F(K1,L) > F(K2,L)
+  and we can simply compare values of K instead of complex F(K,L).
+
+  The second complication: F(K,R) gives only an upper boundary, the
+  actual number of matched strings can be smaller.
+  Example: pattern "a%b%c" can match "abbc" as a(b)b()c, and as a()b(b)c.
+  F(2,1) = 2, but it's only one string "abbc".
+  We'll ignore it here under assumption that it almost never happens
+  in practice and this simplification won't noticeably disrupt the ordering.
+
+  The last detail: old get_sort function sorted by the non-wildcard prefix
+  length, so in "abc_" and "a_bc" the former one was sorted first. Strictly
+  speaking they're both equally specific, but to preserve the backward
+  compatible sorting we'll use the P "prefix length or 0 if no wildcards"
+  to break ties.
+
+  Now, let's compare two long numbers. Numbers are easy to compare,
+  the longer number is larger. If they both have the same lengths,
+  the one with the larger first digit is larger, and so on.
+
+  But there is no need to actually calculate these numbers.
+  Three numbers L, K, M (and P to break ties) are enough to describe a pattern
+  for a purpose of comparison. L/K/M triplets can be compared like this:
+
+  * case 1: if for both patterns L>M: compare L, K, M, in that order
+    because:
+      - if L1 > L2, the first number is longer
+      - If L1 == L2, then the first digit is a monotonously increasing function
+        of K, so the first digit is larger when K is larger
+      - if K1 == K2, then all other digits in these numbers would be the
+        same too, with the exception of one digit in the middle that
+        got +1 because of +1000{...M...}000. So, whatever number has a
+        larger M will get this +1 first.
+  * case 2: if for both patterns L<M: compare M, L, K, in that order
+  * case 3: if for both patterns L=M: use the rule from either case 1 or 2
+  * case 4: if one L1>M1, the other L2=M2: compare L, K, M
+  * case 5: if one L1<M1, the other L2=M2: compare M, L, K
+  * case 6: if one L1>M1, the other M2>L2: first is more generic
+     unless (case 6a) K1=K2=1,M1=0,M2=L2+1 (in that case - equal)
+    (NOTE(review): cases 2-6 reconstructed; the original text between
+    '<' and '>' characters was lost in extraction)
+
+  note that in case 3 one can use a rule from the case either 1 or 2,
+  in the case 4 one can use the rule from the case 1,
+  in the case 5 one can use the rule from the case 2.
+
+  for the case 6 and ignoring the special case 6a, to compare patterns by a
+  magic number as a function z(a,b,c), we must ensure that z(L1,K1,M1) is
+  greater than z(M2,L2,K2) when L1=M2. This can be done by an extra bit,
+  which is 1 for K and 0 for L. Thus, the magic number could be
+
+  case 1: (((L*2 + 1)*(maxlen+1) + K)*(maxlen+1) + M)*(maxlen+1) + P
+  case 2: ((M*2*(maxlen+1) + L)*(maxlen+1) + K)*(maxlen+1) + P
+
+  upper bound: L<=maxlen, M<=maxlen, K<=maxlen/2, P<=maxlen
+
+  NOTE(review): text was lost here during extraction -- the remainder of
+  this comment, the get_magic_sort() function signature and its loop
+  prologue (everything between "P<" above and the "if (L > M)" branch
+  below) are missing and must be restored from the upstream
+  sql_acl_getsort.ic before this file can compile.
+    if (L > M)
+      magic= (((L * 2 + 1) * d + K) * d1 + M) * d + P;
+    else
+      magic= (((M * 2 + 0) * d + L) * d1 + K) * d + P;
+    DBUG_ASSERT(magic < (1ULL << magic_bits));
+    sort= (sort << magic_bits) + magic;
+    IF_DBUG(bits_used+= magic_bits,);
+  }
+  DBUG_ASSERT(bits_used < 8*sizeof(sort));
+  va_end(args);
+  return ~sort;
+}
+#endif
diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc
new file mode 100644
index 00000000..fcbd8a55
--- /dev/null
+++ b/sql/sql_admin.cc
@@ -0,0 +1,1656 @@
+/* Copyright (c) 2010, 2015, Oracle and/or its affiliates.
+   Copyright (c) 2011, 2021, MariaDB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#include "mariadb.h"
+#include "sql_class.h"                       // THD
+#include "keycaches.h"                       // get_key_cache
+#include "sql_base.h"                        // Open_table_context
+#include "lock.h"                            // MYSQL_OPEN_*
+#include "sql_handler.h"                     // mysql_ha_rm_tables
+#include "partition_element.h"               // PART_ADMIN
+#include "sql_partition.h"                   // set_part_state
+#include "transaction.h"                     // trans_rollback_stmt
+#include "sql_view.h"                        // view_checksum
+#include "sql_table.h"                       // mysql_recreate_table
+#include "debug_sync.h"                      // DEBUG_SYNC
+#include "sp.h"                              // Sroutine_hash_entry
+#include "sql_parse.h"                       // check_table_access
+#include "strfunc.h"
+#include "sql_admin.h"
+#include "sql_statistics.h"
+#include "wsrep_mysqld.h"
+#ifdef WITH_WSREP
+#include "wsrep_trans_observer.h"
+#endif
+
+/*
+  Operation names shown in the "Op" column of the result set produced by
+  the admin commands (CHECK/REPAIR/ANALYZE/OPTIMIZE/...).
+*/
+const LEX_CSTRING msg_status= { STRING_WITH_LEN("status") };
+const LEX_CSTRING msg_repair= { STRING_WITH_LEN("repair") };
+const LEX_CSTRING msg_assign_to_keycache=
+{ STRING_WITH_LEN("assign_to_keycache") };
+const LEX_CSTRING msg_analyze= { STRING_WITH_LEN("analyze") };
+const LEX_CSTRING msg_check= { STRING_WITH_LEN("check") };
+const LEX_CSTRING msg_preload_keys= { STRING_WITH_LEN("preload_keys") };
+const LEX_CSTRING msg_optimize= { STRING_WITH_LEN("optimize") };
+
+/**
+  Prepare, run and clean up after mysql_recreate_table().
+
+  Rolls back the current statement and transaction, closes all thread
+  tables and releases transactional MDL locks before recreating, since
+  mysql_recreate_table() must (re)open the table itself.  Binlogging is
+  suppressed here; the caller decides whether/how to log the statement.
+
+  @param thd            Thread handler
+  @param table_list     Table to recreate
+  @param recreate_info  [out] filled in by mysql_recreate_table()
+
+  @return result of mysql_recreate_table() (non-zero on failure; any
+          error stays in the diagnostics area, an OK status is cleared)
+*/
+
+static bool admin_recreate_table(THD *thd, TABLE_LIST *table_list,
+                                 Recreate_info *recreate_info)
+{
+  bool result_code;
+  DBUG_ENTER("admin_recreate_table");
+
+  /* Drop any open statement/transaction state before the ALTER-like step */
+  trans_rollback_stmt(thd);
+  trans_rollback(thd);
+  close_thread_tables(thd);
+  thd->release_transactional_locks();
+
+  /*
+    table_list->table has been closed and freed. Do not reference
+    uninitialized data. open_tables() could fail.
+  */
+  table_list->table= NULL;
+  /* Same applies to MDL ticket. */
+  table_list->mdl_request.ticket= NULL;
+
+  DEBUG_SYNC(thd, "ha_admin_try_alter");
+  tmp_disable_binlog(thd); // binlogging is done by caller if wanted
+  result_code= (thd->open_temporary_tables(table_list) ||
+                mysql_recreate_table(thd, table_list, recreate_info, false));
+  reenable_binlog(thd);
+  /*
+    mysql_recreate_table() can push OK or ERROR.
+    Clear 'OK' status. If there is an error, keep it:
+    we will store the error message in a result set row 
+    and then clear.
+  */
+  if (thd->get_stmt_da()->is_ok())
+    thd->get_stmt_da()->reset_diagnostics_area();
+  table_list->table= NULL;
+  DBUG_RETURN(result_code);
+}
+
+
+/*
+  Send one "error" row of the admin command result set:
+  <alias> | <operation> | error | <errmsg>, then clear the error state so
+  processing can continue with the remaining tables.
+
+  Returns -1 on network write failure, 1 after the row was sent.
+*/
+static int send_check_errmsg(THD *thd, TABLE_LIST* table,
+			     const LEX_CSTRING *operator_name,
+                             const char* errmsg)
+{
+  Protocol *prot= thd->protocol;
+
+  prot->prepare_for_resend();
+  prot->store(table->alias.str, table->alias.length, system_charset_info);
+  prot->store(operator_name, system_charset_info);
+  prot->store(&error_clex_str, system_charset_info);
+  prot->store(errmsg, strlen(errmsg), system_charset_info);
+  thd->clear_error();
+  return prot->write() ? -1 : 1;
+}
+
+
+/**
+  Prepare a table for REPAIR TABLE ... USE_FRM.
+
+  USE_FRM assumes the header of the index/meta file is trashed, so the
+  table is rebuilt as follows: rename the data file to a temporary name,
+  recreate the table from its .frm (which truncates it), then move the
+  old data file back so a normal repair can run on the fresh index file
+  plus the old data.  Does nothing unless TT_USEFRM is set.
+
+  @param thd         Thread handler
+  @param table_list  Table to prepare; table_list->table may be NULL if
+                     the earlier open in mysql_admin_table() failed
+  @param check_opt   Repair options (TT_USEFRM checked here)
+
+  @retval  0  proceed with the repair (or nothing to do)
+  @retval  1  error; a result-set row with the message was already sent
+  @retval -1  network failure while sending the error row
+*/
+
+static int prepare_for_repair(THD *thd, TABLE_LIST *table_list,
+			      HA_CHECK_OPT *check_opt)
+{
+  int error= 0, create_error= 0;
+  TABLE tmp_table, *table;
+  TABLE_LIST *pos_in_locked_tables= 0;
+  TABLE_SHARE *share= 0;
+  bool has_mdl_lock= FALSE;
+  char from[FN_REFLEN],tmp[FN_REFLEN+32];
+  const char **ext;
+  MY_STAT stat_info;
+  Open_table_context ot_ctx(thd, (MYSQL_OPEN_IGNORE_FLUSH |
+                                  MYSQL_OPEN_HAS_MDL_LOCK |
+                                  MYSQL_LOCK_IGNORE_TIMEOUT));
+  DBUG_ENTER("prepare_for_repair");
+
+  /* Only the USE_FRM variant needs this preparation */
+  if (!(check_opt->sql_flags & TT_USEFRM))
+    DBUG_RETURN(0);
+
+  if (!(table= table_list->table))
+  {
+    /*
+      If the table didn't exist, we have a shared metadata lock
+      on it that is left from mysql_admin_table()'s attempt to 
+      open it. Release the shared metadata lock before trying to
+      acquire the exclusive lock to satisfy MDL asserts and avoid
+      deadlocks.
+    */
+    thd->release_transactional_locks();
+    /*
+      Attempt to do full-blown table open in mysql_admin_table() has failed.
+      Let us try to open at least a .FRM for this table.
+    */
+
+    MDL_REQUEST_INIT(&table_list->mdl_request, MDL_key::TABLE,
+                     table_list->db.str, table_list->table_name.str,
+                     MDL_EXCLUSIVE, MDL_TRANSACTION);
+
+    if (lock_table_names(thd, table_list, table_list->next_global,
+                         thd->variables.lock_wait_timeout, 0))
+      DBUG_RETURN(0);
+    has_mdl_lock= TRUE;
+
+    share= tdc_acquire_share(thd, table_list, GTS_TABLE);
+    if (share == NULL)
+      DBUG_RETURN(0);				// Can't open frm file
+
+    /* Open from the share into a stack TABLE; closed via closefrm() below */
+    if (open_table_from_share(thd, share, &empty_clex_str, 0, 0, 0,
+                              &tmp_table, FALSE))
+    {
+      tdc_release_share(share);
+      DBUG_RETURN(0);                           // Out of memory
+    }
+    table= &tmp_table;
+  }
+
+  /*
+    REPAIR TABLE ... USE_FRM for temporary tables makes little sense.
+  */
+  if (table->s->tmp_table)
+  {
+    error= send_check_errmsg(thd, table_list, &msg_repair,
+			     "Cannot repair temporary table from .frm file");
+    goto end;
+  }
+
+  /*
+    User gave us USE_FRM which means that the header in the index file is
+    trashed.
+    In this case we will try to fix the table the following way:
+    - Rename the data file to a temporary name
+    - Truncate the table
+    - Replace the new data file with the old one
+    - Run a normal repair using the new index file and the old data file
+  */
+
+  /* Pre-VARCHAR .frm files changed the row format; a rebuild can't fix that */
+  if (table->s->frm_version < FRM_VER_TRUE_VARCHAR &&
+      table->s->varchar_fields)
+  {
+    error= send_check_errmsg(thd, table_list, &msg_repair,
+                             "Failed repairing a very old .frm file as the "
+                             "data file format has changed between versions. "
+                             "Please dump the table in your old system with "
+                             "mysqldump and read it into this system with "
+                             "mysql or mysqlimport");
+    goto end;
+  }
+
+  /*
+    Check if this is a table type that stores index and data separately,
+    like ISAM or MyISAM. We assume fixed order of engine file name
+    extensions array. First element of engine file name extensions array
+    is meta/index file extention. Second element - data file extention. 
+  */
+  ext= table->file->bas_ext();
+  if (!ext[0] || !ext[1])
+    goto end;					// No data file
+
+  /* A MERGE table must not come here. */
+  DBUG_ASSERT(table->file->ht->db_type != DB_TYPE_MRG_MYISAM);
+
+  // Name of data file
+  strxmov(from, table->s->normalized_path.str, ext[1], NullS);
+  if (!mysql_file_stat(key_file_misc, from, &stat_info, MYF(0)))
+    goto end;				// Can't use USE_FRM flag
+
+  /* Temporary name for the data file; unique per server pid + thread */
+  my_snprintf(tmp, sizeof(tmp), "%s-%lx_%llx",
+	      from, current_pid, thd->thread_id);
+
+  if (table_list->table)
+  {
+    /*
+      Table was successfully open in mysql_admin_table(). Now we need
+      to close it, but leave it protected by exclusive metadata lock.
+    */
+    pos_in_locked_tables= table->pos_in_locked_tables;
+    if (wait_while_table_is_used(thd, table, HA_EXTRA_PREPARE_FOR_FORCED_CLOSE))
+      goto end;
+    /* Close table but don't remove from locked list */
+    close_all_tables_for_name(thd, table_list->table->s,
+                              HA_EXTRA_NOT_USED, NULL);
+    table_list->table= 0;
+  }
+  else
+  {
+    /*
+      Table open failed, maybe because we run out of memory.
+      Close all open tables and relaese all MDL locks
+    */
+    /*
+      NOTE(review): share->tdc is dereferenced right after
+      tdc_release_share(); presumably the TDC element stays valid until
+      the flush below - confirm.
+    */
+    tdc_release_share(share);
+    share->tdc->flush(thd, true);
+    share= 0;
+  }
+
+  /*
+    After this point we have an exclusive metadata lock on our table
+    in both cases when table was successfully open in mysql_admin_table()
+    and when it was open in prepare_for_repair().
+  */
+
+  if (my_rename(from, tmp, MYF(MY_WME)))
+  {
+    error= send_check_errmsg(thd, table_list, &msg_repair,
+			     "Failed renaming data file");
+    goto end;
+  }
+  if (dd_recreate_table(thd, table_list->db.str, table_list->table_name.str))
+    create_error= send_check_errmsg(thd, table_list, &msg_repair,
+                                    "Failed generating table from .frm file");
+  /*
+    'FALSE' for 'using_transactions' means don't postpone
+    invalidation till the end of a transaction, but do it
+    immediately.
+  */
+  query_cache_invalidate3(thd, table_list, FALSE);
+  /* Put the old data file back in place of the freshly created empty one */
+  if (mysql_file_rename(key_file_misc, tmp, from, MYF(MY_WME)))
+  {
+    error= send_check_errmsg(thd, table_list, &msg_repair,
+			     "Failed restoring .MYD file");
+    goto end;
+  }
+  if (create_error)
+    goto end;
+
+  if (thd->locked_tables_list.locked_tables())
+  {
+    if (thd->locked_tables_list.reopen_tables(thd, false))
+      goto end;
+    /* Restore the table in the table list with the new opened table */
+    table_list->table= pos_in_locked_tables->table;
+  }
+  else
+  {
+    /*
+      Now we should be able to open the partially repaired table
+      to finish the repair in the handler later on.
+    */
+    if (open_table(thd, table_list, &ot_ctx))
+    {
+      error= send_check_errmsg(thd, table_list, &msg_repair,
+                               "Failed to open partially repaired table");
+      goto end;
+    }
+  }
+
+end:
+  /* Common cleanup for all exit paths (goto-based, acquired in order) */
+  thd->locked_tables_list.unlink_all_closed_tables(thd, NULL, 0);
+  if (table == &tmp_table)
+  {
+    closefrm(table);
+    if (share)
+      tdc_release_share(share);
+  }
+  /* In case of a temporary table there will be no metadata lock. */
+  if (unlikely(error) && has_mdl_lock)
+    thd->release_transactional_locks();
+
+  DBUG_RETURN(error);
+}
+
+
+/**
+  Check if a given error is something that could occur during
+  open_and_lock_tables() that does not indicate table corruption.
+
+  @param  sql_errno  Error number to check.
+
+  @retval TRUE       Error does not indicate table corruption.
+  @retval FALSE      Error could indicate table corruption.
+*/
+
+static inline bool table_not_corrupt_error(uint sql_errno)
+{
+  switch (sql_errno) {
+  case ER_NO_SUCH_TABLE:
+  case ER_NO_SUCH_TABLE_IN_ENGINE:
+  case ER_FILE_NOT_FOUND:
+  case ER_LOCK_WAIT_TIMEOUT:
+  case ER_LOCK_DEADLOCK:
+  case ER_CANT_LOCK_LOG_TABLE:
+  case ER_OPEN_AS_READONLY:
+  case ER_WRONG_OBJECT:
+    return true;
+  default:
+    return false;
+  }
+}
+
+#ifndef DBUG_OFF
+// Debug-only counter used by the "fail_2call_open_only_one_table" DBUG
+// keyword to force a failure on the second call of open_only_one_table()
+static int debug_fail_counter= 0;
+#endif
+
+/**
+  Open exactly one table from the admin command's table list.
+
+  Temporarily unlinks the given table from the next_global/next_local
+  chains and points lex->query_tables / select->table_list at it alone,
+  opens and locks it, then re-links the saved global tail (after any
+  MERGE children that the open may have appended).
+
+  @param thd                    Thread handler
+  @param table                  Table to open; chain pointers are
+                                restored before returning
+  @param repair_table_use_frm   true for REPAIR TABLE ... USE_FRM; open
+                                errors are then suppressed as warnings
+  @param is_view_operator_func  true if the command has a view handler
+                                (CHECK/REPAIR TABLE allow views)
+
+  @return true if opening/locking the table failed, false otherwise
+*/
+
+static bool open_only_one_table(THD* thd, TABLE_LIST* table,
+                                bool repair_table_use_frm,
+                                bool is_view_operator_func)
+{
+  LEX *lex= thd->lex;
+  SELECT_LEX *select= lex->first_select_lex();
+  TABLE_LIST *save_next_global, *save_next_local;
+  bool open_error;
+  /* Detach 'table' so only it is visible to open_and_lock_tables() */
+  save_next_global= table->next_global;
+  table->next_global= 0;
+  save_next_local= table->next_local;
+  table->next_local= 0;
+  select->table_list.first= table;
+  /*
+    Time zone tables and SP tables can be added to the lex->query_tables
+    list, so it has to be prepared.
+    TODO: Investigate if we can put extra tables into argument instead of
+    using lex->query_tables
+  */
+  lex->query_tables= table;
+  lex->query_tables_last= &table->next_global;
+  lex->query_tables_own_last= 0;
+
+  DBUG_EXECUTE_IF("fail_2call_open_only_one_table", {
+                  if (debug_fail_counter)
+                  {
+                    open_error= TRUE;
+                    goto dbug_err;
+                  }
+                  else
+                    debug_fail_counter++;
+                  });
+
+  /*
+    CHECK TABLE command is allowed for views as well. Check on alter flags
+    to differentiate from ALTER TABLE...CHECK PARTITION on which view is not
+    allowed.
+  */
+  if (lex->alter_info.partition_flags & ALTER_PARTITION_ADMIN ||
+      !is_view_operator_func)
+  {
+    table->required_type= TABLE_TYPE_NORMAL;
+    DBUG_ASSERT(lex->table_type != TABLE_TYPE_VIEW);
+  }
+  else if (lex->table_type == TABLE_TYPE_VIEW)
+  {
+    table->required_type= lex->table_type;
+  }
+  else if ((lex->table_type != TABLE_TYPE_VIEW) &&
+           lex->sql_command == SQLCOM_REPAIR)
+  {
+    table->required_type= TABLE_TYPE_NORMAL;
+  }
+
+  /* These commands must see derived tables materialized at open time */
+  if (lex->sql_command == SQLCOM_CHECK ||
+      lex->sql_command == SQLCOM_REPAIR ||
+      lex->sql_command == SQLCOM_ANALYZE ||
+      lex->sql_command == SQLCOM_OPTIMIZE)
+    thd->prepare_derived_at_open= TRUE;
+  if (!thd->locked_tables_mode && repair_table_use_frm)
+  {
+    /*
+      If we're not under LOCK TABLES and we're executing REPAIR TABLE
+      USE_FRM, we need to ignore errors from open_and_lock_tables().
+      REPAIR TABLE USE_FRM is a heavy weapon used when a table is
+      critically damaged, so open_and_lock_tables() will most likely
+      report errors. Those errors are not interesting for the user
+      because it's already known that the table is badly damaged.
+    */
+
+    /* Divert warnings/errors into a throw-away warning info */
+    Diagnostics_area *da= thd->get_stmt_da();
+    Warning_info tmp_wi(thd->query_id, false, true);
+
+    da->push_warning_info(&tmp_wi);
+
+    open_error= (thd->open_temporary_tables(table) ||
+                 open_and_lock_tables(thd, table, TRUE, 0));
+
+    da->pop_warning_info();
+  }
+  else
+  {
+    /*
+      It's assumed that even if it is REPAIR TABLE USE_FRM, the table
+      can be opened if we're under LOCK TABLES (otherwise LOCK TABLES
+      would fail). Thus, the only errors we could have from
+      open_and_lock_tables() are logical ones, like incorrect locking
+      mode. It does make sense for the user to see such errors.
+    */
+
+    open_error= (thd->open_temporary_tables(table) ||
+                 open_and_lock_tables(thd, table, TRUE, 0));
+  }
+
+#ifndef DBUG_OFF
+dbug_err:
+#endif
+
+  thd->prepare_derived_at_open= FALSE;
+
+  /*
+    MERGE engine may adjust table->next_global chain, thus we have to
+    append save_next_global after merge children.
+  */
+  if (save_next_global)
+  {
+    TABLE_LIST *table_list_iterator= table;
+    while (table_list_iterator->next_global)
+      table_list_iterator= table_list_iterator->next_global;
+    table_list_iterator->next_global= save_next_global;
+    save_next_global->prev_global= &table_list_iterator->next_global;
+  }
+
+  table->next_local= save_next_local;
+
+  return open_error;
+}
+
+#ifdef WITH_WSREP
+/**
+  RAII class for temporarily disabling wsrep_on in the connection.
+  The original value is restored when the guard leaves scope.
+*/
+class Disable_wsrep_on_guard
+{
+ public:
+  /**
+    @param thd     - pointer to the context of connection in which
+                     wsrep_on mode needs to be disabled.
+    @param disable - true if wsrep_on should be disabled
+  */
+  explicit Disable_wsrep_on_guard(THD *thd, bool disable)
+    : m_thd(thd), m_orig_wsrep_on(thd->variables.wsrep_on)
+  {
+    if (disable)
+      thd->variables.wsrep_on= false;
+  }
+
+  /* Non-copyable: a copy would restore wsrep_on twice */
+  Disable_wsrep_on_guard(const Disable_wsrep_on_guard&)= delete;
+  Disable_wsrep_on_guard& operator=(const Disable_wsrep_on_guard&)= delete;
+
+  ~Disable_wsrep_on_guard()
+  {
+    /* Restore the value saved at construction (unconditional; harmless
+       when nothing was disabled) */
+    m_thd->variables.wsrep_on= m_orig_wsrep_on;
+  }
+ private:
+  THD* m_thd;
+  bool m_orig_wsrep_on;
+};
+#endif /* WITH_WSREP */
+
+
+/*
+  Append the Msg_type and Msg_text columns of a "table is read only"
+  warning row (ER_OPEN_AS_READONLY formatted with the table name) to the
+  row currently being built on thd->protocol.
+*/
+static void send_read_only_warning(THD *thd, const LEX_CSTRING *msg_status,
+                                   const LEX_CSTRING *table_name)
+{
+  char message[MYSQL_ERRMSG_SIZE];
+  size_t msg_len= my_snprintf(message, sizeof(message),
+                              ER_THD(thd, ER_OPEN_AS_READONLY),
+                              table_name->str);
+  Protocol *protocol= thd->protocol;
+
+  protocol->store(msg_status, system_charset_info);
+  protocol->store(message, msg_len, system_charset_info);
+}
+
+
+/**
+  Collect field names of result set that will be sent to a client
+
+  The columns are Table, Op, Msg_type and Msg_text - the classic layout
+  of CHECK/REPAIR/ANALYZE/OPTIMIZE TABLE output.
+
+  @param      thd     Thread data object
+  @param[out] fields  List of fields whose metadata should be collected for
+                      sending to client
+*/
+
+void fill_check_table_metadata_fields(THD *thd, List<Item> *fields)
+{
+  Item *item;
+
+  item= new (thd->mem_root) Item_empty_string(thd, "Table", NAME_CHAR_LEN * 2);
+  item->set_maybe_null();
+  fields->push_back(item, thd->mem_root);
+
+  item= new (thd->mem_root) Item_empty_string(thd, "Op", 10);
+  item->set_maybe_null();
+  fields->push_back(item, thd->mem_root);
+
+  item= new (thd->mem_root) Item_empty_string(thd, "Msg_type", 10);
+  item->set_maybe_null();
+  fields->push_back(item, thd->mem_root);
+
+  item= new (thd->mem_root) Item_empty_string(thd, "Msg_text",
+                                              SQL_ADMIN_MSG_TEXT_SIZE);
+  item->set_maybe_null();
+  fields->push_back(item, thd->mem_root);
+}
+
+
+/*
+  RETURN VALUES
+    FALSE Message sent to net (admin operation went ok)
+    TRUE  Message should be sent by caller 
+          (admin operation or network communication failed)
+*/
+static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
+                              HA_CHECK_OPT* check_opt,
+                              const LEX_CSTRING *operator_name,
+                              thr_lock_type lock_type,
+                              bool org_open_for_modify,
+                              bool repair_table_use_frm,
+                              uint extra_open_options,
+                              int (*prepare_func)(THD *, TABLE_LIST *,
+                                                  HA_CHECK_OPT *),
+                              int (handler::*operator_func)(THD *,
+                                                            HA_CHECK_OPT *),
+                              int (view_operator_func)(THD *, TABLE_LIST*,
+                                                       HA_CHECK_OPT *),
+                              bool is_cmd_replicated)
+{
+  TABLE_LIST *table;
+  List field_list;
+  Protocol *protocol= thd->protocol;
+  LEX *lex= thd->lex;
+  bool need_repair_or_alter= 0;
+  wait_for_commit* suspended_wfc;
+  bool is_table_modified= false;
+  LEX_CUSTRING tabledef_version;
+  DBUG_ENTER("mysql_admin_table");
+  DBUG_PRINT("enter", ("extra_open_options: %u", extra_open_options));
+
+#ifdef WITH_WSREP
+  /*
+    CACHE INDEX and LOAD INDEX INTO CACHE statements are
+    local operations. Do not replicate them with Galera
+  */
+  const bool disable_wsrep_on= (WSREP(thd) &&
+    (lex->sql_command == SQLCOM_ASSIGN_TO_KEYCACHE ||
+     lex->sql_command == SQLCOM_PRELOAD_KEYS));
+
+  Disable_wsrep_on_guard wsrep_on_guard(thd, disable_wsrep_on);
+#endif /* WITH_WSREP */
+
+  fill_check_table_metadata_fields(thd, &field_list);
+
+  if (protocol->send_result_set_metadata(&field_list,
+                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
+    DBUG_RETURN(TRUE);
+
+  /*
+    This function calls trans_commit() during its operation, but that does not
+    imply that the operation is complete or binlogged. So we have to suspend
+    temporarily the wakeup_subsequent_commits() calls (if used).
+  */
+  suspended_wfc= thd->suspend_subsequent_commits();
+
+  mysql_ha_rm_tables(thd, tables);
+
+  /*
+    Close all temporary tables which were pre-open to simplify
+    privilege checking. Clear all references to closed tables.
+  */
+  close_thread_tables(thd);
+  for (table= tables; table; table= table->next_local)
+    table->table= NULL;
+
+  for (table= tables; table; table= table->next_local)
+  {
+    char table_name_buff[SAFE_NAME_LEN*2+2];
+    LEX_CSTRING table_name= { table_name_buff, 0};
+    char storage_engine_name[NAME_LEN];
+    bool storage_engine_partitioned= 0;
+    uchar tabledef_version_buff[MY_UUID_SIZE];
+    const char *db= table->db.str;
+    bool fatal_error=0;
+    bool open_error= 0;
+    bool collect_eis=  FALSE;
+    bool open_for_modify= org_open_for_modify;
+    Recreate_info recreate_info;
+    int compl_result_code, result_code;
+
+    compl_result_code= result_code= HA_ADMIN_FAILED;
+    storage_engine_name[0]= 0;                  // Marker that's not used
+
+    DBUG_PRINT("admin", ("table: '%s'.'%s'", db, table->table_name.str));
+    DEBUG_SYNC(thd, "admin_command_kill_before_modify");
+
+    table_name.length= strxmov(table_name_buff, db, ".", table->table_name.str,
+                               NullS) - table_name_buff;
+    thd->open_options|= extra_open_options;
+    table->lock_type= lock_type;
+    /*
+      To make code safe for re-execution we need to reset type of MDL
+      request as code below may change it.
+      To allow concurrent execution of read-only operations we acquire
+      weak metadata lock for them.
+    */
+    table->mdl_request.set_type(lex->sql_command == SQLCOM_REPAIR
+                                ? MDL_SHARED_NO_READ_WRITE
+                                : lock_type >= TL_FIRST_WRITE
+                                ? MDL_SHARED_WRITE : MDL_SHARED_READ);
+    if (thd->check_killed())
+    {
+      open_error= false;
+      fatal_error= true;
+      result_code= HA_ADMIN_FAILED;
+      goto send_result;
+    }
+
+    /* open only one table from local list of command */
+    while (1)
+    {
+      open_error= open_only_one_table(thd, table,
+                                      repair_table_use_frm,
+                                      (view_operator_func != NULL));
+      thd->open_options&= ~extra_open_options;
+
+      /*
+        If open_and_lock_tables() failed, close_thread_tables() will close
+        the table and table->table can therefore be invalid.
+      */
+      if (unlikely(open_error))
+        table->table= NULL;
+
+      /*
+        Under locked tables, we know that the table can be opened,
+        so any errors opening the table are logical errors.
+        In these cases it does not make sense to try to repair.
+      */
+      if (unlikely(open_error) && thd->locked_tables_mode)
+      {
+        result_code= HA_ADMIN_FAILED;
+        goto send_result;
+      }
+
+      if (!table->table || table->mdl_request.type != MDL_SHARED_WRITE ||
+          table->table->file->ha_table_flags() & HA_CONCURRENT_OPTIMIZE)
+        break;
+
+      trans_rollback_stmt(thd);
+      trans_rollback(thd);
+      close_thread_tables(thd);
+      table->table= NULL;
+      thd->release_transactional_locks();
+      MDL_REQUEST_INIT(&table->mdl_request, MDL_key::TABLE, table->db.str,
+                       table->table_name.str, MDL_SHARED_NO_READ_WRITE,
+                       MDL_TRANSACTION);
+    }
+
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+      if (table->table)
+      {
+        /*
+          Set up which partitions that should be processed
+          if ALTER TABLE t ANALYZE/CHECK/OPTIMIZE/REPAIR PARTITION ..
+          CACHE INDEX/LOAD INDEX for specified partitions
+        */
+        Alter_info *alter_info= &lex->alter_info;
+
+        if (alter_info->partition_flags & ALTER_PARTITION_ADMIN)
+        {
+          if (!table->table->part_info)
+          {
+            my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0));
+            thd->resume_subsequent_commits(suspended_wfc);
+            DBUG_RETURN(TRUE);
+          }
+          if (set_part_state(alter_info, table->table->part_info, PART_ADMIN))
+          {
+            char buff[FN_REFLEN + MYSQL_ERRMSG_SIZE];
+            size_t length;
+            DBUG_PRINT("admin", ("sending non existent partition error"));
+            protocol->prepare_for_resend();
+            protocol->store(&table_name, system_charset_info);
+            protocol->store(operator_name, system_charset_info);
+            protocol->store(&error_clex_str, system_charset_info);
+            length= my_snprintf(buff, sizeof(buff),
+                                ER_THD(thd, ER_PARTITION_DOES_NOT_EXIST));
+            protocol->store(buff, length, system_charset_info);
+            if(protocol->write())
+              goto err;
+            my_eof(thd);
+            goto err;
+          }
+        }
+      }
+#endif
+    DBUG_PRINT("admin", ("table: %p", table->table));
+
+    if (table->schema_table)
+    {
+      result_code= HA_ADMIN_NOT_IMPLEMENTED;
+      goto send_result;
+    }
+
+    if (prepare_func)
+    {
+      DBUG_PRINT("admin", ("calling prepare_func"));
+      switch ((*prepare_func)(thd, table, check_opt)) {
+      case  1:           // error, message written to net
+        trans_rollback_stmt(thd);
+        trans_rollback(thd);
+        close_thread_tables(thd);
+        thd->release_transactional_locks();
+        DBUG_PRINT("admin", ("simple error, admin next table"));
+        continue;
+      case -1:           // error, message could be written to net
+        /* purecov: begin inspected */
+        DBUG_PRINT("admin", ("severe error, stop"));
+        goto err;
+        /* purecov: end */
+      default:           // should be 0 otherwise
+        DBUG_PRINT("admin", ("prepare_func succeeded"));
+        ;
+      }
+    }
+
+    /*
+      CHECK/REPAIR TABLE command is only command where VIEW allowed here and
+      this command use only temporary table method for VIEWs resolving =>
+      there can't be VIEW tree substitition of join view => if opening table
+      succeed then table->table will have real TABLE pointer as value (in
+      case of join view substitution table->table can be 0, but here it is
+      impossible)
+    */
+    if (!table->table)
+    {
+      DBUG_PRINT("admin", ("open table failed"));
+      if (thd->get_stmt_da()->is_warning_info_empty())
+        push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
+                     ER_CHECK_NO_SUCH_TABLE,
+                     ER_THD(thd, ER_CHECK_NO_SUCH_TABLE));
+      /* if it was a view will check md5 sum */
+      if (table->view &&
+          view_check(thd, table, check_opt) == HA_ADMIN_WRONG_CHECKSUM)
+        push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
+                     ER_VIEW_CHECKSUM, ER_THD(thd, ER_VIEW_CHECKSUM));
+      if (thd->get_stmt_da()->is_error() &&
+          table_not_corrupt_error(thd->get_stmt_da()->sql_errno()))
+        result_code= HA_ADMIN_FAILED;
+      else
+        /* Default failure code is corrupt table */
+        result_code= HA_ADMIN_CORRUPT;
+      goto send_result;
+    }
+
+    if (table->view)
+    {
+      DBUG_PRINT("admin", ("calling view_operator_func"));
+      result_code= (*view_operator_func)(thd, table, check_opt);
+      goto send_result;
+    }
+
+    if ((table->table->db_stat & HA_READ_ONLY) && open_for_modify &&
+        operator_func != &handler::ha_analyze)
+    {
+      /* purecov: begin inspected */
+      enum_sql_command save_sql_command= lex->sql_command;
+      DBUG_PRINT("admin", ("sending error message"));
+      protocol->prepare_for_resend();
+      protocol->store(&table_name, system_charset_info);
+      protocol->store(operator_name, system_charset_info);
+      send_read_only_warning(thd, &error_clex_str, &table_name);
+      trans_commit_stmt(thd);
+      trans_commit(thd);
+      close_thread_tables(thd);
+      thd->release_transactional_locks();
+      lex->reset_query_tables_list(FALSE);
+      /*
+        Restore Query_tables_list::sql_command value to make statement
+        safe for re-execution.
+      */
+      lex->sql_command= save_sql_command;
+      table->table=0;				// For query cache
+      if (protocol->write())
+	goto err;
+      thd->get_stmt_da()->reset_diagnostics_area();
+      continue;
+      /* purecov: end */
+    }
+
+    /*
+      Close all instances of the table to allow MyISAM "repair"
+      (which is internally also used from "optimize") to rename files.
+      @todo: This code does not close all instances of the table.
+      It only closes instances in other connections, but if this
+      connection has LOCK TABLE t1 a READ, t1 b WRITE,
+      both t1 instances will be kept open.
+
+      Note that this code is only executed for engines that request
+      MDL_SHARED_NO_READ_WRITE lock (MDL_SHARED_WRITE cannot be upgraded)
+      by *not* having HA_CONCURRENT_OPTIMIZE table_flag.
+    */
+    if (lock_type == TL_WRITE && table->mdl_request.type > MDL_SHARED_WRITE)
+    {
+      if (table->table->s->tmp_table)
+        thd->close_unused_temporary_table_instances(tables);
+      else
+      {
+        /* Store information about table for ddl log */
+        storage_engine_partitioned= table->table->file->partition_engine();
+        strmake(storage_engine_name, table->table->file->real_table_type(),
+                sizeof(storage_engine_name)-1);
+        tabledef_version.str= tabledef_version_buff;
+        if ((tabledef_version.length= table->table->s->tabledef_version.length))
+          memcpy((char*) tabledef_version.str,
+                 table->table->s->tabledef_version.str,
+                 MY_UUID_SIZE);
+
+        if (wait_while_table_is_used(thd, table->table, HA_EXTRA_NOT_USED))
+          goto err;
+        DEBUG_SYNC(thd, "after_admin_flush");
+        /* Flush entries in the query cache involving this table. */
+        query_cache_invalidate3(thd, table->table, 0);
+        /*
+          XXX: hack: switch off open_for_modify to skip the
+          flush that is made later in the execution flow.
+        */
+        open_for_modify= 0;
+      }
+    }
+
+    if (table->table->s->crashed && operator_func == &handler::ha_check)
+    {
+      /* purecov: begin inspected */
+      DBUG_PRINT("admin", ("sending crashed warning"));
+      protocol->prepare_for_resend();
+      protocol->store(&table_name, system_charset_info);
+      protocol->store(operator_name, system_charset_info);
+      protocol->store(STRING_WITH_LEN("warning"), system_charset_info);
+      protocol->store(STRING_WITH_LEN("Table is marked as crashed"),
+                      system_charset_info);
+      if (protocol->write())
+        goto err;
+      /* purecov: end */
+    }
+
+    if (operator_func == &handler::ha_repair &&
+        !(check_opt->sql_flags & TT_USEFRM))
+    {
+      handler *file= table->table->file;
+      int check_old_types=   file->check_old_types();
+      int check_for_upgrade= file->ha_check_for_upgrade(check_opt);
+
+      if (check_old_types == HA_ADMIN_NEEDS_ALTER ||
+          check_for_upgrade == HA_ADMIN_NEEDS_ALTER)
+      {
+        /* We use extra_open_options to be able to open crashed tables */
+        thd->open_options|= extra_open_options;
+        result_code= admin_recreate_table(thd, table, &recreate_info) ?
+                     HA_ADMIN_FAILED : HA_ADMIN_OK;
+        thd->open_options&= ~extra_open_options;
+        goto send_result;
+      }
+      if (check_old_types || check_for_upgrade)
+      {
+        /* If repair is not implemented for the engine, run ALTER TABLE */
+        need_repair_or_alter= 1;
+      }
+    }
+
+    result_code= compl_result_code= HA_ADMIN_OK;
+
+    if (operator_func == &handler::ha_analyze)
+    {
+      TABLE *tab= table->table;
+
+      if (lex->with_persistent_for_clause &&
+          tab->s->table_category != TABLE_CATEGORY_USER)
+      {
+        compl_result_code= result_code= HA_ADMIN_INVALID;
+      }
+
+      /*
+        The check for ALTER_PARTITION_ADMIN implements this logic:
+        do not collect EITS STATS for this syntax:
+          ALTER TABLE ... ANALYZE PARTITION p
+        EITS statistics is global (not per-partition). Collecting global stats
+        is much more expensive processing just one partition, so the most
+        appropriate action is to just not collect EITS stats for this command.
+      */
+      collect_eis=
+        (table->table->s->table_category == TABLE_CATEGORY_USER &&
+        !(lex->alter_info.flags & ALTER_PARTITION_ADMIN) &&
+         (check_eits_collection_allowed(thd) ||
+          lex->with_persistent_for_clause));
+    }
+
+    if (result_code == HA_ADMIN_OK)
+    {    
+      DBUG_PRINT("admin", ("calling operator_func '%s'", operator_name->str));
+      THD_STAGE_INFO(thd, stage_executing);
+      result_code = (table->table->file->*operator_func)(thd, check_opt);
+      THD_STAGE_INFO(thd, stage_sending_data);
+      DBUG_PRINT("admin", ("operator_func returned: %d", result_code));
+    }
+
+    /* Note: compl_result_code can be different from result_code here */
+    if (compl_result_code == HA_ADMIN_OK && collect_eis)
+    {
+      if (result_code == HA_ERR_TABLE_READONLY)
+      {
+        protocol->prepare_for_resend();
+        protocol->store(&table_name, system_charset_info);
+        protocol->store(operator_name, system_charset_info);
+        send_read_only_warning(thd, &msg_status, &table_name);
+        (void) protocol->write();
+        result_code= HA_ADMIN_OK;
+      }
+      /*
+        Here we close and reopen table in read mode because operation of
+        collecting statistics is long and it will be better do not block
+        the table completely.
+        InnoDB will allow read/write and MyISAM read/insert.
+      */
+      trans_commit_stmt(thd);
+      trans_commit(thd);
+      thd->open_options|= extra_open_options;
+      close_thread_tables(thd);
+      table->table= NULL;
+      thd->release_transactional_locks();
+      MDL_REQUEST_INIT(&table->mdl_request, MDL_key::TABLE, table->db.str,
+                       table->table_name.str, MDL_SHARED_NO_READ_WRITE,
+                       MDL_TRANSACTION);
+      table->mdl_request.set_type(MDL_SHARED_READ);
+
+      table->lock_type= TL_READ;
+      DBUG_ASSERT(view_operator_func == NULL);
+      open_error= open_only_one_table(thd, table,
+                                      repair_table_use_frm, FALSE);
+      thd->open_options&= ~extra_open_options;
+
+      if (unlikely(!open_error))
+      {
+        TABLE *tab= table->table;
+        Field **field_ptr= tab->field;
+        USED_MEM *memroot_block;
+
+        if (!lex->column_list)
+        {
+          /* Fields we have to read from the engine */
+          bitmap_clear_all(tab->read_set);
+          /* Fields we want to have statistics for */
+          bitmap_clear_all(&tab->has_value_set);
+
+          for (uint fields= 0; *field_ptr; field_ptr++, fields++)
+          {
+            Field *field= *field_ptr;
+            if (field->flags & LONG_UNIQUE_HASH_FIELD)
+            {
+              /*
+                No point in doing statistic for hash fields that should be
+                unique
+              */
+              continue;
+            }
+            /*
+              Note that type() always return MYSQL_TYPE_BLOB for
+              all blob types. Another function needs to be added
+              if we in the future want to distingush between blob
+              types here.
+            */
+            enum enum_field_types type= field->type();
+            if (type < MYSQL_TYPE_TINY_BLOB ||
+                type > MYSQL_TYPE_BLOB)
+            {
+              field->register_field_in_read_map();
+              bitmap_set_bit(&tab->has_value_set, field->field_index);
+            }
+            else
+              push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+                                  ER_NO_EIS_FOR_FIELD,
+                                  ER_THD(thd, ER_NO_EIS_FOR_FIELD),
+                                  (*field_ptr)->field_name.str);
+          }
+        }
+        else
+        {
+          int pos;
+          LEX_STRING *column_name;
+          List_iterator_fast it(*lex->column_list);
+
+          /* Fields we have to read from the engine */
+          bitmap_clear_all(tab->read_set);
+          /* Fields we want to have statistics for */
+          bitmap_clear_all(&tab->has_value_set);
+
+          while ((column_name= it++))
+          {
+            Field *field;
+            enum enum_field_types type;
+            if (tab->s->fieldnames.type_names == 0 ||
+                (pos= find_type(&tab->s->fieldnames, column_name->str,
+                                column_name->length, 1)) <= 0)
+            {
+              compl_result_code= result_code= HA_ADMIN_INVALID;
+              break;
+            }
+            pos--;
+            field= tab->field[pos];
+            type= field->type();
+            if (!(field->flags & LONG_UNIQUE_HASH_FIELD) &&
+                (type < MYSQL_TYPE_TINY_BLOB ||
+                 type > MYSQL_TYPE_BLOB))
+            {
+              field->register_field_in_read_map();
+              bitmap_set_bit(&tab->has_value_set, field->field_index);
+            }
+            else
+              push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+                                  ER_NO_EIS_FOR_FIELD,
+                                  ER_THD(thd, ER_NO_EIS_FOR_FIELD),
+                                  column_name->str);
+          }
+          tab->file->column_bitmaps_signal();
+        }
+        if (!lex->index_list)
+          tab->keys_in_use_for_query.init(tab->s->keys);
+        else
+        {
+          int pos;
+          LEX_STRING *index_name;
+          List_iterator_fast it(*lex->index_list);
+          tab->keys_in_use_for_query.clear_all();
+          while ((index_name= it++))
+          {
+            if (tab->s->keynames.type_names == 0 ||
+                (pos= find_type(&tab->s->keynames, index_name->str,
+                                index_name->length, 1)) <= 0)
+            {
+              compl_result_code= result_code= HA_ADMIN_INVALID;
+              break;
+            }
+            tab->keys_in_use_for_query.set_bit(--pos);
+          }
+        }
+        /* Ensure that number of records are updated */
+        tab->file->info(HA_STATUS_VARIABLE);
+        memroot_block= get_last_memroot_block(thd->mem_root);
+        if (!(compl_result_code=
+              alloc_statistics_for_table(thd, tab,
+                                         &tab->has_value_set)) &&
+            !(compl_result_code=
+              collect_statistics_for_table(thd, tab)))
+          compl_result_code= update_statistics_for_table(thd, tab);
+        free_statistics_for_table(tab);
+        free_all_new_blocks(thd->mem_root, memroot_block);
+      }
+      else
+        compl_result_code= HA_ADMIN_FAILED;
+
+      if (compl_result_code)
+        result_code= HA_ADMIN_FAILED;
+      else
+      {
+        protocol->prepare_for_resend();
+        protocol->store(&table_name, system_charset_info);
+        protocol->store(operator_name, system_charset_info);
+        protocol->store(&msg_status, system_charset_info);
+	protocol->store(STRING_WITH_LEN("Engine-independent statistics collected"), 
+                        system_charset_info);
+        if (protocol->write())
+          goto err;
+      }
+    }
+
+    if (result_code == HA_ADMIN_NOT_IMPLEMENTED && need_repair_or_alter)
+    {
+      /*
+        repair was not implemented and we need to upgrade the table
+        to a new version so we recreate the table with ALTER TABLE
+      */
+      result_code= admin_recreate_table(thd, table, &recreate_info);
+    }
+send_result:
+
+    lex->cleanup_after_one_table_open();
+    thd->clear_error();  // these errors shouldn't get client
+
+    if (recreate_info.records_duplicate())
+    {
+      protocol->prepare_for_resend();
+      protocol->store(&table_name, system_charset_info);
+      protocol->store(operator_name, system_charset_info);
+      protocol->store(warning_level_names[Sql_condition::WARN_LEVEL_WARN].str,
+                      warning_level_names[Sql_condition::WARN_LEVEL_WARN].length,
+                      system_charset_info);
+      char buf[80];
+      size_t length= my_snprintf(buf, sizeof(buf),
+                                 "Number of rows changed from %u to %u",
+                                 (uint) recreate_info.records_processed(),
+                                 (uint) recreate_info.records_copied());
+      protocol->store(buf, length, system_charset_info);
+      if (protocol->write())
+        goto err;
+    }
+
+    {
+      Diagnostics_area::Sql_condition_iterator it=
+        thd->get_stmt_da()->sql_conditions();
+      const Sql_condition *err;
+      while ((err= it++))
+      {
+        const char *err_msg= err->get_message_text();
+        protocol->prepare_for_resend();
+        protocol->store(&table_name, system_charset_info);
+        protocol->store(operator_name, system_charset_info);
+        protocol->store(warning_level_names[err->get_level()].str,
+                        warning_level_names[err->get_level()].length,
+                        system_charset_info);
+        protocol->store(err_msg, strlen(err_msg), system_charset_info);
+        if (protocol->write())
+          goto err;
+      }
+      thd->get_stmt_da()->clear_warning_info(thd->query_id);
+    }
+    protocol->prepare_for_resend();
+    protocol->store(&table_name, system_charset_info);
+    protocol->store(operator_name, system_charset_info);
+
+send_result_message:
+
+    DBUG_PRINT("info", ("result_code: %d", result_code));
+    switch (result_code) {
+    case HA_ADMIN_NOT_IMPLEMENTED:
+      {
+       char buf[MYSQL_ERRMSG_SIZE];
+       size_t length=my_snprintf(buf, sizeof(buf),
+                                 ER_THD(thd, ER_CHECK_NOT_IMPLEMENTED),
+                                 operator_name->str);
+	protocol->store(STRING_WITH_LEN("note"), system_charset_info);
+	protocol->store(buf, length, system_charset_info);
+      }
+      break;
+
+    case HA_ADMIN_NOT_BASE_TABLE:
+      {
+        char buf[MYSQL_ERRMSG_SIZE];
+        size_t length= my_snprintf(buf, sizeof(buf),
+                                   ER_THD(thd, ER_BAD_TABLE_ERROR),
+                                   table_name.str);
+        protocol->store(STRING_WITH_LEN("note"), system_charset_info);
+        protocol->store(buf, length, system_charset_info);
+      }
+      break;
+
+    case HA_ADMIN_OK:
+      protocol->store(&msg_status, system_charset_info);
+      protocol->store(STRING_WITH_LEN("OK"), system_charset_info);
+      break;
+
+    case HA_ADMIN_FAILED:
+      protocol->store(&msg_status, system_charset_info);
+      protocol->store(STRING_WITH_LEN("Operation failed"),
+                      system_charset_info);
+      break;
+
+    case HA_ADMIN_REJECT:
+      protocol->store(&msg_status, system_charset_info);
+      protocol->store(STRING_WITH_LEN("Operation need committed state"),
+                      system_charset_info);
+      open_for_modify= FALSE;
+      break;
+
+    case HA_ADMIN_ALREADY_DONE:
+      protocol->store(&msg_status, system_charset_info);
+      protocol->store(STRING_WITH_LEN("Table is already up to date"),
+                      system_charset_info);
+      break;
+
+    case HA_ADMIN_CORRUPT:
+      protocol->store(&error_clex_str, system_charset_info);
+      protocol->store(STRING_WITH_LEN("Corrupt"), system_charset_info);
+      fatal_error=1;
+      break;
+
+    case HA_ADMIN_INVALID:
+      protocol->store(&error_clex_str, system_charset_info);
+      protocol->store(STRING_WITH_LEN("Invalid argument"),
+                      system_charset_info);
+      break;
+
+    case HA_ADMIN_TRY_ALTER:
+    {
+      Alter_info *alter_info= &lex->alter_info;
+
+      protocol->store(STRING_WITH_LEN("note"), system_charset_info);
+      if (alter_info->partition_flags & ALTER_PARTITION_ADMIN)
+      {
+        protocol->store(STRING_WITH_LEN(
+        "Table does not support optimize on partitions. All partitions "
+        "will be rebuilt and analyzed."),system_charset_info);
+      }
+      else
+      {
+        protocol->store(STRING_WITH_LEN(
+        "Table does not support optimize, doing recreate + analyze instead"),
+        system_charset_info);
+      }
+      if (protocol->write())
+        goto err;
+      THD_STAGE_INFO(thd, stage_recreating_table);
+      DBUG_PRINT("info", ("HA_ADMIN_TRY_ALTER, trying analyze..."));
+      TABLE_LIST *save_next_local= table->next_local,
+                 *save_next_global= table->next_global;
+      table->next_local= table->next_global= 0;
+
+      result_code= admin_recreate_table(thd, table, &recreate_info);
+      trans_commit_stmt(thd);
+      trans_commit(thd);
+      close_thread_tables(thd);
+      thd->release_transactional_locks();
+      /* Clear references to TABLE and MDL_ticket after releasing them. */
+      table->mdl_request.ticket= NULL;
+
+      if (!result_code) // recreation went ok
+      {
+        /* Clear the ticket released above. */
+        table->mdl_request.ticket= NULL;
+        DEBUG_SYNC(thd, "ha_admin_open_ltable");
+        table->mdl_request.set_type(MDL_SHARED_WRITE);
+        if (!thd->open_temporary_tables(table) &&
+            (table->table= open_ltable(thd, table, lock_type, 0)))
+        {
+          ulonglong save_flags;
+          /* Store the original value of alter_info->flags */
+          save_flags= alter_info->flags;
+
+          /*
+           Reset the ALTER_PARTITION_ADMIN bit in alter_info->flags
+           to force analyze on all partitions.
+          */
+          alter_info->partition_flags &= ~(ALTER_PARTITION_ADMIN);
+          result_code= table->table->file->ha_analyze(thd, check_opt);
+          if (result_code == HA_ADMIN_ALREADY_DONE)
+            result_code= HA_ADMIN_OK;
+          else if (result_code)  // analyze failed
+            table->table->file->print_error(result_code, MYF(0));
+          alter_info->flags= save_flags;
+        }
+        else
+          result_code= -1; // open failed
+      }
+      /* Start a new row for the final status row */
+      protocol->prepare_for_resend();
+      protocol->store(&table_name, system_charset_info);
+      protocol->store(operator_name, system_charset_info);
+      if (result_code) // either mysql_recreate_table or analyze failed
+      {
+        DBUG_ASSERT(thd->is_error());
+        if (thd->is_error())
+        {
+          const char *err_msg= thd->get_stmt_da()->message();
+          if (!thd->vio_ok())
+          {
+            sql_print_error("%s", err_msg);
+          }
+          else
+          {
+            /* Hijack the row already in-progress. */
+            protocol->store(&error_clex_str, system_charset_info);
+            protocol->store(err_msg, strlen(err_msg), system_charset_info);
+            if (protocol->write())
+              goto err;
+            /* Start off another row for HA_ADMIN_FAILED */
+            protocol->prepare_for_resend();
+            protocol->store(&table_name, system_charset_info);
+            protocol->store(operator_name, system_charset_info);
+          }
+          thd->clear_error();
+        }
+        /* Make sure this table instance is not reused after the operation. */
+        if (table->table)
+          table->table->mark_table_for_reopen();
+      }
+      result_code= result_code ? HA_ADMIN_FAILED : HA_ADMIN_OK;
+      table->next_local= save_next_local;
+      table->next_global= save_next_global;
+      goto send_result_message;
+    }
+    case HA_ADMIN_WRONG_CHECKSUM:
+    {
+      protocol->store(STRING_WITH_LEN("note"), system_charset_info);
+      protocol->store(ER_THD(thd, ER_VIEW_CHECKSUM),
+                      strlen(ER_THD(thd, ER_VIEW_CHECKSUM)),
+                      system_charset_info);
+      break;
+    }
+
+    case HA_ADMIN_NEEDS_UPGRADE:
+    case HA_ADMIN_NEEDS_ALTER:
+    {
+      char buf[MYSQL_ERRMSG_SIZE];
+      size_t length;
+      const char *what_to_upgrade= table->view ? "VIEW" :
+          table->table->file->ha_table_flags() & HA_CAN_REPAIR ? "TABLE" : 0;
+
+      protocol->store(&error_clex_str, system_charset_info);
+      if (what_to_upgrade)
+        length= my_snprintf(buf, sizeof(buf),
+                            ER_THD(thd, ER_TABLE_NEEDS_UPGRADE),
+                            what_to_upgrade, table->table_name.str);
+      else
+        length= my_snprintf(buf, sizeof(buf),
+                            ER_THD(thd, ER_TABLE_NEEDS_REBUILD),
+                            table->table_name.str);
+      protocol->store(buf, length, system_charset_info);
+      fatal_error=1;
+      break;
+    }
+    case HA_ERR_TABLE_READONLY:
+    {
+      send_read_only_warning(thd, &msg_status, &table_name);
+      break;
+    }
+    default:				// Probably HA_ADMIN_INTERNAL_ERROR
+      {
+        char buf[MYSQL_ERRMSG_SIZE];
+        size_t length=my_snprintf(buf, sizeof(buf),
+                                "Unknown - internal error %d during operation",
+                                result_code);
+        protocol->store(&error_clex_str, system_charset_info);
+        protocol->store(buf, length, system_charset_info);
+        fatal_error=1;
+        break;
+      }
+    }
+    /*
+      Admin commands acquire table locks and these locks are not detected by
+      parallel replication deadlock detection-and-handling mechanism. Hence
+      they must be marked as DDL so that they are not scheduled in parallel
+      with conflicting DMLs resulting in deadlock.
+    */
+    thd->transaction->stmt.mark_executed_table_admin_cmd();
+
+    if (table->table && !table->view)
+    {
+      /* Skip FLUSH TABLES if we are doing analyze */
+      const bool skip_flush= (operator_func == &handler::ha_analyze);
+      if (table->table->s->tmp_table)
+      {
+        /*
+          If the table was not opened successfully, do not try to get
+          status information. (Bug#47633)
+        */
+        if (open_for_modify && !open_error)
+          table->table->file->info(HA_STATUS_CONST);
+      }
+      else if ((!skip_flush && open_for_modify) || fatal_error)
+      {
+        table->table->s->tdc->flush_unused(true);
+        /*
+          May be something modified. Consequently, we have to
+          invalidate the query cache.
+        */
+        table->table= 0;                        // For query cache
+        query_cache_invalidate3(thd, table, 0);
+      }
+      else if (collect_eis && skip_flush && compl_result_code == HA_ADMIN_OK)
+      {
+        TABLE_LIST *save_next_global= table->next_global;
+        table->next_global= 0;
+        read_statistics_for_tables(thd, table, true /* force_reload */);
+        table->next_global= save_next_global;
+      }
+    }
+    /* Error path, a admin command failed. */
+    if (thd->transaction_rollback_request || fatal_error)
+    {
+      /*
+        Unlikely, but transaction rollback was requested by one of storage
+        engines (e.g. due to deadlock). Perform it.
+      */
+      if (trans_rollback_stmt(thd) || trans_rollback_implicit(thd))
+        goto err;
+    }
+    else
+    {
+      if (trans_commit_stmt(thd))
+        goto err;
+      is_table_modified= true;
+    }
+    close_thread_tables(thd);
+
+    if (storage_engine_name[0])
+    {
+      /* Table was changed (repair, optimize or something similar) */
+      backup_log_info ddl_log;
+      bzero(&ddl_log, sizeof(ddl_log));
+      lex_string_set(&ddl_log.org_storage_engine_name,
+                     storage_engine_name);
+      ddl_log.query=            *operator_name;
+      ddl_log.org_partitioned=  storage_engine_partitioned;
+      ddl_log.org_database=     table->db;
+      ddl_log.org_table=        table->table_name;
+      ddl_log.org_table_id=     tabledef_version;
+      backup_log_ddl(&ddl_log);
+    }
+
+    thd->release_transactional_locks();
+
+    /*
+      If it is CHECK TABLE v1, v2, v3, and v1, v2, v3 are views, we will run
+      separate open_tables() for each CHECK TABLE argument.
+      Right now we do not have a separate method to reset the prelocking
+      state in the lex to the state after parsing, so each open will pollute
+      this state: add elements to lex->srotuines_list, TABLE_LISTs to
+      lex->query_tables. Below is a lame attempt to recover from this
+      pollution.
+      @todo: have a method to reset a prelocking context, or use separate
+      contexts for each open.
+    */
+    for (Sroutine_hash_entry *rt=
+           (Sroutine_hash_entry*)thd->lex->sroutines_list.first;
+         rt; rt= rt->next)
+      rt->mdl_request.ticket= NULL;
+
+    if (protocol->write())
+      goto err;
+    DEBUG_SYNC(thd, "admin_command_kill_after_modify");
+  }
+  thd->resume_subsequent_commits(suspended_wfc);
+  DBUG_EXECUTE_IF("inject_analyze_table_sleep", my_sleep(500000););
+  if (is_table_modified && is_cmd_replicated &&
+      (!opt_readonly || thd->slave_thread) && !thd->lex->no_write_to_binlog)
+  {
+    thd->get_stmt_da()->set_overwrite_status(true);
+    auto res= write_bin_log(thd, true, thd->query(), thd->query_length());
+    thd->get_stmt_da()->set_overwrite_status(false);
+    if (res)
+      goto err;
+  }
+  my_eof(thd);
+
+  DBUG_RETURN(FALSE);
+
+err:
+  /* Make sure this table instance is not reused after the failure. */
+  trans_rollback_stmt(thd);
+  if (stmt_causes_implicit_commit(thd, CF_IMPLICIT_COMMIT_END))
+    trans_rollback(thd);
+  if (table && table->table)
+  {
+    table->table->mark_table_for_reopen();
+    table->table= 0;
+  }
+  close_thread_tables(thd);			// Shouldn't be needed
+  thd->release_transactional_locks();
+  thd->resume_subsequent_commits(suspended_wfc);
+  DBUG_RETURN(TRUE);
+}
+
+
+/*
+  Assigned specified indexes for a table into key cache
+
+  SYNOPSIS
+    mysql_assign_to_keycache()
+    thd		Thread object
+    tables	Table list (one table only)
+
+  RETURN VALUES
+   FALSE ok
+   TRUE  error
+*/
+
+bool mysql_assign_to_keycache(THD* thd, TABLE_LIST* tables,
+                              const LEX_CSTRING *key_cache_name)
+{
+  HA_CHECK_OPT check_opt;
+  KEY_CACHE *cache;
+  DBUG_ENTER("mysql_assign_to_keycache");
+
+  THD_STAGE_INFO(thd, stage_finding_key_cache);
+  check_opt.init();
+
+  /* Resolve the named key cache under the global system-variables lock. */
+  mysql_mutex_lock(&LOCK_global_system_variables);
+  cache= get_key_cache(key_cache_name);
+  mysql_mutex_unlock(&LOCK_global_system_variables);
+
+  /* An unknown name and a never-initialized cache are reported the same. */
+  if (cache == NULL || !cache->key_cache_inited)
+  {
+    my_error(ER_UNKNOWN_KEY_CACHE, MYF(0), key_cache_name->str);
+    DBUG_RETURN(TRUE);
+  }
+
+  check_opt.key_cache= cache;
+  /* Delegate the per-table work to the generic admin-table driver. */
+  DBUG_RETURN(mysql_admin_table(thd, tables, &check_opt,
+                                &msg_assign_to_keycache, TL_READ_NO_INSERT, 0,
+                                0, 0, 0,
+                                &handler::assign_to_keycache, 0, false));
+}
+
+
+/*
+  Preload specified indexes for a table into key cache
+
+  SYNOPSIS
+    mysql_preload_keys()
+    thd		Thread object
+    tables	Table list (one table only)
+
+  RETURN VALUES
+    FALSE ok
+    TRUE  error
+*/
+
+bool mysql_preload_keys(THD* thd, TABLE_LIST* tables)
+{
+  DBUG_ENTER("mysql_preload_keys");
+  /*
+    Concurrent inserts must be blocked (TL_READ_NO_INSERT): the storage
+    engine reads index blocks straight from the index file, bypassing the
+    key cache, so it could observe stale data if parallel inserts were
+    filling cache blocks at the same time.
+  */
+  bool res= mysql_admin_table(thd, tables, 0,
+                              &msg_preload_keys, TL_READ_NO_INSERT,
+                              0, 0, 0, 0,
+                              &handler::preload_keys, 0, false);
+  DBUG_RETURN(res);
+}
+
+
+bool Sql_cmd_analyze_table::execute(THD *thd)
+{
+  LEX *lex= thd->lex;
+  TABLE_LIST *tables= lex->first_select_lex()->table_list.first;
+  thr_lock_type lock_type= TL_READ_NO_INSERT;
+  bool res= TRUE;
+  DBUG_ENTER("Sql_cmd_analyze_table::execute");
+
+  /* ANALYZE TABLE requires both SELECT and INSERT privileges. */
+  if (check_table_access(thd, SELECT_ACL | INSERT_ACL, tables,
+                         FALSE, UINT_MAX, FALSE))
+    goto error;
+  /* Refuse to run while the server enforces read-only mode. */
+  if (thd->has_read_only_protection())
+    goto error;
+
+  WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, tables);
+  res= mysql_admin_table(thd, tables, &lex->check_opt,
+                         &msg_analyze, lock_type, 1, 0, 0, 0,
+                         &handler::ha_analyze, 0, true);
+  /* mysql_admin_table() may relink the table list; restore it. */
+  lex->first_select_lex()->table_list.first= tables;
+  lex->query_tables= tables;
+
+#ifdef WITH_WSREP
+ wsrep_error_label:
+#endif /* WITH_WSREP */
+error:
+  DBUG_RETURN(res);
+}
+
+
+bool Sql_cmd_check_table::execute(THD *thd)
+{
+  LEX *lex= thd->lex;
+  TABLE_LIST *tables= lex->first_select_lex()->table_list.first;
+  thr_lock_type lock_type= TL_READ_NO_INSERT;
+  bool res= TRUE;
+  DBUG_ENTER("Sql_cmd_check_table::execute");
+
+  /* CHECK TABLE only needs SELECT; errors on missing tables are allowed. */
+  if (check_table_access(thd, SELECT_ACL, tables,
+                         TRUE, UINT_MAX, FALSE))
+    goto error; /* purecov: inspected */
+
+  res= mysql_admin_table(thd, tables, &lex->check_opt, &msg_check,
+                         lock_type, 0, 0, HA_OPEN_FOR_REPAIR, 0,
+                         &handler::ha_check, &view_check, false);
+
+  /* mysql_admin_table() may relink the table list; restore it. */
+  lex->first_select_lex()->table_list.first= tables;
+  lex->query_tables= tables;
+
+error:
+  DBUG_RETURN(res);
+}
+
+
+bool Sql_cmd_optimize_table::execute(THD *thd)
+{
+  LEX *lex= thd->lex;
+  TABLE_LIST *tables= lex->first_select_lex()->table_list.first;
+  bool res= TRUE;
+  Recreate_info recreate_info;
+  DBUG_ENTER("Sql_cmd_optimize_table::execute");
+
+  /* OPTIMIZE TABLE requires both SELECT and INSERT privileges. */
+  if (check_table_access(thd, SELECT_ACL | INSERT_ACL, tables,
+                         FALSE, UINT_MAX, FALSE))
+    goto error; /* purecov: inspected */
+
+  WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, tables);
+  if (specialflag & SPECIAL_NO_NEW_FUNC)
+  {
+    /* --skip-new: map OPTIMIZE to a plain table rebuild. */
+    res= mysql_recreate_table(thd, tables, &recreate_info, true);
+  }
+  else
+  {
+    res= mysql_admin_table(thd, tables, &lex->check_opt,
+                           &msg_optimize, TL_WRITE, 1, 0, 0, 0,
+                           &handler::ha_optimize, 0, true);
+  }
+  /* mysql_admin_table() may relink the table list; restore it. */
+  lex->first_select_lex()->table_list.first= tables;
+  lex->query_tables= tables;
+
+#ifdef WITH_WSREP
+wsrep_error_label:
+#endif /* WITH_WSREP */
+error:
+  DBUG_RETURN(res);
+}
+
+
+bool Sql_cmd_repair_table::execute(THD *thd)
+{
+  LEX *lex= thd->lex;
+  TABLE_LIST *tables= lex->first_select_lex()->table_list.first;
+  bool res= TRUE;
+  DBUG_ENTER("Sql_cmd_repair_table::execute");
+
+  /* REPAIR TABLE requires both SELECT and INSERT privileges. */
+  if (check_table_access(thd, SELECT_ACL | INSERT_ACL, tables,
+                         FALSE, UINT_MAX, FALSE))
+    goto error; /* purecov: inspected */
+  WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, tables);
+  /* TT_USEFRM requests a repair that rebuilds the table from the .frm. */
+  res= mysql_admin_table(thd, tables, &lex->check_opt, &msg_repair,
+                         TL_WRITE, 1,
+                         MY_TEST(lex->check_opt.sql_flags & TT_USEFRM),
+                         HA_OPEN_FOR_REPAIR, &prepare_for_repair,
+                         &handler::ha_repair, &view_repair, true);
+
+  /* mysql_admin_table() may relink the table list; restore it. */
+  lex->first_select_lex()->table_list.first= tables;
+  lex->query_tables= tables;
+
+#ifdef WITH_WSREP
+wsrep_error_label:
+#endif /* WITH_WSREP */
+error:
+  DBUG_RETURN(res);
+}
diff --git a/sql/sql_admin.h b/sql/sql_admin.h
new file mode 100644
index 00000000..1a237d4f
--- /dev/null
+++ b/sql/sql_admin.h
@@ -0,0 +1,117 @@
+/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#ifndef SQL_TABLE_MAINTENANCE_H
+#define SQL_TABLE_MAINTENANCE_H
+
+/* Must be able to hold ALTER TABLE t PARTITION BY ... KEY ALGORITHM = 1 ... */
+#define SQL_ADMIN_MSG_TEXT_SIZE (128 * 1024)
+
+bool mysql_assign_to_keycache(THD* thd, TABLE_LIST* table_list,
+                              const LEX_CSTRING *key_cache_name);
+bool mysql_preload_keys(THD* thd, TABLE_LIST* table_list);
+int reassign_keycache_tables(THD* thd, KEY_CACHE *src_cache,
+                             KEY_CACHE *dst_cache);
+/* Fills the metadata (result-set header) fields for CHECK TABLE output.
+   Note: the template argument <Item> was lost in extraction garbling of
+   the original `List<Item>*`; restored here to keep the header valid. */
+void fill_check_table_metadata_fields(THD *thd, List<Item> *fields);
+/**
+  Sql_cmd_analyze_table represents the ANALYZE TABLE statement.
+*/
+class Sql_cmd_analyze_table : public Sql_cmd
+{
+public:
+  /**
+    Constructor, used to represent a ANALYZE TABLE statement.
+  */
+  Sql_cmd_analyze_table() = default;
+
+  ~Sql_cmd_analyze_table() = default;
+
+  /* Execute ANALYZE TABLE for every table of the statement. */
+  bool execute(THD *thd);
+
+  /* Dispatch code used by the generic Sql_cmd machinery. */
+  virtual enum_sql_command sql_command_code() const
+  {
+    return SQLCOM_ANALYZE;
+  }
+};
+
+
+
+/**
+  Sql_cmd_check_table represents the CHECK TABLE statement.
+*/
+class Sql_cmd_check_table : public Sql_cmd
+{
+public:
+  /**
+    Constructor, used to represent a CHECK TABLE statement.
+  */
+  Sql_cmd_check_table() = default;
+
+  ~Sql_cmd_check_table() = default;
+
+  /* Execute CHECK TABLE for every table of the statement. */
+  bool execute(THD *thd);
+
+  /* Dispatch code used by the generic Sql_cmd machinery. */
+  virtual enum_sql_command sql_command_code() const
+  {
+    return SQLCOM_CHECK;
+  }
+};
+
+
+/**
+  Sql_cmd_optimize_table represents the OPTIMIZE TABLE statement.
+*/
+class Sql_cmd_optimize_table : public Sql_cmd
+{
+public:
+  /**
+    Constructor, used to represent a OPTIMIZE TABLE statement.
+  */
+  Sql_cmd_optimize_table() = default;
+
+  ~Sql_cmd_optimize_table() = default;
+
+  /* Execute OPTIMIZE TABLE for every table of the statement. */
+  bool execute(THD *thd);
+
+  /* Dispatch code used by the generic Sql_cmd machinery. */
+  virtual enum_sql_command sql_command_code() const
+  {
+    return SQLCOM_OPTIMIZE;
+  }
+};
+
+
+
+/**
+  Sql_cmd_repair_table represents the REPAIR TABLE statement.
+*/
+class Sql_cmd_repair_table : public Sql_cmd
+{
+public:
+  /**
+    Constructor, used to represent a REPAIR TABLE statement.
+  */
+  Sql_cmd_repair_table() = default;
+
+  ~Sql_cmd_repair_table() = default;
+
+  /* Execute REPAIR TABLE for every table of the statement. */
+  bool execute(THD *thd);
+
+  /* Dispatch code used by the generic Sql_cmd machinery. */
+  virtual enum_sql_command sql_command_code() const
+  {
+    return SQLCOM_REPAIR;
+  }
+};
+
+#endif
diff --git a/sql/sql_alloc.h b/sql/sql_alloc.h
new file mode 100644
index 00000000..f5d2d4e8
--- /dev/null
+++ b/sql/sql_alloc.h
@@ -0,0 +1,44 @@
+#ifndef SQL_ALLOC_INCLUDED
+#define SQL_ALLOC_INCLUDED
+/* Copyright (c) 2000, 2012, Oracle and/or its affiliates.
+   Copyright (c) 2017, 2018, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
+
+#include <my_sys.h>                  /* alloc_root, MEM_ROOT, TRASH */
+
+/* MariaDB standard class memory allocator */
+
+/*
+  Base class that routes operator new for derived objects to a MEM_ROOT
+  arena: the current THD's root (plain forms, via thd_alloc) or an
+  explicitly supplied MEM_ROOT (placement forms, via alloc_root).
+  operator delete does not release memory — the owning MEM_ROOT does —
+  it only poisons the bytes via TRASH_FREE to catch use-after-free.
+*/
+class Sql_alloc
+{
+public:
+  /* Allocate from the current THD's memory root. */
+  static void *operator new(size_t size) throw ()
+  {
+    return thd_alloc(_current_thd(), size);
+  }
+  static void *operator new[](size_t size) throw ()
+  {
+    return thd_alloc(_current_thd(), size);
+  }
+  /* Allocate from an explicitly given MEM_ROOT. */
+  static void *operator new[](size_t size, MEM_ROOT *mem_root) throw ()
+  { return alloc_root(mem_root, size); }
+  static void *operator new(size_t size, MEM_ROOT *mem_root) throw()
+  { return alloc_root(mem_root, size); }
+  /* No-op deletes: MEM_ROOT owns the memory; TRASH_FREE only poisons. */
+  static void operator delete(void *ptr, size_t size) { TRASH_FREE(ptr, size); }
+  static void operator delete(void *, MEM_ROOT *){}
+  static void operator delete[](void *, MEM_ROOT *)
+  { /* never called */ }
+  static void operator delete[](void *ptr, size_t size) { TRASH_FREE(ptr, size); }
+};
+#endif /* SQL_ALLOC_INCLUDED */
diff --git a/sql/sql_alter.cc b/sql/sql_alter.cc
new file mode 100644
index 00000000..3b9c8402
--- /dev/null
+++ b/sql/sql_alter.cc
@@ -0,0 +1,728 @@
+/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2016, 2020, MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#include "mariadb.h"
+#include "sql_parse.h"                       // check_access
+#include "sql_table.h"                       // mysql_alter_table,
+                                             // mysql_exchange_partition
+#include "sql_statistics.h"                  // delete_statistics_for_column
+#include "sql_alter.h"
+#include "rpl_mi.h"
+#include "slave.h"
+#include "debug_sync.h"
+#include "wsrep_mysqld.h"
+
+/*
+  Copy constructor used to create a per-execution copy of Alter_info.
+  mysql_alter_table()/mysql_create_table() modify their Alter_info
+  argument, so prepared statements and stored routines must not hand
+  them the parser's original.
+*/
+Alter_info::Alter_info(const Alter_info &rhs, MEM_ROOT *mem_root)
+  :drop_list(rhs.drop_list, mem_root),
+  alter_list(rhs.alter_list, mem_root),
+  key_list(rhs.key_list, mem_root),
+  alter_rename_key_list(rhs.alter_rename_key_list, mem_root),
+  create_list(rhs.create_list, mem_root),
+  alter_index_ignorability_list(rhs.alter_index_ignorability_list, mem_root),
+  check_constraint_list(rhs.check_constraint_list, mem_root),
+  flags(rhs.flags), partition_flags(rhs.partition_flags),
+  keys_onoff(rhs.keys_onoff),
+  original_table(0),
+  partition_names(rhs.partition_names, mem_root),
+  num_parts(rhs.num_parts),
+  requested_algorithm(rhs.requested_algorithm),
+  requested_lock(rhs.requested_lock)
+{
+  /*
+    Make deep copies of used objects.
+    This is not a fully deep copy - clone() implementations
+    of Alter_drop, Alter_column, Key, foreign_key, Key_part_spec
+    do not copy string constants. At the same length the only
+    reason we make a copy currently is that ALTER/CREATE TABLE
+    code changes input Alter_info definitions, but string
+    constants never change.
+  */
+  list_copy_and_replace_each_value(drop_list, mem_root);
+  list_copy_and_replace_each_value(alter_list, mem_root);
+  list_copy_and_replace_each_value(key_list, mem_root);
+  list_copy_and_replace_each_value(alter_rename_key_list, mem_root);
+  list_copy_and_replace_each_value(create_list, mem_root);
+  /* partition_names are not deeply copied currently */
+}
+
+
+/*
+  Parse an ALGORITHM=<value> clause string and store the result in
+  requested_algorithm.  Returns true (error) if the string matches
+  none of the known values.
+*/
+bool Alter_info::set_requested_algorithm(const LEX_CSTRING *str)
+{
+  // To avoid adding new keywords to the grammar, we match strings here.
+  if (lex_string_eq(str, STRING_WITH_LEN("INPLACE")))
+    requested_algorithm= ALTER_TABLE_ALGORITHM_INPLACE;
+  else if (lex_string_eq(str, STRING_WITH_LEN("COPY")))
+    requested_algorithm= ALTER_TABLE_ALGORITHM_COPY;
+  else if (lex_string_eq(str, STRING_WITH_LEN("DEFAULT")))
+    requested_algorithm= ALTER_TABLE_ALGORITHM_DEFAULT;
+  else if (lex_string_eq(str, STRING_WITH_LEN("NOCOPY")))
+    requested_algorithm= ALTER_TABLE_ALGORITHM_NOCOPY;
+  else if (lex_string_eq(str, STRING_WITH_LEN("INSTANT")))
+    requested_algorithm= ALTER_TABLE_ALGORITHM_INSTANT;
+  else
+    return true;
+  return false;
+}
+
+/* Overload taking an already-resolved algorithm enum value. */
+void Alter_info::set_requested_algorithm(enum_alter_table_algorithm algo_val)
+{
+  requested_algorithm= algo_val;
+}
+
+/*
+  Parse a LOCK=<value> clause string and store the result in
+  requested_lock.  Returns true (error) on an unknown value.
+*/
+bool Alter_info::set_requested_lock(const LEX_CSTRING *str)
+{
+  // To avoid adding new keywords to the grammar, we match strings here.
+  if (lex_string_eq(str, STRING_WITH_LEN("NONE")))
+    requested_lock= ALTER_TABLE_LOCK_NONE;
+  else if (lex_string_eq(str, STRING_WITH_LEN("SHARED")))
+    requested_lock= ALTER_TABLE_LOCK_SHARED;
+  else if (lex_string_eq(str, STRING_WITH_LEN("EXCLUSIVE")))
+    requested_lock= ALTER_TABLE_LOCK_EXCLUSIVE;
+  else if (lex_string_eq(str, STRING_WITH_LEN("DEFAULT")))
+    requested_lock= ALTER_TABLE_LOCK_DEFAULT;
+  else
+    return true;
+  return false;
+}
+
+/*
+  Return the ALGORITHM=... clause text for the effective algorithm
+  (see algorithm(thd)).  ALTER_TABLE_ALGORITHM_NONE should never be
+  the effective value (asserted) and falls through to DEFAULT.
+*/
+const char* Alter_info::algorithm_clause(THD *thd) const
+{
+  switch (algorithm(thd)) {
+  case ALTER_TABLE_ALGORITHM_INPLACE:
+    return "ALGORITHM=INPLACE";
+  case ALTER_TABLE_ALGORITHM_COPY:
+    return "ALGORITHM=COPY";
+  case ALTER_TABLE_ALGORITHM_NONE:
+    DBUG_ASSERT(0);
+    /* Fall through */
+  case ALTER_TABLE_ALGORITHM_DEFAULT:
+    return "ALGORITHM=DEFAULT";
+  case ALTER_TABLE_ALGORITHM_NOCOPY:
+    return "ALGORITHM=NOCOPY";
+  case ALTER_TABLE_ALGORITHM_INSTANT:
+    return "ALGORITHM=INSTANT";
+  }
+
+  return NULL; /* purecov: begin deadcode */
+}
+
+/* Return the LOCK=... clause text for the requested lock level. */
+const char* Alter_info::lock() const
+{
+  switch (requested_lock) {
+  case ALTER_TABLE_LOCK_SHARED:
+    return "LOCK=SHARED";
+  case ALTER_TABLE_LOCK_NONE:
+    return "LOCK=NONE";
+  case ALTER_TABLE_LOCK_DEFAULT:
+    return "LOCK=DEFAULT";
+  case ALTER_TABLE_LOCK_EXCLUSIVE:
+    return "LOCK=EXCLUSIVE";
+  }
+  return NULL; /* purecov: begin deadcode */
+}
+
+
+/*
+  Check whether the requested ALGORITHM is compatible with what the
+  storage engine declared in ha_alter_info->inplace_supported.
+  On mismatch an error is reported (report_unsupported_error) and
+  true is returned; false means the operation may proceed.
+*/
+bool Alter_info::supports_algorithm(THD *thd,
+                                    const Alter_inplace_info *ha_alter_info)
+{
+  switch (ha_alter_info->inplace_supported) {
+  case HA_ALTER_INPLACE_EXCLUSIVE_LOCK:
+  case HA_ALTER_INPLACE_SHARED_LOCK:
+  case HA_ALTER_INPLACE_NO_LOCK:
+  case HA_ALTER_INPLACE_INSTANT:
+     return false;
+  case HA_ALTER_INPLACE_COPY_NO_LOCK:
+  case HA_ALTER_INPLACE_COPY_LOCK:
+    // Engine can at best do an inplace copy: NOCOPY/INSTANT must fail.
+    if (algorithm(thd) >= Alter_info::ALTER_TABLE_ALGORITHM_NOCOPY)
+    {
+      ha_alter_info->report_unsupported_error(algorithm_clause(thd),
+                                              "ALGORITHM=INPLACE");
+      return true;
+    }
+    return false;
+  case HA_ALTER_INPLACE_NOCOPY_NO_LOCK:
+  case HA_ALTER_INPLACE_NOCOPY_LOCK:
+    // Engine can avoid a rebuild but not do a metadata-only change.
+    if (algorithm(thd) == Alter_info::ALTER_TABLE_ALGORITHM_INSTANT)
+    {
+      ha_alter_info->report_unsupported_error("ALGORITHM=INSTANT",
+                                              "ALGORITHM=NOCOPY");
+      return true;
+    }
+    return false;
+  case HA_ALTER_INPLACE_NOT_SUPPORTED:
+    // Only ALGORITHM=COPY (or DEFAULT) is possible.
+    if (algorithm(thd) >= Alter_info::ALTER_TABLE_ALGORITHM_INPLACE)
+    {
+      ha_alter_info->report_unsupported_error(algorithm_clause(thd),
+					      "ALGORITHM=COPY");
+      return true;
+    }
+    return false;
+  case HA_ALTER_ERROR:
+    return true;
+  }
+  /* purecov: begin deadcode */
+  DBUG_ASSERT(0);
+  return false;
+}
+
+
+/*
+  Check whether the requested LOCK level is compatible with what the
+  storage engine declared in ha_alter_info->inplace_supported.
+  On mismatch an error is reported and true is returned; false means
+  the operation may proceed.
+*/
+bool Alter_info::supports_lock(THD *thd,
+                               const Alter_inplace_info *ha_alter_info)
+{
+  switch (ha_alter_info->inplace_supported) {
+  case HA_ALTER_INPLACE_EXCLUSIVE_LOCK:
+    // If SHARED lock and no particular algorithm was requested, use COPY.
+    if (requested_lock == Alter_info::ALTER_TABLE_LOCK_SHARED &&
+        algorithm(thd) == Alter_info::ALTER_TABLE_ALGORITHM_DEFAULT &&
+        thd->variables.alter_algorithm ==
+                Alter_info::ALTER_TABLE_ALGORITHM_DEFAULT)
+         return false;
+
+    if (requested_lock == Alter_info::ALTER_TABLE_LOCK_SHARED ||
+        requested_lock == Alter_info::ALTER_TABLE_LOCK_NONE)
+    {
+      ha_alter_info->report_unsupported_error(lock(), "LOCK=EXCLUSIVE");
+      return true;
+    }
+    return false;
+  case HA_ALTER_INPLACE_NO_LOCK:
+  case HA_ALTER_INPLACE_INSTANT:
+  case HA_ALTER_INPLACE_COPY_NO_LOCK:
+  case HA_ALTER_INPLACE_NOCOPY_NO_LOCK:
+    return false;
+  case HA_ALTER_INPLACE_COPY_LOCK:
+  case HA_ALTER_INPLACE_NOCOPY_LOCK:
+  case HA_ALTER_INPLACE_NOT_SUPPORTED:
+  case HA_ALTER_INPLACE_SHARED_LOCK:
+    // Engine needs at least a shared lock: LOCK=NONE cannot be honoured.
+    if (requested_lock == Alter_info::ALTER_TABLE_LOCK_NONE)
+    {
+      ha_alter_info->report_unsupported_error("LOCK=NONE", "LOCK=SHARED");
+      return true;
+    }
+    return false;
+  case HA_ALTER_ERROR:
+    return true;
+  }
+  /* purecov: begin deadcode */
+  DBUG_ASSERT(0);
+  return false;
+}
+
+/*
+  Return true if this ALTER must be refused because the session runs
+  with vers_alter_history=ERROR: any column add/drop/change/reorder,
+  or adding a PRIMARY/UNIQUE key, is prohibited.  Replication slave
+  threads are exempt.
+*/
+bool Alter_info::vers_prohibited(THD *thd) const
+{
+  if (thd->slave_thread ||
+      thd->variables.vers_alter_history != VERS_ALTER_HISTORY_ERROR)
+  {
+    return false;
+  }
+  if (flags & (
+    ALTER_PARSER_ADD_COLUMN |
+    ALTER_PARSER_DROP_COLUMN |
+    ALTER_CHANGE_COLUMN |
+    ALTER_COLUMN_ORDER))
+  {
+    return true;
+  }
+  if (flags & ALTER_ADD_INDEX)
+  {
+    /* Template arguments restored; they were stripped in extraction. */
+    List_iterator_fast<Key> key_it(const_cast<List<Key> &>(key_list));
+    Key *key;
+    while ((key= key_it++))
+    {
+      if (key->type == Key::PRIMARY || key->type == Key::UNIQUE)
+        return true;
+    }
+  }
+  return false;
+}
+
+/*
+  Effective algorithm: the explicitly requested one, or the session's
+  alter_algorithm variable when no ALGORITHM clause was given (NONE).
+*/
+Alter_info::enum_alter_table_algorithm
+Alter_info::algorithm(const THD *thd) const
+{
+  if (requested_algorithm == ALTER_TABLE_ALGORITHM_NONE)
+   return (Alter_info::enum_alter_table_algorithm) thd->variables.alter_algorithm;
+  return requested_algorithm;
+}
+
+
+/*
+  Validate a column reference used inside a virtual-column expression.
+  Returns VCOL_IMPOSSIBLE for an unresolved reference to another
+  db/table (rewriting item->field_name to the full "db.table.column"
+  spelling for the error message), VCOL_NON_DETERMINISTIC when the
+  column is target of an FK CASCADE/SET NULL action, the new column's
+  vcol flags when found in create_list, and 0 otherwise.
+*/
+uint Alter_info::check_vcol_field(Item_field *item) const
+{
+  /*
+    vcol->flags are modified in-place, so we'll need to reset them
+    if ALTER fails for any reason
+  */
+  if (item->field && !item->field->table->needs_reopen())
+    item->field->table->mark_table_for_reopen();
+
+  if (!item->field &&
+      ((item->db_name.length && !db.streq(item->db_name)) ||
+       (item->table_name.length && !table_name.streq(item->table_name))))
+  {
+    // +3: optional '.' after db, '.' before field, trailing NUL
+    char *ptr= (char*)current_thd->alloc(item->db_name.length +
+                                         item->table_name.length +
+                                         item->field_name.length + 3);
+    strxmov(ptr, safe_str(item->db_name.str), item->db_name.length ? "." : "",
+            item->table_name.str, ".", item->field_name.str, NullS);
+    item->field_name.str= ptr;
+    return VCOL_IMPOSSIBLE;
+  }
+  // A column written by an FK referential action cannot be used
+  // deterministically in a virtual column expression.
+  for (Key &k: key_list)
+  {
+    if (k.type != Key::FOREIGN_KEY)
+      continue;
+    Foreign_key *fk= (Foreign_key*) &k;
+    if (fk->update_opt < FK_OPTION_CASCADE &&
+        fk->delete_opt < FK_OPTION_SET_NULL)
+      continue;
+    for (Key_part_spec& kp: fk->columns)
+    {
+      if (item->field_name.streq(kp.field_name))
+        return VCOL_NON_DETERMINISTIC;
+    }
+  }
+  for (Create_field &cf: create_list)
+  {
+    if (item->field_name.streq(cf.field_name))
+      return cf.vcol_info ? cf.vcol_info->flags : 0;
+  }
+  return 0;
+}
+
+
+/*
+  Mark every field renamed by this ALTER with FIELD_IS_RENAMED and
+  queue the corresponding EITS statistics rename via
+  add_stat_rename_field().  Returns true on out-of-memory.
+*/
+bool Alter_info::collect_renamed_fields(THD *thd)
+{
+  /* Template argument restored; it was stripped in extraction. */
+  List_iterator_fast<Create_field> new_field_it;
+  Create_field *new_field;
+  DBUG_ENTER("Alter_info::collect_renamed_fields");
+
+  new_field_it.init(create_list);
+  while ((new_field= new_field_it++))
+  {
+    Field *field= new_field->field;
+
+    if (new_field->field &&
+        cmp(&field->field_name, &new_field->field_name))
+    {
+      field->flags|= FIELD_IS_RENAMED;
+      if (add_stat_rename_field(field,
+                                &new_field->field_name,
+                                thd->mem_root))
+        DBUG_RETURN(true);
+
+    }
+  }
+  DBUG_RETURN(false);
+}
+
+
+/*
+  Delete duplicate index found during mysql_prepare_create_table()
+
+  Notes:
+    - In case of temporary generated foreign keys, the key_name may not
+      be set!  These keys are ignored.
+*/
+
+bool Alter_info::add_stat_drop_index(THD *thd, const LEX_CSTRING *key_name)
+{
+  // original_table is only set when called from ALTER TABLE;
+  // unnamed (temporary generated FK) keys are ignored.
+  if (original_table && key_name->length)       // If from alter table
+  {
+    KEY *key_info= original_table->key_info;
+    for (uint i= 0; i < original_table->s->keys; i++, key_info++)
+    {
+      if (key_info->name.length &&
+          !lex_string_cmp(system_charset_info, &key_info->name,
+                          key_name))
+        return add_stat_drop_index(key_info, false, thd->mem_root);
+    }
+  }
+  return false;
+}
+
+
+/*
+  Apply the queued EITS statistics changes collected during ALTER:
+  delete statistics for dropped columns/indexes and rename the
+  statistics rows for renamed ones.  Errors from the statistics
+  tables are deliberately ignored (best effort).
+*/
+void Alter_info::apply_statistics_deletes_renames(THD *thd, TABLE *table)
+{
+  /* Template arguments restored; they were stripped in extraction. */
+  List_iterator<Field>                      it_drop_field(drop_stat_fields);
+  List_iterator<RENAME_COLUMN_STAT_PARAMS>  it_rename_field(rename_stat_fields);
+  List_iterator<DROP_INDEX_STAT_PARAMS>     it_drop_index(drop_stat_indexes);
+  List_iterator<RENAME_INDEX_STAT_PARAMS>   it_rename_index(rename_stat_indexes);
+
+  while (Field *field= it_drop_field++)
+    delete_statistics_for_column(thd, table, field);
+
+  if (!rename_stat_fields.is_empty())
+    (void) rename_columns_in_stat_table(thd, table, &rename_stat_fields);
+
+  while (DROP_INDEX_STAT_PARAMS *key= it_drop_index++)
+    (void) delete_statistics_for_index(thd, table, key->key,
+                                       key->ext_prefixes_only);
+
+  if (!rename_stat_indexes.is_empty())
+    (void) rename_indexes_in_stat_table(thd, table, &rename_stat_indexes);
+}
+
+
+/* Default context: all name members start out as empty LEX_CSTRINGs. */
+Alter_table_ctx::Alter_table_ctx()
+  : db(null_clex_str), table_name(null_clex_str), alias(null_clex_str),
+    new_db(null_clex_str), new_name(null_clex_str), new_alias(null_clex_str)
+{
+}
+
+/*
+  TODO: new_name_arg changes if lower case table names.
+  Should be copied or converted before call
+*/
+
+/*
+  Build the full ALTER TABLE context: resolve source and destination
+  db/table names (applying lower_case_table_names rules), generate the
+  temporary table name, and precompute the filesystem paths used during
+  the ALTER.  Also records the table's id (tabledef version) and
+  storage engine name.
+*/
+Alter_table_ctx::Alter_table_ctx(THD *thd, TABLE_LIST *table_list,
+                                 uint tables_opened_arg,
+                                 const LEX_CSTRING *new_db_arg,
+                                 const LEX_CSTRING *new_name_arg)
+  : tables_opened(tables_opened_arg),
+    new_db(*new_db_arg), new_name(*new_name_arg)
+{
+  /*
+    Assign members db, table_name, new_db and new_name
+    to simplify further comparisions: we want to see if it's a RENAME
+    later just by comparing the pointers, avoiding the need for strcmp.
+  */
+  db= table_list->db;
+  table_name= table_list->table_name;
+  alias= (lower_case_table_names == 2) ? table_list->alias : table_name;
+
+  // Same db (or no db given): share the pointer so is_database_changed()
+  // can be a pointer comparison.
+  if (!new_db.str || !my_strcasecmp(table_alias_charset, new_db.str, db.str))
+    new_db= db;
+
+  if (new_name.str)
+  {
+    DBUG_PRINT("info", ("new_db.new_name: '%s'.'%s'", new_db.str, new_name.str));
+
+    if (lower_case_table_names == 1) // Convert new_name/new_alias to lower
+    {
+      new_name.length= my_casedn_str(files_charset_info, (char*) new_name.str);
+      new_alias= new_name;
+    }
+    else if (lower_case_table_names == 2) // Convert new_name to lower case
+    {
+      new_alias.str=    new_alias_buff;
+      new_alias.length= new_name.length;
+      strmov(new_alias_buff, new_name.str);
+      new_name.length= my_casedn_str(files_charset_info, (char*) new_name.str);
+
+    }
+    else
+      new_alias= new_name; // LCTN=0 => case sensitive + case preserving
+
+    if (!is_database_changed() &&
+        !my_strcasecmp(table_alias_charset, new_name.str, table_name.str))
+    {
+      /*
+        Source and destination table names are equal:
+        make is_table_renamed() more efficient.
+      */
+      new_alias= table_name;
+      new_name= table_name;
+    }
+  }
+  else
+  {
+    new_alias= alias;
+    new_name= table_name;
+  }
+
+  // Unique temporary name: prefix + process id + thread id.
+  tmp_name.str= tmp_name_buff;
+  tmp_name.length= my_snprintf(tmp_name_buff, sizeof(tmp_name_buff),
+                               "%s-alter-%lx-%llx",
+                               tmp_file_prefix, current_pid, thd->thread_id);
+  /* Safety fix for InnoDB */
+  if (lower_case_table_names)
+    tmp_name.length= my_casedn_str(files_charset_info, tmp_name_buff);
+
+  if (table_list->table->s->tmp_table == NO_TMP_TABLE)
+  {
+    build_table_filename(path, sizeof(path) - 1, db.str, table_name.str, "", 0);
+
+    build_table_filename(new_path, sizeof(new_path) - 1, new_db.str, new_name.str, "", 0);
+
+    build_table_filename(new_filename, sizeof(new_filename) - 1,
+                         new_db.str, new_name.str, reg_ext, 0);
+
+    build_table_filename(tmp_path, sizeof(tmp_path) - 1, new_db.str, tmp_name.str, "",
+                         FN_IS_TMP);
+  }
+  else
+  {
+    /*
+      We are not filling path, new_path and new_filename members if
+      we are altering temporary table as these members are not used in
+      this case. This fact is enforced with assert.
+    */
+    build_tmptable_filename(thd, tmp_path, sizeof(tmp_path));
+    tmp_table= true;
+  }
+  if ((id.length= table_list->table->s->tabledef_version.length))
+    memcpy(id_buff, table_list->table->s->tabledef_version.str, MY_UUID_SIZE);
+  id.str= id_buff;
+  storage_engine_partitioned= table_list->table->file->partition_engine();
+  storage_engine_name.str= storage_engine_buff;
+  storage_engine_name.length= ((strmake(storage_engine_buff,
+                                        table_list->table->file->
+                                        real_table_type(),
+                                        sizeof(storage_engine_buff)-1)) -
+                               storage_engine_buff);
+  tmp_storage_engine_name.str= tmp_storage_engine_buff;
+  tmp_storage_engine_name.length= 0;
+  tmp_id.str= 0;
+  tmp_id.length= 0;
+}
+
+
+/*
+  Push a truncation warning for the field recorded in
+  implicit_default_value_error_field, using the field's type handler
+  to name the type and its implicit default.  `s` may be NULL, in
+  which case db/table names are omitted from the warning.
+*/
+void Alter_table_ctx::report_implicit_default_value_error(THD *thd,
+                                                          const TABLE_SHARE *s)
+                                                          const
+{
+  Create_field *error_field= implicit_default_value_error_field;
+  const Type_handler *h= error_field->type_handler();
+  thd->push_warning_truncated_value_for_field(Sql_condition::WARN_LEVEL_WARN,
+                                              h->name().ptr(),
+                                              h->default_value().ptr(),
+                                              s ? s->db.str : nullptr,
+                                              s ? s->table_name.str : nullptr,
+                                              error_field->field_name.str);
+}
+
+
+/*
+  Execute ALTER TABLE.
+
+  Resolves the target engine (if ENGINE= was given), takes a private
+  copy of HA_CREATE_INFO/Alter_info (prepared-statement safety), checks
+  ALTER (plus DROP for DROP PARTITION / CONVERT / RENAME) and
+  INSERT+CREATE privileges, handles Galera (wsrep) TOI replication,
+  then calls mysql_alter_table().  Returns true on error.
+*/
+bool Sql_cmd_alter_table::execute(THD *thd)
+{
+  LEX *lex= thd->lex;
+  /* first SELECT_LEX (has special meaning for many non-SELECT commands) */
+  SELECT_LEX *select_lex= lex->first_select_lex();
+  /* first table of first SELECT_LEX */
+  TABLE_LIST *first_table= (TABLE_LIST*) select_lex->table_list.first;
+
+  const bool used_engine= lex->create_info.used_fields & HA_CREATE_USED_ENGINE;
+  DBUG_ASSERT((m_storage_engine_name.str != NULL) == used_engine);
+  if (used_engine)
+  {
+    if (resolve_storage_engine_with_error(thd, &lex->create_info.db_type,
+                                          lex->create_info.tmp_table()))
+      return true; // Engine not found, substitution is not allowed
+    if (!lex->create_info.db_type) // Not found, but substitution is allowed
+      lex->create_info.used_fields&= ~HA_CREATE_USED_ENGINE;
+  }
+
+  /*
+    Code in mysql_alter_table() may modify its HA_CREATE_INFO argument,
+    so we have to use a copy of this structure to make execution
+    prepared statement- safe. A shallow copy is enough as no memory
+    referenced from this structure will be modified.
+    @todo move these into constructor...
+  */
+  Table_specification_st create_info(lex->create_info);
+  Alter_info alter_info(lex->alter_info, thd->mem_root);
+  create_info.alter_info= &alter_info;
+  privilege_t priv(NO_ACL);
+  privilege_t priv_needed(ALTER_ACL);
+  bool result;
+
+  DBUG_ENTER("Sql_cmd_alter_table::execute");
+
+  if (unlikely(thd->is_fatal_error))
+  {
+    /* out of memory creating a copy of alter_info */
+    DBUG_RETURN(TRUE);
+  }
+  /*
+    We also require DROP priv for ALTER TABLE ... DROP PARTITION, as well
+    as for RENAME TO, as being done by SQLCOM_RENAME_TABLE
+  */
+  if ((alter_info.partition_flags & ALTER_PARTITION_DROP) ||
+      (alter_info.partition_flags & ALTER_PARTITION_CONVERT_IN) ||
+      (alter_info.partition_flags & ALTER_PARTITION_CONVERT_OUT) ||
+      (alter_info.flags & ALTER_RENAME))
+    priv_needed|= DROP_ACL;
+
+  /* Must be set in the parser */
+  DBUG_ASSERT(select_lex->db.str);
+  DBUG_ASSERT(!(alter_info.partition_flags & ALTER_PARTITION_EXCHANGE));
+  DBUG_ASSERT(!(alter_info.partition_flags & ALTER_PARTITION_ADMIN));
+  if (check_access(thd, priv_needed, first_table->db.str,
+                   &first_table->grant.privilege,
+                   &first_table->grant.m_internal,
+                   0, 0) ||
+      check_access(thd, INSERT_ACL | CREATE_ACL, select_lex->db.str,
+                   &priv,
+                   NULL, /* Don't use first_tab->grant with sel_lex->db */
+                   0, 0))
+    DBUG_RETURN(TRUE);                  /* purecov: inspected */
+
+  // CONVERT TABLE ... IN: the source table inherits the target's grants.
+  if ((alter_info.partition_flags & ALTER_PARTITION_CONVERT_IN))
+  {
+    TABLE_LIST *tl= first_table->next_local;
+    tl->grant.privilege= first_table->grant.privilege;
+    tl->grant.m_internal= first_table->grant.m_internal;
+  }
+
+
+  /* If it is a merge table, check privileges for merge children. */
+  if (create_info.merge_list)
+  {
+    /*
+      The user must have (SELECT_ACL | UPDATE_ACL | DELETE_ACL) on the
+      underlying base tables, even if there are temporary tables with the same
+      names.
+
+      From user's point of view, it might look as if the user must have these
+      privileges on temporary tables to create a merge table over them. This is
+      one of two cases when a set of privileges is required for operations on
+      temporary tables (see also CREATE TABLE).
+
+      The reason for this behavior stems from the following facts:
+
+        - For merge tables, the underlying table privileges are checked only
+          at CREATE TABLE / ALTER TABLE time.
+
+          In other words, once a merge table is created, the privileges of
+          the underlying tables can be revoked, but the user will still have
+          access to the merge table (provided that the user has privileges on
+          the merge table itself). 
+
+        - Temporary tables shadow base tables.
+
+          I.e. there might be temporary and base tables with the same name, and
+          the temporary table takes the precedence in all operations.
+
+        - For temporary MERGE tables we do not track if their child tables are
+          base or temporary. As result we can't guarantee that privilege check
+          which was done in presence of temporary child will stay relevant
+          later as this temporary table might be removed.
+
+      If SELECT_ACL | UPDATE_ACL | DELETE_ACL privileges were not checked for
+      the underlying *base* tables, it would create a security breach as in
+      Bug#12771903.
+    */
+
+    if (check_table_access(thd, SELECT_ACL | UPDATE_ACL | DELETE_ACL,
+                           create_info.merge_list, FALSE, UINT_MAX, FALSE))
+      DBUG_RETURN(TRUE);
+  }
+
+  if (check_grant(thd, priv_needed, first_table, FALSE, UINT_MAX, FALSE))
+    DBUG_RETURN(TRUE);                  /* purecov: inspected */
+
+#ifdef WITH_WSREP
+  if (WSREP(thd) &&
+      (!thd->is_current_stmt_binlog_format_row() ||
+       !thd->find_temporary_table(first_table)))
+  {
+    /*
+      It makes sense to set auto_increment_* to defaults in TOI operations.
+      Must be done before wsrep_TOI_begin() since Query_log_event encapsulating
+      TOI statement and auto inc variables for wsrep replication is constructed
+      there. Variables are reset back in THD::reset_for_next_command() before
+      processing of next command.
+    */
+    if (wsrep_auto_increment_control)
+    {
+      thd->variables.auto_increment_offset = 1;
+      thd->variables.auto_increment_increment = 1;
+    }
+
+    wsrep::key_array keys;
+    wsrep_append_fk_parent_table(thd, first_table, &keys);
+
+    WSREP_TO_ISOLATION_BEGIN_ALTER(lex->name.str ? select_lex->db.str
+                                   : first_table->db.str,
+                                   lex->name.str ? lex->name.str
+                                   : first_table->table_name.str,
+                                   first_table, &alter_info, &keys,
+                                   used_engine ? &create_info : nullptr)
+    {
+      WSREP_WARN("ALTER TABLE isolation failure");
+      DBUG_RETURN(TRUE);
+    }
+
+    DEBUG_SYNC(thd, "wsrep_alter_table_after_toi");
+  }
+#endif
+
+  if (lex->name.str && !test_all_bits(priv, INSERT_ACL | CREATE_ACL))
+  {
+    // Rename of table
+    TABLE_LIST tmp_table;
+    tmp_table.init_one_table(&select_lex->db, &lex->name, 0, TL_IGNORE);
+    tmp_table.grant.privilege= priv;
+    if (check_grant(thd, INSERT_ACL | CREATE_ACL, &tmp_table, FALSE,
+                    UINT_MAX, FALSE))
+      DBUG_RETURN(TRUE);                  /* purecov: inspected */
+  }
+
+  /* Don't yet allow changing of symlinks with ALTER TABLE */
+  if (create_info.data_file_name)
+    push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+                        WARN_OPTION_IGNORED, ER_THD(thd, WARN_OPTION_IGNORED),
+                        "DATA DIRECTORY");
+  if (create_info.index_file_name)
+    push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+                        WARN_OPTION_IGNORED, ER_THD(thd, WARN_OPTION_IGNORED),
+                        "INDEX DIRECTORY");
+  create_info.data_file_name= create_info.index_file_name= NULL;
+
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  thd->work_part_info= 0;
+#endif
+
+  Recreate_info recreate_info;
+  result= mysql_alter_table(thd, &select_lex->db, &lex->name,
+                            &create_info,
+                            first_table,
+                            &recreate_info,
+                            &alter_info,
+                            select_lex->order_list.elements,
+                            select_lex->order_list.first,
+                            lex->ignore, lex->if_exists());
+
+  DBUG_RETURN(result);
+}
+
+/*
+  Execute ALTER TABLE ... DISCARD/IMPORT TABLESPACE.
+  Verifies ALTER privilege (both global/db level and table grants),
+  refuses the log tables, then delegates to
+  mysql_discard_or_import_tablespace().  Returns true on error.
+*/
+bool Sql_cmd_discard_import_tablespace::execute(THD *thd)
+{
+  /* first SELECT_LEX (has special meaning for many non-SELECT commands) */
+  SELECT_LEX *select_lex= thd->lex->first_select_lex();
+  /* first table of first SELECT_LEX */
+  TABLE_LIST *table_list= (TABLE_LIST*) select_lex->table_list.first;
+
+  if (check_access(thd, ALTER_ACL, table_list->db.str,
+                   &table_list->grant.privilege,
+                   &table_list->grant.m_internal,
+                   0, 0))
+    return true;
+
+  if (check_grant(thd, ALTER_ACL, table_list, false, UINT_MAX, false))
+    return true;
+
+  /*
+    Check if we attempt to alter mysql.slow_log or
+    mysql.general_log table and return an error if
+    it is the case.
+    TODO: this design is obsolete and will be removed.
+  */
+  if (check_if_log_table(table_list, TRUE, "ALTER"))
+    return true;
+
+  return
+    mysql_discard_or_import_tablespace(thd, table_list,
+                                       m_tablespace_op == DISCARD_TABLESPACE);
+}
diff --git a/sql/sql_alter.h b/sql/sql_alter.h
new file mode 100644
index 00000000..1d1ad5b2
--- /dev/null
+++ b/sql/sql_alter.h
@@ -0,0 +1,538 @@
+/* Copyright (c) 2010, 2014, Oracle and/or its affiliates.
+   Copyright (c) 2013, 2021, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#ifndef SQL_ALTER_TABLE_H
+#define SQL_ALTER_TABLE_H
+
+class Alter_drop;
+class Alter_column;
+class Alter_rename_key;
+class Alter_index_ignorability;
+class Key;
+
+/**
+  Data describing the table being created by CREATE TABLE or
+  altered by ALTER TABLE.
+*/
+
+class Alter_info
+{
+public:
+
+  enum enum_enable_or_disable { LEAVE_AS_IS, ENABLE, DISABLE };
+
+  bool vers_prohibited(THD *thd) const;
+
+  /**
+     The different values of the ALGORITHM clause.
+     Describes which algorithm to use when altering the table.
+  */
+  enum enum_alter_table_algorithm
+  {
+/*
+  Use thd->variables.alter_algorithm for alter method. If this is also
+  default then use the fastest possible ALTER TABLE method
+  (INSTANT, NOCOPY, INPLACE, COPY)
+*/
+    ALTER_TABLE_ALGORITHM_DEFAULT,
+
+    // Copy if supported, error otherwise.
+    ALTER_TABLE_ALGORITHM_COPY,
+
+    // In-place if supported, error otherwise.
+    ALTER_TABLE_ALGORITHM_INPLACE,
+
+    // No Copy will refuse any operation which does rebuild.
+    ALTER_TABLE_ALGORITHM_NOCOPY,
+
+    // Instant should allow any operation that changes metadata only.
+    ALTER_TABLE_ALGORITHM_INSTANT,
+
+    // When there is no specification of algorithm during alter table.
+    ALTER_TABLE_ALGORITHM_NONE
+  };
+
+
+  /**
+     The different values of the LOCK clause.
+     Describes the level of concurrency during ALTER TABLE.
+  */
+  enum enum_alter_table_lock
+  {
+    // Maximum supported level of concurency for the given operation.
+    ALTER_TABLE_LOCK_DEFAULT,
+
+    // Allow concurrent reads & writes. If not supported, give error.
+    ALTER_TABLE_LOCK_NONE,
+
+    // Allow concurrent reads only. If not supported, give error.
+    ALTER_TABLE_LOCK_SHARED,
+
+    // Block reads and writes.
+    ALTER_TABLE_LOCK_EXCLUSIVE
+  };
+
+  Lex_table_name db, table_name;
+
+  // Columns and keys to be dropped.
+  List<Alter_drop>              drop_list;
+  // Columns for ALTER_CHANGE_COLUMN_DEFAULT.
+  List<Alter_column>            alter_list;
+  // List of keys, used by both CREATE and ALTER TABLE.
+  List<Key>                     key_list;
+  // List of keys to be renamed.
+  List<Alter_rename_key>        alter_rename_key_list;
+  // List of columns, used by both CREATE and ALTER TABLE.
+  List<Create_field>            create_list;
+  // Indexes whose ignorability needs to be changed.
+  List<Alter_index_ignorability>  alter_index_ignorability_list;
+  List<Virtual_column_info>     check_constraint_list;
+  // Type of ALTER TABLE operation.
+  alter_table_operations        flags;
+  ulong                         partition_flags;
+  // Enable or disable keys.
+  enum_enable_or_disable        keys_onoff;
+  // Used only in add_stat_drop_index()
+  TABLE                         *original_table;
+  // List of partitions.
+  List<String>              partition_names;
+  // Number of partitions.
+  uint                          num_parts;
+
+  /* List of fields that we should delete statistics from */
+  List<Field> drop_stat_fields;
+
+  struct DROP_INDEX_STAT_PARAMS
+  {
+    KEY *key;
+    bool ext_prefixes_only;
+  };
+
+  struct RENAME_COLUMN_STAT_PARAMS
+  {
+    Field *field;
+    LEX_CSTRING *name;
+    uint duplicate_counter;                       // For temporary names
+  };
+  struct RENAME_INDEX_STAT_PARAMS
+  {
+    const KEY *key;
+    const LEX_CSTRING *name;
+    uint duplicate_counter;                       // For temporary names
+    uint usage_count;                             // How many rename entries
+  };
+
+  /* List of index that we should delete statistics from */
+  List<DROP_INDEX_STAT_PARAMS> drop_stat_indexes;
+
+  List<RENAME_COLUMN_STAT_PARAMS> rename_stat_fields;
+
+  List<RENAME_INDEX_STAT_PARAMS> rename_stat_indexes;
+
+  // Queue `key` for EITS statistics deletion (ext_prefixes_only is
+  // forwarded to delete_statistics_for_index()).  Returns true on OOM.
+  bool add_stat_drop_index(KEY *key, bool ext_prefixes_only,
+                           MEM_ROOT *mem_root)
+  {
+    DROP_INDEX_STAT_PARAMS *param;
+    if (!(param= (DROP_INDEX_STAT_PARAMS*)
+          alloc_root(mem_root, sizeof(*param))))
+      return true;
+    param->key=  key;
+    param->ext_prefixes_only= ext_prefixes_only;
+    return drop_stat_indexes.push_back(param, mem_root);
+  }
+
+  bool add_stat_drop_index(THD *thd, const LEX_CSTRING *key_name);
+
+  // Queue an index rename for the statistics tables.  Returns true on OOM.
+  bool add_stat_rename_index(const KEY *key, const LEX_CSTRING *name,
+                             MEM_ROOT *mem_root)
+  {
+    RENAME_INDEX_STAT_PARAMS *param;
+    if (!(param= (RENAME_INDEX_STAT_PARAMS*)
+          alloc_root(mem_root, sizeof(*param))))
+      return true;
+    param->key=  key;
+    param->name= name;
+    param->usage_count= 0;
+    return rename_stat_indexes.push_back(param, mem_root);
+  }
+
+  // Queue a column rename for the statistics tables.  Returns true on OOM.
+  bool add_stat_rename_field(Field *field, LEX_CSTRING *name,
+                             MEM_ROOT *mem_root)
+  {
+    RENAME_COLUMN_STAT_PARAMS *param;
+    if (!(param= (RENAME_COLUMN_STAT_PARAMS*)
+          alloc_root(mem_root, sizeof(*param))))
+      return true;
+    param->field= field;
+    param->name=  name;
+    param->duplicate_counter= 0;
+    return rename_stat_fields.push_back(param, mem_root);
+  }
+
+  bool collect_renamed_fields(THD *thd);
+
+  /* Delete/update statistics in EITS tables */
+  void apply_statistics_deletes_renames(THD *thd, TABLE *table);
+
+private:
+  // Type of ALTER TABLE algorithm.
+  enum_alter_table_algorithm    requested_algorithm;
+
+public:
+  // Type of ALTER TABLE lock.
+  enum_alter_table_lock         requested_lock;
+
+
+  /* Default state: no flags set, no algorithm/lock requested. */
+  Alter_info() :
+  flags(0), partition_flags(0),
+    keys_onoff(LEAVE_AS_IS),
+    original_table(0),
+    num_parts(0),
+    requested_algorithm(ALTER_TABLE_ALGORITHM_NONE),
+    requested_lock(ALTER_TABLE_LOCK_DEFAULT)
+  {}
+
+  /*
+    Return the object to its default-constructed state.
+    NOTE(review): original_table is not reset here, unlike in the
+    constructor — confirm whether that is intentional.
+  */
+  void reset()
+  {
+    drop_list.empty();
+    alter_list.empty();
+    key_list.empty();
+    alter_rename_key_list.empty();
+    create_list.empty();
+    alter_index_ignorability_list.empty();
+    check_constraint_list.empty();
+    drop_stat_fields.empty();
+    drop_stat_indexes.empty();
+    rename_stat_fields.empty();
+    rename_stat_indexes.empty();
+    flags= 0;
+    partition_flags= 0;
+    keys_onoff= LEAVE_AS_IS;
+    num_parts= 0;
+    partition_names.empty();
+    requested_algorithm= ALTER_TABLE_ALGORITHM_NONE;
+    requested_lock= ALTER_TABLE_LOCK_DEFAULT;
+  }
+
+
+  /**
+    Construct a copy of this object to be used for mysql_alter_table
+    and mysql_create_table.
+
+    Historically, these two functions modify their Alter_info
+    arguments. This behaviour breaks re-execution of prepared
+    statements and stored procedures and is compensated by always
+    supplying a copy of Alter_info to these functions.
+
+    @param  rhs       Alter_info to make copy of
+    @param  mem_root  Mem_root for new Alter_info
+
+    @note You need to check the error in THD for out
+    of memory condition after calling this function.
+  */
+  Alter_info(const Alter_info &rhs, MEM_ROOT *mem_root);
+
+
+  /**
+     Parses the given string and sets requested_algorithm
+     if the string value matches a supported value.
+     Supported values: INPLACE, COPY, DEFAULT
+
+     @param  str    String containing the supplied value
+     @retval false  Supported value found, state updated
+     @retval true   Not supported value, no changes made
+  */
+  bool set_requested_algorithm(const LEX_CSTRING *str);
+
+
+  /**
+     Parses the given string and sets requested_lock
+     if the string value matches a supported value.
+     Supported values: NONE, SHARED, EXCLUSIVE, DEFAULT
+
+     @param  str    String containing the supplied value
+     @retval false  Supported value found, state updated
+     @retval true   Not supported value, no changes made
+  */
+
+  bool set_requested_lock(const LEX_CSTRING *str);
+
+  /**
+    Set the requested algorithm to the given algorithm value
+    @param algo_value	algorithm to be set
+   */
+  void set_requested_algorithm(enum_alter_table_algorithm algo_value);
+
+  /**
+     Returns the algorithm value in the format "algorithm=value"
+  */
+  const char* algorithm_clause(THD *thd) const;
+
+  /**
+     Returns the lock value in the format "lock=value"
+  */
+  const char* lock() const;
+
+  /**
+     Check whether the given result can be supported
+     with the specified user alter algorithm.
+
+     @param  thd            Thread handle
+     @param  ha_alter_info  Structure describing changes to be done
+                            by ALTER TABLE and holding data during
+                            in-place alter
+     @retval false  Supported operation
+     @retval true   Not supported value
+  */
+  bool supports_algorithm(THD *thd,
+                          const Alter_inplace_info *ha_alter_info);
+
+  /**
+     Check whether the given result can be supported
+     with the specified user lock type.
+
+     @param  ha_alter_info  Structure describing changes to be done
+                            by ALTER TABLE and holding data during
+                            in-place alter
+     @retval false  Supported lock type
+     @retval true   Not supported value
+  */
+  bool supports_lock(THD *thd, const Alter_inplace_info *ha_alter_info);
+
+  /**
+    Return user requested algorithm. If user does not specify
+    algorithm then return alter_algorithm variable value.
+   */
+  enum_alter_table_algorithm algorithm(const THD *thd) const;
+
+  uint check_vcol_field(Item_field *f) const;
+
+private:
+  Alter_info &operator=(const Alter_info &rhs); // not implemented
+  Alter_info(const Alter_info &rhs);            // not implemented
+};
+
+
+/** Runtime context for ALTER TABLE. */
+class Alter_table_ctx
+{
+public:
+  Alter_table_ctx();
+
+  Alter_table_ctx(THD *thd, TABLE_LIST *table_list, uint tables_opened_arg,
+                  const LEX_CSTRING *new_db_arg, const LEX_CSTRING *new_name_arg);
+
+  /**
+     @return true if the table is moved to another database or a new table
+     created by ALTER_PARTITION_CONVERT_OUT, false otherwise.
+  */
+  bool is_database_changed() const
+  { return (new_db.str != db.str); };
+
+  /**
+     @return true if the table is renamed or a new table created by
+     ALTER_PARTITION_CONVERT_OUT, false otherwise.
+  */
+  bool is_table_renamed() const
+  { return (is_database_changed() || new_name.str != table_name.str); };
+
+  /**
+     @return filename (including .frm) for the new table.
+  */
+  const char *get_new_filename() const
+  {
+    // Temporary tables have no .frm path; caller must check tmp_table first
+    DBUG_ASSERT(!tmp_table);
+    return new_filename;
+  }
+
+  /**
+     @return path to the original table.
+  */
+  const char *get_path() const
+  {
+    DBUG_ASSERT(!tmp_table);
+    return path;
+  }
+
+  /**
+     @return path to the new table.
+  */
+  const char *get_new_path() const
+  {
+    DBUG_ASSERT(!tmp_table);
+    return new_path;
+  }
+
+  /**
+     @return path to the temporary table created during ALTER TABLE.
+  */
+  const char *get_tmp_path() const
+  { return tmp_path; }
+
+  /* Same as get_tmp_path() but as a LEX_CSTRING (length computed here). */
+  const LEX_CSTRING get_tmp_cstring_path() const
+  {
+    LEX_CSTRING tmp= { tmp_path, strlen(tmp_path) };
+    return tmp;
+  };
+
+  /**
+    Mark ALTER TABLE as needing to produce foreign key error if
+    it deletes a row from the table being changed.
+  */
+  void set_fk_error_if_delete_row(FOREIGN_KEY_INFO *fk)
+  {
+    fk_error_if_delete_row= true;
+    fk_error_id= fk->foreign_id->str;
+    fk_error_table= fk->foreign_table->str;
+  }
+
+  void report_implicit_default_value_error(THD *thd, const TABLE_SHARE *) const;
+public:
+  Create_field *implicit_default_value_error_field= nullptr;
+  bool         error_if_not_empty= false;
+  uint         tables_opened= 0;
+  LEX_CSTRING  db;
+  LEX_CSTRING  table_name;
+  LEX_CSTRING  storage_engine_name;
+  LEX_CSTRING  alias;
+  LEX_CSTRING  new_db;
+  LEX_CSTRING  new_name;
+  LEX_CSTRING  new_alias;
+  LEX_CSTRING  tmp_name;
+  LEX_CSTRING  tmp_storage_engine_name;
+  LEX_CUSTRING tmp_id, id;
+  char         tmp_buff[80];
+  uchar        id_buff[MY_UUID_SIZE];
+  char         storage_engine_buff[NAME_LEN], tmp_storage_engine_buff[NAME_LEN];
+  bool         storage_engine_partitioned;
+  bool         tmp_storage_engine_name_partitioned;
+
+  /**
+    Indicates that if a row is deleted during copying of data from old version
+    of table to the new version ER_FK_CANNOT_DELETE_PARENT error should be
+    emitted.
+  */
+  bool fk_error_if_delete_row= false;
+  /** Name of foreign key for the above error. */
+  const char *fk_error_id= nullptr;
+  /** Name of table for the above error. */
+  const char *fk_error_table= nullptr;
+  bool modified_primary_key= false;
+  /** Indicates that we are altering temporary table */
+  bool tmp_table= false;
+
+private:
+  /* Backing storage for the LEX_CSTRING members and path accessors above */
+  char new_filename[FN_REFLEN + 1];
+  char new_alias_buff[NAME_LEN + 1];
+  char tmp_name_buff[NAME_LEN + 1];
+  char path[FN_REFLEN + 1];
+  char new_path[FN_REFLEN + 1];
+  char tmp_path[FN_REFLEN + 1];
+
+  Alter_table_ctx &operator=(const Alter_table_ctx &rhs); // not implemented
+  Alter_table_ctx(const Alter_table_ctx &rhs);            // not implemented
+};
+
+
+/**
+  Sql_cmd_common_alter_table represents the common properties of the ALTER TABLE
+  statements.
+  @todo move Alter_info and other ALTER generic structures from Lex here.
+*/
+class Sql_cmd_common_alter_table : public Sql_cmd
+{
+protected:
+  /**
+    Constructor.
+  */
+  Sql_cmd_common_alter_table() = default;
+
+  virtual ~Sql_cmd_common_alter_table() = default;
+
+  /* All ALTER TABLE variants report SQLCOM_ALTER_TABLE unless overridden. */
+  virtual enum_sql_command sql_command_code() const
+  {
+    return SQLCOM_ALTER_TABLE;
+  }
+};
+
+/**
+  Sql_cmd_alter_table represents the generic ALTER TABLE statement.
+  @todo move Alter_info and other ALTER specific structures from Lex here.
+*/
+class Sql_cmd_alter_table : public Sql_cmd_common_alter_table,
+                            public Storage_engine_name
+{
+public:
+  /**
+    Constructor, used to represent a ALTER TABLE statement.
+  */
+  Sql_cmd_alter_table() = default;
+
+  ~Sql_cmd_alter_table() = default;
+
+  /* Expose the Storage_engine_name mixin for the ENGINE= option. */
+  Storage_engine_name *option_storage_engine_name() { return this; }
+
+  bool execute(THD *thd);
+};
+
+
+/**
+  Sql_cmd_alter_sequence represents the ALTER SEQUENCE statement.
+*/
+class Sql_cmd_alter_sequence : public Sql_cmd,
+                               public DDL_options
+{
+public:
+  /**
+    Constructor, used to represent an ALTER SEQUENCE statement.
+  */
+  Sql_cmd_alter_sequence(const DDL_options &options)
+   :DDL_options(options)
+  {}
+
+  ~Sql_cmd_alter_sequence() = default;
+
+  enum_sql_command sql_command_code() const
+  {
+    return SQLCOM_ALTER_SEQUENCE;
+  }
+  bool execute(THD *thd);
+};
+
+
+/**
+  Sql_cmd_alter_table_tablespace represents ALTER TABLE
+  IMPORT/DISCARD TABLESPACE statements.
+*/
+class Sql_cmd_discard_import_tablespace : public Sql_cmd_common_alter_table
+{
+public:
+  /* Which of the two tablespace operations this statement performs */
+  enum enum_tablespace_op_type
+  {
+    DISCARD_TABLESPACE, IMPORT_TABLESPACE
+  };
+
+  Sql_cmd_discard_import_tablespace(enum_tablespace_op_type tablespace_op_arg)
+    : m_tablespace_op(tablespace_op_arg)
+  {}
+
+  bool execute(THD *thd);
+
+private:
+  const enum_tablespace_op_type m_tablespace_op;
+};
+
+#endif
diff --git a/sql/sql_analyse.cc b/sql/sql_analyse.cc
new file mode 100644
index 00000000..4c853689
--- /dev/null
+++ b/sql/sql_analyse.cc
@@ -0,0 +1,1240 @@
+/*
+   Copyright (c) 2000, 2013, Oracle and/or its affiliates.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA */
+
+
+/* Analyse database */
+
+/* TODO: - Check if any character fields can be of any date type
+**	   (date, datetime, year, time, timestamp, newdate)
+**	 - Check if any number field should be a timestamp
+**	 - type set is out of optimization yet
+*/
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation				// gcc: Class implementation
+#endif
+
+#define MYSQL_LEX 1
+
+#include "mariadb.h"
+#include "sql_priv.h"
+#include "procedure.h"
+#include "sql_analyse.h"
+#include <m_ctype.h>
+
+#define MAX_TREEMEM	  8192
+#define MAX_TREE_ELEMENTS 256
+
+/* Tree/qsort comparator: compare two Strings with sortcmp() in a's charset */
+int sortcmp2(void* cmp_arg __attribute__((unused)),
+	     const String *a,const String *b)
+{
+  return sortcmp(a,b,a->charset());
+}
+
+/* Tree comparator wrapper around compare_double() (ignores cmp_arg) */
+int compare_double2(void* cmp_arg __attribute__((unused)),
+		    const double *s, const double *t)
+{
+  return compare_double(s,t);
+}
+
+/* Tree comparator wrapper around compare_longlong() (ignores cmp_arg) */
+int compare_longlong2(void* cmp_arg __attribute__((unused)),
+		      const longlong *s, const longlong *t)
+{
+  return compare_longlong(s,t);
+}
+
+/* Tree comparator wrapper around compare_ulonglong() (ignores cmp_arg) */
+int compare_ulonglong2(void* cmp_arg __attribute__((unused)),
+		       const ulonglong *s, const ulonglong *t)
+{
+  return compare_ulonglong(s,t);
+}
+
+/*
+  Tree comparator for binary-packed decimals; *len is the packed byte
+  length, passed via the tree's custom_arg.
+*/
+int compare_decimal2(int* len, const char *s, const char *t)
+{
+  return memcmp(s, t, *len);
+}
+
+
+/*
+  Validate one PROCEDURE ANALYSE() parameter: after fixing fields it must
+  be a basic non-negative integer constant.
+
+  @param thd        connection handle
+  @param item       in/out pointer to the parameter item (may be replaced
+                    by fix_fields)
+  @param proc_name  procedure name, used in the error message
+  @param pos        0-based parameter position (diagnostics only)
+  @retval true   invalid parameter; error already reported
+  @retval false  parameter accepted
+*/
+static bool
+prepare_param(THD *thd, Item **item, const char *proc_name, uint pos)
+{
+  if ((*item)->fix_fields_if_needed(thd, item))
+  {
+    DBUG_PRINT("info", ("fix_fields() for the parameter %u failed", pos));
+    return true;
+  }
+  Item *arg= *item;                   // re-read: fix_fields may replace it
+  bool ok= arg->type_handler()->result_type() == INT_RESULT &&
+           arg->basic_const_item() &&
+           arg->val_real() >= 0;
+  if (ok)
+    return false;
+  my_error(ER_WRONG_PARAMETERS_TO_PROCEDURE, MYF(0), proc_name);
+  return true;
+}
+
+
+/*
+  Set up PROCEDURE ANALYSE([max_tree_elements[, max_treemem]]).
+
+  Validates the zero, one or two integer parameters, allocates one
+  field_info collector per select-list item (typed by the item's
+  result type), and returns the procedure object.
+
+  @return the analyse object, or NULL on error (error already reported
+          for bad parameters; silent NULL on OOM or unsupported type).
+*/
+Procedure *
+proc_analyse_init(THD *thd, ORDER *param, select_result *result,
+		  List<Item> &field_list)
+{
+  const char *proc_name = (*param->item)->name.str;
+  analyse *pc = new analyse(result);
+  field_info **f_info;
+  DBUG_ENTER("proc_analyse_init");
+
+  if (!pc)
+    DBUG_RETURN(0);
+
+  if (!(param = param->next))
+  {
+    // No parameters: use built-in limits
+    pc->max_tree_elements = MAX_TREE_ELEMENTS;
+    pc->max_treemem = MAX_TREEMEM;
+  }
+  else if (param->next)
+  {
+    // first parameter
+    if (prepare_param(thd, param->item, proc_name, 0))
+      goto err;
+    pc->max_tree_elements = (uint) (*param->item)->val_int();
+    param = param->next;
+    if (param->next)  // no third parameter possible
+    {
+      my_error(ER_WRONG_PARAMCOUNT_TO_PROCEDURE, MYF(0), proc_name);
+      goto err;
+    }
+    // second parameter
+    if (prepare_param(thd, param->item, proc_name, 1))
+      goto err;
+    pc->max_treemem = (uint) (*param->item)->val_int();
+  }
+  else if (prepare_param(thd, param->item, proc_name, 0))
+    goto err;
+  // if only one parameter was given, it will be the value of max_tree_elements
+  else
+  {
+    pc->max_tree_elements = (uint) (*param->item)->val_int();
+    pc->max_treemem = MAX_TREEMEM;
+  }
+
+  if (!(pc->f_info=
+        (field_info**) thd->alloc(sizeof(field_info*) * field_list.elements)))
+    goto err;
+  pc->f_end = pc->f_info + field_list.elements;
+  pc->fields = field_list;
+
+  {
+    List_iterator_fast<Item> it(pc->fields);
+    f_info = pc->f_info;
+
+    Item *item;
+    while ((item = it++))
+    {
+      field_info *new_field;
+      switch (item->result_type()) {
+      case INT_RESULT:
+        // Check if fieldtype is ulonglong
+        if (item->type() == Item::FIELD_ITEM &&
+            ((Item_field*) item)->field->type() == MYSQL_TYPE_LONGLONG &&
+            ((Field_longlong*) ((Item_field*) item)->field)->unsigned_flag)
+          new_field= new field_ulonglong(item, pc);
+        else
+          new_field= new field_longlong(item, pc);
+        break;
+      case REAL_RESULT:
+        new_field= new field_real(item, pc);
+        break;
+      case DECIMAL_RESULT:
+        new_field= new field_decimal(item, pc);
+        break;
+      case STRING_RESULT:
+        new_field= new field_str(item, pc);
+        break;
+      default:
+        goto err;
+      }
+      *f_info++= new_field;
+    }
+  }
+  DBUG_RETURN(pc);
+err:
+  delete pc;
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Return 1 if number, else return 0
+  store info about found number in info
+  NOTE:It is expected, that elements of 'info' are all zero!
+*/
+
+/*
+  Return 1 if 'str' (length 'str_len') is a number, else 0.
+  On success, fills 'info' with what was found (sign, digit counts,
+  zerofill/float hints, integer or double value).
+  NOTE: It is expected that elements of 'info' are all zero on entry!
+*/
+bool test_if_number(NUM_INFO *info, const char *str, uint str_len)
+{
+  const char *begin, *end= str + str_len;
+  DBUG_ENTER("test_if_number");
+
+  /*
+    MySQL removes any endspaces of a string, so we must take care only of
+    spaces in front of a string
+  */
+  for (; str != end && my_isspace(system_charset_info, *str); str++) ;
+  if (str == end)
+    DBUG_RETURN(0);
+
+  if (*str == '-')
+  {
+    info->negative = 1;
+    if (++str == end || *str == '0')    // converting -0 to a number
+      DBUG_RETURN(0);                   // might lose information
+  }
+  else
+    info->negative = 0;
+  begin = str;
+  // Count integer digits; a leading '0' followed by another digit
+  // marks the value as potentially zerofill-formatted
+  for (; str != end && my_isdigit(system_charset_info,*str); str++)
+  {
+    if (!info->integers && *str == '0' && (str + 1) != end &&
+	my_isdigit(system_charset_info,*(str + 1)))
+      info->zerofill = 1;	     // could be a postnumber for example
+    info->integers++;
+  }
+  if (str == end && info->integers)
+  {
+    char *endpos= (char*) end;
+    int error;
+    info->ullval= (ulonglong) my_strtoll10(begin, &endpos, &error);
+    if (info->integers == 1)
+      DBUG_RETURN(0);                   // single number can't be zerofill
+    info->maybe_zerofill = 1;
+    DBUG_RETURN(1);                     // a zerofill number, or an integer
+  }
+  if (*str == '.' || *str == 'e' || *str == 'E')
+  {
+    if (info->zerofill)                 // can't be zerofill anymore
+      DBUG_RETURN(0);
+    if ((str + 1) == end)               // number was something like '123[.eE]'
+    {
+      char *endpos= (char*) str;
+      int error;
+      info->ullval= (ulonglong) my_strtoll10(begin, &endpos, &error);
+      DBUG_RETURN(1);
+    }
+    if (*str == 'e' || *str == 'E')     // number may be something like '1e+50'
+    {
+      str++;
+      // only an explicitly signed exponent is accepted here
+      if (*str != '-' && *str != '+')
+	DBUG_RETURN(0);
+      for (str++; str != end && my_isdigit(system_charset_info,*str); str++) ;
+      if (str == end)
+      {
+	info->is_float = 1;             // we can't use variable decimals here
+	DBUG_RETURN(1);
+      }
+      DBUG_RETURN(0);
+    }
+    for (str++; *(end - 1) == '0'; end--)  // jump over zeros at the end
+      ;
+    if (str == end)		     // number was something like '123.000'
+    {
+      char *endpos= (char*) str;
+      int error;
+      info->ullval= (ulonglong) my_strtoll10(begin, &endpos, &error);
+      DBUG_RETURN(1);
+    }
+    for (; str != end && my_isdigit(system_charset_info,*str); str++)
+      info->decimals++;
+    if (str == end)
+    {
+      info->dval = my_atof(begin);
+      DBUG_RETURN(1);
+    }
+  }
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Stores the biggest and the smallest value from current 'info'
+  to ev_num_info
+  If info contains an ulonglong number, which is bigger than
+  biggest positive number able to be stored in a longlong variable
+  and is marked as negative, function will return 0, else 1.
+*/
+
+/*
+  Fold the current value in 'info' into the running min/max in 'ev_info'.
+  Returns 0 when the value cannot be represented (a negative value whose
+  magnitude exceeds longlong range, or an out-of-range unsigned), else 1.
+*/
+bool get_ev_num_info(EV_NUM_INFO *ev_info, NUM_INFO *info, const char *num)
+{
+  if (info->negative)
+  {
+    // ullval reinterpreted as longlong going negative means |value| > LLONG_MAX
+    if (((longlong) info->ullval) < 0)
+      return 0; // Impossible to store as a negative number
+    ev_info->llval =  -(longlong) MY_MAX((ulonglong) -ev_info->llval, 
+				      info->ullval);
+    ev_info->min_dval = (double) -MY_MAX(-ev_info->min_dval, info->dval);
+  }
+  else		// ulonglong is as big as bigint in MySQL
+  {
+    if ((check_ulonglong(num, info->integers) == DECIMAL_NUM))
+      return 0;
+    ev_info->ullval = (ulonglong) MY_MAX(ev_info->ullval, info->ullval);
+    ev_info->max_dval =  (double) MY_MAX(ev_info->max_dval, info->dval);
+  }
+  return 1;
+} // get_ev_num_info
+
+
+/* TREE free callback: release the buffer owned by a String tree element */
+int free_string(void* str, TREE_FREE, void*)
+{
+  ((String*)str)->free();
+  return 0;
+}
+
+
+/*
+  Accumulate one string value: track NULL/empty counts, min/max value and
+  length, whether the value could still be numeric (for type suggestion),
+  and collect distinct values in the tree while it fits in max_treemem.
+*/
+void field_str::add()
+{
+  char buff[MAX_FIELD_WIDTH], *ptr;
+  String s(buff, sizeof(buff),&my_charset_bin), *res;
+  ulong length;
+
+  if (!(res = item->val_str(&s)))
+  {
+    nulls++;
+    return;
+  }
+
+  if (!(length = res->length()))
+    empty++;
+  else
+  {
+    // Trailing space would be stripped by CHAR, so BLOB/VARCHAR is required
+    ptr = (char*) res->ptr();
+    if (*(ptr + (length - 1)) == ' ')
+      must_be_blob = 1;
+  }
+
+  if (can_be_still_num)
+  {
+    bzero((char*) &num_info, sizeof(num_info));
+    if (!test_if_number(&num_info, res->ptr(), (uint) length))
+      can_be_still_num = 0;
+    if (!found)
+    {
+      bzero((char*) &ev_num_info, sizeof(ev_num_info));
+      was_zero_fill = num_info.zerofill;
+    }
+    else if (num_info.zerofill != was_zero_fill && !was_maybe_zerofill)
+      can_be_still_num = 0;  // one more check needed, when length is counted
+    if (can_be_still_num)
+      can_be_still_num = get_ev_num_info(&ev_num_info, &num_info, res->ptr());
+    was_maybe_zerofill = num_info.maybe_zerofill;
+  }
+
+  /* Update min and max arguments */
+  if (!found)
+  {
+    found = 1;
+    min_arg.copy(*res);
+    max_arg.copy(*res);
+    min_length = max_length = length; sum=length;
+  }
+  else if (length)
+  {
+    sum += length;
+    if (length < min_length)
+      min_length = length;
+    if (length > max_length)
+      max_length = length;
+
+    if (sortcmp(res, &min_arg,item->collation.collation) < 0)
+      min_arg.copy(*res);
+    if (sortcmp(res, &max_arg,item->collation.collation) > 0)
+      max_arg.copy(*res);
+  }
+
+  if (room_in_tree)
+  {
+    if (res != &s)
+      s.copy(*res);
+    if (!tree_search(&tree, (void*) &s, tree.custom_arg)) // If not in tree
+    {
+      s.copy();        // slow, when SAFE_MALLOC is in use
+      if (!tree_insert(&tree, (void*) &s, 0, tree.custom_arg))
+      {
+	room_in_tree = 0;      // Remove tree, out of RAM ?
+	delete_tree(&tree, 0);
+      }
+      else
+      {
+	bzero((char*) &s, sizeof(s));  // Let tree handle free of this
+	if ((treemem += length) > pc->max_treemem)
+	{
+	  room_in_tree = 0;	 // Remove tree, too big tree
+	  delete_tree(&tree, 0);
+	}
+      }
+    }
+  }
+
+  /*
+    NOTE(review): num_info is only re-zeroed inside the can_be_still_num
+    branch above, so this reads the last parsed value — confirm intended.
+  */
+  if ((num_info.zerofill && (max_length != min_length)) ||
+      (was_zero_fill && (max_length != min_length)))
+    can_be_still_num = 0; // zerofilled numbers must be of same length
+} // field_str::add
+
+
+/*
+  Accumulate one REAL value: track NULL/zero counts, min/max, sum and
+  sum of squares (for avg/std), printed length, the largest number of
+  significant decimals seen, and distinct values in the tree.
+*/
+void field_real::add()
+{
+  char buff[MAX_FIELD_WIDTH], *ptr, *end;
+  double num= item->val_real();
+  uint length, zero_count, decs;
+  TREE_ELEMENT *element;
+
+  // null_value is only valid after val_real() has been called above
+  if (item->null_value)
+  {
+    nulls++;
+    return;
+  }
+  if (num == 0.0)
+    empty++;
+
+  if ((decs = decimals()) >= FLOATING_POINT_DECIMALS)
+  {
+    length= snprintf(buff, sizeof(buff), "%g", num);
+    if (rint(num) != num)
+      max_notzero_dec_len = 1;
+  }
+  else
+  {
+    buff[sizeof(buff)-1]=0;			// Safety
+    snprintf(buff, sizeof(buff)-1, "%-.*f", (int) decs, num);
+    length = (uint) strlen(buff);
+
+    // We never need to check further than this
+    end = buff + length - 1 - decs + max_notzero_dec_len;
+
+    // Count trailing zeros to find the real number of used decimals
+    zero_count = 0;
+    for (ptr = buff + length - 1; ptr > end && *ptr == '0'; ptr--)
+      zero_count++;
+
+    if ((decs - zero_count > max_notzero_dec_len))
+      max_notzero_dec_len = decs - zero_count;
+  }
+
+  if (room_in_tree)
+  {
+    if (!(element = tree_insert(&tree, (void*) &num, 0, tree.custom_arg)))
+    {
+      room_in_tree = 0;    // Remove tree, out of RAM ?
+      delete_tree(&tree, 0);
+    }
+    /*
+      if element->count == 1, this element can be found only once from tree
+      if element->count == 2, or more, this element is already in tree
+    */
+    else if (element->count == 1 && (tree_elements++) >= pc->max_tree_elements)
+    {
+      room_in_tree = 0;  // Remove tree, too many elements
+      delete_tree(&tree, 0);
+    }
+  }
+
+  if (!found)
+  {
+    found = 1;
+    min_arg = max_arg = sum = num;
+    sum_sqr = num * num;
+    min_length = max_length = length;
+  }
+  else if (num != 0.0)
+  {
+    sum += num;
+    sum_sqr += num * num;
+    if (length < min_length)
+      min_length = length;
+    if (length > max_length)
+      max_length = length;
+    if (compare_double(&num, &min_arg) < 0)
+      min_arg = num;
+    if (compare_double(&num, &max_arg) > 0)
+      max_arg = num;
+  }
+} // field_real::add
+
+
+/*
+  Accumulate one DECIMAL value: track NULL/zero counts, min/max, running
+  sum and sum-of-squares (kept in two alternating slots indexed by
+  cur_sum to avoid aliasing in my_decimal_add), and distinct values.
+*/
+void field_decimal::add()
+{
+  /*TODO - remove rounding stuff after decimal_div returns proper frac */
+  VDec vdec(item);
+  uint length;
+  TREE_ELEMENT *element;
+
+  if (vdec.is_null())
+  {
+    nulls++;
+    return;
+  }
+
+  my_decimal dec;
+  vdec.round_to(&dec, item->decimals, HALF_UP);
+
+  length= my_decimal_string_length(&dec);
+
+  if (decimal_is_zero(&dec))
+    empty++;
+
+  if (room_in_tree)
+  {
+    // Tree keys are the binary (packed) representation; compared by memcmp
+    uchar buf[DECIMAL_MAX_FIELD_SIZE];
+    dec.to_binary(buf, item->max_length, item->decimals);
+    if (!(element = tree_insert(&tree, (void*)buf, 0, tree.custom_arg)))
+    {
+      room_in_tree = 0;    // Remove tree, out of RAM ?
+      delete_tree(&tree, 0);
+    }
+    /*
+      if element->count == 1, this element can be found only once from tree
+      if element->count == 2, or more, this element is already in tree
+    */
+    else if (element->count == 1 && (tree_elements++) >= pc->max_tree_elements)
+    {
+      room_in_tree = 0;  // Remove tree, too many elements
+      delete_tree(&tree, 0);
+    }
+  }
+
+  if (!found)
+  {
+    found = 1;
+    min_arg = max_arg = sum[0] = dec;
+    my_decimal_mul(E_DEC_FATAL_ERROR, sum_sqr, &dec, &dec);
+    cur_sum= 0;
+    min_length = max_length = length;
+  }
+  else if (!decimal_is_zero(&dec))
+  {
+    // Write into the other slot, reading from the current one
+    int next_cur_sum= cur_sum ^ 1;
+    my_decimal sqr_buf;
+
+    my_decimal_add(E_DEC_FATAL_ERROR, sum+next_cur_sum, sum+cur_sum, &dec);
+    my_decimal_mul(E_DEC_FATAL_ERROR, &sqr_buf, &dec, &dec);
+    my_decimal_add(E_DEC_FATAL_ERROR,
+                   sum_sqr+next_cur_sum, sum_sqr+cur_sum, &sqr_buf);
+    cur_sum= next_cur_sum;
+    if (length < min_length)
+      min_length = length;
+    if (length > max_length)
+      max_length = length;
+    if (dec.cmp(&min_arg) < 0)
+    {
+      min_arg= dec;
+    }
+    if (dec.cmp(&max_arg) > 0)
+    {
+      max_arg= dec;
+    }
+  }
+}
+
+
+/*
+  Accumulate one signed integer value: NULL/zero counts, min/max, sum and
+  sum of squares, printed length, and distinct values in the tree.
+*/
+void field_longlong::add()
+{
+  char buff[MAX_FIELD_WIDTH];
+  longlong num = item->val_int();
+  // length of the signed decimal rendering (radix -10 = signed)
+  uint length = (uint) (longlong10_to_str(num, buff, -10) - buff);
+  TREE_ELEMENT *element;
+
+  // null_value is only valid after val_int() has been called above
+  if (item->null_value)
+  {
+    nulls++;
+    return;
+  }
+  if (num == 0)
+    empty++;
+
+  if (room_in_tree)
+  {
+    if (!(element = tree_insert(&tree, (void*) &num, 0, tree.custom_arg)))
+    {
+      room_in_tree = 0;    // Remove tree, out of RAM ?
+      delete_tree(&tree, 0);
+    }
+    /*
+      if element->count == 1, this element can be found only once from tree
+      if element->count == 2, or more, this element is already in tree
+    */
+    else if (element->count == 1 && (tree_elements++) >= pc->max_tree_elements)
+    {
+      room_in_tree = 0;  // Remove tree, too many elements
+      delete_tree(&tree, 0);
+    }
+  }
+
+  if (!found)
+  {
+    found = 1;
+    min_arg = max_arg = sum = num;
+    sum_sqr = num * num;
+    min_length = max_length = length;
+  }
+  else if (num != 0)
+  {
+    sum += num;
+    sum_sqr += num * num;
+    if (length < min_length)
+      min_length = length;
+    if (length > max_length)
+      max_length = length;
+    if (compare_longlong(&num, &min_arg) < 0)
+      min_arg = num;
+    if (compare_longlong(&num, &max_arg) > 0)
+      max_arg = num;
+  }
+} // field_longlong::add
+
+
+/*
+  Accumulate one unsigned integer value; same bookkeeping as
+  field_longlong::add() but rendered and compared as unsigned.
+*/
+void field_ulonglong::add()
+{
+  char buff[MAX_FIELD_WIDTH];
+  longlong num = item->val_int();
+  // radix 10 = unsigned decimal rendering
+  uint length = (uint) (longlong10_to_str(num, buff, 10) - buff);
+  TREE_ELEMENT *element;
+
+  // null_value is only valid after val_int() has been called above
+  if (item->null_value)
+  {
+    nulls++;
+    return;
+  }
+  if (num == 0)
+    empty++;
+
+  if (room_in_tree)
+  {
+    if (!(element = tree_insert(&tree, (void*) &num, 0, tree.custom_arg)))
+    {
+      room_in_tree = 0;    // Remove tree, out of RAM ?
+      delete_tree(&tree, 0);
+    }
+    /*
+      if element->count == 1, this element can be found only once from tree
+      if element->count == 2, or more, this element is already in tree
+    */
+    else if (element->count == 1 && (tree_elements++) >= pc->max_tree_elements)
+    {
+      room_in_tree = 0;  // Remove tree, too many elements
+      delete_tree(&tree, 0);
+    }
+  }
+
+  if (!found)
+  {
+    found = 1;
+    min_arg = max_arg = sum = num;
+    sum_sqr = num * num;
+    min_length = max_length = length;
+  }
+  else if (num != 0)
+  {
+    sum += num;
+    sum_sqr += num * num;
+    if (length < min_length)
+      min_length = length;
+    if (length > max_length)
+      max_length = length;
+    if (compare_ulonglong((ulonglong*) &num, &min_arg) < 0)
+      min_arg = num;
+    if (compare_ulonglong((ulonglong*) &num, &max_arg) > 0)
+      max_arg = num;
+  }
+} // field_ulonglong::add
+
+
+/*
+  Called for every row of the analysed result set: feed each column
+  value into its per-field collector.  Always returns 0 (success).
+*/
+int analyse::send_row(List<Item> & /* field_list */)
+{
+  field_info **f = f_info;
+
+  rows++;
+
+  for (;f != f_end; f++)
+  {
+    (*f)->add();
+  }
+  return 0;
+} // analyse::send_row
+
+
+/*
+  Emit one output row per analysed column: column name, min/max value,
+  min/max length, empty and NULL counts, average, standard deviation and
+  the suggested optimal field type (ENUM(...) when the distinct-value
+  tree survived and is small enough, otherwise a scalar type).
+  Returns 0 on success, -1 if sending a row failed.
+*/
+int analyse::end_of_records()
+{
+  field_info **f = f_info;
+  char buff[MAX_FIELD_WIDTH];
+  String *res, s_min(buff, sizeof(buff),&my_charset_bin), 
+	 s_max(buff, sizeof(buff),&my_charset_bin),
+	 ans(buff, sizeof(buff),&my_charset_bin);
+  StringBuffer<NAME_LEN> name;
+
+  for (; f != f_end; f++)
+  {
+    /*
+      We have to make a copy of full_name() as it stores it's value in str_value,
+      which is reset by save_str_in_field
+    */
+    LEX_CSTRING col_name= (*f)->item->full_name_cstring();
+    name.set_buffer_if_not_allocated(&my_charset_bin);
+    name.copy(col_name.str, col_name.length, &my_charset_bin);
+    func_items[0]->set((char*) name.ptr(), name.length(), &my_charset_bin);
+
+    if (!(*f)->found)
+    {
+      // Column contained only NULLs: min/max are NULL
+      func_items[1]->null_value = 1;
+      func_items[2]->null_value = 1;
+    }
+    else
+    {
+      func_items[1]->null_value = 0;
+      res = (*f)->get_min_arg(&s_min);
+      func_items[1]->set(res->ptr(), res->length(), res->charset());
+      func_items[2]->null_value = 0;
+      res = (*f)->get_max_arg(&s_max);
+      func_items[2]->set(res->ptr(), res->length(), res->charset());
+    }
+    func_items[3]->set((longlong) (*f)->min_length);
+    func_items[4]->set((longlong) (*f)->max_length);
+    func_items[5]->set((longlong) (*f)->empty);
+    func_items[6]->set((longlong) (*f)->nulls);
+    res = (*f)->avg(&s_max, rows);
+    func_items[7]->set(res->ptr(), res->length(), res->charset());
+    func_items[8]->null_value = 0;
+    res = (*f)->std(&s_max, rows);
+    if (!res)
+      func_items[8]->null_value = 1;
+    else
+      func_items[8]->set(res->ptr(), res->length(), res->charset());
+    /*
+      count the dots, quotas, etc. in (ENUM("a","b","c"...))
+      If tree has been removed, don't suggest ENUM.
+      treemem is used to measure the size of tree for strings,
+      tree_elements is used to count the elements
+      max_treemem tells how long the string starting from ENUM("... and
+      ending to ..") shall at maximum be. If case is about numbers,
+      max_tree_elements will tell the length of the above, now
+      every number is considered as length 1
+    */
+    if (((*f)->treemem || (*f)->tree_elements) &&
+	(*f)->tree.elements_in_tree &&
+	(((*f)->treemem ? max_treemem : max_tree_elements) >
+	 (((*f)->treemem ? (*f)->treemem : (*f)->tree_elements) +
+	   ((*f)->tree.elements_in_tree * 3 - 1 + 6))))
+    {
+      char tmp[331]; //331, because one double prec. num. can be this long
+      String tmp_str(tmp, sizeof(tmp),&my_charset_bin);
+      TREE_INFO tree_info;
+
+      tree_info.str = &tmp_str;
+      tree_info.found = 0;
+      tree_info.item = (*f)->item;
+
+      // Walk the distinct-value tree in order, appending each value
+      tmp_str.set(STRING_WITH_LEN("ENUM("),&my_charset_bin);
+      tree_walk(&(*f)->tree, (*f)->collect_enum(), (char*) &tree_info,
+		left_root_right);
+      tmp_str.append(')');
+
+      if (!(*f)->nulls)
+	tmp_str.append(STRING_WITH_LEN(" NOT NULL"));
+      output_str_length = tmp_str.length();
+      func_items[9]->set(tmp_str.ptr(), tmp_str.length(), tmp_str.charset());
+      if (result->send_data(result_fields) > 0)
+	return -1;
+      continue;
+    }
+
+    ans.length(0);
+    if (!(*f)->treemem && !(*f)->tree_elements)
+      ans.append(STRING_WITH_LEN("CHAR(0)"));
+    else if ((*f)->item->type() == Item::FIELD_ITEM)
+    {
+      switch (((Item_field*) (*f)->item)->field->real_type())
+      {
+      case MYSQL_TYPE_TIMESTAMP:
+	ans.append(STRING_WITH_LEN("TIMESTAMP"));
+	break;
+      case MYSQL_TYPE_DATETIME:
+	ans.append(STRING_WITH_LEN("DATETIME"));
+	break;
+      case MYSQL_TYPE_DATE:
+      case MYSQL_TYPE_NEWDATE:
+	ans.append(STRING_WITH_LEN("DATE"));
+	break;
+      case MYSQL_TYPE_SET:
+	ans.append(STRING_WITH_LEN("SET"));
+	break;
+      case MYSQL_TYPE_YEAR:
+	ans.append(STRING_WITH_LEN("YEAR"));
+	break;
+      case MYSQL_TYPE_TIME:
+	ans.append(STRING_WITH_LEN("TIME"));
+	break;
+      case MYSQL_TYPE_DECIMAL:
+	ans.append(STRING_WITH_LEN("DECIMAL"));
+	// if item is FIELD_ITEM, it _must_be_ Field_num in this case
+	if (((Field_num*) ((Item_field*) (*f)->item)->field)->zerofill)
+	  ans.append(STRING_WITH_LEN(" ZEROFILL"));
+	break;
+      default:
+	(*f)->get_opt_type(&ans, rows);
+	break;
+      }
+    }
+    if (!(*f)->nulls)
+      ans.append(STRING_WITH_LEN(" NOT NULL"));
+    func_items[9]->set(ans.ptr(), ans.length(), ans.charset());
+    if (result->send_data(result_fields) > 0)
+      return -1;
+  }
+  return 0;
+} // analyse::end_of_records
+
+
/*
  Suggest the optimal column type for a string column.

  If every non-NULL value also parsed as a number (can_be_still_num),
  a numeric type covering the observed range is proposed.  Otherwise a
  character/blob type is chosen from max_length, preferring CHAR over
  VARCHAR only when values are close to max_length on average.

  @param answer      receives the type description, e.g. "VARCHAR(12)"
  @param total_rows  number of rows examined (used by the CHAR heuristic)
*/
void field_str::get_opt_type(String *answer, ha_rows total_rows)
{
  char buff[MAX_FIELD_WIDTH];

  if (can_be_still_num)
  {
    if (num_info.is_float)
      snprintf(buff, sizeof(buff), "DOUBLE");	  // number was like 1e+50... TODO:
    else if (num_info.decimals) // DOUBLE(%d,%d) sometime
    {
      /* FLOAT only if every value fits in single precision. */
      if (num_info.dval > -FLT_MAX && num_info.dval < FLT_MAX)
	snprintf(buff, sizeof(buff), "FLOAT(%d,%d)", (num_info.integers + num_info.decimals), num_info.decimals);
      else
	snprintf(buff, sizeof(buff), "DOUBLE(%d,%d)", (num_info.integers + num_info.decimals), num_info.decimals);
    }
    /*
      Integer ladder: pick the narrowest type whose signed range (or
      unsigned range, when no negative value was seen) covers the data.
    */
    else if (ev_num_info.llval >= -128 &&
	     ev_num_info.ullval <=
	     (ulonglong) (ev_num_info.llval >= 0 ? 255 : 127))
      snprintf(buff, sizeof(buff), "TINYINT(%d)", num_info.integers);
    else if (ev_num_info.llval >= INT_MIN16 &&
	     ev_num_info.ullval <= (ulonglong) (ev_num_info.llval >= 0 ?
						UINT_MAX16 : INT_MAX16))
      snprintf(buff, sizeof(buff), "SMALLINT(%d)", num_info.integers);
    else if (ev_num_info.llval >= INT_MIN24 &&
	     ev_num_info.ullval <= (ulonglong) (ev_num_info.llval >= 0 ?
						UINT_MAX24 : INT_MAX24))
      snprintf(buff, sizeof(buff), "MEDIUMINT(%d)", num_info.integers);
    else if (ev_num_info.llval >= INT_MIN32 &&
	     ev_num_info.ullval <= (ulonglong) (ev_num_info.llval >= 0 ?
						UINT_MAX32 : INT_MAX32))
      snprintf(buff, sizeof(buff), "INT(%d)", num_info.integers);
    else
      snprintf(buff, sizeof(buff), "BIGINT(%d)", num_info.integers);
    answer->append(buff, (uint) strlen(buff));
    if (ev_num_info.llval >= 0 && ev_num_info.min_dval >= 0)
      answer->append(STRING_WITH_LEN(" UNSIGNED"));
    if (num_info.zerofill)
      answer->append(STRING_WITH_LEN(" ZEROFILL"));
  }
  else if (max_length < 256)
  {
    if (must_be_blob)
    {
      if (item->collation.collation == &my_charset_bin)
	answer->append(STRING_WITH_LEN("TINYBLOB"));
      else
	answer->append(STRING_WITH_LEN("TINYTEXT"));
    }
    /*
      CHAR if padding every row to max_length would cost less than the
      per-value length bytes of VARCHAR (sum accumulates value lengths).
    */
    else if ((max_length * (total_rows - nulls)) < (sum + total_rows))
    {
      snprintf(buff, sizeof(buff), "CHAR(%d)", (int) max_length);
      answer->append(buff, (uint) strlen(buff));
    }
    else
    {
      snprintf(buff, sizeof(buff), "VARCHAR(%d)", (int) max_length);
      answer->append(buff, (uint) strlen(buff));
    }
  }
  else if (max_length < (1L << 16))
  {
    if (item->collation.collation == &my_charset_bin)
      answer->append(STRING_WITH_LEN("BLOB"));
    else
      answer->append(STRING_WITH_LEN("TEXT"));
  }
  else if (max_length < (1L << 24))
  {
    if (item->collation.collation == &my_charset_bin)
      answer->append(STRING_WITH_LEN("MEDIUMBLOB"));
    else
      answer->append(STRING_WITH_LEN("MEDIUMTEXT"));
  }
  else
  {
    if (item->collation.collation == &my_charset_bin)
      answer->append(STRING_WITH_LEN("LONGBLOB"));
    else
      answer->append(STRING_WITH_LEN("LONGTEXT"));
  }
} // field_str::get_opt_type
+
+
/*
  Suggest the optimal column type for a REAL column.

  If no value ever had a non-zero fractional part (max_notzero_dec_len
  is 0), an integer type covering [min_arg, max_arg] is proposed.
  Otherwise FLOAT or DOUBLE, with explicit (M,D) precision when the
  item has a fixed number of decimals.
*/
void field_real::get_opt_type(String *answer,
			      ha_rows total_rows __attribute__((unused)))
{
  char buff[MAX_FIELD_WIDTH];

  if (!max_notzero_dec_len)
  {
    /* Display width without '.' and decimals (when decimals are fixed). */
    int len= (int) max_length - ((item->decimals >= FLOATING_POINT_DECIMALS) ?
				 0 : (item->decimals + 1));

    /* Narrowest integer type whose (un)signed range covers the data. */
    if (min_arg >= -128 && max_arg <= (min_arg >= 0 ? 255 : 127))
      snprintf(buff, sizeof(buff), "TINYINT(%d)", len);
    else if (min_arg >= INT_MIN16 && max_arg <= (min_arg >= 0 ?
						 UINT_MAX16 : INT_MAX16))
      snprintf(buff, sizeof(buff), "SMALLINT(%d)", len);
    else if (min_arg >= INT_MIN24 && max_arg <= (min_arg >= 0 ?
						 UINT_MAX24 : INT_MAX24))
      snprintf(buff, sizeof(buff), "MEDIUMINT(%d)", len);
    else if (min_arg >= INT_MIN32 && max_arg <= (min_arg >= 0 ?
						 UINT_MAX32 : INT_MAX32))
      snprintf(buff, sizeof(buff), "INT(%d)", len);
    else
      snprintf(buff, sizeof(buff), "BIGINT(%d)", len);
    answer->append(buff, (uint) strlen(buff));
    if (min_arg >= 0)
      answer->append(STRING_WITH_LEN(" UNSIGNED"));
  }
  else if (item->decimals >= FLOATING_POINT_DECIMALS)
  {
    /* No fixed precision requested: bare FLOAT/DOUBLE. */
    if (min_arg >= -FLT_MAX && max_arg <= FLT_MAX)
      answer->append(STRING_WITH_LEN("FLOAT"));
    else
      answer->append(STRING_WITH_LEN("DOUBLE"));
  }
  else
  {
    if (min_arg >= -FLT_MAX && max_arg <= FLT_MAX)
      snprintf(buff, sizeof(buff), "FLOAT(%d,%d)", (int) max_length - (item->decimals + 1) + max_notzero_dec_len,
	      max_notzero_dec_len);
    else
      snprintf(buff, sizeof(buff), "DOUBLE(%d,%d)", (int) max_length - (item->decimals + 1) + max_notzero_dec_len,
	      max_notzero_dec_len);
    answer->append(buff, (uint) strlen(buff));
  }
  // if item is FIELD_ITEM, it _must_be_ Field_num in this class
  if (item->type() == Item::FIELD_ITEM &&
      // a single number shouldn't be zerofill
      (max_length - (item->decimals + 1)) != 1 &&
      ((Field_num*) ((Item_field*) item)->field)->zerofill)
    answer->append(STRING_WITH_LEN(" ZEROFILL"));
} // field_real::get_opt_type
+
+
+void field_longlong::get_opt_type(String *answer,
+				  ha_rows total_rows __attribute__((unused)))
+{
+  char buff[MAX_FIELD_WIDTH];
+
+  if (min_arg >= -128 && max_arg <= (min_arg >= 0 ? 255 : 127))
+    snprintf(buff, sizeof(buff), "TINYINT(%d)", (int) max_length);
+  else if (min_arg >= INT_MIN16 && max_arg <= (min_arg >= 0 ?
+					       UINT_MAX16 : INT_MAX16))
+    snprintf(buff, sizeof(buff), "SMALLINT(%d)", (int) max_length);
+  else if (min_arg >= INT_MIN24 && max_arg <= (min_arg >= 0 ?
+					       UINT_MAX24 : INT_MAX24))
+    snprintf(buff, sizeof(buff), "MEDIUMINT(%d)", (int) max_length);
+  else if (min_arg >= INT_MIN32 && max_arg <= (min_arg >= 0 ?
+					       UINT_MAX32 : INT_MAX32))
+    snprintf(buff, sizeof(buff), "INT(%d)", (int) max_length);
+  else
+    snprintf(buff, sizeof(buff), "BIGINT(%d)", (int) max_length);
+  answer->append(buff, (uint) strlen(buff));
+  if (min_arg >= 0)
+    answer->append(STRING_WITH_LEN(" UNSIGNED"));
+
+  // if item is FIELD_ITEM, it _must_be_ Field_num in this class
+  if ((item->type() == Item::FIELD_ITEM) &&
+      // a single number shouldn't be zerofill
+      max_length != 1 &&
+      ((Field_num*) ((Item_field*) item)->field)->zerofill)
+    answer->append(STRING_WITH_LEN(" ZEROFILL"));
+} // field_longlong::get_opt_type
+
+
+void field_ulonglong::get_opt_type(String *answer,
+				   ha_rows total_rows __attribute__((unused)))
+{
+  char buff[MAX_FIELD_WIDTH];
+
+  if (max_arg < 256)
+    snprintf(buff, sizeof(buff), "TINYINT(%d) UNSIGNED", (int) max_length);
+   else if (max_arg <= ((2 * INT_MAX16) + 1))
+     snprintf(buff, sizeof(buff), "SMALLINT(%d) UNSIGNED", (int) max_length);
+  else if (max_arg <= ((2 * INT_MAX24) + 1))
+    snprintf(buff, sizeof(buff), "MEDIUMINT(%d) UNSIGNED", (int) max_length);
+  else if (max_arg < (((ulonglong) 1) << 32))
+    snprintf(buff, sizeof(buff), "INT(%d) UNSIGNED", (int) max_length);
+  else
+    snprintf(buff, sizeof(buff), "BIGINT(%d) UNSIGNED", (int) max_length);
+  // if item is FIELD_ITEM, it _must_be_ Field_num in this class
+  answer->append(buff, (uint) strlen(buff));
+  if (item->type() == Item::FIELD_ITEM &&
+      // a single number shouldn't be zerofill
+      max_length != 1 &&
+      ((Field_num*) ((Item_field*) item)->field)->zerofill)
+    answer->append(STRING_WITH_LEN(" ZEROFILL"));
+} //field_ulonglong::get_opt_type
+
+
+void field_decimal::get_opt_type(String *answer,
+                                 ha_rows total_rows __attribute__((unused)))
+{
+  my_decimal zero;
+  char buff[MAX_FIELD_WIDTH];
+  uint length;
+
+  my_decimal_set_zero(&zero);
+  my_bool is_unsigned= (zero.cmp(&min_arg) >= 0);
+
+  length= snprintf(buff, sizeof(buff), "DECIMAL(%d, %d)",
+                  (int) (max_length - (item->decimals ? 1 : 0)),
+                  item->decimals);
+  if (is_unsigned)
+    length= (uint) (strmov(buff+length, " UNSIGNED")- buff);
+  answer->append(buff, length);
+}
+
+
/* Render the smallest decimal value seen into *str and return it. */
String *field_decimal::get_min_arg(String *str)
{
  min_arg.to_string_native(str, 0, 0, '0');
  return str;
}
+
+
/* Render the largest decimal value seen into *str and return it. */
String *field_decimal::get_max_arg(String *str)
{
  max_arg.to_string_native(str, 0, 0, '0');
  return str;
}
+
+
/*
  Average of the non-NULL decimal values: sum / (rows - nulls),
  rounded to the current div_precincrement scale.
  Writes "0.0" into *s when every row was NULL.
*/
String *field_decimal::avg(String *s, ha_rows rows)
{
  if (!(rows - nulls))
  {
    s->set_real((double) 0.0, 1,my_thd_charset);
    return s;
  }
  my_decimal num, avg_val, rounded_avg;
  int prec_increment= current_thd->variables.div_precincrement;

  int2my_decimal(E_DEC_FATAL_ERROR, rows - nulls, FALSE, &num);
  my_decimal_div(E_DEC_FATAL_ERROR, &avg_val, sum+cur_sum, &num, prec_increment);
  /* TODO remove this after decimal_div returns proper frac */
  avg_val.round_to(&rounded_avg,
                   MY_MIN(sum[cur_sum].frac + prec_increment, DECIMAL_MAX_SCALE),
                   HALF_UP);
  rounded_avg.to_string_native(s, 0, 0, '0');
  return s;
}
+
+
/*
  Population standard deviation of the decimal values:
  sqrt((sum_sqr - sum^2 / n) / n) with n = rows - nulls, computed in
  decimal arithmetic and converted to double only for the square root.
  Writes "0.0" into *s when every row was NULL.
*/
String *field_decimal::std(String *s, ha_rows rows)
{
  if (!(rows - nulls))
  {
    s->set_real((double) 0.0, 1,my_thd_charset);
    return s;
  }
  my_decimal num, tmp, sum2, sum2d;
  int prec_increment= current_thd->variables.div_precincrement;

  int2my_decimal(E_DEC_FATAL_ERROR, rows - nulls, FALSE, &num);
  my_decimal_mul(E_DEC_FATAL_ERROR, &sum2, sum+cur_sum, sum+cur_sum);
  my_decimal_div(E_DEC_FATAL_ERROR, &tmp, &sum2, &num, prec_increment);
  my_decimal_sub(E_DEC_FATAL_ERROR, &sum2, sum_sqr+cur_sum, &tmp);
  my_decimal_div(E_DEC_FATAL_ERROR, &tmp, &sum2, &num, prec_increment);
  double std_sqr= tmp.to_double();
  /* Guard against a tiny negative variance from rounding. */
  s->set_real(((double) std_sqr <= 0.0 ? 0.0 : sqrt(std_sqr)),
         MY_MIN(item->decimals + prec_increment, NOT_FIXED_DEC), my_thd_charset);

  return s;
}
+
+
+int collect_string(String *element,
+		   element_count count __attribute__((unused)),
+		   TREE_INFO *info)
+{
+  if (info->found)
+    info->str->append(',');
+  else
+    info->found = 1;
+  info->str->append('\'');
+  if (info->str->append_for_single_quote(element))
+    return 1;
+  info->str->append('\'');
+  return 0;
+} // collect_string
+
+
+int collect_real(double *element, element_count count __attribute__((unused)),
+		 TREE_INFO *info)
+{
+  char buff[MAX_FIELD_WIDTH];
+  String s(buff, sizeof(buff),current_thd->charset());
+
+  if (info->found)
+    info->str->append(',');
+  else
+    info->found = 1;
+  info->str->append('\'');
+  s.set_real(*element, info->item->decimals, current_thd->charset());
+  info->str->append(s);
+  info->str->append('\'');
+  return 0;
+} // collect_real
+
+
+int collect_decimal(uchar *element, element_count count,
+                    TREE_INFO *info)
+{
+  char buff[DECIMAL_MAX_STR_LENGTH];
+  String s(buff, sizeof(buff),&my_charset_bin);
+
+  if (info->found)
+    info->str->append(',');
+  else
+    info->found = 1;
+  my_decimal dec(element, info->item->max_length, info->item->decimals);
+  info->str->append('\'');
+  dec.to_string_native(&s, 0, 0, '0');
+  info->str->append(s);
+  info->str->append('\'');
+  return 0;
+}
+
+
+int collect_longlong(longlong *element,
+		     element_count count __attribute__((unused)),
+		     TREE_INFO *info)
+{
+  char buff[MAX_FIELD_WIDTH];
+  String s(buff, sizeof(buff),&my_charset_bin);
+
+  if (info->found)
+    info->str->append(',');
+  else
+    info->found = 1;
+  info->str->append('\'');
+  s.set(*element, current_thd->charset());
+  info->str->append(s);
+  info->str->append('\'');
+  return 0;
+} // collect_longlong
+
+
+int collect_ulonglong(ulonglong *element,
+		      element_count count __attribute__((unused)),
+		      TREE_INFO *info)
+{
+  char buff[MAX_FIELD_WIDTH];
+  String s(buff, sizeof(buff),&my_charset_bin);
+
+  if (info->found)
+    info->str->append(',');
+  else
+    info->found = 1;
+  info->str->append('\'');
+  s.set(*element, current_thd->charset());
+  info->str->append(s);
+  info->str->append('\'');
+  return 0;
+} // collect_ulonglong
+
+
+bool analyse::change_columns(THD *thd, List &field_list)
+{
+  MEM_ROOT *mem_root= thd->mem_root;
+  field_list.empty();
+
+  func_items[0]= new (mem_root) Item_proc_string(thd, "Field_name", 255);
+  func_items[1]= new (mem_root) Item_proc_string(thd, "Min_value", 255);
+  func_items[1]->set_maybe_null();
+  func_items[2]= new (mem_root) Item_proc_string(thd, "Max_value", 255);
+  func_items[2]->set_maybe_null();
+  func_items[3]= new (mem_root) Item_proc_int(thd, "Min_length");
+  func_items[4]= new (mem_root) Item_proc_int(thd, "Max_length");
+  func_items[5]= new (mem_root) Item_proc_int(thd, "Empties_or_zeros");
+  func_items[6]= new (mem_root) Item_proc_int(thd, "Nulls");
+  func_items[7]= new (mem_root) Item_proc_string(thd, "Avg_value_or_avg_length", 255);
+  func_items[8]= new (mem_root) Item_proc_string(thd, "Std", 255);
+  func_items[8]->set_maybe_null();
+  func_items[9]= new (mem_root) Item_proc_string(thd, "Optimal_fieldtype",
+                                                  MY_MAX(64,
+                                                         output_str_length));
+
+  for (uint i = 0; i < array_elements(func_items); i++)
+    field_list.push_back(func_items[i], thd->mem_root);
+  result_fields = field_list;
+  return 0;
+} // analyse::change_columns
+
/* qsort-style three-way comparison of two doubles: -1, 0 or 1. */
int compare_double(const double *s, const double *t)
{
  if (*s < *t)
    return -1;
  return (*s > *t) ? 1 : 0;
} /* compare_double */
+
+int compare_longlong(const longlong *s, const longlong *t)
+{
+  return ((*s < *t) ? -1 : *s > *t ? 1 : 0);
+} /* compare_longlong */
+
+ int compare_ulonglong(const ulonglong *s, const ulonglong *t)
+{
+  return ((*s < *t) ? -1 : *s > *t ? 1 : 0);
+} /* compare_ulonglong */
+
+
/*
  Classify a digit string by the smallest integer token class that can
  hold it: NUM ("2147483647" or less after stripping leading zeros),
  LONG_NUM (fits in an unsigned 64-bit value) or DECIMAL_NUM (too big
  for ulonglong).
*/
uint check_ulonglong(const char *str, uint length)
{
  const char *long_str = "2147483647", *ulonglong_str = "18446744073709551615";
  const uint long_len = 10, ulonglong_len = 20;

  /* Leading zeros don't affect magnitude. */
  while (*str == '0' && length)
  {
    str++; length--;
  }
  if (length < long_len)
    return NUM;

  uint smaller, bigger;
  const char *cmp;

  if (length == long_len)
  {
    cmp = long_str;
    smaller = NUM;
    bigger = LONG_NUM;
  }
  else if (length > ulonglong_len)
    return DECIMAL_NUM;
  else
  {
    cmp = ulonglong_str;
    smaller = LONG_NUM;
    bigger = DECIMAL_NUM;
  }
  /*
    Lexicographic compare against the limit string: skip matching
    digits, then the first differing digit decides which side of the
    boundary the value falls on.
    NOTE(review): for lengths 11..19 the loop compares against all 20
    limit digits and may read past `length` bytes of str — presumably
    callers pass NUL-/delimiter-terminated digit runs; verify.
  */
  while (*cmp && *cmp++ == *str++) ;
  return ((uchar) str[-1] <= (uchar) cmp[-1]) ? smaller : bigger;
} /* check_ulonglong */
diff --git a/sql/sql_analyse.h b/sql/sql_analyse.h
new file mode 100644
index 00000000..e76ff13c
--- /dev/null
+++ b/sql/sql_analyse.h
@@ -0,0 +1,367 @@
+#ifndef SQL_ANALYSE_INCLUDED
+#define SQL_ANALYSE_INCLUDED
+
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+
+/* Analyse database */
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface				/* gcc class implementation */
+#endif
+
+#include "procedure.h"                          /* Procedure */
+
+#define my_thd_charset	default_charset_info
+
+#define DEC_IN_AVG 4
+
/*
  Facts gathered about the numeric interpretation of a value,
  used when deciding whether a string column is "still a number".
*/
typedef struct st_number_info
{
  // if zerofill is true, the number must be zerofill, or string
  bool	    negative, is_float, zerofill, maybe_zerofill;
  int8	    integers;    // digits before the decimal point
  int8	    decimals;    // digits after the decimal point
  double    dval;        // value as a double
  ulonglong ullval;      // value as an unsigned integer
} NUM_INFO;
+
/* Extreme (min/max) values seen across a whole column, in several forms. */
typedef struct st_extreme_value_number_info
{
  ulonglong ullval;
  longlong  llval;
  double    max_dval, min_dval;
} EV_NUM_INFO;
+
/* Shared state for the collect_* tree_walk callbacks building ENUM(...). */
typedef struct st_tree_info
{
  bool	 found;    // at least one value already appended (controls commas)
  String *str;     // the ENUM(...) string under construction
  Item	 *item;    // source item (for charset/decimals when formatting)
} TREE_INFO;
+
+uint check_ulonglong(const char *str, uint length);
+bool get_ev_num_info(EV_NUM_INFO *ev_info, NUM_INFO *info, const char *num);
+bool test_if_number(NUM_INFO *info, const char *str, uint str_len);
+int compare_double(const double *s, const double *t);
+int compare_double2(void* cmp_arg __attribute__((unused)),
+		    const double *s, const double *t);
+int compare_longlong(const longlong *s, const longlong *t);
+int compare_longlong2(void* cmp_arg __attribute__((unused)),
+		      const longlong *s, const longlong *t);
+int compare_ulonglong(const ulonglong *s, const ulonglong *t);
+int compare_ulonglong2(void* cmp_arg __attribute__((unused)),
+		       const ulonglong *s, const ulonglong *t);
+int compare_decimal2(int* len, const char *s, const char *t);
+Procedure *proc_analyse_init(THD *thd, ORDER *param, select_result *result,
+			     List &field_list);
+int free_string(void* str, TREE_FREE, void*);
+class analyse;
+
/*
  Abstract base for the per-column statistics collectors used by
  PROCEDURE ANALYSE().  Owns a TREE of distinct values (freed in the
  destructor) plus counters shared by all concrete field types.
*/
class field_info :public Sql_alloc
{
protected:
  ulong   treemem, tree_elements, empty, nulls, min_length, max_length;
  uint	  room_in_tree;   // starts at 1; presumably cleared when the tree is full — verify
  bool found;
  TREE	  tree;           // distinct values, for the ENUM suggestion
  Item	  *item;          // the analysed expression
  analyse *pc;            // owning analyse procedure

public:
  field_info(Item* a, analyse* b) : treemem(0), tree_elements(0), empty(0),
    nulls(0), min_length(0), max_length(0), room_in_tree(1),
    found(0),item(a), pc(b) {};

  virtual ~field_info() { delete_tree(&tree, 0); }
  virtual void	 add() = 0;                          // fold in one row's value
  virtual void	 get_opt_type(String*, ha_rows) = 0; // suggest a column type
  virtual String *get_min_arg(String *) = 0;
  virtual String *get_max_arg(String *) = 0;
  virtual String *avg(String*, ha_rows) = 0;
  virtual String *std(String*, ha_rows) = 0;
  virtual tree_walk_action collect_enum() = 0;       // callback for ENUM building
  virtual uint decimals() { return 0; }
  friend  class analyse;
};
+
+
+int collect_string(String *element, element_count count,
+		   TREE_INFO *info);
+
+int sortcmp2(void* cmp_arg __attribute__((unused)),
+	     const String *a,const String *b);
+
/*
  Statistics collector for string-valued expressions.  Also tracks
  whether every value could still be parsed as a number, so a numeric
  type can be suggested for numeric-looking string columns.
*/
class field_str :public field_info
{
  String      min_arg, max_arg;   // smallest / largest value seen
  ulonglong   sum;                // accumulator used by avg() and the CHAR/VARCHAR heuristic
  bool	      must_be_blob, was_zero_fill, was_maybe_zerofill,
	      can_be_still_num;
  NUM_INFO    num_info;
  EV_NUM_INFO ev_num_info;

public:
  field_str(Item* a, analyse* b) :field_info(a,b), 
    min_arg("",0,default_charset_info),
    max_arg("",0,default_charset_info), sum(0),
    must_be_blob(0), was_zero_fill(0),
    was_maybe_zerofill(0), can_be_still_num(1)
    { init_tree(&tree, 0, 0, sizeof(String), (qsort_cmp2) sortcmp2,
		free_string, NULL, MYF(MY_THREAD_SPECIFIC)); };

  void	 add();
  void	 get_opt_type(String*, ha_rows);
  String *get_min_arg(String *not_used __attribute__((unused)))
  { return &min_arg; }
  String *get_max_arg(String *not_used __attribute__((unused)))
  { return &max_arg; }
  // Average of sum over non-NULL rows; "0.0" when all rows were NULL.
  String *avg(String *s, ha_rows rows)
  {
    if (!(rows - nulls))
      s->set_real((double) 0.0, 1,my_thd_charset);
    else
      s->set_real((ulonglong2double(sum) / ulonglong2double(rows - nulls)),
	     DEC_IN_AVG,my_thd_charset);
    return s;
  }
  friend int collect_string(String *element, element_count count,
			    TREE_INFO *info);
  tree_walk_action collect_enum()
  { return (tree_walk_action) collect_string; }
  // Standard deviation is meaningless for strings: always NULL.
  String *std(String *s __attribute__((unused)),
	      ha_rows rows __attribute__((unused)))
  { return (String*) 0; }
};
+
+
+int collect_decimal(uchar *element, element_count count,
+                    TREE_INFO *info);
+
/*
  Statistics collector for DECIMAL expressions.  Distinct values are
  stored in the tree in their binary decimal form (bin_size bytes);
  sum/sum_sqr are kept as my_decimal pairs indexed by cur_sum.
*/
class field_decimal :public field_info
{
  my_decimal min_arg, max_arg;
  my_decimal sum[2], sum_sqr[2];   // double-buffered accumulators
  int cur_sum;                     // index of the active sum/sum_sqr slot
  int bin_size;                    // binary size of one decimal value
public:
  field_decimal(Item* a, analyse* b) :field_info(a,b)
  {
    bin_size= my_decimal_get_binary_size(a->max_length, a->decimals);
    init_tree(&tree, 0, 0, bin_size, (qsort_cmp2)compare_decimal2,
              0, (void *)&bin_size, MYF(MY_THREAD_SPECIFIC));
  };

  void	 add();
  void	 get_opt_type(String*, ha_rows);
  String *get_min_arg(String *);
  String *get_max_arg(String *);
  String *avg(String *s, ha_rows rows);
  friend int collect_decimal(uchar *element, element_count count,
                             TREE_INFO *info);
  tree_walk_action collect_enum()
  { return (tree_walk_action) collect_decimal; }
  String *std(String *s, ha_rows rows);
};
+
+
+int collect_real(double *element, element_count count, TREE_INFO *info);
+
/*
  Statistics collector for REAL (floating point) expressions.
*/
class field_real: public field_info
{
  double min_arg, max_arg;
  double sum, sum_sqr;          // running sum and sum of squares
  uint	 max_notzero_dec_len;   // longest non-zero fractional part seen

public:
  field_real(Item* a, analyse* b) :field_info(a,b),
    min_arg(0), max_arg(0),  sum(0), sum_sqr(0), max_notzero_dec_len(0)
    { init_tree(&tree, 0, 0, sizeof(double),
		(qsort_cmp2) compare_double2, NULL, NULL,
                MYF(MY_THREAD_SPECIFIC)); }

  void	 add();
  void	 get_opt_type(String*, ha_rows);
  String *get_min_arg(String *s)
  {
    s->set_real(min_arg, item->decimals, my_thd_charset);
    return s;
  }
  String *get_max_arg(String *s)
  {
    s->set_real(max_arg, item->decimals, my_thd_charset);
    return s;
  }
  // Mean over non-NULL rows; "0.0" when all rows were NULL.
  String *avg(String *s, ha_rows rows)
  {
    if (!(rows - nulls))
      s->set_real((double) 0.0, 1,my_thd_charset);
    else
      s->set_real(((double)sum / (double) (rows - nulls)), item->decimals,my_thd_charset);
    return s;
  }
  // Population std deviation: sqrt((sum_sqr - sum^2/n) / n), n = rows - nulls.
  String *std(String *s, ha_rows rows)
  {
    double tmp = ulonglong2double(rows);
    if (!(tmp - nulls))
      s->set_real((double) 0.0, 1,my_thd_charset);
    else
    {
      double tmp2 = ((sum_sqr - sum * sum / (tmp - nulls)) /
		     (tmp - nulls));
      s->set_real(((double) tmp2 <= 0.0 ? 0.0 : sqrt(tmp2)), item->decimals,my_thd_charset);
    }
    return s;
  }
  uint	 decimals() { return item->decimals; }
  friend int collect_real(double *element, element_count count,
			  TREE_INFO *info);
  tree_walk_action collect_enum()
  { return (tree_walk_action) collect_real;}
};
+
+int collect_longlong(longlong *element, element_count count,
+		     TREE_INFO *info);
+
/*
  Statistics collector for signed integer expressions.
*/
class field_longlong: public field_info
{
  longlong min_arg, max_arg;
  longlong sum, sum_sqr;     // running sum and sum of squares

public:
  field_longlong(Item* a, analyse* b) :field_info(a,b), 
    min_arg(0), max_arg(0), sum(0), sum_sqr(0)
    { init_tree(&tree, 0, 0, sizeof(longlong),
		(qsort_cmp2) compare_longlong2, NULL, NULL,
                MYF(MY_THREAD_SPECIFIC)); }

  void	 add();
  void	 get_opt_type(String*, ha_rows);
  String *get_min_arg(String *s) { s->set(min_arg,my_thd_charset); return s; }
  String *get_max_arg(String *s) { s->set(max_arg,my_thd_charset); return s; }
  // Mean over non-NULL rows; "0.0" when all rows were NULL.
  String *avg(String *s, ha_rows rows)
  {
    if (!(rows - nulls))
      s->set_real((double) 0.0, 1,my_thd_charset);
    else
      s->set_real(((double) sum / (double) (rows - nulls)), DEC_IN_AVG,my_thd_charset);
    return s;
  }
  // Population std deviation: sqrt((sum_sqr - sum^2/n) / n), n = rows - nulls.
  String *std(String *s, ha_rows rows)
  {
    double tmp = ulonglong2double(rows);
    if (!(tmp - nulls))
      s->set_real((double) 0.0, 1,my_thd_charset);
    else
    {
      double tmp2 = ((sum_sqr - sum * sum / (tmp - nulls)) /
		    (tmp - nulls));
      s->set_real(((double) tmp2 <= 0.0 ? 0.0 : sqrt(tmp2)), DEC_IN_AVG,my_thd_charset);
    }
    return s;
  }
  friend int collect_longlong(longlong *element, element_count count,
			      TREE_INFO *info);
  tree_walk_action collect_enum()
  { return (tree_walk_action) collect_longlong;}
};
+
+int collect_ulonglong(ulonglong *element, element_count count,
+		      TREE_INFO *info);
+
/*
  Statistics collector for unsigned integer expressions.
*/
class field_ulonglong: public field_info
{
  ulonglong min_arg, max_arg;
  ulonglong sum, sum_sqr;    // running sum and sum of squares

public:
  field_ulonglong(Item* a, analyse * b) :field_info(a,b),
    min_arg(0), max_arg(0), sum(0),sum_sqr(0)
    { init_tree(&tree, 0, 0, sizeof(ulonglong),
		(qsort_cmp2) compare_ulonglong2, NULL, NULL,
                MYF(MY_THREAD_SPECIFIC)); }
  void	 add();
  void	 get_opt_type(String*, ha_rows);
  String *get_min_arg(String *s) { s->set(min_arg,my_thd_charset); return s; }
  String *get_max_arg(String *s) { s->set(max_arg,my_thd_charset); return s; }
  // Mean over non-NULL rows; "0.0" when all rows were NULL.
  String *avg(String *s, ha_rows rows)
  {
    if (!(rows - nulls))
      s->set_real((double) 0.0, 1,my_thd_charset);
    else
      s->set_real((ulonglong2double(sum) / ulonglong2double(rows - nulls)),
	     DEC_IN_AVG,my_thd_charset);
    return s;
  }
  // Population std deviation: sqrt((sum_sqr - sum^2/n) / n), n = rows - nulls.
  String *std(String *s, ha_rows rows)
  {
    double tmp = ulonglong2double(rows);
    if (!(tmp - nulls))
      s->set_real((double) 0.0, 1,my_thd_charset);
    else
    {
      double tmp2 = ((ulonglong2double(sum_sqr) - 
		     ulonglong2double(sum * sum) / (tmp - nulls)) /
		     (tmp - nulls));
      s->set_real(((double) tmp2 <= 0.0 ? 0.0 : sqrt(tmp2)), DEC_IN_AVG,my_thd_charset);
    }
    return s;
  }
  friend int collect_ulonglong(ulonglong *element, element_count count,
			       TREE_INFO *info);
  tree_walk_action collect_enum()
  { return (tree_walk_action) collect_ulonglong; }
};
+
+
+Procedure *proc_analyse_init(THD *thd, ORDER *param,
+			     select_result *result,
+			     List &field_list);
+
+class analyse: public Procedure
+{
+protected:
+  Item_proc    *func_items[10];
+  List   fields, result_fields;
+  field_info   **f_info, **f_end;
+  ha_rows      rows;
+  uint	       output_str_length;
+
+public:
+  uint max_tree_elements, max_treemem;
+
+  analyse(select_result *res) :Procedure(res, PROC_NO_SORT), f_info(0),
+    rows(0), output_str_length(0) {}
+
+  ~analyse()
+  {
+    if (f_info)
+    {
+      for (field_info **f=f_info; f != f_end; f++)
+	delete (*f);
+    }
+  }
+  virtual void add() {}
+  virtual bool change_columns(THD *thd, List &fields);
+  virtual int  send_row(List &field_list);
+  virtual void end_group(void) {}
+  virtual int end_of_records(void);
+  friend Procedure *proc_analyse_init(THD *thd, ORDER *param,
+				      select_result *result,
+				      List &field_list);
+};
+
+#endif /* SQL_ANALYSE_INCLUDED */
diff --git a/sql/sql_analyze_stmt.cc b/sql/sql_analyze_stmt.cc
new file mode 100644
index 00000000..734ca1f8
--- /dev/null
+++ b/sql/sql_analyze_stmt.cc
@@ -0,0 +1,122 @@
+/*
+   Copyright (c) 2015 MariaDB Corporation Ab
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation				// gcc: Class implementation
+#endif
+
+#include "mariadb.h"
+#include "sql_priv.h"
+#include "sql_select.h"
+#include "my_json_writer.h"
+
/*
  Emit this filesort's ANALYZE statistics as members of the current
  JSON object.  Counters that were never exercised (r_loops == 0) are
  emitted as JSON null; counters that differed between executions are
  emitted as the "(varied across executions)" string.
*/
void Filesort_tracker::print_json_members(Json_writer *writer)
{
  const char *varied_str= "(varied across executions)";
  String str;

  if (!get_r_loops())
    writer->add_member("r_loops").add_null();
  else
    writer->add_member("r_loops").add_ll(get_r_loops());
  
  if (time_tracker.has_timed_statistics())
  {
    writer->add_member("r_total_time_ms").
            add_double(time_tracker.get_time_ms());
  }
  if (r_limit != HA_POS_ERROR)
  {
    writer->add_member("r_limit");
    if (!get_r_loops())
      writer->add_null();
    else if (r_limit == 0)          // 0 encodes "different limits per execution"
      writer->add_str(varied_str);
    else
      writer->add_ll(r_limit);
  }

  /* true/false only when all executions agreed; otherwise "varied". */
  writer->add_member("r_used_priority_queue"); 
  if (!get_r_loops())
    writer->add_null();
  else if (r_used_pq == get_r_loops())
    writer->add_bool(true);
  else if (r_used_pq == 0)
    writer->add_bool(false);
  else
    writer->add_str(varied_str);

  /* Averages below are per-execution (divided by r_loops). */
  if (!get_r_loops())
    writer->add_member("r_output_rows").add_null();
  else
    writer->add_member("r_output_rows").add_ll(
                        (longlong) rint((double)r_output_rows / get_r_loops()));

  if (sort_passes)
  {
    writer->add_member("r_sort_passes").add_ll(
                        (longlong) rint((double)sort_passes / get_r_loops()));
  }

  if (sort_buffer_size != 0)
  {
    writer->add_member("r_buffer_size");
    if (sort_buffer_size == ulonglong(-1))   // -1 encodes "varied"
      writer->add_str(varied_str);
    else
      writer->add_size(sort_buffer_size);
  }

  get_data_format(&str);
  writer->add_member("r_sort_mode").add_str(str.ptr(), str.length());
}
+
+void Filesort_tracker::get_data_format(String *str)
+{
+  if (r_sort_keys_packed)
+    str->append(STRING_WITH_LEN("packed_sort_key"));
+  else
+    str->append(STRING_WITH_LEN("sort_key"));
+  str->append(',');
+
+  if (r_using_addons)
+  {
+    if (r_packed_addon_fields)
+      str->append(STRING_WITH_LEN("packed_addon_fields"));
+    else
+      str->append(STRING_WITH_LEN("addon_fields"));
+  }
+  else
+    str->append(STRING_WITH_LEN("rowid"));
+}
+
/*
  Arrange for the time elapsed from `timeval` until the next tracked
  event to be billed to `gap_tracker` (see process_gap_time_tracker).
*/
void attach_gap_time_tracker(THD *thd, Gap_time_tracker *gap_tracker,
                             ulonglong timeval)
{
  thd->gap_tracker_data.bill_to= gap_tracker;
  thd->gap_tracker_data.start_time= timeval;
}
+
+void process_gap_time_tracker(THD *thd, ulonglong timeval)
+{
+  if (thd->gap_tracker_data.bill_to)
+  {
+    thd->gap_tracker_data.bill_to->log_time(thd->gap_tracker_data.start_time,
+                                            timeval);
+    thd->gap_tracker_data.bill_to= NULL;
+  }
+}
+
diff --git a/sql/sql_analyze_stmt.h b/sql/sql_analyze_stmt.h
new file mode 100644
index 00000000..d1faa58a
--- /dev/null
+++ b/sql/sql_analyze_stmt.h
@@ -0,0 +1,479 @@
+/*
+   Copyright (c) 2015, 2020, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/*
+
+== ANALYZE-stmt classes ==
+
+This file contains classes for supporting "ANALYZE statement" feature. These are 
+a set of data structures that can be used to store the data about how the 
+statement executed.
+
+There are two kinds of data collection:
+
+1. Various counters. We assume that incrementing counters has very low
+overhead. Because of that, execution code increments counters unconditionally
+(even when not running "ANALYZE $statement" commands. You run regular SELECT/
+UPDATE/DELETE/etc and the counters are incremented).
+
+As a free bonus, this lets us print detailed information into the slow query
+log, should the query be slow.
+
2. Timing data. Measuring the time it took to run parts of a query has
noticeable overhead. Because of that, we measure the time only when running
"ANALYZE $stmt".
+*/
+
+/* fake microseconds as cycles if cycles isn't available */
+
+static inline double timer_tracker_frequency()
+{
+#if (MY_TIMER_ROUTINE_CYCLES)
+  return static_cast<double>(sys_timer_info.cycles.frequency);
+#else
+  return static_cast<double>(sys_timer_info.microseconds.frequency);
+#endif
+}
+
+
+class Gap_time_tracker;
+void attach_gap_time_tracker(THD *thd, Gap_time_tracker *gap_tracker, ulonglong timeval);
+void process_gap_time_tracker(THD *thd, ulonglong timeval);
+
+/*
+  A class for tracking time it takes to do a certain action
+*/
+class Exec_time_tracker
+{
+protected:
+  ulonglong count;
+  ulonglong cycles;
+  ulonglong last_start;
+
+  ulonglong measure() const
+  {
+#if (MY_TIMER_ROUTINE_CYCLES)
+    return my_timer_cycles();
+#else
+    return my_timer_microseconds();
+#endif
+  }
+
+  void cycles_stop_tracking(THD *thd)
+  {
+    ulonglong end= measure();
+    cycles += end - last_start;
+
+    process_gap_time_tracker(thd, end);
+    if (my_gap_tracker)
+      attach_gap_time_tracker(thd, my_gap_tracker, end);
+  }
+
+  /*
+    The time spent after stop_tracking() call on this object and any
+    subsequent time tracking call will be billed to this tracker.
+  */
+  Gap_time_tracker *my_gap_tracker;
+public:
+  Exec_time_tracker() : count(0), cycles(0), my_gap_tracker(NULL) {}
+
+  void set_gap_tracker(Gap_time_tracker *gap_tracker)
+  {
+    my_gap_tracker= gap_tracker;
+  }
+
+  // interface for collecting time
+  void start_tracking(THD *thd)
+  {
+    last_start= measure();
+    process_gap_time_tracker(thd, last_start);
+  }
+
+  void stop_tracking(THD *thd)
+  {
+    count++;
+    cycles_stop_tracking(thd);
+  }
+
+  // interface for getting the time
+  ulonglong get_loops() const { return count; }
+
+  inline double cycles_to_ms(ulonglong cycles_arg) const
+  {
+    // convert 'cycles' to milliseconds.
+    return 1000.0 * static_cast<double>(cycles_arg) /
+      timer_tracker_frequency();
+  }
+
+  double get_time_ms() const
+  {
+    return cycles_to_ms(cycles);
+  }
+  ulonglong get_cycles() const
+  {
+    return cycles;
+  }
+
+  bool has_timed_statistics() const { return cycles > 0; }
+};
+
+
+/*
+  Tracker for time spent between the calls to Exec_time_tracker's {start|
+  stop}_tracking().
+
+  @seealso Gap_time_tracker_data in sql_class.h
+*/
+class Gap_time_tracker
+{
+  ulonglong cycles;
+public:
+  Gap_time_tracker() : cycles(0) {}
+
+  void log_time(ulonglong start, ulonglong end) {
+    cycles += end - start;
+  }
+
+  double get_time_ms() const
+  {
+    // convert 'cycles' to milliseconds.
+    return 1000.0 * static_cast<double>(cycles) / timer_tracker_frequency();
+  }
+};
+
+
+/*
+  A class for counting certain actions (in all queries), and optionally
+  collecting the timings (in ANALYZE queries).
+*/
+
+class Time_and_counter_tracker: public Exec_time_tracker
+{
+public: 
+  const bool timed;
+  
+  Time_and_counter_tracker(bool timed_arg) : timed(timed_arg)
+  {}
+   
+  /* Loops are counted in both ANALYZE and regular queries, as this is cheap */
+  void incr_loops() { count++; }
+  
+  /*
+    Unlike Exec_time_tracker::stop_tracking, we don't increase loops.
+  */
+  void stop_tracking(THD *thd)
+  {
+    cycles_stop_tracking(thd);
+  }
+};
+
+#define ANALYZE_START_TRACKING(thd, tracker) \
+  { \
+    (tracker)->incr_loops(); \
+    if (unlikely((tracker)->timed)) \
+    { (tracker)->start_tracking(thd); } \
+  }
+
+#define ANALYZE_STOP_TRACKING(thd, tracker) \
+  if (unlikely((tracker)->timed)) \
+  { (tracker)->stop_tracking(thd); }
+
+
+/*
+  Just a counter to increment one value. Wrapped in a class to be uniform
+  with other counters used by ANALYZE.
+*/
+
+class Counter_tracker
+{
+public:
+  Counter_tracker() : r_scans(0) {}
+  ha_rows r_scans;
+
+  inline void on_scan_init() { r_scans++; }
+
+  bool has_scans() const { return (r_scans != 0); }
+  ha_rows get_loops() const { return r_scans; }
+};
+
+
+/*
+  A class for collecting read statistics.
+  
+  The idea is that we run several scans. Each scans gets rows, and then filters
+  some of them out.  We count scans, rows, and rows left after filtering.
+
+  (note: at the moment, the class is not actually tied to a physical table. 
+   It can be used to track reading from files, buffers, etc).
+*/
+
+class Table_access_tracker
+{
+public:
+  Table_access_tracker() : r_scans(0), r_rows(0), r_rows_after_where(0)
+  {}
+
+  ha_rows r_scans; /* how many scans were ran on this join_tab */
+  ha_rows r_rows; /* How many rows we've got after that */
+  ha_rows r_rows_after_where; /* Rows after applying attached part of WHERE */
+
+  double get_avg_rows() const
+  {
+    return r_scans
+      ? static_cast<double>(r_rows) / static_cast<double>(r_scans)
+      : 0;
+  }
+
+  double get_filtered_after_where() const
+  {
+    return r_rows > 0
+      ? static_cast<double>(r_rows_after_where) /
+        static_cast<double>(r_rows)
+      : 1.0;
+  }
+
+  inline void on_scan_init() { r_scans++; }
+  inline void on_record_read() { r_rows++; }
+  inline void on_record_after_where() { r_rows_after_where++; }
+
+  bool has_scans() const { return (r_scans != 0); }
+  ha_rows get_loops() const { return r_scans; }
+};
+
+
+class Json_writer;
+
+/*
+  This stores the data about how filesort executed.
+
+  A few things from here (e.g. r_used_pq, r_limit) belong to the query plan,
+  however, these parameters are calculated right during the execution so we 
+  can't easily put them into the query plan.
+
+  The class is designed to handle multiple invocations of filesort().
+*/
+
+class Filesort_tracker : public Sql_alloc
+{
+public:
+  Filesort_tracker(bool do_timing) :
+    time_tracker(do_timing), r_limit(0), r_used_pq(0),
+    r_examined_rows(0), r_sorted_rows(0), r_output_rows(0),
+    sort_passes(0),
+    sort_buffer_size(0),
+    r_using_addons(false),
+    r_packed_addon_fields(false),
+    r_sort_keys_packed(false)
+  {}
+  
+  /* Functions that filesort uses to report various things about its execution */
+
+  inline void report_use(THD *thd, ha_rows r_limit_arg)
+  {
+    if (!time_tracker.get_loops())
+      r_limit= r_limit_arg;
+    else
+      r_limit= (r_limit != r_limit_arg)? 0: r_limit_arg;
+
+    ANALYZE_START_TRACKING(thd, &time_tracker);
+  }
+  inline void incr_pq_used() { r_used_pq++; }
+
+  inline void report_row_numbers(ha_rows examined_rows, 
+                                 ha_rows sorted_rows,
+                                 ha_rows returned_rows) 
+  { 
+    r_examined_rows += examined_rows;
+    r_sorted_rows   += sorted_rows;
+    r_output_rows   += returned_rows;
+  }
+
+  inline void report_merge_passes_at_start(ulong passes)
+  {
+    sort_passes -= passes;
+  }
+  inline void report_merge_passes_at_end(THD *thd, ulong passes)
+  {
+    ANALYZE_STOP_TRACKING(thd, &time_tracker);
+    sort_passes += passes;
+  }
+
+  inline void report_sort_buffer_size(size_t bufsize)
+  {
+    if (sort_buffer_size)
+      sort_buffer_size= ulonglong(-1); // multiple buffers of different sizes
+    else
+      sort_buffer_size= bufsize;
+  }
+
+  inline void report_addon_fields_format(bool addons_packed)
+  {
+    r_using_addons= true;
+    r_packed_addon_fields= addons_packed;
+  }
+  inline void report_sort_keys_format(bool sort_keys_packed)
+  {
+    r_sort_keys_packed= sort_keys_packed;
+  }
+
+  void get_data_format(String *str);
+
+  /* Functions to get the statistics */
+  void print_json_members(Json_writer *writer);
+
+  ulonglong get_r_loops() const { return time_tracker.get_loops(); }
+  double get_avg_examined_rows() const
+  {
+    return static_cast<double>(r_examined_rows) /
+      static_cast<double>(get_r_loops());
+  }
+  double get_avg_returned_rows() const
+  {
+    return static_cast<double>(r_output_rows) /
+      static_cast<double>(get_r_loops());
+  }
+  double get_r_filtered() const
+  {
+    return r_examined_rows > 0
+      ? static_cast<double>(r_sorted_rows) /
+        static_cast<double>(r_examined_rows)
+      : 1.0;
+  }
+private:
+  Time_and_counter_tracker time_tracker;
+
+  //ulonglong r_loops; /* How many times filesort was invoked */
+  /*
+    LIMIT is typically a constant. There is never "LIMIT 0".
+      HA_POS_ERROR means we never had a limit
+      0            means different values of LIMIT were used in 
+                   different filesort invocations
+      other value  means the same LIMIT value was used every time.
+  */
+  ulonglong r_limit;
+  ulonglong r_used_pq; /* How many times PQ was used */
+
+  /* How many rows were examined (before checking the select->cond) */
+  ulonglong r_examined_rows;
+  
+  /* 
+    How many rows were put into sorting (this is examined_rows minus rows that
+    didn't pass the WHERE condition)
+  */
+  ulonglong r_sorted_rows;
+
+  /*
+    How many rows were returned. This is equal to r_sorted_rows, unless there
+    was a LIMIT N clause in which case filesort would not have returned more
+    than N rows.
+  */
+  ulonglong r_output_rows;
+
+  /* How many sorts in total (divide by r_count to get the average) */
+  ulonglong sort_passes;
+  
+  /* 
+    0              - means not used (or not known 
+    (ulonglong)-1  - multiple
+    other          - value
+  */
+  ulonglong sort_buffer_size;
+  bool r_using_addons;
+  bool r_packed_addon_fields;
+  bool r_sort_keys_packed;
+};
+
+
+/**
+  A class to collect data about how rowid filter is executed.
+
+  It stores information about how rowid filter container is filled,
+  containers size and observed selectivity.
+
+  The observed selectivity is calculated in this way.
+  Some elements elem_set are checked if they belong to container.
+  Observed selectivity is calculated as the count of elem_set
+  elements that belong to container devided by all elem_set elements.
+*/
+
+class Rowid_filter_tracker : public Sql_alloc
+{
+private:
+  /* A member to track the time to fill the rowid filter */
+  Time_and_counter_tracker time_tracker;
+
+  /* Size of the rowid filter container buffer */
+  size_t container_buff_size;
+
+  /* Count of elements that were used to fill the rowid filter container */
+  uint container_elements;
+
+  /* Elements counts used for observed selectivity calculation */
+  uint n_checks;
+  uint n_positive_checks;
+public:
+  Rowid_filter_tracker(bool do_timing) :
+    time_tracker(do_timing), container_buff_size(0),
+    container_elements(0), n_checks(0), n_positive_checks(0)
+  {}
+
+  inline void start_tracking(THD *thd)
+  {
+    ANALYZE_START_TRACKING(thd, &time_tracker);
+  }
+
+  inline void stop_tracking(THD *thd)
+  {
+    ANALYZE_STOP_TRACKING(thd, &time_tracker);
+  }
+
+  /* Save container buffer size in bytes */
+  inline void report_container_buff_size(uint elem_size)
+  {
+   container_buff_size= container_elements * elem_size / 8;
+  }
+
+  Time_and_counter_tracker *get_time_tracker()
+  {
+    return &time_tracker;
+  }
+
+  double get_time_fill_container_ms() const
+  {
+    return time_tracker.get_time_ms();
+  }
+
+  void increment_checked_elements_count(bool was_checked)
+  {
+    n_checks++;
+    if (was_checked)
+     n_positive_checks++;
+  }
+
+  inline void increment_container_elements_count() { container_elements++; }
+
+  uint get_container_elements() const { return container_elements; }
+
+  uint get_container_lookups() { return n_checks; }
+
+  double get_r_selectivity_pct() const
+  {
+    return n_checks ? static_cast<double>(n_positive_checks) /
+                      static_cast<double>(n_checks) : 0;
+  }
+
+  size_t get_container_buff_size() const { return container_buff_size; }
+};
diff --git a/sql/sql_array.h b/sql/sql_array.h
new file mode 100644
index 00000000..85a53ae1
--- /dev/null
+++ b/sql/sql_array.h
@@ -0,0 +1,298 @@
+#ifndef SQL_ARRAY_INCLUDED
+#define SQL_ARRAY_INCLUDED
+
+/* Copyright (c) 2003, 2005-2007 MySQL AB, 2009 Sun Microsystems, Inc.
+   Use is subject to license terms.
+   Copyright (c) 2020, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#include <my_sys.h>
+
+/**
+   A wrapper class which provides array bounds checking.
+   We do *not* own the array, we simply have a pointer to the first element,
+   and a length.
+
+   @remark
+   We want the compiler-generated versions of:
+   - the copy CTOR (memberwise initialization)
+   - the assignment operator (memberwise assignment)
+
+   @param Element_type The type of the elements of the container.
+ */
+template <typename Element_type> class Bounds_checked_array
+{
+public:
+  Bounds_checked_array()= default;
+
+  Bounds_checked_array(Element_type *el, size_t size_arg)
+    : m_array(el), m_size(size_arg)
+  {}
+
+  void reset() { m_array= NULL; m_size= 0; }
+ 
+  void reset(Element_type *array_arg, size_t size_arg)
+  {
+    m_array= array_arg;
+    m_size= size_arg;
+  }
+
+  /**
+    Set a new bound on the array. Does not resize the underlying
+    array, so the new size must be smaller than or equal to the
+    current size.
+   */
+  void resize(size_t new_size)
+  {
+    DBUG_ASSERT(new_size <= m_size);
+    m_size= new_size;
+  }
+
+  Element_type &operator[](size_t n)
+  {
+    DBUG_ASSERT(n < m_size);
+    return m_array[n];
+  }
+
+  const Element_type &operator[](size_t n) const
+  {
+    DBUG_ASSERT(n < m_size);
+    return m_array[n];
+  }
+
+  size_t element_size() const { return sizeof(Element_type); }
+  size_t size() const         { return m_size; }
+
+  bool is_null() const { return m_array == NULL; }
+
+  void pop_front()
+  {
+    DBUG_ASSERT(m_size > 0);
+    m_array+= 1;
+    m_size-= 1;
+  }
+
+  Element_type *array() const { return m_array; }
+
+  Element_type *begin() const { return array(); }
+  Element_type *end() const { return array() + m_size; }
+
+
+  bool operator==(const Bounds_checked_array&rhs) const
+  {
+    return m_array == rhs.m_array && m_size == rhs.m_size;
+  }
+  bool operator!=(const Bounds_checked_array&rhs) const
+  {
+    return m_array != rhs.m_array || m_size != rhs.m_size;
+  }
+
+private:
+  Element_type *m_array= nullptr;
+  size_t        m_size= 0;
+};
+
+/*
+  A typesafe wrapper around DYNAMIC_ARRAY
+
+  TODO: Change creator to take a THREAD_SPECIFIC option.
+*/
+
+template <class Elem> class Dynamic_array
+{
+  DYNAMIC_ARRAY array;
+public:
+  Dynamic_array(PSI_memory_key psi_key, size_t prealloc=16, size_t increment=16)
+  {
+    init(psi_key, prealloc, increment);
+  }
+
+  Dynamic_array(MEM_ROOT *root, size_t prealloc=16, size_t increment=16)
+  {
+    void *init_buffer= alloc_root(root, sizeof(Elem) * prealloc);
+    init_dynamic_array2(root->psi_key, &array, sizeof(Elem), init_buffer,
+                        prealloc, increment, MYF(0));
+  }
+
+  void init(PSI_memory_key psi_key, size_t prealloc=16, size_t increment=16)
+  {
+    init_dynamic_array2(psi_key, &array, sizeof(Elem), 0, prealloc, increment, MYF(0));
+  }
+
+  /**
+     @note Though formally this could be declared "const" it would be
+     misleading at it returns a non-const pointer to array's data.
+  */
+  Elem& at(size_t idx)
+  {
+    DBUG_ASSERT(idx < array.elements);
+    return *(((Elem*)array.buffer) + idx);
+  }
+  /// Const variant of at(), which cannot change data
+  const Elem& at(size_t idx) const
+  {
+    return *(((Elem*)array.buffer) + idx);
+  }
+
+  /// @returns pointer to first element
+  Elem *front()
+  {
+    return (Elem*)array.buffer;
+  }
+
+  /// @returns pointer to first element
+  const Elem *front() const
+  {
+    return (const Elem*)array.buffer;
+  }
+
+  /// @returns pointer to last element
+  Elem *back()
+  {
+    return ((Elem*)array.buffer) + array.elements - 1;
+  }
+
+  /// @returns pointer to last element
+  const Elem *back() const
+  {
+    return ((const Elem*)array.buffer) + array.elements - 1;
+  }
+
+  size_t size() const { return array.elements; }
+
+  const Elem *end() const
+  {
+    return back() + 1;
+  }
+
+  /// @returns pointer to n-th element
+  Elem *get_pos(size_t idx)
+  {
+    return ((Elem*)array.buffer) + idx;
+  }
+
+  /// @returns pointer to n-th element
+  const Elem *get_pos(size_t idx) const
+  {
+    return ((const Elem*)array.buffer) + idx;
+  }
+
+  /**
+     @retval false ok
+     @retval true  OOM, @c my_error() has been called.
+  */
+  bool append(const Elem &el)
+  {
+    return insert_dynamic(&array, &el);
+  }
+
+  bool append_val(Elem el)
+  {
+    return (insert_dynamic(&array, (uchar*)&el));
+  }
+
+  bool push(Elem &el)
+  {
+    return append(el);
+  }
+
+  /// Pops the last element. Does nothing if array is empty.
+  Elem& pop()
+  {
+    return *((Elem*)pop_dynamic(&array));
+  }
+
+  void del(size_t idx)
+  {
+    DBUG_ASSERT(idx <= array.max_element);
+    delete_dynamic_element(&array, idx);
+  }
+
+  size_t elements() const
+  {
+    return array.elements;
+  }
+
+  void elements(size_t num_elements)
+  {
+    DBUG_ASSERT(num_elements <= array.max_element);
+    array.elements= num_elements;
+  }
+
+  void clear()
+  {
+    elements(0);
+  }
+
+  void set(size_t idx, const Elem &el)
+  {
+    set_dynamic(&array, &el, idx);
+  }
+
+  void freeze()
+  {
+    freeze_size(&array);
+  }
+
+  bool reserve(size_t new_size)
+  {
+    return allocate_dynamic(&array, new_size);
+  }
+
+
+  bool resize(size_t new_size, Elem default_val)
+  {
+    size_t old_size= elements();
+    if (reserve(new_size))
+      return true;
+    
+    if (new_size > old_size)
+    {
+      set_dynamic(&array, (uchar*)&default_val, new_size - 1);
+      /*for (size_t i= old_size; i != new_size; i++)
+      {
+        at(i)= default_val;
+      }*/
+    }
+    return false;
+  }
+
+  ~Dynamic_array()
+  {
+    delete_dynamic(&array);
+  }
+
+  void free_memory()
+  {
+    delete_dynamic(&array);
+  }
+
+  typedef int (*CMP_FUNC)(const Elem *el1, const Elem *el2);
+
+  void sort(CMP_FUNC cmp_func)
+  {
+    my_qsort(array.buffer, array.elements, sizeof(Elem), (qsort_cmp)cmp_func);
+  }
+
+  typedef int (*CMP_FUNC2)(void *, const Elem *el1, const Elem *el2);
+  void sort(CMP_FUNC2 cmp_func, void *data)
+  {
+    my_qsort2(array.buffer, array.elements, sizeof(Elem), (qsort2_cmp)cmp_func, data);
+  }
+};
+
+typedef Bounds_checked_array<Item*> Ref_ptr_array;
+
+#endif /* SQL_ARRAY_INCLUDED */
diff --git a/sql/sql_audit.cc b/sql/sql_audit.cc
new file mode 100644
index 00000000..c9c59c1b
--- /dev/null
+++ b/sql/sql_audit.cc
@@ -0,0 +1,482 @@
+/* Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#include "mariadb.h"
+#include "sql_priv.h"
+#include "mysqld.h"
+#include "sql_audit.h"
+
+extern int initialize_audit_plugin(st_plugin_int *plugin);
+extern int finalize_audit_plugin(st_plugin_int *plugin);
+
+#ifndef EMBEDDED_LIBRARY
+
+struct st_mysql_event_generic
+{
+  unsigned long event_class_mask[MYSQL_AUDIT_CLASS_MASK_SIZE];
+  unsigned int event_class;
+  const void *event;
+};
+
+unsigned long mysql_global_audit_mask[MYSQL_AUDIT_CLASS_MASK_SIZE];
+
+static mysql_mutex_t LOCK_audit_mask;
+
+
+static inline
+void set_audit_mask(unsigned long *mask, uint event_class)
+{
+  mask[0]= 1;
+  mask[0]<<= event_class;
+}
+
+static inline
+void add_audit_mask(unsigned long *mask, const unsigned long *rhs)
+{
+  mask[0]|= rhs[0];
+}
+
+static inline
+bool check_audit_mask(const unsigned long *lhs,
+                      const unsigned long *rhs)
+{
+  return !(lhs[0] & rhs[0]);
+}
+
+
+/**
+  Acquire and lock any additional audit plugins as required
+  
+  @param[in] thd
+  @param[in] plugin
+  @param[in] arg
+
+  @retval FALSE Always  
+*/
+
+static my_bool acquire_plugins(THD *thd, plugin_ref plugin, void *arg)
+{
+  ulong *event_class_mask= (ulong*) arg;
+  st_mysql_audit *data= plugin_data(plugin, struct st_mysql_audit *);
+
+  /* Check if this plugin is interested in the event */
+  if (check_audit_mask(data->class_mask, event_class_mask))
+    return 0;
+
+  /*
+    Check if this plugin may already be registered. This will fail to
+    acquire a newly installed plugin on a specific corner case where
+    one or more event classes already in use by the calling thread
+    are an event class of which the audit plugin has interest.
+  */
+  if (!check_audit_mask(data->class_mask, thd->audit_class_mask))
+    return 0;
+  
+  /* Check if we need to initialize the array of acquired plugins */
+  if (unlikely(!thd->audit_class_plugins.buffer))
+  {
+    /* specify some reasonable initialization defaults */
+    my_init_dynamic_array(PSI_INSTRUMENT_ME, &thd->audit_class_plugins,
+                          sizeof(plugin_ref), 16, 16, MYF(0));
+  }
+  
+  /* lock the plugin and add it to the list */
+  plugin= my_plugin_lock(NULL, plugin);
+  insert_dynamic(&thd->audit_class_plugins, (uchar*) &plugin);
+
+  return 0;
+}
+
+
+/**
+  @brief Acquire audit plugins
+
+  @param[in]   thd              MySQL thread handle
+  @param[in]   event_class      Audit event class
+
+  @details Ensure that audit plugins interested in given event
+  class are locked by current thread.
+*/
+void mysql_audit_acquire_plugins(THD *thd, ulong *event_class_mask)
+{
+  DBUG_ENTER("mysql_audit_acquire_plugins");
+  DBUG_ASSERT(thd);
+  if (check_audit_mask(thd->audit_class_mask, event_class_mask))
+  {
+    plugin_foreach(thd, acquire_plugins, MYSQL_AUDIT_PLUGIN, event_class_mask);
+    add_audit_mask(thd->audit_class_mask, event_class_mask);
+    thd->audit_plugin_version= global_plugin_version;
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Check if there were changes in the state of plugins
+  so we need to do the mysql_audit_release asap.
+
+  @param[in] thd
+
+*/
+
+my_bool mysql_audit_release_required(THD *thd)
+{
+  return thd && (thd->audit_plugin_version != global_plugin_version);
+}
+
+
+/**
+  Release any resources associated with the current thd.
+  
+  @param[in] thd
+
+*/
+
+void mysql_audit_release(THD *thd)
+{
+  plugin_ref *plugins, *plugins_last;
+  
+  if (!thd || !(thd->audit_class_plugins.elements))
+    return;
+  
+  plugins= (plugin_ref*) thd->audit_class_plugins.buffer;
+  plugins_last= plugins + thd->audit_class_plugins.elements;
+  for (; plugins < plugins_last; plugins++)
+  {
+    st_mysql_audit *data= plugin_data(*plugins, struct st_mysql_audit *);
+	
+    /* Check to see if the plugin has a release method */
+    if (!(data->release_thd))
+      continue;
+
+    /* Tell the plugin to release its resources */
+    data->release_thd(thd);
+  }
+
+  /* Now we actually unlock the plugins */  
+  plugin_unlock_list(NULL, (plugin_ref*) thd->audit_class_plugins.buffer,
+                     thd->audit_class_plugins.elements);
+  
+  /* Reset the state of thread values */
+  reset_dynamic(&thd->audit_class_plugins);
+  bzero(thd->audit_class_mask, sizeof(thd->audit_class_mask));
+  thd->audit_plugin_version= -1;
+}
+
+
+/**
+  Initialize thd variables used by Audit
+  
+  @param[in] thd
+
+*/
+
+void mysql_audit_init_thd(THD *thd)
+{
+  bzero(&thd->audit_class_plugins, sizeof(thd->audit_class_plugins));
+  bzero(thd->audit_class_mask, sizeof(thd->audit_class_mask));
+}
+
+
+/**
+  Free thd variables used by Audit
+  
+  @param[in] thd
+  @param[in] plugin
+  @param[in] arg
+
+  @retval FALSE Always  
+*/
+
+void mysql_audit_free_thd(THD *thd)
+{
+  mysql_audit_release(thd);
+  DBUG_ASSERT(thd->audit_class_plugins.elements == 0);
+  delete_dynamic(&thd->audit_class_plugins);
+}
+
+#ifdef HAVE_PSI_INTERFACE
+static PSI_mutex_key key_LOCK_audit_mask;
+
+static PSI_mutex_info all_audit_mutexes[]=
+{
+  { &key_LOCK_audit_mask, "LOCK_audit_mask", PSI_FLAG_GLOBAL}
+};
+
+static void init_audit_psi_keys(void)
+{
+  const char* category= "sql";
+  int count;
+
+  if (PSI_server == NULL)
+    return;
+
+  count= array_elements(all_audit_mutexes);
+  PSI_server->register_mutex(category, all_audit_mutexes, count);
+}
+#endif /* HAVE_PSI_INTERFACE */
+
+/**
+  Initialize Audit global variables
+*/
+
+void mysql_audit_initialize()
+{
+#ifdef HAVE_PSI_INTERFACE
+  init_audit_psi_keys();
+#endif
+
+  mysql_mutex_init(key_LOCK_audit_mask, &LOCK_audit_mask, MY_MUTEX_INIT_FAST);
+  bzero(mysql_global_audit_mask, sizeof(mysql_global_audit_mask));
+}
+
+
+/**
+  Finalize Audit global variables  
+*/
+
+void mysql_audit_finalize()
+{
+  mysql_mutex_destroy(&LOCK_audit_mask);
+}
+
+
+/**
+  Initialize an Audit plug-in
+
+  @param[in] plugin
+
+  @retval FALSE  OK
+  @retval TRUE   There was an error.
+*/
+
+int initialize_audit_plugin(st_plugin_int *plugin)
+{
+  st_mysql_audit *data= (st_mysql_audit*) plugin->plugin->info;
+
+  if (!data->event_notify || !data->class_mask[0])
+  {
+    sql_print_error("Plugin '%s' has invalid data.",
+                    plugin->name.str);
+    return 1;
+  }
+
+  if (plugin->plugin->init && plugin->plugin->init(NULL))
+  {
+    sql_print_error("Plugin '%s' init function returned error.",
+                    plugin->name.str);
+    return 1;
+  }
+
+  /* Make the interface info more easily accessible */
+  plugin->data= plugin->plugin->info;
+
+  /* Add the bits the plugin is interested in to the global mask */
+  mysql_mutex_lock(&LOCK_audit_mask);
+  add_audit_mask(mysql_global_audit_mask, data->class_mask);
+  mysql_mutex_unlock(&LOCK_audit_mask);
+
+  /*
+    Pre-acquire the newly inslalled audit plugin for events that
+    may potentially occur further during INSTALL PLUGIN.
+
+    When audit event is triggered, audit subsystem acquires interested
+    plugins by walking through plugin list. Evidently plugin list
+    iterator protects plugin list by acquiring LOCK_plugin, see
+    plugin_foreach_with_mask().
+
+    On the other hand [UN]INSTALL PLUGIN is acquiring LOCK_plugin
+    rather for a long time.
+
+    When audit event is triggered during [UN]INSTALL PLUGIN, plugin
+    list iterator acquires the same lock (within the same thread)
+    second time.
+
+    This hack should be removed when LOCK_plugin is fixed so it
+    protects only what it supposed to protect.
+
+    See also mysql_install_plugin() and mysql_uninstall_plugin()
+  */
+  THD *thd= current_thd;
+  if (thd)
+  {
+    acquire_plugins(thd, plugin_int_to_ref(plugin), data->class_mask);
+    add_audit_mask(thd->audit_class_mask, data->class_mask);
+  }
+
+  return 0;
+}
+
+
+/**
+  Performs a bitwise OR of the installed plugins event class masks
+
+  @param[in] thd
+  @param[in] plugin
+  @param[in] arg
+
+  @retval FALSE  always
+*/
+static my_bool calc_class_mask(THD *thd, plugin_ref plugin, void *arg)
+{
+  /* plugin_data was called twice here; one fetch + NULL check suffices */
+  st_mysql_audit *data= plugin_data(plugin, struct st_mysql_audit *);
+  if (data)
+    add_audit_mask((unsigned long *) arg, data->class_mask);
+  return 0;
+}
+
+
+/**
+  Finalize an Audit plug-in
+  
+  @param[in] plugin
+
+  @retval FALSE  OK
+  @retval TRUE   There was an error.
+*/
+int finalize_audit_plugin(st_plugin_int *plugin)
+{
+  int deinit_status= 0;
+  unsigned long event_class_mask[MYSQL_AUDIT_CLASS_MASK_SIZE];
+  
+  if (plugin->plugin->deinit)
+    deinit_status= plugin->plugin->deinit(NULL);
+  
+  plugin->data= NULL;
+  bzero(&event_class_mask, sizeof(event_class_mask));
+
+  /* Iterate through all the installed plugins to create new mask */
+
+  /*
+    LOCK_audit_mask/LOCK_plugin order is not fixed, but serialized with table
+    lock on mysql.plugin.
+  */
+  mysql_mutex_lock(&LOCK_audit_mask);
+  plugin_foreach(current_thd, calc_class_mask, MYSQL_AUDIT_PLUGIN,
+                 &event_class_mask);
+
+  /* Set the global audit mask */
+  bmove(mysql_global_audit_mask, event_class_mask, sizeof(event_class_mask));
+  mysql_mutex_unlock(&LOCK_audit_mask);
+
+  return deinit_status;
+}
+
+
+/**
+  Dispatches an event by invoking the plugin's event_notify method.  
+
+  @param[in] thd
+  @param[in] plugin
+  @param[in] arg
+
+  @retval FALSE  always
+*/
+
+static my_bool plugins_dispatch(THD *thd, plugin_ref plugin, void *arg)
+{
+  const struct st_mysql_event_generic *event_generic=
+    (const struct st_mysql_event_generic *) arg;
+  st_mysql_audit *data= plugin_data(plugin, struct st_mysql_audit *);
+
+  /* Check to see if the plugin is interested in this event */
+  if (!check_audit_mask(data->class_mask, event_generic->event_class_mask))
+    data->event_notify(thd, event_generic->event_class, event_generic->event);
+
+  return 0;
+}
+
+
+/**
+  Distributes an audit event to plug-ins
+
+  @param[in] thd
+  @param[in] event_class
+  @param[in] event
+*/
+
+void mysql_audit_notify(THD *thd, uint event_class, const void *event)
+{
+  struct st_mysql_event_generic event_generic;
+  event_generic.event_class= event_class;
+  event_generic.event= event;
+  set_audit_mask(event_generic.event_class_mask, event_class);
+  /*
+    Check if we are doing a slow global dispatch. This event occurs when
+    thd == NULL as it is not associated with any particular thread.
+  */
+  if (unlikely(!thd))
+  {
+    plugin_foreach(thd, plugins_dispatch, MYSQL_AUDIT_PLUGIN, &event_generic);
+  }
+  else
+  {
+    plugin_ref *plugins, *plugins_last;
+
+    mysql_audit_acquire_plugins(thd, event_generic.event_class_mask);
+
+    /* Use the cached set of audit plugins */
+    plugins= (plugin_ref*) thd->audit_class_plugins.buffer;
+    plugins_last= plugins + thd->audit_class_plugins.elements;
+
+    for (; plugins < plugins_last; plugins++)
+      plugins_dispatch(thd, *plugins, &event_generic);
+  }
+}
+
+
+#else /* EMBEDDED_LIBRARY */
+
+
+void mysql_audit_acquire_plugins(THD *thd, ulong *event_class_mask)
+{
+}
+
+
+void mysql_audit_initialize()
+{
+}
+
+
+void mysql_audit_finalize()
+{
+}
+
+
+int initialize_audit_plugin(st_plugin_int *plugin)
+{
+  return 1;
+}
+
+
+int finalize_audit_plugin(st_plugin_int *plugin)
+{
+  return 0;
+}
+
+
+void mysql_audit_release(THD *thd)
+{
+}
+
+void mysql_audit_init_thd(THD *thd)
+{
+}
+
+void mysql_audit_free_thd(THD *thd)
+{
+}
+
+#endif /* EMBEDDED_LIBRARY */
diff --git a/sql/sql_audit.h b/sql/sql_audit.h
new file mode 100644
index 00000000..64500067
--- /dev/null
+++ b/sql/sql_audit.h
@@ -0,0 +1,441 @@
+#ifndef SQL_AUDIT_INCLUDED
+#define SQL_AUDIT_INCLUDED
+
+/* Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2017, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+
+#include <mysql/plugin_audit.h>
+#include "sql_class.h"
+
+extern unsigned long mysql_global_audit_mask[];
+
+
+extern void mysql_audit_initialize();
+extern void mysql_audit_finalize();
+
+
+extern void mysql_audit_init_thd(THD *thd);
+extern void mysql_audit_free_thd(THD *thd);
+extern void mysql_audit_acquire_plugins(THD *thd, ulong *event_class_mask);
+
+
+#ifndef EMBEDDED_LIBRARY
+extern void mysql_audit_notify(THD *thd, uint event_class, const void *event);
+
+/* True if some loaded audit plugin subscribes to GENERAL-class events. */
+static inline bool mysql_audit_general_enabled()
+{
+  return mysql_global_audit_mask[0] & MYSQL_AUDIT_GENERAL_CLASSMASK;
+}
+
+/* True if some loaded audit plugin subscribes to CONNECTION-class events. */
+static inline bool mysql_audit_connection_enabled()
+{
+  return mysql_global_audit_mask[0] & MYSQL_AUDIT_CONNECTION_CLASSMASK;
+}
+
+/* True if some loaded audit plugin subscribes to TABLE-class events. */
+static inline bool mysql_audit_table_enabled()
+{
+  return mysql_global_audit_mask[0] & MYSQL_AUDIT_TABLE_CLASSMASK;
+}
+
+#else
+static inline void mysql_audit_notify(THD *thd, uint event_class,
+                                      const void *event) {}
+#define mysql_audit_general_enabled() 0
+#define mysql_audit_connection_enabled() 0
+#define mysql_audit_table_enabled() 0
+#endif
+extern my_bool mysql_audit_release_required(THD *thd);
+extern void mysql_audit_release(THD *thd);
+
+/* strlen() narrowed to uint, for the audit event length fields. */
+static inline unsigned int strlen_uint(const char *s)
+{
+  return (uint)strlen(s);
+}
+
+/* As strlen_uint(), but s may be NULL (safe_strlen handles that case). */
+static inline unsigned int safe_strlen_uint(const char *s)
+{
+  return (uint)safe_strlen(s);
+}
+
+#define MAX_USER_HOST_SIZE 512
+/*
+  Format "priv_user[user] @ host [ip]" from thd's security context into
+  buf (bounded to MAX_USER_HOST_SIZE by strxnmov) and return the length.
+*/
+static inline uint make_user_name(THD *thd, char *buf)
+{
+  const Security_context *sctx= thd->security_ctx;
+  char *end= strxnmov(buf, MAX_USER_HOST_SIZE,
+                  sctx->priv_user[0] ? sctx->priv_user : "", "[",
+                  sctx->user ? sctx->user : "", "] @ ",
+                  sctx->host ? sctx->host : "", " [",
+                  sctx->ip ? sctx->ip : "", "]", NullS);
+  return (uint)(end-buf);
+}
+
+/**
+  Call audit plugins of GENERAL audit class, MYSQL_AUDIT_GENERAL_LOG subtype.
+  
+  @param[in] thd
+  @param[in] time             time that event occurred
+  @param[in] user             User name
+  @param[in] userlen          User name length
+  @param[in] cmd              Command name
+  @param[in] cmdlen           Command name length
+  @param[in] query            Query string
+  @param[in] querylen         Query string length
+*/
+ 
+static inline
+void mysql_audit_general_log(THD *thd, time_t time,
+                             const char *user, uint userlen,
+                             const char *cmd, uint cmdlen,
+                             const char *query, uint querylen)
+{
+  /* Cheap global-mask check: skip building the event when nobody listens. */
+  if (mysql_audit_general_enabled())
+  {
+    mysql_event_general event;
+
+    event.event_subclass= MYSQL_AUDIT_GENERAL_LOG;
+    event.general_error_code= 0;
+    event.general_time= time;
+    event.general_user= user;
+    event.general_user_length= userlen;
+    event.general_command= cmd;
+    event.general_command_length= cmdlen;
+    event.general_query= query;
+    event.general_query_length= querylen;
+    event.general_rows= 0;
+
+    if (thd)
+    {
+      event.general_thread_id= (unsigned long)thd->thread_id;
+      event.general_charset= thd->variables.character_set_client;
+      event.database= thd->db;
+      event.query_id= thd->query_id;
+    }
+    else
+    {
+      /* Not tied to a session: fall back to server-wide defaults. */
+      event.general_thread_id= 0;
+      event.general_charset= global_system_variables.character_set_client;
+      event.database= null_clex_str;
+      event.query_id= 0;
+    }
+
+    mysql_audit_notify(thd, MYSQL_AUDIT_GENERAL_CLASS, &event);
+  }
+}
+
+/**
+  Call audit plugins of GENERAL audit class.
+  event_subtype should be set to one of:
+    MYSQL_AUDIT_GENERAL_ERROR
+    MYSQL_AUDIT_GENERAL_RESULT
+    MYSQL_AUDIT_GENERAL_STATUS
+  
+  @param[in] thd
+  @param[in] event_subtype    Type of general audit event.
+  @param[in] error_code       Error code
+  @param[in] msg              Message
+*/
+static inline
+void mysql_audit_general(THD *thd, uint event_subtype,
+                         int error_code, const char *msg)
+{
+  DBUG_ENTER("mysql_audit_general");
+  if (mysql_audit_general_enabled())
+  {
+    /* Stack buffer for the "user[name] @ host [ip]" string. */
+    char user_buff[MAX_USER_HOST_SIZE+1];
+    mysql_event_general event;
+
+    event.event_subclass= event_subtype;
+    event.general_error_code= error_code;
+    event.general_time= my_time(0);
+    /* The message is passed in the "command" slot of the general event. */
+    event.general_command= msg;
+    event.general_command_length= safe_strlen_uint(msg);
+
+    if (thd)
+    {
+      event.general_user= user_buff;
+      event.general_user_length= make_user_name(thd, user_buff);
+      event.general_thread_id= (unsigned long)thd->thread_id;
+      event.general_query= thd->query_string.str();
+      event.general_query_length= (unsigned) thd->query_string.length();
+      event.general_charset= thd->query_string.charset();
+      event.general_rows= thd->get_stmt_da()->current_row_for_warning();
+      event.database= thd->db;
+      event.query_id= thd->query_id;
+    }
+    else
+    {
+      /* No session context: emit an anonymous event with empty fields. */
+      event.general_user= NULL;
+      event.general_user_length= 0;
+      event.general_thread_id= 0;
+      event.general_query= NULL;
+      event.general_query_length= 0;
+      event.general_charset= &my_charset_bin;
+      event.general_rows= 0;
+      event.database= null_clex_str;
+      event.query_id= 0;
+    }
+
+    mysql_audit_notify(thd, MYSQL_AUDIT_GENERAL_CLASS, &event);
+  }
+  DBUG_VOID_RETURN;
+}
+
+/*
+  Notify CONNECTION-class audit plugins that a new session was established.
+  status carries the login error code if authentication failed, else 0.
+*/
+static inline
+void mysql_audit_notify_connection_connect(THD *thd)
+{
+  if (mysql_audit_connection_enabled())
+  {
+    const Security_context *sctx= thd->security_ctx;
+    mysql_event_connection event;
+
+    event.event_subclass= MYSQL_AUDIT_CONNECTION_CONNECT;
+    event.status= thd->get_stmt_da()->is_error() ?
+                  thd->get_stmt_da()->sql_errno() : 0;
+    event.thread_id= (unsigned long)thd->thread_id;
+    event.user= sctx->user;
+    event.user_length= safe_strlen_uint(sctx->user);
+    event.priv_user= sctx->priv_user;
+    event.priv_user_length= strlen_uint(sctx->priv_user);
+    event.external_user= sctx->external_user;
+    event.external_user_length= safe_strlen_uint(sctx->external_user);
+    event.proxy_user= sctx->proxy_user;
+    event.proxy_user_length= strlen_uint(sctx->proxy_user);
+    event.host= sctx->host;
+    event.host_length= safe_strlen_uint(sctx->host);
+    event.ip= sctx->ip;
+    event.ip_length= safe_strlen_uint(sctx->ip);
+    event.database= thd->db;
+
+    mysql_audit_notify(thd, MYSQL_AUDIT_CONNECTION_CLASS, &event);
+  }
+}
+
+/*
+  Notify CONNECTION-class audit plugins that a session ended.
+
+  @param thd      THD of the closing connection
+  @param errcode  error code the connection terminated with (0 on a
+                  normal disconnect)
+*/
+static inline
+void mysql_audit_notify_connection_disconnect(THD *thd, int errcode)
+{
+  if (mysql_audit_connection_enabled())
+  {
+    const Security_context *sctx= thd->security_ctx;
+    mysql_event_connection event;
+
+    event.event_subclass= MYSQL_AUDIT_CONNECTION_DISCONNECT;
+    event.status= errcode;
+    event.thread_id= (unsigned long)thd->thread_id;
+    event.user= sctx->user;
+    event.user_length= safe_strlen_uint(sctx->user);
+    event.priv_user= sctx->priv_user;
+    event.priv_user_length= strlen_uint(sctx->priv_user);
+    event.external_user= sctx->external_user;
+    event.external_user_length= safe_strlen_uint(sctx->external_user);
+    event.proxy_user= sctx->proxy_user;
+    event.proxy_user_length= strlen_uint(sctx->proxy_user);
+    event.host= sctx->host;
+    event.host_length= safe_strlen_uint(sctx->host);
+    event.ip= sctx->ip;
+    event.ip_length= safe_strlen_uint(sctx->ip);
+    event.database= thd->db;
+
+    mysql_audit_notify(thd, MYSQL_AUDIT_CONNECTION_CLASS, &event);
+  }
+}
+
+/*
+  Notify CONNECTION-class audit plugins about COM_CHANGE_USER.
+  The event reports the *previous* identity (old_ctx); status carries the
+  error code if the user change failed, else 0.
+*/
+static inline
+void mysql_audit_notify_connection_change_user(THD *thd,
+                                               const Security_context *old_ctx)
+{
+  if (mysql_audit_connection_enabled())
+  {
+    mysql_event_connection event;
+
+    event.event_subclass= MYSQL_AUDIT_CONNECTION_CHANGE_USER;
+    event.status= thd->get_stmt_da()->is_error() ?
+                  thd->get_stmt_da()->sql_errno() : 0;
+    event.thread_id= (unsigned long)thd->thread_id;
+    event.user= old_ctx->user;
+    event.user_length= safe_strlen_uint(old_ctx->user);
+    event.priv_user= old_ctx->priv_user;
+    event.priv_user_length= strlen_uint(old_ctx->priv_user);
+    event.external_user= old_ctx->external_user;
+    event.external_user_length= safe_strlen_uint(old_ctx->external_user);
+    event.proxy_user= old_ctx->proxy_user;
+    event.proxy_user_length= strlen_uint(old_ctx->proxy_user);
+    event.host= old_ctx->host;
+    event.host_length= safe_strlen_uint(old_ctx->host);
+    event.ip= old_ctx->ip;
+    event.ip_length= safe_strlen_uint(old_ctx->ip);
+    event.database= thd->db;
+
+    mysql_audit_notify(thd, MYSQL_AUDIT_CONNECTION_CLASS, &event);
+  }
+}
+
+
+/*
+  Emit a TABLE-class MYSQL_AUDIT_TABLE_LOCK event for an external lock.
+  Unlock requests (F_UNLCK) are deliberately not audited.
+  Identity fields (thread_id/user/host/ip/query_id) are passed explicitly
+  so callers can report on behalf of another session.
+*/
+static inline
+void mysql_audit_external_lock_ex(THD *thd, my_thread_id thread_id,
+    const char *user, const char *host, const char *ip, query_id_t query_id,
+    TABLE_SHARE *share, int lock)
+{
+  if (lock != F_UNLCK && mysql_audit_table_enabled())
+  {
+    const Security_context *sctx= thd->security_ctx;
+    mysql_event_table event;
+
+    event.event_subclass= MYSQL_AUDIT_TABLE_LOCK;
+    event.read_only= lock == F_RDLCK;
+    event.thread_id= (unsigned long)thread_id;
+    event.user= user;
+    event.priv_user= sctx->priv_user;
+    event.priv_host= sctx->priv_host;
+    event.external_user= sctx->external_user;
+    event.proxy_user= sctx->proxy_user;
+    event.host= host;
+    event.ip= ip;
+    event.database= share->db;
+    event.table= share->table_name;
+    event.new_database= null_clex_str;
+    event.new_table= null_clex_str;
+    event.query_id= query_id;
+
+    mysql_audit_notify(thd, MYSQL_AUDIT_TABLE_CLASS, &event);
+  }
+}
+
+/* Convenience wrapper: audit a lock using thd's own identity and query id. */
+static inline
+void mysql_audit_external_lock(THD *thd, TABLE_SHARE *share, int lock)
+{
+  mysql_audit_external_lock_ex(thd, thd->thread_id, thd->security_ctx->user,
+      thd->security_ctx->host, thd->security_ctx->ip, thd->query_id,
+      share, lock);
+}
+
+/*
+  Emit a TABLE-class MYSQL_AUDIT_TABLE_CREATE event for a just-created
+  table; session context is taken from table->in_use.
+*/
+static inline
+void mysql_audit_create_table(TABLE *table)
+{
+  if (mysql_audit_table_enabled())
+  {
+    THD *thd= table->in_use;
+    const TABLE_SHARE *share= table->s;
+    const Security_context *sctx= thd->security_ctx;
+    mysql_event_table event;
+
+    event.event_subclass= MYSQL_AUDIT_TABLE_CREATE;
+    event.read_only= 0;
+    event.thread_id= (unsigned long)thd->thread_id;
+    event.user= sctx->user;
+    event.priv_user= sctx->priv_user;
+    event.priv_host= sctx->priv_host;
+    event.external_user= sctx->external_user;
+    event.proxy_user= sctx->proxy_user;
+    event.host= sctx->host;
+    event.ip= sctx->ip;
+    event.database=     share->db;
+    event.table=        share->table_name;
+    event.new_database= null_clex_str;
+    event.new_table=    null_clex_str;
+    event.query_id=     thd->query_id;
+
+    mysql_audit_notify(thd, MYSQL_AUDIT_TABLE_CLASS, &event);
+  }
+}
+
+/* Emit a TABLE-class MYSQL_AUDIT_TABLE_DROP event for the given table. */
+static inline
+void mysql_audit_drop_table(THD *thd, TABLE_LIST *table)
+{
+  if (mysql_audit_table_enabled())
+  {
+    const Security_context *sctx= thd->security_ctx;
+    mysql_event_table event;
+
+    event.event_subclass= MYSQL_AUDIT_TABLE_DROP;
+    event.read_only= 0;
+    event.thread_id= (unsigned long)thd->thread_id;
+    event.user= sctx->user;
+    event.priv_user= sctx->priv_user;
+    event.priv_host= sctx->priv_host;
+    event.external_user= sctx->external_user;
+    event.proxy_user= sctx->proxy_user;
+    event.host= sctx->host;
+    event.ip= sctx->ip;
+    event.database=     table->db;
+    event.table=        table->table_name;
+    event.new_database= null_clex_str;
+    event.new_table=    null_clex_str;
+    event.query_id=     thd->query_id;
+
+    mysql_audit_notify(thd, MYSQL_AUDIT_TABLE_CLASS, &event);
+  }
+}
+
+/*
+  Emit a TABLE-class MYSQL_AUDIT_TABLE_RENAME event; old and new
+  db/table names are both reported (new_* is unused by other subtypes).
+*/
+static inline
+void mysql_audit_rename_table(THD *thd, const LEX_CSTRING *old_db,
+                              const LEX_CSTRING *old_tb,
+                              const LEX_CSTRING *new_db, const LEX_CSTRING *new_tb)
+{
+  if (mysql_audit_table_enabled())
+  {
+    const Security_context *sctx= thd->security_ctx;
+    mysql_event_table event;
+
+    event.event_subclass= MYSQL_AUDIT_TABLE_RENAME;
+    event.read_only= 0;
+    event.thread_id= (unsigned long)thd->thread_id;
+    event.user= sctx->user;
+    event.priv_user= sctx->priv_user;
+    event.priv_host= sctx->priv_host;
+    event.external_user= sctx->external_user;
+    event.proxy_user= sctx->proxy_user;
+    event.host= sctx->host;
+    event.ip= sctx->ip;
+    event.database=  *old_db;
+    event.table=     *old_tb;
+    event.new_database= *new_db;
+    event.new_table= *new_tb;
+    event.query_id= thd->query_id;
+
+    mysql_audit_notify(thd, MYSQL_AUDIT_TABLE_CLASS, &event);
+  }
+}
+
+/* Emit a TABLE-class MYSQL_AUDIT_TABLE_ALTER event for the given table. */
+static inline
+void mysql_audit_alter_table(THD *thd, TABLE_LIST *table)
+{
+  if (mysql_audit_table_enabled())
+  {
+    const Security_context *sctx= thd->security_ctx;
+    mysql_event_table event;
+
+    event.event_subclass= MYSQL_AUDIT_TABLE_ALTER;
+    event.read_only= 0;
+    event.thread_id= (unsigned long)thd->thread_id;
+    event.user= sctx->user;
+    event.priv_user= sctx->priv_user;
+    event.priv_host= sctx->priv_host;
+    event.external_user= sctx->external_user;
+    event.proxy_user= sctx->proxy_user;
+    event.host= sctx->host;
+    event.ip= sctx->ip;
+    event.database= table->db;
+    event.table= table->table_name;
+    event.new_database= null_clex_str;
+    event.new_table= null_clex_str;
+    event.query_id= thd->query_id;
+
+    mysql_audit_notify(thd, MYSQL_AUDIT_TABLE_CLASS, &event);
+  }
+}
+
+#endif /* SQL_AUDIT_INCLUDED */
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
new file mode 100644
index 00000000..a0656e48
--- /dev/null
+++ b/sql/sql_base.cc
@@ -0,0 +1,9738 @@
+/* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
+   Copyright (c) 2010, 2022, MariaDB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA */
+
+
+/* Basic functions needed by many modules */
+
+#include "mariadb.h"
+#include "sql_base.h"                           // setup_table_map
+#include "sql_priv.h"
+#include "unireg.h"
+#include "debug_sync.h"
+#include "lock.h"        // mysql_lock_remove,
+                         // mysql_unlock_tables,
+                         // mysql_lock_have_duplicate
+#include "sql_show.h"    // append_identifier
+#include "strfunc.h"     // find_type
+#include "sql_view.h"    // mysql_make_view, VIEW_ANY_ACL
+#include "sql_parse.h"   // check_table_access
+#include "sql_insert.h"  // kill_delayed_threads
+#include "sql_partition.h"               // ALTER_PARTITION_PARAM_TYPE
+#include "sql_derived.h" // mysql_derived_prepare,
+                         // mysql_handle_derived,
+                         // mysql_derived_filling
+#include "sql_handler.h" // mysql_ha_flush
+#include "sql_test.h"
+#include "sql_partition.h"                      // ALTER_PARTITION_PARAM_TYPE
+#include "log_event.h"                          // Query_log_event
+#include "sql_select.h"
+#include "sp_head.h"
+#include "sp.h"
+#include "sp_cache.h"
+#include "sql_trigger.h"
+#include "transaction.h"
+#include "sql_prepare.h"
+#include "sql_statistics.h"
+#include "sql_cte.h"
+#include <m_ctype.h>
+#include <my_dir.h>
+#include <hash.h>
+#include "rpl_filter.h"
+#include "sql_table.h"                          // build_table_filename
+#include "datadict.h"   // dd_frm_is_view()
+#include "rpl_rli.h"   // rpl_group_info
+#ifdef  _WIN32
+#include <io.h>
+#endif
+#include "wsrep_mysqld.h"
+#ifdef WITH_WSREP
+#include "wsrep_thd.h"
+#include "wsrep_trans_observer.h"
+#endif /* WITH_WSREP */
+
+/*
+  Trap ER_NO_SUCH_TABLE(_IN_ENGINE) conditions; record the first error seen
+  and count any other error-level condition as unhandled.
+  Returns TRUE when the condition was absorbed.
+*/
+bool
+No_such_table_error_handler::handle_condition(THD *,
+                                              uint sql_errno,
+                                              const char*,
+                                              Sql_condition::enum_warning_level *level,
+                                              const char*,
+                                              Sql_condition ** cond_hdl)
+{
+  *cond_hdl= NULL;
+  /* Remember the very first error for later inspection by the caller. */
+  if (!first_error)
+    first_error= sql_errno;
+  if (sql_errno == ER_NO_SUCH_TABLE || sql_errno == ER_NO_SUCH_TABLE_IN_ENGINE)
+  {
+    m_handled_errors++;
+    return TRUE;
+  }
+
+  if (*level == Sql_condition::WARN_LEVEL_ERROR)
+    m_unhandled_errors++;
+  return FALSE;
+}
+
+
+/* TRUE when only ER_NO_SUCH_TABLE-style errors were seen (safe to ignore). */
+bool No_such_table_error_handler::safely_trapped_errors()
+{
+  /*
+    If m_unhandled_errors != 0, something else, unanticipated, happened,
+    so the error is not trapped but returned to the caller.
+    Multiple ER_NO_SUCH_TABLE can be raised in case of views.
+  */
+  return ((m_handled_errors > 0) && (m_unhandled_errors == 0));
+}
+
+/**
+  This internal handler is used to trap ER_NO_SUCH_TABLE and
+  ER_WRONG_MRG_TABLE errors during CHECK/REPAIR TABLE for MERGE
+  tables.
+*/
+
+class Repair_mrg_table_error_handler : public Internal_error_handler
+{
+public:
+  Repair_mrg_table_error_handler()
+    : m_handled_errors(false), m_unhandled_errors(false)
+  {}
+
+  /* Absorbs ER_NO_SUCH_TABLE(_IN_ENGINE)/ER_WRONG_MRG_TABLE; see .cc body. */
+  bool handle_condition(THD *thd,
+                        uint sql_errno,
+                        const char* sqlstate,
+                        Sql_condition::enum_warning_level *level,
+                        const char* msg,
+                        Sql_condition ** cond_hdl);
+
+  /**
+    Returns TRUE if there were ER_NO_SUCH_/WRONG_MRG_TABLE and there
+    were no unhandled errors. FALSE otherwise.
+  */
+  bool safely_trapped_errors()
+  {
+    /*
+      Check for m_handled_errors is here for extra safety.
+      It can be useful in situation when call to open_table()
+      fails because some error which was suppressed by another
+      error handler (e.g. in case of MDL deadlock which we
+      decided to solve by back-off and retry).
+    */
+    return (m_handled_errors && (! m_unhandled_errors));
+  }
+
+private:
+  bool m_handled_errors;    // a trappable MERGE-related error was seen
+  bool m_unhandled_errors;  // some other error was seen
+};
+
+
+/*
+  Trap the MERGE-table errors that CHECK/REPAIR TABLE may legitimately
+  raise; flag anything else as unhandled.
+*/
+bool
+Repair_mrg_table_error_handler::handle_condition(THD *,
+                                                 uint sql_errno,
+                                                 const char*,
+                                                 Sql_condition::enum_warning_level *level,
+                                                 const char*,
+                                                 Sql_condition ** cond_hdl)
+{
+  *cond_hdl= NULL;
+  if (sql_errno == ER_NO_SUCH_TABLE ||
+      sql_errno == ER_NO_SUCH_TABLE_IN_ENGINE ||
+      sql_errno == ER_WRONG_MRG_TABLE)
+  {
+    m_handled_errors= true;
+    return TRUE;
+  }
+
+  m_unhandled_errors= true;
+  return FALSE;
+}
+
+
+/**
+  @defgroup Data_Dictionary Data Dictionary
+  @{
+*/
+
+static bool check_and_update_table_version(THD *thd, TABLE_LIST *tables,
+                                           TABLE_SHARE *table_share);
+static bool open_table_entry_fini(THD *thd, TABLE_SHARE *share, TABLE *entry);
+static bool auto_repair_table(THD *thd, TABLE_LIST *table_list);
+
+
+/**
+  Get table cache key for a table list element.
+
+  @param table_list[in]  Table list element.
+  @param key[out]        On return points to table cache key for the table.
+
+  @note Unlike create_table_def_key() call this function doesn't construct
+        key in a buffer provided by caller. Instead it relies on the fact
+        that table list element for which key is requested has properly
+        initialized MDL_request object and the fact that table definition
+        cache key is suffix of key used in MDL subsystem. So to get table
+        definition key it simply needs to return pointer to appropriate
+        part of MDL_key object nested in this table list element.
+        Indeed, this means that lifetime of key produced by this call is
+        limited by the lifetime of table list element which it got as
+        parameter.
+
+  @return Length of key.
+*/
+
+uint get_table_def_key(const TABLE_LIST *table_list, const char **key)
+{
+  /*
+    This call relies on the fact that TABLE_LIST::mdl_request::key object
+    is properly initialized, so table definition cache can be produced
+    from key used by MDL subsystem.
+  */
+  DBUG_ASSERT(!strcmp(table_list->get_db_name(),
+                      table_list->mdl_request.key.db_name()));
+  DBUG_ASSERT(!strcmp(table_list->get_table_name(),
+                      table_list->mdl_request.key.name()));
+
+  /* The TDC key is the MDL key minus its first byte (see header comment). */
+  *key= (const char*)table_list->mdl_request.key.ptr() + 1;
+  return table_list->mdl_request.key.length() - 1;
+}
+
+
+
+/*****************************************************************************
+  Functions to handle table definition cache (TABLE_SHARE)
+*****************************************************************************/
+
+/*
+  Create a list for all open tables matching SQL expression
+
+  SYNOPSIS
+    list_open_tables()
+    thd			Thread THD
+    wild		SQL like expression
+
+  NOTES
+    One gets only a list of tables for which one has any kind of privilege.
+    db and table names are allocated in result struct, so one doesn't need
+    a lock when traversing the return list.
+
+  RETURN VALUES
+    NULL	Error (Probably OOM)
+    #		Pointer to list of names of open tables.
+*/
+
+/* State threaded through tdc_iterate() by list_open_tables(). */
+struct list_open_tables_arg
+{
+  THD *thd;                                /* requesting session */
+  const char *db;                          /* exact db filter, or NULL */
+  const char *wild;                        /* LIKE pattern filter, or NULL */
+  TABLE_LIST table_list;                   /* scratch entry for ACL checks */
+  OPEN_TABLE_LIST **start_list, *open_list; /* tail pointer / result head */
+};
+
+
+/*
+  tdc_iterate() callback: append one OPEN_TABLE_LIST entry for a cached
+  share that matches the db/wild filters and passes the SELECT ACL check.
+  Returns TRUE only on out-of-memory (which aborts the iteration).
+*/
+static my_bool list_open_tables_callback(TDC_element *element,
+                                         list_open_tables_arg *arg)
+{
+  /* TDC key layout: "db\0table_name\0..." */
+  const char *db= (char*) element->m_key;
+  size_t db_length= strlen(db);
+  const char *table_name= db + db_length + 1;
+
+  if (arg->db && my_strcasecmp(system_charset_info, arg->db, db))
+    return FALSE;
+  if (arg->wild && wild_compare(table_name, arg->wild, 0))
+    return FALSE;
+
+  /* Check if user has SELECT privilege for any column in the table */
+  arg->table_list.db.str= db;
+  arg->table_list.db.length= db_length;
+  arg->table_list.table_name.str= table_name;
+  arg->table_list.table_name.length= strlen(table_name);
+  arg->table_list.grant.privilege= NO_ACL;
+
+  if (check_table_access(arg->thd, SELECT_ACL, &arg->table_list, TRUE, 1, TRUE))
+    return FALSE;
+
+  /* Result entry and its db/table strings share one THD allocation. */
+  if (!(*arg->start_list= (OPEN_TABLE_LIST *) arg->thd->alloc(
+                    sizeof(**arg->start_list) + element->m_key_length)))
+    return TRUE;
+
+  strmov((*arg->start_list)->table=
+         strmov(((*arg->start_list)->db= (char*) ((*arg->start_list) + 1)),
+                db) + 1, table_name);
+  (*arg->start_list)->in_use= 0;
+
+  /* Count TABLE instances of this share currently attached to a THD. */
+  mysql_mutex_lock(&element->LOCK_table_share);
+  All_share_tables_list::Iterator it(element->all_tables);
+  TABLE *table;
+  while ((table= it++))
+    if (table->in_use)
+      ++(*arg->start_list)->in_use;
+  mysql_mutex_unlock(&element->LOCK_table_share);
+  (*arg->start_list)->locked= 0;                   /* Obsolete. */
+  arg->start_list= &(*arg->start_list)->next;
+  *arg->start_list= 0;
+  return FALSE;
+}
+
+
+/* See function comment above: build list of cached tables matching db/wild. */
+OPEN_TABLE_LIST *list_open_tables(THD *thd, const char *db, const char *wild)
+{
+  list_open_tables_arg argument;
+  DBUG_ENTER("list_open_tables");
+
+  argument.thd= thd;
+  argument.db= db;
+  argument.wild= wild;
+  bzero((char*) &argument.table_list, sizeof(argument.table_list));
+  argument.start_list= &argument.open_list;
+  argument.open_list= 0;
+
+  /* tdc_iterate returns non-zero on failure (e.g. OOM in the callback). */
+  if (tdc_iterate(thd, (my_hash_walk_action) list_open_tables_callback,
+                  &argument, true))
+    DBUG_RETURN(0);
+
+  DBUG_RETURN(argument.open_list);
+}
+
+
+/**
+   Close all tables that are not in use in table definition cache
+*/
+
+void purge_tables()
+{
+  /*
+    Force close of all open tables.
+
+    Note that code in TABLE_SHARE::wait_for_old_version() assumes that
+    incrementing of refresh_version is followed by purge of unused table
+    shares.
+  */
+  kill_delayed_threads();
+  /*
+    Get rid of all unused TABLE and TABLE_SHARE instances. By doing
+    this we automatically close all tables which were marked as "old".
+  */
+  tc_purge();
+  /* Free table shares which were not freed implicitly by loop above. */
+  tdc_purge(true);
+}
+
+
+/**
+   close_cached_tables
+
+   This function has two separate usages:
+   1) Close not used tables in the table cache to free memory
+   2) Close a list of tables and wait until they are not used anymore. This
+      is used mainly when preparing a table for export.
+
+   If there are locked tables, they are closed and reopened before
+   function returns. This is done to ensure that table files will be closed
+   by all threads and thus external copyable when FLUSH TABLES returns.
+*/
+
+/*
+  See header comment above for the two usages. Returns true on error.
+
+  @param thd               may be NULL only when wait_for_refresh and
+                           tables are both unset (plain cache purge)
+  @param tables            list of tables to flush, or NULL for all
+  @param wait_for_refresh  wait until other threads stop using the tables
+  @param timeout           MDL lock wait timeout for the non-LOCK-TABLES path
+*/
+bool close_cached_tables(THD *thd, TABLE_LIST *tables,
+                         bool wait_for_refresh, ulong timeout)
+{
+  DBUG_ENTER("close_cached_tables");
+  DBUG_ASSERT(thd || (!wait_for_refresh && !tables));
+  DBUG_ASSERT(wait_for_refresh || !tables);
+
+  if (!tables)
+  {
+    /* Free tables that are not used */
+    purge_tables();
+    if (!wait_for_refresh)
+      DBUG_RETURN(false);
+  }
+
+  DBUG_PRINT("info", ("open table definitions: %d",
+                      (int) tdc_records()));
+
+  if (thd->locked_tables_mode)
+  {
+    /*
+      If we are under LOCK TABLES, we need to reopen the tables without
+      opening a door for any concurrent threads to sneak in and get
+      lock on our tables. To achieve this we use exclusive metadata
+      locks.
+    */
+    TABLE_LIST *tables_to_reopen= (tables ? tables :
+                                  thd->locked_tables_list.locked_tables());
+    bool result= false;
+
+    /* close open HANDLER for this thread to allow table to be closed */
+    mysql_ha_flush_tables(thd, tables_to_reopen);
+
+    for (TABLE_LIST *table_list= tables_to_reopen; table_list;
+         table_list= table_list->next_global)
+    {
+      int err;
+      /* A check that the table was locked for write is done by the caller. */
+      TABLE *table= find_table_for_mdl_upgrade(thd, table_list->db.str,
+                                            table_list->table_name.str, &err);
+
+      /* May return NULL if this table has already been closed via an alias. */
+      if (! table)
+        continue;
+
+      if (wait_while_table_is_used(thd, table,
+                                   HA_EXTRA_PREPARE_FOR_FORCED_CLOSE))
+      {
+        /* Could not get exclusive use; report failure but keep going below. */
+        result= true;
+        break;
+      }
+      close_all_tables_for_name(thd, table->s, HA_EXTRA_NOT_USED, NULL);
+    }
+    /*
+      No other thread has the locked tables open; reopen them and get the
+      old locks. This should always succeed (unless some external process
+      has removed the tables)
+    */
+    if (thd->locked_tables_list.reopen_tables(thd, false))
+      result= true;
+
+    /*
+      Since downgrade_lock() won't do anything with shared
+      metadata lock it is much simpler to go through all open tables rather
+      than picking only those tables that were flushed.
+    */
+    for (TABLE *tab= thd->open_tables; tab; tab= tab->next)
+      tab->mdl_ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE);
+
+    DBUG_RETURN(result);
+  }
+  else if (tables)
+  {
+    /*
+      Get an explicit MDL lock for all requested tables to ensure they are
+      not used by any other thread
+    */
+    MDL_request_list mdl_requests;
+
+    DBUG_PRINT("info", ("Waiting for other threads to close their open tables"));
+    DEBUG_SYNC(thd, "after_flush_unlock");
+
+    /* close open HANDLER for this thread to allow table to be closed */
+    mysql_ha_flush_tables(thd, tables);
+
+    for (TABLE_LIST *table= tables; table; table= table->next_local)
+    {
+      MDL_request *mdl_request= new (thd->mem_root) MDL_request;
+      if (mdl_request == NULL)
+        DBUG_RETURN(true);
+      MDL_REQUEST_INIT_BY_KEY(mdl_request, &table->mdl_request.key,
+                              MDL_EXCLUSIVE, MDL_STATEMENT);
+      mdl_requests.push_front(mdl_request);
+    }
+
+    /* Blocks until every listed table is unused, or timeout expires. */
+    if (thd->mdl_context.acquire_locks(&mdl_requests, timeout))
+      DBUG_RETURN(true);
+
+    /* Holding X locks: evict the shares from the table definition cache. */
+    for (TABLE_LIST *table= tables; table; table= table->next_local)
+      tdc_remove_table(thd, table->db.str, table->table_name.str);
+  }
+  DBUG_RETURN(false);
+}
+
+
+/**
+  Collect all shares that has open tables
+*/
+
+/* Accumulator for tc_collect_used_shares(): shares to flush + filter. */
+struct tc_collect_arg
+{
+  DYNAMIC_ARRAY shares;          /* collected TABLE_SHARE* (ref-counted) */
+  flush_tables_type flush_type;  /* which category of tables to collect */
+};
+
+/*
+  tdc_iterate() callback: collect in-use, non-view shares matching
+  arg->flush_type. Each collected share gets its ref_count bumped so it
+  cannot be freed before the caller releases it. Returns TRUE on OOM.
+*/
+static my_bool tc_collect_used_shares(TDC_element *element,
+                                      tc_collect_arg *arg)
+{
+  my_bool result= FALSE;
+
+  DYNAMIC_ARRAY *shares= &arg->shares;
+  mysql_mutex_lock(&element->LOCK_table_share);
+  if (element->ref_count > 0 && !element->share->is_view)
+  {
+    DBUG_ASSERT(element->share);
+    bool do_flush= 0;
+    switch (arg->flush_type) {
+    case FLUSH_ALL:
+      do_flush= 1;
+      break;
+    case FLUSH_NON_TRANS_TABLES:
+      if (!element->share->online_backup &&
+          element->share->table_category == TABLE_CATEGORY_USER)
+        do_flush= 1;
+      break;
+    case FLUSH_SYS_TABLES:
+      /* Last case; no break needed. */
+      if (!element->share->online_backup &&
+          element->share->table_category != TABLE_CATEGORY_USER)
+        do_flush= 1;
+    }
+    if (do_flush)
+    {
+      element->ref_count++;                       // Protect against delete
+      if (push_dynamic(shares, (uchar*) &element->share))
+        result= TRUE;
+    }
+  }
+  mysql_mutex_unlock(&element->LOCK_table_share);
+  return result;
+}
+
+
+/*
+  Ignore errors from opening read only tables
+*/
+
+class flush_tables_error_handler : public Internal_error_handler
+{
+public:
+  int handled_errors;    /* trapped read-only/lock-timeout errors */
+  int unhandled_errors;  /* any other error-level condition */
+  flush_tables_error_handler() : handled_errors(0), unhandled_errors(0)
+  {}
+
+  /* Swallow ER_OPEN_AS_READONLY and ER_LOCK_WAIT_TIMEOUT; count the rest. */
+  bool handle_condition(THD *thd,
+                        uint sql_errno,
+                        const char* sqlstate,
+                        Sql_condition::enum_warning_level *level,
+                        const char* msg,
+                        Sql_condition ** cond_hdl)
+  {
+    *cond_hdl= NULL;
+    if (sql_errno == ER_OPEN_AS_READONLY || sql_errno == ER_LOCK_WAIT_TIMEOUT)
+    {
+      handled_errors++;
+      return TRUE;
+    }
+    if (*level == Sql_condition::WARN_LEVEL_ERROR)
+      unhandled_errors++;
+    return FALSE;
+  }
+
+  /* TRUE if some error other than the two ignorable ones occurred. */
+  bool got_fatal_error()
+  {
+    return unhandled_errors > 0;
+  }
+};
+
+
+/**
+   Flush cached table as part of global read lock
+
+   @param thd
+   @param flag   What type of tables should be flushed
+
+   @return 0  ok
+   @return 1  error
+
+   After we get the list of table shares, we will call flush on all
+   possible tables, even if some flush fails.
+*/
+
bool flush_tables(THD *thd, flush_tables_type flag)
{
  bool result= TRUE;                /* Assume failure until the flush loop completes */
  tc_collect_arg collect_arg;
  TABLE *tmp_table;                 /* Scratch TABLE object for shares with no free instance */
  flush_tables_error_handler error_handler;
  DBUG_ENTER("flush_tables");

  purge_tables();  /* Flush unused tables and shares */
  DEBUG_SYNC(thd, "after_purge_tables");

  /*
    Loop over all shares and collect shares that have open tables
    TODO:
    Optimize this to only collect shares that have been used for
    write after last time all tables was closed.
  */

  /* Heap-allocated: a TABLE is presumably too large for the stack — confirm */
  if (!(tmp_table= (TABLE*) my_malloc(PSI_INSTRUMENT_ME, sizeof(*tmp_table),
                                      MYF(MY_WME | MY_THREAD_SPECIFIC))))
    DBUG_RETURN(1);

  my_init_dynamic_array(PSI_INSTRUMENT_ME, &collect_arg.shares,
                        sizeof(TABLE_SHARE*), 100, 100, MYF(0));
  collect_arg.flush_type= flag;
  if (tdc_iterate(thd, (my_hash_walk_action) tc_collect_used_shares,
                  &collect_arg, true))
  {
    /* Release already collected shares */
    for (uint i= 0 ; i < collect_arg.shares.elements ; i++)
    {
      TABLE_SHARE *share= *dynamic_element(&collect_arg.shares, i,
                                           TABLE_SHARE**);
      tdc_release_share(share);
    }
    goto err;
  }

  /* Call HA_EXTRA_FLUSH on all found shares */

  thd->push_internal_handler(&error_handler);
  for (uint i= 0 ; i < collect_arg.shares.elements ; i++)
  {
    TABLE_SHARE *share= *dynamic_element(&collect_arg.shares, i,
                                         TABLE_SHARE**);
    TABLE *table= tc_acquire_table(thd, share->tdc);
    if (table)
    {
      /* A cached instance was free; flush through it and give it back */
      (void) table->file->extra(HA_EXTRA_FLUSH);
      DEBUG_SYNC(table->in_use, "before_tc_release_table");
      tc_release_table(table);
    }
    else
    {
      /*
        No free TABLE instances available. We have to open a new one.

        Try to take a MDL lock to ensure we can open a new table instance.
        If the lock fails, it means that some DDL operation or flush tables
        with read lock is ongoing.
        In this case we cannot send the HA_EXTRA_FLUSH signal.
      */

      MDL_request mdl_request;
      MDL_REQUEST_INIT(&mdl_request, MDL_key::TABLE,
                       share->db.str,
                       share->table_name.str,
                       MDL_SHARED, MDL_EXPLICIT);

      /* Timeout 0: don't wait — skip the table if the lock is contended */
      if (!thd->mdl_context.acquire_lock(&mdl_request, 0))
      {
        /*
          HA_OPEN_FOR_FLUSH is used to allow us to open the table even if
          TABLE_SHARE::incompatible_version is set. It also will tell
          SEQUENCE engine that we don't have to read the sequence information
          (which may cause deadlocks with concurrently running ALTER TABLE or
          ALTER SEQUENCE) as we will close the table at once.
        */
        if (!open_table_from_share(thd, share, &empty_clex_str,
                                   HA_OPEN_KEYFILE, 0,
                                   HA_OPEN_FOR_ALTER | HA_OPEN_FOR_FLUSH,
                                   tmp_table, FALSE,
                                   NULL))
        {
          (void) tmp_table->file->extra(HA_EXTRA_FLUSH);
          /*
            We don't put the table into the TDC as the table was not fully
            opened (we didn't open triggers)
          */
          closefrm(tmp_table);
        }
        thd->mdl_context.release_lock(mdl_request.ticket);
      }
    }
    tdc_release_share(share);
  }
  thd->pop_internal_handler();
  /* Only unhandled (fatal) errors make the whole flush fail */
  result= error_handler.got_fatal_error();
  DBUG_PRINT("note", ("open_errors: %u %u",
                      error_handler.handled_errors,
                      error_handler.unhandled_errors));
err:
  my_free(tmp_table);
  delete_dynamic(&collect_arg.shares);
  DBUG_RETURN(result);
}
+
+
+/*
+  Mark all tables in the list which were used by current substatement
+  as free for reuse.
+
+  SYNOPSIS
+    mark_used_tables_as_free_for_reuse()
+      thd   - thread context
+      table - head of the list of tables
+
+  DESCRIPTION
+    Marks all tables in the list which were used by current substatement
+    (they are marked by its query_id) as free for reuse.
+
+    Clear 'check_table_binlog_row_based_done' flag. For tables which were used
+    by current substatement the flag is cleared as part of 'ha_reset()' call.
+    For the rest of the open tables not used by current substament if this
+    flag is enabled as part of current substatement execution,
+    (for example when THD::binlog_write_table_maps() calls
+    prepare_for_row_logging()), clear the flag explicitly.
+
+  NOTE
+    The reason we reset query_id is that it's not enough to just test
+    if table->query_id != thd->query_id to know if a table is in use.
+
+    For example
+    SELECT f1_that_uses_t1() FROM t1;
+    In f1_that_uses_t1() we will see one instance of t1 where query_id is
+    set to query_id of original query.
+*/
+
+static void mark_used_tables_as_free_for_reuse(THD *thd, TABLE *table)
+{
+  DBUG_ENTER("mark_used_tables_as_free_for_reuse");
+  for (; table ; table= table->next)
+  {
+    DBUG_ASSERT(table->pos_in_locked_tables == NULL ||
+                table->pos_in_locked_tables->table == table);
+    if (table->query_id == thd->query_id)
+    {
+      table->query_id= 0;
+      table->file->ha_reset();
+    }
+    else
+      table->file->clear_cached_table_binlog_row_based_flag();
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Close all open instances of the table but keep the MDL lock.
+
+  Works both under LOCK TABLES and in the normal mode.
+  Removes all closed instances of the table from the table cache.
+
+  @param     thd     thread handle
+  @param[in] share   table share, but is just a handy way to
+                     access the table cache key
+
+  @param[in] extra
+                     HA_EXTRA_PREPARE_FOR_DROP
+                        - The table is dropped
+                     HA_EXTRA_PREPARE_FOR_RENAME
+                        - The table is renamed
+                     HA_EXTRA_NOT_USED
+                        - The table is marked as closed in the
+                          locked_table_list but kept there so one can call
+                          locked_table_list->reopen_tables() to put it back.
+
+                     In case of drop/rename the documented behavior is to
+                     implicitly remove the table from LOCK TABLES
+                     list. 
+
+  @pre Must be called with an X MDL lock on the table.
+*/
+
void
close_all_tables_for_name(THD *thd, TABLE_SHARE *share,
                          ha_extra_function extra,
                          TABLE *skip_table)
{
  DBUG_ASSERT(!share->tmp_table);
  DBUG_ASSERT(share->tdc->flushed);

  char key[MAX_DBKEY_LENGTH];
  size_t key_length= share->table_cache_key.length;
  /* Drop/rename (extra != NOT_USED) implicitly removes from LOCK TABLES list */
  bool remove_from_locked_tables= extra != HA_EXTRA_NOT_USED;

  /*
    Copy the cache key to a local buffer; presumably 'share' can go away
    once its last instance is closed below — TODO confirm.
  */
  memcpy(key, share->table_cache_key.str, key_length);

  /* Walk thd->open_tables via a pointer-to-pointer so entries can be unlinked */
  for (TABLE **prev= &thd->open_tables; *prev; )
  {
    TABLE *table= *prev;

    /* Match on the full cache key (db name + table name) */
    if (table->s->table_cache_key.length == key_length &&
        !memcmp(table->s->table_cache_key.str, key, key_length) &&
        table != skip_table)
    {
      thd->locked_tables_list.unlink_from_list(thd,
                                               table->pos_in_locked_tables,
                                               remove_from_locked_tables);
      /* Inform handler that there is a drop table or a rename going on */
      if (extra != HA_EXTRA_NOT_USED && table->db_stat)
      {
        table->file->extra(extra);
        extra= HA_EXTRA_NOT_USED;               // Call extra once!
      }

      /*
        Does nothing if the table is not locked.
        This allows one to use this function after a table
        has been unlocked, e.g. in partition management.
      */
      mysql_lock_remove(thd, thd->lock, table);
      /* close_thread_table() advances *prev to the next list entry */
      close_thread_table(thd, prev);
    }
    else
    {
      /* Step to next entry in open_tables list. */
      prev= &table->next;
    }
  }
}
+
+
+int close_thread_tables_for_query(THD *thd)
+{
+  if (thd->lex && thd->lex->explain)
+    thd->lex->explain->notify_tables_are_closed();
+
+  DBUG_EXECUTE_IF("explain_notify_tables_are_closed",
+                  if (dbug_user_var_equals_str(thd, "show_explain_probe_query",
+                                               thd->query()))
+                      dbug_serve_apcs(thd, 1);
+                 );
+  return close_thread_tables(thd);
+}
+/*
+  Close all tables used by the current substatement, or all tables
+  used by this thread if we are on the upper level.
+
+  SYNOPSIS
+    close_thread_tables()
+    thd			Thread handler
+
+  IMPLEMENTATION
+    Unlocks tables and frees derived tables.
+    Put all normal tables used by thread in free list.
+
+    It will only close/mark as free for reuse tables opened by this
+    substatement, it will also check if we are closing tables after
+    execution of complete query (i.e. we are on upper level) and will
+    leave prelocked mode if needed.
+*/
+
int close_thread_tables(THD *thd)
{
  TABLE *table;
  int error= 0;
  DBUG_ENTER("close_thread_tables");

  THD_STAGE_INFO(thd, stage_closing_tables);

#ifdef EXTRA_DEBUG
  DBUG_PRINT("tcache", ("open tables:"));
  for (table= thd->open_tables; table; table= table->next)
    DBUG_PRINT("tcache", ("table: '%s'.'%s' %p", table->s->db.str,
                          table->s->table_name.str, table));
#endif

#if defined(ENABLED_DEBUG_SYNC)
  /* debug_sync may not be initialized for some slave threads */
  if (thd->debug_sync_control)
    DEBUG_SYNC(thd, "before_close_thread_tables");
#endif

  DBUG_ASSERT(thd->transaction->stmt.is_empty() || thd->in_sub_stmt ||
              (thd->state_flags & Open_tables_state::BACKUPS_AVAIL));

  /* First pass: per-table cleanup that must happen before any unlocking */
  for (table= thd->open_tables; table; table= table->next)
  {
    /* Table might be in use by some outer statement. */
    DBUG_PRINT("tcache", ("table: '%s'  query_id: %lu",
                          table->s->table_name.str, (ulong) table->query_id));

    if (thd->locked_tables_mode)
    {
#ifdef WITH_PARTITION_STORAGE_ENGINE
      if (table->part_info && table->part_info->vers_require_hist_part(thd) &&
          !thd->stmt_arena->is_stmt_prepare())
        table->part_info->vers_check_limit(thd);
#endif
      /*
        For simple locking we cleanup it here because we don't close thread
        tables. For prelocking we close it when we do close thread tables.
      */
      if (thd->locked_tables_mode != LTM_PRELOCKED)
        table->vcol_cleanup_expr(thd);
    }

    /* Detach MERGE children after every statement. Even under LOCK TABLES. */
    if (thd->locked_tables_mode <= LTM_LOCK_TABLES ||
        table->query_id == thd->query_id)
    {
      DBUG_ASSERT(table->file);
      table->file->extra(HA_EXTRA_DETACH_CHILDREN);
    }
  }

  /*
    We are assuming here that thd->derived_tables contains ONLY derived
    tables for this substatement. i.e. instead of approach which uses
    query_id matching for determining which of the derived tables belong
    to this substatement we rely on the ability of substatements to
    save/restore thd->derived_tables during their execution.

    TODO: Probably even better approach is to simply associate list of
          derived tables with (sub-)statement instead of thread and destroy
          them at the end of its execution.

    Note: EXPLAIN/ANALYZE depends on derived tables being freed here. See
    sql_explain.h:ExplainDataStructureLifetime.
  */
  if (thd->derived_tables)
  {
    TABLE *next;
    /*
      Close all derived tables generated in queries like
      SELECT * FROM (SELECT * FROM t1)
    */
    for (table= thd->derived_tables ; table ; table= next)
    {
      next= table->next;
      free_tmp_table(thd, table);
    }
    thd->derived_tables= 0;
  }

  if (thd->rec_tables)
  {
    TABLE *next;
    /*
      Close all temporary tables created for recursive table references.
      This action was postponed because the table could be used in the
      statements like  ANALYZE WITH r AS (...) SELECT * from r
      where r is defined through recursion. 
    */
    for (table= thd->rec_tables ; table ; table= next)
    {
      next= table->next;
      free_tmp_table(thd, table);
    }
    thd->rec_tables= 0;
  }

  /*
    Mark all temporary tables used by this statement as free for reuse.
  */
  thd->mark_tmp_tables_as_free_for_reuse();

  if (thd->locked_tables_mode)
  {

    /* Ensure we are calling ha_reset() for all used tables */
    mark_used_tables_as_free_for_reuse(thd, thd->open_tables);

    /*
      We are under simple LOCK TABLES or we're inside a sub-statement
      of a prelocked statement, so should not do anything else.

      Note that even if we are in LTM_LOCK_TABLES mode and statement
      requires prelocking (e.g. when we are closing tables after
      failing to "open" all tables required for statement execution)
      we will exit this function a few lines below.
    */
    if (! thd->lex->requires_prelocking())
      DBUG_RETURN(0);

    /*
      We are in the top-level statement of a prelocked statement,
      so we have to leave the prelocked mode now with doing implicit
      UNLOCK TABLES if needed.
    */
    if (thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES)
      thd->locked_tables_mode= LTM_LOCK_TABLES;

    if (thd->locked_tables_mode == LTM_LOCK_TABLES)
      DBUG_RETURN(0);

    thd->leave_locked_tables_mode();

    /* Fallthrough */
  }

  if (thd->lock)
  {
    /*
      For RBR we flush the pending event just before we unlock all the
      tables.  This means that we are at the end of a topmost
      statement, so we ensure that the STMT_END_F flag is set on the
      pending event.  For statements that are *inside* stored
      functions, the pending event will not be flushed: that will be
      handled either before writing a query log event (inside
      binlog_query()) or when preparing a pending event.
     */
    (void)thd->binlog_flush_pending_rows_event(TRUE);
    error= mysql_unlock_tables(thd, thd->lock);
    thd->lock=0;
  }
  /*
    Closing a MERGE child before the parent would be fatal if the
    other thread tries to abort the MERGE lock in between.
  */
  while (thd->open_tables)
    (void) close_thread_table(thd, &thd->open_tables);

  DBUG_RETURN(error);
}
+
+
+/* move one table to free list */
+
void close_thread_table(THD *thd, TABLE **table_ptr)
{
  TABLE *table= *table_ptr;
  handler *file= table->file;
  DBUG_ENTER("close_thread_table");
  DBUG_PRINT("tcache", ("table: '%s'.'%s' %p", table->s->db.str,
                        table->s->table_name.str, table));
  DBUG_ASSERT(!file->keyread_enabled());
  DBUG_ASSERT(file->inited == handler::NONE);

  /*
    The metadata lock must be released after giving back
    the table to the table cache.
  */
  DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE,
                                             table->s->db.str,
                                             table->s->table_name.str,
                                             MDL_SHARED) ||
              thd->mdl_context.is_lock_warrantee(MDL_key::TABLE,
                                                 table->s->db.str,
                                                 table->s->table_name.str,
                                                 MDL_SHARED));
  table->vcol_cleanup_expr(thd);
  table->mdl_ticket= NULL;

  /* Aggregate this table's statistics into the global counters */
  file->update_global_table_stats();
  file->update_global_index_stats();
  if (unlikely(thd->variables.log_slow_verbosity &
               LOG_SLOW_VERBOSITY_ENGINE) &&
      likely(file->handler_stats))
  {
    /* Fold the engine time tracker into the per-handler stats for slow log */
    Exec_time_tracker *tracker;
    if ((tracker= file->get_time_tracker()))
      file->handler_stats->engine_time+= tracker->get_cycles();
    thd->handler_stats.add(file->handler_stats);
  }
  /*
    This lock is needed to allow THD::notify_shared_lock() to
    traverse the thd->open_tables list without having to worry that
    some of the tables are removed from under it
  */

  mysql_mutex_lock(&thd->LOCK_thd_data);
  *table_ptr=table->next;
  mysql_mutex_unlock(&thd->LOCK_thd_data);

  if (! table->needs_reopen())
  {
    /* Avoid having MERGE tables with attached children in table cache. */
    file->extra(HA_EXTRA_DETACH_CHILDREN);
    /* Free memory and reset for next loop. */
    free_field_buffers_larger_than(table, MAX_TDC_BLOB_SIZE);
    file->ha_reset();
  }

  /*
    Do this *before* entering the TABLE_SHARE::tdc.LOCK_table_share
    critical section.
  */
  MYSQL_UNBIND_TABLE(file);

  tc_release_table(table);
  DBUG_VOID_RETURN;
}
+
+
+/*
+  Find table in list.
+
+  SYNOPSIS
+    find_table_in_list()
+    table		Pointer to table list
+    offset		Offset to which list in table structure to use
+    db_name		Data base name
+    table_name		Table name
+
+  NOTES:
+    This is called by find_table_in_global_list().
+
+  RETURN VALUES
+    NULL	Table not found
+    #		Pointer to found table.
+*/
+
+TABLE_LIST *find_table_in_list(TABLE_LIST *table,
+                               TABLE_LIST *TABLE_LIST::*link,
+                               const LEX_CSTRING *db_name,
+                               const LEX_CSTRING *table_name)
+{
+  for (; table; table= table->*link )
+  {
+    if (cmp(&table->db, db_name) == 0 &&
+        cmp(&table->table_name, table_name) == 0)
+      break;
+  }
+  return table;
+}
+
+
+/**
  Test that table is unique (it only exists once in the table list)
+
+  @param  thd                   thread handle
+  @param  table                 table which should be checked
+  @param  table_list            list of tables
+  @param  check_flag            whether to check tables' aliases
+                                Currently this is only used by INSERT
+
+  NOTE: to exclude derived tables from check we use following mechanism:
+    a) during derived table processing set THD::derived_tables_processing
+    b) JOIN::prepare set SELECT::exclude_from_table_unique_test if
+       THD::derived_tables_processing set. (we can't use JOIN::execute
+       because for PS we perform only JOIN::prepare, but we can't set this
+       flag in JOIN::prepare if we are not sure that we are in derived table
+       processing loop, because multi-update call fix_fields() for some its
+       items (which mean JOIN::prepare for subqueries) before unique_table
+       call to detect which tables should be locked for write).
+    c) find_dup_table skip all tables which belong to SELECT with
+       SELECT::exclude_from_table_unique_test set.
+    Also SELECT::exclude_from_table_unique_test used to exclude from check
+    tables of main SELECT of multi-delete and multi-update
+
+    We also skip tables with TABLE_LIST::prelocking_placeholder set,
+    because we want to allow SELECTs from them, and their modification
+    will rise the error anyway.
+
+    TODO: when we will have table/view change detection we can do this check
+          only once for PS/SP
+
+  @retval !=0  found duplicate
+  @retval 0 if table is unique
+*/
+
static
TABLE_LIST* find_dup_table(THD *thd, TABLE_LIST *table, TABLE_LIST *table_list,
                           uint check_flag)
{
  TABLE_LIST *res= 0;
  LEX_CSTRING *d_name, *t_name, *t_alias;
  DBUG_ENTER("find_dup_table");
  DBUG_PRINT("enter", ("table alias: %s", table->alias.str));

  /*
    If this function called for query which update table (INSERT/UPDATE/...)
    then we have in table->table pointer to TABLE object which we are
    updating even if it is VIEW so we need TABLE_LIST of this TABLE object
    to get right names (even if lower_case_table_names used).

    If this function called for CREATE command that we have not opened table
    (table->table equal to 0) and right names is in current TABLE_LIST
    object.
  */
  if (table->table)
  {
    /* All MyISAMMRG children are plain MyISAM tables. */
    DBUG_ASSERT(table->table->file->ht->db_type != DB_TYPE_MRG_MYISAM);

    table= table->find_underlying_table(table->table);
    /*
      as far as we have table->table we have to find real TABLE_LIST of
      it in underlying tables
    */
    DBUG_ASSERT(table);
  }
  d_name= &table->db;
  t_name= &table->table_name;
  t_alias= &table->alias;

retry:
  DBUG_PRINT("info", ("real table: %s.%s", d_name->str, t_name->str));
  /* Note: 'res= 0' in the increment clause clears the match before each pass */
  for (TABLE_LIST *tl= table_list; tl ; tl= tl->next_global, res= 0)
  {
    if (tl->select_lex && tl->select_lex->master_unit() &&
        tl->select_lex->master_unit()->executed)
    {
      /*
        There is no sense to check tables of already executed parts
        of the query
      */
      continue;
    }
    /*
      Table is unique if it is present only once in the global list
      of tables and once in the list of table locks.
    */
    if (! (res= find_table_in_global_list(tl, d_name, t_name)))
      break;
    tl= res;                       // We can continue search after this table

    /* Skip if same underlying table. */
    if (res->table && (res->table == table->table))
      continue;

    /* Skip if table is tmp table */
    if (check_flag & CHECK_DUP_SKIP_TEMP_TABLE &&
        res->table && res->table->s->tmp_table != NO_TMP_TABLE)
    {
      continue;
    }
    /* For CREATE any further occurrence is already a duplicate */
    if (check_flag & CHECK_DUP_FOR_CREATE)
      DBUG_RETURN(res);

    /* Skip if table alias does not match. */
    if (check_flag & CHECK_DUP_ALLOW_DIFFERENT_ALIAS)
    {
      if (my_strcasecmp(table_alias_charset, t_alias->str, res->alias.str))
        continue;
    }

    /*
      If table is not excluded (could be a derived table) and table is not
      a prelocking placeholder then we found either a duplicate entry
      or a table that is part of a derived table (handled below).
      Examples are:
      INSERT INTO t1 SELECT * FROM t1;
      INSERT INTO t1 SELECT * FROM view_containing_t1;
    */
    if (res->select_lex &&
        !res->select_lex->exclude_from_table_unique_test &&
        !res->prelocking_placeholder)
      break;

    /*
      If we found entry of this table or table of SELECT which already
      processed in derived table or top select of multi-update/multi-delete
      (exclude_from_table_unique_test) or prelocking placeholder.
    */
    DBUG_PRINT("info",
               ("found same copy of table or table which we should skip"));
  }
  if (res && res->belong_to_derived)
  {
    /*
      We come here for queries of type:
      INSERT INTO t1 (SELECT tmp.a FROM (select * FROM t1) as tmp);

      Try to fix by materializing the derived table
    */
    TABLE_LIST *derived=  res->belong_to_derived;
    if (derived->is_merged_derived() && !derived->derived->is_excluded())
    {
      DBUG_PRINT("info",
                 ("convert merged to materialization to resolve the conflict"));
      derived->change_refs_to_fields();
      derived->set_materialized_derived();
      /* Re-scan: materialization may have resolved the duplicate */
      goto retry;
    }
  }
  DBUG_RETURN(res);
}
+
+
+/**
+  Test that the subject table of INSERT/UPDATE/DELETE/CREATE
+  or (in case of MyISAMMRG) one of its children are not used later
+  in the query.
+
+  For MyISAMMRG tables, it is assumed that all the underlying
+  tables of @c table (if any) are listed right after it and that
+  their @c parent_l field points at the main table.
+
+
+  @retval non-NULL The table list element for the table that
+                   represents the duplicate. 
+  @retval NULL     No duplicates found.
+*/
+
+TABLE_LIST*
+unique_table(THD *thd, TABLE_LIST *table, TABLE_LIST *table_list,
+             uint check_flag)
+{
+  TABLE_LIST *dup;
+
+  table= table->find_table_for_update();
+
+  if (table->table &&
+      table->table->file->ha_table_flags() & HA_CAN_MULTISTEP_MERGE)
+  {
+    TABLE_LIST *child;
+    dup= NULL;
+    /* Check duplicates of all merge children. */
+    for (child= table->next_global; child;
+         child= child->next_global)
+    {
+      if (child->table &&
+          child->table->file->ha_table_flags() & HA_CAN_MULTISTEP_MERGE)
+        continue;
+
+      /*
+        Ensure that the child has one parent that is the table that is
+        updated.
+      */
+      TABLE_LIST *tmp_parent= child;
+      while ((tmp_parent= tmp_parent->parent_l))
+      {
+        if (tmp_parent == table)
+          break;
+      }
+      if (!tmp_parent)
+        break;
+
+      if ((dup= find_dup_table(thd, child, child->next_global, check_flag)))
+        break;
+    }
+  }
+  else
+    dup= find_dup_table(thd, table, table_list, check_flag);
+  return dup;
+}
+
+
+/*
+  Issue correct error message in case we found 2 duplicate tables which
+  prevent some update operation
+
+  SYNOPSIS
+    update_non_unique_table_error()
+    update      table which we try to update
+    operation   name of update operation
+    duplicate   duplicate table which we found
+
+  NOTE:
+    here we hide view underlying tables if we have them
+*/
+
void update_non_unique_table_error(TABLE_LIST *update,
                                   const char *operation,
                                   TABLE_LIST *duplicate)
{
  update= update->top_table();
  duplicate= duplicate->top_table();
  /*
    Enter the view-specific branch unless both sides are distinct views
    with identical names and databases (i.e. the very same view repeated).
  */
  if (!update->view || !duplicate->view ||
      update->view == duplicate->view ||
      update->view_name.length != duplicate->view_name.length ||
      update->view_db.length != duplicate->view_db.length ||
      lex_string_cmp(table_alias_charset,
                     &update->view_name, &duplicate->view_name) != 0 ||
      lex_string_cmp(table_alias_charset,
                     &update->view_db, &duplicate->view_db) != 0)
  {
    /*
      it is not the same view repeated (but it can be parts of the same copy
      of view), so we have to hide underlying tables.
    */
    if (update->view)
    {
      /* Issue the ER_NON_INSERTABLE_TABLE error for an INSERT */
      if (update->view == duplicate->view)
        my_error(!strncmp(operation, "INSERT", 6) ?
                 ER_NON_INSERTABLE_TABLE : ER_NON_UPDATABLE_TABLE, MYF(0),
                 update->alias.str, operation);
      else
        my_error(ER_VIEW_PREVENT_UPDATE, MYF(0),
                 (duplicate->view ? duplicate->alias.str : update->alias.str),
                 operation, update->alias.str);
      return;
    }
    if (duplicate->view)
    {
      /* Only the duplicate is a view; name it in the error */
      my_error(ER_VIEW_PREVENT_UPDATE, MYF(0), duplicate->alias.str, operation,
               update->alias.str);
      return;
    }
  }
  /* Both are base tables (or the same view): generic "table used twice" */
  my_error(ER_UPDATE_TABLE_USED, MYF(0), update->alias.str, operation);
}
+
+
+/**
+   Force all other threads to stop using the table by upgrading
+   metadata lock on it and remove unused TABLE instances from cache.
+
+   @param thd      Thread handler
+   @param table    Table to remove from cache
+   @param function HA_EXTRA_PREPARE_FOR_DROP if table is to be deleted
+                   HA_EXTRA_FORCE_REOPEN if table is not be used
+                   HA_EXTRA_PREPARE_FOR_RENAME if table is to be renamed
+                   HA_EXTRA_NOT_USED             Don't call extra()
+
+   @note When returning, the table will be unusable for other threads
+         until metadata lock is downgraded.
+
+   @retval FALSE Success.
+   @retval TRUE  Failure (e.g. because thread was killed).
+*/
+
bool wait_while_table_is_used(THD *thd, TABLE *table,
                              enum ha_extra_function function)
{
  DBUG_ENTER("wait_while_table_is_used");
  DBUG_ASSERT(!table->s->tmp_table);
  DBUG_PRINT("enter", ("table: '%s'  share: %p  db_stat: %u",
                       table->s->table_name.str, table->s,
                       table->db_stat));

  /* Upgrade our shared MDL to exclusive; waits up to lock_wait_timeout */
  if (thd->mdl_context.upgrade_shared_lock(
             table->mdl_ticket, MDL_EXCLUSIVE,
             thd->variables.lock_wait_timeout))
    DBUG_RETURN(TRUE);

  /* Evict all other cached instances of this table */
  table->s->tdc->flush(thd, true);
  /* extra() call must come only after all instances above are closed */
  if (function != HA_EXTRA_NOT_USED)
  {
    int error= table->file->extra(function);
    if (error)
      table->file->print_error(error, MYF(0));
    /* Non-zero engine error converts to TRUE (failure) */
    DBUG_RETURN(error);
  }
  DBUG_RETURN(FALSE);
}
+
+
+/**
  Close and drop a just-created table in CREATE TABLE ... SELECT.
+
+  @param  thd         Thread handle
+  @param  table       TABLE object for the table to be dropped
+  @param  db_name     Name of database for this table
+  @param  table_name  Name of this table
+
+  This routine assumes that the table to be closed is open only
+  by the calling thread, so we needn't wait until other threads
+  close the table. It also assumes that the table is first
  in thd->open_tables and a data lock on it, if any, has been
+  released. To sum up, it's tuned to work with
+  CREATE TABLE ... SELECT and CREATE TABLE .. SELECT only.
+  Note, that currently CREATE TABLE ... SELECT is not supported
+  under LOCK TABLES. This function, still, can be called in
+  prelocked mode, e.g. if we do CREATE TABLE .. SELECT f1();
+*/
+
+void drop_open_table(THD *thd, TABLE *table, const LEX_CSTRING *db_name,
+                     const LEX_CSTRING *table_name)
+{
+  DBUG_ENTER("drop_open_table");
+  if (table->s->tmp_table)
+    thd->drop_temporary_table(table, NULL, true);
+  else
+  {
+    DBUG_ASSERT(table == thd->open_tables);
+
+    handlerton *table_type= table->s->db_type();
+    table->file->extra(HA_EXTRA_PREPARE_FOR_DROP);
+    table->s->tdc->flush(thd, true);
+    close_thread_table(thd, &thd->open_tables);
+    /* Remove the table from the storage engine and rm the .frm. */
+    quick_rm_table(thd, table_type, db_name, table_name, 0);
+ }
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  An error handler which converts, if possible, ER_LOCK_DEADLOCK error
+  that can occur when we are trying to acquire a metadata lock to
+  a request for back-off and re-start of open_tables() process.
+*/
+
class MDL_deadlock_handler : public Internal_error_handler
{
public:
  MDL_deadlock_handler(Open_table_context *ot_ctx_arg)
    : m_ot_ctx(ot_ctx_arg), m_is_active(FALSE)
  {}

  virtual ~MDL_deadlock_handler() = default;

  /*
    Intercepts ER_LOCK_DEADLOCK and converts it into a back-off request
    on m_ot_ctx; all other conditions are passed through unhandled.
  */
  virtual bool handle_condition(THD *thd,
                                uint sql_errno,
                                const char* sqlstate,
                                Sql_condition::enum_warning_level *level,
                                const char* msg,
                                Sql_condition ** cond_hdl);

private:
  /** Open table context to be used for back-off request. */
  Open_table_context *m_ot_ctx;
  /**
    Indicates that we are already in the process of handling
    ER_LOCK_DEADLOCK error. Allows to re-emit the error from
    the error handler without falling into infinite recursion.
  */
  bool m_is_active;
};
+
+
+bool MDL_deadlock_handler::handle_condition(THD *,
+                                            uint sql_errno,
+                                            const char*,
+                                            Sql_condition::enum_warning_level*,
+                                            const char*,
+                                            Sql_condition ** cond_hdl)
+{
+  *cond_hdl= NULL;
+  if (! m_is_active && sql_errno == ER_LOCK_DEADLOCK)
+  {
+    /* Disable the handler to avoid infinite recursion. */
+    m_is_active= TRUE;
+    (void) m_ot_ctx->request_backoff_action(
+             Open_table_context::OT_BACKOFF_AND_RETRY,
+             NULL);
+    m_is_active= FALSE;
+    /*
+      If the above back-off request failed, a new instance of
+      ER_LOCK_DEADLOCK error was emitted. Thus the current
+      instance of error condition can be treated as handled.
+    */
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/**
+  Try to acquire an MDL lock for a table being opened.
+
+  @param[in,out] thd      Session context, to report errors.
+  @param[out]    ot_ctx   Open table context, to hold the back off
+                          state. If we failed to acquire a lock
+                          due to a lock conflict, we add the
+                          failed request to the open table context.
+  @param[in,out] mdl_request A request for an MDL lock.
+                          If we managed to acquire a ticket
+                          (no errors or lock conflicts occurred),
+                          contains a reference to it on
+                          return. However, is not modified if MDL
+                          lock type- modifying flags were provided.
+  @param[in]    flags flags MYSQL_OPEN_FORCE_SHARED_MDL,
+                          MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL or
+                          MYSQL_OPEN_FAIL_ON_MDL_CONFLICT
+                          @sa open_table().
+  @param[out]   mdl_ticket Only modified if there was no error.
+                          If we managed to acquire an MDL
+                          lock, contains a reference to the
+                          ticket, otherwise is set to NULL.
+
+  @retval TRUE  An error occurred.
+  @retval FALSE No error, but perhaps a lock conflict, check mdl_ticket.
+*/
+
+static bool
+open_table_get_mdl_lock(THD *thd, Open_table_context *ot_ctx,
+                        MDL_request *mdl_request,
+                        uint flags,
+                        MDL_ticket **mdl_ticket)
+{
+  /* Local request used when the caller's lock type must be overridden. */
+  MDL_request mdl_request_shared;
+
+  if (flags & (MYSQL_OPEN_FORCE_SHARED_MDL |
+               MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL))
+  {
+    /*
+      MYSQL_OPEN_FORCE_SHARED_MDL flag means that we are executing
+      PREPARE for a prepared statement and want to override
+      the type-of-operation aware metadata lock which was set
+      in the parser/during view opening with a simple shared
+      metadata lock.
+      This is necessary to allow concurrent execution of PREPARE
+      and LOCK TABLES WRITE statement against the same table.
+
+      MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL flag means that we open
+      the table in order to get information about it for one of I_S
+      queries and also want to override the type-of-operation aware
+      shared metadata lock which was set earlier (e.g. during view
+      opening) with a high-priority shared metadata lock.
+      This is necessary to avoid unnecessary waiting and extra
+      ER_WARN_I_S_SKIPPED_TABLE warnings when accessing I_S tables.
+
+      These two flags are mutually exclusive.
+    */
+    DBUG_ASSERT(!(flags & MYSQL_OPEN_FORCE_SHARED_MDL) ||
+                !(flags & MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL));
+
+    /*
+      Redirect to the override request; the caller's mdl_request stays
+      untouched in this case (see function doc comment).
+    */
+    MDL_REQUEST_INIT_BY_KEY(&mdl_request_shared, &mdl_request->key,
+        flags & MYSQL_OPEN_FORCE_SHARED_MDL ? MDL_SHARED : MDL_SHARED_HIGH_PRIO,
+        MDL_TRANSACTION);
+    mdl_request= &mdl_request_shared;
+  }
+
+  if (flags & MYSQL_OPEN_FAIL_ON_MDL_CONFLICT)
+  {
+    /*
+      When table is being open in order to get data for I_S table,
+      we might have some tables not only open but also locked (e.g. when
+      this happens under LOCK TABLES or in a stored function).
+      As a result by waiting on a conflicting metadata lock to go away
+      we may create a deadlock which won't entirely belong to the
+      MDL subsystem and thus won't be detectable by this subsystem's
+      deadlock detector.
+      To avoid such situation we skip the trouble-making table if
+      there is a conflicting lock.
+    */
+    if (thd->mdl_context.try_acquire_lock(mdl_request))
+      return TRUE;
+    /* A NULL ticket means a conflict was detected: skip the table. */
+    if (mdl_request->ticket == NULL)
+    {
+      my_error(ER_WARN_I_S_SKIPPED_TABLE, MYF(0),
+               mdl_request->key.db_name(), mdl_request->key.name());
+      return TRUE;
+    }
+  }
+  else
+  {
+    /*
+      We are doing a normal table open. Let us try to acquire a metadata
+      lock on the table. If there is a conflicting lock, acquire_lock()
+      will wait for it to go away. Sometimes this waiting may lead to a
+      deadlock, with the following results:
+      1) If a deadlock is entirely within MDL subsystem, it is
+         detected by the deadlock detector of this subsystem.
+         ER_LOCK_DEADLOCK error is produced. Then, the error handler
+         that is installed prior to the call to acquire_lock() attempts
+         to request a back-off and retry. Upon success, ER_LOCK_DEADLOCK
+         error is suppressed, otherwise propagated up the calling stack.
+      2) Otherwise, a deadlock may occur when the wait-for graph
+         includes edges not visible to the MDL deadlock detector.
+         One such example is a wait on an InnoDB row lock, e.g. when:
+         conn C1 gets SR MDL lock on t1 with SELECT * FROM t1
+         conn C2 gets a row lock on t2 with  SELECT * FROM t2 FOR UPDATE
+         conn C3 gets in and waits on C1 with DROP TABLE t0, t1
+         conn C2 continues and blocks on C3 with SELECT * FROM t0
+         conn C1 deadlocks by waiting on C2 by issuing SELECT * FROM
+         t2 LOCK IN SHARE MODE.
+         Such circular waits are currently only resolved by timeouts,
+         e.g. @@innodb_lock_wait_timeout or @@lock_wait_timeout.
+    */
+    MDL_deadlock_handler mdl_deadlock_handler(ot_ctx);
+
+    thd->push_internal_handler(&mdl_deadlock_handler);
+    bool result= thd->mdl_context.acquire_lock(mdl_request,
+                                               ot_ctx->get_timeout());
+    thd->pop_internal_handler();
+
+    /*
+      On failure: if a back-off was successfully requested by the handler,
+      report "no error" so the caller can retry; the NULL ticket below
+      tells the caller no lock was obtained.
+    */
+    if (result && !ot_ctx->can_recover_from_failed_open())
+      return TRUE;
+  }
+  *mdl_ticket= mdl_request->ticket;
+  return FALSE;
+}
+
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+/* Set all [named] partitions as used. */
+static int set_partitions_as_used(TABLE_LIST *tl, TABLE *t)
+{
+  /* A non-partitioned table has nothing to mark; report success. */
+  if (!t->part_info)
+    return 0;
+  return t->file->change_partitions_to_open(tl->partition_names);
+}
+#endif
+
+
+/**
+  Check if the given table is actually a VIEW that was LOCK-ed
+
+  @param thd            Thread context.
+  @param t              Table to check.
+
+  @retval TRUE  The 't'-table is a locked view
+                needed to remedy problem before retrying again.
+  @retval FALSE 't' was not locked, not a VIEW or an error happened.
+*/
+
+bool is_locked_view(THD *thd, TABLE_LIST *t)
+{
+  char path[FN_REFLEN + 1];
+  DBUG_ENTER("is_locked_view");
+
+  /*
+   Is this table a view and not a base table?
+   (it is work around to allow to open view with locked tables,
+   real fix will be made after definition cache will be made)
+
+   Since opening of view which was not explicitly locked by LOCK
+   TABLES breaks metadata locking protocol (potentially can lead
+   to deadlocks) it should be disallowed.
+  */
+  if (!thd->mdl_context.is_lock_owner(MDL_key::TABLE, t->db.str,
+                                      t->table_name.str, MDL_SHARED))
+    DBUG_RETURN(FALSE);
+
+  build_table_filename(path, sizeof(path) - 1,
+                       t->db.str, t->table_name.str, reg_ext, 0);
+
+  /*
+    Note that we can't be 100% sure that it is a view since it's
+    possible that we either simply have not found unused TABLE
+    instance in THD::open_tables list or were unable to open table
+    during prelocking process (in this case in theory we still
+    should hold shared metadata lock on it).
+  */
+  if (!dd_frm_is_view(thd, path))
+    DBUG_RETURN(FALSE);
+
+  /*
+    If parent_l of the table_list is non null then a merge table
+    has this view as child table, which is not supported.
+  */
+  if (t->parent_l)
+  {
+    my_error(ER_WRONG_MRG_TABLE, MYF(0));
+    DBUG_RETURN(FALSE);
+  }
+
+  if (tdc_open_view(thd, t, CHECK_METADATA_VERSION))
+    DBUG_RETURN(FALSE);
+
+  DBUG_ASSERT(t->view != 0);
+  DBUG_RETURN(TRUE); // VIEW
+}
+
+
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+/**
+  Switch part_info->hist_part and request partition creation if needed.
+
+  @retval true  Error or partition creation was requested.
+  @retval false No error
+*/
+bool TABLE::vers_switch_partition(THD *thd, TABLE_LIST *table_list,
+                                  Open_table_context *ot_ctx)
+{
+  /*
+    Only act for SYSTEM_TIME-partitioned tables opened for a genuine
+    write (not DELETE HISTORY, not PREPARE, not already under an
+    exclusive lock such as DDL).
+  */
+  if (!part_info || part_info->part_type != VERSIONING_PARTITION ||
+      table_list->vers_conditions.delete_history ||
+      thd->stmt_arena->is_stmt_prepare() ||
+      table_list->lock_type < TL_WRITE_ALLOW_WRITE ||
+      table_list->mdl_request.type < MDL_SHARED_WRITE ||
+      table_list->mdl_request.type == MDL_EXCLUSIVE)
+  {
+    return false;
+  }
+
+  /*
+    NOTE: we need this condition of prelocking_placeholder because we cannot do
+    auto-create after the transaction is started. Auto-create does
+    close_tables_for_reopen() and that is not possible under started transaction.
+    Also the transaction may not be cancelled at that moment: f.ex. trigger
+    after insert is run when some data is already written.
+
+    We must do auto-creation for PRELOCK_ROUTINE tables at the initial
+    open_tables() no matter what initiating sql_command is.
+  */
+  if (table_list->prelocking_placeholder != TABLE_LIST::PRELOCK_ROUTINE)
+  {
+    /* Filter by statement type: only commands that can move rows into
+       history partitions are eligible. */
+    switch (thd->lex->sql_command)
+    {
+      case SQLCOM_INSERT_SELECT:
+      case SQLCOM_INSERT:
+        if (thd->lex->duplicates != DUP_UPDATE)
+          return false;
+        break;
+      case SQLCOM_LOAD:
+        if (thd->lex->duplicates != DUP_REPLACE)
+          return false;
+        break;
+      case SQLCOM_LOCK_TABLES:
+      case SQLCOM_DELETE:
+      case SQLCOM_UPDATE:
+      case SQLCOM_REPLACE:
+      case SQLCOM_REPLACE_SELECT:
+      case SQLCOM_DELETE_MULTI:
+      case SQLCOM_UPDATE_MULTI:
+        break;
+      default:
+        /*
+          TODO: make row events set thd->lex->sql_command appropriately.
+
+          Sergei Golubchik: f.ex. currently row events increment
+          thd->status_var.com_stat[] each event for its own SQLCOM_xxx, it won't be
+          needed if they'll just set thd->lex->sql_command.
+        */
+        if (thd->rgi_slave && thd->rgi_slave->current_event &&
+            thd->lex->sql_command == SQLCOM_END)
+        {
+          /* Replication applier: UPDATE/DELETE row events count as writes. */
+          switch (thd->rgi_slave->current_event->get_type_code())
+          {
+          case UPDATE_ROWS_EVENT:
+          case UPDATE_ROWS_EVENT_V1:
+          case DELETE_ROWS_EVENT:
+          case DELETE_ROWS_EVENT_V1:
+            break;
+          default:;
+            return false;
+          }
+        }
+        break;
+    }
+    DBUG_ASSERT(!thd->lex->last_table() ||
+                !thd->lex->last_table()->vers_conditions.delete_history);
+  }
+
+  /* Explicit PARTITION () clause is incompatible with auto-switching. */
+  if (table_list->partition_names)
+  {
+    my_error(ER_VERS_NOT_ALLOWED, MYF(0), s->db.str, s->table_name.str);
+    return true;
+  }
+
+  TABLE *table= this;
+
+  /*
+      NOTE: The semantics of vers_set_hist_part() is twofold: even when we
+      don't need auto-create, we need to update part_info->hist_part.
+  */
+  uint *create_count= (table_list->vers_skip_create == thd->query_id) ?
+    NULL : &ot_ctx->vers_create_count;
+  table_list->vers_skip_create= thd->query_id;
+  if (table->part_info->vers_set_hist_part(thd, create_count))
+    return true;
+  if (ot_ctx->vers_create_count)
+  {
+    /* A new history partition is needed: decide between creating it
+       ourselves and waiting for a concurrent creator via reopen. */
+    Open_table_context::enum_open_table_action action;
+    TABLE_LIST *table_arg;
+    mysql_mutex_lock(&table->s->LOCK_share);
+    if (!table->s->vers_skip_auto_create)
+    {
+      /* We won the race: this thread performs the auto-create. */
+      table->s->vers_skip_auto_create= true;
+      action= Open_table_context::OT_ADD_HISTORY_PARTITION;
+      table_arg= table_list;
+    }
+    else
+    {
+      /*
+          NOTE: this may repeat multiple times until creating thread acquires
+          MDL_EXCLUSIVE. Since auto-creation is rare operation this is acceptable.
+          We could suspend this thread on cond-var but we must first exit
+          MDL_SHARED_WRITE and we cannot store cond-var into TABLE_SHARE
+          because it is already released and there is no guarantee that it will
+          be same instance if we acquire it again.
+      */
+      table_list->vers_skip_create= 0;
+      ot_ctx->vers_create_count= 0;
+      action= Open_table_context::OT_REOPEN_TABLES;
+      table_arg= NULL;
+      DEBUG_SYNC(thd, "reopen_history_partition");
+    }
+    mysql_mutex_unlock(&table->s->LOCK_share);
+    ot_ctx->request_backoff_action(action, table_arg);
+    return true;
+  }
+
+  return false;
+}
+#endif /* WITH_PARTITION_STORAGE_ENGINE */
+
+
+/**
+  Open a base table.
+
+  @param thd            Thread context.
+  @param table_list     Open first table in list.
+  @param ot_ctx         Context with flags which modify how open works
+                        and which is used to recover from a failed
+                        open_table() attempt.
+                        Some examples of flags:
+                        MYSQL_OPEN_IGNORE_FLUSH - Open table even if
+                        someone has done a flush. No version number
+                        checking is done.
+                        MYSQL_OPEN_HAS_MDL_LOCK - instead of acquiring
+                        metadata locks rely on that caller already has
+                        appropriate ones.
+
+  Uses a cache of open tables to find a TABLE instance not in use.
+
+  If TABLE_LIST::open_strategy is set to OPEN_IF_EXISTS, the table is
+  opened only if it exists. If the open strategy is OPEN_STUB, the
+  underlying table is never opened. In both cases, metadata locks are
+  always taken according to the lock strategy.
+
+  The function used to open temporary tables, but now it opens base tables
+  only.
+
+  @retval TRUE  Open failed. "action" parameter may contain type of action
+                needed to remedy problem before retrying again.
+  @retval FALSE Success. Members of TABLE_LIST structure are filled properly
+                (e.g.  TABLE_LIST::table is set for real tables and
+                TABLE_LIST::view is set for views).
+*/
+
+bool open_table(THD *thd, TABLE_LIST *table_list, Open_table_context *ot_ctx)
+{
+  TABLE *table;
+  const char *key;
+  uint	key_length;
+  const char *alias= table_list->alias.str;
+  uint flags= ot_ctx->get_flags();
+  MDL_ticket *mdl_ticket;
+  TABLE_SHARE *share;
+  uint gts_flags;
+  /* True when the TABLE was freshly constructed from the share below;
+     used to skip vcol_fix_expr() for reused TABLE instances. */
+  bool from_share= false;
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  int part_names_error=0;
+#endif
+  DBUG_ENTER("open_table");
+
+  /*
+    The table must not be opened already. The table can be pre-opened for
+    some statements if it is a temporary table.
+
+    open_temporary_table() must be used to open temporary tables.
+  */
+  DBUG_ASSERT(!table_list->table);
+
+  /* an open table operation needs a lot of the stack space */
+  if (check_stack_overrun(thd, STACK_MIN_SIZE_FOR_OPEN, (uchar *)&alias))
+    DBUG_RETURN(TRUE);
+
+  if (!(flags & MYSQL_OPEN_IGNORE_KILLED) && thd->killed)
+  {
+    thd->send_kill_message();
+    DBUG_RETURN(TRUE);
+  }
+
+  /*
+    Check if we're trying to take a write lock in a read only transaction.
+
+    Note that we allow write locks on log tables as otherwise logging
+    to general/slow log would be disabled in read only transactions.
+  */
+  if (table_list->mdl_request.is_write_lock_request() &&
+      thd->tx_read_only &&
+      !(flags & (MYSQL_LOCK_LOG_TABLE | MYSQL_OPEN_HAS_MDL_LOCK)))
+  {
+    my_error(ER_CANT_EXECUTE_IN_READ_ONLY_TRANSACTION, MYF(0));
+    DBUG_RETURN(true);
+  }
+
+  if (!table_list->db.str)
+  {
+    my_error(ER_NO_DB_ERROR, MYF(0));
+    DBUG_RETURN(true);
+  }
+
+  key_length= get_table_def_key(table_list, &key);
+
+  /*
+    If we're in pre-locked or LOCK TABLES mode, let's try to find the
+    requested table in the list of pre-opened and locked tables. If the
+    table is not there, return an error - we can't open not pre-opened
+    tables in pre-locked/LOCK TABLES mode.
+    TODO: move this block into a separate function.
+  */
+  if (thd->locked_tables_mode &&
+      ! (flags & MYSQL_OPEN_GET_NEW_TABLE))
+  {						// Using table locks
+    TABLE *best_table= 0;
+    int best_distance= INT_MIN;
+    for (table=thd->open_tables; table ; table=table->next)
+    {
+      if (table->s->table_cache_key.length == key_length &&
+	  !memcmp(table->s->table_cache_key.str, key, key_length))
+      {
+        if (!my_strcasecmp(system_charset_info, table->alias.c_ptr(), alias) &&
+            table->query_id != thd->query_id && /* skip tables already used */
+            (thd->locked_tables_mode == LTM_LOCK_TABLES ||
+             table->query_id == 0))
+        {
+          int distance= ((int) table->reginfo.lock_type -
+                         (int) table_list->lock_type);
+
+          /*
+            Find a table that either has the exact lock type requested,
+            or has the best suitable lock. In case there is no locked
+            table that has an equal or higher lock than requested,
+            we use the closest matching lock to be able to produce an error
+            message about wrong lock mode on the table. The best_table
+            is changed if bd < 0 <= d or bd < d < 0 or 0 <= d < bd.
+
+            distance <  0 - No suitable lock found
+            distance >  0 - we have lock mode higher then we require
+            distance == 0 - we have lock mode exactly which we need
+          */
+          if ((best_distance < 0 && distance > best_distance) ||
+              (distance >= 0 && distance < best_distance))
+          {
+            best_distance= distance;
+            best_table= table;
+            if (best_distance == 0)
+            {
+              /*
+                We have found a perfect match and can finish iterating
+                through open tables list. Check for table use conflict
+                between calling statement and SP/trigger is done in
+                lock_tables().
+              */
+              break;
+            }
+          }
+        }
+      }
+    }
+    if (best_table)
+    {
+      table= best_table;
+      table->query_id= thd->query_id;
+      table->init(thd, table_list);
+      DBUG_PRINT("info",("Using locked table"));
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+      part_names_error= set_partitions_as_used(table_list, table);
+      if (!part_names_error
+          && table->vers_switch_partition(thd, table_list, ot_ctx))
+        DBUG_RETURN(true);
+#endif
+      goto reset;
+    }
+
+    if (is_locked_view(thd, table_list))
+    {
+      if (table_list->sequence)
+      {
+        my_error(ER_NOT_SEQUENCE, MYF(0), table_list->db.str, table_list->alias.str);
+        DBUG_RETURN(true);
+      }
+      DBUG_RETURN(FALSE); // VIEW
+    }
+
+    /*
+      No table in the locked tables list. In case of explicit LOCK TABLES
+      this can happen if a user did not include the table into the list.
+      In case of pre-locked mode locked tables list is generated automatically,
+      so we may only end up here if the table did not exist when
+      locked tables list was created.
+    */
+    if (thd->locked_tables_mode == LTM_PRELOCKED)
+      my_error(ER_NO_SUCH_TABLE, MYF(0), table_list->db.str, table_list->alias.str);
+    else
+      my_error(ER_TABLE_NOT_LOCKED, MYF(0), alias);
+    DBUG_RETURN(TRUE);
+  }
+
+  /*
+    Non pre-locked/LOCK TABLES mode, and the table is not temporary.
+    This is the normal use case.
+  */
+
+  if (! (flags & MYSQL_OPEN_HAS_MDL_LOCK))
+  {
+    if (open_table_get_mdl_lock(thd, ot_ctx, &table_list->mdl_request,
+                                flags, &mdl_ticket) ||
+        mdl_ticket == NULL)
+    {
+      DEBUG_SYNC(thd, "before_open_table_wait_refresh");
+      DBUG_RETURN(TRUE);
+    }
+    DEBUG_SYNC(thd, "after_open_table_mdl_shared");
+  }
+  else
+  {
+    /*
+      Grab reference to the MDL lock ticket that was acquired
+      by the caller.
+    */
+    mdl_ticket= table_list->mdl_request.ticket;
+  }
+
+  if (table_list->open_strategy == TABLE_LIST::OPEN_IF_EXISTS)
+  {
+    if (!ha_table_exists(thd, &table_list->db, &table_list->table_name))
+      DBUG_RETURN(FALSE);
+  }
+  else if (table_list->open_strategy == TABLE_LIST::OPEN_STUB)
+    DBUG_RETURN(FALSE);
+
+  /* Table exists. Let us try to open it. */
+
+  /* Restrict the share lookup to tables, views, or both, depending on
+     what the caller (e.g. an I_S query) requested. */
+  if (table_list->i_s_requested_object & OPEN_TABLE_ONLY)
+    gts_flags= GTS_TABLE;
+  else if (table_list->i_s_requested_object &  OPEN_VIEW_ONLY)
+    gts_flags= GTS_VIEW;
+  else
+    gts_flags= GTS_TABLE | GTS_VIEW;
+
+retry_share:
+  /* Acquire the TABLE_SHARE; may also return an unused TABLE instance. */
+
+  share= tdc_acquire_share(thd, table_list, gts_flags, &table);
+
+  if (unlikely(!share))
+  {
+    /*
+      Hide "Table doesn't exist" errors if the table belongs to a view.
+      The check for thd->is_error() is necessary to not push an
+      unwanted error in case the error was already silenced.
+      @todo Rework the alternative ways to deal with ER_NO_SUCH TABLE.
+    */
+    if (thd->is_error())
+    {
+      if (table_list->parent_l)
+      {
+        thd->clear_error();
+        my_error(ER_WRONG_MRG_TABLE, MYF(0));
+      }
+      else if (table_list->belong_to_view)
+      {
+        TABLE_LIST *view= table_list->belong_to_view;
+        thd->clear_error();
+        my_error(ER_VIEW_INVALID, MYF(0),
+                 view->view_db.str, view->view_name.str);
+      }
+    }
+    DBUG_RETURN(TRUE);
+  }
+
+  /*
+    Check if this TABLE_SHARE-object corresponds to a view. Note, that there is
+    no need to check TABLE_SHARE::tdc.flushed as we do for regular tables,
+    because view shares are always up to date.
+  */
+  if (share->is_view)
+  {
+    /*
+      If parent_l of the table_list is non null then a merge table
+      has this view as child table, which is not supported.
+    */
+    if (table_list->parent_l)
+    {
+      my_error(ER_WRONG_MRG_TABLE, MYF(0));
+      goto err_lock;
+    }
+    if (table_list->sequence)
+    {
+      my_error(ER_NOT_SEQUENCE, MYF(0), table_list->db.str,
+               table_list->alias.str);
+      goto err_lock;
+    }
+
+    /*
+      This table is a view. Validate its metadata version: in particular,
+      that it was a view when the statement was prepared.
+    */
+    if (check_and_update_table_version(thd, table_list, share))
+      goto err_lock;
+
+    /* Open view */
+    if (mysql_make_view(thd, share, table_list, false))
+      goto err_lock;
+
+    /* TODO: Don't free this */
+    tdc_release_share(share);
+
+    DBUG_ASSERT(table_list->view);
+
+    DBUG_RETURN(FALSE);
+  }
+
+#ifdef WITH_WSREP
+  if (!((flags & MYSQL_OPEN_IGNORE_FLUSH) ||
+        (thd->wsrep_applier)))
+#else
+  if (!(flags & MYSQL_OPEN_IGNORE_FLUSH))
+#endif
+  {
+    if (share->tdc->flushed)
+    {
+      /*
+        We already have an MDL lock. But we have encountered an old
+        version of table in the table definition cache which is possible
+        when someone changes the table version directly in the cache
+        without acquiring a metadata lock (e.g. this can happen during
+        "rolling" FLUSH TABLE(S)).
+        Release our reference to share, wait until old version of
+        share goes away and then try to get new version of table share.
+      */
+      if (table)
+        tc_release_table(table);
+      else
+        tdc_release_share(share);
+
+      MDL_deadlock_handler mdl_deadlock_handler(ot_ctx);
+      bool wait_result;
+
+      thd->push_internal_handler(&mdl_deadlock_handler);
+      wait_result= tdc_wait_for_old_version(thd, table_list->db.str,
+                                            table_list->table_name.str,
+                                            ot_ctx->get_timeout(),
+                                            mdl_ticket->get_deadlock_weight());
+      thd->pop_internal_handler();
+
+      if (wait_result)
+        DBUG_RETURN(TRUE);
+
+      goto retry_share;
+    }
+
+    if (thd->open_tables && thd->open_tables->s->tdc->flushed)
+    {
+      /*
+        If the version changes while we're opening the tables,
+        we have to back off, close all the tables opened-so-far,
+        and try to reopen them. Note: refresh_version is currently
+        changed only during FLUSH TABLES.
+      */
+      if (table)
+        tc_release_table(table);
+      else
+        tdc_release_share(share);
+      (void)ot_ctx->request_backoff_action(Open_table_context::OT_REOPEN_TABLES,
+                                           NULL);
+      DBUG_RETURN(TRUE);
+    }
+  }
+
+  if (table)
+  {
+    /* A free TABLE instance was found in the table cache: reuse it. */
+    DBUG_ASSERT(table->file != NULL);
+    if (table->file->discover_check_version())
+    {
+      tc_release_table(table);
+      (void) ot_ctx->request_backoff_action(Open_table_context::OT_DISCOVER,
+                                            table_list);
+      DBUG_RETURN(TRUE);
+    }
+    table->file->rebind_psi();
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+    part_names_error= set_partitions_as_used(table_list, table);
+#endif
+  }
+  else
+  {
+    enum open_frm_error error;
+    /* make a new table */
+    if (!(table=(TABLE*) my_malloc(key_memory_TABLE, sizeof(*table),
+                                   MYF(MY_WME))))
+      goto err_lock;
+
+    error= open_table_from_share(thd, share, &table_list->alias,
+                                 HA_OPEN_KEYFILE | HA_TRY_READ_ONLY,
+                                 EXTRA_RECORD,
+                                 thd->open_options, table, FALSE,
+                                 IF_PARTITIONING(table_list->partition_names,0));
+
+    if (unlikely(error))
+    {
+      my_free(table);
+
+      if (error == OPEN_FRM_DISCOVER)
+        (void) ot_ctx->request_backoff_action(Open_table_context::OT_DISCOVER,
+                                              table_list);
+      else if (share->crashed)
+      {
+        if (!(flags & MYSQL_OPEN_IGNORE_REPAIR))
+          (void) ot_ctx->request_backoff_action(Open_table_context::OT_REPAIR,
+                                                table_list);
+        else
+          table_list->crashed= 1;  /* Mark that table was crashed */
+      }
+      goto err_lock;
+    }
+    if (open_table_entry_fini(thd, share, table))
+    {
+      closefrm(table);
+      my_free(table);
+      goto err_lock;
+    }
+
+    /* Add table to the share's used tables list. */
+    tc_add_table(thd, table);
+    from_share= true;
+  }
+
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  if (!part_names_error &&
+      table->vers_switch_partition(thd, table_list, ot_ctx))
+  {
+    MYSQL_UNBIND_TABLE(table->file);
+    tc_release_table(table);
+    DBUG_RETURN(true);
+  }
+#endif /* WITH_PARTITION_STORAGE_ENGINE */
+
+  if (!(flags & MYSQL_OPEN_HAS_MDL_LOCK) &&
+      table->s->table_category < TABLE_CATEGORY_INFORMATION)
+  {
+    /*
+      We are not under LOCK TABLES and going to acquire write-lock/
+      modify the base table. We need to acquire protection against
+      global read lock until end of this statement in order to have
+      this statement blocked by active FLUSH TABLES WITH READ LOCK.
+
+      We don't need to acquire this protection under LOCK TABLES as
+      such protection already acquired at LOCK TABLES time and
+      not released until UNLOCK TABLES.
+
+      We don't block statements which modify only temporary tables
+      as these tables are not preserved by any form of
+      backup which uses FLUSH TABLES WITH READ LOCK.
+
+      TODO: The fact that we sometimes acquire protection against
+            GRL only when we encounter table to be write-locked
+            slightly increases probability of deadlock.
+            This problem will be solved once Alik pushes his
+            temporary table refactoring patch and we can start
+            pre-acquiring metadata locks at the beginning of
+            open_tables() call.
+    */
+    enum enum_mdl_type mdl_type= MDL_BACKUP_DML;
+
+    if (table->s->table_category != TABLE_CATEGORY_USER)
+      mdl_type= MDL_BACKUP_SYS_DML;
+    else if (table->s->online_backup)
+      mdl_type= MDL_BACKUP_TRANS_DML;
+
+    if (table_list->mdl_request.is_write_lock_request() &&
+        ! (flags & (MYSQL_OPEN_IGNORE_GLOBAL_READ_LOCK |
+                    MYSQL_OPEN_FORCE_SHARED_MDL |
+                    MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL |
+                    MYSQL_OPEN_SKIP_SCOPED_MDL_LOCK)) &&
+        ! ot_ctx->has_protection_against_grl(mdl_type))
+    {
+      MDL_request protection_request;
+      MDL_deadlock_handler mdl_deadlock_handler(ot_ctx);
+
+      if (thd->has_read_only_protection())
+      {
+        MYSQL_UNBIND_TABLE(table->file);
+        tc_release_table(table);
+        DBUG_RETURN(TRUE);
+      }
+
+      MDL_REQUEST_INIT(&protection_request, MDL_key::BACKUP, "", "", mdl_type,
+                       MDL_STATEMENT);
+
+      /*
+        Install error handler which if possible will convert deadlock error
+        into request to back-off and restart process of opening tables.
+      */
+      thd->push_internal_handler(&mdl_deadlock_handler);
+      bool result= thd->mdl_context.acquire_lock(&protection_request,
+                                                 ot_ctx->get_timeout());
+      thd->pop_internal_handler();
+
+      if (result)
+      {
+        MYSQL_UNBIND_TABLE(table->file);
+        tc_release_table(table);
+        DBUG_RETURN(TRUE);
+      }
+
+      ot_ctx->set_has_protection_against_grl(mdl_type);
+    }
+  }
+
+  table->mdl_ticket= mdl_ticket;
+  table->reginfo.lock_type=TL_READ;		/* Assume read */
+
+  table->init(thd, table_list);
+
+  DBUG_ASSERT(table != thd->open_tables);
+  table->next= thd->open_tables;		/* Link into simple list */
+  thd->set_open_tables(table);
+
+ reset:
+  /*
+    Check that there is no reference to a condition from an earlier query
+    (cf. Bug#58553). 
+  */
+  DBUG_ASSERT(table->file->pushed_cond == NULL);
+  table_list->updatable= 1; // It is not derived table nor non-updatable VIEW
+  table_list->table= table;
+
+  if (!from_share && table->vcol_fix_expr(thd))
+    DBUG_RETURN(true);
+
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  if (unlikely(table->part_info))
+  {
+    /* Partitions specified were incorrect.*/
+    if (part_names_error)
+    {
+      table->file->print_error(part_names_error, MYF(0));
+      DBUG_RETURN(true);
+    }
+  }
+  else if (table_list->partition_names)
+  {
+    /* Don't allow PARTITION () clause on a nonpartitioned table */
+    my_error(ER_PARTITION_CLAUSE_ON_NONPARTITIONED, MYF(0));
+    DBUG_RETURN(true);
+  }
+#endif
+  if (table_list->sequence && table->s->table_type != TABLE_TYPE_SEQUENCE)
+  {
+    my_error(ER_NOT_SEQUENCE, MYF(0), table_list->db.str, table_list->alias.str);
+    DBUG_RETURN(true);
+  }
+
+  DBUG_ASSERT(thd->locked_tables_mode || table->file->row_logging == 0);
+  DBUG_RETURN(false);
+
+err_lock:
+  tdc_release_share(share);
+
+  DBUG_PRINT("exit", ("failed"));
+  DBUG_RETURN(true);
+}
+
+
+/**
+   Find table in the list of open tables.
+
+   @param list       List of TABLE objects to be inspected.
+   @param db         Database name
+   @param table_name Table name
+
+   @return Pointer to the TABLE object found, 0 if no table found.
+*/
+
+TABLE *find_locked_table(TABLE *list, const char *db, const char *table_name)
+{
+  char	key[MAX_DBKEY_LENGTH];
+  uint key_length= tdc_create_key(key, db, table_name);
+
+  /* Walk the list and return the first TABLE whose cache key matches. */
+  TABLE *cur= list;
+  while (cur)
+  {
+    if (cur->s->table_cache_key.length == key_length &&
+        memcmp(cur->s->table_cache_key.str, key, key_length) == 0)
+      return cur;
+    cur= cur->next;
+  }
+  return NULL;
+}
+
+
+/**
+   Find instance of TABLE with upgradable or exclusive metadata
+   lock from the list of open tables, emit error if no such table
+   found.
+
+   @param thd        Thread context
+   @param db         Database name.
+   @param table_name Name of table.
+   @param p_error    In the case of an error (when the function returns NULL)
+                     the error number is stored there.
+                     If the p_error is NULL, function launches the error itself.
+
+   @note This function checks if the connection holds a global IX
+         metadata lock. If no such lock is found, it is not safe to
+         upgrade the lock and ER_TABLE_NOT_LOCKED_FOR_WRITE will be
+         reported.
+
+   @return Pointer to TABLE instance with MDL_SHARED_UPGRADABLE
+           MDL_SHARED_NO_WRITE, MDL_SHARED_NO_READ_WRITE, or
+           MDL_EXCLUSIVE metadata lock, NULL otherwise.
+*/
+
+TABLE *find_table_for_mdl_upgrade(THD *thd, const char *db,
+                                  const char *table_name, int *p_error)
+{
+  int error= 0;
+  TABLE *tab= find_locked_table(thd->open_tables, db, table_name);
+
+  if (tab == NULL)
+    error= ER_TABLE_NOT_LOCKED;
+  else if (!thd->mdl_context.is_lock_owner(MDL_key::BACKUP, "", "",
+                                           MDL_BACKUP_DDL))
+  {
+    /*
+      Without the global IX (BACKUP DDL) lock it is not safe to upgrade
+      the metadata lock. This can happen with FLUSH TABLES WITH READ LOCK,
+      which deliberately skips the global IX lock in order to stay
+      compatible with the global read lock.
+    */
+    error= ER_TABLE_NOT_LOCKED_FOR_WRITE;
+  }
+  else
+  {
+    /*
+      The same table may be open several times under LOCK TABLES with
+      different lock types; skip instances whose MDL ticket cannot be
+      upgraded. An instance with no ticket at all is accepted as-is.
+    */
+    while (tab->mdl_ticket != NULL &&
+           !tab->mdl_ticket->is_upgradable_or_exclusive() &&
+           (tab= find_locked_table(tab->next, db, table_name)))
+      ;
+    if (tab == NULL)
+      error= ER_TABLE_NOT_LOCKED_FOR_WRITE;
+  }
+
+  if (error == 0)
+    return tab;
+
+  /* Either hand the error code back to the caller or raise it here. */
+  if (p_error)
+    *p_error= error;
+  else
+    my_error(error, MYF(0), table_name);
+
+  return NULL;
+}
+
+
+/***********************************************************************
+  class Locked_tables_list implementation. Declared in sql_class.h
+************************************************************************/
+
+/**
+  Enter LTM_LOCK_TABLES mode.
+
+  Enter the LOCK TABLES mode using all the tables that are
+  currently open and locked in this connection.
+  Initializes a TABLE_LIST instance for every locked table.
+
+  @param  thd  thread handle
+
+  @return TRUE if out of memory.
+*/
+
+bool
+Locked_tables_list::init_locked_tables(THD *thd)
+{
+  DBUG_ASSERT(thd->locked_tables_mode == LTM_NONE);
+  DBUG_ASSERT(m_locked_tables == NULL);
+  DBUG_ASSERT(m_reopen_array == NULL);
+  DBUG_ASSERT(m_locked_tables_count == 0);
+
+  /*
+    Build one TABLE_LIST element per open table. The db/table/alias
+    names are copied into m_locked_tables_root so they stay valid even
+    after the TABLE object is temporarily closed (e.g. by FLUSH TABLES
+    or DDL) and later reopened.
+  */
+  for (TABLE *table= thd->open_tables; table;
+       table= table->next, m_locked_tables_count++)
+  {
+    TABLE_LIST *src_table_list= table->pos_in_table_list;
+    LEX_CSTRING db, table_name, alias;
+
+    db.length=         table->s->db.length;
+    table_name.length= table->s->table_name.length;
+    alias.length=      table->alias.length();
+    TABLE_LIST *dst_table_list;
+
+    /* One allocation covers the element and all three NUL-terminated names. */
+    if (! multi_alloc_root(&m_locked_tables_root,
+                           &dst_table_list, sizeof(*dst_table_list),
+                           &db.str, (size_t) db.length + 1,
+                           &table_name.str, (size_t) table_name.length + 1,
+                           &alias.str, (size_t) alias.length + 1,
+                           NullS))
+    {
+      reset();
+      return TRUE;
+    }
+
+    /* length + 1 copies the terminating NUL byte as well. */
+    memcpy((char*) db.str,         table->s->db.str, db.length + 1);
+    memcpy((char*) table_name.str, table->s->table_name.str,
+           table_name.length + 1);
+    memcpy((char*) alias.str,      table->alias.c_ptr(), alias.length + 1);
+    dst_table_list->init_one_table(&db, &table_name,
+                                   &alias, table->reginfo.lock_type);
+    dst_table_list->table= table;
+    dst_table_list->mdl_request.ticket= src_table_list->mdl_request.ticket;
+
+    /* Link last into the list of tables */
+    *(dst_table_list->prev_global= m_locked_tables_last)= dst_table_list;
+    m_locked_tables_last= &dst_table_list->next_global;
+    table->pos_in_locked_tables= dst_table_list;
+  }
+  if (m_locked_tables_count)
+  {
+    /**
+      Allocate an auxiliary array to pass to mysql_lock_tables()
+      in reopen_tables(). reopen_tables() is a critical
+      path and we don't want to complicate it with extra allocations.
+    */
+    m_reopen_array= (TABLE_LIST**)alloc_root(&m_locked_tables_root,
+                                             sizeof(TABLE_LIST*) *
+                                             (m_locked_tables_count+1));
+    if (m_reopen_array == NULL)
+    {
+      reset();
+      return TRUE;
+    }
+  }
+
+  TRANSACT_TRACKER(add_trx_state(thd, TX_LOCKED_TABLES));
+
+  thd->enter_locked_tables_mode(LTM_LOCK_TABLES);
+
+  return FALSE;
+}
+
+
+/**
+  Leave LTM_LOCK_TABLES mode if it's been entered.
+
+  Close all locked tables, free memory, and leave the mode.
+
+  @note This function is a no-op if we're not in LOCK TABLES.
+*/
+
+int
+Locked_tables_list::unlock_locked_tables(THD *thd)
+{
+  /* Returns the error (if any) from closing the thread's tables. */
+  int error;
+  DBUG_ASSERT(!thd->in_sub_stmt &&
+              !(thd->state_flags & Open_tables_state::BACKUPS_AVAIL));
+  /*
+    Sic: we must be careful to not close open tables if
+    we're not in LOCK TABLES mode: unlock_locked_tables() is
+    sometimes called implicitly, expecting no effect on
+    open tables, e.g. from begin_trans().
+  */
+  if (thd->locked_tables_mode != LTM_LOCK_TABLES)
+    return 0;
+
+  for (TABLE_LIST *table_list= m_locked_tables;
+       table_list; table_list= table_list->next_global)
+  {
+    /*
+      Clear the position in the list, the TABLE object will be
+      returned to the table cache.
+    */
+    if (table_list->table)                    // If not closed
+      table_list->table->pos_in_locked_tables= NULL;
+  }
+  /* Must leave the mode before close_thread_tables() below. */
+  thd->leave_locked_tables_mode();
+
+  TRANSACT_TRACKER(clear_trx_state(thd, TX_LOCKED_TABLES));
+
+  DBUG_ASSERT(thd->transaction->stmt.is_empty());
+  error= close_thread_tables(thd);
+
+  /*
+    We rely on the caller to implicitly commit the
+    transaction and release transactional locks.
+  */
+
+  /*
+    After closing tables we can free memory used for storing lock
+    request for metadata locks and TABLE_LIST elements.
+  */
+  reset();
+  return error;
+}
+
+
+/**
+  Remove all meta data locks associated with table and release locked
+  table mode if there is no locked tables anymore
+*/
+
+int
+Locked_tables_list::unlock_locked_table(THD *thd, MDL_ticket *mdl_ticket)
+{
+  /*
+    Verify that we really are in LOCK TABLES mode. Since this function
+    is only reached on an error path, doing the check here keeps the
+    callers simpler.
+  */
+  if (thd->locked_tables_mode != LTM_LOCK_TABLES)
+    return 0;
+
+  /*
+    A table may be open and locked multiple times under LOCK TABLES,
+    so every metadata lock registered for this name must be dropped.
+  */
+  if (mdl_ticket)
+    thd->mdl_context.release_all_locks_for_name(mdl_ticket);
+
+  /* Nothing locked any more: fall back to a full UNLOCK TABLES. */
+  return thd->lock->table_count == 0 ? unlock_locked_tables(thd) : 0;
+}
+
+
+/*
+  Free memory allocated for storing locks
+*/
+
+void Locked_tables_list::reset()
+{
+  /* Return the list to its pristine, empty state. */
+  m_locked_tables= NULL;
+  m_locked_tables_last= &m_locked_tables;
+  m_locked_tables_count= 0;
+  m_reopen_array= NULL;
+  some_table_marked_for_reopen= 0;
+  /* Release the memroot that backed the TABLE_LIST elements and names. */
+  free_root(&m_locked_tables_root, MYF(0));
+}
+
+
+/**
+  Unlink a locked table from the locked tables list, either
+  temporarily or permanently.
+
+  @param  thd        thread handle
+  @param  table_list the element of locked tables list.
+                     The implementation assumes that this argument
+                     points to a TABLE_LIST element linked into
+                     the locked tables list. Passing a TABLE_LIST
+                     instance that is not part of locked tables
+                     list will lead to a crash.
+  @param  remove_from_locked_tables
+                      TRUE if the table is removed from the list
+                      permanently.
+
+  This function is a no-op if we're not under LOCK TABLES.
+
+  @sa Locked_tables_list::reopen_tables()
+*/
+
+
+void Locked_tables_list::unlink_from_list(THD *thd,
+                                          TABLE_LIST *table_list,
+                                          bool remove_from_locked_tables)
+{
+  /*
+    If mode is not LTM_LOCK_TABLES, we needn't do anything. Moreover,
+    outside this mode pos_in_locked_tables value is not trustworthy.
+  */
+  if (thd->locked_tables_mode != LTM_LOCK_TABLES &&
+      thd->locked_tables_mode != LTM_PRELOCKED_UNDER_LOCK_TABLES)
+    return;
+
+  /*
+    table_list must be set and point to pos_in_locked_tables of some
+    table.
+  */
+  DBUG_ASSERT(table_list->table->pos_in_locked_tables == table_list);
+
+  /* Clear the pointer, the table will be returned to the table cache. */
+  table_list->table->pos_in_locked_tables= NULL;
+
+  /* Mark the table as closed in the locked tables list. */
+  table_list->table= NULL;
+
+  /*
+    If the table is being dropped or renamed, remove it from
+    the locked tables list (implicitly drop the LOCK TABLES lock
+    on it).
+  */
+  if (remove_from_locked_tables)
+  {
+    /* Standard doubly-linked-list unlink; keep the tail pointer valid. */
+    *table_list->prev_global= table_list->next_global;
+    if (table_list->next_global == NULL)
+      m_locked_tables_last= table_list->prev_global;
+    else
+      table_list->next_global->prev_global= table_list->prev_global;
+    m_locked_tables_count--;
+  }
+}
+
+/**
+  This is an attempt to recover (somewhat) in case of an error.
+  If we failed to reopen a closed table, let's unlink it from the
+  list and forget about it. From a user perspective that would look
+  as if the server "lost" the lock on one of the locked tables.
+
+  @note This function is a no-op if we're not under LOCK TABLES.
+*/
+
+void Locked_tables_list::
+unlink_all_closed_tables(THD *thd, MYSQL_LOCK *lock, size_t reopen_count)
+{
+  /* If we managed to take a lock, unlock tables and free the lock. */
+  if (lock)
+    mysql_unlock_tables(thd, lock);
+  /*
+    If a failure happened in reopen_tables(), we may have succeeded
+    reopening some tables, but not all.
+    This works when the connection was killed in mysql_lock_tables().
+  */
+  if (reopen_count)
+  {
+    while (reopen_count--)
+    {
+      /*
+        When closing the table, we must remove it
+        from thd->open_tables list.
+        We rely on the fact that open_table() that was used
+        in reopen_tables() always links the opened table
+        to the beginning of the open_tables list.
+      */
+      DBUG_ASSERT(thd->open_tables == m_reopen_array[reopen_count]->table);
+
+      /* Sever both links between the TABLE and its locked-tables entry. */
+      thd->open_tables->pos_in_locked_tables->table= NULL;
+      thd->open_tables->pos_in_locked_tables= NULL;
+
+      close_thread_table(thd, &thd->open_tables);
+    }
+  }
+  /* Exclude all closed tables from the LOCK TABLES list. */
+  for (TABLE_LIST *table_list= m_locked_tables; table_list; table_list=
+       table_list->next_global)
+  {
+    if (table_list->table == NULL)
+    {
+      /* Unlink from list. */
+      *table_list->prev_global= table_list->next_global;
+      if (table_list->next_global == NULL)
+        m_locked_tables_last= table_list->prev_global;
+      else
+        table_list->next_global->prev_global= table_list->prev_global;
+      m_locked_tables_count--;
+    }
+  }
+
+  /* If no tables left, do an automatic UNLOCK TABLES */
+  if (thd->lock && thd->lock->table_count == 0)
+  {
+    /*
+      We have to rollback any open transactions here.
+      This is required in the case where the server has been killed
+      but some transactions are still open (as part of locked tables).
+      If we don't do this, we will get an assert in unlock_locked_tables().
+    */
+    ha_rollback_trans(thd, FALSE);
+    ha_rollback_trans(thd, TRUE);
+    unlock_locked_tables(thd);
+  }
+}
+
+
+/*
+  Mark all instances of the table to be reopened
+
+  This is only needed when LOCK TABLES is active
+*/
+
+void Locked_tables_list::mark_table_for_reopen(TABLE *table)
+{
+  TABLE_SHARE *share= table->s;
+
+  /* Flag every locked instance that shares this table's TABLE_SHARE. */
+  for (TABLE_LIST *entry= m_locked_tables; entry; entry= entry->next_global)
+  {
+    /*
+      entry->table can be NULL, e.g. for TRUNCATE TABLE when the table
+      was locked twice and one instance was already closed by
+      close_all_tables_for_name().
+    */
+    if (entry->table == NULL || entry->table->s != share)
+      continue;
+    entry->table->internal_set_needs_reopen(true);
+    some_table_marked_for_reopen= 1;
+  }
+
+  /* Also flag the instance itself, for the case LOCK TABLES was not used. */
+  table->internal_set_needs_reopen(true);
+}
+
+
+/**
+  Reopen the tables locked with LOCK TABLES and temporarily closed
+  by a DDL statement or FLUSH TABLES.
+
+  @param need_reopen  If set, reopen the open tables that are marked
+                      for reopen.
+                      If not set, reopen the tables that were closed.
+
+  @note This function is a no-op if we're not under LOCK TABLES.
+
+  @return TRUE if an error reopening the tables. May happen in
+               case of some fatal system error only, e.g. a disk
+               corruption, out of memory or a serious bug in the
+               locking.
+*/
+
+bool
+Locked_tables_list::reopen_tables(THD *thd, bool need_reopen)
+{
+  /* Ignore a pending KILL only if the statement already succeeded. */
+  bool is_ok= thd->get_stmt_da()->is_ok();
+  Open_table_context ot_ctx(thd, !is_ok ? MYSQL_OPEN_REOPEN:
+                                  MYSQL_OPEN_IGNORE_KILLED | MYSQL_OPEN_REOPEN);
+  uint reopen_count= 0;
+  MYSQL_LOCK *lock;
+  MYSQL_LOCK *merged_lock;
+  DBUG_ENTER("Locked_tables_list::reopen_tables");
+
+  DBUG_ASSERT(some_table_marked_for_reopen || !need_reopen);
+
+
+  /* Reset flag that some table was marked for reopen */
+  if (need_reopen)
+    some_table_marked_for_reopen= 0;
+
+  /*
+    Pass 1: collect the entries to reopen into m_reopen_array
+    (pre-allocated in init_locked_tables()).
+  */
+  for (TABLE_LIST *table_list= m_locked_tables;
+       table_list; table_list= table_list->next_global)
+  {
+    if (need_reopen)
+    {
+      if (!table_list->table || !table_list->table->needs_reopen())
+        continue;
+      /* Close the still-open instance first so it can be reopened below. */
+      for (TABLE **prev= &thd->open_tables; *prev; prev= &(*prev)->next)
+      {
+        if (*prev == table_list->table)
+        {
+          thd->locked_tables_list.unlink_from_list(thd, table_list, false);
+          mysql_lock_remove(thd, thd->lock, *prev);
+          (*prev)->file->extra(HA_EXTRA_PREPARE_FOR_FORCED_CLOSE);
+          close_thread_table(thd, prev);
+          break;
+        }
+      }
+      DBUG_ASSERT(table_list->table == NULL);
+    }
+    else
+    {
+      if (table_list->table)                      /* The table was not closed */
+        continue;
+    }
+
+    DBUG_ASSERT(reopen_count < m_locked_tables_count);
+    m_reopen_array[reopen_count++]= table_list;
+  }
+  /* Pass 2: reopen the collected tables and re-take the thr locks. */
+  if (reopen_count)
+  {
+    TABLE **tables= (TABLE**) my_alloca(reopen_count * sizeof(TABLE*));
+
+    for (uint i= 0 ; i < reopen_count ; i++)
+    {
+      TABLE_LIST *table_list= m_reopen_array[i];
+      /* Links into thd->open_tables upon success */
+      if (open_table(thd, table_list, &ot_ctx))
+      {
+        unlink_all_closed_tables(thd, 0, i);
+        my_afree((void*) tables);
+        DBUG_RETURN(TRUE);
+      }
+      tables[i]= table_list->table;
+      table_list->table->pos_in_locked_tables= table_list;
+      /* See also the comment on lock type in init_locked_tables(). */
+      table_list->table->reginfo.lock_type= table_list->lock_type;
+    }
+
+    thd->in_lock_tables= 1;
+    /*
+      We re-lock all tables with mysql_lock_tables() at once rather
+      than locking one table at a time because of the case
+      reported in Bug#45035: when the same table is present
+      in the list many times, thr_lock.c fails to grant READ lock
+      on a table that is already locked by WRITE lock, even if
+      WRITE lock is taken by the same thread. If READ and WRITE
+      lock are passed to thr_lock.c in the same list, everything
+      works fine. Patching legacy code of thr_lock.c is risking to
+      break something else.
+    */
+    lock= mysql_lock_tables(thd, tables, reopen_count,
+                            MYSQL_OPEN_REOPEN | MYSQL_LOCK_USE_MALLOC);
+    thd->in_lock_tables= 0;
+    if (lock == NULL || (merged_lock=
+                         mysql_lock_merge(thd->lock, lock)) == NULL)
+    {
+      /* On failure drop every reopened table again and report deadlock. */
+      unlink_all_closed_tables(thd, lock, reopen_count);
+      if (! thd->killed)
+        my_error(ER_LOCK_DEADLOCK, MYF(0));
+      my_afree((void*) tables);
+      DBUG_RETURN(TRUE);
+    }
+    thd->lock= merged_lock;
+    my_afree((void*) tables);
+  }
+  DBUG_RETURN(FALSE);
+}
+
+/**
+  Add back a locked table to the locked list that we just removed from it.
+  This is needed in CREATE OR REPLACE TABLE where we are dropping, creating
+  and re-opening a locked table.
+
+  @return 0  ok
+  @return 1  error
+*/
+
+bool Locked_tables_list::restore_lock(THD *thd, TABLE_LIST *dst_table_list,
+                                      TABLE *table, MYSQL_LOCK *lock)
+{
+  MYSQL_LOCK *merged_lock;
+  DBUG_ENTER("restore_lock");
+  DBUG_ASSERT(!strcmp(dst_table_list->table_name.str, table->s->table_name.str));
+
+  /* Ensure we have the memory to add the table back */
+  if (!(merged_lock= mysql_lock_merge(thd->lock, lock)))
+    DBUG_RETURN(1);
+  thd->lock= merged_lock;
+
+  /* Link to the new table */
+  dst_table_list->table= table;
+  /*
+    The lock type may have changed (normally it should not as create
+    table will lock the table in write mode
+  */
+  dst_table_list->lock_type= table->reginfo.lock_type;
+  table->pos_in_locked_tables= dst_table_list;
+
+  /* Re-link the entry at the tail of the locked tables list. */
+  add_back_last_deleted_lock(dst_table_list);
+
+  /*
+    Downgrade the exclusive MDL taken for CREATE to the level a plain
+    LOCK TABLES would hold: SNRW for write locks, S read otherwise.
+  */
+  table->mdl_ticket->downgrade_lock(table->reginfo.lock_type >=
+                                    TL_FIRST_WRITE ?
+                                    MDL_SHARED_NO_READ_WRITE :
+                                    MDL_SHARED_READ);
+
+  DBUG_RETURN(0);
+}
+
+/*
+  Add back the last deleted lock structure.
+  This should be followed by a call to reopen_tables() to
+  open the table.
+*/
+
+void Locked_tables_list::add_back_last_deleted_lock(TABLE_LIST *dst_table_list)
+{
+  /* Append the element at the tail of the locked tables list. */
+  dst_table_list->next_global= 0;
+  dst_table_list->prev_global= m_locked_tables_last;
+  *m_locked_tables_last= dst_table_list;
+  m_locked_tables_last= &dst_table_list->next_global;
+  m_locked_tables_count++;
+}
+
+
+#ifndef DBUG_OFF
+/* Cause a spurious statement reprepare for debug purposes. */
+static bool inject_reprepare(THD *thd)
+{
+  /* Only inject once per statement, and only if an observer is installed. */
+  if (!thd->m_reprepare_observer || thd->stmt_arena->is_reprepared != FALSE)
+    return FALSE;
+
+  thd->m_reprepare_observer->report_error(thd);
+  return TRUE;
+}
+#endif
+
+/**
+  Compare metadata versions of an element obtained from the table
+  definition cache and its corresponding node in the parse tree.
+
+  @details If the new and the old values mismatch, invoke
+  Metadata_version_observer.
+  At prepared statement prepare, all TABLE_LIST version values are
+  NULL and we always have a mismatch. But there is no observer set
+  in THD, and therefore no error is reported. Instead, we update
+  the value in the parse tree, effectively recording the original
+  version.
+  At prepared statement execute, an observer may be installed.  If
+  there is a version mismatch, we push an error and return TRUE.
+
+  For conventional execution (no prepared statements), the
+  observer is never installed.
+
+  @sa Execute_observer
+  @sa check_prepared_statement() to see cases when an observer is installed
+  @sa TABLE_LIST::is_the_same_definition()
+  @sa TABLE_SHARE::get_table_ref_id()
+
+  @param[in]      thd         used to report errors
+  @param[in,out]  tables      TABLE_LIST instance created by the parser
+                              Metadata version information in this object
+                              is updated upon success.
+  @param[in]      table_share an element from the table definition cache
+
+  @retval  TRUE  an error, which has been reported
+  @retval  FALSE success, version in TABLE_LIST has been updated
+*/
+
+static bool
+check_and_update_table_version(THD *thd,
+                               TABLE_LIST *tables, TABLE_SHARE *table_share)
+{
+  /*
+    First, verify that TABLE_LIST was indeed *created by the parser* -
+    it must be in the global TABLE_LIST list. Standalone TABLE_LIST objects
+    created with TABLE_LIST::init_one_table() have a short life time and
+    aren't linked anywhere.
+  */
+  if (tables->prev_global && !tables->is_the_same_definition(thd, table_share))
+  {
+    if (thd->m_reprepare_observer &&
+        thd->m_reprepare_observer->report_error(thd))
+    {
+      /*
+        Version of the table share is different from the
+        previous execution of the prepared statement, and it is
+        unacceptable for this SQLCOM. Error has been reported.
+      */
+      DBUG_ASSERT(thd->is_error());
+      return TRUE;
+    }
+    /* Always maintain the latest version and type */
+    tables->set_table_ref_id(table_share);
+  }
+
+  /* Debug hook: force a reprepare on every statement. */
+  DBUG_EXECUTE_IF("reprepare_each_statement", return inject_reprepare(thd););
+  return FALSE;
+}
+
+
+/**
+  Compares versions of a stored routine obtained from the sp cache
+  and the version used at prepare.
+
+  @details If the new and the old values mismatch, invoke
+  Metadata_version_observer.
+  At prepared statement prepare, all Sroutine_hash_entry version values
+  are NULL and we always have a mismatch. But there is no observer set
+  in THD, and therefore no error is reported. Instead, we update
+  the value in Sroutine_hash_entry, effectively recording the original
+  version.
+  At prepared statement execute, an observer may be installed.  If
+  there is a version mismatch, we push an error and return TRUE.
+
+  For conventional execution (no prepared statements), the
+  observer is never installed.
+
+  @param[in]      thd         used to report errors
+  @param[in/out]  rt          pointer to stored routine entry in the
+                              parse tree
+  @param[in]      sp          pointer to stored routine cache entry.
+                              Can be NULL if there is no such routine.
+  @retval  TRUE  an error, which has been reported
+  @retval  FALSE success, version in Sroutine_hash_entry has been updated
+*/
+
+static bool
+check_and_update_routine_version(THD *thd, Sroutine_hash_entry *rt,
+                                 sp_head *sp)
+{
+  ulong spc_version= sp_cache_version();
+  /* sp is NULL if there is no such routine. */
+  ulong version= sp ? sp->sp_cache_version() : spc_version;
+  /*
+    If the version in the parse tree is stale,
+    or the version in the cache is stale and sp is not used,
+    we need to reprepare.
+    Sic: version != spc_version <--> sp is not NULL.
+    (That invariant is what makes the sp->is_invoked() dereference
+    below safe despite sp possibly being NULL.)
+  */
+  if (rt->m_sp_cache_version != version ||
+      (version != spc_version && !sp->is_invoked()))
+  {
+    if (thd->m_reprepare_observer &&
+        thd->m_reprepare_observer->report_error(thd))
+    {
+      /*
+        Version of the sp cache is different from the
+        previous execution of the prepared statement, and it is
+        unacceptable for this SQLCOM. Error has been reported.
+      */
+      DBUG_ASSERT(thd->is_error());
+      return TRUE;
+    }
+    /* Always maintain the latest cache version. */
+    rt->m_sp_cache_version= version;
+  }
+  return FALSE;
+}
+
+
+/**
+   Open view by getting its definition from disk (and table cache in future).
+
+   @param thd               Thread handle
+   @param table_list        TABLE_LIST with db, table_name & belong_to_view
+   @param flags             Flags which modify how we open the view
+
+   @todo This function is needed for special handling of views under
+         LOCK TABLES. We probably should get rid of it in long term.
+
+   @return FALSE if success, TRUE - otherwise.
+*/
+
+bool tdc_open_view(THD *thd, TABLE_LIST *table_list, uint flags)
+{
+  TABLE not_used;
+  TABLE_SHARE *share;
+  bool err= TRUE;
+
+  if (!(share= tdc_acquire_share(thd, table_list, GTS_VIEW)))
+    return TRUE;
+
+  DBUG_ASSERT(share->is_view);
+
+  err= mysql_make_view(thd, share, table_list, (flags & OPEN_VIEW_NO_PARSE));
+
+  if (!err && (flags & CHECK_METADATA_VERSION))
+  {
+    /*
+      Check TABLE_SHARE-version of view only if we have been instructed to do
+      so. We do not need to check the version if we're executing CREATE VIEW or
+      ALTER VIEW statements.
+
+      In the future, this functionality should be moved out from
+      tdc_open_view(), and  tdc_open_view() should became a part of a clean
+      table-definition-cache interface.
+    */
+    /*
+      NOTE(review): if check_and_update_table_version() fails, `err` is
+      still FALSE here, so the function returns success even though an
+      error has been reported in thd (callers presumably check
+      thd->is_error() on the reprepare path) — confirm this is intended.
+      The `goto ret` merely falls through to the label below.
+    */
+    if (check_and_update_table_version(thd, table_list, share))
+      goto ret;
+  }
+
+ret:
+  tdc_release_share(share);
+
+  return err;
+}
+
+
+/**
+   Finalize the process of TABLE creation by loading table triggers
+   and taking action if a HEAP table content was emptied implicitly.
+*/
+
+static bool open_table_entry_fini(THD *thd, TABLE_SHARE *share, TABLE *entry)
+{
+  /* Load the table's triggers; failure aborts the open. */
+  if (Table_triggers_list::check_n_load(thd, &share->db,
+                                        &share->table_name, entry, 0))
+    return TRUE;
+
+  /*
+    If we are here, there was no fatal error (but error may be still
+    uninitialized).
+  */
+  if (unlikely(entry->file->implicit_emptied))
+  {
+    /*
+      The engine implicitly emptied the table (e.g. a HEAP table lost its
+      contents on restart); log a DELETE so replicas stay consistent.
+    */
+    entry->file->implicit_emptied= 0;
+    if (mysql_bin_log.is_open())
+    {
+      char query_buf[2*FN_REFLEN + 21];
+      String query(query_buf, sizeof(query_buf), system_charset_info);
+
+      query.length(0);
+      query.append(STRING_WITH_LEN("DELETE FROM "));
+      append_identifier(thd, &query, &share->db);
+      query.append('.');
+      append_identifier(thd, &query, &share->table_name);
+
+      /*
+        we bypass thd->binlog_query() here,
+        as it does a lot of extra work, that is simply wrong in this case
+      */
+      Query_log_event qinfo(thd, query.ptr(), query.length(),
+                            FALSE, TRUE, TRUE, 0);
+      if (mysql_bin_log.write(&qinfo))
+        return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
+/**
+   Auxiliary routine which is used for performing automatic table repair.
+*/
+
+static bool auto_repair_table(THD *thd, TABLE_LIST *table_list)
+{
+  TABLE_SHARE *share;
+  TABLE entry;
+  bool result= TRUE;
+
+  thd->clear_error();
+
+  if (!(share= tdc_acquire_share(thd, table_list, GTS_TABLE)))
+    return result;
+
+  DBUG_ASSERT(! share->is_view);
+
+  /*
+    Open with HA_OPEN_FOR_REPAIR and, if the engine reports the table as
+    crashed, attempt an automatic check-and-repair.
+  */
+  if (open_table_from_share(thd, share, &table_list->alias,
+                            HA_OPEN_KEYFILE | HA_TRY_READ_ONLY,
+                            EXTRA_RECORD,
+                            ha_open_options | HA_OPEN_FOR_REPAIR,
+                            &entry, FALSE) || ! entry.file ||
+      (entry.file->is_crashed() && entry.file->ha_check_and_repair(thd)))
+  {
+    /* Give right error message */
+    thd->clear_error();
+    my_error(ER_NOT_KEYFILE, MYF(0), share->table_name.str);
+    sql_print_error("Couldn't repair table: %s.%s", share->db.str,
+                    share->table_name.str);
+    if (entry.file)
+      closefrm(&entry);
+  }
+  else
+  {
+    thd->clear_error();			// Clear error message
+    closefrm(&entry);
+    result= FALSE;
+  }
+
+  /* Force the repaired table to be re-read from disk on next open. */
+  tdc_remove_referenced_share(thd, share);
+  return result;
+}
+
+
+/** Open_table_context */
+
+Open_table_context::Open_table_context(THD *thd, uint flags)
+  :m_thd(thd),
+   m_failed_table(NULL),
+   /* Remember the MDL savepoint so a back-off can roll back to it. */
+   m_start_of_statement_svp(thd->mdl_context.mdl_savepoint()),
+   m_timeout(flags & MYSQL_LOCK_IGNORE_TIMEOUT ?
+             LONG_TIMEOUT : thd->variables.lock_wait_timeout),
+   m_flags(flags),
+   m_action(OT_NO_ACTION),
+   /* Holding locks already restricts which back-off actions are safe. */
+   m_has_locks(thd->mdl_context.has_locks()),
+   m_has_protection_against_grl(0),
+   vers_create_count(0)
+{}
+
+
+/**
+  Check if we can back-off and set back off action if we can.
+  Otherwise report and return error.
+
+  @retval  TRUE if back-off is impossible.
+  @retval  FALSE if we can back off. Back off action has been set.
+*/
+
+bool
+Open_table_context::
+request_backoff_action(enum_open_table_action action_arg,
+                       TABLE_LIST *table)
+{
+  /*
+    A back off action may be one of three kinds:
+
+    * We met a broken table that needs repair, or a table that
+      is not present on this MySQL server and needs re-discovery.
+      To perform the action, we need an exclusive metadata lock on
+      the table. Acquiring X lock while holding other shared
+      locks can easily lead to deadlocks. We rely on MDL deadlock
+      detector to discover them. If this is a multi-statement
+      transaction that holds metadata locks for completed statements,
+      we should keep these locks after discovery/repair.
+      The action type in this case is OT_DISCOVER or OT_REPAIR.
+    * Our attempt to acquire an MDL lock lead to a deadlock,
+      detected by the MDL deadlock detector. The current
+      session was chosen a victim. If this is a multi-statement
+      transaction that holds metadata locks taken by completed
+      statements, restarting locking for the current statement
+      may lead to a livelock. Releasing locks of completed
+      statements can not be done as will lead to violation
+      of ACID. Thus, again, if m_has_locks is set,
+      we report an error. Otherwise, when there are no metadata
+      locks other than which belong to this statement, we can
+      try to recover from error by releasing all locks and
+      restarting the pre-locking.
+      Similarly, a deadlock error can occur when the
+      pre-locking process met a TABLE_SHARE that is being
+      flushed, and unsuccessfully waited for the flush to
+      complete. A deadlock in this case can happen, e.g.,
+      when our session is holding a metadata lock that
+      is being waited on by a session which is using
+      the table which is being flushed. The only way
+      to recover from this error is, again, to close all
+      open tables, release all locks, and retry pre-locking.
+      Action type name is OT_REOPEN_TABLES. Re-trying
+      while holding some locks may lead to a livelock,
+      and thus we don't do it.
+    * Finally, this session has open TABLEs from different
+      "generations" of the table cache. This can happen, e.g.,
+      when, after this session has successfully opened one
+      table used for a statement, FLUSH TABLES interfered and
+      expelled another table used in it. FLUSH TABLES then
+      blocks and waits on the table already opened by this
+      statement.
+      We detect this situation by ensuring that table cache
+      version of all tables used in a statement is the same.
+      If it isn't, all tables needs to be reopened.
+      Note, that we can always perform a reopen in this case,
+      even if we already have metadata locks, since we don't
+      keep tables open between statements and a livelock
+      is not possible.
+  */
+  if (action_arg == OT_BACKOFF_AND_RETRY && m_has_locks)
+  {
+    my_error(ER_LOCK_DEADLOCK, MYF(0));
+    m_thd->mark_transaction_to_rollback(true);
+    return TRUE;
+  }
+  /*
+    If auto-repair or discovery are requested, a pointer to table
+    list element must be provided.
+  */
+  if (table)
+  {
+    DBUG_ASSERT(action_arg == OT_DISCOVER || action_arg == OT_REPAIR ||
+                action_arg == OT_ADD_HISTORY_PARTITION);
+    /*
+      Copy the failed table into a standalone TABLE_LIST allocated on the
+      statement arena: the original may be gone by the time
+      recover_from_failed_open() runs.
+    */
+    m_failed_table= (TABLE_LIST*) m_thd->alloc(sizeof(TABLE_LIST));
+    if (m_failed_table == NULL)
+      return TRUE;
+    m_failed_table->init_one_table(&table->db, &table->table_name, &table->alias, TL_WRITE);
+    m_failed_table->open_strategy= table->open_strategy;
+    m_failed_table->mdl_request.set_type(MDL_EXCLUSIVE);
+    m_failed_table->vers_skip_create= table->vers_skip_create;
+  }
+  m_action= action_arg;
+  return FALSE;
+}
+
+
+/**
+  An error handler to mark transaction to rollback on DEADLOCK error
+  during DISCOVER / REPAIR.
+*/
+class MDL_deadlock_discovery_repair_handler : public Internal_error_handler
+{
+public:
+  /* Intercept conditions raised while discovery/repair holds MDL locks. */
+  virtual bool handle_condition(THD *thd,
+                                  uint sql_errno,
+                                  const char* sqlstate,
+                                  Sql_condition::enum_warning_level *level,
+                                  const char* msg,
+                                  Sql_condition ** cond_hdl)
+  {
+    if (sql_errno == ER_LOCK_DEADLOCK)
+    {
+      thd->mark_transaction_to_rollback(true);
+    }
+    /*
+      We have marked this transaction to rollback. Return false to allow
+      error to be reported or handled by other handlers.
+    */
+    return false;
+  }
+};
+
+/**
+   Recover from failed attempt of open table by performing requested action.
+
+   @pre This function should be called only with "action" != OT_NO_ACTION
+        and after having called @sa close_tables_for_reopen().
+
+   @retval FALSE - Success. One should try to open tables once again.
+   @retval TRUE  - Error
+*/
+
+bool
+Open_table_context::recover_from_failed_open()
+{
+  bool result= FALSE;
+  MDL_deadlock_discovery_repair_handler handler;
+  /*
+    Install error handler to mark transaction to rollback on DEADLOCK error.
+  */
+  m_thd->push_internal_handler(&handler);
+
+  /* Execute the action. */
+  switch (m_action)
+  {
+    case OT_BACKOFF_AND_RETRY:
+    case OT_REOPEN_TABLES:
+      break;
+    case OT_DISCOVER:
+    case OT_REPAIR:
+    case OT_ADD_HISTORY_PARTITION:
+      DEBUG_SYNC(m_thd, "add_history_partition");
+      if (!m_thd->locked_tables_mode)
+        result= lock_table_names(m_thd, m_thd->lex->create_info, m_failed_table,
+                                NULL, get_timeout(), 0);
+      else
+      {
+        DBUG_ASSERT(!result);
+        DBUG_ASSERT(m_action == OT_ADD_HISTORY_PARTITION);
+      }
+      /*
+         We are now under MDL_EXCLUSIVE mode. Other threads have no table share
+         acquired: they are blocked either at open_table_get_mdl_lock() in
+         open_table() or at lock_table_names() here.
+      */
+      if (result)
+      {
+        if (m_action == OT_ADD_HISTORY_PARTITION)
+        {
+          TABLE_SHARE *share= tdc_acquire_share(m_thd, m_failed_table,
+                                                GTS_TABLE, NULL);
+          if (share)
+          {
+            share->vers_skip_auto_create= false;
+            tdc_release_share(share);
+          }
+          if (m_thd->get_stmt_da()->sql_errno() == ER_LOCK_WAIT_TIMEOUT)
+          {
+            // MDEV-23642 Locking timeout caused by auto-creation affects original DML
+            m_thd->clear_error();
+            vers_create_count= 0;
+            result= false;
+          }
+        }
+        break;
+      }
+
+      /*
+         We don't need to remove share under OT_ADD_HISTORY_PARTITION.
+         Moreover fast_alter_partition_table() works with TABLE instance.
+      */
+      if (m_action != OT_ADD_HISTORY_PARTITION)
+        tdc_remove_table(m_thd, m_failed_table->db.str,
+                        m_failed_table->table_name.str);
+
+      switch (m_action)
+      {
+        case OT_DISCOVER:
+        {
+          m_thd->get_stmt_da()->clear_warning_info(m_thd->query_id);
+          m_thd->clear_error();                 // Clear error message
+
+          No_such_table_error_handler no_such_table_handler;
+          bool open_if_exists= m_failed_table->open_strategy == TABLE_LIST::OPEN_IF_EXISTS;
+
+          if (open_if_exists)
+            m_thd->push_internal_handler(&no_such_table_handler);
+
+          result= !tdc_acquire_share(m_thd, m_failed_table,
+                                 GTS_TABLE | GTS_FORCE_DISCOVERY | GTS_NOLOCK);
+          if (open_if_exists)
+          {
+            m_thd->pop_internal_handler();
+            if (result && no_such_table_handler.safely_trapped_errors())
+              result= FALSE;
+          }
+          break;
+        }
+        case OT_REPAIR:
+          result= auto_repair_table(m_thd, m_failed_table);
+          break;
+        case OT_ADD_HISTORY_PARTITION:
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+        {
+          result= false;
+          TABLE *table= open_ltable(m_thd, m_failed_table, TL_WRITE,
+                    MYSQL_OPEN_HAS_MDL_LOCK | MYSQL_OPEN_IGNORE_LOGGING_FORMAT);
+          if (table == NULL)
+          {
+            m_thd->clear_error();
+            break;
+          }
+
+          DBUG_ASSERT(vers_create_count);
+          result= vers_create_partitions(m_thd, m_failed_table, vers_create_count);
+          vers_create_count= 0;
+          if (!m_thd->transaction->stmt.is_empty())
+            trans_commit_stmt(m_thd);
+          DBUG_ASSERT(!result ||
+                      !m_thd->locked_tables_mode ||
+                      m_thd->lock->lock_count);
+          if (result)
+            break;
+          if (!m_thd->locked_tables_mode)
+          {
+            /*
+              alter_partition_lock_handling() does mysql_lock_remove() but
+              does not clear thd->lock completely.
+            */
+            DBUG_ASSERT(m_thd->lock->lock_count == 0);
+            if (!(m_thd->lock->flags & GET_LOCK_ON_THD))
+              my_free(m_thd->lock);
+            m_thd->lock= NULL;
+          }
+          else if (m_thd->locked_tables_mode == LTM_PRELOCKED)
+          {
+            MYSQL_LOCK *lock;
+            MYSQL_LOCK *merged_lock;
+
+            /*
+              In LTM_LOCK_TABLES table was reopened via locked_tables_list,
+              but not in prelocked environment where we have to reopen
+              the table manually.
+            */
+            Open_table_context ot_ctx(m_thd, MYSQL_OPEN_REOPEN);
+            if (open_table(m_thd, m_failed_table, &ot_ctx))
+            {
+              result= true;
+              break;
+            }
+            TABLE *table= m_failed_table->table;
+            table->reginfo.lock_type= m_thd->update_lock_default;
+            m_thd->in_lock_tables= 1;
+            lock= mysql_lock_tables(m_thd, &table, 1,
+                                    MYSQL_OPEN_REOPEN | MYSQL_LOCK_USE_MALLOC);
+            m_thd->in_lock_tables= 0;
+            if (lock == NULL ||
+                !(merged_lock= mysql_lock_merge(m_thd->lock, lock, m_thd)))
+            {
+              result= true;
+              break;
+            }
+            m_thd->lock= merged_lock;
+          }
+          break;
+        }
+#endif /* WITH_PARTITION_STORAGE_ENGINE */
+        case OT_BACKOFF_AND_RETRY:
+        case OT_REOPEN_TABLES:
+        case OT_NO_ACTION:
+          DBUG_ASSERT(0);
+      }
+      /*
+        Rollback to start of the current statement to release exclusive lock
+        on table which was discovered but preserve locks from previous statements
+        in current transaction.
+      */
+      m_thd->mdl_context.rollback_to_savepoint(start_of_statement_svp());
+      break;
+    case OT_NO_ACTION:
+      DBUG_ASSERT(0);
+  }
+  m_thd->pop_internal_handler();
+  /*
+    Reset the pointers to conflicting MDL request and the
+    TABLE_LIST element, set when we need auto-discovery or repair,
+    for safety.
+  */
+  m_failed_table= NULL;
+  /*
+    Reset flag indicating that we have already acquired protection
+    against GRL. It is no longer valid as the corresponding lock was
+    released by close_tables_for_reopen().
+  */
+  m_has_protection_against_grl= 0;
+  /* Prepare for possible another back-off. */
+  m_action= OT_NO_ACTION;
+  return result;
+}
+
+
+/*
+  Return a appropriate read lock type given a table object.
+
+  @param thd              Thread context
+  @param prelocking_ctx   Prelocking context.
+  @param table_list       Table list element for table to be locked.
+  @param routine_modifies_data
+                          Some routine that is invoked by statement
+                          modifies data.
+
+  @remark Due to a statement-based replication limitation, statements such as
+          INSERT INTO .. SELECT FROM .. and CREATE TABLE .. SELECT FROM need
+          to grab a TL_READ_NO_INSERT lock on the source table in order to
+          prevent the replication of a concurrent statement that modifies the
+          source table. If such a statement gets applied on the slave before
+          the INSERT .. SELECT statement finishes, data on the master could
+          differ from data on the slave and end-up with a discrepancy between
+          the binary log and table state.
+          This also applies to SELECT/SET/DO statements which use stored
+          functions. Calls to such functions are going to be logged as a
+          whole and thus should be serialized against concurrent changes
+          to tables used by those functions. This is avoided when functions
+          do not modify data but only read it, since in this case nothing is
+          written to the binary log. Argument routine_modifies_data
+          denotes the same. So effectively, if the statement is not a
+          update query and routine_modifies_data is false, then
+          prelocking_placeholder does not take importance.
+
+          Furthermore, this does not apply to I_S and log tables as it's
+          always unsafe to replicate such tables under statement-based
+          replication as the table on the slave might contain other data
+          (ie: general_log is enabled on the slave). The statement will
+          be marked as unsafe for SBR in decide_logging_format().
+  @remark Note that even in prelocked mode it is important to correctly
+          determine lock type value. In this mode lock type is passed to
+          handler::start_stmt() method and can be used by storage engine,
+          for example, to determine what kind of row locks it should acquire
+          when reading data from the table.
+*/
+
+thr_lock_type read_lock_type_for_table(THD *thd,
+                                       Query_tables_list *prelocking_ctx,
+                                       TABLE_LIST *table_list,
+                                       bool routine_modifies_data)
+{
+  /*
+    In cases when this function is called for a sub-statement executed in
+    prelocked mode we can't rely on OPTION_BIN_LOG flag in THD::options
+    bitmap to determine that binary logging is turned on as this bit can
+    be cleared before executing sub-statement. So instead we have to look
+    at THD::variables::sql_log_bin member.
+  */
+  bool log_on= mysql_bin_log.is_open() && thd->variables.sql_log_bin;
+  if ((log_on == FALSE) ||
+      (thd->wsrep_binlog_format(thd->variables.binlog_format) == BINLOG_FORMAT_ROW) ||
+      (table_list->table->s->table_category == TABLE_CATEGORY_LOG) ||
+      (table_list->table->s->table_category == TABLE_CATEGORY_PERFORMANCE) ||
+      !(is_update_query(prelocking_ctx->sql_command) ||
+        (routine_modifies_data && table_list->prelocking_placeholder) ||
+        (thd->locked_tables_mode > LTM_LOCK_TABLES)))
+    return TL_READ;
+  else
+    return TL_READ_NO_INSERT;
+}
+
+
+/*
+  Extend the prelocking set with tables and routines used by a routine.
+
+  @param[in]  thd                   Thread context.
+  @param[in]  rt                    Element of prelocking set to be processed.
+  @param[in]  ot_ctx                Context of open_table used to recover from
+                                    locking failures.
+  @retval false  Success.
+  @retval true   Failure (Conflicting metadata lock, OOM, other errors).
+*/
+static bool
+sp_acquire_mdl(THD *thd, Sroutine_hash_entry *rt, Open_table_context *ot_ctx)
+{
+  DBUG_ENTER("sp_acquire_mdl");
+  /*
+    Since we acquire only shared lock on routines we don't
+    need to care about global intention exclusive locks.
+  */
+  DBUG_ASSERT(rt->mdl_request.type == MDL_SHARED);
+
+  /*
+    Waiting for a conflicting metadata lock to go away may
+    lead to a deadlock, detected by MDL subsystem.
+    If possible, we try to resolve such deadlocks by releasing all
+    metadata locks and restarting the pre-locking process.
+    To prevent the error from polluting the diagnostics area
+    in case of successful resolution, install a special error
+    handler for ER_LOCK_DEADLOCK error.
+  */
+  MDL_deadlock_handler mdl_deadlock_handler(ot_ctx);
+
+  thd->push_internal_handler(&mdl_deadlock_handler);
+  bool result= thd->mdl_context.acquire_lock(&rt->mdl_request,
+                                             ot_ctx->get_timeout());
+  thd->pop_internal_handler();
+
+  DBUG_RETURN(result);
+}
+
+
+/*
+  Handle element of prelocking set other than table. E.g. cache routine
+  and, if prelocking strategy prescribes so, extend the prelocking set
+  with tables and routines used by it.
+
+  @param[in]  thd                   Thread context.
+  @param[in]  prelocking_ctx        Prelocking context.
+  @param[in]  rt                    Element of prelocking set to be processed.
+  @param[in]  prelocking_strategy   Strategy which specifies how the
+                                    prelocking set should be extended when
+                                    one of its elements is processed.
+  @param[in]  has_prelocking_list   Indicates that prelocking set/list for
+                                    this statement has already been built.
+  @param[in]  ot_ctx                Context of open_table used to recover from
+                                    locking failures.
+  @param[out] need_prelocking       Set to TRUE if it was detected that this
+                                    statement will require prelocked mode for
+                                    its execution, not touched otherwise.
+  @param[out] routine_modifies_data Set to TRUE if it was detected that this
+                                    routine does modify table data.
+
+  @retval FALSE  Success.
+  @retval TRUE   Failure (Conflicting metadata lock, OOM, other errors).
+*/
+
+static bool
+open_and_process_routine(THD *thd, Query_tables_list *prelocking_ctx,
+                         Sroutine_hash_entry *rt,
+                         Prelocking_strategy *prelocking_strategy,
+                         bool has_prelocking_list,
+                         Open_table_context *ot_ctx,
+                         bool *need_prelocking, bool *routine_modifies_data)
+{
+  MDL_key::enum_mdl_namespace mdl_type= rt->mdl_request.key.mdl_namespace();
+  DBUG_ENTER("open_and_process_routine");
+
+  *routine_modifies_data= false;
+
+  switch (mdl_type)
+  {
+  case MDL_key::PACKAGE_BODY:
+    DBUG_ASSERT(rt != (Sroutine_hash_entry*)prelocking_ctx->sroutines_list.first);
+    /*
+      No need to cache the package body itself.
+      It gets cached during open_and_process_routine()
+      for the first used package routine. See the package related code
+      in the "case" below.
+    */
+    if (sp_acquire_mdl(thd, rt, ot_ctx))
+      DBUG_RETURN(TRUE);
+    break;
+  case MDL_key::FUNCTION:
+  case MDL_key::PROCEDURE:
+    {
+      sp_head *sp;
+      /*
+        Try to get MDL lock on the routine.
+        Note that we do not take locks on top-level CALLs as this can
+        lead to a deadlock. Not locking top-level CALLs does not break
+        the binlog as only the statements in the called procedure show
+        up there, not the CALL itself.
+      */
+      if (rt != (Sroutine_hash_entry*)prelocking_ctx->sroutines_list.first ||
+          mdl_type != MDL_key::PROCEDURE)
+      {
+        /*
+          TODO: If this is a package routine, we should not put MDL
+          TODO: on the routine itself. We should put only the package MDL.
+        */
+        if (sp_acquire_mdl(thd, rt, ot_ctx))
+          DBUG_RETURN(TRUE);
+
+        /* Ensures the routine is up-to-date and cached, if exists. */
+        if (rt->sp_cache_routine(thd, has_prelocking_list, &sp))
+          DBUG_RETURN(TRUE);
+
+        /* Remember the version of the routine in the parse tree. */
+        if (check_and_update_routine_version(thd, rt, sp))
+          DBUG_RETURN(TRUE);
+
+        /* 'sp' is NULL when there is no such routine. */
+        if (sp)
+        {
+          *routine_modifies_data= sp->modifies_data();
+
+          if (!has_prelocking_list)
+          {
+            prelocking_strategy->handle_routine(thd, prelocking_ctx, rt, sp,
+                                                need_prelocking);
+            if (sp->m_parent)
+            {
+              /*
+                If it's a package routine, we need also to handle the
+                package body, as its initialization section can use
+                some tables and routine calls.
+                TODO: Only package public routines actually need this.
+                TODO: Skip package body handling for private routines.
+              */
+              *routine_modifies_data|= sp->m_parent->modifies_data();
+              prelocking_strategy->handle_routine(thd, prelocking_ctx, rt,
+                                                  sp->m_parent,
+                                                  need_prelocking);
+            }
+          }
+        }
+      }
+      else
+      {
+        /*
+          If it's a top level call, just make sure we have a recent
+          version of the routine, if it exists.
+          Validating routine version is unnecessary, since CALL
+          does not affect the prepared statement prelocked list.
+        */
+        if (rt->sp_cache_routine(thd, false, &sp))
+          DBUG_RETURN(TRUE);
+      }
+    }
+    break;
+  case MDL_key::TRIGGER:
+    /**
+      We add trigger entries to lex->sroutines_list, but we don't
+      load them here. The trigger entry is only used when building
+      a transitive closure of objects used in a statement, to avoid
+      adding to this closure objects that are used in the trigger more
+      than once.
+      E.g. if a trigger trg refers to table t2, and the trigger table t1
+      is used multiple times in the statement (say, because it's used in
+      function f1() twice), we will only add t2 once to the list of
+      tables to prelock.
+
+      We don't take metadata locks on triggers either: they are protected
+      by a respective lock on the table, on which the trigger is defined.
+
+      The only two cases which give "trouble" are SHOW CREATE TRIGGER
+      and DROP TRIGGER statements. For these, statement syntax doesn't
+      specify the table on which this trigger is defined, so we have
+      to make a "dirty" read in the data dictionary to find out the
+      table name. Once we discover the table name, we take a metadata
+      lock on it, and this protects all trigger operations.
+      Of course the table, in theory, may disappear between the dirty
+      read and metadata lock acquisition, but in that case we just return
+      a run-time error.
+
+      Grammar of other trigger DDL statements (CREATE, DROP) requires
+      the table to be specified explicitly, so we use the table metadata
+      lock to protect trigger metadata in these statements. Similarly, in
+      DML we always use triggers together with their tables, and thus don't
+      need to take separate metadata locks on them.
+    */
+    break;
+  default:
+    /* Impossible type value. */
+    DBUG_ASSERT(0);
+  }
+  DBUG_RETURN(FALSE);
+}
+
+/*
+  If we are not already in prelocked mode and extended table list is not
+  yet built we might have to build the prelocking set for this statement.
+
+  Since currently no prelocking strategy prescribes doing anything for
+  tables which are only read, we do below checks only if table is going
+  to be changed.
+*/
+bool extend_table_list(THD *thd, TABLE_LIST *tables,
+                       Prelocking_strategy *prelocking_strategy,
+                       bool has_prelocking_list)
+{
+  bool error= false;
+  LEX *lex= thd->lex;
+  bool maybe_need_prelocking=
+    (tables->updating && tables->lock_type >= TL_FIRST_WRITE)
+    || thd->lex->default_used;
+
+  if (thd->locked_tables_mode <= LTM_LOCK_TABLES &&
+      ! has_prelocking_list && maybe_need_prelocking)
+  {
+    bool need_prelocking= FALSE;
+    TABLE_LIST **save_query_tables_last= lex->query_tables_last;
+    /*
+      Extend statement's table list and the prelocking set with
+      tables and routines according to the current prelocking
+      strategy.
+
+      For example, for DML statements we need to add tables and routines
+      used by triggers which are going to be invoked for this element of
+      table list and also add tables required for handling of foreign keys.
+    */
+    error= prelocking_strategy->handle_table(thd, lex, tables,
+                                             &need_prelocking);
+
+    if (need_prelocking && ! lex->requires_prelocking())
+      lex->mark_as_requiring_prelocking(save_query_tables_last);
+  }
+  return error;
+}
+
+
+/**
+  Handle table list element by obtaining metadata lock, opening table or view
+  and, if prelocking strategy prescribes so, extending the prelocking set with
+  tables and routines used by it.
+
+  @param[in]     thd                  Thread context.
+  @param[in]     lex                  LEX structure for statement.
+  @param[in]     tables               Table list element to be processed.
+  @param[in,out] counter              Number of tables which are open.
+  @param[in]     flags                Bitmap of flags to modify how the tables
+                                      will be open, see open_table() description
+                                      for details.
+  @param[in]     prelocking_strategy  Strategy which specifies how the
+                                      prelocking set should be extended
+                                      when table or view is processed.
+  @param[in]     has_prelocking_list  Indicates that prelocking set/list for
+                                      this statement has already been built.
+  @param[in]     ot_ctx               Context used to recover from a failed
+                                      open_table() attempt.
+
+  @retval  FALSE  Success.
+  @retval  TRUE   Error, reported unless there is a chance to recover from it.
+*/
+
+static bool
+open_and_process_table(THD *thd, TABLE_LIST *tables, uint *counter, uint flags,
+                       Prelocking_strategy *prelocking_strategy,
+                       bool has_prelocking_list, Open_table_context *ot_ctx)
+{
+  bool error= FALSE;
+  bool safe_to_ignore_table= FALSE;
+  LEX *lex= thd->lex;
+  DBUG_ENTER("open_and_process_table");
+  DEBUG_SYNC(thd, "open_and_process_table");
+
+  /*
+    Ignore placeholders for derived tables. After derived tables
+    processing, link to created temporary table will be put here.
+    If this is derived table for view then we still want to process
+    routines used by this view.
+  */
+  if (tables->derived)
+  {
+    if (!tables->view)
+    {
+      if (!tables->is_derived())
+        tables->set_derived();
+      goto end;
+    }
+    /*
+      We restore view's name and database wiped out by derived tables
+      processing and fall back to standard open process in order to
+      obtain proper metadata locks and do other necessary steps like
+      stored routine processing.
+    */
+    tables->db= tables->view_db;
+    tables->table_name= tables->view_name;
+  }
+
+  if (!tables->derived && is_infoschema_db(&tables->db))
+  {
+    /*
+      Check whether the information schema contains a table
+      whose name is tables->schema_table_name
+    */
+    ST_SCHEMA_TABLE *schema_table= tables->schema_table;
+    if (!schema_table ||
+        (schema_table->hidden &&
+         ((sql_command_flags[lex->sql_command] & CF_STATUS_COMMAND) == 0 ||
+          /*
+            this check is used for show columns|keys from I_S hidden table
+          */
+          lex->sql_command == SQLCOM_SHOW_FIELDS ||
+          lex->sql_command == SQLCOM_SHOW_KEYS)))
+    {
+      my_error(ER_UNKNOWN_TABLE, MYF(0),
+               tables->table_name.str, INFORMATION_SCHEMA_NAME.str);
+      DBUG_RETURN(1);
+    }
+  }
+  /*
+    If this TABLE_LIST object is a placeholder for an information_schema
+    table, create a temporary table to represent the information_schema
+    table in the query. Do not fill it yet - will be filled during
+    execution.
+  */
+  if (tables->schema_table)
+  {
+    /*
+      If this information_schema table is merged into a mergeable
+      view, ignore it for now -- it will be filled when its respective
+      TABLE_LIST is processed. This code works only during re-execution.
+    */
+    if (tables->view)
+    {
+      MDL_ticket *mdl_ticket;
+      /*
+        We still need to take a MDL lock on the merged view to protect
+        it from concurrent changes.
+      */
+      if (!open_table_get_mdl_lock(thd, ot_ctx, &tables->mdl_request,
+                                   flags, &mdl_ticket) &&
+          mdl_ticket != NULL)
+        goto process_view_routines;
+      /* Fall-through to return error. */
+    }
+    else if (!mysql_schema_table(thd, lex, tables) &&
+             !check_and_update_table_version(thd, tables, tables->table->s))
+    {
+      goto end;
+    }
+    error= TRUE;
+    goto end;
+  }
+
+  if (tables->table_function)
+  {
+    if (!create_table_for_function(thd, tables))
+      error= TRUE;
+    goto end;
+  }
+
+  DBUG_PRINT("tcache", ("opening table: '%s'.'%s'  item: %p",
+                        tables->db.str, tables->table_name.str, tables));
+  (*counter)++;
+
+  /*
+    Not a placeholder: must be a base/temporary table or a view. Let us open it.
+  */
+  if (tables->table)
+  {
+    /*
+      If this TABLE_LIST object has an associated open TABLE object
+      (TABLE_LIST::table is not NULL), that TABLE object must be a pre-opened
+      temporary table or SEQUENCE (see sequence_insert()).
+    */
+    DBUG_ASSERT(is_temporary_table(tables) || tables->table->s->sequence);
+    if (tables->sequence &&
+        tables->table->s->table_type != TABLE_TYPE_SEQUENCE)
+    {
+      my_error(ER_NOT_SEQUENCE, MYF(0), tables->db.str, tables->alias.str);
+      DBUG_RETURN(true);
+    }
+  }
+  else if (tables->open_type == OT_TEMPORARY_ONLY)
+  {
+    /*
+      OT_TEMPORARY_ONLY means that we are in CREATE TEMPORARY TABLE statement.
+      Also such table list element can't correspond to prelocking placeholder
+      or to underlying table of merge table.
+      So existing temporary table should have been preopened by this moment
+      and we can simply continue without trying to open temporary or base
+      table.
+    */
+    DBUG_ASSERT(tables->open_strategy);
+    DBUG_ASSERT(!tables->prelocking_placeholder);
+    DBUG_ASSERT(!tables->parent_l);
+    DBUG_RETURN(0);
+  }
+
+  /* Not a placeholder: must be a base table or a view. Let us open it. */
+  if (tables->prelocking_placeholder)
+  {
+    /*
+      For the tables added by the pre-locking code, attempt to open
+      the table but fail silently if the table does not exist.
+      The real failure will occur when/if a statement attempts to use
+      that table.
+    */
+    No_such_table_error_handler no_such_table_handler;
+    thd->push_internal_handler(&no_such_table_handler);
+
+    /*
+      We're opening a table from the prelocking list.
+
+      Since this table list element might have been added after pre-opening
+      of temporary tables we have to try to open temporary table for it.
+
+      We can't simply skip this table list element and postpone opening of
+      temporary table till the execution of substatement for several reasons:
+      - Temporary table can be a MERGE table with base underlying tables,
+        so its underlying tables has to be properly open and locked at
+        prelocking stage.
+      - Temporary table can be a MERGE table and we might be in PREPARE
+        phase for a prepared statement. In this case it is important to call
+        HA_ATTACH_CHILDREN for all merge children.
+        This is necessary because merge children remember "TABLE_SHARE ref type"
+        and "TABLE_SHARE def version" in the HA_ATTACH_CHILDREN operation.
+        If HA_ATTACH_CHILDREN is not called, these attributes are not set.
+        Then, during the first EXECUTE, those attributes need to be updated.
+        That would cause statement re-preparing (because changing those
+        attributes during EXECUTE is caught by THD::m_reprepare_observers).
+        The problem is that since those attributes are not set in merge
+        children, another round of PREPARE will not help.
+    */
+    if (!thd->has_temporary_tables() ||
+        (!(error= thd->open_temporary_table(tables)) &&
+         !tables->table))
+      error= open_table(thd, tables, ot_ctx);
+
+    thd->pop_internal_handler();
+    safe_to_ignore_table= no_such_table_handler.safely_trapped_errors();
+  }
+  else if (tables->parent_l && (thd->open_options & HA_OPEN_FOR_REPAIR))
+  {
+    /*
+      Also fail silently for underlying tables of a MERGE table if this
+      table is opened for CHECK/REPAIR TABLE statement. This is needed
+      to provide complete list of problematic underlying tables in
+      CHECK/REPAIR TABLE output.
+    */
+    Repair_mrg_table_error_handler repair_mrg_table_handler;
+    thd->push_internal_handler(&repair_mrg_table_handler);
+
+    if (!thd->has_temporary_tables() ||
+        (!(error= thd->open_temporary_table(tables)) &&
+         !tables->table))
+      error= open_table(thd, tables, ot_ctx);
+
+    thd->pop_internal_handler();
+    safe_to_ignore_table= repair_mrg_table_handler.safely_trapped_errors();
+  }
+  else
+  {
+    if (tables->parent_l)
+    {
+      /*
+        Even if we are opening table not from the prelocking list we
+        still might need to look for a temporary table if this table
+        list element corresponds to underlying table of a merge table.
+      */
+      if (thd->has_temporary_tables())
+        error= thd->open_temporary_table(tables);
+    }
+
+    if (!error && !tables->table)
+      error= open_table(thd, tables, ot_ctx);
+  }
+
+  if (unlikely(error))
+  {
+    if (! ot_ctx->can_recover_from_failed_open() && safe_to_ignore_table)
+    {
+      DBUG_PRINT("info", ("open_table: ignoring table '%s'.'%s'",
+                          tables->db.str, tables->alias.str));
+      error= FALSE;
+    }
+    goto end;
+  }
+
+  /*
+    We can't rely on simple check for TABLE_LIST::view to determine
+    that this is a view since during re-execution we might reopen
+    ordinary table in place of view and thus have TABLE_LIST::view
+    set from repvious execution and TABLE_LIST::table set from
+    current.
+  */
+  if (!tables->table && tables->view)
+  {
+    /* VIEW placeholder */
+    (*counter)--;
+
+    /*
+      tables->next_global list consists of two parts:
+      1) Query tables and underlying tables of views.
+      2) Tables used by all stored routines that this statement invokes on
+         execution.
+      We need to know where the bound between these two parts is. If we've
+      just opened a view, which was the last table in part #1, and it
+      has added its base tables after itself, adjust the boundary pointer
+      accordingly.
+    */
+    if (lex->query_tables_own_last == &(tables->next_global) &&
+        tables->view->query_tables)
+      lex->query_tables_own_last= tables->view->query_tables_last;
+    /*
+      Let us free memory used by 'sroutines' hash here since we never
+      call destructor for this LEX.
+    */
+    my_hash_free(&tables->view->sroutines);
+    goto process_view_routines;
+  }
+
+  /*
+    Special types of open can succeed but still don't set
+    TABLE_LIST::table to anything.
+  */
+  if (tables->open_strategy && !tables->table)
+    goto end;
+
+  /* Check and update metadata version of a base table. */
+  error= check_and_update_table_version(thd, tables, tables->table->s);
+
+  if (unlikely(error))
+    goto end;
+
+  error= extend_table_list(thd, tables, prelocking_strategy, has_prelocking_list);
+  if (unlikely(error))
+    goto end;
+
+  /* Copy grant information from TABLE_LIST instance to TABLE one. */
+  tables->table->grant= tables->grant;
+
+  /*
+    After opening a MERGE table add the children to the query list of
+    tables, so that they are opened too.
+    Note that placeholders don't have the handler open.
+  */
+  /* MERGE tables need to access parent and child TABLE_LISTs. */
+  DBUG_ASSERT(tables->table->pos_in_table_list == tables);
+  /* Non-MERGE tables ignore this call. */
+  if (tables->table->file->extra(HA_EXTRA_ADD_CHILDREN_LIST))
+  {
+    error= TRUE;
+    goto end;
+  }
+
+process_view_routines:
+  /*
+    Again we may need cache all routines used by this view and add
+    tables used by them to table list.
+  */
+  if (tables->view &&
+      thd->locked_tables_mode <= LTM_LOCK_TABLES &&
+      ! has_prelocking_list)
+  {
+    bool need_prelocking= FALSE;
+    TABLE_LIST **save_query_tables_last= lex->query_tables_last;
+
+    error= prelocking_strategy->handle_view(thd, lex, tables,
+                                            &need_prelocking);
+
+    if (need_prelocking && ! lex->requires_prelocking())
+      lex->mark_as_requiring_prelocking(save_query_tables_last);
+
+    if (unlikely(error))
+      goto end;
+  }
+
+end:
+  DBUG_RETURN(error);
+}
+
+
+/**
+  For CREATE TABLE / CREATE SEQUENCE, check whether the target table
+  already exists and, if it does not, upgrade the shared metadata lock
+  on it to an exclusive one.
+
+  @param thd               Thread context.
+  @param create_info       DDL options (OR REPLACE / IF NOT EXISTS flags).
+  @param create_table      Table list element for the table being created.
+  @param lock_wait_timeout Seconds to wait for the MDL upgrade.
+
+  @retval false  Success: lock upgraded, or the statement is not a
+                 CREATE TABLE/SEQUENCE (nothing to do).
+  @retval true   Failure, or the table already exists. Note that for
+                 IF NOT EXISTS an existing table only pushes a warning,
+                 but true is still returned so the caller aborts opening.
+*/
+
+static bool upgrade_lock_if_not_exists(THD *thd,
+                                       const DDL_options_st &create_info,
+                                       TABLE_LIST *create_table,
+                                       ulong lock_wait_timeout)
+{
+  DBUG_ENTER("upgrade_lock_if_not_exists");
+
+  if (thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
+      thd->lex->sql_command == SQLCOM_CREATE_SEQUENCE)
+  {
+    DEBUG_SYNC(thd,"create_table_before_check_if_exists");
+    /* OR REPLACE overwrites an existing table, so no existence check. */
+    if (!create_info.or_replace() &&
+        ha_table_exists(thd, &create_table->db, &create_table->table_name,
+                        NULL, NULL, &create_table->db_type))
+    {
+      if (create_info.if_not_exists())
+      {
+        /* IF NOT EXISTS: existing table is a note, not an error. */
+        push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
+                            ER_TABLE_EXISTS_ERROR,
+                            ER_THD(thd, ER_TABLE_EXISTS_ERROR),
+                            create_table->table_name.str);
+      }
+      else
+        my_error(ER_TABLE_EXISTS_ERROR, MYF(0), create_table->table_name.str);
+      DBUG_RETURN(true);
+    }
+    /* Table does not exist: take an exclusive lock for the creation. */
+    DBUG_RETURN(thd->mdl_context.upgrade_shared_lock(
+                                   create_table->mdl_request.ticket,
+                                   MDL_EXCLUSIVE,
+                                   lock_wait_timeout));
+  }
+  DBUG_RETURN(false);
+}
+
+
+/**
+  Acquire upgradable (SNW, SNRW) metadata locks on tables used by
+  LOCK TABLES or by a DDL statement. Under LOCK TABLES, we can't take
+  new locks, so use open_tables_check_upgradable_mdl() instead.
+
+  @param thd               Thread context.
+  @param options           DDL options.
+  @param tables_start      Start of list of tables on which upgradable locks
+                           should be acquired.
+  @param tables_end        End of list of tables.
+  @param lock_wait_timeout Seconds to wait before timeout.
+  @param flags             Bitmap of flags to modify how the tables will be
+                           open, see open_table() description for details.
+
+  @retval FALSE  Success.
+  @retval TRUE   Failure (e.g. connection was killed) or table existed
+	         for a CREATE TABLE.
+
+  @notes
+  In case of CREATE TABLE we avoid a wait for tables that are in use
+  by first trying to do a meta data lock with timeout == 0.  If we get a
+  timeout we will check if table exists (it should) and retry with
+  normal timeout if it didn't exists.
+  Note that for CREATE TABLE IF EXISTS we only generate a warning
+  but still return TRUE (to abort the calling open_table() function).
+  On must check THD->is_error() if one wants to distinguish between warning
+  and error.  If table existed, tables_start->db_type is set to the handlerton
+  for the found table.
+*/
+
+bool
+lock_table_names(THD *thd, const DDL_options_st &options,
+                 TABLE_LIST *tables_start, TABLE_LIST *tables_end,
+                 ulong lock_wait_timeout, uint flags)
+{
+  MDL_request_list mdl_requests;
+  TABLE_LIST *table;
+  MDL_request global_request;
+  MDL_savepoint mdl_savepoint;
+  DBUG_ENTER("lock_table_names");
+
+  /* Under LOCK TABLES, open_tables_check_upgradable_mdl() is used instead. */
+  DBUG_ASSERT(!thd->locked_tables_mode);
+
+  /*
+    Collect MDL requests for all tables that need upgradable locks,
+    skipping temporary-only tables and read-only/weak lock requests.
+  */
+  for (table= tables_start; table && table != tables_end;
+       table= table->next_global)
+  {
+    DBUG_PRINT("info", ("mdl_request.type: %d  open_type: %d",
+                        table->mdl_request.type, table->open_type));
+    if (table->mdl_request.type < MDL_SHARED_UPGRADABLE ||
+        table->mdl_request.type == MDL_SHARED_READ_ONLY ||
+        table->open_type == OT_TEMPORARY_ONLY ||
+        (table->open_type == OT_TEMPORARY_OR_BASE && is_temporary_table(table)))
+    {
+      continue;
+    }
+
+    /* Write lock on normal tables is not allowed in a read only transaction. */
+    if (thd->tx_read_only)
+    {
+      my_error(ER_CANT_EXECUTE_IN_READ_ONLY_TRANSACTION, MYF(0));
+      DBUG_RETURN(true);
+    }
+
+    /* Scoped locks: Take intention exclusive locks on all involved schemas. */
+    if (!(flags & MYSQL_OPEN_SKIP_SCOPED_MDL_LOCK))
+    {
+      MDL_request *schema_request= new (thd->mem_root) MDL_request;
+      if (schema_request == NULL)
+        DBUG_RETURN(TRUE);
+      MDL_REQUEST_INIT(schema_request, MDL_key::SCHEMA, table->db.str, "",
+                       MDL_INTENTION_EXCLUSIVE, MDL_TRANSACTION);
+      mdl_requests.push_front(schema_request);
+    }
+
+    mdl_requests.push_front(&table->mdl_request);
+  }
+
+  /* No upgradable locks needed: nothing to acquire. */
+  if (mdl_requests.is_empty())
+    DBUG_RETURN(FALSE);
+
+  if (flags & MYSQL_OPEN_SKIP_SCOPED_MDL_LOCK)
+  {
+    DBUG_RETURN(thd->mdl_context.acquire_locks(&mdl_requests,
+                                               lock_wait_timeout) ||
+                upgrade_lock_if_not_exists(thd, options, tables_start,
+                                           lock_wait_timeout));
+  }
+
+  /* Protect this statement against concurrent BACKUP STAGE or FTWRL. */
+  if (thd->has_read_only_protection())
+    DBUG_RETURN(true);
+
+  MDL_REQUEST_INIT(&global_request, MDL_key::BACKUP, "", "", MDL_BACKUP_DDL,
+                   MDL_STATEMENT);
+  mdl_savepoint= thd->mdl_context.mdl_savepoint();
+
+  /*
+    Retry loop: acquire the table locks, then try the BACKUP DDL lock
+    without waiting. If a BACKUP STAGE/FTWRL holds it, roll back all
+    table locks, wait for the global lock to become free, and retry.
+  */
+  while (!thd->mdl_context.acquire_locks(&mdl_requests, lock_wait_timeout) &&
+         !upgrade_lock_if_not_exists(thd, options, tables_start,
+                                     lock_wait_timeout) &&
+         !thd->mdl_context.try_acquire_lock(&global_request))
+  {
+    if (global_request.ticket)
+    {
+      thd->mdl_backup_ticket= global_request.ticket;
+      DBUG_RETURN(false);
+    }
+
+    /*
+      There is ongoing or pending BACKUP STAGE or FTWRL.
+      Wait until it finishes and re-try.
+    */
+    thd->mdl_context.rollback_to_savepoint(mdl_savepoint);
+    if (thd->mdl_context.acquire_lock(&global_request, lock_wait_timeout))
+      break;
+    thd->mdl_context.rollback_to_savepoint(mdl_savepoint);
+
+    /* Reset tickets for all acquired locks */
+    global_request.ticket= 0;
+    MDL_request_list::Iterator it(mdl_requests);
+    while (auto mdl_request= it++)
+      mdl_request->ticket= 0;
+  }
+  DBUG_RETURN(true);
+}
+
+
+/**
+  Check for upgradable (SNW, SNRW) metadata locks on tables to be opened
+  for a DDL statement. Under LOCK TABLES, we can't take new locks, so we
+  must check if appropriate locks were pre-acquired.
+
+  @param thd           Thread context.
+  @param tables_start  Start of list of tables on which upgradable locks
+                       should be searched for.
+  @param tables_end    End of list of tables.
+  @param flags         Bitmap of flags to modify how the tables will be
+                       open, see open_table() description for details.
+
+  @retval FALSE  Success.
+  @retval TRUE   Failure (e.g. connection was killed)
+*/
+
+static bool
+open_tables_check_upgradable_mdl(THD *thd, TABLE_LIST *tables_start,
+                                 TABLE_LIST *tables_end, uint flags)
+{
+  TABLE_LIST *table;
+
+  /* Only meaningful under LOCK TABLES; see lock_table_names() otherwise. */
+  DBUG_ASSERT(thd->locked_tables_mode);
+
+  for (table= tables_start; table && table != tables_end;
+       table= table->next_global)
+  {
+    /*
+      Check below needs to be updated if this function starts being
+      called for SRO locks.
+    */
+    DBUG_ASSERT(table->mdl_request.type != MDL_SHARED_READ_ONLY);
+    /* Skip tables that don't need an upgradable lock (same filter as
+       in lock_table_names()). */
+    if (table->mdl_request.type < MDL_SHARED_UPGRADABLE ||
+        table->open_type == OT_TEMPORARY_ONLY ||
+        (table->open_type == OT_TEMPORARY_OR_BASE && is_temporary_table(table)))
+    {
+      continue;
+    }
+
+    /*
+      We don't need to do anything about the found TABLE instance as it
+      will be handled later in open_tables(), we only need to check that
+      an upgradable lock is already acquired. When we enter LOCK TABLES
+      mode, SNRW locks are acquired before all other locks. So if under
+      LOCK TABLES we find that there is TABLE instance with upgradeable
+      lock, all other instances of TABLE for the same table will have the
+      same ticket.
+
+      Note that this works OK even for CREATE TABLE statements which
+      request X type of metadata lock. This is because under LOCK TABLES
+      such statements don't create the table but only check if it exists
+      or, in most complex case, only insert into it.
+      Thus SNRW lock should be enough.
+
+      Note that find_table_for_mdl_upgrade() will report an error if
+      no suitable ticket is found.
+    */
+    if (!find_table_for_mdl_upgrade(thd, table->db.str, table->table_name.str,
+                                    NULL))
+      return TRUE;
+  }
+
+  return FALSE;
+}
+
+
+/**
+  Open all tables in list
+
+  @param[in]     thd      Thread context.
+  @param[in]     options  DDL options.
+  @param[in,out] start    List of tables to be open (it can be adjusted for
+                          statement that uses tables only implicitly, e.g.
+                          for "SELECT f1()").
+  @param[out]    counter  Number of tables which were open.
+  @param[in]     flags    Bitmap of flags to modify how the tables will be
+                          open, see open_table() description for details.
+  @param[in]     prelocking_strategy  Strategy which specifies how prelocking
+                                      algorithm should work for this statement.
+
+  @note
+    Unless we are already in prelocked mode and prelocking strategy prescribes
+    so this function will also precache all SP/SFs explicitly or implicitly
+    (via views and triggers) used by the query and add tables needed for their
+    execution to table list. Statement that uses SFs, invokes triggers or
+    requires foreign key checks will be marked as requiring prelocking.
+    Prelocked mode will be enabled for such query during lock_tables() call.
+
+    If query for which we are opening tables is already marked as requiring
+    prelocking it won't do such precaching and will simply reuse table list
+    which is already built.
+
+  @retval  FALSE  Success.
+  @retval  TRUE   Error, reported.
+*/
+
+bool open_tables(THD *thd, const DDL_options_st &options,
+                 TABLE_LIST **start, uint *counter, uint flags,
+                 Prelocking_strategy *prelocking_strategy)
+{
+  /*
+    We use pointers to "next_global" member in the last processed
+    TABLE_LIST element and to the "next" member in the last processed
+    Sroutine_hash_entry element as iterators over, correspondingly,
+    the table list and stored routines list which stay valid and allow
+    to continue iteration when new elements are added to the tail of
+    the lists.
+  */
+  TABLE_LIST **table_to_open;
+  Sroutine_hash_entry **sroutine_to_open;
+  TABLE_LIST *tables;
+  Open_table_context ot_ctx(thd, flags);
+  bool error= FALSE;
+  bool some_routine_modifies_data= FALSE;
+  bool has_prelocking_list;
+  DBUG_ENTER("open_tables");
+
+  /* Data access in XA transaction is only allowed when it is active. */
+  for (TABLE_LIST *table= *start; table; table= table->next_global)
+    if (!table->schema_table)
+    {
+      if (thd->transaction->xid_state.check_has_uncommitted_xa())
+      {
+	thd->transaction->xid_state.er_xaer_rmfail();
+        DBUG_RETURN(true);
+      }
+      else
+        break;
+    }
+
+  thd->current_tablenr= 0;
+
+restart:
+  /*
+    Close HANDLER tables which are marked for flush or against which there
+    are pending exclusive metadata locks. This is needed both in order to
+    avoid deadlocks and to have a point during statement execution at
+    which such HANDLERs are closed even if they don't create problems for
+    the current session (i.e. to avoid having a DDL blocked by HANDLERs
+    opened for a long time).
+  */
+  if (thd->handler_tables_hash.records)
+    mysql_ha_flush(thd);
+
+  has_prelocking_list= thd->lex->requires_prelocking();
+  table_to_open= start;
+  sroutine_to_open= &thd->lex->sroutines_list.first;
+  *counter= 0;
+  THD_STAGE_INFO(thd, stage_opening_tables);
+  prelocking_strategy->reset(thd);
+
+  /*
+    If we are executing LOCK TABLES statement or a DDL statement
+    (in non-LOCK TABLES mode) we might have to acquire upgradable
+    semi-exclusive metadata locks (SNW or SNRW) on some of the
+    tables to be opened.
+    When executing CREATE TABLE .. If NOT EXISTS .. SELECT, the
+    table may not yet exist, in which case we acquire an exclusive
+    lock.
+    We acquire all such locks at once here as doing this in one
+    by one fashion may lead to deadlocks or starvation. Later when
+    we will be opening corresponding table pre-acquired metadata
+    lock will be reused (thanks to the fact that in recursive case
+    metadata locks are acquired without waiting).
+  */
+  if (! (flags & (MYSQL_OPEN_HAS_MDL_LOCK |
+                  MYSQL_OPEN_FORCE_SHARED_MDL |
+                  MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL)))
+  {
+    if (thd->locked_tables_mode)
+    {
+      /*
+        Under LOCK TABLES, we can't acquire new locks, so we instead
+        need to check if appropriate locks were pre-acquired.
+      */
+      if (open_tables_check_upgradable_mdl(thd, *start,
+                                           thd->lex->first_not_own_table(),
+                                           flags))
+      {
+        error= TRUE;
+        goto error;
+      }
+    }
+    else
+    {
+      TABLE_LIST *table;
+      if (lock_table_names(thd, options, *start,
+                           thd->lex->first_not_own_table(),
+                           ot_ctx.get_timeout(), flags))
+      {
+        error= TRUE;
+        goto error;
+      }
+      /* Clear tickets so open_table() picks up the pre-acquired locks. */
+      for (table= *start; table && table != thd->lex->first_not_own_table();
+           table= table->next_global)
+      {
+        if (table->mdl_request.type >= MDL_SHARED_UPGRADABLE)
+          table->mdl_request.ticket= NULL;
+      }
+    }
+  }
+
+  /*
+    Perform steps of prelocking algorithm until there are unprocessed
+    elements in prelocking list/set.
+  */
+  while (*table_to_open  ||
+         (thd->locked_tables_mode <= LTM_LOCK_TABLES && *sroutine_to_open))
+  {
+    /*
+      For every table in the list of tables to open, try to find or open
+      a table.
+
+      NOTE: there can be duplicates in the list. F.ex. table specified in
+      LOCK TABLES and prelocked via another table (like when used in a trigger).
+    */
+    for (tables= *table_to_open; tables;
+         table_to_open= &tables->next_global, tables= tables->next_global)
+    {
+      error= open_and_process_table(thd, tables, counter, flags,
+                                    prelocking_strategy, has_prelocking_list,
+                                    &ot_ctx);
+
+      if (unlikely(error))
+      {
+        if (ot_ctx.can_recover_from_failed_open())
+        {
+          /*
+            We have met exclusive metadata lock or old version of table.
+            Now we have to close all tables and release metadata locks.
+            We also have to throw away set of prelocked tables (and thus
+            close tables from this set that were open by now) since it
+            is possible that one of tables which determined its content
+            was changed.
+
+            Instead of implementing complex/non-robust logic mentioned
+            above we simply close and then reopen all tables.
+
+            We have to save pointer to table list element for table which we
+            have failed to open since closing tables can trigger removal of
+            elements from the table list (if MERGE tables are involved),
+          */
+          close_tables_for_reopen(thd, start, ot_ctx.start_of_statement_svp());
+
+          /*
+            Here we rely on the fact that 'tables' still points to the valid
+            TABLE_LIST element. Although currently this assumption is valid
+            it may change in future.
+          */
+          if (ot_ctx.recover_from_failed_open())
+            goto error;
+
+          /* Re-open temporary tables after close_tables_for_reopen(). */
+          if (thd->open_temporary_tables(*start))
+            goto error;
+
+          error= FALSE;
+          goto restart;
+        }
+        goto error;
+      }
+
+      DEBUG_SYNC(thd, "open_tables_after_open_and_process_table");
+    }
+
+    /*
+      If we are not already in prelocked mode and extended table list is
+      not yet built for our statement we need to cache routines it uses
+      and build the prelocking list for it.
+      If we are not in prelocked mode but have built the extended table
+      list, we still need to call open_and_process_routine() to take
+      MDL locks on the routines.
+    */
+    if (thd->locked_tables_mode <= LTM_LOCK_TABLES && *sroutine_to_open)
+    {
+      /*
+        Process elements of the prelocking set which are present there
+        since parsing stage or were added to it by invocations of
+        Prelocking_strategy methods in the above loop over tables.
+
+        For example, if element is a routine, cache it and then,
+        if prelocking strategy prescribes so, add tables it uses to the
+        table list and routines it might invoke to the prelocking set.
+      */
+      for (Sroutine_hash_entry *rt= *sroutine_to_open; rt;
+           sroutine_to_open= &rt->next, rt= rt->next)
+      {
+        bool need_prelocking= false;
+        bool routine_modifies_data;
+        TABLE_LIST **save_query_tables_last= thd->lex->query_tables_last;
+
+        error= open_and_process_routine(thd, thd->lex, rt, prelocking_strategy,
+                                        has_prelocking_list, &ot_ctx,
+                                        &need_prelocking,
+                                        &routine_modifies_data);
+
+        // Remember if any of SF modifies data.
+        some_routine_modifies_data|= routine_modifies_data;
+
+        if (need_prelocking && ! thd->lex->requires_prelocking())
+          thd->lex->mark_as_requiring_prelocking(save_query_tables_last);
+
+        if (need_prelocking && ! *start)
+          *start= thd->lex->query_tables;
+
+        if (unlikely(error))
+        {
+          /* F.ex. deadlock happened */
+          if (ot_ctx.can_recover_from_failed_open())
+          {
+            DBUG_ASSERT(ot_ctx.get_action() !=
+                        Open_table_context::OT_ADD_HISTORY_PARTITION);
+            close_tables_for_reopen(thd, start,
+                                    ot_ctx.start_of_statement_svp());
+            if (ot_ctx.recover_from_failed_open())
+              goto error;
+
+            /* Re-open temporary tables after close_tables_for_reopen(). */
+            if (thd->open_temporary_tables(*start))
+              goto error;
+
+            error= FALSE;
+            goto restart;
+          }
+          /*
+            Serious error during reading stored routines from mysql.proc table.
+            Something is wrong with the table or its contents, and an error has
+            been emitted; we must abort.
+          */
+          goto error;
+        }
+      }
+    }
+    if ((error= prelocking_strategy->handle_end(thd)))
+      goto error;
+  }
+
+  /*
+    After successful open of all tables, including MERGE parents and
+    children, attach the children to their parents. At end of statement,
+    the children are detached. Attaching and detaching are always done,
+    even under LOCK TABLES.
+
+    We also convert all TL_WRITE_DEFAULT and TL_READ_DEFAULT locks to
+    appropriate "real" lock types to be used for locking and to be passed
+    to storage engine.
+
+    And start wsrep TOI if needed.
+  */
+  for (tables= *start; tables; tables= tables->next_global)
+  {
+    TABLE *tbl= tables->table;
+
+    if (!tbl)
+      continue;
+
+    /* Schema tables may not have a TABLE object here. */
+    if (tbl->file->ha_table_flags() & HA_CAN_MULTISTEP_MERGE)
+    {
+      /* MERGE tables need to access parent and child TABLE_LISTs. */
+      DBUG_ASSERT(tbl->pos_in_table_list == tables);
+      if (tbl->file->extra(HA_EXTRA_ATTACH_CHILDREN))
+      {
+        error= TRUE;
+        goto error;
+      }
+    }
+
+    /* Set appropriate TABLE::lock_type. */
+    if (tbl && tables->lock_type != TL_UNLOCK && !thd->locked_tables_mode)
+    {
+      if (tables->lock_type == TL_WRITE_DEFAULT ||
+          unlikely(tables->lock_type == TL_WRITE_SKIP_LOCKED &&
+           !(tables->table->file->ha_table_flags() & HA_CAN_SKIP_LOCKED)))
+          tbl->reginfo.lock_type= thd->update_lock_default;
+      else if (likely(tables->lock_type == TL_READ_DEFAULT) ||
+               (tables->lock_type == TL_READ_SKIP_LOCKED &&
+                !(tables->table->file->ha_table_flags() & HA_CAN_SKIP_LOCKED)))
+          tbl->reginfo.lock_type= read_lock_type_for_table(thd, thd->lex, tables,
+                                                           some_routine_modifies_data);
+      else
+        tbl->reginfo.lock_type= tables->lock_type;
+      tbl->reginfo.skip_locked= tables->skip_locked;
+    }
+#ifdef WITH_WSREP
+    /*
+       At this point we have SE associated with table so we can check wsrep_mode
+       rules at this point.
+    */
+    if (WSREP(thd) &&
+        wsrep_thd_is_local(thd) &&
+        tbl &&
+        tables == *start &&
+        !wsrep_check_mode_after_open_table(thd,
+                                           tbl->file->ht, tables))
+    {
+      error= TRUE;
+      goto error;
+    }
+
+    /* If user has issued wsrep_on = OFF and wsrep was on before
+    we need to check is local gtid feature disabled */
+    if (thd->wsrep_was_on &&
+	thd->variables.sql_log_bin == 1 &&
+	!WSREP(thd) &&
+        wsrep_check_mode(WSREP_MODE_DISALLOW_LOCAL_GTID))
+    {
+      enum_sql_command sql_command= thd->lex->sql_command;
+      bool is_dml_stmt= thd->get_command() != COM_STMT_PREPARE &&
+                    !thd->stmt_arena->is_stmt_prepare()        &&
+                    (sql_command == SQLCOM_INSERT ||
+                     sql_command == SQLCOM_INSERT_SELECT ||
+                     sql_command == SQLCOM_REPLACE ||
+                     sql_command == SQLCOM_REPLACE_SELECT ||
+                     sql_command == SQLCOM_UPDATE ||
+                     sql_command == SQLCOM_UPDATE_MULTI ||
+                     sql_command == SQLCOM_LOAD ||
+                     sql_command == SQLCOM_DELETE);
+
+      if (is_dml_stmt && !is_temporary_table(tables))
+      {
+        /* wsrep_mode = WSREP_MODE_DISALLOW_LOCAL_GTID, treat as error */
+        my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0));
+        push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+                            ER_OPTION_PREVENTS_STATEMENT,
+                            "You can't execute statements that would generate local "
+                            "GTIDs when wsrep_mode = DISALLOW_LOCAL_GTID is set. "
+                            "Try disabling binary logging with SET sql_log_bin=0 "
+                            "to execute this statement.");
+
+        error= TRUE;
+        goto error;
+      }
+    }
+#endif /* WITH_WSREP */
+  }
+
+error:
+  /* Common exit, reached on success as well as on failure. */
+  THD_STAGE_INFO(thd, stage_after_opening_tables);
+  thd_proc_info(thd, 0);
+
+  if (unlikely(error) && *table_to_open)
+  {
+    (*table_to_open)->table= NULL;
+  }
+  DBUG_PRINT("open_tables", ("returning: %d", (int) error));
+  DBUG_RETURN(error);
+}
+
+
+/**
+  Defines how prelocking algorithm for DML statements should handle routines:
+  - For CALL statements we do unrolling (i.e. open and lock tables for each
+    sub-statement individually). So for such statements prelocking is enabled
+    only if stored functions are used in parameter list and only for period
+    during which we calculate values of parameters. Thus in this strategy we
+    ignore procedure which is directly called by such statement and extend
+    the prelocking set only with tables/functions used by SF called from the
+    parameter list.
+  - For any other statement any routine which is directly or indirectly called
+    by statement is going to be executed in prelocked mode. So in this case we
+    simply add all tables and routines used by it to the prelocking set.
+
+  @param[in]  thd              Thread context.
+  @param[in]  prelocking_ctx   Prelocking context of the statement.
+  @param[in]  rt               Prelocking set element describing routine.
+  @param[in]  sp               Routine body.
+  @param[out] need_prelocking  Set to TRUE if method detects that prelocking
+                               required, not changed otherwise.
+
+  @retval FALSE  Success.
+  @retval TRUE   Failure (OOM).
+*/
+
+bool DML_prelocking_strategy::handle_routine(THD *thd,
+               Query_tables_list *prelocking_ctx, Sroutine_hash_entry *rt,
+               sp_head *sp, bool *need_prelocking)
+{
+  /*
+    We assume that for any "CALL proc(...)" statement sroutines_list will
+    have 'proc' as first element (it may have several, consider e.g.
+    "proc(sp_func(...)))". This property is currently guaranteed by the
+    parser.
+  */
+
+  /*
+    Skip the procedure directly invoked by CALL (it is executed by
+    unrolling, not in prelocked mode); prelock everything else.
+  */
+  if (rt != (Sroutine_hash_entry*)prelocking_ctx->sroutines_list.first ||
+      rt->mdl_request.key.mdl_namespace() != MDL_key::PROCEDURE)
+  {
+    *need_prelocking= TRUE;
+    sp_update_stmt_used_routines(thd, prelocking_ctx, &sp->m_sroutines,
+                                 rt->belong_to_view);
+    (void)sp->add_used_tables_to_table_list(thd,
+                                            &prelocking_ctx->query_tables_last,
+                                            rt->belong_to_view);
+  }
+  sp->propagate_attributes(prelocking_ctx);
+  return FALSE;
+}
+
+
+/*
+  @note this can be changed to use a hash, instead of scanning the linked
+  list, if the performance of this function will ever become an issue
+*/
+bool table_already_fk_prelocked(TABLE_LIST *tl, LEX_CSTRING *db,
+                                LEX_CSTRING *table, thr_lock_type lock_type)
+{
+  /*
+    Scan the global table list for an FK prelocking placeholder that
+    already covers the given table with a lock of at least the
+    requested strength.
+  */
+  for (TABLE_LIST *node= tl; node; node= node->next_global)
+  {
+    if (node->prelocking_placeholder != TABLE_LIST::PRELOCK_FK)
+      continue;
+    if (node->lock_type < lock_type)
+      continue;
+    if (strcmp(node->db.str, db->str) == 0 &&
+        strcmp(node->table_name.str, table->str) == 0)
+      return true;
+  }
+  return false;
+}
+
+
+static TABLE_LIST *internal_table_exists(TABLE_LIST *global_list,
+                                         const char *table_name)
+{
+  /*
+    Find a list element whose name is the given string.
+    NOTE: this deliberately compares name *pointers*, not contents —
+    presumably the callers reuse the same string object for repeated
+    lookups (e.g. re-executed prepared statements); verify against
+    callers before changing to strcmp().
+  */
+  for (TABLE_LIST *tl= global_list; tl; tl= tl->next_global)
+  {
+    if (tl->table_name.str == table_name)
+      return tl;
+  }
+  return 0;
+}
+
+
+/**
+  Add internal tables (e.g. sequence tables used by DEFAULT expressions)
+  to the statement's global table list so that they are opened too.
+
+  Tables already present in the list (can happen with prepared
+  statements) are not duplicated; instead next_local is re-pointed at
+  the existing element.
+
+  @param thd             Thread context.
+  @param prelocking_ctx  Prelocking context (query table list) to extend.
+  @param tables          Chain of internal tables to add.
+
+  @retval FALSE  Success.
+  @retval TRUE   Out of memory.
+*/
+
+static bool
+add_internal_tables(THD *thd, Query_tables_list *prelocking_ctx,
+                    TABLE_LIST *tables)
+{
+  TABLE_LIST *global_table_list= prelocking_ctx->query_tables;
+  DBUG_ENTER("add_internal_tables");
+
+  do
+  {
+    TABLE_LIST *tmp __attribute__((unused));
+    DBUG_PRINT("info", ("table name: %s", tables->table_name.str));
+    /*
+      Skip table if already in the list. Can happen with prepared statements
+    */
+    if ((tmp= internal_table_exists(global_table_list,
+                                    tables->table_name.str)))
+    {
+      /*
+        Use the original value for the next local, used by the
+        original prepared statement. We cannot trust the original
+        next_local value as it may have been changed by a previous
+        statement using the same table.
+      */
+      tables->next_local= tmp;
+      continue;
+    }
+
+    TABLE_LIST *tl= (TABLE_LIST *) thd->alloc(sizeof(TABLE_LIST));
+    if (!tl)
+      DBUG_RETURN(TRUE);
+    tl->init_one_table_for_prelocking(&tables->db,
+                                      &tables->table_name,
+                                      NULL, tables->lock_type,
+                                      TABLE_LIST::PRELOCK_NONE,
+                                      0, 0,
+                                      &prelocking_ctx->query_tables_last,
+                                      tables->for_insert_data);
+    /*
+      Store link to the new table_list that will be used by open so that
+      Item_func_nextval() can find it
+    */
+    tables->next_local= tl;
+    DBUG_PRINT("info", ("table name: %s added", tables->table_name.str));
+  } while ((tables= tables->next_global));
+  DBUG_RETURN(FALSE);
+}
+
+/**
+  Extend the table_list to include foreign tables for prelocking.
+
+  @param[in]  thd              Thread context.
+  @param[in]  prelocking_ctx   Prelocking context of the statement.
+  @param[in]  table_list       Table list element for table.
+  @param[in]  sp               Routine body.
+  @param[out] need_prelocking  Set to TRUE if method detects that prelocking
+                               required, not changed otherwise.
+
+  @retval FALSE  Success.
+  @retval TRUE   Failure (OOM).
+*/
+inline bool
+prepare_fk_prelocking_list(THD *thd, Query_tables_list *prelocking_ctx,
+                           TABLE_LIST *table_list, bool *need_prelocking,
+                           uint8 op)
+{
+  DBUG_ENTER("prepare_fk_prelocking_list");
+  /* Restored template arguments that were lost (rendered as "List  fk_list")
+     in the previous revision; the element type is FOREIGN_KEY_INFO. */
+  List<FOREIGN_KEY_INFO> fk_list;
+  List_iterator<FOREIGN_KEY_INFO> fk_list_it(fk_list);
+  FOREIGN_KEY_INFO *fk;
+  Query_arena *arena, backup;
+  TABLE *table= table_list->table;
+
+  /* Allocate prelocking placeholders on the statement arena so they
+     survive re-execution of prepared statements. */
+  arena= thd->activate_stmt_arena_if_needed(&backup);
+
+  table->file->get_parent_foreign_key_list(thd, &fk_list);
+  if (unlikely(thd->is_error()))
+  {
+    if (arena)
+      thd->restore_active_arena(arena, &backup);
+    /* Was "return TRUE": must use DBUG_RETURN to keep the DBUG call
+       stack balanced with DBUG_ENTER above. */
+    DBUG_RETURN(TRUE);
+  }
+
+  *need_prelocking= TRUE;
+
+  while ((fk= fk_list_it++))
+  {
+    // FK_OPTION_RESTRICT and FK_OPTION_NO_ACTION only need read access
+    thr_lock_type lock_type;
+
+    if ((op & trg2bit(TRG_EVENT_DELETE) && fk_modifies_child(fk->delete_method))
+     || (op & trg2bit(TRG_EVENT_UPDATE) && fk_modifies_child(fk->update_method)))
+      lock_type= TL_FIRST_WRITE;
+    else
+      lock_type= TL_READ;
+
+    /* Avoid adding the same child table twice. */
+    if (table_already_fk_prelocked(prelocking_ctx->query_tables,
+          fk->foreign_db, fk->foreign_table,
+          lock_type))
+      continue;
+
+    TABLE_LIST *tl= (TABLE_LIST *) thd->alloc(sizeof(TABLE_LIST));
+    if (!tl)
+    {
+      /* Out of memory: previously dereferenced unchecked. */
+      if (arena)
+        thd->restore_active_arena(arena, &backup);
+      DBUG_RETURN(TRUE);
+    }
+    tl->init_one_table_for_prelocking(fk->foreign_db,
+        fk->foreign_table,
+        NULL, lock_type,
+        TABLE_LIST::PRELOCK_FK,
+        table_list->belong_to_view, op,
+        &prelocking_ctx->query_tables_last,
+        table_list->for_insert_data);
+  }
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+  DBUG_RETURN(FALSE);
+}
+
+/**
+  Defines how prelocking algorithm for DML statements should handle table list
+  elements:
+  - If table has triggers we should add all tables and routines
+    used by them to the prelocking set.
+
+  We do not need to acquire metadata locks on trigger names
+  in DML statements, since all DDL statements
+  that change trigger metadata always lock their
+  subject tables.
+
+  @param[in]  thd              Thread context.
+  @param[in]  prelocking_ctx   Prelocking context of the statement.
+  @param[in]  table_list       Table list element for table.
+  @param[in]  sp               Routine body.
+  @param[out] need_prelocking  Set to TRUE if method detects that prelocking
+                               required, not changed otherwise.
+
+  @retval FALSE  Success.
+  @retval TRUE   Failure (OOM).
+*/
+
+bool DML_prelocking_strategy::handle_table(THD *thd,
+             Query_tables_list *prelocking_ctx, TABLE_LIST *table_list,
+             bool *need_prelocking)
+{
+  DBUG_ENTER("handle_table");
+  TABLE *table= table_list->table;
+  /* We rely on a caller to check that table is going to be changed. */
+  DBUG_ASSERT(table_list->lock_type >= TL_FIRST_WRITE ||
+              thd->lex->default_used);
+
+  if (table_list->trg_event_map)
+  {
+    if (table->triggers)
+    {
+      *need_prelocking= TRUE;
+
+      /* Add tables/routines used by this table's triggers. */
+      if (table->triggers->
+          add_tables_and_routines_for_triggers(thd, prelocking_ctx, table_list))
+        /* Was "return TRUE": DBUG_RETURN keeps the DBUG stack balanced
+           with DBUG_ENTER above. */
+        DBUG_RETURN(TRUE);
+    }
+
+    /* Add FK child tables so cascading actions can be prelocked. */
+    if (table->file->referenced_by_foreign_key())
+    {
+      if (prepare_fk_prelocking_list(thd, prelocking_ctx, table_list,
+                                     need_prelocking,
+                                     table_list->trg_event_map))
+        DBUG_RETURN(TRUE);
+    }
+  }
+  else if (table_list->slave_fk_event_map &&
+           table->file->referenced_by_foreign_key())
+  {
+    if (prepare_fk_prelocking_list(thd, prelocking_ctx, table_list,
+                                   need_prelocking,
+                                   table_list->slave_fk_event_map))
+      DBUG_RETURN(TRUE);
+  }
+
+  /* Open any tables used by DEFAULT (like sequence tables) */
+  DBUG_PRINT("info", ("table: %p  name: %s  db: %s  flags: %u",
+                      table_list, table_list->table_name.str,
+                      table_list->db.str, table_list->for_insert_data));
+  if (table->internal_tables &&
+      (table_list->for_insert_data ||
+       thd->lex->default_used))
+  {
+    Query_arena *arena, backup;
+    bool error;
+    /* Allocate added TABLE_LIST elements on the statement arena. */
+    arena= thd->activate_stmt_arena_if_needed(&backup);
+    error= add_internal_tables(thd, prelocking_ctx,
+                               table->internal_tables);
+    if (arena)
+      thd->restore_active_arena(arena, &backup);
+    if (unlikely(error))
+    {
+      *need_prelocking= TRUE;
+      DBUG_RETURN(TRUE);
+    }
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
+  Open all tables used by DEFAULT functions.
+
+  This is different from normal open_and_lock_tables() as we may
+  already have other tables opened and locked and we have to merge the
+  new table with the old ones.
+*/
+
bool open_and_lock_internal_tables(TABLE *table, bool lock_table)
{
  THD *thd= table->in_use;
  TABLE_LIST *tl;
  MYSQL_LOCK *save_lock,*new_lock;
  DBUG_ENTER("open_and_lock_internal_tables");

  /* remove pointer to old select_lex which is already destroyed */
  for (tl= table->internal_tables ; tl ; tl= tl->next_global)
    tl->select_lex= 0;

  uint counter;
  /* Savepoint so we can release only the MDL locks taken here on error. */
  MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint();
  TABLE_LIST *tmp= table->internal_tables;
  DML_prelocking_strategy prelocking_strategy;

  if (open_tables(thd, thd->lex->create_info, &tmp, &counter, 0,
                  &prelocking_strategy))
    goto err;

  if (lock_table)
  {
    /*
      Lock the internal tables with a fresh lock (thd->lock temporarily
      cleared), then merge that lock with the lock the statement already
      holds so all tables are covered by one MYSQL_LOCK.
    */
    save_lock= thd->lock;
    thd->lock= 0;
    if (lock_tables(thd, table->internal_tables, counter,
                    MYSQL_LOCK_USE_MALLOC))
      goto err;

    if (!(new_lock= mysql_lock_merge(save_lock, thd->lock)))
    {
      /* Merge failed: release the old lock; the new one stays in thd->lock
         until cleanup. NOTE(review): thd->lock is set to save_lock before
         save_lock is unlocked — presumably intentional; confirm upstream. */
      thd->lock= save_lock;
      mysql_unlock_tables(thd, save_lock, 1);
      /* We don't have to close tables as caller will do that */
      goto err;
    }
    thd->lock= new_lock;
  }
  DBUG_RETURN(0);

err:
  /* Undo any metadata locks acquired by this call. */
  thd->mdl_context.rollback_to_savepoint(mdl_savepoint);
  DBUG_RETURN(1);
}
+
+
+/**
+  Defines how prelocking algorithm for DML statements should handle view -
+  all view routines should be added to the prelocking set.
+
+  @param[in]  thd              Thread context.
+  @param[in]  prelocking_ctx   Prelocking context of the statement.
+  @param[in]  table_list       Table list element for view.
+  @param[in]  sp               Routine body.
+  @param[out] need_prelocking  Set to TRUE if method detects that prelocking
+                               required, not changed otherwise.
+
+  @retval FALSE  Success.
+  @retval TRUE   Failure (OOM).
+*/
+
+bool DML_prelocking_strategy::handle_view(THD *thd,
+            Query_tables_list *prelocking_ctx, TABLE_LIST *table_list,
+            bool *need_prelocking)
+{
+  if (table_list->view->uses_stored_routines())
+  {
+    *need_prelocking= TRUE;
+
+    sp_update_stmt_used_routines(thd, prelocking_ctx,
+                                 &table_list->view->sroutines_list,
+                                 table_list->top_table());
+  }
+
+  /*
+    If a trigger was defined on one of the associated tables then assign the
+    'trg_event_map' value of the view to the next table in table_list. When a
+    Stored function is invoked, all the associated tables including the tables
+    associated with the trigger are prelocked.
+  */
+  if (table_list->trg_event_map && table_list->next_global)
+    table_list->next_global->trg_event_map= table_list->trg_event_map;
+  return FALSE;
+}
+
+
+/**
+  Defines how prelocking algorithm for LOCK TABLES statement should handle
+  table list elements.
+
+  @param[in]  thd              Thread context.
+  @param[in]  prelocking_ctx   Prelocking context of the statement.
+  @param[in]  table_list       Table list element for table.
+  @param[in]  sp               Routine body.
+  @param[out] need_prelocking  Set to TRUE if method detects that prelocking
+                               required, not changed otherwise.
+
+  @retval FALSE  Success.
+  @retval TRUE   Failure (OOM).
+*/
+
+bool Lock_tables_prelocking_strategy::handle_table(THD *thd,
+             Query_tables_list *prelocking_ctx, TABLE_LIST *table_list,
+             bool *need_prelocking)
+{
+  TABLE_LIST **last= prelocking_ctx->query_tables_last;
+
+  if (DML_prelocking_strategy::handle_table(thd, prelocking_ctx, table_list,
+                                            need_prelocking))
+    return TRUE;
+
+  /*
+    normally we don't need to open FK-prelocked tables for RESTRICT,
+    MDL is enough. But under LOCK TABLES we have to open everything
+  */
+  for (TABLE_LIST *tl= *last; tl; tl= tl->next_global)
+    tl->open_strategy= TABLE_LIST::OPEN_NORMAL;
+
+  /* We rely on a caller to check that table is going to be changed. */
+  DBUG_ASSERT(table_list->lock_type >= TL_FIRST_WRITE);
+
+  return FALSE;
+}
+
+
+/**
+  Defines how prelocking algorithm for ALTER TABLE statement should handle
+  routines - do nothing as this statement is not supposed to call routines.
+
+  We still can end up in this method when someone tries
+  to define a foreign key referencing a view, and not just
+  a simple view, but one that uses stored routines.
+*/
+
bool Alter_table_prelocking_strategy::handle_routine(THD *thd,
               Query_tables_list *prelocking_ctx, Sroutine_hash_entry *rt,
               sp_head *sp, bool *need_prelocking)
{
  /* ALTER TABLE never executes routines: nothing to add to the set. */
  return FALSE;
}
+
+
+/**
+  Defines how prelocking algorithm for ALTER TABLE statement should handle
+  table list elements.
+
+  Unlike in DML, we do not process triggers here.
+
+  @param[in]  thd              Thread context.
+  @param[in]  prelocking_ctx   Prelocking context of the statement.
+  @param[in]  table_list       Table list element for table.
+  @param[in]  sp               Routine body.
+  @param[out] need_prelocking  Set to TRUE if method detects that prelocking
+                               required, not changed otherwise.
+
+
+  @retval FALSE  Success.
+  @retval TRUE   Failure (OOM).
+*/
+
bool Alter_table_prelocking_strategy::handle_table(THD *thd,
             Query_tables_list *prelocking_ctx, TABLE_LIST *table_list,
             bool *need_prelocking)
{
  /* Unlike DML, ALTER TABLE does not process triggers: nothing to do. */
  return FALSE;
}
+
+
+/**
+  Defines how prelocking algorithm for ALTER TABLE statement
+  should handle view - do nothing. We don't need to add view
+  routines to the prelocking set in this case as view is not going
+  to be materialized.
+*/
+
bool Alter_table_prelocking_strategy::handle_view(THD *thd,
            Query_tables_list *prelocking_ctx, TABLE_LIST *table_list,
            bool *need_prelocking)
{
  /* View won't be materialized by ALTER TABLE: no routines to prelock. */
  return FALSE;
}
+
+
+/**
+  Check that lock is ok for tables; Call start stmt if ok
+
+  @param thd             Thread handle.
+  @param prelocking_ctx  Prelocking context.
+  @param table_list      Table list element for table to be checked.
+
+  @retval FALSE - Ok.
+  @retval TRUE  - Error.
+*/
+
+static bool check_lock_and_start_stmt(THD *thd,
+                                      Query_tables_list *prelocking_ctx,
+                                      TABLE_LIST *table_list)
+{
+  int error;
+  thr_lock_type lock_type;
+  DBUG_ENTER("check_lock_and_start_stmt");
+
+  /*
+    Prelocking placeholder is not set for TABLE_LIST that
+    are directly used by TOP level statement.
+  */
+  DBUG_ASSERT(table_list->prelocking_placeholder == TABLE_LIST::PRELOCK_NONE);
+
+  /*
+    TL_WRITE_DEFAULT and TL_READ_DEFAULT are supposed to be parser only
+    types of locks so they should be converted to appropriate other types
+    to be passed to storage engine. The exact lock type passed to the
+    engine is important as, for example, InnoDB uses it to determine
+    what kind of row locks should be acquired when executing statement
+    in prelocked mode or under LOCK TABLES with @@innodb_table_locks = 0.
+
+    Last argument routine_modifies_data for read_lock_type_for_table()
+    is ignored, as prelocking placeholder will never be set here.
+  */
+  if (table_list->lock_type == TL_WRITE_DEFAULT)
+    lock_type= thd->update_lock_default;
+  else if (table_list->lock_type == TL_READ_DEFAULT)
+    lock_type= read_lock_type_for_table(thd, prelocking_ctx, table_list, true);
+  else
+    lock_type= table_list->lock_type;
+
+  if ((int) lock_type >= (int) TL_FIRST_WRITE &&
+      (int) table_list->table->reginfo.lock_type < (int) TL_FIRST_WRITE)
+  {
+    my_error(ER_TABLE_NOT_LOCKED_FOR_WRITE, MYF(0),
+             table_list->table->alias.c_ptr());
+    DBUG_RETURN(1);
+  }
+  if (unlikely((error= table_list->table->file->start_stmt(thd, lock_type))))
+  {
+    table_list->table->file->print_error(error, MYF(0));
+    DBUG_RETURN(1);
+  }
+
+  /*
+    Record in transaction state tracking
+  */
+  TRANSACT_TRACKER(add_trx_state(thd, lock_type,
+                                 table_list->table->file->has_transactions()));
+
+  DBUG_RETURN(0);
+}
+
+
+/**
+  @brief Open and lock one table
+
+  @param[in]    thd             thread handle
+  @param[in]    table_l         table to open is first table in this list
+  @param[in]    lock_type       lock to use for table
+  @param[in]    flags           options to be used while opening and locking
+                                table (see open_table(), mysql_lock_tables())
+  @param[in]    prelocking_strategy  Strategy which specifies how prelocking
+                                     algorithm should work for this statement.
+
+  @return       table
+    @retval     != NULL         OK, opened table returned
+    @retval     NULL            Error
+
+  @note
+    If ok, the following are also set:
+      table_list->lock_type 	lock_type
+      table_list->table		table
+
+  @note
+    If table_l is a list, not a single table, the list is temporarily
+    broken.
+
+  @detail
+    This function is meant as a replacement for open_ltable() when
+    MERGE tables can be opened. open_ltable() cannot open MERGE tables.
+
+    There may be more differences between open_n_lock_single_table() and
+    open_ltable(). One known difference is that open_ltable() does
+    neither call thd->decide_logging_format() nor handle some other logging
+    and locking issues because it does not call lock_tables().
+*/
+
+TABLE *open_n_lock_single_table(THD *thd, TABLE_LIST *table_l,
+                                thr_lock_type lock_type, uint flags,
+                                Prelocking_strategy *prelocking_strategy)
+{
+  TABLE_LIST *save_next_global;
+  DBUG_ENTER("open_n_lock_single_table");
+
+  /* Remember old 'next' pointer. */
+  save_next_global= table_l->next_global;
+  /* Break list. */
+  table_l->next_global= NULL;
+
+  /* Set requested lock type. */
+  table_l->lock_type= lock_type;
+  /* Allow to open real tables only. */
+  table_l->required_type= TABLE_TYPE_NORMAL;
+
+  /* Open the table. */
+  if (open_and_lock_tables(thd, table_l, FALSE, flags,
+                           prelocking_strategy))
+    table_l->table= NULL; /* Just to be sure. */
+
+  /* Restore list. */
+  table_l->next_global= save_next_global;
+
+  DBUG_RETURN(table_l->table);
+}
+
+
+/*
+  Open and lock one table
+
+  SYNOPSIS
+    open_ltable()
+    thd			Thread handler
+    table_list		Table to open is first table in this list
+    lock_type		Lock to use for open
+    lock_flags          Flags passed to mysql_lock_table
+
+  NOTE
+    This function doesn't do anything like SP/SF/views/triggers analysis done 
+    in open_table()/lock_tables(). It is intended for opening of only one
+    concrete table. And used only in special contexts.
+
+  RETURN VALUES
+    table		Opened table
+    0			Error
+  
+    If ok, the following are also set:
+      table_list->lock_type 	lock_type
+      table_list->table		table
+*/
+
TABLE *open_ltable(THD *thd, TABLE_LIST *table_list, thr_lock_type lock_type,
                   uint lock_flags)
{
  TABLE *table;
  Open_table_context ot_ctx(thd, lock_flags);
  bool error;
  DBUG_ENTER("open_ltable");

  /* Ignore temporary tables as they have already been opened. */
  if (table_list->table)
    DBUG_RETURN(table_list->table);

  THD_STAGE_INFO(thd, stage_opening_tables);
  thd->current_tablenr= 0;
  /* open_ltable can be used only for BASIC TABLEs */
  table_list->required_type= TABLE_TYPE_NORMAL;

  /* This function can't properly handle requests for such metadata locks. */
  DBUG_ASSERT(lock_flags & MYSQL_OPEN_HAS_MDL_LOCK  ||
              table_list->mdl_request.type < MDL_SHARED_UPGRADABLE);

  /* Retry the open as long as the failure is recoverable (e.g. after a
     back-off on an MDL conflict or a failed table repair). */
  while ((error= open_table(thd, table_list, &ot_ctx)) &&
         ot_ctx.can_recover_from_failed_open())
  {
    /*
      Even though we have failed to open table we still need to
      call release_transactional_locks() to release metadata locks which
      might have been acquired successfully.
    */
    thd->mdl_context.rollback_to_savepoint(ot_ctx.start_of_statement_svp());
    table_list->mdl_request.ticket= 0;
    if (ot_ctx.recover_from_failed_open())
      break;
  }

  if (likely(!error))
  {
    /*
      We can't have a view or some special "open_strategy" in this function
      so there should be a TABLE instance.
    */
    DBUG_ASSERT(table_list->table);
    table= table_list->table;
    if (table->file->ha_table_flags() & HA_CAN_MULTISTEP_MERGE)
    {
      /* A MERGE table must not come here. */
      /* purecov: begin tested */
      my_error(ER_WRONG_OBJECT, MYF(0), table->s->db.str,
               table->s->table_name.str, "BASE TABLE");
      table= 0;
      goto end;
      /* purecov: end */
    }

    table_list->lock_type= lock_type;
    table->grant= table_list->grant;
    if (thd->locked_tables_mode)
    {
      /* Under LOCK TABLES: verify the held lock is compatible and start
         the statement on the engine side; no new thr_lock is taken. */
      if (check_lock_and_start_stmt(thd, thd->lex, table_list))
	table= 0;
    }
    else
    {
      DBUG_ASSERT(thd->lock == 0);	// You must lock everything at once
      if ((table->reginfo.lock_type= lock_type) != TL_UNLOCK)
	if (! (thd->lock= mysql_lock_tables(thd, &table_list->table, 1,
                                            lock_flags)))
        {
          table= 0;
        }
    }
  }
  else
    table= 0;

end:
  if (table == NULL)
  {
    /* Open or lock failed: roll back the statement and close everything. */
    if (!thd->in_sub_stmt)
      trans_rollback_stmt(thd);
    close_thread_tables(thd);
  }
  THD_STAGE_INFO(thd, stage_after_opening_tables);

  thd_proc_info(thd, 0);
  DBUG_RETURN(table);
}
+
+
+/**
+  Open all tables in list, locks them and optionally process derived tables.
+
+  @param thd		      Thread context.
+  @param options              DDL options.
+  @param tables	              List of tables for open and locking.
+  @param derived              Whether to handle derived tables.
+  @param flags                Bitmap of options to be used to open and lock
+                              tables (see open_tables() and mysql_lock_tables()
+                              for details).
+  @param prelocking_strategy  Strategy which specifies how prelocking algorithm
+                              should work for this statement.
+
+  @note
+    The thr_lock locks will automatically be freed by
+    close_thread_tables().
+
+  @retval FALSE  OK.
+  @retval TRUE   Error
+*/
+
bool open_and_lock_tables(THD *thd, const DDL_options_st &options,
                          TABLE_LIST *tables,
                          bool derived, uint flags,
                          Prelocking_strategy *prelocking_strategy)
{
  uint counter;
  /* Savepoint so a failure releases only MDL locks acquired here. */
  MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint();
  DBUG_ENTER("open_and_lock_tables");
  DBUG_PRINT("enter", ("derived handling: %d", derived));

  if (open_tables(thd, options, &tables, &counter, flags, prelocking_strategy))
    goto err;

  DBUG_EXECUTE_IF("sleep_open_and_lock_after_open", {
                  const char *old_proc_info= thd->proc_info;
                  thd->proc_info= "DBUG sleep";
                  my_sleep(6000000);
                  thd->proc_info= old_proc_info;});

  if (lock_tables(thd, tables, counter, flags))
    goto err;

  /* Don't read statistics tables when opening internal tables */
  if (!(flags & (MYSQL_OPEN_IGNORE_LOGGING_FORMAT |
                 MYSQL_OPEN_IGNORE_ENGINE_STATS)))
    (void) read_statistics_for_tables_if_needed(thd, tables);
  
  if (derived)
  {
    /* Initialize derived tables and optionally prepare them right away. */
    if (mysql_handle_derived(thd->lex, DT_INIT))
      goto err;
    if (thd->prepare_derived_at_open &&
        (mysql_handle_derived(thd->lex, DT_PREPARE)))
      goto err;
  }

  DBUG_RETURN(FALSE);
err:
  if (! thd->in_sub_stmt)
    trans_rollback_stmt(thd);  /* Necessary if derived handling failed. */
  close_thread_tables(thd);
  /* Don't keep locks for a failed statement. */
  thd->mdl_context.rollback_to_savepoint(mdl_savepoint);
  DBUG_RETURN(TRUE);
}
+
+
+/*
+  Open all tables in list and process derived tables
+
+  SYNOPSIS
+    open_normal_and_derived_tables
+    thd		- thread handler
+    tables	- list of tables for open
+    flags       - bitmap of flags to modify how the tables will be open:
+                  MYSQL_LOCK_IGNORE_FLUSH - open table even if someone has
+                  done a flush on it.
+    dt_phases   - set of flags to pass to the mysql_handle_derived
+
+  RETURN
+    FALSE - ok
+    TRUE  - error
+
+  NOTE 
+    This is to be used on prepare stage when you don't read any
+    data from the tables.
+*/
+
+bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags,
+                                    uint dt_phases)
+{
+  DML_prelocking_strategy prelocking_strategy;
+  uint counter;
+  MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint();
+  DBUG_ENTER("open_normal_and_derived_tables");
+  if (open_tables(thd, &tables, &counter, flags, &prelocking_strategy) ||
+      mysql_handle_derived(thd->lex, dt_phases))
+    goto end;
+
+  DBUG_RETURN(0);
+end:
+  /*
+    No need to commit/rollback the statement transaction: it's
+    either not started or we're filling in an INFORMATION_SCHEMA
+    table on the fly, and thus mustn't manipulate with the
+    transaction of the enclosing statement.
+  */
+  DBUG_ASSERT(thd->transaction->stmt.is_empty() ||
+              (thd->state_flags & Open_tables_state::BACKUPS_AVAIL));
+  close_thread_tables(thd);
+  /* Don't keep locks for a failed statement. */
+  thd->mdl_context.rollback_to_savepoint(mdl_savepoint);
+
+  DBUG_RETURN(TRUE); /* purecov: inspected */
+}
+
+
+/**
+  Open a table to read its structure, e.g. for:
+  - SHOW FIELDS
+  - delayed SP variable data type definition: DECLARE a t1.a%TYPE
+
+  The flag MYSQL_OPEN_GET_NEW_TABLE is passed to make %TYPE work
+  in stored functions, as during a stored function call
+  (e.g. in a SELECT query) the tables referenced in %TYPE can already be locked,
+  and attempt to open it again would return an error in open_table().
+
+  The flag MYSQL_OPEN_GET_NEW_TABLE is not really needed for
+  SHOW FIELDS or for a "CALL sp()" statement, but it's not harmful,
+  so let's pass it unconditionally.
+*/
+
+bool open_tables_only_view_structure(THD *thd, TABLE_LIST *table_list,
+                                     bool can_deadlock)
+{
+  DBUG_ENTER("open_tables_only_view_structure");
+  /*
+    Let us set fake sql_command so views won't try to merge
+    themselves into main statement. If we don't do this,
+    SELECT * from information_schema.xxxx will cause problems.
+    SQLCOM_SHOW_FIELDS is used because it satisfies
+    'LEX::only_view_structure()'.
+  */
+  enum_sql_command save_sql_command= thd->lex->sql_command;
+  thd->lex->sql_command= SQLCOM_SHOW_FIELDS;
+  bool rc= (thd->open_temporary_tables(table_list) ||
+           open_normal_and_derived_tables(thd, table_list,
+                                          (MYSQL_OPEN_IGNORE_FLUSH |
+                                           MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL |
+                                           MYSQL_OPEN_GET_NEW_TABLE |
+                                           (can_deadlock ?
+                                            MYSQL_OPEN_FAIL_ON_MDL_CONFLICT : 0)),
+                                          DT_INIT | DT_PREPARE));
+  /*
+    Restore old value of sql_command back as it is being looked at in
+    process_table() function.
+  */
+  thd->lex->sql_command= save_sql_command;
+  DBUG_RETURN(rc);
+}
+
+
+/*
+  Mark all real tables in the list as free for reuse.
+
+  SYNOPSIS
+    mark_real_tables_as_free_for_reuse()
+      thd   - thread context
+      table - head of the list of tables
+
+  DESCRIPTION
+    Marks all real tables in the list (i.e. not views, derived
+    or schema tables) as free for reuse.
+*/
+
+static void mark_real_tables_as_free_for_reuse(TABLE_LIST *table_list)
+{
+  TABLE_LIST *table;
+  DBUG_ENTER("mark_real_tables_as_free_for_reuse");
+
+  /*
+    We have to make two loops as HA_EXTRA_DETACH_CHILDREN may
+    remove items from the table list that we have to reset
+  */
+  for (table= table_list; table; table= table->next_global)
+  {
+    if (!table->placeholder())
+      table->table->query_id= 0;
+  }
+  for (table= table_list; table; table= table->next_global)
+  {
+    if (!table->placeholder())
+    {
+      /*
+        Detach children of MyISAMMRG tables used in
+        sub-statements, they will be reattached at open.
+        This has to be done in a separate loop to make sure
+        that children have had their query_id cleared.
+      */
+      table->table->file->extra(HA_EXTRA_DETACH_CHILDREN);
+    }
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Lock all tables in a list.
+
+  @param  thd           Thread handler
+  @param  tables        Tables to lock
+  @param  count         Number of opened tables
+  @param  flags         Options (see mysql_lock_tables() for details)
+
+  You can't call lock_tables() while holding thr_lock locks, as
+  this would break the dead-lock-free handling thr_lock gives us.
+  You must always get all needed locks at once.
+
+  If the query for which we are calling this function is marked as
+  requiring prelocking, this function will change
+  locked_tables_mode to LTM_PRELOCKED.
+
+  @retval FALSE         Success. 
+  @retval TRUE          A lock wait timeout, deadlock or out of memory.
+*/
+
bool lock_tables(THD *thd, TABLE_LIST *tables, uint count, uint flags)
{
  TABLE_LIST *table, *first_not_own;
  DBUG_ENTER("lock_tables");
  /*
    We can't meet statement requiring prelocking if we already
    in prelocked mode.
  */
  DBUG_ASSERT(thd->locked_tables_mode <= LTM_LOCK_TABLES ||
              !thd->lex->requires_prelocking());

  if (!tables && !thd->lex->requires_prelocking())
    DBUG_RETURN(0);

  /* Boundary between the statement's own tables and prelocked extras. */
  first_not_own= thd->lex->first_not_own_table();

  /*
    Check for thd->locked_tables_mode to avoid a redundant
    and harmful attempt to lock the already locked tables again.
    Checking for thd->lock is not enough in some situations. For example,
    if a stored function contains
    "drop table t3; create temporary t3 ..; insert into t3 ...;"
    thd->lock may be 0 after drop tables, whereas locked_tables_mode
    is still on. In this situation an attempt to lock temporary
    table t3 will lead to a memory leak.
  */
  if (! thd->locked_tables_mode)
  {
    DBUG_ASSERT(thd->lock == 0);	// You must lock everything at once
    TABLE **start,**ptr;
    bool found_first_not_own= 0;

    if (!(ptr=start=(TABLE**) thd->alloc(sizeof(TABLE*)*count)))
      DBUG_RETURN(TRUE);

    /*
      Collect changes tables for table lock.
      Mark own tables with query id as this is needed by
      prepare_for_row_logging()
    */
    for (table= tables; table; table= table->next_global)
    {
      if (table == first_not_own)
        found_first_not_own= 1;
      if (!table->placeholder())
      {
        *(ptr++)= table->table;
        if (!found_first_not_own)
          table->table->query_id= thd->query_id;
      }
    }

    DEBUG_SYNC(thd, "before_lock_tables_takes_lock");

    /* Acquire thr_lock locks on all collected tables in one call. */
    if (! (thd->lock= mysql_lock_tables(thd, start, (uint) (ptr - start),
                                        flags)))
      DBUG_RETURN(TRUE);

    DEBUG_SYNC(thd, "after_lock_tables_takes_lock");

    if (thd->lex->requires_prelocking() &&
        thd->lex->sql_command != SQLCOM_LOCK_TABLES &&
        thd->lex->sql_command != SQLCOM_FLUSH)
    {
      /*
        We just have done implicit LOCK TABLES, and now we have
        to emulate first open_and_lock_tables() after it.

        When open_and_lock_tables() is called for a single table out of
        a table list, the 'next_global' chain is temporarily broken. We
        may not find 'first_not_own' before the end of the "list".
        Look for example at those places where open_n_lock_single_table()
        is called. That function implements the temporary breaking of
        a table list for opening a single table.
      */
      for (table= tables;
           table && table != first_not_own;
           table= table->next_global)
      {
        if (!table->placeholder())
        {
          if (check_lock_and_start_stmt(thd, thd->lex, table))
          {
            /* start_stmt failed: release everything we just locked. */
            mysql_unlock_tables(thd, thd->lock);
            thd->lock= 0;
            DBUG_RETURN(TRUE);
          }
        }
      }
      /*
        Let us mark all tables which don't belong to the statement itself,
        and was marked as occupied during open_tables() as free for reuse.
      */
      mark_real_tables_as_free_for_reuse(first_not_own);
      DBUG_PRINT("info",("locked_tables_mode= LTM_PRELOCKED"));
      thd->enter_locked_tables_mode(LTM_PRELOCKED);
    }
  }
  else
  {
    /*
      When open_and_lock_tables() is called for a single table out of
      a table list, the 'next_global' chain is temporarily broken. We
      may not find 'first_not_own' before the end of the "list".
      Look for example at those places where open_n_lock_single_table()
      is called. That function implements the temporary breaking of
      a table list for opening a single table.
    */
    for (table= tables;
         table && table != first_not_own;
         table= table->next_global)
    {
      if (table->placeholder())
        continue;

      table->table->query_id= thd->query_id;
      /*
        In a stored function or trigger we should ensure that we won't change
        a table that is already used by the calling statement.
      */
      if (thd->locked_tables_mode >= LTM_PRELOCKED &&
          table->lock_type >= TL_FIRST_WRITE)
      {
        /* Same share, marked used by another query id => conflict. */
        for (TABLE* opentab= thd->open_tables; opentab; opentab= opentab->next)
        {
          if (table->table->s == opentab->s && opentab->query_id &&
              table->table->query_id != opentab->query_id)
          {
            my_error(ER_CANT_UPDATE_USED_TABLE_IN_SF_OR_TRG, MYF(0),
                     table->table->s->table_name.str);
            DBUG_RETURN(TRUE);
          }
        }
      }

      if (check_lock_and_start_stmt(thd, thd->lex, table))
      {
	DBUG_RETURN(TRUE);
      }
    }
    /*
      If we are under explicit LOCK TABLES and our statement requires
      prelocking, we should mark all "additional" tables as free for use
      and enter prelocked mode.
    */
    if (thd->lex->requires_prelocking())
    {
      mark_real_tables_as_free_for_reuse(first_not_own);
      DBUG_PRINT("info",
                 ("thd->locked_tables_mode= LTM_PRELOCKED_UNDER_LOCK_TABLES"));
      thd->locked_tables_mode= LTM_PRELOCKED_UNDER_LOCK_TABLES;
    }
  }

  /* Decide binlog format for the statement unless explicitly skipped. */
  const bool res= !(flags & MYSQL_OPEN_IGNORE_LOGGING_FORMAT) &&
    thd->decide_logging_format(tables);

  DBUG_RETURN(res);
}
+
+
+/*
+  Restart transaction for tables
+
+  This is used when we had to do an implicit commit after tables are opened
+  and want to restart transactions on tables.
+
+  This is used in case of:
+  LOCK TABLES xx
+  CREATE OR REPLACE TABLE xx;
+*/
+
+bool restart_trans_for_tables(THD *thd, TABLE_LIST *table)
+{
+  DBUG_ENTER("restart_trans_for_tables");
+
+  for (; table; table= table->next_global)
+  {
+    if (table->placeholder())
+      continue;
+
+    if (check_lock_and_start_stmt(thd, thd->lex, table))
+    {
+      DBUG_ASSERT(0);                           // Should never happen
+      DBUG_RETURN(TRUE);
+    }
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
+  Prepare statement for reopening of tables and recalculation of set of
+  prelocked tables.
+
+  @param[in] thd         Thread context.
+  @param[in,out] tables  List of tables which we were trying to open
+                         and lock.
+  @param[in] start_of_statement_svp MDL savepoint which represents the set
+                         of metadata locks which the current transaction
+                         managed to acquire before execution of the current
+                         statement and to which we should revert before
+                         trying to reopen tables. NULL if no metadata locks
+                         were held and thus all metadata locks should be
+                         released.
+*/
+
void close_tables_for_reopen(THD *thd, TABLE_LIST **tables,
                             const MDL_savepoint &start_of_statement_svp)
{
  TABLE_LIST *first_not_own_table= thd->lex->first_not_own_table();
  TABLE_LIST *tmp;

  /*
    If table list consists only from tables from prelocking set, table list
    for new attempt should be empty, so we have to update list's root pointer.
  */
  if (first_not_own_table == *tables)
    *tables= 0;
  /* Drop all tables that were added only for the prelocking set. */
  thd->lex->chop_off_not_own_tables();
  /* Reset MDL tickets for procedures/functions */
  for (Sroutine_hash_entry *rt=
         (Sroutine_hash_entry*)thd->lex->sroutines_list.first;
       rt; rt= rt->next)
    rt->mdl_request.ticket= NULL;
  sp_remove_not_own_routines(thd->lex);
  /* Clear per-open state so each remaining table can be reopened fresh. */
  for (tmp= *tables; tmp; tmp= tmp->next_global)
  {
    tmp->table= 0;
    tmp->mdl_request.ticket= NULL;
    /* We have to cleanup translation tables of views. */
    tmp->cleanup_items();
  }
  /*
    No need to commit/rollback the statement transaction: it's
    either not started or we're filling in an INFORMATION_SCHEMA
    table on the fly, and thus mustn't manipulate with the
    transaction of the enclosing statement.
  */
  DBUG_ASSERT(thd->transaction->stmt.is_empty() ||
              (thd->state_flags & Open_tables_state::BACKUPS_AVAIL));
  close_thread_tables(thd);
  thd->mdl_context.rollback_to_savepoint(start_of_statement_svp);
}
+
+
+/*****************************************************************************
+* The following find_field_in_XXX procedures implement the core of the
+* name resolution functionality. The entry point to resolve a column name in a
+* list of tables is 'find_field_in_tables'. It calls 'find_field_in_table_ref'
+* for each table reference. In turn, depending on the type of table reference,
+* 'find_field_in_table_ref' calls one of the 'find_field_in_XXX' procedures
+* below specific for the type of table reference.
+******************************************************************************/
+
+/* Special Field pointers as return values of find_field_in_XXX functions. */
+/* Sentinel: field was not found (distinct from 0, which signals an error). */
+Field *not_found_field= (Field*) 0x1;
+/* Sentinel: name resolved to a view expression; result is passed via *ref. */
+Field *view_ref_found= (Field*) 0x2; 
+
+/* Sentinel: field exists but the grant check denied access to it. */
+#define WRONG_GRANT (Field*) -1
+
+/*
+  Register that 'field' of 'table' is used by the current statement:
+  update covering_keys, the read/write column bitmaps and used_fields,
+  and optionally tag the field for item-tree collection.
+*/
+static void update_field_dependencies(THD *thd, Field *field, TABLE *table)
+{
+  DBUG_ENTER("update_field_dependencies");
+  if (should_mark_column(thd->column_usage))
+  {
+    /*
+      We always want to register the used keys, as the column bitmap may
+      have been set for all fields (for example for view).
+    */
+    table->covering_keys.intersect(field->part_of_key);
+
+    bool was_marked;
+    if (thd->column_usage == MARK_COLUMNS_READ)
+      was_marked= table->mark_column_with_deps(field);
+    else
+    {
+      was_marked= bitmap_fast_test_and_set(table->write_set,
+                                           field->field_index);
+      if (was_marked)
+      {
+        DBUG_PRINT("warning", ("Found duplicated field"));
+        thd->dup_field= field;
+      }
+    }
+    /* An already-marked field must not bump used_fields nor be re-tagged. */
+    if (was_marked)
+      DBUG_VOID_RETURN;
+
+    table->used_fields++;
+  }
+  if (table->get_fields_in_item_tree)
+    field->flags|= GET_FIXED_FIELDS_FLAG;
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Find a field by name in a view that uses merge algorithm.
+
+  SYNOPSIS
+    find_field_in_view()
+    thd				thread handler
+    table_list			view to search for 'name'
+    name			name of field
+    length			length of name
+    item_name                   name of item if it will be created (VIEW)
+    ref				expression substituted in VIEW should be passed
+                                using this reference (return view_ref_found)
+    register_tree_change        TRUE if ref is not stack variable and we
+                                need register changes in item tree
+
+  RETURN
+    0			field is not found
+    view_ref_found	found value in VIEW (real result is in *ref)
+    #			pointer to field - only for schema table fields
+*/
+
+static Field *
+find_field_in_view(THD *thd, TABLE_LIST *table_list,
+                   const char *name, size_t length,
+                   const char *item_name, Item **ref,
+                   bool register_tree_change)
+{
+  DBUG_ENTER("find_field_in_view");
+  DBUG_PRINT("enter",
+             ("view: '%s', field name: '%s', item name: '%s', ref %p",
+              table_list->alias.str, name, item_name, ref));
+  Field_iterator_view field_it;
+  field_it.set(table_list);
+  Query_arena *arena= 0, backup;  
+
+  /* Linear, case-insensitive scan over the view's field translation list. */
+  for (; !field_it.end_of_fields(); field_it.next())
+  {
+    if (!my_strcasecmp(system_charset_info, field_it.name()->str, name))
+    {
+      // in PS use own arena or data will be freed after prepare
+      if (register_tree_change &&
+          thd->stmt_arena->is_stmt_prepare_or_first_stmt_execute())
+        arena= thd->activate_stmt_arena_if_needed(&backup);
+      /*
+        create_item() may, or may not create a new Item, depending on
+        the column reference. See create_view_field() for details.
+      */
+      Item *item= field_it.create_item(thd);
+      if (arena)
+        thd->restore_active_arena(arena, &backup);
+      
+      if (!item)
+        DBUG_RETURN(0);
+      /* Without a ref slot the caller only needs the "found in view" signal. */
+      if (!ref)
+        DBUG_RETURN((Field*) view_ref_found);
+      /*
+       *ref != NULL means that *ref contains the item that we need to
+       replace. If the item was aliased by the user, set the alias to
+       the replacing item.
+      */
+      if (*ref && (*ref)->is_explicit_name())
+        item->set_name(thd, (*ref)->name);
+      if (register_tree_change)
+        thd->change_item_tree(ref, item);
+      else
+        *ref= item;
+      DBUG_RETURN((Field*) view_ref_found);
+    }
+  }
+  /* Name is not a column of this view. */
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Find field by name in a NATURAL/USING join table reference.
+
+  SYNOPSIS
+    find_field_in_natural_join()
+    thd			 [in]  thread handler
+    table_ref            [in]  table reference to search
+    name		 [in]  name of field
+    length		 [in]  length of name
+    ref                  [in/out] if 'name' is resolved to a view field, ref is
+                               set to point to the found view field
+    register_tree_change [in]  TRUE if ref is not stack variable and we
+                               need register changes in item tree
+    actual_table         [out] the original table reference where the field
+                               belongs - differs from 'table_list' only for
+                               NATURAL/USING joins
+
+  DESCRIPTION
+    Search for a field among the result fields of a NATURAL/USING join.
+    Notice that this procedure is called only for non-qualified field
+    names. In the case of qualified fields, we search directly the base
+    tables of a natural join.
+
+  RETURN
+    NULL        if the field was not found
+    WRONG_GRANT if no access rights to the found field
+    #           Pointer to the found Field
+*/
+
+static Field *
+find_field_in_natural_join(THD *thd, TABLE_LIST *table_ref, const char *name,
+                           size_t length, Item **ref,
+                           bool register_tree_change,
+                           TABLE_LIST **actual_table)
+{
+  /*
+    NOTE(review): the template argument of List_iterator_fast had been lost
+    (angle brackets stripped); restored to <Natural_join_column>.
+  */
+  List_iterator_fast<Natural_join_column>
+    field_it(*(table_ref->join_columns));
+  Natural_join_column *nj_col, *curr_nj_col;
+  Field *UNINIT_VAR(found_field);
+  Query_arena *UNINIT_VAR(arena), backup;
+  DBUG_ENTER("find_field_in_natural_join");
+  DBUG_PRINT("enter", ("field name: '%s', ref %p",
+		       name, ref));
+  DBUG_ASSERT(table_ref->is_natural_join && table_ref->join_columns);
+  DBUG_ASSERT(*actual_table == NULL);
+
+  /* Scan all result columns; the name must match exactly one of them. */
+  for (nj_col= NULL, curr_nj_col= field_it++; curr_nj_col;
+       curr_nj_col= field_it++)
+  {
+    if (!my_strcasecmp(system_charset_info, curr_nj_col->name()->str, name))
+    {
+      if (nj_col)
+      {
+        my_error(ER_NON_UNIQ_ERROR, MYF(0), name, thd->where);
+        DBUG_RETURN(NULL);
+      }
+      nj_col= curr_nj_col;
+    }
+  }
+  if (!nj_col)
+    DBUG_RETURN(NULL);
+
+  if (nj_col->view_field)
+  {
+    /* The column originates from a merge view: build the view expression. */
+    Item *item;
+    if (register_tree_change)
+      arena= thd->activate_stmt_arena_if_needed(&backup);
+    /*
+      create_item() may, or may not create a new Item, depending on the
+      column reference. See create_view_field() for details.
+    */
+    item= nj_col->create_item(thd);
+    if (!item)
+      DBUG_RETURN(NULL);
+
+    /*
+     *ref != NULL means that *ref contains the item that we need to
+     replace. If the item was aliased by the user, set the alias to
+     the replacing item.
+     */
+    if (*ref && (*ref)->is_explicit_name())
+      item->set_name(thd, (*ref)->name);
+    if (register_tree_change && arena)
+      thd->restore_active_arena(arena, &backup);
+
+    /* (A second "if (!item)" check stood here; it was provably redundant —
+       'item' is not reassigned after the check above — and was removed.) */
+    DBUG_ASSERT(nj_col->table_field == NULL);
+    if (nj_col->table_ref->schema_table_reformed)
+    {
+      /*
+        Translation table items are always Item_fields and fixed
+        already('mysql_schema_table' function). So we can return
+        ->field. It is used only for 'show & where' commands.
+      */
+      DBUG_RETURN(((Item_field*) (nj_col->view_field->item))->field);
+    }
+    if (register_tree_change)
+      thd->change_item_tree(ref, item);
+    else
+      *ref= item;
+    found_field= (Field*) view_ref_found;
+  }
+  else
+  {
+    /* This is a base table. */
+    DBUG_ASSERT(nj_col->view_field == NULL);
+    Item *ref= 0;
+    /*
+      This fix_fields is not necessary (initially this item is fixed by
+      the Item_field constructor; after reopen_tables the Item_func_eq
+      calls fix_fields on that item), it's just a check during table
+      reopening for columns that was dropped by the concurrent connection.
+    */
+    if (nj_col->table_field->fix_fields_if_needed(thd, &ref))
+    {
+      DBUG_PRINT("info", ("column '%s' was dropped by the concurrent connection",
+                          nj_col->table_field->name.str));
+      DBUG_RETURN(NULL);
+    }
+    DBUG_ASSERT(ref == 0);                      // Should not have changed
+    DBUG_ASSERT(nj_col->table_ref->table == nj_col->table_field->field->table);
+    found_field= nj_col->table_field->field;
+    update_field_dependencies(thd, found_field, nj_col->table_ref->table);
+  }
+
+  *actual_table= nj_col->table_ref;
+
+  DBUG_RETURN(found_field);
+}
+
+
+/*
+  Find field by name in a base table or a view with temp table algorithm.
+
+  The caller is expected to check column-level privileges.
+
+  SYNOPSIS
+    find_field_in_table()
+    thd				thread handler
+    table			table where to search for the field
+    name			name of field
+    length			length of name
+    allow_rowid			do allow finding of "_rowid" field?
+    cached_field_index_ptr	cached position in field list (used to speedup
+                                lookup for fields in prepared tables)
+
+  RETURN
+    0	field is not found
+    #	pointer to field
+*/
+
+Field *
+find_field_in_table(THD *thd, TABLE *table, const char *name, size_t length,
+                    bool allow_rowid, field_index_t *cached_field_index_ptr)
+{
+  Field *field;
+  field_index_t cached_field_index= *cached_field_index_ptr;
+  DBUG_ENTER("find_field_in_table");
+  DBUG_PRINT("enter", ("table: '%s', field name: '%s'", table->alias.c_ptr(),
+                       name));
+
+  /* We assume here that table->field < NO_CACHED_FIELD_INDEX = UINT_MAX */
+  if (cached_field_index < table->s->fields &&
+      !my_strcasecmp(system_charset_info,
+                     table->field[cached_field_index]->field_name.str, name))
+  {
+    /* Fast path: the cached index from a previous lookup still matches. */
+    field= table->field[cached_field_index];
+    DEBUG_SYNC(thd, "table_field_cached");
+  }
+  else
+  {
+    LEX_CSTRING fname= {name, length};
+    field= table->find_field_by_name(&fname);
+  }
+
+  if (field)
+  {
+    /* Fully invisible fields are never resolvable by name, except under
+       the "test_completely_invisible" debug keyword. */
+    if (field->invisible == INVISIBLE_FULL &&
+        !DBUG_IF("test_completely_invisible"))
+      DBUG_RETURN((Field*)0);
+
+    if (thd->column_usage != MARK_COLUMNS_READ &&
+        thd->column_usage != COLUMNS_READ)
+    {
+      if (thd->vers_insert_history(field))
+        DBUG_ASSERT(table->versioned());
+      else if (field->invisible == INVISIBLE_SYSTEM)
+        DBUG_RETURN((Field*)0);
+    }
+  }
+  else
+  {
+    /* Fall back to the hidden "_rowid" pseudo-column when allowed and
+       the storage engine exposes one. */
+    if (!allow_rowid ||
+        my_strcasecmp(system_charset_info, name, "_rowid") ||
+        table->s->rowid_field_offset == 0)
+      DBUG_RETURN((Field*) 0);
+    field= table->field[table->s->rowid_field_offset-1];
+  }
+  /* Remember the position to speed up the next lookup of the same name. */
+  *cached_field_index_ptr= field->field_index;
+
+  update_field_dependencies(thd, field, table);
+
+  DBUG_RETURN(field);
+}
+
+
+/*
+  Find field in a table reference.
+
+  SYNOPSIS
+    find_field_in_table_ref()
+    thd			   [in]  thread handler
+    table_list		   [in]  table reference to search
+    name		   [in]  name of field
+    length		   [in]  field length of name
+    item_name              [in]  name of item if it will be created (VIEW)
+    db_name                [in]  optional database name that qualifies the
+    table_name             [in]  optional table name that qualifies the field
+                                 0 for non-qualified field in natural joins
+    ref		       [in/out] if 'name' is resolved to a view field, ref
+                                 is set to point to the found view field
+    check_privileges       [in]  check privileges
+    allow_rowid		   [in]  do allow finding of "_rowid" field?
+    cached_field_index_ptr [in]  cached position in field list (used to
+                                 speedup lookup for fields in prepared tables)
+    register_tree_change   [in]  TRUE if ref is not stack variable and we
+                                 need register changes in item tree
+    actual_table           [out] the original table reference where the field
+                                 belongs - differs from 'table_list' only for
+                                 NATURAL_USING joins.
+
+  DESCRIPTION
+    Find a field in a table reference depending on the type of table
+    reference. There are three types of table references with respect
+    to the representation of their result columns:
+    - an array of Field_translator objects for MERGE views and some
+      information_schema tables,
+    - an array of Field objects (and possibly a name hash) for stored
+      tables,
+    - a list of Natural_join_column objects for NATURAL/USING joins.
+    This procedure detects the type of the table reference 'table_list'
+    and calls the corresponding search routine.
+
+    The routine checks column-level privieleges for the found field.
+
+  RETURN
+    0			field is not found
+    view_ref_found	found value in VIEW (real result is in *ref)
+    #			pointer to field
+*/
+
+Field *
+find_field_in_table_ref(THD *thd, TABLE_LIST *table_list, const char *name,
+                        size_t length, const char *item_name,
+                        const char *db_name, const char *table_name,
+                        ignored_tables_list_t ignored_tables, Item **ref,
+                        bool check_privileges, bool allow_rowid,
+                        field_index_t *cached_field_index_ptr,
+                        bool register_tree_change, TABLE_LIST **actual_table)
+{
+  Field *fld;
+  DBUG_ENTER("find_field_in_table_ref");
+  DBUG_ASSERT(table_list->alias.str);
+  DBUG_ASSERT(name);
+  DBUG_ASSERT(item_name);
+  DBUG_PRINT("enter",
+             ("table: '%s'  field name: '%s'  item name: '%s'  ref %p",
+              table_list->alias.str, name, item_name, ref));
+
+  /*
+    Check that the table and database that qualify the current field name
+    are the same as the table reference we are going to search for the field.
+
+    Exclude from the test below nested joins because the columns in a
+    nested join generally originate from different tables. Nested joins
+    also have no table name, except when a nested join is a merge view
+    or an information schema table.
+
+    We include explicitly table references with a 'field_translation' table,
+    because if there are views over natural joins we don't want to search
+    inside the view, but we want to search directly in the view columns
+    which are represented as a 'field_translation'.
+
+    tables->db.str may be 0 if we are preparing a statement
+    db_name is 0 if item doesn't have a db name
+    table_name is 0 if item doesn't have a specified table_name
+  */
+  if (db_name && !db_name[0])
+    db_name= 0;                                 // Simpler test later
+
+  if (/* Exclude nested joins. */
+      (!table_list->nested_join ||
+       /* Include merge views and information schema tables. */
+       table_list->field_translation) &&
+      /*
+        Test if the field qualifiers match the table reference we plan
+        to search.
+      */
+      table_name && table_name[0] &&
+      (my_strcasecmp(table_alias_charset, table_list->alias.str, table_name) ||
+       (db_name && (!table_list->db.str || !table_list->db.str[0])) ||
+       (db_name && table_list->db.str && table_list->db.str[0] &&
+        (table_list->schema_table ?
+         my_strcasecmp(system_charset_info, db_name, table_list->db.str) :
+         strcmp(db_name, table_list->db.str)))))
+    DBUG_RETURN(0);
+
+  /*
+    Don't allow usage of fields in sequence table that is opened as part of
+    NEXT VALUE for sequence_name
+  */
+  if (table_list->sequence)
+    DBUG_RETURN(0);
+
+  *actual_table= NULL;
+
+  if (table_list->field_translation)
+  {
+    /* 'table_list' is a view or an information schema table. */
+    if ((fld= find_field_in_view(thd, table_list, name, length, item_name, ref,
+                                 register_tree_change)))
+      *actual_table= table_list;
+  }
+  else if (!table_list->nested_join)
+  {
+    /* 'table_list' is a stored table. */
+    DBUG_ASSERT(table_list->table);
+    if ((fld= find_field_in_table(thd, table_list->table, name, length,
+                                  allow_rowid, cached_field_index_ptr)))
+      *actual_table= table_list;
+  }
+  else
+  {
+    /*
+      'table_list' is a NATURAL/USING join, or an operand of such join that
+      is a nested join itself.
+
+      If the field name we search for is qualified, then search for the field
+      in the table references used by NATURAL/USING the join.
+    */
+    if (table_name && table_name[0])
+    {
+      /*
+        NOTE(review): the template argument of List_iterator had been lost
+        (angle brackets stripped); restored to <TABLE_LIST>.
+      */
+      List_iterator<TABLE_LIST> it(table_list->nested_join->join_list);
+      TABLE_LIST *table;
+      while ((table= it++))
+      {
+        /*
+          Check if the table is in the ignore list. Only base tables can be in
+          the ignore list.
+        */
+        if (table->table && ignored_list_includes_table(ignored_tables, table))
+          continue;
+
+        if ((fld= find_field_in_table_ref(thd, table, name, length, item_name,
+                                          db_name, table_name, ignored_tables,
+                                          ref, check_privileges, allow_rowid,
+                                          cached_field_index_ptr,
+                                          register_tree_change, actual_table)))
+          DBUG_RETURN(fld);
+      }
+      DBUG_RETURN(0);
+    }
+    /*
+      Non-qualified field, search directly in the result columns of the
+      natural join. The condition of the outer IF is true for the top-most
+      natural join, thus if the field is not qualified, we will search
+      directly the top-most NATURAL/USING join.
+    */
+    fld= find_field_in_natural_join(thd, table_list, name, length, ref,
+                                    register_tree_change, actual_table);
+  }
+
+  if (fld)
+  {
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+    /* Check if there are sufficient access rights to the found field. */
+    if (check_privileges &&
+        !table_list->is_derived() &&
+        check_column_grant_in_table_ref(thd, *actual_table, name, length, fld))
+      fld= WRONG_GRANT;
+    else
+#endif
+      if (should_mark_column(thd->column_usage))
+      {
+        /*
+          Get rw_set correct for this field so that the handler
+          knows that this field is involved in the query and gets
+          retrieved/updated
+         */
+        Field *field_to_set= NULL;
+        if (fld == view_ref_found)
+        {
+          if (!ref)
+            DBUG_RETURN(fld);
+          Item *it= (*ref)->real_item();
+          if (it->type() == Item::FIELD_ITEM)
+            field_to_set= ((Item_field*)it)->field;
+          else
+          {
+            /* The view expression is not a plain column: walk it and mark
+               every underlying field it references. */
+            if (thd->column_usage == MARK_COLUMNS_READ)
+              it->walk(&Item::register_field_in_read_map, 0, 0);
+            else
+              it->walk(&Item::register_field_in_write_map, 0, 0);
+          }
+        }
+        else
+          field_to_set= fld;
+        if (field_to_set)
+        {
+          TABLE *table= field_to_set->table;
+          DBUG_ASSERT(table);
+          if (thd->column_usage == MARK_COLUMNS_READ)
+            field_to_set->register_field_in_read_map();
+          else
+            bitmap_set_bit(table->write_set, field_to_set->field_index);
+        }
+      }
+  }
+  DBUG_RETURN(fld);
+}
+
+
+/*
+  Find field in table, no side effects, only purpose is to check for field
+  in table object and get reference to the field if found.
+
+  SYNOPSIS
+  find_field_in_table_sef()
+
+  table                         table where to find
+  name                          Name of field searched for
+
+  RETURN
+    0                   field is not found
+    #                   pointer to field
+*/
+
+Field *find_field_in_table_sef(TABLE *table, const char *name)
+{
+  Field **ptr;
+
+  if (table->s->name_hash.records)
+  {
+    /*
+      The hash lookup yields a pointer into the TABLE_SHARE's field array;
+      remap it to the corresponding slot of this TABLE instance.
+    */
+    ptr= (Field**) my_hash_search(&table->s->name_hash, (uchar*) name,
+                                  strlen(name));
+    if (!ptr)
+      return NULL;
+    return table->field[ptr - table->s->field];
+  }
+
+  /* No name hash: linear, case-insensitive walk over the field array. */
+  if (!(ptr= table->field))
+    return NULL;
+  while (*ptr &&
+         my_strcasecmp(system_charset_info, (*ptr)->field_name.str, name))
+    ptr++;
+  /* *ptr is the matching field, or the terminating NULL if none matched. */
+  return *ptr;
+}
+
+
+/*
+  Find field in table list.
+
+  SYNOPSIS
+    find_field_in_tables()
+    thd			  pointer to current thread structure
+    item		  field item that should be found
+    first_table           list of tables to be searched for item
+    last_table            end of the list of tables to search for item. If NULL
+                          then search to the end of the list 'first_table'.
+    ignored_tables        Set of tables that should be ignored. Do not try to
+                          find the field in those.
+    ref			  if 'item' is resolved to a view field, ref is set to
+                          point to the found view field
+    report_error	  Degree of error reporting:
+                          - IGNORE_ERRORS then do not report any error
+                          - IGNORE_EXCEPT_NON_UNIQUE report only non-unique
+                            fields, suppress all other errors
+                          - REPORT_EXCEPT_NON_UNIQUE report all other errors
+                            except when non-unique fields were found
+                          - REPORT_ALL_ERRORS
+    check_privileges      need to check privileges
+    register_tree_change  TRUE if ref is not a stack variable and we
+                          to need register changes in item tree
+
+  RETURN VALUES
+    0			If error: the found field is not unique, or there are
+                        no sufficient access priviliges for the found field,
+                        or the field is qualified with non-existing table.
+    not_found_field	The function was called with report_error ==
+                        (IGNORE_ERRORS || IGNORE_EXCEPT_NON_UNIQUE) and a
+			field was not found.
+    view_ref_found	View field is found, item passed through ref parameter
+    found field         If a item was resolved to some field
+*/
+
+Field *
+find_field_in_tables(THD *thd, Item_ident *item,
+                     TABLE_LIST *first_table, TABLE_LIST *last_table,
+                     ignored_tables_list_t ignored_tables,
+		     Item **ref, find_item_error_report_type report_error,
+                     bool check_privileges, bool register_tree_change)
+{
+  Field *found=0;
+  const char *db= item->db_name.str;
+  const char *table_name= item->table_name.str;
+  const char *name= item->field_name.str;
+  size_t length= item->field_name.length;
+  char name_buff[SAFE_NAME_LEN+1];
+  TABLE_LIST *cur_table= first_table;
+  TABLE_LIST *actual_table;
+  bool allow_rowid;
+
+  if (!table_name || !table_name[0])
+  {
+    table_name= 0;                              // For easier test
+    db= 0;
+  }
+
+  /* "_rowid" may only be resolved when the field is qualified or a single
+     table is in scope. */
+  allow_rowid= table_name || (cur_table && !cur_table->next_local);
+
+  if (item->cached_table)
+  {
+    DBUG_PRINT("info", ("using cached table"));
+    /*
+      This shortcut is used by prepared statements. We assume that
+      TABLE_LIST *first_table is not changed during query execution (which
+      is true for all queries except RENAME but luckily RENAME doesn't
+      use fields...) so we can rely on reusing pointer to its member.
+      With this optimization we also miss case when addition of one more
+      field makes some prepared query ambiguous and so erroneous, but we
+      accept this trade off.
+    */
+    TABLE_LIST *table_ref= item->cached_table;
+    /*
+      The condition (table_ref->view == NULL) ensures that we will call
+      find_field_in_table even in the case of information schema tables
+      when table_ref->field_translation != NULL.
+      */
+    if (table_ref->table && !table_ref->view &&
+        (!table_ref->is_merged_derived() ||
+         (!table_ref->is_multitable() && table_ref->merged_for_insert)))
+    {
+
+      found= find_field_in_table(thd, table_ref->table, name, length,
+                                 TRUE, &(item->cached_field_index));
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+      /* Check if there are sufficient access rights to the found field. */
+      if (found && check_privileges && !is_temporary_table(table_ref) &&
+          check_column_grant_in_table_ref(thd, table_ref, name, length, found))
+        found= WRONG_GRANT;
+#endif
+    }
+    else
+      found= find_field_in_table_ref(thd, table_ref, name, length,
+                                     item->name.str, NULL, NULL,
+                                     ignored_tables, ref, check_privileges,
+                                     TRUE, &(item->cached_field_index),
+                                     register_tree_change, &actual_table);
+    if (found)
+    {
+      if (found == WRONG_GRANT)
+	return (Field*) 0;
+
+      /*
+        Only views fields should be marked as dependent, not an underlying
+        fields.
+      */
+      if (!table_ref->belong_to_view && !table_ref->belong_to_derived)
+      {
+        SELECT_LEX *current_sel= item->context->select_lex;
+        SELECT_LEX *last_select= table_ref->select_lex;
+        bool all_merged= TRUE;
+        /* Walk outwards from the item's select to the table's select,
+           checking whether every intermediate subquery is a merged
+           semi-join (in which case no outer dependency arises). */
+        for (SELECT_LEX *sl= current_sel; sl && sl!=last_select;
+             sl=sl->outer_select())
+        {
+          Item *subs= sl->master_unit()->item;
+          if (!subs)
+            continue;
+
+          Item_in_subselect *in_subs= subs->get_IN_subquery();
+          if (in_subs &&
+              in_subs->substype() == Item_subselect::IN_SUBS &&
+              in_subs->test_strategy(SUBS_SEMI_JOIN))
+          {
+            continue;
+          }
+          all_merged= FALSE;
+          break;
+        }
+        /*
+          If the field was an outer reference, mark all selects using this
+          sub query as dependent on the outer query
+        */
+        if (!all_merged && current_sel != last_select)
+        {
+          mark_select_range_as_dependent(thd, last_select, current_sel,
+                                         found, *ref, item, true);
+        }
+      }
+      return found;
+    }
+  }
+  else
+    item->can_be_depended= TRUE;
+
+  if (db && lower_case_table_names)
+  {
+    /*
+      convert database to lower case for comparison.
+      We can't do this in Item_field as this would change the
+      'name' of the item which may be used in the select list
+    */
+    strmake_buf(name_buff, db);
+    my_casedn_str(files_charset_info, name_buff);
+    db= name_buff;
+  }
+
+  /* The search loop below runs over name-resolution order, so translate
+     the exclusive end marker accordingly. */
+  if (last_table)
+    last_table= last_table->next_name_resolution_table;
+
+  field_index_t fake_index_for_duplicate_search= NO_CACHED_FIELD_INDEX;
+  /*
+    For the field search it will point to field cache, but for duplicate
+    search it will point to fake_index_for_duplicate_search (no cache
+    present).
+  */
+  field_index_t *current_cache= &(item->cached_field_index);
+  for (; cur_table != last_table ;
+       cur_table= cur_table->next_name_resolution_table)
+  {
+    if (cur_table->table &&
+        ignored_list_includes_table(ignored_tables, cur_table))
+      continue;
+
+    Field *cur_field= find_field_in_table_ref(thd, cur_table, name, length,
+                                              item->name.str, db, table_name,
+                                              ignored_tables, ref,
+                                              (thd->lex->sql_command ==
+                                               SQLCOM_SHOW_FIELDS)
+                                              ? false : check_privileges,
+                                              allow_rowid,
+                                              current_cache,
+                                              register_tree_change,
+                                              &actual_table);
+    if (cur_field)
+    {
+      if (cur_field == WRONG_GRANT)
+      {
+        if (thd->lex->sql_command != SQLCOM_SHOW_FIELDS)
+          return (Field*) 0;
+
+        /* For SHOW FIELDS, retry without privilege checks and present the
+           inaccessible column as a NULL placeholder field. */
+        thd->clear_error();
+        cur_field= find_field_in_table_ref(thd, cur_table, name, length,
+                                           item->name.str, db, table_name,
+                                           ignored_tables, ref, false,
+                                           allow_rowid,
+                                           current_cache,
+                                           register_tree_change,
+                                           &actual_table);
+        if (cur_field)
+        {
+          Field *nf=new Field_null(NULL,0,Field::NONE,
+                                   &cur_field->field_name,
+                                   &my_charset_bin);
+          nf->init(cur_table->table);
+          cur_field= nf;
+        }
+      }
+
+      /*
+        Store the original table of the field, which may be different from
+        cur_table in the case of NATURAL/USING join.
+      */
+      if (actual_table->cacheable_table /*(1)*/ && !found /*(2)*/)
+      {
+        /*
+          We have just found a field allowed to cache (1) and
+          it is not duplicate search (2).
+        */
+        item->cached_table= actual_table;
+      }
+      else
+      {
+        item->cached_table= NULL;
+        item->cached_field_index= NO_CACHED_FIELD_INDEX;
+      }
+
+      DBUG_ASSERT(thd->where);
+      /*
+        If we found a fully qualified field we return it directly as it can't
+        have duplicates.
+       */
+      if (db)
+        return cur_field;
+      
+      if (unlikely(found))
+      {
+        if (report_error == REPORT_ALL_ERRORS ||
+            report_error == IGNORE_EXCEPT_NON_UNIQUE)
+          my_error(ER_NON_UNIQ_ERROR, MYF(0),
+                   table_name ? item->full_name() : name, thd->where);
+        return (Field*) 0;
+      }
+      /* First match: keep searching the remaining tables for duplicates. */
+      found= cur_field;
+      current_cache= &fake_index_for_duplicate_search;
+    }
+  }
+
+  if (likely(found))
+    return found;
+  
+  /*
+    If the field was qualified and there were no tables to search, issue
+    an error that an unknown table was given. The situation is detected
+    as follows: if there were no tables we wouldn't go through the loop
+    and cur_table wouldn't be updated by the loop increment part, so it
+    will be equal to the first table.
+  */
+  if (table_name && (cur_table == first_table) &&
+      (report_error == REPORT_ALL_ERRORS ||
+       report_error == REPORT_EXCEPT_NON_UNIQUE))
+  {
+    char buff[SAFE_NAME_LEN*2 + 2];
+    if (db && db[0])
+    {
+      strxnmov(buff,sizeof(buff)-1,db,".",table_name,NullS);
+      table_name=buff;
+    }
+    my_error(ER_UNKNOWN_TABLE, MYF(0), table_name, thd->where);
+  }
+  else
+  {
+    if (report_error == REPORT_ALL_ERRORS ||
+        report_error == REPORT_EXCEPT_NON_UNIQUE)
+      my_error(ER_BAD_FIELD_ERROR, MYF(0), item->full_name(), thd->where);
+    else
+      found= not_found_field;
+  }
+  return found;
+}
+
+
+/*
+  Find Item in list of items (find_field_in_tables analog)
+
+  TODO
+    is it better return only counter?
+
+  SYNOPSIS
+    find_item_in_list()
+    find			Item to find
+    items			List of items
+    counter			To return number of found item
+    report_error
+      REPORT_ALL_ERRORS		report errors, return 0 if error
+      REPORT_EXCEPT_NOT_FOUND	Do not report 'not found' error and
+				return not_found_item, report other errors,
+				return 0
+      IGNORE_ERRORS		Do not report errors, return 0 if error
+    resolution                  Set to the resolution type if the item is found 
+                                (it says whether the item is resolved 
+                                 against an alias name,
+                                 or as a field name without alias,
+                                 or as a field hidden by alias,
+                                 or ignoring alias)
+    limit                       How many items in the list to check
+                                (if limit==0 then all items are to be checked)
+                                
+  RETURN VALUES
+    0			Item is not found or item is not unique,
+			error message is reported
+    not_found_item	Function was called with
+			report_error == REPORT_EXCEPT_NOT_FOUND and
+			item was not found. No error message was reported
+    otherwise		Pointer to the found item
+*/
+
+/* Special Item pointer to serve as a return value from find_item_in_list(). */
+/* 0x1 is a sentinel address that is never dereferenced; callers only ever
+   compare the returned pointer against it. */
+Item **not_found_item= (Item**) 0x1;
+
+
+Item **
+find_item_in_list(Item *find, List<Item> &items, uint *counter,
+                  find_item_error_report_type report_error,
+                  enum_resolution_type *resolution, uint limit)
+{
+  List_iterator<Item> li(items);
+  uint n_items= limit == 0 ? items.elements : limit;
+  Item **found=0, **found_unaliased= 0, *item;
+  const char *db_name=0;
+  const LEX_CSTRING *field_name= 0;
+  const char *table_name=0;
+  bool found_unaliased_non_uniq= 0;
+  /*
+    true if the item that we search for is a valid name reference
+    (and not an item that happens to have a name).
+  */
+  bool is_ref_by_name= 0;
+  uint unaliased_counter= 0;
+
+  *resolution= NOT_RESOLVED;
+
+  is_ref_by_name= (find->type() == Item::FIELD_ITEM  ||
+                   find->type() == Item::REF_ITEM);
+  if (is_ref_by_name)
+  {
+    field_name= &((Item_ident*) find)->field_name;
+    table_name= ((Item_ident*) find)->table_name.str;
+    db_name=    ((Item_ident*) find)->db_name.str;
+  }
+
+  for (uint i= 0; i < n_items; i++)
+  {
+    item= li++;
+    if (field_name && field_name->str &&
+        (item->real_item()->type() == Item::FIELD_ITEM ||
+         ((item->type() == Item::REF_ITEM) &&
+          (((Item_ref *)item)->ref_type() == Item_ref::VIEW_REF))))
+    {
+      Item_ident *item_field= (Item_ident*) item;
+
+      /*
+	In case of group_concat() with ORDER BY condition in the QUERY
+	item_field can be field of temporary table without item name
+	(if this field created from expression argument of group_concat()),
+	=> we have to check presence of name before compare
+      */
+      if (unlikely(!item_field->name.str))
+        continue;
+
+      if (table_name)
+      {
+        /*
+          If table name is specified we should find field 'field_name' in
+          table 'table_name'. According to SQL-standard we should ignore
+          aliases in this case.
+
+          Since we should NOT prefer fields from the select list over
+          other fields from the tables participating in this select in
+          case of ambiguity we have to do extra check outside this function.
+
+          We use strcmp for table names and database names as these may be
+          case sensitive. In cases where they are not case sensitive, they
+          are always in lower case.
+
+	  item_field->field_name and item_field->table_name can be 0x0 if
+	  item is not fix_field()'ed yet.
+        */
+        if (item_field->field_name.str && item_field->table_name.str &&
+	    !lex_string_cmp(system_charset_info, &item_field->field_name,
+                            field_name) &&
+            !my_strcasecmp(table_alias_charset, item_field->table_name.str,
+                           table_name) &&
+            (!db_name || (item_field->db_name.str &&
+                          !strcmp(item_field->db_name.str, db_name))))
+        {
+          if (found_unaliased)
+          {
+            if ((*found_unaliased)->eq(item, 0))
+              continue;
+            /*
+              Two matching fields in select list.
+              We already can bail out because we are searching through
+              unaliased names only and will have duplicate error anyway.
+            */
+            if (report_error != IGNORE_ERRORS)
+              my_error(ER_NON_UNIQ_ERROR, MYF(0),
+                       find->full_name(), current_thd->where);
+            return (Item**) 0;
+          }
+          found_unaliased= li.ref();
+          unaliased_counter= i;
+          *resolution= RESOLVED_IGNORING_ALIAS;
+          if (db_name)
+            break;                              // Perfect match
+        }
+      }
+      else
+      {
+        bool fname_cmp= lex_string_cmp(system_charset_info,
+                                       &item_field->field_name,
+                                       field_name);
+        if (!lex_string_cmp(system_charset_info,
+                            &item_field->name, field_name))
+        {
+          /*
+            If table name was not given we should scan through aliases
+            and non-aliased fields first. We are also checking unaliased
+            name of the field in the next else-if, to be able to find
+            instantly field (hidden by alias) if no suitable alias or
+            non-aliased field was found.
+          */
+          if (found)
+          {
+            if ((*found)->eq(item, 0))
+              continue;                           // Same field twice
+            if (report_error != IGNORE_ERRORS)
+              my_error(ER_NON_UNIQ_ERROR, MYF(0),
+                       find->full_name(), current_thd->where);
+            return (Item**) 0;
+          }
+          found= li.ref();
+          *counter= i;
+          *resolution= fname_cmp ? RESOLVED_AGAINST_ALIAS:
+	                           RESOLVED_WITH_NO_ALIAS;
+        }
+        else if (!fname_cmp)
+        {
+          /*
+            We will use non-aliased field or react on such ambiguities only if
+            we won't be able to find aliased field.
+            Again if we have ambiguity with field outside of select list
+            we should prefer fields from select list.
+          */
+          if (found_unaliased)
+          {
+            if ((*found_unaliased)->eq(item, 0))
+              continue;                           // Same field twice
+            found_unaliased_non_uniq= 1;
+          }
+          found_unaliased= li.ref();
+          unaliased_counter= i;
+        }
+      }
+    }
+    else if (!table_name)
+    {
+      if (is_ref_by_name && find->name.str && item->name.str &&
+          find->name.length == item->name.length &&
+	  !lex_string_cmp(system_charset_info, &item->name, &find->name))
+      {
+        found= li.ref();
+        *counter= i;
+        *resolution= RESOLVED_AGAINST_ALIAS;
+        break;
+      }
+      else if (find->eq(item,0))
+      {
+        found= li.ref();
+        *counter= i;
+        *resolution= RESOLVED_IGNORING_ALIAS;
+        break;
+      }
+    }
+  }
+
+  if (likely(found))
+    return found;
+
+  if (unlikely(found_unaliased_non_uniq))
+  {
+    if (report_error != IGNORE_ERRORS)
+      my_error(ER_NON_UNIQ_ERROR, MYF(0),
+               find->full_name(), current_thd->where);
+    return (Item **) 0;
+  }
+  if (found_unaliased)
+  {
+    found= found_unaliased;
+    *counter= unaliased_counter;
+    *resolution= RESOLVED_BEHIND_ALIAS;
+  }
+
+  if (found)
+    return found;
+
+  if (report_error != REPORT_EXCEPT_NOT_FOUND)
+  {
+    if (report_error == REPORT_ALL_ERRORS)
+      my_error(ER_BAD_FIELD_ERROR, MYF(0),
+               find->full_name(), current_thd->where);
+    return (Item **) 0;
+  }
+  else
+    return (Item **) not_found_item;
+}
+
+
+/*
+  Test if a string is a member of a list of strings.
+
+  SYNOPSIS
+    test_if_string_in_list()
+    find      the string to look for
+    str_list  a list of strings to be searched
+
+  DESCRIPTION
+    Sequentially search a list of strings for a string, and test whether
+    the list contains the same string.
+
+  RETURN
+    TRUE  if find is in str_list
+    FALSE otherwise
+*/
+
+static bool
+test_if_string_in_list(const char *find, List<String> *str_list)
+{
+  List_iterator<String> str_list_it(*str_list);
+  String *curr_str;
+  size_t find_length= strlen(find);
+  while ((curr_str= str_list_it++))
+  {
+    /* Cheap length check first; the case-insensitive compare is costlier. */
+    if (find_length != curr_str->length())
+      continue;
+    if (!my_strcasecmp(system_charset_info, find, curr_str->ptr()))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Create a new name resolution context for an item so that it is
+  being resolved in a specific table reference.
+
+  SYNOPSIS
+    set_new_item_local_context()
+    thd        pointer to current thread
+    item       item for which new context is created and set
+    table_ref  table ref where an item should be resolved
+
+  DESCRIPTION
+    Create a new name resolution context for an item, so that the item
+    is resolved only against the supplied 'table_ref'.
+
+  RETURN
+    FALSE  if all OK
+    TRUE   otherwise
+*/
+
+static bool
+set_new_item_local_context(THD *thd, Item_ident *item, TABLE_LIST *table_ref)
+{
+  /*
+    Allocate the context on the statement arena so it lives as long as the
+    item that owns it.
+  */
+  Name_resolution_context *ctx= new (thd->mem_root) Name_resolution_context;
+  if (ctx == NULL)
+    return TRUE;                                /* out of memory */
+  ctx->init();
+  /* Restrict name resolution for 'item' to the single table 'table_ref'. */
+  ctx->first_name_resolution_table= table_ref;
+  ctx->last_name_resolution_table= table_ref;
+  item->context= ctx;
+  return FALSE;
+}
+
+
+/*
+  Find and mark the common columns of two table references.
+
+  SYNOPSIS
+    mark_common_columns()
+    thd                [in] current thread
+    table_ref_1        [in] the first (left) join operand
+    table_ref_2        [in] the second (right) join operand
+    using_fields       [in] if the join is JOIN...USING - the join columns,
+                            if NATURAL join, then NULL
+    found_using_fields [out] number of fields from the USING clause that were
+                             found among the common fields
+
+  DESCRIPTION
+    The procedure finds the common columns of two relations (either
+    tables or intermediate join results), and adds an equi-join condition
+    to the ON clause of 'table_ref_2' for each pair of matching columns.
+    If some of table_ref_XXX represents a base table or view, then we
+    create new 'Natural_join_column' instances for each column
+    reference and store them in the 'join_columns' of the table
+    reference.
+
+  IMPLEMENTATION
+    The procedure assumes that store_natural_using_join_columns() was
+    called for the previous level of NATURAL/USING joins.
+
+  RETURN
+    TRUE   error when some common column is non-unique, or out of memory
+    FALSE  OK
+*/
+
+static bool
+mark_common_columns(THD *thd, TABLE_LIST *table_ref_1, TABLE_LIST *table_ref_2,
+                    List<String> *using_fields, uint *found_using_fields)
+{
+  Field_iterator_table_ref it_1, it_2;
+  Natural_join_column *nj_col_1, *nj_col_2;
+  Query_arena *arena, backup;
+  bool result= TRUE;
+  bool first_outer_loop= TRUE;
+  Field *field_1;
+  field_visibility_t field_1_invisible, field_2_invisible;
+  /*
+    Leaf table references to which new natural join columns are added
+    if the leaves are != NULL.
+  */
+  TABLE_LIST *leaf_1= (table_ref_1->nested_join &&
+                       !table_ref_1->is_natural_join) ?
+                      NULL : table_ref_1;
+  TABLE_LIST *leaf_2= (table_ref_2->nested_join &&
+                       !table_ref_2->is_natural_join) ?
+                      NULL : table_ref_2;
+
+  DBUG_ENTER("mark_common_columns");
+  DBUG_PRINT("info", ("operand_1: %s  operand_2: %s",
+                      table_ref_1->alias.str, table_ref_2->alias.str));
+
+  *found_using_fields= 0;
+  arena= thd->activate_stmt_arena_if_needed(&backup);
+
+  for (it_1.set(table_ref_1); !it_1.end_of_fields(); it_1.next())
+  {
+    bool found= FALSE;
+    const LEX_CSTRING *field_name_1;
+    Field *field_2= 0;
+
+    /* true if field_name_1 is a member of using_fields */
+    bool is_using_column_1;
+    if (!(nj_col_1= it_1.get_or_create_column_ref(thd, leaf_1)))
+      goto err;
+
+    field_1= nj_col_1->field();
+    field_1_invisible= field_1 ? field_1->invisible : VISIBLE;
+
+    if (field_1_invisible == INVISIBLE_FULL)
+      continue;
+
+    field_name_1= nj_col_1->name();
+    is_using_column_1= using_fields &&
+      test_if_string_in_list(field_name_1->str, using_fields);
+    DBUG_PRINT ("info", ("field_name_1=%s.%s",
+                         nj_col_1->safe_table_name(),
+                         field_name_1->str));
+
+    if (field_1_invisible && !is_using_column_1)
+      continue;
+
+    /*
+      Find a field with the same name in table_ref_2.
+
+      Note that for the second loop, it_2.set() will iterate over
+      table_ref_2->join_columns and not generate any new elements or
+      lists.
+    */
+    nj_col_2= NULL;
+    for (it_2.set(table_ref_2); !it_2.end_of_fields(); it_2.next())
+    {
+      Natural_join_column *cur_nj_col_2;
+      const LEX_CSTRING *cur_field_name_2;
+      if (!(cur_nj_col_2= it_2.get_or_create_column_ref(thd, leaf_2)))
+        goto err;
+
+      field_2= cur_nj_col_2->field();
+      field_2_invisible= field_2 ? field_2->invisible : VISIBLE;
+
+      if (field_2_invisible == INVISIBLE_FULL)
+        continue;
+
+      cur_field_name_2= cur_nj_col_2->name();
+      DBUG_PRINT ("info", ("cur_field_name_2=%s.%s",
+                           cur_nj_col_2->safe_table_name(),
+                           cur_field_name_2->str));
+
+      /*
+        Compare the two columns and check for duplicate common fields.
+        A common field is duplicate either if it was already found in
+        table_ref_2 (then found == TRUE), or if a field in table_ref_2
+        was already matched by some previous field in table_ref_1
+        (then cur_nj_col_2->is_common == TRUE).
+        Note that it is too early to check the columns outside of the
+        USING list for ambiguity because they are not actually "referenced"
+        here. These columns must be checked only on unqualified reference
+        by name (e.g. in SELECT list).
+      */
+      if (!lex_string_cmp(system_charset_info, field_name_1,
+                          cur_field_name_2))
+      {
+        DBUG_PRINT ("info", ("match c1.is_common=%d", nj_col_1->is_common));
+        if (cur_nj_col_2->is_common || found)
+        {
+          my_error(ER_NON_UNIQ_ERROR, MYF(0), field_name_1->str, thd->where);
+          goto err;
+        }
+        if ((!using_fields && !field_2_invisible) || is_using_column_1)
+        {
+          DBUG_ASSERT(nj_col_2 == NULL);
+          nj_col_2= cur_nj_col_2;
+          found= TRUE;
+        }
+      }
+    }
+    if (first_outer_loop && leaf_2)
+    {
+      /*
+        Make sure that the next inner loop "knows" that all columns
+        are materialized already.
+      */
+      leaf_2->is_join_columns_complete= TRUE;
+      first_outer_loop= FALSE;
+    }
+    if (!found)
+      continue;                                 // No matching field
+
+    /*
+      field_1 and field_2 have the same names. Check if they are in the USING
+      clause (if present), mark them as common fields, and add a new
+      equi-join condition to the ON clause.
+    */
+    if (nj_col_2)
+    {
+      /*
+        Create non-fixed fully qualified field and let fix_fields to
+        resolve it.
+      */
+      Item *item_1=   nj_col_1->create_item(thd);
+      Item *item_2=   nj_col_2->create_item(thd);
+      Item_ident *item_ident_1, *item_ident_2;
+      Item_func_eq *eq_cond;
+
+      if (!item_1 || !item_2)
+        goto err;                               // out of memory
+
+      /*
+        The following assert checks that the two created items are of
+        type Item_ident.
+      */
+      DBUG_ASSERT(!thd->lex->current_select->no_wrap_view_item);
+      /*
+        In the case of no_wrap_view_item == 0, the created items must be
+        of sub-classes of Item_ident.
+      */
+      DBUG_ASSERT(item_1->type() == Item::FIELD_ITEM ||
+                  item_1->type() == Item::REF_ITEM);
+      DBUG_ASSERT(item_2->type() == Item::FIELD_ITEM ||
+                  item_2->type() == Item::REF_ITEM);
+
+      /*
+        We need to cast item_1,2 to Item_ident, because we need to hook name
+        resolution contexts specific to each item.
+      */
+      item_ident_1= (Item_ident*) item_1;
+      item_ident_2= (Item_ident*) item_2;
+      /*
+        Create and hook special name resolution contexts to each item in the
+        new join condition . We need this to both speed-up subsequent name
+        resolution of these items, and to enable proper name resolution of
+        the items during the execute phase of PS.
+      */
+      if (set_new_item_local_context(thd, item_ident_1, nj_col_1->table_ref) ||
+          set_new_item_local_context(thd, item_ident_2, nj_col_2->table_ref))
+        goto err;
+
+      if (!(eq_cond= new (thd->mem_root) Item_func_eq(thd, item_ident_1, item_ident_2)))
+        goto err;                               /* Out of memory. */
+
+      /*
+        Add the new equi-join condition to the ON clause. Notice that
+        fix_fields() is applied to all ON conditions in setup_conds()
+        so we don't do it here.
+      */
+      add_join_on(thd, (table_ref_1->outer_join & JOIN_TYPE_RIGHT ?
+                        table_ref_1 : table_ref_2),
+                  eq_cond);
+
+      nj_col_1->is_common= nj_col_2->is_common= TRUE;
+      DBUG_PRINT ("info", ("%s.%s and %s.%s are common",
+                           nj_col_1->safe_table_name(),
+                           nj_col_1->name()->str,
+                           nj_col_2->safe_table_name(),
+                           nj_col_2->name()->str));
+
+      if (field_1)
+        update_field_dependencies(thd, field_1, field_1->table);
+      if (field_2)
+        update_field_dependencies(thd, field_2, field_2->table);
+
+      if (using_fields != NULL)
+        ++(*found_using_fields);
+    }
+  }
+  if (leaf_1)
+    leaf_1->is_join_columns_complete= TRUE;
+
+  /*
+    Everything is OK.
+    Notice that at this point there may be some column names in the USING
+    clause that are not among the common columns. This is an SQL error and
+    we check for this error in store_natural_using_join_columns() when
+    (found_using_fields < length(join_using_fields)).
+  */
+  result= FALSE;
+
+err:
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+  DBUG_RETURN(result);
+}
+
+
+
+/*
+  Materialize and store the row type of NATURAL/USING join.
+
+  SYNOPSIS
+    store_natural_using_join_columns()
+    thd                current thread
+    natural_using_join the table reference of the NATURAL/USING join
+    table_ref_1        the first (left) operand (of a NATURAL/USING join).
+    table_ref_2        the second (right) operand (of a NATURAL/USING join).
+    using_fields       if the join is JOIN...USING - the join columns,
+                       if NATURAL join, then NULL
+    found_using_fields number of fields from the USING clause that were
+                       found among the common fields
+
+  DESCRIPTION
+    Iterate over the columns of both join operands and sort and store
+    all columns into the 'join_columns' list of natural_using_join
+    where the list is formed by three parts:
+      part1: The coalesced columns of table_ref_1 and table_ref_2,
+             sorted according to the column order of the first table.
+      part2: The other columns of the first table, in the order in
+             which they were defined in CREATE TABLE.
+      part3: The other columns of the second table, in the order in
+             which they were defined in CREATE TABLE.
+    Time complexity - O(N1+N2), where Ni = length(table_ref_i).
+
+  IMPLEMENTATION
+    The procedure assumes that mark_common_columns() has been called
+    for the join that is being processed.
+
+  RETURN
+    TRUE    error: Some common column is ambiguous
+    FALSE   OK
+*/
+
+static bool
+store_natural_using_join_columns(THD *thd, TABLE_LIST *natural_using_join,
+                                 TABLE_LIST *table_ref_1,
+                                 TABLE_LIST *table_ref_2,
+                                 List<String> *using_fields,
+                                 uint found_using_fields)
+{
+  Field_iterator_table_ref it_1, it_2;
+  Natural_join_column *nj_col_1, *nj_col_2;
+  Query_arena *arena, backup;
+  bool result= TRUE;
+  List<Natural_join_column> *non_join_columns;
+  List<Natural_join_column> *join_columns;
+  DBUG_ENTER("store_natural_using_join_columns");
+
+  DBUG_ASSERT(!natural_using_join->join_columns);
+
+  arena= thd->activate_stmt_arena_if_needed(&backup);
+
+  if (!(non_join_columns= new List<Natural_join_column>) ||
+      !(join_columns= new List<Natural_join_column>))
+    goto err;
+
+  /* Append the columns of the first join operand. */
+  for (it_1.set(table_ref_1); !it_1.end_of_fields(); it_1.next())
+  {
+    nj_col_1= it_1.get_natural_column_ref();
+    if (nj_col_1->is_common)
+    {
+      join_columns->push_back(nj_col_1, thd->mem_root);
+      /* Reset the common columns for the next call to mark_common_columns. */
+      nj_col_1->is_common= FALSE;
+    }
+    else
+      non_join_columns->push_back(nj_col_1, thd->mem_root);
+  }
+
+  /*
+    Check that all columns in the USING clause are among the common
+    columns. If this is not the case, report the first one that was
+    not found in an error.
+  */
+  if (using_fields && found_using_fields < using_fields->elements)
+  {
+    String *using_field_name;
+    List_iterator_fast<String> using_fields_it(*using_fields);
+    while ((using_field_name= using_fields_it++))
+    {
+      const char *using_field_name_ptr= using_field_name->c_ptr();
+      List_iterator_fast<Natural_join_column>
+        it(*join_columns);
+      Natural_join_column *common_field;
+
+      for (;;)
+      {
+        /* If reached the end of fields, and none was found, report error. */
+        if (!(common_field= it++))
+        {
+          my_error(ER_BAD_FIELD_ERROR, MYF(0), using_field_name_ptr,
+                   current_thd->where);
+          goto err;
+        }
+        if (!my_strcasecmp(system_charset_info,
+                           common_field->name()->str, using_field_name_ptr))
+          break;                                // Found match
+      }
+    }
+  }
+
+  /* Append the non-equi-join columns of the second join operand. */
+  for (it_2.set(table_ref_2); !it_2.end_of_fields(); it_2.next())
+  {
+    nj_col_2= it_2.get_natural_column_ref();
+    if (!nj_col_2->is_common)
+      non_join_columns->push_back(nj_col_2, thd->mem_root);
+    else
+    {
+      /* Reset the common columns for the next call to mark_common_columns. */
+      nj_col_2->is_common= FALSE;
+    }
+  }
+
+  if (non_join_columns->elements > 0)
+    join_columns->append(non_join_columns);
+  natural_using_join->join_columns= join_columns;
+  natural_using_join->is_join_columns_complete= TRUE;
+
+  result= FALSE;
+
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+  DBUG_RETURN(result);
+
+err:
+  /*
+     Actually we failed to build join columns list, so we have to
+     clear it to avoid problems with half-build join on next run.
+     The list was created in mark_common_columns().
+   */
+  table_ref_1->remove_join_columns();
+  table_ref_2->remove_join_columns();
+
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+  DBUG_RETURN(TRUE);
+}
+
+
+/*
+  Precompute and store the row types of the top-most NATURAL/USING joins.
+
+  SYNOPSIS
+    store_top_level_join_columns()
+    thd            current thread
+    table_ref      nested join or table in a FROM clause
+    left_neighbor  neighbor table reference to the left of table_ref at the
+                   same level in the join tree
+    right_neighbor neighbor table reference to the right of table_ref at the
+                   same level in the join tree
+
+  DESCRIPTION
+    The procedure performs a post-order traversal of a nested join tree
+    and materializes the row types of NATURAL/USING joins in a
+    bottom-up manner until it reaches the TABLE_LIST elements that
+    represent the top-most NATURAL/USING joins. The procedure should be
+    applied to each element of SELECT_LEX::top_join_list (i.e. to each
+    top-level element of the FROM clause).
+
+  IMPLEMENTATION
+    Notice that the table references in the list nested_join->join_list
+    are in reverse order, thus when we iterate over it, we are moving
+    from the right to the left in the FROM clause.
+
+  RETURN
+    TRUE   Error
+    FALSE  OK
+*/
+
+static bool
+store_top_level_join_columns(THD *thd, TABLE_LIST *table_ref,
+                             TABLE_LIST *left_neighbor,
+                             TABLE_LIST *right_neighbor)
+{
+  Query_arena *arena, backup;
+  bool result= TRUE;
+
+  DBUG_ENTER("store_top_level_join_columns");
+
+  arena= thd->activate_stmt_arena_if_needed(&backup);
+
+  /* Call the procedure recursively for each nested table reference. */
+  if (table_ref->nested_join)
+  {
+    List_iterator_fast<TABLE_LIST> nested_it(table_ref->nested_join->join_list);
+    TABLE_LIST *same_level_left_neighbor= nested_it++;
+    TABLE_LIST *same_level_right_neighbor= NULL;
+    /* Left/right-most neighbors, possibly at higher levels in the join tree. */
+    TABLE_LIST *real_left_neighbor, *real_right_neighbor;
+
+    while (same_level_left_neighbor)
+    {
+      TABLE_LIST *cur_table_ref= same_level_left_neighbor;
+      same_level_left_neighbor= nested_it++;
+      /*
+        The order of RIGHT JOIN operands is reversed in 'join list' to
+        transform it into a LEFT JOIN. However, in this procedure we need
+        the join operands in their lexical order, so below we reverse the
+        join operands. Notice that this happens only in the first loop,
+        and not in the second one, as in the second loop
+        same_level_left_neighbor == NULL.
+        This is the correct behavior, because the second loop sets
+        cur_table_ref reference correctly after the join operands are
+        swapped in the first loop.
+      */
+      if (same_level_left_neighbor &&
+          cur_table_ref->outer_join & JOIN_TYPE_RIGHT)
+      {
+        /* This can happen only for JOIN ... ON. */
+        DBUG_ASSERT(table_ref->nested_join->join_list.elements == 2);
+        swap_variables(TABLE_LIST*, same_level_left_neighbor, cur_table_ref);
+      }
+
+      /*
+        Pick the parent's left and right neighbors if there are no immediate
+        neighbors at the same level.
+      */
+      real_left_neighbor=  (same_level_left_neighbor) ?
+                           same_level_left_neighbor : left_neighbor;
+      real_right_neighbor= (same_level_right_neighbor) ?
+                           same_level_right_neighbor : right_neighbor;
+
+      if (cur_table_ref->nested_join &&
+          store_top_level_join_columns(thd, cur_table_ref,
+                                       real_left_neighbor, real_right_neighbor))
+        goto err;
+      same_level_right_neighbor= cur_table_ref;
+    }
+  }
+
+  /*
+    If this is a NATURAL/USING join, materialize its result columns and
+    convert to a JOIN ... ON.
+  */
+  if (table_ref->is_natural_join)
+  {
+    DBUG_ASSERT(table_ref->nested_join &&
+                table_ref->nested_join->join_list.elements == 2);
+    List_iterator_fast<TABLE_LIST> operand_it(table_ref->nested_join->join_list);
+    /*
+      Notice that the order of join operands depends on whether table_ref
+      represents a LEFT or a RIGHT join. In a RIGHT join, the operands are
+      in inverted order.
+     */
+    TABLE_LIST *table_ref_2= operand_it++; /* Second NATURAL join operand.*/
+    TABLE_LIST *table_ref_1= operand_it++; /* First NATURAL join operand. */
+    List<String> *using_fields= table_ref->join_using_fields;
+    uint found_using_fields;
+
+    /*
+      The two join operands were interchanged in the parser, change the order
+      back for 'mark_common_columns'.
+    */
+    if (table_ref_2->outer_join & JOIN_TYPE_RIGHT)
+      swap_variables(TABLE_LIST*, table_ref_1, table_ref_2);
+    if (mark_common_columns(thd, table_ref_1, table_ref_2,
+                            using_fields, &found_using_fields))
+      goto err;
+
+    /*
+      Swap the join operands back, so that we pick the columns of the second
+      one as the coalesced columns. In this way the coalesced columns are the
+      same as of an equivalent LEFT JOIN.
+    */
+    if (table_ref_1->outer_join & JOIN_TYPE_RIGHT)
+      swap_variables(TABLE_LIST*, table_ref_1, table_ref_2);
+    if (store_natural_using_join_columns(thd, table_ref, table_ref_1,
+                                         table_ref_2, using_fields,
+                                         found_using_fields))
+      goto err;
+
+    /*
+      Change NATURAL JOIN to JOIN ... ON. We do this for both operands
+      because either one of them or the other is the one with the
+      natural join flag because RIGHT joins are transformed into LEFT,
+      and the two tables may be reordered.
+    */
+    table_ref_1->natural_join= table_ref_2->natural_join= NULL;
+
+    /* Add a TRUE condition to outer joins that have no common columns. */
+    if (table_ref_2->outer_join &&
+        !table_ref_1->on_expr && !table_ref_2->on_expr)
+      table_ref_2->on_expr= (Item*) Item_true;
+
+    /* Change this table reference to become a leaf for name resolution. */
+    if (left_neighbor)
+    {
+      TABLE_LIST *last_leaf_on_the_left;
+      last_leaf_on_the_left= left_neighbor->last_leaf_for_name_resolution();
+      last_leaf_on_the_left->next_name_resolution_table= table_ref;
+    }
+    if (right_neighbor)
+    {
+      TABLE_LIST *first_leaf_on_the_right;
+      first_leaf_on_the_right= right_neighbor->first_leaf_for_name_resolution();
+      table_ref->next_name_resolution_table= first_leaf_on_the_right;
+    }
+    else
+      table_ref->next_name_resolution_table= NULL;
+  }
+  result= FALSE; /* All is OK. */
+
+err:
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+  DBUG_RETURN(result);
+}
+
+
+/*
+  Compute and store the row types of the top-most NATURAL/USING joins
+  in a FROM clause.
+
+  SYNOPSIS
+    setup_natural_join_row_types()
+    thd          current thread
+    from_clause  list of top-level table references in a FROM clause
+
+  DESCRIPTION
+    Apply the procedure 'store_top_level_join_columns' to each of the
+    top-level table references of the FROM clause. Adjust the list of tables
+    for name resolution - context->first_name_resolution_table to the
+    top-most, left-most NATURAL/USING join.
+
+  IMPLEMENTATION
+    Notice that the table references in 'from_clause' are in reverse
+    order, thus when we iterate over it, we are moving from the right
+    to the left in the FROM clause.
+
+  NOTES
+    We can't run this many times as the first_name_resolution_table would
+    be different for subsequent runs when sub queries has been optimized
+    away.
+
+  RETURN
+    TRUE   Error
+    FALSE  OK
+*/
+
+static bool setup_natural_join_row_types(THD *thd,
+                                         List<TABLE_LIST> *from_clause,
+                                         Name_resolution_context *context)
+{
+  DBUG_ENTER("setup_natural_join_row_types");
+  thd->where= "from clause";
+  if (from_clause->elements == 0)
+    DBUG_RETURN(false); /* We come here in the case of UNIONs. */
+
+  /*
+     Do not redo work if already done:
+     1) for stored procedures,
+     2) for multitable update after lock failure and table reopening.
+  */
+  if (!context->select_lex->first_natural_join_processing)
+  {
+    context->first_name_resolution_table= context->natural_join_first_table;
+    DBUG_PRINT("info", ("using cached setup_natural_join_row_types"));
+    DBUG_RETURN(false);
+  }
+
+  List_iterator_fast<TABLE_LIST> table_ref_it(*from_clause);
+  TABLE_LIST *table_ref; /* Current table reference. */
+  /* Table reference to the left of the current. */
+  TABLE_LIST *left_neighbor;
+  /* Table reference to the right of the current. */
+  TABLE_LIST *right_neighbor= NULL;
+
+  /* Note that tables in the list are in reversed order */
+  for (left_neighbor= table_ref_it++; left_neighbor ; )
+  {
+    table_ref= left_neighbor;
+    do
+    {
+      left_neighbor= table_ref_it++;
+    }
+    while (left_neighbor && left_neighbor->sj_subq_pred);
+
+    if (store_top_level_join_columns(thd, table_ref,
+                                     left_neighbor, right_neighbor))
+      DBUG_RETURN(true);
+    if (left_neighbor)
+    {
+      TABLE_LIST *first_leaf_on_the_right;
+      first_leaf_on_the_right= table_ref->first_leaf_for_name_resolution();
+      left_neighbor->next_name_resolution_table= first_leaf_on_the_right;
+    }
+    right_neighbor= table_ref;
+  }
+
+  /*
+    Store the top-most, left-most NATURAL/USING join, so that we start
+    the search from that one instead of context->table_list. At this point
+    right_neighbor points to the left-most top-level table reference in the
+    FROM clause.
+  */
+  DBUG_ASSERT(right_neighbor);
+  context->first_name_resolution_table=
+    right_neighbor->first_leaf_for_name_resolution();
+  /*
+    This is only to ensure that first_name_resolution_table doesn't
+    change on re-execution
+  */
+  context->natural_join_first_table= context->first_name_resolution_table;
+  context->select_lex->first_natural_join_processing= false;
+  DBUG_RETURN (false);
+}
+
+
+/****************************************************************************
+** Expand all '*' in given fields
+****************************************************************************/
+
+int setup_wild(THD *thd, TABLE_LIST *tables, List &fields,
+	       List *sum_func_list, SELECT_LEX *select_lex, bool returning_field)
+{
+  Item *item;
+  List_iterator it(fields);
+  Query_arena *arena, backup;
+  /* RETURNING and the SELECT list keep separate '*' counters; pick one. */
+  uint *with_wild= returning_field ? &(thd->lex->returning()->with_wild) :
+                                     &(select_lex->with_wild);
+  DBUG_ENTER("setup_wild");
+
+  if (!(*with_wild))
+     DBUG_RETURN(0);
+
+  /*
+    Don't use arena if we are not in prepared statements or stored procedures
+    For PS/SP we have to use arena to remember the changes
+  */
+  arena= thd->activate_stmt_arena_if_needed(&backup);
+
+  thd->lex->current_select->cur_pos_in_select_list= 0;
+  while (*with_wild && (item= it++))
+  {
+    /* A '*' is an Item_field named like star_clex_str with no bound field. */
+    if (item->type() == Item::FIELD_ITEM &&
+        ((Item_field*) item)->field_name.str == star_clex_str.str &&
+	!((Item_field*) item)->field)
+    {
+      uint elem= fields.elements;
+      bool any_privileges= ((Item_field *) item)->any_privileges;
+      Item_subselect *subsel= thd->lex->current_select->master_unit()->item;
+      if (subsel &&
+          subsel->substype() == Item_subselect::EXISTS_SUBS)
+      {
+        /*
+          It is EXISTS(SELECT * ...) and we can replace * by any constant.
+
+          Item_int do not need fix_fields() because it is basic constant.
+        */
+        it.replace(new (thd->mem_root) Item_int(thd, "Not_used", (longlong) 1,
+                                MY_INT64_NUM_DECIMAL_DIGITS));
+      }
+      else if (insert_fields(thd, ((Item_field*) item)->context,
+                             ((Item_field*) item)->db_name.str,
+                             ((Item_field*) item)->table_name.str, &it,
+                             any_privileges, &select_lex->hidden_bit_fields, returning_field))
+      {
+	if (arena)
+	  thd->restore_active_arena(arena, &backup);
+	DBUG_RETURN(-1);
+      }
+      if (sum_func_list)
+      {
+	/*
+	  sum_func_list is a list that has the fields list as a tail.
+	  Because of this we have to update the element count also for this
+	  list after expanding the '*' entry.
+	*/
+	sum_func_list->elements+= fields.elements - elem;
+      }
+      /* One '*' expanded; the loop terminates early once all are done. */
+      (*with_wild)--;
+    }
+    else
+      thd->lex->current_select->cur_pos_in_select_list++;
+  }
+  DBUG_ASSERT(!(*with_wild));
+  thd->lex->current_select->cur_pos_in_select_list= UNDEF_POS;
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+  DBUG_RETURN(0);
+}
+
+/****************************************************************************
+** Check that all given fields exists and fill struct with current data
+****************************************************************************/
+
+bool setup_fields(THD *thd, Ref_ptr_array ref_pointer_array,
+                  List &fields, enum_column_usage column_usage,
+                  List *sum_func_list, List *pre_fix,
+                  bool allow_sum_func)
+{
+  Item *item;
+  LEX * const lex= thd->lex;
+  enum_column_usage saved_column_usage= thd->column_usage;
+  nesting_map save_allow_sum_func= lex->allow_sum_func;
+  List_iterator it(fields);
+  bool save_is_item_list_lookup;
+  /* Only fill pre_fix on the first call (it is empty only then). */
+  bool make_pre_fix= (pre_fix && (pre_fix->elements == 0));
+  DBUG_ENTER("setup_fields");
+  DBUG_PRINT("enter", ("ref_pointer_array: %p", ref_pointer_array.array()));
+
+  thd->column_usage= column_usage;
+  DBUG_PRINT("info", ("thd->column_usage: %d", thd->column_usage));
+  /*
+    Following 2 conditions always should be true (but they were added
+    due to an error present only in 10.3):
+    1) nest_level should be 0 or positive;
+    2) nest level of all SELECTs on the same level should be equal to the
+       first SELECT on this level (and each other).
+  */
+  DBUG_ASSERT(lex->current_select->nest_level >= 0);
+  DBUG_ASSERT(lex->current_select->master_unit()->first_select()->nest_level ==
+              lex->current_select->nest_level);
+  if (allow_sum_func)
+    lex->allow_sum_func.set_bit(lex->current_select->nest_level);
+  thd->where= THD::DEFAULT_WHERE;
+  save_is_item_list_lookup= lex->current_select->is_item_list_lookup;
+  lex->current_select->is_item_list_lookup= 0;
+
+  /*
+    To prevent fail on forward lookup we fill it with zeroes,
+    then if we got pointer on zero after find_item_in_list we will know
+    that it is forward lookup.
+
+    There is other way to solve problem: fill array with pointers to list,
+    but it will be slower.
+
+    TODO: remove it when (if) we make one list for all fields and
+    ref_pointer_array
+  */
+  if (!ref_pointer_array.is_null())
+  {
+    DBUG_ASSERT(ref_pointer_array.size() >= fields.elements);
+    memset(ref_pointer_array.array(), 0, sizeof(Item *) * fields.elements);
+  }
+
+  /*
+    We call set_entry() there (before fix_fields() of the whole list of field
+    items) because:
+    1) the list of field items has same order as in the query, and the
+       Item_func_get_user_var item may go before the Item_func_set_user_var:
+          SELECT @a, @a := 10 FROM t;
+    2) The entry->update_query_id value controls constantness of
+       Item_func_get_user_var items, so in presence of Item_func_set_user_var
+       items we have to refresh their entries before fixing of
+       Item_func_get_user_var items.
+  */
+  List_iterator li(lex->set_var_list);
+  Item_func_set_user_var *var;
+  while ((var= li++))
+    var->set_entry(thd, FALSE);
+
+  Ref_ptr_array ref= ref_pointer_array;
+  lex->current_select->cur_pos_in_select_list= 0;
+  while ((item= it++))
+  {
+    if (make_pre_fix)
+      pre_fix->push_back(item, thd->stmt_arena->mem_root);
+
+    if (item->fix_fields_if_needed_for_scalar(thd, it.ref()))
+    {
+      /* Restore all saved state before reporting the error to the caller. */
+      lex->current_select->is_item_list_lookup= save_is_item_list_lookup;
+      lex->allow_sum_func= save_allow_sum_func;
+      thd->column_usage= saved_column_usage;
+      DBUG_PRINT("info", ("thd->column_usage: %d", thd->column_usage));
+      DBUG_RETURN(TRUE); /* purecov: inspected */
+    }
+    item= *(it.ref()); // Item might have changed in fix_fields()
+    if (!ref.is_null())
+    {
+      ref[0]= item;
+      ref.pop_front();
+    }
+    /*
+      split_sum_func() must be called for Window Function items, see
+      Item_window_func::split_sum_func.
+    */
+    if (sum_func_list &&
+        ((item->with_sum_func() && item->type() != Item::SUM_FUNC_ITEM) ||
+         item->with_window_func()))
+    {
+      item->split_sum_func(thd, ref_pointer_array, *sum_func_list,
+                           SPLIT_SUM_SELECT);
+    }
+    lex->current_select->select_list_tables|= item->used_tables();
+    lex->used_tables|= item->used_tables();
+    lex->current_select->cur_pos_in_select_list++;
+    lex->current_select->rownum_in_field_list |= item->with_rownum_func();
+  }
+  lex->current_select->is_item_list_lookup= save_is_item_list_lookup;
+  lex->current_select->cur_pos_in_select_list= UNDEF_POS;
+
+  lex->allow_sum_func= save_allow_sum_func;
+  thd->column_usage= saved_column_usage;
+  DBUG_PRINT("info", ("thd->column_usage: %d", thd->column_usage));
+  DBUG_RETURN(MY_TEST(thd->is_error()));
+}
+
+/*
+  make list of leaves for a single TABLE_LIST
+
+  SYNOPSIS
+    make_leaves_for_single_table()
+    thd             Thread handler
+    leaves          List of leaf tables to be filled
+    table           TABLE_LIST object to process
+    full_table_list Whether to include tables from mergeable derived table/view
+*/
+void make_leaves_for_single_table(THD *thd, List &leaves,
+                              TABLE_LIST *table, bool& full_table_list,
+                              TABLE_LIST *boundary)
+{
+  /* Crossing the boundary table toggles expansion of merged derived tables. */
+  if (table == boundary)
+    full_table_list= !full_table_list;
+  if (full_table_list && table->is_merged_derived())
+  {
+    SELECT_LEX *select_lex= table->get_single_select();
+    /*
+      It's safe to use select_lex->leaf_tables because all derived
+      tables/views were already prepared and has their leaf_tables
+      set properly.
+    */
+    /* Recurse into the derived table's own table list. */
+    make_leaves_list(thd, leaves, select_lex->get_table_list(),
+                     full_table_list, boundary);
+  }
+  else
+  {
+    /* A base table (or non-expanded derived table) is itself a leaf. */
+    leaves.push_back(table, thd->mem_root);
+  }
+}
+
+
+/*
+  Perform checks that all given fields exist; if they exist, fill the struct
+  with current data and expand all '*' in given fields for LEX::returning.
+
+  SYNOPSIS
+     thd                 Thread handler
+     table_list          Global/local table list
+*/
+
+int setup_returning_fields(THD* thd, TABLE_LIST* table_list)
+{
+  /* Nothing to do unless the statement has a RETURNING clause. */
+  if (!thd->lex->has_returning())
+    return 0;
+  /* Expand '*' in the RETURNING item list, then fix all its items. */
+  return setup_wild(thd, table_list, thd->lex->returning()->item_list, NULL,
+                    thd->lex->returning(), true)
+      || setup_fields(thd, Ref_ptr_array(), thd->lex->returning()->item_list,
+                      MARK_COLUMNS_READ, NULL, NULL, false);
+}
+
+
+/*
+  make list of leaves of join table tree
+
+  SYNOPSIS
+    make_leaves_list()
+    leaves          List of leaf tables to be filled
+    tables          Table list
+    full_table_list Whether to include tables from mergeable derived table/view.
+                    We need them for checks for INSERT/UPDATE statements only.
+*/
+
+void make_leaves_list(THD *thd, List &leaves, TABLE_LIST *tables,
+                      bool full_table_list, TABLE_LIST *boundary)
+ 
+{
+  /* Process each top-level table reference; recursion happens per table. */
+  for (TABLE_LIST *table= tables; table; table= table->next_local)
+  {
+    make_leaves_for_single_table(thd, leaves, table, full_table_list,
+                                 boundary);
+  }
+}
+
+
+/*
+  Setup the map and other attributes for a single TABLE_LIST object
+
+  SYNOPSIS
+    setup_table_attributes()
+    thd                 Thread handler
+    table_list          TABLE_LIST object to process
+    first_select_table  First table participating in SELECT for INSERT..SELECT
+                        statements, NULL for other cases
+    tablenr             Serial number of the table in the SQL statement
+
+  RETURN
+    false               Success
+    true                Failure
+*/
+bool setup_table_attributes(THD *thd, TABLE_LIST *table_list,
+                            TABLE_LIST *first_select_table,
+                            uint &tablenr)
+{
+  TABLE *table= table_list->table;
+  if (table)
+    table->pos_in_table_list= table_list;
+  if (first_select_table && table_list->top_table() == first_select_table)
+  {
+    /* new counting for SELECT of INSERT ... SELECT command */
+    /*
+      NOTE(review): first_select_table is passed by value, so this reset is
+      local to this call only — confirm callers do not rely on it persisting.
+    */
+    first_select_table= 0;
+    thd->lex->first_select_lex()->insert_tables= tablenr;
+    tablenr= 0;
+  }
+  if (table_list->jtbm_subselect)
+  {
+    /* JTBM (join-tab materialization) pseudo-tables only record their no. */
+    table_list->jtbm_table_no= tablenr;
+  }
+  else if (table)
+  {
+    table->pos_in_table_list= table_list;
+    setup_table_map(table, table_list, tablenr);
+
+    if (table_list->process_index_hints(table))
+      return true;
+  }
+  tablenr++;
+  /*
+    We test the max tables here as we setup_table_map() should not be called
+    with tablenr >= 64
+  */
+  if (tablenr > MAX_TABLES)
+  {
+    my_error(ER_TOO_MANY_TABLES, MYF(0), static_cast(MAX_TABLES));
+    return true;
+  }
+  return false;
+}
+
+
+/*
+  prepare tables
+
+  SYNOPSIS
+    setup_tables()
+    thd		  Thread handler
+    context       name resolution context in which to set up the table list
+    from_clause   Top-level list of table references in the FROM clause
+    tables	  Table list (select_lex->table_list)
+    leaves        List of join table leaves list (select_lex->leaf_tables)
+    refresh       It is only refresh for subquery
+    select_insert It is SELECT ... INSERT command
+    full_table_list a parameter to pass to the make_leaves_list function
+
+  NOTE
+    Check also that the 'used keys' and 'ignored keys' exists and set up the
+    table structure accordingly.
+    Create a list of leaf tables. For queries with NATURAL/USING JOINs,
+    compute the row types of the top most natural/using join table references
+    and link these into a list of table references for name resolution.
+
+    This has to be called for all tables that are used by items, as otherwise
+    table->map is not set and all Item_field will be regarded as const items.
+
+  RETURN
+    FALSE ok;  In this case *map will include the chosen index
+    TRUE  error
+*/
+
+bool setup_tables(THD *thd, Name_resolution_context *context,
+                  List *from_clause, TABLE_LIST *tables,
+                  List &leaves, bool select_insert,
+                  bool full_table_list)
+{
+  uint tablenr= 0;
+  List_iterator ti(leaves);
+  TABLE_LIST *table_list;
+
+  DBUG_ENTER("setup_tables");
+
+  DBUG_ASSERT ((select_insert && !tables->next_name_resolution_table) || !tables || 
+               (context->table_list && context->first_name_resolution_table));
+  /*
+    this is used for INSERT ... SELECT.
+    For select we setup tables except first (and its underlying tables)
+  */
+  TABLE_LIST *first_select_table= (select_insert ?
+                                   tables->next_local:
+                                   0);
+  SELECT_LEX *select_lex= select_insert ? thd->lex->first_select_lex() :
+                                          thd->lex->current_select;
+  if (select_lex->first_cond_optimization)
+  {
+    /* First optimization pass: build the leaf-table list from scratch. */
+    leaves.empty();
+    if (select_lex->prep_leaf_list_state != SELECT_LEX::SAVED)
+    {
+      /*
+        For INSERT ... SELECT statements we must not include the first table
+        (where the data is being inserted into) in the list of leaves
+      */
+      TABLE_LIST *tables_for_leaves=
+          select_insert ? first_select_table : tables;
+      make_leaves_list(thd, leaves, tables_for_leaves, full_table_list,
+                       first_select_table);
+      select_lex->prep_leaf_list_state= SELECT_LEX::READY;
+      select_lex->leaf_tables_exec.empty();
+    }
+    else
+    {
+      /* Reuse the leaf list saved at prepare time. */
+      List_iterator_fast  ti(select_lex->leaf_tables_prep);
+      while ((table_list= ti++))
+        leaves.push_back(table_list, thd->mem_root);
+    }
+      
+    List_iterator ti(leaves);
+    while ((table_list= ti++))
+    {
+      if (setup_table_attributes(thd, table_list, first_select_table, tablenr))
+        DBUG_RETURN(1);
+    }
+
+    if (select_insert)
+    {
+      /*
+        The table/view in which the data is inserted must not be included into
+        the leaf_tables list. But we need this table/view to setup attributes
+        for it. So build a temporary list of leaves and setup attributes for
+        the tables included
+      */
+      List leaves;
+      TABLE_LIST *table= tables;
+
+      make_leaves_for_single_table(thd, leaves, table, full_table_list,
+                                   first_select_table);
+
+      List_iterator ti(leaves);
+      while ((table_list= ti++))
+      {
+        if (setup_table_attributes(thd, table_list, first_select_table,
+                                   tablenr))
+          DBUG_RETURN(1);
+      }
+    }
+  }
+  else
+  { 
+    /* Re-execution: restore table numbers/maps saved after first run. */
+    List_iterator_fast  ti(select_lex->leaf_tables_exec);
+    select_lex->leaf_tables.empty();
+    while ((table_list= ti++))
+    {
+      if(table_list->jtbm_subselect)
+      {
+        table_list->jtbm_table_no= table_list->tablenr_exec;
+      }
+      else
+      {
+        table_list->table->tablenr= table_list->tablenr_exec;
+        table_list->table->map= table_list->map_exec;
+        table_list->table->maybe_null= table_list->maybe_null_exec;
+        table_list->table->pos_in_table_list= table_list;
+        if (table_list->process_index_hints(table_list->table))
+          DBUG_RETURN(1);
+      }
+      select_lex->leaf_tables.push_back(table_list);
+    }
+  }    
+
+  for (table_list= tables;
+       table_list;
+       table_list= table_list->next_local)
+  {
+    if (table_list->is_merged_derived() && table_list->merge_underlying_list)
+    {
+      /* Use the statement arena so PS/SP re-execution sees the changes. */
+      Query_arena *arena, backup;
+      arena= thd->activate_stmt_arena_if_needed(&backup);
+      bool res;
+      res= table_list->setup_underlying(thd);
+      if (arena)
+        thd->restore_active_arena(arena, &backup);
+      if (res)
+        DBUG_RETURN(1);
+    }
+
+    if (table_list->jtbm_subselect)
+    {
+      Item *item= table_list->jtbm_subselect->optimizer;
+      if (!table_list->jtbm_subselect->optimizer->fixed() &&
+          table_list->jtbm_subselect->optimizer->fix_fields(thd, &item))
+      {
+        my_error(ER_TOO_MANY_TABLES,MYF(0), static_cast(MAX_TABLES)); /* psergey-todo: WHY ER_TOO_MANY_TABLES ???*/
+        DBUG_RETURN(1);
+      }
+      DBUG_ASSERT(item == table_list->jtbm_subselect->optimizer);
+    }
+  }
+
+  /* Precompute and store the row types of NATURAL/USING joins. */
+  if (setup_natural_join_row_types(thd, from_clause, context))
+    DBUG_RETURN(1);
+
+  DBUG_RETURN(0);
+}
+
+
+/*
+  prepare tables and check access for the view tables
+
+  SYNOPSIS
+    setup_tables_and_check_access()
+    thd		  Thread handler
+    context       name resolution contest to setup table list there
+    from_clause   Top-level list of table references in the FROM clause
+    tables	  Table list (select_lex->table_list)
+    conds	  Condition of current SELECT (can be changed by VIEW)
+    leaves        List of join table leaves list (select_lex->leaf_tables)
+    refresh       It is only a refresh for a subquery
+    select_insert It is SELECT ... INSERT command
+    want_access   what access is needed
+    full_table_list a parameter to pass to the make_leaves_list function
+
+  NOTE
+    a wrapper for check_tables that will also check the resulting
+    table leaves list for access to all the tables that belong to a view
+
+  RETURN
+    FALSE ok;  In this case *map will include the chosen index
+    TRUE  error
+*/
+bool setup_tables_and_check_access(THD *thd, Name_resolution_context *context,
+                                   List *from_clause,
+                                   TABLE_LIST *tables,
+                                   List &leaves,
+                                   bool select_insert,
+                                   privilege_t want_access_first,
+                                   privilege_t want_access,
+                                   bool full_table_list)
+{
+  DBUG_ENTER("setup_tables_and_check_access");
+
+  if (setup_tables(thd, context, from_clause, tables,
+                   leaves, select_insert, full_table_list))
+    DBUG_RETURN(TRUE);
+
+  List_iterator ti(leaves);
+  TABLE_LIST *table_list;
+  /* The first leaf is checked with want_access_first, the rest with want_access. */
+  privilege_t access= want_access_first;
+  while ((table_list= ti++))
+  {
+    /* Only base tables that belong to a view need this extra check. */
+    if (table_list->belong_to_view && !table_list->view && 
+        check_single_table_access(thd, access, table_list, FALSE))
+    {
+      /* Hide the real error so the view's definition is not exposed. */
+      tables->hide_view_error(thd);
+      DBUG_RETURN(TRUE);
+    }
+    access= want_access;
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+   Create a key_map from a list of index names
+
+   SYNOPSIS
+     get_key_map_from_key_list()
+     map		key_map to fill in
+     table		Table
+     index_list		List of index names
+
+   RETURN
+     0	ok;  In this case *map will include the chosen index
+     1	error
+*/
+
+bool get_key_map_from_key_list(key_map *map, TABLE *table,
+                               List *index_list)
+{
+  List_iterator_fast it(*index_list);
+  String *name;
+  uint pos;
+
+  map->clear_all();
+  while ((name=it++))
+  {
+    /* find_type() returns a 1-based position, or <= 0 when not found. */
+    if (table->s->keynames.type_names == 0 ||
+        (pos= find_type(&table->s->keynames, name->ptr(),
+                        name->length(), 1)) <=
+        0)
+    {
+      my_error(ER_KEY_DOES_NOT_EXISTS, MYF(0), name->c_ptr(),
+	       table->pos_in_table_list->alias.str);
+      /* On error mark all keys as usable so callers don't exclude any. */
+      map->set_all();
+      return 1;
+    }
+    map->set_bit(pos-1);
+  }
+  return 0;
+}
+
+
+/*
+  Drops in all fields instead of current '*' field
+
+  SYNOPSIS
+    insert_fields()
+    thd			Thread handler
+    context             Context for name resolution
+    db_name		Database name in case of 'database_name.table_name.*'
+    table_name		Table name in case of 'table_name.*'
+    it			Pointer to '*'
+    any_privileges	0 If we should ensure that we have SELECT privileges
+		          for all columns
+                        1 If any privilege is ok
+  RETURN
+    0	ok     'it' is updated to point at last inserted
+    1	error.  Error message is generated but not sent to client
+*/
+
+bool
+insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
+	      const char *table_name, List_iterator *it,
+              bool any_privileges, uint *hidden_bit_fields, bool returning_field)
+{
+  Field_iterator_table_ref field_iterator;
+  bool found;
+  char name_buff[SAFE_NAME_LEN+1];
+  DBUG_ENTER("insert_fields");
+  DBUG_PRINT("arena", ("stmt arena: %p",thd->stmt_arena));
+
+  if (db_name && lower_case_table_names)
+  {
+    /*
+      convert database to lower case for comparison
+      We can't do this in Item_field as this would change the
+      'name' of the item which may be used in the select list
+    */
+    strmake_buf(name_buff, db_name);
+    my_casedn_str(files_charset_info, name_buff);
+    db_name= name_buff;
+  }
+
+  found= FALSE;
+
+  /*
+    If table names are qualified, then loop over all tables used in the query,
+    else treat natural joins as leaves and do not iterate over their underlying
+    tables.
+  */
+  TABLE_LIST *first= context->first_name_resolution_table;
+  /* Pointer-to-member selects which chain to walk: name-resolution or local. */
+  TABLE_LIST *TABLE_LIST::* next= &TABLE_LIST::next_name_resolution_table;
+  if (table_name && !returning_field)
+  {
+    first= context->table_list;
+    next= &TABLE_LIST::next_local;
+  }
+  for (TABLE_LIST *tables= first; tables; tables= tables->*next)
+  {
+    Field *field;
+    TABLE *table= tables->table;
+
+    DBUG_ASSERT(tables->is_leaf_for_name_resolution());
+
+    /* Skip tables not matching the qualified 'db.table.*' reference. */
+    if ((table_name && my_strcasecmp(table_alias_charset, table_name,
+                                     tables->alias.str)) ||
+        (db_name && strcmp(tables->db.str, db_name)))
+      continue;
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+    /* 
+       Ensure that we have access rights to all fields to be inserted
+       the table 'tables'. Under some circumstances, this check may be skipped.
+
+       The check is skipped in the following cases:
+
+       - any_privileges is true
+
+       - the table is a derived table
+
+       - the table is a view with SELECT privilege
+
+       - the table is a base table with SELECT privilege
+    */
+    if (!any_privileges &&
+        !tables->is_derived() &&
+        !(tables->is_view() && (tables->grant.privilege & SELECT_ACL)) &&
+        !(table && (table->grant.privilege & SELECT_ACL)))
+    {
+      field_iterator.set(tables);
+      if (check_grant_all_columns(thd, SELECT_ACL, &field_iterator))
+        DBUG_RETURN(TRUE);
+    }
+#endif
+
+    /*
+      Update the tables used in the query based on the referenced fields. For
+      views and natural joins this update is performed inside the loop below.
+    */
+    if (table)
+    {
+      thd->lex->used_tables|= table->map;
+      thd->lex->current_select->select_list_tables|= table->map;
+    }
+
+    /*
+      Initialize a generic field iterator for the current table reference.
+      Notice that it is guaranteed that this iterator will iterate over the
+      fields of a single table reference, because 'tables' is a leaf (for
+      name resolution purposes).
+    */
+    field_iterator.set(tables);
+
+    for (; !field_iterator.end_of_fields(); field_iterator.next())
+    {
+      /*
+        field() is always NULL for views (see, e.g. Field_iterator_view or
+        Field_iterator_natural_join).
+        But view fields can never be invisible.
+      */
+      if ((field= field_iterator.field()) && field->invisible != VISIBLE)
+        continue;
+
+      Item *item;
+
+      if (!(item= field_iterator.create_item(thd)))
+        DBUG_RETURN(TRUE);
+
+      /* cache the table for the Item_fields inserted by expanding stars */
+      if (item->type() == Item::FIELD_ITEM && tables->cacheable_table)
+        ((Item_field *)item)->cached_table= tables;
+
+      if (!found)
+      {
+        found= TRUE;
+        it->replace(item); /* Replace '*' with the first found item. */
+      }
+      else
+        it->after(item);   /* Add 'item' to the SELECT list. */
+
+      if (item->type() == Item::FIELD_ITEM && item->field_type() == MYSQL_TYPE_BIT)
+        (*hidden_bit_fields)++;
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+      /*
+        Set privilege information for the fields of newly created views.
+        We have that (any_privileges == TRUE) if and only if we are creating
+        a view. In the time of view creation we can't use the MERGE algorithm,
+        therefore if 'tables' is itself a view, it is represented by a
+        temporary table. Thus in this case we can be sure that 'item' is an
+        Item_field.
+      */
+      if (any_privileges && !tables->is_with_table() && !tables->is_derived())
+      {
+        DBUG_ASSERT((tables->field_translation == NULL && table) ||
+                    tables->is_natural_join);
+        DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
+        Item_field *fld= (Item_field*) item;
+        const char *field_db_name= field_iterator.get_db_name();
+        const char *field_table_name= field_iterator.get_table_name();
+
+        if (!tables->schema_table && 
+            !(fld->have_privileges=
+              (get_column_grant(thd, field_iterator.grant(),
+                                field_db_name,
+                                field_table_name, fld->field_name.str) &
+               VIEW_ANY_ACL)))
+        {
+          my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0), "ANY",
+                   thd->security_ctx->priv_user,
+                   thd->security_ctx->host_or_ip,
+                   field_db_name, field_table_name);
+          DBUG_RETURN(TRUE);
+        }
+      }
+#endif
+
+      if ((field= field_iterator.field()))
+      {
+        field->table->mark_column_with_deps(field);
+        if (table)
+          table->covering_keys.intersect(field->part_of_key);
+        if (tables->is_natural_join)
+        {
+          TABLE *field_table;
+          /*
+            In this case we are sure that the column ref will not be created
+            because it was already created and stored with the natural join.
+          */
+          Natural_join_column *nj_col;
+          if (!(nj_col= field_iterator.get_natural_column_ref()))
+            DBUG_RETURN(TRUE);
+          DBUG_ASSERT(nj_col->table_field);
+          field_table= nj_col->table_ref->table;
+          if (field_table)
+          {
+            thd->lex->used_tables|= field_table->map;
+            thd->lex->current_select->select_list_tables|=
+              field_table->map;
+            field_table->covering_keys.intersect(field->part_of_key);
+            field_table->used_fields++;
+          }
+        }
+      }
+      else
+        thd->lex->used_tables|= item->used_tables();
+      thd->lex->current_select->cur_pos_in_select_list++;
+    }
+    /*
+      In case of stored tables, all fields are considered as used,
+      while in the case of views, the fields considered as used are the
+      ones marked in setup_tables during fix_fields of view columns.
+      For NATURAL joins, used_tables is updated in the IF above.
+    */
+    if (table)
+      table->used_fields= table->s->fields;
+  }
+  if (found)
+    DBUG_RETURN(FALSE);
+
+  /*
+    TODO: in the case when we skipped all columns because there was a
+    qualified '*', and all columns were coalesced, we have to give a more
+    meaningful message than ER_BAD_TABLE_ERROR.
+  */
+  if (!table_name)
+    my_error(ER_NO_TABLES_USED, MYF(0));
+  else if (!db_name && !thd->db.str)
+    my_error(ER_NO_DB_ERROR, MYF(0));
+  else
+  {
+    char name[FN_REFLEN];
+    my_snprintf(name, sizeof(name), "%s.%s",
+                db_name ? db_name : thd->get_db(), table_name);
+    my_error(ER_BAD_TABLE_ERROR, MYF(0), name);
+  }
+
+  DBUG_RETURN(TRUE);
+}
+
+
+/**
+  Wrap Item_ident
+
+  @param thd             thread handle
+  @param conds           pointer to the condition which should be wrapped
+*/
+
+void wrap_ident(THD *thd, Item **conds)
+{
+  Item_direct_ref_to_ident *wrapper;
+  DBUG_ASSERT((*conds)->type() == Item::FIELD_ITEM || (*conds)->type() == Item::REF_ITEM);
+  Query_arena *arena, backup;
+  /* Allocate on the statement arena so the wrapper survives re-execution. */
+  arena= thd->activate_stmt_arena_if_needed(&backup);
+  /* On allocation failure *conds is left unchanged. */
+  if ((wrapper= new (thd->mem_root) Item_direct_ref_to_ident(thd, (Item_ident *) (*conds))))
+    (*conds)= (Item*) wrapper;
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+}
+
+/**
+  Prepare ON expression
+
+  @param thd             Thread handle
+  @param table           Pointer to table list
+  @param is_update       Update flag
+
+  @retval TRUE error.
+  @retval FALSE OK.
+*/
+
+bool setup_on_expr(THD *thd, TABLE_LIST *table, bool is_update)
+{
+  uchar buff[STACK_BUFF_ALLOC];			// Max argument in function
+  if (check_stack_overrun(thd, STACK_MIN_SIZE, buff))
+    return TRUE;				// Fatal error flag is set!
+  for(; table; table= table->next_local)
+  {
+    TABLE_LIST *embedded; /* The table at the current level of nesting. */
+    TABLE_LIST *embedding= table; /* The parent nested table reference. */
+    /* Walk up the nesting chain, fixing each level's ON expression. */
+    do
+    {
+      embedded= embedding;
+      if (embedded->on_expr)
+      {
+        thd->where="on clause";
+        embedded->on_expr->mark_as_condition_AND_part(embedded);
+        if (embedded->on_expr->fix_fields_if_needed_for_bool(thd,
+                                                           &embedded->on_expr))
+          return TRUE;
+      }
+      /*
+        If it's a semi-join nest, fix its "left expression", as it is used by
+        the SJ-Materialization
+      */
+      if (embedded->sj_subq_pred)
+      {
+        Item **left_expr= embedded->sj_subq_pred->left_exp_ptr();
+        if ((*left_expr)->fix_fields_if_needed(thd, left_expr))
+          return TRUE;
+      }
+
+      embedding= embedded->embedding;
+    }
+    while (embedding &&
+           embedding->nested_join->join_list.head() == embedded);
+
+    /* Recurse into merged derived tables/views to fix their ON clauses too. */
+    if (table->is_merged_derived())
+    {
+      SELECT_LEX *select_lex= table->get_single_select();
+      setup_on_expr(thd, select_lex->get_table_list(), is_update);
+    }
+
+    /* process CHECK OPTION */
+    if (is_update)
+    {
+      TABLE_LIST *view= table->top_table();
+      if (view->effective_with_check)
+      {
+        if (view->prepare_check_option(thd))
+          return TRUE;
+        thd->change_item_tree(&table->check_option, view->check_option);
+      }
+    }
+  }
+  return FALSE;
+}
+
+/*
+  Fix all conditions and outer join expressions.
+
+  SYNOPSIS
+    setup_conds()
+    thd     thread handler
+    tables  list of tables for name resolving (select_lex->table_list)
+    leaves  list of leaves of join table tree (select_lex->leaf_tables)
+    conds   WHERE clause
+
+  DESCRIPTION
+    Runs fix_fields() on the WHERE clause and on all ON expressions at all
+    levels of nesting, including the ones inside view definitions, and
+    prepares views' WITH CHECK OPTION clauses for updating statements.
+
+  RETURN
+    TRUE  if some error occurred (e.g. out of memory)
+    FALSE if all is OK
+*/
+
+int setup_conds(THD *thd, TABLE_LIST *tables, List<TABLE_LIST> &leaves,
+                COND **conds)
+{
+  SELECT_LEX *select_lex= thd->lex->current_select;
+  TABLE_LIST *table= NULL;	// For HP compilers
+  /*
+    it_is_update set to TRUE when tables of primary SELECT_LEX (SELECT_LEX
+    which belong to LEX, i.e. most up SELECT) will be updated by
+    INSERT/UPDATE/LOAD
+    NOTE: using this condition helps to prevent call of prepare_check_option()
+    from subquery of VIEW, because tables of subquery belongs to VIEW
+    (see condition before prepare_check_option() call)
+  */
+  bool it_is_update= (select_lex == thd->lex->first_select_lex()) &&
+    thd->lex->which_check_option_applicable();
+  bool save_is_item_list_lookup= select_lex->is_item_list_lookup;
+  TABLE_LIST *derived= select_lex->master_unit()->derived;
+  bool save_resolve_in_select_list= select_lex->context.resolve_in_select_list;
+  DBUG_ENTER("setup_conds");
+
+  /* Conditions are never resolved against the SELECT's item list. */
+  select_lex->is_item_list_lookup= 0;
+  select_lex->context.resolve_in_select_list= false;
+
+  thd->column_usage= MARK_COLUMNS_READ;
+  DBUG_PRINT("info", ("thd->column_usage: %d", thd->column_usage));
+  select_lex->cond_count= 0;
+  select_lex->between_count= 0;
+  select_lex->max_equal_elems= 0;
+
+  /* Merge WHERE parts of views merged for INSERT into the main condition. */
+  for (table= tables; table; table= table->next_local)
+  {
+    if (select_lex == thd->lex->first_select_lex() &&
+        select_lex->first_cond_optimization &&
+        table->merged_for_insert &&
+        table->prepare_where(thd, conds, FALSE))
+      goto err_no_arena;
+  }
+
+  if (*conds)
+  {
+    thd->where="where clause";
+    DBUG_EXECUTE("where",
+                 print_where(*conds,
+                             "WHERE in setup_conds",
+                             QT_ORDINARY););
+    /*
+      Wrap alone field in WHERE clause in case it will be outer field of subquery
+      which need persistent pointer on it, but conds could be changed by optimizer
+    */
+    if ((*conds)->type() == Item::FIELD_ITEM && !derived)
+      wrap_ident(thd, conds);
+    (*conds)->mark_as_condition_AND_part(NO_JOIN_NEST);
+    if ((*conds)->fix_fields_if_needed_for_bool(thd, conds))
+      goto err_no_arena;
+  }
+
+  /*
+    Apply fix_fields() to all ON clauses at all levels of nesting,
+    including the ones inside view definitions.
+  */
+  if (setup_on_expr(thd, tables, it_is_update))
+    goto err_no_arena;
+
+  if (!thd->stmt_arena->is_conventional())
+  {
+    /*
+      We are in prepared statement preparation code => we should store
+      WHERE clause changing for next executions.
+
+      We do this ON -> WHERE transformation only once per PS/SP statement.
+    */
+    select_lex->where= *conds;
+  }
+  /* Restore the name-resolution state saved on entry. */
+  thd->lex->current_select->is_item_list_lookup= save_is_item_list_lookup;
+  select_lex->context.resolve_in_select_list= save_resolve_in_select_list;
+  DBUG_RETURN(thd->is_error());
+
+err_no_arena:
+  select_lex->is_item_list_lookup= save_is_item_list_lookup;
+  DBUG_RETURN(1);
+}
+
+
+/**
+  For a system-versioned table, either let the server set the period
+  fields via vers_update_fields(), or -- when the history row is being
+  inserted directly -- validate the user-supplied ROW_START/ROW_END.
+
+  @param table  the table being written (no-op if not versioned)
+
+  @retval 0  OK (non-versioned, fields set, or supplied values are valid)
+  @retval 1  error: invalid ROW_START value (ER_WRONG_VALUE raised)
+*/
+static bool vers_update_or_validate_fields(TABLE *table)
+{
+  if (!table->versioned())
+    return 0;
+  DBUG_ASSERT(table->vers_write);
+
+  if (table->vers_update_fields())
+    return 0;
+
+  Field *row_start= table->vers_start_field();
+  Field *row_end= table->vers_end_field();
+  MYSQL_TIME ltime;
+
+  /*
+     Inserting the history row directly, check ROW_START < ROW_END and
+     ROW_START is non-zero.
+  */
+  if ((row_start->cmp(row_start->ptr, row_end->ptr) < 0) &&
+      !row_start->get_date(&ltime, Datetime::Options(
+         TIME_NO_ZERO_DATE, time_round_mode_t(time_round_mode_t::FRAC_NONE))))
+    return 0;
+
+  /* Validation failed: report the offending ROW_START value. */
+  StringBuffer<MAX_DATETIME_FULL_WIDTH> val;
+  row_start->val_str(&val);
+  my_error(ER_WRONG_VALUE, MYF(0), row_start->field_name.str, val.c_ptr());
+  return 1;
+}
+
+
+/******************************************************************************
+** Fill a record with data (for INSERT or UPDATE)
+** Returns : 1 if some field has wrong type
+******************************************************************************/
+
+
+/**
+  Fill the fields of a table with the values of an Item list
+
+  @param thd           thread handler
+  @param table_arg     the table that is being modified
+  @param fields        Item_fields list to be filled
+  @param values        values to fill with
+  @param ignore_errors TRUE if we should ignore errors
+  @param update        TRUE if update query
+
+  @details
+    fill_record() may set table->auto_increment_field_not_null and a
+    caller should make sure that it is reset after their last call to this
+    function.
+    default functions are executed for inserts.
+    virtual fields are always updated
+
+  @return Status
+  @retval true An error occurred.
+  @retval false OK.
+*/
+
+bool
+fill_record(THD *thd, TABLE *table_arg, List<Item> &fields, List<Item> &values,
+            bool ignore_errors, bool update)
+{
+  List_iterator_fast<Item> f(fields), v(values);
+  Item *value, *fld;
+  Item_field *field;
+  Field *rfield;
+  TABLE *table;
+  /* Stays true while only unversioned columns of a versioned table are set. */
+  bool only_unvers_fields= update && table_arg->versioned();
+  bool save_abort_on_warning= thd->abort_on_warning;
+  bool save_no_errors= thd->no_errors;
+  DBUG_ENTER("fill_record");
+
+  thd->no_errors= ignore_errors;
+  /*
+    Reset the table->auto_increment_field_not_null as it is valid for
+    only one row.
+  */
+  if (fields.elements)
+    table_arg->auto_increment_field_not_null= FALSE;
+
+  while ((fld= f++))
+  {
+    if (!(field= fld->field_for_view_update()))
+    {
+      my_error(ER_NONUPDATEABLE_COLUMN, MYF(0), fld->name.str);
+      goto err;
+    }
+    value=v++;
+    DBUG_ASSERT(value);
+    rfield= field->field;
+    table= rfield->table;
+    if (table->next_number_field &&
+        rfield->field_index ==  table->next_number_field->field_index)
+      table->auto_increment_field_not_null= TRUE;
+
+    /* System-versioning fields are normally not user-writable. */
+    const bool skip_sys_field= rfield->vers_sys_field() &&
+                       (update || !thd->vers_insert_history_fast(table));
+
+    if ((rfield->vcol_info || skip_sys_field) &&
+        !value->vcol_assignment_allowed_value() &&
+        table->s->table_category != TABLE_CATEGORY_TEMPORARY)
+    {
+      push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+                          ER_WARNING_NON_DEFAULT_VALUE_FOR_GENERATED_COLUMN,
+                          ER_THD(thd, ER_WARNING_NON_DEFAULT_VALUE_FOR_GENERATED_COLUMN),
+                          rfield->field_name.str, table->s->table_name.str);
+    }
+    if (only_unvers_fields && !rfield->vers_update_unversioned())
+      only_unvers_fields= false;
+
+    if (rfield->stored_in_db())
+    {
+      if (!skip_sys_field)
+      {
+        if (value->save_in_field(rfield, 0) < 0 && !ignore_errors)
+        {
+          my_message(ER_UNKNOWN_ERROR, ER_THD(thd, ER_UNKNOWN_ERROR), MYF(0));
+          goto err;
+        }
+        rfield->set_has_explicit_value();
+      }
+      /*
+        In sql MODE_SIMULTANEOUS_ASSIGNMENT,
+        move field pointer on value stored in record[1]
+        which contains row before update (see MDEV-13417)
+      */
+      if (update && thd->variables.sql_mode & MODE_SIMULTANEOUS_ASSIGNMENT)
+        rfield->move_field_offset((my_ptrdiff_t) (table->record[1] -
+                                                  table->record[0]));
+    }
+  }
+
+  if (update && thd->variables.sql_mode & MODE_SIMULTANEOUS_ASSIGNMENT)
+  {
+    // restore fields pointers on record[0]
+    f.rewind();
+    while ((fld= f++))
+    {
+      rfield= fld->field_for_view_update()->field;
+      if (rfield->stored_in_db())
+      {
+        table= rfield->table;
+        rfield->move_field_offset((my_ptrdiff_t) (table->record[0] -
+                                                  table->record[1]));
+      }
+    }
+  }
+
+  /* Evaluate ON UPDATE / DEFAULT expressions for fields not set above. */
+  if (update)
+    table_arg->evaluate_update_default_function();
+  else
+    if (table_arg->default_field &&
+        table_arg->update_default_fields(ignore_errors))
+      goto err;
+
+  if (!only_unvers_fields && vers_update_or_validate_fields(table_arg))
+      goto err;
+
+  /* Update virtual fields */
+  if (table_arg->vfield &&
+      table_arg->update_virtual_fields(table_arg->file, VCOL_UPDATE_FOR_WRITE))
+    goto err;
+  thd->abort_on_warning= save_abort_on_warning;
+  thd->no_errors=        save_no_errors;
+  DBUG_RETURN(thd->is_error());
+err:
+  DBUG_PRINT("error",("got error"));
+  thd->abort_on_warning= save_abort_on_warning;
+  thd->no_errors=        save_no_errors;
+  if (fields.elements)
+    table_arg->auto_increment_field_not_null= FALSE;
+  DBUG_RETURN(TRUE);
+}
+
+
+/**
+  Prepare Item_field's for fill_record_n_invoke_before_triggers()
+
+  This means redirecting from table->field to
+  table->field_to_fill(), if needed.
+
+  @param items  items whose field references may need redirecting to the
+                nullable shadow field array
+  @param table  the table the items belong to
+*/
+void switch_to_nullable_trigger_fields(List<Item> &items, TABLE *table)
+{
+  Field** field= table->field_to_fill();
+
+ /* True if we have NOT NULL fields and BEFORE triggers */
+  if (field != table->field)
+  {
+    List_iterator_fast<Item> it(items);
+    Item *item;
+
+    while ((item= it++))
+      item->walk(&Item::switch_to_nullable_fields_processor, 1, field);
+    table->triggers->reset_extra_null_bitmap();
+  }
+}
+
+
+/**
+  Prepare virtual fields and fields with default expressions to use
+  trigger fields.
+
+  Redirects the field references inside default-value expressions from
+  table->field to table->field_to_fill(), when the two differ (i.e. when
+  NOT NULL fields and BEFORE triggers require the nullable shadow array).
+
+  @param table  the table whose default fields are redirected
+*/
+
+void switch_defaults_to_nullable_trigger_fields(TABLE *table)
+{
+  if (!table->default_field)
+    return; // no defaults
+
+  Field **shadow= table->field_to_fill();
+
+  /* Nothing to redirect unless the shadow array differs from the base one */
+  if (*shadow == *table->field)
+    return;
+
+  for (Field **dflt= table->default_field; *dflt; dflt++)
+  {
+    Field *base= *dflt;
+    /* Redirect field references inside the default expression ... */
+    base->default_value->expr->walk(&Item::switch_to_nullable_fields_processor,
+                                    1, shadow);
+    /* ... and make the default-field slot point at the shadow field. */
+    *dflt= shadow[base->field_index];
+  }
+}
+
+
+/**
+  Test NOT NULL constraint after BEFORE triggers
+
+  When BEFORE triggers run with the nullable shadow field array (from
+  table->field_to_fill()), check afterwards whether any shadowed field was
+  left NULL, and let convert_null_to_field_value_or_error() either supply
+  the field's value or raise an error for the original NOT NULL field.
+
+  @param table  the table whose fields are checked
+
+  @retval true   error (a NULL value was rejected, or thd has an error)
+  @retval false  OK
+*/
+static bool not_null_fields_have_null_values(TABLE *table)
+{
+  Field **orig_field= table->field;
+  Field **filled_field= table->field_to_fill();
+
+  /* field_to_fill() returns table->field itself when no shadowing is needed */
+  if (filled_field != orig_field)
+  {
+    THD *thd=table->in_use;
+    for (uint i=0; i < table->s->fields; i++)
+    {
+      Field *of= orig_field[i];
+      Field *ff= filled_field[i];
+      if (ff != of)
+      {
+        // copy after-update flags to of, copy before-update flags to ff
+        swap_variables(uint32, of->flags, ff->flags);
+        if (ff->is_real_null())
+        {
+          ff->set_notnull(); // for next row WHERE condition in UPDATE
+          if (convert_null_to_field_value_or_error(of) || thd->is_error())
+            return true;
+        }
+      }
+    }
+  }
+
+  return false;
+}
+
+/**
+  Fill fields in list with values from the list of items and invoke
+  before triggers.
+
+  @param thd           thread context
+  @param table         the table that is being modified
+  @param fields        Item_fields list to be filled
+  @param values        values to fill with
+  @param ignore_errors TRUE if we should ignore errors
+  @param event         event type for triggers to be invoked
+
+  @detail
+    This function assumes that fields which values will be set and triggers
+    to be invoked belong to the same table, and that TABLE::record[0] and
+    record[1] buffers correspond to new and old versions of row respectively.
+
+  @return Status
+  @retval true An error occurred.
+  @retval false OK.
+*/
+
+bool
+fill_record_n_invoke_before_triggers(THD *thd, TABLE *table,
+                                     List<Item> &fields,
+                                     List<Item> &values, bool ignore_errors,
+                                     enum trg_event_type event)
+{
+  int result;
+  Table_triggers_list *triggers= table->triggers;
+
+  /* UPDATE semantics apply only for TRG_EVENT_UPDATE. */
+  result= fill_record(thd, table, fields, values, ignore_errors,
+                      event == TRG_EVENT_UPDATE);
+
+  if (!result && triggers)
+  {
+    if (triggers->process_triggers(thd, event, TRG_ACTION_BEFORE,
+                                    TRUE) ||
+        not_null_fields_have_null_values(table))
+      return TRUE;
+
+    /*
+      Re-calculate virtual fields to cater for cases when base columns are
+      updated by the triggers.
+    */
+    if (table->vfield && fields.elements)
+    {
+      Item *fld= (Item_field*) fields.head();
+      Item_field *item_field= fld->field_for_view_update();
+      if (item_field)
+      {
+        DBUG_ASSERT(table == item_field->field->table);
+        result|= table->update_virtual_fields(table->file,
+                                              VCOL_UPDATE_FOR_WRITE);
+      }
+    }
+  }
+  return result;
+}
+
+
+/**
+  Fill the field buffer of a table with the values of an Item list
+  All fields are given a value
+
+  @param thd           thread handler
+  @param table_arg     the table that is being modified
+  @param ptr           pointer on pointer to record of fields
+  @param values        values to fill with
+  @param ignore_errors TRUE if we should ignore errors
+  @param use_value     forces usage of value of the items instead of result
+
+  @details
+    fill_record() may set table->auto_increment_field_not_null and a
+    caller should make sure that it is reset after their last call to this
+    function.
+
+  @return Status
+  @retval true An error occurred.
+  @retval false OK.
+*/
+
+bool
+fill_record(THD *thd, TABLE *table, Field **ptr, List &values,
+            bool ignore_errors, bool use_value)
+{
+  List_iterator_fast v(values);
+  List
tbl_list; + Item *value; + Field *field; + bool abort_on_warning_saved= thd->abort_on_warning; + uint autoinc_index= table->next_number_field + ? table->next_number_field->field_index + : ~0U; + DBUG_ENTER("fill_record"); + if (!*ptr) + { + /* No fields to update, quite strange!*/ + DBUG_RETURN(0); + } + + /* + On INSERT or UPDATE fields are checked to be from the same table, + thus we safely can take table from the first field. + */ + DBUG_ASSERT((*ptr)->table == table); + + /* + Reset the table->auto_increment_field_not_null as it is valid for + only one row. + */ + table->auto_increment_field_not_null= FALSE; + while ((field = *ptr++) && ! thd->is_error()) + { + /* Ensure that all fields are from the same table */ + DBUG_ASSERT(field->table == table); + + if (unlikely(field->invisible)) + continue; + + value=v++; + /* Ensure the end of the list of values is not reached */ + DBUG_ASSERT(value); + + const bool skip_sys_field= field->vers_sys_field() && + !thd->vers_insert_history_fast(table); + + if (field->field_index == autoinc_index) + table->auto_increment_field_not_null= TRUE; + if ((unlikely(field->vcol_info) || (skip_sys_field && !ignore_errors)) && + !value->vcol_assignment_allowed_value() && + table->s->table_category != TABLE_CATEGORY_TEMPORARY) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARNING_NON_DEFAULT_VALUE_FOR_GENERATED_COLUMN, + ER_THD(thd, ER_WARNING_NON_DEFAULT_VALUE_FOR_GENERATED_COLUMN), + field->field_name.str, table->s->table_name.str); + } + + if (skip_sys_field) + continue; + + if (use_value) + value->save_val(field); + else + if (value->save_in_field(field, 0) < 0) + goto err; + field->set_has_explicit_value(); + } + /* Update virtual fields if there wasn't any errors */ + if (!thd->is_error()) + { + thd->abort_on_warning= FALSE; + if (table->default_field && table->update_default_fields(ignore_errors)) + goto err; + if (vers_update_or_validate_fields(table)) + goto err; + if (table->vfield && + 
table->update_virtual_fields(table->file, VCOL_UPDATE_FOR_WRITE)) + goto err; + thd->abort_on_warning= abort_on_warning_saved; + } + DBUG_RETURN(thd->is_error()); + +err: + thd->abort_on_warning= abort_on_warning_saved; + table->auto_increment_field_not_null= FALSE; + DBUG_RETURN(TRUE); +} + + +/* + Fill fields in an array with values from the list of items and invoke + before triggers. + + @param thd thread context + @param table the table that is being modified + @param ptr the fields to be filled + @param values values to fill with + @param ignore_errors TRUE if we should ignore errors + @param event event type for triggers to be invoked + + @detail + This function assumes that fields which values will be set and triggers + to be invoked belong to the same table, and that TABLE::record[0] and + record[1] buffers correspond to new and old versions of row respectively. + + @return Status + @retval true An error occurred. + @retval false OK. +*/ + +bool +fill_record_n_invoke_before_triggers(THD *thd, TABLE *table, Field **ptr, + List &values, bool ignore_errors, + enum trg_event_type event) +{ + bool result; + Table_triggers_list *triggers= table->triggers; + + result= fill_record(thd, table, ptr, values, ignore_errors, FALSE); + + if (!result && triggers && *ptr) + result= triggers->process_triggers(thd, event, TRG_ACTION_BEFORE, TRUE) || + not_null_fields_have_null_values(table); + /* + Re-calculate virtual fields to cater for cases when base columns are + updated by the triggers. 
+ */ + if (!result && triggers && *ptr) + { + DBUG_ASSERT(table == (*ptr)->table); + if (table->vfield) + result= table->update_virtual_fields(table->file, VCOL_UPDATE_FOR_WRITE); + } + return result; + +} + + +my_bool mysql_rm_tmp_tables(void) +{ + size_t i, idx; + char path[FN_REFLEN], *tmpdir, path_copy[FN_REFLEN]; + MY_DIR *dirp; + FILEINFO *file; + TABLE_SHARE share; + THD *thd; + DBUG_ENTER("mysql_rm_tmp_tables"); + + if (!(thd= new THD(0))) + DBUG_RETURN(1); + thd->thread_stack= (char*) &thd; + thd->store_globals(); + + for (i=0; i<=mysql_tmpdir_list.max; i++) + { + tmpdir=mysql_tmpdir_list.list[i]; + /* See if the directory exists */ + if (!(dirp = my_dir(tmpdir,MYF(MY_WME | MY_DONT_SORT)))) + continue; + + /* Remove all SQLxxx tables from directory */ + + for (idx=0 ; idx < dirp->number_of_files ; idx++) + { + file=dirp->dir_entry+idx; + + if (!strncmp(file->name, tmp_file_prefix, tmp_file_prefix_length)) + { + char *ext= fn_ext(file->name); + size_t ext_len= strlen(ext); + size_t path_len= my_snprintf(path, sizeof(path), + "%s%c%s", tmpdir, FN_LIBCHAR, + file->name); + if (!strcmp(reg_ext, ext)) + { + /* We should cut file extention before deleting of table */ + memcpy(path_copy, path, path_len - ext_len); + path_copy[path_len - ext_len]= 0; + init_tmp_table_share(thd, &share, "", 0, "", path_copy); + if (!open_table_def(thd, &share)) + share.db_type()->drop_table(share.db_type(), path_copy); + free_table_share(&share); + } + /* + File can be already deleted by tmp_table.file->delete_table(). + So we hide error messages which happnes during deleting of these + files(MYF(0)). 
+ */ + (void) mysql_file_delete(key_file_misc, path, MYF(0)); + } + } + my_dirend(dirp); + } + delete thd; + DBUG_RETURN(0); +} + + +/***************************************************************************** + unireg support functions +*****************************************************************************/ + +int setup_ftfuncs(SELECT_LEX *select_lex) +{ + List_iterator li(*(select_lex->ftfunc_list)), + lj(*(select_lex->ftfunc_list)); + Item_func_match *ftf, *ftf2; + + while ((ftf=li++)) + { + if (ftf->fix_index()) + return 1; + lj.rewind(); + while ((ftf2=lj++) != ftf) + { + if (ftf->eq(ftf2,1) && !ftf2->master) + ftf2->master=ftf; + } + } + + return 0; +} + + +void cleanup_ftfuncs(SELECT_LEX *select_lex) +{ + List_iterator li(*(select_lex->ftfunc_list)), + lj(*(select_lex->ftfunc_list)); + Item_func_match *ftf; + + while ((ftf=li++)) + { + ftf->cleanup(); + } +} + + +int init_ftfuncs(THD *thd, SELECT_LEX *select_lex, bool no_order) +{ + if (select_lex->ftfunc_list->elements) + { + List_iterator li(*(select_lex->ftfunc_list)); + Item_func_match *ifm; + + while ((ifm=li++)) + if (unlikely(!ifm->fixed())) + /* + it mean that clause where was FT function was removed, so we have + to remove the function from the list. + */ + li.remove(); + else if (ifm->init_search(thd, no_order)) + return 1; + } + return 0; +} + + +bool is_equal(const LEX_CSTRING *a, const LEX_CSTRING *b) +{ + return a->length == b->length && !strncmp(a->str, b->str, a->length); +} + +/* + Open and lock system tables for read. + + SYNOPSIS + open_system_tables_for_read() + thd Thread context. + table_list List of tables to open. + + NOTES + Caller should have used start_new_trans object to start a new + transcation when reading system tables. + + Thanks to restrictions which we put on opening and locking of + system tables for writing, we can open and lock them for reading + even when we already have some other tables open and locked. 
+ One should call thd->commit_whole_transaction_and_close_tables() + to close systems tables opened with this call. + + NOTES + In some situations we use this function to open system tables for + writing. It happens, for examples, with statistical tables when + they are updated by an ANALYZE command. In these cases we should + guarantee that system tables will not be deadlocked. + + RETURN + FALSE Success + TRUE Error +*/ + +bool +open_system_tables_for_read(THD *thd, TABLE_LIST *table_list) +{ + Query_tables_list query_tables_list_backup; + LEX *lex= thd->lex; + DBUG_ENTER("open_system_tables_for_read"); + DBUG_ASSERT(thd->internal_transaction()); + + /* + Besides using new Open_tables_state for opening system tables, + we also have to backup and reset/and then restore part of LEX + which is accessed by open_tables() in order to determine if + prelocking is needed and what tables should be added for it. + */ + lex->reset_n_backup_query_tables_list(&query_tables_list_backup); + thd->lex->sql_command= SQLCOM_SELECT; + + /* + Only use MYSQL_LOCK_IGNORE_TIMEOUT for tables opened for read. + This is to ensure that lock_wait_timeout is honored when trying + to update stats tables. + */ + if (open_and_lock_tables(thd, table_list, FALSE, + (MYSQL_OPEN_IGNORE_FLUSH | + MYSQL_OPEN_IGNORE_LOGGING_FORMAT | + (table_list->lock_type < TL_FIRST_WRITE ? 
+ MYSQL_LOCK_IGNORE_TIMEOUT : 0)))) + { + lex->restore_backup_query_tables_list(&query_tables_list_backup); + DBUG_RETURN(TRUE); + } + + for (TABLE_LIST *tables= table_list; tables; tables= tables->next_global) + { + DBUG_ASSERT(tables->table->s->table_category == TABLE_CATEGORY_SYSTEM); + tables->table->file->row_logging= 0; + tables->table->use_all_columns(); + } + lex->restore_backup_query_tables_list(&query_tables_list_backup); + + DBUG_RETURN(FALSE); +} + +/** + A helper function to close a mysql.* table opened + in an auxiliary THD during bootstrap or in the main + connection, when we know that there are no locks + held by the connection due to a preceding implicit + commit. + + We need this function since we'd like to not + just close the system table, but also release + the metadata lock on it. + + Note, that in LOCK TABLES mode this function + does not release the metadata lock. But in this + mode the table can be opened only if it is locked + explicitly with LOCK TABLES. +*/ + +void +close_mysql_tables(THD *thd) +{ + if (! thd->in_sub_stmt) + { + trans_commit_stmt(thd); + trans_commit(thd); + } + close_thread_tables(thd); + thd->release_transactional_locks(); +} + +/* + Open and lock one system table for update. + + SYNOPSIS + open_system_table_for_update() + thd Thread context. + one_table Table to open. + + NOTES + Table opened with this call should closed using close_thread_tables(). + + RETURN + 0 Error + # Pointer to TABLE object of system table +*/ + +TABLE * +open_system_table_for_update(THD *thd, TABLE_LIST *one_table) +{ + DBUG_ENTER("open_system_table_for_update"); + + TABLE *table= open_ltable(thd, one_table, one_table->lock_type, + MYSQL_LOCK_IGNORE_TIMEOUT); + if (table) + { + DBUG_ASSERT(table->s->table_category == TABLE_CATEGORY_SYSTEM); + table->use_all_columns(); + /* This table instance is not row logged */ + table->file->row_logging= 0; + } + DBUG_RETURN(table); +} + +/** + Open a log table. 
+ Opening such tables is performed internally in the server + implementation, and is a 'nested' open, since some tables + might be already opened by the current thread. + The thread context before this call is saved, and is restored + when calling close_log_table(). + @param thd The current thread + @param one_table Log table to open + @param backup [out] Temporary storage used to save the thread context +*/ +TABLE * +open_log_table(THD *thd, TABLE_LIST *one_table, Open_tables_backup *backup) +{ + uint flags= ( MYSQL_OPEN_IGNORE_GLOBAL_READ_LOCK | + MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY | + MYSQL_OPEN_IGNORE_FLUSH | + MYSQL_LOCK_IGNORE_TIMEOUT | + MYSQL_LOCK_LOG_TABLE); + TABLE *table; + /* Save value that is changed in mysql_lock_tables() */ + ulonglong save_utime_after_lock= thd->utime_after_lock; + DBUG_ENTER("open_log_table"); + + thd->reset_n_backup_open_tables_state(backup); + + if ((table= open_ltable(thd, one_table, one_table->lock_type, flags))) + { + DBUG_ASSERT(table->s->table_category == TABLE_CATEGORY_LOG); + DBUG_ASSERT(!table->file->row_logging); + + /* Make sure all columns get assigned to a default value */ + table->use_all_columns(); + DBUG_ASSERT(table->s->no_replicate); + } + else + thd->restore_backup_open_tables_state(backup); + + thd->utime_after_lock= save_utime_after_lock; + DBUG_RETURN(table); +} + +/** + Close a log table. + The last table opened by open_log_table() + is closed, then the thread context is restored. + @param thd The current thread + @param backup [in] the context to restore. +*/ + +void close_log_table(THD *thd, Open_tables_backup *backup) +{ + /* + Inform the transaction handler that we are closing the + system tables and we don't need the read view anymore. 
+ */ + for (TABLE *table= thd->open_tables ; table ; table= table->next) + table->file->extra(HA_EXTRA_PREPARE_FOR_FORCED_CLOSE); + close_thread_tables(thd); + thd->restore_backup_open_tables_state(backup); +} + + +/** + @brief + Remove 'fixed' flag from items in a list + + @param items list of items to un-fix + + @details + This function sets to 0 the 'fixed' flag for items in the 'items' list. + It's needed to force correct marking of views' fields for INSERT/UPDATE + statements. +*/ + +void unfix_fields(List &fields) +{ + List_iterator li(fields); + Item *item; + while ((item= li++)) + item->unfix_fields(); +} + + +/** + Check result of dynamic column function and issue error if it is needed + + @param rc The result code of dynamic column function + + @return the result code which was get as an argument\ +*/ + +int dynamic_column_error_message(enum_dyncol_func_result rc) +{ + switch (rc) { + case ER_DYNCOL_YES: + case ER_DYNCOL_OK: + case ER_DYNCOL_TRUNCATED: + break; // it is not an error + case ER_DYNCOL_FORMAT: + my_error(ER_DYN_COL_WRONG_FORMAT, MYF(0)); + break; + case ER_DYNCOL_LIMIT: + my_error(ER_DYN_COL_IMPLEMENTATION_LIMIT, MYF(0)); + break; + case ER_DYNCOL_RESOURCE: + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + break; + case ER_DYNCOL_DATA: + my_error(ER_DYN_COL_DATA, MYF(0)); + break; + case ER_DYNCOL_UNKNOWN_CHARSET: + my_error(ER_DYN_COL_WRONG_CHARSET, MYF(0)); + break; + } + return rc; +} + +/** + @} (end of group Data_Dictionary) +*/ diff --git a/sql/sql_base.h b/sql/sql_base.h new file mode 100644 index 00000000..ac7024a1 --- /dev/null +++ b/sql/sql_base.h @@ -0,0 +1,678 @@ +/* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2011, 2018, MariaDB + + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_BASE_INCLUDED +#define SQL_BASE_INCLUDED + +#include "sql_class.h" /* enum_column_usage */ +#include "sql_trigger.h" /* trg_event_type */ +#include "mysqld.h" /* key_map */ +#include "table_cache.h" + +class Item_ident; +struct Name_resolution_context; +class Open_table_context; +class Open_tables_state; +class Prelocking_strategy; +struct TABLE_LIST; +class THD; +struct handlerton; +struct TABLE; + +typedef class st_select_lex SELECT_LEX; + +typedef struct st_lock_param_type ALTER_PARTITION_PARAM_TYPE; + +/* + This enumeration type is used only by the function find_item_in_list + to return the info on how an item has been resolved against a list + of possibly aliased items. + The item can be resolved: + - against an alias name of the list's element (RESOLVED_AGAINST_ALIAS) + - against non-aliased field name of the list (RESOLVED_WITH_NO_ALIAS) + - against an aliased field name of the list (RESOLVED_BEHIND_ALIAS) + - ignoring the alias name in cases when SQL requires to ignore aliases + (e.g. 
when the resolved field reference contains a table name or + when the resolved item is an expression) (RESOLVED_IGNORING_ALIAS) +*/ +enum enum_resolution_type { + NOT_RESOLVED=0, + RESOLVED_IGNORING_ALIAS, + RESOLVED_BEHIND_ALIAS, + RESOLVED_WITH_NO_ALIAS, + RESOLVED_AGAINST_ALIAS +}; + +/* Argument to flush_tables() of what to flush */ +enum flush_tables_type { + FLUSH_ALL, + FLUSH_NON_TRANS_TABLES, + FLUSH_SYS_TABLES +}; + +enum find_item_error_report_type {REPORT_ALL_ERRORS, REPORT_EXCEPT_NOT_FOUND, + IGNORE_ERRORS, REPORT_EXCEPT_NON_UNIQUE, + IGNORE_EXCEPT_NON_UNIQUE}; + +/* Flag bits for unique_table() */ +#define CHECK_DUP_ALLOW_DIFFERENT_ALIAS 1 +#define CHECK_DUP_FOR_CREATE 2 +#define CHECK_DUP_SKIP_TEMP_TABLE 4 + +uint get_table_def_key(const TABLE_LIST *table_list, const char **key); +TABLE *open_ltable(THD *thd, TABLE_LIST *table_list, thr_lock_type update, + uint lock_flags); + +/* mysql_lock_tables() and open_table() flags bits */ +#define MYSQL_OPEN_IGNORE_GLOBAL_READ_LOCK 0x0001 +#define MYSQL_OPEN_IGNORE_FLUSH 0x0002 +/* MYSQL_OPEN_TEMPORARY_ONLY (0x0004) is not used anymore. */ +#define MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY 0x0008 +#define MYSQL_LOCK_LOG_TABLE 0x0010 +/** + Do not try to acquire a metadata lock on the table: we + already have one. +*/ +#define MYSQL_OPEN_HAS_MDL_LOCK 0x0020 +/** + If in locked tables mode, ignore the locked tables and get + a new instance of the table. +*/ +#define MYSQL_OPEN_GET_NEW_TABLE 0x0040 +/* 0x0080 used to be MYSQL_OPEN_SKIP_TEMPORARY */ +/** Fail instead of waiting when conficting metadata lock is discovered. */ +#define MYSQL_OPEN_FAIL_ON_MDL_CONFLICT 0x0100 +/** Open tables using MDL_SHARED lock instead of one specified in parser. */ +#define MYSQL_OPEN_FORCE_SHARED_MDL 0x0200 +/** + Open tables using MDL_SHARED_HIGH_PRIO lock instead of one specified + in parser. 
+*/ +#define MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL 0x0400 +/** + When opening or locking the table, use the maximum timeout + (LONG_TIMEOUT = 1 year) rather than the user-supplied timeout value. +*/ +#define MYSQL_LOCK_IGNORE_TIMEOUT 0x0800 +/** + When acquiring "strong" (SNW, SNRW, X) metadata locks on tables to + be open do not acquire global and schema-scope IX locks. +*/ +#define MYSQL_OPEN_SKIP_SCOPED_MDL_LOCK 0x1000 +#define MYSQL_LOCK_NOT_TEMPORARY 0x2000 +#define MYSQL_LOCK_USE_MALLOC 0x4000 +/** + Only check THD::killed if waits happen (e.g. wait on MDL, wait on + table flush, wait on thr_lock.c locks) while opening and locking table. +*/ +#define MYSQL_OPEN_IGNORE_KILLED 0x8000 +/** + Don't try to auto-repair table +*/ +#define MYSQL_OPEN_IGNORE_REPAIR 0x10000 + +/** + Don't call decide_logging_format. Used for statistic tables etc +*/ +#define MYSQL_OPEN_IGNORE_LOGGING_FORMAT 0x20000 + +/* Don't use statistics tables */ +#define MYSQL_OPEN_IGNORE_ENGINE_STATS 0x40000 + +/** Please refer to the internals manual. 
*/ +#define MYSQL_OPEN_REOPEN (MYSQL_OPEN_IGNORE_FLUSH |\ + MYSQL_OPEN_IGNORE_GLOBAL_READ_LOCK |\ + MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY |\ + MYSQL_LOCK_IGNORE_TIMEOUT |\ + MYSQL_OPEN_GET_NEW_TABLE |\ + MYSQL_OPEN_HAS_MDL_LOCK) + +bool is_locked_view(THD *thd, TABLE_LIST *t); +bool open_table(THD *thd, TABLE_LIST *table_list, Open_table_context *ot_ctx); + +bool get_key_map_from_key_list(key_map *map, TABLE *table, + List *index_list); +TABLE *find_locked_table(TABLE *list, const char *db, const char *table_name); +TABLE *find_write_locked_table(TABLE *list, const char *db, + const char *table_name); +thr_lock_type read_lock_type_for_table(THD *thd, + Query_tables_list *prelocking_ctx, + TABLE_LIST *table_list, + bool routine_modifies_data); + +my_bool mysql_rm_tmp_tables(void); +void close_tables_for_reopen(THD *thd, TABLE_LIST **tables, + const MDL_savepoint &start_of_statement_svp); +bool table_already_fk_prelocked(TABLE_LIST *tl, LEX_CSTRING *db, + LEX_CSTRING *table, thr_lock_type lock_type); +TABLE_LIST *find_table_in_list(TABLE_LIST *table, + TABLE_LIST *TABLE_LIST::*link, + const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name); +int close_thread_tables(THD *thd); +int close_thread_tables_for_query(THD *thd); +void switch_to_nullable_trigger_fields(List &items, TABLE *); +void switch_defaults_to_nullable_trigger_fields(TABLE *table); +bool fill_record_n_invoke_before_triggers(THD *thd, TABLE *table, + List &fields, + List &values, + bool ignore_errors, + enum trg_event_type event); +bool fill_record_n_invoke_before_triggers(THD *thd, TABLE *table, + Field **field, + List &values, + bool ignore_errors, + enum trg_event_type event); +bool insert_fields(THD *thd, Name_resolution_context *context, + const char *db_name, const char *table_name, + List_iterator *it, bool any_privileges, + uint *hidden_bit_fields, bool returning_field); +void make_leaves_list(THD *thd, List &list, TABLE_LIST *tables, + bool full_table_list, TABLE_LIST *boundary); +int 
setup_wild(THD *thd, TABLE_LIST *tables, List &fields, + List *sum_func_list, SELECT_LEX *sl, bool returning_field); +int setup_returning_fields(THD* thd, TABLE_LIST* table_list); +bool setup_fields(THD *thd, Ref_ptr_array ref_pointer_array, + List &item, enum_column_usage column_usage, + List *sum_func_list, List *pre_fix, + bool allow_sum_func); +void unfix_fields(List &items); +bool fill_record(THD * thd, TABLE *table_arg, List &fields, + List &values, bool ignore_errors, bool update); +bool fill_record(THD *thd, TABLE *table, Field **field, List &values, + bool ignore_errors, bool use_value); + +Field * +find_field_in_tables(THD *thd, Item_ident *item, + TABLE_LIST *first_table, TABLE_LIST *last_table, + ignored_tables_list_t ignored_tables, + Item **ref, find_item_error_report_type report_error, + bool check_privileges, bool register_tree_change); +Field * +find_field_in_table_ref(THD *thd, TABLE_LIST *table_list, + const char *name, size_t length, + const char *item_name, const char *db_name, + const char *table_name, + ignored_tables_list_t ignored_tables, + Item **ref, bool check_privileges, bool allow_rowid, + field_index_t *cached_field_index_ptr, + bool register_tree_change, TABLE_LIST **actual_table); +Field * +find_field_in_table(THD *thd, TABLE *table, const char *name, size_t length, + bool allow_rowid, field_index_t *cached_field_index_ptr); +Field * +find_field_in_table_sef(TABLE *table, const char *name); +Item ** find_item_in_list(Item *item, List &items, uint *counter, + find_item_error_report_type report_error, + enum_resolution_type *resolution, uint limit= 0); +bool setup_tables(THD *thd, Name_resolution_context *context, + List *from_clause, TABLE_LIST *tables, + List &leaves, bool select_insert, + bool full_table_list); +bool setup_tables_and_check_access(THD *thd, + Name_resolution_context *context, + List *from_clause, + TABLE_LIST *tables, + List &leaves, + bool select_insert, + privilege_t want_access_first, + privilege_t want_access, + 
bool full_table_list); +bool wait_while_table_is_used(THD *thd, TABLE *table, + enum ha_extra_function function); + +void drop_open_table(THD *thd, TABLE *table, const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name); +void update_non_unique_table_error(TABLE_LIST *update, + const char *operation, + TABLE_LIST *duplicate); +int setup_conds(THD *thd, TABLE_LIST *tables, List &leaves, + COND **conds); +void wrap_ident(THD *thd, Item **conds); +int setup_ftfuncs(SELECT_LEX* select); +void cleanup_ftfuncs(SELECT_LEX *select_lex); +int init_ftfuncs(THD *thd, SELECT_LEX* select, bool no_order); +bool lock_table_names(THD *thd, const DDL_options_st &options, + TABLE_LIST *table_list, + TABLE_LIST *table_list_end, ulong lock_wait_timeout, + uint flags); +static inline bool +lock_table_names(THD *thd, TABLE_LIST *table_list, + TABLE_LIST *table_list_end, ulong lock_wait_timeout, + uint flags) +{ + return lock_table_names(thd, thd->lex->create_info, table_list, + table_list_end, lock_wait_timeout, flags); +} +bool open_tables(THD *thd, const DDL_options_st &options, + TABLE_LIST **tables, uint *counter, + uint flags, Prelocking_strategy *prelocking_strategy); + +static inline bool +open_tables(THD *thd, TABLE_LIST **tables, uint *counter, uint flags, + Prelocking_strategy *prelocking_strategy) +{ + return open_tables(thd, thd->lex->create_info, tables, counter, flags, + prelocking_strategy); +} +/* open_and_lock_tables with optional derived handling */ +bool open_and_lock_tables(THD *thd, const DDL_options_st &options, + TABLE_LIST *tables, + bool derived, uint flags, + Prelocking_strategy *prelocking_strategy); +static inline bool +open_and_lock_tables(THD *thd, TABLE_LIST *tables, + bool derived, uint flags, + Prelocking_strategy *prelocking_strategy) +{ + return open_and_lock_tables(thd, thd->lex->create_info, + tables, derived, flags, prelocking_strategy); +} +/* simple open_and_lock_tables without derived handling for single table */ +TABLE 
*open_n_lock_single_table(THD *thd, TABLE_LIST *table_l, + thr_lock_type lock_type, uint flags, + Prelocking_strategy *prelocking_strategy); +bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags, + uint dt_phases); +bool open_tables_only_view_structure(THD *thd, TABLE_LIST *tables, + bool can_deadlock); +bool open_and_lock_internal_tables(TABLE *table, bool lock); +bool lock_tables(THD *thd, TABLE_LIST *tables, uint counter, uint flags); +int decide_logging_format(THD *thd, TABLE_LIST *tables); +void close_thread_table(THD *thd, TABLE **table_ptr); +TABLE_LIST *unique_table(THD *thd, TABLE_LIST *table, TABLE_LIST *table_list, + uint check_flag); +bool is_equal(const LEX_CSTRING *a, const LEX_CSTRING *b); + +class Open_tables_backup; +/* Functions to work with system tables. */ +bool open_system_tables_for_read(THD *thd, TABLE_LIST *table_list); +void close_system_tables(THD *thd); +void close_mysql_tables(THD *thd); +TABLE *open_system_table_for_update(THD *thd, TABLE_LIST *one_table); +TABLE *open_log_table(THD *thd, TABLE_LIST *one_table, Open_tables_backup *backup); +void close_log_table(THD *thd, Open_tables_backup *backup); + +bool close_cached_tables(THD *thd, TABLE_LIST *tables, + bool wait_for_refresh, ulong timeout); +void purge_tables(); +bool flush_tables(THD *thd, flush_tables_type flag); +void close_all_tables_for_name(THD *thd, TABLE_SHARE *share, + ha_extra_function extra, + TABLE *skip_table); +OPEN_TABLE_LIST *list_open_tables(THD *thd, const char *db, const char *wild); +bool tdc_open_view(THD *thd, TABLE_LIST *table_list, uint flags); + +TABLE *find_table_for_mdl_upgrade(THD *thd, const char *db, + const char *table_name, + int *p_error); +void mark_tmp_table_for_reuse(TABLE *table); + +int dynamic_column_error_message(enum_dyncol_func_result rc); + +/* open_and_lock_tables with optional derived handling */ +int open_and_lock_tables_derived(THD *thd, TABLE_LIST *tables, bool derived); + +extern "C" int 
simple_raw_key_cmp(void* arg, const void* key1, + const void* key2); +extern "C" int count_distinct_walk(void *elem, element_count count, void *arg); +int simple_str_key_cmp(void* arg, uchar* key1, uchar* key2); + +extern Item **not_found_item; +extern Field *not_found_field; +extern Field *view_ref_found; + +/** + clean/setup table fields and map. + + @param table TABLE structure pointer (which should be setup) + @param table_list TABLE_LIST structure pointer (owner of TABLE) + @param tablenr table number +*/ + + +inline void setup_table_map(TABLE *table, TABLE_LIST *table_list, uint tablenr) +{ + table->used_fields= 0; + table_list->reset_const_table(); + table->null_row= 0; + table->status= STATUS_NO_RECORD; + table->maybe_null= table_list->outer_join; + TABLE_LIST *embedding= table_list->embedding; + while (!table->maybe_null && embedding) + { + table->maybe_null= embedding->outer_join; + embedding= embedding->embedding; + } + table->tablenr= tablenr; + table->map= (table_map) 1 << tablenr; + table->force_index= table_list->force_index; + table->force_index_order= table->force_index_group= 0; + table->covering_keys= table->s->keys_for_keyread; +} + +inline TABLE_LIST *find_table_in_global_list(TABLE_LIST *table, + LEX_CSTRING *db_name, + LEX_CSTRING *table_name) +{ + return find_table_in_list(table, &TABLE_LIST::next_global, + db_name, table_name); +} + +inline bool setup_fields_with_no_wrap(THD *thd, Ref_ptr_array ref_pointer_array, + List &item, + enum_column_usage column_usage, + List *sum_func_list, + bool allow_sum_func) +{ + bool res; + SELECT_LEX *first= thd->lex->first_select_lex(); + DBUG_ASSERT(thd->lex->current_select == first); + first->no_wrap_view_item= TRUE; + res= setup_fields(thd, ref_pointer_array, item, column_usage, + sum_func_list, NULL, allow_sum_func); + first->no_wrap_view_item= FALSE; + return res; +} + +/** + An abstract class for a strategy specifying how the prelocking + algorithm should extend the prelocking set while processing + 
already existing elements in the set. +*/ + +class Prelocking_strategy +{ +public: + virtual ~Prelocking_strategy() = default; + + virtual void reset(THD *thd) { }; + virtual bool handle_routine(THD *thd, Query_tables_list *prelocking_ctx, + Sroutine_hash_entry *rt, sp_head *sp, + bool *need_prelocking) = 0; + virtual bool handle_table(THD *thd, Query_tables_list *prelocking_ctx, + TABLE_LIST *table_list, bool *need_prelocking) = 0; + virtual bool handle_view(THD *thd, Query_tables_list *prelocking_ctx, + TABLE_LIST *table_list, bool *need_prelocking)= 0; + virtual bool handle_end(THD *thd) { return 0; }; +}; + + +/** + A Strategy for prelocking algorithm suitable for DML statements. + + Ensures that all tables used by all statement's SF/SP/triggers and + required for foreign key checks are prelocked and SF/SPs used are + cached. +*/ + +class DML_prelocking_strategy : public Prelocking_strategy +{ +public: + virtual bool handle_routine(THD *thd, Query_tables_list *prelocking_ctx, + Sroutine_hash_entry *rt, sp_head *sp, + bool *need_prelocking); + virtual bool handle_table(THD *thd, Query_tables_list *prelocking_ctx, + TABLE_LIST *table_list, bool *need_prelocking); + virtual bool handle_view(THD *thd, Query_tables_list *prelocking_ctx, + TABLE_LIST *table_list, bool *need_prelocking); +}; + + +/** + A strategy for prelocking algorithm to be used for LOCK TABLES + statement. +*/ + +class Lock_tables_prelocking_strategy : public DML_prelocking_strategy +{ + virtual bool handle_table(THD *thd, Query_tables_list *prelocking_ctx, + TABLE_LIST *table_list, bool *need_prelocking); +}; + + +/** + Strategy for prelocking algorithm to be used for ALTER TABLE statements. + + Unlike DML or LOCK TABLES strategy, it doesn't + prelock triggers, views or stored routines, since they are not + used during ALTER. 
+*/ + +class Alter_table_prelocking_strategy : public Prelocking_strategy +{ +public: + virtual bool handle_routine(THD *thd, Query_tables_list *prelocking_ctx, + Sroutine_hash_entry *rt, sp_head *sp, + bool *need_prelocking); + virtual bool handle_table(THD *thd, Query_tables_list *prelocking_ctx, + TABLE_LIST *table_list, bool *need_prelocking); + virtual bool handle_view(THD *thd, Query_tables_list *prelocking_ctx, + TABLE_LIST *table_list, bool *need_prelocking); +}; + + +inline bool +open_tables(THD *thd, const DDL_options_st &options, + TABLE_LIST **tables, uint *counter, uint flags) +{ + DML_prelocking_strategy prelocking_strategy; + + return open_tables(thd, options, tables, counter, flags, + &prelocking_strategy); +} +inline bool +open_tables(THD *thd, TABLE_LIST **tables, uint *counter, uint flags) +{ + DML_prelocking_strategy prelocking_strategy; + + return open_tables(thd, thd->lex->create_info, tables, counter, flags, + &prelocking_strategy); +} + +inline TABLE *open_n_lock_single_table(THD *thd, TABLE_LIST *table_l, + thr_lock_type lock_type, uint flags) +{ + DML_prelocking_strategy prelocking_strategy; + + return open_n_lock_single_table(thd, table_l, lock_type, flags, + &prelocking_strategy); +} + + +/* open_and_lock_tables with derived handling */ +inline bool open_and_lock_tables(THD *thd, + const DDL_options_st &options, + TABLE_LIST *tables, + bool derived, uint flags) +{ + DML_prelocking_strategy prelocking_strategy; + + return open_and_lock_tables(thd, options, tables, derived, flags, + &prelocking_strategy); +} +inline bool open_and_lock_tables(THD *thd, TABLE_LIST *tables, + bool derived, uint flags) +{ + DML_prelocking_strategy prelocking_strategy; + + return open_and_lock_tables(thd, thd->lex->create_info, + tables, derived, flags, + &prelocking_strategy); +} + + +bool restart_trans_for_tables(THD *thd, TABLE_LIST *table); + +bool extend_table_list(THD *thd, TABLE_LIST *tables, + Prelocking_strategy *prelocking_strategy, + bool 
has_prelocking_list); + +/** + A context of open_tables() function, used to recover + from a failed open_table() or open_routine() attempt. +*/ + +class Open_table_context +{ +public: + enum enum_open_table_action + { + OT_NO_ACTION= 0, + OT_BACKOFF_AND_RETRY, + OT_REOPEN_TABLES, + OT_DISCOVER, + OT_REPAIR, + OT_ADD_HISTORY_PARTITION + }; + Open_table_context(THD *thd, uint flags); + + bool recover_from_failed_open(); + bool request_backoff_action(enum_open_table_action action_arg, + TABLE_LIST *table); + + bool can_recover_from_failed_open() const + { return m_action != OT_NO_ACTION; } + + /** + When doing a back-off, we close all tables acquired by this + statement. Return an MDL savepoint taken at the beginning of + the statement, so that we can rollback to it before waiting on + locks. + */ + const MDL_savepoint &start_of_statement_svp() const + { + return m_start_of_statement_svp; + } + + inline ulong get_timeout() const + { + return m_timeout; + } + + enum_open_table_action get_action() const + { + return m_action; + } + + uint get_flags() const { return m_flags; } + + /** + Set flag indicating that we have already acquired metadata lock + protecting this statement against GRL while opening tables. + */ + void set_has_protection_against_grl(enum_mdl_type mdl_type) + { + m_has_protection_against_grl|= MDL_BIT(mdl_type); + } + + bool has_protection_against_grl(enum_mdl_type mdl_type) const + { + return (bool) (m_has_protection_against_grl & MDL_BIT(mdl_type)); + } + +private: + /* THD for which tables are opened. */ + THD *m_thd; + /** + For OT_DISCOVER and OT_REPAIR actions, the table list element for + the table which definition should be re-discovered or which + should be repaired. + */ + TABLE_LIST *m_failed_table; + MDL_savepoint m_start_of_statement_svp; + /** + Lock timeout in seconds. Initialized to LONG_TIMEOUT when opening system + tables or to the "lock_wait_timeout" system variable for regular tables. + */ + ulong m_timeout; + /* open_table() flags. 
*/ + uint m_flags; + /** Back off action. */ + enum enum_open_table_action m_action; + /** + Whether we had any locks when this context was created. + If we did, they are from the previous statement of a transaction, + and we can't safely do back-off (and release them). + */ + bool m_has_locks; + /** + Indicates that in the process of opening tables we have acquired + protection against global read lock. + */ + mdl_bitmap_t m_has_protection_against_grl; + +public: + uint vers_create_count; +}; + + +/** + Check if a TABLE_LIST instance represents a pre-opened temporary table. +*/ + +inline bool is_temporary_table(TABLE_LIST *tl) +{ + if (tl->view || tl->schema_table) + return FALSE; + + if (!tl->table) + return FALSE; + + /* + NOTE: 'table->s' might be NULL for specially constructed TABLE + instances. See SHOW TRIGGERS for example. + */ + + if (!tl->table->s) + return FALSE; + + return tl->table->s->tmp_table != NO_TMP_TABLE; +} + + +/** + This internal handler is used to trap ER_NO_SUCH_TABLE. +*/ + +class No_such_table_error_handler : public Internal_error_handler +{ +public: + No_such_table_error_handler() + : m_handled_errors(0), m_unhandled_errors(0), first_error(0) + {} + + bool handle_condition(THD *thd, + uint sql_errno, + const char* sqlstate, + Sql_condition::enum_warning_level *level, + const char* msg, + Sql_condition ** cond_hdl); + + /** + Returns TRUE if one or more ER_NO_SUCH_TABLE errors have been + trapped and no other errors have been seen. FALSE otherwise. + */ + bool safely_trapped_errors(); + uint got_error() { return first_error; } + +private: + int m_handled_errors; + int m_unhandled_errors; + uint first_error; +}; +#endif /* SQL_BASE_INCLUDED */ diff --git a/sql/sql_basic_types.h b/sql/sql_basic_types.h new file mode 100644 index 00000000..f592aed0 --- /dev/null +++ b/sql/sql_basic_types.h @@ -0,0 +1,339 @@ +/* + Copyright (c) 2000, 2016, Oracle and/or its affiliates. 
+ Copyright (c) 2009, 2016, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* File that includes common types used globally in MariaDB */ + +#ifndef SQL_TYPES_INCLUDED +#define SQL_TYPES_INCLUDED + +typedef ulonglong sql_mode_t; +typedef int64 query_id_t; + +enum enum_nullability { NOT_NULL, NULLABLE }; + + +/* + "fuzzydate" with strict data type control. + Represents a mixture of *only* data type conversion flags, without rounding. + Please keep "explicit" in constructors and conversion methods. +*/ +class date_conv_mode_t +{ +public: + enum value_t + { + CONV_NONE= 0U, + /* + FUZZY_DATES is used for the result will only be used for comparison + purposes. Conversion is as relaxed as possible. + */ + FUZZY_DATES= 1U, + TIME_ONLY= 4U, + INTERVAL_hhmmssff= 8U, + INTERVAL_DAY= 16U, + RANGE0_LAST= INTERVAL_DAY, + NO_ZERO_IN_DATE= (1UL << 23), // MODE_NO_ZERO_IN_DATE + NO_ZERO_DATE= (1UL << 24), // MODE_NO_ZERO_DATE + INVALID_DATES= (1UL << 25) // MODE_INVALID_DATES + }; + + /* + BIT-OR for all known values. Let's have a separate enum for it. + - We don't put this value "value_t", to avoid handling it in switch(). + - We don't put this value as a static const inside the class, + because "gdb" would display it every time when we do "print" + for a time_round_mode_t value. 
+ - We can't put into into a function returning this value, because + it's not allowed to use functions in static_assert. + */ + enum known_values_t + { + KNOWN_MODES= FUZZY_DATES | + TIME_ONLY | INTERVAL_hhmmssff | INTERVAL_DAY | + NO_ZERO_IN_DATE | NO_ZERO_DATE | INVALID_DATES + }; +private: + value_t m_mode; +public: + + // Constructors + explicit date_conv_mode_t(ulonglong fuzzydate) + :m_mode((value_t) fuzzydate) + { } + + // Conversion operators + explicit operator ulonglong() const + { + return m_mode; + } + explicit operator bool() const + { + return m_mode != 0; + } + + // Unary operators + ulonglong operator~() const + { + return ~m_mode; + } + + // Dyadic bitwise operators + date_conv_mode_t operator&(const date_conv_mode_t &other) const + { + return date_conv_mode_t(m_mode & other.m_mode); + } + date_conv_mode_t operator&(const ulonglong other) const + { + return date_conv_mode_t(m_mode & other); + } + + date_conv_mode_t operator|(const date_conv_mode_t &other) const + { + return date_conv_mode_t(m_mode | other.m_mode); + } + + // Dyadic bitwise assignment operators + date_conv_mode_t &operator&=(const date_conv_mode_t &other) + { + m_mode= value_t(m_mode & other.m_mode); + return *this; + } + + date_conv_mode_t &operator|=(const date_conv_mode_t &other) + { + m_mode= value_t(m_mode | other.m_mode); + return *this; + } +}; + + +/* + Fractional rounding mode for temporal data types. +*/ +class time_round_mode_t +{ +public: + enum value_t + { + /* + Use FRAC_NONE when the value needs no rounding nor truncation, + because it is already known not to haveany fractional digits outside + of the requested precision. + */ + FRAC_NONE= 0, + FRAC_TRUNCATE= date_conv_mode_t::RANGE0_LAST << 1, // 32 + FRAC_ROUND= date_conv_mode_t::RANGE0_LAST << 2 // 64 + }; + // BIT-OR for all known values. See comments in time_conv_mode_t. 
+ enum known_values_t + { + KNOWN_MODES= FRAC_TRUNCATE | FRAC_ROUND + }; +private: + value_t m_mode; +public: + // Constructors + explicit time_round_mode_t(ulonglong mode) + :m_mode((value_t) mode) + { +#ifdef MYSQL_SERVER + DBUG_ASSERT(mode == FRAC_NONE || + mode == FRAC_TRUNCATE || + mode == FRAC_ROUND); +#endif + } + // Conversion operators + explicit operator ulonglong() const + { + return m_mode; + } + value_t mode() const + { + return m_mode; + } + // Comparison operators + bool operator==(const time_round_mode_t &other) + { + return m_mode == other.m_mode; + } +}; + + +/* + "fuzzydate" with strict data type control. + Used as a parameter to get_date() and represents a mixture of: + - data type conversion flags + - fractional second rounding flags + Please keep "explicit" in constructors and conversion methods. +*/ +class date_mode_t +{ +public: + enum value_t + { + CONV_NONE= date_conv_mode_t::CONV_NONE, // 0 + FUZZY_DATES= date_conv_mode_t::FUZZY_DATES, // 1 + TIME_ONLY= date_conv_mode_t::TIME_ONLY, // 4 + INTERVAL_hhmmssff= date_conv_mode_t::INTERVAL_hhmmssff, // 8 + INTERVAL_DAY= date_conv_mode_t::INTERVAL_DAY, // 16 + FRAC_TRUNCATE= time_round_mode_t::FRAC_TRUNCATE, // 32 + FRAC_ROUND= time_round_mode_t::FRAC_ROUND, // 64 + NO_ZERO_IN_DATE= date_conv_mode_t::NO_ZERO_IN_DATE, // (1UL << 23) + NO_ZERO_DATE= date_conv_mode_t::NO_ZERO_DATE, // (1UL << 24) + INVALID_DATES= date_conv_mode_t::INVALID_DATES, // (1UL << 25) + }; +protected: + value_t m_mode; +public: + + // Constructors + explicit date_mode_t(ulonglong fuzzydate) + :m_mode((value_t) fuzzydate) + { } + + // Conversion operators + explicit operator ulonglong() const + { + return m_mode; + } + explicit operator bool() const + { + return m_mode != 0; + } + explicit operator date_conv_mode_t() const + { + return date_conv_mode_t(ulonglong(m_mode) & date_conv_mode_t::KNOWN_MODES); + } + explicit operator time_round_mode_t() const + { + return time_round_mode_t(ulonglong(m_mode) & 
time_round_mode_t::KNOWN_MODES); + } + // Unary operators + ulonglong operator~() const + { + return ~m_mode; + } + bool operator!() const + { + return !m_mode; + } + + // Dyadic bitwise operators + date_mode_t operator&(const date_mode_t &other) const + { + return date_mode_t(m_mode & other.m_mode); + } + date_mode_t operator&(ulonglong other) const + { + return date_mode_t(m_mode & other); + } + + date_mode_t operator|(const date_mode_t &other) const + { + return date_mode_t(m_mode | other.m_mode); + } + + // Dyadic bitwise assignment operators + date_mode_t &operator&=(const date_mode_t &other) + { + m_mode= value_t(m_mode & other.m_mode); + return *this; + } + + date_mode_t &operator|=(const date_mode_t &other) + { + m_mode= value_t(m_mode | other.m_mode); + return *this; + } + + date_mode_t &operator|=(const date_conv_mode_t &other) + { + m_mode= value_t(m_mode | ulonglong(other)); + return *this; + } +}; + + +// Bitwise OR out-of-class operators for data type mixtures +static inline date_mode_t operator|(const date_mode_t &a, + const date_conv_mode_t &b) +{ + return date_mode_t(ulonglong(a) | ulonglong(b)); +} + +static inline date_mode_t operator|(const date_conv_mode_t &a, + const time_round_mode_t &b) +{ + return date_mode_t(ulonglong(a) | ulonglong(b)); +} + + +static inline date_mode_t operator|(const date_conv_mode_t &a, + const date_mode_t &b) +{ + return date_mode_t(ulonglong(a) | ulonglong(b)); +} + + +// Bitwise AND out-of-class operators for data type mixtures +static inline date_conv_mode_t operator&(const date_mode_t &a, + const date_conv_mode_t &b) +{ + return date_conv_mode_t(ulonglong(a) & ulonglong(b)); +} + +static inline date_conv_mode_t operator&(const date_conv_mode_t &a, + const date_mode_t &b) +{ + return date_conv_mode_t(ulonglong(a) & ulonglong(b)); +} + +static inline date_conv_mode_t operator&(sql_mode_t &a, + const date_conv_mode_t &b) +{ + return date_conv_mode_t(a & ulonglong(b)); +} + + +static const date_conv_mode_t + 
TIME_CONV_NONE (date_conv_mode_t::CONV_NONE), + TIME_FUZZY_DATES (date_conv_mode_t::FUZZY_DATES), + TIME_TIME_ONLY (date_conv_mode_t::TIME_ONLY), + TIME_INTERVAL_hhmmssff (date_conv_mode_t::INTERVAL_hhmmssff), + TIME_INTERVAL_DAY (date_conv_mode_t::INTERVAL_DAY), + TIME_NO_ZERO_IN_DATE (date_conv_mode_t::NO_ZERO_IN_DATE), + TIME_NO_ZERO_DATE (date_conv_mode_t::NO_ZERO_DATE), + TIME_INVALID_DATES (date_conv_mode_t::INVALID_DATES); + +// An often used combination +static const date_conv_mode_t + TIME_NO_ZEROS (date_conv_mode_t::NO_ZERO_DATE| + date_conv_mode_t::NO_ZERO_IN_DATE); + +// Flags understood by str_to_xxx, number_to_xxx, check_date +static const date_conv_mode_t + TIME_MODE_FOR_XXX_TO_DATE (date_mode_t::NO_ZERO_IN_DATE | + date_mode_t::NO_ZERO_DATE | + date_mode_t::INVALID_DATES); + +static const time_round_mode_t + TIME_FRAC_NONE (time_round_mode_t::FRAC_NONE), + TIME_FRAC_TRUNCATE (time_round_mode_t::FRAC_TRUNCATE), + TIME_FRAC_ROUND (time_round_mode_t::FRAC_ROUND); + + +#endif diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc new file mode 100644 index 00000000..e71c7015 --- /dev/null +++ b/sql/sql_binlog.cc @@ -0,0 +1,471 @@ +/* + Copyright (c) 2005, 2013, Oracle and/or its affiliates. + Copyright (c) 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_binlog.h" +#include "sql_parse.h" +#include "sql_acl.h" +#include "rpl_rli.h" +#include "rpl_mi.h" +#include "slave.h" +#include "log_event.h" + + +/** + Check if the event type is allowed in a BINLOG statement. + + @retval 0 if the event type is ok. + @retval 1 if the event type is not ok. +*/ +static int check_event_type(int type, Relay_log_info *rli) +{ + Format_description_log_event *fd_event= + rli->relay_log.description_event_for_exec; + + /* + Convert event type id of certain old versions (see comment in + Format_description_log_event::Format_description_log_event(char*,...)). + */ + if (fd_event && fd_event->event_type_permutation) + { +#ifdef DBUG_TRACE + int new_type= fd_event->event_type_permutation[type]; + DBUG_PRINT("info", + ("converting event type %d to %d (%s)", + type, new_type, + Log_event::get_type_str((Log_event_type)new_type))); +#endif + type= fd_event->event_type_permutation[type]; + } + + switch (type) + { + case START_EVENT_V3: + case FORMAT_DESCRIPTION_EVENT: + /* + We need a preliminary FD event in order to parse the FD event, + if we don't already have one. + */ + if (!fd_event) + if (!(rli->relay_log.description_event_for_exec= + new Format_description_log_event(4))) + { + my_error(ER_OUTOFMEMORY, MYF(0), 1); + return 1; + } + + /* It is always allowed to execute FD events. 
*/ + return 0; + + case QUERY_EVENT: + case TABLE_MAP_EVENT: + case WRITE_ROWS_EVENT_V1: + case UPDATE_ROWS_EVENT_V1: + case DELETE_ROWS_EVENT_V1: + case WRITE_ROWS_EVENT: + case UPDATE_ROWS_EVENT: + case DELETE_ROWS_EVENT: + case PRE_GA_WRITE_ROWS_EVENT: + case PRE_GA_UPDATE_ROWS_EVENT: + case PRE_GA_DELETE_ROWS_EVENT: + /* + Row events are only allowed if a Format_description_event has + already been seen. + */ + if (fd_event) + return 0; + else + { + my_error(ER_NO_FORMAT_DESCRIPTION_EVENT_BEFORE_BINLOG_STATEMENT, + MYF(0), Log_event::get_type_str((Log_event_type)type)); + return 1; + } + break; + + default: + /* + It is not meaningful to execute other events than row-events and + FD events. It would even be dangerous to execute Stop_log_event + and Rotate_log_event since they call Relay_log_info::flush(), which + is not allowed to call by other threads than the slave SQL + thread when the slave SQL thread is running. + */ + my_error(ER_ONLY_FD_AND_RBR_EVENTS_ALLOWED_IN_BINLOG_STATEMENT, + MYF(0), Log_event::get_type_str((Log_event_type)type)); + return 1; + } +} + +/** + Copy fragments into the standard placeholder thd->lex->comment.str. + + Compute the size of the (still) encoded total, + allocate and then copy fragments one after another. + The size can exceed max(max_allowed_packet) which is not a + problem as no String instance is created off this char array. + + @param thd THD handle + @return + 0 at success, + -1 otherwise. 
+*/ +int binlog_defragment(THD *thd) +{ + user_var_entry *entry[2]; + LEX_CSTRING name[2]= { thd->lex->comment, thd->lex->ident }; + + /* compute the total size */ + thd->lex->comment.str= NULL; + thd->lex->comment.length= 0; + for (uint k= 0; k < 2; k++) + { + entry[k]= + (user_var_entry*) my_hash_search(&thd->user_vars, (uchar*) name[k].str, + name[k].length); + if (!entry[k] || entry[k]->type != STRING_RESULT) + { + my_error(ER_WRONG_TYPE_FOR_VAR, MYF(0), name[k].str); + return -1; + } + thd->lex->comment.length += entry[k]->length; + } + + thd->lex->comment.str= // to be freed by the caller + (char *) my_malloc(PSI_INSTRUMENT_ME, thd->lex->comment.length, MYF(MY_WME)); + if (!thd->lex->comment.str) + { + my_error(ER_OUTOFMEMORY, MYF(ME_FATAL), 1); + return -1; + } + + /* fragments are merged into allocated buf while the user var:s get reset */ + size_t gathered_length= 0; + for (uint k=0; k < 2; k++) + { + memcpy(const_cast(thd->lex->comment.str) + gathered_length, entry[k]->value, + entry[k]->length); + gathered_length += entry[k]->length; + } + for (uint k=0; k < 2; k++) + update_hash(entry[k], true, NULL, 0, STRING_RESULT, &my_charset_bin, 0); + + DBUG_ASSERT(gathered_length == thd->lex->comment.length); + + return 0; +} + +/** + Wraps Log_event::apply_event to save and restore + session context in case of Query_log_event. + + @param ev replication event + @param rgi execution context for the event + + @return + 0 on success, + non-zero otherwise. 
+*/ +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +int save_restore_context_apply_event(Log_event *ev, rpl_group_info *rgi) +{ + if (ev->get_type_code() != QUERY_EVENT) + return ev->apply_event(rgi); + + THD *thd= rgi->thd; + Relay_log_info *rli= thd->rli_fake; + DBUG_ASSERT(!rli->mi); + LEX_CSTRING connection_name= { STRING_WITH_LEN("BINLOG_BASE64_EVENT") }; + + if (!(rli->mi= new Master_info(&connection_name, false))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return -1; + } + + sql_digest_state *m_digest= thd->m_digest; + PSI_statement_locker *m_statement_psi= thd->m_statement_psi;; + LEX_CSTRING save_db= thd->db; + my_thread_id m_thread_id= thd->variables.pseudo_thread_id; + + thd->system_thread_info.rpl_sql_info= NULL; + thd->reset_db(&null_clex_str); + + thd->m_digest= NULL; + thd->m_statement_psi= NULL; + + int err= ev->apply_event(rgi); + + thd->m_digest= m_digest; + thd->m_statement_psi= m_statement_psi; + thd->variables.pseudo_thread_id= m_thread_id; + thd->reset_db(&save_db); + delete rli->mi; + rli->mi= NULL; + + return err; +} +#endif + +/** + Execute a BINLOG statement. + + To execute the BINLOG command properly the server needs to know + which format the BINLOG command's event is in. Therefore, the first + BINLOG statement seen must be a base64 encoding of the + Format_description_log_event, as outputted by mysqlbinlog. This + Format_description_log_event is cached in + rli->description_event_for_exec. + + @param thd Pointer to THD object for the client thread executing the + statement. +*/ + +void mysql_client_binlog_statement(THD* thd) +{ + DBUG_ENTER("mysql_client_binlog_statement"); + DBUG_PRINT("info",("binlog base64: '%*s'", + (int) (thd->lex->comment.length < 2048 ? + thd->lex->comment.length : 2048), + thd->lex->comment.str)); + + if (check_global_access(thd, PRIV_STMT_BINLOG)) + DBUG_VOID_RETURN; + + /* + option_bits will be changed when applying the event. 
But we don't expect + it be changed permanently after BINLOG statement, so backup it first. + It will be restored at the end of this function. + */ + ulonglong thd_options= thd->variables.option_bits; + + /* + Allocation + */ + + int err; + Relay_log_info *rli; + rpl_group_info *rgi; + uchar *buf= NULL; + size_t coded_len= 0, decoded_len= 0; + + rli= thd->rli_fake; + if (!rli && (rli= thd->rli_fake= new Relay_log_info(FALSE, "BINLOG_BASE64_EVENT"))) + rli->sql_driver_thd= thd; + if (!(rgi= thd->rgi_fake)) + rgi= thd->rgi_fake= new rpl_group_info(rli); + rgi->thd= thd; + const char *error= 0; + Log_event *ev = 0; + my_bool is_fragmented= FALSE; + /* + Out of memory check + */ + if (!(rli)) + { + my_error(ER_OUTOFMEMORY, MYF(ME_FATAL), 1); /* needed 1 bytes */ + goto end; + } + + DBUG_ASSERT(rli->belongs_to_client()); + + if (unlikely(is_fragmented= thd->lex->comment.str && thd->lex->ident.str)) + if (binlog_defragment(thd)) + goto end; + + if (!(coded_len= thd->lex->comment.length)) + { + my_error(ER_SYNTAX_ERROR, MYF(0)); + goto end; + } + + decoded_len= my_base64_needed_decoded_length((int)coded_len); + if (!(buf= (uchar *) my_malloc(key_memory_binlog_statement_buffer, + decoded_len, MYF(MY_WME)))) + { + my_error(ER_OUTOFMEMORY, MYF(ME_FATAL), 1); + goto end; + } + + for (char const *strptr= thd->lex->comment.str ; + strptr < thd->lex->comment.str + thd->lex->comment.length ; ) + { + char const *endptr= 0; + int bytes_decoded= my_base64_decode(strptr, coded_len, buf, &endptr, + MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS); + +#ifndef HAVE_valgrind + /* + This debug printout should not be used for valgrind builds + since it will read from unassigned memory. 
+ */ + DBUG_PRINT("info", + ("bytes_decoded: %d strptr: %p endptr: %p ('%c':%d)", + bytes_decoded, strptr, endptr, *endptr, + *endptr)); +#endif + + if (bytes_decoded < 0) + { + my_error(ER_BASE64_DECODE_ERROR, MYF(0)); + goto end; + } + else if (bytes_decoded == 0) + break; // If no bytes where read, the string contained only whitespace + + DBUG_ASSERT(bytes_decoded > 0); + DBUG_ASSERT(endptr > strptr); + coded_len-= endptr - strptr; + strptr= endptr; + + /* + Now we have one or more events stored in the buffer. The size of + the buffer is computed based on how much base64-encoded data + there were, so there should be ample space for the data (maybe + even too much, since a statement can consist of a considerable + number of events). + + TODO: Switch to use a stream-based base64 encoder/decoder in + order to be able to read exactly what is necessary. + */ + + DBUG_PRINT("info",("binlog base64 decoded_len: %lu bytes_decoded: %d", + (ulong) decoded_len, bytes_decoded)); + + /* + Now we start to read events of the buffer, until there are no + more. + */ + for (uchar *bufptr= buf ; bytes_decoded > 0 ; ) + { + /* + Checking that the first event in the buffer is not truncated. 
+ */ + ulong event_len; + if (bytes_decoded < EVENT_LEN_OFFSET + 4 || + (event_len= uint4korr(bufptr + EVENT_LEN_OFFSET)) > + (uint) bytes_decoded) + { + my_error(ER_SYNTAX_ERROR, MYF(0)); + goto end; + } + DBUG_PRINT("info", ("event_len=%lu, bytes_decoded=%d", + event_len, bytes_decoded)); + + if (check_event_type(bufptr[EVENT_TYPE_OFFSET], rli)) + goto end; + + ev= Log_event::read_log_event(bufptr, event_len, &error, + rli->relay_log.description_event_for_exec, + 0); + + DBUG_PRINT("info",("binlog base64 err=%s", error)); + if (!ev) + { + /* + This could actually be an out-of-memory, but it is more likely + caused by a bad statement + */ + my_error(ER_SYNTAX_ERROR, MYF(0)); + goto end; + } + + bytes_decoded -= event_len; + bufptr += event_len; + + DBUG_PRINT("info",("ev->get_type_code()=%d", ev->get_type_code())); + ev->thd= thd; + /* + We go directly to the application phase, since we don't need + to check if the event shall be skipped or not. + + Neither do we have to update the log positions, since that is + not used at all: the rli_fake instance is used only for error + reporting. + */ +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + ulonglong save_skip_replication= + thd->variables.option_bits & OPTION_SKIP_REPLICATION; + thd->variables.option_bits= + (thd->variables.option_bits & ~OPTION_SKIP_REPLICATION) | + (ev->flags & LOG_EVENT_SKIP_REPLICATION_F ? + OPTION_SKIP_REPLICATION : 0); + + { + /* + For conventional statements thd->lex points to thd->main_lex, that is + thd->lex == &thd->main_lex. On the other hand, for prepared statement + thd->lex points to the LEX object explicitly allocated for execution + of the prepared statement and in this case thd->lex != &thd->main_lex. 
+ On handling the BINLOG statement, invocation of ev->apply_event(rgi) + initiates the following sequence of calls + Rows_log_event::do_apply_event -> THD::reset_for_next_command + Since the method THD::reset_for_next_command() contains assert + DBUG_ASSERT(lex == &main_lex) + this sequence of calls results in crash when a binlog event is + applied in PS mode. So, reset the current lex temporary to point to + thd->main_lex before running ev->apply_event() and restore its + original value on return. + */ + LEX *backup_lex; + + thd->backup_and_reset_current_lex(&backup_lex); + err= save_restore_context_apply_event(ev, rgi); + thd->restore_current_lex(backup_lex); + } + thd->variables.option_bits= + (thd->variables.option_bits & ~OPTION_SKIP_REPLICATION) | + save_skip_replication; +#else + err= 0; +#endif + /* + Format_description_log_event should not be deleted because it + will be used to read info about the relay log's format; it + will be deleted when the SQL thread does not need it, + i.e. when this thread terminates. + */ + if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT) + delete ev; + ev= 0; + if (err) + { + /* + TODO: Maybe a better error message since the BINLOG statement + now contains several events. + */ + if (!thd->is_error()) + my_error(ER_UNKNOWN_ERROR, MYF(0)); + goto end; + } + } + } + + + DBUG_PRINT("info",("binlog base64 execution finished successfully")); + my_ok(thd); + +end: + if (unlikely(is_fragmented)) + my_free(const_cast(thd->lex->comment.str)); + thd->variables.option_bits= thd_options; + rgi->slave_close_thread_tables(thd); + my_free(buf); + delete rgi; + rgi= thd->rgi_fake= NULL; + DBUG_VOID_RETURN; +} diff --git a/sql/sql_binlog.h b/sql/sql_binlog.h new file mode 100644 index 00000000..d10d78e1 --- /dev/null +++ b/sql/sql_binlog.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_BINLOG_INCLUDED +#define SQL_BINLOG_INCLUDED + +class THD; + +void mysql_client_binlog_statement(THD *thd); + +#endif /* SQL_BINLOG_INCLUDED */ diff --git a/sql/sql_bitmap.h b/sql/sql_bitmap.h new file mode 100644 index 00000000..353601eb --- /dev/null +++ b/sql/sql_bitmap.h @@ -0,0 +1,314 @@ +/* Copyright (c) 2003, 2013, Oracle and/or its affiliates + Copyright (c) 2009, 2013, Monty Program Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Implementation of a bitmap type. 
+ The idea with this is to be able to handle any constant number of bits but + also be able to use 32 or 64 bits bitmaps very efficiently +*/ + +#ifndef SQL_BITMAP_INCLUDED +#define SQL_BITMAP_INCLUDED + +#include +#include +#include + + +/* An iterator to quickly walk over bits in ulonglong bitmap. */ +class Table_map_iterator +{ + ulonglong bmp; +public: + Table_map_iterator(ulonglong t): bmp(t){} + uint next_bit() + { + if (!bmp) + return BITMAP_END; + uint bit= my_find_first_bit(bmp); + bmp &= ~(1ULL << bit); + return bit; + } + int operator++(int) { return next_bit(); } + enum { BITMAP_END= 64 }; +}; + +template class Bitmap +{ +/* + Workaround GCC optimizer bug (generating SSE instuctions on unaligned data) +*/ +#if defined (__GNUC__) && defined(__x86_64__) && (__GNUC__ < 6) && !defined(__clang__) +#define NEED_GCC_NO_SSE_WORKAROUND +#endif + +#ifdef NEED_GCC_NO_SSE_WORKAROUND +#pragma GCC push_options +#pragma GCC target ("no-sse") +#endif + +private: + static const int BITS_PER_ELEMENT= sizeof(ulonglong) * 8; + static const int ARRAY_ELEMENTS= (width + BITS_PER_ELEMENT - 1) / BITS_PER_ELEMENT; + static const ulonglong ALL_BITS_SET= ULLONG_MAX; + + ulonglong buffer[ARRAY_ELEMENTS]; + + uint bit_index(uint n) const + { + DBUG_ASSERT(n < width); + return ARRAY_ELEMENTS == 1 ? 0 : n / BITS_PER_ELEMENT; + } + ulonglong bit_mask(uint n) const + { + DBUG_ASSERT(n < width); + return ARRAY_ELEMENTS == 1 ? 1ULL << n : 1ULL << (n % BITS_PER_ELEMENT); + } + ulonglong last_element_mask(int n) const + { + DBUG_ASSERT(n % BITS_PER_ELEMENT != 0); + return bit_mask(n) - 1; + } + +public: + /* + The default constructor does nothing. + The caller is supposed to either zero the memory + or to call set_all()/clear_all()/set_prefix() + to initialize bitmap. 
+ */ + Bitmap() = default; + + explicit Bitmap(uint prefix) + { + set_prefix(prefix); + } + void init(uint prefix) + { + set_prefix(prefix); + } + uint length() const + { + return width; + } + void set_bit(uint n) + { + buffer[bit_index(n)] |= bit_mask(n); + } + void clear_bit(uint n) + { + buffer[bit_index(n)] &= ~bit_mask(n); + } + bool is_set(uint n) const + { + return buffer[bit_index(n)] & bit_mask(n); + } + void set_prefix(uint prefix_size) + { + set_if_smaller(prefix_size, width); + + size_t idx= prefix_size / BITS_PER_ELEMENT; + + for (size_t i= 0; i < idx; i++) + buffer[i]= ALL_BITS_SET; + + if (prefix_size % BITS_PER_ELEMENT) + buffer[idx++]= last_element_mask(prefix_size); + + for (size_t i= idx; i < ARRAY_ELEMENTS; i++) + buffer[i]= 0; + } + bool is_prefix(uint prefix_size) const + { + DBUG_ASSERT(prefix_size <= width); + + size_t idx= prefix_size / BITS_PER_ELEMENT; + + for (size_t i= 0; i < idx; i++) + if (buffer[i] != ALL_BITS_SET) + return false; + + if (prefix_size % BITS_PER_ELEMENT) + if (buffer[idx++] != last_element_mask(prefix_size)) + return false; + + for (size_t i= idx; i < ARRAY_ELEMENTS; i++) + if (buffer[i] != 0) + return false; + + return true; + } + void set_all() + { + if (width % BITS_PER_ELEMENT) + set_prefix(width); + else if (ARRAY_ELEMENTS > 1) + memset(buffer, 0xff, sizeof(buffer)); + else + buffer[0] = ALL_BITS_SET; + } + void clear_all() + { + if (ARRAY_ELEMENTS > 1) + memset(buffer, 0, sizeof(buffer)); + else + buffer[0]= 0; + } + void intersect(const Bitmap& map2) + { + for (size_t i= 0; i < ARRAY_ELEMENTS; i++) + buffer[i] &= map2.buffer[i]; + } + +private: + /* + Intersect with a bitmap represented as as longlong. + In addition, pad the rest of the bitmap with 0 or 1 bits + depending on pad_with_ones parameter. + */ + void intersect_and_pad(ulonglong map2buff, bool pad_with_ones) + { + buffer[0] &= map2buff; + + for (size_t i= 1; i < ARRAY_ELEMENTS; i++) + buffer[i]= pad_with_ones ? 
ALL_BITS_SET : 0; + + if (ARRAY_ELEMENTS > 1 && (width % BITS_PER_ELEMENT) && pad_with_ones) + buffer[ARRAY_ELEMENTS - 1]= last_element_mask(width); + } + +public: + void intersect(ulonglong map2buff) + { + intersect_and_pad(map2buff, 0); + } + /* Use highest bit for all bits above first element. */ + void intersect_extended(ulonglong map2buff) + { + intersect_and_pad(map2buff, (map2buff & (1ULL << 63))); + } + void subtract(const Bitmap& map2) + { + for (size_t i= 0; i < ARRAY_ELEMENTS; i++) + buffer[i] &= ~(map2.buffer[i]); + } + void merge(const Bitmap& map2) + { + for (size_t i= 0; i < ARRAY_ELEMENTS; i++) + buffer[i] |= map2.buffer[i]; + } + bool is_clear_all() const + { + for (size_t i= 0; i < ARRAY_ELEMENTS; i++) + if (buffer[i]) + return false; + return true; + } + bool is_subset(const Bitmap& map2) const + { + for (size_t i= 0; i < ARRAY_ELEMENTS; i++) + if (buffer[i] & ~(map2.buffer[i])) + return false; + return true; + } + bool is_overlapping(const Bitmap& map2) const + { + for (size_t i= 0; i < ARRAY_ELEMENTS; i++) + if (buffer[i] & map2.buffer[i]) + return true; + return false; + } + bool operator==(const Bitmap& map2) const + { + if (ARRAY_ELEMENTS > 1) + return !memcmp(buffer,map2.buffer,sizeof(buffer)); + return buffer[0] == map2.buffer[0]; + } + bool operator!=(const Bitmap& map2) const + { + return !(*this == map2); + } + /* + Print hexadecimal representation of bitmap. + Truncate trailing zeros. + */ + char *print(char *buf) const + { + size_t last; /*index of the last non-zero element, or 0. 
*/ + + for (last= ARRAY_ELEMENTS - 1; last && !buffer[last]; last--){} + + const int HEX_DIGITS_PER_ELEMENT= BITS_PER_ELEMENT / 4; + for (size_t i= 0; i < last; i++) + { + ulonglong num = buffer[i]; + uint shift = BITS_PER_ELEMENT - 4; + size_t pos= i * HEX_DIGITS_PER_ELEMENT; + for (size_t j= 0; j < HEX_DIGITS_PER_ELEMENT; j++) + { + buf[pos + j]= _dig_vec_upper[(num >> shift) & 0xf]; + shift += 4; + } + } + longlong2str(buffer[last], buf, 16); + return buf; + } + ulonglong to_ulonglong() const + { + return buffer[0]; + } + uint bits_set() + { + uint res= 0; + for (size_t i= 0; i < ARRAY_ELEMENTS; i++) + res += my_count_bits(buffer[i]); + return res; + } + class Iterator + { + const Bitmap& map; + uint offset; + Table_map_iterator tmi; + public: + Iterator(const Bitmap& map2) : map(map2), offset(0), tmi(map2.buffer[0]) {} + int operator++(int) + { + for (;;) + { + int nextbit= tmi++; + + if (nextbit != Table_map_iterator::BITMAP_END) + return offset + nextbit; + + if (offset + BITS_PER_ELEMENT >= map.length()) + return BITMAP_END; + + offset += BITS_PER_ELEMENT; + tmi= Table_map_iterator(map.buffer[offset / BITS_PER_ELEMENT]); + } + } + enum { BITMAP_END = width }; + }; + +#ifdef NEED_GCC_NO_SSE_WORKAROUND +#pragma GCC pop_options +#undef NEED_GCC_NO_SSE_WORKAROUND +#endif +}; + +typedef Bitmap key_map; /* Used for finding keys */ + +#endif /* SQL_BITMAP_INCLUDED */ diff --git a/sql/sql_bootstrap.cc b/sql/sql_bootstrap.cc new file mode 100644 index 00000000..b39d7a57 --- /dev/null +++ b/sql/sql_bootstrap.cc @@ -0,0 +1,159 @@ +/* Copyright (c) 2010, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#include "mariadb.h" +#include +#include +#include "sql_bootstrap.h" +#include + +static bool is_end_of_query(const char *line, size_t len, + const std::string& delimiter) +{ + if (delimiter.length() > len) + return false; + return !strcmp(line + len-delimiter.length(),delimiter.c_str()); +} + +static std::string delimiter= ";"; +extern "C" int read_bootstrap_query(char *query, int *query_length, + fgets_input_t input, fgets_fn_t fgets_fn, + int preserve_delimiter, int *error) +{ + char line_buffer[MAX_BOOTSTRAP_LINE_SIZE]; + const char *line; + size_t len; + size_t query_len= 0; + int fgets_error= 0; + *error= 0; + + *query_length= 0; + for ( ; ; ) + { + line= (*fgets_fn)(line_buffer, sizeof(line_buffer), input, &fgets_error); + + if (error) + *error= fgets_error; + + if (fgets_error != 0) + return READ_BOOTSTRAP_ERROR; + + if (line == NULL) + return (query_len == 0) ? READ_BOOTSTRAP_EOF : READ_BOOTSTRAP_ERROR; + + len= strlen(line); + + /* + Remove trailing whitespace characters. + This assumes: + - no multibyte encoded character can be found at the very end of a line, + - whitespace characters from the "C" locale only. + which is sufficient for the kind of queries found + in the bootstrap scripts. + */ + while (len && (isspace(line[len - 1]))) + len--; + /* + Cleanly end the string, so we don't have to test len > x + all the time before reading line[x], in the code below. 
+ */ + line_buffer[len]= '\0'; + + /* Skip blank lines */ + if (len == 0) + continue; + + /* Skip # comments */ + if (line[0] == '#') + continue; + + /* Skip -- comments */ + if ((line[0] == '-') && (line[1] == '-')) + continue; + + size_t i=0; + while (line[i] == ' ') + i++; + + /* Skip -- comments */ + if (line[i] == '-' && line[i+1] == '-') + continue; + + if (strncmp(line, "DELIMITER", 9) == 0) + { + const char *p= strrchr(line,' '); + if (!p || !p[1]) + { + /* Invalid DELIMITER specifier */ + return READ_BOOTSTRAP_ERROR; + } + delimiter.assign(p+1); + if (preserve_delimiter) + { + memcpy(query,line,len); + query[len]=0; + *query_length = (int)len; + return READ_BOOTSTRAP_SUCCESS; + } + continue; + } + + /* Append the current line to a multi line query. If the new line will make + the query too long, preserve the partial line to provide context for the + error message. + */ + if (query_len + len + 1 >= MAX_BOOTSTRAP_QUERY_SIZE) + { + size_t new_len= MAX_BOOTSTRAP_QUERY_SIZE - query_len - 1; + if ((new_len > 0) && (query_len < MAX_BOOTSTRAP_QUERY_SIZE)) + { + memcpy(query + query_len, line, new_len); + query_len+= new_len; + } + query[query_len]= '\0'; + *query_length= (int)query_len; + return READ_BOOTSTRAP_QUERY_SIZE; + } + + if (query_len != 0) + { + /* + Append a \n to the current line, if any, + to preserve the intended presentation. + */ + query[query_len++]= '\n'; + } + memcpy(query + query_len, line, len); + query_len+= len; + + if (is_end_of_query(line, len, delimiter)) + { + /* + The last line is terminated by delimiter + Return the query found. + */ + if (!preserve_delimiter) + { + query_len-= delimiter.length(); + query[query_len++]= ';'; + } + query[query_len]= 0; + *query_length= (int)query_len; + return READ_BOOTSTRAP_SUCCESS; + } + } +} + diff --git a/sql/sql_bootstrap.h b/sql/sql_bootstrap.h new file mode 100644 index 00000000..e5b9b3a5 --- /dev/null +++ b/sql/sql_bootstrap.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2010, Oracle and/or its affiliates. 
All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifndef SQL_BOOTSTRAP_H +#define SQL_BOOTSTRAP_H + +/** + The maximum size of a bootstrap query. + Increase this size if parsing a longer query during bootstrap is necessary. + The longest query in use depends on the documentation content, + see the file fill_help_tables.sql +*/ +#define MAX_BOOTSTRAP_QUERY_SIZE 60000 +/** + The maximum size of a bootstrap query, expressed in a single line. + Do not increase this size, use the multiline syntax instead. +*/ +#define MAX_BOOTSTRAP_LINE_SIZE 20000 +#define MAX_BOOTSTRAP_ERROR_LEN 256 + +#define READ_BOOTSTRAP_SUCCESS 0 +#define READ_BOOTSTRAP_EOF 1 +#define READ_BOOTSTRAP_ERROR 2 +#define READ_BOOTSTRAP_QUERY_SIZE 3 + +typedef void *fgets_input_t; +typedef char * (*fgets_fn_t)(char *, size_t, fgets_input_t, int *error); + +#ifdef __cplusplus +extern "C" { +#endif +int read_bootstrap_query(char *query, int *query_length, fgets_input_t input, + fgets_fn_t fgets_fn, + int preserve_delimiter, + int *error); +#ifdef __cplusplus +} +#endif + +#endif + + diff --git a/sql/sql_builtin.cc.in b/sql/sql_builtin.cc.in new file mode 100644 index 00000000..810f98a8 --- /dev/null +++ b/sql/sql_builtin.cc.in @@ -0,0 +1,46 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/* + Note that sql_builtin.cc is automatically built by sql_bultin.cc.in + and cmake/plugin.cmake +*/ + +#include +#include + +typedef struct st_maria_plugin builtin_maria_plugin[]; + +#ifdef _MSC_VER +extern "C" +#else +extern +#endif +builtin_maria_plugin + @mysql_mandatory_plugins@ @mysql_optional_plugins@ + builtin_maria_binlog_plugin, + builtin_maria_mysql_password_plugin; + +struct st_maria_plugin *mysql_optional_plugins[]= +{ + @mysql_optional_plugins@ 0 +}; + +struct st_maria_plugin *mysql_mandatory_plugins[]= +{ + builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin, + @mysql_mandatory_plugins@ 0 +}; diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc new file mode 100644 index 00000000..b284189d --- /dev/null +++ b/sql/sql_cache.cc @@ -0,0 +1,5330 @@ +/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. + Copyright (c) 2010, 2017, MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Description of the query cache: + +1. Query_cache object consists of + - query cache memory pool (cache) + - queries hash (queries) + - tables hash (tables) + - list of blocks ordered as they allocated in memory +(first_block) + - list of queries block (queries_blocks) + - list of used tables (tables_blocks) + +2. Query cache memory pool (cache) consists of + - table of steps of memory bins allocation + - table of free memory bins + - blocks of memory + +3. Memory blocks + +Every memory block has the following structure: + ++----------------------------------------------------------+ +| Block header (Query_cache_block structure) | ++----------------------------------------------------------+ +|Table of database table lists (used for queries & tables) | ++----------------------------------------------------------+ +| Type depended header | +|(Query_cache_query, Query_cache_table, Query_cache_result)| ++----------------------------------------------------------+ +| Data ... 
| ++----------------------------------------------------------+ + +Block header consists of: +- type: + FREE Free memory block + QUERY Query block + RESULT Ready to send result + RES_CONT Result's continuation + RES_BEG First block of results, that is not yet complete, + written to cache + RES_INCOMPLETE Allocated for results data block + TABLE Block with database table description + INCOMPLETE The destroyed block +- length of block (length) +- length of data & headers (used) +- physical list links (pnext/pprev) - used for the list of + blocks ordered as they are allocated in physical memory +- logical list links (next/prev) - used for queries block list, tables block + list, free memory block lists and list of results block in query +- number of elements in table of database table list (n_tables) + +4. Query & results blocks + +Query stored in cache consists of following blocks: + +more more +recent+-------------+ old +<-----|Query block 1|------> double linked list of queries block + prev | | next + +-------------+ + <-| table 0 |-> (see "Table of database table lists" description) + <-| table 1 |-> + | ... | +--------------------------+ + +-------------+ +-------------------------+ | +NET | | | V V | +struct| | +-+------------+ +------------+ | +<-----|query header |----->|Result block|-->|Result block|-+ doublelinked +writer| |result| |<--| | list of results + +-------------+ +------------+ +------------+ + |charset | +------------+ +------------+ no table of dbtables + |encoding + | | result | | result | + |query text |<-----| header | | header |------+ + +-------------+parent| | | |parent| + ^ +------------+ +------------+ | + | |result data | |result data | | + | +------------+ +------------+ | + +---------------------------------------------------+ + +First query is registered. During the registration query block is +allocated. 
This query block is included in query hash and is linked +with appropriate database tables lists (if there is no appropriate +list exists it will be created). + +Later when query has performed results is written into the result blocks. +A result block cannot be smaller then QUERY_CACHE_MIN_RESULT_DATA_SIZE. + +When new result is written to cache it is appended to the last result +block, if no more free space left in the last block, new block is +allocated. + +5. Table of database table lists. + +For quick invalidation of queries all query are linked in lists on used +database tables basis (when table will be changed (insert/delete/...) +this queries will be removed from cache). + +Root of such list is table block: + + +------------+ list of used tables (used while invalidation of +<----| Table |-----> whole database) + prev| block |next +-----------+ + | | +-----------+ |Query block| + | | |Query block| +-----------+ + +------------+ +-----------+ | ... | + +->| table 0 |------>|table 0 |----->| table N |---+ + |+-| |<------| |<-----| |<-+| + || +------------+ | ... | | ... | || + || |table header| +-----------+ +-----------+ || + || +------------+ | ... | | ... | || + || |db name + | +-----------+ +-----------+ || + || |table name | || + || +------------+ || + |+--------------------------------------------------------+| + +----------------------------------------------------------+ + +Table block is included into the tables hash (tables). + +6. Free blocks, free blocks bins & steps of freeblock bins. + +When we just started only one free memory block existed. All query +cache memory (that will be used for block allocation) were +containing in this block. +When a new block is allocated we find most suitable memory block +(minimal of >= required size). If such a block can not be found, we try +to find max block < required size (if we allocate block for results). +If there is no free memory, oldest query is removed from cache, and then +we try to allocate memory. 
Last step should be repeated until we find +suitable block or until there is no unlocked query found. + +If the block is found and its length more then we need, it should be +split into 2 blocks. +New blocks cannot be smaller then min_allocation_unit_bytes. + +When a block becomes free, its neighbor-blocks should be tested and if +there are free blocks among them, they should be joined into one block. + +Free memory blocks are stored in bins according to their sizes. +The bins are stored in size-descending order. +These bins are distributed (by size) approximately logarithmically. + +First bin (number 0) stores free blocks with +size <= query_cache_size>>QUERY_CACHE_MEM_BIN_FIRST_STEP_PWR2. +It is first (number 0) step. +On the next step distributed (1 + QUERY_CACHE_MEM_BIN_PARTS_INC) * +QUERY_CACHE_MEM_BIN_PARTS_MUL bins. This bins allocated in interval from +query_cache_size>>QUERY_CACHE_MEM_BIN_FIRST_STEP_PWR2 to +query_cache_size>>QUERY_CACHE_MEM_BIN_FIRST_STEP_PWR2 >> +QUERY_CACHE_MEM_BIN_STEP_PWR2 +... +On each step interval decreases in 2 power of +QUERY_CACHE_MEM_BIN_STEP_PWR2 +times, number of bins (that distributed on this step) increases. If on +the previous step there were N bins distributed , on the current there +would be distributed +(N + QUERY_CACHE_MEM_BIN_PARTS_INC) * QUERY_CACHE_MEM_BIN_PARTS_MUL +bins. +Last distributed bin stores blocks with size near min_allocation_unit +bytes. 
+ +For example: + query_cache_size>>QUERY_CACHE_MEM_BIN_FIRST_STEP_PWR2 = 100, + min_allocation_unit = 17, + QUERY_CACHE_MEM_BIN_STEP_PWR2 = 1, + QUERY_CACHE_MEM_BIN_PARTS_INC = 1, + QUERY_CACHE_MEM_BIN_PARTS_MUL = 1 + (in followed picture showed right (low) bound of bin): + + | 100>>1 50>>1 |25>>1| + | | | | | | + | 100 75 50 41 33 25 21 18 15| 12 | - bins right (low) bounds + + |\---/\-----/\--------/\--------|---/ | + | 0 1 2 3 | | - steps + \-----------------------------/ \---/ + bins that we store in cache this bin showed for example only + + +Calculation of steps/bins distribution is performed only when query cache +is resized. + +When we need to find appropriate bin, first we should find appropriate +step, then we should calculate number of bins that are using data +stored in Query_cache_memory_bin_step structure. + +Free memory blocks are sorted in bins in lists with size-ascending order +(more small blocks needed frequently then bigger one). + +7. Packing cache. + +Query cache packing is divided into two operation: + - pack_cache + - join_results + +pack_cache moved all blocks to "top" of cache and create one block of free +space at the "bottom": + + before pack_cache after pack_cache + +-------------+ +-------------+ + | query 1 | | query 1 | + +-------------+ +-------------+ + | table 1 | | table 1 | + +-------------+ +-------------+ + | results 1.1 | | results 1.1 | + +-------------+ +-------------+ + | free | | query 2 | + +-------------+ +-------------+ + | query 2 | | table 2 | + +-------------+ ---> +-------------+ + | table 2 | | results 1.2 | + +-------------+ +-------------+ + | results 1.2 | | results 2 | + +-------------+ +-------------+ + | free | | free | + +-------------+ | | + | results 2 | | | + +-------------+ | | + | free | | | + +-------------+ +-------------+ + +pack_cache scan blocks in physical address order and move every non-free +block "higher". + +pack_cach remove every free block it finds. 
The length of the deleted block +is accumulated to the "gap". All non free blocks should be shifted with the +"gap" step. + +join_results scans all complete queries. If the results of query are not +stored in the same block, join_results tries to move results so, that they +are stored in one block. + + before join_results after join_results + +-------------+ +-------------+ + | query 1 | | query 1 | + +-------------+ +-------------+ + | table 1 | | table 1 | + +-------------+ +-------------+ + | results 1.1 | | free | + +-------------+ +-------------+ + | query 2 | | query 2 | + +-------------+ +-------------+ + | table 2 | | table 2 | + +-------------+ ---> +-------------+ + | results 1.2 | | free | + +-------------+ +-------------+ + | results 2 | | results 2 | + +-------------+ +-------------+ + | free | | results 1 | + | | | | + | | +-------------+ + | | | free | + | | | | + +-------------+ +-------------+ + +If join_results allocated new block(s) then we need call pack_cache again. + +7. Interface +The query cache interfaces with the rest of the server code through 7 +functions: + 1. Query_cache::send_result_to_client + - Called before parsing and used to match a statement with the stored + queries hash. + If a match is found the cached result set is sent through repeated + calls to net_real_write. (note: calling thread does not have a + registered result set writer: thd->net.query_cache_query=0) + 2. Query_cache::store_query + - Called just before handle_select() and is used to register a result + set writer to the statement currently being processed + (thd->net.query_cache_query). + 3. query_cache_insert + - Called from net_real_write to append a result set to a cached query + if (and only if) this query has a registered result set writer + (thd->net.query_cache_query). + 4. Query_cache::invalidate + Query_cache::invalidate_locked_for_write + - Called from various places to invalidate query cache based on data- + base, table and myisam file name. 
During an on going invalidation + the query cache is temporarily disabled. + 5. Query_cache::flush + - Used when a RESET QUERY CACHE is issued. This clears the entire + cache block by block. + 6. Query_cache::resize + - Used to change the available memory used by the query cache. This + will also invalidate the entrie query cache in one free operation. + 7. Query_cache::pack + - Used when a FLUSH QUERY CACHE is issued. This changes the order of + the used memory blocks in physical memory order and move all avail- + able memory to the 'bottom' of the memory. + + +TODO list: + + - Delayed till after-parsing qache answer (for column rights processing) + - Optimize cache resizing + - if new_size < old_size then pack & shrink + - if new_size > old_size copy cached query to new cache + - Move MRG_MYISAM table type processing to handlers, something like: + tables_used->table->file->register_used_filenames(callback, + first_argument); + - QC improvement suggested by Monty: + - Add a counter in open_table() for how many MERGE (ISAM or MyISAM) + tables are cached in the table cache. + (This will be trivial when we have the new table cache in place I + have been working on) + - After this we can add the following test around the for loop in + is_cacheable:: + + if (thd->temp_tables || global_merge_table_count) + + - Another option would be to set thd->lex->safe_to_cache_query to 0 + in 'get_lock_data' if any of the tables was a tmp table or a + MRG_ISAM table. 
+ (This could be done with almost no speed penalty) +*/ + +#include "mariadb.h" /* NO_EMBEDDED_ACCESS_CHECKS */ +#include "sql_priv.h" +#include "sql_basic_types.h" +#include "sql_cache.h" +#include "sql_parse.h" // check_table_access +#include "tztime.h" // struct Time_zone +#include "sql_acl.h" // SELECT_ACL +#include "sql_base.h" // TMP_TABLE_KEY_EXTRA +#include "debug_sync.h" // DEBUG_SYNC +#include "sql_table.h" +#ifdef HAVE_QUERY_CACHE +#include +#include +#include +#include "../storage/myisammrg/ha_myisammrg.h" +#include "../storage/myisammrg/myrg_def.h" +#include "probes_mysql.h" +#include "transaction.h" +#include "strfunc.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif + +const uchar *query_state_map; + +#ifdef EMBEDDED_LIBRARY +#include "emb_qcache.h" +#endif + +#if defined(EXTRA_DEBUG) && !defined(DBUG_OFF) +#define RW_WLOCK(M) {DBUG_PRINT("lock", ("rwlock wlock %p",(M))); \ + if (!mysql_rwlock_wrlock(M)) DBUG_PRINT("lock", ("rwlock wlock ok")); \ + else DBUG_PRINT("lock", ("rwlock wlock FAILED %d", errno)); } +#define RW_RLOCK(M) {DBUG_PRINT("lock", ("rwlock rlock %p",(M))); \ + if (!mysql_rwlock_rdlock(M)) DBUG_PRINT("lock", ("rwlock rlock ok")); \ + else DBUG_PRINT("lock", ("rwlock wlock FAILED %d", errno)); } +#define RW_UNLOCK(M) {DBUG_PRINT("lock", ("rwlock unlock %p",(M))); \ + if (!mysql_rwlock_unlock(M)) DBUG_PRINT("lock", ("rwlock unlock ok")); \ + else DBUG_PRINT("lock", ("rwlock unlock FAILED %d", errno)); } +#define BLOCK_LOCK_WR(B) {DBUG_PRINT("lock", ("%d LOCK_WR %p",\ + __LINE__,(B))); \ + B->query()->lock_writing();} +#define BLOCK_LOCK_RD(B) {DBUG_PRINT("lock", ("%d LOCK_RD %p",\ + __LINE__,(B))); \ + B->query()->lock_reading();} +#define BLOCK_UNLOCK_WR(B) { \ + DBUG_PRINT("lock", ("%d UNLOCK_WR %p",\ + __LINE__,(B)));B->query()->unlock_writing();} +#define BLOCK_UNLOCK_RD(B) { \ + DBUG_PRINT("lock", ("%d UNLOCK_RD %p",\ + __LINE__,(B)));B->query()->unlock_reading();} +#define DUMP(C) DBUG_EXECUTE("qcache", {\ + 
(C)->cache_dump(); (C)->queries_dump();(C)->tables_dump();}) +#else +#define RW_WLOCK(M) mysql_rwlock_wrlock(M) +#define RW_RLOCK(M) mysql_rwlock_rdlock(M) +#define RW_UNLOCK(M) mysql_rwlock_unlock(M) +#define BLOCK_LOCK_WR(B) B->query()->lock_writing() +#define BLOCK_LOCK_RD(B) B->query()->lock_reading() +#define BLOCK_UNLOCK_WR(B) B->query()->unlock_writing() +#define BLOCK_UNLOCK_RD(B) B->query()->unlock_reading() +#define DUMP(C) +#endif + + +/** + Macro that executes the requested action at a synchronization point + only if the thread has a associated THD session. +*/ +#if defined(ENABLED_DEBUG_SYNC) +#define QC_DEBUG_SYNC(name) \ + do { \ + THD *thd_tmp= current_thd; \ + if (thd_tmp) \ + DEBUG_SYNC(thd_tmp, name); \ + } while (0) +#else +#define QC_DEBUG_SYNC(name) +#endif + + +/** + Thread state to be used when the query cache lock needs to be acquired. + Sets the thread state name in the constructor, resets on destructor. +*/ + +struct Query_cache_wait_state +{ + THD *m_thd; + PSI_stage_info m_old_stage; + const char *m_func; + const char *m_file; + int m_line; + + Query_cache_wait_state(THD *thd, const char *func, + const char *file, unsigned int line) + : m_thd(thd), + m_old_stage(), + m_func(func), m_file(file), m_line(line) + { + if (m_thd) + set_thd_stage_info(m_thd, + &stage_waiting_for_query_cache_lock, + &m_old_stage, + m_func, m_file, m_line); + } + + ~Query_cache_wait_state() + { + if (m_thd) + set_thd_stage_info(m_thd, &m_old_stage, NULL, m_func, m_file, m_line); + } +}; + + +/* + Check if character is a white space. +*/ + +inline bool is_white_space(char c) +{ + return (query_state_map[(uint) ((uchar) c)] == MY_LEX_SKIP); +} + + +/** + Generate a query_string without query comments or duplicated space + + @param new_query New query without 'fluff' is stored here + @param query Original query + @param query_length Length of original query + @param additional_length Extra space for query cache we need to allocate + in new_query buffer. 
+ + Note: + If there is no space to allocate new_query, we will put original query + into new_query. +*/ + +static void make_base_query(String *new_query, + const char *query, size_t query_length, + size_t additional_length) +{ + char *buffer; + const char *query_end, *last_space; + + /* The following is guaranteed by the query_cache interface */ + DBUG_ASSERT(query[query_length] == 0); + DBUG_ASSERT(!is_white_space(query[0])); + /* We do not support UCS2, UTF16, UTF32 as a client character set */ + DBUG_ASSERT(current_thd->variables.character_set_client->mbminlen == 1); + + if (new_query->alloc(query_length + additional_length)) + { + /* + We could not allocate the query. Use original query for + the query cache; Better than nothing.... + */ + new_query->set(query, query_length, system_charset_info); + return; + } + + buffer= (char*) new_query->ptr(); // Store base query here + query_end= query + query_length; + last_space= 0; // No space found yet + + while (query < query_end) + { + char current = *(query++); + switch (current) { + case '\'': + case '`': + case '"': + *(buffer++)= current; // copy first quote + while (query < query_end) + { + *(buffer++)= *query; + if (*(query++) == current) // found pair quote + break; + } + continue; // Continue with next symbol + case '/': // Start of comment ? + /* + Comment of format /#!number #/ or /#M!number #/, must be skipped. + These may include '"' and other comments, but it should + be safe to parse the content as a normal string. + */ + if (query[0] != '*' || query[1] == '!' 
|| + (query[1] == 'M' && query[2] == '!')) + break; + + query++; // skip "/" + while (++query < query_end) + { + if (query[0] == '*' && query[1] == '/') + { + query+= 2; + goto insert_space; + } + } + continue; // Will end outer loop + case '-': + if (*query != '-' || !is_white_space(query[1])) // Not a comment + break; + query++; // skip second "-", and go to search of "\n" + /* fall through */ + case '#': + while (query < query_end) + { + if (*(query++) == '\n') + goto insert_space; + } + continue; // Will end outer loop + default: + if (is_white_space(current)) + goto insert_space; + break; + } + *(buffer++)= current; + continue; + +insert_space: + if (buffer != last_space) + { + *(buffer++)= ' '; + last_space= buffer; + } + } + if (buffer == last_space) + buffer--; // Remove the last space + *buffer= 0; // End zero after query + new_query->length((size_t) (buffer - new_query->ptr())); + + /* Copy db_length */ + memcpy(buffer+1, query_end+1, QUERY_CACHE_DB_LENGTH_SIZE); +} + + +/** + Check and change local variable if global one is switched + + @param thd thread handle +*/ + +void inline fix_local_query_cache_mode(THD *thd) +{ + if (global_system_variables.query_cache_type == 0) + thd->variables.query_cache_type= 0; +} + + +/** + Serialize access to the query cache. + If the lock cannot be granted the thread hangs in a conditional wait which + is signalled on each unlock. + + The lock attempt will also fail without wait if lock_and_suspend() is in + effect by another thread. This enables a quick path in execution to skip waits + when the outcome is known. + + @param mode TIMEOUT the lock can abort because of a timeout + TRY the lock can abort because it is locked now + WAIT wait for lock (default) + + @note mode is optional and default value is WAIT. 
+ + @return + @retval FALSE An exclusive lock was taken + @retval TRUE The locking attempt failed +*/ + +bool Query_cache::try_lock(THD *thd, Cache_try_lock_mode mode) +{ + bool interrupt= TRUE; + Query_cache_wait_state wait_state(thd, __func__, __FILE__, __LINE__); + DBUG_ENTER("Query_cache::try_lock"); + + mysql_mutex_lock(&structure_guard_mutex); + DBUG_EXECUTE_IF("status_wait_query_cache_mutex_sleep", { sleep(5); }); + if (m_cache_status == DISABLED) + { + mysql_mutex_unlock(&structure_guard_mutex); + DBUG_RETURN(TRUE); + } + m_requests_in_progress++; + fix_local_query_cache_mode(thd); + + while (1) + { + if (m_cache_lock_status == Query_cache::UNLOCKED) + { + m_cache_lock_status= Query_cache::LOCKED; +#ifndef DBUG_OFF + m_cache_lock_thread_id= thd->thread_id; +#endif + interrupt= FALSE; + break; + } + else if (m_cache_lock_status == Query_cache::LOCKED_NO_WAIT) + { + /* + If query cache is protected by a LOCKED_NO_WAIT lock this thread + should avoid using the query cache as it is being evicted. + */ + break; + } + else + { + DBUG_ASSERT(m_cache_lock_status == Query_cache::LOCKED); + /* + To prevent send_result_to_client() and query_cache_insert() from + blocking execution for too long a timeout is put on the lock. + */ + if (mode == WAIT) + { + mysql_cond_wait(&COND_cache_status_changed, &structure_guard_mutex); + } + else if (mode == TIMEOUT) + { + struct timespec waittime; + set_timespec_nsec(waittime,50000000UL); /* Wait for 50 msec */ + int res= mysql_cond_timedwait(&COND_cache_status_changed, + &structure_guard_mutex, &waittime); + if (res == ETIMEDOUT) + break; + } + else + { + /** + If we are here, then mode is == TRY and there was someone else using + the query cache. (m_cache_lock_status != Query_cache::UNLOCKED). + Signal that we didn't get a lock. 
+ */ + DBUG_ASSERT(m_requests_in_progress > 1); + DBUG_ASSERT(mode == TRY); + break; + } + } + } + if (interrupt) + m_requests_in_progress--; + mysql_mutex_unlock(&structure_guard_mutex); + + DBUG_RETURN(interrupt); +} + + +/** + Serialize access to the query cache. + If the lock cannot be granted the thread hangs in a conditional wait which + is signalled on each unlock. + + This method also suspends the query cache so that other threads attempting to + lock the cache with try_lock() will fail directly without waiting. + + It is used by all methods which flushes or destroys the whole cache. + */ + +void Query_cache::lock_and_suspend(void) +{ + THD *thd= current_thd; + Query_cache_wait_state wait_state(thd, __func__, __FILE__, __LINE__); + DBUG_ENTER("Query_cache::lock_and_suspend"); + + mysql_mutex_lock(&structure_guard_mutex); + m_requests_in_progress++; + while (m_cache_lock_status != Query_cache::UNLOCKED) + mysql_cond_wait(&COND_cache_status_changed, &structure_guard_mutex); + m_cache_lock_status= Query_cache::LOCKED_NO_WAIT; +#ifndef DBUG_OFF + /* Here thd may not be set during shutdown */ + if (thd) + m_cache_lock_thread_id= thd->thread_id; +#endif + /* Wake up everybody, a whole cache flush is starting! */ + mysql_cond_broadcast(&COND_cache_status_changed); + mysql_mutex_unlock(&structure_guard_mutex); + + DBUG_VOID_RETURN; +} + +/** + Serialize access to the query cache. + If the lock cannot be granted the thread hangs in a conditional wait which + is signalled on each unlock. + + It is used by all methods which invalidates one or more tables. 
+ */ + +void Query_cache::lock(THD *thd) +{ + Query_cache_wait_state wait_state(thd, __func__, __FILE__, __LINE__); + DBUG_ENTER("Query_cache::lock"); + + mysql_mutex_lock(&structure_guard_mutex); + m_requests_in_progress++; + fix_local_query_cache_mode(thd); + while (m_cache_lock_status != Query_cache::UNLOCKED) + mysql_cond_wait(&COND_cache_status_changed, &structure_guard_mutex); + m_cache_lock_status= Query_cache::LOCKED; +#ifndef DBUG_OFF + m_cache_lock_thread_id= thd->thread_id; +#endif + mysql_mutex_unlock(&structure_guard_mutex); + + DBUG_VOID_RETURN; +} + + +/** + Set the query cache to UNLOCKED and signal waiting threads. +*/ + +void Query_cache::unlock(void) +{ + DBUG_ENTER("Query_cache::unlock"); + mysql_mutex_lock(&structure_guard_mutex); +#ifndef DBUG_OFF + /* Thd may not be set in resize() at mysqld start */ + THD *thd= current_thd; + if (thd) + DBUG_ASSERT(m_cache_lock_thread_id == thd->thread_id); +#endif + DBUG_ASSERT(m_cache_lock_status == Query_cache::LOCKED || + m_cache_lock_status == Query_cache::LOCKED_NO_WAIT); + m_cache_lock_status= Query_cache::UNLOCKED; + DBUG_PRINT("Query_cache",("Sending signal")); + mysql_cond_signal(&COND_cache_status_changed); + DBUG_ASSERT(m_requests_in_progress > 0); + m_requests_in_progress--; + if (m_requests_in_progress == 0 && m_cache_status == DISABLE_REQUEST) + { + /* No clients => just free query cache */ + free_cache(); + m_cache_status= DISABLED; + } + mysql_mutex_unlock(&structure_guard_mutex); + DBUG_VOID_RETURN; +} + + +/** + Helper function for determine if a SELECT statement has a SQL_NO_CACHE + directive. + + @param sql A pointer to the first white space character after SELECT + + @return + @retval TRUE The character string contains SQL_NO_CACHE + @retval FALSE No directive found. 
+*/ + +static bool has_no_cache_directive(const char *sql) +{ + while (is_white_space(*sql)) + sql++; + + if (my_toupper(system_charset_info, sql[0]) == 'S' && + my_toupper(system_charset_info, sql[1]) == 'Q' && + my_toupper(system_charset_info, sql[2]) == 'L' && + my_toupper(system_charset_info, sql[3]) == '_' && + my_toupper(system_charset_info, sql[4]) == 'N' && + my_toupper(system_charset_info, sql[5]) == 'O' && + my_toupper(system_charset_info, sql[6]) == '_' && + my_toupper(system_charset_info, sql[7]) == 'C' && + my_toupper(system_charset_info, sql[8]) == 'A' && + my_toupper(system_charset_info, sql[9]) == 'C' && + my_toupper(system_charset_info, sql[10]) == 'H' && + my_toupper(system_charset_info, sql[11]) == 'E' && + my_isspace(system_charset_info, sql[12])) + return TRUE; + + return FALSE; +} + + +/***************************************************************************** + Query_cache_block_table method(s) +*****************************************************************************/ + +inline Query_cache_block * Query_cache_block_table::block() +{ + return (Query_cache_block *)(((uchar*)this) - + ALIGN_SIZE(sizeof(Query_cache_block_table)*n) - + ALIGN_SIZE(sizeof(Query_cache_block))); +} + +/***************************************************************************** + Query_cache_block method(s) +*****************************************************************************/ + +void Query_cache_block::init(size_t block_length) +{ + DBUG_ENTER("Query_cache_block::init"); + DBUG_PRINT("qcache", ("init block: %p length: %zu", this, + block_length)); + length = block_length; + used = 0; + type = Query_cache_block::FREE; + n_tables = 0; + DBUG_VOID_RETURN; +} + +void Query_cache_block::destroy() +{ + DBUG_ENTER("Query_cache_block::destroy"); + DBUG_PRINT("qcache", ("destroy block %p, type %d", + this, type)); + type = INCOMPLETE; + DBUG_VOID_RETURN; +} + +uint Query_cache_block::headers_len() +{ + return 
(ALIGN_SIZE(sizeof(Query_cache_block_table)*n_tables) + + ALIGN_SIZE(sizeof(Query_cache_block))); +} + +uchar* Query_cache_block::data(void) +{ + return (uchar*)( ((uchar*)this) + headers_len() ); +} + +Query_cache_query * Query_cache_block::query() +{ +#ifndef DBUG_OFF + if (type != QUERY) + query_cache.wreck(__LINE__, "incorrect block type"); +#endif + return (Query_cache_query *) data(); +} + +Query_cache_table * Query_cache_block::table() +{ +#ifndef DBUG_OFF + if (type != TABLE) + query_cache.wreck(__LINE__, "incorrect block type"); +#endif + return (Query_cache_table *) data(); +} + +Query_cache_result * Query_cache_block::result() +{ +#ifndef DBUG_OFF + if (type != RESULT && type != RES_CONT && type != RES_BEG && + type != RES_INCOMPLETE) + query_cache.wreck(__LINE__, "incorrect block type"); +#endif + return (Query_cache_result *) data(); +} + +Query_cache_block_table * Query_cache_block::table(TABLE_COUNTER_TYPE n) +{ + return ((Query_cache_block_table *) + (((uchar*)this)+ALIGN_SIZE(sizeof(Query_cache_block)) + + n*sizeof(Query_cache_block_table))); +} + + +/***************************************************************************** + * Query_cache_table method(s) + *****************************************************************************/ + +extern "C" +{ +uchar *query_cache_table_get_key(const uchar *record, size_t *length, + my_bool not_used __attribute__((unused))) +{ + Query_cache_block* table_block = (Query_cache_block*) record; + *length = (table_block->used - table_block->headers_len() - + ALIGN_SIZE(sizeof(Query_cache_table))); + return (((uchar *) table_block->data()) + + ALIGN_SIZE(sizeof(Query_cache_table))); +} +} + +/***************************************************************************** + Query_cache_query methods +*****************************************************************************/ + +/* + Following methods work for block read/write locking only in this + particular case and in interaction with structure_guard_mutex. 
+ + Lock for write prevents any other locking. (exclusive use) + Lock for read prevents only locking for write. +*/ + +inline void Query_cache_query::lock_writing() +{ + RW_WLOCK(&lock); +} + + +/* + Needed for finding queries, that we may delete from cache. + We don't want to wait while block become unlocked. In addition, + block locking means that query is now used and we don't need to + remove it. +*/ + +bool Query_cache_query::try_lock_writing() +{ + DBUG_ENTER("Query_cache_block::try_lock_writing"); + if (mysql_rwlock_trywrlock(&lock) != 0) + { + DBUG_PRINT("info", ("can't lock rwlock")); + DBUG_RETURN(0); + } + DBUG_PRINT("info", ("rwlock %p locked", &lock)); + DBUG_RETURN(1); +} + + +inline void Query_cache_query::lock_reading() +{ + RW_RLOCK(&lock); +} + + +inline void Query_cache_query::unlock_writing() +{ + RW_UNLOCK(&lock); +} + + +inline void Query_cache_query::unlock_reading() +{ + RW_UNLOCK(&lock); +} + + +void Query_cache_query::init_n_lock() +{ + DBUG_ENTER("Query_cache_query::init_n_lock"); + res=0; wri = 0; len = 0; ready= 0; hit_count = 0; + mysql_rwlock_init(key_rwlock_query_cache_query_lock, &lock); + lock_writing(); + DBUG_PRINT("qcache", ("inited & locked query for block %p", + (uchar*) this - + ALIGN_SIZE(sizeof(Query_cache_block)))); + DBUG_VOID_RETURN; +} + + +void Query_cache_query::unlock_n_destroy() +{ + DBUG_ENTER("Query_cache_query::unlock_n_destroy"); + DBUG_PRINT("qcache", ("destroyed & unlocked query for block %p", + (uchar*) this - + ALIGN_SIZE(sizeof(Query_cache_block)))); + /* + The following call is not needed on system where one can destroy an + active semaphore + */ + this->unlock_writing(); + mysql_rwlock_destroy(&lock); + DBUG_VOID_RETURN; +} + + +extern "C" +{ +uchar *query_cache_query_get_key(const uchar *record, size_t *length, + my_bool not_used) +{ + Query_cache_block *query_block = (Query_cache_block*) record; + *length = (query_block->used - query_block->headers_len() - + ALIGN_SIZE(sizeof(Query_cache_query))); + 
return (((uchar *) query_block->data()) + + ALIGN_SIZE(sizeof(Query_cache_query))); +} +} + +/***************************************************************************** + Functions to store things into the query cache +*****************************************************************************/ + +/* + Note on double-check locking (DCL) usage. + + Below, in query_cache_insert(), query_cache_abort() and + Query_cache::end_of_result() we use what is called double-check + locking (DCL) for Query_cache_tls::first_query_block. + I.e. we test it first without a lock, and, if positive, test again + under the lock. + + This means that if we see 'first_query_block == 0' without a + lock we will skip the operation. But this is safe here: when we + started to cache a query, we called Query_cache::store_query(), and + 'first_query_block' was set to non-zero in this thread (and the + thread always sees results of its memory operations, mutex or not). + If later we see 'first_query_block == 0' without locking a + mutex, that may only mean that some other thread have reset it by + invalidating the query. Skipping the operation in this case is the + right thing to do, as first_query_block won't get non-zero for + this query again. + + See also comments in Query_cache::store_query() and + Query_cache::send_result_to_client(). + + NOTE, however, that double-check locking is not applicable in + 'invalidate' functions, as we may erroneously skip invalidation, + because the thread doing invalidation may never see non-zero + 'first_query_block'. +*/ + + +/** + libmysql convenience wrapper to insert data into query cache. +*/ +void query_cache_insert(void *thd_arg, const char *packet, size_t length, + unsigned pkt_nr) +{ + THD *thd= (THD*) thd_arg; + + /* + Current_thd can be NULL when a new connection is immediately ended + due to "Too many connections". thd->store_globals() has not been + called at this time and hence set_current_thd(this) has not been + called for this thread. 
+ */ + + if (unlikely(!thd)) + return; + + query_cache.insert(thd, &thd->query_cache_tls, + packet, (size_t)length, + pkt_nr); +} + + +/** + Insert the packet into the query cache. +*/ + +void +Query_cache::insert(THD *thd, Query_cache_tls *query_cache_tls, + const char *packet, size_t length, + unsigned pkt_nr) +{ + DBUG_ENTER("Query_cache::insert"); + + /* First we check if query cache is disable without doing a mutex lock */ + if (is_disabled() || query_cache_tls->first_query_block == NULL) + DBUG_VOID_RETURN; + + QC_DEBUG_SYNC("wait_in_query_cache_insert"); + + /* + Lock the cache with try_lock(). try_lock() will fail if + cache was disabled between the above test and lock. + */ + if (try_lock(thd, Query_cache::WAIT)) + DBUG_VOID_RETURN; + + Query_cache_block *query_block = query_cache_tls->first_query_block; + if (query_block == NULL) + { + /* + We lost the writer and the currently processed query has been + invalidated; there is nothing left to do. + */ + unlock(); + DBUG_VOID_RETURN; + } + BLOCK_LOCK_WR(query_block); + Query_cache_query *header= query_block->query(); + Query_cache_block *result= header->result(); + + DUMP(this); + DBUG_PRINT("qcache", ("insert packet %zu bytes long",length)); + + /* + On success, STRUCT_UNLOCK is done by append_result_data. Otherwise, we + still need structure_guard_mutex to free the query, and therefore unlock + it later in this function. 
+ */ + if (!append_result_data(&result, length, (uchar*) packet, + query_block)) + { + DBUG_PRINT("warning", ("Can't append data")); + header->result(result); + DBUG_PRINT("qcache", ("free query %p", query_block)); + // The following call will remove the lock on query_block + query_cache.free_query(query_block); + query_cache.refused++; + // append_result_data no success => we need unlock + unlock(); + DBUG_VOID_RETURN; + } + + header->result(result); + header->last_pkt_nr= pkt_nr; + BLOCK_UNLOCK_WR(query_block); + DBUG_EXECUTE("check_querycache",check_integrity(0);); + + DBUG_VOID_RETURN; +} + + +void +Query_cache::abort(THD *thd, Query_cache_tls *query_cache_tls) +{ + DBUG_ENTER("query_cache_abort"); + + /* See the comment on double-check locking usage above. */ + if (is_disabled() || query_cache_tls->first_query_block == NULL) + DBUG_VOID_RETURN; + + if (try_lock(thd, Query_cache::WAIT)) + DBUG_VOID_RETURN; + + /* + While we were waiting another thread might have changed the status + of the writer. Make sure the writer still exists before continue. + */ + Query_cache_block *query_block= query_cache_tls->first_query_block; + if (query_block) + { + THD_STAGE_INFO(thd, stage_storing_result_in_query_cache); + DUMP(this); + BLOCK_LOCK_WR(query_block); + // The following call will remove the lock on query_block + free_query(query_block); + query_cache_tls->first_query_block= NULL; + DBUG_EXECUTE("check_querycache", check_integrity(1);); + } + + unlock(); + + DBUG_VOID_RETURN; +} + + +void Query_cache::end_of_result(THD *thd) +{ + Query_cache_block *query_block; + Query_cache_tls *query_cache_tls= &thd->query_cache_tls; + ulonglong limit_found_rows= thd->limit_found_rows; + DBUG_ENTER("Query_cache::end_of_result"); + + /* See the comment on double-check locking usage above. */ + if (query_cache_tls->first_query_block == NULL) + DBUG_VOID_RETURN; + + /* Ensure that only complete results are cached. 
*/ + DBUG_ASSERT(thd->get_stmt_da()->is_eof()); + + if (thd->killed) + { + query_cache_abort(thd, &thd->query_cache_tls); + DBUG_VOID_RETURN; + } + +#ifdef EMBEDDED_LIBRARY + insert(thd, query_cache_tls, (char*)thd, + emb_count_querycache_size(thd), 0); +#endif + + if (try_lock(thd, Query_cache::WAIT)) + { + if (is_disabled()) + query_cache_tls->first_query_block= NULL; // do not try again with QC + DBUG_VOID_RETURN; + } + + query_block= query_cache_tls->first_query_block; + if (query_block) + { + /* + The writer is still present; finish last result block by chopping it to + suitable size if needed and setting block type. Since this is the last + block, the writer should be dropped. + */ + THD_STAGE_INFO(thd, stage_storing_result_in_query_cache); + DUMP(this); + BLOCK_LOCK_WR(query_block); + Query_cache_query *header= query_block->query(); + Query_cache_block *last_result_block; + size_t align_size; + size_t len; + + if (header->result() == 0) + { + DBUG_PRINT("error", ("End of data with no result blocks; " + "Query '%s' removed from cache.", header->query())); + /* + Extra safety: empty result should not happen in the normal call + to this function. In the release version that query should be ignored + and removed from QC. + */ + DBUG_ASSERT(0); + free_query(query_block); + unlock(); + DBUG_VOID_RETURN; + } + last_result_block= header->result()->prev; + align_size= ALIGN_SIZE(last_result_block->used); + len= MY_MAX(query_cache.min_allocation_unit, align_size); + if (last_result_block->length >= query_cache.min_allocation_unit + len) + query_cache.split_block(last_result_block,len); + + header->found_rows(limit_found_rows); + header->set_results_ready(); // signal for plugin + header->result()->type= Query_cache_block::RESULT; + + /* Drop the writer. 
*/ + header->writer(0); + query_cache_tls->first_query_block= NULL; + BLOCK_UNLOCK_WR(query_block); + DBUG_EXECUTE("check_querycache", check_integrity(1);); + } + + unlock(); + DBUG_VOID_RETURN; +} + +void query_cache_invalidate_by_MyISAM_filename(const char *filename) +{ + query_cache.invalidate_by_MyISAM_filename(filename); + DBUG_EXECUTE("check_querycache",query_cache.check_integrity(0);); +} + + +/* + The following function forms part of the C plugin API +*/ +extern "C" +void mysql_query_cache_invalidate4(THD *thd, + const char *key, unsigned key_length, + int using_trx) +{ + query_cache.invalidate(thd, key, (uint32) key_length, (my_bool) using_trx); +} + + +/***************************************************************************** + Query_cache methods +*****************************************************************************/ + +Query_cache::Query_cache(size_t query_cache_limit_arg, + size_t min_allocation_unit_arg, + size_t min_result_data_size_arg, + uint def_query_hash_size_arg, + uint def_table_hash_size_arg) + :query_cache_size(0), + query_cache_limit(query_cache_limit_arg), + queries_in_cache(0), hits(0), inserts(0), refused(0), + total_blocks(0), lowmem_prunes(0), + m_cache_status(OK), + min_allocation_unit(ALIGN_SIZE(min_allocation_unit_arg)), + min_result_data_size(ALIGN_SIZE(min_result_data_size_arg)), + def_query_hash_size(ALIGN_SIZE(def_query_hash_size_arg)), + def_table_hash_size(ALIGN_SIZE(def_table_hash_size_arg)), + initialized(0) +{ + size_t min_needed= (ALIGN_SIZE(sizeof(Query_cache_block)) + + ALIGN_SIZE(sizeof(Query_cache_block_table)) + + ALIGN_SIZE(sizeof(Query_cache_query)) + 3); + set_if_bigger(min_allocation_unit,min_needed); + this->min_allocation_unit= ALIGN_SIZE(min_allocation_unit); + set_if_bigger(this->min_result_data_size,min_allocation_unit); +} + + +size_t Query_cache::resize(size_t query_cache_size_arg) +{ + size_t new_query_cache_size; + DBUG_ENTER("Query_cache::resize"); + DBUG_PRINT("qcache", ("from %zu to 
%zu",query_cache_size, + query_cache_size_arg)); + DBUG_ASSERT(initialized); + + lock_and_suspend(); + + /* + Wait for all readers and writers to exit. When the list of all queries + is iterated over with a block level lock, we are done. + */ + Query_cache_block *block= queries_blocks; + if (block) + { + do + { + BLOCK_LOCK_WR(block); + Query_cache_query *query= block->query(); + if (query->writer()) + { + /* + Drop the writer; this will cancel any attempts to store + the processed statement associated with this writer. + */ + query->writer()->first_query_block= NULL; + query->writer(0); + refused++; + } + query->unlock_n_destroy(); + block= block->next; + } while (block != queries_blocks); + queries_blocks= NULL; // avoid second destroying by free_cache + } + free_cache(); + + query_cache_size= query_cache_size_arg; + new_query_cache_size= init_cache(); + + /* + m_cache_status is internal query cache switch so switching it on/off + will not be reflected on global_system_variables.query_cache_type + */ + if (new_query_cache_size && global_system_variables.query_cache_type != 0) + { + DBUG_EXECUTE("check_querycache",check_integrity(1);); + m_cache_status= OK; // size > 0 => enable cache + } + else + m_cache_status= DISABLED; // size 0 means the cache disabled + + unlock(); + DBUG_RETURN(new_query_cache_size); +} + + +size_t Query_cache::set_min_res_unit(size_t size) +{ + DBUG_ASSERT(size % 8 == 0); + if (size < min_allocation_unit) + size= ALIGN_SIZE(min_allocation_unit); + return (min_result_data_size= size); +} + + +void Query_cache::store_query(THD *thd, TABLE_LIST *tables_used) +{ + TABLE_COUNTER_TYPE local_tables; + size_t tot_length; + const char *query; + size_t query_length; + uint8 tables_type; + DBUG_ENTER("Query_cache::store_query"); + /* + Testing 'query_cache_size' without a lock here is safe: the thing + we may loose is that the query won't be cached, but we save on + mutex locking in the case when query cache is disabled or the + query is uncachable. 
+ + See also a note on double-check locking usage above. + */ + if (!thd->query_cache_is_applicable || query_cache_size == 0) + { + DBUG_PRINT("qcache", ("Query cache not ready")); + DBUG_VOID_RETURN; + } + if (thd->lex->sql_command != SQLCOM_SELECT) + { + DBUG_PRINT("qcache", ("Ignoring not SELECT command")); + DBUG_VOID_RETURN; + } + + /* + Do not store queries while tracking transaction state. + The tracker already flags queries that actually have + transaction tracker items, but this will make behavior + more straight forward. + */ +#ifndef EMBEDDED_LIBRARY + if (thd->variables.session_track_transaction_info != TX_TRACK_NONE) + { + DBUG_PRINT("qcache", ("Do not work with transaction tracking")); + DBUG_VOID_RETURN; + } +#endif //EMBEDDED_LIBRARY + + + /* The following assert fails if we haven't called send_result_to_client */ + DBUG_ASSERT(thd->base_query.is_alloced() || + thd->base_query.ptr() == thd->query()); + + tables_type= 0; + if ((local_tables= is_cacheable(thd, thd->lex, tables_used, + &tables_type))) + { + NET *net= &thd->net; + Query_cache_query_flags flags; + // fill all gaps between fields with 0 to get repeatable key + bzero(&flags, QUERY_CACHE_FLAGS_SIZE); + flags.client_long_flag= MY_TEST(thd->client_capabilities & CLIENT_LONG_FLAG); + flags.client_protocol_41= MY_TEST(thd->client_capabilities & + CLIENT_PROTOCOL_41); + flags.client_extended_metadata= MY_TEST(thd->client_capabilities & + MARIADB_CLIENT_EXTENDED_METADATA); + flags.client_depr_eof= MY_TEST(thd->client_capabilities & + CLIENT_DEPRECATE_EOF); + /* + Protocol influences result format, so statement results in the binary + protocol (COM_EXECUTE) cannot be served to statements asking for results + in the text protocol (COM_QUERY) and vice-versa. + */ + flags.protocol_type= (unsigned int) thd->protocol->type(); + /* PROTOCOL_LOCAL results are not cached. 
*/ + DBUG_ASSERT(flags.protocol_type != (unsigned int) Protocol::PROTOCOL_LOCAL); + flags.more_results_exists= MY_TEST(thd->server_status & + SERVER_MORE_RESULTS_EXISTS); + flags.in_trans= thd->in_active_multi_stmt_transaction(); + flags.autocommit= MY_TEST(thd->server_status & SERVER_STATUS_AUTOCOMMIT); + flags.pkt_nr= net->pkt_nr; + flags.character_set_client_num= + thd->variables.character_set_client->number; + flags.character_set_results_num= + (thd->variables.character_set_results ? + thd->variables.character_set_results->number : + UINT_MAX); + flags.collation_connection_num= + thd->variables.collation_connection->number; + flags.limit= thd->variables.select_limit; + flags.time_zone= thd->variables.time_zone; + flags.sql_mode= thd->variables.sql_mode; + flags.max_sort_length= thd->variables.max_sort_length; + flags.lc_time_names= thd->variables.lc_time_names; + flags.group_concat_max_len= thd->variables.group_concat_max_len; + flags.div_precision_increment= thd->variables.div_precincrement; + flags.default_week_format= thd->variables.default_week_format; + DBUG_PRINT("qcache", ("\ +long %d, 4.1: %d, ex metadata: %d, eof: %d, bin_proto: %d, more results %d, pkt_nr: %d, \ +CS client: %u, CS result: %u, CS conn: %u, limit: %llu, TZ: %p, \ +sql mode: 0x%llx, sort len: %llu, concat len: %u, div_precision: %zu, \ +def_week_frmt: %zu, in_trans: %d, autocommit: %d", + (int)flags.client_long_flag, + (int)flags.client_protocol_41, + (int)flags.client_extended_metadata, + (int)flags.client_depr_eof, + (int)flags.protocol_type, + (int)flags.more_results_exists, + flags.pkt_nr, + flags.character_set_client_num, + flags.character_set_results_num, + flags.collation_connection_num, + (ulonglong)flags.limit, + flags.time_zone, + flags.sql_mode, + flags.max_sort_length, + flags.group_concat_max_len, + flags.div_precision_increment, + flags.default_week_format, + (int)flags.in_trans, + (int)flags.autocommit)); + + /* + A table- or a full flush operation can potentially take a 
long time to + finish. We choose not to wait for them and skip caching statements + instead. + + In case the wait time can't be determined there is an upper limit which + causes try_lock() to abort with a time out. + + The 'TIMEOUT' parameter indicate that the lock is allowed to timeout + + */ + if (try_lock(thd, Query_cache::TIMEOUT)) + DBUG_VOID_RETURN; + if (query_cache_size == 0) + { + unlock(); + DBUG_VOID_RETURN; + } + DUMP(this); + + if (ask_handler_allowance(thd, tables_used)) + { + refused++; + unlock(); + DBUG_VOID_RETURN; + } + + query= thd->base_query.ptr(); + query_length= thd->base_query.length(); + + /* Key is query + database + flag */ + if (thd->db.length) + { + memcpy((char*) (query + query_length + 1 + QUERY_CACHE_DB_LENGTH_SIZE), + thd->db.str, thd->db.length); + DBUG_PRINT("qcache", ("database: %s length: %u", + thd->db.str, (unsigned) thd->db.length)); + } + else + { + DBUG_PRINT("qcache", ("No active database")); + } + tot_length= (query_length + thd->db.length + 1 + + QUERY_CACHE_DB_LENGTH_SIZE + QUERY_CACHE_FLAGS_SIZE); + /* + We should only copy structure (don't use it location directly) + because of alignment issue + */ + memcpy((void*) (query + (tot_length - QUERY_CACHE_FLAGS_SIZE)), + &flags, QUERY_CACHE_FLAGS_SIZE); + + /* Check if another thread is processing the same query? 
*/ + Query_cache_block *competitor = (Query_cache_block *) + my_hash_search(&queries, (uchar*) query, tot_length); + DBUG_PRINT("qcache", ("competitor %p", competitor)); + if (competitor == 0) + { + /* Query is not in cache and no one is working with it; Store it */ + Query_cache_block *query_block; + query_block= write_block_data(tot_length, (uchar*) query, + ALIGN_SIZE(sizeof(Query_cache_query)), + Query_cache_block::QUERY, local_tables); + if (query_block != 0) + { + DBUG_PRINT("qcache", ("query block %p allocated, %zu", + query_block, query_block->used)); + + Query_cache_query *header = query_block->query(); + header->init_n_lock(); + if (my_hash_insert(&queries, (uchar*) query_block)) + { + refused++; + DBUG_PRINT("qcache", ("insertion in query hash")); + header->unlock_n_destroy(); + free_memory_block(query_block); + unlock(); + goto end; + } + if (!register_all_tables(thd, query_block, tables_used, local_tables)) + { + refused++; + DBUG_PRINT("warning", ("tables list including failed")); + my_hash_delete(&queries, (uchar *) query_block); + header->unlock_n_destroy(); + free_memory_block(query_block); + unlock(); + goto end; + } + double_linked_list_simple_include(query_block, &queries_blocks); + inserts++; + queries_in_cache++; + thd->query_cache_tls.first_query_block= query_block; + header->writer(&thd->query_cache_tls); + header->tables_type(tables_type); + + unlock(); + + DEBUG_SYNC(thd, "wait_in_query_cache_store_query"); + + // init_n_lock make query block locked + BLOCK_UNLOCK_WR(query_block); + } + else + { + // We have not enough memory to store query => do nothing + refused++; + unlock(); + DBUG_PRINT("warning", ("Can't allocate query")); + } + } + else + { + // Another thread is processing the same query => do nothing + refused++; + unlock(); + DBUG_PRINT("qcache", ("Another thread process same query")); + } + } + else + statistic_increment(refused, &structure_guard_mutex); + +end: + DBUG_VOID_RETURN; +} + + +#ifndef EMBEDDED_LIBRARY +/** + Send a 
single memory block from the query cache. + + Respects the client/server protocol limits for the + size of the network packet, and splits a large block + in pieces to ensure that individual piece doesn't exceed + the maximal allowed size of the network packet (16M). + + @param[in] net NET handler + @param[in] packet packet to send + @param[in] len packet length + + @return Operation status + @retval FALSE On success + @retval TRUE On error +*/ +static bool +send_data_in_chunks(NET *net, const uchar *packet, size_t len) +{ + /* + On the client we may require more memory than max_allowed_packet + to keep, both, the truncated last logical packet, and the + compressed next packet. This never (or in practice never) + happens without compression, since without compression it's very + unlikely that a) a truncated logical packet would remain on the + client when it's time to read the next packet b) a subsequent + logical packet that is being read would be so large that + size-of-new-packet + size-of-old-packet-tail > + max_allowed_packet. To remedy this issue, we send data in 1MB + sized packets, that's below the current client default of 16MB + for max_allowed_packet, but large enough to ensure there is no + unnecessary overhead from too many syscalls per result set. + */ + static const size_t MAX_CHUNK_LENGTH= 1024*1024; + + while (len > MAX_CHUNK_LENGTH) + { + if (net_real_write(net, packet, MAX_CHUNK_LENGTH)) + return TRUE; + packet+= MAX_CHUNK_LENGTH; + len-= MAX_CHUNK_LENGTH; + } + if (len && net_real_write(net, packet, len)) + return TRUE; + + return FALSE; +} +#endif + + +/** + Build a normalized table name suitable for query cache engine callback + + This consist of normalized directory '/' normalized_file_name + followed by suffix. + Suffix is needed for partitioned tables. 
+*/ + +size_t build_normalized_name(char *buff, size_t bufflen, + const char *db, size_t db_len, + const char *table_name, size_t table_len, + size_t suffix_len) +{ + uint errors; + size_t length; + char *pos= buff, *end= buff+bufflen; + DBUG_ENTER("build_normalized_name"); + + (*pos++)= FN_LIBCHAR; + length= strconvert(system_charset_info, db, db_len, + &my_charset_filename, pos, bufflen - 3, + &errors); + pos+= length; + (*pos++)= FN_LIBCHAR; + length= strconvert(system_charset_info, table_name, table_len, + &my_charset_filename, pos, (uint) (end - pos), + &errors); + pos+= length; + if (pos + suffix_len < end) + pos= strmake(pos, table_name + table_len, suffix_len); + + DBUG_RETURN((size_t) (pos - buff)); +} + + +/* + Check if the query is in the cache. If it was cached, send it + to the user. + + @param thd Pointer to the thread handler + @param org_sql A pointer to the sql statement * + @param query_length Length of the statement in characters + + @return status code + @retval 0 Query was not cached. + @retval 1 The query was cached and user was sent the result. + @retval -1 The query was cached but we didn't have rights to use it. + + In case of -1, no error is sent to the client. + + *) The buffer must be allocated memory of size: + tot_length= query_length + thd->db.length + 1 + QUERY_CACHE_FLAGS_SIZE; +*/ + +int +Query_cache::send_result_to_client(THD *thd, char *org_sql, uint query_length) +{ + ulonglong engine_data; + Query_cache_query *query; +#ifndef EMBEDDED_LIBRARY + Query_cache_block *first_result_block; +#endif + Query_cache_block *result_block; + Query_cache_block_table *block_table, *block_table_end; + size_t tot_length; + Query_cache_query_flags flags; + const char *sql, *sql_end, *found_brace= 0; + DBUG_ENTER("Query_cache::send_result_to_client"); + + /* + Testing without a lock here is safe: the thing + we may loose is that the query won't be served from cache, but we + save on mutex locking in the case when query cache is disabled. 
+ + See also a note on double-check locking usage above. + */ + if (is_disabled() || thd->locked_tables_mode || + thd->variables.query_cache_type == 0) + goto err; + + /* + The following can only happen for prepared statements that was found + during parsing or later that the query was not cacheable. + */ + if (!thd->lex->safe_to_cache_query) + { + DBUG_PRINT("qcache", ("SELECT is non-cacheable")); + goto err; + } + + /* + Don't allow serving from Query_cache while tracking transaction + state. This is a safeguard in case an otherwise matching query + was added to the cache before tracking was turned on. + */ +#ifndef EMBEDDED_LIBRARY + if (thd->variables.session_track_transaction_info != TX_TRACK_NONE) + { + DBUG_PRINT("qcache", ("Do not work with transaction tracking")); + goto err; + } +#endif //EMBEDDED_LIBRARY + + + thd->query_cache_is_applicable= 1; + sql= org_sql; sql_end= sql + query_length; + + /* + Skip all comments at start of query. The following tests is false for + all normal queries. 
+ */ + if (!my_isalpha(system_charset_info, *sql)) + { + while (sql < sql_end) + { + char current= *sql; + switch (current) { + case '/': + if (sql[1] != '*') + break; + sql+= 2; // Skip '/*' + if (*sql == '!') + { + /* + Found / *!number comment; Skip number to see if sql + starts with 'select' + */ + sql++; + while (my_isdigit(system_charset_info, *sql)) + sql++; + } + else + { + while (sql++ < sql_end) + { + if (sql[-1] == '*' && *sql == '/') + { + sql++; + break; + } + } + } + continue; + case '-': + if (sql[1] != '-' || !is_white_space(sql[2])) // Not a comment + break; + sql++; // Skip first '-' + /* Fall through */ + case '#': + while (++sql < sql_end) + { + if (*sql == '\n') + { + sql++; // Skip '\n' + break; + } + } + /* Continue with analyzing current symbol */ + continue; + case '\r': + case '\n': + case '\t': + case ' ': + sql++; + continue; + case '(': // To handle (select a from t1) union (select a from t1); + if (!found_brace) + { + found_brace= sql; + sql++; + continue; + } + /* fall through */ + default: + break; + } + /* We only come here when we found the first word of the sql */ + break; + } + } + if ((my_toupper(system_charset_info, sql[0]) != 'S' || + my_toupper(system_charset_info, sql[1]) != 'E' || + my_toupper(system_charset_info, sql[2]) != 'L') && + (my_toupper(system_charset_info, sql[0]) != 'W' || + my_toupper(system_charset_info, sql[1]) != 'I' || + my_toupper(system_charset_info, sql[2]) != 'T')) + { + DBUG_PRINT("qcache", ("The statement is not a SELECT; Not cached")); + goto err; + } + + if ((sql_end - sql) > 20 && has_no_cache_directive(sql+6)) + { + /* + We do not increase 'refused' statistics here since it will be done + later when the query is parsed. + */ + DBUG_PRINT("qcache", ("The statement has a SQL_NO_CACHE directive")); + goto err; + } + { + /* + We have allocated buffer space (in alloc_query) to hold the + SQL statement(s) + the current database name + a flags struct. 
+ If the database name has changed during execution, which might + happen if there are multiple statements, we need to make + sure the new current database has a name with the same length + as the previous one. + */ + size_t db_len= uint2korr(sql_end+1); + if (thd->db.length != db_len) + { + /* + We should probably reallocate the buffer in this case, + but for now we just leave it uncached + */ + + DBUG_PRINT("qcache", + ("Current database has changed since start of query")); + goto err; + } + } + /* + Try to obtain an exclusive lock on the query cache. If the cache is + disabled or if a full cache flush is in progress, the attempt to + get the lock is aborted. + + The TIMEOUT parameter indicate that the lock is allowed to timeout. + */ + if (try_lock(thd, Query_cache::TIMEOUT)) + goto err; + + if (query_cache_size == 0) + { + thd->query_cache_is_applicable= 0; // Query can't be cached + goto err_unlock; + } + + Query_cache_block *query_block; + if (thd->variables.query_cache_strip_comments) + { + if (found_brace) + sql= found_brace; + make_base_query(&thd->base_query, sql, (size_t) (sql_end - sql), + thd->db.length + 1 + QUERY_CACHE_DB_LENGTH_SIZE + + QUERY_CACHE_FLAGS_SIZE); + sql= thd->base_query.ptr(); + query_length= thd->base_query.length(); + } + else + { + sql= org_sql; + thd->base_query.set(sql, query_length, system_charset_info); + } + + tot_length= (query_length + 1 + QUERY_CACHE_DB_LENGTH_SIZE + + thd->db.length + QUERY_CACHE_FLAGS_SIZE); + + if (thd->db.length) + { + memcpy((uchar*) sql + query_length + 1 + QUERY_CACHE_DB_LENGTH_SIZE, + thd->db.str, thd->db.length); + DBUG_PRINT("qcache", ("database: '%s' length: %u", + thd->db.str, (uint) thd->db.length)); + } + else + { + DBUG_PRINT("qcache", ("No active database")); + } + + THD_STAGE_INFO(thd, stage_checking_query_cache_for_query); + + // fill all gaps between fields with 0 to get repeatable key + bzero(&flags, QUERY_CACHE_FLAGS_SIZE); + flags.client_long_flag= MY_TEST(thd->client_capabilities & 
CLIENT_LONG_FLAG); + flags.client_protocol_41= MY_TEST(thd->client_capabilities & + CLIENT_PROTOCOL_41); + flags.client_extended_metadata= MY_TEST(thd->client_capabilities & + MARIADB_CLIENT_EXTENDED_METADATA); + flags.client_depr_eof= MY_TEST(thd->client_capabilities & + CLIENT_DEPRECATE_EOF); + flags.protocol_type= (unsigned int) thd->protocol->type(); + flags.more_results_exists= MY_TEST(thd->server_status & + SERVER_MORE_RESULTS_EXISTS); + flags.in_trans= thd->in_active_multi_stmt_transaction(); + flags.autocommit= MY_TEST(thd->server_status & SERVER_STATUS_AUTOCOMMIT); + flags.pkt_nr= thd->net.pkt_nr; + flags.character_set_client_num= thd->variables.character_set_client->number; + flags.character_set_results_num= + (thd->variables.character_set_results ? + thd->variables.character_set_results->number : + UINT_MAX); + flags.collation_connection_num= thd->variables.collation_connection->number; + flags.limit= thd->variables.select_limit; + flags.time_zone= thd->variables.time_zone; + flags.sql_mode= thd->variables.sql_mode; + flags.max_sort_length= thd->variables.max_sort_length; + flags.group_concat_max_len= thd->variables.group_concat_max_len; + flags.div_precision_increment= thd->variables.div_precincrement; + flags.default_week_format= thd->variables.default_week_format; + flags.lc_time_names= thd->variables.lc_time_names; + DBUG_PRINT("qcache", ("\ +long %d, 4.1: %d, ex metadata: %d, eof: %d, bin_proto: %d, more results %d, pkt_nr: %d, \ +CS client: %u, CS result: %u, CS conn: %u, limit: %llu, TZ: %p, \ +sql mode: 0x%llx, sort len: %llu, concat len: %u, div_precision: %zu, \ +def_week_frmt: %zu, in_trans: %d, autocommit: %d", + (int)flags.client_long_flag, + (int)flags.client_protocol_41, + (int)flags.client_extended_metadata, + (int)flags.client_depr_eof, + (int)flags.protocol_type, + (int)flags.more_results_exists, + flags.pkt_nr, + flags.character_set_client_num, + flags.character_set_results_num, + flags.collation_connection_num, + (ulonglong) 
flags.limit, + flags.time_zone, + flags.sql_mode, + flags.max_sort_length, + flags.group_concat_max_len, + flags.div_precision_increment, + flags.default_week_format, + (int)flags.in_trans, + (int)flags.autocommit)); + memcpy((uchar *)(sql + (tot_length - QUERY_CACHE_FLAGS_SIZE)), + (uchar*) &flags, QUERY_CACHE_FLAGS_SIZE); + +#ifdef WITH_WSREP + bool once_more; + once_more= true; +lookup: +#endif /* WITH_WSREP */ + + query_block = (Query_cache_block *) my_hash_search(&queries, (uchar*) sql, + tot_length); + /* Quick abort on unlocked data */ + if (query_block == 0 || + query_block->query()->result() == 0 || + query_block->query()->result()->type != Query_cache_block::RESULT) + { + DBUG_PRINT("qcache", ("No query in query hash or no results")); + goto err_unlock; + } + DBUG_PRINT("qcache", ("Query in query hash %p",query_block)); + +#ifdef WITH_WSREP + if (once_more && WSREP_CLIENT(thd) && wsrep_must_sync_wait(thd)) + { + unlock(); + if (wsrep_sync_wait(thd)) + goto err; + if (try_lock(thd, Query_cache::TIMEOUT)) + goto err; + once_more= false; + goto lookup; + } +#endif /* WITH_WSREP */ + + /* Now lock and test that nothing changed while blocks was unlocked */ + BLOCK_LOCK_RD(query_block); + + query = query_block->query(); + result_block= query->result(); +#ifndef EMBEDDED_LIBRARY + first_result_block= result_block; +#endif + + if (result_block == 0 || result_block->type != Query_cache_block::RESULT) + { + /* The query is probably yet processed */ + DBUG_PRINT("qcache", ("query found, but no data or data incomplete")); + BLOCK_UNLOCK_RD(query_block); + goto err_unlock; + } + DBUG_PRINT("qcache", ("Query have result %p", query)); + + if (thd->in_multi_stmt_transaction_mode() && + (query->tables_type() & HA_CACHE_TBL_TRANSACT)) + { + DBUG_PRINT("qcache", + ("we are in transaction and have transaction tables in query")); + BLOCK_UNLOCK_RD(query_block); + goto err_unlock; + } + + // Check access; + THD_STAGE_INFO(thd, stage_checking_privileges_on_cached_query); + 
block_table= query_block->table(0); + block_table_end= block_table+query_block->n_tables; + for (; block_table != block_table_end; block_table++) + { + TABLE_LIST table_list; + TMP_TABLE_SHARE *tmptable; + Query_cache_table *table = block_table->parent; + + /* + Check that we do not have temporary tables with same names as that of + base tables from this query. If we have such tables, we will not send + data from query cache, because temporary tables hide real tables by which + query in query cache was made. + */ + if ((tmptable= + thd->find_tmp_table_share_w_base_key((char *) table->data(), + table->key_length()))) + { + DBUG_PRINT("qcache", + ("Temporary table detected: '%s.%s'", + tmptable->db.str, tmptable->table_name.str)); + unlock(); + /* + We should not store result of this query because it contain + temporary tables => assign following variable to make check + faster. + */ + thd->query_cache_is_applicable= 0; // Query can't be cached + thd->lex->safe_to_cache_query= 0; // For prepared statements + BLOCK_UNLOCK_RD(query_block); + DBUG_RETURN(-1); + } + + bzero((char*) &table_list,sizeof(table_list)); + table_list.db.str= table->db(); + table_list.db.length= strlen(table_list.db.str); + table_list.alias.str= table_list.table_name.str= table->table(); + table_list.alias.length= table_list.table_name.length= strlen(table->table()); + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (check_table_access(thd,SELECT_ACL,&table_list, FALSE, 1,TRUE)) + { + DBUG_PRINT("qcache", + ("probably no SELECT access to %s.%s => return to normal processing", + table_list.db.str, table_list.alias.str)); + unlock(); + thd->query_cache_is_applicable= 0; // Query can't be cached + thd->lex->safe_to_cache_query= 0; // For prepared statements + BLOCK_UNLOCK_RD(query_block); + DBUG_RETURN(-1); // Privilege error + } + if (table_list.grant.want_privilege) + { + DBUG_PRINT("qcache", ("Need to check column privileges for %s.%s", + table_list.db.str, table_list.alias.str)); + 
BLOCK_UNLOCK_RD(query_block); + thd->query_cache_is_applicable= 0; // Query can't be cached + thd->lex->safe_to_cache_query= 0; // For prepared statements + goto err_unlock; // Parse query + } +#endif /*!NO_EMBEDDED_ACCESS_CHECKS*/ + engine_data= table->engine_data(); + if (table->callback()) + { + char qcache_se_key_name[FN_REFLEN + 10]; + size_t qcache_se_key_len, db_length= strlen(table->db()); + engine_data= table->engine_data(); + + qcache_se_key_len= build_normalized_name(qcache_se_key_name, + sizeof(qcache_se_key_name), + table->db(), + db_length, + table->table(), + table->key_length() - + db_length - 2 - + table->suffix_length(), + table->suffix_length()); + + if (!(*table->callback())(thd, qcache_se_key_name, + (uint)qcache_se_key_len, &engine_data)) + { + DBUG_PRINT("qcache", ("Handler does not allow caching for %.*s", + (int)qcache_se_key_len, qcache_se_key_name)); + BLOCK_UNLOCK_RD(query_block); + if (engine_data != table->engine_data()) + { + DBUG_PRINT("qcache", + ("Handler require invalidation queries of %.*s %llu-%llu", + (int)qcache_se_key_len, qcache_se_key_name, + engine_data, table->engine_data())); + invalidate_table_internal((uchar *) table->db(), + table->key_length()); + } + else + { + /* + As this can change from call to call, don't reset set + thd->lex->safe_to_cache_query + */ + thd->query_cache_is_applicable= 0; // Query can't be cached + } + /* + End the statement transaction potentially started by engine. + Currently our engines do not request rollback from callbacks. + If this is going to change code needs to be reworked. + */ + DBUG_ASSERT(! 
thd->transaction_rollback_request); + trans_rollback_stmt(thd); + goto err_unlock; // Parse query + } + } + else + DBUG_PRINT("qcache", ("handler allow caching %s,%s", + table_list.db.str, table_list.alias.str)); + } + move_to_query_list_end(query_block); + hits++; + query->increment_hits(); + unlock(); + + /* + Send cached result to client + */ +#ifndef EMBEDDED_LIBRARY + THD_STAGE_INFO(thd, stage_sending_cached_result_to_client); + do + { + DBUG_PRINT("qcache", ("Results (len: %zu used: %zu headers: %u)", + result_block->length, result_block->used, + (uint) (result_block->headers_len()+ + ALIGN_SIZE(sizeof(Query_cache_result))))); + + Query_cache_result *result = result_block->result(); + if (send_data_in_chunks(&thd->net, result->data(), + result_block->used - + result_block->headers_len() - + ALIGN_SIZE(sizeof(Query_cache_result)))) + break; // Client aborted + result_block = result_block->next; + thd->net.pkt_nr= query->last_pkt_nr; // Keep packet number updated + } while (result_block != first_result_block); +#else + { + Querycache_stream qs(result_block, result_block->headers_len() + + ALIGN_SIZE(sizeof(Query_cache_result))); + emb_load_querycache_result(thd, &qs); + } +#endif /*!EMBEDDED_LIBRARY*/ + + thd->set_sent_row_count(thd->limit_found_rows = query->found_rows()); + thd->status_var.last_query_cost= 0.0; + thd->query_plan_flags= (thd->query_plan_flags & ~QPLAN_QC_NO) | QPLAN_QC; + if (!thd->get_sent_row_count()) + status_var_increment(thd->status_var.empty_queries); + else + status_var_add(thd->status_var.rows_sent, thd->get_sent_row_count()); + + /* + End the statement transaction potentially started by an + engine callback. We ignore the return value for now, + since as long as EOF packet is part of the query cache + response, we can't handle it anyway. 
+ */ + (void) trans_commit_stmt(thd); + thd->get_stmt_da()->disable_status(); + + BLOCK_UNLOCK_RD(query_block); + MYSQL_QUERY_CACHE_HIT(thd->query(), thd->limit_found_rows); + DBUG_RETURN(1); // Result sent to client + +err_unlock: + unlock(); + MYSQL_QUERY_CACHE_MISS(thd->query()); + /* + query_plan_flags doesn't have to be changed here as it contains + QPLAN_QC_NO by default + */ + DBUG_RETURN(0); // Query was not cached + +err: + thd->query_cache_is_applicable= 0; // Query can't be cached + DBUG_RETURN(0); // Query was not cached +} + + +/* + Remove all cached queries that uses any of the tables in the list +*/ + +void Query_cache::invalidate(THD *thd, TABLE_LIST *tables_used, + my_bool using_transactions) +{ + DBUG_ENTER("Query_cache::invalidate (table list)"); + if (is_disabled()) + DBUG_VOID_RETURN; + + using_transactions= using_transactions && thd->in_multi_stmt_transaction_mode(); + for (; tables_used; tables_used= tables_used->next_local) + { + DBUG_ASSERT(!using_transactions || tables_used->table!=0); + if (tables_used->derived) + continue; + if (using_transactions && + (tables_used->table->file->table_cache_type() == + HA_CACHE_TBL_TRANSACT)) + /* + tables_used->table can't be 0 in transaction. + Only 'drop' invalidate not opened table, but 'drop' + force transaction finish. 
+ */ + thd->add_changed_table(tables_used->table); + else + invalidate_table(thd, tables_used); + } + + DEBUG_SYNC(thd, "wait_after_query_cache_invalidate"); + + DBUG_VOID_RETURN; +} + +void Query_cache::invalidate(THD *thd, CHANGED_TABLE_LIST *tables_used) +{ + DBUG_ENTER("Query_cache::invalidate (changed table list)"); + if (is_disabled()) + DBUG_VOID_RETURN; + + for (; tables_used; tables_used= tables_used->next) + { + THD_STAGE_INFO(thd, stage_invalidating_query_cache_entries_table_list); + invalidate_table(thd, (uchar*) tables_used->key, tables_used->key_length); + DBUG_PRINT("qcache", ("db: %s table: %s", tables_used->key, + tables_used->key+ + strlen(tables_used->key)+1)); + } + DBUG_VOID_RETURN; +} + + +/* + Invalidate locked for write + + SYNOPSIS + Query_cache::invalidate_locked_for_write() + tables_used - table list + + NOTE + can be used only for opened tables +*/ +void Query_cache::invalidate_locked_for_write(THD *thd, + TABLE_LIST *tables_used) +{ + DBUG_ENTER("Query_cache::invalidate_locked_for_write"); + if (is_disabled()) + DBUG_VOID_RETURN; + + for (; tables_used; tables_used= tables_used->next_local) + { + THD_STAGE_INFO(thd, stage_invalidating_query_cache_entries_table); + if (tables_used->lock_type >= TL_FIRST_WRITE && + tables_used->table) + { + invalidate_table(thd, tables_used->table); + } + } + DBUG_VOID_RETURN; +} + +/* + Remove all cached queries that uses the given table +*/ + +void Query_cache::invalidate(THD *thd, TABLE *table, + my_bool using_transactions) +{ + DBUG_ENTER("Query_cache::invalidate (table)"); + if (is_disabled()) + DBUG_VOID_RETURN; + + using_transactions= using_transactions && thd->in_multi_stmt_transaction_mode(); + if (using_transactions && + (table->file->table_cache_type() == HA_CACHE_TBL_TRANSACT)) + thd->add_changed_table(table); + else + invalidate_table(thd, table); + + + DBUG_VOID_RETURN; +} + +void Query_cache::invalidate(THD *thd, const char *key, size_t key_length, + my_bool using_transactions) +{ + 
DBUG_ENTER("Query_cache::invalidate (key)"); + if (is_disabled()) + DBUG_VOID_RETURN; + + using_transactions= using_transactions && thd->in_multi_stmt_transaction_mode(); + if (using_transactions) // used for innodb => has_transactions() is TRUE + thd->add_changed_table(key, key_length); + else + invalidate_table(thd, (uchar*)key, key_length); + + DBUG_VOID_RETURN; +} + + +/** + Remove all cached queries that uses the given database. +*/ + +void Query_cache::invalidate(THD *thd, const char *db) +{ + DBUG_ENTER("Query_cache::invalidate (db)"); + if (is_disabled()) + DBUG_VOID_RETURN; + + DBUG_SLOW_ASSERT(ok_for_lower_case_names(db)); + + bool restart= FALSE; + /* + Lock the query cache and queue all invalidation attempts to avoid + the risk of a race between invalidation, cache inserts and flushes. + */ + lock(thd); + + if (query_cache_size > 0) + { + if (tables_blocks) + { + Query_cache_block *table_block = tables_blocks; + do { + restart= FALSE; + do + { + Query_cache_block *next= table_block->next; + Query_cache_table *table = table_block->table(); + if (strcmp(table->db(),db) == 0) + { + Query_cache_block_table *list_root= table_block->table(0); + invalidate_query_block_list(list_root); + } + + table_block= next; + + /* + If our root node to used tables became null then the last element + in the table list was removed when a query was invalidated; + Terminate the search. + */ + if (tables_blocks == 0) + { + table_block= tables_blocks; + } + /* + If the iterated list has changed underlying structure; + we need to restart the search. + */ + else if (table_block->type == Query_cache_block::FREE) + { + restart= TRUE; + table_block= tables_blocks; + } + /* + The used tables are linked in a circular list; + loop until we return to the beginning. + */ + } while (table_block != tables_blocks); + /* + Invalidating a table will also mean that all cached queries using + this table also will be invalidated. 
This will in turn change the + list of tables associated with these queries and the linked list of + used table will be changed. Because of this we might need to restart + the search when a table has been invalidated. + */ + } while (restart); + } // end if( tables_blocks ) + } + unlock(); + + DBUG_VOID_RETURN; +} + + +void Query_cache::invalidate_by_MyISAM_filename(const char *filename) +{ + DBUG_ENTER("Query_cache::invalidate_by_MyISAM_filename"); + + if (is_disabled()) + DBUG_VOID_RETURN; + + /* Calculate the key outside the lock to make the lock shorter */ + char key[MAX_DBKEY_LENGTH]; + uint32 db_length; + uint key_length= filename_2_table_key(key, filename, &db_length); + THD *thd= current_thd; + invalidate_table(thd,(uchar *)key, key_length); + DBUG_VOID_RETURN; +} + + /* Remove all queries from cache */ + +void Query_cache::flush() +{ + DBUG_ENTER("Query_cache::flush"); + if (is_disabled()) + DBUG_VOID_RETURN; + + QC_DEBUG_SYNC("wait_in_query_cache_flush1"); + + lock_and_suspend(); + if (query_cache_size > 0) + { + DUMP(this); + flush_cache(); + DUMP(this); + } + + DBUG_EXECUTE("check_querycache",query_cache.check_integrity(1);); + unlock(); + DBUG_VOID_RETURN; +} + + +/** + Rearrange the memory blocks and join result in cache in 1 block (if + result length > join_limit) + + @param[in] join_limit If the minimum length of a result block to be joined. + @param[in] iteration_limit The maximum number of packing and joining + sequences. + +*/ + +void Query_cache::pack(THD *thd, size_t join_limit, uint iteration_limit) +{ + DBUG_ENTER("Query_cache::pack"); + + if (is_disabled()) + DBUG_VOID_RETURN; + + /* + If the entire qc is being invalidated we can bail out early + instead of waiting for the lock. 
+ */ + if (try_lock(thd, Query_cache::WAIT)) + DBUG_VOID_RETURN; + + if (query_cache_size == 0) + { + unlock(); + DBUG_VOID_RETURN; + } + + uint i = 0; + do + { + pack_cache(); + } while ((++i < iteration_limit) && join_results(join_limit)); + + unlock(); + DBUG_VOID_RETURN; +} + + +void Query_cache::destroy() +{ + DBUG_ENTER("Query_cache::destroy"); + if (!initialized) + { + DBUG_PRINT("qcache", ("Query Cache not initialized")); + } + else + { + /* Underlying code expects the lock. */ + lock_and_suspend(); + free_cache(); + unlock(); + + mysql_cond_destroy(&COND_cache_status_changed); + mysql_mutex_destroy(&structure_guard_mutex); + initialized = 0; + DBUG_ASSERT(m_requests_in_progress == 0); + } + DBUG_VOID_RETURN; +} + + +void Query_cache::disable_query_cache(THD *thd) +{ + m_cache_status= DISABLE_REQUEST; + /* + If there is no requests in progress try to free buffer. + try_lock(TRY) will exit immediately if there is lock. + unlock() should free block. + */ + if (m_requests_in_progress == 0 && !try_lock(thd, TRY)) + unlock(); +} + + +/***************************************************************************** + init/destroy +*****************************************************************************/ + +void Query_cache::init() +{ + DBUG_ENTER("Query_cache::init"); + mysql_mutex_init(key_structure_guard_mutex, + &structure_guard_mutex, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_cache_status_changed, + &COND_cache_status_changed, NULL); + m_cache_lock_status= Query_cache::UNLOCKED; + m_cache_status= Query_cache::OK; + m_requests_in_progress= 0; + initialized = 1; + /* + Using state_map from latin1 should be fine in all cases: + 1. We do not support UCS2, UTF16, UTF32 as a client character set. + 2. 
The other character sets are compatible on the lower ASCII-range + 0x00-0x20, and have the following characters marked as spaces: + + 0x09 TAB + 0x0A LINE FEED + 0x0B VERTICAL TAB + 0x0C FORM FEED + 0x0D CARRIAGE RETUR + 0x20 SPACE + + Additionally, only some of the ASCII-compatible character sets + (including latin1) can have 0xA0 mapped to "NON-BREAK SPACE" + and thus marked as space. + That should not be a problem for those charsets that map 0xA0 + to something else: the parser will just return syntax error + if this character appears straight in the query + (i.e. not inside a string literal or comment). + */ + query_state_map= my_charset_latin1.state_map; + /* + If we explicitly turn off query cache from the command line query + cache will be disabled for the reminder of the server life + time. This is because we want to avoid locking the QC specific + mutex if query cache isn't going to be used. + */ + if (global_system_variables.query_cache_type == 0) + { + m_cache_status= DISABLE_REQUEST; + free_cache(); + m_cache_status= DISABLED; + } + DBUG_VOID_RETURN; +} + + +size_t Query_cache::init_cache() +{ + size_t mem_bin_count, num, step; + size_t mem_bin_size, prev_size, inc; + size_t max_mem_bin_size, approx_additional_data_size; + int align; + + DBUG_ENTER("Query_cache::init_cache"); + + approx_additional_data_size = (sizeof(Query_cache) + + sizeof(uchar*)*(def_query_hash_size+ + def_table_hash_size)); + if (query_cache_size < approx_additional_data_size) + goto err; + + query_cache_size-= approx_additional_data_size; + align= query_cache_size % ALIGN_SIZE(1); + if (align) + { + query_cache_size-= align; + approx_additional_data_size+= align; + } + + /* + Count memory bins number. + Check section 6. in start comment for the used algorithm. 
+ */ + + max_mem_bin_size = query_cache_size >> QUERY_CACHE_MEM_BIN_FIRST_STEP_PWR2; + mem_bin_count = (uint) ((1 + QUERY_CACHE_MEM_BIN_PARTS_INC) * + QUERY_CACHE_MEM_BIN_PARTS_MUL); + mem_bin_num = 1; + mem_bin_steps = 1; + mem_bin_size = max_mem_bin_size >> QUERY_CACHE_MEM_BIN_STEP_PWR2; + prev_size = 0; + if (mem_bin_size <= min_allocation_unit) + { + DBUG_PRINT("qcache", ("too small query cache => query cache disabled")); + // TODO here (and above) should be warning in 4.1 + goto err; + } + while (mem_bin_size > min_allocation_unit) + { + mem_bin_num += mem_bin_count; + prev_size = mem_bin_size; + mem_bin_size >>= QUERY_CACHE_MEM_BIN_STEP_PWR2; + mem_bin_steps++; + mem_bin_count += QUERY_CACHE_MEM_BIN_PARTS_INC; + mem_bin_count = (uint) (mem_bin_count * QUERY_CACHE_MEM_BIN_PARTS_MUL); + + // Prevent too small bins spacing + if (mem_bin_count > (mem_bin_size >> QUERY_CACHE_MEM_BIN_SPC_LIM_PWR2)) + mem_bin_count= (mem_bin_size >> QUERY_CACHE_MEM_BIN_SPC_LIM_PWR2); + } + inc = (prev_size - mem_bin_size) / mem_bin_count; + mem_bin_num += (mem_bin_count - (min_allocation_unit - mem_bin_size)/inc); + mem_bin_steps++; + additional_data_size = ((mem_bin_num+1) * + ALIGN_SIZE(sizeof(Query_cache_memory_bin))+ + (mem_bin_steps * + ALIGN_SIZE(sizeof(Query_cache_memory_bin_step)))); + + if (query_cache_size < additional_data_size) + goto err; + query_cache_size -= additional_data_size; + + if (!(cache= (uchar *) + my_malloc_lock(query_cache_size+additional_data_size, MYF(0)))) + goto err; +#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DONTDUMP) + if (madvise(cache, query_cache_size+additional_data_size, MADV_DONTDUMP)) + { + DBUG_PRINT("warning", ("coudn't mark query cache memory as " DONTDUMP_STR ": %s", + strerror(errno))); + } +#endif + + DBUG_PRINT("qcache", ("cache length %zu, min unit %zu, %zu bins", + query_cache_size, min_allocation_unit, mem_bin_num)); + + steps = (Query_cache_memory_bin_step *) cache; + bins = ((Query_cache_memory_bin *) + (cache 
+ mem_bin_steps * + ALIGN_SIZE(sizeof(Query_cache_memory_bin_step)))); + + first_block = (Query_cache_block *) (cache + additional_data_size); + first_block->init(query_cache_size); + total_blocks++; + first_block->pnext=first_block->pprev=first_block; + first_block->next=first_block->prev=first_block; + + /* Prepare bins */ + + bins[0].init(max_mem_bin_size); + steps[0].init(max_mem_bin_size,0,0); + mem_bin_count = (uint) ((1 + QUERY_CACHE_MEM_BIN_PARTS_INC) * + QUERY_CACHE_MEM_BIN_PARTS_MUL); + num= step= 1; + mem_bin_size = max_mem_bin_size >> QUERY_CACHE_MEM_BIN_STEP_PWR2; + while (mem_bin_size > min_allocation_unit) + { + size_t incr = (steps[step-1].size - mem_bin_size) / mem_bin_count; + size_t size = mem_bin_size; + for (size_t i= mem_bin_count; i > 0; i--) + { + bins[num+i-1].init(size); + size += incr; + } + num += mem_bin_count; + steps[step].init(mem_bin_size, num-1, incr); + mem_bin_size >>= QUERY_CACHE_MEM_BIN_STEP_PWR2; + step++; + mem_bin_count += QUERY_CACHE_MEM_BIN_PARTS_INC; + mem_bin_count = (uint) (mem_bin_count * QUERY_CACHE_MEM_BIN_PARTS_MUL); + if (mem_bin_count > (mem_bin_size >> QUERY_CACHE_MEM_BIN_SPC_LIM_PWR2)) + mem_bin_count=(mem_bin_size >> QUERY_CACHE_MEM_BIN_SPC_LIM_PWR2); + } + inc = (steps[step-1].size - mem_bin_size) / mem_bin_count; + + /* + num + mem_bin_count > mem_bin_num, but index never be > mem_bin_num + because block with size < min_allocated_unit never will be requested + */ + + steps[step].init(mem_bin_size, num + mem_bin_count - 1, inc); + { + size_t skiped = (min_allocation_unit - mem_bin_size)/inc; + size_t size = mem_bin_size + inc*skiped; + size_t i = mem_bin_count - skiped; + while (i-- > 0) + { + bins[num+i].init(size); + size += inc; + } + } + bins[mem_bin_num].number = 1; // For easy end test in get_free_block + free_memory = free_memory_blocks = 0; + insert_into_free_memory_list(first_block); + + DUMP(this); + + (void) my_hash_init(key_memory_Query_cache, &queries, &my_charset_bin, + def_query_hash_size, 0,0, 
query_cache_query_get_key,0,0); +#ifndef FN_NO_CASE_SENSE + /* + If lower_case_table_names!=0 then db and table names are already + converted to lower case and we can use binary collation for their + comparison (no matter if file system case sensitive or not). + If we have case-sensitive file system (like on most Unixes) and + lower_case_table_names == 0 then we should distinguish my_table + and MY_TABLE cases and so again can use binary collation. + */ + (void) my_hash_init(key_memory_Query_cache, &tables, &my_charset_bin, + def_table_hash_size, 0,0, query_cache_table_get_key, 0,0); +#else + /* + On windows, OS/2, MacOS X with HFS+ or any other case insensitive + file system if lower_case_table_names!=0 we have same situation as + in previous case, but if lower_case_table_names==0 then we should + not distinguish cases (to be compatible in behavior with underlying + file system) and so should use case insensitive collation for + comparison. + */ + (void) my_hash_init(PSI_INSTRUMENT_ME, &tables, lower_case_table_names ? + &my_charset_bin : files_charset_info, + def_table_hash_size, 0,0, query_cache_table_get_key, 0,0); +#endif + + queries_in_cache = 0; + queries_blocks = 0; + DBUG_RETURN(query_cache_size + + additional_data_size + approx_additional_data_size); + +err: + make_disabled(); + DBUG_RETURN(0); +} + + +/* Disable the use of the query cache */ + +void Query_cache::make_disabled() +{ + DBUG_ENTER("Query_cache::make_disabled"); + query_cache_size= 0; + queries_blocks= 0; + free_memory= 0; + free_memory_blocks= 0; + bins= 0; + steps= 0; + cache= 0; + mem_bin_num= mem_bin_steps= 0; + queries_in_cache= 0; + first_block= 0; + total_blocks= 0; + tables_blocks= 0; + DBUG_VOID_RETURN; +} + + +/** + @class Query_cache + Free all resources allocated by the cache. + + This function frees all resources allocated by the cache. You + have to call init_cache() before using the cache again. 
This function + requires the cache to be locked (LOCKED_NO_WAIT, lock_and_suspend) or + disabling. +*/ + +void Query_cache::free_cache() +{ + DBUG_ENTER("Query_cache::free_cache"); + + DBUG_ASSERT(m_cache_lock_status == LOCKED_NO_WAIT || + m_cache_status == DISABLE_REQUEST); + + /* Destroy locks */ + Query_cache_block *block= queries_blocks; + if (block) + { + do + { + Query_cache_query *query= block->query(); + /* + There will not be new requests but some maybe not finished yet, + so wait for them by trying lock/unlock + */ + BLOCK_LOCK_WR(block); + BLOCK_UNLOCK_WR(block); + + mysql_rwlock_destroy(&query->lock); + block= block->next; + } while (block != queries_blocks); + } + +#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP) + if (madvise(cache, query_cache_size+additional_data_size, MADV_DODUMP)) + { + DBUG_PRINT("warning", ("coudn't mark query cache memory as " DODUMP_STR ": %s", + strerror(errno))); + } +#endif + my_free(cache); + make_disabled(); + my_hash_free(&queries); + my_hash_free(&tables); + DBUG_VOID_RETURN; +} + +/***************************************************************************** + Free block data +*****************************************************************************/ + + +/** + Flush the cache. + + This function will flush cache contents. It assumes we have + 'structure_guard_mutex' locked. The function sets the m_cache_status flag and + releases the lock, so other threads may proceed skipping the cache as if it + is disabled. Concurrent flushes are performed in turn. + After flush_cache() call, the cache is flushed, all the freed memory is + accumulated in bin[0], and the 'structure_guard_mutex' is locked. However, + since we could release the mutex during execution, the rest of the cache + state could have been changed, and should not be relied on. 
+*/ + +void Query_cache::flush_cache() +{ + QC_DEBUG_SYNC("wait_in_query_cache_flush2"); + + my_hash_reset(&queries); + while (queries_blocks != 0) + { + BLOCK_LOCK_WR(queries_blocks); + free_query_internal(queries_blocks); + } +} + +/* + Free oldest query that is not in use by another thread. + Returns 1 if we couldn't remove anything +*/ + +my_bool Query_cache::free_old_query() +{ + DBUG_ENTER("Query_cache::free_old_query"); + if (queries_blocks) + { + /* + try_lock_writing used to prevent client because here lock + sequence is breached. + Also we don't need remove locked queries at this point. + */ + Query_cache_block *query_block= 0; + if (queries_blocks != 0) + { + Query_cache_block *block = queries_blocks; + /* Search until we find first query that we can remove */ + do + { + Query_cache_query *header = block->query(); + if (header->result() != 0 && + header->result()->type == Query_cache_block::RESULT && + block->query()->try_lock_writing()) + { + query_block = block; + break; + } + } while ((block=block->next) != queries_blocks ); + } + + if (query_block != 0) + { + free_query(query_block); + lowmem_prunes++; + DBUG_RETURN(0); + } + } + DBUG_RETURN(1); // Nothing to remove +} + + +/* + free_query_internal() - free query from query cache. + + SYNOPSIS + free_query_internal() + query_block Query_cache_block representing the query + + DESCRIPTION + This function will remove the query from a cache, and place its + memory blocks to the list of free blocks. 'query_block' must be + locked for writing, this function will release (and destroy) this + lock. + + NOTE + 'query_block' should be removed from 'queries' hash _before_ + calling this method, as the lock will be destroyed here. 
+*/ + +void Query_cache::free_query_internal(Query_cache_block *query_block) +{ + DBUG_ENTER("Query_cache::free_query_internal"); + DBUG_PRINT("qcache", ("free query %p %zu bytes result", + query_block, + query_block->query()->length() )); + + queries_in_cache--; + + Query_cache_query *query= query_block->query(); + + if (query->writer() != 0) + { + /* Tell MySQL that this query should not be cached anymore */ + query->writer()->first_query_block= NULL; + query->writer(0); + } + double_linked_list_exclude(query_block, &queries_blocks); + Query_cache_block_table *table= query_block->table(0); + + for (TABLE_COUNTER_TYPE i= 0; i < query_block->n_tables; i++) + unlink_table(table++); + Query_cache_block *result_block= query->result(); + + /* + The following is true when query destruction was called and no results + in query . (query just registered and then abort/pack/flush called) + */ + if (result_block != 0) + { + if (result_block->type != Query_cache_block::RESULT) + { + // removing unfinished query + refused++; + inserts--; + } + Query_cache_block *block= result_block; + do + { + Query_cache_block *current= block; + block= block->next; + free_memory_block(current); + } while (block != result_block); + } + else + { + // removing unfinished query + refused++; + inserts--; + } + + query->unlock_n_destroy(); + free_memory_block(query_block); + + DBUG_VOID_RETURN; +} + + +/* + free_query() - free query from query cache. + + SYNOPSIS + free_query() + query_block Query_cache_block representing the query + + DESCRIPTION + This function will remove 'query_block' from 'queries' hash, and + then call free_query_internal(), which see. 
+*/ + +void Query_cache::free_query(Query_cache_block *query_block) +{ + DBUG_ENTER("Query_cache::free_query"); + DBUG_PRINT("qcache", ("free query %p %zu bytes result", + query_block, + query_block->query()->length() )); + + my_hash_delete(&queries,(uchar *) query_block); + free_query_internal(query_block); + + DBUG_VOID_RETURN; +} + +/***************************************************************************** + Query data creation +*****************************************************************************/ + +Query_cache_block * +Query_cache::write_block_data(size_t data_len, uchar* data, + size_t header_len, + Query_cache_block::block_type type, + TABLE_COUNTER_TYPE ntab) +{ + size_t all_headers_len = (ALIGN_SIZE(sizeof(Query_cache_block)) + + ALIGN_SIZE(ntab*sizeof(Query_cache_block_table)) + + header_len); + size_t len = data_len + all_headers_len; + size_t align_len= ALIGN_SIZE(len); + DBUG_ENTER("Query_cache::write_block_data"); + DBUG_PRINT("qcache", ("data: %zd, header: %zd, all header: %zd", + data_len, header_len, all_headers_len)); + Query_cache_block *block= allocate_block(MY_MAX(align_len, + min_allocation_unit),1, 0); + if (block != 0) + { + block->type = type; + block->n_tables = ntab; + block->used = len; + + memcpy((uchar *) block+ all_headers_len, data, data_len); + } + DBUG_RETURN(block); +} + + +my_bool +Query_cache::append_result_data(Query_cache_block **current_block, + size_t data_len, uchar* data, + Query_cache_block *query_block) +{ + DBUG_ENTER("Query_cache::append_result_data"); + DBUG_PRINT("qcache", ("append %zu bytes to %p query", + data_len, query_block)); + + if (query_block->query()->add(data_len) > query_cache_limit) + { + DBUG_PRINT("qcache", ("size limit reached %zu > %zu", + query_block->query()->length(), + query_cache_limit)); + DBUG_RETURN(0); + } + if (*current_block == 0) + { + DBUG_PRINT("qcache", ("allocated first result data block %zu", data_len)); + DBUG_RETURN(write_result_data(current_block, data_len, data, 
query_block, + Query_cache_block::RES_BEG)); + } + Query_cache_block *last_block = (*current_block)->prev; + + DBUG_PRINT("qcache", ("lastblock %p len %zu used %zu", + last_block, last_block->length, + last_block->used)); + my_bool success = 1; + size_t last_block_free_space= last_block->length - last_block->used; + + /* + We will first allocate and write the 'tail' of data, that doesn't fit + in the 'last_block'. Only if this succeeds, we will fill the last_block. + This saves us a memcpy if the query doesn't fit in the query cache. + */ + + // Try join blocks if physically next block is free... + size_t tail = data_len - last_block_free_space; + size_t append_min = get_min_append_result_data_size(); + if (last_block_free_space < data_len && + append_next_free_block(last_block, + MY_MAX(tail, append_min))) + last_block_free_space = last_block->length - last_block->used; + // If no space in last block (even after join) allocate new block + if (last_block_free_space < data_len) + { + DBUG_PRINT("qcache", ("allocate new block for %zu bytes", + data_len-last_block_free_space)); + Query_cache_block *new_block = 0; + success = write_result_data(&new_block, data_len-last_block_free_space, + (uchar*)(((uchar*)data)+last_block_free_space), + query_block, + Query_cache_block::RES_CONT); + /* + new_block may be != 0 even !success (if write_result_data + allocate a small block but failed to allocate continue) + */ + if (new_block != 0) + double_linked_list_join(last_block, new_block); + } + else + { + // It is success (nobody can prevent us write data) + unlock(); + } + + // Now finally write data to the last block + if (success && last_block_free_space > 0) + { + size_t to_copy = MY_MIN(data_len,last_block_free_space); + DBUG_PRINT("qcache", ("use free space %zub at block %p to copy %zub", + last_block_free_space,last_block, to_copy)); + memcpy((uchar*) last_block + last_block->used, data, to_copy); + last_block->used+=to_copy; + } + DBUG_RETURN(success); +} + + +my_bool 
Query_cache::write_result_data(Query_cache_block **result_block, + size_t data_len, uchar* data, + Query_cache_block *query_block, + Query_cache_block::block_type type) +{ + DBUG_ENTER("Query_cache::write_result_data"); + DBUG_PRINT("qcache", ("data_len %zu",data_len)); + + /* + Reserve block(s) for filling + During data allocation we must have structure_guard_mutex locked. + As data copy is not a fast operation, it's better if we don't have + structure_guard_mutex locked during data coping. + Thus we first allocate space and lock query, then unlock + structure_guard_mutex and copy data. + */ + + my_bool success = allocate_data_chain(result_block, data_len, query_block, + type == Query_cache_block::RES_BEG); + if (success) + { + // It is success (nobody can prevent us write data) + unlock(); + uint headers_len = (ALIGN_SIZE(sizeof(Query_cache_block)) + + ALIGN_SIZE(sizeof(Query_cache_result))); +#ifndef EMBEDDED_LIBRARY + Query_cache_block *block= *result_block; + uchar *rest= data; + // Now fill list of blocks that created by allocate_data_chain + do + { + block->type = type; + size_t length = block->used - headers_len; + DBUG_PRINT("qcache", ("write %zu byte in block %p",length, + block)); + memcpy((uchar*) block+headers_len, rest, length); + rest += length; + block = block->next; + type = Query_cache_block::RES_CONT; + } while (block != *result_block); +#else + /* + Set type of first block, emb_store_querycache_result() will handle + the others. 
+ */ + (*result_block)->type= type; + Querycache_stream qs(*result_block, headers_len); + emb_store_querycache_result(&qs, (THD*)data); +#endif /*!EMBEDDED_LIBRARY*/ + } + else + { + if (*result_block != 0) + { + // Destroy list of blocks that was created & locked by lock_result_data + Query_cache_block *block = *result_block; + do + { + Query_cache_block *current = block; + block = block->next; + free_memory_block(current); + } while (block != *result_block); + *result_block = 0; + /* + It is not success => not unlock structure_guard_mutex (we need it to + free query) + */ + } + } + DBUG_PRINT("qcache", ("success %d", (int) success)); + DBUG_RETURN(success); +} + +inline size_t Query_cache::get_min_first_result_data_size() +{ + if (queries_in_cache < QUERY_CACHE_MIN_ESTIMATED_QUERIES_NUMBER) + return min_result_data_size; + size_t avg_result = (query_cache_size - free_memory) / queries_in_cache; + avg_result = MY_MIN(avg_result, query_cache_limit); + return MY_MAX(min_result_data_size, avg_result); +} + +inline size_t Query_cache::get_min_append_result_data_size() +{ + return min_result_data_size; +} + +/* + Allocate one or more blocks to hold data +*/ +my_bool Query_cache::allocate_data_chain(Query_cache_block **result_block, + size_t data_len, + Query_cache_block *query_block, + my_bool first_block_arg) +{ + size_t all_headers_len = (ALIGN_SIZE(sizeof(Query_cache_block)) + + ALIGN_SIZE(sizeof(Query_cache_result))); + size_t min_size = (first_block_arg ? 
+ get_min_first_result_data_size(): + get_min_append_result_data_size()); + Query_cache_block *prev_block= NULL; + Query_cache_block *new_block; + DBUG_ENTER("Query_cache::allocate_data_chain"); + DBUG_PRINT("qcache", ("data_len %zu, all_headers_len %zu", + data_len, all_headers_len)); + + do + { + size_t len= data_len + all_headers_len; + size_t align_len= ALIGN_SIZE(len); + + if (!(new_block= allocate_block(MY_MAX(min_size, align_len), + min_result_data_size == 0, + all_headers_len + min_result_data_size))) + { + DBUG_PRINT("warning", ("Can't allocate block for results")); + DBUG_RETURN(FALSE); + } + + new_block->n_tables = 0; + new_block->used = MY_MIN(len, new_block->length); + new_block->type = Query_cache_block::RES_INCOMPLETE; + new_block->next = new_block->prev = new_block; + Query_cache_result *header = new_block->result(); + header->parent(query_block); + + DBUG_PRINT("qcache", ("Block len %zu used %zu", + new_block->length, new_block->used)); + + if (prev_block) + double_linked_list_join(prev_block, new_block); + else + *result_block= new_block; + if (new_block->length >= len) + break; + + /* + We got less memory then we need (no big memory blocks) => + Continue to allocated more blocks until we got everything we need. + */ + data_len= len - new_block->length; + prev_block= new_block; + } while (1); + + DBUG_RETURN(TRUE); +} + +/***************************************************************************** + Tables management +*****************************************************************************/ + +/* + Invalidate the first table in the table_list +*/ + +void Query_cache::invalidate_table(THD *thd, TABLE_LIST *table_list) +{ + if (table_list->table != 0) + invalidate_table(thd, table_list->table); // Table is open + else + { + const char *key; + uint key_length; + key_length= get_table_def_key(table_list, &key); + + // We don't store temporary tables => no key_length+=4 ... 
+ invalidate_table(thd, (uchar *)key, key_length); + } +} + +void Query_cache::invalidate_table(THD *thd, TABLE *table) +{ + invalidate_table(thd, (uchar*) table->s->table_cache_key.str, + table->s->table_cache_key.length); +} + +void Query_cache::invalidate_table(THD *thd, uchar * key, size_t key_length) +{ + DEBUG_SYNC(thd, "wait_in_query_cache_invalidate1"); + + /* + Lock the query cache and queue all invalidation attempts to avoid + the risk of a race between invalidation, cache inserts and flushes. + */ + lock(thd); + + DEBUG_SYNC(thd, "wait_in_query_cache_invalidate2"); + + if (query_cache_size > 0) + invalidate_table_internal(key, key_length); + + unlock(); +} + + +/** + Try to locate and invalidate a table by name. + The caller must ensure that no other thread is trying to work with + the query cache when this function is executed. + + @pre structure_guard_mutex is acquired or LOCKED is set. +*/ + +void +Query_cache::invalidate_table_internal(uchar *key, size_t key_length) +{ + Query_cache_block *table_block= + (Query_cache_block*)my_hash_search(&tables, key, key_length); + if (table_block) + { + Query_cache_block_table *list_root= table_block->table(0); + invalidate_query_block_list(list_root); + } +} + +/** + Invalidate a linked list of query cache blocks. + + Each block tries to acquire a block level lock before + free_query is a called. This function will in turn affect + related table- and result-blocks. + + @param[in,out] thd Thread context. + @param[in,out] list_root A pointer to a circular list of query blocks. 
+ +*/ + +void +Query_cache::invalidate_query_block_list(Query_cache_block_table *list_root) +{ + while (list_root->next != list_root) + { + Query_cache_block *query_block= list_root->next->block(); + BLOCK_LOCK_WR(query_block); + free_query(query_block); + } +} + +/* + Register given table list beginning with given position in tables table of + block + + SYNOPSIS + Query_cache::register_tables_from_list + thd thread handle + tables_used given table list + counter number current position in table of tables of block + block_table pointer to current position in tables table of block + + RETURN + 0 error + number of next position of table entry in table of tables of block +*/ + +TABLE_COUNTER_TYPE +Query_cache::register_tables_from_list(THD *thd, TABLE_LIST *tables_used, + TABLE_COUNTER_TYPE counter, + Query_cache_block_table **block_table) +{ + TABLE_COUNTER_TYPE n; + DBUG_ENTER("Query_cache::register_tables_from_list"); + for (n= counter; + tables_used; + tables_used= tables_used->next_global, n++, (*block_table)++) + { + if (tables_used->is_anonymous_derived_table() || + tables_used->table_function) + { + DBUG_PRINT("qcache", ("derived table or table function skipped")); + n--; + (*block_table)--; + continue; + } + (*block_table)->n= n; + if (tables_used->view) + { + const char *key; + uint key_length; + DBUG_PRINT("qcache", ("view: %s db: %s", + tables_used->view_name.str, + tables_used->view_db.str)); + key_length= get_table_def_key(tables_used, &key); + /* + There are not callback function for for VIEWs + */ + if (!insert_table(thd, key_length, key, (*block_table), + tables_used->view_db.length, 0, + HA_CACHE_TBL_NONTRANSACT, 0, 0, TRUE)) + goto err_cleanup; + /* + We do not need to register view tables here because they are already + present in the global list. 
+ */ + } + else + { + DBUG_PRINT("qcache", + ("table: %s db: %s openinfo: %p keylen: %zu key: %p", + tables_used->table->s->table_name.str, + tables_used->table->s->table_cache_key.str, + tables_used->table, + tables_used->table->s->table_cache_key.length, + tables_used->table->s->table_cache_key.str)); + + if (!insert_table(thd, tables_used->table->s->table_cache_key.length, + tables_used->table->s->table_cache_key.str, + (*block_table), + tables_used->db.length, 0, + tables_used->table->file->table_cache_type(), + tables_used->callback_func, + tables_used->engine_data, + TRUE)) + goto err_cleanup; + + if (tables_used->table->file-> + register_query_cache_dependant_tables(thd, this, block_table, &n)) + DBUG_RETURN(0); + } + } + DBUG_RETURN(n - counter); +err_cleanup: + // Mark failed + (*block_table)->next= (*block_table)->prev= NULL; + (*block_table)->parent= NULL; + DBUG_RETURN(0); +} + +/* + Store all used tables + + SYNOPSIS + register_all_tables() + thd Thread handle + block Store tables in this block + tables_used List if used tables + tables_arg Not used ? +*/ + +my_bool Query_cache::register_all_tables(THD *thd, + Query_cache_block *block, + TABLE_LIST *tables_used, + TABLE_COUNTER_TYPE tables_arg) +{ + TABLE_COUNTER_TYPE n; + DBUG_PRINT("qcache", ("register tables block %p, n %d, header %x", + block, (int) tables_arg, + (int) ALIGN_SIZE(sizeof(Query_cache_block)))); + + Query_cache_block_table *block_table = block->table(0); + + n= register_tables_from_list(thd, tables_used, 0, &block_table); + + if (n==0) + { + /* Unlink the tables we allocated above */ + for (Query_cache_block_table *tmp = block->table(0) ; + tmp != block_table; + tmp++) + { + if (tmp->prev) // not marked as failed and unuseable + unlink_table(tmp); + else + break; + } + if (block_table->parent) + unlink_table(block_table); + } + return MY_TEST(n); +} + + +/** + Insert used table name into the cache. 
+ + @return Error status + @retval FALSE On error + @retval TRUE On success +*/ + +my_bool +Query_cache::insert_table(THD *thd, size_t key_len, const char *key, + Query_cache_block_table *node, size_t db_length, + uint8 suffix_length_arg, + uint8 cache_type, + qc_engine_callback callback, + ulonglong engine_data, + my_bool hash) +{ + DBUG_ENTER("Query_cache::insert_table"); + DBUG_PRINT("qcache", ("insert table node %p, len %zu", + node, key_len)); + + Query_cache_block *table_block= + (hash ? + (Query_cache_block *) my_hash_search(&tables, (uchar*) key, key_len) : + NULL); + + if (table_block && + table_block->table()->engine_data() != engine_data) + { + DBUG_PRINT("qcache", + ("Handler require invalidation queries of %s.%s %llu-%llu", + table_block->table()->db(), + table_block->table()->table(), + engine_data, + table_block->table()->engine_data())); + /* + as far as we delete all queries with this table, table block will be + deleted, too + */ + { + Query_cache_block_table *list_root= table_block->table(0); + invalidate_query_block_list(list_root); + } + + table_block= 0; + } + + if (table_block == 0) + { + DBUG_PRINT("qcache", ("new table block from %p (%u)", + key, (int) key_len)); + table_block= write_block_data(key_len, (uchar*) key, + ALIGN_SIZE(sizeof(Query_cache_table)), + Query_cache_block::TABLE, 1); + if (table_block == 0) + { + DBUG_PRINT("qcache", ("Can't write table name to cache")); + DBUG_RETURN(0); + } + Query_cache_table *header= table_block->table(); + double_linked_list_simple_include(table_block, + &tables_blocks); + /* + First node in the Query_cache_block_table-chain is the table-type + block. This block will only have one Query_cache_block_table (n=0). + */ + Query_cache_block_table *list_root= table_block->table(0); + list_root->n= 0; + + /* + The node list is circular in nature. 
+ */ + list_root->next= list_root->prev= list_root; + + if (hash && + my_hash_insert(&tables, (const uchar *) table_block)) + { + DBUG_PRINT("qcache", ("Can't insert table to hash")); + // write_block_data return locked block + free_memory_block(table_block); + DBUG_RETURN(0); + } + char *db= header->db(); + header->table(db + db_length + 1); + header->key_length((uint32)key_len); + header->suffix_length(suffix_length_arg); + header->type(cache_type); + header->callback(callback); + header->engine_data(engine_data); + header->set_hashed(hash); + + /* + We insert this table without the assumption that it isn't refrenenced by + any queries. + */ + header->m_cached_query_count= 0; + } + + /* + Table is now in the cache; link the table_block-node associated + with the currently processed query into the chain of queries depending + on the cached table. + */ + Query_cache_block_table *list_root= table_block->table(0); + node->next= list_root->next; + list_root->next= node; + node->next->prev= node; + node->prev= list_root; + node->parent= table_block->table(); + /* + Increase the counter to keep track on how long this chain + of queries is. + */ + Query_cache_table *table_block_data= table_block->table(); + table_block_data->m_cached_query_count++; + DBUG_RETURN(1); +} + + +void Query_cache::unlink_table(Query_cache_block_table *node) +{ + DBUG_ENTER("Query_cache::unlink_table"); + node->prev->next= node->next; + node->next->prev= node->prev; + Query_cache_block_table *neighbour= node->next; + Query_cache_table *table_block_data= node->parent; + table_block_data->m_cached_query_count--; + + DBUG_ASSERT(table_block_data->m_cached_query_count >= 0); + + if (neighbour->next == neighbour) + { + DBUG_ASSERT(table_block_data->m_cached_query_count == 0); + /* + If neighbor is root of list, the list is empty. 
+ The root of the list is always a table-type block + which contain exactly one Query_cache_block_table + node object, thus we can use the block() method + to calculate the Query_cache_block address. + */ + Query_cache_block *table_block= neighbour->block(); + double_linked_list_exclude(table_block, + &tables_blocks); + Query_cache_table *header= table_block->table(); + if (header->is_hashed()) + my_hash_delete(&tables,(uchar *) table_block); + free_memory_block(table_block); + } + DBUG_VOID_RETURN; +} + +/***************************************************************************** + Free memory management +*****************************************************************************/ + +Query_cache_block * +Query_cache::allocate_block(size_t len, my_bool not_less, size_t min) +{ + DBUG_ENTER("Query_cache::allocate_block"); + DBUG_PRINT("qcache", ("len %zu, not less %d, min %zu", + len, not_less,min)); + + if (len >= MY_MIN(query_cache_size, query_cache_limit)) + { + DBUG_PRINT("qcache", ("Query cache hase only %zu memory and limit %zu", + query_cache_size, query_cache_limit)); + DBUG_RETURN(0); // in any case we don't have such piece of memory + } + + /* Free old queries until we have enough memory to store this block */ + Query_cache_block *block; + do + { + block= get_free_block(len, not_less, min); + } + while (block == 0 && !free_old_query()); + + if (block != 0) // If we found a suitable block + { + if (block->length >= ALIGN_SIZE(len) + min_allocation_unit) + split_block(block,ALIGN_SIZE(len)); + } + + DBUG_RETURN(block); +} + + +Query_cache_block * +Query_cache::get_free_block(size_t len, my_bool not_less, size_t min) +{ + Query_cache_block *block = 0, *first = 0; + DBUG_ENTER("Query_cache::get_free_block"); + DBUG_PRINT("qcache",("length %zu, not_less %d, min %zu", len, + (int)not_less, min)); + + /* Find block with minimal size > len */ + uint start = find_bin(len); + // try matching bin + if (bins[start].number != 0) + { + Query_cache_block *list = 
bins[start].free_blocks; + if (list->prev->length >= len) // check block with max size + { + first = list; + uint n = 0; + while ( n < QUERY_CACHE_MEM_BIN_TRY && + first->length < len) //we don't need irst->next != list + { + first=first->next; + n++; + } + if (first->length >= len) + block=first; + else // we don't need if (first->next != list) + { + n = 0; + block = list->prev; + while (n < QUERY_CACHE_MEM_BIN_TRY && + block->length > len) + { + block=block->prev; + n++; + } + if (block->length < len) + block=block->next; + } + } + else + first = list->prev; + } + if (block == 0 && start > 0) + { + DBUG_PRINT("qcache",("Try bins with bigger block size")); + // Try more big bins + int i = start - 1; + while (i > 0 && bins[i].number == 0) + i--; + if (bins[i].number > 0) + block = bins[i].free_blocks; + } + + // If no big blocks => try less size (if it is possible) + if (block == 0 && ! not_less) + { + DBUG_PRINT("qcache",("Try to allocate a smaller block")); + if (first != 0 && first->length > min) + block = first; + else + { + uint i = start + 1; + /* bins[mem_bin_num].number contains 1 for easy end test */ + for (i= start+1 ; bins[i].number == 0 ; i++) ; + if (i < mem_bin_num && bins[i].free_blocks->prev->length >= min) + block = bins[i].free_blocks->prev; + } + } + if (block != 0) + exclude_from_free_memory_list(block); + + DBUG_PRINT("qcache",("getting block %p", block)); + DBUG_RETURN(block); +} + + +void Query_cache::free_memory_block(Query_cache_block *block) +{ + DBUG_ENTER("Query_cache::free_memory_block"); + block->used=0; + block->type= Query_cache_block::FREE; // mark block as free in any case + DBUG_PRINT("qcache", + ("first_block %p, block %p, pnext %p pprev %p", + first_block, block, block->pnext, + block->pprev)); + + if (block->pnext != first_block && block->pnext->is_free()) + block = join_free_blocks(block, block->pnext); + if (block != first_block && block->pprev->is_free()) + block = join_free_blocks(block->pprev, block->pprev); + 
insert_into_free_memory_list(block); + DBUG_VOID_RETURN; +} + + +void Query_cache::split_block(Query_cache_block *block, size_t len) +{ + DBUG_ENTER("Query_cache::split_block"); + Query_cache_block *new_block = (Query_cache_block*)(((uchar*) block)+len); + + new_block->init(block->length - len); + total_blocks++; + block->length=len; + new_block->pnext = block->pnext; + block->pnext = new_block; + new_block->pprev = block; + new_block->pnext->pprev = new_block; + + if (block->type == Query_cache_block::FREE) + { + // if block was free then it already joined with all free neighbours + insert_into_free_memory_list(new_block); + } + else + free_memory_block(new_block); + + DBUG_PRINT("qcache", ("split %p (%zu) new %p", + block, len, new_block)); + DBUG_VOID_RETURN; +} + + +Query_cache_block * +Query_cache::join_free_blocks(Query_cache_block *first_block_arg, + Query_cache_block *block_in_list) +{ + Query_cache_block *second_block; + DBUG_ENTER("Query_cache::join_free_blocks"); + DBUG_PRINT("qcache", + ("join first %p, pnext %p, in list %p", + first_block_arg, first_block_arg->pnext, + block_in_list)); + + exclude_from_free_memory_list(block_in_list); + second_block = first_block_arg->pnext; + // May be was not free block + second_block->used=0; + second_block->destroy(); + total_blocks--; + + first_block_arg->length += second_block->length; + first_block_arg->pnext = second_block->pnext; + second_block->pnext->pprev = first_block_arg; + + DBUG_RETURN(first_block_arg); +} + + +my_bool Query_cache::append_next_free_block(Query_cache_block *block, + size_t add_size) +{ + Query_cache_block *next_block = block->pnext; + DBUG_ENTER("Query_cache::append_next_free_block"); + DBUG_PRINT("enter", ("block %p, add_size %zu", block, + add_size)); + + if (next_block != first_block && next_block->is_free()) + { + size_t old_len = block->length; + exclude_from_free_memory_list(next_block); + next_block->destroy(); + total_blocks--; + + block->length += next_block->length; + 
block->pnext = next_block->pnext; + next_block->pnext->pprev = block; + + if (block->length > ALIGN_SIZE(old_len + add_size) + min_allocation_unit) + split_block(block,ALIGN_SIZE(old_len + add_size)); + DBUG_PRINT("exit", ("block was appended")); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +void Query_cache::exclude_from_free_memory_list(Query_cache_block *free_block) +{ + DBUG_ENTER("Query_cache::exclude_from_free_memory_list"); + Query_cache_memory_bin *bin = *((Query_cache_memory_bin **) + free_block->data()); + double_linked_list_exclude(free_block, &bin->free_blocks); + bin->number--; + free_memory-=free_block->length; + free_memory_blocks--; + DBUG_PRINT("qcache",("exclude block %p, bin %p", free_block, + bin)); + DBUG_VOID_RETURN; +} + +void Query_cache::insert_into_free_memory_list(Query_cache_block *free_block) +{ + DBUG_ENTER("Query_cache::insert_into_free_memory_list"); + uint idx = find_bin(free_block->length); + insert_into_free_memory_sorted_list(free_block, &bins[idx].free_blocks); + /* + We have enough memory in block for storing bin reference due to + min_allocation_unit choice + */ + Query_cache_memory_bin **bin_ptr = ((Query_cache_memory_bin**) + free_block->data()); + *bin_ptr = bins+idx; + (*bin_ptr)->number++; + DBUG_PRINT("qcache",("insert block %p, bin[%d] %p", + free_block, idx, *bin_ptr)); + DBUG_VOID_RETURN; +} + +uint Query_cache::find_bin(size_t size) +{ + DBUG_ENTER("Query_cache::find_bin"); + // Binary search + size_t left = 0, right = mem_bin_steps; + do + { + size_t middle = (left + right) / 2; + if (steps[middle].size > size) + left = middle+1; + else + right = middle; + } while (left < right); + if (left == 0) + { + // first bin not subordinate of common rules + DBUG_PRINT("qcache", ("first bin (# 0), size %zu",size)); + DBUG_RETURN(0); + } + size_t bin = steps[left].idx - + ((size - steps[left].size)/steps[left].increment); + + DBUG_PRINT("qcache", ("bin %zu step %zu, size %zu step size %zu", + bin, left, size, 
steps[left].size)); + DBUG_RETURN((uint)bin); +} + + +/***************************************************************************** + Lists management +*****************************************************************************/ + +void Query_cache::move_to_query_list_end(Query_cache_block *query_block) +{ + DBUG_ENTER("Query_cache::move_to_query_list_end"); + double_linked_list_exclude(query_block, &queries_blocks); + double_linked_list_simple_include(query_block, &queries_blocks); + DBUG_VOID_RETURN; +} + + +void Query_cache::insert_into_free_memory_sorted_list(Query_cache_block * + new_block, + Query_cache_block ** + list) +{ + DBUG_ENTER("Query_cache::insert_into_free_memory_sorted_list"); + /* + list sorted by size in ascendant order, because we need small blocks + more frequently than bigger ones + */ + + new_block->used = 0; + new_block->n_tables = 0; + new_block->type = Query_cache_block::FREE; + + if (*list == 0) + { + *list = new_block->next=new_block->prev=new_block; + DBUG_PRINT("qcache", ("inserted into empty list")); + } + else + { + Query_cache_block *point = *list; + if (point->length >= new_block->length) + { + point = point->prev; + *list = new_block; + } + else + { + /* Find right position in sorted list to put block */ + while (point->next != *list && + point->next->length < new_block->length) + point=point->next; + } + new_block->prev = point; + new_block->next = point->next; + new_block->next->prev = new_block; + point->next = new_block; + } + free_memory+=new_block->length; + free_memory_blocks++; + DBUG_VOID_RETURN; +} + + +void +Query_cache::double_linked_list_simple_include(Query_cache_block *point, + Query_cache_block ** + list_pointer) +{ + DBUG_ENTER("Query_cache::double_linked_list_simple_include"); + DBUG_PRINT("qcache", ("including block %p", point)); + if (*list_pointer == 0) + *list_pointer=point->next=point->prev=point; + else + { + // insert to the end of list + point->next = (*list_pointer); + point->prev = 
(*list_pointer)->prev; + point->prev->next = point; + (*list_pointer)->prev = point; + } + DBUG_VOID_RETURN; +} + +void +Query_cache::double_linked_list_exclude(Query_cache_block *point, + Query_cache_block **list_pointer) +{ + DBUG_ENTER("Query_cache::double_linked_list_exclude"); + DBUG_PRINT("qcache", ("excluding block %p, list %p", + point, list_pointer)); + if (point->next == point) + *list_pointer = 0; // empty list + else + { + point->next->prev = point->prev; + point->prev->next = point->next; + /* + If the root is removed; select a new root + */ + if (point == *list_pointer) + *list_pointer= point->next; + } + DBUG_VOID_RETURN; +} + + +void Query_cache::double_linked_list_join(Query_cache_block *head_tail, + Query_cache_block *tail_head) +{ + Query_cache_block *head_head = head_tail->next, + *tail_tail = tail_head->prev; + head_head->prev = tail_tail; + head_tail->next = tail_head; + tail_head->prev = head_tail; + tail_tail->next = head_head; +} + +/***************************************************************************** + Query +*****************************************************************************/ + +/* + Collect information about table types, check that tables are cachable and + count them + + SYNOPSIS + process_and_count_tables() + tables_used table list for processing + tables_type pointer to variable for table types collection + + RETURN + 0 error + >0 number of tables +*/ + +TABLE_COUNTER_TYPE +Query_cache::process_and_count_tables(THD *thd, TABLE_LIST *tables_used, + uint8 *tables_type) +{ + DBUG_ENTER("process_and_count_tables"); + TABLE_COUNTER_TYPE table_count = 0; + for (; tables_used; tables_used= tables_used->next_global) + { + table_count++; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + /* + Disable any attempt to store this statement if there are + column level grants on any referenced tables. 
+ The grant.want_privileges flag was set to 1 in the + check_grant() function earlier if the TABLE_LIST object + had any associated column privileges. + + We need to check that the TABLE_LIST object isn't part + of a VIEW definition because we want to be able to cache + views. + + TODO: Although it is possible to cache views, the privilege + check on view tables always fall back on column privileges + even if there are more generic table privileges. Thus it isn't + currently possible to retrieve cached view-tables unless the + client has the super user privileges. + */ + if (tables_used->grant.want_privilege && + tables_used->belong_to_view == NULL) + { + DBUG_PRINT("qcache", ("Don't cache statement as it refers to " + "tables with column privileges.")); + thd->query_cache_is_applicable= 0; // Query can't be cached + thd->lex->safe_to_cache_query= 0; // For prepared statements + DBUG_RETURN(0); + } +#endif + if (tables_used->view) + { + DBUG_PRINT("qcache", ("view: %s db: %s", + tables_used->view_name.str, + tables_used->view_db.str)); + *tables_type|= HA_CACHE_TBL_NONTRANSACT; + continue; + } + if (tables_used->derived || tables_used->table_function) + { + DBUG_PRINT("qcache", ("table: %s", tables_used->alias.str)); + table_count--; + DBUG_PRINT("qcache", (tables_used->table_function ? 
+ "table function skipped" : + "derived table skipped")); + continue; + } + + DBUG_PRINT("qcache", ("table: %s db: %s type: %u", + tables_used->table->s->table_name.str, + tables_used->table->s->db.str, + tables_used->table->s->db_type()->db_type)); + *tables_type|= tables_used->table->file->table_cache_type(); + + /* + table_alias_charset used here because it depends of + lower_case_table_names variable + */ + table_count+= tables_used->table->file-> + count_query_cache_dependant_tables(tables_type); + + if (tables_used->table->s->not_usable_by_query_cache) + { + DBUG_PRINT("qcache", + ("select not cacheable: temporary, system or " + "other non-cacheable table(s)")); + DBUG_RETURN(0); + } + } + DBUG_RETURN(table_count); +} + + +/* +In non-embedded QC intercepts result in net_real_write +but if we have no net.vio then net_real_write +will not be called, so QC can't get results of the query +*/ +#ifdef EMBEDDED_LIBRARY +#define qc_is_able_to_intercept_result(T) 1 +#else +#define qc_is_able_to_intercept_result(T) ((T)->net.vio) +#endif + + +/* + If query is cacheable return number tables in query + (query without tables are not cached) +*/ + +TABLE_COUNTER_TYPE +Query_cache::is_cacheable(THD *thd, LEX *lex, + TABLE_LIST *tables_used, uint8 *tables_type) +{ + TABLE_COUNTER_TYPE table_count; + DBUG_ENTER("Query_cache::is_cacheable"); + + if (thd->lex->safe_to_cache_query && + (thd->variables.query_cache_type == 1 || + (thd->variables.query_cache_type == 2 && + (lex->first_select_lex()->options & OPTION_TO_QUERY_CACHE))) && + qc_is_able_to_intercept_result(thd)) + { + DBUG_PRINT("qcache", ("options: %lx %lx type: %u", + (long) OPTION_TO_QUERY_CACHE, + (long) lex->first_select_lex()->options, + (int) thd->variables.query_cache_type)); + + if (!(table_count= process_and_count_tables(thd, tables_used, + tables_type))) + DBUG_RETURN(0); + + if (thd->in_multi_stmt_transaction_mode() && + ((*tables_type)&HA_CACHE_TBL_TRANSACT)) + { + DBUG_PRINT("qcache", ("not in autocommin 
mode")); + DBUG_RETURN(0); + } + DBUG_PRINT("qcache", ("select is using %d tables", table_count)); + DBUG_RETURN(table_count); + } + + DBUG_PRINT("qcache", + ("not interesting query: %d or not cacheable, options %lx %lx type: %u net->vio present: %u", + (int) lex->sql_command, + (long) OPTION_TO_QUERY_CACHE, + (long) lex->first_select_lex()->options, + (int) thd->variables.query_cache_type, + (uint) MY_TEST(qc_is_able_to_intercept_result(thd)))); + DBUG_RETURN(0); +} + +/* + Check handler allowance to cache query with these tables + + SYNOPSYS + Query_cache::ask_handler_allowance() + thd - thread handlers + tables_used - tables list used in query + + RETURN + 0 - caching allowed + 1 - caching disallowed +*/ +my_bool Query_cache::ask_handler_allowance(THD *thd, + TABLE_LIST *tables_used) +{ + DBUG_ENTER("Query_cache::ask_handler_allowance"); + + for (; tables_used; tables_used= tables_used->next_global) + { + TABLE *table; + handler *handler; + if (!(table= tables_used->table)) + continue; + handler= table->file; + if (!handler->register_query_cache_table(thd, + table->s->normalized_path.str, + (uint)table->s->normalized_path.length, + &tables_used->callback_func, + &tables_used->engine_data)) + { + DBUG_PRINT("qcache", ("Handler does not allow caching for %s", + table->s->normalized_path.str)); + /* + As this can change from call to call, don't reset set + thd->lex->safe_to_cache_query + */ + thd->query_cache_is_applicable= 0; // Query can't be cached + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +/***************************************************************************** + Packing +*****************************************************************************/ + + +/** + Rearrange all memory blocks so that free memory joins at the + 'bottom' of the allocated memory block containing all cache data. 
+ @see Query_cache::pack(size_t join_limit, uint iteration_limit) +*/ + +void Query_cache::pack_cache() +{ + DBUG_ENTER("Query_cache::pack_cache"); + + DBUG_EXECUTE("check_querycache",query_cache.check_integrity(1);); + + uchar *border = 0; + Query_cache_block *before = 0; + size_t gap = 0; + my_bool ok = 1; + Query_cache_block *block = first_block; + DUMP(this); + + if (first_block) + { + do + { + Query_cache_block *next=block->pnext; + ok = move_by_type(&border, &before, &gap, block); + block = next; + } while (ok && block != first_block); + + if (border != 0) + { + Query_cache_block *new_block = (Query_cache_block *) border; + new_block->init(gap); + total_blocks++; + new_block->pnext = before->pnext; + before->pnext = new_block; + new_block->pprev = before; + new_block->pnext->pprev = new_block; + insert_into_free_memory_list(new_block); + } + DUMP(this); + } + + DBUG_EXECUTE("check_querycache",query_cache.check_integrity(1);); + DBUG_VOID_RETURN; +} + + +my_bool Query_cache::move_by_type(uchar **border, + Query_cache_block **before, size_t *gap, + Query_cache_block *block) +{ + DBUG_ENTER("Query_cache::move_by_type"); + + my_bool ok = 1; + switch (block->type) { + case Query_cache_block::FREE: + { + DBUG_PRINT("qcache", ("block %p FREE", block)); + if (*border == 0) + { + *border = (uchar *) block; + *before = block->pprev; + DBUG_PRINT("qcache", ("gap beginning here")); + } + exclude_from_free_memory_list(block); + *gap +=block->length; + block->pprev->pnext=block->pnext; + block->pnext->pprev=block->pprev; + block->destroy(); + total_blocks--; + DBUG_PRINT("qcache", ("added to gap (%zu)", *gap)); + break; + } + case Query_cache_block::TABLE: + { + HASH_SEARCH_STATE record_idx; + DBUG_PRINT("qcache", ("block %p TABLE", block)); + if (*border == 0) + break; + size_t len = block->length, used = block->used; + Query_cache_block_table *list_root = block->table(0); + Query_cache_block_table *tprev = list_root->prev, + *tnext = list_root->next; + Query_cache_block 
*prev = block->prev, + *next = block->next, + *pprev = block->pprev, + *pnext = block->pnext, + *new_block =(Query_cache_block *) *border; + size_t tablename_offset = block->table()->table() - block->table()->db(); + char *data = (char*) block->data(); + uchar *key; + size_t key_length; + key=query_cache_table_get_key((uchar*) block, &key_length, 0); + my_hash_first(&tables, (uchar*) key, key_length, &record_idx); + + block->destroy(); + new_block->init(len); + new_block->type=Query_cache_block::TABLE; + new_block->used=used; + new_block->n_tables=1; + memmove((char*) new_block->data(), data, len-new_block->headers_len()); + relink(block, new_block, next, prev, pnext, pprev); + if (tables_blocks == block) + tables_blocks = new_block; + + Query_cache_block_table *nlist_root = new_block->table(0); + nlist_root->n = 0; + nlist_root->next = tnext; + tnext->prev = nlist_root; + nlist_root->prev = tprev; + tprev->next = nlist_root; + DBUG_PRINT("qcache", + ("list_root: %p tnext %p tprev %p tprev->next %p tnext->prev %p", + list_root, tnext, tprev, + tprev->next,tnext->prev)); + /* + Go through all queries that uses this table and change them to + point to the new table object + */ + Query_cache_table *new_block_table=new_block->table(); + for (;tnext != nlist_root; tnext=tnext->next) + tnext->parent= new_block_table; + *border += len; + *before = new_block; + /* Fix pointer to table name */ + new_block->table()->table(new_block->table()->db() + tablename_offset); + /* Fix hash to point at moved block */ + my_hash_replace(&tables, &record_idx, (uchar*) new_block); + + DBUG_PRINT("qcache", ("moved %zu bytes to %p, new gap at %p", + len, new_block, *border)); + break; + } + case Query_cache_block::QUERY: + { + HASH_SEARCH_STATE record_idx; + DBUG_PRINT("qcache", ("block %p QUERY", block)); + if (*border == 0) + break; + BLOCK_LOCK_WR(block); + size_t len = block->length, used = block->used; + TABLE_COUNTER_TYPE n_tables = block->n_tables; + Query_cache_block *prev = 
block->prev, + *next = block->next, + *pprev = block->pprev, + *pnext = block->pnext, + *new_block =(Query_cache_block*) *border; + char *data = (char*) block->data(); + Query_cache_block *first_result_block = ((Query_cache_query *) + block->data())->result(); + uchar *key; + size_t key_length; + key=query_cache_query_get_key((uchar*) block, &key_length, 0); + my_hash_first(&queries, (uchar*) key, key_length, &record_idx); + block->query()->unlock_n_destroy(); + block->destroy(); + // Move table of used tables + memmove((char*) new_block->table(0), (char*) block->table(0), + ALIGN_SIZE(n_tables*sizeof(Query_cache_block_table))); + new_block->init(len); + new_block->type=Query_cache_block::QUERY; + new_block->used=used; + new_block->n_tables=n_tables; + memmove((char*) new_block->data(), data, len - new_block->headers_len()); + relink(block, new_block, next, prev, pnext, pprev); + if (queries_blocks == block) + queries_blocks = new_block; + Query_cache_block_table *beg_of_table_table= block->table(0), + *end_of_table_table= block->table(n_tables); + uchar *beg_of_new_table_table= (uchar*) new_block->table(0); + + for (TABLE_COUNTER_TYPE j=0; j < n_tables; j++) + { + Query_cache_block_table *block_table = new_block->table(j); + + // use aligment from beginning of table if 'next' is in same block + if ((beg_of_table_table <= block_table->next) && + (block_table->next < end_of_table_table)) + ((Query_cache_block_table *)(beg_of_new_table_table + + (((uchar*)block_table->next) - + ((uchar*)beg_of_table_table))))->prev= + block_table; + else + block_table->next->prev= block_table; + + // use aligment from beginning of table if 'prev' is in same block + if ((beg_of_table_table <= block_table->prev) && + (block_table->prev < end_of_table_table)) + ((Query_cache_block_table *)(beg_of_new_table_table + + (((uchar*)block_table->prev) - + ((uchar*)beg_of_table_table))))->next= + block_table; + else + block_table->prev->next = block_table; + } + DBUG_PRINT("qcache", ("after 
circle tt")); + *border += len; + *before = new_block; + new_block->query()->result(first_result_block); + if (first_result_block != 0) + { + Query_cache_block *result_block = first_result_block; + do + { + result_block->result()->parent(new_block); + result_block = result_block->next; + } while ( result_block != first_result_block ); + } + Query_cache_query *new_query= ((Query_cache_query *) new_block->data()); + mysql_rwlock_init(key_rwlock_query_cache_query_lock, &new_query->lock); + + /* + If someone is writing to this block, inform the writer that the block + has been moved. + */ + Query_cache_tls *query_cache_tls= new_block->query()->writer(); + if (query_cache_tls != NULL) + { + query_cache_tls->first_query_block= new_block; + } + /* Fix hash to point at moved block */ + my_hash_replace(&queries, &record_idx, (uchar*) new_block); + DBUG_PRINT("qcache", ("moved %zu bytes to %p, new gap at %p", + len, new_block, *border)); + break; + } + case Query_cache_block::RES_INCOMPLETE: + case Query_cache_block::RES_BEG: + case Query_cache_block::RES_CONT: + case Query_cache_block::RESULT: + { + DBUG_PRINT("qcache", ("block %p RES* (%d)", block, + (int) block->type)); + if (*border == 0) + break; + Query_cache_block *query_block= block->result()->parent(); + BLOCK_LOCK_WR(query_block); + Query_cache_block *next= block->next, *prev= block->prev; + Query_cache_block::block_type type= block->type; + size_t len = block->length, used = block->used; + Query_cache_block *pprev = block->pprev, + *pnext = block->pnext, + *new_block =(Query_cache_block*) *border; + char *data = (char*) block->data(); + block->destroy(); + new_block->init(len); + new_block->type=type; + new_block->used=used; + memmove((char*) new_block->data(), data, len - new_block->headers_len()); + relink(block, new_block, next, prev, pnext, pprev); + new_block->result()->parent(query_block); + Query_cache_query *query = query_block->query(); + if (query->result() == block) + query->result(new_block); + *border 
+= len; + *before = new_block; + /* If result writing complete && we have free space in block */ + size_t free_space= new_block->length - new_block->used; + free_space-= free_space % ALIGN_SIZE(1); + if (query->result()->type == Query_cache_block::RESULT && + new_block->length > new_block->used && + *gap + free_space > min_allocation_unit && + new_block->length - free_space > min_allocation_unit) + { + *border-= free_space; + *gap+= free_space; + DBUG_PRINT("qcache", + ("rest of result free space added to gap (%zu)", *gap)); + new_block->length -= free_space; + } + BLOCK_UNLOCK_WR(query_block); + DBUG_PRINT("qcache", ("moved %zu bytes to %p, new gap at %p", + len, new_block, *border)); + break; + } + default: + DBUG_PRINT("error", ("unexpected block type %d, block %p", + (int)block->type, block)); + ok = 0; + } + DBUG_RETURN(ok); +} + + +void Query_cache::relink(Query_cache_block *oblock, + Query_cache_block *nblock, + Query_cache_block *next, Query_cache_block *prev, + Query_cache_block *pnext, Query_cache_block *pprev) +{ + if (prev == oblock) //check pointer to himself + { + nblock->prev = nblock; + nblock->next = nblock; + } + else + { + nblock->prev = prev; + prev->next=nblock; + } + if (next != oblock) + { + nblock->next = next; + next->prev=nblock; + } + nblock->pprev = pprev; // Physical pointer to himself have only 1 free block + nblock->pnext = pnext; + pprev->pnext=nblock; + pnext->pprev=nblock; +} + + +my_bool Query_cache::join_results(size_t join_limit) +{ + my_bool has_moving = 0; + DBUG_ENTER("Query_cache::join_results"); + + if (queries_blocks != 0) + { + DBUG_ASSERT(query_cache_size > 0); + Query_cache_block *block = queries_blocks; + do + { + Query_cache_query *header = block->query(); + if (header->result() != 0 && + header->result()->type == Query_cache_block::RESULT && + header->length() > join_limit) + { + Query_cache_block *new_result_block = + get_free_block(ALIGN_SIZE(header->length()) + + ALIGN_SIZE(sizeof(Query_cache_block)) + + 
ALIGN_SIZE(sizeof(Query_cache_result)), 1, 0); + if (new_result_block != 0) + { + has_moving = 1; + Query_cache_block *first_result = header->result(); + size_t new_len = (header->length() + + ALIGN_SIZE(sizeof(Query_cache_block)) + + ALIGN_SIZE(sizeof(Query_cache_result))); + if (new_result_block->length > + ALIGN_SIZE(new_len) + min_allocation_unit) + split_block(new_result_block, ALIGN_SIZE(new_len)); + BLOCK_LOCK_WR(block); + header->result(new_result_block); + new_result_block->type = Query_cache_block::RESULT; + new_result_block->n_tables = 0; + new_result_block->used = new_len; + + new_result_block->next = new_result_block->prev = new_result_block; + DBUG_PRINT("qcache", ("new block %zu/%zu (%zu)", + new_result_block->length, + new_result_block->used, + header->length())); + + Query_cache_result *new_result = new_result_block->result(); + new_result->parent(block); + uchar *write_to = (uchar*) new_result->data(); + Query_cache_block *result_block = first_result; + do + { + size_t len = (result_block->used - result_block->headers_len() - + ALIGN_SIZE(sizeof(Query_cache_result))); + DBUG_PRINT("loop", ("add block %zu/%zu (%zu)", + result_block->length, + result_block->used, + len)); + memcpy((char *) write_to, + (char*) result_block->result()->data(), + len); + write_to += len; + Query_cache_block *old_result_block = result_block; + result_block = result_block->next; + free_memory_block(old_result_block); + } while (result_block != first_result); + BLOCK_UNLOCK_WR(block); + } + } + block = block->next; + } while ( block != queries_blocks ); + } + DBUG_RETURN(has_moving); +} + + +uint Query_cache::filename_2_table_key (char *key, const char *path, + uint32 *db_length) +{ + char tablename[FN_REFLEN+2], *filename, *dbname; + DBUG_ENTER("Query_cache::filename_2_table_key"); + + /* Safety if filename didn't have a directory name */ + tablename[0]= FN_LIBCHAR; + tablename[1]= FN_LIBCHAR; + /* Convert filename to this OS's format in tablename */ + fn_format(tablename 
+ 2, path, "", "", MY_REPLACE_EXT); + filename= tablename + dirname_length(tablename + 2) + 2; + /* Find start of databasename */ + for (dbname= filename - 2 ; dbname[-1] != FN_LIBCHAR ; dbname--) ; + *db_length= (uint32)(filename - dbname) - 1; + DBUG_PRINT("qcache", ("table '%-.*s.%s'", *db_length, dbname, filename)); + + DBUG_RETURN((uint) (strmake(strmake(key, dbname, + MY_MIN(*db_length, NAME_LEN)) + 1, + filename, NAME_LEN) - key) + 1); +} + +/**************************************************************************** + Functions to be used when debugging +****************************************************************************/ + +#if defined(DBUG_OFF) && !defined(USE_QUERY_CACHE_INTEGRITY_CHECK) + +void wreck(uint line, const char *message) { query_cache_size = 0; } +void bins_dump() {} +void cache_dump() {} +void queries_dump() {} +void tables_dump() {} +my_bool check_integrity(bool not_locked) { return 0; } +my_bool in_list(Query_cache_block * root, Query_cache_block * point, + const char *name) { return 0;} +my_bool in_blocks(Query_cache_block * point) { return 0; } + +#else + + +/* + Debug method which switch query cache off but left content for + investigation. 
+ + SYNOPSIS + Query_cache::wreck() + line line of the wreck() call + message message for logging +*/ + +void Query_cache::wreck(uint line, const char *message) +{ + THD *thd=current_thd; + DBUG_ENTER("Query_cache::wreck"); + query_cache_size = 0; + if (*message) + DBUG_PRINT("error", (" %s", message)); + DBUG_PRINT("warning", ("==================================")); + DBUG_PRINT("warning", ("%5d QUERY CACHE WRECK => DISABLED",line)); + DBUG_PRINT("warning", ("==================================")); + if (thd) + thd->set_killed(KILL_CONNECTION); + cache_dump(); + /* check_integrity(0); */ /* Can't call it here because of locks */ + bins_dump(); + DBUG_VOID_RETURN; +} + + +void Query_cache::bins_dump() +{ + uint i; + + if (!initialized || query_cache_size == 0) + { + DBUG_PRINT("qcache", ("Query Cache not initialized")); + return; + } + + DBUG_PRINT("qcache", ("mem_bin_num=%zu, mem_bin_steps=%zu", + mem_bin_num, mem_bin_steps)); + DBUG_PRINT("qcache", ("-------------------------")); + DBUG_PRINT("qcache", (" size idx step")); + DBUG_PRINT("qcache", ("-------------------------")); + for (i=0; i < mem_bin_steps; i++) + { + DBUG_PRINT("qcache", ("%10zu %3zd %10zu", steps[i].size, steps[i].idx, + steps[i].increment)); + } + DBUG_PRINT("qcache", ("-------------------------")); + DBUG_PRINT("qcache", (" size num")); + DBUG_PRINT("qcache", ("-------------------------")); + for (i=0; i < mem_bin_num; i++) + { + DBUG_PRINT("qcache", ("%10zu %3d %p", bins[i].size, bins[i].number, + &(bins[i]))); + if (bins[i].free_blocks) + { + Query_cache_block *block = bins[i].free_blocks; + do{ + DBUG_PRINT("qcache", ("\\-- %zu %p %p %p %p %p", + block->length,block, + block->next,block->prev, + block->pnext,block->pprev)); + block = block->next; + } while ( block != bins[i].free_blocks ); + } + } + DBUG_PRINT("qcache", ("-------------------------")); +} + + +void Query_cache::cache_dump() +{ + if (!initialized || query_cache_size == 0) + { + DBUG_PRINT("qcache", ("Query Cache not 
initialized")); + return; + } + + DBUG_PRINT("qcache", ("-------------------------------------")); + DBUG_PRINT("qcache", (" length used t nt")); + DBUG_PRINT("qcache", ("-------------------------------------")); + Query_cache_block *i = first_block; + do + { + DBUG_PRINT("qcache", + ("%10zu %10zu %1d %2d %p %p %p %p %p", + i->length, i->used, (int)i->type, + i->n_tables,i, + i->next,i->prev,i->pnext, + i->pprev)); + i = i->pnext; + } while ( i != first_block ); + DBUG_PRINT("qcache", ("-------------------------------------")); +} + + +void Query_cache::queries_dump() +{ +#ifdef DBUG_TRACE + if (!initialized) + { + DBUG_PRINT("qcache", ("Query Cache not initialized")); + return; + } + + DBUG_PRINT("qcache", ("------------------")); + DBUG_PRINT("qcache", (" QUERIES")); + DBUG_PRINT("qcache", ("------------------")); + if (queries_blocks != 0) + { + Query_cache_block *block = queries_blocks; + do + { + size_t len; + char *str = (char*) query_cache_query_get_key((uchar*) block, &len, 0); + len-= QUERY_CACHE_FLAGS_SIZE; // Point at flags + Query_cache_query_flags flags; + memcpy(&flags, str+len, QUERY_CACHE_FLAGS_SIZE); + str[len]= 0; // make zero ending DB name + DBUG_PRINT("qcache", ("F: %u C: %u L: %llu T: '%s' (%zu) '%s' '%s'", + flags.client_long_flag, + flags.character_set_client_num, + flags.limit, + flags.time_zone->get_name()->ptr(), + len, str, strend(str)+1)); + DBUG_PRINT("qcache", ("-b- %p %p %p %p %p", block, + block->next, block->prev, + block->pnext,block->pprev)); + memcpy(str + len, &flags, QUERY_CACHE_FLAGS_SIZE); // restore flags + for (TABLE_COUNTER_TYPE t= 0; t < block->n_tables; t++) + { + Query_cache_table *table= block->table(t)->parent; + DBUG_PRINT("qcache", ("-t- '%s' '%s'", table->db(), table->table())); + } + Query_cache_query *header = block->query(); + if (header->result()) + { + Query_cache_block *result_block = header->result(); + Query_cache_block *result_beg = result_block; + do + { + DBUG_PRINT("qcache", ("-r- %u %zu/%zu %p %p %p 
%p %p", + (uint) result_block->type, + result_block->length, result_block->used, + result_block, + result_block->next, + result_block->prev, + result_block->pnext, + result_block->pprev)); + result_block = result_block->next; + } while ( result_block != result_beg ); + } + } while ((block=block->next) != queries_blocks); + } + else + { + DBUG_PRINT("qcache", ("no queries in list")); + } + DBUG_PRINT("qcache", ("------------------")); +#endif +} + + +void Query_cache::tables_dump() +{ +#ifdef DBUG_TRACE + if (!initialized || query_cache_size == 0) + { + DBUG_PRINT("qcache", ("Query Cache not initialized")); + return; + } + + DBUG_PRINT("qcache", ("--------------------")); + DBUG_PRINT("qcache", ("TABLES")); + DBUG_PRINT("qcache", ("--------------------")); + if (tables_blocks != 0) + { + Query_cache_block *table_block = tables_blocks; + do + { + Query_cache_table *table = table_block->table(); + DBUG_PRINT("qcache", ("'%s' '%s'", table->db(), table->table())); + table_block = table_block->next; + } while (table_block != tables_blocks); + } + else + DBUG_PRINT("qcache", ("no tables in list")); + DBUG_PRINT("qcache", ("--------------------")); +#endif +} + + +/** + Checks integrity of the various linked lists + + @return Error status code + @retval FALSE Query cache is operational. + @retval TRUE Query cache is broken. +*/ + +my_bool Query_cache::check_integrity(bool locked) +{ + my_bool result = 0; + uint i; + DBUG_ENTER("check_integrity"); + + if (!locked) + lock_and_suspend(); + + if (my_hash_check(&queries)) + { + DBUG_PRINT("error", ("queries hash is damaged")); + result = 1; + } + + if (my_hash_check(&tables)) + { + DBUG_PRINT("error", ("tables hash is damaged")); + result = 1; + } + + DBUG_PRINT("qcache", ("physical address check ...")); + size_t free=0, used=0; + Query_cache_block * block = first_block; + do + { + /* When checking at system start, there is no block. 
*/ + if (!block) + break; + + DBUG_PRINT("qcache", ("block %p, type %u...", + block, (uint) block->type)); + // Check allignment + if ((((size_t)block) % ALIGN_SIZE(1)) != + (((size_t)first_block) % ALIGN_SIZE(1))) + { + DBUG_PRINT("error", + ("block %p do not aligned by %d", block, + (int) ALIGN_SIZE(1))); + result = 1; + } + // Check memory allocation + if (block->pnext == first_block) // Is it last block? + { + if (((uchar*)block) + block->length != + ((uchar*)first_block) + query_cache_size) + { + DBUG_PRINT("error", + ("block %p, type %u, ended at %p, but cache ended at %p", + block, (uint) block->type, + (((uchar*)block) + block->length), + (((uchar*)first_block) + query_cache_size))); + result = 1; + } + } + else + if (((uchar*)block) + block->length != ((uchar*)block->pnext)) + { + DBUG_PRINT("error", + ("block %p, type %u, ended at %p, but next block beginning at %p", + block, (uint) block->type, + (((uchar*)block) + block->length), + ((uchar*)block->pnext))); + } + if (block->type == Query_cache_block::FREE) + free+= block->length; + else + used+= block->length; + switch(block->type) { + case Query_cache_block::FREE: + { + Query_cache_memory_bin *bin = *((Query_cache_memory_bin **) + block->data()); + //is it correct pointer? 
+ if (((uchar*)bin) < ((uchar*)bins) || + ((uchar*)bin) >= ((uchar*)first_block)) + { + DBUG_PRINT("error", + ("free block %p have bin pointer %p beyaond of bins array bounds [%p,%p]", + block, + bin, + bins, + first_block)); + result = 1; + } + else + { + size_t idx = (((uchar*)bin) - ((uchar*)bins)) / + sizeof(Query_cache_memory_bin); + if (in_list(bins[idx].free_blocks, block, "free memory")) + result = 1; + } + break; + } + case Query_cache_block::TABLE: + if (in_list(tables_blocks, block, "tables")) + result = 1; + if (in_table_list(block->table(0), block->table(0), "table list root")) + result = 1; + break; + case Query_cache_block::QUERY: + { + if (in_list(queries_blocks, block, "query")) + result = 1; + for (TABLE_COUNTER_TYPE j=0; j < block->n_tables; j++) + { + Query_cache_block_table *block_table = block->table(j); + Query_cache_block_table *block_table_root = + (Query_cache_block_table *) + (((uchar*)block_table->parent) - + ALIGN_SIZE(sizeof(Query_cache_block_table))); + + if (in_table_list(block_table, block_table_root, "table list")) + result = 1; + } + break; + } + case Query_cache_block::RES_INCOMPLETE: + // This type of block can be not lincked yet (in multithread environment) + break; + case Query_cache_block::RES_BEG: + case Query_cache_block::RES_CONT: + case Query_cache_block::RESULT: + { + Query_cache_block * query_block = block->result()->parent(); + if (((uchar*)query_block) < ((uchar*)first_block) || + ((uchar*)query_block) >= (((uchar*)first_block) + query_cache_size)) + { + DBUG_PRINT("error", + ("result block %p have query block pointer %p beyaond of block pool bounds [%p,%p]", + block, + query_block, + first_block, + (((uchar*)first_block) + query_cache_size))); + result = 1; + } + else + { + BLOCK_LOCK_RD(query_block); + if (in_list(queries_blocks, query_block, "query from results")) + result = 1; + if (in_list(query_block->query()->result(), block, + "results")) + result = 1; + BLOCK_UNLOCK_RD(query_block); + } + break; + } + 
default: + DBUG_PRINT("error", ("block %p have incorrect type %u", + block, block->type)); + result = 1; + } + + block = block->pnext; + } while (block != first_block); + + if (used + free != query_cache_size) + { + DBUG_PRINT("error", + ("used memory (%zu) + free memory (%zu) != query_cache_size (%zu)", + used, free, query_cache_size)); + result = 1; + } + + if (free != free_memory) + { + DBUG_PRINT("error", + ("free memory (%zu) != free_memory (%zu)", + free, free_memory)); + result = 1; + } + + DBUG_PRINT("qcache", ("check queries ...")); + if ((block = queries_blocks)) + { + do + { + DBUG_PRINT("qcache", ("block %p, type %u...", + block, (uint) block->type)); + size_t length; + uchar *key = query_cache_query_get_key((uchar*) block, &length, 0); + uchar* val = my_hash_search(&queries, key, length); + if (((uchar*)block) != val) + { + DBUG_PRINT("error", ("block %p found in queries hash like %p", + block, val)); + } + if (in_blocks(block)) + result = 1; + Query_cache_block * results = block->query()->result(); + if (results) + { + Query_cache_block * result_block = results; + do + { + DBUG_PRINT("qcache", ("block %p, type %u...", + block, (uint) block->type)); + if (in_blocks(result_block)) + result = 1; + + result_block = result_block->next; + } while (result_block != results); + } + block = block->next; + } while (block != queries_blocks); + } + + DBUG_PRINT("qcache", ("check tables ...")); + if ((block = tables_blocks)) + { + do + { + DBUG_PRINT("qcache", ("block %p, type %u...", + block, (uint) block->type)); + size_t length; + uchar *key = query_cache_table_get_key((uchar*) block, &length, 0); + uchar* val = my_hash_search(&tables, key, length); + if (((uchar*)block) != val) + { + DBUG_PRINT("error", ("block %p found in tables hash like %p", + block, val)); + } + + if (in_blocks(block)) + result = 1; + block=block->next; + } while (block != tables_blocks); + } + + DBUG_PRINT("qcache", ("check free blocks")); + for (i = 0; i < mem_bin_num; i++) + { + if 
((block = bins[i].free_blocks)) + { + uint count = 0; + do + { + DBUG_PRINT("qcache", ("block %p, type %u...", + block, (uint) block->type)); + if (in_blocks(block)) + result = 1; + + count++; + block=block->next; + } while (block != bins[i].free_blocks); + if (count != bins[i].number) + { + DBUG_PRINT("error", ("bins[%d].number= %d, but bin have %d blocks", + i, bins[i].number, count)); + result = 1; + } + } + } + DBUG_ASSERT(result == 0); + if (!locked) + unlock(); + DBUG_RETURN(result); +} + + +my_bool Query_cache::in_blocks(Query_cache_block * point) +{ + my_bool result = 0; + Query_cache_block *block = point; + //back + do + { + if (block->pprev->pnext != block) + { + DBUG_PRINT("error", + ("block %p in physical list is incorrect linked, prev block %p referred as next to %p (check from %p)", + block, block->pprev, + block->pprev->pnext, + point)); + //back trace + for (; block != point; block = block->pnext) + DBUG_PRINT("error", ("back trace %p", block)); + result = 1; + goto err1; + } + block = block->pprev; + } while (block != first_block && block != point); + if (block != first_block) + { + DBUG_PRINT("error", + ("block %p (%p<-->%p) not owned by pysical list", + block, block->pprev, block->pnext)); + return 1; + } + +err1: + //forward + block = point; + do + { + if (block->pnext->pprev != block) + { + DBUG_PRINT("error", + ("block %p in physicel list is incorrect linked, next block %p referred as prev to %p (check from %p)", + block, block->pnext, + block->pnext->pprev, + point)); + //back trace + for (; block != point; block = block->pprev) + DBUG_PRINT("error", ("back trace %p", block)); + result = 1; + goto err2; + } + block = block->pnext; + } while (block != first_block); +err2: + return result; +} + + +my_bool Query_cache::in_list(Query_cache_block * root, + Query_cache_block * point, + const char *name) +{ + my_bool result = 0; + Query_cache_block *block = point; + //back + do + { + if (block->prev->next != block) + { + DBUG_PRINT("error", + 
("block %p in list '%s' %p is incorrect linked, prev block %p referred as next to %p (check from %p)", + block, name, root, block->prev, + block->prev->next, + point)); + //back trace + for (; block != point; block = block->next) + DBUG_PRINT("error", ("back trace %p", block)); + result = 1; + goto err1; + } + block = block->prev; + } while (block != root && block != point); + if (block != root) + { + DBUG_PRINT("error", + ("block %p (%p<-->%p) not owned by list '%s' %p", + block, + block->prev, block->next, + name, root)); + return 1; + } +err1: + // forward + block = point; + do + { + if (block->next->prev != block) + { + DBUG_PRINT("error", + ("block %p in list '%s' %p is incorrect linked, next block %p referred as prev to %p (check from %p)", + block, name, root, block->next, + block->next->prev, + point)); + //back trace + for (; block != point; block = block->prev) + DBUG_PRINT("error", ("back trace %p", block)); + result = 1; + goto err2; + } + block = block->next; + } while (block != root); +err2: + return result; +} + +void dump_node(Query_cache_block_table * node, + const char * call, const char * descr) +{ + DBUG_PRINT("qcache", ("%s: %s: node: %p", call, descr, node)); + DBUG_PRINT("qcache", ("%s: %s: node block: %p", + call, descr, node->block())); + DBUG_PRINT("qcache", ("%s: %s: next: %p", call, descr, + node->next)); + DBUG_PRINT("qcache", ("%s: %s: prev: %p", call, descr, + node->prev)); +} + +my_bool Query_cache::in_table_list(Query_cache_block_table * root, + Query_cache_block_table * point, + const char *name) +{ + my_bool result = 0; + Query_cache_block_table *table = point; + dump_node(root, name, "parameter root"); + //back + do + { + dump_node(table, name, "list element << "); + if (table->prev->next != table) + { + DBUG_PRINT("error", + ("table %p(%p) in list '%s' %p(%p) is incorrect linked, prev table %p(%p) referred as next to %p(%p) (check from %p(%p))", + table, table->block(), name, + root, root->block(), + table->prev, 
table->prev->block(), + table->prev->next, + table->prev->next->block(), + point, point->block())); + //back trace + for (; table != point; table = table->next) + DBUG_PRINT("error", ("back trace %p(%p)", + table, table->block())); + result = 1; + goto err1; + } + table = table->prev; + } while (table != root && table != point); + if (table != root) + { + DBUG_PRINT("error", + ("table %p(%p) (%p(%p)<-->%p(%p)) not owned by list '%s' %p(%p)", + table, table->block(), + table->prev, table->prev->block(), + table->next, table->next->block(), + name, root, root->block())); + return 1; + } +err1: + // forward + table = point; + do + { + dump_node(table, name, "list element >> "); + if (table->next->prev != table) + { + DBUG_PRINT("error", + ("table %p(%p) in list '%s' %p(%p) is incorrect linked, next table %p(%p) referred as prev to %p(%p) (check from %p(%p))", + table, table->block(), + name, root, root->block(), + table->next, table->next->block(), + table->next->prev, + table->next->prev->block(), + point, point->block())); + //back trace + for (; table != point; table = table->prev) + DBUG_PRINT("error", ("back trace %p(%p)", + table, table->block())); + result = 1; + goto err2; + } + table = table->next; + } while (table != root); +err2: + return result; +} + +#endif /* DBUG_OFF */ + +#endif /*HAVE_QUERY_CACHE*/ + diff --git a/sql/sql_cache.h b/sql/sql_cache.h new file mode 100644 index 00000000..a0203476 --- /dev/null +++ b/sql/sql_cache.h @@ -0,0 +1,612 @@ +/* Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef _SQL_CACHE_H +#define _SQL_CACHE_H + +#include "hash.h" +#include "my_base.h" /* ha_rows */ + +class MY_LOCALE; +struct TABLE_LIST; +class Time_zone; +struct LEX; +struct TABLE; +typedef struct st_changed_table_list CHANGED_TABLE_LIST; + +/* Query cache */ + +/* + Can't create new free memory block if unused memory in block less + then QUERY_CACHE_MIN_ALLOCATION_UNIT. + if QUERY_CACHE_MIN_ALLOCATION_UNIT == 0 then + QUERY_CACHE_MIN_ALLOCATION_UNIT choosed automatically +*/ +#define QUERY_CACHE_MIN_ALLOCATION_UNIT 512 + +/* inittial size of hashes */ +#define QUERY_CACHE_DEF_QUERY_HASH_SIZE 1024 +#define QUERY_CACHE_DEF_TABLE_HASH_SIZE 1024 + +/* minimal result data size when data allocated */ +#define QUERY_CACHE_MIN_RESULT_DATA_SIZE (1024*4) + +/* + start estimation of first result block size only when number of queries + bigger then: +*/ +#define QUERY_CACHE_MIN_ESTIMATED_QUERIES_NUMBER 3 + + + +/* memory bins size spacing (see at Query_cache::init_cache (sql_cache.cc)) */ +#define QUERY_CACHE_MEM_BIN_FIRST_STEP_PWR2 4 +#define QUERY_CACHE_MEM_BIN_STEP_PWR2 2 +#define QUERY_CACHE_MEM_BIN_PARTS_INC 1 +#define QUERY_CACHE_MEM_BIN_PARTS_MUL 1.2 +#define QUERY_CACHE_MEM_BIN_SPC_LIM_PWR2 3 + +/* how many free blocks check when finding most suitable before other 'end' + of list of free blocks */ +#define QUERY_CACHE_MEM_BIN_TRY 5 + +/* packing parameters */ +#define QUERY_CACHE_PACK_ITERATION 2 +#define QUERY_CACHE_PACK_LIMIT (512*1024L) + +#define TABLE_COUNTER_TYPE uint + +struct Query_cache_block; +struct Query_cache_block_table; +struct Query_cache_table; +struct Query_cache_query; +struct Query_cache_result; +class Query_cache; +struct Query_cache_tls; +struct LEX; +class THD; + +typedef 
my_bool (*qc_engine_callback)(THD *thd, const char *table_key, + uint key_length, + ulonglong *engine_data); + +/** + This class represents a node in the linked chain of queries + belonging to one table. + + @note The root of this linked list is not a query-type block, but the table- + type block which all queries has in common. +*/ +struct Query_cache_block_table +{ + Query_cache_block_table() = default; /* Remove gcc warning */ + + /** + This node holds a position in a static table list belonging + to the associated query (base 0). + */ + TABLE_COUNTER_TYPE n; + + /** + Pointers to the next and previous node, linking all queries with + a common table. + */ + Query_cache_block_table *next, *prev; + + /** + A pointer to the table-type block which all + linked queries has in common. + */ + Query_cache_table *parent; + + /** + A method to calculate the address of the query cache block + owning this node. The purpose of this calculation is to + make it easier to move the query cache block without having + to modify all the pointer addresses. 
+ */ + inline Query_cache_block *block(); +}; + +struct Query_cache_block +{ + Query_cache_block() = default; /* Remove gcc warning */ + enum block_type {FREE, QUERY, RESULT, RES_CONT, RES_BEG, + RES_INCOMPLETE, TABLE, INCOMPLETE}; + + size_t length; // length of all block + size_t used; // length of data + /* + Not used **pprev, **prev because really needed access to pervious block: + *pprev to join free blocks + *prev to access to opposite side of list in cyclic sorted list + */ + Query_cache_block *pnext,*pprev, // physical next/previous block + *next,*prev; // logical next/previous block + block_type type; + TABLE_COUNTER_TYPE n_tables; // number of tables in query + + inline bool is_free(void) { return type == FREE; } + void init(size_t length); + void destroy(); + uint headers_len(); + uchar* data(void); + Query_cache_query *query(); + Query_cache_table *table(); + Query_cache_result *result(); + Query_cache_block_table *table(TABLE_COUNTER_TYPE n); +}; + +struct Query_cache_query +{ + ulonglong limit_found_rows; + mysql_rwlock_t lock; + Query_cache_block *res; + Query_cache_tls *wri; + size_t len; + unsigned int last_pkt_nr; + uint8 tbls_type; + uint8 ready; + ulonglong hit_count; + + Query_cache_query() = default; /* Remove gcc warning */ + inline void init_n_lock(); + void unlock_n_destroy(); + inline ulonglong found_rows() { return limit_found_rows; } + inline void found_rows(ulonglong rows) { limit_found_rows= rows; } + inline Query_cache_block *result() { return res; } + inline void result(Query_cache_block *p) { res= p; } + inline Query_cache_tls *writer() { return wri; } + inline void writer(Query_cache_tls *p) { wri= p; } + inline uint8 tables_type() { return tbls_type; } + inline void tables_type(uint8 type) { tbls_type= type; } + inline size_t length() { return len; } + inline size_t add(size_t packet_len) { return(len+= packet_len); } + inline void length(size_t length_arg) { len= length_arg; } + inline uchar* query() + { + return (((uchar*)this) 
+ ALIGN_SIZE(sizeof(Query_cache_query))); + } + /** + following used to check if result ready in plugin without + locking rw_lock of the query. + */ + inline void set_results_ready() { ready= 1; } + inline bool is_results_ready() { return ready; } + inline void increment_hits() { hit_count++; } + inline ulonglong hits() { return hit_count; } + void lock_writing(); + void lock_reading(); + bool try_lock_writing(); + void unlock_writing(); + void unlock_reading(); +}; + + +struct Query_cache_table +{ + Query_cache_table() = default; /* Remove gcc warning */ + char *tbl; + uint32 key_len; + uint8 suffix_len; /* For partitioned tables */ + uint8 table_type; + /* unique for every engine reference */ + qc_engine_callback callback_func; + /* data need by some engines */ + ulonglong engine_data_buff; + + /** + The number of queries depending of this table. + */ + int32 m_cached_query_count; + /** + If table included in the table hash to be found by other queries + */ + my_bool hashed; + + inline char *db() { return (char *) data(); } + inline char *table() { return tbl; } + inline void table(char *table_arg) { tbl= table_arg; } + inline uint32 key_length() { return key_len; } + inline void key_length(uint32 len) { key_len= len; } + inline uint8 suffix_length() { return suffix_len; } + inline void suffix_length(uint8 len) { suffix_len= len; } + inline uint8 type() { return table_type; } + inline void type(uint8 t) { table_type= t; } + inline qc_engine_callback callback() { return callback_func; } + inline void callback(qc_engine_callback fn){ callback_func= fn; } + inline ulonglong engine_data() { return engine_data_buff; } + inline void engine_data(ulonglong data_arg){ engine_data_buff= data_arg; } + inline my_bool is_hashed() { return hashed; } + inline void set_hashed(my_bool hash) { hashed= hash; } + inline uchar* data() + { + return (uchar*)(((uchar*)this)+ + ALIGN_SIZE(sizeof(Query_cache_table))); + } +}; + +struct Query_cache_result +{ + Query_cache_result() = 
default; /* Remove gcc warning */ + Query_cache_block *query; + + inline uchar* data() + { + return (uchar*)(((uchar*) this)+ + ALIGN_SIZE(sizeof(Query_cache_result))); + } + /* data_continue (if not whole packet contained by this block) */ + inline Query_cache_block *parent() { return query; } + inline void parent (Query_cache_block *p) { query=p; } +}; + + +extern "C" +{ + uchar *query_cache_query_get_key(const uchar *record, size_t *length, + my_bool not_used); + uchar *query_cache_table_get_key(const uchar *record, size_t *length, + my_bool not_used); +} +extern "C" void query_cache_invalidate_by_MyISAM_filename(const char* filename); + + +struct Query_cache_memory_bin +{ + Query_cache_memory_bin() = default; /* Remove gcc warning */ +#ifndef DBUG_OFF + size_t size; +#endif + uint number; + Query_cache_block *free_blocks; + + inline void init(size_t size_arg) + { +#ifndef DBUG_OFF + size = size_arg; +#endif + number = 0; + free_blocks = 0; + } +}; + +struct Query_cache_memory_bin_step +{ + Query_cache_memory_bin_step() = default; /* Remove gcc warning */ + size_t size; + size_t increment; + size_t idx; + inline void init(size_t size_arg, size_t idx_arg, size_t increment_arg) + { + size = size_arg; + idx = idx_arg; + increment = increment_arg; + } +}; + +class Query_cache +{ +public: + /* Info */ + size_t query_cache_size, query_cache_limit; + /* statistics */ + size_t free_memory, queries_in_cache, hits, inserts, refused, + free_memory_blocks, total_blocks, lowmem_prunes; + + +private: +#ifndef DBUG_OFF + my_thread_id m_cache_lock_thread_id; +#endif + mysql_cond_t COND_cache_status_changed; + uint m_requests_in_progress; + enum Cache_lock_status { UNLOCKED, LOCKED_NO_WAIT, LOCKED }; + Cache_lock_status m_cache_lock_status; + enum Cache_staus {OK, DISABLE_REQUEST, DISABLED}; + Cache_staus m_cache_status; + + void free_query_internal(Query_cache_block *point); + void invalidate_table_internal(uchar *key, size_t key_length); + +protected: + /* + The following 
mutex is locked when searching or changing global + query, tables lists or hashes. When we are operating inside the + query structure we locked an internal query block mutex. + LOCK SEQUENCE (to prevent deadlocks): + 1. structure_guard_mutex + 2. query block (for operation inside query (query block/results)) + + Thread doing cache flush releases the mutex once it sets + m_cache_lock_status flag, so other threads may bypass the cache as + if it is disabled, not waiting for reset to finish. The exception + is other threads that were going to do cache flush---they'll wait + till the end of a flush operation. + */ + mysql_mutex_t structure_guard_mutex; + size_t additional_data_size; + uchar *cache; // cache memory + Query_cache_block *first_block; // physical location block list + Query_cache_block *queries_blocks; // query list (LIFO) + Query_cache_block *tables_blocks; + + Query_cache_memory_bin *bins; // free block lists + Query_cache_memory_bin_step *steps; // bins spacing info + HASH queries, tables; + /* options */ + size_t min_allocation_unit, min_result_data_size; + uint def_query_hash_size, def_table_hash_size; + + size_t mem_bin_num, mem_bin_steps; // See at init_cache & find_bin + + bool initialized; + + /* Exclude/include from cyclic double linked list */ + static void double_linked_list_exclude(Query_cache_block *point, + Query_cache_block **list_pointer); + static void double_linked_list_simple_include(Query_cache_block *point, + Query_cache_block ** + list_pointer); + static void double_linked_list_join(Query_cache_block *head_tail, + Query_cache_block *tail_head); + + /* The following functions require that structure_guard_mutex is locked */ + void flush_cache(); + my_bool free_old_query(); + void free_query(Query_cache_block *point); + my_bool allocate_data_chain(Query_cache_block **result_block, + size_t data_len, + Query_cache_block *query_block, + my_bool first_block); + void invalidate_table(THD *thd, TABLE_LIST *table); + void invalidate_table(THD 
*thd, TABLE *table); + void invalidate_table(THD *thd, uchar *key, size_t key_length); + void invalidate_table(THD *thd, Query_cache_block *table_block); + void invalidate_query_block_list(Query_cache_block_table *list_root); + + TABLE_COUNTER_TYPE + register_tables_from_list(THD *thd, TABLE_LIST *tables_used, + TABLE_COUNTER_TYPE counter, + Query_cache_block_table **block_table); + my_bool register_all_tables(THD *thd, Query_cache_block *block, + TABLE_LIST *tables_used, + TABLE_COUNTER_TYPE tables); + void unlink_table(Query_cache_block_table *node); + Query_cache_block *get_free_block (size_t len, my_bool not_less, + size_t min); + void free_memory_block(Query_cache_block *point); + void split_block(Query_cache_block *block, size_t len); + Query_cache_block *join_free_blocks(Query_cache_block *first_block, + Query_cache_block *block_in_list); + my_bool append_next_free_block(Query_cache_block *block, + size_t add_size); + void exclude_from_free_memory_list(Query_cache_block *free_block); + void insert_into_free_memory_list(Query_cache_block *new_block); + my_bool move_by_type(uchar **border, Query_cache_block **before, + size_t *gap, Query_cache_block *i); + uint find_bin(size_t size); + void move_to_query_list_end(Query_cache_block *block); + void insert_into_free_memory_sorted_list(Query_cache_block *new_block, + Query_cache_block **list); + void pack_cache(); + void relink(Query_cache_block *oblock, + Query_cache_block *nblock, + Query_cache_block *next, + Query_cache_block *prev, + Query_cache_block *pnext, + Query_cache_block *pprev); + my_bool join_results(size_t join_limit); + + /* + Following function control structure_guard_mutex + by themself or don't need structure_guard_mutex + */ + size_t init_cache(); + void make_disabled(); + void free_cache(); + Query_cache_block *write_block_data(size_t data_len, uchar* data, + size_t header_len, + Query_cache_block::block_type type, + TABLE_COUNTER_TYPE ntab = 0); + my_bool append_result_data(Query_cache_block 
**result, + size_t data_len, uchar* data, + Query_cache_block *parent); + my_bool write_result_data(Query_cache_block **result, + size_t data_len, uchar* data, + Query_cache_block *parent, + Query_cache_block::block_type + type=Query_cache_block::RESULT); + inline size_t get_min_first_result_data_size(); + inline size_t get_min_append_result_data_size(); + Query_cache_block *allocate_block(size_t len, my_bool not_less, + size_t min); + /* + If query is cacheable return number tables in query + (query without tables not cached) + */ + TABLE_COUNTER_TYPE is_cacheable(THD *thd, + LEX *lex, TABLE_LIST *tables_used, + uint8 *tables_type); + TABLE_COUNTER_TYPE process_and_count_tables(THD *thd, + TABLE_LIST *tables_used, + uint8 *tables_type); + + static my_bool ask_handler_allowance(THD *thd, TABLE_LIST *tables_used); + public: + + Query_cache(size_t query_cache_limit = ULONG_MAX, + size_t min_allocation_unit = QUERY_CACHE_MIN_ALLOCATION_UNIT, + size_t min_result_data_size = QUERY_CACHE_MIN_RESULT_DATA_SIZE, + uint def_query_hash_size = QUERY_CACHE_DEF_QUERY_HASH_SIZE, + uint def_table_hash_size = QUERY_CACHE_DEF_TABLE_HASH_SIZE); + + inline bool is_disabled(void) { return m_cache_status != OK; } + inline bool is_disable_in_progress(void) + { return m_cache_status == DISABLE_REQUEST; } + + /* initialize cache (mutex) */ + void init(); + /* resize query cache (return real query size, 0 if disabled) */ + size_t resize(size_t query_cache_size); + /* set limit on result size */ + inline void result_size_limit(size_t limit){query_cache_limit=limit;} + /* set minimal result data allocation unit size */ + size_t set_min_res_unit(size_t size); + + /* register query in cache */ + void store_query(THD *thd, TABLE_LIST *used_tables); + + /* + Check if the query is in the cache and if this is true send the + data to client. 
+ */ + int send_result_to_client(THD *thd, char *query, uint query_length); + + /* Remove all queries that uses any of the listed following tables */ + void invalidate(THD *thd, TABLE_LIST *tables_used, + my_bool using_transactions); + void invalidate(THD *thd, CHANGED_TABLE_LIST *tables_used); + void invalidate_locked_for_write(THD *thd, TABLE_LIST *tables_used); + void invalidate(THD *thd, TABLE *table, my_bool using_transactions); + void invalidate(THD *thd, const char *key, size_t key_length, + my_bool using_transactions); + + /* Remove all queries that uses any of the tables in following database */ + void invalidate(THD *thd, const char *db); + + /* Remove all queries that uses any of the listed following table */ + void invalidate_by_MyISAM_filename(const char *filename); + + void flush(); + void pack(THD *thd, + size_t join_limit = QUERY_CACHE_PACK_LIMIT, + uint iteration_limit = QUERY_CACHE_PACK_ITERATION); + + void destroy(); + + void insert(THD *thd, Query_cache_tls *query_cache_tls, + const char *packet, + size_t length, + unsigned pkt_nr); + my_bool insert_table(THD *thd, size_t key_len, const char *key, + Query_cache_block_table *node, + size_t db_length, uint8 suffix_length_arg, + uint8 cache_type, + qc_engine_callback callback, + ulonglong engine_data, + my_bool hash); + + void end_of_result(THD *thd); + void abort(THD *thd, Query_cache_tls *query_cache_tls); + + /* + The following functions are only used when debugging + We don't protect these with ifndef DBUG_OFF to not have to recompile + everything if we want to add checks of the cache at some places. 
+ */ + void wreck(uint line, const char *message); + void bins_dump(); + void cache_dump(); + void queries_dump(); + void tables_dump(); + my_bool check_integrity(bool not_locked); + my_bool in_list(Query_cache_block * root, Query_cache_block * point, + const char *name); + my_bool in_table_list(Query_cache_block_table * root, + Query_cache_block_table * point, + const char *name); + my_bool in_blocks(Query_cache_block * point); + + /* Table key generation */ + static uint filename_2_table_key (char *key, const char *filename, + uint32 *db_langth); + + enum Cache_try_lock_mode {WAIT, TIMEOUT, TRY}; + bool try_lock(THD *thd, Cache_try_lock_mode mode= WAIT); + void lock(THD *thd); + void lock_and_suspend(void); + void unlock(void); + + void disable_query_cache(THD *thd); +}; + +#ifdef HAVE_QUERY_CACHE +struct Query_cache_query_flags +{ + unsigned int client_long_flag:1; + unsigned int client_protocol_41:1; + unsigned int client_extended_metadata:1; + unsigned int client_depr_eof:1; + unsigned int protocol_type:2; + unsigned int more_results_exists:1; + unsigned int in_trans:1; + unsigned int autocommit:1; + unsigned int pkt_nr; + uint character_set_client_num; + uint character_set_results_num; + uint collation_connection_num; + uint group_concat_max_len; + ha_rows limit; + Time_zone *time_zone; + sql_mode_t sql_mode; + ulonglong max_sort_length; + size_t default_week_format; + size_t div_precision_increment; + MY_LOCALE *lc_time_names; +}; +#define QUERY_CACHE_FLAGS_SIZE sizeof(Query_cache_query_flags) +#define QUERY_CACHE_DB_LENGTH_SIZE 2 +#include "sql_cache.h" +#define query_cache_abort(A,B) query_cache.abort(A,B) +#define query_cache_end_of_result(A) query_cache.end_of_result(A) +#define query_cache_store_query(A, B) query_cache.store_query(A, B) +#define query_cache_destroy() query_cache.destroy() +#define query_cache_result_size_limit(A) query_cache.result_size_limit(A) +#define query_cache_init() query_cache.init() +#define query_cache_resize(A) 
query_cache.resize(A) +#define query_cache_set_min_res_unit(A) query_cache.set_min_res_unit(A) +#define query_cache_invalidate3(A, B, C) query_cache.invalidate(A, B, C) +#define query_cache_invalidate1(A, B) query_cache.invalidate(A, B) +#define query_cache_send_result_to_client(A, B, C) \ + query_cache.send_result_to_client(A, B, C) +#define query_cache_invalidate_by_MyISAM_filename_ref \ + &query_cache_invalidate_by_MyISAM_filename +/* note the "maybe": it's a read without mutex */ +#define query_cache_maybe_disabled(T) \ + (T->variables.query_cache_type == 0 || query_cache.query_cache_size == 0) +#define query_cache_is_cacheable_query(L) \ + (((L)->sql_command == SQLCOM_SELECT) && (L)->safe_to_cache_query) +#else +#define QUERY_CACHE_FLAGS_SIZE 0 +#define query_cache_store_query(A, B) do { } while(0) +#define query_cache_destroy() do { } while(0) +#define query_cache_result_size_limit(A) do { } while(0) +#define query_cache_init() do { } while(0) +#define query_cache_resize(A) do { } while(0) +#define query_cache_set_min_res_unit(A) do { } while(0) +#define query_cache_invalidate3(A, B, C) do { } while(0) +#define query_cache_invalidate1(A,B) do { } while(0) +#define query_cache_send_result_to_client(A, B, C) 0 +#define query_cache_invalidate_by_MyISAM_filename_ref NULL + +#define query_cache_abort(A,B) do { } while(0) +#define query_cache_end_of_result(A) do { } while(0) +#define query_cache_maybe_disabled(T) 1 +#define query_cache_is_cacheable_query(L) 0 +#endif /*HAVE_QUERY_CACHE*/ + +extern MYSQL_PLUGIN_IMPORT Query_cache query_cache; +#endif diff --git a/sql/sql_callback.h b/sql/sql_callback.h new file mode 100644 index 00000000..1f798ad5 --- /dev/null +++ b/sql/sql_callback.h @@ -0,0 +1,42 @@ +/* + Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#ifndef SQL_CALLBACK_INCLUDED +#define SQL_CALLBACK_INCLUDED + +/** + Macro used for an internal callback. + + The macro will check that the object exists and that the function + is defined. If that is the case, it will call the function with the + given parameters. + + If the object or the function is not defined, the callback will be + considered successful (nothing needed to be done) and will + therefore return no error. + */ + +#define MYSQL_CALLBACK(OBJ, FUNC, PARAMS) \ + do { \ + if ((OBJ) && ((OBJ)->FUNC)) \ + (OBJ)->FUNC PARAMS; \ + } while (0) + +#define MYSQL_CALLBACK_ELSE(OBJ, FUNC, PARAMS, ELSE) \ + (((OBJ) && ((OBJ)->FUNC)) ? (OBJ)->FUNC PARAMS : (ELSE)) + +#endif /* SQL_CALLBACK_INCLUDED */ diff --git a/sql/sql_class.cc b/sql/sql_class.cc new file mode 100644 index 00000000..9da1ec54 --- /dev/null +++ b/sql/sql_class.cc @@ -0,0 +1,8505 @@ +/* + Copyright (c) 2000, 2015, Oracle and/or its affiliates. + Copyright (c) 2008, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + + +/***************************************************************************** +** +** This file implements classes defined in sql_class.h +** Especially the classes to handle a result from a select +** +*****************************************************************************/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_class.h" +#include "sql_cache.h" // query_cache_abort +#include "sql_base.h" // close_thread_tables +#include "sql_time.h" // date_time_format_copy +#include "tztime.h" // MYSQL_TIME <-> my_time_t +#include "sql_acl.h" // NO_ACL, + // acl_getroot_no_password +#include "sql_base.h" +#include "sql_handler.h" // mysql_ha_cleanup +#include "rpl_rli.h" +#include "rpl_filter.h" +#include "rpl_record.h" +#include "slave.h" +#include +#include "log_event.h" +#include "sql_audit.h" +#include +#include +#include +#include +#include + +#include "sp_head.h" +#include "sp_rcontext.h" +#include "sp_cache.h" +#include "sql_show.h" // append_identifier +#include "sql_db.h" // get_default_db_collation +#include "transaction.h" +#include "sql_select.h" /* declares create_tmp_table() */ +#include "debug_sync.h" +#include "sql_parse.h" // is_update_query +#include "sql_callback.h" +#include "lock.h" +#include "wsrep_mysqld.h" +#include "sql_connect.h" +#ifdef WITH_WSREP +#include "wsrep_thd.h" +#include "wsrep_trans_observer.h" +#include "wsrep_server_state.h" +#endif /* WITH_WSREP */ +#include 
"opt_trace.h" +#include + +#ifdef HAVE_SYS_SYSCALL_H +#include +#endif + +/* + The following is used to initialise Table_ident with a internal + table name +*/ +char internal_table_name[2]= "*"; +char empty_c_string[1]= {0}; /* used for not defined db */ + +const char * const THD::DEFAULT_WHERE= "field list"; + +/**************************************************************************** +** User variables +****************************************************************************/ + +extern "C" uchar *get_var_key(user_var_entry *entry, size_t *length, + my_bool not_used __attribute__((unused))) +{ + *length= entry->name.length; + return (uchar*) entry->name.str; +} + +extern "C" void free_user_var(user_var_entry *entry) +{ + char *pos= (char*) entry+ALIGN_SIZE(sizeof(*entry)); + if (entry->value && entry->value != pos) + my_free(entry->value); + my_free(entry); +} + +/* Functions for last-value-from-sequence hash */ + +extern "C" uchar *get_sequence_last_key(SEQUENCE_LAST_VALUE *entry, + size_t *length, + my_bool not_used + __attribute__((unused))) +{ + *length= entry->length; + return (uchar*) entry->key; +} + +extern "C" void free_sequence_last(SEQUENCE_LAST_VALUE *entry) +{ + delete entry; +} + + +bool Key_part_spec::operator==(const Key_part_spec& other) const +{ + return length == other.length && + !lex_string_cmp(system_charset_info, &field_name, + &other.field_name); +} + + +bool Key_part_spec::check_key_for_blob(const handler *file) const +{ + if (!(file->ha_table_flags() & HA_CAN_INDEX_BLOBS)) + { + my_error(ER_BLOB_USED_AS_KEY, MYF(0), field_name.str, file->table_type()); + return true; + } + return false; +} + + +bool Key_part_spec::check_key_length_for_blob() const +{ + if (!length) + { + my_error(ER_BLOB_KEY_WITHOUT_LENGTH, MYF(0), field_name.str); + return true; + } + return false; +} + + +bool Key_part_spec::init_multiple_key_for_blob(const handler *file) +{ + if (check_key_for_blob(file)) + return true; + if (!length) + length= 
file->max_key_length() + 1; + return false; +} + + +/** + Construct an (almost) deep copy of this key. Only those + elements that are known to never change are not copied. + If out of memory, a partial copy is returned and an error is set + in THD. +*/ + +Key::Key(const Key &rhs, MEM_ROOT *mem_root) + :DDL_options(rhs),type(rhs.type), + key_create_info(rhs.key_create_info), + columns(rhs.columns, mem_root), + name(rhs.name), + option_list(rhs.option_list), + generated(rhs.generated), invisible(false), + without_overlaps(rhs.without_overlaps), old(rhs.old), period(rhs.period) +{ + list_copy_and_replace_each_value(columns, mem_root); +} + +/** + Construct an (almost) deep copy of this foreign key. Only those + elements that are known to never change are not copied. + If out of memory, a partial copy is returned and an error is set + in THD. +*/ + +Foreign_key::Foreign_key(const Foreign_key &rhs, MEM_ROOT *mem_root) + :Key(rhs,mem_root), + constraint_name(rhs.constraint_name), + ref_db(rhs.ref_db), + ref_table(rhs.ref_table), + ref_columns(rhs.ref_columns,mem_root), + delete_opt(rhs.delete_opt), + update_opt(rhs.update_opt), + match_opt(rhs.match_opt) +{ + list_copy_and_replace_each_value(ref_columns, mem_root); +} + +/* + Test if a foreign key (= generated key) is a prefix of the given key + (ignoring key name, key type and order of columns) + + NOTES: + This is only used to test if an index for a FOREIGN KEY exists + + IMPLEMENTATION + We only compare field names + + RETURN + true Generated key is a prefix of other key + false Not a prefix +*/ + +bool is_foreign_key_prefix(Key *a, Key *b) +{ + /* Ensure that 'a' is the generated key */ + if (a->generated) + { + if (b->generated && a->columns.elements > b->columns.elements) + swap_variables(Key*, a, b); // Put shorter key in 'a' + } + else + { + if (!b->generated) + return false; // No foreign key + swap_variables(Key*, a, b); // Put generated key in 'a' + } + + /* Test if 'a' is a prefix of 'b' */ + if 
(a->columns.elements > b->columns.elements) + return false; // Can't be prefix + + List_iterator col_it1(a->columns); + List_iterator col_it2(b->columns); + const Key_part_spec *col1, *col2; + +#ifdef ENABLE_WHEN_INNODB_CAN_HANDLE_SWAPED_FOREIGN_KEY_COLUMNS + while ((col1= col_it1++)) + { + bool found= 0; + col_it2.rewind(); + while ((col2= col_it2++)) + { + if (*col1 == *col2) + { + found= TRUE; + break; + } + } + if (!found) + return false; // Error + } + return true; // Is prefix +#else + while ((col1= col_it1++)) + { + col2= col_it2++; + if (!(*col1 == *col2)) + return false; + } + return true; // Is prefix +#endif +} + +/* + @brief + Check if the foreign key options are compatible with the specification + of the columns on which the key is created + + @retval + FALSE The foreign key options are compatible with key columns + @retval + TRUE Otherwise +*/ +bool Foreign_key::validate(List &table_fields) +{ + Create_field *sql_field; + Key_part_spec *column; + List_iterator cols(columns); + List_iterator it(table_fields); + DBUG_ENTER("Foreign_key::validate"); + if (old) + DBUG_RETURN(FALSE); // must be good + while ((column= cols++)) + { + it.rewind(); + while ((sql_field= it++) && + lex_string_cmp(system_charset_info, + &column->field_name, + &sql_field->field_name)) {} + if (!sql_field) + { + my_error(ER_KEY_COLUMN_DOES_NOT_EXIST, MYF(0), column->field_name.str); + DBUG_RETURN(TRUE); + } + if (type == Key::FOREIGN_KEY && sql_field->vcol_info) + { + if (delete_opt == FK_OPTION_SET_NULL) + { + my_error(ER_WRONG_FK_OPTION_FOR_VIRTUAL_COLUMN, MYF(0), + "ON DELETE SET NULL"); + DBUG_RETURN(TRUE); + } + if (update_opt == FK_OPTION_SET_NULL) + { + my_error(ER_WRONG_FK_OPTION_FOR_VIRTUAL_COLUMN, MYF(0), + "ON UPDATE SET NULL"); + DBUG_RETURN(TRUE); + } + if (update_opt == FK_OPTION_CASCADE) + { + my_error(ER_WRONG_FK_OPTION_FOR_VIRTUAL_COLUMN, MYF(0), + "ON UPDATE CASCADE"); + DBUG_RETURN(TRUE); + } + } + } + DBUG_RETURN(FALSE); +} + 
+/**************************************************************************** +** Thread specific functions +****************************************************************************/ + +extern "C" unsigned long long thd_query_id(const MYSQL_THD thd) +{ + return((unsigned long long)thd->query_id); +} + + +/** + Get thread attributes for connection threads + + @retval Reference to thread attribute for connection threads +*/ +pthread_attr_t *get_connection_attrib(void) +{ + return &connection_attrib; +} + +/** + Get max number of connections + + @retval Max number of connections for MySQL Server +*/ +ulong get_max_connections(void) +{ + return max_connections; +} + +/* + The following functions form part of the C plugin API +*/ + +extern "C" int mysql_tmpfile(const char *prefix) +{ + char filename[FN_REFLEN]; + File fd= create_temp_file(filename, mysql_tmpdir, prefix, + O_BINARY | O_SEQUENTIAL, + MYF(MY_WME | MY_TEMPORARY)); + return fd; +} + + +extern "C" +int thd_in_lock_tables(const THD *thd) +{ + return MY_TEST(thd->in_lock_tables); +} + + +extern "C" +int thd_tablespace_op(const THD *thd) +{ + return MY_TEST(thd->tablespace_op); +} + +extern "C" +const char *set_thd_proc_info(THD *thd_arg, const char *info, + const char *calling_function, + const char *calling_file, + const unsigned int calling_line) +{ + PSI_stage_info old_stage; + PSI_stage_info new_stage; + + new_stage.m_key= 0; + new_stage.m_name= info; + + set_thd_stage_info(thd_arg, & new_stage, & old_stage, + calling_function, calling_file, calling_line); + + return old_stage.m_name; +} + +extern "C" +void set_thd_stage_info(void *thd_arg, + const PSI_stage_info *new_stage, + PSI_stage_info *old_stage, + const char *calling_func, + const char *calling_file, + const unsigned int calling_line) +{ + THD *thd= (THD*) thd_arg; + if (thd == NULL) + thd= current_thd; + + if (old_stage) + thd->backup_stage(old_stage); + + if (new_stage) + thd->enter_stage(new_stage, calling_func, calling_file, calling_line); 
+} + +void thd_enter_cond(MYSQL_THD thd, mysql_cond_t *cond, mysql_mutex_t *mutex, + const PSI_stage_info *stage, PSI_stage_info *old_stage, + const char *src_function, const char *src_file, + int src_line) +{ + if (!thd) + thd= current_thd; + + return thd->enter_cond(cond, mutex, stage, old_stage, src_function, src_file, + src_line); +} + +void thd_exit_cond(MYSQL_THD thd, const PSI_stage_info *stage, + const char *src_function, const char *src_file, + int src_line) +{ + if (!thd) + thd= current_thd; + + thd->exit_cond(stage, src_function, src_file, src_line); + return; +} + +extern "C" +void thd_storage_lock_wait(THD *thd, long long value) +{ + thd->utime_after_lock+= value; +} + +/** + Provide a handler data getter to simplify coding +*/ +extern "C" +void *thd_get_ha_data(const THD *thd, const struct handlerton *hton) +{ + DBUG_ASSERT(thd == current_thd || mysql_mutex_is_owner(&thd->LOCK_thd_data)); + return thd->ha_data[hton->slot].ha_ptr; +} + + +/** + Provide a handler data setter to simplify coding + @see thd_set_ha_data() definition in plugin.h +*/ +extern "C" +void thd_set_ha_data(THD *thd, const struct handlerton *hton, + const void *ha_data) +{ + plugin_ref *lock= &thd->ha_data[hton->slot].lock; + mysql_mutex_lock(&thd->LOCK_thd_data); + thd->ha_data[hton->slot].ha_ptr= const_cast(ha_data); + mysql_mutex_unlock(&thd->LOCK_thd_data); + if (ha_data && !*lock) + *lock= ha_lock_engine(NULL, (handlerton*) hton); + else if (!ha_data && *lock) + { + plugin_unlock(NULL, *lock); + *lock= NULL; + } +} + + +/** + Allow storage engine to wakeup commits waiting in THD::wait_for_prior_commit. 
+ @see thd_wakeup_subsequent_commits() definition in plugin.h +*/ +extern "C" +void thd_wakeup_subsequent_commits(THD *thd, int wakeup_error) +{ + thd->wakeup_subsequent_commits(wakeup_error); +} + + +extern "C" +long long thd_test_options(const THD *thd, long long test_options) +{ + return thd->variables.option_bits & test_options; +} + +extern "C" +int thd_sql_command(const THD *thd) +{ + return (int) thd->lex->sql_command; +} + +/* + Returns options used with DDL's, like IF EXISTS etc... + Will returns 'nonsense' if the command was not a DDL. +*/ + +extern "C" +struct DDL_options_st *thd_ddl_options(const THD *thd) +{ + return &thd->lex->create_info; +} + + +extern "C" +int thd_tx_isolation(const THD *thd) +{ + return (int) thd->tx_isolation; +} + +extern "C" +int thd_tx_is_read_only(const THD *thd) +{ + return (int) thd->tx_read_only; +} + + +extern "C" +{ /* Functions for thd_error_context_service */ + + const char *thd_get_error_message(const THD *thd) + { + return thd->get_stmt_da()->message(); + } + + uint thd_get_error_number(const THD *thd) + { + return thd->get_stmt_da()->sql_errno(); + } + + ulong thd_get_error_row(const THD *thd) + { + return thd->get_stmt_da()->current_row_for_warning(); + } + + void thd_inc_error_row(THD *thd) + { + thd->get_stmt_da()->inc_current_row_for_warning(); + } +} + + +#if MARIA_PLUGIN_INTERFACE_VERSION < 0x0200 +/** + TODO: This function is for API compatibility, remove it eventually. + All engines should switch to use thd_get_error_context_description() + plugin service function. +*/ +extern "C" +char *thd_security_context(THD *thd, + char *buffer, unsigned int length, + unsigned int max_query_len) +{ + return thd_get_error_context_description(thd, buffer, length, max_query_len); +} +#endif + +/** + Implementation of Drop_table_error_handler::handle_condition(). + The reason in having this implementation is to silence technical low-level + warnings during DROP TABLE operation. 
Currently we don't want to expose + the following warnings during DROP TABLE: + - Some of table files are missed or invalid (the table is going to be + deleted anyway, so why bother that something was missed); + - A trigger associated with the table does not have DEFINER (One of the + MySQL specifics now is that triggers are loaded for the table being + dropped. So, we may have a warning that trigger does not have DEFINER + attribute during DROP TABLE operation). + + @return TRUE if the condition is handled. +*/ +bool Drop_table_error_handler::handle_condition(THD *thd, + uint sql_errno, + const char* sqlstate, + Sql_condition::enum_warning_level *level, + const char* msg, + Sql_condition ** cond_hdl) +{ + *cond_hdl= NULL; + return ((sql_errno == EE_DELETE && my_errno == ENOENT) || + sql_errno == ER_TRG_NO_DEFINER); +} + + +/** + Handle an error from MDL_context::upgrade_lock() and mysql_lock_tables(). + Ignore ER_LOCK_ABORTED and ER_LOCK_DEADLOCK errors. +*/ + +bool +MDL_deadlock_and_lock_abort_error_handler:: +handle_condition(THD *thd, + uint sql_errno, + const char *sqlstate, + Sql_condition::enum_warning_level *level, + const char* msg, + Sql_condition **cond_hdl) +{ + *cond_hdl= NULL; + if (sql_errno == ER_LOCK_ABORTED || sql_errno == ER_LOCK_DEADLOCK) + m_need_reopen= true; + + return m_need_reopen; +} + + +/** + Send timeout to thread. 
+ + Note that this is always safe as the thread will always remove it's + timeouts at end of query (and thus before THD is destroyed) +*/ + +extern "C" void thd_kill_timeout(THD* thd) +{ + thd->status_var.max_statement_time_exceeded++; + /* Kill queries that can't cause data corruptions */ + thd->awake(KILL_TIMEOUT); +} + +THD::THD(my_thread_id id, bool is_wsrep_applier) + :Statement(&main_lex, &main_mem_root, STMT_CONVENTIONAL_EXECUTION, + /* statement id */ 0), + rli_fake(0), rgi_fake(0), rgi_slave(NULL), + protocol_text(this), protocol_binary(this), initial_status_var(0), + m_current_stage_key(0), m_psi(0), + in_sub_stmt(0), log_all_errors(0), + binlog_unsafe_warning_flags(0), used(0), + current_stmt_binlog_format(BINLOG_FORMAT_MIXED), + bulk_param(0), + table_map_for_update(0), + m_examined_row_count(0), + accessed_rows_and_keys(0), + m_digest(NULL), + m_statement_psi(NULL), + m_transaction_psi(NULL), + m_idle_psi(NULL), + col_access(NO_ACL), + thread_id(id), + thread_dbug_id(id), + os_thread_id(0), + global_disable_checkpoint(0), + current_backup_stage(BACKUP_FINISHED), + failed_com_change_user(0), + is_fatal_error(0), + transaction_rollback_request(0), + is_fatal_sub_stmt_error(false), + in_lock_tables(0), + bootstrap(0), + derived_tables_processing(FALSE), + waiting_on_group_commit(FALSE), has_waiter(FALSE), + spcont(NULL), + m_parser_state(NULL), +#ifndef EMBEDDED_LIBRARY + audit_plugin_version(-1), +#endif +#if defined(ENABLED_DEBUG_SYNC) + debug_sync_control(0), +#endif /* defined(ENABLED_DEBUG_SYNC) */ + wait_for_commit_ptr(0), + m_internal_handler(0), + main_da(0, false, false), + m_stmt_da(&main_da), + tdc_hash_pins(0), + xid_hash_pins(0), + m_tmp_tables_locked(false), + async_state() +#ifdef HAVE_REPLICATION + , + current_linfo(0), + slave_info(0), + is_awaiting_semisync_ack(0) +#endif +#ifdef WITH_WSREP + , + wsrep_applier(is_wsrep_applier), + wsrep_applier_closing(false), + wsrep_client_thread(false), + wsrep_retry_counter(0), + wsrep_PA_safe(true), 
+ wsrep_retry_query(NULL), + wsrep_retry_query_len(0), + wsrep_retry_command(COM_CONNECT), + wsrep_consistency_check(NO_CONSISTENCY_CHECK), + wsrep_mysql_replicated(0), + wsrep_TOI_pre_query(NULL), + wsrep_TOI_pre_query_len(0), + wsrep_po_handle(WSREP_PO_INITIALIZER), + wsrep_po_cnt(0), + wsrep_apply_format(0), + wsrep_rbr_buf(NULL), + wsrep_sync_wait_gtid(WSREP_GTID_UNDEFINED), + wsrep_last_written_gtid_seqno(0), + wsrep_current_gtid_seqno(0), + wsrep_affected_rows(0), + wsrep_has_ignored_error(false), + wsrep_was_on(false), + wsrep_ignore_table(false), + wsrep_aborter(0), + wsrep_delayed_BF_abort(false), + wsrep_ctas(false), + +/* wsrep-lib */ + m_wsrep_next_trx_id(WSREP_UNDEFINED_TRX_ID), + m_wsrep_mutex(&LOCK_thd_data), + m_wsrep_cond(&COND_wsrep_thd), + m_wsrep_client_service(this, m_wsrep_client_state), + m_wsrep_client_state(this, + m_wsrep_mutex, + m_wsrep_cond, + Wsrep_server_state::instance(), + m_wsrep_client_service, + wsrep::client_id(thread_id)), + wsrep_applier_service(NULL), + wsrep_wfc() +#endif /*WITH_WSREP */ +{ + ulong tmp; + bzero(&variables, sizeof(variables)); + + /* + We set THR_THD to temporally point to this THD to register all the + variables that allocates memory for this THD + */ + THD *old_THR_THD= current_thd; + set_current_thd(this); + status_var.local_memory_used= sizeof(THD); + status_var.max_local_memory_used= status_var.local_memory_used; + status_var.global_memory_used= 0; + variables.pseudo_thread_id= thread_id; + variables.max_mem_used= global_system_variables.max_mem_used; + main_da.init(); + + mdl_context.init(this); + mdl_backup_lock= 0; + + /* + Pass nominal parameters to init_alloc_root only to ensure that + the destructor works OK in case of an error. The main_mem_root + will be re-initialized in init_for_queries(). 
+ */ + init_sql_alloc(key_memory_thd_main_mem_root, + &main_mem_root, 64, 0, MYF(MY_THREAD_SPECIFIC)); + + /* + Allocation of user variables for binary logging is always done with main + mem root + */ + user_var_events_alloc= mem_root; + + stmt_arena= this; + thread_stack= 0; + scheduler= thread_scheduler; // Will be fixed later + event_scheduler.data= 0; + skip_wait_timeout= false; + catalog= (char*)"std"; // the only catalog we have for now + main_security_ctx.init(); + security_ctx= &main_security_ctx; + no_errors= 0; + password= 0; + count_cuted_fields= CHECK_FIELD_IGNORE; + killed= NOT_KILLED; + killed_err= 0; + is_slave_error= FALSE; + my_hash_clear(&handler_tables_hash); + my_hash_clear(&ull_hash); + tmp_table=0; + cuted_fields= 0L; + m_sent_row_count= 0L; + limit_found_rows= 0; + m_row_count_func= -1; + statement_id_counter= 0UL; + // Must be reset to handle error with THD's created for init of mysqld + lex->current_select= 0; + start_utime= utime_after_query= 0; + system_time.start.val= system_time.sec= system_time.sec_part= 0; + utime_after_lock= 0L; + progress.arena= 0; + progress.report_to_client= 0; + progress.max_counter= 0; + slave_thread = 0; + connection_name.str= 0; + connection_name.length= 0; + + file_id = 0; + query_id= 0; + query_name_consts= 0; + semisync_info= 0; + db_charset= global_system_variables.collation_database; + bzero((void*) ha_data, sizeof(ha_data)); + mysys_var=0; + binlog_evt_union.do_union= FALSE; + binlog_table_maps= FALSE; + binlog_xid= 0; + enable_slow_log= 0; + durability_property= HA_REGULAR_DURABILITY; + +#ifdef DBUG_ASSERT_EXISTS + dbug_sentry=THD_SENTRY_MAGIC; +#endif + mysql_audit_init_thd(this); + net.vio=0; + net.buff= 0; + net.reading_or_writing= 0; + client_capabilities= 0; // minimalistic client + system_thread= NON_SYSTEM_THREAD; + cleanup_done= free_connection_done= abort_on_warning= got_warning= 0; + peer_port= 0; // For SHOW PROCESSLIST + transaction= &default_transaction; + transaction->m_pending_rows_event= 
0; + transaction->on= 1; + wt_thd_lazy_init(&transaction->wt, + &variables.wt_deadlock_search_depth_short, + &variables.wt_timeout_short, + &variables.wt_deadlock_search_depth_long, + &variables.wt_timeout_long); +#ifdef SIGNAL_WITH_VIO_CLOSE + active_vio = 0; +#endif + mysql_mutex_init(key_LOCK_thd_data, &LOCK_thd_data, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_thd_kill, &LOCK_thd_kill, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, 0); + mysql_mutex_record_order(&LOCK_thd_kill, &LOCK_thd_data); + + /* Variables with default values */ + proc_info="login"; + where= THD::DEFAULT_WHERE; + slave_net = 0; + m_command=COM_CONNECT; + *scramble= '\0'; + +#ifdef WITH_WSREP + mysql_cond_init(key_COND_wsrep_thd, &COND_wsrep_thd, NULL); + wsrep_info[sizeof(wsrep_info) - 1] = '\0'; /* make sure it is 0-terminated */ +#endif + /* Call to init() below requires fully initialized Open_tables_state. 
*/ + reset_open_tables_state(); + + init(); + debug_sync_init_thread(this); +#if defined(ENABLED_PROFILING) + profiling.set_thd(this); +#endif + user_connect=(USER_CONN *)0; + my_hash_init(key_memory_user_var_entry, &user_vars, system_charset_info, + USER_VARS_HASH_SIZE, 0, 0, (my_hash_get_key) get_var_key, + (my_hash_free_key) free_user_var, HASH_THREAD_SPECIFIC); + my_hash_init(PSI_INSTRUMENT_ME, &sequences, system_charset_info, + SEQUENCES_HASH_SIZE, 0, 0, (my_hash_get_key) + get_sequence_last_key, (my_hash_free_key) free_sequence_last, + HASH_THREAD_SPECIFIC); + + /* For user vars replication*/ + if (opt_bin_log) + my_init_dynamic_array(key_memory_user_var_entry, &user_var_events, + sizeof(BINLOG_USER_VAR_EVENT *), 16, 16, MYF(0)); + else + bzero((char*) &user_var_events, sizeof(user_var_events)); + + /* Protocol */ + protocol= &protocol_text; // Default protocol + protocol_text.init(this); + protocol_binary.init(this); + + thr_timer_init(&query_timer, (void (*)(void*)) thd_kill_timeout, this); + + tablespace_op=FALSE; + + /* + Initialize the random generator. We call my_rnd() without a lock as + it's not really critical if two threads modifies the structure at the + same time. We ensure that we have an unique number foreach thread + by adding the address of the stack. 
+ */ + tmp= (ulong) (my_rnd(&sql_rand) * 0xffffffff); + my_rnd_init(&rand, tmp + (ulong)((size_t) &rand), tmp + (ulong) ::global_query_id); + substitute_null_with_insert_id = FALSE; + lock_info.mysql_thd= (void *)this; + + m_token_array= NULL; + if (max_digest_length > 0) + { + m_token_array= (unsigned char*) my_malloc(PSI_INSTRUMENT_ME, + max_digest_length, + MYF(MY_WME|MY_THREAD_SPECIFIC)); + } + + m_binlog_invoker= INVOKER_NONE; + invoker.init(); + prepare_derived_at_open= FALSE; + create_tmp_table_for_derived= FALSE; + save_prep_leaf_list= FALSE; + org_charset= 0; + /* Restore THR_THD */ + set_current_thd(old_THR_THD); +} + + +void THD::push_internal_handler(Internal_error_handler *handler) +{ + DBUG_ENTER("THD::push_internal_handler"); + if (m_internal_handler) + { + handler->m_prev_internal_handler= m_internal_handler; + m_internal_handler= handler; + } + else + { + m_internal_handler= handler; + } + DBUG_VOID_RETURN; +} + +bool THD::handle_condition(uint sql_errno, + const char* sqlstate, + Sql_condition::enum_warning_level *level, + const char* msg, + Sql_condition ** cond_hdl) +{ + if (!m_internal_handler) + { + *cond_hdl= NULL; + return FALSE; + } + + for (Internal_error_handler *error_handler= m_internal_handler; + error_handler; + error_handler= error_handler->m_prev_internal_handler) + { + if (error_handler->handle_condition(this, sql_errno, sqlstate, level, msg, + cond_hdl)) + { + return TRUE; + } + } + return FALSE; +} + + +Internal_error_handler *THD::pop_internal_handler() +{ + DBUG_ENTER("THD::pop_internal_handler"); + DBUG_ASSERT(m_internal_handler != NULL); + Internal_error_handler *popped_handler= m_internal_handler; + m_internal_handler= m_internal_handler->m_prev_internal_handler; + DBUG_RETURN(popped_handler); +} + + +void THD::raise_error(uint sql_errno) +{ + const char* msg= ER_THD(this, sql_errno); + (void) raise_condition(sql_errno, "\0\0\0\0\0", + Sql_condition::WARN_LEVEL_ERROR, msg); +} + +void THD::raise_error_printf(uint sql_errno, 
...) +{ + va_list args; + char ebuff[MYSQL_ERRMSG_SIZE]; + DBUG_ENTER("THD::raise_error_printf"); + DBUG_PRINT("my", ("nr: %d errno: %d", sql_errno, errno)); + const char* format= ER_THD(this, sql_errno); + va_start(args, sql_errno); + my_vsnprintf(ebuff, sizeof(ebuff), format, args); + va_end(args); + (void) raise_condition(sql_errno, "\0\0\0\0\0", + Sql_condition::WARN_LEVEL_ERROR, ebuff); + DBUG_VOID_RETURN; +} + +void THD::raise_warning(uint sql_errno) +{ + const char* msg= ER_THD(this, sql_errno); + (void) raise_condition(sql_errno, "\0\0\0\0\0", + Sql_condition::WARN_LEVEL_WARN, msg); +} + +void THD::raise_warning_printf(uint sql_errno, ...) +{ + va_list args; + char ebuff[MYSQL_ERRMSG_SIZE]; + DBUG_ENTER("THD::raise_warning_printf"); + DBUG_PRINT("enter", ("warning: %u", sql_errno)); + const char* format= ER_THD(this, sql_errno); + va_start(args, sql_errno); + my_vsnprintf(ebuff, sizeof(ebuff), format, args); + va_end(args); + (void) raise_condition(sql_errno, "\0\0\0\0\0", + Sql_condition::WARN_LEVEL_WARN, ebuff); + DBUG_VOID_RETURN; +} + +void THD::raise_note(uint sql_errno) +{ + DBUG_ENTER("THD::raise_note"); + DBUG_PRINT("enter", ("code: %d", sql_errno)); + if (!(variables.option_bits & OPTION_SQL_NOTES) || + (variables.note_verbosity == 0)) + DBUG_VOID_RETURN; + const char* msg= ER_THD(this, sql_errno); + (void) raise_condition(sql_errno, "\0\0\0\0\0", + Sql_condition::WARN_LEVEL_NOTE, msg); + DBUG_VOID_RETURN; +} + +void THD::raise_note_printf(uint sql_errno, ...) 
+{ + va_list args; + char ebuff[MYSQL_ERRMSG_SIZE]; + DBUG_ENTER("THD::raise_note_printf"); + DBUG_PRINT("enter",("code: %u", sql_errno)); + if (!(variables.option_bits & OPTION_SQL_NOTES) || + (variables.note_verbosity == 0)) + DBUG_VOID_RETURN; + const char* format= ER_THD(this, sql_errno); + va_start(args, sql_errno); + my_vsnprintf(ebuff, sizeof(ebuff), format, args); + va_end(args); + (void) raise_condition(sql_errno, "\0\0\0\0\0", + Sql_condition::WARN_LEVEL_NOTE, ebuff); + DBUG_VOID_RETURN; +} + +Sql_condition* THD::raise_condition(const Sql_condition *cond) +{ + uint sql_errno= cond->get_sql_errno(); + const char *sqlstate= cond->get_sqlstate(); + Sql_condition::enum_warning_level level= cond->get_level(); + const char *msg= cond->get_message_text(); + + Diagnostics_area *da= get_stmt_da(); + Sql_condition *raised= NULL; + DBUG_ENTER("THD::raise_condition"); + DBUG_ASSERT(level < Sql_condition::WARN_LEVEL_END); + + if ((level == Sql_condition::WARN_LEVEL_NOTE) && + (!(variables.option_bits & OPTION_SQL_NOTES) || + (variables.note_verbosity == 0))) + DBUG_RETURN(NULL); +#ifdef WITH_WSREP + /* + Suppress warnings/errors if the wsrep THD is going to replay. The + deadlock/interrupted errors may be transitient and should not be + reported to the client. + */ + if (wsrep_must_replay(this)) + DBUG_RETURN(NULL); +#endif /* WITH_WSREP */ + + da->opt_clear_warning_info(query_id); + + /* + TODO: replace by DBUG_ASSERT(sql_errno != 0) once all bugs similar to + Bug#36768 are fixed: a SQL condition must have a real (!=0) error number + so that it can be caught by handlers. + */ + if (sql_errno == 0) + sql_errno= ER_UNKNOWN_ERROR; + if (msg == NULL) + msg= ER_THD(this, sql_errno); + if (!*sqlstate) + sqlstate= mysql_errno_to_sqlstate(sql_errno); + + if ((level == Sql_condition::WARN_LEVEL_WARN) && really_abort_on_warning()) + { + /* FIXME: push_warning and strict SQL_MODE case. 
*/ + level= Sql_condition::WARN_LEVEL_ERROR; + } + + if (!is_fatal_error && + handle_condition(sql_errno, sqlstate, &level, msg, &raised)) + goto ret; + + switch (level) { + case Sql_condition::WARN_LEVEL_WARN: + mysql_audit_general(this, MYSQL_AUDIT_GENERAL_WARNING, sql_errno, msg); + /* fall through */ + case Sql_condition::WARN_LEVEL_NOTE: + got_warning= 1; + break; + case Sql_condition::WARN_LEVEL_ERROR: + mysql_audit_general(this, MYSQL_AUDIT_GENERAL_ERROR, sql_errno, msg); + + is_slave_error= 1; // needed to catch query errors during replication + +#ifdef WITH_WSREP + /* + With wsrep we allow converting BF abort error to warning if + errors are ignored. + */ + if (!is_fatal_error && no_errors && + (wsrep_trx().bf_aborted() || wsrep_retry_counter)) + { + WSREP_DEBUG("BF abort error converted to warning"); + } + else +#endif /* WITH_WSREP */ + { + if (!da->is_error()) + { + set_row_count_func(-1); + da->set_error_status(sql_errno, msg, sqlstate, *cond, raised); + } + } + break; + case Sql_condition::WARN_LEVEL_END: + /* Impossible */ + break; + } + + query_cache_abort(this, &query_cache_tls); + + /* + Avoid pushing a condition for fatal out of memory errors as this will + require memory allocation and therefore might fail. Non fatal out of + memory errors can occur if raised by SIGNAL/RESIGNAL statement. 
+ */ + if (likely(!(is_fatal_error && (sql_errno == EE_OUTOFMEMORY || + sql_errno == ER_OUTOFMEMORY)))) + { + raised= da->push_warning(this, sql_errno, sqlstate, level, *cond, msg, + cond->m_row_number); + } +ret: + if (raised) + raised->copy_opt_attributes(cond); + DBUG_RETURN(raised); +} + +extern "C" +void *thd_alloc(MYSQL_THD thd, size_t size) +{ + return thd->alloc(size); +} + +extern "C" +void *thd_calloc(MYSQL_THD thd, size_t size) +{ + return thd->calloc(size); +} + +extern "C" +char *thd_strdup(MYSQL_THD thd, const char *str) +{ + return thd->strdup(str); +} + +extern "C" +char *thd_strmake(MYSQL_THD thd, const char *str, size_t size) +{ + return thd->strmake(str, size); +} + +extern "C" +LEX_CSTRING *thd_make_lex_string(THD *thd, LEX_CSTRING *lex_str, + const char *str, size_t size, + int allocate_lex_string) +{ + return allocate_lex_string ? thd->make_clex_string(str, size) + : thd->make_lex_string(lex_str, str, size); +} + +extern "C" +void *thd_memdup(MYSQL_THD thd, const void* str, size_t size) +{ + return thd->memdup(str, size); +} + +extern "C" +void thd_get_xid(const MYSQL_THD thd, MYSQL_XID *xid) +{ + *xid = *(MYSQL_XID *) thd->get_xid(); +} + +extern "C" +my_time_t thd_TIME_to_gmt_sec(MYSQL_THD thd, const MYSQL_TIME *ltime, + unsigned int *errcode) +{ + Time_zone *tz= thd ? thd->variables.time_zone : + global_system_variables.time_zone; + return tz->TIME_to_gmt_sec(ltime, errcode); +} + + +extern "C" +void thd_gmt_sec_to_TIME(MYSQL_THD thd, MYSQL_TIME *ltime, my_time_t t) +{ + Time_zone *tz= thd ? 
thd->variables.time_zone : + global_system_variables.time_zone; + tz->gmt_sec_to_TIME(ltime, t); +} + + +#ifdef _WIN32 +extern "C" my_thread_id next_thread_id_noinline() +{ +#undef next_thread_id + return next_thread_id(); +} +#endif + + +const Type_handler *THD::type_handler_for_datetime() const +{ + if (opt_mysql56_temporal_format) + return &type_handler_datetime2; + return &type_handler_datetime; +} + + +/* + Init common variables that has to be reset on start and on change_user +*/ + +void THD::init() +{ + DBUG_ENTER("thd::init"); + mdl_context.reset(); + mysql_mutex_lock(&LOCK_global_system_variables); + plugin_thdvar_init(this); + /* + plugin_thd_var_init() sets variables= global_system_variables, which + has reset variables.pseudo_thread_id to 0. We need to correct it here to + avoid temporary tables replication failure. + */ + variables.pseudo_thread_id= thread_id; + + variables.default_master_connection.str= default_master_connection_buff; + ::strmake(default_master_connection_buff, + global_system_variables.default_master_connection.str, + variables.default_master_connection.length); + mysql_mutex_unlock(&LOCK_global_system_variables); + + user_time.val= start_time= start_time_sec_part= 0; + + server_status= SERVER_STATUS_AUTOCOMMIT; + if (variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES) + server_status|= SERVER_STATUS_NO_BACKSLASH_ESCAPES; + if (variables.sql_mode & MODE_ANSI_QUOTES) + server_status|= SERVER_STATUS_ANSI_QUOTES; + + transaction->all.modified_non_trans_table= + transaction->stmt.modified_non_trans_table= FALSE; + transaction->all.m_unsafe_rollback_flags= + transaction->stmt.m_unsafe_rollback_flags= 0; + + open_options=ha_open_options; + update_lock_default= (variables.low_priority_updates ? 
+ TL_WRITE_LOW_PRIORITY : + TL_WRITE); + tx_isolation= (enum_tx_isolation) variables.tx_isolation; + tx_read_only= variables.tx_read_only; + update_charset(); // plugin_thd_var() changed character sets + reset_current_stmt_binlog_format_row(); + reset_binlog_local_stmt_filter(); + set_status_var_init(); + status_var.max_local_memory_used= status_var.local_memory_used; + bzero((char *) &org_status_var, sizeof(org_status_var)); + status_in_global= 0; + start_bytes_received= 0; + m_last_commit_gtid.seq_no= 0; + last_stmt= NULL; + /* Reset status of last insert id */ + arg_of_last_insert_id_function= FALSE; + stmt_depends_on_first_successful_insert_id_in_prev_stmt= FALSE; + first_successful_insert_id_in_prev_stmt= 0; + first_successful_insert_id_in_prev_stmt_for_binlog= 0; + first_successful_insert_id_in_cur_stmt= 0; + current_backup_stage= BACKUP_FINISHED; + backup_commit_lock= 0; +#ifdef WITH_WSREP + wsrep_last_query_id= 0; + wsrep_xid.null(); + wsrep_skip_locking= FALSE; + wsrep_converted_lock_session= false; + wsrep_retry_counter= 0; + wsrep_rgi= NULL; + wsrep_PA_safe= true; + wsrep_consistency_check = NO_CONSISTENCY_CHECK; + wsrep_mysql_replicated = 0; + wsrep_TOI_pre_query = NULL; + wsrep_TOI_pre_query_len = 0; + wsrep_rbr_buf = NULL; + wsrep_affected_rows = 0; + m_wsrep_next_trx_id = WSREP_UNDEFINED_TRX_ID; + wsrep_aborter = 0; + wsrep_abort_by_kill = NOT_KILLED; + wsrep_abort_by_kill_err = 0; +#ifndef DBUG_OFF + wsrep_killed_state = 0; +#endif /* DBUG_OFF */ + wsrep_desynced_backup_stage= false; +#endif /* WITH_WSREP */ + + set_binlog_bit(); + + select_commands= update_commands= other_commands= 0; + /* Set to handle counting of aborted connections */ + userstat_running= opt_userstat_running; + last_global_update_time= current_connect_time= time(NULL); +#ifndef EMBEDDED_LIBRARY + session_tracker.enable(this); +#endif //EMBEDDED_LIBRARY + + apc_target.init(&LOCK_thd_kill); + gap_tracker_data.init(); + DBUG_VOID_RETURN; +} + + +bool 
THD::restore_from_local_lex_to_old_lex(LEX *oldlex) +{ + DBUG_ASSERT(lex->sphead); + if (lex->sphead->merge_lex(this, oldlex, lex)) + return true; + lex= oldlex; + return false; +} + + +/* Updates some status variables to be used by update_global_user_stats */ + +void THD::update_stats(void) +{ + /* sql_command == SQLCOM_END in case of parse errors or quit */ + if (lex->sql_command != SQLCOM_END) + { + /* A SQL query. */ + if (lex->sql_command == SQLCOM_SELECT) + select_commands++; + else if (sql_command_flags[lex->sql_command] & CF_STATUS_COMMAND) + { + /* Ignore 'SHOW ' commands */ + } + else if (is_update_query(lex->sql_command)) + update_commands++; + else + other_commands++; + } +} + + +void THD::update_all_stats() +{ + ulonglong end_cpu_time, end_utime; + double busy_time, cpu_time; + + /* This is set at start of query if opt_userstat_running was set */ + if (!userstat_running) + return; + + end_cpu_time= my_getcputime(); + end_utime= microsecond_interval_timer(); + busy_time= (end_utime - start_utime) / 1000000.0; + cpu_time= (end_cpu_time - start_cpu_time) / 10000000.0; + /* In case there are bad values, 2629743 is the #seconds in a month. */ + if (cpu_time > 2629743.0) + cpu_time= 0; + status_var_add(status_var.cpu_time, cpu_time); + status_var_add(status_var.busy_time, busy_time); + + update_global_user_stats(this, TRUE, my_time(0)); + // Has to be updated after update_global_user_stats() + userstat_running= 0; +} + + +/* + Init THD for query processing. + This has to be called once before we call mysql_parse. + See also comments in sql_class.h. 
+*/ + +void THD::init_for_queries() +{ + DBUG_ASSERT(transaction->on); + DBUG_ASSERT(m_transaction_psi == NULL); + + /* Set time for --init-file queries */ + set_time(); + reset_root_defaults(mem_root, variables.query_alloc_block_size, + variables.query_prealloc_size); + reset_root_defaults(&transaction->mem_root, + variables.trans_alloc_block_size, + variables.trans_prealloc_size); + DBUG_ASSERT(!transaction->xid_state.is_explicit_XA()); + DBUG_ASSERT(transaction->implicit_xid.is_null()); +} + + +/* + Do what's needed when one invokes change user + + SYNOPSIS + change_user() + + IMPLEMENTATION + Reset all resources that are connection specific +*/ + + +void THD::change_user(void) +{ + if (!status_in_global) // Reset in init() + add_status_to_global(); + + if (!cleanup_done) + cleanup(); + cleanup_done= 0; + reset_killed(); + /* Clear errors from the previous THD */ + my_errno= 0; + if (mysys_var) + mysys_var->abort= 0; + + /* Clear warnings. */ + if (!get_stmt_da()->is_warning_info_empty()) + get_stmt_da()->clear_warning_info(0); + + init(); + stmt_map.reset(); + my_hash_init(key_memory_user_var_entry, &user_vars, system_charset_info, + USER_VARS_HASH_SIZE, 0, 0, (my_hash_get_key) get_var_key, + (my_hash_free_key) free_user_var, HASH_THREAD_SPECIFIC); + my_hash_init(key_memory_user_var_entry, &sequences, system_charset_info, + SEQUENCES_HASH_SIZE, 0, 0, (my_hash_get_key) + get_sequence_last_key, (my_hash_free_key) free_sequence_last, + HASH_THREAD_SPECIFIC); + sp_caches_clear(); + opt_trace.delete_traces(); +} + +/** + Change default database + + @note This is coded to have as few instructions as possible under + LOCK_thd_data +*/ + +bool THD::set_db(const LEX_CSTRING *new_db) +{ + bool result= 0; + /* + Acquiring mutex LOCK_thd_data as we either free the memory allocated + for the database and reallocating the memory for the new db or memcpy + the new_db to the db. + */ + /* Do not reallocate memory if current chunk is big enough. 
*/ + if (db.str && new_db->str && db.length >= new_db->length) + { + mysql_mutex_lock(&LOCK_thd_data); + db.length= new_db->length; + memcpy((char*) db.str, new_db->str, new_db->length+1); + mysql_mutex_unlock(&LOCK_thd_data); + } + else + { + const char *org_db= db.str; + const char *tmp= NULL; + if (new_db->str) + { + if (!(tmp= my_strndup(key_memory_THD_db, new_db->str, new_db->length, + MYF(MY_WME | ME_FATAL)))) + result= 1; + } + + mysql_mutex_lock(&LOCK_thd_data); + db.str= tmp; + db.length= tmp ? new_db->length : 0; + mysql_mutex_unlock(&LOCK_thd_data); + my_free((char*) org_db); + } + PSI_CALL_set_thread_db(db.str, (int) db.length); + return result; +} + + +/** + Set the current database + + @param new_db a pointer to the new database name. + @param new_db_len length of the new database name. + + @note This operation just sets {db, db_length}. Switching the current + database usually involves other actions, like switching other database + attributes including security context. In the future, this operation + will be made private and more convenient interface will be provided. 
+*/ + +void THD::reset_db(const LEX_CSTRING *new_db) +{ + if (new_db->str != db.str || new_db->length != db.length) + { + if (db.str != 0) + DBUG_PRINT("QQ", ("Overwriting: %p", db.str)); + mysql_mutex_lock(&LOCK_thd_data); + db= *new_db; + mysql_mutex_unlock(&LOCK_thd_data); + PSI_CALL_set_thread_db(db.str, (int) db.length); + } +} + + +/* Do operations that may take a long time */ + +void THD::cleanup(void) +{ + DBUG_ENTER("THD::cleanup"); + DBUG_ASSERT(cleanup_done == 0); + + set_killed(KILL_CONNECTION); +#ifdef WITH_WSREP + if (wsrep_cs().state() != wsrep::client_state::s_none) + wsrep_cs().cleanup(); + wsrep_client_thread= false; +#endif /* WITH_WSREP */ + + DEBUG_SYNC(this, "THD_cleanup_after_set_killed"); + + mysql_ha_cleanup(this); + locked_tables_list.unlock_locked_tables(this); + + delete_dynamic(&user_var_events); + close_temporary_tables(); + + if (transaction->xid_state.is_explicit_XA()) + trans_xa_detach(this); + else + trans_rollback(this); + + DBUG_ASSERT(open_tables == NULL); + DBUG_ASSERT(m_transaction_psi == NULL); + + /* + If the thread was in the middle of an ongoing transaction (rolled + back a few lines above) or under LOCK TABLES (unlocked the tables + and left the mode a few lines above), there will be outstanding + metadata locks. Release them. + */ + mdl_context.release_transactional_locks(this); + + backup_end(this); + backup_unlock(this); + + /* Release the global read lock, if acquired. */ + if (global_read_lock.is_acquired()) + global_read_lock.unlock_global_read_lock(this); + + if (user_connect) + { + decrease_user_connections(user_connect); + user_connect= 0; // Safety + } + wt_thd_destroy(&transaction->wt); + + my_hash_free(&user_vars); + my_hash_free(&sequences); + sp_caches_clear(); + auto_inc_intervals_forced.empty(); + auto_inc_intervals_in_cur_stmt_for_binlog.empty(); + + mysql_ull_cleanup(this); + stmt_map.reset(); + /* All metadata locks must have been released by now. 
*/ + DBUG_ASSERT(!mdl_context.has_locks()); + + apc_target.destroy(); +#ifdef HAVE_REPLICATION + unregister_slave(); +#endif + cleanup_done=1; + DBUG_VOID_RETURN; +} + + +/* + Free all connection related resources associated with a THD. + This is used when we put a thread into the thread cache. + After this call should either call ~THD or reset_for_reuse() depending on + circumstances. +*/ + +void THD::free_connection() +{ + DBUG_ASSERT(free_connection_done == 0); + my_free(const_cast(db.str)); + db= null_clex_str; +#ifndef EMBEDDED_LIBRARY + if (net.vio) + vio_delete(net.vio); + net.vio= nullptr; + net_end(&net); +#endif + if (!cleanup_done) + cleanup(); + ha_close_connection(this); + plugin_thdvar_cleanup(this); + mysql_audit_free_thd(this); + main_security_ctx.destroy(); + /* close all prepared statements, to save memory */ + stmt_map.reset(); + free_connection_done= 1; +#if defined(ENABLED_PROFILING) + profiling.restart(); // Reset profiling +#endif + debug_sync_reset_thread(this); +} + +/* + Reset thd for reuse by another connection + This is only used for user connections, so the following variables doesn't + have to be reset: + - Replication (slave) variables. + - Variables not reset between each statements. See reset_for_next_command. 
+*/ + +void THD::reset_for_reuse() +{ + mysql_audit_init_thd(this); + change_user(); // Calls cleanup() & init() + get_stmt_da()->reset_diagnostics_area(); + main_security_ctx.init(); + failed_com_change_user= 0; + is_fatal_error= 0; + client_capabilities= 0; + peer_port= 0; + query_name_consts= 0; // Safety + abort_on_warning= 0; + free_connection_done= 0; + m_command= COM_CONNECT; + transaction->on= 1; +#if defined(ENABLED_PROFILING) + profiling.reset(); +#endif +#ifdef SIGNAL_WITH_VIO_CLOSE + active_vio = 0; +#endif +#ifdef WITH_WSREP + wsrep_free_status(this); + wsrep_cs().reset_error(); + wsrep_aborter= 0; + wsrep_abort_by_kill= NOT_KILLED; + wsrep_abort_by_kill_err= 0; +#ifndef DBUG_OFF + wsrep_killed_state= 0; +#endif /* DBUG_OFF */ +#endif /* WITH_WSREP */ +} + + +THD::~THD() +{ + THD *orig_thd= current_thd; + THD_CHECK_SENTRY(this); + DBUG_ENTER("~THD()"); + /* Make sure threads are not available via server_threads. */ + assert_not_linked(); + if (m_psi) + PSI_CALL_set_thread_THD(m_psi, 0); + + /* + In error cases, thd may not be current thd. We have to fix this so + that memory allocation counting is done correctly + */ + set_current_thd(this); + if (!status_in_global) + add_status_to_global(); + + /* + Other threads may have a lock on LOCK_thd_kill to ensure that this + THD is not deleted while they access it. 
The following mutex_lock + ensures that no one else is using this THD and it's now safe to delete + */ + mysql_mutex_lock(&LOCK_thd_kill); + mysql_mutex_unlock(&LOCK_thd_kill); + +#ifdef WITH_WSREP + delete wsrep_rgi; +#endif + if (!free_connection_done) + free_connection(); + +#ifdef WITH_WSREP + mysql_cond_destroy(&COND_wsrep_thd); +#endif + mdl_context.destroy(); + + transaction->free(); + mysql_cond_destroy(&COND_wakeup_ready); + mysql_mutex_destroy(&LOCK_wakeup_ready); + mysql_mutex_destroy(&LOCK_thd_data); + mysql_mutex_destroy(&LOCK_thd_kill); +#ifdef DBUG_ASSERT_EXISTS + dbug_sentry= THD_SENTRY_GONE; +#endif +#ifndef EMBEDDED_LIBRARY + if (rgi_fake) + { + delete rgi_fake; + rgi_fake= NULL; + } + if (rli_fake) + { + delete rli_fake; + rli_fake= NULL; + } + + if (rgi_slave) + rgi_slave->cleanup_after_session(); + my_free(semisync_info); +#endif + main_lex.free_set_stmt_mem_root(); + free_root(&main_mem_root, MYF(0)); + my_free(m_token_array); + main_da.free_memory(); + if (tdc_hash_pins) + lf_hash_put_pins(tdc_hash_pins); + if (xid_hash_pins) + lf_hash_put_pins(xid_hash_pins); +#if defined(ENABLED_DEBUG_SYNC) + debug_sync_end_thread(this); +#endif + /* Ensure everything is freed */ + status_var.local_memory_used-= sizeof(THD); + + /* trick to make happy memory accounting system */ +#ifndef EMBEDDED_LIBRARY + session_tracker.sysvars.deinit(); +#ifdef USER_VAR_TRACKING + session_tracker.user_variables.deinit(); +#endif // USER_VAR_TRACKING +#endif //EMBEDDED_LIBRARY + + if (status_var.local_memory_used != 0) + { + DBUG_PRINT("error", ("memory_used: %lld", status_var.local_memory_used)); + SAFEMALLOC_REPORT_MEMORY(sf_malloc_dbug_id()); + DBUG_ASSERT(status_var.local_memory_used == 0 || + !debug_assert_on_not_freed_memory); + } + update_global_memory_status(status_var.global_memory_used); + set_current_thd(orig_thd == this ? 
0 : orig_thd); + DBUG_VOID_RETURN; +} + + +/* + Add all status variables to another status variable array + + SYNOPSIS + add_to_status() + to_var add to this array + from_var from this array + + NOTES + This function assumes that all variables at start are long/ulong and + other types are handled explicitly +*/ + +void add_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var) +{ + ulong *end= (ulong*) ((uchar*) to_var + + offsetof(STATUS_VAR, last_system_status_var) + + sizeof(ulong)); + ulong *to= (ulong*) to_var, *from= (ulong*) from_var; + + while (to != end) + *(to++)+= *(from++); + + /* Handle the not ulong variables. See end of system_status_var */ + to_var->bytes_received+= from_var->bytes_received; + to_var->bytes_sent+= from_var->bytes_sent; + to_var->rows_read+= from_var->rows_read; + to_var->rows_sent+= from_var->rows_sent; + to_var->rows_tmp_read+= from_var->rows_tmp_read; + to_var->binlog_bytes_written+= from_var->binlog_bytes_written; + to_var->cpu_time+= from_var->cpu_time; + to_var->busy_time+= from_var->busy_time; + to_var->table_open_cache_hits+= from_var->table_open_cache_hits; + to_var->table_open_cache_misses+= from_var->table_open_cache_misses; + to_var->table_open_cache_overflows+= from_var->table_open_cache_overflows; + + /* + Update global_memory_used. We have to do this with atomic_add as the + global value can change outside of LOCK_status. + */ + if (to_var == &global_status_var) + { + DBUG_PRINT("info", ("global memory_used: %lld size: %lld", + (longlong) global_status_var.global_memory_used, + (longlong) from_var->global_memory_used)); + update_global_memory_status(from_var->global_memory_used); + } + else + to_var->global_memory_used+= from_var->global_memory_used; +} + +/* + Add the difference between two status variable arrays to another one. 
+ + SYNOPSIS + add_diff_to_status + to_var add to this array + from_var from this array + dec_var minus this array + + NOTE + This function assumes that all variables at start are long/ulong and + other types are handled explicitly +*/ + +void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var, + STATUS_VAR *dec_var) +{ + ulong *end= (ulong*) ((uchar*) to_var + offsetof(STATUS_VAR, + last_system_status_var) + + sizeof(ulong)); + ulong *to= (ulong*) to_var, *from= (ulong*) from_var, *dec= (ulong*) dec_var; + + while (to != end) + *(to++)+= *(from++) - *(dec++); + + to_var->bytes_received+= from_var->bytes_received - + dec_var->bytes_received; + to_var->bytes_sent+= from_var->bytes_sent - dec_var->bytes_sent; + to_var->rows_read+= from_var->rows_read - dec_var->rows_read; + to_var->rows_sent+= from_var->rows_sent - dec_var->rows_sent; + to_var->rows_tmp_read+= from_var->rows_tmp_read - dec_var->rows_tmp_read; + to_var->binlog_bytes_written+= from_var->binlog_bytes_written - + dec_var->binlog_bytes_written; + to_var->cpu_time+= from_var->cpu_time - dec_var->cpu_time; + to_var->busy_time+= from_var->busy_time - dec_var->busy_time; + to_var->table_open_cache_hits+= from_var->table_open_cache_hits - + dec_var->table_open_cache_hits; + to_var->table_open_cache_misses+= from_var->table_open_cache_misses - + dec_var->table_open_cache_misses; + to_var->table_open_cache_overflows+= from_var->table_open_cache_overflows - + dec_var->table_open_cache_overflows; + + /* + We don't need to accumulate memory_used as these are not reset or used by + the calling functions. See execute_show_status(). + */ +} + +#define SECONDS_TO_WAIT_FOR_KILL 2 +#if !defined(_WIN32) && defined(HAVE_SELECT) +/* my_sleep() can wait for sub second times */ +#define WAIT_FOR_KILL_TRY_TIMES 20 +#else +#define WAIT_FOR_KILL_TRY_TIMES 2 +#endif + + +/** + Awake a thread. + + @param[in] state_to_set value for THD::killed + + This is normally called from another thread's THD object. 
+ + @note Do always call this while holding LOCK_thd_kill. + NOT_KILLED is used to awake a thread for a slave +*/ +extern std::atomic shutdown_thread_id; +void THD::awake_no_mutex(killed_state state_to_set) +{ + DBUG_ENTER("THD::awake_no_mutex"); + DBUG_PRINT("enter", ("this: %p current_thd: %p state: %d", + this, current_thd, (int) state_to_set)); + THD_CHECK_SENTRY(this); + mysql_mutex_assert_owner(&LOCK_thd_data); + mysql_mutex_assert_owner(&LOCK_thd_kill); + + print_aborted_warning(3, "KILLED"); + + /* + Don't degrade killed state, for example from a KILL_CONNECTION to + STATEMENT TIMEOUT + */ + if (killed >= KILL_CONNECTION) + state_to_set= killed; + + set_killed_no_mutex(state_to_set); + + if (state_to_set >= KILL_CONNECTION || state_to_set == NOT_KILLED) + { +#ifdef SIGNAL_WITH_VIO_CLOSE + if (this != current_thd && thread_id != shutdown_thread_id) + { + if(active_vio) + vio_shutdown(active_vio, SHUT_RDWR); + } +#endif + + /* Mark the target thread's alarm request expired, and signal alarm. */ + thr_alarm_kill(thread_id); + + /* Send an event to the scheduler that a thread should be killed. */ + if (!slave_thread) + MYSQL_CALLBACK(scheduler, post_kill_notification, (this)); + } + + /* Interrupt target waiting inside a storage engine. */ + if (state_to_set != NOT_KILLED && + IF_WSREP(!wsrep_is_bf_aborted(this) && wsrep_abort_by_kill == NOT_KILLED, + true)) + ha_kill_query(this, thd_kill_level(this)); + + abort_current_cond_wait(false); + DBUG_VOID_RETURN; +} + +/* Broadcast a condition to kick the target if it is waiting on it. 
*/ +void THD::abort_current_cond_wait(bool force) +{ + mysql_mutex_assert_owner(&LOCK_thd_kill); + if (mysys_var) + { + mysql_mutex_lock(&mysys_var->mutex); + if (!system_thread || force) // Don't abort locks + mysys_var->abort=1; + + /* + This broadcast could be up in the air if the victim thread + exits the cond in the time between read and broadcast, but that is + ok since all we want to do is to make the victim thread get out + of waiting on current_cond. + If we see a non-zero current_cond: it cannot be an old value (because + then exit_cond() should have run and it can't because we have mutex); so + it is the true value but maybe current_mutex is not yet non-zero (we're + in the middle of enter_cond() and there is a "memory order + inversion"). So we test the mutex too to not lock 0. + + Note that there is a small chance we fail to kill. If victim has locked + current_mutex, but hasn't yet entered enter_cond() (which means that + current_cond and current_mutex are 0), then the victim will not get + a signal and it may wait "forever" on the cond (until + we issue a second KILL or the status it's waiting for happens). + It's true that we have set its thd->killed but it may not + see it immediately and so may have time to reach the cond_wait(). + + However, where possible, we test for killed once again after + enter_cond(). This should make the signaling as safe as possible. + However, there is still a small chance of failure on platforms with + instruction or memory write reordering. + + We have to do the loop with trylock, because if we would use + pthread_mutex_lock(), we can cause a deadlock as we are here locking + the mysys_var->mutex and mysys_var->current_mutex in a different order + than in the thread we are trying to kill. + We only sleep for 2 seconds as we don't want to have LOCK_thd_data + locked too long time. 
+ + There is a small change we may not succeed in aborting a thread that + is not yet waiting for a mutex, but as this happens only for a + thread that was doing something else when the kill was issued and + which should detect the kill flag before it starts to wait, this + should be good enough. + */ + if (mysys_var->current_cond && mysys_var->current_mutex) + { + uint i; + for (i= 0; i < WAIT_FOR_KILL_TRY_TIMES * SECONDS_TO_WAIT_FOR_KILL; i++) + { + int ret= mysql_mutex_trylock(mysys_var->current_mutex); + mysql_cond_broadcast(mysys_var->current_cond); + if (!ret) + { + /* Signal is sure to get through */ + mysql_mutex_unlock(mysys_var->current_mutex); + break; + } + my_sleep(1000000L / WAIT_FOR_KILL_TRY_TIMES); + } + } + mysql_mutex_unlock(&mysys_var->mutex); + } +} + + +/** + Close the Vio associated this session. + + @remark LOCK_thd_data is taken due to the fact that + the Vio might be disassociated concurrently. +*/ + +void THD::disconnect() +{ + Vio *vio= NULL; + + set_killed(KILL_CONNECTION); + + mysql_mutex_lock(&LOCK_thd_data); + +#ifdef SIGNAL_WITH_VIO_CLOSE + /* + Since a active vio might might have not been set yet, in + any case save a reference to avoid closing a inexistent + one or closing the vio twice if there is a active one. + */ + vio= active_vio; + close_active_vio(); +#endif + + /* Disconnect even if a active vio is not associated. 
*/ + if (net.vio != vio) + vio_close(net.vio); + net.thd= 0; // Don't collect statistics + + mysql_mutex_unlock(&LOCK_thd_data); +} + + +bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, + bool needs_thr_lock_abort) +{ + THD *in_use= ctx_in_use->get_thd(); + bool signalled= FALSE; + DBUG_ENTER("THD::notify_shared_lock"); + DBUG_PRINT("enter",("needs_thr_lock_abort: %d", needs_thr_lock_abort)); + + if ((in_use->system_thread & SYSTEM_THREAD_DELAYED_INSERT) && + !in_use->killed) + { + /* This code is similar to kill_delayed_threads() */ + DBUG_PRINT("info", ("kill delayed thread")); + mysql_mutex_lock(&in_use->LOCK_thd_kill); + if (in_use->killed < KILL_CONNECTION) + in_use->set_killed_no_mutex(KILL_CONNECTION); + in_use->abort_current_cond_wait(true); + mysql_mutex_unlock(&in_use->LOCK_thd_kill); + signalled= TRUE; + } + + if (needs_thr_lock_abort) + { + mysql_mutex_lock(&in_use->LOCK_thd_data); + /* If not already dying */ + if (in_use->killed != KILL_CONNECTION_HARD) + { + for (TABLE *thd_table= in_use->open_tables; + thd_table ; + thd_table= thd_table->next) + { + /* + Check for TABLE::needs_reopen() is needed since in some + places we call handler::close() for table instance (and set + TABLE::db_stat to 0) and do not remove such instances from + the THD::open_tables for some time, during which other + thread can see those instances (e.g. see partitioning code). + */ + if (!thd_table->needs_reopen()) + { + signalled|= mysql_lock_abort_for_thread(this, thd_table); + } + } + } + mysql_mutex_unlock(&in_use->LOCK_thd_data); + } + DBUG_RETURN(signalled); +} + + +/* + Get error number for killed state + Note that the error message can't have any parameters. + If one needs parameters, one should use THD::killed_err_msg + See thd::kill_message() +*/ + +int THD::killed_errno() +{ + DBUG_ENTER("killed_errno"); + DBUG_PRINT("enter", ("killed: %d killed_errno: %d", + killed, killed_err ? 
killed_err->no: 0)); + + /* Ensure that killed_err is not set if we are not killed */ + DBUG_ASSERT(!killed_err || killed != NOT_KILLED); + + if (killed_err) + DBUG_RETURN(killed_err->no); + + switch (killed) { + case NOT_KILLED: + case KILL_HARD_BIT: + DBUG_RETURN(0); // Probably wrong usage + case KILL_BAD_DATA: + case KILL_BAD_DATA_HARD: + case ABORT_QUERY_HARD: + case ABORT_QUERY: + DBUG_RETURN(0); // Not a real error + case KILL_CONNECTION: + case KILL_CONNECTION_HARD: + case KILL_SYSTEM_THREAD: + case KILL_SYSTEM_THREAD_HARD: + DBUG_RETURN(ER_CONNECTION_KILLED); + case KILL_QUERY: + case KILL_QUERY_HARD: + DBUG_RETURN(ER_QUERY_INTERRUPTED); + case KILL_TIMEOUT: + case KILL_TIMEOUT_HARD: + DBUG_RETURN(slave_thread ? + ER_SLAVE_STATEMENT_TIMEOUT : ER_STATEMENT_TIMEOUT); + case KILL_SERVER: + case KILL_SERVER_HARD: + DBUG_RETURN(ER_SERVER_SHUTDOWN); + case KILL_SLAVE_SAME_ID: + DBUG_RETURN(ER_SLAVE_SAME_ID); + case KILL_WAIT_TIMEOUT: + case KILL_WAIT_TIMEOUT_HARD: + DBUG_RETURN(ER_NET_READ_INTERRUPTED); + } + DBUG_RETURN(0); // Keep compiler happy +} + + +void THD::reset_killed() +{ + /* + Resetting killed has to be done under a mutex to ensure + its not done during an awake() call. 
+ */ + DBUG_ENTER("reset_killed"); + if (killed != NOT_KILLED) + { + mysql_mutex_assert_not_owner(&LOCK_thd_kill); + mysql_mutex_lock(&LOCK_thd_kill); + killed= NOT_KILLED; + killed_err= 0; + mysql_mutex_unlock(&LOCK_thd_kill); + } +#ifdef WITH_WSREP + if (WSREP_NNULL(this)) + { + if (wsrep_abort_by_kill != NOT_KILLED) + { + mysql_mutex_assert_not_owner(&LOCK_thd_kill); + mysql_mutex_lock(&LOCK_thd_kill); + wsrep_abort_by_kill= NOT_KILLED; + wsrep_abort_by_kill_err= 0; + mysql_mutex_unlock(&LOCK_thd_kill); + } + } + mysql_mutex_assert_not_owner(&LOCK_thd_data); + mysql_mutex_lock(&LOCK_thd_data); + wsrep_aborter= 0; + mysql_mutex_unlock(&LOCK_thd_data); +#endif /* WITH_WSREP */ + + DBUG_VOID_RETURN; +} + +/* + Remember the location of thread info, the structure needed for + the structure for the net buffer +*/ + +void THD::store_globals() +{ + /* + Assert that thread_stack is initialized: it's necessary to be able + to track stack overrun. + */ + DBUG_ASSERT(thread_stack); + + set_current_thd(this); + /* + mysys_var is concurrently readable by a killer thread. + It is protected by LOCK_thd_kill, it is not needed to lock while the + pointer is changing from NULL not non-NULL. If the kill thread reads + NULL it doesn't refer to anything, but if it is non-NULL we need to + ensure that the thread doesn't proceed to assign another thread to + have the mysys_var reference (which in fact refers to the worker + threads local storage with key THR_KEY_mysys. + */ + mysys_var=my_thread_var; + /* + Let mysqld define the thread id (not mysys) + This allows us to move THD to different threads if needed. 
+ */ + mysys_var->id= thread_id; + + /* thread_dbug_id should not change for a THD */ + if (!thread_dbug_id) + thread_dbug_id= mysys_var->dbug_id; + else + { + /* This only changes if we are using pool-of-threads */ + mysys_var->dbug_id= thread_dbug_id; + } +#ifdef __NR_gettid + os_thread_id= (uint32)syscall(__NR_gettid); +#else + os_thread_id= 0; +#endif + real_id= pthread_self(); // For debugging + mysys_var->stack_ends_here= thread_stack + // for consistency, see libevent_thread_proc + STACK_DIRECTION * (long)my_thread_stack_size; + if (net.vio) + { + net.thd= this; + } + /* + We have to call thr_lock_info_init() again here as THD may have been + created in another thread + */ + thr_lock_info_init(&lock_info, mysys_var); +} + +/** + Untie THD from current thread + + Used when using --thread-handling=pool-of-threads +*/ + +void THD::reset_globals() +{ + mysql_mutex_lock(&LOCK_thd_kill); + mysys_var= 0; + mysql_mutex_unlock(&LOCK_thd_kill); + + /* Undocking the thread specific data. */ + set_current_thd(0); + net.thd= 0; +} + +/* + Cleanup after query. + + SYNOPSIS + THD::cleanup_after_query() + + DESCRIPTION + This function is used to reset thread data to its default state. + + NOTE + This function is not suitable for setting thread data to some + non-default values, as there is only one replication thread, so + different master threads may overwrite data of each other on + slave. +*/ + +void THD::cleanup_after_query() +{ + DBUG_ENTER("THD::cleanup_after_query"); + + thd_progress_end(this); + + /* + Reset RAND_USED so that detection of calls to rand() will save random + seeds if needed by the slave. + + Do not reset RAND_USED if inside a stored function or trigger because + only the call to these operations is logged. Thus only the calling + statement needs to detect rand() calls made by its substatements. These + substatements must not set RAND_USED to 0 because it would remove the + detection of rand() by the calling statement. 
+ */ + if (!in_sub_stmt) /* stored functions and triggers are a special case */ + { + /* Forget those values, for next binlogger: */ + stmt_depends_on_first_successful_insert_id_in_prev_stmt= 0; + auto_inc_intervals_in_cur_stmt_for_binlog.empty(); + used&= ~THD::RAND_USED; +#ifndef EMBEDDED_LIBRARY + /* + Clean possible unused INSERT_ID events by current statement. + is_update_query() is needed to ignore SET statements: + Statements that don't update anything directly and don't + used stored functions. This is mostly necessary to ignore + statements in binlog between SET INSERT_ID and DML statement + which is intended to consume its event (there can be other + SET statements between them). + */ + if ((rgi_slave || rli_fake) && is_update_query(lex->sql_command)) + auto_inc_intervals_forced.empty(); +#endif + } + /* + Forget the binlog stmt filter for the next query. + There are some code paths that: + - do not call THD::decide_logging_format() + - do call THD::binlog_query(), + making this reset necessary. + */ + reset_binlog_local_stmt_filter(); + if (first_successful_insert_id_in_cur_stmt > 0) + { + /* set what LAST_INSERT_ID() will return */ + first_successful_insert_id_in_prev_stmt= + first_successful_insert_id_in_cur_stmt; + first_successful_insert_id_in_cur_stmt= 0; + substitute_null_with_insert_id= TRUE; + } + arg_of_last_insert_id_function= 0; + /* Free Items that were created during this execution */ + free_items(); + /* Reset where. 
*/ + where= THD::DEFAULT_WHERE; + /* reset table map for multi-table update */ + table_map_for_update= 0; + m_binlog_invoker= INVOKER_NONE; + +#ifndef EMBEDDED_LIBRARY + if (rgi_slave) + rgi_slave->cleanup_after_query(); +#endif + +#ifdef WITH_WSREP + if (!in_active_multi_stmt_transaction()) + wsrep_affected_rows= 0; +#endif /* WITH_WSREP */ + + DBUG_VOID_RETURN; +} + + +/* + Convert a string to another character set + + SYNOPSIS + convert_string() + to Store new allocated string here + to_cs New character set for allocated string + from String to convert + from_length Length of string to convert + from_cs Original character set + + NOTES + to will be 0-terminated to make it easy to pass to system funcs + + RETURN + 0 ok + 1 End of memory. + In this case to->str will point to 0 and to->length will be 0. +*/ + +bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs, + const char *from, size_t from_length, + CHARSET_INFO *from_cs) +{ + DBUG_ENTER("THD::convert_string"); + size_t new_length= to_cs->mbmaxlen * from_length; + uint errors; + if (unlikely(alloc_lex_string(to, new_length + 1))) + DBUG_RETURN(true); // EOM + to->length= copy_and_convert((char*) to->str, new_length, to_cs, + from, from_length, from_cs, &errors); + to->str[to->length]= 0; // Safety + if (unlikely(errors) && lex->parse_vcol_expr) + { + my_error(ER_BAD_DATA, MYF(0), + ErrConvString(from, from_length, from_cs).ptr(), + to_cs->cs_name.str); + DBUG_RETURN(true); + } + DBUG_RETURN(false); +} + + +/* + Reinterpret a binary string to a character string + + @param[OUT] to The result will be written here, + either the original string as is, + or a newly alloced fixed string with + some zero bytes prepended. 
+ @param cs The destination character set + @param str The binary string + @param length The length of the binary string + + @return false on success + @return true on error +*/ + +bool THD::reinterpret_string_from_binary(LEX_CSTRING *to, CHARSET_INFO *cs, + const char *str, size_t length) +{ + /* + When reinterpreting from binary to tricky character sets like + UCS2, UTF16, UTF32, we may need to prepend some zero bytes. + This is possible in scenarios like this: + SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary; + This code is similar to String::copy_aligned(). + */ + size_t incomplete= length % cs->mbminlen; // Bytes in an incomplete character + if (incomplete) + { + size_t zeros= cs->mbminlen - incomplete; + size_t aligned_length= zeros + length; + char *dst= (char*) alloc(aligned_length + 1); + if (!dst) + { + to->str= NULL; // Safety + to->length= 0; + return true; + } + bzero(dst, zeros); + memcpy(dst + zeros, str, length); + dst[aligned_length]= '\0'; + to->str= dst; + to->length= aligned_length; + } + else + { + to->str= str; + to->length= length; + } + return check_string_for_wellformedness(to->str, to->length, cs); +} + + +/* + Convert a string between two character sets. + dstcs and srccs cannot be &my_charset_bin. +*/ +bool THD::convert_fix(CHARSET_INFO *dstcs, LEX_STRING *dst, + CHARSET_INFO *srccs, const char *src, size_t src_length, + String_copier *status) +{ + DBUG_ENTER("THD::convert_fix"); + size_t dst_length= dstcs->mbmaxlen * src_length; + if (alloc_lex_string(dst, dst_length + 1)) + DBUG_RETURN(true); // EOM + dst->length= status->convert_fix(dstcs, (char*) dst->str, dst_length, + srccs, src, src_length, src_length); + dst->str[dst->length]= 0; // Safety + DBUG_RETURN(false); +} + + +/* + Copy or convert a string. 
+*/ +bool THD::copy_fix(CHARSET_INFO *dstcs, LEX_STRING *dst, + CHARSET_INFO *srccs, const char *src, size_t src_length, + String_copier *status) +{ + DBUG_ENTER("THD::copy_fix"); + size_t dst_length= dstcs->mbmaxlen * src_length; + if (alloc_lex_string(dst, dst_length + 1)) + DBUG_RETURN(true); // EOM + dst->length= status->well_formed_copy(dstcs, dst->str, dst_length, + srccs, src, src_length, src_length); + dst->str[dst->length]= '\0'; + DBUG_RETURN(false); +} + + +class String_copier_with_error: public String_copier +{ +public: + bool check_errors(CHARSET_INFO *srccs, const char *src, size_t src_length) + { + if (most_important_error_pos()) + { + ErrConvString err(src, src_length, &my_charset_bin); + my_error(ER_INVALID_CHARACTER_STRING, MYF(0), srccs->cs_name.str, + err.ptr()); + return true; + } + return false; + } +}; + + +bool THD::convert_with_error(CHARSET_INFO *dstcs, LEX_STRING *dst, + CHARSET_INFO *srccs, + const char *src, size_t src_length) +{ + String_copier_with_error status; + return convert_fix(dstcs, dst, srccs, src, src_length, &status) || + status.check_errors(srccs, src, src_length); +} + + +bool THD::copy_with_error(CHARSET_INFO *dstcs, LEX_STRING *dst, + CHARSET_INFO *srccs, + const char *src, size_t src_length) +{ + String_copier_with_error status; + return copy_fix(dstcs, dst, srccs, src, src_length, &status) || + status.check_errors(srccs, src, src_length); +} + + +/* + Convert string from source character set to target character set inplace. + + SYNOPSIS + THD::convert_string + + DESCRIPTION + Convert string using convert_buffer - buffer for character set + conversion shared between all protocols. 
+ + RETURN + 0 ok + !0 out of memory +*/ + +bool THD::convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs) +{ + uint dummy_errors; + if (unlikely(convert_buffer.copy(s->ptr(), s->length(), from_cs, to_cs, + &dummy_errors))) + return TRUE; + /* If convert_buffer >> s copying is more efficient long term */ + if (convert_buffer.alloced_length() >= convert_buffer.length() * 2 || + !s->is_alloced()) + { + return s->copy(convert_buffer); + } + s->swap(convert_buffer); + return FALSE; +} + + +bool THD::check_string_for_wellformedness(const char *str, + size_t length, + CHARSET_INFO *cs) const +{ + size_t wlen= Well_formed_prefix(cs, str, length).length(); + if (wlen < length) + { + ErrConvString err(str, length, &my_charset_bin); + my_error(ER_INVALID_CHARACTER_STRING, MYF(0), cs->cs_name.str, err.ptr()); + return true; + } + return false; +} + + +bool THD::to_ident_sys_alloc(Lex_ident_sys_st *to, const Lex_ident_cli_st *ident) +{ + if (ident->is_quoted()) + { + LEX_CSTRING unquoted; + if (quote_unescape(&unquoted, ident, ident->quote())) + return true; + return charset_is_system_charset ? + to->copy_sys(this, &unquoted) : + to->convert(this, &unquoted, charset()); + } + return charset_is_system_charset ? 
+ to->copy_sys(this, ident) : + to->copy_or_convert(this, ident, charset()); +} + + +Item_basic_constant * +THD::make_string_literal(const char *str, size_t length, + my_repertoire_t repertoire) +{ + if (!length && (variables.sql_mode & MODE_EMPTY_STRING_IS_NULL)) + return new (mem_root) Item_null(this, 0, variables.collation_connection); + if (!charset_is_collation_connection && + (repertoire != MY_REPERTOIRE_ASCII || + !my_charset_is_ascii_based(variables.collation_connection))) + { + LEX_STRING to; + if (convert_string(&to, variables.collation_connection, + str, length, variables.character_set_client)) + return NULL; + str= to.str; + length= to.length; + } + return new (mem_root) Item_string(this, str, (uint)length, + variables.collation_connection, + DERIVATION_COERCIBLE, repertoire); +} + + +Item_basic_constant * +THD::make_string_literal_nchar(const Lex_string_with_metadata_st &str) +{ + DBUG_ASSERT(my_charset_is_ascii_based(national_charset_info)); + if (!str.length && (variables.sql_mode & MODE_EMPTY_STRING_IS_NULL)) + return new (mem_root) Item_null(this, 0, national_charset_info); + + return new (mem_root) Item_string(this, str.str, (uint)str.length, + national_charset_info, + DERIVATION_COERCIBLE, + str.repertoire()); +} + + +Item_basic_constant * +THD::make_string_literal_charset(const Lex_string_with_metadata_st &str, + CHARSET_INFO *cs) +{ + if (!str.length && (variables.sql_mode & MODE_EMPTY_STRING_IS_NULL)) + return new (mem_root) Item_null(this, 0, cs); + return new (mem_root) Item_string_with_introducer(this, str, cs); +} + + +/* + Update some cache variables when character set changes +*/ + +void THD::update_charset() +{ + uint32 not_used; + charset_is_system_charset= + !String::needs_conversion(0, + variables.character_set_client, + system_charset_info, + ¬_used); + charset_is_collation_connection= + !String::needs_conversion(0, + variables.character_set_client, + variables.collation_connection, + ¬_used); + charset_is_character_set_filesystem= 
+ !String::needs_conversion(0, + variables.character_set_client, + variables.character_set_filesystem, + ¬_used); +} + +void THD::give_protection_error() +{ + if (current_backup_stage != BACKUP_FINISHED) + my_error(ER_BACKUP_LOCK_IS_ACTIVE, MYF(0)); + else + { + DBUG_ASSERT(global_read_lock.is_acquired() || mdl_backup_lock); + my_error(ER_CANT_UPDATE_WITH_READLOCK, MYF(0)); + } +} + +/* routings to adding tables to list of changed in transaction tables */ + +inline static void list_include(CHANGED_TABLE_LIST** prev, + CHANGED_TABLE_LIST* curr, + CHANGED_TABLE_LIST* new_table) +{ + if (new_table) + { + *prev = new_table; + (*prev)->next = curr; + } +} + +/* add table to list of changed in transaction tables */ + +void THD::add_changed_table(TABLE *table) +{ + DBUG_ENTER("THD::add_changed_table(table)"); + + DBUG_ASSERT(in_multi_stmt_transaction_mode() && + table->file->has_transactions()); + add_changed_table(table->s->table_cache_key.str, + (long) table->s->table_cache_key.length); + DBUG_VOID_RETURN; +} + + +void THD::add_changed_table(const char *key, size_t key_length) +{ + DBUG_ENTER("THD::add_changed_table(key)"); + CHANGED_TABLE_LIST **prev_changed = &transaction->changed_tables; + CHANGED_TABLE_LIST *curr = transaction->changed_tables; + + for (; curr; prev_changed = &(curr->next), curr = curr->next) + { + int cmp = (long)curr->key_length - (long)key_length; + if (cmp < 0) + { + list_include(prev_changed, curr, changed_table_dup(key, key_length)); + DBUG_PRINT("info", + ("key_length: %zu %zu", key_length, + (*prev_changed)->key_length)); + DBUG_VOID_RETURN; + } + else if (cmp == 0) + { + cmp = memcmp(curr->key, key, curr->key_length); + if (cmp < 0) + { + list_include(prev_changed, curr, changed_table_dup(key, key_length)); + DBUG_PRINT("info", + ("key_length: %zu %zu", key_length, + (*prev_changed)->key_length)); + DBUG_VOID_RETURN; + } + else if (cmp == 0) + { + DBUG_PRINT("info", ("already in list")); + DBUG_VOID_RETURN; + } + } + } + *prev_changed = 
changed_table_dup(key, key_length); + DBUG_PRINT("info", ("key_length: %zu %zu", key_length, + (*prev_changed)->key_length)); + DBUG_VOID_RETURN; +} + + +CHANGED_TABLE_LIST* THD::changed_table_dup(const char *key, size_t key_length) +{ + CHANGED_TABLE_LIST* new_table = + (CHANGED_TABLE_LIST*) trans_alloc(ALIGN_SIZE(sizeof(CHANGED_TABLE_LIST))+ + key_length + 1); + if (!new_table) + { + my_error(EE_OUTOFMEMORY, MYF(ME_FATAL), + ALIGN_SIZE(sizeof(TABLE_LIST)) + key_length + 1); + set_killed(KILL_CONNECTION); + return 0; + } + + new_table->key= ((char*)new_table)+ ALIGN_SIZE(sizeof(CHANGED_TABLE_LIST)); + new_table->next = 0; + new_table->key_length = key_length; + ::memcpy(new_table->key, key, key_length); + return new_table; +} + + +int THD::prepare_explain_fields(select_result *result, List *field_list, + uint8 explain_flags, bool is_analyze) +{ + if (lex->explain_json) + make_explain_json_field_list(*field_list, is_analyze); + else + make_explain_field_list(*field_list, explain_flags, is_analyze); + + return result->prepare(*field_list, NULL); +} + + +int THD::send_explain_fields(select_result *result, + uint8 explain_flags, + bool is_analyze) +{ + List field_list; + int rc; + rc= prepare_explain_fields(result, &field_list, explain_flags, is_analyze) || + result->send_result_set_metadata(field_list, Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF); + return rc; +} + + +void THD::make_explain_json_field_list(List &field_list, bool is_analyze) +{ + Item *item= new (mem_root) Item_empty_string(this, (is_analyze ? + "ANALYZE" : + "EXPLAIN"), + 78, system_charset_info); + field_list.push_back(item, mem_root); +} + + +/* + Populate the provided field_list with EXPLAIN output columns. + this->lex->describe has the EXPLAIN flags + + The set/order of columns must be kept in sync with + Explain_query::print_explain and co. 
+*/ + +void THD::make_explain_field_list(List &field_list, uint8 explain_flags, + bool is_analyze) +{ + Item *item; + CHARSET_INFO *cs= system_charset_info; + field_list.push_back(item= new (mem_root) + Item_return_int(this, "id", 3, + MYSQL_TYPE_LONGLONG), mem_root); + item->set_maybe_null(); + field_list.push_back(new (mem_root) + Item_empty_string(this, "select_type", 19, cs), + mem_root); + field_list.push_back(item= new (mem_root) + Item_empty_string(this, "table", NAME_CHAR_LEN, cs), + mem_root); + item->set_maybe_null(); + if (explain_flags & DESCRIBE_PARTITIONS) + { + /* Maximum length of string that make_used_partitions_str() can produce */ + item= new (mem_root) Item_empty_string(this, "partitions", + MAX_PARTITIONS * (1 + FN_LEN), cs); + field_list.push_back(item, mem_root); + item->set_maybe_null(); + } + field_list.push_back(item= new (mem_root) + Item_empty_string(this, "type", 10, cs), + mem_root); + item->set_maybe_null(); + field_list.push_back(item= new (mem_root) + Item_empty_string(this, "possible_keys", + NAME_CHAR_LEN*MAX_KEY, cs), + mem_root); + item->set_maybe_null(); + field_list.push_back(item=new (mem_root) + Item_empty_string(this, "key", NAME_CHAR_LEN, cs), + mem_root); + item->set_maybe_null(); + field_list.push_back(item=new (mem_root) + Item_empty_string(this, "key_len", + NAME_CHAR_LEN*MAX_KEY), + mem_root); + item->set_maybe_null(); + field_list.push_back(item=new (mem_root) + Item_empty_string(this, "ref", + NAME_CHAR_LEN*MAX_REF_PARTS, cs), + mem_root); + item->set_maybe_null(); + field_list.push_back(item=new (mem_root) + Item_empty_string(this, "rows", NAME_CHAR_LEN, cs), + mem_root); + if (is_analyze) + { + field_list.push_back(item= new (mem_root) + Item_empty_string(this, "r_rows", NAME_CHAR_LEN, cs), + mem_root); + item->set_maybe_null(); + } + + if (is_analyze || (explain_flags & DESCRIBE_EXTENDED)) + { + field_list.push_back(item= new (mem_root) + Item_float(this, "filtered", 0.1234, 2, 4), + mem_root); + 
item->set_maybe_null(); + } + + if (is_analyze) + { + field_list.push_back(item= new (mem_root) + Item_float(this, "r_filtered", 0.1234, 2, 4), + mem_root); + item->set_maybe_null(); + } + + item->set_maybe_null(); + field_list.push_back(new (mem_root) + Item_empty_string(this, "Extra", 255, cs), + mem_root); +} + + +#ifdef SIGNAL_WITH_VIO_CLOSE +void THD::close_active_vio() +{ + DBUG_ENTER("close_active_vio"); + mysql_mutex_assert_owner(&LOCK_thd_data); +#ifndef EMBEDDED_LIBRARY + if (active_vio) + { + vio_close(active_vio); + active_vio = 0; + } +#endif + DBUG_VOID_RETURN; +} +#endif + + +/* + @brief MySQL parser used for recursive invocations + + @param old_lex The LEX structure in the state when this parser + is called recursively + @param lex The LEX structure used to parse a new SQL fragment + @param str The SQL fragment to parse + @param str_len The length of the SQL fragment to parse + @param stmt_prepare_mode true <=> when parsing a prepare statement + + @details + This function is to be used when parsing of an SQL fragment is + needed within one of the grammar rules. + + @notes + Currently the function is used only when the specification of a CTE + is parsed for the not first and not recursive references of the CTE. 
+ + @retval false On a successful parsing of the fragment + @retval true Otherwise +*/ + +bool THD::sql_parser(LEX *old_lex, LEX *lex, + char *str, uint str_len, bool stmt_prepare_mode) +{ + extern int MYSQLparse(THD * thd); + extern int ORAparse(THD * thd); + + bool parse_status= false; + Parser_state parser_state; + Parser_state *old_parser_state= m_parser_state; + + if (parser_state.init(this, str, str_len)) + return true; + + m_parser_state= &parser_state; + parser_state.m_lip.stmt_prepare_mode= stmt_prepare_mode; + parser_state.m_lip.multi_statements= false; + parser_state.m_lip.m_digest= NULL; + + lex->param_list= old_lex->param_list; + lex->sphead= old_lex->sphead; + lex->spname= old_lex->spname; + lex->spcont= old_lex->spcont; + lex->sp_chistics= old_lex->sp_chistics; + lex->trg_chistics= old_lex->trg_chistics; + + parse_status= (variables.sql_mode & MODE_ORACLE) ? + ORAparse(this) : MYSQLparse(this) != 0; + + m_parser_state= old_parser_state; + + return parse_status; +} + + +struct Item_change_record: public ilink +{ + Item **place; + Item *old_value; + /* Placement new was hidden by `new' in ilink (TODO: check): */ + static void *operator new(size_t size, void *mem) { return mem; } + static void operator delete(void *ptr, size_t size) {} + static void operator delete(void *ptr, void *mem) { /* never called */ } +}; + + +/* + Register an item tree tree transformation, performed by the query + optimizer. We need a pointer to runtime_memroot because it may be != + thd->mem_root (due to possible set_n_backup_active_arena called for thd). +*/ + +void +Item_change_list::nocheck_register_item_tree_change(Item **place, + Item *old_value, + MEM_ROOT *runtime_memroot) +{ + Item_change_record *change; + DBUG_ENTER("THD::nocheck_register_item_tree_change"); + DBUG_PRINT("enter", ("Register %p <- %p", old_value, (*place))); + /* + Now we use one node per change, which adds some memory overhead, + but still is rather fast as we use alloc_root for allocations. 
+ A list of item tree changes of an average query should be short. + */ + void *change_mem= alloc_root(runtime_memroot, sizeof(*change)); + if (change_mem == 0) + { + /* + OOM, thd->fatal_error() is called by the error handler of the + memroot. Just return. + */ + DBUG_VOID_RETURN; + } + change= new (change_mem) Item_change_record; + change->place= place; + change->old_value= old_value; + change_list.append(change); + DBUG_VOID_RETURN; +} + +/** + Check and register item change if needed + + @param place place where we should assign new value + @param new_value place of the new value + + @details + Let C be a reference to an item that changed the reference A + at the location (occurrence) L1 and this change has been registered. + If C is substituted for reference A another location (occurrence) L2 + that is to be registered as well than this change has to be + consistent with the first change in order the procedure that rollback + changes to substitute the same reference at both locations L1 and L2. 
+*/ + +void +Item_change_list::check_and_register_item_tree_change(Item **place, + Item **new_value, + MEM_ROOT *runtime_memroot) +{ + Item_change_record *change; + DBUG_ENTER("THD::check_and_register_item_tree_change"); + DBUG_PRINT("enter", ("Register: %p (%p) <- %p (%p)", + *place, place, *new_value, new_value)); + I_List_iterator it(change_list); + while ((change= it++)) + { + if (change->place == new_value) + break; // we need only very first value + } + if (change) + nocheck_register_item_tree_change(place, change->old_value, + runtime_memroot); + DBUG_VOID_RETURN; +} + + +void Item_change_list::rollback_item_tree_changes() +{ + DBUG_ENTER("THD::rollback_item_tree_changes"); + I_List_iterator it(change_list); + Item_change_record *change; + + while ((change= it++)) + { + DBUG_PRINT("info", ("Rollback: %p (%p) <- %p", + *change->place, change->place, change->old_value)); + *change->place= change->old_value; + } + /* We can forget about changes memory: it's allocated in runtime memroot */ + change_list.empty(); + DBUG_VOID_RETURN; +} + + +/***************************************************************************** +** Functions to provide a interface to select results +*****************************************************************************/ + +void select_result::cleanup() +{ + /* do nothing */ +} + +bool select_result::check_simple_select() const +{ + my_error(ER_SP_BAD_CURSOR_QUERY, MYF(0)); + return TRUE; +} + + +static String default_line_term("\n", 1, default_charset_info); +static String default_escaped("\\", 1, default_charset_info); +static String default_field_term("\t", 1, default_charset_info); +static String default_enclosed_and_line_start("", 0, default_charset_info); +static String default_xml_row_term("", 5, default_charset_info); + +sql_exchange::sql_exchange(const char *name, bool flag, + enum enum_filetype filetype_arg) + :file_name(name), opt_enclosed(0), dumpfile(flag), skip_lines(0) +{ + filetype= filetype_arg; + field_term= 
&default_field_term; + enclosed= line_start= &default_enclosed_and_line_start; + line_term= filetype == FILETYPE_CSV ? + &default_line_term : &default_xml_row_term; + escaped= &default_escaped; + cs= NULL; +} + +bool sql_exchange::escaped_given(void) const +{ + return escaped != &default_escaped; +} + + +bool select_send::send_result_set_metadata(List &list, uint flags) +{ + bool res; +#ifdef WITH_WSREP + if (WSREP(thd) && thd->wsrep_retry_query) + { + WSREP_DEBUG("skipping select metadata"); + return FALSE; + } +#endif /* WITH_WSREP */ + if (!(res= thd->protocol->send_result_set_metadata(&list, flags))) + is_result_set_started= 1; + return res; +} + +void select_send::abort_result_set() +{ + DBUG_ENTER("select_send::abort_result_set"); + + if (is_result_set_started && thd->spcont) + { + /* + We're executing a stored procedure, have an open result + set and an SQL exception condition. In this situation we + must abort the current statement, silence the error and + start executing the continue/exit handler if one is found. + Before aborting the statement, let's end the open result set, as + otherwise the client will hang due to the violation of the + client/server protocol. + */ + thd->spcont->end_partial_result_set= TRUE; + } + DBUG_VOID_RETURN; +} + + +/** + Cleanup an instance of this class for re-use + at next execution of a prepared statement/ + stored procedure statement. +*/ + +void select_send::cleanup() +{ + is_result_set_started= FALSE; +} + +/* Send data to client. 
Returns 0 if ok */ + +int select_send::send_data(List &items) +{ + Protocol *protocol= thd->protocol; + DBUG_ENTER("select_send::send_data"); + + protocol->prepare_for_resend(); + if (protocol->send_result_set_row(&items)) + { + protocol->remove_last_row(); + DBUG_RETURN(TRUE); + } + + thd->inc_sent_row_count(1); + + /* Don't return error if disconnected, only if write fails */ + if (likely(thd->vio_ok())) + DBUG_RETURN(protocol->write()); + + DBUG_RETURN(0); +} + + +bool select_send::send_eof() +{ + /* + Don't send EOF if we're in error condition (which implies we've already + sent or are sending an error) + */ + if (unlikely(thd->is_error())) + return TRUE; + ::my_eof(thd); + is_result_set_started= 0; + return FALSE; +} + + +/************************************************************************ + Handling writing to file +************************************************************************/ + +bool select_to_file::send_eof() +{ + int error= MY_TEST(end_io_cache(&cache)); + if (unlikely(mysql_file_close(file, MYF(MY_WME))) || + unlikely(thd->is_error())) + error= true; + + if (likely(!error) && !suppress_my_ok) + { + ::my_ok(thd,row_count); + } + file= -1; + return error; +} + + +void select_to_file::cleanup() +{ + /* In case of error send_eof() may be not called: close the file here. 
*/ + if (file >= 0) + { + (void) end_io_cache(&cache); + mysql_file_close(file, MYF(0)); + file= -1; + } + path[0]= '\0'; + row_count= 0; +} + + +select_to_file::~select_to_file() +{ + if (file >= 0) + { // This only happens in case of error + (void) end_io_cache(&cache); + mysql_file_close(file, MYF(0)); + file= -1; + } +} + +/*************************************************************************** +** Export of select to textfile +***************************************************************************/ + +select_export::~select_export() +{ + thd->set_sent_row_count(row_count); +} + + +/* + Create file with IO cache + + SYNOPSIS + create_file() + thd Thread handle + path File name + exchange Excange class + cache IO cache + + RETURN + >= 0 File handle + -1 Error +*/ + + +static File create_file(THD *thd, char *path, sql_exchange *exchange, + IO_CACHE *cache) +{ + File file; + uint option= MY_UNPACK_FILENAME | MY_RELATIVE_PATH; + +#ifdef DONT_ALLOW_FULL_LOAD_DATA_PATHS + option|= MY_REPLACE_DIR; // Force use of db directory +#endif + + if (!dirname_length(exchange->file_name)) + { + strxnmov(path, FN_REFLEN-1, mysql_real_data_home, thd->get_db(), NullS); + (void) fn_format(path, exchange->file_name, path, "", option); + } + else + (void) fn_format(path, exchange->file_name, mysql_real_data_home, "", option); + + if (!is_secure_file_path(path)) + { + /* Write only allowed to dir or subdir specified by secure_file_priv */ + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--secure-file-priv"); + return -1; + } + + if (!access(path, F_OK)) + { + my_error(ER_FILE_EXISTS_ERROR, MYF(0), exchange->file_name); + return -1; + } + /* Create the file world readable */ + if ((file= mysql_file_create(key_select_to_file, + path, 0644, O_WRONLY|O_EXCL, MYF(MY_WME))) < 0) + return file; +#ifdef HAVE_FCHMOD + (void) fchmod(file, 0644); // Because of umask() +#else + (void) chmod(path, 0644); +#endif + if (init_io_cache(cache, file, 0L, WRITE_CACHE, 0L, 1, MYF(MY_WME))) + { + 
mysql_file_close(file, MYF(0)); + /* Delete file on error, it was just created */ + mysql_file_delete(key_select_to_file, path, MYF(0)); + return -1; + } + return file; +} + + +int +select_export::prepare(List &list, SELECT_LEX_UNIT *u) +{ + bool blob_flag=0; + bool string_results= FALSE, non_string_results= FALSE; + unit= u; + if ((uint) strlen(exchange->file_name) + NAME_LEN >= FN_REFLEN) + strmake_buf(path,exchange->file_name); + + write_cs= exchange->cs ? exchange->cs : &my_charset_bin; + + if ((file= create_file(thd, path, exchange, &cache)) < 0) + return 1; + /* Check if there is any blobs in data */ + { + List_iterator_fast li(list); + Item *item; + while ((item=li++)) + { + if (item->max_length >= MAX_BLOB_WIDTH) + { + blob_flag=1; + break; + } + if (item->result_type() == STRING_RESULT) + string_results= TRUE; + else + non_string_results= TRUE; + } + } + if (exchange->escaped->numchars() > 1 || exchange->enclosed->numchars() > 1) + { + my_error(ER_WRONG_FIELD_TERMINATORS, MYF(0)); + return TRUE; + } + if (exchange->escaped->length() > 1 || exchange->enclosed->length() > 1 || + !my_isascii(exchange->escaped->ptr()[0]) || + !my_isascii(exchange->enclosed->ptr()[0]) || + !exchange->field_term->is_ascii() || !exchange->line_term->is_ascii() || + !exchange->line_start->is_ascii()) + { + /* + Current LOAD DATA INFILE recognizes field/line separators "as is" without + converting from client charset to data file charset. So, it is supposed, + that input file of LOAD DATA INFILE consists of data in one charset and + separators in other charset. 
For the compatibility with that [buggy] + behaviour SELECT INTO OUTFILE implementation has been saved "as is" too, + but the new warning message has been added: + + Non-ASCII separator arguments are not fully supported + */ + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED, + ER_THD(thd, WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED)); + } + field_term_length=exchange->field_term->length(); + field_term_char= field_term_length ? + (int) (uchar) (*exchange->field_term)[0] : INT_MAX; + if (!exchange->line_term->length()) + exchange->line_term=exchange->field_term; // Use this if it exists + field_sep_char= (exchange->enclosed->length() ? + (int) (uchar) (*exchange->enclosed)[0] : field_term_char); + if (exchange->escaped->length() && (exchange->escaped_given() || + !(thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES))) + escape_char= (int) (uchar) (*exchange->escaped)[0]; + else + escape_char= -1; + is_ambiguous_field_sep= MY_TEST(strchr(ESCAPE_CHARS, field_sep_char)); + is_unsafe_field_sep= MY_TEST(strchr(NUMERIC_CHARS, field_sep_char)); + line_sep_char= (exchange->line_term->length() ? + (int) (uchar) (*exchange->line_term)[0] : INT_MAX); + if (!field_term_length) + exchange->opt_enclosed=0; + if (!exchange->enclosed->length()) + exchange->opt_enclosed=1; // A little quicker loop + fixed_row_size= (!field_term_length && !exchange->enclosed->length() && + !blob_flag); + if ((is_ambiguous_field_sep && exchange->enclosed->is_empty() && + (string_results || is_unsafe_field_sep)) || + (exchange->opt_enclosed && non_string_results && + field_term_length && strchr(NUMERIC_CHARS, field_term_char))) + { + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_AMBIGUOUS_FIELD_TERM, + ER_THD(thd, ER_AMBIGUOUS_FIELD_TERM)); + is_ambiguous_field_term= TRUE; + } + else + is_ambiguous_field_term= FALSE; + + return 0; +} + + +#define NEED_ESCAPING(x) ((int) (uchar) (x) == escape_char || \ + (enclosed ? 
(int) (uchar) (x) == field_sep_char \ + : (int) (uchar) (x) == field_term_char) || \ + (int) (uchar) (x) == line_sep_char || \ + !(x)) + +int select_export::send_data(List &items) +{ + + DBUG_ENTER("select_export::send_data"); + char buff[MAX_FIELD_WIDTH],null_buff[2],space[MAX_FIELD_WIDTH]; + char cvt_buff[MAX_FIELD_WIDTH]; + String cvt_str(cvt_buff, sizeof(cvt_buff), write_cs); + bool space_inited=0; + String tmp(buff,sizeof(buff),&my_charset_bin),*res; + tmp.length(0); + + row_count++; + Item *item; + uint used_length=0,items_left=items.elements; + List_iterator_fast li(items); + + if (my_b_write(&cache,(uchar*) exchange->line_start->ptr(), + exchange->line_start->length())) + goto err; + while ((item=li++)) + { + Item_result result_type=item->result_type(); + bool enclosed = (exchange->enclosed->length() && + (!exchange->opt_enclosed || result_type == STRING_RESULT)); + res=item->str_result(&tmp); + if (res && !my_charset_same(write_cs, res->charset()) && + !my_charset_same(write_cs, &my_charset_bin)) + { + String_copier copier; + const char *error_pos; + uint32 bytes; + uint64 estimated_bytes= + ((uint64) res->length() / res->charset()->mbminlen + 1) * + write_cs->mbmaxlen + 1; + set_if_smaller(estimated_bytes, UINT_MAX32); + if (cvt_str.alloc((uint32) estimated_bytes)) + { + my_error(ER_OUTOFMEMORY, MYF(ME_FATAL), (uint32) estimated_bytes); + goto err; + } + + bytes= copier.well_formed_copy(write_cs, (char *) cvt_str.ptr(), + cvt_str.alloced_length(), + res->charset(), + res->ptr(), res->length()); + error_pos= copier.most_important_error_pos(); + if (unlikely(error_pos)) + { + /* + TODO: + add new error message that will show user this printable_buff + + char printable_buff[32]; + convert_to_printable(printable_buff, sizeof(printable_buff), + error_pos, res->ptr() + res->length() - error_pos, + res->charset(), 6); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_TRUNCATED_WRONG_VALUE_FOR_FIELD, + ER_THD(thd, 
ER_TRUNCATED_WRONG_VALUE_FOR_FIELD), + "string", printable_buff, + item->name.str, static_cast(row_count)); + */ + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_TRUNCATED_WRONG_VALUE_FOR_FIELD, + ER_THD(thd, WARN_DATA_TRUNCATED), + item->name.str, static_cast(row_count)); + } + else if (copier.source_end_pos() < res->ptr() + res->length()) + { + /* + result is longer than UINT_MAX32 and doesn't fit into String + */ + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_DATA_TRUNCATED, + ER_THD(thd, WARN_DATA_TRUNCATED), + item->full_name(), static_cast(row_count)); + } + cvt_str.length(bytes); + res= &cvt_str; + } + if (res && enclosed) + { + if (my_b_write(&cache,(uchar*) exchange->enclosed->ptr(), + exchange->enclosed->length())) + goto err; + } + if (!res) + { // NULL + if (!fixed_row_size) + { + if (escape_char != -1) // Use \N syntax + { + null_buff[0]=escape_char; + null_buff[1]='N'; + if (my_b_write(&cache,(uchar*) null_buff,2)) + goto err; + } + else if (my_b_write(&cache,(uchar*) "NULL",4)) + goto err; + } + else + { + used_length=0; // Fill with space + } + } + else + { + if (fixed_row_size) + used_length=MY_MIN(res->length(),item->max_length); + else + used_length=res->length(); + if ((result_type == STRING_RESULT || is_unsafe_field_sep) && + escape_char != -1) + { + char *pos, *start, *end; + CHARSET_INFO *res_charset= res->charset(); + CHARSET_INFO *character_set_client= thd->variables. 
+ character_set_client; + bool check_second_byte= (res_charset == &my_charset_bin) && + character_set_client-> + escape_with_backslash_is_dangerous; + DBUG_ASSERT(character_set_client->mbmaxlen == 2 || + !character_set_client->escape_with_backslash_is_dangerous); + for (start=pos=(char*) res->ptr(),end=pos+used_length ; + pos != end ; + pos++) + { +#ifdef USE_MB + if (res_charset->use_mb()) + { + int l; + if ((l=my_ismbchar(res_charset, pos, end))) + { + pos += l-1; + continue; + } + } +#endif + + /* + Special case when dumping BINARY/VARBINARY/BLOB values + for the clients with character sets big5, cp932, gbk and sjis, + which can have the escape character (0x5C "\" by default) + as the second byte of a multi-byte sequence. + + If + - pos[0] is a valid multi-byte head (e.g 0xEE) and + - pos[1] is 0x00, which will be escaped as "\0", + + then we'll get "0xEE + 0x5C + 0x30" in the output file. + + If this file is later loaded using this sequence of commands: + + mysql> create table t1 (a varchar(128)) character set big5; + mysql> LOAD DATA INFILE 'dump.txt' INTO TABLE t1; + + then 0x5C will be misinterpreted as the second byte + of a multi-byte character "0xEE + 0x5C", instead of + escape character for 0x00. + + To avoid this confusion, we'll escape the multi-byte + head character too, so the sequence "0xEE + 0x00" will be + dumped as "0x5C + 0xEE + 0x5C + 0x30". + + Note, in the condition below we only check if + mbcharlen is equal to 2, because there are no + character sets with mbmaxlen longer than 2 + and with escape_with_backslash_is_dangerous set. + DBUG_ASSERT before the loop makes that sure. 
+ */ + + if ((NEED_ESCAPING(*pos) || + (check_second_byte && + ((uchar) *pos) > 0x7F /* a potential MB2HEAD */ && + pos + 1 < end && + NEED_ESCAPING(pos[1]))) && + /* + Don't escape field_term_char by doubling - doubling is only + valid for ENCLOSED BY characters: + */ + (enclosed || !is_ambiguous_field_term || + (int) (uchar) *pos != field_term_char)) + { + char tmp_buff[2]; + tmp_buff[0]= ((int) (uchar) *pos == field_sep_char && + is_ambiguous_field_sep) ? + field_sep_char : escape_char; + tmp_buff[1]= *pos ? *pos : '0'; + if (my_b_write(&cache,(uchar*) start,(uint) (pos-start)) || + my_b_write(&cache,(uchar*) tmp_buff,2)) + goto err; + start=pos+1; + } + } + if (my_b_write(&cache,(uchar*) start,(uint) (pos-start))) + goto err; + } + else if (my_b_write(&cache,(uchar*) res->ptr(),used_length)) + goto err; + } + if (fixed_row_size) + { // Fill with space + if (item->max_length > used_length) + { + if (!space_inited) + { + space_inited=1; + bfill(space,sizeof(space),' '); + } + uint length=item->max_length-used_length; + for (; length > sizeof(space) ; length-=sizeof(space)) + { + if (my_b_write(&cache,(uchar*) space,sizeof(space))) + goto err; + } + if (my_b_write(&cache,(uchar*) space,length)) + goto err; + } + } + if (res && enclosed) + { + if (my_b_write(&cache, (uchar*) exchange->enclosed->ptr(), + exchange->enclosed->length())) + goto err; + } + if (--items_left) + { + if (my_b_write(&cache, (uchar*) exchange->field_term->ptr(), + field_term_length)) + goto err; + } + } + if (my_b_write(&cache,(uchar*) exchange->line_term->ptr(), + exchange->line_term->length())) + goto err; + DBUG_RETURN(0); +err: + DBUG_RETURN(1); +} + + +/*************************************************************************** +** Dump of select to a binary file +***************************************************************************/ + + +int +select_dump::prepare(List &list __attribute__((unused)), + SELECT_LEX_UNIT *u) +{ + unit= u; + return (int) ((file= create_file(thd, path, 
exchange, &cache)) < 0); +} + + +int select_dump::send_data(List &items) +{ + List_iterator_fast li(items); + char buff[MAX_FIELD_WIDTH]; + String tmp(buff,sizeof(buff),&my_charset_bin),*res; + tmp.length(0); + Item *item; + DBUG_ENTER("select_dump::send_data"); + + if (row_count++ > 1) + { + my_message(ER_TOO_MANY_ROWS, ER_THD(thd, ER_TOO_MANY_ROWS), MYF(0)); + goto err; + } + while ((item=li++)) + { + res=item->str_result(&tmp); + if (!res) // If NULL + { + if (my_b_write(&cache,(uchar*) "",1)) + goto err; + } + else if (my_b_write(&cache,(uchar*) res->ptr(),res->length())) + { + my_error(ER_ERROR_ON_WRITE, MYF(0), path, my_errno); + goto err; + } + } + DBUG_RETURN(0); +err: + DBUG_RETURN(1); +} + + +int select_singlerow_subselect::send_data(List &items) +{ + DBUG_ENTER("select_singlerow_subselect::send_data"); + Item_singlerow_subselect *it= (Item_singlerow_subselect *)item; + if (it->assigned()) + { + my_message(ER_SUBQUERY_NO_1_ROW, ER_THD(thd, ER_SUBQUERY_NO_1_ROW), + MYF(current_thd->lex->ignore ? 
ME_WARNING : 0)); + DBUG_RETURN(1); + } + List_iterator_fast li(items); + Item *val_item; + for (uint i= 0; (val_item= li++); i++) + it->store(i, val_item); + it->assigned(1); + DBUG_RETURN(0); +} + + +void select_max_min_finder_subselect::cleanup() +{ + DBUG_ENTER("select_max_min_finder_subselect::cleanup"); + cache= 0; + DBUG_VOID_RETURN; +} + + +void select_max_min_finder_subselect::set_op(const Type_handler *th) +{ + if (th->is_val_native_ready()) + { + op= &select_max_min_finder_subselect::cmp_native; + return; + } + + switch (th->cmp_type()) { + case REAL_RESULT: + op= &select_max_min_finder_subselect::cmp_real; + break; + case INT_RESULT: + op= &select_max_min_finder_subselect::cmp_int; + break; + case STRING_RESULT: + op= &select_max_min_finder_subselect::cmp_str; + break; + case DECIMAL_RESULT: + op= &select_max_min_finder_subselect::cmp_decimal; + break; + case TIME_RESULT: + if (th->field_type() == MYSQL_TYPE_TIME) + op= &select_max_min_finder_subselect::cmp_time; + else + op= &select_max_min_finder_subselect::cmp_str; + break; + case ROW_RESULT: + // This case should never be chosen + DBUG_ASSERT(0); + op= 0; + } +} + + +int select_max_min_finder_subselect::send_data(List &items) +{ + DBUG_ENTER("select_max_min_finder_subselect::send_data"); + Item_maxmin_subselect *it= (Item_maxmin_subselect *)item; + List_iterator_fast li(items); + Item *val_item= li++; + it->register_value(); + if (it->assigned()) + { + cache->store(val_item); + if ((this->*op)()) + it->store(0, cache); + } + else + { + if (!cache) + { + cache= val_item->get_cache(thd); + set_op(val_item->type_handler()); + cache->setup(thd, val_item); + } + else + cache->store(val_item); + it->store(0, cache); + } + it->assigned(1); + DBUG_RETURN(0); +} + +bool select_max_min_finder_subselect::cmp_real() +{ + Item *maxmin= ((Item_singlerow_subselect *)item)->element_index(0); + double val1= cache->val_real(), val2= maxmin->val_real(); + + /* Ignore NULLs for ANY and keep them for ALL subqueries */ + 
if (cache->null_value) + return (is_all && !maxmin->null_value) || (!is_all && maxmin->null_value); + if (maxmin->null_value) + return !is_all; + + if (fmax) + return(val1 > val2); + return (val1 < val2); +} + +bool select_max_min_finder_subselect::cmp_int() +{ + Item *maxmin= ((Item_singlerow_subselect *)item)->element_index(0); + longlong val1= cache->val_int(), val2= maxmin->val_int(); + + /* Ignore NULLs for ANY and keep them for ALL subqueries */ + if (cache->null_value) + return (is_all && !maxmin->null_value) || (!is_all && maxmin->null_value); + if (maxmin->null_value) + return !is_all; + + if (fmax) + return(val1 > val2); + return (val1 < val2); +} + +bool select_max_min_finder_subselect::cmp_time() +{ + Item *maxmin= ((Item_singlerow_subselect *)item)->element_index(0); + THD *thd= current_thd; + auto val1= cache->val_time_packed(thd), val2= maxmin->val_time_packed(thd); + + /* Ignore NULLs for ANY and keep them for ALL subqueries */ + if (cache->null_value) + return (is_all && !maxmin->null_value) || (!is_all && maxmin->null_value); + if (maxmin->null_value) + return !is_all; + + if (fmax) + return(val1 > val2); + return (val1 < val2); +} + +bool select_max_min_finder_subselect::cmp_decimal() +{ + Item *maxmin= ((Item_singlerow_subselect *)item)->element_index(0); + VDec cvalue(cache), mvalue(maxmin); + + /* Ignore NULLs for ANY and keep them for ALL subqueries */ + if (cvalue.is_null()) + return (is_all && !mvalue.is_null()) || (!is_all && mvalue.is_null()); + if (mvalue.is_null()) + return !is_all; + + return fmax ? 
cvalue.cmp(mvalue) > 0 : cvalue.cmp(mvalue) < 0; +} + +bool select_max_min_finder_subselect::cmp_str() +{ + String *val1, *val2, buf1, buf2; + Item *maxmin= ((Item_singlerow_subselect *)item)->element_index(0); + /* + as far as both operand is Item_cache buf1 & buf2 will not be used, + but added for safety + */ + val1= cache->val_str(&buf1); + val2= maxmin->val_str(&buf2); + + /* Ignore NULLs for ANY and keep them for ALL subqueries */ + if (cache->null_value) + return (is_all && !maxmin->null_value) || (!is_all && maxmin->null_value); + if (maxmin->null_value) + return !is_all; + + if (fmax) + return (sortcmp(val1, val2, cache->collation.collation) > 0) ; + return (sortcmp(val1, val2, cache->collation.collation) < 0); +} + + +bool select_max_min_finder_subselect::cmp_native() +{ + NativeBuffer cvalue, mvalue; + Item *maxmin= ((Item_singlerow_subselect *)item)->element_index(0); + bool cvalue_is_null= cache->val_native(thd, &cvalue); + bool mvalue_is_null= maxmin->val_native(thd, &mvalue); + + /* Ignore NULLs for ANY and keep them for ALL subqueries */ + if (cvalue_is_null) + return (is_all && !mvalue_is_null) || (!is_all && mvalue_is_null); + if (mvalue_is_null) + return !is_all; + + const Type_handler *th= cache->type_handler(); + return fmax ? 
th->cmp_native(cvalue, mvalue) > 0 : + th->cmp_native(cvalue, mvalue) < 0; +} + + +int select_exists_subselect::send_data(List &items) +{ + DBUG_ENTER("select_exists_subselect::send_data"); + Item_exists_subselect *it= (Item_exists_subselect *)item; + it->value= 1; + it->assigned(1); + DBUG_RETURN(0); +} + + +/*************************************************************************** + Dump of select to variables +***************************************************************************/ + +int select_dumpvar::prepare(List &list, SELECT_LEX_UNIT *u) +{ + my_var_sp *mvsp; + unit= u; + m_var_sp_row= NULL; + + if (var_list.elements == 1 && + (mvsp= var_list.head()->get_my_var_sp()) && + mvsp->type_handler() == &type_handler_row) + { + // SELECT INTO row_type_sp_variable + if (mvsp->get_rcontext(thd->spcont)->get_variable(mvsp->offset)->cols() != + list.elements) + goto error; + m_var_sp_row= mvsp; + return 0; + } + + // SELECT INTO variable list + if (var_list.elements == list.elements) + return 0; + +error: + my_message(ER_WRONG_NUMBER_OF_COLUMNS_IN_SELECT, + ER_THD(thd, ER_WRONG_NUMBER_OF_COLUMNS_IN_SELECT), MYF(0)); + return 1; +} + + +bool select_dumpvar::check_simple_select() const +{ + my_error(ER_SP_BAD_CURSOR_SELECT, MYF(0)); + return TRUE; +} + + +void select_dumpvar::cleanup() +{ + row_count= 0; +} + + +Query_arena::Type Query_arena::type() const +{ + return STATEMENT; +} + + +void Query_arena::free_items() +{ + Item *next; + DBUG_ENTER("Query_arena::free_items"); + /* This works because items are allocated on THD::mem_root */ + for (; free_list; free_list= next) + { + next= free_list->next; + DBUG_ASSERT(free_list != next); + DBUG_PRINT("info", ("free item: %p", free_list)); + free_list->delete_self(); + } + /* Postcondition: free_list is 0 */ + DBUG_VOID_RETURN; +} + + +void Query_arena::set_query_arena(Query_arena *set) +{ + mem_root= set->mem_root; + free_list= set->free_list; + state= set->state; +} + + +bool Query_arena::cleanup_stmt(bool 
/*restore_set_statement_vars*/) +{ + DBUG_ASSERT(! "Query_arena::cleanup_stmt() not implemented"); + return false; +} + +/* + Statement functions +*/ + +Statement::Statement(LEX *lex_arg, MEM_ROOT *mem_root_arg, + enum enum_state state_arg, ulong id_arg) + :Query_arena(mem_root_arg, state_arg), + id(id_arg), + column_usage(MARK_COLUMNS_READ), + lex(lex_arg), + db(null_clex_str) +{ + hr_prepare_time.val= 0, + name= null_clex_str; +} + + +Query_arena::Type Statement::type() const +{ + return STATEMENT; +} + + +void Statement::set_statement(Statement *stmt) +{ + id= stmt->id; + column_usage= stmt->column_usage; + lex= stmt->lex; + query_string= stmt->query_string; + hr_prepare_time= stmt->hr_prepare_time; +} + + +void +Statement::set_n_backup_statement(Statement *stmt, Statement *backup) +{ + DBUG_ENTER("Statement::set_n_backup_statement"); + backup->set_statement(this); + set_statement(stmt); + DBUG_VOID_RETURN; +} + + +void Statement::restore_backup_statement(Statement *stmt, Statement *backup) +{ + DBUG_ENTER("Statement::restore_backup_statement"); + stmt->set_statement(this); + set_statement(backup); + DBUG_VOID_RETURN; +} + + +void THD::end_statement() +{ + DBUG_ENTER("THD::end_statement"); + /* Cleanup SQL processing state to reuse this statement in next query. */ + lex_end(lex); + delete lex->result; + lex->result= 0; + /* Note that free_list is freed in cleanup_after_query() */ + + /* + Don't free mem_root, as mem_root is freed in the end of dispatch_command + (once for any command). + */ + DBUG_VOID_RETURN; +} + + +/* + Start using arena specified by @set. Current arena data will be saved to + *backup. 
+*/ +void THD::set_n_backup_active_arena(Query_arena *set, Query_arena *backup) +{ + DBUG_ENTER("THD::set_n_backup_active_arena"); + DBUG_ASSERT(backup->is_backup_arena == FALSE); + + backup->set_query_arena(this); + set_query_arena(set); +#ifdef DBUG_ASSERT_EXISTS + backup->is_backup_arena= TRUE; +#endif + DBUG_VOID_RETURN; +} + + +/* + Stop using the temporary arena, and start again using the arena that is + specified in *backup. + The temporary arena is returned back into *set. +*/ + +void THD::restore_active_arena(Query_arena *set, Query_arena *backup) +{ + DBUG_ENTER("THD::restore_active_arena"); + DBUG_ASSERT(backup->is_backup_arena); + set->set_query_arena(this); + set_query_arena(backup); +#ifdef DBUG_ASSERT_EXISTS + backup->is_backup_arena= FALSE; +#endif + DBUG_VOID_RETURN; +} + +Statement::~Statement() = default; + +C_MODE_START + +static uchar * +get_statement_id_as_hash_key(const uchar *record, size_t *key_length, + my_bool not_used __attribute__((unused))) +{ + const Statement *statement= (const Statement *) record; + *key_length= sizeof(statement->id); + return (uchar *) &((const Statement *) statement)->id; +} + +static void delete_statement_as_hash_key(void *key) +{ + delete (Statement *) key; +} + +static uchar *get_stmt_name_hash_key(Statement *entry, size_t *length, + my_bool not_used __attribute__((unused))) +{ + *length= entry->name.length; + return (uchar*) entry->name.str; +} + +C_MODE_END + +Statement_map::Statement_map() : + last_found_statement(0) +{ + enum + { + START_STMT_HASH_SIZE = 16, + START_NAME_HASH_SIZE = 16 + }; + my_hash_init(key_memory_prepared_statement_map, &st_hash, &my_charset_bin, + START_STMT_HASH_SIZE, 0, 0, get_statement_id_as_hash_key, + delete_statement_as_hash_key, MYF(0)); + my_hash_init(key_memory_prepared_statement_map, &names_hash, system_charset_info, START_NAME_HASH_SIZE, 0, 0, + (my_hash_get_key) get_stmt_name_hash_key, + NULL, MYF(0)); +} + + +/* + Insert a new statement to the thread-local statement map. 
+ + DESCRIPTION + If there was an old statement with the same name, replace it with the + new one. Otherwise, check if max_prepared_stmt_count is not reached yet, + increase prepared_stmt_count, and insert the new statement. It's okay + to delete an old statement and fail to insert the new one. + + POSTCONDITIONS + All named prepared statements are also present in names_hash. + Statement names in names_hash are unique. + The statement is added only if prepared_stmt_count < max_prepard_stmt_count + last_found_statement always points to a valid statement or is 0 + + RETURN VALUE + 0 success + 1 error: out of resources or max_prepared_stmt_count limit has been + reached. An error is sent to the client, the statement is deleted. +*/ + +int Statement_map::insert(THD *thd, Statement *statement) +{ + if (my_hash_insert(&st_hash, (uchar*) statement)) + { + /* + Delete is needed only in case of an insert failure. In all other + cases hash_delete will also delete the statement. + */ + delete statement; + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + goto err_st_hash; + } + if (statement->name.str && my_hash_insert(&names_hash, (uchar*) statement)) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + goto err_names_hash; + } + mysql_mutex_lock(&LOCK_prepared_stmt_count); + /* + We don't check that prepared_stmt_count is <= max_prepared_stmt_count + because we would like to allow to lower the total limit + of prepared statements below the current count. In that case + no new statements can be added until prepared_stmt_count drops below + the limit. 
+ */ + if (prepared_stmt_count >= max_prepared_stmt_count) + { + mysql_mutex_unlock(&LOCK_prepared_stmt_count); + my_error(ER_MAX_PREPARED_STMT_COUNT_REACHED, MYF(0), + max_prepared_stmt_count); + goto err_max; + } + prepared_stmt_count++; + mysql_mutex_unlock(&LOCK_prepared_stmt_count); + + last_found_statement= statement; + return 0; + +err_max: + if (statement->name.str) + my_hash_delete(&names_hash, (uchar*) statement); +err_names_hash: + my_hash_delete(&st_hash, (uchar*) statement); +err_st_hash: + return 1; +} + + +void Statement_map::close_transient_cursors() +{ +#ifdef TO_BE_IMPLEMENTED + Statement *stmt; + while ((stmt= transient_cursor_list.head())) + stmt->close_cursor(); /* deletes itself from the list */ +#endif +} + + +void Statement_map::erase(Statement *statement) +{ + if (statement == last_found_statement) + last_found_statement= 0; + if (statement->name.str) + my_hash_delete(&names_hash, (uchar *) statement); + + my_hash_delete(&st_hash, (uchar *) statement); + mysql_mutex_lock(&LOCK_prepared_stmt_count); + DBUG_ASSERT(prepared_stmt_count > 0); + prepared_stmt_count--; + mysql_mutex_unlock(&LOCK_prepared_stmt_count); +} + + +void Statement_map::reset() +{ + /* Must be first, hash_free will reset st_hash.records */ + if (st_hash.records) + { + mysql_mutex_lock(&LOCK_prepared_stmt_count); + DBUG_ASSERT(prepared_stmt_count >= st_hash.records); + prepared_stmt_count-= st_hash.records; + mysql_mutex_unlock(&LOCK_prepared_stmt_count); + } + my_hash_reset(&names_hash); + my_hash_reset(&st_hash); + last_found_statement= 0; +} + + +Statement_map::~Statement_map() +{ + /* Statement_map::reset() should be called prior to destructor. 
*/ + DBUG_ASSERT(!st_hash.records); + my_hash_free(&names_hash); + my_hash_free(&st_hash); +} + +bool my_var_user::set(THD *thd, Item *item) +{ + Item_func_set_user_var *suv= new (thd->mem_root) Item_func_set_user_var(thd, &name, item); + suv->save_item_result(item); + return suv->fix_fields(thd, 0) || suv->update(); +} + + +sp_rcontext *my_var_sp::get_rcontext(sp_rcontext *local_ctx) const +{ + return m_rcontext_handler->get_rcontext(local_ctx); +} + + +bool my_var_sp::set(THD *thd, Item *item) +{ + return get_rcontext(thd->spcont)->set_variable(thd, offset, &item); +} + +bool my_var_sp_row_field::set(THD *thd, Item *item) +{ + return get_rcontext(thd->spcont)-> + set_variable_row_field(thd, offset, m_field_offset, &item); +} + + +bool select_dumpvar::send_data_to_var_list(List &items) +{ + DBUG_ENTER("select_dumpvar::send_data_to_var_list"); + List_iterator_fast var_li(var_list); + List_iterator it(items); + Item *item; + my_var *mv; + while ((mv= var_li++) && (item= it++)) + { + if (mv->set(thd, item)) + DBUG_RETURN(true); + } + DBUG_RETURN(false); +} + + +int select_dumpvar::send_data(List &items) +{ + DBUG_ENTER("select_dumpvar::send_data"); + + if (row_count++) + { + my_message(ER_TOO_MANY_ROWS, ER_THD(thd, ER_TOO_MANY_ROWS), MYF(0)); + DBUG_RETURN(1); + } + if (m_var_sp_row ? + m_var_sp_row->get_rcontext(thd->spcont)-> + set_variable_row(thd, m_var_sp_row->offset, items) : + send_data_to_var_list(items)) + DBUG_RETURN(1); + + DBUG_RETURN(thd->is_error()); +} + +bool select_dumpvar::send_eof() +{ + if (! 
row_count) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_SP_FETCH_NO_DATA, ER_THD(thd, ER_SP_FETCH_NO_DATA)); + /* + Don't send EOF if we're in error condition (which implies we've already + sent or are sending an error) + */ + if (unlikely(thd->is_error())) + return true; + + if (!suppress_my_ok) + ::my_ok(thd,row_count); + + return 0; +} + + + +bool +select_materialize_with_stats:: +create_result_table(THD *thd_arg, List *column_types, + bool is_union_distinct, ulonglong options, + const LEX_CSTRING *table_alias, bool bit_fields_as_long, + bool create_table, + bool keep_row_order, + uint hidden) +{ + DBUG_ASSERT(table == 0); + tmp_table_param.field_count= column_types->elements; + tmp_table_param.func_count= tmp_table_param.field_count; + tmp_table_param.bit_fields_as_long= bit_fields_as_long; + + if (! (table= create_tmp_table(thd_arg, &tmp_table_param, *column_types, + (ORDER*) 0, is_union_distinct, 1, + options, HA_POS_ERROR, table_alias, + !create_table, keep_row_order))) + return TRUE; + + col_stat= (Column_statistics*) table->in_use->alloc(table->s->fields * + sizeof(Column_statistics)); + if (!col_stat) + return TRUE; + + reset(); + table->file->extra(HA_EXTRA_WRITE_CACHE); + table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + return FALSE; +} + + +void select_materialize_with_stats::reset() +{ + memset(col_stat, 0, table->s->fields * sizeof(Column_statistics)); + max_nulls_in_row= 0; + count_rows= 0; +} + + +void select_materialize_with_stats::cleanup() +{ + reset(); + select_unit::cleanup(); +} + + +/** + Override select_unit::send_data to analyze each row for NULLs and to + update null_statistics before sending data to the client. 
+ + @return TRUE if fatal error when sending data to the client + @return FALSE on success +*/ + +int select_materialize_with_stats::send_data(List &items) +{ + List_iterator_fast item_it(items); + Item *cur_item; + Column_statistics *cur_col_stat= col_stat; + uint nulls_in_row= 0; + int res; + + if ((res= select_unit::send_data(items))) + return res; + if (table->null_catch_flags & REJECT_ROW_DUE_TO_NULL_FIELDS) + { + table->null_catch_flags&= ~REJECT_ROW_DUE_TO_NULL_FIELDS; + return 0; + } + /* Skip duplicate rows. */ + if (write_err == HA_ERR_FOUND_DUPP_KEY || + write_err == HA_ERR_FOUND_DUPP_UNIQUE) + return 0; + + ++count_rows; + + while ((cur_item= item_it++)) + { + if (cur_item->is_null_result()) + { + ++cur_col_stat->null_count; + cur_col_stat->max_null_row= count_rows; + if (!cur_col_stat->min_null_row) + cur_col_stat->min_null_row= count_rows; + ++nulls_in_row; + } + ++cur_col_stat; + } + if (nulls_in_row > max_nulls_in_row) + max_nulls_in_row= nulls_in_row; + + return 0; +} + + +/**************************************************************************** + TMP_TABLE_PARAM +****************************************************************************/ + +void TMP_TABLE_PARAM::init() +{ + DBUG_ENTER("TMP_TABLE_PARAM::init"); + DBUG_PRINT("enter", ("this: %p", this)); + field_count= sum_func_count= func_count= hidden_field_count= 0; + group_parts= group_length= group_null_parts= 0; + quick_group= 1; + table_charset= 0; + precomputed_group_by= 0; + bit_fields_as_long= 0; + materialized_subquery= 0; + force_not_null_cols= 0; + skip_create_table= 0; + tmp_name= "temptable"; // Name of temp table on disk + DBUG_VOID_RETURN; +} + + +void thd_increment_bytes_sent(void *thd, size_t length) +{ + /* thd == 0 when close_connection() calls net_send_error() */ + if (likely(thd != 0)) + { + ((THD*) thd)->status_var.bytes_sent+= length; + } +} + +my_bool thd_net_is_killed(THD *thd) +{ + return thd && thd->killed ? 
1 : 0; +} + + +void thd_increment_bytes_received(void *thd, size_t length) +{ + if (thd != NULL) // MDEV-13073 Ack collector having NULL + ((THD*) thd)->status_var.bytes_received+= length; +} + + +void THD::set_status_var_init() +{ + bzero((char*) &status_var, offsetof(STATUS_VAR, + last_cleared_system_status_var)); + /* + Session status for Threads_running is always 1. It can only be queried + by thread itself via INFORMATION_SCHEMA.SESSION_STATUS or SHOW [SESSION] + STATUS. And at this point thread is guaranteed to be running. + */ + status_var.threads_running= 1; +} + + +void Security_context::init() +{ + host= user= ip= external_user= 0; + host_or_ip= "connecting host"; + priv_user[0]= priv_host[0]= proxy_user[0]= priv_role[0]= '\0'; + master_access= NO_ACL; + password_expired= false; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + db_access= NO_ACL; +#endif +} + + +void Security_context::destroy() +{ + DBUG_PRINT("info", ("freeing security context")); + // If not pointer to constant + if (host != my_localhost) + { + my_free((char*) host); + host= NULL; + } + if (user != delayed_user) + { + my_free((char*) user); + user= NULL; + } + + if (external_user) + { + my_free(external_user); + external_user= NULL; + } + + my_free((char*) ip); + ip= NULL; +} + + +void Security_context::skip_grants() +{ + /* privileges for the user are unknown everything is allowed */ + host_or_ip= (char *)""; + master_access= ALL_KNOWN_ACL; + *priv_user= *priv_host= '\0'; + password_expired= false; +} + + +bool Security_context::set_user(char *user_arg) +{ + my_free(const_cast(user)); + user= my_strdup(key_memory_MPVIO_EXT_auth_info, user_arg, MYF(0)); + return user == 0; +} + +bool Security_context::check_access(const privilege_t want_access, + bool match_any) +{ + DBUG_ENTER("Security_context::check_access"); + DBUG_RETURN((match_any ? 
(master_access & want_access) != NO_ACL + : ((master_access & want_access) == want_access))); +} + +#ifndef NO_EMBEDDED_ACCESS_CHECKS +/** + Initialize this security context from the passed in credentials + and activate it in the current thread. + + @param thd + @param definer_user + @param definer_host + @param db + @param[out] backup Save a pointer to the current security context + in the thread. In case of success it points to the + saved old context, otherwise it points to NULL. + + + During execution of a statement, multiple security contexts may + be needed: + - the security context of the authenticated user, used as the + default security context for all top-level statements + - in case of a view or a stored program, possibly the security + context of the definer of the routine, if the object is + defined with SQL SECURITY DEFINER option. + + The currently "active" security context is parameterized in THD + member security_ctx. By default, after a connection is + established, this member points at the "main" security context + - the credentials of the authenticated user. + + Later, if we would like to execute some sub-statement or a part + of a statement under credentials of a different user, e.g. + definer of a procedure, we authenticate this user in a local + instance of Security_context by means of this method (and + ultimately by means of acl_getroot), and make the + local instance active in the thread by re-setting + thd->security_ctx pointer. + + Note, that the life cycle and memory management of the "main" and + temporary security contexts are different. + For the main security context, the memory for user/host/ip is + allocated on system heap, and the THD class frees this memory in + its destructor. The only case when contents of the main security + context may change during its life time is when someone issued + CHANGE USER command. + Memory management of a "temporary" security context is + responsibility of the module that creates it. 
+ + @retval TRUE there is no user with the given credentials. The erro + is reported in the thread. + @retval FALSE success +*/ + +bool +Security_context:: +change_security_context(THD *thd, + LEX_CSTRING *definer_user, + LEX_CSTRING *definer_host, + LEX_CSTRING *db, + Security_context **backup) +{ + bool needs_change; + + DBUG_ENTER("Security_context::change_security_context"); + + DBUG_ASSERT(definer_user->str && definer_host->str); + + *backup= NULL; + needs_change= (strcmp(definer_user->str, thd->security_ctx->priv_user) || + my_strcasecmp(system_charset_info, definer_host->str, + thd->security_ctx->priv_host)); + if (needs_change) + { + if (acl_getroot(this, definer_user->str, definer_host->str, + definer_host->str, db->str)) + { + my_error(ER_NO_SUCH_USER, MYF(0), definer_user->str, + definer_host->str); + DBUG_RETURN(TRUE); + } + *backup= thd->security_ctx; + thd->security_ctx= this; + } + + DBUG_RETURN(FALSE); +} + + +void +Security_context::restore_security_context(THD *thd, + Security_context *backup) +{ + if (backup) + thd->security_ctx= backup; +} +#endif + + +bool Security_context::user_matches(Security_context *them) +{ + return ((user != NULL) && (them->user != NULL) && + !strcmp(user, them->user)); +} + +bool Security_context::is_priv_user(const char *user, const char *host) +{ + return ((user != NULL) && (host != NULL) && + !strcmp(user, priv_user) && + !my_strcasecmp(system_charset_info, host,priv_host)); +} + + +/**************************************************************************** + Handling of open and locked tables states. + + This is used when we want to open/lock (and then close) some tables when + we already have a set of tables open and locked. We use these methods for + access to mysql.proc table to find definitions of stored routines. 
+****************************************************************************/ + +void THD::reset_n_backup_open_tables_state(Open_tables_backup *backup) +{ + DBUG_ENTER("reset_n_backup_open_tables_state"); + backup->set_open_tables_state(this); + backup->mdl_system_tables_svp= mdl_context.mdl_savepoint(); + reset_open_tables_state(); + state_flags|= Open_tables_state::BACKUPS_AVAIL; + DBUG_VOID_RETURN; +} + + +void THD::restore_backup_open_tables_state(Open_tables_backup *backup) +{ + DBUG_ENTER("restore_backup_open_tables_state"); + mdl_context.rollback_to_savepoint(backup->mdl_system_tables_svp); + /* + Before we will throw away current open tables state we want + to be sure that it was properly cleaned up. + */ + DBUG_ASSERT(open_tables == 0 && + temporary_tables == 0 && + derived_tables == 0 && + lock == 0 && + locked_tables_mode == LTM_NONE && + m_reprepare_observer == NULL); + + set_open_tables_state(backup); + DBUG_VOID_RETURN; +} + +#if MARIA_PLUGIN_INTERFACE_VERSION < 0x0200 +/** + This is a backward compatibility method, made obsolete + by the thd_kill_statement service. Keep it here to avoid breaking the + ABI in case some binary plugins still use it. +*/ +#undef thd_killed +extern "C" int thd_killed(const MYSQL_THD thd) +{ + return thd_kill_level(thd) > THD_ABORT_SOFTLY; +} +#else +#error now thd_killed() function can go away +#endif + +/* + return thd->killed status to the client, + mapped to the API enum thd_kill_levels values. + + @note Since this function is called quite frequently thd_kill_level(NULL) is + forbidden for performance reasons (saves one conditional branch). 
If your ever + need to call thd_kill_level() when THD is not available, you options are (most + to least preferred): + - try to pass THD through to thd_kill_level() + - add current_thd to some service and use thd_killed(current_thd) + - add thd_killed_current() function to kill statement service + - add if (!thd) thd= current_thd here +*/ +extern "C" enum thd_kill_levels thd_kill_level(const MYSQL_THD thd) +{ + DBUG_ASSERT(thd); + + if (likely(thd->killed == NOT_KILLED)) + { + Apc_target *apc_target= (Apc_target*) &thd->apc_target; + if (unlikely(apc_target->have_apc_requests())) + { + if (thd == current_thd) + apc_target->process_apc_requests(false); + } + return THD_IS_NOT_KILLED; + } + + return thd->killed & KILL_HARD_BIT ? THD_ABORT_ASAP : THD_ABORT_SOFTLY; +} + + +/** + Send an out-of-band progress report to the client + + The report is sent every 'thd->...progress_report_time' second, + however not more often than global.progress_report_time. + If global.progress_report_time is 0, then don't send progress reports, but + check every second if the value has changed + + We clear any errors that we get from sending the progress packet to + the client as we don't want to set an error without the caller knowing + about it. 
+*/ + +static void thd_send_progress(THD *thd) +{ + /* Check if we should send the client a progress report */ + ulonglong report_time= my_interval_timer(); + if (report_time > thd->progress.next_report_time) + { + uint seconds_to_next= MY_MAX(thd->variables.progress_report_time, + global_system_variables.progress_report_time); + if (seconds_to_next == 0) // Turned off + seconds_to_next= 1; // Check again after 1 second + + thd->progress.next_report_time= (report_time + + seconds_to_next * 1000000000ULL); + if (global_system_variables.progress_report_time && + thd->variables.progress_report_time && !thd->is_error()) + { + net_send_progress_packet(thd); + if (thd->is_error()) + thd->clear_error(); + } + } +} + + +/** Initialize progress report handling **/ + +extern "C" void thd_progress_init(MYSQL_THD thd, uint max_stage) +{ + DBUG_ASSERT(thd->stmt_arena != thd->progress.arena); + if (thd->progress.arena) + return; // already initialized + /* + Send progress reports to clients that supports it, if the command + is a high level command (like ALTER TABLE) and we are not in a + stored procedure + */ + thd->progress.report= ((thd->client_capabilities & MARIADB_CLIENT_PROGRESS) && + thd->progress.report_to_client && + !thd->in_sub_stmt); + thd->progress.next_report_time= 0; + thd->progress.stage= 0; + thd->progress.counter= thd->progress.max_counter= 0; + thd->progress.max_stage= max_stage; + thd->progress.arena= thd->stmt_arena; +} + + +/* Inform processlist and the client that some progress has been made */ + +extern "C" void thd_progress_report(MYSQL_THD thd, + ulonglong progress, ulonglong max_progress) +{ + if (thd->stmt_arena != thd->progress.arena) + return; + if (thd->progress.max_counter != max_progress) // Simple optimization + { + /* + Better to not wait in the unlikely event that LOCK_thd_data is locked + as Galera can potentially have this locked for a long time. + Progress counters will fix themselves after the next call. 
+ */ + if (mysql_mutex_trylock(&thd->LOCK_thd_data)) + return; + thd->progress.counter= progress; + thd->progress.max_counter= max_progress; + mysql_mutex_unlock(&thd->LOCK_thd_data); + } + else + thd->progress.counter= progress; + + if (thd->progress.report) + thd_send_progress(thd); +} + +/** + Move to next stage in process list handling + + This will reset the timer to ensure the progress is sent to the client + if client progress reports are activated. +*/ + +extern "C" void thd_progress_next_stage(MYSQL_THD thd) +{ + if (thd->stmt_arena != thd->progress.arena) + return; + mysql_mutex_lock(&thd->LOCK_thd_data); + thd->progress.stage++; + thd->progress.counter= 0; + DBUG_ASSERT(thd->progress.stage < thd->progress.max_stage); + mysql_mutex_unlock(&thd->LOCK_thd_data); + if (thd->progress.report) + { + thd->progress.next_report_time= 0; // Send new stage info + thd_send_progress(thd); + } +} + +/** + Disable reporting of progress in process list. + + @note + This function is safe to call even if one has not called thd_progress_init. + + This function should be called by all parts that does progress + reporting to ensure that progress list doesn't contain 100 % done + forever. +*/ + + +extern "C" void thd_progress_end(MYSQL_THD thd) +{ + if (thd->stmt_arena != thd->progress.arena) + return; + /* + It's enough to reset max_counter to set disable progress indicator + in processlist. + */ + thd->progress.max_counter= 0; + thd->progress.arena= 0; +} + + +/** + Return the thread id of a user thread + @param thd user thread + @return thread id +*/ +extern "C" unsigned long thd_get_thread_id(const MYSQL_THD thd) +{ + return((unsigned long)thd->thread_id); +} + +/** + Check if THD socket is still connected. + */ +extern "C" int thd_is_connected(MYSQL_THD thd) +{ + return thd->is_connected(); +} + + +extern "C" double thd_rnd(MYSQL_THD thd) +{ + return my_rnd(&thd->rand); +} + + +/** + Generate string of printable random characters of requested length. 
+ + @param to[out] Buffer for generation; must be at least length+1 bytes + long; result string is always null-terminated + @param length[in] How many random characters to put in buffer +*/ +extern "C" void thd_create_random_password(MYSQL_THD thd, + char *to, size_t length) +{ + for (char *end= to + length; to < end; to++) + *to= (char) (my_rnd(&thd->rand)*94 + 33); + *to= '\0'; +} + + +extern "C" const char *thd_priv_host(MYSQL_THD thd, size_t *length) +{ + const Security_context *sctx= thd->security_ctx; + if (!sctx) + { + *length= 0; + return NULL; + } + *length= strlen(sctx->priv_host); + return sctx->priv_host; +} + + +extern "C" const char *thd_priv_user(MYSQL_THD thd, size_t *length) +{ + const Security_context *sctx= thd->security_ctx; + if (!sctx) + { + *length= 0; + return NULL; + } + *length= strlen(sctx->priv_user); + return sctx->priv_user; +} + + +#ifdef INNODB_COMPATIBILITY_HOOKS + +/** open a table and add it to thd->open_tables + + @note At the moment this is used in innodb background purge threads + *only*.There should be no table locks, because the background purge does not + change the table as far as LOCK TABLES is concerned. MDL locks are + still needed, though. + + To make sure no table stays open for long, this helper allows the thread to + have only one table open at any given time. 
+*/ +TABLE *open_purge_table(THD *thd, const char *db, size_t dblen, + const char *tb, size_t tblen) +{ + DBUG_ENTER("open_purge_table"); + DBUG_ASSERT(thd->open_tables == NULL); + DBUG_ASSERT(thd->locked_tables_mode < LTM_PRELOCKED); + + /* Purge already hold the MDL for the table */ + Open_table_context ot_ctx(thd, MYSQL_OPEN_HAS_MDL_LOCK); + TABLE_LIST *tl= (TABLE_LIST*)thd->alloc(sizeof(TABLE_LIST)); + LEX_CSTRING db_name= {db, dblen }; + LEX_CSTRING table_name= { tb, tblen }; + + tl->init_one_table(&db_name, &table_name, 0, TL_READ); + tl->i_s_requested_object= OPEN_TABLE_ONLY; + + bool error= open_table(thd, tl, &ot_ctx); + + /* we don't recover here */ + DBUG_ASSERT(!error || !ot_ctx.can_recover_from_failed_open()); + + if (unlikely(error)) + close_thread_tables(thd); + + DBUG_RETURN(error ? NULL : tl->table); +} + +TABLE *get_purge_table(THD *thd) +{ + /* see above, at most one table can be opened */ + DBUG_ASSERT(thd->open_tables == NULL || thd->open_tables->next == NULL); + return thd->open_tables; +} + +/** Find an open table in the list of prelocked tabled + + Used for foreign key actions, for example, in UPDATE t1 SET a=1; + where a child table t2 has a KB on t1.a. + + But only when virtual columns are involved, otherwise InnoDB + does not need an open TABLE. 
+*/ +TABLE *find_fk_open_table(THD *thd, const char *db, size_t db_len, + const char *table, size_t table_len) +{ + for (TABLE *t= thd->open_tables; t; t= t->next) + { + if (t->s->db.length == db_len && t->s->table_name.length == table_len && + !strcmp(t->s->db.str, db) && !strcmp(t->s->table_name.str, table) && + t->pos_in_table_list->prelocking_placeholder == TABLE_LIST::PRELOCK_FK) + return t; + } + return NULL; +} + +/* the following three functions are used in background purge threads */ + +MYSQL_THD create_thd() +{ + THD *thd= new THD(next_thread_id()); + thd->thread_stack= (char*) &thd; + thd->store_globals(); + thd->set_command(COM_DAEMON); + thd->system_thread= SYSTEM_THREAD_GENERIC; + thd->security_ctx->host_or_ip=""; + server_threads.insert(thd); + return thd; +} + +void destroy_thd(MYSQL_THD thd) +{ + thd->add_status_to_global(); + server_threads.erase(thd); + delete thd; +} + +/** + Create a THD that only has auxilliary functions + It will never be added to the global connection list + server_threads. It does not represent any client connection. + + It should never be counted, because it will stall the + shutdown. It is solely for engine's internal use, + like for example, evaluation of virtual function in innodb + purge. +*/ +extern "C" pthread_key(struct st_my_thread_var *, THR_KEY_mysys); +MYSQL_THD create_background_thd() +{ + auto save_thd = current_thd; + set_current_thd(nullptr); + + auto save_mysysvar= pthread_getspecific(THR_KEY_mysys); + + /* + Allocate new mysys_var specifically new THD, + so that e.g safemalloc, DBUG etc are happy. + */ + pthread_setspecific(THR_KEY_mysys, 0); + my_thread_init(); + auto thd_mysysvar= pthread_getspecific(THR_KEY_mysys); + auto thd= new THD(0); + pthread_setspecific(THR_KEY_mysys, save_mysysvar); + thd->set_psi(nullptr); + set_current_thd(save_thd); + + /* + Workaround the adverse effect of incrementing thread_count + in THD constructor. We do not want these THDs to be counted, + or waited for on shutdown. 
+  */
+  THD_count::count--;
+
+  thd->mysys_var= (st_my_thread_var *) thd_mysysvar;
+  thd->set_command(COM_DAEMON);
+  thd->system_thread= SYSTEM_THREAD_GENERIC;
+  thd->security_ctx->host_or_ip= "";
+  thd->real_id= 0;
+  thd->thread_id= 0;
+  thd->query_id= 0;
+  return thd;
+}
+
+
+/*
+  Attach a background THD to the calling OS thread.
+
+  Changes the current value of the THR_KEY_mysys TLS variable,
+  and returns the original value.
+*/
+void *thd_attach_thd(MYSQL_THD thd)
+{
+  DBUG_ASSERT(!current_thd);
+  DBUG_ASSERT(thd && thd->mysys_var);
+
+  auto save_mysysvar= pthread_getspecific(THR_KEY_mysys);
+  pthread_setspecific(THR_KEY_mysys, thd->mysys_var);
+  thd->thread_stack= (char *) &thd;
+  thd->store_globals();
+  return save_mysysvar;
+}
+
+/*
+  Restore the THR_KEY_mysys TLS variable,
+  which was changed by thd_attach_thd().
+*/
+void thd_detach_thd(void *mysysvar)
+{
+  /* Restore mysys_var that is changed when THD was attached.*/
+  pthread_setspecific(THR_KEY_mysys, mysysvar);
+  /* Restore the THD (we assume it was NULL during attach).*/
+  set_current_thd(0);
+}
+
+/*
+  Destroy a THD that was previously created by
+  create_background_thd()
+*/
+void destroy_background_thd(MYSQL_THD thd)
+{
+  DBUG_ASSERT(!current_thd);
+  auto thd_mysys_var= thd->mysys_var;
+  auto save_mysys_var= thd_attach_thd(thd);
+  DBUG_ASSERT(thd_mysys_var != save_mysys_var);
+  /*
+    Work around the adverse effect of decrementing thread_count in the
+    THD destructor.
+    As we decremented it in create_background_thd(), in order for it
+    not to go negative, we have to increment it before destructor.
+  */
+  THD_count::count++;
+  delete thd;
+
+  thd_detach_thd(save_mysys_var);
+  /*
+    Delete THD-specific my_thread_var, that was
+    allocated in create_background_thd().
+    Also preserve current PSI context, since my_thread_end()
+    would kill it, if we're not careful.
+  */
+#ifdef HAVE_PSI_THREAD_INTERFACE
+  auto save_psi_thread= PSI_CALL_get_thread();
+#endif
+  /* NOTE(review): when HAVE_PSI_THREAD_INTERFACE is not defined, the
+     PSI_CALL_* invocations below are presumably no-op macros -- confirm
+     against the PSI service definitions. */
+  PSI_CALL_set_thread(0);
+  pthread_setspecific(THR_KEY_mysys, thd_mysys_var);
+  my_thread_end();
+  pthread_setspecific(THR_KEY_mysys, save_mysys_var);
+  PSI_CALL_set_thread(save_psi_thread);
+}
+
+
+/**
+  Reset a THD so it can be reused by an engine background task:
+  close open tables, release transactional MDL, free statement items
+  and trim the memory root (keeping the preallocated block).
+*/
+void reset_thd(MYSQL_THD thd)
+{
+  close_thread_tables(thd);
+  thd->release_transactional_locks();
+  thd->free_items();
+  free_root(thd->mem_root, MYF(MY_KEEP_PREALLOC));
+}
+
+/**
+  This function can be used by storage engine
+  to indicate a start of an async operation.
+
+  Such an async operation needs to be
+  finished before we write the response to the client.
+
+  An example of this operation is InnoDB's asynchronous
+  group commit. Server needs to wait for the end of it
+  before writing response to client, to provide durability
+  guarantees, in other words, server can't send OK packet
+  before modified data is durable in redo log.
+
+  NOTE: system THDs (those that are not associated with a client
+  connection) do not allow async operations yet.
+
+  @param thd a THD
+  @return thd
+  @retval nullptr if this is system THD */
+extern "C" MYSQL_THD thd_increment_pending_ops(MYSQL_THD thd)
+{
+  if (!thd || thd->system_thread != NON_SYSTEM_THREAD)
+    return nullptr;
+  thd->async_state.inc_pending_ops();
+  return thd;
+}
+
+
+/**
+  This function can be used by plugin/engine to indicate
+  end of async operation (such as end of group commit
+  write flush)
+
+  @param thd THD
+*/
+extern "C" void thd_decrement_pending_ops(MYSQL_THD thd)
+{
+  DBUG_ASSERT(thd);
+  DBUG_ASSERT(thd->system_thread == NON_SYSTEM_THREAD);
+
+  thd_async_state::enum_async_state state;
+  if (thd->async_state.dec_pending_ops(&state) == 0)
+  {
+    switch(state)
+    {
+    case thd_async_state::enum_async_state::SUSPENDED:
+      /* Last pending op finished while the connection was suspended:
+         hand it back to the scheduler (e.g. the thread pool). */
+      DBUG_ASSERT(thd->scheduler->thd_resume);
+      thd->scheduler->thd_resume(thd);
+      break;
+    case thd_async_state::enum_async_state::NONE:
+      break;
+    default:
+      DBUG_ASSERT(0);
+    }
+  }
+}
+
+
+/** @return the query_id of the given THD as an unsigned 64-bit value */
+unsigned long long thd_get_query_id(const MYSQL_THD thd)
+{
+  return((unsigned long long)thd->query_id);
+}
+
+/** Clear the error state (diagnostics area) of the given THD */
+void thd_clear_error(MYSQL_THD thd)
+{
+  thd->clear_error();
+}
+
+/** @return the connection character set of the given THD */
+extern "C" const struct charset_info_st *thd_charset(MYSQL_THD thd)
+{
+  return(thd->charset());
+}
+
+
+/**
+  Get the current query string for the thread.
+
+  This function is not thread safe and can be used only by thd owner thread.
+
+  @param thd The MySQL internal thread pointer
+  @return query string and length. May be non-null-terminated.
+*/
+extern "C" LEX_STRING * thd_query_string (MYSQL_THD thd)
+{
+  DBUG_ASSERT(thd == current_thd);
+  return(&thd->query_string.string);
+}
+
+
+/**
+  Get the current query string for the thread.
+
+  @param thd    The MySQL internal thread pointer
+  @param buf    Buffer where the query string will be copied
+  @param buflen Length of the buffer
+
+  @return Length of the query
+  @retval 0 if LOCK_thd_data cannot be acquired without waiting
+
+  @note This function is thread safe as the query string is
+        accessed under mutex protection and the string is copied
+        into the provided buffer. @see thd_query_string().
+*/
+
+extern "C" size_t thd_query_safe(MYSQL_THD thd, char *buf, size_t buflen)
+{
+  size_t len= 0;
+  /* InnoDB invokes this function while holding internal mutexes.
+  THD::awake() will hold LOCK_thd_data while invoking an InnoDB
+  function that would acquire the internal mutex. Because this
+  function is a non-essential part of information_schema view output,
+  we will break the deadlock by avoiding a mutex wait here
+  and returning the empty string if a wait would be needed. */
+  if (!mysql_mutex_trylock(&thd->LOCK_thd_data))
+  {
+    /* NOTE(review): buflen is assumed >= 1; buflen == 0 would make
+       buflen - 1 wrap and buf[len] below write out of bounds --
+       confirm all callers pass a non-empty buffer. */
+    len= MY_MIN(buflen - 1, thd->query_length());
+    if (len)
+      memcpy(buf, thd->query(), len);
+    mysql_mutex_unlock(&thd->LOCK_thd_data);
+  }
+  buf[len]= '\0';
+  return len;
+}
+
+
+/** @return user name of the connection, or NULL if no security context */
+extern "C" const char *thd_user_name(MYSQL_THD thd)
+{
+  if (!thd->security_ctx)
+    return 0;
+
+  return thd->security_ctx->user;
+}
+
+
+/** @return client host name, or NULL if no security context */
+extern "C" const char *thd_client_host(MYSQL_THD thd)
+{
+  if (!thd->security_ctx)
+    return 0;
+
+  return thd->security_ctx->host;
+}
+
+
+/** @return client IP address, or NULL if no security context */
+extern "C" const char *thd_client_ip(MYSQL_THD thd)
+{
+  if (!thd->security_ctx)
+    return 0;
+
+  return thd->security_ctx->ip;
+}
+
+
+/** @return the THD's current database name */
+extern "C" LEX_CSTRING *thd_current_db(MYSQL_THD thd)
+{
+  return &thd->db;
+}
+
+
+/** @return current SQL error number, or 0 when there is no error or no
+    diagnostics area */
+extern "C" int thd_current_status(MYSQL_THD thd)
+{
+  Diagnostics_area *da= thd->get_stmt_da();
+  if (!da)
+    return 0;
+
+  return da->is_error() ? da->sql_errno() : 0;
+}
+
+
+/** @return the command the THD is currently executing */
+extern "C" enum enum_server_command thd_current_command(MYSQL_THD thd)
+{
+  return thd->get_command();
+}
+
+#ifdef HAVE_REPLICATION /* Working around MDEV-24622 */
+/** @return whether the current thread is for applying binlog in a replica */
+extern "C" int thd_is_slave(const MYSQL_THD thd)
+{
+  return thd && thd->slave_thread;
+}
+#endif /* HAVE_REPLICATION */
+
+/* Returns high resolution timestamp (microseconds) for the start
+   of the current query. */
+extern "C" unsigned long long thd_start_utime(const MYSQL_THD thd)
+{
+  return thd->start_time * 1000000 + thd->start_time_sec_part;
+}
+
+
+/*
+  This function can optionally be called to check if thd_rpl_deadlock_check()
+  needs to be called for waits done by a given transaction.
+
+  If this function returns false for a given thd, there is no need to do
+  any calls to thd_rpl_deadlock_check() on that thd.
+
+  This call is optional; it is safe to call thd_rpl_deadlock_check() in
+  any case. This call can be used to save some redundant calls to
+  thd_rpl_deadlock_check() if desired. (This is unlikely to matter much
+  unless there are _lots_ of waits to report, as the overhead of
+  thd_rpl_deadlock_check() is small).
+*/
+extern "C" int
+thd_need_wait_reports(const MYSQL_THD thd)
+{
+  rpl_group_info *rgi;
+
+  if (mysql_bin_log.is_open())
+    return true;
+  if (!thd)
+    return false;
+  rgi= thd->rgi_slave;
+  if (!rgi)
+    return false;
+  return rgi->is_parallel_exec;
+}
+
+/*
+  Used by storage engines (currently InnoDB) to report that
+  one transaction THD is about to go to wait for a transactional lock held by
+  another transactions OTHER_THD.
+
+  This is used for parallel replication, where transactions are required to
+  commit in the same order on the slave as they did on the master. If the
+  transactions on the slave encounter lock conflicts on the slave that did not
+  exist on the master, this can cause deadlocks. This is primarily used in
+  optimistic (and aggressive) modes.
+ + Normally, such conflicts will not occur in conservative mode, because the + same conflict would have prevented the two transactions from committing in + parallel on the master, thus preventing them from running in parallel on the + slave in the first place. However, it is possible in case when the optimizer + chooses a different plan on the slave than on the master (eg. table scan + instead of index scan). + + Storage engines report lock waits using this call. If a lock wait causes a + deadlock with the pre-determined commit order, we kill the later + transaction, and later re-try it, to resolve the deadlock. + + Returns 1 if the OTHER_THD will be killed to resolve deadlock, 0 if not. The + actual kill will happen later, asynchronously from another thread. The + caller does not need to take any actions on the return value if the + handlerton kill_query method is implemented to abort the to-be-killed + transaction. +*/ +extern "C" int +thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd) +{ + rpl_group_info *rgi; + rpl_group_info *other_rgi; + + if (!thd) + return 0; + DEBUG_SYNC(thd, "thd_report_wait_for"); + thd->transaction->stmt.mark_trans_did_wait(); + if (!other_thd) + return 0; + binlog_report_wait_for(thd, other_thd); + rgi= thd->rgi_slave; + other_rgi= other_thd->rgi_slave; + if (!rgi || !other_rgi) + return 0; + if (!rgi->is_parallel_exec) + return 0; + if (rgi->rli != other_rgi->rli) + return 0; + if (!rgi->gtid_sub_id || !other_rgi->gtid_sub_id) + return 0; + if (rgi->current_gtid.domain_id != other_rgi->current_gtid.domain_id) + return 0; + if (rgi->gtid_sub_id > other_rgi->gtid_sub_id) + return 0; + if (rgi->finish_event_group_called || other_rgi->finish_event_group_called) + { + /* + If either of two transactions has already performed commit + (e.g split ALTER, asserted below) there won't be any deadlock. 
+ */ + DBUG_ASSERT(rgi->sa_info || other_rgi->sa_info); + + return 0; + } + /* + This transaction is about to wait for another transaction that is required + by replication binlog order to commit after. This would cause a deadlock. + + So send a kill to the other transaction, with a temporary error; this will + cause replication to rollback (and later re-try) the other transaction, + releasing the lock for this transaction so replication can proceed. + */ +#ifdef HAVE_REPLICATION + slave_background_kill_request(other_thd); +#endif + return 1; +} + +/* + This function is called from InnoDB to check if the commit order of + two transactions has already been decided by the upper layer. This happens + in parallel replication, where the commit order is forced to be the same on + the slave as it was originally on the master. + + If this function returns false, it means that such commit order will be + enforced. This allows the storage engine to optionally omit gap lock waits + or similar measures that would otherwise be needed to ensure that + transactions would be serialised in a way that would cause a commit order + that is correct for binlogging for statement-based replication. + + Since transactions are only run in parallel on the slave if they ran without + lock conflicts on the master, normally no lock conflicts on the slave happen + during parallel replication. However, there are a couple of corner cases + where it can happen, like these secondary-index operations: + + T1: INSERT INTO t1 VALUES (7, NULL); + T2: DELETE FROM t1 WHERE b <= 3; + + T1: UPDATE t1 SET secondary=NULL WHERE primary=1 + T2: DELETE t1 WHERE secondary <= 3 + + The DELETE takes a gap lock that can block the INSERT/UPDATE, but the row + locks set by INSERT/UPDATE do not block the DELETE. Thus, the execution + order of the transactions determine whether a lock conflict occurs or + not. Thus a lock conflict can occur on the slave where it did not on the + master. 
+ + If this function returns true, normal locking should be done as required by + the binlogging and transaction isolation level in effect. But if it returns + false, the correct order will be enforced anyway, and InnoDB can + avoid taking the gap lock, preventing the lock conflict. + + Calling this function is just an optimisation to avoid unnecessary + deadlocks. If it was not used, a gap lock would be set that could eventually + cause a deadlock; the deadlock would be caught by thd_rpl_deadlock_check() + and the transaction T2 killed and rolled back (and later re-tried). +*/ +extern "C" int +thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) +{ + rpl_group_info *rgi, *other_rgi; + + DBUG_EXECUTE_IF("disable_thd_need_ordering_with", return 1;); + if (!thd || !other_thd) + return 1; +#ifdef WITH_WSREP + /* wsrep applier, replayer and TOI processing threads are ordered + by replication provider, relaxed GAP locking protocol can be used + between high priority wsrep threads. + Note that wsrep_thd_is_BF() doesn't take LOCK_thd_data for either thd, + the caller should guarantee that the BF state won't change. + (e.g. InnoDB does it by keeping lock_sys.mutex locked) + */ + if (WSREP_ON && + wsrep_thd_order_before(thd, other_thd)) + return 0; +#endif /* WITH_WSREP */ + rgi= thd->rgi_slave; + other_rgi= other_thd->rgi_slave; + if (!rgi || !other_rgi) + return 1; + if (!rgi->is_parallel_exec) + return 1; + if (rgi->rli != other_rgi->rli) + return 1; + if (rgi->current_gtid.domain_id != other_rgi->current_gtid.domain_id) + return 1; + if (!rgi->commit_id || rgi->commit_id != other_rgi->commit_id) + return 1; + DBUG_EXECUTE_IF("thd_need_ordering_with_force", return 1;); + /* + Otherwise, these two threads are doing parallel replication within the same + replication domain. 
Their commit order is already fixed, so we do not need + gap locks or similar to otherwise enforce ordering (and in fact such locks + could lead to unnecessary deadlocks and transaction retry). + */ + return 0; +} + + +/* + If the storage engine detects a deadlock, and needs to choose a victim + transaction to roll back, it can call this function to ask the upper + server layer for which of two possible transactions is prefered to be + aborted and rolled back. + + In parallel replication, if two transactions are running in parallel and + one is fixed to commit before the other, then the one that commits later + will be prefered as the victim - chosing the early transaction as a victim + will not resolve the deadlock anyway, as the later transaction still needs + to wait for the earlier to commit. + + The return value is -1 if the first transaction is prefered as a deadlock + victim, 1 if the second transaction is prefered, or 0 for no preference (in + which case the storage engine can make the choice as it prefers). +*/ +extern "C" int +thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2) +{ + rpl_group_info *rgi1, *rgi2; + + if (!thd1 || !thd2) + return 0; + + /* + If the transactions are participating in the same replication domain in + parallel replication, then request to select the one that will commit + later (in the fixed commit order from the master) as the deadlock victim. + */ + rgi1= thd1->rgi_slave; + rgi2= thd2->rgi_slave; + if (rgi1 && rgi2 && + rgi1->is_parallel_exec && + rgi1->rli == rgi2->rli && + rgi1->current_gtid.domain_id == rgi2->current_gtid.domain_id) + return rgi1->gtid_sub_id < rgi2->gtid_sub_id ? 1 : -1; + + /* No preferences, let the storage engine decide. 
+  */
+  return 0;
+}
+
+
+/** @return whether the THD's whole transaction has modified any
+    non-transactional table */
+extern "C" int thd_non_transactional_update(const MYSQL_THD thd)
+{
+  return(thd->transaction->all.modified_non_trans_table);
+}
+
+/** @return the effective binlog format for this THD, or
+    BINLOG_FORMAT_UNSPEC when binary logging is not in effect */
+extern "C" int thd_binlog_format(const MYSQL_THD thd)
+{
+  if (WSREP(thd))
+  {
+    /* for wsrep binlog format is meaningful also when binlogging is off */
+    return (int) thd->wsrep_binlog_format(thd->variables.binlog_format);
+  }
+
+  if (mysql_bin_log.is_open() && (thd->variables.option_bits & OPTION_BIN_LOG))
+    return (int) thd->variables.binlog_format;
+  return BINLOG_FORMAT_UNSPEC;
+}
+
+/** Request rollback of the statement or (with all=true) the whole
+    transaction at the end of the current statement. */
+extern "C" void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all)
+{
+  DBUG_ASSERT(thd);
+  thd->mark_transaction_to_rollback(all);
+}
+
+/** @return whether binlog filtering rules allow logging for the THD's
+    current database */
+extern "C" bool thd_binlog_filter_ok(const MYSQL_THD thd)
+{
+  return binlog_filter->db_ok(thd->db.str);
+}
+
+/*
+  This is similar to sqlcom_can_generate_row_events, with the exception
+  that we only return 1 if we are going to generate row events in a
+  transaction.
+  CREATE OR REPLACE is always safe to do as this will run in its own
+  transaction.
+*/
+
+extern "C" bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd)
+{
+  return (sqlcom_can_generate_row_events(thd) && thd->lex->sql_command !=
+          SQLCOM_CREATE_TABLE);
+}
+
+
+/** @return the durability property of the THD's current transaction;
+    HA_REGULAR_DURABILITY when thd is NULL */
+extern "C" enum durability_properties thd_get_durability_property(const MYSQL_THD thd)
+{
+  enum durability_properties ret= HA_REGULAR_DURABILITY;
+
+  if (thd != NULL)
+    ret= thd->durability_property;
+
+  return ret;
+}
+
+/** Get the auto_increment_offset and auto_increment_increment.
+Exposed by thd_autoinc_service.
+Needed by InnoDB.
+@param thd Thread object
+@param off auto_increment_offset
+@param inc auto_increment_increment */
+extern "C" void thd_get_autoinc(const MYSQL_THD thd, ulong* off, ulong* inc)
+{
+  *off = thd->variables.auto_increment_offset;
+  *inc = thd->variables.auto_increment_increment;
+}
+
+
+/**
+  Is strict sql_mode set.
+  Needed by InnoDB.
+  @param thd Thread object
+  @return True if sql_mode has strict mode (all or trans).
+ @retval true sql_mode has strict mode (all or trans). + @retval false sql_mode has not strict mode (all or trans). +*/ +extern "C" bool thd_is_strict_mode(const MYSQL_THD thd) +{ + return thd->is_strict_mode(); +} + + +/** + Get query start time as SQL field data. + Needed by InnoDB. + @param thd Thread object + @param buf Buffer to hold start time data +*/ +void thd_get_query_start_data(THD *thd, char *buf) +{ + Field_timestampf f((uchar *)buf, nullptr, 0, Field::NONE, &empty_clex_str, + nullptr, 6); + f.store_TIME(thd->query_start(), thd->query_start_sec_part()); +} + + +/* + Interface for MySQL Server, plugins and storage engines to report + when they are going to sleep/stall. + + SYNOPSIS + thd_wait_begin() + thd Thread object + Can be NULL, in this case current THD is used. + wait_type Type of wait + 1 -- short wait (e.g. for mutex) + 2 -- medium wait (e.g. for disk io) + 3 -- large wait (e.g. for locked row/table) + NOTES + This is used by the threadpool to have better knowledge of which + threads that currently are actively running on CPUs. When a thread + reports that it's going to sleep/stall, the threadpool scheduler is + free to start another thread in the pool most likely. The expected wait + time is simply an indication of how long the wait is expected to + become, the real wait time could be very different. + + thd_wait_end MUST be called immediately after waking up again. +*/ +extern "C" void thd_wait_begin(MYSQL_THD thd, int wait_type) +{ + if (!thd) + { + thd= current_thd; + if (unlikely(!thd)) + return; + } + MYSQL_CALLBACK(thd->scheduler, thd_wait_begin, (thd, wait_type)); +} + +/** + Interface for MySQL Server, plugins and storage engines to report + when they waking up from a sleep/stall. + + @param thd Thread handle + Can be NULL, in this case current THD is used. 
+*/ +extern "C" void thd_wait_end(MYSQL_THD thd) +{ + if (!thd) + { + thd= current_thd; + if (unlikely(!thd)) + return; + } + MYSQL_CALLBACK(thd->scheduler, thd_wait_end, (thd)); +} + +#endif // INNODB_COMPATIBILITY_HOOKS */ + + +/** + MDL_context accessor + @param thd the current session + @return pointer to thd->mdl_context +*/ +extern "C" void *thd_mdl_context(MYSQL_THD thd) +{ + return &thd->mdl_context; +} + + +/**************************************************************************** + Handling of statement states in functions and triggers. + + This is used to ensure that the function/trigger gets a clean state + to work with and does not cause any side effects of the calling statement. + + It also allows most stored functions and triggers to replicate even + if they are used items that would normally be stored in the binary + replication (like last_insert_id() etc...) + + The following things is done + - Disable binary logging for the duration of the statement + - Disable multi-result-sets for the duration of the statement + - Value of last_insert_id() is saved and restored + - Value set by 'SET INSERT_ID=#' is reset and restored + - Value for found_rows() is reset and restored + - examined_row_count is added to the total + - cuted_fields is added to the total + - new savepoint level is created and destroyed + + NOTES: + Seed for random() is saved for the first! usage of RAND() + We reset examined_row_count and cuted_fields and add these to the + result to ensure that if we have a bug that would reset these within + a function, we are not loosing any rows from the main statement. + + We do not reset value of last_insert_id(). 
+****************************************************************************/ + +void THD::reset_sub_statement_state(Sub_statement_state *backup, + uint new_state) +{ +#ifndef EMBEDDED_LIBRARY + /* BUG#33029, if we are replicating from a buggy master, reset + auto_inc_intervals_forced to prevent substatement + (triggers/functions) from using erroneous INSERT_ID value + */ + if (rpl_master_erroneous_autoinc(this)) + { + DBUG_ASSERT(backup->auto_inc_intervals_forced.nb_elements() == 0); + auto_inc_intervals_forced.swap(&backup->auto_inc_intervals_forced); + } +#endif + + backup->option_bits= variables.option_bits; + backup->count_cuted_fields= count_cuted_fields; + backup->in_sub_stmt= in_sub_stmt; + backup->enable_slow_log= enable_slow_log; + backup->limit_found_rows= limit_found_rows; + backup->cuted_fields= cuted_fields; + backup->client_capabilities= client_capabilities; + backup->savepoints= transaction->savepoints; + backup->first_successful_insert_id_in_prev_stmt= + first_successful_insert_id_in_prev_stmt; + backup->first_successful_insert_id_in_cur_stmt= + first_successful_insert_id_in_cur_stmt; + store_slow_query_state(backup); + + if ((!lex->requires_prelocking() || is_update_query(lex->sql_command)) && + !is_current_stmt_binlog_format_row()) + { + variables.option_bits&= ~OPTION_BIN_LOG; + } + + if ((backup->option_bits & OPTION_BIN_LOG) && + is_update_query(lex->sql_command) && + !is_current_stmt_binlog_format_row()) + mysql_bin_log.start_union_events(this, this->query_id); + + /* Disable result sets */ + client_capabilities &= ~CLIENT_MULTI_RESULTS; + in_sub_stmt|= new_state; + cuted_fields= 0; + transaction->savepoints= 0; + first_successful_insert_id_in_cur_stmt= 0; + reset_slow_query_state(); +} + +void THD::restore_sub_statement_state(Sub_statement_state *backup) +{ + DBUG_ENTER("THD::restore_sub_statement_state"); +#ifndef EMBEDDED_LIBRARY + /* BUG#33029, if we are replicating from a buggy master, restore + auto_inc_intervals_forced so that the top 
statement can use the + INSERT_ID value set before this statement. + */ + if (rpl_master_erroneous_autoinc(this)) + { + backup->auto_inc_intervals_forced.swap(&auto_inc_intervals_forced); + DBUG_ASSERT(backup->auto_inc_intervals_forced.nb_elements() == 0); + } +#endif + + /* + To save resources we want to release savepoints which were created + during execution of function or trigger before leaving their savepoint + level. It is enough to release first savepoint set on this level since + all later savepoints will be released automatically. + */ + if (transaction->savepoints) + { + SAVEPOINT *sv; + for (sv= transaction->savepoints; sv->prev; sv= sv->prev) + {} + /* ha_release_savepoint() never returns error. */ + (void)ha_release_savepoint(this, sv); + } + count_cuted_fields= backup->count_cuted_fields; + transaction->savepoints= backup->savepoints; + variables.option_bits= backup->option_bits; + in_sub_stmt= backup->in_sub_stmt; + enable_slow_log= backup->enable_slow_log; + first_successful_insert_id_in_prev_stmt= + backup->first_successful_insert_id_in_prev_stmt; + first_successful_insert_id_in_cur_stmt= + backup->first_successful_insert_id_in_cur_stmt; + limit_found_rows= backup->limit_found_rows; + set_sent_row_count(backup->sent_row_count); + client_capabilities= backup->client_capabilities; + + /* Restore statistic needed for slow log */ + add_slow_query_state(backup); + + /* + If we've left sub-statement mode, reset the fatal error flag. + Otherwise keep the current value, to propagate it up the sub-statement + stack. + + NOTE: is_fatal_sub_stmt_error can be set only if we've been in the + sub-statement mode. 
+ */ + if (!in_sub_stmt) + is_fatal_sub_stmt_error= false; + + if ((variables.option_bits & OPTION_BIN_LOG) && is_update_query(lex->sql_command) && + !is_current_stmt_binlog_format_row()) + mysql_bin_log.stop_union_events(this); + + /* + The following is added to the old values as we are interested in the + total complexity of the query + */ + cuted_fields+= backup->cuted_fields; + DBUG_VOID_RETURN; +} + +/* + Store slow query state at start of a stored procedure statment +*/ + +void THD::store_slow_query_state(Sub_statement_state *backup) +{ + backup->affected_rows= affected_rows; + backup->bytes_sent_old= bytes_sent_old; + backup->examined_row_count= m_examined_row_count; + backup->query_plan_flags= query_plan_flags; + backup->query_plan_fsort_passes= query_plan_fsort_passes; + backup->sent_row_count= m_sent_row_count; + backup->tmp_tables_disk_used= tmp_tables_disk_used; + backup->tmp_tables_size= tmp_tables_size; + backup->tmp_tables_used= tmp_tables_used; + backup->handler_stats= handler_stats; +} + +/* Reset variables related to slow query log */ + +void THD::reset_slow_query_state() +{ + affected_rows= 0; + bytes_sent_old= status_var.bytes_sent; + m_examined_row_count= 0; + m_sent_row_count= 0; + query_plan_flags= QPLAN_INIT; + query_plan_fsort_passes= 0; + tmp_tables_disk_used= 0; + tmp_tables_size= 0; + tmp_tables_used= 0; + if ((variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_ENGINE)) + handler_stats.reset(); +} + +/* + Add back the stored values to the current counters to be able to get + right status for 'call procedure_name' +*/ + +void THD::add_slow_query_state(Sub_statement_state *backup) +{ + affected_rows+= backup->affected_rows; + bytes_sent_old= backup->bytes_sent_old; + m_examined_row_count+= backup->examined_row_count; + m_sent_row_count+= backup->sent_row_count; + query_plan_flags|= backup->query_plan_flags; + query_plan_fsort_passes+= backup->query_plan_fsort_passes; + tmp_tables_disk_used+= backup->tmp_tables_disk_used; + tmp_tables_size+= 
backup->tmp_tables_size; + tmp_tables_used+= backup->tmp_tables_used; + if ((variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_ENGINE)) + handler_stats.add(&backup->handler_stats); +} + + +void THD::set_statement(Statement *stmt) +{ + mysql_mutex_lock(&LOCK_thd_data); + Statement::set_statement(stmt); + mysql_mutex_unlock(&LOCK_thd_data); +} + +void THD::set_sent_row_count(ha_rows count) +{ + m_sent_row_count= count; + MYSQL_SET_STATEMENT_ROWS_SENT(m_statement_psi, m_sent_row_count); +} + +void THD::set_examined_row_count(ha_rows count) +{ + m_examined_row_count= count; + MYSQL_SET_STATEMENT_ROWS_EXAMINED(m_statement_psi, m_examined_row_count); +} + +void THD::inc_sent_row_count(ha_rows count) +{ + m_sent_row_count+= count; + DBUG_EXECUTE_IF("debug_huge_number_of_examined_rows", + m_examined_row_count= (ULONGLONG_MAX - 1000000);); + MYSQL_SET_STATEMENT_ROWS_SENT(m_statement_psi, m_sent_row_count); +} + +void THD::inc_examined_row_count(ha_rows count) +{ + m_examined_row_count+= count; + MYSQL_SET_STATEMENT_ROWS_EXAMINED(m_statement_psi, m_examined_row_count); +} + +void THD::inc_status_created_tmp_disk_tables() +{ + tmp_tables_disk_used++; + query_plan_flags|= QPLAN_TMP_DISK; + status_var_increment(status_var.created_tmp_disk_tables_); +#ifdef HAVE_PSI_STATEMENT_INTERFACE + PSI_STATEMENT_CALL(inc_statement_created_tmp_disk_tables)(m_statement_psi, 1); +#endif +} + +void THD::inc_status_created_tmp_tables() +{ + tmp_tables_used++; + query_plan_flags|= QPLAN_TMP_TABLE; + status_var_increment(status_var.created_tmp_tables_); +#ifdef HAVE_PSI_STATEMENT_INTERFACE + PSI_STATEMENT_CALL(inc_statement_created_tmp_tables)(m_statement_psi, 1); +#endif +} + +void THD::inc_status_select_full_join() +{ + status_var_increment(status_var.select_full_join_count_); +#ifdef HAVE_PSI_STATEMENT_INTERFACE + PSI_STATEMENT_CALL(inc_statement_select_full_join)(m_statement_psi, 1); +#endif +} + +void THD::inc_status_select_full_range_join() +{ + 
status_var_increment(status_var.select_full_range_join_count_); +#ifdef HAVE_PSI_STATEMENT_INTERFACE + PSI_STATEMENT_CALL(inc_statement_select_full_range_join)(m_statement_psi, 1); +#endif +} + +void THD::inc_status_select_range() +{ + status_var_increment(status_var.select_range_count_); +#ifdef HAVE_PSI_STATEMENT_INTERFACE + PSI_STATEMENT_CALL(inc_statement_select_range)(m_statement_psi, 1); +#endif +} + +void THD::inc_status_select_range_check() +{ + status_var_increment(status_var.select_range_check_count_); +#ifdef HAVE_PSI_STATEMENT_INTERFACE + PSI_STATEMENT_CALL(inc_statement_select_range_check)(m_statement_psi, 1); +#endif +} + +void THD::inc_status_select_scan() +{ + status_var_increment(status_var.select_scan_count_); +#ifdef HAVE_PSI_STATEMENT_INTERFACE + PSI_STATEMENT_CALL(inc_statement_select_scan)(m_statement_psi, 1); +#endif +} + +void THD::inc_status_sort_merge_passes() +{ + status_var_increment(status_var.filesort_merge_passes_); +#ifdef HAVE_PSI_STATEMENT_INTERFACE + PSI_STATEMENT_CALL(inc_statement_sort_merge_passes)(m_statement_psi, 1); +#endif +} + +void THD::inc_status_sort_range() +{ + status_var_increment(status_var.filesort_range_count_); +#ifdef HAVE_PSI_STATEMENT_INTERFACE + PSI_STATEMENT_CALL(inc_statement_sort_range)(m_statement_psi, 1); +#endif +} + +void THD::inc_status_sort_rows(ha_rows count) +{ + statistic_add(status_var.filesort_rows_, (ulong)count, &LOCK_status); +#ifdef HAVE_PSI_STATEMENT_INTERFACE + PSI_STATEMENT_CALL(inc_statement_sort_rows)(m_statement_psi, (ulong)count); +#endif +} + +void THD::inc_status_sort_scan() +{ + status_var_increment(status_var.filesort_scan_count_); +#ifdef HAVE_PSI_STATEMENT_INTERFACE + PSI_STATEMENT_CALL(inc_statement_sort_scan)(m_statement_psi, 1); +#endif +} + +void THD::set_status_no_index_used() +{ + server_status|= SERVER_QUERY_NO_INDEX_USED; +#ifdef HAVE_PSI_STATEMENT_INTERFACE + PSI_STATEMENT_CALL(set_statement_no_index_used)(m_statement_psi); +#endif +} + +void 
THD::set_status_no_good_index_used() +{ + server_status|= SERVER_QUERY_NO_GOOD_INDEX_USED; +#ifdef HAVE_PSI_STATEMENT_INTERFACE + PSI_STATEMENT_CALL(set_statement_no_good_index_used)(m_statement_psi); +#endif +} + +/** Assign a new value to thd->query and thd->query_id. */ + +void THD::set_query_and_id(char *query_arg, uint32 query_length_arg, + CHARSET_INFO *cs, + query_id_t new_query_id) +{ + mysql_mutex_lock(&LOCK_thd_data); + set_query_inner(query_arg, query_length_arg, cs); + mysql_mutex_unlock(&LOCK_thd_data); + query_id= new_query_id; +#ifdef WITH_WSREP + set_wsrep_next_trx_id(query_id); + WSREP_DEBUG("assigned new next query and trx id: %" PRIu64, wsrep_next_trx_id()); +#endif /* WITH_WSREP */ +} + +/** Assign a new value to thd->mysys_var. */ +void THD::set_mysys_var(struct st_my_thread_var *new_mysys_var) +{ + mysql_mutex_lock(&LOCK_thd_kill); + mysys_var= new_mysys_var; + mysql_mutex_unlock(&LOCK_thd_kill); +} + +/** + Leave explicit LOCK TABLES or prelocked mode and restore value of + transaction sentinel in MDL subsystem. +*/ + +void THD::leave_locked_tables_mode() +{ + if (locked_tables_mode == LTM_LOCK_TABLES) + { + DBUG_ASSERT(current_backup_stage == BACKUP_FINISHED); + /* + When leaving LOCK TABLES mode we have to change the duration of most + of the metadata locks being held, except for HANDLER and GRL locks, + to transactional for them to be properly released at UNLOCK TABLES. + */ + mdl_context.set_transaction_duration_for_all_locks(); + /* + Make sure we don't release the global read lock and commit blocker + when leaving LTM. + */ + global_read_lock.set_explicit_lock_duration(this); + /* Also ensure that we don't release metadata locks for open HANDLERs. 
*/ + if (handler_tables_hash.records) + mysql_ha_set_explicit_lock_duration(this); + if (ull_hash.records) + mysql_ull_set_explicit_lock_duration(this); + } + locked_tables_mode= LTM_NONE; +} + +void THD::get_definer(LEX_USER *definer, bool role) +{ + binlog_invoker(role); +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + if ((IF_WSREP(wsrep_applier, 0) || slave_thread) && has_invoker()) + { + definer->user= invoker.user; + definer->host= invoker.host; + definer->auth= NULL; + } + else +#endif + get_default_definer(this, definer, role); +} + + +/** + Mark transaction to rollback and mark error as fatal to a sub-statement. + + @param all TRUE <=> rollback main transaction. +*/ + +void THD::mark_transaction_to_rollback(bool all) +{ + /* + There is no point in setting is_fatal_sub_stmt_error unless + we are actually in_sub_stmt. + */ + if (in_sub_stmt) + is_fatal_sub_stmt_error= true; + transaction_rollback_request= all; +} + + +/** + Commit the whole transaction (both statment and all) + + This is used mainly to commit an independent transaction, + like reading system tables. + + @return 0 0k + @return <>0 error code. my_error() has been called() +*/ + +int THD::commit_whole_transaction_and_close_tables() +{ + int error, error2; + DBUG_ENTER("THD::commit_whole_transaction_and_close_tables"); + + /* + This can only happened if we failed to open any table in the + new transaction + */ + DBUG_ASSERT(open_tables); + + if (!open_tables) // Safety for production usage + DBUG_RETURN(0); + + /* + Ensure table was locked (opened with open_and_lock_tables()). If not + the THD can't be part of any transactions and doesn't have to call + this function. 
+ */ + DBUG_ASSERT(lock); + + error= ha_commit_trans(this, FALSE); + /* This will call external_lock to unlock all tables */ + if ((error2= mysql_unlock_tables(this, lock))) + { + my_error(ER_ERROR_DURING_COMMIT, MYF(0), error2); + error= error2; + } + lock= 0; + if ((error2= ha_commit_trans(this, TRUE))) + error= error2; + close_thread_tables(this); + DBUG_RETURN(error); +} + +/** + Start a new independent transaction +*/ + +start_new_trans::start_new_trans(THD *thd) +{ + org_thd= thd; + mdl_savepoint= thd->mdl_context.mdl_savepoint(); + memcpy(old_ha_data, thd->ha_data, sizeof(old_ha_data)); + thd->reset_n_backup_open_tables_state(&open_tables_state_backup); + for (auto &data : thd->ha_data) + data.reset(); + old_transaction= thd->transaction; + thd->transaction= &new_transaction; + new_transaction.on= 1; + in_sub_stmt= thd->in_sub_stmt; + thd->in_sub_stmt= 0; + server_status= thd->server_status; + m_transaction_psi= thd->m_transaction_psi; + thd->m_transaction_psi= 0; + wsrep_on= thd->variables.wsrep_on; + thd->variables.wsrep_on= 0; + thd->server_status&= ~(SERVER_STATUS_IN_TRANS | + SERVER_STATUS_IN_TRANS_READONLY); + thd->server_status|= SERVER_STATUS_AUTOCOMMIT; +} + + +void start_new_trans::restore_old_transaction() +{ + org_thd->transaction= old_transaction; + org_thd->restore_backup_open_tables_state(&open_tables_state_backup); + ha_close_connection(org_thd); + memcpy(org_thd->ha_data, old_ha_data, sizeof(old_ha_data)); + org_thd->mdl_context.rollback_to_savepoint(mdl_savepoint); + org_thd->in_sub_stmt= in_sub_stmt; + org_thd->server_status= server_status; + if (org_thd->m_transaction_psi) + MYSQL_COMMIT_TRANSACTION(org_thd->m_transaction_psi); + org_thd->m_transaction_psi= m_transaction_psi; + org_thd->variables.wsrep_on= wsrep_on; + org_thd= 0; +} + + +/** + Decide on logging format to use for the statement and issue errors + or warnings as needed. 
  The decision depends on the following
  parameters:

  - The logging mode, i.e., the value of binlog_format.  Can be
    statement, mixed, or row.

  - The type of statement.  There are three types of statements:
    "normal" safe statements; unsafe statements; and row injections.
    An unsafe statement is one that, if logged in statement format,
    might produce different results when replayed on the slave (e.g.,
    INSERT DELAYED).  A row injection is either a BINLOG statement, or
    a row event executed by the slave's SQL thread.

  - The capabilities of tables modified by the statement.  The
    *capabilities vector* for a table is a set of flags associated
    with the table.  Currently, it only includes two flags: *row
    capability flag* and *statement capability flag*.

    The row capability flag is set if and only if the engine can
    handle row-based logging. The statement capability flag is set if
    and only if the table can handle statement-based logging.

  Decision table for logging format
  ---------------------------------

  The following table summarizes how the format and generated
  warning/error depends on the tables' capabilities, the statement
  type, and the current binlog_format.

  Row capable        N NNNNNNNNN YYYYYYYYY YYYYYYYYY
  Statement capable  N YYYYYYYYY NNNNNNNNN YYYYYYYYY

  Statement type     * SSSUUUIII SSSUUUIII SSSUUUIII

  binlog_format      * SMRSMRSMR SMRSMRSMR SMRSMRSMR

  Logged format      - SS-S----- -RR-RR-RR SRRSRR-RR
  Warning/Error      1 --2732444 5--5--6-- ---7--6--

  Legend
  ------

  Row capable:    N - Some table not row-capable, Y - All tables row-capable
  Stmt capable:   N - Some table not stmt-capable, Y - All tables stmt-capable
  Statement type: (S)afe, (U)nsafe, or Row (I)njection
  binlog_format:  (S)TATEMENT, (M)IXED, or (R)OW
  Logged format:  (S)tatement or (R)ow
  Warning/Error:  Warnings and error messages are as follows:

  1. Error: Cannot execute statement: binlogging impossible since both
     row-incapable engines and statement-incapable engines are
     involved.

  2. Error: Cannot execute statement: binlogging impossible since
     BINLOG_FORMAT = ROW and at least one table uses a storage engine
     limited to statement-logging.

  3. Error: Cannot execute statement: binlogging of unsafe statement
     is impossible when storage engine is limited to statement-logging
     and BINLOG_FORMAT = MIXED.

  4. Error: Cannot execute row injection: binlogging impossible since
     at least one table uses a storage engine limited to
     statement-logging.

  5. Error: Cannot execute statement: binlogging impossible since
     BINLOG_FORMAT = STATEMENT and at least one table uses a storage
     engine limited to row-logging.

  6. Warning: Unsafe statement binlogged in statement format since
     BINLOG_FORMAT = STATEMENT.

  In addition, we can produce the following error (not depending on
  the variables of the decision diagram):

  7. Error: Cannot execute statement: binlogging impossible since more
     than one engine is involved and at least one engine is
     self-logging.

  For each error case above, the statement is prevented from being
  logged, we report an error, and roll back the statement.  For
  warnings, we set the thd->binlog_flags variable: the warning will be
  printed only if the statement is successfully logged.

  @see THD::binlog_query

  @param[in] thd    Client thread
  @param[in] tables Tables involved in the query

  @retval 0 No error; statement can be logged.
  @retval -1 One of the error conditions above applies (1, 2, 3, 4, 5, or 7).
+*/ + +int THD::decide_logging_format(TABLE_LIST *tables) +{ + DBUG_ENTER("THD::decide_logging_format"); + DBUG_PRINT("info", ("Query: %.*s", (uint) query_length(), query())); + DBUG_PRINT("info", ("binlog_format: %lu", (ulong) variables.binlog_format)); + DBUG_PRINT("info", ("current_stmt_binlog_format: %lu", + (ulong) current_stmt_binlog_format)); + DBUG_PRINT("info", ("lex->get_stmt_unsafe_flags(): 0x%x", + lex->get_stmt_unsafe_flags())); + + reset_binlog_local_stmt_filter(); + + // Used binlog format + ulong binlog_format= wsrep_binlog_format(variables.binlog_format); + /* + We should not decide logging format if the binlog is closed or + binlogging is off, or if the statement is filtered out from the + binlog by filtering rules. + */ + +#ifdef WITH_WSREP + if (WSREP_CLIENT_NNULL(this) && + wsrep_thd_is_local(this) && + wsrep_is_active(this) && + variables.wsrep_trx_fragment_size > 0) + { + if (!is_current_stmt_binlog_format_row()) + { + my_message(ER_NOT_SUPPORTED_YET, + "Streaming replication not supported with " + "binlog_format=STATEMENT", MYF(0)); + DBUG_RETURN(-1); + } + } + + /* + If user has configured wsrep_forced_binlog_format to + STMT OR MIXED and used binlog_format would be same + and this is CREATE TABLE AS SELECT we will fall back + to ROW. + */ + if (wsrep_forced_binlog_format < BINLOG_FORMAT_ROW && + wsrep_ctas) + { + if (!get_stmt_da()->has_sql_condition(ER_UNKNOWN_ERROR)) + { + push_warning_printf(this, Sql_condition::WARN_LEVEL_WARN, + ER_UNKNOWN_ERROR, + "Galera does not support wsrep_forced_binlog_format = %s " + "in CREATE TABLE AS SELECT", + wsrep_forced_binlog_format == BINLOG_FORMAT_STMT ? 
+ "STMT" : "MIXED"); + } + set_current_stmt_binlog_format_row(); + } +#endif /* WITH_WSREP */ + + if (WSREP_EMULATE_BINLOG_NNULL(this) || + binlog_table_should_be_logged(&db)) + { + if (is_bulk_op()) + { + if (binlog_format == BINLOG_FORMAT_STMT) + { + my_error(ER_BINLOG_NON_SUPPORTED_BULK, MYF(0)); + DBUG_PRINT("info", + ("decision: no logging since an error was generated")); + DBUG_RETURN(-1); + } + } + /* + Compute one bit field with the union of all the engine + capabilities, and one with the intersection of all the engine + capabilities. + */ + handler::Table_flags flags_write_some_set= 0; + handler::Table_flags flags_access_some_set= 0; + handler::Table_flags flags_write_all_set= + HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE; + + /* + If different types of engines are about to be updated. + For example: Innodb and Falcon; Innodb and MyIsam. + */ + bool multi_write_engine= FALSE; + /* + If different types of engines are about to be accessed + and any of them is about to be updated. For example: + Innodb and Falcon; Innodb and MyIsam. + */ + bool multi_access_engine= FALSE; + /* + Identifies if a table is changed. + */ + bool is_write= FALSE; // If any write tables + bool has_read_tables= FALSE; // If any read only tables + bool has_auto_increment_write_tables= FALSE; // Write with auto-increment + /* true if it's necessary to switch current statement log format from + STATEMENT to ROW if binary log format is MIXED and autoincrement values + are changed in the statement */ + bool has_unsafe_stmt_autoinc_lock_mode= false; + /* If a write table that doesn't have auto increment part first */ + bool has_write_table_auto_increment_not_first_in_pk= FALSE; + bool has_auto_increment_write_tables_not_first= FALSE; + bool found_first_not_own_table= FALSE; + bool has_write_tables_with_unsafe_statements= FALSE; + bool blackhole_table_found= 0; + + /* + A pointer to a previous table that was changed. 
+ */ + TABLE* prev_write_table= NULL; + /* + A pointer to a previous table that was accessed. + */ + TABLE* prev_access_table= NULL; + /** + The number of tables used in the current statement, + that should be replicated. + */ + uint replicated_tables_count= 0; + /** + The number of tables written to in the current statement, + that should not be replicated. + A table should not be replicated when it is considered + 'local' to a MySQL instance. + Currently, these tables are: + - mysql.slow_log + - mysql.general_log + - mysql.slave_relay_log_info + - mysql.slave_master_info + - mysql.slave_worker_info + - performance_schema.* + - TODO: information_schema.* + In practice, from this list, only performance_schema.* tables + are written to by user queries. + */ + uint non_replicated_tables_count= 0; + +#ifndef DBUG_OFF + { + static const char *prelocked_mode_name[] = { + "NON_PRELOCKED", + "LOCK_TABLES", + "PRELOCKED", + "PRELOCKED_UNDER_LOCK_TABLES", + }; + compile_time_assert(array_elements(prelocked_mode_name) == LTM_always_last); + DBUG_PRINT("debug", ("prelocked_mode: %s", + prelocked_mode_name[locked_tables_mode])); + } +#endif + + /* + Get the capabilities vector for all involved storage engines and + mask out the flags for the binary log. + */ + for (TABLE_LIST *tbl= tables; tbl; tbl= tbl->next_global) + { + TABLE *table; + TABLE_SHARE *share; + handler::Table_flags flags; + if (tbl->placeholder()) + continue; + + table= tbl->table; + share= table->s; + flags= table->file->ha_table_flags(); + if (!share->table_creation_was_logged) + { + /* + This is a temporary table which was not logged in the binary log. + Disable statement logging to enforce row level logging. 
+ */ + DBUG_ASSERT(share->tmp_table); + flags&= ~HA_BINLOG_STMT_CAPABLE; + /* We can only use row logging */ + set_current_stmt_binlog_format_row(); + } + + DBUG_PRINT("info", ("table: %s; ha_table_flags: 0x%llx", + tbl->table_name.str, flags)); + + if (share->no_replicate) + { + /* + The statement uses a table that is not replicated. + The following properties about the table: + - persistent / transient + - transactional / non transactional + - temporary / permanent + - read or write + - multiple engines involved because of this table + are not relevant, as this table is completely ignored. + Because the statement uses a non replicated table, + using STATEMENT format in the binlog is impossible. + Either this statement will be discarded entirely, + or it will be logged (possibly partially) in ROW format. + */ + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_TABLE); + + if (tbl->lock_type >= TL_FIRST_WRITE) + { + non_replicated_tables_count++; + continue; + } + } + if (tbl == lex->first_not_own_table()) + found_first_not_own_table= true; + + replicated_tables_count++; + + if (tbl->prelocking_placeholder != TABLE_LIST::PRELOCK_FK) + { + if (tbl->lock_type < TL_FIRST_WRITE) + has_read_tables= true; + else if (table->found_next_number_field && + (tbl->lock_type >= TL_FIRST_WRITE)) + { + has_auto_increment_write_tables= true; + has_auto_increment_write_tables_not_first= found_first_not_own_table; + if (share->next_number_keypart != 0) + has_write_table_auto_increment_not_first_in_pk= true; + has_unsafe_stmt_autoinc_lock_mode= + table->file->autoinc_lock_mode_stmt_unsafe(); + } + } + + if (tbl->lock_type >= TL_FIRST_WRITE) + { + bool trans; + if (prev_write_table && prev_write_table->file->ht != + table->file->ht) + multi_write_engine= TRUE; + + if (table->file->ht->db_type == DB_TYPE_BLACKHOLE_DB) + blackhole_table_found= 1; + + if (share->non_determinstic_insert && + (sql_command_flags[lex->sql_command] & CF_CAN_GENERATE_ROW_EVENTS + && 
!(sql_command_flags[lex->sql_command] & CF_SCHEMA_CHANGE))) + has_write_tables_with_unsafe_statements= true; + + trans= table->file->has_transactions(); + + if (share->tmp_table) + lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TEMP_TRANS_TABLE : + LEX::STMT_WRITES_TEMP_NON_TRANS_TABLE); + else + lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TRANS_TABLE : + LEX::STMT_WRITES_NON_TRANS_TABLE); + + flags_write_all_set &= flags; + flags_write_some_set |= flags; + is_write= TRUE; + + prev_write_table= table; + + } + flags_access_some_set |= flags; + + if (lex->sql_command != SQLCOM_CREATE_TABLE || lex->tmp_table()) + { + my_bool trans= table->file->has_transactions(); + + if (share->tmp_table) + lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TEMP_TRANS_TABLE : + LEX::STMT_READS_TEMP_NON_TRANS_TABLE); + else + lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TRANS_TABLE : + LEX::STMT_READS_NON_TRANS_TABLE); + } + + if (prev_access_table && prev_access_table->file->ht != + table->file->ht) + multi_access_engine= TRUE; + + prev_access_table= table; + } + + if (binlog_format != BINLOG_FORMAT_ROW) + { + /* + DML statements that modify a table with an auto_increment + column based on rows selected from a table are unsafe as the + order in which the rows are fetched fron the select tables + cannot be determined and may differ on master and slave. + */ + if (has_auto_increment_write_tables && has_read_tables) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_WRITE_AUTOINC_SELECT); + + if (has_write_table_auto_increment_not_first_in_pk) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_NOT_FIRST); + + if (has_write_tables_with_unsafe_statements) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + + if (has_unsafe_stmt_autoinc_lock_mode) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_LOCK_MODE); + + /* + A query that modifies autoinc column in sub-statement can make the + master and slave inconsistent. 
+ We can solve these problems in mixed mode by switching to binlogging + if at least one updated table is used by sub-statement + */ + if (lex->requires_prelocking() && + has_auto_increment_write_tables_not_first) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_COLUMNS); + } + + DBUG_PRINT("info", ("flags_write_all_set: 0x%llx", flags_write_all_set)); + DBUG_PRINT("info", ("flags_write_some_set: 0x%llx", flags_write_some_set)); + DBUG_PRINT("info", ("flags_access_some_set: 0x%llx", flags_access_some_set)); + DBUG_PRINT("info", ("multi_write_engine: %d", multi_write_engine)); + DBUG_PRINT("info", ("multi_access_engine: %d", multi_access_engine)); + + int error= 0; + int unsafe_flags; + + bool multi_stmt_trans= in_multi_stmt_transaction_mode(); + bool trans_table= trans_has_updated_trans_table(this); + bool binlog_direct= variables.binlog_direct_non_trans_update; + + if (lex->is_mixed_stmt_unsafe(multi_stmt_trans, binlog_direct, + trans_table, tx_isolation)) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MIXED_STATEMENT); + else if (multi_stmt_trans && trans_table && !binlog_direct && + lex->stmt_accessed_table(LEX::STMT_WRITES_NON_TRANS_TABLE)) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_NONTRANS_AFTER_TRANS); + + /* + If more than one engine is involved in the statement and at + least one is doing it's own logging (is *self-logging*), the + statement cannot be logged atomically, so we generate an error + rather than allowing the binlog to become corrupt. + */ + if (multi_write_engine && + (flags_write_some_set & HA_HAS_OWN_BINLOGGING)) + my_error((error= ER_BINLOG_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE), + MYF(0)); + else if (multi_access_engine && flags_access_some_set & HA_HAS_OWN_BINLOGGING) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE); + + /* both statement-only and row-only engines involved */ + if ((flags_write_all_set & (HA_BINLOG_STMT_CAPABLE | HA_BINLOG_ROW_CAPABLE)) == 0) + { + /* + 1. 
Error: Binary logging impossible since both row-incapable + engines and statement-incapable engines are involved + */ + my_error((error= ER_BINLOG_ROW_ENGINE_AND_STMT_ENGINE), MYF(0)); + } + /* statement-only engines involved */ + else if ((flags_write_all_set & HA_BINLOG_ROW_CAPABLE) == 0) + { + if (lex->is_stmt_row_injection()) + { + /* + 4. Error: Cannot execute row injection since table uses + storage engine limited to statement-logging + */ + my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0)); + } + else if ((binlog_format == BINLOG_FORMAT_ROW || is_bulk_op()) && + sqlcom_can_generate_row_events(this)) + { + /* + 2. Error: Cannot modify table that uses a storage engine + limited to statement-logging when BINLOG_FORMAT = ROW + */ + my_error((error= ER_BINLOG_ROW_MODE_AND_STMT_ENGINE), MYF(0)); + } + else if ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0) + { + /* + 3. Error: Cannot execute statement: binlogging of unsafe + statement is impossible when storage engine is limited to + statement-logging and BINLOG_FORMAT = MIXED. + */ + for (int unsafe_type= 0; + unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT; + unsafe_type++) + if (unsafe_flags & (1 << unsafe_type)) + my_error((error= ER_BINLOG_UNSAFE_AND_STMT_ENGINE), MYF(0), + ER_THD(this, + LEX::binlog_stmt_unsafe_errcode[unsafe_type])); + } + /* log in statement format! */ + } + /* no statement-only engines */ + else + { + /* binlog_format = STATEMENT */ + if (binlog_format == BINLOG_FORMAT_STMT) + { + if (lex->is_stmt_row_injection()) + { + /* + We have to log the statement as row or give an error. + Better to accept what master gives us than stopping replication. + */ + set_current_stmt_binlog_format_row(); + } + else if ((flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0 && + sqlcom_can_generate_row_events(this)) + { + /* + 5. 
Error: Cannot modify table that uses a storage engine + limited to row-logging when binlog_format = STATEMENT, except + if all tables that are updated are temporary tables + */ + if (!lex->stmt_writes_to_non_temp_table()) + { + /* As all updated tables are temporary, nothing will be logged */ + set_current_stmt_binlog_format_row(); + } + else if (IF_WSREP((!WSREP_NNULL(this) || + wsrep_cs().mode() == + wsrep::client_state::m_local),1)) + { + my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), ""); + } + } + else if (is_write && (unsafe_flags= lex->get_stmt_unsafe_flags()) != 0) + { + /* + 7. Warning: Unsafe statement logged as statement due to + binlog_format = STATEMENT + */ + binlog_unsafe_warning_flags|= unsafe_flags; + + DBUG_PRINT("info", ("Scheduling warning to be issued by " + "binlog_query: '%s'", + ER_THD(this, ER_BINLOG_UNSAFE_STATEMENT))); + DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x", + binlog_unsafe_warning_flags)); + } + /* log in statement format (or row if row event)! */ + } + /* No statement-only engines and binlog_format != STATEMENT. + I.e., nothing prevents us from row logging if needed. */ + else + { + if (lex->is_stmt_unsafe() || lex->is_stmt_row_injection() + || (flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0 || + is_bulk_op()) + { + /* log in row format! */ + set_current_stmt_binlog_format_row_if_mixed(); + } + } + } + + if (non_replicated_tables_count > 0) + { + if ((replicated_tables_count == 0) || ! is_write) + { + DBUG_PRINT("info", ("decision: no logging, no replicated table affected")); + set_binlog_local_stmt_filter(); + } + else + { + if (! 
is_current_stmt_binlog_format_row()) + { + my_error((error= ER_BINLOG_STMT_MODE_AND_NO_REPL_TABLES), MYF(0)); + } + else + { + clear_binlog_local_stmt_filter(); + } + } + } + else + { + clear_binlog_local_stmt_filter(); + } + + if (unlikely(error)) + { + DBUG_PRINT("info", ("decision: no logging since an error was generated")); + DBUG_RETURN(-1); + } + DBUG_PRINT("info", ("decision: logging in %s format", + is_current_stmt_binlog_format_row() ? + "ROW" : "STATEMENT")); + + if (blackhole_table_found && + variables.binlog_format == BINLOG_FORMAT_ROW && + (sql_command_flags[lex->sql_command] & + (CF_UPDATES_DATA | CF_DELETES_DATA))) + { + String table_names; + /* + Generate a warning for UPDATE/DELETE statements that modify a + BLACKHOLE table, as row events are not logged in row format. + */ + for (TABLE_LIST *table= tables; table; table= table->next_global) + { + if (table->placeholder()) + continue; + if (table->table->file->ht->db_type == DB_TYPE_BLACKHOLE_DB && + table->lock_type >= TL_FIRST_WRITE) + { + table_names.append(&table->table_name); + table_names.append(','); + } + } + if (!table_names.is_empty()) + { + bool is_update= MY_TEST(sql_command_flags[lex->sql_command] & + CF_UPDATES_DATA); + /* + Replace the last ',' with '.' for table_names + */ + table_names.replace(table_names.length()-1, 1, ".", 1); + push_warning_printf(this, Sql_condition::WARN_LEVEL_WARN, + ER_UNKNOWN_ERROR, + "Row events are not logged for %s statements " + "that modify BLACKHOLE tables in row format. " + "Table(s): '%-.192s'", + is_update ? 
"UPDATE" : "DELETE", + table_names.c_ptr()); + } + } + + if (is_write && is_current_stmt_binlog_format_row()) + binlog_prepare_for_row_logging(); + } + else + { + DBUG_PRINT("info", ("decision: no logging since " + "mysql_bin_log.is_open() = %d " + "and (options & OPTION_BIN_LOG) = 0x%llx " + "and binlog_format = %u " + "and binlog_filter->db_ok(db) = %d", + mysql_bin_log.is_open(), + (variables.option_bits & OPTION_BIN_LOG), + (uint) binlog_format, + binlog_filter->db_ok(db.str))); + if (WSREP_NNULL(this) && is_current_stmt_binlog_format_row()) + binlog_prepare_for_row_logging(); + } + DBUG_RETURN(0); +} + + +/* + Reconsider logging format in case of INSERT...ON DUPLICATE KEY UPDATE + for tables with more than one unique keys in case of MIXED binlog format. + + Unsafe means that a master could execute the statement differently than + the slave. + This could can happen in the following cases: + - The unique check are done in different order on master or slave + (different engine or different key order). + - There is a conflict on another key than the first and before the + statement is committed, another connection commits a row that conflicts + on an earlier unique key. 
Example follows: + + Below a and b are unique keys, the table has a row (1,1,0) + connection 1: + INSERT INTO t1 set a=2,b=1,c=0 ON DUPLICATE KEY UPDATE c=1; + connection 2: + INSERT INTO t1 set a=2,b=2,c=0; + + If 2 commits after 1 has been executed but before 1 has committed + (and are thus put before the other in the binary log), one will + get different data on the slave: + (1,1,1),(2,2,1) instead of (1,1,1),(2,2,0) +*/ + +void THD::reconsider_logging_format_for_iodup(TABLE *table) +{ + DBUG_ENTER("reconsider_logging_format_for_iodup"); + enum_binlog_format bf= (enum_binlog_format) wsrep_binlog_format(variables.binlog_format); + + DBUG_ASSERT(lex->duplicates == DUP_UPDATE); + + if (bf <= BINLOG_FORMAT_STMT && + !is_current_stmt_binlog_format_row()) + { + KEY *end= table->s->key_info + table->s->keys; + uint unique_keys= 0; + + for (KEY *keyinfo= table->s->key_info; keyinfo < end ; keyinfo++) + { + if (keyinfo->flags & HA_NOSAME) + { + /* + We assume that the following cases will guarantee that the + key is unique if a key part is not set: + - The key part is an autoincrement (autogenerated) + - The key part has a default value that is null and it not + a virtual field that will be calculated later. 
+ */ + for (uint j= 0; j < keyinfo->user_defined_key_parts; j++) + { + Field *field= keyinfo->key_part[j].field; + if (!bitmap_is_set(table->write_set, field->field_index)) + { + /* Check auto_increment */ + if (field == table->next_number_field) + goto exit; + if (field->is_real_null() && !field->default_value) + goto exit; + } + } + if (unique_keys++) + break; +exit:; + } + } + if (unique_keys > 1) + { + if (bf == BINLOG_FORMAT_STMT && !lex->is_stmt_unsafe()) + { + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS); + binlog_unsafe_warning_flags|= lex->get_stmt_unsafe_flags(); + } + set_current_stmt_binlog_format_row_if_mixed(); + if (is_current_stmt_binlog_format_row()) + binlog_prepare_for_row_logging(); + } + } + DBUG_VOID_RETURN; +} + +#ifndef MYSQL_CLIENT +/** + Check if we should log a table DDL to the binlog + + @retval true yes + @retval false no +*/ + +bool THD::binlog_table_should_be_logged(const LEX_CSTRING *db) +{ + return (mysql_bin_log.is_open() && + (variables.option_bits & OPTION_BIN_LOG) && + (wsrep_binlog_format(variables.binlog_format) != BINLOG_FORMAT_STMT || + binlog_filter->db_ok(db->str))); +} + +/* + Template member function for ensuring that there is an rows log + event of the apropriate type before proceeding. + + PRE CONDITION: + - Events of type 'RowEventT' have the type code 'type_code'. + + POST CONDITION: + If a non-NULL pointer is returned, the pending event for thread 'thd' will + be an event of type 'RowEventT' (which have the type code 'type_code') + will either empty or have enough space to hold 'needed' bytes. In + addition, the columns bitmap will be correct for the row, meaning that + the pending event will be flushed if the columns in the event differ from + the columns suppled to the function. + + RETURNS + If no error, a non-NULL pending event (either one which already existed or + the newly created one). + If error, NULL. 
+ */ + +template Rows_log_event* +THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id, + size_t needed, + bool is_transactional, + RowsEventT *hint __attribute__((unused))) +{ + DBUG_ENTER("binlog_prepare_pending_rows_event"); + /* Pre-conditions */ + DBUG_ASSERT(table->s->table_map_id != ~0UL); + + /* Fetch the type code for the RowsEventT template parameter */ + int const general_type_code= RowsEventT::TYPE_CODE; + + /* Ensure that all events in a GTID group are in the same cache */ + if (variables.option_bits & OPTION_GTID_BEGIN) + is_transactional= 1; + + /* + There is no good place to set up the transactional data, so we + have to do it here. + */ + if (binlog_setup_trx_data() == NULL) + DBUG_RETURN(NULL); + + Rows_log_event* pending= binlog_get_pending_rows_event(is_transactional); + + if (unlikely(pending && !pending->is_valid())) + DBUG_RETURN(NULL); + + /* + Check if the current event is non-NULL and a write-rows + event. Also check if the table provided is mapped: if it is not, + then we have switched to writing to a new table. + If there is no pending event, we need to create one. If there is a pending + event, but it's not about the same table id, or not of the same type + (between Write, Update and Delete), or not the same affected columns, or + going to be too big, flush this event to disk and create a new pending + event. + */ + if (!pending || + pending->server_id != serv_id || + pending->get_table_id() != table->s->table_map_id || + pending->get_general_type_code() != general_type_code || + pending->get_data_size() + needed > opt_binlog_rows_event_max_size || + pending->read_write_bitmaps_cmp(table) == FALSE) + { + /* Create a new RowsEventT... */ + Rows_log_event* const + ev= new RowsEventT(this, table, table->s->table_map_id, + is_transactional); + if (unlikely(!ev)) + DBUG_RETURN(NULL); + ev->server_id= serv_id; // I don't like this, it's too easy to forget. 
+ /* + flush the pending event and replace it with the newly created + event... + */ + if (unlikely( + mysql_bin_log.flush_and_set_pending_rows_event(this, ev, + is_transactional))) + { + delete ev; + DBUG_RETURN(NULL); + } + + DBUG_RETURN(ev); /* This is the new pending event */ + } + DBUG_RETURN(pending); /* This is the current pending event */ +} + +/* Declare in unnamed namespace. */ +CPP_UNNAMED_NS_START + /** + Class to handle temporary allocation of memory for row data. + + The responsibilities of the class is to provide memory for + packing one or two rows of packed data (depending on what + constructor is called). + + In order to make the allocation more efficient for "simple" rows, + i.e., rows that do not contain any blobs, a pointer to the + allocated memory is of memory is stored in the table structure + for simple rows. If memory for a table containing a blob field + is requested, only memory for that is allocated, and subsequently + released when the object is destroyed. + + */ + class Row_data_memory { + public: + /** + Build an object to keep track of a block-local piece of memory + for storing a row of data. + + @param table + Table where the pre-allocated memory is stored. + + @param length + Length of data that is needed, if the record contain blobs. + */ + Row_data_memory(TABLE *table, size_t const len1) + : m_memory(0) + { +#ifndef DBUG_OFF + m_alloc_checked= FALSE; +#endif + allocate_memory(table, len1); + m_ptr[0]= has_memory() ? m_memory : 0; + m_ptr[1]= 0; + } + + Row_data_memory(TABLE *table, size_t const len1, size_t const len2) + : m_memory(0) + { +#ifndef DBUG_OFF + m_alloc_checked= FALSE; +#endif + allocate_memory(table, len1 + len2); + m_ptr[0]= has_memory() ? m_memory : 0; + m_ptr[1]= has_memory() ? m_memory + len1 : 0; + } + + ~Row_data_memory() + { + if (m_memory != 0 && m_release_memory_on_destruction) + my_free(m_memory); + } + + /** + Is there memory allocated? 
+ + @retval true There is memory allocated + @retval false Memory allocation failed + */ + bool has_memory() const { +#ifndef DBUG_OFF + m_alloc_checked= TRUE; +#endif + return m_memory != 0; + } + + uchar *slot(uint s) + { + DBUG_ASSERT(s < sizeof(m_ptr)/sizeof(*m_ptr)); + DBUG_ASSERT(m_ptr[s] != 0); + DBUG_SLOW_ASSERT(m_alloc_checked == TRUE); + return m_ptr[s]; + } + + private: + void allocate_memory(TABLE *const table, size_t const total_length) + { + if (table->s->blob_fields == 0) + { + /* + The maximum length of a packed record is less than this + length. We use this value instead of the supplied length + when allocating memory for records, since we don't know how + the memory will be used in future allocations. + + Since table->s->reclength is for unpacked records, we have + to add two bytes for each field, which can potentially be + added to hold the length of a packed field. + */ + size_t const maxlen= table->s->reclength + 2 * table->s->fields; + + /* + Allocate memory for two records if memory hasn't been + allocated. We allocate memory for two records so that it can + be used when processing update rows as well. + */ + if (table->write_row_record == 0) + table->write_row_record= + (uchar *) alloc_root(&table->mem_root, 2 * maxlen); + m_memory= table->write_row_record; + m_release_memory_on_destruction= FALSE; + } + else + { + m_memory= (uchar *) my_malloc(key_memory_Row_data_memory_memory, + total_length, MYF(MY_WME)); + m_release_memory_on_destruction= TRUE; + } + } + +#ifndef DBUG_OFF + mutable bool m_alloc_checked; +#endif + bool m_release_memory_on_destruction; + uchar *m_memory; + uchar *m_ptr[2]; + }; + +CPP_UNNAMED_NS_END + +int THD::binlog_write_row(TABLE* table, bool is_trans, + uchar const *record) +{ + + DBUG_ASSERT(is_current_stmt_binlog_format_row()); + DBUG_ASSERT((WSREP_NNULL(this) && wsrep_emulate_bin_log) || + mysql_bin_log.is_open()); + /* + Pack records into format for transfer. 
We are allocating more + memory than needed, but that doesn't matter. + */ + Row_data_memory memory(table, max_row_length(table, table->rpl_write_set, + record)); + if (!memory.has_memory()) + return HA_ERR_OUT_OF_MEM; + + uchar *row_data= memory.slot(0); + + size_t const len= pack_row(table, table->rpl_write_set, row_data, record); + + /* Ensure that all events in a GTID group are in the same cache */ + if (variables.option_bits & OPTION_GTID_BEGIN) + is_trans= 1; + + Rows_log_event* ev; + if (binlog_should_compress(len)) + ev = + binlog_prepare_pending_rows_event(table, variables.server_id, + len, is_trans, + static_cast(0)); + else + ev = + binlog_prepare_pending_rows_event(table, variables.server_id, + len, is_trans, + static_cast(0)); + + if (unlikely(ev == 0)) + return HA_ERR_OUT_OF_MEM; + + return ev->add_row_data(row_data, len); +} + +int THD::binlog_update_row(TABLE* table, bool is_trans, + const uchar *before_record, + const uchar *after_record) +{ + DBUG_ASSERT(is_current_stmt_binlog_format_row()); + DBUG_ASSERT((WSREP_NNULL(this) && wsrep_emulate_bin_log) || + mysql_bin_log.is_open()); + + /** + Save a reference to the original read bitmaps + We will need this to restore the bitmaps at the end as + binlog_prepare_row_images() may change table->read_set. + table->read_set is used by pack_row and deep in + binlog_prepare_pending_events(). + */ + MY_BITMAP *old_read_set= table->read_set; + + /** + This will remove spurious fields required during execution but + not needed for binlogging. This is done according to the: + binlog-row-image option. 
+ */ + binlog_prepare_row_images(table); + + size_t const before_maxlen= max_row_length(table, table->read_set, + before_record); + size_t const after_maxlen= max_row_length(table, table->rpl_write_set, + after_record); + + Row_data_memory row_data(table, before_maxlen, after_maxlen); + if (!row_data.has_memory()) + return HA_ERR_OUT_OF_MEM; + + uchar *before_row= row_data.slot(0); + uchar *after_row= row_data.slot(1); + + size_t const before_size= pack_row(table, table->read_set, before_row, + before_record); + size_t const after_size= pack_row(table, table->rpl_write_set, after_row, + after_record); + + /* Ensure that all events in a GTID group are in the same cache */ + if (variables.option_bits & OPTION_GTID_BEGIN) + is_trans= 1; + + /* + Don't print debug messages when running valgrind since they can + trigger false warnings. + */ +#ifndef HAVE_valgrind + DBUG_DUMP("before_record", before_record, table->s->reclength); + DBUG_DUMP("after_record", after_record, table->s->reclength); + DBUG_DUMP("before_row", before_row, before_size); + DBUG_DUMP("after_row", after_row, after_size); +#endif + + Rows_log_event* ev; + if(binlog_should_compress(before_size + after_size)) + ev = + binlog_prepare_pending_rows_event(table, variables.server_id, + before_size + after_size, is_trans, + static_cast(0)); + else + ev = + binlog_prepare_pending_rows_event(table, variables.server_id, + before_size + after_size, is_trans, + static_cast(0)); + + if (unlikely(ev == 0)) + return HA_ERR_OUT_OF_MEM; + + int error= ev->add_row_data(before_row, before_size) || + ev->add_row_data(after_row, after_size); + + /* restore read set for the rest of execution */ + table->column_bitmaps_set_no_signal(old_read_set, + table->write_set); + return error; + +} + +int THD::binlog_delete_row(TABLE* table, bool is_trans, + uchar const *record) +{ + DBUG_ASSERT(is_current_stmt_binlog_format_row()); + DBUG_ASSERT((WSREP_NNULL(this) && wsrep_emulate_bin_log) || + mysql_bin_log.is_open()); + /** + Save a 
reference to the original read bitmaps + We will need this to restore the bitmaps at the end as + binlog_prepare_row_images() may change table->read_set. + table->read_set is used by pack_row and deep in + binlog_prepare_pending_events(). + */ + MY_BITMAP *old_read_set= table->read_set; + + /** + This will remove spurious fields required during execution but + not needed for binlogging. This is done according to the: + binlog-row-image option. + */ + binlog_prepare_row_images(table); + + /* + Pack records into format for transfer. We are allocating more + memory than needed, but that doesn't matter. + */ + Row_data_memory memory(table, max_row_length(table, table->read_set, + record)); + if (unlikely(!memory.has_memory())) + return HA_ERR_OUT_OF_MEM; + + uchar *row_data= memory.slot(0); + + DBUG_DUMP("table->read_set", (uchar*) table->read_set->bitmap, (table->s->fields + 7) / 8); + size_t const len= pack_row(table, table->read_set, row_data, record); + + /* Ensure that all events in a GTID group are in the same cache */ + if (variables.option_bits & OPTION_GTID_BEGIN) + is_trans= 1; + + Rows_log_event* ev; + if(binlog_should_compress(len)) + ev = + binlog_prepare_pending_rows_event(table, variables.server_id, + len, is_trans, + static_cast(0)); + else + ev = + binlog_prepare_pending_rows_event(table, variables.server_id, + len, is_trans, + static_cast(0)); + + if (unlikely(ev == 0)) + return HA_ERR_OUT_OF_MEM; + + + int error= ev->add_row_data(row_data, len); + + /* restore read set for the rest of execution */ + table->column_bitmaps_set_no_signal(old_read_set, + table->write_set); + + return error; +} + + +/** + Remove from read_set spurious columns. The write_set has been + handled before in table->mark_columns_needed_for_update. 
+*/ + +void THD::binlog_prepare_row_images(TABLE *table) +{ + DBUG_ENTER("THD::binlog_prepare_row_images"); + + DBUG_PRINT_BITSET("debug", "table->read_set (before preparing): %s", + table->read_set); + THD *thd= table->in_use; + + /** + if there is a primary key in the table (ie, user declared PK or a + non-null unique index) and we don't want to ship the entire image, + and the handler involved supports this. + */ + if (table->s->primary_key < MAX_KEY && + (thd->variables.binlog_row_image < BINLOG_ROW_IMAGE_FULL) && + !ha_check_storage_engine_flag(table->s->db_type(), + HTON_NO_BINLOG_ROW_OPT)) + { + /** + Just to be sure that tmp_set is currently not in use as + the read_set already. + */ + DBUG_ASSERT(table->read_set != &table->tmp_set); + + switch (thd->variables.binlog_row_image) + { + case BINLOG_ROW_IMAGE_MINIMAL: + /* MINIMAL: Mark only PK */ + table->mark_index_columns(table->s->primary_key, + &table->tmp_set); + break; + case BINLOG_ROW_IMAGE_NOBLOB: + /** + NOBLOB: Remove unnecessary BLOB fields from read_set + (the ones that are not part of PK). + */ + bitmap_copy(&table->tmp_set, table->read_set); + for (Field **ptr=table->field ; *ptr ; ptr++) + { + Field *field= (*ptr); + if ((field->type() == MYSQL_TYPE_BLOB) && + !(field->flags & PRI_KEY_FLAG)) + bitmap_clear_bit(&table->tmp_set, field->field_index); + } + break; + default: + DBUG_ASSERT(0); // impossible. 
+ } + + /* set the temporary read_set */ + table->column_bitmaps_set_no_signal(&table->tmp_set, + table->write_set); + } + + DBUG_PRINT_BITSET("debug", "table->read_set (after preparing): %s", + table->read_set); + DBUG_VOID_RETURN; +} + + + +int THD::binlog_remove_pending_rows_event(bool reset_stmt, + bool is_transactional) +{ + DBUG_ENTER("THD::binlog_remove_pending_rows_event"); + + if(!WSREP_EMULATE_BINLOG_NNULL(this) && !mysql_bin_log.is_open()) + DBUG_RETURN(0); + + /* Ensure that all events in a GTID group are in the same cache */ + if (variables.option_bits & OPTION_GTID_BEGIN) + is_transactional= 1; + + mysql_bin_log.remove_pending_rows_event(this, is_transactional); + + if (reset_stmt) + reset_binlog_for_next_statement(); + DBUG_RETURN(0); +} + + +int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional) +{ + DBUG_ENTER("THD::binlog_flush_pending_rows_event"); + /* + We shall flush the pending event even if we are not in row-based + mode: it might be the case that we left row-based mode before + flushing anything (e.g., if we have explicitly locked tables). + */ + if (!WSREP_EMULATE_BINLOG_NNULL(this) && !mysql_bin_log.is_open()) + DBUG_RETURN(0); + + /* Ensure that all events in a GTID group are in the same cache */ + if (variables.option_bits & OPTION_GTID_BEGIN) + is_transactional= 1; + + /* + Mark the event as the last event of a statement if the stmt_end + flag is set. + */ + int error= 0; + if (Rows_log_event *pending= binlog_get_pending_rows_event(is_transactional)) + { + if (stmt_end) + { + pending->set_flags(Rows_log_event::STMT_END_F); + reset_binlog_for_next_statement(); + } + error= mysql_bin_log.flush_and_set_pending_rows_event(this, 0, + is_transactional); + } + + DBUG_RETURN(error); +} + + +/* + DML that doesn't change the table normally is not logged, + but it needs to be logged if it auto-created a partition as a side effect. 
+*/ +bool THD::binlog_for_noop_dml(bool transactional_table) +{ + if (mysql_bin_log.is_open() && log_current_statement()) + { + reset_unsafe_warnings(); + if (binlog_query(THD::STMT_QUERY_TYPE, query(), query_length(), + transactional_table, FALSE, FALSE, 0) > 0) + { + my_error(ER_ERROR_ON_WRITE, MYF(0), "binary log", -1); + return true; + } + } + return false; +} + + +#if defined(DBUG_TRACE) && !defined(_lint) +static const char * +show_query_type(THD::enum_binlog_query_type qtype) +{ + switch (qtype) { + case THD::ROW_QUERY_TYPE: + return "ROW"; + case THD::STMT_QUERY_TYPE: + return "STMT"; + case THD::QUERY_TYPE_COUNT: + default: + DBUG_ASSERT(0 <= qtype && qtype < THD::QUERY_TYPE_COUNT); + } + static char buf[64]; + sprintf(buf, "UNKNOWN#%d", qtype); + return buf; +} +#endif + +/* + Constants required for the limit unsafe warnings suppression +*/ +//seconds after which the limit unsafe warnings suppression will be activated +#define LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT 5*60 +//number of limit unsafe warnings after which the suppression will be activated +#define LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT 10 + +static ulonglong unsafe_suppression_start_time= 0; +static bool unsafe_warning_suppression_active[LEX::BINLOG_STMT_UNSAFE_COUNT]; +static ulong unsafe_warnings_count[LEX::BINLOG_STMT_UNSAFE_COUNT]; +static ulong total_unsafe_warnings_count; + +/** + Auxiliary function to reset the limit unsafety warning suppression. + This is done without mutex protection, but this should be good + enough as it doesn't matter if we loose a couple of suppressed + messages or if this is called multiple times. 
+*/ + +static void reset_binlog_unsafe_suppression(ulonglong now) +{ + uint i; + DBUG_ENTER("reset_binlog_unsafe_suppression"); + + unsafe_suppression_start_time= now; + total_unsafe_warnings_count= 0; + + for (i= 0 ; i < LEX::BINLOG_STMT_UNSAFE_COUNT ; i++) + { + unsafe_warnings_count[i]= 0; + unsafe_warning_suppression_active[i]= 0; + } + DBUG_VOID_RETURN; +} + +/** + Auxiliary function to print warning in the error log. +*/ +static void print_unsafe_warning_to_log(THD *thd, int unsafe_type, char* buf, + char* query) +{ + DBUG_ENTER("print_unsafe_warning_in_log"); + sprintf(buf, ER_THD(thd, ER_BINLOG_UNSAFE_STATEMENT), + ER_THD(thd, LEX::binlog_stmt_unsafe_errcode[unsafe_type])); + sql_print_warning(ER_THD(thd, ER_MESSAGE_AND_STATEMENT), buf, query); + DBUG_VOID_RETURN; +} + +/** + Auxiliary function to check if the warning for unsafe repliction statements + should be thrown or suppressed. + + Logic is: + - If we get more than LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT errors + of one type, that type of errors will be suppressed for + LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT. + - When the time limit has been reached, all suppression is reset. + + This means that if one gets many different types of errors, some of them + may be reset less than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT. However at + least one error is disable for this time. + + SYNOPSIS: + @params + unsafe_type - The type of unsafety. + + RETURN: + 0 0k to log + 1 Message suppressed +*/ + +static bool protect_against_unsafe_warning_flood(int unsafe_type) +{ + ulong count; + ulonglong now= my_interval_timer()/1000000000ULL; + DBUG_ENTER("protect_against_unsafe_warning_flood"); + + count= ++unsafe_warnings_count[unsafe_type]; + total_unsafe_warnings_count++; + + /* + INITIALIZING: + If this is the first time this function is called with log warning + enabled, the monitoring the unsafe warnings should start. 
+ */ + if (unsafe_suppression_start_time == 0) + { + reset_binlog_unsafe_suppression(now); + DBUG_RETURN(0); + } + + /* + The following is true if we got too many errors or if the error was + already suppressed + */ + if (count >= LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT) + { + ulonglong diff_time= (now - unsafe_suppression_start_time); + + if (!unsafe_warning_suppression_active[unsafe_type]) + { + /* + ACTIVATION: + We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT warnings in + less than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT we activate the + suppression. + */ + if (diff_time <= LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT) + { + unsafe_warning_suppression_active[unsafe_type]= 1; + sql_print_information("Suppressing warnings of type '%s' for up to %d seconds because of flooding", + ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]), + LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT); + } + else + { + /* + There is no flooding till now, therefore we restart the monitoring + */ + reset_binlog_unsafe_suppression(now); + } + } + else + { + /* This type of warnings was suppressed */ + if (diff_time > LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT) + { + ulong save_count= total_unsafe_warnings_count; + /* Print a suppression note and remove the suppression */ + reset_binlog_unsafe_suppression(now); + sql_print_information("Suppressed %lu unsafe warnings during " + "the last %d seconds", + save_count, (int) diff_time); + } + } + } + DBUG_RETURN(unsafe_warning_suppression_active[unsafe_type]); +} + +MYSQL_TIME THD::query_start_TIME() +{ + MYSQL_TIME res; + variables.time_zone->gmt_sec_to_TIME(&res, query_start()); + res.second_part= query_start_sec_part(); + used|= TIME_ZONE_USED; + return res; +} + +/** + Auxiliary method used by @c binlog_query() to raise warnings. + + The type of warning and the type of unsafeness is stored in + THD::binlog_unsafe_warning_flags. 
+*/ +void THD::issue_unsafe_warnings() +{ + char buf[MYSQL_ERRMSG_SIZE * 2]; + uint32 unsafe_type_flags; + DBUG_ENTER("issue_unsafe_warnings"); + /* + Ensure that binlog_unsafe_warning_flags is big enough to hold all + bits. This is actually a constant expression. + */ + DBUG_ASSERT(LEX::BINLOG_STMT_UNSAFE_COUNT <= + sizeof(binlog_unsafe_warning_flags) * CHAR_BIT); + + if (!(unsafe_type_flags= binlog_unsafe_warning_flags)) + DBUG_VOID_RETURN; // Nothing to do + + /* + For each unsafe_type, check if the statement is unsafe in this way + and issue a warning. + */ + for (int unsafe_type=0; + unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT; + unsafe_type++) + { + if ((unsafe_type_flags & (1 << unsafe_type)) != 0) + { + push_warning_printf(this, Sql_condition::WARN_LEVEL_NOTE, + ER_BINLOG_UNSAFE_STATEMENT, + ER_THD(this, ER_BINLOG_UNSAFE_STATEMENT), + ER_THD(this, LEX::binlog_stmt_unsafe_errcode[unsafe_type])); + if (global_system_variables.log_warnings > 0 && + !protect_against_unsafe_warning_flood(unsafe_type)) + print_unsafe_warning_to_log(this, unsafe_type, buf, query()); + } + } + DBUG_VOID_RETURN; +} + +/** + Log the current query. + + The query will be logged in either row format or statement format + depending on the value of @c current_stmt_binlog_format_row field and + the value of the @c qtype parameter. + + This function must be called: + + - After the all calls to ha_*_row() functions have been issued. + + - After any writes to system tables. Rationale: if system tables + were written after a call to this function, and the master crashes + after the call to this function and before writing the system + tables, then the master and slave get out of sync. + + - Before tables are unlocked and closed. + + @see decide_logging_format + + @retval < 0 No logging of query (ok) + @retval 0 Success + @retval > 0 If there is a failure when writing the query (e.g., + write failure), then the error code is returned. 
+*/ + +int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg, + ulong query_len, bool is_trans, bool direct, + bool suppress_use, int errcode) +{ + DBUG_ENTER("THD::binlog_query"); + DBUG_PRINT("enter", ("qtype: %s query: '%-.*s'", + show_query_type(qtype), (int) query_len, query_arg)); + + DBUG_ASSERT(query_arg); + DBUG_ASSERT(WSREP_EMULATE_BINLOG_NNULL(this) || mysql_bin_log.is_open()); + + /* If this is withing a BEGIN ... COMMIT group, don't log it */ + if (variables.option_bits & OPTION_GTID_BEGIN) + { + direct= 0; + is_trans= 1; + } + DBUG_PRINT("info", ("is_trans: %d direct: %d", is_trans, direct)); + + if (get_binlog_local_stmt_filter() == BINLOG_FILTER_SET) + { + /* + The current statement is to be ignored, and not written to + the binlog. Do not call issue_unsafe_warnings(). + */ + DBUG_RETURN(-1); + } + + /* + If we are not in prelocked mode, mysql_unlock_tables() will be + called after this binlog_query(), so we have to flush the pending + rows event with the STMT_END_F set to unlock all tables at the + slave side as well. + + If we are in prelocked mode, the flushing will be done inside the + top-most close_thread_tables(). + */ + if (this->locked_tables_mode <= LTM_LOCK_TABLES) + { + int error; + if (unlikely(error= binlog_flush_pending_rows_event(TRUE, is_trans))) + { + DBUG_ASSERT(error > 0); + DBUG_RETURN(error); + } + } + + /* + Warnings for unsafe statements logged in statement format are + printed in three places instead of in decide_logging_format(). + This is because the warnings should be printed only if the statement + is actually logged. When executing decide_logging_format(), we cannot + know for sure if the statement will be logged: + + 1 - sp_head::execute_procedure which prints out warnings for calls to + stored procedures. + + 2 - sp_head::execute_function which prints out warnings for calls + involving functions. 
+ + 3 - THD::binlog_query (here) which prints warning for top level + statements not covered by the two cases above: i.e., if not insided a + procedure and a function. + + Besides, we should not try to print these warnings if it is not + possible to write statements to the binary log as it happens when + the execution is inside a function, or generaly speaking, when + the variables.option_bits & OPTION_BIN_LOG is false. + + */ + if ((variables.option_bits & OPTION_BIN_LOG) && + spcont == NULL && !binlog_evt_union.do_union) + issue_unsafe_warnings(); + + switch (qtype) { + /* + ROW_QUERY_TYPE means that the statement may be logged either in + row format or in statement format. If + current_stmt_binlog_format is row, it means that the + statement has already been logged in row format and hence shall + not be logged again. + */ + case THD::ROW_QUERY_TYPE: + DBUG_PRINT("debug", + ("is_current_stmt_binlog_format_row: %d", + is_current_stmt_binlog_format_row())); + if (is_current_stmt_binlog_format_row()) + DBUG_RETURN(-1); + /* Fall through */ + + /* + STMT_QUERY_TYPE means that the query must be logged in statement + format; it cannot be logged in row format. This is typically + used by DDL statements. It is an error to use this query type + if current_stmt_binlog_format_row is row. + + @todo Currently there are places that call this method with + STMT_QUERY_TYPE and current_stmt_binlog_format is row. Fix those + places and add assert to ensure correct behavior. /Sven + */ + case THD::STMT_QUERY_TYPE: + /* + The MYSQL_LOG::write() function will set the STMT_END_F flag and + flush the pending rows event if necessary. + */ + { + int error = 0; + + /* + Binlog table maps will be irrelevant after a Query_log_event + (they are just removed on the slave side) so after the query + log event is written to the binary log, we pretend that no + table maps were written. 
+ */ + if (binlog_should_compress(query_len)) + { + Query_compressed_log_event qinfo(this, query_arg, query_len, is_trans, + direct, suppress_use, errcode); + error= mysql_bin_log.write(&qinfo); + } + else + { + Query_log_event qinfo(this, query_arg, query_len, is_trans, direct, + suppress_use, errcode); + error= mysql_bin_log.write(&qinfo); + } + /* + row logged binlog may not have been reset in the case of locked tables + */ + reset_binlog_for_next_statement(); + + DBUG_RETURN(error >= 0 ? error : 1); + } + + case THD::QUERY_TYPE_COUNT: + default: + DBUG_ASSERT(qtype < QUERY_TYPE_COUNT); + } + DBUG_RETURN(0); +} + + +/** + Binlog current query as a statement, ignoring the binlog filter setting. + + The filter is in decide_logging_format() to mark queries to not be stored + in the binary log, for example by a shared distributed engine like S3. + This function resets the filter to ensure the the query is logged if + the binlog is active. + + Note that 'direct' is set to false, which means that the query will + not be directly written to the binary log but instead to the cache. 
+ + @retval false ok + @retval true error +*/ + + +bool THD::binlog_current_query_unfiltered() +{ + if (!mysql_bin_log.is_open()) + return 0; + + reset_binlog_local_stmt_filter(); + clear_binlog_local_stmt_filter(); + return binlog_query(THD::STMT_QUERY_TYPE, query(), query_length(), + /* is_trans */ FALSE, + /* direct */ FALSE, + /* suppress_use */ FALSE, + /* Error */ 0) > 0; +} + + +void +THD::wait_for_wakeup_ready() +{ + mysql_mutex_lock(&LOCK_wakeup_ready); + while (!wakeup_ready) + mysql_cond_wait(&COND_wakeup_ready, &LOCK_wakeup_ready); + mysql_mutex_unlock(&LOCK_wakeup_ready); +} + +void +THD::signal_wakeup_ready() +{ + mysql_mutex_lock(&LOCK_wakeup_ready); + wakeup_ready= true; + mysql_mutex_unlock(&LOCK_wakeup_ready); + mysql_cond_signal(&COND_wakeup_ready); +} + +void THD::set_last_commit_gtid(rpl_gtid >id) +{ +#ifndef EMBEDDED_LIBRARY + bool changed_gtid= (m_last_commit_gtid.seq_no != gtid.seq_no); +#endif + m_last_commit_gtid= gtid; +#ifndef EMBEDDED_LIBRARY + if (changed_gtid) + { + DBUG_ASSERT(current_thd == this); + session_tracker.sysvars.mark_as_changed(this, Sys_last_gtid_ptr); + } +#endif +} + +void +wait_for_commit::reinit() +{ + subsequent_commits_list= NULL; + next_subsequent_commit= NULL; + waitee.store(NULL, std::memory_order_relaxed); + opaque_pointer= NULL; + wakeup_error= 0; + wakeup_subsequent_commits_running= false; + commit_started= false; + wakeup_blocked= false; +#ifdef SAFE_MUTEX + /* + When using SAFE_MUTEX, the ordering between taking the LOCK_wait_commit + mutexes is checked. This causes a problem when we re-use a mutex, as then + the expected locking order may change. + + So in this case, do a re-init of the mutex. In release builds, we want to + avoid the overhead of a re-init though. + + To ensure that no one is locking the mutex, we take a lock of it first. 
+ For full explanation, see wait_for_commit::~wait_for_commit() + */ + mysql_mutex_lock(&LOCK_wait_commit); + mysql_mutex_unlock(&LOCK_wait_commit); + + mysql_mutex_destroy(&LOCK_wait_commit); + mysql_mutex_init(key_LOCK_wait_commit, &LOCK_wait_commit, MY_MUTEX_INIT_FAST); +#endif +} + + +wait_for_commit::wait_for_commit() +{ + mysql_mutex_init(key_LOCK_wait_commit, &LOCK_wait_commit, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wait_commit, &COND_wait_commit, 0); + reinit(); +} + + +wait_for_commit::~wait_for_commit() +{ + /* + Since we do a dirty read of the waiting_for_commit flag in + wait_for_prior_commit() and in unregister_wait_for_prior_commit(), we need + to take extra care before freeing the wait_for_commit object. + + It is possible for the waitee to be pre-empted inside wakeup(), just after + it has cleared the waiting_for_commit flag and before it has released the + LOCK_wait_commit mutex. And then it is possible for the waiter to find the + flag cleared in wait_for_prior_commit() and go finish up things and + de-allocate the LOCK_wait_commit and COND_wait_commit objects before the + waitee has time to be re-scheduled and finish unlocking the mutex and + signalling the condition. This would lead to the waitee accessing no + longer valid memory. + + To prevent this, we do an extra lock/unlock of the mutex here before + deallocation; this makes certain that any waitee has completed wakeup() + first. + */ + mysql_mutex_lock(&LOCK_wait_commit); + mysql_mutex_unlock(&LOCK_wait_commit); + + mysql_mutex_destroy(&LOCK_wait_commit); + mysql_cond_destroy(&COND_wait_commit); +} + + +void +wait_for_commit::wakeup(int wakeup_error) +{ + /* + We signal each waiter on their own condition and mutex (rather than using + pthread_cond_broadcast() or something like that). + + Otherwise we would need to somehow ensure that they were done + waking up before we could allow this THD to be destroyed, which would + be annoying and unnecessary. 
+ + Note that wakeup_subsequent_commits2() depends on this function being a + full memory barrier (it is, because it takes a mutex lock). + + */ + mysql_mutex_lock(&LOCK_wait_commit); + this->wakeup_error= wakeup_error; + /* Memory barrier to make wakeup_error visible to the waiter thread. */ + waitee.store(NULL, std::memory_order_release); + /* + Note that it is critical that the mysql_cond_signal() here is done while + still holding the mutex. As soon as we release the mutex, the waiter might + deallocate the condition object. + */ + mysql_cond_signal(&COND_wait_commit); + mysql_mutex_unlock(&LOCK_wait_commit); +} + + +/* + Register that the next commit of this THD should wait to complete until + commit in another THD (the waitee) has completed. + + The wait may occur explicitly, with the waiter sitting in + wait_for_prior_commit() until the waitee calls wakeup_subsequent_commits(). + + Alternatively, the TC (eg. binlog) may do the commits of both waitee and + waiter at once during group commit, resolving both of them in the right + order. + + Only one waitee can be registered for a waiter; it must be removed by + wait_for_prior_commit() or unregister_wait_for_prior_commit() before a new + one is registered. But it is ok for several waiters to register a wait for + the same waitee. It is also permissible for one THD to be both a waiter and + a waitee at the same time. +*/ +void +wait_for_commit::register_wait_for_prior_commit(wait_for_commit *waitee) +{ + DBUG_ASSERT(!this->waitee.load(std::memory_order_relaxed) + /* No prior registration allowed */); + wakeup_error= 0; + this->waitee.store(waitee, std::memory_order_relaxed); + + mysql_mutex_lock(&waitee->LOCK_wait_commit); + /* + If waitee is in the middle of wakeup, then there is nothing to wait for, + so we need not register. This is necessary to avoid a race in unregister, + see comments on wakeup_subsequent_commits2() for details. 
+ */ + if (waitee->wakeup_subsequent_commits_running) + this->waitee.store(NULL, std::memory_order_relaxed); + else + { + /* + Put ourself at the head of the waitee's list of transactions that must + wait for it to commit first. + */ + this->next_subsequent_commit= waitee->subsequent_commits_list; + waitee->subsequent_commits_list= this; + } + mysql_mutex_unlock(&waitee->LOCK_wait_commit); +} + + +/** + Waits for commit of another transaction to complete, as already registered + with register_wait_for_prior_commit(). If the commit already completed, + returns immediately. + + If ALLOW_KILL is set to true (the default), the wait can be aborted by a + kill. In case of kill, the wait registration is still removed, so another + call of unregister_wait_for_prior_commit() is needed to later retry the + wait. If ALLOW_KILL is set to false, then kill will be ignored and this + function will not return until the prior commit (if any) has called + wakeup_subsequent_commits(). + + If thd->backup_commit_lock is set, release it while waiting for other threads +*/ + +int +wait_for_commit::wait_for_prior_commit2(THD *thd, bool allow_kill) +{ + PSI_stage_info old_stage; + wait_for_commit *loc_waitee; + bool backup_lock_released= false; + + /* + Release MDL_BACKUP_COMMIT LOCK while waiting for other threads to commit + This is needed to avoid deadlock between the other threads (which not + yet have the MDL_BACKUP_COMMIT_LOCK) and any threads using + BACKUP LOCK BLOCK_COMMIT. 
+ */ + if (thd->backup_commit_lock && thd->backup_commit_lock->ticket) + { + backup_lock_released= true; + thd->mdl_context.release_lock(thd->backup_commit_lock->ticket); + thd->backup_commit_lock->ticket= 0; + } + + mysql_mutex_lock(&LOCK_wait_commit); + DEBUG_SYNC(thd, "wait_for_prior_commit_waiting"); + thd->ENTER_COND(&COND_wait_commit, &LOCK_wait_commit, + &stage_waiting_for_prior_transaction_to_commit, + &old_stage); + while ((loc_waitee= this->waitee.load(std::memory_order_relaxed)) && + (!allow_kill || likely(!thd->check_killed(1)))) + mysql_cond_wait(&COND_wait_commit, &LOCK_wait_commit); + if (!loc_waitee) + { + if (wakeup_error) + my_error(ER_PRIOR_COMMIT_FAILED, MYF(0)); + goto end; + } + /* + Wait was interrupted by kill. We need to unregister our wait and give the + error. But if a wakeup is already in progress, then we must ignore the + kill and not give error, otherwise we get inconsistency between waitee and + waiter as to whether we succeed or fail (eg. we may roll back but waitee + might attempt to commit both us and any subsequent commits waiting for us). + */ + mysql_mutex_lock(&loc_waitee->LOCK_wait_commit); + if (loc_waitee->wakeup_subsequent_commits_running) + { + /* We are being woken up; ignore the kill and just wait. */ + mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit); + do + { + mysql_cond_wait(&COND_wait_commit, &LOCK_wait_commit); + } while (this->waitee.load(std::memory_order_relaxed)); + if (wakeup_error) + my_error(ER_PRIOR_COMMIT_FAILED, MYF(0)); + goto end; + } + remove_from_list(&loc_waitee->subsequent_commits_list); + mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit); + this->waitee.store(NULL, std::memory_order_relaxed); + + wakeup_error= thd->killed_errno(); + if (!wakeup_error) + wakeup_error= ER_QUERY_INTERRUPTED; + my_message(wakeup_error, ER_THD(thd, wakeup_error), MYF(0)); + thd->EXIT_COND(&old_stage); + /* + Must do the DEBUG_SYNC() _after_ exit_cond(), as DEBUG_SYNC is not safe to + use within enter_cond/exit_cond. 
+ */ + DEBUG_SYNC(thd, "wait_for_prior_commit_killed"); + if (unlikely(backup_lock_released)) + thd->mdl_context.acquire_lock(thd->backup_commit_lock, + thd->variables.lock_wait_timeout); + return wakeup_error; + +end: + thd->EXIT_COND(&old_stage); + if (unlikely(backup_lock_released)) + thd->mdl_context.acquire_lock(thd->backup_commit_lock, + thd->variables.lock_wait_timeout); + return wakeup_error; +} + + +/* + Wakeup anyone waiting for us to have committed. + + Note about locking: + + We have a potential race or deadlock between wakeup_subsequent_commits() in + the waitee and unregister_wait_for_prior_commit() in the waiter. + + Both waiter and waitee needs to take their own lock before it is safe to take + a lock on the other party - else the other party might disappear and invalid + memory data could be accessed. But if we take the two locks in different + order, we may end up in a deadlock. + + The waiter needs to lock the waitee to delete itself from the list in + unregister_wait_for_prior_commit(). Thus wakeup_subsequent_commits() can not + hold its own lock while locking waiters, as this could lead to deadlock. + + So we need to prevent unregister_wait_for_prior_commit() running while wakeup + is in progress - otherwise the unregister could complete before the wakeup, + leading to incorrect spurious wakeup or accessing invalid memory. + + However, if we are in the middle of running wakeup_subsequent_commits(), then + there is no need for unregister_wait_for_prior_commit() in the first place - + the waiter can just do a normal wait_for_prior_commit(), as it will be + immediately woken up. + + So the solution to the potential race/deadlock is to set a flag in the waitee + that wakeup_subsequent_commits() is in progress. When this flag is set, + unregister_wait_for_prior_commit() becomes just wait_for_prior_commit(). + + Then also register_wait_for_prior_commit() needs to check if + wakeup_subsequent_commits() is running, and skip the registration if + so. 
This is needed in case a new waiter manages to register itself and
+  immediately try to unregister while wakeup_subsequent_commits() is
+  running. Else the new waiter would also wait rather than unregister, but it
+  would not be woken up until next wakeup, which could be potentially much
+  later than necessary.
+*/
+
+/*
+  Wake up every transaction registered to wait for this one to commit first.
+  wakeup_error is propagated to each waiter: non-zero means this (prior)
+  commit failed, and the woken waiters will report ER_PRIOR_COMMIT_FAILED
+  from their own wait (see wait_for_prior_commit2()).
+*/
+void
+wait_for_commit::wakeup_subsequent_commits2(int wakeup_error)
+{
+  wait_for_commit *waiter;
+
+  if (unlikely(wakeup_blocked))
+    return;
+
+  /*
+    Detach the whole waiter list under the lock, then wake the waiters
+    outside of it; wakeup_subsequent_commits_running guards against
+    concurrent unregistration while we traverse (see comment above).
+  */
+  mysql_mutex_lock(&LOCK_wait_commit);
+  wakeup_subsequent_commits_running= true;
+  waiter= subsequent_commits_list;
+  subsequent_commits_list= NULL;
+  mysql_mutex_unlock(&LOCK_wait_commit);
+
+  while (waiter)
+  {
+    /*
+      Important: we must grab the next pointer before waking up the waiter;
+      once the wakeup is done, the field could be invalidated at any time.
+    */
+    wait_for_commit *next= waiter->next_subsequent_commit;
+    waiter->wakeup(wakeup_error);
+    waiter= next;
+  }
+
+  /*
+    We need a full memory barrier between walking the list above, and clearing
+    the flag wakeup_subsequent_commits_running below. This barrier is needed
+    to ensure that no other thread will start to modify the list pointers
+    before we are done traversing the list.
+
+    But wait_for_commit::wakeup() does a full memory barrier already (it locks
+    a mutex), so no extra explicit barrier is needed here.
+  */
+  wakeup_subsequent_commits_running= false;
+  DBUG_EXECUTE_IF("inject_wakeup_subsequent_commits_sleep", my_sleep(21000););
+}
+
+
+/* Cancel a previously registered wait for another THD to commit before us. */
+void
+wait_for_commit::unregister_wait_for_prior_commit2()
+{
+  wait_for_commit *loc_waitee;
+
+  mysql_mutex_lock(&LOCK_wait_commit);
+  if ((loc_waitee= this->waitee.load(std::memory_order_relaxed)))
+  {
+    mysql_mutex_lock(&loc_waitee->LOCK_wait_commit);
+    if (loc_waitee->wakeup_subsequent_commits_running)
+    {
+      /*
+        When a wakeup is running, we cannot safely remove ourselves from the
+        list without corrupting it. Instead we can just wait, as wakeup is
+        already in progress and will thus be immediate.
+
+        See comments on wakeup_subsequent_commits2() for more details.
+      */
+      mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
+      while (this->waitee.load(std::memory_order_relaxed))
+        mysql_cond_wait(&COND_wait_commit, &LOCK_wait_commit);
+    }
+    else
+    {
+      /* Remove ourselves from the list in the waitee. */
+      remove_from_list(&loc_waitee->subsequent_commits_list);
+      mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
+      this->waitee.store(NULL, std::memory_order_relaxed);
+    }
+  }
+  wakeup_error= 0;
+  mysql_mutex_unlock(&LOCK_wait_commit);
+}
+
+
+/*
+  Append a new auto_increment interval (start, val, incr) to the list.
+  The interval is merged into the tail interval when possible (a zero
+  return from merge_if_contiguous() means the merge succeeded); otherwise
+  a new Discrete_interval is allocated and linked at the tail.
+
+  Returns 0 on success, non-zero on failure (allocation returned NULL).
+*/
+bool Discrete_intervals_list::append(ulonglong start, ulonglong val,
+                                     ulonglong incr)
+{
+  DBUG_ENTER("Discrete_intervals_list::append");
+  /* first, see if this can be merged with previous */
+  if ((head == NULL) || tail->merge_if_contiguous(start, val, incr))
+  {
+    /* it cannot, so need to add a new interval */
+    Discrete_interval *new_interval= new Discrete_interval(start, val, incr);
+    DBUG_RETURN(append(new_interval));
+  }
+  DBUG_RETURN(0);
+}
+
+/*
+  Link an already-allocated interval at the tail of the list.
+  Returns 1 if new_interval is NULL, 0 on success.
+*/
+bool Discrete_intervals_list::append(Discrete_interval *new_interval)
+{
+  DBUG_ENTER("Discrete_intervals_list::append");
+  if (unlikely(new_interval == NULL))
+    DBUG_RETURN(1);
+  DBUG_PRINT("info",("adding new auto_increment interval"));
+  if (head == NULL)
+    head= current= new_interval;
+  else
+    tail->next= new_interval;
+  tail= new_interval;
+  elements++;
+  DBUG_RETURN(0);
+}
+
+
+/* Deep-copy user and host names into this AUTHID, allocating on mem_root. */
+void AUTHID::copy(MEM_ROOT *mem_root, const LEX_CSTRING *user_name,
+                  const LEX_CSTRING *host_name)
+{
+  user.str= strmake_root(mem_root, user_name->str, user_name->length);
+  user.length= user_name->length;
+
+  host.str= strmake_root(mem_root, host_name->str, host_name->length);
+  host.length= host_name->length;
+}
+
+
+/*
+  Set from a string in 'user@host' format.
+  This method resembles parse_user(),
+  but does not need temporary buffers.
+*/
+void AUTHID::parse(const char *str, size_t length)
+{
+  /* Split on the LAST '@' so user names containing '@' still work. */
+  const char *p= strrchr(str, '@');
+  if (!p)
+  {
+    /* No '@' at all: everything is the user part, host stays empty. */
+    user.str= str;
+    user.length= length;
+    host= null_clex_str;
+  }
+  else
+  {
+    user.str= str;
+    user.length= (size_t) (p - str);
+    host.str= p + 1;
+    host.length= (size_t) (length - user.length - 1);
+    if (user.length && !host.length)
+      host= host_not_specified; // 'user@' -> 'user@%'
+  }
+  /* Silently truncate over-long parts to the server's maximum lengths. */
+  if (user.length > USERNAME_LENGTH)
+    user.length= USERNAME_LENGTH;
+  if (host.length > HOSTNAME_LENGTH)
+    host.length= HOSTNAME_LENGTH;
+}
+
+
+/* Deep-copy the database and object name onto mem_root. */
+void Database_qualified_name::copy(MEM_ROOT *mem_root,
+                                   const LEX_CSTRING &db,
+                                   const LEX_CSTRING &name)
+{
+  m_db.length= db.length;
+  m_db.str= strmake_root(mem_root, db.str, db.length);
+  m_name.length= name.length;
+  m_name.str= strmake_root(mem_root, name.str, name.length);
+}
+
+
+/*
+  Append the (optionally db-qualified) table name to str, each part quoted
+  via append_identifier().  The db part is only emitted when db.length is
+  non-zero.  Returns true on error (propagated from append_identifier() /
+  String::append()).
+*/
+bool Table_ident::append_to(THD *thd, String *str) const
+{
+  return (db.length &&
+          (append_identifier(thd, str, db.str, db.length) ||
+           str->append('.'))) ||
+         append_identifier(thd, str, table.str, table.length);
+}
+
+
+/* As Table_ident::append_to(), additionally appending ".column". */
+bool Qualified_column_ident::append_to(THD *thd, String *str) const
+{
+  return Table_ident::append_to(thd, str) || str->append('.') ||
+         append_identifier(thd, str, m_column.str, m_column.length);
+}
+
+
+#endif /* !defined(MYSQL_CLIENT) */
+
+
+/*
+  RAII helper: activate the statement arena of thd (if needed) on
+  construction, and restore the previously active arena on destruction.
+*/
+Query_arena_stmt::Query_arena_stmt(THD *_thd) :
+  thd(_thd)
+{
+  arena= thd->activate_stmt_arena_if_needed(&backup);
+}
+
+Query_arena_stmt::~Query_arena_stmt()
+{
+  /* arena is NULL when no arena switch was needed; nothing to restore. */
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+}
+
+
+/*
+  Convert a my_time_t timestamp plus microsecond part to MYSQL_TIME in the
+  session time zone.  A zero (ts, sec_part) yields the zero datetime, or
+  returns 1 (error) when fuzzydate contains TIME_NO_ZERO_DATE.  Also marks
+  the statement as time-zone dependent (TIME_ZONE_USED).
+*/
+bool THD::timestamp_to_TIME(MYSQL_TIME *ltime, my_time_t ts,
+                            ulong sec_part, date_mode_t fuzzydate)
+{
+  used|= TIME_ZONE_USED;
+  if (ts == 0 && sec_part == 0)
+  {
+    if (fuzzydate & TIME_NO_ZERO_DATE)
+      return 1;
+    set_zero_time(ltime, MYSQL_TIMESTAMP_DATETIME);
+  }
+  else
+  {
+    variables.time_zone->gmt_sec_to_TIME(ltime, ts);
+    ltime->second_part= sec_part;
+  }
+  return 0;
+}
+
+
+void THD::my_ok_with_recreate_info(const Recreate_info &info,
+                                   ulong
warn_count)
+{
+  char buf[80];
+  /*
+    Send OK with the ER_INSERT_INFO style message built from the
+    processed/duplicate row counts and the warning count.
+  */
+  my_snprintf(buf, sizeof(buf),
+              ER_THD(this, ER_INSERT_INFO),
+              (ulong) info.records_processed(),
+              (ulong) info.records_duplicate(),
+              warn_count);
+  my_ok(this, info.records_processed(), 0L, buf);
+}
+
+
+THD_list_iterator *THD_list_iterator::iterator()
+{
+  return &server_threads;
+}
+
+
+/*
+  Resolution context for ALTER DATABASE: server collation vs. the
+  current default collation of db.
+*/
+Charset_collation_context
+THD::charset_collation_context_alter_db(const char *db)
+{
+  return Charset_collation_context(variables.collation_server,
+                                   get_default_db_collation(this, db));
+}
+
+
+/*
+  Resolution context for CREATE TABLE in db: the database default
+  collation serves as both sides of the context.
+*/
+Charset_collation_context
+THD::charset_collation_context_create_table_in_db(const char *db)
+{
+  CHARSET_INFO *cs= get_default_db_collation(this, db);
+  return Charset_collation_context(cs, cs);
+}
+
+
+/*
+  Resolution context for ALTER TABLE: database default collation vs. the
+  table's current character set.
+*/
+Charset_collation_context
+THD::charset_collation_context_alter_table(const TABLE_SHARE *s)
+{
+  return Charset_collation_context(get_default_db_collation(this, s->db.str),
+                                   s->table_charset);
+}
+
+
+/*
+  Raise ER_UNKNOWN_COLLATION for name (interpreted as utf8mb4 for display),
+  then, if the loader collected a detailed error text, push it as a warning.
+*/
+void Charset_loader_server::raise_unknown_collation_error(const char *name) const
+{
+  ErrConvString err(name, &my_charset_utf8mb4_general_ci);
+  my_error(ER_UNKNOWN_COLLATION, MYF(0), err.ptr());
+  if (error[0])
+    push_warning_printf(current_thd,
+                        Sql_condition::WARN_LEVEL_WARN,
+                        ER_UNKNOWN_COLLATION, "%s", error);
+}
+
+
+/* Raise ER_COLLATION_CHARSET_MISMATCH for collation cl vs character set cs. */
+void Charset_loader_server::raise_not_applicable_error(const char *cs,
+                                                       const char *cl) const
+{
+  my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0), cl, cs);
+}
diff --git a/sql/sql_class.h b/sql/sql_class.h
new file mode 100644
index 00000000..beb33d83
--- /dev/null
+++ b/sql/sql_class.h
@@ -0,0 +1,8144 @@
+/*
+   Copyright (c) 2000, 2016, Oracle and/or its affiliates.
+   Copyright (c) 2009, 2022, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_CLASS_INCLUDED +#define SQL_CLASS_INCLUDED + +/* Classes in mysql */ + +#include +#include "dur_prop.h" +#include +#include "sql_const.h" +#include +#include "log.h" +#include "rpl_tblmap.h" +#include "mdl.h" +#include "field.h" // Create_field +#include "opt_trace_context.h" +#include "probes_mysql.h" +#include "sql_locale.h" /* my_locale_st */ +#include "sql_profile.h" /* PROFILING */ +#include "scheduler.h" /* thd_scheduler */ +#include "protocol.h" /* Protocol_text, Protocol_binary */ +#include "violite.h" /* vio_is_connected */ +#include "thr_lock.h" /* thr_lock_type, THR_LOCK_DATA, THR_LOCK_INFO */ +#include "thr_timer.h" +#include "thr_malloc.h" +#include "log_slow.h" /* LOG_SLOW_DISABLE_... 
*/ +#include +#include "sql_digest_stream.h" // sql_digest_state +#include +#include +#include +#include +#include +#include "session_tracker.h" +#include "backup.h" +#include "xa.h" +#include "ddl_log.h" /* DDL_LOG_STATE */ +#include "ha_handler_stats.h" // ha_handler_stats */ + +extern "C" +void set_thd_stage_info(void *thd, + const PSI_stage_info *new_stage, + PSI_stage_info *old_stage, + const char *calling_func, + const char *calling_file, + const unsigned int calling_line); + +#define THD_STAGE_INFO(thd, stage) \ + (thd)->enter_stage(&stage, __func__, __FILE__, __LINE__) + +#include "my_apc.h" +#include "rpl_gtid.h" + +#include "wsrep.h" +#include "wsrep_on.h" +#ifdef WITH_WSREP +#include +/* wsrep-lib */ +#include "wsrep_client_service.h" +#include "wsrep_client_state.h" +#include "wsrep_mutex.h" +#include "wsrep_condition_variable.h" + +class Wsrep_applier_service; +enum wsrep_consistency_check_mode { + NO_CONSISTENCY_CHECK, + CONSISTENCY_CHECK_DECLARED, + CONSISTENCY_CHECK_RUNNING, +}; +#endif /* WITH_WSREP */ + +class Reprepare_observer; +class Relay_log_info; +struct rpl_group_info; +struct rpl_parallel_thread; +class Rpl_filter; +class Query_log_event; +class Load_log_event; +class Log_event_writer; +class sp_rcontext; +class sp_cache; +class Lex_input_stream; +class Parser_state; +class Rows_log_event; +class Sroutine_hash_entry; +class user_var_entry; +struct Trans_binlog_info; +class rpl_io_thread_info; +class rpl_sql_thread_info; +#ifdef HAVE_REPLICATION +struct Slave_info; +#endif + +enum enum_ha_read_modes { RFIRST, RNEXT, RPREV, RLAST, RKEY, RNEXT_SAME }; +enum enum_duplicates { DUP_ERROR, DUP_REPLACE, DUP_UPDATE }; +enum enum_delay_key_write { DELAY_KEY_WRITE_NONE, DELAY_KEY_WRITE_ON, + DELAY_KEY_WRITE_ALL }; +enum enum_slave_exec_mode { SLAVE_EXEC_MODE_STRICT, + SLAVE_EXEC_MODE_IDEMPOTENT, + SLAVE_EXEC_MODE_LAST_BIT }; +enum enum_slave_run_triggers_for_rbr { SLAVE_RUN_TRIGGERS_FOR_RBR_NO, + SLAVE_RUN_TRIGGERS_FOR_RBR_YES, + 
SLAVE_RUN_TRIGGERS_FOR_RBR_LOGGING, + SLAVE_RUN_TRIGGERS_FOR_RBR_ENFORCE}; +enum enum_slave_type_conversions { SLAVE_TYPE_CONVERSIONS_ALL_LOSSY, + SLAVE_TYPE_CONVERSIONS_ALL_NON_LOSSY}; + +/* + COLUMNS_READ: A column is goind to be read. + COLUMNS_WRITE: A column is going to be written to. + MARK_COLUMNS_READ: A column is goind to be read. + A bit in read set is set to inform handler that the field + is to be read. If field list contains duplicates, then + thd->dup_field is set to point to the last found + duplicate. + MARK_COLUMNS_WRITE: A column is going to be written to. + A bit is set in write set to inform handler that it needs + to update this field in write_row and update_row. +*/ +enum enum_column_usage +{ COLUMNS_READ, COLUMNS_WRITE, MARK_COLUMNS_READ, MARK_COLUMNS_WRITE}; + +static inline bool should_mark_column(enum_column_usage column_usage) +{ return column_usage >= MARK_COLUMNS_READ; } + +enum enum_filetype { FILETYPE_CSV, FILETYPE_XML }; + +enum enum_binlog_row_image { + /** PKE in the before image and changed columns in the after image */ + BINLOG_ROW_IMAGE_MINIMAL= 0, + /** Whenever possible, before and after image contain all columns except blobs. */ + BINLOG_ROW_IMAGE_NOBLOB= 1, + /** All columns in both before and after image. 
*/ + BINLOG_ROW_IMAGE_FULL= 2 +}; + + +/* Bits for different SQL modes modes (including ANSI mode) */ +#define MODE_REAL_AS_FLOAT (1ULL << 0) +#define MODE_PIPES_AS_CONCAT (1ULL << 1) +#define MODE_ANSI_QUOTES (1ULL << 2) +#define MODE_IGNORE_SPACE (1ULL << 3) +#define MODE_IGNORE_BAD_TABLE_OPTIONS (1ULL << 4) +#define MODE_ONLY_FULL_GROUP_BY (1ULL << 5) +#define MODE_NO_UNSIGNED_SUBTRACTION (1ULL << 6) +#define MODE_NO_DIR_IN_CREATE (1ULL << 7) +#define MODE_POSTGRESQL (1ULL << 8) +#define MODE_ORACLE (1ULL << 9) +#define MODE_MSSQL (1ULL << 10) +#define MODE_DB2 (1ULL << 11) +#define MODE_MAXDB (1ULL << 12) +#define MODE_NO_KEY_OPTIONS (1ULL << 13) +#define MODE_NO_TABLE_OPTIONS (1ULL << 14) +#define MODE_NO_FIELD_OPTIONS (1ULL << 15) +#define MODE_MYSQL323 (1ULL << 16) +#define MODE_MYSQL40 (1ULL << 17) +#define MODE_ANSI (1ULL << 18) +#define MODE_NO_AUTO_VALUE_ON_ZERO (1ULL << 19) +#define MODE_NO_BACKSLASH_ESCAPES (1ULL << 20) +#define MODE_STRICT_TRANS_TABLES (1ULL << 21) +#define MODE_STRICT_ALL_TABLES (1ULL << 22) +#define MODE_NO_ZERO_IN_DATE (1ULL << 23) +#define MODE_NO_ZERO_DATE (1ULL << 24) +#define MODE_INVALID_DATES (1ULL << 25) +#define MODE_ERROR_FOR_DIVISION_BY_ZERO (1ULL << 26) +#define MODE_TRADITIONAL (1ULL << 27) +#define MODE_NO_AUTO_CREATE_USER (1ULL << 28) +#define MODE_HIGH_NOT_PRECEDENCE (1ULL << 29) +#define MODE_NO_ENGINE_SUBSTITUTION (1ULL << 30) +#define MODE_PAD_CHAR_TO_FULL_LENGTH (1ULL << 31) +/* SQL mode bits defined above are common for MariaDB and MySQL */ +#define MODE_MASK_MYSQL_COMPATIBLE 0xFFFFFFFFULL +/* The following modes are specific to MariaDB */ +#define MODE_EMPTY_STRING_IS_NULL (1ULL << 32) +#define MODE_SIMULTANEOUS_ASSIGNMENT (1ULL << 33) +#define MODE_TIME_ROUND_FRACTIONAL (1ULL << 34) +/* The following modes are specific to MySQL */ +#define MODE_MYSQL80_TIME_TRUNCATE_FRACTIONAL (1ULL << 32) + + +/* Bits for different old style modes */ +#define OLD_MODE_NO_DUP_KEY_WARNINGS_WITH_IGNORE (1 << 0) +#define 
OLD_MODE_NO_PROGRESS_INFO (1 << 1)
+#define OLD_MODE_ZERO_DATE_TIME_CAST (1 << 2)
+#define OLD_MODE_UTF8_IS_UTF8MB3 (1 << 3)
+#define OLD_MODE_IGNORE_INDEX_ONLY_FOR_JOIN (1 << 4)
+#define OLD_MODE_COMPAT_5_1_CHECKSUM (1 << 5)
+
+extern char internal_table_name[2];
+extern char empty_c_string[1];
+extern MYSQL_PLUGIN_IMPORT const char **errmesg;
+
+/* C-linkage accessors into THD, usable from plugins/services. */
+extern "C" LEX_STRING * thd_query_string (MYSQL_THD thd);
+extern "C" unsigned long long thd_query_id(const MYSQL_THD thd);
+extern "C" size_t thd_query_safe(MYSQL_THD thd, char *buf, size_t buflen);
+extern "C" const char *thd_priv_user(MYSQL_THD thd, size_t *length);
+extern "C" const char *thd_priv_host(MYSQL_THD thd, size_t *length);
+extern "C" const char *thd_user_name(MYSQL_THD thd);
+extern "C" const char *thd_client_host(MYSQL_THD thd);
+extern "C" const char *thd_client_ip(MYSQL_THD thd);
+extern "C" LEX_CSTRING *thd_current_db(MYSQL_THD thd);
+extern "C" int thd_current_status(MYSQL_THD thd);
+extern "C" enum enum_server_command thd_current_command(MYSQL_THD thd);
+
+/**
+  @class CSET_STRING
+  @brief Character set armed LEX_STRING
+*/
+class CSET_STRING
+{
+private:
+  LEX_STRING string;
+  CHARSET_INFO *cs;
+public:
+  /* Default constructor: empty string in the binary character set. */
+  CSET_STRING() : cs(&my_charset_bin)
+  {
+    string.str= NULL;
+    string.length= 0;
+  }
+  /* Wrap an existing buffer; cs_arg must be non-NULL. */
+  CSET_STRING(char *str_arg, size_t length_arg, CHARSET_INFO *cs_arg) :
+    cs(cs_arg)
+  {
+    DBUG_ASSERT(cs_arg != NULL);
+    string.str= str_arg;
+    string.length= length_arg;
+  }
+
+  inline char *str() const { return string.str; }
+  inline size_t length() const { return string.length; }
+  CHARSET_INFO *charset() const { return cs; }
+
+  friend LEX_STRING * thd_query_string (MYSQL_THD thd);
+};
+
+
+/*
+  Row counters produced by recreating a table (rows copied and duplicate
+  rows); reported to the client by THD::my_ok_with_recreate_info().
+*/
+class Recreate_info
+{
+  ha_rows m_records_copied;
+  ha_rows m_records_duplicate;
+public:
+  Recreate_info()
+   :m_records_copied(0),
+    m_records_duplicate(0)
+  { }
+  Recreate_info(ha_rows records_copied,
+                ha_rows records_duplicate)
+   :m_records_copied(records_copied),
+    m_records_duplicate(records_duplicate)
+  { }
+  ha_rows
records_copied() const { return m_records_copied; } + ha_rows records_duplicate() const { return m_records_duplicate; } + ha_rows records_processed() const + { + return m_records_copied + m_records_duplicate; + } +}; + + +#define TC_HEURISTIC_RECOVER_COMMIT 1 +#define TC_HEURISTIC_RECOVER_ROLLBACK 2 +extern ulong tc_heuristic_recover; + +typedef struct st_user_var_events +{ + user_var_entry *user_var_event; + char *value; + size_t length; + Item_result type; + uint charset_number; + bool unsigned_flag; +} BINLOG_USER_VAR_EVENT; + +/* + The COPY_INFO structure is used by INSERT/REPLACE code. + The schema of the row counting by the INSERT/INSERT ... ON DUPLICATE KEY + UPDATE code: + If a row is inserted then the copied variable is incremented. + If a row is updated by the INSERT ... ON DUPLICATE KEY UPDATE and the + new data differs from the old one then the copied and the updated + variables are incremented. + The touched variable is incremented if a row was touched by the update part + of the INSERT ... ON DUPLICATE KEY UPDATE no matter whether the row + was actually changed or not. +*/ +typedef struct st_copy_info { + ha_rows records; /**< Number of processed records */ + ha_rows deleted; /**< Number of deleted records */ + ha_rows updated; /**< Number of updated records */ + ha_rows copied; /**< Number of copied records */ + ha_rows accepted_rows; /**< Number of accepted original rows + (same as number of rows in RETURNING) */ + ha_rows error_count; + ha_rows touched; /* Number of touched records */ + enum enum_duplicates handle_duplicates; + int escape_char, last_errno; + bool ignore; + /* for INSERT ... UPDATE */ + List *update_fields; + List *update_values; + /* for VIEW ... 
WITH CHECK OPTION */ + TABLE_LIST *view; + TABLE_LIST *table_list; /* Normal table */ +} COPY_INFO; + + +class Key_part_spec :public Sql_alloc { +public: + Lex_ident field_name; + uint length; + bool generated, asc; + Key_part_spec(const LEX_CSTRING *name, uint len, bool gen= false) + : field_name(*name), length(len), generated(gen), asc(1) + {} + bool operator==(const Key_part_spec& other) const; + /** + Construct a copy of this Key_part_spec. field_name is copied + by-pointer as it is known to never change. At the same time + 'length' may be reset in mysql_prepare_create_table, and this + is why we supply it with a copy. + + @return If out of memory, 0 is returned and an error is set in + THD. + */ + Key_part_spec *clone(MEM_ROOT *mem_root) const + { return new (mem_root) Key_part_spec(*this); } + bool check_key_for_blob(const class handler *file) const; + bool check_key_length_for_blob() const; + bool check_primary_key_for_blob(const class handler *file) const + { + return check_key_for_blob(file) || check_key_length_for_blob(); + } + bool check_foreign_key_for_blob(const class handler *file) const + { + return check_key_for_blob(file) || check_key_length_for_blob(); + } + bool init_multiple_key_for_blob(const class handler *file); +}; + + +class Alter_drop :public Sql_alloc { +public: + enum drop_type { KEY, COLUMN, FOREIGN_KEY, CHECK_CONSTRAINT, PERIOD }; + const char *name; + enum drop_type type; + bool drop_if_exists; + Alter_drop(enum drop_type par_type,const char *par_name, bool par_exists) + :name(par_name), type(par_type), drop_if_exists(par_exists) + { + DBUG_ASSERT(par_name != NULL); + } + /** + Used to make a clone of this object for ALTER/CREATE TABLE + @sa comment for Key_part_spec::clone + */ + Alter_drop *clone(MEM_ROOT *mem_root) const + { return new (mem_root) Alter_drop(*this); } + const char *type_name() + { + return type == COLUMN ? "COLUMN" : + type == CHECK_CONSTRAINT ? "CONSTRAINT" : + type == PERIOD ? "PERIOD" : + type == KEY ? 
"INDEX" : "FOREIGN KEY"; + } +}; + + +class Alter_column :public Sql_alloc { +public: + LEX_CSTRING name; + LEX_CSTRING new_name; + Virtual_column_info *default_value; + bool alter_if_exists; + Alter_column(LEX_CSTRING par_name, Virtual_column_info *expr, bool par_exists) + :name(par_name), new_name{NULL, 0}, default_value(expr), alter_if_exists(par_exists) {} + Alter_column(LEX_CSTRING par_name, LEX_CSTRING _new_name, bool exists) + :name(par_name), new_name(_new_name), default_value(NULL), alter_if_exists(exists) {} + /** + Used to make a clone of this object for ALTER/CREATE TABLE + @sa comment for Key_part_spec::clone + */ + Alter_column *clone(MEM_ROOT *mem_root) const + { return new (mem_root) Alter_column(*this); } + bool is_rename() + { + DBUG_ASSERT(!new_name.str || !default_value); + return new_name.str; + } +}; + + +class Alter_rename_key : public Sql_alloc +{ +public: + LEX_CSTRING old_name; + LEX_CSTRING new_name; + bool alter_if_exists; + + Alter_rename_key(LEX_CSTRING old_name_arg, LEX_CSTRING new_name_arg, bool exists) + : old_name(old_name_arg), new_name(new_name_arg), alter_if_exists(exists) {} + + Alter_rename_key *clone(MEM_ROOT *mem_root) const + { return new (mem_root) Alter_rename_key(*this); } + +}; + + +/* An ALTER INDEX operation that changes the ignorability of an index. */ +class Alter_index_ignorability: public Sql_alloc +{ +public: + Alter_index_ignorability(const char *name, bool is_ignored, bool if_exists) : + m_name(name), m_is_ignored(is_ignored), m_if_exists(if_exists) + { + assert(name != NULL); + } + + const char *name() const { return m_name; } + bool if_exists() const { return m_if_exists; } + + /* The ignorability after the operation is performed. 
*/ + bool is_ignored() const { return m_is_ignored; } + Alter_index_ignorability *clone(MEM_ROOT *mem_root) const + { return new (mem_root) Alter_index_ignorability(*this); } + +private: + const char *m_name; + bool m_is_ignored; + bool m_if_exists; +}; + + +class Key :public Sql_alloc, public DDL_options { +public: + enum Keytype { PRIMARY, UNIQUE, MULTIPLE, FULLTEXT, SPATIAL, FOREIGN_KEY, + IGNORE_KEY}; + enum Keytype type; + KEY_CREATE_INFO key_create_info; + List columns; + LEX_CSTRING name; + engine_option_value *option_list; + bool generated; + bool invisible; + bool without_overlaps; + bool old; + Lex_ident period; + + Key(enum Keytype type_par, const LEX_CSTRING *name_arg, + ha_key_alg algorithm_arg, bool generated_arg, DDL_options_st ddl_options) + :DDL_options(ddl_options), + type(type_par), key_create_info(default_key_create_info), + name(*name_arg), option_list(NULL), generated(generated_arg), + invisible(false), without_overlaps(false), old(false) + { + key_create_info.algorithm= algorithm_arg; + } + Key(enum Keytype type_par, const LEX_CSTRING *name_arg, + KEY_CREATE_INFO *key_info_arg, + bool generated_arg, List *cols, + engine_option_value *create_opt, DDL_options_st ddl_options) + :DDL_options(ddl_options), + type(type_par), key_create_info(*key_info_arg), columns(*cols), + name(*name_arg), option_list(create_opt), generated(generated_arg), + invisible(false), without_overlaps(false), old(false) + {} + Key(const Key &rhs, MEM_ROOT *mem_root); + virtual ~Key() = default; + /* Equality comparison of keys (ignoring name) */ + friend bool is_foreign_key_prefix(Key *a, Key *b); + /** + Used to make a clone of this object for ALTER/CREATE TABLE + @sa comment for Key_part_spec::clone + */ + virtual Key *clone(MEM_ROOT *mem_root) const + { return new (mem_root) Key(*this, mem_root); } +}; + + +class Foreign_key: public Key { +public: + enum fk_match_opt { FK_MATCH_UNDEF, FK_MATCH_FULL, + FK_MATCH_PARTIAL, FK_MATCH_SIMPLE}; + LEX_CSTRING constraint_name; + 
LEX_CSTRING ref_db; + LEX_CSTRING ref_table; + List ref_columns; + enum enum_fk_option delete_opt, update_opt; + enum fk_match_opt match_opt; + Foreign_key(const LEX_CSTRING *name_arg, List *cols, + const LEX_CSTRING *constraint_name_arg, + const LEX_CSTRING *ref_db_arg, const LEX_CSTRING *ref_table_arg, + List *ref_cols, + enum_fk_option delete_opt_arg, enum_fk_option update_opt_arg, + fk_match_opt match_opt_arg, + DDL_options ddl_options) + :Key(FOREIGN_KEY, name_arg, &default_key_create_info, 0, cols, NULL, + ddl_options), + constraint_name(*constraint_name_arg), + ref_db(*ref_db_arg), ref_table(*ref_table_arg), ref_columns(*ref_cols), + delete_opt(delete_opt_arg), update_opt(update_opt_arg), + match_opt(match_opt_arg) + { + } + Foreign_key(const Foreign_key &rhs, MEM_ROOT *mem_root); + /** + Used to make a clone of this object for ALTER/CREATE TABLE + @sa comment for Key_part_spec::clone + */ + virtual Key *clone(MEM_ROOT *mem_root) const + { return new (mem_root) Foreign_key(*this, mem_root); } + /* Used to validate foreign key options */ + bool validate(List &table_fields); +}; + +typedef struct st_mysql_lock +{ + TABLE **table; + THR_LOCK_DATA **locks; + uint table_count,lock_count; + uint flags; +} MYSQL_LOCK; + + +class LEX_COLUMN : public Sql_alloc +{ +public: + String column; + privilege_t rights; + LEX_COLUMN (const String& x,const privilege_t & y ): column (x),rights (y) {} +}; + +class MY_LOCALE; + +/** + Query_cache_tls -- query cache thread local data. +*/ + +struct Query_cache_block; + +struct Query_cache_tls +{ + /* + 'first_query_block' should be accessed only via query cache + functions and methods to maintain proper locking. 
+ */ + Query_cache_block *first_query_block; + void set_first_query_block(Query_cache_block *first_query_block_arg) + { + first_query_block= first_query_block_arg; + } + + Query_cache_tls() :first_query_block(NULL) {} +}; + +/* SIGNAL / RESIGNAL / GET DIAGNOSTICS */ + +/** + This enumeration list all the condition item names of a condition in the + SQL condition area. +*/ +typedef enum enum_diag_condition_item_name +{ + /* + Conditions that can be set by the user (SIGNAL/RESIGNAL), + and by the server implementation. + */ + + DIAG_CLASS_ORIGIN= 0, + FIRST_DIAG_SET_PROPERTY= DIAG_CLASS_ORIGIN, + DIAG_SUBCLASS_ORIGIN= 1, + DIAG_CONSTRAINT_CATALOG= 2, + DIAG_CONSTRAINT_SCHEMA= 3, + DIAG_CONSTRAINT_NAME= 4, + DIAG_CATALOG_NAME= 5, + DIAG_SCHEMA_NAME= 6, + DIAG_TABLE_NAME= 7, + DIAG_COLUMN_NAME= 8, + DIAG_CURSOR_NAME= 9, + DIAG_MESSAGE_TEXT= 10, + DIAG_MYSQL_ERRNO= 11, + DIAG_ROW_NUMBER= 12, + LAST_DIAG_SET_PROPERTY= DIAG_ROW_NUMBER +} Diag_condition_item_name; + +/** + Name of each diagnostic condition item. + This array is indexed by Diag_condition_item_name. +*/ +extern const LEX_CSTRING Diag_condition_item_names[]; + +/** + These states are bit coded with HARD. For each state there must be a pair + , and _HARD. +*/ +enum killed_state +{ + NOT_KILLED= 0, + KILL_HARD_BIT= 1, /* Bit for HARD KILL */ + KILL_BAD_DATA= 2, + KILL_BAD_DATA_HARD= 3, + KILL_QUERY= 4, + KILL_QUERY_HARD= 5, + /* + ABORT_QUERY signals to the query processor to stop execution ASAP without + issuing an error. Instead a warning is issued, and when possible a partial + query result is returned to the client. + */ + ABORT_QUERY= 6, + ABORT_QUERY_HARD= 7, + KILL_TIMEOUT= 8, + KILL_TIMEOUT_HARD= 9, + /* + When binlog reading thread connects to the server it kills + all the binlog threads with the same ID. 
+ */ + KILL_SLAVE_SAME_ID= 10, + /* + All of the following killed states will kill the connection + KILL_CONNECTION must be the first of these and it must start with + an even number (becasue of HARD bit)! + */ + KILL_CONNECTION= 12, + KILL_CONNECTION_HARD= 13, + KILL_SYSTEM_THREAD= 14, + KILL_SYSTEM_THREAD_HARD= 15, + KILL_SERVER= 16, + KILL_SERVER_HARD= 17, + /* + Used in threadpool to signal wait timeout. + */ + KILL_WAIT_TIMEOUT= 18, + KILL_WAIT_TIMEOUT_HARD= 19 + +}; + +#define killed_mask_hard(killed) ((killed_state) ((killed) & ~KILL_HARD_BIT)) + +enum killed_type +{ + KILL_TYPE_ID, + KILL_TYPE_USER, + KILL_TYPE_QUERY +}; + +#include "sql_lex.h" /* Must be here */ + +class Delayed_insert; +class select_result; +class Time_zone; + +#define THD_SENTRY_MAGIC 0xfeedd1ff +#define THD_SENTRY_GONE 0xdeadbeef + +#define THD_CHECK_SENTRY(thd) DBUG_ASSERT(thd->dbug_sentry == THD_SENTRY_MAGIC) + +typedef struct system_variables +{ + /* + How dynamically allocated system variables are handled: + + The global_system_variables and max_system_variables are "authoritative" + They both should have the same 'version' and 'size'. + When attempting to access a dynamic variable, if the session version + is out of date, then the session version is updated and realloced if + neccessary and bytes copied from global to make up for missing data. 
+ + Note that one should use my_bool instead of bool here, as the variables + are used with my_getopt.c + */ + ulong dynamic_variables_version; + char* dynamic_variables_ptr; + uint dynamic_variables_head; /* largest valid variable offset */ + uint dynamic_variables_size; /* how many bytes are in use */ + + ulonglong max_heap_table_size; + ulonglong tmp_memory_table_size; + ulonglong tmp_disk_table_size; + ulonglong long_query_time; + ulonglong max_statement_time; + ulonglong optimizer_switch; + ulonglong optimizer_trace; + ulong optimizer_trace_max_mem_size; + sql_mode_t sql_mode; ///< which non-standard SQL behaviour should be enabled + sql_mode_t old_behavior; ///< which old SQL behaviour should be enabled + ulonglong option_bits; ///< OPTION_xxx constants, e.g. OPTION_PROFILING + ulonglong join_buff_space_limit; + ulonglong log_slow_filter; + ulonglong log_slow_verbosity; + ulonglong log_slow_disabled_statements; + ulonglong log_disabled_statements; + ulonglong note_verbosity; + ulonglong bulk_insert_buff_size; + ulonglong join_buff_size; + ulonglong sortbuff_size; + ulonglong default_regex_flags; + ulonglong max_mem_used; + + /** + Place holders to store Multi-source variables in sys_var.cc during + update and show of variables. 
+ */ + ulonglong slave_skip_counter; + ulonglong max_relay_log_size; + + ha_rows select_limit; + ha_rows max_join_size; + ha_rows expensive_subquery_limit; + ulong auto_increment_increment, auto_increment_offset; +#ifdef WITH_WSREP + /* + Stored values of the auto_increment_increment and auto_increment_offset + that are will be restored when wsrep_auto_increment_control will be set + to 'OFF', because the setting it to 'ON' leads to overwriting of the + original values (which are set by the user) by calculated ones (which + are based on the cluster size): + */ + ulong saved_auto_increment_increment, saved_auto_increment_offset; + ulong saved_lock_wait_timeout; + ulonglong wsrep_gtid_seq_no; +#endif /* WITH_WSREP */ + uint eq_range_index_dive_limit; + ulong column_compression_zlib_strategy; + ulong lock_wait_timeout; + ulong join_cache_level; + ulong max_allowed_packet; + ulong max_error_count; + ulong max_length_for_sort_data; + ulong max_recursive_iterations; + ulong max_sort_length; + ulong max_tmp_tables; + ulong max_insert_delayed_threads; + ulong min_examined_row_limit; + ulong net_buffer_length; + ulong net_interactive_timeout; + ulong net_read_timeout; + ulong net_retry_count; + ulong net_wait_timeout; + ulong net_write_timeout; + ulong optimizer_extra_pruning_depth; + ulong optimizer_prune_level; + ulong optimizer_search_depth; + ulong optimizer_selectivity_sampling_limit; + ulong optimizer_use_condition_selectivity; + ulong optimizer_max_sel_arg_weight; + ulong optimizer_max_sel_args; + ulong use_stat_tables; + double sample_percentage; + ulong histogram_size; + ulong histogram_type; + ulong preload_buff_size; + ulong profiling_history_size; + ulong read_buff_size; + ulong read_rnd_buff_size; + ulong mrr_buff_size; + ulong div_precincrement; + /* Total size of all buffers used by the subselect_rowid_merge_engine. 
*/ + ulong rowid_merge_buff_size; + ulong max_sp_recursion_depth; + ulong default_week_format; + ulong max_seeks_for_key; + ulong range_alloc_block_size; + ulong query_alloc_block_size; + ulong query_prealloc_size; + ulong trans_alloc_block_size; + ulong trans_prealloc_size; + ulong log_warnings; + ulong log_slow_max_warnings; + /* Flags for slow log filtering */ + ulong log_slow_rate_limit; + ulong binlog_format; ///< binlog format for this thd (see enum_binlog_format) + ulong binlog_row_image; + ulong progress_report_time; + ulong completion_type; + ulong query_cache_type; + ulong tx_isolation; + ulong updatable_views_with_limit; + ulong alter_algorithm; + int max_user_connections; + ulong server_id; + /** + In slave thread we need to know in behalf of which + thread the query is being run to replicate temp tables properly + */ + my_thread_id pseudo_thread_id; + /** + When replicating an event group with GTID, keep these values around so + slave binlog can receive the same GTID as the original. + */ + uint32 gtid_domain_id; + uint64 gtid_seq_no; + + uint group_concat_max_len; + + /** + Default transaction access mode. READ ONLY (true) or READ WRITE (false). 
+ */ + my_bool tx_read_only; + my_bool low_priority_updates; + my_bool query_cache_wlock_invalidate; + my_bool keep_files_on_create; + + my_bool old_mode; + my_bool old_passwords; + my_bool big_tables; + my_bool only_standard_compliant_cte; + my_bool query_cache_strip_comments; + my_bool sql_log_slow; + my_bool sql_log_bin; + my_bool binlog_annotate_row_events; + my_bool binlog_direct_non_trans_update; + my_bool column_compression_zlib_wrap; + + plugin_ref table_plugin; + plugin_ref tmp_table_plugin; + plugin_ref enforced_table_plugin; + + /* Only charset part of these variables is sensible */ + CHARSET_INFO *character_set_filesystem; + CHARSET_INFO *character_set_client; + CHARSET_INFO *character_set_results; + + /* Both charset and collation parts of these variables are important */ + CHARSET_INFO *collation_server; + CHARSET_INFO *collation_database; + CHARSET_INFO *collation_connection; + + /* Names. These will be allocated in buffers in thd */ + LEX_CSTRING default_master_connection; + + /* Error messages */ + MY_LOCALE *lc_messages; + const char ***errmsgs; /* lc_messages->errmsg->errmsgs */ + + /* Locale Support */ + MY_LOCALE *lc_time_names; + + Time_zone *time_zone; + + my_bool sysdate_is_now; + + /* deadlock detection */ + ulong wt_timeout_short, wt_deadlock_search_depth_short; + ulong wt_timeout_long, wt_deadlock_search_depth_long; + + my_bool wsrep_on; + my_bool wsrep_causal_reads; + uint wsrep_sync_wait; + ulong wsrep_retry_autocommit; + ulonglong wsrep_trx_fragment_size; + ulong wsrep_trx_fragment_unit; + ulong wsrep_OSU_method; + my_bool wsrep_dirty_reads; + double long_query_time_double, max_statement_time_double; + + my_bool pseudo_slave_mode; + + char *session_track_system_variables; + ulong session_track_transaction_info; + my_bool session_track_schema; + my_bool session_track_state_change; +#ifdef USER_VAR_TRACKING + my_bool session_track_user_variables; +#endif // USER_VAR_TRACKING + my_bool tcp_nodelay; + + ulong threadpool_priority; + + uint 
idle_transaction_timeout; + uint idle_readonly_transaction_timeout; + uint idle_write_transaction_timeout; + uint column_compression_threshold; + uint column_compression_zlib_level; + uint in_subquery_conversion_threshold; + + ulonglong max_rowid_filter_size; + + vers_asof_timestamp_t vers_asof_timestamp; + ulong vers_alter_history; + my_bool binlog_alter_two_phase; +} SV; + +/** + Per thread status variables. + Must be long/ulong up to last_system_status_var so that + add_to_status/add_diff_to_status can work. +*/ + +typedef struct system_status_var +{ + ulong column_compressions; + ulong column_decompressions; + ulong com_stat[(uint) SQLCOM_END]; + ulong com_create_tmp_table; + ulong com_drop_tmp_table; + ulong com_other; + + ulong com_stmt_prepare; + ulong com_stmt_reprepare; + ulong com_stmt_execute; + ulong com_stmt_send_long_data; + ulong com_stmt_fetch; + ulong com_stmt_reset; + ulong com_stmt_close; + + ulong com_register_slave; + ulong created_tmp_disk_tables_; + ulong created_tmp_tables_; + ulong ha_commit_count; + ulong ha_delete_count; + ulong ha_read_first_count; + ulong ha_read_last_count; + ulong ha_read_key_count; + ulong ha_read_next_count; + ulong ha_read_prev_count; + ulong ha_read_retry_count; + ulong ha_read_rnd_count; + ulong ha_read_rnd_next_count; + ulong ha_read_rnd_deleted_count; + + /* + This number doesn't include calls to the default implementation and + calls made by range access. The intent is to count only calls made by + BatchedKeyAccess. 
+ */ + ulong ha_mrr_init_count; + ulong ha_mrr_key_refills_count; + ulong ha_mrr_rowid_refills_count; + + ulong ha_rollback_count; + ulong ha_update_count; + ulong ha_write_count; + /* The following are for internal temporary tables */ + ulong ha_tmp_update_count; + ulong ha_tmp_write_count; + ulong ha_tmp_delete_count; + ulong ha_prepare_count; + ulong ha_icp_attempts; + ulong ha_icp_match; + ulong ha_discover_count; + ulong ha_savepoint_count; + ulong ha_savepoint_rollback_count; + ulong ha_external_lock_count; + + ulong opened_tables; + ulong opened_shares; + ulong opened_views; /* +1 opening a view */ + + ulong select_full_join_count_; + ulong select_full_range_join_count_; + ulong select_range_count_; + ulong select_range_check_count_; + ulong select_scan_count_; + ulong update_scan_count; + ulong delete_scan_count; + ulong executed_triggers; + ulong long_query_count; + ulong filesort_merge_passes_; + ulong filesort_range_count_; + ulong filesort_rows_; + ulong filesort_scan_count_; + ulong filesort_pq_sorts_; + ulong optimizer_join_prefixes_check_calls; + + /* Features used */ + ulong feature_custom_aggregate_functions; /* +1 when custom aggregate + functions are used */ + ulong feature_dynamic_columns; /* +1 when creating a dynamic column */ + ulong feature_fulltext; /* +1 when MATCH is used */ + ulong feature_gis; /* +1 opening a table with GIS features */ + ulong feature_invisible_columns; /* +1 opening a table with invisible column */ + ulong feature_json; /* +1 when JSON function appears in the statement */ + ulong feature_locale; /* +1 when LOCALE is set */ + ulong feature_subquery; /* +1 when subqueries are used */ + ulong feature_system_versioning; /* +1 opening a table WITH SYSTEM VERSIONING */ + ulong feature_application_time_periods; + /* +1 opening a table with application-time period */ + ulong feature_insert_returning; /* +1 when INSERT...RETURNING is used */ + ulong feature_timezone; /* +1 when XPATH is used */ + ulong feature_trigger; /* +1 
opening a table with triggers */ + ulong feature_xml; /* +1 when XPATH is used */ + ulong feature_window_functions; /* +1 when window functions are used */ + + /* From MASTER_GTID_WAIT usage */ + ulong master_gtid_wait_timeouts; /* Number of timeouts */ + ulong master_gtid_wait_time; /* Time in microseconds */ + ulong master_gtid_wait_count; + + ulong empty_queries; + ulong access_denied_errors; + ulong lost_connections; + ulong max_statement_time_exceeded; + /* + Number of times where column info was not + sent with prepared statement metadata. + */ + ulong skip_metadata_count; + + /* + Number of statements sent from the client + */ + ulong questions; + /* + IMPORTANT! + SEE last_system_status_var DEFINITION BELOW. + Below 'last_system_status_var' are all variables that cannot be handled + automatically by add_to_status()/add_diff_to_status(). + */ + ulonglong bytes_received; + ulonglong bytes_sent; + ulonglong rows_read; + ulonglong rows_sent; + ulonglong rows_tmp_read; + ulonglong binlog_bytes_written; + ulonglong table_open_cache_hits; + ulonglong table_open_cache_misses; + ulonglong table_open_cache_overflows; + ulonglong send_metadata_skips; + double last_query_cost; + double cpu_time, busy_time; + uint32 threads_running; + /* Don't initialize */ + /* Memory used for thread local storage */ + int64 max_local_memory_used; + volatile int64 local_memory_used; + /* Memory allocated for global usage */ + volatile int64 global_memory_used; +} STATUS_VAR; + +/* + This is used for 'SHOW STATUS'. 
It must be updated to the last ulong + variable in system_status_var which is makes sense to add to the global + counter +*/ + +#define last_system_status_var questions +#define last_cleared_system_status_var local_memory_used + +/** Number of contiguous global status variables */ +constexpr int COUNT_GLOBAL_STATUS_VARS= int(offsetof(STATUS_VAR, + last_system_status_var) / + sizeof(ulong)) + 1; + +/* + Global status variables +*/ + +extern ulong feature_files_opened_with_delayed_keys, feature_check_constraint; + +void add_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var); + +void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var, + STATUS_VAR *dec_var); + +uint calc_sum_of_all_status(STATUS_VAR *to); +static inline void calc_sum_of_all_status_if_needed(STATUS_VAR *to) +{ + if (to->local_memory_used == 0) + { + mysql_mutex_lock(&LOCK_status); + *to= global_status_var; + mysql_mutex_unlock(&LOCK_status); + calc_sum_of_all_status(to); + DBUG_ASSERT(to->local_memory_used); + } +} + +/* + Update global_memory_used. We have to do this with atomic_add as the + global value can change outside of LOCK_status. +*/ +static inline void update_global_memory_status(int64 size) +{ + DBUG_PRINT("info", ("global memory_used: %lld size: %lld", + (longlong) global_status_var.global_memory_used, + size)); + // workaround for gcc 4.2.4-1ubuntu4 -fPIE (from DEB_BUILD_HARDENING=1) + int64 volatile * volatile ptr= &global_status_var.global_memory_used; + my_atomic_add64_explicit(ptr, size, MY_MEMORY_ORDER_RELAXED); +} + + +static inline bool is_supported_parser_charset(CHARSET_INFO *cs) +{ + return MY_TEST(cs->mbminlen == 1 && cs->number != 17 /* filename */); +} + +/** THD registry */ +class THD_list_iterator +{ +protected: + I_List threads; + mutable mysql_rwlock_t lock; + +public: + + /** + Iterates registered threads. 
+ + @param action called for every element + @param argument opque argument passed to action + + @return + @retval 0 iteration completed successfully + @retval 1 iteration was interrupted (action returned 1) + */ + template int iterate(my_bool (*action)(THD *thd, T *arg), T *arg= 0) + { + int res= 0; + mysql_rwlock_rdlock(&lock); + I_List_iterator it(threads); + while (auto tmp= it++) + if ((res= action(tmp, arg))) + break; + mysql_rwlock_unlock(&lock); + return res; + } + static THD_list_iterator *iterator(); +}; + +/** + A counter of THDs + + It must be specified as a first base class of THD, so that increment is + done before any other THD constructors and decrement - after any other THD + destructors. + + Destructor unblocks close_conneciton() if there are no more THD's left. +*/ +struct THD_count +{ + static Atomic_counter count; + static uint value() { return static_cast(count); } + static uint connection_thd_count(); + THD_count() { count++; } + ~THD_count() { count--; } +}; + +#ifdef MYSQL_SERVER + +void free_tmp_table(THD *thd, TABLE *entry); + + +/* The following macro is to make init of Query_arena simpler */ +#ifdef DBUG_ASSERT_EXISTS +#define INIT_ARENA_DBUG_INFO is_backup_arena= 0; is_reprepared= FALSE; +#else +#define INIT_ARENA_DBUG_INFO +#endif + +class Query_arena +{ +public: + /* + List of items created in the parser for this query. Every item puts + itself to the list on creation (see Item::Item() for details)) + */ + Item *free_list; + MEM_ROOT *mem_root; // Pointer to current memroot +#ifdef DBUG_ASSERT_EXISTS + bool is_backup_arena; /* True if this arena is used for backup. */ + bool is_reprepared; +#endif + /* + The states relfects three diffrent life cycles for three + different types of statements: + Prepared statement: STMT_INITIALIZED -> STMT_PREPARED -> STMT_EXECUTED. + Stored procedure: STMT_INITIALIZED_FOR_SP -> STMT_EXECUTED. + Other statements: STMT_CONVENTIONAL_EXECUTION never changes. 
+ + Special case for stored procedure arguments: STMT_SP_QUERY_ARGUMENTS + This state never changes and used for objects + whose lifetime is whole duration of function call + (sp_rcontext, it's tables and items. etc). Such objects + should be deallocated after every execution of a stored + routine. Caller's arena/memroot can't be used for + placing such objects since memory allocated on caller's + arena not freed until termination of user's session. + */ + enum enum_state + { + STMT_INITIALIZED= 0, STMT_INITIALIZED_FOR_SP= 1, STMT_PREPARED= 2, + STMT_CONVENTIONAL_EXECUTION= 3, STMT_EXECUTED= 4, + STMT_SP_QUERY_ARGUMENTS= 5, STMT_ERROR= -1 + }; + + enum_state state; + +public: + /* We build without RTTI, so dynamic_cast can't be used. */ + enum Type + { + STATEMENT, PREPARED_STATEMENT, STORED_PROCEDURE + }; + + Query_arena(MEM_ROOT *mem_root_arg, enum enum_state state_arg) : + free_list(0), mem_root(mem_root_arg), state(state_arg) + { INIT_ARENA_DBUG_INFO; } + /* + This constructor is used only when Query_arena is created as + backup storage for another instance of Query_arena. 
+ */ + Query_arena() { INIT_ARENA_DBUG_INFO; } + + virtual Type type() const; + virtual ~Query_arena() = default; + + inline bool is_stmt_prepare() const { return state == STMT_INITIALIZED; } + inline bool is_stmt_prepare_or_first_sp_execute() const + { return (int)state < (int)STMT_PREPARED; } + inline bool is_stmt_prepare_or_first_stmt_execute() const + { return (int)state <= (int)STMT_PREPARED; } + inline bool is_stmt_execute() const + { return state == STMT_PREPARED || state == STMT_EXECUTED; } + inline bool is_conventional() const + { return state == STMT_CONVENTIONAL_EXECUTION; } + + inline void* alloc(size_t size) { return alloc_root(mem_root,size); } + inline void* calloc(size_t size) + { + void *ptr; + if (likely((ptr=alloc_root(mem_root,size)))) + bzero(ptr, size); + return ptr; + } + inline char *strdup(const char *str) + { return strdup_root(mem_root,str); } + inline char *strmake(const char *str, size_t size) + { return strmake_root(mem_root,str,size); } + inline void *memdup(const void *str, size_t size) + { return memdup_root(mem_root,str,size); } + inline void *memdup_w_gap(const void *str, size_t size, size_t gap) + { + void *ptr; + if (likely((ptr= alloc_root(mem_root,size+gap)))) + memcpy(ptr,str,size); + return ptr; + } + + void set_query_arena(Query_arena *set); + + void free_items(); + /* Close the active state associated with execution of this statement */ + virtual bool cleanup_stmt(bool /*restore_set_statement_vars*/); +}; + + +class Query_arena_memroot: public Query_arena, public Sql_alloc +{ +public: + Query_arena_memroot(MEM_ROOT *mem_root_arg, enum enum_state state_arg) : + Query_arena(mem_root_arg, state_arg) + {} + Query_arena_memroot() : Query_arena() + {} + + virtual ~Query_arena_memroot() = default; +}; + + +class Query_arena_stmt +{ + THD *thd; + Query_arena backup; + Query_arena *arena; + +public: + Query_arena_stmt(THD *_thd); + ~Query_arena_stmt(); + bool arena_replaced() + { + return arena != NULL; + } +}; + + +class 
Server_side_cursor; + +/* + Struct to catch changes in column metadata that is sent to client. + in the "result set metadata". Used to support + MARIADB_CLIENT_CACHE_METADATA. +*/ +struct send_column_info_state +{ + /* Last client charset (affects metadata) */ + CHARSET_INFO *last_charset= nullptr; + + /* Checksum, only used to check changes if 'immutable' is false*/ + uint32 checksum= 0; + + /* + Column info can only be changed by PreparedStatement::reprepare() + + There is a class of "weird" prepared statements like SELECT ? or SELECT @a + that are not immutable, and depend on input parameters or user variables + */ + bool immutable= false; + + bool initialized= false; + + /* Used by PreparedStatement::reprepare()*/ + void reset() + { + initialized= false; + checksum= 0; + } +}; + + +/** + @class Statement + @brief State of a single command executed against this connection. + + One connection can contain a lot of simultaneously running statements, + some of which could be: + - prepared, that is, contain placeholders, + - opened as cursors. We maintain 1 to 1 relationship between + statement and cursor - if user wants to create another cursor for his + query, we create another statement for it. + To perform some action with statement we reset THD part to the state of + that statement, do the action, and then save back modified state from THD + to the statement. It will be changed in near future, and Statement will + be used explicitly. +*/ + +class Statement: public ilink, public Query_arena +{ + Statement(const Statement &rhs); /* not implemented: */ + Statement &operator=(const Statement &rhs); /* non-copyable */ +public: + /* + Uniquely identifies each statement object in thread scope; change during + statement lifetime. 
FIXME: must be const + */ + ulong id; + + enum enum_column_usage column_usage; + + LEX_CSTRING name; /* name for named prepared statements */ + LEX *lex; // parse tree descriptor + my_hrtime_t hr_prepare_time; // time of preparation in microseconds + /* + Points to the query associated with this statement. It's const, but + we need to declare it char * because all table handlers are written + in C and need to point to it. + + Note that if we set query = NULL, we must at the same time set + query_length = 0, and protect the whole operation with + LOCK_thd_data mutex. To avoid crashes in races, if we do not + know that thd->query cannot change at the moment, we should print + thd->query like this: + (1) reserve the LOCK_thd_data mutex; + (2) print or copy the value of query and query_length + (3) release LOCK_thd_data mutex. + This printing is needed at least in SHOW PROCESSLIST and SHOW + ENGINE INNODB STATUS. + */ + CSET_STRING query_string; + /* + If opt_query_cache_strip_comments is set, this contains query without + comments. If not set, it contains pointer to query_string. + */ + String base_query; + + + inline char *query() const { return query_string.str(); } + inline uint32 query_length() const + { + return static_cast(query_string.length()); + } + inline char *query_end() const + { + return query_string.str() + query_string.length(); + } + CHARSET_INFO *query_charset() const { return query_string.charset(); } + void set_query_inner(const CSET_STRING &string_arg) + { + query_string= string_arg; + } + void set_query_inner(char *query_arg, uint32 query_length_arg, + CHARSET_INFO *cs_arg) + { + set_query_inner(CSET_STRING(query_arg, query_length_arg, cs_arg)); + } + void reset_query_inner() + { + set_query_inner(CSET_STRING()); + } + /** + Name of the current (default) database. + + If there is the current (default) database, "db.str" contains its name. If + there is no current (default) database, "db.str" is NULL and "db.length" is + 0. 
In other words, db must either be NULL, or contain a + valid database name. + */ + + LEX_CSTRING db; + + send_column_info_state column_info_state; + + /* This is set to 1 of last call to send_result_to_client() was ok */ + my_bool query_cache_is_applicable; + + /* This constructor is called for backup statements */ + Statement() = default; + + Statement(LEX *lex_arg, MEM_ROOT *mem_root_arg, + enum enum_state state_arg, ulong id_arg); + virtual ~Statement(); + + /* Assign execution context (note: not all members) of given stmt to self */ + virtual void set_statement(Statement *stmt); + void set_n_backup_statement(Statement *stmt, Statement *backup); + void restore_backup_statement(Statement *stmt, Statement *backup); + /* return class type */ + virtual Type type() const; +}; + + +/** + Container for all statements created/used in a connection. + Statements in Statement_map have unique Statement::id (guaranteed by id + assignment in Statement::Statement) + Non-empty statement names are unique too: attempt to insert a new statement + with duplicate name causes older statement to be deleted + + Statements are auto-deleted when they are removed from the map and when the + map is deleted. +*/ + +class Statement_map +{ +public: + Statement_map(); + + int insert(THD *thd, Statement *statement); + + Statement *find_by_name(const LEX_CSTRING *name) + { + Statement *stmt; + stmt= (Statement*)my_hash_search(&names_hash, (uchar*)name->str, + name->length); + return stmt; + } + + Statement *find(ulong id) + { + if (last_found_statement == 0 || id != last_found_statement->id) + { + Statement *stmt; + stmt= (Statement *) my_hash_search(&st_hash, (uchar *) &id, sizeof(id)); + if (stmt && stmt->name.str) + return NULL; + last_found_statement= stmt; + } + return last_found_statement; + } + /* + Close all cursors of this connection that use tables of a storage + engine that has transaction-specific state and therefore can not + survive COMMIT or ROLLBACK. 
Currently all but MyISAM cursors are closed. + */ + void close_transient_cursors(); + void erase(Statement *statement); + /* Erase all statements (calls Statement destructor) */ + void reset(); + ~Statement_map(); +private: + HASH st_hash; + HASH names_hash; + I_List transient_cursor_list; + Statement *last_found_statement; +}; + +struct st_savepoint { + struct st_savepoint *prev; + char *name; + uint length; + Ha_trx_info *ha_list; + /** State of metadata locks before this savepoint was set. */ + MDL_savepoint mdl_savepoint; +}; + +/** + @class Security_context + @brief A set of THD members describing the current authenticated user. +*/ + +class Security_context { +public: + Security_context() + :master_access(NO_ACL), + db_access(NO_ACL) + {} /* Remove gcc warning */ + /* + host - host of the client + user - user of the client, set to NULL until the user has been read from + the connection + priv_user - The user privilege we are using. May be "" for anonymous user. + ip - client IP + */ + const char *host; + const char *user, *ip; + char priv_user[USERNAME_LENGTH]; + char proxy_user[USERNAME_LENGTH + MAX_HOSTNAME + 5]; + /* The host privilege we are using */ + char priv_host[MAX_HOSTNAME]; + /* The role privilege we are using */ + char priv_role[USERNAME_LENGTH]; + /* The external user (if available) */ + char *external_user; + /* points to host if host is available, otherwise points to ip */ + const char *host_or_ip; + privilege_t master_access; /* Global privileges from mysql.user */ + privilege_t db_access; /* Privileges for current db */ + + bool password_expired; + + void init(); + void destroy(); + void skip_grants(); + inline char *priv_host_name() + { + return (*priv_host ? 
priv_host : (char *)"%"); + } + + bool set_user(char *user_arg); + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + bool + change_security_context(THD *thd, + LEX_CSTRING *definer_user, + LEX_CSTRING *definer_host, + LEX_CSTRING *db, + Security_context **backup); + + void + restore_security_context(THD *thd, Security_context *backup); +#endif + bool user_matches(Security_context *); + /** + Check global access + @param want_access The required privileges + @param match_any if the security context must match all or any of the req. + * privileges. + @return True if the security context fulfills the access requirements. + */ + bool check_access(const privilege_t want_access, bool match_any = false); + bool is_priv_user(const char *user, const char *host); +}; + + +/** + A registry for item tree transformations performed during + query optimization. We register only those changes which require + a rollback to re-execute a prepared statement or stored procedure + yet another time. +*/ + +struct Item_change_record; +class Item_change_list +{ + I_List change_list; +public: + void nocheck_register_item_tree_change(Item **place, Item *old_value, + MEM_ROOT *runtime_memroot); + void check_and_register_item_tree_change(Item **place, Item **new_value, + MEM_ROOT *runtime_memroot); + void rollback_item_tree_changes(); + void move_elements_to(Item_change_list *to) + { + change_list.move_elements_to(&to->change_list); + } + bool is_empty() { return change_list.is_empty(); } +}; + + +class Item_change_list_savepoint: public Item_change_list +{ +public: + Item_change_list_savepoint(Item_change_list *list) + { + list->move_elements_to(this); + } + void rollback(Item_change_list *list) + { + list->rollback_item_tree_changes(); + move_elements_to(list); + } + ~Item_change_list_savepoint() + { + DBUG_ASSERT(is_empty()); + } +}; + + +/** + Type of locked tables mode. + See comment for THD::locked_tables_mode for complete description. 
+*/ + +enum enum_locked_tables_mode +{ + LTM_NONE= 0, + LTM_LOCK_TABLES, + LTM_PRELOCKED, + /* + TODO: remove LTM_PRELOCKED_UNDER_LOCK_TABLES: it is never used apart from + LTM_LOCK_TABLES. + */ + LTM_PRELOCKED_UNDER_LOCK_TABLES, + LTM_always_last +}; + +/** + The following structure is an extension to TABLE_SHARE and is + exclusively for temporary tables. + + @note: + Although, TDC_element has data members (like next, prev & + all_tables) to store the list of TABLE_SHARE & TABLE objects + related to a particular TABLE_SHARE, they cannot be moved to + TABLE_SHARE in order to be reused for temporary tables. This + is because, as concurrent threads iterating through hash of + TDC_element's may need access to all_tables, but if all_tables + is made part of TABLE_SHARE, then TDC_element->share->all_tables + is not always guaranteed to be valid, as TDC_element can live + longer than TABLE_SHARE. +*/ +struct TMP_TABLE_SHARE : public TABLE_SHARE +{ +private: + /* + Link to all temporary table shares. Declared as private to + avoid direct manipulation with those objects. One should + use methods of I_P_List template instead. + */ + TMP_TABLE_SHARE *tmp_next; + TMP_TABLE_SHARE **tmp_prev; + + friend struct All_tmp_table_shares; + +public: + /* + Doubly-linked (back-linked) lists of used and unused TABLE objects + for this share. + */ + All_share_tables_list all_tmp_tables; +}; + +/** + Helper class which specifies which members of TMP_TABLE_SHARE are + used for participation in the list of temporary tables. +*/ + +struct All_tmp_table_shares +{ + static inline TMP_TABLE_SHARE **next_ptr(TMP_TABLE_SHARE *l) + { + return &l->tmp_next; + } + static inline TMP_TABLE_SHARE ***prev_ptr(TMP_TABLE_SHARE *l) + { + return &l->tmp_prev; + } +}; + +/* Also used in rpl_rli.h. */ +typedef I_P_List All_tmp_tables_list; + +/** + Class that holds information about tables which were opened and locked + by the thread. 
It is also used to save/restore this information in + push_open_tables_state()/pop_open_tables_state(). +*/ + +class Open_tables_state +{ +public: + /** + As part of class THD, this member is set during execution + of a prepared statement. When it is set, it is used + by the locking subsystem to report a change in table metadata. + + When Open_tables_state part of THD is reset to open + a system or INFORMATION_SCHEMA table, the member is cleared + to avoid spurious ER_NEED_REPREPARE errors -- system and + INFORMATION_SCHEMA tables are not subject to metadata version + tracking. + @sa check_and_update_table_version() + */ + Reprepare_observer *m_reprepare_observer; + + /** + List of regular tables in use by this thread. Contains temporary and + base tables that were opened with @see open_tables(). + */ + TABLE *open_tables; + + /** + A list of temporary tables used by this thread. This includes + user-level temporary tables, created with CREATE TEMPORARY TABLE, + and internal temporary tables, created, e.g., to resolve a SELECT, + or for an intermediate table used in ALTER. + */ + All_tmp_tables_list *temporary_tables; + + /* + Derived tables. + */ + TABLE *derived_tables; + + /* + Temporary tables created for recursive table references. + */ + TABLE *rec_tables; + + /* + During a MySQL session, one can lock tables in two modes: automatic + or manual. In automatic mode all necessary tables are locked just before + statement execution, and all acquired locks are stored in 'lock' + member. Unlocking takes place automatically as well, when the + statement ends. + Manual mode comes into play when a user issues a 'LOCK TABLES' + statement. In this mode the user can only use the locked tables. + Trying to use any other tables will give an error. + The locked tables are also stored in this member, however, + thd->locked_tables_mode is turned on. Manual locking is described in + the 'LOCK_TABLES' chapter of the MySQL manual. + See also lock_tables() for details. 
+ */ + MYSQL_LOCK *lock; + + /* + CREATE-SELECT keeps an extra lock for the table being + created. This field is used to keep the extra lock available for + lower level routines, which would otherwise miss that lock. + */ + MYSQL_LOCK *extra_lock; + + /* + Enum enum_locked_tables_mode and locked_tables_mode member are + used to indicate whether the so-called "locked tables mode" is on, + and what kind of mode is active. + + Locked tables mode is used when it's necessary to open and + lock many tables at once, for usage across multiple + (sub-)statements. + This may be necessary either for queries that use stored functions + and triggers, in which case the statements inside functions and + triggers may be executed many times, or for implementation of + LOCK TABLES, in which case the opened tables are reused by all + subsequent statements until a call to UNLOCK TABLES. + + The kind of locked tables mode employed for stored functions and + triggers is also called "prelocked mode". + In this mode, first open_tables() call to open the tables used + in a statement analyses all functions used by the statement + and adds all indirectly used tables to the list of tables to + open and lock. + It also marks the parse tree of the statement as requiring + prelocking. After that, lock_tables() locks the entire list + of tables and changes THD::locked_tables_modeto LTM_PRELOCKED. + All statements executed inside functions or triggers + use the prelocked tables, instead of opening their own ones. + Prelocked mode is turned off automatically once close_thread_tables() + of the main statement is called. + */ + enum enum_locked_tables_mode locked_tables_mode; + uint current_tablenr; + + enum enum_flags { + BACKUPS_AVAIL = (1U << 0) /* There are backups available */ + }; + + /* + Flags with information about the open tables state. + */ + uint state_flags; + /** + This constructor initializes Open_tables_state instance which can only + be used as backup storage. 
To prepare Open_tables_state instance for + operations which open/lock/close tables (e.g. open_table()) one has to + call init_open_tables_state(). + */ + Open_tables_state() : state_flags(0U) { } + + void set_open_tables_state(Open_tables_state *state) + { + *this= *state; + } + + void reset_open_tables_state() + { + open_tables= 0; + temporary_tables= 0; + derived_tables= 0; + rec_tables= 0; + extra_lock= 0; + lock= 0; + locked_tables_mode= LTM_NONE; + state_flags= 0U; + m_reprepare_observer= NULL; + } +}; + + +/** + Storage for backup of Open_tables_state. Must + be used only to open system tables (TABLE_CATEGORY_SYSTEM + and TABLE_CATEGORY_LOG). +*/ + +class Open_tables_backup: public Open_tables_state +{ +public: + /** + When we backup the open tables state to open a system + table or tables, we want to save state of metadata + locks which were acquired before the backup. It is used + to release metadata locks on system tables after they are + no longer used. + */ + MDL_savepoint mdl_system_tables_svp; +}; + +/** + @class Sub_statement_state + @brief Used to save context when executing a function or trigger + + operations on stat tables aren't technically a sub-statement, but they are + similar in a sense that they cannot change the transaction status. 
+*/ + +/* Defines used for Sub_statement_state::in_sub_stmt */ + +#define SUB_STMT_TRIGGER 1 +#define SUB_STMT_FUNCTION 2 +#define SUB_STMT_STAT_TABLES 4 + + +class Sub_statement_state +{ +public: + Discrete_interval auto_inc_interval_for_cur_row; + Discrete_intervals_list auto_inc_intervals_forced; + SAVEPOINT *savepoints; + ulonglong option_bits; + ulonglong first_successful_insert_id_in_prev_stmt; + ulonglong first_successful_insert_id_in_cur_stmt, insert_id_for_cur_row; + ulonglong limit_found_rows; + ulonglong tmp_tables_size; + ulonglong client_capabilities; + ulonglong cuted_fields, sent_row_count, examined_row_count; + ulonglong affected_rows; + ulonglong bytes_sent_old; + ha_handler_stats handler_stats; + ulong tmp_tables_used; + ulong tmp_tables_disk_used; + ulong query_plan_fsort_passes; + ulong query_plan_flags; + uint in_sub_stmt; /* 0, SUB_STMT_TRIGGER or SUB_STMT_FUNCTION */ + bool enable_slow_log; + bool last_insert_id_used; + enum enum_check_fields count_cuted_fields; +}; + + +/* Flags for the THD::system_thread variable */ +enum enum_thread_type +{ + NON_SYSTEM_THREAD= 0, + SYSTEM_THREAD_DELAYED_INSERT= 1, + SYSTEM_THREAD_SLAVE_IO= 2, + SYSTEM_THREAD_SLAVE_SQL= 4, + SYSTEM_THREAD_EVENT_SCHEDULER= 8, + SYSTEM_THREAD_EVENT_WORKER= 16, + SYSTEM_THREAD_BINLOG_BACKGROUND= 32, + SYSTEM_THREAD_SLAVE_BACKGROUND= 64, + SYSTEM_THREAD_GENERIC= 128, + SYSTEM_THREAD_SEMISYNC_MASTER_BACKGROUND= 256 +}; + +inline char const * +show_system_thread(enum_thread_type thread) +{ +#define RETURN_NAME_AS_STRING(NAME) case (NAME): return #NAME + switch (thread) { + static char buf[64]; + RETURN_NAME_AS_STRING(NON_SYSTEM_THREAD); + RETURN_NAME_AS_STRING(SYSTEM_THREAD_DELAYED_INSERT); + RETURN_NAME_AS_STRING(SYSTEM_THREAD_SLAVE_IO); + RETURN_NAME_AS_STRING(SYSTEM_THREAD_SLAVE_SQL); + RETURN_NAME_AS_STRING(SYSTEM_THREAD_EVENT_SCHEDULER); + RETURN_NAME_AS_STRING(SYSTEM_THREAD_EVENT_WORKER); + RETURN_NAME_AS_STRING(SYSTEM_THREAD_SLAVE_BACKGROUND); + 
RETURN_NAME_AS_STRING(SYSTEM_THREAD_SEMISYNC_MASTER_BACKGROUND); + default: + snprintf(buf, sizeof(buf), "", thread); + return buf; + } +#undef RETURN_NAME_AS_STRING +} + +/** + This class represents the interface for internal error handlers. + Internal error handlers are exception handlers used by the server + implementation. +*/ + +class Internal_error_handler +{ +protected: + Internal_error_handler() : + m_prev_internal_handler(NULL) + {} + + virtual ~Internal_error_handler() = default; + +public: + /** + Handle a sql condition. + This method can be implemented by a subclass to achieve any of the + following: + - mask a warning/error internally, prevent exposing it to the user, + - mask a warning/error and throw another one instead. + When this method returns true, the sql condition is considered + 'handled', and will not be propagated to upper layers. + It is the responsability of the code installing an internal handler + to then check for trapped conditions, and implement logic to recover + from the anticipated conditions trapped during runtime. + + This mechanism is similar to C++ try/throw/catch: + - 'try' correspond to THD::push_internal_handler(), + - 'throw' correspond to my_error(), + which invokes my_message_sql(), + - 'catch' correspond to checking how/if an internal handler was invoked, + before removing it from the exception stack with + THD::pop_internal_handler(). + + @param thd the calling thread + @param cond the condition raised. + @return true if the condition is handled + */ + virtual bool handle_condition(THD *thd, + uint sql_errno, + const char* sqlstate, + Sql_condition::enum_warning_level *level, + const char* msg, + Sql_condition ** cond_hdl) = 0; + +private: + Internal_error_handler *m_prev_internal_handler; + friend class THD; +}; + + +/** + Implements the trivial error handler which cancels all error states + and prevents an SQLSTATE to be set. 
  Remembers the first error
*/

class Dummy_error_handler : public Internal_error_handler
{
  uint m_unhandled_errors;   // number of conditions intercepted so far
  uint first_error;          // sql_errno of the first condition; 0 if none yet
public:
  Dummy_error_handler()
    : m_unhandled_errors(0), first_error(0)
  {}
  /* Swallow every condition, recording only the first error number. */
  bool handle_condition(THD *thd,
                        uint sql_errno,
                        const char* sqlstate,
                        Sql_condition::enum_warning_level *level,
                        const char* msg,
                        Sql_condition ** cond_hdl)
  {
    m_unhandled_errors++;
    if (!first_error)
      first_error= sql_errno;
    return TRUE; // Ignore error
  }
  /* True if at least one condition was intercepted. */
  bool any_error() { return m_unhandled_errors != 0; }
  /* The first intercepted error number, or 0 if none. */
  uint got_error() { return first_error; }
};

/**
  Implements the trivial error handler which counts errors as they happen.
*/

class Counting_error_handler : public Internal_error_handler
{
public:
  int errors;   // number of WARN_LEVEL_ERROR conditions observed
  /* Count errors; returns false so conditions still propagate. */
  bool handle_condition(THD *thd,
                        uint sql_errno,
                        const char* sqlstate,
                        Sql_condition::enum_warning_level *level,
                        const char* msg,
                        Sql_condition ** cond_hdl)
  {
    if (*level == Sql_condition::WARN_LEVEL_ERROR)
      errors++;
    return false;
  }
  Counting_error_handler() : errors(0) {}
};


/**
  This class is an internal error handler implementation for
  DROP TABLE statements. The thing is that there may be warnings during
  execution of these statements, which should not be exposed to the user.
  This class is intended to silence such warnings.
*/

class Drop_table_error_handler : public Internal_error_handler
{
public:
  Drop_table_error_handler() = default;

public:
  /* Defined out of line; decides which conditions to silence. */
  bool handle_condition(THD *thd,
                        uint sql_errno,
                        const char* sqlstate,
                        Sql_condition::enum_warning_level *level,
                        const char* msg,
                        Sql_condition ** cond_hdl);

private:
};


/**
  Internal error handler to process an error from MDL_context::upgrade_lock()
  and mysql_lock_tables(). Used by implementations of HANDLER READ and
  LOCK TABLES LOCAL.
*/

class MDL_deadlock_and_lock_abort_error_handler: public Internal_error_handler
{
public:
  virtual
  bool handle_condition(THD *thd,
                        uint sql_errno,
                        const char *sqlstate,
                        Sql_condition::enum_warning_level *level,
                        const char* msg,
                        Sql_condition **cond_hdl);

  /* True when the trapped condition requires the caller to reopen tables. */
  bool need_reopen() const { return m_need_reopen; };
  /* Must be called before each use: m_need_reopen has no constructor init. */
  void init() { m_need_reopen= FALSE; };
private:
  bool m_need_reopen;
};


/* Demotes every error condition to a warning; conditions keep propagating. */
class Turn_errors_to_warnings_handler : public Internal_error_handler
{
public:
  Turn_errors_to_warnings_handler() = default;
  bool handle_condition(THD *thd,
                        uint sql_errno,
                        const char* sqlstate,
                        Sql_condition::enum_warning_level *level,
                        const char* msg,
                        Sql_condition ** cond_hdl)
  {
    *cond_hdl= NULL;
    if (*level == Sql_condition::WARN_LEVEL_ERROR)
      *level= Sql_condition::WARN_LEVEL_WARN;
    return(0);  // "not handled": the (possibly demoted) condition continues
  }
};


/* Swallows plain warnings; errors and notes still propagate. */
struct Suppress_warnings_error_handler : public Internal_error_handler
{
  bool handle_condition(THD *thd,
                        uint sql_errno,
                        const char *sqlstate,
                        Sql_condition::enum_warning_level *level,
                        const char *msg,
                        Sql_condition **cond_hdl)
  {
    return *level == Sql_condition::WARN_LEVEL_WARN;
  }
};



/**
  Tables that were locked with LOCK TABLES statement.

  Encapsulates a list of TABLE_LIST instances for tables
  locked by LOCK TABLES statement, memory root for metadata locks,
  and, generally, the context of LOCK TABLES statement.

  In LOCK TABLES mode, the locked tables are kept open between
  statements.
  Therefore, we can't allocate metadata locks on execution memory
  root -- as well as tables, the locks need to stay around till
  UNLOCK TABLES is called.
  The locks are allocated in the memory root encapsulated in this
  class.

  Some SQL commands, like FLUSH TABLE or ALTER TABLE, demand that
  the tables they operate on are closed, at least temporarily.
  This class encapsulates a list of TABLE_LIST instances, one
  for each base table from LOCK TABLES list,
  which helps conveniently close the TABLEs when it's necessary
  and later reopen them.

  Implemented in sql_base.cc
*/

class Locked_tables_list
{
public:
  /* Memory root owning the TABLE_LIST copies and metadata locks. */
  MEM_ROOT m_locked_tables_root;
private:
  TABLE_LIST *m_locked_tables;
  /* Tail pointer for O(1) append to m_locked_tables. */
  TABLE_LIST **m_locked_tables_last;
  /** An auxiliary array used only in reopen_tables(). */
  TABLE_LIST **m_reopen_array;
  /**
    Count the number of tables in m_locked_tables list. We can't
    rely on thd->lock->table_count because it excludes
    non-transactional temporary tables. We need to know
    an exact number of TABLE objects.
  */
  uint m_locked_tables_count;
public:
  bool some_table_marked_for_reopen;

  Locked_tables_list()
   :m_locked_tables(NULL),
    m_locked_tables_last(&m_locked_tables),
    m_reopen_array(NULL),
    m_locked_tables_count(0),
    some_table_marked_for_reopen(0)
  {
    init_sql_alloc(key_memory_locked_table_list, &m_locked_tables_root,
                   MEM_ROOT_BLOCK_SIZE, 0, MYF(MY_THREAD_SPECIFIC));
  }
  int unlock_locked_tables(THD *thd);
  int unlock_locked_table(THD *thd, MDL_ticket *mdl_ticket);
  ~Locked_tables_list()
  {
    reset();
  }
  void reset();
  bool init_locked_tables(THD *thd);
  TABLE_LIST *locked_tables() { return m_locked_tables; }
  void unlink_from_list(THD *thd, TABLE_LIST *table_list,
                        bool remove_from_locked_tables);
  void unlink_all_closed_tables(THD *thd,
                                MYSQL_LOCK *lock,
                                size_t reopen_count);
  bool reopen_tables(THD *thd, bool need_reopen);
  bool restore_lock(THD *thd, TABLE_LIST *dst_table_list, TABLE *table,
                    MYSQL_LOCK *lock);
  void add_back_last_deleted_lock(TABLE_LIST *dst_table_list);
  void mark_table_for_reopen(TABLE *table);
};


/**
  Storage engine specific thread local data.
*/

struct Ha_data
{
  /**
    Storage engine specific thread local data.
    Lifetime: one user connection.
  */
  void *ha_ptr;
  /**
    0: Life time: one statement within a transaction. If @@autocommit is
    on, also represents the entire transaction.
    @sa trans_register_ha()

    1: Life time: one transaction within a connection.
    If the storage engine does not participate in a transaction,
    this should not be used.
    @sa trans_register_ha()
  */
  Ha_trx_info ha_info[2];
  /**
    NULL: engine is not bound to this thread
    non-NULL: engine is bound to this thread, engine shutdown forbidden
  */
  plugin_ref lock;
  Ha_data() :ha_ptr(NULL) {}

  /* Return the slot to its initial, unbound state. */
  void reset()
  {
    ha_ptr= nullptr;
    for (auto &info : ha_info)
      info.reset();
    lock= nullptr;
  }
};

/**
  An instance of the global read lock in a connection.
  Implemented in lock.cc.
*/

class Global_read_lock
{
public:
  enum enum_grl_state
  {
    GRL_NONE,
    GRL_ACQUIRED,
    GRL_ACQUIRED_AND_BLOCKS_COMMIT
  };

  Global_read_lock()
    : m_state(GRL_NONE),
      m_mdl_global_read_lock(NULL)
  {}

  bool lock_global_read_lock(THD *thd);
  void unlock_global_read_lock(THD *thd);
  bool make_global_read_lock_block_commit(THD *thd);
  bool is_acquired() const { return m_state != GRL_NONE; }
  void set_explicit_lock_duration(THD *thd);
private:
  enum_grl_state m_state;
  /**
    Global read lock is acquired in two steps:
    1. acquire MDL_BACKUP_FTWRL1 in BACKUP namespace to prohibit DDL and DML
    2. upgrade to MDL_BACKUP_FTWRL2 to prohibit commits
  */
  MDL_ticket *m_mdl_global_read_lock;
};


/*
  Class to facilitate the commit of one transactions waiting for the commit of
  another transaction to complete first.

  This is used during (parallel) replication, to allow different transactions
  to be applied in parallel, but still commit in order.

  The transaction that wants to wait for a prior commit must first register
  to wait with register_wait_for_prior_commit(waitee). Such registration
  must be done holding the waitee->LOCK_wait_commit, to prevent the other
  THD from disappearing during the registration.
+ + Then during commit, if a THD is registered to wait, it will call + wait_for_prior_commit() as part of ha_commit_trans(). If no wait is + registered, or if the waitee for has already completed commit, then + wait_for_prior_commit() returns immediately. + + And when a THD that may be waited for has completed commit (more precisely + commit_ordered()), then it must call wakeup_subsequent_commits() to wake + up any waiters. Note that this must be done at a point that is guaranteed + to be later than any waiters registering themselves. It is safe to call + wakeup_subsequent_commits() multiple times, as waiters are removed from + registration as part of the wakeup. + + The reason for separate register and wait calls is that this allows to + register the wait early, at a point where the waited-for THD is known to + exist. And then the actual wait can be done much later, where the + waited-for THD may have been long gone. By registering early, the waitee + can signal before disappearing. +*/ +struct wait_for_commit +{ + /* + The LOCK_wait_commit protects the fields subsequent_commits_list and + wakeup_subsequent_commits_running (for a waitee), and the pointer + waitee and associated COND_wait_commit (for a waiter). + */ + mysql_mutex_t LOCK_wait_commit; + mysql_cond_t COND_wait_commit; + /* List of threads that did register_wait_for_prior_commit() on us. */ + wait_for_commit *subsequent_commits_list; + /* Link field for entries in subsequent_commits_list. */ + wait_for_commit *next_subsequent_commit; + /* + Our waitee, if we did register_wait_for_prior_commit(), and were not + yet woken up. Else NULL. + + When this is cleared for wakeup, the COND_wait_commit condition is + signalled. + + This pointer is protected by LOCK_wait_commit. But there is also a "fast + path" where the waiter compares this to NULL without holding the lock. + Such read must be done with acquire semantics (and all corresponding + writes done with release semantics). 
This ensures that a wakeup with error + is reliably detected as (waitee==NULL && wakeup_error != 0). + */ + std::atomic waitee; + /* + Generic pointer for use by the transaction coordinator to optimise the + waiting for improved group commit. + + Currently used by binlog TC to signal that a waiter is ready to commit, so + that the waitee can grab it and group commit it directly. It is free to be + used by another transaction coordinator for similar purposes. + */ + void *opaque_pointer; + /* The wakeup error code from the waitee. 0 means no error. */ + int wakeup_error; + /* + Flag set when wakeup_subsequent_commits_running() is active, see comments + on that function for details. + */ + bool wakeup_subsequent_commits_running; + /* + This flag can be set when a commit starts, but has not completed yet. + It is used by binlog group commit to allow a waiting transaction T2 to + join the group commit of an earlier transaction T1. When T1 has queued + itself for group commit, it will set the commit_started flag. Then when + T2 becomes ready to commit and needs to wait for T1 to commit first, T2 + can queue itself before waiting, and thereby participate in the same + group commit as T1. + */ + bool commit_started; + /* + Set to temporarily ignore calls to wakeup_subsequent_commits(). The + caller must arrange that another wakeup_subsequent_commits() gets called + later after wakeup_blocked has been set back to false. + + This is used for parallel replication with temporary tables. + Temporary tables require strict single-threaded operation. The normal + optimization, of doing wakeup_subsequent_commits early and overlapping + part of the commit with the following transaction, is not safe. Thus + when temporary tables are replicated, wakeup is blocked until the + event group is fully done. 
+ */ + bool wakeup_blocked; + + void register_wait_for_prior_commit(wait_for_commit *waitee); + int wait_for_prior_commit(THD *thd, bool allow_kill=true) + { + /* + Quick inline check, to avoid function call and locking in the common case + where no wakeup is registered, or a registered wait was already signalled. + */ + if (waitee.load(std::memory_order_acquire)) + return wait_for_prior_commit2(thd, allow_kill); + else + { + if (wakeup_error) + my_error(ER_PRIOR_COMMIT_FAILED, MYF(0)); + return wakeup_error; + } + } + void wakeup_subsequent_commits(int wakeup_error_arg) + { + /* + Do the check inline, so only the wakeup case takes the cost of a function + call for every commmit. + + Note that the check is done without locking. It is the responsibility of + the user of the wakeup facility to ensure that no waiters can register + themselves after the last call to wakeup_subsequent_commits(). + + This avoids having to take another lock for every commit, which would be + pointless anyway - even if we check under lock, there is nothing to + prevent a waiter from arriving just after releasing the lock. + */ + if (subsequent_commits_list) + wakeup_subsequent_commits2(wakeup_error_arg); + } + void unregister_wait_for_prior_commit() + { + if (waitee.load(std::memory_order_relaxed)) + unregister_wait_for_prior_commit2(); + else + wakeup_error= 0; + } + /* + Remove a waiter from the list in the waitee. Used to unregister a wait. + The caller must be holding the locks of both waiter and waitee. 
+ */ + void remove_from_list(wait_for_commit **next_ptr_ptr) + { + wait_for_commit *cur; + + while ((cur= *next_ptr_ptr) != NULL) + { + if (cur == this) + { + *next_ptr_ptr= this->next_subsequent_commit; + break; + } + next_ptr_ptr= &cur->next_subsequent_commit; + } + waitee.store(NULL, std::memory_order_relaxed); + } + + void wakeup(int wakeup_error); + + int wait_for_prior_commit2(THD *thd, bool allow_kill); + void wakeup_subsequent_commits2(int wakeup_error); + void unregister_wait_for_prior_commit2(); + + wait_for_commit(); + ~wait_for_commit(); + void reinit(); +}; + + +class Sp_caches +{ +public: + sp_cache *sp_proc_cache; + sp_cache *sp_func_cache; + sp_cache *sp_package_spec_cache; + sp_cache *sp_package_body_cache; + Sp_caches() + :sp_proc_cache(NULL), + sp_func_cache(NULL), + sp_package_spec_cache(NULL), + sp_package_body_cache(NULL) + { } + ~Sp_caches() + { + // All caches must be freed by the caller explicitly + DBUG_ASSERT(sp_proc_cache == NULL); + DBUG_ASSERT(sp_func_cache == NULL); + DBUG_ASSERT(sp_package_spec_cache == NULL); + DBUG_ASSERT(sp_package_body_cache == NULL); + } + void sp_caches_swap(Sp_caches &rhs) + { + swap_variables(sp_cache*, sp_proc_cache, rhs.sp_proc_cache); + swap_variables(sp_cache*, sp_func_cache, rhs.sp_func_cache); + swap_variables(sp_cache*, sp_package_spec_cache, rhs.sp_package_spec_cache); + swap_variables(sp_cache*, sp_package_body_cache, rhs.sp_package_body_cache); + } + void sp_caches_clear(); +}; + + +extern "C" void my_message_sql(uint error, const char *str, myf MyFlags); + + +class Gap_time_tracker; + +/* + Thread context for Gap_time_tracker class. +*/ +class Gap_time_tracker_data +{ +public: + Gap_time_tracker_data(): bill_to(NULL) {} + + Gap_time_tracker *bill_to; + ulonglong start_time; + + void init() { bill_to = NULL; } +}; + +/** + Support structure for asynchronous group commit, or more generally + any asynchronous operation that needs to finish before server writes + response to client. 
+ + An engine, or any other server component, can signal that there is + a pending operation by incrementing a counter, i.e inc_pending_ops() + and that pending operation is finished by decrementing that counter + dec_pending_ops(). + + NOTE: Currently, pending operations can not fail, i.e there is no + way to pass a return code in dec_pending_ops() + + The server does not write response to the client before the counter + becomes 0. In case of group commit it ensures that data is persistent + before success reported to client, i.e durability in ACID. +*/ +struct thd_async_state +{ + enum class enum_async_state + { + NONE, + SUSPENDED, /* do_command() did not finish, and needs to be resumed */ + RESUMED /* do_command() is resumed*/ + }; + enum_async_state m_state{enum_async_state::NONE}; + + /* Stuff we need to resume do_command where we finished last time*/ + enum enum_server_command m_command{COM_SLEEP}; + LEX_STRING m_packet{0,0}; + + mysql_mutex_t m_mtx; + mysql_cond_t m_cond; + + /** Pending counter*/ + Atomic_counter m_pending_ops=0; + +#ifndef DBUG_OFF + /* Checks */ + pthread_t m_dbg_thread; +#endif + + thd_async_state() + { + mysql_mutex_init(PSI_NOT_INSTRUMENTED, &m_mtx, 0); + mysql_cond_init(PSI_INSTRUMENT_ME, &m_cond, 0); + } + + /* + Currently only used with threadpool, one can "suspend" and "resume" a THD. + Suspend only means leaving do_command earlier, after saving some state. + Resume is continuing suspended THD's do_command(), from where it finished last time. + */ + bool try_suspend() + { + bool ret; + mysql_mutex_lock(&m_mtx); + DBUG_ASSERT(m_state == enum_async_state::NONE); + DBUG_ASSERT(m_pending_ops >= 0); + + if(m_pending_ops) + { + ret=true; + m_state= enum_async_state::SUSPENDED; + } + else + { + /* + If there is no pending operations, can't suspend, since + nobody can resume it. 
+ */ + ret=false; + } + mysql_mutex_unlock(&m_mtx); + return ret; + } + + ~thd_async_state() + { + wait_for_pending_ops(); + mysql_mutex_destroy(&m_mtx); + mysql_cond_destroy(&m_cond); + } + + /* + Increment pending asynchronous operations. + The client response may not be written if + this count > 0. + So, without threadpool query needs to wait for + the operations to finish. + With threadpool, THD can be suspended and resumed + when this counter goes to 0. + */ + void inc_pending_ops() + { + mysql_mutex_lock(&m_mtx); + +#ifndef DBUG_OFF + /* + Check that increments are always done by the same thread. + */ + if (!m_pending_ops) + m_dbg_thread= pthread_self(); + else + DBUG_ASSERT(pthread_equal(pthread_self(),m_dbg_thread)); +#endif + + m_pending_ops++; + mysql_mutex_unlock(&m_mtx); + } + + int dec_pending_ops(enum_async_state* state) + { + int ret; + mysql_mutex_lock(&m_mtx); + ret= --m_pending_ops; + if (!ret) + mysql_cond_signal(&m_cond); + *state = m_state; + mysql_mutex_unlock(&m_mtx); + return ret; + } + + /* + This is used for "dirty" reading pending ops, + when dirty read is OK. + */ + int pending_ops() + { + return m_pending_ops; + } + + /* Wait for pending operations to finish.*/ + void wait_for_pending_ops() + { + /* + It is fine to read m_pending_ops and compare it with 0, + without mutex protection. + + The value is only incremented by the current thread, and will + be decremented by another one, thus "dirty" may show positive number + when it is really 0, but this is not a problem, and the only + bad thing from that will be rechecking under mutex. 
+ */ + if (!pending_ops()) + return; + + mysql_mutex_lock(&m_mtx); + DBUG_ASSERT(m_pending_ops >= 0); + while (m_pending_ops) + mysql_cond_wait(&m_cond, &m_mtx); + mysql_mutex_unlock(&m_mtx); + } +}; + + +/** + @class THD + For each client connection we create a separate thread with THD serving as + a thread/connection descriptor +*/ + +class THD: public THD_count, /* this must be first */ + public Statement, + /* + This is to track items changed during execution of a prepared + statement/stored procedure. It's created by + nocheck_register_item_tree_change() in memory root of THD, + and freed in rollback_item_tree_changes(). + For conventional execution it's always empty. + */ + public Item_change_list, + public MDL_context_owner, + public Open_tables_state, + public Sp_caches +{ +private: + inline bool is_stmt_prepare() const + { DBUG_ASSERT(0); return Statement::is_stmt_prepare(); } + + inline bool is_stmt_prepare_or_first_sp_execute() const + { DBUG_ASSERT(0); return Statement::is_stmt_prepare_or_first_sp_execute(); } + + inline bool is_stmt_prepare_or_first_stmt_execute() const + { DBUG_ASSERT(0); return Statement::is_stmt_prepare_or_first_stmt_execute(); } + + inline bool is_conventional() const + { DBUG_ASSERT(0); return Statement::is_conventional(); } + +public: + MDL_context mdl_context; + + /* Used to execute base64 coded binlog events in MySQL server */ + Relay_log_info* rli_fake; + rpl_group_info* rgi_fake; + /* Slave applier execution context */ + rpl_group_info* rgi_slave; + + union { + rpl_io_thread_info *rpl_io_info; + rpl_sql_thread_info *rpl_sql_info; + } system_thread_info; + /* Used for BACKUP LOCK */ + MDL_ticket *mdl_backup_ticket, *mdl_backup_lock; + /* Used to register that thread has a MDL_BACKUP_WAIT_COMMIT lock */ + MDL_request *backup_commit_lock; + + void reset_for_next_command(bool do_clear_errors= 1); + /* + Constant for THD::where initialization in the beginning of every query. 
+ + It's needed because we do not save/restore THD::where normally during + primary (non subselect) query execution. + */ + static const char * const DEFAULT_WHERE; + +#ifdef EMBEDDED_LIBRARY + struct st_mysql *mysql; + unsigned long client_stmt_id; + unsigned long client_param_count; + struct st_mysql_bind *client_params; + char *extra_data; + ulong extra_length; + struct st_mysql_data *cur_data; + struct st_mysql_data *first_data; + struct st_mysql_data **data_tail; + void clear_data_list(); + struct st_mysql_data *alloc_new_dataset(); + /* + In embedded server it points to the statement that is processed + in the current query. We store some results directly in statement + fields then. + */ + struct st_mysql_stmt *current_stmt; +#endif +#ifdef HAVE_QUERY_CACHE + Query_cache_tls query_cache_tls; +#endif + NET net; // client connection descriptor + /** Aditional network instrumentation for the server only. */ + NET_SERVER m_net_server_extension; + scheduler_functions *scheduler; // Scheduler for this connection + Protocol *protocol; // Current protocol + Protocol_text protocol_text; // Normal protocol + Protocol_binary protocol_binary; // Binary protocol + HASH user_vars; // hash for user variables + String packet; // dynamic buffer for network I/O + String convert_buffer; // buffer for charset conversions + struct my_rnd_struct rand; // used for authentication + struct system_variables variables; // Changeable local variables + struct system_status_var status_var; // Per thread statistic vars + struct system_status_var org_status_var; // For user statistics + struct system_status_var *initial_status_var; /* used by show status */ + ha_handler_stats handler_stats; // Handler statistics + THR_LOCK_INFO lock_info; // Locking info of this thread + /** + Protects THD data accessed from other threads: + - thd->query and thd->query_length (used by SHOW ENGINE + INNODB STATUS and SHOW PROCESSLIST + - thd->db (used in SHOW PROCESSLIST) + Is locked when THD is deleted. 
+ */ + mutable mysql_mutex_t LOCK_thd_data; + /* + Protects: + - kill information + - mysys_var (used by KILL statement and shutdown). + - Also ensures that THD is not deleted while mutex is hold + */ + mutable mysql_mutex_t LOCK_thd_kill; + + /* all prepared statements and cursors of this connection */ + Statement_map stmt_map; + + /* Last created prepared statement */ + Statement *last_stmt; + Statement *cur_stmt= 0; + + inline void set_last_stmt(Statement *stmt) + { last_stmt= (is_error() ? NULL : stmt); } + inline void clear_last_stmt() { last_stmt= NULL; } + + /* + A pointer to the stack frame of handle_one_connection(), + which is called first in the thread for handling a client + */ + char *thread_stack; + + /** + Currently selected catalog. + */ + char *catalog; + + /** + @note + Some members of THD (currently 'Statement::db', + 'catalog' and 'query') are set and alloced by the slave SQL thread + (for the THD of that thread); that thread is (and must remain, for now) + the only responsible for freeing these 3 members. If you add members + here, and you add code to set them in replication, don't forget to + free_them_and_set_them_to_0 in replication properly. For details see + the 'err:' label of the handle_slave_sql() in sql/slave.cc. + + @see handle_slave_sql + */ + + Security_context main_security_ctx; + Security_context *security_ctx; + Security_context *security_context() const { return security_ctx; } + void set_security_context(Security_context *sctx) { security_ctx = sctx; } + + /* + Points to info-string that we show in SHOW PROCESSLIST + You are supposed to update thd->proc_info only if you have coded + a time-consuming piece that MySQL can get stuck in for a long time. + + Set it using the thd_proc_info(THD *thread, const char *message) + macro/function. + + This member is accessed and assigned without any synchronization. + Therefore, it may point only to constant (statically + allocated) strings, which memory won't go away over time. 
+ */ + const char *proc_info; + + void set_psi(PSI_thread *psi) + { + my_atomic_storeptr((void*volatile*)&m_psi, psi); + } + + PSI_thread* get_psi() + { + return static_cast(my_atomic_loadptr((void*volatile*)&m_psi)); + } + +private: + unsigned int m_current_stage_key; + + /** Performance schema thread instrumentation for this session. */ + PSI_thread *m_psi; + +public: + void enter_stage(const PSI_stage_info *stage, + const char *calling_func, + const char *calling_file, + const unsigned int calling_line) + { + DBUG_PRINT("THD::enter_stage", ("%s at %s:%d", stage->m_name, + calling_file, calling_line)); + DBUG_ASSERT(stage); + m_current_stage_key= stage->m_key; + proc_info= stage->m_name; +#if defined(ENABLED_PROFILING) + profiling.status_change(proc_info, calling_func, calling_file, + calling_line); +#endif +#ifdef HAVE_PSI_THREAD_INTERFACE + m_stage_progress_psi= MYSQL_SET_STAGE(m_current_stage_key, calling_file, calling_line); +#endif + } + + void backup_stage(PSI_stage_info *stage) + { + stage->m_key= m_current_stage_key; + stage->m_name= proc_info; + } + + const char *get_proc_info() const + { return proc_info; } + + /* + Used in error messages to tell user in what part of MySQL we found an + error. E. g. when where= "having clause", if fix_fields() fails, user + will know that the error was in having clause. + */ + const char *where; + + /* Needed by MariaDB semi sync replication */ + Trans_binlog_info *semisync_info; + /* If this is a semisync slave connection. */ + bool semi_sync_slave; + ulonglong client_capabilities; /* What the client supports */ + ulong max_client_packet_length; + + HASH handler_tables_hash; + /* + A thread can hold named user-level locks. This variable + contains granted tickets if a lock is present. See item_func.cc and + chapter 'Miscellaneous functions', for functions GET_LOCK, RELEASE_LOCK. 
+ */ + HASH ull_hash; + /* Hash of used seqeunces (for PREVIOUS value) */ + HASH sequences; +#ifdef DBUG_ASSERT_EXISTS + uint dbug_sentry; // watch out for memory corruption +#endif + struct st_my_thread_var *mysys_var; + + /* Original charset number from the first client packet, or COM_CHANGE_USER*/ + CHARSET_INFO *org_charset; +private: + /* + Type of current query: COM_STMT_PREPARE, COM_QUERY, etc. Set from + first byte of the packet in do_command() + */ + enum enum_server_command m_command; + +public: + uint32 file_id; // for LOAD DATA INFILE + /* remote (peer) port */ + uint16 peer_port; + my_time_t start_time; // start_time and its sec_part + ulong start_time_sec_part; // are almost always used separately + my_hrtime_t user_time; + // track down slow pthread_create + ulonglong prior_thr_create_utime, thr_create_utime; + ulonglong start_utime, utime_after_lock, utime_after_query; + /* This can be used by handlers to send signals to the SQL level */ + ulonglong replication_flags; + // Process indicator + struct { + /* + true, if the currently running command can send progress report + packets to a client. Set by mysql_execute_command() for safe commands + See CF_REPORT_PROGRESS + */ + bool report_to_client; + /* + true, if we will send progress report packets to a client + (client has requested them, see MARIADB_CLIENT_PROGRESS; report_to_client + is true; not in sub-statement) + */ + bool report; + uint stage, max_stage; + ulonglong counter, max_counter; + ulonglong next_report_time; + Query_arena *arena; + } progress; + + thr_lock_type update_lock_default; + Delayed_insert *di; + + /* <> 0 if we are inside of trigger or stored function. */ + uint in_sub_stmt; + /* True when opt_userstat_running is set at start of query */ + bool userstat_running; + /* + True if we have to log all errors. Are set by some engines to temporary + force errors to the error log. 
+ */ + bool log_all_errors; + + /* Do not set socket timeouts for wait_timeout (used with threadpool) */ + bool skip_wait_timeout; + + bool prepare_derived_at_open; + + /* Set to 1 if status of this THD is already in global status */ + bool status_in_global; + + /* + To signal that the tmp table to be created is created for materialized + derived table or a view. + */ + bool create_tmp_table_for_derived; + + bool save_prep_leaf_list; + + /* container for handler's private per-connection data */ + Ha_data ha_data[MAX_HA]; + + /** + Bit field for the state of binlog warnings. + + The first Lex::BINLOG_STMT_UNSAFE_COUNT bits list all types of + unsafeness that the current statement has. + + This must be a member of THD and not of LEX, because warnings are + detected and issued in different places (@c + decide_logging_format() and @c binlog_query(), respectively). + Between these calls, the THD->lex object may change; e.g., if a + stored routine is invoked. Only THD persists between the calls. 
+ */ + uint32 binlog_unsafe_warning_flags; + + typedef uint used_t; + enum { RAND_USED=1, TIME_ZONE_USED=2, QUERY_START_SEC_PART_USED=4, + THREAD_SPECIFIC_USED=8 }; + + used_t used; + +#ifndef MYSQL_CLIENT + binlog_cache_mngr * binlog_setup_trx_data(); + /* + If set, tell binlog to store the value as query 'xid' in the next + Query_log_event + */ + ulonglong binlog_xid; + + /* + Public interface to write RBR events to the binlog + */ + void binlog_start_trans_and_stmt(); + void binlog_set_stmt_begin(); + int binlog_write_row(TABLE* table, bool is_transactional, + const uchar *buf); + int binlog_delete_row(TABLE* table, bool is_transactional, + const uchar *buf); + int binlog_update_row(TABLE* table, bool is_transactional, + const uchar *old_data, const uchar *new_data); + bool prepare_handlers_for_update(uint flag); + bool binlog_write_annotated_row(Log_event_writer *writer); + void binlog_prepare_for_row_logging(); + bool binlog_write_table_maps(); + bool binlog_write_table_map(TABLE *table, bool with_annotate); + static void binlog_prepare_row_images(TABLE* table); + + void set_server_id(uint32 sid) { variables.server_id = sid; } + + /* + Member functions to handle pending event for row-level logging. 
+ */ + template Rows_log_event* + binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id, + size_t needed, + bool is_transactional, + RowsEventT* hint); + Rows_log_event* binlog_get_pending_rows_event(bool is_transactional) const; + void binlog_set_pending_rows_event(Rows_log_event* ev, bool is_transactional); + inline int binlog_flush_pending_rows_event(bool stmt_end) + { + return (binlog_flush_pending_rows_event(stmt_end, FALSE) || + binlog_flush_pending_rows_event(stmt_end, TRUE)); + } + int binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional); + int binlog_remove_pending_rows_event(bool clear_maps, bool is_transactional); + + bool binlog_need_stmt_format(bool is_transactional) const + { + return log_current_statement() && + !binlog_get_pending_rows_event(is_transactional); + } + + bool binlog_for_noop_dml(bool transactional_table); + + /** + Determine the binlog format of the current statement. + + @retval 0 if the current statement will be logged in statement + format. + @retval nonzero if the current statement will be logged in row + format. + */ + int is_current_stmt_binlog_format_row() const { + DBUG_ASSERT(current_stmt_binlog_format == BINLOG_FORMAT_STMT || + current_stmt_binlog_format == BINLOG_FORMAT_ROW); + return current_stmt_binlog_format == BINLOG_FORMAT_ROW; + } + /** + Determine if binlogging is disabled for this session + @retval 0 if the current statement binlogging is disabled + (could be because of binlog closed/binlog option + is set to false). 
+ @retval 1 if the current statement will be binlogged + */ + inline bool is_current_stmt_binlog_disabled() const + { + return (!(variables.option_bits & OPTION_BIN_LOG) || + !mysql_bin_log.is_open()); + } + + enum binlog_filter_state + { + BINLOG_FILTER_UNKNOWN, + BINLOG_FILTER_CLEAR, + BINLOG_FILTER_SET + }; + + inline void reset_binlog_local_stmt_filter() + { + m_binlog_filter_state= BINLOG_FILTER_UNKNOWN; + } + + inline void clear_binlog_local_stmt_filter() + { + DBUG_ASSERT(m_binlog_filter_state == BINLOG_FILTER_UNKNOWN); + m_binlog_filter_state= BINLOG_FILTER_CLEAR; + } + + inline void set_binlog_local_stmt_filter() + { + DBUG_ASSERT(m_binlog_filter_state == BINLOG_FILTER_UNKNOWN); + m_binlog_filter_state= BINLOG_FILTER_SET; + } + + inline binlog_filter_state get_binlog_local_stmt_filter() + { + return m_binlog_filter_state; + } + + /** + Checks if a user connection is read-only + */ + inline bool is_read_only_ctx() + { + return opt_readonly && + !(security_ctx->master_access & PRIV_IGNORE_READ_ONLY) && + !slave_thread; + } + +private: + /** + Indicate if the current statement should be discarded + instead of written to the binlog. + This is used to discard special statements, such as + DML or DDL that affects only 'local' (non replicated) + tables, such as performance_schema.* + */ + binlog_filter_state m_binlog_filter_state; + + /** + Indicates the format in which the current statement will be + logged. This can only be set from @c decide_logging_format(). 
+ */ + enum_binlog_format current_stmt_binlog_format; + +public: + + /* 1 if binlog table maps has been written */ + bool binlog_table_maps; + + void issue_unsafe_warnings(); + void reset_unsafe_warnings() + { binlog_unsafe_warning_flags= 0; } + + void reset_binlog_for_next_statement() + { + binlog_table_maps= 0; + } + bool binlog_table_should_be_logged(const LEX_CSTRING *db); + + // Accessors and setters of two-phase loggable ALTER binlog properties + uchar get_binlog_flags_for_alter(); + void set_binlog_flags_for_alter(uchar); + uint64 get_binlog_start_alter_seq_no(); + void set_binlog_start_alter_seq_no(uint64); +#endif /* MYSQL_CLIENT */ + +public: + + struct st_transactions { + SAVEPOINT *savepoints; + THD_TRANS all; // Trans since BEGIN WORK + THD_TRANS stmt; // Trans for current statement + bool on; // see ha_enable_transaction() + XID_STATE xid_state; + XID implicit_xid; + WT_THD wt; ///< for deadlock detection + Rows_log_event *m_pending_rows_event; + + struct st_trans_time : public timeval + { + void reset(THD *thd) + { + tv_sec= thd->query_start(); + tv_usec= (long) thd->query_start_sec_part(); + } + } start_time; + + /* + Tables changed in transaction (that must be invalidated in query cache). + List contain only transactional tables, that not invalidated in query + cache (instead of full list of changed in transaction tables). 
+ */ + CHANGED_TABLE_LIST* changed_tables; + MEM_ROOT mem_root; // Transaction-life memory allocation pool + void cleanup() + { + DBUG_ENTER("THD::st_transactions::cleanup"); + changed_tables= 0; + savepoints= 0; + implicit_xid.null(); + free_root(&mem_root,MYF(MY_KEEP_PREALLOC)); + DBUG_VOID_RETURN; + } + void free() + { + free_root(&mem_root,MYF(0)); + } + bool is_active() + { + return (all.ha_list != NULL); + } + bool is_empty() + { + return all.is_empty() && stmt.is_empty(); + } + st_transactions() + { + bzero((char*)this, sizeof(*this)); + implicit_xid.null(); + init_sql_alloc(key_memory_thd_transactions, &mem_root, 256, + 0, MYF(MY_THREAD_SPECIFIC)); + } + } default_transaction, *transaction; + Global_read_lock global_read_lock; + Field *dup_field; +#ifndef _WIN32 + sigset_t signals; +#endif +#ifdef SIGNAL_WITH_VIO_CLOSE + Vio* active_vio; +#endif + + /* + A permanent memory area of the statement. For conventional + execution, the parsed tree and execution runtime reside in the same + memory root. In this case stmt_arena points to THD. In case of + a prepared statement or a stored procedure statement, thd->mem_root + conventionally points to runtime memory, and thd->stmt_arena + points to the memory of the PS/SP, where the parsed tree of the + statement resides. Whenever you need to perform a permanent + transformation of a parsed tree, you should allocate new memory in + stmt_arena, to allow correct re-execution of PS/SP. + Note: in the parser, stmt_arena == thd, even for PS/SP. + */ + Query_arena *stmt_arena; + + void *bulk_param; + + /* + map for tables that will be updated for a multi-table update query + statement, for other query statements, this will be zero. + */ + table_map table_map_for_update; + + /* Tells if LAST_INSERT_ID(#) was called for the current statement */ + bool arg_of_last_insert_id_function; + /* + ALL OVER THIS FILE, "insert_id" means "*automatically generated* value for + insertion into an auto_increment column". 
+ */ + /* + This is the first autogenerated insert id which was *successfully* + inserted by the previous statement (exactly, if the previous statement + didn't successfully insert an autogenerated insert id, then it's the one + of the statement before, etc). + It can also be set by SET LAST_INSERT_ID=# or SELECT LAST_INSERT_ID(#). + It is returned by LAST_INSERT_ID(). + */ + ulonglong first_successful_insert_id_in_prev_stmt; + /* + Variant of the above, used for storing in statement-based binlog. The + difference is that the one above can change as the execution of a stored + function progresses, while the one below is set once and then does not + change (which is the value which statement-based binlog needs). + */ + ulonglong first_successful_insert_id_in_prev_stmt_for_binlog; + /* + This is the first autogenerated insert id which was *successfully* + inserted by the current statement. It is maintained only to set + first_successful_insert_id_in_prev_stmt when statement ends. + */ + ulonglong first_successful_insert_id_in_cur_stmt; + /* + We follow this logic: + - when stmt starts, first_successful_insert_id_in_prev_stmt contains the + first insert id successfully inserted by the previous stmt. + - as stmt makes progress, handler::insert_id_for_cur_row changes; + every time get_auto_increment() is called, + auto_inc_intervals_in_cur_stmt_for_binlog is augmented with the + reserved interval (if statement-based binlogging). + - at first successful insertion of an autogenerated value, + first_successful_insert_id_in_cur_stmt is set to + handler::insert_id_for_cur_row. + - when stmt goes to binlog, + auto_inc_intervals_in_cur_stmt_for_binlog is binlogged if + non-empty. + - when stmt ends, first_successful_insert_id_in_prev_stmt is set to + first_successful_insert_id_in_cur_stmt. + */ + /* + stmt_depends_on_first_successful_insert_id_in_prev_stmt is set when + LAST_INSERT_ID() is used by a statement. 
+ If it is set, first_successful_insert_id_in_prev_stmt_for_binlog will be + stored in the statement-based binlog. + This variable is CUMULATIVE along the execution of a stored function or + trigger: if one substatement sets it to 1 it will stay 1 until the + function/trigger ends, thus making sure that + first_successful_insert_id_in_prev_stmt_for_binlog does not change anymore + and is propagated to the caller for binlogging. + */ + bool stmt_depends_on_first_successful_insert_id_in_prev_stmt; + /* + List of auto_increment intervals reserved by the thread so far, for + storage in the statement-based binlog. + Note that its minimum is not first_successful_insert_id_in_cur_stmt: + assuming a table with an autoinc column, and this happens: + INSERT INTO ... VALUES(3); + SET INSERT_ID=3; INSERT IGNORE ... VALUES (NULL); + then the latter INSERT will insert no rows + (first_successful_insert_id_in_cur_stmt == 0), but storing "INSERT_ID=3" + in the binlog is still needed; the list's minimum will contain 3. + This variable is cumulative: if several statements are written to binlog + as one (stored functions or triggers are used) this list is the + concatenation of all intervals reserved by all statements. + */ + Discrete_intervals_list auto_inc_intervals_in_cur_stmt_for_binlog; + /* Used by replication and SET INSERT_ID */ + Discrete_intervals_list auto_inc_intervals_forced; + /* + There is BUG#19630 where statement-based replication of stored + functions/triggers with two auto_increment columns breaks. + We however ensure that it works when there is 0 or 1 auto_increment + column; our rules are + a) on master, while executing a top statement involving substatements, + first top- or sub- statement to generate auto_increment values wins the + exclusive right to see its values be written to binlog (the write + will be done by the statement or its caller), and the losers won't see + their values be written to binlog. 
+ b) on slave, while replicating a top statement involving substatements, + first top- or sub- statement to need to read auto_increment values from + the master's binlog wins the exclusive right to read them (so the losers + won't read their values from binlog but instead generate on their own). + a) implies that we mustn't backup/restore + auto_inc_intervals_in_cur_stmt_for_binlog. + b) implies that we mustn't backup/restore auto_inc_intervals_forced. + + If there are more than 1 auto_increment columns, then intervals for + different columns may mix into the + auto_inc_intervals_in_cur_stmt_for_binlog list, which is logically wrong, + but there is no point in preventing this mixing by preventing intervals + from the secondly inserted column to come into the list, as such + prevention would be wrong too. + What will happen in the case of + INSERT INTO t1 (auto_inc) VALUES(NULL); + where t1 has a trigger which inserts into an auto_inc column of t2, is + that in binlog we'll store the interval of t1 and the interval of t2 (when + we store intervals, soon), then in slave, t1 will use both intervals, t2 + will use none; if t1 inserts the same number of rows as on master, + normally the 2nd interval will not be used by t1, which is fine. t2's + values will be wrong if t2's internal auto_increment counter is different + from what it was on master (which is likely). In 5.1, in mixed binlogging + mode, row-based binlogging is used for such cases where two + auto_increment columns are inserted. 
+ */ + inline void record_first_successful_insert_id_in_cur_stmt(ulonglong id_arg) + { + if (first_successful_insert_id_in_cur_stmt == 0) + first_successful_insert_id_in_cur_stmt= id_arg; + } + inline ulonglong read_first_successful_insert_id_in_prev_stmt(void) + { + if (!stmt_depends_on_first_successful_insert_id_in_prev_stmt) + { + /* It's the first time we read it */ + first_successful_insert_id_in_prev_stmt_for_binlog= + first_successful_insert_id_in_prev_stmt; + stmt_depends_on_first_successful_insert_id_in_prev_stmt= 1; + } + return first_successful_insert_id_in_prev_stmt; + } + /* + Used by Intvar_log_event::do_apply_event() and by "SET INSERT_ID=#" + (mysqlbinlog). We'll soon add a variant which can take many intervals in + argument. + */ + inline void force_one_auto_inc_interval(ulonglong next_id) + { + auto_inc_intervals_forced.empty(); // in case of multiple SET INSERT_ID + auto_inc_intervals_forced.append(next_id, ULONGLONG_MAX, 0); + } + inline void set_binlog_bit() + { + if (variables.sql_log_bin) + variables.option_bits |= OPTION_BIN_LOG; + else + variables.option_bits &= ~OPTION_BIN_LOG; + } + + ulonglong limit_found_rows; + +private: + /** + Stores the result of ROW_COUNT() function. + + ROW_COUNT() function is a MySQL extention, but we try to keep it + similar to ROW_COUNT member of the GET DIAGNOSTICS stack of the SQL + standard (see SQL99, part 2, search for ROW_COUNT). It's value is + implementation defined for anything except INSERT, DELETE, UPDATE. + + ROW_COUNT is assigned according to the following rules: + + - In my_ok(): + - for DML statements: to the number of affected rows; + - for DDL statements: to 0. + + - In my_eof(): to -1 to indicate that there was a result set. + + We derive this semantics from the JDBC specification, where int + java.sql.Statement.getUpdateCount() is defined to (sic) "return the + current result as an update count; if the result is a ResultSet + object or there are no more results, -1 is returned". 
+ + - In my_error(): to -1 to be compatible with the MySQL C API and + MySQL ODBC driver. + + - For SIGNAL statements: to 0 per WL#2110 specification (see also + sql_signal.cc comment). Zero is used since that's the "default" + value of ROW_COUNT in the diagnostics area. + */ + + longlong m_row_count_func; /* For the ROW_COUNT() function */ + +public: + inline longlong get_row_count_func() const + { + return m_row_count_func; + } + + inline void set_row_count_func(longlong row_count_func) + { + m_row_count_func= row_count_func; + } + inline void set_affected_rows(longlong row_count_func) + { + /* + We have to add to affected_rows (used by slow log), as otherwise + information for 'call' will be wrong + */ + affected_rows+= (row_count_func >= 0 ? row_count_func : 0); + } + + ha_rows cuted_fields; + +private: + /* + number of rows we actually sent to the client, including "synthetic" + rows in ROLLUP etc. + */ + ha_rows m_sent_row_count; + + /** + Number of rows read and/or evaluated for a statement. Used for + slow log reporting. + + An examined row is defined as a row that is read and/or evaluated + according to a statement condition, including in + create_sort_index(). Rows may be counted more than once, e.g., a + statement including ORDER BY could possibly evaluate the row in + filesort() before reading it for e.g. update. 
+ */ + ha_rows m_examined_row_count; + +public: + ha_rows get_sent_row_count() const + { return m_sent_row_count; } + + ha_rows get_examined_row_count() const + { return m_examined_row_count; } + + ulonglong get_affected_rows() const + { return affected_rows; } + + void set_sent_row_count(ha_rows count); + void set_examined_row_count(ha_rows count); + + void inc_sent_row_count(ha_rows count); + void inc_examined_row_count(ha_rows count); + + void inc_status_created_tmp_disk_tables(); + void inc_status_created_tmp_files(); + void inc_status_created_tmp_tables(); + void inc_status_select_full_join(); + void inc_status_select_full_range_join(); + void inc_status_select_range(); + void inc_status_select_range_check(); + void inc_status_select_scan(); + void inc_status_sort_merge_passes(); + void inc_status_sort_range(); + void inc_status_sort_rows(ha_rows count); + void inc_status_sort_scan(); + void set_status_no_index_used(); + void set_status_no_good_index_used(); + + /** + The number of rows and/or keys examined by the query, both read, + changed or written. + */ + ulonglong accessed_rows_and_keys; + + /** + Check if the number of rows accessed by a statement exceeded + LIMIT ROWS EXAMINED. If so, signal the query engine to stop execution. + */ + void check_limit_rows_examined() + { + if (++accessed_rows_and_keys > lex->limit_rows_examined_cnt) + set_killed(ABORT_QUERY); + } + + USER_CONN *user_connect; + CHARSET_INFO *db_charset; +#if defined(ENABLED_PROFILING) + PROFILING profiling; +#endif + + /** Current stage progress instrumentation. */ + PSI_stage_progress *m_stage_progress_psi; + /** Current statement digest. */ + sql_digest_state *m_digest; + /** Current statement digest token array. */ + unsigned char *m_token_array; + /** Top level statement digest. */ + sql_digest_state m_digest_state; + + /** Current statement instrumentation. */ + PSI_statement_locker *m_statement_psi; +#ifdef HAVE_PSI_STATEMENT_INTERFACE + /** Current statement instrumentation state. 
*/ + PSI_statement_locker_state m_statement_state; +#endif /* HAVE_PSI_STATEMENT_INTERFACE */ + + /** Current transaction instrumentation. */ + PSI_transaction_locker *m_transaction_psi; +#ifdef HAVE_PSI_TRANSACTION_INTERFACE + /** Current transaction instrumentation state. */ + PSI_transaction_locker_state m_transaction_state; +#endif /* HAVE_PSI_TRANSACTION_INTERFACE */ + + /** Idle instrumentation. */ + PSI_idle_locker *m_idle_psi; +#ifdef HAVE_PSI_IDLE_INTERFACE + /** Idle instrumentation state. */ + PSI_idle_locker_state m_idle_state; +#endif /* HAVE_PSI_IDLE_INTERFACE */ + + /* + Id of current query. Statement can be reused to execute several queries + query_id is global in context of the whole MySQL server. + ID is automatically generated from mutex-protected counter. + It's used in handler code for various purposes: to check which columns + from table are necessary for this select, to check if it's necessary to + update auto-updatable fields (like auto_increment and timestamp). + */ + query_id_t query_id; + privilege_t col_access; + + /* Statement id is thread-wide. This counter is used to generate ids */ + ulong statement_id_counter; + ulong rand_saved_seed1, rand_saved_seed2; + + /* The following variables are used when printing to slow log */ + ulong query_plan_flags; + ulong query_plan_fsort_passes; + ulong tmp_tables_used; + ulong tmp_tables_disk_used; + ulonglong tmp_tables_size; + ulonglong bytes_sent_old; + ulonglong affected_rows; /* Number of changed rows */ + + Opt_trace_context opt_trace; + pthread_t real_id; /* For debugging */ + my_thread_id thread_id, thread_dbug_id; + uint32 os_thread_id; + uint tmp_table, global_disable_checkpoint; + uint server_status,open_options; + enum enum_thread_type system_thread; + enum backup_stages current_backup_stage; +#ifdef WITH_WSREP + bool wsrep_desynced_backup_stage; +#endif /* WITH_WSREP */ + /* + Current or next transaction isolation level. 
+ When a connection is established, the value is taken from + @@session.tx_isolation (default transaction isolation for + the session), which is in turn taken from @@global.tx_isolation + (the global value). + If there is no transaction started, this variable + holds the value of the next transaction's isolation level. + When a transaction starts, the value stored in this variable + becomes "actual". + At transaction commit or rollback, we assign this variable + again from @@session.tx_isolation. + The only statement that can otherwise change the value + of this variable is SET TRANSACTION ISOLATION LEVEL. + Its purpose is to effect the isolation level of the next + transaction in this session. When this statement is executed, + the value in this variable is changed. However, since + this statement is only allowed when there is no active + transaction, this assignment (naturally) only affects the + upcoming transaction. + At the end of the current active transaction the value is + be reset again from @@session.tx_isolation, as described + above. + */ + enum_tx_isolation tx_isolation; + /* + Current or next transaction access mode. + See comment above regarding tx_isolation. + */ + bool tx_read_only; + enum_check_fields count_cuted_fields; + + DYNAMIC_ARRAY user_var_events; /* For user variables replication */ + MEM_ROOT *user_var_events_alloc; /* Allocate above array elements here */ + + /* + Define durability properties that engines may check to + improve performance. Not yet used in MariaDB + */ + enum durability_properties durability_property; + + /* + If checking this in conjunction with a wait condition, please + include a check after enter_cond() if you want to avoid a race + condition. For details see the implementation of awake(), + especially the "broadcast" part. 
+ */ + killed_state volatile killed; + + /* + The following is used if one wants to have a specific error number and + text for the kill + */ + struct err_info + { + int no; + const char msg[256]; + } *killed_err; + + /* See also thd_killed() */ + inline bool check_killed(bool dont_send_error_message= 0) + { + if (unlikely(killed)) + { + if (!dont_send_error_message) + send_kill_message(); + return TRUE; + } + if (apc_target.have_apc_requests()) + apc_target.process_apc_requests(false); + return FALSE; + } + + /* scramble - random string sent to client on handshake */ + char scramble[SCRAMBLE_LENGTH+1]; + + /* + If this is a slave, the name of the connection stored here. + This is used for taging error messages in the log files. + */ + LEX_CSTRING connection_name; + char default_master_connection_buff[MAX_CONNECTION_NAME+1]; + uint8 password; /* 0, 1 or 2 */ + uint8 failed_com_change_user; + bool slave_thread; + bool no_errors; + + /** + Set to TRUE if execution of the current compound statement + can not continue. In particular, disables activation of + CONTINUE or EXIT handlers of stored routines. + Reset in the end of processing of the current user request, in + @see THD::reset_for_next_command(). + */ + bool is_fatal_error; + /** + Set by a storage engine to request the entire + transaction (that possibly spans multiple engines) to + rollback. Reset in ha_rollback. + */ + bool transaction_rollback_request; + /** + TRUE if we are in a sub-statement and the current error can + not be safely recovered until we left the sub-statement mode. + In particular, disables activation of CONTINUE and EXIT + handlers inside sub-statements. E.g. if it is a deadlock + error and requires a transaction-wide rollback, this flag is + raised (traditionally, MySQL first has to close all the reads + via @see handler::ha_index_or_rnd_end() and only then perform + the rollback). + Reset to FALSE when we leave the sub-statement mode. 
+ */ + bool is_fatal_sub_stmt_error; + /* for IS NULL => = last_insert_id() fix in remove_eq_conds() */ + bool substitute_null_with_insert_id; + bool in_lock_tables; + bool bootstrap, cleanup_done, free_connection_done; + + /** + is set if a statement accesses a temporary table created through + CREATE TEMPORARY TABLE. + */ +private: + bool charset_is_system_charset, charset_is_collation_connection; + bool charset_is_character_set_filesystem; +public: + bool enable_slow_log; /* Enable slow log for current statement */ + bool abort_on_warning; + bool got_warning; /* Set on call to push_warning() */ + /* set during loop of derived table processing */ + bool derived_tables_processing; + bool tablespace_op; /* This is TRUE in DISCARD/IMPORT TABLESPACE */ + bool log_current_statement() const + { + return variables.option_bits & OPTION_BINLOG_THIS_STMT; + } + /** + True if a slave error. Causes the slave to stop. Not the same + as the statement execution error (is_error()), since + a statement may be expected to return an error, e.g. because + it returned an error on master, and this is OK on the slave. + */ + bool is_slave_error; + /* True if we have printed something to the error log for this statement */ + bool error_printed_to_log; + + /* + True when a transaction is queued up for binlog group commit. + Used so that if another transaction needs to wait for a row lock held by + this transaction, it can signal to trigger the group commit immediately, + skipping the normal --binlog-commit-wait-count wait. + */ + bool waiting_on_group_commit; + /* + Set true when another transaction goes to wait on a row lock held by this + transaction. Used together with waiting_on_group_commit. + */ + bool has_waiter; + /* + In case of a slave, set to the error code the master got when executing + the query. 0 if no error on the master. + The stored into variable master error code may get reset inside + execution stack when the event turns out to be ignored. 
+ */ + int slave_expected_error; + enum_sql_command last_sql_command; // Last sql_command exceuted in mysql_execute_command() + + sp_rcontext *spcont; // SP runtime context + + /** number of name_const() substitutions, see sp_head.cc:subst_spvars() */ + uint query_name_consts; + + NET* slave_net; // network connection from slave -> m. + + /* + Used to update global user stats. The global user stats are updated + occasionally with the 'diff' variables. After the update, the 'diff' + variables are reset to 0. + */ + /* Time when the current thread connected to MySQL. */ + time_t current_connect_time; + /* Last time when THD stats were updated in global_user_stats. */ + time_t last_global_update_time; + /* Number of commands not reflected in global_user_stats yet. */ + uint select_commands, update_commands, other_commands; + ulonglong start_cpu_time; + ulonglong start_bytes_received; + + /* Used by the sys_var class to store temporary values */ + union + { + my_bool my_bool_value; + int int_value; + uint uint_value; + long long_value; + ulong ulong_value; + ulonglong ulonglong_value; + double double_value; + void *ptr_value; + } sys_var_tmp; + + struct { + /* + If true, mysql_bin_log::write(Log_event) call will not write events to + binlog, and maintain 2 below variables instead (use + mysql_bin_log.start_union_events to turn this on) + */ + bool do_union; + /* + If TRUE, at least one mysql_bin_log::write(Log_event) call has been + made after last mysql_bin_log.start_union_events() call. + */ + bool unioned_events; + /* + If TRUE, at least one mysql_bin_log::write(Log_event e), where + e.cache_stmt == TRUE call has been made after last + mysql_bin_log.start_union_events() call. + */ + bool unioned_events_trans; + /* + 'queries' (actually SP statements) that run under inside this binlog + union have thd->query_id >= first_query_id. + */ + query_id_t first_query_id; + } binlog_evt_union; + + /** + Internal parser state. 
+ Note that since the parser is not re-entrant, we keep only one parser + state here. This member is valid only when executing code during parsing. + */ + Parser_state *m_parser_state; + + Locked_tables_list locked_tables_list; + +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *work_part_info; +#endif + +#ifndef EMBEDDED_LIBRARY + /** + Array of active audit plugins which have been used by this THD. + This list is later iterated to invoke release_thd() on those + plugins. + */ + DYNAMIC_ARRAY audit_class_plugins; + /** + Array of bits indicating which audit classes have already been + added to the list of audit plugins which are currently in use. + */ + unsigned long audit_class_mask[MYSQL_AUDIT_CLASS_MASK_SIZE]; + int audit_plugin_version; +#endif + +#if defined(ENABLED_DEBUG_SYNC) + /* Debug Sync facility. See debug_sync.cc. */ + struct st_debug_sync_control *debug_sync_control; +#endif /* defined(ENABLED_DEBUG_SYNC) */ + /** + @param id thread identifier + @param is_wsrep_applier thread type + */ + THD(my_thread_id id, bool is_wsrep_applier= false); + + ~THD(); + + void init(); + /* + Initialize memory roots necessary for query processing and (!) + pre-allocate memory for it. We can't do that in THD constructor because + there are use cases (acl_init, delayed inserts, watcher threads, + killing mysqld) where it's vital to not allocate excessive and not used + memory. Note, that we still don't return error from init_for_queries(): + if preallocation fails, we should notice that at the first call to + alloc_root. 
+ */ + void init_for_queries(); + void update_all_stats(); + void update_stats(void); + void change_user(void); + void cleanup(void); + void cleanup_after_query(); + void free_connection(); + void reset_for_reuse(); + void store_globals(); + void reset_globals(); + bool trace_started() + { + return opt_trace.is_started(); + } +#ifdef SIGNAL_WITH_VIO_CLOSE + inline void set_active_vio(Vio* vio) + { + mysql_mutex_lock(&LOCK_thd_data); + active_vio = vio; + mysql_mutex_unlock(&LOCK_thd_data); + } + inline void clear_active_vio() + { + mysql_mutex_lock(&LOCK_thd_data); + active_vio = 0; + mysql_mutex_unlock(&LOCK_thd_data); + } + void close_active_vio(); +#endif + void awake_no_mutex(killed_state state_to_set); + void awake(killed_state state_to_set) + { + mysql_mutex_lock(&LOCK_thd_kill); + mysql_mutex_lock(&LOCK_thd_data); + awake_no_mutex(state_to_set); + mysql_mutex_unlock(&LOCK_thd_data); + mysql_mutex_unlock(&LOCK_thd_kill); + } + void abort_current_cond_wait(bool force); + + /** Disconnect the associated communication endpoint. */ + void disconnect(); + + + /* + Allows this thread to serve as a target for others to schedule Async + Procedure Calls on. + + It's possible to schedule any code to be executed this way, by + inheriting from the Apc_call object. Currently, only + Show_explain_request uses this. + */ + Apc_target apc_target; + + Gap_time_tracker_data gap_tracker_data; +#ifndef MYSQL_CLIENT + enum enum_binlog_query_type { + /* The query can be logged in row format or in statement format. */ + ROW_QUERY_TYPE, + + /* The query has to be logged in statement format. 
*/ + STMT_QUERY_TYPE, + + QUERY_TYPE_COUNT + }; + + int binlog_query(enum_binlog_query_type qtype, + char const *query, ulong query_len, bool is_trans, + bool direct, bool suppress_use, + int errcode); + bool binlog_current_query_unfiltered(); +#endif + + inline void + enter_cond(mysql_cond_t *cond, mysql_mutex_t* mutex, + const PSI_stage_info *stage, PSI_stage_info *old_stage, + const char *src_function, const char *src_file, + int src_line) + { + mysql_mutex_assert_owner(mutex); + mysys_var->current_mutex = mutex; + mysys_var->current_cond = cond; + if (old_stage) + backup_stage(old_stage); + if (stage) + enter_stage(stage, src_function, src_file, src_line); + } + inline void exit_cond(const PSI_stage_info *stage, + const char *src_function, const char *src_file, + int src_line) + { + /* + Putting the mutex unlock in thd->exit_cond() ensures that + mysys_var->current_mutex is always unlocked _before_ mysys_var->mutex is + locked (if that would not be the case, you'll get a deadlock if someone + does a THD::awake() on you). + */ + mysql_mutex_unlock(mysys_var->current_mutex); + mysql_mutex_lock(&mysys_var->mutex); + mysys_var->current_mutex = 0; + mysys_var->current_cond = 0; + if (stage) + enter_stage(stage, src_function, src_file, src_line); + mysql_mutex_unlock(&mysys_var->mutex); + return; + } + virtual int is_killed() { return killed; } + virtual THD* get_thd() { return this; } + + /** + A callback to the server internals that is used to address + special cases of the locking protocol. + Invoked when acquiring an exclusive lock, for each thread that + has a conflicting shared metadata lock. + + This function: + - aborts waiting of the thread on a data lock, to make it notice + the pending exclusive lock and back off. + - if the thread is an INSERT DELAYED thread, sends it a KILL + signal to terminate it. + + @note This function does not wait for the thread to give away its + locks. Waiting is done outside for all threads at once. 
+ + @param ctx_in_use The MDL context owner (thread) to wake up. + @param needs_thr_lock_abort Indicates that to wake up thread + this call needs to abort its waiting + on table-level lock. + + @retval TRUE if the thread was woken up + @retval FALSE otherwise. + */ + virtual bool notify_shared_lock(MDL_context_owner *ctx_in_use, + bool needs_thr_lock_abort); + + // End implementation of MDL_context_owner interface. + + inline bool is_strict_mode() const + { + return (bool) (variables.sql_mode & (MODE_STRICT_TRANS_TABLES | + MODE_STRICT_ALL_TABLES)); + } + inline bool backslash_escapes() const + { + return !MY_TEST(variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES); + } + const Type_handler *type_handler_for_datetime() const; + bool timestamp_to_TIME(MYSQL_TIME *ltime, my_time_t ts, + ulong sec_part, date_mode_t fuzzydate); + inline my_time_t query_start() { return start_time; } + inline ulong query_start_sec_part() + { used|= QUERY_START_SEC_PART_USED; return start_time_sec_part; } + MYSQL_TIME query_start_TIME(); + time_round_mode_t temporal_round_mode() const + { + return variables.sql_mode & MODE_TIME_ROUND_FRACTIONAL ? 
+ TIME_FRAC_ROUND : TIME_FRAC_TRUNCATE; + } + +private: + struct { + my_hrtime_t start; + my_time_t sec; + ulong sec_part; + } system_time; + + void set_system_time() + { + my_hrtime_t hrtime= my_hrtime(); + my_time_t sec= hrtime_to_my_time(hrtime); + ulong sec_part= hrtime_sec_part(hrtime); + if (sec > system_time.sec || + (sec == system_time.sec && sec_part > system_time.sec_part) || + hrtime.val < system_time.start.val) + { + system_time.sec= sec; + system_time.sec_part= sec_part; + system_time.start= hrtime; + } + else + { + if (system_time.sec_part < TIME_MAX_SECOND_PART) + system_time.sec_part++; + else + { + system_time.sec++; + system_time.sec_part= 0; + } + } + } + +public: + timeval transaction_time() + { + if (!in_multi_stmt_transaction_mode()) + transaction->start_time.reset(this); + return transaction->start_time; + } + + inline void set_start_time() + { + if (user_time.val) + { + start_time= hrtime_to_my_time(user_time); + start_time_sec_part= hrtime_sec_part(user_time); + } + else + { + set_system_time(); + start_time= system_time.sec; + start_time_sec_part= system_time.sec_part; + } + PSI_CALL_set_thread_start_time(start_time); + } + inline void set_time() + { + set_start_time(); + start_utime= utime_after_lock= microsecond_interval_timer(); + } + /* only used in SET @@timestamp=... */ + inline void set_time(my_hrtime_t t) + { + user_time= t; + set_time(); + } + inline void force_set_time(my_time_t t, ulong sec_part) + { + start_time= system_time.sec= t; + start_time_sec_part= system_time.sec_part= sec_part; + } + /* + this is only used by replication and BINLOG command. + usecs > TIME_MAX_SECOND_PART means "was not in binlog" + */ + inline void set_time(my_time_t t, ulong sec_part) + { + if (opt_secure_timestamp > (slave_thread ? 
SECTIME_REPL : SECTIME_SUPER)) + set_time(); // note that BINLOG itself requires SUPER + else + { + if (sec_part <= TIME_MAX_SECOND_PART) + force_set_time(t, sec_part); + else if (t != system_time.sec) + force_set_time(t, 0); + else + { + start_time= t; + start_time_sec_part= ++system_time.sec_part; + } + user_time.val= hrtime_from_time(start_time) + start_time_sec_part; + PSI_CALL_set_thread_start_time(start_time); + start_utime= utime_after_lock= microsecond_interval_timer(); + } + } + void set_time_after_lock() + { + utime_after_lock= microsecond_interval_timer(); + MYSQL_SET_STATEMENT_LOCK_TIME(m_statement_psi, + (utime_after_lock - start_utime)); + } + ulonglong current_utime() { return microsecond_interval_timer(); } + + /* Tell SHOW PROCESSLIST to show time from this point */ + inline void set_time_for_next_stage() + { + utime_after_query= current_utime(); + } + + /** + Update server status after execution of a top level statement. + Currently only checks if a query was slow, and assigns + the status accordingly. + Evaluate the current time, and if it exceeds the long-query-time + setting, mark the query as slow. + */ + void update_server_status() + { + set_time_for_next_stage(); + if (utime_after_query >= utime_after_lock + variables.long_query_time) + server_status|= SERVER_QUERY_WAS_SLOW; + } + inline ulonglong found_rows(void) + { + return limit_found_rows; + } + /** + Returns TRUE if session is in a multi-statement transaction mode. + + OPTION_NOT_AUTOCOMMIT: When autocommit is off, a multi-statement + transaction is implicitly started on the first statement after a + previous transaction has been ended. + + OPTION_BEGIN: Regardless of the autocommit status, a multi-statement + transaction can be explicitly started with the statements "START + TRANSACTION", "BEGIN [WORK]", "[COMMIT | ROLLBACK] AND CHAIN", etc. + + Note: this doesn't tell you whether a transaction is active. 
+ A session can be in multi-statement transaction mode, and yet + have no active transaction, e.g., in case of: + set @@autocommit=0; + set @a= 3; <-- these statements don't + set transaction isolation level serializable; <-- start an active + flush tables; <-- transaction + + I.e. for the above scenario this function returns TRUE, even + though no active transaction has begun. + @sa in_active_multi_stmt_transaction() + */ + inline bool in_multi_stmt_transaction_mode() + { + return variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN); + } + /** + TRUE if the session is in a multi-statement transaction mode + (@sa in_multi_stmt_transaction_mode()) *and* there is an + active transaction, i.e. there is an explicit start of a + transaction with BEGIN statement, or implicit with a + statement that uses a transactional engine. + + For example, these scenarios don't start an active transaction + (even though the server is in multi-statement transaction mode): + + set @@autocommit=0; + select * from nontrans_table; + set @var=TRUE; + flush tables; + + Note, that even for a statement that starts a multi-statement + transaction (i.e. select * from trans_table), this + flag won't be set until we open the statement's tables + and the engines register themselves for the transaction + (see trans_register_ha()), + hence this method is reliable to use only after + open_tables() has completed. + + Why do we need a flag? + ---------------------- + We need to maintain a (at first glance redundant) + session flag, rather than looking at thd->transaction.all.ha_list + because of explicit start of a transaction with BEGIN. + + I.e. 
in case of + BEGIN; + select * from nontrans_t1; <-- in_active_multi_stmt_transaction() is true + */ + inline bool in_active_multi_stmt_transaction() + { + return server_status & SERVER_STATUS_IN_TRANS; + } + /* Commit both statement and full transaction */ + int commit_whole_transaction_and_close_tables(); + void give_protection_error(); + /* + Give an error if any of the following is true for this connection + - BACKUP STAGE is active + - FLUSH TABLE WITH READ LOCK is active + - BACKUP LOCK table_name is active + */ + inline bool has_read_only_protection() + { + if (current_backup_stage == BACKUP_FINISHED && + !global_read_lock.is_acquired() && + !mdl_backup_lock) + return FALSE; + give_protection_error(); + return TRUE; + } + inline bool fill_information_schema_tables() + { + return !stmt_arena->is_stmt_prepare(); + } + inline void* trans_alloc(size_t size) + { + return alloc_root(&transaction->mem_root,size); + } + + LEX_CSTRING strmake_lex_cstring(const char *str, size_t length) + { + const char *tmp= strmake_root(mem_root, str, length); + if (!tmp) + return {0,0}; + return {tmp, length}; + } + LEX_CSTRING strmake_lex_cstring(const LEX_CSTRING &from) + { + return strmake_lex_cstring(from.str, from.length); + } + LEX_CSTRING strmake_lex_cstring_trim_whitespace(const LEX_CSTRING &from) + { + return strmake_lex_cstring(Lex_cstring(from).trim_whitespace(charset())); + } + + LEX_STRING *make_lex_string(LEX_STRING *lex_str, const char* str, size_t length) + { + if (!(lex_str->str= strmake_root(mem_root, str, length))) + { + lex_str->length= 0; + return 0; + } + lex_str->length= length; + return lex_str; + } + LEX_CSTRING *make_lex_string(LEX_CSTRING *lex_str, const char* str, size_t length) + { + if (!(lex_str->str= strmake_root(mem_root, str, length))) + { + lex_str->length= 0; + return 0; + } + lex_str->length= length; + return lex_str; + } + // Remove double quotes: aaa""bbb -> aaa"bbb + bool quote_unescape(LEX_CSTRING *dst, const LEX_CSTRING *src, char quote) + 
{ + const char *tmp= src->str; + const char *tmpend= src->str + src->length; + char *to; + if (!(dst->str= to= (char *) alloc(src->length + 1))) + { + dst->length= 0; // Safety + return true; + } + for ( ; tmp < tmpend; ) + { + if ((*to++= *tmp++) == quote) + tmp++; // Skip double quotes + } + *to= 0; // End null for safety + dst->length= to - dst->str; + return false; + } + + LEX_CSTRING *make_clex_string(const char* str, size_t length) + { + LEX_CSTRING *lex_str; + char *tmp; + if (unlikely(!(lex_str= (LEX_CSTRING *)alloc_root(mem_root, + sizeof(LEX_CSTRING) + + length+1)))) + return 0; + tmp= (char*) (lex_str+1); + lex_str->str= tmp; + memcpy(tmp, str, length); + tmp[length]= 0; + lex_str->length= length; + return lex_str; + } + LEX_CSTRING *make_clex_string(const LEX_CSTRING from) + { + return make_clex_string(from.str, from.length); + } + + // Allocate LEX_STRING for character set conversion + bool alloc_lex_string(LEX_STRING *dst, size_t length) + { + if (likely((dst->str= (char*) alloc(length)))) + return false; + dst->length= 0; // Safety + return true; // EOM + } + bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs, + const char *from, size_t from_length, + CHARSET_INFO *from_cs); + bool reinterpret_string_from_binary(LEX_CSTRING *to, CHARSET_INFO *to_cs, + const char *from, size_t from_length); + bool convert_string(LEX_CSTRING *to, CHARSET_INFO *to_cs, + const char *from, size_t from_length, + CHARSET_INFO *from_cs) + { + LEX_STRING tmp; + bool rc= convert_string(&tmp, to_cs, from, from_length, from_cs); + to->str= tmp.str; + to->length= tmp.length; + return rc; + } + bool convert_string(LEX_CSTRING *to, CHARSET_INFO *tocs, + const LEX_CSTRING *from, CHARSET_INFO *fromcs, + bool simple_copy_is_possible) + { + if (!simple_copy_is_possible) + return unlikely(convert_string(to, tocs, from->str, from->length, fromcs)); + if (fromcs == &my_charset_bin) + return reinterpret_string_from_binary(to, tocs, from->str, from->length); + *to= *from; + return 
false; + } + /* + Convert a strings between character sets. + Uses my_convert_fix(), which uses an mb_wc .. mc_mb loop internally. + dstcs and srccs cannot be &my_charset_bin. + */ + bool convert_fix(CHARSET_INFO *dstcs, LEX_STRING *dst, + CHARSET_INFO *srccs, const char *src, size_t src_length, + String_copier *status); + + /* + Same as above, but additionally sends ER_INVALID_CHARACTER_STRING + in case of bad byte sequences or Unicode conversion problems. + */ + bool convert_with_error(CHARSET_INFO *dstcs, LEX_STRING *dst, + CHARSET_INFO *srccs, + const char *src, size_t src_length); + /* + If either "dstcs" or "srccs" is &my_charset_bin, + then performs native copying using copy_fix(). + Otherwise, performs Unicode conversion using convert_fix(). + */ + bool copy_fix(CHARSET_INFO *dstcs, LEX_STRING *dst, + CHARSET_INFO *srccs, const char *src, size_t src_length, + String_copier *status); + + /* + Same as above, but additionally sends ER_INVALID_CHARACTER_STRING + in case of bad byte sequences or Unicode conversion problems. + */ + bool copy_with_error(CHARSET_INFO *dstcs, LEX_STRING *dst, + CHARSET_INFO *srccs, const char *src, size_t src_length); + + bool convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs); + + /* + Check if the string is wellformed, raise an error if not wellformed. + @param str - The string to check. + @param length - the string length. + */ + bool check_string_for_wellformedness(const char *str, + size_t length, + CHARSET_INFO *cs) const; + + bool to_ident_sys_alloc(Lex_ident_sys_st *to, const Lex_ident_cli_st *from); + + /* + Create a string literal with optional client->connection conversion. 
+ @param str - the string in the client character set + @param length - length of the string + @param repertoire - the repertoire of the string + */ + Item_basic_constant *make_string_literal(const char *str, size_t length, + my_repertoire_t repertoire); + Item_basic_constant *make_string_literal(const Lex_string_with_metadata_st &str) + { + my_repertoire_t repertoire= str.repertoire(variables.character_set_client); + return make_string_literal(str.str, str.length, repertoire); + } + Item_basic_constant *make_string_literal_nchar(const Lex_string_with_metadata_st &str); + Item_basic_constant *make_string_literal_charset(const Lex_string_with_metadata_st &str, + CHARSET_INFO *cs); + bool make_text_string_sys(LEX_CSTRING *to, + const Lex_string_with_metadata_st *from) + { + return convert_string(to, system_charset_info, + from, charset(), charset_is_system_charset); + } + bool make_text_string_connection(LEX_CSTRING *to, + const Lex_string_with_metadata_st *from) + { + return convert_string(to, variables.collation_connection, + from, charset(), charset_is_collation_connection); + } + bool make_text_string_filesystem(LEX_CSTRING *to, + const Lex_string_with_metadata_st *from) + { + return convert_string(to, variables.character_set_filesystem, + from, charset(), charset_is_character_set_filesystem); + } + void add_changed_table(TABLE *table); + void add_changed_table(const char *key, size_t key_length); + CHANGED_TABLE_LIST * changed_table_dup(const char *key, size_t key_length); + int prepare_explain_fields(select_result *result, List *field_list, + uint8 explain_flags, bool is_analyze); + int send_explain_fields(select_result *result, uint8 explain_flags, + bool is_analyze); + void make_explain_field_list(List &field_list, uint8 explain_flags, + bool is_analyze); + void make_explain_json_field_list(List &field_list, bool is_analyze); + + /** + Clear the current error, if any. 
+ We do not clear is_fatal_error or is_fatal_sub_stmt_error since we + assume this is never called if the fatal error is set. + + @todo: To silence an error, one should use Internal_error_handler + mechanism. Issuing an error that can be possibly later "cleared" is not + compatible with other installed error handlers and audit plugins. + */ + inline void clear_error(bool clear_diagnostics= 0) + { + DBUG_ENTER("clear_error"); + if (get_stmt_da()->is_error() || clear_diagnostics) + get_stmt_da()->reset_diagnostics_area(); + is_slave_error= 0; + if (killed == KILL_BAD_DATA) + reset_killed(); + DBUG_VOID_RETURN; + } + +#ifndef EMBEDDED_LIBRARY + inline bool vio_ok() const { return net.vio != 0; } + /** Return FALSE if connection to client is broken. */ + bool is_connected() + { + /* + All system threads (e.g., the slave IO thread) are connected but + not using vio. So this function always returns true for all + system threads. + */ + return system_thread || (vio_ok() ? vio_is_connected(net.vio) : FALSE); + } +#else + inline bool vio_ok() const { return TRUE; } + inline bool is_connected() { return TRUE; } +#endif + + void my_ok_with_recreate_info(const Recreate_info &info, ulong warn_count); + /** + Mark the current error as fatal. Warning: this does not + set any error, it sets a property of the error, so must be + followed or prefixed with my_error(). + */ + inline void fatal_error() + { + DBUG_ASSERT(get_stmt_da()->is_error() || killed); + is_fatal_error= 1; + DBUG_PRINT("error",("Fatal error set")); + } + /** + TRUE if there is an error in the error stack. + + Please use this method instead of direct access to + net.report_error. + + If TRUE, the current (sub)-statement should be aborted. + The main difference between this member and is_fatal_error + is that a fatal error can not be handled by a stored + procedure continue handler, whereas a normal error can. + + To raise this flag, use my_error(). 
+ */ + inline bool is_error() const { return m_stmt_da->is_error(); } + void set_bulk_execution(void *bulk) + { + bulk_param= bulk; + m_stmt_da->set_bulk_execution(MY_TEST(bulk)); + } + bool is_bulk_op() const { return MY_TEST(bulk_param); } + + /// Returns Diagnostics-area for the current statement. + Diagnostics_area *get_stmt_da() + { return m_stmt_da; } + + /// Returns Diagnostics-area for the current statement. + const Diagnostics_area *get_stmt_da() const + { return m_stmt_da; } + + /// Sets Diagnostics-area for the current statement. + void set_stmt_da(Diagnostics_area *da) + { m_stmt_da= da; } + + inline CHARSET_INFO *charset() const { return variables.character_set_client; } + void update_charset(); + void update_charset(CHARSET_INFO *character_set_client, + CHARSET_INFO *collation_connection) + { + variables.character_set_client= character_set_client; + variables.collation_connection= collation_connection; + update_charset(); + } + void update_charset(CHARSET_INFO *character_set_client, + CHARSET_INFO *collation_connection, + CHARSET_INFO *character_set_results) + { + variables.character_set_client= character_set_client; + variables.collation_connection= collation_connection; + variables.character_set_results= character_set_results; + update_charset(); + } + + inline Query_arena *activate_stmt_arena_if_needed(Query_arena *backup) + { + if (state == Query_arena::STMT_SP_QUERY_ARGUMENTS) + /* + Caller uses the arena with state STMT_SP_QUERY_ARGUMENTS for stored + routine's parameters. Lifetime of these objects spans a lifetime of + stored routine call and freed every time the stored routine execution + has been completed. That is the reason why switching to statement's + arena is not performed for arguments, else we would observe increasing + of memory usage while a stored routine be called over and over again. 
+ */ + return NULL; + + /* + Use the persistent arena if we are in a prepared statement or a stored + procedure statement and we have not already changed to use this arena. + */ + if (!stmt_arena->is_conventional() && mem_root != stmt_arena->mem_root) + { + set_n_backup_active_arena(stmt_arena, backup); + return stmt_arena; + } + return 0; + } + + + bool is_item_tree_change_register_required() + { + return !stmt_arena->is_conventional(); + } + + void change_item_tree(Item **place, Item *new_value) + { + DBUG_ENTER("THD::change_item_tree"); + DBUG_PRINT("enter", ("Register: %p (%p) <- %p", + *place, place, new_value)); + /* TODO: check for OOM condition here */ + if (is_item_tree_change_register_required()) + nocheck_register_item_tree_change(place, *place, mem_root); + *place= new_value; + DBUG_VOID_RETURN; + } + /** + Make change in item tree after checking whether it needs registering + + + @param place place where we should assign new value + @param new_value place of the new value + + @details + see check_and_register_item_tree_change details + */ + void check_and_register_item_tree(Item **place, Item **new_value) + { + if (!stmt_arena->is_conventional()) + check_and_register_item_tree_change(place, new_value, mem_root); + /* + We have to use memcpy instead of *place= *new_value merge to + avoid problems with strict aliasing. + */ + memcpy((char*) place, new_value, sizeof(*new_value)); + } + + /* + Cleanup statement parse state (parse tree, lex) and execution + state after execution of a non-prepared SQL statement. + */ + void end_statement(); + + /* + Mark thread to be killed, with optional error number and string. + string is not released, so it has to be allocted on thd mem_root + or be a global string + + Ensure that we don't replace a kill with a lesser one. For example + if user has done 'kill_connection' we shouldn't replace it with + KILL_QUERY. 
+ */ + inline void set_killed(killed_state killed_arg, + int killed_errno_arg= 0, + const char *killed_err_msg_arg= 0) + { + mysql_mutex_lock(&LOCK_thd_kill); + set_killed_no_mutex(killed_arg, killed_errno_arg, killed_err_msg_arg); + mysql_mutex_unlock(&LOCK_thd_kill); + } + /* + This is only used by THD::awake where we need to keep the lock mutex + locked over some time. + It's ok to have this inline, as in most cases killed_errno_arg will + be a constant 0 and most of the function will disappear. + */ + inline void set_killed_no_mutex(killed_state killed_arg, + int killed_errno_arg= 0, + const char *killed_err_msg_arg= 0) + { + if (killed <= killed_arg) + { + killed= killed_arg; + if (killed_errno_arg) + { + /* + If alloc fails, we only remember the killed flag. + The worst things that can happen is that we get + a suboptimal error message. + */ + killed_err= (err_info*) alloc_root(&main_mem_root, sizeof(*killed_err)); + if (likely(killed_err)) + { + killed_err->no= killed_errno_arg; + ::strmake((char*) killed_err->msg, killed_err_msg_arg, + sizeof(killed_err->msg)-1); + } + } + } + } + int killed_errno(); + void reset_killed(); + inline void reset_kill_query() + { + if (killed < KILL_CONNECTION) + { + reset_killed(); + mysys_var->abort= 0; + } + } + inline void send_kill_message() + { + mysql_mutex_lock(&LOCK_thd_kill); + int err= killed_errno(); + if (err) + my_message(err, killed_err ? 
killed_err->msg : ER_THD(this, err), MYF(0)); + mysql_mutex_unlock(&LOCK_thd_kill); + } + /* return TRUE if we will abort query if we make a warning now */ + inline bool really_abort_on_warning() + { + return (abort_on_warning && + (!transaction->stmt.modified_non_trans_table || + (variables.sql_mode & MODE_STRICT_ALL_TABLES))); + } + void set_status_var_init(); + void reset_n_backup_open_tables_state(Open_tables_backup *backup); + void restore_backup_open_tables_state(Open_tables_backup *backup); + void reset_sub_statement_state(Sub_statement_state *backup, uint new_state); + void restore_sub_statement_state(Sub_statement_state *backup); + void store_slow_query_state(Sub_statement_state *backup); + void reset_slow_query_state(); + void add_slow_query_state(Sub_statement_state *backup); + void set_n_backup_active_arena(Query_arena *set, Query_arena *backup); + void restore_active_arena(Query_arena *set, Query_arena *backup); + + inline void get_binlog_format(enum_binlog_format *format, + enum_binlog_format *current_format) + { + *format= (enum_binlog_format) variables.binlog_format; + *current_format= current_stmt_binlog_format; + } + inline enum_binlog_format get_current_stmt_binlog_format() + { + return current_stmt_binlog_format; + } + inline void set_binlog_format(enum_binlog_format format, + enum_binlog_format current_format) + { + DBUG_ENTER("set_binlog_format"); + variables.binlog_format= format; + current_stmt_binlog_format= current_format; + DBUG_VOID_RETURN; + } + inline void set_binlog_format_stmt() + { + DBUG_ENTER("set_binlog_format_stmt"); + variables.binlog_format= BINLOG_FORMAT_STMT; + current_stmt_binlog_format= BINLOG_FORMAT_STMT; + DBUG_VOID_RETURN; + } + /* + @todo Make these methods private or remove them completely. Only + decide_logging_format should call them. 
/Sven + */ + inline void set_current_stmt_binlog_format_row_if_mixed() + { + DBUG_ENTER("set_current_stmt_binlog_format_row_if_mixed"); + /* + This should only be called from decide_logging_format. + + @todo Once we have ensured this, uncomment the following + statement, remove the big comment below that, and remove the + in_sub_stmt==0 condition from the following 'if'. + */ + /* DBUG_ASSERT(in_sub_stmt == 0); */ + /* + If in a stored/function trigger, the caller should already have done the + change. We test in_sub_stmt to prevent introducing bugs where people + wouldn't ensure that, and would switch to row-based mode in the middle + of executing a stored function/trigger (which is too late, see also + reset_current_stmt_binlog_format_row()); this condition will make their + tests fail and so force them to propagate the + lex->binlog_row_based_if_mixed upwards to the caller. + */ + if ((wsrep_binlog_format(variables.binlog_format) == BINLOG_FORMAT_MIXED) && (in_sub_stmt == 0)) + set_current_stmt_binlog_format_row(); + + DBUG_VOID_RETURN; + } + + inline void set_current_stmt_binlog_format(enum_binlog_format format) + { + current_stmt_binlog_format= format; + } + + inline void set_current_stmt_binlog_format_row() + { + DBUG_ENTER("set_current_stmt_binlog_format_row"); + current_stmt_binlog_format= BINLOG_FORMAT_ROW; + DBUG_VOID_RETURN; + } + /* Set binlog format temporarily to statement. 
Returns old format */ + inline enum_binlog_format set_current_stmt_binlog_format_stmt() + { + enum_binlog_format orig_format= current_stmt_binlog_format; + DBUG_ENTER("set_current_stmt_binlog_format_stmt"); + current_stmt_binlog_format= BINLOG_FORMAT_STMT; + DBUG_RETURN(orig_format); + } + inline void restore_stmt_binlog_format(enum_binlog_format format) + { + DBUG_ENTER("restore_stmt_binlog_format"); + DBUG_ASSERT(!is_current_stmt_binlog_format_row()); + current_stmt_binlog_format= format; + DBUG_VOID_RETURN; + } + inline void reset_current_stmt_binlog_format_row() + { + DBUG_ENTER("reset_current_stmt_binlog_format_row"); + /* + If there are temporary tables, don't reset back to + statement-based. Indeed it could be that: + CREATE TEMPORARY TABLE t SELECT UUID(); # row-based + # and row-based does not store updates to temp tables + # in the binlog. + INSERT INTO u SELECT * FROM t; # stmt-based + and then the INSERT will fail as data inserted into t was not logged. + So we continue with row-based until the temp table is dropped. + If we are in a stored function or trigger, we mustn't reset in the + middle of its execution (as the binary logging way of a stored function + or trigger is decided when it starts executing, depending for example on + the caller (for a stored function: if caller is SELECT or + INSERT/UPDATE/DELETE...). + */ + DBUG_PRINT("debug", + ("temporary_tables: %s, in_sub_stmt: %s, system_thread: %s", + YESNO(has_temporary_tables()), YESNO(in_sub_stmt), + show_system_thread(system_thread))); + if (in_sub_stmt == 0) + { + if (wsrep_binlog_format(variables.binlog_format) == BINLOG_FORMAT_ROW) + set_current_stmt_binlog_format_row(); + else if (!has_temporary_tables()) + set_current_stmt_binlog_format_stmt(); + } + DBUG_VOID_RETURN; + } + + /** + Set the current database; use deep copy of C-string. + + @param new_db a pointer to the new database name. + @param new_db_len length of the new database name. 
+ + Initialize the current database from a NULL-terminated string with + length. If we run out of memory, we free the current database and + return TRUE. This way the user will notice the error as there will be + no current database selected (in addition to the error message set by + malloc). + + @note This operation just sets {db, db_length}. Switching the current + database usually involves other actions, like switching other database + attributes including security context. In the future, this operation + will be made private and more convenient interface will be provided. + + @return Operation status + @retval FALSE Success + @retval TRUE Out-of-memory error + */ + bool set_db(const LEX_CSTRING *new_db); + + /** Set the current database, without copying */ + void reset_db(const LEX_CSTRING *new_db); + + /* + Copy the current database to the argument. Use the current arena to + allocate memory for a deep copy: current database may be freed after + a statement is parsed but before it's executed. + + Can only be called by owner of thd (no mutex protection) + */ + bool copy_db_to(LEX_CSTRING *to) + { + if (db.str == NULL) + { + /* + No default database is set. In this case if it's guaranteed that + no CTE can be used in the statement then we can throw an error right + now at the parser stage. Otherwise the decision about throwing such + a message must be postponed until a post-parser stage when we are able + to resolve all CTE names as we don't need this message to be thrown + for any CTE references. + */ + if (!lex->with_cte_resolution) + my_message(ER_NO_DB_ERROR, ER(ER_NO_DB_ERROR), MYF(0)); + return TRUE; + } + + to->str= strmake(db.str, db.length); + to->length= db.length; + return to->str == NULL; /* True on error */ + } + /* Get db name or "". 
Use for printing current db */ + const char *get_db() + { return safe_str(db.str); } + + thd_scheduler event_scheduler; + +public: + inline Internal_error_handler *get_internal_handler() + { return m_internal_handler; } + + /** + Add an internal error handler to the thread execution context. + @param handler the exception handler to add + */ + void push_internal_handler(Internal_error_handler *handler); + +private: + /** + Handle a sql condition. + @param sql_errno the condition error number + @param sqlstate the condition sqlstate + @param level the condition level + @param msg the condition message text + @param[out] cond_hdl the sql condition raised, if any + @return true if the condition is handled + */ + bool handle_condition(uint sql_errno, + const char* sqlstate, + Sql_condition::enum_warning_level *level, + const char* msg, + Sql_condition ** cond_hdl); + +public: + /** + Remove the error handler last pushed. + */ + Internal_error_handler *pop_internal_handler(); + + /** + Raise an exception condition. + @param code the MYSQL_ERRNO error code of the error + */ + void raise_error(uint code); + + /** + Raise an exception condition, with a formatted message. + @param code the MYSQL_ERRNO error code of the error + */ + void raise_error_printf(uint code, ...); + + /** + Raise a completion condition (warning). + @param code the MYSQL_ERRNO error code of the warning + */ + void raise_warning(uint code); + + /** + Raise a completion condition (warning), with a formatted message. + @param code the MYSQL_ERRNO error code of the warning + */ + void raise_warning_printf(uint code, ...); + + /** + Raise a completion condition (note), with a fixed message. + @param code the MYSQL_ERRNO error code of the note + */ + void raise_note(uint code); + + /** + Raise an completion condition (note), with a formatted message. 
+ @param code the MYSQL_ERRNO error code of the note + */ + void raise_note_printf(uint code, ...); + + /** + @brief Push an error message into MySQL error stack with line + and position information. + + This function provides semantic action implementers with a way + to push the famous "You have a syntax error near..." error + message into the error stack, which is normally produced only if + a parse error is discovered internally by the Bison generated + parser. + */ + void parse_error(const char *err_text, const char *yytext) + { + Lex_input_stream *lip= &m_parser_state->m_lip; + if (!yytext && !(yytext= lip->get_tok_start())) + yytext= ""; + /* Push an error into the error stack */ + ErrConvString err(yytext, strlen(yytext), variables.character_set_client); + my_printf_error(ER_PARSE_ERROR, ER_THD(this, ER_PARSE_ERROR), MYF(0), + err_text, err.ptr(), lip->yylineno); + } + void parse_error(uint err_number, const char *yytext= 0) + { + parse_error(ER_THD(this, err_number), yytext); + } + void parse_error() + { + parse_error(ER_SYNTAX_ERROR); + } +#ifdef mysqld_error_find_printf_error_used + void parse_error(const char *t) + { + } +#endif +private: + /* + Only the implementation of the SIGNAL and RESIGNAL statements + is permitted to raise SQL conditions in a generic way, + or to raise them by bypassing handlers (RESIGNAL). + To raise a SQL condition, the code should use the public + raise_error() or raise_warning() methods provided by class THD. + */ + friend class Sql_cmd_common_signal; + friend class Sql_cmd_signal; + friend class Sql_cmd_resignal; + friend void push_warning(THD*, Sql_condition::enum_warning_level, uint, const char*); + friend void my_message_sql(uint, const char *, myf); + + /** + Raise a generic SQL condition. 
+ @param sql_errno the condition error number + @param sqlstate the condition SQLSTATE + @param level the condition level + @param msg the condition message text + @return The condition raised, or NULL + */ + Sql_condition* raise_condition(uint sql_errno, const char* sqlstate, + Sql_condition::enum_warning_level level, const char* msg) + { + Sql_condition cond(NULL, // don't strdup the msg + Sql_condition_identity(sql_errno, sqlstate, level, + Sql_user_condition_identity()), + msg, get_stmt_da()->current_row_for_warning()); + return raise_condition(&cond); + } + + Sql_condition* raise_condition(const Sql_condition *cond); + +private: + void push_warning_truncated_priv(Sql_condition::enum_warning_level level, + uint sql_errno, + const char *type_str, const char *val) + { + DBUG_ASSERT(sql_errno == ER_TRUNCATED_WRONG_VALUE || + sql_errno == ER_WRONG_VALUE); + char buff[MYSQL_ERRMSG_SIZE]; + CHARSET_INFO *cs= &my_charset_latin1; + cs->cset->snprintf(cs, buff, sizeof(buff), + ER_THD(this, sql_errno), type_str, val); + /* + Note: the format string can vary between ER_TRUNCATED_WRONG_VALUE + and ER_WRONG_VALUE, but the code passed to push_warning() is + always ER_TRUNCATED_WRONG_VALUE. This is intentional. 
+ */ + push_warning(this, level, ER_TRUNCATED_WRONG_VALUE, buff); + } +public: + void push_warning_truncated_wrong_value(Sql_condition::enum_warning_level level, + const char *type_str, const char *val) + { + return push_warning_truncated_priv(level, ER_TRUNCATED_WRONG_VALUE, + type_str, val); + } + void push_warning_wrong_value(Sql_condition::enum_warning_level level, + const char *type_str, const char *val) + { + return push_warning_truncated_priv(level, ER_WRONG_VALUE, type_str, val); + } + void push_warning_truncated_wrong_value(const char *type_str, const char *val) + { + return push_warning_truncated_wrong_value(Sql_condition::WARN_LEVEL_WARN, + type_str, val); + } + void push_warning_truncated_value_for_field(Sql_condition::enum_warning_level + level, const char *type_str, + const char *val, + const char *db_name, + const char *table_name, + const char *name) + { + DBUG_ASSERT(name); + char buff[MYSQL_ERRMSG_SIZE]; + CHARSET_INFO *cs= &my_charset_latin1; + + if (!db_name) + db_name= ""; + if (!table_name) + table_name= ""; + cs->cset->snprintf(cs, buff, sizeof(buff), + ER_THD(this, ER_TRUNCATED_WRONG_VALUE_FOR_FIELD), + type_str, val, db_name, table_name, name, + (ulong) get_stmt_da()->current_row_for_warning()); + push_warning(this, level, ER_TRUNCATED_WRONG_VALUE, buff); + + } + void push_warning_wrong_or_truncated_value(Sql_condition::enum_warning_level level, + bool totally_useless_value, + const char *type_str, + const char *val, + const char *db_name, + const char *table_name, + const char *field_name) + { + if (field_name) + push_warning_truncated_value_for_field(level, type_str, val, + db_name, table_name, field_name); + else if (totally_useless_value) + push_warning_wrong_value(level, type_str, val); + else + push_warning_truncated_wrong_value(level, type_str, val); + } + +public: + /** Overloaded to guard query/query_length fields */ + virtual void set_statement(Statement *stmt); + void set_command(enum enum_server_command command) + { + m_command= 
command; +#ifdef HAVE_PSI_THREAD_INTERFACE + PSI_STATEMENT_CALL(set_thread_command)(m_command); +#endif + } + inline enum enum_server_command get_command() const + { return m_command; } + + /** + Assign a new value to thd->query and thd->query_id and mysys_var. + Protected with LOCK_thd_data mutex. + */ + void set_query(char *query_arg, size_t query_length_arg, + CHARSET_INFO *cs_arg) + { + set_query(CSET_STRING(query_arg, query_length_arg, cs_arg)); + } + void set_query(char *query_arg, size_t query_length_arg) /*Mutex protected*/ + { + set_query(CSET_STRING(query_arg, query_length_arg, charset())); + } + void set_query(const CSET_STRING &string_arg) + { + mysql_mutex_lock(&LOCK_thd_data); + set_query_inner(string_arg); + mysql_mutex_unlock(&LOCK_thd_data); + + PSI_CALL_set_thread_info(query(), query_length()); + } + void reset_query() /* Mutex protected */ + { set_query(CSET_STRING()); } + void set_query_and_id(char *query_arg, uint32 query_length_arg, + CHARSET_INFO *cs, query_id_t new_query_id); + void set_query_id(query_id_t new_query_id) + { + query_id= new_query_id; +#ifdef WITH_WSREP + if (WSREP_NNULL(this)) + { + set_wsrep_next_trx_id(query_id); + WSREP_DEBUG("assigned new next trx id: %" PRIu64, wsrep_next_trx_id()); + } +#endif /* WITH_WSREP */ + } + void set_open_tables(TABLE *open_tables_arg) + { + mysql_mutex_lock(&LOCK_thd_data); + open_tables= open_tables_arg; + mysql_mutex_unlock(&LOCK_thd_data); + } + void set_mysys_var(struct st_my_thread_var *new_mysys_var); + void enter_locked_tables_mode(enum_locked_tables_mode mode_arg) + { + DBUG_ASSERT(locked_tables_mode == LTM_NONE); + + if (mode_arg == LTM_LOCK_TABLES) + { + /* + When entering LOCK TABLES mode we should set explicit duration + for all metadata locks acquired so far in order to avoid releasing + them till UNLOCK TABLES statement. 
+ We don't do this when entering prelocked mode since sub-statements + don't release metadata locks and restoring status-quo after leaving + prelocking mode gets complicated. + */ + mdl_context.set_explicit_duration_for_all_locks(); + } + + locked_tables_mode= mode_arg; + } + void leave_locked_tables_mode(); + /* Relesae transactional locks if there are no active transactions */ + void release_transactional_locks() + { + if (!in_active_multi_stmt_transaction()) + mdl_context.release_transactional_locks(this); + } + int decide_logging_format(TABLE_LIST *tables); + + /* + In Some cases when decide_logging_format is called it does not have + all information to decide the logging format. So that cases we call + decide_logging_format_2 at later stages in execution. + + One example would be binlog format for insert on duplicate key + (IODKU) but column with unique key is not inserted. We do not have + inserted columns info when we call decide_logging_format so on + later stage we call reconsider_logging_format_for_iodup() + */ + void reconsider_logging_format_for_iodup(TABLE *table); + + enum need_invoker { INVOKER_NONE=0, INVOKER_USER, INVOKER_ROLE}; + void binlog_invoker(bool role) { m_binlog_invoker= role ? INVOKER_ROLE : INVOKER_USER; } + enum need_invoker need_binlog_invoker() { return m_binlog_invoker; } + void get_definer(LEX_USER *definer, bool role); + void set_invoker(const LEX_CSTRING *user, const LEX_CSTRING *host) + { + invoker.user= *user; + invoker.host= *host; + } + LEX_CSTRING get_invoker_user() { return invoker.user; } + LEX_CSTRING get_invoker_host() { return invoker.host; } + bool has_invoker() { return invoker.user.length > 0; } + + void print_aborted_warning(uint threshold, const char *reason) + { + if (global_system_variables.log_warnings > threshold) + { + Security_context *sctx= &main_security_ctx; + sql_print_warning(ER_THD(this, ER_NEW_ABORTING_CONNECTION), + thread_id, (db.str ? db.str : "unconnected"), + sctx->user ? 
sctx->user : "unauthenticated", + sctx->host_or_ip, reason); + } + } + +public: + void clear_wakeup_ready() { wakeup_ready= false; } + /* + Sleep waiting for others to wake us up with signal_wakeup_ready(). + Must call clear_wakeup_ready() before waiting. + */ + void wait_for_wakeup_ready(); + /* Wake this thread up from wait_for_wakeup_ready(). */ + void signal_wakeup_ready(); + + void add_status_to_global() + { + DBUG_ASSERT(status_in_global == 0); + mysql_mutex_lock(&LOCK_status); + add_to_status(&global_status_var, &status_var); + /* Mark that this THD status has already been added in global status */ + status_var.global_memory_used= 0; + status_in_global= 1; + mysql_mutex_unlock(&LOCK_status); + } + + wait_for_commit *wait_for_commit_ptr; + int wait_for_prior_commit(bool allow_kill=true) + { + if (wait_for_commit_ptr) + return wait_for_commit_ptr->wait_for_prior_commit(this, allow_kill); + return 0; + } + void wakeup_subsequent_commits(int wakeup_error) + { + if (wait_for_commit_ptr) + wait_for_commit_ptr->wakeup_subsequent_commits(wakeup_error); + } + wait_for_commit *suspend_subsequent_commits() { + wait_for_commit *suspended= wait_for_commit_ptr; + wait_for_commit_ptr= NULL; + return suspended; + } + void resume_subsequent_commits(wait_for_commit *suspended) { + DBUG_ASSERT(!wait_for_commit_ptr); + wait_for_commit_ptr= suspended; + } + + void mark_transaction_to_rollback(bool all); + bool internal_transaction() { return transaction != &default_transaction; } +private: + + /** The current internal error handler for this thread, or NULL. */ + Internal_error_handler *m_internal_handler; + + /** + The lex to hold the parsed tree of conventional (non-prepared) queries. + Whereas for prepared and stored procedure statements we use an own lex + instance for each new query, for conventional statements we reuse + the same lex. (@see mysql_parse for details). 
+ */ + LEX main_lex; + /** + This memory root is used for two purposes: + - for conventional queries, to allocate structures stored in main_lex + during parsing, and allocate runtime data (execution plan, etc.) + during execution. + - for prepared queries, only to allocate runtime data. The parsed + tree itself is reused between executions and thus is stored elsewhere. + */ + MEM_ROOT main_mem_root; + Diagnostics_area main_da; + Diagnostics_area *m_stmt_da; + + /** + It will be set if CURRENT_USER() or CURRENT_ROLE() is called in account + management statements or default definer is set in CREATE/ALTER SP, SF, + Event, TRIGGER or VIEW statements. + + Current user or role will be binlogged into Query_log_event if + m_binlog_invoker is not NONE; It will be stored into invoker_host and + invoker_user by SQL thread. + */ + enum need_invoker m_binlog_invoker; + + /** + It points to the invoker in the Query_log_event. + SQL thread use it as the default definer in CREATE/ALTER SP, SF, Event, + TRIGGER or VIEW statements or current user in account management + statements if it is not NULL. + */ + AUTHID invoker; + +public: + Session_tracker session_tracker; + /* + Flag, mutex and condition for a thread to wait for a signal from another + thread. + + Currently used to wait for group commit to complete, can also be used for + other purposes. + */ + bool wakeup_ready; + mysql_mutex_t LOCK_wakeup_ready; + mysql_cond_t COND_wakeup_ready; + /* + The GTID assigned to the last commit. If no GTID was assigned to any commit + so far, this is indicated by last_commit_gtid.seq_no == 0. 
+ */ +private: + rpl_gtid m_last_commit_gtid; + +public: + rpl_gtid get_last_commit_gtid() { return m_last_commit_gtid; } + void set_last_commit_gtid(rpl_gtid >id); + + + LF_PINS *tdc_hash_pins; + LF_PINS *xid_hash_pins; + bool fix_xid_hash_pins(); + + const XID *get_xid() const + { +#ifdef WITH_WSREP + if (!wsrep_xid.is_null()) + return &wsrep_xid; +#endif /* WITH_WSREP */ + return (transaction->xid_state.is_explicit_XA() ? + transaction->xid_state.get_xid() : + &transaction->implicit_xid); + } + +/* Members related to temporary tables. */ +public: + /* Opened table states. */ + enum Temporary_table_state { + TMP_TABLE_IN_USE, + TMP_TABLE_NOT_IN_USE, + TMP_TABLE_ANY + }; + bool has_thd_temporary_tables(); + bool has_temporary_tables(); + + TABLE *create_and_open_tmp_table(LEX_CUSTRING *frm, + const char *path, + const char *db, + const char *table_name, + bool open_internal_tables); + + TABLE *find_temporary_table(const char *db, const char *table_name, + Temporary_table_state state= TMP_TABLE_IN_USE); + TABLE *find_temporary_table(const TABLE_LIST *tl, + Temporary_table_state state= TMP_TABLE_IN_USE); + + TMP_TABLE_SHARE *find_tmp_table_share_w_base_key(const char *key, + uint key_length); + TMP_TABLE_SHARE *find_tmp_table_share(const char *db, + const char *table_name); + TMP_TABLE_SHARE *find_tmp_table_share(const TABLE_LIST *tl); + TMP_TABLE_SHARE *find_tmp_table_share(const char *key, size_t key_length); + + bool open_temporary_table(TABLE_LIST *tl); + bool open_temporary_tables(TABLE_LIST *tl); + + bool close_temporary_tables(); + bool rename_temporary_table(TABLE *table, const LEX_CSTRING *db, + const LEX_CSTRING *table_name); + bool drop_temporary_table(TABLE *table, bool *is_trans, bool delete_table); + bool rm_temporary_table(handlerton *hton, const char *path); + void mark_tmp_tables_as_free_for_reuse(); + void mark_tmp_table_as_free_for_reuse(TABLE *table); + + TMP_TABLE_SHARE* save_tmp_table_share(TABLE *table); + void 
restore_tmp_table_share(TMP_TABLE_SHARE *share); + void close_unused_temporary_table_instances(const TABLE_LIST *tl); + +private: + /* Whether a lock has been acquired? */ + bool m_tmp_tables_locked; + + uint create_tmp_table_def_key(char *key, const char *db, + const char *table_name); + TMP_TABLE_SHARE *create_temporary_table(LEX_CUSTRING *frm, + const char *path, const char *db, + const char *table_name); + TABLE *find_temporary_table(const char *key, uint key_length, + Temporary_table_state state); + TABLE *open_temporary_table(TMP_TABLE_SHARE *share, const char *alias); + bool find_and_use_tmp_table(const TABLE_LIST *tl, TABLE **out_table); + bool use_temporary_table(TABLE *table, TABLE **out_table); + void close_temporary_table(TABLE *table); + bool log_events_and_free_tmp_shares(); + bool free_tmp_table_share(TMP_TABLE_SHARE *share, bool delete_table); + void free_temporary_table(TABLE *table); + bool lock_temporary_tables(); + void unlock_temporary_tables(); + + inline uint tmpkeyval(TMP_TABLE_SHARE *share) + { + return uint4korr(share->table_cache_key.str + + share->table_cache_key.length - 4); + } + + inline TMP_TABLE_SHARE *tmp_table_share(TABLE *table) + { + DBUG_ASSERT(table->s->tmp_table); + return static_cast(table->s); + } + +public: + thd_async_state async_state; +#ifdef HAVE_REPLICATION + /* + If we do a purge of binary logs, log index info of the threads + that are currently reading it needs to be adjusted. To do that + each thread that is using LOG_INFO needs to adjust the pointer to it + */ + LOG_INFO *current_linfo; + Slave_info *slave_info; + + void set_current_linfo(LOG_INFO *linfo); + void reset_current_linfo() { set_current_linfo(0); } + + int register_slave(uchar *packet, size_t packet_length); + void unregister_slave(); + bool is_binlog_dump_thread(); +#endif + + /* + Indicates if this thread is suspended due to awaiting an ACK from a + replica. True if suspended, false otherwise. 
+ + Note that this variable is protected by Repl_semi_sync_master::LOCK_binlog + */ + bool is_awaiting_semisync_ack; + + inline ulong wsrep_binlog_format(ulong binlog_format) const + { +#ifdef WITH_WSREP + // During CTAS we force ROW format + if (wsrep_ctas) + return BINLOG_FORMAT_ROW; + else + return ((wsrep_forced_binlog_format != BINLOG_FORMAT_UNSPEC) ? + wsrep_forced_binlog_format : binlog_format); +#else + return (binlog_format); +#endif + } + +#ifdef WITH_WSREP + bool wsrep_applier; /* dedicated slave applier thread */ + bool wsrep_applier_closing; /* applier marked to close */ + bool wsrep_client_thread; /* to identify client threads*/ + query_id_t wsrep_last_query_id; + XID wsrep_xid; + + /** This flag denotes that record locking should be skipped during INSERT + and gap locking during SELECT. Only used by the streaming replication thread + that only modifies the wsrep_schema.SR table. */ + my_bool wsrep_skip_locking; + + mysql_cond_t COND_wsrep_thd; + + // changed from wsrep_seqno_t to wsrep_trx_meta_t in wsrep API rev 75 + uint32 wsrep_rand; + rpl_group_info *wsrep_rgi; + bool wsrep_converted_lock_session; + char wsrep_info[128]; /* string for dynamic proc info */ + ulong wsrep_retry_counter; // of autocommit + bool wsrep_PA_safe; + char* wsrep_retry_query; + size_t wsrep_retry_query_len; + enum enum_server_command wsrep_retry_command; + enum wsrep_consistency_check_mode + wsrep_consistency_check; + std::vector wsrep_status_vars; + int wsrep_mysql_replicated; + const char* wsrep_TOI_pre_query; /* a query to apply before + the actual TOI query */ + size_t wsrep_TOI_pre_query_len; + wsrep_po_handle_t wsrep_po_handle; + size_t wsrep_po_cnt; + void *wsrep_apply_format; + uchar* wsrep_rbr_buf; + wsrep_gtid_t wsrep_sync_wait_gtid; + uint64 wsrep_last_written_gtid_seqno; + uint64 wsrep_current_gtid_seqno; + ulong wsrep_affected_rows; + bool wsrep_has_ignored_error; + /* true if wsrep_on was ON in last wsrep_on_update */ + bool wsrep_was_on; + + /* + When 
enabled, do not replicate/binlog updates from the current table that's + being processed. At the moment, it is used to keep mysql.gtid_slave_pos + table updates from being replicated to other nodes via galera replication. + */ + bool wsrep_ignore_table; + /* thread who has started kill for this THD protected by LOCK_thd_data*/ + my_thread_id wsrep_aborter; + /* Kill signal used, if thread was killed by manual KILL. Protected by + LOCK_thd_kill. */ + std::atomic wsrep_abort_by_kill; + /* */ + struct err_info* wsrep_abort_by_kill_err; +#ifndef DBUG_OFF + int wsrep_killed_state; +#endif /* DBUG_OFF */ + /* true if BF abort is observed in do_command() right after reading + client's packet, and if the client has sent PS execute command. */ + bool wsrep_delayed_BF_abort; + // true if this transaction is CREATE TABLE AS SELECT (CTAS) + bool wsrep_ctas; + /* + Transaction id: + * m_wsrep_next_trx_id is assigned on the first query after + wsrep_next_trx_id() return WSREP_UNDEFINED_TRX_ID + * Each storage engine must assign value of wsrep_next_trx_id() + when the transaction starts. + * Effective transaction id is returned via wsrep_trx_id() + */ + /* + Return effective transaction id + */ + wsrep_trx_id_t wsrep_trx_id() const + { + return m_wsrep_client_state.transaction().id().get(); + } + + + /* + Set next trx id + */ + void set_wsrep_next_trx_id(query_id_t query_id) + { + m_wsrep_next_trx_id = (wsrep_trx_id_t) query_id; + } + /* + Return next trx id + */ + wsrep_trx_id_t wsrep_next_trx_id() const + { + return m_wsrep_next_trx_id; + } + /* + If node is async slave and have parallel execution, wait for prior commits. 
+ */ + bool wsrep_parallel_slave_wait_for_prior_commit(); +private: + wsrep_trx_id_t m_wsrep_next_trx_id; /* cast from query_id_t */ + /* wsrep-lib */ + Wsrep_mutex m_wsrep_mutex; + Wsrep_condition_variable m_wsrep_cond; + Wsrep_client_service m_wsrep_client_service; + Wsrep_client_state m_wsrep_client_state; + +public: + Wsrep_client_state& wsrep_cs() { return m_wsrep_client_state; } + const Wsrep_client_state& wsrep_cs() const { return m_wsrep_client_state; } + const wsrep::transaction& wsrep_trx() const + { return m_wsrep_client_state.transaction(); } + const wsrep::streaming_context& wsrep_sr() const + { return m_wsrep_client_state.transaction().streaming_context(); } + /* Pointer to applier service for streaming THDs. This is needed to + be able to delete applier service object in case of background + rollback. */ + Wsrep_applier_service* wsrep_applier_service; + /* wait_for_commit struct for binlog group commit */ + wait_for_commit wsrep_wfc; +#endif /* WITH_WSREP */ + + /* Handling of timeouts for commands */ + thr_timer_t query_timer; + +public: + void set_query_timer() + { +#ifndef EMBEDDED_LIBRARY + /* + Slave vs user threads have timeouts configured via different variables, + so pick the appropriate one to use. + */ + ulonglong timeout_val= + slave_thread ? 
slave_max_statement_time : variables.max_statement_time; + + /* + Don't start a query timer if + - If timeouts are not set + - if we are in a stored procedure or sub statement + - If we already have set a timeout (happens when running prepared + statements that calls mysql_execute_command()) + */ + if (!timeout_val || spcont || in_sub_stmt || query_timer.expired == 0) + return; + thr_timer_settime(&query_timer, timeout_val); +#endif + } + void reset_query_timer() + { +#ifndef EMBEDDED_LIBRARY + if (spcont || in_sub_stmt) + return; + if (!query_timer.expired) + thr_timer_end(&query_timer); +#endif + } + bool restore_set_statement_var() + { + return main_lex.restore_set_statement_var(); + } + /* Copy relevant `stmt` transaction flags to `all` transaction. */ + void merge_unsafe_rollback_flags() + { + if (transaction->stmt.modified_non_trans_table) + transaction->all.modified_non_trans_table= TRUE; + transaction->all.m_unsafe_rollback_flags|= + (transaction->stmt.m_unsafe_rollback_flags & + (THD_TRANS::MODIFIED_NON_TRANS_TABLE | + THD_TRANS::DID_WAIT | THD_TRANS::CREATED_TEMP_TABLE | + THD_TRANS::DROPPED_TEMP_TABLE | THD_TRANS::DID_DDL | + THD_TRANS::EXECUTED_TABLE_ADMIN_CMD)); + } + + uint get_net_wait_timeout() + { + if (in_active_multi_stmt_transaction()) + { + if (transaction->all.is_trx_read_write()) + { + if (variables.idle_write_transaction_timeout > 0) + return variables.idle_write_transaction_timeout; + } + else + { + if (variables.idle_readonly_transaction_timeout > 0) + return variables.idle_readonly_transaction_timeout; + } + + if (variables.idle_transaction_timeout > 0) + return variables.idle_transaction_timeout; + } + + return variables.net_wait_timeout; + } + + /** + Switch to a sublex, to parse a substatement or an expression. + */ + void set_local_lex(sp_lex_local *sublex) + { + DBUG_ASSERT(lex->sphead); + lex= sublex; + /* Reset part of parser state which needs this. 
*/ + m_parser_state->m_yacc.reset_before_substatement(); + } + + /** + Switch back from a sublex (currently pointed by this->lex) to the old lex. + Sublex is merged to "oldlex" and this->lex is set to "oldlex". + + This method is called after parsing a substatement or an expression. + set_local_lex() must be previously called. + @param oldlex - The old lex which was active before set_local_lex(). + @returns - false on success, true on error (failed to merge LEX's). + + See also sp_head::merge_lex(). + */ + bool restore_from_local_lex_to_old_lex(LEX *oldlex); + + Item *sp_fix_func_item(Item **it_addr); + Item *sp_fix_func_item_for_assignment(const Field *to, Item **it_addr); + Item *sp_prepare_func_item(Item **it_addr, uint cols); + bool sp_eval_expr(Field *result_field, Item **expr_item_ptr); + + bool sql_parser(LEX *old_lex, LEX *lex, + char *str, uint str_len, bool stmt_prepare_mode); + + myf get_utf8_flag() const + { + return (variables.old_behavior & OLD_MODE_UTF8_IS_UTF8MB3 ? + MY_UTF8_IS_UTF8MB3 : 0); + } + + Charset_collation_context + charset_collation_context_create_db() const + { + return Charset_collation_context(variables.collation_server, + variables.collation_server); + } + Charset_collation_context + charset_collation_context_alter_db(const char *db); + Charset_collation_context + charset_collation_context_create_table_in_db(const char *db); + Charset_collation_context + charset_collation_context_alter_table(const TABLE_SHARE *s); + + /** + Save current lex to the output parameter and reset it to point to + main_lex. This method is called from mysql_client_binlog_statement() + to temporary + + @param[out] backup_lex original value of current lex + */ + + void backup_and_reset_current_lex(LEX **backup_lex) + { + *backup_lex= lex; + lex= &main_lex; + } + + + /** + Restore current lex to its original value it had before calling the method + backup_and_reset_current_lex(). 
+ + @param backup_lex original value of current lex + */ + + void restore_current_lex(LEX *backup_lex) + { + lex= backup_lex; + } + + bool should_collect_handler_stats() const + { + return (variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_ENGINE) || + lex->analyze_stmt; + } + + /* Return true if we should create a note when an unusable key is found */ + bool give_notes_for_unusable_keys() + { + return ((variables.note_verbosity & (NOTE_VERBOSITY_UNUSABLE_KEYS)) || + (lex->describe && // Is EXPLAIN + (variables.note_verbosity & NOTE_VERBOSITY_EXPLAIN))); + } + + bool vers_insert_history_fast(const TABLE *table) + { + DBUG_ASSERT(table->versioned()); + return table->versioned(VERS_TIMESTAMP) && + (variables.option_bits & OPTION_INSERT_HISTORY) && + lex->duplicates == DUP_ERROR; + } + + bool vers_insert_history(const Field *field) + { + if (!field->vers_sys_field()) + return false; + if (!vers_insert_history_fast(field->table)) + return false; + if (lex->sql_command != SQLCOM_INSERT && + lex->sql_command != SQLCOM_INSERT_SELECT && + lex->sql_command != SQLCOM_LOAD) + return false; + return !is_set_timestamp_forbidden(this); + } +}; + + +/* + Start a new independent transaction for the THD. 
+ The old one is stored in this object and restored when calling + restore_old_transaction() or when the object is freed +*/ + +class start_new_trans +{ + /* container for handler's private per-connection data */ + Ha_data old_ha_data[MAX_HA]; + struct THD::st_transactions *old_transaction, new_transaction; + Open_tables_backup open_tables_state_backup; + MDL_savepoint mdl_savepoint; + PSI_transaction_locker *m_transaction_psi; + THD *org_thd; + uint in_sub_stmt; + uint server_status; + my_bool wsrep_on; + +public: + start_new_trans(THD *thd); + ~start_new_trans() + { + destroy(); + } + void destroy() + { + if (org_thd) // Safety + restore_old_transaction(); + new_transaction.free(); + } + void restore_old_transaction(); +}; + +/** A short cut for thd->get_stmt_da()->set_ok_status(). */ + +inline void +my_ok(THD *thd, ulonglong affected_rows_arg= 0, ulonglong id= 0, + const char *message= NULL) +{ + thd->set_row_count_func(affected_rows_arg); + thd->set_affected_rows(affected_rows_arg); + thd->get_stmt_da()->set_ok_status(affected_rows_arg, id, message); +} + + +/** A short cut for thd->get_stmt_da()->set_eof_status(). 
*/ + +inline void +my_eof(THD *thd) +{ + thd->set_row_count_func(-1); + thd->get_stmt_da()->set_eof_status(thd); + + TRANSACT_TRACKER(add_trx_state(thd, TX_RESULT_SET)); +} + +#define tmp_disable_binlog(A) \ + {ulonglong tmp_disable_binlog__save_options= (A)->variables.option_bits; \ + (A)->variables.option_bits&= ~OPTION_BIN_LOG; \ + (A)->variables.option_bits|= OPTION_BIN_TMP_LOG_OFF; + +#define reenable_binlog(A) \ + (A)->variables.option_bits= tmp_disable_binlog__save_options; } + + +inline date_conv_mode_t sql_mode_for_dates(THD *thd) +{ + static_assert((ulonglong(date_conv_mode_t::KNOWN_MODES) & + ulonglong(time_round_mode_t::KNOWN_MODES)) == 0, + "date_conv_mode_t and time_round_mode_t must use different " + "bit values"); + static_assert(MODE_NO_ZERO_DATE == date_mode_t::NO_ZERO_DATE && + MODE_NO_ZERO_IN_DATE == date_mode_t::NO_ZERO_IN_DATE && + MODE_INVALID_DATES == date_mode_t::INVALID_DATES, + "sql_mode_t and date_mode_t values must be equal"); + return date_conv_mode_t(thd->variables.sql_mode & + (MODE_NO_ZERO_DATE | MODE_NO_ZERO_IN_DATE | MODE_INVALID_DATES)); +} + +/* + Used to hold information about file and file structure in exchange + via non-DB file (...INTO OUTFILE..., ...LOAD DATA...) + XXX: We never call destructor for objects of this class. 
+*/ + +class sql_exchange :public Sql_alloc +{ +public: + enum enum_filetype filetype; /* load XML, Added by Arnold & Erik */ + const char *file_name; + String *field_term,*enclosed,*line_term,*line_start,*escaped; + bool opt_enclosed; + bool dumpfile; + ulong skip_lines; + CHARSET_INFO *cs; + sql_exchange(const char *name, bool dumpfile_flag, + enum_filetype filetype_arg= FILETYPE_CSV); + bool escaped_given(void) const; +}; + +/* + This is used to get result from a select +*/ + +class JOIN; + +/* Pure interface for sending tabular data */ +class select_result_sink: public Sql_alloc +{ +public: + THD *thd; + select_result_sink(THD *thd_arg): thd(thd_arg) {} + inline int send_data_with_check(List &items, + SELECT_LEX_UNIT *u, + ha_rows sent) + { + if (u->lim.check_offset(sent)) + return 0; + + if (u->thd->killed == ABORT_QUERY) + return 0; + + return send_data(items); + } + /* + send_data returns 0 on ok, 1 on error and -1 if data was ignored, for + example for a duplicate row entry written to a temp table. + */ + virtual int send_data(List &items)=0; + virtual ~select_result_sink() = default; + void reset(THD *thd_arg) { thd= thd_arg; } +}; + +class select_result_interceptor; + +/* + Interface for sending tabular data, together with some other stuff: + + - Primary purpose seems to be seding typed tabular data: + = the DDL is sent with send_fields() + = the rows are sent with send_data() + Besides that, + - there seems to be an assumption that the sent data is a result of + SELECT_LEX_UNIT *unit, + - nest_level is used by SQL parser +*/ + +class select_result :public select_result_sink +{ +protected: + /* + All descendant classes have their send_data() skip the first + unit->offset_limit_cnt rows sent. Select_materialize + also uses unit->get_column_types(). 
+ */ + SELECT_LEX_UNIT *unit; + /* Something used only by the parser: */ +public: + ha_rows est_records; /* estimated number of records in the result */ + select_result(THD *thd_arg): select_result_sink(thd_arg), est_records(0) {} + void set_unit(SELECT_LEX_UNIT *unit_arg) { unit= unit_arg; } + virtual ~select_result() = default; + /** + Change wrapped select_result. + + Replace the wrapped result object with new_result and call + prepare() and prepare2() on new_result. + + This base class implementation doesn't wrap other select_results. + + @param new_result The new result object to wrap around + + @retval false Success + @retval true Error + */ + virtual bool change_result(select_result *new_result) + { + return false; + } + virtual int prepare(List &list, SELECT_LEX_UNIT *u) + { + unit= u; + return 0; + } + virtual int prepare2(JOIN *join) { return 0; } + /* + Because of peculiarities of prepared statements protocol + we need to know number of columns in the result set (if + there is a result set) apart from sending columns metadata. + */ + virtual uint field_count(List &fields) const + { return fields.elements; } + virtual bool send_result_set_metadata(List &list, uint flags)=0; + virtual bool initialize_tables (JOIN *join) { return 0; } + virtual bool send_eof()=0; + /** + Check if this query returns a result set and therefore is allowed in + cursors and set an error message if it is not the case. + + @retval FALSE success + @retval TRUE error, an error message is set + */ + virtual bool check_simple_select() const; + virtual void abort_result_set() {} + /* + Cleanup instance of this class for next execution of a prepared + statement/stored procedure. 
+ */ + virtual void cleanup(); + void set_thd(THD *thd_arg) { thd= thd_arg; } + void reset(THD *thd_arg) + { + select_result_sink::reset(thd_arg); + unit= NULL; + } +#ifdef EMBEDDED_LIBRARY + virtual void begin_dataset() {} +#else + void begin_dataset() {} +#endif + virtual void update_used_tables() {} + + /* this method is called just before the first row of the table can be read */ + virtual void prepare_to_read_rows() {} + + void remove_offset_limit() + { + unit->lim.remove_offset(); + } + + /* + This returns + - NULL if the class sends output row to the client + - this if the output is set elsewhere (a file, @variable, or table). + */ + virtual select_result_interceptor *result_interceptor()=0; + + /* + This method is used to distinguish an normal SELECT from the cursor + structure discovery for cursor%ROWTYPE routine variables. + If this method returns "true", then a SELECT execution performs only + all preparation stages, but does not fetch any rows. + */ + virtual bool view_structure_only() const { return false; } +}; + + +/* + This is a select_result_sink which simply writes all data into a (temporary) + table. Creation/deletion of the table is outside of the scope of the class + + It is aimed at capturing SHOW EXPLAIN output, so: + - Unlike select_result class, we don't assume that the sent data is an + output of a SELECT_LEX_UNIT (and so we don't apply "LIMIT x,y" from the + unit) + - We don't try to convert the target table to MyISAM +*/ + +class select_result_explain_buffer : public select_result_sink +{ +public: + select_result_explain_buffer(THD *thd_arg, TABLE *table_arg) : + select_result_sink(thd_arg), dst_table(table_arg) {}; + + TABLE *dst_table; /* table to write into */ + + /* The following is called in the child thread: */ + int send_data(List &items); +}; + + +/* + This is a select_result_sink which stores the data in text form. + + It is only used to save EXPLAIN output. 
+*/ + +class select_result_text_buffer : public select_result_sink +{ +public: + select_result_text_buffer(THD *thd_arg): select_result_sink(thd_arg) {} + int send_data(List &items); + bool send_result_set_metadata(List &fields, uint flag); + + void save_to(String *res); +private: + int append_row(List &items, bool send_names); + + List rows; + int n_columns; +}; + + +/* + Base class for select_result descendands which intercept and + transform result set rows. As the rows are not sent to the client, + sending of result set metadata should be suppressed as well. +*/ + +class select_result_interceptor: public select_result +{ +public: + select_result_interceptor(THD *thd_arg): + select_result(thd_arg), suppress_my_ok(false) + { + DBUG_ENTER("select_result_interceptor::select_result_interceptor"); + DBUG_PRINT("enter", ("this %p", this)); + DBUG_VOID_RETURN; + } /* Remove gcc warning */ + uint field_count(List &fields) const { return 0; } + bool send_result_set_metadata(List &fields, uint flag) { return FALSE; } + select_result_interceptor *result_interceptor() { return this; } + + /* + Instruct the object to not call my_ok(). Client output will be handled + elsewhere. (this is used by ANALYZE $stmt feature). 
+ */ + void disable_my_ok_calls() { suppress_my_ok= true; } + void reset(THD *thd_arg) + { + select_result::reset(thd_arg); + suppress_my_ok= false; + } +protected: + bool suppress_my_ok; +}; + + +class sp_cursor_statistics +{ +protected: + ulonglong m_fetch_count; // Number of FETCH commands since last OPEN + ulonglong m_row_count; // Number of successful FETCH since last OPEN + bool m_found; // If last FETCH fetched a row +public: + sp_cursor_statistics() + :m_fetch_count(0), + m_row_count(0), + m_found(false) + { } + bool found() const + { return m_found; } + + ulonglong row_count() const + { return m_row_count; } + + ulonglong fetch_count() const + { return m_fetch_count; } + void reset() { *this= sp_cursor_statistics(); } +}; + + +/* A mediator between stored procedures and server side cursors */ +class sp_lex_keeper; +class sp_cursor: public sp_cursor_statistics +{ +private: + /// An interceptor of cursor result set used to implement + /// FETCH INTO . + class Select_fetch_into_spvars: public select_result_interceptor + { + List *spvar_list; + uint field_count; + bool m_view_structure_only; + bool send_data_to_variable_list(List &vars, List &items); + public: + Select_fetch_into_spvars(THD *thd_arg, bool view_structure_only) + :select_result_interceptor(thd_arg), + m_view_structure_only(view_structure_only) + {} + void reset(THD *thd_arg) + { + select_result_interceptor::reset(thd_arg); + spvar_list= NULL; + field_count= 0; + } + uint get_field_count() { return field_count; } + void set_spvar_list(List *vars) { spvar_list= vars; } + + virtual bool send_eof() { return FALSE; } + virtual int send_data(List &items); + virtual int prepare(List &list, SELECT_LEX_UNIT *u); + virtual bool view_structure_only() const { return m_view_structure_only; } +}; + +public: + sp_cursor() + :result(NULL, false), + m_lex_keeper(NULL), + server_side_cursor(NULL) + { } + sp_cursor(THD *thd_arg, sp_lex_keeper *lex_keeper, bool view_structure_only) + :result(thd_arg, 
view_structure_only), + m_lex_keeper(lex_keeper), + server_side_cursor(NULL) + {} + + virtual ~sp_cursor() + { destroy(); } + + sp_lex_keeper *get_lex_keeper() { return m_lex_keeper; } + + int open(THD *thd); + + int close(THD *thd); + + my_bool is_open() + { return MY_TEST(server_side_cursor); } + + int fetch(THD *, List *vars, bool error_on_no_data); + + bool export_structure(THD *thd, Row_definition_list *list); + + void reset(THD *thd_arg, sp_lex_keeper *lex_keeper) + { + sp_cursor_statistics::reset(); + result.reset(thd_arg); + m_lex_keeper= lex_keeper; + server_side_cursor= NULL; + } + +private: + Select_fetch_into_spvars result; + sp_lex_keeper *m_lex_keeper; + Server_side_cursor *server_side_cursor; + void destroy(); +}; + + +class select_send :public select_result { + /** + True if we have sent result set metadata to the client. + In this case the client always expects us to end the result + set with an eof or error packet + */ + bool is_result_set_started; +public: + select_send(THD *thd_arg): + select_result(thd_arg), is_result_set_started(FALSE) {} + bool send_result_set_metadata(List &list, uint flags); + int send_data(List &items); + bool send_eof(); + virtual bool check_simple_select() const { return FALSE; } + void abort_result_set(); + virtual void cleanup(); + select_result_interceptor *result_interceptor() { return NULL; } +}; + + +/* + We need this class, because select_send::send_eof() will call ::my_eof. + + See also class Protocol_discard. 
+*/ + +class select_send_analyze : public select_send +{ + bool send_result_set_metadata(List &list, uint flags) { return 0; } + bool send_eof() { return 0; } + void abort_result_set() {} +public: + select_send_analyze(THD *thd_arg): select_send(thd_arg) {} +}; + + +class select_to_file :public select_result_interceptor { +protected: + sql_exchange *exchange; + File file; + IO_CACHE cache; + ha_rows row_count; + char path[FN_REFLEN]; + +public: + select_to_file(THD *thd_arg, sql_exchange *ex): + select_result_interceptor(thd_arg), exchange(ex), file(-1),row_count(0L) + { path[0]=0; } + ~select_to_file(); + bool send_eof(); + void cleanup(); +}; + + +#define ESCAPE_CHARS "ntrb0ZN" // keep synchronous with READ_INFO::unescape + + +/* + List of all possible characters of a numeric value text representation. +*/ +#define NUMERIC_CHARS ".0123456789e+-" + + +class select_export :public select_to_file { + uint field_term_length; + int field_sep_char,escape_char,line_sep_char; + int field_term_char; // first char of FIELDS TERMINATED BY or MAX_INT + /* + The is_ambiguous_field_sep field is true if a value of the field_sep_char + field is one of the 'n', 't', 'r' etc characters + (see the READ_INFO::unescape method and the ESCAPE_CHARS constant value). + */ + bool is_ambiguous_field_sep; + /* + The is_ambiguous_field_term is true if field_sep_char contains the first + char of the FIELDS TERMINATED BY (ENCLOSED BY is empty), and items can + contain this character. + */ + bool is_ambiguous_field_term; + /* + The is_unsafe_field_sep field is true if a value of the field_sep_char + field is one of the '0'..'9', '+', '-', '.' and 'e' characters + (see the NUMERIC_CHARS constant value). 
+ */ + bool is_unsafe_field_sep; + bool fixed_row_size; + CHARSET_INFO *write_cs; // output charset +public: + select_export(THD *thd_arg, sql_exchange *ex): select_to_file(thd_arg, ex) {} + ~select_export(); + int prepare(List &list, SELECT_LEX_UNIT *u); + int send_data(List &items); +}; + + +class select_dump :public select_to_file { +public: + select_dump(THD *thd_arg, sql_exchange *ex): select_to_file(thd_arg, ex) {} + int prepare(List &list, SELECT_LEX_UNIT *u); + int send_data(List &items); +}; + + +class select_insert :public select_result_interceptor { + public: + select_result *sel_result; + TABLE_LIST *table_list; + TABLE *table; + List *fields; + ulonglong autoinc_value_of_last_inserted_row; // autogenerated or not + COPY_INFO info; + bool insert_into_view; + select_insert(THD *thd_arg, TABLE_LIST *table_list_par, TABLE *table_par, + List *fields_par, List *update_fields, + List *update_values, enum_duplicates duplic, + bool ignore, select_result *sel_ret_list); + ~select_insert(); + int prepare(List &list, SELECT_LEX_UNIT *u); + virtual int prepare2(JOIN *join); + virtual int send_data(List &items); + virtual bool store_values(List &values); + virtual bool can_rollback_data() { return 0; } + bool prepare_eof(); + bool send_ok_packet(); + bool send_eof(); + virtual void abort_result_set(); + /* not implemented: select_insert is never re-used in prepared statements */ + void cleanup(); +}; + + +class select_create: public select_insert { + Table_specification_st *create_info; + TABLE_LIST *select_tables; + Alter_info *alter_info; + Field **field; + /* lock data for tmp table */ + MYSQL_LOCK *m_lock; + /* m_lock or thd->extra_lock */ + MYSQL_LOCK **m_plock; + bool exit_done; + TMP_TABLE_SHARE *saved_tmp_table_share; + DDL_LOG_STATE ddl_log_state_create, ddl_log_state_rm; + +public: + select_create(THD *thd_arg, TABLE_LIST *table_arg, + Table_specification_st *create_info_par, + Alter_info *alter_info_arg, + List &select_fields,enum_duplicates duplic, bool 
ignore, + TABLE_LIST *select_tables_arg): + select_insert(thd_arg, table_arg, NULL, &select_fields, 0, 0, duplic, + ignore, NULL), + create_info(create_info_par), + select_tables(select_tables_arg), + alter_info(alter_info_arg), + m_plock(NULL), exit_done(0), + saved_tmp_table_share(0) + { + DBUG_ASSERT(create_info->default_table_charset); + bzero(&ddl_log_state_create, sizeof(ddl_log_state_create)); + bzero(&ddl_log_state_rm, sizeof(ddl_log_state_rm)); + } + int prepare(List &list, SELECT_LEX_UNIT *u); + + int binlog_show_create_table(TABLE **tables, uint count); + bool store_values(List &values); + bool send_eof(); + virtual void abort_result_set(); + virtual bool can_rollback_data() { return 1; } + + // Needed for access from local class MY_HOOKS in prepare(), since thd is proteted. + const THD *get_thd(void) { return thd; } + const HA_CREATE_INFO *get_create_info() { return create_info; }; + int prepare2(JOIN *join) { return 0; } + +private: + TABLE *create_table_from_items(THD *thd, + List *items, + MYSQL_LOCK **lock); + int postlock(THD *thd, TABLE **tables); +}; + +#include + +#ifdef WITH_ARIA_STORAGE_ENGINE +#include +#else +#undef USE_ARIA_FOR_TMP_TABLES +#endif + +#ifdef USE_ARIA_FOR_TMP_TABLES +#define TMP_ENGINE_COLUMNDEF MARIA_COLUMNDEF +#define TMP_ENGINE_HTON maria_hton +#define TMP_ENGINE_NAME "Aria" +inline uint tmp_table_max_key_length() { return maria_max_key_length(); } +inline uint tmp_table_max_key_parts() { return maria_max_key_segments(); } +#else +#define TMP_ENGINE_COLUMNDEF MI_COLUMNDEF +#define TMP_ENGINE_HTON myisam_hton +#define TMP_ENGINE_NAME "MyISAM" +inline uint tmp_table_max_key_length() { return MI_MAX_KEY_LENGTH; } +inline uint tmp_table_max_key_parts() { return MI_MAX_KEY_SEG; } +#endif + +/* + Param to create temporary tables when doing SELECT:s + NOTE + This structure is copied using memcpy as a part of JOIN. 
+*/ + +class TMP_TABLE_PARAM :public Sql_alloc +{ +public: + List copy_funcs; + Copy_field *copy_field, *copy_field_end; + uchar *group_buff; + const char *tmp_name; + Item **items_to_copy; /* Fields in tmp table */ + TMP_ENGINE_COLUMNDEF *recinfo, *start_recinfo; + KEY *keyinfo; + ha_rows end_write_records; + /** + Number of normal fields in the query, including those referred to + from aggregate functions. Hence, "SELECT `field1`, + SUM(`field2`) from t1" sets this counter to 2. + + @see count_field_types + */ + uint field_count; + /** + Number of fields in the query that have functions. Includes both + aggregate functions (e.g., SUM) and non-aggregates (e.g., RAND). + Also counts functions referred to from aggregate functions, i.e., + "SELECT SUM(RAND())" sets this counter to 2. + + @see count_field_types + */ + uint func_count; + /** + Number of fields in the query that have aggregate functions. Note + that the optimizer may choose to optimize away these fields by + replacing them with constants, in which case sum_func_count will + need to be updated. + + @see opt_sum_query, count_field_types + */ + uint sum_func_count; + uint copy_func_count; // Allocated copy fields + uint hidden_field_count; + uint group_parts,group_length,group_null_parts; + + /* + If we're doing a GROUP BY operation, shows which one is used: + true TemporaryTableWithPartialSums algorithm (see end_update()). + false OrderedGroupBy algorithm (see end_write_group()). + */ + uint quick_group; + /** + Enabled when we have atleast one outer_sum_func. Needed when used + along with distinct. + + @see create_tmp_table + */ + bool using_outer_summary_function; + CHARSET_INFO *table_charset; + bool schema_table; + /* TRUE if the temp table is created for subquery materialization. 
*/ + bool materialized_subquery; + /* TRUE if all columns of the table are guaranteed to be non-nullable */ + bool force_not_null_cols; + /* + True if GROUP BY and its aggregate functions are already computed + by a table access method (e.g. by loose index scan). In this case + query execution should not perform aggregation and should treat + aggregate functions as normal functions. + */ + bool precomputed_group_by; + bool force_copy_fields; + /* + If TRUE, create_tmp_field called from create_tmp_table will convert + all BIT fields to 64-bit longs. This is a workaround the limitation + that MEMORY tables cannot index BIT columns. + */ + bool bit_fields_as_long; + /* + Whether to create or postpone actual creation of this temporary table. + TRUE <=> create_tmp_table will create only the TABLE structure. + */ + bool skip_create_table; + + TMP_TABLE_PARAM() + :copy_field(0), group_parts(0), + group_length(0), group_null_parts(0), + using_outer_summary_function(0), + schema_table(0), materialized_subquery(0), force_not_null_cols(0), + precomputed_group_by(0), + force_copy_fields(0), bit_fields_as_long(0), skip_create_table(0) + { + init(); + } + ~TMP_TABLE_PARAM() + { + cleanup(); + } + void init(void); + inline void cleanup(void) + { + if (copy_field) /* Fix for Intel compiler */ + { + delete [] copy_field; + copy_field= NULL; + copy_field_end= NULL; + } + } +}; + + +class select_unit :public select_result_interceptor +{ +protected: + uint curr_step, prev_step, curr_sel; + enum sub_select_type step; +public: + TMP_TABLE_PARAM tmp_table_param; + /* Number of additional (hidden) field of the used temporary table */ + int addon_cnt; + int write_err; /* Error code from the last send_data->ha_write_row call. 
*/ + TABLE *table; + + select_unit(THD *thd_arg): + select_result_interceptor(thd_arg), addon_cnt(0), table(0) + { + init(); + tmp_table_param.init(); + } + int prepare(List &list, SELECT_LEX_UNIT *u); + /** + Do prepare() and prepare2() if they have been postponed until + column type information is computed (used by select_union_direct). + + @param types Column types + + @return false on success, true on failure + */ + virtual bool postponed_prepare(List &types) + { return false; } + int send_data(List &items); + int write_record(); + int update_counter(Field *counter, longlong value); + int delete_record(); + bool send_eof(); + virtual bool flush(); + void cleanup(); + virtual bool create_result_table(THD *thd, List *column_types, + bool is_distinct, ulonglong options, + const LEX_CSTRING *alias, + bool bit_fields_as_long, + bool create_table, + bool keep_row_order, + uint hidden); + TMP_TABLE_PARAM *get_tmp_table_param() { return &tmp_table_param; } + void init() + { + curr_step= prev_step= 0; + curr_sel= UINT_MAX; + step= UNION_TYPE; + write_err= 0; + } + virtual void change_select(); + virtual bool force_enable_index_if_needed() { return false; } +}; + + +/** + @class select_unit_ext + + The class used when processing rows produced by operands of query expressions + containing INTERSECT ALL and/or EXCEPT all operations. One or two extra fields + of the temporary to store the rows of the partial and final result can be employed. + Both of them contain counters. The second additional field is used only when + the processed query expression contains INTERSECT ALL. + + Consider how these extra fields are used. 
+ + Let + table t1 (f char(8)) + table t2 (f char(8)) + table t3 (f char(8)) + contain the following sets: + ("b"),("a"),("d"),("c"),("b"),("a"),("c"),("a") + ("c"),("b"),("c"),("c"),("a"),("b"),("g") + ("c"),("a"),("b"),("d"),("b"),("e") + + - Let's demonstrate how the the set operation INTERSECT ALL is proceesed + for the query + SELECT f FROM t1 INTERSECT ALL SELECT f FROM t2 + + When send_data() is called for the rows of the first operand we put + the processed record into the temporary table if there was no such record + setting dup_cnt field to 1 and add_cnt field to 0 and increment the + counter in the dup_cnt field by one otherwise. We get + + |add_cnt|dup_cnt| f | + |0 |2 |b | + |0 |3 |a | + |0 |1 |d | + |0 |2 |c | + + The call of send_eof() for the first operand swaps the values stored in + dup_cnt and add_cnt. After this, we'll see the following rows in the + temporary table + + |add_cnt|dup_cnt| f | + |2 |0 |b | + |3 |0 |a | + |1 |0 |d | + |2 |0 |c | + + When send_data() is called for the rows of the second operand we increment + the counter in dup_cnt if the processed row is found in the table and do + nothing otherwise. As a result we get + + |add_cnt|dup_cnt| f | + |2 |2 |b | + |3 |1 |a | + |1 |0 |d | + |2 |3 |c | + + At the call of send_eof() for the second operand first we disable index. + Then for each record, the minimum of counters from dup_cnt and add_cnt m is + taken. If m == 0 then the record is deleted. Otherwise record is replaced + with m copies of it. Yet the counter in this copies are set to 1 for + dup_cnt and to 0 for add_cnt + + |add_cnt|dup_cnt| f | + |0 |1 |b | + |0 |1 |b | + |0 |1 |a | + |0 |1 |c | + |0 |1 |c | + + - Let's demonstrate how the the set operation EXCEPT ALL is proceesed + for the query + SELECT f FROM t1 EXCEPT ALL SELECT f FROM t3 + + Only one additional counter field dup_cnt is used for EXCEPT ALL. 
+ After the first operand has been processed we have in the temporary table + + |dup_cnt| f | + |2 |b | + |3 |a | + |1 |d | + |2 |c | + + When send_data() is called for the rows of the second operand we decrement + the counter in dup_cnt if the processed row is found in the table and do + nothing otherwise. If the counter becomes 0 we delete the record + + |dup_cnt| f | + |2 |a | + |1 |c | + + Finally at the call of send_eof() for the second operand we disable index + unfold rows adding duplicates + + |dup_cnt| f | + |1 |a | + |1 |a | + |1 |c | + */ + +class select_unit_ext :public select_unit +{ +public: + select_unit_ext(THD *thd_arg): + select_unit(thd_arg), increment(0), is_index_enabled(TRUE), + curr_op_type(UNSPECIFIED) + { + }; + int send_data(List &items); + void change_select(); + int unfold_record(ha_rows cnt); + bool send_eof(); + bool force_enable_index_if_needed() + { + is_index_enabled= true; + return true; + } + bool disable_index_if_needed(SELECT_LEX *curr_sl); + + /* + How to change increment/decrement the counter in duplicate_cnt field + when processing a record produced by the current operand in send_data(). 
+ The value can be 1 or -1 + */ + int increment; + /* TRUE <=> the index of the result temporary table is enabled */ + bool is_index_enabled; + /* The type of the set operation currently executed */ + enum set_op_type curr_op_type; + /* + Points to the extra field of the temporary table where + duplicate counters are stored + */ + Field *duplicate_cnt; + /* + Points to the extra field of the temporary table where additional + counters used only for INTERSECT ALL operations are stored + */ + Field *additional_cnt; +}; + +class select_union_recursive :public select_unit +{ + public: + /* The temporary table with the new records generated by one iterative step */ + TABLE *incr_table; + /* The TMP_TABLE_PARAM structure used to create incr_table */ + TMP_TABLE_PARAM incr_table_param; + /* One of tables from the list rec_tables (determined dynamically) */ + TABLE *first_rec_table_to_update; + /* + The list of all recursive table references to the CTE for whose + specification this select_union_recursive was created + */ + List rec_table_refs; + /* + The count of how many times cleanup() was called with cleaned==false + for the unit specifying the recursive CTE for which this object was created + or for the unit specifying a CTE that mutually recursive with this CTE. + */ + uint cleanup_count; + long row_counter; + + select_union_recursive(THD *thd_arg): + select_unit(thd_arg), + incr_table(0), first_rec_table_to_update(0), cleanup_count(0), + row_counter(0) + { incr_table_param.init(); }; + + int send_data(List &items); + bool create_result_table(THD *thd, List *column_types, + bool is_distinct, ulonglong options, + const LEX_CSTRING *alias, + bool bit_fields_as_long, + bool create_table, + bool keep_row_order, + uint hidden); + void cleanup(); +}; + +/** + UNION result that is passed directly to the receiving select_result + without filling a temporary table. 
+ + Function calls are forwarded to the wrapped select_result, but some + functions are expected to be called only once for each query, so + they are only executed for the first SELECT in the union (execept + for send_eof(), which is executed only for the last SELECT). + + This select_result is used when a UNION is not DISTINCT and doesn't + have a global ORDER BY clause. @see st_select_lex_unit::prepare(). +*/ + +class select_union_direct :public select_unit +{ +private: + /* Result object that receives all rows */ + select_result *result; + /* The last SELECT_LEX of the union */ + SELECT_LEX *last_select_lex; + + /* Wrapped result has received metadata */ + bool done_send_result_set_metadata; + /* Wrapped result has initialized tables */ + bool done_initialize_tables; + + /* Accumulated limit_found_rows */ + ulonglong limit_found_rows; + + /* Number of rows offset */ + ha_rows offset; + /* Number of rows limit + offset, @see select_union_direct::send_data() */ + ha_rows limit; + +public: + /* Number of rows in the union */ + ha_rows send_records; + select_union_direct(THD *thd_arg, select_result *result_arg, + SELECT_LEX *last_select_lex_arg): + select_unit(thd_arg), result(result_arg), + last_select_lex(last_select_lex_arg), + done_send_result_set_metadata(false), done_initialize_tables(false), + limit_found_rows(0) + { send_records= 0; } + bool change_result(select_result *new_result); + uint field_count(List &fields) const + { + // Only called for top-level select_results, usually select_send + DBUG_ASSERT(false); /* purecov: inspected */ + return 0; /* purecov: inspected */ + } + bool postponed_prepare(List &types); + bool send_result_set_metadata(List &list, uint flags); + int send_data(List &items); + bool initialize_tables (JOIN *join); + bool send_eof(); + bool flush() { return false; } + bool check_simple_select() const + { + /* Only called for top-level select_results, usually select_send */ + DBUG_ASSERT(false); /* purecov: inspected */ + return false; 
/* purecov: inspected */ + } + void abort_result_set() + { + result->abort_result_set(); /* purecov: inspected */ + } + void cleanup() + { + send_records= 0; + } + void set_thd(THD *thd_arg) + { + /* + Only called for top-level select_results, usually select_send, + and for the results of subquery engines + (select__subselect). + */ + DBUG_ASSERT(false); /* purecov: inspected */ + } + void remove_offset_limit() + { + // EXPLAIN should never output to a select_union_direct + DBUG_ASSERT(false); /* purecov: inspected */ + } + void begin_dataset() + { + // Only called for sp_cursor::Select_fetch_into_spvars + DBUG_ASSERT(false); /* purecov: inspected */ + } +}; + + +/* Base subselect interface class */ +class select_subselect :public select_result_interceptor +{ +protected: + Item_subselect *item; +public: + select_subselect(THD *thd_arg, Item_subselect *item_arg): + select_result_interceptor(thd_arg), item(item_arg) {} + int send_data(List &items)=0; + bool send_eof() { return 0; }; +}; + +/* Single value subselect interface class */ +class select_singlerow_subselect :public select_subselect +{ +public: + select_singlerow_subselect(THD *thd_arg, Item_subselect *item_arg): + select_subselect(thd_arg, item_arg) + {} + int send_data(List &items); +}; + + +/* + This class specializes select_union to collect statistics about the + data stored in the temp table. Currently the class collects statistcs + about NULLs. +*/ + +class select_materialize_with_stats : public select_unit +{ +protected: + class Column_statistics + { + public: + /* Count of NULLs per column. */ + ha_rows null_count; + /* The row number that contains the first NULL in a column. */ + ha_rows min_null_row; + /* The row number that contains the last NULL in a column. */ + ha_rows max_null_row; + }; + + /* Array of statistics data per column. */ + Column_statistics* col_stat; + + /* + The number of columns in the biggest sub-row that consists of only + NULL values. 
+ */ + uint max_nulls_in_row; + /* + Count of rows writtent to the temp table. This is redundant as it is + already stored in handler::stats.records, however that one is relatively + expensive to compute (given we need that for evry row). + */ + ha_rows count_rows; + +protected: + void reset(); + +public: + select_materialize_with_stats(THD *thd_arg): select_unit(thd_arg) + { tmp_table_param.init(); } + bool create_result_table(THD *thd, List *column_types, + bool is_distinct, ulonglong options, + const LEX_CSTRING *alias, + bool bit_fields_as_long, + bool create_table, + bool keep_row_order, + uint hidden); + bool init_result_table(ulonglong select_options); + int send_data(List &items); + void cleanup(); + ha_rows get_null_count_of_col(uint idx) + { + DBUG_ASSERT(idx < table->s->fields); + return col_stat[idx].null_count; + } + ha_rows get_max_null_of_col(uint idx) + { + DBUG_ASSERT(idx < table->s->fields); + return col_stat[idx].max_null_row; + } + ha_rows get_min_null_of_col(uint idx) + { + DBUG_ASSERT(idx < table->s->fields); + return col_stat[idx].min_null_row; + } + uint get_max_nulls_in_row() { return max_nulls_in_row; } +}; + + +/* used in independent ALL/ANY optimisation */ +class select_max_min_finder_subselect :public select_subselect +{ + Item_cache *cache; + bool (select_max_min_finder_subselect::*op)(); + bool fmax; + bool is_all; + void set_op(const Type_handler *ha); +public: + select_max_min_finder_subselect(THD *thd_arg, Item_subselect *item_arg, + bool mx, bool all): + select_subselect(thd_arg, item_arg), cache(0), fmax(mx), is_all(all) + {} + void cleanup(); + int send_data(List &items); + bool cmp_real(); + bool cmp_int(); + bool cmp_decimal(); + bool cmp_str(); + bool cmp_time(); + bool cmp_native(); +}; + +/* EXISTS subselect interface class */ +class select_exists_subselect :public select_subselect +{ +public: + select_exists_subselect(THD *thd_arg, Item_subselect *item_arg): + select_subselect(thd_arg, item_arg) {} + int send_data(List 
&items); +}; + + +/* + Optimizer and executor structure for the materialized semi-join info. This + structure contains + - The sj-materialization temporary table + - Members needed to make index lookup or a full scan of the temptable. +*/ +class POSITION; + +class SJ_MATERIALIZATION_INFO : public Sql_alloc +{ +public: + /* Optimal join sub-order */ + POSITION *positions; + + uint tables; /* Number of tables in the sj-nest */ + + /* Number of rows in the materialized table, before the de-duplication */ + double rows_with_duplicates; + + /* Expected #rows in the materialized table, after de-duplication */ + double rows; + + /* + Cost to materialize - execute the sub-join and write rows into temp.table + */ + Cost_estimate materialization_cost; + + /* Cost to make one lookup in the temptable */ + Cost_estimate lookup_cost; + + /* Cost of scanning the materialized table */ + Cost_estimate scan_cost; + + /* --- Execution structures ---------- */ + + /* + TRUE <=> This structure is used for execution. We don't necessarily pick + sj-materialization, so some of SJ_MATERIALIZATION_INFO structures are not + used by materialization + */ + bool is_used; + + bool materialized; /* TRUE <=> materialization already performed */ + /* + TRUE - the temptable is read with full scan + FALSE - we use the temptable for index lookups + */ + bool is_sj_scan; + + /* The temptable and its related info */ + TMP_TABLE_PARAM sjm_table_param; + List sjm_table_cols; + TABLE *table; + + /* Structure used to make index lookups */ + struct st_table_ref *tab_ref; + Item *in_equality; /* See create_subq_in_equalities() */ + + Item *join_cond; /* See comments in make_join_select() */ + Copy_field *copy_field; /* Needed for SJ_Materialization scan */ +}; + + +/* Structs used when sorting */ +struct SORT_FIELD_ATTR +{ + /* + If using mem-comparable fixed-size keys: + length of the mem-comparable image of the field, in bytes. + + If using packed keys: still the same? Not clear what is the use of it. 
+ */ + uint length; + + /* + For most datatypes, this is 0. + The exception are the VARBINARY columns. + For those columns, the comparison actually compares + + (value_prefix(N), suffix=length(value)) + + Here value_prefix is either the whole value or its prefix if it was too + long, and the suffix is the length of the original value. + (this way, for values X and Y: if X=prefix(Y) then X compares as less + than Y + */ + uint suffix_length; + + /* + If using packed keys, number of bytes that are used to store the length + of the packed key. + + */ + uint length_bytes; + + /* Max. length of the original value, in bytes */ + uint original_length; + enum Type { FIXED_SIZE, VARIABLE_SIZE } type; + /* + TRUE : if the item or field is NULLABLE + FALSE : otherwise + */ + bool maybe_null; + CHARSET_INFO *cs; + uint pack_sort_string(uchar *to, const Binary_string *str, + CHARSET_INFO *cs) const; + int compare_packed_fixed_size_vals(uchar *a, size_t *a_len, + uchar *b, size_t *b_len); + int compare_packed_varstrings(uchar *a, size_t *a_len, + uchar *b, size_t *b_len); + bool check_if_packing_possible(THD *thd) const; + bool is_variable_sized() { return type == VARIABLE_SIZE; } + void set_length_and_original_length(THD *thd, uint length_arg); +}; + + +struct SORT_FIELD: public SORT_FIELD_ATTR +{ + Field *field; /* Field to sort */ + Item *item; /* Item if not sorting fields */ + bool reverse; /* if descending sort */ +}; + + +typedef struct st_sort_buffer { + uint index; /* 0 or 1 */ + uint sort_orders; + uint change_pos; /* If sort-fields changed */ + char **buff; + SORT_FIELD *sortorder; +} SORT_BUFFER; + +/* Structure for db & table in sql_yacc */ + +class Table_ident :public Sql_alloc +{ +public: + LEX_CSTRING db; + LEX_CSTRING table; + SELECT_LEX_UNIT *sel; + inline Table_ident(THD *thd, const LEX_CSTRING *db_arg, + const LEX_CSTRING *table_arg, + bool force) + :table(*table_arg), sel((SELECT_LEX_UNIT *)0) + { + if (!force && (thd->client_capabilities & 
CLIENT_NO_SCHEMA)) + db= null_clex_str; + else + db= *db_arg; + } + inline Table_ident(const LEX_CSTRING *table_arg) + :table(*table_arg), sel((SELECT_LEX_UNIT *)0) + { + db= null_clex_str; + } + /* + This constructor is used only for the case when we create a derived + table. A derived table has no name and doesn't belong to any database. + Later, if there was an alias specified for the table, it will be set + by add_table_to_list. + */ + inline Table_ident(SELECT_LEX_UNIT *s) : sel(s) + { + /* We must have a table name here as this is used with add_table_to_list */ + db.str= empty_c_string; /* a subject to casedn_str */ + db.length= 0; + table.str= internal_table_name; + table.length=1; + } + bool is_derived_table() const { return MY_TEST(sel); } + inline void change_db(LEX_CSTRING *db_name) + { + db= *db_name; + } + bool resolve_table_rowtype_ref(THD *thd, Row_definition_list &defs); + bool append_to(THD *thd, String *to) const; +}; + + +class Qualified_column_ident: public Table_ident +{ +public: + LEX_CSTRING m_column; +public: + Qualified_column_ident(const LEX_CSTRING *column) + :Table_ident(&null_clex_str), + m_column(*column) + { } + Qualified_column_ident(const LEX_CSTRING *table, const LEX_CSTRING *column) + :Table_ident(table), + m_column(*column) + { } + Qualified_column_ident(THD *thd, + const LEX_CSTRING *db, + const LEX_CSTRING *table, + const LEX_CSTRING *column) + :Table_ident(thd, db, table, false), + m_column(*column) + { } + bool resolve_type_ref(THD *thd, Column_definition *def); + bool append_to(THD *thd, String *to) const; +}; + + +// this is needed for user_vars hash +class user_var_entry +{ + CHARSET_INFO *m_charset; + public: + user_var_entry() = default; /* Remove gcc warning */ + LEX_CSTRING name; + char *value; + size_t length; + query_id_t update_query_id, used_query_id; + Item_result type; + bool unsigned_flag; + + double val_real(bool *null_value); + longlong val_int(bool *null_value) const; + String *val_str(bool *null_value, 
String *str, uint decimals) const; + my_decimal *val_decimal(bool *null_value, my_decimal *result); + CHARSET_INFO *charset() const { return m_charset; } + void set_charset(CHARSET_INFO *cs) { m_charset= cs; } +}; + +user_var_entry *get_variable(HASH *hash, LEX_CSTRING *name, + bool create_if_not_exists); + +class SORT_INFO; +class multi_delete :public select_result_interceptor +{ + TABLE_LIST *delete_tables, *table_being_deleted; + Unique **tempfiles; + ha_rows deleted, found; + uint num_of_tables; + int error; + bool do_delete; + /* True if at least one table we delete from is transactional */ + bool transactional_tables; + /* True if at least one table we delete from is not transactional */ + bool normal_tables; + bool delete_while_scanning; + /* + error handling (rollback and binlogging) can happen in send_eof() + so that afterward abort_result_set() needs to find out that. + */ + bool error_handled; + +public: + // Methods used by ColumnStore + uint get_num_of_tables() const { return num_of_tables; } + TABLE_LIST* get_tables() const { return delete_tables; } +public: + multi_delete(THD *thd_arg, TABLE_LIST *dt, uint num_of_tables); + ~multi_delete(); + int prepare(List &list, SELECT_LEX_UNIT *u); + int send_data(List &items); + bool initialize_tables (JOIN *join); + int do_deletes(); + int do_table_deletes(TABLE *table, SORT_INFO *sort_info, bool ignore); + bool send_eof(); + inline ha_rows num_deleted() const { return deleted; } + virtual void abort_result_set(); + void prepare_to_read_rows(); +}; + + +class multi_update :public select_result_interceptor +{ + TABLE_LIST *all_tables; /* query/update command tables */ + List *leaves; /* list of leaves of join table tree */ + List updated_leaves; /* list of of updated leaves */ + TABLE_LIST *update_tables; + TABLE **tmp_tables, *main_table, *table_to_update; + TMP_TABLE_PARAM *tmp_table_param; + ha_rows updated, found; + List *fields, *values; + List **fields_for_table, **values_for_table; + uint table_count; + 
/* + List of tables referenced in the CHECK OPTION condition of + the updated view excluding the updated table. + */ + List
unupdated_check_opt_tables; + Copy_field *copy_field; + enum enum_duplicates handle_duplicates; + bool do_update, trans_safe; + /* True if the update operation has made a change in a transactional table */ + bool transactional_tables; + bool ignore; + /* + error handling (rollback and binlogging) can happen in send_eof() + so that afterward abort_result_set() needs to find out that. + */ + bool error_handled; + + /* Need this to protect against multiple prepare() calls */ + bool prepared; + + // For System Versioning (may need to insert new fields to a table). + ha_rows updated_sys_ver; + + bool has_vers_fields; + +public: + multi_update(THD *thd_arg, TABLE_LIST *ut, List *leaves_list, + List *fields, List *values, + enum_duplicates handle_duplicates, bool ignore); + ~multi_update(); + bool init(THD *thd); + int prepare(List &list, SELECT_LEX_UNIT *u); + int send_data(List &items); + bool initialize_tables (JOIN *join); + int prepare2(JOIN *join); + int do_updates(); + bool send_eof(); + inline ha_rows num_found() const { return found; } + inline ha_rows num_updated() const { return updated; } + virtual void abort_result_set(); + void update_used_tables(); + void prepare_to_read_rows(); +}; + +class my_var_sp; +class my_var : public Sql_alloc { +public: + const LEX_CSTRING name; + enum type { SESSION_VAR, LOCAL_VAR, PARAM_VAR }; + type scope; + my_var(const LEX_CSTRING *j, enum type s) : name(*j), scope(s) { } + virtual ~my_var() = default; + virtual bool set(THD *thd, Item *val) = 0; + virtual my_var_sp *get_my_var_sp() { return NULL; } +}; + +class my_var_sp: public my_var { + const Sp_rcontext_handler *m_rcontext_handler; + const Type_handler *m_type_handler; +public: + uint offset; + /* + Routine to which this Item_splocal belongs. Used for checking if correct + runtime context is used for variable handling. 
+ */ + sp_head *sp; + my_var_sp(const Sp_rcontext_handler *rcontext_handler, + const LEX_CSTRING *j, uint o, const Type_handler *type_handler, + sp_head *s) + : my_var(j, LOCAL_VAR), + m_rcontext_handler(rcontext_handler), + m_type_handler(type_handler), offset(o), sp(s) { } + ~my_var_sp() = default; + bool set(THD *thd, Item *val); + my_var_sp *get_my_var_sp() { return this; } + const Type_handler *type_handler() const + { return m_type_handler; } + sp_rcontext *get_rcontext(sp_rcontext *local_ctx) const; +}; + +/* + This class handles fields of a ROW SP variable when it's used as a OUT + parameter in a stored procedure. +*/ +class my_var_sp_row_field: public my_var_sp +{ + uint m_field_offset; +public: + my_var_sp_row_field(const Sp_rcontext_handler *rcontext_handler, + const LEX_CSTRING *varname, const LEX_CSTRING *fieldname, + uint var_idx, uint field_idx, sp_head *s) + :my_var_sp(rcontext_handler, varname, var_idx, + &type_handler_double/*Not really used*/, s), + m_field_offset(field_idx) + { } + bool set(THD *thd, Item *val); +}; + +class my_var_user: public my_var { +public: + my_var_user(const LEX_CSTRING *j) + : my_var(j, SESSION_VAR) { } + ~my_var_user() = default; + bool set(THD *thd, Item *val); +}; + +class select_dumpvar :public select_result_interceptor { + ha_rows row_count; + my_var_sp *m_var_sp_row; // Not NULL if SELECT INTO row_type_sp_variable + bool send_data_to_var_list(List &items); +public: + List var_list; + select_dumpvar(THD *thd_arg) + :select_result_interceptor(thd_arg), row_count(0), m_var_sp_row(NULL) + { var_list.empty(); } + ~select_dumpvar() = default; + int prepare(List &list, SELECT_LEX_UNIT *u); + int send_data(List &items); + bool send_eof(); + virtual bool check_simple_select() const; + void cleanup(); +}; + +/* Bits in sql_command_flags */ + +#define CF_CHANGES_DATA (1U << 0) +#define CF_REPORT_PROGRESS (1U << 1) +#define CF_STATUS_COMMAND (1U << 2) +#define CF_SHOW_TABLE_COMMAND (1U << 3) +#define CF_WRITE_LOGS_COMMAND (1U 
<< 4) + +/** + Must be set for SQL statements that may contain + Item expressions and/or use joins and tables. + Indicates that the parse tree of such statement may + contain rule-based optimizations that depend on metadata + (i.e. number of columns in a table), and consequently + that the statement must be re-prepared whenever + referenced metadata changes. Must not be set for + statements that themselves change metadata, e.g. RENAME, + ALTER and other DDL, since otherwise will trigger constant + reprepare. Consequently, complex item expressions and + joins are currently prohibited in these statements. +*/ +#define CF_REEXECUTION_FRAGILE (1U << 5) +/** + Implicitly commit before the SQL statement is executed. + + Statements marked with this flag will cause any active + transaction to end (commit) before proceeding with the + command execution. + + This flag should be set for statements that probably can't + be rolled back or that do not expect any previously metadata + locked tables. +*/ +#define CF_IMPLICIT_COMMIT_BEGIN (1U << 6) +/** + Implicitly commit after the SQL statement. + + Statements marked with this flag are automatically committed + at the end of the statement. + + This flag should be set for statements that will implicitly + open and take metadata locks on system tables that should not + be carried for the whole duration of a active transaction. +*/ +#define CF_IMPLICIT_COMMIT_END (1U << 7) +/** + CF_IMPLICT_COMMIT_BEGIN and CF_IMPLICIT_COMMIT_END are used + to ensure that the active transaction is implicitly committed + before and after every DDL statement and any statement that + modifies our currently non-transactional system tables. +*/ +#define CF_AUTO_COMMIT_TRANS (CF_IMPLICIT_COMMIT_BEGIN | CF_IMPLICIT_COMMIT_END) + +/** + Diagnostic statement. + Diagnostic statements: + - SHOW WARNING + - SHOW ERROR + - GET DIAGNOSTICS (WL#2111) + do not modify the diagnostics area during execution. 
+*/ +#define CF_DIAGNOSTIC_STMT (1U << 8) + +/** + Identifies statements that may generate row events + and that may end up in the binary log. +*/ +#define CF_CAN_GENERATE_ROW_EVENTS (1U << 9) + +/** + Identifies statements which may deal with temporary tables and for which + temporary tables should be pre-opened to simplify privilege checks. +*/ +#define CF_PREOPEN_TMP_TABLES (1U << 10) + +/** + Identifies statements for which open handlers should be closed in the + beginning of the statement. +*/ +#define CF_HA_CLOSE (1U << 11) + +/** + Identifies statements that can be explained with EXPLAIN. +*/ +#define CF_CAN_BE_EXPLAINED (1U << 12) + +/** Identifies statements which may generate an optimizer trace */ +#define CF_OPTIMIZER_TRACE (1U << 14) + +/** + Identifies statements that should always be disallowed in + read only transactions. +*/ +#define CF_DISALLOW_IN_RO_TRANS (1U << 15) + +/** + Statement that need the binlog format to be unchanged. +*/ +#define CF_FORCE_ORIGINAL_BINLOG_FORMAT (1U << 16) + +/** + Statement that inserts new rows (INSERT, REPLACE, LOAD, ALTER TABLE) +*/ +#define CF_INSERTS_DATA (1U << 17) + +/** + Statement that updates existing rows (UPDATE, multi-update) +*/ +#define CF_UPDATES_DATA (1U << 18) + +/** + Not logged into slow log as "admin commands" +*/ +#define CF_ADMIN_COMMAND (1U << 19) + +/** + SP Bulk execution safe +*/ +#define CF_PS_ARRAY_BINDING_SAFE (1U << 20) +/** + SP Bulk execution optimized +*/ +#define CF_PS_ARRAY_BINDING_OPTIMIZED (1U << 21) +/** + If command creates or drops a table +*/ +#define CF_SCHEMA_CHANGE (1U << 22) +/** + If command creates or drops a database +*/ +#define CF_DB_CHANGE (1U << 23) + +#ifdef WITH_WSREP +/** + DDL statement that may be subject to error filtering. 
*/
#define CF_WSREP_MAY_IGNORE_ERRORS (1U << 24)
#endif /* WITH_WSREP */


/* Bits in server_command_flags */

/**
  Statement that deletes existing rows (DELETE, DELETE_MULTI)

  NOTE(review): this value collides with CF_WSREP_MAY_IGNORE_ERRORS
  (also 1U << 24, defined just above under WITH_WSREP), and although it
  sits below the "Bits in server_command_flags" header it describes
  statement behaviour like CF_INSERTS_DATA/CF_UPDATES_DATA do.  This is
  harmless only if the two flags are never tested in the same flag word;
  confirm against the sql_command_flags[] initialization before relying
  on or changing either value.
*/
#define CF_DELETES_DATA (1U << 24)

/**
  Skip the increase of the global query id counter. Commonly set for
  commands that are stateless (won't cause any change on the server
  internal states).
*/
#define CF_SKIP_QUERY_ID (1U << 0)

/**
  Skip the increase of the number of statements that clients have
  sent to the server. Commonly used for commands that will cause
  a statement to be executed but the statement might have not been
  sent by the user (ie: stored procedure).
*/
#define CF_SKIP_QUESTIONS (1U << 1)
#ifdef WITH_WSREP
/**
  Do not check that wsrep snapshot is ready before allowing this command
*/
#define CF_SKIP_WSREP_CHECK (1U << 2)
#else
#define CF_SKIP_WSREP_CHECK 0
#endif /* WITH_WSREP */


/* Inline functions */

/*
  Add an item to the select list of the statement currently being parsed.
  Items parsed inside a RETURNING clause go to the RETURNING list instead
  of the ordinary select list.  Returns true on error (e.g. out of memory).
*/
inline bool add_item_to_list(THD *thd, Item *item)
{
  bool res;
  LEX *lex= thd->lex;
  if (lex->current_select->parsing_place == IN_RETURNING)
    res= lex->returning()->add_item_to_list(thd, item);
  else
    res= lex->current_select->add_item_to_list(thd, item);
  return res;
}

/* Append a value to the statement's LEX::value_list (on thd->mem_root). */
inline bool add_value_to_list(THD *thd, Item *value)
{
  return thd->lex->value_list.push_back(value, thd->mem_root);
}

/* Add an ORDER BY element to the select currently being parsed. */
inline bool add_order_to_list(THD *thd, Item *item, bool asc)
{
  return thd->lex->current_select->add_order_to_list(thd, item, asc);
}

/* Add an ordering element to the current select's gorder list. */
inline bool add_gorder_to_list(THD *thd, Item *item, bool asc)
{
  return thd->lex->current_select->add_gorder_to_list(thd, item, asc);
}

/* Add a GROUP BY element to the select currently being parsed. */
inline bool add_group_to_list(THD *thd, Item *item, bool asc)
{
  return thd->lex->current_select->add_group_to_list(thd, item, asc);
}

/*
  AND two conditions together, tolerating NULL operands: returns the
  non-NULL operand when one side is missing, otherwise a new
  Item_cond_and allocated on thd->mem_root (NULL on allocation failure).
*/
inline Item *and_conds(THD *thd, Item *a, Item *b)
{
  if (!b) return a;
  if (!a) return b;
  return new (thd->mem_root) Item_cond_and(thd, a, b);
}

/* inline handler methods that need to know TABLE and THD
structures */

/*
  Increment a per-session handler status counter for the THD using this
  table, and charge the operation against the session's
  LIMIT ROWS EXAMINED budget.
*/
inline void handler::increment_statistics(ulong SSV::*offset) const
{
  status_var_increment(table->in_use->status_var.*offset);
  table->in_use->check_limit_rows_examined();
}

/* Counterpart of increment_statistics(); no rows-examined check here. */
inline void handler::decrement_statistics(ulong SSV::*offset) const
{
  status_var_decrement(table->in_use->status_var.*offset);
}


/*
  Full-text read wrapper: on success counts the row as read and, when the
  row was read into record[0], refreshes the table's virtual columns.
  Sets table->status to STATUS_NOT_FOUND on error, 0 on success.
*/
inline int handler::ha_ft_read(uchar *buf)
{
  int error= ft_read(buf);
  if (!error)
  {
    update_rows_read();

    if (table->vfield && buf == table->record[0])
      table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
  }

  table->status=error ? STATUS_NOT_FOUND: 0;
  return error;
}

/* Re-read a row by its stored position; only maintains table->status. */
inline int handler::ha_rnd_pos_by_record(uchar *buf)
{
  int error= rnd_pos_by_record(buf);
  table->status=error ? STATUS_NOT_FOUND: 0;
  return error;
}

/*
  Read the first row (optionally via the given key), counting the row as
  read on success and maintaining table->status like the wrappers above.
*/
inline int handler::ha_read_first_row(uchar *buf, uint primary_key)
{
  int error= read_first_row(buf, primary_key);
  if (!error)
    update_rows_read();
  table->status=error ? STATUS_NOT_FOUND: 0;
  return error;
}

/*
  Row operations on internal temporary tables.  These are instrumented
  through the separate ha_tmp_*_count session counters (rather than the
  regular row counters) while still emitting PSI/probe events.
*/
inline int handler::ha_write_tmp_row(uchar *buf)
{
  int error;
  MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
  increment_statistics(&SSV::ha_tmp_write_count);
  TABLE_IO_WAIT(tracker, PSI_TABLE_WRITE_ROW, MAX_KEY, error,
                { error= write_row(buf); })
  MYSQL_INSERT_ROW_DONE(error);
  return error;
}

inline int handler::ha_delete_tmp_row(uchar *buf)
{
  int error;
  MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
  increment_statistics(&SSV::ha_tmp_delete_count);
  TABLE_IO_WAIT(tracker, PSI_TABLE_DELETE_ROW, MAX_KEY, error,
                { error= delete_row(buf); })
  MYSQL_DELETE_ROW_DONE(error);
  return error;
}

inline int handler::ha_update_tmp_row(const uchar *old_data, uchar *new_data)
{
  int error;
  MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
  increment_statistics(&SSV::ha_tmp_update_count);
  TABLE_IO_WAIT(tracker, PSI_TABLE_UPDATE_ROW, active_index, error,
                { error=
update_row(old_data, new_data);}) + MYSQL_UPDATE_ROW_DONE(error); + return error; +} + +inline bool handler::has_long_unique() +{ + return table->s->long_unique_table; +} + +extern pthread_attr_t *get_connection_attrib(void); + +/** + Set thread entering a condition + + This function should be called before putting a thread to wait for + a condition. @a mutex should be held before calling this + function. After being waken up, @f thd_exit_cond should be called. + + @param thd The thread entering the condition, NULL means current thread + @param cond The condition the thread is going to wait for + @param mutex The mutex associated with the condition, this must be + held before call this function + @param stage The new process message for the thread + @param old_stage The old process message for the thread + @param src_function The caller source function name + @param src_file The caller source file name + @param src_line The caller source line number +*/ +void thd_enter_cond(MYSQL_THD thd, mysql_cond_t *cond, mysql_mutex_t *mutex, + const PSI_stage_info *stage, PSI_stage_info *old_stage, + const char *src_function, const char *src_file, + int src_line); + +#define THD_ENTER_COND(P1, P2, P3, P4, P5) \ + thd_enter_cond(P1, P2, P3, P4, P5, __func__, __FILE__, __LINE__) + +/** + Set thread leaving a condition + + This function should be called after a thread being waken up for a + condition. 
+ + @param thd The thread entering the condition, NULL means current thread + @param stage The process message, ususally this should be the old process + message before calling @f thd_enter_cond + @param src_function The caller source function name + @param src_file The caller source file name + @param src_line The caller source line number +*/ +void thd_exit_cond(MYSQL_THD thd, const PSI_stage_info *stage, + const char *src_function, const char *src_file, + int src_line); + +#define THD_EXIT_COND(P1, P2) \ + thd_exit_cond(P1, P2, __func__, __FILE__, __LINE__) + +inline bool binlog_should_compress(size_t len) +{ + return opt_bin_log_compress && + len >= opt_bin_log_compress_min_len; +} + + +/** + Save thd sql_mode on instantiation. + On destruction it resets the mode to the previously stored value. +*/ +class Sql_mode_save +{ + public: + Sql_mode_save(THD *thd) : thd(thd), old_mode(thd->variables.sql_mode) {} + ~Sql_mode_save() { thd->variables.sql_mode = old_mode; } + + private: + THD *thd; + sql_mode_t old_mode; // SQL mode saved at construction time. +}; + + +/* + Save the current sql_mode. Switch off sql_mode flags which can prevent + normal parsing of VIEWs, expressions in generated columns. + Restore the old sql_mode on destructor. 
+*/ +class Sql_mode_save_for_frm_handling: public Sql_mode_save +{ +public: + Sql_mode_save_for_frm_handling(THD *thd) + :Sql_mode_save(thd) + { + /* + - MODE_REAL_AS_FLOAT affect only CREATE TABLE parsing + + MODE_PIPES_AS_CONCAT affect expression parsing + + MODE_ANSI_QUOTES affect expression parsing + + MODE_IGNORE_SPACE affect expression parsing + - MODE_IGNORE_BAD_TABLE_OPTIONS affect only CREATE/ALTER TABLE parsing + * MODE_ONLY_FULL_GROUP_BY affect execution + * MODE_NO_UNSIGNED_SUBTRACTION affect execution + - MODE_NO_DIR_IN_CREATE affect table creation only + - MODE_POSTGRESQL compounded from other modes + + MODE_ORACLE affects Item creation (e.g for CONCAT) + - MODE_MSSQL compounded from other modes + - MODE_DB2 compounded from other modes + - MODE_MAXDB affect only CREATE TABLE parsing + - MODE_NO_KEY_OPTIONS affect only SHOW + - MODE_NO_TABLE_OPTIONS affect only SHOW + - MODE_NO_FIELD_OPTIONS affect only SHOW + - MODE_MYSQL323 affect only SHOW + - MODE_MYSQL40 affect only SHOW + - MODE_ANSI compounded from other modes + (+ transaction mode) + ? 
MODE_NO_AUTO_VALUE_ON_ZERO affect UPDATEs + + MODE_NO_BACKSLASH_ESCAPES affect expression parsing + + MODE_EMPTY_STRING_IS_NULL affect expression parsing + */ + thd->variables.sql_mode&= ~(MODE_PIPES_AS_CONCAT | MODE_ANSI_QUOTES | + MODE_IGNORE_SPACE | MODE_NO_BACKSLASH_ESCAPES | + MODE_ORACLE | MODE_EMPTY_STRING_IS_NULL); + }; +}; + + +class Switch_to_definer_security_ctx +{ + public: + Switch_to_definer_security_ctx(THD *thd, TABLE_LIST *table) : + m_thd(thd), m_sctx(thd->security_ctx) + { + if (table->security_ctx) + thd->security_ctx= table->security_ctx; + } + ~Switch_to_definer_security_ctx() { m_thd->security_ctx = m_sctx; } + + private: + THD *m_thd; + Security_context *m_sctx; +}; + + +class Sql_mode_instant_set: public Sql_mode_save +{ +public: + Sql_mode_instant_set(THD *thd, sql_mode_t temporary_value) + :Sql_mode_save(thd) + { + thd->variables.sql_mode= temporary_value; + } +}; + + +class Sql_mode_instant_remove: public Sql_mode_save +{ +public: + Sql_mode_instant_remove(THD *thd, sql_mode_t temporary_remove_flags) + :Sql_mode_save(thd) + { + thd->variables.sql_mode&= ~temporary_remove_flags; + } +}; + + +class Abort_on_warning_instant_set +{ + THD *m_thd; + bool m_save_abort_on_warning; +public: + Abort_on_warning_instant_set(THD *thd, bool temporary_value) + :m_thd(thd), m_save_abort_on_warning(thd->abort_on_warning) + { + thd->abort_on_warning= temporary_value; + } + ~Abort_on_warning_instant_set() + { + m_thd->abort_on_warning= m_save_abort_on_warning; + } +}; + + +class Check_level_instant_set +{ + THD *m_thd; + enum_check_fields m_check_level; +public: + Check_level_instant_set(THD *thd, enum_check_fields temporary_value) + :m_thd(thd), m_check_level(thd->count_cuted_fields) + { + thd->count_cuted_fields= temporary_value; + } + ~Check_level_instant_set() + { + m_thd->count_cuted_fields= m_check_level; + } +}; + + +class Use_relaxed_field_copy: public Sql_mode_save, + public Check_level_instant_set, + public Abort_on_warning_instant_set +{ 
+public: + Use_relaxed_field_copy(THD *thd) : + Sql_mode_save(thd), Check_level_instant_set(thd, CHECK_FIELD_IGNORE), + Abort_on_warning_instant_set(thd, 0) + { + thd->variables.sql_mode&= ~(MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE); + thd->variables.sql_mode|= MODE_INVALID_DATES; + } +}; + + +/** + This class resembles the SQL Standard schema qualified object name: + ::= [ ] +*/ +class Database_qualified_name +{ +public: + LEX_CSTRING m_db; + LEX_CSTRING m_name; + Database_qualified_name(const LEX_CSTRING *db, const LEX_CSTRING *name) + :m_db(*db), m_name(*name) + { } + Database_qualified_name(const LEX_CSTRING &db, const LEX_CSTRING &name) + :m_db(db), m_name(name) + { } + Database_qualified_name(const char *db, size_t db_length, + const char *name, size_t name_length) + { + m_db.str= db; + m_db.length= db_length; + m_name.str= name; + m_name.length= name_length; + } + + bool eq(const Database_qualified_name *other) const + { + CHARSET_INFO *cs= lower_case_table_names ? + &my_charset_utf8mb3_general_ci : + &my_charset_utf8mb3_bin; + return + m_db.length == other->m_db.length && + m_name.length == other->m_name.length && + !cs->strnncoll(m_db.str, m_db.length, + other->m_db.str, other->m_db.length) && + !cs->strnncoll(m_name.str, m_name.length, + other->m_name.str, other->m_name.length); + } + void copy(MEM_ROOT *mem_root, const LEX_CSTRING &db, + const LEX_CSTRING &name); + + static Database_qualified_name split(const LEX_CSTRING &txt) + { + DBUG_ASSERT(txt.str[txt.length] == '\0'); // Expect 0-terminated input + const char *dot= strchr(txt.str, '.'); + if (!dot) + return Database_qualified_name(NULL, 0, txt.str, txt.length); + size_t dblen= dot - txt.str; + Lex_cstring db(txt.str, dblen); + Lex_cstring name(txt.str + dblen + 1, txt.length - dblen - 1); + return Database_qualified_name(db, name); + } + + // Export db and name as a qualified name string: 'db.name' + size_t make_qname(char *dst, size_t dstlen) const + { + return my_snprintf(dst, dstlen, 
"%.*s.%.*s", + (int) m_db.length, m_db.str, + (int) m_name.length, m_name.str); + } + // Export db and name as a qualified name string, allocate on mem_root. + bool make_qname(MEM_ROOT *mem_root, LEX_CSTRING *dst) const + { + const uint dot= !!m_db.length; + char *tmp; + /* format: [database + dot] + name + '\0' */ + dst->length= m_db.length + dot + m_name.length; + if (unlikely(!(dst->str= tmp= (char*) alloc_root(mem_root, + dst->length + 1)))) + return true; + snprintf(tmp, dst->length + 1, "%.*s%.*s%.*s", + (int) m_db.length, (m_db.length ? m_db.str : ""), + dot, ".", + (int) m_name.length, m_name.str); + DBUG_SLOW_ASSERT(ok_for_lower_case_names(m_db.str)); + return false; + } + + bool make_package_routine_name(MEM_ROOT *mem_root, + const LEX_CSTRING &package, + const LEX_CSTRING &routine) + { + char *tmp; + size_t length= package.length + 1 + routine.length + 1; + if (unlikely(!(tmp= (char *) alloc_root(mem_root, length)))) + return true; + m_name.length= my_snprintf(tmp, length, "%.*s.%.*s", + (int) package.length, package.str, + (int) routine.length, routine.str); + m_name.str= tmp; + return false; + } + + bool make_package_routine_name(MEM_ROOT *mem_root, + const LEX_CSTRING &db, + const LEX_CSTRING &package, + const LEX_CSTRING &routine) + { + if (unlikely(make_package_routine_name(mem_root, package, routine))) + return true; + if (unlikely(!(m_db.str= strmake_root(mem_root, db.str, db.length)))) + return true; + m_db.length= db.length; + return false; + } +}; + + +class ErrConvDQName: public ErrConv +{ + const Database_qualified_name *m_name; +public: + ErrConvDQName(const Database_qualified_name *name) + :m_name(name) + { } + LEX_CSTRING lex_cstring() const override + { + size_t length= m_name->make_qname(err_buffer, sizeof(err_buffer)); + return {err_buffer, length}; + } +}; + +class Type_holder: public Sql_alloc, + public Item_args, + public Type_handler_hybrid_field_type, + public Type_all_attributes +{ + const TYPELIB *m_typelib; + bool m_maybe_null; 
+public: + Type_holder() + :m_typelib(NULL), + m_maybe_null(false) + { } + + void set_type_maybe_null(bool maybe_null_arg) { m_maybe_null= maybe_null_arg; } + bool get_maybe_null() const { return m_maybe_null; } + + decimal_digits_t decimal_precision() const + { + /* + Type_holder is not used directly to create fields, so + its virtual decimal_precision() is never called. + We should eventually extend create_result_table() to accept + an array of Type_holders directly, without having to allocate + Item_type_holder's and put them into List. + */ + DBUG_ASSERT(0); + return 0; + } + void set_typelib(const TYPELIB *typelib) + { + m_typelib= typelib; + } + const TYPELIB *get_typelib() const + { + return m_typelib; + } + + bool aggregate_attributes(THD *thd) + { + static LEX_CSTRING union_name= { STRING_WITH_LEN("UNION") }; + for (uint i= 0; i < arg_count; i++) + m_maybe_null|= args[i]->maybe_null(); + return + type_handler()->Item_hybrid_func_fix_attributes(thd, + union_name, this, this, + args, arg_count); + } +}; + + +/* + A helper class to set THD flags to emit warnings/errors in case of + overflow/type errors during assigning values into the SP variable fields. + Saves original flags values in constructor. + Restores original flags in destructor. 
+*/ +class Sp_eval_expr_state +{ + THD *m_thd; + enum_check_fields m_count_cuted_fields; + bool m_abort_on_warning; + bool m_stmt_modified_non_trans_table; + void start() + { + m_thd->count_cuted_fields= CHECK_FIELD_ERROR_FOR_NULL; + m_thd->abort_on_warning= m_thd->is_strict_mode(); + m_thd->transaction->stmt.modified_non_trans_table= false; + } + void stop() + { + m_thd->count_cuted_fields= m_count_cuted_fields; + m_thd->abort_on_warning= m_abort_on_warning; + m_thd->transaction->stmt.modified_non_trans_table= + m_stmt_modified_non_trans_table; + } +public: + Sp_eval_expr_state(THD *thd) + :m_thd(thd), + m_count_cuted_fields(thd->count_cuted_fields), + m_abort_on_warning(thd->abort_on_warning), + m_stmt_modified_non_trans_table(thd->transaction->stmt. + modified_non_trans_table) + { + start(); + } + ~Sp_eval_expr_state() + { + stop(); + } +}; + + +#ifndef DBUG_OFF +void dbug_serve_apcs(THD *thd, int n_calls); +#endif + +class StatementBinlog +{ + const enum_binlog_format saved_binlog_format; + THD *const thd; + +public: + StatementBinlog(THD *thd, bool need_stmt) : + saved_binlog_format(thd->get_current_stmt_binlog_format()), + thd(thd) + { + if (need_stmt && saved_binlog_format != BINLOG_FORMAT_STMT) + { + thd->set_current_stmt_binlog_format_stmt(); + } + } + ~StatementBinlog() + { + thd->set_current_stmt_binlog_format(saved_binlog_format); + } +}; + + +/** THD registry */ +class THD_list: public THD_list_iterator +{ +public: + /** + Constructor replacement. + + Unfortunately we can't use fair constructor to initialize mutex + for two reasons: PFS and embedded. The former can probably be fixed, + the latter can probably be dropped. + */ + void init() + { + mysql_rwlock_init(key_rwlock_THD_list, &lock); + } + + /** Destructor replacement. */ + void destroy() + { + mysql_rwlock_destroy(&lock); + } + + /** + Inserts thread to registry. + + @param thd thread + + Thread becomes accessible via server_threads. 
+ */ + void insert(THD *thd) + { + mysql_rwlock_wrlock(&lock); + threads.append(thd); + mysql_rwlock_unlock(&lock); + } + + /** + Removes thread from registry. + + @param thd thread + + Thread becomes not accessible via server_threads. + */ + void erase(THD *thd) + { + thd->assert_linked(); + mysql_rwlock_wrlock(&lock); + thd->unlink(); + mysql_rwlock_unlock(&lock); + } +}; + +extern THD_list server_threads; + +void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps, + uint field_count); +#ifdef WITH_WSREP +extern void wsrep_to_isolation_end(THD*); +#endif +/* + RAII utility class to ease binlogging with temporary setting + THD etc context and restoring the original one upon logger execution. +*/ +class Write_log_with_flags +{ + THD* m_thd; +#ifdef WITH_WSREP + bool wsrep_to_isolation; +#endif + +public: +~Write_log_with_flags() + { + m_thd->set_binlog_flags_for_alter(0); + m_thd->set_binlog_start_alter_seq_no(0); +#ifdef WITH_WSREP + if (wsrep_to_isolation) + wsrep_to_isolation_end(m_thd); +#endif + } + + Write_log_with_flags(THD *thd, uchar flags, + bool do_wsrep_iso __attribute__((unused))= false) : + m_thd(thd) + { + m_thd->set_binlog_flags_for_alter(flags); +#ifdef WITH_WSREP + wsrep_to_isolation= do_wsrep_iso && WSREP(m_thd); +#endif + } +}; + +#endif /* MYSQL_SERVER */ +#endif /* SQL_CLASS_INCLUDED */ diff --git a/sql/sql_client.cc b/sql/sql_client.cc new file mode 100644 index 00000000..b4a22c34 --- /dev/null +++ b/sql/sql_client.cc @@ -0,0 +1,44 @@ +/* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2012, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */

/*
  This file defines some MySQL C API functions that are server specific
*/

#include "mariadb.h"
#include "sql_priv.h"
#include "sql_class.h"                          // system_variables

/*
  Function called by my_net_init() to set some check variables.

  Seeds a freshly initialized NET with the server-side defaults taken
  from global_system_variables (buffer length, timeouts, retry count and
  maximum packet size).  The whole body is compiled out for the embedded
  server build, so there it is a no-op.
*/

extern "C" {
void my_net_local_init(NET *net)
{
#ifndef EMBEDDED_LIBRARY
  net->max_packet= (uint) global_system_variables.net_buffer_length;
  /* Timeouts start at 0, then are set from the global session defaults. */
  net->read_timeout= net->write_timeout= 0;
  my_net_set_read_timeout(net, (uint)global_system_variables.net_read_timeout);
  my_net_set_write_timeout(net,
                           (uint)global_system_variables.net_write_timeout);

  net->retry_count= (uint) global_system_variables.net_retry_count;
  /* Allow at least one full network buffer per packet. */
  net->max_packet_size= MY_MAX(global_system_variables.net_buffer_length,
                               global_system_variables.max_allowed_packet);
#endif
}
}

diff --git a/sql/sql_cmd.h b/sql/sql_cmd.h
new file mode 100644
index 00000000..2623b370
--- /dev/null
+++ b/sql/sql_cmd.h
@@ -0,0 +1,279 @@
/* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file Representation of an SQL command. +*/ + +#ifndef SQL_CMD_INCLUDED +#define SQL_CMD_INCLUDED + +/* + When a command is added here, be sure it's also added in mysqld.cc + in "struct show_var_st status_vars[]= {" ... + + If the command returns a result set or is not allowed in stored + functions or triggers, please also make sure that + sp_get_flags_for_command (sp_head.cc) returns proper flags for the + added SQLCOM_. +*/ + +enum enum_sql_command { + SQLCOM_SELECT, SQLCOM_CREATE_TABLE, SQLCOM_CREATE_INDEX, SQLCOM_ALTER_TABLE, + SQLCOM_UPDATE, SQLCOM_INSERT, SQLCOM_INSERT_SELECT, + SQLCOM_DELETE, SQLCOM_TRUNCATE, SQLCOM_DROP_TABLE, SQLCOM_DROP_INDEX, + + SQLCOM_SHOW_DATABASES, SQLCOM_SHOW_TABLES, SQLCOM_SHOW_FIELDS, + SQLCOM_SHOW_KEYS, SQLCOM_SHOW_VARIABLES, SQLCOM_SHOW_STATUS, + SQLCOM_SHOW_ENGINE_LOGS, SQLCOM_SHOW_ENGINE_STATUS, SQLCOM_SHOW_ENGINE_MUTEX, + SQLCOM_SHOW_PROCESSLIST, SQLCOM_SHOW_BINLOG_STAT, SQLCOM_SHOW_SLAVE_STAT, + SQLCOM_SHOW_GRANTS, SQLCOM_SHOW_CREATE, SQLCOM_SHOW_CHARSETS, + SQLCOM_SHOW_COLLATIONS, SQLCOM_SHOW_CREATE_DB, SQLCOM_SHOW_TABLE_STATUS, + SQLCOM_SHOW_TRIGGERS, + + SQLCOM_LOAD,SQLCOM_SET_OPTION,SQLCOM_LOCK_TABLES,SQLCOM_UNLOCK_TABLES, + SQLCOM_GRANT, + SQLCOM_CHANGE_DB, SQLCOM_CREATE_DB, SQLCOM_DROP_DB, SQLCOM_ALTER_DB, + SQLCOM_REPAIR, SQLCOM_REPLACE, SQLCOM_REPLACE_SELECT, + SQLCOM_CREATE_FUNCTION, SQLCOM_DROP_FUNCTION, + SQLCOM_REVOKE,SQLCOM_OPTIMIZE, SQLCOM_CHECK, + SQLCOM_ASSIGN_TO_KEYCACHE, SQLCOM_PRELOAD_KEYS, + SQLCOM_FLUSH, SQLCOM_KILL, SQLCOM_ANALYZE, + SQLCOM_ROLLBACK, SQLCOM_ROLLBACK_TO_SAVEPOINT, + SQLCOM_COMMIT, SQLCOM_SAVEPOINT, SQLCOM_RELEASE_SAVEPOINT, + SQLCOM_SLAVE_START, SQLCOM_SLAVE_STOP, + SQLCOM_BEGIN, SQLCOM_CHANGE_MASTER, + SQLCOM_RENAME_TABLE, + SQLCOM_RESET, SQLCOM_PURGE, 
SQLCOM_PURGE_BEFORE, SQLCOM_SHOW_BINLOGS, + SQLCOM_SHOW_OPEN_TABLES, + SQLCOM_HA_OPEN, SQLCOM_HA_CLOSE, SQLCOM_HA_READ, + SQLCOM_SHOW_SLAVE_HOSTS, SQLCOM_DELETE_MULTI, SQLCOM_UPDATE_MULTI, + SQLCOM_SHOW_BINLOG_EVENTS, SQLCOM_DO, + SQLCOM_SHOW_WARNS, SQLCOM_EMPTY_QUERY, SQLCOM_SHOW_ERRORS, + SQLCOM_SHOW_STORAGE_ENGINES, SQLCOM_SHOW_PRIVILEGES, + SQLCOM_HELP, SQLCOM_CREATE_USER, SQLCOM_DROP_USER, SQLCOM_RENAME_USER, + SQLCOM_REVOKE_ALL, SQLCOM_CHECKSUM, + SQLCOM_CREATE_PROCEDURE, SQLCOM_CREATE_SPFUNCTION, SQLCOM_CALL, + SQLCOM_DROP_PROCEDURE, SQLCOM_ALTER_PROCEDURE,SQLCOM_ALTER_FUNCTION, + SQLCOM_SHOW_CREATE_PROC, SQLCOM_SHOW_CREATE_FUNC, + SQLCOM_SHOW_STATUS_PROC, SQLCOM_SHOW_STATUS_FUNC, + SQLCOM_PREPARE, SQLCOM_EXECUTE, SQLCOM_DEALLOCATE_PREPARE, + SQLCOM_CREATE_VIEW, SQLCOM_DROP_VIEW, + SQLCOM_CREATE_TRIGGER, SQLCOM_DROP_TRIGGER, + SQLCOM_XA_START, SQLCOM_XA_END, SQLCOM_XA_PREPARE, + SQLCOM_XA_COMMIT, SQLCOM_XA_ROLLBACK, SQLCOM_XA_RECOVER, + SQLCOM_SHOW_PROC_CODE, SQLCOM_SHOW_FUNC_CODE, + SQLCOM_INSTALL_PLUGIN, SQLCOM_UNINSTALL_PLUGIN, + SQLCOM_SHOW_AUTHORS, SQLCOM_BINLOG_BASE64_EVENT, + SQLCOM_SHOW_PLUGINS, SQLCOM_SHOW_CONTRIBUTORS, + SQLCOM_CREATE_SERVER, SQLCOM_DROP_SERVER, SQLCOM_ALTER_SERVER, + SQLCOM_CREATE_EVENT, SQLCOM_ALTER_EVENT, SQLCOM_DROP_EVENT, + SQLCOM_SHOW_CREATE_EVENT, SQLCOM_SHOW_EVENTS, + SQLCOM_SHOW_CREATE_TRIGGER, + SQLCOM_ALTER_DB_UPGRADE, + SQLCOM_SHOW_PROFILE, SQLCOM_SHOW_PROFILES, + SQLCOM_SIGNAL, SQLCOM_RESIGNAL, + SQLCOM_SHOW_RELAYLOG_EVENTS, + SQLCOM_GET_DIAGNOSTICS, + SQLCOM_SLAVE_ALL_START, SQLCOM_SLAVE_ALL_STOP, + SQLCOM_SHOW_EXPLAIN, + SQLCOM_SHOW_ANALYZE, SQLCOM_SHUTDOWN, + SQLCOM_CREATE_ROLE, SQLCOM_DROP_ROLE, SQLCOM_GRANT_ROLE, SQLCOM_REVOKE_ROLE, + SQLCOM_COMPOUND, + SQLCOM_SHOW_GENERIC, + SQLCOM_ALTER_USER, + SQLCOM_SHOW_CREATE_USER, + SQLCOM_EXECUTE_IMMEDIATE, + SQLCOM_CREATE_SEQUENCE, + SQLCOM_DROP_SEQUENCE, + SQLCOM_ALTER_SEQUENCE, + SQLCOM_CREATE_PACKAGE, + SQLCOM_DROP_PACKAGE, + SQLCOM_CREATE_PACKAGE_BODY, + 
SQLCOM_DROP_PACKAGE_BODY, + SQLCOM_SHOW_CREATE_PACKAGE, + SQLCOM_SHOW_CREATE_PACKAGE_BODY, + SQLCOM_SHOW_STATUS_PACKAGE, + SQLCOM_SHOW_STATUS_PACKAGE_BODY, + SQLCOM_SHOW_PACKAGE_BODY_CODE, + SQLCOM_BACKUP, SQLCOM_BACKUP_LOCK, + + /* + When a command is added here, be sure it's also added in mysqld.cc + in "struct show_var_st com_status_vars[]= {" ... + */ + /* This should be the last !!! */ + SQLCOM_END +}; + + +class Storage_engine_name +{ +protected: + LEX_CSTRING m_storage_engine_name; +public: + Storage_engine_name() + { + m_storage_engine_name.str= NULL; + m_storage_engine_name.length= 0; + } + Storage_engine_name(const LEX_CSTRING &name) + :m_storage_engine_name(name) + { } + Storage_engine_name(const LEX_STRING &name) + { + m_storage_engine_name.str= name.str; + m_storage_engine_name.length= name.length; + } + bool resolve_storage_engine_with_error(THD *thd, + handlerton **ha, + bool tmp_table); + bool is_set() { return m_storage_engine_name.str != NULL; } +}; + + +/** + @class Sql_cmd - Representation of an SQL command. + + This class is an interface between the parser and the runtime. + The parser builds the appropriate derived classes of Sql_cmd + to represent a SQL statement in the parsed tree. + The execute() method in the derived classes of Sql_cmd contain the runtime + implementation. + Note that this interface is used for SQL statements recently implemented, + the code for older statements tend to load the LEX structure with more + attributes instead. + Implement new statements by sub-classing Sql_cmd, as this improves + code modularity (see the 'big switch' in dispatch_command()), and decreases + the total size of the LEX structure (therefore saving memory in stored + programs). + The recommended name of a derived class of Sql_cmd is Sql_cmd_. + + Notice that the Sql_cmd class should not be confused with the + Statement class. Statement is a class that is used to manage an SQL + command or a set of SQL commands. 
When the SQL statement text is + analyzed, the parser will create one or more Sql_cmd objects to + represent the actual SQL commands. +*/ +class Sql_cmd : public Sql_alloc +{ +private: + Sql_cmd(const Sql_cmd &); // No copy constructor wanted + void operator=(Sql_cmd &); // No assignment operator wanted + +public: + /** + @brief Return the command code for this statement + */ + virtual enum_sql_command sql_command_code() const = 0; + + /** + Execute this SQL statement. + @param thd the current thread. + @retval false on success. + @retval true on error + */ + virtual bool execute(THD *thd) = 0; + + virtual Storage_engine_name *option_storage_engine_name() + { + return NULL; + } + +protected: + Sql_cmd() = default; + + virtual ~Sql_cmd() + { + /* + Sql_cmd objects are allocated in thd->mem_root. + In MySQL, the C++ destructor is never called, the underlying MEM_ROOT is + simply destroyed instead. + Do not rely on the destructor for any cleanup. + */ + DBUG_ASSERT(FALSE); + } +}; + +class Sql_cmd_show_slave_status: public Sql_cmd +{ +protected: + bool show_all_slaves_status; +public: + Sql_cmd_show_slave_status() + :show_all_slaves_status(false) + {} + + Sql_cmd_show_slave_status(bool status_all) + :show_all_slaves_status(status_all) + {} + + enum_sql_command sql_command_code() const { return SQLCOM_SHOW_SLAVE_STAT; } + + bool execute(THD *thd); + bool is_show_all_slaves_stat() { return show_all_slaves_status; } +}; + + +class Sql_cmd_create_table_like: public Sql_cmd, + public Storage_engine_name +{ +public: + Storage_engine_name *option_storage_engine_name() { return this; } + bool execute(THD *thd); +}; + +class Sql_cmd_create_table: public Sql_cmd_create_table_like +{ +public: + enum_sql_command sql_command_code() const { return SQLCOM_CREATE_TABLE; } +}; + +class Sql_cmd_create_sequence: public Sql_cmd_create_table_like +{ +public: + enum_sql_command sql_command_code() const { return SQLCOM_CREATE_SEQUENCE; } +}; + + +/** + Sql_cmd_call represents the CALL 
statement. +*/ +class Sql_cmd_call : public Sql_cmd +{ +public: + class sp_name *m_name; + const class Sp_handler *m_handler; + Sql_cmd_call(class sp_name *name, const class Sp_handler *handler) + :m_name(name), + m_handler(handler) + {} + + virtual ~Sql_cmd_call() = default; + + /** + Execute a CALL statement at runtime. + @param thd the current thread. + @return false on success. + */ + bool execute(THD *thd); + + virtual enum_sql_command sql_command_code() const + { + return SQLCOM_CALL; + } +}; + +#endif // SQL_CMD_INCLUDED diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc new file mode 100644 index 00000000..6b195ac9 --- /dev/null +++ b/sql/sql_connect.cc @@ -0,0 +1,1580 @@ +/* + Copyright (c) 2007, 2013, Oracle and/or its affiliates. + Copyright (c) 2008, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/* + Functions to autenticate and handle reqests for a connection +*/ + +#include "mariadb.h" +#include "mysqld.h" +#include "sql_priv.h" +#ifndef _WIN32 +#include // getservbyname, servent +#endif +#include "sql_audit.h" +#include "sql_connect.h" +#include "thread_cache.h" +#include "probes_mysql.h" +#include "sql_parse.h" // sql_command_flags, + // execute_init_command, + // do_command +#include "sql_db.h" // mysql_change_db +#include "hostname.h" // inc_host_errors, ip_to_hostname, + // reset_host_errors +#include "sql_callback.h" + +#ifdef WITH_WSREP +#include "wsrep_trans_observer.h" /* wsrep open/close */ +#include "wsrep_mysqld.h" +#endif /* WITH_WSREP */ +#include "proxy_protocol.h" +#include + +HASH global_user_stats, global_client_stats, global_table_stats; +HASH global_index_stats; +/* Protects the above global stats */ +extern mysql_mutex_t LOCK_global_user_client_stats; +extern mysql_mutex_t LOCK_global_table_stats; +extern mysql_mutex_t LOCK_global_index_stats; +extern vio_keepalive_opts opt_vio_keepalive; + +/* + Get structure for logging connection data for the current user +*/ + +#ifndef NO_EMBEDDED_ACCESS_CHECKS +static HASH hash_user_connections; + +int get_or_create_user_conn(THD *thd, const char *user, + const char *host, + const USER_RESOURCES *mqh) +{ + int return_val= 0; + size_t temp_len, user_len; + char temp_user[USER_HOST_BUFF_SIZE]; + struct user_conn *uc; + + DBUG_ASSERT(user != 0); + DBUG_ASSERT(host != 0); + DBUG_ASSERT(thd->user_connect == 0); + + user_len= strlen(user); + temp_len= (strmov(strmov(temp_user, user)+1, host) - temp_user)+1; + mysql_mutex_lock(&LOCK_user_conn); + if (!(uc = (struct user_conn *) my_hash_search(&hash_user_connections, + (uchar*) temp_user, temp_len))) + { + /* First connection for user; Create 
a user connection object */ + if (!(uc= ((struct user_conn*) + my_malloc(key_memory_user_conn, + sizeof(struct user_conn) + temp_len+1, MYF(MY_WME))))) + { + /* MY_WME ensures an error is set in THD. */ + return_val= 1; + goto end; + } + uc->user=(char*) (uc+1); + memcpy(uc->user,temp_user,temp_len+1); + uc->host= uc->user + user_len + 1; + uc->len= (uint)temp_len; + uc->connections= uc->questions= uc->updates= uc->conn_per_hour= 0; + uc->reset_utime= thd->thr_create_utime; + if (my_hash_insert(&hash_user_connections, (uchar*) uc)) + { + /* The only possible error is out of memory, MY_WME sets an error. */ + my_free(uc); + return_val= 1; + goto end; + } + } + uc->user_resources= *mqh; + thd->user_connect=uc; + uc->connections++; +end: + mysql_mutex_unlock(&LOCK_user_conn); + return return_val; +} + + +/* + check if user has already too many connections + + SYNOPSIS + check_for_max_user_connections() + thd Thread handle + uc User connect object + + NOTES + If check fails, we decrease user connection count, which means one + shouldn't call decrease_user_connections() after this function. 
+ + RETURN + 0 ok + 1 error +*/ + +int check_for_max_user_connections(THD *thd, USER_CONN *uc) +{ + int error= 1; + Host_errors errors; + DBUG_ENTER("check_for_max_user_connections"); + + mysql_mutex_lock(&LOCK_user_conn); + + /* Root is not affected by the value of max_user_connections */ + if (global_system_variables.max_user_connections && + !uc->user_resources.user_conn && + global_system_variables.max_user_connections < uc->connections && + !(thd->security_ctx->master_access & PRIV_IGNORE_MAX_USER_CONNECTIONS)) + { + my_error(ER_TOO_MANY_USER_CONNECTIONS, MYF(0), uc->user); + error=1; + errors.m_max_user_connection= 1; + goto end; + } + time_out_user_resource_limits(thd, uc); + if (uc->user_resources.user_conn && + uc->user_resources.user_conn < uc->connections) + { + my_error(ER_USER_LIMIT_REACHED, MYF(0), uc->user, + "max_user_connections", + (long) uc->user_resources.user_conn); + error= 1; + errors.m_max_user_connection= 1; + goto end; + } + if (uc->user_resources.conn_per_hour && + uc->user_resources.conn_per_hour <= uc->conn_per_hour) + { + my_error(ER_USER_LIMIT_REACHED, MYF(0), uc->user, + "max_connections_per_hour", + (long) uc->user_resources.conn_per_hour); + error=1; + errors.m_max_user_connection_per_hour= 1; + goto end; + } + uc->conn_per_hour++; + error= 0; + +end: + if (unlikely(error)) + { + uc->connections--; // no need for decrease_user_connections() here + /* + The thread may returned back to the pool and assigned to a user + that doesn't have a limit. Ensure the user is not using resources + of someone else. 
+ */ + thd->user_connect= NULL; + } + mysql_mutex_unlock(&LOCK_user_conn); + if (unlikely(error)) + { + inc_host_errors(thd->main_security_ctx.ip, &errors); + } + DBUG_RETURN(error); +} + + +/* + Decrease user connection count + + SYNOPSIS + decrease_user_connections() + uc User connection object + + NOTES + If there is a n user connection object for a connection + (which only happens if 'max_user_connections' is defined or + if someone has created a resource grant for a user), then + the connection count is always incremented on connect. + + The user connect object is not freed if some users has + 'max connections per hour' defined as we need to be able to hold + count over the lifetime of the connection. +*/ + +void decrease_user_connections(USER_CONN *uc) +{ + DBUG_ENTER("decrease_user_connections"); + mysql_mutex_lock(&LOCK_user_conn); + DBUG_ASSERT(uc->connections); + if (!--uc->connections && !mqh_used) + { + /* Last connection for user; Delete it */ + (void) my_hash_delete(&hash_user_connections,(uchar*) uc); + } + mysql_mutex_unlock(&LOCK_user_conn); + DBUG_VOID_RETURN; +} + + +/* + Reset per-hour user resource limits when it has been more than + an hour since they were last checked + + SYNOPSIS: + time_out_user_resource_limits() + thd Thread handler + uc User connection details + + NOTE: + This assumes that the LOCK_user_conn mutex has been acquired, so it is + safe to test and modify members of the USER_CONN structure. +*/ + +void time_out_user_resource_limits(THD *thd, USER_CONN *uc) +{ + ulonglong check_time= thd->start_utime; + DBUG_ENTER("time_out_user_resource_limits"); + + /* If more than a hour since last check, reset resource checking */ + if (check_time - uc->reset_utime >= 3600000000ULL) + { + uc->questions=0; + uc->updates=0; + uc->conn_per_hour=0; + uc->reset_utime= check_time; + } + + DBUG_VOID_RETURN; +} + +/* + Check if maximum queries per hour limit has been reached + returns 0 if OK. 
+*/ + +bool check_mqh(THD *thd, uint check_command) +{ + bool error= 0; + USER_CONN *uc=thd->user_connect; + DBUG_ENTER("check_mqh"); + DBUG_ASSERT(uc != 0); + + mysql_mutex_lock(&LOCK_user_conn); + + time_out_user_resource_limits(thd, uc); + + /* Check that we have not done too many questions / hour */ + if (uc->user_resources.questions && + uc->questions++ >= uc->user_resources.questions) + { + my_error(ER_USER_LIMIT_REACHED, MYF(0), uc->user, "max_queries_per_hour", + (long) uc->user_resources.questions); + error=1; + goto end; + } + if (check_command < (uint) SQLCOM_END) + { + /* Check that we have not done too many updates / hour */ + if (uc->user_resources.updates && + (sql_command_flags[check_command] & CF_CHANGES_DATA) && + uc->updates++ >= uc->user_resources.updates) + { + my_error(ER_USER_LIMIT_REACHED, MYF(0), uc->user, "max_updates_per_hour", + (long) uc->user_resources.updates); + error=1; + goto end; + } + } +end: + mysql_mutex_unlock(&LOCK_user_conn); + DBUG_RETURN(error); +} + +#endif /* NO_EMBEDDED_ACCESS_CHECKS */ + +/* + Check for maximum allowable user connections, if the mysqld server is + started with corresponding variable that is greater then 0. 
+*/ + +extern "C" uchar *get_key_conn(user_conn *buff, size_t *length, + my_bool not_used __attribute__((unused))) +{ + *length= buff->len; + return (uchar*) buff->user; +} + + +extern "C" void free_user(struct user_conn *uc) +{ + my_free(uc); +} + + +void init_max_user_conn(void) +{ +#ifndef NO_EMBEDDED_ACCESS_CHECKS + my_hash_init(key_memory_user_conn, &hash_user_connections, + system_charset_info, max_connections, 0, 0, (my_hash_get_key) + get_key_conn, (my_hash_free_key) free_user, 0); +#endif +} + + +void free_max_user_conn(void) +{ +#ifndef NO_EMBEDDED_ACCESS_CHECKS + my_hash_free(&hash_user_connections); +#endif /* NO_EMBEDDED_ACCESS_CHECKS */ +} + + +void reset_mqh(LEX_USER *lu, bool get_them= 0) +{ +#ifndef NO_EMBEDDED_ACCESS_CHECKS + mysql_mutex_lock(&LOCK_user_conn); + if (lu) // for GRANT + { + USER_CONN *uc; + size_t temp_len=lu->user.length+lu->host.length+2; + char temp_user[USER_HOST_BUFF_SIZE]; + + memcpy(temp_user,lu->user.str,lu->user.length); + memcpy(temp_user+lu->user.length+1,lu->host.str,lu->host.length); + temp_user[lu->user.length]='\0'; temp_user[temp_len-1]=0; + if ((uc = (struct user_conn *) my_hash_search(&hash_user_connections, + (uchar*) temp_user, + temp_len))) + { + uc->questions=0; + get_mqh(temp_user,&temp_user[lu->user.length+1],uc); + uc->updates=0; + uc->conn_per_hour=0; + } + } + else + { + /* for FLUSH PRIVILEGES and FLUSH USER_RESOURCES */ + for (uint idx=0;idx < hash_user_connections.records; idx++) + { + USER_CONN *uc=(struct user_conn *) + my_hash_element(&hash_user_connections, idx); + if (get_them) + get_mqh(uc->user,uc->host,uc); + uc->questions=0; + uc->updates=0; + uc->conn_per_hour=0; + } + } + mysql_mutex_unlock(&LOCK_user_conn); +#endif /* NO_EMBEDDED_ACCESS_CHECKS */ +} + +/***************************************************************************** + Handle users statistics +*****************************************************************************/ + +/* 'mysql_system_user' is used for when the user is not 
defined for a THD. */ +static const char mysql_system_user[]= "#mysql_system#"; + +// Returns 'user' if it's not NULL. Returns 'mysql_system_user' otherwise. +static const char * get_valid_user_string(const char* user) +{ + return user ? user : mysql_system_user; +} + +/* + Returns string as 'IP' for the client-side of the connection represented by + 'client'. Does not allocate memory. May return "". +*/ + +static const char *get_client_host(THD *client) +{ + return client->security_ctx->host_or_ip[0] ? + client->security_ctx->host_or_ip : + client->security_ctx->host ? client->security_ctx->host : ""; +} + +extern "C" uchar *get_key_user_stats(USER_STATS *user_stats, size_t *length, + my_bool not_used __attribute__((unused))) +{ + *length= user_stats->user_name_length; + return (uchar*) user_stats->user; +} + +void free_user_stats(USER_STATS* user_stats) +{ + my_free(user_stats); +} + +void init_user_stats(USER_STATS *user_stats, + const char *user, + size_t user_length, + const char *priv_user, + uint total_connections, + uint total_ssl_connections, + uint concurrent_connections, + time_t connected_time, + double busy_time, + double cpu_time, + ulonglong bytes_received, + ulonglong bytes_sent, + ulonglong binlog_bytes_written, + ha_rows rows_sent, + ha_rows rows_read, + ha_rows rows_inserted, + ha_rows rows_deleted, + ha_rows rows_updated, + ulonglong select_commands, + ulonglong update_commands, + ulonglong other_commands, + ulonglong commit_trans, + ulonglong rollback_trans, + ulonglong denied_connections, + ulonglong lost_connections, + ulonglong max_statement_time_exceeded, + ulonglong access_denied_errors, + ulonglong empty_queries) +{ + DBUG_ENTER("init_user_stats"); + DBUG_PRINT("enter", ("user: %s priv_user: %s", user, priv_user)); + + user_length= MY_MIN(user_length, sizeof(user_stats->user)-1); + memcpy(user_stats->user, user, user_length); + user_stats->user[user_length]= 0; + user_stats->user_name_length= (uint)user_length; + 
strmake_buf(user_stats->priv_user, priv_user); + + user_stats->total_connections= total_connections; + user_stats->total_ssl_connections= total_ssl_connections; + user_stats->concurrent_connections= concurrent_connections; + user_stats->connected_time= connected_time; + user_stats->busy_time= busy_time; + user_stats->cpu_time= cpu_time; + user_stats->bytes_received= bytes_received; + user_stats->bytes_sent= bytes_sent; + user_stats->binlog_bytes_written= binlog_bytes_written; + user_stats->rows_sent= rows_sent; + user_stats->rows_read= rows_read; + user_stats->rows_inserted= rows_inserted; + user_stats->rows_deleted= rows_deleted; + user_stats->rows_updated= rows_updated; + user_stats->select_commands= select_commands; + user_stats->update_commands= update_commands; + user_stats->other_commands= other_commands; + user_stats->commit_trans= commit_trans; + user_stats->rollback_trans= rollback_trans; + user_stats->denied_connections= denied_connections; + user_stats->lost_connections= lost_connections; + user_stats->max_statement_time_exceeded= max_statement_time_exceeded; + user_stats->access_denied_errors= access_denied_errors; + user_stats->empty_queries= empty_queries; + DBUG_VOID_RETURN; +} + + +void init_global_user_stats(void) +{ + my_hash_init(PSI_INSTRUMENT_ME, &global_user_stats, system_charset_info, max_connections, + 0, 0, (my_hash_get_key) get_key_user_stats, + (my_hash_free_key) free_user_stats, 0); +} + +void init_global_client_stats(void) +{ + my_hash_init(PSI_INSTRUMENT_ME, &global_client_stats, system_charset_info, max_connections, + 0, 0, (my_hash_get_key) get_key_user_stats, + (my_hash_free_key) free_user_stats, 0); +} + +extern "C" uchar *get_key_table_stats(TABLE_STATS *table_stats, size_t *length, + my_bool not_used __attribute__((unused))) +{ + *length= table_stats->table_name_length; + return (uchar*) table_stats->table; +} + +extern "C" void free_table_stats(TABLE_STATS* table_stats) +{ + my_free(table_stats); +} + +void 
init_global_table_stats(void) +{ + my_hash_init(PSI_INSTRUMENT_ME, &global_table_stats, system_charset_info, + max_connections, 0, 0, (my_hash_get_key) get_key_table_stats, + (my_hash_free_key) free_table_stats, 0); +} + +extern "C" uchar *get_key_index_stats(INDEX_STATS *index_stats, size_t *length, + my_bool not_used __attribute__((unused))) +{ + *length= index_stats->index_name_length; + return (uchar*) index_stats->index; +} + +extern "C" void free_index_stats(INDEX_STATS* index_stats) +{ + my_free(index_stats); +} + +void init_global_index_stats(void) +{ + my_hash_init(PSI_INSTRUMENT_ME, &global_index_stats, system_charset_info, + max_connections, 0, 0, (my_hash_get_key) get_key_index_stats, + (my_hash_free_key) free_index_stats, 0); +} + + +void free_global_user_stats(void) +{ + my_hash_free(&global_user_stats); +} + +void free_global_table_stats(void) +{ + my_hash_free(&global_table_stats); +} + +void free_global_index_stats(void) +{ + my_hash_free(&global_index_stats); +} + +void free_global_client_stats(void) +{ + my_hash_free(&global_client_stats); +} + +/* + Increments the global stats connection count for an entry from + global_client_stats or global_user_stats. Returns 0 on success + and 1 on error. 
+*/ + +static bool increment_count_by_name(const char *name, size_t name_length, + const char *role_name, + HASH *users_or_clients, THD *thd) +{ + USER_STATS *user_stats; + + if (!(user_stats= (USER_STATS*) my_hash_search(users_or_clients, (uchar*) name, + name_length))) + { + /* First connection for this user or client */ + if (!(user_stats= ((USER_STATS*) + my_malloc(PSI_INSTRUMENT_ME, sizeof(USER_STATS), + MYF(MY_WME | MY_ZEROFILL))))) + return TRUE; // Out of memory + + init_user_stats(user_stats, name, name_length, role_name, + 0, 0, 0, // connections + 0, 0, 0, // time + 0, 0, 0, // bytes sent, received and written + 0, 0, // rows sent and read + 0, 0, 0, // rows inserted, deleted and updated + 0, 0, 0, // select, update and other commands + 0, 0, // commit and rollback trans + thd->status_var.access_denied_errors, + 0, // lost connections + 0, // max query timeouts + 0, // access denied errors + 0); // empty queries + + if (my_hash_insert(users_or_clients, (uchar*)user_stats)) + { + my_free(user_stats); + return TRUE; // Out of memory + } + } + user_stats->total_connections++; + if (thd->net.vio && thd->net.vio->type == VIO_TYPE_SSL) + user_stats->total_ssl_connections++; + return FALSE; +} + + +/* + Increments the global user and client stats connection count. + + @param use_lock if true, LOCK_global_user_client_stats will be locked + + @retval 0 ok + @retval 1 error. 
+*/ + +#ifndef EMBEDDED_LIBRARY +static bool increment_connection_count(THD* thd, bool use_lock) +{ + const char *user_string= get_valid_user_string(thd->main_security_ctx.user); + const char *client_string= get_client_host(thd); + bool return_value= FALSE; + + if (!thd->userstat_running) + return FALSE; + + if (use_lock) + mysql_mutex_lock(&LOCK_global_user_client_stats); + + if (increment_count_by_name(user_string, strlen(user_string), user_string, + &global_user_stats, thd)) + { + return_value= TRUE; + goto end; + } + if (increment_count_by_name(client_string, strlen(client_string), + user_string, &global_client_stats, thd)) + { + return_value= TRUE; + goto end; + } + +end: + if (use_lock) + mysql_mutex_unlock(&LOCK_global_user_client_stats); + return return_value; +} +#endif + +/* + Used to update the global user and client stats +*/ + +static void update_global_user_stats_with_user(THD *thd, + USER_STATS *user_stats, + time_t now) +{ + DBUG_ASSERT(thd->userstat_running); + + user_stats->connected_time+= now - thd->last_global_update_time; + user_stats->busy_time+= (thd->status_var.busy_time - + thd->org_status_var.busy_time); + user_stats->cpu_time+= (thd->status_var.cpu_time - + thd->org_status_var.cpu_time); + /* + This is handle specially as bytes_received is incremented BEFORE + org_status_var is copied. + */ + user_stats->bytes_received+= (thd->org_status_var.bytes_received- + thd->start_bytes_received); + user_stats->bytes_sent+= (thd->status_var.bytes_sent - + thd->org_status_var.bytes_sent); + user_stats->binlog_bytes_written+= + (thd->status_var.binlog_bytes_written - + thd->org_status_var.binlog_bytes_written); + /* We are not counting rows in internal temporary tables here ! 
*/ + user_stats->rows_read+= (thd->status_var.rows_read - + thd->org_status_var.rows_read); + user_stats->rows_sent+= (thd->status_var.rows_sent - + thd->org_status_var.rows_sent); + user_stats->rows_inserted+= (thd->status_var.ha_write_count - + thd->org_status_var.ha_write_count); + user_stats->rows_deleted+= (thd->status_var.ha_delete_count - + thd->org_status_var.ha_delete_count); + user_stats->rows_updated+= (thd->status_var.ha_update_count - + thd->org_status_var.ha_update_count); + user_stats->select_commands+= thd->select_commands; + user_stats->update_commands+= thd->update_commands; + user_stats->other_commands+= thd->other_commands; + user_stats->commit_trans+= (thd->status_var.ha_commit_count - + thd->org_status_var.ha_commit_count); + user_stats->rollback_trans+= (thd->status_var.ha_rollback_count + + thd->status_var.ha_savepoint_rollback_count - + thd->org_status_var.ha_rollback_count - + thd->org_status_var. + ha_savepoint_rollback_count); + user_stats->access_denied_errors+= + (thd->status_var.access_denied_errors - + thd->org_status_var.access_denied_errors); + user_stats->empty_queries+= (thd->status_var.empty_queries - + thd->org_status_var.empty_queries); + + /* The following can only contain 0 or 1 and then connection ends */ + user_stats->denied_connections+= thd->status_var.access_denied_errors; + user_stats->lost_connections+= thd->status_var.lost_connections; + user_stats->max_statement_time_exceeded+= thd->status_var.max_statement_time_exceeded; +} + + +/* Updates the global stats of a user or client */ +void update_global_user_stats(THD *thd, bool create_user, time_t now) +{ + const char *user_string, *client_string; + USER_STATS *user_stats; + size_t user_string_length, client_string_length; + DBUG_ASSERT(thd->userstat_running); + + user_string= get_valid_user_string(thd->main_security_ctx.user); + user_string_length= strlen(user_string); + client_string= get_client_host(thd); + client_string_length= strlen(client_string); + + 
mysql_mutex_lock(&LOCK_global_user_client_stats); + + // Update by user name + if ((user_stats= (USER_STATS*) my_hash_search(&global_user_stats, + (uchar*) user_string, + user_string_length))) + { + /* Found user. */ + update_global_user_stats_with_user(thd, user_stats, now); + } + else + { + /* Create the entry */ + if (create_user) + { + increment_count_by_name(user_string, user_string_length, user_string, + &global_user_stats, thd); + } + } + + /* Update by client IP */ + if ((user_stats= (USER_STATS*)my_hash_search(&global_client_stats, + (uchar*) client_string, + client_string_length))) + { + // Found by client IP + update_global_user_stats_with_user(thd, user_stats, now); + } + else + { + // Create the entry + if (create_user) + { + increment_count_by_name(client_string, client_string_length, + user_string, &global_client_stats, thd); + } + } + /* Reset variables only used for counting */ + thd->select_commands= thd->update_commands= thd->other_commands= 0; + thd->last_global_update_time= now; + + mysql_mutex_unlock(&LOCK_global_user_client_stats); +} + + +/** + Set thread character set variables from the given ID + + @param thd thread handle + @param cs_number character set and collation ID + + @retval 0 OK; character_set_client, collation_connection and + character_set_results are set to the new value, + or to the default global values. + + @retval 1 error, e.g. the given ID is not supported by parser. + Corresponding SQL error is sent. 
+*/ + +bool thd_init_client_charset(THD *thd, uint cs_number) +{ + CHARSET_INFO *cs; + /* + Use server character set and collation if + - opt_character_set_client_handshake is not set + - client has not specified a character set + - client character set doesn't exists in server + */ + if (!opt_character_set_client_handshake || + !(cs= get_charset(cs_number, MYF(0)))) + { + thd->update_charset(global_system_variables.character_set_client, + global_system_variables.collation_connection, + global_system_variables.character_set_results); + } + else + { + if (!is_supported_parser_charset(cs)) + { + /* Disallow non-supported parser character sets: UCS2, UTF16, UTF32 */ + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "character_set_client", + cs->cs_name.str); + return true; + } + thd->org_charset= cs; + thd->update_charset(cs,cs,cs); + } + return false; +} + + +/* + Initialize connection threads +*/ + +#ifndef EMBEDDED_LIBRARY +bool init_new_connection_handler_thread() +{ + pthread_detach_this_thread(); + if (my_thread_init()) + { + statistic_increment(aborted_connects,&LOCK_status); + statistic_increment(connection_errors_internal, &LOCK_status); + return 1; + } + DBUG_EXECUTE_IF("simulate_failed_connection_1", return(1); ); + return 0; +} + +/** + Set client address during authentication. + + Initializes THD::main_security_ctx and THD::peer_port. + Optionally does ip to hostname translation. 
+ + @param thd current THD handle + @param addr peer address (can be NULL, if 'ip' is set) + @param ip peer address as string (can be NULL if 'addr' is set) + @param port peer port + @param check_proxy_networks if true, and host is in + 'proxy_protocol_networks' list, skip + "host not privileged" check + @param[out] host_errors - number of connect + errors for this host + + @retval 0 ok, 1 error +*/ +int thd_set_peer_addr(THD *thd, + sockaddr_storage *addr, + const char *ip, + uint port, + bool check_proxy_networks, + uint *host_errors) +{ + *host_errors= 0; + + thd->peer_port= port; + + char ip_string[128]; + if (!ip) + { + void *addr_data; + if (addr->ss_family == AF_UNIX) + { + /* local connection */ + my_free((void *)thd->main_security_ctx.ip); + thd->main_security_ctx.host_or_ip= thd->main_security_ctx.host = my_localhost; + thd->main_security_ctx.ip= 0; + return 0; + } + else if (addr->ss_family == AF_INET) + addr_data= &((struct sockaddr_in *)addr)->sin_addr; + else + addr_data= &((struct sockaddr_in6 *)addr)->sin6_addr; + if (!inet_ntop(addr->ss_family,addr_data, ip_string, sizeof(ip_string))) + { + DBUG_ASSERT(0); + return 1; + } + ip= ip_string; + } + + my_free((void *)thd->main_security_ctx.ip); + if (!(thd->main_security_ctx.ip = my_strdup(PSI_INSTRUMENT_ME, ip, MYF(MY_WME)))) + { + /* + No error accounting per IP in host_cache, + this is treated as a global server OOM error. + TODO: remove the need for my_strdup. + */ + statistic_increment(aborted_connects, &LOCK_status); + statistic_increment(connection_errors_internal, &LOCK_status); + return 1; /* The error is set by my_strdup(). 
*/ + } + thd->main_security_ctx.host_or_ip = thd->main_security_ctx.ip; + if (!opt_skip_name_resolve) + { + int rc; + + rc = ip_to_hostname(addr, + thd->main_security_ctx.ip, + &thd->main_security_ctx.host, + host_errors); + + /* Cut very long hostnames to avoid possible overflows */ + if (thd->main_security_ctx.host) + { + if (thd->main_security_ctx.host != my_localhost) + ((char*)thd->main_security_ctx.host)[MY_MIN(strlen(thd->main_security_ctx.host), + HOSTNAME_LENGTH)] = 0; + thd->main_security_ctx.host_or_ip = thd->main_security_ctx.host; + } + + if (rc == RC_BLOCKED_HOST) + { + /* HOST_CACHE stats updated by ip_to_hostname(). */ + my_error(ER_HOST_IS_BLOCKED, MYF(0), thd->main_security_ctx.host_or_ip); + return 1; + } + } + DBUG_PRINT("info", ("Host: %s ip: %s", + (thd->main_security_ctx.host ? + thd->main_security_ctx.host : "unknown host"), + (thd->main_security_ctx.ip ? + thd->main_security_ctx.ip : "unknown ip"))); + if ((!check_proxy_networks || !is_proxy_protocol_allowed((struct sockaddr *) addr)) + && acl_check_host(thd->main_security_ctx.host, thd->main_security_ctx.ip)) + { + /* HOST_CACHE stats updated by acl_check_host(). */ + my_error(ER_HOST_NOT_PRIVILEGED, MYF(0), + thd->main_security_ctx.host_or_ip); + return 1; + } + return 0; +} + +/* + Perform handshake, authorize client and update thd ACL variables. + + SYNOPSIS + check_connection() + thd thread handle + + RETURN + 0 success, thd is updated. 
+ 1 error +*/ + +static int check_connection(THD *thd) +{ + uint connect_errors= 0; + int auth_rc; + NET *net= &thd->net; + + DBUG_PRINT("info", + ("New connection received on %s", vio_description(net->vio))); + +#ifdef SIGNAL_WITH_VIO_CLOSE + thd->set_active_vio(net->vio); +#endif + + if (!thd->main_security_ctx.host) // If TCP/IP connection + { + my_bool peer_rc; + char ip[NI_MAXHOST]; + uint16 peer_port; + + peer_rc= vio_peer_addr(net->vio, ip, &peer_port, NI_MAXHOST); + + /* + =========================================================================== + DEBUG code only (begin) + Simulate various output from vio_peer_addr(). + =========================================================================== + */ + + DBUG_EXECUTE_IF("vio_peer_addr_error", + { + peer_rc= 1; + } + ); + DBUG_EXECUTE_IF("vio_peer_addr_fake_ipv4", + { + struct sockaddr *sa= (sockaddr *) &net->vio->remote; + sa->sa_family= AF_INET; + struct in_addr *ip4= &((struct sockaddr_in *) sa)->sin_addr; + /* See RFC 5737, 192.0.2.0/24 is reserved. */ + const char* fake= "192.0.2.4"; + inet_pton(AF_INET,fake, ip4); + strcpy(ip, fake); + peer_rc= 0; + } + ); + +#ifdef HAVE_IPV6 + DBUG_EXECUTE_IF("vio_peer_addr_fake_ipv6", + { + struct sockaddr_in6 *sa= (sockaddr_in6 *) &net->vio->remote; + sa->sin6_family= AF_INET6; + struct in6_addr *ip6= & sa->sin6_addr; + /* See RFC 3849, ipv6 2001:DB8::/32 is reserved. */ + const char* fake= "2001:db8::6:6"; + /* inet_pton(AF_INET6, fake, ip6); not available on Windows XP. 
*/ + ip6->s6_addr[ 0] = 0x20; + ip6->s6_addr[ 1] = 0x01; + ip6->s6_addr[ 2] = 0x0d; + ip6->s6_addr[ 3] = 0xb8; + ip6->s6_addr[ 4] = 0x00; + ip6->s6_addr[ 5] = 0x00; + ip6->s6_addr[ 6] = 0x00; + ip6->s6_addr[ 7] = 0x00; + ip6->s6_addr[ 8] = 0x00; + ip6->s6_addr[ 9] = 0x00; + ip6->s6_addr[10] = 0x00; + ip6->s6_addr[11] = 0x00; + ip6->s6_addr[12] = 0x00; + ip6->s6_addr[13] = 0x06; + ip6->s6_addr[14] = 0x00; + ip6->s6_addr[15] = 0x06; + strcpy(ip, fake); + peer_rc= 0; + } + ); +#endif /* HAVE_IPV6 */ + + /* + =========================================================================== + DEBUG code only (end) + =========================================================================== + */ + + if (peer_rc) + { + /* + Since we can not even get the peer IP address, + there is nothing to show in the host_cache, + so increment the global status variable for peer address errors. + */ + statistic_increment(connection_errors_peer_addr, &LOCK_status); + my_error(ER_BAD_HOST_ERROR, MYF(0)); + statistic_increment(aborted_connects_preauth, &LOCK_status); + return 1; + } + + if (thd_set_peer_addr(thd, &net->vio->remote, ip, peer_port, + true, &connect_errors)) + { + statistic_increment(aborted_connects_preauth, &LOCK_status); + return 1; + } + } + else /* Hostname given means that the connection was on a socket */ + { + DBUG_PRINT("info",("Host: %s", thd->main_security_ctx.host)); + thd->main_security_ctx.host_or_ip= thd->main_security_ctx.host; + thd->main_security_ctx.ip= 0; + /* Reset sin_addr */ + bzero((char*) &net->vio->remote, sizeof(net->vio->remote)); + } + vio_keepalive(net->vio, TRUE); + vio_set_keepalive_options(net->vio, &opt_vio_keepalive); + + if (unlikely(thd->packet.alloc(thd->variables.net_buffer_length))) + { + /* + Important note: + net_buffer_length is a SESSION variable, + so it may be tempting to account OOM conditions per IP in the HOST_CACHE, + in case some clients are more demanding than others ... 
+ However, this session variable is *not* initialized with a per client + value during the initial connection, it is initialized from the + GLOBAL net_buffer_length variable from the server. + Hence, there is no reason to account on OOM conditions per client IP, + we count failures in the global server status instead. + */ + statistic_increment(aborted_connects,&LOCK_status); + statistic_increment(connection_errors_internal, &LOCK_status); + statistic_increment(aborted_connects_preauth, &LOCK_status); + return 1; /* The error is set by alloc(). */ + } + + auth_rc= acl_authenticate(thd, 0); + if (auth_rc == 0 && connect_errors != 0) + { + /* + A client connection from this IP was successful, + after some previous failures. + Reset the connection error counter. + */ + reset_host_connect_errors(thd->main_security_ctx.ip); + } + + return auth_rc; +} + + +/* + Setup thread to be used with the current thread + + SYNOPSIS + bool setup_connection_thread_globals() + thd Thread/connection handler + + RETURN + 0 ok + 1 Error (out of memory) + In this case we will close the connection and increment status +*/ + +void setup_connection_thread_globals(THD *thd) +{ + DBUG_EXECUTE_IF("CONNECT_wait", { + extern Dynamic_array listen_sockets; + while (listen_sockets.size()) + my_sleep(1000); + }); + thd->store_globals(); +} + + +/* + Autenticate user, with error reporting + + SYNOPSIS + login_connection() + thd Thread handler + + NOTES + Connection is not closed in case of errors + + RETURN + 0 ok + 1 error +*/ + +bool login_connection(THD *thd) +{ + NET *net= &thd->net; + int error= 0; + DBUG_ENTER("login_connection"); + DBUG_PRINT("info", ("login_connection called by thread %lu", + (ulong) thd->thread_id)); + + /* Use "connect_timeout" value during connection phase */ + my_net_set_read_timeout(net, connect_timeout); + my_net_set_write_timeout(net, connect_timeout); + + error= check_connection(thd); + thd->protocol->end_statement(); + + if (unlikely(error)) + { // Wrong permissions 
+#ifdef _WIN32 + if (vio_type(net->vio) == VIO_TYPE_NAMEDPIPE) + my_sleep(1000); /* must wait after eof() */ +#endif + statistic_increment(aborted_connects,&LOCK_status); + error=1; + goto exit; + } + /* Connect completed, set read/write timeouts back to default */ + my_net_set_read_timeout(net, thd->variables.net_read_timeout); + my_net_set_write_timeout(net, thd->variables.net_write_timeout); + + /* Updates global user connection stats. */ + if (increment_connection_count(thd, TRUE)) + { + my_error(ER_OUTOFMEMORY, MYF(0), (int) (2*sizeof(USER_STATS))); + error= 1; + goto exit; + } + +exit: + mysql_audit_notify_connection_connect(thd); + DBUG_RETURN(error); +} + + +/* + Close an established connection + + NOTES + This mainly updates status variables +*/ + +void end_connection(THD *thd) +{ + NET *net= &thd->net; + +#ifdef WITH_WSREP + if (thd->wsrep_cs().state() == wsrep::client_state::s_exec) + { + /* Error happened after the thread acquired ownership to wsrep + client state, but before command was processed. Clean up the + state before wsrep_close(). */ + wsrep_after_command_ignore_result(thd); + } + wsrep_close(thd); +#endif /* WITH_WSREP */ + if (thd->user_connect) + { + /* + We decrease this variable early to make it easy to log again quickly. + This code is not critical as we will in any case do this test + again in thd->cleanup() + */ + decrease_user_connections(thd->user_connect); + /* + The thread may returned back to the pool and assigned to a user + that doesn't have a limit. Ensure the user is not using resources + of someone else. + */ + thd->user_connect= NULL; + } + + if (unlikely(thd->killed) || (net->error && net->vio != 0)) + { + statistic_increment(aborted_threads,&LOCK_status); + status_var_increment(thd->status_var.lost_connections); + } + + if (likely(!thd->killed) && (net->error && net->vio != 0)) + thd->print_aborted_warning(1, thd->get_stmt_da()->is_error() + ? 
thd->get_stmt_da()->message() : ER_THD(thd, ER_UNKNOWN_ERROR)); +} + + +/* + Initialize THD to handle queries +*/ + +void prepare_new_connection_state(THD* thd) +{ + Security_context *sctx= thd->security_ctx; + + if (thd->client_capabilities & CLIENT_COMPRESS) + thd->net.compress=1; // Use compression + + /* + Much of this is duplicated in create_embedded_thd() for the + embedded server library. + TODO: refactor this to avoid code duplication there + */ + thd->proc_info= 0; + thd->set_command(COM_SLEEP); + thd->init_for_queries(); + + if (opt_init_connect.length && + !(sctx->master_access & PRIV_IGNORE_INIT_CONNECT)) + { + execute_init_command(thd, &opt_init_connect, &LOCK_sys_init_connect); + if (unlikely(thd->is_error())) + { + Host_errors errors; + thd->set_killed(KILL_CONNECTION); + thd->print_aborted_warning(0, "init_connect command failed"); + sql_print_warning("%s", thd->get_stmt_da()->message()); + + /* + now let client to send its first command, + to be able to send the error back + */ + NET *net= &thd->net; + thd->lex->current_select= 0; + my_net_set_read_timeout(net, thd->variables.net_wait_timeout); + thd->clear_error(); + net_new_transaction(net); + ulong packet_length= my_net_read(net); + /* + If my_net_read() failed, my_error() has been already called, + and the main Diagnostics Area contains an error condition. + */ + if (packet_length != packet_error) + my_error(ER_NEW_ABORTING_CONNECTION, + (thd->db.str || sctx->user) ? MYF(0) : MYF(ME_WARNING), + thd->thread_id, + thd->db.str ? thd->db.str : "unconnected", + sctx->user ? 
sctx->user : "unauthenticated", + sctx->host_or_ip, "init_connect command failed"); + thd->server_status&= ~SERVER_STATUS_CLEAR_SET; + thd->protocol->end_statement(); + thd->killed = KILL_CONNECTION; + errors.m_init_connect= 1; + inc_host_errors(thd->main_security_ctx.ip, &errors); + return; + } + + thd->proc_info=0; + } +} + + +/* + Thread handler for a connection + + SYNOPSIS + handle_one_connection() + arg Connection object (THD) + + IMPLEMENTATION + This function (normally) does the following: + - Initialize thread + - Initialize THD to be used with this thread + - Authenticate user + - Execute all queries sent on the connection + - Take connection down + - End thread / Handle next connection using thread from thread cache +*/ + +pthread_handler_t handle_one_connection(void *arg) +{ + CONNECT *connect= (CONNECT*) arg; + + mysql_thread_set_psi_id(connect->thread_id); + + if (init_new_connection_handler_thread()) + connect->close_with_error(0, 0, ER_OUT_OF_RESOURCES); + else + do_handle_one_connection(connect, true); + + DBUG_PRINT("info", ("killing thread")); +#if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY) + ERR_remove_state(0); +#endif + my_thread_end(); + return 0; +} + +bool thd_prepare_connection(THD *thd) +{ + bool rc; + lex_start(thd); + rc= login_connection(thd); + if (rc) + return rc; + + MYSQL_CONNECTION_START(thd->thread_id, &thd->security_ctx->priv_user[0], + (char *) thd->security_ctx->host_or_ip); + + prepare_new_connection_state(thd); +#ifdef WITH_WSREP + thd->wsrep_client_thread= true; + wsrep_open(thd); +#endif /* WITH_WSREP */ + return FALSE; +} + +bool thd_is_connection_alive(THD *thd) +{ + NET *net= &thd->net; + if (likely(!net->error && + net->vio != 0 && + thd->killed < KILL_CONNECTION)) + return TRUE; + return FALSE; +} + + +void do_handle_one_connection(CONNECT *connect, bool put_in_cache) +{ + ulonglong thr_create_utime= microsecond_interval_timer(); + THD *thd; + if (!(thd= connect->create_thd(NULL))) + { + 
connect->close_and_delete(); + return; + } + + /* + If a thread was created to handle this connection: + increment slow_launch_threads counter if it took more than + slow_launch_time seconds to create the thread. + */ + + if (connect->prior_thr_create_utime) + { + ulong launch_time= (ulong) (thr_create_utime - + connect->prior_thr_create_utime); + if (launch_time >= slow_launch_time*1000000L) + statistic_increment(slow_launch_threads, &LOCK_status); + } + + server_threads.insert(thd); // Make THD visible in show processlist + + delete connect; // must be after server_threads.insert, see close_connections() + + thd->thr_create_utime= thr_create_utime; + /* We need to set this because of time_out_user_resource_limits */ + thd->start_utime= thr_create_utime; + + /* + handle_one_connection() is normally the only way a thread would + start and would always be on the very high end of the stack , + therefore, the thread stack always starts at the address of the + first local variable of handle_one_connection, which is thd. We + need to know the start of the stack so that we could check for + stack overruns. 
+ */ + thd->thread_stack= (char*) &thd; + setup_connection_thread_globals(thd); + + for (;;) + { + bool create_user= TRUE; + + mysql_socket_set_thread_owner(thd->net.vio->mysql_socket); + if (thd_prepare_connection(thd)) + { + create_user= FALSE; + goto end_thread; + } + + while (thd_is_connection_alive(thd)) + { + if (mysql_audit_release_required(thd)) + mysql_audit_release(thd); + if (do_command(thd)) + break; + } + end_connection(thd); + +end_thread: + close_connection(thd); + + if (thd->userstat_running) + update_global_user_stats(thd, create_user, time(NULL)); + + unlink_thd(thd); + if (IF_WSREP(thd->wsrep_applier, false) || !put_in_cache || + !(connect= thread_cache.park())) + break; + + /* Create new instrumentation for the new THD job */ + PSI_CALL_set_thread(PSI_CALL_new_thread(key_thread_one_connection, thd, + thd->thread_id)); + + if (!(connect->create_thd(thd))) + { + /* Out of resources. Free thread to get more resources */ + connect->close_and_delete(); + break; + } + delete connect; + + /* + We have to call store_globals to update mysys_var->id and lock_info + with the new thread_id + */ + thd->store_globals(); + + /* reset abort flag for the thread */ + thd->mysys_var->abort= 0; + thd->thr_create_utime= microsecond_interval_timer(); + thd->start_utime= thd->thr_create_utime; + + server_threads.insert(thd); + } + delete thd; +} +#endif /* EMBEDDED_LIBRARY */ + + +/* Handling of CONNECT objects */ + +/* + Close connection without error and delete the connect object + This and close_with_error are only called if we didn't manage to + create a new thd object. 
+*/ + +void CONNECT::close_and_delete() +{ + DBUG_ENTER("close_and_delete"); + +#if _WIN32 + if (vio_type == VIO_TYPE_NAMEDPIPE) + CloseHandle(pipe); + else +#endif + if (vio_type != VIO_CLOSED) + mysql_socket_close(sock); + vio_type= VIO_CLOSED; + + --*scheduler->connection_count; + statistic_increment(connection_errors_internal, &LOCK_status); + statistic_increment(aborted_connects,&LOCK_status); + + delete this; + DBUG_VOID_RETURN; +} + +/* + Close a connection with a possible error to the end user + Alse deletes the connection object, like close_and_delete() +*/ + +void CONNECT::close_with_error(uint sql_errno, + const char *message, uint close_error) +{ + THD *thd= create_thd(NULL); + if (thd) + { + if (sql_errno) + thd->protocol->net_send_error(thd, sql_errno, message, NULL); + close_connection(thd, close_error); + delete thd; + set_current_thd(0); + } + close_and_delete(); +} + + +/* Reuse or create a THD based on a CONNECT object */ + +THD *CONNECT::create_thd(THD *thd) +{ + bool res, thd_reused= thd != 0; + Vio *vio; + DBUG_ENTER("create_thd"); + + DBUG_EXECUTE_IF("simulate_failed_connection_2", DBUG_RETURN(0); ); + + if (thd) + { + /* reuse old thd */ + thd->reset_for_reuse(); + /* + reset tread_id's, but not thread_dbug_id's as the later isn't allowed + to change as there is already structures in thd marked with the old + value. + */ + thd->thread_id= thd->variables.pseudo_thread_id= thread_id; + } + else if (!(thd= new THD(thread_id))) + DBUG_RETURN(0); + +#if _WIN32 + if (vio_type == VIO_TYPE_NAMEDPIPE) + vio= vio_new_win32pipe(pipe); + else +#endif + vio= mysql_socket_vio_new(sock, vio_type, vio_type == VIO_TYPE_SOCKET ? 
+ VIO_LOCALHOST : 0); + if (!vio) + { + if (!thd_reused) + delete thd; + DBUG_RETURN(0); + } + + set_current_thd(thd); + res= my_net_init(&thd->net, vio, thd, MYF(MY_THREAD_SPECIFIC)); + vio_type= VIO_CLOSED; // Vio now handled by thd + + if (unlikely(res || thd->is_error())) + { + if (!thd_reused) + delete thd; + set_current_thd(0); + DBUG_RETURN(0); + } + + init_net_server_extension(thd); + + thd->security_ctx->host= thd->net.vio->type == VIO_TYPE_NAMEDPIPE || + thd->net.vio->type == VIO_TYPE_SOCKET ? + my_localhost : 0; + + thd->scheduler= scheduler; + thd->real_id= pthread_self(); /* Duplicates THD::store_globals() setting. */ + + /* Attach PSI instrumentation to the new THD */ + + PSI_thread *psi= PSI_CALL_get_thread(); + PSI_CALL_set_thread_os_id(psi); + PSI_CALL_set_thread_THD(psi, thd); + PSI_CALL_set_thread_id(psi, thd->thread_id); + thd->set_psi(psi); + + DBUG_RETURN(thd); +} diff --git a/sql/sql_connect.h b/sql/sql_connect.h new file mode 100644 index 00000000..8be6c1ae --- /dev/null +++ b/sql/sql_connect.h @@ -0,0 +1,123 @@ +/* Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_CONNECT_INCLUDED +#define SQL_CONNECT_INCLUDED + +#include /* pthread_handler_t */ +#include "mysql_com.h" /* enum_server_command */ +#include "structs.h" +#include +#include +#include "violite.h" + +/* + Object to hold connect information to be given to the newly created thread +*/ + +struct scheduler_functions; + +class CONNECT : public ilink { +public: + MYSQL_SOCKET sock; +#ifdef _WIN32 + HANDLE pipe; + CONNECT(HANDLE pipe_arg): pipe(pipe_arg), vio_type(VIO_TYPE_NAMEDPIPE), + scheduler(thread_scheduler), thread_id(0), prior_thr_create_utime(0) + { + count++; + } +#endif + enum enum_vio_type vio_type; + scheduler_functions *scheduler; + my_thread_id thread_id; + + /* Own variables */ + ulonglong prior_thr_create_utime; + + static Atomic_counter count; + + CONNECT(MYSQL_SOCKET sock_arg, enum enum_vio_type vio_type_arg, + scheduler_functions *scheduler_arg): sock(sock_arg), + vio_type(vio_type_arg), scheduler(scheduler_arg), thread_id(0), + prior_thr_create_utime(0) + { + count++; + } + ~CONNECT() + { + count--; + DBUG_ASSERT(vio_type == VIO_CLOSED); + } + void close_and_delete(); + void close_with_error(uint sql_errno, + const char *message, uint close_error); + THD *create_thd(THD *thd); +}; + + +class THD; +typedef struct user_conn USER_CONN; + +void init_max_user_conn(void); +void init_global_user_stats(void); +void init_global_table_stats(void); +void init_global_index_stats(void); +void init_global_client_stats(void); +void free_max_user_conn(void); +void free_global_user_stats(void); +void free_global_table_stats(void); +void free_global_index_stats(void); +void free_global_client_stats(void); + +pthread_handler_t handle_one_connection(void *arg); +void do_handle_one_connection(CONNECT *connect, bool put_in_cache); +bool 
init_new_connection_handler_thread(); +void reset_mqh(LEX_USER *lu, bool get_them); +bool check_mqh(THD *thd, uint check_command); +void time_out_user_resource_limits(THD *thd, USER_CONN *uc); +#ifndef NO_EMBEDDED_ACCESS_CHECKS +void decrease_user_connections(USER_CONN *uc); +#else +#define decrease_user_connections(X) do { } while(0) /* nothing */ +#endif +bool thd_init_client_charset(THD *thd, uint cs_number); +void setup_connection_thread_globals(THD *thd); +bool thd_prepare_connection(THD *thd); +bool thd_is_connection_alive(THD *thd); +int thd_set_peer_addr(THD *thd, sockaddr_storage *addr, + const char *ip, uint port, + bool check_proxy_networks, + uint *host_errors); + +bool login_connection(THD *thd); +void prepare_new_connection_state(THD* thd); +void end_connection(THD *thd); +void update_global_user_stats(THD* thd, bool create_user, time_t now); +int get_or_create_user_conn(THD *thd, const char *user, + const char *host, const USER_RESOURCES *mqh); +int check_for_max_user_connections(THD *thd, USER_CONN *uc); + +extern HASH global_user_stats; +extern HASH global_client_stats; +extern HASH global_table_stats; +extern HASH global_index_stats; + +extern mysql_mutex_t LOCK_global_user_client_stats; +extern mysql_mutex_t LOCK_global_table_stats; +extern mysql_mutex_t LOCK_global_index_stats; +extern mysql_mutex_t LOCK_stats; + +#endif /* SQL_CONNECT_INCLUDED */ diff --git a/sql/sql_const.h b/sql/sql_const.h new file mode 100644 index 00000000..490b870d --- /dev/null +++ b/sql/sql_const.h @@ -0,0 +1,302 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + File containing constants that can be used throughout the server. + + @note This file shall not contain or include any declarations of any kinds. +*/ + +#ifndef SQL_CONST_INCLUDED +#define SQL_CONST_INCLUDED + +#include + +#define LIBLEN FN_REFLEN-FN_LEN /* Max l{ngd p} dev */ +/* extra 4+4 bytes for slave tmp tables */ +#define MAX_DBKEY_LENGTH (NAME_LEN*2+1+1+4+4) +#define MAX_ALIAS_NAME 256 +#define MAX_FIELD_NAME (NAME_LEN+1) /* Max colum name length +1 */ +#define MAX_SYS_VAR_LENGTH 32 +#define MAX_KEY MAX_INDEXES /* Max used keys */ +#define MAX_REF_PARTS 32 /* Max parts used as ref */ + +/* + Maximum length of the data part of an index lookup key. + + The "data part" is defined as the value itself, not including the + NULL-indicator bytes or varchar length bytes ("the Extras"). We need this + value because there was a bug where length of the Extras were not counted. + + You probably need MAX_KEY_LENGTH, not this constant. 
+*/ +#define MAX_DATA_LENGTH_FOR_KEY 3072 +#if SIZEOF_OFF_T > 4 +#define MAX_REFLENGTH 8 /* Max length for record ref */ +#else +#define MAX_REFLENGTH 4 /* Max length for record ref */ +#endif +#define MAX_HOSTNAME (HOSTNAME_LENGTH + 1) /* len+1 in mysql.user */ +#define MAX_CONNECTION_NAME NAME_LEN + +#define MAX_MBWIDTH 3 /* Max multibyte sequence */ +#define MAX_FILENAME_MBWIDTH 5 +#define MAX_FIELD_CHARLENGTH 255 +/* + In MAX_FIELD_VARCHARLENGTH we reserve extra bytes for the overhead: + - 2 bytes for the length + - 1 byte for NULL bits + to avoid the "Row size too large" error for these three corner definitions: + CREATE TABLE t1 (c VARBINARY(65533)); + CREATE TABLE t1 (c VARBINARY(65534)); + CREATE TABLE t1 (c VARBINARY(65535)); + Like VARCHAR(65536), they will be converted to BLOB automatically + in non-strict mode. +*/ +#define MAX_FIELD_VARCHARLENGTH (65535-2-1) +#define MAX_FIELD_BLOBLENGTH UINT_MAX32 /* cf field_blob::get_length() */ +#define CONVERT_IF_BIGGER_TO_BLOB 512 /* Threshold *in characters* */ + +/* Max column width +1 */ +#define MAX_FIELD_WIDTH (MAX_FIELD_CHARLENGTH*MAX_MBWIDTH+1) + +#define MAX_BIT_FIELD_LENGTH 64 /* Max length in bits for bit fields */ + +#define MAX_DATE_WIDTH 10 /* YYYY-MM-DD */ +#define MIN_TIME_WIDTH 10 /* -HHH:MM:SS */ +#define MAX_TIME_WIDTH 16 /* -DDDDDD HH:MM:SS */ +#define MAX_TIME_FULL_WIDTH 23 /* -DDDDDD HH:MM:SS.###### */ +#define MAX_DATETIME_FULL_WIDTH 26 /* YYYY-MM-DD HH:MM:SS.###### */ +#define MAX_DATETIME_WIDTH 19 /* YYYY-MM-DD HH:MM:SS */ +#define MAX_DATETIME_COMPRESSED_WIDTH 14 /* YYYYMMDDHHMMSS */ +#define MAX_DATETIME_PRECISION 6 + +#define MAX_TABLES (sizeof(table_map)*8-3) /* Max tables in join */ +#define PARAM_TABLE_BIT (((table_map) 1) << (sizeof(table_map)*8-3)) +#define OUTER_REF_TABLE_BIT (((table_map) 1) << (sizeof(table_map)*8-2)) +#define RAND_TABLE_BIT (((table_map) 1) << (sizeof(table_map)*8-1)) +#define PSEUDO_TABLE_BITS (PARAM_TABLE_BIT | OUTER_REF_TABLE_BIT | \ + RAND_TABLE_BIT) 
+#define CONNECT_STRING_MAXLEN 65535 /* stored in 2 bytes in .frm */ +#define MAX_FIELDS 4096 /* Limit in the .frm file */ +#define MAX_PARTITIONS 8192 + +#define MAX_SELECT_NESTING (SELECT_NESTING_MAP_SIZE - 1) + +#define MAX_SORT_MEMORY 2048*1024 +#define MIN_SORT_MEMORY 1024 + +/* Some portable defines */ + +#define STRING_BUFFER_USUAL_SIZE 80 + +/* Memory allocated when parsing a statement / saving a statement */ +#define MEM_ROOT_BLOCK_SIZE 8192 +#define MEM_ROOT_PREALLOC 8192 +#define TRANS_MEM_ROOT_BLOCK_SIZE 4096 +#define TRANS_MEM_ROOT_PREALLOC 4096 + +#define DEFAULT_ERROR_COUNT 64 +#define EXTRA_RECORDS 10 /* Extra records in sort */ +#define SCROLL_EXTRA 5 /* Extra scroll-rows. */ +#define FIELD_NAME_USED ((uint) 32768) /* Bit set if fieldname used */ +#define FORM_NAME_USED ((uint) 16384) /* Bit set if formname used */ +#define FIELD_NR_MASK 16383 /* To get fieldnumber */ +#define FERR -1 /* Error from my_functions */ +#define CREATE_MODE 0 /* Default mode on new files */ +#define NAMES_SEP_CHAR 255 /* Char to sep. 
names */ + +#define READ_RECORD_BUFFER (uint) (IO_SIZE*8) /* Pointer_buffer_size */ +#define DISK_BUFFER_SIZE (uint) (IO_SIZE*16) /* Size of diskbuffer */ + +#define FRM_VER_TRUE_VARCHAR (FRM_VER+4) /* 10 */ +#define FRM_VER_EXPRESSSIONS (FRM_VER+5) /* 11 */ +#define FRM_VER_CURRENT FRM_VER_EXPRESSSIONS + +/*************************************************************************** + Configuration parameters +****************************************************************************/ + +#define ACL_CACHE_SIZE 256 +#define MAX_PASSWORD_LENGTH 32 +#define HOST_CACHE_SIZE 128 +#define MAX_ACCEPT_RETRY 10 // Test accept this many times +#define MAX_FIELDS_BEFORE_HASH 32 +#define USER_VARS_HASH_SIZE 16 +#define SEQUENCES_HASH_SIZE 16 +#define TABLE_OPEN_CACHE_MIN 200 +#define TABLE_OPEN_CACHE_DEFAULT 2000 +#define TABLE_DEF_CACHE_DEFAULT 400 +/** + We must have room for at least 400 table definitions in the table + cache, since otherwise there is no chance prepared + statements that use these many tables can work. + Prepared statements use table definition cache ids (table_map_id) + as table version identifiers. If the table definition + cache size is less than the number of tables used in a statement, + the contents of the table definition cache is guaranteed to rotate + between a prepare and execute. This leads to stable validation + errors. In future we shall use more stable version identifiers, + for now the only solution is to ensure that the table definition + cache can contain at least all tables of a given statement. +*/ +#define TABLE_DEF_CACHE_MIN 400 + +/** + Maximum number of connections default value. + 151 is larger than Apache's default max children, + to avoid "too many connections" error in a common setup. +*/ +#define MAX_CONNECTIONS_DEFAULT 151 + +/* + Stack reservation. + Feel free to raise this by the smallest amount you can to get the + "execution_constants" test to pass. +*/ +#define STACK_MIN_SIZE 16000 // Abort if less stack during eval. 
+ +#define STACK_MIN_SIZE_FOR_OPEN (1024*80) +#define STACK_BUFF_ALLOC 352 ///< For stack overrun checks +#ifndef MYSQLD_NET_RETRY_COUNT +#define MYSQLD_NET_RETRY_COUNT 10 ///< Abort read after this many int. +#endif + +#define QUERY_ALLOC_BLOCK_SIZE 16384 +#define QUERY_ALLOC_PREALLOC_SIZE 24576 +#define TRANS_ALLOC_BLOCK_SIZE 8192 +#define TRANS_ALLOC_PREALLOC_SIZE 4096 +#define RANGE_ALLOC_BLOCK_SIZE 4096 +#define ACL_ALLOC_BLOCK_SIZE 1024 +#define UDF_ALLOC_BLOCK_SIZE 1024 +#define TABLE_ALLOC_BLOCK_SIZE 1024 +#define WARN_ALLOC_BLOCK_SIZE 2048 +#define WARN_ALLOC_PREALLOC_SIZE 1024 +/* + Note that if we are using 32K or less, then TCmalloc will use a local + heap without locks! +*/ +#define SHOW_ALLOC_BLOCK_SIZE (32768-MALLOC_OVERHEAD) + +/* + The following parameters is to decide when to use an extra cache to + optimise seeks when reading a big table in sorted order +*/ +#define MIN_FILE_LENGTH_TO_USE_ROW_CACHE (10L*1024*1024) +#define MIN_ROWS_TO_USE_TABLE_CACHE 100 +#define MIN_ROWS_TO_USE_BULK_INSERT 100 + +/** + The following is used to decide if MySQL should use table scanning + instead of reading with keys. The number says how many evaluation of the + WHERE clause is comparable to reading one extra row from a table. +*/ +#define TIME_FOR_COMPARE 5.0 // 5 WHERE compares == one read +#define TIME_FOR_COMPARE_IDX 20.0 + +#define IDX_BLOCK_COPY_COST ((double) 1 / TIME_FOR_COMPARE) +#define IDX_LOOKUP_COST ((double) 1 / 8) +#define MULTI_RANGE_READ_SETUP_COST (IDX_BLOCK_COPY_COST/10) + +/** + Number of comparisons of table rowids equivalent to reading one row from a + table. +*/ +#define TIME_FOR_COMPARE_ROWID (TIME_FOR_COMPARE*100) + +/* cost1 is better that cost2 only if cost1 + COST_EPS < cost2 */ +#define COST_EPS 0.001 + +/* + For sequential disk seeks the cost formula is: + DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST * #blocks_to_skip + + The cost of average seek + DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*BLOCKS_IN_AVG_SEEK =1.0. 
+*/ +#define DISK_SEEK_BASE_COST ((double)0.9) + +#define BLOCKS_IN_AVG_SEEK 128 + +#define DISK_SEEK_PROP_COST ((double)0.1/BLOCKS_IN_AVG_SEEK) + + +/** + Number of rows in a reference table when refereed through a not unique key. + This value is only used when we don't know anything about the key + distribution. +*/ +#define MATCHING_ROWS_IN_OTHER_TABLE 10 + +/* + Subquery materialization-related constants +*/ +#define HEAP_TEMPTABLE_LOOKUP_COST 0.05 +#define DISK_TEMPTABLE_LOOKUP_COST 1.0 +#define SORT_INDEX_CMP_COST 0.02 + + +#define COST_MAX (DBL_MAX * (1.0 - DBL_EPSILON)) + +#define COST_ADD(c,d) (COST_MAX - (d) > (c) ? (c) + (d) : COST_MAX) + +#define COST_MULT(c,f) (COST_MAX / (f) > (c) ? (c) * (f) : COST_MAX) + + +#define MY_CHARSET_BIN_MB_MAXLEN 1 + +/** Don't pack string keys shorter than this (if PACK_KEYS=1 isn't used). */ +#define KEY_DEFAULT_PACK_LENGTH 8 + +/** Characters shown for the command in 'show processlist'. */ +#define PROCESS_LIST_WIDTH 100 +/* Characters shown for the command in 'information_schema.processlist' */ +#define PROCESS_LIST_INFO_WIDTH 65535 + +#define PRECISION_FOR_DOUBLE 53 +#define PRECISION_FOR_FLOAT 24 + +/* -[digits].E+## */ +#define MAX_FLOAT_STR_LENGTH (FLT_DIG + 6) +/* -[digits].E+### */ +#define MAX_DOUBLE_STR_LENGTH (DBL_DIG + 7) + +/* + Default time to wait before aborting a new client connection + that does not respond to "initial server greeting" timely +*/ +#define CONNECT_TIMEOUT 10 + /* Wait 5 minutes before removing thread from thread cache */ +#define THREAD_CACHE_TIMEOUT 5*60 + +/* The following can also be changed from the command line */ +#define DEFAULT_CONCURRENCY 10 +#define DELAYED_LIMIT 100 /**< pause after xxx inserts */ +#define DELAYED_QUEUE_SIZE 1000 +#define DELAYED_WAIT_TIMEOUT (5*60) /**< Wait for delayed insert */ +#define MAX_CONNECT_ERRORS 100 ///< errors before disabling host + +#define LONG_TIMEOUT ((ulong) 3600L*24L*365L) + +/** + Maximum length of time zone name that we support (Time 
zone name is + char(64) in db). mysqlbinlog needs it. +*/ +#define MAX_TIME_ZONE_NAME_LENGTH (NAME_LEN + 1) + +#define SP_PSI_STATEMENT_INFO_COUNT 19 + +#endif /* SQL_CONST_INCLUDED */ diff --git a/sql/sql_crypt.cc b/sql/sql_crypt.cc new file mode 100644 index 00000000..edff85a0 --- /dev/null +++ b/sql/sql_crypt.cc @@ -0,0 +1,77 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + + +/* + Functions to handle the encode() and decode() functions + The strongness of this crypt is large based on how good the random + generator is. It should be ok for short strings, but for communication one + needs something like 'ssh'. 
+*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_crypt.h" +#include "password.h" + +void SQL_CRYPT::init(ulong *rand_nr) +{ + uint i; + my_rnd_init(&rand,rand_nr[0],rand_nr[1]); + + for (i=0 ; i<=255; i++) + decode_buff[i]= (char) i; + + for (i=0 ; i<= 255 ; i++) + { + int idx= (uint) (my_rnd(&rand)*255.0); + char a= decode_buff[idx]; + decode_buff[idx]= decode_buff[i]; + decode_buff[+i]=a; + } + for (i=0 ; i <= 255 ; i++) + encode_buff[(uchar) decode_buff[i]]=i; + org_rand=rand; + shift=0; +} + + +void SQL_CRYPT::encode(char *str,uint length) +{ + for (uint i=0; i < length; i++) + { + shift^=(uint) (my_rnd(&rand)*255.0); + uint idx= (uint) (uchar) str[0]; + *str++ = (char) ((uchar) encode_buff[idx] ^ shift); + shift^= idx; + } +} + + +void SQL_CRYPT::decode(char *str,uint length) +{ + for (uint i=0; i < length; i++) + { + shift^=(uint) (my_rnd(&rand)*255.0); + uint idx= (uint) ((uchar) str[0] ^ shift); + *str = decode_buff[idx]; + shift^= (uint) (uchar) *str++; + } +} diff --git a/sql/sql_crypt.h b/sql/sql_crypt.h new file mode 100644 index 00000000..aab97501 --- /dev/null +++ b/sql/sql_crypt.h @@ -0,0 +1,45 @@ +#ifndef SQL_CRYPT_INCLUDED +#define SQL_CRYPT_INCLUDED + +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include "sql_alloc.h" /* Sql_alloc */ +#include "my_rnd.h" /* rand_struct */ + +class SQL_CRYPT :public Sql_alloc +{ + struct my_rnd_struct rand,org_rand; + char decode_buff[256],encode_buff[256]; + uint shift; + public: + SQL_CRYPT() = default; + SQL_CRYPT(ulong *seed) + { + init(seed); + } + ~SQL_CRYPT() = default; + void init(ulong *seed); + void reinit() { shift=0; rand=org_rand; } + void encode(char *str, uint length); + void decode(char *str, uint length); +}; + +#endif /* SQL_CRYPT_INCLUDED */ diff --git a/sql/sql_cte.cc b/sql/sql_cte.cc new file mode 100644 index 00000000..ad385128 --- /dev/null +++ b/sql/sql_cte.cc @@ -0,0 +1,1738 @@ +/* + Copyright (c) 2016, 2017 MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */

#include "mariadb.h"
#include "sql_class.h"
#include "sql_lex.h"
#include "sql_cte.h"
#include "sql_view.h"    // for make_valid_column_names
#include "sql_parse.h"
#include "sql_select.h"
#include "sql_show.h"    // append_definer, append_identifier


/**
  @brief
    Add a new element to this with clause

  @param elem  The with element to add to this with clause

  @details
    The method adds the with element 'elem' to the elements
    in this with clause. The method reports an error if
    the number of the added element exceeds the value
    of the constant max_number_of_elements_in_with_clause.

  @retval
    true   if an error is reported
    false  otherwise
*/

bool With_clause::add_with_element(With_element *elem)
{
  if (with_list.elements == max_number_of_elements_in_with_clause)
  {
    my_error(ER_TOO_MANY_DEFINITIONS_IN_WITH_CLAUSE, MYF(0));
    return true;
  }
  elem->owner= this;
  elem->number= with_list.elements;
  elem->spec->with_element= elem;
  with_list.link_in_list(elem, &elem->next);
  return false;
}


/* Attach a WITH clause to this unit and set the back-link to the unit. */
void st_select_lex_unit::set_with_clause(With_clause *with_cl)
{
  with_clause= with_cl;
  if (with_clause)
    with_clause->set_owner(this);
}


/**
  @brief
    Check dependencies between tables defined in a list of with clauses

  @details
    For each with clause from the list starting at this->with_clauses_list
    the procedure finds all dependencies between tables defined in the
    clause by calling the method With_clause::check_dependencies.
    Additionally, based on the info collected by this method the procedure
    finds anchors for each recursive definition and moves them to the head
    of the definition.

  @retval
    false  on success
    true   on failure
*/

bool LEX::check_dependencies_in_with_clauses()
{
  for (With_clause *with_clause= with_clauses_list;
       with_clause;
       with_clause= with_clause->next_with_clause)
  {
    if (with_clause->check_dependencies())
      return true;
    if (with_clause->check_anchors())
      return true;
    with_clause->move_anchors_ahead();
  }
  return false;
}


/**
  @brief
    Resolve table references to CTE from a sub-chain of table references

  @param tables       Points to the beginning of the sub-chain
  @param tables_last  Points to the address with the sub-chain barrier

  @details
    The method resolves table references to CTE from the chain of
    table references specified by the parameters 'tables' and 'tables_last'.
    It resolves the references against the CTE definitions occurring in the
    query or in the specification of a CTE whose parsing tree is represented
    by this LEX structure. The method is always called right after the
    process of parsing the query or the specification of a CTE has been
    finished, thus the chain of table references used in the parsed fragment
    has already been built. It is assumed that the parameters of the method
    specify a sub-chain of this chain.
    If a table reference can potentially be a reference to a CTE and it
    has not been resolved yet, the method tries to find the definition
    of the CTE against which the reference can be resolved. If it succeeds
    it sets the field TABLE_LIST::with to point to the found definition.
    It also sets the field TABLE_LIST::derived to point to the specification
    of the found CTE and sets TABLE::db.str to empty_c_string. This will
    allow this table reference to be handled like a reference to a derived
    table (typo fixed: was "derived handle").
    If another table reference has already been resolved against this CTE
    and this CTE is not recursive then a clone of the CTE specification is
    constructed using the function With_element::clone_parsed_spec() and
    TABLE_LIST::derived is set to point to this clone rather than to the
    original specification.
    If the method does not find a matching CTE definition in the parsed
    fragment then, when the flag this->only_cte_resolution is set to true,
    it just moves on to the resolution of the next table reference from the
    specified sub-chain, while when this->only_cte_resolution is false
    the method additionally sets an mdl request for this table reference.

  @notes
    The flag this->only_cte_resolution is set to true in the cases when
    the failure to resolve a table reference as a CTE reference within
    the fragment associated with this LEX structure does not imply that
    this table reference cannot be resolved as such at all.

  @retval false  On success: no errors reported, no memory allocations failed
  @retval true   Otherwise
*/

bool LEX::resolve_references_to_cte(TABLE_LIST *tables,
                                    TABLE_LIST **tables_last)
{
  With_element *with_elem= 0;

  for (TABLE_LIST *tbl= tables; tbl != *tables_last; tbl= tbl->next_global)
  {
    if (tbl->derived)
      continue;
    if (!tbl->db.str && !tbl->with)
      tbl->with= tbl->select_lex->find_table_def_in_with_clauses(tbl);
    if (!tbl->with)    // no CTE matches table reference tbl
    {
      if (only_cte_resolution)
        continue;
      if (!tbl->db.str)   // no database specified in table reference tbl
      {
        if (!thd->db.str) // no default database is set
        {
          my_message(ER_NO_DB_ERROR, ER(ER_NO_DB_ERROR), MYF(0));
          return true;
        }
        if (copy_db_to(&tbl->db))
          return true;
        /* Request a metadata lock for the now fully-qualified table */
        if (!(tbl->table_options & TL_OPTION_ALIAS))
          MDL_REQUEST_INIT(&tbl->mdl_request, MDL_key::TABLE,
                           tbl->db.str, tbl->table_name.str,
                           tbl->mdl_type, MDL_TRANSACTION);
        tbl->mdl_request.set_type((tbl->lock_type >= TL_WRITE_ALLOW_WRITE) ?
                                  MDL_SHARED_WRITE : MDL_SHARED_READ);
      }
      continue;
    }
    with_elem= tbl->with;
    /*
      A reference to a recursive CTE from outside its own recursive part:
      bump the outer-reference counters of the whole mutually-recursive group
    */
    if (tbl->is_recursive_with_table() &&
        !tbl->is_with_table_recursive_reference())
    {
      tbl->with->rec_outer_references++;
      while ((with_elem= with_elem->get_next_mutually_recursive()) !=
             tbl->with)
        with_elem->rec_outer_references++;
    }
    if (!with_elem->is_used_in_query || with_elem->is_recursive)
    {
      /* First (or recursive) use: point at the original specification */
      tbl->derived= with_elem->spec;
      if (tbl->derived != tbl->select_lex->master_unit() &&
          !with_elem->is_recursive &&
          !tbl->is_with_table_recursive_reference())
      {
        tbl->derived->move_as_slave(tbl->select_lex);
      }
      with_elem->is_used_in_query= true;
    }
    else
    {
      /* Subsequent use of a non-recursive CTE: parse a fresh clone */
      if (!(tbl->derived= tbl->with->clone_parsed_spec(thd->lex, tbl)))
        return true;
    }
    tbl->db.str= empty_c_string;
    tbl->db.length= 0;
    tbl->schema_table= 0;
    if (tbl->derived)
    {
      tbl->derived->first_select()->set_linkage(DERIVED_TABLE_TYPE);
      tbl->select_lex->add_statistics(tbl->derived);
    }
    if (tbl->with->is_recursive && tbl->is_with_table_recursive_reference())
      continue;
    with_elem->inc_references();
  }
  return false;
}


/**
  @brief
    Find out dependencies between CTEs, resolve references to them

  @details
    The function can be called in two modes. With this->with_cte_resolution
    set to false the function only finds out all dependencies between CTEs
    used in a query expression with a WITH clause whose parsing has been
    just finished. Based on these dependencies recursive CTEs are detected.
    If this->with_cte_resolution is set to true the function additionally
    resolves all references to CTE occurring in this query expression.
  @retval
    true   on failure
    false  on success
*/

bool
LEX::check_cte_dependencies_and_resolve_references()
{
  if (check_dependencies_in_with_clauses())
    return true;
  if (!with_cte_resolution)
    return false;
  if (resolve_references_to_cte(query_tables, query_tables_last))
    return true;
  return false;
}


/**
  @brief
    Check dependencies between tables defined in this with clause

  @details
    The method performs the following for this with clause:
    - checks that there are no definitions of tables with the same name
    - for each table T defined in this with clause looks for the tables
      from the same with clause that are used in the query that specifies T
      and sets the dependencies of T on these tables in a bitmap.
    - builds the transitive closure of the above direct dependencies
      to find out all recursive definitions.

  @retval
    true   if an error is reported
    false  otherwise
*/

bool With_clause::check_dependencies()
{
  if (dependencies_are_checked)
    return false;
  /*
    Look for definitions with the same query name.
    When found report an error and return true immediately.
    For each table T defined in this with clause look for all other tables
    from the same with clause that are used in the specification of T.
    For each such table set the dependency bit in the dependency map of
    the with element for T.
  */
  for (With_element *with_elem= with_list.first;
       with_elem;
       with_elem= with_elem->next)
  {
    for (With_element *elem= with_list.first;
         elem != with_elem;
         elem= elem->next)
    {
      if (lex_string_cmp(system_charset_info, with_elem->get_name(),
                         elem->get_name()) == 0)
      {
        my_error(ER_DUP_QUERY_NAME, MYF(0),
                 with_elem->get_name_str());
        return true;
      }
    }
    if (with_elem->check_dependencies_in_spec())
      return true;
  }
  /* Build the transitive closure of the direct dependencies found above */
  for (With_element *with_elem= with_list.first;
       with_elem;
       with_elem= with_elem->next)
    with_elem->derived_dep_map= with_elem->base_dep_map;
  for (With_element *with_elem= with_list.first;
       with_elem;
       with_elem= with_elem->next)
  {
    table_map with_elem_map= with_elem->get_elem_map();
    for (With_element *elem= with_list.first; elem; elem= elem->next)
    {
      if (elem->derived_dep_map & with_elem_map)
        elem->derived_dep_map |= with_elem->derived_dep_map;
    }
  }

  /*
    Mark those elements where tables are defined with direct or indirect
    recursion (an element that transitively depends on itself).
  */
  for (With_element *with_elem= with_list.first;
       with_elem;
       with_elem= with_elem->next)
  {
    if (with_elem->derived_dep_map & with_elem->get_elem_map())
      with_elem->is_recursive= true;
  }

  dependencies_are_checked= true;
  return false;
}


/*
  This structure describes an element of the stack of embedded units.
  The stack is used when looking for a definition of a table in
  with clauses. The definition can be found only in the scopes
  of the with clauses attached to the units from the stack.
  The with clauses are looked through starting from the top
  element of the stack.
*/

struct st_unit_ctxt_elem
{
  st_unit_ctxt_elem *prev;   // the previous element of the stack
  st_select_lex_unit *unit;
};


/**
  @brief
    Find the dependencies of this element on its siblings in its specification

  @details
    For each table reference ref(T) from the FROM list of every select sl
    immediately contained in the specification query of this element this
    method searches for the definition of T in the with clause which
    this element belongs to. If such a definition is found then the
    dependency on it is set in sl->with_dep and in this->base_dep_map.
*/

bool With_element::check_dependencies_in_spec()
{
  for (st_select_lex *sl= spec->first_select(); sl; sl= sl->next_select())
  {
    if (owner->with_recursive)
    {
      /* For WITH RECURSIVE the owner's unit is also in scope */
      st_unit_ctxt_elem ctxt0= {NULL, owner->owner};
      st_unit_ctxt_elem ctxt1= {&ctxt0, spec};
      check_dependencies_in_select(sl, &ctxt1, false, &sl->with_dep);
    }
    else
    {
      st_unit_ctxt_elem ctxt= {NULL, spec};
      check_dependencies_in_select(sl, &ctxt, false, &sl->with_dep);
    }
    base_dep_map|= sl->with_dep;
  }
  return false;
}


/**
  @brief
    Search for the definition of a table among the elements of this with clause

  @param table    The reference to the table that is looked for
  @param barrier  The barrier with element for the search

  @details
    The function looks through the elements of this with clause trying to find
    the definition of the given table. When it encounters the element with
    the same query name as the table's name it returns this element. If no
    such definitions are found the function returns NULL.

  @retval
    found with element if the search succeeded
    NULL  - otherwise
*/

With_element *With_clause::find_table_def(TABLE_LIST *table,
                                          With_element *barrier)
{
  for (With_element *with_elem= with_list.first;
       with_elem != barrier;
       with_elem= with_elem->next)
  {
    if (my_strcasecmp(system_charset_info, with_elem->get_name_str(),
                      table->table_name.str) == 0 &&
        !table->is_fqtn)
    {
      table->set_derived();
      with_elem->referenced= true;
      return with_elem;
    }
  }
  return NULL;
}


/**
  @brief
    Search for the definition of a table in with clauses

  @param tbl   The reference to the table that is looked for
  @param ctxt  The context describing in what with clauses of the upper
               levels the table has to be searched for.

  @details
    The function looks for the definition of the table tbl in the definitions
    of the with clauses from the upper levels specified by the parameter ctxt.
    When it encounters the element with the same query name as the table's
    name it returns this element. If no such definitions are found the
    function returns NULL.

  @retval
    found with element if the search succeeded
    NULL  - otherwise
*/

With_element *find_table_def_in_with_clauses(TABLE_LIST *tbl,
                                             st_unit_ctxt_elem *ctxt)
{
  With_element *found= 0;
  st_select_lex_unit *top_unit= 0;
  for (st_unit_ctxt_elem *unit_ctxt_elem= ctxt;
       unit_ctxt_elem;
       unit_ctxt_elem= unit_ctxt_elem->prev)
  {
    st_select_lex_unit *unit= unit_ctxt_elem->unit;
    With_clause *with_clause= unit->with_clause;
    if (with_clause)
    {
      /*
        If the reference to tbl that has to be resolved belongs to
        the FROM clause of a descendant of top_unit->with_element
        and this with element belongs to with_clause then this
        element must be used as the barrier for the search in the
        list of CTEs from with_clause unless the clause contains
        RECURSIVE.
      */
      With_element *barrier= 0;
      if (top_unit && !with_clause->with_recursive &&
          top_unit->with_element &&
          top_unit->with_element->get_owner() == with_clause)
        barrier= top_unit->with_element;
      found= with_clause->find_table_def(tbl, barrier);
      if (found)
        break;
    }
    top_unit= unit;
  }
  return found;
}


/**
  @brief
    Find the dependencies of this element on its siblings in a select

  @param sl       The select where to look for the dependencies
  @param ctxt     The structure specifying the scope of the definitions
                  of the with elements of the upper levels
  @param in_subq  if true mark dependencies found in subqueries in
                  this->sq_dep_map
  @param dep_map  IN/OUT The bitmap where to mark the found dependencies

  @details
    For each table reference ref(T) from the FROM list of the select sl
    the method searches in with clauses for the definition of the table T.
    If the found definition belongs to the same with clause as this with
    element then the method sets the dependency on T in the in/out parameter
    dep_map, and if required - in this->sq_dep_map.
    The parameter ctxt describes the proper context for the search
    of the definition of T.
*/

void With_element::check_dependencies_in_select(st_select_lex *sl,
                                                st_unit_ctxt_elem *ctxt,
                                                bool in_subq,
                                                table_map *dep_map)
{
  bool is_spec_select= sl->get_with_element() == this;

  for (TABLE_LIST *tbl= sl->table_list.first; tbl; tbl= tbl->next_local)
  {
    if (tbl->with || tbl->derived || tbl->nested_join)
      continue;
    tbl->with_internal_reference_map= 0;
    /*
      Look first for the definition of tbl in the with clause to which
      this with element belongs. If such a definition is not found there,
      look in the with clauses of the upper levels via the context
      chain of embedding with elements.
      If the definition of tbl is found somewhere in with clauses
      then tbl->with is set to point to this definition.
    */
    if (is_spec_select)
    {
      With_clause *with_clause= sl->master_unit()->with_clause;
      if (with_clause)
        tbl->with= with_clause->find_table_def(tbl, NULL);
      if (!tbl->with)
        tbl->with= owner->find_table_def(tbl,
                                         owner->with_recursive ? NULL : this);
    }
    if (!tbl->with)
      tbl->with= find_table_def_in_with_clauses(tbl, ctxt);

    if (tbl->with && tbl->with->owner== this->owner)
    {
      /*
        The found definition T of tbl belongs to the same
        with clause as this with element. In this case:
        - set the dependence on T in the bitmap dep_map
        - set tbl->with_internal_reference_map with
          the bitmap for this definition
        - set the dependence on T in the bitmap this->sq_dep_map
          if needed
      */
      *dep_map|= tbl->with->get_elem_map();
      tbl->with_internal_reference_map= get_elem_map();
      if (in_subq)
        sq_dep_map|= tbl->with->get_elem_map();
      else
        top_level_dep_map|= tbl->with->get_elem_map();
    }
  }
  /* Now look for the dependencies in the subqueries of sl */
  st_select_lex_unit *inner_unit= sl->first_inner_unit();
  for (; inner_unit; inner_unit= inner_unit->next_unit())
  {
    check_dependencies_in_unit(inner_unit, ctxt, in_subq, dep_map);
  }
}


/**
  @brief
    Find a recursive reference to this with element in subqueries of a select

  @param sel  The select in whose subqueries the reference
              is to be looked for

  @details
    The function looks for a recursive reference to this with element in
    subqueries of select sl. When the first such reference is found
    it is returned as the result.
    The function assumes that the identification of all CTE references
    has been performed earlier.

  @retval
    Pointer to the found recursive reference if the search succeeded
    NULL - otherwise
*/

TABLE_LIST *With_element::find_first_sq_rec_ref_in_select(st_select_lex *sel)
{
  TABLE_LIST *rec_ref= NULL;
  st_select_lex_unit *inner_unit= sel->first_inner_unit();
  for (; inner_unit; inner_unit= inner_unit->next_unit())
  {
    st_select_lex *sl= inner_unit->first_select();
    for (; sl; sl= sl->next_select())
    {
      for (TABLE_LIST *tbl= sl->table_list.first; tbl; tbl= tbl->next_local)
      {
        if (tbl->derived || tbl->nested_join)
          continue;
        if (tbl->with && tbl->with->owner== this->owner &&
            (tbl->with_internal_reference_map & mutually_recursive))
        {
          rec_ref= tbl;
          return rec_ref;
        }
      }
      /* Recurse into deeper subqueries of sl */
      if ((rec_ref= find_first_sq_rec_ref_in_select(sl)))
        return rec_ref;
    }
  }
  return 0;
}


/**
  @brief
    Find the dependencies of this element on its siblings in a unit

  @param unit     The unit where to look for the dependencies
  @param ctxt     The structure specifying the scope of the definitions
                  of the with elements of the upper levels
  @param in_subq  if true mark dependencies found in subqueries in
                  this->sq_dep_map
  @param dep_map  IN/OUT The bitmap where to mark the found dependencies

  @details
    This method searches in the unit 'unit' for the references in FROM
    lists of all selects contained in this unit and in the with clause
    attached to this unit that refer to definitions of tables from the
    same with clause as this element.
    If such definitions are found then the dependencies on them are
    set in the in/out parameter dep_map and optionally in this->sq_dep_map.
    The parameter ctxt describes the proper context for the search.
*/

void With_element::check_dependencies_in_unit(st_select_lex_unit *unit,
                                              st_unit_ctxt_elem *ctxt,
                                              bool in_subq,
                                              table_map *dep_map)
{
  st_unit_ctxt_elem unit_ctxt_elem= {ctxt, unit};
  if (unit->with_clause)
  {
    (void) unit->with_clause->check_dependencies();
    check_dependencies_in_with_clause(unit->with_clause, &unit_ctxt_elem,
                                      in_subq, dep_map);
  }
  /* A unit with an Item (subquery predicate) is a subquery context */
  in_subq |= unit->item != NULL;
  st_select_lex *sl= unit->first_select();
  for (; sl; sl= sl->next_select())
  {
    check_dependencies_in_select(sl, &unit_ctxt_elem, in_subq, dep_map);
  }
}


/**
  @brief
    Find the dependencies of this element on its siblings in a with clause

  @param with_clause  The with clause where to look for the dependencies
  @param ctxt         The structure specifying the scope of the definitions
                      of the with elements of the upper levels
  @param in_subq      if true mark dependencies found in subqueries in
                      this->sq_dep_map
  @param dep_map      IN/OUT The bitmap where to mark the found dependencies

  @details
    This method searches in with_clause for the references in FROM
    lists of all selects contained in the specifications of the with elements
    from this with_clause that refer to definitions of tables from the
    same with clause as this element.
    If such definitions are found then the dependencies on them are
    set in the in/out parameter dep_map and optionally in this->sq_dep_map.
    The parameter ctxt describes the proper context for the search.
*/

void
With_element::check_dependencies_in_with_clause(With_clause *with_clause,
                                                st_unit_ctxt_elem *ctxt,
                                                bool in_subq,
                                                table_map *dep_map)
{
  for (With_element *with_elem= with_clause->with_list.first;
       with_elem;
       with_elem= with_elem->next)
  {
    check_dependencies_in_unit(with_elem->spec, ctxt, in_subq, dep_map);
  }
}


/**
  @brief
    Find mutually recursive with elements and check that they have anchors

  @details
    This method performs the following:
    - for each recursive with element finds all elements mutually recursive
      with it
    - links each group of mutually recursive with elements into a ring chain
    - checks that every group of mutually recursive with elements contains
      at least one anchor
    - checks that after removing any with element with an anchor the
      remaining with elements mutually recursive with the removed one are
      not recursive anymore

  @retval
    true   if an error is reported
    false  otherwise
*/

bool With_clause::check_anchors()
{
  for (With_element *with_elem= with_list.first;
       with_elem;
       with_elem= with_elem->next)
  {
    if (!with_elem->is_recursive)
      continue;

    /*
      If with_elem is a recursive with element, find all elements mutually
      recursive with it (any recursive element is mutually recursive with
      itself). Mark all these elements in the bitmap mutually_recursive.
      Also link all these elements into a ring chain.
    */
    if (!with_elem->next_mutually_recursive)
    {
      With_element *last_mutually_recursive= with_elem;
      table_map with_elem_dep= with_elem->derived_dep_map;
      table_map with_elem_map= with_elem->get_elem_map();
      for (With_element *elem= with_elem; elem; elem= elem->next)
      {
        if (!elem->is_recursive)
          continue;

        /* Mutually recursive iff each transitively depends on the other */
        if (elem == with_elem ||
            ((elem->derived_dep_map & with_elem_map) &&
             (with_elem_dep & elem->get_elem_map())))
        {
          elem->next_mutually_recursive= with_elem;
          last_mutually_recursive->next_mutually_recursive= elem;
          last_mutually_recursive= elem;
          with_elem->mutually_recursive|= elem->get_elem_map();
        }
      }
      for (With_element *elem= with_elem->next_mutually_recursive;
           elem != with_elem;
           elem= elem->next_mutually_recursive)
        elem->mutually_recursive= with_elem->mutually_recursive;
    }

    /*
      For each select from the specification of 'with_elem' check whether
      it is an anchor, i.e. does not depend on any with elements mutually
      recursive with 'with_elem'.
    */
    for (st_select_lex *sl= with_elem->spec->first_select();
         sl;
         sl= sl->next_select())
    {
      if (with_elem->is_anchor(sl))
      {
        with_elem->with_anchor= true;
        break;
      }
    }
  }

  /*
    Check that for any group of mutually recursive with elements
    - there is at least one anchor
    - after removing any with element with an anchor the remaining with
      elements mutually recursive with the removed one are not recursive
      anymore
  */
  for (With_element *with_elem= with_list.first;
       with_elem;
       with_elem= with_elem->next)
  {
    if (!with_elem->is_recursive)
      continue;

    if (!with_elem->with_anchor)
    {
      /*
        Check that the other with elements mutually recursive with
        'with_elem' contain at least one anchor.
      */
      With_element *elem= with_elem;
      while ((elem= elem->get_next_mutually_recursive()) != with_elem)
      {
        if (elem->with_anchor)
          break;
      }
      if (elem == with_elem)
      {
        my_error(ER_RECURSIVE_WITHOUT_ANCHORS, MYF(0),
                 with_elem->get_name_str());
        return true;
      }
    }
    else
    {
      /* 'with_elem' is a with element with an anchor */
      With_element *elem= with_elem;
      /*
        For the other with elements mutually recursive with 'with_elem'
        set dependency bits between those elements in the field work_dep_map
        and build the transitive closure of these dependencies
      */
      while ((elem= elem->get_next_mutually_recursive()) != with_elem)
        elem->work_dep_map= elem->base_dep_map & elem->mutually_recursive;
      elem= with_elem;
      while ((elem= elem->get_next_mutually_recursive()) != with_elem)
      {
        table_map elem_map= elem->get_elem_map();
        With_element *el= with_elem;
        while ((el= el->get_next_mutually_recursive()) != with_elem)
        {
          if (el->work_dep_map & elem_map)
            el->work_dep_map|= elem->work_dep_map;
        }
      }
      /* If the transitive closure displays any cycle report an error */
      elem= with_elem;
      while ((elem= elem->get_next_mutually_recursive()) != with_elem)
      {
        if (elem->work_dep_map & elem->get_elem_map())
        {
          my_error(ER_UNACCEPTABLE_MUTUAL_RECURSION, MYF(0),
                   with_elem->get_name_str());
          return true;
        }
      }
    }
  }

  return false;
}


/**
  @brief
    Move anchors to the beginning of the specifications of with elements

  @details
    This method moves anchors to the beginning of the specifications of
    all recursive with elements.
+*/ + +void With_clause::move_anchors_ahead() +{ + for (With_element *with_elem= with_list.first; + with_elem; + with_elem= with_elem->next) + { + if (with_elem->is_recursive) + with_elem->move_anchors_ahead(); + } +} + + +/** + @brief + Move anchors at the beginning of the specification of this with element + + @details + If the specification of this with element contains anchors the method + moves them at the very beginning of the specification. + Additionally for the other selects of the specification if none of them + contains a recursive reference to this with element or a mutually recursive + one the method looks for the first such reference in the first recursive + select and set a pointer to it in this->sq_rec_ref. +*/ + +void With_element::move_anchors_ahead() +{ + st_select_lex *next_sl; + st_select_lex *new_pos= spec->first_select(); + new_pos->set_linkage(UNION_TYPE); + for (st_select_lex *sl= new_pos; sl; sl= next_sl) + { + next_sl= sl->next_select(); + if (is_anchor(sl)) + { + sl->move_node(new_pos); + if (new_pos == spec->first_select()) + { + enum sub_select_type type= new_pos->get_linkage(); + new_pos->set_linkage(sl->get_linkage()); + sl->set_linkage(type); + new_pos->with_all_modifier= sl->with_all_modifier; + sl->with_all_modifier= false; + } + new_pos= sl->next_select(); + } + else if (!sq_rec_ref && no_rec_ref_on_top_level()) + { + sq_rec_ref= find_first_sq_rec_ref_in_select(sl); + DBUG_ASSERT(sq_rec_ref != NULL); + } + } + first_recursive= new_pos; + spec->first_select()->set_linkage(DERIVED_TABLE_TYPE); +} + + +/** + @brief + Perform context analysis for all unreferenced tables defined in with clause + + @param thd The context of the statement containing this with clause + + @details + For each unreferenced table T defined in this with clause the method + calls the method With_element::prepare_unreferenced that performs + context analysis of the element with the definition of T. 
+ + @retval + false If context analysis does not report any error + true Otherwise +*/ + +bool With_clause::prepare_unreferenced_elements(THD *thd) +{ + for (With_element *with_elem= with_list.first; + with_elem; + with_elem= with_elem->next) + { + if ((with_elem->is_hanging_recursive() || !with_elem->is_referenced()) && + with_elem->prepare_unreferenced(thd)) + return true; + } + + return false; +} + + +/** + @brief + Save the specification of the given with table as a string + + @param thd The context of the statement containing this with element + @param spec_start The beginning of the specification in the input string + @param spec_end The end of the specification in the input string + @param spec_offset The offset of the specification in the input string + + @details + The method creates for a string copy of the specification used in this + element. The method is called when the element is parsed. The copy may be + used to create clones of the specification whenever they are needed. + + @retval + false on success + true on failure +*/ + +bool With_element::set_unparsed_spec(THD *thd, + const char *spec_start, + const char *spec_end, + my_ptrdiff_t spec_offset) +{ + stmt_prepare_mode= thd->m_parser_state->m_lip.stmt_prepare_mode; + unparsed_spec.length= spec_end - spec_start; + + if (stmt_prepare_mode || !thd->lex->sphead) + unparsed_spec.str= spec_start; + else + unparsed_spec.str= thd->strmake(spec_start, unparsed_spec.length); + unparsed_spec_offset= spec_offset; + + if (!unparsed_spec.str) + { + my_error(ER_OUTOFMEMORY, MYF(ME_FATAL), + static_cast(unparsed_spec.length)); + return true; + } + return false; +} + + +/** + @brief + Create a clone of the specification for the given with table + + @param old_lex The LEX structure created for the query or CTE specification + where this With_element is defined + @param with_table The reference to the table defined in this element for which + the clone is created. 
  @details
    The method creates a clone of the specification used in this element.
    The clone is created for the given reference to the table defined by
    this element.
    The clone is created when the string with the specification saved in
    unparsed_spec is fed into the parser as an input string. By parsing
    this string a unit object representing the specification is built.
    A chain of all table references occurring in the specification is also
    formed.
    The method includes the new unit and its sub-unit into the hierarchy of
    the units of the main query. It also inserts the constructed chain of
    table references into the chain of all table references of the main query.
    The method resolves all references to CTE in the clone.

  @note
    Clones are created only for non-first references to tables defined in
    the with clause. They are necessary for merged specifications because
    the optimizer handles any such specification as independent of the others.
    When a table defined in the with clause is materialized in a temporary
    table one could do without specification clones. However in this case they
    are created as well, because currently different table references to
    the same temporary table cannot share the same definition structure.

  @retval
    pointer to the built clone if it succeeds
    NULL - otherwise
*/

st_select_lex_unit *With_element::clone_parsed_spec(LEX *old_lex,
                                                    TABLE_LIST *with_table)
{
  THD *thd= old_lex->thd;
  LEX *lex;
  st_select_lex_unit *res= NULL;

  if (!(lex= (LEX*) new(thd->mem_root) st_lex_local))
    return res;
  thd->lex= lex;

  bool parse_status= false;
  st_select_lex *with_select;
  st_select_lex *last_clone_select;

  /* Temporarily NUL-terminate the saved specification for the parser */
  char save_end= unparsed_spec.str[unparsed_spec.length];
  ((char*) &unparsed_spec.str[unparsed_spec.length])[0]= '\0';

  lex_start(thd);
  lex->clone_spec_offset= unparsed_spec_offset;
  lex->with_cte_resolution= true;
  /*
    There's no need to add SPs/SFs referenced in the clone to the global
    list of the SPs/SFs used in the query as they were added when the first
    reference to the cloned CTE was parsed. Yet the recursive call of the
    parser must know that they were already included into the list.
  */
  lex->sroutines= old_lex->sroutines;
  lex->sroutines_list_own_last= old_lex->sroutines_list_own_last;
  lex->sroutines_list_own_elements= old_lex->sroutines_list_own_elements;

  /*
    The specification of a CTE is to be parsed as a regular query.
    At the very end of parsing the query the function
    check_cte_dependencies_and_resolve_references() will be called.
    It will check the dependencies between CTEs that are defined
    within the query and will resolve CTE references in this query.
    If a table reference is not resolved as a CTE reference within
    this query it still can be resolved as a reference to a CTE defined
    in the same clause as the CTE whose specification is to be parsed
    or defined in an embedding CTE definition.

    Example:
      with
      cte1 as ( ... ),
      cte2 as ([WITH ...] select ... from cte1 ...)
      select ... from cte2 as r, ..., cte2 as s ...

    Here the specification of cte2 has to be cloned for the table reference
    with alias s. The specification contains a reference to cte1
    that is defined outside this specification. If the reference to
    cte1 cannot be resolved within the specification of cte2 it does
    not necessarily have to be a reference to a non-CTE table. That's
    why the flag lex->only_cte_resolution has to be set to true
    before parsing of the specification of cte2 invoked by this
    function starts. Otherwise an mdl_lock would be requested for s
    and this would not be correct.
  */

  lex->only_cte_resolution= true;

  lex->stmt_lex= old_lex->stmt_lex ? old_lex->stmt_lex : old_lex;

  parse_status= thd->sql_parser(old_lex, lex,
                                (char*) unparsed_spec.str,
                                (unsigned int)unparsed_spec.length,
                                stmt_prepare_mode);

  /* Restore the byte that was overwritten with the terminating NUL */
  ((char*) &unparsed_spec.str[unparsed_spec.length])[0]= save_end;
  with_select= lex->unit.first_select();

  if (parse_status)
    goto err;

  /*
    The unit of the specification that has just been parsed is included
    as a slave of the select that contained in its from list the table
    reference for which the unit has been created.
  */
  lex->unit.include_down(with_table->select_lex);
  lex->unit.set_slave(with_select);
  lex->unit.cloned_from= spec;

  /*
    Now all references to the CTE defined outside of the cloned specification
    have to be resolved. Additionally, if old_lex->only_cte_resolution ==
    false, requests for mdl_locks have to be set for the table references
    that have not been resolved.
  */
  lex->only_cte_resolution= old_lex->only_cte_resolution;
  if (lex->resolve_references_to_cte(lex->query_tables,
                                     lex->query_tables_last))
  {
    res= NULL;
    goto err;
  }

  /*
    The global chain of TABLE_LIST objects created for the specification that
    has just been parsed is added to the chain that contains the reference
    to the CTE whose specification is parsed, right after the TABLE_LIST
    object created for the reference.
  */
  if (lex->query_tables)
  {
    head->tables_pos.set_start_pos(&with_table->next_global);
    head->tables_pos.set_end_pos(lex->query_tables_last);
    TABLE_LIST *next_tbl= with_table->next_global;
    if (next_tbl)
    {
      *(lex->query_tables->prev_global= next_tbl->prev_global)=
        lex->query_tables;
      *(next_tbl->prev_global= lex->query_tables_last)= next_tbl;
    }
    else
    {
      *(lex->query_tables->prev_global= old_lex->query_tables_last)=
        lex->query_tables;
      old_lex->query_tables_last= lex->query_tables_last;
    }
  }
  old_lex->sroutines_list_own_last= lex->sroutines_list_own_last;
  old_lex->sroutines_list_own_elements= lex->sroutines_list_own_elements;
  res= &lex->unit;
  res->with_element= this;

  /* Splice the clone's select list into the main query's all_selects_list */
  last_clone_select= lex->all_selects_list;
  while (last_clone_select->next_select_in_list())
    last_clone_select= last_clone_select->next_select_in_list();
  old_lex->all_selects_list=
    (st_select_lex*) (lex->all_selects_list->
                      insert_chain_before(
                        (st_select_lex_node **) &(old_lex->all_selects_list),
                        last_clone_select));

  lex->sphead= NULL; // in order not to delete lex->sphead
  lex_end(lex);
err:
  thd->lex= old_lex;
  return res;
}


/**
  @brief
    Rename columns of the unit derived from the spec of this with element
  @param thd   The context of the statement containing the with element
  @param unit  The specification of the with element or its clone

  @details
    The method assumes that the parameter unit is either the specification
    itself of this with element or a clone of this specification. It looks
    through the column list in this with element. It reports an error if the
    cardinality of this list differs from the cardinality of the select lists
    in 'unit'.
    Otherwise it renames the columns of the first select list and sets the
    flag unit->column_list_is_processed to true, preventing renaming columns
    for the second time.
+ + @retval + true if an error was reported + false otherwise +*/ + +bool +With_element::process_columns_of_derived_unit(THD *thd, + st_select_lex_unit *unit) +{ + if (unit->columns_are_renamed) + return false; + + st_select_lex *select= unit->first_select(); + + if (column_list.elements) // The column list is optional + { + List_iterator_fast it(select->item_list); + List_iterator_fast nm(column_list); + Item *item; + LEX_CSTRING *name; + + if (column_list.elements != select->item_list.elements) + { + my_error(ER_WITH_COL_WRONG_LIST, MYF(0)); + return true; + } + + Query_arena *arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); + + /* Rename the columns of the first select in the unit */ + while ((item= it++, name= nm++)) + { + item->set_name(thd, *name); + item->base_flags|= item_base_t::IS_EXPLICIT_NAME; + } + + if (arena) + thd->restore_active_arena(arena, &backup); + } + else + make_valid_column_names(thd, select->item_list); + + if (cycle_list) + { + List_iterator_fast it(select->item_list); + List_iterator_fast nm(*cycle_list); + List_iterator_fast nm_check(*cycle_list); + DBUG_ASSERT(cycle_list->elements != 0); + while (LEX_CSTRING *name= nm++) + { + Item *item; + /* + Check for uniqueness of each element in the cycle list: + It's sufficient to check that there is no duplicate of 'name' + among the elements that precede it. 
+ */ + LEX_CSTRING *check; + nm_check.rewind(); + while ((check= nm_check++) && check != name) + { + if (check->length == name->length && + strncmp(check->str, name->str, name->length) == 0) + { + my_error(ER_DUP_FIELDNAME, MYF(0), check->str); + return true; + } + } + /* Check that 'name' is the name of a column of the processed CTE */ + while ((item= it++) && + (item->name.length != name->length || + strncmp(item->name.str, name->str, name->length) != 0)); + if (item == NULL) + { + my_error(ER_BAD_FIELD_ERROR, MYF(0), name->str, "CYCLE clause"); + return true; + } + item->base_flags|= item_base_t::IS_IN_WITH_CYCLE; + } + } + unit->columns_are_renamed= true; + + return false; +} + + +/** + @brief + Perform context analysis the definition of an unreferenced table + + @param thd The context of the statement containing this with element + + @details + The method assumes that this with element contains the definition + of a table that is not used anywhere. In this case one has to check + that context conditions are met. 
+ + @retval + true if an error was reported + false otherwise +*/ + +bool With_element::prepare_unreferenced(THD *thd) +{ + bool rc= false; + st_select_lex *first_sl= spec->first_select(); + + /* Prevent name resolution for field references out of with elements */ + for (st_select_lex *sl= first_sl; + sl; + sl= sl->next_select()) + sl->context.outer_context= 0; + + thd->lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_DERIVED; + if (!spec->prepared && + (spec->prepare(spec->derived, 0, 0) || + process_columns_of_derived_unit(thd, spec) || + check_duplicate_names(thd, first_sl->item_list, 1))) + rc= true; + + thd->lex->context_analysis_only&= ~CONTEXT_ANALYSIS_ONLY_DERIVED; + return rc; +} + + +bool With_element::is_anchor(st_select_lex *sel) +{ + return !(mutually_recursive & sel->with_dep); +} + + +/** + @brief + Search for the definition of the given table referred in this select node + + @param table reference to the table whose definition is searched for + + @details + The method looks for the definition of the table whose reference is occurred + in the FROM list of this select node. First it searches for it in the + with clause attached to the unit this select node belongs to. If such a + definition is not found then the embedding units are looked through. + + @retval + pointer to the found definition if the search has been successful + NULL - otherwise +*/ + +With_element *st_select_lex::find_table_def_in_with_clauses(TABLE_LIST *table) +{ + With_element *found= NULL; + With_clause *containing_with_clause= NULL; + st_select_lex_unit *master_unit; + st_select_lex *outer_sl; + for (st_select_lex *sl= this; sl; sl= outer_sl) + { + /* + If sl->master_unit() is the spec of a with element then the search for + a definition was already done by With_element::check_dependencies_in_spec + and it was unsuccesful. Yet for units cloned from the spec it has not + been done yet. 
+ */ + With_clause *attached_with_clause= sl->get_with_clause(); + if (attached_with_clause && + attached_with_clause != containing_with_clause && + (found= attached_with_clause->find_table_def(table, NULL))) + break; + master_unit= sl->master_unit(); + outer_sl= master_unit->outer_select(); + With_element *with_elem= sl->get_with_element(); + if (with_elem) + { + containing_with_clause= with_elem->get_owner(); + With_element *barrier= containing_with_clause->with_recursive ? + NULL : with_elem; + if ((found= containing_with_clause->find_table_def(table, barrier))) + break; + if (outer_sl && !outer_sl->get_with_element()) + break; + } + /* Do not look for the table's definition beyond the scope of the view */ + if (master_unit->is_view) + break; + } + return found; +} + + +bool TABLE_LIST::is_recursive_with_table() +{ + return with && with->is_recursive; +} + + +/* + A reference to a with table T is recursive if it occurs somewhere + in the query specifying T or in the query specifying one of the tables + mutually recursive with T. +*/ + +bool TABLE_LIST::is_with_table_recursive_reference() +{ + return (with_internal_reference_map && + (with->get_mutually_recursive() & with_internal_reference_map)); +} + + +/* + Specifications of with tables with recursive table references + in non-mergeable derived tables are not allowed in this + implementation. +*/ + + +/* + We say that the specification of a with table T is restricted + if all below is true. + 1. Any immediate select of the specification contains at most one + recursive table reference taking into account table references + from mergeable derived tables. + 2. Any recursive table reference is not an inner operand of an + outer join operation used in an immediate select of the + specification. + 3. Any immediate select from the specification of T does not + contain aggregate functions. + 4. The specification of T does not contain recursive table references. 
+ + If the specification of T is not restricted we call the corresponding + with element unrestricted. + + The SQL standards allows only with elements with restricted specification. + By default we comply with the standards here. + + Yet we allow unrestricted specification if the status variable + 'standards_compliant_cte' set to 'off'(0). +*/ + + +/** + @brief + Check if this select makes the including specification unrestricted + + @param + only_standards_compliant true if the system variable + 'standards_compliant_cte' is set to 'on' + @details + This method checks whether the conditions 1-4 (see the comment above) + are satisfied for this select. If not then mark this element as + unrestricted and report an error if 'only_standards_compliant' is true. + + @retval + true if an error is reported + false otherwise +*/ + +bool st_select_lex::check_unrestricted_recursive(bool only_standard_compliant) +{ + With_element *with_elem= get_with_element(); + if (!with_elem ||!with_elem->is_recursive) + { + /* + If this select is not from the specifiocation of a with elememt or + if this not a recursive with element then there is nothing to check. 
+ */ + return false; + } + + /* Check conditions 1-2 for restricted specification*/ + table_map unrestricted= 0; + table_map encountered= 0; + if (with_elem->check_unrestricted_recursive(this, + unrestricted, + encountered)) + return true; + with_elem->get_owner()->add_unrestricted(unrestricted); + + + /* Check conditions 3-4 for restricted specification*/ + if ((with_sum_func && !with_elem->is_anchor(this)) || + (with_elem->contains_sq_with_recursive_reference())) + with_elem->get_owner()->add_unrestricted( + with_elem->get_mutually_recursive()); + + /* Report an error on unrestricted specification if this is required */ + if (only_standard_compliant && with_elem->is_unrestricted()) + { + my_error(ER_NOT_STANDARD_COMPLIANT_RECURSIVE, + MYF(0), with_elem->get_name_str()); + return true; + } + + return false; +} + + +/** + @brief + Check if a select from the spec of this with element is partially restricted + + @param + sel select from the specification of this element where to check + whether conditions 1-2 are satisfied + unrestricted IN/OUT bitmap where to mark unrestricted specs + encountered IN/OUT bitmap where to mark encountered recursive references + @details + This method checks whether the conditions 1-2 (see the comment above) + are satisfied for the select sel. + This method is called recursively for derived tables. 
+ + @retval + true if an error is reported + false otherwise +*/ + +bool With_element::check_unrestricted_recursive(st_select_lex *sel, + table_map &unrestricted, + table_map &encountered) +{ + /* Check conditions 1 for restricted specification*/ + List_iterator ti(sel->leaf_tables); + TABLE_LIST *tbl; + while ((tbl= ti++)) + { + st_select_lex_unit *unit= tbl->get_unit(); + if (unit) + { + if(!tbl->is_with_table()) + { + if (check_unrestricted_recursive(unit->first_select(), + unrestricted, + encountered)) + return true; + } + if (!(tbl->is_recursive_with_table() && unit->with_element->owner == owner)) + continue; + With_element *with_elem= unit->with_element; + if (encountered & with_elem->get_elem_map()) + unrestricted|= with_elem->mutually_recursive; + else if (with_elem ==this) + encountered|= with_elem->get_elem_map(); + } + } + for (With_element *with_elem= owner->with_list.first; + with_elem; + with_elem= with_elem->next) + { + if (!with_elem->is_recursive && (unrestricted & with_elem->get_elem_map())) + continue; + if (encountered & with_elem->get_elem_map()) + { + uint cnt= 0; + table_map encountered_mr= encountered & with_elem->mutually_recursive; + for (table_map map= encountered_mr >> with_elem->number; + map != 0; + map>>= 1) + { + if (map & 1) + { + if (cnt) + { + unrestricted|= with_elem->mutually_recursive; + break; + } + else + cnt++; + } + } + } + } + + + /* Check conditions 2 for restricted specification*/ + ti.rewind(); + while ((tbl= ti++)) + { + if (!tbl->is_with_table_recursive_reference()) + continue; + for (TABLE_LIST *tab= tbl; tab; tab= tab->embedding) + { + if (tab->outer_join & (JOIN_TYPE_LEFT | JOIN_TYPE_RIGHT)) + { + unrestricted|= mutually_recursive; + break; + } + } + } + return false; +} + + +/** + @brief + Check subqueries with recursive table references from FROM list of this select + + @details + For each recursive table reference from the FROM list of this select + this method checks: + - whether this reference is within a 
materialized derived table and + if so it report an error + - whether this reference is within a subquery and if so it set a flag + in this subquery that disallows some optimization strategies for + this subquery. + + @retval + true if an error is reported + false otherwise +*/ + +bool st_select_lex::check_subqueries_with_recursive_references() +{ + List_iterator ti(leaf_tables); + TABLE_LIST *tbl; + while ((tbl= ti++)) + { + if (!(tbl->is_with_table_recursive_reference())) + continue; + With_element *rec_elem= tbl->with; + st_select_lex_unit *sl_master; + for (st_select_lex *sl= this; sl; sl= sl_master->outer_select()) + { + sl_master= sl->master_unit(); + if (sl_master->with_element && + sl_master->with_element->get_owner() == rec_elem->get_owner()) + break; + sl->uncacheable|= UNCACHEABLE_DEPENDENT; + sl_master->uncacheable|= UNCACHEABLE_DEPENDENT; + if (sl_master->derived) + sl_master->derived->register_as_derived_with_rec_ref(rec_elem); + if (sl_master->item) + { + Item_subselect *subq= (Item_subselect *) (sl_master->item); + subq->register_as_with_rec_ref(rec_elem); + } + } + } + return false; +} + + +/** + @brief + Print this with clause + + @param thd Thread handle + @param str Where to print to + @param query_type The mode of printing + + @details + The method prints a string representation of this clause in the + string str. The parameter query_type specifies the mode of printing. +*/ + +void With_clause::print(THD *thd, String *str, enum_query_type query_type) +{ + /* + Any with clause contains just definitions of CTE tables. + No data expansion is applied to these definitions. 
+ */ + query_type= (enum_query_type) (query_type | QT_NO_DATA_EXPANSION); + + str->append(STRING_WITH_LEN("with ")); + if (with_recursive) + str->append(STRING_WITH_LEN("recursive ")); + for (With_element *with_elem= with_list.first; + with_elem; + with_elem= with_elem->next) + { + if (with_elem != with_list.first) + str->append(STRING_WITH_LEN(", ")); + with_elem->print(thd, str, query_type); + } +} + + +static void list_strlex_print(THD *thd, String *str, List *list) +{ + List_iterator_fast li(*list); + bool first= TRUE; + while(Lex_ident_sys *col_name= li++) + { + if (first) + first= FALSE; + else + str->append(','); + append_identifier(thd, str, col_name); + } +} + + +/** + @brief + Print this with element + + @param thd Thread handle + @param str Where to print to + @param query_type The mode of printing + + @details + The method prints a string representation of this with element in the + string str. The parameter query_type specifies the mode of printing. +*/ + +void With_element::print(THD *thd, String *str, enum_query_type query_type) +{ + str->append(get_name()); + if (column_list.elements) + { + List_iterator_fast li(column_list); + str->append('('); + list_strlex_print(thd, str, &column_list); + str->append(')'); + } + str->append(STRING_WITH_LEN(" as (")); + spec->print(str, query_type); + str->append(')'); + + if (cycle_list) + { + DBUG_ASSERT(cycle_list->elements != 0); + str->append(STRING_WITH_LEN(" CYCLE ")); + list_strlex_print(thd, str, cycle_list); + str->append(STRING_WITH_LEN(" RESTRICT ")); + } +} + + +bool With_element::instantiate_tmp_tables() +{ + List_iterator_fast li(rec_result->rec_table_refs); + TABLE_LIST *rec_tbl; + while ((rec_tbl= li++)) + { + TABLE *rec_table= rec_tbl->table; + if (!rec_table->is_created() && + instantiate_tmp_table(rec_table, + rec_table->s->key_info, + rec_result->tmp_table_param.start_recinfo, + &rec_result->tmp_table_param.recinfo, + 0)) + return true; + + rec_table->file->extra(HA_EXTRA_WRITE_CACHE); + 
rec_table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + } + return false; +} + +void With_element::set_cycle_list(List *cycle_list_arg) +{ + cycle_list= cycle_list_arg; + + /* + If a CTE table with columns c1,...,cn is defined with a cycle + clause CYCLE(ci1,...,cik) then no two rows r1 and r2 from the + table shall have r1.ci1=r2.ci1 && ... && r1.cik=r2.cik. + + If a cycle clause is used in the specification of a CTE then + each UNION ALL at the top level of the specification is interpreted + as a UNION DISTINCT over the cycle columns. + */ + for (st_select_lex *sl= spec->first_select(); sl; sl= sl->next_select()) + { + spec->union_distinct= sl; + if (sl != spec->first_select()) + { + sl->distinct= TRUE; + sl->with_all_modifier= FALSE; + } + } +} diff --git a/sql/sql_cte.h b/sql/sql_cte.h new file mode 100644 index 00000000..6a1f67d3 --- /dev/null +++ b/sql/sql_cte.h @@ -0,0 +1,551 @@ +/* + Copyright (c) 2016, 2017 MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef SQL_CTE_INCLUDED +#define SQL_CTE_INCLUDED +#include "sql_list.h" +#include "sql_lex.h" +#include "sql_select.h" + +class select_unit; +struct st_unit_ctxt_elem; + + +/** + @class With_element_head + @brief Head of the definition of a CTE table + + It contains the name of the CTE and it contains the position of the subchain + of table references used in the definition in the global chain of table + references used in the query where this definition is encountered. +*/ + +class With_element_head : public Sql_alloc +{ + /* The name of the defined CTE */ + LEX_CSTRING *query_name; + +public: + /* + The structure describing the subchain of the table references used in + the specification of the defined CTE in the global chain of table + references used in the query. The structure is fully defined only + after the CTE definition has been parsed. + */ + TABLE_CHAIN tables_pos; + + With_element_head(LEX_CSTRING *name) + : query_name(name) + { + tables_pos.set_start_pos(0); + tables_pos.set_end_pos(0); + } + friend class With_element; +}; + + +/** + @class With_element + @brief Definition of a CTE table + + It contains a reference to the name of the table introduced by this with element, + and a reference to the unit that specificies this table. Also it contains + a reference to the with clause to which this element belongs to. 
+*/ + +class With_element : public Sql_alloc +{ +private: + With_clause *owner; // with clause this object belongs to + With_element *next; // next element in the with clause + uint number; // number of the element in the with clause (starting from 0) + table_map elem_map; // The map where with only one 1 set in this->number + /* + The map base_dep_map has 1 in the i-th position if the query that + specifies this with element contains a reference to the with element number i + in the query FROM list. + (In this case this with element depends directly on the i-th with element.) + */ + table_map base_dep_map; + /* + The map derived_dep_map has 1 in i-th position if this with element depends + directly or indirectly from the i-th with element. + */ + table_map derived_dep_map; + /* + The map sq_dep_map has 1 in i-th position if there is a reference to this + with element somewhere in subqueries of the specifications of the tables + defined in the with clause containing this element; + */ + table_map sq_dep_map; + table_map work_dep_map; // dependency map used for work + /* Dependency map of with elements mutually recursive with this with element */ + table_map mutually_recursive; + /* + Dependency map built only for the top level references i.e. for those that + are encountered in from lists of the selects of the specification unit + */ + table_map top_level_dep_map; + /* + Points to a recursive reference in subqueries. + Used only for specifications without recursive references on the top level. + */ + TABLE_LIST *sq_rec_ref; + /* + The next with element from the circular chain of the with elements + mutually recursive with this with element. + (If This element is simply recursive than next_mutually_recursive contains + the pointer to itself. If it's not recursive than next_mutually_recursive + is set to NULL.) 
+ */ + With_element *next_mutually_recursive; + /* + Total number of references to this element in the FROM lists of + the queries that are in the scope of the element (including + subqueries and specifications of other with elements). + */ + uint references; + + /* + true <=> this With_element is referred in the query in which the + element is defined + */ + bool referenced; + + /* + true <=> this With_element is needed for the execution of the query + in which the element is defined + */ + bool is_used_in_query; + + /* + Unparsed specification of the query that specifies this element. + It's used to build clones of the specification if they are needed. + */ + LEX_CSTRING unparsed_spec; + /* Offset of the specification in the input string */ + my_ptrdiff_t unparsed_spec_offset; + + /* True if the with element is used a prepared statement */ + bool stmt_prepare_mode; + + /* Return the map where 1 is set only in the position for this element */ + table_map get_elem_map() { return (table_map) 1 << number; } + +public: + /* + Contains the name of the defined With element and the position of + the subchain of the tables references used by its definition in the + global chain of TABLE_LIST objects created for the whole query. + */ + With_element_head *head; + + /* + Optional list of column names to name the columns of the table introduced + by this with element. It is used in the case when the names are not + inherited from the query that specified the table. Otherwise the list is + always empty. + */ + List column_list; + List *cycle_list; + /* The query that specifies the table introduced by this with element */ + st_select_lex_unit *spec; + /* + Set to true is recursion is used (directly or indirectly) + for the definition of this element + */ + bool is_recursive; + /* + For a simple recursive CTE: the number of references to the CTE from + outside of the CTE specification. 
+ For a CTE mutually recursive with other CTEs : the total number of + references to all these CTEs outside of their specification. + Each of these mutually recursive CTEs has the same value in this field. + */ + uint rec_outer_references; + /* + Any non-recursive select in the specification of a recursive + with element is a called anchor. In the case mutually recursive + elements the specification of some them may be without any anchor. + Yet at least one of them must contain an anchor. + All anchors of any recursivespecification are moved ahead before + the prepare stage. + */ + /* Set to true if this is a recursive element with an anchor */ + bool with_anchor; + /* + Set to the first recursive select of the unit specifying the element + after all anchor have been moved to the head of the unit. + */ + st_select_lex *first_recursive; + + /* + The number of the last performed iteration for recursive table + (the number of the initial non-recursive step is 0, the number + of the first iteration is 1). + */ + uint level; + + /* + The pointer to the object used to materialize this with element + if it's recursive. This object is built at the end of prepare + stage and is used at the execution stage. 
+ */ + select_union_recursive *rec_result; + + /* List of Item_subselects containing recursive references to this CTE */ + SQL_I_List sq_with_rec_ref; + /* List of derived tables containing recursive references to this CTE */ + SQL_I_List derived_with_rec_ref; + + With_element(With_element_head *h, + List list, + st_select_lex_unit *unit) + : next(NULL), base_dep_map(0), derived_dep_map(0), + sq_dep_map(0), work_dep_map(0), mutually_recursive(0), + top_level_dep_map(0), sq_rec_ref(NULL), + next_mutually_recursive(NULL), references(0), + referenced(false), is_used_in_query(false), + head(h), column_list(list), cycle_list(0), spec(unit), + is_recursive(false), rec_outer_references(0), with_anchor(false), + level(0), rec_result(NULL) + { unit->with_element= this; } + + LEX_CSTRING *get_name() { return head->query_name; } + const char *get_name_str() { return get_name()->str; } + + void set_tables_start_pos(TABLE_LIST **pos) + { head->tables_pos.set_start_pos(pos); } + void set_tables_end_pos(TABLE_LIST **pos) + { head->tables_pos.set_end_pos(pos); } + + bool check_dependencies_in_spec(); + + void check_dependencies_in_select(st_select_lex *sl, st_unit_ctxt_elem *ctxt, + bool in_subq, table_map *dep_map); + + void check_dependencies_in_unit(st_select_lex_unit *unit, + st_unit_ctxt_elem *ctxt, + bool in_subq, + table_map *dep_map); + + void check_dependencies_in_with_clause(With_clause *with_clause, + st_unit_ctxt_elem *ctxt, + bool in_subq, + table_map *dep_map); + + void set_dependency_on(With_element *with_elem) + { base_dep_map|= with_elem->get_elem_map(); } + + bool check_dependency_on(With_element *with_elem) + { return base_dep_map & with_elem->get_elem_map(); } + + TABLE_LIST *find_first_sq_rec_ref_in_select(st_select_lex *sel); + + bool set_unparsed_spec(THD *thd, const char *spec_start, const char *spec_end, + my_ptrdiff_t spec_offset); + + st_select_lex_unit *clone_parsed_spec(LEX *old_lex, TABLE_LIST *with_table); + + bool is_referenced() { return 
referenced; } + + bool is_hanging_recursive() { return is_recursive && !rec_outer_references; } + + void inc_references() { references++; } + + bool process_columns_of_derived_unit(THD *thd, st_select_lex_unit *unit); + + bool prepare_unreferenced(THD *thd); + + bool check_unrestricted_recursive(st_select_lex *sel, + table_map &unrestricted, + table_map &encountered); + + void print(THD *thd, String *str, enum_query_type query_type); + + With_clause *get_owner() { return owner; } + + bool contains_sq_with_recursive_reference() + { return sq_dep_map & mutually_recursive; } + + bool no_rec_ref_on_top_level() + { return !(top_level_dep_map & mutually_recursive); } + + table_map get_mutually_recursive() { return mutually_recursive; } + + With_element *get_next_mutually_recursive() + { return next_mutually_recursive; } + + TABLE_LIST *get_sq_rec_ref() { return sq_rec_ref; } + + bool is_anchor(st_select_lex *sel); + + void move_anchors_ahead(); + + bool is_unrestricted(); + + bool is_with_prepared_anchor(); + + void mark_as_with_prepared_anchor(); + + bool is_cleaned(); + + void mark_as_cleaned(); + + void reset_recursive_for_exec(); + + void cleanup_stabilized(); + + void set_as_stabilized(); + + bool is_stabilized(); + + bool all_are_stabilized(); + + bool instantiate_tmp_tables(); + + void prepare_for_next_iteration(); + + void set_cycle_list(List *cycle_list_arg); + + friend class With_clause; + + friend + bool LEX::resolve_references_to_cte(TABLE_LIST *tables, + TABLE_LIST **tables_last); +}; + +const uint max_number_of_elements_in_with_clause= sizeof(table_map)*8; + +/** + @class With_clause + @brief Set of with_elements + + It has a reference to the first with element from this with clause. + This reference allows to navigate through all the elements of the with clause. + It contains a reference to the unit to which this with clause is attached. + It also contains a flag saying whether this with clause was specified as recursive. 
+*/ + +class With_clause : public Sql_alloc +{ +private: + st_select_lex_unit *owner; // the unit this with clause attached to + + /* The list of all with elements from this with clause */ + SQL_I_List with_list; + /* + The with clause immediately containing this with clause if there is any, + otherwise NULL. Now used only at parsing. + */ + With_clause *embedding_with_clause; + /* + The next with the clause of the chain of with clauses encountered + in the current statement + */ + With_clause *next_with_clause; + /* Set to true if dependencies between with elements have been checked */ + bool dependencies_are_checked; + /* + The bitmap of all recursive with elements whose specifications + are not complied with restrictions imposed by the SQL standards + on recursive specifications. + */ + table_map unrestricted; + /* + The bitmap of all recursive with elements whose anchors + has been already prepared. + */ + table_map with_prepared_anchor; + table_map cleaned; + /* + The bitmap of all recursive with elements that + has been already materialized + */ + table_map stabilized; + +public: + /* If true the specifier RECURSIVE is present in the with clause */ + bool with_recursive; + + With_clause(bool recursive_fl, With_clause *emb_with_clause) + : owner(NULL), embedding_with_clause(emb_with_clause), + next_with_clause(NULL), dependencies_are_checked(false), unrestricted(0), + with_prepared_anchor(0), cleaned(0), stabilized(0), + with_recursive(recursive_fl) + { } + + bool add_with_element(With_element *elem); + + /* Add this with clause to the list of with clauses used in the statement */ + void add_to_list(With_clause **ptr, With_clause ** &last_next) + { + if (embedding_with_clause) + { + /* + An embedded with clause is always placed before the embedding one + in the list of with clauses used in the query. 
+ */ + while (*ptr != embedding_with_clause) + ptr= &(*ptr)->next_with_clause; + *ptr= this; + next_with_clause= embedding_with_clause; + } + else + { + *last_next= this; + last_next= &this->next_with_clause; + } + } + + st_select_lex_unit *get_owner() { return owner; } + + void set_owner(st_select_lex_unit *unit) { owner= unit; } + + void attach_to(st_select_lex *select_lex); + + With_clause *pop() { return embedding_with_clause; } + + bool check_dependencies(); + + bool check_anchors(); + + void move_anchors_ahead(); + + With_element *find_table_def(TABLE_LIST *table, With_element *barrier); + + With_element *find_table_def_in_with_clauses(TABLE_LIST *table); + + bool prepare_unreferenced_elements(THD *thd); + + void add_unrestricted(table_map map) { unrestricted|= map; } + + void print(THD *thd, String *str, enum_query_type query_type); + + friend class With_element; + + friend + bool LEX::check_dependencies_in_with_clauses(); +}; + +inline +bool With_element::is_unrestricted() +{ + return owner->unrestricted & get_elem_map(); +} + +inline +bool With_element::is_with_prepared_anchor() +{ + return owner->with_prepared_anchor & get_elem_map(); +} + +inline +void With_element::mark_as_with_prepared_anchor() +{ + owner->with_prepared_anchor|= mutually_recursive; +} + + +inline +bool With_element::is_cleaned() +{ + return owner->cleaned & get_elem_map(); +} + + +inline +void With_element::mark_as_cleaned() +{ + owner->cleaned|= get_elem_map(); +} + + +inline +void With_element::reset_recursive_for_exec() +{ + DBUG_ASSERT(is_recursive); + level= 0; + owner->with_prepared_anchor&= ~mutually_recursive; + owner->cleaned&= ~get_elem_map(); + cleanup_stabilized(); + spec->columns_are_renamed= false; +} + + + +inline +void With_element::cleanup_stabilized() +{ + owner->stabilized&= ~mutually_recursive; +} + + +inline +void With_element::set_as_stabilized() +{ + owner->stabilized|= get_elem_map(); +} + + +inline +bool With_element::is_stabilized() +{ + return 
owner->stabilized & get_elem_map(); +} + + +inline +bool With_element::all_are_stabilized() +{ + return (owner->stabilized & mutually_recursive) == mutually_recursive; +} + + +inline +void With_element::prepare_for_next_iteration() +{ + With_element *with_elem= this; + while ((with_elem= with_elem->get_next_mutually_recursive()) != this) + { + TABLE *rec_table= with_elem->rec_result->first_rec_table_to_update; + if (rec_table) + rec_table->reginfo.join_tab->preread_init_done= false; + } +} + + +inline +void With_clause::attach_to(st_select_lex *select_lex) +{ + for (With_element *with_elem= with_list.first; + with_elem; + with_elem= with_elem->next) + { + select_lex->register_unit(with_elem->spec, NULL); + } +} + + +inline +void st_select_lex::set_with_clause(With_clause *with_clause) +{ + master_unit()->with_clause= with_clause; + if (with_clause) + with_clause->set_owner(master_unit()); +} + +#endif /* SQL_CTE_INCLUDED */ diff --git a/sql/sql_cursor.cc b/sql/sql_cursor.cc new file mode 100644 index 00000000..cef8ac16 --- /dev/null +++ b/sql/sql_cursor.cc @@ -0,0 +1,458 @@ +/* + Copyright (c) 2005, 2010, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation /* gcc class implementation */ +#endif + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "sql_cursor.h" +#include "probes_mysql.h" +#include "sql_parse.h" // mysql_execute_command + +/**************************************************************************** + Declarations. +****************************************************************************/ + +/** + Materialized_cursor -- an insensitive materialized server-side + cursor. The result set of this cursor is saved in a temporary + table at open. The cursor itself is simply an interface for the + handler of the temporary table. +*/ + +class Materialized_cursor: public Server_side_cursor +{ + MEM_ROOT main_mem_root; + /* A fake unit to supply to select_send when fetching */ + SELECT_LEX_UNIT fake_unit; + TABLE *table; + List item_list; + ulong fetch_limit; + ulong fetch_count; + bool is_rnd_inited; +public: + Materialized_cursor(select_result *result, TABLE *table); + + int send_result_set_metadata(THD *thd, List &send_result_set_metadata); + virtual bool is_open() const { return table != 0; } + virtual int open(JOIN *join __attribute__((unused))); + virtual void fetch(ulong num_rows); + virtual void close(); + bool export_structure(THD *thd, Row_definition_list *defs) + { + return table->export_structure(thd, defs); + } + virtual ~Materialized_cursor(); + + void on_table_fill_finished(); +}; + + +/** + Select_materialize -- a mediator between a cursor query and the + protocol. In case we were not able to open a non-materialzed + cursor, it creates an internal temporary HEAP table, and insert + all rows into it. When the table reaches max_heap_table_size, + it's converted to a MyISAM table. 
Later this table is used to + create a Materialized_cursor. +*/ + +class Select_materialize: public select_unit +{ + select_result *result; /**< the result object of the caller (PS or SP) */ +public: + Materialized_cursor *materialized_cursor; + Select_materialize(THD *thd_arg, select_result *result_arg): + select_unit(thd_arg), result(result_arg), materialized_cursor(0) {} + virtual bool send_result_set_metadata(List &list, uint flags); + bool send_eof() { return false; } + bool view_structure_only() const + { + return result->view_structure_only(); + } +}; + + +/**************************************************************************/ + +/** + Attempt to open a materialized cursor. + + @param thd thread handle + @param[in] result result class of the caller used as a destination + for the rows fetched from the cursor + @param[out] pcursor a pointer to store a pointer to cursor in + + @retval + 0 the query has been successfully executed; in this + case pcursor may or may not contain + a pointer to an open cursor. + @retval + non-zero an error, 'pcursor' has been left intact. 
+*/ + +int mysql_open_cursor(THD *thd, select_result *result, + Server_side_cursor **pcursor) +{ + sql_digest_state *parent_digest; + PSI_statement_locker *parent_locker; + select_result *save_result; + Select_materialize *result_materialize; + LEX *lex= thd->lex; + int rc; + + if (!(result_materialize= new (thd->mem_root) Select_materialize(thd, result))) + return 1; + + save_result= lex->result; + + lex->result= result_materialize; + + MYSQL_QUERY_EXEC_START(thd->query(), + thd->thread_id, + thd->get_db(), + &thd->security_ctx->priv_user[0], + (char *) thd->security_ctx->host_or_ip, + 2); + parent_digest= thd->m_digest; + parent_locker= thd->m_statement_psi; + thd->m_digest= NULL; + thd->m_statement_psi= NULL; + /* Mark that we can't use query cache with cursors */ + thd->query_cache_is_applicable= 0; + rc= mysql_execute_command(thd); + thd->lex->restore_set_statement_var(); + thd->m_digest= parent_digest; + thd->m_statement_psi= parent_locker; + MYSQL_QUERY_EXEC_DONE(rc); + + lex->result= save_result; + /* + Possible options here: + - a materialized cursor is open. In this case rc is 0 and + result_materialize->materialized is not NULL + - an error occurred during materialization. + result_materialize->materialized_cursor is not NULL, but rc != 0 + - successful completion of mysql_execute_command without + a cursor: rc is 0, result_materialize->materialized_cursor is NULL. + This is possible if some command writes directly to the + network, bypassing select_result mechanism. An example of + such command is SHOW VARIABLES or SHOW STATUS. + */ + if (rc) + { + if (result_materialize->materialized_cursor) + { + /* Rollback metadata in the client-server protocol. 
*/ + result_materialize->abort_result_set(); + + delete result_materialize->materialized_cursor; + } + + goto end; + } + + if (result_materialize->materialized_cursor) + { + Materialized_cursor *materialized_cursor= + result_materialize->materialized_cursor; + + /* + NOTE: close_thread_tables() has been called in + mysql_execute_command(), so all tables except from the cursor + temporary table have been closed. + */ + + if ((rc= materialized_cursor->open(0))) + { + delete materialized_cursor; + goto end; + } + + *pcursor= materialized_cursor; + rc|= (thd->stmt_arena->cleanup_stmt(true)? 1 : 0); + } + +end: + delete result_materialize; + return rc; +} + +/**************************************************************************** + Server_side_cursor +****************************************************************************/ + +Server_side_cursor::~Server_side_cursor() = default; + + +void Server_side_cursor::operator delete(void *ptr, size_t size) +{ + Server_side_cursor *cursor= (Server_side_cursor*) ptr; + MEM_ROOT own_root= *cursor->mem_root; + + DBUG_ENTER("Server_side_cursor::operator delete"); + TRASH_FREE(ptr, size); + /* + If this cursor has never been opened mem_root is empty. Otherwise + mem_root points to the memory the cursor object was allocated in. + In this case it's important to call free_root last, and free a copy + instead of *mem_root to avoid writing into freed memory. 
+ */ + free_root(&own_root, MYF(0)); + DBUG_VOID_RETURN; +} + + +/*************************************************************************** + Materialized_cursor +****************************************************************************/ + +Materialized_cursor::Materialized_cursor(select_result *result_arg, + TABLE *table_arg) + :Server_side_cursor(&table_arg->mem_root, result_arg), + table(table_arg), + fetch_limit(0), + fetch_count(0), + is_rnd_inited(0) +{ + fake_unit.init_query(); + fake_unit.thd= table->in_use; +} + + +/** + Preserve the original metadata to be sent to the client. + Initiate sending of the original metadata to the client + (call Protocol::send_result_set_metadata()). + + @param thd Thread identifier. + @param send_result_set_metadata List of fields that would be sent. +*/ + +int Materialized_cursor::send_result_set_metadata( + THD *thd, List &send_result_set_metadata) +{ + Query_arena backup_arena; + int rc; + List_iterator_fast it_org(send_result_set_metadata); + List_iterator_fast it_dst(item_list); + Item *item_org; + Item *item_dst; + + thd->set_n_backup_active_arena(this, &backup_arena); + + if ((rc= table->fill_item_list(&item_list))) + goto end; + + DBUG_ASSERT(send_result_set_metadata.elements == item_list.elements); + + /* + Unless we preserve the original metadata, it will be lost, + since new fields describe columns of the temporary table. + Allocate a copy of the name for safety only. Currently + items with original names are always kept in memory, + but in case this changes a memory leak may be hard to notice. + */ + while ((item_dst= it_dst++, item_org= it_org++)) + { + Item_ident *ident= static_cast(item_dst); + Send_field send_field(thd, item_org); + + ident->db_name= thd->strmake_lex_cstring(send_field.db_name); + ident->table_name= thd->strmake_lex_cstring(send_field.table_name); + } + + /* + Original metadata result set should be sent here. 
After + mysql_execute_command() is finished, item_list can not be used for + sending metadata, because it references closed table. + */ + rc= result->send_result_set_metadata(item_list, Protocol::SEND_NUM_ROWS); + +end: + thd->restore_active_arena(this, &backup_arena); + /* Check for thd->is_error() in case of OOM */ + return rc || thd->is_error(); +} + + +int Materialized_cursor::open(JOIN *join __attribute__((unused))) +{ + THD *thd= fake_unit.thd; + int rc; + Query_arena backup_arena; + + thd->set_n_backup_active_arena(this, &backup_arena); + + /* Create a list of fields and start sequential scan. */ + + rc= result->prepare(item_list, &fake_unit); + rc= !rc && table->file->ha_rnd_init_with_error(TRUE); + is_rnd_inited= !rc; + + thd->restore_active_arena(this, &backup_arena); + + /* Commit or rollback metadata in the client-server protocol. */ + + if (!rc) + { + thd->server_status|= SERVER_STATUS_CURSOR_EXISTS; + result->send_eof(); + } + else + { + result->abort_result_set(); + } + + on_table_fill_finished(); + + return rc; +} + + +/** + Fetch up to the given number of rows from a materialized cursor. + + Precondition: the cursor is open. + + If the cursor points after the last row, the fetch will automatically + close the cursor and not send any data (except the 'EOF' packet + with SERVER_STATUS_LAST_ROW_SENT). This is an extra round trip + and probably should be improved to return + SERVER_STATUS_LAST_ROW_SENT along with the last row. +*/ + +void Materialized_cursor::fetch(ulong num_rows) +{ + THD *thd= table->in_use; + + int res= 0; + result->begin_dataset(); + for (fetch_limit+= num_rows; fetch_count < fetch_limit; fetch_count++) + { + if ((res= table->file->ha_rnd_next(table->record[0]))) + break; + /* Send data only if the read was successful. */ + /* + If network write failed (i.e. due to a closed socked), + the error has already been set. Just return. 
+ */ + if (result->send_data(item_list) > 0) + return; + } + + switch (res) { + case 0: + thd->server_status|= SERVER_STATUS_CURSOR_EXISTS; + result->send_eof(); + break; + case HA_ERR_END_OF_FILE: + thd->server_status|= SERVER_STATUS_LAST_ROW_SENT; + result->send_eof(); + close(); + break; + default: + table->file->print_error(res, MYF(0)); + close(); + break; + } +} + + +void Materialized_cursor::close() +{ + /* Free item_list items */ + free_items(); + if (is_rnd_inited) + (void) table->file->ha_rnd_end(); + /* + We need to grab table->mem_root to prevent free_tmp_table from freeing: + the cursor object was allocated in this memory. + */ + main_mem_root= table->mem_root; + mem_root= &main_mem_root; + clear_alloc_root(&table->mem_root); + free_tmp_table(table->in_use, table); + table= 0; +} + + +Materialized_cursor::~Materialized_cursor() +{ + if (is_open()) + close(); +} + + +/* + @brief + Perform actions that are to be done when cursor materialization has + finished. + + @detail + This function is called when "OPEN $cursor" has finished filling the + temporary table with rows that the cursor will return. + + Temporary table has table->field->orig_table pointing at the tables + that are used in the cursor definition query. Pointers to these tables + will not be valid after the query finishes. So, we do what is done for + regular tables: have orig_table point at the table that the fields belong + to. 
+*/ + +void Materialized_cursor::on_table_fill_finished() +{ + uint fields= table->s->fields; + for (uint i= 0; i < fields; i++) + table->field[i]->orig_table= table->field[i]->table; +} + +/*************************************************************************** + Select_materialize +****************************************************************************/ + +bool Select_materialize::send_result_set_metadata(List &list, uint flags) +{ + DBUG_ASSERT(table == 0); + if (create_result_table(unit->thd, unit->get_column_types(true), + FALSE, + thd->variables.option_bits | TMP_TABLE_ALL_COLUMNS, + &empty_clex_str, FALSE, TRUE, TRUE, 0)) + return TRUE; + + materialized_cursor= new (&table->mem_root) + Materialized_cursor(result, table); + + if (!materialized_cursor) + { + free_tmp_table(table->in_use, table); + table= 0; + return TRUE; + } + + if (materialized_cursor->send_result_set_metadata(unit->thd, list)) + { + delete materialized_cursor; + table= 0; + materialized_cursor= 0; + return TRUE; + } + + return FALSE; +} + diff --git a/sql/sql_cursor.h b/sql/sql_cursor.h new file mode 100644 index 00000000..b9d0b41e --- /dev/null +++ b/sql/sql_cursor.h @@ -0,0 +1,74 @@ +/* Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef _sql_cursor_h_ +#define _sql_cursor_h_ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class interface */ +#endif + +#include "sql_class.h" /* Query_arena */ + +class JOIN; + +/** + @file + + Declarations for implementation of server side cursors. Only + read-only non-scrollable cursors are currently implemented. +*/ + +/** + Server_side_cursor -- an interface for materialized + implementation of cursors. All cursors are self-contained + (created in their own memory root). For that reason they must + be deleted only using a pointer to Server_side_cursor, not to + its base class. +*/ + +class Server_side_cursor: protected Query_arena +{ +protected: + /** Row destination used for fetch */ + select_result *result; +public: + Server_side_cursor(MEM_ROOT *mem_root_arg, select_result *result_arg) + :Query_arena(mem_root_arg, STMT_INITIALIZED), result(result_arg) + {} + + virtual bool is_open() const= 0; + + virtual int open(JOIN *top_level_join)= 0; + virtual void fetch(ulong num_rows)= 0; + virtual void close()= 0; + virtual bool export_structure(THD *thd, Row_definition_list *defs) + { + DBUG_ASSERT(0); + return true; + } + virtual ~Server_side_cursor(); + + static void *operator new(size_t size, MEM_ROOT *mem_root) + { return alloc_root(mem_root, size); } + static void operator delete(void *ptr, size_t size); + static void operator delete(void *, MEM_ROOT *){} +}; + + +int mysql_open_cursor(THD *thd, select_result *result, + Server_side_cursor **res); + +#endif /* _sql_cusor_h_ */ diff --git a/sql/sql_db.cc b/sql/sql_db.cc new file mode 100644 index 00000000..2d582eb8 --- /dev/null +++ b/sql/sql_db.cc @@ -0,0 +1,2165 @@ +/* + Copyright (c) 2000, 2014, Oracle and/or its affiliates. 
+ Copyright (c) 2009, 2016, MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* create and drop of databases */ + +#include "mariadb.h" /* NO_EMBEDDED_ACCESS_CHECKS */ +#include "sql_priv.h" +#include "unireg.h" +#include "sql_db.h" +#include "sql_cache.h" // query_cache_* +#include "lock.h" // lock_schema_name +#include "sql_table.h" // build_table_filename, + // filename_to_tablename + // validate_comment_length +#include "sql_rename.h" // mysql_rename_tables +#include "sql_acl.h" // SELECT_ACL, DB_ACLS, + // acl_get, check_grant_db +#include "log_event.h" // Query_log_event +#include "sql_base.h" // lock_table_names +#include "sql_handler.h" // mysql_ha_rm_tables +#include "sql_class.h" +#include +#include "sp_head.h" +#include "sp.h" +#include "events.h" +#include "sql_handler.h" +#include "sql_statistics.h" +#include "ddl_log.h" // ddl_log functions +#include +#include +#include "log.h" +#ifdef _WIN32 +#include +#endif +#include "debug.h" // debug_crash_here + +#define MAX_DROP_TABLE_Q_LEN 1024 + +const char *del_exts[]= {".BAK", ".opt", NullS}; +static TYPELIB deletable_extensions= +{array_elements(del_exts)-1,"del_exts", del_exts, NULL}; + +static bool find_db_tables_and_rm_known_files(THD *, MY_DIR *, const char *, + const char *, TABLE_LIST **); + +long mysql_rm_arc_files(THD *thd, MY_DIR *dirp, const char *org_path); +my_bool 
rm_dir_w_symlink(const char *org_path, my_bool send_error); +static void mysql_change_db_impl(THD *thd, + LEX_CSTRING *new_db_name, + privilege_t new_db_access, + CHARSET_INFO *new_db_charset); +static bool mysql_rm_db_internal(THD *thd, const LEX_CSTRING *db, + bool if_exists, bool silent); + + +/* Database options hash */ +static HASH dboptions; +static my_bool dboptions_init= 0; +static mysql_rwlock_t LOCK_dboptions; + +/* Structure for database options */ +typedef struct my_dbopt_st +{ + char *name; /* Database name */ + uint name_length; /* Database length name */ + CHARSET_INFO *charset; /* Database default character set */ + LEX_STRING comment; /* Database comment */ +} my_dbopt_t; + + +/** + Return TRUE if db1_name is equal to db2_name, FALSE otherwise. + + The function allows to compare database names according to the MariaDB + rules. The database names db1 and db2 are equal if: + - db1 is NULL and db2 is NULL; + or + - db1 is not-NULL, db2 is not-NULL, db1 is equal to db2 in + table_alias_charset + + This is the same rules as we use for filenames. 
+*/ + +static inline bool +cmp_db_names(LEX_CSTRING *db1_name, const LEX_CSTRING *db2_name) +{ + return (db1_name->length == db2_name->length && + (db1_name->length == 0 || + my_strcasecmp(table_alias_charset, + db1_name->str, db2_name->str) == 0)); +} + +#ifdef HAVE_PSI_INTERFACE +static PSI_rwlock_key key_rwlock_LOCK_dboptions; +static PSI_rwlock_key key_rwlock_LOCK_dbnames; +static PSI_rwlock_key key_rwlock_LOCK_rmdir; + +static PSI_rwlock_info all_database_names_rwlocks[]= { + {&key_rwlock_LOCK_dboptions, "LOCK_dboptions", PSI_FLAG_GLOBAL}, + {&key_rwlock_LOCK_dbnames, "LOCK_dbnames", PSI_FLAG_GLOBAL}, + {&key_rwlock_LOCK_rmdir, "LOCK_rmdir",PSI_FLAG_GLOBAL}, +}; + +static void init_database_names_psi_keys(void) +{ + const char *category= "sql"; + int count; + + if (PSI_server == NULL) + return; + + count= array_elements(all_database_names_rwlocks); + PSI_server->register_rwlock(category, all_database_names_rwlocks, count); +} +#endif + +static mysql_rwlock_t rmdir_lock; + +/* + Cache of C strings for existing database names. + + The only use of it is to avoid repeated expensive + my_access() calls. + + Provided operations are lookup, insert (after successfull my_access()) + and clear (this is called whenever rmdir is called). 
+*/ +struct dbname_cache_t +{ +private: + Hash_set m_set; + mysql_rwlock_t m_lock; + + static uchar *get_key(const LEX_STRING *ls, size_t *sz, my_bool) + { + *sz= ls->length; + return (uchar *) ls->str; + } + +public: + dbname_cache_t() + : m_set(key_memory_dbnames_cache, table_alias_charset, 10, 0, + sizeof(char *), (my_hash_get_key) get_key, my_free, 0) + { + mysql_rwlock_init(key_rwlock_LOCK_dbnames, &m_lock); + } + + bool contains(const char *s) + { + auto sz= strlen(s); + mysql_rwlock_rdlock(&m_lock); + bool ret= m_set.find(s, sz) != 0; + mysql_rwlock_unlock(&m_lock); + return ret; + } + + void insert(const char *s) + { + auto len= strlen(s); + auto ls= (LEX_STRING *) my_malloc(key_memory_dbnames_cache, + sizeof(LEX_STRING) + strlen(s) + 1, 0); + + if (!ls) + return; + + ls->length= len; + ls->str= (char *) (ls + 1); + + memcpy(ls->str, s, len + 1); + mysql_rwlock_wrlock(&m_lock); + bool found= m_set.find(s, len) != 0; + if (!found) + m_set.insert(ls); + mysql_rwlock_unlock(&m_lock); + if (found) + my_free(ls); + } + + void clear() + { + mysql_rwlock_wrlock(&m_lock); + m_set.clear(); + mysql_rwlock_unlock(&m_lock); + } + + ~dbname_cache_t() + { + mysql_rwlock_destroy(&m_lock); + } +}; + +static dbname_cache_t* dbname_cache; + +static void dbname_cache_init() +{ + static MY_ALIGNED(16) char buf[sizeof(dbname_cache_t)]; + DBUG_ASSERT(!dbname_cache); + dbname_cache= new (buf) dbname_cache_t; + mysql_rwlock_init(key_rwlock_LOCK_rmdir, &rmdir_lock); +} + +static void dbname_cache_destroy() +{ + if (!dbname_cache) + return; + + dbname_cache->~dbname_cache_t(); + dbname_cache= 0; + mysql_rwlock_destroy(&rmdir_lock); +} + +static int my_rmdir(const char *dir) +{ + auto ret= rmdir(dir); + if (ret) + return ret; + mysql_rwlock_wrlock(&rmdir_lock); + dbname_cache->clear(); + mysql_rwlock_unlock(&rmdir_lock); + return 0; +} + + /* + Function we use in the creation of our hash to get key. 
+*/ + +extern "C" uchar* dboptions_get_key(my_dbopt_t *opt, size_t *length, + my_bool not_used); + +uchar* dboptions_get_key(my_dbopt_t *opt, size_t *length, + my_bool not_used __attribute__((unused))) +{ + *length= opt->name_length; + return (uchar*) opt->name; +} + + +/* + Helper function to write a query to binlog used by mysql_rm_db() +*/ + +static inline int write_to_binlog(THD *thd, const char *query, size_t q_len, + const char *db, size_t db_len) +{ + Query_log_event qinfo(thd, query, q_len, FALSE, TRUE, FALSE, 0); + qinfo.db= db; + qinfo.db_len= (uint32)db_len; + return mysql_bin_log.write(&qinfo); +} + + +/* + Function to free dboptions hash element +*/ + +extern "C" void free_dbopt(void *dbopt); + +void free_dbopt(void *dbopt) +{ + my_free(dbopt); +} + + + +/** + Initialize database option cache. + + @note Must be called before any other database function is called. + + @retval 0 ok + @retval 1 Fatal error +*/ + +bool my_dboptions_cache_init(void) +{ +#ifdef HAVE_PSI_INTERFACE + init_database_names_psi_keys(); +#endif + + bool error= 0; + mysql_rwlock_init(key_rwlock_LOCK_dboptions, &LOCK_dboptions); + if (!dboptions_init) + { + dboptions_init= 1; + error= my_hash_init(key_memory_dboptions_hash, &dboptions, + table_alias_charset, 32, 0, 0, (my_hash_get_key) + dboptions_get_key, free_dbopt, 0); + } + dbname_cache_init(); + return error; +} + + + +/** + Free database option hash and locked databases hash. +*/ + +void my_dboptions_cache_free(void) +{ + if (dboptions_init) + { + dboptions_init= 0; + my_hash_free(&dboptions); + dbname_cache_destroy(); + mysql_rwlock_destroy(&LOCK_dboptions); + } +} + + +/** + Cleanup cached options. +*/ + +void my_dbopt_cleanup(void) +{ + mysql_rwlock_wrlock(&LOCK_dboptions); + my_hash_free(&dboptions); + my_hash_init(key_memory_dboptions_hash, &dboptions, table_alias_charset, 32, + 0, 0, (my_hash_get_key) dboptions_get_key, free_dbopt, 0); + mysql_rwlock_unlock(&LOCK_dboptions); +} + + +/* + Find database options in the hash. 
+ + DESCRIPTION + Search a database options in the hash, usings its path. + Fills "create" on success. + + RETURN VALUES + 0 on success. + 1 on error. +*/ + +static my_bool get_dbopt(THD *thd, const char *dbname, + Schema_specification_st *create) +{ + my_dbopt_t *opt; + uint length; + my_bool error= 1; + + length= (uint) strlen(dbname); + + mysql_rwlock_rdlock(&LOCK_dboptions); + if ((opt= (my_dbopt_t*) my_hash_search(&dboptions, (uchar*) dbname, length))) + { + create->default_table_charset= opt->charset; + if (opt->comment.length) + { + create->schema_comment= thd->make_clex_string(opt->comment.str, + opt->comment.length); + } + error= 0; + } + mysql_rwlock_unlock(&LOCK_dboptions); + return error; +} + + +/* + Writes database options into the hash. + + DESCRIPTION + Inserts database options into the hash, or updates + options if they are already in the hash. + + RETURN VALUES + 0 on success. + 1 on error. +*/ + +static my_bool put_dbopt(const char *dbname, Schema_specification_st *create) +{ + my_dbopt_t *opt; + uint length; + my_bool error= 0; + DBUG_ENTER("put_dbopt"); + + length= (uint) strlen(dbname); + + mysql_rwlock_wrlock(&LOCK_dboptions); + if (!(opt= (my_dbopt_t*) my_hash_search(&dboptions, (uchar*) dbname, + length))) + { + /* Options are not in the hash, insert them */ + char *tmp_name; + char *tmp_comment= NULL; + if (!my_multi_malloc(key_memory_dboptions_hash, MYF(MY_WME | MY_ZEROFILL), + &opt, (uint) sizeof(*opt), &tmp_name, (uint) length+1, + &tmp_comment, (uint) DATABASE_COMMENT_MAXLEN+1, + NullS)) + { + error= 1; + goto end; + } + + opt->name= tmp_name; + strmov(opt->name, dbname); + opt->name_length= length; + opt->comment.str= tmp_comment; + if (unlikely((error= my_hash_insert(&dboptions, (uchar*) opt)))) + { + my_free(opt); + goto end; + } + } + + /* Update / write options in hash */ + opt->charset= create->default_table_charset; + + if (create->schema_comment) + { + strmov(opt->comment.str, create->schema_comment->str); + 
opt->comment.length= create->schema_comment->length; + } + +end: + mysql_rwlock_unlock(&LOCK_dboptions); + DBUG_RETURN(error); +} + + +/* + Deletes database options from the hash. +*/ + +static void del_dbopt(const char *path) +{ + my_dbopt_t *opt; + mysql_rwlock_wrlock(&LOCK_dboptions); + if ((opt= (my_dbopt_t *)my_hash_search(&dboptions, (const uchar*) path, + strlen(path)))) + my_hash_delete(&dboptions, (uchar*) opt); + mysql_rwlock_unlock(&LOCK_dboptions); +} + + +/* + Create database options file: + + DESCRIPTION + Currently database default charset, default collation + and comment are stored there. + + RETURN VALUES + 0 ok + 1 Could not create file or write to it. Error sent through my_error() +*/ + +static bool write_db_opt(THD *thd, const char *path, + Schema_specification_st *create) +{ + File file; + char buf[256+DATABASE_COMMENT_MAXLEN]; + bool error=1; + + if (create->schema_comment) + { + if (validate_comment_length(thd, create->schema_comment, + DATABASE_COMMENT_MAXLEN, + ER_TOO_LONG_DATABASE_COMMENT, + thd->lex->name.str)) + return error; + } + + if (thd->lex->sql_command == SQLCOM_ALTER_DB && + (!create->schema_comment || !create->default_table_charset)) + { + /* Use existing values of schema_comment and charset for + ALTER DATABASE queries */ + Schema_specification_st tmp; + tmp.init(); + load_db_opt(thd, path, &tmp); + + if (!create->schema_comment) + create->schema_comment= tmp.schema_comment; + + if (!create->default_table_charset) + create->default_table_charset= tmp.default_table_charset; + } + + if (!create->default_table_charset) + create->default_table_charset= thd->variables.collation_server; + + if (put_dbopt(path, create)) + return 1; + + if ((file= mysql_file_create(key_file_dbopt, path, CREATE_MODE, + O_RDWR | O_TRUNC, MYF(MY_WME))) >= 0) + { + ulong length; + length= (ulong) (strxnmov(buf, sizeof(buf)-1, "default-character-set=", + create->default_table_charset->cs_name.str, + "\ndefault-collation=", + 
create->default_table_charset->coll_name.str, + "\n", NullS) - buf); + + if (create->schema_comment) + length= (ulong) (strxnmov(buf+length, sizeof(buf)-1-length, + "comment=", create->schema_comment->str, + "\n", NullS) - buf); + + /* Error is written by mysql_file_write */ + if (!mysql_file_write(file, (uchar*) buf, length, MYF(MY_NABP+MY_WME))) + error=0; + mysql_file_close(file, MYF(0)); + } + return error; +} + + +/* + Load database options file + + load_db_opt() + path Path for option file + create Where to store the read options + + DESCRIPTION + + RETURN VALUES + 0 File found + 1 No database file or could not open it + +*/ + +bool load_db_opt(THD *thd, const char *path, Schema_specification_st *create) +{ + File file; + char buf[256+DATABASE_COMMENT_MAXLEN]; + DBUG_ENTER("load_db_opt"); + bool error=1; + size_t nbytes; + myf utf8_flag= thd->get_utf8_flag(); + + bzero((char*) create,sizeof(*create)); + create->default_table_charset= thd->variables.collation_server; + + /* Check if options for this database are already in the hash */ + if (!get_dbopt(thd, path, create)) + DBUG_RETURN(0); + + /* Otherwise, load options from the .opt file */ + if ((file= mysql_file_open(key_file_dbopt, + path, O_RDONLY | O_SHARE, MYF(0))) < 0) + goto err1; + + IO_CACHE cache; + if (init_io_cache(&cache, file, IO_SIZE, READ_CACHE, 0, 0, MYF(0))) + goto err2; + + while ((int) (nbytes= my_b_gets(&cache, (char*) buf, sizeof(buf))) > 0) + { + char *pos= buf+nbytes-1; + /* Remove end space and control characters */ + while (pos > buf && !my_isgraph(&my_charset_latin1, pos[-1])) + pos--; + *pos=0; + if ((pos= strchr(buf, '='))) + { + if (!strncmp(buf,"default-character-set", (pos-buf))) + { + /* + Try character set name, and if it fails + try collation name, probably it's an old + 4.1.0 db.opt file, which didn't have + separate default-character-set and + default-collation commands. 
+ */ + if (!(create->default_table_charset= + get_charset_by_csname(pos+1, MY_CS_PRIMARY, MYF(utf8_flag))) && + !(create->default_table_charset= + get_charset_by_name(pos+1, MYF(utf8_flag)))) + { + sql_print_error("Error while loading database options: '%s':",path); + sql_print_error(ER_THD(thd, ER_UNKNOWN_CHARACTER_SET),pos+1); + create->default_table_charset= default_charset_info; + } + } + else if (!strncmp(buf,"default-collation", (pos-buf))) + { + if (!(create->default_table_charset= get_charset_by_name(pos+1, MYF(utf8_flag)))) + { + sql_print_error("Error while loading database options: '%s':",path); + sql_print_error(ER_THD(thd, ER_UNKNOWN_COLLATION),pos+1); + create->default_table_charset= default_charset_info; + } + } + else if (!strncmp(buf, "comment", (pos-buf))) + create->schema_comment= thd->make_clex_string(pos+1, strlen(pos+1)); + } + } + /* + Put the loaded value into the hash. + Note that another thread could've added the same + entry to the hash after we called get_dbopt(), + but it's not an error, as put_dbopt() takes this + possibility into account. + */ + error= put_dbopt(path, create); + + end_io_cache(&cache); +err2: + mysql_file_close(file, MYF(0)); +err1: + DBUG_RETURN(error); +} + + +/* + Retrieve database options by name. Load database options file or fetch from + cache. + + SYNOPSIS + load_db_opt_by_name() + db_name Database name + db_create_info Where to store the database options + + DESCRIPTION + load_db_opt_by_name() is a shortcut for load_db_opt(). + + NOTE + Although load_db_opt_by_name() (and load_db_opt()) returns status of + the operation, it is useless usually and should be ignored. The problem + is that there are 1) system databases ("mysql") and 2) virtual + databases ("information_schema"), which do not contain options file. + So, load_db_opt[_by_name]() returns FALSE for these databases, but this + is not an error. 
+ + load_db_opt[_by_name]() clears db_create_info structure in any case, so + even on failure it contains valid data. So, common use case is just + call load_db_opt[_by_name]() without checking return value and use + db_create_info right after that. + + RETURN VALUES (read NOTE!) + FALSE Success + TRUE Failed to retrieve options +*/ + +bool load_db_opt_by_name(THD *thd, const char *db_name, + Schema_specification_st *db_create_info) +{ + char db_opt_path[FN_REFLEN + 1]; + + /* + Pass an empty file name, and the database options file name as extension + to avoid table name to file name encoding. + */ + (void) build_table_filename(db_opt_path, sizeof(db_opt_path) - 1, + db_name, "", MY_DB_OPT_FILE, 0); + + return load_db_opt(thd, db_opt_path, db_create_info); +} + + +/** + Return default database collation. + + @param thd Thread context. + @param db_name Database name. + + @return CHARSET_INFO object. The operation always return valid character + set, even if the database does not exist. +*/ + +CHARSET_INFO *get_default_db_collation(THD *thd, const char *db_name) +{ + Schema_specification_st db_info; + + if (thd->db.str != NULL && strcmp(db_name, thd->db.str) == 0) + return thd->db_charset; + + load_db_opt_by_name(thd, db_name, &db_info); + + /* + NOTE: even if load_db_opt_by_name() fails, + db_info.default_table_charset contains valid character set + (collation_server). We should not fail if load_db_opt_by_name() fails, + because it is valid case. If a database has been created just by + "mkdir", it does not contain db.opt file, but it is valid database. + */ + + return db_info.default_table_charset; +} + + +/* + Create a database + + SYNOPSIS + mysql_create_db_internal() + thd Thread handler + db Name of database to create + Function assumes that this is already validated. + options DDL options, e.g. IF NOT EXISTS + create_info Database create options (like character set) + silent Used by replication when internally creating a database. 
+ In this case the entry should not be logged. + + SIDE-EFFECTS + 1. Report back to client that command succeeded (my_ok) + 2. Report errors to client + 3. Log event to binary log + (The 'silent' flags turns off 1 and 3.) + + RETURN VALUES + FALSE ok + TRUE Error + +*/ + +static int +mysql_create_db_internal(THD *thd, const LEX_CSTRING *db, + const DDL_options_st &options, + Schema_specification_st *create_info, + bool silent) +{ + char path[FN_REFLEN+16]; + MY_STAT stat_info; + uint path_len; + DBUG_ENTER("mysql_create_db"); + + /* do not create 'information_schema' db */ + if (is_infoschema_db(db)) + { + my_error(ER_DB_CREATE_EXISTS, MYF(0), db->str); + DBUG_RETURN(-1); + } + + char db_tmp[SAFE_NAME_LEN+1]; + const char *dbnorm= normalize_db_name(db->str, db_tmp, sizeof(db_tmp)); + + if (lock_schema_name(thd, dbnorm)) + DBUG_RETURN(-1); + + /* Check directory */ + path_len= build_table_filename(path, sizeof(path) - 1, db->str, "", "", 0); + path[path_len-1]= 0; // Remove last '/' from path + + long affected_rows= 1; + if (!mysql_file_stat(key_file_misc, path, &stat_info, MYF(0))) + { + // The database directory does not exist, or my_file_stat() failed + if (my_errno != ENOENT) + { + my_error(EE_STAT, MYF(0), path, my_errno); + DBUG_RETURN(1); + } + } + else if (options.or_replace()) + { + if (mysql_rm_db_internal(thd, db, 0, true)) // Removing the old database + DBUG_RETURN(1); + /* + Reset the diagnostics m_status. + It might be set ot DA_OK in mysql_rm_db. 
+ */ + thd->get_stmt_da()->reset_diagnostics_area(); + affected_rows= 2; + } + else if (options.if_not_exists()) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_DB_CREATE_EXISTS, ER_THD(thd, ER_DB_CREATE_EXISTS), + db->str); + affected_rows= 0; + goto not_silent; + } + else + { + my_error(ER_DB_CREATE_EXISTS, MYF(0), db->str); + DBUG_RETURN(-1); + } + + if (my_mkdir(path, 0777, MYF(0)) < 0) + { + my_error(ER_CANT_CREATE_DB, MYF(0), db->str, my_errno); + DBUG_RETURN(-1); + } + + path[path_len-1]= FN_LIBCHAR; + strmake(path+path_len, MY_DB_OPT_FILE, sizeof(path)-path_len-1); + if (write_db_opt(thd, path, create_info)) + { + /* + Could not create options file. + Restore things to beginning. + */ + path[path_len]= 0; + if (my_rmdir(path) >= 0) + DBUG_RETURN(-1); + /* + We come here when we managed to create the database, but not the option + file. In this case it's best to just continue as if nothing has + happened. (This is a very unlikely senario) + */ + thd->clear_error(); + } + + /* Log command to ddl log */ + backup_log_info ddl_log; + bzero(&ddl_log, sizeof(ddl_log)); + ddl_log.query= { C_STRING_WITH_LEN("CREATE") }; + ddl_log.org_storage_engine_name= { C_STRING_WITH_LEN("DATABASE") }; + ddl_log.org_database= *db; + backup_log_ddl(&ddl_log); + +not_silent: + if (!silent) + { + char *query; + uint query_length; + + query= thd->query(); + query_length= thd->query_length(); + DBUG_ASSERT(query); + + if (mysql_bin_log.is_open()) + { + int errcode= query_error_code(thd, TRUE); + Query_log_event qinfo(thd, query, query_length, FALSE, TRUE, + /* suppress_use */ TRUE, errcode); + + /* + Write should use the database being created as the "current + database" and not the threads current database, which is the + default. If we do not change the "current database" to the + database being created, the CREATE statement will not be + replicated when using --binlog-do-db to select databases to be + replicated. 
+ + An example (--binlog-do-db=sisyfos): + + CREATE DATABASE bob; # Not replicated + USE bob; # 'bob' is the current database + CREATE DATABASE sisyfos; # Not replicated since 'bob' is + # current database. + USE sisyfos; # Will give error on slave since + # database does not exist. + */ + qinfo.db = db->str; + qinfo.db_len = (uint32)db->length; + + /* + These DDL methods and logging are protected with the exclusive + metadata lock on the schema + */ + if (mysql_bin_log.write(&qinfo)) + DBUG_RETURN(-1); + } + my_ok(thd, affected_rows); + } + + DBUG_RETURN(0); +} + + +/* db-name is already validated when we come here */ + +static bool +mysql_alter_db_internal(THD *thd, const LEX_CSTRING *db, + Schema_specification_st *create_info) +{ + char path[FN_REFLEN+16]; + long result=1; + int error= 0; + DBUG_ENTER("mysql_alter_db"); + + char dbnorm_buffer[SAFE_NAME_LEN + 1]; + const char *dbnorm= normalize_db_name(db->str, dbnorm_buffer, + sizeof(dbnorm_buffer)); + if (lock_schema_name(thd, dbnorm)) + DBUG_RETURN(TRUE); + + /* + Recreate db options file: /dbpath/.db.opt + We pass MY_DB_OPT_FILE as "extension" to avoid + "table name to file name" encoding. + */ + build_table_filename(path, sizeof(path) - 1, db->str, "", MY_DB_OPT_FILE, 0); + if (unlikely((error=write_db_opt(thd, path, create_info)))) + goto exit; + + /* Change options if current database is being altered. */ + + if (thd->db.str && !cmp(&thd->db, db)) + { + thd->db_charset= create_info->default_table_charset ? 
+ create_info->default_table_charset : + thd->variables.collation_server; + thd->variables.collation_database= thd->db_charset; + } + + /* Log command to ddl log */ + backup_log_info ddl_log; + bzero(&ddl_log, sizeof(ddl_log)); + ddl_log.query= { C_STRING_WITH_LEN("ALTER") }; + ddl_log.org_storage_engine_name= { C_STRING_WITH_LEN("DATABASE") }; + ddl_log.org_database= *db; + backup_log_ddl(&ddl_log); + + if (mysql_bin_log.is_open()) + { + int errcode= query_error_code(thd, TRUE); + Query_log_event qinfo(thd, thd->query(), thd->query_length(), FALSE, TRUE, + /* suppress_use */ TRUE, errcode); + /* + Write should use the database being created as the "current + database" and not the threads current database, which is the + default. + */ + qinfo.db= db->str; + qinfo.db_len= (uint)db->length; + + /* + These DDL methods and logging are protected with the exclusive + metadata lock on the schema. + */ + if (unlikely((error= mysql_bin_log.write(&qinfo)))) + goto exit; + } + my_ok(thd, result); + +exit: + DBUG_RETURN(error); +} + + +int mysql_create_db(THD *thd, const LEX_CSTRING *db, DDL_options_st options, + const Schema_specification_st *create_info) +{ + DBUG_ASSERT(create_info->default_table_charset); + /* + As mysql_create_db_internal() may modify Db_create_info structure passed + to it, we need to use a copy to make execution prepared statement- safe. + */ + Schema_specification_st tmp(*create_info); + if (thd->slave_thread && + slave_ddl_exec_mode_options == SLAVE_EXEC_MODE_IDEMPOTENT) + options.add(DDL_options::OPT_IF_NOT_EXISTS); + return mysql_create_db_internal(thd, db, options, &tmp, false); +} + + +bool mysql_alter_db(THD *thd, const LEX_CSTRING *db, + const Schema_specification_st *create_info) +{ + DBUG_ASSERT(create_info->default_table_charset); + /* + As mysql_alter_db_internal() may modify Db_create_info structure passed + to it, we need to use a copy to make execution prepared statement- safe. 
+ */ + Schema_specification_st tmp(*create_info); + return mysql_alter_db_internal(thd, db, &tmp); +} + + +/** + Drop database objects + + @param thd THD object + @param path Path to database (for ha_drop_database) + @param db Normalized database name + @param rm_mysql_schema If the schema is 'mysql', in which case we don't + log the query to binary log or delete related + routines or events. +*/ + +void drop_database_objects(THD *thd, const LEX_CSTRING *path, + const LEX_CSTRING *db, + bool rm_mysql_schema) +{ + debug_crash_here("ddl_log_drop_before_ha_drop_database"); + + ha_drop_database(path->str); + + /* + We temporarily disable the binary log while dropping the objects + in the database. Since the DROP DATABASE statement is always + replicated as a statement, execution of it will drop all objects + in the database on the slave as well, so there is no need to + replicate the removal of the individual objects in the database + as well. + + This is more of a safety precaution, since normally no objects + should be dropped while the database is being cleaned, but in + the event that a change in the code to remove other objects is + made, these drops should still not be logged. + */ + + debug_crash_here("ddl_log_drop_before_drop_db_routines"); + + query_cache_invalidate1(thd, db->str); + + if (!rm_mysql_schema) + { + tmp_disable_binlog(thd); + (void) sp_drop_db_routines(thd, db->str); /* @todo Do not ignore errors */ +#ifdef HAVE_EVENT_SCHEDULER + Events::drop_schema_events(thd, db->str); +#endif + reenable_binlog(thd); + } + debug_crash_here("ddl_log_drop_after_drop_db_routines"); +} + + +/** + Drop all tables, routines and events in a database and the database itself. 
+ + @param thd Thread handle + @param db Database name in the case given by user + It's already validated and set to lower case + (if needed) when we come here + @param if_exists Don't give error if database doesn't exists + @param silent Don't write the statement to the binary log and don't + send ok packet to the client + + @retval false OK (Database dropped) + @retval true Error +*/ + +static bool +mysql_rm_db_internal(THD *thd, const LEX_CSTRING *db, bool if_exists, + bool silent) +{ + ulong deleted_tables= 0; + bool error= true, rm_mysql_schema; + char path[FN_REFLEN + 16]; + MY_DIR *dirp; + uint path_length; + TABLE_LIST *tables= NULL; + TABLE_LIST *table; + DDL_LOG_STATE ddl_log_state; + Drop_table_error_handler err_handler; + LEX_CSTRING rm_db; + char db_tmp[SAFE_NAME_LEN+1]; + const char *dbnorm; + DBUG_ENTER("mysql_rm_db"); + + dbnorm= normalize_db_name(db->str, db_tmp, sizeof(db_tmp)); + lex_string_set(&rm_db, dbnorm); + bzero(&ddl_log_state, sizeof(ddl_log_state)); + + if (lock_schema_name(thd, dbnorm)) + DBUG_RETURN(true); + + path_length= build_table_filename(path, sizeof(path) - 1, db->str, "", "", 0); + + /* See if the directory exists */ + if (!(dirp= my_dir(path,MYF(MY_DONT_SORT)))) + { + if (!if_exists) + { + my_error(ER_DB_DROP_EXISTS, MYF(0), db->str); + DBUG_RETURN(true); + } + else + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_DB_DROP_EXISTS, ER_THD(thd, ER_DB_DROP_EXISTS), + db->str); + error= false; + goto update_binlog; + } + } + + if (find_db_tables_and_rm_known_files(thd, dirp, dbnorm, path, &tables)) + goto exit; + + /* + Disable drop of enabled log tables, must be done before name locking. + This check is only needed if we are dropping the "mysql" database. 
+ */ + if ((rm_mysql_schema= + (my_strcasecmp(system_charset_info, MYSQL_SCHEMA_NAME.str, db->str) == 0))) + { + for (table= tables; table; table= table->next_local) + if (check_if_log_table(table, TRUE, "DROP")) + goto exit; + } + + /* Lock all tables and stored routines about to be dropped. */ + if (lock_table_names(thd, tables, NULL, thd->variables.lock_wait_timeout, + 0) || + lock_db_routines(thd, dbnorm)) + goto exit; + + if (!rm_mysql_schema) + { + for (table= tables; table; table= table->next_local) + { + if (table->open_type == OT_BASE_ONLY || + !thd->find_temporary_table(table)) + (void) delete_statistics_for_table(thd, &table->db, &table->table_name); + } + } + + /* + Close active HANDLER's for tables in the database. + Note that mysql_ha_rm_tables() requires a non-null TABLE_LIST. + */ + if (tables) + mysql_ha_rm_tables(thd, tables); + + for (table= tables; table; table= table->next_local) + deleted_tables++; + + thd->push_internal_handler(&err_handler); + if (!thd->killed && + !(tables && + mysql_rm_table_no_locks(thd, tables, &rm_db, &ddl_log_state, true, false, + true, false, true, false))) + { + debug_crash_here("ddl_log_drop_after_drop_tables"); + + LEX_CSTRING cpath{ path, path_length}; + ddl_log_drop_db(&ddl_log_state, &rm_db, &cpath); + + drop_database_objects(thd, &cpath, &rm_db, rm_mysql_schema); + + /* + Now remove the db.opt file. + The 'find_db_tables_and_rm_known_files' doesn't remove this file + if there exists a table with the name 'db', so let's just do it + separately. We know this file exists and needs to be deleted anyway. 
+ */ + debug_crash_here("ddl_log_drop_before_drop_option_file"); + strmov(path+path_length, MY_DB_OPT_FILE); // Append db option file name + if (mysql_file_delete_with_symlink(key_file_misc, path, "", MYF(0)) && + my_errno != ENOENT) + { + thd->pop_internal_handler(); + my_error(EE_DELETE, MYF(0), path, my_errno); + error= true; + ddl_log_complete(&ddl_log_state); + goto end; + } + del_dbopt(path); // Remove dboption hash entry + path[path_length]= '\0'; // Remove file name + + /* + If the directory is a symbolic link, remove the link first, then + remove the directory the symbolic link pointed at + */ + debug_crash_here("ddl_log_drop_before_drop_dir"); + error= rm_dir_w_symlink(path, true); + debug_crash_here("ddl_log_drop_after_drop_dir"); + } + + thd->pop_internal_handler(); + +update_binlog: + if (likely(!error)) + { + /* Log command to ddl log */ + backup_log_info ddl_log; + bzero(&ddl_log, sizeof(ddl_log)); + ddl_log.query= { C_STRING_WITH_LEN("DROP") }; + ddl_log.org_storage_engine_name= { C_STRING_WITH_LEN("DATABASE") }; + ddl_log.org_database= *db; + backup_log_ddl(&ddl_log); + } + + if (!silent && likely(!error)) + { + const char *query; + ulong query_length; + + query= thd->query(); + query_length= thd->query_length(); + DBUG_ASSERT(query); + + if (mysql_bin_log.is_open()) + { + int errcode= query_error_code(thd, TRUE); + int res; + Query_log_event qinfo(thd, query, query_length, FALSE, TRUE, + /* suppress_use */ TRUE, errcode); + /* + Write should use the database being created as the "current + database" and not the threads current database, which is the + default. + */ + qinfo.db = db->str; + qinfo.db_len = (uint32)db->length; + + /* + These DDL methods and logging are protected with the exclusive + metadata lock on the schema. 
+ */ + debug_crash_here("ddl_log_drop_before_binlog"); + thd->binlog_xid= thd->query_id; + ddl_log_update_xid(&ddl_log_state, thd->binlog_xid); + res= mysql_bin_log.write(&qinfo); + thd->binlog_xid= 0; + debug_crash_here("ddl_log_drop_after_binlog"); + + if (res) + { + error= true; + goto exit; + } + } + thd->clear_error(); + thd->server_status|= SERVER_STATUS_DB_DROPPED; + my_ok(thd, deleted_tables); + } + else if (mysql_bin_log.is_open() && !silent) + { + char *query, *query_pos, *query_end, *query_data_start; + TABLE_LIST *tbl; + + if (!(query= (char*) thd->alloc(MAX_DROP_TABLE_Q_LEN))) + goto exit; /* not much else we can do */ + query_pos= query_data_start= strmov(query,"DROP TABLE IF EXISTS "); + query_end= query + MAX_DROP_TABLE_Q_LEN; + + for (tbl= tables; tbl; tbl= tbl->next_local) + { + size_t tbl_name_len; + char quoted_name[FN_REFLEN+3]; + + // Only write drop table to the binlog for tables that no longer exist. + if (ha_table_exists(thd, &tbl->db, &tbl->table_name)) + continue; + + tbl_name_len= my_snprintf(quoted_name, sizeof(quoted_name), "%`s", + tbl->table_name.str); + tbl_name_len++; /* +1 for the comma */ + if (query_pos + tbl_name_len + 1 >= query_end) + { + /* + These DDL methods and logging are protected with the exclusive + metadata lock on the schema. + */ + if (write_to_binlog(thd, query, (uint)(query_pos -1 - query), db->str, db->length)) + { + error= true; + goto exit; + } + query_pos= query_data_start; + } + + query_pos= strmov(query_pos, quoted_name); + *query_pos++ = ','; + } + + if (query_pos != query_data_start) // If database was not empty + { + int res; + /* + These DDL methods and logging are protected with the exclusive + metadata lock on the schema. 
+ */ + debug_crash_here("ddl_log_drop_before_binlog"); + thd->binlog_xid= thd->query_id; + ddl_log_update_xid(&ddl_log_state, thd->binlog_xid); + res= write_to_binlog(thd, query, (uint)(query_pos -1 - query), db->str, + db->length); + thd->binlog_xid= 0; + debug_crash_here("ddl_log_drop_after_binlog"); + if (res) + { + error= true; + goto exit; + } + } + } + +exit: + ddl_log_complete(&ddl_log_state); + /* + If this database was the client's selected database, we silently + change the client's selected database to nothing (to have an empty + SELECT DATABASE() in the future). For this we free() thd->db and set + it to 0. + */ + if (unlikely(thd->db.str && cmp_db_names(&thd->db, db) && !error)) + { + mysql_change_db_impl(thd, NULL, NO_ACL, thd->variables.collation_server); + thd->session_tracker.current_schema.mark_as_changed(thd); + } +end: + my_dirend(dirp); + DBUG_RETURN(error); +} + + +bool mysql_rm_db(THD *thd, const LEX_CSTRING *db, bool if_exists) +{ + if (thd->slave_thread && + slave_ddl_exec_mode_options == SLAVE_EXEC_MODE_IDEMPOTENT) + if_exists= true; + return mysql_rm_db_internal(thd, db, if_exists, false); +} + + +static bool find_db_tables_and_rm_known_files(THD *thd, MY_DIR *dirp, + const char *dbname, + const char *path, + TABLE_LIST **tables) +{ + char filePath[FN_REFLEN]; + LEX_CSTRING db= { dbname, strlen(dbname) }; + TABLE_LIST *tot_list=0, **tot_list_next_local, **tot_list_next_global; + DBUG_ENTER("find_db_tables_and_rm_known_files"); + DBUG_PRINT("enter",("path: %s", path)); + + /* first, get the list of tables */ + Dynamic_array files(PSI_INSTRUMENT_MEM, dirp->number_of_files); + Discovered_table_list tl(thd, &files); + if (ha_discover_table_names(thd, &db, dirp, &tl, true)) + DBUG_RETURN(1); + + /* Now put the tables in the list */ + tot_list_next_local= tot_list_next_global= &tot_list; + + for (size_t idx=0; idx < files.elements(); idx++) + { + LEX_CSTRING *table= files.at(idx); + + /* Drop the table nicely */ + TABLE_LIST 
*table_list=(TABLE_LIST*)thd->calloc(sizeof(*table_list)); + + if (!table_list) + DBUG_RETURN(true); + table_list->db= db; + table_list->table_name= *table; + table_list->open_type= OT_BASE_ONLY; + + /* + On the case-insensitive file systems table is opened + with the lowercased file name. So we should lowercase + as well to look up the cache properly. + */ + if (lower_case_file_system) + table_list->table_name.length= my_casedn_str(files_charset_info, + (char*) table_list->table_name.str); + + table_list->alias= table_list->table_name; // If lower_case_table_names=2 + MDL_REQUEST_INIT(&table_list->mdl_request, MDL_key::TABLE, + table_list->db.str, table_list->table_name.str, + MDL_EXCLUSIVE, MDL_TRANSACTION); + /* Link into list */ + (*tot_list_next_local)= table_list; + (*tot_list_next_global)= table_list; + tot_list_next_local= &table_list->next_local; + tot_list_next_global= &table_list->next_global; + } + *tables= tot_list; + + /* and at last delete all non-table files */ + for (size_t idx=0; idx < dirp->number_of_files && !thd->killed; idx++) + { + FILEINFO *file=dirp->dir_entry+idx; + char *extension; + DBUG_PRINT("info",("Examining: %s", file->name)); + + if (file->name[0] == 'a' && file->name[1] == 'r' && + file->name[2] == 'c' && file->name[3] == '\0') + { + /* .frm archive: + Those archives are obsolete, but following code should + exist to remove existent "arc" directories. 
+ */ + char newpath[FN_REFLEN]; + MY_DIR *new_dirp; + strxmov(newpath, path, "/", "arc", NullS); + (void) unpack_filename(newpath, newpath); + if ((new_dirp = my_dir(newpath, MYF(MY_DONT_SORT)))) + { + DBUG_PRINT("my",("Archive subdir found: %s", newpath)); + if ((mysql_rm_arc_files(thd, new_dirp, newpath)) < 0) + DBUG_RETURN(true); + } + continue; + } + if (!(extension= strrchr(file->name, '.'))) + extension= strend(file->name); + if (find_type(extension, &deletable_extensions, FIND_TYPE_NO_PREFIX) > 0) + { + strxmov(filePath, path, "/", file->name, NullS); + /* + We ignore ENOENT error in order to skip files that was deleted + by concurrently running statement like REPAIR TABLE ... + */ + if (mysql_file_delete_with_symlink(key_file_misc, filePath, "", MYF(0)) && + my_errno != ENOENT) + { + my_error(EE_DELETE, MYF(0), filePath, my_errno); + DBUG_RETURN(true); + } + } + } + + DBUG_RETURN(false); +} + + +/* + Remove directory with symlink + + SYNOPSIS + rm_dir_w_symlink() + org_path path of derictory + send_error send errors + RETURN + 0 OK + 1 ERROR +*/ + +my_bool rm_dir_w_symlink(const char *org_path, my_bool send_error) +{ + char tmp_path[FN_REFLEN], *pos; + char *path= tmp_path; + DBUG_ENTER("rm_dir_w_symlink"); + unpack_filename(tmp_path, org_path); + + /* Remove end FN_LIBCHAR as this causes problem on Linux and OS/2 */ + pos= strend(path); + if (pos > path && pos[-1] == FN_LIBCHAR) + *--pos=0; + +#ifdef HAVE_READLINK + int error; + char tmp2_path[FN_REFLEN]; + + if (unlikely((error= my_readlink(tmp2_path, path, + MYF(send_error ? MY_WME : 0))) < 0)) + DBUG_RETURN(1); + if (likely(!error)) + { + if (mysql_file_delete(key_file_misc, path, MYF(send_error ? 
MY_WME : 0))) + { + DBUG_RETURN(send_error); + } + /* Delete directory symbolic link pointed at */ + path= tmp2_path; + } +#endif + + if (unlikely(my_rmdir(path) < 0 && send_error)) + { + my_error(ER_DB_DROP_RMDIR, MYF(0), path, errno); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +/* + Remove .frm archives from directory + + SYNOPSIS + thd thread handler + dirp list of files in archive directory + db data base name + org_path path of archive directory + + RETURN + > 0 number of removed files + -1 error + + NOTE + A support of "arc" directories is obsolete, however this + function should exist to remove existent "arc" directories. +*/ +long mysql_rm_arc_files(THD *thd, MY_DIR *dirp, const char *org_path) +{ + long deleted= 0; + ulong found_other_files= 0; + char filePath[FN_REFLEN]; + DBUG_ENTER("mysql_rm_arc_files"); + DBUG_PRINT("enter", ("path: %s", org_path)); + + for (size_t idx=0; idx < dirp->number_of_files && !thd->killed; idx++) + { + FILEINFO *file=dirp->dir_entry+idx; + char *extension, *revision; + DBUG_PRINT("info",("Examining: %s", file->name)); + + extension= fn_ext(file->name); + if (extension[0] != '.' 
|| + extension[1] != 'f' || extension[2] != 'r' || + extension[3] != 'm' || extension[4] != '-') + { + found_other_files++; + continue; + } + revision= extension+5; + while (*revision && my_isdigit(system_charset_info, *revision)) + revision++; + if (*revision) + { + found_other_files++; + continue; + } + strxmov(filePath, org_path, "/", file->name, NullS); + if (mysql_file_delete_with_symlink(key_file_misc, filePath, "", MYF(MY_WME))) + { + goto err; + } + deleted++; + } + if (thd->killed) + goto err; + + my_dirend(dirp); + + /* + If the directory is a symbolic link, remove the link first, then + remove the directory the symbolic link pointed at + */ + if (!found_other_files && + rm_dir_w_symlink(org_path, 0)) + DBUG_RETURN(-1); + DBUG_RETURN(deleted); + +err: + my_dirend(dirp); + DBUG_RETURN(-1); +} + + +/** + @brief Internal implementation: switch current database to a valid one. + + @param thd Thread context. + @param new_db_name Name of the database to switch to. The function will + take ownership of the name (the caller must not free + the allocated memory). If the name is NULL, we're + going to switch to NULL db. + @param new_db_access Privileges of the new database. + @param new_db_charset Character set of the new database. +*/ + +static void mysql_change_db_impl(THD *thd, + LEX_CSTRING *new_db_name, + privilege_t new_db_access, + CHARSET_INFO *new_db_charset) +{ + /* 1. Change current database in THD. */ + + if (new_db_name == NULL) + { + /* + THD::set_db() does all the job -- it frees previous database name and + sets the new one. + */ + + thd->set_db(&null_clex_str); + } + else if (new_db_name->str == INFORMATION_SCHEMA_NAME.str) + { + /* + Here we must use THD::set_db(), because we want to copy + INFORMATION_SCHEMA_NAME constant. + */ + + thd->set_db(&INFORMATION_SCHEMA_NAME); + } + else + { + /* + Here we already have a copy of database name to be used in THD. So, + we just call THD::reset_db(). 
Since THD::reset_db() does not releases + the previous database name, we should do it explicitly. + */ + thd->set_db(&null_clex_str); + thd->reset_db(new_db_name); + } + + /* 2. Update security context. */ + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + thd->security_ctx->db_access= new_db_access; +#endif + + /* 3. Update db-charset environment variables. */ + + thd->db_charset= new_db_charset; + thd->variables.collation_database= new_db_charset; +} + + + +/** + Backup the current database name before switch. + + @param[in] thd thread handle + @param[in, out] saved_db_name IN: "str" points to a buffer where to store + the old database name, "length" contains the + buffer size + OUT: if the current (default) database is + not NULL, its name is copied to the + buffer pointed at by "str" + and "length" is updated accordingly. + Otherwise "str" is set to NULL and + "length" is set to 0. +*/ + +static void backup_current_db_name(THD *thd, + LEX_STRING *saved_db_name) +{ + DBUG_ASSERT(saved_db_name->length >= SAFE_NAME_LEN +1); + if (!thd->db.str) + { + /* No current (default) database selected. */ + saved_db_name->str= 0; + saved_db_name->length= 0; + } + else + { + memcpy(saved_db_name->str, thd->db.str, thd->db.length + 1); + saved_db_name->length= thd->db.length; + } +} + + +/** + @brief Change the current database and its attributes unconditionally. + + @param thd thread handle + @param new_db_name database name + @param force_switch if force_switch is FALSE, then the operation will fail if + + - new_db_name is NULL or empty; + + - OR new database name is invalid + (check_db_name() failed); + + - OR user has no privilege on the new database; + + - OR new database does not exist; + + if force_switch is TRUE, then + + - if new_db_name is NULL or empty, the current + database will be NULL, @@collation_database will + be set to @@collation_server, the operation will + succeed. 
+ + - if new database name is invalid + (check_db_name() failed), the current database + will be NULL, @@collation_database will be set to + @@collation_server, but the operation will fail; + + - user privileges will not be checked + (THD::db_access however is updated); + + TODO: is this really the intention? + (see sp-security.test). + + - if new database does not exist,the current database + will be NULL, @@collation_database will be set to + @@collation_server, a warning will be thrown, the + operation will succeed. + + @details The function checks that the database name corresponds to a + valid and existent database, checks access rights and changes the current + database with database attributes (@@collation_database session variable, + THD::db_access). + + This function is not the only way to switch the database that is + currently employed. When the replication slave thread switches the + database before executing a query, it calls thd->set_db directly. + However, if the query, in turn, uses a stored routine, the stored routine + will use this function, even if it's run on the slave. + + This function allocates the name of the database on the system heap: this + is necessary to be able to uniformly change the database from any module + of the server. Up to 5.0 different modules were using different memory to + store the name of the database, and this led to memory corruption: + a stack pointer set by Stored Procedures was used by replication after + the stack address was long gone. 
+ + @return error code (ER_XXX) + @retval 0 Success + @retval >0 Error +*/ + +uint mysql_change_db(THD *thd, const LEX_CSTRING *new_db_name, + bool force_switch) +{ + LEX_CSTRING new_db_file_name; + + Security_context *sctx= thd->security_ctx; + privilege_t db_access(sctx->db_access); + CHARSET_INFO *db_default_cl; + DBUG_ENTER("mysql_change_db"); + + if (new_db_name->length == 0) + { + if (force_switch) + { + /* + This can happen only if we're switching the current database back + after loading stored program. The thing is that loading of stored + program can happen when there is no current database. + + In case of stored program, new_db_name->str == "" and + new_db_name->length == 0. + */ + + mysql_change_db_impl(thd, NULL, NO_ACL, thd->variables.collation_server); + + goto done; + } + else + { + my_message(ER_NO_DB_ERROR, ER_THD(thd, ER_NO_DB_ERROR), MYF(0)); + + DBUG_RETURN(ER_NO_DB_ERROR); + } + } + DBUG_PRINT("enter",("name: '%s'", new_db_name->str)); + + if (is_infoschema_db(new_db_name)) + { + /* Switch the current database to INFORMATION_SCHEMA. */ + + mysql_change_db_impl(thd, &INFORMATION_SCHEMA_NAME, SELECT_ACL, + system_charset_info); + goto done; + } + + /* + Now we need to make a copy because check_db_name requires a + non-constant argument. Actually, it takes database file name. + + TODO: fix check_db_name(). + */ + + new_db_file_name.str= my_strndup(key_memory_THD_db, new_db_name->str, + new_db_name->length, MYF(MY_WME)); + new_db_file_name.length= new_db_name->length; + + if (new_db_file_name.str == NULL) + DBUG_RETURN(ER_OUT_OF_RESOURCES); /* the error is set */ + + /* + NOTE: if check_db_name() fails, we should throw an error in any case, + even if we are called from sp_head::execute(). + + It's next to impossible however to get this error when we are called + from sp_head::execute(). But let's switch the current database to NULL + in this case to be sure. 
+ The cast below ok here as new_db_file_name was just allocated + */ + + if (check_db_name((LEX_STRING*) &new_db_file_name)) + { + my_error(ER_WRONG_DB_NAME, MYF(0), new_db_file_name.str); + my_free(const_cast(new_db_file_name.str)); + + if (force_switch) + mysql_change_db_impl(thd, NULL, NO_ACL, thd->variables.collation_server); + + DBUG_RETURN(ER_WRONG_DB_NAME); + } + + DBUG_PRINT("info",("Use database: %s", new_db_file_name.str)); + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (test_all_bits(sctx->master_access, DB_ACLS)) + { + db_access= DB_ACLS; + } + else + { + db_access= acl_get_all3(sctx, new_db_file_name.str, FALSE); + db_access|= sctx->master_access; + } + + if (!force_switch && + !(db_access & DB_ACLS) && + check_grant_db(thd, new_db_file_name.str)) + { + my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), + sctx->priv_user, + sctx->priv_host, + new_db_file_name.str); + general_log_print(thd, COM_INIT_DB, ER_THD(thd, ER_DBACCESS_DENIED_ERROR), + sctx->priv_user, sctx->priv_host, new_db_file_name.str); + my_free(const_cast(new_db_file_name.str)); + DBUG_RETURN(ER_DBACCESS_DENIED_ERROR); + } +#endif + + DEBUG_SYNC(thd, "before_db_dir_check"); + + if (check_db_dir_existence(new_db_file_name.str)) + { + if (force_switch) + { + /* Throw a warning and free new_db_file_name. */ + + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_BAD_DB_ERROR, ER_THD(thd, ER_BAD_DB_ERROR), + new_db_file_name.str); + + my_free(const_cast(new_db_file_name.str)); + + /* Change db to NULL. */ + + mysql_change_db_impl(thd, NULL, NO_ACL, thd->variables.collation_server); + + /* The operation succeed. */ + goto done; + } + else + { + /* Report an error and free new_db_file_name. */ + + my_error(ER_BAD_DB_ERROR, MYF(0), new_db_file_name.str); + my_free(const_cast(new_db_file_name.str)); + + /* The operation failed. */ + + DBUG_RETURN(ER_BAD_DB_ERROR); + } + } + + /* + NOTE: in mysql_change_db_impl() new_db_file_name is assigned to THD + attributes and will be freed in THD::~THD(). 
+ */ + + db_default_cl= get_default_db_collation(thd, new_db_file_name.str); + + mysql_change_db_impl(thd, &new_db_file_name, db_access, db_default_cl); + +done: + thd->session_tracker.current_schema.mark_as_changed(thd); + thd->session_tracker.state_change.mark_as_changed(thd); + DBUG_RETURN(0); +} + + +/** + Change the current database and its attributes if needed. + + @param thd thread handle + @param new_db_name database name + @param[in, out] saved_db_name IN: "str" points to a buffer where to store + the old database name, "length" contains the + buffer size + OUT: if the current (default) database is + not NULL, its name is copied to the + buffer pointed at by "str" + and "length" is updated accordingly. + Otherwise "str" is set to NULL and + "length" is set to 0. + @param force_switch @see mysql_change_db() + @param[out] cur_db_changed out-flag to indicate whether the current + database has been changed (valid only if + the function suceeded) +*/ + +bool mysql_opt_change_db(THD *thd, + const LEX_CSTRING *new_db_name, + LEX_STRING *saved_db_name, + bool force_switch, + bool *cur_db_changed) +{ + *cur_db_changed= !cmp_db_names(&thd->db, new_db_name); + + if (!*cur_db_changed) + return FALSE; + + backup_current_db_name(thd, saved_db_name); + + return mysql_change_db(thd, new_db_name, force_switch); +} + + +/** + Upgrade a 5.0 database. + This function is invoked whenever an ALTER DATABASE UPGRADE query is executed: + ALTER DATABASE 'olddb' UPGRADE DATA DIRECTORY NAME. + + If we have managed to rename (move) tables to the new database + but something failed on a later step, then we store the + RENAME DATABASE event in the log. mysql_rename_db() is atomic in + the sense that it will rename all or none of the tables. 
+ + @param thd Current thread + @param old_db 5.0 database name, in #mysql50#name format + @return 0 on success, 1 on error +*/ + +bool mysql_upgrade_db(THD *thd, const LEX_CSTRING *old_db) +{ + bool error= 0, change_to_newdb= 0; + char path[FN_REFLEN+16]; + uint length; + Schema_specification_st create_info; + MY_DIR *dirp; + TABLE_LIST *table_list; + SELECT_LEX *sl= thd->lex->current_select; + LEX_CSTRING new_db; + DBUG_ENTER("mysql_upgrade_db"); + + if ((old_db->length <= MYSQL50_TABLE_NAME_PREFIX_LENGTH) || + (strncmp(old_db->str, + MYSQL50_TABLE_NAME_PREFIX, + MYSQL50_TABLE_NAME_PREFIX_LENGTH) != 0)) + { + my_error(ER_WRONG_USAGE, MYF(0), + "ALTER DATABASE UPGRADE DATA DIRECTORY NAME", + "name"); + DBUG_RETURN(1); + } + + /* `#mysql50#` converted to encoded `` */ + new_db.str= old_db->str + MYSQL50_TABLE_NAME_PREFIX_LENGTH; + new_db.length= old_db->length - MYSQL50_TABLE_NAME_PREFIX_LENGTH; + + char dbnorm_buffer_old[SAFE_NAME_LEN + 1]; + const char *old_dbnorm= normalize_db_name(old_db->str, dbnorm_buffer_old, + sizeof(dbnorm_buffer_old)); + + /* Lock the old name, the new name will be locked by mysql_create_db().*/ + if (lock_schema_name(thd, old_dbnorm)) + DBUG_RETURN(1); + + /* + Let's remember if we should do "USE newdb" afterwards. 
+ thd->db will be cleared in mysql_rename_db() + */ + if (thd->db.str && !cmp(&thd->db, old_db)) + change_to_newdb= 1; + + build_table_filename(path, sizeof(path)-1, + old_db->str, "", MY_DB_OPT_FILE, 0); + if ((load_db_opt(thd, path, &create_info))) + create_info.default_table_charset= thd->variables.collation_server; + + length= build_table_filename(path, sizeof(path)-1, old_db->str, "", "", 0); + if (length && path[length-1] == FN_LIBCHAR) + path[length-1]=0; // remove ending '\' + if (unlikely((error= my_access(path,F_OK)))) + { + my_error(ER_BAD_DB_ERROR, MYF(0), old_db->str); + goto exit; + } + + /* Step1: Create the new database */ + if (unlikely((error= mysql_create_db_internal(thd, &new_db, + DDL_options(), &create_info, + 1)))) + goto exit; + + /* Step2: Move tables to the new database */ + if ((dirp = my_dir(path,MYF(MY_DONT_SORT)))) + { + size_t nfiles= dirp->number_of_files; + for (size_t idx=0 ; idx < nfiles && !thd->killed ; idx++) + { + FILEINFO *file= dirp->dir_entry + idx; + char *extension, tname[FN_REFLEN + 1]; + LEX_CSTRING table_str; + DBUG_PRINT("info",("Examining: %s", file->name)); + + /* skiping non-FRM files */ + if (!(extension= (char*) fn_frm_ext(file->name))) + continue; + + /* A frm file found, add the table info rename list */ + *extension= '\0'; + + table_str.length= filename_to_tablename(file->name, + tname, sizeof(tname)-1); + table_str.str= (char*) thd->memdup(tname, table_str.length + 1); + Table_ident *old_ident= new Table_ident(thd, old_db, &table_str, 0); + Table_ident *new_ident= new Table_ident(thd, &new_db, &table_str, 0); + if (!old_ident || !new_ident || + !sl->add_table_to_list(thd, old_ident, NULL, + TL_OPTION_UPDATING, TL_IGNORE, + MDL_EXCLUSIVE) || + !sl->add_table_to_list(thd, new_ident, NULL, + TL_OPTION_UPDATING, TL_IGNORE, + MDL_EXCLUSIVE)) + { + error= 1; + my_dirend(dirp); + goto exit; + } + } + my_dirend(dirp); + } + + if ((table_list= thd->lex->query_tables) && + (error= mysql_rename_tables(thd, table_list, 
1, 0))) + { + /* + Failed to move all tables from the old database to the new one. + In the best case mysql_rename_tables() moved all tables back to the old + database. In the worst case mysql_rename_tables() moved some tables + to the new database, then failed, then started to move the tables back, + and then failed again. In this situation we have some tables in the + old database and some tables in the new database. + Let's delete the option file, and then the new database directory. + If some tables were left in the new directory, rmdir() will fail. + It garantees we never loose any tables. + */ + build_table_filename(path, sizeof(path)-1, + new_db.str,"",MY_DB_OPT_FILE, 0); + mysql_file_delete(key_file_dbopt, path, MYF(MY_WME)); + length= build_table_filename(path, sizeof(path)-1, new_db.str, "", "", 0); + if (length && path[length-1] == FN_LIBCHAR) + path[length-1]=0; // remove ending '\' + my_rmdir(path); + goto exit; + } + + + /* + Step3: move all remaining files to the new db's directory. + Skip db opt file: it's been created by mysql_create_db() in + the new directory, and will be dropped by mysql_rm_db() in the old one. + Trigger TRN and TRG files are be moved as regular files at the moment, + without any special treatment. + + Triggers without explicit database qualifiers in table names work fine: + use d1; + create trigger trg1 before insert on t2 for each row set @a:=1 + rename database d1 to d2; + + TODO: Triggers, having the renamed database explicitly written + in the table qualifiers. + 1. when the same database is renamed: + create trigger d1.trg1 before insert on d1.t1 for each row set @a:=1; + rename database d1 to d2; + Problem: After database renaming, the trigger's body + still points to the old database d1. + 2. 
when another database is renamed: + create trigger d3.trg1 before insert on d3.t1 for each row + insert into d1.t1 values (...); + rename database d1 to d2; + Problem: After renaming d1 to d2, the trigger's body + in the database d3 still points to database d1. + */ + + if ((dirp = my_dir(path,MYF(MY_DONT_SORT)))) + { + size_t nfiles= dirp->number_of_files; + for (size_t idx=0 ; idx < nfiles ; idx++) + { + FILEINFO *file= dirp->dir_entry + idx; + char oldname[FN_REFLEN + 1], newname[FN_REFLEN + 1]; + DBUG_PRINT("info",("Examining: %s", file->name)); + + /* skiping MY_DB_OPT_FILE */ + if (!my_strcasecmp(files_charset_info, file->name, MY_DB_OPT_FILE)) + continue; + + /* pass empty file name, and file->name as extension to avoid encoding */ + build_table_filename(oldname, sizeof(oldname)-1, + old_db->str, "", file->name, 0); + build_table_filename(newname, sizeof(newname)-1, + new_db.str, "", file->name, 0); + mysql_file_rename(key_file_misc, oldname, newname, MYF(MY_WME)); + } + my_dirend(dirp); + } + + /* + Step7: drop the old database. + query_cache_invalidate(olddb) is done inside mysql_rm_db(), no need + to execute them again. + mysql_rm_db() also "unuses" if we drop the current database. + */ + error= mysql_rm_db_internal(thd, old_db, 0, true); + + /* Step8: logging */ + if (mysql_bin_log.is_open()) + { + int errcode= query_error_code(thd, TRUE); + Query_log_event qinfo(thd, thd->query(), thd->query_length(), + FALSE, TRUE, TRUE, errcode); + thd->clear_error(); + error|= mysql_bin_log.write(&qinfo); + } + + /* Step9: Let's do "use newdb" if we renamed the current database */ + if (change_to_newdb) + error|= mysql_change_db(thd, & new_db, FALSE) != 0; + +exit: + DBUG_RETURN(error); +} + + + +/* + Check if there is directory for the database name. + + SYNOPSIS + check_db_dir_existence() + db_name database name + + RETURN VALUES + FALSE There is directory for the specified database name. + TRUE The directory does not exist. 
+*/ + + +bool check_db_dir_existence(const char *db_name) +{ + char db_dir_path[FN_REFLEN + 1]; + uint db_dir_path_len; + + if (dbname_cache->contains(db_name)) + return 0; + + db_dir_path_len= build_table_filename(db_dir_path, sizeof(db_dir_path) - 1, + db_name, "", "", 0); + + if (db_dir_path_len && db_dir_path[db_dir_path_len - 1] == FN_LIBCHAR) + db_dir_path[db_dir_path_len - 1]= 0; + + /* + Check access. + + The locking is to prevent creating permanent stale + entries for deleted databases, in case of + race condition with my_rmdir. + */ + mysql_rwlock_rdlock(&rmdir_lock); + int ret= my_access(db_dir_path, F_OK); + if (!ret) + dbname_cache->insert(db_name); + mysql_rwlock_unlock(&rmdir_lock); + return ret; +} + + +const char *normalize_db_name(const char *db, char *buffer, size_t buffer_size) +{ + DBUG_ASSERT(buffer_size > 1); + if (!lower_case_table_names) + return db; + strmake(buffer, db, buffer_size - 1); + my_casedn_str(system_charset_info, buffer); + return buffer; +} diff --git a/sql/sql_db.h b/sql/sql_db.h new file mode 100644 index 00000000..3c037d66 --- /dev/null +++ b/sql/sql_db.h @@ -0,0 +1,56 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_DB_INCLUDED +#define SQL_DB_INCLUDED + +#include "hash.h" /* HASH */ + +class THD; + +int mysql_create_db(THD *thd, const LEX_CSTRING *db, DDL_options_st options, + const Schema_specification_st *create); +bool mysql_alter_db(THD *thd, const LEX_CSTRING *db, + const Schema_specification_st *create); +bool mysql_rm_db(THD *thd, const LEX_CSTRING *db, bool if_exists); +bool mysql_upgrade_db(THD *thd, const LEX_CSTRING *old_db); +uint mysql_change_db(THD *thd, const LEX_CSTRING *new_db_name, + bool force_switch); + +bool mysql_opt_change_db(THD *thd, + const LEX_CSTRING *new_db_name, + LEX_STRING *saved_db_name, + bool force_switch, + bool *cur_db_changed); +bool my_dboptions_cache_init(void); +void my_dboptions_cache_free(void); +bool check_db_dir_existence(const char *db_name); +bool load_db_opt(THD *thd, const char *path, Schema_specification_st *create); +bool load_db_opt_by_name(THD *thd, const char *db_name, + Schema_specification_st *db_create_info); +CHARSET_INFO *get_default_db_collation(THD *thd, const char *db_name); +bool my_dbopt_init(void); +void my_dbopt_cleanup(void); + +const char *normalize_db_name(const char *db, char *buffer, + size_t buffer_size); + +void drop_database_objects(THD *thd, const LEX_CSTRING *path, + const LEX_CSTRING *db, + bool rm_mysql_schema); +my_bool rm_dir_w_symlink(const char *org_path, my_bool send_error); +#define MY_DB_OPT_FILE "db.opt" + +#endif /* SQL_DB_INCLUDED */ diff --git a/sql/sql_debug.h b/sql/sql_debug.h new file mode 100644 index 00000000..003caec5 --- /dev/null +++ b/sql/sql_debug.h @@ -0,0 +1,168 @@ +#ifndef SQL_DEBUG_INCLUDED +#define SQL_DEBUG_INCLUDED +/* + Copyright (c) 2022, MariaDB + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU 
General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + + +class Debug_key: public String +{ +public: + Debug_key() = default; + void print(THD *thd) const + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_UNKNOWN_ERROR, "DBUG: %.*s", length(), ptr()); + } + + bool append_key_type(ha_base_keytype type) + { + static LEX_CSTRING names[20]= + { + {STRING_WITH_LEN("END")}, + {STRING_WITH_LEN("TEXT")}, + {STRING_WITH_LEN("BINARY")}, + {STRING_WITH_LEN("SHORT_INT")}, + {STRING_WITH_LEN("LONG_INT")}, + {STRING_WITH_LEN("FLOAT")}, + {STRING_WITH_LEN("DOUBLE")}, + {STRING_WITH_LEN("NUM")}, + {STRING_WITH_LEN("USHORT_INT")}, + {STRING_WITH_LEN("ULONG_INT")}, + {STRING_WITH_LEN("LONGLONG")}, + {STRING_WITH_LEN("ULONGLONG")}, + {STRING_WITH_LEN("INT24")}, + {STRING_WITH_LEN("UINT24")}, + {STRING_WITH_LEN("INT8")}, + {STRING_WITH_LEN("VARTEXT1")}, + {STRING_WITH_LEN("VARBINARY1")}, + {STRING_WITH_LEN("VARTEXT2")}, + {STRING_WITH_LEN("VARBINARY2")}, + {STRING_WITH_LEN("BIT")} + }; + if ((uint) type >= array_elements(names)) + return append(STRING_WITH_LEN("???")); + return append(names[(uint) type]); + } + + bool append_KEY_flag_names(ulong flags) + { + static LEX_CSTRING names[17]= + { + {STRING_WITH_LEN("HA_NOSAME")}, // 1 + {STRING_WITH_LEN("HA_PACK_KEY")}, // 2; also in HA_KEYSEG + {STRING_WITH_LEN("HA_SPACE_PACK_USED")}, // 4 + {STRING_WITH_LEN("HA_VAR_LENGTH_KEY")}, // 8 + {STRING_WITH_LEN("HA_AUTO_KEY")}, // 16 + {STRING_WITH_LEN("HA_BINARY_PACK_KEY")}, // 32 + 
{STRING_WITH_LEN("HA_NULL_PART_KEY")}, // 64 + {STRING_WITH_LEN("HA_FULLTEXT")}, // 128 + {STRING_WITH_LEN("HA_UNIQUE_CHECK")}, // 256 + {STRING_WITH_LEN("HA_SORT_ALLOWS_SAME")}, // 512 + {STRING_WITH_LEN("HA_SPATIAL")}, // 1024 + {STRING_WITH_LEN("HA_NULL_ARE_EQUAL")}, // 2048 + {STRING_WITH_LEN("HA_USES_COMMENT")}, // 4096 + {STRING_WITH_LEN("HA_GENERATED_KEY")}, // 8192 + {STRING_WITH_LEN("HA_USES_PARSER")}, // 16384 + {STRING_WITH_LEN("HA_USES_BLOCK_SIZE")}, // 32768 + {STRING_WITH_LEN("HA_KEY_HAS_PART_KEY_SEG")}// 65536 + }; + return append_flag32_names((uint) flags, names, array_elements(names)); + } + + bool append_HA_KEYSEG_flag_names(uint32 flags) + { + static LEX_CSTRING names[]= + { + {STRING_WITH_LEN("HA_SPACE_PACK")}, // 1 + {STRING_WITH_LEN("HA_PACK_KEY")}, // 2; also in KEY/MI/KEY_DEF + {STRING_WITH_LEN("HA_PART_KEY_SEG")}, // 4 + {STRING_WITH_LEN("HA_VAR_LENGTH_PART")}, // 8 + {STRING_WITH_LEN("HA_NULL_PART")}, // 16 + {STRING_WITH_LEN("HA_BLOB_PART")}, // 32 + {STRING_WITH_LEN("HA_SWAP_KEY")}, // 64 + {STRING_WITH_LEN("HA_REVERSE_SORT")}, // 128 + {STRING_WITH_LEN("HA_NO_SORT")}, // 256 + {STRING_WITH_LEN("??? 
512 ???")}, // 512 + {STRING_WITH_LEN("HA_BIT_PART")}, // 1024 + {STRING_WITH_LEN("HA_CAN_MEMCMP")} // 2048 + }; + return append_flag32_names(flags, names, array_elements(names)); + } + + bool append_HA_KEYSEG_type(ha_base_keytype type) + { + return append_ulonglong(type) || + append(' ') || + append_key_type(type); + } + + bool append_HA_KEYSEG_flags(uint32 flags) + { + return append_hex_uint32(flags) || + append(' ') || + append_HA_KEYSEG_flag_names(flags); + } + + bool append_key(const LEX_CSTRING &name, uint32 flags) + { + return + append_name_value(Lex_cstring(STRING_WITH_LEN("name")), name, '`') || + append(Lex_cstring(STRING_WITH_LEN(" flags="))) || + append_hex_uint32(flags) || + append(' ') || + append_KEY_flag_names(flags); + } + + bool append_KEY(const KEY &key) + { + return append_key(key.name, key.flags); + } + + static void print_keysegs(THD *thd, const HA_KEYSEG *seg, uint count) + { + for (uint i= 0; i < count; i++) + { + Debug_key tmp; + if (!tmp.append(Lex_cstring(STRING_WITH_LEN(" seg["))) && + !tmp.append_ulonglong(i) && + !tmp.append(Lex_cstring(STRING_WITH_LEN("].type="))) && + !tmp.append_HA_KEYSEG_type((ha_base_keytype) seg[i].type)) + tmp.print(thd); + tmp.length(0); + if (!tmp.append(Lex_cstring(STRING_WITH_LEN(" seg["))) && + !tmp.append_ulonglong(i) && + !tmp.append(Lex_cstring(STRING_WITH_LEN("].flag="))) && + !tmp.append_HA_KEYSEG_flags(seg[i].flag)) + tmp.print(thd); + } + } + + static void print_keys(THD *thd, const char *where, + const KEY *keys, uint key_count) + { + for (uint i= 0; i < key_count; i++) + { + Debug_key tmp; + if (!tmp.append(where, strlen(where)) && !tmp.append_KEY(keys[i])) + tmp.print(thd); + } + } +}; + + +#endif // SQL_DEBUG_INCLUDED diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc new file mode 100644 index 00000000..90194659 --- /dev/null +++ b/sql/sql_delete.cc @@ -0,0 +1,1677 @@ +/* + Copyright (c) 2000, 2019, Oracle and/or its affiliates. 
+ Copyright (c) 2010, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Delete of records tables. + + Multi-table deletes were introduced by Monty and Sinisa +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "sql_delete.h" +#include "sql_cache.h" // query_cache_* +#include "sql_base.h" // open_temprary_table +#include "lock.h" // unlock_table_name +#include "sql_view.h" // check_key_in_view, mysql_frm_type +#include "sql_parse.h" // mysql_init_select +#include "filesort.h" // filesort +#include "sql_handler.h" // mysql_ha_rm_tables +#include "sql_select.h" +#include "sp_head.h" +#include "sql_trigger.h" +#include "sql_statistics.h" +#include "transaction.h" +#include "records.h" // init_read_record, +#include "filesort.h" +#include "uniques.h" +#include "sql_derived.h" // mysql_handle_derived + // end_read_record +#include "sql_insert.h" // fix_rownum_pointers +#include "sql_partition.h" // make_used_partitions_str + +#define MEM_STRIP_BUF_SIZE ((size_t) thd->variables.sortbuff_size) + +/* + @brief + Print query plan of a single-table DELETE command + + @detail + This function is used by EXPLAIN DELETE and by SHOW EXPLAIN when it is + invoked on a running DELETE statement. 
+*/ + +Explain_delete* Delete_plan::save_explain_delete_data(THD *thd, MEM_ROOT *mem_root) +{ + Explain_query *query= thd->lex->explain; + Explain_delete *explain= + new (mem_root) Explain_delete(mem_root, thd->lex->analyze_stmt); + if (!explain) + return 0; + + if (deleting_all_rows) + { + explain->deleting_all_rows= true; + explain->select_type= "SIMPLE"; + explain->rows= scanned_rows; + } + else + { + explain->deleting_all_rows= false; + if (Update_plan::save_explain_data_intern(thd, mem_root, explain, + thd->lex->analyze_stmt)) + return 0; + } + + query->add_upd_del_plan(explain); + return explain; +} + + +Explain_update* +Update_plan::save_explain_update_data(THD *thd, MEM_ROOT *mem_root) +{ + Explain_query *query= thd->lex->explain; + Explain_update* explain= + new (mem_root) Explain_update(mem_root, thd->lex->analyze_stmt); + if (!explain) + return 0; + if (save_explain_data_intern(thd, mem_root, explain, thd->lex->analyze_stmt)) + return 0; + query->add_upd_del_plan(explain); + return explain; +} + + +bool Update_plan::save_explain_data_intern(THD *thd, + MEM_ROOT *mem_root, + Explain_update *explain, + bool is_analyze) +{ + explain->select_type= "SIMPLE"; + explain->table_name.append(&table->pos_in_table_list->alias); + + explain->impossible_where= false; + explain->no_partitions= false; + + if (impossible_where) + { + explain->impossible_where= true; + return 0; + } + + if (no_partitions) + { + explain->no_partitions= true; + return 0; + } + + if (is_analyze || + (thd->variables.log_slow_verbosity & + LOG_SLOW_VERBOSITY_ENGINE)) + { + table->file->set_time_tracker(&explain->table_tracker); + + if (table->file->handler_stats && table->s->tmp_table != INTERNAL_TMP_TABLE) + explain->handler_for_stats= table->file; + } + + select_lex->set_explain_type(TRUE); + explain->select_type= select_lex->type; + /* Partitions */ + { +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info; + if ((part_info= table->part_info)) + { + 
make_used_partitions_str(mem_root, part_info, &explain->used_partitions, + explain->used_partitions_list); + explain->used_partitions_set= true; + } + else + explain->used_partitions_set= false; +#else + /* just produce empty column if partitioning is not compiled in */ + explain->used_partitions_set= false; +#endif + } + + + /* Set jtype */ + if (select && select->quick) + { + int quick_type= select->quick->get_type(); + if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) || + (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT) || + (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) || + (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION)) + explain->jtype= JT_INDEX_MERGE; + else + explain->jtype= JT_RANGE; + } + else + { + if (index == MAX_KEY) + explain->jtype= JT_ALL; + else + explain->jtype= JT_NEXT; + } + + explain->using_where= MY_TEST(select && select->cond); + explain->where_cond= select? select->cond: NULL; + + if (using_filesort) + if (!(explain->filesort_tracker= new (mem_root) Filesort_tracker(is_analyze))) + return 1; + explain->using_io_buffer= using_io_buffer; + + append_possible_keys(mem_root, explain->possible_keys, table, + possible_keys); + + explain->quick_info= NULL; + + /* Calculate key_len */ + if (select && select->quick) + { + explain->quick_info= select->quick->get_explain(mem_root); + } + else + { + if (index != MAX_KEY) + { + explain->key.set(mem_root, &table->key_info[index], + table->key_info[index].key_length); + } + } + explain->rows= scanned_rows; + + if (select && select->quick && + select->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) + { + explain_append_mrr_info((QUICK_RANGE_SELECT*)select->quick, + &explain->mrr_type); + } + + /* Save subquery children */ + for (SELECT_LEX_UNIT *unit= select_lex->first_inner_unit(); + unit; + unit= unit->next_unit()) + { + if (unit->explainable()) + explain->add_child(unit->first_select()->select_number); + } + return 0; +} + + +static bool record_should_be_deleted(THD *thd, 
TABLE *table, SQL_SELECT *sel, + Explain_delete *explain, bool truncate_history) +{ + explain->tracker.on_record_read(); + thd->inc_examined_row_count(1); + if (table->vfield) + (void) table->update_virtual_fields(table->file, VCOL_UPDATE_FOR_DELETE); + if (!sel || sel->skip_record(thd) > 0) + { + explain->tracker.on_record_after_where(); + return true; + } + return false; +} + +static +int update_portion_of_time(THD *thd, TABLE *table, + const vers_select_conds_t &period_conds, + bool *inside_period) +{ + bool lcond= period_conds.field_start->val_datetime_packed(thd) + < period_conds.start.item->val_datetime_packed(thd); + bool rcond= period_conds.field_end->val_datetime_packed(thd) + > period_conds.end.item->val_datetime_packed(thd); + + *inside_period= !lcond && !rcond; + if (*inside_period) + return 0; + + DBUG_ASSERT(!table->triggers + || !table->triggers->has_triggers(TRG_EVENT_INSERT, + TRG_ACTION_BEFORE)); + + int res= 0; + Item *src= lcond ? period_conds.start.item : period_conds.end.item; + uint dst_fieldno= lcond ? 
table->s->period.end_fieldno + : table->s->period.start_fieldno; + + ulonglong prev_insert_id= table->file->next_insert_id; + store_record(table, record[1]); + if (likely(!res)) + res= src->save_in_field(table->field[dst_fieldno], true); + + if (likely(!res)) + res= table->update_generated_fields(); + + if(likely(!res)) + res= table->file->ha_update_row(table->record[1], table->record[0]); + + if (likely(!res) && table->triggers) + res= table->triggers->process_triggers(thd, TRG_EVENT_INSERT, + TRG_ACTION_AFTER, true); + restore_record(table, record[1]); + if (res) + table->file->restore_auto_increment(prev_insert_id); + + if (likely(!res) && lcond && rcond) + res= table->period_make_insert(period_conds.end.item, + table->field[table->s->period.start_fieldno]); + + return res; +} + +inline +int TABLE::delete_row() +{ + if (!versioned(VERS_TIMESTAMP) || !vers_end_field()->is_max()) + return file->ha_delete_row(record[0]); + + store_record(this, record[1]); + vers_update_end(); + int err= file->ha_update_row(record[1], record[0]); + /* + MDEV-23644: we get HA_ERR_FOREIGN_DUPLICATE_KEY iff we already got history + row with same trx_id which is the result of foreign key action, so we + don't need one more history row. + */ + if (err == HA_ERR_FOREIGN_DUPLICATE_KEY) + return file->ha_delete_row(record[0]); + return err; +} + + +/** + Implement DELETE SQL word. + + @note Like implementations of other DDL/DML in MySQL, this function + relies on the caller to close the thread tables. This is done in the + end of dispatch_command(). 
+*/ + +bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, + SQL_I_List *order_list, ha_rows limit, + ulonglong options, select_result *result) +{ + bool will_batch= FALSE; + int error, loc_error; + TABLE *table; + SQL_SELECT *select=0; + SORT_INFO *file_sort= 0; + READ_RECORD info; + bool using_limit=limit != HA_POS_ERROR; + bool transactional_table, safe_update, const_cond; + bool const_cond_result; + bool return_error= 0; + ha_rows deleted= 0; + bool reverse= FALSE; + bool has_triggers= false; + ORDER *order= (ORDER *) ((order_list && order_list->elements) ? + order_list->first : NULL); + SELECT_LEX *select_lex= thd->lex->first_select_lex(); + SELECT_LEX *returning= thd->lex->has_returning() ? thd->lex->returning() : 0; + killed_state killed_status= NOT_KILLED; + THD::enum_binlog_query_type query_type= THD::ROW_QUERY_TYPE; + bool binlog_is_row; + Explain_delete *explain; + Delete_plan query_plan(thd->mem_root); + Unique * deltempfile= NULL; + bool delete_record= false; + bool delete_while_scanning; + bool portion_of_time_through_update; + DBUG_ENTER("mysql_delete"); + + query_plan.index= MAX_KEY; + query_plan.using_filesort= FALSE; + + create_explain_query(thd->lex, thd->mem_root); + if (open_and_lock_tables(thd, table_list, TRUE, 0)) + DBUG_RETURN(TRUE); + + THD_STAGE_INFO(thd, stage_init_update); + + const bool delete_history= table_list->vers_conditions.delete_history; + DBUG_ASSERT(!(delete_history && table_list->period_conditions.is_set())); + + if (thd->lex->handle_list_of_derived(table_list, DT_MERGE_FOR_INSERT)) + DBUG_RETURN(TRUE); + if (thd->lex->handle_list_of_derived(table_list, DT_PREPARE)) + DBUG_RETURN(TRUE); + + if (!table_list->single_table_updatable()) + { + my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias.str, "DELETE"); + DBUG_RETURN(TRUE); + } + if (!(table= table_list->table) || !table->is_created()) + { + my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0), + table_list->view_db.str, table_list->view_name.str); + 
DBUG_RETURN(TRUE); + } + table->map=1; + query_plan.select_lex= thd->lex->first_select_lex(); + query_plan.table= table; + + thd->lex->promote_select_describe_flag_if_needed(); + + if (mysql_prepare_delete(thd, table_list, &conds, &delete_while_scanning)) + DBUG_RETURN(TRUE); + + if (table_list->has_period()) + { + if (!table_list->period_conditions.start.item->const_item() + || !table_list->period_conditions.end.item->const_item()) + { + my_error(ER_NOT_CONSTANT_EXPRESSION, MYF(0), "FOR PORTION OF"); + DBUG_RETURN(true); + } + } + + if (delete_history) + table->vers_write= false; + + if (returning) + (void) result->prepare(returning->item_list, NULL); + + if (thd->lex->current_select->first_cond_optimization) + { + thd->lex->current_select->save_leaf_tables(thd); + thd->lex->current_select->first_cond_optimization= 0; + } + /* check ORDER BY even if it can be ignored */ + if (order) + { + TABLE_LIST tables; + List fields; + List all_fields; + + bzero((char*) &tables,sizeof(tables)); + tables.table = table; + tables.alias = table_list->alias; + + if (select_lex->setup_ref_array(thd, order_list->elements) || + setup_order(thd, select_lex->ref_pointer_array, &tables, + fields, all_fields, order)) + { + free_underlaid_joins(thd, thd->lex->first_select_lex()); + DBUG_RETURN(TRUE); + } + } + + /* Apply the IN=>EXISTS transformation to all subqueries and optimize them. */ + if (select_lex->optimize_unflattened_subqueries(false)) + DBUG_RETURN(TRUE); + + const_cond= (!conds || conds->const_item()); + safe_update= (thd->variables.option_bits & OPTION_SAFE_UPDATES) && + !thd->lex->describe; + if (safe_update && const_cond) + { + my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE, + ER_THD(thd, ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE), MYF(0)); + DBUG_RETURN(TRUE); + } + + const_cond_result= const_cond && (!conds || conds->val_int()); + if (unlikely(thd->is_error())) + { + /* Error evaluating val_int(). 
*/ + DBUG_RETURN(TRUE); + } + + /* + Test if the user wants to delete all rows and deletion doesn't have + any side-effects (because of triggers), so we can use optimized + handler::delete_all_rows() method. + + We can use delete_all_rows() if and only if: + - We allow new functions (not using option --skip-new), and are + not in safe mode (not using option --safe-mode) + - There is no limit clause + - The condition is constant + - If there is a condition, then it it produces a non-zero value + - If the current command is DELETE FROM with no where clause, then: + - We should not be binlogging this statement in row-based, and + - there should be no delete triggers associated with the table. + */ + + has_triggers= table->triggers && table->triggers->has_delete_triggers(); + transactional_table= table->file->has_transactions_and_rollback(); + + if (!returning && !using_limit && const_cond_result && + (!thd->is_current_stmt_binlog_format_row() && !has_triggers) + && !table->versioned(VERS_TIMESTAMP) && !table_list->has_period()) + { + /* Update the table->file->stats.records number */ + table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); + ha_rows const maybe_deleted= table->file->stats.records; + DBUG_PRINT("debug", ("Trying to use delete_all_rows()")); + + query_plan.set_delete_all_rows(maybe_deleted); + if (thd->lex->describe) + goto produce_explain_and_leave; + + if (likely(!(error=table->file->ha_delete_all_rows()))) + { + /* + If delete_all_rows() is used, it is not possible to log the + query in row format, so we have to log it in statement format. 
+ */ + query_type= THD::STMT_QUERY_TYPE; + error= -1; + deleted= maybe_deleted; + if (!query_plan.save_explain_delete_data(thd, thd->mem_root)) + error= 1; + goto cleanup; + } + if (error != HA_ERR_WRONG_COMMAND) + { + table->file->print_error(error,MYF(0)); + error=0; + goto cleanup; + } + /* Handler didn't support fast delete; Delete rows one by one */ + query_plan.cancel_delete_all_rows(); + } + if (conds) + { + Item::cond_result result; + conds= conds->remove_eq_conds(thd, &result, true); + if (result == Item::COND_FALSE) // Impossible where + { + limit= 0; + query_plan.set_impossible_where(); + if (thd->lex->describe || thd->lex->analyze_stmt) + goto produce_explain_and_leave; + } + } + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (prune_partitions(thd, table, conds)) + { + free_underlaid_joins(thd, select_lex); + + query_plan.set_no_partitions(); + if (thd->lex->describe || thd->lex->analyze_stmt) + goto produce_explain_and_leave; + + if (thd->binlog_for_noop_dml(transactional_table)) + DBUG_RETURN(1); + + my_ok(thd, 0); + DBUG_RETURN(0); + } +#endif + /* Update the table->file->stats.records number */ + table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); + set_statistics_for_table(thd, table); + + table->covering_keys.clear_all(); + table->opt_range_keys.clear_all(); + + select=make_select(table, 0, 0, conds, (SORT_INFO*) 0, 0, &error); + if (unlikely(error)) + DBUG_RETURN(TRUE); + if ((select && select->check_quick(thd, safe_update, limit)) || !limit) + { + query_plan.set_impossible_where(); + if (thd->lex->describe || thd->lex->analyze_stmt) + goto produce_explain_and_leave; + + delete select; + free_underlaid_joins(thd, select_lex); + /* + Error was already created by quick select evaluation (check_quick()). + TODO: Add error code output parameter to Item::val_xxx() methods. + Currently they rely on the user checking DA for + errors when unwinding the stack after calling Item::val_xxx(). 
+ */ + if (unlikely(thd->is_error())) + DBUG_RETURN(TRUE); + + if (thd->binlog_for_noop_dml(transactional_table)) + DBUG_RETURN(1); + + my_ok(thd, 0); + DBUG_RETURN(0); // Nothing to delete + } + + /* If running in safe sql mode, don't allow updates without keys */ + if (!select || !select->quick) + { + thd->set_status_no_index_used(); + if (safe_update && !using_limit) + { + delete select; + free_underlaid_joins(thd, select_lex); + my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE, + ER_THD(thd, ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE), MYF(0)); + DBUG_RETURN(TRUE); + } + } + if (options & OPTION_QUICK) + (void) table->file->extra(HA_EXTRA_QUICK); + + query_plan.scanned_rows= select? select->records: table->file->stats.records; + if (order) + { + table->update_const_key_parts(conds); + order= simple_remove_const(order, conds); + + if (select && select->quick && select->quick->unique_key_range()) + { // Single row select (always "ordered") + query_plan.using_filesort= FALSE; + query_plan.index= MAX_KEY; + } + else + { + ha_rows scanned_limit= query_plan.scanned_rows; + table->no_keyread= 1; + query_plan.index= get_index_for_order(order, table, select, limit, + &scanned_limit, + &query_plan.using_filesort, + &reverse); + table->no_keyread= 0; + if (!query_plan.using_filesort) + query_plan.scanned_rows= scanned_limit; + } + } + + query_plan.select= select; + query_plan.possible_keys= select? select->possible_keys: key_map(0); + + /* + Ok, we have generated a query plan for the DELETE. 
+ - if we're running EXPLAIN DELETE, goto produce explain output + - otherwise, execute the query plan + */ + if (thd->lex->describe) + goto produce_explain_and_leave; + + if (!(explain= query_plan.save_explain_delete_data(thd, thd->mem_root))) + goto got_error; + ANALYZE_START_TRACKING(thd, &explain->command_tracker); + + DBUG_EXECUTE_IF("show_explain_probe_delete_exec_start", + dbug_serve_apcs(thd, 1);); + + if (!(select && select->quick)) + status_var_increment(thd->status_var.delete_scan_count); + + binlog_is_row= thd->is_current_stmt_binlog_format_row(); + DBUG_PRINT("info", ("binlog_is_row: %s", binlog_is_row ? "TRUE" : "FALSE")); + + /* + We can use direct delete (delete that is done silently in the handler) + if none of the following conditions are true: + - There are triggers + - There is binary logging + - There is a virtual not stored column in the WHERE clause + - ORDER BY or LIMIT + - As this requires the rows to be deleted in a specific order + - Note that Spider can handle ORDER BY and LIMIT in a cluster with + one data node. These conditions are therefore checked in + direct_delete_rows_init(). 
+ + Direct delete does not require a WHERE clause + + Later we also ensure that we are only using one table (no sub queries) + */ + + if ((table->file->ha_table_flags() & HA_CAN_DIRECT_UPDATE_AND_DELETE) && + !has_triggers && !binlog_is_row && !returning && + !table_list->has_period()) + { + table->mark_columns_needed_for_delete(); + if (!table->check_virtual_columns_marked_for_read()) + { + DBUG_PRINT("info", ("Trying direct delete")); + bool use_direct_delete= !select || !select->cond; + if (!use_direct_delete && + (select->cond->used_tables() & ~RAND_TABLE_BIT) == table->map) + { + DBUG_ASSERT(!table->file->pushed_cond); + if (!table->file->cond_push(select->cond)) + { + use_direct_delete= TRUE; + table->file->pushed_cond= select->cond; + } + } + if (use_direct_delete && !table->file->direct_delete_rows_init()) + { + /* Direct deleting is supported */ + DBUG_PRINT("info", ("Using direct delete")); + THD_STAGE_INFO(thd, stage_updating); + if (!(error= table->file->ha_direct_delete_rows(&deleted))) + error= -1; + goto terminate_delete; + } + } + } + + if (query_plan.using_filesort) + { + { + Filesort fsort(order, HA_POS_ERROR, true, select); + DBUG_ASSERT(query_plan.index == MAX_KEY); + + Filesort_tracker *fs_tracker= + thd->lex->explain->get_upd_del_plan()->filesort_tracker; + + if (!(file_sort= filesort(thd, table, &fsort, fs_tracker))) + goto got_error; + + thd->inc_examined_row_count(file_sort->examined_rows); + /* + Filesort has already found and selected the rows we want to delete, + so we don't need the where clause + */ + delete select; + + /* + If we are not in DELETE ... RETURNING, we can free subqueries. (in + DELETE ... RETURNING we can't, because the RETURNING part may have + a subquery in it) + */ + if (!returning) + free_underlaid_joins(thd, select_lex); + select= 0; + } + } + + /* If quick select is used, initialize it before retrieving rows. 
*/ + if (select && select->quick && select->quick->reset()) + goto got_error; + + if (query_plan.index == MAX_KEY || (select && select->quick)) + error= init_read_record(&info, thd, table, select, file_sort, 1, 1, FALSE); + else + error= init_read_record_idx(&info, thd, table, 1, query_plan.index, + reverse); + if (unlikely(error)) + goto got_error; + + if (unlikely(init_ftfuncs(thd, select_lex, 1))) + goto got_error; + + if (table_list->has_period()) + { + table->use_all_columns(); + table->rpl_write_set= table->write_set; + // Initialize autoinc. + // We don't set next_number_field here, as it is handled manually. + if (table->found_next_number_field) + table->file->info(HA_STATUS_AUTO); + } + else + { + table->mark_columns_needed_for_delete(); + } + + if ((table->file->ha_table_flags() & HA_CAN_FORCE_BULK_DELETE) && + !table->prepare_triggers_for_delete_stmt_or_event()) + will_batch= !table->file->start_bulk_delete(); + + /* + thd->get_stmt_da()->is_set() means first iteration of prepared statement + with array binding operation execution (non optimized so it is not + INSERT) + */ + if (returning && !thd->get_stmt_da()->is_set()) + { + if (result->send_result_set_metadata(returning->item_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) + goto cleanup; + } + + explain= (Explain_delete*)thd->lex->explain->get_upd_del_plan(); + explain->tracker.on_scan_init(); + + thd->get_stmt_da()->reset_current_row_for_warning(1); + + if (!delete_while_scanning) + { + /* + The table we are going to delete appears in subqueries in the where + clause. Instead of deleting the rows, first mark them deleted. + */ + ha_rows tmplimit=limit; + deltempfile= new (thd->mem_root) Unique (refpos_order_cmp, table->file, + table->file->ref_length, + MEM_STRIP_BUF_SIZE); + + THD_STAGE_INFO(thd, stage_searching_rows_for_update); + while (!(error=info.read_record()) && !thd->killed && + ! 
thd->is_error()) + { + if (record_should_be_deleted(thd, table, select, explain, delete_history)) + { + table->file->position(table->record[0]); + if (unlikely((error= + deltempfile->unique_add((char*) table->file->ref)))) + { + error= 1; + goto terminate_delete; + } + if (!--tmplimit && using_limit) + break; + } + } + end_read_record(&info); + if (unlikely(deltempfile->get(table)) || + unlikely(table->file->ha_index_or_rnd_end()) || + unlikely(init_read_record(&info, thd, table, 0, &deltempfile->sort, 0, + 1, false))) + { + error= 1; + goto terminate_delete; + } + delete_record= true; + } + + /* + From SQL2016, Part 2, 15.7 , + General Rules, 8), we can conclude that DELETE FOR PORTTION OF time performs + 0-2 INSERTS + DELETE. We can substitute INSERT+DELETE with one UPDATE, with + a condition of no side effects. The side effect is possible if there is a + BEFORE INSERT trigger, since it is the only one splitting DELETE and INSERT + operations. + Another possible side effect is related to tables of non-transactional + engines, since UPDATE is anyway atomic, and DELETE+INSERT is not. + + This optimization is not possible for system-versioned table. 
+ */ + portion_of_time_through_update= + !(table->triggers && table->triggers->has_triggers(TRG_EVENT_INSERT, + TRG_ACTION_BEFORE)) + && !table->versioned() + && table->file->has_transactions(); + + if (table->versioned(VERS_TIMESTAMP) || (table_list->has_period())) + table->file->prepare_for_insert(1); + DBUG_ASSERT(table->file->inited != handler::NONE); + + THD_STAGE_INFO(thd, stage_updating); + fix_rownum_pointers(thd, thd->lex->current_select, &deleted); + + thd->get_stmt_da()->reset_current_row_for_warning(0); + while (likely(!(error=info.read_record())) && likely(!thd->killed) && + likely(!thd->is_error())) + { + thd->get_stmt_da()->inc_current_row_for_warning(); + if (delete_while_scanning) + delete_record= record_should_be_deleted(thd, table, select, explain, + delete_history); + if (delete_record) + { + if (!delete_history && table->triggers && + table->triggers->process_triggers(thd, TRG_EVENT_DELETE, + TRG_ACTION_BEFORE, FALSE)) + { + error= 1; + break; + } + + // no LIMIT / OFFSET + if (returning && result->send_data(returning->item_list) < 0) + { + error=1; + break; + } + + if (table_list->has_period() && portion_of_time_through_update) + { + bool need_delete= true; + error= update_portion_of_time(thd, table, table_list->period_conditions, + &need_delete); + if (likely(!error) && need_delete) + error= table->delete_row(); + } + else + { + error= table->delete_row(); + + ha_rows rows_inserted; + if (likely(!error) && table_list->has_period() + && !portion_of_time_through_update) + error= table->insert_portion_of_time(thd, table_list->period_conditions, + &rows_inserted); + } + + if (likely(!error)) + { + deleted++; + if (!delete_history && table->triggers && + table->triggers->process_triggers(thd, TRG_EVENT_DELETE, + TRG_ACTION_AFTER, FALSE)) + { + error= 1; + break; + } + if (!--limit && using_limit) + { + error= -1; + break; + } + } + else + { + table->file->print_error(error, + MYF(thd->lex->ignore ? 
ME_WARNING : 0)); + if (thd->is_error()) + { + error= 1; + break; + } + } + } + /* + Don't try unlocking the row if skip_record reported an error since in + this case the transaction might have been rolled back already. + */ + else if (likely(!thd->is_error())) + table->file->unlock_row(); // Row failed selection, release lock on it + else + break; + } + thd->get_stmt_da()->reset_current_row_for_warning(1); + +terminate_delete: + killed_status= thd->killed; + if (unlikely(killed_status != NOT_KILLED || thd->is_error())) + error= 1; // Aborted + if (will_batch && unlikely((loc_error= table->file->end_bulk_delete()))) + { + if (error != 1) + table->file->print_error(loc_error,MYF(0)); + error=1; + } + THD_STAGE_INFO(thd, stage_end); + end_read_record(&info); + if (table_list->has_period()) + table->file->ha_release_auto_increment(); + if (options & OPTION_QUICK) + (void) table->file->extra(HA_EXTRA_NORMAL); + ANALYZE_STOP_TRACKING(thd, &explain->command_tracker); + +cleanup: + /* + Invalidate the table in the query cache if something changed. This must + be before binlog writing and ha_autocommit_... 
+ */ + if (deleted) + { + query_cache_invalidate3(thd, table_list, 1); + } + + if (thd->lex->current_select->first_cond_optimization) + { + thd->lex->current_select->save_leaf_tables(thd); + thd->lex->current_select->first_cond_optimization= 0; + } + + delete deltempfile; + deltempfile=NULL; + delete select; + select= NULL; + + if (!transactional_table && deleted > 0) + thd->transaction->stmt.modified_non_trans_table= + thd->transaction->all.modified_non_trans_table= TRUE; + + /* See similar binlogging code in sql_update.cc, for comments */ + if (likely((error < 0) || thd->transaction->stmt.modified_non_trans_table + || thd->log_current_statement())) + { + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) + { + int errcode= 0; + if (error < 0) + thd->clear_error(); + else + errcode= query_error_code(thd, killed_status == NOT_KILLED); + + StatementBinlog stmt_binlog(thd, table->versioned(VERS_TRX_ID) || + thd->binlog_need_stmt_format(transactional_table)); + /* + [binlog]: If 'handler::delete_all_rows()' was called and the + storage engine does not inject the rows itself, we replicate + statement-based; otherwise, 'ha_delete_row()' was used to + delete specific rows which we might log row-based. 
+ */ + int log_result= thd->binlog_query(query_type, + thd->query(), thd->query_length(), + transactional_table, FALSE, FALSE, + errcode); + + if (log_result > 0) + { + error=1; + } + } + } + DBUG_ASSERT(transactional_table || !deleted || thd->transaction->stmt.modified_non_trans_table); + + if (likely(error < 0) || + (thd->lex->ignore && !thd->is_error() && !thd->is_fatal_error)) + { + if (thd->lex->analyze_stmt) + goto send_nothing_and_leave; + + if (returning) + result->send_eof(); + else + my_ok(thd, deleted); + DBUG_PRINT("info",("%ld records deleted",(long) deleted)); + } + delete file_sort; + free_underlaid_joins(thd, select_lex); + if (table->file->pushed_cond) + table->file->cond_pop(); + DBUG_RETURN(error >= 0 || thd->is_error()); + + /* Special exits */ +produce_explain_and_leave: + /* + We come here for various "degenerate" query plans: impossible WHERE, + no-partitions-used, impossible-range, etc. + */ + if (!(query_plan.save_explain_delete_data(thd, thd->mem_root))) + goto got_error; + +send_nothing_and_leave: + /* + ANALYZE DELETE jumps here. We can't send explain right here, because + we might be using ANALYZE DELETE ...RETURNING, in which case we have + Protocol_discard active. + */ + + delete select; + delete file_sort; + free_underlaid_joins(thd, select_lex); + if (table->file->pushed_cond) + table->file->cond_pop(); + + DBUG_ASSERT(!return_error || thd->is_error() || thd->killed); + DBUG_RETURN((return_error || thd->is_error() || thd->killed) ? 
1 : 0); + +got_error: + return_error= 1; + goto send_nothing_and_leave; +} + + +/* + Prepare items in DELETE statement + + SYNOPSIS + mysql_prepare_delete() + thd - thread handler + table_list - global/local table list + conds - conditions + + RETURN VALUE + FALSE OK + TRUE error +*/ +int mysql_prepare_delete(THD *thd, TABLE_LIST *table_list, Item **conds, + bool *delete_while_scanning) +{ + Item *fake_conds= 0; + SELECT_LEX *select_lex= thd->lex->first_select_lex(); + DBUG_ENTER("mysql_prepare_delete"); + List all_fields; + + *delete_while_scanning= true; + thd->lex->allow_sum_func.clear_all(); + if (setup_tables_and_check_access(thd, &select_lex->context, + &select_lex->top_join_list, table_list, + select_lex->leaf_tables, FALSE, + DELETE_ACL, SELECT_ACL, TRUE)) + DBUG_RETURN(TRUE); + + if (table_list->vers_conditions.is_set() && table_list->is_view_or_derived()) + { + my_error(ER_IT_IS_A_VIEW, MYF(0), table_list->table_name.str); + DBUG_RETURN(true); + } + + if (table_list->has_period()) + { + if (table_list->is_view_or_derived()) + { + my_error(ER_IT_IS_A_VIEW, MYF(0), table_list->table_name.str); + DBUG_RETURN(true); + } + + if (select_lex->period_setup_conds(thd, table_list)) + DBUG_RETURN(true); + } + + DBUG_ASSERT(table_list->table); + // conds could be cached from previous SP call + DBUG_ASSERT(!table_list->vers_conditions.need_setup() || + !*conds || thd->stmt_arena->is_stmt_execute()); + if (select_lex->vers_setup_conds(thd, table_list)) + DBUG_RETURN(TRUE); + + *conds= select_lex->where; + + if (setup_returning_fields(thd, table_list) || + setup_conds(thd, table_list, select_lex->leaf_tables, conds) || + setup_ftfuncs(select_lex)) + DBUG_RETURN(TRUE); + if (!table_list->single_table_updatable() || + check_key_in_view(thd, table_list)) + { + my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias.str, "DELETE"); + DBUG_RETURN(TRUE); + } + + /* + Application-time periods: if FOR PORTION OF ... 
syntax used, DELETE + statement could issue delete_row's mixed with write_row's. This causes + problems for myisam and corrupts table, if deleting while scanning. + */ + if (table_list->has_period() + || unique_table(thd, table_list, table_list->next_global, 0)) + *delete_while_scanning= false; + + if (select_lex->inner_refs_list.elements && + fix_inner_refs(thd, all_fields, select_lex, select_lex->ref_pointer_array)) + DBUG_RETURN(TRUE); + + select_lex->fix_prepare_information(thd, conds, &fake_conds); + if (!thd->lex->upd_del_where) + thd->lex->upd_del_where= *conds; + DBUG_RETURN(FALSE); +} + + +/*************************************************************************** + Delete multiple tables from join +***************************************************************************/ + + +extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b) +{ + handler *file= (handler*)arg; + return file->cmp_ref((const uchar*)a, (const uchar*)b); +} + +/* + make delete specific preparation and checks after opening tables + + SYNOPSIS + mysql_multi_delete_prepare() + thd thread handler + + RETURN + FALSE OK + TRUE Error +*/ + +int mysql_multi_delete_prepare(THD *thd) +{ + LEX *lex= thd->lex; + TABLE_LIST *aux_tables= lex->auxiliary_table_list.first; + TABLE_LIST *target_tbl; + DBUG_ENTER("mysql_multi_delete_prepare"); + + if (mysql_handle_derived(lex, DT_INIT)) + DBUG_RETURN(TRUE); + if (mysql_handle_derived(lex, DT_MERGE_FOR_INSERT)) + DBUG_RETURN(TRUE); + if (mysql_handle_derived(lex, DT_PREPARE)) + DBUG_RETURN(TRUE); + /* + setup_tables() need for VIEWs. JOIN::prepare() will not do it second + time. 
+ + lex->query_tables also point on local list of DELETE SELECT_LEX + */ + if (setup_tables_and_check_access(thd, + &thd->lex->first_select_lex()->context, + &thd->lex->first_select_lex()-> + top_join_list, + lex->query_tables, + lex->first_select_lex()->leaf_tables, + FALSE, DELETE_ACL, SELECT_ACL, FALSE)) + DBUG_RETURN(TRUE); + + /* + Multi-delete can't be constructed over-union => we always have + single SELECT on top and have to check underlying SELECTs of it + */ + lex->first_select_lex()->set_unique_exclude(); + /* Fix tables-to-be-deleted-from list to point at opened tables */ + for (target_tbl= (TABLE_LIST*) aux_tables; + target_tbl; + target_tbl= target_tbl->next_local) + { + + target_tbl->table= target_tbl->correspondent_table->table; + if (target_tbl->correspondent_table->is_multitable()) + { + my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0), + target_tbl->correspondent_table->view_db.str, + target_tbl->correspondent_table->view_name.str); + DBUG_RETURN(TRUE); + } + + if (!target_tbl->correspondent_table->single_table_updatable() || + check_key_in_view(thd, target_tbl->correspondent_table)) + { + my_error(ER_NON_UPDATABLE_TABLE, MYF(0), + target_tbl->table_name.str, "DELETE"); + DBUG_RETURN(TRUE); + } + } + + for (target_tbl= (TABLE_LIST*) aux_tables; + target_tbl; + target_tbl= target_tbl->next_local) + { + /* + Check that table from which we delete is not used somewhere + inside subqueries/view. 
+ */ + { + TABLE_LIST *duplicate; + if ((duplicate= unique_table(thd, target_tbl->correspondent_table, + lex->query_tables, 0))) + { + update_non_unique_table_error(target_tbl->correspondent_table, + "DELETE", duplicate); + DBUG_RETURN(TRUE); + } + } + } + /* + Reset the exclude flag to false so it doesn't interfare + with further calls to unique_table + */ + lex->first_select_lex()->exclude_from_table_unique_test= FALSE; + + if (lex->save_prep_leaf_tables()) + DBUG_RETURN(TRUE); + + DBUG_RETURN(FALSE); +} + + +multi_delete::multi_delete(THD *thd_arg, TABLE_LIST *dt, uint num_of_tables_arg): + select_result_interceptor(thd_arg), delete_tables(dt), deleted(0), found(0), + num_of_tables(num_of_tables_arg), error(0), + do_delete(0), transactional_tables(0), normal_tables(0), error_handled(0) +{ + tempfiles= (Unique **) thd_arg->calloc(sizeof(Unique *) * num_of_tables); +} + + +int +multi_delete::prepare(List &values, SELECT_LEX_UNIT *u) +{ + DBUG_ENTER("multi_delete::prepare"); + unit= u; + do_delete= 1; + THD_STAGE_INFO(thd, stage_deleting_from_main_table); + DBUG_RETURN(0); +} + +void multi_delete::prepare_to_read_rows() +{ + /* see multi_update::prepare_to_read_rows() */ + for (TABLE_LIST *walk= delete_tables; walk; walk= walk->next_local) + { + TABLE_LIST *tbl= walk->correspondent_table->find_table_for_update(); + tbl->table->mark_columns_needed_for_delete(); + } +} + +bool +multi_delete::initialize_tables(JOIN *join) +{ + TABLE_LIST *walk; + Unique **tempfiles_ptr; + DBUG_ENTER("initialize_tables"); + + if (unlikely((thd->variables.option_bits & OPTION_SAFE_UPDATES) && + error_if_full_join(join))) + DBUG_RETURN(1); + + table_map tables_to_delete_from=0; + delete_while_scanning= true; + for (walk= delete_tables; walk; walk= walk->next_local) + { + TABLE_LIST *tbl= walk->correspondent_table->find_table_for_update(); + tables_to_delete_from|= tbl->table->map; + if (delete_while_scanning && + unique_table(thd, tbl, join->tables_list, 0)) + { + /* + If the table we 
are going to delete from appears + in join, we need to defer delete. So the delete + doesn't interfers with the scaning of results. + */ + delete_while_scanning= false; + } + } + + walk= delete_tables; + + for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_BUSH_ROOTS, + WITH_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) + { + if (!tab->bush_children && tab->table->map & tables_to_delete_from) + { + /* We are going to delete from this table */ + TABLE *tbl=walk->table=tab->table; + walk= walk->next_local; + /* Don't use KEYREAD optimization on this table */ + tbl->no_keyread=1; + /* Don't use record cache */ + tbl->no_cache= 1; + tbl->covering_keys.clear_all(); + if (tbl->file->has_transactions()) + transactional_tables= 1; + else + normal_tables= 1; + tbl->prepare_triggers_for_delete_stmt_or_event(); + tbl->prepare_for_position(); + + if (tbl->versioned(VERS_TIMESTAMP)) + tbl->file->prepare_for_insert(1); + } + else if ((tab->type != JT_SYSTEM && tab->type != JT_CONST) && + walk == delete_tables) + { + /* + We are not deleting from the table we are scanning. 
In this + case send_data() shouldn't delete any rows a we may touch + the rows in the deleted table many times + */ + delete_while_scanning= false; + } + } + walk= delete_tables; + tempfiles_ptr= tempfiles; + if (delete_while_scanning) + { + table_being_deleted= delete_tables; + walk= walk->next_local; + } + for (;walk ;walk= walk->next_local) + { + TABLE *table=walk->table; + *tempfiles_ptr++= new (thd->mem_root) Unique (refpos_order_cmp, table->file, + table->file->ref_length, + MEM_STRIP_BUF_SIZE); + } + if (init_ftfuncs(thd, thd->lex->current_select, 1)) + DBUG_RETURN(true); + + DBUG_RETURN(thd->is_fatal_error); +} + + +multi_delete::~multi_delete() +{ + for (table_being_deleted= delete_tables; + table_being_deleted; + table_being_deleted= table_being_deleted->next_local) + { + TABLE *table= table_being_deleted->table; + table->no_keyread=0; + table->no_cache= 0; + } + + for (uint counter= 0; counter < num_of_tables; counter++) + { + if (tempfiles[counter]) + delete tempfiles[counter]; + } +} + + +int multi_delete::send_data(List &values) +{ + int secure_counter= delete_while_scanning ? 
-1 : 0; + TABLE_LIST *del_table; + DBUG_ENTER("multi_delete::send_data"); + + bool ignore= thd->lex->ignore; + + for (del_table= delete_tables; + del_table; + del_table= del_table->next_local, secure_counter++) + { + TABLE *table= del_table->table; + + /* Check if we are using outer join and we didn't find the row */ + if (table->status & (STATUS_NULL_ROW | STATUS_DELETED)) + continue; + + table->file->position(table->record[0]); + found++; + + if (secure_counter < 0) + { + /* We are scanning the current table */ + DBUG_ASSERT(del_table == table_being_deleted); + if (table->triggers && + table->triggers->process_triggers(thd, TRG_EVENT_DELETE, + TRG_ACTION_BEFORE, FALSE)) + DBUG_RETURN(1); + table->status|= STATUS_DELETED; + + error= table->delete_row(); + if (likely(!error)) + { + deleted++; + if (!table->file->has_transactions()) + thd->transaction->stmt.modified_non_trans_table= TRUE; + if (table->triggers && + table->triggers->process_triggers(thd, TRG_EVENT_DELETE, + TRG_ACTION_AFTER, FALSE)) + DBUG_RETURN(1); + } + else if (!ignore) + { + /* + If the IGNORE option is used errors caused by ha_delete_row don't + have to stop the iteration. 
+ */ + table->file->print_error(error,MYF(0)); + DBUG_RETURN(1); + } + } + else + { + error=tempfiles[secure_counter]->unique_add((char*) table->file->ref); + if (unlikely(error)) + { + error= 1; // Fatal error + DBUG_RETURN(1); + } + } + } + DBUG_RETURN(0); +} + + +void multi_delete::abort_result_set() +{ + DBUG_ENTER("multi_delete::abort_result_set"); + + /* the error was handled or nothing deleted and no side effects return */ + if (error_handled || + (!thd->transaction->stmt.modified_non_trans_table && !deleted)) + DBUG_VOID_RETURN; + + /* Something already deleted so we have to invalidate cache */ + if (deleted) + query_cache_invalidate3(thd, delete_tables, 1); + + if (thd->transaction->stmt.modified_non_trans_table) + thd->transaction->all.modified_non_trans_table= TRUE; + thd->transaction->all.m_unsafe_rollback_flags|= + (thd->transaction->stmt.m_unsafe_rollback_flags & THD_TRANS::DID_WAIT); + + /* + If rows from the first table only has been deleted and it is + transactional, just do rollback. + The same if all tables are transactional, regardless of where we are. + In all other cases do attempt deletes ... 
+ */ + if (do_delete && normal_tables && + (table_being_deleted != delete_tables || + !table_being_deleted->table->file->has_transactions_and_rollback())) + { + /* + We have to execute the recorded do_deletes() and write info into the + error log + */ + error= 1; + send_eof(); + DBUG_ASSERT(error_handled); + DBUG_VOID_RETURN; + } + + if (thd->transaction->stmt.modified_non_trans_table || + thd->log_current_statement()) + { + /* + there is only side effects; to binlog with the error + */ + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) + { + StatementBinlog stmt_binlog(thd, thd->binlog_need_stmt_format(transactional_tables)); + int errcode= query_error_code(thd, thd->killed == NOT_KILLED); + /* possible error of writing binary log is ignored deliberately */ + (void) thd->binlog_query(THD::ROW_QUERY_TYPE, + thd->query(), thd->query_length(), + transactional_tables, FALSE, FALSE, errcode); + } + } + DBUG_VOID_RETURN; +} + + + +/** + Do delete from other tables. + + @retval 0 ok + @retval 1 error + + @todo Is there any reason not use the normal nested-loops join? If not, and + there is no documentation supporting it, this method and callee should be + removed and there should be hooks within normal execution. +*/ + +int multi_delete::do_deletes() +{ + DBUG_ENTER("do_deletes"); + DBUG_ASSERT(do_delete); + + do_delete= 0; // Mark called + if (!found) + DBUG_RETURN(0); + + table_being_deleted= (delete_while_scanning ? 
delete_tables->next_local : + delete_tables); + + for (uint counter= 0; table_being_deleted; + table_being_deleted= table_being_deleted->next_local, counter++) + { + TABLE *table = table_being_deleted->table; + int local_error; + if (unlikely(tempfiles[counter]->get(table))) + DBUG_RETURN(1); + + local_error= do_table_deletes(table, &tempfiles[counter]->sort, + thd->lex->ignore); + + if (unlikely(thd->killed) && likely(!local_error)) + DBUG_RETURN(1); + + if (unlikely(local_error == -1)) // End of file + local_error= 0; + + if (unlikely(local_error)) + DBUG_RETURN(local_error); + } + DBUG_RETURN(0); +} + + +/** + Implements the inner loop of nested-loops join within multi-DELETE + execution. + + @param table The table from which to delete. + + @param ignore If used, all non fatal errors will be translated + to warnings and we should not break the row-by-row iteration. + + @return Status code + + @retval 0 All ok. + @retval 1 Triggers or handler reported error. + @retval -1 End of file from handler. +*/ +int multi_delete::do_table_deletes(TABLE *table, SORT_INFO *sort_info, + bool ignore) +{ + int local_error= 0; + READ_RECORD info; + ha_rows last_deleted= deleted; + DBUG_ENTER("do_deletes_for_table"); + + if (unlikely(init_read_record(&info, thd, table, NULL, sort_info, 0, 1, + FALSE))) + DBUG_RETURN(1); + + bool will_batch= !table->file->start_bulk_delete(); + while (likely(!(local_error= info.read_record())) && likely(!thd->killed)) + { + if (table->triggers && + unlikely(table->triggers->process_triggers(thd, TRG_EVENT_DELETE, + TRG_ACTION_BEFORE, FALSE))) + { + local_error= 1; + break; + } + + local_error= table->delete_row(); + if (unlikely(local_error) && !ignore) + { + table->file->print_error(local_error, MYF(0)); + break; + } + + /* + Increase the reported number of deleted rows only if no error occurred + during ha_delete_row. + Also, don't execute the AFTER trigger if the row operation failed. 
+ */ + if (unlikely(!local_error)) + { + deleted++; + if (table->triggers && + table->triggers->process_triggers(thd, TRG_EVENT_DELETE, + TRG_ACTION_AFTER, FALSE)) + { + local_error= 1; + break; + } + } + } + if (will_batch) + { + int tmp_error= table->file->end_bulk_delete(); + if (unlikely(tmp_error) && !local_error) + { + local_error= tmp_error; + table->file->print_error(local_error, MYF(0)); + } + } + if (last_deleted != deleted && !table->file->has_transactions_and_rollback()) + thd->transaction->stmt.modified_non_trans_table= TRUE; + + end_read_record(&info); + + DBUG_RETURN(local_error); +} + +/* + Send ok to the client + + return: 0 success + 1 error +*/ + +bool multi_delete::send_eof() +{ + killed_state killed_status= NOT_KILLED; + THD_STAGE_INFO(thd, stage_deleting_from_reference_tables); + + /* Does deletes for the last n - 1 tables, returns 0 if ok */ + int local_error= do_deletes(); // returns 0 if success + + /* compute a total error to know if something failed */ + local_error= local_error || error; + killed_status= (local_error == 0)? NOT_KILLED : thd->killed; + /* reset used flags */ + THD_STAGE_INFO(thd, stage_end); + + if (thd->transaction->stmt.modified_non_trans_table) + thd->transaction->all.modified_non_trans_table= TRUE; + thd->transaction->all.m_unsafe_rollback_flags|= + (thd->transaction->stmt.m_unsafe_rollback_flags & THD_TRANS::DID_WAIT); + + /* + We must invalidate the query cache before binlog writing and + ha_autocommit_... 
+ */ + if (deleted) + { + query_cache_invalidate3(thd, delete_tables, 1); + } + if (likely((local_error == 0) || + thd->transaction->stmt.modified_non_trans_table) || + thd->log_current_statement()) + { + if(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) + { + int errcode= 0; + if (likely(local_error == 0)) + thd->clear_error(); + else + errcode= query_error_code(thd, killed_status == NOT_KILLED); + thd->used|= THD::THREAD_SPECIFIC_USED; + StatementBinlog stmt_binlog(thd, thd->binlog_need_stmt_format(transactional_tables)); + if (unlikely(thd->binlog_query(THD::ROW_QUERY_TYPE, + thd->query(), thd->query_length(), + transactional_tables, FALSE, FALSE, + errcode) > 0) && + !normal_tables) + { + local_error=1; // Log write failed: roll back the SQL statement + } + } + } + if (unlikely(local_error != 0)) + error_handled= TRUE; // to force early leave from ::abort_result_set() + + if (likely(!local_error && !thd->lex->analyze_stmt)) + { + ::my_ok(thd, deleted); + } + return 0; +} diff --git a/sql/sql_delete.h b/sql/sql_delete.h new file mode 100644 index 00000000..520524c7 --- /dev/null +++ b/sql/sql_delete.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_DELETE_INCLUDED +#define SQL_DELETE_INCLUDED + +#include "my_base.h" /* ha_rows */ + +class THD; +struct TABLE_LIST; +class Item; +class select_result; + +typedef class Item COND; +template class SQL_I_List; + +int mysql_prepare_delete(THD *thd, TABLE_LIST *table_list, Item **conds, + bool *delete_while_scanning); +bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, + SQL_I_List *order, ha_rows rows, + ulonglong options, select_result *result); + +#endif /* SQL_DELETE_INCLUDED */ diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc new file mode 100644 index 00000000..4e42bcd3 --- /dev/null +++ b/sql/sql_derived.cc @@ -0,0 +1,1675 @@ +/* + Copyright (c) 2002, 2011, Oracle and/or its affiliates. + Copyright (c) 2010, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* + Derived tables + These were introduced by Sinisa +*/ + + +#include "mariadb.h" /* NO_EMBEDDED_ACCESS_CHECKS */ +#include "sql_priv.h" +#include "unireg.h" +#include "sql_select.h" +#include "derived_handler.h" +#include "sql_base.h" +#include "sql_view.h" // check_duplicate_names +#include "sql_acl.h" // SELECT_ACL +#include "sql_class.h" +#include "sql_derived.h" +#include "sql_cte.h" +#include "my_json_writer.h" +#include "opt_trace.h" + +typedef bool (*dt_processor)(THD *thd, LEX *lex, TABLE_LIST *derived); + +static bool mysql_derived_init(THD *thd, LEX *lex, TABLE_LIST *derived); +static bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *derived); +static bool mysql_derived_optimize(THD *thd, LEX *lex, TABLE_LIST *derived); +static bool mysql_derived_merge(THD *thd, LEX *lex, TABLE_LIST *derived); +static bool mysql_derived_create(THD *thd, LEX *lex, TABLE_LIST *derived); +static bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived); +static bool mysql_derived_reinit(THD *thd, LEX *lex, TABLE_LIST *derived); +static bool mysql_derived_merge_for_insert(THD *thd, LEX *lex, + TABLE_LIST *derived); + +dt_processor processors[]= +{ + &mysql_derived_init, + &mysql_derived_prepare, + &mysql_derived_optimize, + &mysql_derived_merge, + &mysql_derived_merge_for_insert, + &mysql_derived_create, + &mysql_derived_fill, + &mysql_derived_reinit, +}; + +/* + Run specified phases on all derived tables/views in given LEX. 
+ + @param lex LEX for this thread + @param phases phases to run derived tables/views through + + @return FALSE OK + @return TRUE Error +*/ +bool +mysql_handle_derived(LEX *lex, uint phases) +{ + bool res= FALSE; + DBUG_ENTER("mysql_handle_derived"); + DBUG_PRINT("enter", ("phases: 0x%x", phases)); + if (!lex->derived_tables) + DBUG_RETURN(FALSE); + + lex->thd->derived_tables_processing= TRUE; + + for (uint phase= 0; phase < DT_PHASES && !res; phase++) + { + uint phase_flag= DT_INIT << phase; + if (phase_flag > phases) + break; + if (!(phases & phase_flag)) + continue; + + for (SELECT_LEX *sl= lex->all_selects_list; + sl && !res; + sl= sl->next_select_in_list()) + { + TABLE_LIST *cursor= sl->get_table_list(); + sl->changed_elements|= TOUCHED_SEL_DERIVED; + /* + DT_MERGE_FOR_INSERT is not needed for views/derived tables inside + subqueries. Views and derived tables of subqueries should be + processed normally. + */ + if (phases == DT_MERGE_FOR_INSERT && + cursor && (cursor->top_table()->select_lex != + lex->first_select_lex())) + continue; + for (; + cursor && !res; + cursor= cursor->next_local) + { + if (!cursor->is_view_or_derived() && phases == DT_MERGE_FOR_INSERT) + continue; + uint8 allowed_phases= (cursor->is_merged_derived() ? DT_PHASES_MERGE : + DT_PHASES_MATERIALIZE | DT_MERGE_FOR_INSERT); + /* + Skip derived tables to which the phase isn't applicable. 
+ TODO: mark derived at the parse time, later set it's type + (merged or materialized) + */ + if ((phase_flag != DT_PREPARE && !(allowed_phases & phase_flag)) || + (cursor->merged_for_insert && phase_flag != DT_REINIT && + phase_flag != DT_PREPARE)) + continue; + res= (*processors[phase])(lex->thd, lex, cursor); + } + if (lex->describe) + { + /* + Force join->join_tmp creation, because we will use this JOIN + twice for EXPLAIN and we have to have unchanged join for EXPLAINing + */ + sl->uncacheable|= UNCACHEABLE_EXPLAIN; + sl->master_unit()->uncacheable|= UNCACHEABLE_EXPLAIN; + } + } + } + lex->thd->derived_tables_processing= FALSE; + DBUG_RETURN(res); +} + +/* + Run through phases for the given derived table/view. + + @param lex LEX for this thread + @param derived the derived table to handle + @param phase_map phases to process tables/views through + + @details + + This function process the derived table (view) 'derived' to performs all + actions that are to be done on the table at the phases specified by + phase_map. The processing is carried out starting from the actions + performed at the earlier phases (those having smaller ordinal numbers). + + @note + This function runs specified phases of the derived tables handling on the + given derived table/view. This function is used in the chain of calls: + SELECT_LEX::handle_derived -> + TABLE_LIST::handle_derived -> + mysql_handle_single_derived + This chain of calls implements the bottom-up handling of the derived tables: + i.e. most inner derived tables/views are handled first. This order is + required for the all phases except the merge and the create steps. + For the sake of code simplicity this order is kept for all phases. + + @return FALSE ok + @return TRUE error +*/ + +bool +mysql_handle_single_derived(LEX *lex, TABLE_LIST *derived, uint phases) +{ + bool res= FALSE; + uint8 allowed_phases= (derived->is_merged_derived() ? 
DT_PHASES_MERGE : + DT_PHASES_MATERIALIZE); + DBUG_ENTER("mysql_handle_single_derived"); + DBUG_PRINT("enter", ("phases: 0x%x allowed: 0x%x alias: '%s'", + phases, allowed_phases, + (derived->alias.str ? derived->alias.str : ""))); + if (!lex->derived_tables) + DBUG_RETURN(FALSE); + + if (derived->select_lex) + derived->select_lex->changed_elements|= TOUCHED_SEL_DERIVED; + else + DBUG_ASSERT(derived->prelocking_placeholder); + lex->thd->derived_tables_processing= TRUE; + + for (uint phase= 0; phase < DT_PHASES; phase++) + { + uint phase_flag= DT_INIT << phase; + if (phase_flag > phases) + break; + if (!(phases & phase_flag)) + continue; + /* Skip derived tables to which the phase isn't applicable. */ + if (phase_flag != DT_PREPARE && + !(allowed_phases & phase_flag)) + continue; + + if ((res= (*processors[phase])(lex->thd, lex, derived))) + break; + } + + lex->thd->derived_tables_processing= FALSE; + DBUG_RETURN(res); +} + + +/** + Merge a derived table/view into the embedding select + + @param thd thread handle + @param lex LEX of the embedding query. + @param derived reference to the derived table. + + @details + This function merges the given derived table / view into the parent select + construction. Any derived table/reference to view occurred in the FROM + clause of the embedding select is represented by a TABLE_LIST structure a + pointer to which is passed to the function as in the parameter 'derived'. + This structure contains the number/map, alias, a link to SELECT_LEX of the + derived table and other info. If the 'derived' table is used in a nested join + then additionally the structure contains a reference to the ON expression + for this join. 
+ + The merge process results in elimination of the derived table (or the + reference to a view) such that: + - the FROM list of the derived table/view is wrapped into a nested join + after which the nest is added to the FROM list of the embedding select + - the WHERE condition of the derived table (view) is ANDed with the ON + condition attached to the table. + + @note + Tables are merged into the leaf_tables list, original derived table is removed + from this list also. SELECT_LEX::table_list list is left untouched. + Where expression is merged with derived table's on_expr and can be found after + the merge through the SELECT_LEX::table_list. + + Examples of the derived table/view merge: + + Schema: + Tables: t1(f1), t2(f2), t3(f3) + View v1: SELECT f1 FROM t1 WHERE f1 < 1 + + Example with a view: + Before merge: + + The query (Q1): SELECT f1,f2 FROM t2 LEFT JOIN v1 ON f1 = f2 + + (LEX of the main query) + | + (select_lex) + | + (FROM table list) + | + (join list)= t2, v1 + / \ + / (on_expr)= (f1 = f2) + | + (LEX of the v1 view) + | + (select_lex)= SELECT f1 FROM t1 WHERE f1 < 1 + + + After merge: + + The rewritten query Q1 (Q1'): + SELECT f1,f2 FROM t2 LEFT JOIN (t1) ON ((f1 = f2) and (f1 < 1)) + + (LEX of the main query) + | + (select_lex) + | + (FROM table list) + | + (join list)= t2, (t1) + \ + (on_expr)= (f1 = f2) and (f1 < 1) + + In this example table numbers are assigned as follows: + (outer select): t2 - 1, v1 - 2 + (inner select): t1 - 1 + After the merge table numbers will be: + (outer select): t2 - 1, t1 - 2 + + Example with a derived table: + The query Q2: + SELECT f1,f2 + FROM (SELECT f1 FROM t1, t3 WHERE f1=f3 and f1 < 1) tt, t2 + WHERE f1 = f2 + + Before merge: + (LEX of the main query) + | + (select_lex) + / \ + (FROM table list) (WHERE clause)= (f1 = f2) + | + (join list)= tt, t2 + / \ + / (on_expr)= (empty) + / + (select_lex)= SELECT f1 FROM t1, t3 WHERE f1 = f3 and f1 < 1 + + After merge: + + The rewritten query Q2 (Q2'): + SELECT f1,f2 + FROM 
(t1, t3) JOIN t2 ON (f1 = f3 and f1 < 1) + WHERE f1 = f2 + + (LEX of the main query) + | + (select_lex) + / \ + (FROM table list) (WHERE clause)= (f1 = f2) + | + (join list)= t2, (t1, t3) + \ + (on_expr)= (f1 = f3 and f1 < 1) + + In this example table numbers are assigned as follows: + (outer select): tt - 1, t2 - 2 + (inner select): t1 - 1, t3 - 2 + After the merge table numbers will be: + (outer select): t1 - 1, t2 - 2, t3 - 3 + + @return FALSE if derived table/view were successfully merged. + @return TRUE if an error occur. +*/ + +static +bool mysql_derived_merge(THD *thd, LEX *lex, TABLE_LIST *derived) +{ + bool res= FALSE; + SELECT_LEX *dt_select= derived->get_single_select(); + table_map map; + uint tablenr; + SELECT_LEX *parent_lex= derived->select_lex; + Query_arena *arena, backup; + DBUG_ENTER("mysql_derived_merge"); + DBUG_PRINT("enter", ("Alias: '%s' Unit: %p", + (derived->alias.str ? derived->alias.str : ""), + derived->get_unit())); + const char *cause= NULL; + + if (derived->merged) + { + + DBUG_PRINT("info", ("Irreversibly merged: exit")); + DBUG_RETURN(FALSE); + } + + if (derived->dt_handler) + { + derived->change_refs_to_fields(); + derived->set_materialized_derived(); + DBUG_RETURN(FALSE); + } + + arena= thd->activate_stmt_arena_if_needed(&backup); // For easier test + + if (!derived->merged_for_insert || + (derived->is_multitable() && + (thd->lex->sql_command == SQLCOM_UPDATE_MULTI || + thd->lex->sql_command == SQLCOM_DELETE_MULTI))) + { + /* + Check whether there is enough free bits in table map to merge subquery. + If not - materialize it. This check isn't cached so when there is a big + and small subqueries, and the bigger one can't be merged it wouldn't + block the smaller one. + */ + if (parent_lex->get_free_table_map(&map, &tablenr) || + dt_select->leaf_tables.elements + tablenr > MAX_TABLES) + { + /* There is no enough table bits, fall back to materialization. 
*/ + cause= "Not enough table bits to merge subquery"; + goto unconditional_materialization; + } + + if (dt_select->options & OPTION_SCHEMA_TABLE) + parent_lex->options |= OPTION_SCHEMA_TABLE; + + if (!derived->get_unit()->prepared) + { + dt_select->leaf_tables.empty(); + make_leaves_list(thd, dt_select->leaf_tables, derived, TRUE, 0); + } + + derived->nested_join= (NESTED_JOIN*) thd->calloc(sizeof(NESTED_JOIN)); + if (!derived->nested_join) + { + res= TRUE; + goto exit_merge; + } + + /* Merge derived table's subquery in the parent select. */ + if (parent_lex->merge_subquery(thd, derived, dt_select, tablenr, map)) + { + res= TRUE; + goto exit_merge; + } + + /* + exclude select lex so it doesn't show up in explain. + do this only for derived table as for views this is already done. + + From sql_view.cc + Add subqueries units to SELECT into which we merging current view. + unit(->next)* chain starts with subqueries that are used by this + view and continues with subqueries that are used by other views. + We must not add any subquery twice (otherwise we'll form a loop), + to do this we remember in end_unit the first subquery that has + been already added. + */ + derived->get_unit()->exclude_level(); + if (parent_lex->join) + parent_lex->join->table_count+= dt_select->join->table_count - 1; + } + derived->merged= TRUE; + if (derived->get_unit()->prepared) + { + Item *expr= derived->on_expr; + expr= and_conds(thd, expr, dt_select->join ? 
dt_select->join->conds : 0); + if (expr) + expr->top_level_item(); + + if (expr && (derived->prep_on_expr || expr != derived->on_expr)) + { + derived->on_expr= expr; + derived->prep_on_expr= expr->copy_andor_structure(thd); + } + thd->where= "on clause"; + if (derived->on_expr && + derived->on_expr->fix_fields_if_needed_for_bool(thd, &derived->on_expr)) + { + res= TRUE; /* purecov: inspected */ + goto exit_merge; + } + // Update used tables cache according to new table map + if (derived->on_expr) + { + derived->on_expr->fix_after_pullout(parent_lex, &derived->on_expr, + TRUE); + fix_list_after_tbl_changes(parent_lex, &derived->nested_join->join_list); + } + } + +exit_merge: + if (arena) + thd->restore_active_arena(arena, &backup); + DBUG_RETURN(res); + +unconditional_materialization: + + if (unlikely(thd->trace_started())) + { + OPT_TRACE_VIEWS_TRANSFORM(thd,trace_wrapper, trace_derived, + derived->is_derived() ? "derived" : "view", + derived->alias.str ? derived->alias.str : "", + derived->get_unit()->first_select()->select_number, + "materialized"); + trace_derived.add("cause", cause); + } + + derived->change_refs_to_fields(); + derived->set_materialized_derived(); + if (!derived->table || !derived->table->is_created()) + res= mysql_derived_create(thd, lex, derived); + goto exit_merge; +} + + +/** + Merge a view for the embedding INSERT/UPDATE/DELETE + + @param thd thread handle + @param lex LEX of the embedding query. + @param derived reference to the derived table. + + @details + This function substitutes the derived table for the first table from + the query of the derived table thus making it a correct target table for the + INSERT/UPDATE/DELETE statements. As this operation is correct only for + single table views only, for multi table views this function does nothing. + The derived parameter isn't checked to be a view as derived tables aren't + allowed for INSERT/UPDATE/DELETE statements. + + @return FALSE if derived table/view were successfully merged. 
+ @return TRUE if an error occur. +*/ + +static +bool mysql_derived_merge_for_insert(THD *thd, LEX *lex, TABLE_LIST *derived) +{ + DBUG_ENTER("mysql_derived_merge_for_insert"); + DBUG_PRINT("enter", ("Alias: '%s' Unit: %p", + (derived->alias.str ? derived->alias.str : ""), + derived->get_unit())); + DBUG_PRINT("info", ("merged_for_insert: %d is_materialized_derived: %d " + "is_multitable: %d single_table_updatable: %d " + "merge_underlying_list: %d", + derived->merged_for_insert, + derived->is_materialized_derived(), + derived->is_multitable(), + derived->single_table_updatable(), + derived->merge_underlying_list != 0)); + if (derived->merged_for_insert) + DBUG_RETURN(FALSE); + if (derived->init_derived(thd, FALSE)) + DBUG_RETURN(TRUE); + if (derived->is_materialized_derived()) + DBUG_RETURN(mysql_derived_prepare(thd, lex, derived)); + if ((thd->lex->sql_command == SQLCOM_UPDATE_MULTI || + thd->lex->sql_command == SQLCOM_DELETE_MULTI)) + DBUG_RETURN(FALSE); + if (!derived->is_multitable()) + { + if (!derived->single_table_updatable()) + DBUG_RETURN(derived->create_field_translation(thd)); + if (derived->merge_underlying_list) + { + derived->table= derived->merge_underlying_list->table; + derived->schema_table= derived->merge_underlying_list->schema_table; + derived->merged_for_insert= TRUE; + DBUG_ASSERT(derived->table); + } + } + DBUG_RETURN(FALSE); +} + + +/* + Initialize a derived table/view + + @param thd Thread handle + @param lex LEX of the embedding query. + @param derived reference to the derived table. + + @detail + Fill info about derived table/view without preparing an + underlying select. Such as: create a field translation for views, mark it as + a multitable if it is and so on. + + @return + false OK + true Error +*/ + +static +bool mysql_derived_init(THD *thd, LEX *lex, TABLE_LIST *derived) +{ + SELECT_LEX_UNIT *unit= derived->get_unit(); + DBUG_ENTER("mysql_derived_init"); + DBUG_PRINT("enter", ("Alias: '%s' Unit: %p", + (derived->alias.str ? 
derived->alias.str : ""), + derived->get_unit())); + + // Skip already prepared views/DT + if (!unit || unit->prepared) + DBUG_RETURN(FALSE); + + bool res= derived->init_derived(thd, TRUE); + + derived->updatable= derived->updatable && derived->is_view(); + + DBUG_RETURN(res); +} + + +/** + @brief + Prevent name resolution out of context of ON expressions in derived tables + + @param + join_list list of tables used in from list of a derived + + @details + The function sets the Name_resolution_context::outer_context to NULL + for all ON expressions contexts in the given join list. It does this + recursively for all nested joins the list contains. +*/ + +static void nullify_outer_context_for_on_clauses(List& join_list) +{ + List_iterator li(join_list); + while (TABLE_LIST *table= li++) + { + if (table->on_context) + table->on_context->outer_context= NULL; + if (table->nested_join) + nullify_outer_context_for_on_clauses(table->nested_join->join_list); + } +} + + +/* + Create temporary table structure (but do not fill it) + + @param thd Thread handle + @param lex LEX of the embedding query. + @param derived reference to the derived table. + + @detail + Prepare underlying select for a derived table/view. To properly resolve + names in the embedding query the TABLE structure is created. Actual table + is created later by the mysql_derived_create function. + + This function is called before any command containing derived table + is executed. All types of derived tables are handled by this function: + - Anonymous derived tables, or + - Named derived tables (aka views). + + The table reference, contained in @c derived, is updated with the + fields of a new temporary table. + Derived tables are stored in @c thd->derived_tables and closed by + close_thread_tables(). 
+ + This function is part of the procedure that starts in + open_and_lock_tables(), a procedure that - among other things - introduces + new table and table reference objects (to represent derived tables) that + don't exist in the privilege database. This means that normal privilege + checking cannot handle them. Hence this function does some extra tricks in + order to bypass normal privilege checking, by exploiting the fact that the + current state of privilege verification is attached as GRANT_INFO structures + on the relevant TABLE and TABLE_REF objects. + + For table references, the current state of accrued access is stored inside + TABLE_LIST::grant. Hence this function must update the state of fulfilled + privileges for the new TABLE_LIST, an operation which is normally performed + exclusively by the table and database access checking functions, + check_access() and check_grant(), respectively. This modification is done + for both views and anonymous derived tables: The @c SELECT privilege is set + as fulfilled by the user. However, if a view is referenced and the table + reference is queried against directly (see TABLE_LIST::referencing_view), + the state of privilege checking (GRANT_INFO struct) is copied as-is to the + temporary table. + + Only the TABLE structure is created here, actual table is created by the + mysql_derived_create function. + + @note This function sets @c SELECT_ACL for @c TEMPTABLE views as well as + anonymous derived tables, but this is ok since later access checking will + distinguish between them. 
+ + @see mysql_handle_derived(), mysql_derived_fill(), GRANT_INFO + + @return + false OK + true Error +*/ + +static +bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *derived) +{ + SELECT_LEX_UNIT *unit= derived->get_unit(); + SELECT_LEX *first_select; + bool res= FALSE, keep_row_order; + DBUG_ENTER("mysql_derived_prepare"); + DBUG_PRINT("enter", ("unit: %p table_list: %p alias: '%s'", + unit, derived, derived->alias.str)); + if (!unit) + DBUG_RETURN(FALSE); + + first_select= unit->first_select(); + /* + If rownum() is used we have to preserve the insert row order + to make GROUP BY and ORDER BY with filesort work. + + SELECT * from (SELECT a,b from t1 ORDER BY a)) WHERE rownum <= 0; + + When rownum is not used the optimizer will skip the ORDER BY clause. + With rownum we have to keep the ORDER BY as this is what is expected. + We also have to create any sort result temporary table in such a way + that the inserted row order is maintained. + */ + keep_row_order= (thd->lex->with_rownum && + (first_select->group_list.elements || + first_select->order_list.elements)); + + if (derived->is_recursive_with_table() && + !derived->is_with_table_recursive_reference() && + !derived->with->rec_result && derived->with->get_sq_rec_ref()) + { + /* + This is a non-recursive reference to a recursive CTE whose + specification unit has not been prepared at the regular processing of + derived table references. This can happen only in the case when + the specification unit has no recursive references at the top level. + Force the preparation of the specification unit. Use a recursive + table reference from a subquery for this. 
+ */ + DBUG_ASSERT(derived->with->get_sq_rec_ref()); + if (unlikely(mysql_derived_prepare(lex->thd, lex, + derived->with->get_sq_rec_ref()))) + DBUG_RETURN(TRUE); + } + + if (unit->prepared && derived->is_recursive_with_table() && + !derived->table) + { + /* + Here 'derived' is either a non-recursive table reference to a recursive + with table or a recursive table reference to a recursvive table whose + specification has been already prepared (a secondary recursive table + reference. + */ + if (!(derived->derived_result= new (thd->mem_root) select_unit(thd))) + DBUG_RETURN(TRUE); // out of memory + thd->create_tmp_table_for_derived= TRUE; + res= derived->derived_result->create_result_table( + thd, &unit->types, FALSE, + (first_select->options | + thd->variables.option_bits | + TMP_TABLE_ALL_COLUMNS), + &derived->alias, FALSE, FALSE, + keep_row_order, 0); + thd->create_tmp_table_for_derived= FALSE; + + if (likely(!res) && !derived->table) + { + derived->derived_result->set_unit(unit); + derived->table= derived->derived_result->table; + if (derived->is_with_table_recursive_reference()) + { + /* Here 'derived" is a secondary recursive table reference */ + unit->with_element->rec_result->rec_table_refs.push_back(derived); + } + } + DBUG_ASSERT(derived->table || res); + goto exit; + } + + // Skip already prepared views/DT + if (unit->prepared || + (derived->merged_for_insert && + !(derived->is_multitable() && + (thd->lex->sql_command == SQLCOM_UPDATE_MULTI || + thd->lex->sql_command == SQLCOM_DELETE_MULTI)))) + { + /* + System versioned tables may still require to get versioning conditions + when modifying view (see vers_setup_conds()). Only UPDATE and DELETE are + affected because they use WHERE condition. 
+ */ + if (!unit->prepared && + derived->table->versioned() && + derived->merge_underlying_list && + /* choose only those merged views that do not select from other views */ + !derived->merge_underlying_list->merge_underlying_list) + { + switch (thd->lex->sql_command) + { + case SQLCOM_DELETE: + case SQLCOM_DELETE_MULTI: + case SQLCOM_UPDATE: + case SQLCOM_UPDATE_MULTI: + if ((res= first_select->vers_setup_conds(thd, + derived->merge_underlying_list))) + goto exit; + if (derived->merge_underlying_list->where) + { + Query_arena_stmt on_stmt_arena(thd); + derived->where= and_items(thd, derived->where, + derived->merge_underlying_list->where); + } + default: + break; + } + } + DBUG_RETURN(FALSE); + } + + /* prevent name resolving out of derived table */ + for (SELECT_LEX *sl= first_select; sl; sl= sl->next_select()) + { + // Prevent it for the WHERE clause + sl->context.outer_context= 0; + + // And for ON clauses, if there are any + nullify_outer_context_for_on_clauses(*sl->join_list); + + if (!derived->is_with_table_recursive_reference() || + (!derived->with->with_anchor && + !derived->with->is_with_prepared_anchor())) + { + /* + Prepare underlying views/DT first unless 'derived' is a recursive + table reference and either the anchors from the specification of + 'derived' has been already prepared or there no anchor in this + specification + */ + if ((res= sl->handle_derived(lex, DT_PREPARE))) + goto exit; + } + if (derived->outer_join && sl->first_cond_optimization) + { + /* Mark that table is part of OUTER JOIN and fields may be NULL */ + for (TABLE_LIST *cursor= (TABLE_LIST*) sl->table_list.first; + cursor; + cursor= cursor->next_local) + cursor->outer_join|= JOIN_TYPE_OUTER; + } + } + // Prevent it for possible ORDER BY clause + if (unit->fake_select_lex) + unit->fake_select_lex->context.outer_context= 0; + + if (unlikely(thd->trace_started())) + { + /* + Add to optimizer trace whether a derived table/view + is merged into the parent select or not. 
+ */ + OPT_TRACE_VIEWS_TRANSFORM(thd, trace_wrapper, trace_derived, + derived->is_derived() ? "derived" : "view", + derived->alias.str ? derived->alias.str : "", + derived->get_unit()->first_select()->select_number, + derived->is_merged_derived() ? "merged" : "materialized"); + } + /* + Above cascade call of prepare is important for PS protocol, but after it + is called we can check if we really need prepare for this derived + */ + if (derived->merged) + { + DBUG_PRINT("info", ("Irreversibly merged: exit")); + DBUG_RETURN(FALSE); + } + + derived->fill_me= FALSE; + + if ((!derived->is_with_table_recursive_reference() || + !derived->derived_result) && + !(derived->derived_result= new (thd->mem_root) select_unit(thd))) + DBUG_RETURN(TRUE); // out of memory + + // st_select_lex_unit::prepare correctly work for single select + if ((res= unit->prepare(derived, derived->derived_result, 0))) + goto exit; + if (derived->with && + (res= derived->with->process_columns_of_derived_unit(thd, unit))) + goto exit; + if ((res= check_duplicate_names(thd, unit->types, 0))) + goto exit; + + /* + Check whether we can merge this derived table into main select. + Depending on the result field translation will or will not + be created. + */ + if (!derived->is_with_table_recursive_reference() && + derived->init_derived(thd, FALSE)) + goto exit; + + /* + Temp table is created so that it hounours if UNION without ALL is to be + processed + + As 'distinct' parameter we always pass FALSE (0), because underlying + query will control distinct condition by itself. Correct test of + distinct underlying query will be is_unit_op && + !unit->union_distinct->next_select() (i.e. it is union and last distinct + SELECT is last SELECT of UNION). 
+ */ + thd->create_tmp_table_for_derived= TRUE; + if (!(derived->table) && + derived->derived_result->create_result_table(thd, &unit->types, FALSE, + (first_select->options | + thd->variables.option_bits | + TMP_TABLE_ALL_COLUMNS), + &derived->alias, + FALSE, FALSE, keep_row_order, + 0)) + { + thd->create_tmp_table_for_derived= FALSE; + goto exit; + } + thd->create_tmp_table_for_derived= FALSE; + + if (!derived->table) + derived->table= derived->derived_result->table; + DBUG_ASSERT(derived->table); + if (derived->is_derived() && derived->is_merged_derived()) + first_select->mark_as_belong_to_derived(derived); + + derived->dt_handler= derived->find_derived_handler(thd); + if (derived->dt_handler) + { + char query_buff[4096]; + String derived_query(query_buff, sizeof(query_buff), thd->charset()); + derived_query.length(0); + derived->derived->print(&derived_query, + enum_query_type(QT_VIEW_INTERNAL | + QT_ITEM_ORIGINAL_FUNC_NULLIF | + QT_PARSABLE)); + if (!thd->make_lex_string(&derived->derived_spec, + derived_query.ptr(), derived_query.length())) + { + delete derived->dt_handler; + derived->dt_handler= NULL; + } + } + +exit: + /* Hide "Unknown column" or "Unknown function" error */ + if (derived->view) + { + if (thd->is_error() && + (thd->get_stmt_da()->sql_errno() == ER_BAD_FIELD_ERROR || + thd->get_stmt_da()->sql_errno() == ER_FUNC_INEXISTENT_NAME_COLLISION || + thd->get_stmt_da()->sql_errno() == ER_SP_DOES_NOT_EXIST)) + { + thd->clear_error(); + my_error(ER_VIEW_INVALID, MYF(0), derived->db.str, + derived->table_name.str); + } + } + + /* + if it is preparation PS only or commands that need only VIEW structure + then we do not need real data and we can skip execution (and parameters + is not defined, too) + */ + if (res) + { + if (!derived->is_with_table_recursive_reference()) + { + if (derived->table && derived->table->s->tmp_table) + free_tmp_table(thd, derived->table); + delete derived->derived_result; + } + } + else + { + TABLE *table= derived->table; + 
table->derived_select_number= first_select->select_number; + table->s->tmp_table= INTERNAL_TMP_TABLE; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (derived->is_view()) + table->grant= derived->grant; + else + { + DBUG_ASSERT(derived->is_derived()); + DBUG_ASSERT(derived->is_anonymous_derived_table()); + table->grant.privilege= SELECT_ACL; + derived->grant.privilege= SELECT_ACL; + } +#endif + /* Add new temporary table to list of open derived tables */ + if (!derived->is_with_table_recursive_reference()) + { + table->next= thd->derived_tables; + thd->derived_tables= table; + } + + /* If table is used by a left join, mark that any column may be null */ + if (derived->outer_join) + table->maybe_null= 1; + } + DBUG_RETURN(res); +} + + +/** + Runs optimize phase for a derived table/view. + + @param thd thread handle + @param lex LEX of the embedding query. + @param derived reference to the derived table. + + @details + Runs optimize phase for given 'derived' derived table/view. + If optimizer finds out that it's of the type "SELECT a_constant" then this + functions also materializes it. + + @return FALSE ok. + @return TRUE if an error occur. +*/ + +static +bool mysql_derived_optimize(THD *thd, LEX *lex, TABLE_LIST *derived) +{ + SELECT_LEX_UNIT *unit= derived->get_unit(); + SELECT_LEX *first_select= unit->first_select(); + SELECT_LEX *save_current_select= lex->current_select; + bool res= FALSE; + DBUG_ENTER("mysql_derived_optimize"); + DBUG_PRINT("enter", ("Alias: '%s' Unit: %p", + (derived->alias.str ? 
derived->alias.str : ""), + derived->get_unit())); + if (derived->merged) + { + DBUG_PRINT("info", ("Irreversibly merged: exit")); + DBUG_RETURN(FALSE); + } + + if (derived->is_materialized_derived() && derived->dt_handler) + { + /* Create an object for execution of the query specifying the table */ + if (!(derived->pushdown_derived= + new (thd->mem_root) Pushdown_derived(derived, derived->dt_handler))) + DBUG_RETURN(TRUE); + } + + lex->current_select= first_select; + + if (unit->is_unit_op()) + { + if (unit->optimized) + DBUG_RETURN(FALSE); + // optimize union without execution + res= unit->optimize(); + } + else if (unit->derived) + { + if (!derived->is_merged_derived()) + { + JOIN *join= first_select->join; + unit->set_limit(unit->global_parameters()); + if (join && + join->optimization_state == JOIN::OPTIMIZATION_PHASE_1_DONE && + join->with_two_phase_optimization) + { + if (unit->optimized_2) + DBUG_RETURN(FALSE); + unit->optimized_2= TRUE; + } + else + { + if (unit->optimized) + DBUG_RETURN(FALSE); + unit->optimized= TRUE; + if (!join) + { + /* + This happens when derived is used in SELECT for which + zer_result_cause != 0. + In this case join is already destroyed. + */ + DBUG_RETURN(FALSE); + } + } + if ((res= join->optimize())) + goto err; + if (join->table_count == join->const_tables) + derived->fill_me= TRUE; + } + } + /* + Materialize derived tables/views of the "SELECT a_constant" type. + Such tables should be materialized at the optimization phase for + correct constant evaluation. + */ + if (!res && derived->fill_me && !derived->merged_for_insert) + { + if (derived->is_merged_derived()) + { + derived->change_refs_to_fields(); + derived->set_materialized_derived(); + } + if ((res= mysql_derived_create(thd, lex, derived))) + goto err; + if ((res= mysql_derived_fill(thd, lex, derived))) + goto err; + } +err: + lex->current_select= save_current_select; + DBUG_RETURN(res); +} + + +/** + Actually create result table for a materialized derived table/view. 
+ + @param thd thread handle + @param lex LEX of the embedding query. + @param derived reference to the derived table. + + @details + This function actually creates the result table for given 'derived' + table/view, but it doesn't fill it. + 'thd' and 'lex' parameters are not used by this function. + + @return FALSE ok. + @return TRUE if an error occur. +*/ + +static +bool mysql_derived_create(THD *thd, LEX *lex, TABLE_LIST *derived) +{ + DBUG_ENTER("mysql_derived_create"); + DBUG_PRINT("enter", ("Alias: '%s' Unit: %p", + (derived->alias.str ? derived->alias.str : ""), + derived->get_unit())); + TABLE *table= derived->table; + SELECT_LEX_UNIT *unit= derived->get_unit(); + + if (table->is_created()) + DBUG_RETURN(FALSE); + select_unit *result= derived->derived_result; + if (table->s->db_type() == TMP_ENGINE_HTON) + { + result->tmp_table_param.keyinfo= table->s->key_info; + if (create_internal_tmp_table(table, result->tmp_table_param.keyinfo, + result->tmp_table_param.start_recinfo, + &result->tmp_table_param.recinfo, + (unit->first_select()->options | + thd->variables.option_bits | TMP_TABLE_ALL_COLUMNS))) + DBUG_RETURN(TRUE); + } + if (open_tmp_table(table)) + DBUG_RETURN(TRUE); + table->file->extra(HA_EXTRA_WRITE_CACHE); + table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + DBUG_RETURN(FALSE); +} + + +void TABLE_LIST::register_as_derived_with_rec_ref(With_element *rec_elem) +{ + rec_elem->derived_with_rec_ref.link_in_list(this, &this->next_with_rec_ref); + is_derived_with_recursive_reference= true; + get_unit()->uncacheable|= UNCACHEABLE_DEPENDENT; +} + + +bool TABLE_LIST::is_nonrecursive_derived_with_rec_ref() +{ + return is_derived_with_recursive_reference; +} + + +/** + @brief + Fill the recursive with table + + @param thd The thread handle + + @details + The method is called only for recursive with tables. 
+ The method executes the recursive part of the specification + of this with table until no more rows are added to the table + or the number of the performed iteration reaches the allowed + maximum. + + @retval + false on success + true on failure +*/ + +bool TABLE_LIST::fill_recursive(THD *thd) +{ + bool rc= false; + st_select_lex_unit *unit= get_unit(); + rc= with->instantiate_tmp_tables(); + while (!rc && !with->all_are_stabilized()) + { + if (with->level > thd->variables.max_recursive_iterations) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_QUERY_RESULT_INCOMPLETE, + ER_THD(thd, ER_QUERY_RESULT_INCOMPLETE), + "max_recursive_iterations =", + (ulonglong)thd->variables.max_recursive_iterations); + break; + } + with->prepare_for_next_iteration(); + rc= unit->exec_recursive(); + } + if (!rc) + { + TABLE *src= with->rec_result->table; + rc =src->insert_all_rows_into_tmp_table(thd, + table, + &with->rec_result->tmp_table_param, + true); + } + return rc; +} + + +/* + Execute subquery of a materialized derived table/view and fill the result + table. + + @param thd Thread handle + @param lex LEX for this thread + @param derived reference to the derived table. + + @details + Execute subquery of given 'derived' table/view and fill the result + table. After result table is filled, if this is not the EXPLAIN statement + and the table is not specified with a recursion the entire unit / node + is deleted. unit is deleted if UNION is used for derived table and node + is deleted is it is a simple SELECT. + 'lex' is unused and 'thd' is passed as an argument to an underlying function. + + @note + If you use this function, make sure it's not called at prepare. + Due to evaluation of LIMIT clause it can not be used at prepared stage. 
+ + @return FALSE OK + @return TRUE Error +*/ + +static +bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived) +{ + Field_iterator_table field_iterator; + SELECT_LEX_UNIT *unit= derived->get_unit(); + bool derived_is_recursive= derived->is_recursive_with_table(); + bool res= FALSE; + DBUG_ENTER("mysql_derived_fill"); + DBUG_PRINT("enter", ("Alias: '%s' Unit: %p", + (derived->alias.str ? derived->alias.str : ""), + derived->get_unit())); + + if (unit->executed && !unit->uncacheable && !unit->describe && + !derived_is_recursive) + DBUG_RETURN(FALSE); + /*check that table creation passed without problems. */ + DBUG_ASSERT(derived->table && derived->table->is_created()); + select_unit *derived_result= derived->derived_result; + SELECT_LEX *save_current_select= lex->current_select; + + if (derived->pushdown_derived) + { + int res; + if (unit->executed) + DBUG_RETURN(FALSE); + /* Execute the query that specifies the derived table by a foreign engine */ + res= derived->pushdown_derived->execute(); + unit->executed= true; + DBUG_RETURN(res); + } + + if (unit->executed && !derived_is_recursive && + (unit->uncacheable & UNCACHEABLE_DEPENDENT)) + { + if ((res= derived->table->file->ha_delete_all_rows())) + goto err; + JOIN *join= unit->first_select()->join; + join->first_record= false; + for (uint i= join->top_join_tab_count; + i < join->top_join_tab_count + join->aggr_tables; + i++) + { + if ((res= join->join_tab[i].table->file->ha_delete_all_rows())) + goto err; + } + } + + if (derived_is_recursive) + { + if (derived->is_with_table_recursive_reference()) + { + /* Here only one iteration step is performed */ + res= unit->exec_recursive(); + } + else + { + /* In this case all iteration are performed */ + res= derived->fill_recursive(thd); + } + } + else if (unit->is_unit_op()) + { + // execute union without clean up + res= unit->exec(); + } + else + { + SELECT_LEX *first_select= unit->first_select(); + unit->set_limit(unit->global_parameters()); + if 
(unit->lim.is_unlimited()) + first_select->options&= ~OPTION_FOUND_ROWS; + + lex->current_select= first_select; + res= mysql_select(thd, + first_select->table_list.first, + first_select->item_list, first_select->where, + (first_select->order_list.elements+ + first_select->group_list.elements), + first_select->order_list.first, + first_select->group_list.first, + first_select->having, (ORDER*) NULL, + (first_select->options |thd->variables.option_bits | + SELECT_NO_UNLOCK), + derived_result, unit, first_select); + } + + if (!res && !derived_is_recursive) + { + if (derived_result->flush()) + res= TRUE; + unit->executed= TRUE; + + if (derived->field_translation) + { + /* reset translation table to materialized table */ + field_iterator.set_table(derived->table); + for (uint i= 0; + !field_iterator.end_of_fields(); + field_iterator.next(), i= i + 1) + { + Item *item; + + if (!(item= field_iterator.create_item(thd))) + { + res= TRUE; + break; + } + thd->change_item_tree(&derived->field_translation[i].item, item); + } + } + } +err: + if (res || (!derived_is_recursive && !lex->describe && !unit->uncacheable)) + unit->cleanup(); + lex->current_select= save_current_select; + + DBUG_RETURN(res); +} + + +/** + Re-initialize given derived table/view for the next execution. + + @param thd thread handle + @param lex LEX for this thread + @param derived reference to the derived table. + + @details + Re-initialize given 'derived' table/view for the next execution. + All underlying views/derived tables are recursively reinitialized prior + to re-initialization of given derived table. + 'thd' and 'lex' are passed as arguments to called functions. + + @return FALSE OK + @return TRUE Error +*/ + +static +bool mysql_derived_reinit(THD *thd, LEX *lex, TABLE_LIST *derived) +{ + DBUG_ENTER("mysql_derived_reinit"); + DBUG_PRINT("enter", ("Alias: '%s' Unit: %p", + (derived->alias.str ? 
derived->alias.str : ""), + derived->get_unit())); + st_select_lex_unit *unit= derived->get_unit(); + + derived->merged_for_insert= FALSE; + unit->unclean(); + unit->types.empty(); + /* for derived tables & PS (which can't be reset by Item_subselect) */ + unit->reinit_exec_mechanism(); + unit->set_thd(thd); + DBUG_RETURN(FALSE); +} + + +/* + @brief + Given condition cond and transformer+argument, try transforming as many + conjuncts as possible. + + @detail + The motivation of this function is to convert the condition that's being + pushed into a WHERE clause with derived_field_transformer_for_where or + with derived_grouping_field_transformer_for_where. + The transformer may fail for some sub-condition, in this case we want to + convert the most restrictive part of the condition that can be pushed. + + This function only does it for top-level AND: conjuncts that could not be + converted are dropped. + + @return + Converted condition, or NULL if nothing could be converted +*/ + +Item *transform_condition_or_part(THD *thd, + Item *cond, + Item_transformer transformer, + uchar *arg) +{ + if (cond->type() != Item::COND_ITEM || + ((Item_cond*) cond)->functype() != Item_func::COND_AND_FUNC) + { + Item *new_item= cond->transform(thd, transformer, arg); + // Indicate that the condition is not pushable + if (!new_item) + cond->clear_extraction_flag(); + return new_item; + } + + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + Item *new_item= item->transform(thd, transformer, arg); + if (!new_item) + { + // Indicate that the condition is not pushable + item->clear_extraction_flag(); + li.remove(); + } + else + li.replace(new_item); + } + + switch (((Item_cond*) cond)->argument_list()->elements) + { + case 0: + return NULL; + case 1: + return ((Item_cond*) cond)->argument_list()->head(); + default: + return cond; + } +} + + +/** + @brief + Extract condition that can be pushed into a derived table/view + + @param thd the 
thread handle + @param cond current condition + @param derived the reference to the derived table/view + + @details + This function builds the most restrictive condition depending only on + the derived table/view (directly or indirectly through equality) that + can be extracted from the given condition cond and pushes it into the + derived table/view. + + Example of the transformation: + + SELECT * + FROM t1, + ( + SELECT x,MAX(y) AS max_y + FROM t2 + GROUP BY x + ) AS d_tab + WHERE d_tab.x>1 AND d_tab.max_y<30; + + => + + SELECT * + FROM t1, + ( + SELECT x,z,MAX(y) AS max_y + FROM t2 + WHERE x>1 + HAVING max_y<30 + GROUP BY x + ) AS d_tab + WHERE d_tab.x>1 AND d_tab.max_y<30; + + In details: + 1. Check what pushable formula can be extracted from cond + 2. Build a clone PC of the formula that can be extracted + (the clone is built only if the extracted formula is a AND subformula + of cond or conjunction of such subformulas) + Do for every select specifying derived table/view: + 3. If there is no HAVING clause prepare PC to be conjuncted with + WHERE clause of the select. Otherwise do 4-7. + 4. Check what formula PC_where can be extracted from PC to be pushed + into the WHERE clause of the select + 5. Build PC_where and if PC_where is a conjunct(s) of PC remove it from PC + getting PC_having + 6. Prepare PC_where to be conjuncted with the WHERE clause of the select + 7. 
Prepare PC_having to be conjuncted with the HAVING clause of the select + @note + This method is similar to pushdown_cond_for_in_subquery() + + @retval TRUE if an error occurs + @retval FALSE otherwise +*/ + +bool pushdown_cond_for_derived(THD *thd, Item *cond, TABLE_LIST *derived) +{ + DBUG_ENTER("pushdown_cond_for_derived"); + if (!cond) + DBUG_RETURN(false); + + st_select_lex_unit *unit= derived->get_unit(); + st_select_lex *first_sl= unit->first_select(); + st_select_lex *sl= first_sl; + + if (derived->prohibit_cond_pushdown) + DBUG_RETURN(false); + + /* Do not push conditions into constant derived */ + if (unit->executed) + DBUG_RETURN(false); + + /* Do not push conditions into recursive with tables */ + if (derived->is_recursive_with_table()) + DBUG_RETURN(false); + + /* Do not push conditions into unit with global ORDER BY ... LIMIT */ + if (unit->fake_select_lex && + unit->fake_select_lex->limit_params.explicit_limit) + DBUG_RETURN(false); + + /* Check whether any select of 'unit' allows condition pushdown */ + bool some_select_allows_cond_pushdown= false; + for (; sl; sl= sl->next_select()) + { + if (sl->cond_pushdown_is_allowed()) + { + some_select_allows_cond_pushdown= true; + break; + } + } + if (!some_select_allows_cond_pushdown) + DBUG_RETURN(false); + + /* 1. Check what pushable formula can be extracted from cond */ + Item *extracted_cond; + cond->check_pushable_cond(&Item::pushable_cond_checker_for_derived, + (uchar *)(&derived->table->map)); + /* 2. 
Build a clone PC of the formula that can be extracted */ + extracted_cond= + cond->build_pushable_cond(thd, + &Item::pushable_equality_checker_for_derived, + ((uchar *)&derived->table->map)); + if (!extracted_cond) + { + /* Nothing can be pushed into the derived table */ + DBUG_RETURN(false); + } + + st_select_lex *save_curr_select= thd->lex->current_select; + for (; sl; sl= sl->next_select()) + { + Item *extracted_cond_copy; + if (!sl->cond_pushdown_is_allowed()) + continue; + /* + For each select of the unit except the last one + create a clone of extracted_cond + */ + extracted_cond_copy= !sl->next_select() ? + extracted_cond : + extracted_cond->build_clone(thd); + if (!extracted_cond_copy) + continue; + + /* + Rename the columns of all non-first selects of a union to be compatible + by names with the columns of the first select. It will allow to use copies + of the same expression pushed into having clauses of different selects. + */ + if (sl != first_sl) + { + DBUG_ASSERT(sl->item_list.elements == first_sl->item_list.elements); + List_iterator_fast it(sl->item_list); + List_iterator_fast nm_it(unit->types); + while (Item *item= it++) + item->share_name_with(nm_it++); + } + + /* Collect fields that are used in the GROUP BY of sl */ + if (sl->have_window_funcs()) + { + if (sl->group_list.first || sl->join->implicit_grouping) + continue; + ORDER *common_partition_fields= + sl->find_common_window_func_partition_fields(thd); + if (!common_partition_fields) + continue; + sl->collect_grouping_fields_for_derived(thd, common_partition_fields); + } + else + sl->collect_grouping_fields_for_derived(thd, sl->group_list.first); + + Item *remaining_cond= NULL; + /* Do 4-6 */ + sl->pushdown_cond_into_where_clause(thd, extracted_cond_copy, + &remaining_cond, + &Item::derived_field_transformer_for_where, + (uchar *) sl); + + if (!remaining_cond) + continue; + /* + 7. 
Prepare PC_having to be conjuncted with the HAVING clause of + the select + */ + remaining_cond= + remaining_cond->transform(thd, + &Item::derived_field_transformer_for_having, + (uchar *) sl); + if (!remaining_cond) + continue; + + if (remaining_cond->walk(&Item::cleanup_excluding_const_fields_processor, + 0, 0)) + continue; + + mark_or_conds_to_avoid_pushdown(remaining_cond); + + sl->cond_pushed_into_having= remaining_cond; + } + thd->lex->current_select= save_curr_select; + DBUG_RETURN(false); +} + + +/** + @brief + Look for provision of the derived_handler interface by a foreign engine + + @param thd The thread handler + + @details + The function looks through its tables of the query that specifies this + derived table searching for a table whose handlerton owns a + create_derived call-back function. If the call of this function returns + a derived_handler interface object then the server will push the query + specifying the derived table into this engine. + This is a responsibility of the create_derived call-back function to + check whether the engine can execute the query. 
+ + @retval the found derived_handler if the search is successful + 0 otherwise +*/ + +derived_handler *TABLE_LIST::find_derived_handler(THD *thd) +{ + if (!derived || is_recursive_with_table()) + return 0; + for (SELECT_LEX *sl= derived->first_select(); sl; sl= sl->next_select()) + { + if (!(sl->join)) + continue; + for (TABLE_LIST *tbl= sl->join->tables_list; tbl; tbl= tbl->next_local) + { + if (!tbl->table) + continue; + handlerton *ht= tbl->table->file->partition_ht(); + if (!ht->create_derived) + continue; + derived_handler *dh= ht->create_derived(thd, this); + if (dh) + { + dh->set_derived(this); + return dh; + } + } + } + return 0; +} + + +TABLE_LIST *TABLE_LIST::get_first_table() +{ + for (SELECT_LEX *sl= derived->first_select(); sl; sl= sl->next_select()) + { + if (!(sl->join)) + continue; + for (TABLE_LIST *tbl= sl->join->tables_list; tbl; tbl= tbl->next_local) + { + if (!tbl->table) + continue; + return tbl; + } + } + return 0; +} diff --git a/sql/sql_derived.h b/sql/sql_derived.h new file mode 100644 index 00000000..6100b4b4 --- /dev/null +++ b/sql/sql_derived.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_DERIVED_INCLUDED +#define SQL_DERIVED_INCLUDED + +struct TABLE_LIST; +class THD; +struct LEX; + +bool mysql_handle_derived(LEX *lex, uint phases); +bool mysql_handle_single_derived(LEX *lex, TABLE_LIST *derived, uint phases); + +Item *transform_condition_or_part(THD *thd, + Item *cond, + Item_transformer transformer, + uchar *arg); + +bool pushdown_cond_for_derived(THD *thd, Item *cond, TABLE_LIST *derived); + +#endif /* SQL_DERIVED_INCLUDED */ diff --git a/sql/sql_digest.cc b/sql/sql_digest.cc new file mode 100644 index 00000000..36a6b398 --- /dev/null +++ b/sql/sql_digest.cc @@ -0,0 +1,688 @@ +/* Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2017, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + This code needs extra visibility in the lexer structures +*/ + +#include "mariadb.h" +#include "my_md5.h" +#include "unireg.h" + +#include "sql_string.h" +#include "sql_class.h" +#include "sql_lex.h" +#include "sp_pcontext.h" +#include "sql_digest.h" +#include "sql_digest_stream.h" + +#include "sql_get_diagnostics.h" + +/* Generated code */ +#include "yy_mariadb.hh" +#define LEX_TOKEN_WITH_DEFINITION +#include "lex_token.h" + +/* Name pollution from sql/sql_lex.h */ +#ifdef LEX_YYSTYPE +#undef LEX_YYSTYPE +#endif + +#define LEX_YYSTYPE YYSTYPE* + +#define SIZE_OF_A_TOKEN 2 + +/** + Read a single token from token array. +*/ +inline uint read_token(const sql_digest_storage *digest_storage, + uint index, uint *tok) +{ + uint safe_byte_count= digest_storage->m_byte_count; + + if (index + SIZE_OF_A_TOKEN <= safe_byte_count && + safe_byte_count <= digest_storage->m_token_array_length) + { + const unsigned char *src= & digest_storage->m_token_array[index]; + *tok= src[0] | (src[1] << 8); + return index + SIZE_OF_A_TOKEN; + } + + /* The input byte stream is exhausted. */ + *tok= 0; + return MAX_DIGEST_STORAGE_SIZE + 1; +} + +/** + Store a single token in token array. +*/ +inline void store_token(sql_digest_storage* digest_storage, uint token) +{ + DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length); + + if (digest_storage->m_byte_count + SIZE_OF_A_TOKEN <= digest_storage->m_token_array_length) + { + unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count]; + dest[0]= token & 0xff; + dest[1]= (token >> 8) & 0xff; + digest_storage->m_byte_count+= SIZE_OF_A_TOKEN; + } + else + { + digest_storage->m_full= true; + } +} + +/** + Read an identifier from token array. 
+*/ +inline uint read_identifier(const sql_digest_storage* digest_storage, + uint index, char ** id_string, int *id_length) +{ + uint new_index; + uint safe_byte_count= digest_storage->m_byte_count; + + DBUG_ASSERT(index <= safe_byte_count); + DBUG_ASSERT(safe_byte_count <= digest_storage->m_token_array_length); + + /* + token + length + string are written in an atomic way, + so we do always expect a length + string here + */ + + uint bytes_needed= SIZE_OF_A_TOKEN; + /* If we can read token and identifier length */ + if ((index + bytes_needed) <= safe_byte_count) + { + const unsigned char *src= & digest_storage->m_token_array[index]; + /* Read the length of identifier */ + uint length= src[0] | (src[1] << 8); + bytes_needed+= length; + /* If we can read entire identifier from token array */ + if ((index + bytes_needed) <= safe_byte_count) + { + *id_string= (char *) (src + 2); + *id_length= length; + + new_index= index + bytes_needed; + DBUG_ASSERT(new_index <= safe_byte_count); + return new_index; + } + } + + /* The input byte stream is exhausted. */ + return MAX_DIGEST_STORAGE_SIZE + 1; +} + +/** + Store an identifier in token array. 
+*/ +inline void store_token_identifier(sql_digest_storage* digest_storage, + uint token, + size_t id_length, const char *id_name) +{ + DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length); + + size_t bytes_needed= 2 * SIZE_OF_A_TOKEN + id_length; + if (digest_storage->m_byte_count + bytes_needed <= (unsigned int)digest_storage->m_token_array_length) + { + unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count]; + /* Write the token */ + dest[0]= token & 0xff; + dest[1]= (token >> 8) & 0xff; + /* Write the string length */ + dest[2]= id_length & 0xff; + dest[3]= (id_length >> 8) & 0xff; + /* Write the string data */ + if (id_length > 0) + memcpy((char *)(dest + 4), id_name, id_length); + digest_storage->m_byte_count+= (uint)bytes_needed; + } + else + { + digest_storage->m_full= true; + } +} + +void compute_digest_md5(const sql_digest_storage *digest_storage, unsigned char *md5) +{ + compute_md5_hash(md5, + (const char *) digest_storage->m_token_array, + digest_storage->m_byte_count); +} + +/* + Iterate token array and updates digest_text. +*/ +void compute_digest_text(const sql_digest_storage* digest_storage, + String *digest_text) +{ + DBUG_ASSERT(digest_storage != NULL); + uint byte_count= digest_storage->m_byte_count; + String *digest_output= digest_text; + uint tok= 0; + uint current_byte= 0; + lex_token_string *tok_data; + + /* Reset existing data */ + digest_output->length(0); + + if (byte_count > digest_storage->m_token_array_length) + { + digest_output->append('\0'); + return; + } + + /* Convert text to utf8 */ + const CHARSET_INFO *from_cs= get_charset(digest_storage->m_charset_number, MYF(0)); + const CHARSET_INFO *to_cs= &my_charset_utf8mb3_bin; + + if (from_cs == NULL) + { + /* + Can happen, as we do dirty reads on digest_storage, + which can be written to in another thread. 
+ */ + digest_output->append('\0'); + return; + } + + char id_buffer[NAME_LEN + 1]= {'\0'}; + char *id_string; + size_t id_length; + bool convert_text= !my_charset_same(from_cs, to_cs); + + while (current_byte < byte_count) + { + current_byte= read_token(digest_storage, current_byte, &tok); + + if (tok <= 0 || tok >= array_elements(lex_token_array) + || current_byte > max_digest_length) + return; + + tok_data= &lex_token_array[tok]; + + switch (tok) + { + /* All identifiers are printed with their name. */ + case IDENT: + case IDENT_QUOTED: + case TOK_IDENT: + { + char *id_ptr= NULL; + int id_len= 0; + uint err_cs= 0; + + /* Get the next identifier from the storage buffer. */ + current_byte= read_identifier(digest_storage, current_byte, + &id_ptr, &id_len); + if (current_byte > max_digest_length) + return; + + if (convert_text) + { + /* Verify that the converted text will fit. */ + if (to_cs->mbmaxlen*id_len > NAME_LEN) + { + digest_output->append("...", 3); + break; + } + /* Convert identifier string into the storage character set. */ + id_length= my_convert(id_buffer, NAME_LEN, to_cs, + id_ptr, id_len, from_cs, &err_cs); + id_string= id_buffer; + } + else + { + id_string= id_ptr; + id_length= id_len; + } + + if (id_length == 0 || err_cs != 0) + { + break; + } + /* Copy the converted identifier into the digest string. */ + digest_output->append('`'); + if (id_length > 0) + digest_output->append(id_string, id_length); + digest_output->append("` ", 2); + } + break; + + /* Everything else is printed as is. */ + default: + /* + Make sure not to overflow digest_text buffer. + +1 is to make sure extra space for ' '. 
+ */ + int tok_length= tok_data->m_token_length; + + digest_output->append(tok_data->m_token_string, tok_length); + if (tok_data->m_append_space) + digest_output->append(' '); + break; + } + } +} + +static inline uint peek_token(const sql_digest_storage *digest, uint index) +{ + uint token; + DBUG_ASSERT(index + SIZE_OF_A_TOKEN <= digest->m_byte_count); + DBUG_ASSERT(digest->m_byte_count <= digest->m_token_array_length); + + token= ((digest->m_token_array[index + 1])<<8) | digest->m_token_array[index]; + return token; +} + +/** + Function to read last two tokens from token array. If an identifier + is found, do not look for token before that. +*/ +static inline void peek_last_two_tokens(const sql_digest_storage* digest_storage, + uint last_id_index, uint *t1, uint *t2) +{ + uint byte_count= digest_storage->m_byte_count; + uint peek_index= byte_count; + + if (last_id_index + SIZE_OF_A_TOKEN <= peek_index) + { + /* Take last token. */ + peek_index-= SIZE_OF_A_TOKEN; + *t1= peek_token(digest_storage, peek_index); + + if (last_id_index + SIZE_OF_A_TOKEN <= peek_index) + { + /* Take 2nd token from last. */ + peek_index-= SIZE_OF_A_TOKEN; + *t2= peek_token(digest_storage, peek_index); + } + else + { + *t2= TOK_UNUSED; + } + } + else + { + *t1= TOK_UNUSED; + *t2= TOK_UNUSED; + } +} + +/** + Function to read last three tokens from token array. If an identifier + is found, do not look for token before that. +*/ +static inline void peek_last_three_tokens(const sql_digest_storage* digest_storage, + uint last_id_index, uint *t1, uint *t2, uint *t3) +{ + uint byte_count= digest_storage->m_byte_count; + uint peek_index= byte_count; + + if (last_id_index + SIZE_OF_A_TOKEN <= peek_index) + { + /* Take last token. */ + peek_index-= SIZE_OF_A_TOKEN; + *t1= peek_token(digest_storage, peek_index); + + if (last_id_index + SIZE_OF_A_TOKEN <= peek_index) + { + /* Take 2nd token from last. 
*/ + peek_index-= SIZE_OF_A_TOKEN; + *t2= peek_token(digest_storage, peek_index); + + if (last_id_index + SIZE_OF_A_TOKEN <= peek_index) + { + /* Take 3rd token from last. */ + peek_index-= SIZE_OF_A_TOKEN; + *t3= peek_token(digest_storage, peek_index); + } + else + { + *t3= TOK_UNUSED; + } + } + else + { + *t2= TOK_UNUSED; + *t3= TOK_UNUSED; + } + } + else + { + *t1= TOK_UNUSED; + *t2= TOK_UNUSED; + *t3= TOK_UNUSED; + } +} + +sql_digest_state* digest_add_token(sql_digest_state *state, + uint token, + LEX_YYSTYPE yylval) +{ + sql_digest_storage *digest_storage= NULL; + + digest_storage= &state->m_digest_storage; + + /* + Stop collecting further tokens if digest storage is full or + if END token is received. + */ + if (digest_storage->m_full || token == END_OF_INPUT) + return NULL; + + /* + Take last_token 2 tokens collected till now. These tokens will be used + in reduce for normalisation. Make sure not to consider ID tokens in reduce. + */ + uint last_token; + uint last_token2; + + switch (token) + { + case NUM: + case LONG_NUM: + case ULONGLONG_NUM: + case DECIMAL_NUM: + case FLOAT_NUM: + case BIN_NUM: + case HEX_NUM: + { + bool found_unary; + do + { + found_unary= false; + peek_last_two_tokens(digest_storage, state->m_last_id_index, + &last_token, &last_token2); + + if ((last_token == '-') || (last_token == '+')) + { + /* + We need to differentiate: + - a operator + - a operator + from + - a operator + - a operator + to only reduce "a = -1" to "a = ?", and not change "b - 1" to "b ?" + + Binary operators are found inside an expression, + while unary operators are found at the beginning of an expression, or after operators. + + To achieve this, every token that is followed by an expression + in the SQL grammar is flagged. + See sql/sql_yacc.yy + See sql/gen_lex_token.cc + + For example, + "(-1)" is parsed as "(", "-", NUM, ")", and lex_token_array["("].m_start_expr is true, + so reduction of the "-" NUM is done, the result is "(?)". 
+ "(a-1)" is parsed as "(", ID, "-", NUM, ")", and lex_token_array[ID].m_start_expr is false, + so the operator is binary, no reduction is done, and the result is "(a-?)". + */ + if (lex_token_array[last_token2].m_start_expr) + { + /* + REDUCE: + TOK_GENERIC_VALUE := (UNARY_PLUS | UNARY_MINUS) (NUM | LOG_NUM | ... | FLOAT_NUM) + + REDUCE: + TOK_GENERIC_VALUE := (UNARY_PLUS | UNARY_MINUS) TOK_GENERIC_VALUE + */ + token= TOK_GENERIC_VALUE; + digest_storage->m_byte_count-= SIZE_OF_A_TOKEN; + found_unary= true; + } + } + } while (found_unary); + } + /* for case NULL_SYM below */ + /* fall through */ + case LEX_HOSTNAME: + case TEXT_STRING: + case NCHAR_STRING: + case PARAM_MARKER: + { + /* + REDUCE: + TOK_GENERIC_VALUE := BIN_NUM | DECIMAL_NUM | ... | ULONGLONG_NUM + */ + token= TOK_GENERIC_VALUE; + + peek_last_two_tokens(digest_storage, state->m_last_id_index, + &last_token, &last_token2); + + if ((last_token2 == TOK_GENERIC_VALUE || + last_token2 == TOK_GENERIC_VALUE_LIST) && + (last_token == ',')) + { + /* + REDUCE: + TOK_GENERIC_VALUE_LIST := + TOK_GENERIC_VALUE ',' TOK_GENERIC_VALUE + + REDUCE: + TOK_GENERIC_VALUE_LIST := + TOK_GENERIC_VALUE_LIST ',' TOK_GENERIC_VALUE + */ + digest_storage->m_byte_count-= 2*SIZE_OF_A_TOKEN; + token= TOK_GENERIC_VALUE_LIST; + } + /* + Add this token or the resulting reduce to digest storage. 
+ */ + store_token(digest_storage, token); + break; + } + case ')': + { + peek_last_two_tokens(digest_storage, state->m_last_id_index, + &last_token, &last_token2); + + if (last_token == TOK_GENERIC_VALUE && + last_token2 == '(') + { + /* + REDUCE: + TOK_ROW_SINGLE_VALUE := + '(' TOK_GENERIC_VALUE ')' + */ + digest_storage->m_byte_count-= 2*SIZE_OF_A_TOKEN; + token= TOK_ROW_SINGLE_VALUE; + + /* Read last two tokens again */ + peek_last_two_tokens(digest_storage, state->m_last_id_index, + &last_token, &last_token2); + + if ((last_token2 == TOK_ROW_SINGLE_VALUE || + last_token2 == TOK_ROW_SINGLE_VALUE_LIST) && + (last_token == ',')) + { + /* + REDUCE: + TOK_ROW_SINGLE_VALUE_LIST := + TOK_ROW_SINGLE_VALUE ',' TOK_ROW_SINGLE_VALUE + + REDUCE: + TOK_ROW_SINGLE_VALUE_LIST := + TOK_ROW_SINGLE_VALUE_LIST ',' TOK_ROW_SINGLE_VALUE + */ + digest_storage->m_byte_count-= 2*SIZE_OF_A_TOKEN; + token= TOK_ROW_SINGLE_VALUE_LIST; + } + } + else if (last_token == TOK_GENERIC_VALUE_LIST && + last_token2 == '(') + { + /* + REDUCE: + TOK_ROW_MULTIPLE_VALUE := + '(' TOK_GENERIC_VALUE_LIST ')' + */ + digest_storage->m_byte_count-= 2*SIZE_OF_A_TOKEN; + token= TOK_ROW_MULTIPLE_VALUE; + + /* Read last two tokens again */ + peek_last_two_tokens(digest_storage, state->m_last_id_index, + &last_token, &last_token2); + + if ((last_token2 == TOK_ROW_MULTIPLE_VALUE || + last_token2 == TOK_ROW_MULTIPLE_VALUE_LIST) && + (last_token == ',')) + { + /* + REDUCE: + TOK_ROW_MULTIPLE_VALUE_LIST := + TOK_ROW_MULTIPLE_VALUE ',' TOK_ROW_MULTIPLE_VALUE + + REDUCE: + TOK_ROW_MULTIPLE_VALUE_LIST := + TOK_ROW_MULTIPLE_VALUE_LIST ',' TOK_ROW_MULTIPLE_VALUE + */ + digest_storage->m_byte_count-= 2*SIZE_OF_A_TOKEN; + token= TOK_ROW_MULTIPLE_VALUE_LIST; + } + } + /* + Add this token or the resulting reduce to digest storage. 
/**
  Apply a single reduce rule to the digest token stream.

  There is only one caller of digest_reduce_token(),
  see sql/sql_yacc.yy, rule literal := NULL_SYM.
  REDUCE:
    token_left := token_right
  Used for:
    TOK_GENERIC_VALUE := NULL_SYM

  @param state        current digest computation state
  @param token_left   token to reduce to (left-hand side of the rule)
  @param token_right  token to be replaced (right-hand side of the rule)
  @return state, or NULL when the digest storage is already full
*/
sql_digest_state* digest_reduce_token(sql_digest_state *state,
                                      uint token_left, uint token_right)
{
  sql_digest_storage *digest_storage= NULL;

  digest_storage= &state->m_digest_storage;

  /*
    Stop collecting further tokens if digest storage is full.
  */
  if (digest_storage->m_full)
    return NULL;

  uint last_token;
  uint last_token2;
  uint last_token3;
  uint token_to_push= TOK_UNUSED;

  peek_last_two_tokens(digest_storage, state->m_last_id_index,
                       &last_token, &last_token2);

  if (last_token == token_right)
  {
    /*
      Current stream is like:
        TOKEN_X TOKEN_RIGHT .
      REDUCE to
        TOKEN_X TOKEN_LEFT .
    */
    /* Pop TOKEN_RIGHT and emit TOKEN_LEFT in its place. */
    digest_storage->m_byte_count-= SIZE_OF_A_TOKEN;
    store_token(digest_storage, token_left);
  }
  else
  {
    /*
      Current stream is like:
        TOKEN_X TOKEN_RIGHT TOKEN_Y .
      Pop TOKEN_Y
        TOKEN_X TOKEN_RIGHT . TOKEN_Y
      REDUCE to
        TOKEN_X TOKEN_LEFT . TOKEN_Y
    */
    /* TOKEN_RIGHT must be the next-to-last token in this case. */
    DBUG_ASSERT(last_token2 == token_right);
    digest_storage->m_byte_count-= 2 * SIZE_OF_A_TOKEN;
    store_token(digest_storage, token_left);
    /* Remember TOKEN_Y so it can be re-pushed after further reduction. */
    token_to_push= last_token;
  }

  /*
    After the rewrite above, check whether the new tail of the stream
    now forms a value list that can itself be collapsed.
  */
  peek_last_three_tokens(digest_storage, state->m_last_id_index,
                         &last_token, &last_token2, &last_token3);

  if ((last_token3 == TOK_GENERIC_VALUE ||
       last_token3 == TOK_GENERIC_VALUE_LIST) &&
      (last_token2 == ',') &&
      (last_token == TOK_GENERIC_VALUE))
  {
    /*
      REDUCE:
      TOK_GENERIC_VALUE_LIST :=
        TOK_GENERIC_VALUE ',' TOK_GENERIC_VALUE

      REDUCE:
      TOK_GENERIC_VALUE_LIST :=
        TOK_GENERIC_VALUE_LIST ',' TOK_GENERIC_VALUE
    */
    digest_storage->m_byte_count-= 3*SIZE_OF_A_TOKEN;
    store_token(digest_storage, TOK_GENERIC_VALUE_LIST);
  }

  if (token_to_push != TOK_UNUSED)
  {
    /*
      Push TOKEN_Y back on top of the (possibly reduced) stream.
    */
    store_token(digest_storage, token_to_push);
  }

  return state;
}
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_DIGEST_H +#define SQL_DIGEST_H + +#include +class String; +#include "my_md5.h" + +#define MAX_DIGEST_STORAGE_SIZE (1024*1024) + +/** + Structure to store token count/array for a statement + on which digest is to be calculated. +*/ +struct sql_digest_storage +{ + bool m_full; + uint m_byte_count; + unsigned char m_md5[MD5_HASH_SIZE]; + /** Character set number. */ + uint m_charset_number; + /** + Token array. + Token array is an array of bytes to store tokens received during parsing. + Following is the way token array is formed. + ... <non-id-token> <non-id-token> <id-token> <id_len> <id_text> ... + For Example: + SELECT * FROM T1; + <SELECT_TOKEN> <*> <FROM_TOKEN> <ID_TOKEN> <2> <T1> + + @note Only the first @c m_byte_count bytes are initialized, + out of @c m_token_array_length. + */ + unsigned char *m_token_array; + /* Length of the token array to be considered for DIGEST_TEXT calculation. */ + uint m_token_array_length; + + sql_digest_storage() + { + reset(NULL, 0); + } + + inline void reset(unsigned char *token_array, size_t length) + { + m_token_array= token_array; + m_token_array_length= (uint)length; + reset(); + } + + inline void reset() + { + m_full= false; + m_byte_count= 0; + m_charset_number= 0; + memset(m_md5, 0, MD5_HASH_SIZE); + } + + inline bool is_empty() + { + return (m_byte_count == 0); + } + + inline void copy(const sql_digest_storage *from) + { + /* + Keep in mind this is a dirty copy of something that may change, + as the thread producing the digest is executing concurrently, + without any lock enforced. + */ + uint byte_count_copy= m_token_array_length < from->m_byte_count ? 
+ m_token_array_length : from->m_byte_count; + + if (byte_count_copy > 0) + { + m_full= from->m_full; + m_byte_count= byte_count_copy; + m_charset_number= from->m_charset_number; + memcpy(m_token_array, from->m_token_array, m_byte_count); + memcpy(m_md5, from->m_md5, MD5_HASH_SIZE); + } + else + { + m_full= false; + m_byte_count= 0; + m_charset_number= 0; + } + } +}; +typedef struct sql_digest_storage sql_digest_storage; + +/** + Compute a digest hash. + @param digest_storage The digest + @param [out] md5 The computed digest hash. This parameter is a buffer of size @c MD5_HASH_SIZE. +*/ +void compute_digest_md5(const sql_digest_storage *digest_storage, unsigned char *md5); + +/** + Compute a digest text. + A 'digest text' is a textual representation of a query, + where: + - comments are removed, + - non significant spaces are removed, + - literal values are replaced with a special '?' marker, + - lists of values are collapsed using a shorter notation + @param digest_storage The digest + @param [out] digest_text + @param digest_text_length Size of @c digest_text. + @param [out] truncated true if the text representation was truncated +*/ +void compute_digest_text(const sql_digest_storage *digest_storage, + String *digest_text); + +#endif + diff --git a/sql/sql_digest_stream.h b/sql/sql_digest_stream.h new file mode 100644 index 00000000..75e534e8 --- /dev/null +++ b/sql/sql_digest_stream.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
#ifndef SQL_DIGEST_STREAM_H
#define SQL_DIGEST_STREAM_H

#include "sql_digest.h"

/**
  State data storage for @c digest_start, @c digest_add_token.
  This structure extends the @c sql_digest_storage structure
  with temporary state used only during parsing.
*/
struct sql_digest_state
{
  /**
    Index, in the digest token array, of the last identifier seen.
    Reduce rules used in the digest computation can not
    apply to tokens seen before an identifier.
    @sa digest_add_token
  */
  int m_last_id_index;
  /* The persistent digest data (token array, byte count, hash). */
  sql_digest_storage m_digest_storage;

  /* Reset both the parse-time state and the underlying storage. */
  inline void reset(unsigned char *token_array, uint length)
  {
    m_last_id_index= 0;
    m_digest_storage.reset(token_array, length);
  }

  inline bool is_empty()
  {
    return m_digest_storage.is_empty();
  }
};
typedef struct sql_digest_state sql_digest_state;

#endif
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* Execute DO statement */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "transaction.h" +#include "unireg.h" +#include "sql_do.h" +#include "sql_base.h" // setup_fields +#include "sql_select.h" // free_underlaid_joins + +bool mysql_do(THD *thd, List &values) +{ + List_iterator li(values); + Item *value; + DBUG_ENTER("mysql_do"); + if (setup_fields(thd, Ref_ptr_array(), values, COLUMNS_READ, 0, NULL, 0)) + DBUG_RETURN(TRUE); + while ((value = li++)) + (void) value->is_null(); + free_underlaid_joins(thd, thd->lex->first_select_lex()); + + if (unlikely(thd->is_error())) + { + /* + Rollback the effect of the statement, since next instruction + will clear the error and the rollback in the end of + mysql_execute_command() won't work. + */ + if (! thd->in_sub_stmt) + trans_rollback_stmt(thd); + thd->clear_error(); // DO always is OK + } + my_ok(thd); + DBUG_RETURN(FALSE); +} diff --git a/sql/sql_do.h b/sql/sql_do.h new file mode 100644 index 00000000..5280a4a5 --- /dev/null +++ b/sql/sql_do.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_DO_INCLUDED +#define SQL_DO_INCLUDED + +#include "sql_list.h" /* List */ + +class THD; +class Item; + +bool mysql_do(THD *thd, List &values); + +#endif /* SQL_DO_INCLUDED */ diff --git a/sql/sql_error.cc b/sql/sql_error.cc new file mode 100644 index 00000000..76ed0ca2 --- /dev/null +++ b/sql/sql_error.cc @@ -0,0 +1,1027 @@ +/* Copyright (c) 1995, 2011, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/********************************************************************** +This file contains the implementation of error and warnings related + + - Whenever an error or warning occurred, it pushes it to a warning list + that the user can retrieve with SHOW WARNINGS or SHOW ERRORS. + + - For each statement, we return the number of warnings generated from this + command. Note that this can be different from @@warning_count as + we reset the warning list only for questions that uses a table. 
+ This is done to allow on to do: + INSERT ...; + SELECT @@warning_count; + SHOW WARNINGS; + (If we would reset after each command, we could not retrieve the number + of warnings) + + - When client requests the information using SHOW command, then + server processes from this list and returns back in the form of + resultset. + + Supported syntaxes: + + SHOW [COUNT(*)] ERRORS [LIMIT [offset,] rows] + SHOW [COUNT(*)] WARNINGS [LIMIT [offset,] rows] + SELECT @@warning_count, @@error_count; + +***********************************************************************/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "sql_error.h" +#include "sp_rcontext.h" + +/* + Design notes about Sql_condition::m_message_text. + + The member Sql_condition::m_message_text contains the text associated with + an error, warning or note (which are all SQL 'conditions') + + Producer of Sql_condition::m_message_text: + ---------------------------------------- + + (#1) the server implementation itself, when invoking functions like + my_error() or push_warning() + + (#2) user code in stored programs, when using the SIGNAL statement. + + (#3) user code in stored programs, when using the RESIGNAL statement. + + When invoking my_error(), the error number and message is typically + provided like this: + - my_error(ER_WRONG_DB_NAME, MYF(0), ...); + - my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0)); + + In both cases, the message is retrieved from ER(ER_XXX), which in turn + is read from the resource file errmsg.sys at server startup. + The strings stored in the errmsg.sys file are expressed in the character set + that corresponds to the server --language start option + (see error_message_charset_info). + + When executing: + - a SIGNAL statement, + - a RESIGNAL statement, + the message text is provided by the user logic, and is expressed in UTF8. 
+ + Storage of Sql_condition::m_message_text: + --------------------------------------- + + (#4) The class Sql_condition is used to hold the message text member. + This class represents a single SQL condition. + + (#5) The class Warning_info represents a SQL condition area, and contains + a collection of SQL conditions in the Warning_info::m_warn_list + + Consumer of Sql_condition::m_message_text: + ---------------------------------------- + + (#6) The statements SHOW WARNINGS and SHOW ERRORS display the content of + the warning list. + + (#7) The GET DIAGNOSTICS statement (planned, not implemented yet) will + also read the content of: + - the top level statement condition area (when executed in a query), + - a sub statement (when executed in a stored program) + and return the data stored in a Sql_condition. + + (#8) The RESIGNAL statement reads the Sql_condition caught by an exception + handler, to raise a new or modified condition (in #3). + + The big picture + --------------- + -------------- + | ^ + V | + my_error(#1) SIGNAL(#2) RESIGNAL(#3) | + |(#A) |(#B) |(#C) | + | | | | + ----------------------------|---------------------------- | + | | + V | + Sql_condition(#4) | + | | + | | + V | + Warning_info(#5) | + | | + ----------------------------------------------------- | + | | | | + | | | | + | | | | + V V V | + SHOW WARNINGS(#6) GET DIAGNOSTICS(#7) RESIGNAL(#8) | + | | | | | + | -------- | V | + | | | -------------- + V | | + Connectors | | + | | | + ------------------------- + | + V + Client application + + Current implementation status + ----------------------------- + + (#1) (my_error) produces data in the 'error_message_charset_info' CHARSET + + (#2) and (#3) (SIGNAL, RESIGNAL) produces data internally in UTF8 + + (#6) (SHOW WARNINGS) produces data in the 'error_message_charset_info' CHARSET + + (#7) (GET DIAGNOSTICS) is not implemented. 
/**
  Deep-copy a String's bytes onto a MEM_ROOT and point dst at the copy.
  On empty input only the destination length is reset; on allocation
  failure dst is left unchanged (best-effort, no error raised).
*/
static void copy_string(MEM_ROOT *mem_root, String* dst, const String* src)
{
  size_t len= src->length();
  if (len)
  {
    char* copy= (char*) alloc_root(mem_root, len + 1);
    if (copy)
    {
      memcpy(copy, src->ptr(), len);
      copy[len]= '\0';
      dst->set(copy, len, src->charset());
    }
  }
  else
    dst->length(0);
}

/**
  Copy the optional (SIGNAL/RESIGNAL) condition item attributes from
  another condition into this one, duplicating each string on this
  condition's own mem_root.
*/
void
Sql_condition::copy_opt_attributes(const Sql_condition *cond)
{
  DBUG_ASSERT(this != cond);
  copy_string(m_mem_root, & m_class_origin, & cond->m_class_origin);
  copy_string(m_mem_root, & m_subclass_origin, & cond->m_subclass_origin);
  copy_string(m_mem_root, & m_constraint_catalog, & cond->m_constraint_catalog);
  copy_string(m_mem_root, & m_constraint_schema, & cond->m_constraint_schema);
  copy_string(m_mem_root, & m_constraint_name, & cond->m_constraint_name);
  copy_string(m_mem_root, & m_catalog_name, & cond->m_catalog_name);
  copy_string(m_mem_root, & m_schema_name, & cond->m_schema_name);
  copy_string(m_mem_root, & m_table_name, & cond->m_table_name);
  copy_string(m_mem_root, & m_column_name, & cond->m_column_name);
  copy_string(m_mem_root, & m_cursor_name, & cond->m_cursor_name);
  m_row_number= cond->m_row_number;
}


/**
  Set the condition message from a server-side (errmsg.sys) string.
  The text is duplicated on m_mem_root when one is available; otherwise
  the pointer is stored as-is (assumes str outlives the condition —
  true for builtin messages).
*/
void
Sql_condition::set_builtin_message_text(const char* str)
{
  /*
    See the comments
     "Design notes about Sql_condition::m_message_text."
  */
  const char* copy;

  copy= m_mem_root ? strdup_root(m_mem_root, str) : str;
  m_message_text.set(copy, strlen(copy), error_message_charset_info);
  DBUG_ASSERT(! m_message_text.is_alloced());
}

/** @return the condition message text (may be NULL if never set). */
const char*
Sql_condition::get_message_text() const
{
  return m_message_text.ptr();
}

/** @return the message length in bytes (octets, not characters). */
int
Sql_condition::get_message_octet_length() const
{
  return m_message_text.length();
}


/**
  Derive the warning level and default error number from an SQLSTATE,
  per the SQL standard classes: "01" = warning, "02" = not found,
  anything else = exception.
*/
void Sql_state_errno_level::assign_defaults(const Sql_state_errno *from)
{
  DBUG_ASSERT(from);
  int sqlerrno= from->get_sql_errno();
  /*
    SIGNAL is restricted in sql_yacc.yy to only signal SQLSTATE conditions.
  */
  DBUG_ASSERT(from->has_sql_state());
  set_sqlstate(from);
  /* SQLSTATE class "00": illegal, rejected in the parser. */
  DBUG_ASSERT(m_sqlstate[0] != '0' || get_sqlstate()[1] != '0');

  if (Sql_state::is_warning()) /* SQLSTATE class "01": warning. */
  {
    m_level= Sql_condition::WARN_LEVEL_WARN;
    m_sql_errno= sqlerrno ? sqlerrno : ER_SIGNAL_WARN;
  }
  else if (Sql_state::is_not_found()) /* SQLSTATE class "02": not found. */
  {
    m_level= Sql_condition::WARN_LEVEL_ERROR;
    m_sql_errno= sqlerrno ? sqlerrno : ER_SIGNAL_NOT_FOUND;
  }
  else /* other SQLSTATE classes : error. */
  {
    m_level= Sql_condition::WARN_LEVEL_ERROR;
    m_sql_errno= sqlerrno ? sqlerrno : ER_SIGNAL_EXCEPTION;
  }
}


/**
  Fill in defaults (level, errno, message) for a condition being raised,
  optionally taking the SQLSTATE-derived defaults from 'from'.
*/
void Sql_condition::assign_defaults(THD *thd, const Sql_state_errno *from)
{
  if (from)
    Sql_state_errno_level::assign_defaults(from);
  if (!get_message_text())
    set_builtin_message_text(ER(get_sql_errno()));
}


/* Construct a Diagnostics_area with a default (id 0) main Warning_info. */
Diagnostics_area::Diagnostics_area(bool initialize)
  : is_bulk_execution(0), m_main_wi(0, false, initialize)
{
  push_warning_info(&m_main_wi);

  reset_diagnostics_area();
}

/* Construct with an explicit warning-info id and warning limit policy. */
Diagnostics_area::Diagnostics_area(ulonglong warning_info_id,
                                   bool allow_unlimited_warnings,
                                   bool initialize)
  : is_bulk_execution(0),
    m_main_wi(warning_info_id, allow_unlimited_warnings, initialize)
{
  push_warning_info(&m_main_wi);

  reset_diagnostics_area();
}

/**
  Clear this diagnostics area.

  Normally called at the end of a statement.
*/

void
Diagnostics_area::reset_diagnostics_area()
{
  DBUG_ENTER("reset_diagnostics_area");
#ifdef DBUG_OFF
  m_can_overwrite_status= FALSE;
  /** Don't take chances in production */
  m_message[0]= '\0';
  Sql_state_errno::clear();
  Sql_user_condition_identity::clear();
  m_affected_rows= 0;
  m_last_insert_id= 0;
  m_statement_warn_count= 0;
#endif
  /*
    NOTE(review): the fields above are only reset in production builds
    (DBUG_OFF); debug builds intentionally leave garbage so stale reads
    are visible right away.
  */
  get_warning_info()->clear_error_condition();
  set_is_sent(false);
  /** Tiny reset in debug mode to see garbage right away */
  m_status= DA_EMPTY;
  DBUG_VOID_RETURN;
}
/**
  Set OK status -- ends commands that do not return a
  result set, e.g. INSERT/UPDATE/DELETE.

  @param affected_rows  rows changed by the statement
  @param last_insert_id last auto-generated id, if any
  @param message        optional info message for the client
*/

void
Diagnostics_area::set_ok_status(ulonglong affected_rows,
                                ulonglong last_insert_id,
                                const char *message)
{
  DBUG_ENTER("set_ok_status");
  DBUG_ASSERT(!is_set() || (m_status == DA_OK_BULK && is_bulk_op()));
  /*
    In production, refuse to overwrite an error or a custom response
    with an OK packet.
  */
  if (unlikely(is_error() || is_disabled()))
    return;
  /*
    When running a bulk operation, m_status will be DA_OK for the first
    operation and set to DA_OK_BULK for all following operations.
  */
  if (m_status == DA_OK_BULK)
  {
    /* Accumulate counts across the bulk operations. */
    m_statement_warn_count+= current_statement_warn_count();
    m_affected_rows+= affected_rows;
  }
  else
  {
    m_statement_warn_count= current_statement_warn_count();
    m_affected_rows= affected_rows;
    m_status= (is_bulk_op() ? DA_OK_BULK : DA_OK);
  }
  m_last_insert_id= last_insert_id;
  if (message)
    strmake_buf(m_message, message);
  else
    m_message[0]= '\0';
  DBUG_VOID_RETURN;
}


/**
  Set EOF status.
*/

void
Diagnostics_area::set_eof_status(THD *thd)
{
  DBUG_ENTER("set_eof_status");
  /* Only allowed to report eof if has not yet reported an error */
  DBUG_ASSERT(!is_set() || (m_status == DA_EOF_BULK && is_bulk_op()));
  /*
    In production, refuse to overwrite an error or a custom response
    with an EOF packet.
  */
  if (unlikely(is_error() || is_disabled()))
    return;

  /*
    If inside a stored procedure, do not return the total
    number of warnings, since they are not available to the client
    anyway.
  */
  if (m_status == DA_EOF_BULK)
  {
    if (!thd->spcont)
      m_statement_warn_count+= current_statement_warn_count();
  }
  else
  {
    if (thd->spcont)
    {
      m_statement_warn_count= 0;
      m_affected_rows= 0;
    }
    else
      m_statement_warn_count= current_statement_warn_count();
    m_status= (is_bulk_op() ? DA_EOF_BULK : DA_EOF);
  }

  DBUG_VOID_RETURN;
}

/**
  Set ERROR status in the Diagnostics Area. This function should be used to
  report fatal errors (such as out-of-memory errors) when no further
  processing is possible.

  @param sql_errno SQL-condition error number
*/

void
Diagnostics_area::set_error_status(uint sql_errno)
{
  /* Delegate with the builtin message and mapped SQLSTATE. */
  set_error_status(sql_errno,
                   ER(sql_errno),
                   mysql_errno_to_sqlstate(sql_errno),
                   Sql_user_condition_identity(),
                   NULL);
}


/**
  Set ERROR status in the Diagnostics Area.

  @param sql_errno        SQL-condition error number
  @param message          SQL-condition message
  @param sqlstate         SQL-condition state
  @param ucid             User defined condition identity
  @param error_condition  SQL-condition object representing the error state

  @note Note, that error_condition may be NULL. It happens if a) OOM error is
        being reported; or b) when Warning_info is full.
*/

void
Diagnostics_area::set_error_status(uint sql_errno,
                                   const char *message,
                                   const char *sqlstate,
                                   const Sql_user_condition_identity &ucid,
                                   const Sql_condition *error_condition)
{
  DBUG_ENTER("set_error_status");
  DBUG_PRINT("enter", ("error: %d", sql_errno));
  /*
    Only allowed to report error if has not yet reported a success
    The only exception is when we flush the message to the client,
    an error can happen during the flush.
  */
  DBUG_ASSERT(! is_set() || m_can_overwrite_status);

  // message must be set properly by the caller.
  DBUG_ASSERT(message);

  // sqlstate must be set properly by the caller.
  DBUG_ASSERT(sqlstate);

#ifdef DBUG_OFF
  /*
    In production, refuse to overwrite a custom response with an
    ERROR packet.
  */
  if (is_disabled())
    return;
#endif

  Sql_state_errno::set(sql_errno, sqlstate);
  Sql_user_condition_identity::set(ucid);
  strmake_buf(m_message, message);

  get_warning_info()->set_error_condition(error_condition);

  m_status= DA_ERROR;
  DBUG_VOID_RETURN;
}
/**
  Mark the diagnostics area as 'DISABLED'.

  This is used in rare cases when the COM_ command at hand sends a response
  in a custom format. One example is the query cache, another is
  COM_STMT_PREPARE.
*/

void
Diagnostics_area::disable_status()
{
  DBUG_ENTER("disable_status");
  DBUG_ASSERT(! is_set());
  m_status= DA_DISABLED;
  DBUG_VOID_RETURN;
}

/* Construct an empty warning list; memory root is set up lazily via init(). */
Warning_info::Warning_info(ulonglong warn_id_arg,
                           bool allow_unlimited_warnings, bool initialize)
  :m_current_statement_warn_count(0),
  m_current_row_for_warning(0),
  m_warn_id(warn_id_arg),
  m_error_condition(NULL),
  m_allow_unlimited_warnings(allow_unlimited_warnings),
  initialized(0),
  m_read_only(FALSE)
{
  m_warn_list.empty();
  memset(m_warn_count, 0, sizeof(m_warn_count));
  if (initialize)
    init();
}

/* Allocate the MEM_ROOT backing all Sql_condition objects in this list. */
void Warning_info::init()
{
  /* Initialize sub structures */
  DBUG_ASSERT(initialized == 0);
  init_sql_alloc(PSI_INSTRUMENT_ME, &m_warn_root, WARN_ALLOC_BLOCK_SIZE,
                 WARN_ALLOC_PREALLOC_SIZE, MYF(MY_THREAD_SPECIFIC));
  initialized= 1;
}

/* Release condition memory; safe to call whether or not init() ran. */
void Warning_info::free_memory()
{
  if (initialized)
    free_root(&m_warn_root,MYF(0));
}

Warning_info::~Warning_info()
{
  free_memory();
}


/**
  @return true if a condition whose message starts with the given prefix
  (strncmp over message_length bytes) exists in the warning list.
*/
bool Warning_info::has_sql_condition(const char *message_str, size_t message_length) const
{
  Diagnostics_area::Sql_condition_iterator it(m_warn_list);
  const Sql_condition *err;

  while ((err= it++))
  {
    if (strncmp(message_str, err->get_message_text(), message_length) == 0)
      return true;
  }

  return false;
}

/** @return true if a condition with the given error number is in the list. */
bool Warning_info::has_sql_condition(uint sql_errno) const
{
  Diagnostics_area::Sql_condition_iterator it(m_warn_list);
  const Sql_condition *err;

  while ((err = it++))
  {
    if (err->get_sql_errno() == sql_errno)
      return true;
  }
  return false;
}

/** Drop all conditions, free their memory, and adopt a new statement id. */
void Warning_info::clear(ulonglong new_id)
{
  id(new_id);
  m_warn_list.empty();
  m_marked_sql_conditions.empty();
  free_memory();
  memset(m_warn_count, 0, sizeof(m_warn_count));
  m_current_statement_warn_count= 0;
  m_current_row_for_warning= 0;
  clear_error_condition();
}
source->get_error_condition(); + + while ((err= it++)) + { + // Do not use ::push_warning() to avoid invocation of THD-internal-handlers. + Sql_condition *new_error= Warning_info::push_warning(thd, err); + + if (src_error_condition && src_error_condition == err) + set_error_condition(new_error); + + if (source->is_marked_for_removal(err)) + mark_condition_for_removal(new_error); + } +} + + +/** + Copy Sql_conditions that are not WARN_LEVEL_ERROR from the source + Warning_info to the current Warning_info. + + @param thd Thread context. + @param sp_wi Stored-program Warning_info + @param thd Thread context. + @param src_wi Warning_info to copy from. +*/ +void Diagnostics_area::copy_non_errors_from_wi(THD *thd, + const Warning_info *src_wi) +{ + Sql_condition_iterator it(src_wi->m_warn_list); + const Sql_condition *cond; + Warning_info *wi= get_warning_info(); + + while ((cond= it++)) + { + if (cond->get_level() == Sql_condition::WARN_LEVEL_ERROR) + continue; + + Sql_condition *new_condition= wi->push_warning(thd, cond); + + if (src_wi->is_marked_for_removal(cond)) + wi->mark_condition_for_removal(new_condition); + } +} + + +void Warning_info::mark_sql_conditions_for_removal() +{ + Sql_condition_list::Iterator it(m_warn_list); + Sql_condition *cond; + + while ((cond= it++)) + mark_condition_for_removal(cond); +} + + +void Warning_info::remove_marked_sql_conditions() +{ + List_iterator_fast it(m_marked_sql_conditions); + Sql_condition *cond; + + while ((cond= it++)) + { + m_warn_list.remove(cond); + m_warn_count[cond->get_level()]--; + m_current_statement_warn_count--; + if (cond == m_error_condition) + m_error_condition= NULL; + } + + m_marked_sql_conditions.empty(); +} + + +bool Warning_info::is_marked_for_removal(const Sql_condition *cond) const +{ + List_iterator_fast it( + const_cast&> (m_marked_sql_conditions)); + Sql_condition *c; + + while ((c= it++)) + { + if (c == cond) + return true; + } + + return false; +} + + +void Warning_info::reserve_space(THD *thd, 
/**
  Evict oldest conditions until 'count' more can be added without
  exceeding @@max_error_count.
*/
void Warning_info::reserve_space(THD *thd, uint count)
{
  /* Remove oldest warnings until there is a room for 'count' new ones. */
  while (m_warn_list.elements() &&
         (m_warn_list.elements() + count) > thd->variables.max_error_count)
    m_warn_list.remove(m_warn_list.front());
}

/**
  Append a new condition to the list (unless read-only or over the limit).
  The per-level counter is incremented even when the condition itself is
  not stored, so @@warning_count stays accurate.

  @return the stored condition, or NULL when not stored (OOM, list full,
          or read-only)
*/
Sql_condition *Warning_info::push_warning(THD *thd,
                                          const Sql_condition_identity *value,
                                          const char *msg,
                                          ulong current_row_number)
{
  Sql_condition *cond= NULL;

  if (! m_read_only)
  {
    if (m_allow_unlimited_warnings ||
        m_warn_list.elements() < thd->variables.max_error_count)
    {
      cond= new (& m_warn_root) Sql_condition(& m_warn_root, *value, msg,
                                              current_row_number);
      if (cond)
        m_warn_list.push_back(cond);
    }
    m_warn_count[(uint) value->get_level()]++;
  }

  m_current_statement_warn_count++;
  return cond;
}


/** Append a copy of an existing condition, including optional attributes. */
Sql_condition *Warning_info::push_warning(THD *thd,
                                          const Sql_condition *sql_condition)
{
  Sql_condition *new_condition= push_warning(thd, sql_condition,
                                             sql_condition->get_message_text(),
                                             sql_condition->m_row_number);

  if (new_condition)
    new_condition->copy_opt_attributes(sql_condition);

  return new_condition;
}
*/ + thd->get_stmt_da()->increment_warning(); + + DBUG_VOID_RETURN; +} + + +/* + Push the warning to error list if there is still room in the list + + SYNOPSIS + push_warning_printf() + thd Thread handle + level Severity of warning (note, warning) + code Error number + msg Clear error message +*/ + +void push_warning_printf(THD *thd, Sql_condition::enum_warning_level level, + uint code, const char *format, ...) +{ + va_list args; + char warning[MYSQL_ERRMSG_SIZE]; + DBUG_ENTER("push_warning_printf"); + DBUG_PRINT("enter",("warning: %u", code)); + + DBUG_ASSERT(code != 0); + DBUG_ASSERT(format != NULL); + + va_start(args,format); + my_vsnprintf_ex(&my_charset_utf8mb3_general_ci, warning, + sizeof(warning), format, args); + va_end(args); + push_warning(thd, level, code, warning); + DBUG_VOID_RETURN; +} + + +/* + Send all notes, errors or warnings to the client in a result set + + SYNOPSIS + mysqld_show_warnings() + thd Thread handler + levels_to_show Bitmap for which levels to show + + DESCRIPTION + Takes into account the current LIMIT + + RETURN VALUES + FALSE ok + TRUE Error sending data to client +*/ + +const LEX_CSTRING warning_level_names[]= +{ + { STRING_WITH_LEN("Note") }, + { STRING_WITH_LEN("Warning") }, + { STRING_WITH_LEN("Error") }, + { STRING_WITH_LEN("?") } +}; + +bool mysqld_show_warnings(THD *thd, ulong levels_to_show) +{ + List field_list; + MEM_ROOT *mem_root= thd->mem_root; + const Sql_condition *err; + SELECT_LEX *sel= thd->lex->first_select_lex(); + SELECT_LEX_UNIT *unit= &thd->lex->unit; + ha_rows idx; + Protocol *protocol=thd->protocol; + DBUG_ENTER("mysqld_show_warnings"); + + DBUG_ASSERT(thd->get_stmt_da()->is_warning_info_read_only()); + + field_list.push_back(new (mem_root) + Item_empty_string(thd, "Level", 7), + mem_root); + field_list.push_back(new (mem_root) + Item_return_int(thd, "Code", 4, MYSQL_TYPE_LONG), + mem_root); + field_list.push_back(new (mem_root) + Item_empty_string(thd, "Message", MYSQL_ERRMSG_SIZE), + mem_root); + + if 
(protocol->send_result_set_metadata(&field_list, + Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF)) + DBUG_RETURN(TRUE); + + unit->set_limit(sel); + + Diagnostics_area::Sql_condition_iterator it= + thd->get_stmt_da()->sql_conditions(); + for (idx= 0; (err= it++) ; idx++) + { + /* Skip levels that the user is not interested in */ + if (!(levels_to_show & ((ulong) 1 << err->get_level()))) + continue; + if (unit->lim.check_offset(idx)) + continue; // using limit offset,count + if (idx >= unit->lim.get_select_limit()) + break; + protocol->prepare_for_resend(); + protocol->store(warning_level_names[err->get_level()].str, + warning_level_names[err->get_level()].length, + system_charset_info); + protocol->store((uint32) err->get_sql_errno()); + protocol->store_warning(err->get_message_text(), + err->get_message_octet_length()); + if (protocol->write()) + DBUG_RETURN(TRUE); + } + my_eof(thd); + + thd->get_stmt_da()->set_warning_info_read_only(FALSE); + + DBUG_RETURN(FALSE); +} + + +/** + This replaces U+0000 to '\0000', so the result error message string: + - is a good null-terminated string + - presents the entire data + For example: + SELECT CAST(_latin1 0x610062 AS SIGNED); + returns a warning: + Truncated incorrect INTEGER value: 'a\0000b' + Notice, the 0x00 byte is replaced to a 5-byte long string '\0000', + while 'a' and 'b' are printed as is. +*/ +extern "C" int my_wc_mb_utf8_null_terminated(CHARSET_INFO *cs, + my_wc_t wc, uchar *r, uchar *e) +{ + return wc == '\0' ? + cs->wc_to_printable(wc, r, e) : + my_charset_utf8mb3_handler.wc_mb(cs, wc, r, e); +} + + +/** + Convert value for dispatch to error message(see WL#751). 
+ + @param to buffer for converted string + @param to_length size of the buffer + @param from string which should be converted + @param from_length string length + @param from_cs charset from convert + + @retval + result string length +*/ + +size_t err_conv(char *buff, uint to_length, const char *from, + uint from_length, CHARSET_INFO *from_cs) +{ + char *to= buff; + const char *from_start= from; + size_t res; + + DBUG_ASSERT(to_length > 0); + to_length--; + if (from_cs == &my_charset_bin) + { + uchar char_code; + res= 0; + while (1) + { + if ((uint)(from - from_start) >= from_length || + res >= to_length) + { + *to= 0; + break; + } + + char_code= ((uchar) *from); + if (char_code >= 0x20 && char_code <= 0x7E) + { + *to++= char_code; + from++; + res++; + } + else + { + if (res + 4 >= to_length) + { + *to= 0; + break; + } + res+= my_snprintf(to, 5, "\\x%02X", (uint) char_code); + to+=4; + from++; + } + } + } + else + { + uint errors; + res= my_convert_using_func(to, to_length, system_charset_info, + my_wc_mb_utf8_null_terminated, + from, from_length, from_cs, + from_cs->cset->mb_wc, + &errors); + to[res]= 0; + } + return res; +} + + +/** + Convert string for dispatch to client(see WL#751). + + @param to buffer to convert + @param to_length buffer length + @param to_cs chraset to convert + @param from string from convert + @param from_length string length + @param from_cs charset from convert + @param errors count of errors during convertion + + @retval + length of converted string +*/ + +size_t convert_error_message(char *to, size_t to_length, CHARSET_INFO *to_cs, + const char *from, size_t from_length, + CHARSET_INFO *from_cs, uint *errors) +{ + DBUG_ASSERT(to_length > 0); + /* Make room for the null terminator. 
*/ + to_length--; + + if (!to_cs || to_cs == &my_charset_bin) + to_cs= system_charset_info; + uint32 cnv_length= my_convert_using_func(to, to_length, + to_cs, + to_cs->cset->wc_to_printable, + from, from_length, + from_cs, from_cs->cset->mb_wc, + errors); + DBUG_ASSERT(to_length >= cnv_length); + to[cnv_length]= '\0'; + return cnv_length; +} + + +/** + Sanity check for SQLSTATEs. The function does not check if it's really an + existing SQL-state (there are just too many), it just checks string length and + looks for bad characters. + + @param sqlstate the condition SQLSTATE. + + @retval true if it's ok. + @retval false if it's bad. +*/ + +bool is_sqlstate_valid(const LEX_CSTRING *sqlstate) +{ + if (sqlstate->length != 5) + return false; + + for (int i= 0 ; i < 5 ; ++i) + { + char c = sqlstate->str[i]; + + if ((c < '0' || '9' < c) && + (c < 'A' || 'Z' < c)) + return false; + } + + return true; +} + + +void convert_error_to_warning(THD *thd) +{ + DBUG_ASSERT(thd->is_error()); + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + thd->get_stmt_da()->sql_errno(), + thd->get_stmt_da()->message()); + thd->clear_error(); +} diff --git a/sql/sql_error.h b/sql/sql_error.h new file mode 100644 index 00000000..b5afdf9b --- /dev/null +++ b/sql/sql_error.h @@ -0,0 +1,1336 @@ +/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2017, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_ERROR_H +#define SQL_ERROR_H + +#include "sql_list.h" /* Sql_alloc, MEM_ROOT, list */ +#include "sql_type_int.h" // Longlong_hybrid +#include "sql_string.h" /* String */ +#include "sql_plist.h" /* I_P_List */ +#include "mysql_com.h" /* MYSQL_ERRMSG_SIZE */ +#include "my_time.h" /* MYSQL_TIME */ +#include "decimal.h" + +class THD; +class my_decimal; +class sp_condition_value; + +/* Types of LOG warnings, used by note_verbosity */ + +#define NOTE_VERBOSITY_NORMAL (1U << 0) +/* Show warnings about keys parts that cannot be used */ +#define NOTE_VERBOSITY_UNUSABLE_KEYS (1U << 1) +/* Show warnings in explain for key parts that cannot be used */ +#define NOTE_VERBOSITY_EXPLAIN (1U << 2) + +/////////////////////////////////////////////////////////////////////////// + +class Sql_state +{ +protected: + /** + This member is always NUL terminated. + */ + char m_sqlstate[SQLSTATE_LENGTH + 1]; +public: + Sql_state() + { + memset(m_sqlstate, 0, sizeof(m_sqlstate)); + } + + Sql_state(const char *sqlstate) + { + set_sqlstate(sqlstate); + } + + const char* get_sqlstate() const + { return m_sqlstate; } + + void set_sqlstate(const Sql_state *other) + { + *this= *other; + } + void set_sqlstate(const char *sqlstate) + { + memcpy(m_sqlstate, sqlstate, SQLSTATE_LENGTH); + m_sqlstate[SQLSTATE_LENGTH]= '\0'; + } + bool eq(const Sql_state *other) const + { + return strcmp(m_sqlstate, other->m_sqlstate) == 0; + } + + bool has_sql_state() const { return m_sqlstate[0] != '\0'; } + + /** + Checks if this SQL state defines a WARNING condition. + Note: m_sqlstate must contain a valid SQL-state. + + @retval true if this SQL state defines a WARNING condition. + @retval false otherwise. 
+ */ + inline bool is_warning() const + { return m_sqlstate[0] == '0' && m_sqlstate[1] == '1'; } + + + /** + Checks if this SQL state defines a NOT FOUND condition. + Note: m_sqlstate must contain a valid SQL-state. + + @retval true if this SQL state defines a NOT FOUND condition. + @retval false otherwise. + */ + inline bool is_not_found() const + { return m_sqlstate[0] == '0' && m_sqlstate[1] == '2'; } + + + /** + Checks if this SQL state defines an EXCEPTION condition. + Note: m_sqlstate must contain a valid SQL-state. + + @retval true if this SQL state defines an EXCEPTION condition. + @retval false otherwise. + */ + inline bool is_exception() const + { return m_sqlstate[0] != '0' || m_sqlstate[1] > '2'; } + +}; + + +class Sql_state_errno: public Sql_state +{ +protected: + /** + MySQL extension, MYSQL_ERRNO condition item. + SQL error number. One of ER_ codes from share/errmsg.txt. + Set by set_error_status. + */ + uint m_sql_errno; + +public: + Sql_state_errno() + :m_sql_errno(0) + { } + Sql_state_errno(uint sql_errno) + :m_sql_errno(sql_errno) + { } + Sql_state_errno(uint sql_errno, const char *sql_state) + :Sql_state(sql_state), + m_sql_errno(sql_errno) + { } + /** + Get the SQL_ERRNO of this condition. + @return the sql error number condition item. + */ + uint get_sql_errno() const + { return m_sql_errno; } + + void set(uint sql_errno, const char *sqlstate) + { + m_sql_errno= sql_errno; + set_sqlstate(sqlstate); + } + void clear() + { + m_sql_errno= 0; + } +}; + + +class Sql_state_errno_level: public Sql_state_errno +{ +public: + /* + Enumeration value describing the severity of the error. + + Note that these enumeration values must correspond to the indices + of the sql_print_message_handlers array. + */ + enum enum_warning_level + { WARN_LEVEL_NOTE, WARN_LEVEL_WARN, WARN_LEVEL_ERROR, WARN_LEVEL_END}; + +protected: + /** Severity (error, warning, note) of this condition. 
*/ + enum_warning_level m_level; + + void assign_defaults(const Sql_state_errno *value); + +public: + /** + Get the error level of this condition. + @return the error level condition item. + */ + enum_warning_level get_level() const + { return m_level; } + + Sql_state_errno_level() + :m_level(WARN_LEVEL_ERROR) + { } + + Sql_state_errno_level(uint sqlerrno, const char* sqlstate, + enum_warning_level level) + :Sql_state_errno(sqlerrno, sqlstate), + m_level(level) + { } + Sql_state_errno_level(const Sql_state_errno &state_errno, + enum_warning_level level) + :Sql_state_errno(state_errno), + m_level(level) + { } + void clear() + { + m_level= WARN_LEVEL_ERROR; + Sql_state_errno::clear(); + } +}; + + +/* + class Sql_user_condition_identity. + Instances of this class uniquely idetify user defined conditions (EXCEPTION). + + SET sql_mode=ORACLE; + CREATE PROCEDURE p1 + AS + a EXCEPTION; + BEGIN + RAISE a; + EXCEPTION + WHEN a THEN NULL; + END; + + Currently a user defined condition is identified by a pointer to + its parse time sp_condition_value instance. This can change when + we add packages. See MDEV-10591. +*/ +class Sql_user_condition_identity +{ +protected: + const sp_condition_value *m_user_condition_value; +public: + Sql_user_condition_identity() + :m_user_condition_value(NULL) + { } + Sql_user_condition_identity(const sp_condition_value *value) + :m_user_condition_value(value) + { } + const sp_condition_value *get_user_condition_value() const + { return m_user_condition_value; } + + void set(const Sql_user_condition_identity &identity) + { + *this= identity; + } + void clear() + { + m_user_condition_value= NULL; + } +}; + + +/** + class Sql_condition_identity. + Instances of this class uniquely identify conditions + (including user-defined exceptions for sql_mode=ORACLE) + and store everything that is needed for handler search + purposes in sp_pcontext::find_handler(). 
+*/ +class Sql_condition_identity: public Sql_state_errno_level, + public Sql_user_condition_identity +{ +public: + Sql_condition_identity() = default; + Sql_condition_identity(const Sql_state_errno_level &st, + const Sql_user_condition_identity &ucid) + :Sql_state_errno_level(st), + Sql_user_condition_identity(ucid) + { } + Sql_condition_identity(const Sql_state_errno &st, + enum_warning_level level, + const Sql_user_condition_identity &ucid) + :Sql_state_errno_level(st, level), + Sql_user_condition_identity(ucid) + { } + Sql_condition_identity(uint sqlerrno, + const char* sqlstate, + enum_warning_level level, + const Sql_user_condition_identity &ucid) + :Sql_state_errno_level(sqlerrno, sqlstate, level), + Sql_user_condition_identity(ucid) + { } + void clear() + { + Sql_state_errno_level::clear(); + Sql_user_condition_identity::clear(); + } +}; + + +class Sql_condition_items +{ +protected: + /** SQL CLASS_ORIGIN condition item. */ + String m_class_origin; + + /** SQL SUBCLASS_ORIGIN condition item. */ + String m_subclass_origin; + + /** SQL CONSTRAINT_CATALOG condition item. */ + String m_constraint_catalog; + + /** SQL CONSTRAINT_SCHEMA condition item. */ + String m_constraint_schema; + + /** SQL CONSTRAINT_NAME condition item. */ + String m_constraint_name; + + /** SQL CATALOG_NAME condition item. */ + String m_catalog_name; + + /** SQL SCHEMA_NAME condition item. */ + String m_schema_name; + + /** SQL TABLE_NAME condition item. */ + String m_table_name; + + /** SQL COLUMN_NAME condition item. */ + String m_column_name; + + /** SQL CURSOR_NAME condition item. */ + String m_cursor_name; + + /** SQL ROW_NUMBER condition item. 
*/ + ulong m_row_number; + + Sql_condition_items() + :m_class_origin((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_subclass_origin((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_constraint_catalog((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_constraint_schema((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_constraint_name((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_catalog_name((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_schema_name((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_table_name((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_column_name((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_cursor_name((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_row_number(0) + { } + + void clear() + { + m_class_origin.length(0); + m_subclass_origin.length(0); + m_constraint_catalog.length(0); + m_constraint_schema.length(0); + m_constraint_name.length(0); + m_catalog_name.length(0); + m_schema_name.length(0); + m_table_name.length(0); + m_column_name.length(0); + m_cursor_name.length(0); + m_row_number= 0; + } +}; + + +/** + Representation of a SQL condition. + A SQL condition can be a completion condition (note, warning), + or an exception condition (error, not found). +*/ +class Sql_condition : public Sql_alloc, + public Sql_condition_identity, + public Sql_condition_items +{ +public: + + /** + Convert a bitmask consisting of MYSQL_TIME_{NOTE|WARN}_XXX bits + to WARN_LEVEL_XXX + */ + static enum_warning_level time_warn_level(uint warnings) + { + return MYSQL_TIME_WARN_HAVE_WARNINGS(warnings) ? + WARN_LEVEL_WARN : WARN_LEVEL_NOTE; + } + + /** + Get the MESSAGE_TEXT of this condition. + @return the message text. + */ + const char* get_message_text() const; + + /** + Get the MESSAGE_OCTET_LENGTH of this condition. + @return the length in bytes of the message text. 
+ */ + int get_message_octet_length() const; + +private: + /* + The interface of Sql_condition is mostly private, by design, + so that only the following code: + - various raise_error() or raise_warning() methods in class THD, + - the implementation of SIGNAL / RESIGNAL / GET DIAGNOSTICS + - catch / re-throw of SQL conditions in stored procedures (sp_rcontext) + is allowed to create / modify a SQL condition. + Enforcing this policy prevents confusion, since the only public + interface available to the rest of the server implementation + is the interface offered by the THD methods (THD::raise_error()), + which should be used. + */ + friend class THD; + friend class Warning_info; + friend class Sql_cmd_common_signal; + friend class Sql_cmd_signal; + friend class Sql_cmd_resignal; + friend class sp_rcontext; + friend class Condition_information_item; + + /** + Default constructor. + This constructor is usefull when allocating arrays. + Note that the init() method should be called to complete the Sql_condition. + */ + Sql_condition() + :m_mem_root(NULL) + { } + + /** + Complete the Sql_condition initialisation. + @param mem_root The memory root to use for the condition items + of this condition + */ + void init(MEM_ROOT *mem_root) + { + DBUG_ASSERT(mem_root != NULL); + DBUG_ASSERT(m_mem_root == NULL); + m_mem_root= mem_root; + } + + /** + Constructor. + @param mem_root The memory root to use for the condition items + of this condition + */ + Sql_condition(MEM_ROOT *mem_root) + :m_mem_root(mem_root) + { + DBUG_ASSERT(mem_root != NULL); + } + + Sql_condition(MEM_ROOT *mem_root, const Sql_user_condition_identity &ucid) + :Sql_condition_identity(Sql_state_errno_level(), ucid), + m_mem_root(mem_root) + { + DBUG_ASSERT(mem_root != NULL); + } + /** + Constructor for a fixed message text. 
+ @param mem_root - memory root + @param value - the error number and the sql state for this condition + @param level - the error level for this condition + @param msg - the message text for this condition + */ + Sql_condition(MEM_ROOT *mem_root, const Sql_condition_identity &value, + const char *msg, ulong current_row_for_warning) + : Sql_condition_identity(value), m_mem_root(mem_root) + { + DBUG_ASSERT(value.get_sql_errno() != 0); + DBUG_ASSERT(msg != NULL); + set_builtin_message_text(msg); + m_row_number= current_row_for_warning; + } + + /** Destructor. */ + ~Sql_condition() = default; + + /** + Copy optional condition items attributes. + @param cond the condition to copy. + */ + void copy_opt_attributes(const Sql_condition *cond); + + /** + Set the condition message test. + @param str Message text, expressed in the character set derived from + the server --language option + */ + void set_builtin_message_text(const char* str); + + /** Set the CLASS_ORIGIN of this condition. */ + void set_class_origin(); + + /** Set the SUBCLASS_ORIGIN of this condition. */ + void set_subclass_origin(); + + /** + Assign the condition items 'MYSQL_ERRNO', 'level' and 'MESSAGE_TEXT' + default values of a condition. + @param thd - current thread, to access to localized error messages + @param from - copy condition items from here (can be NULL) + */ + void assign_defaults(THD *thd, const Sql_state_errno *from); + + /** + Clear this SQL condition. + */ + void clear() + { + Sql_condition_identity::clear(); + Sql_condition_items::clear(); + m_message_text.length(0); + } + +private: + /** Message text, expressed in the character set implied by --language. */ + String m_message_text; + + /** Pointers for participating in the list of conditions. */ + Sql_condition *next_in_wi; + Sql_condition **prev_in_wi; + + /** Memory root to use to hold condition item values. 
*/ + MEM_ROOT *m_mem_root; +}; + +/////////////////////////////////////////////////////////////////////////// + +/** + Information about warnings of the current connection. +*/ +class Warning_info +{ + /** The type of the counted and doubly linked list of conditions. */ + typedef I_P_List, + I_P_List_counter, + I_P_List_fast_push_back > + Sql_condition_list; + + /** A memory root to allocate warnings and errors */ + MEM_ROOT m_warn_root; + + /** List of warnings of all severities (levels). */ + Sql_condition_list m_warn_list; + + /** A break down of the number of warnings per severity (level). */ + uint m_warn_count[(uint) Sql_condition::WARN_LEVEL_END]; + + /** + The number of warnings of the current statement. Warning_info + life cycle differs from statement life cycle -- it may span + multiple statements. In that case we get + m_current_statement_warn_count 0, whereas m_warn_list is not empty. + */ + uint m_current_statement_warn_count; + + /* + Row counter, to print in errors and warnings. Not increased in + create_sort_index(); may differ from examined_row_count. + */ + ulong m_current_row_for_warning; + + /** Used to optionally clear warnings only once per statement. */ + ulonglong m_warn_id; + + /** + A pointer to an element of m_warn_list. It determines SQL-condition + instance which corresponds to the error state in Diagnostics_area. + + This is needed for properly processing SQL-conditions in SQL-handlers. + When an SQL-handler is found for the current error state in Diagnostics_area, + this pointer is needed to remove the corresponding SQL-condition from the + Warning_info list. + + @note m_error_condition might be NULL in the following cases: + - Diagnostics_area set to fatal error state (like OOM); + - Max number of Warning_info elements has been reached (thus, there is + no corresponding SQL-condition object in Warning_info). + */ + const Sql_condition *m_error_condition; + + /** Indicates if push_warning() allows unlimited number of warnings. 
*/ + bool m_allow_unlimited_warnings; + bool initialized; /* Set to 1 if init() has been called */ + + /** Read only status. */ + bool m_read_only; + + /** Pointers for participating in the stack of Warning_info objects. */ + Warning_info *m_next_in_da; + Warning_info **m_prev_in_da; + + List m_marked_sql_conditions; + +public: + Warning_info(ulonglong warn_id_arg, bool allow_unlimited_warnings, + bool initialized); + ~Warning_info(); + /* Allocate memory for structures */ + void init(); + void free_memory(); + +private: + Warning_info(const Warning_info &rhs); /* Not implemented */ + Warning_info& operator=(const Warning_info &rhs); /* Not implemented */ + + /** + Checks if Warning_info contains SQL-condition with the given message. + + @param message_str Message string. + @param message_length Length of message string. + + @return true if the Warning_info contains an SQL-condition with the given + message. + */ + bool has_sql_condition(const char *message_str, size_t message_length) const; + + /** + Checks if Warning_info contains SQL-condition with the given error id + + @param sql_errno SQL-condition error number + + @return true if the Warning_info contains an SQL-condition with the given + error id. + */ + bool has_sql_condition(uint sql_errno) const; + + /** + Reset the warning information. Clear all warnings, + the number of warnings, reset current row counter + to point to the first row. + + @param new_id new Warning_info id. + */ + void clear(ulonglong new_id); + + /** + Only clear warning info if haven't yet done that already + for the current query. Allows to be issued at any time + during the query, without risk of clearing some warnings + that have been generated by the current statement. + + @todo: This is a sign of sloppy coding. Instead we need to + designate one place in a statement life cycle where we call + Warning_info::clear(). + + @param query_id Current query id. 
+ */ + void opt_clear(ulonglong query_id) + { + if (query_id != m_warn_id) + clear(query_id); + } + + /** + Concatenate the list of warnings. + + It's considered tolerable to lose an SQL-condition in case of OOM-error, + or if the number of SQL-conditions in the Warning_info reached top limit. + + @param thd Thread context. + @param source Warning_info object to copy SQL-conditions from. + */ + void append_warning_info(THD *thd, const Warning_info *source); + + /** + Reset between two COM_ commands. Warnings are preserved + between commands, but statement_warn_count indicates + the number of warnings of this particular statement only. + */ + void reset_for_next_command() + { m_current_statement_warn_count= 0; } + + /** + Mark active SQL-conditions for later removal. + This is done to simulate stacked DAs for HANDLER statements. + */ + void mark_sql_conditions_for_removal(); + + /** + Unmark SQL-conditions, which were marked for later removal. + This is done to simulate stacked DAs for HANDLER statements. + */ + void unmark_sql_conditions_from_removal() + { m_marked_sql_conditions.empty(); } + + /** + Remove SQL-conditions that are marked for deletion. + This is done to simulate stacked DAs for HANDLER statements. + */ + void remove_marked_sql_conditions(); + + /** + Check if the given SQL-condition is marked for removal in this Warning_info + instance. + + @param cond the SQL-condition. + + @retval true if the given SQL-condition is marked for removal in this + Warning_info instance. + @retval false otherwise. + */ + bool is_marked_for_removal(const Sql_condition *cond) const; + + /** + Mark a single SQL-condition for removal (add the given SQL-condition to the + removal list of this Warning_info instance). + */ + void mark_condition_for_removal(Sql_condition *cond) + { m_marked_sql_conditions.push_back(cond, &m_warn_root); } + + /** + Used for @@warning_count system variable, which prints + the number of rows returned by SHOW WARNINGS. 
+ */ + ulong warn_count() const + { + /* + This may be higher than warn_list.elements() if we have + had more warnings than thd->variables.max_error_count. + */ + return (m_warn_count[(uint) Sql_condition::WARN_LEVEL_NOTE] + + m_warn_count[(uint) Sql_condition::WARN_LEVEL_ERROR] + + m_warn_count[(uint) Sql_condition::WARN_LEVEL_WARN]); + } + + /** + The number of errors, or number of rows returned by SHOW ERRORS, + also the value of session variable @@error_count. + */ + ulong error_count() const + { return m_warn_count[(uint) Sql_condition::WARN_LEVEL_ERROR]; } + + /** + The number of conditions (errors, warnings and notes) in the list. + */ + uint cond_count() const + { + return m_warn_list.elements(); + } + + /** Id of the warning information area. */ + ulonglong id() const { return m_warn_id; } + + /** Set id of the warning information area. */ + void id(ulonglong id_arg) { m_warn_id= id_arg; } + + /** Do we have any errors and warnings that we can *show*? */ + bool is_empty() const { return m_warn_list.is_empty(); } + + /** Increment the current row counter to point at the next row. */ + void inc_current_row_for_warning() { m_current_row_for_warning++; } + + /** Reset the current row counter. Start counting from the first row. */ + void reset_current_row_for_warning(int n) { m_current_row_for_warning= n; } + + ulong set_current_row_for_warning(ulong row) + { + ulong old_row= m_current_row_for_warning; + m_current_row_for_warning= row; + return old_row; + } + + /** Return the current counter value. */ + ulong current_row_for_warning() const { return m_current_row_for_warning; } + + /** Return the number of warnings thrown by the current statement. */ + ulong current_statement_warn_count() const + { return m_current_statement_warn_count; } + + /** Make sure there is room for the given number of conditions. */ + void reserve_space(THD *thd, uint count); + + /** + Add a new SQL-condition to the current list and increment the respective + counters. 
+ + @param thd Thread context. + @param identity SQL-condition identity + @param msg SQL-condition message. + + @return a pointer to the added SQL-condition. + */ + Sql_condition *push_warning(THD *thd, const Sql_condition_identity *identity, + const char* msg, ulong current_row_number); + + /** + Add a new SQL-condition to the current list and increment the respective + counters. + + @param thd Thread context. + @param sql_condition SQL-condition to copy values from. + + @return a pointer to the added SQL-condition. + */ + Sql_condition *push_warning(THD *thd, const Sql_condition *sql_condition); + + /** + Set the read only status for this statement area. + This is a privileged operation, reserved for the implementation of + diagnostics related statements, to enforce that the statement area is + left untouched during execution. + The diagnostics statements are: + - SHOW WARNINGS + - SHOW ERRORS + - GET DIAGNOSTICS + @param read_only the read only property to set. + */ + void set_read_only(bool read_only_arg) + { m_read_only= read_only_arg; } + + /** + Read only status. + @return the read only property. + */ + bool is_read_only() const + { return m_read_only; } + + /** + @return SQL-condition, which corresponds to the error state in + Diagnostics_area. + + @see m_error_condition. + */ + const Sql_condition *get_error_condition() const + { return m_error_condition; } + + /** + Set SQL-condition, which corresponds to the error state in Diagnostics_area. + + @see m_error_condition. + */ + void set_error_condition(const Sql_condition *error_condition) + { m_error_condition= error_condition; } + + /** + Reset SQL-condition, which corresponds to the error state in + Diagnostics_area. + + @see m_error_condition. 
+ */ + void clear_error_condition() + { m_error_condition= NULL; } + + // for: + // - m_next_in_da / m_prev_in_da + // - is_marked_for_removal() + friend class Diagnostics_area; +}; + + +extern size_t err_conv(char *buff, uint to_length, const char *from, + uint from_length, CHARSET_INFO *from_cs); + +class ErrBuff +{ +protected: + mutable char err_buffer[MYSQL_ERRMSG_SIZE]; +public: + ErrBuff() + { + err_buffer[0]= '\0'; + } + const char *ptr() const { return err_buffer; } + LEX_CSTRING set_longlong(const Longlong_hybrid &nr) const + { + int radix= nr.is_unsigned() ? 10 : -10; + const char *end= longlong10_to_str(nr.value(), err_buffer, radix); + DBUG_ASSERT(end >= err_buffer); + return {err_buffer, (size_t) (end - err_buffer)}; + } + LEX_CSTRING set_double(double nr) const + { + size_t length= my_gcvt(nr, MY_GCVT_ARG_DOUBLE, + sizeof(err_buffer), err_buffer, 0); + return {err_buffer, length}; + } + LEX_CSTRING set_decimal(const decimal_t *d) const + { + int length= sizeof(err_buffer); + decimal2string(d, err_buffer, &length, 0, 0, ' '); + DBUG_ASSERT(length >= 0); + return {err_buffer, (size_t) length}; + } + LEX_CSTRING set_str(const char *str, size_t len, CHARSET_INFO *cs) const + { + DBUG_ASSERT(len < UINT_MAX32); + len= err_conv(err_buffer, (uint) sizeof(err_buffer), str, (uint) len, cs); + return {err_buffer, len}; + } + LEX_CSTRING set_mysql_time(const MYSQL_TIME *ltime) const + { + int length= my_TIME_to_str(ltime, err_buffer, AUTO_SEC_PART_DIGITS); + DBUG_ASSERT(length >= 0); + return {err_buffer, (size_t) length}; + } +}; + + +class ErrConv: public ErrBuff +{ +public: + ErrConv() = default; + virtual ~ErrConv() = default; + virtual LEX_CSTRING lex_cstring() const= 0; + inline const char *ptr() const + { + return lex_cstring().str; + } +}; + +class ErrConvString : public ErrConv +{ + const char *str; + size_t len; + CHARSET_INFO *cs; +public: + ErrConvString(const char *str_arg, size_t len_arg, CHARSET_INFO *cs_arg) + : ErrConv(), str(str_arg), 
len(len_arg), cs(cs_arg) {} + ErrConvString(const char *str_arg, CHARSET_INFO *cs_arg) + : ErrConv(), str(str_arg), len(strlen(str_arg)), cs(cs_arg) {} + ErrConvString(const String *s) + : ErrConv(), str(s->ptr()), len(s->length()), cs(s->charset()) {} + LEX_CSTRING lex_cstring() const override + { + return set_str(str, len, cs); + } +}; + +class ErrConvInteger : public ErrConv, public Longlong_hybrid +{ +public: + ErrConvInteger(const Longlong_hybrid &nr) + : ErrConv(), Longlong_hybrid(nr) { } + LEX_CSTRING lex_cstring() const override + { + return set_longlong(static_cast(*this)); + } +}; + +class ErrConvDouble: public ErrConv +{ + double num; +public: + ErrConvDouble(double num_arg) : ErrConv(), num(num_arg) {} + LEX_CSTRING lex_cstring() const override + { + return set_double(num); + } +}; + +class ErrConvTime : public ErrConv +{ + const MYSQL_TIME *ltime; +public: + ErrConvTime(const MYSQL_TIME *ltime_arg) : ErrConv(), ltime(ltime_arg) {} + LEX_CSTRING lex_cstring() const override + { + return set_mysql_time(ltime); + } +}; + +class ErrConvDecimal : public ErrConv +{ + const decimal_t *d; +public: + ErrConvDecimal(const decimal_t *d_arg) : ErrConv(), d(d_arg) {} + LEX_CSTRING lex_cstring() const override + { + return set_decimal(d); + } +}; + +/////////////////////////////////////////////////////////////////////////// + +/** + Stores status of the currently executed statement. + Cleared at the beginning of the statement, and then + can hold either OK, ERROR, or EOF status. + Can not be assigned twice per statement. +*/ + +class Diagnostics_area: public Sql_state_errno, + public Sql_user_condition_identity +{ +private: + /** The type of the counted and doubly linked list of conditions. */ + typedef I_P_List, + I_P_List_counter, + I_P_List_fast_push_back > + Warning_info_list; + +public: + /** Const iterator used to iterate through the warning list. 
*/ + typedef Warning_info::Sql_condition_list::Const_Iterator + Sql_condition_iterator; + + enum enum_diagnostics_status + { + /** The area is cleared at start of a statement. */ + DA_EMPTY= 0, + /** Set whenever one calls my_ok(). */ + DA_OK, + /** Set whenever one calls my_eof(). */ + DA_EOF, + /** Set whenever one calls my_ok() in PS bulk mode. */ + DA_OK_BULK, + /** Set whenever one calls my_eof() in PS bulk mode. */ + DA_EOF_BULK, + /** Set whenever one calls my_error() or my_message(). */ + DA_ERROR, + /** Set in case of a custom response, such as one from COM_STMT_PREPARE. */ + DA_DISABLED + }; + + void set_overwrite_status(bool can_overwrite_status) + { m_can_overwrite_status= can_overwrite_status; } + + /** True if status information is sent to the client. */ + bool is_sent() const { return m_is_sent; } + + void set_is_sent(bool is_sent_arg) { m_is_sent= is_sent_arg; } + + void set_ok_status(ulonglong affected_rows, + ulonglong last_insert_id, + const char *message); + + void set_eof_status(THD *thd); + + void set_error_status(uint sql_errno); + + void set_error_status(uint sql_errno, + const char *message, + const char *sqlstate, + const Sql_user_condition_identity &ucid, + const Sql_condition *error_condition); + + void set_error_status(uint sql_errno, + const char *message, + const char *sqlstate, + const Sql_condition *error_condition) + { + set_error_status(sql_errno, message, sqlstate, + Sql_user_condition_identity(), + error_condition); + } + + void disable_status(); + + void reset_diagnostics_area(); + + bool is_set() const { return m_status != DA_EMPTY; } + + bool is_error() const { return m_status == DA_ERROR; } + + bool is_eof() const { return m_status == DA_EOF; } + + bool is_ok() const { return m_status == DA_OK; } + + bool is_disabled() const { return m_status == DA_DISABLED; } + + void set_bulk_execution(bool bulk) { is_bulk_execution= bulk; } + + bool is_bulk_op() const { return is_bulk_execution; } + + enum_diagnostics_status status() 
const { return m_status; } + + const char *message() const + { + DBUG_ASSERT(m_status == DA_ERROR || m_status == DA_OK || + m_status == DA_OK_BULK || m_status == DA_EOF_BULK); + return m_message; + } + + + uint sql_errno() const + { + DBUG_ASSERT(m_status == DA_ERROR); + return Sql_state_errno::get_sql_errno(); + } + + const char* get_sqlstate() const + { DBUG_ASSERT(m_status == DA_ERROR); return Sql_state::get_sqlstate(); } + + ulonglong affected_rows() const + { + DBUG_ASSERT(m_status == DA_OK || m_status == DA_OK_BULK); + return m_affected_rows; + } + + ulonglong last_insert_id() const + { + DBUG_ASSERT(m_status == DA_OK || m_status == DA_OK_BULK); + return m_last_insert_id; + } + + uint statement_warn_count() const + { + DBUG_ASSERT(m_status == DA_OK || m_status == DA_OK_BULK || + m_status == DA_EOF ||m_status == DA_EOF_BULK ); + return m_statement_warn_count; + } + + uint unsafe_statement_warn_count() const + { + return m_statement_warn_count; + } + + /** + Get the current errno, state and id of the user defined condition + and return them as Sql_condition_identity. + */ + Sql_condition_identity get_error_condition_identity() const + { + DBUG_ASSERT(m_status == DA_ERROR); + return Sql_condition_identity(*this /*Sql_state_errno*/, + Sql_condition::WARN_LEVEL_ERROR, + *this /*Sql_user_condition_identity*/); + } + + /* Used to count any warnings pushed after calling set_ok_status(). 
*/ + void increment_warning() + { + if (m_status != DA_EMPTY) + m_statement_warn_count++; + } + + Diagnostics_area(bool initialize); + Diagnostics_area(ulonglong warning_info_id, bool allow_unlimited_warnings, + bool initialize); + void init() { m_main_wi.init() ; } + void free_memory() { m_main_wi.free_memory() ; } + + void push_warning_info(Warning_info *wi) + { m_wi_stack.push_front(wi); } + + void pop_warning_info() + { + DBUG_ASSERT(m_wi_stack.elements() > 0); + m_wi_stack.remove(m_wi_stack.front()); + } + + void set_warning_info_id(ulonglong id) + { get_warning_info()->id(id); } + + ulonglong warning_info_id() const + { return get_warning_info()->id(); } + + /** + Compare given current warning info and current warning info + and see if they are different. They will be different if + warnings have been generated or statements that use tables + have been executed. This is checked by comparing m_warn_id. + + @param wi Warning info to compare with current Warning info. + + @return false if they are equal, true if they are not. 
+ */ + bool warning_info_changed(const Warning_info *wi) const + { return get_warning_info()->id() != wi->id(); } + + bool is_warning_info_empty() const + { return get_warning_info()->is_empty(); } + + ulong current_statement_warn_count() const + { return get_warning_info()->current_statement_warn_count(); } + + bool has_sql_condition(const char *message_str, size_t message_length) const + { return get_warning_info()->has_sql_condition(message_str, message_length); } + + bool has_sql_condition(uint sql_errno) const + { return get_warning_info()->has_sql_condition(sql_errno); } + + void reset_for_next_command() + { get_warning_info()->reset_for_next_command(); } + + void clear_warning_info(ulonglong id) + { get_warning_info()->clear(id); } + + void opt_clear_warning_info(ulonglong query_id) + { get_warning_info()->opt_clear(query_id); } + + long set_current_row_for_warning(long row) + { return get_warning_info()->set_current_row_for_warning(row); } + + ulong current_row_for_warning() const + { return get_warning_info()->current_row_for_warning(); } + + void inc_current_row_for_warning() + { get_warning_info()->inc_current_row_for_warning(); } + + void reset_current_row_for_warning(int n) + { get_warning_info()->reset_current_row_for_warning(n); } + + bool is_warning_info_read_only() const + { return get_warning_info()->is_read_only(); } + + void set_warning_info_read_only(bool read_only_arg) + { get_warning_info()->set_read_only(read_only_arg); } + + ulong error_count() const + { return get_warning_info()->error_count(); } + + ulong warn_count() const + { return get_warning_info()->warn_count(); } + + uint cond_count() const + { return get_warning_info()->cond_count(); } + + Sql_condition_iterator sql_conditions() const + { return get_warning_info()->m_warn_list; } + + void reserve_space(THD *thd, uint count) + { get_warning_info()->reserve_space(thd, count); } + + Sql_condition *push_warning(THD *thd, const Sql_condition *sql_condition) + { return 
get_warning_info()->push_warning(thd, sql_condition); } + + Sql_condition *push_warning(THD *thd, + uint sql_errno_arg, + const char* sqlstate, + Sql_condition::enum_warning_level level, + const Sql_user_condition_identity &ucid, + const char* msg, + ulong current_row_number) + { + Sql_condition_identity tmp(sql_errno_arg, sqlstate, level, ucid); + return get_warning_info()->push_warning(thd, &tmp, msg, + current_row_number); + } + + Sql_condition *push_warning(THD *thd, + uint sqlerrno, + const char* sqlstate, + Sql_condition::enum_warning_level level, + const char* msg) + { + return push_warning(thd, sqlerrno, sqlstate, level, + Sql_user_condition_identity(), msg, 0); + } + void mark_sql_conditions_for_removal() + { get_warning_info()->mark_sql_conditions_for_removal(); } + + void unmark_sql_conditions_from_removal() + { get_warning_info()->unmark_sql_conditions_from_removal(); } + + void remove_marked_sql_conditions() + { get_warning_info()->remove_marked_sql_conditions(); } + + const Sql_condition *get_error_condition() const + { return get_warning_info()->get_error_condition(); } + + void copy_sql_conditions_to_wi(THD *thd, Warning_info *dst_wi) const + { dst_wi->append_warning_info(thd, get_warning_info()); } + + void copy_sql_conditions_from_wi(THD *thd, const Warning_info *src_wi) + { get_warning_info()->append_warning_info(thd, src_wi); } + + void copy_non_errors_from_wi(THD *thd, const Warning_info *src_wi); + +private: + Warning_info *get_warning_info() { return m_wi_stack.front(); } + + const Warning_info *get_warning_info() const { return m_wi_stack.front(); } + +private: + /** True if status information is sent to the client. */ + bool m_is_sent; + + /** Set to make set_error_status after set_{ok,eof}_status possible. */ + bool m_can_overwrite_status; + + /** Message buffer. Can be used by OK or ERROR status. */ + char m_message[MYSQL_ERRMSG_SIZE]; + + /** + The number of rows affected by the last statement. 
This is + semantically close to thd->m_row_count_func, but has a different + life cycle. thd->m_row_count_func stores the value returned by + function ROW_COUNT() and is cleared only by statements that + update its value, such as INSERT, UPDATE, DELETE and few others. + This member is cleared at the beginning of the next statement. + + We could possibly merge the two, but life cycle of thd->m_row_count_func + can not be changed. + */ + ulonglong m_affected_rows; + + /** + Similarly to the previous member, this is a replacement of + thd->first_successful_insert_id_in_prev_stmt, which is used + to implement LAST_INSERT_ID(). + */ + + ulonglong m_last_insert_id; + /** + Number of warnings of this last statement. May differ from + the number of warnings returned by SHOW WARNINGS e.g. in case + the statement doesn't clear the warnings, and doesn't generate + them. + */ + uint m_statement_warn_count; + + enum_diagnostics_status m_status; + + my_bool is_bulk_execution; + + Warning_info m_main_wi; + + Warning_info_list m_wi_stack; +}; + +/////////////////////////////////////////////////////////////////////////// + +void convert_error_to_warning(THD *thd); + +void push_warning(THD *thd, Sql_condition::enum_warning_level level, + uint code, const char *msg); + +void push_warning_printf(THD *thd, Sql_condition::enum_warning_level level, + uint code, const char *format, ...); + +bool mysqld_show_warnings(THD *thd, ulong levels_to_show); + +size_t convert_error_message(char *to, size_t to_length, + CHARSET_INFO *to_cs, + const char *from, size_t from_length, + CHARSET_INFO *from_cs, uint *errors); + +extern const LEX_CSTRING warning_level_names[]; + +bool is_sqlstate_valid(const LEX_CSTRING *sqlstate); +/** + Checks if the specified SQL-state-string defines COMPLETION condition. + This function assumes that the given string contains a valid SQL-state. + + @param s the condition SQLSTATE. + + @retval true if the given string defines COMPLETION condition. 
+ @retval false otherwise. +*/ +inline bool is_sqlstate_completion(const char *s) +{ return s[0] == '0' && s[1] == '0'; } + + +#endif // SQL_ERROR_H diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc new file mode 100644 index 00000000..210f229a --- /dev/null +++ b/sql/sql_explain.cc @@ -0,0 +1,2845 @@ +/* + Copyright (c) 2013 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_select.h" +#include "my_json_writer.h" +#include "opt_range.h" +#include "sql_expression_cache.h" + +#include + +const char * STR_DELETING_ALL_ROWS= "Deleting all rows"; +const char * STR_IMPOSSIBLE_WHERE= "Impossible WHERE"; +const char * STR_NO_ROWS_AFTER_PRUNING= "No matching rows after partition pruning"; + +const char *unit_operation_text[4]= +{ + "UNIT RESULT","UNION RESULT","INTERSECT RESULT","EXCEPT RESULT" +}; + +const char *pushed_derived_text= "PUSHED DERIVED"; +const char *pushed_select_text= "PUSHED SELECT"; + +static void write_item(Json_writer *writer, Item *item); +static void append_item_to_str(String *out, Item *item); + +Explain_query::Explain_query(THD *thd_arg, MEM_ROOT *root) : + mem_root(root), upd_del_plan(nullptr), insert_plan(nullptr), + unions(root), selects(root), stmt_thd(thd_arg), 
apc_enabled(false), + operations(0) +{ + optimization_time_tracker.start_tracking(stmt_thd); +} + +static void print_json_array(Json_writer *writer, + const char *title, String_list &list) +{ + List_iterator_fast it(list); + const char *name; + writer->add_member(title).start_array(); + while ((name= it++)) + writer->add_str(name); + writer->end_array(); +} + + + +Explain_query::~Explain_query() +{ + if (apc_enabled) + stmt_thd->apc_target.disable(); + + delete upd_del_plan; + delete insert_plan; + uint i; + for (i= 0 ; i < unions.elements(); i++) + delete unions.at(i); + for (i= 0 ; i < selects.elements(); i++) + delete selects.at(i); +} + + +Explain_node *Explain_query::get_node(uint select_id) +{ + Explain_union *u; + if ((u= get_union(select_id))) + return u; + else + return get_select(select_id); +} + +Explain_union *Explain_query::get_union(uint select_id) +{ + return (unions.elements() > select_id) ? unions.at(select_id) : NULL; +} + +Explain_select *Explain_query::get_select(uint select_id) +{ + return (selects.elements() > select_id) ? selects.at(select_id) : NULL; +} + + +void Explain_query::add_node(Explain_node *node) +{ + uint select_id; + operations++; + if (node->get_type() == Explain_node::EXPLAIN_UNION) + { + Explain_union *u= (Explain_union*)node; + select_id= u->get_select_id(); + if (unions.elements() <= select_id) + unions.resize(MY_MAX(select_id+1, unions.elements()*2), NULL); + + Explain_union *old_node; + if ((old_node= get_union(select_id))) + delete old_node; + + unions.at(select_id)= u; + } + else + { + Explain_select *sel= (Explain_select*)node; + if (sel->select_id == FAKE_SELECT_LEX_ID) + { + DBUG_ASSERT(0); // this is a "fake select" from a UNION. 
+ } + else + { + select_id= sel->select_id; + Explain_select *old_node; + + if (selects.elements() <= select_id) + selects.resize(MY_MAX(select_id+1, selects.elements()*2), NULL); + + if ((old_node= get_select(select_id))) + delete old_node; + + selects.at(select_id)= sel; + } + } +} + + +void Explain_query::add_insert_plan(Explain_insert *insert_plan_arg) +{ + insert_plan= insert_plan_arg; + query_plan_ready(); +} + + +void Explain_query::add_upd_del_plan(Explain_update *upd_del_plan_arg) +{ + upd_del_plan= upd_del_plan_arg; + query_plan_ready(); +} + + +void Explain_query::query_plan_ready() +{ + optimization_time_tracker.stop_tracking(stmt_thd); + + if (!apc_enabled) + stmt_thd->apc_target.enable(); + apc_enabled= true; +#ifndef DBUG_OFF + can_print_json= true; +#endif +} + + +void Explain_query::notify_tables_are_closed() +{ + /* + Disable processing of SHOW EXPLAIN|ANALYZE. The query is about to close + the tables it is using, which will make it impossible to print Item* + values. See sql_explain.h:ExplainDataStructureLifetime for details. + */ + if (apc_enabled) + { + stmt_thd->apc_target.disable(); + apc_enabled= false; +#ifndef DBUG_OFF + can_print_json= false; +#endif + } +} + + +/* + Send EXPLAIN output to the client. +*/ + +int Explain_query::send_explain(THD *thd, bool extended) +{ + select_result *result; + LEX *lex= thd->lex; + + if (!(result= new (thd->mem_root) select_send(thd)) || + thd->send_explain_fields(result, lex->describe, lex->analyze_stmt)) + return 1; + + int res= 0; + if (thd->lex->explain_json) + print_explain_json(result, thd->lex->analyze_stmt); + else + { + res= print_explain(result, lex->describe, thd->lex->analyze_stmt); + if (extended) + { + char buff[1024]; + String str(buff,(uint32) sizeof(buff), system_charset_info); + str.length(0); + /* + The warnings system requires input in utf8, @see + mysqld_show_warnings(). 
+ */ + lex->unit.print(&str, QT_EXPLAIN_EXTENDED); + push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_YES, str.c_ptr_safe()); + } + } + if (res) + result->abort_result_set(); + else + result->send_eof(); + + return res; +} + + + +/* + The main entry point to print EXPLAIN of the entire query +*/ + +int Explain_query::print_explain(select_result_sink *output, + uint8 explain_flags, bool is_analyze) +{ + if (upd_del_plan) + { + upd_del_plan->print_explain(this, output, explain_flags, is_analyze); + return 0; + } + else if (insert_plan) + { + insert_plan->print_explain(this, output, explain_flags, is_analyze); + return 0; + } + else + { + /* Start printing from node with id=1 */ + Explain_node *node= get_node(1); + if (!node) + return 1; /* No query plan */ + return node->print_explain(this, output, explain_flags, is_analyze); + } +} + + +int Explain_query::print_explain_json(select_result_sink *output, + bool is_analyze, + ulonglong query_time_in_progress_ms) +{ + Json_writer writer; + +#ifndef DBUG_OFF + DBUG_ASSERT(can_print_json); +#endif + + writer.start_object(); + + if (is_analyze) + { + if (query_time_in_progress_ms > 0){ + writer.add_member("r_query_time_in_progress_ms"). + add_ull(query_time_in_progress_ms); + } + + print_query_optimization_json(&writer); + } + + bool plan_found = print_query_blocks_json(&writer, is_analyze); + writer.end_object(); + + if( plan_found ) + { + send_explain_json_to_output(&writer, output); + } + + return 0; +} + +void Explain_query::print_query_optimization_json(Json_writer *writer) +{ + if (optimization_time_tracker.has_timed_statistics()) + { + // if more timers are added, move the query_optimization member + // outside the if statement + writer->add_member("query_optimization").start_object(); + writer->add_member("r_total_time_ms"). 
+ add_double(optimization_time_tracker.get_time_ms()); + writer->end_object(); + } +} + +bool Explain_query::print_query_blocks_json(Json_writer *writer, const bool is_analyze) +{ + if (upd_del_plan) + upd_del_plan->print_explain_json(this, writer, is_analyze); + else if (insert_plan) + insert_plan->print_explain_json(this, writer, is_analyze); + else + { + /* Start printing from root node with id=1 */ + Explain_node *node= get_node(1); + if (!node) + return false; /* No query plan */ + node->print_explain_json(this, writer, is_analyze); + } + + return true; +} + +void Explain_query::send_explain_json_to_output(Json_writer *writer, + select_result_sink *output) +{ + CHARSET_INFO *cs= system_charset_info; + List item_list; + const String *buf= writer->output.get_string(); + THD *thd= output->thd; + item_list.push_back(new (thd->mem_root) + Item_string(thd, buf->ptr(), buf->length(), cs), + thd->mem_root); + output->send_data(item_list); +} + +bool print_explain_for_slow_log(LEX *lex, THD *thd, String *str) +{ + return lex->explain->print_explain_str(thd, str, /*is_analyze*/ true); +} + + +/* + Return tabular EXPLAIN output as a text string +*/ + +bool Explain_query::print_explain_str(THD *thd, String *out_str, + bool is_analyze) +{ + List fields; + thd->make_explain_field_list(fields, thd->lex->describe, is_analyze); + + select_result_text_buffer output_buf(thd); + output_buf.send_result_set_metadata(fields, thd->lex->describe); + if (print_explain(&output_buf, thd->lex->describe, is_analyze)) + return true; + output_buf.save_to(out_str); + return false; +} + + +static void push_str(THD *thd, List *item_list, const char *str) +{ + item_list->push_back(new (thd->mem_root) Item_string_sys(thd, str), + thd->mem_root); +} + + +static void push_string(THD *thd, List *item_list, String *str) +{ + item_list->push_back(new (thd->mem_root) + Item_string_sys(thd, str->ptr(), str->length()), + thd->mem_root); +} + +static void push_string_list(THD *thd, List *item_list, + 
String_list &lines, String *buf) +{ + List_iterator_fast it(lines); + char *line; + bool first= true; + while ((line= it++)) + { + if (first) + first= false; + else + buf->append(','); + + buf->append(line, strlen(line)); + } + push_string(thd, item_list, buf); +} + + +/* + Print an EXPLAIN output row, based on information provided in the parameters + + @note + Parameters that may have NULL value in EXPLAIN output, should be passed + (char*)NULL. + + @return + 0 - OK + 1 - OOM Error +*/ + +static +int print_explain_row(select_result_sink *result, + uint8 options, bool is_analyze, + uint select_number, + const char *select_type, + const char *table_name, + const char *partitions, + enum join_type jtype, + String_list *possible_keys, + const char *index, + const char *key_len, + const char *ref, + ha_rows *rows, + double *r_rows, + double r_filtered, + const char *extra) +{ + THD *thd= result->thd; + MEM_ROOT *mem_root= thd->mem_root; + Item *item_null= new (mem_root) Item_null(thd); + List item_list; + Item *item; + + if (!select_type[0]) + return 0; + + item_list.push_back(new (mem_root) Item_int(thd, (int32) select_number), + mem_root); + item_list.push_back(new (mem_root) Item_string_sys(thd, select_type), + mem_root); + item_list.push_back(new (mem_root) Item_string_sys(thd, table_name), + mem_root); + if (options & DESCRIBE_PARTITIONS) + { + if (partitions) + { + item_list.push_back(new (mem_root) Item_string_sys(thd, partitions), + mem_root); + } + else + item_list.push_back(item_null, mem_root); + } + + const char *jtype_str= join_type_str[jtype]; + item_list.push_back(new (mem_root) Item_string_sys(thd, jtype_str), + mem_root); + + /* 'possible_keys' + The buffer must not be deallocated before we call send_data, otherwise + we may end up reading freed memory. 
+ */ + StringBuffer<64> possible_keys_buf; + if (possible_keys && !possible_keys->is_empty()) + { + push_string_list(thd, &item_list, *possible_keys, &possible_keys_buf); + } + else + item_list.push_back(item_null, mem_root); + + /* 'index */ + item= index ? new (mem_root) Item_string_sys(thd, index) : item_null; + item_list.push_back(item, mem_root); + + /* 'key_len */ + item= key_len ? new (mem_root) Item_string_sys(thd, key_len) : item_null; + item_list.push_back(item, mem_root); + + /* 'ref' */ + item= ref ? new (mem_root) Item_string_sys(thd, ref) : item_null; + item_list.push_back(item, mem_root); + + /* 'rows' */ + StringBuffer<64> rows_str; + if (rows) + { + rows_str.append_ulonglong((ulonglong)(*rows)); + item_list.push_back(new (mem_root) + Item_string_sys(thd, rows_str.ptr(), + rows_str.length()), mem_root); + } + else + item_list.push_back(item_null, mem_root); + + /* 'r_rows' */ + StringBuffer<64> r_rows_str; + if (is_analyze) + { + if (r_rows) + { + Item_float *fl= new (mem_root) Item_float(thd, *r_rows, 2); + String tmp; + String *res= fl->val_str(&tmp); + r_rows_str.append(*res); + item_list.push_back(new (mem_root) + Item_string_sys(thd, r_rows_str.ptr(), + r_rows_str.length()), mem_root); + } + else + item_list.push_back(item_null, mem_root); + } + + /* 'filtered' */ + const double filtered=100.0; + if (options & DESCRIBE_EXTENDED || is_analyze) + item_list.push_back(new (mem_root) Item_float(thd, filtered, 2), mem_root); + + /* 'r_filtered' */ + if (is_analyze) + item_list.push_back(new (mem_root) Item_float(thd, r_filtered, 2), + mem_root); + + /* 'Extra' */ + if (extra) + item_list.push_back(new (mem_root) Item_string_sys(thd, extra), mem_root); + else + item_list.push_back(item_null, mem_root); + + if (result->send_data(item_list)) + return 1; + return 0; +} + + + + +uint Explain_union::make_union_table_name(char *buf) +{ + uint childno= 0; + uint len, lastop= 0; + LEX_CSTRING type; + switch (operation) + { + case OP_MIX: + 
lex_string_set3(&type, STRING_WITH_LEN("= NAME_LEN) + { + memcpy(buf + len, STRING_WITH_LEN("...>") + 1); + len+= 4; + } + else + { + len+= lastop; + buf[len - 1]= '>'; // change ',' to '>' + } + return len; +} + + +int Explain_union::print_explain(Explain_query *query, + select_result_sink *output, + uint8 explain_flags, + bool is_analyze) +{ + THD *thd= output->thd; + MEM_ROOT *mem_root= thd->mem_root; + char table_name_buffer[SAFE_NAME_LEN]; + + /* print all UNION children, in order */ + for (int i= 0; i < (int) union_members.elements(); i++) + { + Explain_select *sel= query->get_select(union_members.at(i)); + sel->print_explain(query, output, explain_flags, is_analyze); + } + + if (!using_tmp) + return 0; + + /* Print a line with "UNIT RESULT" */ + List item_list; + Item *item_null= new (mem_root) Item_null(thd); + + /* `id` column */ + item_list.push_back(item_null, mem_root); + + /* `select_type` column */ + push_str(thd, &item_list, fake_select_type); + + /* `table` column: something like "" */ + uint len= make_union_table_name(table_name_buffer); + item_list.push_back(new (mem_root) + Item_string_sys(thd, table_name_buffer, len), + mem_root); + + /* `partitions` column */ + if (explain_flags & DESCRIBE_PARTITIONS) + item_list.push_back(item_null, mem_root); + + /* `type` column */ + push_str(thd, &item_list, join_type_str[JT_ALL]); + + /* `possible_keys` column */ + item_list.push_back(item_null, mem_root); + + /* `key` */ + item_list.push_back(item_null, mem_root); + + /* `key_len` */ + item_list.push_back(item_null, mem_root); + + /* `ref` */ + item_list.push_back(item_null, mem_root); + + /* `rows` */ + item_list.push_back(item_null, mem_root); + + /* `r_rows` */ + StringBuffer<64> r_rows_str; + if (is_analyze) + { + double avg_rows= fake_select_lex_tracker.get_avg_rows(); + Item_float *fl= new (mem_root) Item_float(thd, avg_rows, 2); + String tmp; + String *res= fl->val_str(&tmp); + r_rows_str.append(*res); + item_list.push_back(new (mem_root) + 
Item_string_sys(thd, r_rows_str.ptr(), + r_rows_str.length()), mem_root); + } + + /* `filtered` */ + if (explain_flags & DESCRIBE_EXTENDED || is_analyze) + item_list.push_back(item_null, mem_root); + + /* `r_filtered` */ + if (is_analyze) + item_list.push_back(item_null, mem_root); + + /* `Extra` */ + StringBuffer<256> extra_buf; + if (using_filesort) + { + extra_buf.append(STRING_WITH_LEN("Using filesort")); + } + item_list.push_back(new (mem_root) + Item_string_sys(thd, extra_buf.ptr(), + extra_buf.length()), + mem_root); + + //output->unit.offset_limit_cnt= 0; + if (output->send_data(item_list)) + return 1; + + /* + Print all subquery children (UNION children have already been printed at + the start of this function) + */ + return print_explain_for_children(query, output, explain_flags, is_analyze); +} + + +void Explain_union::print_explain_json(Explain_query *query, + Json_writer *writer, bool is_analyze) +{ + Json_writer_nesting_guard guard(writer); + char table_name_buffer[SAFE_NAME_LEN]; + + bool started_object= print_explain_json_cache(writer, is_analyze); + + writer->add_member("query_block").start_object(); + + if (is_recursive_cte) + writer->add_member("recursive_union").start_object(); + else + writer->add_member("union_result").start_object(); + + if (using_tmp) + { + make_union_table_name(table_name_buffer); + writer->add_member("table_name").add_str(table_name_buffer); + writer->add_member("access_type").add_str("ALL"); // not very useful + + /* r_loops (not present in tabular output) */ + if (is_analyze) + { + writer->add_member("r_loops").add_ll( + fake_select_lex_tracker.get_loops()); + } + + /* `r_rows` */ + if (is_analyze) + { + writer->add_member("r_rows"); + if (fake_select_lex_tracker.has_scans()) + writer->add_double(fake_select_lex_tracker.get_avg_rows()); + else + writer->add_null(); + } + } + writer->add_member("query_specifications").start_array(); + + for (int i= 0; i < (int) union_members.elements(); i++) + { + writer->start_object(); 
+ //writer->add_member("dependent").add_str("TODO"); + //writer->add_member("cacheable").add_str("TODO"); + Explain_select *sel= query->get_select(union_members.at(i)); + sel->print_explain_json(query, writer, is_analyze); + writer->end_object(); + } + writer->end_array(); + + print_explain_json_for_children(query, writer, is_analyze); + + writer->end_object(); // union_result + writer->end_object(); // query_block + + if (started_object) + writer->end_object(); +} + + +/* + Print EXPLAINs for all children nodes (i.e. for subqueries) +*/ + +int Explain_node::print_explain_for_children(Explain_query *query, + select_result_sink *output, + uint8 explain_flags, + bool is_analyze) +{ + for (int i= 0; i < (int) children.elements(); i++) + { + Explain_node *node= query->get_node(children.at(i)); + /* + Note: node may not be present because for certain kinds of subqueries, + the optimizer is not able to see that they were eliminated. + */ + if (node && node->print_explain(query, output, explain_flags, is_analyze)) + return 1; + } + return 0; +} + +bool Explain_basic_join::add_table(Explain_table_access *tab, Explain_query *query) +{ + if (!join_tabs) + { + n_join_tabs= 0; + if (!(join_tabs= ((Explain_table_access**) + alloc_root(query->mem_root, + sizeof(Explain_table_access*) * + MAX_TABLES)))) + return true; + } + join_tabs[n_join_tabs++]= tab; + return false; +} + +/* + This tells whether a child subquery should be printed in JSON output. + + Derived tables and Non-merged semi-joins should not be printed, because they + are printed inline in Explain_table_access. 
+*/ +bool is_connection_printable_in_json(enum Explain_node::explain_connection_type type) +{ + return (type != Explain_node::EXPLAIN_NODE_DERIVED && + type != Explain_node::EXPLAIN_NODE_NON_MERGED_SJ); +} + + +void Explain_node::print_explain_json_for_children(Explain_query *query, + Json_writer *writer, + bool is_analyze) +{ + Json_writer_nesting_guard guard(writer); + + bool started= false; + for (int i= 0; i < (int) children.elements(); i++) + { + Explain_node *node= query->get_node(children.at(i)); + + /* + Note: node may not be present because for certain kinds of subqueries, + the optimizer is not able to see that they were eliminated. + */ + if (!node) + continue; + + /* Derived tables are printed inside Explain_table_access objects */ + if (!is_connection_printable_in_json(node->connection_type)) + continue; + + if (!started) + { + writer->add_member("subqueries").start_array(); + started= true; + } + + writer->start_object(); + node->print_explain_json(query, writer, is_analyze); + writer->end_object(); + } + + if (started) + writer->end_array(); +} + + +bool Explain_node::print_explain_json_cache(Json_writer *writer, + bool is_analyze) +{ + if (cache_tracker) + { + cache_tracker->fetch_current_stats(); + writer->add_member("expression_cache").start_object(); + if (cache_tracker->state != Expression_cache_tracker::OK) + { + writer->add_member("state"). 
+ add_str(Expression_cache_tracker::state_str[cache_tracker->state]); + } + + if (is_analyze) + { + longlong cache_reads= cache_tracker->hit + cache_tracker->miss; + writer->add_member("r_loops").add_ll(cache_reads); + if (cache_reads != 0) + { + double hit_ratio= double(cache_tracker->hit) / cache_reads * 100.0; + writer->add_member("r_hit_ratio").add_double(hit_ratio); + } + } + return true; + } + return false; +} + + +Explain_basic_join::~Explain_basic_join() +{ + if (join_tabs) + { + for (uint i= 0; i< n_join_tabs; i++) + delete join_tabs[i]; + } +} + + +int Explain_select::print_explain(Explain_query *query, + select_result_sink *output, + uint8 explain_flags, bool is_analyze) +{ + THD *thd= output->thd; + MEM_ROOT *mem_root= thd->mem_root; + + if (select_type == pushed_derived_text || select_type == pushed_select_text) + { + print_explain_message_line(output, explain_flags, is_analyze, + select_id /*select number*/, + select_type, + NULL, /* rows */ + NULL); + } + else if (message) + { + List item_list; + Item *item_null= new (mem_root) Item_null(thd); + + item_list.push_back(new (mem_root) Item_int(thd, (int32) select_id), + mem_root); + item_list.push_back(new (mem_root) Item_string_sys(thd, select_type), + mem_root); + for (uint i=0 ; i < 7; i++) + item_list.push_back(item_null, mem_root); + if (explain_flags & DESCRIBE_PARTITIONS) + item_list.push_back(item_null, mem_root); + + /* filtered */ + if (is_analyze || explain_flags & DESCRIBE_EXTENDED) + item_list.push_back(item_null, mem_root); + + if (is_analyze) + { + /* r_rows, r_filtered */ + item_list.push_back(item_null, mem_root); + item_list.push_back(item_null, mem_root); + } + + item_list.push_back(new (mem_root) Item_string_sys(thd, message), + mem_root); + + if (output->send_data(item_list)) + return 1; + } + else + { + bool using_tmp= false; + bool using_fs= false; + + for (Explain_aggr_node *node= aggr_tree; node; node= node->child) + { + switch (node->get_type()) + { + case AGGR_OP_TEMP_TABLE: + 
using_tmp= true; + break; + case AGGR_OP_FILESORT: + using_fs= true; + break; + default: + break; + } + } + + for (uint i=0; i< n_join_tabs; i++) + { + join_tabs[i]->print_explain(output, explain_flags, is_analyze, select_id, + select_type, using_tmp, using_fs); + if (i == 0) + { + /* + "Using temporary; Using filesort" should only be shown near the 1st + table + */ + using_tmp= false; + using_fs= false; + } + } + for (uint i=0; i< n_join_tabs; i++) + { + Explain_basic_join* nest; + if ((nest= join_tabs[i]->sjm_nest)) + nest->print_explain(query, output, explain_flags, is_analyze); + } + } + + return print_explain_for_children(query, output, explain_flags, is_analyze); +} + + +int Explain_basic_join::print_explain(Explain_query *query, + select_result_sink *output, + uint8 explain_flags, bool is_analyze) +{ + for (uint i=0; i< n_join_tabs; i++) + { + if (join_tabs[i]->print_explain(output, explain_flags, is_analyze, + select_id, + "MATERIALIZED" /*select_type*/, + FALSE /*using temporary*/, + FALSE /*using filesort*/)) + return 1; + } + return 0; +} + + +void Explain_select::add_linkage(Json_writer *writer) +{ + const char *operation= NULL; + switch (linkage) + { + case UNION_TYPE: + operation= "UNION"; + break; + case INTERSECT_TYPE: + operation= "INTERSECT"; + break; + case EXCEPT_TYPE: + operation= "EXCEPT"; + break; + default: + // It is the first or the only SELECT => no operation + break; + } + if (operation) + writer->add_member("operation").add_str(operation); +} + +void Explain_select::print_explain_json(Explain_query *query, + Json_writer *writer, bool is_analyze) +{ + Json_writer_nesting_guard guard(writer); + + bool started_cache= print_explain_json_cache(writer, is_analyze); + + if (message || + select_type == pushed_derived_text || + select_type == pushed_select_text) + { + writer->add_member("query_block").start_object(); + writer->add_member("select_id").add_ll(select_id); + add_linkage(writer); + + writer->add_member("table").start_object(); + 
writer->add_member("message").add_str(select_type == pushed_derived_text ? + "Pushed derived" : + select_type == pushed_select_text ? + "Pushed select" : + message); + writer->end_object(); + + print_explain_json_for_children(query, writer, is_analyze); + writer->end_object(); + } + else + { + writer->add_member("query_block").start_object(); + writer->add_member("select_id").add_ll(select_id); + add_linkage(writer); + + if (is_analyze && time_tracker.get_loops()) + { + writer->add_member("r_loops").add_ll(time_tracker.get_loops()); + if (time_tracker.has_timed_statistics()) + { + writer->add_member("r_total_time_ms"). + add_double(time_tracker.get_time_ms()); + } + } + + if (exec_const_cond) + { + writer->add_member("const_condition"); + write_item(writer, exec_const_cond); + } + if (outer_ref_cond) + { + writer->add_member("outer_ref_condition"); + write_item(writer, outer_ref_cond); + } + if (pseudo_bits_cond) + { + writer->add_member("pseudo_bits_condition"); + write_item(writer, pseudo_bits_cond); + } + + /* we do not print HAVING which always evaluates to TRUE */ + if (having || (having_value == Item::COND_FALSE)) + { + writer->add_member("having_condition"); + if (likely(having)) + write_item(writer, having); + else + { + /* Normally we should not go this branch, left just for safety */ + DBUG_ASSERT(having_value == Item::COND_FALSE); + writer->add_str("0"); + } + } + + int started_objects= 0; + + Explain_aggr_node *node= aggr_tree; + + for (; node; node= node->child) + { + switch (node->get_type()) + { + case AGGR_OP_TEMP_TABLE: + writer->add_member("temporary_table").start_object(); + break; + case AGGR_OP_FILESORT: + { + writer->add_member("filesort").start_object(); + auto aggr_node= (Explain_aggr_filesort*)node; + aggr_node->print_json_members(writer, is_analyze); + break; + } + case AGGR_OP_REMOVE_DUPLICATES: + writer->add_member("duplicate_removal").start_object(); + break; + case AGGR_OP_WINDOW_FUNCS: + { + //TODO: make print_json_members virtual? 
+ writer->add_member("window_functions_computation").start_object(); + auto aggr_node= (Explain_aggr_window_funcs*)node; + aggr_node->print_json_members(writer, is_analyze); + break; + } + default: + DBUG_ASSERT(0); + } + started_objects++; + } + + Explain_basic_join::print_explain_json_interns(query, writer, is_analyze); + + for (;started_objects; started_objects--) + writer->end_object(); + + writer->end_object(); + } + + if (started_cache) + writer->end_object(); +} + + +Explain_aggr_filesort::Explain_aggr_filesort(MEM_ROOT *mem_root, + bool is_analyze, + Filesort *filesort) + : tracker(is_analyze) +{ + child= NULL; + for (ORDER *ord= filesort->order; ord; ord= ord->next) + { + sort_items.push_back(ord->item[0], mem_root); + sort_directions.push_back(&ord->direction, mem_root); + } + filesort->tracker= &tracker; +} + + +void Explain_aggr_filesort::print_json_members(Json_writer *writer, + bool is_analyze) +{ + char item_buf[256]; + String str(item_buf, sizeof(item_buf), &my_charset_bin); + str.length(0); + + List_iterator_fast it(sort_items); + List_iterator_fast it_dir(sort_directions); + Item* item; + ORDER::enum_order *direction; + bool first= true; + while ((item= it++)) + { + direction= it_dir++; + if (first) + first= false; + else + { + str.append(STRING_WITH_LEN(", ")); + } + append_item_to_str(&str, item); + if (*direction == ORDER::ORDER_DESC) + str.append(STRING_WITH_LEN(" desc")); + } + + writer->add_member("sort_key").add_str(str.c_ptr_safe()); + + if (is_analyze) + tracker.print_json_members(writer); +} + + +void Explain_aggr_window_funcs::print_json_members(Json_writer *writer, + bool is_analyze) +{ + Explain_aggr_filesort *srt; + List_iterator it(sorts); + Json_writer_array sorts(writer, "sorts"); + while ((srt= it++)) + { + Json_writer_object sort(writer); + Json_writer_object filesort(writer, "filesort"); + srt->print_json_members(writer, is_analyze); + } +} + + +void Explain_basic_join::print_explain_json(Explain_query *query, + Json_writer 
*writer, + bool is_analyze) +{ + writer->add_member("query_block").start_object(); + writer->add_member("select_id").add_ll(select_id); + + print_explain_json_interns(query, writer, is_analyze); + + writer->end_object(); +} + + +void Explain_basic_join:: +print_explain_json_interns(Explain_query *query, + Json_writer *writer, + bool is_analyze) +{ + { + Json_writer_array loop(writer, "nested_loop"); + for (uint i=0; i< n_join_tabs; i++) + { + if (join_tabs[i]->start_dups_weedout) + { + writer->start_object(); + writer->add_member("duplicates_removal"); + writer->start_array(); + } + + join_tabs[i]->print_explain_json(query, writer, is_analyze); + + if (join_tabs[i]->end_dups_weedout) + { + writer->end_array(); + writer->end_object(); + } + } + } // "nested_loop" + print_explain_json_for_children(query, writer, is_analyze); +} + + +void Explain_table_access::push_extra(enum explain_extra_tag extra_tag) +{ + extra_tags.append(extra_tag); +} + + +/* + Put the contents of 'key' field of EXPLAIN otuput into key_str. 
+ + It is surprisingly complex: + - hash join shows #hash#used_key + - quick selects that use single index will print index name +*/ + +void Explain_table_access::fill_key_str(String *key_str, bool is_json) const +{ + CHARSET_INFO *cs= system_charset_info; + bool is_hj= (type == JT_HASH || type == JT_HASH_NEXT || + type == JT_HASH_RANGE || type == JT_HASH_INDEX_MERGE); + LEX_CSTRING hash_key_prefix= { STRING_WITH_LEN("#hash#") }; + const char *key_name; + + if ((key_name= key.get_key_name())) + { + if (is_hj) + key_str->append(hash_key_prefix.str, hash_key_prefix.length, cs); + + key_str->append(key_name, strlen(key_name)); + + if (is_hj && type != JT_HASH) + key_str->append(':'); + } + + if (quick_info) + { + StringBuffer<64> buf2; + if (is_json) + quick_info->print_extra_recursive(&buf2); + else + quick_info->print_key(&buf2); + key_str->append(buf2); + } + if (type == JT_HASH_NEXT) + { + key_name= hash_next_key.get_key_name(); + key_str->append(key_name, strlen(key_name)); + } +} + + +/* + Fill "key_length". + - this is just used key length for ref/range + - for index_merge, it is a comma-separated list of lengths. + - for hash join, it is key_len:pseudo_key_len + - [tabular form only] rowid filter length is added after "|". + + In JSON, we consider this column to be legacy, it is superceded by + used_key_parts. 
+*/ + +void Explain_table_access::fill_key_len_str(String *key_len_str, + bool is_json) const +{ + bool is_hj= (type == JT_HASH || type == JT_HASH_NEXT || + type == JT_HASH_RANGE || type == JT_HASH_INDEX_MERGE); + if (key.get_key_len() != (uint)-1) + { + char buf[64]; + size_t length; + length= longlong10_to_str(key.get_key_len(), buf, 10) - buf; + key_len_str->append(buf, length); + if (is_hj && type != JT_HASH) + key_len_str->append(':'); + } + + if (quick_info) + { + StringBuffer<64> buf2; + quick_info->print_key_len(&buf2); + key_len_str->append(buf2); + } + + if (type == JT_HASH_NEXT) + { + char buf[64]; + size_t length; + length= longlong10_to_str(hash_next_key.get_key_len(), buf, 10) - buf; + key_len_str->append(buf, length); + } + + if (!is_json && rowid_filter) + { + key_len_str->append('|'); + StringBuffer<64> filter_key_len; + rowid_filter->quick->print_key_len(&filter_key_len); + key_len_str->append(filter_key_len); + } +} + + +bool Explain_index_use::set(MEM_ROOT *mem_root, KEY *key, uint key_len_arg) +{ + if (set_pseudo_key(mem_root, key->name.str)) + return 1; + + key_len= key_len_arg; + uint len= 0; + for (uint i= 0; i < key->usable_key_parts; i++) + { + if (!key_parts_list.append_str(mem_root, + key->key_part[i].field->field_name.str)) + return 1; + len += key->key_part[i].store_length; + if (len >= key_len_arg) + break; + } + return 0; +} + + +bool Explain_index_use::set_pseudo_key(MEM_ROOT *root, const char* key_name_arg) +{ + if (key_name_arg) + { + if (!(key_name= strdup_root(root, key_name_arg))) + return 1; + } + else + key_name= NULL; + key_len= ~(uint) 0; + return 0; +} + + +/* + Given r_filtered% from join buffer condition and join condition, produce a + combined r_filtered% number. This is needed for tabular EXPLAIN output which + has only one cell for r_filtered value. 
+*/ + +double Explain_table_access::get_r_filtered() +{ + double r_filtered= tracker.get_filtered_after_where(); + if (bka_type.is_using_jbuf()) + r_filtered *= jbuf_tracker.get_filtered_after_where(); + return r_filtered; +} + + +int Explain_table_access::print_explain(select_result_sink *output, uint8 explain_flags, + bool is_analyze, + uint select_id, const char *select_type, + bool using_temporary, bool using_filesort) +{ + THD *thd= output->thd; // note: for SHOW EXPLAIN, this is target thd. + MEM_ROOT *mem_root= thd->mem_root; + + List item_list; + Item *item_null= new (mem_root) Item_null(thd); + + /* `id` column */ + item_list.push_back(new (mem_root) Item_int(thd, (int32) select_id), + mem_root); + + /* `select_type` column */ + push_str(thd, &item_list, select_type); + + /* `table` column */ + push_string(thd, &item_list, &table_name); + + /* `partitions` column */ + if (explain_flags & DESCRIBE_PARTITIONS) + { + if (used_partitions_set) + { + push_string(thd, &item_list, &used_partitions); + } + else + item_list.push_back(item_null, mem_root); + } + + /* `type` column */ + StringBuffer<64> join_type_buf; + if (rowid_filter == NULL) + push_str(thd, &item_list, join_type_str[type]); + else + { + join_type_buf.append(join_type_str[type], strlen(join_type_str[type])); + join_type_buf.append(STRING_WITH_LEN("|filter")); + item_list.push_back(new (mem_root) + Item_string_sys(thd, join_type_buf.ptr(), + join_type_buf.length()), + mem_root); + } + + /* `possible_keys` column */ + StringBuffer<64> possible_keys_buf; + if (possible_keys.is_empty()) + item_list.push_back(item_null, mem_root); + else + push_string_list(thd, &item_list, possible_keys, &possible_keys_buf); + + /* `key` */ + StringBuffer<64> key_str; + fill_key_str(&key_str, false); + + if (rowid_filter) + { + key_str.append('|'); + StringBuffer<64> rowid_key_str; + rowid_filter->quick->print_key(&rowid_key_str); + key_str.append(rowid_key_str); + } + + if (key_str.length() > 0) + push_string(thd, 
&item_list, &key_str); + else + item_list.push_back(item_null, mem_root); + + /* `key_len` */ + StringBuffer<64> key_len_str; + fill_key_len_str(&key_len_str, false); + + if (key_len_str.length() > 0) + push_string(thd, &item_list, &key_len_str); + else + item_list.push_back(item_null, mem_root); + + /* `ref` */ + StringBuffer<64> ref_list_buf; + if (ref_list.is_empty()) + { + if (type == JT_FT) + { + /* Traditionally, EXPLAIN lines with type=fulltext have ref='' */ + push_str(thd, &item_list, ""); + } + else + item_list.push_back(item_null, mem_root); + } + else + push_string_list(thd, &item_list, ref_list, &ref_list_buf); + + /* `rows` */ + StringBuffer<64> rows_str; + if (rows_set) + { + rows_str.append_ulonglong((ulonglong)rows); + + if (rowid_filter) + { + rows_str.append(STRING_WITH_LEN(" (")); + rows_str.append_ulonglong((ulonglong) (round(rowid_filter->selectivity * + 100.0))); + rows_str.append(STRING_WITH_LEN("%)")); + } + item_list.push_back(new (mem_root) + Item_string_sys(thd, rows_str.ptr(), + rows_str.length()), mem_root); + } + else + item_list.push_back(item_null, mem_root); + + /* `r_rows` */ + StringBuffer<64> r_rows_str; + if (is_analyze) + { + if (!tracker.has_scans()) + { + item_list.push_back(item_null, mem_root); + } + else + { + double avg_rows= tracker.get_avg_rows(); + Item_float *fl= new (mem_root) Item_float(thd, avg_rows, 2); + String tmp; + String *res= fl->val_str(&tmp); + r_rows_str.append(*res); + if (rowid_filter) + { + r_rows_str.append(STRING_WITH_LEN(" (")); + r_rows_str.append_ulonglong( + (ulonglong) (rowid_filter->tracker->get_r_selectivity_pct() * 100.0)); + r_rows_str.append(STRING_WITH_LEN("%)")); + } + item_list.push_back(new (mem_root) + Item_string_sys(thd, r_rows_str.ptr(), + r_rows_str.length()), mem_root); + } + } + + /* `filtered` */ + if (explain_flags & DESCRIBE_EXTENDED || is_analyze) + { + if (filtered_set) + { + item_list.push_back(new (mem_root) Item_float(thd, filtered, 2), + mem_root); + } + else + 
item_list.push_back(item_null, mem_root); + } + + /* `r_filtered` */ + if (is_analyze) + { + if (!tracker.has_scans()) + { + item_list.push_back(item_null, mem_root); + } + else + { + double r_filtered= tracker.get_filtered_after_where(); + if (bka_type.is_using_jbuf()) + r_filtered *= jbuf_tracker.get_filtered_after_where(); + item_list.push_back(new (mem_root) + Item_float(thd, r_filtered * 100.0, 2), + mem_root); + } + } + + /* `Extra` */ + StringBuffer<256> extra_buf; + bool first= true; + for (int i=0; i < (int)extra_tags.elements(); i++) + { + if (first) + first= false; + else + extra_buf.append(STRING_WITH_LEN("; ")); + append_tag_name(&extra_buf, extra_tags.at(i)); + } + + if (using_temporary) + { + if (first) + first= false; + else + extra_buf.append(STRING_WITH_LEN("; ")); + extra_buf.append(STRING_WITH_LEN("Using temporary")); + } + + if (using_filesort || this->pre_join_sort) + { + if (first) + first= false; + else + extra_buf.append(STRING_WITH_LEN("; ")); + extra_buf.append(STRING_WITH_LEN("Using filesort")); + } + + if (rowid_filter) + { + if (first) + first= false; + else + extra_buf.append(STRING_WITH_LEN("; ")); + extra_buf.append(STRING_WITH_LEN("Using rowid filter")); + } + + item_list.push_back(new (mem_root) + Item_string_sys(thd, extra_buf.ptr(), + extra_buf.length()), + mem_root); + + if (output->send_data(item_list)) + return 1; + + return 0; +} + + +/** + Adds copy of the string to the list + + @param mem_root where to allocate string + @param str string to copy and add + + @return + NULL - out of memory error + poiner on allocated copy of the string +*/ + +const char *String_list::append_str(MEM_ROOT *mem_root, const char *str) +{ + size_t len= strlen(str); + char *cp; + if (!(cp = (char*)alloc_root(mem_root, len+1))) + return NULL; + memcpy(cp, str, len+1); + push_back(cp, mem_root); + return cp; +} + + +static void write_item(Json_writer *writer, Item *item) +{ + THD *thd= current_thd; + char item_buf[256]; + String str(item_buf, 
sizeof(item_buf), &my_charset_bin); + str.length(0); + + ulonglong save_option_bits= thd->variables.option_bits; + thd->variables.option_bits &= ~OPTION_QUOTE_SHOW_CREATE; + + item->print(&str, QT_EXPLAIN); + + thd->variables.option_bits= save_option_bits; + writer->add_str(str.c_ptr_safe()); +} + +static void append_item_to_str(String *out, Item *item) +{ + THD *thd= current_thd; + ulonglong save_option_bits= thd->variables.option_bits; + thd->variables.option_bits &= ~OPTION_QUOTE_SHOW_CREATE; + + item->print(out, QT_EXPLAIN); + + thd->variables.option_bits= save_option_bits; +} + +void Explain_table_access::tag_to_json(Json_writer *writer, + enum explain_extra_tag tag) +{ + switch (tag) + { + case ET_OPEN_FULL_TABLE: + writer->add_member("open_full_table").add_bool(true); + break; + case ET_SCANNED_0_DATABASES: + writer->add_member("scanned_databases").add_ll(0); + break; + case ET_SCANNED_1_DATABASE: + writer->add_member("scanned_databases").add_ll(1); + break; + case ET_SCANNED_ALL_DATABASES: + writer->add_member("scanned_databases").add_str("all"); + break; + case ET_SKIP_OPEN_TABLE: + writer->add_member("skip_open_table").add_bool(true); + break; + case ET_OPEN_FRM_ONLY: + writer->add_member("open_frm_only").add_bool(true); + break; + case ET_USING_INDEX_CONDITION: + writer->add_member("index_condition"); + write_item(writer, pushed_index_cond); + break; + case ET_USING_INDEX_CONDITION_BKA: + writer->add_member("index_condition_bka"); + write_item(writer, pushed_index_cond); + break; + case ET_USING_WHERE: + { + /* + We are printing the condition that is checked when scanning this + table. + - when join buffer is used, it is cache_cond. + - in other cases, it is where_cond. + */ + Item *item= bka_type.is_using_jbuf()? 
cache_cond: where_cond; + if (item) + { + writer->add_member("attached_condition"); + write_item(writer, item); + } + } + break; + case ET_USING_INDEX: + writer->add_member("using_index").add_bool(true); + break; + case ET_USING: + // index merge: case ET_USING + break; + case ET_RANGE_CHECKED_FOR_EACH_RECORD: + /* Handled as range_checked_fer */ + case ET_USING_JOIN_BUFFER: + /* Do nothing. Join buffer is handled differently */ + case ET_START_TEMPORARY: + case ET_END_TEMPORARY: + /* Handled as "duplicates_removal: { ... } */ + case ET_FULL_SCAN_ON_NULL_KEY: + /* Handled in full_scan_on_null_key */ + break; + case ET_FIRST_MATCH: + writer->add_member("first_match").add_str(firstmatch_table_name.c_ptr()); + break; + case ET_LOOSESCAN: + writer->add_member("loose_scan").add_bool(true); + break; + case ET_USING_MRR: + writer->add_member("mrr_type").add_str(mrr_type.c_ptr()); + break; + case ET_USING_INDEX_FOR_GROUP_BY: + writer->add_member("using_index_for_group_by"); + if (loose_scan_is_scanning) + writer->add_str("scanning"); + else + writer->add_bool(true); + break; + + /*new:*/ + case ET_CONST_ROW_NOT_FOUND: + writer->add_member("const_row_not_found").add_bool(true); + break; + case ET_UNIQUE_ROW_NOT_FOUND: + /* + Currently, we never get here. All SELECTs that have + ET_UNIQUE_ROW_NOT_FOUND for a table are converted into degenerate + SELECTs with message="Impossible WHERE ...". + MySQL 5.6 has the same property. + I'm leaving the handling in just for the sake of covering all enum + members and safety. 
+ */ + writer->add_member("unique_row_not_found").add_bool(true); + break; + case ET_IMPOSSIBLE_ON_CONDITION: + writer->add_member("impossible_on_condition").add_bool(true); + break; + case ET_USING_WHERE_WITH_PUSHED_CONDITION: + /* + It would be nice to print the pushed condition, but current Storage + Engine API doesn't provide any way to do that + */ + writer->add_member("pushed_condition").add_bool(true); + break; + + case ET_NOT_EXISTS: + writer->add_member("not_exists").add_bool(true); + break; + case ET_DISTINCT: + writer->add_member("distinct").add_bool(true); + break; + case ET_TABLE_FUNCTION: + writer->add_member("table_function").add_str("json_table"); + break; + + default: + DBUG_ASSERT(0); + } +} + + +static +void add_json_keyset(Json_writer *writer, const char *elem_name, + String_list *keyset) +{ + if (!keyset->is_empty()) + print_json_array(writer, elem_name, *keyset); +} + + +void Explain_rowid_filter::print_explain_json(Explain_query *query, + Json_writer *writer, + bool is_analyze) +{ + Json_writer_nesting_guard guard(writer); + writer->add_member("rowid_filter").start_object(); + quick->print_json(writer); + writer->add_member("rows").add_ll(rows); + writer->add_member("selectivity_pct").add_double(selectivity * 100.0); + if (is_analyze) + { + writer->add_member("r_rows").add_double(tracker->get_container_elements()); + writer->add_member("r_lookups").add_ll(tracker->get_container_lookups()); + writer->add_member("r_selectivity_pct"). + add_double(tracker->get_r_selectivity_pct() * 100.0); + writer->add_member("r_buffer_size"). + add_double((double) (tracker->get_container_buff_size())); + writer->add_member("r_filling_time_ms"). 
+ add_double(tracker->get_time_fill_container_ms()); + } + writer->end_object(); // rowid_filter +} + +static void trace_engine_stats(handler *file, Json_writer *writer) +{ + if (file && file->handler_stats) + { + ha_handler_stats *hs= file->handler_stats; + writer->add_member("r_engine_stats").start_object(); + if (hs->pages_accessed) + writer->add_member("pages_accessed").add_ull(hs->pages_accessed); + if (hs->pages_updated) + writer->add_member("pages_updated").add_ull(hs->pages_updated); + if (hs->pages_read_count) + writer->add_member("pages_read_count").add_ull(hs->pages_read_count); + if (hs->pages_read_time) + writer->add_member("pages_read_time_ms"). + add_double(hs->pages_read_time / 1000.0); + if (hs->undo_records_read) + writer->add_member("old_rows_read").add_ull(hs->undo_records_read); + writer->end_object(); + } +} + +void Explain_table_access::print_explain_json(Explain_query *query, + Json_writer *writer, + bool is_analyze) +{ + Json_writer_object jsobj(writer); + + if (pre_join_sort) + { + /* filesort was invoked on this join tab before doing the join with the rest */ + writer->add_member("read_sorted_file").start_object(); + if (is_analyze) + { + writer->add_member("r_rows"); + /* + r_rows when reading filesort result. This can be less than the number + of rows produced by filesort due to NL-join having LIMIT. + */ + if (tracker.has_scans()) + writer->add_double(tracker.get_avg_rows()); + else + writer->add_null(); + + /* + r_filtered when reading filesort result. We should have checked the + WHERE while doing filesort but lets check just in case. 
+ */ + if (tracker.has_scans() && tracker.get_filtered_after_where() < 1.0) + { + writer->add_member("r_filtered"); + writer->add_double(tracker.get_filtered_after_where()*100.0); + } + } + writer->add_member("filesort").start_object(); + pre_join_sort->print_json_members(writer, is_analyze); + } + + if (bka_type.is_using_jbuf()) + { + writer->add_member("block-nl-join").start_object(); + } + + if (range_checked_fer) + { + range_checked_fer->print_json(writer, is_analyze); + } + + if (full_scan_on_null_key) + writer->add_member("full-scan-on-null_key").start_object(); + + writer->add_member("table").start_object(); + + writer->add_member("table_name").add_str(table_name); + + if (used_partitions_set) + print_json_array(writer, "partitions", used_partitions_list); + + writer->add_member("access_type").add_str(join_type_str[type]); + + add_json_keyset(writer, "possible_keys", &possible_keys); + + /* `key` */ + /* For non-basic quick select, 'key' will not be present */ + if (!quick_info || quick_info->is_basic()) + { + StringBuffer<64> key_str; + fill_key_str(&key_str, true); + if (key_str.length()) + writer->add_member("key").add_str(key_str); + } + + /* `key_length` */ + StringBuffer<64> key_len_str; + fill_key_len_str(&key_len_str, true); + if (key_len_str.length()) + writer->add_member("key_length").add_str(key_len_str); + + /* `used_key_parts` */ + String_list *parts_list= NULL; + if (quick_info && quick_info->is_basic()) + parts_list= &quick_info->range.key_parts_list; + else + parts_list= &key.key_parts_list; + + if (parts_list && !parts_list->is_empty()) + print_json_array(writer, "used_key_parts", *parts_list); + + if (quick_info && !quick_info->is_basic()) + { + writer->add_member("index_merge").start_object(); + quick_info->print_json(writer); + writer->end_object(); + } + + /* `ref` */ + if (!ref_list.is_empty()) + print_json_array(writer, "ref", ref_list); + + if (rowid_filter) + { + rowid_filter->print_explain_json(query, writer, is_analyze); + } + + /* 
r_loops (not present in tabular output) */ + if (is_analyze) + { + writer->add_member("r_loops").add_ll(tracker.get_loops()); + } + + /* `rows` */ + if (rows_set) + writer->add_member("rows").add_ull(rows); + + /* `r_rows` */ + if (is_analyze) + { + writer->add_member("r_rows"); + if (pre_join_sort) + { + /* Get r_rows value from filesort */ + if (pre_join_sort->tracker.get_r_loops()) + writer->add_double(pre_join_sort->tracker.get_avg_examined_rows()); + else + writer->add_null(); + } + else + { + if (tracker.has_scans()) + writer->add_double(tracker.get_avg_rows()); + else + writer->add_null(); + } + + if (op_tracker.get_loops()) + { + double total_time= op_tracker.get_time_ms(); + if (rowid_filter) + total_time+= rowid_filter->tracker->get_time_fill_container_ms(); + writer->add_member("r_table_time_ms").add_double(total_time); + writer->add_member("r_other_time_ms").add_double(extra_time_tracker.get_time_ms()); + } + trace_engine_stats(handler_for_stats, writer); + } + + /* `filtered` */ + if (filtered_set) + writer->add_member("filtered").add_double(filtered); + + /* `r_filtered` */ + if (is_analyze) + { + writer->add_member("r_filtered"); + if (pre_join_sort) + { + /* Get r_filtered value from filesort */ + if (pre_join_sort->tracker.get_r_loops()) + writer->add_double(pre_join_sort->tracker.get_r_filtered()*100); + else + writer->add_null(); + } + else + { + /* Get r_filtered from the NL-join runtime */ + if (tracker.has_scans()) + writer->add_double(tracker.get_filtered_after_where()*100.0); + else + writer->add_null(); + } + } + + for (int i=0; i < (int)extra_tags.elements(); i++) + { + tag_to_json(writer, extra_tags.at(i)); + } + + if (full_scan_on_null_key) + writer->end_object(); //"full-scan-on-null_key" + + if (range_checked_fer) + writer->end_object(); // "range-checked-for-each-record" + + if (bka_type.is_using_jbuf()) + { + writer->end_object(); // "block-nl-join" + writer->add_member("buffer_type").add_str(bka_type.incremental? 
+ "incremental":"flat"); + writer->add_member("buffer_size").add_size(bka_type.join_buffer_size); + writer->add_member("join_type").add_str(bka_type.join_alg); + if (bka_type.mrr_type.length()) + writer->add_member("mrr_type").add_str(bka_type.mrr_type); + if (where_cond) + { + writer->add_member("attached_condition"); + write_item(writer, where_cond); + } + + if (is_analyze) + { + writer->add_member("r_loops").add_ll(jbuf_loops_tracker.get_loops()); + + writer->add_member("r_filtered"); + if (jbuf_tracker.has_scans()) + writer->add_double(jbuf_tracker.get_filtered_after_where()*100.0); + else + writer->add_null(); + + writer->add_member("r_unpack_time_ms"); + writer->add_double(jbuf_unpack_tracker.get_time_ms()); + DBUG_EXECUTE_IF("analyze_print_r_unpack_ops", + { + writer->add_member("r_unpack_ops"); + writer->add_ull(jbuf_unpack_tracker.get_loops()); + }); + + writer->add_member("r_other_time_ms"). + add_double(jbuf_extra_time_tracker.get_time_ms()); + /* + effective_rows is average number of matches we got for an incoming + row. The row is stored in the join buffer and then is read + from there, possibly multiple times. We can't count this number + directly. Infer it as: + total_number_of_row_combinations_considered / r_loops. + */ + writer->add_member("r_effective_rows"); + if (jbuf_loops_tracker.has_scans()) + { + double loops= (double)jbuf_loops_tracker.get_loops(); + double row_combinations= (double)jbuf_tracker.r_rows; + writer->add_double(row_combinations / loops); + } + else + writer->add_null(); + } + } + + if (derived_select_number) + { + /* This is a derived table. 
Print its contents here */ + writer->add_member("materialized").start_object(); + Explain_node *node= query->get_node(derived_select_number); + if (node->get_type() == Explain_node::EXPLAIN_SELECT && + ((Explain_select*)node)->is_lateral) + { + writer->add_member("lateral").add_ll(1); + } + node->print_explain_json(query, writer, is_analyze); + writer->end_object(); + } + if (non_merged_sjm_number) + { + /* This is a non-merged semi-join table. Print its contents here */ + writer->add_member("materialized").start_object(); + writer->add_member("unique").add_ll(1); + Explain_node *node= query->get_node(non_merged_sjm_number); + node->connection_type= Explain_node::EXPLAIN_NODE_NON_MERGED_SJ; + node->print_explain_json(query, writer, is_analyze); + writer->end_object(); + } + if (sjm_nest) + { + /* This is a non-merged semi-join table. Print its contents here */ + writer->add_member("materialized").start_object(); + writer->add_member("unique").add_ll(1); + sjm_nest->print_explain_json(query, writer, is_analyze); + writer->end_object(); + } + + if (pre_join_sort) + { + writer->end_object(); // filesort + writer->end_object(); // read_sorted_file + } + + writer->end_object(); +} + + +/* + Elements in this array match members of enum explain_extra_tag, defined in + sql_explain.h +*/ + +const LEX_CSTRING extra_tag_text[]= +{ + { STRING_WITH_LEN("ET_none") }, + { STRING_WITH_LEN("Using index condition") }, + { STRING_WITH_LEN("Using index condition(BKA)") }, + { STRING_WITH_LEN("Using ") }, // special handling + { STRING_WITH_LEN("Range checked for each record (index map: 0x") }, // special handling + { STRING_WITH_LEN("Using where with pushed condition") }, + { STRING_WITH_LEN("Using where") }, + { STRING_WITH_LEN("Not exists") }, + + { STRING_WITH_LEN("Using index") }, + { STRING_WITH_LEN("Full scan on NULL key") }, + { STRING_WITH_LEN("Skip_open_table") }, + { STRING_WITH_LEN("Open_frm_only") }, + { STRING_WITH_LEN("Open_full_table") }, + + { STRING_WITH_LEN("Scanned 
0 databases") }, + { STRING_WITH_LEN("Scanned 1 database") }, + { STRING_WITH_LEN("Scanned all databases") }, + + { STRING_WITH_LEN("Using index for group-by") }, // special handling + { STRING_WITH_LEN("USING MRR: DONT PRINT ME") }, // special handling + + { STRING_WITH_LEN("Distinct") }, + { STRING_WITH_LEN("LooseScan") }, + { STRING_WITH_LEN("Start temporary") }, + { STRING_WITH_LEN("End temporary") }, + { STRING_WITH_LEN("FirstMatch") }, // special handling + + { STRING_WITH_LEN("Using join buffer") }, // special handling + + { STRING_WITH_LEN("Const row not found") }, + { STRING_WITH_LEN("Unique row not found") }, + { STRING_WITH_LEN("Impossible ON condition") } +}; + + +void Explain_table_access::append_tag_name(String *str, enum explain_extra_tag tag) +{ + switch (tag) { + case ET_USING: + { + // quick select + str->append(STRING_WITH_LEN("Using ")); + quick_info->print_extra(str); + break; + } + case ET_RANGE_CHECKED_FOR_EACH_RECORD: + { + /* 4 bits per 1 hex digit + terminating '\0' */ + char buf[MAX_KEY / 4 + 1]; + str->append(STRING_WITH_LEN("Range checked for each " + "record (index map: 0x")); + range_checked_fer->keys_map.print(buf); + str->append(buf, strlen(buf)); + str->append(')'); + break; + } + case ET_USING_MRR: + { + str->append(mrr_type); + break; + } + case ET_USING_JOIN_BUFFER: + { + str->append(extra_tag_text[tag]); + + str->append(STRING_WITH_LEN(" (")); + LEX_CSTRING buffer_type; + if (bka_type.incremental) + buffer_type= { STRING_WITH_LEN("incremental") }; + else + buffer_type= { STRING_WITH_LEN("flat") }; + str->append(buffer_type); + str->append(STRING_WITH_LEN(", ")); + str->append(bka_type.join_alg, strlen(bka_type.join_alg)); + str->append(STRING_WITH_LEN(" join")); + str->append(')'); + if (bka_type.mrr_type.length()) + { + str->append(STRING_WITH_LEN("; ")); + str->append(bka_type.mrr_type); + } + + break; + } + case ET_FIRST_MATCH: + { + if (firstmatch_table_name.length()) + { + str->append(STRING_WITH_LEN("FirstMatch(")); + 
str->append(firstmatch_table_name); + str->append(')'); + } + else + str->append(extra_tag_text[tag]); + break; + } + case ET_USING_INDEX_FOR_GROUP_BY: + { + str->append(extra_tag_text[tag]); + if (loose_scan_is_scanning) + str->append(STRING_WITH_LEN(" (scanning)")); + break; + case ET_TABLE_FUNCTION: + str->append(STRING_WITH_LEN("Table function: json_table")); + break; + } + default: + str->append(extra_tag_text[tag]); + } +} + + +/* + This is called for top-level Explain_quick_select only. The point of this + function is: + - index_merge should print $index_merge_type (child, ...) + - 'range' should not print anything. +*/ + +void Explain_quick_select::print_extra(String *str) +{ + if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE || + quick_type == QUICK_SELECT_I::QS_TYPE_RANGE_DESC || + quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) + { + /* print nothing */ + } + else + print_extra_recursive(str); +} + +void Explain_quick_select::print_json(Json_writer *writer) +{ + if (is_basic()) + { + writer->add_member("range").start_object(); + + writer->add_member("key").add_str(range.get_key_name()); + + print_json_array(writer, "used_key_parts", range.key_parts_list); + + writer->end_object(); + } + else + { + Json_writer_array ranges(writer, get_name_by_type()); + + List_iterator_fast it (children); + Explain_quick_select* child; + while ((child = it++)) + { + Json_writer_object obj(writer); + child->print_json(writer); + } + } +} + +void Explain_quick_select::print_extra_recursive(String *str) +{ + const char *name; + if (is_basic()) + { + name= range.get_key_name(); + str->append(name, strlen(name)); + } + else + { + name= get_name_by_type(); + str->append(name, strlen(name)); + str->append('('); + List_iterator_fast it (children); + Explain_quick_select* child; + bool first= true; + while ((child = it++)) + { + if (first) + first= false; + else + str->append(','); + + child->print_extra_recursive(str); + } + str->append(')'); + } +} + + +const char * 
Explain_quick_select::get_name_by_type() +{ + switch (quick_type) { + case QUICK_SELECT_I::QS_TYPE_INDEX_MERGE: + return "sort_union"; + case QUICK_SELECT_I::QS_TYPE_ROR_UNION: + return "union"; + case QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT: + return "intersect"; + case QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT: + return "sort_intersect"; + default: + DBUG_ASSERT(0); + return "unknown quick select type"; + } +} + + +/* + This prints a comma-separated list of used indexes, ignoring nesting +*/ + +void Explain_quick_select::print_key(String *str) +{ + if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE || + quick_type == QUICK_SELECT_I::QS_TYPE_RANGE_DESC || + quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) + { + if (str->length() > 0) + str->append(','); + str->append(range.get_key_name(), strlen(range.get_key_name())); + } + else + { + List_iterator_fast it (children); + Explain_quick_select* child; + while ((child = it++)) + { + child->print_key(str); + } + } +} + + +/* + This prints a comma-separated list of used key_lengths, ignoring nesting +*/ + +void Explain_quick_select::print_key_len(String *str) +{ + if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE || + quick_type == QUICK_SELECT_I::QS_TYPE_RANGE_DESC || + quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) + { + char buf[64]; + size_t length; + length= longlong10_to_str(range.get_key_len(), buf, 10) - buf; + if (str->length() > 0) + str->append(','); + str->append(buf, length); + } + else + { + List_iterator_fast it (children); + Explain_quick_select* child; + while ((child = it++)) + { + child->print_key_len(str); + } + } +} + + +int Explain_delete::print_explain(Explain_query *query, + select_result_sink *output, + uint8 explain_flags, + bool is_analyze) +{ + if (deleting_all_rows) + { + const char *msg= STR_DELETING_ALL_ROWS; + int res= print_explain_message_line(output, explain_flags, is_analyze, + 1 /*select number*/, + select_type, &rows, msg); + return res; + + } + else + { + return 
Explain_update::print_explain(query, output, explain_flags, + is_analyze); + } +} + + +void Explain_delete::print_explain_json(Explain_query *query, + Json_writer *writer, + bool is_analyze) +{ + Json_writer_nesting_guard guard(writer); + + if (deleting_all_rows) + { + writer->add_member("query_block").start_object(); + writer->add_member("select_id").add_ll(1); + writer->add_member("table").start_object(); + // just like mysql-5.6, we don't print table name. Is this ok? + writer->add_member("message").add_str(STR_DELETING_ALL_ROWS); + writer->end_object(); // table + writer->end_object(); // query_block + return; + } + Explain_update::print_explain_json(query, writer, is_analyze); +} + + +int Explain_update::print_explain(Explain_query *query, + select_result_sink *output, + uint8 explain_flags, + bool is_analyze) +{ + StringBuffer<64> key_buf; + StringBuffer<64> key_len_buf; + StringBuffer<64> extra_str; + if (impossible_where || no_partitions) + { + const char *msg= impossible_where ? 
+ STR_IMPOSSIBLE_WHERE : + STR_NO_ROWS_AFTER_PRUNING; + int res= print_explain_message_line(output, explain_flags, is_analyze, + 1 /*select number*/, + select_type, + NULL, /* rows */ + msg); + return res; + } + + if (quick_info) + { + quick_info->print_key(&key_buf); + quick_info->print_key_len(&key_len_buf); + + StringBuffer<64> quick_buf; + quick_info->print_extra(&quick_buf); + if (quick_buf.length()) + { + extra_str.append(STRING_WITH_LEN("Using ")); + extra_str.append(quick_buf); + } + } + else if (key.get_key_name()) + { + const char *name= key.get_key_name(); + key_buf.set(name, strlen(name), &my_charset_bin); + char buf[64]; + size_t length= longlong10_to_str(key.get_key_len(), buf, 10) - buf; + key_len_buf.copy(buf, length, &my_charset_bin); + } + + if (using_where) + { + if (extra_str.length() !=0) + extra_str.append(STRING_WITH_LEN("; ")); + extra_str.append(STRING_WITH_LEN("Using where")); + } + + if (mrr_type.length() != 0) + { + if (extra_str.length() !=0) + extra_str.append(STRING_WITH_LEN("; ")); + extra_str.append(mrr_type); + } + + if (is_using_filesort()) + { + if (extra_str.length() !=0) + extra_str.append(STRING_WITH_LEN("; ")); + extra_str.append(STRING_WITH_LEN("Using filesort")); + } + + if (using_io_buffer) + { + if (extra_str.length() !=0) + extra_str.append(STRING_WITH_LEN("; ")); + extra_str.append(STRING_WITH_LEN("Using buffer")); + } + + /* + Single-table DELETE commands do not do "Using temporary". + "Using index condition" is also not possible (which is an unjustified limitation) + */ + double r_filtered= 100 * tracker.get_filtered_after_where(); + double r_rows= tracker.get_avg_rows(); + + print_explain_row(output, explain_flags, is_analyze, + 1, /* id */ + select_type, + table_name.c_ptr(), + used_partitions_set? used_partitions.c_ptr() : NULL, + jtype, + &possible_keys, + key_buf.length()? key_buf.c_ptr() : NULL, + key_len_buf.length() ? 
key_len_buf.c_ptr() : NULL, + NULL, /* 'ref' is always NULL in single-table EXPLAIN DELETE */ + &rows, + tracker.has_scans()? &r_rows : NULL, + r_filtered, + extra_str.c_ptr_safe()); + + return print_explain_for_children(query, output, explain_flags, is_analyze); +} + + +void Explain_update::print_explain_json(Explain_query *query, + Json_writer *writer, + bool is_analyze) +{ + Json_writer_nesting_guard guard(writer); + + writer->add_member("query_block").start_object(); + writer->add_member("select_id").add_ll(1); + + /* This is the total time it took to do the UPDATE/DELETE */ + if (is_analyze && command_tracker.has_timed_statistics()) + { + writer->add_member("r_total_time_ms"). + add_double(command_tracker.get_time_ms()); + } + + if (impossible_where || no_partitions) + { + const char *msg= impossible_where ? STR_IMPOSSIBLE_WHERE : + STR_NO_ROWS_AFTER_PRUNING; + writer->add_member("table").start_object(); + writer->add_member("message").add_str(msg); + writer->end_object(); // table + writer->end_object(); // query_block + return; + } + + DBUG_ASSERT(!(is_using_filesort() && using_io_buffer)); + + bool doing_buffering= false; + + if (is_using_filesort()) + { + writer->add_member("filesort").start_object(); + if (is_analyze) + filesort_tracker->print_json_members(writer); + doing_buffering= true; + } + + if (using_io_buffer) + { + writer->add_member("buffer").start_object(); + doing_buffering= true; + } + + /* Produce elements that are common for buffered and un-buffered cases */ + writer->add_member("table").start_object(); + + if (get_type() == EXPLAIN_UPDATE) + writer->add_member("update").add_ll(1); + else + writer->add_member("delete").add_ll(1); + + writer->add_member("table_name").add_str(table_name); + + if (used_partitions_set) + print_json_array(writer, "partitions", used_partitions_list); + + writer->add_member("access_type").add_str(join_type_str[jtype]); + + if (!possible_keys.is_empty()) + { + List_iterator_fast it(possible_keys); + const char 
*name; + writer->add_member("possible_keys").start_array(); + while ((name= it++)) + writer->add_str(name); + writer->end_array(); + } + + /* `key`, `key_length` */ + if (quick_info && quick_info->is_basic()) + { + StringBuffer<64> key_buf; + StringBuffer<64> key_len_buf; + quick_info->print_extra_recursive(&key_buf); + quick_info->print_key_len(&key_len_buf); + + writer->add_member("key").add_str(key_buf); + writer->add_member("key_length").add_str(key_len_buf); + } + else if (key.get_key_name()) + { + writer->add_member("key").add_str(key.get_key_name()); + writer->add_member("key_length").add_str(key.get_key_len()); + } + + /* `used_key_parts` */ + String_list *parts_list= NULL; + if (quick_info && quick_info->is_basic()) + parts_list= &quick_info->range.key_parts_list; + else + parts_list= &key.key_parts_list; + + if (parts_list && !parts_list->is_empty()) + { + List_iterator_fast it(*parts_list); + const char *name; + writer->add_member("used_key_parts").start_array(); + while ((name= it++)) + writer->add_str(name); + writer->end_array(); + } + + if (quick_info && !quick_info->is_basic()) + { + writer->add_member("index_merge").start_object(); + quick_info->print_json(writer); + writer->end_object(); + } + + /* `rows` */ + writer->add_member("rows").add_ull(rows); + + + if (mrr_type.length() != 0) + writer->add_member("mrr_type").add_str(mrr_type.ptr()); + + if (is_analyze) + { + if (doing_buffering) + { + ha_rows r_rows; + double r_filtered; + + if (is_using_filesort()) + { + if (filesort_tracker->get_r_loops()) + r_rows= (ha_rows) filesort_tracker->get_avg_examined_rows(); + else + r_rows= 0; + r_filtered= filesort_tracker->get_r_filtered() * 100.0; + } + else + { + if (buf_tracker.has_scans()) + r_rows= (ha_rows) buf_tracker.get_avg_rows(); + else + r_rows= 0; + r_filtered= buf_tracker.get_filtered_after_where() * 100.0; + } + writer->add_member("r_rows").add_ull(r_rows); + writer->add_member("r_filtered").add_double(r_filtered); + } + else /* Not doing 
buffering */ + { + writer->add_member("r_rows"); + if (tracker.has_scans()) + writer->add_double(tracker.get_avg_rows()); + else + writer->add_null(); + + /* There is no 'filtered' estimate in UPDATE/DELETE atm */ + double r_filtered= tracker.get_filtered_after_where() * 100.0; + writer->add_member("r_filtered").add_double(r_filtered); + } + + if (table_tracker.has_timed_statistics()) + { + writer->add_member("r_total_time_ms"). + add_double(table_tracker.get_time_ms()); + } + } + + trace_engine_stats(handler_for_stats, writer); + + if (where_cond) + { + writer->add_member("attached_condition"); + write_item(writer, where_cond); + } + + /*** The part of plan that is before the buffering/sorting ends here ***/ + if (is_using_filesort()) + writer->end_object(); + + if (using_io_buffer) + writer->end_object(); + + writer->end_object(); // table + + print_explain_json_for_children(query, writer, is_analyze); + writer->end_object(); // query_block +} + + +int Explain_insert::print_explain(Explain_query *query, + select_result_sink *output, + uint8 explain_flags, + bool is_analyze) +{ + const char *select_type="INSERT"; + print_explain_row(output, explain_flags, is_analyze, + 1, /* id */ + select_type, + table_name.c_ptr(), + NULL, // partitions + JT_ALL, + NULL, // possible_keys + NULL, // key + NULL, // key_len + NULL, // ref + NULL, // rows + NULL, // r_rows + 100.0, // r_filtered + NULL); + + return print_explain_for_children(query, output, explain_flags, is_analyze); +} + +void Explain_insert::print_explain_json(Explain_query *query, + Json_writer *writer, bool is_analyze) +{ + Json_writer_nesting_guard guard(writer); + + writer->add_member("query_block").start_object(); + writer->add_member("select_id").add_ll(1); + writer->add_member("table").start_object(); + writer->add_member("table_name").add_str(table_name.c_ptr()); + writer->end_object(); // table + print_explain_json_for_children(query, writer, is_analyze); + writer->end_object(); // query_block +} + + 
+void delete_explain_query(LEX *lex) +{ + DBUG_ENTER("delete_explain_query"); + delete lex->explain; + lex->explain= NULL; + DBUG_VOID_RETURN; +} + + +void create_explain_query(LEX *lex, MEM_ROOT *mem_root) +{ + DBUG_ASSERT(!lex->explain); + DBUG_ENTER("create_explain_query"); + + lex->explain= new (mem_root) Explain_query(lex->thd, mem_root); + DBUG_ASSERT(mem_root == current_thd->mem_root); + + DBUG_VOID_RETURN; +} + +void create_explain_query_if_not_exists(LEX *lex, MEM_ROOT *mem_root) +{ + if (!lex->explain) + create_explain_query(lex, mem_root); +} + + +/** + Build arrays for collectiong keys statistics, sdd possible key names + to the list and name array + + @param alloc MEM_ROOT to put data in + @param list list of possible key names to fill + @param table table of the keys + @patam possible_keys possible keys map + + @retval 0 - OK + @retval 1 - Error +*/ + +int Explain_range_checked_fer::append_possible_keys_stat(MEM_ROOT *alloc, + TABLE *table, + key_map possible_keys) +{ + uint j; + multi_alloc_root(alloc, &keys_stat, sizeof(ha_rows) * table->s->keys, + &keys_stat_names, sizeof(char *) * table->s->keys, NULL); + if ((!keys_stat) || (!keys_stat_names)) + { + keys_stat= NULL; + keys_stat_names= NULL; + return 1; + } + keys_map= possible_keys; + keys= table->s->keys; + bzero(keys_stat, sizeof(ha_rows) * table->s->keys); + for (j= 0; j < table->s->keys; j++) + { + if (possible_keys.is_set(j)) + { + if (!(keys_stat_names[j]= key_set.append_str(alloc, + table->key_info[j].name.str))) + return 1; + } + else + keys_stat_names[j]= NULL; + } + return 0; +} + +void Explain_range_checked_fer::collect_data(QUICK_SELECT_I *quick) +{ + if (quick) + { + if (quick->index == MAX_KEY) + index_merge++; + else + { + DBUG_ASSERT(quick->index < keys); + DBUG_ASSERT(keys_stat); + DBUG_ASSERT(keys_stat_names); + DBUG_ASSERT(keys_stat_names[ quick->index]); + keys_stat[quick->index]++; + } + } + else + full_scan++; +} + + +void Explain_range_checked_fer::print_json(Json_writer 
*writer, + bool is_analyze) +{ + writer->add_member("range-checked-for-each-record").start_object(); + add_json_keyset(writer, "keys", &key_set); + if (is_analyze) + { + writer->add_member("r_keys").start_object(); + writer->add_member("full_scan").add_ll(full_scan); + writer->add_member("index_merge").add_ll(index_merge); + if (keys_stat) + { + writer->add_member("range").start_object(); + for (uint i= 0; i < keys; i++) + { + if (keys_stat_names[i]) + { + writer->add_member(keys_stat_names[i]).add_ll(keys_stat[i]); + } + } + writer->end_object(); + } + writer->end_object(); + } +} diff --git a/sql/sql_explain.h b/sql/sql_explain.h new file mode 100644 index 00000000..c71ba3a6 --- /dev/null +++ b/sql/sql_explain.h @@ -0,0 +1,1064 @@ +/* + Copyright (c) 2013 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + +== EXPLAIN/ANALYZE architecture == + +=== [SHOW] EXPLAIN data === +Query optimization produces two data structures: +1. execution data structures themselves (eg. JOINs, JOIN_TAB, etc, etc) +2. Explain data structures. + +#2 are self contained set of data structures that has sufficient info to +produce output of SHOW EXPLAIN, EXPLAIN [FORMAT=JSON], or +ANALYZE [FORMAT=JSON], without accessing the execution data structures. + +The exception is that Explain data structures have Item* pointers. 
See +ExplainDataStructureLifetime below for details. + +=== ANALYZE data === +EXPLAIN data structures have embedded ANALYZE data structures. These are +objects that are used to track how the parts of query plan were executed: +how many times each part of query plan was invoked, how many rows were +read/returned, etc. + +Each execution data structure keeps a direct pointer to its ANALYZE data +structure. It is needed so that execution code can quickly increment the +counters. + +(note that this increases the set of data that is frequently accessed +during the execution. What is the impact of this?) + +Since ANALYZE/EXPLAIN data structures are separated from execution data +structures, it is easy to have them survive until the end of the query, +where we can return ANALYZE [FORMAT=JSON] output to the user, or print +it into the slow query log. + +*/ + +#ifndef SQL_EXPLAIN_INCLUDED +#define SQL_EXPLAIN_INCLUDED + +class String_list: public List +{ +public: + const char *append_str(MEM_ROOT *mem_root, const char *str); +}; + +class Json_writer; + +/************************************************************************************** + + Data structures for producing EXPLAIN outputs. + + These structures + - Can be produced inexpensively from query plan. + - Store sufficient information to produce tabular EXPLAIN output (the goal is + to be able to produce JSON also) + +*************************************************************************************/ + + + +class Explain_query; + +/* + A node can be either a SELECT, or a UNION. 
+*/ +class Explain_node : public Sql_alloc +{ +public: + Explain_node(MEM_ROOT *root) : + cache_tracker(NULL), + connection_type(EXPLAIN_NODE_OTHER), + children(root) + {} + /* A type specifying what kind of node this is */ + enum explain_node_type + { + EXPLAIN_UNION, + EXPLAIN_SELECT, + EXPLAIN_BASIC_JOIN, + EXPLAIN_UPDATE, + EXPLAIN_DELETE, + EXPLAIN_INSERT + }; + + /* How this node is connected */ + enum explain_connection_type { + EXPLAIN_NODE_OTHER, + EXPLAIN_NODE_DERIVED, /* Materialized derived table */ + EXPLAIN_NODE_NON_MERGED_SJ /* aka JTBM semi-join */ + }; + + virtual enum explain_node_type get_type()= 0; + virtual uint get_select_id()= 0; + + /** + expression cache statistics + */ + Expression_cache_tracker* cache_tracker; + + /* + How this node is connected to its parent. + (NOTE: EXPLAIN_NODE_NON_MERGED_SJ is set very late currently) + */ + enum explain_connection_type connection_type; + +protected: + /* + A node may have children nodes. When a node's explain structure is + created, children nodes may not yet have QPFs. This is why we store ids. + */ + Dynamic_array children; +public: + void add_child(int select_no) + { + children.append(select_no); + } + + virtual int print_explain(Explain_query *query, select_result_sink *output, + uint8 explain_flags, bool is_analyze)=0; + virtual void print_explain_json(Explain_query *query, Json_writer *writer, + bool is_analyze)= 0; + + int print_explain_for_children(Explain_query *query, select_result_sink *output, + uint8 explain_flags, bool is_analyze); + void print_explain_json_for_children(Explain_query *query, + Json_writer *writer, bool is_analyze); + bool print_explain_json_cache(Json_writer *writer, bool is_analyze); + virtual ~Explain_node() = default; +}; + + +class Explain_table_access; + + +/* + A basic join. This is only used for SJ-Materialization nests. + + Basic join doesn't have ORDER/GROUP/DISTINCT operations. It also cannot be + degenerate. + + It has its own select_id. 
+*/ +class Explain_basic_join : public Explain_node +{ +public: + enum explain_node_type get_type() { return EXPLAIN_BASIC_JOIN; } + + Explain_basic_join(MEM_ROOT *root) : Explain_node(root), join_tabs(NULL) {} + ~Explain_basic_join(); + + bool add_table(Explain_table_access *tab, Explain_query *query); + + uint get_select_id() { return select_id; } + + uint select_id; + + int print_explain(Explain_query *query, select_result_sink *output, + uint8 explain_flags, bool is_analyze); + void print_explain_json(Explain_query *query, Json_writer *writer, + bool is_analyze); + + void print_explain_json_interns(Explain_query *query, Json_writer *writer, + bool is_analyze); + + /* A flat array of Explain structs for tables. */ + Explain_table_access** join_tabs; + uint n_join_tabs; +}; + + +class Explain_aggr_node; +/* + EXPLAIN structure for a SELECT. + + A select can be: + 1. A degenerate case. In this case, message!=NULL, and it contains a + description of what kind of degenerate case it is (e.g. "Impossible + WHERE"). + 2. a non-degenrate join. In this case, join_tabs describes the join. + + In the non-degenerate case, a SELECT may have a GROUP BY/ORDER BY operation. + + In both cases, the select may have children nodes. class Explain_node + provides a way get node's children. 
+*/ + +class Explain_select : public Explain_basic_join +{ +public: + enum explain_node_type get_type() { return EXPLAIN_SELECT; } + + Explain_select(MEM_ROOT *root, bool is_analyze) : + Explain_basic_join(root), +#ifndef DBUG_OFF + select_lex(NULL), +#endif + linkage(UNSPECIFIED_TYPE), + is_lateral(false), + message(NULL), + having(NULL), having_value(Item::COND_UNDEF), + using_temporary(false), using_filesort(false), + time_tracker(is_analyze), + aggr_tree(NULL) + {} + + void add_linkage(Json_writer *writer); + +public: +#ifndef DBUG_OFF + SELECT_LEX *select_lex; +#endif + const char *select_type; + enum sub_select_type linkage; + bool is_lateral; + + /* + If message != NULL, this is a degenerate join plan, and all subsequent + members have no info + */ + const char *message; + + /* Expensive constant condition */ + Item *exec_const_cond; + Item *outer_ref_cond; + Item *pseudo_bits_cond; + + /* HAVING condition */ + Item *having; + Item::cond_result having_value; + + /* Global join attributes. In tabular form, they are printed on the first row */ + bool using_temporary; + bool using_filesort; + + /* ANALYZE members */ + Time_and_counter_tracker time_tracker; + + /* + Part of query plan describing sorting, temp.table usage, and duplicate + removal + */ + Explain_aggr_node* aggr_tree; + + int print_explain(Explain_query *query, select_result_sink *output, + uint8 explain_flags, bool is_analyze); + void print_explain_json(Explain_query *query, Json_writer *writer, + bool is_analyze); + + Table_access_tracker *get_using_temporary_read_tracker() + { + return &using_temporary_read_tracker; + } +private: + Table_access_tracker using_temporary_read_tracker; +}; + +///////////////////////////////////////////////////////////////////////////// +// EXPLAIN structures for ORDER/GROUP operations. +///////////////////////////////////////////////////////////////////////////// +typedef enum +{ + AGGR_OP_TEMP_TABLE, + AGGR_OP_FILESORT, + //AGGR_OP_READ_SORTED_FILE, // need this? 
+ AGGR_OP_REMOVE_DUPLICATES, + AGGR_OP_WINDOW_FUNCS + //AGGR_OP_JOIN // Need this? +} enum_explain_aggr_node_type; + + +class Explain_aggr_node : public Sql_alloc +{ +public: + virtual enum_explain_aggr_node_type get_type()= 0; + virtual ~Explain_aggr_node() = default; + Explain_aggr_node *child; +}; + +class Explain_aggr_filesort : public Explain_aggr_node +{ + List sort_items; + List sort_directions; +public: + enum_explain_aggr_node_type get_type() { return AGGR_OP_FILESORT; } + Filesort_tracker tracker; + + Explain_aggr_filesort(MEM_ROOT *mem_root, bool is_analyze, + Filesort *filesort); + + void print_json_members(Json_writer *writer, bool is_analyze); +}; + +class Explain_aggr_tmp_table : public Explain_aggr_node +{ +public: + enum_explain_aggr_node_type get_type() { return AGGR_OP_TEMP_TABLE; } +}; + +class Explain_aggr_remove_dups : public Explain_aggr_node +{ +public: + enum_explain_aggr_node_type get_type() { return AGGR_OP_REMOVE_DUPLICATES; } +}; + +class Explain_aggr_window_funcs : public Explain_aggr_node +{ + List sorts; +public: + enum_explain_aggr_node_type get_type() { return AGGR_OP_WINDOW_FUNCS; } + + void print_json_members(Json_writer *writer, bool is_analyze); + friend class Window_funcs_computation; +}; + +///////////////////////////////////////////////////////////////////////////// + +extern const char *unit_operation_text[4]; +extern const char *pushed_derived_text; +extern const char *pushed_select_text; + +/* + Explain structure for a UNION [ALL]. + + A UNION may or may not have "Using filesort". 
+*/ + +class Explain_union : public Explain_node +{ +public: + Explain_union(MEM_ROOT *root, bool is_analyze) : + Explain_node(root), union_members(PSI_INSTRUMENT_MEM), + is_recursive_cte(false), + fake_select_lex_explain(root, is_analyze) + {} + + enum explain_node_type get_type() { return EXPLAIN_UNION; } + unit_common_op operation; + + uint get_select_id() + { + DBUG_ASSERT(union_members.elements() > 0); + return union_members.at(0); + } + /* + Members of the UNION. Note: these are different from UNION's "children". + Example: + + (select * from t1) union + (select * from t2) order by (select col1 from t3 ...) + + here + - select-from-t1 and select-from-t2 are "union members", + - select-from-t3 is the only "child". + */ + Dynamic_array union_members; + + void add_select(int select_no) + { + union_members.append(select_no); + } + int print_explain(Explain_query *query, select_result_sink *output, + uint8 explain_flags, bool is_analyze); + void print_explain_json(Explain_query *query, Json_writer *writer, + bool is_analyze); + + const char *fake_select_type; + bool using_filesort; + bool using_tmp; + bool is_recursive_cte; + + /* + Explain data structure for "fake_select_lex" (i.e. for the degenerate + SELECT that reads UNION result). + It doesn't have a query plan, but we still need execution tracker, etc. + */ + Explain_select fake_select_lex_explain; + + Table_access_tracker *get_fake_select_lex_tracker() + { + return &fake_select_lex_tracker; + } + Table_access_tracker *get_tmptable_read_tracker() + { + return &tmptable_read_tracker; + } +private: + uint make_union_table_name(char *buf); + + Table_access_tracker fake_select_lex_tracker; + /* This one is for reading after ORDER BY */ + Table_access_tracker tmptable_read_tracker; +}; + + +class Explain_update; +class Explain_delete; +class Explain_insert; + + +/* + Explain structure for a query (i.e. a statement). + + This should be able to survive when the query plan was deleted. 
Currently, + we do not intend for it survive until after query's MEM_ROOT is freed. + + == ExplainDataStructureLifetime == + + >dispatch_command + | >mysql_parse + | | ... + | | + | | explain->query_plan_ready(); // (1) + | | + | | some_join->cleanup(); // (2) + | | + | | explain->notify_tables_are_closed(); // (3) + | | close_thread_tables(); // (4) + | | ... + | | free_items(); // (5) + | | ... + | | + | dispatch_command + + (1) - Query plan construction is finished and it is available for reading. + + (2) - Temporary tables are freed (with exception of derived tables + which are freed at step (4)). + The tables are no longer accessible but one can still call + item->print(), even for items that refer to temp.tables (see + Item_field::print() for details) + + (3) - Notification about (4). + (4) - Tables used by the query are closed. One consequence of this is that + the values of the const tables' fields are not available anymore. + We could adjust the code in Item_field::print() to handle this but + instead we make step (3) disallow production of FORMAT=JSON output. + We also disable processing of SHOW EXPLAIN|ANALYZE output because + the query is about to finish anyway. + + (5) - Item objects are freed. After this, it's certainly not possible to + print them into FORMAT=JSON output. + + (6) - We may decide to log tabular EXPLAIN output to the slow query log. 
+ +*/ + +class Explain_query : public Sql_alloc +{ +public: + Explain_query(THD *thd, MEM_ROOT *root); + ~Explain_query(); + + /* Add a new node */ + void add_node(Explain_node *node); + void add_insert_plan(Explain_insert *insert_plan_arg); + void add_upd_del_plan(Explain_update *upd_del_plan_arg); + + /* This will return a select, or a union */ + Explain_node *get_node(uint select_id); + + /* This will return a select (even if there is a union with this id) */ + Explain_select *get_select(uint select_id); + + Explain_union *get_union(uint select_id); + + /* Produce a tabular EXPLAIN output */ + int print_explain(select_result_sink *output, uint8 explain_flags, + bool is_analyze); + + /* Send tabular EXPLAIN to the client */ + int send_explain(THD *thd, bool extended); + + /* Return tabular EXPLAIN output as a text string */ + bool print_explain_str(THD *thd, String *out_str, bool is_analyze); + + int print_explain_json(select_result_sink *output, bool is_analyze, + ulonglong query_time_in_progress_ms= 0); + + /* If true, at least part of EXPLAIN can be printed */ + bool have_query_plan() { return insert_plan || upd_del_plan|| get_node(1) != NULL; } + + void query_plan_ready(); + void notify_tables_are_closed(); + + MEM_ROOT *mem_root; + + Explain_update *get_upd_del_plan() { return upd_del_plan; } +private: + bool print_query_blocks_json(Json_writer *writer, const bool is_analyze); + void print_query_optimization_json(Json_writer *writer); + void send_explain_json_to_output(Json_writer *writer, select_result_sink *output); + + /* Explain_delete inherits from Explain_update */ + Explain_update *upd_del_plan; + + /* Query "plan" for INSERTs */ + Explain_insert *insert_plan; + + Dynamic_array unions; + Dynamic_array selects; + + THD *stmt_thd; // for APC start/stop + bool apc_enabled; + /* + Debugging aid: count how many times add_node() was called. Ideally, it + should be one, we currently allow O(1) query plan saves for each + select or union. 
The goal is not to have O(#rows_in_some_table), which + is unacceptable. + */ + longlong operations; +#ifndef DBUG_OFF + bool can_print_json= false; +#endif + + Exec_time_tracker optimization_time_tracker; +}; + + +/* + Some of the tags have matching text. See extra_tag_text for text names, and + Explain_table_access::append_tag_name() for code to convert from tag form to text + form. +*/ +enum explain_extra_tag +{ + ET_none= 0, /* not-a-tag */ + ET_USING_INDEX_CONDITION, + ET_USING_INDEX_CONDITION_BKA, + ET_USING, /* For quick selects of various kinds */ + ET_RANGE_CHECKED_FOR_EACH_RECORD, + ET_USING_WHERE_WITH_PUSHED_CONDITION, + ET_USING_WHERE, + ET_NOT_EXISTS, + + ET_USING_INDEX, + ET_FULL_SCAN_ON_NULL_KEY, + ET_SKIP_OPEN_TABLE, + ET_OPEN_FRM_ONLY, + ET_OPEN_FULL_TABLE, + + ET_SCANNED_0_DATABASES, + ET_SCANNED_1_DATABASE, + ET_SCANNED_ALL_DATABASES, + + ET_USING_INDEX_FOR_GROUP_BY, + + ET_USING_MRR, // does not print "Using mrr". + + ET_DISTINCT, + ET_LOOSESCAN, + ET_START_TEMPORARY, + ET_END_TEMPORARY, + ET_FIRST_MATCH, + + ET_USING_JOIN_BUFFER, + + ET_CONST_ROW_NOT_FOUND, + ET_UNIQUE_ROW_NOT_FOUND, + ET_IMPOSSIBLE_ON_CONDITION, + ET_TABLE_FUNCTION, + + ET_total +}; + + +/* + Explain data structure describing join buffering use. +*/ + +class EXPLAIN_BKA_TYPE +{ +public: + EXPLAIN_BKA_TYPE() : join_alg(NULL) {} + + size_t join_buffer_size; + + bool incremental; + + /* + NULL if no join buferring used. + Other values: BNL, BNLH, BKA, BKAH. + */ + const char *join_alg; + + /* Information about MRR usage. 
*/ + StringBuffer<64> mrr_type; + + bool is_using_jbuf() { return (join_alg != NULL); } +}; + + +/* + Data about how an index is used by some access method +*/ +class Explain_index_use : public Sql_alloc +{ + char *key_name; + uint key_len; + char *filter_name; + uint filter_len; +public: + String_list key_parts_list; + + Explain_index_use() + { + clear(); + } + + void clear() + { + key_name= NULL; + key_len= (uint)-1; + filter_name= NULL; + filter_len= (uint)-1; + } + bool set(MEM_ROOT *root, KEY *key_name, uint key_len_arg); + bool set_pseudo_key(MEM_ROOT *root, const char *key_name); + + inline const char *get_key_name() const { return key_name; } + inline uint get_key_len() const { return key_len; } + //inline const char *get_filter_name() const { return filter_name; } +}; + + +/* + Query Plan data structure for Rowid filter. +*/ +class Explain_rowid_filter : public Sql_alloc +{ +public: + /* Quick select used to collect the rowids into filter */ + Explain_quick_select *quick; + + /* How many rows the above quick select is expected to return */ + ha_rows rows; + + /* Expected selectivity for the filter */ + double selectivity; + + /* Tracker with the information about how rowid filter is executed */ + Rowid_filter_tracker *tracker; + + void print_explain_json(Explain_query *query, Json_writer *writer, + bool is_analyze); + + /* + TODO: + Here should be ANALYZE members: + - r_rows for the quick select + - An object that tracked the table access time + - real selectivity of the filter. 
+ */ +}; + + +/* + QPF for quick range selects, as well as index_merge select +*/ +class Explain_quick_select : public Sql_alloc +{ +public: + Explain_quick_select(int quick_type_arg) : quick_type(quick_type_arg) + {} + + const int quick_type; + + bool is_basic() + { + return (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE || + quick_type == QUICK_SELECT_I::QS_TYPE_RANGE_DESC || + quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX); + } + + /* This is used when quick_type == QUICK_SELECT_I::QS_TYPE_RANGE */ + Explain_index_use range; + + /* Used in all other cases */ + List children; + + void print_extra(String *str); + void print_key(String *str); + void print_key_len(String *str); + + void print_json(Json_writer *writer); + + void print_extra_recursive(String *str); +private: + const char *get_name_by_type(); +}; + + +/* + Data structure for "range checked for each record". + It's a set of keys, tabular explain prints hex bitmap, json prints key names. +*/ + +typedef const char* NAME; + +class Explain_range_checked_fer : public Sql_alloc +{ +public: + String_list key_set; + key_map keys_map; +private: + ha_rows full_scan, index_merge; + ha_rows *keys_stat; + NAME *keys_stat_names; + uint keys; + +public: + Explain_range_checked_fer() + :Sql_alloc(), full_scan(0), index_merge(0), + keys_stat(0), keys_stat_names(0), keys(0) + {} + + int append_possible_keys_stat(MEM_ROOT *alloc, + TABLE *table, key_map possible_keys); + void collect_data(QUICK_SELECT_I *quick); + void print_json(Json_writer *writer, bool is_analyze); +}; + + +/* + EXPLAIN data structure for a single JOIN_TAB. 
+*/ + +class Explain_table_access : public Sql_alloc +{ +public: + Explain_table_access(MEM_ROOT *root, bool timed) : + derived_select_number(0), + non_merged_sjm_number(0), + extra_tags(root), + range_checked_fer(NULL), + full_scan_on_null_key(false), + start_dups_weedout(false), + end_dups_weedout(false), + where_cond(NULL), + cache_cond(NULL), + pushed_index_cond(NULL), + sjm_nest(NULL), + pre_join_sort(NULL), + handler_for_stats(NULL), + jbuf_unpack_tracker(timed), + rowid_filter(NULL) + {} + ~Explain_table_access() { delete sjm_nest; } + + void push_extra(enum explain_extra_tag extra_tag); + + /* Internals */ + + /* id and 'select_type' are cared-of by the parent Explain_select */ + StringBuffer<32> table_name; + StringBuffer<32> used_partitions; + String_list used_partitions_list; + // valid with ET_USING_MRR + StringBuffer<32> mrr_type; + StringBuffer<32> firstmatch_table_name; + + /* + Non-zero number means this is a derived table. The number can be used to + find the query plan for the derived table + */ + int derived_select_number; + /* TODO: join with the previous member. */ + int non_merged_sjm_number; + + enum join_type type; + + bool used_partitions_set; + + /* Empty means "NULL" will be printed */ + String_list possible_keys; + + bool rows_set; /* not set means 'NULL' should be printed */ + bool filtered_set; /* not set means 'NULL' should be printed */ + // Valid if ET_USING_INDEX_FOR_GROUP_BY is present + bool loose_scan_is_scanning; + + /* + Index use: key name and length. + Note: that when one is accessing I_S tables, those may show use of + non-existant indexes. + + key.key_name == NULL means 'NULL' will be shown in tabular output. + key.key_len == (uint)-1 means 'NULL' will be shown in tabular output. + */ + Explain_index_use key; + + /* + when type==JT_HASH_NEXT, 'key' stores the hash join pseudo-key. + hash_next_key stores the table's key. 
+ */ + Explain_index_use hash_next_key; + + String_list ref_list; + + ha_rows rows; + double filtered; + + /* + Contents of the 'Extra' column. Some are converted into strings, some have + parameters, values for which are stored below. + */ + Dynamic_array extra_tags; + + // Valid if ET_USING tag is present + Explain_quick_select *quick_info; + + /* Non-NULL value means this tab uses "range checked for each record" */ + Explain_range_checked_fer *range_checked_fer; + + bool full_scan_on_null_key; + + // valid with ET_USING_JOIN_BUFFER + EXPLAIN_BKA_TYPE bka_type; + + bool start_dups_weedout; + bool end_dups_weedout; + + /* + Note: lifespan of WHERE condition is less than lifespan of this object. + The below two are valid if tags include "ET_USING_WHERE". + (TODO: indexsubquery may put ET_USING_WHERE without setting where_cond?) + */ + Item *where_cond; + Item *cache_cond; + + /* + This is either pushed index condition, or BKA's index condition. + (the latter refers to columns of other tables and so can only be checked by + BKA code). Examine extra_tags to tell which one it is. + */ + Item *pushed_index_cond; + + Explain_basic_join *sjm_nest; + + /* + This describes a possible filesort() call that is done before doing the + join operation. + */ + Explain_aggr_filesort *pre_join_sort; + + /* ANALYZE members */ + + /* Tracker for reading the table */ + Table_access_tracker tracker; + Exec_time_tracker op_tracker; + Gap_time_tracker extra_time_tracker; + + /* + Handler object to get the handler_stats from. + + Notes: + This pointer is only valid until notify_tables_are_closed() is called. + After that, the tables may be freed or reused, together with their + handler_stats objects. + notify_tables_are_closed() disables printing of FORMAT=JSON output. + r_engine_stats is only printed in FORMAT=JSON output, so we're fine. + + We do not store pointers to temporary (aka "work") tables here. + Temporary tables may be freed (e.g. 
by JOIN::cleanup()) or re-created + during query execution (when HEAP table is converted into Aria). + */ + handler *handler_for_stats; + + /* When using join buffer: Track the reads from join buffer */ + Table_access_tracker jbuf_tracker; + + /* When using join buffer: time spent unpacking rows from the join buffer */ + Time_and_counter_tracker jbuf_unpack_tracker; + + /* + When using join buffer: time spent after unpacking rows from the join + buffer. This will capture the time spent checking the Join Condition: + the condition that depends on this table and preceding tables. + */ + Gap_time_tracker jbuf_extra_time_tracker; + + /* When using join buffer: Track the number of incoming record combinations */ + Counter_tracker jbuf_loops_tracker; + + Explain_rowid_filter *rowid_filter; + + int print_explain(select_result_sink *output, uint8 explain_flags, + bool is_analyze, + uint select_id, const char *select_type, + bool using_temporary, bool using_filesort); + void print_explain_json(Explain_query *query, Json_writer *writer, + bool is_analyze); + +private: + void append_tag_name(String *str, enum explain_extra_tag tag); + void fill_key_str(String *key_str, bool is_json) const; + void fill_key_len_str(String *key_len_str, bool is_json) const; + double get_r_filtered(); + void tag_to_json(Json_writer *writer, enum explain_extra_tag tag); +}; + + +/* + EXPLAIN structure for single-table UPDATE. + + This is similar to Explain_table_access, except that it is more restrictive. + Also, it can have UPDATE operation options, but currently there aren't any. + + Explain_delete inherits from this. 
+*/ + +class Explain_update : public Explain_node +{ +public: + + Explain_update(MEM_ROOT *root, bool is_analyze) : + Explain_node(root), + filesort_tracker(NULL), + command_tracker(is_analyze), + handler_for_stats(NULL) + {} + + virtual enum explain_node_type get_type() { return EXPLAIN_UPDATE; } + virtual uint get_select_id() { return 1; /* always root */ } + + const char *select_type; + + StringBuffer<32> used_partitions; + String_list used_partitions_list; + bool used_partitions_set; + + bool impossible_where; + bool no_partitions; + StringBuffer<64> table_name; + + enum join_type jtype; + String_list possible_keys; + + /* Used key when doing a full index scan (possibly with limit) */ + Explain_index_use key; + + /* + MRR that's used with quick select. This should probably belong to the + quick select + */ + StringBuffer<64> mrr_type; + + Explain_quick_select *quick_info; + + bool using_where; + Item *where_cond; + + ha_rows rows; + + bool using_io_buffer; + + /* Tracker for doing reads when filling the buffer */ + Table_access_tracker buf_tracker; + + bool is_using_filesort() { return filesort_tracker? true: false; } + /* + Non-null value of filesort_tracker means "using filesort" + + if we are using filesort, then table_tracker is for the io done inside + filesort. + + 'tracker' is for tracking post-filesort reads. 
+ */ + Filesort_tracker *filesort_tracker; + + /* ANALYZE members and methods */ + Table_access_tracker tracker; + + /* This tracks execution of the whole command */ + Time_and_counter_tracker command_tracker; + + /* TODO: This tracks time to read rows from the table */ + Exec_time_tracker table_tracker; + + /* The same as Explain_table_access::handler_for_stats */ + handler *handler_for_stats; + + virtual int print_explain(Explain_query *query, select_result_sink *output, + uint8 explain_flags, bool is_analyze); + virtual void print_explain_json(Explain_query *query, Json_writer *writer, + bool is_analyze); +}; + + +/* + EXPLAIN data structure for an INSERT. + + At the moment this doesn't do much as we don't really have any query plans + for INSERT statements. +*/ + +class Explain_insert : public Explain_node +{ +public: + Explain_insert(MEM_ROOT *root) : + Explain_node(root) + {} + + StringBuffer<64> table_name; + + enum explain_node_type get_type() { return EXPLAIN_INSERT; } + uint get_select_id() { return 1; /* always root */ } + + int print_explain(Explain_query *query, select_result_sink *output, + uint8 explain_flags, bool is_analyze); + void print_explain_json(Explain_query *query, Json_writer *writer, + bool is_analyze); +}; + + +/* + EXPLAIN data of a single-table DELETE. +*/ + +class Explain_delete: public Explain_update +{ +public: + Explain_delete(MEM_ROOT *root, bool is_analyze) : + Explain_update(root, is_analyze) + {} + + /* + TRUE means we're going to call handler->delete_all_rows() and not read any + rows. 
+ */ + bool deleting_all_rows; + + virtual enum explain_node_type get_type() { return EXPLAIN_DELETE; } + virtual uint get_select_id() { return 1; /* always root */ } + + virtual int print_explain(Explain_query *query, select_result_sink *output, + uint8 explain_flags, bool is_analyze); + virtual void print_explain_json(Explain_query *query, Json_writer *writer, + bool is_analyze); +}; + + +#endif //SQL_EXPLAIN_INCLUDED diff --git a/sql/sql_expression_cache.cc b/sql/sql_expression_cache.cc new file mode 100644 index 00000000..8681e08e --- /dev/null +++ b/sql/sql_expression_cache.cc @@ -0,0 +1,344 @@ +/* Copyright (C) 2010-2011 Monty Program Ab & Oleksandr Byelkin + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_base.h" +#include "sql_select.h" +#include "sql_expression_cache.h" + +/** + Minimum hit ration to proceed on disk if in memory table overflowed. 
+ hit_rate = hit / (miss + hit); +*/ +#define EXPCACHE_MIN_HIT_RATE_FOR_DISK_TABLE 0.7 +/** + Minimum hit ratio to keep in memory table (do not switch cache off) + hit_rate = hit / (miss + hit); +*/ +#define EXPCACHE_MIN_HIT_RATE_FOR_MEM_TABLE 0.2 +/** + Number of cache miss to check hit ratio (maximum cache performance + impact in the case when the cache is not applicable) +*/ +#define EXPCACHE_CHECK_HIT_RATIO_AFTER 200 + +/* + Expression cache is used only for caching subqueries now, so its statistic + variables we call subquery_cache*. +*/ +ulong subquery_cache_miss, subquery_cache_hit; + +Expression_cache_tmptable::Expression_cache_tmptable(THD *thd, + List &dependants, + Item *value) + :cache_table(NULL), table_thd(thd), tracker(NULL), items(dependants), val(value), + hit(0), miss(0), inited (0) +{ + DBUG_ENTER("Expression_cache_tmptable::Expression_cache_tmptable"); + DBUG_VOID_RETURN; +}; + + +/** + Disable cache +*/ + +void Expression_cache_tmptable::disable_cache() +{ + if (cache_table->file->inited) + cache_table->file->ha_index_end(); + free_tmp_table(table_thd, cache_table); + cache_table= NULL; + update_tracker(); + if (tracker) + tracker->detach_from_cache(); +} + + +/** + Field enumerator for TABLE::add_tmp_key + + @param arg reference variable with current field number + + @return field number +*/ + +static uint field_enumerator(uchar *arg) +{ + return ((uint*)arg)[0]++; +} + + +/** + Initialize temporary table and auxiliary structures for the expression + cache + + @details + The function creates a temporary table for the expression cache, defines + the search index and initializes auxiliary search structures used to check + whether a given set of of values of the expression parameters is in some + cache entry. 
+*/ + +void Expression_cache_tmptable::init() +{ + List_iterator li(items); + Item_iterator_list it(li); + uint field_counter; + LEX_CSTRING cache_table_name= { STRING_WITH_LEN("subquery-cache-table") }; + DBUG_ENTER("Expression_cache_tmptable::init"); + DBUG_ASSERT(!inited); + inited= TRUE; + cache_table= NULL; + + if (items.elements == 0) + { + DBUG_PRINT("info", ("All parameters were removed by optimizer.")); + DBUG_VOID_RETURN; + } + + /* add result field */ + items.push_front(val); + + cache_table_param.init(); + /* dependent items and result */ + cache_table_param.field_count= cache_table_param.func_count= items.elements; + /* postpone table creation to index description */ + cache_table_param.skip_create_table= 1; + + if (!(cache_table= create_tmp_table(table_thd, &cache_table_param, + items, (ORDER*) NULL, + FALSE, TRUE, + ((table_thd->variables.option_bits | + TMP_TABLE_ALL_COLUMNS) & + ~TMP_TABLE_FORCE_MYISAM), + HA_POS_ERROR, + &cache_table_name, + TRUE))) + { + DBUG_PRINT("error", ("create_tmp_table failed, caching switched off")); + DBUG_VOID_RETURN; + } + + if (cache_table->s->db_type() != heap_hton) + { + DBUG_PRINT("error", ("we need only heap table")); + goto error; + } + + field_counter= 1; + + if (cache_table->alloc_keys(1) || + cache_table->add_tmp_key(0, items.elements - 1, &field_enumerator, + (uchar*)&field_counter, TRUE) || + ref.tmp_table_index_lookup_init(table_thd, cache_table->key_info, it, + TRUE, 1 /* skip result field*/)) + { + DBUG_PRINT("error", ("creating index failed")); + goto error; + } + cache_table->s->keys= 1; + ref.null_rejecting= 1; + ref.const_ref_part_map= 0; + ref.disable_cache= FALSE; + ref.has_record= 0; + ref.use_count= 0; + + + if (open_tmp_table(cache_table)) + { + DBUG_PRINT("error", ("Opening (creating) temporary table failed")); + goto error; + } + + if (!(cached_result= new (table_thd->mem_root) + Item_field(table_thd, cache_table->field[0]))) + { + DBUG_PRINT("error", ("Creating Item_field failed")); + goto 
error; + } + + update_tracker(); + DBUG_VOID_RETURN; + +error: + disable_cache(); + DBUG_VOID_RETURN; +} + + +Expression_cache_tmptable::~Expression_cache_tmptable() +{ + /* Add accumulated statistics */ + statistic_add(subquery_cache_miss, miss, &LOCK_status); + statistic_add(subquery_cache_hit, hit, &LOCK_status); + + if (cache_table) + disable_cache(); + else + { + update_tracker(); + if (tracker) + tracker->detach_from_cache(); + tracker= NULL; + } +} + + +/** + Check if a given set of parameters of the expression is in the cache + + @param [out] value the expression value found in the cache if any + + @details + For a given set of the parameters of the expression the function + checks whether it can be found in some entry of the cache. If so + the function returns the result of the expression extracted from + the cache. + + @retval Expression_cache::HIT if the set of parameters is in the cache + @retval Expression_cache::MISS - otherwise +*/ + +Expression_cache::result Expression_cache_tmptable::check_value(Item **value) +{ + int res; + DBUG_ENTER("Expression_cache_tmptable::check_value"); + + if (cache_table) + { + DBUG_PRINT("info", ("status: %u has_record %u", + (uint)cache_table->status, (uint)ref.has_record)); + if ((res= join_read_key2(table_thd, NULL, cache_table, &ref)) == 1) + DBUG_RETURN(ERROR); + + if (res) + { + if (((++miss) == EXPCACHE_CHECK_HIT_RATIO_AFTER) && + ((double)hit / ((double)hit + miss)) < + EXPCACHE_MIN_HIT_RATE_FOR_MEM_TABLE) + { + DBUG_PRINT("info", + ("Early check: hit rate is not so good to keep the cache")); + disable_cache(); + } + + DBUG_RETURN(MISS); + } + + hit++; + *value= cached_result; + DBUG_RETURN(Expression_cache::HIT); + } + DBUG_RETURN(Expression_cache::MISS); +} + + +/** + Put a new entry into the expression cache + + @param value the result of the expression to be put into the cache + + @details + The function evaluates 'value' and puts the result into the cache as the + result of the expression for the current set 
of parameters. + + @retval FALSE OK + @retval TRUE Error +*/ + +my_bool Expression_cache_tmptable::put_value(Item *value) +{ + int error; + DBUG_ENTER("Expression_cache_tmptable::put_value"); + DBUG_ASSERT(inited); + + if (!cache_table) + { + DBUG_PRINT("info", ("No table so behave as we successfully put value")); + DBUG_RETURN(FALSE); + } + + *(items.head_ref())= value; + fill_record(table_thd, cache_table, cache_table->field, items, TRUE, TRUE); + if (unlikely(table_thd->is_error())) + goto err;; + + if (unlikely((error= + cache_table->file->ha_write_tmp_row(cache_table->record[0])))) + { + /* create_myisam_from_heap will generate error if needed */ + if (cache_table->file->is_fatal_error(error, HA_CHECK_DUP)) + goto err; + else + { + double hit_rate= ((double)hit / ((double)hit + miss)); + DBUG_ASSERT(miss > 0); + if (hit_rate < EXPCACHE_MIN_HIT_RATE_FOR_MEM_TABLE) + { + DBUG_PRINT("info", ("hit rate is not so good to keep the cache")); + disable_cache(); + DBUG_RETURN(FALSE); + } + else if (hit_rate < EXPCACHE_MIN_HIT_RATE_FOR_DISK_TABLE) + { + DBUG_PRINT("info", ("hit rate is not so good to go to disk")); + if (cache_table->file->ha_delete_all_rows() || + cache_table->file->ha_write_tmp_row(cache_table->record[0])) + goto err; + } + else + { + if (create_internal_tmp_table_from_heap(table_thd, cache_table, + cache_table_param.start_recinfo, + &cache_table_param.recinfo, + error, 1, NULL)) + goto err; + } + } + } + cache_table->status= 0; /* cache_table->record contains an existed record */ + ref.has_record= TRUE; /* the same as above */ + DBUG_PRINT("info", ("has_record: TRUE status: 0")); + + DBUG_RETURN(FALSE); + +err: + disable_cache(); + DBUG_RETURN(TRUE); +} + + +void Expression_cache_tmptable::print(String *str, enum_query_type query_type) +{ + List_iterator li(items); + Item *item; + bool is_first= TRUE; + + str->append('<'); + li++; // skip result field + while ((item= li++)) + { + if (!is_first) + str->append(','); + item->print(str, query_type); + 
is_first= FALSE; + } + str->append('>'); +} + + +const char *Expression_cache_tracker::state_str[3]= +{"uninitialized", "disabled", "enabled"}; diff --git a/sql/sql_expression_cache.h b/sql/sql_expression_cache.h new file mode 100644 index 00000000..88436837 --- /dev/null +++ b/sql/sql_expression_cache.h @@ -0,0 +1,164 @@ +/* + Copyright (c) 2010, 2011, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_EXPRESSION_CACHE_INCLUDED +#define SQL_EXPRESSION_CACHE_INCLUDED + +#include "sql_select.h" + + +/** + Interface for expression cache + + @note + Parameters of an expression cache interface are set on the creation of the + cache. They are passed when a cache object of the implementation class is + constructed. That's why they are not visible in this interface. +*/ + +extern ulong subquery_cache_miss, subquery_cache_hit; + +class Expression_cache :public Sql_alloc +{ +public: + enum result {ERROR, HIT, MISS}; + + Expression_cache()= default; + virtual ~Expression_cache() = default; + /** + Shall check the presence of expression value in the cache for a given + set of values of the expression parameters. Return the result of the + expression if it's found in the cache. 
+ */ + virtual result check_value(Item **value)= 0; + /** + Shall put the value of an expression for given set of its parameters + into the expression cache + */ + virtual my_bool put_value(Item *value)= 0; + + /** + Print cache parameters + */ + virtual void print(String *str, enum_query_type query_type)= 0; + + /** + Is this cache initialized + */ + virtual bool is_inited()= 0; + /** + Initialize this cache + */ + virtual void init()= 0; + + /** + Save this object's statistics into Expression_cache_tracker object + */ + virtual void update_tracker()= 0; +}; + +struct st_table_ref; +struct st_join_table; +class Item_field; + + +class Expression_cache_tracker :public Sql_alloc +{ +public: + enum expr_cache_state {UNINITED, STOPPED, OK}; + Expression_cache_tracker(Expression_cache *c) : + cache(c), hit(0), miss(0), state(UNINITED) + {} + +private: + // This can be NULL if the cache is already deleted + Expression_cache *cache; + +public: + ulong hit, miss; + enum expr_cache_state state; + + static const char* state_str[3]; + void set(ulong h, ulong m, enum expr_cache_state s) + {hit= h; miss= m; state= s;} + + void detach_from_cache() { cache= NULL; } + void fetch_current_stats() + { + if (cache) + cache->update_tracker(); + } +}; + + +/** + Implementation of expression cache over a temporary table +*/ + +class Expression_cache_tmptable :public Expression_cache +{ +public: + Expression_cache_tmptable(THD *thd, List &dependants, Item *value); + virtual ~Expression_cache_tmptable(); + virtual result check_value(Item **value); + virtual my_bool put_value(Item *value); + + void print(String *str, enum_query_type query_type); + bool is_inited() { return inited; }; + void init(); + + void set_tracker(Expression_cache_tracker *st) + { + tracker= st; + update_tracker(); + } + virtual void update_tracker() + { + if (tracker) + { + tracker->set(hit, miss, (inited ? (cache_table ? 
+ Expression_cache_tracker::OK : + Expression_cache_tracker::STOPPED) : + Expression_cache_tracker::UNINITED)); + } + } + +private: + void disable_cache(); + + /* tmp table parameters */ + TMP_TABLE_PARAM cache_table_param; + /* temporary table to store this cache */ + TABLE *cache_table; + /* Thread handle for the temporary table */ + THD *table_thd; + /* EXPALIN/ANALYZE statistics */ + Expression_cache_tracker *tracker; + /* TABLE_REF for index lookup */ + struct st_table_ref ref; + /* Cached result */ + Item_field *cached_result; + /* List of parameter items */ + List &items; + /* Value Item example */ + Item *val; + /* hit/miss counters */ + ulong hit, miss; + /* Set on if the object has been successfully initialized with init() */ + bool inited; +}; + +#endif /* SQL_EXPRESSION_CACHE_INCLUDED */ diff --git a/sql/sql_get_diagnostics.cc b/sql/sql_get_diagnostics.cc new file mode 100644 index 00000000..240975d2 --- /dev/null +++ b/sql/sql_get_diagnostics.cc @@ -0,0 +1,347 @@ +/* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_list.h" // Sql_alloc, List, List_iterator +#include "sql_cmd.h" // Sql_cmd +#include "sql_class.h" // Diagnostics_area +#include "sql_get_diagnostics.h" // Sql_cmd_get_diagnostics + +/** + Execute this GET DIAGNOSTICS statement. 
+ + @param thd The current thread. + + @remark Errors or warnings occurring during the execution of the GET + DIAGNOSTICS statement should not affect the diagnostics area + of a previous statement as the diagnostics information there + would be wiped out. Thus, in order to preserve the contents + of the diagnostics area from which information is being + retrieved, the GET DIAGNOSTICS statement is executed under + a separate diagnostics area. If any errors or warnings occur + during the execution of the GET DIAGNOSTICS statement, these + error or warnings (conditions) are appended to the list of + the original diagnostics area. The only exception to this is + fatal errors, which must always cause the statement to fail. + + @retval false on success. + @retval true on error +*/ + +bool +Sql_cmd_get_diagnostics::execute(THD *thd) +{ + bool rv; + Diagnostics_area new_stmt_da(thd->query_id, false, true); + Diagnostics_area *save_stmt_da= thd->get_stmt_da(); + DBUG_ENTER("Sql_cmd_get_diagnostics::execute"); + + /* Disable the unneeded read-only mode of the original DA. */ + save_stmt_da->set_warning_info_read_only(false); + + /* Set new diagnostics area, execute statement and restore. */ + thd->set_stmt_da(&new_stmt_da); + rv= m_info->aggregate(thd, save_stmt_da); + thd->set_stmt_da(save_stmt_da); + + /* Bail out early if statement succeeded. */ + if (! rv) + { + thd->get_stmt_da()->set_ok_status(0, 0, NULL); + DBUG_RETURN(false); + } + + /* Statement failed, retrieve the error information for propagation. */ + uint sql_errno= new_stmt_da.sql_errno(); + const char *message= new_stmt_da.message(); + const char *sqlstate= new_stmt_da.get_sqlstate(); + + /* In case of a fatal error, set it into the original DA.*/ + if (unlikely(thd->is_fatal_error)) + { + save_stmt_da->set_error_status(sql_errno, message, sqlstate, NULL); + DBUG_RETURN(true); + } + + /* Otherwise, just append the new error as a exception condition. 
*/ + save_stmt_da->push_warning(thd, sql_errno, sqlstate, + Sql_condition::WARN_LEVEL_ERROR, + message); + + /* Appending might have failed. */ + if (unlikely(!(rv= thd->is_error()))) + thd->get_stmt_da()->set_ok_status(0, 0, NULL); + + DBUG_RETURN(rv); +} + + +/** + Set a value for this item. + + @param thd The current thread. + @param value The obtained value. + + @retval false on success. + @retval true on error. +*/ + +bool +Diagnostics_information_item::set_value(THD *thd, Item **value) +{ + bool rv; + Settable_routine_parameter *srp; + DBUG_ENTER("Diagnostics_information_item::set_value"); + + /* Get a settable reference to the target. */ + srp= m_target->get_settable_routine_parameter(); + + DBUG_ASSERT(srp); + + /* GET DIAGNOSTICS is not allowed in prepared statements */ + DBUG_ASSERT(srp->get_item_param() == NULL); + + /* Set variable/parameter value. */ + rv= srp->set_value(thd, thd->spcont, value); + + DBUG_RETURN(rv); +} + + +/** + Obtain statement information in the context of a given diagnostics area. + + @param thd The current thread. + @param da The diagnostics area. + + @retval false on success. + @retval true on error +*/ + +bool +Statement_information::aggregate(THD *thd, const Diagnostics_area *da) +{ + bool rv= false; + Statement_information_item *stmt_info_item; + List_iterator it(*m_items); + DBUG_ENTER("Statement_information::aggregate"); + + /* + Each specified target gets the value of each given + information item obtained from the diagnostics area. + */ + while ((stmt_info_item= it++)) + { + if ((rv= evaluate(thd, stmt_info_item, da))) + break; + } + + DBUG_RETURN(rv); +} + + +/** + Obtain the value of this statement information item in the context of + a given diagnostics area. + + @param thd The current thread. + @param da The diagnostics area. + + @retval Item representing the value. + @retval NULL on error. 
+*/ + +Item * +Statement_information_item::get_value(THD *thd, const Diagnostics_area *da) +{ + Item *value= NULL; + DBUG_ENTER("Statement_information_item::get_value"); + + switch (m_name) + { + /* + The number of condition areas that have information. That is, + the number of errors and warnings within the diagnostics area. + */ + case NUMBER: + { + ulong count= da->cond_count(); + value= new (thd->mem_root) Item_uint(thd, count); + break; + } + /* + Number that shows how many rows were directly affected by + a data-change statement (INSERT, UPDATE, DELETE, MERGE, + REPLACE, LOAD). + */ + case ROW_COUNT: + value= new (thd->mem_root) Item_int(thd, thd->get_row_count_func()); + break; + } + + DBUG_RETURN(value); +} + + +/** + Obtain condition information in the context of a given diagnostics area. + + @param thd The current thread. + @param da The diagnostics area. + + @retval false on success. + @retval true on error +*/ + +bool +Condition_information::aggregate(THD *thd, const Diagnostics_area *da) +{ + bool rv= false; + longlong cond_number; + const Sql_condition *cond= NULL; + Condition_information_item *cond_info_item; + Diagnostics_area::Sql_condition_iterator it_conds= da->sql_conditions(); + List_iterator_fast it_items(*m_items); + DBUG_ENTER("Condition_information::aggregate"); + + /* Prepare the expression for evaluation. */ + if (m_cond_number_expr->fix_fields_if_needed(thd, &m_cond_number_expr)) + DBUG_RETURN(true); + + cond_number= m_cond_number_expr->val_int(); + + /* + Limit to the number of available conditions. Warning_info::warn_count() + is not used because it indicates the number of condition regardless of + @@max_error_count, which prevents conditions from being pushed, but not + counted. + */ + if (cond_number < 1 || (ulonglong) cond_number > da->cond_count()) + { + my_error(ER_DA_INVALID_CONDITION_NUMBER, MYF(0)); + DBUG_RETURN(true); + } + + /* Advance to the requested condition. 
*/ + while (cond_number--) + cond= it_conds++; + + DBUG_ASSERT(cond); + + /* Evaluate the requested information in the context of the condition. */ + while ((cond_info_item= it_items++)) + { + if ((rv= evaluate(thd, cond_info_item, cond))) + break; + } + + DBUG_RETURN(rv); +} + + +/** + Create an UTF-8 string item to represent a condition item string. + + @remark The string might not have a associated charset. For example, + this can be the case if the server does not or fails to process + the error message file. + + @remark See "Design notes about Sql_condition::m_message_text." in sql_error.cc + + @return Pointer to an string item, NULL on failure. +*/ + +Item * +Condition_information_item::make_utf8_string_item(THD *thd, const String *str) +{ + /* Default is utf8 character set and utf8mb3_general_ci collation. */ + CHARSET_INFO *to_cs= &my_charset_utf8mb3_general_ci; + /* If a charset was not set, assume that no conversion is needed. */ + CHARSET_INFO *from_cs= str->charset() ? str->charset() : to_cs; + String tmp(str->ptr(), str->length(), from_cs); + /* If necessary, convert the string (ignoring errors), then copy it over. */ + uint conv_errors; + return new (thd->mem_root) Item_string(thd, &tmp, to_cs, &conv_errors, + DERIVATION_COERCIBLE, MY_REPERTOIRE_UNICODE30); +} + + +/** + Obtain the value of this condition information item in the context of + a given condition. + + @param thd The current thread. + @param da The diagnostics area. + + @retval Item representing the value. + @retval NULL on error. 
+*/ + +Item * +Condition_information_item::get_value(THD *thd, const Sql_condition *cond) +{ + String str; + Item *value= NULL; + DBUG_ENTER("Condition_information_item::get_value"); + + switch (m_name) + { + case CLASS_ORIGIN: + value= make_utf8_string_item(thd, &(cond->m_class_origin)); + break; + case SUBCLASS_ORIGIN: + value= make_utf8_string_item(thd, &(cond->m_subclass_origin)); + break; + case CONSTRAINT_CATALOG: + value= make_utf8_string_item(thd, &(cond->m_constraint_catalog)); + break; + case CONSTRAINT_SCHEMA: + value= make_utf8_string_item(thd, &(cond->m_constraint_schema)); + break; + case CONSTRAINT_NAME: + value= make_utf8_string_item(thd, &(cond->m_constraint_name)); + break; + case CATALOG_NAME: + value= make_utf8_string_item(thd, &(cond->m_catalog_name)); + break; + case SCHEMA_NAME: + value= make_utf8_string_item(thd, &(cond->m_schema_name)); + break; + case TABLE_NAME: + value= make_utf8_string_item(thd, &(cond->m_table_name)); + break; + case COLUMN_NAME: + value= make_utf8_string_item(thd, &(cond->m_column_name)); + break; + case CURSOR_NAME: + value= make_utf8_string_item(thd, &(cond->m_cursor_name)); + break; + case MESSAGE_TEXT: + value= make_utf8_string_item(thd, &(cond->m_message_text)); + break; + case MYSQL_ERRNO: + value= new (thd->mem_root) Item_uint(thd, cond->m_sql_errno); + break; + case RETURNED_SQLSTATE: + str.set_ascii(cond->get_sqlstate(), strlen(cond->get_sqlstate())); + value= make_utf8_string_item(thd, &str); + break; + case ROW_NUMBER: + value= new (thd->mem_root) Item_uint(thd, cond->m_row_number); + } + + DBUG_RETURN(value); +} + diff --git a/sql/sql_get_diagnostics.h b/sql/sql_get_diagnostics.h new file mode 100644 index 00000000..efe526d7 --- /dev/null +++ b/sql/sql_get_diagnostics.h @@ -0,0 +1,319 @@ +/* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_GET_DIAGNOSTICS_H +#define SQL_GET_DIAGNOSTICS_H + +/** Diagnostics information forward reference. */ +class Diagnostics_information; + + +/** + Sql_cmd_get_diagnostics represents a GET DIAGNOSTICS statement. + + The GET DIAGNOSTICS statement retrieves exception or completion + condition information from a diagnostics area, usually pertaining + to the last non-diagnostic SQL statement that was executed. +*/ +class Sql_cmd_get_diagnostics : public Sql_cmd +{ +public: + /** + Constructor, used to represent a GET DIAGNOSTICS statement. + + @param info Diagnostics information to be obtained. + */ + Sql_cmd_get_diagnostics(Diagnostics_information *info) + : m_info(info) + {} + + virtual enum_sql_command sql_command_code() const + { + return SQLCOM_GET_DIAGNOSTICS; + } + + virtual bool execute(THD *thd); + +private: + /** The information to be obtained. */ + Diagnostics_information *m_info; +}; + + +/** + Represents the diagnostics information to be obtained. + + Diagnostic information is made available through statement + information and condition information items. +*/ +class Diagnostics_information : public Sql_alloc +{ +public: + /** + Which diagnostics area to access. + Only CURRENT is supported for now. + */ + enum Which_area + { + /** Access the first diagnostics area. 
*/ + CURRENT_AREA + }; + + /** Set which diagnostics area to access. */ + void set_which_da(Which_area area) + { m_area= area; } + + /** Get which diagnostics area to access. */ + Which_area get_which_da(void) const + { return m_area; } + + /** + Aggregate diagnostics information. + + @param thd The current thread. + @param da The diagnostics area. + + @retval false on success. + @retval true on error + */ + virtual bool aggregate(THD *thd, const Diagnostics_area *da) = 0; + +protected: + /** + Diagnostics_information objects are allocated in thd->mem_root. + Do not rely on the destructor for any cleanup. + */ + virtual ~Diagnostics_information() + { + DBUG_ASSERT(false); + } + + /** + Evaluate a diagnostics information item in a specific context. + + @param thd The current thread. + @param diag_item The diagnostics information item. + @param ctx The context to evaluate the item. + + @retval false on success. + @retval true on error. + */ + template + bool evaluate(THD *thd, Diag_item *diag_item, Context ctx) + { + Item *value; + + /* Get this item's value. */ + if (! (value= diag_item->get_value(thd, ctx))) + return true; + + /* Set variable/parameter value. */ + return diag_item->set_value(thd, &value); + } + +private: + /** Which diagnostics area to access. */ + Which_area m_area; +}; + + +/** + A diagnostics information item. Used to associate a specific + diagnostics information item to a target variable. +*/ +class Diagnostics_information_item : public Sql_alloc +{ +public: + /** + Set a value for this item. + + @param thd The current thread. + @param value The obtained value. + + @retval false on success. + @retval true on error. + */ + bool set_value(THD *thd, Item **value); + +protected: + /** + Constructor, used to represent a diagnostics information item. + + @param target A target that gets the value of this item. 
+ */ + Diagnostics_information_item(Item *target) + : m_target(target) + {} + + /** + Diagnostics_information_item objects are allocated in thd->mem_root. + Do not rely on the destructor for any cleanup. + */ + virtual ~Diagnostics_information_item() + { + DBUG_ASSERT(false); + } + +private: + /** The target variable that will receive the value of this item. */ + Item *m_target; +}; + + +/** + A statement information item. +*/ +class Statement_information_item : public Diagnostics_information_item +{ +public: + /** The name of a statement information item. */ + enum Name + { + NUMBER, + ROW_COUNT + }; + + /** + Constructor, used to represent a statement information item. + + @param name The name of this item. + @param target A target that gets the value of this item. + */ + Statement_information_item(Name name, Item *target) + : Diagnostics_information_item(target), m_name(name) + {} + + /** Obtain value of this statement information item. */ + Item *get_value(THD *thd, const Diagnostics_area *da); + +private: + /** The name of this statement information item. */ + Name m_name; +}; + + +/** + Statement information. + + @remark Provides information about the execution of a statement. +*/ +class Statement_information : public Diagnostics_information +{ +public: + /** + Constructor, used to represent the statement information of a + GET DIAGNOSTICS statement. + + @param items List of requested statement information items. + */ + Statement_information(List *items) + : m_items(items) + {} + + /** Obtain statement information in the context of a diagnostics area. */ + bool aggregate(THD *thd, const Diagnostics_area *da); + +private: + /* List of statement information items. */ + List *m_items; +}; + + +/** + A condition information item. +*/ +class Condition_information_item : public Diagnostics_information_item +{ +public: + /** + The name of a condition information item. 
+ */ + enum Name + { + CLASS_ORIGIN, + SUBCLASS_ORIGIN, + CONSTRAINT_CATALOG, + CONSTRAINT_SCHEMA, + CONSTRAINT_NAME, + CATALOG_NAME, + SCHEMA_NAME, + TABLE_NAME, + COLUMN_NAME, + CURSOR_NAME, + MESSAGE_TEXT, + MYSQL_ERRNO, + RETURNED_SQLSTATE, + ROW_NUMBER + }; + + /** + Constructor, used to represent a condition information item. + + @param name The name of this item. + @param target A target that gets the value of this item. + */ + Condition_information_item(Name name, Item *target) + : Diagnostics_information_item(target), m_name(name) + {} + + /** Obtain value of this condition information item. */ + Item *get_value(THD *thd, const Sql_condition *cond); + +private: + /** The name of this condition information item. */ + Name m_name; + + /** Create an string item to represent a condition item string. */ + Item *make_utf8_string_item(THD *thd, const String *str); +}; + + +/** + Condition information. + + @remark Provides information about conditions raised during the + execution of a statement. +*/ +class Condition_information : public Diagnostics_information +{ +public: + /** + Constructor, used to represent the condition information of a + GET DIAGNOSTICS statement. + + @param cond_number_expr Number that identifies the diagnostic condition. + @param items List of requested condition information items. + */ + Condition_information(Item *cond_number_expr, + List *items) + : m_cond_number_expr(cond_number_expr), m_items(items) + {} + + /** Obtain condition information in the context of a diagnostics area. */ + bool aggregate(THD *thd, const Diagnostics_area *da); + +private: + /** + Number that identifies the diagnostic condition for which + information is to be obtained. + */ + Item *m_cond_number_expr; + + /** List of condition information items. 
*/ + List *m_items; +}; + +#endif + diff --git a/sql/sql_handler.cc b/sql/sql_handler.cc new file mode 100644 index 00000000..7235dc64 --- /dev/null +++ b/sql/sql_handler.cc @@ -0,0 +1,1292 @@ +/* Copyright (c) 2001, 2015, Oracle and/or its affiliates. + Copyright (c) 2011, 2016, MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* HANDLER ... commands - direct access to ISAM */ + +/* TODO: + HANDLER blabla OPEN [ AS foobar ] [ (column-list) ] + + the most natural (easiest, fastest) way to do it is to + compute List field_list not in mysql_ha_read + but in mysql_ha_open, and then store it in TABLE structure. + + The problem here is that mysql_parse calls free_item to free all the + items allocated at the end of every query. The workaround would to + keep two item lists per THD - normal free_list and handler_items. + The second is to be freeed only on thread end. mysql_ha_open should + then do { handler_items=concat(handler_items, free_list); free_list=0; } + + But !!! do_command calls free_root at the end of every query and frees up + all the memory allocated on THD::mem_root. It's harder to work around... +*/ + +/* + The information about open HANDLER objects is stored in a HASH. 
+ It holds objects of type TABLE_LIST, which are indexed by table + name/alias, and allows us to quickly find a HANDLER table for any + operation at hand - be it HANDLER READ or HANDLER CLOSE. + + It also allows us to maintain an "open" HANDLER even in cases + when there is no physically open cursor. E.g. a FLUSH TABLE + statement in this or some other connection demands that all open + HANDLERs against the flushed table are closed. In order to + preserve the information about an open HANDLER, we don't perform + a complete HANDLER CLOSE, but only close the TABLE object. The + corresponding TABLE_LIST is kept in the cache with 'table' + pointer set to NULL. The table will be reopened on next access + (this, however, leads to loss of cursor position, unless the + cursor points at the first record). +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_handler.h" +#include "sql_base.h" // close_thread_tables +#include "lock.h" // mysql_unlock_tables +#include "key.h" // key_copy +#include "sql_base.h" // insert_fields +#include "sql_select.h" +#include "transaction.h" + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#define HANDLER_TABLES_HASH_SIZE 120 + +static enum enum_ha_read_modes rkey_to_rnext[]= +{ RNEXT_SAME, RNEXT, RPREV, RNEXT, RPREV, RNEXT, RPREV, RPREV }; + +/* + Set handler to state after create, but keep base information about + which table is used +*/ + +void SQL_HANDLER::reset() +{ + fields.empty(); + arena.free_items(); + free_root(&mem_root, MYF(0)); + my_free(lock); + init(); +} + +/* Free all allocated data */ + +SQL_HANDLER::~SQL_HANDLER() +{ + reset(); + my_free(base_data); +} + +/* + Get hash key and hash key length. + + SYNOPSIS + mysql_ha_hash_get_key() + tables Pointer to the hash object. + key_len_p (out) Pointer to the result for key length. + first Unused. + + DESCRIPTION + The hash object is an TABLE_LIST struct. + The hash key is the alias name. 
+ The hash key length is the alias name length plus one for the + terminateing NUL character. + + RETURN + Pointer to the TABLE_LIST struct. +*/ + +static char *mysql_ha_hash_get_key(SQL_HANDLER *table, size_t *key_len, + my_bool first __attribute__((unused))) +{ + *key_len= table->handler_name.length + 1 ; /* include '\0' in comparisons */ + return (char*) table->handler_name.str; +} + + +/* + Free an hash object. + + SYNOPSIS + mysql_ha_hash_free() + tables Pointer to the hash object. + + DESCRIPTION + The hash object is an TABLE_LIST struct. + + RETURN + Nothing +*/ + +static void mysql_ha_hash_free(SQL_HANDLER *table) +{ + delete table; +} + +static void mysql_ha_close_childs(THD *thd, TABLE_LIST *current_table_list, + TABLE_LIST **next_global) +{ + TABLE_LIST *table_list; + DBUG_ENTER("mysql_ha_close_childs"); + DBUG_PRINT("info",("current_table_list: %p", current_table_list)); + DBUG_PRINT("info",("next_global: %p", *next_global)); + for (table_list = *next_global; table_list; table_list = *next_global) + { + *next_global = table_list->next_global; + DBUG_PRINT("info",("table_name: %s.%s", table_list->table->s->db.str, + table_list->table->s->table_name.str)); + DBUG_PRINT("info",("parent_l: %p", table_list->parent_l)); + if (table_list->parent_l == current_table_list) + { + DBUG_PRINT("info",("found child")); + TABLE *table = table_list->table; + if (table) + { + table->open_by_handler= 0; + if (!table->s->tmp_table) + { + (void) close_thread_table(thd, &table); + thd->mdl_context.release_lock(table_list->mdl_request.ticket); + } + else + { + thd->mark_tmp_table_as_free_for_reuse(table); + } + } + mysql_ha_close_childs(thd, table_list, next_global); + } + else + { + /* the end of child tables */ + *next_global = table_list; + break; + } + } + DBUG_VOID_RETURN; +} + +/** + Close a HANDLER table. + + @param thd Thread identifier. + @param tables A list of tables with the first entry to close. 
+ + @note Though this function takes a list of tables, only the first list entry + will be closed. + @mote handler_object is not deleted! + @note Broadcasts refresh if it closed a table with old version. +*/ + +static void mysql_ha_close_table(SQL_HANDLER *handler) +{ + DBUG_ENTER("mysql_ha_close_table"); + THD *thd= handler->thd; + TABLE *table= handler->table; + TABLE_LIST *current_table_list= NULL, *next_global; + + /* check if table was already closed */ + if (!table) + DBUG_VOID_RETURN; + + if ((next_global= table->file->get_next_global_for_child())) + current_table_list= next_global->parent_l; + + table->open_by_handler= 0; + if (!table->s->tmp_table) + { + /* Non temporary table. */ + if (handler->lock) + { + // Mark it unlocked, like in reset_lock_data() + reset_lock_data(handler->lock, 1); + } + + table->file->ha_index_or_rnd_end(); + close_thread_table(thd, &table); + if (current_table_list) + mysql_ha_close_childs(thd, current_table_list, &next_global); + thd->mdl_context.release_lock(handler->mdl_request.ticket); + } + else + { + /* Must be a temporary table */ + table->file->ha_index_or_rnd_end(); + if (current_table_list) + mysql_ha_close_childs(thd, current_table_list, &next_global); + thd->mark_tmp_table_as_free_for_reuse(table); + } + my_free(handler->lock); + handler->init(); + DBUG_VOID_RETURN; +} + +/* + Open a HANDLER table. + + SYNOPSIS + mysql_ha_open() + thd Thread identifier. + tables A list of tables with the first entry to open. + reopen Re-open a previously opened handler table. + + DESCRIPTION + Though this function takes a list of tables, only the first list entry + will be opened. + 'reopen' is set when a handler table is to be re-opened. In this case, + 'tables' is the pointer to the hashed SQL_HANDLER object which has been + saved on the original open. + 'reopen' is also used to suppress the sending of an 'ok' message. 
+ + RETURN + FALSE OK + TRUE Error +*/ + +bool mysql_ha_open(THD *thd, TABLE_LIST *tables, SQL_HANDLER *reopen) +{ + SQL_HANDLER *sql_handler= 0; + uint counter; + bool error; + TABLE *table, *backup_open_tables; + MDL_savepoint mdl_savepoint; + Query_arena backup_arena; + DBUG_ENTER("mysql_ha_open"); + DBUG_PRINT("enter",("'%s'.'%s' as '%s' reopen: %d", + tables->db.str, tables->table_name.str, tables->alias.str, + reopen != 0)); + + if (thd->locked_tables_mode) + { + my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0)); + DBUG_RETURN(TRUE); + } + if (tables->schema_table) + { + my_error(ER_WRONG_USAGE, MYF(0), "HANDLER OPEN", + INFORMATION_SCHEMA_NAME.str); + DBUG_PRINT("exit",("ERROR")); + DBUG_RETURN(TRUE); + } + + if (! my_hash_inited(&thd->handler_tables_hash)) + { + /* + HASH entries are of type SQL_HANDLER + */ + if (my_hash_init(key_memory_THD_handler_tables_hash, + &thd->handler_tables_hash, &my_charset_latin1, + HANDLER_TABLES_HASH_SIZE, 0, 0, (my_hash_get_key) + mysql_ha_hash_get_key, (my_hash_free_key) + mysql_ha_hash_free, 0)) + { + DBUG_PRINT("exit",("ERROR")); + DBUG_RETURN(TRUE); + } + } + else if (! reopen) /* Otherwise we have 'tables' already. */ + { + if (my_hash_search(&thd->handler_tables_hash, (uchar*) tables->alias.str, + tables->alias.length + 1)) + { + DBUG_PRINT("info",("duplicate '%s'", tables->alias.str)); + DBUG_PRINT("exit",("ERROR")); + my_error(ER_NONUNIQ_TABLE, MYF(0), tables->alias.str); + DBUG_RETURN(TRUE); + } + } + + /* + Save and reset the open_tables list so that open_tables() won't + be able to access (or know about) the previous list. And on return + from open_tables(), thd->open_tables will contain only the opened + table. + + See open_table() back-off comments for more details. + */ + backup_open_tables= thd->open_tables; + thd->set_open_tables(NULL); + + /* + open_tables() will set 'tables->table' if successful. + It must be NULL for a real open when calling open_tables(). + */ + DBUG_ASSERT(! 
tables->table); + + /* + We can't request lock with explicit duration for this table + right from the start as open_tables() can't handle properly + back-off for such locks. + */ + MDL_REQUEST_INIT(&tables->mdl_request, MDL_key::TABLE, tables->db.str, + tables->table_name.str, MDL_SHARED_READ, MDL_TRANSACTION); + mdl_savepoint= thd->mdl_context.mdl_savepoint(); + + /* for now HANDLER can be used only for real TABLES */ + tables->required_type= TABLE_TYPE_NORMAL; + + /* + We use open_tables() here, rather than, say, + open_ltable() or open_table() because we would like to be able + to open a temporary table. + */ + error= (thd->open_temporary_tables(tables) || + open_tables(thd, &tables, &counter, 0)); + + if (unlikely(error)) + goto err; + + table= tables->table; + + /* There can be only one table in '*tables'. */ + if (! (table->file->ha_table_flags() & HA_CAN_SQL_HANDLER)) + { + my_error(ER_ILLEGAL_HA, MYF(0), table->file->table_type(), + table->s->db.str, table->s->table_name.str); + goto err; + } + + DBUG_PRINT("info",("clone_tickets start")); + for (TABLE_LIST *table_list= tables; table_list; + table_list= table_list->next_global) + { + DBUG_PRINT("info",("table_list %s.%s", table_list->table->s->db.str, + table_list->table->s->table_name.str)); + if (table_list->mdl_request.ticket && + thd->mdl_context.has_lock(mdl_savepoint, table_list->mdl_request.ticket)) + { + DBUG_PRINT("info",("clone_tickets")); + /* The ticket returned is within a savepoint. Make a copy. */ + error= thd->mdl_context.clone_ticket(&table_list->mdl_request); + table_list->table->mdl_ticket= table_list->mdl_request.ticket; + if (unlikely(error)) + goto err; + } + } + DBUG_PRINT("info",("clone_tickets end")); + + if (! 
reopen) + { + /* copy data to sql_handler */ + if (!(sql_handler= new SQL_HANDLER(thd))) + goto err; + init_alloc_root(PSI_INSTRUMENT_ME, &sql_handler->mem_root, 1024, 0, + MYF(MY_THREAD_SPECIFIC)); + + sql_handler->db.length= tables->db.length; + sql_handler->table_name.length= tables->table_name.length; + sql_handler->handler_name.length= tables->alias.length; + + if (!(my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME), + &sql_handler->base_data, + (uint) sql_handler->db.length + 1, + &sql_handler->table_name.str, + (uint) sql_handler->table_name.length + 1, + &sql_handler->handler_name.str, + (uint) sql_handler->handler_name.length + 1, + NullS))) + goto err; + sql_handler->db.str= sql_handler->base_data; + memcpy((char*) sql_handler->db.str, tables->db.str, tables->db.length +1); + memcpy((char*) sql_handler->table_name.str, tables->table_name.str, + tables->table_name.length+1); + memcpy((char*) sql_handler->handler_name.str, tables->alias.str, + tables->alias.length +1); + + /* add to hash */ + if (my_hash_insert(&thd->handler_tables_hash, (uchar*) sql_handler)) + goto err; + } + else + { + sql_handler= reopen; + sql_handler->reset(); + } + sql_handler->table= table; + + if (!(sql_handler->lock= get_lock_data(thd, &sql_handler->table, 1, + GET_LOCK_STORE_LOCKS))) + goto err; + + /* Get a list of all fields for send_fields */ + thd->set_n_backup_active_arena(&sql_handler->arena, &backup_arena); + error= table->fill_item_list(&sql_handler->fields); + thd->restore_active_arena(&sql_handler->arena, &backup_arena); + if (unlikely(error)) + goto err; + + sql_handler->mdl_request.move_from(tables->mdl_request); + + /* Always read all columns */ + table->read_set= &table->s->all_set; + + /* Restore the state. 
*/ + thd->set_open_tables(backup_open_tables); + DBUG_PRINT("info",("set_lock_duration start")); + if (sql_handler->mdl_request.ticket) + { + thd->mdl_context.set_lock_duration(sql_handler->mdl_request.ticket, + MDL_EXPLICIT); + thd->mdl_context.set_needs_thr_lock_abort(TRUE); + } + for (TABLE_LIST *table_list= tables->next_global; table_list; + table_list= table_list->next_global) + { + DBUG_PRINT("info",("table_list %s.%s", table_list->table->s->db.str, + table_list->table->s->table_name.str)); + if (table_list->mdl_request.ticket) + { + thd->mdl_context.set_lock_duration(table_list->mdl_request.ticket, + MDL_EXPLICIT); + thd->mdl_context.set_needs_thr_lock_abort(TRUE); + } + } + DBUG_PRINT("info",("set_lock_duration end")); + + /* + If it's a temp table, don't reset table->query_id as the table is + being used by this handler. For non-temp tables we use this flag + in asserts. + */ + for (TABLE_LIST *table_list= tables; table_list; + table_list= table_list->next_global) + { + table_list->table->open_by_handler= 1; + } + + if (! reopen) + my_ok(thd); + DBUG_PRINT("exit",("OK")); + DBUG_RETURN(FALSE); + +err: + /* + No need to rollback statement transaction, it's not started. + If called with reopen flag, no need to rollback either, + it will be done at statement end. + */ + DBUG_ASSERT(thd->transaction->stmt.is_empty()); + close_thread_tables(thd); + thd->mdl_context.rollback_to_savepoint(mdl_savepoint); + thd->set_open_tables(backup_open_tables); + if (sql_handler) + { + if (!reopen) + my_hash_delete(&thd->handler_tables_hash, (uchar*) sql_handler); + else + sql_handler->reset(); // or should it be init() ? + } + DBUG_PRINT("exit",("ERROR")); + DBUG_RETURN(TRUE); +} + + +/* + Close a HANDLER table by alias or table name + + SYNOPSIS + mysql_ha_close() + thd Thread identifier. + tables A list of tables with the first entry to close. 
+ + DESCRIPTION + Closes the table that is associated (on the handler tables hash) with the + name (table->alias) of the specified table. + + RETURN + FALSE ok + TRUE error +*/ + +bool mysql_ha_close(THD *thd, TABLE_LIST *tables) +{ + SQL_HANDLER *handler; + DBUG_ENTER("mysql_ha_close"); + DBUG_PRINT("enter",("'%s'.'%s' as '%s'", + tables->db.str, tables->table_name.str, tables->alias.str)); + + if (thd->locked_tables_mode) + { + my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0)); + DBUG_RETURN(TRUE); + } + if ((my_hash_inited(&thd->handler_tables_hash)) && + (handler= (SQL_HANDLER*) my_hash_search(&thd->handler_tables_hash, + (const uchar*) tables->alias.str, + tables->alias.length + 1))) + { + mysql_ha_close_table(handler); + my_hash_delete(&thd->handler_tables_hash, (uchar*) handler); + } + else + { + my_error(ER_UNKNOWN_TABLE, MYF(0), tables->alias.str, "HANDLER"); + DBUG_PRINT("exit",("ERROR")); + DBUG_RETURN(TRUE); + } + + /* + Mark MDL_context as no longer breaking protocol if we have + closed last HANDLER. + */ + if (! thd->handler_tables_hash.records) + thd->mdl_context.set_needs_thr_lock_abort(FALSE); + + my_ok(thd); + DBUG_PRINT("exit", ("OK")); + DBUG_RETURN(FALSE); +} + + +/** + Finds an open HANDLER table. + + @params name Name of handler to open + + @return 0 failure + @return handler +*/ + +static SQL_HANDLER *mysql_ha_find_handler(THD *thd, const LEX_CSTRING *name) +{ + SQL_HANDLER *handler; + if ((my_hash_inited(&thd->handler_tables_hash)) && + (handler= (SQL_HANDLER*) my_hash_search(&thd->handler_tables_hash, + (const uchar*) name->str, + name->length + 1))) + { + DBUG_PRINT("info-in-hash",("'%s'.'%s' as '%s' table: %p", + handler->db.str, + handler->table_name.str, + handler->handler_name.str, handler->table)); + if (!handler->table) + { + /* The handler table has been closed. Re-open it. 
*/ + TABLE_LIST tmp; + tmp.init_one_table(&handler->db, &handler->table_name, + &handler->handler_name, TL_READ); + + if (mysql_ha_open(thd, &tmp, handler)) + { + DBUG_PRINT("exit",("reopen failed")); + return 0; + } + } + } + else + { + my_error(ER_UNKNOWN_TABLE, MYF(0), name->str, "HANDLER"); + return 0; + } + return handler; +} + + +/** + Check that condition and key name are ok + + @param handler + @param mode Read mode (RFIRST, RNEXT etc...) + @param keyname Key to use. + @param key_expr List of key column values + @param cond Where clause + @param in_prepare If we are in prepare phase (we can't evalute items yet) + + @return 0 ok + @return 1 error + + In ok, then values of used key and mode is stored in sql_handler +*/ + +static bool +mysql_ha_fix_cond_and_key(SQL_HANDLER *handler, + enum enum_ha_read_modes mode, const char *keyname, + List *key_expr, + enum ha_rkey_function ha_rkey_mode, + Item *cond, bool in_prepare) +{ + THD *thd= handler->thd; + TABLE *table= handler->table; + if (cond) + { + bool ret; + Item::vcol_func_processor_result res; + + /* This can only be true for temp tables */ + if (table->query_id != thd->query_id) + cond->cleanup(); // File was reopened + + ret= cond->walk(&Item::check_handler_func_processor, 0, &res); + if (ret || res.errors) + { + my_error(ER_GENERATED_COLUMN_FUNCTION_IS_NOT_ALLOWED, MYF(0), res.name, + "WHERE", "HANDLER"); + return 1; // ROWNUM() used + } + if (cond->fix_fields_if_needed_for_bool(thd, &cond)) + return 1; + } + + if (keyname) + { + /* Check if same as last keyname. 
If not, do a full lookup */ + if (handler->keyno < 0 || + my_strcasecmp(&my_charset_latin1, + keyname, + table->s->key_info[handler->keyno].name.str)) + { + if ((handler->keyno= find_type(keyname, &table->s->keynames, + FIND_TYPE_NO_PREFIX) - 1) < 0) + { + my_error(ER_KEY_DOES_NOT_EXISTS, MYF(0), keyname, + handler->handler_name.str); + return 1; + } + } + + /* Check key parts */ + if (mode == RKEY) + { + TABLE *table= handler->table; + KEY *keyinfo= table->key_info + handler->keyno; + KEY_PART_INFO *key_part= keyinfo->key_part; + List_iterator it_ke(*key_expr); + Item *item; + key_part_map keypart_map; + uint key_len; + const KEY *c_key= table->s->key_info + handler->keyno; + + if ((c_key->flags & HA_SPATIAL) || + c_key->algorithm == HA_KEY_ALG_FULLTEXT || + (ha_rkey_mode != HA_READ_KEY_EXACT && + (table->file->index_flags(handler->keyno, 0, TRUE) & + (HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE)) == 0)) + { + my_error(ER_KEY_DOESNT_SUPPORT, MYF(0), + table->file->index_type(handler->keyno), keyinfo->name.str); + return 1; + } + + if (key_expr->elements > keyinfo->user_defined_key_parts) + { + my_error(ER_TOO_MANY_KEY_PARTS, MYF(0), + keyinfo->user_defined_key_parts); + return 1; + } + + if (key_expr->elements < keyinfo->user_defined_key_parts && + (table->file->index_flags(handler->keyno, 0, TRUE) & + HA_ONLY_WHOLE_INDEX)) + { + my_error(ER_KEY_DOESNT_SUPPORT, MYF(0), + table->file->index_type(handler->keyno), keyinfo->name.str); + return 1; + } + + for (keypart_map= key_len=0 ; (item=it_ke++) ; key_part++) + { + /* note that 'item' can be changed by fix_fields() call */ + if (item->fix_fields_if_needed_for_scalar(thd, it_ke.ref())) + return 1; + item= *it_ke.ref(); + if (item->used_tables() & ~(RAND_TABLE_BIT | PARAM_TABLE_BIT)) + { + my_error(ER_WRONG_ARGUMENTS,MYF(0),"HANDLER ... 
READ"); + return 1; + } + if (!in_prepare) + { + MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, &table->write_set); + int res= item->save_in_field(key_part->field, 1); + dbug_tmp_restore_column_map(&table->write_set, old_map); + if (res) + return 1; + } + key_len+= key_part->store_length; + keypart_map= (keypart_map << 1) | 1; + } + handler->keypart_map= keypart_map; + handler->key_len= key_len; + } + else + { + /* + Check if the same index involved. + We need to always do this check because we may not have yet + called the handler since the last keyno change. + */ + if ((uint) handler->keyno != table->file->get_index()) + { + if (mode == RNEXT) + mode= RFIRST; + else if (mode == RPREV) + mode= RLAST; + } + } + } + else if (table->file->inited != handler::RND) + { + /* Convert RNEXT to RFIRST if we haven't started row scan */ + if (mode == RNEXT) + mode= RFIRST; + } + handler->mode= mode; // Store adjusted mode + return 0; +} + +/* + Read from a HANDLER table. + + SYNOPSIS + mysql_ha_read() + thd Thread identifier. + tables A list of tables with the first entry to read. 
    mode
    keyname
    key_expr
    ha_rkey_mode
    cond
    select_limit_cnt
    offset_limit_cnt

  RETURN
    FALSE ok
    TRUE  error
*/

bool mysql_ha_read(THD *thd, TABLE_LIST *tables,
                   enum enum_ha_read_modes mode, const char *keyname,
                   List<Item> *key_expr,
                   enum ha_rkey_function ha_rkey_mode, Item *cond,
                   ha_rows select_limit_cnt, ha_rows offset_limit_cnt)
{
  SQL_HANDLER *handler;
  TABLE *table;
  Protocol *protocol= thd->protocol;
  char buff[MAX_FIELD_WIDTH];
  String buffer(buff, sizeof(buff), system_charset_info);
  int error, keyno;
  uint num_rows;
  /* 'key' is only assigned in the RKEY case and then reused by RNEXT_SAME. */
  uchar *UNINIT_VAR(key);
  MDL_deadlock_and_lock_abort_error_handler sql_handler_lock_error;
  DBUG_ENTER("mysql_ha_read");
  DBUG_PRINT("enter",("'%s'.'%s' as '%s'",
                      tables->db.str, tables->table_name.str,
                      tables->alias.str));

  /* HANDLER ... READ is not allowed under LOCK TABLES / active transaction. */
  if (thd->locked_tables_mode)
  {
    my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0));
    DBUG_RETURN(TRUE);
  }

retry:
  if (!(handler= mysql_ha_find_handler(thd, &tables->alias)))
    goto err0;

  if (thd->transaction->xid_state.check_has_uncommitted_xa())
    goto err0;

  table= handler->table;
  tables->table= table;                         // This is used by fix_fields
  table->pos_in_table_list= tables;

  if (handler->lock->table_count > 0)
  {
    int lock_error;

    /*
      Restore the lock types saved at open time; a previous read may have
      left the THR_LOCK_DATA entries with downgraded types.
    */
    THR_LOCK_DATA **pos,**end;
    for (pos= handler->lock->locks,
         end= handler->lock->locks + handler->lock->lock_count;
         pos < end;
         pos++)
    {
      pos[0]->type= pos[0]->org_type;
    }

    /* save open_tables state */
    TABLE* backup_open_tables= thd->open_tables;
    /* Always a one-element list, see mysql_ha_open(). */
    DBUG_ASSERT(table->next == NULL || table->s->tmp_table);
    /*
      mysql_lock_tables() needs thd->open_tables to be set correctly to
      be able to handle aborts properly.
    */
    thd->set_open_tables(table);

    sql_handler_lock_error.init();
    thd->push_internal_handler(&sql_handler_lock_error);

    lock_error= mysql_lock_tables(thd, handler->lock,
                                  (table->s->tmp_table == NO_TMP_TABLE ?
                                   MYSQL_LOCK_NOT_TEMPORARY : 0));

    thd->pop_internal_handler();

    /*
      In 5.1 and earlier, mysql_lock_tables() could replace the TABLE
      object with another one (reopen it). This is no longer the case
      with new MDL.
    */
    DBUG_ASSERT(table == thd->open_tables);
    /* Restore previous context. */
    thd->set_open_tables(backup_open_tables);

    if (sql_handler_lock_error.need_reopen())
    {
      DBUG_ASSERT(lock_error && !thd->is_error());
      /*
        Always close statement transaction explicitly,
        so that the engine doesn't have to count locks.
        There should be no need to perform transaction
        rollback due to deadlock.
      */
      DBUG_ASSERT(! thd->transaction_rollback_request);
      trans_rollback_stmt(thd);
      mysql_ha_close_table(handler);
      if (thd->stmt_arena->is_stmt_execute())
      {
        /*
          As we have already sent field list and types to the client, we can't
          handle any changes in the table format for prepared statements.
          Better to force a reprepare.
        */
        my_error(ER_NEED_REPREPARE, MYF(0));
        goto err0;
      }
      /* Table was flushed under us; reopen the handler and start over. */
      goto retry;
    }

    if (unlikely(lock_error))
      goto err0;           // mysql_lock_tables() printed error message already
  }

  if (mysql_ha_fix_cond_and_key(handler, mode, keyname, key_expr,
                                ha_rkey_mode, cond, 0))
    goto err;
  mode= handler->mode;
  keyno= handler->keyno;

  protocol->send_result_set_metadata(&handler->fields,
                                     Protocol::SEND_NUM_ROWS |
                                     Protocol::SEND_EOF);

  /*
    In ::external_lock InnoDB resets the fields which tell it that
    the handle is used in the HANDLER interface. Tell it again that
    we are using it for HANDLER.
  */

  table->file->init_table_handle_for_HANDLER();

  /* Fetch rows until LIMIT is satisfied or the scan is exhausted. */
  for (num_rows=0; num_rows < select_limit_cnt; )
  {
    switch (mode) {
    case RNEXT:
      if (table->file->inited != handler::NONE)
      {
        if ((error= table->file->can_continue_handler_scan()))
          break;
        if (keyname)
        {
          /* Check if we read from the same index. */
          DBUG_ASSERT((uint) keyno == table->file->get_index());
          error= table->file->ha_index_next(table->record[0]);
        }
        else
          error= table->file->ha_rnd_next(table->record[0]);
        break;
      }
      /* else fall through: no scan started yet, behave as READ FIRST */
    case RFIRST:
      if (keyname)
      {
        if (likely(!(error= table->file->ha_index_or_rnd_end())) &&
            likely(!(error= table->file->ha_index_init(keyno, 1))))
          error= table->file->ha_index_first(table->record[0]);
      }
      else
      {
        if (likely(!(error= table->file->ha_index_or_rnd_end())) &&
            likely(!(error= table->file->ha_rnd_init(1))))
          error= table->file->ha_rnd_next(table->record[0]);
      }
      mode= RNEXT;
      break;
    case RPREV:
      DBUG_ASSERT(keyname != 0);
      /* Check if we read from the same index. */
      DBUG_ASSERT((uint) keyno == table->file->get_index());
      if (table->file->inited != handler::NONE)
      {
        if ((error= table->file->can_continue_handler_scan()))
          break;
        error= table->file->ha_index_prev(table->record[0]);
        break;
      }
      /* else fall through: no scan started yet, behave as READ LAST */
    case RLAST:
      DBUG_ASSERT(keyname != 0);
      if (likely(!(error= table->file->ha_index_or_rnd_end())) &&
          likely(!(error= table->file->ha_index_init(keyno, 1))))
        error= table->file->ha_index_last(table->record[0]);
      mode=RPREV;
      break;
    case RNEXT_SAME:
      /* Continue scan on "(keypart1,keypart2,...)=(c1, c2, ...) */
      DBUG_ASSERT(keyname != 0);
      error= table->file->ha_index_next_same(table->record[0], key,
                                             handler->key_len);
      break;
    case RKEY:
    {
      DBUG_ASSERT(keyname != 0);

      /* Key image survives the statement so RNEXT_SAME can reuse it. */
      if (unlikely(!(key= (uchar*) thd->calloc(ALIGN_SIZE(handler->key_len)))))
        goto err;
      if (unlikely((error= table->file->ha_index_or_rnd_end())))
        break;
      key_copy(key, table->record[0], table->key_info + keyno,
               handler->key_len);
      if (unlikely(!(error= table->file->ha_index_init(keyno, 1))))
        error= table->file->ha_index_read_map(table->record[0],
                                              key, handler->keypart_map,
                                              ha_rkey_mode);
      mode= rkey_to_rnext[(int)ha_rkey_mode];
      break;
    }
    default:
      my_error(ER_ILLEGAL_HA, MYF(0), table->file->table_type(),
               table->s->db.str, table->s->table_name.str);
      goto err;
    }

    if (unlikely(error))
    {
      if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
      {
        /* Don't give error in the log file for some expected problems */
        if (error != HA_ERR_RECORD_CHANGED && error != HA_ERR_WRONG_COMMAND)
          sql_print_error("mysql_ha_read: Got error %d when reading "
                          "table '%s'",
                          error, tables->table_name.str);
        table->file->print_error(error,MYF(0));
        table->file->ha_index_or_rnd_end();
        goto err;
      }
      /* Key not found / EOF simply ends the result set. */
      goto ok;
    }
    if (cond && !cond->val_int())
    {
      /* Row filtered out by WHERE; a failed evaluation is still an error. */
      if (thd->is_error())
        goto err;
      continue;
    }
    if (num_rows >= offset_limit_cnt)
    {
      protocol->prepare_for_resend();

      if (protocol->send_result_set_row(&handler->fields))
        goto err;

      protocol->write();
    }
    num_rows++;
  }
ok:
  /*
    Always close statement transaction explicitly,
    so that the engine doesn't have to count locks.
  */
  trans_commit_stmt(thd);
  mysql_unlock_tables(thd, handler->lock, 0);
  my_eof(thd);
  DBUG_PRINT("exit",("OK"));
  DBUG_RETURN(FALSE);

err:
  trans_rollback_stmt(thd);
  mysql_unlock_tables(thd, handler->lock, 0);
err0:
  DBUG_PRINT("exit",("ERROR"));
  DBUG_RETURN(TRUE);
}


/**
  Prepare for handler read

  For parameters, see mysql_ha_read()
*/

SQL_HANDLER *mysql_ha_read_prepare(THD *thd, TABLE_LIST *tables,
                                   enum enum_ha_read_modes mode,
                                   const char *keyname,
                                   List<Item> *key_expr,
                                   enum ha_rkey_function ha_rkey_mode,
                                   Item *cond)
{
  SQL_HANDLER *handler;
  DBUG_ENTER("mysql_ha_read_prepare");
  if (!(handler= mysql_ha_find_handler(thd, &tables->alias)))
    DBUG_RETURN(0);
  tables->table= handler->table;                // This is used by fix_fields
  handler->table->pos_in_table_list= tables;
  if (mysql_ha_fix_cond_and_key(handler, mode, keyname, key_expr,
                                ha_rkey_mode, cond, 1))
    DBUG_RETURN(0);
  DBUG_RETURN(handler);
}



/**
  Scan the handler tables hash for matching tables.

  @param thd Thread identifier.
  @param tables The list of tables to remove.

  @return Pointer to head of linked list (TABLE_LIST::next_local) of matching
          TABLE_LIST elements from handler_tables_hash. Otherwise, NULL if no
          table was matched.
*/

static SQL_HANDLER *mysql_ha_find_match(THD *thd, TABLE_LIST *tables)
{
  SQL_HANDLER *hash_tables, *head= NULL;
  TABLE_LIST *first= tables;
  DBUG_ENTER("mysql_ha_find_match");

  /* search for all handlers with matching table names */
  for (uint i= 0; i < thd->handler_tables_hash.records; i++)
  {
    hash_tables= (SQL_HANDLER*) my_hash_element(&thd->handler_tables_hash, i);

    for (tables= first; tables; tables= tables->next_local)
    {
      if (tables->is_anonymous_derived_table())
        continue;
      /* Empty db means "match any database" (e.g. FLUSH TABLES t1). */
      if ((! tables->db.str[0] ||
           ! my_strcasecmp(&my_charset_latin1, hash_tables->db.str,
                           tables->get_db_name())) &&
          ! my_strcasecmp(&my_charset_latin1, hash_tables->table_name.str,
                          tables->get_table_name()))
      {
        /* Link into hash_tables list */
        hash_tables->next= head;
        head= hash_tables;
        break;
      }
    }
  }
  DBUG_RETURN(head);
}


/**
  Remove matching tables from the HANDLER's hash table.

  @param thd Thread identifier.
  @param tables The list of tables to remove.

  @note Broadcasts refresh if it closed a table with old version.
*/

void mysql_ha_rm_tables(THD *thd, TABLE_LIST *tables)
{
  SQL_HANDLER *hash_tables, *next;
  DBUG_ENTER("mysql_ha_rm_tables");

  DBUG_ASSERT(tables);

  hash_tables= mysql_ha_find_match(thd, tables);

  while (hash_tables)
  {
    next= hash_tables->next;
    if (hash_tables->table)
      mysql_ha_close_table(hash_tables);
    my_hash_delete(&thd->handler_tables_hash, (uchar*) hash_tables);
    hash_tables= next;
  }

  /*
    Mark MDL_context as no longer breaking protocol if we have
    closed last HANDLER.
  */
  if (! thd->handler_tables_hash.records)
    thd->mdl_context.set_needs_thr_lock_abort(FALSE);

  DBUG_VOID_RETURN;
}


/**
  Close cursors of matching tables from the HANDLER's hash table.

  @param thd Thread identifier.
  @param all_tables The list of tables to flush.
*/

void mysql_ha_flush_tables(THD *thd, TABLE_LIST *all_tables)
{
  DBUG_ENTER("mysql_ha_flush_tables");

  for (TABLE_LIST *table_list= all_tables; table_list;
       table_list= table_list->next_global)
  {
    SQL_HANDLER *hash_tables= mysql_ha_find_match(thd, table_list);
    /* Close all aliases of the same table. */
    while (hash_tables)
    {
      SQL_HANDLER *next_local= hash_tables->next;
      if (hash_tables->table)
        mysql_ha_close_table(hash_tables);
      hash_tables= next_local;
    }
  }

  DBUG_VOID_RETURN;
}


/**
  Flush (close and mark for re-open) all tables that should be reopened.

  @param thd Thread identifier.

  @note Broadcasts refresh if it closed a table with old version.
+*/ + +void mysql_ha_flush(THD *thd) +{ + SQL_HANDLER *hash_tables; + DBUG_ENTER("mysql_ha_flush"); + + /* + Don't try to flush open HANDLERs when we're working with + system tables. The main MDL context is backed up and we can't + properly release HANDLER locks stored there. + */ + if (thd->state_flags & Open_tables_state::BACKUPS_AVAIL) + DBUG_VOID_RETURN; + + for (uint i= 0; i < thd->handler_tables_hash.records; i++) + { + hash_tables= (SQL_HANDLER*) my_hash_element(&thd->handler_tables_hash, i); + /* + TABLE::mdl_ticket is 0 for temporary tables so we need extra check. + */ + if (hash_tables->table && + ((hash_tables->table->mdl_ticket && + hash_tables->table->mdl_ticket->has_pending_conflicting_lock()) || + (!hash_tables->table->s->tmp_table && + hash_tables->table->s->tdc->flushed))) + mysql_ha_close_table(hash_tables); + } + + DBUG_VOID_RETURN; +} + + +/** + Close all HANDLER's tables. + + @param thd Thread identifier. + + @note Broadcasts refresh if it closed a table with old version. +*/ + +void mysql_ha_cleanup_no_free(THD *thd) +{ + SQL_HANDLER *hash_tables; + DBUG_ENTER("mysql_ha_cleanup_no_free"); + + for (uint i= 0; i < thd->handler_tables_hash.records; i++) + { + hash_tables= (SQL_HANDLER*) my_hash_element(&thd->handler_tables_hash, i); + if (hash_tables->table) + mysql_ha_close_table(hash_tables); + } + DBUG_VOID_RETURN; +} + + +void mysql_ha_cleanup(THD *thd) +{ + DBUG_ENTER("mysql_ha_cleanup"); + mysql_ha_cleanup_no_free(thd); + my_hash_free(&thd->handler_tables_hash); + DBUG_VOID_RETURN; +} + + +/** + Set explicit duration for metadata locks corresponding to open HANDLERs + to protect them from being released at the end of transaction. + + @param thd Thread identifier. 
+*/ + +void mysql_ha_set_explicit_lock_duration(THD *thd) +{ + SQL_HANDLER *hash_tables; + DBUG_ENTER("mysql_ha_set_explicit_lock_duration"); + + for (uint i= 0; i < thd->handler_tables_hash.records; i++) + { + hash_tables= (SQL_HANDLER*) my_hash_element(&thd->handler_tables_hash, i); + if (hash_tables->table && hash_tables->table->mdl_ticket) + thd->mdl_context.set_lock_duration(hash_tables->table->mdl_ticket, + MDL_EXPLICIT); + } + DBUG_VOID_RETURN; +} + + +/** + Remove temporary tables from the HANDLER's hash table. The reason + for having a separate function, rather than calling + mysql_ha_rm_tables() is that it is not always feasible (e.g. in + THD::close_temporary_tables) to obtain a TABLE_LIST containing the + temporary tables. + + @See THD::close_temporary_tables() + @param thd Thread identifier. +*/ +void mysql_ha_rm_temporary_tables(THD *thd) +{ + DBUG_ENTER("mysql_ha_rm_temporary_tables"); + + TABLE_LIST *tmp_handler_tables= NULL; + for (uint i= 0; i < thd->handler_tables_hash.records; i++) + { + TABLE_LIST *handler_table= reinterpret_cast + (my_hash_element(&thd->handler_tables_hash, i)); + + if (handler_table->table && handler_table->table->s->tmp_table) + { + handler_table->next_local= tmp_handler_tables; + tmp_handler_tables= handler_table; + } + } + + if (tmp_handler_tables) + mysql_ha_rm_tables(thd, tmp_handler_tables); + + DBUG_VOID_RETURN; +} diff --git a/sql/sql_handler.h b/sql/sql_handler.h new file mode 100644 index 00000000..4ac0d09e --- /dev/null +++ b/sql/sql_handler.h @@ -0,0 +1,86 @@ +#ifndef SQL_HANDLER_INCLUDED +#define SQL_HANDLER_INCLUDED +/* Copyright (c) 2006, 2015, Oracle and/or its affiliates. + Copyright (C) 2010, 2015, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */

#ifdef USE_PRAGMA_INTERFACE
#pragma interface                       /* gcc class implementation */
#endif

#include "sql_class.h"                  /* enum_ha_read_mode */
#include "my_base.h"                    /* ha_rkey_function, ha_rows */
#include "sql_list.h"                   /* List */

/* Open handlers are stored here */

class SQL_HANDLER {
public:
  TABLE *table;                  /* Open table; 0 if currently closed */
  List<Item> fields;             /* Fields, set on open */
  THD *thd;                      /* Owning connection */
  LEX_CSTRING handler_name;      /* Alias given in HANDLER ... OPEN AS */
  LEX_CSTRING db;
  LEX_CSTRING table_name;
  MEM_ROOT mem_root;             /* Owns the strings and key buffers above */
  MYSQL_LOCK *lock;
  MDL_request mdl_request;

  key_part_map keypart_map;      /* Key parts used by the last READ ... = */
  int keyno;                     /* Used key; -1 if none */
  uint key_len;
  enum enum_ha_read_modes mode;  /* Last read mode (RFIRST, RNEXT, ...) */

  /* This is only used when deleting many handler objects */
  SQL_HANDLER *next;

  Query_arena arena;
  char *base_data;
  SQL_HANDLER(THD *thd_arg) :
    thd(thd_arg), arena(&mem_root, Query_arena::STMT_INITIALIZED)
  { init(); clear_alloc_root(&mem_root); base_data= 0; }
  /* Reset members that must be cleared on (re)open; see also reset(). */
  void init()
  {
    keyno= -1;
    table= 0;
    lock= 0;
    mdl_request.ticket= 0;
  }
  void reset();

  ~SQL_HANDLER();
};

class THD;
struct TABLE_LIST;

bool mysql_ha_open(THD *thd, TABLE_LIST *tables, SQL_HANDLER *reopen);
bool mysql_ha_close(THD *thd, TABLE_LIST *tables);
bool mysql_ha_read(THD *, TABLE_LIST *, enum enum_ha_read_modes, const char *,
                   List<Item> *, enum ha_rkey_function, Item *,
                   ha_rows, ha_rows);
void mysql_ha_flush(THD *thd);
void mysql_ha_flush_tables(THD *thd, TABLE_LIST *all_tables);
void mysql_ha_rm_tables(THD *thd, TABLE_LIST *tables);
void mysql_ha_cleanup_no_free(THD *thd);
void mysql_ha_cleanup(THD *thd);
void mysql_ha_set_explicit_lock_duration(THD *thd);
void mysql_ha_rm_temporary_tables(THD *thd);

SQL_HANDLER *mysql_ha_read_prepare(THD *thd, TABLE_LIST *tables,
                                   enum enum_ha_read_modes mode,
                                   const char *keyname,
                                   List<Item> *key_expr,
                                   enum ha_rkey_function ha_rkey_mode,
                                   Item *cond);
#endif
diff --git a/sql/sql_help.cc b/sql/sql_help.cc
new file mode 100644
index 00000000..f9932f11
--- /dev/null
+++ b/sql/sql_help.cc
@@ -0,0 +1,1088 @@
/* Copyright (c) 2002, 2012, Oracle and/or its affiliates.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */

#include "mariadb.h"
#include "sql_priv.h"
#include "unireg.h"
#include "sql_help.h"
#include "sql_table.h"                          // primary_key_name
#include "sql_base.h"                           // REPORT_ALL_ERRORS, setup_tables
#include "opt_range.h"                          // SQL_SELECT
#include "records.h"                            // init_read_record, end_read_record

/* Maps a (table, column) pair of the mysql help tables to its Field. */
struct st_find_field
{
  const char *table_name, *field_name;
  Field *field;
};

/* Used fields */

static struct st_find_field init_used_fields[]=
{
  { "help_topic",    "help_topic_id",      0},
  { "help_topic",    "name",               0},
  { "help_topic",    "help_category_id",   0},
  { "help_topic",    "description",        0},
  { "help_topic",    "example",            0},

  { "help_category", "help_category_id",   0},
  { "help_category", "parent_category_id", 0},
  { "help_category", "name",               0},

  { "help_keyword",  "help_keyword_id",    0},
  { "help_keyword",  "name",               0},

  { "help_relation", "help_topic_id",      0},
  { "help_relation", "help_keyword_id",    0}
};

/* Indexes into init_used_fields[]; must stay in sync with the array above. */
enum enum_used_fields
{
  help_topic_help_topic_id= 0,
  help_topic_name,
  help_topic_help_category_id,
  help_topic_description,
  help_topic_example,

  help_category_help_category_id,
  help_category_parent_category_id,
  help_category_name,

  help_keyword_help_keyword_id,
  help_keyword_name,

  help_relation_help_topic_id,
  help_relation_help_keyword_id
};


/*
  Fill st_find_field structure with pointers to fields

  SYNOPSIS
    init_fields()
    thd          Thread handler
    tables       list of all tables for fields
    find_fields  array of structures
    count        size of previous array

  RETURN VALUES
    0  all ok
    1  one of the fields was not found
*/

static bool init_fields(THD *thd, TABLE_LIST *tables,
                        struct st_find_field *find_fields, uint count)
{
  Name_resolution_context *context= &thd->lex->first_select_lex()->context;
  DBUG_ENTER("init_fields");
  context->resolve_in_table_list_only(tables);
  for (; count-- ; find_fields++)
  {
    /* We have to use 'new' here as field will be re_linked on free */
    Item_field *field= (new (thd->mem_root)
                        Item_field(thd, context,
                                   {STRING_WITH_LEN("mysql")},
                                   Lex_cstring_strlen(find_fields->table_name),
                                   Lex_cstring_strlen(find_fields->field_name)));
    if (!(find_fields->field= find_field_in_tables(thd, field, tables, NULL,
                                                   ignored_tables_list_t(NULL),
                                                   0, REPORT_ALL_ERRORS, 1,
                                                   TRUE)))
      DBUG_RETURN(1);
    bitmap_set_bit(find_fields->field->table->read_set,
                   find_fields->field->field_index);
    /* To make life easier when setting values in keys */
    bitmap_set_bit(find_fields->field->table->write_set,
                   find_fields->field->field_index);
  }
  DBUG_RETURN(0);
}


/*
  Returns variants of found topic for help (if it is just single topic,
  returns description and example, or else returns only names..)
+ + SYNOPSIS + memorize_variant_topic() + + thd Thread handler + topics Table of topics + count number of alredy found topics + find_fields Filled array of information for work with fields + + RETURN VALUES + names array of names of found topics (out) + + name name of found topic (out) + description description of found topic (out) + example example for found topic (out) + + NOTE + Field 'names' is set only if more than one topic is found. + Fields 'name', 'description', 'example' are set only if + found exactly one topic. +*/ + +void memorize_variant_topic(THD *thd, TABLE *topics, int count, + struct st_find_field *find_fields, + List *names, + String *name, String *description, String *example) +{ + DBUG_ENTER("memorize_variant_topic"); + MEM_ROOT *mem_root= thd->mem_root; + if (count==0) + { + get_field(mem_root,find_fields[help_topic_name].field, name); + get_field(mem_root,find_fields[help_topic_description].field, description); + get_field(mem_root,find_fields[help_topic_example].field, example); + } + else + { + if (count == 1) + names->push_back(name, thd->mem_root); + String *new_name= new (thd->mem_root) String; + get_field(mem_root,find_fields[help_topic_name].field,new_name); + names->push_back(new_name, thd->mem_root); + } + DBUG_VOID_RETURN; +} + +/* + Look for topics by mask + + SYNOPSIS + search_topics() + thd Thread handler + topics Table of topics + find_fields Filled array of info for fields + select Function to test for matching help topic. + Normally 'help_topic.name like 'bit%' + + RETURN VALUES + # number of topics found + + names array of names of found topics (out) + name name of found topic (out) + description description of found topic (out) + example example for found topic (out) + + NOTE + Field 'names' is set only if more than one topic was found. + Fields 'name', 'description', 'example' are set only if + exactly one topic was found. 

*/

int search_topics(THD *thd, TABLE *topics, struct st_find_field *find_fields,
                  SQL_SELECT *select, List<String> *names,
                  String *name, String *description, String *example)
{
  int count= 0;
  READ_RECORD read_record_info;
  DBUG_ENTER("search_topics");

  /* Should never happen. As this is part of help, we can ignore this */
  if (init_read_record(&read_record_info, thd, topics, select, NULL, 1, 0,
                       FALSE))
    DBUG_RETURN(0);

  while (!read_record_info.read_record())
  {
    if (!select->cond->val_int())               // Doesn't match like
      continue;
    memorize_variant_topic(thd, topics, count, find_fields,
                           names, name, description, example);
    count++;
  }
  end_read_record(&read_record_info);

  DBUG_RETURN(count);
}

/*
  Look for keyword by mask

  SYNOPSIS
    search_keyword()
    thd          Thread handler
    keywords     Table of keywords
    find_fields  Filled array of info for fields
    select       Function to test for matching keyword.
                 Normally 'help_keyword.name like 'bit%'

    key_id       help_keyword_if of found topics (out)

  RETURN VALUES
    0   didn't find any topics matching the mask
    1   found exactly one topic matching the mask
    2   found more than one topic matching the mask
*/

int search_keyword(THD *thd, TABLE *keywords,
                   struct st_find_field *find_fields,
                   SQL_SELECT *select, int *key_id)
{
  int count= 0;
  READ_RECORD read_record_info;
  DBUG_ENTER("search_keyword");
  /* Should never happen. As this is part of help, we can ignore this */
  if (init_read_record(&read_record_info, thd, keywords, select, NULL, 1, 0,
                       FALSE))
    DBUG_RETURN(0);

  /* Stop after two matches: callers only distinguish 0, 1 and "many". */
  while (!read_record_info.read_record() && count<2)
  {
    if (!select->cond->val_int())               // Doesn't match like
      continue;

    *key_id= (int)find_fields[help_keyword_help_keyword_id].field->val_int();

    count++;
  }
  end_read_record(&read_record_info);

  DBUG_RETURN(count);
}

/*
  Look for all topics with keyword

  SYNOPSIS
    get_topics_for_keyword()
    thd          Thread handler
    topics       Table of topics
    relations    Table of m:m relation "topic/keyword"
    find_fields  Filled array of info for fields
    key_id       Primary index to use to find for keyword

  RETURN VALUES
    #            number of topics found

    names        array of name of found topics (out)

    name         name of found topic (out)
    description  description of found topic (out)
    example      example for found topic (out)

  NOTE
    Field 'names' is set only if more than one topic was found.
    Fields 'name', 'description', 'example' are set only if
    exactly one topic was found.
*/

int get_topics_for_keyword(THD *thd, TABLE *topics, TABLE *relations,
                           struct st_find_field *find_fields, int16 key_id,
                           List<String> *names,
                           String *name, String *description, String *example)
{
  uchar buff[8];                                // Max int length
  int count= 0;
  int iindex_topic, iindex_relations;
  Field *rtopic_id, *rkey_id;
  DBUG_ENTER("get_topics_for_keyword");

  /* Both tables must have a usable PRIMARY key, else the help db is broken. */
  if ((iindex_topic=
       find_type(primary_key_name.str, &topics->s->keynames,
                 FIND_TYPE_NO_PREFIX) - 1) < 0 ||
      (iindex_relations=
       find_type(primary_key_name.str, &relations->s->keynames,
                 FIND_TYPE_NO_PREFIX) - 1) < 0)
  {
    my_message(ER_CORRUPT_HELP_DB, ER_THD(thd, ER_CORRUPT_HELP_DB), MYF(0));
    DBUG_RETURN(-1);
  }
  rtopic_id= find_fields[help_relation_help_topic_id].field;
  rkey_id= find_fields[help_relation_help_keyword_id].field;

  if (topics->file->ha_index_init(iindex_topic,1) ||
      relations->file->ha_index_init(iindex_relations,1))
  {
    /* Only end the first index if it was the second init that failed. */
    if (topics->file->inited)
      topics->file->ha_index_end();
    my_message(ER_CORRUPT_HELP_DB, ER_THD(thd, ER_CORRUPT_HELP_DB), MYF(0));
    DBUG_RETURN(-1);
  }

  rkey_id->store((longlong) key_id, TRUE);
  rkey_id->get_key_image(buff, rkey_id->pack_length(), Field::itRAW);
  int key_res= relations->file->ha_index_read_map(relations->record[0],
                                                  buff, (key_part_map) 1,
                                                  HA_READ_KEY_EXACT);

  /* Walk all relation rows for this keyword; look up each topic by id. */
  for ( ;
        !key_res && key_id == (int16) rkey_id->val_int() ;
        key_res= relations->file->ha_index_next(relations->record[0]))
  {
    uchar topic_id_buff[8];
    longlong topic_id= rtopic_id->val_int();
    Field *field= find_fields[help_topic_help_topic_id].field;
    field->store((longlong) topic_id, TRUE);
    field->get_key_image(topic_id_buff, field->pack_length(), Field::itRAW);

    if (!topics->file->ha_index_read_map(topics->record[0], topic_id_buff,
                                         (key_part_map)1, HA_READ_KEY_EXACT))
    {
      memorize_variant_topic(thd, topics, count, find_fields,
                             names, name, description, example);
      count++;
    }
  }
  topics->file->ha_index_end();
  relations->file->ha_index_end();

  DBUG_RETURN(count);
}

/*
  Look for categories by mask

  SYNOPSIS
    search_categories()
    thd          THD for init_read_record
    categories   Table of categories
    find_fields  Filled array of info for fields
    select       Function to test for if matching help topic.
                 Normally 'help_category.name like 'bit%'
    names        List of found categories names (out)
    res_id       Primary index of found category (only if
                 found exactly one category)

  RETURN VALUES
    #            Number of categories found
*/

int search_categories(THD *thd, TABLE *categories,
                      struct st_find_field *find_fields,
                      SQL_SELECT *select, List<String> *names, int16 *res_id)
{
  Field *pfname= find_fields[help_category_name].field;
  Field *pcat_id= find_fields[help_category_help_category_id].field;
  int count= 0;
  READ_RECORD read_record_info;
  DBUG_ENTER("search_categories");

  /* Should never happen. As this is part of help, we can ignore this */
  if (init_read_record(&read_record_info, thd, categories, select, NULL,
                       1, 0, FALSE))
    DBUG_RETURN(0);
  while (!read_record_info.read_record())
  {
    if (select && !select->cond->val_int())
      continue;
    String *lname= new (thd->mem_root) String;
    get_field(thd->mem_root, pfname, lname);
    /* Remember the id of the first match only; meaningful when count==1. */
    if (++count == 1 && res_id)
      *res_id= (int16) pcat_id->val_int();
    names->push_back(lname, thd->mem_root);
  }
  end_read_record(&read_record_info);

  DBUG_RETURN(count);
}

/*
  Look for all topics or subcategories of category

  SYNOPSIS
    get_all_items_for_category()
    thd     Thread handler
    items   Table of items
    pfname  Field "name" in items
    select  "where" part of query..
    res     list of found names
*/

void get_all_items_for_category(THD *thd, TABLE *items, Field *pfname,
                                SQL_SELECT *select, List<String> *res)
{
  READ_RECORD read_record_info;
  DBUG_ENTER("get_all_items_for_category");

  /* Should never happen. As this is part of help, we can ignore this */
  if (init_read_record(&read_record_info, thd, items, select, NULL, 1, 0,
                       FALSE))
    DBUG_VOID_RETURN;

  while (!read_record_info.read_record())
  {
    if (!select->cond->val_int())
      continue;
    String *name= new (thd->mem_root) String();
    get_field(thd->mem_root, pfname, name);
    res->push_back(name, thd->mem_root);
  }
  end_read_record(&read_record_info);

  DBUG_VOID_RETURN;
}


/**
  Collect field names of HELP header that will be sent to a client

  @param thd Thread data object
  @param[out] field_list List of fields whose metadata should be collected for
                         sending to client
*/

static void fill_answer_1_fields(THD *thd, List<Item> *field_list)
{
  MEM_ROOT *mem_root= thd->mem_root;

  field_list->push_back(new (mem_root) Item_empty_string(thd, "name", 64),
                        mem_root);
  field_list->push_back(new (mem_root) Item_empty_string(thd, "description",
                                                         1000),
                        mem_root);
  field_list->push_back(new (mem_root) Item_empty_string(thd, "example", 1000),
                        mem_root);
}


/**
  Send metadata of an answer on help request to a client

  @param protocol protocol for sending
*/

static bool send_answer_1_metadata(Protocol *protocol)
{
  List<Item> field_list;

  fill_answer_1_fields(protocol->thd, &field_list);
  return protocol->send_result_set_metadata(&field_list,
                                            Protocol::SEND_NUM_ROWS |
                                            Protocol::SEND_EOF);
}


/*
  Send to client answer for help request

  SYNOPSIS
    send_answer_1()
    protocol - protocol for sending
    s1 - value of column "Name"
    s2 - value of column "Description"
    s3 - value of column "Example"

  IMPLEMENTATION
   Format used:
   +----------+------------+------------+
   |name      |description |example     |
   +----------+------------+------------+
   |String(64)|String(1000)|String(1000)|
   +----------+------------+------------+
   with exactly one row!
+ + RETURN VALUES + 1 Writing of head failed + -1 Writing of row failed + 0 Successeful send +*/ + +static int send_answer_1(Protocol *protocol, String *s1, String *s2, String *s3) +{ + DBUG_ENTER("send_answer_1"); + + if (send_answer_1_metadata(protocol)) + DBUG_RETURN(1); + + protocol->prepare_for_resend(); + protocol->store(s1); + protocol->store(s2); + protocol->store(s3); + if (protocol->write()) + DBUG_RETURN(-1); + DBUG_RETURN(0); +} + + +/** + Collect field names of HELP header that will be sent to a client + + @param thd Thread data object + @param[out] field_list List of fields whose metadata should be collected for + sending to client + @param for_category need column 'source_category_name' +*/ + +static void fill_header_2_fields(THD *thd, List *field_list, + bool for_category) +{ + MEM_ROOT *mem_root= thd->mem_root; + if (for_category) + field_list->push_back(new (mem_root) + Item_empty_string(thd, "source_category_name", 64), + mem_root); + field_list->push_back(new (mem_root) + Item_empty_string(thd, "name", 64), + mem_root); + field_list->push_back(new (mem_root) + Item_empty_string(thd, "is_it_category", 1), + mem_root); +} + + +/* + Send to client help header + + SYNOPSIS + send_header_2() + protocol - protocol for sending + for_category - need column 'source_category_name' + + IMPLEMENTATION + +- -+ + |+-------------------- | +----------+--------------+ + ||source_category_name | |name |is_it_category| + |+-------------------- | +----------+--------------+ + ||String(64) | |String(64)|String(1) | + |+-------------------- | +----------+--------------+ + +- -+ + + RETURN VALUES + result of protocol->send_result_set_metadata +*/ + +static int send_header_2(Protocol *protocol, bool for_category) +{ + DBUG_ENTER("send_header_2"); + List field_list; + + fill_header_2_fields(protocol->thd, &field_list, for_category); + DBUG_RETURN(protocol->send_result_set_metadata(&field_list, + Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF)); +} + +/* + strcmp for 
using in qsort + + SYNOPSIS + strptrcmp() + ptr1 (const void*)&str1 + ptr2 (const void*)&str2 + + RETURN VALUES + same as strcmp +*/ + +extern "C" int string_ptr_cmp(const void* ptr1, const void* ptr2) +{ + String *str1= *(String**)ptr1; + String *str2= *(String**)ptr2; + uint length1= str1->length(); + uint length2= str2->length(); + int tmp= memcmp(str1->ptr(),str2->ptr(), MY_MIN(length1, length2)); + if (tmp) + return tmp; + return (int) length2 - (int) length1; +} + +/* + Send to client rows in format: + column1 : + column2 : + + SYNOPSIS + send_variant_2_list() + protocol Protocol for sending + names List of names + cat Value of the column + source_name name of category for all items.. + + RETURN VALUES + -1 Writing fail + 0 Data was successefully send +*/ + +int send_variant_2_list(MEM_ROOT *mem_root, Protocol *protocol, + List *names, + const char *cat, String *source_name) +{ + DBUG_ENTER("send_variant_2_list"); + + String **pointers= (String**)alloc_root(mem_root, + sizeof(String*)*names->elements); + String **pos; + String **end= pointers + names->elements; + + List_iterator it(*names); + for (pos= pointers; pos!=end; (*pos++= it++)) + ; + + my_qsort(pointers,names->elements,sizeof(String*),string_ptr_cmp); + + for (pos= pointers; pos!=end; pos++) + { + protocol->prepare_for_resend(); + if (source_name) + protocol->store(source_name); + protocol->store(*pos); + protocol->store(cat,1,&my_charset_latin1); + if (protocol->write()) + DBUG_RETURN(-1); + } + + DBUG_RETURN(0); +} + +/* + Prepare simple SQL_SELECT table.* WHERE + + SYNOPSIS + prepare_simple_select() + thd Thread handler + cond WHERE part of select + table goal table + + error code of error (out) + + RETURN VALUES + # created SQL_SELECT +*/ + +SQL_SELECT *prepare_simple_select(THD *thd, Item *cond, + TABLE *table, int *error) +{ + cond->fix_fields_if_needed(thd, &cond); // can never fail + + /* Assume that no indexes cover all required fields */ + table->covering_keys.clear_all(); + + SQL_SELECT 
*res= make_select(table, 0, 0, cond, 0, 0, error); + if (unlikely(*error) || + (likely(res) && unlikely(res->check_quick(thd, 0, HA_POS_ERROR))) || + (likely(res) && res->quick && unlikely(res->quick->reset()))) + { + delete res; + res=0; + } + return res; +} + +/* + Prepare simple SQL_SELECT table.* WHERE table.name LIKE mask + + SYNOPSIS + prepare_select_for_name() + thd Thread handler + mask mask for compare with name + mlen length of mask + table goal table + pfname field "name" in table + + error code of error (out) + + RETURN VALUES + # created SQL_SELECT +*/ + +SQL_SELECT *prepare_select_for_name(THD *thd, const char *mask, size_t mlen, + TABLE *table, Field *pfname, int *error) +{ + MEM_ROOT *mem_root= thd->mem_root; + Item *cond= new (mem_root) + Item_func_like(thd, + new (mem_root) + Item_field(thd, pfname), + new (mem_root) Item_string(thd, mask, (uint)mlen, + pfname->charset()), + new (mem_root) Item_string_ascii(thd, "\\"), + FALSE); + if (unlikely(thd->is_fatal_error)) + return 0; // OOM + return prepare_simple_select(thd, cond, table, error); +} + + +/** + Initialize the TABLE_LIST with tables used in HELP statement handling. 
+ + @param thd Thread handler + @param tables Array of four TABLE_LIST objects to initialize with data + about the tables help_topic, help_category, help_relation, + help_keyword +*/ + +static void initialize_tables_for_help_command(THD *thd, TABLE_LIST *tables) +{ + LEX_CSTRING MYSQL_HELP_TOPIC_NAME= {STRING_WITH_LEN("help_topic") }; + LEX_CSTRING MYSQL_HELP_CATEGORY_NAME= {STRING_WITH_LEN("help_category") }; + LEX_CSTRING MYSQL_HELP_RELATION_NAME= {STRING_WITH_LEN("help_relation") }; + LEX_CSTRING MYSQL_HELP_KEYWORD_NAME= {STRING_WITH_LEN("help_keyword") }; + + tables[0].init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_HELP_TOPIC_NAME, 0, + TL_READ); + tables[1].init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_HELP_CATEGORY_NAME, 0, + TL_READ); + tables[2].init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_HELP_RELATION_NAME, 0, + TL_READ); + tables[3].init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_HELP_KEYWORD_NAME, 0, + TL_READ); + tables[0].next_global= tables[0].next_local= + tables[0].next_name_resolution_table= &tables[1]; + tables[1].next_global= tables[1].next_local= + tables[1].next_name_resolution_table= &tables[2]; + tables[2].next_global= tables[2].next_local= + tables[2].next_name_resolution_table= &tables[3]; +} + + +/** + Setup tables and fields for query. + + @param thd Thread handler + @param first_select_lex SELECT_LEX of the parsed statement + @param tables Array of tables used in handling of the HELP + statement + @param used_fields Array of fields used in handling of the HELP + statement + + @return false on success, else true. +*/ + +template +static bool init_items_for_help_command(THD *thd, + SELECT_LEX *first_select_lex, + TABLE_LIST (&tables)[M], + st_find_field (& used_fields)[N]) +{ + List leaves; + + /* + Initialize tables and fields to be usable from items. 
+ tables do not contain VIEWs => we can pass 0 as conds + */ + first_select_lex->context.table_list= + first_select_lex->context.first_name_resolution_table= + &tables[0]; + + if (setup_tables(thd, &first_select_lex->context, + &first_select_lex->top_join_list, + &tables[0], leaves, false, false)) + return true; + + memcpy((char*) used_fields, (char*) init_used_fields, + sizeof(used_fields[0]) * N); + if (init_fields(thd, &tables[0], used_fields, N)) + return true; + + for (size_t i= 0; i < M; i++) + tables[i].table->file->init_table_handle_for_HANDLER(); + + return false; +} + + +/** + Prepare (in the sense of prepared statement) the HELP statement. + + @param thd Thread handler + @param mask string value passed to the HELP statement + @oaram[out] fields fields for result set metadata + + @return false on success, else true. +*/ + +bool mysqld_help_prepare(THD *thd, const char *mask, List *fields) +{ + TABLE_LIST tables[4]; + st_find_field used_fields[array_elements(init_used_fields)]; + SQL_SELECT *select; + + List topics_list; + + Sql_mode_instant_remove sms(thd, MODE_PAD_CHAR_TO_FULL_LENGTH); + initialize_tables_for_help_command(thd, tables); + + /* + HELP must be available under LOCK TABLES. + Reset and backup the current open tables state to + make it possible. 
+ */ + start_new_trans new_trans(thd); + + if (open_system_tables_for_read(thd, tables)) + return true; + + auto cleanup_and_return= [&](bool ret) + { + thd->commit_whole_transaction_and_close_tables(); + new_trans.restore_old_transaction(); + return ret; + }; + + if (init_items_for_help_command(thd, thd->lex->first_select_lex(), + tables, used_fields)) + return cleanup_and_return(false); + + size_t mlen= strlen(mask); + int error; + + /* + Prepare the query 'SELECT * FROM help_topic WHERE name LIKE mask' + for execution + */ + if (!(select= + prepare_select_for_name(thd,mask, mlen, tables[0].table, + used_fields[help_topic_name].field, &error))) + return cleanup_and_return(true); + + String name, description, example; + /* + Run the query 'SELECT * FROM help_topic WHERE name LIKE mask' + */ + int count_topics= search_topics(thd, tables[0].table, used_fields, + select, &topics_list, + &name, &description, &example); + delete select; + + if (thd->is_error()) + return cleanup_and_return(true); + + if (count_topics == 0) + { + int UNINIT_VAR(key_id); + /* + Prepare the query 'SELECT * FROM help_keyword WHERE name LIKE mask' + for execution + */ + if (!(select= + prepare_select_for_name(thd, mask, mlen, tables[3].table, + used_fields[help_keyword_name].field, + &error))) + return cleanup_and_return(true); + + /* + Run the query 'SELECT * FROM help_keyword WHERE name LIKE mask' + */ + count_topics= search_keyword(thd,tables[3].table, used_fields, select, + &key_id); + delete select; + count_topics= (count_topics != 1) ? 
0 : + get_topics_for_keyword(thd, tables[0].table, tables[2].table, + used_fields, key_id, &topics_list, &name, + &description, &example); + + } + + if (count_topics == 0) + { + if (!(select= + prepare_select_for_name(thd, mask, mlen, tables[1].table, + used_fields[help_category_name].field, + &error))) + return cleanup_and_return(true); + + List categories_list; + int16 category_id; + int count_categories= search_categories(thd, tables[1].table, used_fields, + select, + &categories_list,&category_id); + delete select; + if (count_categories == 1) + fill_header_2_fields(thd, fields, true); + else + fill_header_2_fields(thd, fields, false); + } + else if (count_topics == 1) + fill_answer_1_fields(thd, fields); + else + fill_header_2_fields(thd, fields, false); + + return cleanup_and_return(false); +} + + +/* + Server-side function 'help' + + SYNOPSIS + mysqld_help() + thd Thread handler + + RETURN VALUES + FALSE Success + TRUE Error and send_error already committed +*/ + +static bool mysqld_help_internal(THD *thd, const char *mask) +{ + Protocol *protocol= thd->protocol; + SQL_SELECT *select; + st_find_field used_fields[array_elements(init_used_fields)]; + TABLE_LIST tables[4]; + List topics_list, categories_list, subcategories_list; + String name, description, example; + int count_topics, count_categories, error; + size_t mlen= strlen(mask); + MEM_ROOT *mem_root= thd->mem_root; + DBUG_ENTER("mysqld_help"); + + initialize_tables_for_help_command(thd, tables); + + /* + HELP must be available under LOCK TABLES. + Reset and backup the current open tables state to + make it possible. 
+ */ + start_new_trans new_trans(thd); + + if (open_system_tables_for_read(thd, tables)) + goto error2; + + if (init_items_for_help_command(thd, thd->lex->first_select_lex(), + tables, used_fields)) + goto error; + + if (!(select= + prepare_select_for_name(thd,mask,mlen,tables[0].table, + used_fields[help_topic_name].field,&error))) + goto error; + + count_topics= search_topics(thd,tables[0].table,used_fields, + select,&topics_list, + &name, &description, &example); + delete select; + + if (thd->is_error()) + goto error; + + if (count_topics == 0) + { + int UNINIT_VAR(key_id); + if (!(select= + prepare_select_for_name(thd,mask,mlen,tables[3].table, + used_fields[help_keyword_name].field, + &error))) + goto error; + + count_topics= search_keyword(thd,tables[3].table, used_fields, select, + &key_id); + delete select; + count_topics= (count_topics != 1) ? 0 : + get_topics_for_keyword(thd,tables[0].table,tables[2].table, + used_fields,key_id,&topics_list,&name, + &description,&example); + } + + if (count_topics == 0) + { + int16 category_id; + Field *cat_cat_id= used_fields[help_category_parent_category_id].field; + if (!(select= + prepare_select_for_name(thd,mask,mlen,tables[1].table, + used_fields[help_category_name].field, + &error))) + goto error; + + count_categories= search_categories(thd, tables[1].table, used_fields, + select, + &categories_list,&category_id); + delete select; + if (!count_categories) + { + if (send_header_2(protocol,FALSE)) + goto error; + } + else if (count_categories > 1) + { + if (send_header_2(protocol,FALSE) || + send_variant_2_list(mem_root,protocol,&categories_list,"Y",0)) + goto error; + } + else + { + Field *topic_cat_id= used_fields[help_topic_help_category_id].field; + Item *cond_topic_by_cat= + new (mem_root) + Item_func_equal(thd, + new (mem_root) + Item_field(thd, topic_cat_id), + new (mem_root) + Item_int(thd, (int32) category_id)); + Item *cond_cat_by_cat= + new (mem_root) + Item_func_equal(thd, + new (mem_root) Item_field(thd, 
cat_cat_id), + new (mem_root) Item_int(thd, (int32) category_id)); + if (!(select= prepare_simple_select(thd, cond_topic_by_cat, + tables[0].table, &error))) + goto error; + get_all_items_for_category(thd,tables[0].table, + used_fields[help_topic_name].field, + select,&topics_list); + delete select; + if (!(select= prepare_simple_select(thd, cond_cat_by_cat, + tables[1].table, &error))) + goto error; + get_all_items_for_category(thd,tables[1].table, + used_fields[help_category_name].field, + select,&subcategories_list); + delete select; + String *cat= categories_list.head(); + if (send_header_2(protocol, TRUE) || + send_variant_2_list(mem_root,protocol,&topics_list, "N",cat) || + send_variant_2_list(mem_root,protocol,&subcategories_list,"Y",cat)) + goto error; + } + } + else if (count_topics == 1) + { + if (send_answer_1(protocol,&name,&description,&example)) + goto error; + } + else + { + /* First send header and functions */ + if (send_header_2(protocol, FALSE) || + send_variant_2_list(mem_root,protocol, &topics_list, "N", 0)) + goto error; + if (!(select= + prepare_select_for_name(thd,mask,mlen,tables[1].table, + used_fields[help_category_name].field,&error))) + goto error; + search_categories(thd, tables[1].table, used_fields, + select,&categories_list, 0); + delete select; + /* Then send categories */ + if (send_variant_2_list(mem_root,protocol, &categories_list, "Y", 0)) + goto error; + } + my_eof(thd); + + thd->commit_whole_transaction_and_close_tables(); + new_trans.restore_old_transaction(); + DBUG_RETURN(FALSE); + +error: + thd->commit_whole_transaction_and_close_tables(); + new_trans.restore_old_transaction(); + +error2: + DBUG_RETURN(TRUE); +} + + +bool mysqld_help(THD *thd, const char *mask) +{ + Sql_mode_instant_remove sms(thd, MODE_PAD_CHAR_TO_FULL_LENGTH); + bool rc= mysqld_help_internal(thd, mask); + return rc; +} diff --git a/sql/sql_help.h b/sql/sql_help.h new file mode 100644 index 00000000..b0117649 --- /dev/null +++ b/sql/sql_help.h @@ -0,0 
+1,30 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_HELP_INCLUDED +#define SQL_HELP_INCLUDED + +class THD; + + +/* + Function prototypes +*/ + +bool mysqld_help (THD *thd, const char *text); + +bool mysqld_help_prepare(THD *thd, const char *text, List *fields); + +#endif /* SQL_HELP_INCLUDED */ diff --git a/sql/sql_hset.h b/sql/sql_hset.h new file mode 100644 index 00000000..41573fb5 --- /dev/null +++ b/sql/sql_hset.h @@ -0,0 +1,114 @@ +#ifndef SQL_HSET_INCLUDED +#define SQL_HSET_INCLUDED +/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "my_global.h" +#include "hash.h" + + +/** + A type-safe wrapper around mysys HASH. +*/ + +template +class Hash_set +{ +public: + enum { START_SIZE= 8 }; + /** + Constructs an empty unique hash. + */ + Hash_set(PSI_memory_key psi_key, uchar *(*K)(const T *, size_t *, my_bool), + CHARSET_INFO *cs= &my_charset_bin) + { + my_hash_init(psi_key, &m_hash, cs, START_SIZE, 0, 0, (my_hash_get_key)K, 0, + HASH_UNIQUE); + } + + Hash_set(PSI_memory_key psi_key, CHARSET_INFO *charset, ulong default_array_elements, + size_t key_offset, size_t key_length, my_hash_get_key get_key, + void (*free_element)(void*), uint flags) + { + my_hash_init(psi_key, &m_hash, charset, default_array_elements, key_offset, + key_length, get_key, free_element, flags); + } + /** + Destroy the hash by freeing the buckets table. Does + not call destructors for the elements. + */ + ~Hash_set() + { + my_hash_free(&m_hash); + } + /** + Insert a single value into a hash. Does not tell whether + the value was inserted -- if an identical value existed, + it is not replaced. + + @retval TRUE Out of memory. + @retval FALSE OK. The value either was inserted or existed + in the hash. + */ + bool insert(T *value) + { + return my_hash_insert(&m_hash, reinterpret_cast(value)); + } + bool remove(T *value) + { + return my_hash_delete(&m_hash, reinterpret_cast(value)); + } + T *find(const void *key, size_t klen) const + { + return (T*)my_hash_search(&m_hash, reinterpret_cast(key), klen); + } + /** Is this hash set empty? */ + bool is_empty() const { return m_hash.records == 0; } + /** Returns the number of unique elements. 
*/ + size_t size() const { return static_cast(m_hash.records); } + /** Erases all elements from the container */ + void clear() { my_hash_reset(&m_hash); } + const T* at(size_t i) const + { + return reinterpret_cast(my_hash_element(const_cast(&m_hash), i)); + } + /** An iterator over hash elements. Is not insert-stable. */ + class Iterator + { + public: + Iterator(Hash_set &hash_set) + : m_hash(&hash_set.m_hash), + m_idx(0) + {} + /** + Return the current element and reposition the iterator to the next + element. + */ + inline T *operator++(int) + { + if (m_idx < m_hash->records) + return reinterpret_cast(my_hash_element(m_hash, m_idx++)); + return NULL; + } + void rewind() { m_idx= 0; } + private: + HASH *m_hash; + uint m_idx; + }; +private: + HASH m_hash; +}; + +#endif // SQL_HSET_INCLUDED diff --git a/sql/sql_i_s.h b/sql/sql_i_s.h new file mode 100644 index 00000000..263031ae --- /dev/null +++ b/sql/sql_i_s.h @@ -0,0 +1,349 @@ +#ifndef SQL_I_S_INCLUDED +#define SQL_I_S_INCLUDED +/* Copyright (c) 2000, 2017, Oracle and/or its affiliates. + Copyright (c) 2009, 2019, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "sql_const.h" // MAX_FIELD_VARCHARLENGTH +#include "sql_basic_types.h" // enum_nullability +#include "sql_string.h" // strlen, MY_CS_CHARACTER_SET_NAME_SIZE +#include "lex_string.h" // LEX_CSTRING +#include "mysql_com.h" // enum_field_types +#include "my_time.h" // TIME_SECOND_PART_DIGITS +#include "sql_type.h" // Type_handler_xxx + +struct TABLE_LIST; +struct TABLE; +typedef class Item COND; + +#ifdef MYSQL_CLIENT +#error MYSQL_CLIENT must not be defined +#endif // MYSQL_CLIENT + + +bool schema_table_store_record(THD *thd, TABLE *table); +COND *make_cond_for_info_schema(THD *thd, COND *cond, TABLE_LIST *table); + + +enum enum_show_open_table +{ + SKIP_OPEN_TABLE= 0U, // do not open table + OPEN_FRM_ONLY= 1U, // open FRM file only + OPEN_FULL_TABLE= 2U // open FRM,MYD, MYI files +}; + + +namespace Show { +class Type +{ + /** + This denotes data type for the column. For the most part, there seems to + be one entry in the enum for each SQL data type, although there seem to + be a number of additional entries in the enum. + */ + const Type_handler *m_type_handler; + /** + For string-type columns, this is the maximum number of + characters. Otherwise, it is the 'display-length' for the column. 
+ */ + uint m_char_length; + uint m_unsigned_flag; + const Typelib *m_typelib; +public: + Type(const Type_handler *th, uint length, uint unsigned_flag, + const Typelib *typelib= NULL) + :m_type_handler(th), m_char_length(length), m_unsigned_flag(unsigned_flag), + m_typelib(typelib) + { } + const Type_handler *type_handler() const { return m_type_handler; } + uint char_length() const { return m_char_length; } + decimal_digits_t decimal_precision() const + { return (decimal_digits_t) ((m_char_length / 100) % 100); } + decimal_digits_t decimal_scale() const + { return (decimal_digits_t) (m_char_length % 10); } + uint fsp() const + { + DBUG_ASSERT(m_char_length <= TIME_SECOND_PART_DIGITS); + return m_char_length; + } + uint unsigned_flag() const { return m_unsigned_flag; } + const Typelib *typelib() const { return m_typelib; } +}; +} // namespace Show + + + +class ST_FIELD_INFO: public Show::Type +{ +protected: + LEX_CSTRING m_name; // I_S column name + enum_nullability m_nullability; // NULLABLE or NOT NULL + LEX_CSTRING m_old_name; // SHOW column name + enum_show_open_table m_open_method; +public: + ST_FIELD_INFO(const LEX_CSTRING &name, const Type &type, + enum_nullability nullability, + LEX_CSTRING &old_name, + enum_show_open_table open_method) + :Type(type), m_name(name), + m_nullability(nullability), + m_old_name(old_name), + m_open_method(open_method) + { } + ST_FIELD_INFO(const char *name, const Type &type, + enum_nullability nullability, + const char *old_name, + enum_show_open_table open_method) + :Type(type), + m_nullability(nullability), + m_open_method(open_method) + { + m_name.str= name; + m_name.length= safe_strlen(name); + m_old_name.str= old_name; + m_old_name.length= safe_strlen(old_name); + } + const LEX_CSTRING &name() const { return m_name; } + bool nullable() const { return m_nullability == NULLABLE; } + const LEX_CSTRING &old_name() const { return m_old_name; } + enum_show_open_table open_method() const { return m_open_method; } + bool 
end_marker() const { return m_name.str == NULL; } +}; + + +namespace Show +{ + + +class Enum: public Type +{ +public: + Enum(const Typelib *typelib) :Type(&type_handler_enum, 0, false, typelib) { } +}; + + +class Blob: public Type +{ +public: + Blob(uint length) :Type(&type_handler_blob, length, false) { } +}; + + +class Varchar: public Type +{ +public: + Varchar(uint length) :Type(&type_handler_varchar, length, false) + { + DBUG_ASSERT(length * 3 <= MAX_FIELD_VARCHARLENGTH); + } +}; + + +class Longtext: public Type +{ +public: + Longtext(uint length) :Type(&type_handler_varchar, length, false) { } +}; + + +class Yes_or_empty: public Varchar +{ +public: + Yes_or_empty(): Varchar(3) { } + static LEX_CSTRING value(bool val) + { + return val ? Lex_cstring(STRING_WITH_LEN("Yes")) : + Lex_cstring(); + } +}; + + +class Catalog: public Varchar +{ +public: + Catalog(): Varchar(FN_REFLEN) { } +}; + + +class Name: public Varchar +{ +public: + Name(): Varchar(NAME_CHAR_LEN) { } +}; + + +class Definer: public Varchar +{ +public: + Definer(): Varchar(DEFINER_CHAR_LENGTH) { } +}; + + +class Userhost: public Varchar +{ +public: + Userhost(): Varchar(USERNAME_CHAR_LENGTH + HOSTNAME_LENGTH + 2) { } +}; + + +class CSName: public Varchar +{ +public: + CSName(): Varchar(MY_CS_CHARACTER_SET_NAME_SIZE) { } +}; + + +class CLName: public Varchar +{ +public: + CLName(): Varchar(MY_CS_COLLATION_NAME_SIZE) { } +}; + + +class SQLMode: public Varchar +{ +public: + SQLMode(): Varchar(32*256) { } +}; + + +class Datetime: public Type +{ +public: + Datetime(uint dec) :Type(&type_handler_datetime2, dec, false) { } +}; + + +class Decimal: public Type +{ +public: + Decimal(uint length) :Type(&type_handler_newdecimal, length, false) { } +}; + + +class ULonglong: public Type +{ +public: + ULonglong(uint length) :Type(&type_handler_ulonglong, length, true) { } + ULonglong() :ULonglong(MY_INT64_NUM_DECIMAL_DIGITS) { } +}; + + +class ULong: public Type +{ +public: + ULong(uint length) 
:Type(&type_handler_ulong, length, true) { } + ULong() :ULong(MY_INT32_NUM_DECIMAL_DIGITS) { } +}; + + +class SLonglong: public Type +{ +public: + SLonglong(uint length) :Type(&type_handler_slonglong, length, false) { } + SLonglong() :SLonglong(MY_INT64_NUM_DECIMAL_DIGITS) { } +}; + + +class SLong: public Type +{ +public: + SLong(uint length) :Type(&type_handler_slong, length, false) { } + SLong() :SLong(MY_INT32_NUM_DECIMAL_DIGITS) { } +}; + + +class SShort: public Type +{ +public: + SShort(uint length) :Type(&type_handler_sshort, length, false) { } +}; + + +class STiny: public Type +{ +public: + STiny(uint length) :Type(&type_handler_stiny, length, false) { } +}; + + +class Double: public Type +{ +public: + Double(uint length) :Type(&type_handler_double, length, false) { } +}; + + +class Float: public Type +{ +public: + Float(uint length) :Type(&type_handler_float, length, false) { } +}; + + + +class Column: public ST_FIELD_INFO +{ +public: + Column(const char *name, const Type &type, + enum_nullability nullability, + const char *old_name, + enum_show_open_table open_method= SKIP_OPEN_TABLE) + :ST_FIELD_INFO(name, type, nullability, + old_name, open_method) + { } + Column(const char *name, const Type &type, + enum_nullability nullability, + enum_show_open_table open_method= SKIP_OPEN_TABLE) + :ST_FIELD_INFO(name, type, nullability, + NullS, open_method) + { } +}; + + +// End marker +class CEnd: public Column +{ +public: + CEnd() :Column(NullS, Varchar(0), NOT_NULL, NullS, SKIP_OPEN_TABLE) { } +}; + + +} // namespace Show + + +struct TABLE_LIST; +typedef class Item COND; + +typedef struct st_schema_table +{ + const char *table_name; + ST_FIELD_INFO *fields_info; + /* for FLUSH table_name */ + int (*reset_table) (); + /* Fill table with data */ + int (*fill_table) (THD *thd, TABLE_LIST *tables, COND *cond); + /* Handle fileds for old SHOW */ + int (*old_format) (THD *thd, struct st_schema_table *schema_table); + int (*process_table) (THD *thd, TABLE_LIST *tables, 
TABLE *table, + bool res, const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name); + int idx_field1, idx_field2; + bool hidden; + uint i_s_requested_object; /* the object we need to open(TABLE | VIEW) */ +} ST_SCHEMA_TABLE; + + +#endif // SQL_I_S_INCLUDED diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc new file mode 100644 index 00000000..26431968 --- /dev/null +++ b/sql/sql_insert.cc @@ -0,0 +1,5345 @@ +/* + Copyright (c) 2000, 2016, Oracle and/or its affiliates. + Copyright (c) 2010, 2022, MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/* Insert of records */ + +/* + INSERT DELAYED + + Insert delayed is distinguished from a normal insert by lock_type == + TL_WRITE_DELAYED instead of TL_WRITE. It first tries to open a + "delayed" table (delayed_get_table()), but falls back to + open_and_lock_tables() on error and proceeds as normal insert then. + + Opening a "delayed" table means to find a delayed insert thread that + has the table open already. If this fails, a new thread is created and + waited for to open and lock the table. + + If accessing the thread succeeded, in + Delayed_insert::get_local_table() the table of the thread is copied + for local use. A copy is required because the normal insert logic + works on a target table, but the other threads table object must not + be used. 
The insert logic uses the record buffer to create a record. + And the delayed insert thread uses the record buffer to pass the + record to the table handler. So there must be different objects. Also + the copied table is not included in the lock, so that the statement + can proceed even if the real table cannot be accessed at this moment. + + Copying a table object is not a trivial operation. Besides the TABLE + object there are the field pointer array, the field objects and the + record buffer. After copying the field objects, their pointers into + the record must be "moved" to point to the new record buffer. + + After this setup the normal insert logic is used. Only that for + delayed inserts write_delayed() is called instead of write_record(). + It inserts the rows into a queue and signals the delayed insert thread + instead of writing directly to the table. + + The delayed insert thread awakes from the signal. It locks the table, + inserts the rows from the queue, unlocks the table, and waits for the + next signal. It does normally live until a FLUSH TABLES or SHUTDOWN. 
+ +*/ + +#include "mariadb.h" /* NO_EMBEDDED_ACCESS_CHECKS */ +#include "sql_priv.h" +#include "sql_insert.h" +#include "sql_update.h" // compare_record +#include "sql_base.h" // close_thread_tables +#include "sql_cache.h" // query_cache_* +#include "key.h" // key_copy +#include "lock.h" // mysql_unlock_tables +#include "sp_head.h" +#include "sql_view.h" // check_key_in_view, insert_view_fields +#include "sql_table.h" // mysql_create_table_no_lock +#include "sql_trigger.h" +#include "sql_select.h" +#include "sql_show.h" +#include "slave.h" +#include "sql_parse.h" // end_active_trans +#include "rpl_mi.h" +#include "transaction.h" +#include "sql_audit.h" +#include "sql_derived.h" // mysql_handle_derived +#include "sql_prepare.h" +#include "debug_sync.h" // DEBUG_SYNC +#include "debug.h" // debug_crash_here +#include +#include "rpl_rli.h" + +#ifdef WITH_WSREP +#include "wsrep_trans_observer.h" /* wsrep_start_transction() */ +#endif /* WITH_WSREP */ + +#ifndef EMBEDDED_LIBRARY +static bool delayed_get_table(THD *thd, MDL_request *grl_protection_request, + TABLE_LIST *table_list); +static int write_delayed(THD *thd, TABLE *table, enum_duplicates duplic, + LEX_STRING query, bool ignore, bool log_on); +static void end_delayed_insert(THD *thd); +pthread_handler_t handle_delayed_insert(void *arg); +static void unlink_blobs(TABLE *table); +#endif +static bool check_view_insertability(THD *thd, TABLE_LIST *view); +static int binlog_show_create_table_(THD *thd, TABLE *table, + Table_specification_st *create_info); + +/* + Check that insert/update fields are from the same single table of a view. + + @param fields The insert/update fields to be checked. + @param values The insert/update values to be checked, NULL if + checking is not wanted. + @param view The view for insert. + @param map [in/out] The insert table map. + + This function is called in 2 cases: + 1. to check insert fields. In this case *map will be set to 0. 
+ Insert fields are checked to be all from the same single underlying + table of the given view. Otherwise the error is thrown. Found table + map is returned in the map parameter. + 2. to check update fields of the ON DUPLICATE KEY UPDATE clause. + In this case *map contains table_map found on the previous call of + the function to check insert fields. Update fields are checked to be + from the same table as the insert fields. + + @returns false if success. +*/ + +static bool check_view_single_update(List &fields, List *values, + TABLE_LIST *view, table_map *map, + bool insert) +{ + /* it is join view => we need to find the table for update */ + List_iterator_fast it(fields); + Item *item; + TABLE_LIST *tbl= 0; // reset for call to check_single_table() + table_map tables= 0; + + while ((item= it++)) + tables|= item->used_tables(); + + /* + Check that table is only one + (we can not rely on check_single_table because it skips some + types of tables) + */ + if (my_count_bits(tables) > 1) + goto error; + + if (values) + { + it.init(*values); + while ((item= it++)) + tables|= item->view_used_tables(view); + } + + /* Convert to real table bits */ + tables&= ~PSEUDO_TABLE_BITS; + + /* Check found map against provided map */ + if (*map) + { + if (tables != *map) + goto error; + return FALSE; + } + + if (view->check_single_table(&tbl, tables, view) || tbl == 0) + goto error; + + /* view->table should have been set in mysql_derived_merge_for_insert */ + DBUG_ASSERT(view->table); + + /* + Use buffer for the insert values that was allocated for the merged view. 
+ */ + tbl->table->insert_values= view->table->insert_values; + view->table= tbl->table; + if (!tbl->single_table_updatable()) + { + if (insert) + my_error(ER_NON_INSERTABLE_TABLE, MYF(0), view->alias.str, "INSERT"); + else + my_error(ER_NON_UPDATABLE_TABLE, MYF(0), view->alias.str, "UPDATE"); + return TRUE; + } + *map= tables; + + return FALSE; + +error: + my_error(ER_VIEW_MULTIUPDATE, MYF(0), + view->view_db.str, view->view_name.str); + return TRUE; +} + + +/* + Check if insert fields are correct. + + @param thd The current thread. + @param table_list The table we are inserting into (may be view) + @param fields The insert fields. + @param values The insert values. + @param check_unique If duplicate values should be rejected. + @param fields_and_values_from_different_maps If 'values' are allowed to + refer to other tables than those of 'fields' + @param map See check_view_single_update + + @returns 0 if success, -1 if error +*/ + +static int check_insert_fields(THD *thd, TABLE_LIST *table_list, + List &fields, List &values, + bool check_unique, + bool fields_and_values_from_different_maps, + table_map *map) +{ + TABLE *table= table_list->table; + DBUG_ENTER("check_insert_fields"); + + if (!table_list->single_table_updatable()) + { + my_error(ER_NON_INSERTABLE_TABLE, MYF(0), table_list->alias.str, "INSERT"); + DBUG_RETURN(-1); + } + + if (fields.elements == 0 && values.elements != 0) + { + if (!table) + { + my_error(ER_VIEW_NO_INSERT_FIELD_LIST, MYF(0), + table_list->view_db.str, table_list->view_name.str); + DBUG_RETURN(-1); + } + if (values.elements != table->s->visible_fields) + { + thd->get_stmt_da()->reset_current_row_for_warning(1); + my_error(ER_WRONG_VALUE_COUNT_ON_ROW, MYF(0), 1L); + DBUG_RETURN(-1); + } +#ifndef NO_EMBEDDED_ACCESS_CHECKS + Field_iterator_table_ref field_it; + field_it.set(table_list); + if (check_grant_all_columns(thd, INSERT_ACL, &field_it)) + DBUG_RETURN(-1); +#endif + /* + No fields are provided so all fields must be provided in the 
values. + Thus we set all bits in the write set. + */ + bitmap_set_all(table->write_set); + } + else + { // Part field list + SELECT_LEX *select_lex= thd->lex->first_select_lex(); + Name_resolution_context *context= &select_lex->context; + Name_resolution_context_state ctx_state; + int res; + + if (fields.elements != values.elements) + { + thd->get_stmt_da()->reset_current_row_for_warning(1); + my_error(ER_WRONG_VALUE_COUNT_ON_ROW, MYF(0), 1L); + DBUG_RETURN(-1); + } + + thd->dup_field= 0; + select_lex->no_wrap_view_item= TRUE; + + /* Save the state of the current name resolution context. */ + ctx_state.save_state(context, table_list); + + /* + Perform name resolution only in the first table - 'table_list', + which is the table that is inserted into. + */ + table_list->next_local= 0; + context->resolve_in_table_list_only(table_list); + /* 'Unfix' fields to allow correct marking by the setup_fields function. */ + if (table_list->is_view()) + unfix_fields(fields); + + res= setup_fields(thd, Ref_ptr_array(), + fields, MARK_COLUMNS_WRITE, 0, NULL, 0); + + /* Restore the current context. */ + ctx_state.restore_state(context, table_list); + thd->lex->first_select_lex()->no_wrap_view_item= FALSE; + + if (res) + DBUG_RETURN(-1); + + if (table_list->is_view() && table_list->is_merged_derived()) + { + if (check_view_single_update(fields, + fields_and_values_from_different_maps ? 
+ (List*) 0 : &values, + table_list, map, true)) + DBUG_RETURN(-1); + table= table_list->table; + } + + if (check_unique && thd->dup_field) + { + my_error(ER_FIELD_SPECIFIED_TWICE, MYF(0), + thd->dup_field->field_name.str); + DBUG_RETURN(-1); + } + } + // For the values we need select_priv +#ifndef NO_EMBEDDED_ACCESS_CHECKS + table->grant.want_privilege= (SELECT_ACL & ~table->grant.privilege); +#endif + + if (check_key_in_view(thd, table_list) || + (table_list->view && + check_view_insertability(thd, table_list))) + { + my_error(ER_NON_INSERTABLE_TABLE, MYF(0), table_list->alias.str, "INSERT"); + DBUG_RETURN(-1); + } + + DBUG_RETURN(0); +} + +static bool has_no_default_value(THD *thd, Field *field, TABLE_LIST *table_list) +{ + if ((field->flags & (NO_DEFAULT_VALUE_FLAG | VERS_ROW_START | VERS_ROW_END)) + == NO_DEFAULT_VALUE_FLAG && field->real_type() != MYSQL_TYPE_ENUM) + { + bool view= false; + if (table_list) + { + table_list= table_list->top_table(); + view= table_list->view != NULL; + } + if (view) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_NO_DEFAULT_FOR_VIEW_FIELD, + ER_THD(thd, ER_NO_DEFAULT_FOR_VIEW_FIELD), + table_list->view_db.str, table_list->view_name.str); + } + else + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_NO_DEFAULT_FOR_FIELD, + ER_THD(thd, ER_NO_DEFAULT_FOR_FIELD), + field->field_name.str); + } + return thd->really_abort_on_warning(); + } + return false; +} + + +/** + Check if update fields are correct. + + @param thd The current thread. + @param insert_table_list The table we are inserting into (may be view) + @param update_fields The update fields. + @param update_values The update values. + @param fields_and_values_from_different_maps If 'update_values' are allowed to + refer to other tables than those of 'update_fields' + @param map See check_view_single_update + + @note + If the update fields include an autoinc field, set the + table->next_number_field_updated flag. 
+ + @returns 0 if success, -1 if error +*/ + +static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list, + List &update_fields, + List &update_values, + bool fields_and_values_from_different_maps, + table_map *map) +{ + TABLE *table= insert_table_list->table; + my_bool UNINIT_VAR(autoinc_mark); + enum_sql_command sql_command_save= thd->lex->sql_command; + + table->next_number_field_updated= FALSE; + + if (table->found_next_number_field) + { + /* + Unmark the auto_increment field so that we can check if this is modified + by update_fields + */ + autoinc_mark= bitmap_test_and_clear(table->write_set, + table->found_next_number_field-> + field_index); + } + + thd->lex->sql_command= SQLCOM_UPDATE; + + /* Check the fields we are going to modify */ + if (setup_fields(thd, Ref_ptr_array(), + update_fields, MARK_COLUMNS_WRITE, 0, NULL, 0)) + { + thd->lex->sql_command= sql_command_save; + return -1; + } + + thd->lex->sql_command= sql_command_save; + + if (insert_table_list->is_view() && + insert_table_list->is_merged_derived() && + check_view_single_update(update_fields, + fields_and_values_from_different_maps ? + (List*) 0 : &update_values, + insert_table_list, map, false)) + return -1; + + if (table->default_field) + table->mark_default_fields_for_write(FALSE); + + if (table->found_next_number_field) + { + if (bitmap_is_set(table->write_set, + table->found_next_number_field->field_index)) + table->next_number_field_updated= TRUE; + + if (autoinc_mark) + bitmap_set_bit(table->write_set, + table->found_next_number_field->field_index); + } + + return 0; +} + +/** + Upgrade table-level lock of INSERT statement to TL_WRITE if + a more concurrent lock is infeasible for some reason. This is + necessary for engines without internal locking support (MyISAM). + An engine with internal locking implementation might later + downgrade the lock in handler::store_lock() method. 
*/

static
void upgrade_lock_type(THD *thd, thr_lock_type *lock_type,
                       enum_duplicates duplic)
{
  /*
    ON DUPLICATE KEY UPDATE, and REPLACE under a concurrent-insert lock,
    need the stronger default write lock.
  */
  if (duplic == DUP_UPDATE ||
      (duplic == DUP_REPLACE && *lock_type == TL_WRITE_CONCURRENT_INSERT))
  {
    *lock_type= TL_WRITE_DEFAULT;
    return;
  }

  if (*lock_type == TL_WRITE_DELAYED)
  {
    /*
      We do not use delayed threads if:
      - we're running in the safe mode or skip-new mode -- the
        feature is disabled in these modes
      - we're executing this statement on a replication slave --
        we need to ensure serial execution of queries on the
        slave
      - it is INSERT .. ON DUPLICATE KEY UPDATE - in this case the
        insert cannot be concurrent
      - this statement is directly or indirectly invoked from
        a stored function or trigger (under pre-locking) - to
        avoid deadlocks, since INSERT DELAYED involves a lock
        upgrade (TL_WRITE_DELAYED -> TL_WRITE) which we should not
        attempt while keeping other table level locks.
      - this statement itself may require pre-locking.
        We should upgrade the lock even though in most cases
        delayed functionality may work. Unfortunately, we can't
        easily identify whether the subject table is not used in
        the statement indirectly via a stored function or trigger:
        if it is used, that will lead to a deadlock between the
        client connection and the delayed thread.
    */
    if (specialflag & (SPECIAL_NO_NEW_FUNC | SPECIAL_SAFE_MODE) ||
        thd->variables.max_insert_delayed_threads == 0 ||
        thd->locked_tables_mode > LTM_LOCK_TABLES ||
        thd->lex->uses_stored_routines() /*||
        thd->lex->describe*/)
    {
      *lock_type= TL_WRITE;
      return;
    }
    if (thd->slave_thread)
    {
      /* Try concurrent insert */
      *lock_type= (duplic == DUP_UPDATE || duplic == DUP_REPLACE) ?
                  TL_WRITE : TL_WRITE_CONCURRENT_INSERT;
      return;
    }

    bool log_on= (thd->variables.option_bits & OPTION_BIN_LOG);
    /*
      NOTE(review): wsrep_binlog_format() presumably yields the effective
      (session) binlog format, falling back to the given global value under
      Galera — confirm against THD::wsrep_binlog_format().
    */
    if (thd->wsrep_binlog_format(global_system_variables.binlog_format) == BINLOG_FORMAT_STMT &&
        log_on && mysql_bin_log.is_open())
    {
      /*
        Statement-based binary logging does not work in this case, because:
        a) two concurrent statements may have their rows intermixed in the
        queue, leading to autoincrement replication problems on slave (because
        the values generated used for one statement don't depend only on the
        value generated for the first row of this statement, so are not
        replicable)
        b) if first row of the statement has an error the full statement is
        not binlogged, while next rows of the statement may be inserted.
        c) if first row succeeds, statement is binlogged immediately with a
        zero error code (i.e. "no error"), if then second row fails, query
        will fail on slave too and slave will stop (wrongly believing that the
        master got no error).
        So we fallback to non-delayed INSERT.
        Note that to be fully correct, we should test the "binlog format which
        the delayed thread is going to use for this row". But in the common case
        where the global binlog format is not changed and the session binlog
        format may be changed, that is equal to the global binlog format.
        We test it without mutex for speed reasons (condition rarely true), and
        in the common case (global not changed) it is as good as without mutex;
        if global value is changed, anyway there is uncertainty as the delayed
        thread may be old and use the before-the-change value.
      */
      *lock_type= TL_WRITE;
    }
  }
}


/**
  Find or create a delayed insert thread for the first table in
  the table list, then open and lock the remaining tables.
  If a table can not be used with insert delayed, upgrade the lock
  and open and lock all tables using the standard mechanism.
  @param thd         thread context
  @param table_list  list of "descriptors" for tables referenced
                     directly in statement SQL text.
                     The first element in the list corresponds to
                     the destination table for inserts, remaining
                     tables, if any, are usually tables referenced
                     by sub-queries in the right part of the
                     INSERT.

  @return Status of the operation. In case of success 'table'
          member of every table_list element points to an instance of
          class TABLE.

  @sa open_and_lock_tables for more information about MySQL table
      level locking
*/

static
bool open_and_lock_for_insert_delayed(THD *thd, TABLE_LIST *table_list)
{
  MDL_request protection_request;
  DBUG_ENTER("open_and_lock_for_insert_delayed");

#ifndef EMBEDDED_LIBRARY
  /* INSERT DELAYED is not allowed in a read only transaction. */
  if (thd->tx_read_only)
  {
    my_error(ER_CANT_EXECUTE_IN_READ_ONLY_TRANSACTION, MYF(0));
    DBUG_RETURN(true);
  }

  /*
    In order for the deadlock detector to be able to find any deadlocks
    caused by the handler thread waiting for GRL or this table, we acquire
    protection against GRL (global IX metadata lock) and metadata lock on
    table to being inserted into inside the connection thread.
    If this goes ok, the tickets are cloned and added to the list of granted
    locks held by the handler thread.
  */
  if (thd->has_read_only_protection())
    DBUG_RETURN(TRUE);

  /* Backup/GRL protection lock, statement duration */
  MDL_REQUEST_INIT(&protection_request, MDL_key::BACKUP, "", "",
                   MDL_BACKUP_DML, MDL_STATEMENT);

  if (thd->mdl_context.acquire_lock(&protection_request,
                                    thd->variables.lock_wait_timeout))
    DBUG_RETURN(TRUE);

  if (thd->mdl_context.acquire_lock(&table_list->mdl_request,
                                    thd->variables.lock_wait_timeout))
    /*
      If a lock can't be acquired, it makes no sense to try normal insert.
      Therefore we just abort the statement.
    */
    DBUG_RETURN(TRUE);

  bool error= FALSE;
  if (delayed_get_table(thd, &protection_request, table_list))
    error= TRUE;
  else if (table_list->table)
  {
    /*
      Open tables used for sub-selects or in stored functions, will also
      cache these functions.
    */
    if (open_and_lock_tables(thd, table_list->next_global, TRUE,
                             MYSQL_OPEN_IGNORE_ENGINE_STATS))
    {
      /* Failure: shut down the delayed-insert handler we just attached to */
      end_delayed_insert(thd);
      error= TRUE;
    }
    else
    {
      /*
        First table was not processed by open_and_lock_tables(),
        we need to set updatability flag "by hand".
      */
      if (!table_list->derived && !table_list->view)
        table_list->updatable= 1;              // usual table
    }
  }

  /*
    We can't release protection against GRL and metadata lock on the table
    being inserted into here. These locks might be required, for example,
    because this INSERT DELAYED calls functions which may try to update
    this or another tables (updating the same table is of course illegal,
    but such an attempt can be discovered only later during statement
    execution).
  */

  /*
    Reset the ticket in case we end up having to use normal insert and
    therefore will reopen the table and reacquire the metadata lock.
  */
  table_list->mdl_request.ticket= NULL;

  if (error || table_list->table)
    DBUG_RETURN(error);
#endif
  /*
    * This is embedded library and we don't have auxiliary
      threads OR
    * a lock upgrade was requested inside delayed_get_table
      because
      - there are too many delayed insert threads OR
      - the table has triggers.
    Use a normal insert.
  */
  table_list->lock_type= TL_WRITE;
  DBUG_RETURN(open_and_lock_tables(thd, table_list, TRUE, 0));
}


/**
  Create a new query string for removing DELAYED keyword for
  multi INSERT DELAYED statement.
+ + @param[in] thd Thread handler + @param[in] buf Query string + + @return + 0 ok + 1 error +*/ +static int +create_insert_stmt_from_insert_delayed(THD *thd, String *buf) +{ + /* Make a copy of thd->query() and then remove the "DELAYED" keyword */ + if (buf->append(thd->query(), thd->query_length()) || + buf->replace(thd->lex->keyword_delayed_begin_offset, + thd->lex->keyword_delayed_end_offset - + thd->lex->keyword_delayed_begin_offset, NULL, 0)) + return 1; + return 0; +} + + +static void save_insert_query_plan(THD* thd, TABLE_LIST *table_list) +{ + Explain_insert* explain= new (thd->mem_root) Explain_insert(thd->mem_root); + explain->table_name.append(table_list->table->alias); + + thd->lex->explain->add_insert_plan(explain); + + /* Save subquery children */ + for (SELECT_LEX_UNIT *unit= thd->lex->first_select_lex()->first_inner_unit(); + unit; + unit= unit->next_unit()) + { + if (unit->explainable()) + explain->add_child(unit->first_select()->select_number); + } +} + + +Field **TABLE::field_to_fill() +{ + return triggers && triggers->nullable_fields() ? triggers->nullable_fields() : field; +} + + +/** + INSERT statement implementation + + SYNOPSIS + mysql_insert() + result NULL if the insert is not outputing results + via 'RETURNING' clause. + + @note Like implementations of other DDL/DML in MySQL, this function + relies on the caller to close the thread tables. This is done in the + end of dispatch_command(). 
+*/ +bool mysql_insert(THD *thd, TABLE_LIST *table_list, + List &fields, List &values_list, + List &update_fields, List &update_values, + enum_duplicates duplic, bool ignore, select_result *result) +{ + bool retval= true; + int error, res; + bool transactional_table, joins_freed= FALSE; + bool changed; + const bool was_insert_delayed= (table_list->lock_type == TL_WRITE_DELAYED); + bool using_bulk_insert= 0; + uint value_count; + /* counter of iteration in bulk PS operation*/ + ulonglong iteration= 0; + ulonglong id; + COPY_INFO info; + TABLE *table= 0; + List_iterator_fast its(values_list); + List_item *values; + Name_resolution_context *context; + Name_resolution_context_state ctx_state; + SELECT_LEX *returning= thd->lex->has_returning() ? thd->lex->returning() : 0; + unsigned char *readbuff= NULL; + +#ifndef EMBEDDED_LIBRARY + char *query= thd->query(); + /* + log_on is about delayed inserts only. + By default, both logs are enabled (this won't cause problems if the server + runs without --log-bin). + */ + bool log_on= (thd->variables.option_bits & OPTION_BIN_LOG); +#endif + thr_lock_type lock_type; + Item *unused_conds= 0; + DBUG_ENTER("mysql_insert"); + + bzero((char*) &info,sizeof(info)); + create_explain_query(thd->lex, thd->mem_root); + /* + Upgrade lock type if the requested lock is incompatible with + the current connection mode or table operation. + */ + upgrade_lock_type(thd, &table_list->lock_type, duplic); + + /* + We can't write-delayed into a table locked with LOCK TABLES: + this will lead to a deadlock, since the delayed thread will + never be able to get a lock on the table. 
+ */ + if (table_list->lock_type == TL_WRITE_DELAYED && thd->locked_tables_mode && + find_locked_table(thd->open_tables, table_list->db.str, + table_list->table_name.str)) + { + my_error(ER_DELAYED_INSERT_TABLE_LOCKED, MYF(0), + table_list->table_name.str); + DBUG_RETURN(TRUE); + } + + if (table_list->lock_type == TL_WRITE_DELAYED) + { + if (open_and_lock_for_insert_delayed(thd, table_list)) + DBUG_RETURN(TRUE); + } + else + { + if (open_and_lock_tables(thd, table_list, TRUE, 0)) + DBUG_RETURN(TRUE); + } + + THD_STAGE_INFO(thd, stage_init_update); + lock_type= table_list->lock_type; + thd->lex->used_tables=0; + values= its++; + if (bulk_parameters_set(thd)) + DBUG_RETURN(TRUE); + value_count= values->elements; + + if ((res= mysql_prepare_insert(thd, table_list, fields, values, + update_fields, update_values, duplic, ignore, + &unused_conds, FALSE))) + { + retval= thd->is_error(); + if (res < 0) + { + /* + Insert should be ignored but we have to log the query in statement + format in the binary log + */ + if (thd->binlog_current_query_unfiltered()) + retval= 1; + } + goto abort; + } + /* mysql_prepare_insert sets table_list->table if it was not set */ + table= table_list->table; + + /* Prepares LEX::returing_list if it is not empty */ + if (returning) + { + result->prepare(returning->item_list, NULL); + if (thd->is_bulk_op()) + { + /* + It is RETURNING which needs network buffer to write result set and + it is array binfing which need network buffer to read parameters. + So we allocate yet another network buffer. + The old buffer will be freed at the end of operation. 
+ */ + DBUG_ASSERT(thd->protocol == &thd->protocol_binary); + readbuff= thd->net.buff; // old buffer + if (net_allocate_new_packet(&thd->net, thd, MYF(MY_THREAD_SPECIFIC))) + { + readbuff= NULL; // failure, net_allocate_new_packet keeps old buffer + goto abort; + } + } + } + + context= &thd->lex->first_select_lex()->context; + /* + These three asserts test the hypothesis that the resetting of the name + resolution context below is not necessary at all since the list of local + tables for INSERT always consists of one table. + */ + DBUG_ASSERT(!table_list->next_local); + DBUG_ASSERT(!context->table_list->next_local); + DBUG_ASSERT(!context->first_name_resolution_table->next_name_resolution_table); + + /* Save the state of the current name resolution context. */ + ctx_state.save_state(context, table_list); + + /* + Perform name resolution only in the first table - 'table_list', + which is the table that is inserted into. + */ + table_list->next_local= 0; + context->resolve_in_table_list_only(table_list); + switch_to_nullable_trigger_fields(*values, table); + + /* + Check assignability for the leftmost () in VALUES: + INSERT INTO t1 (a,b) VALUES (1,2), (3,4); + This checks if the values (1,2) can be assigned to fields (a,b). + The further values, e.g. (3,4) are not checked - they will be + checked during the execution time (when processing actual rows). + This is to preserve the "insert until the very first error"-style + behaviour for non-transactional tables. 
+ */ + if (values->elements && + table_list->table->check_assignability_opt_fields(fields, *values, + ignore)) + goto abort; + + while ((values= its++)) + { + thd->get_stmt_da()->inc_current_row_for_warning(); + if (values->elements != value_count) + { + my_error(ER_WRONG_VALUE_COUNT_ON_ROW, MYF(0), + thd->get_stmt_da()->current_row_for_warning()); + goto abort; + } + if (setup_fields(thd, Ref_ptr_array(), + *values, MARK_COLUMNS_READ, 0, NULL, 0)) + goto abort; + switch_to_nullable_trigger_fields(*values, table); + } + its.rewind (); + thd->get_stmt_da()->reset_current_row_for_warning(0); + + /* Restore the current context. */ + ctx_state.restore_state(context, table_list); + + if (thd->lex->unit.first_select()->optimize_unflattened_subqueries(false)) + { + goto abort; + } + save_insert_query_plan(thd, table_list); + if (thd->lex->describe) + { + bool extended= thd->lex->describe & DESCRIBE_EXTENDED; + retval= thd->lex->explain->send_explain(thd, extended); + goto abort; + } + + /* + Fill in the given fields and dump it to the table file + */ + info.ignore= ignore; + info.handle_duplicates=duplic; + info.update_fields= &update_fields; + info.update_values= &update_values; + info.view= (table_list->view ? table_list : 0); + info.table_list= table_list; + + /* + Count warnings for all inserts. + For single line insert, generate an error if try to set a NOT NULL field + to NULL. + */ + thd->count_cuted_fields= (values_list.elements == 1 && !ignore) + ? 
CHECK_FIELD_ERROR_FOR_NULL : CHECK_FIELD_WARN; + thd->cuted_fields = 0L; + table->next_number_field=table->found_next_number_field; + +#ifdef HAVE_REPLICATION + if (thd->rgi_slave && + (info.handle_duplicates == DUP_UPDATE) && + (table->next_number_field != NULL) && + rpl_master_has_bug(thd->rgi_slave->rli, 24432, TRUE, NULL, NULL)) + goto abort; +#endif + + error=0; + if (duplic == DUP_REPLACE && + (!table->triggers || !table->triggers->has_delete_triggers())) + table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE); + if (duplic == DUP_UPDATE) + table->file->extra(HA_EXTRA_INSERT_WITH_UPDATE); + /* + let's *try* to start bulk inserts. It won't necessary + start them as values_list.elements should be greater than + some - handler dependent - threshold. + We should not start bulk inserts if this statement uses + functions or invokes triggers since they may access + to the same table and therefore should not see its + inconsistent state created by this optimization. + So we call start_bulk_insert to perform nesessary checks on + values_list.elements, and - if nothing else - to initialize + the code to make the call of end_bulk_insert() below safe. + */ +#ifndef EMBEDDED_LIBRARY + if (lock_type != TL_WRITE_DELAYED) +#endif /* EMBEDDED_LIBRARY */ + { + bool create_lookup_handler= duplic != DUP_ERROR; + if (duplic != DUP_ERROR || ignore) + { + create_lookup_handler= true; + table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + if (table->file->ha_table_flags() & HA_DUPLICATE_POS) + { + if (table->file->ha_rnd_init_with_error(0)) + goto abort; + } + } + table->file->prepare_for_insert(create_lookup_handler); + /** + This is a simple check for the case when the table has a trigger + that reads from it, or when the statement invokes a stored function + that reads from the table being inserted to. + Engines can't handle a bulk insert in parallel with a read form the + same table in the same connection. 
+ */ + if (thd->locked_tables_mode <= LTM_LOCK_TABLES && + !table->s->long_unique_table && values_list.elements > 1) + { + using_bulk_insert= 1; + table->file->ha_start_bulk_insert(values_list.elements); + } + else + table->file->ha_reset_copy_info(); + } + + thd->abort_on_warning= !ignore && thd->is_strict_mode(); + + table->reset_default_fields(); + table->prepare_triggers_for_insert_stmt_or_event(); + table->mark_columns_needed_for_insert(); + + if (fields.elements || !value_count || table_list->view != 0) + { + if (table->triggers && + table->triggers->has_triggers(TRG_EVENT_INSERT, TRG_ACTION_BEFORE)) + { + /* BEFORE INSERT triggers exist, the check will be done later, per row */ + } + else if (check_that_all_fields_are_given_values(thd, table, table_list)) + { + error= 1; + goto values_loop_end; + } + } + + if (table_list->prepare_where(thd, 0, TRUE) || + table_list->prepare_check_option(thd)) + error= 1; + + switch_to_nullable_trigger_fields(fields, table); + switch_to_nullable_trigger_fields(update_fields, table); + switch_to_nullable_trigger_fields(update_values, table); + + if (fields.elements || !value_count) + { + /* + There are possibly some default values: + INSERT INTO t1 (fields) VALUES ... + INSERT INTO t1 VALUES () + */ + if (table->validate_default_values_of_unset_fields(thd)) + { + error= 1; + goto values_loop_end; + } + } + /* + If statement returns result set, we need to send the result set metadata + to the client so that it knows that it has to expect an EOF or ERROR. + At this point we have all the required information to send the result set + metadata. 
+ */ + if (returning && + result->send_result_set_metadata(returning->item_list, + Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF)) + goto values_loop_end; + + THD_STAGE_INFO(thd, stage_update); + + if (duplic == DUP_UPDATE) + { + restore_record(table,s->default_values); // Get empty record + thd->reconsider_logging_format_for_iodup(table); + } + fix_rownum_pointers(thd, thd->lex->current_select, &info.accepted_rows); + if (returning) + fix_rownum_pointers(thd, thd->lex->returning(), &info.accepted_rows); + + do + { + DBUG_PRINT("info", ("iteration %llu", iteration)); + if (iteration && bulk_parameters_set(thd)) + { + error= 1; + goto values_loop_end; + } + + while ((values= its++)) + { + thd->get_stmt_da()->inc_current_row_for_warning(); + if (fields.elements || !value_count) + { + /* + There are possibly some default values: + INSERT INTO t1 (fields) VALUES ... + INSERT INTO t1 VALUES () + */ + restore_record(table,s->default_values); // Get empty record + table->reset_default_fields(); + if (unlikely(fill_record_n_invoke_before_triggers(thd, table, fields, + *values, 0, + TRG_EVENT_INSERT))) + { + if (values_list.elements != 1 && ! thd->is_error()) + { + info.records++; + continue; + } + /* + TODO: set thd->abort_on_warning if values_list.elements == 1 + and check that all items return warning in case of problem with + storing field. + */ + error=1; + break; + } + } + else + { + /* + No field list, all fields are set explicitly: + INSERT INTO t1 VALUES (values) + */ + if (thd->lex->used_tables || // Column used in values() + table->s->visible_fields != table->s->fields) + restore_record(table,s->default_values); // Get empty record + else + { + TABLE_SHARE *share= table->s; + + /* + Fix delete marker. No need to restore rest of record since it will + be overwritten by fill_record() anyway (and fill_record() does not + use default values in this case). + */ + table->record[0][0]= share->default_values[0]; + + /* Fix undefined null_bits. 
*/ + if (share->null_bytes > 1 && share->last_null_bit_pos) + { + table->record[0][share->null_bytes - 1]= + share->default_values[share->null_bytes - 1]; + } + } + table->reset_default_fields(); + if (unlikely(fill_record_n_invoke_before_triggers(thd, table, + table-> + field_to_fill(), + *values, 0, + TRG_EVENT_INSERT))) + { + if (values_list.elements != 1 && ! thd->is_error()) + { + info.records++; + continue; + } + error=1; + break; + } + } + + /* + with triggers a field can get a value *conditionally*, so we have to + repeat has_no_default_value() check for every row + */ + if (table->triggers && + table->triggers->has_triggers(TRG_EVENT_INSERT, TRG_ACTION_BEFORE)) + { + for (Field **f=table->field ; *f ; f++) + { + if (unlikely(!(*f)->has_explicit_value() && + has_no_default_value(thd, *f, table_list))) + { + error= 1; + goto values_loop_end; + } + } + } + + if ((res= table_list->view_check_option(thd, + (values_list.elements == 1 ? + 0 : + ignore))) == + VIEW_CHECK_SKIP) + continue; + else if (res == VIEW_CHECK_ERROR) + { + error= 1; + break; + } + +#ifndef EMBEDDED_LIBRARY + if (lock_type == TL_WRITE_DELAYED) + { + LEX_STRING const st_query = { query, thd->query_length() }; + DEBUG_SYNC(thd, "before_write_delayed"); + error=write_delayed(thd, table, duplic, st_query, ignore, log_on); + DEBUG_SYNC(thd, "after_write_delayed"); + query=0; + } + else +#endif + error= write_record(thd, table, &info, result); + if (unlikely(error)) + break; + info.accepted_rows++; + } + its.rewind(); + iteration++; + } while (bulk_parameters_iterations(thd)); + +values_loop_end: + free_underlaid_joins(thd, thd->lex->first_select_lex()); + joins_freed= TRUE; + + /* + Now all rows are inserted. 
Time to update logs and sends response to + user + */ +#ifndef EMBEDDED_LIBRARY + if (unlikely(lock_type == TL_WRITE_DELAYED)) + { + if (likely(!error)) + { + info.copied=values_list.elements; + end_delayed_insert(thd); + } + } + else +#endif + { + /* + Do not do this release if this is a delayed insert, it would steal + auto_inc values from the delayed_insert thread as they share TABLE. + */ + table->file->ha_release_auto_increment(); + if (using_bulk_insert) + { + /* + if my_error() wasn't called yet on some specific row, end_bulk_insert() + can still do it, but the error shouldn't be for any specific row number + */ + if (!error) + thd->get_stmt_da()->reset_current_row_for_warning(0); + if (unlikely(table->file->ha_end_bulk_insert()) && !error) + { + table->file->print_error(my_errno,MYF(0)); + error=1; + } + } + /* Get better status from handler if handler supports it */ + if (table->file->copy_info.records) + { + DBUG_ASSERT(info.copied >= table->file->copy_info.copied); + info.touched= table->file->copy_info.touched; + info.copied= table->file->copy_info.copied; + info.deleted= table->file->copy_info.deleted; + info.updated= table->file->copy_info.updated; + } + if (duplic != DUP_ERROR || ignore) + { + table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); + if (table->file->ha_table_flags() & HA_DUPLICATE_POS) + table->file->ha_rnd_end(); + } + + transactional_table= table->file->has_transactions_and_rollback(); + + if (likely(changed= (info.copied || info.deleted || info.updated))) + { + /* + Invalidate the table in the query cache if something changed. 
+ For the transactional algorithm to work the invalidation must be + before binlog writing and ha_autocommit_or_rollback + */ + query_cache_invalidate3(thd, table_list, 1); + } + + if (thd->transaction->stmt.modified_non_trans_table) + thd->transaction->all.modified_non_trans_table= TRUE; + thd->transaction->all.m_unsafe_rollback_flags|= + (thd->transaction->stmt.m_unsafe_rollback_flags & THD_TRANS::DID_WAIT); + + if (error <= 0 || + thd->transaction->stmt.modified_non_trans_table || + thd->log_current_statement() || + was_insert_delayed) + { + if(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) + { + int errcode= 0; + if (error <= 0) + { + /* + [Guilhem wrote] Temporary errors may have filled + thd->net.last_error/errno. For example if there has + been a disk full error when writing the row, and it was + MyISAM, then thd->net.last_error/errno will be set to + "disk full"... and the mysql_file_pwrite() will wait until free + space appears, and so when it finishes then the + write_row() was entirely successful + */ + /* todo: consider removing */ + thd->clear_error(); + } + else + errcode= query_error_code(thd, thd->killed == NOT_KILLED); + + StatementBinlog stmt_binlog(thd, table->versioned(VERS_TRX_ID) || + thd->binlog_need_stmt_format(transactional_table)); + /* bug#22725: + + A query which per-row-loop can not be interrupted with + KILLED, like INSERT, and that does not invoke stored + routines can be binlogged with neglecting the KILLED error. + + If there was no error (error == zero) until after the end of + inserting loop the KILLED flag that appeared later can be + disregarded since previously possible invocation of stored + routines did not result in any error due to the KILLED. In + such case the flag is ignored for constructing binlog event. + */ + DBUG_ASSERT(thd->killed != KILL_BAD_DATA || error > 0); + if (was_insert_delayed && table_list->lock_type == TL_WRITE) + { + /* Binlog INSERT DELAYED as INSERT without DELAYED. 
*/ + String log_query; + if (create_insert_stmt_from_insert_delayed(thd, &log_query)) + { + sql_print_error("Event Error: An error occurred while creating query string" + "for INSERT DELAYED stmt, before writing it into binary log."); + + error= 1; + } + else if (thd->binlog_query(THD::ROW_QUERY_TYPE, + log_query.c_ptr(), log_query.length(), + transactional_table, FALSE, FALSE, + errcode) > 0) + error= 1; + } + else if (thd->binlog_query(THD::ROW_QUERY_TYPE, + thd->query(), thd->query_length(), + transactional_table, FALSE, FALSE, + errcode) > 0) + error= 1; + } + } + DBUG_ASSERT(transactional_table || !changed || + thd->transaction->stmt.modified_non_trans_table); + } + THD_STAGE_INFO(thd, stage_end); + /* + We'll report to the client this id: + - if the table contains an autoincrement column and we successfully + inserted an autogenerated value, the autogenerated value. + - if the table contains no autoincrement column and LAST_INSERT_ID(X) was + called, X. + - if the table contains an autoincrement column, and some rows were + inserted, the id of the last "inserted" row (if IGNORE, that value may not + have been really inserted but ignored). + */ + id= (thd->first_successful_insert_id_in_cur_stmt > 0) ? + thd->first_successful_insert_id_in_cur_stmt : + (thd->arg_of_last_insert_id_function ? + thd->first_successful_insert_id_in_prev_stmt : + ((table->next_number_field && info.copied) ? 
+ table->next_number_field->val_int() : 0)); + table->next_number_field=0; + thd->count_cuted_fields= CHECK_FIELD_IGNORE; + table->auto_increment_field_not_null= FALSE; + if (duplic == DUP_REPLACE && + (!table->triggers || !table->triggers->has_delete_triggers())) + table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); + + if (unlikely(error)) + goto abort; + if (thd->lex->analyze_stmt) + { + retval= 0; + goto abort; + } + DBUG_PRINT("info", ("touched: %llu copied: %llu updated: %llu deleted: %llu", + (ulonglong) info.touched, (ulonglong) info.copied, + (ulonglong) info.updated, (ulonglong) info.deleted)); + + if ((iteration * values_list.elements) == 1 && + (!(thd->variables.option_bits & OPTION_WARNINGS) || !thd->cuted_fields)) + { + /* + Client expects an EOF/OK packet if result set metadata was sent. If + LEX::has_returning and the statement returns result set + we send EOF which is the indicator of the end of the row stream. + Oherwise we send an OK packet i.e when the statement returns only the + status information + */ + if (returning) + result->send_eof(); + else + my_ok(thd, info.copied + info.deleted + + ((thd->client_capabilities & CLIENT_FOUND_ROWS) ? + info.touched : info.updated), id); + } + else + { + char buff[160]; + ha_rows updated=((thd->client_capabilities & CLIENT_FOUND_ROWS) ? + info.touched : info.updated); + + if (ignore) + sprintf(buff, ER_THD(thd, ER_INSERT_INFO), (ulong) info.records, + (lock_type == TL_WRITE_DELAYED) ? 
(ulong) 0 : + (ulong) (info.records - info.copied), + (long) thd->get_stmt_da()->current_statement_warn_count()); + else + sprintf(buff, ER_THD(thd, ER_INSERT_INFO), (ulong) info.records, + (ulong) (info.deleted + updated), + (long) thd->get_stmt_da()->current_statement_warn_count()); + if (returning) + result->send_eof(); + else + ::my_ok(thd, info.copied + info.deleted + updated, id, buff); + } + thd->abort_on_warning= 0; + if (thd->lex->current_select->first_cond_optimization) + { + thd->lex->current_select->save_leaf_tables(thd); + thd->lex->current_select->first_cond_optimization= 0; + } + + my_free(readbuff); +#ifndef EMBEDDED_LIBRARY + if (lock_type == TL_WRITE_DELAYED && table->expr_arena) + table->expr_arena->free_items(); +#endif + DBUG_RETURN(FALSE); + +abort: +#ifndef EMBEDDED_LIBRARY + if (lock_type == TL_WRITE_DELAYED) + { + end_delayed_insert(thd); + /* + In case of an error (e.g. data truncation), the data type specific data + in fields (e.g. Field_blob::value) was not taken over + by the delayed writer thread. All fields in table_list->table + will be freed by free_root() soon. We need to free the specific + data before free_root() to avoid a memory leak. 
+ */ + for (Field **ptr= table_list->table->field ; *ptr ; ptr++) + (*ptr)->free(); + if (table_list->table->expr_arena) + table_list->table->expr_arena->free_items(); + } +#endif + if (table != NULL) + table->file->ha_release_auto_increment(); + + if (!joins_freed) + free_underlaid_joins(thd, thd->lex->first_select_lex()); + thd->abort_on_warning= 0; + if (readbuff) + my_free(readbuff); + DBUG_RETURN(retval); +} + + +/* + Additional check for insertability for VIEW + + SYNOPSIS + check_view_insertability() + thd - thread handler + view - reference on VIEW + + IMPLEMENTATION + A view is insertable if the folloings are true: + - All columns in the view are columns from a table + - All not used columns in table have a default values + - All field in view are unique (not referring to the same column) + + RETURN + FALSE - OK + view->contain_auto_increment is 1 if and only if the view contains an + auto_increment field + + TRUE - can't be used for insert +*/ + +static bool check_view_insertability(THD * thd, TABLE_LIST *view) +{ + uint num= view->view->first_select_lex()->item_list.elements; + TABLE *table= view->table; + Field_translator *trans_start= view->field_translation, + *trans_end= trans_start + num; + Field_translator *trans; + uint used_fields_buff_size= bitmap_buffer_size(table->s->fields); + uint32 *used_fields_buff= (uint32*)thd->alloc(used_fields_buff_size); + MY_BITMAP used_fields; + enum_column_usage saved_column_usage= thd->column_usage; + DBUG_ENTER("check_key_in_view"); + + if (!used_fields_buff) + DBUG_RETURN(TRUE); // EOM + + DBUG_ASSERT(view->table != 0 && view->field_translation != 0); + + (void) my_bitmap_init(&used_fields, used_fields_buff, table->s->fields); + bitmap_clear_all(&used_fields); + + view->contain_auto_increment= 0; + /* + we must not set query_id for fields as they're not + really used in this context + */ + thd->column_usage= COLUMNS_WRITE; + /* check simplicity and prepare unique test of view */ + for (trans= trans_start; trans 
!= trans_end; trans++) + { + if (trans->item->fix_fields_if_needed(thd, &trans->item)) + { + thd->column_usage= saved_column_usage; + DBUG_RETURN(TRUE); + } + Item_field *field; + /* simple SELECT list entry (field without expression) */ + if (!(field= trans->item->field_for_view_update())) + { + thd->column_usage= saved_column_usage; + DBUG_RETURN(TRUE); + } + if (field->field->unireg_check == Field::NEXT_NUMBER) + view->contain_auto_increment= 1; + /* prepare unique test */ + /* + remove collation (or other transparent for update function) if we have + it + */ + trans->item= field; + } + thd->column_usage= saved_column_usage; + /* unique test */ + for (trans= trans_start; trans != trans_end; trans++) + { + /* Thanks to test above, we know that all columns are of type Item_field */ + Item_field *field= (Item_field *)trans->item; + /* check fields belong to table in which we are inserting */ + if (field->field->table == table && + bitmap_fast_test_and_set(&used_fields, field->field->field_index)) + DBUG_RETURN(TRUE); + } + + DBUG_RETURN(FALSE); +} + + +/** + TODO remove when MDEV-17395 will be closed + + Checks if REPLACE or ON DUPLICATE UPDATE was executed on table containing + WITHOUT OVERLAPS key. + + @return + 0 if no error + ER_NOT_SUPPORTED_YET if the above condidion was met + */ +int check_duplic_insert_without_overlaps(THD *thd, TABLE *table, + enum_duplicates duplic) +{ + if (duplic == DUP_REPLACE || duplic == DUP_UPDATE) + { + for (uint k = 0; k < table->s->keys; k++) + { + if (table->key_info[k].without_overlaps) + { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "WITHOUT OVERLAPS"); + return ER_NOT_SUPPORTED_YET; + } + } + } + return 0; +} + +/* + Check if table can be updated + + SYNOPSIS + mysql_prepare_insert_check_table() + thd Thread handle + table_list Table list + fields List of fields to be updated + where Pointer to where clause + select_insert Check is making for SELECT ... 
INSERT + + RETURN + FALSE ok + TRUE ERROR +*/ + +static bool mysql_prepare_insert_check_table(THD *thd, TABLE_LIST *table_list, + List &fields, + bool select_insert) +{ + bool insert_into_view= (table_list->view != 0); + DBUG_ENTER("mysql_prepare_insert_check_table"); + + if (!table_list->single_table_updatable()) + { + my_error(ER_NON_INSERTABLE_TABLE, MYF(0), table_list->alias.str, "INSERT"); + DBUG_RETURN(TRUE); + } + /* + first table in list is the one we'll INSERT into, requires INSERT_ACL. + all others require SELECT_ACL only. the ACL requirement below is for + new leaves only anyway (view-constituents), so check for SELECT rather + than INSERT. + */ + + if (setup_tables_and_check_access(thd, + &thd->lex->first_select_lex()->context, + &thd->lex->first_select_lex()-> + top_join_list, + table_list, + thd->lex->first_select_lex()->leaf_tables, + select_insert, INSERT_ACL, SELECT_ACL, + TRUE)) + DBUG_RETURN(TRUE); + + if (insert_into_view && !fields.elements) + { + thd->lex->empty_field_list_on_rset= 1; + if (!table_list->table || table_list->is_multitable()) + { + my_error(ER_VIEW_NO_INSERT_FIELD_LIST, MYF(0), + table_list->view_db.str, table_list->view_name.str); + DBUG_RETURN(TRUE); + } + DBUG_RETURN(insert_view_fields(thd, &fields, table_list)); + } + + DBUG_RETURN(FALSE); +} + + +/* + Get extra info for tables we insert into + + @param table table(TABLE object) we insert into, + might be NULL in case of view + @param table(TABLE_LIST object) or view we insert into +*/ + +static void prepare_for_positional_update(TABLE *table, TABLE_LIST *tables) +{ + if (table) + { + if(table->reginfo.lock_type != TL_WRITE_DELAYED) + table->prepare_for_position(); + return; + } + + DBUG_ASSERT(tables->view); + List_iterator it(*tables->view_tables); + TABLE_LIST *tbl; + while ((tbl= it++)) + prepare_for_positional_update(tbl->table, tbl); + + return; +} + + +/* + Prepare items in INSERT statement + + SYNOPSIS + mysql_prepare_insert() + thd Thread handler + table_list 
Global/local table list + where Where clause (for insert ... select) + select_insert TRUE if INSERT ... SELECT statement + + TODO (in far future) + In cases of: + INSERT INTO t1 SELECT a, sum(a) as sum1 from t2 GROUP BY a + ON DUPLICATE KEY ... + we should be able to refer to sum1 in the ON DUPLICATE KEY part + + WARNING + You MUST set table->insert_values to 0 after calling this function + before releasing the table object. + + RETURN VALUE + 0 OK + >0 error + <0 insert should be ignored +*/ + +int mysql_prepare_insert(THD *thd, TABLE_LIST *table_list, + List &fields, List_item *values, + List &update_fields, List &update_values, + enum_duplicates duplic, bool ignore, + COND **where, + bool select_insert) +{ + SELECT_LEX *select_lex= thd->lex->first_select_lex(); + Name_resolution_context *context= &select_lex->context; + Name_resolution_context_state ctx_state; + bool insert_into_view= (table_list->view != 0); + bool res= 0; + table_map map= 0; + TABLE *table; + DBUG_ENTER("mysql_prepare_insert"); + DBUG_PRINT("enter", ("table_list: %p view: %d", + table_list, (int) insert_into_view)); + /* INSERT should have a SELECT or VALUES clause */ + DBUG_ASSERT (!select_insert || !values); + + if (mysql_handle_derived(thd->lex, DT_INIT)) + DBUG_RETURN(1); + if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT)) + DBUG_RETURN(1); + if (thd->lex->handle_list_of_derived(table_list, DT_PREPARE)) + DBUG_RETURN(1); + + if (duplic == DUP_UPDATE) + { + /* it should be allocated before Item::fix_fields() */ + if (table_list->set_insert_values(thd->mem_root)) + DBUG_RETURN(1); + } + + table= table_list->table; + + if (table->file->check_if_updates_are_ignored("INSERT")) + DBUG_RETURN(-1); + + if (mysql_prepare_insert_check_table(thd, table_list, fields, select_insert)) + DBUG_RETURN(1); + + /* Prepare the fields in the statement. */ + if (values) + { + /* if we have INSERT ... 
VALUES () we cannot have a GROUP BY clause */ + DBUG_ASSERT (!select_lex->group_list.elements); + + /* Save the state of the current name resolution context. */ + ctx_state.save_state(context, table_list); + + /* + Perform name resolution only in the first table - 'table_list', + which is the table that is inserted into. + */ + table_list->next_local= 0; + context->resolve_in_table_list_only(table_list); + + res= setup_returning_fields(thd, table_list) || + setup_fields(thd, Ref_ptr_array(), + *values, MARK_COLUMNS_READ, 0, NULL, 0) || + check_insert_fields(thd, context->table_list, fields, *values, + !insert_into_view, 0, &map); + + if (!res) + res= setup_fields(thd, Ref_ptr_array(), + update_values, MARK_COLUMNS_READ, 0, NULL, 0); + + if (!res && duplic == DUP_UPDATE) + { + select_lex->no_wrap_view_item= TRUE; + res= check_update_fields(thd, context->table_list, update_fields, + update_values, false, &map) || + /* + Check that all col=expr pairs are compatible for assignment in + INSERT INTO t1 VALUES (...) + ON DUPLICATE KEY UPDATE col=expr [, col=expr]; + */ + TABLE::check_assignability_explicit_fields(update_fields, + update_values, + ignore); + + select_lex->no_wrap_view_item= FALSE; + } + + /* Restore the current context. */ + ctx_state.restore_state(context, table_list); + } + + thd->get_stmt_da()->reset_current_row_for_warning(1); + + if (res) + DBUG_RETURN(res); + + if (check_duplic_insert_without_overlaps(thd, table, duplic) != 0) + DBUG_RETURN(true); + + if (table->versioned(VERS_TIMESTAMP)) + { + // Additional memory may be required to create historical items. 
+ if (duplic == DUP_REPLACE && table_list->set_insert_values(thd->mem_root)) + DBUG_RETURN(1); + + Field *row_start= table->vers_start_field(); + Field *row_end= table->vers_end_field(); + if (!fields.elements && !(row_start->invisible && row_end->invisible)) + thd->vers_insert_history(row_start); // check privileges + } + + if (!select_insert) + { + Item *fake_conds= 0; + TABLE_LIST *duplicate; + if ((duplicate= unique_table(thd, table_list, table_list->next_global, + CHECK_DUP_ALLOW_DIFFERENT_ALIAS))) + { + update_non_unique_table_error(table_list, "INSERT", duplicate); + DBUG_RETURN(1); + } + select_lex->fix_prepare_information(thd, &fake_conds, &fake_conds); + } + /* + Only call prepare_for_posistion() if we are not performing a DELAYED + operation. It will instead be executed by delayed insert thread. + */ + if (duplic == DUP_UPDATE || duplic == DUP_REPLACE) + prepare_for_positional_update(table, table_list); + DBUG_RETURN(0); +} + + + /* Check if there is more uniq keys after field */ + +static int last_uniq_key(TABLE *table,uint keynr) +{ + /* + When an underlying storage engine informs that the unique key + conflicts are not reported in the ascending order by setting + the HA_DUPLICATE_KEY_NOT_IN_ORDER flag, we cannot rely on this + information to determine the last key conflict. + + The information about the last key conflict will be used to + do a replace of the new row on the conflicting row, rather + than doing a delete (of old row) + insert (of new row). + + Hence check for this flag and disable replacing the last row + by returning 0 always. Returning 0 will result in doing + a delete + insert always. + */ + if (table->file->ha_table_flags() & HA_DUPLICATE_KEY_NOT_IN_ORDER) + return 0; + + while (++keynr < table->s->keys) + if (table->key_info[keynr].flags & HA_NOSAME) + return 0; + return 1; +} + + +/* + Inserts one historical row to a table. 
+ + Copies content of the row from table->record[1] to table->record[0], + sets Sys_end to now() and calls ha_write_row() . +*/ + +int vers_insert_history_row(TABLE *table) +{ + DBUG_ASSERT(table->versioned(VERS_TIMESTAMP)); + DBUG_ASSERT(table->vers_write); + restore_record(table,record[1]); + + // Set Sys_end to now() + table->vers_update_end(); + + Field *row_start= table->vers_start_field(); + Field *row_end= table->vers_end_field(); + if (row_start->cmp(row_start->ptr, row_end->ptr) >= 0) + return 0; + + if (table->vfield && + table->update_virtual_fields(table->file, VCOL_UPDATE_FOR_READ)) + return HA_ERR_GENERIC; + + return table->file->ha_write_row(table->record[0]); +} + +/* + Write a record to table with optional deleting of conflicting records, + invoke proper triggers if needed. + + SYNOPSIS + write_record() + thd - thread context + table - table to which record should be written + info - COPY_INFO structure describing handling of duplicates + and which is used for counting number of records inserted + and deleted. + sink - result sink for the RETURNING clause + + NOTE + Once this record will be written to table after insert trigger will + be invoked. If instead of inserting new record we will update old one + then both on update triggers will work instead. Similarly both on + delete triggers will be invoked if we will delete conflicting records. + + Sets thd->transaction.stmt.modified_non_trans_table to TRUE if table which + is updated didn't have transactions. 
+ + RETURN VALUE + 0 - success + non-0 - error +*/ + + +int write_record(THD *thd, TABLE *table, COPY_INFO *info, select_result *sink) +{ + int error, trg_error= 0; + char *key=0; + MY_BITMAP *save_read_set, *save_write_set; + ulonglong prev_insert_id= table->file->next_insert_id; + ulonglong insert_id_for_cur_row= 0; + ulonglong prev_insert_id_for_cur_row= 0; + DBUG_ENTER("write_record"); + + info->records++; + save_read_set= table->read_set; + save_write_set= table->write_set; + + DBUG_EXECUTE_IF("rpl_write_record_small_sleep_gtid_100_200", + { + if (thd->rgi_slave && (thd->rgi_slave->current_gtid.seq_no == 100 || + thd->rgi_slave->current_gtid.seq_no == 200)) + my_sleep(20000); + }); + if (info->handle_duplicates == DUP_REPLACE || + info->handle_duplicates == DUP_UPDATE) + { + while (unlikely(error=table->file->ha_write_row(table->record[0]))) + { + uint key_nr; + /* + If we do more than one iteration of this loop, from the second one the + row will have an explicit value in the autoinc field, which was set at + the first call of handler::update_auto_increment(). So we must save + the autogenerated value to avoid thd->insert_id_for_cur_row to become + 0. + */ + if (table->file->insert_id_for_cur_row > 0) + insert_id_for_cur_row= table->file->insert_id_for_cur_row; + else + table->file->insert_id_for_cur_row= insert_id_for_cur_row; + bool is_duplicate_key_error; + if (table->file->is_fatal_error(error, HA_CHECK_ALL)) + goto err; + is_duplicate_key_error= + table->file->is_fatal_error(error, HA_CHECK_ALL & ~HA_CHECK_DUP); + if (!is_duplicate_key_error) + { + /* + We come here when we had an ignorable error which is not a duplicate + key error. In this we ignore error if ignore flag is set, otherwise + report error as usual. We will not do any duplicate key processing. 
+ */ + if (info->ignore) + { + table->file->print_error(error, MYF(ME_WARNING)); + goto after_trg_or_ignored_err; /* Ignoring a not fatal error */ + } + goto err; + } + if (unlikely((int) (key_nr = table->file->get_dup_key(error)) < 0)) + { + error= HA_ERR_FOUND_DUPP_KEY; /* Database can't find key */ + goto err; + } + DEBUG_SYNC(thd, "write_row_replace"); + + /* Read all columns for the row we are going to replace */ + table->use_all_columns(); + /* + Don't allow REPLACE to replace a row when a auto_increment column + was used. This ensures that we don't get a problem when the + whole range of the key has been used. + */ + if (info->handle_duplicates == DUP_REPLACE && table->next_number_field && + key_nr == table->s->next_number_index && insert_id_for_cur_row > 0) + goto err; + if (table->file->ha_table_flags() & HA_DUPLICATE_POS) + { + DBUG_ASSERT(table->file->inited == handler::RND); + if (table->file->ha_rnd_pos(table->record[1],table->file->dup_ref)) + goto err; + } + else + { + if (table->file->extra(HA_EXTRA_FLUSH_CACHE)) /* Not needed with NISAM */ + { + error=my_errno; + goto err; + } + + if (!key) + { + if (!(key=(char*) my_safe_alloca(table->s->max_unique_length))) + { + error=ENOMEM; + goto err; + } + } + key_copy((uchar*) key,table->record[0],table->key_info+key_nr,0); + key_part_map keypart_map= (1 << table->key_info[key_nr].user_defined_key_parts) - 1; + if ((error= (table->file->ha_index_read_idx_map(table->record[1], + key_nr, (uchar*) key, + keypart_map, + HA_READ_KEY_EXACT)))) + goto err; + } + if (table->vfield) + { + /* + We have not yet called update_virtual_fields(VOL_UPDATE_FOR_READ) + in handler methods for the just read row in record[1]. 
+ */ + table->move_fields(table->field, table->record[1], table->record[0]); + int verr = table->update_virtual_fields(table->file, VCOL_UPDATE_FOR_REPLACE); + table->move_fields(table->field, table->record[0], table->record[1]); + if (verr) + goto err; + } + if (info->handle_duplicates == DUP_UPDATE) + { + int res= 0; + /* + We don't check for other UNIQUE keys - the first row + that matches, is updated. If update causes a conflict again, + an error is returned + */ + DBUG_ASSERT(table->insert_values != NULL); + store_record(table,insert_values); + restore_record(table,record[1]); + table->reset_default_fields(); + + /* + in INSERT ... ON DUPLICATE KEY UPDATE the set of modified fields can + change per row. Thus, we have to do reset_default_fields() per row. + Twice (before insert and before update). + */ + DBUG_ASSERT(info->update_fields->elements == + info->update_values->elements); + if (fill_record_n_invoke_before_triggers(thd, table, + *info->update_fields, + *info->update_values, + info->ignore, + TRG_EVENT_UPDATE)) + goto before_trg_err; + + bool different_records= (!records_are_comparable(table) || + compare_record(table)); + /* + Default fields must be updated before checking view updateability. + This branch of INSERT is executed only when a UNIQUE key was violated + with the ON DUPLICATE KEY UPDATE option. In this case the INSERT + operation is transformed to an UPDATE, and the default fields must + be updated as if this is an UPDATE. + */ + if (different_records && table->default_field) + table->evaluate_update_default_function(); + + /* CHECK OPTION for VIEW ... ON DUPLICATE KEY UPDATE ... 
*/ + res= info->table_list->view_check_option(table->in_use, info->ignore); + if (res == VIEW_CHECK_SKIP) + goto after_trg_or_ignored_err; + if (res == VIEW_CHECK_ERROR) + goto before_trg_err; + + table->file->restore_auto_increment(prev_insert_id); + info->touched++; + if (different_records) + { + if (unlikely(error=table->file->ha_update_row(table->record[1], + table->record[0])) && + error != HA_ERR_RECORD_IS_THE_SAME) + { + if (info->ignore && + !table->file->is_fatal_error(error, HA_CHECK_ALL)) + { + if (!(thd->variables.old_behavior & + OLD_MODE_NO_DUP_KEY_WARNINGS_WITH_IGNORE)) + table->file->print_error(error, MYF(ME_WARNING)); + goto after_trg_or_ignored_err; + } + goto err; + } + + if (error != HA_ERR_RECORD_IS_THE_SAME) + { + info->updated++; + if (table->versioned() && + table->vers_check_update(*info->update_fields)) + { + if (table->versioned(VERS_TIMESTAMP)) + { + store_record(table, record[2]); + if ((error= vers_insert_history_row(table))) + { + info->last_errno= error; + table->file->print_error(error, MYF(0)); + trg_error= 1; + restore_record(table, record[2]); + goto after_trg_or_ignored_err; + } + restore_record(table, record[2]); + } + info->copied++; + } + } + else + error= 0; + /* + If ON DUP KEY UPDATE updates a row instead of inserting + one, it's like a regular UPDATE statement: it should not + affect the value of a next SELECT LAST_INSERT_ID() or + mysql_insert_id(). Except if LAST_INSERT_ID(#) was in the + INSERT query, which is handled separately by + THD::arg_of_last_insert_id_function. + */ + prev_insert_id_for_cur_row= table->file->insert_id_for_cur_row; + insert_id_for_cur_row= table->file->insert_id_for_cur_row= 0; + trg_error= (table->triggers && + table->triggers->process_triggers(thd, TRG_EVENT_UPDATE, + TRG_ACTION_AFTER, TRUE)); + info->copied++; + } + + /* + Only update next_insert_id if the AUTO_INCREMENT value was explicitly + updated, so we don't update next_insert_id with the value from the + row being updated. 
Otherwise reset next_insert_id to what it was + before the duplicate key error, since that value is unused. + */ + if (table->next_number_field_updated) + { + DBUG_ASSERT(table->next_number_field != NULL); + + table->file->adjust_next_insert_id_after_explicit_value(table->next_number_field->val_int()); + } + else if (prev_insert_id_for_cur_row) + { + table->file->restore_auto_increment(prev_insert_id_for_cur_row); + } + goto ok; + } + else /* DUP_REPLACE */ + { + /* + The manual defines the REPLACE semantics that it is either + an INSERT or DELETE(s) + INSERT; FOREIGN KEY checks in + InnoDB do not function in the defined way if we allow MySQL + to convert the latter operation internally to an UPDATE. + We also should not perform this conversion if we have + timestamp field with ON UPDATE which is different from DEFAULT. + Another case when conversion should not be performed is when + we have ON DELETE trigger on table so user may notice that + we cheat here. Note that it is ok to do such conversion for + tables which have ON UPDATE but have no ON DELETE triggers, + we just should not expose this fact to users by invoking + ON UPDATE triggers. 
+ */ + if (last_uniq_key(table,key_nr) && + !table->file->referenced_by_foreign_key() && + (!table->triggers || !table->triggers->has_delete_triggers())) + { + if (table->versioned(VERS_TRX_ID)) + { + DBUG_ASSERT(table->vers_write); + bitmap_set_bit(table->write_set, table->vers_start_field()->field_index); + table->file->column_bitmaps_signal(); + table->vers_start_field()->store(0, false); + } + if (unlikely(error= table->file->ha_update_row(table->record[1], + table->record[0])) && + error != HA_ERR_RECORD_IS_THE_SAME) + goto err; + if (likely(!error)) + { + info->deleted++; + if (!table->file->has_transactions()) + thd->transaction->stmt.modified_non_trans_table= TRUE; + if (table->versioned(VERS_TIMESTAMP) && table->vers_write) + { + store_record(table, record[2]); + error= vers_insert_history_row(table); + restore_record(table, record[2]); + if (unlikely(error)) + goto err; + } + } + else + error= 0; // error was HA_ERR_RECORD_IS_THE_SAME + /* + Since we pretend that we have done insert we should call + its after triggers. 
+ */ + goto after_trg_n_copied_inc; + } + else + { + if (table->triggers && + table->triggers->process_triggers(thd, TRG_EVENT_DELETE, + TRG_ACTION_BEFORE, TRUE)) + goto before_trg_err; + + if (!table->versioned(VERS_TIMESTAMP)) + error= table->file->ha_delete_row(table->record[1]); + else + { + store_record(table, record[2]); + restore_record(table, record[1]); + table->vers_update_end(); + error= table->file->ha_update_row(table->record[1], + table->record[0]); + restore_record(table, record[2]); + } + if (unlikely(error)) + goto err; + if (!table->versioned(VERS_TIMESTAMP)) + info->deleted++; + else + info->updated++; + if (!table->file->has_transactions_and_rollback()) + thd->transaction->stmt.modified_non_trans_table= TRUE; + if (table->triggers && + table->triggers->process_triggers(thd, TRG_EVENT_DELETE, + TRG_ACTION_AFTER, TRUE)) + { + trg_error= 1; + goto after_trg_or_ignored_err; + } + /* Let us attempt do write_row() once more */ + } + } + } + + /* + If more than one iteration of the above while loop is done, from + the second one the row being inserted will have an explicit + value in the autoinc field, which was set at the first call of + handler::update_auto_increment(). This value is saved to avoid + thd->insert_id_for_cur_row becoming 0. Use this saved autoinc + value. + */ + if (table->file->insert_id_for_cur_row == 0) + table->file->insert_id_for_cur_row= insert_id_for_cur_row; + + /* + Restore column maps if they where replaced during an duplicate key + problem. 
+ */ + if (table->read_set != save_read_set || + table->write_set != save_write_set) + table->column_bitmaps_set(save_read_set, save_write_set); + } + else if (unlikely((error=table->file->ha_write_row(table->record[0])))) + { + DEBUG_SYNC(thd, "write_row_noreplace"); + if (!info->ignore || + table->file->is_fatal_error(error, HA_CHECK_ALL)) + goto err; + if (!(thd->variables.old_behavior & + OLD_MODE_NO_DUP_KEY_WARNINGS_WITH_IGNORE)) + table->file->print_error(error, MYF(ME_WARNING)); + table->file->restore_auto_increment(prev_insert_id); + goto after_trg_or_ignored_err; + } + +after_trg_n_copied_inc: + info->copied++; + thd->record_first_successful_insert_id_in_cur_stmt(table->file->insert_id_for_cur_row); + trg_error= (table->triggers && + table->triggers->process_triggers(thd, TRG_EVENT_INSERT, + TRG_ACTION_AFTER, TRUE)); + +ok: + /* + We send the row after writing it to the table so that the + correct values are sent to the client. Otherwise it won't show + autoinc values (generated inside the handler::ha_write()) and + values updated in ON DUPLICATE KEY UPDATE. 
+ */ + if (sink) + { + if (sink->send_data(thd->lex->returning()->item_list) < 0) + trg_error= 1; + } + +after_trg_or_ignored_err: + if (key) + my_safe_afree(key,table->s->max_unique_length); + if (!table->file->has_transactions_and_rollback()) + thd->transaction->stmt.modified_non_trans_table= TRUE; + DBUG_RETURN(trg_error); + +err: + info->last_errno= error; + table->file->print_error(error,MYF(0)); + +before_trg_err: + table->file->restore_auto_increment(prev_insert_id); + if (key) + my_safe_afree(key, table->s->max_unique_length); + table->column_bitmaps_set(save_read_set, save_write_set); + DBUG_RETURN(1); +} + + +/****************************************************************************** + Check that there aren't any null_fields +******************************************************************************/ + + +int check_that_all_fields_are_given_values(THD *thd, TABLE *entry, TABLE_LIST *table_list) +{ + int err= 0; + MY_BITMAP *write_set= entry->write_set; + + for (Field **field=entry->field ; *field ; field++) + { + if (!bitmap_is_set(write_set, (*field)->field_index) && + !(*field)->vcol_info && + has_no_default_value(thd, *field, table_list)) + err=1; + } + return thd->abort_on_warning ? err : 0; +} + +/***************************************************************************** + Handling of delayed inserts + A thread is created for each table that one uses with the DELAYED attribute. 
*****************************************************************************/

#ifndef EMBEDDED_LIBRARY

/**
  A single row buffered for INSERT DELAYED, together with the session
  state that is needed to replay it later in the handler thread
  (binlog query string, time zone, auto_increment settings and copies
  of the client's security context strings). Instances are created by
  write_delayed() and queued in Delayed_insert::rows.
*/
class delayed_row :public ilink {
public:
  /* Packed copy of table->record[0]; user/host/ip strings are appended */
  char *record;
  enum_duplicates dup;
  my_time_t start_time;
  ulong start_time_sec_part;
  sql_mode_t sql_mode;
  bool auto_increment_field_not_null;
  bool ignore, log_query;
  THD::used_t query_start_sec_part_used;
  bool stmt_depends_on_first_successful_insert_id_in_prev_stmt;
  ulonglong first_successful_insert_id_in_prev_stmt;
  ulonglong forced_insert_id;
  ulong auto_increment_increment;
  ulong auto_increment_offset;
  LEX_STRING query;                     /* Owned; freed in the destructor */
  Time_zone *time_zone;
  /* Pointers into 'record' (set by write_delayed()); not freed separately */
  char *user, *host, *ip;
  query_id_t query_id;
  my_thread_id thread_id;

  delayed_row(LEX_STRING const query_arg, enum_duplicates dup_arg,
              bool ignore_arg, bool log_query_arg)
    : record(0), dup(dup_arg), ignore(ignore_arg), log_query(log_query_arg),
      forced_insert_id(0), query(query_arg), time_zone(0),
      user(0), host(0), ip(0)
  {}
  ~delayed_row()
  {
    my_free(query.str);
    my_free(record);
  }
};

/**
  Delayed_insert - context of a thread responsible for delayed insert
  into one table. When processing delayed inserts, we create an own
  thread for every distinct table. Later on all delayed inserts directed
  into that table are handled by a dedicated thread.
*/

class Delayed_insert :public ilink {
  /* Number of client threads referencing this object; see lock()/unlock() */
  uint locks_in_memory;
  thr_lock_type delayed_lock;
public:
  THD thd;                              /* The handler thread's own THD */
  TABLE *table;
  mysql_mutex_t mutex;                  /* Protects the fields below */
  /* 'cond' wakes the handler thread; 'cond_client' wakes waiting clients */
  mysql_cond_t cond, cond_client;
  uint tables_in_use, stacked_inserts;
  volatile bool status;                 /* Set when the handler has work to do */
  bool retry;
  /**
    When the handler thread starts, it clones a metadata lock ticket
    which protects against GRL and ticket for the table to be inserted.
    This is done to allow the deadlock detector to detect deadlocks
    resulting from these locks.
    Before this is done, the connection thread cannot safely exit
    without causing problems for clone_ticket().
    Once handler_thread_initialized has been set, it is safe for the
    connection thread to exit.
    Access to handler_thread_initialized is protected by di->mutex.
  */
  bool handler_thread_initialized;
  COPY_INFO info;
  I_List<delayed_row> rows;             /* Queue of buffered rows */
  ulong group_count;
  TABLE_LIST table_list;                // Argument
  /**
    Request for IX metadata lock protecting against GRL which is
    passed from connection thread to the handler thread.
  */
  MDL_request grl_protection;

  Delayed_insert(SELECT_LEX *current_select)
    :locks_in_memory(0), thd(next_thread_id()),
     table(0), tables_in_use(0), stacked_inserts(0),
     status(0), retry(0), handler_thread_initialized(FALSE), group_count(0)
  {
    DBUG_ENTER("Delayed_insert constructor");
    thd.security_ctx->user= (char*) delayed_user;
    thd.security_ctx->host= (char*) my_localhost;
    thd.security_ctx->ip= NULL;
    thd.query_id= 0;
    strmake_buf(thd.security_ctx->priv_user, thd.security_ctx->user);
    thd.current_tablenr= 0;
    thd.set_command(COM_DELAYED_INSERT);
    thd.lex->current_select= current_select;
    thd.lex->sql_command= SQLCOM_INSERT;        // For innodb::store_lock()
    /*
      Prevent changes to global.lock_wait_timeout from affecting
      delayed insert threads as any timeouts in delayed inserts
      are not communicated to the client.
    */
    thd.variables.lock_wait_timeout= LONG_TIMEOUT;

    bzero((char*) &thd.net, sizeof(thd.net));           // Safety
    bzero((char*) &table_list, sizeof(table_list));     // Safety
    thd.system_thread= SYSTEM_THREAD_DELAYED_INSERT;
    thd.security_ctx->host_or_ip= "";
    bzero((char*) &info, sizeof(info));
    mysql_mutex_init(key_delayed_insert_mutex, &mutex, MY_MUTEX_INIT_FAST);
    mysql_cond_init(key_delayed_insert_cond, &cond, NULL);
    mysql_cond_init(key_delayed_insert_cond_client, &cond_client, NULL);
    mysql_mutex_lock(&LOCK_delayed_insert);
    delayed_insert_threads++;
    mysql_mutex_unlock(&LOCK_delayed_insert);
    delayed_lock= global_system_variables.low_priority_updates ?
                                          TL_WRITE_LOW_PRIORITY : TL_WRITE;
    DBUG_VOID_RETURN;
  }
  ~Delayed_insert()
  {
    /* The following is not really needed, but just for safety */
    delayed_row *row;
    while ((row= rows.get()))
      delete row;
    if (table)
    {
      close_thread_tables(&thd);
      thd.mdl_context.release_transactional_locks(&thd);
    }
    mysql_mutex_destroy(&mutex);
    mysql_cond_destroy(&cond);
    mysql_cond_destroy(&cond_client);

    server_threads.erase(&thd);
    mysql_mutex_assert_owner(&LOCK_delayed_insert);
    delayed_insert_threads--;

    my_free(thd.query());
    thd.security_ctx->user= 0;
    thd.security_ctx->host= 0;
  }

  /* The following is for checking when we can delete ourselves */
  inline void lock()
  {
    locks_in_memory++;                          // Assume LOCK_delay_insert
  }
  void unlock()
  {
    mysql_mutex_lock(&LOCK_delayed_insert);
    if (!--locks_in_memory)
    {
      mysql_mutex_lock(&mutex);
      /* Last reference gone: wake a killed, idle handler so it can exit */
      if (thd.killed && ! stacked_inserts && ! tables_in_use)
      {
        mysql_cond_signal(&cond);
        status= 1;
      }
      mysql_mutex_unlock(&mutex);
    }
    mysql_mutex_unlock(&LOCK_delayed_insert);
  }
  inline uint lock_count() { return locks_in_memory; }

  TABLE* get_local_table(THD* client_thd);
  bool open_and_lock_table();
  bool handle_inserts(void);
};


/* Global list of active delayed-insert handler threads */
I_List<Delayed_insert> delayed_threads;


/**
  Return an instance of delayed insert thread that can handle
  inserts into a given table, if it exists. Otherwise return NULL.
*/

static
Delayed_insert *find_handler(THD *thd, TABLE_LIST *table_list)
{
  THD_STAGE_INFO(thd, stage_waiting_for_delay_list);
  mysql_mutex_lock(&LOCK_delayed_insert);       // Protect master list
  I_List_iterator<Delayed_insert> it(delayed_threads);
  Delayed_insert *di;
  while ((di= it++))
  {
    /* Match on both database and table name */
    if (!cmp(&table_list->db, &di->table_list.db) &&
        !cmp(&table_list->table_name, &di->table_list.table_name))
    {
      /* Take a reference before dropping LOCK_delayed_insert */
      di->lock();
      break;
    }
  }
  mysql_mutex_unlock(&LOCK_delayed_insert); // For unlink from list
  return di;                                /* NULL when no match was found */
}


/**
  Attempt to find or create a delayed insert thread to handle inserts
  into this table.

  @return In case of success, table_list->table points to a local copy
          of the delayed table or is set to NULL, which indicates a
          request for lock upgrade. In case of failure, value of
          table_list->table is undefined.
  @retval TRUE  - this thread ran out of resources OR
                - a newly created delayed insert thread ran out of
                  resources OR
                - the created thread failed to open and lock the table
                  (e.g. because it does not exist) OR
                - the table opened in the created thread turned out to
                  be a view
  @retval FALSE - table successfully opened OR
                - too many delayed insert threads OR
                - the table has triggers and we have to fall back to
                  a normal INSERT
                Two latter cases indicate a request for lock upgrade.

  XXX: why do we regard INSERT DELAYED into a view as an error and
  do not simply perform a lock upgrade?

  TODO: The approach with using two mutexes to work with the
  delayed thread list -- LOCK_delayed_insert and
  LOCK_delayed_create -- is redundant, and we only need one of
  them to protect the list. The reason we have two locks is that
  we do not want to block look-ups in the list while we're waiting
  for the newly created thread to open the delayed table. However,
  this wait itself is redundant -- we always call get_local_table
  later on, and there wait again until the created thread acquires
  a table lock.

  As is redundant the concept of locks_in_memory, since we already
  have another counter with similar semantics - tables_in_use,
  both of them are devoted to counting the number of producers for
  a given consumer (delayed insert thread), only at different
  stages of producer-consumer relationship.

  The 'status' variable in Delayed_insert is redundant
  too, since there is already di->stacked_inserts.
*/

static
bool delayed_get_table(THD *thd, MDL_request *grl_protection_request,
                       TABLE_LIST *table_list)
{
  int error;
  Delayed_insert *di;
  DBUG_ENTER("delayed_get_table");

  /* Must be set in the parser */
  DBUG_ASSERT(table_list->db.str);

  /* Find the thread which handles this table. */
  if (!(di= find_handler(thd, table_list)))
  {
    /*
      No match. Create a new thread to handle the table, but
      no more than max_insert_delayed_threads.
    */
    if (delayed_insert_threads >= thd->variables.max_insert_delayed_threads)
      DBUG_RETURN(0);
    THD_STAGE_INFO(thd, stage_creating_delayed_handler);
    mysql_mutex_lock(&LOCK_delayed_create);
    /*
      The first search above was done without LOCK_delayed_create.
      Another thread might have created the handler in between. Search again.
    */
    if (! (di= find_handler(thd, table_list)))
    {
      if (!(di= new Delayed_insert(thd->lex->current_select)))
        goto end_create;

      /*
        Annotating delayed inserts is not supported.
      */
      di->thd.variables.binlog_annotate_row_events= 0;

      di->thd.set_db(&table_list->db);
      di->thd.set_query(my_strndup(PSI_INSTRUMENT_ME,
                                   table_list->table_name.str,
                                   table_list->table_name.length,
                                   MYF(MY_WME | ME_FATAL)),
                        table_list->table_name.length, system_charset_info);
      if (di->thd.db.str == NULL || di->thd.query() == NULL)
      {
        /* The error is reported */
        delete di;
        goto end_create;
      }
      di->table_list= *table_list;              // Needed to open table
      /* Replace volatile strings with local copies */
      di->table_list.alias.str= di->table_list.table_name.str= di->thd.query();
      di->table_list.alias.length= di->table_list.table_name.length=
        di->thd.query_length();
      di->table_list.db= di->thd.db;
      /*
        Nulify select_lex because, if the thread that spawned the current one
        disconnects, the select_lex will point to freed memory.
      */
      di->table_list.select_lex= NULL;
      /*
        We need the tickets so that they can be cloned in
        handle_delayed_insert
      */
      MDL_REQUEST_INIT(&di->grl_protection, MDL_key::BACKUP, "", "",
                       MDL_BACKUP_DML, MDL_STATEMENT);
      di->grl_protection.ticket= grl_protection_request->ticket;
      init_mdl_requests(&di->table_list);
      di->table_list.mdl_request.ticket= table_list->mdl_request.ticket;

      di->lock();
      mysql_mutex_lock(&di->mutex);
      if ((error= mysql_thread_create(key_thread_delayed_insert,
                                      &di->thd.real_id, &connection_attrib,
                                      handle_delayed_insert, (void*) di)))
      {
        DBUG_PRINT("error",
                   ("Can't create thread to handle delayed insert (error %d)",
                    error));
        mysql_mutex_unlock(&di->mutex);
        di->unlock();
        delete di;
        my_error(ER_CANT_CREATE_THREAD, MYF(ME_FATAL), error);
        goto end_create;
      }

      /*
        Wait until table is open unless the handler thread or the connection
        thread has been killed. Note that we in all cases must wait until the
        handler thread has been properly initialized before exiting. Otherwise
        we risk doing clone_ticket() on a ticket that is no longer valid.
      */
      THD_STAGE_INFO(thd, stage_waiting_for_handler_open);
      while (!di->handler_thread_initialized ||
             (!di->thd.killed && !di->table && !thd->killed))
      {
        mysql_cond_wait(&di->cond_client, &di->mutex);
      }
      mysql_mutex_unlock(&di->mutex);
      THD_STAGE_INFO(thd, stage_got_old_table);
      if (thd->killed)
      {
        di->unlock();
        goto end_create;
      }
      if (di->thd.killed)
      {
        if (di->thd.is_error() && ! di->retry)
        {
          /*
            Copy the error message. Note that we don't treat fatal
            errors in the delayed thread as fatal errors in the
            main thread. If delayed thread was killed, we don't
            want to send "Server shutdown in progress" in the
            INSERT THREAD.
          */
          my_message(di->thd.get_stmt_da()->sql_errno(),
                     di->thd.get_stmt_da()->message(),
                     MYF(0));
        }
        di->unlock();
        goto end_create;
      }
      /* Handler thread is up and has the table open: publish it */
      mysql_mutex_lock(&LOCK_delayed_insert);
      delayed_threads.append(di);
      mysql_mutex_unlock(&LOCK_delayed_insert);
    }
    mysql_mutex_unlock(&LOCK_delayed_create);
  }

  mysql_mutex_lock(&di->mutex);
  table_list->table= di->get_local_table(thd);
  mysql_mutex_unlock(&di->mutex);
  if (table_list->table)
  {
    DBUG_ASSERT(! thd->is_error());
    thd->di= di;
  }
  /* Unlock the delayed insert object after its last access. */
  di->unlock();
  DBUG_PRINT("exit", ("table_list->table: %p", table_list->table));
  DBUG_RETURN(thd->is_error());

end_create:
  mysql_mutex_unlock(&LOCK_delayed_create);
  DBUG_PRINT("exit", ("is_error(): %d", thd->is_error()));
  DBUG_RETURN(thd->is_error());
}

/*
  Duplicate a Virtual_column_info into the client thread's mem_root.
  The parsed expression is cleared; it is re-created by parse_vcol_defs().
*/
#define memdup_vcol(thd, vcol)                                            \
  if (vcol)                                                               \
  {                                                                       \
    (vcol)= (Virtual_column_info*)(thd)->memdup((vcol), sizeof(*(vcol))); \
    (vcol)->expr= NULL;                                                   \
  }

/**
  As we can't let many client threads modify the same TABLE
  structure of the dedicated delayed insert thread, we create an
  own structure for each client thread. This includes a row
  buffer to save the column values and new fields that point to
  the new row buffer.
  The memory is allocated in the client
  thread and is freed automatically.

  @pre This function is called from the client thread. Delayed
       insert thread mutex must be acquired before invoking this
       function.

  @return Not-NULL table object on success. NULL in case of an error,
          which is set in client_thd.
*/

TABLE *Delayed_insert::get_local_table(THD* client_thd)
{
  my_ptrdiff_t adjust_ptrs;
  Field **field, **org_field, *found_next_number_field;
  TABLE *copy;
  TABLE_SHARE *share;
  uchar *bitmap;
  char *copy_tmp;
  uint bitmaps_used;
  Field **default_fields, **virtual_fields;
  uchar *record;
  DBUG_ENTER("Delayed_insert::get_local_table");

  /* First request insert thread to get a lock */
  status= 1;
  tables_in_use++;
  if (!thd.lock)                                // Table is not locked
  {
    THD_STAGE_INFO(client_thd, stage_waiting_for_handler_lock);
    mysql_cond_signal(&cond);                   // Tell handler to lock table
    while (!thd.killed && !thd.lock && ! client_thd->killed)
    {
      mysql_cond_wait(&cond_client, &mutex);
    }
    THD_STAGE_INFO(client_thd, stage_got_handler_lock);
    if (client_thd->killed)
      goto error;
    if (thd.killed)
    {
      /*
        Check how the insert thread was killed. If it was killed
        by FLUSH TABLES which calls kill_delayed_threads_for_table(),
        then is_error is not set.
        In this case, return without setting an error,
        which means that the insert will be converted to a normal insert.
      */
      if (thd.is_error())
      {
        /*
          Copy the error message. Note that we don't treat fatal
          errors in the delayed thread as fatal errors in the
          main thread. If delayed thread was killed, we don't
          want to send "Server shutdown in progress" in the
          INSERT THREAD.

          The thread could be killed with an error message if
          di->handle_inserts() or di->open_and_lock_table() fails.
        */
        my_message(thd.get_stmt_da()->sql_errno(),
                   thd.get_stmt_da()->message(), MYF(0));
      }
      goto error;
    }
  }
  share= table->s;

  /*
    Allocate memory for the TABLE object, the field pointers array,
    and one record buffer of reclength size.
    Normally a table has three record buffers of rec_buff_length size,
    which includes alignment bytes. Since the table copy is used for
    creating one record only, the other record buffers and alignment
    are unnecessary.
    As the table will also need to calculate default values and
    expresions, we have to allocate own version of fields. keys and key
    parts. The key and key parts are needed as parse_vcol_defs() changes
    them in case of long hash keys.
  */
  THD_STAGE_INFO(client_thd, stage_allocating_local_table);
  if (!multi_alloc_root(client_thd->mem_root,
                        &copy_tmp, sizeof(*table),
                        &field, (uint) (share->fields+1)*sizeof(Field**),
                        &default_fields,
                        (share->default_fields +
                         share->default_expressions + 1) * sizeof(Field*),
                        &virtual_fields,
                        (share->virtual_fields + 1) * sizeof(Field*),
                        &record, (uint) share->reclength,
                        &bitmap, (uint) share->column_bitmap_size*4,
                        NullS))
    goto error;

  /* Copy the TABLE object. */
  copy= new (copy_tmp) TABLE;
  *copy= *table;

  /* We don't need to change the file handler here */
  /* Assign the pointers for the field pointers array and the record. */
  copy->field= field;
  copy->record[0]= record;
  memcpy((char*) copy->record[0], (char*) table->record[0], share->reclength);
  if (share->default_fields || share->default_expressions)
    copy->default_field= default_fields;
  if (share->virtual_fields)
    copy->vfield= virtual_fields;

  copy->expr_arena= NULL;

  /* Ensure we don't use the table list of the original table */
  copy->pos_in_table_list= 0;

  /* We don't need statistics for insert delayed */
  copy->stats_cb= 0;

  /*
    Make a copy of all fields.
    The copied fields need to point into the copied record. This is done
    by copying the field objects with their old pointer values and then
    "move" the pointers by the distance between the original and copied
    records. That way we preserve the relative positions in the records.
  */
  adjust_ptrs= PTR_BYTE_DIFF(copy->record[0], table->record[0]);
  found_next_number_field= table->found_next_number_field;
  for (org_field= table->field; *org_field; org_field++, field++)
  {
    if (!(*field= (*org_field)->make_new_field(client_thd->mem_root, copy, 1)))
      goto error;
    (*field)->unireg_check= (*org_field)->unireg_check;
    (*field)->invisible= (*org_field)->invisible;
    (*field)->orig_table= copy;                 // Remove connection
    (*field)->move_field_offset(adjust_ptrs);   // Point at copy->record[0]
    (*field)->flags|= ((*org_field)->flags & LONG_UNIQUE_HASH_FIELD);
    /* NOTE(review): duplicate of the assignment above; harmless */
    (*field)->invisible= (*org_field)->invisible;
    memdup_vcol(client_thd, (*field)->vcol_info);
    memdup_vcol(client_thd, (*field)->default_value);
    memdup_vcol(client_thd, (*field)->check_constraint);
    if (*org_field == found_next_number_field)
      (*field)->table->found_next_number_field= *field;
  }
  *field= 0;

  if (copy_keys_from_share(copy, client_thd->mem_root))
    goto error;

  if (share->virtual_fields || share->default_expressions ||
      share->default_fields)
  {
    bool error_reported= FALSE;
    if (unlikely(parse_vcol_defs(client_thd, client_thd->mem_root, copy,
                                 &error_reported,
                                 VCOL_INIT_DEPENDENCY_FAILURE_IS_WARNING)))
      goto error;
  }

  switch_defaults_to_nullable_trigger_fields(copy);

  /* Adjust in_use for pointing to client thread */
  copy->in_use= client_thd;

  /* Adjust lock_count. This table object is not part of a lock. */
  copy->lock_count= 0;

  /* Adjust bitmaps */
  copy->def_read_set.bitmap= (my_bitmap_map*) bitmap;
  copy->def_write_set.bitmap= ((my_bitmap_map*)
                               (bitmap + share->column_bitmap_size));
  bitmaps_used= 2;
  if (share->default_fields || share->default_expressions)
  {
    my_bitmap_init(&copy->has_value_set,
                   (my_bitmap_map*) (bitmap +
                                     bitmaps_used*share->column_bitmap_size),
                   share->fields);
  }
  copy->tmp_set.bitmap= 0;                      // To catch errors
  bzero((char*) bitmap, share->column_bitmap_size * bitmaps_used);
  copy->read_set=  &copy->def_read_set;
  copy->write_set= &copy->def_write_set;

  DBUG_RETURN(copy);

  /* Got fatal error */
error:
  tables_in_use--;
  mysql_cond_signal(&cond);                     // Inform thread about abort
  DBUG_RETURN(0);
}


/* Put a question in queue */

static
int write_delayed(THD *thd, TABLE *table, enum_duplicates duplic,
                  LEX_STRING query, bool ignore, bool log_on)
{
  delayed_row *row= 0;
  Delayed_insert *di= thd->di;
  const Discrete_interval *forced_auto_inc;
  size_t user_len, host_len, ip_length;
  DBUG_ENTER("write_delayed");
  DBUG_PRINT("enter", ("query = '%s' length %lu", query.str,
                       (ulong) query.length));

  THD_STAGE_INFO(thd, stage_waiting_for_handler_insert);
  mysql_mutex_lock(&di->mutex);
  /* Apply back-pressure: block while the queue is full */
  while (di->stacked_inserts >= delayed_queue_size && !thd->killed)
    mysql_cond_wait(&di->cond_client, &di->mutex);
  THD_STAGE_INFO(thd, stage_storing_row_into_queue);

  if (thd->killed)
    goto err;

  /*
    Take a copy of the query string, if there is any. The string will
    be free'ed when the row is destroyed. If there is no query string,
    we don't do anything special.
  */

  if (query.str)
  {
    char *str;
    if (!(str= my_strndup(PSI_INSTRUMENT_ME, query.str, query.length,
                          MYF(MY_WME))))
      goto err;
    query.str= str;
  }
  row= new delayed_row(query, duplic, ignore, log_on);
  if (row == NULL)
  {
    my_free(query.str);
    goto err;
  }

  user_len= host_len= ip_length= 0;
  row->user= row->host= row->ip= NULL;
  if (thd->security_ctx)
  {
    if (thd->security_ctx->user)
      user_len= strlen(thd->security_ctx->user) + 1;
    if (thd->security_ctx->host)
      host_len= strlen(thd->security_ctx->host) + 1;
    if (thd->security_ctx->ip)
      ip_length= strlen(thd->security_ctx->ip) + 1;
  }
  /* This can't be THREAD_SPECIFIC as it's freed in delayed thread */
  if (!(row->record= (char*) my_malloc(PSI_INSTRUMENT_ME,
                                       table->s->reclength +
                                       user_len + host_len + ip_length,
                                       MYF(MY_WME))))
    goto err;
  memcpy(row->record, table->record[0], table->s->reclength);

  /* Security context strings are stored right after the record image */
  if (thd->security_ctx)
  {
    if (thd->security_ctx->user)
    {
      row->user= row->record + table->s->reclength;
      memcpy(row->user, thd->security_ctx->user, user_len);
    }
    if (thd->security_ctx->host)
    {
      row->host= row->record + table->s->reclength + user_len;
      memcpy(row->host, thd->security_ctx->host, host_len);
    }
    if (thd->security_ctx->ip)
    {
      row->ip= row->record + table->s->reclength + user_len + host_len;
      memcpy(row->ip, thd->security_ctx->ip, ip_length);
    }
  }
  row->query_id= thd->query_id;
  row->thread_id= thd->thread_id;

  row->start_time= thd->start_time;
  row->start_time_sec_part= thd->start_time_sec_part;
  row->query_start_sec_part_used= thd->used & THD::QUERY_START_SEC_PART_USED;
  /*
    those are for the binlog: LAST_INSERT_ID() has been evaluated at this
    time, so record does not need it, but statement-based binlogging of the
    INSERT will need when the row is actually inserted.
    As for SET INSERT_ID, DELAYED does not honour it (BUG#20830).
  */
  row->stmt_depends_on_first_successful_insert_id_in_prev_stmt=
    thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt;
  row->first_successful_insert_id_in_prev_stmt=
    thd->first_successful_insert_id_in_prev_stmt;

  /*
    Add session variable timezone
    Time_zone object will not be freed even the thread is ended.
    So we can get time_zone object from thread which handling delayed statement.
    See the comment of my_tz_find() for detail.
  */
  if (thd->used & THD::TIME_ZONE_USED)
  {
    row->time_zone= thd->variables.time_zone;
  }
  else
  {
    row->time_zone= NULL;
  }
  /* Copy session variables. */
  row->auto_increment_increment= thd->variables.auto_increment_increment;
  row->auto_increment_offset= thd->variables.auto_increment_offset;
  row->sql_mode= thd->variables.sql_mode;
  row->auto_increment_field_not_null= table->auto_increment_field_not_null;

  /* Copy the next forced auto increment value, if any. */
  if ((forced_auto_inc= thd->auto_inc_intervals_forced.get_next()))
  {
    row->forced_insert_id= forced_auto_inc->minimum();
    DBUG_PRINT("delayed", ("transmitting auto_inc: %lu",
                           (ulong) row->forced_insert_id));
  }

  di->rows.push_back(row);
  di->stacked_inserts++;
  di->status= 1;
  if (table->s->blob_fields)
    unlink_blobs(table);
  mysql_cond_signal(&di->cond);                 /* Wake the handler thread */

  thread_safe_increment(delayed_rows_in_use, &LOCK_delayed_status);
  mysql_mutex_unlock(&di->mutex);
  DBUG_RETURN(0);

err:
  delete row;                                   /* Also frees query/record */
  mysql_mutex_unlock(&di->mutex);
  DBUG_RETURN(1);
}

/**
  Signal the delayed insert thread that this user connection
  is finished using it for this statement.
*/

static void end_delayed_insert(THD *thd)
{
  DBUG_ENTER("end_delayed_insert");
  Delayed_insert *di= thd->di;
  mysql_mutex_lock(&di->mutex);
  DBUG_PRINT("info", ("tables in use: %d", di->tables_in_use));
  if (!--di->tables_in_use || di->thd.killed)
  {                                             // Unlock table
    di->status= 1;
    mysql_cond_signal(&di->cond);
  }
  mysql_mutex_unlock(&di->mutex);
  DBUG_VOID_RETURN;
}


/* We kill all delayed threads when doing flush-tables */

void kill_delayed_threads(void)
{
  DBUG_ENTER("kill_delayed_threads");
  mysql_mutex_lock(&LOCK_delayed_insert); // For unlink from list

  I_List_iterator<Delayed_insert> it(delayed_threads);
  Delayed_insert *di;
  while ((di= it++))
  {
    mysql_mutex_lock(&di->thd.LOCK_thd_kill);
    /* Don't downgrade an already stronger kill state */
    if (di->thd.killed < KILL_CONNECTION)
      di->thd.set_killed_no_mutex(KILL_CONNECTION);
    di->thd.abort_current_cond_wait(false);
    mysql_mutex_unlock(&di->thd.LOCK_thd_kill);
  }
  mysql_mutex_unlock(&LOCK_delayed_insert); // For unlink from list
  DBUG_VOID_RETURN;
}


/**
  A strategy for the prelocking algorithm which prevents the
  delayed insert thread from opening tables with engines which
  do not support delayed inserts.

  Particularly it allows to abort open_tables() as soon as we
  discover that we have opened a MERGE table, without acquiring
  metadata locks on underlying tables.
*/

class Delayed_prelocking_strategy : public Prelocking_strategy
{
public:
  virtual bool handle_routine(THD *thd, Query_tables_list *prelocking_ctx,
                              Sroutine_hash_entry *rt, sp_head *sp,
                              bool *need_prelocking);
  virtual bool handle_table(THD *thd, Query_tables_list *prelocking_ctx,
                            TABLE_LIST *table_list, bool *need_prelocking);
  virtual bool handle_view(THD *thd, Query_tables_list *prelocking_ctx,
                           TABLE_LIST *table_list, bool *need_prelocking);
};


/* Reject engines that cannot handle INSERT DELAYED */
bool Delayed_prelocking_strategy::
handle_table(THD *thd, Query_tables_list *prelocking_ctx,
             TABLE_LIST *table_list, bool *need_prelocking)
{
  DBUG_ASSERT(table_list->lock_type == TL_WRITE_DELAYED);

  if (!(table_list->table->file->ha_table_flags() & HA_CAN_INSERT_DELAYED))
  {
    my_error(ER_DELAYED_NOT_SUPPORTED, MYF(0), table_list->table_name.str);
    return TRUE;
  }
  return FALSE;
}


bool Delayed_prelocking_strategy::
handle_routine(THD *thd, Query_tables_list *prelocking_ctx,
               Sroutine_hash_entry *rt, sp_head *sp, bool *need_prelocking)
{
  /* LEX used by the delayed insert thread has no routines. */
  DBUG_ASSERT(0);
  return FALSE;
}


bool Delayed_prelocking_strategy::
handle_view(THD *thd, Query_tables_list *prelocking_ctx,
            TABLE_LIST *table_list, bool *need_prelocking)
{
  /* We don't open views in the delayed insert thread. */
  DBUG_ASSERT(0);
  return FALSE;
}


/**
  Open and lock table for use by delayed thread and check that
  this table is suitable for delayed inserts.

  @retval FALSE - Success.
  @retval TRUE  - Failure.
*/

bool Delayed_insert::open_and_lock_table()
{
  Delayed_prelocking_strategy prelocking_strategy;

  /*
    Use special prelocking strategy to get ER_DELAYED_NOT_SUPPORTED
    error for tables with engines which don't support delayed inserts.

    We can't do auto-repair in insert delayed thread, as it would hang
    when trying to an exclusive MDL_LOCK on the table during repair
    as the connection thread has a SHARED_WRITE lock.
  */
  if (!(table= open_n_lock_single_table(&thd, &table_list,
                                        TL_WRITE_DELAYED,
                                        MYSQL_OPEN_IGNORE_GLOBAL_READ_LOCK |
                                        MYSQL_OPEN_IGNORE_REPAIR,
                                        &prelocking_strategy)))
  {
    /* If table was crashed, then upper level should retry open+repair */
    retry= table_list.crashed;
    thd.fatal_error();                          // Abort waiting inserts
    return TRUE;
  }

  if (table->triggers || table->check_constraints)
  {
    /*
      Table has triggers or check constraints. This is not an error, but we do
      not support these with delayed insert. Terminate the delayed
      thread without an error and thus request lock upgrade.
    */
    return TRUE;
  }
  table->copy_blobs= 1;

  table->file->prepare_for_row_logging();
  return FALSE;
}


/*
 * Create a new delayed insert thread
*/

pthread_handler_t handle_delayed_insert(void *arg)
{
  Delayed_insert *di= (Delayed_insert*) arg;
  THD *thd= &di->thd;

  pthread_detach_this_thread();
  /* Add thread to THD list so that's it's visible in 'show processlist' */
  thd->set_start_time();
  server_threads.insert(thd);
  if (abort_loop)
    thd->set_killed(KILL_CONNECTION);
  else
    thd->reset_killed();

  mysql_thread_set_psi_id(thd->thread_id);

  /*
    Wait until the client runs into mysql_cond_wait(),
    where we free it after the table is opened and di linked in the list.
    If we did not wait here, the client might detect the opened table
    before it is linked to the list. It would release LOCK_delayed_create
    and allow another thread to create another handler for the same table,
    since it does not find one in the list.
  */
  mysql_mutex_lock(&di->mutex);
  if (my_thread_init())
  {
    /* Can't use my_error since store_globals has not yet been called */
    thd->get_stmt_da()->set_error_status(ER_OUT_OF_RESOURCES);
    di->handler_thread_initialized= TRUE;
  }
  else
  {
    DBUG_ENTER("handle_delayed_insert");
    thd->thread_stack= (char*) &thd;
    if (init_thr_lock())
    {
      thd->get_stmt_da()->set_error_status(ER_OUT_OF_RESOURCES);
      di->handler_thread_initialized= TRUE;
      thd->fatal_error();
      goto err;
    }

    thd->store_globals();

    thd->lex->sql_command= SQLCOM_INSERT;       // For innodb::store_lock()

    /*
      INSERT DELAYED has to go to row-based format because the time
      at which rows are inserted cannot be determined in mixed mode.
    */
    thd->set_current_stmt_binlog_format_row_if_mixed();
    /* Don't annotate insert delayed binlog events */
    thd->variables.binlog_annotate_row_events= 0;

    /*
      Clone tickets representing protection against GRL and the lock on
      the target table for the insert and add them to the list of granted
      metadata locks held by the handler thread. This is safe since the
      handler thread is not holding nor waiting on any metadata locks.
    */
    if (thd->mdl_context.clone_ticket(&di->grl_protection) ||
        thd->mdl_context.clone_ticket(&di->table_list.mdl_request))
    {
      thd->release_transactional_locks();
      di->handler_thread_initialized= TRUE;
      goto err;
    }

    /*
      Now that the ticket has been cloned, it is safe for the connection
      thread to exit.
    */
    di->handler_thread_initialized= TRUE;
    di->table_list.mdl_request.ticket= NULL;

    thd->set_query_id(next_query_id());

    if (di->open_and_lock_table())
      goto err;

    /*
      INSERT DELAYED generally expects thd->lex->current_select to be NULL,
      since this is not an attribute of the current thread. This can lead to
      problems if the thread that spawned the current one disconnects.
      current_select will then point to freed memory. But current_select is
      required to resolve the partition function. So, after fulfilling that
      requirement, we set the current_select to 0.
    */
    thd->lex->current_select= NULL;

    /* Tell client that the thread is initialized */
    mysql_cond_signal(&di->cond_client);

    /*
      Inform mdl that it needs to call mysql_lock_abort to abort locks
      for delayed insert.
    */
    thd->mdl_context.set_needs_thr_lock_abort(TRUE);

    di->table->mark_columns_needed_for_insert();
    /* Mark all columns for write as we don't know which columns we get from user */
    bitmap_set_all(di->table->write_set);

    /* Now wait until we get an insert or lock to handle */
    /* We will not abort as long as a client thread uses this thread */

    for (;;)
    {
      if (thd->killed)
      {
        uint lock_count;
        DBUG_PRINT("delayed", ("Insert delayed killed"));
        /*
          Remove this from delay insert list so that no one can request a
          table from this
        */
        mysql_mutex_unlock(&di->mutex);
        mysql_mutex_lock(&LOCK_delayed_insert);
        di->unlink();
        lock_count= di->lock_count();
        mysql_mutex_unlock(&LOCK_delayed_insert);
        mysql_mutex_lock(&di->mutex);
        if (!lock_count && !di->tables_in_use && !di->stacked_inserts &&
            !thd->lock)
          break;                                // Time to die
      }

      /* Shouldn't wait if killed or an insert is waiting. */
      DBUG_PRINT("delayed",
                 ("thd->killed: %d di->status: %d di->stacked_inserts: %d",
                  thd->killed, di->status, di->stacked_inserts));
      if (!thd->killed && !di->status && !di->stacked_inserts)
      {
        struct timespec abstime;
        set_timespec(abstime, delayed_insert_timeout);

        /* Information for pthread_kill */
        mysql_mutex_unlock(&di->mutex);
        mysql_mutex_lock(&di->thd.mysys_var->mutex);
        di->thd.mysys_var->current_mutex= &di->mutex;
        di->thd.mysys_var->current_cond= &di->cond;
        mysql_mutex_unlock(&di->thd.mysys_var->mutex);
        mysql_mutex_lock(&di->mutex);
        THD_STAGE_INFO(&(di->thd), stage_waiting_for_insert);

        DBUG_PRINT("info", ("Waiting for someone to insert rows"));
        while (!thd->killed && !di->status)
        {
          int error;
          mysql_audit_release(thd);
          error= mysql_cond_timedwait(&di->cond, &di->mutex, &abstime);
#ifdef EXTRA_DEBUG
          if (error && error != EINTR && error != ETIMEDOUT)
          {
            fprintf(stderr, "Got error %d from mysql_cond_timedwait\n", error);
            DBUG_PRINT("error", ("Got error %d from mysql_cond_timedwait",
                                 error));
          }
#endif
          /* Idle for delayed_insert_timeout seconds: shut this thread down */
          if (error == ETIMEDOUT || error == ETIME)
            thd->set_killed(KILL_CONNECTION);
        }
        /* We can't lock di->mutex and mysys_var->mutex at the same time */
        mysql_mutex_unlock(&di->mutex);
        mysql_mutex_lock(&di->thd.mysys_var->mutex);
        di->thd.mysys_var->current_mutex= 0;
        di->thd.mysys_var->current_cond= 0;
        mysql_mutex_unlock(&di->thd.mysys_var->mutex);
        mysql_mutex_lock(&di->mutex);
      }

      /*
        The code depends on that the following ASSERT always hold.
        I don't want to accidently introduce and bugs in the following code
        in this commit, so I leave the small cleaning up of the code to
        a future commit
      */
      DBUG_ASSERT(thd->lock || di->stacked_inserts == 0);

      DBUG_PRINT("delayed",
                 ("thd->killed: %d di->status: %d di->stacked_insert: %d di->tables_in_use: %d thd->lock: %d",
                  thd->killed, di->status, di->stacked_inserts,
                  di->tables_in_use, thd->lock != 0));

      /*
        This is used to test see what happens if killed is sent before
        we have time to handle the insert requests.
      */
      DBUG_EXECUTE_IF("write_delay_wakeup",
                      if (!thd->killed && di->stacked_inserts)
                        my_sleep(500000);
                      );

      if (di->tables_in_use && ! thd->lock &&
          (!thd->killed || di->stacked_inserts))
      {
        thd->set_query_id(next_query_id());
        /*
          Request for new delayed insert.
          Lock the table, but avoid to be blocked by a global read lock.
          If we got here while a global read lock exists, then one or more
          inserts started before the lock was requested. These are allowed
          to complete their work before the server returns control to the
          client which requested the global read lock. The delayed insert
          handler will close the table and finish when the outstanding
          inserts are done.
        */
        if (! (thd->lock= mysql_lock_tables(thd, &di->table, 1, 0)))
        {
          /* Fatal error */
          thd->set_killed(KILL_CONNECTION);
        }
        mysql_cond_broadcast(&di->cond_client);
      }
      if (di->stacked_inserts)
      {
        delayed_row *row;
        I_List_iterator<delayed_row> it(di->rows);
        my_thread_id cur_thd= di->thd.thread_id;

        /* Report an audit external-lock event once per originating client */
        while ((row= it++))
        {
          if (cur_thd != row->thread_id)
          {
            mysql_audit_external_lock_ex(&di->thd, row->thread_id,
                                         row->user, row->host, row->ip,
                                         row->query_id, di->table->s, F_WRLCK);
            cur_thd= row->thread_id;
          }
        }
        if (di->handle_inserts())
        {
          /* Some fatal error */
          thd->set_killed(KILL_CONNECTION);
        }
      }
      di->status= 0;
      if (!di->stacked_inserts && !di->tables_in_use && thd->lock)
      {
        /*
          No one is doing a insert delayed
          Unlock table so that other threads can use it
        */
        MYSQL_LOCK *lock= thd->lock;
        thd->lock= 0;
        mysql_mutex_unlock(&di->mutex);
        /*
          We need to release next_insert_id before unlocking. This is
          enforced by handler::ha_external_lock().
        */
        di->table->file->ha_release_auto_increment();
        mysql_unlock_tables(thd, lock);
        trans_commit_stmt(thd);
        di->group_count= 0;
        mysql_audit_release(thd);
        mysql_mutex_lock(&di->mutex);
      }

      /*
        Reset binlog. We can't call ha_reset() for the table as this will
        reset the table maps we have calculated earlier.
      */
      thd->reset_binlog_for_next_statement();

      if (di->tables_in_use)
        mysql_cond_broadcast(&di->cond_client); // If waiting clients
    }

  err:
    DBUG_LEAVE;
  }

  {
    DBUG_ENTER("handle_delayed_insert-cleanup");
    di->table= 0;
    mysql_mutex_unlock(&di->mutex);

    /*
      Protect against mdl_locks trying to access open tables
      We use KILL_CONNECTION_HARD here to ensure that
      THD::notify_shared_lock() dosn't try to access open tables after
      this.
    */
    mysql_mutex_lock(&thd->LOCK_thd_data);
    thd->mdl_context.set_needs_thr_lock_abort(0);
    mysql_mutex_unlock(&thd->LOCK_thd_data);
    thd->set_killed(KILL_CONNECTION_HARD);      // If error

    close_thread_tables(thd);                   // Free the table
    thd->release_transactional_locks();
    mysql_cond_broadcast(&di->cond_client);     // Safety

    mysql_mutex_lock(&LOCK_delayed_create);     // Because of delayed_get_table
    mysql_mutex_lock(&LOCK_delayed_insert);
    /*
      di should be unlinked from the thread handler list and have no active
      clients
    */
    delete di;
    mysql_mutex_unlock(&LOCK_delayed_insert);
    mysql_mutex_unlock(&LOCK_delayed_create);

    DBUG_LEAVE;
  }
  my_thread_end();
  pthread_exit(0);

  return 0;
}


/* Remove all pointers to data for blob fields so that original table doesn't try to free them */

static void unlink_blobs(TABLE *table)
{
  for (Field **ptr= table->field ; *ptr ; ptr++)
  {
    if ((*ptr)->flags & BLOB_FLAG)
      ((Field_blob *) (*ptr))->clear_temporary();
  }
}

/* Free blobs stored in current row */

static void free_delayed_insert_blobs(TABLE *table)
{
  for (Field **ptr= table->field ; *ptr ; ptr++)
  {
    if ((*ptr)->flags & BLOB_FLAG)
      ((Field_blob *) *ptr)->free();
  }
}


/* set value field for blobs to point to data in record */

static void set_delayed_insert_blobs(TABLE *table)
{
  for (Field **ptr= table->field ; *ptr ; ptr++)
  {
    if ((*ptr)->flags & BLOB_FLAG)
    {
      Field_blob *blob= ((Field_blob *) *ptr);
      uchar *data= blob->get_ptr();
      if (data)
        blob->set_value(data);                  // Set value.ptr() to point to data
    }
  }
}


bool Delayed_insert::handle_inserts(void)
{
  int error;
  ulong max_rows;
  bool using_ignore= 0, using_opt_replace= 0, using_bin_log;
  delayed_row *row;
  DBUG_ENTER("handle_inserts");

  /* Allow client to insert new rows */
  mysql_mutex_unlock(&mutex);

  table->next_number_field= table->found_next_number_field;
  table->use_all_columns();

  THD_STAGE_INFO(&thd, stage_upgrading_lock);
if (thr_upgrade_write_delay_lock(*thd.lock->locks, delayed_lock, + thd.variables.lock_wait_timeout)) + { + /* + This can happen if thread is killed either by a shutdown + or if another thread is removing the current table definition + from the table cache. + */ + my_error(ER_DELAYED_CANT_CHANGE_LOCK, MYF(ME_FATAL | ME_ERROR_LOG), + table->s->table_name.str); + goto err; + } + + THD_STAGE_INFO(&thd, stage_insert); + max_rows= delayed_insert_limit; + if (thd.killed || table->s->tdc->flushed) + { + thd.set_killed(KILL_SYSTEM_THREAD); + max_rows= ULONG_MAX; // Do as much as possible + } + + if (table->file->ha_rnd_init_with_error(0)) + goto err; + /* + We have to call prepare_for_row_logging() as the second call to + handler_writes() will not have called decide_logging_format. + */ + table->file->prepare_for_row_logging(); + table->file->prepare_for_insert(1); + using_bin_log= table->file->row_logging; + + /* + We can't use row caching when using the binary log because if + we get a crash, then binary log will contain rows that are not yet + written to disk, which will cause problems in replication. + */ + if (!using_bin_log && !table->s->long_unique_table) + table->file->extra(HA_EXTRA_WRITE_CACHE); + + mysql_mutex_lock(&mutex); + + while ((row=rows.get())) + { + int tmp_error; + stacked_inserts--; + mysql_mutex_unlock(&mutex); + memcpy(table->record[0],row->record,table->s->reclength); + if (table->s->blob_fields) + set_delayed_insert_blobs(table); + + thd.start_time=row->start_time; + thd.start_time_sec_part=row->start_time_sec_part; + thd.used= row->query_start_sec_part_used; + /* + To get the exact auto_inc interval to store in the binlog we must not + use values from the previous interval (of the previous rows). + */ + bool log_query= (row->log_query && row->query.str != NULL); + DBUG_PRINT("delayed", ("query: '%s' length: %lu", row->query.str ? 
+ row->query.str : "[NULL]", + (ulong) row->query.length)); + if (log_query) + { + /* + Guaranteed that the INSERT DELAYED STMT will not be here + in SBR when mysql binlog is enabled. + */ + DBUG_ASSERT(!mysql_bin_log.is_open() || + thd.is_current_stmt_binlog_format_row()); + + /* + This is the first value of an INSERT statement. + It is the right place to clear a forced insert_id. + This is usually done after the last value of an INSERT statement, + but we won't know this in the insert delayed thread. But before + the first value is sufficiently equivalent to after the last + value of the previous statement. + */ + table->file->ha_release_auto_increment(); + thd.auto_inc_intervals_in_cur_stmt_for_binlog.empty(); + } + thd.first_successful_insert_id_in_prev_stmt= + row->first_successful_insert_id_in_prev_stmt; + thd.stmt_depends_on_first_successful_insert_id_in_prev_stmt= + row->stmt_depends_on_first_successful_insert_id_in_prev_stmt; + table->auto_increment_field_not_null= row->auto_increment_field_not_null; + + /* Copy the session variables. */ + thd.variables.auto_increment_increment= row->auto_increment_increment; + thd.variables.auto_increment_offset= row->auto_increment_offset; + thd.variables.sql_mode= row->sql_mode; + + /* Copy a forced insert_id, if any. 
*/ + if (row->forced_insert_id) + { + DBUG_PRINT("delayed", ("received auto_inc: %lu", + (ulong) row->forced_insert_id)); + thd.force_one_auto_inc_interval(row->forced_insert_id); + } + + info.ignore= row->ignore; + info.handle_duplicates= row->dup; + if (info.ignore || + info.handle_duplicates != DUP_ERROR) + { + table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + using_ignore=1; + } + if (info.handle_duplicates == DUP_REPLACE && + (!table->triggers || + !table->triggers->has_delete_triggers())) + { + table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE); + using_opt_replace= 1; + } + if (info.handle_duplicates == DUP_UPDATE) + table->file->extra(HA_EXTRA_INSERT_WITH_UPDATE); + thd.clear_error(); // reset error for binlog + + tmp_error= 0; + if (unlikely(table->vfield)) + { + /* + Virtual fields where not calculated by caller as the temporary + TABLE object used had vcol_set empty. Better to calculate them + here to make the caller faster. + */ + tmp_error= table->update_virtual_fields(table->file, + VCOL_UPDATE_FOR_WRITE); + } + + if (unlikely(tmp_error || write_record(&thd, table, &info, NULL))) + { + info.error_count++; // Ignore errors + thread_safe_increment(delayed_insert_errors,&LOCK_delayed_status); + row->log_query = 0; + } + + if (using_ignore) + { + using_ignore=0; + table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); + } + if (using_opt_replace) + { + using_opt_replace= 0; + table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); + } + + if (table->s->blob_fields) + free_delayed_insert_blobs(table); + thread_safe_decrement(delayed_rows_in_use,&LOCK_delayed_status); + thread_safe_increment(delayed_insert_writes,&LOCK_delayed_status); + mysql_mutex_lock(&mutex); + + /* + Reset the table->auto_increment_field_not_null as it is valid for + only one row. 
+ */ + table->auto_increment_field_not_null= FALSE; + + delete row; + /* + Let READ clients do something once in a while + We should however not break in the middle of a multi-line insert + if we have binary logging enabled as we don't want other commands + on this table until all entries has been processed + */ + if (group_count++ >= max_rows && (row= rows.head()) && + (!(row->log_query & using_bin_log))) + { + group_count=0; + if (stacked_inserts || tables_in_use) // Let these wait a while + { + if (tables_in_use) + mysql_cond_broadcast(&cond_client); // If waiting clients + THD_STAGE_INFO(&thd, stage_reschedule); + mysql_mutex_unlock(&mutex); + if (unlikely((error=table->file->extra(HA_EXTRA_NO_CACHE)))) + { + /* This should never happen */ + table->file->print_error(error,MYF(0)); + sql_print_error("%s", thd.get_stmt_da()->message()); + DBUG_PRINT("error", ("HA_EXTRA_NO_CACHE failed in loop")); + goto err; + } + query_cache_invalidate3(&thd, table, 1); + if (thr_reschedule_write_lock(*thd.lock->locks, + thd.variables.lock_wait_timeout)) + { + /* This is not known to happen. */ + my_error(ER_DELAYED_CANT_CHANGE_LOCK, + MYF(ME_FATAL | ME_ERROR_LOG), + table->s->table_name.str); + goto err; + } + if (!using_bin_log && !table->s->long_unique_table) + table->file->extra(HA_EXTRA_WRITE_CACHE); + mysql_mutex_lock(&mutex); + THD_STAGE_INFO(&thd, stage_insert); + } + if (tables_in_use) + mysql_cond_broadcast(&cond_client); // If waiting clients + } + } + + table->file->ha_rnd_end(); + + if (WSREP((&thd))) + thd_proc_info(&thd, "Insert done"); + else + thd_proc_info(&thd, 0); + mysql_mutex_unlock(&mutex); + + /* + We need to flush the pending event when using row-based + replication since the flushing normally done in binlog_query() is + not done last in the statement: for delayed inserts, the insert + statement is logged *before* all rows are inserted. 
+ + We can flush the pending event without checking the thd->lock + since the delayed insert *thread* is not inside a stored function + or trigger. + + TODO: Move the logging to last in the sequence of rows. + */ + if (table->file->row_logging && + thd.binlog_flush_pending_rows_event(TRUE, + table->file->row_logging_has_trans)) + goto err; + + if (unlikely((error=table->file->extra(HA_EXTRA_NO_CACHE)))) + { // This shouldn't happen + table->file->print_error(error,MYF(0)); + sql_print_error("%s", thd.get_stmt_da()->message()); + DBUG_PRINT("error", ("HA_EXTRA_NO_CACHE failed after loop")); + goto err; + } + query_cache_invalidate3(&thd, table, 1); + mysql_mutex_lock(&mutex); + DBUG_RETURN(0); + + err: +#ifndef DBUG_OFF + max_rows= 0; // For DBUG output +#endif + /* Remove all not used rows */ + mysql_mutex_lock(&mutex); + while ((row=rows.get())) + { + if (table->s->blob_fields) + { + memcpy(table->record[0],row->record,table->s->reclength); + set_delayed_insert_blobs(table); + free_delayed_insert_blobs(table); + } + delete row; + thread_safe_increment(delayed_insert_errors,&LOCK_delayed_status); + stacked_inserts--; +#ifndef DBUG_OFF + max_rows++; +#endif + } + DBUG_PRINT("error", ("dropped %lu rows after an error", max_rows)); + thread_safe_increment(delayed_insert_errors, &LOCK_delayed_status); + DBUG_RETURN(1); +} +#endif /* EMBEDDED_LIBRARY */ + +/*************************************************************************** + Store records in INSERT ... 
SELECT * +***************************************************************************/ + + +/* + make insert specific preparation and checks after opening tables + + SYNOPSIS + mysql_insert_select_prepare() + thd thread handler + + RETURN + 0 OK + > 0 Error + < 0 Ok, ignore insert +*/ + +int mysql_insert_select_prepare(THD *thd, select_result *sel_res) +{ + int res; + LEX *lex= thd->lex; + SELECT_LEX *select_lex= lex->first_select_lex(); + DBUG_ENTER("mysql_insert_select_prepare"); + + /* + SELECT_LEX do not belong to INSERT statement, so we can't add WHERE + clause if table is VIEW + */ + + if ((res= mysql_prepare_insert(thd, lex->query_tables, lex->field_list, 0, + lex->update_list, lex->value_list, + lex->duplicates, lex->ignore, + &select_lex->where, TRUE))) + DBUG_RETURN(res); + + /* + If sel_res is not empty, it means we have items in returing_list. + So we prepare the list now + */ + if (sel_res) + sel_res->prepare(lex->returning()->item_list, NULL); + + List_iterator ti(select_lex->leaf_tables); + TABLE_LIST *table; + uint insert_tables; + + if (select_lex->first_cond_optimization) + { + /* Back up leaf_tables list. 
*/ + Query_arena *arena= thd->stmt_arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); // For easier test + + insert_tables= select_lex->insert_tables; + while ((table= ti++) && insert_tables--) + { + select_lex->leaf_tables_exec.push_back(table); + table->tablenr_exec= table->table->tablenr; + table->map_exec= table->table->map; + table->maybe_null_exec= table->table->maybe_null; + } + if (arena) + thd->restore_active_arena(arena, &backup); + } + ti.rewind(); + /* + exclude first table from leaf tables list, because it belong to + INSERT + */ + /* skip all leaf tables belonged to view where we are insert */ + insert_tables= select_lex->insert_tables; + while ((table= ti++) && insert_tables--) + ti.remove(); + + DBUG_RETURN(0); +} + + +select_insert::select_insert(THD *thd_arg, TABLE_LIST *table_list_par, + TABLE *table_par, + List *fields_par, + List *update_fields, + List *update_values, + enum_duplicates duplic, + bool ignore_check_option_errors, + select_result *result): + select_result_interceptor(thd_arg), + sel_result(result), + table_list(table_list_par), table(table_par), fields(fields_par), + autoinc_value_of_last_inserted_row(0), + insert_into_view(table_list_par && table_list_par->view != 0) +{ + bzero((char*) &info,sizeof(info)); + info.handle_duplicates= duplic; + info.ignore= ignore_check_option_errors; + info.update_fields= update_fields; + info.update_values= update_values; + info.view= (table_list_par->view ? table_list_par : 0); + info.table_list= table_list_par; +} + + +int +select_insert::prepare(List &values, SELECT_LEX_UNIT *u) +{ + LEX *lex= thd->lex; + int res= 0; + table_map map= 0; + SELECT_LEX *lex_current_select_save= lex->current_select; + DBUG_ENTER("select_insert::prepare"); + + unit= u; + + /* + Since table in which we are going to insert is added to the first + select, LEX::current_select should point to the first select while + we are fixing fields from insert list. 
+ */ + lex->current_select= lex->first_select_lex(); + + res= (setup_returning_fields(thd, table_list) || + setup_fields(thd, Ref_ptr_array(), values, MARK_COLUMNS_READ, 0, 0, + 0) || + check_insert_fields(thd, table_list, *fields, values, + !insert_into_view, 1, &map)); + + if (!res) + { + /* + Check that all colN=exprN pairs are compatible for assignment, e.g.: + INSERT INTO t1 (col1, col2) VALUES (expr1, expr2); + INSERT INTO t1 SET col1=expr1, col2=expr2; + */ + res= table_list->table->check_assignability_opt_fields(*fields, values, + lex->ignore); + } + + if (!res && fields->elements) + { + Abort_on_warning_instant_set aws(thd, + !info.ignore && thd->is_strict_mode()); + res= check_that_all_fields_are_given_values(thd, table_list->table, + table_list); + } + + if (info.handle_duplicates == DUP_UPDATE && !res) + { + Name_resolution_context *context= &lex->first_select_lex()->context; + Name_resolution_context_state ctx_state; + + /* Save the state of the current name resolution context. */ + ctx_state.save_state(context, table_list); + + /* Perform name resolution only in the first table - 'table_list'. */ + table_list->next_local= 0; + context->resolve_in_table_list_only(table_list); + + lex->first_select_lex()->no_wrap_view_item= TRUE; + res= res || + check_update_fields(thd, context->table_list, + *info.update_fields, *info.update_values, + /* + In INSERT SELECT ON DUPLICATE KEY UPDATE col=x + 'x' can legally refer to a non-inserted table. + 'x' is not even resolved yet. + */ + true, + &map); + lex->first_select_lex()->no_wrap_view_item= FALSE; + /* + When we are not using GROUP BY and there are no ungrouped + aggregate functions we can refer to other tables in the ON + DUPLICATE KEY part. We use next_name_resolution_table + descructively, so check it first (views?) 
+ */ + DBUG_ASSERT (!table_list->next_name_resolution_table); + if (lex->first_select_lex()->group_list.elements == 0 && + !lex->first_select_lex()->with_sum_func) + { + /* + We must make a single context out of the two separate name + resolution contexts : the INSERT table and the tables in the + SELECT part of INSERT ... SELECT. To do that we must + concatenate the two lists + */ + table_list->next_name_resolution_table= + ctx_state.get_first_name_resolution_table(); + } + + res= res || setup_fields(thd, Ref_ptr_array(), *info.update_values, + MARK_COLUMNS_READ, 0, NULL, 0) || + /* + Check that all col=expr pairs are compatible for assignment in + INSERT INTO t1 SELECT ... FROM t2 + ON DUPLICATE KEY UPDATE col=expr [, col=expr] + */ + TABLE::check_assignability_explicit_fields(*info.update_fields, + *info.update_values, + lex->ignore); + if (!res) + { + /* + Traverse the update values list and substitute fields from the + select for references (Item_ref objects) to them. This is done in + order to get correct values from those fields when the select + employs a temporary table. + */ + List_iterator li(*info.update_values); + Item *item; + + while ((item= li++)) + { + item->transform(thd, &Item::update_value_transformer, + (uchar*)lex->current_select); + } + } + + /* Restore the current context. 
*/ + ctx_state.restore_state(context, table_list); + } + + lex->current_select= lex_current_select_save; + if (res) + DBUG_RETURN(1); + /* + if it is INSERT into join view then check_insert_fields already found + real table for insert + */ + table= table_list->table; + + /* + Is table which we are changing used somewhere in other parts of + query + */ + if (unique_table(thd, table_list, table_list->next_global, 0)) + { + /* Using same table for INSERT and SELECT */ + lex->current_select->options|= OPTION_BUFFER_RESULT; + lex->current_select->join->select_options|= OPTION_BUFFER_RESULT; + } + else if (!(lex->current_select->options & OPTION_BUFFER_RESULT) && + thd->locked_tables_mode <= LTM_LOCK_TABLES && + !table->s->long_unique_table) + { + /* + We must not yet prepare the result table if it is the same as one of the + source tables (INSERT SELECT). The preparation may disable + indexes on the result table, which may be used during the select, if it + is the same table (Bug #6034). Do the preparation after the select phase + in select_insert::prepare2(). + We won't start bulk inserts at all if this statement uses functions or + should invoke triggers since they may access to the same table too. 
+ */ + table->file->ha_start_bulk_insert((ha_rows) 0); + } + restore_record(table,s->default_values); // Get empty record + table->reset_default_fields(); + table->next_number_field=table->found_next_number_field; + +#ifdef HAVE_REPLICATION + if (thd->rgi_slave && + (info.handle_duplicates == DUP_UPDATE) && + (table->next_number_field != NULL) && + rpl_master_has_bug(thd->rgi_slave->rli, 24432, TRUE, NULL, NULL)) + DBUG_RETURN(1); +#endif + + thd->cuted_fields=0; + bool create_lookup_handler= info.handle_duplicates != DUP_ERROR; + if (info.ignore || info.handle_duplicates != DUP_ERROR) + { + create_lookup_handler= true; + table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + if (table->file->ha_table_flags() & HA_DUPLICATE_POS) + { + if (table->file->ha_rnd_init_with_error(0)) + DBUG_RETURN(1); + } + } + table->file->prepare_for_insert(create_lookup_handler); + if (info.handle_duplicates == DUP_REPLACE && + (!table->triggers || !table->triggers->has_delete_triggers())) + table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE); + if (info.handle_duplicates == DUP_UPDATE) + table->file->extra(HA_EXTRA_INSERT_WITH_UPDATE); + thd->abort_on_warning= !info.ignore && thd->is_strict_mode(); + res= (table_list->prepare_where(thd, 0, TRUE) || + table_list->prepare_check_option(thd)); + + if (!res) + { + table->prepare_triggers_for_insert_stmt_or_event(); + table->mark_columns_needed_for_insert(); + } + + DBUG_RETURN(res); +} + + +/* + Finish the preparation of the result table. + + SYNOPSIS + select_insert::prepare2() + void + + DESCRIPTION + If the result table is the same as one of the source tables + (INSERT SELECT), the result table is not finally prepared at the + join prepair phase. Do the final preparation now. 
+ + RETURN + 0 OK +*/ + +int select_insert::prepare2(JOIN *) +{ + DBUG_ENTER("select_insert::prepare2"); + if (table->validate_default_values_of_unset_fields(thd)) + DBUG_RETURN(1); + if (thd->lex->describe) + DBUG_RETURN(0); + if (thd->lex->current_select->options & OPTION_BUFFER_RESULT && + thd->locked_tables_mode <= LTM_LOCK_TABLES && + !table->s->long_unique_table) + table->file->ha_start_bulk_insert((ha_rows) 0); + + /* Same as the other variants of INSERT */ + if (sel_result && + sel_result->send_result_set_metadata(thd->lex->returning()->item_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + + +void select_insert::cleanup() +{ + /* select_insert/select_create are never re-used in prepared statement */ + DBUG_ASSERT(0); +} + +select_insert::~select_insert() +{ + DBUG_ENTER("~select_insert"); + sel_result= NULL; + if (table && table->is_created()) + { + table->next_number_field=0; + table->auto_increment_field_not_null= FALSE; + table->file->ha_reset(); + } + thd->count_cuted_fields= CHECK_FIELD_IGNORE; + thd->abort_on_warning= 0; + DBUG_VOID_RETURN; +} + + +int select_insert::send_data(List &values) +{ + DBUG_ENTER("select_insert::send_data"); + bool error=0; + + thd->count_cuted_fields= CHECK_FIELD_WARN; // Calculate cuted fields + if (store_values(values)) + DBUG_RETURN(1); + thd->count_cuted_fields= CHECK_FIELD_ERROR_FOR_NULL; + if (unlikely(thd->is_error())) + { + table->auto_increment_field_not_null= FALSE; + DBUG_RETURN(1); + } + + if (table_list) // Not CREATE ... 
SELECT + { + switch (table_list->view_check_option(thd, info.ignore)) { + case VIEW_CHECK_SKIP: + DBUG_RETURN(0); + case VIEW_CHECK_ERROR: + DBUG_RETURN(1); + } + } + + error= write_record(thd, table, &info, sel_result); + table->auto_increment_field_not_null= FALSE; + + if (likely(!error)) + { + if (table->triggers || info.handle_duplicates == DUP_UPDATE) + { + /* + Restore fields of the record since it is possible that they were + changed by ON DUPLICATE KEY UPDATE clause. + + If triggers exist then whey can modify some fields which were not + originally touched by INSERT ... SELECT, so we have to restore + their original values for the next row. + */ + restore_record(table, s->default_values); + } + if (table->next_number_field) + { + /* + If no value has been autogenerated so far, we need to remember the + value we just saw, we may need to send it to client in the end. + */ + if (thd->first_successful_insert_id_in_cur_stmt == 0) // optimization + autoinc_value_of_last_inserted_row= + table->next_number_field->val_int(); + /* + Clear auto-increment field for the next record, if triggers are used + we will clear it twice, but this should be cheap. 
+ */ + table->next_number_field->reset(); + } + } + DBUG_RETURN(error); +} + + +bool select_insert::store_values(List &values) +{ + DBUG_ENTER("select_insert::store_values"); + bool error; + + table->reset_default_fields(); + if (fields->elements) + error= fill_record_n_invoke_before_triggers(thd, table, *fields, values, + true, TRG_EVENT_INSERT); + else + error= fill_record_n_invoke_before_triggers(thd, table, table->field_to_fill(), + values, true, TRG_EVENT_INSERT); + + DBUG_RETURN(error); +} + +bool select_insert::prepare_eof() +{ + int error; + bool const trans_table= table->file->has_transactions_and_rollback(); + bool changed; + bool binary_logged= 0; + killed_state killed_status= thd->killed; + + DBUG_ENTER("select_insert::prepare_eof"); + DBUG_PRINT("enter", ("trans_table: %d, table_type: '%s'", + trans_table, table->file->table_type())); + +#ifdef WITH_WSREP + error= (thd->wsrep_cs().current_error()) ? -1 : + (thd->locked_tables_mode <= LTM_LOCK_TABLES) ? +#else + error= (thd->locked_tables_mode <= LTM_LOCK_TABLES) ? +#endif /* WITH_WSREP */ + table->file->ha_end_bulk_insert() : 0; + + if (likely(!error) && unlikely(thd->is_error())) + error= thd->get_stmt_da()->sql_errno(); + + if (info.ignore || info.handle_duplicates != DUP_ERROR) + if (table->file->ha_table_flags() & HA_DUPLICATE_POS) + table->file->ha_rnd_end(); + table->file->extra(HA_EXTRA_END_ALTER_COPY); + table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); + table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); + + if (likely((changed= (info.copied || info.deleted || info.updated)))) + { + /* + We must invalidate the table in the query cache before binlog writing + and ha_autocommit_or_rollback. 
+ */ + query_cache_invalidate3(thd, table, 1); + } + + if (thd->transaction->stmt.modified_non_trans_table) + thd->transaction->all.modified_non_trans_table= TRUE; + thd->transaction->all.m_unsafe_rollback_flags|= + (thd->transaction->stmt.m_unsafe_rollback_flags & THD_TRANS::DID_WAIT); + + DBUG_ASSERT(trans_table || !changed || + thd->transaction->stmt.modified_non_trans_table); + + /* + Write to binlog before commiting transaction. No statement will + be written by the binlog_query() below in RBR mode. All the + events are in the transaction cache and will be written when + ha_autocommit_or_rollback() is issued below. + */ + if ((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) && + (likely(!error) || thd->transaction->stmt.modified_non_trans_table || + thd->log_current_statement())) + { + int errcode= 0; + int res; + if (likely(!error)) + thd->clear_error(); + else + errcode= query_error_code(thd, killed_status == NOT_KILLED); + StatementBinlog stmt_binlog(thd, !can_rollback_data() && + thd->binlog_need_stmt_format(trans_table)); + res= thd->binlog_query(THD::ROW_QUERY_TYPE, + thd->query(), thd->query_length(), + trans_table, FALSE, FALSE, errcode); + if (res > 0) + { + table->file->ha_release_auto_increment(); + DBUG_RETURN(true); + } + binary_logged= res == 0 || !table->s->tmp_table; + } + table->s->table_creation_was_logged|= binary_logged; + table->file->ha_release_auto_increment(); + + if (unlikely(error)) + { + table->file->print_error(error,MYF(0)); + DBUG_RETURN(true); + } + + DBUG_RETURN(false); +} + +bool select_insert::send_ok_packet() { + char message[160]; /* status message */ + ulonglong row_count; /* rows affected */ + ulonglong id; /* last insert-id */ + DBUG_ENTER("select_insert::send_ok_packet"); + + if (info.ignore) + my_snprintf(message, sizeof(message), ER(ER_INSERT_INFO), + (ulong) info.records, (ulong) (info.records - info.copied), + (long) thd->get_stmt_da()->current_statement_warn_count()); + else + my_snprintf(message, 
sizeof(message), ER(ER_INSERT_INFO), + (ulong) info.records, (ulong) (info.deleted + info.updated), + (long) thd->get_stmt_da()->current_statement_warn_count()); + + row_count= info.copied + info.deleted + + ((thd->client_capabilities & CLIENT_FOUND_ROWS) ? + info.touched : info.updated); + + id= (thd->first_successful_insert_id_in_cur_stmt > 0) ? + thd->first_successful_insert_id_in_cur_stmt : + (thd->arg_of_last_insert_id_function ? + thd->first_successful_insert_id_in_prev_stmt : + (info.copied ? autoinc_value_of_last_inserted_row : 0)); + + /* + Client expects an EOF/OK packet If LEX::has_returning and if result set + meta was sent. See explanation for other variants of INSERT. + */ + if (sel_result) + sel_result->send_eof(); + else + ::my_ok(thd, row_count, id, message); + + DBUG_RETURN(false); +} + +bool select_insert::send_eof() +{ + bool res; + DBUG_ENTER("select_insert::send_eof"); + res= (prepare_eof() || (!suppress_my_ok && send_ok_packet())); + DBUG_RETURN(res); +} + +void select_insert::abort_result_set() +{ + bool binary_logged= 0; + DBUG_ENTER("select_insert::abort_result_set"); + /* + If the creation of the table failed (due to a syntax error, for + example), no table will have been opened and therefore 'table' + will be NULL. In that case, we still need to execute the rollback + and the end of the function. + + If it fail due to inability to insert in multi-table view for example, + table will be assigned with view table structure, but that table will + not be opened really (it is dummy to check fields types & Co). + */ + if (table && table->file->is_open()) + { + bool changed, transactional_table; + /* + If we are not in prelocked mode, we end the bulk insert started + before. 
+ */ + if (thd->locked_tables_mode <= LTM_LOCK_TABLES) + table->file->ha_end_bulk_insert(); + + if (table->file->inited) + table->file->ha_rnd_end(); + table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); + table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); + + /* + If at least one row has been inserted/modified and will stay in + the table (the table doesn't have transactions) we must write to + the binlog (and the error code will make the slave stop). + + For many errors (example: we got a duplicate key error while + inserting into a MyISAM table), no row will be added to the table, + so passing the error to the slave will not help since there will + be an error code mismatch (the inserts will succeed on the slave + with no error). + + If table creation failed, the number of rows modified will also be + zero, so no check for that is made. + */ + changed= (info.copied || info.deleted || info.updated); + transactional_table= table->file->has_transactions_and_rollback(); + if (thd->transaction->stmt.modified_non_trans_table || + thd->log_current_statement()) + { + if (!can_rollback_data()) + thd->transaction->all.modified_non_trans_table= TRUE; + + if(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) + { + StatementBinlog stmt_binlog(thd, !can_rollback_data() && + thd->binlog_need_stmt_format(transactional_table)); + int errcode= query_error_code(thd, thd->killed == NOT_KILLED); + int res; + /* error of writing binary log is ignored */ + res= thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query(), + thd->query_length(), + transactional_table, FALSE, FALSE, errcode); + binary_logged= res == 0 || !table->s->tmp_table; + } + if (changed) + query_cache_invalidate3(thd, table, 1); + } + DBUG_ASSERT(transactional_table || !changed || + thd->transaction->stmt.modified_non_trans_table); + + table->s->table_creation_was_logged|= binary_logged; + table->file->ha_release_auto_increment(); + } + + DBUG_VOID_RETURN; +} + + 
+/*************************************************************************** + CREATE TABLE (SELECT) ... +***************************************************************************/ + +Field *Item::create_field_for_create_select(MEM_ROOT *root, TABLE *table) +{ + static Tmp_field_param param(false, false, false, false); + Tmp_field_src src; + return create_tmp_field_ex(root, table, &src, ¶m); +} + + +/** + Create table from lists of fields and items (or just return TABLE + object for pre-opened existing table). + + @param thd [in] Thread object + @param create_info [in] Create information (like MAX_ROWS, ENGINE or + temporary table flag) + @param create_table [in] Pointer to TABLE_LIST object providing database + and name for table to be created or to be open + @param alter_info [in/out] Initial list of columns and indexes for the + table to be created + @param items [in] List of items which should be used to produce + rest of fields for the table (corresponding + fields will be added to the end of + alter_info->create_list) + @param lock [out] Pointer to the MYSQL_LOCK object for table + created will be returned in this parameter. + Since this table is not included in THD::lock + caller is responsible for explicitly unlocking + this table. + @param hooks [in] Hooks to be invoked before and after obtaining + table lock on the table being created. + + @note + This function assumes that either table exists and was pre-opened and + locked at open_and_lock_tables() stage (and in this case we just emit + error or warning and return pre-opened TABLE object) or an exclusive + metadata lock was acquired on table so we can safely create, open and + lock table in it (we don't acquire metadata lock if this create is + for temporary table). + + @note + Since this function contains some logic specific to CREATE TABLE ... + SELECT it should be changed before it can be used in other contexts. 
+ + @retval non-zero Pointer to TABLE object for table created or opened + @retval 0 Error +*/ + +TABLE *select_create::create_table_from_items(THD *thd, List *items, + MYSQL_LOCK **lock) +{ + TABLE tmp_table; // Used during 'Create_field()' + TABLE_SHARE share; + TABLE *table= 0; + uint select_field_count= items->elements; + /* Add selected items to field list */ + List_iterator_fast it(*items); + Item *item; + bool save_table_creation_was_logged; + DBUG_ENTER("select_create::create_table_from_items"); + + tmp_table.s= &share; + init_tmp_table_share(thd, &share, "", 0, "", ""); + + tmp_table.s->db_create_options=0; + tmp_table.null_row= 0; + tmp_table.maybe_null= 0; + tmp_table.in_use= thd; + + if (!(thd->variables.option_bits & OPTION_EXPLICIT_DEF_TIMESTAMP)) + promote_first_timestamp_column(&alter_info->create_list); + + if (create_info->fix_create_fields(thd, alter_info, *table_list)) + DBUG_RETURN(NULL); + + while ((item=it++)) + { + Field *tmp_field= item->create_field_for_create_select(thd->mem_root, + &tmp_table); + + if (!tmp_field) + DBUG_RETURN(NULL); + + Field *table_field; + + switch (item->type()) + { + /* + We have to take into account both the real table's fields and + pseudo-fields used in trigger's body. These fields are used + to copy defaults values later inside constructor of + the class Create_field. + */ + case Item::FIELD_ITEM: + case Item::TRIGGER_FIELD_ITEM: + table_field= ((Item_field *) item)->field; + break; + default: + table_field= NULL; + } + + Create_field *cr_field= new (thd->mem_root) + Create_field(thd, tmp_field, table_field); + + if (!cr_field) + DBUG_RETURN(NULL); + + if (item->maybe_null()) + cr_field->flags &= ~NOT_NULL_FLAG; + alter_info->create_list.push_back(cr_field, thd->mem_root); + } + + /* + Item*::type_handler() always returns pointers to + type_handler_{time2|datetime2|timestamp2} no matter what + the current mysql56_temporal_format says. + Let's convert them according to mysql56_temporal_format. 
+ QQ: This perhaps should eventually be fixed to have Item*::type_handler() + respect mysql56_temporal_format, and remove the upgrade from here. + */ + Create_field::upgrade_data_types(alter_info->create_list); + + if (create_info->check_fields(thd, alter_info, + table_list->table_name, + table_list->db, + select_field_count)) + DBUG_RETURN(NULL); + + DEBUG_SYNC(thd,"create_table_select_before_create"); + + /* Check if LOCK TABLES + CREATE OR REPLACE of existing normal table*/ + if (thd->locked_tables_mode && table_list->table && + !create_info->tmp_table()) + { + /* Remember information about the locked table */ + create_info->pos_in_locked_tables= + table_list->table->pos_in_locked_tables; + create_info->mdl_ticket= table_list->table->mdl_ticket; + } + + /* + Create and lock table. + + Note that we either creating (or opening existing) temporary table or + creating base table on which name we have exclusive lock. So code below + should not cause deadlocks or races. + + We don't log the statement, it will be logged later. + + If this is a HEAP table, the automatic DELETE FROM which is written to the + binlog when a HEAP table is opened for the first time since startup, must + not be written: 1) it would be wrong (imagine we're in CREATE SELECT: we + don't want to delete from it) 2) it would be written before the CREATE + TABLE, which is a wrong order. So we keep binary logging disabled when we + open_table(). 
+ */ + + if (!mysql_create_table_no_lock(thd, &ddl_log_state_create, &ddl_log_state_rm, + create_info, alter_info, NULL, + select_field_count, table_list)) + { + DEBUG_SYNC(thd,"create_table_select_before_open"); + + /* + If we had a temporary table or a table used with LOCK TABLES, + it was closed by mysql_create() + */ + table_list->table= 0; + + if (!create_info->tmp_table()) + { + Open_table_context ot_ctx(thd, MYSQL_OPEN_REOPEN); + TABLE_LIST::enum_open_strategy save_open_strategy; + + /* Force the newly created table to be opened */ + save_open_strategy= table_list->open_strategy; + table_list->open_strategy= TABLE_LIST::OPEN_NORMAL; + /* + Here we open the destination table, on which we already have + an exclusive metadata lock. + */ + if (open_table(thd, table_list, &ot_ctx)) + { + quick_rm_table(thd, create_info->db_type, &table_list->db, + table_case_name(create_info, &table_list->table_name), + 0); + } + /* Restore */ + table_list->open_strategy= save_open_strategy; + } + else + { + /* + The pointer to the newly created temporary table has been stored in + table->create_info. + */ + table_list->table= create_info->table; + if (!table_list->table) + { + /* + This shouldn't happen as creation of temporary table should make + it preparable for open. Anyway we can't drop temporary table if + we are unable to find it. + */ + DBUG_ASSERT(0); + } + table_list->table->pos_in_table_list= table_list; + } + } + else + table_list->table= 0; // Create failed + + if (unlikely(!(table= table_list->table))) + { + if (likely(!thd->is_error())) // CREATE ... IF NOT EXISTS + my_ok(thd); // succeed, but did nothing + ddl_log_complete(&ddl_log_state_rm); + ddl_log_complete(&ddl_log_state_create); + DBUG_RETURN(NULL); + } + + DEBUG_SYNC(thd,"create_table_select_before_lock"); + + table->reginfo.lock_type=TL_WRITE; + + /* + Ensure that decide_logging_format(), called by mysql_lock_tables(), works + with temporary tables that will be logged later if needed. 
+ */ + save_table_creation_was_logged= table->s->table_creation_was_logged; + table->s->table_creation_was_logged= 1; + + /* + mysql_lock_tables() below should never fail with request to reopen table + since it won't wait for the table lock (we have exclusive metadata lock on + the table) and thus can't get aborted. + */ + if (unlikely(!((*lock)= mysql_lock_tables(thd, &table, 1, 0)) || + postlock(thd, &table))) + { + /* purecov: begin tested */ + /* + This can happen in innodb when you get a deadlock when using same table + in insert and select or when you run out of memory. + It can also happen if there was a conflict in + THD::decide_logging_format() + */ + if (!thd->is_error()) + my_error(ER_CANT_LOCK, MYF(0), my_errno); + if (*lock) + { + mysql_unlock_tables(thd, *lock); + *lock= 0; + } + drop_open_table(thd, table, &table_list->db, &table_list->table_name); + ddl_log_complete(&ddl_log_state_rm); + ddl_log_complete(&ddl_log_state_create); + DBUG_RETURN(NULL); + /* purecov: end */ + } + table->s->table_creation_was_logged= save_table_creation_was_logged; + if (!table->s->tmp_table) + table->file->prepare_for_row_logging(); + + /* + If slave is converting a statement event to row events, log the original + create statement as an annotated row + */ +#ifdef HAVE_REPLICATION + if (thd->slave_thread && opt_replicate_annotate_row_events && + thd->is_current_stmt_binlog_format_row()) + thd->variables.binlog_annotate_row_events= 1; +#endif + DBUG_RETURN(table); +} + + +/* + For row-based replication, the CREATE-SELECT statement is written + in two pieces: the first one contain the CREATE TABLE statement + necessary to create the table and the second part contain the rows + that should go into the table. + + For non-temporary tables, the start of the CREATE-SELECT + implicitly commits the previous transaction, and all events + forming the statement will be stored the transaction cache. 
At end + of the statement, the entire statement is committed as a + transaction, and all events are written to the binary log. + + On the master, the table is locked for the duration of the + statement, but since the CREATE part is replicated as a simple + statement, there is no way to lock the table for accesses on the + slave. Hence, we have to hold on to the CREATE part of the + statement until the statement has finished. +*/ +int select_create::postlock(THD *thd, TABLE **tables) +{ + /* + NOTE: for row format CREATE TABLE must be logged before row data. + */ + int error; + TABLE_LIST *save_next_global= table_list->next_global; + table_list->next_global= select_tables; + error= thd->decide_logging_format(table_list); + table_list->next_global= save_next_global; + + if (unlikely(error)) + return error; + + TABLE const *const table = *tables; + if (thd->is_current_stmt_binlog_format_row() && + !table->s->tmp_table) + return binlog_show_create_table_(thd, *tables, create_info); + return 0; +} + + +int +select_create::prepare(List &_values, SELECT_LEX_UNIT *u) +{ + List values(_values, thd->mem_root); + MYSQL_LOCK *extra_lock= NULL; + DBUG_ENTER("select_create::prepare"); + + unit= u; + + /* + Start a statement transaction before the create if we are using + row-based replication for the statement. If we are creating a + temporary table, we need to start a statement transaction. + */ + if (!thd->lex->tmp_table() && + thd->is_current_stmt_binlog_format_row() && + mysql_bin_log.is_open()) + { + thd->binlog_start_trans_and_stmt(); + } + + if (!(table= create_table_from_items(thd, &values, &extra_lock))) + { + if (create_info->or_replace()) + { + /* Original table was deleted. 
We have to log it */ + log_drop_table(thd, &table_list->db, &table_list->table_name, + &create_info->org_storage_engine_name, + create_info->db_type == partition_hton, + &create_info->org_tabledef_version, + thd->lex->tmp_table()); + } + + /* abort() deletes table */ + DBUG_RETURN(-1); + } + + if (create_info->tmp_table()) + { + /* + When the temporary table was created & opened in create_table_impl(), + the table's TABLE_SHARE (and thus TABLE) object was also linked to THD + temporary tables lists. So, we must temporarily remove it from the + list to keep them inaccessible from inner statements. + e.g. CREATE TEMPORARY TABLE `t1` AS SELECT * FROM `t1`; + */ + saved_tmp_table_share= thd->save_tmp_table_share(table_list->table); + } + + if (extra_lock) + { + DBUG_ASSERT(m_plock == NULL); + + if (create_info->tmp_table()) + m_plock= &m_lock; + else + m_plock= &thd->extra_lock; + + *m_plock= extra_lock; + } + + if (table->s->fields < values.elements) + { + my_error(ER_WRONG_VALUE_COUNT_ON_ROW, MYF(0), 1L); + DBUG_RETURN(-1); + } + + /* First field to copy */ + field= table->field+table->s->fields; + + /* Mark all fields that are given values */ + for (uint n= values.elements; n; ) + { + if ((*--field)->invisible >= INVISIBLE_SYSTEM) + continue; + n--; + bitmap_set_bit(table->write_set, (*field)->field_index); + } + + table->next_number_field=table->found_next_number_field; + + restore_record(table,s->default_values); // Get empty record + thd->cuted_fields=0; + bool create_lookup_handler= info.handle_duplicates != DUP_ERROR; + if (info.ignore || info.handle_duplicates != DUP_ERROR) + { + create_lookup_handler= true; + table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + if (table->file->ha_table_flags() & HA_DUPLICATE_POS) + { + if (table->file->ha_rnd_init_with_error(0)) + DBUG_RETURN(1); + } + } + table->file->prepare_for_insert(create_lookup_handler); + if (info.handle_duplicates == DUP_REPLACE && + (!table->triggers || !table->triggers->has_delete_triggers())) + 
table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE); + if (info.handle_duplicates == DUP_UPDATE) + table->file->extra(HA_EXTRA_INSERT_WITH_UPDATE); + if (thd->locked_tables_mode <= LTM_LOCK_TABLES && + !table->s->long_unique_table) + { + table->file->ha_start_bulk_insert((ha_rows) 0); + if (thd->lex->duplicates == DUP_ERROR && !thd->lex->ignore) + table->file->extra(HA_EXTRA_BEGIN_ALTER_COPY); + table->file->extra(HA_EXTRA_WRITE_CACHE); + } + thd->abort_on_warning= !info.ignore && thd->is_strict_mode(); + if (check_that_all_fields_are_given_values(thd, table, table_list)) + DBUG_RETURN(1); + table->mark_columns_needed_for_insert(); + // Mark table as used + table->query_id= thd->query_id; + DBUG_RETURN(0); +} + + +static int binlog_show_create_table_(THD *thd, TABLE *table, + Table_specification_st *create_info) +{ + /* + Note 1: In RBR mode, we generate a CREATE TABLE statement for the + created table by calling show_create_table(). In the event of an error, + nothing should be written to the binary log, even if the table is + non-transactional; therefore we pretend that the generated CREATE TABLE + statement is for a transactional table. The event will then be put in the + transaction cache, and any subsequent events (e.g., table-map events and + binrow events) will also be put there. We can then use + ha_autocommit_or_rollback() to either throw away the entire kaboodle of + events, or write them to the binary log. + + We write the CREATE TABLE statement here and not in prepare() + since there potentially are sub-selects or accesses to information + schema that will do a close_thread_tables(), destroying the + statement transaction cache. 
+ */ + DBUG_ASSERT(thd->is_current_stmt_binlog_format_row()); + StringBuffer<2048> query(system_charset_info); + int result; + TABLE_LIST tmp_table_list; + + tmp_table_list.reset(); + tmp_table_list.table = table; + + result= show_create_table(thd, &tmp_table_list, &query, + create_info, WITH_DB_NAME); + DBUG_ASSERT(result == 0); /* show_create_table() always return 0 */ + + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) + { + int errcode= query_error_code(thd, thd->killed == NOT_KILLED); + result= thd->binlog_query(THD::STMT_QUERY_TYPE, + query.ptr(), query.length(), + /* is_trans */ TRUE, + /* direct */ FALSE, + /* suppress_use */ FALSE, + errcode) > 0; + } +#ifdef WITH_WSREP + if (thd->wsrep_trx().active()) + { + WSREP_DEBUG("transaction already started for CTAS"); + } + else + { + wsrep_start_transaction(thd, thd->wsrep_next_trx_id()); + } +#endif + return result; +} + + +/** + Log CREATE TABLE to binary log + + @param thd Thread handler + @param table Log create statement for this table + + This function is called from ALTER TABLE for a shared table converted + to a not shared table. +*/ + +bool binlog_create_table(THD *thd, TABLE *table, bool replace) +{ + Table_specification_st create_info; + bool result; + ulonglong save_option_bits; + + /* Don't log temporary tables in row format */ + if (thd->variables.binlog_format == BINLOG_FORMAT_ROW && + table->s->tmp_table) + return 0; + if (!thd->binlog_table_should_be_logged(&table->s->db)) + return 0; + + /* + We have to use ROW format to ensure that future row inserts will be + logged + */ + thd->set_current_stmt_binlog_format_row(); + table->file->prepare_for_row_logging(); + + create_info.lex_start(); + save_option_bits= thd->variables.option_bits; + if (replace) + create_info.set(DDL_options_st::OPT_OR_REPLACE); + /* Ensure we write ENGINE=xxx and CHARSET=... 
to binary log */ + create_info.used_fields|= (HA_CREATE_USED_ENGINE | + HA_CREATE_USED_DEFAULT_CHARSET); + /* Ensure we write all engine options to binary log */ + create_info.used_fields|= HA_CREATE_PRINT_ALL_OPTIONS; + result= binlog_show_create_table_(thd, table, &create_info) != 0; + thd->variables.option_bits= save_option_bits; + return result; +} + + +/** + Log DROP TABLE to binary log + + @param thd Thread handler + @param table Log create statement for this table + + This function is called from ALTER TABLE for a shared table converted + to a not shared table. +*/ + +bool binlog_drop_table(THD *thd, TABLE *table) +{ + StringBuffer<2048> query(system_charset_info); + /* Don't log temporary tables in row format */ + if (!table->s->table_creation_was_logged) + return 0; + if (!thd->binlog_table_should_be_logged(&table->s->db)) + return 0; + + query.append(STRING_WITH_LEN("DROP ")); + if (table->s->tmp_table) + query.append(STRING_WITH_LEN("TEMPORARY ")); + query.append(STRING_WITH_LEN("TABLE IF EXISTS ")); + append_identifier(thd, &query, &table->s->db); + query.append('.'); + append_identifier(thd, &query, &table->s->table_name); + + return thd->binlog_query(THD::STMT_QUERY_TYPE, + query.ptr(), query.length(), + /* is_trans */ TRUE, + /* direct */ FALSE, + /* suppress_use */ TRUE, + 0) > 0; +} + + +bool select_create::store_values(List &values) +{ + return fill_record_n_invoke_before_triggers(thd, table, field, values, + true, TRG_EVENT_INSERT); +} + + +bool select_create::send_eof() +{ + DBUG_ENTER("select_create::send_eof"); + + /* + The routine that writes the statement in the binary log + is in select_insert::prepare_eof(). For that reason, we + mark the flag at this point. 
+ */ + if (table->s->tmp_table) + thd->transaction->stmt.mark_created_temp_table(); + + if (thd->slave_thread) + thd->variables.binlog_annotate_row_events= 0; + + debug_crash_here("ddl_log_create_before_binlog"); + + /* + In case of crash, we have to add DROP TABLE to the binary log as + the CREATE TABLE will already be logged if we are not using row based + replication. + */ + if (!thd->is_current_stmt_binlog_format_row()) + { + if (ddl_log_state_create.is_active()) // Not temporary table + ddl_log_update_phase(&ddl_log_state_create, DDL_CREATE_TABLE_PHASE_LOG); + /* + We can ignore if we replaced an old table as ddl_log_state_create will + now handle the logging of the drop if needed. + */ + ddl_log_complete(&ddl_log_state_rm); + } + + if (prepare_eof()) + { + abort_result_set(); + DBUG_RETURN(true); + } + debug_crash_here("ddl_log_create_after_prepare_eof"); + + if (table->s->tmp_table) + { + /* + Now is good time to add the new table to THD temporary tables list. + But, before that we need to check if same table got created by the sub- + statement. + */ + if (thd->find_tmp_table_share(table->s->table_cache_key.str, + table->s->table_cache_key.length)) + { + my_error(ER_TABLE_EXISTS_ERROR, MYF(0), table->alias.c_ptr()); + abort_result_set(); + DBUG_RETURN(true); + } + else + { + DBUG_ASSERT(saved_tmp_table_share); + thd->restore_tmp_table_share(saved_tmp_table_share); + } + } + + /* + Do an implicit commit at end of statement for non-temporary + tables. This can fail, but we should unlock the table + nevertheless. 
+ */ + if (!table->s->tmp_table) + { +#ifdef WITH_WSREP + if (WSREP(thd) && + table->file->ht->db_type == DB_TYPE_INNODB) + { + if (thd->wsrep_trx_id() == WSREP_UNDEFINED_TRX_ID) + { + wsrep_start_transaction(thd, thd->wsrep_next_trx_id()); + } + DBUG_ASSERT(thd->wsrep_trx_id() != WSREP_UNDEFINED_TRX_ID); + WSREP_DEBUG("CTAS key append for trx: %" PRIu64 " thd %llu query %lld ", + thd->wsrep_trx_id(), thd->thread_id, thd->query_id); + + /* + append table level exclusive key for CTAS + */ + wsrep_key_arr_t key_arr= {0, 0}; + wsrep_prepare_keys_for_isolation(thd, + table_list->db.str, + table_list->table_name.str, + table_list, + &key_arr); + int rcode= wsrep_thd_append_key(thd, key_arr.keys, key_arr.keys_len, + WSREP_SERVICE_KEY_EXCLUSIVE); + wsrep_keys_free(&key_arr); + if (rcode) + { + DBUG_PRINT("wsrep", ("row key failed: %d", rcode)); + WSREP_ERROR("Appending table key for CTAS failed: %s, %d", + (wsrep_thd_query(thd)) ? + wsrep_thd_query(thd) : "void", rcode); + abort_result_set(); + DBUG_RETURN(true); + } + /* If commit fails, we should be able to reset the OK status. 
*/ + thd->get_stmt_da()->set_overwrite_status(true); + } +#endif /* WITH_WSREP */ + thd->binlog_xid= thd->query_id; + /* Remember xid's for the case of row based logging */ + ddl_log_update_xid(&ddl_log_state_create, thd->binlog_xid); + ddl_log_update_xid(&ddl_log_state_rm, thd->binlog_xid); + trans_commit_stmt(thd); + if (!(thd->variables.option_bits & OPTION_GTID_BEGIN)) + trans_commit_implicit(thd); + thd->binlog_xid= 0; + +#ifdef WITH_WSREP + if (WSREP(thd)) + { + thd->get_stmt_da()->set_overwrite_status(FALSE); + mysql_mutex_lock(&thd->LOCK_thd_data); + if (wsrep_current_error(thd)) + { + WSREP_DEBUG("select_create commit failed, thd: %llu err: %s %s", + thd->thread_id, + wsrep_thd_transaction_state_str(thd), + wsrep_thd_query(thd)); + mysql_mutex_unlock(&thd->LOCK_thd_data); + abort_result_set(); + DBUG_RETURN(true); + } + mysql_mutex_unlock(&thd->LOCK_thd_data); + } +#endif /* WITH_WSREP */ + + /* Log query to ddl log */ + backup_log_info ddl_log; + bzero(&ddl_log, sizeof(ddl_log)); + ddl_log.query= { C_STRING_WITH_LEN("CREATE") }; + if ((ddl_log.org_partitioned= (create_info->db_type == partition_hton))) + ddl_log.org_storage_engine_name= create_info->new_storage_engine_name; + else + lex_string_set(&ddl_log.org_storage_engine_name, + ha_resolve_storage_engine_name(create_info->db_type)); + ddl_log.org_database= table_list->db; + ddl_log.org_table= table_list->table_name; + ddl_log.org_table_id= create_info->tabledef_version; + backup_log_ddl(&ddl_log); + } + /* + If are using statement based replication the table will be deleted here + in case of a crash as we can't use xid to check if the query was logged + (as the query was logged before commit!) + */ + debug_crash_here("ddl_log_create_after_binlog"); + ddl_log_complete(&ddl_log_state_rm); + ddl_log_complete(&ddl_log_state_create); + debug_crash_here("ddl_log_create_log_complete"); + + /* + exit_done must only be set after last potential call to + abort_result_set(). 
+ */ + exit_done= 1; // Avoid double calls + + send_ok_packet(); + + if (m_plock) + { + MYSQL_LOCK *lock= *m_plock; + *m_plock= NULL; + m_plock= NULL; + + if (create_info->pos_in_locked_tables) + { + /* + If we are under lock tables, we have created a table that was + originally locked. We should add back the lock to ensure that + all tables in the thd->open_list are locked! + */ + table->mdl_ticket= create_info->mdl_ticket; + + /* The following should never fail, except if out of memory */ + if (!thd->locked_tables_list.restore_lock(thd, + create_info-> + pos_in_locked_tables, + table, lock)) + DBUG_RETURN(false); // ok + /* Fail. Continue without locking the table */ + } + mysql_unlock_tables(thd, lock); + } + DBUG_RETURN(false); +} + + +void select_create::abort_result_set() +{ + ulonglong save_option_bits; + DBUG_ENTER("select_create::abort_result_set"); + + /* Avoid double calls, could happen in case of out of memory on cleanup */ + if (exit_done) + DBUG_VOID_RETURN; + exit_done= 1; + + /* + In select_insert::abort_result_set() we roll back the statement, including + truncating the transaction cache of the binary log. To do this, we + pretend that the statement is transactional, even though it might + be the case that it was not. + + We roll back the statement prior to deleting the table and prior + to releasing the lock on the table, since there might be potential + for failure if the rollback is executed after the drop or after + unlocking the table. + + We also roll back the statement regardless of whether the creation + of the table succeeded or not, since we need to reset the binary + log state. + + However if there was an original table that was deleted, as part of + create or replace table, then we must log the statement. 
+ */ + + save_option_bits= thd->variables.option_bits; + thd->variables.option_bits&= ~OPTION_BIN_LOG; + select_insert::abort_result_set(); + thd->transaction->stmt.modified_non_trans_table= FALSE; + thd->variables.option_bits= save_option_bits; + + /* possible error of writing binary log is ignored deliberately */ + (void) thd->binlog_flush_pending_rows_event(TRUE, TRUE); + + if (table) + { + bool tmp_table= table->s->tmp_table; + bool table_creation_was_logged= (!tmp_table || + table->s->table_creation_was_logged); + if (tmp_table) + { + DBUG_ASSERT(saved_tmp_table_share); + thd->restore_tmp_table_share(saved_tmp_table_share); + } + + if (table->file->inited && + (info.ignore || info.handle_duplicates != DUP_ERROR) && + (table->file->ha_table_flags() & HA_DUPLICATE_POS)) + table->file->ha_rnd_end(); + table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); + table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); + table->auto_increment_field_not_null= FALSE; + + if (m_plock) + { + mysql_unlock_tables(thd, *m_plock); + *m_plock= NULL; + m_plock= NULL; + } + + drop_open_table(thd, table, &table_list->db, &table_list->table_name); + table=0; // Safety + if (thd->log_current_statement()) + { + if (mysql_bin_log.is_open()) + { + /* Remove logging of drop, create + insert rows */ + binlog_reset_cache(thd); + /* Original table was deleted. 
We have to log it */ + if (table_creation_was_logged) + { + thd->binlog_xid= thd->query_id; + ddl_log_update_xid(&ddl_log_state_create, thd->binlog_xid); + ddl_log_update_xid(&ddl_log_state_rm, thd->binlog_xid); + debug_crash_here("ddl_log_create_before_binlog"); + log_drop_table(thd, &table_list->db, &table_list->table_name, + &create_info->org_storage_engine_name, + create_info->db_type == partition_hton, + &create_info->tabledef_version, + tmp_table); + debug_crash_here("ddl_log_create_after_binlog"); + thd->binlog_xid= 0; + } + } + else if (!tmp_table) + { + backup_log_info ddl_log; + bzero(&ddl_log, sizeof(ddl_log)); + ddl_log.query= { C_STRING_WITH_LEN("DROP_AFTER_CREATE") }; + ddl_log.org_partitioned= (create_info->db_type == partition_hton); + ddl_log.org_storage_engine_name= create_info->org_storage_engine_name; + ddl_log.org_database= table_list->db; + ddl_log.org_table= table_list->table_name; + ddl_log.org_table_id= create_info->tabledef_version; + backup_log_ddl(&ddl_log); + } + } + } + + ddl_log_complete(&ddl_log_state_rm); + ddl_log_complete(&ddl_log_state_create); + + if (create_info->table_was_deleted) + { + /* Unlock locked table that was dropped by CREATE. */ + (void) trans_rollback_stmt(thd); + thd->locked_tables_list.unlock_locked_table(thd, create_info->mdl_ticket); + } + + DBUG_VOID_RETURN; +} diff --git a/sql/sql_insert.h b/sql/sql_insert.h new file mode 100644 index 00000000..8b034c25 --- /dev/null +++ b/sql/sql_insert.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_INSERT_INCLUDED +#define SQL_INSERT_INCLUDED + +#include "sql_class.h" /* enum_duplicates */ +#include "sql_list.h" + +/* Instead of including sql_lex.h we add this typedef here */ +typedef List List_item; +typedef struct st_copy_info COPY_INFO; + +int mysql_prepare_insert(THD *thd, TABLE_LIST *table_list, + List &fields, List_item *values, + List &update_fields, + List &update_values, enum_duplicates duplic, + bool ignore, + COND **where, bool select_insert); +bool mysql_insert(THD *thd,TABLE_LIST *table,List &fields, + List &values, List &update_fields, + List &update_values, enum_duplicates flag, + bool ignore, select_result* result); +void upgrade_lock_type_for_insert(THD *thd, thr_lock_type *lock_type, + enum_duplicates duplic, + bool is_multi_insert); +int check_that_all_fields_are_given_values(THD *thd, TABLE *entry, + TABLE_LIST *table_list); +int vers_insert_history_row(TABLE *table); +int check_duplic_insert_without_overlaps(THD *thd, TABLE *table, + enum_duplicates duplic); +int write_record(THD *thd, TABLE *table, COPY_INFO *info, + select_result *returning= NULL); +void kill_delayed_threads(void); +bool binlog_create_table(THD *thd, TABLE *table, bool replace); +bool binlog_drop_table(THD *thd, TABLE *table); + +#ifdef EMBEDDED_LIBRARY +inline void kill_delayed_threads(void) {} +#endif + +#endif /* SQL_INSERT_INCLUDED */ diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc new file mode 100644 index 00000000..f8ac6516 --- /dev/null +++ b/sql/sql_join_cache.cc @@ -0,0 +1,4834 @@ +/* Copyright (C) 2000-2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free 
 Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/**
+ @file
+
+ @brief
+ join cache optimizations
+
+ @defgroup Query_Optimizer Query Optimizer
+ @{
+*/
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation // gcc: Class implementation
+#endif
+
+#include "mariadb.h"
+#include "key.h"
+#include "sql_base.h"
+#include "sql_select.h"
+#include "opt_subselect.h"
+
+#define NO_MORE_RECORDS_IN_BUFFER (uint)(-1)
+
+static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save);
+
+/*****************************************************************************
+ * Join cache module
+******************************************************************************/
+
+/*
+ Fill in the descriptor of a flag field associated with a join cache
+
+ SYNOPSIS
+ add_flag_field_to_join_cache()
+ str position in a record buffer to copy the field from/to
+ length length of the field
+ field IN/OUT pointer to the field descriptor to fill in
+
+ DESCRIPTION
+ The function fills in the descriptor of a cache flag field to which
+ the parameter 'field' points. The function uses the first two
+ parameters to set the position in the record buffer from/to which
+ the field value is to be copied and the length of the copied fragment.
+ Before returning the result the function increments the value of
+ *field by 1.
+ The function ignores the fields 'blob_length' and 'offset' of the
+ descriptor.
+
+ RETURN VALUE
+ the length of the field
+*/
+
+static
+uint add_flag_field_to_join_cache(uchar *str, uint length, CACHE_FIELD **field)
+{
+ CACHE_FIELD *copy= *field;
+ /* Record where in the record buffer this flag field lives and its size */
+ copy->str= str;
+ copy->length= length;
+ /* Flag fields carry raw bytes: no cache type, no Field object, and no
+    back-reference to another cached field */
+ copy->type= 0;
+ copy->field= 0;
+ copy->referenced_field_no= 0;
+ /* Advance the caller's cursor to the next descriptor slot */
+ (*field)++;
+ return length;
+}
+
+
+/*
+ Fill in the descriptors of table data fields associated with a join cache
+
+ SYNOPSIS
+ add_table_data_fields_to_join_cache()
+ tab descriptors of fields from this table are to be filled
+ field_set descriptors for only these fields are to be created
+ field_cnt IN/OUT counter of data fields
+ descr IN/OUT pointer to the first descriptor to be filled
+ field_ptr_cnt IN/OUT counter of pointers to the data fields
+ descr_ptr IN/OUT pointer to the first pointer to blob descriptors
+
+ DESCRIPTION
+ The function fills in the descriptors of cache data fields from the table
+ 'tab'. The descriptors are filled only for the fields marked in the
+ bitmap 'field_set'.
+ The function fills the descriptors starting from the position pointed
+ by 'descr'. If an added field is of a BLOB type then a pointer to
+ its descriptor is added to the array descr_ptr.
+ At the return 'descr' points to the position after the last added
+ descriptor while 'descr_ptr' points to the position right after the
+ last added pointer.
+
+ RETURN VALUE
+ the total length of the added fields
+*/
+
+static
+uint add_table_data_fields_to_join_cache(JOIN_TAB *tab,
+ MY_BITMAP *field_set,
+ uint *field_cnt,
+ CACHE_FIELD **descr,
+ uint *field_ptr_cnt,
+ CACHE_FIELD ***descr_ptr)
+{
+ Field **fld_ptr;
+ uint len= 0;
+ CACHE_FIELD *copy= *descr;
+ CACHE_FIELD **copy_ptr= *descr_ptr;
+ uint used_fields= bitmap_bits_set(field_set);
+ /* Loop terminates once every bitmap-marked field has been processed */
+ for (fld_ptr= tab->table->field; used_fields; fld_ptr++)
+ {
+ if (bitmap_is_set(field_set, (*fld_ptr)->field_index))
+ {
+ len+= (*fld_ptr)->fill_cache_field(copy);
+ /* BLOB descriptors are additionally collected into descr_ptr so they
+    can be found without scanning all descriptors later */
+ if (copy->type == CACHE_BLOB)
+ {
+ *copy_ptr= copy;
+ copy_ptr++;
+ (*field_ptr_cnt)++;
+ }
+ copy->field= *fld_ptr;
+ copy->referenced_field_no= 0;
+ copy++;
+ (*field_cnt)++;
+ used_fields--;
+ }
+ }
+ /* Hand the advanced cursors back to the caller */
+ *descr= copy;
+ *descr_ptr= copy_ptr;
+ return len;
+}
+
+/*
+ Determine different counters of fields associated with a record in the cache
+
+ SYNOPSIS
+ calc_record_fields()
+
+ DESCRIPTION
+ The function counts the number of total fields stored in a record
+ of the cache and saves this number in the 'fields' member. It also
+ determines the number of flag fields and the number of blobs.
+ The function sets 'with_match_flag' on if 'join_tab' needs a match flag
+ i.e. if it is the first inner table of an outer join or a semi-join.
+
+ RETURN VALUE
+ none
+*/
+
+void JOIN_CACHE::calc_record_fields()
+{
+ JOIN_TAB *tab;
+
+ /* Chained caches continue counting from where the previous cache stopped */
+ if (prev_cache)
+ tab= prev_cache->join_tab;
+ else
+ {
+ if (join_tab->bush_root_tab)
+ {
+ /*
+ --ot1--SJM1--------------ot2--...
+ |
+ |
+ +-it1--...--itN
+ ^____________ this->join_tab is somewhere here,
+ inside an sjm nest.
+
+ The join buffer should store the values of it1.*, it2.*, ..
+ It should not store values of ot1.*.
+ */
+ tab= join_tab->bush_root_tab->bush_children->start;
+ }
+ else
+ {
+ /*
+ -ot1--ot2--SJM1--SJM2--------------ot3--...--otN
+ | | ^
+ | +-it21--...--it2N |
+ | \-- we're somewhere here,
+ +-it11--...--it1N at the top level
+
+ The join buffer should store the values of
+
+ ot1.*, ot2.*, it1{i}, it2{j}.*, ot3.*, ...
+
+ that is, we should start from the first non-const top-level table.
+
+ We will need to store columns of SJ-inner tables (it_X_Y.*), but we're
+ not interested in storing the columns of materialization tables
+ themselves. Because of that, if the first non-const top-level table is a
+ materialized table, we move to its bush_children:
+ */
+ tab= join->join_tab + join->const_tables;
+ if (tab->bush_children)
+ tab= tab->bush_children->start;
+ }
+ }
+ DBUG_ASSERT(!tab->bush_children);
+
+ /* Reset all per-cache counters before the counting pass below */
+ start_tab= tab;
+ fields= 0;
+ blobs= 0;
+ flag_fields= 0;
+ data_field_count= 0;
+ data_field_ptr_count= 0;
+ referenced_fields= 0;
+
+ /*
+ The following loop will get inside SJM nests, because data may be unpacked
+ to sjm-inner tables.
+ */
+ for (; tab != join_tab ; tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+ {
+ tab->calc_used_field_length(FALSE);
+ /* One flag byte per table for null/uneven-bit fields, one more if the
+    table itself can be NULL-complemented */
+ flag_fields+= MY_TEST(tab->used_null_fields || tab->used_uneven_bit_fields);
+ flag_fields+= MY_TEST(tab->table->maybe_null);
+ fields+= tab->used_fields;
+ blobs+= tab->used_blobs;
+ }
+ if ((with_match_flag= join_tab->use_match_flag()))
+ flag_fields++;
+ /* 'fields' is the grand total: data fields plus all flag fields */
+ fields+= flag_fields;
+}
+
+
+/*
+ Collect information on join key arguments
+
+ SYNOPSIS
+ collect_info_on_key_args()
+
+ DESCRIPTION
+ The function traverses the ref expressions that are used to access the
+ joined table join_tab. For each table 'tab' whose fields are to be stored
+ in the join buffer of the cache the function finds the fields from 'tab'
+ that occur in the ref expressions and marks these fields in the bitmap
+ tab->table->tmp_set.
The function counts the number of them stored + in this cache and the total number of them stored in the previous caches + and saves the results of the counting in 'local_key_arg_fields' and + 'external_key_arg_fields' respectively. + + NOTES + The function does not do anything if no key is used to join the records + from join_tab. + + RETURN VALUE + none +*/ + +void JOIN_CACHE::collect_info_on_key_args() +{ + JOIN_TAB *tab; + JOIN_CACHE *cache; + local_key_arg_fields= 0; + external_key_arg_fields= 0; + + if (!is_key_access()) + return; + + TABLE_REF *ref= &join_tab->ref; + cache= this; + do + { + for (tab= cache->start_tab; tab != cache->join_tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) + { + uint key_args; + bitmap_clear_all(&tab->table->tmp_set); + for (uint i= 0; i < ref->key_parts; i++) + { + Item *ref_item= ref->items[i]; + if (!(tab->table->map & ref_item->used_tables())) + continue; + ref_item->walk(&Item::add_field_to_set_processor, 1, tab->table); + } + if ((key_args= bitmap_bits_set(&tab->table->tmp_set))) + { + if (cache == this) + local_key_arg_fields+= key_args; + else + external_key_arg_fields+= key_args; + } + } + cache= cache->prev_cache; + } + while (cache); + + return; +} + + +/* + Allocate memory for descriptors and pointers to them associated with the cache + + SYNOPSIS + alloc_fields() + + DESCRIPTION + The function allocates memory for the array of fields descriptors + and the array of pointers to the field descriptors used to copy + join record data from record buffers into the join buffer and + backward. Some pointers refer to the field descriptor associated + with previous caches. They are placed at the beginning of the array + of pointers and its total number is stored in external_key_arg_fields. + The pointer of the first array is assigned to field_descr and the number + of the elements in it is precalculated by the function calc_record_fields. + The allocated arrays are adjacent. 
+ + NOTES + The memory is allocated in join->thd->mem_root + + RETURN VALUE + pointer to the first array +*/ + +int JOIN_CACHE::alloc_fields() +{ + uint ptr_cnt= external_key_arg_fields+blobs+1; + uint fields_size= sizeof(CACHE_FIELD)*fields; + field_descr= (CACHE_FIELD*) join->thd->alloc(fields_size + + sizeof(CACHE_FIELD*)*ptr_cnt); + blob_ptr= (CACHE_FIELD **) ((uchar *) field_descr + fields_size); + return (field_descr == NULL); +} + + +/* + Create descriptors of the record flag fields stored in the join buffer + + SYNOPSIS + create_flag_fields() + + DESCRIPTION + The function creates descriptors of the record flag fields stored + in the join buffer. These are descriptors for: + - an optional match flag field, + - table null bitmap fields, + - table null row fields. + The match flag field is created when 'join_tab' is the first inner + table of an outer join our a semi-join. A null bitmap field is + created for any table whose fields are to be stored in the join + buffer if at least one of these fields is nullable or is a BIT field + whose bits are partially stored with null bits. A null row flag + is created for any table assigned to the cache if it is an inner + table of an outer join. + The descriptor for flag fields are placed one after another at the + beginning of the array of field descriptors 'field_descr' that + contains 'fields' elements. If there is a match flag field the + descriptor for it is always first in the sequence of flag fields. + The descriptors for other flag fields can follow in an arbitrary + order. + The flag field values follow in a record stored in the join buffer + in the same order as field descriptors, with the match flag always + following first. + The function sets the value of 'flag_fields' to the total number + of the descriptors created for the flag fields. + The function sets the value of 'length' to the total length of the + flag fields. 
+ + RETURN VALUE + none +*/ + +void JOIN_CACHE::create_flag_fields() +{ + CACHE_FIELD *copy; + JOIN_TAB *tab; + + copy= field_descr; + + length=0; + + /* If there is a match flag the first field is always used for this flag */ + if (with_match_flag) + length+= add_flag_field_to_join_cache((uchar*) &join_tab->found, + sizeof(join_tab->found), + ©); + + /* Create fields for all null bitmaps and null row flags that are needed */ + for (tab= start_tab; tab != join_tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) + { + TABLE *table= tab->table; + + /* Create a field for the null bitmap from table if needed */ + if (tab->used_null_fields || tab->used_uneven_bit_fields) + length+= add_flag_field_to_join_cache(table->null_flags, + table->s->null_bytes, + ©); + + /* Create table for the null row flag if needed */ + if (table->maybe_null) + length+= add_flag_field_to_join_cache((uchar*) &table->null_row, + sizeof(table->null_row), + ©); + } + + /* Theoretically the new value of flag_fields can be less than the old one */ + flag_fields= (uint)(copy-field_descr); +} + + +/* + Create descriptors of the fields used to build access keys to the joined table + + SYNOPSIS + create_key_arg_fields() + + DESCRIPTION + The function creates descriptors of the record fields stored in the join + buffer that are used to build access keys to the joined table. These + fields are put into the buffer ahead of other records fields stored in + the buffer. Such placement helps to optimize construction of access keys. + For each field that is used to build access keys to the joined table but + is stored in some other join cache buffer the function saves a pointer + to the the field descriptor. The array of such pointers are placed in the + the join cache structure just before the array of pointers to the + blob fields blob_ptr. + Any field stored in a join cache buffer that is used to construct keys + to access tables associated with other join caches is called a referenced + field. 
    It receives a unique number that is saved by the function in the
    member 'referenced_field_no' of the CACHE_FIELD descriptor for the field.
    This number is used as index to the array of offsets to the referenced
    fields that are saved and put in the join cache buffer after all record
    fields.
    The function also finds out whether the keys to access join_tab
    can be considered as embedded and, if so, sets the flag 'use_emb_key' in
    this join cache appropriately.

  NOTES.
    When a key to access the joined table 'join_tab' is constructed the array
    of pointers to the field descriptors for the external fields is looked
    through. For each of this pointers we find out in what previous key cache
    the referenced field is stored. The value of 'referenced_field_no'
    provides us with the index into the array of offsets for referenced
    fields stored in the join cache. The offset read by the index allows
    us to read the field without reading all other fields of the record
    stored the join cache buffer. This optimizes the construction of keys
    to access 'join_tab' when some key arguments are stored in the previous
    join caches.

  NOTES
    The function does not do anything if no key is used to join the records
    from join_tab.

  RETURN VALUE
    none
*/
void JOIN_CACHE::create_key_arg_fields()
{
  JOIN_TAB *tab;
  JOIN_CACHE *cache;

  if (!is_key_access())
    return;

  /*
    Save pointers to the cache fields in previous caches
    that are used to build keys for this key access.
  */
  cache= this;
  uint ext_key_arg_cnt= external_key_arg_fields;
  CACHE_FIELD *copy;
  CACHE_FIELD **copy_ptr= blob_ptr;
  /* Loop until every external key argument has been located */
  while (ext_key_arg_cnt)
  {
    cache= cache->prev_cache;
    for (tab= cache->start_tab; tab != cache->join_tab;
         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
    {
      CACHE_FIELD *copy_end;
      MY_BITMAP *key_read_set= &tab->table->tmp_set;
      /* key_read_set contains the bitmap of tab's fields referenced by ref */
      if (bitmap_is_clear_all(key_read_set))
        continue;
      copy_end= cache->field_descr+cache->fields;
      for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
      {
        /*
          (1) - when we store rowids for DuplicateWeedout, they have
                copy->field==NULL
        */
        if (copy->field &&  // (1)
            copy->field->table == tab->table &&
            bitmap_is_set(key_read_set, copy->field->field_index))
        {
          *copy_ptr++= copy;
          ext_key_arg_cnt--;
          if (!copy->referenced_field_no)
          {
            /*
              Register the referenced field 'copy':
              - set the offset number in copy->referenced_field_no,
              - adjust the value of the flag 'with_length',
              - adjust the values of 'pack_length' and
                of 'pack_length_with_blob_ptrs'.
            */
            copy->referenced_field_no= ++cache->referenced_fields;
            if (!cache->with_length)
            {
              cache->with_length= TRUE;
              uint sz= cache->get_size_of_rec_length();
              cache->base_prefix_length+= sz;
              cache->pack_length+= sz;
              cache->pack_length_with_blob_ptrs+= sz;
            }
            cache->pack_length+= cache->get_size_of_fld_offset();
            cache->pack_length_with_blob_ptrs+= cache->get_size_of_fld_offset();
          }
        }
      }
    }
  }
  /* After this 'blob_ptr' shall not be changed */
  blob_ptr= copy_ptr;

  /* Now create local fields that are used to build ref for this key access */
  copy= field_descr+flag_fields;
  for (tab= start_tab; tab != join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set,
                                                 &data_field_count, &copy,
                                                 &data_field_ptr_count,
                                                 &copy_ptr);
  }

  use_emb_key= check_emb_key_usage();

  return;
}


/*
  Create descriptors of all remaining data fields stored in the join buffer

  SYNOPSIS
    create_remaining_fields()

  DESCRIPTION
    The function creates descriptors for all remaining data fields of a
    record from the join buffer. If the value returned by is_key_access() is
    false the function creates fields for all read record fields that
    comprise the partial join record joined with join_tab. Otherwise,
    for each table tab, the set of the read fields for which the descriptors
    have to be added is determined as the difference between all read fields
    and those for which the descriptors have been already created.
    The latter are supposed to be marked in the bitmap tab->table->tmp_set.
    The function increases the value of 'length' to the total length of
    the added fields.

  NOTES
    If is_key_access() returns true the function modifies the value of
    tab->table->tmp_set for each table whose fields are stored in the cache.
    The function calls the method Field::fill_cache_field to figure out
    the type of the cache field and the maximal length of its representation
    in the join buffer. If this is a blob field then additionally a pointer
    to this field is added as an element of the array blob_ptr. For a blob
    field only the size of the length of the blob data is taken into account.
    It is assumed that 'data_field_count' contains the number of descriptors
    for data fields that have been already created and 'data_field_ptr_count'
    contains the number of the pointers to such descriptors having been
    stored up to the moment.

  RETURN VALUE
    none
*/

void JOIN_CACHE::create_remaining_fields()
{
  JOIN_TAB *tab;
  bool all_read_fields= !is_key_access();
  CACHE_FIELD *copy= field_descr+flag_fields+data_field_count;
  CACHE_FIELD **copy_ptr= blob_ptr+data_field_ptr_count;

  for (tab= start_tab; tab != join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    MY_BITMAP *rem_field_set;
    TABLE *table= tab->table;

    if (all_read_fields)
      rem_field_set= table->read_set;
    else
    {
      /* rem_field_set := read_set minus the key-argument fields in tmp_set */
      bitmap_invert(&table->tmp_set);
      bitmap_intersect(&table->tmp_set, table->read_set);
      rem_field_set= &table->tmp_set;
    }

    length+= add_table_data_fields_to_join_cache(tab, rem_field_set,
                                                 &data_field_count, &copy,
                                                 &data_field_ptr_count,
                                                 &copy_ptr);

    /* SemiJoinDuplicateElimination: allocate space for rowid if needed */
    if (tab->keep_current_rowid)
    {
      copy->str= table->file->ref;
      if (copy->str)
        copy->length= table->file->ref_length;
      else
      {
        /* This may happen only for materialized derived tables and views */
        copy->length= 0;
        copy->str= (uchar *) table;
      }
      copy->type= CACHE_ROWID;
      copy->field= 0;
      copy->referenced_field_no= 0;
      /*
        Note: this may seem odd, but at this point we have
        table->file->ref==NULL while table->file->ref_length is already set
        to correct value.
      */
      length += table->file->ref_length;
      data_field_count++;
      copy++;
    }
  }
}



/*
  Calculate and set all cache constants

  SYNOPSIS
    set_constants()

  DESCRIPTION
    The function calculates and set all precomputed constants that are used
    when writing records into the join buffer and reading them from it.
    It calculates the size of offsets of a record within the join buffer
    and of a field within a record. It also calculates the number of bytes
    used to store record lengths.
    The function also calculates the maximal length of the representation
    of record in the cache excluding blob_data. This value is used when
    making a decision whether more records should be added into the join
    buffer or not.

  RETURN VALUE
    none
*/

void JOIN_CACHE::set_constants()
{
  /*
    Any record from a BKA cache is prepended with the record length.
    We use the record length when reading the buffer and building key values
    for each record. The length allows us not to read the fields that are
    not needed for keys.
    If a record has match flag it also may be skipped when the match flag
    is on. It happens if the cache is used for a semi-join operation or
    for outer join when the 'not exist' optimization can be applied.
    If some of the fields are referenced from other caches then
    the record length allows us to easily reach the saved offsets for
    these fields since the offsets are stored at the very end of the record.
    However at this moment we don't know whether we have referenced fields for
    the cache or not. Later when a referenced field is registered for the cache
    we adjust the value of the flag 'with_length'.
  */
  with_length= is_key_access() ||
               join_tab->is_inner_table_of_semi_join_with_first_match() ||
               join_tab->is_inner_table_of_outer_join();
  /*
    At this moment we don't know yet the value of 'referenced_fields',
    but in any case it can't be greater than the value of 'fields'.
  */
  uint len= length + fields*sizeof(uint)+blobs*sizeof(uchar *) +
            (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
            sizeof(ulong);
  /*
    The values of size_of_rec_ofs, size_of_rec_len, size_of_fld_ofs,
    base_prefix_length, pack_length, pack_length_with_blob_ptrs
    will be recalculated later in this function when we get the estimate
    for the actual value of the join buffer size.
  */
  size_of_rec_ofs=  size_of_rec_len= size_of_fld_ofs= 4;
  base_prefix_length= (with_length ? size_of_rec_len : 0) +
                      (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  pack_length= (with_length ? size_of_rec_len : 0) +
               (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
               length + fields*sizeof(uint);
  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
  min_records= 1;
  min_buff_size= get_min_join_buffer_size();
  buff_size= (size_t)MY_MAX(join->thd->variables.join_buff_size,
                            min_buff_size);
  size_of_rec_ofs= offset_size(buff_size);
  size_of_rec_len= blobs ? size_of_rec_ofs : offset_size(len);
  size_of_fld_ofs= size_of_rec_len;
  base_prefix_length= (with_length ? size_of_rec_len : 0) +
                      (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  /*
    Call get_min_join_buffer_size() again as the size may have got smaller
    if size_of_rec_ofs or some other variable changed since last call.
  */
  min_buff_size= 0;
  min_buff_size= get_min_join_buffer_size();
  /*
    The size of the offsets for referenced fields will be added later.
    The values of 'pack_length' and 'pack_length_with_blob_ptrs' are adjusted
    every time when the first reference to the referenced field is registered.
  */
  pack_length= (with_length ? size_of_rec_len : 0) +
               (prev_cache ?
                prev_cache->get_size_of_rec_offset() : 0) +
               length;
  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
}


/*
  Get maximum total length of all affixes of a record in the join cache buffer

  SYNOPSIS
    get_record_max_affix_length()

  DESCRIPTION
    The function calculates the maximum possible total length of all affixes
    of a record in the join cache buffer, that is made of:
      - the length of all prefixes used in this cache,
      - the length of the match flag if it's needed
      - the total length of the maximum possible offsets to the fields of
        a record in the buffer.

  RETURN VALUE
    The maximum total length of all affixes of a record in the join buffer
*/

uint JOIN_CACHE::get_record_max_affix_length()
{
  uint len= get_prefix_length() +
            MY_TEST(with_match_flag) +
            size_of_fld_ofs * data_field_count;
  return len;
}


/*
  Get the minimum possible size of the cache join buffer

  SYNOPSIS
    get_min_join_buffer_size()

  DESCRIPTION
    At the first its invocation for the cache the function calculates the
    minimum possible size of the join buffer of the cache. This value depends
    on the minimal number of records 'min_records' to be stored in the join
    buffer. The number is supposed to be determined by the procedure that
    chooses the best access path to the joined table join_tab in the execution
    plan. After the calculation of the interesting size the function saves it
    in the field 'min_buff_size' in order to use it directly at the next
    invocations of the function.

  NOTES
    Currently the number of minimal records is just set to 1.

  RETURN VALUE
    The minimal possible size of the join buffer of this cache
*/

size_t JOIN_CACHE::get_min_join_buffer_size()
{
  if (min_buff_size)
    return min_buff_size;                       // use cached value

  size_t len= 0, len_last= 0, len_addon, min_sz, add_sz= 0;

  for (JOIN_TAB *tab= start_tab; tab != join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    len+= tab->get_max_used_fieldlength();
    len_last+= tab->get_used_fieldlength();
  }
  len_addon= (get_record_max_affix_length() +
              get_max_key_addon_space_per_record());
  len+= len_addon;
  len_last+= len_addon;
  /* Worst-case length for min_records-1 records plus the actual last one */
  min_sz= len*(min_records-1) + len_last;
  min_sz+= pack_length_with_blob_ptrs;
  for (uint i=0; i < min_records; i++)
    add_sz+= join_tab_scan->aux_buffer_incr(i+1);
  avg_aux_buffer_incr= add_sz/min_records;
  min_sz+= add_sz;
  set_if_bigger(min_sz, 1);
  min_buff_size= min_sz;
  return min_buff_size;
}


size_t JOIN_CACHE::calc_avg_record_length()
{
  /* Sum of the used field lengths of all cached tables plus record affixes */
  size_t len= 0;
  for (JOIN_TAB *tab= start_tab; tab != join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    len+= tab->get_used_fieldlength();
  }
  len+= get_record_max_affix_length();
  return len;
}

/*
  Get the maximum possible size of the cache join buffer

  SYNOPSIS
    get_max_join_buffer_size()

    optimize_buff_size  FALSE <-> do not take more memory than needed for
                        the estimated number of records in the partial join

  DESCRIPTION

    At the first its invocation for the cache the function calculates
    the maximum possible size of join buffer for the cache. If the
    parameter optimize_buff_size true then this value does not exceed
    the size of the space needed for the estimated number of records
    'max_records' in the partial join that joins tables from the first
    one through join_tab. This value is also capped off by the value
    of the system parameter join_buffer_size. After the calculation of
    the interesting size the function saves the value in the field
    'max_buff_size' in order to use it directly at the next
    invocations of the function.

  RETURN VALUE
    The maximum possible size of the join buffer of this cache
    avg_record_length is also updated if optimize_buff_size != 0
*/

size_t JOIN_CACHE::get_max_join_buffer_size(bool optimize_buff_size,
                                            size_t min_sz)
{
  if (max_buff_size)
    return max_buff_size;                       // use cached value

  size_t limit_sz= (size_t) join->thd->variables.join_buff_size;

  if (!optimize_buff_size)
    return max_buff_size= limit_sz;

  size_t max_sz;
  size_t len;
  double max_records, partial_join_cardinality=
    (join_tab-1)->get_partial_join_cardinality();
  /* Expected join buffer space used for one record */
  size_t space_per_record;

  len= avg_record_length= calc_avg_record_length();
  len+= get_max_key_addon_space_per_record() + avg_aux_buffer_incr;
  space_per_record= len;

  /* Note that space_per_record can be 0 if no table fields where used */
  max_records= (double) (limit_sz / MY_MAX(space_per_record, 1));
  set_if_smaller(max_records, partial_join_cardinality);
  set_if_bigger(max_records, 10.0);

  if ((size_t) (limit_sz / max_records) > space_per_record)
    max_sz= space_per_record * (size_t) max_records;
  else
    max_sz= limit_sz;
  max_sz+= pack_length_with_blob_ptrs;
  set_if_smaller(max_sz, limit_sz);

  set_if_bigger(max_sz, min_sz);
  max_buff_size= max_sz;
  return max_buff_size;
}


/*
  Allocate memory for a join buffer

  SYNOPSIS
    alloc_buffer()

  DESCRIPTION
    The function allocates a lump of memory for the cache join buffer.
    Initially the function sets the size of the buffer buff_size equal to
    the value returned by get_max_join_buffer_size().
    If the total size of
    the space intended to be used for the join buffers employed by the
    tables from the first one through join_tab exceeds the value of the
    system parameter join_buff_space_limit, then the function first tries
    to shrink the used buffers to make the occupied space fit the maximum
    memory allowed to be used for all join buffers in total. After
    this the function tries to allocate a join buffer for join_tab.
    If it fails to do so, it decrements the requested size of the join
    buffer, shrinks proportionally the join buffers used for the previous
    tables and tries to allocate a buffer for join_tab. In the case of a
    failure the function repeats its attempts with smaller and smaller
    requested sizes of the buffer, but not more than 4 times.

  RETURN VALUE
    0   if the memory has been successfully allocated
    1   otherwise
*/

int JOIN_CACHE::alloc_buffer()
{
  JOIN_TAB *tab;
  JOIN_CACHE *cache;
  ulonglong curr_buff_space_sz= 0;
  ulonglong curr_min_buff_space_sz= 0;
  ulonglong join_buff_space_limit=
    join->thd->variables.join_buff_space_limit;
  bool optimize_buff_size=
    optimizer_flag(join->thd, OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE);
  buff= NULL;
  buff_size= get_max_join_buffer_size(optimize_buff_size, min_buff_size);

  /* Sum the space already taken by the caches of the preceding tables */
  for (tab= start_tab; tab!= join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    cache= tab->cache;
    if (cache)
    {
      curr_min_buff_space_sz+= cache->get_min_join_buffer_size();
      curr_buff_space_sz+= cache->get_join_buffer_size();
    }
  }
  curr_min_buff_space_sz+= min_buff_size;
  curr_buff_space_sz+= buff_size;

  if (optimize_buff_size)
  {
    /*
      optimize_join_buffer_size=on used. We should limit the join
      buffer space to join_buff_space_limit if possible.
    */
    if (curr_min_buff_space_sz > join_buff_space_limit)
    {
      /*
        Increase buffer size to minimum needed, to be able to use the
        join buffer.
      */
      join_buff_space_limit= curr_min_buff_space_sz;
    }
    if (curr_buff_space_sz > join_buff_space_limit &&
        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
                                  join_buff_space_limit))
      goto fail;                                // Fatal error
  }
  else if (curr_min_buff_space_sz > buff_size)
    goto fail;

  if (for_explain_only)
    return 0;

  /* Retry with progressively smaller sizes; at most 4 decrements possible */
  for (size_t buff_size_decr= (buff_size-min_buff_size)/4 + 1; ; )
  {
    size_t next_buff_size;

    if ((buff= (uchar*) my_malloc(key_memory_JOIN_CACHE, buff_size,
                                  MYF(MY_THREAD_SPECIFIC))))
      break;

    next_buff_size= buff_size > buff_size_decr ? buff_size-buff_size_decr : 0;
    if (next_buff_size < min_buff_size ||
        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
                                  curr_buff_space_sz-buff_size_decr))
      goto fail;
    buff_size= next_buff_size;

    curr_buff_space_sz= 0;
    for (tab= join->join_tab+join->const_tables; tab <= join_tab; tab++)
    {
      cache= tab->cache;
      if (cache)
        curr_buff_space_sz+= cache->get_join_buffer_size();
    }
  }
  return 0;

fail:
  buff_size= 0;
  return 1;
}


/*
  Shrink the size of the cache join buffer in a given ratio

  SYNOPSIS
    shrink_join_buffer_in_ratio()
      n           numerator of the ratio to shrink the buffer in
      d           denominator of the ratio

  DESCRIPTION
    The function first deallocates the join buffer of the cache. Then
    it allocates a buffer that is (n/d) times smaller.

  RETURN VALUE
    FALSE   on success with allocation of the smaller join buffer
    TRUE    otherwise
*/

bool JOIN_CACHE::shrink_join_buffer_in_ratio(ulonglong n, ulonglong d)
{
  size_t next_buff_size;
  /* n < d would grow the buffer, not shrink it: nothing to do */
  if (n < d)
    return FALSE;
  next_buff_size= (size_t) ((double) buff_size / n * d);
  set_if_bigger(next_buff_size, min_buff_size);
  buff_size= next_buff_size;
  return realloc_buffer();
}


/*
  Reallocate the join buffer of a join cache

  SYNOPSIS
    realloc_buffer()

  DESCRIPTION
    The function reallocates the join buffer of the join cache. After this
    it resets the buffer for writing.

  NOTES
    The function assumes that buff_size contains the new value for the join
    buffer size.

  RETURN VALUE
    0   if the buffer has been successfully reallocated
    1   otherwise
*/

int JOIN_CACHE::realloc_buffer()
{
  free();
  buff= (uchar*) my_malloc(key_memory_JOIN_CACHE, buff_size,
                           MYF(MY_THREAD_SPECIFIC));
  reset(TRUE);
  return buff == NULL;
}


/*
  Initialize a join cache

  SYNOPSIS
    init()
      for_explain       join buffer is initialized for explain only

  DESCRIPTION
    The function initializes the join cache structure. It is supposed to be
    called by init methods for classes derived from the JOIN_CACHE.
    The function allocates memory for the join buffer and for descriptors of
    the record fields stored in the buffer.

  NOTES
    The code of this function should have been included into the constructor
    code itself. However the new operator for the class JOIN_CACHE would
    never fail while memory allocation for the join buffer is not absolutely
    unlikely to fail. That's why this memory allocation has to be placed in a
    separate function that is called in a couple with a cache constructor.
    It is quite natural to put almost all other constructor actions into
    this function.

  RETURN VALUE
    0   initialization with buffer allocations has been succeeded
    1   otherwise
*/

int JOIN_CACHE::init(bool for_explain)
{
  DBUG_ENTER("JOIN_CACHE::init");

  for_explain_only= for_explain;

  calc_record_fields();

  collect_info_on_key_args();

  if (alloc_fields())
    DBUG_RETURN(1);

  create_flag_fields();

  create_key_arg_fields();

  create_remaining_fields();

  set_constants();

  if (alloc_buffer())
    DBUG_RETURN(1);

  reset(TRUE);

  DBUG_RETURN(0);
}


/*
  Check the possibility to read the access keys directly from the join buffer
  SYNOPSIS
    check_emb_key_usage()

  DESCRIPTION
    The function checks some conditions at which the key values can be
    read directly from the join buffer.
This is possible when the key + values can be composed by concatenation of the record fields + stored in the join buffer. Sometimes when the access key is + multi-component the function has to re-order the fields written + into the join buffer to make keys embedded. If key values for the + key access are detected as embedded then 'use_emb_key' is set to + TRUE. + + EXAMPLE + Let table t2 has an index defined on the columns a,b . Let's assume also + that the columns t2.a, t2.b as well as the columns t1.a, t1.b are all + of the integer type. Then if the query + SELECT COUNT(*) FROM t1, t2 WHERE t1.a=t2.a and t1.b=t2.b + is executed with a join cache in such a way that t1 is the driving + table then the key values to access table t2 can be read directly + from the join buffer. + + NOTES + In some cases key values could be read directly from the join buffer but + we still do not consider them embedded. In the future we'll expand the + the class of keys which we identify as embedded. + + NOTES + The function returns FALSE if no key is used to join the records + from join_tab. + + RETURN VALUE + TRUE key values will be considered as embedded, + FALSE otherwise. +*/ + +bool JOIN_CACHE::check_emb_key_usage() +{ + + if (!is_key_access()) + return FALSE; + + uint i; + Item *item; + KEY_PART_INFO *key_part; + CACHE_FIELD *copy; + CACHE_FIELD *copy_end; + uint len= 0; + TABLE_REF *ref= &join_tab->ref; + KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key); + + /* + If some of the key arguments are not from the local cache the key + is not considered as embedded. + TODO: + Expand it to the case when ref->key_parts=1 and local_key_arg_fields=0. + */ + if (external_key_arg_fields != 0) + return FALSE; + /* + If the number of the local key arguments is not equal to the number + of key parts the key value cannot be read directly from the join buffer. 
+ */ + if (local_key_arg_fields != ref->key_parts) + return FALSE; + + /* + A key is not considered embedded if one of the following is true: + - one of its key parts is not equal to a field + - it is a partial key + - definition of the argument field does not coincide with the + definition of the corresponding key component + - some of the key components are nullable + */ + for (i=0; i < ref->key_parts; i++) + { + item= ref->items[i]->real_item(); + if (item->type() != Item::FIELD_ITEM) + return FALSE; + key_part= keyinfo->key_part+i; + if (key_part->key_part_flag & HA_PART_KEY_SEG) + return FALSE; + if (!key_part->field->eq_def(((Item_field *) item)->field)) + return FALSE; + if (key_part->field->maybe_null()) + return FALSE; + } + + copy= field_descr+flag_fields; + copy_end= copy+local_key_arg_fields; + for ( ; copy < copy_end; copy++) + { + /* + If some of the key arguments are of variable length the key + is not considered as embedded. + */ + if (copy->type != 0) + return FALSE; + /* + If some of the key arguments are bit fields whose bits are partially + stored with null bits the key is not considered as embedded. + */ + if (copy->field->type() == MYSQL_TYPE_BIT && + ((Field_bit*) (copy->field))->bit_len) + return FALSE; + len+= copy->length; + } + + emb_key_length= len; + + /* + Make sure that key fields follow the order of the corresponding + key components these fields are equal to. For this the descriptors + of the fields that comprise the key might be re-ordered. 
+ */ + for (i= 0; i < ref->key_parts; i++) + { + uint j; + Item *item= ref->items[i]->real_item(); + Field *fld= ((Item_field *) item)->field; + CACHE_FIELD *init_copy= field_descr+flag_fields+i; + for (j= i, copy= init_copy; j < local_key_arg_fields; j++, copy++) + { + if (fld->eq(copy->field)) + { + if (j != i) + { + CACHE_FIELD key_part_copy= *copy; + *copy= *init_copy; + *init_copy= key_part_copy; + } + break; + } + } + } + + return TRUE; +} + + +/* + Write record fields and their required offsets into the join cache buffer + + SYNOPSIS + write_record_data() + link a reference to the associated info in the previous cache + is_full OUT true if it has been decided that no more records will be + added to the join buffer + + DESCRIPTION + This function put into the cache buffer the following info that it reads + from the join record buffers or computes somehow: + (1) the length of all fields written for the record (optional) + (2) an offset to the associated info in the previous cache (if there is any) + determined by the link parameter + (3) all flag fields of the tables whose data field are put into the cache: + - match flag (optional), + - null bitmaps for all tables, + - null row flags for all tables + (4) values of all data fields including + - full images of those fixed legth data fields that cannot have + trailing spaces + - significant part of fixed length fields that can have trailing spaces + with the prepanded length + - data of non-blob variable length fields with the prepanded data length + - blob data from blob fields with the prepanded data length + (5) record offset values for the data fields that are referred to from + other caches + + The record is written at the current position stored in the field 'pos'. + At the end of the function 'pos' points at the position right after the + written record data. + The function increments the number of records in the cache that is stored + in the 'records' field by 1. 
The function also modifies the values of + 'curr_rec_pos' and 'last_rec_pos' to point to the written record. + The 'end_pos' cursor is modified accordingly. + The 'last_rec_blob_data_is_in_rec_buff' is set on if the blob data + remains in the record buffers and not copied to the join buffer. It may + happen only to the blob data from the last record added into the cache. + If on_precond is attached to join_tab and it is not evaluated to TRUE + then MATCH_IMPOSSIBLE is placed in the match flag field of the record + written into the join buffer. + + RETURN VALUE + length of the written record data +*/ + +uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full) +{ + uint len; + bool last_record; + CACHE_FIELD *copy; + CACHE_FIELD *copy_end; + uchar *flags_pos; + uchar *cp= pos; + uchar *init_pos= cp; + uchar *rec_len_ptr= 0; + uint key_extra= extra_key_length(); + + records++; /* Increment the counter of records in the cache */ + + len= pack_length + key_extra; + + /* Make an adjustment for the size of the auxiliary buffer if there is any */ + uint incr= aux_buffer_incr(records); + size_t rem= rem_space(); + aux_buff_size+= len+incr < rem ? incr : rem; + + /* + For each blob to be put into cache save its length and a pointer + to the value in the corresponding element of the blob_ptr array. + Blobs with null values are skipped. + Increment 'len' by the total length of all these blobs. + */ + if (blobs) + { + CACHE_FIELD **copy_ptr= blob_ptr; + CACHE_FIELD **copy_ptr_end= copy_ptr+blobs; + for ( ; copy_ptr < copy_ptr_end; copy_ptr++) + { + Field_blob *blob_field= (Field_blob *) (*copy_ptr)->field; + if (!blob_field->is_null()) + { + uint blob_len= blob_field->get_length(); + (*copy_ptr)->blob_length= blob_len; + len+= blob_len; + (*copy_ptr)->str= blob_field->get_ptr(); + } + } + } + + /* + Check whether we won't be able to add any new record into the cache after + this one because the cache will be full. Set last_record to TRUE if it's so. 
We assume that the cache will be full
(copy->referenced_field_no) + copy->offset= (uint)(cp-curr_rec_pos); + + switch (copy->type) { + case CACHE_BLOB: + { + Field_blob *blob_field= (Field_blob *) copy->field; + if (last_record) + { + last_rec_blob_data_is_in_rec_buff= 1; + /* Put down the length of the blob and the pointer to the data */ + DBUG_ASSERT(cp + copy->length + sizeof(char*) <= buff + buff_size); + blob_field->get_image(cp, copy->length+sizeof(char*), + blob_field->charset()); + cp+= copy->length+sizeof(char*); + } + else + { + /* First put down the length of the blob and then copy the data */ + blob_field->get_image(cp, copy->length, + blob_field->charset()); + DBUG_ASSERT(cp + copy->length + copy->blob_length <= buff + buff_size); + if (copy->blob_length) + memcpy(cp+copy->length, copy->str, copy->blob_length); + cp+= copy->length+copy->blob_length; + } + break; + } + case CACHE_VARSTR1: + /* Copy the significant part of the short varstring field */ + len= (uint) copy->str[0] + 1; + DBUG_ASSERT(cp + len <= buff + buff_size); + memcpy(cp, copy->str, len); + cp+= len; + break; + case CACHE_VARSTR2: + /* Copy the significant part of the long varstring field */ + len= uint2korr(copy->str) + 2; + DBUG_ASSERT(cp + len <= buff + buff_size); + memcpy(cp, copy->str, len); + cp+= len; + break; + case CACHE_STRIPPED: + { + /* + Put down the field value stripping all trailing spaces off. + After this insert the length of the written sequence of bytes. + */ + uchar *str, *end; + for (str= copy->str, end= str+copy->length; + end > str && end[-1] == ' '; + end--) ; + len=(uint) (end-str); + DBUG_ASSERT(cp + len + 2 <= buff + buff_size); + int2store(cp, len); + memcpy(cp+2, str, len); + cp+= len+2; + break; + } + case CACHE_ROWID: + if (!copy->length) + { + /* + This may happen only for ROWID fields of materialized + derived tables and views. 
+ */ + TABLE *table= (TABLE *) copy->str; + copy->str= table->file->ref; + copy->length= table->file->ref_length; + if (!copy->str) + { + /* + If table is an empty inner table of an outer join and it is + a materialized derived table then table->file->ref == NULL. + */ + cp+= copy->length; + break; + } + } + /* fall through */ + default: + /* Copy the entire image of the field from the record buffer */ + DBUG_ASSERT(cp + copy->length <= buff + buff_size); + if (copy->str) + memcpy(cp, copy->str, copy->length); + cp+= copy->length; + } + } + + /* Add the offsets of the fields that are referenced from other caches */ + if (referenced_fields) + { + uint cnt= 0; + for (copy= field_descr+flag_fields; copy < copy_end ; copy++) + { + if (copy->referenced_field_no) + { + store_fld_offset(cp+size_of_fld_ofs*(copy->referenced_field_no-1), + copy->offset); + cnt++; + } + } + DBUG_ASSERT(cp + size_of_fld_ofs*cnt <= buff + buff_size); + cp+= size_of_fld_ofs*cnt; + } + + if (rec_len_ptr) + store_rec_length(rec_len_ptr, (ulong) (cp-rec_len_ptr-size_of_rec_len)); + last_rec_pos= curr_rec_pos; + end_pos= pos= cp; + *is_full= last_record; + + last_written_is_null_compl= 0; + if (!join_tab->first_unmatched && join_tab->on_precond) + { + join_tab->found= 0; + join_tab->not_null_compl= 1; + if (!join_tab->on_precond->val_int()) + { + flags_pos[0]= MATCH_IMPOSSIBLE; + last_written_is_null_compl= 1; + } + } + + return (uint) (cp-init_pos); +} + + +/* + Reset the join buffer for reading/writing: default implementation + + SYNOPSIS + reset() + for_writing if it's TRUE the function reset the buffer for writing + + DESCRIPTION + This default implementation of the virtual function reset() resets + the join buffer for reading or writing. + If the buffer is reset for reading only the 'pos' value is reset + to point to the very beginning of the join buffer. 
If the buffer is + reset for writing additionally: + - the counter of the records in the buffer is set to 0, + - the the value of 'last_rec_pos' gets pointing at the position just + before the buffer, + - 'end_pos' is set to point to the beginning of the join buffer, + - the size of the auxiliary buffer is reset to 0, + - the flag 'last_rec_blob_data_is_in_rec_buff' is set to 0. + + RETURN VALUE + none +*/ +void JOIN_CACHE::reset(bool for_writing) +{ + pos= buff; + curr_rec_link= 0; + if (for_writing) + { + records= 0; + last_rec_pos= buff; + aux_buff_size= 0; + end_pos= pos; + last_rec_blob_data_is_in_rec_buff= 0; + } +} + + +/* + Add a record into the join buffer: the default implementation + + SYNOPSIS + put_record() + + DESCRIPTION + This default implementation of the virtual function put_record writes + the next matching record into the join buffer. + It also links the record having been written into the join buffer with + the matched record in the previous cache if there is any. + The implementation assumes that the function get_curr_link() + will return exactly the pointer to this matched record. + + RETURN VALUE + TRUE if it has been decided that it should be the last record + in the join buffer, + FALSE otherwise +*/ + +bool JOIN_CACHE::put_record() +{ + bool is_full; + uchar *link= 0; + if (prev_cache) + link= prev_cache->get_curr_rec_link(); + write_record_data(link, &is_full); + return is_full; +} + + +/* + Read the next record from the join buffer: the default implementation + + SYNOPSIS + get_record() + + DESCRIPTION + This default implementation of the virtual function get_record + reads fields of the next record from the join buffer of this cache. + The function also reads all other fields associated with this record + from the the join buffers of the previous caches. The fields are read + into the corresponding record buffers. 
+ It is supposed that 'pos' points to the position in the buffer + right after the previous record when the function is called. + When the function returns the 'pos' values is updated to point + to the position after the read record. + The value of 'curr_rec_pos' is also updated by the function to + point to the beginning of the first field of the record in the + join buffer. + + RETURN VALUE + TRUE there are no more records to read from the join buffer + FALSE otherwise +*/ + +bool JOIN_CACHE::get_record() +{ + bool res; + uchar *prev_rec_ptr= 0; + if (with_length) + pos+= size_of_rec_len; + if (prev_cache) + { + pos+= prev_cache->get_size_of_rec_offset(); + prev_rec_ptr= prev_cache->get_rec_ref(pos); + } + curr_rec_pos= pos; + if (!(res= read_all_record_fields() == NO_MORE_RECORDS_IN_BUFFER)) + { + pos+= referenced_fields*size_of_fld_ofs; + if (prev_cache) + prev_cache->get_record_by_pos(prev_rec_ptr); + } + return res; +} + + +/* + Read a positioned record from the join buffer: the default implementation + + SYNOPSIS + get_record_by_pos() + rec_ptr position of the first field of the record in the join buffer + + DESCRIPTION + This default implementation of the virtual function get_record_pos + reads the fields of the record positioned at 'rec_ptr' from the join buffer. + The function also reads all other fields associated with this record + from the the join buffers of the previous caches. The fields are read + into the corresponding record buffers. 
+ + RETURN VALUE + none +*/ + +void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr) +{ + uchar *save_pos= pos; + pos= rec_ptr; + read_all_record_fields(); + pos= save_pos; + if (prev_cache) + { + uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr); + prev_cache->get_record_by_pos(prev_rec_ptr); + } +} + + +/* + Get the match flag from the referenced record: the default implementation + + SYNOPSIS + get_match_flag_by_pos() + rec_ptr position of the first field of the record in the join buffer + + DESCRIPTION + This default implementation of the virtual function get_match_flag_by_pos + get the match flag for the record pointed by the reference at the position + rec_ptr. If the match flag is placed in one of the previous buffers the + function first reaches the linked record fields in this buffer. + The function returns the value of the first encountered match flag. + + RETURN VALUE + match flag for the record at the position rec_ptr +*/ + +enum JOIN_CACHE::Match_flag JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr) +{ + Match_flag match_fl= MATCH_NOT_FOUND; + if (with_match_flag) + { + match_fl= (enum Match_flag) rec_ptr[0]; + return match_fl; + } + if (prev_cache) + { + uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr); + return prev_cache->get_match_flag_by_pos(prev_rec_ptr); + } + DBUG_ASSERT(0); + return match_fl; +} + + +/* + Get the match flag for the referenced record from specified join buffer + + SYNOPSIS + get_match_flag_by_pos_from_join_buffer() + rec_ptr position of the first field of the record in the join buffer + tab join table with join buffer where to look for the match flag + + DESCRIPTION + This default implementation of the get_match_flag_by_pos_from_join_buffer + method gets the match flag for the record pointed by the reference at the + position rec_ptr from the join buffer attached to the join table tab. + + RETURN VALUE + match flag for the record at the position rec_ptr from the join + buffer attached to the table tab. 
+*/ + +enum JOIN_CACHE::Match_flag +JOIN_CACHE::get_match_flag_by_pos_from_join_buffer(uchar *rec_ptr, + JOIN_TAB *tab) +{ + DBUG_ASSERT(tab->cache && tab->cache->with_match_flag); + for (JOIN_CACHE *cache= this; ; ) + { + if (cache->join_tab == tab) + return (enum Match_flag) rec_ptr[0]; + cache= cache->prev_cache; + rec_ptr= cache->get_rec_ref(rec_ptr); + } +} + + +/* + Calculate the increment of the auxiliary buffer for a record write + + SYNOPSIS + aux_buffer_incr() + recno the number of the record the increment to be calculated for + + DESCRIPTION + This function calls the aux_buffer_incr the method of the + companion member join_tab_scan to calculate the growth of the + auxiliary buffer when the recno-th record is added to the + join_buffer of this cache. + + RETURN VALUE + the number of bytes in the increment +*/ + +uint JOIN_CACHE::aux_buffer_incr(size_t recno) +{ + return join_tab_scan->aux_buffer_incr(recno); +} + +/* + Read all flag and data fields of a record from the join buffer + + SYNOPSIS + read_all_record_fields() + + DESCRIPTION + The function reads all flag and data fields of a record from the join + buffer into the corresponding record buffers. + The fields are read starting from the position 'pos' which is + supposed to point to the beginning of the first record field. + The function increments the value of 'pos' by the length of the + read data. 
+ + RETURN VALUE + (-1) if there is no more records in the join buffer + length of the data read from the join buffer - otherwise +*/ + +uint JOIN_CACHE::read_all_record_fields() +{ + uchar *init_pos= pos; + + if (pos > last_rec_pos || !records) + return NO_MORE_RECORDS_IN_BUFFER; + + /* First match flag, read null bitmaps and null_row flag for each table */ + read_flag_fields(); + + /* Now read the remaining table fields if needed */ + CACHE_FIELD *copy= field_descr+flag_fields; + CACHE_FIELD *copy_end= field_descr+fields; + bool blob_in_rec_buff= blob_data_is_in_rec_buff(init_pos); + for ( ; copy < copy_end; copy++) + read_record_field(copy, blob_in_rec_buff); + + return (uint) (pos-init_pos); +} + + +/* + Read all flag fields of a record from the join buffer + + SYNOPSIS + read_flag_fields() + + DESCRIPTION + The function reads all flag fields of a record from the join + buffer into the corresponding record buffers. + The fields are read starting from the position 'pos'. + The function increments the value of 'pos' by the length of the + read data. + + RETURN VALUE + length of the data read from the join buffer +*/ + +uint JOIN_CACHE::read_flag_fields() +{ + uchar *init_pos= pos; + CACHE_FIELD *copy= field_descr; + CACHE_FIELD *copy_end= copy+flag_fields; + if (with_match_flag) + { + copy->str[0]= MY_TEST((Match_flag) pos[0] == MATCH_FOUND); + pos+= copy->length; + copy++; + } + for ( ; copy < copy_end; copy++) + { + memcpy(copy->str, pos, copy->length); + pos+= copy->length; + } + return (uint)(pos-init_pos); +} + + +/* + Read a data record field from the join buffer + + SYNOPSIS + read_record_field() + copy the descriptor of the data field to be read + blob_in_rec_buff indicates whether this is the field from the record + whose blob data are in record buffers + + DESCRIPTION + The function reads the data field specified by the parameter copy + from the join buffer into the corresponding record buffer. + The field is read starting from the position 'pos'. 
+ The data of blob values is not copied from the join buffer. + The function increments the value of 'pos' by the length of the + read data. + + RETURN VALUE + length of the data read from the join buffer +*/ + +uint JOIN_CACHE::read_record_field(CACHE_FIELD *copy, bool blob_in_rec_buff) +{ + uint len; + /* Do not copy the field if its value is null */ + if (copy->field && copy->field->maybe_null() && copy->field->is_null()) + return 0; + switch (copy->type) { + case CACHE_BLOB: + { + Field_blob *blob_field= (Field_blob *) copy->field; + /* + Copy the length and the pointer to data but not the blob data + itself to the record buffer + */ + if (blob_in_rec_buff) + { + blob_field->set_image(pos, copy->length + sizeof(char*), + blob_field->charset()); + len= copy->length + sizeof(char*); + } + else + { + blob_field->set_ptr(pos, pos+copy->length); + len= copy->length + blob_field->get_length(); + } + } + break; + case CACHE_VARSTR1: + /* Copy the significant part of the short varstring field */ + len= (uint) pos[0] + 1; + memcpy(copy->str, pos, len); + break; + case CACHE_VARSTR2: + /* Copy the significant part of the long varstring field */ + len= uint2korr(pos) + 2; + memcpy(copy->str, pos, len); + break; + case CACHE_STRIPPED: + /* Pad the value by spaces that has been stripped off */ + len= uint2korr(pos); + memcpy(copy->str, pos+2, len); + memset(copy->str+len, ' ', copy->length-len); + len+= 2; + break; + case CACHE_ROWID: + if (!copy->str) + { + len= copy->length; + break; + } + /* fall through */ + default: + /* Copy the entire image of the field from the record buffer */ + len= copy->length; + memcpy(copy->str, pos, len); + } + pos+= len; + return len; +} + + +/* + Read a referenced field from the join buffer + + SYNOPSIS + read_referenced_field() + copy pointer to the descriptor of the referenced field + rec_ptr pointer to the record that may contain this field + len IN/OUT total length of the record fields + + DESCRIPTION + The function checks whether copy 
points to a data field descriptor + for this cache object. If it does not then the function returns + FALSE. Otherwise the function reads the field of the record in + the join buffer pointed by 'rec_ptr' into the corresponding record + buffer and returns TRUE. + If the value of *len is 0 then the function sets it to the total + length of the record fields including possible trailing offset + values. Otherwise *len is supposed to provide this value that + has been obtained earlier. + + NOTE + If the value of the referenced field is null then the offset + for the value is set to 0. If the value of a field can be null + then the value of flag_fields is always positive. So the offset + for any non-null value cannot be 0 in this case. + + RETURN VALUE + TRUE 'copy' points to a data descriptor of this join cache + FALSE otherwise +*/ + +bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy, + uchar *rec_ptr, + uint *len) +{ + uchar *ptr; + uint offset; + if (copy < field_descr || copy >= field_descr+fields) + return FALSE; + if (!*len) + { + /* Get the total length of the record fields */ + uchar *len_ptr= rec_ptr; + if (prev_cache) + len_ptr-= prev_cache->get_size_of_rec_offset(); + *len= get_rec_length(len_ptr-size_of_rec_len); + } + + ptr= rec_ptr-(prev_cache ? 
prev_cache->get_size_of_rec_offset() : 0); + offset= get_fld_offset(ptr+ *len - + size_of_fld_ofs* + (referenced_fields+1-copy->referenced_field_no)); + bool is_null= FALSE; + Field *field= copy->field; + if (offset == 0 && flag_fields) + is_null= TRUE; + if (is_null) + { + field->set_null(); + if (!field->real_maybe_null()) + field->table->null_row= 1; + } + else + { + uchar *save_pos= pos; + field->set_notnull(); + if (!field->real_maybe_null()) + field->table->null_row= 0; + pos= rec_ptr+offset; + read_record_field(copy, blob_data_is_in_rec_buff(rec_ptr)); + pos= save_pos; + } + return TRUE; +} + + +/* + Skip record from join buffer if's already matched: default implementation + + SYNOPSIS + skip_if_matched() + + DESCRIPTION + This default implementation of the virtual function skip_if_matched + skips the next record from the join buffer if its match flag is set to + MATCH_FOUND. + If the record is skipped the value of 'pos' is set to point to the position + right after the record. + + NOTE + Currently this function is called only when generating null complemented + records for outer joins (=> only when join_tab->first_unmatched != NULL). 
+ + RETURN VALUE + TRUE the match flag is set to MATCH_FOUND and the record has been skipped + FALSE otherwise +*/ + +bool JOIN_CACHE::skip_if_matched() +{ + DBUG_ASSERT(with_length); + uint offset= size_of_rec_len; + if (prev_cache) + offset+= prev_cache->get_size_of_rec_offset(); + /* Check whether the match flag is MATCH_FOUND */ + if (get_match_flag_by_pos_from_join_buffer(pos+offset, + join_tab->first_unmatched) == + MATCH_FOUND) + { + pos+= size_of_rec_len + get_rec_length(pos); + return TRUE; + } + return FALSE; +} + + +/* + Skip record from join buffer if the match isn't needed: default implementation + + SYNOPSIS + skip_if_not_needed_match() + + DESCRIPTION + This default implementation of the virtual function skip_if_not_needed_match + skips the next record from the join when generating join extensions + for the records in the join buffer depending on the value of the match flag. + - In the case of a semi-nest the match flag may be in two states + {MATCH_NOT_FOUND, MATCH_FOUND}. The record is skipped if the flag is set + to MATCH_FOUND. + - In the case of an outer join the match may be in three states + {MATCH_NOT_FOUND, MATCH_IMPOSSIBLE, MATCH_FOUND}. + If not_exists optimization is applied the record is skipped when + the flag is set to MATCH_FOUND or to MATCH_IMPOSSIBLE. Otherwise + the record is skipped only when the flag is set to MATCH_IMPOSSIBLE. + + If the record is skipped the value of 'pos' is set to point to the position + right after the record. + + NOTE + Currently the function is called only when generating non-null complemented + extensions for records in the join buffer. 
+ + RETURN VALUE + TRUE the record has to be skipped + FALSE otherwise +*/ + +bool JOIN_CACHE::skip_if_not_needed_match() +{ + DBUG_ASSERT(with_length); + enum Match_flag match_fl; + uint offset= size_of_rec_len; + bool skip= FALSE; + if (prev_cache) + offset+= prev_cache->get_size_of_rec_offset(); + + match_fl= get_match_flag_by_pos(pos+offset); + skip= join_tab->first_sj_inner_tab ? + match_fl == MATCH_FOUND : // the case of semi-join + not_exists_opt_is_applicable && + join_tab->table->reginfo.not_exists_optimize ? + match_fl != MATCH_NOT_FOUND : // the case of not exist opt + match_fl == MATCH_IMPOSSIBLE; + + if (skip) + { + pos+= size_of_rec_len + get_rec_length(pos); + return TRUE; + } + return FALSE; +} + + +/* + Restore the fields of the last record from the join buffer + + SYNOPSIS + restore_last_record() + + DESCRIPTION + This function restore the values of the fields of the last record put + into join buffer in record buffers. The values most probably have been + overwritten by the field values from other records when they were read + from the join buffer into the record buffer in order to check pushdown + predicates. + + RETURN + none +*/ + +void JOIN_CACHE::restore_last_record() +{ + if (records) + get_record_by_pos(last_rec_pos); +} + + +/* + Join records from the join buffer with records from the next join table + + SYNOPSIS + join_records() + skip_last do not find matches for the last record from the buffer + + DESCRIPTION + The functions extends all records from the join buffer by the matched + records from join_tab. In the case of outer join operation it also + adds null complementing extensions for the records from the join buffer + that have no match. + No extensions are generated for the last record from the buffer if + skip_last is true. + + NOTES + The function must make sure that if linked join buffers are used then + a join buffer cannot be refilled again until all extensions in the + buffers chained to this one are generated. 
+ Currently an outer join operation with several inner tables always uses + at least two linked buffers with the match join flags placed in the + first buffer. Any record composed of rows of the inner tables that + matches a record in this buffer must refer to the position of the + corresponding match flag. + + IMPLEMENTATION + When generating extensions for outer tables of an outer join operation + first we generate all extensions for those records from the join buffer + that have matches, after which null complementing extension for all + unmatched records from the join buffer are generated. + + RETURN VALUE + return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS. +*/ + +enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last) +{ + JOIN_TAB *tab; + enum_nested_loop_state rc= NESTED_LOOP_OK; + bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join(); + DBUG_ENTER("JOIN_CACHE::join_records"); + + if (outer_join_first_inner && !join_tab->first_unmatched) + join_tab->not_null_compl= TRUE; + + if (!join_tab->first_unmatched) + { + bool pfs_batch_update= join_tab->pfs_batch_update(join); + if (pfs_batch_update) + join_tab->table->file->start_psi_batch_mode(); + /* Find all records from join_tab that match records from join buffer */ + rc= join_matching_records(skip_last); + if (pfs_batch_update) + join_tab->table->file->end_psi_batch_mode(); + if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS) + goto finish; + if (outer_join_first_inner) + { + if (next_cache && join_tab != join_tab->last_inner) + { + /* + Ensure that all matches for outer records from join buffer are to be + found. Now we ensure that all full records are found for records from + join buffer. Generally this is an overkill. + TODO: Ensure that only matches of the inner table records have to be + found for the records from join buffer. 
+ */ + rc= next_cache->join_records(skip_last); + if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS) + goto finish; + } + join_tab->not_null_compl= FALSE; + /* + Prepare for generation of null complementing extensions. + For all inner tables of the outer join operation for which + regular matches have been just found the field 'first_unmatched' + is set to point the the first inner table. After all null + complement rows are generated for this outer join this field + is set back to NULL. + */ + for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++) + tab->first_unmatched= join_tab->first_inner; + } + } + if (join_tab->first_unmatched) + { + if (is_key_access()) + restore_last_record(); + + /* + Generate all null complementing extensions for the records from + join buffer that don't have any matching rows from the inner tables. + */ + reset(FALSE); + rc= join_null_complements(skip_last); + if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS) + goto finish; + } + if(next_cache) + { + /* + When using linked caches we must ensure the records in the next caches + that refer to the records in the join buffer are fully extended. + Otherwise we could have references to the records that have been + already erased from the join buffer and replaced for new records. + */ + rc= next_cache->join_records(skip_last); + if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS) + goto finish; + } + + if (skip_last) + { + DBUG_ASSERT(!is_key_access()); + /* + Restore the last record from the join buffer to generate + all extensions for it. + */ + get_record(); + } + +finish: + if (outer_join_first_inner && + join_tab->first_inner == join_tab->first_unmatched) + { + /* + All null complemented rows have been already generated for all + outer records from join buffer. Restore the state of the + first_unmatched values to 0 to avoid another null complementing. 
+ */ + for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++) + tab->first_unmatched= 0; + } + restore_last_record(); + reset(TRUE); + DBUG_PRINT("exit", ("rc: %d", rc)); + DBUG_RETURN(rc); +} + + +/* + Find matches from the next table for records from the join buffer + + SYNOPSIS + join_matching_records() + skip_last do not look for matches for the last partial join record + + DESCRIPTION + The function retrieves rows of the join_tab table and checks whether they + match partial join records from the join buffer. If a match is found + the function will call the sub_select function trying to look for matches + for the remaining join operations. + This function currently is called only from the function join_records. + If the value of skip_last is true the function writes the partial join + record from the record buffer into the join buffer to save its value for + the future processing in the caller function. + + NOTES + If employed by BNL or BNLH join algorithms the function performs a full + scan of join_tab for each refill of the join buffer. If BKA or BKAH + algorithms are used then the function iterates only over those records + from join_tab that can be accessed by keys built over records in the join + buffer. To apply a proper method of iteration the function just calls + virtual iterator methods (open, next, close) of the member join_tab_scan. + The member can be either of the JOIN_TAB_SCAN or JOIN_TAB_SCAN_MMR type. + The class JOIN_TAB_SCAN provides the iterator methods for BNL/BNLH join + algorithms. The class JOIN_TAB_SCAN_MRR provides the iterator methods + for BKA/BKAH join algorithms. + When the function looks for records from the join buffer that would + match a record from join_tab it iterates either over all records in + the buffer or only over selected records. If BNL join operation is + performed all records are checked for the match. 
If BNLH or BKAH + algorithm is employed to join join_tab then the function looks only + through the records with the same join key as the record from join_tab. + With the BKA join algorithm only one record from the join buffer is checked + for a match for any record from join_tab. To iterate over the candidates + for a match the virtual function get_next_candidate_for_match is used, + while the virtual function prepare_look_for_matches is called to prepare + for such iteration process. + + NOTES + The function produces all matching extensions for the records in the + join buffer following the path of the employed blocked algorithm. + When an outer join operation is performed all unmatched records from + the join buffer must be extended by null values. The function + 'join_null_complements' serves this purpose. + + RETURN VALUE + return one of enum_nested_loop_state +*/ + +enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last) +{ + int error; + enum_nested_loop_state rc= NESTED_LOOP_OK; + join_tab->table->null_row= 0; + bool check_only_first_match= join_tab->check_only_first_match(); + DBUG_ENTER("JOIN_CACHE::join_matching_records"); + + /* Return at once if there are no records in the join buffer */ + if (!records) + DBUG_RETURN(NESTED_LOOP_OK); + + /* + When joining we read records from the join buffer back into record buffers. + If matches for the last partial join record are found through a call to + the sub_select function then this partial join record must be saved in the + join buffer in order to be restored just before the sub_select call. + */ + if (skip_last) + put_record(); + + if (join_tab->use_quick == 2 && join_tab->select->quick) + { + /* A dynamic range access was used last. 
Clean up after it */ + delete join_tab->select->quick; + join_tab->select->quick= 0; + } + + if ((rc= join_tab_execution_startup(join_tab)) < 0) + goto finish2; + + if (join_tab->build_range_rowid_filter_if_needed()) + { + rc= NESTED_LOOP_ERROR; + goto finish2; + } + + /* Prepare to retrieve all records of the joined table */ + if (unlikely((error= join_tab_scan->open()))) + { + /* + TODO: if we get here, we will assert in net_send_statement(). Add test + coverage and fix. + */ + goto finish; + } + + while (!(error= join_tab_scan->next())) + { + if (unlikely(join->thd->check_killed())) + { + /* The user has aborted the execution of the query */ + rc= NESTED_LOOP_KILLED; + goto finish; + } + + if (join_tab->keep_current_rowid) + join_tab->table->file->position(join_tab->table->record[0]); + + /* Prepare to read matching candidates from the join buffer */ + if (prepare_look_for_matches(skip_last)) + continue; + join_tab->jbuf_tracker->r_scans++; + + uchar *rec_ptr; + /* Read each possible candidate from the buffer and look for matches */ + while ((rec_ptr= get_next_candidate_for_match())) + { + join_tab->jbuf_tracker->r_rows++; + /* + If only the first match is needed, and, it has been already found for + the next record read from the join buffer, then the record is skipped. + Also those records that must be null complemented are not considered + as candidates for matches. + */ + + not_exists_opt_is_applicable= true; + if (check_only_first_match && join_tab->first_inner) + { + /* + This is the case with not_exists optimization for nested outer join + when join_tab is the last inner table for one or more embedding outer + joins. To safely use 'not_exists' optimization in this case we have + to check that the match flags for all these embedding outer joins are + in the 'on' state. + (See also a similar check in evaluate_join_record() for the case when + join buffer are not used.) 
+ */ + for (JOIN_TAB *tab= join_tab->first_inner; + tab && tab->first_inner && tab->last_inner == join_tab; + tab= tab->first_inner->first_upper) + { + if (get_match_flag_by_pos_from_join_buffer(rec_ptr, tab) != + MATCH_FOUND) + { + not_exists_opt_is_applicable= false; + break; + } + } + } + + if ((!join_tab->on_precond && + (!check_only_first_match || + (join_tab->first_inner && !not_exists_opt_is_applicable))) || + !skip_next_candidate_for_match(rec_ptr)) + { + ANALYZE_START_TRACKING(join->thd, join_tab->jbuf_unpack_tracker); + read_next_candidate_for_match(rec_ptr); + ANALYZE_STOP_TRACKING(join->thd, join_tab->jbuf_unpack_tracker); + rc= generate_full_extensions(rec_ptr); + if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS) + goto finish; + } + } + } + +finish: + if (error) + rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR; +finish2: + join_tab_scan->close(); + DBUG_RETURN(rc); +} + + +/* + Set match flag for a record in join buffer if it has not been set yet + + SYNOPSIS + set_match_flag_if_none() + first_inner the join table to which this flag is attached to + rec_ptr pointer to the record in the join buffer + + DESCRIPTION + If the records of the table are accumulated in a join buffer the function + sets the match flag for the record in the buffer that is referred to by + the record from this cache positioned at 'rec_ptr'. + The function also sets the match flag 'found' of the table first inner + if it has not been set before. + + NOTES + The function assumes that the match flag for any record in any cache + is placed in the first byte occupied by the record fields. + + RETURN VALUE + TRUE the match flag is set by this call for the first time + FALSE the match flag has been set before this call +*/ + +bool JOIN_CACHE::set_match_flag_if_none(JOIN_TAB *first_inner, + uchar *rec_ptr) +{ + if (!first_inner->cache) + { + /* + Records of the first inner table to which the flag is attached to + are not accumulated in a join buffer. 
+ */ + if (first_inner->found) + return FALSE; + else + { + first_inner->found= 1; + return TRUE; + } + } + JOIN_CACHE *cache= this; + while (cache->join_tab != first_inner) + { + cache= cache->prev_cache; + DBUG_ASSERT(cache); + rec_ptr= cache->get_rec_ref(rec_ptr); + } + if ((Match_flag) rec_ptr[0] != MATCH_FOUND) + { + rec_ptr[0]= MATCH_FOUND; + first_inner->found= 1; + return TRUE; + } + return FALSE; +} + + +/* + Generate all full extensions for a partial join record in the buffer + + SYNOPSIS + generate_full_extensions() + rec_ptr pointer to the record from join buffer to generate extensions + + DESCRIPTION + The function first checks whether the current record of 'join_tab' matches + the partial join record from join buffer located at 'rec_ptr'. If it is the + case the function calls the join_tab->next_select method to generate + all full extension for this partial join match. + + RETURN VALUE + return one of enum_nested_loop_state. +*/ + +enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr) +{ + enum_nested_loop_state rc= NESTED_LOOP_OK; + DBUG_ENTER("JOIN_CACHE::generate_full_extensions"); + + /* + Check whether the extended partial join record meets + the pushdown conditions. 
+ */ + if (check_match(rec_ptr)) + { + int res= 0; + + if (!join_tab->check_weed_out_table || + !(res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd))) + { + set_curr_rec_link(rec_ptr); + rc= (join_tab->next_select)(join, join_tab+1, 0); + if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS) + { + reset(TRUE); + DBUG_RETURN(rc); + } + } + if (res == -1) + { + rc= NESTED_LOOP_ERROR; + DBUG_RETURN(rc); + } + } + else if (unlikely(join->thd->is_error())) + rc= NESTED_LOOP_ERROR; + DBUG_RETURN(rc); +} + + +/* + Check matching to a partial join record from the join buffer + + SYNOPSIS + check_match() + rec_ptr pointer to the record from join buffer to check matching to + + DESCRIPTION + The function checks whether the current record of 'join_tab' matches + the partial join record from join buffer located at 'rec_ptr'. If this is + the case and 'join_tab' is the last inner table of a semi-join or an outer + join the function turns on the match flag for the 'rec_ptr' record unless + it has been already set. + + NOTES + Setting the match flag on can trigger re-evaluation of pushdown conditions + for the record when join_tab is the last inner table of an outer join. + + RETURN VALUE + TRUE there is a match + FALSE there is no match + In this case the caller must also check thd->is_error() to see + if there was a fatal error for the query. +*/ + +inline bool JOIN_CACHE::check_match(uchar *rec_ptr) +{ + /* Check whether pushdown conditions are satisfied */ + DBUG_ENTER("JOIN_CACHE:check_match"); + + if (join_tab->select && join_tab->select->skip_record(join->thd) <= 0) + DBUG_RETURN(FALSE); + + join_tab->jbuf_tracker->r_rows_after_where++; + + if (!join_tab->is_last_inner_table()) + DBUG_RETURN(TRUE); + + /* + This is the last inner table of an outer join, + and maybe of other embedding outer joins, or + this is the last inner table of a semi-join. 
+ */ + JOIN_TAB *first_inner= join_tab->get_first_inner_table(); + do + { + set_match_flag_if_none(first_inner, rec_ptr); + if (first_inner->check_only_first_match() && + !join_tab->first_inner) + DBUG_RETURN(TRUE); + /* + This is the first match for the outer table row. + The function set_match_flag_if_none has turned the flag + first_inner->found on. The pushdown predicates for + inner tables must be re-evaluated with this flag on. + Note that, if first_inner is the first inner table + of a semi-join, but is not an inner table of an outer join + such that 'not exists' optimization can be applied to it, + the re-evaluation of the pushdown predicates is not needed. + */ + for (JOIN_TAB *tab= first_inner; tab <= join_tab; tab++) + { + if (tab->select && tab->select->skip_record(join->thd) <= 0) + DBUG_RETURN(FALSE); + } + } + while ((first_inner= first_inner->first_upper) && + first_inner->last_inner == join_tab); + DBUG_RETURN(TRUE); +} + + +/* + Add null complements for unmatched outer records from join buffer + + SYNOPSIS + join_null_complements() + skip_last do not add null complements for the last record + + DESCRIPTION + This function is called only for inner tables of outer joins. + The function retrieves all rows from the join buffer and adds null + complements for those of them that do not have matches for outer + table records. + If the 'join_tab' is the last inner table of the embedding outer + join and the null complemented record satisfies the outer join + condition then the the corresponding match flag is turned on + unless it has been set earlier. This setting may trigger + re-evaluation of pushdown conditions for the record. + + NOTES + The same implementation of the virtual method join_null_complements + is used for BNL/BNLH/BKA/BKA join algorthm. + + RETURN VALUE + return one of enum_nested_loop_state. 
+*/ + +enum_nested_loop_state JOIN_CACHE::join_null_complements(bool skip_last) +{ + ulonglong cnt; + enum_nested_loop_state rc= NESTED_LOOP_OK; + bool is_first_inner= join_tab == join_tab->first_unmatched; + DBUG_ENTER("JOIN_CACHE::join_null_complements"); + + /* Return at once if there are no records in the join buffer */ + if (!records) + DBUG_RETURN(NESTED_LOOP_OK); + + cnt= records - (is_key_access() ? 0 : MY_TEST(skip_last)); + + /* This function may be called only for inner tables of outer joins */ + DBUG_ASSERT(join_tab->first_inner); + + for ( ; cnt; cnt--) + { + if (unlikely(join->thd->check_killed())) + { + /* The user has aborted the execution of the query */ + rc= NESTED_LOOP_KILLED; + goto finish; + } + /* Just skip the whole record if a match for it has been already found */ + if (!is_first_inner || !skip_if_matched()) + { + get_record(); + /* The outer row is complemented by nulls for each inner table */ + restore_record(join_tab->table, s->default_values); + mark_as_null_row(join_tab->table); + rc= generate_full_extensions(get_curr_rec()); + if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS) + goto finish; + } + } + +finish: + DBUG_RETURN(rc); +} + + +/* + Save data on the join algorithm employed by the join cache + + SYNOPSIS + save_explain_data() + str string to add the comment on the employed join algorithm to + + DESCRIPTION + This function puts info about the type of the used join buffer (flat or + incremental) and on the type of the the employed join algorithm (BNL, + BNLH, BKA or BKAH) to the data structure + + RETURN VALUE + 0 ok + 1 error +*/ + +bool JOIN_CACHE::save_explain_data(EXPLAIN_BKA_TYPE *explain) +{ + explain->incremental= MY_TEST(prev_cache); + + explain->join_buffer_size= get_join_buffer_size(); + + switch (get_join_alg()) { + case BNL_JOIN_ALG: + explain->join_alg= "BNL"; + break; + case BNLH_JOIN_ALG: + explain->join_alg= "BNLH"; + break; + case BKA_JOIN_ALG: + explain->join_alg= "BKA"; + break; + case BKAH_JOIN_ALG: 
+ explain->join_alg= "BKAH"; + break; + default: + DBUG_ASSERT(0); + } + return 0; +} + +/** + get thread handle. +*/ + +THD *JOIN_CACHE::thd() +{ + return join->thd; +} + + +static bool add_mrr_explain_info(String *str, uint mrr_mode, handler *file) +{ + char mrr_str_buf[128]={0}; + int len; + len= file->multi_range_read_explain_info(mrr_mode, mrr_str_buf, + sizeof(mrr_str_buf)); + if (len > 0) + { + if (str->length()) + { + if (str->append(STRING_WITH_LEN("; "))) + return 1; + } + if (str->append(mrr_str_buf, len)) + return 1; + } + return 0; +} + + +bool JOIN_CACHE_BKA::save_explain_data(EXPLAIN_BKA_TYPE *explain) +{ + if (JOIN_CACHE::save_explain_data(explain)) + return 1; + return add_mrr_explain_info(&explain->mrr_type, mrr_mode, join_tab->table->file); +} + + +bool JOIN_CACHE_BKAH::save_explain_data(EXPLAIN_BKA_TYPE *explain) +{ + if (JOIN_CACHE::save_explain_data(explain)) + return 1; + return add_mrr_explain_info(&explain->mrr_type, mrr_mode, join_tab->table->file); +} + + +/* + Initialize a hashed join cache + + SYNOPSIS + init() + for_explain join buffer is initialized for explain only + + DESCRIPTION + The function initializes the cache structure with a hash table in it. + The hash table will be used to store key values for the records from + the join buffer. + The function allocates memory for the join buffer and for descriptors of + the record fields stored in the buffer. + The function also initializes a hash table for record keys within the join + buffer space. + + NOTES VALUE + The function is supposed to be called by the init methods of the classes + derived from JOIN_CACHE_HASHED. 
+ + RETURN VALUE + 0 initialization with buffer allocations has been succeeded + 1 otherwise +*/ + +int JOIN_CACHE_HASHED::init(bool for_explain) +{ + TABLE_REF *ref= &join_tab->ref; + DBUG_ENTER("JOIN_CACHE_HASHED::init"); + + hash_table= 0; + key_entries= 0; + + key_length= ref->key_length; + + if (JOIN_CACHE::init(for_explain)) + { + THD *thd= join->thd; + const char *errmsg= + "Could not create a join buffer. Please check and " + "adjust the value of the variables 'JOIN_BUFFER_SIZE (%llu)' and " + "'JOIN_BUFFER_SPACE_LIMIT (%llu)'"; + my_printf_error(ER_OUTOFMEMORY, errmsg, MYF(0), + thd->variables.join_buff_size, + thd->variables.join_buff_space_limit); + DBUG_RETURN (1); + } + + if (for_explain) + DBUG_RETURN(0); + + if (!(key_buff= (uchar*) join->thd->alloc(key_length))) + DBUG_RETURN(1); + + /* Take into account a reference to the next record in the key chain */ + pack_length+= get_size_of_rec_offset(); + pack_length_with_blob_ptrs+= get_size_of_rec_offset(); + + ref_key_info= join_tab->get_keyinfo_by_key_no(join_tab->ref.key); + ref_used_key_parts= join_tab->ref.key_parts; + + hash_func= &JOIN_CACHE_HASHED::get_hash_idx_simple; + hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_simple; + + KEY_PART_INFO *key_part= ref_key_info->key_part; + KEY_PART_INFO *key_part_end= key_part+ref_used_key_parts; + for ( ; key_part < key_part_end; key_part++) + { + if (!key_part->field->eq_cmp_as_binary()) + { + hash_func= &JOIN_CACHE_HASHED::get_hash_idx_complex; + hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_complex; + break; + } + } + + init_hash_table(); + + rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+ + (prev_cache ? 
prev_cache->get_size_of_rec_offset() : 0); + + data_fields_offset= 0; + if (use_emb_key) + { + CACHE_FIELD *copy= field_descr; + CACHE_FIELD *copy_end= copy+flag_fields; + for ( ; copy < copy_end; copy++) + data_fields_offset+= copy->length; + } + + DBUG_RETURN(0); +} + + +/* + Initialize the hash table of a hashed join cache + + SYNOPSIS + init_hash_table() + + DESCRIPTION + The function estimates the number of hash table entries in the hash + table to be used and initializes this hash table within the join buffer + space. + + RETURN VALUE + Currently the function always returns 0; +*/ + +int JOIN_CACHE_HASHED::init_hash_table() +{ + hash_table= 0; + key_entries= 0; + + avg_record_length= calc_avg_record_length(); + + /* Calculate the minimal possible value of size_of_key_ofs greater than 1 */ + uint max_size_of_key_ofs= MY_MAX(2, get_size_of_rec_offset()); + for (size_of_key_ofs= 2; + size_of_key_ofs <= max_size_of_key_ofs; + size_of_key_ofs+= 2) + { + key_entry_length= get_size_of_rec_offset() + // key chain header + size_of_key_ofs + // reference to the next key + (use_emb_key ? get_size_of_rec_offset() : key_length); + + size_t space_per_rec= avg_record_length + + avg_aux_buffer_incr + + key_entry_length+size_of_key_ofs; + size_t n= buff_size / space_per_rec; + + /* + TODO: Make a better estimate for this upper bound of + the number of records in in the join buffer. + */ + size_t max_n= buff_size / (pack_length-length+ + key_entry_length+size_of_key_ofs); + + hash_entries= (uint) (n / 0.7); + set_if_bigger(hash_entries, 1); + + if (offset_size((uint)(max_n*key_entry_length)) <= + size_of_key_ofs) + break; + } + + /* Initialize the hash table */ + hash_table= buff + (buff_size-hash_entries*size_of_key_ofs); + cleanup_hash_table(); + curr_key_entry= hash_table; + + return 0; +} + + +/* + Reallocate the join buffer of a hashed join cache + + SYNOPSIS + realloc_buffer() + + DESCRITION + The function reallocates the join buffer of the hashed join cache. 
+ After this it initializes a hash table within the buffer space and + resets the join cache for writing. + + NOTES + The function assumes that buff_size contains the new value for the join + buffer size. + + RETURN VALUE + 0 if the buffer has been successfully reallocated + 1 otherwise +*/ + +int JOIN_CACHE_HASHED::realloc_buffer() +{ + free(); + buff= (uchar*) my_malloc(key_memory_JOIN_CACHE, buff_size, + MYF(MY_THREAD_SPECIFIC)); + init_hash_table(); + reset(TRUE); + return buff == NULL; +} + + +/* + Get maximum size of the additional space per record used for record keys + + SYNOPSYS + get_max_key_addon_space_per_record() + + DESCRIPTION + The function returns the size of the space occupied by one key entry + and one hash table entry. + + RETURN VALUE + maximum size of the additional space per record that is used to store + record keys in the hash table +*/ + +uint JOIN_CACHE_HASHED::get_max_key_addon_space_per_record() +{ + ulong len; + TABLE_REF *ref= &join_tab->ref; + /* + The total number of hash entries in the hash tables is bounded by + ceiling(N/0.7) where N is the maximum number of records in the buffer. + That's why the multiplier 2 is used in the formula below. + */ + len= (use_emb_key ? get_size_of_rec_offset() : ref->key_length) + + size_of_rec_ofs + // size of the key chain header + size_of_rec_ofs + // >= size of the reference to the next key + 2*size_of_rec_ofs; // >= 2*( size of hash table entry) + return len; +} + + +/* + Reset the buffer of a hashed join cache for reading/writing + + SYNOPSIS + reset() + for_writing if it's TRUE the function reset the buffer for writing + + DESCRIPTION + This implementation of the virtual function reset() resets the join buffer + of the JOIN_CACHE_HASHED class for reading or writing. + Additionally to what the default implementation does this function + cleans up the hash table allocated within the buffer. 
+ + RETURN VALUE + none +*/ + +void JOIN_CACHE_HASHED::reset(bool for_writing) +{ + this->JOIN_CACHE::reset(for_writing); + if (for_writing && hash_table) + cleanup_hash_table(); + curr_key_entry= hash_table; +} + + +/* + Add a record into the buffer of a hashed join cache + + SYNOPSIS + put_record() + + DESCRIPTION + This implementation of the virtual function put_record writes the next + matching record into the join buffer of the JOIN_CACHE_HASHED class. + Additionally to what the default implementation does this function + performs the following. + It extracts from the record the key value used in lookups for matching + records and searches for this key in the hash tables from the join cache. + If it finds the key in the hash table it joins the record to the chain + of records with this key. If the key is not found in the hash table the + key is placed into it and a chain containing only the newly added record + is attached to the key entry. The key value is either placed in the hash + element added for the key or, if the use_emb_key flag is set, remains in + the record from the partial join. + If the match flag field of a record contains MATCH_IMPOSSIBLE the key is + not created for this record. 
+ + RETURN VALUE + TRUE if it has been decided that it should be the last record + in the join buffer, + FALSE otherwise +*/ + +bool JOIN_CACHE_HASHED::put_record() +{ + bool is_full; + uchar *key; + uint key_len= key_length; + uchar *key_ref_ptr; + uchar *link= 0; + TABLE_REF *ref= &join_tab->ref; + uchar *next_ref_ptr= pos; + + pos+= get_size_of_rec_offset(); + /* Write the record into the join buffer */ + if (prev_cache) + link= prev_cache->get_curr_rec_link(); + write_record_data(link, &is_full); + + if (last_written_is_null_compl) + return is_full; + + if (use_emb_key) + key= get_curr_emb_key(); + else + { + /* Build the key over the fields read into the record buffers */ + cp_buffer_from_ref(join->thd, join_tab->table, ref); + key= ref->key_buff; + } + + /* Look for the key in the hash table */ + if (key_search(key, key_len, &key_ref_ptr)) + { + uchar *last_next_ref_ptr; + /* + The key is found in the hash table. + Add the record to the circular list of the records attached to this key. + Below 'rec' is the record to be added into the record chain for the found + key, 'key_ref' points to a flatten representation of the st_key_entry + structure that contains the key and the head of the record chain. + */ + last_next_ref_ptr= get_next_rec_ref(key_ref_ptr+get_size_of_key_offset()); + /* rec->next_rec= key_entry->last_rec->next_rec */ + memcpy(next_ref_ptr, last_next_ref_ptr, get_size_of_rec_offset()); + /* key_entry->last_rec->next_rec= rec */ + store_next_rec_ref(last_next_ref_ptr, next_ref_ptr); + /* key_entry->last_rec= rec */ + store_next_rec_ref(key_ref_ptr+get_size_of_key_offset(), next_ref_ptr); + } + else + { + /* + The key is not found in the hash table. + Put the key into the join buffer linking it with the keys for the + corresponding hash entry. Create a circular list with one element + referencing the record and attach the list to the key in the buffer. 
+ */ + uchar *cp= last_key_entry; + cp-= get_size_of_rec_offset()+get_size_of_key_offset(); + store_next_key_ref(key_ref_ptr, cp); + store_null_key_ref(cp); + store_next_rec_ref(next_ref_ptr, next_ref_ptr); + store_next_rec_ref(cp+get_size_of_key_offset(), next_ref_ptr); + if (use_emb_key) + { + cp-= get_size_of_rec_offset(); + store_emb_key_ref(cp, key); + } + else + { + cp-= key_len; + memcpy(cp, key, key_len); + } + last_key_entry= cp; + DBUG_ASSERT(last_key_entry >= end_pos); + /* Increment the counter of key_entries in the hash table */ + key_entries++; + } + return is_full; +} + + +/* + Read the next record from the buffer of a hashed join cache + + SYNOPSIS + get_record() + + DESCRIPTION + Additionally to what the default implementation of the virtual + function get_record does this implementation skips the link element + used to connect the records with the same key into a chain. + + RETURN VALUE + TRUE there are no more records to read from the join buffer + FALSE otherwise +*/ + +bool JOIN_CACHE_HASHED::get_record() +{ + pos+= get_size_of_rec_offset(); + return this->JOIN_CACHE::get_record(); +} + + +/* + Skip record from a hashed join buffer if its match flag is set to MATCH_FOUND + + SYNOPSIS + skip_if_matched() + + DESCRIPTION + This implementation of the virtual function skip_if_matched does + the same as the default implementation does, but it takes into account + the link element used to connect the records with the same key into a chain. 
+ + RETURN VALUE + TRUE the match flag is MATCH_FOUND and the record has been skipped + FALSE otherwise +*/ + +bool JOIN_CACHE_HASHED::skip_if_matched() +{ + uchar *save_pos= pos; + pos+= get_size_of_rec_offset(); + if (!this->JOIN_CACHE::skip_if_matched()) + { + pos= save_pos; + return FALSE; + } + return TRUE; +} + + +/* + Skip record from a hashed join buffer if its match flag dictates to do so + + SYNOPSIS + skip_if_uneeded_match() + + DESCRIPTION + This implementation of the virtual function skip_if_not_needed_match does + the same as the default implementation does, but it takes into account + the link element used to connect the records with the same key into a chain. + + RETURN VALUE + TRUE the match flag dictates to skip the record + FALSE the match flag is off +*/ + +bool JOIN_CACHE_HASHED::skip_if_not_needed_match() +{ + uchar *save_pos= pos; + pos+= get_size_of_rec_offset(); + if (!this->JOIN_CACHE::skip_if_not_needed_match()) + { + pos= save_pos; + return FALSE; + } + return TRUE; +} + + +/* + Search for a key in the hash table of the join buffer + + SYNOPSIS + key_search() + key pointer to the key value + key_len key value length + key_ref_ptr OUT position of the reference to the next key from + the hash element for the found key , or + a position where the reference to the the hash + element for the key is to be added in the + case when the key has not been found + + DESCRIPTION + The function looks for a key in the hash table of the join buffer. + If the key is found the functionreturns the position of the reference + to the next key from to the hash element for the given key. + Otherwise the function returns the position where the reference to the + newly created hash element for the given key is to be added. 
+ + RETURN VALUE + TRUE the key is found in the hash table + FALSE otherwise +*/ + +bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len, + uchar **key_ref_ptr) +{ + bool is_found= FALSE; + uint idx= (this->*hash_func)(key, key_length); + uchar *ref_ptr= hash_table+size_of_key_ofs*idx; + while (!is_null_key_ref(ref_ptr)) + { + uchar *next_key; + ref_ptr= get_next_key_ref(ref_ptr); + next_key= use_emb_key ? get_emb_key(ref_ptr-get_size_of_rec_offset()) : + ref_ptr-key_length; + + if ((this->*hash_cmp_func)(next_key, key, key_len)) + { + is_found= TRUE; + break; + } + } + *key_ref_ptr= ref_ptr; + return is_found; +} + + +/* + Hash function that considers a key in the hash table as byte array + + SYNOPSIS + get_hash_idx_simple() + key pointer to the key value + key_len key value length + + DESCRIPTION + The function calculates an index of the hash entry in the hash table + of the join buffer for the given key. It considers the key just as + a sequence of bytes of the length key_len. + + RETURN VALUE + the calculated index of the hash entry for the given key +*/ + +inline +uint JOIN_CACHE_HASHED::get_hash_idx_simple(uchar* key, uint key_len) +{ + ulong nr= 1; + ulong nr2= 4; + uchar *pos= key; + uchar *end= key+key_len; + for (; pos < end ; pos++) + { + nr^= (ulong) ((((uint) nr & 63)+nr2)*((uint) *pos))+ (nr << 8); + nr2+= 3; + } + return nr % hash_entries; +} + + +/* + Hash function that takes into account collations of the components of the key + + SYNOPSIS + get_hash_idx_complex() + key pointer to the key value + key_len key value length + + DESCRIPTION + The function calculates an index of the hash entry in the hash table + of the join buffer for the given key. It takes into account that the + components of the key may be of a varchar type with different collations. + The function guarantees that the same hash value for any two equal + keys that may differ as byte sequences. 
+ The function takes the info about the components of the key, their + types and used collations from the class member ref_key_info containing + a pointer to the descriptor of the index that can be used for the join + operation. + + RETURN VALUE + the calculated index of the hash entry for the given key +*/ + +inline +uint JOIN_CACHE_HASHED::get_hash_idx_complex(uchar *key, uint key_len) +{ + return + (uint) (key_hashnr(ref_key_info, ref_used_key_parts, key) % hash_entries); +} + + +/* + Compare two key entries in the hash table as sequence of bytes + + SYNOPSIS + equal_keys_simple() + key1 pointer to the first key entry + key2 pointer to the second key entry + key_len the length of the key values + + DESCRIPTION + The function compares two key entries in the hash table key1 and key2 + as two sequences bytes of the length key_len + + RETURN VALUE + TRUE key1 coincides with key2 + FALSE otherwise +*/ + +inline +bool JOIN_CACHE_HASHED::equal_keys_simple(uchar *key1, uchar *key2, + uint key_len) +{ + return memcmp(key1, key2, key_len) == 0; +} + + +/* + Compare two key entries taking into account the used collation + + SYNOPSIS + equal_keys_complex() + key1 pointer to the first key entry + key2 pointer to the second key entry + key_len the length of the key values + + DESCRIPTION + The function checks whether two key entries in the hash table + key1 and key2 are equal as, possibly, compound keys of a certain + structure whose components may be of a varchar type and may + employ different collations. + The descriptor of the key structure is taken from the class + member ref_key_info. 
+ + RETURN VALUE + TRUE key1 is equal tokey2 + FALSE otherwise +*/ + +inline +bool JOIN_CACHE_HASHED::equal_keys_complex(uchar *key1, uchar *key2, + uint key_len) +{ + return key_buf_cmp(ref_key_info, ref_used_key_parts, key1, key2) == 0; +} + + +/* + Clean up the hash table of the join buffer + + SYNOPSIS + cleanup_hash_table() + key pointer to the key value + key_len key value length + + DESCRIPTION + The function cleans up the hash table in the join buffer removing all + hash elements from the table. + + RETURN VALUE + none +*/ + +void JOIN_CACHE_HASHED:: cleanup_hash_table() +{ + last_key_entry= hash_table; + bzero(hash_table, (buff+buff_size)-hash_table); + key_entries= 0; +} + + +/* + Check whether all records in a key chain have their match flags set on + + SYNOPSIS + check_all_match_flags_for_key() + key_chain_ptr + + DESCRIPTION + This function retrieves records in the given circular chain and checks + whether their match flags are set on. The parameter key_chain_ptr shall + point to the position in the join buffer storing the reference to the + last element of this chain. + + RETURN VALUE + TRUE if each retrieved record has its match flag set to MATCH_FOUND + FALSE otherwise +*/ + +bool JOIN_CACHE_HASHED::check_all_match_flags_for_key(uchar *key_chain_ptr) +{ + uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr); + uchar *next_rec_ref_ptr= last_rec_ref_ptr; + do + { + next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr); + uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset; + if (get_match_flag_by_pos(rec_ptr) != MATCH_FOUND) + return FALSE; + } + while (next_rec_ref_ptr != last_rec_ref_ptr); + return TRUE; +} + + +/* + Get the next key built for the records from the buffer of a hashed join cache + + SYNOPSIS + get_next_key() + key pointer to the buffer where the key value is to be placed + + DESCRIPTION + The function reads the next key value stored in the hash table of the + join buffer. 
Depending on the value of the use_emb_key flag of the + join cache the value is read either from the table itself or from + the record field where it occurs. + + RETURN VALUE + length of the key value - if the starting value of 'cur_key_entry' refers + to the position after that referred by the the value of 'last_key_entry', + 0 - otherwise. +*/ + +uint JOIN_CACHE_HASHED::get_next_key(uchar ** key) +{ + if (curr_key_entry == last_key_entry) + return 0; + + curr_key_entry-= key_entry_length; + + *key = use_emb_key ? get_emb_key(curr_key_entry) : curr_key_entry; + + DBUG_ASSERT(*key >= buff && *key < hash_table); + + return key_length; +} + + +/* + Initiate an iteration process over records in the joined table + + SYNOPSIS + open() + + DESCRIPTION + The function initiates the process of iteration over records from the + joined table recurrently performed by the BNL/BKLH join algorithm. + + RETURN VALUE + 0 the initiation is a success + error code otherwise +*/ + +int JOIN_TAB_SCAN::open() +{ + save_or_restore_used_tabs(join_tab, FALSE); + is_first_record= TRUE; + join_tab->tracker->r_scans++; + return join_init_read_record(join_tab); +} + + +/* + Read the next record that can match while scanning the joined table + + SYNOPSIS + next() + + DESCRIPTION + The function reads the next record from the joined table that can + match some records in the buffer of the join cache 'cache'. To do + this the function calls the function that scans table records and + looks for the next one that meets the condition pushed to the + joined table join_tab. + + NOTES + The function catches the signal that kills the query. 
+ + RETURN VALUE + 0 the next record exists and has been successfully read + error code otherwise +*/ + +int JOIN_TAB_SCAN::next() +{ + int err= 0; + int skip_rc; + READ_RECORD *info= &join_tab->read_record; + SQL_SELECT *select= join_tab->cache_select; + THD *thd= join->thd; + + if (is_first_record) + is_first_record= FALSE; + else + err= info->read_record(); + + if (!err) + { + join_tab->tracker->r_rows++; + } + + while (!err && select && (skip_rc= select->skip_record(thd)) <= 0) + { + if (unlikely(thd->check_killed()) || skip_rc < 0) + return 1; + /* + Move to the next record if the last retrieved record does not + meet the condition pushed to the table join_tab. + */ + err= info->read_record(); + if (!err) + { + join_tab->tracker->r_rows++; + } + } + + if (!err) + join_tab->tracker->r_rows_after_where++; + return err; +} + + +/* + Walk back in join order from join_tab until we encounter a join tab with + tab->cache!=NULL, and save/restore tab->table->status along the way. + + @param save TRUE save + FALSE restore +*/ + +static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save) +{ + JOIN_TAB *first= join_tab->bush_root_tab? + join_tab->bush_root_tab->bush_children->start : + join_tab->join->join_tab + join_tab->join->const_tables; + + for (JOIN_TAB *tab= join_tab-1; tab != first && !tab->cache; tab--) + { + if (tab->bush_children) + { + for (JOIN_TAB *child= tab->bush_children->start; + child != tab->bush_children->end; + child++) + { + if (save) + child->table->status= child->status; + else + { + tab->status= tab->table->status; + tab->table->status= 0; + } + } + } + + if (save) + tab->table->status= tab->status; + else + { + tab->status= tab->table->status; + tab->table->status= 0; + } + } +} + + +/* + Perform finalizing actions for a scan over the table records + + SYNOPSIS + close() + + DESCRIPTION + The function performs the necessary restoring actions after + the table scan over the joined table has been finished. 
+ + RETURN VALUE + none +*/ + +void JOIN_TAB_SCAN::close() +{ + save_or_restore_used_tabs(join_tab, TRUE); +} + + +/* + Prepare to iterate over the BNL join cache buffer to look for matches + + SYNOPSIS + prepare_look_for_matches() + skip_last <-> ignore the last record in the buffer + + DESCRIPTION + The function prepares the join cache for an iteration over the + records in the join buffer. The iteration is performed when looking + for matches for the record from the joined table join_tab that + has been placed into the record buffer of the joined table. + If the value of the parameter skip_last is TRUE then the last + record from the join buffer is ignored. + The function initializes the counter of the records that have not been + iterated over yet. + + RETURN VALUE + TRUE there are no records in the buffer to iterate over + FALSE otherwise +*/ + +bool JOIN_CACHE_BNL::prepare_look_for_matches(bool skip_last) +{ + if (!records) + return TRUE; + reset(FALSE); + rem_records= (uint)records - MY_TEST(skip_last); + return rem_records == 0; +} + + +/* + Get next record from the BNL join cache buffer when looking for matches + + SYNOPSIS + get_next_candidate_for_match + + DESCRIPTION + This method is used for iterations over the records from the join + cache buffer when looking for matches for records from join_tab. + The method performs the necessary preparations to read the next record + from the join buffer into the record buffer by the method + read_next_candidate_for_match, or, to skip the next record from the join + buffer by the method skip_next_candidate_for_match. + This implementation of the virtual method get_next_candidate_for_match + just decrements the counter of the records that are to be iterated over + and returns the current value of the cursor 'pos' as the position of + the record to be processed. 
+ + RETURN VALUE + pointer to the position right after the prefix of the current record + in the join buffer if the there is another record to iterate over, + 0 - otherwise. +*/ + +uchar *JOIN_CACHE_BNL::get_next_candidate_for_match() +{ + if (!rem_records) + return 0; + rem_records--; + return pos+base_prefix_length; +} + + +/* + Check whether the matching record from the BNL cache is to be skipped + + SYNOPSIS + skip_next_candidate_for_match + rec_ptr pointer to the position in the join buffer right after the prefix + of the current record + + DESCRIPTION + This implementation of the virtual function just calls the + method skip_if_not_needed_match to check whether the record referenced by + ref_ptr has its match flag set either to MATCH_FOUND and join_tab is the + first inner table of a semi-join, or it's set to MATCH_IMPOSSIBLE and + join_tab is the first inner table of an outer join. + If so, the function just skips this record setting the value of the + cursor 'pos' to the position right after it. + + RETURN VALUE + TRUE the record referenced by rec_ptr has been skipped + FALSE otherwise +*/ + +bool JOIN_CACHE_BNL::skip_next_candidate_for_match(uchar *rec_ptr) +{ + pos= rec_ptr-base_prefix_length; + return skip_if_not_needed_match(); +} + + +/* + Read next record from the BNL join cache buffer when looking for matches + + SYNOPSIS + read_next_candidate_for_match + rec_ptr pointer to the position in the join buffer right after the prefix + the current record. + + DESCRIPTION + This implementation of the virtual method read_next_candidate_for_match + calls the method get_record to read the record referenced by rec_ptr from + the join buffer into the record buffer. If this record refers to the + fields in the other join buffers the call of get_record ensures that + these fields are read into the corresponding record buffers as well. + This function is supposed to be called after a successful call of + the method get_next_candidate_for_match. 
+ + RETURN VALUE + none +*/ + +void JOIN_CACHE_BNL::read_next_candidate_for_match(uchar *rec_ptr) +{ + pos= rec_ptr-base_prefix_length; + get_record(); +} + + +/* + Initialize the BNL join cache + + SYNOPSIS + init + for_explain join buffer is initialized for explain only + + DESCRIPTION + The function initializes the cache structure. It is supposed to be called + right after a constructor for the JOIN_CACHE_BNL. + + NOTES + The function first constructs a companion object of the type JOIN_TAB_SCAN, + then it calls the init method of the parent class. + + RETURN VALUE + 0 initialization with buffer allocations has been succeeded + 1 otherwise +*/ + +int JOIN_CACHE_BNL::init(bool for_explain) +{ + DBUG_ENTER("JOIN_CACHE_BNL::init"); + + if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab))) + DBUG_RETURN(1); + + DBUG_RETURN(JOIN_CACHE::init(for_explain)); +} + + +/* + Get the chain of records from buffer matching the current candidate for join + + SYNOPSIS + get_matching_chain_by_join_key() + + DESCRIPTION + This function first build a join key for the record of join_tab that + currently is in the join buffer for this table. Then it looks for + the key entry with this key in the hash table of the join cache. + If such a key entry is found the function returns the pointer to + the head of the chain of records in the join_buffer that match this + key. + + RETURN VALUE + The pointer to the corresponding circular list of records if + the key entry with the join key is found, 0 - otherwise. 
+*/ + +uchar *JOIN_CACHE_BNLH::get_matching_chain_by_join_key() +{ + uchar *key_ref_ptr; + TABLE *table= join_tab->table; + TABLE_REF *ref= &join_tab->ref; + KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key); + /* Build the join key value out of the record in the record buffer */ + key_copy(key_buff, table->record[0], keyinfo, key_length, TRUE); + /* Look for this key in the join buffer */ + if (!key_search(key_buff, key_length, &key_ref_ptr)) + return 0; + return key_ref_ptr+get_size_of_key_offset(); +} + + +/* + Prepare to iterate over the BNLH join cache buffer to look for matches + + SYNOPSIS + prepare_look_for_matches() + skip_last <-> ignore the last record in the buffer + + DESCRIPTION + The function prepares the join cache for an iteration over the + records in the join buffer. The iteration is performed when looking + for matches for the record from the joined table join_tab that + has been placed into the record buffer of the joined table. + If the value of the parameter skip_last is TRUE then the last + record from the join buffer is ignored. + The function builds the hashed key from the join fields of join_tab + and uses this key to look in the hash table of the join cache for + the chain of matching records in the join buffer. If it finds + such a chain it sets the member last_matching_rec_ref_ptr to point to the + last link of the chain while setting the member next_matching_rec_ref_ptr to 0. 
+ + RETURN VALUE + TRUE there are no matching records in the buffer to iterate over + FALSE otherwise +*/ + +bool JOIN_CACHE_BNLH::prepare_look_for_matches(bool skip_last) +{ + uchar *curr_matching_chain; + last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0; + if (!(curr_matching_chain= get_matching_chain_by_join_key())) + return 1; + last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain); + return 0; +} + + +/* + Get next record from the BNLH join cache buffer when looking for matches + + SYNOPSIS + get_next_candidate_for_match + + DESCRIPTION + This method is used for iterations over the records from the join + cache buffer when looking for matches for records from join_tab. + The methods performs the necessary preparations to read the next record + from the join buffer into the record buffer by the method + read_next_candidate_for_match, or, to skip the next record from the join + buffer by the method skip_next_candidate_for_match. + This implementation of the virtual method moves to the next record + in the chain of all records from the join buffer that are to be + equi-joined with the current record from join_tab. + + RETURN VALUE + pointer to the beginning of the record fields in the join buffer + if the there is another record to iterate over, 0 - otherwise. +*/ + +uchar *JOIN_CACHE_BNLH::get_next_candidate_for_match() +{ + if (next_matching_rec_ref_ptr == last_matching_rec_ref_ptr) + return 0; + next_matching_rec_ref_ptr= get_next_rec_ref(next_matching_rec_ref_ptr ? 
+ next_matching_rec_ref_ptr : + last_matching_rec_ref_ptr); + return next_matching_rec_ref_ptr+rec_fields_offset; +} + + +/* + Check whether the matching record from the BNLH cache is to be skipped + + SYNOPSIS + skip_next_candidate_for_match + rec_ptr pointer to the position in the join buffer right after + the previous record + + DESCRIPTION + This implementation of the virtual function just calls the + method get_match_flag_by_pos to check whether the record referenced + by rec_ptr has its match flag set to MATCH_FOUND. + + RETURN VALUE + TRUE the record referenced by rec_ptr has its match flag set to + MATCH_FOUND + FALSE otherwise +*/ + +bool JOIN_CACHE_BNLH::skip_next_candidate_for_match(uchar *rec_ptr) +{ + return join_tab->check_only_first_match() && + (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND); +} + + +/* + Read next record from the BNLH join cache buffer when looking for matches + + SYNOPSIS + read_next_candidate_for_match + rec_ptr pointer to the position in the join buffer right after + the previous record + + DESCRIPTION + This implementation of the virtual method read_next_candidate_for_match + calls the method get_record_by_pos to read the record referenced by rec_ptr + from the join buffer into the record buffer. If this record refers to + fields in the other join buffers the call of get_record_by_pos ensures that + these fields are read into the corresponding record buffers as well. + This function is supposed to be called after a successful call of + the method get_next_candidate_for_match. + + RETURN VALUE + none +*/ + +void JOIN_CACHE_BNLH::read_next_candidate_for_match(uchar *rec_ptr) +{ + get_record_by_pos(rec_ptr); +} + + +/* + Initialize the BNLH join cache + + SYNOPSIS + init + for_explain join buffer is initialized for explain only + + DESCRIPTION + The function initializes the cache structure. It is supposed to be called + right after a constructor for the JOIN_CACHE_BNLH. 
+ + NOTES + The function first constructs a companion object of the type JOIN_TAB_SCAN, + then it calls the init method of the parent class. + + RETURN VALUE + 0 initialization with buffer allocations has been succeeded + 1 otherwise +*/ + +int JOIN_CACHE_BNLH::init(bool for_explain) +{ + DBUG_ENTER("JOIN_CACHE_BNLH::init"); + + if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab))) + DBUG_RETURN(1); + + DBUG_RETURN(JOIN_CACHE_HASHED::init(for_explain)); +} + + +/* + Calculate the increment of the MRR buffer for a record write + + SYNOPSIS + aux_buffer_incr() + + DESCRIPTION + This implementation of the virtual function aux_buffer_incr determines + for how much the size of the MRR buffer should be increased when another + record is added to the cache. + + RETURN VALUE + the increment of the size of the MRR buffer for the next record +*/ + +uint JOIN_TAB_SCAN_MRR::aux_buffer_incr(size_t recno) +{ + uint incr= 0; + TABLE_REF *ref= &join_tab->ref; + TABLE *tab= join_tab->table; + ha_rows rec_per_key= + (ha_rows) tab->key_info[ref->key].actual_rec_per_key(ref->key_parts-1); + set_if_bigger(rec_per_key, 1); + if (recno == 1) + incr= ref->key_length + tab->file->ref_length; + incr+= (uint)(tab->file->stats.mrr_length_per_rec * rec_per_key); + return incr; +} + + +/* + Initiate iteration over records returned by MRR for the current join buffer + + SYNOPSIS + open() + + DESCRIPTION + The function initiates the process of iteration over the records from + join_tab returned by the MRR interface functions for records from + the join buffer. Such an iteration is performed by the BKA/BKAH join + algorithm for each new refill of the join buffer. + The function calls the MRR handler function multi_range_read_init to + initiate this process. 
+ + RETURN VALUE + 0 the initiation is a success + error code otherwise +*/ + +int JOIN_TAB_SCAN_MRR::open() +{ + handler *file= join_tab->table->file; + + join_tab->table->null_row= 0; + + + /* Dynamic range access is never used with BKA */ + DBUG_ASSERT(join_tab->use_quick != 2); + + join_tab->tracker->r_scans++; + save_or_restore_used_tabs(join_tab, FALSE); + + init_mrr_buff(); + + /* + Prepare to iterate over keys from the join buffer and to get + matching candidates obtained with MRR handler functions. + */ + if (!file->inited) + file->ha_index_init(join_tab->ref.key, 1); + ranges= cache->get_number_of_ranges_for_mrr(); + if (!join_tab->cache_idx_cond) + range_seq_funcs.skip_index_tuple= 0; + return file->multi_range_read_init(&range_seq_funcs, (void*) cache, + ranges, mrr_mode, &mrr_buff); +} + + +/* + Read the next record returned by MRR for the current join buffer + + SYNOPSIS + next() + + DESCRIPTION + The function reads the next record from the joined table join_tab + returned by the MRR handler function multi_range_read_next for + the current refill of the join buffer. The record is read into + the record buffer used for join_tab records in join operations. + + RETURN VALUE + 0 the next record exists and has been successfully read + error code otherwise +*/ + +int JOIN_TAB_SCAN_MRR::next() +{ + char **ptr= (char **) cache->get_curr_association_ptr(); + + DBUG_ASSERT(sizeof(range_id_t) == sizeof(*ptr)); + int rc= join_tab->table->file->multi_range_read_next((range_id_t*)ptr) ? -1 : 0; + if (!rc) + { + join_tab->tracker->r_rows++; + join_tab->tracker->r_rows_after_where++; + /* + If a record in an incremental cache contains no fields then the + association for the last record in cache will be equal to cache->end_pos + */ + /* + psergey: this makes no sense where HA_MRR_NO_ASSOC is used. 
+ DBUG_ASSERT(cache->buff <= (uchar *) (*ptr) && + (uchar *) (*ptr) <= cache->end_pos); + */ + } + return rc; +} + + +static +void bka_range_seq_key_info(void *init_params, uint *length, + key_part_map *map) +{ + TABLE_REF *ref= &(((JOIN_CACHE*)init_params)->join_tab->ref); + *length= ref->key_length; + *map= (key_part_map(1) << ref->key_parts) - 1; +} + + +/* +Initialize retrieval of range sequence for BKA join algorithm + +SYNOPSIS + bka_range_seq_init() + init_params pointer to the BKA join cache object + n_ranges the number of ranges obtained + flags combination of MRR flags + +DESCRIPTION + The function interprets init_param as a pointer to a JOIN_CACHE_BKA + object. The function prepares for an iteration over the join keys + built for all records from the cache join buffer. + +NOTE + This function are used only as a callback function. + +RETURN VALUE + init_param value that is to be used as a parameter of bka_range_seq_next() +*/ + +static +range_seq_t bka_range_seq_init(void *init_param, uint n_ranges, uint flags) +{ + DBUG_ENTER("bka_range_seq_init"); + JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) init_param; + cache->reset(0); + DBUG_RETURN((range_seq_t) init_param); +} + + +/* +Get the next range/key over records from the join buffer used by a BKA cache + +SYNOPSIS + bka_range_seq_next() + seq the value returned by bka_range_seq_init + range OUT reference to the next range + +DESCRIPTION + The function interprets seq as a pointer to a JOIN_CACHE_BKA + object. The function returns a pointer to the range descriptor + for the key built over the next record from the join buffer. + +NOTE + This function are used only as a callback function. 
+ +RETURN VALUE + FALSE ok, the range structure filled with info about the next range/key + TRUE no more ranges +*/ + +static +bool bka_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) +{ + DBUG_ENTER("bka_range_seq_next"); + JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; + TABLE_REF *ref= &cache->join_tab->ref; + key_range *start_key= &range->start_key; + if ((start_key->length= cache->get_next_key((uchar **) &start_key->key))) + { + start_key->keypart_map= (1 << ref->key_parts) - 1; + start_key->flag= HA_READ_KEY_EXACT; + range->end_key= *start_key; + range->end_key.flag= HA_READ_AFTER_KEY; + range->ptr= (char *) cache->get_curr_rec(); + range->range_flag= EQ_RANGE; + DBUG_RETURN(0); + } + DBUG_RETURN(1); +} + + +/* +Check whether range_info orders to skip the next record from BKA buffer + +SYNOPSIS + bka_range_seq_skip_record() + seq value returned by bka_range_seq_init() + range_info information about the next range + rowid [NOT USED] rowid of the record to be checked + + +DESCRIPTION + The function interprets seq as a pointer to a JOIN_CACHE_BKA object. + The function returns TRUE if the record with this range_info + is to be filtered out from the stream of records returned by + multi_range_read_next(). + +NOTE + This function are used only as a callback function. 
+ +RETURN VALUE + 1 record with this range_info is to be filtered out from the stream + of records returned by multi_range_read_next() + 0 the record is to be left in the stream +*/ + +static +bool bka_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid) +{ + DBUG_ENTER("bka_range_seq_skip_record"); + JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; + bool res= cache->get_match_flag_by_pos((uchar *) range_info) == + JOIN_CACHE::MATCH_FOUND; + DBUG_RETURN(res); +} + + +/* +Check if the record combination from BKA cache matches the index condition + +SYNOPSIS + bka_skip_index_tuple() + rseq value returned by bka_range_seq_init() + range_info record chain for the next range/key returned by MRR + +DESCRIPTION + This is wrapper for JOIN_CACHE_BKA::skip_index_tuple method, + see comments there. + +NOTE + This function is used as a RANGE_SEQ_IF::skip_index_tuple callback. + +RETURN VALUE + 0 The record combination satisfies the index condition + 1 Otherwise +*/ + +static +bool bka_skip_index_tuple(range_seq_t rseq, range_id_t range_info) +{ + DBUG_ENTER("bka_skip_index_tuple"); + JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; + THD *thd= cache->thd(); + bool res; + status_var_increment(thd->status_var.ha_icp_attempts); + if (!(res= cache->skip_index_tuple(range_info))) + status_var_increment(thd->status_var.ha_icp_match); + DBUG_RETURN(res); +} + + +/* +Prepare to read the record from BKA cache matching the current joined record + +SYNOPSIS + prepare_look_for_matches() + skip_last <-> ignore the last record in the buffer (always unused here) + +DESCRIPTION + The function prepares to iterate over records in the join cache buffer + matching the record loaded into the record buffer for join_tab when + performing join operation by BKA join algorithm. With BKA algorithms the + record loaded into the record buffer for join_tab always has a direct + reference to the matching records from the join buffer. 
When the regular + BKA join algorithm is employed the record from join_tab can refer to + only one such record. + The function sets the counter of the remaining records from the cache + buffer that would match the current join_tab record to 1. + +RETURN VALUE + TRUE there are no records in the buffer to iterate over + FALSE otherwise +*/ + +bool JOIN_CACHE_BKA::prepare_look_for_matches(bool skip_last) +{ + if (!records) + return TRUE; + rem_records= 1; + return FALSE; +} + + +/* +Get the record from the BKA cache matching the current joined record + +SYNOPSIS + get_next_candidate_for_match + +DESCRIPTION + This method is used for iterations over the records from the join + cache buffer when looking for matches for records from join_tab. + The method performs the necessary preparations to read the next record + from the join buffer into the record buffer by the method + read_next_candidate_for_match, or, to skip the next record from the join + buffer by the method skip_if_not_needed_match. + This implementation of the virtual method get_next_candidate_for_match + just decrements the counter of the records that are to be iterated over + and returns the value of curr_association as a reference to the position + of the beginning of the record fields in the buffer. + +RETURN VALUE + pointer to the start of the record fields in the join buffer + if the there is another record to iterate over, 0 - otherwise. +*/ + +uchar *JOIN_CACHE_BKA::get_next_candidate_for_match() +{ + if (!rem_records) + return 0; + rem_records--; + return curr_association; +} + + +/* +Check whether the matching record from the BKA cache is to be skipped + +SYNOPSIS + skip_next_candidate_for_match + rec_ptr pointer to the position in the join buffer right after + the previous record + +DESCRIPTION + This implementation of the virtual function just calls the + method get_match_flag_by_pos to check whether the record referenced + by ref_ptr has its match flag set to MATCH_FOUND. 
+ +RETURN VALUE + TRUE the record referenced by rec_ptr has its match flag set to + MATCH_FOUND + FALSE otherwise +*/ + +bool JOIN_CACHE_BKA::skip_next_candidate_for_match(uchar *rec_ptr) +{ + return join_tab->check_only_first_match() && + (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND); +} + + +/* +Read the next record from the BKA join cache buffer when looking for matches + +SYNOPSIS + read_next_candidate_for_match + rec_ptr pointer to the position in the join buffer right after + the previous record + +DESCRIPTION + This implementation of the virtual method read_next_candidate_for_match + calls the method get_record_by_pos to read the record referenced by rec_ptr + from the join buffer into the record buffer. If this record refers to + fields in the other join buffers the call of get_record_by_pos ensures that + these fields are read into the corresponding record buffers as well. + This function is supposed to be called after a successful call of + the method get_next_candidate_for_match. + +RETURN VALUE + none +*/ + +void JOIN_CACHE_BKA::read_next_candidate_for_match(uchar *rec_ptr) +{ + get_record_by_pos(rec_ptr); +} + + +/* +Initialize the BKA join cache + +SYNOPSIS + init + for_explain join buffer is initialized for explain only + + +DESCRIPTION + The function initializes the cache structure. It is supposed to be called + right after a constructor for the JOIN_CACHE_BKA. + +NOTES + The function first constructs a companion object of the type + JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class. + +RETURN VALUE + 0 initialization with buffer allocations has been succeeded + 1 otherwise +*/ + +int JOIN_CACHE_BKA::init(bool for_explain) +{ + int res; + bool check_only_first_match= join_tab->check_only_first_match(); + + RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info, + bka_range_seq_init, + bka_range_seq_next, + check_only_first_match ? 
bka_range_seq_skip_record : 0, + bka_skip_index_tuple }; + + DBUG_ENTER("JOIN_CACHE_BKA::init"); + + JOIN_TAB_SCAN_MRR *jsm; + if (!(join_tab_scan= jsm= new JOIN_TAB_SCAN_MRR(join, join_tab, + mrr_mode, rs_funcs))) + DBUG_RETURN(1); + + if ((res= JOIN_CACHE::init(for_explain))) + DBUG_RETURN(res); + + if (use_emb_key) + jsm->mrr_mode |= HA_MRR_MATERIALIZED_KEYS; + + DBUG_RETURN(0); +} + + +/* +Get the key built over the next record from BKA join buffer + +SYNOPSIS + get_next_key() + key pointer to the buffer where the key value is to be placed + +DESCRIPTION + The function reads key fields from the current record in the join buffer + and builds the key value out of these fields that will be used to access + the 'join_tab' table. Some of key fields may belong to previous caches. + They are accessed via record references to the record parts stored in the + previous join buffers. The other key fields always are placed right after + the flag fields of the record. + If the key is embedded, which means that its value can be read directly + from the join buffer, then *key is set to the beginning of the key in + this buffer. Otherwise the key is built in the join_tab->ref->key_buff. + The function returns the length of the key if it succeeds to read it. + It is assumed that the function starts reading at the position of + the record length which is provided for each record in a BKA cache. + After the key is built the 'pos' value points to the first position after + the current record. + The function just skips the records with MATCH_IMPOSSIBLE in the + match flag field if there is any. + The function returns 0 if the initial position is after the beginning + of the record fields for the last record from the join buffer. + +RETURN VALUE + length of the key value - if the starting value of 'pos' points to + the position before the fields for the last record, + 0 - otherwise. 
+*/ + +uint JOIN_CACHE_BKA::get_next_key(uchar ** key) +{ + uint len; + uint32 rec_len; + uchar *init_pos; + JOIN_CACHE *cache; + +start: + + /* Any record in a BKA cache is prepended with its length */ + DBUG_ASSERT(with_length); + + if ((pos+size_of_rec_len) > last_rec_pos || !records) + return 0; + + /* Read the length of the record */ + rec_len= get_rec_length(pos); + pos+= size_of_rec_len; + init_pos= pos; + + /* Read a reference to the previous cache if any */ + if (prev_cache) + pos+= prev_cache->get_size_of_rec_offset(); + + curr_rec_pos= pos; + + /* Read all flag fields of the record */ + read_flag_fields(); + + if (with_match_flag && + (Match_flag) curr_rec_pos[0] == MATCH_IMPOSSIBLE ) + { + pos= init_pos+rec_len; + goto start; + } + + if (use_emb_key) + { + /* An embedded key is taken directly from the join buffer */ + *key= pos; + len= emb_key_length; + } + else + { + /* Read key arguments from previous caches if there are any such fields */ + if (external_key_arg_fields) + { + uchar *rec_ptr= curr_rec_pos; + uint key_arg_count= external_key_arg_fields; + CACHE_FIELD **copy_ptr= blob_ptr-key_arg_count; + for (cache= prev_cache; key_arg_count; cache= cache->prev_cache) + { + uint len= 0; + DBUG_ASSERT(cache); + rec_ptr= cache->get_rec_ref(rec_ptr); + while (!cache->referenced_fields) + { + cache= cache->prev_cache; + DBUG_ASSERT(cache); + rec_ptr= cache->get_rec_ref(rec_ptr); + } + while (key_arg_count && + cache->read_referenced_field(*copy_ptr, rec_ptr, &len)) + { + copy_ptr++; + --key_arg_count; + } + } + } + + /* + Read the other key arguments from the current record. The fields for + these arguments are always first in the sequence of the record's fields. 
+ */ + CACHE_FIELD *copy= field_descr+flag_fields; + CACHE_FIELD *copy_end= copy+local_key_arg_fields; + bool blob_in_rec_buff= blob_data_is_in_rec_buff(curr_rec_pos); + for ( ; copy < copy_end; copy++) + read_record_field(copy, blob_in_rec_buff); + + /* Build the key over the fields read into the record buffers */ + TABLE_REF *ref= &join_tab->ref; + cp_buffer_from_ref(join->thd, join_tab->table, ref); + *key= ref->key_buff; + len= ref->key_length; + } + + pos= init_pos+rec_len; + + return len; +} + + +/* +Check the index condition of the joined table for a record from the BKA cache + +SYNOPSIS + skip_index_tuple() + range_info pointer to the record returned by MRR + +DESCRIPTION + This function is invoked from MRR implementation to check if an index + tuple matches the index condition. It is used in the case where the index + condition actually depends on both columns of the used index and columns + from previous tables. + +NOTES + Accessing columns of the previous tables requires special handling with + BKA. The idea of BKA is to collect record combinations in a buffer and + then do a batch of ref access lookups, i.e. by the time we're doing a + lookup its previous-records-combination is not in prev_table->record[0] + but somewhere in the join buffer. + We need to get it from there back into prev_table(s)->record[0] before we + can evaluate the index condition, and that's why we need this function + instead of regular IndexConditionPushdown. + +NOTES + Possible optimization: + Before we unpack the record from a previous table + check if this table is used in the condition. + If so then unpack the record otherwise skip the unpacking. + This should be done by a special virtual method + get_partial_record_by_pos(). 
+ +RETURN VALUE + 1 the record combination does not satisfies the index condition + 0 otherwise +*/ + +bool JOIN_CACHE_BKA::skip_index_tuple(range_id_t range_info) +{ + DBUG_ENTER("JOIN_CACHE_BKA::skip_index_tuple"); + get_record_by_pos((uchar*)range_info); + DBUG_RETURN(!join_tab->cache_idx_cond->val_int()); +} + + + +/* +Initialize retrieval of range sequence for the BKAH join algorithm + +SYNOPSIS + bkah_range_seq_init() + init_params pointer to the BKAH join cache object + n_ranges the number of ranges obtained + flags combination of MRR flags + +DESCRIPTION + The function interprets init_param as a pointer to a JOIN_CACHE_BKAH + object. The function prepares for an iteration over distinct join keys + built over the records from the cache join buffer. + +NOTE + This function are used only as a callback function. + +RETURN VALUE + init_param value that is to be used as a parameter of + bkah_range_seq_next() +*/ + +static +range_seq_t bkah_range_seq_init(void *init_param, uint n_ranges, uint flags) +{ + DBUG_ENTER("bkah_range_seq_init"); + JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) init_param; + cache->reset(0); + DBUG_RETURN((range_seq_t) init_param); +} + + +/* +Get the next range/key over records from the join buffer of a BKAH cache + +SYNOPSIS + bkah_range_seq_next() + seq value returned by bkah_range_seq_init() + range OUT reference to the next range + +DESCRIPTION + The function interprets seq as a pointer to a JOIN_CACHE_BKAH + object. The function returns a pointer to the range descriptor + for the next unique key built over records from the join buffer. + +NOTE + This function are used only as a callback function. 
+ +RETURN VALUE + FALSE ok, the range structure filled with info about the next range/key + TRUE no more ranges +*/ + +static +bool bkah_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) +{ + DBUG_ENTER("bkah_range_seq_next"); + JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq; + TABLE_REF *ref= &cache->join_tab->ref; + key_range *start_key= &range->start_key; + if ((start_key->length= cache->get_next_key((uchar **) &start_key->key))) + { + start_key->keypart_map= (1 << ref->key_parts) - 1; + start_key->flag= HA_READ_KEY_EXACT; + range->end_key= *start_key; + range->end_key.flag= HA_READ_AFTER_KEY; + range->ptr= (char *) cache->get_curr_key_chain(); + range->range_flag= EQ_RANGE; + DBUG_RETURN(0); + } + DBUG_RETURN(1); +} + + +/* +Check whether range_info orders to skip the next record from BKAH join buffer + +SYNOPSIS + bkah_range_seq_skip_record() + seq value returned by bkah_range_seq_init() + range_info information about the next range/key returned by MRR + rowid [NOT USED] rowid of the record to be checked (not used) + +DESCRIPTION + The function interprets seq as a pointer to a JOIN_CACHE_BKAH + object. The function returns TRUE if the record with this range_info + is to be filtered out from the stream of records returned by + multi_range_read_next(). + +NOTE + This function are used only as a callback function. 
+ +RETURN VALUE + 1 record with this range_info is to be filtered out from the stream + of records returned by multi_range_read_next() + 0 the record is to be left in the stream +*/ + +static +bool bkah_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid) +{ + DBUG_ENTER("bkah_range_seq_skip_record"); + JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq; + bool res= cache->check_all_match_flags_for_key((uchar *) range_info); + DBUG_RETURN(res); +} + + +/* +Check if the record combination from BKAH cache matches the index condition + +SYNOPSIS + bkah_skip_index_tuple() + rseq value returned by bka_range_seq_init() + range_info record chain for the next range/key returned by MRR + +DESCRIPTION + This is wrapper for JOIN_CACHE_BKA_UNIQUE::skip_index_tuple method, + see comments there. + +NOTE + This function is used as a RANGE_SEQ_IF::skip_index_tuple callback. + +RETURN VALUE + 0 some records from the chain satisfy the index condition + 1 otherwise +*/ + +static +bool bkah_skip_index_tuple(range_seq_t rseq, range_id_t range_info) +{ + DBUG_ENTER("bka_unique_skip_index_tuple"); + JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq; + THD *thd= cache->thd(); + bool res; + status_var_increment(thd->status_var.ha_icp_attempts); + if (!(res= cache->skip_index_tuple(range_info))) + status_var_increment(thd->status_var.ha_icp_match); + DBUG_RETURN(res); +} + + +/* +Prepare to read record from BKAH cache matching the current joined record + +SYNOPSIS + prepare_look_for_matches() + skip_last <-> ignore the last record in the buffer (always unused here) + +DESCRIPTION + The function prepares to iterate over records in the join cache buffer + matching the record loaded into the record buffer for join_tab when + performing join operation by BKAH join algorithm. With BKAH algorithm, if + association labels are used, then record loaded into the record buffer + for join_tab always has a direct reference to the chain of the mathing + records from the join buffer. 
If association labels are not used then + then the chain of the matching records is obtained by the call of the + get_key_chain_by_join_key function. + +RETURN VALUE + TRUE there are no records in the buffer to iterate over + FALSE otherwise +*/ + +bool JOIN_CACHE_BKAH::prepare_look_for_matches(bool skip_last) +{ + last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0; + if (no_association && + !(curr_matching_chain= get_matching_chain_by_join_key())) //psergey: added '!' + return 1; + last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain); + return 0; +} + +/* + Initialize the BKAH join cache + + SYNOPSIS + init + for_explain join buffer is initialized for explain only + + DESCRIPTION + The function initializes the cache structure. It is supposed to be called + right after a constructor for the JOIN_CACHE_BKAH. + + NOTES + The function first constructs a companion object of the type + JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class. + + RETURN VALUE + 0 initialization with buffer allocations has been succeeded + 1 otherwise +*/ + +int JOIN_CACHE_BKAH::init(bool for_explain) +{ + bool check_only_first_match= join_tab->check_only_first_match(); + + no_association= MY_TEST(mrr_mode & HA_MRR_NO_ASSOCIATION); + + RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info, + bkah_range_seq_init, + bkah_range_seq_next, + check_only_first_match && !no_association ? + bkah_range_seq_skip_record : 0, + bkah_skip_index_tuple }; + + DBUG_ENTER("JOIN_CACHE_BKAH::init"); + + if (!(join_tab_scan= new JOIN_TAB_SCAN_MRR(join, join_tab, + mrr_mode, rs_funcs))) + DBUG_RETURN(1); + + DBUG_RETURN(JOIN_CACHE_HASHED::init(for_explain)); +} + + +/* + Check the index condition of the joined table for a record from the BKA cache + + SYNOPSIS + skip_index_tuple() + range_info record chain returned by MRR + + DESCRIPTION + See JOIN_CACHE_BKA::skip_index_tuple(). + This function is the variant for use with rhe class JOIN_CACHE_BKAH. 
+ The difference from JOIN_CACHE_BKA case is that there may be multiple + previous table record combinations that share the same key(MRR range). + As a consequence, we need to loop through the chain of all table record + combinations that match the given MRR range key range_info until we find + one that satisfies the index condition. + + NOTE + Possible optimization: + Before we unpack the record from a previous table + check if this table is used in the condition. + If so then unpack the record otherwise skip the unpacking. + This should be done by a special virtual method + get_partial_record_by_pos(). + + RETURN VALUE + 1 any record combination from the chain referred by range_info + does not satisfy the index condition + 0 otherwise + + +*/ + +bool JOIN_CACHE_BKAH::skip_index_tuple(range_id_t range_info) +{ + uchar *last_rec_ref_ptr= get_next_rec_ref((uchar*) range_info); + uchar *next_rec_ref_ptr= last_rec_ref_ptr; + DBUG_ENTER("JOIN_CACHE_BKAH::skip_index_tuple"); + do + { + next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr); + uchar *rec_ptr= next_rec_ref_ptr + rec_fields_offset; + get_record_by_pos(rec_ptr); + if (join_tab->cache_idx_cond->val_int()) + DBUG_RETURN(FALSE); + } while(next_rec_ref_ptr != last_rec_ref_ptr); + DBUG_RETURN(TRUE); +} diff --git a/sql/sql_join_cache.h b/sql/sql_join_cache.h new file mode 100644 index 00000000..d2d2e1a1 --- /dev/null +++ b/sql/sql_join_cache.h @@ -0,0 +1,1455 @@ +/* + Copyright (c) 2011, 2012, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + This file contains declarations for implementations + of block based join algorithms +*/ + +#define JOIN_CACHE_INCREMENTAL_BIT 1 +#define JOIN_CACHE_HASHED_BIT 2 +#define JOIN_CACHE_BKA_BIT 4 + +/* + Categories of data fields of variable length written into join cache buffers. + The value of any of these fields is written into cache together with the + prepended length of the value. +*/ +#define CACHE_BLOB 1 /* blob field */ +#define CACHE_STRIPPED 2 /* field stripped of trailing spaces */ +#define CACHE_VARSTR1 3 /* short string value (length takes 1 byte) */ +#define CACHE_VARSTR2 4 /* long string value (length takes 2 bytes) */ +#define CACHE_ROWID 5 /* ROWID field */ + +/* + The CACHE_FIELD structure used to describe fields of records that + are written into a join cache buffer from record buffers and backward. +*/ +typedef struct st_cache_field { + uchar *str; /**< buffer from/to where the field is to be copied */ + uint length; /**< maximal number of bytes to be copied from/to str */ + /* + Field object for the moved field + (0 - for a flag field, see JOIN_CACHE::create_flag_fields). + */ + Field *field; + uint type; /**< category of the of the copied field (CACHE_BLOB et al.) */ + /* + The number of the record offset value for the field in the sequence + of offsets placed after the last field of the record. These + offset values are used to access fields referred to from other caches. + If the value is 0 then no offset for the field is saved in the + trailing sequence of offsets. 
+ */ + uint referenced_field_no; + /* The remaining structure fields are used as containers for temp values */ + uint blob_length; /**< length of the blob to be copied */ + uint offset; /**< field offset to be saved in cache buffer */ +} CACHE_FIELD; + + +class JOIN_TAB_SCAN; + +class EXPLAIN_BKA_TYPE; + +/* + JOIN_CACHE is the base class to support the implementations of + - Block Nested Loop (BNL) Join Algorithm, + - Block Nested Loop Hash (BNLH) Join Algorithm, + - Batched Key Access (BKA) Join Algorithm. + + The first algorithm is supported by the derived class JOIN_CACHE_BNL, + the second algorithm is supported by the derived class JOIN_CACHE_BNLH, + while the third algorithm is implemented in two variant supported by + the classes JOIN_CACHE_BKA and JOIN_CACHE_BKAH. + These three algorithms have a lot in common. Each of them first accumulates + the records of the left join operand in a join buffer and then searches for + matching rows of the second operand for all accumulated records. + For the first two algorithms this strategy saves on logical I/O operations: + the entire set of records from the join buffer requires only one look-through + of the records provided by the second operand. + For the third algorithm the accumulation of records allows to optimize + fetching rows of the second operand from disk for some engines (MyISAM, + InnoDB), or to minimize the number of round-trips between the Server and + the engine nodes. 
+*/ + +class JOIN_CACHE :public Sql_alloc +{ + +private: + + /* Size of the offset of a record from the cache */ + uint size_of_rec_ofs; + /* Size of the length of a record in the cache */ + uint size_of_rec_len; + /* Size of the offset of a field within a record in the cache */ + uint size_of_fld_ofs; + + /* This structure is used only for explain, not for execution */ + bool for_explain_only; + +protected: + + /* 3 functions below actually do not use the hidden parameter 'this' */ + + /* Calculate the number of bytes used to store an offset value */ + uint offset_size(size_t len) + { return (len < 256 ? 1 : len < 256*256 ? 2 : 4); } + + /* Get the offset value that takes ofs_sz bytes at the position ptr */ + ulong get_offset(uint ofs_sz, uchar *ptr) + { + switch (ofs_sz) { + case 1: return uint(*ptr); + case 2: return uint2korr(ptr); + case 4: return uint4korr(ptr); + } + return 0; + } + + /* Set the offset value ofs that takes ofs_sz bytes at the position ptr */ + void store_offset(uint ofs_sz, uchar *ptr, ulong ofs) + { + switch (ofs_sz) { + case 1: *ptr= (uchar) ofs; return; + case 2: int2store(ptr, (uint16) ofs); return; + case 4: int4store(ptr, (uint32) ofs); return; + } + } + size_t calc_avg_record_length(); + + /* + The maximum total length of the fields stored for a record in the cache. + For blob fields only the sizes of the blob lengths are taken into account. + */ + uint length; + + /* + Representation of the executed multi-way join through which all needed + context can be accessed. + */ + JOIN *join; + + /* + JOIN_TAB of the first table that can have it's fields in the join cache. + That is, tables in the [start_tab, tab) range can have their fields in the + join cache. + If a join tab in the range represents an SJM-nest, then all tables from the + nest can have their fields in the join cache, too. + */ + JOIN_TAB *start_tab; + + /* + The total number of flag and data fields that can appear in a record + written into the cache. 
Fields with null values are always skipped + to save space. + */ + uint fields; + + /* + The total number of flag fields in a record put into the cache. They are + used for table null bitmaps, table null row flags, and an optional match + flag. Flag fields go before other fields in a cache record with the match + flag field placed always at the very beginning of the record. + */ + uint flag_fields; + + /* The total number of blob fields that are written into the cache */ + uint blobs; + + /* + The total number of fields referenced from field descriptors for other join + caches. These fields are used to construct key values. + When BKA join algorithm is employed the constructed key values serve to + access matching rows with index lookups. + The key values are put into a hash table when the BNLH join algorithm + is employed and when BKAH is used for the join operation. + */ + uint referenced_fields; + + /* + The current number of already created data field descriptors. + This number can be useful for implementations of the init methods. + */ + uint data_field_count; + + /* + The current number of already created pointers to the data field + descriptors. This number can be useful for implementations of + the init methods. + */ + uint data_field_ptr_count; + + /* + Array of the descriptors of fields containing 'fields' elements. + These are all fields that are stored for a record in the cache. + */ + CACHE_FIELD *field_descr; + + /* + Array of pointers to the blob descriptors that contains 'blobs' elements. + */ + CACHE_FIELD **blob_ptr; + + /* + This flag indicates that records written into the join buffer contain + a match flag field. + This is set to true for the first inner table of an outer join or a + semi-join. + The flag must be set by the init method. + Currently any implementation of the virtial init method calls + the function JOIN_CACHE::calc_record_fields() to set this flag. 
+ */ + bool with_match_flag; + /* + This flag indicates that any record is prepended with the length of the + record which allows us to skip the record or part of it without reading. + */ + bool with_length; + + /* + The maximal number of bytes used for a record representation in + the cache excluding the space for blob data. + For future derived classes this representation may contains some + redundant info such as a key value associated with the record. + */ + uint pack_length; + /* + The value of pack_length incremented by the total size of all + pointers of a record in the cache to the blob data. + */ + uint pack_length_with_blob_ptrs; + + /* + The total size of the record base prefix. The base prefix of record may + include the following components: + - the length of the record + - the link to a record in a previous buffer. + Each record in the buffer are supplied with the same set of the components. + */ + uint base_prefix_length; + + /* + The expected length of a record in the join buffer together with + all prefixes and postfixes + */ + size_t avg_record_length; + + /* The expected size of the space per record in the auxiliary buffer */ + size_t avg_aux_buffer_incr; + + /* Pointer to the beginning of the join buffer */ + uchar *buff; + /* + Size of the entire memory allocated for the join buffer. + Part of this memory may be reserved for the auxiliary buffer. + */ + size_t buff_size; + /* The minimal join buffer size when join buffer still makes sense to use */ + size_t min_buff_size; + /* The maximum expected size if the join buffer to be used */ + size_t max_buff_size; + /* Size of the auxiliary buffer */ + size_t aux_buff_size; + + /* The number of records put into the join buffer */ + size_t records; + /* + The number of records in the fully refilled join buffer of + the minimal size equal to min_buff_size + */ + size_t min_records; + + /* + Pointer to the current position in the join buffer. 
+ This member is used both when writing to buffer and + when reading from it. + */ + uchar *pos; + /* + Pointer to the first free position in the join buffer, + right after the last record into it. + */ + uchar *end_pos; + + /* + Pointer to the beginning of the first field of the current read/write + record from the join buffer. The value is adjusted by the + get_record/put_record functions. + */ + uchar *curr_rec_pos; + /* + Pointer to the beginning of the first field of the last record + from the join buffer. + */ + uchar *last_rec_pos; + + /* + Flag is set if the blob data for the last record in the join buffer + is in record buffers rather than in the join cache. + */ + bool last_rec_blob_data_is_in_rec_buff; + + /* + Pointer to the position to the current record link. + Record links are used only with linked caches. Record links allow to set + connections between parts of one join record that are stored in different + join buffers. + In the simplest case a record link is just a pointer to the beginning of + the record stored in the buffer. + In a more general case a link could be a reference to an array of pointers + to records in the buffer. + */ + uchar *curr_rec_link; + + /* + This flag is set to TRUE if join_tab is the first inner table of an outer + join and the latest record written to the join buffer is detected to be + null complemented after checking on conditions over the outer tables for + this outer join operation + */ + bool last_written_is_null_compl; + + /* + The number of fields put in the join buffer of the join cache that are + used in building keys to access the table join_tab + */ + uint local_key_arg_fields; + /* + The total number of the fields in the previous caches that are used + in building keys to access the table join_tab + */ + uint external_key_arg_fields; + + /* + This flag indicates that the key values will be read directly from the join + buffer. It will save us building key values in the key buffer. 
+ */ + bool use_emb_key; + /* The length of an embedded key value */ + uint emb_key_length; + + /* This flag is used only when 'not exists' optimization can be applied */ + bool not_exists_opt_is_applicable; + + /* + This object provides the methods to iterate over records of + the joined table join_tab when looking for join matches between + records from join buffer and records from join_tab. + BNL and BNLH join algorithms retrieve all records from join_tab, + while BKA/BKAH algorithm iterates only over those records from + join_tab that can be accessed by look-ups with join keys built + from records in join buffer. + */ + JOIN_TAB_SCAN *join_tab_scan; + + void calc_record_fields(); + void collect_info_on_key_args(); + int alloc_fields(); + void create_flag_fields(); + void create_key_arg_fields(); + void create_remaining_fields(); + void set_constants(); + int alloc_buffer(); + + /* Shall reallocate the join buffer */ + virtual int realloc_buffer(); + + /* Check the possibility to read the access keys directly from join buffer */ + bool check_emb_key_usage(); + + uint get_size_of_rec_offset() { return size_of_rec_ofs; } + uint get_size_of_rec_length() { return size_of_rec_len; } + uint get_size_of_fld_offset() { return size_of_fld_ofs; } + + uchar *get_rec_ref(uchar *ptr) + { + return buff+get_offset(size_of_rec_ofs, ptr-size_of_rec_ofs); + } + ulong get_rec_length(uchar *ptr) + { + return (ulong) get_offset(size_of_rec_len, ptr); + } + ulong get_fld_offset(uchar *ptr) + { + return (ulong) get_offset(size_of_fld_ofs, ptr); + } + + void store_rec_ref(uchar *ptr, uchar* ref) + { + store_offset(size_of_rec_ofs, ptr-size_of_rec_ofs, (ulong) (ref-buff)); + } + void store_rec_length(uchar *ptr, ulong len) + { + store_offset(size_of_rec_len, ptr, len); + } + void store_fld_offset(uchar *ptr, ulong ofs) + { + store_offset(size_of_fld_ofs, ptr, ofs); + } + + /* Write record fields and their required offsets into the join buffer */ + uint write_record_data(uchar *link, 
bool *is_full); + + /* Get the total length of all prefixes of a record in the join buffer */ + virtual uint get_prefix_length() { return base_prefix_length; } + /* Get maximum total length of all affixes of a record in the join buffer */ + virtual uint get_record_max_affix_length(); + + /* + Shall get maximum size of the additional space per record used for + record keys + */ + virtual uint get_max_key_addon_space_per_record() { return 0; } + + /* + This method must determine for how much the auxiliary buffer should be + incremented when a new record is added to the join buffer. + If no auxiliary buffer is needed the function should return 0. + */ + virtual uint aux_buffer_incr(size_t recno); + + /* Shall calculate how much space is remaining in the join buffer */ + virtual size_t rem_space() + { + return MY_MAX(buff_size-(end_pos-buff)-aux_buff_size,0); + } + + /* + Shall calculate how much space is taken by allocation of the key + for a record in the join buffer + */ + virtual uint extra_key_length() { return 0; } + + /* Read all flag and data fields of a record from the join buffer */ + uint read_all_record_fields(); + + /* Read all flag fields of a record from the join buffer */ + uint read_flag_fields(); + + /* Read a data record field from the join buffer */ + uint read_record_field(CACHE_FIELD *copy, bool last_record); + + /* Read a referenced field from the join buffer */ + bool read_referenced_field(CACHE_FIELD *copy, uchar *rec_ptr, uint *len); + + /* + Shall skip record from the join buffer if its match flag + is set to MATCH_FOUND + */ + virtual bool skip_if_matched(); + + /* + Shall skip record from the join buffer if its match flag + commands to do so + */ + virtual bool skip_if_not_needed_match(); + + /* + True if rec_ptr points to the record whose blob data stay in + record buffers + */ + bool blob_data_is_in_rec_buff(uchar *rec_ptr) + { + return rec_ptr == last_rec_pos && last_rec_blob_data_is_in_rec_buff; + } + + /* Find matches from the next 
table for records from the join buffer */ + virtual enum_nested_loop_state join_matching_records(bool skip_last); + + /* Shall set an auxiliary buffer up (currently used only by BKA joins) */ + virtual int setup_aux_buffer(HANDLER_BUFFER &aux_buff) + { + DBUG_ASSERT(0); + return 0; + } + + /* + Shall get the number of ranges in the cache buffer passed + to the MRR interface + */ + virtual uint get_number_of_ranges_for_mrr() { return 0; }; + + /* + Shall prepare to look for records from the join cache buffer that would + match the record of the joined table read into the record buffer + */ + virtual bool prepare_look_for_matches(bool skip_last)= 0; + /* + Shall return a pointer to the record from join buffer that is checked + as the next candidate for a match with the current record from join_tab. + Each implementation of this virtual function should bare in mind + that the record position it returns shall be exactly the position + passed as the parameter to the implementations of the virtual functions + skip_next_candidate_for_match and read_next_candidate_for_match. + */ + virtual uchar *get_next_candidate_for_match()= 0; + /* + Shall check whether the given record from the join buffer has its match + flag settings commands to skip the record in the buffer. 
+ */ + virtual bool skip_next_candidate_for_match(uchar *rec_ptr)= 0; + /* + Shall read the given record from the join buffer into the + the corresponding record buffer + */ + virtual void read_next_candidate_for_match(uchar *rec_ptr)= 0; + + /* + Shall return the location of the association label returned by + the multi_read_range_next function for the current record loaded + into join_tab's record buffer + */ + virtual uchar **get_curr_association_ptr() { return 0; }; + + /* Add null complements for unmatched outer records from the join buffer */ + virtual enum_nested_loop_state join_null_complements(bool skip_last); + + /* Restore the fields of the last record from the join buffer */ + virtual void restore_last_record(); + + /* Set match flag for a record in join buffer if it has not been set yet */ + bool set_match_flag_if_none(JOIN_TAB *first_inner, uchar *rec_ptr); + + enum_nested_loop_state generate_full_extensions(uchar *rec_ptr); + + /* Check matching to a partial join record from the join buffer */ + bool check_match(uchar *rec_ptr); + + /* + This constructor creates an unlinked join cache. The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. + */ + JOIN_CACHE(JOIN *j, JOIN_TAB *tab) + { + join= j; + join_tab= tab; + prev_cache= next_cache= 0; + buff= 0; + min_buff_size= max_buff_size= 0; // Caches + not_exists_opt_is_applicable= false; + } + + /* + This constructor creates a linked join cache. The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. The parameter 'prev' specifies the previous + cache object to which this cache is linked. 
+ */ + JOIN_CACHE(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev) + { + join= j; + join_tab= tab; + next_cache= 0; + prev_cache= prev; + buff= 0; + min_buff_size= max_buff_size= 0; // Caches + if (prev) + prev->next_cache= this; + } + +public: + + /* + The enumeration type Join_algorithm includes a mnemonic constant for + each join algorithm that employs join buffers + */ + + enum Join_algorithm + { + BNL_JOIN_ALG, /* Block Nested Loop Join algorithm */ + BNLH_JOIN_ALG, /* Block Nested Loop Hash Join algorithm */ + BKA_JOIN_ALG, /* Batched Key Access Join algorithm */ + BKAH_JOIN_ALG /* Batched Key Access with Hash Table Join Algorithm */ + }; + + /* + The enumeration type Match_flag describes possible states of the match flag + field stored for the records of the first inner tables of outer joins and + semi-joins in the cases when the first match strategy is used for them. + When a record with match flag field is written into the join buffer the + state of the field usually is MATCH_NOT_FOUND unless this is a record of the + first inner table of the outer join for which the on precondition (the + condition from on expression over outer tables) has turned out not to be + true. In the last case the state of the match flag is MATCH_IMPOSSIBLE. + The state of the match flag field is changed to MATCH_FOUND as soon as + the first full matching combination of inner tables of the outer join or + the semi-join is discovered. 
+ */ + enum Match_flag { MATCH_NOT_FOUND, MATCH_FOUND, MATCH_IMPOSSIBLE }; + + /* Table to be joined with the partial join records from the cache */ + JOIN_TAB *join_tab; + + /* Pointer to the previous join cache if there is any */ + JOIN_CACHE *prev_cache; + /* Pointer to the next join cache if there is any */ + JOIN_CACHE *next_cache; + + /* Shall initialize the join cache structure */ + virtual int init(bool for_explain); + + /* Get the current size of the cache join buffer */ + size_t get_join_buffer_size() { return buff_size; } + /* Set the size of the cache join buffer to a new value */ + void set_join_buffer_size(size_t sz) { buff_size= sz; } + + /* Get the minimum possible size of the cache join buffer */ + size_t get_min_join_buffer_size(); + /* Get the maximum possible size of the cache join buffer */ + size_t get_max_join_buffer_size(bool optimize_buff_size, + size_t min_buffer_size_arg); + + /* Shrink the size if the cache join buffer in a given ratio */ + bool shrink_join_buffer_in_ratio(ulonglong n, ulonglong d); + + /* Shall return the type of the employed join algorithm */ + virtual enum Join_algorithm get_join_alg()= 0; + + /* + The function shall return TRUE only when there is a key access + to the join table + */ + virtual bool is_key_access()= 0; + + /* Shall reset the join buffer for reading/writing */ + virtual void reset(bool for_writing); + + /* + This function shall add a record into the join buffer and return TRUE + if it has been decided that it should be the last record in the buffer. + */ + virtual bool put_record(); + + /* + This function shall read the next record into the join buffer and return + TRUE if there is no more next records. 
+ */ + virtual bool get_record(); + + /* + This function shall read the record at the position rec_ptr + in the join buffer + */ + virtual void get_record_by_pos(uchar *rec_ptr); + + /* Shall return the value of the match flag for the positioned record */ + virtual enum Match_flag get_match_flag_by_pos(uchar *rec_ptr); + + /* + Shall return the value of the match flag for the positioned record + from the join buffer attached to the specified table + */ + virtual enum Match_flag + get_match_flag_by_pos_from_join_buffer(uchar *rec_ptr, JOIN_TAB *tab); + + /* Shall return the position of the current record */ + virtual uchar *get_curr_rec() { return curr_rec_pos; } + + /* Shall set the current record link */ + virtual void set_curr_rec_link(uchar *link) { curr_rec_link= link; } + + /* Shall return the current record link */ + virtual uchar *get_curr_rec_link() + { + return (curr_rec_link ? curr_rec_link : get_curr_rec()); + } + + /* Join records from the join buffer with records from the next join table */ + enum_nested_loop_state join_records(bool skip_last); + + /* Add a comment on the join algorithm employed by the join cache */ + virtual bool save_explain_data(EXPLAIN_BKA_TYPE *explain); + + THD *thd(); + + virtual ~JOIN_CACHE() = default; + void reset_join(JOIN *j) { join= j; } + void free() + { + my_free(buff); + buff= 0; + } + + friend class JOIN_CACHE_HASHED; + friend class JOIN_CACHE_BNL; + friend class JOIN_CACHE_BKA; + friend class JOIN_TAB_SCAN; + friend class JOIN_TAB_SCAN_MRR; + +}; + + +/* + The class JOIN_CACHE_HASHED is the base class for the classes + JOIN_CACHE_HASHED_BNL and JOIN_CACHE_HASHED_BKA. The first of them supports + an implementation of Block Nested Loop Hash (BNLH) Join Algorithm, + while the second is used for a variant of the BKA Join algorithm that performs + only one lookup for any records from join buffer with the same key value. 
+ For a join cache of this class the records from the join buffer that have + the same access key are linked into a chain attached to a key entry structure + that either itself contains the key value, or, in the case when the keys are + embedded, refers to its occurrence in one of the records from the chain. + To build the chains with the same keys a hash table is employed. It is placed + at the very end of the join buffer. The array of hash entries is allocated + first at the very bottom of the join buffer, while key entries are placed + before this array. + A hash entry contains a header of the list of the key entries with the same + hash value. + Each key entry is a structure of the following type: + struct st_join_cache_key_entry { + union { + uchar[] value; + cache_ref *value_ref; // offset from the beginning of the buffer + } hash_table_key; + key_ref next_key; // offset backward from the beginning of hash table + cache_ref *last_rec // offset from the beginning of the buffer + } + The references linking the records in a chain are always placed at the very + beginning of the record info stored in the join buffer. The records are + linked in a circular list. A new record is always added to the end of this + list. + + The following picture represents a typical layout for the info stored in the + join buffer of a join cache object of the JOIN_CACHE_HASHED class. + + buff + V + +----------------------------------------------------------------------------+ + | |[*]record_1_1| | + | ^ | | + | | +--------------------------------------------------+ | + | | |[*]record_2_1| | | + | | ^ | V | + | | | +------------------+ |[*]record_1_2| | + | | +--------------------+-+ | | + |+--+ +---------------------+ | | +-------------+ | + || | | V | | | + |||[*]record_3_1| |[*]record_1_3| |[*]record_2_2| | | + ||^ ^ ^ | | + ||+----------+ | | | | + ||^ | |<---------------------------+-------------------+ | + |++ | | ... mrr | buffer ... ... 
| | | + | | | | | + | +-----+--------+ | +-----|-------+ | + | V | | | V | | | + ||key_3|[/]|[*]| | | |key_2|[/]|[*]| | | + | +-+---|-----------------------+ | | + | V | | | | | + | |key_1|[*]|[*]| | | ... |[*]| ... |[*]| ... | | + +----------------------------------------------------------------------------+ + ^ ^ ^ + | i-th entry j-th entry + hash table + + i-th hash entry: + circular record chain for key_1: + record_1_1 + record_1_2 + record_1_3 (points to record_1_1) + circular record chain for key_3: + record_3_1 (points to itself) + + j-th hash entry: + circular record chain for key_2: + record_2_1 + record_2_2 (points to record_2_1) + +*/ + +class JOIN_CACHE_HASHED: public JOIN_CACHE +{ + + typedef uint (JOIN_CACHE_HASHED::*Hash_func) (uchar *key, uint key_len); + typedef bool (JOIN_CACHE_HASHED::*Hash_cmp_func) (uchar *key1, uchar *key2, + uint key_len); + +private: + + /* Size of the offset of a key entry in the hash table */ + uint size_of_key_ofs; + + /* + Length of the key entry in the hash table. + A key entry either contains the key value, or it contains a reference + to the key value if use_emb_key flag is set for the cache. 
+ */ + uint key_entry_length; + + /* The beginning of the hash table in the join buffer */ + uchar *hash_table; + /* Number of hash entries in the hash table */ + uint hash_entries; + + + /* The position of the currently retrieved key entry in the hash table */ + uchar *curr_key_entry; + + /* The offset of the data fields from the beginning of the record fields */ + uint data_fields_offset; + + inline uint get_hash_idx_simple(uchar *key, uint key_len); + inline uint get_hash_idx_complex(uchar *key, uint key_len); + + inline bool equal_keys_simple(uchar *key1, uchar *key2, uint key_len); + inline bool equal_keys_complex(uchar *key1, uchar *key2, uint key_len); + + int init_hash_table(); + void cleanup_hash_table(); + +protected: + + /* + Index info on the TABLE_REF object used by the hash join + to look for matching records + */ + KEY *ref_key_info; + /* + Number of the key parts the TABLE_REF object used by the hash join + to look for matching records + */ + uint ref_used_key_parts; + + /* + The hash function used in the hash table, + usually set by the init() method + */ + Hash_func hash_func; + /* + The function to check whether two key entries in the hash table + are equal or not, usually set by the init() method + */ + Hash_cmp_func hash_cmp_func; + + /* + Length of a key value. + It is assumed that all key values have the same length. + */ + uint key_length; + /* Buffer to store key values for probing */ + uchar *key_buff; + + /* Number of key entries in the hash table (number of distinct keys) */ + uint key_entries; + + /* The position of the last key entry in the hash table */ + uchar *last_key_entry; + + /* + The offset of the record fields from the beginning of the record + representation. The record representation starts with a reference to + the next record in the key record chain followed by the length of + the trailing record data followed by a reference to the record segment + in the previous cache, if any, followed by the record fields. 
+ */ + uint rec_fields_offset; + + uint get_size_of_key_offset() { return size_of_key_ofs; } + + /* + Get the position of the next_key_ptr field pointed to by + a linking reference stored at the position key_ref_ptr. + This reference is actually the offset backward from the + beginning of hash table. + */ + uchar *get_next_key_ref(uchar *key_ref_ptr) + { + return hash_table-get_offset(size_of_key_ofs, key_ref_ptr); + } + + /* + Store the linking reference to the next_key_ptr field at + the position key_ref_ptr. The position of the next_key_ptr + field is pointed to by ref. The stored reference is actually + the offset backward from the beginning of the hash table. + */ + void store_next_key_ref(uchar *key_ref_ptr, uchar *ref) + { + store_offset(size_of_key_ofs, key_ref_ptr, (ulong) (hash_table-ref)); + } + + /* + Check whether the reference to the next_key_ptr field at the position + key_ref_ptr contains a nil value. + */ + bool is_null_key_ref(uchar *key_ref_ptr) + { + ulong nil= 0; + return memcmp(key_ref_ptr, &nil, size_of_key_ofs ) == 0; + } + + /* + Set the reference to the next_key_ptr field at the position + key_ref_ptr equal to nil. + */ + void store_null_key_ref(uchar *key_ref_ptr) + { + ulong nil= 0; + store_offset(size_of_key_ofs, key_ref_ptr, nil); + } + + uchar *get_next_rec_ref(uchar *ref_ptr) + { + return buff+get_offset(get_size_of_rec_offset(), ref_ptr); + } + + void store_next_rec_ref(uchar *ref_ptr, uchar *ref) + { + store_offset(get_size_of_rec_offset(), ref_ptr, (ulong) (ref-buff)); + } + + /* + Get the position of the embedded key value for the current + record pointed to by get_curr_rec(). + */ + uchar *get_curr_emb_key() + { + return get_curr_rec()+data_fields_offset; + } + + /* + Get the position of the embedded key value pointed to by a reference + stored at ref_ptr. The stored reference is actually the offset from + the beginning of the join buffer. 
+ */ + uchar *get_emb_key(uchar *ref_ptr) + { + return buff+get_offset(get_size_of_rec_offset(), ref_ptr); + } + + /* + Store the reference to an embedded key at the position key_ref_ptr. + The position of the embedded key is pointed to by ref. The stored + reference is actually the offset from the beginning of the join buffer. + */ + void store_emb_key_ref(uchar *ref_ptr, uchar *ref) + { + store_offset(get_size_of_rec_offset(), ref_ptr, (ulong) (ref-buff)); + } + + /* Get the total length of all prefixes of a record in hashed join buffer */ + uint get_prefix_length() + { + return base_prefix_length + get_size_of_rec_offset(); + } + + /* + Get maximum size of the additional space per record used for + the hash table with record keys + */ + uint get_max_key_addon_space_per_record(); + + /* + Calculate how much space in the buffer would not be occupied by + records, key entries and additional memory for the MMR buffer. + */ + size_t rem_space() + { + return MY_MAX(last_key_entry-end_pos-aux_buff_size,0); + } + + /* + Calculate how much space is taken by allocation of the key + entry for a record in the join buffer + */ + uint extra_key_length() { return key_entry_length; } + + /* + Skip record from a hashed join buffer if its match flag + is set to MATCH_FOUND + */ + bool skip_if_matched(); + + /* + Skip record from a hashed join buffer if its match flag setting + commands to do so + */ + bool skip_if_not_needed_match(); + + /* Search for a key in the hash table of the join buffer */ + bool key_search(uchar *key, uint key_len, uchar **key_ref_ptr); + + /* Reallocate the join buffer of a hashed join cache */ + int realloc_buffer(); + + /* + This constructor creates an unlinked hashed join cache. The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. + */ + JOIN_CACHE_HASHED(JOIN *j, JOIN_TAB *tab) :JOIN_CACHE(j, tab) {} + + /* + This constructor creates a linked hashed join cache. 
The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. The parameter 'prev' specifies the previous + cache object to which this cache is linked. + */ + JOIN_CACHE_HASHED(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev) + :JOIN_CACHE(j, tab, prev) {} + +public: + + /* Initialize a hashed join cache */ + int init(bool for_explain); + + /* Reset the buffer of a hashed join cache for reading/writing */ + void reset(bool for_writing); + + /* Add a record into the buffer of a hashed join cache */ + bool put_record(); + + /* Read the next record from the buffer of a hashed join cache */ + bool get_record(); + + /* + Shall check whether all records in a key chain have + their match flags set on + */ + virtual bool check_all_match_flags_for_key(uchar *key_chain_ptr); + + uint get_next_key(uchar **key); + + /* Get the head of the record chain attached to the current key entry */ + uchar *get_curr_key_chain() + { + return get_next_rec_ref(curr_key_entry+key_entry_length- + get_size_of_rec_offset()); + } + +}; + + +/* + The class JOIN_TAB_SCAN is a companion class for the classes JOIN_CACHE_BNL + and JOIN_CACHE_BNLH. Actually the class implements the iterator over the + table joinded by BNL/BNLH join algorithm. + The virtual functions open, next and close are called for any iteration over + the table. The function open is called to initiate the process of the + iteration. The function next shall read the next record from the joined + table. The record is read into the record buffer of the joined table. + The record is to be matched with records from the join cache buffer. + The function close shall perform the finalizing actions for the iteration. 
+*/ + +class JOIN_TAB_SCAN: public Sql_alloc +{ + +private: + /* TRUE if this is the first record from the joined table to iterate over */ + bool is_first_record; + +protected: + + /* The joined table to be iterated over */ + JOIN_TAB *join_tab; + /* The join cache used to join the table join_tab */ + JOIN_CACHE *cache; + /* + Representation of the executed multi-way join through which + all needed context can be accessed. + */ + JOIN *join; + +public: + + JOIN_TAB_SCAN(JOIN *j, JOIN_TAB *tab) + { + join= j; + join_tab= tab; + cache= join_tab->cache; + } + + virtual ~JOIN_TAB_SCAN() = default; + + /* + Shall calculate the increment of the auxiliary buffer for a record + write if such a buffer is used by the table scan object + */ + virtual uint aux_buffer_incr(size_t recno) { return 0; } + + /* Initiate the process of iteration over the joined table */ + virtual int open(); + /* + Shall read the next candidate for matches with records from + the join buffer. + */ + virtual int next(); + /* + Perform the finalizing actions for the process of iteration + over the joined_table. + */ + virtual void close(); + +}; + +/* + The class JOIN_CACHE_BNL is used when the BNL join algorithm is + employed to perform a join operation +*/ + +class JOIN_CACHE_BNL :public JOIN_CACHE +{ +private: + /* + The number of the records in the join buffer that have to be + checked yet for a match with the current record of join_tab + read into the record buffer. + */ + uint rem_records; + +protected: + + bool prepare_look_for_matches(bool skip_last); + + uchar *get_next_candidate_for_match(); + + bool skip_next_candidate_for_match(uchar *rec_ptr); + + void read_next_candidate_for_match(uchar *rec_ptr); + +public: + + /* + This constructor creates an unlinked BNL join cache. The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. 
+ */ + JOIN_CACHE_BNL(JOIN *j, JOIN_TAB *tab) :JOIN_CACHE(j, tab) {} + + /* + This constructor creates a linked BNL join cache. The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. The parameter 'prev' specifies the previous + cache object to which this cache is linked. + */ + JOIN_CACHE_BNL(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev) + :JOIN_CACHE(j, tab, prev) {} + + /* Initialize the BNL cache */ + int init(bool for_explain); + + enum Join_algorithm get_join_alg() { return BNL_JOIN_ALG; } + + bool is_key_access() { return FALSE; } + +}; + + +/* + The class JOIN_CACHE_BNLH is used when the BNLH join algorithm is + employed to perform a join operation +*/ + +class JOIN_CACHE_BNLH :public JOIN_CACHE_HASHED +{ + +protected: + + /* + The pointer to the last record from the circular list of the records + that match the join key built out of the record in the join buffer for + the join_tab table + */ + uchar *last_matching_rec_ref_ptr; + /* + The pointer to the next current record from the circular list of the + records that match the join key built out of the record in the join buffer + for the join_tab table. This pointer is used by the class method + get_next_candidate_for_match to iterate over records from the circular + list. + */ + uchar *next_matching_rec_ref_ptr; + + /* + Get the chain of records from buffer matching the current candidate + record for join + */ + uchar *get_matching_chain_by_join_key(); + + bool prepare_look_for_matches(bool skip_last); + + uchar *get_next_candidate_for_match(); + + bool skip_next_candidate_for_match(uchar *rec_ptr); + + void read_next_candidate_for_match(uchar *rec_ptr); + +public: + + /* + This constructor creates an unlinked BNLH join cache. The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. 
+ */ + JOIN_CACHE_BNLH(JOIN *j, JOIN_TAB *tab) : JOIN_CACHE_HASHED(j, tab) {} + + /* + This constructor creates a linked BNLH join cache. The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. The parameter 'prev' specifies the previous + cache object to which this cache is linked. + */ + JOIN_CACHE_BNLH(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev) + : JOIN_CACHE_HASHED(j, tab, prev) {} + + /* Initialize the BNLH cache */ + int init(bool for_explain); + + enum Join_algorithm get_join_alg() { return BNLH_JOIN_ALG; } + + bool is_key_access() { return TRUE; } + +}; + + +/* + The class JOIN_TAB_SCAN_MRR is a companion class for the classes + JOIN_CACHE_BKA and JOIN_CACHE_BKAH. Actually the class implements the + iterator over the records from join_tab selected by BKA/BKAH join + algorithm as the candidates to be joined. + The virtual functions open, next and close are called for any iteration over + join_tab record candidates. The function open is called to initiate the + process of the iteration. The function next shall read the next record from + the set of the record candidates. The record is read into the record buffer + of the joined table. The function close shall perform the finalizing actions + for the iteration. 
+*/ + +class JOIN_TAB_SCAN_MRR: public JOIN_TAB_SCAN +{ + /* Interface object to generate key ranges for MRR */ + RANGE_SEQ_IF range_seq_funcs; + + /* Number of ranges to be processed by the MRR interface */ + uint ranges; + + /* Flag to to be passed to the MRR interface */ + uint mrr_mode; + + /* MRR buffer assotiated with this join cache */ + HANDLER_BUFFER mrr_buff; + + /* Shall initialize the MRR buffer */ + virtual void init_mrr_buff() + { + cache->setup_aux_buffer(mrr_buff); + } + +public: + + JOIN_TAB_SCAN_MRR(JOIN *j, JOIN_TAB *tab, uint flags, RANGE_SEQ_IF rs_funcs) + :JOIN_TAB_SCAN(j, tab), range_seq_funcs(rs_funcs), mrr_mode(flags) {} + + uint aux_buffer_incr(size_t recno); + + int open(); + + int next(); + + friend class JOIN_CACHE_BKA; /* it needs to add an mrr_mode flag after JOIN_CACHE::init() call */ +}; + +/* + The class JOIN_CACHE_BKA is used when the BKA join algorithm is + employed to perform a join operation +*/ + +class JOIN_CACHE_BKA :public JOIN_CACHE +{ +private: + + /* Flag to to be passed to the companion JOIN_TAB_SCAN_MRR object */ + uint mrr_mode; + + /* + This value is set to 1 by the class prepare_look_for_matches method + and back to 0 by the class get_next_candidate_for_match method + */ + uint rem_records; + + /* + This field contains the current association label set by a call of + the multi_range_read_next handler function. + See the function JOIN_CACHE_BKA::get_curr_key_association() + */ + uchar *curr_association; + +protected: + + /* + Get the number of ranges in the cache buffer passed to the MRR + interface. For each record its own range is passed. 
+ */ + uint get_number_of_ranges_for_mrr() { return (uint)records; } + + /* + Setup the MRR buffer as the space between the last record put + into the join buffer and the very end of the join buffer + */ + int setup_aux_buffer(HANDLER_BUFFER &aux_buff) + { + aux_buff.buffer= end_pos; + aux_buff.buffer_end= buff+buff_size; + return 0; + } + + bool prepare_look_for_matches(bool skip_last); + + uchar *get_next_candidate_for_match(); + + bool skip_next_candidate_for_match(uchar *rec_ptr); + + void read_next_candidate_for_match(uchar *rec_ptr); + +public: + + /* + This constructor creates an unlinked BKA join cache. The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. + The MRR mode initially is set to 'flags'. + */ + JOIN_CACHE_BKA(JOIN *j, JOIN_TAB *tab, uint flags) + :JOIN_CACHE(j, tab), mrr_mode(flags) {} + /* + This constructor creates a linked BKA join cache. The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. The parameter 'prev' specifies the previous + cache object to which this cache is linked. + The MRR mode initially is set to 'flags'. 
+ */ + JOIN_CACHE_BKA(JOIN *j, JOIN_TAB *tab, uint flags, JOIN_CACHE *prev) + :JOIN_CACHE(j, tab, prev), mrr_mode(flags) {} + + JOIN_CACHE_BKA(JOIN_CACHE_BKA *bka) + :JOIN_CACHE(bka->join, bka->join_tab, bka->prev_cache), + mrr_mode(bka->mrr_mode) {} + + uchar **get_curr_association_ptr() { return &curr_association; } + + /* Initialize the BKA cache */ + int init(bool for_explain); + + enum Join_algorithm get_join_alg() { return BKA_JOIN_ALG; } + + bool is_key_access() { return TRUE; } + + /* Get the key built over the next record from the join buffer */ + uint get_next_key(uchar **key); + + /* Check index condition of the joined table for a record from BKA cache */ + bool skip_index_tuple(range_id_t range_info); + + bool save_explain_data(EXPLAIN_BKA_TYPE *explain); +}; + + + +/* + The class JOIN_CACHE_BKAH is used when the BKAH join algorithm is + employed to perform a join operation +*/ + +class JOIN_CACHE_BKAH :public JOIN_CACHE_BNLH +{ + +private: + /* Flag to to be passed to the companion JOIN_TAB_SCAN_MRR object */ + uint mrr_mode; + + /* + This flag is set to TRUE if the implementation of the MRR interface cannot + handle range association labels and does not return them to the caller of + the multi_range_read_next handler function. E.g. the implementation of + the MRR inteface for the Falcon engine could not return association + labels to the caller of multi_range_read_next. + The flag is set by JOIN_CACHE_BKA::init() and is not ever changed. + */ + bool no_association; + + /* + This field contains the association label returned by the + multi_range_read_next function. + See the function JOIN_CACHE_BKAH::get_curr_key_association() + */ + uchar *curr_matching_chain; + +protected: + + uint get_number_of_ranges_for_mrr() { return key_entries; } + + /* + Initialize the MRR buffer allocating some space within the join buffer. 
+ The entire space between the last record put into the join buffer and the + last key entry added to the hash table is used for the MRR buffer. + */ + int setup_aux_buffer(HANDLER_BUFFER &aux_buff) + { + aux_buff.buffer= end_pos; + aux_buff.buffer_end= last_key_entry; + return 0; + } + + bool prepare_look_for_matches(bool skip_last); + + /* + The implementations of the methods + - get_next_candidate_for_match + - skip_recurrent_candidate_for_match + - read_next_candidate_for_match + are inherited from the JOIN_CACHE_BNLH class + */ + +public: + + /* + This constructor creates an unlinked BKAH join cache. The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. + The MRR mode initially is set to 'flags'. + */ + JOIN_CACHE_BKAH(JOIN *j, JOIN_TAB *tab, uint flags) + :JOIN_CACHE_BNLH(j, tab), mrr_mode(flags) {} + + /* + This constructor creates a linked BKAH join cache. The cache is to be + used to join table 'tab' to the result of joining the previous tables + specified by the 'j' parameter. The parameter 'prev' specifies the previous + cache object to which this cache is linked. + The MRR mode initially is set to 'flags'. 
+ */ + JOIN_CACHE_BKAH(JOIN *j, JOIN_TAB *tab, uint flags, JOIN_CACHE *prev) + :JOIN_CACHE_BNLH(j, tab, prev), mrr_mode(flags) {} + + JOIN_CACHE_BKAH(JOIN_CACHE_BKAH *bkah) + :JOIN_CACHE_BNLH(bkah->join, bkah->join_tab, bkah->prev_cache), + mrr_mode(bkah->mrr_mode) {} + + uchar **get_curr_association_ptr() { return &curr_matching_chain; } + + /* Initialize the BKAH cache */ + int init(bool for_explain); + + enum Join_algorithm get_join_alg() { return BKAH_JOIN_ALG; } + + /* Check index condition of the joined table for a record from BKAH cache */ + bool skip_index_tuple(range_id_t range_info); + + bool save_explain_data(EXPLAIN_BKA_TYPE *explain); +}; diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc new file mode 100644 index 00000000..d9706020 --- /dev/null +++ b/sql/sql_lex.cc @@ -0,0 +1,11881 @@ +/* Copyright (c) 2000, 2019, Oracle and/or its affiliates. + Copyright (c) 2009, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* A lexical scanner on a temporary buffer with a yacc interface */ + +#define MYSQL_LEX 1 +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_class.h" // sql_lex.h: SQLCOM_END +#include "sql_lex.h" +#include "sql_parse.h" // add_to_list +#include "item_create.h" +#include +#include +#include "sp_head.h" +#include "sp.h" +#include "sql_select.h" +#include "sql_cte.h" +#include "sql_signal.h" +#include "sql_derived.h" +#include "sql_truncate.h" // Sql_cmd_truncate_table +#include "sql_admin.h" // Sql_cmd_analyze/Check..._table +#include "sql_partition.h" +#include "sql_partition_admin.h" // Sql_cmd_alter_table_*_part +#include "event_parse_data.h" +#ifdef WITH_WSREP +#include "mysql/service_wsrep.h" +#endif + +void LEX::parse_error(uint err_number) +{ + thd->parse_error(err_number); +} + + +/** + LEX_STRING constant for null-string to be used in parser and other places. +*/ +const LEX_STRING empty_lex_str= {(char *) "", 0}; +const LEX_CSTRING null_clex_str= {NULL, 0}; +const LEX_CSTRING empty_clex_str= {"", 0}; +const LEX_CSTRING star_clex_str= {"*", 1}; +const LEX_CSTRING param_clex_str= {"?", 1}; +const LEX_CSTRING NULL_clex_str= {STRING_WITH_LEN("NULL")}; +const LEX_CSTRING error_clex_str= {STRING_WITH_LEN("error")}; + +/** + Helper action for a case expression statement (the expr in 'CASE expr'). + This helper is used for 'searched' cases only. 
  @param lex the parser lex context
  @param expr the parsed expression
  @return 0 on success
*/

int sp_expr_lex::case_stmt_action_expr()
{
  /* Allocate a slot in the routine context for the CASE expression value. */
  int case_expr_id= spcont->register_case_expr();
  sp_instr_set_case_expr *i;

  if (spcont->push_case_expr_id(case_expr_id))
    return 1;

  /* Instruction that evaluates the CASE expression and stores it. */
  i= new (thd->mem_root)
     sp_instr_set_case_expr(sphead->instructions(), spcont, case_expr_id,
                            get_item(), this);

  /*
    NOTE(review): "i" is not NULL-checked before add_cont_backpatch(),
    unlike the MY_TEST(i) pattern used in case_stmt_action_when() below —
    presumably relies on mem_root allocation policy; confirm.
  */
  sphead->add_cont_backpatch(i);
  return sphead->add_instr(i);
}

/**
  Helper action for a case when condition.
  This helper is used for both 'simple' and 'searched' cases.
  @param lex the parser lex context
  @param when the parsed expression for the WHEN clause
  @param simple true for simple cases, false for searched cases
*/

int sp_expr_lex::case_stmt_action_when(bool simple)
{
  uint ip= sphead->instructions();
  sp_instr_jump_if_not *i;
  Item_case_expr *var;
  Item *expr;

  if (simple)
  {
    /*
      Simple case: compare the stored CASE expression value with the
      WHEN expression, i.e. jump if NOT (case_expr = when_expr).
    */
    var= new (thd->mem_root)
         Item_case_expr(thd, spcont->get_current_case_expr_id());

#ifdef DBUG_ASSERT_EXISTS
    if (var)
    {
      var->m_sp= sphead;
    }
#endif

    expr= new (thd->mem_root) Item_func_eq(thd, var, get_item());
    i= new (thd->mem_root) sp_instr_jump_if_not(ip, spcont, expr, this);
  }
  else
    /* Searched case: the WHEN clause is itself the condition. */
    i= new (thd->mem_root) sp_instr_jump_if_not(ip, spcont, get_item(), this);

  /*
    BACKPATCH: Registering forward jump from
    "case_stmt_action_when" to "case_stmt_action_then"
    (jump_if_not from instruction 2 to 5, 5 to 8 ... in the example)
  */

  return
    !MY_TEST(i) ||
    sphead->push_backpatch(thd, i, spcont->push_label(thd, &empty_clex_str, 0)) ||
    sphead->add_cont_backpatch(i) ||
    sphead->add_instr(i);
}

/**
  Helper action for a case then statements.
  This helper is used for both 'simple' and 'searched' cases.
  @param lex the parser lex context
*/

int LEX::case_stmt_action_then()
{
  uint ip= sphead->instructions();
  sp_instr_jump *i= new (thd->mem_root) sp_instr_jump(ip, spcont);
  if (!MY_TEST(i) || sphead->add_instr(i))
    return 1;

  /*
    BACKPATCH: Resolving forward jump from
    "case_stmt_action_when" to "case_stmt_action_then"
    (jump_if_not from instruction 2 to 5, 5 to 8 ... in the example)
  */

  sphead->backpatch(spcont->pop_label());

  /*
    BACKPATCH: Registering forward jump from
    "case_stmt_action_then" to after END CASE
    (jump from instruction 4 to 12, 7 to 12 ... in the example)
  */

  return sphead->push_backpatch(thd, i, spcont->last_label());
}


/**
  Helper action for a SET statement.
  Used to push a system variable into the assignment list.

  @param tmp      the system variable with base name
  @param var_type the scope of the variable
  @param val      the value being assigned to the variable

  @return TRUE if error, FALSE otherwise.
*/

bool
LEX::set_system_variable(enum enum_var_type var_type,
                         sys_var *sysvar, const Lex_ident_sys_st *base_name,
                         Item *val)
{
  set_var *setvar;

  /* No AUTOCOMMIT from a stored function or trigger. */
  if (spcont && sysvar == Sys_autocommit_ptr)
    sphead->m_flags|= sp_head::HAS_SET_AUTOCOMMIT_STMT;

  /* A table-qualified column reference is not a valid SET value. */
  if (val && val->type() == Item::FIELD_ITEM &&
      ((Item_field*)val)->table_name.str)
  {
    my_error(ER_WRONG_TYPE_FOR_VAR, MYF(0), sysvar->name.str);
    return TRUE;
  }

  if (!(setvar= new (thd->mem_root) set_var(thd, var_type, sysvar,
                                            base_name, val)))
    return TRUE;

  return var_list.push_back(setvar, thd->mem_root);
}


/**
  Helper action for a SET statement.
  Used to SET a field of NEW row.

  @param name  the field name
  @param val   the value being assigned to the row

  @return TRUE if error, FALSE otherwise.
*/

bool LEX::set_trigger_new_row(const LEX_CSTRING *name, Item *val)
{
  Item_trigger_field *trg_fld;
  sp_instr_set_trigger_field *sp_fld;

  /* QQ: Shouldn't this be field's default value ? */
  if (! val)
    val= new (thd->mem_root) Item_null(thd);

  /* Only BEFORE INSERT/UPDATE triggers may assign to NEW row fields. */
  DBUG_ASSERT(trg_chistics.action_time == TRG_ACTION_BEFORE &&
              (trg_chistics.event == TRG_EVENT_INSERT ||
               trg_chistics.event == TRG_EVENT_UPDATE));

  trg_fld= new (thd->mem_root)
            Item_trigger_field(thd, current_context(),
                               Item_trigger_field::NEW_ROW,
                               *name, UPDATE_ACL, FALSE);

  if (unlikely(trg_fld == NULL))
    return TRUE;

  sp_fld= new (thd->mem_root)
           sp_instr_set_trigger_field(sphead->instructions(),
                                      spcont, trg_fld, val, this);

  if (unlikely(sp_fld == NULL))
    return TRUE;

  /*
    Let us add this item to list of all Item_trigger_field
    objects in trigger.
  */
  trg_table_fields.link_in_list(trg_fld, &trg_fld->next_trg_field);

  return sphead->add_instr(sp_fld);
}


/**
  Create an object to represent a SP variable in the Item-hierarchy.

  @param name       The SP variable name.
  @param spvar      The SP variable (optional).
  @param start_in_q Start position of the SP variable name in the query.
  @param end_in_q   End position of the SP variable name in the query.

  @remark If spvar is not specified, the name is used to search for the
          variable in the parse-time context. If the variable does not
          exist, a error is set and NULL is returned to the caller.

  @return An Item_splocal object representing the SP variable, or NULL on
          error.
*/
Item_splocal*
LEX::create_item_for_sp_var(const Lex_ident_cli_st *cname, sp_variable *spvar)
{
  const Sp_rcontext_handler *rh;
  Item_splocal *item;
  const char *start_in_q= cname->pos();
  const char *end_in_q= cname->end();
  uint pos_in_q, len_in_q;
  Lex_ident_sys name(thd, cname);

  if (name.is_null())
    return NULL; // EOM

  /* If necessary, look for the variable. */
  if (spcont && !spvar)
    spvar= find_variable(&name, &rh);

  if (!spvar)
  {
    my_error(ER_SP_UNDECLARED_VAR, MYF(0), name.str);
    return NULL;
  }

  DBUG_ASSERT(spcont && spvar);

  /* Position and length of the SP variable name in the query. */
  pos_in_q= (uint)(start_in_q - sphead->m_tmp_query);
  len_in_q= (uint)(end_in_q - start_in_q);

  item= new (thd->mem_root)
        Item_splocal(thd, rh, &name, spvar->offset, spvar->type_handler(),
                     pos_in_q, len_in_q);

#ifdef DBUG_ASSERT_EXISTS
  if (item)
    item->m_sp= sphead;
#endif

  return item;
}


/**
  Helper to resolve the SQL:2003 Syntax exception 1) in <in predicate>.
  See SQL:2003, Part 2, section 8.4 <in predicate>, Note 184, page 383.
  This function returns the proper item for the SQL expression
  left [NOT] IN ( expr )
  (NOTE(review): the angle-bracketed SQL:2003 grammar terms in this comment
  were stripped by extraction and have been restored from the standard's
  section titles — confirm against the original file.)
  @param thd the current thread
  @param left the in predicand
  @param equal true for IN predicates, false for NOT IN predicates
  @param expr first and only expression of the in value list
  @return an expression representing the IN predicate.
*/
Item* handle_sql2003_note184_exception(THD *thd, Item* left, bool equal,
                                       Item *expr)
{
  /*
    Relevant references for this issue:
    - SQL:2003, Part 2, section 8.4 <in predicate>, page 383,
    - SQL:2003, Part 2, section 7.2 <row value expression>, page 296,
    - SQL:2003, Part 2, section 6.3 <value expression primary>, page 174,
    - SQL:2003, Part 2, section 7.15 <subquery>, page 370,
    - SQL:2003 Feature F561, "Full value expressions".

    The exception in SQL:2003 Note 184 means:
    Item_singlerow_subselect, which corresponds to a <scalar subquery>,
    should be re-interpreted as an Item_in_subselect, which corresponds
    to a <table subquery> when used inside an <in predicate>.

    Our reading of Note 184 is reccursive, so that all:
    - IN (( <subquery> ))
    - IN ((( <subquery> )))
    - IN '('^N <subquery> ')'^N
    - etc
    should be interpreted as a <table subquery>, no matter how deep in the
    expression the <subquery> is.
  */

  Item *result;

  DBUG_ENTER("handle_sql2003_note184_exception");

  if (expr->type() == Item::SUBSELECT_ITEM)
  {
    Item_subselect *expr2 = (Item_subselect*) expr;

    if (expr2->substype() == Item_subselect::SINGLEROW_SUBS)
    {
      Item_singlerow_subselect *expr3 = (Item_singlerow_subselect*) expr2;
      st_select_lex *subselect;

      /*
        Implement the mandated change, by altering the semantic tree:
          left IN Item_singlerow_subselect(subselect)
        is modified to
          left IN (subselect)
        which is represented as
          Item_in_subselect(left, subselect)
      */
      subselect= expr3->invalidate_and_restore_select_lex();
      result= new (thd->mem_root) Item_in_subselect(thd, left, subselect);

      if (! equal)
        result = negate_expression(thd, result);

      DBUG_RETURN(result);
    }
  }

  /* Not a subquery: plain equality / inequality. */
  if (equal)
    result= new (thd->mem_root) Item_func_eq(thd, left, expr);
  else
    result= new (thd->mem_root) Item_func_ne(thd, left, expr);

  DBUG_RETURN(result);
}

/**
  Create a separate LEX for each assignment if in SP.

  If we are in SP we want have own LEX for each assignment.
  This is mostly because it is hard for several sp_instr_set
  and sp_instr_set_trigger instructions share one LEX.
  (Well, it is theoretically possible but adds some extra
  overhead on preparation for execution stage and IMO less
  robust).

  QQ: May be we should simply prohibit group assignments in SP?

  @see sp_create_assignment_instr

  @param thd  Thread context
  @param pos  The position in the raw SQL buffer
*/


bool sp_create_assignment_lex(THD *thd, const char *pos)
{
  if (thd->lex->sphead)
  {
    /* Inside an SP: push a fresh LEX dedicated to this one assignment. */
    sp_lex_local *new_lex;
    if (!(new_lex= new (thd->mem_root) sp_lex_set_var(thd, thd->lex)) ||
        new_lex->main_select_push())
      return true;
    /* Remember where the assignment text starts, for sp_instr_stmt. */
    new_lex->sphead->m_tmp_query= pos;
    return thd->lex->sphead->reset_lex(thd, new_lex);
  }
  else
    if (thd->lex->main_select_push(false))
      return true;
  return false;
}


/**
  Create a SP instruction for a SET assignment.

  @see sp_create_assignment_lex

  @param thd              - Thread context
  @param no_lookahead     - True if the parser has no lookahead
  @param need_set_keyword - if a SET statement "SET a=10",
                            or a direct assignment overwise "a:=10"
  @return false if success, true otherwise.
*/

bool sp_create_assignment_instr(THD *thd, bool no_lookahead,
                                bool need_set_keyword)
{
  LEX *lex= thd->lex;

  if (lex->sphead)
  {
    if (!lex->var_list.is_empty())
    {
      /*
        - Every variable assignment from the same SET command, e.g.:
            SET @var1=expr1, @var2=expr2;
          produce each own sp_create_assignment_instr() call
          lex->var_list.elements is 1 in this case.
        - This query:
            SET TRANSACTION READ ONLY, ISOLATION LEVEL SERIALIZABLE;
          in translated to:
            SET tx_read_only=1, tx_isolation=ISO_SERIALIZABLE;
          but produces a single sp_create_assignment_instr() call
          which includes the query fragment covering both options.
      */
      DBUG_ASSERT(lex->var_list.elements >= 1 && lex->var_list.elements <= 2);
      /*
        sql_mode=ORACLE's direct assignment of a global variable
        is not possible by the grammar.
      */
      DBUG_ASSERT(lex->option_type != OPT_GLOBAL || need_set_keyword);
      /*
        We have assignment to user or system variable or
        option setting, so we should construct sp_instr_stmt
        for it.
      */
      Lex_input_stream *lip= &thd->m_parser_state->m_lip;

      /*
        Extract the query statement from the tokenizer.  The
        end is either lip->ptr, if there was no lookahead,
        lip->tok_end otherwise.
      */
      static const LEX_CSTRING setlc= { STRING_WITH_LEN("SET ") };
      static const LEX_CSTRING setgl= { STRING_WITH_LEN("SET GLOBAL ") };
      const char *qend= no_lookahead ? lip->get_ptr() : lip->get_tok_end();
      Lex_cstring qbuf(lex->sphead->m_tmp_query, qend);
      /* Prefix the fragment so it replays as a complete SET statement. */
      if (lex->new_sp_instr_stmt(thd,
                                 lex->option_type == OPT_GLOBAL ? setgl :
                                 need_set_keyword ? setlc :
                                                    null_clex_str,
                                 qbuf))
        return true;
    }
    lex->pop_select();
    if (lex->check_main_unit_semantics())
    {
      /*
        "lex" can be referrenced by:
        - sp_instr_set                          SET a= expr;
        - sp_instr_set_row_field                SET r.a= expr;
        - sp_instr_stmt (just generated above)  SET @a= expr;
        In this case, "lex" is fully owned by sp_instr_xxx and it will
        be deleted by the destructor ~sp_instr_xxx().
        So we should remove "lex" from the stack sp_head::m_lex,
        to avoid double free.
      */
      lex->sphead->restore_lex(thd);
      /*
        No needs for "delete lex" here: "lex" is already linked
        to the sp_instr_stmt (using sp_lex_keeper) instance created by
        the call for new_sp_instr_stmt() above. It will be freed
        by ~sp_head/~sp_instr/~sp_lex_keeper during THD::end_statement().
      */
      DBUG_ASSERT(lex->sp_lex_in_use); // used by sp_instr_stmt
      return true;
    }
    enum_var_type inner_option_type= lex->option_type;
    if (lex->sphead->restore_lex(thd))
      return true;
    /* Copy option_type to outer lex in case it has changed. */
    thd->lex->option_type= inner_option_type;
  }
  else
    lex->pop_select();
  return false;
}


/*
  Append a key (of the given type, with an auto-generated name) over a
  single column to alter_info.key_list.
*/
void LEX::add_key_to_list(LEX_CSTRING *field_name,
                          enum Key::Keytype type, bool check_exists)
{
  Key *key;
  MEM_ROOT *mem_root= thd->mem_root;
  key= new (mem_root)
       Key(type, &null_clex_str, HA_KEY_ALG_UNDEF, false,
           DDL_options(check_exists ?
                       DDL_options::OPT_IF_NOT_EXISTS :
                       DDL_options::OPT_NONE));
  key->columns.push_back(new (mem_root) Key_part_spec(field_name, 0),
                         mem_root);
  alter_info.key_list.push_back(key, mem_root);
}


/*
  Register an ALTER TABLE ... ALTER COLUMN ... SET DEFAULT <expr> action.
  @return true on allocation failure.
*/
bool LEX::add_alter_list(LEX_CSTRING name, Virtual_column_info *expr,
                         bool exists)
{
  MEM_ROOT *mem_root= thd->mem_root;
  Alter_column *ac= new (mem_root) Alter_column(name, expr, exists);
  if (unlikely(ac == NULL))
    return true;
  alter_info.alter_list.push_back(ac, mem_root);
  alter_info.flags|= ALTER_CHANGE_COLUMN_DEFAULT;
  return false;
}


/*
  Register an ALTER TABLE ... RENAME COLUMN old TO new action.
  @return true on allocation failure.
*/
bool LEX::add_alter_list(LEX_CSTRING name, LEX_CSTRING new_name, bool exists)
{
  Alter_column *ac= new (thd->mem_root) Alter_column(name, new_name, exists);
  if (unlikely(ac == NULL))
    return true;
  alter_info.alter_list.push_back(ac, thd->mem_root);
  alter_info.flags|= ALTER_RENAME_COLUMN;
  return false;
}


/* Remember the column definition currently being parsed and set its name. */
void LEX::init_last_field(Column_definition *field,
                          const LEX_CSTRING *field_name)
{
  last_field= field;
  field->field_name= *field_name;
}


/* Wrap an expression into a Virtual_column_info; NULL on allocation failure. */
Virtual_column_info *add_virtual_expression(THD *thd, Item *expr)
{
  Virtual_column_info *v= new (thd->mem_root) Virtual_column_info();
  if (unlikely(!v))
    return 0;
  v->expr= expr;
  v->utf8= 0;  /* connection charset */
  return v;
}



/**
  @note The order of the elements of this array must correspond to
  the order of elements in enum_binlog_stmt_unsafe.
+*/ +const int +Query_tables_list::binlog_stmt_unsafe_errcode[BINLOG_STMT_UNSAFE_COUNT] = +{ + ER_BINLOG_UNSAFE_LIMIT, + ER_BINLOG_UNSAFE_INSERT_DELAYED, + ER_BINLOG_UNSAFE_SYSTEM_TABLE, + ER_BINLOG_UNSAFE_AUTOINC_COLUMNS, + ER_BINLOG_UNSAFE_UDF, + ER_BINLOG_UNSAFE_SYSTEM_VARIABLE, + ER_BINLOG_UNSAFE_SYSTEM_FUNCTION, + ER_BINLOG_UNSAFE_NONTRANS_AFTER_TRANS, + ER_BINLOG_UNSAFE_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE, + ER_BINLOG_UNSAFE_MIXED_STATEMENT, + ER_BINLOG_UNSAFE_INSERT_IGNORE_SELECT, + ER_BINLOG_UNSAFE_INSERT_SELECT_UPDATE, + ER_BINLOG_UNSAFE_WRITE_AUTOINC_SELECT, + ER_BINLOG_UNSAFE_REPLACE_SELECT, + ER_BINLOG_UNSAFE_CREATE_IGNORE_SELECT, + ER_BINLOG_UNSAFE_CREATE_REPLACE_SELECT, + ER_BINLOG_UNSAFE_CREATE_SELECT_AUTOINC, + ER_BINLOG_UNSAFE_UPDATE_IGNORE, + ER_BINLOG_UNSAFE_INSERT_TWO_KEYS, + ER_BINLOG_UNSAFE_AUTOINC_NOT_FIRST, + /* + There is no need to add new error code as we plan to get rid of auto + increment lock mode variable, so we use existing error code below, add + the correspondent text to the existing error message during merging to + non-GA release. 
+ */ + ER_BINLOG_UNSAFE_SYSTEM_VARIABLE, + ER_BINLOG_UNSAFE_SKIP_LOCKED +}; + + +/* Longest standard keyword name */ + +#define TOCK_NAME_LENGTH 24 + +/* + The following data is based on the latin1 character set, and is only + used when comparing keywords +*/ + +static uchar to_upper_lex[]= +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,247,216,217,218,219,220,221,222,255 +}; + +/* + Names of the index hints (for error messages). 
Keep in sync with + index_hint_type +*/ + +const char * index_hint_type_name[] = +{ + "IGNORE INDEX", + "USE INDEX", + "FORCE INDEX" +}; + +inline int lex_casecmp(const char *s, const char *t, uint len) +{ + while (len-- != 0 && + to_upper_lex[(uchar) *s++] == to_upper_lex[(uchar) *t++]) ; + return (int) len+1; +} + +#include + + +void lex_init(void) +{ + uint i; + DBUG_ENTER("lex_init"); + for (i=0 ; i < array_elements(symbols) ; i++) + symbols[i].length=(uchar) strlen(symbols[i].name); + for (i=0 ; i < array_elements(sql_functions) ; i++) + sql_functions[i].length=(uchar) strlen(sql_functions[i].name); + + DBUG_VOID_RETURN; +} + + +void lex_free(void) +{ // Call this when daemon ends + DBUG_ENTER("lex_free"); + DBUG_VOID_RETURN; +} + +/** + Initialize lex object for use in fix_fields and parsing. + + SYNOPSIS + init_lex_with_single_table() + @param thd The thread object + @param table The table object + @return Operation status + @retval TRUE An error occurred, memory allocation error + @retval FALSE Ok + + DESCRIPTION + This function is used to initialize a lex object on the + stack for use by fix_fields and for parsing. In order to + work properly it also needs to initialize the + Name_resolution_context object of the lexer. + Finally it needs to set a couple of variables to ensure + proper functioning of fix_fields. +*/ + +int +init_lex_with_single_table(THD *thd, TABLE *table, LEX *lex) +{ + TABLE_LIST *table_list; + Table_ident *table_ident; + SELECT_LEX *select_lex= lex->first_select_lex(); + Name_resolution_context *context= &select_lex->context; + /* + We will call the parser to create a part_info struct based on the + partition string stored in the frm file. + We will use a local lex object for this purpose. However we also + need to set the Name_resolution_object for this lex object. We + do this by using add_table_to_list where we add the table that + we're working with to the Name_resolution_context. 
+ */ + thd->lex= lex; + lex_start(thd); + context->init(); + if (unlikely((!(table_ident= new Table_ident(thd, + &table->s->db, + &table->s->table_name, + TRUE)))) || + (unlikely(!(table_list= select_lex->add_table_to_list(thd, + table_ident, + NULL, + 0))))) + return TRUE; + context->resolve_in_table_list_only(table_list); + lex->use_only_table_context= TRUE; + select_lex->cur_pos_in_select_list= UNDEF_POS; + table->map= 1; //To ensure correct calculation of const item + table_list->table= table; + table_list->cacheable_table= false; + lex->create_last_non_select_table= table_list; + return FALSE; +} + +/** + End use of local lex with single table + + SYNOPSIS + end_lex_with_single_table() + @param thd The thread object + @param table The table object + @param old_lex The real lex object connected to THD + + DESCRIPTION + This function restores the real lex object after calling + init_lex_with_single_table and also restores some table + variables temporarily set. +*/ + +void +end_lex_with_single_table(THD *thd, TABLE *table, LEX *old_lex) +{ + LEX *lex= thd->lex; + table->map= 0; + table->get_fields_in_item_tree= FALSE; + lex_end(lex); + thd->lex= old_lex; +} + + +void +st_parsing_options::reset() +{ + allows_variable= TRUE; + lookup_keywords_after_qualifier= false; +} + + +/** + Perform initialization of Lex_input_stream instance. + + Basically, a buffer for pre-processed query. This buffer should be large + enough to keep multi-statement query. The allocation is done once in + Lex_input_stream::init() in order to prevent memory pollution when + the server is processing large multi-statement queries. 
*/

bool Lex_input_stream::init(THD *thd,
                            char* buff,
                            size_t length)
{
  DBUG_EXECUTE_IF("bug42064_simulate_oom",
                  DBUG_SET("+d,simulate_out_of_memory"););

  /* Pre-processed-query buffer; +1 for the trailing NUL. */
  m_cpp_buf= (char*) thd->alloc(length + 1);

  /*
    NOTE(review): the keyword removed below ("bug42064_simulate_oom")
    differs from the one set above ("simulate_out_of_memory") — looks
    suspicious; confirm against the DBUG facility's intended usage.
  */
  DBUG_EXECUTE_IF("bug42064_simulate_oom",
                  DBUG_SET("-d,bug42064_simulate_oom"););

  if (m_cpp_buf == NULL)
    return true;

  m_thd= thd;
  reset(buff, length);

  return false;
}


/**
  Prepare Lex_input_stream instance state for use for handling next SQL
  statement.

  It should be called between two statements in a multi-statement query.
  The operation resets the input stream to the beginning-of-parse state,
  but does not reallocate m_cpp_buf.
*/

void
Lex_input_stream::reset(char *buffer, size_t length)
{
  yylineno= 1;
  lookahead_token= -1;
  lookahead_yylval= NULL;
  m_ptr= buffer;
  m_tok_start= NULL;
  m_tok_end= NULL;
  m_end_of_query= buffer + length;
  m_tok_start_prev= NULL;
  m_buf= buffer;
  m_buf_length= length;
  m_echo= TRUE;
  m_cpp_tok_start= NULL;
  m_cpp_tok_start_prev= NULL;
  m_cpp_tok_end= NULL;
  m_body_utf8= NULL;
  m_cpp_utf8_processed_ptr= NULL;
  next_state= MY_LEX_START;
  found_semicolon= NULL;
  ignore_space= MY_TEST(m_thd->variables.sql_mode & MODE_IGNORE_SPACE);
  stmt_prepare_mode= FALSE;
  multi_statements= TRUE;
  in_comment=NO_COMMENT;
  m_underscore_cs= NULL;
  /* Writing into the pre-processed buffer restarts from its beginning. */
  m_cpp_ptr= m_cpp_buf;
}


/**
  The operation is called from the parser in order to
  1) designate the intention to have utf8 body;
  2) Indicate to the lexer that we will need a utf8 representation of this
     statement;
  3) Determine the beginning of the body.

  @param thd        Thread context.
  @param begin_ptr  Pointer to the start of the body in the pre-processed
                    buffer.
*/

void Lex_input_stream::body_utf8_start(THD *thd, const char *begin_ptr)
{
  DBUG_ASSERT(begin_ptr);
  DBUG_ASSERT(m_cpp_buf <= begin_ptr && begin_ptr <= m_cpp_buf + m_buf_length);

  size_t body_utf8_length= get_body_utf8_maximum_length(thd);

  /* Allocate the utf8 body buffer; +1 for the trailing NUL. */
  m_body_utf8= (char *) thd->alloc(body_utf8_length + 1);
  m_body_utf8_ptr= m_body_utf8;
  *m_body_utf8_ptr= 0;

  m_cpp_utf8_processed_ptr= begin_ptr;
}


size_t Lex_input_stream::get_body_utf8_maximum_length(THD *thd) const
{
  /*
    String literals can grow during escaping:
    1a. Character string '<tab>' can grow to '\t', 3 bytes to 4 bytes growth.
    1b. Character string '1000 times <tab>' grows from
        1002 to 2002 bytes (including quotes), which gives a little bit
        less than 2 times growth.
    "2" should be a reasonable multiplier that safely covers escaping needs.
    (NOTE(review): the quoted examples above were partially lost in
    extraction and reconstructed — confirm the exact wording.)
  */
  return (m_buf_length / thd->variables.character_set_client->mbminlen) *
          my_charset_utf8mb3_bin.mbmaxlen * 2/*for escaping*/;
}


/**
  @brief The operation appends unprocessed part of pre-processed buffer till
  the given pointer (ptr) and sets m_cpp_utf8_processed_ptr to end_ptr.

  The idea is that some tokens in the pre-processed buffer (like character
  set introducers) should be skipped.

  Example:
    CPP buffer: SELECT 'str1', _latin1 'str2';
    m_cpp_utf8_processed_ptr -- points at the "SELECT ...";
    In order to skip "_latin1", the following call should be made:
      body_utf8_append(<pos of "_latin1">, <pos of "'str2'">)
    (NOTE(review): the bracketed call arguments were lost in extraction
    and reconstructed — confirm.)

  @param ptr      Pointer in the pre-processed buffer, which specifies the
                  end of the chunk, which should be appended to the utf8
                  body.
  @param end_ptr  Pointer in the pre-processed buffer, to which
                  m_cpp_utf8_processed_ptr will be set in the end of the
                  operation.
*/

void Lex_input_stream::body_utf8_append(const char *ptr,
                                        const char *end_ptr)
{
  DBUG_ASSERT(m_cpp_buf <= ptr && ptr <= m_cpp_buf + m_buf_length);
  DBUG_ASSERT(m_cpp_buf <= end_ptr && end_ptr <= m_cpp_buf + m_buf_length);

  /* No utf8 body requested for this statement. */
  if (!m_body_utf8)
    return;

  /* Already copied up to (or past) this point. */
  if (m_cpp_utf8_processed_ptr >= ptr)
    return;

  size_t bytes_to_copy= ptr - m_cpp_utf8_processed_ptr;

  memcpy(m_body_utf8_ptr, m_cpp_utf8_processed_ptr, bytes_to_copy);
  m_body_utf8_ptr += bytes_to_copy;
  *m_body_utf8_ptr= 0;

  m_cpp_utf8_processed_ptr= end_ptr;
}

/**
  The operation appends unprocessed part of the pre-processed buffer till
  the given pointer (ptr) and sets m_cpp_utf8_processed_ptr to ptr.

  @param ptr  Pointer in the pre-processed buffer, which specifies the end
              of the chunk, which should be appended to the utf8 body.
*/

void Lex_input_stream::body_utf8_append(const char *ptr)
{
  body_utf8_append(ptr, ptr);
}

/**
  The operation converts the specified text literal to the utf8 and appends
  the result to the utf8-body.

  @param thd     Thread context.
  @param txt     Text literal.
  @param txt_cs  Character set of the text literal.
                 (NOTE(review): documented but not present in the signature
                 below — the conversion uses THD's system-charset helper;
                 confirm whether the doc is stale.)
  @param end_ptr Pointer in the pre-processed buffer, to which
                 m_cpp_utf8_processed_ptr will be set in the end of the
                 operation.
*/

void
Lex_input_stream::body_utf8_append_ident(THD *thd,
                                         const Lex_string_with_metadata_st *txt,
                                         const char *end_ptr)
{
  if (!m_cpp_utf8_processed_ptr)
    return;

  LEX_CSTRING utf_txt;
  thd->make_text_string_sys(&utf_txt, txt); // QQ: check return value?

  /* NOTE: utf_txt.length is in bytes, not in symbols. */
  memcpy(m_body_utf8_ptr, utf_txt.str, utf_txt.length);
  m_body_utf8_ptr += utf_txt.length;
  *m_body_utf8_ptr= 0;

  m_cpp_utf8_processed_ptr= end_ptr;
}




extern "C" {

/**
  Escape a character. Consequently puts "escape" and "wc" characters into
  the destination utf8 string.
  @param cs     - the character set (utf8)
  @param escape - the escape character (backslash, single quote, double quote)
  @param wc     - the character to be escaped
  @param str    - the destination string
  @param end    - the end of the destination string
  @returns      - a code according to the wc_mb() convension.
*/
int my_wc_mb_utf8mb3_with_escape(CHARSET_INFO *cs, my_wc_t escape, my_wc_t wc,
                                 uchar *str, uchar *end)
{
  DBUG_ASSERT(escape > 0);
  if (str + 1 >= end)
    return MY_CS_TOOSMALL2;  // Not enough space, need at least two bytes.
  *str= (uchar)escape;
  int cnvres= my_charset_utf8mb3_handler.wc_mb(cs, wc, str + 1, end);
  if (cnvres > 0)
    return cnvres + 1;       // The character was normally put
  if (cnvres == MY_CS_ILUNI)
    return MY_CS_ILUNI;      // Could not encode "wc" (e.g. non-BMP character)
  DBUG_ASSERT(cnvres <= MY_CS_TOOSMALL);
  return cnvres - 1;         // Not enough space
}


/**
  Optionally escape a character.
  If "escape" is non-zero, then both "escape" and "wc" are put to
  the destination string. Otherwise, only "wc" is put.
  @param cs     - the character set (utf8)
  @param wc     - the character to be optionally escaped
  @param escape - the escape character, or 0
  @param ewc    - the escaped replacement of "wc" (e.g. 't' for '\t')
  @param str    - the destination string
  @param end    - the end of the destination string
  @returns      - a code according to the wc_mb() conversion.
*/
int my_wc_mb_utf8mb3_opt_escape(CHARSET_INFO *cs,
                                my_wc_t wc, my_wc_t escape, my_wc_t ewc,
                                uchar *str, uchar *end)
{
  return escape ? my_wc_mb_utf8mb3_with_escape(cs, escape, ewc, str, end) :
                  my_charset_utf8mb3_handler.wc_mb(cs, wc, str, end);
}

/**
  Encode a character with optional backlash escaping and quote escaping.
  Quote marks are escaped using another quote mark.
  Additionally, if "escape" is non-zero, then special characters are
  also escaped using "escape".
  Otherwise (if "escape" is zero, e.g.
  in case of MODE_NO_BACKSLASH_ESCAPES),
  then special characters are not escaped and handled as normal characters.

  @param cs     - the character set (utf8)
  @param wc     - the character to be encoded
  @param str    - the destination string
  @param end    - the end of the destination string
  @param sep    - the string delimiter (e.g. ' or ")
  @param escape - the escape character (backslash, or 0)
  @returns      - a code according to the wc_mb() convension.
*/
int my_wc_mb_utf8mb3_escape(CHARSET_INFO *cs, my_wc_t wc,
                            uchar *str, uchar *end,
                            my_wc_t sep, my_wc_t escape)
{
  DBUG_ASSERT(escape == 0 || escape == '\\');
  DBUG_ASSERT(sep == '"' || sep == '\'');
  switch (wc) {
  case 0:      return my_wc_mb_utf8mb3_opt_escape(cs, wc, escape, '0', str, end);
  case '\t':   return my_wc_mb_utf8mb3_opt_escape(cs, wc, escape, 't', str, end);
  case '\r':   return my_wc_mb_utf8mb3_opt_escape(cs, wc, escape, 'r', str, end);
  case '\n':   return my_wc_mb_utf8mb3_opt_escape(cs, wc, escape, 'n', str, end);
  case '\032': return my_wc_mb_utf8mb3_opt_escape(cs, wc, escape, 'Z', str, end);
  case '\'':
  case '\"':
    /* The active delimiter is doubled; the other quote passes through. */
    if (wc == sep)
      return my_wc_mb_utf8mb3_with_escape(cs, wc, wc, str, end);
  }
  return my_charset_utf8mb3_handler.wc_mb(cs, wc, str, end); // No escaping needed
}


/** wc_mb() compatible routines for all sql_mode and delimiter combinations */
int my_wc_mb_utf8mb3_escape_single_quote_and_backslash(CHARSET_INFO *cs,
                                                       my_wc_t wc,
                                                       uchar *str, uchar *end)
{
  return my_wc_mb_utf8mb3_escape(cs, wc, str, end, '\'', '\\');
}


int my_wc_mb_utf8mb3_escape_double_quote_and_backslash(CHARSET_INFO *cs,
                                                       my_wc_t wc,
                                                       uchar *str, uchar *end)
{
  return my_wc_mb_utf8mb3_escape(cs, wc, str, end, '"', '\\');
}


int my_wc_mb_utf8mb3_escape_single_quote(CHARSET_INFO *cs, my_wc_t wc,
                                         uchar *str, uchar *end)
{
  return my_wc_mb_utf8mb3_escape(cs, wc, str, end, '\'', 0);
}


int my_wc_mb_utf8mb3_escape_double_quote(CHARSET_INFO *cs, my_wc_t wc,
                                         uchar *str, uchar *end)
{
  return my_wc_mb_utf8mb3_escape(cs, wc, str, end, '"', 0);
}

}; // End of extern "C"


/**
  Get an escaping function, depending on the current sql_mode and the
  string separator.
*/
my_charset_conv_wc_mb
Lex_input_stream::get_escape_func(THD *thd, my_wc_t sep) const
{
  return thd->backslash_escapes() ?
         (sep == '"' ? my_wc_mb_utf8mb3_escape_double_quote_and_backslash:
                       my_wc_mb_utf8mb3_escape_single_quote_and_backslash) :
         (sep == '"' ? my_wc_mb_utf8mb3_escape_double_quote:
                       my_wc_mb_utf8mb3_escape_single_quote);
}


/**
  Append a text literal to the end of m_body_utf8.
  The string is escaped according to the current sql_mode and the
  string delimiter (e.g. ' or ").

  @param thd     - current THD
  @param txt     - the string to be appended to m_body_utf8.
                   Note, the string must be already unescaped.
  @param cs      - the character set of the string
  @param end_ptr - m_cpp_utf8_processed_ptr will be set to this value
                   (see body_utf8_append_ident for details)
  @param sep     - the string delimiter (single or double quote)
*/
void Lex_input_stream::body_utf8_append_escape(THD *thd,
                                               const LEX_CSTRING *txt,
                                               CHARSET_INFO *cs,
                                               const char *end_ptr,
                                               my_wc_t sep)
{
  DBUG_ASSERT(sep == '\'' || sep == '"');
  if (!m_cpp_utf8_processed_ptr)
    return;
  uint errors;
  /**
     We previously alloced m_body_utf8 to be able to store the query with all
     strings properly escaped. See get_body_utf8_maximum_length().
     So here we have guaranteedly enough space to append any string literal
     with escaping. Passing txt->length*2 as "available space" is always safe.
     For better safety purposes we could calculate get_body_utf8_maximum_length()
     every time we append a string, but this would affect performance negatively,
     so let's check that we don't get beyond the allocated buffer in
     debug build only.
  */
  DBUG_ASSERT(m_body_utf8 + get_body_utf8_maximum_length(thd) >=
              m_body_utf8_ptr + txt->length * 2);
  uint32 cnv_length= my_convert_using_func(m_body_utf8_ptr, txt->length * 2,
                                           &my_charset_utf8mb3_general_ci,
                                           get_escape_func(thd, sep),
                                           txt->str, txt->length,
                                           cs, cs->cset->mb_wc,
                                           &errors);
  m_body_utf8_ptr+= cnv_length;
  *m_body_utf8_ptr= 0;
  m_cpp_utf8_processed_ptr= end_ptr;
}


/* Feed a token into the statement-digest calculator, if digests are on. */
void Lex_input_stream::add_digest_token(uint token, LEX_YYSTYPE yylval)
{
  if (m_digest != NULL)
  {
    m_digest= digest_add_token(m_digest, token, yylval);
  }
}

/* Collapse two digest tokens into one (e.g. for negative numbers). */
void Lex_input_stream::reduce_digest_token(uint token_left, uint token_right)
{
  if (m_digest != NULL)
  {
    m_digest= digest_reduce_token(m_digest, token_left, token_right);
  }
}

/**
  lex starting operations for builtin select collected together
*/

void SELECT_LEX::lex_start(LEX *plex)
{
  SELECT_LEX_UNIT *unit= &plex->unit;
  /* 'parent_lex' is used in init_query() so it must be before it. */
  parent_lex= plex;
  init_query();
  master= unit;
  prev= &unit->slave;
  link_next= slave= next= 0;
  link_prev= (st_select_lex_node**)&(plex->all_selects_list);
  DBUG_ASSERT(!group_list_ptrs);
  select_number= 1;
  in_sum_expr=0;
  ftfunc_list_alloc.empty();
  ftfunc_list= &ftfunc_list_alloc;
  group_list.empty();
  order_list.empty();
  gorder_list.empty();
}

void lex_start(THD *thd)
{
  DBUG_ENTER("lex_start");
  thd->lex->start(thd);
  DBUG_VOID_RETURN;
}


/*
  This is called before every query that is to be parsed.
  Because of this, it's critical to not do too much things here.
+ (We already do too much here) +*/ + +void LEX::start(THD *thd_arg) +{ + DBUG_ENTER("LEX::start"); + DBUG_PRINT("info", ("This: %p thd_arg->lex: %p", this, thd_arg->lex)); + + thd= unit.thd= thd_arg; + stmt_lex= this; // default, should be rewritten for VIEWs And CTEs + + DBUG_ASSERT(!explain); + + builtin_select.lex_start(this); + lex_options= 0; + context_stack.empty(); + //empty select_stack + select_stack_top= 0; + select_stack_outer_barrier= 0; + unit.init_query(); + current_select_number= 0; + curr_with_clause= 0; + with_clauses_list= 0; + with_clauses_list_last_next= &with_clauses_list; + clone_spec_offset= 0; + create_view= NULL; + field_list.empty(); + value_list.empty(); + update_list.empty(); + set_var_list.empty(); + param_list.empty(); + view_list.empty(); + with_persistent_for_clause= FALSE; + column_list= NULL; + index_list= NULL; + prepared_stmt.lex_start(); + auxiliary_table_list.empty(); + unit.next= unit.master= unit.link_next= unit.return_to= 0; + unit.prev= unit.link_prev= 0; + unit.slave= current_select= all_selects_list= &builtin_select; + sql_cache= LEX::SQL_CACHE_UNSPECIFIED; + describe= 0; + context_analysis_only= 0; + derived_tables= 0; + with_cte_resolution= false; + only_cte_resolution= false; + parsing_options.reset(); + part_info= 0; + m_sql_cmd= NULL; + duplicates= DUP_ERROR; + spname= NULL; + spcont= NULL; + proc_list.first= 0; + query_tables= 0; + reset_query_tables_list(FALSE); + clause_that_disallows_subselect= NULL; + + /* reset bool variables */ + is_shutdown_wait_for_slaves= 0; + selects_allow_procedure= 0; + parse_vcol_expr= 0; + analyze_stmt= 0; + explain_json= 0; + local_file= 0; + check_exists= 0; + verbose= 0; + safe_to_cache_query= 1; + ignore= 0; + next_is_main= 0; + next_is_down= 0; + empty_field_list_on_rset= 0; + use_only_table_context= 0; + escape_used= 0; + default_used= 0; + with_rownum= FALSE; + is_lex_started= 1; + + create_info.lex_start(); + name= null_clex_str; + event_parse_data= NULL; + profile_options= 
PROFILE_NONE; + nest_level= 0; + builtin_select.nest_level_base= &unit; + allow_sum_func.clear_all(); + in_sum_func= NULL; + + used_tables= 0; + table_type= TABLE_TYPE_UNKNOWN; + reset_slave_info.all= false; + limit_rows_examined= 0; + limit_rows_examined_cnt= ULONGLONG_MAX; + var_list.empty(); + stmt_var_list.empty(); + proc_list.elements=0; + + win_ref= NULL; + win_frame= NULL; + frame_top_bound= NULL; + frame_bottom_bound= NULL; + win_spec= NULL; + + upd_del_where= NULL; + + vers_conditions.empty(); + period_conditions.empty(); + + wild= 0; + exchange= 0; + + DBUG_VOID_RETURN; +} + +void lex_end(LEX *lex) +{ + DBUG_ENTER("lex_end"); + DBUG_PRINT("enter", ("lex: %p", lex)); + + lex_unlock_plugins(lex); + lex_end_nops(lex); + + DBUG_VOID_RETURN; +} + +void lex_unlock_plugins(LEX *lex) +{ + DBUG_ENTER("lex_unlock_plugins"); + + /* release used plugins */ + if (lex->plugins.elements) /* No function call and no mutex if no plugins. */ + { + plugin_unlock_list(0, (plugin_ref*)lex->plugins.buffer, + lex->plugins.elements); + } + reset_dynamic(&lex->plugins); + DBUG_VOID_RETURN; +} + +/* + Don't delete lex->sphead, it'll be needed for EXECUTE. + Note that of all statements that populate lex->sphead + only SQLCOM_COMPOUND can be PREPAREd + + MASTER INFO parameters (or state) is normally cleared towards the end + of a statement. But in case of PS, the state needs to be preserved during + its lifetime and should only be cleared on PS close or deallocation. 
*/
void lex_end_nops(LEX *lex)
{
  DBUG_ENTER("lex_end_nops");
  sp_head::destroy(lex->sphead);
  lex->sphead= NULL;

  /* Reset LEX_MASTER_INFO */
  lex->mi.reset(lex->sql_command == SQLCOM_CHANGE_MASTER);
  delete_dynamic(&lex->delete_gtid_domain);

  DBUG_VOID_RETURN;
}

Yacc_state::~Yacc_state()
{
  /*
    yacc_yyss and yacc_yyvs are allocated/resized together; a non-NULL
    state stack implies the value stack must be freed too.
  */
  if (yacc_yyss)
  {
    my_free(yacc_yyss);
    my_free(yacc_yyvs);
  }
}

/*
  Look up the token at m_tok_start in the keyword hash and translate it for
  the active sql_mode (Oracle keyword variants, HIGH_NOT_PRECEDENCE,
  PIPES_AS_CONCAT). Returns 0 when the text is not a keyword.
*/
int Lex_input_stream::find_keyword(Lex_ident_cli_st *kwd,
                                   uint len, bool function) const
{
  const char *tok= m_tok_start;

  SYMBOL *symbol= get_hash_symbol(tok, len, function);
  if (symbol)
  {
    kwd->set_keyword(tok, len);
    DBUG_ASSERT(tok >= get_buf());
    DBUG_ASSERT(tok < get_end_of_query());

    if (m_thd->variables.sql_mode & MODE_ORACLE)
    {
      switch (symbol->tok) {
      case BEGIN_MARIADB_SYM:    return BEGIN_ORACLE_SYM;
      case BLOB_MARIADB_SYM:     return BLOB_ORACLE_SYM;
      case BODY_MARIADB_SYM:     return BODY_ORACLE_SYM;
      case CLOB_MARIADB_SYM:     return CLOB_ORACLE_SYM;
      case CONTINUE_MARIADB_SYM: return CONTINUE_ORACLE_SYM;
      case DECLARE_MARIADB_SYM:  return DECLARE_ORACLE_SYM;
      case ELSEIF_MARIADB_SYM:   return ELSEIF_ORACLE_SYM;
      case ELSIF_MARIADB_SYM:    return ELSIF_ORACLE_SYM;
      case EXCEPTION_MARIADB_SYM: return EXCEPTION_ORACLE_SYM;
      case EXIT_MARIADB_SYM:     return EXIT_ORACLE_SYM;
      case GOTO_MARIADB_SYM:     return GOTO_ORACLE_SYM;
      case MINUS_ORACLE_SYM:     return EXCEPT_SYM;
      case NUMBER_MARIADB_SYM:   return NUMBER_ORACLE_SYM;
      case OTHERS_MARIADB_SYM:   return OTHERS_ORACLE_SYM;
      case PACKAGE_MARIADB_SYM:  return PACKAGE_ORACLE_SYM;
      case RAISE_MARIADB_SYM:    return RAISE_ORACLE_SYM;
      case RAW_MARIADB_SYM:      return RAW_ORACLE_SYM;
      case RETURN_MARIADB_SYM:   return RETURN_ORACLE_SYM;
      case ROWTYPE_MARIADB_SYM:  return ROWTYPE_ORACLE_SYM;
      case VARCHAR2_MARIADB_SYM: return VARCHAR2_ORACLE_SYM;
      }
    }

    if ((symbol->tok == NOT_SYM) &&
        (m_thd->variables.sql_mode & MODE_HIGH_NOT_PRECEDENCE))
      return NOT2_SYM;
    if ((symbol->tok == OR2_SYM) &&
        (m_thd->variables.sql_mode & MODE_PIPES_AS_CONCAT))
    {
      return (m_thd->variables.sql_mode & MODE_ORACLE) ?
             ORACLE_CONCAT_SYM : MYSQL_CONCAT_SYM;
    }

    return symbol->tok;
  }
  return 0;
}

/*
  Check if name is a keyword

  SYNOPSIS
    is_keyword()
    name      checked name (must not be empty)
    len       length of checked name

  RETURN VALUES
    0         name isn't a keyword
    1         name is a keyword

  NOTE(review): the return values were documented inverted; the code
  returns nonzero exactly when the name IS a keyword.
*/

bool is_keyword(const char *name, uint len)
{
  DBUG_ASSERT(len != 0);
  return get_hash_symbol(name,len,0)!=0;
}

/**
  Check if name is a sql function

  @param name      checked name

  @return is this a native function or not
  @retval 0        name isn't a function
  @retval 1        name is a function

  @note The retval descriptions were previously inverted; the code returns
        nonzero exactly when the name IS a native lexer function.
*/

bool is_lex_native_function(const LEX_CSTRING *name)
{
  DBUG_ASSERT(name != NULL);
  return (get_hash_symbol(name->str, (uint) name->length, 1) != 0);
}


/* True if the name resolves to any native function or data type handler. */
bool is_native_function(THD *thd, const LEX_CSTRING *name)
{
  if (native_functions_hash.find(thd, *name))
    return true;

  if (is_lex_native_function(name))
    return true;

  if (Type_handler::handler_by_name(thd, *name))
    return true;

  return false;
}


bool is_native_function_with_warn(THD *thd, const LEX_CSTRING *name)
{
  if (!is_native_function(thd, name))
    return false;
  /*
    This warning will be printed when
    [1] A client query is parsed,
    [2] A stored function is loaded by db_load_routine.
    Printing the warning for [2] is intentional, to cover the
    following scenario:
    - A user defines a SF 'foo' using MySQL 5.N
    - An application uses select foo(), and works.
    - MySQL 5.{N+1} defines a new native function 'foo', as
      part of a new feature.
    - MySQL 5.{N+1} documentation is updated, and should mention
      that there is a potential incompatible change in case of
      existing stored function named 'foo'.
    - The user deploys 5.{N+1}. At this point, 'select foo()'
      means something different, and the user code is most likely
      broken (it's only safe if the code is 'select db.foo()').
+ With a warning printed when the SF is loaded (which has to + occur before the call), the warning will provide a hint + explaining the root cause of a later failure of 'select foo()'. + With no warning printed, the user code will fail with no + apparent reason. + Printing a warning each time db_load_routine is executed for + an ambiguous function is annoying, since that can happen a lot, + but in practice should not happen unless there *are* name + collisions. + If a collision exists, it should not be silenced but fixed. + */ + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_NOTE, + ER_NATIVE_FCT_NAME_COLLISION, + ER_THD(thd, ER_NATIVE_FCT_NAME_COLLISION), + name->str); + return true; +} + + +/* make a copy of token before ptr and set yytoklen */ + +LEX_CSTRING Lex_input_stream::get_token(uint skip, uint length) +{ + LEX_CSTRING tmp; + yyUnget(); // ptr points now after last token char + tmp.length= length; + tmp.str= m_thd->strmake(m_tok_start + skip, tmp.length); + + m_cpp_text_start= m_cpp_tok_start + skip; + m_cpp_text_end= m_cpp_text_start + tmp.length; + + return tmp; +} + + +static size_t +my_unescape(CHARSET_INFO *cs, char *to, const char *str, const char *end, + int sep, bool backslash_escapes) +{ + char *start= to; + for ( ; str != end ; str++) + { +#ifdef USE_MB + int l; + if (cs->use_mb() && (l= my_ismbchar(cs, str, end))) + { + while (l--) + *to++ = *str++; + str--; + continue; + } +#endif + if (backslash_escapes && *str == '\\' && str + 1 != end) + { + switch(*++str) { + case 'n': + *to++='\n'; + break; + case 't': + *to++= '\t'; + break; + case 'r': + *to++ = '\r'; + break; + case 'b': + *to++ = '\b'; + break; + case '0': + *to++= 0; // Ascii null + break; + case 'Z': // ^Z must be escaped on Win32 + *to++='\032'; + break; + case '_': + case '%': + *to++= '\\'; // remember prefix for wildcard + /* Fall through */ + default: + *to++= *str; + break; + } + } + else if (*str == sep) + *to++= *str++; // Two ' or " + else + *to++ = *str; + } + *to= 
0; + return to - start; +} + + +size_t +Lex_input_stream::unescape(CHARSET_INFO *cs, char *to, + const char *str, const char *end, + int sep) +{ + return my_unescape(cs, to, str, end, sep, m_thd->backslash_escapes()); +} + + +/* + Return an unescaped text literal without quotes + Fix sometimes to do only one scan of the string +*/ + +bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep, + int pre_skip, int post_skip) +{ + uchar c; + uint found_escape=0; + CHARSET_INFO *cs= m_thd->charset(); + bool is_8bit= false; + + while (! eof()) + { + c= yyGet(); + if (c & 0x80) + is_8bit= true; +#ifdef USE_MB + { + int l; + if (cs->use_mb() && + (l = my_ismbchar(cs, + get_ptr() -1, + get_end_of_query()))) { + skip_binary(l-1); + continue; + } + } +#endif + if (c == '\\' && + !(m_thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES)) + { // Escaped character + found_escape=1; + if (eof()) + return true; + yySkip(); + } + else if (c == sep) + { + if (c == yyGet()) // Check if two separators in a row + { + found_escape=1; // duplicate. Remember for delete + continue; + } + else + yyUnget(); + + /* Found end. Unescape and return string */ + const char *str, *end; + char *to; + + str= m_tok_start; + end= get_ptr(); + /* Extract the text from the token */ + str += pre_skip; + end -= post_skip; + DBUG_ASSERT(end >= str); + + if (!(to= (char*) m_thd->alloc((uint) (end - str) + 1))) + { + dst->set(&empty_clex_str, 0, '\0'); + return true; // Sql_alloc has set error flag + } + + m_cpp_text_start= m_cpp_tok_start + pre_skip; + m_cpp_text_end= get_cpp_ptr() - post_skip; + + if (!found_escape) + { + size_t len= (end - str); + memcpy(to, str, len); + to[len]= '\0'; + dst->set(to, len, is_8bit, '\0'); + } + else + { + size_t len= unescape(cs, to, str, end, sep); + dst->set(to, len, is_8bit, '\0'); + } + return false; + } + } + return true; // unexpected end of query +} + + +/* +** Calc type of integer; long integer, longlong integer or real. 
** Returns the smallest type that matches the string.
** When using unsigned long long values the result is converted to a real
** because otherwise there would be unexpected sign changes because all
** calculation is done with longlong or double.
*/

/* Boundary literals used to classify integer literals by magnitude. */
static const char *long_str="2147483647";
static const uint long_len=10;
static const char *signed_long_str="-2147483648";
static const char *longlong_str="9223372036854775807";
static const uint longlong_len=19;
static const char *signed_longlong_str="-9223372036854775808";
static const uint signed_longlong_len=19;
static const char *unsigned_longlong_str="18446744073709551615";
static const uint unsigned_longlong_len=20;

static inline uint int_token(const char *str,uint length)
{
  if (length < long_len)			// quick normal case
    return NUM;
  bool neg=0;

  if (*str == '+')				// Remove sign and pre-zeros
  {
    str++; length--;
  }
  else if (*str == '-')
  {
    str++; length--;
    neg=1;
  }
  while (*str == '0' && length)
  {
    str++; length --;
  }
  if (length < long_len)
    return NUM;

  uint smaller,bigger;
  const char *cmp;
  if (neg)
  {
    if (length == long_len)
    {
      cmp= signed_long_str + 1;
      smaller= NUM;				// If <= signed_long_str
      bigger= LONG_NUM;				// If >= signed_long_str
    }
    else if (length < signed_longlong_len)
      return LONG_NUM;
    else if (length > signed_longlong_len)
      return DECIMAL_NUM;
    else
    {
      cmp= signed_longlong_str + 1;
      smaller= LONG_NUM;			// If <= signed_longlong_str
      bigger=DECIMAL_NUM;
    }
  }
  else
  {
    if (length == long_len)
    {
      cmp= long_str;
      smaller=NUM;
      bigger=LONG_NUM;
    }
    else if (length < longlong_len)
      return LONG_NUM;
    else if (length > longlong_len)
    {
      if (length > unsigned_longlong_len)
        return DECIMAL_NUM;
      cmp=unsigned_longlong_str;
      smaller=ULONGLONG_NUM;
      bigger=DECIMAL_NUM;
    }
    else
    {
      cmp=longlong_str;
      smaller=LONG_NUM;
      bigger= ULONGLONG_NUM;
    }
  }
  /* Digit-by-digit compare against the boundary literal of equal length */
  while (*cmp && *cmp++ == *str++) ;
  return ((uchar) str[-1] <= (uchar) cmp[-1]) ? smaller : bigger;
}


/**
  Given a stream that is advanced to the first contained character in
  an open comment, consume the comment. Optionally, if we are allowed,
  recurse so that we understand comments within this current comment.

  At this level, we do not support version-condition comments. We might
  have been called with having just passed one in the stream, though. In
  that case, we probably want to tolerate mundane comments inside. Thus,
  the case for recursion.

  @retval Whether EOF reached before comment is closed.
*/
bool Lex_input_stream::consume_comment(int remaining_recursions_permitted)
{
  // only one level of nested comments is allowed
  DBUG_ASSERT(remaining_recursions_permitted == 0 ||
              remaining_recursions_permitted == 1);
  uchar c;
  while (!eof())
  {
    c= yyGet();

    if (remaining_recursions_permitted == 1)
    {
      if ((c == '/') && (yyPeek() == '*'))
      {
        yyUnput('(');  // Replace nested "/*..." with "(*..."
        yySkip();      // and skip "("

        yySkip(); /* Eat asterisk */
        if (consume_comment(0))
          return true;

        yyUnput(')');  // Replace "...*/" with "...*)"
        yySkip();      // and skip ")"
        continue;
      }
    }

    if (c == '*')
    {
      if (yyPeek() == '/')
      {
        yySkip(); // Eat slash
        return FALSE;
      }
    }

    if (c == '\n')
      yylineno++;
  }

  return TRUE;
}


/*
  MYSQLlex remembers the following states from the previous MYSQLlex() call

  @param yylval         [out]  semantic value of the token being parsed (yylval)
  @param thd            THD

  - MY_LEX_EOQ                  Found end of query
  - MY_LEX_OPERATOR_OR_IDENT    Last state was an ident, text or number
                                (which can't be followed by a signed number)
*/

int MYSQLlex(YYSTYPE *yylval, THD *thd)
{
  return thd->m_parser_state->m_lip.lex_token(yylval, thd);
}


/* Oracle-mode entry point; same tokenizer, mode differences handled inside. */
int ORAlex(YYSTYPE *yylval, THD *thd)
{
  return thd->m_parser_state->m_lip.lex_token(yylval, thd);
}


int Lex_input_stream::lex_token(YYSTYPE *yylval, THD *thd)
{
  int token;
  const int left_paren= (int) '(';

  if
(lookahead_token >= 0) + { + /* + The next token was already parsed in advance, + return it. + */ + token= lookahead_token; + lookahead_token= -1; + *yylval= *(lookahead_yylval); + lookahead_yylval= NULL; + return token; + } + + token= lex_one_token(yylval, thd); + add_digest_token(token, yylval); + + SELECT_LEX *curr_sel= thd->lex->current_select; + + switch(token) { + case WITH: + /* + Parsing 'WITH' 'ROLLUP' or 'WITH' 'CUBE' requires 2 look ups, + which makes the grammar LALR(2). + Replace by a single 'WITH_ROLLUP' or 'WITH_CUBE' token, + to transform the grammar into a LALR(1) grammar, + which sql_yacc.yy can process. + */ + token= lex_one_token(yylval, thd); + add_digest_token(token, yylval); + switch(token) { + case CUBE_SYM: + return WITH_CUBE_SYM; + case ROLLUP_SYM: + return WITH_ROLLUP_SYM; + case SYSTEM: + return WITH_SYSTEM_SYM; + default: + /* + Save the token following 'WITH' + */ + lookahead_yylval= yylval; + lookahead_token= token; + return WITH; + } + break; + case FOR_SYM: + /* + * Additional look-ahead to resolve doubtful cases like: + * SELECT ... FOR UPDATE + * SELECT ... FOR SYSTEM_TIME ... . 
+ */ + token= lex_one_token(yylval, thd); + add_digest_token(token, yylval); + switch(token) { + case SYSTEM_TIME_SYM: + return FOR_SYSTEM_TIME_SYM; + default: + /* + Save the token following 'FOR_SYM' + */ + lookahead_yylval= yylval; + lookahead_token= token; + return FOR_SYM; + } + break; + case VALUES: + if (curr_sel && + (curr_sel->parsing_place == BEFORE_OPT_LIST || + curr_sel->parsing_place == AFTER_LIST)) + { + curr_sel->parsing_place= NO_MATTER; + break; + } + if (curr_sel && + (curr_sel->parsing_place == IN_UPDATE_ON_DUP_KEY || + curr_sel->parsing_place == IN_PART_FUNC)) + return VALUE_SYM; + token= lex_one_token(yylval, thd); + add_digest_token(token, yylval); + switch(token) { + case LESS_SYM: + return VALUES_LESS_SYM; + case IN_SYM: + return VALUES_IN_SYM; + default: + lookahead_yylval= yylval; + lookahead_token= token; + return VALUES; + } + case VALUE_SYM: + if (curr_sel && + (curr_sel->parsing_place == BEFORE_OPT_LIST || + curr_sel->parsing_place == AFTER_LIST)) + { + curr_sel->parsing_place= NO_MATTER; + return VALUES; + } + break; + case PARTITION_SYM: + case SELECT_SYM: + case UNION_SYM: + if (curr_sel && + (curr_sel->parsing_place == BEFORE_OPT_LIST || + curr_sel->parsing_place == AFTER_LIST)) + { + curr_sel->parsing_place= NO_MATTER; + } + break; + case left_paren: + if (!curr_sel || + curr_sel->parsing_place != BEFORE_OPT_LIST) + return token; + token= lex_one_token(yylval, thd); + add_digest_token(token, yylval); + lookahead_yylval= yylval; + yylval= NULL; + lookahead_token= token; + curr_sel->parsing_place= NO_MATTER; + if (token == LIKE) + return LEFT_PAREN_LIKE; + if (token == WITH) + return LEFT_PAREN_WITH; + if (token != left_paren && token != SELECT_SYM && token != VALUES) + return LEFT_PAREN_ALT; + else + return left_paren; + break; + default: + break; + } + return token; +} + + +int Lex_input_stream::lex_one_token(YYSTYPE *yylval, THD *thd) +{ + uchar UNINIT_VAR(c); + bool comment_closed; + int tokval; + uint length; + enum 
my_lex_states state; + LEX *lex= thd->lex; + CHARSET_INFO *const cs= thd->charset(); + const uchar *const state_map= cs->state_map; + const uchar *const ident_map= cs->ident_map; + + start_token(); + state= next_state; + next_state= MY_LEX_OPERATOR_OR_IDENT; + for (;;) + { + switch (state) { + case MY_LEX_OPERATOR_OR_IDENT: // Next is operator or keyword + case MY_LEX_START: // Start of token + // Skip starting whitespace + while(state_map[c= yyPeek()] == MY_LEX_SKIP) + { + if (c == '\n') + yylineno++; + + yySkip(); + } + + /* Start of real token */ + restart_token(); + c= yyGet(); + state= (enum my_lex_states) state_map[c]; + break; + case MY_LEX_ESCAPE: + if (!eof() && yyGet() == 'N') + { // Allow \N as shortcut for NULL + yylval->lex_str.str= (char*) "\\N"; + yylval->lex_str.length= 2; + return NULL_SYM; + } + /* Fall through */ + case MY_LEX_CHAR: // Unknown or single char token + if (c == '%' && (m_thd->variables.sql_mode & MODE_ORACLE)) + { + next_state= MY_LEX_START; + return PERCENT_ORACLE_SYM; + } + if (c == '[' && (m_thd->variables.sql_mode & MODE_MSSQL)) + return scan_ident_delimited(thd, &yylval->ident_cli, ']'); + /* Fall through */ + case MY_LEX_SKIP: // This should not happen + if (c != ')') + next_state= MY_LEX_START; // Allow signed numbers + yylval->kwd.set_keyword(m_tok_start, 1); + return((int) c); + + case MY_LEX_MINUS_OR_COMMENT: + if (yyPeek() == '-' && + (my_isspace(cs,yyPeekn(1)) || + my_iscntrl(cs,yyPeekn(1)))) + { + state=MY_LEX_COMMENT; + break; + } + next_state= MY_LEX_START; // Allow signed numbers + return((int) c); + + case MY_LEX_PLACEHOLDER: + /* + Check for a placeholder: it should not precede a possible identifier + because of binlogging: when a placeholder is replaced with + its value in a query for the binlog, the query must stay + grammatically correct. 
+ */ + next_state= MY_LEX_START; // Allow signed numbers + if (stmt_prepare_mode && !ident_map[(uchar) yyPeek()]) + return(PARAM_MARKER); + return((int) c); + + case MY_LEX_COMMA: + next_state= MY_LEX_START; // Allow signed numbers + /* + Warning: + This is a work around, to make the "remember_name" rule in + sql/sql_yacc.yy work properly. + The problem is that, when parsing "select expr1, expr2", + the code generated by bison executes the *pre* action + remember_name (see select_item) *before* actually parsing the + first token of expr2. + */ + restart_token(); + return((int) c); + + case MY_LEX_IDENT_OR_NCHAR: + { + uint sep; + if (yyPeek() != '\'') + { + state= MY_LEX_IDENT; + break; + } + /* Found N'string' */ + yySkip(); // Skip ' + if (get_text(&yylval->lex_string_with_metadata, (sep= yyGetLast()), 2, 1)) + { + state= MY_LEX_CHAR; // Read char by char + break; + } + + body_utf8_append(m_cpp_text_start); + body_utf8_append_escape(thd, &yylval->lex_string_with_metadata, + national_charset_info, + m_cpp_text_end, sep); + return(NCHAR_STRING); + } + case MY_LEX_IDENT_OR_HEX: + if (yyPeek() == '\'') + { // Found x'hex-number' + state= MY_LEX_HEX_NUMBER; + break; + } + /* fall through */ + case MY_LEX_IDENT_OR_BIN: + if (yyPeek() == '\'') + { // Found b'bin-number' + state= MY_LEX_BIN_NUMBER; + break; + } + /* fall through */ + case MY_LEX_IDENT: + { + tokval= scan_ident_middle(thd, &yylval->ident_cli, + &yylval->charset, &state); + if (!tokval) + continue; + if (tokval == UNDERSCORE_CHARSET) + m_underscore_cs= yylval->charset; + return tokval; + } + + case MY_LEX_IDENT_SEP: // Found ident and now '.' + yylval->lex_str.str= (char*) get_ptr(); + yylval->lex_str.length= 1; + c= yyGet(); // should be '.' 
+ if (lex->parsing_options.lookup_keywords_after_qualifier) + next_state= MY_LEX_IDENT_OR_KEYWORD; + else + next_state= MY_LEX_IDENT_START; // Next is ident (not keyword) + if (!ident_map[(uchar) yyPeek()]) // Probably ` or " + next_state= MY_LEX_START; + return((int) c); + + case MY_LEX_NUMBER_IDENT: // number or ident which num-start + if (yyGetLast() == '0') + { + c= yyGet(); + if (c == 'x') + { + while (my_isxdigit(cs, (c = yyGet()))) ; + if ((yyLength() >= 3) && !ident_map[c]) + { + /* skip '0x' */ + yylval->lex_str= get_token(2, yyLength() - 2); + return (HEX_NUM); + } + yyUnget(); + state= MY_LEX_IDENT_START; + break; + } + else if (c == 'b') + { + while ((c= yyGet()) == '0' || c == '1') + ; + if ((yyLength() >= 3) && !ident_map[c]) + { + /* Skip '0b' */ + yylval->lex_str= get_token(2, yyLength() - 2); + return (BIN_NUM); + } + yyUnget(); + state= MY_LEX_IDENT_START; + break; + } + yyUnget(); + } + + while (my_isdigit(cs, (c= yyGet()))) ; + if (!ident_map[c]) + { // Can't be identifier + state=MY_LEX_INT_OR_REAL; + break; + } + if (c == 'e' || c == 'E') + { + // The following test is written this way to allow numbers of type 1e1 + if (my_isdigit(cs, yyPeek()) || + (c=(yyGet())) == '+' || c == '-') + { // Allow 1E+10 + if (my_isdigit(cs, yyPeek())) // Number must have digit after sign + { + yySkip(); + while (my_isdigit(cs, yyGet())) ; + yylval->lex_str= get_token(0, yyLength()); + return(FLOAT_NUM); + } + } + /* + We've found: + - A sequence of digits + - Followed by 'e' or 'E' + - Followed by some byte XX which is not a known mantissa start, + and it's known to be a valid identifier part. + XX can be either a 8bit identifier character, or a multi-byte head. + */ + yyUnget(); + return scan_ident_start(thd, &yylval->ident_cli); + } + /* + We've found: + - A sequence of digits + - Followed by some character XX, which is neither 'e' nor 'E', + and it's known to be a valid identifier part. + XX can be a 8bit identifier character, or a multi-byte head. 
+ */ + yyUnget(); + return scan_ident_start(thd, &yylval->ident_cli); + + case MY_LEX_IDENT_START: // We come here after '.' + return scan_ident_start(thd, &yylval->ident_cli); + + case MY_LEX_USER_VARIABLE_DELIMITER: // Found quote char + return scan_ident_delimited(thd, &yylval->ident_cli, m_tok_start[0]); + + case MY_LEX_INT_OR_REAL: // Complete int or incomplete real + if (c != '.' || yyPeek() == '.') + { + /* + Found a complete integer number: + - the number is either not followed by a dot at all, or + - the number is followed by a double dot as in: FOR i IN 1..10 + */ + yylval->lex_str= get_token(0, yyLength()); + return int_token(yylval->lex_str.str, (uint) yylval->lex_str.length); + } + // fall through + case MY_LEX_REAL: // Incomplete real number + while (my_isdigit(cs, c= yyGet())) ; + + if (c == 'e' || c == 'E') + { + c= yyGet(); + if (c == '-' || c == '+') + c= yyGet(); // Skip sign + if (!my_isdigit(cs, c)) + return ABORT_SYM; // No digit after sign + while (my_isdigit(cs, yyGet())) ; + yylval->lex_str= get_token(0, yyLength()); + return(FLOAT_NUM); + } + yylval->lex_str= get_token(0, yyLength()); + return(DECIMAL_NUM); + + case MY_LEX_HEX_NUMBER: // Found x'hexstring' + yySkip(); // Accept opening ' + while (my_isxdigit(cs, (c= yyGet()))) ; + if (c != '\'') + return(ABORT_SYM); // Illegal hex constant + yySkip(); // Accept closing ' + length= yyLength(); // Length of hexnum+3 + if ((length % 2) == 0) + return(ABORT_SYM); // odd number of hex digits + yylval->lex_str= get_token(2, // skip x' + length - 3); // don't count x' and last ' + return HEX_STRING; + + case MY_LEX_BIN_NUMBER: // Found b'bin-string' + yySkip(); // Accept opening ' + while ((c= yyGet()) == '0' || c == '1') + ; + if (c != '\'') + return(ABORT_SYM); // Illegal hex constant + yySkip(); // Accept closing ' + length= yyLength(); // Length of bin-num + 3 + yylval->lex_str= get_token(2, // skip b' + length - 3); // don't count b' and last ' + return (BIN_NUM); + + case MY_LEX_CMP_OP: // 
Incomplete comparison operator + next_state= MY_LEX_START; // Allow signed numbers + if (state_map[(uchar) yyPeek()] == MY_LEX_CMP_OP || + state_map[(uchar) yyPeek()] == MY_LEX_LONG_CMP_OP) + { + yySkip(); + if ((tokval= find_keyword(&yylval->kwd, 2, 0))) + return(tokval); + yyUnget(); + } + return(c); + + case MY_LEX_LONG_CMP_OP: // Incomplete comparison operator + next_state= MY_LEX_START; + if (state_map[(uchar) yyPeek()] == MY_LEX_CMP_OP || + state_map[(uchar) yyPeek()] == MY_LEX_LONG_CMP_OP) + { + yySkip(); + if (state_map[(uchar) yyPeek()] == MY_LEX_CMP_OP) + { + yySkip(); + if ((tokval= find_keyword(&yylval->kwd, 3, 0))) + return(tokval); + yyUnget(); + } + if ((tokval= find_keyword(&yylval->kwd, 2, 0))) + return(tokval); + yyUnget(); + } + return(c); + + case MY_LEX_BOOL: + if (c != yyPeek()) + { + state= MY_LEX_CHAR; + break; + } + yySkip(); + tokval= find_keyword(&yylval->kwd, 2, 0); // Is a bool operator + next_state= MY_LEX_START; // Allow signed numbers + return(tokval); + + case MY_LEX_STRING_OR_DELIMITER: + if (thd->variables.sql_mode & MODE_ANSI_QUOTES) + { + state= MY_LEX_USER_VARIABLE_DELIMITER; + break; + } + /* " used for strings */ + /* fall through */ + case MY_LEX_STRING: // Incomplete text string + { + uint sep; + if (get_text(&yylval->lex_string_with_metadata, (sep= yyGetLast()), 1, 1)) + { + state= MY_LEX_CHAR; // Read char by char + break; + } + CHARSET_INFO *strcs= m_underscore_cs ? m_underscore_cs : cs; + body_utf8_append(m_cpp_text_start); + + body_utf8_append_escape(thd, &yylval->lex_string_with_metadata, + strcs, m_cpp_text_end, sep); + m_underscore_cs= NULL; + return(TEXT_STRING); + } + case MY_LEX_COMMENT: // Comment + lex->lex_options|= OPTION_LEX_FOUND_COMMENT; + while ((c= yyGet()) != '\n' && c) ; + yyUnget(); // Safety against eof + state= MY_LEX_START; // Try again + break; + case MY_LEX_LONG_COMMENT: // Long C comment? 
+ if (yyPeek() != '*') + { + state= MY_LEX_CHAR; // Probable division + break; + } + lex->lex_options|= OPTION_LEX_FOUND_COMMENT; + /* Reject '/' '*', since we might need to turn off the echo */ + yyUnget(); + + save_in_comment_state(); + + if (yyPeekn(2) == '!' || + (yyPeekn(2) == 'M' && yyPeekn(3) == '!')) + { + bool maria_comment_syntax= yyPeekn(2) == 'M'; + in_comment= DISCARD_COMMENT; + /* Accept '/' '*' '!', but do not keep this marker. */ + set_echo(FALSE); + yySkipn(maria_comment_syntax ? 4 : 3); + + /* + The special comment format is very strict: + '/' '*' '!', followed by an optional 'M' and exactly + 1-2 digits (major), 2 digits (minor), then 2 digits (dot). + 32302 -> 3.23.02 + 50032 -> 5.0.32 + 50114 -> 5.1.14 + 100000 -> 10.0.0 + */ + if ( my_isdigit(cs, yyPeekn(0)) + && my_isdigit(cs, yyPeekn(1)) + && my_isdigit(cs, yyPeekn(2)) + && my_isdigit(cs, yyPeekn(3)) + && my_isdigit(cs, yyPeekn(4)) + ) + { + ulong version; + uint length= 5; + char *end_ptr= (char*) get_ptr() + length; + int error; + if (my_isdigit(cs, yyPeekn(5))) + { + end_ptr++; // 6 digit number + length++; + } + + version= (ulong) my_strtoll10(get_ptr(), &end_ptr, &error); + + /* + MySQL-5.7 has new features and might have new SQL syntax that + MariaDB-10.0 does not understand. Ignore all versioned comments + with MySQL versions in the range 50700-999999, but + do not ignore MariaDB specific comments for the same versions. + */ + if (version <= MYSQL_VERSION_ID && + (version < 50700 || version > 99999 || maria_comment_syntax)) + { + /* Accept 'M' 'm' 'm' 'd' 'd' */ + yySkipn(length); + /* Expand the content of the special comment as real code */ + set_echo(TRUE); + state=MY_LEX_START; + break; /* Do not treat contents as a comment. 
*/ + } + else + { +#ifdef WITH_WSREP + if (WSREP(thd) && version == 99997 && wsrep_thd_is_local(thd)) + { + WSREP_DEBUG("consistency check: %s", thd->query()); + thd->wsrep_consistency_check= CONSISTENCY_CHECK_DECLARED; + yySkipn(5); + set_echo(TRUE); + state= MY_LEX_START; + break; /* Do not treat contents as a comment. */ + } +#endif /* WITH_WSREP */ + /* + Patch and skip the conditional comment to avoid it + being propagated infinitely (eg. to a slave). + */ + char *pcom= yyUnput(' '); + comment_closed= ! consume_comment(1); + if (! comment_closed) + { + *pcom= '!'; + } + /* version allowed to have one level of comment inside. */ + } + } + else + { + /* Not a version comment. */ + state=MY_LEX_START; + set_echo(TRUE); + break; + } + } + else + { + in_comment= PRESERVE_COMMENT; + yySkip(); // Accept / + yySkip(); // Accept * + comment_closed= ! consume_comment(0); + /* regular comments can have zero comments inside. */ + } + /* + Discard: + - regular '/' '*' comments, + - special comments '/' '*' '!' for a future version, + by scanning until we find a closing '*' '/' marker. + + Nesting regular comments isn't allowed. The first + '*' '/' returns the parser to the previous state. + + /#!VERSI oned containing /# regular #/ is allowed #/ + + Inside one versioned comment, another versioned comment + is treated as a regular discardable comment. It gets + no special parsing. + */ + + /* Unbalanced comments with a missing '*' '/' are a syntax error */ + if (! 
comment_closed) + return (ABORT_SYM); + state = MY_LEX_START; // Try again + restore_in_comment_state(); + break; + case MY_LEX_END_LONG_COMMENT: + if ((in_comment != NO_COMMENT) && yyPeek() == '/') + { + /* Reject '*' '/' */ + yyUnget(); + /* Accept '*' '/', with the proper echo */ + set_echo(in_comment == PRESERVE_COMMENT); + yySkipn(2); + /* And start recording the tokens again */ + set_echo(TRUE); + in_comment= NO_COMMENT; + state=MY_LEX_START; + } + else + state= MY_LEX_CHAR; // Return '*' + break; + case MY_LEX_SET_VAR: // Check if ':=' + if (yyPeek() != '=') + { + next_state= MY_LEX_START; + if (m_thd->variables.sql_mode & MODE_ORACLE) + { + yylval->kwd.set_keyword(m_tok_start, 1); + return COLON_ORACLE_SYM; + } + return (int) ':'; + } + yySkip(); + return (SET_VAR); + case MY_LEX_SEMICOLON: // optional line terminator + state= MY_LEX_CHAR; // Return ';' + break; + case MY_LEX_EOL: + if (eof()) + { + yyUnget(); // Reject the last '\0' + set_echo(FALSE); + yySkip(); + set_echo(TRUE); + /* Unbalanced comments with a missing '*' '/' are a syntax error */ + if (in_comment != NO_COMMENT) + return (ABORT_SYM); + next_state= MY_LEX_END; // Mark for next loop + return(END_OF_INPUT); + } + state=MY_LEX_CHAR; + break; + case MY_LEX_END: + next_state= MY_LEX_END; + return(0); // We found end of input last time + + /* Actually real shouldn't start with . but allow them anyhow */ + case MY_LEX_REAL_OR_POINT: + if (my_isdigit(cs, (c= yyPeek()))) + state = MY_LEX_REAL; // Real + else if (c == '.') + { + yySkip(); + return DOT_DOT_SYM; + } + else + { + state= MY_LEX_IDENT_SEP; // return '.' + yyUnget(); // Put back '.' 
+ } + break; + case MY_LEX_USER_END: // end '@' of user@hostname + switch (state_map[(uchar) yyPeek()]) { + case MY_LEX_STRING: + case MY_LEX_USER_VARIABLE_DELIMITER: + case MY_LEX_STRING_OR_DELIMITER: + break; + case MY_LEX_USER_END: + next_state= MY_LEX_SYSTEM_VAR; + break; + default: + next_state= MY_LEX_HOSTNAME; + break; + } + yylval->lex_str.str= (char*) get_ptr() - 1; + yylval->lex_str.length= 1; + return((int) '@'); + case MY_LEX_HOSTNAME: // end '@' of user@hostname + for (c= yyGet() ; + my_isalnum(cs, c) || c == '.' || c == '_' || c == '$'; + c= yyGet()) ; + yylval->lex_str= get_token(0, yyLength()); + return(LEX_HOSTNAME); + case MY_LEX_SYSTEM_VAR: + yylval->lex_str.str= (char*) get_ptr(); + yylval->lex_str.length= 1; + yySkip(); // Skip '@' + next_state= (state_map[(uchar) yyPeek()] == + MY_LEX_USER_VARIABLE_DELIMITER ? + MY_LEX_OPERATOR_OR_IDENT : + MY_LEX_IDENT_OR_KEYWORD); + return((int) '@'); + case MY_LEX_IDENT_OR_KEYWORD: + /* + We come here when we have found two '@' in a row. + We should now be able to handle: + [(global | local | session) .]variable_name + */ + return scan_ident_sysvar(thd, &yylval->ident_cli); + } + } +} + + +bool Lex_input_stream::get_7bit_or_8bit_ident(THD *thd, uchar *last_char) +{ + uchar c; + CHARSET_INFO *const cs= thd->charset(); + const uchar *const ident_map= cs->ident_map; + bool is_8bit= false; + for ( ; ident_map[c= yyGet()]; ) + { + if (c & 0x80) + is_8bit= true; // will convert + } + *last_char= c; + return is_8bit; +} + + +int Lex_input_stream::scan_ident_sysvar(THD *thd, Lex_ident_cli_st *str) +{ + uchar last_char; + uint length; + int tokval; + bool is_8bit; + DBUG_ASSERT(m_tok_start == m_ptr); + + is_8bit= get_7bit_or_8bit_ident(thd, &last_char); + + if (last_char == '.') + next_state= MY_LEX_IDENT_SEP; + if (!(length= yyLength())) + return ABORT_SYM; // Names must be nonempty. 
+ if ((tokval= find_keyword(str, length, 0))) + { + yyUnget(); // Put back 'c' + return tokval; // Was keyword + } + + yyUnget(); // ptr points now after last token char + str->set_ident(m_tok_start, length, is_8bit); + + m_cpp_text_start= m_cpp_tok_start; + m_cpp_text_end= m_cpp_text_start + length; + body_utf8_append(m_cpp_text_start); + body_utf8_append_ident(thd, str, m_cpp_text_end); + + return is_8bit ? IDENT_QUOTED : IDENT; +} + + +/* + We can come here if different parsing stages: + - In an identifier chain: + SELECT t1.cccc FROM t1; + (when the "cccc" part starts) + In this case both m_tok_start and m_ptr point to "cccc". + - When a sequence of digits has changed to something else, + therefore the token becomes an identifier rather than a number: + SELECT 12345_6 FROM t1; + In this case m_tok_start points to the entire "12345_678", + while m_ptr points to "678". +*/ +int Lex_input_stream::scan_ident_start(THD *thd, Lex_ident_cli_st *str) +{ + uchar c; + bool is_8bit; + CHARSET_INFO *const cs= thd->charset(); + const uchar *const ident_map= cs->ident_map; + DBUG_ASSERT(m_tok_start <= m_ptr); + + if (cs->use_mb()) + { + is_8bit= true; + while (ident_map[c= yyGet()]) + { + int char_length= cs->charlen(get_ptr() - 1, get_end_of_query()); + if (char_length <= 0) + break; + skip_binary(char_length - 1); + } + } + else + { + is_8bit= get_7bit_or_8bit_ident(thd, &c); + } + if (c == '.' && ident_map[(uchar) yyPeek()]) + next_state= MY_LEX_IDENT_SEP;// Next is '.' + + uint length= yyLength(); + yyUnget(); // ptr points now after last token char + str->set_ident(m_tok_start, length, is_8bit); + m_cpp_text_start= m_cpp_tok_start; + m_cpp_text_end= m_cpp_text_start + length; + body_utf8_append(m_cpp_text_start); + body_utf8_append_ident(thd, str, m_cpp_text_end); + return is_8bit ? 
IDENT_QUOTED : IDENT; +} + + +int Lex_input_stream::scan_ident_middle(THD *thd, Lex_ident_cli_st *str, + CHARSET_INFO **introducer, + my_lex_states *st) +{ + CHARSET_INFO *const cs= thd->charset(); + const uchar *const ident_map= cs->ident_map; + const uchar *const state_map= cs->state_map; + const char *start; + uint length; + uchar c; + bool is_8bit; + bool resolve_introducer= true; + DBUG_ASSERT(m_ptr == m_tok_start + 1); // m_ptr points to the second byte + + if (cs->use_mb()) + { + is_8bit= true; + int char_length= cs->charlen(get_ptr() - 1, get_end_of_query()); + if (char_length <= 0) + { + *st= MY_LEX_CHAR; + return 0; + } + skip_binary(char_length - 1); + + while (ident_map[c= yyGet()]) + { + char_length= cs->charlen(get_ptr() - 1, get_end_of_query()); + if (char_length <= 0) + break; + if (char_length > 1 || (c & 0x80)) + resolve_introducer= false; + skip_binary(char_length - 1); + } + } + else + { + is_8bit= get_7bit_or_8bit_ident(thd, &c) || (m_tok_start[0] & 0x80); + resolve_introducer= !is_8bit; + } + length= yyLength(); + start= get_ptr(); + if (ignore_space) + { + /* + If we find a space then this can't be an identifier. We notice this + below by checking start != lex->ptr. + */ + for (; state_map[(uchar) c] == MY_LEX_SKIP ; c= yyGet()) + { + if (c == '\n') + yylineno++; + } + } + if (start == get_ptr() && c == '.' && ident_map[(uchar) yyPeek()]) + next_state= MY_LEX_IDENT_SEP; + else + { // '(' must follow directly if function + int tokval; + yyUnget(); + if ((tokval= find_keyword(str, length, c == '('))) + { + next_state= MY_LEX_START; // Allow signed numbers + return(tokval); // Was keyword + } + yySkip(); // next state does a unget + } + + yyUnget(); // ptr points now after last token char + str->set_ident(m_tok_start, length, is_8bit); + m_cpp_text_start= m_cpp_tok_start; + m_cpp_text_end= m_cpp_text_start + length; + + /* + Note: "SELECT _bla AS 'alias'" + _bla should be considered as a IDENT if charset haven't been found. 
+ So we don't use MYF(MY_WME) with get_charset_by_csname to avoid + producing an error. + */ + DBUG_ASSERT(length > 0); + if (resolve_introducer && m_tok_start[0] == '_') + { + ErrConvString csname(str->str + 1, str->length - 1, &my_charset_bin); + myf utf8_flag= thd->get_utf8_flag(); + CHARSET_INFO *cs= get_charset_by_csname(csname.ptr(), + MY_CS_PRIMARY, + MYF(utf8_flag)); + if (cs) + { + body_utf8_append(m_cpp_text_start, m_cpp_tok_start + length); + *introducer= cs; + return UNDERSCORE_CHARSET; + } + } + + body_utf8_append(m_cpp_text_start); + body_utf8_append_ident(thd, str, m_cpp_text_end); + return is_8bit ? IDENT_QUOTED : IDENT; +} + + +int Lex_input_stream::scan_ident_delimited(THD *thd, + Lex_ident_cli_st *str, + uchar quote_char) +{ + CHARSET_INFO *const cs= thd->charset(); + uchar c; + DBUG_ASSERT(m_ptr == m_tok_start + 1); + + for ( ; ; ) + { + if (!(c= yyGet())) + { + /* + End-of-query or straight 0x00 inside a delimited identifier. + Return the quote character, to have the parser fail on syntax error. + */ + m_ptr= (char *) m_tok_start + 1; + if (m_echo) + m_cpp_ptr= (char *) m_cpp_tok_start + 1; + return quote_char; + } + int var_length= cs->charlen(get_ptr() - 1, get_end_of_query()); + if (var_length == 1) + { + if (c == quote_char) + { + if (yyPeek() != quote_char) + break; + c= yyGet(); + continue; + } + } + else if (var_length > 1) + { + skip_binary(var_length - 1); + } + } + + str->set_ident_quoted(m_tok_start + 1, yyLength() - 1, true, quote_char); + yyUnget(); // ptr points now after last token char + + m_cpp_text_start= m_cpp_tok_start + 1; + m_cpp_text_end= m_cpp_text_start + str->length; + + if (c == quote_char) + yySkip(); // Skip end ` + next_state= MY_LEX_START; + body_utf8_append(m_cpp_text_start); + // QQQ: shouldn't it add unescaped version ???? 
+ body_utf8_append_ident(thd, str, m_cpp_text_end); + return IDENT_QUOTED; +} + + +/* + st_select_lex structures initialisations +*/ + +void st_select_lex_node::init_query_common() +{ + options= 0; + set_linkage(UNSPECIFIED_TYPE); + distinct= TRUE; + no_table_names_allowed= 0; + uncacheable= 0; +} + +void st_select_lex_unit::init_query() +{ + init_query_common(); + set_linkage(GLOBAL_OPTIONS_TYPE); + lim.clear(); + union_distinct= 0; + item= 0; + union_result= 0; + table= 0; + fake_select_lex= 0; + saved_fake_select_lex= 0; + item_list.empty(); + found_rows_for_union= 0; + derived= 0; + with_clause= 0; + with_element= 0; + + /* reset all bit fields */ + prepared= 0; + optimized= 0; + optimized_2= 0; + executed= 0; + cleaned= 0; + bag_set_op_optimized= 0; + optimize_started= 0; + have_except_all_or_intersect_all= 0; + with_wrapped_tvc= 0; + is_view= 0; + describe= 0; + cloned_from= 0; + columns_are_renamed= 0; +} + +void st_select_lex::init_query() +{ + init_query_common(); + table_list.empty(); + top_join_list.empty(); + join_list= &top_join_list; + embedding= 0; + leaf_tables_prep.empty(); + leaf_tables.empty(); + item_list.empty(); + fix_after_optimize.empty(); + min_max_opt_list.empty(); + limit_params.clear(); + join= 0; + cur_pos_in_select_list= UNDEF_POS; + having= prep_having= where= prep_where= 0; + cond_pushed_into_where= cond_pushed_into_having= 0; + attach_to_conds.empty(); + olap= UNSPECIFIED_OLAP_TYPE; + + /* reset all bit fields */ + is_item_list_lookup= 0; + have_merged_subqueries= 0; + is_set_query_expr_tail= 0; + with_sum_func= with_rownum= 0; + braces= 0; + automatic_brackets= 0; + having_fix_field= 0; + having_fix_field_for_pushed_cond= 0; + subquery_in_having= 0; + is_item_list_lookup= 0; + with_all_modifier= 0; + is_correlated= 0; + first_natural_join_processing= 1; + first_cond_optimization= 1; + no_wrap_view_item= 0; + exclude_from_table_unique_test= 0; + in_tvc= 0; + skip_locked= 0; + m_non_agg_field_used= 0; + m_agg_func_used= 0; + 
m_custom_agg_func_used= 0; + is_service_select= 0; + + context.select_lex= this; + context.init(); + cond_count= between_count= with_wild= 0; + max_equal_elems= 0; + ref_pointer_array.reset(); + select_n_where_fields= 0; + order_group_num= 0; + select_n_reserved= 0; + select_n_having_items= 0; + n_sum_items= 0; + n_child_sum_items= 0; + hidden_bit_fields= 0; + fields_in_window_functions= 0; + changed_elements= 0; + parsing_place= NO_MATTER; + save_parsing_place= NO_MATTER; + context_analysis_place= NO_MATTER; + nest_level= 0; + link_next= 0; + prep_leaf_list_state= UNINIT; + bzero((char*) expr_cache_may_be_used, sizeof(expr_cache_may_be_used)); + select_list_tables= 0; + rownum_in_field_list= 0; + + window_specs.empty(); + window_funcs.empty(); + is_win_spec_list_built= false; + tvc= 0; + versioned_tables= 0; + pushdown_select= 0; +} + +void st_select_lex::init_select() +{ + sj_nests.empty(); + sj_subselects.empty(); + group_list.empty(); + if (group_list_ptrs) + group_list_ptrs->clear(); + type= 0; + db= null_clex_str; + having= 0; + table_join_options= 0; + select_lock= select_lock_type::NONE; + in_sum_expr= with_wild= 0; + options= 0; + ftfunc_list_alloc.empty(); + inner_sum_func_list= 0; + ftfunc_list= &ftfunc_list_alloc; + order_list.empty(); + /* Set limit and offset to default values */ + limit_params.clear(); + + /* Reset bit fields */ + is_set_query_expr_tail= 0; + with_sum_func= 0; + with_all_modifier= 0; + is_correlated= 0; + in_tvc= 0; + skip_locked= 0; + m_non_agg_field_used= 0; + m_agg_func_used= 0; + m_custom_agg_func_used= 0; + + cur_pos_in_select_list= UNDEF_POS; + cond_value= having_value= Item::COND_UNDEF; + inner_refs_list.empty(); + insert_tables= 0; + merged_into= 0; + name_visibility_map.clear_all(); + with_dep= 0; + join= 0; + lock_type= TL_READ_DEFAULT; + save_many_values.empty(); + save_insert_list= 0; + tvc= 0; + in_funcs.empty(); + curr_tvc_name= 0; + versioned_tables= 0; + is_tvc_wrapper= false; + nest_flags= 0; +} + +/* + st_select_lex 
structures linking +*/ + +/* include on level down */ +void st_select_lex_node::include_down(st_select_lex_node *upper) +{ + if ((next= upper->slave)) + next->prev= &next; + prev= &upper->slave; + upper->slave= this; + master= upper; + slave= 0; +} + + +void st_select_lex_node::attach_single(st_select_lex_node *slave_arg) +{ + DBUG_ASSERT(slave == 0); + { + slave= slave_arg; + slave_arg->master= this; + slave->prev= &master->slave; + slave->next= 0; + } +} + +void st_select_lex_node::link_chain_down(st_select_lex_node *first) +{ + st_select_lex_node *last_node; + st_select_lex_node *node= first; + do + { + last_node= node; + node->master= this; + node= node->next; + } while (node); + if ((last_node->next= slave)) + { + slave->prev= &last_node->next; + } + first->prev= &slave; + slave= first; +} + +/* + @brief + Substitute this node in select tree for a newly creates node + + @param subst the node to substitute for + + @details + The function substitute this node in the select tree for a newly + created node subst. This node is just removed from the tree but all + its link fields and the attached sub-tree remain untouched. 
+*/ + +void st_select_lex_node::substitute_in_tree(st_select_lex_node *subst) +{ + if ((subst->next= next)) + next->prev= &subst->next; + subst->prev= prev; + (*prev)= subst; + subst->master= master; +} + +/* + include on level down (but do not link) + + SYNOPSYS + st_select_lex_node::include_standalone() + upper - reference on node underr which this node should be included + ref - references on reference on this node +*/ +void st_select_lex_node::include_standalone(st_select_lex_node *upper, + st_select_lex_node **ref) +{ + next= 0; + prev= ref; + master= upper; + slave= 0; +} + +/* include neighbour (on same level) */ +void st_select_lex_node::include_neighbour(st_select_lex_node *before) +{ + if ((next= before->next)) + next->prev= &next; + prev= &before->next; + before->next= this; + master= before->master; + slave= 0; +} + +/* including in global SELECT_LEX list */ +void st_select_lex_node::include_global(st_select_lex_node **plink) +{ + if ((link_next= *plink)) + link_next->link_prev= &link_next; + link_prev= plink; + *plink= this; +} + +//excluding from global list (internal function) +void st_select_lex_node::fast_exclude() +{ + if (link_prev) + { + if ((*link_prev= link_next)) + link_next->link_prev= link_prev; + } + // Remove slave structure + for (; slave; slave= slave->next) + slave->fast_exclude(); + + prev= NULL; // to ensure correct behavior of st_select_lex_unit::is_excluded() +} + + +/** + @brief + Insert a new chain of nodes into another chain before a particular link + + @param in/out + ptr_pos_to_insert the address of the chain pointer pointing to the link + before which the subchain has to be inserted + @param + end_chain_node the last link of the subchain to be inserted + + @details + The method inserts the chain of nodes starting from this node and ending + with the node nd_chain_node into another chain of nodes before the node + pointed to by *ptr_pos_to_insert. + It is assumed that ptr_pos_to_insert belongs to the chain where we insert. 
+ So it must be updated. + + @retval + The method returns the pointer to the first link of the inserted chain +*/ + +st_select_lex_node *st_select_lex_node:: insert_chain_before( + st_select_lex_node **ptr_pos_to_insert, + st_select_lex_node *end_chain_node) +{ + end_chain_node->link_next= *ptr_pos_to_insert; + (*ptr_pos_to_insert)->link_prev= &end_chain_node->link_next; + link_prev= ptr_pos_to_insert; + return this; +} + + +/* + Detach the node from its master and attach it to a new master +*/ + +void st_select_lex_node::move_as_slave(st_select_lex_node *new_master) +{ + exclude_from_tree(); + if (new_master->slave) + { + st_select_lex_node *curr= new_master->slave; + for ( ; curr->next ; curr= curr->next) ; + prev= &curr->next; + } + else + prev= &new_master->slave; + *prev= this; + next= 0; + master= new_master; +} + + +/* + Exclude a node from the tree lex structure, but leave it in the global + list of nodes. +*/ + +void st_select_lex_node::exclude_from_tree() +{ + if ((*prev= next)) + next->prev= prev; +} + + +/* + Exclude select_lex structure (except first (first select can't be + deleted, because it is most upper select)) +*/ +void st_select_lex_node::exclude() +{ + /* exclude the node from the tree */ + exclude_from_tree(); + /* + We do not need following statements, because prev pointer of first + list element point to master->slave + if (master->slave == this) + master->slave= next; + */ + /* exclude all nodes under this excluded node */ + fast_exclude(); +} + + +/* + Exclude level of current unit from tree of SELECTs + + SYNOPSYS + st_select_lex_unit::exclude_level() + + NOTE: units which belong to current will be brought up on level of + currernt unit +*/ +void st_select_lex_unit::exclude_level() +{ + SELECT_LEX_UNIT *units= 0, **units_last= &units; + for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select()) + { + // unlink current level from global SELECTs list + if (sl->link_prev && (*sl->link_prev= sl->link_next)) + sl->link_next->link_prev= 
sl->link_prev; + + // bring up underlay levels + SELECT_LEX_UNIT **last= 0; + for (SELECT_LEX_UNIT *u= sl->first_inner_unit(); u; u= u->next_unit()) + { + u->master= master; + last= (SELECT_LEX_UNIT**)&(u->next); + } + if (last) + { + (*units_last)= sl->first_inner_unit(); + units_last= last; + } + } + if (units) + { + // include brought up levels in place of current + (*prev)= units; + (*units_last)= (SELECT_LEX_UNIT*)next; + if (next) + next->prev= (SELECT_LEX_NODE**)units_last; + units->prev= prev; + } + else + { + // exclude currect unit from list of nodes + (*prev)= next; + if (next) + next->prev= prev; + } + // Mark it excluded + prev= NULL; +} + + +#if 0 +/* + Exclude subtree of current unit from tree of SELECTs + + SYNOPSYS + st_select_lex_unit::exclude_tree() +*/ +void st_select_lex_unit::exclude_tree() +{ + for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select()) + { + // unlink current level from global SELECTs list + if (sl->link_prev && (*sl->link_prev= sl->link_next)) + sl->link_next->link_prev= sl->link_prev; + + // unlink underlay levels + for (SELECT_LEX_UNIT *u= sl->first_inner_unit(); u; u= u->next_unit()) + { + u->exclude_level(); + } + } + // exclude currect unit from list of nodes + (*prev)= next; + if (next) + next->prev= prev; +} +#endif + + +/* + st_select_lex_node::mark_as_dependent mark all st_select_lex struct from + this to 'last' as dependent + + SYNOPSIS + last - pointer to last st_select_lex struct, before which all + st_select_lex have to be marked as dependent + + NOTE + 'last' should be reachable from this st_select_lex_node +*/ + +bool st_select_lex::mark_as_dependent(THD *thd, st_select_lex *last, + Item_ident *dependency) +{ + + DBUG_ASSERT(this != last); + + /* + Mark all selects from resolved to 1 before select where was + found table as depended (of select where was found table) + + We move by name resolution context, bacause during merge can some select + be excleded from SELECT tree + */ + Name_resolution_context 
*c= &this->context; + do + { + SELECT_LEX *s= c->select_lex; + if (!(s->uncacheable & UNCACHEABLE_DEPENDENT_GENERATED)) + { + // Select is dependent of outer select + s->uncacheable= (s->uncacheable & ~UNCACHEABLE_UNITED) | + UNCACHEABLE_DEPENDENT_GENERATED; + SELECT_LEX_UNIT *munit= s->master_unit(); + munit->uncacheable= (munit->uncacheable & ~UNCACHEABLE_UNITED) | + UNCACHEABLE_DEPENDENT_GENERATED; + for (SELECT_LEX *sl= munit->first_select(); sl ; sl= sl->next_select()) + { + if (sl != s && + !(sl->uncacheable & (UNCACHEABLE_DEPENDENT_GENERATED | + UNCACHEABLE_UNITED))) + sl->uncacheable|= UNCACHEABLE_UNITED; + } + } + + Item_subselect *subquery_expr= s->master_unit()->item; + if (subquery_expr && subquery_expr->mark_as_dependent(thd, last, + dependency)) + return TRUE; + } while ((c= c->outer_context) != NULL && (c->select_lex != last)); + is_correlated= TRUE; + master_unit()->item->is_correlated= TRUE; + return FALSE; +} + +/* + prohibit using LIMIT clause +*/ +bool st_select_lex::test_limit() +{ + if (limit_params.select_limit) + { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "LIMIT & IN/ALL/ANY/SOME subquery"); + return(1); + } + return(0); +} + + + +st_select_lex* st_select_lex_unit::outer_select() const +{ + return (st_select_lex*) master; +} + + +ha_rows st_select_lex::get_offset() +{ + ha_rows val= 0; + + Item *offset_limit= limit_params.offset_limit; + if (offset_limit) + { + // see comment for st_select_lex::get_limit() + bool err= offset_limit->fix_fields_if_needed(master_unit()->thd, NULL); + DBUG_ASSERT(!err); + val= err ? HA_POS_ERROR : (ha_rows)offset_limit->val_uint(); + } + + return val; +} + + +ha_rows st_select_lex::get_limit() +{ + ha_rows val= HA_POS_ERROR; + + Item *select_limit= limit_params.select_limit; + if (select_limit) + { + /* + fix_fields() has not been called for select_limit. That's due to the + historical reasons -- this item could be only of type Item_int, and + Item_int does not require fix_fields(). 
Thus, fix_fields() was never + called for select_limit. + + Some time ago, Item_splocal was also allowed for LIMIT / OFFSET clauses. + However, the fix_fields() behavior was not updated, which led to a crash + in some cases. + + There is no single place where to call fix_fields() for LIMIT / OFFSET + items during the fix-fields-phase. Thus, for the sake of readability, + it was decided to do it here, on the evaluation phase (which is a + violation of design, but we chose the lesser of two evils). + + We can call fix_fields() here, because select_limit can be of two + types only: Item_int and Item_splocal. Item_int::fix_fields() is trivial, + and Item_splocal::fix_fields() (or rather Item_sp_variable::fix_fields()) + has the following properties: + 1) it does not affect other items; + 2) it does not fail. + + Nevertheless DBUG_ASSERT was added to catch future changes in + fix_fields() implementation. Also added runtime check against a result + of fix_fields() in order to handle error condition in non-debug build. + */ + bool err= select_limit->fix_fields_if_needed(master_unit()->thd, NULL); + DBUG_ASSERT(!err); + val= err ? HA_POS_ERROR : (ha_rows) select_limit->val_uint(); + } + + return val; +} + + +bool st_select_lex::add_order_to_list(THD *thd, Item *item, bool asc) +{ + return add_to_list(thd, order_list, item, asc); +} + + +bool st_select_lex::add_gorder_to_list(THD *thd, Item *item, bool asc) +{ + return add_to_list(thd, gorder_list, item, asc); +} + + +bool st_select_lex::add_item_to_list(THD *thd, Item *item) +{ + DBUG_ENTER("st_select_lex::add_item_to_list"); + DBUG_PRINT("info", ("Item: %p", item)); + DBUG_RETURN(item_list.push_back(item, thd->mem_root)); +} + + +bool st_select_lex::add_group_to_list(THD *thd, Item *item, bool asc) +{ + return add_to_list(thd, group_list, item, asc); +} + + +bool st_select_lex::add_ftfunc_to_list(THD *thd, Item_func_match *func) +{ + return !func || ftfunc_list->push_back(func, thd->mem_root); // end of memory? 
+} + + +st_select_lex* st_select_lex::outer_select() +{ + return (st_select_lex*) master->get_master(); +} + + +bool st_select_lex::inc_in_sum_expr() +{ + in_sum_expr++; + return 0; +} + + +uint st_select_lex::get_in_sum_expr() +{ + return in_sum_expr; +} + + +TABLE_LIST* st_select_lex::get_table_list() +{ + return table_list.first; +} + +List* st_select_lex::get_item_list() +{ + return &item_list; +} + + +uint st_select_lex::get_cardinality_of_ref_ptrs_slice(uint order_group_num_arg) +{ + if (!((options & SELECT_DISTINCT) && !group_list.elements)) + hidden_bit_fields= 0; + + if (!order_group_num) + order_group_num= order_group_num_arg; + + /* + find_order_in_list() may need some extra space, + so multiply order_group_num by 2 + */ + uint n= n_sum_items + + n_child_sum_items + + item_list.elements + + select_n_reserved + + select_n_having_items + + select_n_where_fields + + order_group_num * 2 + + hidden_bit_fields + + fields_in_window_functions; + return n; +} + + +bool st_select_lex::setup_ref_array(THD *thd, uint order_group_num) +{ + uint n_elems= get_cardinality_of_ref_ptrs_slice(order_group_num) * 5; + if (!ref_pointer_array.is_null()) + return false; + Item **array= static_cast(thd->stmt_arena->alloc(sizeof(Item*) * + n_elems)); + if (likely(array != NULL)) + ref_pointer_array= Ref_ptr_array(array, n_elems); + return array == NULL; +} + + +/* + @brief + Print the whole statement + + @param str Print into this string + @param query_type Flags describing how to print + + @detail + The intent is to allow to eventually print back any query. + + This is useful e.g. for storage engines that take over diferrent kinds of + queries +*/ + +void LEX::print(String *str, enum_query_type query_type) +{ + if (sql_command == SQLCOM_UPDATE) + { + SELECT_LEX *sel= first_select_lex(); + str->append(STRING_WITH_LEN("UPDATE ")); + if (ignore) + str->append(STRING_WITH_LEN("IGNORE ")); + // table name. 
If the query was using a view, we need + // the underlying table name, not the view name + TABLE_LIST *base_tbl= query_tables->table->pos_in_table_list; + base_tbl->print(thd, table_map(0), str, query_type); + str->append(STRING_WITH_LEN(" SET ")); + // print item assignments + List_iterator it(sel->item_list); + List_iterator it2(value_list); + Item *col_ref, *value; + bool first= true; + while ((col_ref= it++) && (value= it2++)) + { + if (first) + first= false; + else + str->append(STRING_WITH_LEN(", ")); + col_ref->print(str, query_type); + str->append(STRING_WITH_LEN("=")); + value->print(str, query_type); + } + + if (sel->where) + { + str->append(STRING_WITH_LEN(" WHERE ")); + sel->where->print(str, query_type); + } + + if (sel->order_list.elements) + { + str->append(STRING_WITH_LEN(" ORDER BY ")); + for (ORDER *ord= sel->order_list.first; ord; ord= ord->next) + { + if (ord != sel->order_list.first) + str->append(STRING_WITH_LEN(", ")); + (*ord->item)->print(str, query_type); + } + } + if (sel->limit_params.select_limit) + { + str->append(STRING_WITH_LEN(" LIMIT ")); + sel->limit_params.select_limit->print(str, query_type); + } + } + else if (sql_command == SQLCOM_DELETE) + { + SELECT_LEX *sel= first_select_lex(); + str->append(STRING_WITH_LEN("DELETE ")); + if (ignore) + str->append(STRING_WITH_LEN("IGNORE ")); + + str->append(STRING_WITH_LEN("FROM ")); + // table name. 
If the query was using a view, we need + // the underlying table name, not the view name + TABLE_LIST *base_tbl= query_tables->table->pos_in_table_list; + base_tbl->print(thd, table_map(0), str, query_type); + + if (sel->where) + { + str->append(STRING_WITH_LEN(" WHERE ")); + sel->where->print(str, query_type); + } + + if (sel->order_list.elements) + { + str->append(STRING_WITH_LEN(" ORDER BY ")); + for (ORDER *ord= sel->order_list.first; ord; ord= ord->next) + { + if (ord != sel->order_list.first) + str->append(STRING_WITH_LEN(", ")); + (*ord->item)->print(str, query_type); + } + } + if (sel->limit_params.select_limit) + { + str->append(STRING_WITH_LEN(" LIMIT ")); + sel->limit_params.select_limit->print(str, query_type); + } + } + else + DBUG_ASSERT(0); // Not implemented yet +} + +void st_select_lex_unit::print(String *str, enum_query_type query_type) +{ + if (with_clause) + with_clause->print(thd, str, query_type); + for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select()) + { + if (sl != first_select()) + { + switch (sl->linkage) + { + default: + DBUG_ASSERT(0); + /* fall through */ + case UNION_TYPE: + str->append(STRING_WITH_LEN(" union ")); + break; + case INTERSECT_TYPE: + str->append(STRING_WITH_LEN(" intersect ")); + break; + case EXCEPT_TYPE: + str->append(STRING_WITH_LEN(" except ")); + break; + } + if (!sl->distinct) + str->append(STRING_WITH_LEN("all ")); + } + if (sl->braces) + str->append('('); + sl->print(thd, str, query_type); + if (sl->braces) + str->append(')'); + } + if (fake_select_lex) + { + if (fake_select_lex->order_list.elements) + { + str->append(STRING_WITH_LEN(" order by ")); + fake_select_lex->print_order(str, + fake_select_lex->order_list.first, + query_type); + } + fake_select_lex->print_limit(thd, str, query_type); + } + else if (saved_fake_select_lex) + saved_fake_select_lex->print_limit(thd, str, query_type); +} + + +void st_select_lex::print_order(String *str, + ORDER *order, + enum_query_type query_type) +{ + for (; 
order; order= order->next) + { + if (order->counter_used) + { + char buffer[20]; + size_t length= my_snprintf(buffer, 20, "%d", order->counter); + str->append(buffer, (uint) length); + } + else + { + /* replace numeric reference with equivalent for ORDER constant */ + if (order->item[0]->is_order_clause_position()) + { + /* make it expression instead of integer constant */ + str->append(STRING_WITH_LEN("''")); + } + else + (*order->item)->print(str, query_type); + } + if (order->direction == ORDER::ORDER_DESC) + str->append(STRING_WITH_LEN(" desc")); + if (order->next) + str->append(','); + } +} + + +void st_select_lex::print_limit(THD *thd, + String *str, + enum_query_type query_type) +{ + SELECT_LEX_UNIT *unit= master_unit(); + Item_subselect *item= unit->item; + + if (item && unit->global_parameters() == this) + { + Item_subselect::subs_type subs_type= item->substype(); + if (subs_type == Item_subselect::IN_SUBS || + subs_type == Item_subselect::ALL_SUBS) + { + return; + } + } + if (limit_params.explicit_limit && + limit_params.select_limit) + { + /* + [OFFSET n] + FETCH FIRST n ROWS WITH TIES + + For FETCH FIRST n ROWS ONLY we fall back to the "limit" specification + as it's identical. + */ + if (limit_params.with_ties) + { + if (limit_params.offset_limit) + { + str->append(STRING_WITH_LEN(" offset ")); + limit_params.offset_limit->print(str, query_type); + str->append(STRING_WITH_LEN(" rows ")); + } + str->append(STRING_WITH_LEN(" fetch first ")); + limit_params.select_limit->print(str, query_type); + str->append(STRING_WITH_LEN(" rows with ties")); + } + else + { + str->append(STRING_WITH_LEN(" limit ")); + if (limit_params.offset_limit) + { + limit_params.offset_limit->print(str, query_type); + str->append(','); + } + limit_params.select_limit->print(str, query_type); + } + } +} + + +/** + @brief Restore the LEX and THD in case of a parse error. 
  This is a clean up call that is invoked by the Bison generated
  parser before returning an error from MYSQLparse. If your
  semantic actions manipulate the global thread state (which
  is a very bad practice and should not normally be employed) and
  need a clean-up in case of error, and you cannot use a %destructor
  rule in the grammar file itself, this function should be used
  to implement the clean up.
*/

void LEX::cleanup_lex_after_parse_error(THD *thd)
{
  /*
    Delete sphead for the side effect of restoring of the original
    LEX state, thd->lex, thd->mem_root and thd->free_list if they
    were replaced when parsing stored procedure statements. We
    will never use sphead object after a parse error, so it's okay
    to delete it only for the sake of the side effect.
    TODO: make this functionality explicit in sp_head class.
    Sic: we must nullify the member of the main lex, not the
    current one that will be thrown away
  */
  if (thd->lex->sphead)
  {
    sp_package *pkg;
    thd->lex->sphead->restore_thd_mem_root(thd);
    if ((pkg= thd->lex->sphead->m_parent))
    {
      /*
        If a syntax error happened inside a package routine definition,
        then thd->lex points to the routine sublex. We need to restore to
        the top level LEX.
      */
      DBUG_ASSERT(pkg->m_top_level_lex);
      DBUG_ASSERT(pkg == pkg->m_top_level_lex->sphead);
      pkg->restore_thd_mem_root(thd);
      LEX *top= pkg->m_top_level_lex;
      sp_package::destroy(pkg);
      thd->lex= top;
      thd->lex->sphead= NULL;
    }
    else
    {
      sp_head::destroy(thd->lex->sphead);
      thd->lex->sphead= NULL;
    }
  }

  /*
    json_table must be NULL before the query.
    Didn't want to overload LEX::start, it's enough to put it here.
  */
  thd->lex->json_table= 0;
}

/*
  Initialize (or reset) Query_tables_list object.
+ + SYNOPSIS + reset_query_tables_list() + init TRUE - we should perform full initialization of object with + allocating needed memory + FALSE - object is already initialized so we should only reset + its state so it can be used for parsing/processing + of new statement + + DESCRIPTION + This method initializes Query_tables_list so it can be used as part + of LEX object for parsing/processing of statement. One can also use + this method to reset state of already initialized Query_tables_list + so it can be used for processing of new statement. +*/ + +void Query_tables_list::reset_query_tables_list(bool init) +{ + sql_command= SQLCOM_END; + if (!init && query_tables) + { + TABLE_LIST *table= query_tables; + for (;;) + { + delete table->view; + if (query_tables_last == &table->next_global || + !(table= table->next_global)) + break; + } + } + query_tables= 0; + query_tables_last= &query_tables; + query_tables_own_last= 0; + if (init) + { + /* + We delay real initialization of hash (and therefore related + memory allocation) until first insertion into this hash. + */ + my_hash_clear(&sroutines); + } + else if (sroutines.records) + { + /* Non-zero sroutines.records means that hash was initialized. */ + my_hash_reset(&sroutines); + } + sroutines_list.empty(); + sroutines_list_own_last= sroutines_list.next; + sroutines_list_own_elements= 0; + binlog_stmt_flags= 0; + stmt_accessed_table_flag= 0; +} + + +/* + Destroy Query_tables_list object with freeing all resources used by it. + + SYNOPSIS + destroy_query_tables_list() +*/ + +void Query_tables_list::destroy_query_tables_list() +{ + my_hash_free(&sroutines); +} + + +/* + Initialize LEX object. + + SYNOPSIS + LEX::LEX() + + NOTE + LEX object initialized with this constructor can be used as part of + THD object for which one can safely call open_tables(), lock_tables() + and close_thread_tables() functions. But it is not yet ready for + statement parsing. On should use lex_start() function to prepare LEX + for this. 
+*/ + +LEX::LEX() + : explain(NULL), result(0), part_info(NULL), arena_for_set_stmt(0), + mem_root_for_set_stmt(0), json_table(NULL), analyze_stmt(0), + default_used(0), + with_rownum(0), is_lex_started(0), option_type(OPT_DEFAULT), + context_analysis_only(0), sphead(0), limit_rows_examined_cnt(ULONGLONG_MAX) +{ + + init_dynamic_array2(PSI_INSTRUMENT_ME, &plugins, sizeof(plugin_ref), + plugins_static_buffer, INITIAL_LEX_PLUGIN_LIST_SIZE, + INITIAL_LEX_PLUGIN_LIST_SIZE, 0); + reset_query_tables_list(TRUE); + mi.init(); + init_dynamic_array2(PSI_INSTRUMENT_ME, &delete_gtid_domain, sizeof(uint32), + gtid_domain_static_buffer, + initial_gtid_domain_buffer_size, + initial_gtid_domain_buffer_size, 0); + unit.slave= &builtin_select; +} + + +bool LEX::can_be_merged() +{ + return unit.can_be_merged(); +} + + +/* + Check whether the merging algorithm can be used for this unit + + SYNOPSIS + st_select_lex_unit::can_be_merged() + + DESCRIPTION + We can apply merge algorithm for a unit if it is single SELECT with + subqueries only in WHERE clauses or in ON conditions or in select list + (we do not count SELECTs of underlying views/derived tables/CTEs and + second level subqueries) and we have no grouping, ordering, HAVING + clause, aggregate functions, DISTINCT clause, LIMIT clause. 
+ + RETURN + FALSE - only temporary table algorithm can be used + TRUE - merge algorithm can be used +*/ + +bool st_select_lex_unit::can_be_merged() +{ + // TODO: do not forget implement case when select_lex.table_list.elements==0 + + /* find non VIEW subqueries/unions */ + st_select_lex *fs= first_select(); + + if (fs->next_select() || + (fs->uncacheable & UNCACHEABLE_RAND) || + (fs->options & SELECT_DISTINCT) || + fs->group_list.elements || fs->having || + fs->with_sum_func || + fs->table_list.elements < 1 || + fs->limit_params.select_limit) + return false; + for (SELECT_LEX_UNIT *tmp_unit= fs->first_inner_unit(); + tmp_unit; + tmp_unit= tmp_unit->next_unit()) + if ((tmp_unit->item != 0 && + (tmp_unit->item->place() != IN_WHERE && + tmp_unit->item->place() != IN_ON && + tmp_unit->item->place() != SELECT_LIST))) + return false; + return true; +} + + +/* + check if command can use VIEW with MERGE algorithm (for top VIEWs) + + SYNOPSIS + LEX::can_use_merged() + + DESCRIPTION + Only listed here commands can use merge algorithm in top level + SELECT_LEX (for subqueries will be used merge algorithm if + LEX::can_not_use_merged() is not TRUE). 
+ + RETURN + FALSE - command can't use merged VIEWs + TRUE - VIEWs with MERGE algorithms can be used +*/ + +bool LEX::can_use_merged() +{ + switch (sql_command) + { + case SQLCOM_SELECT: + case SQLCOM_CREATE_TABLE: + case SQLCOM_UPDATE: + case SQLCOM_UPDATE_MULTI: + case SQLCOM_DELETE: + case SQLCOM_DELETE_MULTI: + case SQLCOM_INSERT: + case SQLCOM_INSERT_SELECT: + case SQLCOM_REPLACE: + case SQLCOM_REPLACE_SELECT: + case SQLCOM_LOAD: + return TRUE; + default: + return FALSE; + } +} + +/* + Check if command can't use merged views in any part of command + + SYNOPSIS + LEX::can_not_use_merged() + + @param no_update_or_delete Set to 1 if we can't use merge with multiple-table + updates, like when used from + TALE_LIST::init_derived() + + DESCRIPTION + Temporary table algorithm will be used on all SELECT levels for queries + listed here (see also LEX::can_use_merged()). + + RETURN + FALSE - command can't use merged VIEWs + TRUE - VIEWs with MERGE algorithms can be used +*/ + +bool LEX::can_not_use_merged() +{ + switch (sql_command) { + case SQLCOM_CREATE_VIEW: + case SQLCOM_SHOW_CREATE: + /* + SQLCOM_SHOW_FIELDS is necessary to make + information schema tables working correctly with views. + see get_schema_tables_result function + */ + case SQLCOM_SHOW_FIELDS: + return TRUE; + + default: + return FALSE; + } +} + +/** + Detect that we need only table structure of derived table/view. 
+ + Also used by I_S tables (@see create_schema_table) to detect that + they need a full table structure and cannot optimize unused columns away + + @retval TRUE yes, we need only structure + @retval FALSE no, we need data +*/ + +bool LEX::only_view_structure() +{ + switch (sql_command) { + case SQLCOM_SHOW_CREATE: + case SQLCOM_CHECKSUM: + case SQLCOM_SHOW_TABLES: + case SQLCOM_SHOW_FIELDS: + case SQLCOM_REVOKE_ALL: + case SQLCOM_REVOKE: + case SQLCOM_GRANT: + case SQLCOM_CREATE_VIEW: + return TRUE; + case SQLCOM_CREATE_TABLE: + return create_info.like(); + default: + return FALSE; + } +} + + +/* + Should Items_ident be printed correctly + + SYNOPSIS + need_correct_ident() + + RETURN + TRUE yes, we need only structure + FALSE no, we need data +*/ + + +bool LEX::need_correct_ident() +{ + switch(sql_command) + { + case SQLCOM_SHOW_CREATE: + case SQLCOM_SHOW_TABLES: + case SQLCOM_CREATE_VIEW: + return TRUE; + default: + return FALSE; + } +} + +/* + Get effective type of CHECK OPTION for given view + + SYNOPSIS + get_effective_with_check() + view given view + + NOTE + It have not sense to set CHECK OPTION for SELECT satement or subqueries, + so we do not. + + RETURN + VIEW_CHECK_NONE no need CHECK OPTION + VIEW_CHECK_LOCAL CHECK OPTION LOCAL + VIEW_CHECK_CASCADED CHECK OPTION CASCADED +*/ + +uint8 LEX::get_effective_with_check(TABLE_LIST *view) +{ + if (view->select_lex->master_unit() == &unit && + which_check_option_applicable()) + return (uint8)view->with_check; + return VIEW_CHECK_NONE; +} + + +/** + This method should be called only during parsing. + It is aware of compound statements (stored routine bodies) + and will initialize the destination with the default + database of the stored routine, rather than the default + database of the connection it is parsed in. + E.g. 
if one has no current database selected, or current database + set to 'bar' and then issues: + + CREATE PROCEDURE foo.p1() BEGIN SELECT * FROM t1 END// + + t1 is meant to refer to foo.t1, not to bar.t1. + + This method is needed to support this rule. + + @return TRUE in case of error (parsing should be aborted, FALSE in + case of success +*/ + +bool LEX::copy_db_to(LEX_CSTRING *to) +{ + if (sphead && sphead->m_name.str) + { + DBUG_ASSERT(sphead->m_db.str && sphead->m_db.length); + /* + It is safe to assign the string by-pointer, both sphead and + its statements reside in the same memory root. + */ + *to= sphead->m_db; + return FALSE; + } + return thd->copy_db_to(to); +} + +/** + Initialize offset and limit counters. + + @param sl SELECT_LEX to get offset and limit from. +*/ + +void st_select_lex_unit::set_limit(st_select_lex *sl) +{ + DBUG_ASSERT(!thd->stmt_arena->is_stmt_prepare()); + + lim.set_limit(sl->get_limit(), sl->get_offset(), sl->limit_params.with_ties); +} + +/** + Decide if a temporary table is needed for the UNION. + + @retval true A temporary table is needed. + @retval false A temporary table is not needed. + */ + +bool st_select_lex_unit::union_needs_tmp_table() +{ + if (with_element && with_element->is_recursive) + return true; + if (!with_wrapped_tvc) + { + for (st_select_lex *sl= first_select(); sl; sl=sl->next_select()) + { + if (sl->tvc && sl->tvc->to_be_wrapped_as_with_tail()) + { + with_wrapped_tvc= true; + break; + } + if (sl != first_select() && sl->linkage != UNION_TYPE) + return true; + } + } + if (with_wrapped_tvc) + return true; + return union_distinct != NULL || + global_parameters()->order_list.elements != 0 || + thd->lex->sql_command == SQLCOM_INSERT_SELECT || + thd->lex->sql_command == SQLCOM_REPLACE_SELECT; +} + +/** + @brief Set the initial purpose of this TABLE_LIST object in the list of used + tables. 
+ + We need to track this information on table-by-table basis, since when this + table becomes an element of the pre-locked list, it's impossible to identify + which SQL sub-statement it has been originally used in. + + E.g.: + + User request: SELECT * FROM t1 WHERE f1(); + FUNCTION f1(): DELETE FROM t2; RETURN 1; + BEFORE DELETE trigger on t2: INSERT INTO t3 VALUES (old.a); + + For this user request, the pre-locked list will contain t1, t2, t3 + table elements, each needed for different DML. + + The trigger event map is updated to reflect INSERT, UPDATE, DELETE, + REPLACE, LOAD DATA, CREATE TABLE .. SELECT, CREATE TABLE .. + REPLACE SELECT statements, and additionally ON DUPLICATE KEY UPDATE + clause. +*/ + +void LEX::set_trg_event_type_for_tables() +{ + uint8 new_trg_event_map= 0; + DBUG_ENTER("LEX::set_trg_event_type_for_tables"); + + /* + Some auxiliary operations + (e.g. GRANT processing) create TABLE_LIST instances outside + the parser. Additionally, some commands (e.g. OPTIMIZE) change + the lock type for a table only after parsing is done. Luckily, + these do not fire triggers and do not need to pre-load them. + For these TABLE_LISTs set_trg_event_type is never called, and + trg_event_map is always empty. That means that the pre-locking + algorithm will ignore triggers defined on these tables, if + any, and the execution will either fail with an assert in + sql_trigger.cc or with an error that a used table was not + pre-locked, in case of a production build. + + TODO: this usage pattern creates unnecessary module dependencies + and should be rewritten to go through the parser. + Table list instances created outside the parser in most cases + refer to mysql.* system tables. It is not allowed to have + a trigger on a system table, but keeping track of + initialization provides extra safety in case this limitation + is circumvented. 
+ */ + + switch (sql_command) { + case SQLCOM_LOCK_TABLES: + /* + On a LOCK TABLE, all triggers must be pre-loaded for this TABLE_LIST + when opening an associated TABLE. + */ + new_trg_event_map= trg2bit(TRG_EVENT_INSERT) | trg2bit(TRG_EVENT_UPDATE) | + trg2bit(TRG_EVENT_DELETE); + break; + /* + Basic INSERT. If there is an additional ON DUPLIATE KEY UPDATE + clause, it will be handled later in this method. + */ + case SQLCOM_INSERT: /* fall through */ + case SQLCOM_INSERT_SELECT: + /* + LOAD DATA ... INFILE is expected to fire BEFORE/AFTER INSERT + triggers. + If the statement also has REPLACE clause, it will be + handled later in this method. + */ + case SQLCOM_LOAD: /* fall through */ + /* + REPLACE is semantically equivalent to INSERT. In case + of a primary or unique key conflict, it deletes the old + record and inserts a new one. So we also may need to + fire ON DELETE triggers. This functionality is handled + later in this method. + */ + case SQLCOM_REPLACE: /* fall through */ + case SQLCOM_REPLACE_SELECT: + /* + CREATE TABLE ... SELECT defaults to INSERT if the table or + view already exists. REPLACE option of CREATE TABLE ... + REPLACE SELECT is handled later in this method. 
+ */ + case SQLCOM_CREATE_TABLE: + case SQLCOM_CREATE_SEQUENCE: + new_trg_event_map|= trg2bit(TRG_EVENT_INSERT); + break; + /* Basic update and multi-update */ + case SQLCOM_UPDATE: /* fall through */ + case SQLCOM_UPDATE_MULTI: + new_trg_event_map|= trg2bit(TRG_EVENT_UPDATE); + break; + /* Basic delete and multi-delete */ + case SQLCOM_DELETE: /* fall through */ + case SQLCOM_DELETE_MULTI: + new_trg_event_map|= trg2bit(TRG_EVENT_DELETE); + break; + default: + break; + } + + switch (duplicates) { + case DUP_UPDATE: + new_trg_event_map|= trg2bit(TRG_EVENT_UPDATE); + break; + case DUP_REPLACE: + new_trg_event_map|= trg2bit(TRG_EVENT_DELETE); + break; + case DUP_ERROR: + default: + break; + } + + if (period_conditions.is_set()) + { + switch (sql_command) + { + case SQLCOM_DELETE: + case SQLCOM_UPDATE: + case SQLCOM_REPLACE: + new_trg_event_map |= trg2bit(TRG_EVENT_INSERT); + default: + break; + } + } + + + /* + Do not iterate over sub-selects, only the tables in the outermost + SELECT_LEX can be modified, if any. + */ + TABLE_LIST *tables= first_select_lex()->get_table_list(); + + while (tables) + { + /* + This is a fast check to filter out statements that do + not change data, or tables on the right side, in case of + INSERT .. SELECT, CREATE TABLE .. SELECT and so on. + Here we also filter out OPTIMIZE statement and non-updateable + views, for which lock_type is TL_UNLOCK or TL_READ after + parsing. + */ + if (static_cast(tables->lock_type) >= + static_cast(TL_FIRST_WRITE)) + tables->trg_event_map= new_trg_event_map; + tables= tables->next_local; + } + DBUG_VOID_RETURN; +} + + +/* + Unlink the first table from the global table list and the first table from + outer select (lex->select_lex) local list + + SYNOPSIS + unlink_first_table() + link_to_local Set to 1 if caller should link this table to local list + + NOTES + We assume that first tables in both lists is the same table or the local + list is empty. 
+ + RETURN + 0 If 'query_tables' == 0 + unlinked table + In this case link_to_local is set. + +*/ +TABLE_LIST *LEX::unlink_first_table(bool *link_to_local) +{ + TABLE_LIST *first; + if ((first= query_tables)) + { + /* + Exclude from global table list + */ + if ((query_tables= query_tables->next_global)) + query_tables->prev_global= &query_tables; + else + query_tables_last= &query_tables; + first->next_global= 0; + + /* + and from local list if it is not empty + */ + if ((*link_to_local= MY_TEST(first_select_lex()->table_list.first))) + { + first_select_lex()->context.table_list= + first_select_lex()->context.first_name_resolution_table= + first->next_local; + first_select_lex()->table_list.first= first->next_local; + first_select_lex()->table_list.elements--; //safety + first->next_local= 0; + /* + Ensure that the global list has the same first table as the local + list. + */ + first_lists_tables_same(); + } + } + return first; +} + + +/* + Bring first local table of first most outer select to first place in global + table list + + SYNOPSYS + LEX::first_lists_tables_same() + + NOTES + In many cases (for example, usual INSERT/DELETE/...) the first table of + main SELECT_LEX have special meaning => check that it is the first table + in global list and re-link to be first in the global list if it is + necessary. We need such re-linking only for queries with sub-queries in + the select list, as only in this case tables of sub-queries will go to + the global list first. 
+*/ + +void LEX::first_lists_tables_same() +{ + TABLE_LIST *first_table= first_select_lex()->table_list.first; + if (query_tables != first_table && first_table != 0) + { + TABLE_LIST *next; + if (query_tables_last == &first_table->next_global) + query_tables_last= first_table->prev_global; + + if (query_tables_own_last == &first_table->next_global) + query_tables_own_last= first_table->prev_global; + + if ((next= *first_table->prev_global= first_table->next_global)) + next->prev_global= first_table->prev_global; + /* include in new place */ + first_table->next_global= query_tables; + /* + We are sure that query_tables is not 0, because first_table was not + first table in the global list => we can use + query_tables->prev_global without check of query_tables + */ + query_tables->prev_global= &first_table->next_global; + first_table->prev_global= &query_tables; + query_tables= first_table; + } +} + +void LEX::fix_first_select_number() +{ + SELECT_LEX *first= first_select_lex(); + if (first && first->select_number != 1) + { + uint num= first->select_number; + for (SELECT_LEX *sel= all_selects_list; + sel; + sel= sel->next_select_in_list()) + { + if (sel->select_number < num) + sel->select_number++; + } + first->select_number= 1; + } +} + + +/* + Link table back that was unlinked with unlink_first_table() + + SYNOPSIS + link_first_table_back() + link_to_local do we need link this table to local + + RETURN + global list +*/ + +void LEX::link_first_table_back(TABLE_LIST *first, + bool link_to_local) +{ + if (first) + { + if ((first->next_global= query_tables)) + query_tables->prev_global= &first->next_global; + else + query_tables_last= &first->next_global; + query_tables= first; + + if (link_to_local) + { + first->next_local= first_select_lex()->table_list.first; + first_select_lex()->context.table_list= first; + first_select_lex()->table_list.first= first; + first_select_lex()->table_list.elements++; //safety + } + } +} + + + +/* + cleanup lex for case when we open 
table by table for processing + + SYNOPSIS + LEX::cleanup_after_one_table_open() + + NOTE + This method is mostly responsible for cleaning up of selects lists and + derived tables state. To rollback changes in Query_tables_list one has + to call Query_tables_list::reset_query_tables_list(FALSE). +*/ + +void LEX::cleanup_after_one_table_open() +{ + /* + thd->lex->derived_tables & additional units may be set if we open + a view. It is necessary to clear thd->lex->derived_tables flag + to prevent processing of derived tables during next open_and_lock_tables + if next table is a real table and cleanup & remove underlying units + NOTE: all units will be connected to thd->lex->select_lex, because we + have not UNION on most upper level. + */ + if (all_selects_list != first_select_lex()) + { + derived_tables= 0; + first_select_lex()->exclude_from_table_unique_test= false; + /* cleunup underlying units (units of VIEW) */ + for (SELECT_LEX_UNIT *un= first_select_lex()->first_inner_unit(); + un; + un= un->next_unit()) + un->cleanup(); + /* reduce all selects list to default state */ + all_selects_list= first_select_lex(); + /* remove underlying units (units of VIEW) subtree */ + first_select_lex()->cut_subtree(); + } +} + + +/* + Save current state of Query_tables_list for this LEX, and prepare it + for processing of new statemnt. + + SYNOPSIS + reset_n_backup_query_tables_list() + backup Pointer to Query_tables_list instance to be used for backup +*/ + +void LEX::reset_n_backup_query_tables_list(Query_tables_list *backup) +{ + backup->set_query_tables_list(this); + /* + We have to perform full initialization here since otherwise we + will damage backed up state. + */ + reset_query_tables_list(TRUE); +} + + +/* + Restore state of Query_tables_list for this LEX from backup. 
+ + SYNOPSIS + restore_backup_query_tables_list() + backup Pointer to Query_tables_list instance used for backup +*/ + +void LEX::restore_backup_query_tables_list(Query_tables_list *backup) +{ + destroy_query_tables_list(); + set_query_tables_list(backup); +} + + +/* + Checks for usage of routines and/or tables in a parsed statement + + SYNOPSIS + LEX:table_or_sp_used() + + RETURN + FALSE No routines and tables used + TRUE Either or both routines and tables are used. +*/ + +bool LEX::table_or_sp_used() +{ + DBUG_ENTER("table_or_sp_used"); + + if (sroutines.records || query_tables) + DBUG_RETURN(TRUE); + + DBUG_RETURN(FALSE); +} + + +/* + Do end-of-prepare fixup for list of tables and their merge-VIEWed tables + + SYNOPSIS + fix_prepare_info_in_table_list() + thd Thread handle + tbl List of tables to process + + DESCRIPTION + Perform end-end-of prepare fixup for list of tables, if any of the tables + is a merge-algorithm VIEW, recursively fix up its underlying tables as + well. + +*/ + +static void fix_prepare_info_in_table_list(THD *thd, TABLE_LIST *tbl) +{ + for (; tbl; tbl= tbl->next_local) + { + if (tbl->on_expr && !tbl->prep_on_expr) + { + thd->check_and_register_item_tree(&tbl->prep_on_expr, &tbl->on_expr); + tbl->on_expr= tbl->on_expr->copy_andor_structure(thd); + } + if (tbl->is_view_or_derived() && tbl->is_merged_derived()) + { + SELECT_LEX *sel= tbl->get_single_select(); + fix_prepare_info_in_table_list(thd, sel->get_table_list()); + } + } +} + + +/* + Save WHERE/HAVING/ON clauses and replace them with disposable copies + + SYNOPSIS + st_select_lex::fix_prepare_information + thd thread handler + conds in/out pointer to WHERE condition to be met at execution + having_conds in/out pointer to HAVING condition to be met at execution + + DESCRIPTION + The passed WHERE and HAVING are to be saved for the future executions. + This function saves it, and returns a copy which can be thrashed during + this execution of the statement. 
By saving/thrashing here we mean only + We also save the chain of ORDER::next in group_list, in case + the list is modified by remove_const(). + AND/OR trees. + The function also calls fix_prepare_info_in_table_list that saves all + ON expressions. +*/ + +void st_select_lex::fix_prepare_information(THD *thd, Item **conds, + Item **having_conds) +{ + DBUG_ENTER("st_select_lex::fix_prepare_information"); + if (!thd->stmt_arena->is_conventional() && + !(changed_elements & TOUCHED_SEL_COND)) + { + Query_arena_stmt on_stmt_arena(thd); + changed_elements|= TOUCHED_SEL_COND; + if (group_list.first) + { + if (!group_list_ptrs) + { + void *mem= thd->stmt_arena->alloc(sizeof(Group_list_ptrs)); + group_list_ptrs= new (mem) Group_list_ptrs(thd->stmt_arena->mem_root); + } + group_list_ptrs->reserve(group_list.elements); + for (ORDER *order= group_list.first; order; order= order->next) + { + group_list_ptrs->push_back(order); + } + } + if (*conds) + { + thd->check_and_register_item_tree(&prep_where, conds); + *conds= where= prep_where->copy_andor_structure(thd); + } + if (*having_conds) + { + thd->check_and_register_item_tree(&prep_having, having_conds); + *having_conds= having= prep_having->copy_andor_structure(thd); + } + fix_prepare_info_in_table_list(thd, table_list.first); + } + DBUG_VOID_RETURN; +} + + +/* + There are st_select_lex::add_table_to_list & + st_select_lex::set_lock_for_tables are in sql_parse.cc + + st_select_lex::print is in sql_select.cc + + st_select_lex_unit::prepare, st_select_lex_unit::exec, + st_select_lex_unit::cleanup, st_select_lex_unit::reinit_exec_mechanism, + st_select_lex_unit::change_result + are in sql_union.cc +*/ + +/* + Sets the kind of hints to be added by the calls to add_index_hint(). + + SYNOPSIS + set_index_hint_type() + type_arg The kind of hints to be added from now on. + clause The clause to use for hints to be added from now on. + + DESCRIPTION + Used in filling up the tagged hints list. 
+ This list is filled by first setting the kind of the hint as a + context variable and then adding hints of the current kind. + Then the context variable index_hint_type can be reset to the + next hint type. +*/ +void st_select_lex::set_index_hint_type(enum index_hint_type type_arg, + index_clause_map clause) +{ + current_index_hint_type= type_arg; + current_index_hint_clause= clause; +} + + +/* + Makes an array to store index usage hints (ADD/FORCE/IGNORE INDEX). + + SYNOPSIS + alloc_index_hints() + thd current thread. +*/ + +void st_select_lex::alloc_index_hints (THD *thd) +{ + index_hints= new (thd->mem_root) List(); +} + + + +/* + adds an element to the array storing index usage hints + (ADD/FORCE/IGNORE INDEX). + + SYNOPSIS + add_index_hint() + thd current thread. + str name of the index. + length number of characters in str. + + RETURN VALUE + 0 on success, non-zero otherwise +*/ +bool st_select_lex::add_index_hint (THD *thd, const char *str, size_t length) +{ + return index_hints->push_front(new (thd->mem_root) + Index_hint(current_index_hint_type, + current_index_hint_clause, + str, length), thd->mem_root); +} + + +/** + Optimize all subqueries that have not been flattened into semi-joins. + + @details + This functionality is a method of SELECT_LEX instead of JOIN because + SQL statements as DELETE/UPDATE do not have a corresponding JOIN object. + + @see JOIN::optimize_unflattened_subqueries + + @param const_only Restrict subquery optimization to constant subqueries + + @return Operation status + @retval FALSE success. + @retval TRUE error occurred. +*/ + +bool st_select_lex::optimize_unflattened_subqueries(bool const_only) +{ + SELECT_LEX_UNIT *next_unit= NULL; + for (SELECT_LEX_UNIT *un= first_inner_unit(); + un; + un= next_unit ? 
next_unit : un->next_unit()) + { + Item_subselect *subquery_predicate= un->item; + next_unit= NULL; + + if (subquery_predicate) + { + if (!subquery_predicate->fixed()) + { + /* + This subquery was excluded as part of some expression so it is + invisible from all prepared expression. + */ + next_unit= un->next_unit(); + un->exclude_level(); + if (next_unit) + continue; + break; + } + if (subquery_predicate->substype() == Item_subselect::IN_SUBS) + { + Item_in_subselect *in_subs= subquery_predicate->get_IN_subquery(); + if (in_subs->is_jtbm_merged) + continue; + } + + if (const_only && !subquery_predicate->const_item()) + { + /* Skip non-constant subqueries if the caller asked so. */ + continue; + } + + bool empty_union_result= true; + bool is_correlated_unit= false; + bool first= true; + bool union_plan_saved= false; + /* + If the subquery is a UNION, optimize all the subqueries in the UNION. If + there is no UNION, then the loop will execute once for the subquery. + */ + for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select()) + { + JOIN *inner_join= sl->join; + if (first) + first= false; + else + { + if (!union_plan_saved) + { + union_plan_saved= true; + if (un->save_union_explain(un->thd->lex->explain)) + return true; /* Failure */ + } + } + if (!inner_join) + continue; + SELECT_LEX *save_select= un->thd->lex->current_select; + ulonglong save_options; + int res; + /* We need only 1 row to determine existence */ + un->set_limit(un->global_parameters()); + un->thd->lex->current_select= sl; + save_options= inner_join->select_options; + if (options & SELECT_DESCRIBE) + { + /* Optimize the subquery in the context of EXPLAIN. 
*/ + sl->set_explain_type(FALSE); + sl->options|= SELECT_DESCRIBE; + inner_join->select_options|= SELECT_DESCRIBE; + } + if ((res= inner_join->optimize())) + return TRUE; + if (!inner_join->cleaned) + sl->update_used_tables(); + sl->update_correlated_cache(); + is_correlated_unit|= sl->is_correlated; + inner_join->select_options= save_options; + un->thd->lex->current_select= save_select; + + Explain_query *eq; + if ((eq= inner_join->thd->lex->explain)) + { + Explain_select *expl_sel; + if ((expl_sel= eq->get_select(inner_join->select_lex->select_number))) + { + sl->set_explain_type(TRUE); + expl_sel->select_type= sl->type; + } + } + + if (empty_union_result) + { + /* + If at least one subquery in a union is non-empty, the UNION result + is non-empty. If there is no UNION, the only subquery is non-empy. + */ + empty_union_result= inner_join->empty_result(); + } + if (res) + return TRUE; + } + if (empty_union_result) + subquery_predicate->no_rows_in_result(); + + if (is_correlated_unit) + { + /* + Some parts of UNION are not correlated. This means we will need to + re-execute the whole UNION every time. Mark all parts of the UNION + as correlated so that they are prepared to be executed multiple + times (if we don't do that, some part of the UNION may free its + execution data at the end of first execution and crash on the second + execution) + */ + for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select()) + sl->uncacheable |= UNCACHEABLE_DEPENDENT; + } + else + un->uncacheable&= ~UNCACHEABLE_DEPENDENT; + subquery_predicate->is_correlated= is_correlated_unit; + } + } + return FALSE; +} + + + +/** + @brief Process all derived tables/views of the SELECT. + + @param lex LEX of this thread + @param phase phases to run derived tables/views through + + @details + This function runs specified 'phases' on all tables from the + table_list of this select. + + @return FALSE ok. + @return TRUE an error occur. 
+*/ + +bool st_select_lex::handle_derived(LEX *lex, uint phases) +{ + return lex->handle_list_of_derived(table_list.first, phases); +} + + +/** + @brief + Returns first unoccupied table map and table number + + @param map [out] return found map + @param tablenr [out] return found tablenr + + @details + Returns first unoccupied table map and table number in this select. + Map and table are returned in *'map' and *'tablenr' accordingly. + + @retrun TRUE no free table map/table number + @return FALSE found free table map/table number +*/ + +bool st_select_lex::get_free_table_map(table_map *map, uint *tablenr) +{ + *map= 0; + *tablenr= 0; + TABLE_LIST *tl; + List_iterator ti(leaf_tables); + while ((tl= ti++)) + { + if (tl->table->map > *map) + *map= tl->table->map; + if (tl->table->tablenr > *tablenr) + *tablenr= tl->table->tablenr; + } + (*map)<<= 1; + (*tablenr)++; + if (*tablenr >= MAX_TABLES) + return TRUE; + return FALSE; +} + + +/** + @brief + Append given table to the leaf_tables list. + + @param link Offset to which list in table structure to use + @param table Table to append + + @details + Append given 'table' to the leaf_tables list using the 'link' offset. + If the 'table' is linked with other tables through next_leaf/next_local + chains then whole list will be appended. +*/ + +void st_select_lex::append_table_to_list(TABLE_LIST *TABLE_LIST::*link, + TABLE_LIST *table) +{ + TABLE_LIST *tl; + for (tl= leaf_tables.head(); tl->*link; tl= tl->*link) ; + tl->*link= table; +} + + +/* + @brief + Replace given table from the leaf_tables list for a list of tables + + @param table Table to replace + @param list List to substititute the table for + + @details + Replace 'table' from the leaf_tables list for a list of tables 'tbl_list'. 
+*/ + +void st_select_lex::replace_leaf_table(TABLE_LIST *table, List &tbl_list) +{ + TABLE_LIST *tl; + List_iterator ti(leaf_tables); + while ((tl= ti++)) + { + if (tl == table) + { + ti.replace(tbl_list); + break; + } + } +} + + +/** + @brief + Assigns new table maps to tables in the leaf_tables list + + @param derived Derived table to take initial table map from + @param map table map to begin with + @param tablenr table number to begin with + @param parent_lex new parent select_lex + + @details + Assign new table maps/table numbers to all tables in the leaf_tables list. + 'map'/'tablenr' are used for the first table and shifted to left/ + increased for each consequent table in the leaf_tables list. + If the 'derived' table is given then it's table map/number is used for the + first table in the list and 'map'/'tablenr' are used for the second and + all consequent tables. + The 'parent_lex' is set as the new parent select_lex for all tables in the + list. +*/ + +void st_select_lex::remap_tables(TABLE_LIST *derived, table_map map, + uint tablenr, SELECT_LEX *parent_lex) +{ + bool first_table= TRUE; + bool has_table_function= FALSE; + TABLE_LIST *tl; + table_map first_map; + uint first_tablenr; + + if (derived && derived->table) + { + first_map= derived->table->map; + first_tablenr= derived->table->tablenr; + } + else + { + first_map= map; + map<<= 1; + first_tablenr= tablenr++; + } + /* + Assign table bit/table number. + To the first table of the subselect the table bit/tablenr of the + derived table is assigned. The rest of tables are getting bits + sequentially, starting from the provided table map/tablenr. 
+ */ + List_iterator ti(leaf_tables); + while ((tl= ti++)) + { + if (first_table) + { + first_table= FALSE; + tl->table->set_table_map(first_map, first_tablenr); + } + else + { + tl->table->set_table_map(map, tablenr); + tablenr++; + map<<= 1; + } + SELECT_LEX *old_sl= tl->select_lex; + tl->select_lex= parent_lex; + for(TABLE_LIST *emb= tl->embedding; + emb && emb->select_lex == old_sl; + emb= emb->embedding) + emb->select_lex= parent_lex; + + if (tl->table_function) + has_table_function= TRUE; + } + + if (has_table_function) + { + ti.rewind(); + while ((tl= ti++)) + { + if (tl->table_function) + tl->table_function->fix_after_pullout(tl, parent_lex, true); + } + } +} + +/** + @brief + Merge a subquery into this select. + + @param derived derived table of the subquery to be merged + @param subq_select select_lex of the subquery + @param map table map for assigning to merged tables from subquery + @param table_no table number for assigning to merged tables from subquery + + @details + This function merges a subquery into its parent select. In short the + merge operation appends the subquery FROM table list to the parent's + FROM table list. In more details: + .) the top_join_list of the subquery is wrapped into a join_nest + and attached to 'derived' + .) subquery's leaf_tables list is merged with the leaf_tables + list of this select_lex + .) 
the table maps and table numbers of the tables merged from + the subquery are adjusted to reflect their new binding to + this select + + @return TRUE an error occur + @return FALSE ok +*/ + +bool SELECT_LEX::merge_subquery(THD *thd, TABLE_LIST *derived, + SELECT_LEX *subq_select, + uint table_no, table_map map) +{ + derived->wrap_into_nested_join(subq_select->top_join_list); + + ftfunc_list->append(subq_select->ftfunc_list); + if (join || + thd->lex->sql_command == SQLCOM_UPDATE_MULTI || + thd->lex->sql_command == SQLCOM_DELETE_MULTI) + { + List_iterator_fast li(subq_select->sj_subselects); + Item_in_subselect *in_subq; + while ((in_subq= li++)) + { + sj_subselects.push_back(in_subq, thd->mem_root); + if (in_subq->emb_on_expr_nest == NO_JOIN_NEST) + in_subq->emb_on_expr_nest= derived; + } + + uint cnt= sizeof(expr_cache_may_be_used)/sizeof(bool); + for (uint i= 0; i < cnt; i++) + { + if (subq_select->expr_cache_may_be_used[i]) + expr_cache_may_be_used[i]= true; + } + + List_iterator_fast it(subq_select->in_funcs); + Item_func_in *in_func; + while ((in_func= it++)) + { + in_funcs.push_back(in_func, thd->mem_root); + if (in_func->emb_on_expr_nest == NO_JOIN_NEST) + in_func->emb_on_expr_nest= derived; + } + } + + /* Walk through child's tables and adjust table map, tablenr, + * parent_lex */ + subq_select->remap_tables(derived, map, table_no, this); + subq_select->merged_into= this; + + replace_leaf_table(derived, subq_select->leaf_tables); + + return FALSE; +} + + +/** + @brief + Mark tables from the leaf_tables list as belong to a derived table. + + @param derived tables will be marked as belonging to this derived + + @details + Run through the leaf_list and mark all tables as belonging to the 'derived'. 
+*/ + +void SELECT_LEX::mark_as_belong_to_derived(TABLE_LIST *derived) +{ + /* Mark tables as belonging to this DT */ + TABLE_LIST *tl; + List_iterator ti(leaf_tables); + while ((tl= ti++)) + tl->belong_to_derived= derived; +} + + +/** + @brief + Update used_tables cache for this select + + @details + This function updates used_tables cache of ON expressions of all tables + in the leaf_tables list and of the conds expression (if any). +*/ + +void SELECT_LEX::update_used_tables() +{ + TABLE_LIST *tl; + List_iterator ti(leaf_tables); + + while ((tl= ti++)) + { + if (tl->table && !tl->is_view_or_derived()) + { + TABLE_LIST *embedding= tl->embedding; + for (embedding= tl->embedding; embedding; embedding=embedding->embedding) + { + if (embedding->is_view_or_derived()) + { + DBUG_ASSERT(embedding->is_merged_derived()); + TABLE *tab= tl->table; + tab->covering_keys= tab->s->keys_for_keyread; + tab->covering_keys.intersect(tab->keys_in_use_for_query); + /* + View/derived was merged. Need to recalculate read_set + bitmaps here. For example: + CREATE VIEW v1 AS SELECT f1,f2,f3 FROM t1; + SELECT f1 FROM v1; + Initially, the view definition will put all f1,f2,f3 in the + read_set for t1. But after the view is merged, only f1 should + be in the read_set. + */ + bitmap_clear_all(tab->read_set); + break; + } + } + } + } + + ti.rewind(); + while ((tl= ti++)) + { + TABLE_LIST *embedding= tl; + if (!is_eliminated_table(join->eliminated_tables, tl)) + { + do + { + bool maybe_null; + if ((maybe_null= MY_TEST(embedding->outer_join))) + { + tl->table->maybe_null= maybe_null; + break; + } + } + while ((embedding= embedding->embedding)); + } + + if (tl->on_expr && !is_eliminated_table(join->eliminated_tables, tl)) + { + tl->on_expr->update_used_tables(); + tl->on_expr->walk(&Item::eval_not_null_tables, 0, NULL); + } + /* + - There is no need to check sj_on_expr, because merged semi-joins inject + sj_on_expr into the parent's WHERE clase. 
+ - For non-merged semi-joins (aka JTBMs), we need to check their + left_expr. There is no need to check the rest of the subselect, we know + it is uncorrelated and so cannot refer to any tables in this select. + */ + if (tl->jtbm_subselect) + { + Item *left_expr= tl->jtbm_subselect->left_exp(); + left_expr->walk(&Item::update_table_bitmaps_processor, FALSE, NULL); + } + + if (tl->table_function) + tl->table_function->update_used_tables(); + + embedding= tl->embedding; + while (embedding) + { + if (embedding->on_expr && + embedding->nested_join->join_list.head() == tl) + { + if (!is_eliminated_table(join->eliminated_tables, embedding)) + { + embedding->on_expr->update_used_tables(); + embedding->on_expr->walk(&Item::eval_not_null_tables, 0, NULL); + } + } + tl= embedding; + embedding= tl->embedding; + } + } + + if (join->conds) + { + join->conds->update_used_tables(); + join->conds->walk(&Item::eval_not_null_tables, 0, NULL); + } + if (join->having) + { + join->having->update_used_tables(); + } + + Item *item; + List_iterator_fast it(join->all_fields); + select_list_tables= 0; + while ((item= it++)) + { + item->update_used_tables(); + select_list_tables|= item->used_tables(); + } + Item_outer_ref *ref; + List_iterator_fast ref_it(inner_refs_list); + while ((ref= ref_it++)) + { + item= ref->outer_ref; + item->update_used_tables(); + } + for (ORDER *order= group_list.first; order; order= order->next) + (*order->item)->update_used_tables(); + if (!master_unit()->is_unit_op() || + master_unit()->global_parameters() != this) + { + for (ORDER *order= order_list.first; order; order= order->next) + (*order->item)->update_used_tables(); + } + join->result->update_used_tables(); +} + + +/** + @brief + Update is_correlated cache for this select + + @details +*/ + +void st_select_lex::update_correlated_cache() +{ + TABLE_LIST *tl; + List_iterator ti(leaf_tables); + + is_correlated= false; + + while ((tl= ti++)) + { + // is_correlated|= tl->is_with_table_recursive_reference(); 
+ if (tl->on_expr) + is_correlated|= MY_TEST(tl->on_expr->used_tables() & OUTER_REF_TABLE_BIT); + for (TABLE_LIST *embedding= tl->embedding ; embedding ; + embedding= embedding->embedding) + { + if (embedding->on_expr) + is_correlated|= MY_TEST(embedding->on_expr->used_tables() & + OUTER_REF_TABLE_BIT); + } + } + + if (join->conds) + is_correlated|= MY_TEST(join->conds->used_tables() & OUTER_REF_TABLE_BIT); + + is_correlated|= join->having_is_correlated; + + if (join->having) + is_correlated|= MY_TEST(join->having->used_tables() & OUTER_REF_TABLE_BIT); + + if (join->tmp_having) + is_correlated|= MY_TEST(join->tmp_having->used_tables() & + OUTER_REF_TABLE_BIT); + + Item *item; + List_iterator_fast it(join->fields_list); + while ((item= it++)) + is_correlated|= MY_TEST(item->used_tables() & OUTER_REF_TABLE_BIT); + + for (ORDER *order= group_list.first; order; order= order->next) + is_correlated|= MY_TEST((*order->item)->used_tables() & + OUTER_REF_TABLE_BIT); + + if (!master_unit()->is_unit_op()) + { + for (ORDER *order= order_list.first; order; order= order->next) + is_correlated|= MY_TEST((*order->item)->used_tables() & + OUTER_REF_TABLE_BIT); + } + + if (!is_correlated) + uncacheable&= ~UNCACHEABLE_DEPENDENT; +} + + +/** + Set the EXPLAIN type for this subquery. + + @param on_the_fly TRUE<=> We're running a SHOW EXPLAIN command, so we must + not change any variables +*/ + +void st_select_lex::set_explain_type(bool on_the_fly) +{ + bool is_primary= FALSE; + if (next_select()) + is_primary= TRUE; + + if (!is_primary && first_inner_unit()) + { + /* + If there is at least one materialized derived|view then it's a PRIMARY select. + Otherwise, all derived tables/views were merged and this select is a SIMPLE one. 
+ */ + for (SELECT_LEX_UNIT *un= first_inner_unit(); un; un= un->next_unit()) + { + if ((!un->derived || un->derived->is_materialized_derived())) + { + is_primary= TRUE; + break; + } + } + } + + if (on_the_fly && !is_primary && have_merged_subqueries) + is_primary= TRUE; + + SELECT_LEX *first= master_unit()->first_select(); + /* drop UNCACHEABLE_EXPLAIN, because it is for internal usage only */ + uint8 is_uncacheable= (uncacheable & ~UNCACHEABLE_EXPLAIN); + + bool using_materialization= FALSE; + Item_subselect *parent_item; + if ((parent_item= master_unit()->item) && + parent_item->substype() == Item_subselect::IN_SUBS) + { + Item_in_subselect *in_subs= parent_item->get_IN_subquery(); + /* + Surprisingly, in_subs->is_set_strategy() can return FALSE here, + even for the last invocation of this function for the select. + */ + if (in_subs->test_strategy(SUBS_MATERIALIZATION)) + using_materialization= TRUE; + } + + if (master_unit()->thd->lex->first_select_lex() == this) + { + if (pushdown_select) + type= pushed_select_text; + else + type= is_primary ? "PRIMARY" : "SIMPLE"; + } + else + { + if (this == first) + { + /* If we're a direct child of a UNION, we're the first sibling there */ + if (linkage == DERIVED_TABLE_TYPE) + { + bool is_pushed_master_unit= master_unit()->derived && + master_unit()->derived->pushdown_derived; + if (is_pushed_master_unit) + type= pushed_derived_text; + else if (is_uncacheable & UNCACHEABLE_DEPENDENT) + type= "LATERAL DERIVED"; + else + type= "DERIVED"; + } + else if (using_materialization) + type= "MATERIALIZED"; + else + { + if (is_uncacheable & UNCACHEABLE_DEPENDENT) + type= "DEPENDENT SUBQUERY"; + else + { + type= is_uncacheable? 
"UNCACHEABLE SUBQUERY" : + "SUBQUERY"; + } + } + } + else + { + switch (linkage) + { + case INTERSECT_TYPE: + type= "INTERSECT"; + break; + case EXCEPT_TYPE: + type= "EXCEPT"; + break; + default: + /* This a non-first sibling in UNION */ + if (is_uncacheable & UNCACHEABLE_DEPENDENT) + type= "DEPENDENT UNION"; + else if (using_materialization) + type= "MATERIALIZED UNION"; + else + { + type= is_uncacheable ? "UNCACHEABLE UNION": "UNION"; + if (this == master_unit()->fake_select_lex) + type= unit_operation_text[master_unit()->common_op()]; + /* + join below may be =NULL when this functions is called at an early + stage. It will be later called again and we will set the correct + value. + */ + if (join) + { + bool uses_cte= false; + for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_BUSH_ROOTS, + WITH_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) + { + /* + pos_in_table_list=NULL for e.g. post-join aggregation JOIN_TABs. + */ + if (!(tab->table && tab->table->pos_in_table_list)) + continue; + TABLE_LIST *tbl= tab->table->pos_in_table_list; + if (tbl->with && tbl->with->is_recursive && + tbl->is_with_table_recursive_reference()) + { + uses_cte= true; + break; + } + } + if (uses_cte) + type= "RECURSIVE UNION"; + } + } + break; + } + } + } + + if (!on_the_fly) + options|= SELECT_DESCRIBE; +} + + +/** + @brief + Increase estimated number of records for a derived table/view + + @param records number of records to increase estimate by + + @details + This function increases estimated number of records by the 'records' + for the derived table to which this select belongs to. 
+*/ + +void SELECT_LEX::increase_derived_records(ha_rows records) +{ + SELECT_LEX_UNIT *unit= master_unit(); + DBUG_ASSERT(unit->derived); + + if (unit->with_element && unit->with_element->is_recursive) + { + st_select_lex *first_recursive= unit->with_element->first_recursive; + st_select_lex *sl= unit->first_select(); + for ( ; sl != first_recursive; sl= sl->next_select()) + { + if (sl == this) + break; + } + if (sl == first_recursive) + return; + } + + select_result *result= unit->result; + switch (linkage) + { + case INTERSECT_TYPE: + // result of intersect can't be more then one of components + set_if_smaller(result->est_records, records); + case EXCEPT_TYPE: + // in worse case none of record will be removed + break; + default: + // usual UNION + if (HA_ROWS_MAX - records > result->est_records) + result->est_records+= records; + else + result->est_records= HA_ROWS_MAX; + break; + } +} + + +/** + @brief + Mark select's derived table as a const one. + + @param empty Whether select has an empty result set + + @details + Mark derived table/view of this select as a constant one (to + materialize it at the optimization phase) unless this select belongs to a + union. Estimated number of rows is incremented if this select has non empty + result set. +*/ + +void SELECT_LEX::mark_const_derived(bool empty) +{ + TABLE_LIST *derived= master_unit()->derived; + /* join == NULL in DELETE ... 
RETURNING */ + if (!(join && join->thd->lex->describe) && derived) + { + if (!empty) + increase_derived_records(1); + if (!master_unit()->is_unit_op() && !derived->is_merged_derived() && + !(join && join->with_two_phase_optimization)) + derived->fill_me= TRUE; + } +} + + +bool st_select_lex::save_leaf_tables(THD *thd) +{ + Query_arena *arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); + + List_iterator_fast li(leaf_tables); + TABLE_LIST *table; + while ((table= li++)) + { + if (leaf_tables_exec.push_back(table, thd->mem_root)) + return 1; + table->tablenr_exec= table->get_tablenr(); + table->map_exec= table->get_map(); + if (join && (join->select_options & SELECT_DESCRIBE)) + table->maybe_null_exec= 0; + else + table->maybe_null_exec= table->table? table->table->maybe_null: 0; + } + if (arena) + thd->restore_active_arena(arena, &backup); + + return 0; +} + + +bool LEX::save_prep_leaf_tables() +{ + if (!thd->save_prep_leaf_list) + return FALSE; + + Query_arena *arena= thd->stmt_arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); + //It is used for DETETE/UPDATE so top level has only one SELECT + DBUG_ASSERT(first_select_lex()->next_select() == NULL); + bool res= first_select_lex()->save_prep_leaf_tables(thd); + + if (arena) + thd->restore_active_arena(arena, &backup); + + if (res) + return TRUE; + + thd->save_prep_leaf_list= FALSE; + return FALSE; +} + + +bool st_select_lex::save_prep_leaf_tables(THD *thd) +{ + if (prep_leaf_list_state == SAVED) + return FALSE; + + List_iterator_fast li(leaf_tables); + TABLE_LIST *table; + + /* + Check that the SELECT_LEX was really prepared and so tables are setup. + + It can be subquery in SET clause of UPDATE which was not prepared yet, so + its tables are not yet setup and ready for storing. 
+ */ + if (prep_leaf_list_state != READY) + return FALSE; + + while ((table= li++)) + { + if (leaf_tables_prep.push_back(table)) + return TRUE; + } + prep_leaf_list_state= SAVED; + for (SELECT_LEX_UNIT *u= first_inner_unit(); u; u= u->next_unit()) + { + for (SELECT_LEX *sl= u->first_select(); sl; sl= sl->next_select()) + { + if (sl->save_prep_leaf_tables(thd)) + return TRUE; + } + } + + return FALSE; +} + + +/** + Set exclude_from_table_unique_test for selects of this select and all selects + belonging to the underlying units of derived tables or views +*/ + +void st_select_lex::set_unique_exclude() +{ + exclude_from_table_unique_test= TRUE; + for (SELECT_LEX_UNIT *unit= first_inner_unit(); + unit; + unit= unit->next_unit()) + { + if (unit->derived && unit->derived->is_view_or_derived()) + { + for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select()) + sl->set_unique_exclude(); + } + } +} + + +/* + Return true if this select_lex has been converted into a semi-join nest + within 'ancestor'. + + We need a loop to check this because there could be several nested + subselects, like + + SELECT ... FROM grand_parent + WHERE expr1 IN (SELECT ... FROM parent + WHERE expr2 IN ( SELECT ... FROM child) + + which were converted into: + + SELECT ... + FROM grand_parent SEMI_JOIN (parent JOIN child) + WHERE + expr1 AND expr2 + + In this case, both parent and child selects were merged into the parent. +*/ + +bool st_select_lex::is_merged_child_of(st_select_lex *ancestor) +{ + bool all_merged= TRUE; + for (SELECT_LEX *sl= this; sl && sl!=ancestor; + sl=sl->outer_select()) + { + Item *subs= sl->master_unit()->item; + Item_in_subselect *in_subs= (subs ? 
subs->get_IN_subquery() : NULL); + if (in_subs && + ((Item_subselect*)subs)->substype() == Item_subselect::IN_SUBS && + in_subs->test_strategy(SUBS_SEMI_JOIN)) + { + continue; + } + + if (sl->master_unit()->derived && + sl->master_unit()->derived->is_merged_derived()) + { + continue; + } + all_merged= FALSE; + break; + } + return all_merged; +} + +/* + This is used by SHOW EXPLAIN|ANALYZE. It assumes query plan has been already + collected into QPF structures and we only need to print it out. +*/ + +int LEX::print_explain(select_result_sink *output, uint8 explain_flags, + bool is_analyze, bool is_json_format, + bool *printed_anything) +{ + int res; + if (explain && explain->have_query_plan()) + { + if (is_json_format) + { + auto now= microsecond_interval_timer(); + auto start_time= thd->start_utime; + auto query_time_in_progress_ms= 0ULL; + if (likely(now > start_time)) + query_time_in_progress_ms= + (now - start_time) / (HRTIME_RESOLUTION / 1000); + res= explain->print_explain_json(output, is_analyze, + query_time_in_progress_ms); + } + else + { + res= explain->print_explain(output, explain_flags, is_analyze); + } + *printed_anything= true; + } + else + { + res= 0; + *printed_anything= false; + } + return res; +} + + +/** + Allocates and set arena for SET STATEMENT old values. + + @param backup where to save backup of arena. 
+ + @retval 1 Error + @retval 0 OK +*/ + +bool LEX::set_arena_for_set_stmt(Query_arena *backup) +{ + DBUG_ENTER("LEX::set_arena_for_set_stmt"); + DBUG_ASSERT(arena_for_set_stmt== 0); + if (!mem_root_for_set_stmt) + { + mem_root_for_set_stmt= new MEM_ROOT(); + if (unlikely(!(mem_root_for_set_stmt))) + DBUG_RETURN(1); + init_sql_alloc(PSI_INSTRUMENT_ME, mem_root_for_set_stmt, ALLOC_ROOT_SET, + ALLOC_ROOT_SET, MYF(MY_THREAD_SPECIFIC)); + } + if (unlikely(!(arena_for_set_stmt= new(mem_root_for_set_stmt) + Query_arena_memroot(mem_root_for_set_stmt, + Query_arena::STMT_INITIALIZED)))) + DBUG_RETURN(1); + DBUG_PRINT("info", ("mem_root: %p arena: %p", + mem_root_for_set_stmt, + arena_for_set_stmt)); + thd->set_n_backup_active_arena(arena_for_set_stmt, backup); + DBUG_RETURN(0); +} + + +void LEX::reset_arena_for_set_stmt(Query_arena *backup) +{ + DBUG_ENTER("LEX::reset_arena_for_set_stmt"); + DBUG_ASSERT(arena_for_set_stmt); + thd->restore_active_arena(arena_for_set_stmt, backup); + DBUG_PRINT("info", ("mem_root: %p arena: %p", + arena_for_set_stmt->mem_root, + arena_for_set_stmt)); + DBUG_VOID_RETURN; +} + + +void LEX::free_arena_for_set_stmt() +{ + DBUG_ENTER("LEX::free_arena_for_set_stmt"); + if (!arena_for_set_stmt) + return; + DBUG_PRINT("info", ("mem_root: %p arena: %p", + arena_for_set_stmt->mem_root, + arena_for_set_stmt)); + arena_for_set_stmt->free_items(); + delete(arena_for_set_stmt); + free_root(mem_root_for_set_stmt, MYF(MY_KEEP_PREALLOC)); + arena_for_set_stmt= 0; + DBUG_VOID_RETURN; +} + +bool LEX::restore_set_statement_var() +{ + bool err= false; + DBUG_ENTER("LEX::restore_set_statement_var"); + if (!old_var_list.is_empty()) + { + DBUG_PRINT("info", ("vars: %d", old_var_list.elements)); + err= sql_set_variables(thd, &old_var_list, false); + old_var_list.empty(); + free_arena_for_set_stmt(); + } + DBUG_ASSERT(!is_arena_for_set_stmt()); + DBUG_RETURN(err); +} + +unit_common_op st_select_lex_unit::common_op() +{ + SELECT_LEX *first= first_select(); + bool 
first_op= TRUE; + unit_common_op operation= OP_MIX; // if no op + for (SELECT_LEX *sl= first; sl; sl= sl->next_select()) + { + if (sl != first) + { + unit_common_op op; + switch (sl->linkage) + { + case INTERSECT_TYPE: + op= OP_INTERSECT; + break; + case EXCEPT_TYPE: + op= OP_EXCEPT; + break; + default: + op= OP_UNION; + break; + } + if (first_op) + { + operation= op; + first_op= FALSE; + } + else + { + if (operation != op) + operation= OP_MIX; + } + } + } + return operation; +} +/* + Save explain structures of a UNION. The only variable member is whether the + union has "Using filesort". + + There is also save_union_explain_part2() function, which is called before we read + UNION's output. + + The reason for it is examples like this: + + SELECT col1 FROM t1 UNION SELECT col2 FROM t2 ORDER BY (select ... from t3 ...) + + Here, the (select ... from t3 ...) subquery must be a child of UNION's + st_select_lex. However, it is not connected as child until a very late + stage in execution. +*/ + +int st_select_lex_unit::save_union_explain(Explain_query *output) +{ + SELECT_LEX *first= first_select(); + + if (output->get_union(first->select_number)) + return 0; /* Already added */ + + Explain_union *eu= + new (output->mem_root) Explain_union(output->mem_root, + thd->lex->analyze_stmt); + if (unlikely(!eu)) + return 0; + + if (with_element && with_element->is_recursive) + eu->is_recursive_cte= true; + + if (derived) + eu->connection_type= Explain_node::EXPLAIN_NODE_DERIVED; + /* + Note: Non-merged semi-joins cannot be made out of UNIONs currently, so we + don't ever set EXPLAIN_NODE_NON_MERGED_SJ. 
+ */ + for (SELECT_LEX *sl= first; sl; sl= sl->next_select()) + eu->add_select(sl->select_number); + + eu->fake_select_type= unit_operation_text[eu->operation= common_op()]; + eu->using_filesort= MY_TEST(global_parameters()->order_list.first); + eu->using_tmp= union_needs_tmp_table(); + + // Save the UNION node + output->add_node(eu); + + if (eu->get_select_id() == 1) + output->query_plan_ready(); + + return 0; +} + + +/* + @see st_select_lex_unit::save_union_explain +*/ + +int st_select_lex_unit::save_union_explain_part2(Explain_query *output) +{ + Explain_union *eu= output->get_union(first_select()->select_number); + if (fake_select_lex) + { + for (SELECT_LEX_UNIT *unit= fake_select_lex->first_inner_unit(); + unit; unit= unit->next_unit()) + { + if (unit->explainable()) + eu->add_child(unit->first_select()->select_number); + } + fake_select_lex->join->explain= &eu->fake_select_lex_explain; + } + return 0; +} + + +/** + A routine used by the parser to decide whether we are specifying a full + partitioning or if only partitions to add or to split. 
+ + @note This needs to be outside of WITH_PARTITION_STORAGE_ENGINE since it + is used from the sql parser that doesn't have any ifdef's + + @retval TRUE Yes, it is part of a management partition command + @retval FALSE No, not a management partition command +*/ + +bool LEX::is_partition_management() const +{ + return (sql_command == SQLCOM_ALTER_TABLE && + (alter_info.partition_flags & (ALTER_PARTITION_ADD | + ALTER_PARTITION_CONVERT_IN | + ALTER_PARTITION_CONVERT_OUT | + ALTER_PARTITION_REORGANIZE))); +} + + +/** + Exclude last added SELECT_LEX (current) in the UNIT and return pointer in it + (previous become currect) + + @return detached SELECT_LEX or NULL in case of error +*/ + +SELECT_LEX *LEX::exclude_last_select() +{ + return exclude_not_first_select(current_select); +} + +SELECT_LEX *LEX::exclude_not_first_select(SELECT_LEX *exclude) +{ + DBUG_ENTER("LEX::exclude_not_first_select"); + DBUG_PRINT("enter", ("exclude %p #%u", exclude, exclude->select_number)); + SELECT_LEX_UNIT *unit= exclude->master_unit(); + SELECT_LEX *sl; + DBUG_ASSERT(unit->first_select() != exclude); + /* we should go through the list to correctly set current_select */ + for(sl= unit->first_select(); + sl->next_select() && sl->next_select() != exclude; + sl= sl->next_select()); + DBUG_PRINT("info", ("excl: %p unit: %p prev: %p", exclude, unit, sl)); + if (!sl) + DBUG_RETURN(NULL); + DBUG_ASSERT(&sl->next == exclude->prev); + + exclude->prev= NULL; + + current_select= sl; + DBUG_RETURN(exclude); +} + + +SELECT_LEX_UNIT *LEX::alloc_unit() +{ + SELECT_LEX_UNIT *unit; + DBUG_ENTER("LEX::alloc_unit"); + if (!(unit= new (thd->mem_root) SELECT_LEX_UNIT())) + DBUG_RETURN(NULL); + + unit->init_query(); + /* TODO: reentrant problem */ + unit->thd= thd; + unit->link_next= 0; + unit->link_prev= 0; + /* TODO: remove return_to */ + unit->return_to= NULL; + DBUG_RETURN(unit); +} + + +SELECT_LEX *LEX::alloc_select(bool select) +{ + SELECT_LEX *select_lex; + DBUG_ENTER("LEX::alloc_select"); + if 
(!(select_lex= new (thd->mem_root) SELECT_LEX())) + DBUG_RETURN(NULL); + DBUG_PRINT("info", ("Allocate select: %p #%u statement lex: %p", + select_lex, thd->lex->stmt_lex->current_select_number, + thd->lex->stmt_lex)); + /* + TODO: move following init to constructor when we get rid of builtin + select + */ + select_lex->select_number= ++thd->lex->stmt_lex->current_select_number; + select_lex->parent_lex= this; /* Used in init_query. */ + select_lex->init_query(); + if (select) + select_lex->init_select(); + select_lex->nest_level_base= &this->unit; + select_lex->include_global((st_select_lex_node**)&all_selects_list); + select_lex->context.resolve_in_select_list= TRUE; + DBUG_RETURN(select_lex); +} + +SELECT_LEX_UNIT * +LEX::create_unit(SELECT_LEX *first_sel) +{ + SELECT_LEX_UNIT *unit; + DBUG_ENTER("LEX::create_unit"); + + unit = first_sel->master_unit(); + + if (!unit && !(unit= alloc_unit())) + DBUG_RETURN(NULL); + + unit->register_select_chain(first_sel); + if (first_sel->next_select()) + { + unit->reset_distinct(); + DBUG_ASSERT(!unit->fake_select_lex); + if (unit->add_fake_select_lex(thd)) + DBUG_RETURN(NULL); + } + DBUG_RETURN(unit); +} + +SELECT_LEX_UNIT * +SELECT_LEX::attach_selects_chain(SELECT_LEX *first_sel, + Name_resolution_context *context) +{ + SELECT_LEX_UNIT *unit; + DBUG_ENTER("SELECT_LEX::attach_select_chain"); + + if (!(unit= parent_lex->alloc_unit())) + DBUG_RETURN(NULL); + + unit->register_select_chain(first_sel); + register_unit(unit, context); + if (first_sel->next_select()) + { + unit->reset_distinct(); + DBUG_ASSERT(!unit->fake_select_lex); + if (unit->add_fake_select_lex(parent_lex->thd)) + DBUG_RETURN(NULL); + } + + DBUG_RETURN(unit); +} + +SELECT_LEX * +LEX::wrap_unit_into_derived(SELECT_LEX_UNIT *unit) +{ + SELECT_LEX *wrapping_sel; + Table_ident *ti; + DBUG_ENTER("LEX::wrap_unit_into_derived"); + + if (!(wrapping_sel= alloc_select(TRUE))) + DBUG_RETURN(NULL); + Name_resolution_context *context= &wrapping_sel->context; + 
context->init(); + wrapping_sel->automatic_brackets= FALSE; + wrapping_sel->mark_as_unit_nest(); + wrapping_sel->register_unit(unit, context); + + /* stuff dummy SELECT * FROM (...) */ + + if (push_select(wrapping_sel)) // for Items & TABLE_LIST + DBUG_RETURN(NULL); + + /* add SELECT list*/ + { + Item *item= new (thd->mem_root) Item_field(thd, context, star_clex_str); + if (item == NULL) + goto err; + if (add_item_to_list(thd, item)) + goto err; + (wrapping_sel->with_wild)++; + } + + unit->first_select()->set_linkage(DERIVED_TABLE_TYPE); + + ti= new (thd->mem_root) Table_ident(unit); + if (ti == NULL) + goto err; + { + TABLE_LIST *table_list; + LEX_CSTRING alias; + if (wrapping_sel->make_unique_derived_name(thd, &alias)) + goto err; + + if (!(table_list= wrapping_sel->add_table_to_list(thd, ti, &alias, + 0, TL_READ, + MDL_SHARED_READ))) + goto err; + + context->resolve_in_table_list_only(table_list); + wrapping_sel->add_joined_table(table_list); + } + + pop_select(); + + derived_tables|= DERIVED_SUBQUERY; + + DBUG_RETURN(wrapping_sel); + +err: + pop_select(); + DBUG_RETURN(NULL); +} + +SELECT_LEX *LEX::wrap_select_chain_into_derived(SELECT_LEX *sel) +{ + SELECT_LEX *dummy_select; + SELECT_LEX_UNIT *unit; + Table_ident *ti; + DBUG_ENTER("LEX::wrap_select_chain_into_derived"); + + if (!(dummy_select= alloc_select(TRUE))) + DBUG_RETURN(NULL); + Name_resolution_context *context= &dummy_select->context; + dummy_select->automatic_brackets= FALSE; + sel->distinct= TRUE; // First select has not this attribute (safety) + + if (!(unit= dummy_select->attach_selects_chain(sel, context))) + DBUG_RETURN(NULL); + + /* stuff dummy SELECT * FROM (...) 
*/ + + if (push_select(dummy_select)) // for Items & TABLE_LIST + DBUG_RETURN(NULL); + + /* add SELECT list*/ + { + Item *item= new (thd->mem_root) Item_field(thd, context, star_clex_str); + if (item == NULL) + goto err; + if (add_item_to_list(thd, item)) + goto err; + (dummy_select->with_wild)++; + } + + sel->set_linkage(DERIVED_TABLE_TYPE); + + ti= new (thd->mem_root) Table_ident(unit); + if (ti == NULL) + goto err; + { + TABLE_LIST *table_list; + LEX_CSTRING alias; + if (dummy_select->make_unique_derived_name(thd, &alias)) + goto err; + + if (!(table_list= dummy_select->add_table_to_list(thd, ti, &alias, + 0, TL_READ, + MDL_SHARED_READ))) + goto err; + + context->resolve_in_table_list_only(table_list); + dummy_select->add_joined_table(table_list); + } + + pop_select(); + + derived_tables|= DERIVED_SUBQUERY; + + DBUG_RETURN(dummy_select); + +err: + pop_select(); + DBUG_RETURN(NULL); +} + +bool LEX::push_context(Name_resolution_context *context) +{ + DBUG_ENTER("LEX::push_context"); + DBUG_PRINT("info", ("Context: %p Select: %p (%d)", + context, context->select_lex, + (context->select_lex ? + context->select_lex->select_number: + 0))); + bool res= context_stack.push_front(context, thd->mem_root); + DBUG_RETURN(res); +} + + +Name_resolution_context *LEX::pop_context() +{ + DBUG_ENTER("LEX::pop_context"); + Name_resolution_context *context= context_stack.pop(); + DBUG_PRINT("info", ("Context: %p Select: %p (%d)", + context, context->select_lex, + (context->select_lex ? 
+ context->select_lex->select_number: + 0))); + DBUG_RETURN(context); +} + + +SELECT_LEX *LEX::create_priority_nest(SELECT_LEX *first_in_nest) +{ + DBUG_ENTER("LEX::create_priority_nest"); + DBUG_ASSERT(first_in_nest->first_nested); + enum sub_select_type wr_unit_type= first_in_nest->get_linkage(); + bool wr_distinct= first_in_nest->distinct; + SELECT_LEX *attach_to= first_in_nest->first_nested; + attach_to->cut_next(); + SELECT_LEX *wrapper= wrap_select_chain_into_derived(first_in_nest); + if (wrapper) + { + first_in_nest->first_nested= NULL; + wrapper->set_linkage_and_distinct(wr_unit_type, wr_distinct); + wrapper->first_nested= attach_to->first_nested; + wrapper->set_master_unit(attach_to->master_unit()); + attach_to->link_neighbour(wrapper); + } + DBUG_RETURN(wrapper); +} + + +/** + Checks if we need finish "automatic brackets" mode + + INTERSECT has higher priority then UNION and EXCEPT, so when it is need we + automatically create lower layer for INTERSECT (automatic brackets) and + here we check if we should return back one level up during parsing procedure. 
+*/ + +void LEX::check_automatic_up(enum sub_select_type type) +{ + if (type != INTERSECT_TYPE && + current_select->get_linkage() == INTERSECT_TYPE && + current_select->outer_select() && + current_select->outer_select()->automatic_brackets) + { + nest_level--; + current_select= current_select->outer_select(); + } +} + + +sp_variable *LEX::sp_param_init(LEX_CSTRING *name) +{ + if (spcont->find_variable(name, true)) + { + my_error(ER_SP_DUP_PARAM, MYF(0), name->str); + return NULL; + } + sp_variable *spvar= spcont->add_variable(thd, name); + init_last_field(&spvar->field_def, name); + return spvar; +} + + +bool LEX::sp_param_fill_definition(sp_variable *spvar, + const Lex_field_type_st &def) +{ + return + last_field->set_attributes(thd, def, COLUMN_DEFINITION_ROUTINE_PARAM) || + sphead->fill_spvar_definition(thd, last_field, &spvar->name); +} + + +bool LEX::sf_return_fill_definition(const Lex_field_type_st &def) +{ + return + last_field->set_attributes(thd, def, COLUMN_DEFINITION_FUNCTION_RETURN) || + sphead->fill_field_definition(thd, last_field); +} + + +void LEX::set_stmt_init() +{ + sql_command= SQLCOM_SET_OPTION; + init_select(); + option_type= OPT_SESSION; + autocommit= 0; + var_list.empty(); +}; + + +/** + Find a local or a package body variable by name. + @param IN name - the variable name + @param OUT ctx - NULL, if the variable was not found, + or LEX::spcont (if a local variable was found) + or the package top level context + (if a package variable was found) + @param OUT handler - NULL, if the variable was not found, + or a pointer to rcontext handler + @retval - the variable (if found), or NULL otherwise. +*/ +sp_variable * +LEX::find_variable(const LEX_CSTRING *name, + sp_pcontext **ctx, + const Sp_rcontext_handler **rh) const +{ + sp_variable *spv; + if (spcont && (spv= spcont->find_variable(name, false))) + { + *ctx= spcont; + *rh= &sp_rcontext_handler_local; + return spv; + } + sp_package *pkg= sphead ? 
sphead->m_parent : NULL; + if (pkg && (spv= pkg->find_package_variable(name))) + { + *ctx= pkg->get_parse_context()->child_context(0); + *rh= &sp_rcontext_handler_package_body; + return spv; + } + *ctx= NULL; + *rh= NULL; + return NULL; +} + + +static bool is_new(const char *str) +{ + return (str[0] == 'n' || str[0] == 'N') && + (str[1] == 'e' || str[1] == 'E') && + (str[2] == 'w' || str[2] == 'W'); +} + +static bool is_old(const char *str) +{ + return (str[0] == 'o' || str[0] == 'O') && + (str[1] == 'l' || str[1] == 'L') && + (str[2] == 'd' || str[2] == 'D'); +} + + +bool LEX::is_trigger_new_or_old_reference(const LEX_CSTRING *name) const +{ + // "name" is not necessarily NULL-terminated! + return sphead && sphead->m_handler->type() == SP_TYPE_TRIGGER && + name->length == 3 && (is_new(name->str) || is_old(name->str)); +} + + +void LEX::sp_variable_declarations_init(THD *thd, int nvars) +{ + sp_variable *spvar= spcont->get_last_context_variable(); + + sphead->reset_lex(thd); + spcont->declare_var_boundary(nvars); + thd->lex->init_last_field(&spvar->field_def, &spvar->name); +} + + +bool LEX::sp_variable_declarations_set_default(THD *thd, int nvars, + Item *dflt_value_item) +{ + bool has_default_clause= dflt_value_item != NULL; + if (!has_default_clause && + unlikely(!(dflt_value_item= new (thd->mem_root) Item_null(thd)))) + return true; + + sp_variable *first_spvar = NULL; + + for (uint i= 0 ; i < (uint) nvars ; i++) + { + sp_variable *spvar= spcont->get_last_context_variable((uint) nvars - 1 - i); + + if (i == 0) { + first_spvar = spvar; + } else if (has_default_clause) { + Item_splocal *item = + new (thd->mem_root) + Item_splocal(thd, &sp_rcontext_handler_local, + &first_spvar->name, first_spvar->offset, + first_spvar->type_handler(), 0, 0); + if (item == NULL) + return true; // OOM +#ifndef DBUG_OFF + item->m_sp = sphead; +#endif + dflt_value_item = item; + } + + bool last= i + 1 == (uint) nvars; + spvar->default_value= dflt_value_item; + /* The last instruction 
is responsible for freeing LEX. */ + sp_instr_set *is= new (thd->mem_root) + sp_instr_set(sphead->instructions(), + spcont, &sp_rcontext_handler_local, + spvar->offset, dflt_value_item, + this, last); + if (unlikely(is == NULL || sphead->add_instr(is))) + return true; + } + return false; +} + + +bool +LEX::sp_variable_declarations_copy_type_finalize(THD *thd, int nvars, + const Column_definition &ref, + Row_definition_list *fields, + Item *default_value) +{ + for (uint i= 0 ; i < (uint) nvars; i++) + { + sp_variable *spvar= spcont->get_last_context_variable((uint) nvars - 1 - i); + spvar->field_def.set_type(ref); + if (fields) + { + DBUG_ASSERT(ref.type_handler() == &type_handler_row); + spvar->field_def.set_row_field_definitions(fields); + } + spvar->field_def.field_name= spvar->name; + } + if (unlikely(sp_variable_declarations_set_default(thd, nvars, + default_value))) + return true; + spcont->declare_var_boundary(0); + return sphead->restore_lex(thd); +} + + +bool LEX::sp_variable_declarations_finalize(THD *thd, int nvars, + const Column_definition *cdef, + Item *dflt_value_item) +{ + DBUG_ASSERT(cdef); + Column_definition tmp(*cdef); + if (sphead->fill_spvar_definition(thd, &tmp)) + return true; + return sp_variable_declarations_copy_type_finalize(thd, nvars, tmp, NULL, + dflt_value_item); +} + + +bool LEX::sp_variable_declarations_row_finalize(THD *thd, int nvars, + Row_definition_list *row, + Item *dflt_value_item) +{ + DBUG_ASSERT(row); + /* + Prepare all row fields. + Note, we do it only one time outside of the below loop. + The converted list in "row" is further reused by all variable + declarations processed by the current call. + Example: + DECLARE + a, b, c ROW(x VARCHAR(10) CHARACTER SET utf8); + BEGIN + ... 
+ END; + */ + if (sphead->row_fill_field_definitions(thd, row)) + return true; + + for (uint i= 0 ; i < (uint) nvars ; i++) + { + sp_variable *spvar= spcont->get_last_context_variable((uint) nvars - 1 - i); + spvar->field_def.set_row_field_definitions(row); + if (sphead->fill_spvar_definition(thd, &spvar->field_def, &spvar->name)) + return true; + } + + if (sp_variable_declarations_set_default(thd, nvars, dflt_value_item)) + return true; + spcont->declare_var_boundary(0); + return sphead->restore_lex(thd); +} + + +/** + Finalize a %ROWTYPE declaration, e.g.: + DECLARE a,b,c,d t1%ROWTYPE := ROW(1,2,3); + + @param thd - the current thd + @param nvars - the number of variables in the declaration + @param ref - the table or cursor name (see comments below) + @param def - the default value, e.g., ROW(1,2,3), or NULL (no default). +*/ +bool +LEX::sp_variable_declarations_rowtype_finalize(THD *thd, int nvars, + Qualified_column_ident *ref, + Item *def) +{ + uint coffp; + const sp_pcursor *pcursor= ref->table.str && ref->db.str ? NULL : + spcont->find_cursor(&ref->m_column, &coffp, + false); + if (pcursor) + return sp_variable_declarations_cursor_rowtype_finalize(thd, nvars, + coffp, def); + /* + When parsing a qualified identifier chain, the parser does not know yet + if it's going to be a qualified column name (for %TYPE), + or a qualified table name (for %ROWTYPE). So it collects the chain + into Qualified_column_ident. + Now we know that it was actually a qualified table name (%ROWTYPE). 
+ Create a new Table_ident from Qualified_column_ident, + shifting fields as follows: + - ref->m_column becomes table_ref->table + - ref->table becomes table_ref->db + */ + return sp_variable_declarations_table_rowtype_finalize(thd, nvars, + ref->table, + ref->m_column, + def); +} + + +bool +LEX::sp_variable_declarations_table_rowtype_finalize(THD *thd, int nvars, + const LEX_CSTRING &db, + const LEX_CSTRING &table, + Item *def) +{ + Table_ident *table_ref; + if (unlikely(!(table_ref= + new (thd->mem_root) Table_ident(thd, &db, &table, false)))) + return true; + // Loop through all variables in the same declaration + for (uint i= 0 ; i < (uint) nvars; i++) + { + sp_variable *spvar= spcont->get_last_context_variable((uint) nvars - 1 - i); + spvar->field_def.set_table_rowtype_ref(table_ref); + sphead->fill_spvar_definition(thd, &spvar->field_def, &spvar->name); + } + if (sp_variable_declarations_set_default(thd, nvars, def)) + return true; + // Make sure sp_rcontext is created using the invoker security context: + sphead->m_flags|= sp_head::HAS_COLUMN_TYPE_REFS; + spcont->declare_var_boundary(0); + return sphead->restore_lex(thd); +} + + +bool +LEX::sp_variable_declarations_cursor_rowtype_finalize(THD *thd, int nvars, + uint offset, + Item *def) +{ + const sp_pcursor *pcursor= spcont->find_cursor(offset); + + // Loop through all variables in the same declaration + for (uint i= 0 ; i < (uint) nvars; i++) + { + sp_variable *spvar= spcont->get_last_context_variable((uint) nvars - 1 - i); + + spvar->field_def.set_cursor_rowtype_ref(offset); + sp_instr_cursor_copy_struct *instr= + new (thd->mem_root) sp_instr_cursor_copy_struct(sphead->instructions(), + spcont, offset, + pcursor->lex(), + spvar->offset); + if (instr == NULL || sphead->add_instr(instr)) + return true; + + sphead->fill_spvar_definition(thd, &spvar->field_def, &spvar->name); + } + if (unlikely(sp_variable_declarations_set_default(thd, nvars, def))) + return true; + // Make sure sp_rcontext is created using 
the invoker security context: + sphead->m_flags|= sp_head::HAS_COLUMN_TYPE_REFS; + spcont->declare_var_boundary(0); + return sphead->restore_lex(thd); +} + + +/* + Add declarations for table column and SP variable anchor types: + - DECLARE spvar1 TYPE OF db1.table1.column1; + - DECLARE spvar1 TYPE OF table1.column1; + - DECLARE spvar1 TYPE OF spvar0; +*/ +bool +LEX::sp_variable_declarations_with_ref_finalize(THD *thd, int nvars, + Qualified_column_ident *ref, + Item *def) +{ + return ref->db.length == 0 && ref->table.length == 0 ? + sp_variable_declarations_vartype_finalize(thd, nvars, ref->m_column, def) : + sp_variable_declarations_column_type_finalize(thd, nvars, ref, def); +} + + +bool +LEX::sp_variable_declarations_column_type_finalize(THD *thd, int nvars, + Qualified_column_ident *ref, + Item *def) +{ + for (uint i= 0 ; i < (uint) nvars; i++) + { + sp_variable *spvar= spcont->get_last_context_variable((uint) nvars - 1 - i); + spvar->field_def.set_column_type_ref(ref); + spvar->field_def.field_name= spvar->name; + } + sphead->m_flags|= sp_head::HAS_COLUMN_TYPE_REFS; + if (sp_variable_declarations_set_default(thd, nvars, def)) + return true; + spcont->declare_var_boundary(0); + return sphead->restore_lex(thd); +} + + +bool +LEX::sp_variable_declarations_vartype_finalize(THD *thd, int nvars, + const LEX_CSTRING &ref, + Item *default_value) +{ + sp_variable *t; + if (!spcont || !(t= spcont->find_variable(&ref, false))) + { + my_error(ER_SP_UNDECLARED_VAR, MYF(0), ref.str); + return true; + } + + if (t->field_def.is_cursor_rowtype_ref()) + { + uint offset= t->field_def.cursor_rowtype_offset(); + return sp_variable_declarations_cursor_rowtype_finalize(thd, nvars, + offset, + default_value); + } + + if (t->field_def.is_column_type_ref()) + { + Qualified_column_ident *tmp= t->field_def.column_type_ref(); + return sp_variable_declarations_column_type_finalize(thd, nvars, tmp, + default_value); + } + + if (t->field_def.is_table_rowtype_ref()) + { + const Table_ident 
*tmp= t->field_def.table_rowtype_ref(); + return sp_variable_declarations_table_rowtype_finalize(thd, nvars, + tmp->db, + tmp->table, + default_value); + } + + // A reference to a scalar or a row variable with an explicit data type + return sp_variable_declarations_copy_type_finalize(thd, nvars, + t->field_def, + t->field_def. + row_field_definitions(), + default_value); +} + + +/********************************************************************** + The FOR LOOP statement + + This syntax: + FOR i IN lower_bound .. upper_bound + LOOP + statements; + END LOOP; + + is translated into: + + DECLARE + i INT := lower_bound; + j INT := upper_bound; + BEGIN + WHILE i <= j + LOOP + statements; + i:= i + 1; + END LOOP; + END; +*/ + + +sp_variable *LEX::sp_add_for_loop_variable(THD *thd, const LEX_CSTRING *name, + Item *value) +{ + sp_variable *spvar= spcont->add_variable(thd, name); + spcont->declare_var_boundary(1); + spvar->field_def.field_name= spvar->name; + spvar->field_def.set_handler(&type_handler_slonglong); + type_handler_slonglong.Column_definition_prepare_stage2(&spvar->field_def, + NULL, HA_CAN_GEOMETRY); + if (!value && unlikely(!(value= new (thd->mem_root) Item_null(thd)))) + return NULL; + + spvar->default_value= value; + sp_instr_set *is= new (thd->mem_root) + sp_instr_set(sphead->instructions(), + spcont, &sp_rcontext_handler_local, + spvar->offset, value, + this, true); + if (unlikely(is == NULL || sphead->add_instr(is))) + return NULL; + spcont->declare_var_boundary(0); + return spvar; +} + + +bool LEX::sp_for_loop_implicit_cursor_statement(THD *thd, + Lex_for_loop_bounds_st *bounds, + sp_lex_cursor *cur) +{ + Item *item; + DBUG_ASSERT(sphead); + LEX_CSTRING name= {STRING_WITH_LEN("[implicit_cursor]") }; + if (sp_declare_cursor(thd, &name, cur, NULL, true)) + return true; + DBUG_ASSERT(thd->lex == this); + if (unlikely(!(bounds->m_index= + new (thd->mem_root) sp_assignment_lex(thd, this)))) + return true; + sphead->reset_lex(thd, bounds->m_index); + 
DBUG_ASSERT(thd->lex != this); + /* + We pass NULL as Name_resolution_context here. + It's OK, fix_fields() will not be called for this Item_field created. + Item_field is only needed for LEX::sp_for_loop_cursor_declarations() + and is used to transfer the loop index variable name, "rec" in this example: + FOR rec IN (SELECT * FROM t1) + DO + SELECT rec.a, rec.b; + END FOR; + */ + if (!(item= new (thd->mem_root) Item_field(thd, NULL, name))) + return true; + bounds->m_index->set_item_and_free_list(item, NULL); + if (thd->lex->sphead->restore_lex(thd)) + return true; + DBUG_ASSERT(thd->lex == this); + bounds->m_direction= 1; + bounds->m_target_bound= NULL; + bounds->m_implicit_cursor= true; + return false; +} + +sp_variable * +LEX::sp_add_for_loop_cursor_variable(THD *thd, + const LEX_CSTRING *name, + const sp_pcursor *pcursor, + uint coffset, + sp_assignment_lex *param_lex, + Item_args *parameters) +{ + sp_variable *spvar= spcont->add_variable(thd, name); + if (!spvar) + return NULL; + spcont->declare_var_boundary(1); + sphead->fill_spvar_definition(thd, &spvar->field_def, &spvar->name); + if (unlikely(!(spvar->default_value= new (thd->mem_root) Item_null(thd)))) + return NULL; + + spvar->field_def.set_cursor_rowtype_ref(coffset); + + if (unlikely(sphead->add_for_loop_open_cursor(thd, spcont, spvar, pcursor, + coffset, + param_lex, parameters))) + return NULL; + + spcont->declare_var_boundary(0); + return spvar; +} + + +/** + Generate a code for a FOR loop condition: + - Make Item_splocal for the FOR loop index variable + - Make Item_splocal for the FOR loop upper bound variable + - Make a comparison function item on top of these two variables +*/ +bool LEX::sp_for_loop_condition(THD *thd, const Lex_for_loop_st &loop) +{ + Item_splocal *args[2]; + for (uint i= 0 ; i < 2; i++) + { + sp_variable *src= i == 0 ? 
loop.m_index : loop.m_target_bound; + args[i]= new (thd->mem_root) + Item_splocal(thd, &sp_rcontext_handler_local, + &src->name, src->offset, src->type_handler()); + if (unlikely(args[i] == NULL)) + return true; +#ifdef DBUG_ASSERT_EXISTS + args[i]->m_sp= sphead; +#endif + } + + Item *expr= loop.m_direction > 0 ? + (Item *) new (thd->mem_root) Item_func_le(thd, args[0], args[1]) : + (Item *) new (thd->mem_root) Item_func_ge(thd, args[0], args[1]); + return unlikely(!expr) || unlikely(sp_while_loop_expression(thd, expr)); +} + + +/** + Generate the FOR LOOP condition code in its own lex +*/ +bool LEX::sp_for_loop_intrange_condition_test(THD *thd, + const Lex_for_loop_st &loop) +{ + spcont->set_for_loop(loop); + sphead->reset_lex(thd); + if (unlikely(thd->lex->sp_for_loop_condition(thd, loop))) + return true; + return thd->lex->sphead->restore_lex(thd); +} + + +bool LEX::sp_for_loop_cursor_condition_test(THD *thd, + const Lex_for_loop_st &loop) +{ + const LEX_CSTRING *cursor_name; + Item *expr; + spcont->set_for_loop(loop); + sphead->reset_lex(thd); + cursor_name= spcont->find_cursor(loop.m_cursor_offset); + DBUG_ASSERT(cursor_name); + if (unlikely(!(expr= + new (thd->mem_root) + Item_func_cursor_found(thd, cursor_name, + loop.m_cursor_offset)))) + return true; + if (thd->lex->sp_while_loop_expression(thd, expr)) + return true; + return thd->lex->sphead->restore_lex(thd); +} + + +bool LEX::sp_for_loop_intrange_declarations(THD *thd, Lex_for_loop_st *loop, + const LEX_CSTRING *index, + const Lex_for_loop_bounds_st &bounds) +{ + Item *item; + if ((item= bounds.m_index->get_item())->type() == Item::FIELD_ITEM) + { + // We're here is the lower bound is unknown identifier + my_error(ER_SP_UNDECLARED_VAR, MYF(0), item->full_name()); + return true; + } + if ((item= bounds.m_target_bound->get_item())->type() == Item::FIELD_ITEM) + { + // We're here is the upper bound is unknown identifier + my_error(ER_SP_UNDECLARED_VAR, MYF(0), item->full_name()); + return true; + } + if 
(!(loop->m_index= + bounds.m_index->sp_add_for_loop_variable(thd, index, + bounds.m_index->get_item()))) + return true; + if (unlikely(!(loop->m_target_bound= + bounds.m_target_bound-> + sp_add_for_loop_target_bound(thd, + bounds. + m_target_bound->get_item())))) + return true; + loop->m_direction= bounds.m_direction; + loop->m_implicit_cursor= 0; + return false; +} + + +bool LEX::sp_for_loop_cursor_declarations(THD *thd, + Lex_for_loop_st *loop, + const LEX_CSTRING *index, + const Lex_for_loop_bounds_st &bounds) +{ + Item *item= bounds.m_index->get_item(); + Item_splocal *item_splocal; + Item_field *item_field; + Item_func_sp *item_func_sp= NULL; + LEX_CSTRING name; + uint coffs, param_count= 0; + const sp_pcursor *pcursor; + DBUG_ENTER("LEX::sp_for_loop_cursor_declarations"); + + if ((item_splocal= item->get_item_splocal())) + name= item_splocal->m_name; + else if ((item_field= item->type() == Item::FIELD_ITEM ? + static_cast(item) : NULL) && + item_field->table_name.str == NULL) + name= item_field->field_name; + else if (item->type() == Item::FUNC_ITEM && + static_cast(item)->functype() == Item_func::FUNC_SP && + !static_cast(item)->get_sp_name()->m_explicit_name) + { + /* + When a FOR LOOP for a cursor with parameters is parsed: + FOR index IN cursor(1,2,3) LOOP + statements; + END LOOP; + the parser scans "cursor(1,2,3)" using the "expr" rule, + so it thinks that cursor(1,2,3) is a stored function call. + It's not easy to implement this without using "expr" because + of grammar conflicts. + As a side effect, the Item_func_sp and its arguments in the parentheses + belong to the same LEX. This is different from an explicit + "OPEN cursor(1,2,3)" where every expression belongs to a separate LEX. 
+ */ + item_func_sp= static_cast(item); + name= item_func_sp->get_sp_name()->m_name; + param_count= item_func_sp->argument_count(); + } + else + { + thd->parse_error(); + DBUG_RETURN(true); + } + if (unlikely(!(pcursor= spcont->find_cursor_with_error(&name, &coffs, + false)) || + pcursor->check_param_count_with_error(param_count))) + DBUG_RETURN(true); + + if (!(loop->m_index= sp_add_for_loop_cursor_variable(thd, index, + pcursor, coffs, + bounds.m_index, + item_func_sp))) + DBUG_RETURN(true); + loop->m_target_bound= NULL; + loop->m_direction= bounds.m_direction; + loop->m_cursor_offset= coffs; + loop->m_implicit_cursor= bounds.m_implicit_cursor; + DBUG_RETURN(false); +} + + +/** + Generate a code for a FOR loop index increment +*/ +bool LEX::sp_for_loop_increment(THD *thd, const Lex_for_loop_st &loop) +{ + Item_splocal *splocal= new (thd->mem_root) + Item_splocal(thd, &sp_rcontext_handler_local, + &loop.m_index->name, loop.m_index->offset, + loop.m_index->type_handler()); + if (unlikely(splocal == NULL)) + return true; +#ifdef DBUG_ASSERT_EXISTS + splocal->m_sp= sphead; +#endif + Item_int *inc= new (thd->mem_root) Item_int(thd, loop.m_direction); + if (unlikely(!inc)) + return true; + Item *expr= new (thd->mem_root) Item_func_plus(thd, splocal, inc); + if (unlikely(!expr) || + unlikely(sphead->set_local_variable(thd, spcont, + &sp_rcontext_handler_local, + loop.m_index, expr, this, true))) + return true; + return false; +} + + +bool LEX::sp_for_loop_intrange_iterate(THD *thd, const Lex_for_loop_st &loop) +{ + sphead->reset_lex(thd); + + // Generate FOR LOOP index increment in its own lex + DBUG_ASSERT(this != thd->lex); + if (unlikely(thd->lex->sp_for_loop_increment(thd, loop) || + thd->lex->sphead->restore_lex(thd))) + return true; + + return false; +} + + +bool LEX::sp_for_loop_cursor_iterate(THD *thd, const Lex_for_loop_st &loop) +{ + sp_instr_cfetch *instr= + new (thd->mem_root) sp_instr_cfetch(sphead->instructions(), + spcont, loop.m_cursor_offset, false); + 
if (unlikely(instr == NULL) || unlikely(sphead->add_instr(instr))) + return true; + instr->add_to_varlist(loop.m_index); + return false; +} + + +bool LEX::sp_for_loop_outer_block_finalize(THD *thd, + const Lex_for_loop_st &loop) +{ + Lex_spblock tmp; + tmp.curs= MY_TEST(loop.m_implicit_cursor); + if (unlikely(sp_block_finalize(thd, tmp))) // The outer DECLARE..BEGIN..END + return true; + if (!loop.is_for_loop_explicit_cursor()) + return false; + /* + Explicit cursor FOR loop must close the cursor automatically. + Note, implicit cursor FOR loop does not need to close the cursor, + it's closed by sp_instr_cpop. + */ + sp_instr_cclose *ic= new (thd->mem_root) + sp_instr_cclose(sphead->instructions(), spcont, + loop.m_cursor_offset); + return ic == NULL || sphead->add_instr(ic); +} + +/***************************************************************************/ + +bool LEX::sp_declare_cursor(THD *thd, const LEX_CSTRING *name, + sp_lex_cursor *cursor_stmt, + sp_pcontext *param_ctx, bool add_cpush_instr) +{ + uint offp; + sp_instr_cpush *i; + + /* In some cases param_ctx can be NULL. e.g.: FOR rec IN (SELECT...) */ + if (param_ctx) + { + for (uint prm= 0; prm < param_ctx->context_var_count(); prm++) + { + const sp_variable *param= param_ctx->get_context_variable(prm); + if (param->mode != sp_variable::MODE_IN) + { + /* + PL/SQL supports the IN keyword in cursor parameters. + We also support this for compatibility. Note, OUT/INOUT parameters + will unlikely be ever supported. So "YET" may sound confusing here. + But it should be better than using a generic error. Adding a dedicated + error message for this small issue is not desirable. 
+ */ + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "OUT/INOUT cursor parameter"); + return true; + } + } + } + + if (spcont->find_cursor(name, &offp, true)) + { + my_error(ER_SP_DUP_CURS, MYF(0), name->str); + return true; + } + + if (unlikely(spcont->add_cursor(name, param_ctx, cursor_stmt))) + return true; + + if (add_cpush_instr) + { + i= new (thd->mem_root) + sp_instr_cpush(sphead->instructions(), spcont, cursor_stmt, + spcont->current_cursor_count() - 1); + return unlikely(i == NULL) || unlikely(sphead->add_instr(i)); + } + return false; +} + + +/** + Generate an SP code for an "OPEN cursor_name" statement. + @param thd + @param name - Name of the cursor + @param parameters - Cursor parameters, e.g. OPEN c(1,2,3) + @returns - false on success, true on error +*/ +bool LEX::sp_open_cursor(THD *thd, const LEX_CSTRING *name, + List *parameters) +{ + uint offset; + const sp_pcursor *pcursor; + uint param_count= parameters ? parameters->elements : 0; + return !(pcursor= spcont->find_cursor_with_error(name, &offset, false)) || + pcursor->check_param_count_with_error(param_count) || + sphead->add_open_cursor(thd, spcont, offset, + pcursor->param_context(), parameters); +} + + +bool LEX::sp_handler_declaration_init(THD *thd, int type) +{ + sp_handler *h= spcont->add_handler(thd, (sp_handler::enum_type) type); + + spcont= spcont->push_context(thd, sp_pcontext::HANDLER_SCOPE); + + sp_instr_hpush_jump *i= + new (thd->mem_root) sp_instr_hpush_jump(sphead->instructions(), spcont, h); + + if (unlikely(i == NULL) || unlikely(sphead->add_instr(i))) + return true; + + /* For continue handlers, mark end of handler scope. 
*/ + if (type == sp_handler::CONTINUE && + unlikely(sphead->push_backpatch(thd, i, spcont->last_label()))) + return true; + + if (unlikely(sphead->push_backpatch(thd, i, + spcont->push_label(thd, &empty_clex_str, + 0)))) + return true; + + return false; +} + + +bool LEX::sp_handler_declaration_finalize(THD *thd, int type) +{ + sp_label *hlab= spcont->pop_label(); /* After this hdlr */ + sp_instr_hreturn *i; + + if (type == sp_handler::CONTINUE) + { + i= new (thd->mem_root) sp_instr_hreturn(sphead->instructions(), spcont); + if (unlikely(i == NULL) || + unlikely(sphead->add_instr(i))) + return true; + } + else + { /* EXIT or UNDO handler, just jump to the end of the block */ + i= new (thd->mem_root) sp_instr_hreturn(sphead->instructions(), spcont); + if (unlikely(i == NULL) || + unlikely(sphead->add_instr(i)) || + unlikely(sphead->push_backpatch(thd, i, spcont->last_label()))) /* Block end */ + return true; + } + sphead->backpatch(hlab); + spcont= spcont->pop_context(); + return false; +} + + +void LEX::sp_block_init(THD *thd, const LEX_CSTRING *label) +{ + spcont->push_label(thd, label, sphead->instructions(), sp_label::BEGIN); + spcont= spcont->push_context(thd, sp_pcontext::REGULAR_SCOPE); +} + + +bool LEX::sp_block_finalize(THD *thd, const Lex_spblock_st spblock, + class sp_label **splabel) +{ + sp_head *sp= sphead; + sp_pcontext *ctx= spcont; + sp_instr *i; + + sp->backpatch(ctx->last_label()); /* We always have a label */ + if (spblock.hndlrs) + { + i= new (thd->mem_root) + sp_instr_hpop(sp->instructions(), ctx, spblock.hndlrs); + if (unlikely(i == NULL) || + unlikely(sp->add_instr(i))) + return true; + } + if (spblock.curs) + { + i= new (thd->mem_root) + sp_instr_cpop(sp->instructions(), ctx, spblock.curs); + if (unlikely(i == NULL) || + unlikely(sp->add_instr(i))) + return true; + } + spcont= ctx->pop_context(); + *splabel= spcont->pop_label(); + return false; +} + + +bool LEX::sp_block_finalize(THD *thd, const Lex_spblock_st spblock, + const LEX_CSTRING 
*end_label) +{ + sp_label *splabel; + if (unlikely(sp_block_finalize(thd, spblock, &splabel))) + return true; + if (unlikely(end_label->str && + lex_string_cmp(system_charset_info, + end_label, &splabel->name) != 0)) + { + my_error(ER_SP_LABEL_MISMATCH, MYF(0), end_label->str); + return true; + } + return false; +} + + +sp_name *LEX::make_sp_name(THD *thd, const LEX_CSTRING *name) +{ + sp_name *res; + LEX_CSTRING db; + if (unlikely(check_routine_name(name)) || + unlikely(copy_db_to(&db)) || + unlikely((!(res= new (thd->mem_root) sp_name(&db, name, false))))) + return NULL; + return res; +} + + +/** + When a package routine name is stored in memory in Database_qualified_name, + the dot character is used to delimit package name from the routine name, + e.g.: + m_db= 'test'; -- database 'test' + m_name= 'p1.p1'; -- package 'p1', routine 'p1' + See database_qualified_name::make_package_routine_name() for details. + Disallow package routine names with dots, + to avoid ambiguity when interpreting m_name='p1.p1.p1', between: + a. package 'p1.p1' + routine 'p1' + b. package 'p1' + routine 'p1.p1' + m_name='p1.p1.p1' will always mean (a). 
+*/ +sp_name *LEX::make_sp_name_package_routine(THD *thd, const LEX_CSTRING *name) +{ + sp_name *res= make_sp_name(thd, name); + if (likely(res) && unlikely(strchr(res->m_name.str, '.'))) + { + my_error(ER_SP_WRONG_NAME, MYF(0), res->m_name.str); + res= NULL; + } + return res; +} + + +sp_name *LEX::make_sp_name(THD *thd, const LEX_CSTRING *name1, + const LEX_CSTRING *name2) +{ + sp_name *res; + LEX_CSTRING norm_name1; + if (unlikely(!name1->str) || + unlikely(!thd->make_lex_string(&norm_name1, name1->str, + name1->length)) || + unlikely(check_db_name((LEX_STRING *) &norm_name1))) + { + my_error(ER_WRONG_DB_NAME, MYF(0), name1->str); + return NULL; + } + if (unlikely(check_routine_name(name2)) || + unlikely(!(res= new (thd->mem_root) sp_name(&norm_name1, name2, true)))) + return NULL; + return res; +} + + +sp_head *LEX::make_sp_head(THD *thd, const sp_name *name, + const Sp_handler *sph, + enum_sp_aggregate_type agg_type) +{ + sp_package *package= get_sp_package(); + sp_head *sp; + + /* Order is important here: new - reset - init */ + if (likely((sp= sp_head::create(package, sph, agg_type)))) + { + sp->reset_thd_mem_root(thd); + sp->init(this); + if (name) + { + if (package) + sp->make_package_routine_name(sp->get_main_mem_root(), + package->m_db, + package->m_name, + name->m_name); + else + sp->init_sp_name(name); + sp->make_qname(sp->get_main_mem_root(), &sp->m_qname); + } + sphead= sp; + } + sp_chistics.init(); + return sp; +} + + +sp_head *LEX::make_sp_head_no_recursive(THD *thd, const sp_name *name, + const Sp_handler *sph, + enum_sp_aggregate_type agg_type) +{ + sp_package *package= thd->lex->get_sp_package(); + /* + Sp_handler::sp_clone_and_link_routine() generates a standalone-alike + statement to clone package routines for recursion, e.g.: + CREATE PROCEDURE p1 AS BEGIN NULL; END; + Translate a standalone routine handler to the corresponding + package routine handler if we're cloning a package routine, e.g.: + sp_handler_procedure -> 
sp_handler_package_procedure + sp_handler_function -> sp_handler_package_function + */ + if (package && package->m_is_cloning_routine) + sph= sph->package_routine_handler(); + if (!sphead || + (package && + (sph == &sp_handler_package_procedure || + sph == &sp_handler_package_function))) + return make_sp_head(thd, name, sph, agg_type); + my_error(ER_SP_NO_RECURSIVE_CREATE, MYF(0), sph->type_str()); + return NULL; +} + + +bool LEX::sp_body_finalize_routine(THD *thd) +{ + if (sphead->check_unresolved_goto()) + return true; + sphead->set_stmt_end(thd, thd->m_parser_state->m_lip.get_cpp_tok_start()); + sphead->restore_thd_mem_root(thd); + return false; +} + + +bool LEX::sp_body_finalize_procedure(THD *thd) +{ + return sphead->check_group_aggregate_instructions_forbid() || + sp_body_finalize_routine(thd); +} + + +bool LEX::sp_body_finalize_procedure_standalone(THD *thd, + const sp_name *end_name) +{ + return sp_body_finalize_procedure(thd) || + sphead->check_standalone_routine_end_name(end_name); +} + + +bool LEX::sp_body_finalize_function(THD *thd) +{ + if (sphead->is_not_allowed_in_function("function") || + sphead->check_group_aggregate_instructions_function()) + return true; + if (!(sphead->m_flags & sp_head::HAS_RETURN)) + { + my_error(ER_SP_NORETURN, MYF(0), ErrConvDQName(sphead).ptr()); + return true; + } + if (sp_body_finalize_routine(thd)) + return true; + (void) is_native_function_with_warn(thd, &sphead->m_name); + return false; +} + + +bool LEX::sp_body_finalize_trigger(THD *thd) +{ + return sphead->is_not_allowed_in_function("trigger") || + sp_body_finalize_procedure(thd); +} + + +bool LEX::sp_body_finalize_event(THD *thd) +{ + event_parse_data->body_changed= true; + return sp_body_finalize_procedure(thd); +} + + +bool LEX::stmt_create_stored_function_finalize_standalone(const sp_name *end_name) +{ + if (sphead->check_standalone_routine_end_name(end_name)) + return true; + stmt_create_routine_finalize(); + return false; +} + + +bool 
LEX::sp_block_with_exceptions_finalize_declarations(THD *thd) +{ + /* + [ DECLARE declarations ] + BEGIN executable_section + [ EXCEPTION exceptions ] + END + + We are now at the "BEGIN" keyword. + We have collected all declarations, including DECLARE HANDLER directives. + But there will be possibly more handlers in the EXCEPTION section. + + Generate a forward jump from the end of the DECLARE section to the + beginning of the EXCEPTION section, over the executable section. + */ + return sphead->add_instr_jump(thd, spcont); +} + + +bool +LEX::sp_block_with_exceptions_finalize_executable_section(THD *thd, + uint executable_section_ip) +{ + /* + We're now at the end of "executable_section" of the block, + near the "EXCEPTION" or the "END" keyword. + Generate a jump to the END of the block over the EXCEPTION section. + */ + if (sphead->add_instr_jump_forward_with_backpatch(thd, spcont)) + return true; + /* + Set the destination for the jump that we added in + sp_block_with_exceptions_finalize_declarations(). + */ + sp_instr *instr= sphead->get_instr(executable_section_ip - 1); + instr->backpatch(sphead->instructions(), spcont); + return false; +} + + +bool +LEX::sp_block_with_exceptions_finalize_exceptions(THD *thd, + uint executable_section_ip, + uint exception_count) +{ + if (!exception_count) + { + /* + The jump from the end of DECLARE section to + the beginning of the EXCEPTION section that we added in + sp_block_with_exceptions_finalize_declarations() is useless + if there were no exceptions. + Replace it to "no operation". + */ + return sphead->replace_instr_to_nop(thd, executable_section_ip - 1); + } + /* + Generate a jump from the end of the EXCEPTION code + to the executable section. 
+ */ + return sphead->add_instr_jump(thd, spcont, executable_section_ip); +} + + +bool LEX::sp_block_with_exceptions_add_empty(THD *thd) +{ + uint ip= sphead->instructions(); + return sp_block_with_exceptions_finalize_executable_section(thd, ip) || + sp_block_with_exceptions_finalize_exceptions(thd, ip, 0); +} + + +bool LEX::sp_change_context(THD *thd, const sp_pcontext *ctx, bool exclusive) +{ + uint n; + uint ip= sphead->instructions(); + if ((n= spcont->diff_handlers(ctx, exclusive))) + { + sp_instr_hpop *hpop= new (thd->mem_root) sp_instr_hpop(ip++, spcont, n); + if (unlikely(hpop == NULL) || unlikely(sphead->add_instr(hpop))) + return true; + } + if ((n= spcont->diff_cursors(ctx, exclusive))) + { + sp_instr_cpop *cpop= new (thd->mem_root) sp_instr_cpop(ip++, spcont, n); + if (unlikely(cpop == NULL) || unlikely(sphead->add_instr(cpop))) + return true; + } + return false; +} + + +bool LEX::sp_leave_statement(THD *thd, const LEX_CSTRING *label_name) +{ + sp_label *lab= spcont->find_label(label_name); + if (unlikely(!lab)) + { + my_error(ER_SP_LILABEL_MISMATCH, MYF(0), "LEAVE", label_name->str); + return true; + } + return sp_exit_block(thd, lab, NULL); +} + +bool LEX::sp_goto_statement(THD *thd, const LEX_CSTRING *label_name) +{ + sp_label *lab= spcont->find_goto_label(label_name); + if (!lab || lab->ip == 0) + { + sp_label *delayedlabel; + if (!lab) + { + // Label not found --> add forward jump to an unknown label + spcont->push_goto_label(thd, label_name, 0, sp_label::GOTO); + delayedlabel= spcont->last_goto_label(); + } + else + { + delayedlabel= lab; + } + return sphead->push_backpatch_goto(thd, spcont, delayedlabel); + } + else + { + // Label found (backward goto) + return sp_change_context(thd, lab->ctx, false) || + sphead->add_instr_jump(thd, spcont, lab->ip); /* Jump back */ + } + return false; +} + +bool LEX::sp_push_goto_label(THD *thd, const LEX_CSTRING *label_name) +{ + sp_label *lab= spcont->find_goto_label(label_name, false); + if (lab) + { + if 
(unlikely(lab->ip != 0)) + { + my_error(ER_SP_LABEL_REDEFINE, MYF(0), label_name->str); + return true; + } + lab->ip= sphead->instructions(); + + sp_label *beginblocklabel= spcont->find_label(&empty_clex_str); + sphead->backpatch_goto(thd, lab, beginblocklabel); + } + else + { + spcont->push_goto_label(thd, label_name, sphead->instructions()); + } + return false; +} + +bool LEX::sp_exit_block(THD *thd, sp_label *lab) +{ + /* + When jumping to a BEGIN-END block end, the target jump + points to the block hpop/cpop cleanup instructions, + so we should exclude the block context here. + When jumping to something else (i.e., SP_LAB_ITER), + there are no hpop/cpop at the jump destination, + so we should include the block context here for cleanup. + */ + bool exclusive= (lab->type == sp_label::BEGIN); + return sp_change_context(thd, lab->ctx, exclusive) || + sphead->add_instr_jump_forward_with_backpatch(thd, spcont, lab); +} + + +bool LEX::sp_exit_block(THD *thd, sp_label *lab, Item *when) +{ + if (!when) + return sp_exit_block(thd, lab); + + DBUG_ASSERT(sphead == thd->lex->sphead); + DBUG_ASSERT(spcont == thd->lex->spcont); + sp_instr_jump_if_not *i= new (thd->mem_root) + sp_instr_jump_if_not(sphead->instructions(), + spcont, + when, this); + if (unlikely(i == NULL) || + unlikely(sphead->add_instr(i)) || + unlikely(sp_exit_block(thd, lab))) + return true; + i->backpatch(sphead->instructions(), spcont); + return false; +} + + +bool LEX::sp_exit_statement(THD *thd, Item *item) +{ + sp_label *lab= spcont->find_label_current_loop_start(); + if (unlikely(!lab)) + { + my_error(ER_SP_LILABEL_MISMATCH, MYF(0), "EXIT", ""); + return true; + } + DBUG_ASSERT(lab->type == sp_label::ITERATION); + return sp_exit_block(thd, lab, item); +} + + +bool LEX::sp_exit_statement(THD *thd, const LEX_CSTRING *label_name, Item *item) +{ + sp_label *lab= spcont->find_label(label_name); + if (unlikely(!lab || lab->type != sp_label::ITERATION)) + { + my_error(ER_SP_LILABEL_MISMATCH, MYF(0), "EXIT", 
label_name->str); + return true; + } + return sp_exit_block(thd, lab, item); +} + + +bool LEX::sp_iterate_statement(THD *thd, const LEX_CSTRING *label_name) +{ + sp_label *lab= spcont->find_label(label_name); + if (unlikely(!lab || lab->type != sp_label::ITERATION)) + { + my_error(ER_SP_LILABEL_MISMATCH, MYF(0), "ITERATE", label_name->str); + return true; + } + return sp_continue_loop(thd, lab); +} + + +bool LEX::sp_continue_loop(THD *thd, sp_label *lab) +{ + const sp_pcontext::Lex_for_loop &for_loop= lab->ctx->for_loop(); + /* + FOR loops need some additional instructions (e.g. an integer increment or + a cursor fetch) before the "jump to the start of the body" instruction. + We need to check two things here: + - If we're in a FOR loop at all. + - If the label pointed by "lab" belongs exactly to the nearest FOR loop, + rather than to a nested LOOP/WHILE/REPEAT inside the FOR. + */ + if (for_loop.m_index /* we're in some FOR loop */ && + for_loop.m_start_label == lab /* lab belongs to the FOR loop */) + { + // We're in a FOR loop, and "ITERATE loop_label" belongs to this FOR loop. + if (for_loop.is_for_loop_cursor() ? 
+ sp_for_loop_cursor_iterate(thd, for_loop) : + sp_for_loop_intrange_iterate(thd, for_loop)) + return true; + } + return sp_change_context(thd, lab->ctx, false) || + sphead->add_instr_jump(thd, spcont, lab->ip); /* Jump back */ +} + + +bool LEX::sp_continue_statement(THD *thd) +{ + sp_label *lab= spcont->find_label_current_loop_start(); + if (unlikely(!lab)) + { + my_error(ER_SP_LILABEL_MISMATCH, MYF(0), "CONTINUE", ""); + return true; + } + DBUG_ASSERT(lab->type == sp_label::ITERATION); + return sp_continue_loop(thd, lab); +} + + +bool LEX::sp_continue_statement(THD *thd, const LEX_CSTRING *label_name) +{ + sp_label *lab= spcont->find_label(label_name); + if (!lab || lab->type != sp_label::ITERATION) + { + my_error(ER_SP_LILABEL_MISMATCH, MYF(0), "CONTINUE", label_name->str); + return true; + } + return sp_continue_loop(thd, lab); +} + + +bool LEX::sp_continue_loop(THD *thd, sp_label *lab, Item *when) +{ + DBUG_ASSERT(when); + DBUG_ASSERT(sphead == thd->lex->sphead); + DBUG_ASSERT(spcont == thd->lex->spcont); + sp_instr_jump_if_not *i= new (thd->mem_root) + sp_instr_jump_if_not(sphead->instructions(), + spcont, + when, this); + if (unlikely(i == NULL) || + unlikely(sphead->add_instr(i)) || + unlikely(sp_continue_loop(thd, lab))) + return true; + i->backpatch(sphead->instructions(), spcont); + return false; +} + + +bool sp_expr_lex::sp_continue_when_statement(THD *thd) +{ + sp_label *lab= spcont->find_label_current_loop_start(); + if (unlikely(!lab)) + { + my_error(ER_SP_LILABEL_MISMATCH, MYF(0), "CONTINUE", ""); + return true; + } + DBUG_ASSERT(lab->type == sp_label::ITERATION); + return sp_continue_loop(thd, lab, get_item()); +} + + +bool sp_expr_lex::sp_continue_when_statement(THD *thd, + const LEX_CSTRING *label_name) +{ + sp_label *lab= spcont->find_label(label_name); + if (!lab || lab->type != sp_label::ITERATION) + { + my_error(ER_SP_LILABEL_MISMATCH, MYF(0), "CONTINUE", label_name->str); + return true; + } + return sp_continue_loop(thd, lab, get_item()); +} 
/**
  Make sure a stored routine head exists for a free-standing compound
  statement (BEGIN NOT ATOMIC ... END outside of CREATE PROCEDURE),
  creating an anonymous non-SUID sp_head on demand.

  @return false on success, true on error
*/
bool LEX::maybe_start_compound_statement(THD *thd)
{
  if (!sphead)
  {
    if (!make_sp_head(thd, NULL, &sp_handler_procedure, DEFAULT_AGGREGATE))
      return true;
    sphead->set_suid(SP_IS_NOT_SUID);
    sphead->set_body_start(thd, thd->m_parser_state->m_lip.get_cpp_tok_start());
  }
  return false;
}


/**
  Push a named loop label at the current instruction pointer.
  Redefinition of an already-visible label is an error.
*/
bool LEX::sp_push_loop_label(THD *thd, const LEX_CSTRING *label_name)
{
  sp_label *lab= spcont->find_label(label_name);
  if (lab)
  {
    my_error(ER_SP_LABEL_REDEFINE, MYF(0), label_name->str);
    return true;
  }
  spcont->push_label(thd, label_name, sphead->instructions(),
                     sp_label::ITERATION);
  return false;
}


/**
  Push an anonymous loop label (for unlabeled LOOP/WHILE/REPEAT).
  Also creates an anonymous routine head if needed.
*/
bool LEX::sp_push_loop_empty_label(THD *thd)
{
  if (maybe_start_compound_statement(thd))
    return true;
  /* Unlabeled controls get an empty label. */
  spcont->push_label(thd, &empty_clex_str, sphead->instructions(),
                     sp_label::ITERATION);
  return false;
}


/**
  Pop a loop label at loop end and backpatch forward jumps to it.
  The optional end label must match the begin label by name.
*/
bool LEX::sp_pop_loop_label(THD *thd, const LEX_CSTRING *label_name)
{
  sp_label *lab= spcont->pop_label();
  sphead->backpatch(lab);
  if (label_name->str &&
      lex_string_cmp(system_charset_info, label_name,
                     &lab->name) != 0)
  {
    my_error(ER_SP_LABEL_MISMATCH, MYF(0), label_name->str);
    return true;
  }
  return false;
}


/**
  Pop the anonymous label of an unlabeled loop and backpatch jumps to it.
*/
void LEX::sp_pop_loop_empty_label(THD *thd)
{
  sp_label *lab= spcont->pop_label();
  sphead->backpatch(lab);
  DBUG_ASSERT(lab->name.length == 0);
}


/**
  Emit the conditional jump testing a WHILE loop condition.
  The jump destination (loop end) is not known yet, so the instruction
  is registered for both label and continue backpatching.
*/
bool LEX::sp_while_loop_expression(THD *thd, Item *item)
{
  sp_instr_jump_if_not *i= new (thd->mem_root)
    sp_instr_jump_if_not(sphead->instructions(), spcont, item, this);
  return (unlikely(i == NULL) ||
          /* Jumping forward */
          unlikely(sphead->push_backpatch(thd, i, spcont->last_label())) ||
          unlikely(sphead->new_cont_backpatch(i)) ||
          unlikely(sphead->add_instr(i)));
}


/**
  Finalize a WHILE loop: emit the jump back to the condition test and
  resolve pending continue-handler backpatches.
*/
bool LEX::sp_while_loop_finalize(THD *thd)
{
  sp_label *lab= spcont->last_label();  /* Jumping back */
  sp_instr_jump *i= new (thd->mem_root)
    sp_instr_jump(sphead->instructions(), spcont, lab->ip);
  if (unlikely(i == NULL) ||
      unlikely(sphead->add_instr(i)))
    return true;
  sphead->do_cont_backpatch();
  return false;
}


/**
  Create an Item for OLD.col / NEW.col inside a trigger body and link it
  into the trigger's list of all trigger fields.

  @param thd      Thread handle
  @param name     The column name
  @param new_row  true for NEW.col, false for OLD.col
  @return the new Item, or NULL on error (error is reported)
*/
Item *LEX::create_and_link_Item_trigger_field(THD *thd,
                                              const LEX_CSTRING *name,
                                              bool new_row)
{
  Item_trigger_field *trg_fld;

  // OLD row does not exist in an INSERT trigger.
  if (unlikely(trg_chistics.event == TRG_EVENT_INSERT && !new_row))
  {
    my_error(ER_TRG_NO_SUCH_ROW_IN_TRG, MYF(0), "OLD", "on INSERT");
    return NULL;
  }

  // NEW row does not exist in a DELETE trigger.
  if (unlikely(trg_chistics.event == TRG_EVENT_DELETE && new_row))
  {
    my_error(ER_TRG_NO_SUCH_ROW_IN_TRG, MYF(0), "NEW", "on DELETE");
    return NULL;
  }

  DBUG_ASSERT(!new_row ||
              (trg_chistics.event == TRG_EVENT_INSERT ||
               trg_chistics.event == TRG_EVENT_UPDATE));

  // NEW.col is writable only in BEFORE INSERT/UPDATE triggers.
  const bool tmp_read_only=
    !(new_row && trg_chistics.action_time == TRG_ACTION_BEFORE);
  trg_fld= new (thd->mem_root)
             Item_trigger_field(thd, current_context(),
                                new_row ?
                                  Item_trigger_field::NEW_ROW:
                                  Item_trigger_field::OLD_ROW,
                                *name, SELECT_ACL, tmp_read_only);
  /*
    Let us add this item to list of all Item_trigger_field objects
    in trigger.
  */
  if (likely(trg_fld))
    trg_table_fields.link_in_list(trg_fld, &trg_fld->next_trg_field);

  return trg_fld;
}


/**
  Create an Item for the ":NEW.ident" / ":OLD.ident" syntax
  (sql_mode=ORACLE trigger row references).
*/
Item *LEX::make_item_colon_ident_ident(THD *thd,
                                       const Lex_ident_cli_st *ca,
                                       const Lex_ident_cli_st *cb)
{
  Lex_ident_sys a(thd, ca), b(thd, cb);
  if (a.is_null() || b.is_null())
    return NULL; // OEM
  if (!is_trigger_new_or_old_reference(&a))
  {
    thd->parse_error();
    return NULL;
  }
  bool new_row= (a.str[0] == 'N' || a.str[0] == 'n');
  return create_and_link_Item_trigger_field(thd, &b, new_row);
}


/**
  Create an Item for a PL/SQL explicit cursor attribute:
  cur%ISOPEN, cur%FOUND, cur%NOTFOUND, cur%ROWCOUNT.
  The cursor must be declared in the current SP context.
*/
Item *LEX::make_item_plsql_cursor_attr(THD *thd, const LEX_CSTRING *name,
                                       plsql_cursor_attr_t attr)
{
  uint offset;
  if (unlikely(!spcont || !spcont->find_cursor(name, &offset, false)))
  {
    my_error(ER_SP_CURSOR_MISMATCH, MYF(0), name->str);
    return NULL;
  }
  switch (attr) {
  case PLSQL_CURSOR_ATTR_ISOPEN:
    return new (thd->mem_root) Item_func_cursor_isopen(thd, name, offset);
  case PLSQL_CURSOR_ATTR_FOUND:
    return new (thd->mem_root) Item_func_cursor_found(thd, name, offset);
  case PLSQL_CURSOR_ATTR_NOTFOUND:
    return new (thd->mem_root) Item_func_cursor_notfound(thd, name, offset);
  case PLSQL_CURSOR_ATTR_ROWCOUNT:
    return new (thd->mem_root) Item_func_cursor_rowcount(thd, name, offset);
  }
  DBUG_ASSERT(0);
  return NULL;
}


/**
  Create an Item for a system variable reference: @@[global.|session.]name
  or @@name.component. Marks the statement binlog-unsafe when the variable
  value is not written to the binary log.
*/
Item *LEX::make_item_sysvar(THD *thd,
                            enum_var_type type,
                            const LEX_CSTRING *name,
                            const LEX_CSTRING *component)

{
  Item *item;
  DBUG_ASSERT(name->str);
  /*
    "SELECT @@global.global.variable" is not allowed
    Note, "global" can come through TEXT_STRING_sys.
  */
  if (component->str && unlikely(check_reserved_words(name)))
  {
    thd->parse_error();
    return NULL;
  }
  if (unlikely(!(item= get_system_var(thd, type, name, component))))
    return NULL;
  if (!((Item_func_get_system_var*) item)->is_written_to_binlog())
    set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_VARIABLE);
  return item;
}


/*
  Register a "?" placeholder: appended to the statement's own parameter
  list, or registered as a clone when re-parsing a cloned SP expression
  (clone_spec_offset != 0).
*/
static bool param_push_or_clone(THD *thd, LEX *lex, Item_param *item)
{
  return !lex->clone_spec_offset ?
         lex->param_list.push_back(item, thd->mem_root) :
         item->add_as_clone(thd);
}


/**
  Create an Item_param for a "?" placeholder found between "start" and
  "end" in the query text. Only valid in prepared-statement mode and
  where expressions may contain variables.
*/
Item_param *LEX::add_placeholder(THD *thd, const LEX_CSTRING *name,
                                 const char *start, const char *end)
{
  if (unlikely(!thd->m_parser_state->m_lip.stmt_prepare_mode))
  {
    thd->parse_error(ER_SYNTAX_ERROR, start);
    return NULL;
  }
  if (unlikely(!parsing_options.allows_variable))
  {
    my_error(ER_VIEW_SELECT_VARIABLE, MYF(0));
    return NULL;
  }
  Query_fragment pos(thd, sphead, start, end);
  Item_param *item= new (thd->mem_root) Item_param(thd, name,
                                                   pos.pos(), pos.length());
  if (unlikely(!item) || unlikely(param_push_or_clone(thd, this, item)))
  {
    my_error(ER_OUT_OF_RESOURCES, MYF(0));
    return NULL;
  }
  return item;
}


/**
  Build the Sql_cmd for a SIGNAL statement.
  @return true on out-of-memory
*/
bool LEX::add_signal_statement(THD *thd, const sp_condition_value *v)
{
  Yacc_state *state= &thd->m_parser_state->m_yacc;
  sql_command= SQLCOM_SIGNAL;
  m_sql_cmd= new (thd->mem_root) Sql_cmd_signal(v, state->m_set_signal_info);
  return m_sql_cmd == NULL;
}


/**
  Build the Sql_cmd for a RESIGNAL statement.
  @return true on out-of-memory
*/
bool LEX::add_resignal_statement(THD *thd, const sp_condition_value *v)
{
  Yacc_state *state= &thd->m_parser_state->m_yacc;
  sql_command= SQLCOM_RESIGNAL;
  m_sql_cmd= new (thd->mem_root) Sql_cmd_resignal(v, state->m_set_signal_info);
  return m_sql_cmd == NULL;
}


/*
  Make an Item when an identifier is found in the FOR loop bounds:
    FOR rec IN cursor
    FOR var IN var1 .. xxx
    FOR var IN row1.field1 .. xxx
  When we parse the first expression after the "IN" keyword,
  we don't know yet if it's a cursor name, or a scalar SP variable name,
  or a field of a ROW SP variable. Here we create Item_field to remember
  the fully qualified name. Later sp_for_loop_cursor_declarations()
  detects how to treat this name properly.
*/
Item *LEX::create_item_for_loop_bound(THD *thd,
                                      const LEX_CSTRING *a,
                                      const LEX_CSTRING *b,
                                      const LEX_CSTRING *c)
{
  /*
    Pass NULL as the name resolution context.
    This is OK, fix_fields() won't be called for this Item_field.
  */
  return new (thd->mem_root) Item_field(thd, NULL, *a, *b, *c);
}


/**
  Check whether table fields are allowed in the expression being parsed
  (i.e. whether we are inside a SELECT). Reports ER_BAD_FIELD_ERROR and
  returns true when they are not.
*/
bool LEX::check_expr_allows_fields_or_error(THD *thd, const char *name) const
{
  if (select_stack_top > 0)
    return false; // OK, fields are allowed
  my_error(ER_BAD_FIELD_ERROR, MYF(0), name, thd->where);
  return true;  // Error, fields are not allowed
}

/**
  Create an Item for a qualified identifier "a.b" that is known not to be
  an SP variable: a trigger OLD/NEW reference, a FOR-loop bound, or a
  table field reference.
*/
Item *LEX::create_item_ident_nospvar(THD *thd,
                                     const Lex_ident_sys_st *a,
                                     const Lex_ident_sys_st *b)
{
  DBUG_ASSERT(this == thd->lex);
  /*
    FIXME This will work ok in simple_ident_nospvar case because
    we can't meet simple_ident_nospvar in trigger now. But it
    should be changed in future.
  */
  if (is_trigger_new_or_old_reference(a))
  {
    bool new_row= (a->str[0]=='N' || a->str[0]=='n');

    return create_and_link_Item_trigger_field(thd, b, new_row);
  }

  if (unlikely(current_select->no_table_names_allowed))
  {
    my_error(ER_TABLENAME_NOT_ALLOWED_HERE, MYF(0), a->str, thd->where);
    return NULL;
  }

  if (current_select->parsing_place == FOR_LOOP_BOUND)
    return create_item_for_loop_bound(thd, &null_clex_str, a, b);

  return create_item_ident_field(thd, Lex_ident_sys(), *a, *b);
}


/**
  Create an Item_splocal for "var.field" where "var" is a ROW SP variable
  or a variable of TABLE%ROWTYPE / cursor%ROWTYPE type. For %ROWTYPE
  variables field resolution is delayed until execution
  (Item_splocal_row_field_by_name).
*/
Item_splocal *LEX::create_item_spvar_row_field(THD *thd,
                                               const Sp_rcontext_handler *rh,
                                               const Lex_ident_sys *a,
                                               const Lex_ident_sys *b,
                                               sp_variable *spv,
                                               const char *start,
                                               const char *end)
{
  if (unlikely(!parsing_options.allows_variable))
  {
    my_error(ER_VIEW_SELECT_VARIABLE, MYF(0));
    return NULL;
  }

  Query_fragment pos(thd, sphead, start, end);
  Item_splocal *item;
  if (spv->field_def.is_table_rowtype_ref() ||
      spv->field_def.is_cursor_rowtype_ref())
  {
    // Field offset is unknown at parse time: resolve by name at runtime.
    if (unlikely(!(item= new (thd->mem_root)
                   Item_splocal_row_field_by_name(thd, rh, a, b, spv->offset,
                                                  &type_handler_null,
                                                  pos.pos(), pos.length()))))
      return NULL;
  }
  else
  {
    uint row_field_offset;
    const Spvar_definition *def;
    if (unlikely(!(def= spv->find_row_field(a, b, &row_field_offset))))
      return NULL;

    if (unlikely(!(item= new (thd->mem_root)
                   Item_splocal_row_field(thd, rh, a, b,
                                          spv->offset, row_field_offset,
                                          def->type_handler(),
                                          pos.pos(), pos.length()))))
      return NULL;
  }
#ifdef DBUG_ASSERT_EXISTS
  item->m_sp= sphead;
#endif
  safe_to_cache_query=0;
  return item;
}


/**
  Create a my_var target for "SELECT ... INTO spvar".
  Returns NULL without an error for EXPLAIN (result == NULL).
*/
my_var *LEX::create_outvar(THD *thd, const LEX_CSTRING *name)
{
  const Sp_rcontext_handler *rh;
  sp_variable *spv;
  if (likely((spv= find_variable(name, &rh))))
    return result ? new (thd->mem_root)
                    my_var_sp(rh, name, spv->offset,
                              spv->type_handler(), sphead) :
                    NULL /* EXPLAIN */;
  my_error(ER_SP_UNDECLARED_VAR, MYF(0), name->str);
  return NULL;
}


/**
  Create a my_var target for "SELECT ... INTO rowvar.field".
  Returns NULL without an error for EXPLAIN (result == NULL).
*/
my_var *LEX::create_outvar(THD *thd,
                           const LEX_CSTRING *a,
                           const LEX_CSTRING *b)
{
  const Sp_rcontext_handler *rh;
  sp_variable *t;
  if (unlikely(!(t= find_variable(a, &rh))))
  {
    my_error(ER_SP_UNDECLARED_VAR, MYF(0), a->str);
    return NULL;
  }
  uint row_field_offset;
  if (!t->find_row_field(a, b, &row_field_offset))
    return NULL;
  return result ?
    new (thd->mem_root) my_var_sp_row_field(rh, a, b, t->offset,
                                            row_field_offset, sphead) :
    NULL /* EXPLAIN */;
}


/**
  Create an Item for NEXTVAL(sequence). The sequence table is added to
  the table list with write locks, and the statement is marked as
  binlog-unsafe (sequence values are non-deterministic across replicas).
*/
Item *LEX::create_item_func_nextval(THD *thd, Table_ident *table_ident)
{
  TABLE_LIST *table;
  if (unlikely(!(table= current_select->add_table_to_list(thd, table_ident, 0,
                                                          TL_OPTION_SEQUENCE,
                                                          TL_WRITE_ALLOW_WRITE,
                                                          MDL_SHARED_WRITE))))
    return NULL;
  thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
  return new (thd->mem_root) Item_func_nextval(thd, table);
}


/**
  Create an Item for LASTVAL(sequence) (a.k.a. CURRVAL).
  Only read access to the sequence table is needed.
*/
Item *LEX::create_item_func_lastval(THD *thd, Table_ident *table_ident)
{
  TABLE_LIST *table;
  if (unlikely(!(table= current_select->add_table_to_list(thd, table_ident, 0,
                                                          TL_OPTION_SEQUENCE,
                                                          TL_READ,
                                                          MDL_SHARED_READ))))
    return NULL;
  thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
  return new (thd->mem_root) Item_func_lastval(thd, table);
}


/**
  NEXTVAL by qualified name: builds the Table_ident and delegates.
*/
Item *LEX::create_item_func_nextval(THD *thd,
                                    const LEX_CSTRING *db,
                                    const LEX_CSTRING *name)
{
  Table_ident *table_ident;
  if (unlikely(!(table_ident=
                 new (thd->mem_root) Table_ident(thd, db, name, false))))
    return NULL;
  return create_item_func_nextval(thd, table_ident);
}


/**
  LASTVAL by qualified name: builds the Table_ident and delegates.
*/
Item *LEX::create_item_func_lastval(THD *thd,
                                    const LEX_CSTRING *db,
                                    const LEX_CSTRING *name)
{
  Table_ident *table_ident;
  if (unlikely(!(table_ident=
                 new (thd->mem_root) Table_ident(thd, db, name, false))))
    return NULL;
  return create_item_func_lastval(thd, table_ident);
}


/**
  Create an Item for SETVAL(sequence, nextval [, is_used [, round]]).
*/
Item *LEX::create_item_func_setval(THD *thd, Table_ident *table_ident,
                                   longlong nextval, ulonglong round,
                                   bool is_used)
{
  TABLE_LIST *table;
  if (unlikely(!(table= current_select->add_table_to_list(thd, table_ident, 0,
                                                          TL_OPTION_SEQUENCE,
                                                          TL_WRITE_ALLOW_WRITE,
                                                          MDL_SHARED_WRITE))))
    return NULL;
  return new (thd->mem_root) Item_func_setval(thd, table, nextval, round,
                                              is_used);
}


/**
  Create an Item for a two-part identifier "a.b": a ROW/%ROWTYPE SP
  variable field, an Oracle-style sequence attribute (seq.NEXTVAL /
  seq.CURRVAL), or a plain table.column reference.
*/
Item *LEX::create_item_ident(THD *thd,
                             const Lex_ident_cli_st *ca,
                             const Lex_ident_cli_st *cb)
{
  const char *start= ca->pos();
  const char *end= cb->end();
  const Sp_rcontext_handler *rh;
  sp_variable *spv;
  DBUG_ASSERT(thd->m_parser_state->m_lip.get_buf() <= start);
  DBUG_ASSERT(start <= end);
  DBUG_ASSERT(end <= thd->m_parser_state->m_lip.get_end_of_query());
  Lex_ident_sys a(thd, ca), b(thd, cb);
  if (a.is_null() || b.is_null())
    return NULL; // OEM
  if ((spv= find_variable(&a, &rh)) &&
      (spv->field_def.is_row() ||
       spv->field_def.is_table_rowtype_ref() ||
       spv->field_def.is_cursor_rowtype_ref()))
    return create_item_spvar_row_field(thd, rh, &a, &b, spv, start, end);

  // sql_mode=ORACLE: "seq.NEXTVAL" / "seq.CURRVAL" sequence syntax.
  if ((thd->variables.sql_mode & MODE_ORACLE) && b.length == 7)
  {
    if (!system_charset_info->strnncoll(
                      (const uchar *) b.str, 7,
                      (const uchar *) "NEXTVAL", 7))
      return create_item_func_nextval(thd, &null_clex_str, &a);
    else if (!system_charset_info->strnncoll(
                      (const uchar *) b.str, 7,
                      (const uchar *) "CURRVAL", 7))
      return create_item_func_lastval(thd, &null_clex_str, &a);
  }

  return create_item_ident_nospvar(thd, &a, &b);
}


/**
  Create an Item for a three-part identifier "a.b.c":
  an Oracle-style "db.seq.NEXTVAL/CURRVAL", a FOR-loop bound,
  or a db.table.column field reference. The schema part is dropped
  for clients connected with CLIENT_NO_SCHEMA.
*/
Item *LEX::create_item_ident(THD *thd,
                             const Lex_ident_sys_st *a,
                             const Lex_ident_sys_st *b,
                             const Lex_ident_sys_st *c)
{
  Lex_ident_sys_st schema= thd->client_capabilities & CLIENT_NO_SCHEMA ?
                           Lex_ident_sys() : *a;
  if ((thd->variables.sql_mode & MODE_ORACLE) && c->length == 7)
  {
    if (!system_charset_info->strnncoll(
                      (const uchar *) c->str, 7,
                      (const uchar *) "NEXTVAL", 7))
      return create_item_func_nextval(thd, a, b);
    else if (!system_charset_info->strnncoll(
                      (const uchar *) c->str, 7,
                      (const uchar *) "CURRVAL", 7))
      return create_item_func_lastval(thd, a, b);
  }

  if (current_select->no_table_names_allowed)
  {
    my_error(ER_TABLENAME_NOT_ALLOWED_HERE, MYF(0), b->str, thd->where);
    return NULL;
  }

  if (current_select->parsing_place == FOR_LOOP_BOUND)
    return create_item_for_loop_bound(thd, &null_clex_str, b, c);

  return create_item_ident_field(thd, schema, *b, *c);
}


/**
  Create an Item for an SP variable used in a LIMIT clause.
  Only integer-compatible variables are accepted
  (is_valid_limit_clause_variable_with_error()).
*/
Item *LEX::create_item_limit(THD *thd, const Lex_ident_cli_st *ca)
{
  DBUG_ASSERT(thd->m_parser_state->m_lip.get_buf() <= ca->pos());
  DBUG_ASSERT(ca->pos() <= ca->end());
  DBUG_ASSERT(ca->end() <= thd->m_parser_state->m_lip.get_end_of_query());

  const Sp_rcontext_handler *rh;
  sp_variable *spv;
  Lex_ident_sys sa(thd, ca);
  if (sa.is_null())
    return NULL; // EOM
  if (!(spv= find_variable(&sa, &rh)))
  {
    my_error(ER_SP_UNDECLARED_VAR, MYF(0), sa.str);
    return NULL;
  }

  Query_fragment pos(thd, sphead, ca->pos(), ca->end());
  Item_splocal *item;
  // Positions are zeroed when re-parsing a cloned SP expression.
  if (unlikely(!(item= new (thd->mem_root)
                 Item_splocal(thd, rh, &sa,
                              spv->offset, spv->type_handler(),
                              clone_spec_offset ? 0 : pos.pos(),
                              clone_spec_offset ? 0 : pos.length()))))
    return NULL;
#ifdef DBUG_ASSERT_EXISTS
  item->m_sp= sphead;
#endif
  safe_to_cache_query= 0;

  if (!item->is_valid_limit_clause_variable_with_error())
    return NULL;

  item->limit_clause_param= true;
  return item;
}


/**
  Create an Item for a qualified SP ROW variable field "var.field"
  used in a LIMIT clause.
*/
Item *LEX::create_item_limit(THD *thd,
                             const Lex_ident_cli_st *ca,
                             const Lex_ident_cli_st *cb)
{
  DBUG_ASSERT(thd->m_parser_state->m_lip.get_buf() <= ca->pos());
  DBUG_ASSERT(ca->pos() <= cb->end());
  DBUG_ASSERT(cb->end() <= thd->m_parser_state->m_lip.get_end_of_query());

  const Sp_rcontext_handler *rh;
  sp_variable *spv;
  Lex_ident_sys sa(thd, ca), sb(thd, cb);
  if (unlikely(sa.is_null() || sb.is_null()))
    return NULL; // EOM
  if (!(spv= find_variable(&sa, &rh)))
  {
    my_error(ER_SP_UNDECLARED_VAR, MYF(0), sa.str);
    return NULL;
  }
  // Qualified %TYPE variables are not possible
  DBUG_ASSERT(!spv->field_def.column_type_ref());
  Item_splocal *item;
  if (unlikely(!(item= create_item_spvar_row_field(thd, rh, &sa, &sb, spv,
                                                   ca->pos(), cb->end()))))
    return NULL;
  if (!item->is_valid_limit_clause_variable_with_error())
    return NULL;
  item->limit_clause_param= true;
  return item;
}


/**
  Generate a SET for a "@name := val" user variable assignment:
  wraps the assignment Item into a set_var_user on var_list.
  @return false on success, true on out-of-memory
*/
bool LEX::set_user_variable(THD *thd, const LEX_CSTRING *name, Item *val)
{
  Item_func_set_user_var *item;
  set_var_user *var;
  if (unlikely(!(item= new (thd->mem_root) Item_func_set_user_var(thd, name,
                                                                  val))) ||
      unlikely(!(var= new (thd->mem_root) set_var_user(item))))
    return true;
  if (unlikely(var_list.push_back(var, thd->mem_root)))
    return true;
  return false;
}


/**
  Create a field reference Item. In a HAVING clause outside of an
  aggregate function an Item_ref is created instead of Item_field,
  so the reference can resolve to a SELECT-list alias.
*/
Item *LEX::create_item_ident_field(THD *thd,
                                   const Lex_ident_sys_st &db,
                                   const Lex_ident_sys_st &table,
                                   const Lex_ident_sys_st &name)
{
  if (check_expr_allows_fields_or_error(thd, name.str))
    return NULL;

  if (current_select->parsing_place != IN_HAVING ||
      current_select->get_in_sum_expr() > 0)
    return new (thd->mem_root) Item_field(thd, current_context(),
                                          db, table, name);

  return new (thd->mem_root) Item_ref(thd, current_context(),
                                      db, table, name);
}


/**
  Create an Item for an unqualified identifier inside a stored routine:
  an SP variable (Item_splocal), Oracle SQLCODE/SQLERRM, a FOR-loop
  bound, or a plain field reference.
*/
Item *LEX::create_item_ident_sp(THD *thd, Lex_ident_sys_st *name,
                                const char *start,
                                const char *end)
{
  DBUG_ASSERT(thd->m_parser_state->m_lip.get_buf() <= start);
  DBUG_ASSERT(start <= end);
  DBUG_ASSERT(end <= thd->m_parser_state->m_lip.get_end_of_query());

  const Sp_rcontext_handler *rh;
  sp_variable *spv;
  uint unused_off;
  DBUG_ASSERT(spcont);
  DBUG_ASSERT(sphead);
  if ((spv= find_variable(name, &rh)))
  {
    /* We're compiling a stored procedure and found a variable */
    if (!parsing_options.allows_variable)
    {
      my_error(ER_VIEW_SELECT_VARIABLE, MYF(0));
      return NULL;
    }

    Query_fragment pos(thd, sphead, start, end);
    // Positions are zeroed when re-parsing a cloned SP expression.
    uint f_pos= clone_spec_offset ? 0 : pos.pos();
    uint f_length= clone_spec_offset ? 0 : pos.length();
    // %TYPE variables get their data type resolved at execution time.
    Item_splocal *splocal= spv->field_def.is_column_type_ref() ?
      new (thd->mem_root) Item_splocal_with_delayed_data_type(thd, rh, name,
                                                              spv->offset,
                                                              f_pos, f_length) :
      new (thd->mem_root) Item_splocal(thd, rh, name,
                                       spv->offset, spv->type_handler(),
                                       f_pos, f_length);
    if (unlikely(splocal == NULL))
      return NULL;
#ifdef DBUG_ASSERT_EXISTS
    splocal->m_sp= sphead;
#endif
    safe_to_cache_query= 0;
    return splocal;
  }

  if (thd->variables.sql_mode & MODE_ORACLE)
  {
    if (lex_string_eq(name, STRING_WITH_LEN("SQLCODE")))
      return new (thd->mem_root) Item_func_sqlcode(thd);
    if (lex_string_eq(name, STRING_WITH_LEN("SQLERRM")))
      return new (thd->mem_root) Item_func_sqlerrm(thd);
  }

  if (fields_are_impossible() &&
      (current_select->parsing_place != FOR_LOOP_BOUND ||
       spcont->find_cursor(name, &unused_off, false) == NULL))
  {
    // we are out of SELECT or FOR so it is syntax error
    my_error(ER_SP_UNDECLARED_VAR, MYF(0), name->str);
    return NULL;
  }

  if (current_select->parsing_place == FOR_LOOP_BOUND)
    return create_item_for_loop_bound(thd, &null_clex_str, &null_clex_str,
                                      name);

  return create_item_ident_nosp(thd, name);
}
/**
  Generate instructions for: SET name= expr;
  "name" is either an SP local variable or a system variable.
*/
bool LEX::set_variable(const Lex_ident_sys_st *name, Item *item)
{
  sp_pcontext *ctx;
  const Sp_rcontext_handler *rh;
  sp_variable *spv= find_variable(name, &ctx, &rh);
  return spv ? sphead->set_local_variable(thd, ctx, rh, spv, item, this, true) :
               set_system_variable(option_type, name, item);
}


/**
  Generate instructions for:
    SET x.y= expr;
*/
bool LEX::set_variable(const Lex_ident_sys_st *name1,
                       const Lex_ident_sys_st *name2,
                       Item *item)
{
  const Sp_rcontext_handler *rh;
  sp_pcontext *ctx;
  sp_variable *spv;
  if (spcont && (spv= find_variable(name1, &ctx, &rh)))
  {
    // %ROWTYPE variables: field resolved by name at execution time.
    if (spv->field_def.is_table_rowtype_ref() ||
        spv->field_def.is_cursor_rowtype_ref())
      return sphead->set_local_variable_row_field_by_name(thd, ctx,
                                                          rh,
                                                          spv, name2,
                                                          item, this);
    // A field of a ROW variable
    uint row_field_offset;
    return !spv->find_row_field(name1, name2, &row_field_offset) ||
           sphead->set_local_variable_row_field(thd, ctx, rh,
                                                spv, row_field_offset,
                                                item, this);
  }

  // SET NEW.col / SET OLD.col inside a trigger body.
  if (is_trigger_new_or_old_reference(name1))
    return set_trigger_field(name1, name2, item);

  // Otherwise a structured system variable: SET name1.name2= expr.
  return set_system_variable(thd, option_type, name1, name2, item);
}


/**
  Generate instructions for: SET DEFAULT.name= expr;
  Only structured (keycache-style) system variables support this.
*/
bool LEX::set_default_system_variable(enum_var_type var_type,
                                      const Lex_ident_sys_st *name,
                                      Item *val)
{
  static Lex_ident_sys default_base_name= {STRING_WITH_LEN("default")};
  sys_var *var= find_sys_var(thd, name->str, name->length);
  if (!var)
    return true;
  if (unlikely(!var->is_struct()))
  {
    my_error(ER_VARIABLE_IS_NOT_STRUCT, MYF(0), name->str);
    return true;
  }
  return set_system_variable(var_type, var, &default_base_name, val);
}


/**
  Generate instructions for: SET [GLOBAL|SESSION] name= expr;
*/
bool LEX::set_system_variable(enum_var_type var_type,
                              const Lex_ident_sys_st *name,
                              Item *val)
{
  sys_var *var= find_sys_var(thd, name->str, name->length);
  // find_sys_var() reports its own error when returning NULL.
  DBUG_ASSERT(thd->is_error() || var != NULL);
  static Lex_ident_sys null_str;
  return likely(var) ? set_system_variable(var_type, var, &null_str, val) :
                       true;
}


/**
  Generate instructions for: SET name1.name2= expr;
  (a component of a structured system variable, e.g. keycache.key_buffer_size)
*/
bool LEX::set_system_variable(THD *thd, enum_var_type var_type,
                              const Lex_ident_sys_st *name1,
                              const Lex_ident_sys_st *name2,
                              Item *val)
{
  sys_var *tmp;
  if (unlikely(check_reserved_words(name1)) ||
      unlikely(!(tmp= find_sys_var(thd, name2->str, name2->length, true))))
  {
    my_error(ER_UNKNOWN_STRUCTURED_VARIABLE, MYF(0),
             (int) name1->length, name1->str);
    return true;
  }
  if (unlikely(!tmp->is_struct()))
  {
    my_error(ER_VARIABLE_IS_NOT_STRUCT, MYF(0), name2->str);
    return true;
  }
  return set_system_variable(var_type, tmp, name1, val);
}


/**
  Generate instructions for: SET NEW.name2= expr inside a trigger.
  OLD rows are read-only; NEW rows do not exist in DELETE triggers and
  cannot be changed in AFTER triggers.
*/
bool LEX::set_trigger_field(const LEX_CSTRING *name1, const LEX_CSTRING *name2,
                            Item *val)
{
  DBUG_ASSERT(is_trigger_new_or_old_reference(name1));
  if (unlikely(name1->str[0]=='O' || name1->str[0]=='o'))
  {
    my_error(ER_TRG_CANT_CHANGE_ROW, MYF(0), "OLD", "");
    return true;
  }
  if (unlikely(trg_chistics.event == TRG_EVENT_DELETE))
  {
    my_error(ER_TRG_NO_SUCH_ROW_IN_TRG, MYF(0), "NEW", "on DELETE");
    return true;
  }
  if (unlikely(trg_chistics.action_time == TRG_ACTION_AFTER))
  {
    my_error(ER_TRG_CANT_CHANGE_ROW, MYF(0), "NEW", "after ");
    return true;
  }
  return set_trigger_new_row(name2, val);
}


#ifdef MYSQL_SERVER
/*
  Map from a bitmask of statement table-access types (one bit per
  LEX::enum_stmt_accessed_table) to the unsafety conditions under which
  such a statement cannot be safely binlogged in statement format.
*/
uint binlog_unsafe_map[256];

#define UNSAFE(a, b, c) \
  { \
  DBUG_PRINT("unsafe_mixed_statement", ("SETTING BASE VALUES: %s, %s, %02X", \
    LEX::stmt_accessed_table_string(a), \
    LEX::stmt_accessed_table_string(b), \
    c)); \
  unsafe_mixed_statement(a, b, c); \
  }

/*
  Sets the combination given by "a" and "b" and automatically combinations
  given by other types of access, i.e. 2^(8 - 2), as unsafe.

  It may happen a collision when automatically defining a combination as
  unsafe. For that reason, a combination has its unsafe condition redefined
  only when the new_condition is greater than the old. For instance,

    . (BINLOG_DIRECT_ON & TRX_CACHE_NOT_EMPTY) is never overwritten by
    . (BINLOG_DIRECT_ON | BINLOG_DIRECT_OFF).
*/
void unsafe_mixed_statement(LEX::enum_stmt_accessed_table a,
                            LEX::enum_stmt_accessed_table b, uint condition)
{
  int type= 0;
  int index= (1U << a) | (1U << b);


  // Mark every access bitmask that includes both "a" and "b" as unsafe.
  for (type= 0; type < 256; type++)
  {
    if ((type & index) == index)
    {
      binlog_unsafe_map[type] |= condition;
    }
  }
}
/*
  The BINLOG_* AND TRX_CACHE_* values can be combined by using '&' or '|',
  which means that both conditions need to be satisfied or any of them is
  enough. For example,

    . BINLOG_DIRECT_ON & TRX_CACHE_NOT_EMPTY means that the statement is
    unsafe when the option is on and trx-cache is not empty;

    . BINLOG_DIRECT_ON | BINLOG_DIRECT_OFF means the statement is unsafe
    in all cases.

    . TRX_CACHE_EMPTY | TRX_CACHE_NOT_EMPTY means the statement is unsafe
    in all cases. Similar as above.
*/
void binlog_unsafe_map_init()
{
  memset((void*) binlog_unsafe_map, 0, sizeof(uint) * 256);

  /*
    Classify a statement as unsafe when there is a mixed statement and an
    on-going transaction at any point of the execution if:

      1. The mixed statement is about to update a transactional table and
      a non-transactional table.

      2. The mixed statement is about to update a transactional table and
      read from a non-transactional table.

      3. The mixed statement is about to update a non-transactional table
      and temporary transactional table.

      4. The mixed statement is about to update a temporary transactional
      table and read from a non-transactional table.

      5. The mixed statement is about to update a transactional table and
      a temporary non-transactional table.

      6. The mixed statement is about to update a transactional table and
      read from a temporary non-transactional table.

      7. The mixed statement is about to update a temporary transactional
      table and temporary non-transactional table.

      8. The mixed statement is about to update a temporary transactional
      table and read from a temporary non-transactional table.

    After updating a transactional table if:

      9. The mixed statement is about to update a non-transactional table
      and read from a transactional table.

      10. The mixed statement is about to update a non-transactional table
      and read from a temporary transactional table.

      11. The mixed statement is about to update a temporary non-transactional
      table and read from a transactional table.

      12. The mixed statement is about to update a temporary non-transactional
      table and read from a temporary transactional table.

      13. The mixed statement is about to update a temporary non-transactional
      table and read from a non-transactional table.

    The reason for this is that locks acquired may not protect a concurrent
    transaction of interfering in the current execution and by consequence in
    the result.
  */
  /* Case 1. */
  UNSAFE(LEX::STMT_WRITES_TRANS_TABLE, LEX::STMT_WRITES_NON_TRANS_TABLE,
    BINLOG_DIRECT_ON | BINLOG_DIRECT_OFF);
  /* Case 2. */
  UNSAFE(LEX::STMT_WRITES_TRANS_TABLE, LEX::STMT_READS_NON_TRANS_TABLE,
    BINLOG_DIRECT_ON | BINLOG_DIRECT_OFF);
  /* Case 3. */
  UNSAFE(LEX::STMT_WRITES_NON_TRANS_TABLE, LEX::STMT_WRITES_TEMP_TRANS_TABLE,
    BINLOG_DIRECT_ON | BINLOG_DIRECT_OFF);
  /* Case 4. */
  UNSAFE(LEX::STMT_WRITES_TEMP_TRANS_TABLE, LEX::STMT_READS_NON_TRANS_TABLE,
    BINLOG_DIRECT_ON | BINLOG_DIRECT_OFF);
  /* Case 5. */
  UNSAFE(LEX::STMT_WRITES_TRANS_TABLE, LEX::STMT_WRITES_TEMP_NON_TRANS_TABLE,
    BINLOG_DIRECT_ON);
  /* Case 6. */
  UNSAFE(LEX::STMT_WRITES_TRANS_TABLE, LEX::STMT_READS_TEMP_NON_TRANS_TABLE,
    BINLOG_DIRECT_ON);
  /* Case 7. */
  UNSAFE(LEX::STMT_WRITES_TEMP_TRANS_TABLE, LEX::STMT_WRITES_TEMP_NON_TRANS_TABLE,
    BINLOG_DIRECT_ON);
  /* Case 8. */
  UNSAFE(LEX::STMT_WRITES_TEMP_TRANS_TABLE, LEX::STMT_READS_TEMP_NON_TRANS_TABLE,
    BINLOG_DIRECT_ON);
  /* Case 9. */
  UNSAFE(LEX::STMT_WRITES_NON_TRANS_TABLE, LEX::STMT_READS_TRANS_TABLE,
    (BINLOG_DIRECT_ON | BINLOG_DIRECT_OFF) & TRX_CACHE_NOT_EMPTY);
  /* Case 10 */
  UNSAFE(LEX::STMT_WRITES_NON_TRANS_TABLE, LEX::STMT_READS_TEMP_TRANS_TABLE,
    (BINLOG_DIRECT_ON | BINLOG_DIRECT_OFF) & TRX_CACHE_NOT_EMPTY);
  /* Case 11. */
  UNSAFE(LEX::STMT_WRITES_TEMP_NON_TRANS_TABLE, LEX::STMT_READS_TRANS_TABLE,
    BINLOG_DIRECT_ON & TRX_CACHE_NOT_EMPTY);
  /* Case 12. */
  UNSAFE(LEX::STMT_WRITES_TEMP_NON_TRANS_TABLE, LEX::STMT_READS_TEMP_TRANS_TABLE,
    BINLOG_DIRECT_ON & TRX_CACHE_NOT_EMPTY);
  /* Case 13. */
  UNSAFE(LEX::STMT_WRITES_TEMP_NON_TRANS_TABLE, LEX::STMT_READS_NON_TRANS_TABLE,
    BINLOG_DIRECT_OFF & TRX_CACHE_NOT_EMPTY);
}
#endif


/**
  @brief
    Collect fields that are used in the GROUP BY of this st_select_lex

  @param thd  The thread handle

  @details
    This method looks through the fields that are used in the GROUP BY of
    this st_select_lex and saves info on these fields.
*/

void st_select_lex::collect_grouping_fields_for_derived(THD *thd,
                                                        ORDER *grouping_list)
{
  grouping_tmp_fields.empty();
  /*
    NOTE(review): the template argument of List_iterator (presumably <Item>)
    appears to have been lost in extraction of this file -- confirm against
    the upstream source.
  */
  List_iterator li(join->fields_list);
  Item *item= li++;
  for (uint i= 0; i < master_unit()->derived->table->s->fields;
       i++, (item=li++))
  {
    for (ORDER *ord= grouping_list; ord; ord= ord->next)
    {
      if ((*ord->item)->eq((Item*)item, 0))
      {
        // Pair the derived table's physical field with the SELECT list item.
        Field_pair *grouping_tmp_field=
          new Field_pair(master_unit()->derived->table->field[i], item);
        grouping_tmp_fields.push_back(grouping_tmp_field);
      }
    }
  }
}


/**
  Collect fields that are used in the GROUP BY of this SELECT

  @return false if at least one grouping field was collected, true otherwise.
*/

bool st_select_lex::collect_grouping_fields(THD *thd)
{
  grouping_tmp_fields.empty();

  for (ORDER *ord= group_list.first; ord; ord= ord->next)
  {
    Item *item= *ord->item;
    // Only plain fields and view/field references are collected.
    if (item->type() != Item::FIELD_ITEM &&
        !(item->type() == Item::REF_ITEM &&
          item->real_type() == Item::FIELD_ITEM &&
          ((((Item_ref *) item)->ref_type() == Item_ref::VIEW_REF) ||
           (((Item_ref *) item)->ref_type() == Item_ref::REF))))
      continue;

    Field_pair *grouping_tmp_field=
      new Field_pair(((Item_field *)item->real_item())->field, item);
    /*
      NOTE(review): push_back() returns true on failure (OOM), yet this
      returns false (the "success" value) in that case -- confirm the
      intended return-value semantics against the callers.
    */
    if (grouping_tmp_fields.push_back(grouping_tmp_field, thd->mem_root))
      return false;
  }
  if (grouping_tmp_fields.elements)
    return false;
  return true;
}


/**
  @brief
    For a condition check possibility of extraction a formula over grouping
    fields

  @param thd      The thread handle
  @param cond     The condition whose subformulas are to be analyzed
  @param checker  The checker callback function to be applied to the nodes
                  of the tree of the object

  @details
    This method traverses the AND-OR condition cond and for each subformula of
    the condition it checks whether it can be usable for the extraction of a
    condition over the grouping fields of this select. The method uses
    the call-back parameter checker to check whether a primary formula
    depends only on grouping fields.
    The subformulas that are not usable are marked with the flag
    MARKER_NO_EXTRACTION.
    The subformulas that can be entirely extracted are marked with the flag
    MARKER_FULL_EXTRACTION.
  @note
    This method is called before any call of
    extract_cond_for_grouping_fields.
    The flag MARKER_NO_EXTRACTION set in a subformula allows to avoid
    building clone for the subformula when extracting the pushable condition.
    The flag MARKER_FULL_EXTRACTION allows to delete later all top level
    conjuncts from cond.
*/

void
st_select_lex::check_cond_extraction_for_grouping_fields(THD *thd, Item *cond)
{
  if (cond->get_extraction_flag() == MARKER_NO_EXTRACTION)
    return;
  cond->clear_extraction_flag();
  if (cond->type() == Item::COND_ITEM)
  {
    Item_cond_and *and_cond=
      (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) ?
      ((Item_cond_and*) cond) : 0;

    /*
      NOTE(review): the template arguments of List / List_iterator
      (presumably <Item>) appear to have been lost in extraction of this
      file -- confirm against the upstream source.
    */
    List *arg_list= ((Item_cond*) cond)->argument_list();
    List_iterator li(*arg_list);
    uint count= 0;         // to count items not containing MARKER_NO_EXTRACTION
    uint count_full= 0;    // to count items with MARKER_FULL_EXTRACTION
    Item *item;
    while ((item=li++))
    {
      check_cond_extraction_for_grouping_fields(thd, item);
      if (item->get_extraction_flag() != MARKER_NO_EXTRACTION)
      {
        count++;
        if (item->get_extraction_flag() == MARKER_FULL_EXTRACTION)
          count_full++;
      }
      else if (!and_cond)
        break;   // a non-extractable disjunct makes the whole OR unusable
    }
    if ((and_cond && count == 0) || item)
      cond->set_extraction_flag(MARKER_NO_EXTRACTION);
    if (count_full == arg_list->elements)
    {
      cond->set_extraction_flag(MARKER_FULL_EXTRACTION);
    }
    // The parent's mark subsumes the children's: reset them.
    if (cond->get_extraction_flag() != 0)
    {
      li.rewind();
      while ((item=li++))
        item->clear_extraction_flag();
    }
  }
  else
  {
    int fl= cond->excl_dep_on_grouping_fields(this) && !cond->is_expensive() ?
      MARKER_FULL_EXTRACTION : MARKER_NO_EXTRACTION;
    cond->set_extraction_flag(fl);
  }
}


/**
  @brief
    Build condition extractable from the given one depended on grouping fields

  @param thd            The thread handle
  @param cond           The condition from which the condition depended
                        on grouping fields is to be extracted
  @param no_top_clones  If it's true then no clones for the top fully
                        extractable conjuncts are built

  @details
    For the given condition cond this method finds out what condition depended
    only on the grouping fields can be extracted from cond. If such condition C
    exists the method builds the item for it.
    This method uses the flags MARKER_NO_EXTRACTION and
    MARKER_FULL_EXTRACTION set by the preliminary call of
    st_select_lex::check_cond_extraction_for_grouping_fields
    to figure out whether a subformula depends only on these fields or not.
  @note
    The built condition C is always implied by the condition cond
    (cond => C). The method tries to build the least restrictive such
    condition (i.e.
for any other condition C' such that cond => C' + we have C => C'). + @note + The build item is not ready for usage: substitution for the field items + has to be done and it has to be re-fixed. + + @retval + the built condition depended only on grouping fields if such a condition exists + NULL if there is no such a condition +*/ + +Item *st_select_lex::build_cond_for_grouping_fields(THD *thd, Item *cond, + bool no_top_clones) +{ + if (cond->get_extraction_flag() == MARKER_FULL_EXTRACTION) + { + if (no_top_clones) + return cond; + cond->clear_extraction_flag(); + return cond->build_clone(thd); + } + if (cond->type() == Item::COND_ITEM) + { + bool cond_and= false; + Item_cond *new_cond; + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + cond_and= true; + new_cond= new (thd->mem_root) Item_cond_and(thd); + } + else + new_cond= new (thd->mem_root) Item_cond_or(thd); + if (unlikely(!new_cond)) + return 0; + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + if (item->get_extraction_flag() == MARKER_NO_EXTRACTION) + { + DBUG_ASSERT(cond_and); + item->clear_extraction_flag(); + continue; + } + Item *fix= build_cond_for_grouping_fields(thd, item, + no_top_clones & cond_and); + if (unlikely(!fix)) + { + if (cond_and) + continue; + break; + } + new_cond->argument_list()->push_back(fix, thd->mem_root); + } + + if (!cond_and && item) + { + while((item= li++)) + item->clear_extraction_flag(); + return 0; + } + switch (new_cond->argument_list()->elements) + { + case 0: + return 0; + case 1: + return new_cond->argument_list()->head(); + default: + return new_cond; + } + } + return 0; +} + + +bool st_select_lex::set_nest_level(int new_nest_level) +{ + DBUG_ENTER("st_select_lex::set_nest_level"); + DBUG_PRINT("enter", ("select #%d %p nest level: %d", + select_number, this, new_nest_level)); + if (new_nest_level > (int) MAX_SELECT_NESTING) + { + my_error(ER_TOO_HIGH_LEVEL_OF_NESTING_FOR_SELECT, MYF(0)); + 
DBUG_RETURN(TRUE); + } + nest_level= new_nest_level; + new_nest_level++; + for (SELECT_LEX_UNIT *u= first_inner_unit(); u; u= u->next_unit()) + { + if (u->set_nest_level(new_nest_level)) + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + +bool st_select_lex_unit::set_nest_level(int new_nest_level) +{ + DBUG_ENTER("st_select_lex_unit::set_nest_level"); + for(SELECT_LEX *sl= first_select(); sl; sl= sl->next_select()) + { + if (sl->set_nest_level(new_nest_level)) + DBUG_RETURN(TRUE); + } + if (fake_select_lex && + fake_select_lex->set_nest_level(new_nest_level)) + DBUG_RETURN(TRUE); + DBUG_RETURN(FALSE); +} + + +bool st_select_lex::check_parameters(SELECT_LEX *main_select) +{ + DBUG_ENTER("st_select_lex::check_parameters"); + DBUG_PRINT("enter", ("select #%d %p nest level: %d", + select_number, this, nest_level)); + + + if ((options & OPTION_PROCEDURE_CLAUSE) && + (!parent_lex->selects_allow_procedure || + next_select() != NULL || + this != master_unit()->first_select() || + nest_level != 0)) + { + my_error(ER_CANT_USE_OPTION_HERE, MYF(0), "PROCEDURE"); + DBUG_RETURN(TRUE); + } + + if ((options & SELECT_HIGH_PRIORITY) && this != main_select) + { + my_error(ER_CANT_USE_OPTION_HERE, MYF(0), "HIGH_PRIORITY"); + DBUG_RETURN(TRUE); + } + if ((options & OPTION_BUFFER_RESULT) && this != main_select) + { + my_error(ER_CANT_USE_OPTION_HERE, MYF(0), "SQL_BUFFER_RESULT"); + DBUG_RETURN(TRUE); + } + if ((options & OPTION_FOUND_ROWS) && this != main_select) + { + my_error(ER_CANT_USE_OPTION_HERE, MYF(0), "SQL_CALC_FOUND_ROWS"); + DBUG_RETURN(TRUE); + } + if (options & OPTION_NO_QUERY_CACHE) + { + /* + Allow this flag only on the first top-level SELECT statement, if + SQL_CACHE wasn't specified. 
+ */ + if (this != main_select) + { + my_error(ER_CANT_USE_OPTION_HERE, MYF(0), "SQL_NO_CACHE"); + DBUG_RETURN(TRUE); + } + if (parent_lex->sql_cache == LEX::SQL_CACHE) + { + my_error(ER_WRONG_USAGE, MYF(0), "SQL_CACHE", "SQL_NO_CACHE"); + DBUG_RETURN(TRUE); + } + parent_lex->safe_to_cache_query=0; + parent_lex->sql_cache= LEX::SQL_NO_CACHE; + } + if (options & OPTION_TO_QUERY_CACHE) + { + /* + Allow this flag only on the first top-level SELECT statement, if + SQL_NO_CACHE wasn't specified. + */ + if (this != main_select) + { + my_error(ER_CANT_USE_OPTION_HERE, MYF(0), "SQL_CACHE"); + DBUG_RETURN(TRUE); + } + if (parent_lex->sql_cache == LEX::SQL_NO_CACHE) + { + my_error(ER_WRONG_USAGE, MYF(0), "SQL_NO_CACHE", "SQL_CACHE"); + DBUG_RETURN(TRUE); + } + parent_lex->safe_to_cache_query=1; + parent_lex->sql_cache= LEX::SQL_CACHE; + } + + for (SELECT_LEX_UNIT *u= first_inner_unit(); u; u= u->next_unit()) + { + if (u->check_parameters(main_select)) + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +bool st_select_lex_unit::check_parameters(SELECT_LEX *main_select) +{ + for(SELECT_LEX *sl= first_select(); sl; sl= sl->next_select()) + { + if (sl->check_parameters(main_select)) + return TRUE; + } + return fake_select_lex && fake_select_lex->check_parameters(main_select); +} + + +bool LEX::check_main_unit_semantics() +{ + if (unit.set_nest_level(0) || + unit.check_parameters(first_select_lex())) + return TRUE; + if (check_cte_dependencies_and_resolve_references()) + return TRUE; + return FALSE; +} + +int set_statement_var_if_exists(THD *thd, const char *var_name, + size_t var_name_length, ulonglong value) +{ + sys_var *sysvar; + if (unlikely(thd->lex->sql_command == SQLCOM_CREATE_VIEW)) + { + my_error(ER_VIEW_SELECT_CLAUSE, MYF(0), "[NO]WAIT"); + return 1; + } + if (unlikely(thd->lex->sphead)) + { + my_error(ER_SP_BADSTATEMENT, MYF(0), "[NO]WAIT"); + return 1; + } + if ((sysvar= find_sys_var(thd, var_name, var_name_length, true))) + { + Item *item= new (thd->mem_root) 
Item_uint(thd, value); + set_var *var= new (thd->mem_root) set_var(thd, OPT_SESSION, sysvar, + &null_clex_str, item); + + if (unlikely(!item) || unlikely(!var) || + unlikely(thd->lex->stmt_var_list.push_back(var, thd->mem_root))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return 1; + } + } + return 0; +} + + +bool LEX::sp_add_cfetch(THD *thd, const LEX_CSTRING *name) +{ + uint offset; + sp_instr_cfetch *i; + + if (!spcont->find_cursor(name, &offset, false)) + { + my_error(ER_SP_CURSOR_MISMATCH, MYF(0), name->str); + return true; + } + i= new (thd->mem_root) + sp_instr_cfetch(sphead->instructions(), spcont, offset, + !(thd->variables.sql_mode & MODE_ORACLE)); + if (unlikely(i == NULL) || unlikely(sphead->add_instr(i))) + return true; + return false; +} + + +bool LEX::sp_add_agg_cfetch() +{ + sphead->m_flags|= sp_head::HAS_AGGREGATE_INSTR; + sp_instr_agg_cfetch *i= + new (thd->mem_root) sp_instr_agg_cfetch(sphead->instructions(), spcont); + return i == NULL || sphead->add_instr(i); +} + + +bool LEX::create_or_alter_view_finalize(THD *thd, Table_ident *table_ident) +{ + sql_command= SQLCOM_CREATE_VIEW; + /* first table in list is target VIEW name */ + if (!first_select_lex()->add_table_to_list(thd, table_ident, NULL, + TL_OPTION_UPDATING, + TL_IGNORE, + MDL_EXCLUSIVE)) + return true; + query_tables->open_strategy= TABLE_LIST::OPEN_STUB; + return false; +} + + +bool LEX::add_alter_view(THD *thd, uint16 algorithm, + enum_view_suid suid, + Table_ident *table_ident) +{ + if (unlikely(sphead)) + { + my_error(ER_SP_BADSTATEMENT, MYF(0), "ALTER VIEW"); + return true; + } + if (unlikely(!(create_view= new (thd->mem_root) + Create_view_info(VIEW_ALTER, algorithm, suid)))) + return true; + return create_or_alter_view_finalize(thd, table_ident); +} + + +bool LEX::add_create_view(THD *thd, DDL_options_st ddl, + uint16 algorithm, enum_view_suid suid, + Table_ident *table_ident) +{ + if (unlikely(set_create_options_with_check(ddl))) + return true; + if (unlikely(!(create_view= 
new (thd->mem_root) + Create_view_info(ddl.or_replace() ? + VIEW_CREATE_OR_REPLACE : + VIEW_CREATE_NEW, + algorithm, suid)))) + return true; + return create_or_alter_view_finalize(thd, table_ident); +} + + +bool LEX::call_statement_start(THD *thd, sp_name *name) +{ + Database_qualified_name pkgname(&null_clex_str, &null_clex_str); + const Sp_handler *sph= &sp_handler_procedure; + sql_command= SQLCOM_CALL; + value_list.empty(); + if (unlikely(sph->sp_resolve_package_routine(thd, thd->lex->sphead, + name, &sph, &pkgname))) + return true; + if (unlikely(!(m_sql_cmd= new (thd->mem_root) Sql_cmd_call(name, sph)))) + return true; + sph->add_used_routine(this, thd, name); + if (pkgname.m_name.length) + sp_handler_package_body.add_used_routine(this, thd, &pkgname); + return false; +} + + +bool LEX::call_statement_start(THD *thd, const Lex_ident_sys_st *name) +{ + sp_name *spname= make_sp_name(thd, name); + return unlikely(!spname) || call_statement_start(thd, spname); +} + + +bool LEX::call_statement_start(THD *thd, const Lex_ident_sys_st *name1, + const Lex_ident_sys_st *name2) +{ + sp_name *spname= make_sp_name(thd, name1, name2); + return unlikely(!spname) || call_statement_start(thd, spname); +} + + +bool LEX::call_statement_start(THD *thd, + const Lex_ident_sys_st *db, + const Lex_ident_sys_st *pkg, + const Lex_ident_sys_st *proc) +{ + Database_qualified_name q_db_pkg(db, pkg); + Database_qualified_name q_pkg_proc(pkg, proc); + sp_name *spname; + + sql_command= SQLCOM_CALL; + + if (check_db_name(reinterpret_cast + (const_cast + (static_cast(db))))) + { + my_error(ER_WRONG_DB_NAME, MYF(0), db->str); + return true; + } + if (check_routine_name(pkg) || + check_routine_name(proc)) + return true; + + // Concat `pkg` and `name` to `pkg.name` + LEX_CSTRING pkg_dot_proc; + if (q_pkg_proc.make_qname(thd->mem_root, &pkg_dot_proc) || + check_ident_length(&pkg_dot_proc) || + !(spname= new (thd->mem_root) sp_name(db, &pkg_dot_proc, true))) + return true; + + 
sp_handler_package_function.add_used_routine(thd->lex, thd, spname); + sp_handler_package_body.add_used_routine(thd->lex, thd, &q_db_pkg); + + return !(m_sql_cmd= new (thd->mem_root) Sql_cmd_call(spname, + &sp_handler_package_procedure)); +} + + +sp_package *LEX::get_sp_package() const +{ + return sphead ? sphead->get_package() : NULL; +} + + +sp_package *LEX::create_package_start(THD *thd, + enum_sql_command command, + const Sp_handler *sph, + const sp_name *name_arg, + DDL_options_st options) +{ + sp_package *pkg; + + if (unlikely(sphead)) + { + my_error(ER_SP_NO_RECURSIVE_CREATE, MYF(0), sph->type_str()); + return NULL; + } + if (unlikely(set_command_with_check(command, options))) + return NULL; + if (sph->type() == SP_TYPE_PACKAGE_BODY) + { + /* + If we start parsing a "CREATE PACKAGE BODY", we need to load + the corresponding "CREATE PACKAGE", for the following reasons: + 1. "CREATE PACKAGE BODY" is allowed only if "CREATE PACKAGE" + was done earlier for the same package name. + So if "CREATE PACKAGE" does not exist, we throw an error here. + 2. When parsing "CREATE PACKAGE BODY", we need to know all package + public and private routine names, to translate procedure and + function calls correctly. + For example, this statement inside a package routine: + CALL p; + can be translated to: + CALL db.pkg.p; -- p is a known (public or private) package routine + CALL db.p; -- p is not a known package routine + */ + sp_head *spec; + int ret= sp_handler_package_spec. 
+ sp_cache_routine_reentrant(thd, name_arg, &spec); + if (unlikely(!spec)) + { + if (!ret) + my_error(ER_SP_DOES_NOT_EXIST, MYF(0), + "PACKAGE", ErrConvDQName(name_arg).ptr()); + return 0; + } + } + if (unlikely(!(pkg= sp_package::create(this, name_arg, sph)))) + return NULL; + pkg->reset_thd_mem_root(thd); + pkg->init(this); + pkg->make_qname(pkg->get_main_mem_root(), &pkg->m_qname); + sphead= pkg; + return pkg; +} + + +bool LEX::create_package_finalize(THD *thd, + const sp_name *name, + const sp_name *name2, + const char *cpp_body_end) +{ + if (name2 && + (name2->m_explicit_name != name->m_explicit_name || + strcmp(name2->m_db.str, name->m_db.str) || + !Sp_handler::eq_routine_name(name2->m_name, name->m_name))) + { + bool exp= name2->m_explicit_name || name->m_explicit_name; + my_error(ER_END_IDENTIFIER_DOES_NOT_MATCH, MYF(0), + exp ? ErrConvDQName(name2).ptr() : name2->m_name.str, + exp ? ErrConvDQName(name).ptr() : name->m_name.str); + return true; + } + + sphead->set_stmt_end(thd, cpp_body_end); + sphead->restore_thd_mem_root(thd); + sp_package *pkg= sphead->get_package(); + DBUG_ASSERT(pkg); + return sphead->check_group_aggregate_instructions_forbid() || + pkg->validate_after_parser(thd); +} + + +bool LEX::add_grant_command(THD *thd, const List &columns) +{ + if (columns.elements) + { + thd->parse_error(); + return true; + } + return false; +} + + +Item *LEX::make_item_func_sysdate(THD *thd, uint fsp) +{ + /* + Unlike other time-related functions, SYSDATE() is + replication-unsafe because it is not affected by the + TIMESTAMP variable. It is unsafe even if + sysdate_is_now=1, because the slave may have + sysdate_is_now=0. + */ + set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + Item *item= global_system_variables.sysdate_is_now == 0 ? 
+ (Item *) new (thd->mem_root) Item_func_sysdate_local(thd, fsp) : + (Item *) new (thd->mem_root) Item_func_now_local(thd, fsp); + if (unlikely(item == NULL)) + return NULL; + safe_to_cache_query=0; + return item; +} + + +bool SELECT_LEX::vers_push_field(THD *thd, TABLE_LIST *table, + const LEX_CSTRING field_name) +{ + DBUG_ASSERT(field_name.str); + Item_field *fld= new (thd->mem_root) Item_field(thd, &context, + table->db, + table->alias, + field_name); + if (unlikely(!fld) || unlikely(item_list.push_back(fld))) + return true; + + if (thd->lex->view_list.elements) + { + LEX_CSTRING *l; + if (unlikely(!(l= thd->make_clex_string(field_name.str, + field_name.length))) || + unlikely(thd->lex->view_list.push_back(l))) + return true; + } + + return false; +} + + +Item *Lex_trim_st::make_item_func_trim_std(THD *thd) const +{ + if (m_remove) + { + switch (m_spec) { + case TRIM_BOTH: + return new (thd->mem_root) Item_func_trim(thd, m_source, m_remove); + case TRIM_LEADING: + return new (thd->mem_root) Item_func_ltrim(thd, m_source, m_remove); + case TRIM_TRAILING: + return new (thd->mem_root) Item_func_rtrim(thd, m_source, m_remove); + } + } + + switch (m_spec) { + case TRIM_BOTH: + return new (thd->mem_root) Item_func_trim(thd, m_source); + case TRIM_LEADING: + return new (thd->mem_root) Item_func_ltrim(thd, m_source); + case TRIM_TRAILING: + return new (thd->mem_root) Item_func_rtrim(thd, m_source); + } + DBUG_ASSERT(0); + return NULL; +} + + +Item *Lex_trim_st::make_item_func_trim_oracle(THD *thd) const +{ + if (m_remove) + { + switch (m_spec) { + case TRIM_BOTH: + return new (thd->mem_root) Item_func_trim_oracle(thd, m_source, m_remove); + case TRIM_LEADING: + return new (thd->mem_root) Item_func_ltrim_oracle(thd, m_source, m_remove); + case TRIM_TRAILING: + return new (thd->mem_root) Item_func_rtrim_oracle(thd, m_source, m_remove); + } + } + + switch (m_spec) { + case TRIM_BOTH: + return new (thd->mem_root) Item_func_trim_oracle(thd, m_source); + case TRIM_LEADING: + 
return new (thd->mem_root) Item_func_ltrim_oracle(thd, m_source); + case TRIM_TRAILING: + return new (thd->mem_root) Item_func_rtrim_oracle(thd, m_source); + } + DBUG_ASSERT(0); + return NULL; +} + + +Item *Lex_trim_st::make_item_func_trim(THD *thd) const +{ + return (thd->variables.sql_mode & MODE_ORACLE) ? + make_item_func_trim_oracle(thd) : + make_item_func_trim_std(thd); +} + + +Item *LEX::make_item_func_call_generic(THD *thd, Lex_ident_cli_st *cdb, + Lex_ident_cli_st *cname, List *args) +{ + Lex_ident_sys db(thd, cdb), name(thd, cname); + if (db.is_null() || name.is_null()) + return NULL; // EOM + /* + The following in practice calls: + Create_sp_func::create() + and builds a stored function. + + However, it's important to maintain the interface between the + parser and the implementation in item_create.cc clean, + since this will change with WL#2128 (SQL PATH): + - INFORMATION_SCHEMA.version() is the SQL 99 syntax for the native + function version(), + - MySQL.version() is the SQL 2003 syntax for the native function + version() (a vendor can specify any schema). + */ + + if (!name.str || check_db_name((LEX_STRING*) static_cast(&db))) + { + my_error(ER_WRONG_DB_NAME, MYF(0), db.str); + return NULL; + } + if (check_routine_name(&name)) + return NULL; + + Create_qfunc *builder= find_qualified_function_builder(thd); + DBUG_ASSERT(builder); + return builder->create_with_db(thd, &db, &name, true, args); +} + + +/* + Create a 3-step qualified function call. 
+ Currently it's possible for package routines only, e.g.: + SELECT db.pkg.func(); +*/ +Item *LEX::make_item_func_call_generic(THD *thd, + Lex_ident_cli_st *cdb, + Lex_ident_cli_st *cpkg, + Lex_ident_cli_st *cfunc, + List *args) +{ + static Lex_cstring dot(".", 1); + Lex_ident_sys db(thd, cdb), pkg(thd, cpkg), func(thd, cfunc); + Database_qualified_name q_db_pkg(db, pkg); + Database_qualified_name q_pkg_func(pkg, func); + sp_name *qname; + + if (db.is_null() || pkg.is_null() || func.is_null()) + return NULL; // EOM + + if (check_db_name((LEX_STRING*) static_cast(&db))) + { + my_error(ER_WRONG_DB_NAME, MYF(0), db.str); + return NULL; + } + if (check_routine_name(&pkg) || + check_routine_name(&func)) + return NULL; + + // Concat `pkg` and `name` to `pkg.name` + LEX_CSTRING pkg_dot_func; + if (q_pkg_func.make_qname(thd->mem_root, &pkg_dot_func) || + check_ident_length(&pkg_dot_func) || + !(qname= new (thd->mem_root) sp_name(&db, &pkg_dot_func, true))) + return NULL; + + sp_handler_package_function.add_used_routine(thd->lex, thd, qname); + sp_handler_package_body.add_used_routine(thd->lex, thd, &q_db_pkg); + + thd->lex->safe_to_cache_query= 0; + + if (args && args->elements > 0) + return new (thd->mem_root) Item_func_sp(thd, thd->lex->current_context(), + qname, &sp_handler_package_function, + *args); + return new (thd->mem_root) Item_func_sp(thd, thd->lex->current_context(), + qname, &sp_handler_package_function); +} + + +Item *LEX::make_item_func_call_native_or_parse_error(THD *thd, + Lex_ident_cli_st &name, + List *args) +{ + Create_func *builder= native_functions_hash.find(thd, name); + DBUG_EXECUTE_IF("make_item_func_call_native_simulate_not_found", + builder= NULL;); + if (builder) + return builder->create_func(thd, &name, args); + thd->parse_error(ER_SYNTAX_ERROR, name.end()); + return NULL; +} + + +Item *LEX::create_item_qualified_asterisk(THD *thd, + const Lex_ident_sys_st *name) +{ + Item *item; + if (!(item= new (thd->mem_root) Item_field(thd, 
current_context(), + null_clex_str, *name, + star_clex_str))) + return NULL; + current_select->parsing_place == IN_RETURNING ? + thd->lex->returning()->with_wild++ : current_select->with_wild++; + return item; +} + + +Item *LEX::create_item_qualified_asterisk(THD *thd, + const Lex_ident_sys_st *a, + const Lex_ident_sys_st *b) +{ + Item *item; + Lex_ident_sys_st schema= thd->client_capabilities & CLIENT_NO_SCHEMA ? + Lex_ident_sys() : *a; + if (!(item= new (thd->mem_root) Item_field(thd, current_context(), + schema, *b, star_clex_str))) + return NULL; + current_select->parsing_place == IN_RETURNING ? + thd->lex->returning()->with_wild++ : current_select->with_wild++; + return item; +} + + +bool Lex_ident_sys_st::copy_ident_cli(THD *thd, const Lex_ident_cli_st *str) +{ + return thd->to_ident_sys_alloc(this, str); +} + +bool Lex_ident_sys_st::copy_keyword(THD *thd, const Lex_ident_cli_st *str) +{ + return thd->make_lex_string(static_cast(this), + str->str, str->length) == NULL; +} + +bool Lex_ident_sys_st::copy_or_convert(THD *thd, + const Lex_ident_cli_st *src, + CHARSET_INFO *cs) +{ + if (!src->is_8bit()) + return copy_keyword(thd, src); // 7bit string makes a wellformed identifier + return convert(thd, src, cs); +} + + +bool Lex_ident_sys_st::copy_sys(THD *thd, const LEX_CSTRING *src) +{ + if (thd->check_string_for_wellformedness(src->str, src->length, + system_charset_info)) + return true; + return thd->make_lex_string(this, src->str, src->length) == NULL; +} + + +bool Lex_ident_sys_st::convert(THD *thd, + const LEX_CSTRING *src, CHARSET_INFO *cs) +{ + LEX_STRING tmp; + if (thd->convert_with_error(system_charset_info, &tmp, cs, + src->str, src->length)) + return true; + str= tmp.str; + length= tmp.length; + return false; +} + + +bool Lex_ident_sys_st::to_size_number(ulonglong *to) const +{ + ulonglong number; + uint text_shift_number= 0; + longlong prefix_number; + const char *start_ptr= str; + size_t str_len= length; + const char *end_ptr= start_ptr + str_len; + 
int error; + prefix_number= my_strtoll10(start_ptr, (char**) &end_ptr, &error); + if (likely((start_ptr + str_len - 1) == end_ptr)) + { + switch (end_ptr[0]) + { + case 'g': + case 'G': text_shift_number+=30; break; + case 'm': + case 'M': text_shift_number+=20; break; + case 'k': + case 'K': text_shift_number+=10; break; + default: + my_error(ER_WRONG_SIZE_NUMBER, MYF(0)); + return true; + } + if (unlikely(prefix_number >> 31)) + { + my_error(ER_SIZE_OVERFLOW_ERROR, MYF(0)); + return true; + } + number= prefix_number << text_shift_number; + } + else + { + my_error(ER_WRONG_SIZE_NUMBER, MYF(0)); + return true; + } + *to= number; + return false; +} + + +#ifdef WITH_PARTITION_STORAGE_ENGINE +bool LEX::part_values_current(THD *thd) +{ + partition_element *elem= part_info->curr_part_elem; + if (!is_partition_management()) + { + if (unlikely(part_info->part_type != VERSIONING_PARTITION)) + { + part_type_error(thd, NULL, "SYSTEM_TIME", part_info); + return true; + } + } + else + { + DBUG_ASSERT(create_last_non_select_table); + DBUG_ASSERT(create_last_non_select_table->table_name.str); + // FIXME: other ALTER commands? 
+ my_error(ER_VERS_WRONG_PARTS, MYF(0), + create_last_non_select_table->table_name.str); + return true; + } + elem->type= partition_element::CURRENT; + DBUG_ASSERT(part_info->vers_info); + part_info->vers_info->now_part= elem; + return false; +} + + +bool LEX::part_values_history(THD *thd) +{ + partition_element *elem= part_info->curr_part_elem; + if (!is_partition_management()) + { + if (unlikely(part_info->part_type != VERSIONING_PARTITION)) + { + part_type_error(thd, NULL, "SYSTEM_TIME", part_info); + return true; + } + } + else + { + if (unlikely(part_info->vers_init_info(thd))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return true; + } + elem->id= UINT_MAX32; + } + DBUG_ASSERT(part_info->vers_info); + if (unlikely(part_info->vers_info->now_part)) + { + DBUG_ASSERT(create_last_non_select_table); + DBUG_ASSERT(create_last_non_select_table->table_name.str); + my_error(ER_VERS_WRONG_PARTS, MYF(0), + create_last_non_select_table->table_name.str); + return true; + } + elem->type= partition_element::HISTORY; + return false; +} +#endif /* WITH_PARTITION_STORAGE_ENGINE */ + + +bool LEX::last_field_generated_always_as_row_start_or_end(Lex_ident *p, + const char *type, + uint flag) +{ + if (unlikely(p->str)) + { + my_error(ER_VERS_DUPLICATE_ROW_START_END, MYF(0), type, + last_field->field_name.str); + return true; + } + last_field->flags|= (flag | NO_DEFAULT_VALUE_FLAG | NOT_NULL_FLAG); + DBUG_ASSERT(p); + *p= last_field->field_name; + return false; +} + + + +bool LEX::last_field_generated_always_as_row_start() +{ + Vers_parse_info &info= vers_get_info(); + Lex_ident *p= &info.as_row.start; + return last_field_generated_always_as_row_start_or_end(p, "START", + VERS_ROW_START); +} + + +bool LEX::last_field_generated_always_as_row_end() +{ + Vers_parse_info &info= vers_get_info(); + Lex_ident *p= &info.as_row.end; + return last_field_generated_always_as_row_start_or_end(p, "END", + VERS_ROW_END); +} + +void st_select_lex_unit::reset_distinct() +{ + union_distinct= 
NULL; + for(SELECT_LEX *sl= first_select()->next_select(); + sl; + sl= sl->next_select()) + { + if (sl->distinct) + { + union_distinct= sl; + } + } +} + + +void LEX::save_values_list_state() +{ + current_select->save_many_values= many_values; + current_select->save_insert_list= insert_list; +} + + +void LEX::restore_values_list_state() +{ + many_values= current_select->save_many_values; + insert_list= current_select->save_insert_list; +} + + +void st_select_lex_unit::fix_distinct() +{ + if (union_distinct && this != union_distinct->master_unit()) + reset_distinct(); +} + + +void st_select_lex_unit::register_select_chain(SELECT_LEX *first_sel) +{ + DBUG_ASSERT(first_sel != 0); + slave= first_sel; + first_sel->prev= &slave; + for(SELECT_LEX *sel=first_sel; sel; sel= sel->next_select()) + { + sel->master= (st_select_lex_node *)this; + uncacheable|= sel->uncacheable; + } +} + + +void st_select_lex::register_unit(SELECT_LEX_UNIT *unit, + Name_resolution_context *outer_context) +{ + if ((unit->next= slave)) + slave->prev= &unit->next; + unit->prev= &slave; + slave= unit; + unit->master= this; + uncacheable|= unit->uncacheable; + + for(SELECT_LEX *sel= unit->first_select();sel; sel= sel->next_select()) + { + sel->context.outer_context= outer_context; + } +} + + +void st_select_lex::add_statistics(SELECT_LEX_UNIT *unit) +{ + for (; + unit; + unit= unit->next_unit()) + for(SELECT_LEX *child= unit->first_select(); + child; + child= child->next_select()) + { + /* + A subselect can add fields to an outer select. + Reserve space for them. + */ + select_n_where_fields+= child->select_n_where_fields; + /* + Aggregate functions in having clause may add fields + to an outer select. Count them also. 
+ */ + select_n_having_items+= child->select_n_having_items; + } +} + + +bool LEX::main_select_push(bool service) +{ + DBUG_ENTER("LEX::main_select_push"); + DBUG_PRINT("info", ("service: %u", service)); + current_select_number= ++thd->lex->stmt_lex->current_select_number; + builtin_select.select_number= current_select_number; + builtin_select.is_service_select= service; + if (push_select(&builtin_select)) + DBUG_RETURN(TRUE); + DBUG_RETURN(FALSE); +} + +void Lex_select_lock::set_to(SELECT_LEX *sel) +{ + if (defined_lock) + { + if (sel->master_unit() && + sel == sel->master_unit()->fake_select_lex) + sel->master_unit()->set_lock_to_the_last_select(*this); + else + { + thr_lock_type lock_type; + sel->parent_lex->safe_to_cache_query= 0; + if (unlikely(skip_locked)) + { + lock_type= update_lock ? TL_WRITE_SKIP_LOCKED : TL_READ_SKIP_LOCKED; + } + else + { + lock_type= update_lock ? TL_WRITE : TL_READ_WITH_SHARED_LOCKS; + } + sel->lock_type= lock_type; + sel->select_lock= (update_lock ? st_select_lex::select_lock_type::FOR_UPDATE : + st_select_lex::select_lock_type::IN_SHARE_MODE); + sel->set_lock_for_tables(lock_type, false, skip_locked); + } + } + else + { + /* + select_lock can be FOR_UPDATE in case of + (SELECT x FROM t WINDOW w1 AS () FOR UPDATE) LIMIT 1 + */ + sel->select_lock= st_select_lex::select_lock_type::NONE; + } +} + +bool Lex_order_limit_lock::set_to(SELECT_LEX *sel) +{ + /*TODO: lock */ + //if (lock.defined_lock && sel == sel->master_unit()->fake_select_lex) + // return TRUE; + if (lock.defined_timeout) + { + THD *thd= sel->parent_lex->thd; + if (set_statement_var_if_exists(thd, + C_STRING_WITH_LEN("lock_wait_timeout"), + lock.timeout) || + set_statement_var_if_exists(thd, + C_STRING_WITH_LEN("innodb_lock_wait_timeout"), + lock.timeout)) + return TRUE; + } + lock.set_to(sel); + sel->limit_params= limit; + if (order_list) + { + if (sel->get_linkage() != GLOBAL_OPTIONS_TYPE && + sel->olap != UNSPECIFIED_OLAP_TYPE && + (sel->get_linkage() != UNION_TYPE || 
sel->braces)) + { + my_error(ER_WRONG_USAGE, MYF(0), + "CUBE/ROLLUP", "ORDER BY"); + return TRUE; + } + sel->order_list= *(order_list); + } + sel->is_set_query_expr_tail= true; + return FALSE; +} + + +static void change_item_list_context(List *list, + Name_resolution_context *context) +{ + List_iterator_fast it (*list); + Item *item; + while((item= it++)) + { + item->walk(&Item::change_context_processor, FALSE, (void *)context); + } +} + + +bool LEX::insert_select_hack(SELECT_LEX *sel) +{ + DBUG_ENTER("LEX::insert_select_hack"); + + DBUG_ASSERT(first_select_lex() == &builtin_select); + DBUG_ASSERT(sel != NULL); + + DBUG_ASSERT(builtin_select.first_inner_unit() == NULL); + + if (builtin_select.link_prev) + { + if ((*builtin_select.link_prev= builtin_select.link_next)) + ((st_select_lex *)builtin_select.link_next)->link_prev= + builtin_select.link_prev; + builtin_select.link_prev= NULL; // indicator of removal + } + + if (set_main_unit(sel->master_unit())) + return true; + + DBUG_ASSERT(builtin_select.table_list.elements == 1); + TABLE_LIST *insert_table= builtin_select.table_list.first; + + if (!(insert_table->next_local= sel->table_list.first)) + { + sel->table_list.next= &insert_table->next_local; + } + sel->table_list.first= insert_table; + sel->table_list.elements++; + insert_table->select_lex= sel; + + sel->context.first_name_resolution_table= insert_table; + builtin_select.context= sel->context; + change_item_list_context(&field_list, &sel->context); + + if (sel->tvc && !sel->next_select() && + (sql_command == SQLCOM_INSERT_SELECT || + sql_command == SQLCOM_REPLACE_SELECT)) + { + DBUG_PRINT("info", ("'Usual' INSERT detected")); + many_values= sel->tvc->lists_of_values; + sel->options= sel->tvc->select_options; + sel->tvc= NULL; + if (sql_command == SQLCOM_INSERT_SELECT) + sql_command= SQLCOM_INSERT; + else + sql_command= SQLCOM_REPLACE; + } + + + for (SELECT_LEX *sel= all_selects_list; + sel; + sel= sel->next_select_in_list()) + { + if (sel->select_number != 
1) + sel->select_number--; + }; + + DBUG_RETURN(FALSE); +} + + +/** + Create an Item_singlerow_subselect for a query expression. +*/ + +Item *LEX::create_item_query_expression(THD *thd, + st_select_lex_unit *unit) +{ + if (clause_that_disallows_subselect) + { + my_error(ER_SUBQUERIES_NOT_SUPPORTED, MYF(0), + clause_that_disallows_subselect); + return NULL; + } + + // Add the subtree of subquery to the current SELECT_LEX + SELECT_LEX *curr_sel= select_stack_head(); + DBUG_ASSERT(current_select == curr_sel || + (curr_sel == NULL && current_select == &builtin_select)); + if (!curr_sel) + { + curr_sel= &builtin_select; + curr_sel->register_unit(unit, &curr_sel->context); + curr_sel->add_statistics(unit); + } + + return new (thd->mem_root) + Item_singlerow_subselect(thd, unit->first_select()); +} + + +SELECT_LEX_UNIT *LEX::parsed_select_expr_start(SELECT_LEX *s1, SELECT_LEX *s2, + enum sub_select_type unit_type, + bool distinct) +{ + SELECT_LEX_UNIT *res; + SELECT_LEX *sel1; + SELECT_LEX *sel2; + if (!s1->next_select()) + sel1= s1; + else + { + sel1= wrap_unit_into_derived(s1->master_unit()); + if (!sel1) + return NULL; + } + if (!s2->next_select()) + sel2= s2; + else + { + sel2= wrap_unit_into_derived(s2->master_unit()); + if (!sel2) + return NULL; + } + sel1->link_neighbour(sel2); + sel2->set_linkage_and_distinct(unit_type, distinct); + sel2->first_nested= sel1->first_nested= sel1; + res= create_unit(sel1); + if (res == NULL) + return NULL; + res->pre_last_parse= sel1; + push_select(res->fake_select_lex); + return res; +} + + +SELECT_LEX_UNIT *LEX::parsed_select_expr_cont(SELECT_LEX_UNIT *unit, + SELECT_LEX *s2, + enum sub_select_type unit_type, + bool distinct, bool oracle) +{ + DBUG_ASSERT(!s2->next_select()); + SELECT_LEX *sel1= s2; + SELECT_LEX *last= unit->pre_last_parse->next_select(); + + int cmp= oracle? 
0 : cmp_unit_op(unit_type, last->get_linkage()); + if (cmp == 0) + { + sel1->first_nested= last->first_nested; + } + else if (cmp > 0) + { + last->first_nested= unit->pre_last_parse; + sel1->first_nested= last; + } + else /* cmp < 0 */ + { + SELECT_LEX *first_in_nest= last->first_nested; + if (first_in_nest->first_nested != first_in_nest) + { + /* There is a priority jump starting from first_in_nest */ + if ((last= create_priority_nest(first_in_nest)) == NULL) + return NULL; + unit->fix_distinct(); + } + sel1->first_nested= last->first_nested; + } + last->link_neighbour(sel1); + sel1->set_master_unit(unit); + sel1->set_linkage_and_distinct(unit_type, distinct); + unit->pre_last_parse= last; + return unit; +} + + +/** + Add primary expression as the next term in a given query expression body + producing a new query expression body +*/ + +SELECT_LEX_UNIT * +LEX::add_primary_to_query_expression_body(SELECT_LEX_UNIT *unit, + SELECT_LEX *sel, + enum sub_select_type unit_type, + bool distinct, + bool oracle) +{ + SELECT_LEX *sel2= sel; + if (sel->master_unit() && sel->master_unit()->first_select()->next_select()) + { + sel2= wrap_unit_into_derived(sel->master_unit()); + if (!sel2) + return NULL; + } + SELECT_LEX *sel1= unit->first_select(); + if (!sel1->next_select()) + unit= parsed_select_expr_start(sel1, sel2, unit_type, distinct); + else + unit= parsed_select_expr_cont(unit, sel2, unit_type, distinct, oracle); + return unit; +} + + +SELECT_LEX_UNIT * +LEX::add_primary_to_query_expression_body(SELECT_LEX_UNIT *unit, + SELECT_LEX *sel, + enum sub_select_type unit_type, + bool distinct) +{ + return + add_primary_to_query_expression_body(unit, sel, unit_type, distinct, + thd->variables.sql_mode & MODE_ORACLE); +} + +/** + Add query primary to a parenthesized query primary + producing a new query expression body +*/ + +SELECT_LEX_UNIT * +LEX::add_primary_to_query_expression_body_ext_parens( + SELECT_LEX_UNIT *unit, + SELECT_LEX *sel, + enum sub_select_type unit_type, + 
bool distinct) +{ + SELECT_LEX *sel1= unit->first_select(); + if (unit->first_select()->next_select()) + { + sel1= wrap_unit_into_derived(unit); + if (!sel1) + return NULL; + if (!create_unit(sel1)) + return NULL; + } + SELECT_LEX *sel2= sel; + if (sel->master_unit() && sel->master_unit()->first_select()->next_select()) + { + sel2= wrap_unit_into_derived(sel->master_unit()); + if (!sel2) + return NULL; + } + unit= parsed_select_expr_start(sel1, sel2, unit_type, distinct); + return unit; +} + + +/** + Process multi-operand query expression body +*/ + +bool LEX::parsed_multi_operand_query_expression_body(SELECT_LEX_UNIT *unit) +{ + SELECT_LEX *first_in_nest= + unit->pre_last_parse->next_select()->first_nested; + if (first_in_nest->first_nested != first_in_nest) + { + /* There is a priority jump starting from first_in_nest */ + if (create_priority_nest(first_in_nest) == NULL) + return true; + unit->fix_distinct(); + } + return false; +} + + +/** + Add non-empty tail to a query expression body +*/ + +SELECT_LEX_UNIT *LEX::add_tail_to_query_expression_body(SELECT_LEX_UNIT *unit, + Lex_order_limit_lock *l) +{ + DBUG_ASSERT(l != NULL); + pop_select(); + SELECT_LEX *sel= unit->first_select()->next_select() ? unit->fake_select_lex : + unit->first_select(); + l->set_to(sel); + return unit; +} + + +/** + Add non-empty tail to a parenthesized query primary +*/ + +SELECT_LEX_UNIT * +LEX::add_tail_to_query_expression_body_ext_parens(SELECT_LEX_UNIT *unit, + Lex_order_limit_lock *l) +{ + SELECT_LEX *sel= unit->first_select()->next_select() ? 
unit->fake_select_lex : + unit->first_select(); + + DBUG_ASSERT(l != NULL); + + pop_select(); + if (sel->is_set_query_expr_tail) + { + if (!l->order_list && !sel->limit_params.explicit_limit) + l->order_list= &sel->order_list; + else + { + if (!unit) + return NULL; + sel= wrap_unit_into_derived(unit); + if (!sel) + return NULL; + if (!create_unit(sel)) + return NULL; + } + } + l->set_to(sel); + return sel->master_unit(); +} + + +/** + Process subselect parsing +*/ + +SELECT_LEX *LEX::parsed_subselect(SELECT_LEX_UNIT *unit) +{ + if (clause_that_disallows_subselect) + { + my_error(ER_SUBQUERIES_NOT_SUPPORTED, MYF(0), + clause_that_disallows_subselect); + return NULL; + } + + // Add the subtree of subquery to the current SELECT_LEX + SELECT_LEX *curr_sel= select_stack_head(); + DBUG_ASSERT(current_select == curr_sel || + (curr_sel == NULL && current_select == &builtin_select)); + if (curr_sel) + { + curr_sel->register_unit(unit, context_stack.head()); + curr_sel->add_statistics(unit); + } + + return unit->first_select(); +} + + +/** + Process INSERT-like select +*/ + +bool LEX::parsed_insert_select(SELECT_LEX *first_select) +{ + if (sql_command == SQLCOM_INSERT || + sql_command == SQLCOM_REPLACE) + { + if (sql_command == SQLCOM_INSERT) + sql_command= SQLCOM_INSERT_SELECT; + else + sql_command= SQLCOM_REPLACE_SELECT; + } + insert_select_hack(first_select); + if (check_main_unit_semantics()) + return true; + + // fix "main" select + SELECT_LEX *blt __attribute__((unused))= pop_select(); + DBUG_ASSERT(blt == &builtin_select); + push_select(first_select); + return false; +} + + +bool LEX::parsed_TVC_start() +{ + SELECT_LEX *sel; + save_values_list_state(); + many_values.empty(); + insert_list= 0; + if (!(sel= alloc_select(TRUE)) || push_select(sel)) + return true; + sel->init_select(); + sel->braces= FALSE; // just initialisation + return false; +} + + +SELECT_LEX *LEX::parsed_TVC_end() +{ + SELECT_LEX *res= pop_select(); // above TVC select + if (!(res->tvc= + new 
(thd->mem_root) table_value_constr(many_values, res, res->options))) + return NULL; + restore_values_list_state(); + return res; +} + + + +TABLE_LIST *LEX::parsed_derived_table(SELECT_LEX_UNIT *unit, + int for_system_time, + LEX_CSTRING *alias) +{ + TABLE_LIST *res; + derived_tables|= DERIVED_SUBQUERY; + unit->first_select()->set_linkage(DERIVED_TABLE_TYPE); + + // Add the subtree of subquery to the current SELECT_LEX + SELECT_LEX *curr_sel= select_stack_head(); + DBUG_ASSERT(current_select == curr_sel || + (curr_sel == NULL && current_select == &builtin_select)); + + Table_ident *ti= new (thd->mem_root) Table_ident(unit); + if (ti == NULL) + return NULL; + if (!(res= curr_sel->add_table_to_list(thd, ti, alias, 0, + TL_READ, MDL_SHARED_READ))) + return NULL; + if (for_system_time) + { + res->vers_conditions= vers_conditions; + } + return res; +} + +bool LEX::parsed_create_view(SELECT_LEX_UNIT *unit, int check) +{ + SQL_I_List *save= &first_select_lex()->table_list; + if (set_main_unit(unit)) + return true; + if (check_main_unit_semantics()) + return true; + first_select_lex()->table_list.push_front(save); + current_select= first_select_lex(); + size_t len= thd->m_parser_state->m_lip.get_cpp_ptr() - + create_view->select.str; + void *create_view_select= thd->memdup(create_view->select.str, len); + create_view->select.length= len; + create_view->select.str= (char *) create_view_select; + size_t not_used; + trim_whitespace(thd->charset(), + &create_view->select, ¬_used); + create_view->check= check; + parsing_options.allows_variable= TRUE; + return false; +} + +bool LEX::select_finalize(st_select_lex_unit *expr) +{ + sql_command= SQLCOM_SELECT; + selects_allow_procedure= TRUE; + if (set_main_unit(expr)) + return true; + return check_main_unit_semantics(); +} + + +bool LEX::select_finalize(st_select_lex_unit *expr, Lex_select_lock l) +{ + return expr->set_lock_to_the_last_select(l) || + select_finalize(expr); +} + + +/* + "IN" and "EXISTS" subselect can appear in two 
statement types: + + 1. Statements that can have table columns, such as SELECT, DELETE, UPDATE + 2. Statements that cannot have table columns, e.g: + RETURN ((1) IN (SELECT * FROM t1)) + IF ((1) IN (SELECT * FROM t1)) + + Statements of the first type call master_select_push() in the beginning. + In such case everything is properly linked. + + Statements of the second type do not call mastr_select_push(). + Here we catch the second case and relink thd->lex->builtin_select and + select_lex to properly point to each other. + + QQ: Shouldn't subselects of other type also call relink_hack()? + QQ: Can we do it at constructor time instead? +*/ + +void LEX::relink_hack(st_select_lex *select_lex) +{ + if (!select_stack_top) // Statements of the second type + { + if (!select_lex->outer_select() && + !builtin_select.first_inner_unit()) + { + builtin_select.register_unit(select_lex->master_unit(), + &builtin_select.context); + builtin_select.add_statistics(select_lex->master_unit()); + } + } +} + + +bool SELECT_LEX_UNIT::set_lock_to_the_last_select(Lex_select_lock l) +{ + if (l.defined_lock) + { + SELECT_LEX *sel= first_select(); + while (sel->next_select()) + sel= sel->next_select(); + if (sel->braces) + { + my_error(ER_WRONG_USAGE, MYF(0), "lock options", + "SELECT in brackets"); + return TRUE; + } + l.set_to(sel); + } + return FALSE; +} + +/** + Generate unique name for generated derived table for this SELECT +*/ + +bool SELECT_LEX::make_unique_derived_name(THD *thd, LEX_CSTRING *alias) +{ + // uint32 digits + two underscores + trailing '\0' + char buff[MAX_INT_WIDTH + 2 + 1]; + alias->length= my_snprintf(buff, sizeof(buff), "__%u", select_number); + alias->str= thd->strmake(buff, alias->length); + return !alias->str; +} + + +/* + Make a new sp_instr_stmt and set its m_query to a concatenation + of two strings. 
+*/ +bool LEX::new_sp_instr_stmt(THD *thd, + const LEX_CSTRING &prefix, + const LEX_CSTRING &suffix) +{ + LEX_STRING qbuff; + sp_instr_stmt *i; + + if (!(i= new (thd->mem_root) sp_instr_stmt(sphead->instructions(), + spcont, this))) + return true; + + qbuff.length= prefix.length + suffix.length; + if (!(qbuff.str= (char*) alloc_root(thd->mem_root, qbuff.length + 1))) + return true; + if (prefix.length) + memcpy(qbuff.str, prefix.str, prefix.length); + strmake(qbuff.str + prefix.length, suffix.str, suffix.length); + i->m_query= qbuff; + return sphead->add_instr(i); +} + + +bool LEX::sp_proc_stmt_statement_finalize_buf(THD *thd, const LEX_CSTRING &qbuf) +{ + sphead->m_flags|= sp_get_flags_for_command(this); + /* "USE db" doesn't work in a procedure */ + if (unlikely(sql_command == SQLCOM_CHANGE_DB)) + { + my_error(ER_SP_BADSTATEMENT, MYF(0), "USE"); + return true; + } + /* + Don't add an instruction for SET statements, since all + instructions for them were already added during processing + of "set" rule. + */ + DBUG_ASSERT(sql_command != SQLCOM_SET_OPTION || var_list.is_empty()); + if (sql_command != SQLCOM_SET_OPTION) + return new_sp_instr_stmt(thd, empty_clex_str, qbuf); + return false; +} + + +bool LEX::sp_proc_stmt_statement_finalize(THD *thd, bool no_lookahead) +{ + // Extract the query statement from the tokenizer + Lex_input_stream *lip= &thd->m_parser_state->m_lip; + Lex_cstring qbuf(sphead->m_tmp_query, no_lookahead ? 
lip->get_ptr() : + lip->get_tok_start()); + return LEX::sp_proc_stmt_statement_finalize_buf(thd, qbuf); +} + + +/** + @brief + Extract the condition that can be pushed into WHERE clause + + @param thd the thread handle + @param cond the condition from which to extract a pushed condition + @param remaining_cond IN/OUT the condition that will remain of cond after + the extraction + @param transformer the transformer callback function to be + applied to the fields of the condition so it + can be pushed` + @param arg parameter to be passed to the transformer + + @details + This function builds the most restrictive condition depending only on + the fields used in the GROUP BY of this SELECT. These fields were + collected before in grouping_tmp_fields list of this SELECT. + + First this method checks if this SELECT doesn't have any aggregation + functions and has no GROUP BY clause. If so cond can be entirely pushed + into WHERE. + + Otherwise the method checks if there is a condition depending only on + grouping fields that can be extracted from cond. + + The condition that can be pushed into WHERE should be transformed. + It is done by transformer. + + The extracted condition is saved in cond_pushed_into_where of this select. + COND can remain not empty after the extraction of the conditions that can be + pushed into WHERE. It is saved in remaining_cond. + + @note + This method is called for pushdown conditions into materialized + derived tables/views optimization. + Item::derived_field_transformer_for_where is passed as the actual + callback function. + Also it is called for pushdown into materialized IN subqueries. + Item::in_subq_field_transformer_for_where is passed as the actual + callback function. 
+*/ + +void st_select_lex::pushdown_cond_into_where_clause(THD *thd, Item *cond, + Item **remaining_cond, + Item_transformer transformer, + uchar *arg) +{ + if (!cond_pushdown_is_allowed()) + return; + thd->lex->current_select= this; + if (have_window_funcs()) + { + Item *cond_over_partition_fields; + check_cond_extraction_for_grouping_fields(thd, cond); + cond_over_partition_fields= + build_cond_for_grouping_fields(thd, cond, true); + if (cond_over_partition_fields) + cond_over_partition_fields= cond_over_partition_fields->transform(thd, + &Item::grouping_field_transformer_for_where, + (uchar*) this); + if (cond_over_partition_fields) + { + cond_over_partition_fields->walk( + &Item::cleanup_excluding_const_fields_processor, 0, 0); + cond_pushed_into_where= cond_over_partition_fields; + } + + return; + } + + if (!join->group_list && !with_sum_func) + { + cond= transform_condition_or_part(thd, cond, transformer, arg); + if (cond) + { + cond->walk( + &Item::cleanup_excluding_const_fields_processor, 0, 0); + cond_pushed_into_where= cond; + } + + return; + } + + /* + Figure out what can be extracted from cond and pushed into + the WHERE clause of this select. + */ + Item *cond_over_grouping_fields; + check_cond_extraction_for_grouping_fields(thd, cond); + cond_over_grouping_fields= + build_cond_for_grouping_fields(thd, cond, true); + + /* + Transform references to the columns of condition that can be pushed + into WHERE so it can be pushed. 
+ */ + if (cond_over_grouping_fields) + { + cond_over_grouping_fields= + transform_condition_or_part(thd, cond_over_grouping_fields, + &Item::grouping_field_transformer_for_where, + (uchar*) this); + } + + if (cond_over_grouping_fields) + { + + /* + Remove top conjuncts in cond that has been pushed into the WHERE + clause of this select + */ + cond= remove_pushed_top_conjuncts(thd, cond); + + cond_over_grouping_fields->walk( + &Item::cleanup_excluding_const_fields_processor, 0, 0); + cond_pushed_into_where= cond_over_grouping_fields; + } + + *remaining_cond= cond; +} + + +/** + @brief + Mark OR-conditions as non-pushable to avoid repeatable pushdown + + @param cond the processed condition + + @details + Consider pushdown into the materialized derived table/view. + Consider OR condition that can be pushed into HAVING and some + parts of this OR condition that can be pushed into WHERE. + + On example: + + SELECT * + FROM t1, + ( + SELECT a,MAX(c) AS m_c + GROUP BY a + ) AS dt + WHERE ((dt.m_c>10) AND (dt.a>2)) OR ((dt.m_c<7) and (dt.a<3)) AND + (t1.a=v1.a); + + + Here ((dt.m_c>10) AND (dt.a>2)) OR ((dt.m_c<7) and (dt.a<3)) or1 + can be pushed down into the HAVING of the materialized + derived table dt. + + (dt.a>2) OR (dt.a<3) part of or1 depends only on grouping fields + of dt and can be pushed into WHERE. + + As a result: + + SELECT * + FROM t1, + ( + SELECT a,MAX(c) AS m_c + WHERE (dt.a>2) OR (dt.a<3) + GROUP BY a + HAVING ((dt.m_c>10) AND (dt.a>2)) OR ((dt.m_c<7) and (dt.a<3)) + ) AS dt + WHERE ((dt.m_c>10) AND (dt.a>2)) OR ((dt.m_c<7) and (dt.a<3)) AND + (t1.a=v1.a); + + + Here (dt.a>2) OR (dt.a<3) also remains in HAVING of dt. + When SELECT that defines df is processed HAVING pushdown optimization + is made. In HAVING pushdown optimization it will extract + (dt.a>2) OR (dt.a<3) condition from or1 again and push it into WHERE. + This will cause duplicate conditions in WHERE of dt. 
+ + To avoid repeatable pushdown such OR conditions as or1 describen + above are marked with MARKER_NO_EXTRACTION. + + @note + This method is called for pushdown into materialized + derived tables/views/IN subqueries optimization. +*/ + +void mark_or_conds_to_avoid_pushdown(Item *cond) +{ + if (cond->type() == Item::COND_ITEM && + ((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + if (item->type() == Item::COND_ITEM && + ((Item_cond*) item)->functype() == Item_func::COND_OR_FUNC) + item->set_extraction_flag(MARKER_NO_EXTRACTION); + } + } + else if (cond->type() == Item::COND_ITEM && + ((Item_cond*) cond)->functype() == Item_func::COND_OR_FUNC) + cond->set_extraction_flag(MARKER_NO_EXTRACTION); +} + +/** + @brief + Get condition that can be pushed from HAVING into WHERE + + @param thd the thread handle + @param cond the condition from which to extract the condition + + @details + The method collects in attach_to_conds list conditions from cond + that can be pushed from HAVING into WHERE. + + Conditions that can be pushed were marked with MARKER_FULL_EXTRACTION in + check_cond_extraction_for_grouping_fields() method. + Conditions that can't be pushed were marked with MARKER_NO_EXTRACTION. + Conditions which parts can be pushed weren't marked. + + There are two types of conditions that can be pushed: + 1. Condition that can be simply moved from HAVING + (if cond is marked with MARKER_FULL_EXTRACTION or + cond is an AND condition and some of its parts are marked with + MARKER_FULL_EXTRACTION) + In this case condition is transformed and pushed into attach_to_conds + list. + 2. Part of some other condition c1 that can't be entirely pushed + (if с1 isn't marked with any flag). 
+ + For example: + + SELECT t1.a,MAX(t1.b),t1.c + FROM t1 + GROUP BY t1.a + HAVING ((t1.a > 5) AND (t1.c < 3)) OR (t1.a = 3); + + Here (t1.a > 5) OR (t1.a = 3) from HAVING can be pushed into WHERE. + + In this case build_pushable_cond() is called for c1. + This method builds a clone of the c1 part that can be pushed. + + Transformation mentioned above is made with multiple_equality_transformer + transformer. It transforms all multiple equalities in the extracted + condition into the set of equalities. + + @note + Conditions that can be pushed are collected in attach_to_conds in this way: + 1. if cond is an AND condition its parts that can be pushed into WHERE + are added to attach_to_conds list separately. + 2. in all other cases conditions are pushed into the list entirely. + + @retval + true - if an error occurs + false - otherwise +*/ + +bool +st_select_lex::build_pushable_cond_for_having_pushdown(THD *thd, Item *cond) +{ + List equalities; + + /* Condition can't be pushed */ + if (cond->get_extraction_flag() == MARKER_NO_EXTRACTION) + return false; + + /** + Condition can be pushed entirely. + Transform its multiple equalities and add to attach_to_conds list. + */ + if (cond->get_extraction_flag() == MARKER_FULL_EXTRACTION) + { + Item *result= cond->top_level_transform(thd, + &Item::multiple_equality_transformer, (uchar *)this); + if (!result) + return true; + if (result->type() == Item::COND_ITEM && + ((Item_cond*) result)->functype() == Item_func::COND_AND_FUNC) + { + List_iterator li(*((Item_cond*) result)->argument_list()); + Item *item; + while ((item= li++)) + { + if (attach_to_conds.push_back(item, thd->mem_root)) + return true; + } + } + else + { + if (attach_to_conds.push_back(result, thd->mem_root)) + return true; + } + return false; + } + + /** + There is no flag set for this condition. It means that some + part of this condition can be pushed. 
+ */ + if (cond->type() != Item::COND_ITEM) + return false; + + if (((Item_cond *)cond)->functype() != Item_cond::COND_AND_FUNC) + { + /* + cond is not a conjunctive formula and it cannot be pushed into WHERE. + Try to extract a formula that can be pushed. + */ + Item *fix= cond->build_pushable_cond(thd, 0, 0); + if (!fix) + return false; + if (attach_to_conds.push_back(fix, thd->mem_root)) + return true; + } + else + { + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + if (item->get_extraction_flag() == MARKER_NO_EXTRACTION) + continue; + else if (item->get_extraction_flag() == MARKER_FULL_EXTRACTION) + { + Item *result= item->transform(thd, + &Item::multiple_equality_transformer, + (uchar *)item); + if (!result) + return true; + if (result->type() == Item::COND_ITEM && + ((Item_cond*) result)->functype() == Item_func::COND_AND_FUNC) + { + List_iterator li(*((Item_cond*) result)->argument_list()); + Item *item; + while ((item=li++)) + { + if (attach_to_conds.push_back(item, thd->mem_root)) + return true; + } + } + else + { + if (attach_to_conds.push_back(result, thd->mem_root)) + return true; + } + } + else + { + Item *fix= item->build_pushable_cond(thd, 0, 0); + if (!fix) + continue; + if (attach_to_conds.push_back(fix, thd->mem_root)) + return true; + } + } + } + return false; +} + + +/** + Check if item is equal to some field in Field_pair 'field_pair' + from 'pair_list' and return found 'field_pair' if it exists. 
+*/ + +Field_pair *get_corresponding_field_pair(Item *item, + List pair_list) +{ + DBUG_ASSERT(item->type() == Item::FIELD_ITEM || + (item->type() == Item::REF_ITEM && + ((((Item_ref *) item)->ref_type() == Item_ref::VIEW_REF) || + (((Item_ref *) item)->ref_type() == Item_ref::REF)))); + + List_iterator it(pair_list); + Field_pair *field_pair; + Item_field *field_item= (Item_field *) (item->real_item()); + while ((field_pair= it++)) + { + if (field_item->field == field_pair->field) + return field_pair; + } + return NULL; +} + + +/** + @brief + Collect fields from multiple equalities which are equal to grouping + + @param thd the thread handle + + @details + This method checks if multiple equalities of the WHERE clause contain + fields from GROUP BY of this SELECT. If so all fields of such multiple + equalities are collected in grouping_tmp_fields list without repetitions. + + @retval + true - if an error occurs + false - otherwise +*/ + +bool st_select_lex::collect_fields_equal_to_grouping(THD *thd) +{ + if (!join->cond_equal || join->cond_equal->is_empty()) + return false; + + List_iterator_fast li(join->cond_equal->current_level); + Item_equal *item_equal; + + while ((item_equal= li++)) + { + Item_equal_fields_iterator it(*item_equal); + Item *item; + while ((item= it++)) + { + if (get_corresponding_field_pair(item, grouping_tmp_fields)) + break; + } + if (!item) + break; + + it.rewind(); + while ((item= it++)) + { + if (get_corresponding_field_pair(item, grouping_tmp_fields)) + continue; + Field_pair *grouping_tmp_field= + new Field_pair(((Item_field *)item->real_item())->field, item); + if (grouping_tmp_fields.push_back(grouping_tmp_field, thd->mem_root)) + return true; + } + } + return false; +} + + +/** + @brief + Remove marked top conjuncts of HAVING for having pushdown + + @param thd the thread handle + @param cond the condition which subformulas are to be removed + + @details + This method removes from cond all subformulas that can be moved from HAVING + 
into WHERE. + + @retval + condition without removed subformulas + 0 if the whole 'cond' is removed +*/ + +Item *remove_pushed_top_conjuncts_for_having(THD *thd, Item *cond) +{ + /* Nothing to extract */ + if (cond->get_extraction_flag() == MARKER_NO_EXTRACTION) + { + cond->clear_extraction_flag(); + return cond; + } + /* cond can be pushed in WHERE entirely */ + if (cond->get_extraction_flag() == MARKER_FULL_EXTRACTION) + { + cond->clear_extraction_flag(); + return 0; + } + + /* Some parts of cond can be pushed */ + if (cond->type() == Item::COND_ITEM && + ((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + if (item->get_extraction_flag() == MARKER_NO_EXTRACTION) + item->clear_extraction_flag(); + else if (item->get_extraction_flag() == MARKER_FULL_EXTRACTION) + { + if (item->type() == Item::FUNC_ITEM && + ((Item_func*) item)->functype() == Item_func::MULT_EQUAL_FUNC) + item->set_extraction_flag(MARKER_DELETION); + else + { + item->clear_extraction_flag(); + li.remove(); + } + } + } + switch (((Item_cond*) cond)->argument_list()->elements) + { + case 0: + return 0; + case 1: + return (((Item_cond*) cond)->argument_list()->head()); + default: + return cond; + } + } + return cond; +} + + +/** + @brief + Extract condition that can be pushed from HAVING into WHERE + + @param thd the thread handle + @param having the HAVING clause of this select + @param having_equal multiple equalities of HAVING + + @details + This method builds a set of conditions dependent only on + fields used in the GROUP BY of this select (directly or indirectly + through equalities). These conditions are extracted from the HAVING + clause of this select. + The method saves these conditions into attach_to_conds list and removes + from HAVING conditions that can be entirely pushed into WHERE. 
+ + Example of the HAVING pushdown transformation: + + SELECT t1.a,MAX(t1.b) + FROM t1 + GROUP BY t1.a + HAVING (t1.a>2) AND (MAX(c)>12); + + => + + SELECT t1.a,MAX(t1.b) + FROM t1 + WHERE (t1.a>2) + GROUP BY t1.a + HAVING (MAX(c)>12); + + In this method (t1.a>2) is not attached to the WHERE clause. + It is pushed into the attach_to_conds list to be attached to + the WHERE clause later. + + In details: + 1. Collect fields used in the GROUP BY grouping_fields of this SELECT + 2. Collect fields equal to grouping_fields from the WHERE clause + of this SELECT and add them to the grouping_fields list. + 3. Extract the most restrictive condition from the HAVING clause of this + select that depends only on the grouping fields (directly or indirectly + through equality). + If the extracted condition is an AND condition it is transformed into a + list of all its conjuncts saved in attach_to_conds. Otherwise, + the condition is put into attach_to_conds as the only its element. + 4. Remove conditions from HAVING clause that can be entirely pushed + into WHERE. + Multiple equalities are not removed but marked with MARKER_DELETION flag. + They will be deleted later in substitite_for_best_equal_field() called + for the HAVING condition. + 5. Unwrap fields wrapped in Item_ref wrappers contained in the condition + of attach_to_conds so the condition could be pushed into WHERE. + + @note + This method is similar to st_select_lex::pushdown_cond_into_where_clause(). + + @retval TRUE if an error occurs + @retval FALSE otherwise +*/ + +Item *st_select_lex::pushdown_from_having_into_where(THD *thd, Item *having) +{ + if (!having || !group_list.first) + return having; + if (!cond_pushdown_is_allowed()) + return having; + + st_select_lex *save_curr_select= thd->lex->current_select; + thd->lex->current_select= this; + + /* + 1. Collect fields used in the GROUP BY grouping fields of this SELECT + 2. 
Collect fields equal to grouping_fields from the WHERE clause + of this SELECT and add them to the grouping fields list. + */ + if (collect_grouping_fields(thd) || + collect_fields_equal_to_grouping(thd)) + return having; + + /* + 3. Extract the most restrictive condition from the HAVING clause of this + select that depends only on the grouping fields (directly or indirectly + through equality). + If the extracted condition is an AND condition it is transformed into a + list of all its conjuncts saved in attach_to_conds. Otherwise, + the condition is put into attach_to_conds as the only its element. + */ + List_iterator_fast it(attach_to_conds); + Item *item; + check_cond_extraction_for_grouping_fields(thd, having); + if (build_pushable_cond_for_having_pushdown(thd, having)) + { + attach_to_conds.empty(); + goto exit; + } + if (!attach_to_conds.elements) + goto exit; + + /* + 4. Remove conditions from HAVING clause that can be entirely pushed + into WHERE. + Multiple equalities are not removed but marked with MARKER_DELETION flag. + They will be deleted later in substitite_for_best_equal_field() called + for the HAVING condition. + */ + having= remove_pushed_top_conjuncts_for_having(thd, having); + + /* + Change join->cond_equal which points to the multiple equalities of + the top level of HAVING. + Removal of AND conditions may leave only one conjunct in HAVING. + + Example 1: + SELECT * + FROM t1 + GROUP BY t1.a + (t1.a < 2) AND (t1.b = 2) + + (t1.a < 2) is pushed into WHERE. + join->cond_equal should point on (t1.b = 2) multiple equality now. + + Example 2: + SELECT * + FROM t1 + GROUP BY t1.a + (t1.a = 2) AND (t1.b < 2) + + (t1.a = 2) is pushed into WHERE. + join->cond_equal should be NULL now. 
+ */ + if (having && + having->type() == Item::FUNC_ITEM && + ((Item_func*) having)->functype() == Item_func::MULT_EQUAL_FUNC) + join->having_equal= new (thd->mem_root) COND_EQUAL((Item_equal *)having, + thd->mem_root); + else if (!having || + having->type() != Item::COND_ITEM || + ((Item_cond *)having)->functype() != Item_cond::COND_AND_FUNC) + join->having_equal= 0; + + /* + 5. Unwrap fields wrapped in Item_ref wrappers contained in the condition + of attach_to_conds so the condition could be pushed into WHERE. + */ + it.rewind(); + while ((item=it++)) + { + item= item->transform(thd, + &Item::field_transformer_for_having_pushdown, + (uchar *)this); + + if (item->walk(&Item::cleanup_excluding_immutables_processor, 0, STOP_PTR) + || item->fix_fields(thd, NULL)) + { + attach_to_conds.empty(); + goto exit; + } + } +exit: + thd->lex->current_select= save_curr_select; + return having; +} + + +bool LEX::stmt_install_plugin(const DDL_options_st &opt, + const Lex_ident_sys_st &name, + const LEX_CSTRING &soname) +{ + create_info.init(); + if (add_create_options_with_check(opt)) + return true; + sql_command= SQLCOM_INSTALL_PLUGIN; + comment= name; + ident= soname; + return false; +} + + +void LEX::stmt_install_plugin(const LEX_CSTRING &soname) +{ + sql_command= SQLCOM_INSTALL_PLUGIN; + comment= null_clex_str; + ident= soname; +} + + +bool LEX::stmt_uninstall_plugin_by_name(const DDL_options_st &opt, + const Lex_ident_sys_st &name) +{ + check_opt.init(); + if (add_create_options_with_check(opt)) + return true; + sql_command= SQLCOM_UNINSTALL_PLUGIN; + comment= name; + ident= null_clex_str; + return false; +} + + +bool LEX::stmt_uninstall_plugin_by_soname(const DDL_options_st &opt, + const LEX_CSTRING &soname) +{ + check_opt.init(); + if (add_create_options_with_check(opt)) + return true; + sql_command= SQLCOM_UNINSTALL_PLUGIN; + comment= null_clex_str; + ident= soname; + return false; +} + + +bool LEX::stmt_prepare_validate(const char *stmt_type) +{ + if 
(unlikely(table_or_sp_used())) + { + my_error(ER_SUBQUERIES_NOT_SUPPORTED, MYF(0), stmt_type); + return true; + } + return check_main_unit_semantics(); +} + + +bool LEX::stmt_prepare(const Lex_ident_sys_st &ident, Item *code) +{ + sql_command= SQLCOM_PREPARE; + if (stmt_prepare_validate("PREPARE..FROM")) + return true; + prepared_stmt.set(ident, code, NULL); + return false; +} + + +bool LEX::stmt_execute_immediate(Item *code, List *params) +{ + sql_command= SQLCOM_EXECUTE_IMMEDIATE; + if (stmt_prepare_validate("EXECUTE IMMEDIATE")) + return true; + static const Lex_ident_sys immediate(STRING_WITH_LEN("IMMEDIATE")); + prepared_stmt.set(immediate, code, params); + return false; +} + + +bool LEX::stmt_execute(const Lex_ident_sys_st &ident, List *params) +{ + sql_command= SQLCOM_EXECUTE; + prepared_stmt.set(ident, NULL, params); + return stmt_prepare_validate("EXECUTE..USING"); +} + + +void LEX::stmt_deallocate_prepare(const Lex_ident_sys_st &ident) +{ + sql_command= SQLCOM_DEALLOCATE_PREPARE; + prepared_stmt.set(ident, NULL, NULL); +} + + +bool LEX::stmt_alter_table_exchange_partition(Table_ident *table) +{ + DBUG_ASSERT(sql_command == SQLCOM_ALTER_TABLE); + first_select_lex()->db= table->db; + if (first_select_lex()->db.str == NULL && + copy_db_to(&first_select_lex()->db)) + return true; + name= table->table; + alter_info.partition_flags|= ALTER_PARTITION_EXCHANGE; + if (!first_select_lex()->add_table_to_list(thd, table, NULL, + TL_OPTION_UPDATING, + TL_READ_NO_INSERT, + MDL_SHARED_NO_WRITE)) + return true; + DBUG_ASSERT(!m_sql_cmd); + m_sql_cmd= new (thd->mem_root) Sql_cmd_alter_table_exchange_partition(); + return m_sql_cmd == NULL; +} + + +bool LEX::stmt_alter_table(Table_ident *table) +{ + DBUG_ASSERT(sql_command == SQLCOM_ALTER_TABLE); + first_select_lex()->db= table->db; + if (first_select_lex()->db.str == NULL && + copy_db_to(&first_select_lex()->db)) + return true; + if (unlikely(check_table_name(table->table.str, table->table.length, + false)) || + 
(table->db.str && unlikely(check_db_name((LEX_STRING*) &table->db)))) + { + my_error(ER_WRONG_TABLE_NAME, MYF(0), table->table.str); + return true; + } + name= table->table; + return false; +} + + +void LEX::stmt_purge_to(const LEX_CSTRING &to) +{ + type= 0; + sql_command= SQLCOM_PURGE; + to_log= to.str; +} + + +bool LEX::stmt_purge_before(Item *item) +{ + type= 0; + sql_command= SQLCOM_PURGE_BEFORE; + value_list.empty(); + value_list.push_front(item, thd->mem_root); + return check_main_unit_semantics(); +} + + +bool LEX::stmt_create_udf_function(const DDL_options_st &options, + enum_sp_aggregate_type agg_type, + const Lex_ident_sys_st &name, + Item_result return_type, + const LEX_CSTRING &soname) +{ + if (stmt_create_function_start(options)) + return true; + + if (unlikely(is_native_function(thd, &name))) + { + my_error(ER_NATIVE_FCT_NAME_COLLISION, MYF(0), name.str); + return true; + } + sql_command= SQLCOM_CREATE_FUNCTION; + udf.name= name; + udf.returns= return_type; + udf.dl= soname.str; + udf.type= agg_type == GROUP_AGGREGATE ? 
UDFTYPE_AGGREGATE : + UDFTYPE_FUNCTION; + stmt_create_routine_finalize(); + return false; +} + + +bool LEX::stmt_create_stored_function_start(const DDL_options_st &options, + enum_sp_aggregate_type agg_type, + const sp_name *spname) +{ + if (stmt_create_function_start(options) || + unlikely(!make_sp_head_no_recursive(thd, spname, + &sp_handler_function, agg_type))) + return true; + return false; +} + + +bool LEX::stmt_drop_function(const DDL_options_st &options, + const Lex_ident_sys_st &db, + const Lex_ident_sys_st &name) +{ + if (unlikely(db.str && check_db_name((LEX_STRING*) &db))) + { + my_error(ER_WRONG_DB_NAME, MYF(0), db.str); + return true; + } + if (unlikely(sphead)) + { + my_error(ER_SP_NO_DROP_SP, MYF(0), "FUNCTION"); + return true; + } + set_command(SQLCOM_DROP_FUNCTION, options); + spname= new (thd->mem_root) sp_name(&db, &name, true); + return spname == NULL; +} + + +bool LEX::stmt_drop_function(const DDL_options_st &options, + const Lex_ident_sys_st &name) +{ + LEX_CSTRING db= {0, 0}; + if (unlikely(sphead)) + { + my_error(ER_SP_NO_DROP_SP, MYF(0), "FUNCTION"); + return true; + } + if (thd->db.str && unlikely(copy_db_to(&db))) + return true; + set_command(SQLCOM_DROP_FUNCTION, options); + spname= new (thd->mem_root) sp_name(&db, &name, false); + return spname == NULL; +} + + +bool LEX::stmt_drop_procedure(const DDL_options_st &options, + sp_name *name) +{ + if (unlikely(sphead)) + { + my_error(ER_SP_NO_DROP_SP, MYF(0), "PROCEDURE"); + return true; + } + set_command(SQLCOM_DROP_PROCEDURE, options); + spname= name; + return false; +} + + +bool LEX::stmt_alter_function_start(sp_name *name) +{ + if (unlikely(sphead)) + { + my_error(ER_SP_NO_DROP_SP, MYF(0), "FUNCTION"); + return true; + } + if (main_select_push()) + return true; + sp_chistics.init(); + sql_command= SQLCOM_ALTER_FUNCTION; + spname= name; + return false; +} + + +bool LEX::stmt_alter_procedure_start(sp_name *name) +{ + if (unlikely(sphead)) + { + my_error(ER_SP_NO_DROP_SP, MYF(0), 
"PROCEDURE"); + return true; + } + if (main_select_push()) + return true; + sp_chistics.init(); + sql_command= SQLCOM_ALTER_PROCEDURE; + spname= name; + return false; +} + + +Spvar_definition *LEX::row_field_name(THD *thd, const Lex_ident_sys_st &name) +{ + Spvar_definition *res; + if (unlikely(check_string_char_length(&name, 0, NAME_CHAR_LEN, + system_charset_info, 1))) + { + my_error(ER_TOO_LONG_IDENT, MYF(0), name.str); + return NULL; + } + if (unlikely(!(res= new (thd->mem_root) Spvar_definition()))) + return NULL; + init_last_field(res, &name); + return res; +} + + +Item * +Lex_cast_type_st::create_typecast_item_or_error(THD *thd, Item *item) const +{ + Item *tmp= create_typecast_item(thd, item); + if (!tmp) + { + Name name= m_type_handler->name(); + char buf[128]; + size_t length= my_snprintf(buf, sizeof(buf), "CAST(expr AS %.*s)", + (int) name.length(), name.ptr()); + my_error(ER_UNKNOWN_OPERATOR, MYF(0), + ErrConvString(buf, length, system_charset_info).ptr()); + } + return tmp; +} + + +void +Lex_length_and_dec_st::set(const char *plength, const char *pdec) +{ + reset(); + + if ((m_has_explicit_length= (plength != nullptr))) + { + int err; + ulonglong tmp= my_strtoll10(plength, NULL, &err); + if ((m_length_overflowed= (tmp > UINT_MAX32 || err))) + m_length= UINT_MAX32; + else + m_length= (uint32) tmp; + } + + if ((m_has_explicit_dec= (pdec != nullptr))) + { + int err; + ulonglong tmp= my_strtoll10(pdec, NULL, &err); + if ((m_dec_overflowed= (tmp > 255 || err))) + m_dec= 255; + else + m_dec= (uint8) tmp; + } +} + + +void +Lex_field_type_st::set_handler_length_flags(const Type_handler *handler, + const Lex_length_and_dec_st &attr, + uint32 flags) +{ + DBUG_ASSERT(!handler->is_unsigned()); + set(handler, attr); + if (flags & UNSIGNED_FLAG) + m_handler= m_handler->type_handler_unsigned(); +} + + +bool LEX::set_field_type_udt(Lex_field_type_st *type, + const LEX_CSTRING &name, + const Lex_length_and_dec_st &attr) +{ + const Type_handler *h; + if (!(h= 
Type_handler::handler_by_name_or_error(thd, name))) + return true; + type->set(h, attr, &my_charset_bin); + return false; +} + + +bool LEX::set_cast_type_udt(Lex_cast_type_st *type, + const LEX_CSTRING &name) +{ + const Type_handler *h; + if (!(h= Type_handler::handler_by_name_or_error(thd, name))) + return true; + type->set(h); + return false; +} + + +bool sp_expr_lex::sp_repeat_loop_finalize(THD *thd) +{ + uint ip= sphead->instructions(); + sp_label *lab= spcont->last_label(); /* Jumping back */ + sp_instr_jump_if_not *i= new (thd->mem_root) + sp_instr_jump_if_not(ip, spcont, get_item(), lab->ip, this); + if (unlikely(i == NULL) || + unlikely(sphead->add_instr(i))) + return true; + /* We can shortcut the cont_backpatch here */ + i->m_cont_dest= ip+1; + return false; +} + + +bool sp_expr_lex::sp_if_expr(THD *thd) +{ + uint ip= sphead->instructions(); + sp_instr_jump_if_not *i= new (thd->mem_root) + sp_instr_jump_if_not(ip, spcont, get_item(), this); + return + (unlikely(i == NULL) || + unlikely(sphead->push_backpatch(thd, i, + spcont->push_label(thd, &empty_clex_str, + 0))) || + unlikely(sphead->add_cont_backpatch(i)) || + unlikely(sphead->add_instr(i))); +} + + +bool LEX::sp_if_after_statements(THD *thd) +{ + uint ip= sphead->instructions(); + sp_instr_jump *i= new (thd->mem_root) sp_instr_jump(ip, spcont); + if (unlikely(i == NULL) || + unlikely(sphead->add_instr(i))) + return true; + sphead->backpatch(spcont->pop_label()); + sphead->push_backpatch(thd, i, spcont->push_label(thd, &empty_clex_str, 0)); + return false; +} + + +sp_condition_value *LEX::stmt_signal_value(const Lex_ident_sys_st &ident) +{ + sp_condition_value *cond; + /* SIGNAL foo cannot be used outside of stored programs */ + if (unlikely(spcont == NULL)) + { + my_error(ER_SP_COND_MISMATCH, MYF(0), ident.str); + return NULL; + } + cond= spcont->find_declared_or_predefined_condition(thd, &ident); + if (unlikely(cond == NULL)) + { + my_error(ER_SP_COND_MISMATCH, MYF(0), ident.str); + return NULL; + } 
+ bool bad= thd->variables.sql_mode & MODE_ORACLE ? + !cond->has_sql_state() : + cond->type != sp_condition_value::SQLSTATE; + if (unlikely(bad)) + { + my_error(ER_SIGNAL_BAD_CONDITION_TYPE, MYF(0)); + return NULL; + } + return cond; +} + + +bool LEX::add_table_foreign_key(const LEX_CSTRING *name, + const LEX_CSTRING *constraint_name, + Table_ident *ref_table_name, + DDL_options ddl_options) +{ + Key *key= new (thd->mem_root) Foreign_key(name, + &last_key->columns, + constraint_name, + &ref_table_name->db, + &ref_table_name->table, + &ref_list, + fk_delete_opt, + fk_update_opt, + fk_match_option, + ddl_options); + if (unlikely(key == NULL)) + return true; + + /* + handle_if_exists_options() expects the two keys in this order: + the Foreign_key, followed by its auto-generated Key. + */ + alter_info.key_list.push_back(key, thd->mem_root); + alter_info.key_list.push_back(last_key, thd->mem_root); + + option_list= NULL; + + /* Only used for ALTER TABLE. Ignored otherwise. */ + alter_info.flags|= ALTER_ADD_FOREIGN_KEY; + + return false; +} + + +bool LEX::add_column_foreign_key(const LEX_CSTRING *name, + const LEX_CSTRING *constraint_name, + Table_ident *ref_table_name, + DDL_options ddl_options) +{ + if (last_field->vcol_info || last_field->vers_sys_field()) + { + thd->parse_error(); + return true; + } + if (unlikely(!(last_key= (new (thd->mem_root) + Key(Key::MULTIPLE, constraint_name, + HA_KEY_ALG_UNDEF, true, ddl_options))))) + return true; + Key_part_spec *key= new (thd->mem_root) Key_part_spec(name, 0); + if (unlikely(key == NULL)) + return true; + last_key->columns.push_back(key, thd->mem_root); + if (ref_list.is_empty()) + { + ref_list.push_back(key, thd->mem_root); + } + if (unlikely(add_table_foreign_key(constraint_name, constraint_name, + ref_table_name, ddl_options))) + return true; + option_list= NULL; + + /* Only used for ALTER TABLE. Ignored otherwise. 
*/ + alter_info.flags|= ALTER_ADD_FOREIGN_KEY; + + return false; +} + + +bool LEX::stmt_grant_table(THD *thd, + Grant_privilege *grant, + const Lex_grant_object_name &ident, + privilege_t grant_option) +{ + sql_command= SQLCOM_GRANT; + return + grant->set_object_name(thd, ident, current_select, grant_option) || + !(m_sql_cmd= new (thd->mem_root) Sql_cmd_grant_table(sql_command, *grant)); +} + + +bool LEX::stmt_revoke_table(THD *thd, + Grant_privilege *grant, + const Lex_grant_object_name &ident) +{ + sql_command= SQLCOM_REVOKE; + return + grant->set_object_name(thd, ident, current_select, NO_ACL) || + !(m_sql_cmd= new (thd->mem_root) Sql_cmd_grant_table(sql_command, *grant)); +} + + +bool LEX::stmt_grant_sp(THD *thd, + Grant_privilege *grant, + const Lex_grant_object_name &ident, + const Sp_handler &sph, + privilege_t grant_option) +{ + sql_command= SQLCOM_GRANT; + return + grant->set_object_name(thd, ident, current_select, grant_option) || + add_grant_command(thd, grant->columns()) || + !(m_sql_cmd= new (thd->mem_root) Sql_cmd_grant_sp(sql_command, + *grant, sph)); +} + + +bool LEX::stmt_revoke_sp(THD *thd, + Grant_privilege *grant, + const Lex_grant_object_name &ident, + const Sp_handler &sph) +{ + sql_command= SQLCOM_REVOKE; + return + grant->set_object_name(thd, ident, current_select, NO_ACL) || + add_grant_command(thd, grant->columns()) || + !(m_sql_cmd= new (thd->mem_root) Sql_cmd_grant_sp(sql_command, + *grant, sph)); +} + + +bool LEX::stmt_grant_proxy(THD *thd, LEX_USER *user, privilege_t grant_option) +{ + users_list.push_front(user); + sql_command= SQLCOM_GRANT; + return !(m_sql_cmd= new (thd->mem_root) Sql_cmd_grant_proxy(sql_command, + grant_option)); +} + + +bool LEX::stmt_revoke_proxy(THD *thd, LEX_USER *user) +{ + users_list.push_front(user); + sql_command= SQLCOM_REVOKE; + return !(m_sql_cmd= new (thd->mem_root) Sql_cmd_grant_proxy(sql_command, + NO_ACL)); +} + + +LEX_USER *LEX::current_user_for_set_password(THD *thd) +{ + LEX_CSTRING pw= { 
STRING_WITH_LEN("password") }; + if (unlikely(spcont && spcont->find_variable(&pw, false))) + { + my_error(ER_SP_BAD_VAR_SHADOW, MYF(0), pw.str); + return NULL; + } + LEX_USER *res; + if (unlikely(!(res= (LEX_USER*) thd->calloc(sizeof(LEX_USER))))) + return NULL; + res->user= current_user; + return res; +} + + +bool LEX::sp_create_set_password_instr(THD *thd, + LEX_USER *user, + USER_AUTH *auth, + bool no_lookahead) +{ + user->auth= auth; + set_var_password *var= new (thd->mem_root) set_var_password(user); + if (unlikely(var == NULL) || + unlikely(var_list.push_back(var, thd->mem_root))) + return true; + autocommit= true; + if (sphead) + sphead->m_flags|= sp_head::HAS_SET_AUTOCOMMIT_STMT; + return sp_create_assignment_instr(thd, no_lookahead); +} + + +bool LEX::set_names(const char *pos, + const Lex_exact_charset_opt_extended_collate &cscl, + bool no_lookahead) +{ + if (sp_create_assignment_lex(thd, pos)) + return true; + CHARSET_INFO *ci= cscl.collation().charset_info(); + set_var_collation_client *var; + var= new (thd->mem_root) set_var_collation_client(ci, ci, ci); + return unlikely(var == NULL) || + unlikely(thd->lex->var_list.push_back(var, thd->mem_root)) || + unlikely(sp_create_assignment_instr(thd, no_lookahead)); +} + + +bool LEX::map_data_type(const Lex_ident_sys_st &schema_name, + Lex_field_type_st *type) const +{ + const Schema *schema= schema_name.str ? 
+ Schema::find_by_name(schema_name) : + Schema::find_implied(thd); + if (!schema) + { + char buf[128]; + const Name type_name= type->type_handler()->name(); + my_snprintf(buf, sizeof(buf), "%.*s.%.*s", + (int) schema_name.length, schema_name.str, + (int) type_name.length(), type_name.ptr()); + my_error(ER_UNKNOWN_DATA_TYPE, MYF(0), buf); + return true; + } + const Type_handler *mapped= schema->map_data_type(thd, type->type_handler()); + type->set_handler(mapped); + return false; +} + + +bool SELECT_LEX_UNIT::explainable() const +{ + /* + EXPLAIN/ANALYZE unit, when: + (1) if it's a subquery - it's not part of eliminated WHERE/ON clause. + (2) if it's a CTE - it's not hanging (needed for execution) + (3) if it's a derived - it's not merged or eliminated + if it's not 1/2/3 - it's some weird internal thing, ignore it + */ + + return item ? + !item->eliminated : // (1) + with_element ? + derived && derived->derived_result && + !with_element->is_hanging_recursive(): // (2) + derived ? + derived->is_materialized_derived() && // (3) + !is_derived_eliminated() : + false; +} + + +bool st_select_lex::is_query_topmost(THD *thd) +{ + return get_master() == &thd->lex->unit; +} + + +/* + Determines whether the derived table was eliminated during + the call of eliminate_tables(JOIN *) made at the optimization stage + or completely optimized out (for such degenerate statements like + "SELECT 1", for example) +*/ + +bool SELECT_LEX_UNIT::is_derived_eliminated() const +{ + if (!derived) + return false; + if (!derived->table) + return true; + return derived->table->map & outer_select()->join->eliminated_tables; +} diff --git a/sql/sql_lex.h b/sql/sql_lex.h new file mode 100644 index 00000000..f548fbe5 --- /dev/null +++ b/sql/sql_lex.h @@ -0,0 +1,5190 @@ +/* Copyright (c) 2000, 2019, Oracle and/or its affiliates. + Copyright (c) 2010, 2022, MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @defgroup Semantic_Analysis Semantic Analysis +*/ + +#ifndef SQL_LEX_INCLUDED +#define SQL_LEX_INCLUDED + +#include "violite.h" /* SSL_type */ +#include "sql_trigger.h" +#include "thr_lock.h" /* thr_lock_type, TL_UNLOCK */ +#include "mem_root_array.h" +#include "grant.h" +#include "sql_cmd.h" +#include "sql_alter.h" // Alter_info +#include "sql_window.h" +#include "sql_trigger.h" +#include "sp.h" // enum enum_sp_type +#include "sql_tvc.h" +#include "item.h" +#include "sql_limit.h" // Select_limit_counters +#include "json_table.h" // Json_table_column +#include "sql_schema.h" +#include "table.h" + +/* Used for flags of nesting constructs */ +#define SELECT_NESTING_MAP_SIZE 64 +typedef Bitmap nesting_map; + +/* YACC and LEX Definitions */ + + +/** + A string with metadata. Usually points to a string in the client + character set, but unlike Lex_ident_cli_st (see below) it does not + necessarily point to a query fragment. It can also point to memory + of other kinds (e.g. an additional THD allocated memory buffer + not overlapping with the current query text). 
+ + We'll add more flags here eventually, to know if the string has, e.g.: + - multi-byte characters + - bad byte sequences + - backslash escapes: 'a\nb' + and reuse the original query fragments instead of making the string + copy too early, in Lex_input_stream::get_text(). + This will allow to avoid unnecessary copying, as well as + create more optimal Item types in sql_yacc.yy +*/ +struct Lex_string_with_metadata_st: public LEX_CSTRING +{ +private: + bool m_is_8bit; // True if the string has 8bit characters + char m_quote; // Quote character, or 0 if not quoted +public: + void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; } + void set_metadata(bool is_8bit, char quote) + { + m_is_8bit= is_8bit; + m_quote= quote; + } + void set(const char *s, size_t len, bool is_8bit, char quote) + { + str= s; + length= len; + set_metadata(is_8bit, quote); + } + void set(const LEX_CSTRING *s, bool is_8bit, char quote) + { + ((LEX_CSTRING &)*this)= *s; + set_metadata(is_8bit, quote); + } + bool is_8bit() const { return m_is_8bit; } + bool is_quoted() const { return m_quote != '\0'; } + char quote() const { return m_quote; } + // Get string repertoire by the 8-bit flag and the character set + my_repertoire_t repertoire(CHARSET_INFO *cs) const + { + return !m_is_8bit && my_charset_is_ascii_based(cs) ? + MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30; + } + // Get string repertoire by the 8-bit flag, for ASCII-based character sets + my_repertoire_t repertoire() const + { + return !m_is_8bit ? MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30; + } +}; + + +/* + Used to store identifiers in the client character set. + Points to a query fragment. 
+*/ +struct Lex_ident_cli_st: public Lex_string_with_metadata_st +{ +public: + void set_keyword(const char *s, size_t len) + { + set(s, len, false, '\0'); + } + void set_ident(const char *s, size_t len, bool is_8bit) + { + set(s, len, is_8bit, '\0'); + } + void set_ident_quoted(const char *s, size_t len, bool is_8bit, char quote) + { + set(s, len, is_8bit, quote); + } + void set_unquoted(const LEX_CSTRING *s, bool is_8bit) + { + set(s, is_8bit, '\0'); + } + const char *pos() const { return str - is_quoted(); } + const char *end() const { return str + length + is_quoted(); } +}; + + +class Lex_ident_cli: public Lex_ident_cli_st +{ +public: + Lex_ident_cli(const LEX_CSTRING *s, bool is_8bit) + { + set_unquoted(s, is_8bit); + } + Lex_ident_cli(const char *s, size_t len) + { + set_ident(s, len, false); + } +}; + + +struct Lex_ident_sys_st: public LEX_CSTRING +{ +public: + bool copy_ident_cli(THD *thd, const Lex_ident_cli_st *str); + bool copy_keyword(THD *thd, const Lex_ident_cli_st *str); + bool copy_sys(THD *thd, const LEX_CSTRING *str); + bool convert(THD *thd, const LEX_CSTRING *str, CHARSET_INFO *cs); + bool copy_or_convert(THD *thd, const Lex_ident_cli_st *str, CHARSET_INFO *cs); + bool is_null() const { return str == NULL; } + bool to_size_number(ulonglong *to) const; + void set_valid_utf8(const LEX_CSTRING *name) + { + DBUG_ASSERT(Well_formed_prefix(system_charset_info, name->str, + name->length).length() == name->length); + str= name->str ; length= name->length; + } +}; + + +class Lex_ident_sys: public Lex_ident_sys_st +{ +public: + Lex_ident_sys(THD *thd, const Lex_ident_cli_st *str) + { + if (copy_ident_cli(thd, str)) + ((LEX_CSTRING &) *this)= null_clex_str; + } + Lex_ident_sys() + { + ((LEX_CSTRING &) *this)= null_clex_str; + } + Lex_ident_sys(const char *name, size_t length) + { + LEX_CSTRING tmp= {name, length}; + set_valid_utf8(&tmp); + } + Lex_ident_sys & operator=(const Lex_ident_sys_st &name) + { + Lex_ident_sys_st::operator=(name); + return *this; + 
} +}; + + +struct Lex_column_list_privilege_st +{ + List *m_columns; + privilege_t m_privilege; +}; + + +class Lex_column_list_privilege: public Lex_column_list_privilege_st +{ +public: + Lex_column_list_privilege(List *columns, privilege_t privilege) + { + m_columns= columns; + m_privilege= privilege; + } +}; + + +/** + ORDER BY ... LIMIT parameters; +*/ +class Lex_order_limit_lock: public Sql_alloc +{ +public: + SQL_I_List *order_list; /* ORDER clause */ + Lex_select_lock lock; + Lex_select_limit limit; + + Lex_order_limit_lock() :order_list(NULL) + {} + + bool set_to(st_select_lex *sel); +}; + + +enum sub_select_type +{ + UNSPECIFIED_TYPE, + /* following 3 enums should be as they are*/ + UNION_TYPE, INTERSECT_TYPE, EXCEPT_TYPE, + GLOBAL_OPTIONS_TYPE, DERIVED_TABLE_TYPE, OLAP_TYPE +}; + +enum set_op_type +{ + UNSPECIFIED, + UNION_DISTINCT, UNION_ALL, + EXCEPT_DISTINCT, EXCEPT_ALL, + INTERSECT_DISTINCT, INTERSECT_ALL +}; + +inline int cmp_unit_op(enum sub_select_type op1, enum sub_select_type op2) +{ + DBUG_ASSERT(op1 >= UNION_TYPE && op1 <= EXCEPT_TYPE); + DBUG_ASSERT(op2 >= UNION_TYPE && op2 <= EXCEPT_TYPE); + return (op1 == INTERSECT_TYPE ? 1 : 0) - (op2 == INTERSECT_TYPE ? 
1 : 0); +} + +enum unit_common_op {OP_MIX, OP_UNION, OP_INTERSECT, OP_EXCEPT}; + +enum enum_view_suid +{ + VIEW_SUID_INVOKER= 0, + VIEW_SUID_DEFINER= 1, + VIEW_SUID_DEFAULT= 2 +}; + + +enum plsql_cursor_attr_t +{ + PLSQL_CURSOR_ATTR_ISOPEN, + PLSQL_CURSOR_ATTR_FOUND, + PLSQL_CURSOR_ATTR_NOTFOUND, + PLSQL_CURSOR_ATTR_ROWCOUNT +}; + + +enum enum_sp_suid_behaviour +{ + SP_IS_DEFAULT_SUID= 0, + SP_IS_NOT_SUID, + SP_IS_SUID +}; + + +enum enum_sp_aggregate_type +{ + DEFAULT_AGGREGATE= 0, + NOT_AGGREGATE, + GROUP_AGGREGATE +}; + + +/* These may not be declared yet */ +class Table_ident; +class sql_exchange; +class LEX_COLUMN; +class sp_head; +class sp_name; +class sp_instr; +class sp_pcontext; +class sp_variable; +class sp_expr_lex; +class sp_assignment_lex; +class partition_info; +class Event_parse_data; +class set_var_base; +class sys_var; +class Item_func_match; +class File_parser; +class Key_part_spec; +class Item_window_func; +struct sql_digest_state; +class With_clause; +class my_var; +class select_handler; +class Pushdown_select; + +#define ALLOC_ROOT_SET 1024 + +#ifdef MYSQL_SERVER +/* + There are 8 different type of table access so there is no more than + combinations 2^8 = 256: + + . STMT_READS_TRANS_TABLE + + . STMT_READS_NON_TRANS_TABLE + + . STMT_READS_TEMP_TRANS_TABLE + + . STMT_READS_TEMP_NON_TRANS_TABLE + + . STMT_WRITES_TRANS_TABLE + + . STMT_WRITES_NON_TRANS_TABLE + + . STMT_WRITES_TEMP_TRANS_TABLE + + . STMT_WRITES_TEMP_NON_TRANS_TABLE + + The unsafe conditions for each combination is represented within a byte + and stores the status of the option --binlog-direct-non-trans-updates, + whether the trx-cache is empty or not, and whether the isolation level + is lower than ISO_REPEATABLE_READ: + + . option (OFF/ON) + . trx-cache (empty/not empty) + . isolation (>= ISO_REPEATABLE_READ / < ISO_REPEATABLE_READ) + + bits 0 : . OFF, . empty, . >= ISO_REPEATABLE_READ + bits 1 : . OFF, . empty, . < ISO_REPEATABLE_READ + bits 2 : . OFF, . not empty, . 
>= ISO_REPEATABLE_READ + bits 3 : . OFF, . not empty, . < ISO_REPEATABLE_READ + bits 4 : . ON, . empty, . >= ISO_REPEATABLE_READ + bits 5 : . ON, . empty, . < ISO_REPEATABLE_READ + bits 6 : . ON, . not empty, . >= ISO_REPEATABLE_READ + bits 7 : . ON, . not empty, . < ISO_REPEATABLE_READ +*/ +extern uint binlog_unsafe_map[256]; +/* + Initializes the array with unsafe combinations and its respective + conditions. +*/ +void binlog_unsafe_map_init(); +#endif + +#ifdef MYSQL_SERVER +/* + The following hack is needed because yy_*.cc do not define + YYSTYPE before including this file +*/ +#ifdef MYSQL_YACC +#define LEX_YYSTYPE void * +#else +#include "lex_symbol.h" +#ifdef MYSQL_LEX +#include "item_func.h" /* Cast_target used in yy_mariadb.hh */ +#include "sql_get_diagnostics.h" /* Types used in yy_mariadb.hh */ +#include "sp_pcontext.h" +#include "yy_mariadb.hh" +#define LEX_YYSTYPE YYSTYPE * +#else +#define LEX_YYSTYPE void * +#endif +#endif +#endif + +// describe/explain types +#define DESCRIBE_NORMAL 1 +#define DESCRIBE_EXTENDED 2 +/* + This is not within #ifdef because we want "EXPLAIN PARTITIONS ..." to produce + additional "partitions" column even if partitioning is not compiled in. 
+*/ +#define DESCRIBE_PARTITIONS 4 +#define DESCRIBE_EXTENDED2 8 + +#ifdef MYSQL_SERVER + +extern const LEX_STRING empty_lex_str; +extern const LEX_CSTRING empty_clex_str; +extern const LEX_CSTRING star_clex_str; +extern const LEX_CSTRING param_clex_str; + +enum enum_sp_data_access +{ + SP_DEFAULT_ACCESS= 0, + SP_CONTAINS_SQL, + SP_NO_SQL, + SP_READS_SQL_DATA, + SP_MODIFIES_SQL_DATA +}; + +const LEX_CSTRING sp_data_access_name[]= +{ + { STRING_WITH_LEN("") }, + { STRING_WITH_LEN("CONTAINS SQL") }, + { STRING_WITH_LEN("NO SQL") }, + { STRING_WITH_LEN("READS SQL DATA") }, + { STRING_WITH_LEN("MODIFIES SQL DATA") } +}; + +#define DERIVED_SUBQUERY 1 +#define DERIVED_VIEW 2 +#define DERIVED_WITH 4 + +enum enum_view_create_mode +{ + VIEW_CREATE_NEW, // check that there are not such VIEW/table + VIEW_ALTER, // check that VIEW .frm with such name exists + VIEW_CREATE_OR_REPLACE // check only that there are not such table +}; + + +class Create_view_info: public Sql_alloc +{ +public: + LEX_CSTRING select; // The SELECT statement of CREATE VIEW + enum enum_view_create_mode mode; + uint16 algorithm; + uint8 check; + enum enum_view_suid suid; + Create_view_info(enum_view_create_mode mode_arg, + uint16 algorithm_arg, + enum_view_suid suid_arg) + :select(null_clex_str), + mode(mode_arg), + algorithm(algorithm_arg), + check(VIEW_CHECK_NONE), + suid(suid_arg) + { } +}; + + +enum enum_drop_mode +{ + DROP_DEFAULT, // mode is not specified + DROP_CASCADE, // CASCADE option + DROP_RESTRICT // RESTRICT option +}; + +/* Options to add_table_to_list() */ +#define TL_OPTION_UPDATING 1 +#define TL_OPTION_FORCE_INDEX 2 +#define TL_OPTION_IGNORE_LEAVES 4 +#define TL_OPTION_ALIAS 8 +#define TL_OPTION_SEQUENCE 16 +#define TL_OPTION_TABLE_FUNCTION 32 + +typedef List List_item; +typedef Mem_root_array Group_list_ptrs; + +/* SERVERS CACHE CHANGES */ +typedef struct st_lex_server_options +{ + long port; + LEX_CSTRING server_name, host, db, username, password, scheme, socket, owner; + void 
reset(LEX_CSTRING name) + { + server_name= name; + host= db= username= password= scheme= socket= owner= null_clex_str; + port= -1; + } +} LEX_SERVER_OPTIONS; + + +/** + Structure to hold parameters for CHANGE MASTER, START SLAVE, and STOP SLAVE. + + Remark: this should not be confused with Master_info (and perhaps + would better be renamed to st_lex_replication_info). Some fields, + e.g., delay, are saved in Relay_log_info, not in Master_info. +*/ +struct LEX_MASTER_INFO +{ + DYNAMIC_ARRAY repl_ignore_server_ids; + DYNAMIC_ARRAY repl_do_domain_ids; + DYNAMIC_ARRAY repl_ignore_domain_ids; + const char *host, *user, *password, *log_file_name; + const char *ssl_key, *ssl_cert, *ssl_ca, *ssl_capath, *ssl_cipher; + const char *ssl_crl, *ssl_crlpath; + const char *relay_log_name; + LEX_CSTRING connection_name; + /* Value in START SLAVE UNTIL master_gtid_pos=xxx */ + LEX_CSTRING gtid_pos_str; + ulonglong pos; + ulong relay_log_pos; + ulong server_id; + uint port, connect_retry; + float heartbeat_period; + int sql_delay; + bool is_demotion_opt; + /* + Enum is used for making it possible to detect if the user + changed variable or if it should be left at old value + */ + enum {LEX_MI_UNCHANGED= 0, LEX_MI_DISABLE, LEX_MI_ENABLE} + ssl, ssl_verify_server_cert, heartbeat_opt, repl_ignore_server_ids_opt, + repl_do_domain_ids_opt, repl_ignore_domain_ids_opt; + enum { + LEX_GTID_UNCHANGED, LEX_GTID_NO, LEX_GTID_CURRENT_POS, LEX_GTID_SLAVE_POS + } use_gtid_opt; + + void init() + { + bzero(this, sizeof(*this)); + my_init_dynamic_array(PSI_INSTRUMENT_ME, &repl_ignore_server_ids, + sizeof(::server_id), 0, 16, MYF(0)); + my_init_dynamic_array(PSI_INSTRUMENT_ME, &repl_do_domain_ids, + sizeof(ulong), 0, 16, MYF(0)); + my_init_dynamic_array(PSI_INSTRUMENT_ME, &repl_ignore_domain_ids, + sizeof(ulong), 0, 16, MYF(0)); + sql_delay= -1; + } + void reset(bool is_change_master) + { + if (unlikely(is_change_master)) + { + delete_dynamic(&repl_ignore_server_ids); + /* Free all the array 
elements. */ + delete_dynamic(&repl_do_domain_ids); + delete_dynamic(&repl_ignore_domain_ids); + } + + host= user= password= log_file_name= ssl_key= ssl_cert= ssl_ca= + ssl_capath= ssl_cipher= ssl_crl= ssl_crlpath= relay_log_name= NULL; + pos= relay_log_pos= server_id= port= connect_retry= 0; + heartbeat_period= 0; + ssl= ssl_verify_server_cert= heartbeat_opt= + repl_ignore_server_ids_opt= repl_do_domain_ids_opt= + repl_ignore_domain_ids_opt= LEX_MI_UNCHANGED; + gtid_pos_str= null_clex_str; + use_gtid_opt= LEX_GTID_UNCHANGED; + sql_delay= -1; + is_demotion_opt= 0; + } +}; + +typedef struct st_lex_reset_slave +{ + bool all; +} LEX_RESET_SLAVE; + +enum olap_type +{ + UNSPECIFIED_OLAP_TYPE, CUBE_TYPE, ROLLUP_TYPE +}; + +/* + String names used to print a statement with index hints. + Keep in sync with index_hint_type. +*/ +extern const char * index_hint_type_name[]; +typedef uchar index_clause_map; + +/* + Bits in index_clause_map : one for each possible FOR clause in + USE/FORCE/IGNORE INDEX index hint specification +*/ +#define INDEX_HINT_MASK_JOIN (1) +#define INDEX_HINT_MASK_GROUP (1 << 1) +#define INDEX_HINT_MASK_ORDER (1 << 2) + +#define INDEX_HINT_MASK_ALL (INDEX_HINT_MASK_JOIN | INDEX_HINT_MASK_GROUP | \ + INDEX_HINT_MASK_ORDER) + +class select_result_sink; + +/* Single element of an USE/FORCE/IGNORE INDEX list specified as a SQL hint */ +class Index_hint : public Sql_alloc +{ +public: + /* The type of the hint : USE/FORCE/IGNORE */ + enum index_hint_type type; + /* Where the hit applies to. A bitmask of INDEX_HINT_MASK_ values */ + index_clause_map clause; + /* + The index name. 
Empty (str=NULL) name represents an empty list + USE INDEX () clause + */ + LEX_CSTRING key_name; + + Index_hint (enum index_hint_type type_arg, index_clause_map clause_arg, + const char *str, size_t length) : + type(type_arg), clause(clause_arg) + { + key_name.str= str; + key_name.length= length; + } + + void print(THD *thd, String *str); +}; + +/* + The state of the lex parsing for selects + + master and slaves are pointers to select_lex. + master is pointer to upper level node. + slave is pointer to lower level node + select_lex is a SELECT without union + unit is container of either + - One SELECT + - UNION of selects + select_lex and unit are both inherited form st_select_lex_node + neighbors are two select_lex or units on the same level + + All select describing structures linked with following pointers: + - list of neighbors (next/prev) (prev of first element point to slave + pointer of upper structure) + - For select this is a list of UNION's (or one element list) + - For units this is a list of sub queries for the upper level select + + - pointer to master (master), which is + If this is a unit + - pointer to outer select_lex + If this is a select_lex + - pointer to outer unit structure for select + + - pointer to slave (slave), which is either: + If this is a unit: + - first SELECT that belong to this unit + If this is a select_lex + - first unit that belong to this SELECT (subquries or derived tables) + + - list of all select_lex (link_next/link_prev) + This is to be used for things like derived tables creation, where we + go through this list and create the derived tables. + + If unit contain several selects (UNION now, INTERSECT etc later) + then it have special select_lex called fake_select_lex. It used for + storing global parameters (like ORDER BY, LIMIT) and executing union. + Subqueries used in global ORDER BY clause will be attached to this + fake_select_lex, which will allow them correctly resolve fields of + 'upper' UNION and outer selects. 
+ + For example for following query: + + select * + from table1 + where table1.field IN (select * from table1_1_1 union + select * from table1_1_2) + union + select * + from table2 + where table2.field=(select (select f1 from table2_1_1_1_1 + where table2_1_1_1_1.f2=table2_1_1.f3) + from table2_1_1 + where table2_1_1.f1=table2.f2) + union + select * from table3; + + we will have following structure: + + select1: (select * from table1 ...) + select2: (select * from table2 ...) + select3: (select * from table3) + select1.1.1: (select * from table1_1_1) + ... + + main unit + fake0 + select1 select2 select3 + |^^ |^ + s||| ||master + l||| |+---------------------------------+ + a||| +---------------------------------+| + v|||master slave || + e||+-------------------------+ || + V| neighbor | V| + unit1.1<+==================>unit1.2 unit2.1 + fake1.1 + select1.1.1 select 1.1.2 select1.2.1 select2.1.1 + |^ + || + V| + unit2.1.1.1 + select2.1.1.1.1 + + + relation in main unit will be following: + (bigger picture for: + main unit + fake0 + select1 select2 select3 + in the above picture) + + main unit + |^^^^|fake_select_lex + |||||+--------------------------------------------+ + ||||+--------------------------------------------+| + |||+------------------------------+ || + ||+--------------+ | || + slave||master | | || + V| neighbor | neighbor | master|V + select1<========>select2<========>select3 fake0 + + list of all select_lex will be following (as it will be constructed by + parser): + + select1->select2->select3->select2.1.1->select 2.1.2->select2.1.1.1.1-+ + | + +---------------------------------------------------------------------+ + | + +->select1.1.1->select1.1.2 + +*/ + +/* + Base class for st_select_lex (SELECT_LEX) & + st_select_lex_unit (SELECT_LEX_UNIT) +*/ +struct LEX; +class st_select_lex; +class st_select_lex_unit; + + +class st_select_lex_node { +protected: + st_select_lex_node *next, **prev, /* neighbor list */ + *master, *slave, /* vertical links */ + 
*link_next, **link_prev; /* list of whole SELECT_LEX */ + enum sub_select_type linkage; + + void init_query_common(); + +public: + ulonglong options; + uint8 uncacheable; + bool distinct:1; + bool no_table_names_allowed:1; /* used for global order by */ + /* + result of this query can't be cached, bit field, can be : + UNCACHEABLE_DEPENDENT_GENERATED + UNCACHEABLE_DEPENDENT_INJECTED + UNCACHEABLE_RAND + UNCACHEABLE_SIDEEFFECT + UNCACHEABLE_EXPLAIN + UNCACHEABLE_PREPARE + */ + + bool is_linkage_set() const + { + return linkage == UNION_TYPE || linkage == INTERSECT_TYPE || linkage == EXCEPT_TYPE; + } + enum sub_select_type get_linkage() { return linkage; } + static void *operator new(size_t size, MEM_ROOT *mem_root) throw () + { return (void*) alloc_root(mem_root, (uint) size); } + static void operator delete(void *ptr,size_t size) { TRASH_FREE(ptr, size); } + static void operator delete(void *ptr, MEM_ROOT *mem_root) {} + + // Ensures that at least all members used during cleanup() are initialized. 
+ st_select_lex_node() + : next(NULL), prev(NULL), + master(NULL), slave(NULL), + link_next(NULL), link_prev(NULL), + linkage(UNSPECIFIED_TYPE) + { + } + + inline st_select_lex_node* get_master() { return master; } + void include_down(st_select_lex_node *upper); + void attach_single(st_select_lex_node *slave_arg); + void include_neighbour(st_select_lex_node *before); + void link_chain_down(st_select_lex_node *first); + void link_neighbour(st_select_lex_node *neighbour) + { + DBUG_ASSERT(next == NULL); + DBUG_ASSERT(neighbour != NULL); + next= neighbour; + neighbour->prev= &next; + } + void cut_next() { next= NULL; } + void include_standalone(st_select_lex_node *sel, st_select_lex_node **ref); + void include_global(st_select_lex_node **plink); + void exclude(); + void exclude_from_tree(); + void exclude_from_global() + { + if (!link_prev) + return; + if (((*link_prev)= link_next)) + link_next->link_prev= link_prev; + link_next= NULL; + link_prev= NULL; + } + void substitute_in_tree(st_select_lex_node *subst); + + void set_slave(st_select_lex_node *slave_arg) { slave= slave_arg; } + void move_node(st_select_lex_node *where_to_move) + { + if (where_to_move == this) + return; + if (next) + next->prev= prev; + *prev= next; + *where_to_move->prev= this; + next= where_to_move; + } + st_select_lex_node *insert_chain_before(st_select_lex_node **ptr_pos_to_insert, + st_select_lex_node *end_chain_node); + void move_as_slave(st_select_lex_node *new_master); + void set_linkage(enum sub_select_type l) + { + DBUG_ENTER("st_select_lex_node::set_linkage"); + DBUG_PRINT("info", ("node: %p linkage: %d->%d", this, linkage, l)); + linkage= l; + DBUG_VOID_RETURN; + } + /* + This method created for reiniting LEX in mysql_admin_table() and can be + used only if you are going remove all SELECT_LEX & units except belonger + to LEX (LEX::unit & LEX::select, for other purposes there are + SELECT_LEX_UNIT::exclude_level & SELECT_LEX_UNIT::exclude_tree. 
+ + It is also used in parsing to detach builtin select. + */ + void cut_subtree() { slave= 0; } + friend class st_select_lex_unit; + friend bool mysql_new_select(LEX *lex, bool move_down, SELECT_LEX *sel); + friend bool mysql_make_view(THD *thd, TABLE_SHARE *share, TABLE_LIST *table, + bool open_view_no_parse); + friend class st_select_lex; +private: + void fast_exclude(); +}; +typedef class st_select_lex_node SELECT_LEX_NODE; + +/* + SELECT_LEX_UNIT - unit of selects (UNION, INTERSECT, ...) group + SELECT_LEXs +*/ +class THD; +class select_result; +class JOIN; +class select_unit; +class Procedure; +class Explain_query; + +void delete_explain_query(LEX *lex); +void create_explain_query(LEX *lex, MEM_ROOT *mem_root); +void create_explain_query_if_not_exists(LEX *lex, MEM_ROOT *mem_root); +bool print_explain_for_slow_log(LEX *lex, THD *thd, String *str); + + +class st_select_lex_unit: public st_select_lex_node { +protected: + TABLE_LIST result_table_list; + select_unit *union_result; + ulonglong found_rows_for_union; + + bool prepare_join(THD *thd, SELECT_LEX *sl, select_result *result, + ulonglong additional_options, + bool is_union_select); + bool join_union_type_handlers(THD *thd, + class Type_holder *holders, uint count); + bool join_union_type_attributes(THD *thd, + class Type_holder *holders, uint count); +public: + bool join_union_item_types(THD *thd, List &types, uint count); + // Ensures that at least all members used during cleanup() are initialized. 
+ st_select_lex_unit() + : union_result(NULL), table(NULL), result(NULL), fake_select_lex(NULL), + last_procedure(NULL),cleaned(false), bag_set_op_optimized(false), + have_except_all_or_intersect_all(false) + { + } + + TABLE *table; /* temporary table using for appending UNION results */ + select_result *result; + st_select_lex *pre_last_parse; + /* + Node on which we should return current_select pointer after parsing + subquery + */ + st_select_lex *return_to; + /* LIMIT clause runtime counters */ + Select_limit_counters lim; + /* not NULL if unit used in subselect, point to subselect item */ + Item_subselect *item; + /* + TABLE_LIST representing this union in the embedding select. Used for + derived tables/views handling. + */ + TABLE_LIST *derived; + /* With clause attached to this unit (if any) */ + With_clause *with_clause; + /* With element where this unit is used as the specification (if any) */ + With_element *with_element; + /* The unit used as a CTE specification from which this unit is cloned */ + st_select_lex_unit *cloned_from; + /* thread handler */ + THD *thd; + /* + SELECT_LEX for hidden SELECT in union which process global + ORDER BY and LIMIT + */ + st_select_lex *fake_select_lex; + /** + SELECT_LEX that stores LIMIT and OFFSET for UNION ALL when noq + fake_select_lex is used. 
+ */ + st_select_lex *saved_fake_select_lex; + + /* pointer to the last node before last subsequence of UNION ALL */ + st_select_lex *union_distinct; + Procedure *last_procedure; /* Pointer to procedure, if such exists */ + + // list of fields which points to temporary table for union + List item_list; + /* + list of types of items inside union (used for union & derived tables) + + Item_type_holders from which this list consist may have pointers to Field, + pointers is valid only after preparing SELECTS of this unit and before + any SELECT of this unit execution + */ + List types; + + bool prepared:1; // prepare phase already performed for UNION (unit) + bool optimized:1; // optimize phase already performed for UNION (unit) + bool optimized_2:1; + bool executed:1; // already executed + bool cleaned:1; + bool bag_set_op_optimized:1; + bool optimize_started:1; + bool have_except_all_or_intersect_all:1; + /** + TRUE if the unit contained TVC at the top level that has been wrapped + into SELECT: + VALUES (v1) ... (vn) => SELECT * FROM (VALUES (v1) ... (vn)) as tvc + */ + bool with_wrapped_tvc:1; + bool is_view:1; + bool describe:1; /* union exec() called for EXPLAIN */ + bool columns_are_renamed:1; + +protected: + /* This is bool, not bit, as it's used and set in many places */ + bool saved_error; +public: + + /** + Pointer to 'last' select, or pointer to select where we stored + global parameters for union. + + If this is a union of multiple selects, the parser puts the global + parameters in fake_select_lex. If the union doesn't use a + temporary table, st_select_lex_unit::prepare() nulls out + fake_select_lex, but saves a copy in saved_fake_select_lex in + order to preserve the global parameters. + + If it is not a union, first_select() is the last select. 
+ + @return select containing the global parameters + */ + inline st_select_lex *global_parameters() + { + if (fake_select_lex != NULL) + return fake_select_lex; + else if (saved_fake_select_lex != NULL) + return saved_fake_select_lex; + return first_select(); + }; + + void init_query(); + st_select_lex* outer_select() const; + const st_select_lex* first_select() const + { + return reinterpret_cast(slave); + } + st_select_lex* first_select() + { + return reinterpret_cast(slave); + } + void set_with_clause(With_clause *with_cl); + st_select_lex_unit* next_unit() + { + return reinterpret_cast(next); + } + st_select_lex* return_after_parsing() { return return_to; } + void exclude_level(); + // void exclude_tree(); // it is not used for long time + bool is_excluded() { return prev == NULL; } + + /* UNION methods */ + bool prepare(TABLE_LIST *derived_arg, select_result *sel_result, + ulonglong additional_options); + bool optimize(); + void optimize_bag_operation(bool is_outer_distinct); + bool exec(); + bool exec_recursive(); + bool cleanup(); + inline void unclean() { cleaned= 0; } + void reinit_exec_mechanism(); + + void print(String *str, enum_query_type query_type); + + bool add_fake_select_lex(THD *thd); + void init_prepare_fake_select_lex(THD *thd, bool first_execution); + inline bool is_prepared() { return prepared; } + bool change_result(select_result_interceptor *result, + select_result_interceptor *old_result); + void set_limit(st_select_lex *values); + void set_thd(THD *thd_arg) { thd= thd_arg; } + inline bool is_unit_op (); + bool union_needs_tmp_table(); + + void set_unique_exclude(); + bool check_distinct_in_union(); + + friend struct LEX; + friend int subselect_union_engine::exec(); + + List *get_column_types(bool for_cursor); + + select_unit *get_union_result() { return union_result; } + int save_union_explain(Explain_query *output); + int save_union_explain_part2(Explain_query *output); + unit_common_op common_op(); + + bool explainable() const; + + 
void reset_distinct(); + void fix_distinct(); + + void register_select_chain(SELECT_LEX *first_sel); + + bool set_nest_level(int new_nest_level); + bool check_parameters(SELECT_LEX *main_select); + + bool set_lock_to_the_last_select(Lex_select_lock l); + + bool can_be_merged(); + + friend class st_select_lex; + +private: + bool is_derived_eliminated() const; +}; + +typedef class st_select_lex_unit SELECT_LEX_UNIT; +typedef Bounds_checked_array Ref_ptr_array; + + +/** + Structure which consists of the field and the item that + corresponds to this field. +*/ + +class Field_pair :public Sql_alloc +{ +public: + Field *field; + Item *corresponding_item; + Field_pair(Field *fld, Item *item) + :field(fld), corresponding_item(item) {} +}; + +Field_pair *get_corresponding_field_pair(Item *item, + List pair_list); +Field_pair *find_matching_field_pair(Item *item, List pair_list); + + +#define TOUCHED_SEL_COND 1/* WHERE/HAVING/ON should be reinited before use */ +#define TOUCHED_SEL_DERIVED (1<<1)/* derived should be reinited before use */ + +#define UNIT_NEST_FL 1 +/* + SELECT_LEX - store information of parsed SELECT statment +*/ +class st_select_lex: public st_select_lex_node +{ +public: + /* + Currently the field first_nested is used only by parser. + It containa either a reference to the first select + of the nest of selects to which 'this' belongs to, or + in the case of priority jump it contains a reference to + the select to which the priority nest has to be attached to. + If there is no priority jump then the first select of the + nest contains the reference to itself in first_nested. + Example: + select1 union select2 intersect select + Here we have a priority jump at select2. + So select2->first_nested points to select1, + while select3->first_nested points to select2 and + select1->first_nested points to select1. + */ + + Name_resolution_context context; + LEX_CSTRING db; + + /* + Point to the LEX in which it was created, used in view subquery detection. 
+ + TODO: make also st_select_lex::parent_stmt_lex (see LEX::stmt_lex) + and use st_select_lex::parent_lex & st_select_lex::parent_stmt_lex + instead of global (from THD) references where it is possible. + */ + LEX *parent_lex; + st_select_lex *first_nested; + Item *where, *having; /* WHERE & HAVING clauses */ + Item *prep_where; /* saved WHERE clause for prepared statement processing */ + Item *prep_having;/* saved HAVING clause for prepared statement processing */ + Item *cond_pushed_into_where; /* condition pushed into WHERE */ + Item *cond_pushed_into_having; /* condition pushed into HAVING */ + + /* + nest_levels are local to the query or VIEW, + and that view merge procedure does not re-calculate them. + So we also have to remember unit against which we count levels. + */ + SELECT_LEX_UNIT *nest_level_base; + Item_sum *inner_sum_func_list; /* list of sum func in nested selects */ + /* + This is a copy of the original JOIN USING list that comes from + the parser. The parser : + 1. Sets the natural_join of the second TABLE_LIST in the join + and the st_select_lex::prev_join_using. + 2. Makes a parent TABLE_LIST and sets its is_natural_join/ + join_using_fields members. + 3. Uses the wrapper TABLE_LIST as a table in the upper level. + We cannot assign directly to join_using_fields in the parser because + at stage (1.) the parent TABLE_LIST is not constructed yet and + the assignment will override the JOIN USING fields of the lower level + joins on the right. 
+ */ + List *prev_join_using; + JOIN *join; /* after JOIN::prepare it is pointer to corresponding JOIN */ + TABLE_LIST *embedding; /* table embedding to the above list */ + table_value_constr *tvc; + + /* The interface employed to execute the select query by a foreign engine */ + select_handler *select_h; + /* The object used to organize execution of the query by a foreign engine */ + select_handler *pushdown_select; + List *join_list; /* list for the currently parsed join */ + st_select_lex *merged_into; /* select which this select is merged into */ + /* (not 0 only for views/derived tables) */ + const char *type; /* type of select for EXPLAIN */ + + + /* List of references to fields referenced from inner selects */ + List inner_refs_list; + List attach_to_conds; + /* Saved values of the WHERE and HAVING clauses*/ + Item::cond_result cond_value, having_value; + /* + Usually it is pointer to ftfunc_list_alloc, but in union used to create + fake select_lex for calling mysql_select under results of union + */ + List *ftfunc_list; + List ftfunc_list_alloc; + /* + The list of items to which MIN/MAX optimizations of opt_sum_query() + have been applied. Used to rollback those optimizations if it's needed. + */ + List min_max_opt_list; + List top_join_list; /* join list of the top level */ + List sj_nests; /* Semi-join nests within this join */ + /* + Beginning of the list of leaves in a FROM clause, where the leaves + inlcude all base tables including view tables. The tables are connected + by TABLE_LIST::next_leaf, so leaf_tables points to the left-most leaf. + + List of all base tables local to a subquery including all view + tables. Unlike 'next_local', this in this list views are *not* + leaves. Created in setup_tables() -> make_leaves_list(). + */ + /* + Subqueries that will need to be converted to semi-join nests, including + those converted to jtbm nests. The list is emptied when conversion is done. 
+ */ + List sj_subselects; + /* + List of IN-predicates in this st_select_lex that + can be transformed into IN-subselect defined with TVC. + */ + List in_funcs; + List leaf_tables; + List leaf_tables_exec; + List leaf_tables_prep; + + /* current index hint kind. used in filling up index_hints */ + enum index_hint_type current_index_hint_type; + + /* + FROM clause - points to the beginning of the TABLE_LIST::next_local list. + */ + SQL_I_List table_list; + + /* + GROUP BY clause. + This list may be mutated during optimization (by remove_const()), + so for prepared statements, we keep a copy of the ORDER.next pointers in + group_list_ptrs, and re-establish the original list before each execution. + */ + SQL_I_List group_list; + SQL_I_List save_group_list; + Group_list_ptrs *group_list_ptrs; + + List item_list; /* list of fields & expressions */ + List pre_fix; /* above list before fix_fields */ + List fix_after_optimize; + SQL_I_List order_list; /* ORDER clause */ + SQL_I_List save_order_list; + SQL_I_List gorder_list; + Lex_select_limit limit_params; /* LIMIT clause parameters */ + + /* Structure to store fields that are used in the GROUP BY of this select */ + List grouping_tmp_fields; + List udf_list; /* udf function calls stack */ + List *index_hints; /* list of USE/FORCE/IGNORE INDEX */ + List save_many_values; + List *save_insert_list; + + bool is_item_list_lookup:1; + /* + Needed to correctly generate 'PRIMARY' or 'SIMPLE' for select_type column + of EXPLAIN + */ + bool have_merged_subqueries:1; + bool is_set_query_expr_tail:1; + bool with_sum_func:1; /* sum function indicator */ + bool with_rownum:1; /* rownum() function indicator */ + bool braces:1; /* SELECT ... UNION (SELECT ... 
) <- this braces */ + bool automatic_brackets:1; /* dummy select for INTERSECT precedence */ + /* TRUE when having fix field called in processing of this SELECT */ + bool having_fix_field:1; + /* + TRUE when fix field is called for a new condition pushed into the + HAVING clause of this SELECT + */ + bool having_fix_field_for_pushed_cond:1; + /* + there are subquery in HAVING clause => we can't close tables before + query processing end even if we use temporary table + */ + bool subquery_in_having:1; + /* TRUE <=> this SELECT is correlated w.r.t. some ancestor select */ + bool with_all_modifier:1; /* used for selects in union */ + bool is_correlated:1; + bool first_natural_join_processing:1; + bool first_cond_optimization:1; + /* do not wrap view fields with Item_ref */ + bool no_wrap_view_item:1; + /* exclude this select from check of unique_table() */ + bool exclude_from_table_unique_test:1; + bool in_tvc:1; + bool skip_locked:1; + bool m_non_agg_field_used:1; + bool m_agg_func_used:1; + bool m_custom_agg_func_used:1; + /* the select is "service-select" and can not have tables */ + bool is_service_select:1; + + /// Array of pointers to top elements of all_fields list + Ref_ptr_array ref_pointer_array; + ulong table_join_options; + + /* + number of items in select_list and HAVING clause used to get number + bigger then can be number of entries that will be added to all item + list during split_sum_func + */ + uint select_n_having_items; + uint cond_count; /* number of sargable Items in where/having/on */ + uint between_count; /* number of between predicates in where/having/on */ + uint max_equal_elems; /* max number of elements in multiple equalities */ + /* + Number of fields used in select list or where clause of current select + and all inner subselects. 
+ */ + uint select_n_where_fields; + /* Total number of elements in group by and order by lists */ + uint order_group_num; + /* reserved for exists 2 in */ + uint select_n_reserved; + /* + it counts the number of bit fields in the SELECT list. These are used when + DISTINCT is converted to a GROUP BY involving BIT fields. + */ + uint hidden_bit_fields; + /* + Number of fields used in the definition of all the windows functions. + This includes: + 1) Fields in the arguments + 2) Fields in the PARTITION BY clause + 3) Fields in the ORDER BY clause + */ + /* + Number of current derived table made with TVC during the + transformation of IN-predicate into IN-subquery for this + st_select_lex. + */ + uint curr_tvc_name; + /* true <=> select has been created a TVC wrapper */ + bool is_tvc_wrapper; + uint fields_in_window_functions; + uint insert_tables; + enum_parsing_place parsing_place; /* where we are parsing expression */ + enum_parsing_place save_parsing_place; + enum_parsing_place context_analysis_place; /* where we are in prepare */ + enum leaf_list_state {UNINIT, READY, SAVED}; + enum leaf_list_state prep_leaf_list_state; + enum olap_type olap; + /* SELECT [FOR UPDATE/LOCK IN SHARE MODE] [SKIP LOCKED] */ + enum select_lock_type {NONE, IN_SHARE_MODE, FOR_UPDATE}; + enum select_lock_type select_lock; + + uint in_sum_expr; + uint select_number; /* number of select (used for EXPLAIN) */ + uint with_wild; /* item list contain '*' ; Counter */ + /* Number of Item_sum-derived objects in this SELECT */ + uint n_sum_items; + /* Number of Item_sum-derived objects in children and descendant SELECTs */ + uint n_child_sum_items; + uint versioned_tables; /* For versioning */ + int nest_level; /* nesting level of select */ + /* index in the select list of the expression currently being fixed */ + int cur_pos_in_select_list; + + /* + This array is used to note whether we have any candidates for + expression caching in the corresponding clauses + */ + bool 
expr_cache_may_be_used[PARSING_PLACE_SIZE]; + uint8 nest_flags; + /* + This variable is required to ensure proper work of subqueries and + stored procedures. Generally, one should use the states of + Query_arena to determine if it's a statement prepare or first + execution of a stored procedure. However, in case when there was an + error during the first execution of a stored procedure, the SP body + is not expelled from the SP cache. Therefore, a deeply nested + subquery might be left unoptimized. So we need this per-subquery + variable to inidicate the optimization/execution state of every + subquery. Prepared statements work OK in that regard, as in + case of an error during prepare the PS is not created. + */ + uint8 changed_elements; // see TOUCHED_SEL_* + + /** + The set of those tables whose fields are referenced in the select list of + this select level. + */ + table_map select_list_tables; + + /* Set to 1 if any field in field list has ROWNUM() */ + bool rownum_in_field_list; + + /* namp of nesting SELECT visibility (for aggregate functions check) */ + nesting_map name_visibility_map; + table_map with_dep; + index_clause_map current_index_hint_clause; + + /* it is for correct printing SELECT options */ + thr_lock_type lock_type; + + /** System Versioning */ + int vers_setup_conds(THD *thd, TABLE_LIST *tables); + /* push new Item_field into item_list */ + bool vers_push_field(THD *thd, TABLE_LIST *table, + const LEX_CSTRING field_name); + + int period_setup_conds(THD *thd, TABLE_LIST *table); + void init_query(); + void init_select(); + st_select_lex_unit* master_unit() { return (st_select_lex_unit*) master; } + inline void set_master_unit(st_select_lex_unit *master_unit) + { + master= (st_select_lex_node *)master_unit; + } + void set_master(st_select_lex *master_arg) + { + master= master_arg; + } + st_select_lex_unit* first_inner_unit() + { + return (st_select_lex_unit*) slave; + } + st_select_lex* outer_select(); + bool is_query_topmost(THD *thd); + 
st_select_lex* next_select() { return (st_select_lex*) next; } + st_select_lex* next_select_in_list() + { + return (st_select_lex*) link_next; + } + st_select_lex_node** next_select_in_list_addr() + { + return &link_next; + } + st_select_lex* return_after_parsing() + { + return master_unit()->return_after_parsing(); + } + inline bool is_subquery_function() { return master_unit()->item != 0; } + + bool mark_as_dependent(THD *thd, st_select_lex *last, + Item_ident *dependency); + + void set_braces(bool value) + { + braces= value; + } + bool inc_in_sum_expr(); + uint get_in_sum_expr(); + + bool add_item_to_list(THD *thd, Item *item); + bool add_group_to_list(THD *thd, Item *item, bool asc); + bool add_ftfunc_to_list(THD *thd, Item_func_match *func); + bool add_order_to_list(THD *thd, Item *item, bool asc); + bool add_gorder_to_list(THD *thd, Item *item, bool asc); + TABLE_LIST* add_table_to_list(THD *thd, Table_ident *table, + LEX_CSTRING *alias, + ulong table_options, + thr_lock_type flags= TL_UNLOCK, + enum_mdl_type mdl_type= MDL_SHARED_READ, + List *hints= 0, + List *partition_names= 0, + LEX_STRING *option= 0); + TABLE_LIST* get_table_list(); + bool init_nested_join(THD *thd); + TABLE_LIST *end_nested_join(THD *thd); + TABLE_LIST *nest_last_join(THD *thd); + void add_joined_table(TABLE_LIST *table); + bool add_cross_joined_table(TABLE_LIST *left_op, TABLE_LIST *right_op, + bool straight_fl); + TABLE_LIST *convert_right_join(); + List* get_item_list(); + ulong get_table_join_options(); + void set_lock_for_tables(thr_lock_type lock_type, bool for_update, + bool skip_locks); + /* + This method created for reiniting LEX in mysql_admin_table() and can be + used only if you are going remove all SELECT_LEX & units except belonger + to LEX (LEX::unit & LEX::select, for other purposes there are + SELECT_LEX_UNIT::exclude_level & SELECT_LEX_UNIT::exclude_tree + */ + void cut_subtree() { slave= 0; } + bool test_limit(); + /** + Get offset for LIMIT. 
+ + Evaluate offset item if necessary. + + @return Number of rows to skip. + */ + ha_rows get_offset(); + /** + Get limit. + + Evaluate limit item if necessary. + + @return Limit of rows in result. + */ + ha_rows get_limit(); + + friend struct LEX; + st_select_lex() : group_list_ptrs(NULL), braces(0), + automatic_brackets(0), n_sum_items(0), n_child_sum_items(0) + {} + void make_empty_select() + { + init_query(); + init_select(); + } + bool setup_ref_array(THD *thd, uint order_group_num); + uint get_cardinality_of_ref_ptrs_slice(uint order_group_num_arg); + void print(THD *thd, String *str, enum_query_type query_type); + void print_item_list(THD *thd, String *str, enum_query_type query_type); + void print_set_clause(THD *thd, String *str, enum_query_type query_type); + void print_on_duplicate_key_clause(THD *thd, String *str, + enum_query_type query_type); + static void print_order(String *str, + ORDER *order, + enum_query_type query_type); + void print_limit(THD *thd, String *str, enum_query_type query_type); + void fix_prepare_information(THD *thd, Item **conds, Item **having_conds); + /* + Destroy the used execution plan (JOIN) of this subtree (this + SELECT_LEX and all nested SELECT_LEXes and SELECT_LEX_UNITs). + */ + bool cleanup(); + /* + Recursively cleanup the join of this select lex and of all nested + select lexes. + */ + void cleanup_all_joins(bool full); + + void set_index_hint_type(enum index_hint_type type, index_clause_map clause); + + /* + Add a index hint to the tagged list of hints. 
The type and clause of the + hint will be the current ones (set by set_index_hint()) + */ + bool add_index_hint (THD *thd, const char *str, size_t length); + + /* make a list to hold index hints */ + void alloc_index_hints (THD *thd); + /* read and clear the index hints */ + List* pop_index_hints(void) + { + List *hints= index_hints; + index_hints= NULL; + return hints; + } + + inline void clear_index_hints(void) { index_hints= NULL; } + bool is_part_of_union() { return master_unit()->is_unit_op(); } + bool is_top_level_node() + { + return (select_number == 1) && !is_part_of_union(); + } + bool optimize_unflattened_subqueries(bool const_only); + /* Set the EXPLAIN type for this subquery. */ + void set_explain_type(bool on_the_fly); + bool handle_derived(LEX *lex, uint phases); + void append_table_to_list(TABLE_LIST *TABLE_LIST::*link, TABLE_LIST *table); + bool get_free_table_map(table_map *map, uint *tablenr); + void replace_leaf_table(TABLE_LIST *table, List &tbl_list); + void remap_tables(TABLE_LIST *derived, table_map map, + uint tablenr, st_select_lex *parent_lex); + bool merge_subquery(THD *thd, TABLE_LIST *derived, st_select_lex *subq_lex, + uint tablenr, table_map map); + inline bool is_mergeable() + { + return (next_select() == 0 && group_list.elements == 0 && + having == 0 && with_sum_func == 0 && with_rownum == 0 && + table_list.elements >= 1 && !(options & SELECT_DISTINCT) && + limit_params.select_limit == 0); + } + void mark_as_belong_to_derived(TABLE_LIST *derived); + void increase_derived_records(ha_rows records); + void update_used_tables(); + void update_correlated_cache(); + void mark_const_derived(bool empty); + + bool save_leaf_tables(THD *thd); + bool save_prep_leaf_tables(THD *thd); + + void set_unique_exclude(); + + bool is_merged_child_of(st_select_lex *ancestor); + + /* + For MODE_ONLY_FULL_GROUP_BY we need to maintain two flags: + - Non-aggregated fields are used in this select. + - Aggregate functions are used in this select. 
+ In MODE_ONLY_FULL_GROUP_BY only one of these may be true. + */ + bool non_agg_field_used() const { return m_non_agg_field_used; } + bool agg_func_used() const { return m_agg_func_used; } + bool custom_agg_func_used() const { return m_custom_agg_func_used; } + + void set_non_agg_field_used(bool val) { m_non_agg_field_used= val; } + void set_agg_func_used(bool val) { m_agg_func_used= val; } + void set_custom_agg_func_used(bool val) { m_custom_agg_func_used= val; } + inline void set_with_clause(With_clause *with_clause); + With_clause *get_with_clause() + { + return master_unit()->with_clause; + } + With_element *get_with_element() + { + return master_unit()->cloned_from ? + master_unit()->cloned_from->with_element : + master_unit()->with_element; + } + With_element *find_table_def_in_with_clauses(TABLE_LIST *table); + bool check_unrestricted_recursive(bool only_standard_compliant); + bool check_subqueries_with_recursive_references(); + void collect_grouping_fields_for_derived(THD *thd, ORDER *grouping_list); + bool collect_grouping_fields(THD *thd); + bool collect_fields_equal_to_grouping(THD *thd); + void check_cond_extraction_for_grouping_fields(THD *thd, Item *cond); + Item *build_cond_for_grouping_fields(THD *thd, Item *cond, + bool no_to_clones); + + List window_specs; + bool is_win_spec_list_built; + void prepare_add_window_spec(THD *thd); + bool add_window_def(THD *thd, LEX_CSTRING *win_name, LEX_CSTRING *win_ref, + SQL_I_List win_partition_list, + SQL_I_List win_order_list, + Window_frame *win_frame); + bool add_window_spec(THD *thd, LEX_CSTRING *win_ref, + SQL_I_List win_partition_list, + SQL_I_List win_order_list, + Window_frame *win_frame); + List window_funcs; + bool add_window_func(Item_window_func *win_func); + + bool have_window_funcs() const { return (window_funcs.elements !=0); } + ORDER *find_common_window_func_partition_fields(THD *thd); + + bool cond_pushdown_is_allowed() const + { return !olap && !limit_params.explicit_limit && !tvc && 
!with_rownum; } + + bool build_pushable_cond_for_having_pushdown(THD *thd, Item *cond); + void pushdown_cond_into_where_clause(THD *thd, Item *extracted_cond, + Item **remaining_cond, + Item_transformer transformer, + uchar *arg); + Item *pushdown_from_having_into_where(THD *thd, Item *having); + + select_handler *find_select_handler(THD *thd); + + bool is_set_op() + { + return linkage == UNION_TYPE || + linkage == EXCEPT_TYPE || + linkage == INTERSECT_TYPE; + } + + inline void add_where_field(st_select_lex *sel) + { + DBUG_ASSERT(this != sel); + select_n_where_fields+= sel->select_n_where_fields; + } + inline void set_linkage_and_distinct(enum sub_select_type l, bool d) + { + DBUG_ENTER("SELECT_LEX::set_linkage_and_distinct"); + DBUG_PRINT("info", ("select: %p distinct %d", this, d)); + set_linkage(l); + DBUG_ASSERT(l == UNION_TYPE || + l == INTERSECT_TYPE || + l == EXCEPT_TYPE); + if (d && master_unit() && master_unit()->union_distinct != this) + master_unit()->union_distinct= this; + distinct= d; + with_all_modifier= !distinct; + DBUG_VOID_RETURN; + } + bool set_nest_level(int new_nest_level); + bool check_parameters(SELECT_LEX *main_select); + void mark_select() + { + DBUG_ENTER("st_select_lex::mark_select()"); + DBUG_PRINT("info", ("Select #%d", select_number)); + DBUG_VOID_RETURN; + } + void register_unit(SELECT_LEX_UNIT *unit, + Name_resolution_context *outer_context); + SELECT_LEX_UNIT *attach_selects_chain(SELECT_LEX *sel, + Name_resolution_context *context); + void add_statistics(SELECT_LEX_UNIT *unit); + bool make_unique_derived_name(THD *thd, LEX_CSTRING *alias); + void lex_start(LEX *plex); + bool is_unit_nest() { return (nest_flags & UNIT_NEST_FL); } + void mark_as_unit_nest() { nest_flags= UNIT_NEST_FL; } +}; +typedef class st_select_lex SELECT_LEX; + +inline bool st_select_lex_unit::is_unit_op () +{ + if (!first_select()->next_select()) + { + if (first_select()->tvc) + return 1; + else + return 0; + } + + enum sub_select_type linkage= 
first_select()->next_select()->linkage; + return linkage == UNION_TYPE || linkage == INTERSECT_TYPE || + linkage == EXCEPT_TYPE; +} + + +struct st_sp_chistics +{ + LEX_CSTRING comment; + enum enum_sp_suid_behaviour suid; + bool detistic; + enum enum_sp_data_access daccess; + enum enum_sp_aggregate_type agg_type; + void init() { bzero(this, sizeof(*this)); } + void set(const st_sp_chistics &other) { *this= other; } + bool read_from_mysql_proc_row(THD *thd, TABLE *table); +}; + + +class Sp_chistics: public st_sp_chistics +{ +public: + Sp_chistics() { init(); } +}; + + +struct st_trg_chistics: public st_trg_execution_order +{ + enum trg_action_time_type action_time; + enum trg_event_type event; + + const char *ordering_clause_begin; + const char *ordering_clause_end; + +}; + +enum xa_option_words {XA_NONE, XA_JOIN, XA_RESUME, XA_ONE_PHASE, + XA_SUSPEND, XA_FOR_MIGRATE}; + +class Sroutine_hash_entry; + +/* + Class representing list of all tables used by statement and other + information which is necessary for opening and locking its tables, + like SQL command for this statement. + + Also contains information about stored functions used by statement + since during its execution we may have to add all tables used by its + stored functions/triggers to this list in order to pre-open and lock + them. + + Also used by LEX::reset_n_backup/restore_backup_query_tables_list() + methods to save and restore this information. +*/ + +class Query_tables_list +{ +public: + /** + SQL command for this statement. Part of this class since the + process of opening and locking tables for the statement needs + this information to determine correct type of lock for some of + the tables. + */ + enum_sql_command sql_command; + /* Global list of all tables used by this statement */ + TABLE_LIST *query_tables; + /* Pointer to next_global member of last element in the previous list. 
*/ + TABLE_LIST **query_tables_last; + /* + If non-0 then indicates that query requires prelocking and points to + next_global member of last own element in query table list (i.e. last + table which was not added to it as part of preparation to prelocking). + 0 - indicates that this query does not need prelocking. + */ + TABLE_LIST **query_tables_own_last; + /* + Set of stored routines called by statement. + (Note that we use lazy-initialization for this hash). + */ + enum { START_SROUTINES_HASH_SIZE= 16 }; + HASH sroutines; + /* + List linking elements of 'sroutines' set. Allows you to add new elements + to this set as you iterate through the list of existing elements. + 'sroutines_list_own_last' is pointer to ::next member of last element of + this list which represents routine which is explicitly used by query. + 'sroutines_list_own_elements' number of explicitly used routines. + We use these two members for restoring of 'sroutines_list' to the state + in which it was right after query parsing. + */ + SQL_I_List sroutines_list; + Sroutine_hash_entry **sroutines_list_own_last; + uint sroutines_list_own_elements; + + /* + These constructor and destructor serve for creation/destruction + of Query_tables_list instances which are used as backup storage. + */ + Query_tables_list() = default; + ~Query_tables_list() = default; + + /* Initializes (or resets) Query_tables_list object for "real" use. */ + void reset_query_tables_list(bool init); + void destroy_query_tables_list(); + void set_query_tables_list(Query_tables_list *state) + { + *this= *state; + } + + /* + Direct addition to the list of query tables. + If you are using this function, you must ensure that the table + object, in particular table->db member, is initialized. 
+ */ + void add_to_query_tables(TABLE_LIST *table) + { + *(table->prev_global= query_tables_last)= table; + query_tables_last= &table->next_global; + } + bool requires_prelocking() + { + return MY_TEST(query_tables_own_last); + } + void mark_as_requiring_prelocking(TABLE_LIST **tables_own_last) + { + query_tables_own_last= tables_own_last; + } + /* Return pointer to first not-own table in query-tables or 0 */ + TABLE_LIST* first_not_own_table() + { + return ( query_tables_own_last ? *query_tables_own_last : 0); + } + void chop_off_not_own_tables() + { + if (query_tables_own_last) + { + *query_tables_own_last= 0; + query_tables_last= query_tables_own_last; + query_tables_own_last= 0; + } + } + + /** Return a pointer to the last element in query table list. */ + TABLE_LIST *last_table() + { + /* Don't use offsetof() macro in order to avoid warnings. */ + return query_tables ? + (TABLE_LIST*) ((char*) query_tables_last - + ((char*) &(query_tables->next_global) - + (char*) query_tables)) : + 0; + } + + /** + Enumeration listing of all types of unsafe statement. + + @note The order of elements of this enumeration type must + correspond to the order of the elements of the @c explanations + array defined in the body of @c THD::issue_unsafe_warnings. + */ + enum enum_binlog_stmt_unsafe { + /** + SELECT..LIMIT is unsafe because the set of rows returned cannot + be predicted. + */ + BINLOG_STMT_UNSAFE_LIMIT= 0, + /** + INSERT DELAYED is unsafe because the time when rows are inserted + cannot be predicted. + */ + BINLOG_STMT_UNSAFE_INSERT_DELAYED, + /** + Access to log tables is unsafe because slave and master probably + log different things. + */ + BINLOG_STMT_UNSAFE_SYSTEM_TABLE, + /** + Inserting into an autoincrement column in a stored routine is unsafe. + Even with just one autoincrement column, if the routine is invoked more than + once slave is not guaranteed to execute the statement graph same way as + the master. 
+ And since it's impossible to estimate how many times a routine can be invoked at + the query pre-execution phase (see lock_tables), the statement is marked + pessimistically unsafe. + */ + BINLOG_STMT_UNSAFE_AUTOINC_COLUMNS, + /** + Using a UDF (user-defined function) is unsafe. + */ + BINLOG_STMT_UNSAFE_UDF, + /** + Using most system variables is unsafe, because slave may run + with different options than master. + */ + BINLOG_STMT_UNSAFE_SYSTEM_VARIABLE, + /** + Using some functions is unsafe (e.g., UUID). + */ + BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION, + + /** + Mixing transactional and non-transactional statements are unsafe if + non-transactional reads or writes are occur after transactional + reads or writes inside a transaction. + */ + BINLOG_STMT_UNSAFE_NONTRANS_AFTER_TRANS, + + /** + Mixing self-logging and non-self-logging engines in a statement + is unsafe. + */ + BINLOG_STMT_UNSAFE_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE, + + /** + Statements that read from both transactional and non-transactional + tables and write to any of them are unsafe. + */ + BINLOG_STMT_UNSAFE_MIXED_STATEMENT, + + /** + INSERT...IGNORE SELECT is unsafe because which rows are ignored depends + on the order that rows are retrieved by SELECT. This order cannot be + predicted and may differ on master and the slave. + */ + BINLOG_STMT_UNSAFE_INSERT_IGNORE_SELECT, + + /** + INSERT...SELECT...UPDATE is unsafe because which rows are updated depends + on the order that rows are retrieved by SELECT. This order cannot be + predicted and may differ on master and the slave. + */ + BINLOG_STMT_UNSAFE_INSERT_SELECT_UPDATE, + + /** + Query that writes to a table with auto_inc column after selecting from + other tables are unsafe as the order in which the rows are retrieved by + select may differ on master and slave. + */ + BINLOG_STMT_UNSAFE_WRITE_AUTOINC_SELECT, + + /** + INSERT...REPLACE SELECT is unsafe because which rows are replaced depends + on the order that rows are retrieved by SELECT. 
This order cannot be + predicted and may differ on master and the slave. + */ + BINLOG_STMT_UNSAFE_REPLACE_SELECT, + + /** + CREATE TABLE... IGNORE... SELECT is unsafe because which rows are ignored + depends on the order that rows are retrieved by SELECT. This order cannot + be predicted and may differ on master and the slave. + */ + BINLOG_STMT_UNSAFE_CREATE_IGNORE_SELECT, + + /** + CREATE TABLE...REPLACE... SELECT is unsafe because which rows are replaced + depends on the order that rows are retrieved from SELECT. This order + cannot be predicted and may differ on master and the slave + */ + BINLOG_STMT_UNSAFE_CREATE_REPLACE_SELECT, + + /** + CREATE TABLE...SELECT on a table with auto-increment column is unsafe + because which rows are replaced depends on the order that rows are + retrieved from SELECT. This order cannot be predicted and may differ on + master and the slave + */ + BINLOG_STMT_UNSAFE_CREATE_SELECT_AUTOINC, + + /** + UPDATE...IGNORE is unsafe because which rows are ignored depends on the + order that rows are updated. This order cannot be predicted and may differ + on master and the slave. + */ + BINLOG_STMT_UNSAFE_UPDATE_IGNORE, + + /** + INSERT... ON DUPLICATE KEY UPDATE on a table with more than one + UNIQUE KEYS is unsafe. + */ + BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS, + + /** + INSERT into auto-inc field which is not the first part of composed + primary key. + */ + BINLOG_STMT_UNSAFE_AUTOINC_NOT_FIRST, + + /** + Autoincrement lock mode is incompatible with STATEMENT binlog format. + */ + BINLOG_STMT_UNSAFE_AUTOINC_LOCK_MODE, + + /** + INSERT .. SELECT ... SKIP LOCKED is unlikely to have the same + rows locked on the replica. + primary key. + */ + BINLOG_STMT_UNSAFE_SKIP_LOCKED, + + /* The last element of this enumeration type. */ + BINLOG_STMT_UNSAFE_COUNT + }; + /** + This has all flags from 0 (inclusive) to BINLOG_STMT_FLAG_COUNT + (exclusive) set. 
+ */ + static const uint32 BINLOG_STMT_UNSAFE_ALL_FLAGS= + ((1U << BINLOG_STMT_UNSAFE_COUNT) - 1); + + /** + Maps elements of enum_binlog_stmt_unsafe to error codes. + */ + static const int binlog_stmt_unsafe_errcode[BINLOG_STMT_UNSAFE_COUNT]; + + /** + Determine if this statement is marked as unsafe. + + @retval 0 if the statement is not marked as unsafe. + @retval nonzero if the statement is marked as unsafe. + */ + inline bool is_stmt_unsafe() const { + return get_stmt_unsafe_flags() != 0; + } + + inline bool is_stmt_unsafe(enum_binlog_stmt_unsafe unsafe) + { + return binlog_stmt_flags & (1 << unsafe); + } + + /** + Flag the current (top-level) statement as unsafe. + The flag will be reset after the statement has finished. + + @param unsafe_type The type of unsafety: one of the @c + BINLOG_STMT_FLAG_UNSAFE_* flags in @c enum_binlog_stmt_flag. + */ + inline void set_stmt_unsafe(enum_binlog_stmt_unsafe unsafe_type) { + DBUG_ENTER("set_stmt_unsafe"); + DBUG_ASSERT(unsafe_type >= 0 && unsafe_type < BINLOG_STMT_UNSAFE_COUNT); + binlog_stmt_flags|= (1U << unsafe_type); + DBUG_VOID_RETURN; + } + + /** + Set the bits of binlog_stmt_flags determining the type of + unsafeness of the current statement. No existing bits will be + cleared, but new bits may be set. + + @param flags A binary combination of zero or more bits, (1<= ISO_REPEATABLE_READ */ + + /** + Sets the type of table that is about to be accessed while executing a + statement. + + @param accessed_table Enumeration type that defines the type of table, + e.g. temporary, transactional, non-transactional. + */ + inline void set_stmt_accessed_table(enum_stmt_accessed_table accessed_table) + { + DBUG_ENTER("LEX::set_stmt_accessed_table"); + + DBUG_ASSERT(accessed_table >= 0 && accessed_table < STMT_ACCESS_TABLE_COUNT); + stmt_accessed_table_flag |= (1U << accessed_table); + + DBUG_VOID_RETURN; + } + + /** + Checks if a type of table is about to be accessed while executing a + statement. 
+ + @param accessed_table Enumeration type that defines the type of table, + e.g. temporary, transactional, non-transactional. + + @return + @retval TRUE if the type of the table is about to be accessed + @retval FALSE otherwise + */ + inline bool stmt_accessed_table(enum_stmt_accessed_table accessed_table) + { + DBUG_ENTER("LEX::stmt_accessed_table"); + + DBUG_ASSERT(accessed_table >= 0 && accessed_table < STMT_ACCESS_TABLE_COUNT); + + DBUG_RETURN((stmt_accessed_table_flag & (1U << accessed_table)) != 0); + } + + /** + Checks either a trans/non trans temporary table is being accessed while + executing a statement. + + @return + @retval TRUE if a temporary table is being accessed + @retval FALSE otherwise + */ + inline bool stmt_accessed_temp_table() + { + DBUG_ENTER("THD::stmt_accessed_temp_table"); + DBUG_RETURN(stmt_accessed_non_trans_temp_table() || + stmt_accessed_trans_temp_table()); + } + + /** + Checks if a temporary transactional table is being accessed while executing + a statement. + + @return + @retval TRUE if a temporary transactional table is being accessed + @retval FALSE otherwise + */ + inline bool stmt_accessed_trans_temp_table() + { + DBUG_ENTER("THD::stmt_accessed_trans_temp_table"); + + DBUG_RETURN((stmt_accessed_table_flag & + ((1U << STMT_READS_TEMP_TRANS_TABLE) | + (1U << STMT_WRITES_TEMP_TRANS_TABLE))) != 0); + } + inline bool stmt_writes_to_non_temp_table() + { + DBUG_ENTER("THD::stmt_writes_to_non_temp_table"); + + DBUG_RETURN((stmt_accessed_table_flag & + ((1U << STMT_WRITES_TRANS_TABLE) | + (1U << STMT_WRITES_NON_TRANS_TABLE)))); + } + + /** + Checks if a temporary non-transactional table is about to be accessed + while executing a statement. 
+ + @return + @retval TRUE if a temporary non-transactional table is about to be + accessed + @retval FALSE otherwise + */ + inline bool stmt_accessed_non_trans_temp_table() + { + DBUG_ENTER("THD::stmt_accessed_non_trans_temp_table"); + + DBUG_RETURN((stmt_accessed_table_flag & + ((1U << STMT_READS_TEMP_NON_TRANS_TABLE) | + (1U << STMT_WRITES_TEMP_NON_TRANS_TABLE))) != 0); + } + + /* + Checks if a mixed statement is unsafe. + + + @param in_multi_stmt_transaction_mode defines if there is an on-going + multi-transactional statement. + @param binlog_direct defines if --binlog-direct-non-trans-updates is + active. + @param trx_cache_is_not_empty defines if the trx-cache is empty or not. + @param trx_isolation defines the isolation level. + + @return + @retval TRUE if the mixed statement is unsafe + @retval FALSE otherwise + */ + inline bool is_mixed_stmt_unsafe(bool in_multi_stmt_transaction_mode, + bool binlog_direct, + bool trx_cache_is_not_empty, + uint tx_isolation) + { + bool unsafe= FALSE; + + if (in_multi_stmt_transaction_mode) + { + uint condition= + (binlog_direct ? BINLOG_DIRECT_ON : BINLOG_DIRECT_OFF) & + (trx_cache_is_not_empty ? TRX_CACHE_NOT_EMPTY : TRX_CACHE_EMPTY) & + (tx_isolation >= ISO_REPEATABLE_READ ? 
IL_GTE_REPEATABLE : IL_LT_REPEATABLE); + + unsafe= (binlog_unsafe_map[stmt_accessed_table_flag] & condition); + +#if !defined(DBUG_OFF) + DBUG_PRINT("LEX::is_mixed_stmt_unsafe", ("RESULT %02X %02X %02X", condition, + binlog_unsafe_map[stmt_accessed_table_flag], + (binlog_unsafe_map[stmt_accessed_table_flag] & condition))); + + int type_in= 0; + for (; type_in < STMT_ACCESS_TABLE_COUNT; type_in++) + { + if (stmt_accessed_table((enum_stmt_accessed_table) type_in)) + DBUG_PRINT("LEX::is_mixed_stmt_unsafe", ("ACCESSED %s ", + stmt_accessed_table_string((enum_stmt_accessed_table) type_in))); + } +#endif + } + + if (stmt_accessed_table(STMT_WRITES_NON_TRANS_TABLE) && + stmt_accessed_table(STMT_READS_TRANS_TABLE) && + tx_isolation < ISO_REPEATABLE_READ) + unsafe= TRUE; + else if (stmt_accessed_table(STMT_WRITES_TEMP_NON_TRANS_TABLE) && + stmt_accessed_table(STMT_READS_TRANS_TABLE) && + tx_isolation < ISO_REPEATABLE_READ) + unsafe= TRUE; + + return(unsafe); + } + + /** + true if the parsed tree contains references to stored procedures + or functions, false otherwise + */ + bool uses_stored_routines() const + { return sroutines_list.elements != 0; } + +private: + + /** + Enumeration listing special types of statements. + + Currently, the only possible type is ROW_INJECTION. + */ + enum enum_binlog_stmt_type { + /** + The statement is a row injection (i.e., either a BINLOG + statement or a row event executed by the slave SQL thread). + */ + BINLOG_STMT_TYPE_ROW_INJECTION = BINLOG_STMT_UNSAFE_COUNT, + + /** The last element of this enumeration type. */ + BINLOG_STMT_TYPE_COUNT + }; + + /** + Bit field indicating the type of statement. + + There are two groups of bits: + + - The low BINLOG_STMT_UNSAFE_COUNT bits indicate the types of + unsafeness that the current statement has. + + - The next BINLOG_STMT_TYPE_COUNT-BINLOG_STMT_TYPE_COUNT bits indicate if + the statement is of some special type. 
+ + This must be a member of LEX, not of THD: each stored procedure + needs to remember its unsafeness state between calls and each + stored procedure has its own LEX object (but no own THD object). + */ + uint32 binlog_stmt_flags; + + /** + Bit field that determines the type of tables that are about to be + be accessed while executing a statement. + */ + uint32 stmt_accessed_table_flag; +}; + + +/* + st_parsing_options contains the flags for constructions that are + allowed in the current statement. +*/ + +struct st_parsing_options +{ + bool allows_variable; + bool lookup_keywords_after_qualifier; + + st_parsing_options() { reset(); } + void reset(); +}; + + +/** + The state of the lexical parser, when parsing comments. +*/ +enum enum_comment_state +{ + /** + Not parsing comments. + */ + NO_COMMENT, + /** + Parsing comments that need to be preserved. + Typically, these are user comments '/' '*' ... '*' '/'. + */ + PRESERVE_COMMENT, + /** + Parsing comments that need to be discarded. + Typically, these are special comments '/' '*' '!' ... '*' '/', + or '/' '*' '!' 'M' 'M' 'm' 'm' 'm' ... '*' '/', where the comment + markers should not be expanded. + */ + DISCARD_COMMENT +}; + + +/** + @brief This class represents the character input stream consumed during + lexical analysis. + + In addition to consuming the input stream, this class performs some + comment pre processing, by filtering out out of bound special text + from the query input stream. + Two buffers, with pointers inside each buffers, are maintained in + parallel. The 'raw' buffer is the original query text, which may + contain out-of-bound comments. The 'cpp' (for comments pre processor) + is the pre-processed buffer that contains only the query text that + should be seen once out-of-bound data is removed. 
+*/ + +class Lex_input_stream +{ + size_t unescape(CHARSET_INFO *cs, char *to, + const char *str, const char *end, int sep); + my_charset_conv_wc_mb get_escape_func(THD *thd, my_wc_t sep) const; +public: + Lex_input_stream() = default; + + ~Lex_input_stream() = default; + + /** + Object initializer. Must be called before usage. + + @retval FALSE OK + @retval TRUE Error + */ + bool init(THD *thd, char *buff, size_t length); + + void reset(char *buff, size_t length); + + /** + The main method to scan the next token, with token contraction processing + for LALR(2) resolution, e.g. translate "WITH" followed by "ROLLUP" + to a single token WITH_ROLLUP_SYM. + */ + int lex_token(union YYSTYPE *yylval, THD *thd); + + void reduce_digest_token(uint token_left, uint token_right); + +private: + /** + Set the echo mode. + + When echo is true, characters parsed from the raw input stream are + preserved. When false, characters parsed are silently ignored. + @param echo the echo mode. + */ + void set_echo(bool echo) + { + m_echo= echo; + } + + void save_in_comment_state() + { + m_echo_saved= m_echo; + in_comment_saved= in_comment; + } + + void restore_in_comment_state() + { + m_echo= m_echo_saved; + in_comment= in_comment_saved; + } + + /** + Skip binary from the input stream. + @param n number of bytes to accept. + */ + void skip_binary(int n) + { + if (m_echo) + { + memcpy(m_cpp_ptr, m_ptr, n); + m_cpp_ptr += n; + } + m_ptr += n; + } + + /** + Get a character, and advance in the stream. + @return the next character to parse. + */ + unsigned char yyGet() + { + char c= *m_ptr++; + if (m_echo) + *m_cpp_ptr++ = c; + return c; + } + + /** + Get the last character accepted. + @return the last character accepted. + */ + unsigned char yyGetLast() const + { + return m_ptr[-1]; + } + + /** + Look at the next character to parse, but do not accept it. + */ + unsigned char yyPeek() const + { + return m_ptr[0]; + } + + /** + Look ahead at some character to parse. 
+ @param n offset of the character to look up + */ + unsigned char yyPeekn(int n) const + { + return m_ptr[n]; + } + + /** + Cancel the effect of the last yyGet() or yySkip(). + Note that the echo mode should not change between calls to yyGet / yySkip + and yyUnget. The caller is responsible for ensuring that. + */ + void yyUnget() + { + m_ptr--; + if (m_echo) + m_cpp_ptr--; + } + + /** + Accept a character, by advancing the input stream. + */ + void yySkip() + { + if (m_echo) + *m_cpp_ptr++ = *m_ptr++; + else + m_ptr++; + } + + /** + Accept multiple characters at once. + @param n the number of characters to accept. + */ + void yySkipn(int n) + { + if (m_echo) + { + memcpy(m_cpp_ptr, m_ptr, n); + m_cpp_ptr += n; + } + m_ptr += n; + } + + /** + Puts a character back into the stream, canceling + the effect of the last yyGet() or yySkip(). + Note that the echo mode should not change between calls + to unput, get, or skip from the stream. + */ + char *yyUnput(char ch) + { + *--m_ptr= ch; + if (m_echo) + m_cpp_ptr--; + return m_ptr; + } + + /** + End of file indicator for the query text to parse. + @param n number of characters expected + @return true if there are less than n characters to parse + */ + bool eof(int n) const + { + return ((m_ptr + n) >= m_end_of_query); + } + + /** Mark the stream position as the start of a new token. */ + void start_token() + { + m_tok_start_prev= m_tok_start; + m_tok_start= m_ptr; + m_tok_end= m_ptr; + + m_cpp_tok_start_prev= m_cpp_tok_start; + m_cpp_tok_start= m_cpp_ptr; + m_cpp_tok_end= m_cpp_ptr; + } + + /** + Adjust the starting position of the current token. + This is used to compensate for starting whitespace. + */ + void restart_token() + { + m_tok_start= m_ptr; + m_cpp_tok_start= m_cpp_ptr; + } + + /** + Get the maximum length of the utf8-body buffer. + The utf8 body can grow because of the character set conversion and escaping. 
+ */ + size_t get_body_utf8_maximum_length(THD *thd) const; + + /** Get the length of the current token, in the raw buffer. */ + uint yyLength() const + { + /* + The assumption is that the lexical analyser is always 1 character ahead, + which the -1 account for. + */ + DBUG_ASSERT(m_ptr > m_tok_start); + return (uint) ((m_ptr - m_tok_start) - 1); + } + + /** + Test if a lookahead token was already scanned by lex_token(), + for LALR(2) resolution. + */ + bool has_lookahead() const + { + return lookahead_token >= 0; + } + +public: + + /** + End of file indicator for the query text to parse. + @return true if there are no more characters to parse + */ + bool eof() const + { + return (m_ptr >= m_end_of_query); + } + + /** Get the raw query buffer. */ + const char *get_buf() const + { + return m_buf; + } + + /** Get the pre-processed query buffer. */ + const char *get_cpp_buf() const + { + return m_cpp_buf; + } + + /** Get the end of the raw query buffer. */ + const char *get_end_of_query() const + { + return m_end_of_query; + } + + /** Get the token start position, in the raw buffer. */ + const char *get_tok_start() const + { + return has_lookahead() ? m_tok_start_prev : m_tok_start; + } + + void set_cpp_tok_start(const char *pos) + { + m_cpp_tok_start= pos; + } + + /** Get the token end position, in the raw buffer. */ + const char *get_tok_end() const + { + return m_tok_end; + } + + /** Get the current stream pointer, in the raw buffer. */ + const char *get_ptr() const + { + return m_ptr; + } + + /** Get the token start position, in the pre-processed buffer. */ + const char *get_cpp_tok_start() const + { + return has_lookahead() ? m_cpp_tok_start_prev : m_cpp_tok_start; + } + + /** Get the token end position, in the pre-processed buffer. */ + const char *get_cpp_tok_end() const + { + return m_cpp_tok_end; + } + + /** + Get the token end position in the pre-processed buffer, + with trailing spaces removed. 
+ */ + const char *get_cpp_tok_end_rtrim() const + { + const char *p; + for (p= m_cpp_tok_end; + p > m_cpp_buf && my_isspace(system_charset_info, p[-1]); + p--) + { } + return p; + } + + /** Get the current stream pointer, in the pre-processed buffer. */ + const char *get_cpp_ptr() const + { + return m_cpp_ptr; + } + + /** + Get the current stream pointer, in the pre-processed buffer, + with traling spaces removed. + */ + const char *get_cpp_ptr_rtrim() const + { + const char *p; + for (p= m_cpp_ptr; + p > m_cpp_buf && my_isspace(system_charset_info, p[-1]); + p--) + { } + return p; + } + /** Get the utf8-body string. */ + LEX_CSTRING body_utf8() const + { + return LEX_CSTRING({m_body_utf8, (size_t) (m_body_utf8_ptr - m_body_utf8)}); + } + + void body_utf8_start(THD *thd, const char *begin_ptr); + void body_utf8_append(const char *ptr); + void body_utf8_append(const char *ptr, const char *end_ptr); + void body_utf8_append_ident(THD *thd, + const Lex_string_with_metadata_st *txt, + const char *end_ptr); + void body_utf8_append_escape(THD *thd, + const LEX_CSTRING *txt, + CHARSET_INFO *txt_cs, + const char *end_ptr, + my_wc_t sep); + +private: + /** + LALR(2) resolution, look ahead token. + Value of the next token to return, if any, + or -1, if no token was parsed in advance. + Note: 0 is a legal token, and represents YYEOF. 
+ */ + int lookahead_token; + + /** LALR(2) resolution, value of the look ahead token.*/ + LEX_YYSTYPE lookahead_yylval; + + bool get_text(Lex_string_with_metadata_st *to, + uint sep, int pre_skip, int post_skip); + + void add_digest_token(uint token, LEX_YYSTYPE yylval); + + bool consume_comment(int remaining_recursions_permitted); + int lex_one_token(union YYSTYPE *yylval, THD *thd); + int find_keyword(Lex_ident_cli_st *str, uint len, bool function) const; + LEX_CSTRING get_token(uint skip, uint length); + int scan_ident_sysvar(THD *thd, Lex_ident_cli_st *str); + int scan_ident_start(THD *thd, Lex_ident_cli_st *str); + int scan_ident_middle(THD *thd, Lex_ident_cli_st *str, + CHARSET_INFO **cs, my_lex_states *); + int scan_ident_delimited(THD *thd, Lex_ident_cli_st *str, uchar quote_char); + bool get_7bit_or_8bit_ident(THD *thd, uchar *last_char); + + /** Current thread. */ + THD *m_thd; + + /** Pointer to the current position in the raw input stream. */ + char *m_ptr; + + /** Starting position of the last token parsed, in the raw buffer. */ + const char *m_tok_start; + + /** Ending position of the previous token parsed, in the raw buffer. */ + const char *m_tok_end; + + /** End of the query text in the input stream, in the raw buffer. */ + const char *m_end_of_query; + + /** Starting position of the previous token parsed, in the raw buffer. */ + const char *m_tok_start_prev; + + /** Begining of the query text in the input stream, in the raw buffer. */ + const char *m_buf; + + /** Length of the raw buffer. */ + size_t m_buf_length; + + /** Echo the parsed stream to the pre-processed buffer. */ + bool m_echo:1; + bool m_echo_saved:1; + + /** Pre-processed buffer. */ + char *m_cpp_buf; + + /** Pointer to the current position in the pre-processed input stream. */ + char *m_cpp_ptr; + + /** + Starting position of the last token parsed, + in the pre-processed buffer. 
+ */ + const char *m_cpp_tok_start; + + /** + Starting position of the previous token parsed, + in the pre-procedded buffer. + */ + const char *m_cpp_tok_start_prev; + + /** + Ending position of the previous token parsed, + in the pre-processed buffer. + */ + const char *m_cpp_tok_end; + + /** UTF8-body buffer created during parsing. */ + char *m_body_utf8; + + /** Pointer to the current position in the UTF8-body buffer. */ + char *m_body_utf8_ptr; + + /** + Position in the pre-processed buffer. The query from m_cpp_buf to + m_cpp_utf_processed_ptr is converted to UTF8-body. + */ + const char *m_cpp_utf8_processed_ptr; + +public: + + /** Current state of the lexical analyser. */ + enum my_lex_states next_state; + + /** + Position of ';' in the stream, to delimit multiple queries. + This delimiter is in the raw buffer. + */ + const char *found_semicolon; + + /** SQL_MODE = IGNORE_SPACE. */ + bool ignore_space:1; + + /** + TRUE if we're parsing a prepared statement: in this mode + we should allow placeholders. + */ + bool stmt_prepare_mode:1; + /** + TRUE if we should allow multi-statements. + */ + bool multi_statements:1; + + /** Current line number. */ + uint yylineno; + + /** + Current statement digest instrumentation. + */ + sql_digest_state* m_digest; + +private: + /** State of the lexical analyser for comments. */ + enum_comment_state in_comment; + enum_comment_state in_comment_saved; + + /** + Starting position of the TEXT_STRING or IDENT in the pre-processed + buffer. + + NOTE: this member must be used within MYSQLlex() function only. + */ + const char *m_cpp_text_start; + + /** + Ending position of the TEXT_STRING or IDENT in the pre-processed + buffer. + + NOTE: this member must be used within MYSQLlex() function only. + */ + const char *m_cpp_text_end; + + /** + Character set specified by the character-set-introducer. + + NOTE: this member must be used within MYSQLlex() function only. 
+ */ + CHARSET_INFO *m_underscore_cs; +}; + + +/** + Abstract representation of a statement. + This class is an interface between the parser and the runtime. + The parser builds the appropriate sub classes of Sql_statement + to represent a SQL statement in the parsed tree. + The execute() method in the sub classes contain the runtime implementation. + Note that this interface is used for SQL statement recently implemented, + the code for older statements tend to load the LEX structure with more + attributes instead. + The recommended way to implement new statements is to sub-class + Sql_statement, as this improves code modularity (see the 'big switch' in + dispatch_command()), and decrease the total size of the LEX structure + (therefore saving memory in stored programs). +*/ +class Sql_statement : public Sql_alloc +{ +public: + /** + Execute this SQL statement. + @param thd the current thread. + @return 0 on success. + */ + virtual bool execute(THD *thd) = 0; + +protected: + /** + Constructor. + @param lex the LEX structure that represents parts of this statement. + */ + Sql_statement(LEX *lex) + : m_lex(lex) + {} + + /** Destructor. */ + virtual ~Sql_statement() + { + /* + Sql_statement objects are allocated in thd->mem_root. + In MySQL, the C++ destructor is never called, the underlying MEM_ROOT is + simply destroyed instead. + Do not rely on the destructor for any cleanup. + */ + DBUG_ASSERT(FALSE); + } + +protected: + /** + The legacy LEX structure for this statement. + The LEX structure contains the existing properties of the parsed tree. + TODO: with time, attributes from LEX should move to sub classes of + Sql_statement, so that the parser only builds Sql_statement objects + with the minimum set of attributes, instead of a LEX structure that + contains the collection of every possible attribute. 
+ */ + LEX *m_lex; +}; + + +class Delete_plan; +class SQL_SELECT; + +class Explain_query; +class Explain_update; +class Explain_delete; + +/* + Query plan of a single-table UPDATE. + (This is actually a plan for single-table DELETE also) +*/ + +class Update_plan +{ +protected: + bool impossible_where; + bool no_partitions; +public: + /* Allocate things there */ + MEM_ROOT *mem_root; + + TABLE *table; + SQL_SELECT *select; + uint index; + ha_rows scanned_rows; + /* + Top-level select_lex. Most of its fields are not used, we need it only to + get to the subqueries. + */ + SELECT_LEX *select_lex; + + key_map possible_keys; + bool using_filesort; + bool using_io_buffer; + + /* Set this plan to be a plan to do nothing because of impossible WHERE */ + void set_impossible_where() { impossible_where= true; } + void set_no_partitions() { no_partitions= true; } + + Explain_update* save_explain_update_data(THD *thd, MEM_ROOT *mem_root); +protected: + bool save_explain_data_intern(THD *thd, MEM_ROOT *mem_root, Explain_update *eu, bool is_analyze); +public: + virtual ~Update_plan() = default; + + Update_plan(MEM_ROOT *mem_root_arg) : + impossible_where(false), no_partitions(false), + mem_root(mem_root_arg), + using_filesort(false), using_io_buffer(false) + {} +}; + + +/* Query plan of a single-table DELETE */ +class Delete_plan : public Update_plan +{ + bool deleting_all_rows; +public: + + /* Construction functions */ + Delete_plan(MEM_ROOT *mem_root_arg) : + Update_plan(mem_root_arg), + deleting_all_rows(false) + {} + + /* Set this query plan to be a plan to make a call to h->delete_all_rows() */ + void set_delete_all_rows(ha_rows rows_arg) + { + deleting_all_rows= true; + scanned_rows= rows_arg; + } + void cancel_delete_all_rows() + { + deleting_all_rows= false; + } + + Explain_delete* save_explain_delete_data(THD *thd, MEM_ROOT *mem_root); +}; + +enum account_lock_type +{ + ACCOUNTLOCK_UNSPECIFIED= 0, + ACCOUNTLOCK_LOCKED, + ACCOUNTLOCK_UNLOCKED +}; + +enum password_exp_type 
+{ + PASSWORD_EXPIRE_UNSPECIFIED= 0, + PASSWORD_EXPIRE_NOW, + PASSWORD_EXPIRE_NEVER, + PASSWORD_EXPIRE_DEFAULT, + PASSWORD_EXPIRE_INTERVAL +}; + +struct Account_options: public USER_RESOURCES +{ + Account_options() = default; + + void reset() + { + bzero(this, sizeof(*this)); + ssl_type= SSL_TYPE_NOT_SPECIFIED; + } + + enum SSL_type ssl_type; // defined in violite.h + LEX_CSTRING x509_subject, x509_issuer, ssl_cipher; + account_lock_type account_locked; + password_exp_type password_expire; + longlong num_expiration_days; +}; + +class Query_arena_memroot; +/* The state of the lex parsing. This is saved in the THD struct */ + + +class Lex_prepared_stmt +{ + Lex_ident_sys m_name; // Statement name (in all queries) + Item *m_code; // PREPARE or EXECUTE IMMEDIATE source expression + List m_params; // List of parameters for EXECUTE [IMMEDIATE] +public: + + Lex_prepared_stmt() + :m_code(NULL) + { } + const Lex_ident_sys &name() const + { + return m_name; + } + uint param_count() const + { + return m_params.elements; + } + List ¶ms() + { + return m_params; + } + void set(const Lex_ident_sys_st &ident, Item *code, List *params) + { + DBUG_ASSERT(m_params.elements == 0); + m_name= ident; + m_code= code; + if (params) + m_params= *params; + } + bool params_fix_fields(THD *thd) + { + // Fix Items in the EXECUTE..USING list + List_iterator_fast param_it(m_params); + while (Item *param= param_it++) + { + if (param->fix_fields_if_needed_for_scalar(thd, 0)) + return true; + } + return false; + } + bool get_dynamic_sql_string(THD *thd, LEX_CSTRING *dst, String *buffer); + void lex_start() + { + m_params.empty(); + } +}; + + +class Lex_grant_object_name: public Grant_object_name, public Sql_alloc +{ +public: + Lex_grant_object_name(Table_ident *table_ident) + :Grant_object_name(table_ident) + { } + Lex_grant_object_name(const LEX_CSTRING &db, Type type) + :Grant_object_name(db, type) + { } +}; + + +class Lex_grant_privilege: public Grant_privilege, public Sql_alloc +{ +public: + 
Lex_grant_privilege() {} + Lex_grant_privilege(privilege_t grant, bool all_privileges= false) + :Grant_privilege(grant, all_privileges) + { } +}; + + +struct LEX: public Query_tables_list +{ + SELECT_LEX_UNIT unit; /* most upper unit */ + SELECT_LEX *first_select_lex() { return unit.first_select(); } + const SELECT_LEX *first_select_lex() const { return unit.first_select(); } + +private: + SELECT_LEX builtin_select; + +public: + /* current SELECT_LEX in parsing */ + SELECT_LEX *current_select; + /* list of all SELECT_LEX */ + SELECT_LEX *all_selects_list; + /* current with clause in parsing if any, otherwise 0*/ + With_clause *curr_with_clause; + /* pointer to the first with clause in the current statement */ + With_clause *with_clauses_list; + /* + (*with_clauses_list_last_next) contains a pointer to the last + with clause in the current statement + */ + With_clause **with_clauses_list_last_next; + /* + When a copy of a with element is parsed this is set to the offset of + the with element in the input string, otherwise it's set to 0 + */ + my_ptrdiff_t clone_spec_offset; + + Create_view_info *create_view; + + /* Query Plan Footprint of a currently running select */ + Explain_query *explain; + + /* + LEX which represents current statement (conventional, SP or PS) + + For example during view parsing THD::lex will point to the views LEX and + lex::stmt_lex will point to LEX of the statement where the view will be + included + + Currently it is used to have always correct select numbering inside + statement (LEX::current_select_number) without storing and restoring a + global counter which was THD::select_number. + + TODO: make some unified statement representation (now SP has different) + to store such data like LEX::current_select_number. 
+ */ + LEX *stmt_lex; + + LEX_CSTRING name; + const char *help_arg; + const char *backup_dir; /* For RESTORE/BACKUP */ + const char* to_log; /* For PURGE MASTER LOGS TO */ + String *wild; /* Wildcard in SHOW {something} LIKE 'wild'*/ + sql_exchange *exchange; + select_result *result; + /** + @c the two may also hold BINLOG arguments: either comment holds a + base64-char string or both represent the BINLOG fragment user variables. + */ + LEX_CSTRING comment, ident; + LEX_USER *grant_user; + XID *xid; + THD *thd; + + /* maintain a list of used plugins for this LEX */ + DYNAMIC_ARRAY plugins; + plugin_ref plugins_static_buffer[INITIAL_LEX_PLUGIN_LIST_SIZE]; + + /** SELECT of CREATE VIEW statement */ + LEX_STRING create_view_select; + + /** Start of 'ON table', in trigger statements. */ + const char* raw_trg_on_table_name_begin; + /** End of 'ON table', in trigger statements. */ + const char* raw_trg_on_table_name_end; + + /* Partition info structure filled in by PARTITION BY parse part */ + partition_info *part_info; + + /* + The definer of the object being created (view, trigger, stored routine). + I.e. the value of DEFINER clause. 
+ */ + LEX_USER *definer; + + /* Used in ALTER/CREATE user to store account locking options */ + Account_options account_options; + + Table_type table_type; /* Used for SHOW CREATE */ + List ref_list; + List users_list; + List *insert_list,field_list,value_list,update_list; + List many_values; + List var_list; + List stmt_var_list; //SET_STATEMENT values + List old_var_list; // SET STATEMENT old values +private: + Query_arena_memroot *arena_for_set_stmt; + MEM_ROOT *mem_root_for_set_stmt; + bool sp_block_finalize(THD *thd, const Lex_spblock_st spblock, + class sp_label **splabel); + bool sp_change_context(THD *thd, const sp_pcontext *ctx, bool exclusive); + bool sp_exit_block(THD *thd, sp_label *lab); + bool sp_exit_block(THD *thd, sp_label *lab, Item *when); + + bool sp_continue_loop(THD *thd, sp_label *lab); + + bool sp_for_loop_condition(THD *thd, const Lex_for_loop_st &loop); + bool sp_for_loop_increment(THD *thd, const Lex_for_loop_st &loop); + + /* + Check if Item_field and Item_ref are allowed in the current statement. + @retval false OK (fields are allowed) + @retval true ERROR (fields are not allowed). Error is raised. + */ + bool check_expr_allows_fields_or_error(THD *thd, const char *name) const; + +protected: + bool sp_continue_loop(THD *thd, sp_label *lab, Item *when); + +public: + void parse_error(uint err_number= ER_SYNTAX_ERROR); + inline bool is_arena_for_set_stmt() {return arena_for_set_stmt != 0;} + bool set_arena_for_set_stmt(Query_arena *backup); + void reset_arena_for_set_stmt(Query_arena *backup); + void free_arena_for_set_stmt(); + + void print(String *str, enum_query_type qtype); + List set_var_list; // in-query assignment list + List param_list; + List view_list; // view list (list of field names in view) + List *column_list; // list of column names (in ANALYZE) + List *index_list; // list of index names (in ANALYZE) + /* + A stack of name resolution contexts for the query. 
This stack is used + at parse time to set local name resolution contexts for various parts + of a query. For example, in a JOIN ... ON (some_condition) clause the + Items in 'some_condition' must be resolved only against the operands + of the the join, and not against the whole clause. Similarly, Items in + subqueries should be resolved against the subqueries (and outer queries). + The stack is used in the following way: when the parser detects that + all Items in some clause need a local context, it creates a new context + and pushes it on the stack. All newly created Items always store the + top-most context in the stack. Once the parser leaves the clause that + required a local context, the parser pops the top-most context. + */ + List context_stack; + SELECT_LEX *select_stack[MAX_SELECT_NESTING + 1]; + uint select_stack_top; + /* + Usually this is set to 0, but for INSERT/REPLACE SELECT it is set to 1. + When parsing such statements the pointer to the most outer select is placed + into the second element of select_stack rather than into the first. 
+ */ + uint select_stack_outer_barrier; + + SQL_I_List proc_list; + SQL_I_List auxiliary_table_list, save_list; + Column_definition *last_field; + Table_function_json_table *json_table; + Item_sum *in_sum_func; + udf_func udf; + HA_CHECK_OPT check_opt; // check/repair options + Table_specification_st create_info; + Key *last_key; + LEX_MASTER_INFO mi; // used by CHANGE MASTER + LEX_SERVER_OPTIONS server_options; + LEX_CSTRING relay_log_connection_name; + LEX_RESET_SLAVE reset_slave_info; + ulonglong type; + ulong next_binlog_file_number; + /* The following is used by KILL */ + killed_state kill_signal; + killed_type kill_type; + uint current_select_number; // valid for statment LEX (not view) + + /* + The following bool variables should not be bit fields as they are not + reset for every query + */ + bool autocommit; // Often used, better as bool + bool sp_lex_in_use; // Keep track on lex usage in SPs for error handling + + /* Bit fields, reset for every query */ + bool is_shutdown_wait_for_slaves:1; + bool selects_allow_procedure:1; + /* + A special command "PARSE_VCOL_EXPR" is defined for the parser + to translate a defining expression of a virtual column into an + Item object. + The following flag is used to prevent other applications to use + this command. + */ + bool parse_vcol_expr:1; + bool analyze_stmt:1; /* TRUE<=> this is "ANALYZE $stmt" */ + bool explain_json:1; + /* + true <=> The parsed fragment requires resolution of references to CTE + at the end of parsing. This name resolution process involves searching + for possible dependencies between CTE defined in the parsed fragment and + detecting possible recursive references. + The flag is set to true if the fragment contains CTE definitions. + */ + bool with_cte_resolution:1; + /* + true <=> only resolution of references to CTE are required in the parsed + fragment, no checking of dependencies between CTE is required. + This flag is used only when parsing clones of CTE specifications. 
+ */ + bool only_cte_resolution:1; + bool local_file:1; + bool check_exists:1; + bool verbose:1, no_write_to_binlog:1; + bool safe_to_cache_query:1; + bool ignore:1; + bool next_is_main:1; // use "main" SELECT_LEX for nrxt allocation; + bool next_is_down:1; // use "main" SELECT_LEX for nrxt allocation; + /* + field_list was created for view and should be removed before PS/SP + rexecuton + */ + bool empty_field_list_on_rset:1; + /** + During name resolution search only in the table list given by + Name_resolution_context::first_name_resolution_table and + Name_resolution_context::last_name_resolution_table + (see Item_field::fix_fields()). + */ + bool use_only_table_context:1; + bool escape_used:1; + bool default_used:1; /* using default() function */ + bool with_rownum:1; /* Using rownum() function */ + bool is_lex_started:1; /* If lex_start() did run. For debugging. */ + /* + This variable is used in post-parse stage to declare that sum-functions, + or functions which have sense only if GROUP BY is present, are allowed. + For example in a query + SELECT ... FROM ...WHERE MIN(i) == 1 GROUP BY ... HAVING MIN(i) > 2 + MIN(i) in the WHERE clause is not allowed in the opposite to MIN(i) + in the HAVING clause. Due to possible nesting of select construct + the variable can contain 0 or 1 for each nest level. + */ + nesting_map allow_sum_func; + + Sql_cmd *m_sql_cmd; + + /* + Usually `expr` rule of yacc is quite reused but some commands better + not support subqueries which comes standard with this rule, like + KILL, HA_READ, CREATE/ALTER EVENT etc. Set this to a non-NULL + clause name to get an error. 
+ */ + const char *clause_that_disallows_subselect; + + enum enum_duplicates duplicates; + enum enum_tx_isolation tx_isolation; + enum enum_ha_read_modes ha_read_mode; + union { + enum ha_rkey_function ha_rkey_mode; + enum xa_option_words xa_opt; + bool with_admin_option; // GRANT role + bool with_persistent_for_clause; // uses PERSISTENT FOR clause (in ANALYZE) + }; + enum enum_var_type option_type; + enum enum_drop_mode drop_mode; + + enum backup_stages backup_stage; + enum Foreign_key::fk_match_opt fk_match_option; + enum_fk_option fk_update_opt; + enum_fk_option fk_delete_opt; + enum enum_yes_no_unknown tx_chain, tx_release; + st_parsing_options parsing_options; + /* + In sql_cache we store SQL_CACHE flag as specified by user to be + able to restore SELECT statement from internal structures. + */ + enum e_sql_cache { SQL_CACHE_UNSPECIFIED, SQL_NO_CACHE, SQL_CACHE }; + e_sql_cache sql_cache; + + uint slave_thd_opt, start_transaction_opt; + uint profile_query_id; + uint profile_options; + int nest_level; + + /* + In LEX representing update which were transformed to multi-update + stores total number of tables. For LEX representing multi-delete + holds number of tables from which we will delete records. + */ + uint table_count_update; + + uint8 describe; + /* + A flag that indicates what kinds of derived tables are present in the + query (0 if no derived tables, otherwise a combination of flags + DERIVED_SUBQUERY and DERIVED_VIEW). + */ + uint8 derived_tables; + uint8 context_analysis_only; + uint8 lex_options; // see OPTION_LEX_* + + Alter_info alter_info; + Lex_prepared_stmt prepared_stmt; + /* + For CREATE TABLE statement last element of table list which is not + part of SELECT or LIKE part (i.e. either element for table we are + creating or last of tables referenced by foreign keys). 
+ */ + TABLE_LIST *create_last_non_select_table; + sp_head *sphead; + sp_name *spname; + sp_pcontext *spcont; + + st_sp_chistics sp_chistics; + + Event_parse_data *event_parse_data; + + /* Characterstics of trigger being created */ + st_trg_chistics trg_chistics; + /* + List of all items (Item_trigger_field objects) representing fields in + old/new version of row in trigger. We use this list for checking whenever + all such fields are valid at trigger creation time and for binding these + fields to TABLE object at table open (altough for latter pointer to table + being opened is probably enough). + */ + SQL_I_List trg_table_fields; + + /* + stmt_definition_begin is intended to point to the next word after + DEFINER-clause in the following statements: + - CREATE TRIGGER (points to "TRIGGER"); + - CREATE PROCEDURE (points to "PROCEDURE"); + - CREATE FUNCTION (points to "FUNCTION" or "AGGREGATE"); + - CREATE EVENT (points to "EVENT") + + This pointer is required to add possibly omitted DEFINER-clause to the + DDL-statement before dumping it to the binlog. + + keyword_delayed_begin_offset is the offset to the beginning of the DELAYED + keyword in INSERT DELAYED statement. keyword_delayed_end_offset is the + offset to the character right after the DELAYED keyword. + */ + union { + const char *stmt_definition_begin; + uint keyword_delayed_begin_offset; + }; + + union { + const char *stmt_definition_end; + uint keyword_delayed_end_offset; + }; + + /** + Collects create options for KEY + */ + engine_option_value *option_list; + + /** + Helper pointer to the end of the list when parsing options for + LEX::create_info.option_list (for table) + LEX::last_field->option_list (for fields) + LEX::option_list (for indexes) + */ + engine_option_value *option_list_last; + + + /* + The set of those tables whose fields are referenced in all subqueries + of the query. 
+ TODO: possibly this it is incorrect to have used tables in LEX because + with subquery, it is not clear what does the field mean. To fix this + we should aggregate used tables information for selected expressions + into the select_lex. + */ + table_map used_tables; + /** + Maximum number of rows and/or keys examined by the query, both read, + changed or written. This is the argument of LIMIT ROWS EXAMINED. + The limit is represented by two variables - the Item is needed because + in case of parameters we have to delay its evaluation until execution. + Once evaluated, its value is stored in examined_rows_limit_cnt. + */ + Item *limit_rows_examined; + ulonglong limit_rows_examined_cnt; + /** + Holds a set of domain_ids for deletion at FLUSH..DELETE_DOMAIN_ID + */ + DYNAMIC_ARRAY delete_gtid_domain; + static const ulong initial_gtid_domain_buffer_size= 16; + uint32 gtid_domain_static_buffer[initial_gtid_domain_buffer_size]; + + inline void set_limit_rows_examined() + { + if (limit_rows_examined) + limit_rows_examined_cnt= limit_rows_examined->val_uint(); + else + limit_rows_examined_cnt= ULONGLONG_MAX; + } + + + LEX_CSTRING *win_ref; + Window_frame *win_frame; + Window_frame_bound *frame_top_bound; + Window_frame_bound *frame_bottom_bound; + Window_spec *win_spec; + + Item *upd_del_where; + + /* System Versioning */ + vers_select_conds_t vers_conditions; + vers_select_conds_t period_conditions; + + inline void free_set_stmt_mem_root() + { + DBUG_ASSERT(!is_arena_for_set_stmt()); + if (mem_root_for_set_stmt) + { + free_root(mem_root_for_set_stmt, MYF(0)); + delete mem_root_for_set_stmt; + mem_root_for_set_stmt= 0; + } + } + + LEX(); + + virtual ~LEX() + { + free_set_stmt_mem_root(); + destroy_query_tables_list(); + plugin_unlock_list(NULL, (plugin_ref *)plugins.buffer, plugins.elements); + delete_dynamic(&plugins); + } + + virtual class Query_arena *query_arena() + { + DBUG_ASSERT(0); + return NULL; + } + + void start(THD *thd); + + inline bool 
is_ps_or_view_context_analysis() + { + return (context_analysis_only & + (CONTEXT_ANALYSIS_ONLY_PREPARE | + CONTEXT_ANALYSIS_ONLY_VCOL_EXPR | + CONTEXT_ANALYSIS_ONLY_VIEW)); + } + + inline bool is_view_context_analysis() + { + return (context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW); + } + + inline void uncacheable(uint8 cause) + { + safe_to_cache_query= 0; + + if (current_select) // initialisation SP variables has no SELECT + { + /* + There are no sense to mark select_lex and union fields of LEX, + but we should merk all subselects as uncacheable from current till + most upper + */ + SELECT_LEX *sl; + SELECT_LEX_UNIT *un; + for (sl= current_select, un= sl->master_unit(); + un && un != &unit; + sl= sl->outer_select(), un= (sl ? sl->master_unit() : NULL)) + { + sl->uncacheable|= cause; + un->uncacheable|= cause; + } + if (sl) + sl->uncacheable|= cause; + } + if (first_select_lex()) + first_select_lex()->uncacheable|= cause; + } + void set_trg_event_type_for_tables(); + + TABLE_LIST *unlink_first_table(bool *link_to_local); + void link_first_table_back(TABLE_LIST *first, bool link_to_local); + void first_lists_tables_same(); + void fix_first_select_number(); + + bool can_be_merged(); + bool can_use_merged(); + bool can_not_use_merged(); + bool only_view_structure(); + bool need_correct_ident(); + uint8 get_effective_with_check(TABLE_LIST *view); + /* + Is this update command where 'WHITH CHECK OPTION' clause is important + + SYNOPSIS + LEX::which_check_option_applicable() + + RETURN + TRUE have to take 'WHITH CHECK OPTION' clause into account + FALSE 'WHITH CHECK OPTION' clause do not need + */ + inline bool which_check_option_applicable() + { + switch (sql_command) { + case SQLCOM_UPDATE: + case SQLCOM_UPDATE_MULTI: + case SQLCOM_DELETE: + case SQLCOM_DELETE_MULTI: + case SQLCOM_INSERT: + case SQLCOM_INSERT_SELECT: + case SQLCOM_REPLACE: + case SQLCOM_REPLACE_SELECT: + case SQLCOM_LOAD: + return TRUE; + default: + return FALSE; + } + } + + void 
cleanup_after_one_table_open(); + + bool push_context(Name_resolution_context *context); + + Name_resolution_context *pop_context(); + + SELECT_LEX *select_stack_head() + { + if (likely(select_stack_top)) + return select_stack[select_stack_top - 1]; + return NULL; + } + + bool push_select(SELECT_LEX *select_lex) + { + DBUG_ENTER("LEX::push_select"); + DBUG_PRINT("info", ("Top Select was %p (%d) depth: %u pushed: %p (%d)", + select_stack_head(), + select_stack_top, + (select_stack_top ? + select_stack_head()->select_number : + 0), + select_lex, select_lex->select_number)); + if (unlikely(select_stack_top > MAX_SELECT_NESTING)) + { + my_error(ER_TOO_HIGH_LEVEL_OF_NESTING_FOR_SELECT, MYF(0)); + DBUG_RETURN(TRUE); + } + if (push_context(&select_lex->context)) + DBUG_RETURN(TRUE); + select_stack[select_stack_top++]= select_lex; + current_select= select_lex; + DBUG_RETURN(FALSE); + } + + SELECT_LEX *pop_select() + { + DBUG_ENTER("LEX::pop_select"); + SELECT_LEX *select_lex; + if (likely(select_stack_top)) + select_lex= select_stack[--select_stack_top]; + else + select_lex= 0; + DBUG_PRINT("info", ("Top Select is %p (%d) depth: %u poped: %p (%d)", + select_stack_head(), + select_stack_top, + (select_stack_top ? + select_stack_head()->select_number : + 0), + select_lex, + (select_lex ? select_lex->select_number : 0))); + DBUG_ASSERT(select_lex); + + pop_context(); + + if (unlikely(!select_stack_top)) + { + current_select= &builtin_select; + DBUG_PRINT("info", ("Top Select is empty -> sel builtin: %p service: %u", + current_select, builtin_select.is_service_select)); + builtin_select.is_service_select= false; + } + else + current_select= select_stack[select_stack_top - 1]; + + DBUG_RETURN(select_lex); + } + + SELECT_LEX *current_select_or_default() + { + return current_select ? 
current_select : &builtin_select; + } + + bool copy_db_to(LEX_CSTRING *to); + + void inc_select_stack_outer_barrier() + { + select_stack_outer_barrier++; + } + + SELECT_LEX *parser_current_outer_select() + { + return select_stack_top - 1 == select_stack_outer_barrier ? + 0 : select_stack[select_stack_top - 2]; + } + + Name_resolution_context *current_context() + { + return context_stack.head(); + } + + /* + Restore the LEX and THD in case of a parse error. + */ + static void cleanup_lex_after_parse_error(THD *thd); + + void reset_n_backup_query_tables_list(Query_tables_list *backup); + void restore_backup_query_tables_list(Query_tables_list *backup); + + bool table_or_sp_used(); + + bool is_partition_management() const; +#ifdef WITH_PARTITION_STORAGE_ENGINE + bool part_values_current(THD *thd); + bool part_values_history(THD *thd); +#endif + + /** + @brief check if the statement is a single-level join + @return result of the check + @retval TRUE The statement doesn't contain subqueries, unions and + stored procedure calls. + @retval FALSE There are subqueries, UNIONs or stored procedure calls. + */ + bool is_single_level_stmt() + { + /* + This check exploits the fact that the last added to all_select_list is + on its top. So select_lex (as the first added) will be at the tail + of the list. 
+ */ + if (first_select_lex() == all_selects_list && !sroutines.records) + { + return TRUE; + } + return FALSE; + } + + bool save_prep_leaf_tables(); + + int print_explain(select_result_sink *output, uint8 explain_flags, + bool is_analyze, bool is_json_format, + bool *printed_anything); + bool restore_set_statement_var(); + + void init_last_field(Column_definition *field, const LEX_CSTRING *name); + bool last_field_generated_always_as_row_start_or_end(Lex_ident *p, + const char *type, + uint flags); + bool last_field_generated_always_as_row_start(); + bool last_field_generated_always_as_row_end(); + + bool new_sp_instr_stmt(THD *, const LEX_CSTRING &prefix, + const LEX_CSTRING &suffix); + bool sp_proc_stmt_statement_finalize_buf(THD *, const LEX_CSTRING &qbuf); + bool sp_proc_stmt_statement_finalize(THD *, bool no_lookahead); + + sp_variable *sp_param_init(LEX_CSTRING *name); + bool sp_param_fill_definition(sp_variable *spvar, + const Lex_field_type_st &def); + bool sf_return_fill_definition(const Lex_field_type_st &def); + + int case_stmt_action_then(); + bool setup_select_in_parentheses(); + bool set_names(const char *pos, + const Lex_exact_charset_opt_extended_collate &cs, + bool no_lookahead); + bool set_trigger_new_row(const LEX_CSTRING *name, Item *val); + bool set_trigger_field(const LEX_CSTRING *name1, const LEX_CSTRING *name2, + Item *val); + bool set_system_variable(enum_var_type var_type, sys_var *var, + const Lex_ident_sys_st *base_name, Item *val); + bool set_system_variable(enum_var_type var_type, + const Lex_ident_sys_st *name, Item *val); + bool set_system_variable(THD *thd, enum_var_type var_type, + const Lex_ident_sys_st *name1, + const Lex_ident_sys_st *name2, + Item *val); + bool set_default_system_variable(enum_var_type var_type, + const Lex_ident_sys_st *name, + Item *val); + bool set_user_variable(THD *thd, const LEX_CSTRING *name, Item *val); + void set_stmt_init(); + sp_name *make_sp_name(THD *thd, const LEX_CSTRING *name); + sp_name 
*make_sp_name(THD *thd, const LEX_CSTRING *name1, + const LEX_CSTRING *name2); + sp_name *make_sp_name_package_routine(THD *thd, const LEX_CSTRING *name); + sp_head *make_sp_head(THD *thd, const sp_name *name, const Sp_handler *sph, + enum_sp_aggregate_type agg_type); + sp_head *make_sp_head_no_recursive(THD *thd, const sp_name *name, + const Sp_handler *sph, + enum_sp_aggregate_type agg_type); + bool sp_body_finalize_routine(THD *); + bool sp_body_finalize_trigger(THD *); + bool sp_body_finalize_event(THD *); + bool sp_body_finalize_function(THD *); + bool sp_body_finalize_procedure(THD *); + bool sp_body_finalize_procedure_standalone(THD *, const sp_name *end_name); + sp_package *create_package_start(THD *thd, + enum_sql_command command, + const Sp_handler *sph, + const sp_name *name, + DDL_options_st options); + bool create_package_finalize(THD *thd, + const sp_name *name, + const sp_name *name2, + const char *cpp_body_end); + bool call_statement_start(THD *thd, sp_name *name); + bool call_statement_start(THD *thd, const Lex_ident_sys_st *name); + bool call_statement_start(THD *thd, const Lex_ident_sys_st *name1, + const Lex_ident_sys_st *name2); + bool call_statement_start(THD *thd, + const Lex_ident_sys_st *db, + const Lex_ident_sys_st *pkg, + const Lex_ident_sys_st *proc); + sp_variable *find_variable(const LEX_CSTRING *name, + sp_pcontext **ctx, + const Sp_rcontext_handler **rh) const; + sp_variable *find_variable(const LEX_CSTRING *name, + const Sp_rcontext_handler **rh) const + { + sp_pcontext *not_used_ctx; + return find_variable(name, ¬_used_ctx, rh); + } + bool set_variable(const Lex_ident_sys_st *name, Item *item); + bool set_variable(const Lex_ident_sys_st *name1, + const Lex_ident_sys_st *name2, Item *item); + void sp_variable_declarations_init(THD *thd, int nvars); + bool sp_variable_declarations_finalize(THD *thd, int nvars, + const Column_definition *cdef, + Item *def); + bool sp_variable_declarations_set_default(THD *thd, int nvars, Item *def); + 
bool sp_variable_declarations_row_finalize(THD *thd, int nvars, + Row_definition_list *row, + Item *def); + bool sp_variable_declarations_with_ref_finalize(THD *thd, int nvars, + Qualified_column_ident *col, + Item *def); + bool sp_variable_declarations_rowtype_finalize(THD *thd, int nvars, + Qualified_column_ident *, + Item *def); + bool sp_variable_declarations_cursor_rowtype_finalize(THD *thd, int nvars, + uint offset, + Item *def); + bool sp_variable_declarations_table_rowtype_finalize(THD *thd, int nvars, + const LEX_CSTRING &db, + const LEX_CSTRING &table, + Item *def); + bool sp_variable_declarations_column_type_finalize(THD *thd, int nvars, + Qualified_column_ident *ref, + Item *def); + bool sp_variable_declarations_vartype_finalize(THD *thd, int nvars, + const LEX_CSTRING &name, + Item *def); + bool sp_variable_declarations_copy_type_finalize(THD *thd, int nvars, + const Column_definition &ref, + Row_definition_list *fields, + Item *def); + + LEX_USER *current_user_for_set_password(THD *thd); + bool sp_create_set_password_instr(THD *thd, + LEX_USER *user, + USER_AUTH *auth, + bool no_lookahead); + bool sp_create_set_password_instr(THD *thd, + USER_AUTH *auth, + bool no_lookahead) + { + LEX_USER *user; + return !(user= current_user_for_set_password(thd)) || + sp_create_set_password_instr(thd, user, auth, no_lookahead); + } + + bool sp_handler_declaration_init(THD *thd, int type); + bool sp_handler_declaration_finalize(THD *thd, int type); + + bool sp_declare_cursor(THD *thd, const LEX_CSTRING *name, + class sp_lex_cursor *cursor_stmt, + sp_pcontext *param_ctx, bool add_cpush_instr); + + bool sp_open_cursor(THD *thd, const LEX_CSTRING *name, + List *parameters); + Item_splocal *create_item_for_sp_var(const Lex_ident_cli_st *name, + sp_variable *spvar); + + Item *create_item_qualified_asterisk(THD *thd, const Lex_ident_sys_st *name); + Item *create_item_qualified_asterisk(THD *thd, + const Lex_ident_sys_st *a, + const Lex_ident_sys_st *b); + Item 
*create_item_qualified_asterisk(THD *thd, const Lex_ident_cli_st *cname) + { + Lex_ident_sys name(thd, cname); + if (name.is_null()) + return NULL; // EOM + return create_item_qualified_asterisk(thd, &name); + } + Item *create_item_qualified_asterisk(THD *thd, + const Lex_ident_cli_st *ca, + const Lex_ident_cli_st *cb) + { + Lex_ident_sys a(thd, ca), b(thd, cb); + if (a.is_null() || b.is_null()) + return NULL; // EOM + return create_item_qualified_asterisk(thd, &a, &b); + } + + Item *create_item_ident_field(THD *thd, + const Lex_ident_sys_st &db, + const Lex_ident_sys_st &table, + const Lex_ident_sys_st &name); + Item *create_item_ident_nosp(THD *thd, Lex_ident_sys_st *name) + { + return create_item_ident_field(thd, Lex_ident_sys(), Lex_ident_sys(), *name); + } + Item *create_item_ident_sp(THD *thd, Lex_ident_sys_st *name, + const char *start, const char *end); + Item *create_item_ident(THD *thd, Lex_ident_cli_st *cname) + { + Lex_ident_sys name(thd, cname); + if (name.is_null()) + return NULL; // EOM + return sphead ? + create_item_ident_sp(thd, &name, cname->pos(), cname->end()) : + create_item_ident_nosp(thd, &name); + } + /* + Create an Item corresponding to a qualified name: a.b + when the parser is out of an SP context. + @param THD - THD, for mem_root + @param a - the first name + @param b - the second name + @retval - a pointer to a created item, or NULL on error. + + Possible Item types that can be created: + - Item_trigger_field + - Item_field + - Item_ref + */ + Item *create_item_ident_nospvar(THD *thd, + const Lex_ident_sys_st *a, + const Lex_ident_sys_st *b); + /* + Create an Item corresponding to a ROW field valiable: var.field + @param THD - THD, for mem_root + @param rh [OUT] - the rcontext handler (local vs package variables) + @param var - the ROW variable name + @param field - the ROW variable field name + @param spvar - the variable that was previously found by name + using "var_name". 
+ @param start - position in the query (for binary log) + @param end - end in the query (for binary log) + */ + Item_splocal *create_item_spvar_row_field(THD *thd, + const Sp_rcontext_handler *rh, + const Lex_ident_sys *var, + const Lex_ident_sys *field, + sp_variable *spvar, + const char *start, + const char *end); + /* + Create an item from its qualified name. + Depending on context, it can be either a ROW variable field, + or trigger, table field, table field reference. + See comments to create_item_spvar_row_field() and + create_item_ident_nospvar(). + @param thd - THD, for mem_root + @param a - the first name + @param b - the second name + @retval - NULL on error, or a pointer to a new Item. + */ + Item *create_item_ident(THD *thd, + const Lex_ident_cli_st *a, + const Lex_ident_cli_st *b); + /* + Create an item from its qualified name. + Depending on context, it can be a table field, a table field reference, + or a sequence NEXTVAL and CURRVAL. + @param thd - THD, for mem_root + @param a - the first name + @param b - the second name + @param c - the third name + @retval - NULL on error, or a pointer to a new Item. + */ + Item *create_item_ident(THD *thd, + const Lex_ident_sys_st *a, + const Lex_ident_sys_st *b, + const Lex_ident_sys_st *c); + + Item *create_item_ident(THD *thd, + const Lex_ident_cli_st *ca, + const Lex_ident_cli_st *cb, + const Lex_ident_cli_st *cc) + { + Lex_ident_sys b(thd, cb), c(thd, cc); + if (b.is_null() || c.is_null()) + return NULL; + if (ca->pos() == cb->pos()) // SELECT .t1.col1 + { + DBUG_ASSERT(ca->length == 0); + Lex_ident_sys none; + return create_item_ident(thd, &none, &b, &c); + } + Lex_ident_sys a(thd, ca); + return a.is_null() ? 
NULL : create_item_ident(thd, &a, &b, &c); + } + + /* + Create an item for "NEXT VALUE FOR sequence_name" + */ + Item *create_item_func_nextval(THD *thd, Table_ident *ident); + Item *create_item_func_nextval(THD *thd, const LEX_CSTRING *db, + const LEX_CSTRING *name); + /* + Create an item for "PREVIOUS VALUE FOR sequence_name" + */ + Item *create_item_func_lastval(THD *thd, Table_ident *ident); + Item *create_item_func_lastval(THD *thd, const LEX_CSTRING *db, + const LEX_CSTRING *name); + + /* + Create an item for "SETVAL(sequence_name, value [, is_used [, round]]) + */ + Item *create_item_func_setval(THD *thd, Table_ident *ident, longlong value, + ulonglong round, bool is_used); + + /* + Create an item for a name in LIMIT clause: LIMIT var + @param THD - THD, for mem_root + @param var_name - the variable name + @retval - a new Item corresponding to the SP variable, + or NULL on error + (non in SP, unknown variable, wrong data type). + */ + Item *create_item_limit(THD *thd, const Lex_ident_cli_st *var_name); + + /* + Create an item for a qualified name in LIMIT clause: LIMIT var.field + @param THD - THD, for mem_root + @param var_name - the variable name + @param field_name - the variable field name + @param start - start in the query (for binary log) + @param end - end in the query (for binary log) + @retval - a new Item corresponding to the SP variable, + or NULL on error + (non in SP, unknown variable, unknown ROW field, + wrong data type). 
+ */ + Item *create_item_limit(THD *thd, + const Lex_ident_cli_st *var_name, + const Lex_ident_cli_st *field_name); + + Item *create_item_query_expression(THD *thd, st_select_lex_unit *unit); + + Item *make_item_func_sysdate(THD *thd, uint fsp); + Item *make_item_func_call_generic(THD *thd, Lex_ident_cli_st *db, + Lex_ident_cli_st *name, List *args); + Item *make_item_func_call_generic(THD *thd, + Lex_ident_cli_st *db, + Lex_ident_cli_st *pkg, + Lex_ident_cli_st *name, + List *args); + Item *make_item_func_call_native_or_parse_error(THD *thd, + Lex_ident_cli_st &name, + List *args); + my_var *create_outvar(THD *thd, const LEX_CSTRING *name); + + /* + Create a my_var instance for a ROW field variable that was used + as an OUT SP parameter: CALL p1(var.field); + @param THD - THD, for mem_root + @param var_name - the variable name + @param field_name - the variable field name + */ + my_var *create_outvar(THD *thd, + const LEX_CSTRING *var_name, + const LEX_CSTRING *field_name); + + bool is_trigger_new_or_old_reference(const LEX_CSTRING *name) const; + + Item *create_and_link_Item_trigger_field(THD *thd, const LEX_CSTRING *name, + bool new_row); + // For syntax with colon, e.g. 
:NEW.a or :OLD.a + Item *make_item_colon_ident_ident(THD *thd, + const Lex_ident_cli_st *a, + const Lex_ident_cli_st *b); + // PLSQL: cursor%ISOPEN etc + Item *make_item_plsql_cursor_attr(THD *thd, const LEX_CSTRING *name, + plsql_cursor_attr_t attr); + + // For "SELECT @@var", "SELECT @@var.field" + Item *make_item_sysvar(THD *thd, + enum_var_type type, + const LEX_CSTRING *name) + { + return make_item_sysvar(thd, type, name, &null_clex_str); + } + Item *make_item_sysvar(THD *thd, + enum_var_type type, + const LEX_CSTRING *name, + const LEX_CSTRING *component); + void sp_block_init(THD *thd, const LEX_CSTRING *label); + void sp_block_init(THD *thd) + { + // Unlabeled blocks get an empty label + sp_block_init(thd, &empty_clex_str); + } + bool sp_block_finalize(THD *thd, const Lex_spblock_st spblock) + { + class sp_label *tmp; + return sp_block_finalize(thd, spblock, &tmp); + } + bool sp_block_finalize(THD *thd) + { + return sp_block_finalize(thd, Lex_spblock()); + } + bool sp_block_finalize(THD *thd, const Lex_spblock_st spblock, + const LEX_CSTRING *end_label); + bool sp_block_finalize(THD *thd, const LEX_CSTRING *end_label) + { + return sp_block_finalize(thd, Lex_spblock(), end_label); + } + bool sp_declarations_join(Lex_spblock_st *res, + const Lex_spblock_st b1, + const Lex_spblock_st b2) const + { + if ((b2.vars || b2.conds) && (b1.curs || b1.hndlrs)) + { + my_error(ER_SP_VARCOND_AFTER_CURSHNDLR, MYF(0)); + return true; + } + if (b2.curs && b1.hndlrs) + { + my_error(ER_SP_CURSOR_AFTER_HANDLER, MYF(0)); + return true; + } + res->join(b1, b2); + return false; + } + bool sp_block_with_exceptions_finalize_declarations(THD *thd); + bool sp_block_with_exceptions_finalize_executable_section(THD *thd, + uint executable_section_ip); + bool sp_block_with_exceptions_finalize_exceptions(THD *thd, + uint executable_section_ip, + uint exception_count); + bool sp_block_with_exceptions_add_empty(THD *thd); + bool sp_exit_statement(THD *thd, Item *when); + bool 
sp_exit_statement(THD *thd, const LEX_CSTRING *label_name, Item *item); + bool sp_leave_statement(THD *thd, const LEX_CSTRING *label_name); + bool sp_goto_statement(THD *thd, const LEX_CSTRING *label_name); + + bool sp_continue_statement(THD *thd); + bool sp_continue_statement(THD *thd, const LEX_CSTRING *label_name); + bool sp_iterate_statement(THD *thd, const LEX_CSTRING *label_name); + + bool maybe_start_compound_statement(THD *thd); + bool sp_push_loop_label(THD *thd, const LEX_CSTRING *label_name); + bool sp_push_loop_empty_label(THD *thd); + bool sp_pop_loop_label(THD *thd, const LEX_CSTRING *label_name); + void sp_pop_loop_empty_label(THD *thd); + bool sp_while_loop_expression(THD *thd, Item *expr); + bool sp_while_loop_finalize(THD *thd); + bool sp_if_after_statements(THD *thd); + bool sp_push_goto_label(THD *thd, const LEX_CSTRING *label_name); + + Item_param *add_placeholder(THD *thd, const LEX_CSTRING *name, + const char *start, const char *end); + + /* Integer range FOR LOOP methods */ + sp_variable *sp_add_for_loop_variable(THD *thd, const LEX_CSTRING *name, + Item *value); + sp_variable *sp_add_for_loop_target_bound(THD *thd, Item *value) + { + LEX_CSTRING name= { STRING_WITH_LEN("[target_bound]") }; + return sp_add_for_loop_variable(thd, &name, value); + } + bool sp_for_loop_intrange_declarations(THD *thd, Lex_for_loop_st *loop, + const LEX_CSTRING *index, + const Lex_for_loop_bounds_st &bounds); + bool sp_for_loop_intrange_condition_test(THD *thd, const Lex_for_loop_st &loop); + bool sp_for_loop_intrange_iterate(THD *thd, const Lex_for_loop_st &loop); + + /* Cursor FOR LOOP methods */ + bool sp_for_loop_cursor_declarations(THD *thd, Lex_for_loop_st *loop, + const LEX_CSTRING *index, + const Lex_for_loop_bounds_st &bounds); + sp_variable *sp_add_for_loop_cursor_variable(THD *thd, + const LEX_CSTRING *name, + const class sp_pcursor *cur, + uint coffset, + sp_assignment_lex *param_lex, + Item_args *parameters); + bool 
sp_for_loop_implicit_cursor_statement(THD *thd, + Lex_for_loop_bounds_st *bounds, + sp_lex_cursor *cur); + bool sp_for_loop_cursor_condition_test(THD *thd, const Lex_for_loop_st &loop); + bool sp_for_loop_cursor_iterate(THD *thd, const Lex_for_loop_st &); + + /* Generic FOR LOOP methods*/ + + /* + Generate FOR loop declarations and + initialize "loop" from "index" and "bounds". + + @param [IN] thd - current THD, for mem_root and error reporting + @param [OUT] loop - the loop generated SP variables are stored here, + together with additional loop characteristics. + @param [IN] index - the loop index variable name + @param [IN] bounds - the loop bounds (in sp_assignment_lex format) + and additional loop characteristics, + as created by the sp_for_loop_bounds rule. + @retval true - on error + @retval false - on success + + This methods adds declarations: + - An explicit integer or cursor%ROWTYPE "index" variable + - An implicit integer upper bound variable, in case of integer range loops + - A CURSOR, in case of an implicit CURSOR loops + The generated variables are stored into "loop". + Additional loop characteristics are copied from "bounds" to "loop". + */ + bool sp_for_loop_declarations(THD *thd, Lex_for_loop_st *loop, + const LEX_CSTRING *index, + const Lex_for_loop_bounds_st &bounds) + { + return bounds.is_for_loop_cursor() ? + sp_for_loop_cursor_declarations(thd, loop, index, bounds) : + sp_for_loop_intrange_declarations(thd, loop, index, bounds); + } + + /* + Generate a conditional jump instruction to leave the loop, + using a proper condition depending on the loop type: + - Item_func_le -- integer range loops + - Item_func_ge -- integer range reverse loops + - Item_func_cursor_found -- cursor loops + */ + bool sp_for_loop_condition_test(THD *thd, const Lex_for_loop_st &loop) + { + return loop.is_for_loop_cursor() ? 
+ sp_for_loop_cursor_condition_test(thd, loop) : + sp_for_loop_intrange_condition_test(thd, loop); + } + + /* + Generate "increment" instructions followed by a jump to the + condition test in the beginnig of the loop. + "Increment" depends on the loop type and can be: + - index:= index + 1; -- integer range loops + - index:= index - 1; -- integer range reverse loops + - FETCH cursor INTO index; -- cursor loops + */ + bool sp_for_loop_finalize(THD *thd, const Lex_for_loop_st &loop) + { + if (loop.is_for_loop_cursor() ? + sp_for_loop_cursor_iterate(thd, loop) : + sp_for_loop_intrange_iterate(thd, loop)) + return true; + // Generate a jump to the beginning of the loop + return sp_while_loop_finalize(thd); + } + bool sp_for_loop_outer_block_finalize(THD *thd, const Lex_for_loop_st &loop); + + /* + Make an Item when an identifier is found in the FOR loop bounds: + FOR rec IN cursor + FOR rec IN var1 .. var2 + FOR rec IN row1.field1 .. xxx + */ + Item *create_item_for_loop_bound(THD *thd, + const LEX_CSTRING *a, + const LEX_CSTRING *b, + const LEX_CSTRING *c); + /* End of FOR LOOP methods */ + + bool add_signal_statement(THD *thd, const class sp_condition_value *value); + bool add_resignal_statement(THD *thd, const class sp_condition_value *value); + + // Check if "KEY IF NOT EXISTS name" used outside of ALTER context + bool check_add_key(DDL_options_st ddl) + { + if (ddl.if_not_exists() && sql_command != SQLCOM_ALTER_TABLE) + { + parse_error(); + return true; + } + return false; + } + // Add a key as a part of CREATE TABLE or ALTER TABLE + bool add_key(Key::Keytype key_type, const LEX_CSTRING *key_name, + ha_key_alg algorithm, DDL_options_st ddl) + { + if (check_add_key(ddl) || + !(last_key= new Key(key_type, key_name, algorithm, false, ddl))) + return true; + alter_info.key_list.push_back(last_key); + return false; + } + // Add a key for a CREATE INDEX statement + bool add_create_index(Key::Keytype key_type, const LEX_CSTRING *key_name, + ha_key_alg algorithm, 
DDL_options_st ddl) + { + if (check_create_options(ddl) || + !(last_key= new Key(key_type, key_name, algorithm, false, ddl))) + return true; + alter_info.key_list.push_back(last_key); + return false; + } + bool add_create_index_prepare(Table_ident *table) + { + sql_command= SQLCOM_CREATE_INDEX; + if (!current_select->add_table_to_list(thd, table, NULL, + TL_OPTION_UPDATING, + TL_READ_NO_INSERT, + MDL_SHARED_UPGRADABLE)) + return true; + alter_info.reset(); + alter_info.flags= ALTER_ADD_INDEX; + option_list= NULL; + return false; + } + /* + Add an UNIQUE or PRIMARY key which is a part of a column definition: + CREATE TABLE t1 (a INT PRIMARY KEY); + */ + void add_key_to_list(LEX_CSTRING *field_name, + enum Key::Keytype type, bool check_exists); + // Add a constraint as a part of CREATE TABLE or ALTER TABLE + bool add_constraint(const LEX_CSTRING &name, Virtual_column_info *constr, + bool if_not_exists) + { + constr->name= name; + constr->if_not_exists= if_not_exists; + alter_info.check_constraint_list.push_back(constr); + return false; + } + bool add_alter_list(LEX_CSTRING par_name, Virtual_column_info *expr, + bool par_exists); + bool add_alter_list(LEX_CSTRING name, LEX_CSTRING new_name, bool exists); + bool add_alter_list_item_convert_to_charset(CHARSET_INFO *cs) + { + if (create_info.add_table_option_convert_charset(cs)) + return true; + alter_info.flags|= ALTER_CONVERT_TO; + return false; + } + bool + add_alter_list_item_convert_to_charset(CHARSET_INFO *cs, + const Lex_extended_collation_st &cl) + { + if (create_info.add_table_option_convert_charset(cs) || + create_info.add_table_option_convert_collation(cl)) + return true; + alter_info.flags|= ALTER_CONVERT_TO; + return false; + } + void set_command(enum_sql_command command, + DDL_options_st options) + { + sql_command= command; + create_info.set(options); + } + void set_command(enum_sql_command command, + uint scope, + DDL_options_st options) + { + set_command(command, options); + create_info.options|= scope; 
// HA_LEX_CREATE_TMP_TABLE or 0 + } + bool check_create_options(DDL_options_st options) + { + if (options.or_replace() && options.if_not_exists()) + { + my_error(ER_WRONG_USAGE, MYF(0), "OR REPLACE", "IF NOT EXISTS"); + return true; + } + return false; + } + bool set_create_options_with_check(DDL_options_st options) + { + create_info.set(options); + return check_create_options(create_info); + } + bool add_create_options_with_check(DDL_options_st options) + { + create_info.add(options); + return check_create_options(create_info); + } + bool sp_add_cfetch(THD *thd, const LEX_CSTRING *name); + bool sp_add_agg_cfetch(); + + bool set_command_with_check(enum_sql_command command, + uint scope, + DDL_options_st options) + { + set_command(command, scope, options); + return check_create_options(options); + } + bool set_command_with_check(enum_sql_command command, DDL_options_st options) + { + set_command(command, options); + return check_create_options(options); + } + /* + DROP shares lex->create_info to store TEMPORARY and IF EXISTS options + to save on extra initialization in lex_start(). + Add some wrappers, to avoid direct use of lex->create_info in the + caller code processing DROP statements (which might look confusing). + */ + bool tmp_table() const { return create_info.tmp_table(); } + bool if_exists() const { return create_info.if_exists(); } + + /* + Run specified phases for derived tables/views in the given list + + @param table_list - list of derived tables/view to handle + @param phase - phases to process tables/views through + + @details + This method runs phases specified by the 'phases' on derived + tables/views found in the 'table_list' with help of the + TABLE_LIST::handle_derived function. + 'this' is passed as an argument to the TABLE_LIST::handle_derived. 
+ + @return false - ok + @return true - error + */ + bool handle_list_of_derived(TABLE_LIST *table_list, uint phases) + { + for (TABLE_LIST *tl= table_list; tl; tl= tl->next_local) + { + if (tl->is_view_or_derived() && tl->handle_derived(this, phases)) + return true; + } + return false; + } + + bool create_like() const + { + DBUG_ASSERT(!create_info.like() || + !first_select_lex()->item_list.elements); + return create_info.like(); + } + + bool create_select() const + { + DBUG_ASSERT(!create_info.like() || + !first_select_lex()->item_list.elements); + return first_select_lex()->item_list.elements; + } + + bool create_simple() const + { + return !create_like() && !create_select(); + } + + SELECT_LEX *exclude_last_select(); + SELECT_LEX *exclude_not_first_select(SELECT_LEX *exclude); + void check_automatic_up(enum sub_select_type type); + bool create_or_alter_view_finalize(THD *thd, Table_ident *table_ident); + bool add_alter_view(THD *thd, uint16 algorithm, enum_view_suid suid, + Table_ident *table_ident); + bool add_create_view(THD *thd, DDL_options_st ddl, + uint16 algorithm, enum_view_suid suid, + Table_ident *table_ident); + bool add_grant_command(THD *thd, const List &columns); + + bool stmt_grant_table(THD *thd, + Grant_privilege *grant, + const Lex_grant_object_name &ident, + privilege_t grant_option); + + bool stmt_revoke_table(THD *thd, + Grant_privilege *grant, + const Lex_grant_object_name &ident); + + bool stmt_grant_sp(THD *thd, + Grant_privilege *grant, + const Lex_grant_object_name &ident, + const Sp_handler &sph, + privilege_t grant_option); + + bool stmt_revoke_sp(THD *thd, + Grant_privilege *grant, + const Lex_grant_object_name &ident, + const Sp_handler &sph); + + bool stmt_grant_proxy(THD *thd, LEX_USER *user, privilege_t grant_option); + bool stmt_revoke_proxy(THD *thd, LEX_USER *user); + + Vers_parse_info &vers_get_info() + { + return create_info.vers_info; + } + + /* The list of history-generating DML commands */ + bool 
vers_history_generating() const + { + switch (sql_command) + { + case SQLCOM_DELETE: + return !vers_conditions.delete_history; + case SQLCOM_UPDATE: + case SQLCOM_UPDATE_MULTI: + case SQLCOM_DELETE_MULTI: + case SQLCOM_REPLACE: + case SQLCOM_REPLACE_SELECT: + return true; + case SQLCOM_INSERT: + case SQLCOM_INSERT_SELECT: + return duplicates == DUP_UPDATE; + case SQLCOM_LOAD: + return duplicates == DUP_REPLACE; + default: + return false; + } + } + + int add_period(Lex_ident name, Lex_ident_sys_st start, Lex_ident_sys_st end) + { + if (check_period_name(name.str)) { + my_error(ER_WRONG_COLUMN_NAME, MYF(0), name.str); + return 1; + } + + if (lex_string_cmp(system_charset_info, &start, &end) == 0) + { + my_error(ER_FIELD_SPECIFIED_TWICE, MYF(0), start.str); + return 1; + } + + Table_period_info &info= create_info.period_info; + + if (check_exists && info.name.streq(name)) + return 0; + + if (info.is_set()) + { + my_error(ER_MORE_THAN_ONE_PERIOD, MYF(0)); + return 1; + } + info.set_period(start, end); + info.name= name; + + info.constr= new Virtual_column_info(); + info.constr->expr= lt_creator.create(thd, + create_item_ident_nosp(thd, &start), + create_item_ident_nosp(thd, &end)); + add_constraint(null_clex_str, info.constr, false); + return 0; + } + + sp_package *get_sp_package() const; + + /** + Check if the select is a simple select (not an union). 
+ @retval + 0 ok + @retval + 1 error ; In this case the error messege is sent to the client + */ + bool check_simple_select(const LEX_CSTRING *option) + { + if (current_select != &builtin_select) + { + char command[80]; + strmake(command, option->str, MY_MIN(option->length, sizeof(command)-1)); + my_error(ER_CANT_USE_OPTION_HERE, MYF(0), command); + return true; + } + return false; + } + + SELECT_LEX_UNIT *alloc_unit(); + SELECT_LEX *alloc_select(bool is_select); + SELECT_LEX_UNIT *create_unit(SELECT_LEX*); + SELECT_LEX *wrap_unit_into_derived(SELECT_LEX_UNIT *unit); + SELECT_LEX *wrap_select_chain_into_derived(SELECT_LEX *sel); + void init_select() + { + current_select->init_select(); + wild= 0; + exchange= 0; + } + bool main_select_push(bool service= false); + bool insert_select_hack(SELECT_LEX *sel); + SELECT_LEX *create_priority_nest(SELECT_LEX *first_in_nest); + + bool set_main_unit(st_select_lex_unit *u) + { + unit.options= u->options; + unit.uncacheable= u->uncacheable; + unit.register_select_chain(u->first_select()); + unit.first_select()->options|= builtin_select.options; + unit.fake_select_lex= u->fake_select_lex; + unit.union_distinct= u->union_distinct; + unit.set_with_clause(u->with_clause); + builtin_select.exclude_from_global(); + return false; + } + bool check_main_unit_semantics(); + + SELECT_LEX_UNIT *parsed_select_expr_start(SELECT_LEX *s1, SELECT_LEX *s2, + enum sub_select_type unit_type, + bool distinct); + SELECT_LEX_UNIT *parsed_select_expr_cont(SELECT_LEX_UNIT *unit, + SELECT_LEX *s2, + enum sub_select_type unit_type, + bool distinct, bool oracle); + bool parsed_multi_operand_query_expression_body(SELECT_LEX_UNIT *unit); + SELECT_LEX_UNIT *add_tail_to_query_expression_body(SELECT_LEX_UNIT *unit, + Lex_order_limit_lock *l); + SELECT_LEX_UNIT * + add_tail_to_query_expression_body_ext_parens(SELECT_LEX_UNIT *unit, + Lex_order_limit_lock *l); + SELECT_LEX_UNIT *parsed_body_ext_parens_primary(SELECT_LEX_UNIT *unit, + SELECT_LEX *primary, + enum 
sub_select_type unit_type, + bool distinct); + SELECT_LEX_UNIT * + add_primary_to_query_expression_body(SELECT_LEX_UNIT *unit, + SELECT_LEX *sel, + enum sub_select_type unit_type, + bool distinct, + bool oracle); + SELECT_LEX_UNIT * + add_primary_to_query_expression_body(SELECT_LEX_UNIT *unit, + SELECT_LEX *sel, + enum sub_select_type unit_type, + bool distinct); + SELECT_LEX_UNIT * + add_primary_to_query_expression_body_ext_parens( + SELECT_LEX_UNIT *unit, + SELECT_LEX *sel, + enum sub_select_type unit_type, + bool distinct); + SELECT_LEX *parsed_subselect(SELECT_LEX_UNIT *unit); + bool parsed_insert_select(SELECT_LEX *firs_select); + void save_values_list_state(); + void restore_values_list_state(); + bool parsed_TVC_start(); + SELECT_LEX *parsed_TVC_end(); + TABLE_LIST *parsed_derived_table(SELECT_LEX_UNIT *unit, + int for_system_time, + LEX_CSTRING *alias); + bool parsed_create_view(SELECT_LEX_UNIT *unit, int check); + bool select_finalize(st_select_lex_unit *expr); + bool select_finalize(st_select_lex_unit *expr, Lex_select_lock l); + void relink_hack(st_select_lex *select_lex); + + bool stmt_install_plugin(const DDL_options_st &opt, + const Lex_ident_sys_st &name, + const LEX_CSTRING &soname); + void stmt_install_plugin(const LEX_CSTRING &soname); + + bool stmt_uninstall_plugin_by_name(const DDL_options_st &opt, + const Lex_ident_sys_st &name); + bool stmt_uninstall_plugin_by_soname(const DDL_options_st &opt, + const LEX_CSTRING &soname); + bool stmt_prepare_validate(const char *stmt_type); + bool stmt_prepare(const Lex_ident_sys_st &ident, Item *code); + bool stmt_execute(const Lex_ident_sys_st &ident, List *params); + bool stmt_execute_immediate(Item *code, List *params); + void stmt_deallocate_prepare(const Lex_ident_sys_st &ident); + + bool stmt_alter_table_exchange_partition(Table_ident *table); + bool stmt_alter_table(Table_ident *table); + + void stmt_purge_to(const LEX_CSTRING &to); + bool stmt_purge_before(Item *item); + + SELECT_LEX *returning() + { 
return &builtin_select; } + bool has_returning() + { return !builtin_select.item_list.is_empty(); } + +private: + bool stmt_create_routine_start(const DDL_options_st &options) + { + create_info.set(options); + return main_select_push() || check_create_options(options); + } +public: + bool stmt_create_function_start(const DDL_options_st &options) + { + sql_command= SQLCOM_CREATE_SPFUNCTION; + return stmt_create_routine_start(options); + } + bool stmt_create_procedure_start(const DDL_options_st &options) + { + sql_command= SQLCOM_CREATE_PROCEDURE; + return stmt_create_routine_start(options); + } + void stmt_create_routine_finalize() + { + pop_select(); // main select + } + + bool stmt_create_stored_function_start(const DDL_options_st &options, + enum_sp_aggregate_type, + const sp_name *name); + bool stmt_create_stored_function_finalize_standalone(const sp_name *end_name); + + bool stmt_create_udf_function(const DDL_options_st &options, + enum_sp_aggregate_type agg_type, + const Lex_ident_sys_st &name, + Item_result return_type, + const LEX_CSTRING &soname); + + bool stmt_drop_function(const DDL_options_st &options, + const Lex_ident_sys_st &db, + const Lex_ident_sys_st &name); + + bool stmt_drop_function(const DDL_options_st &options, + const Lex_ident_sys_st &name); + + bool stmt_drop_procedure(const DDL_options_st &options, + sp_name *name); + + bool stmt_alter_function_start(sp_name *name); + bool stmt_alter_procedure_start(sp_name *name); + + sp_condition_value *stmt_signal_value(const Lex_ident_sys_st &ident); + + Spvar_definition *row_field_name(THD *thd, const Lex_ident_sys_st &name); + + bool set_field_type_udt(Lex_field_type_st *type, + const LEX_CSTRING &name, + const Lex_length_and_dec_st &attr); + bool set_cast_type_udt(Lex_cast_type_st *type, + const LEX_CSTRING &name); + + bool map_data_type(const Lex_ident_sys_st &schema, + Lex_field_type_st *type) const; + + void mark_first_table_as_inserting(); + + bool fields_are_impossible() + { + // no select or 
it is last select with no tables (service select) + return !select_stack_head() || + (select_stack_top == 1 && + select_stack[0]->is_service_select); + } + + bool add_table_foreign_key(const LEX_CSTRING *name, + const LEX_CSTRING *constraint_name, + Table_ident *table_name, + DDL_options ddl_options); + bool add_column_foreign_key(const LEX_CSTRING *name, + const LEX_CSTRING *constraint_name, + Table_ident *ref_table_name, + DDL_options ddl_options); + + bool check_dependencies_in_with_clauses(); + bool check_cte_dependencies_and_resolve_references(); + bool resolve_references_to_cte(TABLE_LIST *tables, + TABLE_LIST **tables_last); + + /** + Turn on the SELECT_DESCRIBE flag for every SELECT_LEX involved into + the statement being processed in case the statement is EXPLAIN UPDATE/DELETE. + + @param lex current LEX + */ + + void promote_select_describe_flag_if_needed() + { + if (describe) + builtin_select.options |= SELECT_DESCRIBE; + } + +}; + + +/** + Set_signal_information is a container used in the parsed tree to represent + the collection of assignments to condition items in the SIGNAL and RESIGNAL + statements. +*/ +class Set_signal_information +{ +public: + /** Empty default constructor, use clear() */ + Set_signal_information() = default; + + /** Copy constructor. */ + Set_signal_information(const Set_signal_information& set); + + /** Destructor. */ + ~Set_signal_information() = default; + + /** Clear all items. */ + void clear(); + + /** + For each condition item assignment, m_item[] contains the parsed tree + that represents the expression assigned, if any. + m_item[] is an array indexed by Diag_condition_item_name. + */ + Item *m_item[LAST_DIAG_SET_PROPERTY+1]; +}; + + +/** + The internal state of the syntax parser. + This object is only available during parsing, + and is private to the syntax parser implementation (sql_yacc.yy). 
+*/ +class Yacc_state +{ +public: + Yacc_state() : yacc_yyss(NULL), yacc_yyvs(NULL) { reset(); } + + void reset() + { + if (yacc_yyss != NULL) { + my_free(yacc_yyss); + yacc_yyss = NULL; + } + if (yacc_yyvs != NULL) { + my_free(yacc_yyvs); + yacc_yyvs = NULL; + } + m_set_signal_info.clear(); + m_lock_type= TL_READ_DEFAULT; + m_mdl_type= MDL_SHARED_READ; + } + + ~Yacc_state(); + + /** + Reset part of the state which needs resetting before parsing + substatement. + */ + void reset_before_substatement() + { + m_lock_type= TL_READ_DEFAULT; + m_mdl_type= MDL_SHARED_READ; + } + + /** + Bison internal state stack, yyss, when dynamically allocated using + my_yyoverflow(). + */ + uchar *yacc_yyss; + + /** + Bison internal semantic value stack, yyvs, when dynamically allocated using + my_yyoverflow(). + */ + uchar *yacc_yyvs; + + /** + Fragments of parsed tree, + used during the parsing of SIGNAL and RESIGNAL. + */ + Set_signal_information m_set_signal_info; + + /** + Type of lock to be used for tables being added to the statement's + table list in table_factor, table_alias_ref, single_multi and + table_wild_one rules. + Statements which use these rules but require lock type different + from one specified by this member have to override it by using + st_select_lex::set_lock_for_tables() method. + + The default value of this member is TL_READ_DEFAULT. The only two + cases in which we change it are: + - When parsing SELECT HIGH_PRIORITY. + - Rule for DELETE. In which we use this member to pass information + about type of lock from delete to single_multi part of rule. + + We should try to avoid introducing new use cases as we would like + to get rid of this member eventually. + */ + thr_lock_type m_lock_type; + + /** + The type of requested metadata lock for tables added to + the statement table list. + */ + enum_mdl_type m_mdl_type; + + /* + TODO: move more attributes from the LEX structure here. + */ +}; + +/** + Internal state of the parser. 
+ The complete state consist of: + - state data used during lexical parsing, + - state data used during syntactic parsing. +*/ +class Parser_state +{ +public: + Parser_state() + : m_yacc() + {} + + /** + Object initializer. Must be called before usage. + + @retval FALSE OK + @retval TRUE Error + */ + bool init(THD *thd, char *buff, size_t length) + { + return m_lip.init(thd, buff, length); + } + + ~Parser_state() = default; + + Lex_input_stream m_lip; + Yacc_state m_yacc; + + /** + Current performance digest instrumentation. + */ + PSI_digest_locker* m_digest_psi; + + void reset(char *found_semicolon, unsigned int length) + { + m_lip.reset(found_semicolon, length); + m_yacc.reset(); + } +}; + + +extern sql_digest_state * +digest_add_token(sql_digest_state *state, uint token, LEX_YYSTYPE yylval); + +extern sql_digest_state * +digest_reduce_token(sql_digest_state *state, uint token_left, uint token_right); + +struct st_lex_local: public LEX, public Sql_alloc +{ +}; + + +/** + An st_lex_local extension with automatic initialization for SP purposes. + Used to parse sub-expressions and SP sub-statements. + + This class is reused for: + 1. sp_head::reset_lex() based constructs + - SP variable assignments (e.g. SET x=10;) + - FOR loop conditions and index variable increments + - Cursor statements + - SP statements + - SP function RETURN statements + - CASE statements + - REPEAT..UNTIL expressions + - WHILE expressions + - EXIT..WHEN and CONTINUE..WHEN statements + 2. sp_assignment_lex based constructs: + - CURSOR parameter assignments +*/ +class sp_lex_local: public st_lex_local +{ +public: + sp_lex_local(THD *thd, const LEX *oldlex) + { + /* Reset most stuff. 
*/ + start(thd); + /* Keep the parent SP stuff */ + sphead= oldlex->sphead; + spcont= oldlex->spcont; + /* Keep the parent trigger stuff too */ + trg_chistics= oldlex->trg_chistics; + trg_table_fields.empty(); + sp_lex_in_use= false; + } +}; + + +class sp_lex_set_var: public sp_lex_local +{ +public: + sp_lex_set_var(THD *thd, const LEX *oldlex) + :sp_lex_local(thd, oldlex) + { + // Set new LEX as if we at start of set rule + init_select(); + sql_command= SQLCOM_SET_OPTION; + var_list.empty(); + autocommit= 0; + option_type= oldlex->option_type; // Inherit from the outer lex + } +}; + + +class sp_expr_lex: public sp_lex_local +{ + Item *m_item; // The expression +public: + sp_expr_lex(THD *thd, LEX *oldlex) + :sp_lex_local(thd, oldlex), + m_item(NULL) + { } + void set_item(Item *item) + { + m_item= item; + } + Item *get_item() const + { + return m_item; + } + bool sp_continue_when_statement(THD *thd); + bool sp_continue_when_statement(THD *thd, const LEX_CSTRING *label_name); + int case_stmt_action_expr(); + int case_stmt_action_when(bool simple); + bool sp_while_loop_expression(THD *thd) + { + return LEX::sp_while_loop_expression(thd, get_item()); + } + bool sp_repeat_loop_finalize(THD *thd); + bool sp_if_expr(THD *thd); +}; + + +/** + An assignment specific LEX, which additionally has an Item (an expression) + and an associated with the Item free_list, which is usually freed + after the expression is calculated. + + Note, consider changing some of sp_lex_local to sp_assignment_lex, + as the latter allows to use a simpler grammar in sql_yacc.yy (IMO). + + If the expression is simple (e.g. does not have function calls), + then m_item and m_free_list point to the same Item. + + If the expressions is complex (e.g. have function calls), + then m_item points to the leftmost Item, while m_free_list points + to the rightmost item. 
+ For example: + f1(COALESCE(f2(10), f2(20))) + - m_item points to Item_func_sp for f1 (the leftmost Item) + - m_free_list points to Item_int for 20 (the rightmost Item) + + Note, we could avoid storing m_item at all, as we can always reach + the leftmost item from the rightmost item by iterating through m_free_list. + But with a separate m_item the code should be faster. +*/ +class sp_assignment_lex: public sp_lex_local +{ + Item *m_item; // The expression + Item *m_free_list; // The associated free_list (sub-expressions) +public: + sp_assignment_lex(THD *thd, LEX *oldlex) + :sp_lex_local(thd, oldlex), + m_item(NULL), + m_free_list(NULL) + { } + void set_item_and_free_list(Item *item, Item *free_list) + { + m_item= item; + m_free_list= free_list; + } + Item *get_item() const + { + return m_item; + } + Item *get_free_list() const + { + return m_free_list; + } +}; + + +extern void lex_init(void); +extern void lex_free(void); +extern void lex_start(THD *thd); +extern void lex_end(LEX *lex); +extern void lex_end_nops(LEX *lex); +extern void lex_unlock_plugins(LEX *lex); +void end_lex_with_single_table(THD *thd, TABLE *table, LEX *old_lex); +int init_lex_with_single_table(THD *thd, TABLE *table, LEX *lex); +extern int MYSQLlex(union YYSTYPE *yylval, THD *thd); +extern int ORAlex(union YYSTYPE *yylval, THD *thd); + +inline void trim_whitespace(CHARSET_INFO *cs, LEX_CSTRING *str, + size_t * prefix_length = 0) +{ + *str= Lex_cstring(*str).trim_whitespace(cs, prefix_length); +} + + +extern bool is_lex_native_function(const LEX_CSTRING *name); +extern bool is_native_function(THD *thd, const LEX_CSTRING *name); +extern bool is_native_function_with_warn(THD *thd, const LEX_CSTRING *name); + +/** + @} (End of group Semantic_Analysis) +*/ + +void my_missing_function_error(const LEX_CSTRING &token, const char *name); +bool is_keyword(const char *name, uint len); +int set_statement_var_if_exists(THD *thd, const char *var_name, + size_t var_name_length, ulonglong value); + 
+Virtual_column_info *add_virtual_expression(THD *thd, Item *expr); +Item* handle_sql2003_note184_exception(THD *thd, Item* left, bool equal, + Item *expr); + +bool sp_create_assignment_lex(THD *thd, const char *pos); +bool sp_create_assignment_instr(THD *thd, bool no_lookahead, + bool need_set_keyword= true); + +void mark_or_conds_to_avoid_pushdown(Item *cond); + +#endif /* MYSQL_SERVER */ +#endif /* SQL_LEX_INCLUDED */ diff --git a/sql/sql_lifo_buffer.h b/sql/sql_lifo_buffer.h new file mode 100644 index 00000000..2d648271 --- /dev/null +++ b/sql/sql_lifo_buffer.h @@ -0,0 +1,359 @@ +/* + Copyright (c) 2010, 2011, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @defgroup Bi-directional LIFO buffers used by DS-MRR implementation + @{ +*/ + +class Forward_lifo_buffer; +class Backward_lifo_buffer; + + +/* + A base class for in-memory buffer used by DS-MRR implementation. Common + properties: + - The buffer is last-in-first-out, i.e. elements that are written last are + read first. + - The buffer contains fixed-size elements. The elements are either atomic + byte sequences or pairs of them. + - The buffer resides in the memory provided by the user. It is possible to + = dynamically (ie. between write operations) add ajacent memory space to + the buffer + = dynamically remove unused space from the buffer. 
+ The intent of this is to allow to have two buffers on adjacent memory + space, one is being read from (and so its space shrinks), while the other + is being written to (and so it needs more and more space). + + There are two concrete classes, Forward_lifo_buffer and Backward_lifo_buffer. +*/ + +class Lifo_buffer +{ +protected: + size_t size1; + size_t size2; + +public: + /** + write() will put into buffer size1 bytes pointed by write_ptr1. If + size2!=0, then they will be accompanied by size2 bytes pointed by + write_ptr2. + */ + uchar *write_ptr1; + uchar *write_ptr2; + + /** + read() will do reading by storing pointers to read data into read_ptr1 or + into (read_ptr1, read_ptr2), depending on whether the buffer was set to + store single objects or pairs. + */ + uchar *read_ptr1; + uchar *read_ptr2; + +protected: + uchar *start; /**< points to start of buffer space */ + uchar *end; /**< points to just beyond the end of buffer space */ +public: + + enum enum_direction { + BACKWARD=-1, /**< buffer is filled/read from bigger to smaller memory addresses */ + FORWARD=1 /**< buffer is filled/read from smaller to bigger memory addresses */ + }; + + virtual enum_direction type() = 0; + + /* Buffer space control functions */ + + /** Let the buffer store data in the given space. */ + void set_buffer_space(uchar *start_arg, uchar *end_arg) + { + start= start_arg; + end= end_arg; + if (end != start) + TRASH_ALLOC(start, size_t(end - start)); + reset(); + } + + /** + Specify where write() should get the source data from, as well as source + data size. + */ + void setup_writing(size_t len1, size_t len2) + { + size1= len1; + size2= len2; + } + + /** + Specify where read() should store pointers to read data, as well as read + data size. The sizes must match those passed to setup_writing(). 
+ */ + void setup_reading(size_t len1, size_t len2) + { + DBUG_ASSERT(len1 == size1); + DBUG_ASSERT(len2 == size2); + } + + bool can_write() + { + return have_space_for(size1 + size2); + } + virtual void write() = 0; + + bool is_empty() { return used_size() == 0; } + virtual bool read() = 0; + + void sort(qsort2_cmp cmp_func, void *cmp_func_arg) + { + size_t elem_size= size1 + size2; + size_t n_elements= used_size() / elem_size; + my_qsort2(used_area(), n_elements, elem_size, cmp_func, cmp_func_arg); + } + + virtual void reset() = 0; + virtual uchar *end_of_space() = 0; +protected: + virtual size_t used_size() = 0; + + /* To be used only by iterator class: */ + virtual uchar *get_pos()= 0; + virtual bool read(uchar **position, uchar **ptr1, uchar **ptr2)= 0; + friend class Lifo_buffer_iterator; +public: + virtual bool have_space_for(size_t bytes) = 0; + + virtual void remove_unused_space(uchar **unused_start, uchar **unused_end)=0; + virtual uchar *used_area() = 0; + virtual ~Lifo_buffer() = default; +}; + + +/** + Forward LIFO buffer + + The buffer that is being written to from start to end and read in the + reverse. 'pos' points to just beyond the end of used space. 
+ + It is possible to grow/shink the buffer at the end bound + + used space unused space + *==============*-----------------* + ^ ^ ^ + | | +--- end + | +---- pos + +--- start +*/ + +class Forward_lifo_buffer: public Lifo_buffer +{ + uchar *pos; +public: + enum_direction type() { return FORWARD; } + size_t used_size() + { + return (size_t)(pos - start); + } + void reset() + { + pos= start; + } + uchar *end_of_space() { return pos; } + bool have_space_for(size_t bytes) + { + return (pos + bytes < end); + } + + void write() + { + write_bytes(write_ptr1, size1); + if (size2) + write_bytes(write_ptr2, size2); + } + void write_bytes(const uchar *data, size_t bytes) + { + DBUG_ASSERT(have_space_for(bytes)); + memcpy(pos, data, bytes); + pos += bytes; + } + bool have_data(uchar *position, size_t bytes) + { + return ((position - start) >= (ptrdiff_t)bytes); + } + uchar *read_bytes(uchar **position, size_t bytes) + { + DBUG_ASSERT(have_data(*position, bytes)); + *position= (*position) - bytes; + return *position; + } + bool read() { return read(&pos, &read_ptr1, &read_ptr2); } + bool read(uchar **position, uchar **ptr1, uchar **ptr2) + { + if (!have_data(*position, size1 + size2)) + return TRUE; + if (size2) + *ptr2= read_bytes(position, size2); + *ptr1= read_bytes(position, size1); + return FALSE; + } + void remove_unused_space(uchar **unused_start, uchar **unused_end) + { + DBUG_ASSERT(0); /* Don't need this yet */ + } + /** + Add more space to the buffer. The caller is responsible that the space + being added is adjacent to the end of the buffer. 
+ + @param unused_start Start of space + @param unused_end End of space + */ + void grow(uchar *unused_start, uchar *unused_end) + { + DBUG_ASSERT(unused_end >= unused_start); + DBUG_ASSERT(end == unused_start); + TRASH_ALLOC(unused_start, size_t(unused_end - unused_start)); + end= unused_end; + } + /* Return pointer to start of the memory area that is occupied by the data */ + uchar *used_area() { return start; } + friend class Lifo_buffer_iterator; + uchar *get_pos() { return pos; } +}; + + + +/** + Backward LIFO buffer + + The buffer that is being written to from start to end and read in the + reverse. 'pos' points to the start of used space. + + It is possible to grow/shink the buffer at the start. + + unused space used space + *--------------*=================* + ^ ^ ^ + | | +--- end + | +---- pos + +--- start +*/ +class Backward_lifo_buffer: public Lifo_buffer +{ + uchar *pos; +public: + enum_direction type() { return BACKWARD; } + + size_t used_size() + { + return (size_t)(end - pos); + } + void reset() + { + pos= end; + } + uchar *end_of_space() { return end; } + bool have_space_for(size_t bytes) + { + return (pos - bytes >= start); + } + void write() + { + if (write_ptr2) + write_bytes(write_ptr2, size2); + write_bytes(write_ptr1, size1); + } + void write_bytes(const uchar *data, size_t bytes) + { + DBUG_ASSERT(have_space_for(bytes)); + pos -= bytes; + memcpy(pos, data, bytes); + } + bool read() + { + return read(&pos, &read_ptr1, &read_ptr2); + } + bool read(uchar **position, uchar **ptr1, uchar **ptr2) + { + if (!have_data(*position, size1 + size2)) + return TRUE; + *ptr1= read_bytes(position, size1); + if (size2) + *ptr2= read_bytes(position, size2); + return FALSE; + } + bool have_data(uchar *position, size_t bytes) + { + return ((end - position) >= (ptrdiff_t)bytes); + } + uchar *read_bytes(uchar **position, size_t bytes) + { + DBUG_ASSERT(have_data(*position, bytes)); + uchar *ret= *position; + *position= *position + bytes; + return ret; + } + /** + 
Stop using/return the unused part of the space + @param unused_start OUT Start of the unused space + @param unused_end OUT End of the unused space + */ + void remove_unused_space(uchar **unused_start, uchar **unused_end) + { + *unused_start= start; + *unused_end= pos; + start= pos; + } + void grow(uchar *unused_start, uchar *unused_end) + { + DBUG_ASSERT(0); /* Not used for backward buffers */ + } + /* Return pointer to start of the memory area that is occupied by the data */ + uchar *used_area() { return pos; } + friend class Lifo_buffer_iterator; + uchar *get_pos() { return pos; } +}; + + +/** Iterator to walk over contents of the buffer without reading from it */ +class Lifo_buffer_iterator +{ + uchar *pos; + Lifo_buffer *buf; + +public: + /* The data is read to here */ + uchar *read_ptr1; + uchar *read_ptr2; + + void init(Lifo_buffer *buf_arg) + { + buf= buf_arg; + pos= buf->get_pos(); + } + /* + Read the next value. The calling convention is the same as buf->read() + has. + + @retval FALSE - ok + @retval TRUE - EOF, reached the end of the buffer + */ + bool read() + { + return buf->read(&pos, &read_ptr1, &read_ptr2); + } +}; + + diff --git a/sql/sql_limit.h b/sql/sql_limit.h new file mode 100644 index 00000000..335aff9d --- /dev/null +++ b/sql/sql_limit.h @@ -0,0 +1,101 @@ +/* Copyright (c) 2019, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifndef INCLUDES_MARIADB_SQL_LIMIT_H +#define INCLUDES_MARIADB_SQL_LIMIT_H +/** + LIMIT/OFFSET parameters for execution. +*/ + +class Select_limit_counters +{ + ha_rows select_limit_cnt, offset_limit_cnt; + bool with_ties; + + public: + Select_limit_counters(): + select_limit_cnt(0), offset_limit_cnt(0), with_ties(false) + {}; + Select_limit_counters(const Select_limit_counters &orig): + select_limit_cnt(orig.select_limit_cnt), + offset_limit_cnt(orig.offset_limit_cnt), + with_ties(orig.with_ties) + {}; + + void set_limit(ha_rows limit, ha_rows offset, bool with_ties_arg) + { + if (limit == 0) + offset= 0; + offset_limit_cnt= offset; + select_limit_cnt= limit; + with_ties= with_ties_arg; + /* + Guard against an overflow condition, where limit + offset exceede + ha_rows value range. This case covers unreasonably large parameter + values that do not have any practical use so assuming in this case + that the query does not have a limit is fine. + */ + if (select_limit_cnt + offset_limit_cnt >= select_limit_cnt) + select_limit_cnt+= offset_limit_cnt; + else + select_limit_cnt= HA_POS_ERROR; + } + + void set_single_row() + { + offset_limit_cnt= 0; + select_limit_cnt= 1; + with_ties= false; + } + + /* Send the first row, still honoring offset_limit_cnt */ + void send_first_row() + { + /* Guard against overflow */ + if ((select_limit_cnt= offset_limit_cnt +1 ) == 0) + select_limit_cnt= offset_limit_cnt; + // with_ties= false; Remove // on merge to 10.6 + } + + bool is_unlimited() const + { return select_limit_cnt == HA_POS_ERROR; } + /* + Set the limit to allow returning an unlimited number of rows. Useful + for cases when we want to continue execution indefinitely after the limit + is reached (for example for SQL_CALC_ROWS extension). 
+ */ + void set_unlimited() + { select_limit_cnt= HA_POS_ERROR; } + + /* Reset the limit entirely. */ + void clear() + { select_limit_cnt= HA_POS_ERROR; offset_limit_cnt= 0; with_ties= false;} + + bool check_offset(ha_rows sent) const + { + return sent < offset_limit_cnt; + } + void remove_offset() { offset_limit_cnt= 0; } + + ha_rows get_select_limit() const + { return select_limit_cnt; } + ha_rows get_offset_limit() const + { return offset_limit_cnt; } + bool is_with_ties() const + { return with_ties; } +}; + +#endif // INCLUDES_MARIADB_SQL_LIMIT_H diff --git a/sql/sql_list.cc b/sql/sql_list.cc new file mode 100644 index 00000000..6ccec9b5 --- /dev/null +++ b/sql/sql_list.cc @@ -0,0 +1,68 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_list.h" + +list_node end_of_list; + + +void free_list(I_List *list) +{ + i_string *tmp; + while ((tmp= list->get())) + delete tmp; +} + + +bool base_list::copy(const base_list *rhs, MEM_ROOT *mem_root) +{ + bool error= 0; + if (rhs->elements) + { + /* + It's okay to allocate an array of nodes at once: we never + call a destructor for list_node objects anyway. 
+ */ + if ((first= (list_node*) alloc_root(mem_root, + sizeof(list_node) * rhs->elements))) + { + elements= rhs->elements; + list_node *dst= first; + list_node *src= rhs->first; + for (; dst < first + elements - 1; dst++, src= src->next) + { + dst->info= src->info; + dst->next= dst + 1; + } + /* Copy the last node */ + dst->info= src->info; + dst->next= &end_of_list; + /* Setup 'last' member */ + last= &dst->next; + return 0; + } + error= 1; + } + elements= 0; + first= &end_of_list; + last= &first; + return error; +} diff --git a/sql/sql_list.h b/sql/sql_list.h new file mode 100644 index 00000000..5a57c86e --- /dev/null +++ b/sql/sql_list.h @@ -0,0 +1,874 @@ +#ifndef INCLUDES_MYSQL_SQL_LIST_H +#define INCLUDES_MYSQL_SQL_LIST_H +/* Copyright (c) 2000, 2012, Oracle and/or its affiliates. + Copyright (c) 2019, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include "sql_alloc.h" +#include + +/** + Simple intrusive linked list. + + @remark Similar in nature to base_list, but intrusive. It keeps a + a pointer to the first element in the list and a indirect + reference to the last element. +*/ + +template +class SQL_I_List :public Sql_alloc +{ +public: + uint elements; + /** The first element in the list. 
*/ + T *first; + /** A reference to the next element in the list. */ + T **next; + + SQL_I_List() { empty(); } + + SQL_I_List(const SQL_I_List &tmp) : Sql_alloc() + { + elements= tmp.elements; + first= tmp.first; + next= elements ? tmp.next : &first; + } + + SQL_I_List& operator=(const SQL_I_List &tmp) + { + elements= tmp.elements; + first= tmp.first; + next= elements ? tmp.next : &first;; + return *this; + } + + inline void empty() + { + elements= 0; + first= NULL; + next= &first; + } + + inline void link_in_list(T *element, T **next_ptr) + { + elements++; + (*next)= element; + next= next_ptr; + *next= NULL; + } + + inline void save_and_clear(SQL_I_List *save) + { + *save= *this; + empty(); + } + + inline void push_front(SQL_I_List *save) + { + /* link current list last */ + *save->next= first; + first= save->first; + elements+= save->elements; + } + + inline void push_back(SQL_I_List *save) + { + if (save->first) + { + *next= save->first; + next= save->next; + elements+= save->elements; + } + } +}; + + +/* + Basic single linked list + Used for item and item_buffs. + All list ends with a pointer to the 'end_of_list' element, which + data pointer is a null pointer and the next pointer points to itself. + This makes it very fast to traverse lists as we don't have to + test for a special end condition for list that can't contain a null + pointer. +*/ + + +/** + list_node - a node of a single-linked list. + @note We never call a destructor for instances of this class.
+*/ + +struct list_node :public Sql_alloc +{ + list_node *next; + void *info; + list_node(const void *info_par, list_node *next_par) + :next(next_par), info(const_cast(info_par)) + {} + list_node() /* For end_of_list */ + { + info= 0; + next= this; + } +}; + +typedef bool List_eq(void *a, void *b); + +extern MYSQL_PLUGIN_IMPORT list_node end_of_list; + +class base_list :public Sql_alloc +{ +protected: + list_node *first,**last; + +public: + uint elements; + + bool operator==(const base_list &rhs) const + { + return + elements == rhs.elements && + first == rhs.first && + last == rhs.last; + } + base_list& operator=(const base_list &rhs) + { + elements= rhs.elements; + first= rhs.first; + last= elements ? rhs.last : &first; + return *this; + } + + inline void empty() { elements=0; first= &end_of_list; last=&first;} + inline base_list() { empty(); } + /** + This is a shallow copy constructor that implicitly passes the ownership + from the source list to the new instance. The old instance is not + updated, so both objects end up sharing the same nodes. If one of + the instances then adds or removes a node, the other becomes out of + sync ('last' pointer), while still operational. Some old code uses and + relies on this behaviour. This logic is quite tricky: please do not use + it in any new code. + */ + inline base_list(const base_list &tmp) :Sql_alloc() + { + *this= tmp; + } + /** + Construct a deep copy of the argument in memory root mem_root. + The elements themselves are copied by pointer. If you also + need to copy elements by value, you should employ + list_copy_and_replace_each_value after creating a copy. 
+ */ + bool copy(const base_list *rhs, MEM_ROOT *mem_root); + base_list(const base_list &rhs, MEM_ROOT *mem_root) { copy(&rhs, mem_root); } + inline base_list(bool) {} + inline bool push_back(void *info) + { + if (((*last)=new list_node(info, &end_of_list))) + { + last= &(*last)->next; + elements++; + return 0; + } + return 1; + } + inline bool push_back(void *info, MEM_ROOT *mem_root) + { + if (((*last)=new (mem_root) list_node(info, &end_of_list))) + { + last= &(*last)->next; + elements++; + return 0; + } + return 1; + } + bool push_front_impl(list_node *node) + { + if (node) + { + if (last == &first) + last= &node->next; + first=node; + elements++; + return 0; + } + return 1; + } + inline bool push_front(void *info) + { return push_front_impl(new list_node(info, first)); } + inline bool push_front(void *info, MEM_ROOT *mem_root) + { return push_front_impl(new (mem_root) list_node(info,first)); } + void remove(list_node **prev) + { + list_node *node=(*prev)->next; + if (!--elements) + last= &first; + else if (last == &(*prev)->next) + last= prev; + delete *prev; + *prev=node; + } + inline void append(base_list *list) + { + if (!list->is_empty()) + { + if (is_empty()) + { + *this= *list; + return; + } + *last= list->first; + last= list->last; + elements+= list->elements; + } + } + inline void *pop(void) + { + if (first == &end_of_list) return 0; + list_node *tmp=first; + first=first->next; + if (!--elements) + last= &first; + return tmp->info; + } + + /* + Remove from this list elements that are contained in the passed list. + We assume that the passed list is a tail of this list (that is, the whole + list_node* elements are shared). 
+ */ + inline void disjoin(const base_list *list) + { + list_node **prev= &first; + list_node *node= first; + list_node *list_first= list->first; + elements=0; + while (node != &end_of_list && node != list_first) + { + prev= &node->next; + node= node->next; + elements++; + if (node == &end_of_list) + return; + } + *prev= &end_of_list; + last= prev; + } + inline void prepend(base_list *list) + { + if (!list->is_empty()) + { + if (is_empty()) + last= list->last; + *list->last= first; + first= list->first; + elements+= list->elements; + } + } + /** + Swap two lists. + */ + inline void swap(base_list &rhs) + { + list_node **rhs_last=rhs.last; + swap_variables(list_node *, first, rhs.first); + swap_variables(uint, elements, rhs.elements); + rhs.last= last == &first ? &rhs.first : last; + last = rhs_last == &rhs.first ? &first : rhs_last; + } + + inline list_node* last_node() { return *last; } + inline list_node* first_node() { return first;} + inline void *head() { return first->info; } + inline void **head_ref() { return first != &end_of_list ? &first->info : 0; } + inline bool is_empty() { return first == &end_of_list ; } + inline list_node *last_ref() { return &end_of_list; } + inline bool add_unique(void *info, List_eq *eq) + { + list_node *node= first; + for (; + node != &end_of_list && (!(*eq)(node->info, info)); + node= node->next) ; + if (node == &end_of_list) + return push_back(info); + return 1; + } + friend class base_list_iterator; + friend class error_list; + friend class error_list_iterator; + + /* + Return N-th element in the list, or NULL if the list has + less than N elements. + */ + void *elem(uint n) + { + list_node *node= first; + void *data= NULL; + for (uint i= 0; i <= n; i++) + { + if (node == &end_of_list) + { + data= NULL; + break; + } + data= node->info; + node= node->next; + } + return data; + } + +#ifdef LIST_EXTRA_DEBUG + /* + Check list invariants and print results into trace. 
Invariants are: + - (*last) points to end_of_list + - There are no NULLs in the list. + - base_list::elements is the number of elements in the list. + + SYNOPSIS + check_list() + name Name to print to trace file + + RETURN + 1 The list is Ok. + 0 List invariants are not met. + */ + + bool check_list(const char *name) + { + base_list *list= this; + list_node *node= first; + uint cnt= 0; + + while (node->next != &end_of_list) + { + if (!node->info) + { + DBUG_PRINT("list_invariants",("%s: error: NULL element in the list", + name)); + return FALSE; + } + node= node->next; + cnt++; + } + if (last != &(node->next)) + { + DBUG_PRINT("list_invariants", ("%s: error: wrong last pointer", name)); + return FALSE; + } + if (cnt+1 != elements) + { + DBUG_PRINT("list_invariants", ("%s: error: wrong element count", name)); + return FALSE; + } + DBUG_PRINT("list_invariants", ("%s: list is ok", name)); + return TRUE; + } +#endif // LIST_EXTRA_DEBUG + +protected: + void after(const void *info, list_node *node) + { + list_node *new_node=new list_node(info,node->next); + node->next=new_node; + elements++; + if (last == &(node->next)) + last= &new_node->next; + } +}; + + +class base_list_iterator +{ +protected: + base_list *list; + list_node **el,**prev,*current; + void sublist(base_list &ls, uint elm) + { + ls.first= *el; + ls.last= list->last; + ls.elements= elm; + } +public: + base_list_iterator() + :list(0), el(0), prev(0), current(0) + {} + + base_list_iterator(base_list &list_par) + { init(list_par); } + + inline void init(base_list &list_par) + { + list= &list_par; + el= &list_par.first; + prev= 0; + current= 0; + } + + inline void *next(void) + { + prev=el; + current= *el; + el= ¤t->next; + return current->info; + } + /* Get what calling next() would return, without moving the iterator */ + inline void *peek() + { + return (*el)->info; + } + inline void *next_fast(void) + { + list_node *tmp; + tmp= *el; + el= &tmp->next; + return tmp->info; + } + inline void rewind(void) + { + 
el= &list->first; + } + inline void *replace(const void *element) + { // Return old element + void *tmp=current->info; + DBUG_ASSERT(current->info != 0); + current->info= const_cast(element); + return tmp; + } + void *replace(base_list &new_list) + { + void *ret_value=current->info; + if (!new_list.is_empty()) + { + *new_list.last=current->next; + current->info=new_list.first->info; + current->next=new_list.first->next; + if ((list->last == ¤t->next) && (new_list.elements > 1)) + list->last= new_list.last; + list->elements+=new_list.elements-1; + } + return ret_value; // return old element + } + inline void remove(void) // Remove current + { + list->remove(prev); + el=prev; + current=0; // Safeguard + } + void after(const void *element) // Insert element after current + { + list->after(element,current); + current=current->next; + el= ¤t->next; + } + inline void **ref(void) // Get reference pointer + { + return ¤t->info; + } + inline bool is_last(void) + { + return el == &list->last_ref()->next; + } + inline bool at_end() + { + return current == &end_of_list; + } + friend class error_list_iterator; +}; + +template class List :public base_list +{ +public: + inline List() :base_list() {} + inline List(const List &tmp, MEM_ROOT *mem_root) : + base_list(tmp, mem_root) {} + inline bool push_back(T *a) { return base_list::push_back(a); } + inline bool push_back(T *a, MEM_ROOT *mem_root) + { return base_list::push_back((void*) a, mem_root); } + inline bool push_front(T *a) { return base_list::push_front(a); } + inline bool push_front(T *a, MEM_ROOT *mem_root) + { return base_list::push_front((void*) a, mem_root); } + inline T* head() {return (T*) base_list::head(); } + inline T** head_ref() {return (T**) base_list::head_ref(); } + inline T* pop() {return (T*) base_list::pop(); } + inline void append(List *list) { base_list::append(list); } + inline void prepend(List *list) { base_list::prepend(list); } + inline void disjoin(List *list) { base_list::disjoin(list); } + 
inline bool add_unique(T *a, bool (*eq)(T *a, T *b)) + { return base_list::add_unique(a, (List_eq *)eq); } + inline bool copy(const List *list, MEM_ROOT *root) + { return base_list::copy(list, root); } + void delete_elements(void) + { + list_node *element,*next; + for (element=first; element != &end_of_list; element=next) + { + next=element->next; + delete (T*) element->info; + } + empty(); + } + T *elem(uint n) { return (T*) base_list::elem(n); } + // Create a new list with one element + static List *make(MEM_ROOT *mem_root, T *first) + { + List *res= new (mem_root) List; + return res == NULL || res->push_back(first, mem_root) ? NULL : res; + } + + class Iterator; + using value_type= T; + using iterator= Iterator; + using const_iterator= const Iterator; + + Iterator begin() const { return Iterator(first); } + Iterator end() const { return Iterator(); } + + class Iterator + { + public: + using iterator_category= std::forward_iterator_tag; + using value_type= T; + using difference_type= std::ptrdiff_t; + using pointer= T *; + using reference= T &; + + Iterator(list_node *p= &end_of_list) : node{p} {} + + Iterator &operator++() + { + DBUG_ASSERT(node != &end_of_list); + + node= node->next; + return *this; + } + + T operator++(int) + { + Iterator tmp(*this); + operator++(); + return tmp; + } + + T &operator*() { return *static_cast(node->info); } + T *operator->() { return static_cast(node->info); } + + bool operator==(const typename List::iterator &rhs) + { + return node == rhs.node; + } + + bool operator!=(const typename List::iterator &rhs) + { + return node != rhs.node; + } + + private: + list_node *node{&end_of_list}; + }; +}; + + +template class List_iterator :public base_list_iterator +{ +public: + List_iterator(List &a) : base_list_iterator(a) {} + List_iterator() : base_list_iterator() {} + inline void init(List &a) { base_list_iterator::init(a); } + inline T* operator++(int) { return (T*) base_list_iterator::next(); } + inline T* peek() { return (T*) 
base_list_iterator::peek(); } + inline T *replace(T *a) { return (T*) base_list_iterator::replace(a); } + inline T *replace(List &a) { return (T*) base_list_iterator::replace(a); } + inline void rewind(void) { base_list_iterator::rewind(); } + inline void remove() { base_list_iterator::remove(); } + inline void after(T *a) { base_list_iterator::after(a); } + inline T** ref(void) { return (T**) base_list_iterator::ref(); } +}; + + +template class List_iterator_fast :public base_list_iterator +{ +protected: + inline T *replace(T *) { return (T*) 0; } + inline T *replace(List &) { return (T*) 0; } + inline void remove(void) {} + inline void after(T *) {} + inline T** ref(void) { return (T**) 0; } + +public: + inline List_iterator_fast(List &a) : base_list_iterator(a) {} + inline List_iterator_fast() : base_list_iterator() {} + inline void init(List &a) { base_list_iterator::init(a); } + inline T* operator++(int) { return (T*) base_list_iterator::next_fast(); } + inline void rewind(void) { base_list_iterator::rewind(); } + void sublist(List &list_arg, uint el_arg) + { + base_list_iterator::sublist(list_arg, el_arg); + } +}; + + +/* + Bubble sort algorithm for List. + This sort function is supposed to be used only for very short list. + Currently it is used for the lists of Item_equal objects and + for some lists in the table elimination algorithms. In both + cases the sorted lists are very short. 
+*/ + +template +inline void bubble_sort(List *list_to_sort, + int (*sort_func)(T *a, T *b, void *arg), void *arg) +{ + bool swap; + T **ref1= 0; + T **ref2= 0; + List_iterator it(*list_to_sort); + do + { + T **last_ref= ref1; + T *item1= it++; + ref1= it.ref(); + T *item2; + + swap= FALSE; + while ((item2= it++) && (ref2= it.ref()) != last_ref) + { + if (sort_func(item1, item2, arg) > 0) + { + *ref1= item2; + *ref2= item1; + swap= TRUE; + } + else + item1= item2; + ref1= ref2; + } + it.rewind(); + } while (swap); +} + + +/* + A simple intrusive list which automatically removes an element from the + list on delete (for THD element) +*/ + +struct ilink +{ + struct ilink **prev,*next; + static void *operator new(size_t size) throw () + { + return (void*)my_malloc(PSI_INSTRUMENT_ME, + (uint)size, MYF(MY_WME | MY_FAE | ME_FATAL)); + } + static void operator delete(void* ptr_arg, size_t) + { + my_free(ptr_arg); + } + + inline ilink() + { + prev=0; next=0; + } + inline void unlink() + { + /* Extra tests because element doesn't have to be linked */ + if (prev) *prev= next; + if (next) next->prev=prev; + prev=0 ; next=0; + } + inline void assert_linked() + { + DBUG_ASSERT(prev != 0 && next != 0); + } + inline void assert_not_linked() + { + DBUG_ASSERT(prev == 0 && next == 0); + } + virtual ~ilink() { unlink(); } /*lint -e1740 */ +}; + + +/* Needed to be able to have an I_List of char* strings in mysqld.cc.
*/ + +class i_string: public ilink +{ +public: + const char* ptr; + i_string():ptr(0) { } + i_string(const char* s) : ptr(s) {} +}; + +/* needed for linked list of two strings for replicate-rewrite-db */ +class i_string_pair: public ilink +{ +public: + const char* key; + const char* val; + i_string_pair():key(0),val(0) { } + i_string_pair(const char* key_arg, const char* val_arg) : + key(key_arg),val(val_arg) {} +}; + + +template class I_List_iterator; + + +class base_ilist +{ + struct ilink *first; + struct ilink last; +public: + inline void empty() { first= &last; last.prev= &first; } + base_ilist() { empty(); } + inline bool is_empty() { return first == &last; } + // Returns true if p is the last "real" object in the list, + // i.e. p->next points to the sentinel. + inline bool is_last(ilink *p) { return p->next == NULL || p->next == &last; } + inline void append(ilink *a) + { + first->prev= &a->next; + a->next=first; a->prev= &first; first=a; + } + inline void push_back(ilink *a) + { + *last.prev= a; + a->next= &last; + a->prev= last.prev; + last.prev= &a->next; + } + inline struct ilink *get() + { + struct ilink *first_link=first; + if (first_link == &last) + return 0; + first_link->unlink(); // Unlink from list + return first_link; + } + inline struct ilink *head() + { + return (first != &last) ? first : 0; + } + + /** + Moves list elements to new owner, and empties current owner (i.e. this). + + @param[in,out] new_owner The new owner of the list elements. + Should be empty in input. + */ + + void move_elements_to(base_ilist *new_owner) + { + DBUG_ASSERT(new_owner->is_empty()); + new_owner->first= first; + new_owner->last= last; + empty(); + } + + friend class base_ilist_iterator; + private: + /* + We don't want to allow copying of this class, as that would give us + two list heads containing the same elements. + So we declare, but don't define copy CTOR and assignment operator. 
+ */ + base_ilist(const base_ilist&); + void operator=(const base_ilist&); +}; + + +class base_ilist_iterator +{ + base_ilist *list; + struct ilink **el,*current; +public: + base_ilist_iterator(base_ilist &list_par) :list(&list_par), + el(&list_par.first),current(0) {} + void *next(void) + { + /* This is coded to allow push_back() while iterating */ + current= *el; + if (current == &list->last) return 0; + el= ¤t->next; + return current; + } +}; + + +template +class I_List :private base_ilist +{ +public: + I_List() :base_ilist() {} + inline bool is_last(T *p) { return base_ilist::is_last(p); } + inline void empty() { base_ilist::empty(); } + inline bool is_empty() { return base_ilist::is_empty(); } + inline void append(T* a) { base_ilist::append(a); } + inline void push_back(T* a) { base_ilist::push_back(a); } + inline T* get() { return (T*) base_ilist::get(); } + inline T* head() { return (T*) base_ilist::head(); } + inline void move_elements_to(I_List* new_owner) { + base_ilist::move_elements_to(new_owner); + } +#ifndef _lint + friend class I_List_iterator; +#endif +}; + + +template class I_List_iterator :public base_ilist_iterator +{ +public: + I_List_iterator(I_List &a) : base_ilist_iterator(a) {} + inline T* operator++(int) { return (T*) base_ilist_iterator::next(); } +}; + +/** + Make a deep copy of each list element. + + @note A template function and not a template method of class List + is employed because of explicit template instantiation: + in server code there are explicit instantiations of List and + an explicit instantiation of a template requires that any method + of the instantiated class used in the template can be resolved. + Evidently not all template arguments have clone() method with + the right signature. + + @return You must query the error state in THD for out-of-memory + situation after calling this function. 
+*/ + +template +inline +void +list_copy_and_replace_each_value(List &list, MEM_ROOT *mem_root) +{ + /* Make a deep copy of each element */ + List_iterator it(list); + T *el; + while ((el= it++)) + it.replace(el->clone(mem_root)); +} + +void free_list(I_List *list); + +#endif // INCLUDES_MYSQL_SQL_LIST_H diff --git a/sql/sql_load.cc b/sql/sql_load.cc new file mode 100644 index 00000000..8aa1452c --- /dev/null +++ b/sql/sql_load.cc @@ -0,0 +1,2099 @@ +/* + Copyright (c) 2000, 2016, Oracle and/or its affiliates. + Copyright (c) 2010, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* Copy data from a textfile to table */ +/* 2006-12 Erik Wetterberg : LOAD XML added */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "sql_load.h" +#include "sql_load.h" +#include "sql_cache.h" // query_cache_* +#include "sql_base.h" // fill_record_n_invoke_before_triggers +#include +#include "sql_view.h" // check_key_in_view +#include "sql_insert.h" // check_that_all_fields_are_given_values, + // write_record +#include "sql_acl.h" // INSERT_ACL, UPDATE_ACL +#include "log_event.h" // Delete_file_log_event, + // Execute_load_query_log_event, + // LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F +#include +#include "rpl_mi.h" +#include "sql_repl.h" +#include "sp_head.h" +#include "sql_trigger.h" +#include "sql_derived.h" +#include "sql_show.h" + +#include "wsrep_mysqld.h" + +#include "scope.h" // scope_exit + +extern "C" int _my_b_net_read(IO_CACHE *info, uchar *Buffer, size_t Count); + +class XML_TAG { +public: + int level; + String field; + String value; + XML_TAG(int l, const String &f, const String &v); +}; + + +XML_TAG::XML_TAG(int l, const String &f, const String &v) +{ + level= l; + field.append(f); + value.append(v); +} + + +/* + Field and line terminators must be interpreted as sequence of unsigned char. + Otherwise, non-ascii terminators will be negative on some platforms, + and positive on others (depending on the implementation of char). +*/ +class Term_string +{ + const uchar *m_ptr; + uint m_length; + int m_initial_byte; +public: + Term_string(const String &str) : + m_ptr(static_cast(static_cast(str.ptr()))), + m_length(str.length()), + m_initial_byte((uchar) (str.length() ? 
str.ptr()[0] : INT_MAX)) + { } + void set(const uchar *str, uint length, int initial_byte) + { + m_ptr= str; + m_length= length; + m_initial_byte= initial_byte; + } + void reset() { set(NULL, 0, INT_MAX); } + const uchar *ptr() const { return m_ptr; } + uint length() const { return m_length; } + int initial_byte() const { return m_initial_byte; } + bool eq(const Term_string &other) const + { + return length() == other.length() && !memcmp(ptr(), other.ptr(), length()); + } +}; + + +#define GET (stack_pos != stack ? *--stack_pos : my_b_get(&cache)) +#define PUSH(A) *(stack_pos++)=(A) + +#ifdef WITH_WSREP +/** If requested by wsrep_load_data_splitting and streaming replication is + not enabled, replicate a streaming fragment every 10,000 rows.*/ +class Wsrep_load_data_split +{ +public: + Wsrep_load_data_split(THD *thd) + : m_thd(thd) + , m_load_data_splitting(wsrep_load_data_splitting) + , m_fragment_unit(thd->wsrep_trx().streaming_context().fragment_unit()) + , m_fragment_size(thd->wsrep_trx().streaming_context().fragment_size()) + { + if (WSREP(m_thd) && m_load_data_splitting) + { + /* Override streaming settings with backward compatible values for + load data splitting */ + m_thd->wsrep_cs().streaming_params(wsrep::streaming_context::row, 10000); + } + } + + ~Wsrep_load_data_split() + { + if (WSREP(m_thd) && m_load_data_splitting) + { + /* Restore original settings */ + m_thd->wsrep_cs().streaming_params(m_fragment_unit, m_fragment_size); + } + } +private: + THD *m_thd; + my_bool m_load_data_splitting; + enum wsrep::streaming_context::fragment_unit m_fragment_unit; + size_t m_fragment_size; +}; +#endif /* WITH_WSREP */ + +class READ_INFO: public Load_data_param +{ + File file; + String data; /* Read buffer */ + Term_string m_field_term; /* FIELDS TERMINATED BY 'string' */ + Term_string m_line_term; /* LINES TERMINATED BY 'string' */ + Term_string m_line_start; /* LINES STARTING BY 'string' */ + int enclosed_char,escape_char; + int *stack,*stack_pos; + bool 
found_end_of_line,start_of_line,eof; + int level; /* for load xml */ + + bool getbyte(char *to) + { + int chr= GET; + if (chr == my_b_EOF) + return (eof= true); + *to= chr; + return false; + } + + /** + Read a tail of a multi-byte character. + The first byte of the character is assumed to be already + read from the file and appended to "str". + + @returns true - if EOF happened unexpectedly + @returns false - no EOF happened: found a good multi-byte character, + or a bad byte sequence + + Note: + The return value depends only on EOF: + - read_mbtail() returns "false" is a good character was read, but also + - read_mbtail() returns "false" if an incomplete byte sequence was found + and no EOF happened. + + For example, suppose we have an ujis file with bytes 0x8FA10A, where: + - 0x8FA1 is an incomplete prefix of a 3-byte character + (it should be [8F][A1-FE][A1-FE] to make a full 3-byte character) + - 0x0A is a line demiliter + This file has some broken data, the trailing [A1-FE] is missing. 
+ + In this example it works as follows: + - 0x8F is read from the file and put into "data" before the call + for read_mbtail() + - 0xA1 is read from the file and put into "data" by read_mbtail() + - 0x0A is kept in the read queue, so the next read iteration after + the current read_mbtail() call will normally find it and recognize as + a line delimiter + - the current call for read_mbtail() returns "false", + because no EOF happened + */ + bool read_mbtail(String *str) + { + int chlen; + if ((chlen= charset()->charlen(str->end() - 1, str->end())) == 1) + return false; // Single byte character found + for (uint32 length0= str->length() - 1 ; MY_CS_IS_TOOSMALL(chlen); ) + { + int chr= GET; + if (chr == my_b_EOF) + { + DBUG_PRINT("info", ("read_mbtail: chlen=%d; unexpected EOF", chlen)); + return true; // EOF + } + str->append(chr); + chlen= charset()->charlen(str->ptr() + length0, str->end()); + if (chlen == MY_CS_ILSEQ) + { + /** + It has been an incomplete (but a valid) sequence so far, + but the last byte turned it into a bad byte sequence. + Unget the very last byte. 
+ */ + str->length(str->length() - 1); + PUSH(chr); + DBUG_PRINT("info", ("read_mbtail: ILSEQ")); + return false; // Bad byte sequence + } + } + DBUG_PRINT("info", ("read_mbtail: chlen=%d", chlen)); + return false; // Good multi-byte character + } + +public: + bool error,line_cuted,found_null,enclosed; + uchar *row_start, /* Found row starts here */ + *row_end; /* Found row ends here */ + LOAD_FILE_IO_CACHE cache; + + READ_INFO(THD *thd, File file, const Load_data_param ¶m, + String &field_term,String &line_start,String &line_term, + String &enclosed,int escape,bool get_it_from_net, bool is_fifo); + ~READ_INFO(); + int read_field(); + int read_fixed_length(void); + int next_line(void); + char unescape(char chr); + bool terminator(const uchar *ptr, uint length); + bool terminator(const Term_string &str) + { return terminator(str.ptr(), str.length()); } + bool terminator(int chr, const Term_string &str) + { return str.initial_byte() == chr && terminator(str); } + bool find_start_of_fields(); + /* load xml */ + List taglist; + int read_value(int delim, String *val); + int read_xml(THD *thd); + int clear_level(int level); + + my_off_t file_length() { return cache.end_of_file; } + my_off_t position() { return my_b_tell(&cache); } + + /** + skip all data till the eof. 
+ */ + void skip_data_till_eof() + { + while (GET != my_b_EOF) + ; + } +}; + +static int read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, + List &fields_vars, List &set_fields, + List &set_values, READ_INFO &read_info, + ulong skip_lines, + bool ignore_check_option_errors); +static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, + List &fields_vars, List &set_fields, + List &set_values, READ_INFO &read_info, + String &enclosed, ulong skip_lines, + bool ignore_check_option_errors); + +static int read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, + List &fields_vars, List &set_fields, + List &set_values, READ_INFO &read_info, + String &enclosed, ulong skip_lines, + bool ignore_check_option_errors); + +#ifndef EMBEDDED_LIBRARY +static bool write_execute_load_query_log_event(THD *, const sql_exchange*, const + char*, const char*, bool, enum enum_duplicates, bool, bool, int); +#endif /* EMBEDDED_LIBRARY */ + + +bool Load_data_param::add_outvar_field(THD *thd, const Field *field) +{ + if (field->flags & BLOB_FLAG) + { + m_use_blobs= true; + m_fixed_length+= 256; // Will be extended if needed + } + else + m_fixed_length+= field->field_length; + return false; +} + + +bool Load_data_param::add_outvar_user_var(THD *thd) +{ + if (m_is_fixed_length) + { + my_error(ER_LOAD_FROM_FIXED_SIZE_ROWS_TO_VAR, MYF(0)); + return true; + } + return false; +} + + +/* + Execute LOAD DATA query + + SYNOPSYS + mysql_load() + thd - current thread + ex - sql_exchange object representing source file and its parsing rules + table_list - list of tables to which we are loading data + fields_vars - list of fields and variables to which we read + data from file + set_fields - list of fields mentioned in set clause + set_values - expressions to assign to fields in previous list + handle_duplicates - indicates whenever we should emit error or + replace row if we will meet duplicates. 
+ ignore - - indicates whenever we should ignore duplicates + read_file_from_client - is this LOAD DATA LOCAL ? + + RETURN VALUES + TRUE - error / FALSE - success +*/ + +int mysql_load(THD *thd, const sql_exchange *ex, TABLE_LIST *table_list, + List &fields_vars, List &set_fields, + List &set_values, + enum enum_duplicates handle_duplicates, bool ignore, + bool read_file_from_client) +{ + char name[FN_REFLEN]; + File file; + TABLE *table= NULL; + int error= 0; + bool is_fifo=0; +#ifndef EMBEDDED_LIBRARY + killed_state killed_status; + bool is_concurrent; +#endif + const char *db= table_list->db.str; // This is never null + /* + If path for file is not defined, we will use the current database. + If this is not set, we will use the directory where the table to be + loaded is located + */ + const char *tdb= thd->db.str ? thd->db.str : db; // Result is never null + ulong skip_lines= ex->skip_lines; + bool transactional_table __attribute__((unused)); + DBUG_ENTER("mysql_load"); + +#ifdef WITH_WSREP + Wsrep_load_data_split wsrep_load_data_split(thd); +#endif /* WITH_WSREP */ + /* + Bug #34283 + mysqlbinlog leaves tmpfile after termination if binlog contains + load data infile, so in mixed mode we go to row-based for + avoiding the problem. 
+ */ + thd->set_current_stmt_binlog_format_row_if_mixed(); + +#ifdef EMBEDDED_LIBRARY + read_file_from_client = 0; //server is always in the same process +#endif + + if (ex->escaped->length() > 1 || ex->enclosed->length() > 1) + { + my_message(ER_WRONG_FIELD_TERMINATORS, + ER_THD(thd, ER_WRONG_FIELD_TERMINATORS), + MYF(0)); + DBUG_RETURN(TRUE); + } + + /* Report problems with non-ascii separators */ + if (!ex->escaped->is_ascii() || !ex->enclosed->is_ascii() || + !ex->field_term->is_ascii() || + !ex->line_term->is_ascii() || !ex->line_start->is_ascii()) + { + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED, + ER_THD(thd, WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED)); + } + + if (open_and_lock_tables(thd, table_list, TRUE, 0)) + DBUG_RETURN(TRUE); + if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT)) + DBUG_RETURN(TRUE); + if (thd->lex->handle_list_of_derived(table_list, DT_PREPARE)) + DBUG_RETURN(TRUE); + + if (setup_tables_and_check_access(thd, + &thd->lex->first_select_lex()->context, + &thd->lex->first_select_lex()-> + top_join_list, + table_list, + thd->lex->first_select_lex()->leaf_tables, + FALSE, + INSERT_ACL | UPDATE_ACL, + INSERT_ACL | UPDATE_ACL, FALSE)) + DBUG_RETURN(-1); + if (!table_list->table || // do not suport join view + !table_list->single_table_updatable() || // and derived tables + check_key_in_view(thd, table_list)) + { + my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias.str, "LOAD"); + DBUG_RETURN(TRUE); + } + if (table_list->is_multitable()) + { + my_error(ER_WRONG_USAGE, MYF(0), "Multi-table VIEW", "LOAD"); + DBUG_RETURN(TRUE); + } + if (table_list->prepare_where(thd, 0, TRUE) || + table_list->prepare_check_option(thd)) + { + DBUG_RETURN(TRUE); + } + thd_proc_info(thd, "Executing"); + /* + Let us emit an error if we are loading data to table which is used + in subselect in SET clause like we do it for INSERT. 
+ + The main thing to fix to remove this restriction is to ensure that the + table is marked to be 'used for insert' in which case we should never + mark this table as 'const table' (ie, one that has only one row). + */ + if (unique_table(thd, table_list, table_list->next_global, 0)) + { + my_error(ER_UPDATE_TABLE_USED, MYF(0), table_list->table_name.str, + "LOAD DATA"); + DBUG_RETURN(TRUE); + } + + table= table_list->table; + transactional_table= table->file->has_transactions_and_rollback(); +#ifndef EMBEDDED_LIBRARY + is_concurrent= (table_list->lock_type == TL_WRITE_CONCURRENT_INSERT); +#endif + + if (check_duplic_insert_without_overlaps(thd, table, handle_duplicates) != 0) + DBUG_RETURN(true); + + auto scope_cleaner = make_scope_exit( + [&fields_vars]() { + fields_vars.empty(); + } + ); + + if (!fields_vars.elements) + { + Field_iterator_table_ref field_iterator; + field_iterator.set(table_list); + for (; !field_iterator.end_of_fields(); field_iterator.next()) + { + if (field_iterator.field() && + field_iterator.field()->invisible > VISIBLE) + continue; + Item *item; + if (!(item= field_iterator.create_item(thd))) + DBUG_RETURN(TRUE); + fields_vars.push_back(item->real_item(), thd->mem_root); + } + bitmap_set_all(table->write_set); + /* + Let us also prepare SET clause, altough it is probably empty + in this case. 
+ */ + if (setup_fields(thd, Ref_ptr_array(), + set_fields, MARK_COLUMNS_WRITE, 0, NULL, 0) || + setup_fields(thd, Ref_ptr_array(), + set_values, MARK_COLUMNS_READ, 0, NULL, 0)) + DBUG_RETURN(TRUE); + } + else + { // Part field list + scope_cleaner.release(); + /* TODO: use this conds for 'WITH CHECK OPTIONS' */ + if (setup_fields(thd, Ref_ptr_array(), + fields_vars, MARK_COLUMNS_WRITE, 0, NULL, 0) || + setup_fields(thd, Ref_ptr_array(), + set_fields, MARK_COLUMNS_WRITE, 0, NULL, 0) || + check_that_all_fields_are_given_values(thd, table, table_list)) + DBUG_RETURN(TRUE); + /* Fix the expressions in SET clause */ + if (setup_fields(thd, Ref_ptr_array(), + set_values, MARK_COLUMNS_READ, 0, NULL, 0)) + DBUG_RETURN(TRUE); + } + switch_to_nullable_trigger_fields(fields_vars, table); + switch_to_nullable_trigger_fields(set_fields, table); + switch_to_nullable_trigger_fields(set_values, table); + + table->prepare_triggers_for_insert_stmt_or_event(); + table->mark_columns_needed_for_insert(); + + Load_data_param param(ex->cs ? 
ex->cs : thd->variables.collation_database, + !ex->field_term->length() && !ex->enclosed->length()); + List_iterator_fast it(fields_vars); + Item *item; + + while ((item= it++)) + { + const Load_data_outvar *var= item->get_load_data_outvar_or_error(); + if (!var || var->load_data_add_outvar(thd, ¶m)) + DBUG_RETURN(true); + } + if (param.use_blobs() && !ex->line_term->length() && !ex->field_term->length()) + { + my_message(ER_BLOBS_AND_NO_TERMINATED, + ER_THD(thd, ER_BLOBS_AND_NO_TERMINATED), MYF(0)); + DBUG_RETURN(TRUE); + } + + /* We can't give an error in the middle when using LOCAL files */ + if (read_file_from_client && handle_duplicates == DUP_ERROR) + ignore= 1; + +#ifndef EMBEDDED_LIBRARY + if (read_file_from_client) + { + (void)net_request_file(&thd->net,ex->file_name); + file = -1; + } + else +#endif + { +#ifdef DONT_ALLOW_FULL_LOAD_DATA_PATHS + ex->file_name+=dirname_length(ex->file_name); +#endif + if (!dirname_length(ex->file_name)) + { + strxnmov(name, FN_REFLEN-1, mysql_real_data_home, tdb, NullS); + (void) fn_format(name, ex->file_name, name, "", + MY_RELATIVE_PATH | MY_UNPACK_FILENAME); + } + else + { + (void) fn_format(name, ex->file_name, mysql_real_data_home, "", + MY_RELATIVE_PATH | MY_UNPACK_FILENAME | + MY_RETURN_REAL_PATH); + } + + if (thd->rgi_slave) + { +#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) + if (strncmp(thd->rgi_slave->rli->slave_patternload_file, name, + thd->rgi_slave->rli->slave_patternload_file_size)) + { + /* + LOAD DATA INFILE in the slave SQL Thread can only read from + --slave-load-tmpdir". This should never happen. Please, report a bug. + */ + + sql_print_error("LOAD DATA INFILE in the slave SQL Thread can only read from --slave-load-tmpdir. " \ + "Please, report a bug."); + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--slave-load-tmpdir"); + DBUG_RETURN(TRUE); + } +#else + /* + This is impossible and should never happen. 
+ */ + DBUG_ASSERT(FALSE); +#endif + } + else if (!is_secure_file_path(name)) + { + /* Read only allowed from within dir specified by secure_file_priv */ + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--secure-file-priv"); + DBUG_RETURN(TRUE); + } + +#if !defined(_WIN32) + MY_STAT stat_info; + if (!my_stat(name, &stat_info, MYF(MY_WME))) + DBUG_RETURN(TRUE); + + // if we are not in slave thread, the file must be: + if (!thd->slave_thread && + !((stat_info.st_mode & S_IFLNK) != S_IFLNK && // symlink + ((stat_info.st_mode & S_IFREG) == S_IFREG || // regular file + (stat_info.st_mode & S_IFIFO) == S_IFIFO))) // named pipe + { + my_error(ER_TEXTFILE_NOT_READABLE, MYF(0), name); + DBUG_RETURN(TRUE); + } + if ((stat_info.st_mode & S_IFIFO) == S_IFIFO) + is_fifo= 1; +#endif + if ((file= mysql_file_open(key_file_load, + name, O_RDONLY, MYF(MY_WME))) < 0) + + DBUG_RETURN(TRUE); + } + + COPY_INFO info; + bzero((char*) &info,sizeof(info)); + info.ignore= ignore; + info.handle_duplicates=handle_duplicates; + info.escape_char= (ex->escaped->length() && (ex->escaped_given() || + !(thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES))) + ? 
(*ex->escaped)[0] : INT_MAX; + + READ_INFO read_info(thd, file, param, + *ex->field_term, *ex->line_start, + *ex->line_term, *ex->enclosed, + info.escape_char, read_file_from_client, is_fifo); + if (unlikely(read_info.error)) + { + if (file >= 0) + mysql_file_close(file, MYF(0)); // no files in net reading + DBUG_RETURN(TRUE); // Can't allocate buffers + } + +#ifndef EMBEDDED_LIBRARY + if (mysql_bin_log.is_open()) + { + read_info.cache.thd = thd; + read_info.cache.wrote_create_file = 0; + read_info.cache.last_pos_in_file = HA_POS_ERROR; + read_info.cache.log_delayed= transactional_table; + } +#endif /*!EMBEDDED_LIBRARY*/ + + thd->count_cuted_fields= CHECK_FIELD_WARN; /* calc cuted fields */ + thd->cuted_fields=0L; + /* Skip lines if there is a line terminator */ + if (ex->line_term->length() && ex->filetype != FILETYPE_XML) + { + /* ex->skip_lines needs to be preserved for logging */ + while (skip_lines > 0) + { + skip_lines--; + if (read_info.next_line()) + break; + } + } + + thd_proc_info(thd, "Reading file"); + if (likely(!(error= MY_TEST(read_info.error)))) + { + table->reset_default_fields(); + table->next_number_field=table->found_next_number_field; + if (ignore || + handle_duplicates == DUP_REPLACE) + table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + if (handle_duplicates == DUP_REPLACE && + (!table->triggers || + !table->triggers->has_delete_triggers())) + table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE); + if (thd->locked_tables_mode <= LTM_LOCK_TABLES && + !table->s->long_unique_table) + table->file->ha_start_bulk_insert((ha_rows) 0); + table->copy_blobs=1; + + thd->abort_on_warning= !ignore && thd->is_strict_mode(); + thd->get_stmt_da()->reset_current_row_for_warning(1); + + bool create_lookup_handler= handle_duplicates != DUP_ERROR; + if ((table_list->table->file->ha_table_flags() & HA_DUPLICATE_POS)) + { + create_lookup_handler= true; + if ((error= table_list->table->file->ha_rnd_init_with_error(0))) + goto err; + } + 
table->file->prepare_for_insert(create_lookup_handler); + thd_progress_init(thd, 2); + fix_rownum_pointers(thd, thd->lex->current_select, &info.copied); + if (table_list->table->validate_default_values_of_unset_fields(thd)) + { + read_info.error= true; + error= 1; + } + else if (ex->filetype == FILETYPE_XML) /* load xml */ + error= read_xml_field(thd, info, table_list, fields_vars, + set_fields, set_values, read_info, + *(ex->line_term), skip_lines, ignore); + else if (read_info.is_fixed_length()) + error= read_fixed_length(thd, info, table_list, fields_vars, + set_fields, set_values, read_info, + skip_lines, ignore); + else + error= read_sep_field(thd, info, table_list, fields_vars, + set_fields, set_values, read_info, + *ex->enclosed, skip_lines, ignore); + + if (table_list->table->file->ha_table_flags() & HA_DUPLICATE_POS) + table_list->table->file->ha_rnd_end(); + + thd_proc_info(thd, "End bulk insert"); + if (likely(!error)) + thd_progress_next_stage(thd); + if (thd->locked_tables_mode <= LTM_LOCK_TABLES && + table->file->ha_end_bulk_insert() && !error) + { + table->file->print_error(my_errno, MYF(0)); + error= 1; + } + table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); + table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); + table->next_number_field=0; + } + if (file >= 0) + mysql_file_close(file, MYF(0)); + free_blobs(table); /* if pack_blob was used */ + table->copy_blobs=0; + thd->count_cuted_fields= CHECK_FIELD_IGNORE; + /* + simulated killing in the middle of per-row loop + must be effective for binlogging + */ + DBUG_EXECUTE_IF("simulate_kill_bug27571", + { + error=1; + thd->set_killed(KILL_QUERY); + };); + +#ifndef EMBEDDED_LIBRARY + killed_status= (error == 0) ? NOT_KILLED : thd->killed; +#endif + + /* + We must invalidate the table in query cache before binlog writing and + ha_autocommit_... 
+ */ + query_cache_invalidate3(thd, table_list, 0); + if (error) + { + if (read_file_from_client) + read_info.skip_data_till_eof(); + +#ifndef EMBEDDED_LIBRARY + if (mysql_bin_log.is_open()) + { + { + /* + Make sure last block (the one which caused the error) gets + logged. + */ + log_loaded_block(&read_info.cache, 0, 0); + /* If the file was not empty, wrote_create_file is true */ + if (read_info.cache.wrote_create_file) + { + int errcode= query_error_code(thd, killed_status == NOT_KILLED); + + /* since there is already an error, the possible error of + writing binary log will be ignored */ + if (thd->transaction->stmt.modified_non_trans_table) + (void) write_execute_load_query_log_event(thd, ex, + table_list->db.str, + table_list->table_name.str, + is_concurrent, + handle_duplicates, ignore, + transactional_table, + errcode); + else + { + Delete_file_log_event d(thd, db, transactional_table); + (void) mysql_bin_log.write(&d); + } + } + } + } +#endif /*!EMBEDDED_LIBRARY*/ + error= -1; // Error on read + goto err; + } + sprintf(name, ER_THD(thd, ER_LOAD_INFO), + (ulong) info.records, (ulong) info.deleted, + (ulong) (info.records - info.copied), + (long) thd->get_stmt_da()->current_statement_warn_count()); + + if (thd->transaction->stmt.modified_non_trans_table) + thd->transaction->all.modified_non_trans_table= TRUE; + thd->transaction->all.m_unsafe_rollback_flags|= + (thd->transaction->stmt.m_unsafe_rollback_flags & THD_TRANS::DID_WAIT); +#ifndef EMBEDDED_LIBRARY + if (mysql_bin_log.is_open()) + { + /* + We need to do the job that is normally done inside + binlog_query() here, which is to ensure that the pending event + is written before tables are unlocked and before any other + events are written. We also need to update the table map + version for the binary log to mark that table maps are invalid + after this point. 
+ */ + if (thd->is_current_stmt_binlog_format_row()) + error= thd->binlog_flush_pending_rows_event(TRUE, transactional_table); + else + { + /* + As already explained above, we need to call log_loaded_block() to have + the last block logged + */ + log_loaded_block(&read_info.cache, 0, 0); + if (read_info.cache.wrote_create_file) + { + int errcode= query_error_code(thd, killed_status == NOT_KILLED); + error= write_execute_load_query_log_event(thd, ex, + table_list->db.str, + table_list->table_name.str, + is_concurrent, + handle_duplicates, ignore, + transactional_table, + errcode); + } + + /* + Flushing the IO CACHE while writing the execute load query log event + may result in error (for instance, because the max_binlog_size has been + reached, and rotation of the binary log failed). + */ + error= error || mysql_bin_log.get_log_file()->error; + } + if (unlikely(error)) + goto err; + } +#endif /*!EMBEDDED_LIBRARY*/ + + /* ok to client sent only after binlog write and engine commit */ + my_ok(thd, info.copied + info.deleted, 0L, name); +err: + DBUG_ASSERT(transactional_table || !(info.copied || info.deleted) || + thd->transaction->stmt.modified_non_trans_table); + table->file->ha_release_auto_increment(); + table->auto_increment_field_not_null= FALSE; + thd->abort_on_warning= 0; + DBUG_RETURN(error); +} + + +#ifndef EMBEDDED_LIBRARY + +/* Not a very useful function; just to avoid duplication of code */ +static bool write_execute_load_query_log_event(THD *thd, const sql_exchange* ex, + const char* db_arg, /* table's database */ + const char* table_name_arg, + bool is_concurrent, + enum enum_duplicates duplicates, + bool ignore, + bool transactional_table, + int errcode) +{ + char *load_data_query; + my_off_t fname_start, + fname_end; + List fv; + Item *item, *val; + int n; + const char *tdb= (thd->db.str != NULL ? 
thd->db.str : db_arg); + const char *qualify_db= NULL; + char command_buffer[1024]; + String query_str(command_buffer, sizeof(command_buffer), + system_charset_info); + + Load_log_event lle(thd, ex, tdb, table_name_arg, fv, is_concurrent, + duplicates, ignore, transactional_table); + + /* + force in a LOCAL if there was one in the original. + */ + if (thd->lex->local_file) + lle.set_fname_outside_temp_buf(ex->file_name, strlen(ex->file_name)); + + query_str.length(0); + if (!thd->db.str || strcmp(db_arg, thd->db.str)) + { + /* + If used database differs from table's database, + prefix table name with database name so that it + becomes a FQ name. + */ + qualify_db= db_arg; + } + lle.print_query(thd, FALSE, (const char*) ex->cs ? ex->cs->cs_name.str : NULL, + &query_str, &fname_start, &fname_end, qualify_db); + + /* + prepare fields-list and SET if needed; print_query won't do that for us. + */ + if (!thd->lex->field_list.is_empty()) + { + List_iterator li(thd->lex->field_list); + + query_str.append(STRING_WITH_LEN(" (")); + n= 0; + + while ((item= li++)) + { + if (n++) + query_str.append(STRING_WITH_LEN(", ")); + const Load_data_outvar *var= item->get_load_data_outvar(); + DBUG_ASSERT(var); + var->load_data_print_for_log_event(thd, &query_str); + } + query_str.append(')'); + } + + if (!thd->lex->update_list.is_empty()) + { + List_iterator lu(thd->lex->update_list); + List_iterator lv(thd->lex->value_list); + + query_str.append(STRING_WITH_LEN(" SET ")); + n= 0; + + while ((item= lu++)) + { + val= lv++; + if (n++) + query_str.append(STRING_WITH_LEN(", ")); + append_identifier(thd, &query_str, &item->name); + query_str.append(&val->name); + } + } + + if (!(load_data_query= (char *)thd->strmake(query_str.ptr(), query_str.length()))) + return TRUE; + + Execute_load_query_log_event + e(thd, load_data_query, query_str.length(), + (uint) (fname_start - 1), (uint) fname_end, + (duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE : + (ignore ? 
LOAD_DUP_IGNORE : LOAD_DUP_ERROR), + transactional_table, FALSE, FALSE, errcode); + return mysql_bin_log.write(&e); +} + +#endif + +/**************************************************************************** +** Read of rows of fixed size + optional garage + optonal newline +****************************************************************************/ + +static int +read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, + List &fields_vars, List &set_fields, + List &set_values, READ_INFO &read_info, + ulong skip_lines, bool ignore_check_option_errors) +{ + List_iterator_fast it(fields_vars); + Item *item; + TABLE *table= table_list->table; + bool err, progress_reports; + ulonglong counter, time_to_report_progress; + DBUG_ENTER("read_fixed_length"); + + counter= 0; + time_to_report_progress= MY_HOW_OFTEN_TO_WRITE/10; + progress_reports= 1; + if ((thd->progress.max_counter= read_info.file_length()) == ~(my_off_t) 0) + progress_reports= 0; + + while (!read_info.read_fixed_length()) + { + if (thd->killed) + { + thd->send_kill_message(); + DBUG_RETURN(1); + } + if (progress_reports) + { + thd->progress.counter= read_info.position(); + if (++counter >= time_to_report_progress) + { + time_to_report_progress+= MY_HOW_OFTEN_TO_WRITE/10; + thd_progress_report(thd, thd->progress.counter, + thd->progress.max_counter); + } + } + if (skip_lines) + { + /* + We could implement this with a simple seek if: + - We are not using DATA INFILE LOCAL + - escape character is "" + - line starting prefix is "" + */ + skip_lines--; + continue; + } + it.rewind(); + uchar *pos=read_info.row_start; +#ifdef HAVE_valgrind + read_info.row_end[0]=0; +#endif + + restore_record(table, s->default_values); + + while ((item= it++)) + { + Load_data_outvar *dst= item->get_load_data_outvar(); + DBUG_ASSERT(dst); + if (pos == read_info.row_end) + { + if (dst->load_data_set_no_data(thd, &read_info)) + DBUG_RETURN(1); + } + else + { + uint length, fixed_length= dst->load_data_fixed_length(); + 
uchar save_chr; + if ((length=(uint) (read_info.row_end - pos)) > fixed_length) + length= fixed_length; + save_chr= pos[length]; pos[length]= '\0'; // Safeguard aganst malloc + dst->load_data_set_value(thd, (const char *) pos, length, &read_info); + pos[length]= save_chr; + if ((pos+= length) > read_info.row_end) + pos= read_info.row_end; // Fills rest with space + } + } + if (pos != read_info.row_end) + { + thd->cuted_fields++; /* To long row */ + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_TOO_MANY_RECORDS, + ER_THD(thd, ER_WARN_TOO_MANY_RECORDS), + thd->get_stmt_da()->current_row_for_warning()); + } + + if (thd->killed || + fill_record_n_invoke_before_triggers(thd, table, set_fields, set_values, + ignore_check_option_errors, + TRG_EVENT_INSERT)) + DBUG_RETURN(1); + + switch (table_list->view_check_option(thd, ignore_check_option_errors)) { + case VIEW_CHECK_SKIP: + read_info.next_line(); + goto continue_loop; + case VIEW_CHECK_ERROR: + DBUG_RETURN(-1); + } + + err= write_record(thd, table, &info); + table->auto_increment_field_not_null= FALSE; + if (err) + DBUG_RETURN(1); + + /* + We don't need to reset auto-increment field since we are restoring + its default value at the beginning of each loop iteration. 
+ */ + if (read_info.next_line()) // Skip to next line + break; + if (read_info.line_cuted) + { + thd->cuted_fields++; /* To long row */ + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_TOO_MANY_RECORDS, + ER_THD(thd, ER_WARN_TOO_MANY_RECORDS), + thd->get_stmt_da()->current_row_for_warning()); + } + thd->get_stmt_da()->inc_current_row_for_warning(); +continue_loop:; + } + DBUG_RETURN(MY_TEST(read_info.error)); +} + + +static int +read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, + List &fields_vars, List &set_fields, + List &set_values, READ_INFO &read_info, + String &enclosed, ulong skip_lines, + bool ignore_check_option_errors) +{ + List_iterator_fast it(fields_vars); + Item *item; + TABLE *table= table_list->table; + uint enclosed_length; + bool err, progress_reports; + ulonglong counter, time_to_report_progress; + DBUG_ENTER("read_sep_field"); + + enclosed_length=enclosed.length(); + + counter= 0; + time_to_report_progress= MY_HOW_OFTEN_TO_WRITE/10; + progress_reports= 1; + if ((thd->progress.max_counter= read_info.file_length()) == ~(my_off_t) 0) + progress_reports= 0; + + for (;;it.rewind()) + { + if (thd->killed) + { + thd->send_kill_message(); + DBUG_RETURN(1); + } + + if (progress_reports) + { + thd->progress.counter= read_info.position(); + if (++counter >= time_to_report_progress) + { + time_to_report_progress+= MY_HOW_OFTEN_TO_WRITE/10; + thd_progress_report(thd, thd->progress.counter, + thd->progress.max_counter); + } + } + restore_record(table, s->default_values); + + while ((item= it++)) + { + uint length; + uchar *pos; + if (read_info.read_field()) + break; + + /* If this line is to be skipped we don't want to fill field or var */ + if (skip_lines) + continue; + + pos=read_info.row_start; + length=(uint) (read_info.row_end-pos); + + Load_data_outvar *dst= item->get_load_data_outvar_or_error(); + DBUG_ASSERT(dst); + + if ((!read_info.enclosed && + (enclosed_length && length == 4 && + !memcmp(pos, 
STRING_WITH_LEN("NULL")))) || + (length == 1 && read_info.found_null)) + { + if (dst->load_data_set_null(thd, &read_info)) + DBUG_RETURN(1); + } + else + { + read_info.row_end[0]= 0; // Safe to change end marker + if (dst->load_data_set_value(thd, (const char *) pos, length, &read_info)) + DBUG_RETURN(1); + } + } + + if (unlikely(thd->is_error())) + read_info.error= 1; + if (unlikely(read_info.error)) + break; + + if (skip_lines) + { + skip_lines--; + continue; + } + if (item) + { + /* Have not read any field, thus input file is simply ended */ + if (item == fields_vars.head()) + break; + for (; item ; item= it++) + { + Load_data_outvar *dst= item->get_load_data_outvar_or_error(); + DBUG_ASSERT(dst); + if (unlikely(dst->load_data_set_no_data(thd, &read_info))) + DBUG_RETURN(1); + } + } + + if (unlikely(thd->killed) || + unlikely(fill_record_n_invoke_before_triggers(thd, table, set_fields, + set_values, + ignore_check_option_errors, + TRG_EVENT_INSERT))) + DBUG_RETURN(1); + + switch (table_list->view_check_option(thd, + ignore_check_option_errors)) { + case VIEW_CHECK_SKIP: + read_info.next_line(); + goto continue_loop; + case VIEW_CHECK_ERROR: + DBUG_RETURN(-1); + } + + err= write_record(thd, table, &info); + table->auto_increment_field_not_null= FALSE; + if (err) + DBUG_RETURN(1); + /* + We don't need to reset auto-increment field since we are restoring + its default value at the beginning of each loop iteration. 
+ */ + if (read_info.next_line()) // Skip to next line + break; + if (read_info.line_cuted) + { + thd->cuted_fields++; /* To long row */ + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_TOO_MANY_RECORDS, + ER_THD(thd, ER_WARN_TOO_MANY_RECORDS), + thd->get_stmt_da()->current_row_for_warning()); + if (thd->killed) + DBUG_RETURN(1); + } + thd->get_stmt_da()->inc_current_row_for_warning(); +continue_loop:; + } + DBUG_RETURN(MY_TEST(read_info.error)); +} + + +/**************************************************************************** +** Read rows in xml format +****************************************************************************/ +static int +read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, + List &fields_vars, List &set_fields, + List &set_values, READ_INFO &read_info, + String &row_tag, ulong skip_lines, + bool ignore_check_option_errors) +{ + List_iterator_fast it(fields_vars); + Item *item; + TABLE *table= table_list->table; + bool no_trans_update_stmt; + DBUG_ENTER("read_xml_field"); + + no_trans_update_stmt= !table->file->has_transactions_and_rollback(); + + for ( ; ; it.rewind()) + { + bool err; + if (thd->killed) + { + thd->send_kill_message(); + DBUG_RETURN(1); + } + + // read row tag and save values into tag list + if (read_info.read_xml(thd)) + break; + + List_iterator_fast xmlit(read_info.taglist); + xmlit.rewind(); + XML_TAG *tag= NULL; + +#ifndef DBUG_OFF + DBUG_PRINT("read_xml_field", ("skip_lines=%d", (int) skip_lines)); + while ((tag= xmlit++)) + { + DBUG_PRINT("read_xml_field", ("got tag:%i '%s' '%s'", + tag->level, tag->field.c_ptr(), + tag->value.c_ptr())); + } +#endif + + restore_record(table, s->default_values); + + while ((item= it++)) + { + /* If this line is to be skipped we don't want to fill field or var */ + if (skip_lines) + continue; + + /* find field in tag list */ + xmlit.rewind(); + tag= xmlit++; + + while(tag && strcmp(tag->field.c_ptr(), item->name.str) != 0) + tag= xmlit++; + + 
Load_data_outvar *dst= item->get_load_data_outvar_or_error(); + DBUG_ASSERT(dst); + if (!tag ? dst->load_data_set_null(thd, &read_info) : + dst->load_data_set_value(thd, tag->value.ptr(), + tag->value.length(), + &read_info)) + DBUG_RETURN(1); + } + + if (unlikely(read_info.error)) + break; + + if (skip_lines) + { + skip_lines--; + continue; + } + + DBUG_ASSERT(!item); + + if (thd->killed || + fill_record_n_invoke_before_triggers(thd, table, set_fields, set_values, + ignore_check_option_errors, + TRG_EVENT_INSERT)) + DBUG_RETURN(1); + + switch (table_list->view_check_option(thd, + ignore_check_option_errors)) { + case VIEW_CHECK_SKIP: + read_info.next_line(); + goto continue_loop; + case VIEW_CHECK_ERROR: + DBUG_RETURN(-1); + } + + err= write_record(thd, table, &info); + table->auto_increment_field_not_null= false; + if (err) + DBUG_RETURN(1); + + /* + We don't need to reset auto-increment field since we are restoring + its default value at the beginning of each loop iteration. + */ + thd->transaction->stmt.modified_non_trans_table= no_trans_update_stmt; + thd->get_stmt_da()->inc_current_row_for_warning(); + continue_loop:; + } + DBUG_RETURN(MY_TEST(read_info.error) || thd->is_error()); +} /* load xml end */ + + +/* Unescape all escape characters, mark \N as null */ + +char +READ_INFO::unescape(char chr) +{ + /* keep this switch synchornous with the ESCAPE_CHARS macro */ + switch(chr) { + case 'n': return '\n'; + case 't': return '\t'; + case 'r': return '\r'; + case 'b': return '\b'; + case '0': return 0; // Ascii null + case 'Z': return '\032'; // Win32 end of file + case 'N': found_null=1; + + /* fall through */ + default: return chr; + } +} + + +/* + Read a line using buffering + If last line is empty (in line mode) then it isn't outputed +*/ + + +READ_INFO::READ_INFO(THD *thd, File file_par, + const Load_data_param ¶m, + String &field_term, String &line_start, String &line_term, + String &enclosed_par, int escape, bool get_it_from_net, + bool is_fifo) + 
:Load_data_param(param), + file(file_par), + m_field_term(field_term), m_line_term(line_term), m_line_start(line_start), + escape_char(escape), found_end_of_line(false), eof(false), + error(false), line_cuted(false), found_null(false) +{ + data.set_thread_specific(); + /* + Field and line terminators must be interpreted as sequence of unsigned char. + Otherwise, non-ascii terminators will be negative on some platforms, + and positive on others (depending on the implementation of char). + */ + + level= 0; /* for load xml */ + start_of_line= line_start.length() != 0; + /* If field_terminator == line_terminator, don't use line_terminator */ + if (m_field_term.eq(m_line_term)) + m_line_term.reset(); + enclosed_char= enclosed_par.length() ? (uchar) enclosed_par[0] : INT_MAX; + + /* Set of a stack for unget if long terminators */ + uint length= MY_MAX(charset()->mbmaxlen, MY_MAX(m_field_term.length(), + m_line_term.length())) + 1; + set_if_bigger(length,line_start.length()); + stack= stack_pos= (int*) thd->alloc(sizeof(int) * length); + + DBUG_ASSERT(m_fixed_length < UINT_MAX32); + if (data.reserve((size_t) m_fixed_length)) + error=1; /* purecov: inspected */ + else + { + if (init_io_cache(&cache,(get_it_from_net) ? -1 : file, 0, + (get_it_from_net) ? READ_NET : + (is_fifo ? 
READ_FIFO : READ_CACHE),0L,1, + MYF(MY_WME | MY_THREAD_SPECIFIC))) + { + error=1; + } + else + { +#ifndef EMBEDDED_LIBRARY + if (get_it_from_net) + cache.read_function = _my_b_net_read; + + if (mysql_bin_log.is_open()) + { + cache.real_read_function= cache.read_function; + cache.read_function= log_loaded_block; + } +#endif + } + } +} + + +READ_INFO::~READ_INFO() +{ + ::end_io_cache(&cache); + List_iterator xmlit(taglist); + XML_TAG *t; + while ((t= xmlit++)) + delete(t); +} + + +inline bool READ_INFO::terminator(const uchar *ptr, uint length) +{ + int chr=0; // Keep gcc happy + uint i; + for (i=1 ; i < length ; i++) + { + if ((chr=GET) != *(uchar*)++ptr) + { + break; + } + } + if (i == length) + return true; + PUSH(chr); + while (i-- > 1) + PUSH(*--ptr); + return false; +} + + +/** + Read a field. + + The data in the loaded file was presumably escaped using + - either select_export::send_data() OUTFILE + - or mysql_real_escape_string() + using the same character set with the one specified in the current + "LOAD DATA INFILE ... CHARACTER SET ..." (or the default LOAD character set). + + Note, non-escaped multi-byte characters are scanned as a single entity. + This is needed to correctly distinguish between: + - 0x5C as an escape character versus + - 0x5C as the second byte in a multi-byte sequence (big5, cp932, gbk, sjis) + + Parts of escaped multi-byte characters are scanned on different loop + iterations. See the comment about 0x5C handling in select_export::send_data() + in sql_class.cc. + + READ_INFO::read_field() does not check wellformedness. + Raising wellformedness errors or warnings in READ_INFO::read_field() + would be wrong, as the data after unescaping can go into a BLOB field, + or into a TEXT/VARCHAR field of a different character set. + The loop below only makes sure to revert escaping made by + select_export::send_data() or mysql_real_escape_string(). + Wellformedness is checked later, during Field::store(str,length,cs) time. 
+ + Note, in some cases users can supply data which did not go through + escaping properly. For example, utf8 "\" + (backslash followed by LATIN SMALL LETTER A WITH DIAERESIS) + is improperly escaped data that could not be generated by + select_export::send_data() / mysql_real_escape_string(): + - either there should be two backslashes: "\\" + - or there should be no backslashes at all: "" + "\" and " are scanned on two different loop iterations and + store "" into the field. + + Note, adding useless escapes before multi-byte characters like in the + example above is safe in case of utf8, but is not safe in case of + character sets that have escape_with_backslash_is_dangerous==TRUE, + such as big5, cp932, gbk, sjis. This can lead to mis-interpretation of the + data. Suppose we have a big5 character "<5C>" followed by <30> (digit 0). + If we add an extra escape before this sequence, then we'll get + <5C><5C><30>. The first loop iteration will turn <5C> into . + The second loop iteration will turn <5C><30> into <30>. + So the program that generates a dump file for further use with LOAD DATA + must make sure to use escapes properly. +*/ + +int READ_INFO::read_field() +{ + int chr,found_enclosed_char; + + found_null=0; + if (found_end_of_line) + return 1; // One have to call next_line + + /* Skip until we find 'line_start' */ + + if (start_of_line) + { // Skip until line_start + start_of_line=0; + if (find_start_of_fields()) + return 1; + } + if ((chr=GET) == my_b_EOF) + { + found_end_of_line=eof=1; + return 1; + } + data.length(0); + if (chr == enclosed_char) + { + found_enclosed_char=enclosed_char; + data.append(chr); // If error + } + else + { + found_enclosed_char= INT_MAX; + PUSH(chr); + } + + for (;;) + { + // Make sure we have enough space for the longest multi-byte character. 
+ while (data.length() + charset()->mbmaxlen <= data.alloced_length()) + { + chr = GET; + if (chr == my_b_EOF) + goto found_eof; + if (chr == escape_char) + { + if ((chr=GET) == my_b_EOF) + { + data.append(escape_char); + goto found_eof; + } + /* + When escape_char == enclosed_char, we treat it like we do for + handling quotes in SQL parsing -- you can double-up the + escape_char to include it literally, but it doesn't do escapes + like \n. This allows: LOAD DATA ... ENCLOSED BY '"' ESCAPED BY '"' + with data like: "fie""ld1", "field2" + */ + if (escape_char != enclosed_char || chr == escape_char) + { + data.append(unescape((char) chr)); + continue; + } + PUSH(chr); + chr= escape_char; + } +#ifdef ALLOW_LINESEPARATOR_IN_STRINGS + if (chr == m_line_term.initial_byte()) +#else + if (chr == m_line_term.initial_byte() && found_enclosed_char == INT_MAX) +#endif + { + if (terminator(m_line_term)) + { // Maybe unexpected linefeed + enclosed=0; + found_end_of_line=1; + row_start= (uchar *) data.ptr(); + row_end= (uchar *) data.end(); + return 0; + } + } + if (chr == found_enclosed_char) + { + if ((chr=GET) == found_enclosed_char) + { // Remove dupplicated + data.append(chr); + continue; + } + // End of enclosed field if followed by field_term or line_term + if (chr == my_b_EOF || terminator(chr, m_line_term)) + { + /* Maybe unexpected linefeed */ + enclosed=1; + found_end_of_line=1; + row_start= (uchar *) data.ptr() + 1; + row_end= (uchar *) data.end(); + return 0; + } + if (terminator(chr, m_field_term)) + { + enclosed=1; + row_start= (uchar *) data.ptr() + 1; + row_end= (uchar *) data.end(); + return 0; + } + /* + The string didn't terminate yet. 
+ Store back next character for the loop + */ + PUSH(chr); + /* copy the found term character to 'to' */ + chr= found_enclosed_char; + } + else if (chr == m_field_term.initial_byte() && + found_enclosed_char == INT_MAX) + { + if (terminator(m_field_term)) + { + enclosed=0; + row_start= (uchar *) data.ptr(); + row_end= (uchar *) data.end(); + return 0; + } + } + data.append(chr); + if (charset()->use_mb() && read_mbtail(&data)) + goto found_eof; + } + /* + ** We come here if buffer is too small. Enlarge it and continue + */ + if (data.reserve(IO_SIZE)) + return (error= 1); + } + +found_eof: + enclosed=0; + found_end_of_line=eof=1; + row_start= (uchar *) data.ptr(); + row_end= (uchar *) data.end(); + return 0; +} + +/* + Read a row with fixed length. + + NOTES + The row may not be fixed size on disk if there are escape + characters in the file. + + IMPLEMENTATION NOTE + One can't use fixed length with multi-byte charset ** + + RETURN + 0 ok + 1 error +*/ + +int READ_INFO::read_fixed_length() +{ + int chr; + if (found_end_of_line) + return 1; // One have to call next_line + + if (start_of_line) + { // Skip until line_start + start_of_line=0; + if (find_start_of_fields()) + return 1; + } + + for (data.length(0); data.length() < m_fixed_length ; ) + { + if ((chr=GET) == my_b_EOF) + goto found_eof; + if (chr == escape_char) + { + if ((chr=GET) == my_b_EOF) + { + data.append(escape_char); + goto found_eof; + } + data.append((uchar) unescape((char) chr)); + continue; + } + if (terminator(chr, m_line_term)) + { // Maybe unexpected linefeed + found_end_of_line= true; + break; + } + data.append(chr); + } + row_start= (uchar *) data.ptr(); + row_end= (uchar *) data.end(); // Found full line + return 0; + +found_eof: + found_end_of_line=eof=1; + row_start= (uchar *) data.ptr(); + row_end= (uchar *) data.end(); + return data.length() == 0 ? 
1 : 0; +} + + +int READ_INFO::next_line() +{ + line_cuted=0; + start_of_line= m_line_start.length() != 0; + if (found_end_of_line || eof) + { + found_end_of_line=0; + return eof; + } + found_end_of_line=0; + if (!m_line_term.length()) + return 0; // No lines + for (;;) + { + int chlen; + char buf[MY_CS_MBMAXLEN]; + + if (getbyte(&buf[0])) + return 1; // EOF + + if (charset()->use_mb() && + (chlen= charset()->charlen(buf, buf + 1)) != 1) + { + uint i; + for (i= 1; MY_CS_IS_TOOSMALL(chlen); ) + { + DBUG_ASSERT(i < sizeof(buf)); + DBUG_ASSERT(chlen != 1); + if (getbyte(&buf[i++])) + return 1; // EOF + chlen= charset()->charlen(buf, buf + i); + } + + /* + Either a complete multi-byte sequence, + or a broken byte sequence was found. + Check if the sequence is a prefix of the "LINES TERMINATED BY" string. + */ + if ((uchar) buf[0] == m_line_term.initial_byte() && + i <= m_line_term.length() && + !memcmp(buf, m_line_term.ptr(), i)) + { + if (m_line_term.length() == i) + { + /* + We found a "LINES TERMINATED BY" string that consists + of a single multi-byte character. + */ + return 0; + } + /* + buf[] is a prefix of "LINES TERMINATED BY". + Now check the suffix. Length of the suffix of line_term_ptr + that still needs to be checked is (line_term_length - i). + Note, READ_INFO::terminator() assumes that the leftmost byte of the + argument is already scanned from the file and is checked to + be a known prefix (e.g. against line_term.initial_char()). + So we need to pass one extra byte. + */ + if (terminator(m_line_term.ptr() + i - 1, + m_line_term.length() - i + 1)) + return 0; + } + /* + Here we have a good multi-byte sequence or a broken byte sequence, + and the sequence is not equal to "LINES TERMINATED BY". + No needs to check for escape_char, because: + - multi-byte escape characters in "FIELDS ESCAPED BY" are not + supported and are rejected at parse time. 
+ - broken single-byte sequences are not recognized as escapes, + they are considered to be a part of the data and are converted to + question marks. + */ + line_cuted= true; + continue; + } + if (buf[0] == escape_char) + { + line_cuted= true; + if (GET == my_b_EOF) + return 1; + continue; + } + if (terminator(buf[0], m_line_term)) + return 0; + line_cuted= true; + } +} + + +bool READ_INFO::find_start_of_fields() +{ + for (int chr= GET ; chr != my_b_EOF ; chr= GET) + { + if (terminator(chr, m_line_start)) + return false; + } + return (found_end_of_line= eof= true); +} + + +/* + Clear taglist from tags with a specified level +*/ +int READ_INFO::clear_level(int level_arg) +{ + DBUG_ENTER("READ_INFO::read_xml clear_level"); + List_iterator xmlit(taglist); + xmlit.rewind(); + XML_TAG *tag; + + while ((tag= xmlit++)) + { + if(tag->level >= level_arg) + { + xmlit.remove(); + delete tag; + } + } + DBUG_RETURN(0); +} + + +/* + Convert an XML entity to Unicode value. + Return -1 on error; +*/ +static int +my_xml_entity_to_char(const char *name, uint length) +{ + if (length == 2) + { + if (!memcmp(name, "gt", length)) + return '>'; + if (!memcmp(name, "lt", length)) + return '<'; + } + else if (length == 3) + { + if (!memcmp(name, "amp", length)) + return '&'; + } + else if (length == 4) + { + if (!memcmp(name, "quot", length)) + return '"'; + if (!memcmp(name, "apos", length)) + return '\''; + } + return -1; +} + + +/** + @brief Convert newline, linefeed, tab to space + + @param chr character + + @details According to the "XML 1.0" standard, + only space (#x20) characters, carriage returns, + line feeds or tabs are considered as spaces. + Convert all of them to space (#x20) for parsing simplicity. +*/ +static int +my_tospace(int chr) +{ + return (chr == '\t' || chr == '\r' || chr == '\n') ? 
' ' : chr; +} + + +/* + Read an xml value: handle multibyte and xml escape +*/ +int READ_INFO::read_value(int delim, String *val) +{ + int chr; + String tmp; + + for (chr= GET; my_tospace(chr) != delim && chr != my_b_EOF; chr= GET) + { + if(chr == '&') + { + tmp.length(0); + for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET)) + { + if (chr == my_b_EOF) + return chr; + tmp.append(chr); + } + if ((chr= my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0) + val->append(chr); + else + { + val->append('&'); + val->append(tmp); + val->append(';'); + } + } + else + { + val->append(chr); + if (charset()->use_mb() && read_mbtail(val)) + return my_b_EOF; + } + } + return my_tospace(chr); +} + + +/* + Read a record in xml format + tags and attributes are stored in taglist + when tag set in ROWS IDENTIFIED BY is closed, we are ready and return +*/ +int READ_INFO::read_xml(THD *thd) +{ + DBUG_ENTER("READ_INFO::read_xml"); + int chr, chr2, chr3; + int delim= 0; + String tag, attribute, value; + bool in_tag= false; + + tag.length(0); + attribute.length(0); + value.length(0); + + for (chr= my_tospace(GET); chr != my_b_EOF ; ) + { + switch(chr){ + case '<': /* read tag */ + /* TODO: check if this is a comment */ + chr= my_tospace(GET); + if(chr == '!') + { + chr2= GET; + chr3= GET; + + if(chr2 == '-' && chr3 == '-') + { + chr2= 0; + chr3= 0; + chr= my_tospace(GET); + + while(chr != '>' || chr2 != '-' || chr3 != '-') + { + if(chr == '-') + { + chr3= chr2; + chr2= chr; + } + else if (chr2 == '-') + { + chr2= 0; + chr3= 0; + } + chr= my_tospace(GET); + if (chr == my_b_EOF) + goto found_eof; + } + break; + } + } + + tag.length(0); + while(chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF) + { + if(chr != delim) /* fix for the '' - stored in line_term + if((tag.length() == m_line_term.length() - 2) && + (memcmp(tag.ptr(), m_line_term.ptr() + 1, tag.length()) == 0)) + { + DBUG_PRINT("read_xml", ("start-of-row: %i %s %s", + level,tag.c_ptr_safe(), m_line_term.ptr())); 
+ } + + if(chr == ' ' || chr == '>') + { + level++; + clear_level(level + 1); + } + + if (chr == ' ') + in_tag= true; + else + in_tag= false; + break; + + case ' ': /* read attribute */ + while(chr == ' ') /* skip blanks */ + chr= my_tospace(GET); + + if(!in_tag) + break; + + while(chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF) + { + attribute.append(chr); + chr= my_tospace(GET); + } + break; + + case '>': /* end tag - read tag value */ + in_tag= false; + chr= read_value('<', &value); + if(chr == my_b_EOF) + goto found_eof; + + /* save value to list */ + if (tag.length() > 0 && value.length() > 0) + { + DBUG_PRINT("read_xml", ("lev:%i tag:%s val:%s", + level,tag.c_ptr_safe(), value.c_ptr_safe())); + XML_TAG *tmp= new XML_TAG(level, tag, value); + if (!tmp || taglist.push_front(tmp, thd->mem_root)) + DBUG_RETURN(1); // End of memory + } + tag.length(0); + value.length(0); + attribute.length(0); + break; + + case '/': /* close tag */ + chr= my_tospace(GET); + /* Decrease the 'level' only when (i) It's not an */ + /* (without space) empty tag i.e. 
or, (ii) */ + /* It is of format */ + if(chr != '>' || in_tag) + { + level--; + in_tag= false; + } + if(chr != '>') /* if this is an empty tag */ + tag.length(0); /* we should keep tag value */ + while(chr != '>' && chr != my_b_EOF) + { + tag.append(chr); + chr= my_tospace(GET); + } + + if((tag.length() == m_line_term.length() - 2) && + (memcmp(tag.ptr(), m_line_term.ptr() + 1, tag.length()) == 0)) + { + DBUG_PRINT("read_xml", ("found end-of-row %i %s", + level, tag.c_ptr_safe())); + DBUG_RETURN(0); //normal return + } + chr= my_tospace(GET); + break; + + case '=': /* attribute name end - read the value */ + //check for tag field and attribute name + if(!strcmp(tag.c_ptr_safe(), "field") && + !strcmp(attribute.c_ptr_safe(), "name")) + { + /* + this is format xx + where actual fieldname is in attribute + */ + delim= my_tospace(GET); + tag.length(0); + attribute.length(0); + chr= '<'; /* we pretend that it is a tag */ + level--; + break; + } + + //check for " or ' + chr= GET; + if (chr == my_b_EOF) + goto found_eof; + if(chr == '"' || chr == '\'') + { + delim= chr; + } + else + { + delim= ' '; /* no delimiter, use space */ + PUSH(chr); + } + + chr= read_value(delim, &value); + if (attribute.length() > 0 && value.length() > 0) + { + DBUG_PRINT("read_xml", ("lev:%i att:%s val:%s", + level + 1, + attribute.c_ptr_safe(), + value.c_ptr_safe())); + XML_TAG *tmp= new XML_TAG(level + 1, attribute, value); + if (!tmp || taglist.push_front(tmp, thd->mem_root)) + DBUG_RETURN(1); // End of memory + } + attribute.length(0); + value.length(0); + if (chr != ' ') + chr= my_tospace(GET); + break; + + default: + chr= my_tospace(GET); + } /* end switch */ + } /* end while */ + +found_eof: + DBUG_PRINT("read_xml",("Found eof")); + eof= 1; + DBUG_RETURN(1); +} diff --git a/sql/sql_load.h b/sql/sql_load.h new file mode 100644 index 00000000..8413d278 --- /dev/null +++ b/sql/sql_load.h @@ -0,0 +1,34 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_LOAD_INCLUDED +#define SQL_LOAD_INCLUDED + +#include "sql_list.h" /* List */ + +class Item; + +#include "sql_class.h" /* enum_duplicates */ + +class sql_exchange; + +int mysql_load(THD *thd, const sql_exchange *ex, TABLE_LIST *table_list, + List &fields_vars, List &set_fields, + List &set_values_list, + enum enum_duplicates handle_duplicates, bool ignore, + bool local_file); + + +#endif /* SQL_LOAD_INCLUDED */ diff --git a/sql/sql_locale.cc b/sql/sql_locale.cc new file mode 100644 index 00000000..713ee1fe --- /dev/null +++ b/sql/sql_locale.cc @@ -0,0 +1,3581 @@ +/* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + The beginnings of locale(7) support. + Sponsored for subset of LC_TIME support, WorkLog entry 2928, -- Josh Chamas + + !! This file is built from my_locale.pl !! +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "sql_locale.h" +#include "sql_class.h" // THD + + +enum err_msgs_index +{ + en_US= 0, zh_CN, cs_CZ, da_DK, nl_NL, et_EE, fr_FR, de_DE, el_GR, hu_HU, it_IT, + ja_JP, ko_KR, no_NO, nn_NO, pl_PL, pt_PT, ro_RO, ru_RU, sr_RS, sk_SK, + es_ES, sv_SE, uk_UA, hi_IN, ka_GE +} ERR_MSGS_INDEX; + + +MY_LOCALE_ERRMSGS global_errmsgs[]= +{ + {"english", NULL}, + {"chinese", NULL}, + {"czech", NULL}, + {"danish", NULL}, + {"dutch", NULL}, + {"estonian", NULL}, + {"french", NULL}, + {"german", NULL}, + {"greek", NULL}, + {"hungarian", NULL}, + {"italian", NULL}, + {"japanese", NULL}, + {"korean", NULL}, + {"norwegian", NULL}, + {"norwegian-ny", NULL}, + {"polish", NULL}, + {"portuguese", NULL}, + {"romanian", NULL}, + {"russian", NULL}, + {"serbian", NULL}, + {"slovak", NULL}, + {"spanish", NULL}, + {"swedish", NULL}, + {"ukrainian", NULL}, + {"hindi", NULL}, + {"georgian", NULL}, + {NULL, NULL} +}; + + +/***** LOCALE BEGIN ar_AE: Arabic - United Arab Emirates *****/ +static const char *my_locale_month_names_ar_AE[13] = + {"يناير","فبراير","مارس","أبريل","مايو","يونيو","يوليو","أغسطس","سبتمبر","أكتوبر","نوفمبر","ديسمبر", NullS }; +static const char *my_locale_ab_month_names_ar_AE[13] = + {"ينا","فبر","مار","أبر","ماي","يون","يول","أغس","سبت","أكت","نوف","ديس", NullS }; +static const char *my_locale_day_names_ar_AE[8] = + {"الاثنين","الثلاثاء","الأربعاء","الخميس","الجمعة","السبت ","الأحد", NullS }; +static const char *my_locale_ab_day_names_ar_AE[8] = + {"ن","ث","ر","خ","ج","س","ح", NullS }; +static TYPELIB 
my_locale_typelib_month_names_ar_AE = + { array_elements(my_locale_month_names_ar_AE)-1, "", my_locale_month_names_ar_AE, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_ar_AE = + { array_elements(my_locale_ab_month_names_ar_AE)-1, "", my_locale_ab_month_names_ar_AE, NULL }; +static TYPELIB my_locale_typelib_day_names_ar_AE = + { array_elements(my_locale_day_names_ar_AE)-1, "", my_locale_day_names_ar_AE, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_ar_AE = + { array_elements(my_locale_ab_day_names_ar_AE)-1, "", my_locale_ab_day_names_ar_AE, NULL }; +MY_LOCALE my_locale_ar_AE +( + 6, + "ar_AE", + "Arabic - United Arab Emirates", + FALSE, + &my_locale_typelib_month_names_ar_AE, + &my_locale_typelib_ab_month_names_ar_AE, + &my_locale_typelib_day_names_ar_AE, + &my_locale_typelib_ab_day_names_ar_AE, + 6, + 8, + '.', /* decimal point ar_AE */ + ',', /* thousands_sep ar_AE */ + "\x03", /* grouping ar_AE */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_AE *****/ + +/***** LOCALE BEGIN ar_BH: Arabic - Bahrain *****/ +static const char *my_locale_month_names_ar_BH[13] = + {"يناير","فبراير","مارس","أبريل","مايو","يونيو","يوليو","أغسطس","سبتمبر","أكتوبر","نوفمبر","ديسمبر", NullS }; +static const char *my_locale_ab_month_names_ar_BH[13] = + {"ينا","فبر","مار","أبر","ماي","يون","يول","أغس","سبت","أكت","نوف","ديس", NullS }; +static const char *my_locale_day_names_ar_BH[8] = + {"الاثنين","الثلاثاء","الأربعاء","الخميس","الجمعة","السبت","الأحد", NullS }; +static const char *my_locale_ab_day_names_ar_BH[8] = + {"ن","ث","ر","خ","ج","س","ح", NullS }; +static TYPELIB my_locale_typelib_month_names_ar_BH = + { array_elements(my_locale_month_names_ar_BH)-1, "", my_locale_month_names_ar_BH, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_ar_BH = + { array_elements(my_locale_ab_month_names_ar_BH)-1, "", my_locale_ab_month_names_ar_BH, NULL }; +static TYPELIB my_locale_typelib_day_names_ar_BH = + { array_elements(my_locale_day_names_ar_BH)-1, "", 
my_locale_day_names_ar_BH, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_ar_BH = + { array_elements(my_locale_ab_day_names_ar_BH)-1, "", my_locale_ab_day_names_ar_BH, NULL }; +MY_LOCALE my_locale_ar_BH +( + 7, + "ar_BH", + "Arabic - Bahrain", + FALSE, + &my_locale_typelib_month_names_ar_BH, + &my_locale_typelib_ab_month_names_ar_BH, + &my_locale_typelib_day_names_ar_BH, + &my_locale_typelib_ab_day_names_ar_BH, + 6, + 8, + '.', /* decimal point ar_BH */ + ',', /* thousands_sep ar_BH */ + "\x03", /* grouping ar_BH */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_BH *****/ + +/***** LOCALE BEGIN ar_JO: Arabic - Jordan *****/ +static const char *my_locale_month_names_ar_JO[13] = + {"كانون الثاني","شباط","آذار","نيسان","نوار","حزيران","تموز","آب","أيلول","تشرين الأول","تشرين الثاني","كانون الأول", NullS }; +static const char *my_locale_ab_month_names_ar_JO[13] = + {"كانون الثاني","شباط","آذار","نيسان","نوار","حزيران","تموز","آب","أيلول","تشرين الأول","تشرين الثاني","كانون الأول", NullS }; +static const char *my_locale_day_names_ar_JO[8] = + {"الاثنين","الثلاثاء","الأربعاء","الخميس","الجمعة","السبت","الأحد", NullS }; +static const char *my_locale_ab_day_names_ar_JO[8] = + {"الاثنين","الثلاثاء","الأربعاء","الخميس","الجمعة","السبت","الأحد", NullS }; +static TYPELIB my_locale_typelib_month_names_ar_JO = + { array_elements(my_locale_month_names_ar_JO)-1, "", my_locale_month_names_ar_JO, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_ar_JO = + { array_elements(my_locale_ab_month_names_ar_JO)-1, "", my_locale_ab_month_names_ar_JO, NULL }; +static TYPELIB my_locale_typelib_day_names_ar_JO = + { array_elements(my_locale_day_names_ar_JO)-1, "", my_locale_day_names_ar_JO, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_ar_JO = + { array_elements(my_locale_ab_day_names_ar_JO)-1, "", my_locale_ab_day_names_ar_JO, NULL }; +MY_LOCALE my_locale_ar_JO +( + 8, + "ar_JO", + "Arabic - Jordan", + FALSE, + &my_locale_typelib_month_names_ar_JO, + 
&my_locale_typelib_ab_month_names_ar_JO, + &my_locale_typelib_day_names_ar_JO, + &my_locale_typelib_ab_day_names_ar_JO, + 12, + 8, + '.', /* decimal point ar_JO */ + ',', /* thousands_sep ar_JO */ + "\x03", /* grouping ar_JO */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_JO *****/ + +/***** LOCALE BEGIN ar_SA: Arabic - Saudi Arabia *****/ +static const char *my_locale_month_names_ar_SA[13] = + {"كانون الثاني","شباط","آذار","نيسـان","أيار","حزيران","تـمـوز","آب","أيلول","تشرين الأول","تشرين الثاني","كانون الأول", NullS }; +static const char *my_locale_ab_month_names_ar_SA[13] = + {"Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec", NullS }; +static const char *my_locale_day_names_ar_SA[8] = + {"الإثنين","الثلاثاء","الأربعاء","الخميس","الجمعـة","السبت","الأحد", NullS }; +static const char *my_locale_ab_day_names_ar_SA[8] = + {"Mon","Tue","Wed","Thu","Fri","Sat","Sun", NullS }; +static TYPELIB my_locale_typelib_month_names_ar_SA = + { array_elements(my_locale_month_names_ar_SA)-1, "", my_locale_month_names_ar_SA, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_ar_SA = + { array_elements(my_locale_ab_month_names_ar_SA)-1, "", my_locale_ab_month_names_ar_SA, NULL }; +static TYPELIB my_locale_typelib_day_names_ar_SA = + { array_elements(my_locale_day_names_ar_SA)-1, "", my_locale_day_names_ar_SA, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_ar_SA = + { array_elements(my_locale_ab_day_names_ar_SA)-1, "", my_locale_ab_day_names_ar_SA, NULL }; +MY_LOCALE my_locale_ar_SA +( + 9, + "ar_SA", + "Arabic - Saudi Arabia", + FALSE, + &my_locale_typelib_month_names_ar_SA, + &my_locale_typelib_ab_month_names_ar_SA, + &my_locale_typelib_day_names_ar_SA, + &my_locale_typelib_ab_day_names_ar_SA, + 12, + 8, + '.', /* decimal point ar_SA */ + '\0', /* thousands_sep ar_SA */ + "\x80", /* grouping ar_SA */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_SA *****/ + +/***** LOCALE BEGIN ar_SY: Arabic - Syria *****/ +static const char 
*my_locale_month_names_ar_SY[13] = + {"كانون الثاني","شباط","آذار","نيسان","نواران","حزير","تموز","آب","أيلول","تشرين الأول","تشرين الثاني","كانون الأول", NullS }; +static const char *my_locale_ab_month_names_ar_SY[13] = + {"كانون الثاني","شباط","آذار","نيسان","نوار","حزيران","تموز","آب","أيلول","تشرين الأول","تشرين الثاني","كانون الأول", NullS }; +static const char *my_locale_day_names_ar_SY[8] = + {"الاثنين","الثلاثاء","الأربعاء","الخميس","الجمعة","السبت","الأحد", NullS }; +static const char *my_locale_ab_day_names_ar_SY[8] = + {"الاثنين","الثلاثاء","الأربعاء","الخميس","الجمعة","السبت","الأحد", NullS }; +static TYPELIB my_locale_typelib_month_names_ar_SY = + { array_elements(my_locale_month_names_ar_SY)-1, "", my_locale_month_names_ar_SY, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_ar_SY = + { array_elements(my_locale_ab_month_names_ar_SY)-1, "", my_locale_ab_month_names_ar_SY, NULL }; +static TYPELIB my_locale_typelib_day_names_ar_SY = + { array_elements(my_locale_day_names_ar_SY)-1, "", my_locale_day_names_ar_SY, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_ar_SY = + { array_elements(my_locale_ab_day_names_ar_SY)-1, "", my_locale_ab_day_names_ar_SY, NULL }; +MY_LOCALE my_locale_ar_SY +( + 10, + "ar_SY", + "Arabic - Syria", + FALSE, + &my_locale_typelib_month_names_ar_SY, + &my_locale_typelib_ab_month_names_ar_SY, + &my_locale_typelib_day_names_ar_SY, + &my_locale_typelib_ab_day_names_ar_SY, + 12, + 8, + '.', /* decimal point ar_SY */ + ',', /* thousands_sep ar_SY */ + "\x03", /* grouping ar_SY */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_SY *****/ + +/***** LOCALE BEGIN be_BY: Belarusian - Belarus *****/ +static const char *my_locale_month_names_be_BY[13] = + {"Студзень","Люты","Сакавік","Красавік","Травень","Чэрвень","Ліпень","Жнівень","Верасень","Кастрычнік","Лістапад","Снежань", NullS }; +static const char *my_locale_ab_month_names_be_BY[13] = + {"Стд","Лют","Сак","Крс","Тра","Чэр","Ліп","Жнв","Врс","Кст","Ліс","Снж", NullS }; 
+static const char *my_locale_day_names_be_BY[8] = + {"Панядзелак","Аўторак","Серада","Чацвер","Пятніца","Субота","Нядзеля", NullS }; +static const char *my_locale_ab_day_names_be_BY[8] = + {"Пан","Аўт","Срд","Чцв","Пят","Суб","Няд", NullS }; +static TYPELIB my_locale_typelib_month_names_be_BY = + { array_elements(my_locale_month_names_be_BY)-1, "", my_locale_month_names_be_BY, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_be_BY = + { array_elements(my_locale_ab_month_names_be_BY)-1, "", my_locale_ab_month_names_be_BY, NULL }; +static TYPELIB my_locale_typelib_day_names_be_BY = + { array_elements(my_locale_day_names_be_BY)-1, "", my_locale_day_names_be_BY, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_be_BY = + { array_elements(my_locale_ab_day_names_be_BY)-1, "", my_locale_ab_day_names_be_BY, NULL }; +MY_LOCALE my_locale_be_BY +( + 11, + "be_BY", + "Belarusian - Belarus", + FALSE, + &my_locale_typelib_month_names_be_BY, + &my_locale_typelib_ab_month_names_be_BY, + &my_locale_typelib_day_names_be_BY, + &my_locale_typelib_ab_day_names_be_BY, + 10, + 10, + ',', /* decimal point be_BY */ + '.', /* thousands_sep be_BY */ + "\x03\x03", /* grouping be_BY */ + &global_errmsgs[en_US] +); +/***** LOCALE END be_BY *****/ + +/***** LOCALE BEGIN bg_BG: Bulgarian - Bulgaria *****/ +static const char *my_locale_month_names_bg_BG[13] = + {"януари","февруари","март","април","май","юни","юли","август","септември","октомври","ноември","декември", NullS }; +static const char *my_locale_ab_month_names_bg_BG[13] = + {"яну","фев","мар","апр","май","юни","юли","авг","сеп","окт","ное","дек", NullS }; +static const char *my_locale_day_names_bg_BG[8] = + {"понеделник","вторник","сряда","четвъртък","петък","събота","неделя", NullS }; +static const char *my_locale_ab_day_names_bg_BG[8] = + {"пн","вт","ср","чт","пт","сб","нд", NullS }; +static TYPELIB my_locale_typelib_month_names_bg_BG = + { array_elements(my_locale_month_names_bg_BG)-1, "", my_locale_month_names_bg_BG, 
NULL }; +static TYPELIB my_locale_typelib_ab_month_names_bg_BG = + { array_elements(my_locale_ab_month_names_bg_BG)-1, "", my_locale_ab_month_names_bg_BG, NULL }; +static TYPELIB my_locale_typelib_day_names_bg_BG = + { array_elements(my_locale_day_names_bg_BG)-1, "", my_locale_day_names_bg_BG, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_bg_BG = + { array_elements(my_locale_ab_day_names_bg_BG)-1, "", my_locale_ab_day_names_bg_BG, NULL }; +MY_LOCALE my_locale_bg_BG +( + 12, + "bg_BG", + "Bulgarian - Bulgaria", + FALSE, + &my_locale_typelib_month_names_bg_BG, + &my_locale_typelib_ab_month_names_bg_BG, + &my_locale_typelib_day_names_bg_BG, + &my_locale_typelib_ab_day_names_bg_BG, + 9, + 10, + ',', /* decimal point bg_BG */ + '\0', /* thousands_sep bg_BG */ + "\x03\x03", /* grouping bg_BG */ + &global_errmsgs[en_US] +); +/***** LOCALE END bg_BG *****/ + +/***** LOCALE BEGIN ca_ES: Catalan - Catalan *****/ +static const char *my_locale_month_names_ca_ES[13] = + {"gener","febrer","març","abril","maig","juny","juliol","agost","setembre","octubre","novembre","desembre", NullS }; +static const char *my_locale_ab_month_names_ca_ES[13] = + {"gen","feb","mar","abr","mai","jun","jul","ago","set","oct","nov","des", NullS }; +static const char *my_locale_day_names_ca_ES[8] = + {"dilluns","dimarts","dimecres","dijous","divendres","dissabte","diumenge", NullS }; +static const char *my_locale_ab_day_names_ca_ES[8] = + {"dl","dt","dc","dj","dv","ds","dg", NullS }; +static TYPELIB my_locale_typelib_month_names_ca_ES = + { array_elements(my_locale_month_names_ca_ES)-1, "", my_locale_month_names_ca_ES, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_ca_ES = + { array_elements(my_locale_ab_month_names_ca_ES)-1, "", my_locale_ab_month_names_ca_ES, NULL }; +static TYPELIB my_locale_typelib_day_names_ca_ES = + { array_elements(my_locale_day_names_ca_ES)-1, "", my_locale_day_names_ca_ES, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_ca_ES = + { 
array_elements(my_locale_ab_day_names_ca_ES)-1, "", my_locale_ab_day_names_ca_ES, NULL }; +MY_LOCALE my_locale_ca_ES +( + 13, + "ca_ES", + "Catalan - Catalan", + FALSE, + &my_locale_typelib_month_names_ca_ES, + &my_locale_typelib_ab_month_names_ca_ES, + &my_locale_typelib_day_names_ca_ES, + &my_locale_typelib_ab_day_names_ca_ES, + 8, + 9, + ',', /* decimal point ca_ES */ + '\0', /* thousands_sep ca_ES */ + "\x80\x80", /* grouping ca_ES */ + &global_errmsgs[en_US] +); +/***** LOCALE END ca_ES *****/ + +/***** LOCALE BEGIN cs_CZ: Czech - Czech Republic *****/ +static const char *my_locale_month_names_cs_CZ[13] = + {"leden","únor","březen","duben","květen","červen","červenec","srpen","září","říjen","listopad","prosinec", NullS }; +static const char *my_locale_ab_month_names_cs_CZ[13] = + {"led","úno","bře","dub","kvě","čen","čec","srp","zář","říj","lis","pro", NullS }; +static const char *my_locale_day_names_cs_CZ[8] = + {"Pondělí","Úterý","Středa","Čtvrtek","Pátek","Sobota","Neděle", NullS }; +static const char *my_locale_ab_day_names_cs_CZ[8] = + {"Po","Út","St","Čt","Pá","So","Ne", NullS }; +static TYPELIB my_locale_typelib_month_names_cs_CZ = + { array_elements(my_locale_month_names_cs_CZ)-1, "", my_locale_month_names_cs_CZ, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_cs_CZ = + { array_elements(my_locale_ab_month_names_cs_CZ)-1, "", my_locale_ab_month_names_cs_CZ, NULL }; +static TYPELIB my_locale_typelib_day_names_cs_CZ = + { array_elements(my_locale_day_names_cs_CZ)-1, "", my_locale_day_names_cs_CZ, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_cs_CZ = + { array_elements(my_locale_ab_day_names_cs_CZ)-1, "", my_locale_ab_day_names_cs_CZ, NULL }; +MY_LOCALE my_locale_cs_CZ +( + 14, + "cs_CZ", + "Czech - Czech Republic", + FALSE, + &my_locale_typelib_month_names_cs_CZ, + &my_locale_typelib_ab_month_names_cs_CZ, + &my_locale_typelib_day_names_cs_CZ, + &my_locale_typelib_ab_day_names_cs_CZ, + 8, + 7, + ',', /* decimal point cs_CZ */ + ' ', /* 
thousands_sep cs_CZ */ + "\x03\x03", /* grouping cs_CZ */ + &global_errmsgs[cs_CZ] +); +/***** LOCALE END cs_CZ *****/ + +/***** LOCALE BEGIN da_DK: Danish - Denmark *****/ +static const char *my_locale_month_names_da_DK[13] = + {"januar","februar","marts","april","maj","juni","juli","august","september","oktober","november","december", NullS }; +static const char *my_locale_ab_month_names_da_DK[13] = + {"jan","feb","mar","apr","maj","jun","jul","aug","sep","okt","nov","dec", NullS }; +static const char *my_locale_day_names_da_DK[8] = + {"mandag","tirsdag","onsdag","torsdag","fredag","lørdag","søndag", NullS }; +static const char *my_locale_ab_day_names_da_DK[8] = + {"man","tir","ons","tor","fre","lør","søn", NullS }; +static TYPELIB my_locale_typelib_month_names_da_DK = + { array_elements(my_locale_month_names_da_DK)-1, "", my_locale_month_names_da_DK, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_da_DK = + { array_elements(my_locale_ab_month_names_da_DK)-1, "", my_locale_ab_month_names_da_DK, NULL }; +static TYPELIB my_locale_typelib_day_names_da_DK = + { array_elements(my_locale_day_names_da_DK)-1, "", my_locale_day_names_da_DK, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_da_DK = + { array_elements(my_locale_ab_day_names_da_DK)-1, "", my_locale_ab_day_names_da_DK, NULL }; +MY_LOCALE my_locale_da_DK +( + 15, + "da_DK", + "Danish - Denmark", + FALSE, + &my_locale_typelib_month_names_da_DK, + &my_locale_typelib_ab_month_names_da_DK, + &my_locale_typelib_day_names_da_DK, + &my_locale_typelib_ab_day_names_da_DK, + 9, + 7, + ',', /* decimal point da_DK */ + '.', /* thousands_sep da_DK */ + "\x03\x03", /* grouping da_DK */ + &global_errmsgs[da_DK] +); +/***** LOCALE END da_DK *****/ + +/***** LOCALE BEGIN de_AT: German - Austria *****/ +static const char *my_locale_month_names_de_AT[13] = + {"Jänner","Februar","März","April","Mai","Juni","Juli","August","September","Oktober","November","Dezember", NullS }; +static const char 
*my_locale_ab_month_names_de_AT[13] = + {"Jän","Feb","Mär","Apr","Mai","Jun","Jul","Aug","Sep","Okt","Nov","Dez", NullS }; +static const char *my_locale_day_names_de_AT[8] = + {"Montag","Dienstag","Mittwoch","Donnerstag","Freitag","Samstag","Sonntag", NullS }; +static const char *my_locale_ab_day_names_de_AT[8] = + {"Mon","Die","Mit","Don","Fre","Sam","Son", NullS }; +static TYPELIB my_locale_typelib_month_names_de_AT = + { array_elements(my_locale_month_names_de_AT)-1, "", my_locale_month_names_de_AT, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_de_AT = + { array_elements(my_locale_ab_month_names_de_AT)-1, "", my_locale_ab_month_names_de_AT, NULL }; +static TYPELIB my_locale_typelib_day_names_de_AT = + { array_elements(my_locale_day_names_de_AT)-1, "", my_locale_day_names_de_AT, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_de_AT = + { array_elements(my_locale_ab_day_names_de_AT)-1, "", my_locale_ab_day_names_de_AT, NULL }; +MY_LOCALE my_locale_de_AT +( + 16, + "de_AT", + "German - Austria", + FALSE, + &my_locale_typelib_month_names_de_AT, + &my_locale_typelib_ab_month_names_de_AT, + &my_locale_typelib_day_names_de_AT, + &my_locale_typelib_ab_day_names_de_AT, + 9, + 10, + ',', /* decimal point de_AT */ + '\0', /* thousands_sep de_AT */ + "\x80\x80", /* grouping de_AT */ + &global_errmsgs[de_DE] +); +/***** LOCALE END de_AT *****/ + +/***** LOCALE BEGIN de_DE: German - Germany *****/ +static const char *my_locale_month_names_de_DE[13] = + {"Januar","Februar","März","April","Mai","Juni","Juli","August","September","Oktober","November","Dezember", NullS }; +static const char *my_locale_ab_month_names_de_DE[13] = + {"Jan","Feb","Mär","Apr","Mai","Jun","Jul","Aug","Sep","Okt","Nov","Dez", NullS }; +static const char *my_locale_day_names_de_DE[8] = + {"Montag","Dienstag","Mittwoch","Donnerstag","Freitag","Samstag","Sonntag", NullS }; +static const char *my_locale_ab_day_names_de_DE[8] = + {"Mo","Di","Mi","Do","Fr","Sa","So", NullS }; +static TYPELIB 
my_locale_typelib_month_names_de_DE = + { array_elements(my_locale_month_names_de_DE)-1, "", my_locale_month_names_de_DE, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_de_DE = + { array_elements(my_locale_ab_month_names_de_DE)-1, "", my_locale_ab_month_names_de_DE, NULL }; +static TYPELIB my_locale_typelib_day_names_de_DE = + { array_elements(my_locale_day_names_de_DE)-1, "", my_locale_day_names_de_DE, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_de_DE = + { array_elements(my_locale_ab_day_names_de_DE)-1, "", my_locale_ab_day_names_de_DE, NULL }; +MY_LOCALE my_locale_de_DE +( + 4, + "de_DE", + "German - Germany", + FALSE, + &my_locale_typelib_month_names_de_DE, + &my_locale_typelib_ab_month_names_de_DE, + &my_locale_typelib_day_names_de_DE, + &my_locale_typelib_ab_day_names_de_DE, + 9, + 10, + ',', /* decimal point de_DE */ + '.', /* thousands_sep de_DE */ + "\x03\x03", /* grouping de_DE */ + &global_errmsgs[de_DE] +); +/***** LOCALE END de_DE *****/ + +/***** LOCALE BEGIN en_US: English - United States *****/ +static const char *my_locale_month_names_en_US[13] = + {"January","February","March","April","May","June","July","August","September","October","November","December", NullS }; +static const char *my_locale_ab_month_names_en_US[13] = + {"Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec", NullS }; +static const char *my_locale_day_names_en_US[8] = + {"Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday", NullS }; +static const char *my_locale_ab_day_names_en_US[8] = + {"Mon","Tue","Wed","Thu","Fri","Sat","Sun", NullS }; +static TYPELIB my_locale_typelib_month_names_en_US = + { array_elements(my_locale_month_names_en_US)-1, "", my_locale_month_names_en_US, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_en_US = + { array_elements(my_locale_ab_month_names_en_US)-1, "", my_locale_ab_month_names_en_US, NULL }; +static TYPELIB my_locale_typelib_day_names_en_US = + { 
array_elements(my_locale_day_names_en_US)-1, "", my_locale_day_names_en_US, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_en_US = + { array_elements(my_locale_ab_day_names_en_US)-1, "", my_locale_ab_day_names_en_US, NULL }; +MY_LOCALE my_locale_en_US +( + 0, + "en_US", + "English - United States", + TRUE, + &my_locale_typelib_month_names_en_US, + &my_locale_typelib_ab_month_names_en_US, + &my_locale_typelib_day_names_en_US, + &my_locale_typelib_ab_day_names_en_US, + 9, + 9, + '.', /* decimal point en_US */ + ',', /* thousands_sep en_US */ + "\x03\x03", /* grouping en_US */ + &global_errmsgs[en_US] +); +/***** LOCALE END en_US *****/ + +/***** LOCALE BEGIN es_ES: Spanish - Spain *****/ +static const char *my_locale_month_names_es_ES[13] = + {"enero","febrero","marzo","abril","mayo","junio","julio","agosto","septiembre","octubre","noviembre","diciembre", NullS }; +static const char *my_locale_ab_month_names_es_ES[13] = + {"ene","feb","mar","abr","may","jun","jul","ago","sep","oct","nov","dic", NullS }; +static const char *my_locale_day_names_es_ES[8] = + {"lunes","martes","miércoles","jueves","viernes","sábado","domingo", NullS }; +static const char *my_locale_ab_day_names_es_ES[8] = + {"lun","mar","mié","jue","vie","sáb","dom", NullS }; +static TYPELIB my_locale_typelib_month_names_es_ES = + { array_elements(my_locale_month_names_es_ES)-1, "", my_locale_month_names_es_ES, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_es_ES = + { array_elements(my_locale_ab_month_names_es_ES)-1, "", my_locale_ab_month_names_es_ES, NULL }; +static TYPELIB my_locale_typelib_day_names_es_ES = + { array_elements(my_locale_day_names_es_ES)-1, "", my_locale_day_names_es_ES, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_es_ES = + { array_elements(my_locale_ab_day_names_es_ES)-1, "", my_locale_ab_day_names_es_ES, NULL }; +MY_LOCALE my_locale_es_ES +( + 17, + "es_ES", + "Spanish - Spain", + FALSE, + &my_locale_typelib_month_names_es_ES, + 
&my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + ',', /* decimal point es_ES */ + '.', /* thousands_sep es_ES */ + "\x03\x03", /* grouping es_ES */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_ES *****/ + +/***** LOCALE BEGIN et_EE: Estonian - Estonia *****/ +static const char *my_locale_month_names_et_EE[13] = + {"jaanuar","veebruar","märts","aprill","mai","juuni","juuli","august","september","oktoober","november","detsember", NullS }; +static const char *my_locale_ab_month_names_et_EE[13] = + {"jaan ","veebr","märts","apr ","mai ","juuni","juuli","aug ","sept ","okt ","nov ","dets ", NullS }; +static const char *my_locale_day_names_et_EE[8] = + {"esmaspäev","teisipäev","kolmapäev","neljapäev","reede","laupäev","pühapäev", NullS }; +static const char *my_locale_ab_day_names_et_EE[8] = + {"E","T","K","N","R","L","P", NullS }; +static TYPELIB my_locale_typelib_month_names_et_EE = + { array_elements(my_locale_month_names_et_EE)-1, "", my_locale_month_names_et_EE, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_et_EE = + { array_elements(my_locale_ab_month_names_et_EE)-1, "", my_locale_ab_month_names_et_EE, NULL }; +static TYPELIB my_locale_typelib_day_names_et_EE = + { array_elements(my_locale_day_names_et_EE)-1, "", my_locale_day_names_et_EE, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_et_EE = + { array_elements(my_locale_ab_day_names_et_EE)-1, "", my_locale_ab_day_names_et_EE, NULL }; +MY_LOCALE my_locale_et_EE +( + 18, + "et_EE", + "Estonian - Estonia", + FALSE, + &my_locale_typelib_month_names_et_EE, + &my_locale_typelib_ab_month_names_et_EE, + &my_locale_typelib_day_names_et_EE, + &my_locale_typelib_ab_day_names_et_EE, + 9, + 9, + ',', /* decimal point et_EE */ + ' ', /* thousands_sep et_EE */ + "\x03\x03", /* grouping et_EE */ + &global_errmsgs[et_EE] +); +/***** LOCALE END et_EE *****/ + +/***** LOCALE BEGIN eu_ES: Basque - Basque *****/ +static const 
char *my_locale_month_names_eu_ES[13] = + {"urtarrila","otsaila","martxoa","apirila","maiatza","ekaina","uztaila","abuztua","iraila","urria","azaroa","abendua", NullS }; +static const char *my_locale_ab_month_names_eu_ES[13] = + {"urt","ots","mar","api","mai","eka","uzt","abu","ira","urr","aza","abe", NullS }; +static const char *my_locale_day_names_eu_ES[8] = + {"astelehena","asteartea","asteazkena","osteguna","ostirala","larunbata","igandea", NullS }; +static const char *my_locale_ab_day_names_eu_ES[8] = + {"al.","ar.","az.","og.","or.","lr.","ig.", NullS }; +static TYPELIB my_locale_typelib_month_names_eu_ES = + { array_elements(my_locale_month_names_eu_ES)-1, "", my_locale_month_names_eu_ES, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_eu_ES = + { array_elements(my_locale_ab_month_names_eu_ES)-1, "", my_locale_ab_month_names_eu_ES, NULL }; +static TYPELIB my_locale_typelib_day_names_eu_ES = + { array_elements(my_locale_day_names_eu_ES)-1, "", my_locale_day_names_eu_ES, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_eu_ES = + { array_elements(my_locale_ab_day_names_eu_ES)-1, "", my_locale_ab_day_names_eu_ES, NULL }; +MY_LOCALE my_locale_eu_ES +( + 19, + "eu_ES", + "Basque - Basque", + TRUE, + &my_locale_typelib_month_names_eu_ES, + &my_locale_typelib_ab_month_names_eu_ES, + &my_locale_typelib_day_names_eu_ES, + &my_locale_typelib_ab_day_names_eu_ES, + 9, + 10, + ',', /* decimal point eu_ES */ + '\0', /* thousands_sep eu_ES */ + "\x80\x80", /* grouping eu_ES */ + &global_errmsgs[en_US] +); +/***** LOCALE END eu_ES *****/ + +/***** LOCALE BEGIN fi_FI: Finnish - Finland *****/ +static const char *my_locale_month_names_fi_FI[13] = + {"tammikuu","helmikuu","maaliskuu","huhtikuu","toukokuu","kesäkuu","heinäkuu","elokuu","syyskuu","lokakuu","marraskuu","joulukuu", NullS }; +static const char *my_locale_ab_month_names_fi_FI[13] = + {"tammi ","helmi ","maalis","huhti ","touko ","kesä  ","heinä ","elo   ","syys  ","loka  ","marras","joulu ", NullS }; 
+static const char *my_locale_day_names_fi_FI[8] = + {"maanantai","tiistai","keskiviikko","torstai","perjantai","lauantai","sunnuntai", NullS }; +static const char *my_locale_ab_day_names_fi_FI[8] = + {"ma","ti","ke","to","pe","la","su", NullS }; +static TYPELIB my_locale_typelib_month_names_fi_FI = + { array_elements(my_locale_month_names_fi_FI)-1, "", my_locale_month_names_fi_FI, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_fi_FI = + { array_elements(my_locale_ab_month_names_fi_FI)-1, "", my_locale_ab_month_names_fi_FI, NULL }; +static TYPELIB my_locale_typelib_day_names_fi_FI = + { array_elements(my_locale_day_names_fi_FI)-1, "", my_locale_day_names_fi_FI, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_fi_FI = + { array_elements(my_locale_ab_day_names_fi_FI)-1, "", my_locale_ab_day_names_fi_FI, NULL }; +MY_LOCALE my_locale_fi_FI +( + 20, + "fi_FI", + "Finnish - Finland", + FALSE, + &my_locale_typelib_month_names_fi_FI, + &my_locale_typelib_ab_month_names_fi_FI, + &my_locale_typelib_day_names_fi_FI, + &my_locale_typelib_ab_day_names_fi_FI, + 9, + 11, + ',', /* decimal point fi_FI */ + ' ', /* thousands_sep fi_FI */ + "\x03\x03", /* grouping fi_FI */ + &global_errmsgs[en_US] +); +/***** LOCALE END fi_FI *****/ + +/***** LOCALE BEGIN fo_FO: Faroese - Faroe Islands *****/ +static const char *my_locale_month_names_fo_FO[13] = + {"januar","februar","mars","apríl","mai","juni","juli","august","september","oktober","november","desember", NullS }; +static const char *my_locale_ab_month_names_fo_FO[13] = + {"jan","feb","mar","apr","mai","jun","jul","aug","sep","okt","nov","des", NullS }; +static const char *my_locale_day_names_fo_FO[8] = + {"mánadagur","týsdagur","mikudagur","hósdagur","fríggjadagur","leygardagur","sunnudagur", NullS }; +static const char *my_locale_ab_day_names_fo_FO[8] = + {"mán","týs","mik","hós","frí","ley","sun", NullS }; +static TYPELIB my_locale_typelib_month_names_fo_FO = + { array_elements(my_locale_month_names_fo_FO)-1, "", 
my_locale_month_names_fo_FO, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_fo_FO = + { array_elements(my_locale_ab_month_names_fo_FO)-1, "", my_locale_ab_month_names_fo_FO, NULL }; +static TYPELIB my_locale_typelib_day_names_fo_FO = + { array_elements(my_locale_day_names_fo_FO)-1, "", my_locale_day_names_fo_FO, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_fo_FO = + { array_elements(my_locale_ab_day_names_fo_FO)-1, "", my_locale_ab_day_names_fo_FO, NULL }; +MY_LOCALE my_locale_fo_FO +( + 21, + "fo_FO", + "Faroese - Faroe Islands", + FALSE, + &my_locale_typelib_month_names_fo_FO, + &my_locale_typelib_ab_month_names_fo_FO, + &my_locale_typelib_day_names_fo_FO, + &my_locale_typelib_ab_day_names_fo_FO, + 9, + 12, + ',', /* decimal point fo_FO */ + '.', /* thousands_sep fo_FO */ + "\x03\x03", /* grouping fo_FO */ + &global_errmsgs[en_US] +); +/***** LOCALE END fo_FO *****/ + +/***** LOCALE BEGIN fr_FR: French - France *****/ +static const char *my_locale_month_names_fr_FR[13] = + {"janvier","février","mars","avril","mai","juin","juillet","août","septembre","octobre","novembre","décembre", NullS }; +static const char *my_locale_ab_month_names_fr_FR[13] = + {"jan","fév","mar","avr","mai","jun","jui","aoû","sep","oct","nov","déc", NullS }; +static const char *my_locale_day_names_fr_FR[8] = + {"lundi","mardi","mercredi","jeudi","vendredi","samedi","dimanche", NullS }; +static const char *my_locale_ab_day_names_fr_FR[8] = + {"lun","mar","mer","jeu","ven","sam","dim", NullS }; +static TYPELIB my_locale_typelib_month_names_fr_FR = + { array_elements(my_locale_month_names_fr_FR)-1, "", my_locale_month_names_fr_FR, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_fr_FR = + { array_elements(my_locale_ab_month_names_fr_FR)-1, "", my_locale_ab_month_names_fr_FR, NULL }; +static TYPELIB my_locale_typelib_day_names_fr_FR = + { array_elements(my_locale_day_names_fr_FR)-1, "", my_locale_day_names_fr_FR, NULL }; +static TYPELIB 
my_locale_typelib_ab_day_names_fr_FR = + { array_elements(my_locale_ab_day_names_fr_FR)-1, "", my_locale_ab_day_names_fr_FR, NULL }; +MY_LOCALE my_locale_fr_FR +( + 5, + "fr_FR", + "French - France", + FALSE, + &my_locale_typelib_month_names_fr_FR, + &my_locale_typelib_ab_month_names_fr_FR, + &my_locale_typelib_day_names_fr_FR, + &my_locale_typelib_ab_day_names_fr_FR, + 9, + 8, + ',', /* decimal point fr_FR */ + '\0', /* thousands_sep fr_FR */ + "\x80\x80", /* grouping fr_FR */ + &global_errmsgs[fr_FR] +); +/***** LOCALE END fr_FR *****/ + +/***** LOCALE BEGIN gl_ES: Galician - Galician *****/ +static const char *my_locale_month_names_gl_ES[13] = + {"Xaneiro","Febreiro","Marzo","Abril","Maio","Xuño","Xullo","Agosto","Setembro","Outubro","Novembro","Decembro", NullS }; +static const char *my_locale_ab_month_names_gl_ES[13] = + {"Xan","Feb","Mar","Abr","Mai","Xuñ","Xul","Ago","Set","Out","Nov","Dec", NullS }; +static const char *my_locale_day_names_gl_ES[8] = + {"Luns","Martes","Mércores","Xoves","Venres","Sábado","Domingo", NullS }; +static const char *my_locale_ab_day_names_gl_ES[8] = + {"Lun","Mar","Mér","Xov","Ven","Sáb","Dom", NullS }; +static TYPELIB my_locale_typelib_month_names_gl_ES = + { array_elements(my_locale_month_names_gl_ES)-1, "", my_locale_month_names_gl_ES, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_gl_ES = + { array_elements(my_locale_ab_month_names_gl_ES)-1, "", my_locale_ab_month_names_gl_ES, NULL }; +static TYPELIB my_locale_typelib_day_names_gl_ES = + { array_elements(my_locale_day_names_gl_ES)-1, "", my_locale_day_names_gl_ES, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_gl_ES = + { array_elements(my_locale_ab_day_names_gl_ES)-1, "", my_locale_ab_day_names_gl_ES, NULL }; +MY_LOCALE my_locale_gl_ES +( + 22, + "gl_ES", + "Galician - Galician", + FALSE, + &my_locale_typelib_month_names_gl_ES, + &my_locale_typelib_ab_month_names_gl_ES, + &my_locale_typelib_day_names_gl_ES, + &my_locale_typelib_ab_day_names_gl_ES, + 8, + 8, 
+ ',', /* decimal point gl_ES */ + '\0', /* thousands_sep gl_ES */ + "\x80\x80", /* grouping gl_ES */ + &global_errmsgs[en_US] +); +/***** LOCALE END gl_ES *****/ + +/***** LOCALE BEGIN gu_IN: Gujarati - India *****/ +static const char *my_locale_month_names_gu_IN[13] = + {"જાન્યુઆરી","ફેબ્રુઆરી","માર્ચ","એપ્રિલ","મે","જુન","જુલાઇ","ઓગસ્ટ","સેપ્ટેમ્બર","ઓક્ટોબર","નવેમ્બર","ડિસેમ્બર", NullS }; +static const char *my_locale_ab_month_names_gu_IN[13] = + {"જાન","ફેબ","માર","એપ્ર","મે","જુન","જુલ","ઓગ","સેપ્ટ","ઓક્ટ","નોવ","ડિસ", NullS }; +static const char *my_locale_day_names_gu_IN[8] = + {"સોમવાર","મન્ગળવાર","બુધવાર","ગુરુવાર","શુક્રવાર","શનિવાર","રવિવાર", NullS }; +static const char *my_locale_ab_day_names_gu_IN[8] = + {"સોમ","મન્ગળ","બુધ","ગુરુ","શુક્ર","શનિ","રવિ", NullS }; +static TYPELIB my_locale_typelib_month_names_gu_IN = + { array_elements(my_locale_month_names_gu_IN)-1, "", my_locale_month_names_gu_IN, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_gu_IN = + { array_elements(my_locale_ab_month_names_gu_IN)-1, "", my_locale_ab_month_names_gu_IN, NULL }; +static TYPELIB my_locale_typelib_day_names_gu_IN = + { array_elements(my_locale_day_names_gu_IN)-1, "", my_locale_day_names_gu_IN, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_gu_IN = + { array_elements(my_locale_ab_day_names_gu_IN)-1, "", my_locale_ab_day_names_gu_IN, NULL }; +MY_LOCALE my_locale_gu_IN +( + 23, + "gu_IN", + "Gujarati - India", + FALSE, + &my_locale_typelib_month_names_gu_IN, + &my_locale_typelib_ab_month_names_gu_IN, + &my_locale_typelib_day_names_gu_IN, + &my_locale_typelib_ab_day_names_gu_IN, + 10, + 8, + '.', /* decimal point gu_IN */ + ',', /* thousands_sep gu_IN */ + "\x03", /* grouping gu_IN */ + &global_errmsgs[en_US] +); +/***** LOCALE END gu_IN *****/ + +/***** LOCALE BEGIN he_IL: Hebrew - Israel *****/ +static const char *my_locale_month_names_he_IL[13] = + {"ינואר","פברואר","מרץ","אפריל","מאי","יוני","יולי","אוגוסט","ספטמבר","אוקטובר","נובמבר","דצמבר", NullS 
}; +static const char *my_locale_ab_month_names_he_IL[13] = + {"ינו","פבר","מרץ","אפר","מאי","יונ","יול","אוג","ספט","אוק","נוב","דצמ", NullS }; +static const char *my_locale_day_names_he_IL[8] = + {"שני","שלישי","רביעי","חמישי","שישי","שבת","ראשון", NullS }; +static const char *my_locale_ab_day_names_he_IL[8] = + {"ב'","ג'","ד'","ה'","ו'","ש'","א'", NullS }; +static TYPELIB my_locale_typelib_month_names_he_IL = + { array_elements(my_locale_month_names_he_IL)-1, "", my_locale_month_names_he_IL, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_he_IL = + { array_elements(my_locale_ab_month_names_he_IL)-1, "", my_locale_ab_month_names_he_IL, NULL }; +static TYPELIB my_locale_typelib_day_names_he_IL = + { array_elements(my_locale_day_names_he_IL)-1, "", my_locale_day_names_he_IL, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_he_IL = + { array_elements(my_locale_ab_day_names_he_IL)-1, "", my_locale_ab_day_names_he_IL, NULL }; +MY_LOCALE my_locale_he_IL +( + 24, + "he_IL", + "Hebrew - Israel", + FALSE, + &my_locale_typelib_month_names_he_IL, + &my_locale_typelib_ab_month_names_he_IL, + &my_locale_typelib_day_names_he_IL, + &my_locale_typelib_ab_day_names_he_IL, + 7, + 5, + '.', /* decimal point he_IL */ + ',', /* thousands_sep he_IL */ + "\x03\x03", /* grouping he_IL */ + &global_errmsgs[en_US] +); +/***** LOCALE END he_IL *****/ + +/***** LOCALE BEGIN hi_IN: Hindi - India *****/ +static const char *my_locale_month_names_hi_IN[13] = + {"जनवरी","फ़रवरी","मार्च","अप्रेल","मई","जून","जुलाई","अगस्त","सितम्बर","अक्टूबर","नवम्बर","दिसम्बर", NullS }; +static const char *my_locale_ab_month_names_hi_IN[13] = + {"जनवरी","फ़रवरी","मार्च","अप्रेल","मई","जून","जुलाई","अगस्त","सितम्बर","अक्टूबर","नवम्बर","दिसम्बर", NullS }; +static const char *my_locale_day_names_hi_IN[8] = + {"सोमवार ","मंगलवार ","बुधवार ","गुरुवार ","शुक्रवार ","शनिवार ","रविवार ", NullS }; +static const char *my_locale_ab_day_names_hi_IN[8] = + {"सोम ","मंगल ","बुध ","गुरु ","शुक्र ","शनि ","रवि ", 
NullS }; +static TYPELIB my_locale_typelib_month_names_hi_IN = + { array_elements(my_locale_month_names_hi_IN)-1, "", my_locale_month_names_hi_IN, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_hi_IN = + { array_elements(my_locale_ab_month_names_hi_IN)-1, "", my_locale_ab_month_names_hi_IN, NULL }; +static TYPELIB my_locale_typelib_day_names_hi_IN = + { array_elements(my_locale_day_names_hi_IN)-1, "", my_locale_day_names_hi_IN, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_hi_IN = + { array_elements(my_locale_ab_day_names_hi_IN)-1, "", my_locale_ab_day_names_hi_IN, NULL }; +MY_LOCALE my_locale_hi_IN +( + 25, + "hi_IN", + "Hindi - India", + FALSE, + &my_locale_typelib_month_names_hi_IN, + &my_locale_typelib_ab_month_names_hi_IN, + &my_locale_typelib_day_names_hi_IN, + &my_locale_typelib_ab_day_names_hi_IN, + 7, + 9, + '.', /* decimal point hi_IN */ + ',', /* thousands_sep hi_IN */ + "\x03", /* grouping hi_IN */ + &global_errmsgs[hi_IN] +); +/***** LOCALE END hi_IN *****/ + +/***** LOCALE BEGIN hr_HR: Croatian - Croatia *****/ +static const char *my_locale_month_names_hr_HR[13] = + {"Siječanj","Veljača","Ožujak","Travanj","Svibanj","Lipanj","Srpanj","Kolovoz","Rujan","Listopad","Studeni","Prosinac", NullS }; +static const char *my_locale_ab_month_names_hr_HR[13] = + {"Sij","Vel","Ožu","Tra","Svi","Lip","Srp","Kol","Ruj","Lis","Stu","Pro", NullS }; +static const char *my_locale_day_names_hr_HR[8] = + {"Ponedjeljak","Utorak","Srijeda","Četvrtak","Petak","Subota","Nedjelja", NullS }; +static const char *my_locale_ab_day_names_hr_HR[8] = + {"Pon","Uto","Sri","Čet","Pet","Sub","Ned", NullS }; +static TYPELIB my_locale_typelib_month_names_hr_HR = + { array_elements(my_locale_month_names_hr_HR)-1, "", my_locale_month_names_hr_HR, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_hr_HR = + { array_elements(my_locale_ab_month_names_hr_HR)-1, "", my_locale_ab_month_names_hr_HR, NULL }; +static TYPELIB my_locale_typelib_day_names_hr_HR = + { 
array_elements(my_locale_day_names_hr_HR)-1, "", my_locale_day_names_hr_HR, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_hr_HR = + { array_elements(my_locale_ab_day_names_hr_HR)-1, "", my_locale_ab_day_names_hr_HR, NULL }; +MY_LOCALE my_locale_hr_HR +( + 26, + "hr_HR", + "Croatian - Croatia", + FALSE, + &my_locale_typelib_month_names_hr_HR, + &my_locale_typelib_ab_month_names_hr_HR, + &my_locale_typelib_day_names_hr_HR, + &my_locale_typelib_ab_day_names_hr_HR, + 8, + 11, + ',', /* decimal point hr_HR */ + '\0', /* thousands_sep hr_HR */ + "\x80\x80", /* grouping hr_HR */ + &global_errmsgs[en_US] +); +/***** LOCALE END hr_HR *****/ + +/***** LOCALE BEGIN hu_HU: Hungarian - Hungary *****/ +static const char *my_locale_month_names_hu_HU[13] = + {"január","február","március","április","május","június","július","augusztus","szeptember","október","november","december", NullS }; +static const char *my_locale_ab_month_names_hu_HU[13] = + {"jan","feb","már","ápr","máj","jún","júl","aug","sze","okt","nov","dec", NullS }; +static const char *my_locale_day_names_hu_HU[8] = + {"hétfő","kedd","szerda","csütörtök","péntek","szombat","vasárnap", NullS }; +static const char *my_locale_ab_day_names_hu_HU[8] = + {"h","k","sze","cs","p","szo","v", NullS }; +static TYPELIB my_locale_typelib_month_names_hu_HU = + { array_elements(my_locale_month_names_hu_HU)-1, "", my_locale_month_names_hu_HU, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_hu_HU = + { array_elements(my_locale_ab_month_names_hu_HU)-1, "", my_locale_ab_month_names_hu_HU, NULL }; +static TYPELIB my_locale_typelib_day_names_hu_HU = + { array_elements(my_locale_day_names_hu_HU)-1, "", my_locale_day_names_hu_HU, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_hu_HU = + { array_elements(my_locale_ab_day_names_hu_HU)-1, "", my_locale_ab_day_names_hu_HU, NULL }; +MY_LOCALE my_locale_hu_HU +( + 27, + "hu_HU", + "Hungarian - Hungary", + FALSE, + &my_locale_typelib_month_names_hu_HU, + 
&my_locale_typelib_ab_month_names_hu_HU, + &my_locale_typelib_day_names_hu_HU, + &my_locale_typelib_ab_day_names_hu_HU, + 10, + 9, + ',', /* decimal point hu_HU */ + '.', /* thousands_sep hu_HU */ + "\x03\x03", /* grouping hu_HU */ + &global_errmsgs[hu_HU] +); +/***** LOCALE END hu_HU *****/ + +/***** LOCALE BEGIN id_ID: Indonesian - Indonesia *****/ +static const char *my_locale_month_names_id_ID[13] = + {"Januari","Pebruari","Maret","April","Mei","Juni","Juli","Agustus","September","Oktober","November","Desember", NullS }; +static const char *my_locale_ab_month_names_id_ID[13] = + {"Jan","Peb","Mar","Apr","Mei","Jun","Jul","Agu","Sep","Okt","Nov","Des", NullS }; +static const char *my_locale_day_names_id_ID[8] = + {"Senin","Selasa","Rabu","Kamis","Jumat","Sabtu","Minggu", NullS }; +static const char *my_locale_ab_day_names_id_ID[8] = + {"Sen","Sel","Rab","Kam","Jum","Sab","Min", NullS }; +static TYPELIB my_locale_typelib_month_names_id_ID = + { array_elements(my_locale_month_names_id_ID)-1, "", my_locale_month_names_id_ID, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_id_ID = + { array_elements(my_locale_ab_month_names_id_ID)-1, "", my_locale_ab_month_names_id_ID, NULL }; +static TYPELIB my_locale_typelib_day_names_id_ID = + { array_elements(my_locale_day_names_id_ID)-1, "", my_locale_day_names_id_ID, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_id_ID = + { array_elements(my_locale_ab_day_names_id_ID)-1, "", my_locale_ab_day_names_id_ID, NULL }; +MY_LOCALE my_locale_id_ID +( + 28, + "id_ID", + "Indonesian - Indonesia", + TRUE, + &my_locale_typelib_month_names_id_ID, + &my_locale_typelib_ab_month_names_id_ID, + &my_locale_typelib_day_names_id_ID, + &my_locale_typelib_ab_day_names_id_ID, + 9, + 6, + ',', /* decimal point id_ID */ + '.', /* thousands_sep id_ID */ + "\x03\x03", /* grouping id_ID */ + &global_errmsgs[en_US] +); +/***** LOCALE END id_ID *****/ + +/***** LOCALE BEGIN is_IS: Icelandic - Iceland *****/ +static const char 
*my_locale_month_names_is_IS[13] = + {"janúar","febrúar","mars","apríl","maí","júní","júlí","ágúst","september","október","nóvember","desember", NullS }; +static const char *my_locale_ab_month_names_is_IS[13] = + {"jan","feb","mar","apr","maí","jún","júl","ágú","sep","okt","nóv","des", NullS }; +static const char *my_locale_day_names_is_IS[8] = + {"mánudagur","þriðjudagur","miðvikudagur","fimmtudagur","föstudagur","laugardagur","sunnudagur", NullS }; +static const char *my_locale_ab_day_names_is_IS[8] = + {"mán","þri","mið","fim","fös","lau","sun", NullS }; +static TYPELIB my_locale_typelib_month_names_is_IS = + { array_elements(my_locale_month_names_is_IS)-1, "", my_locale_month_names_is_IS, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_is_IS = + { array_elements(my_locale_ab_month_names_is_IS)-1, "", my_locale_ab_month_names_is_IS, NULL }; +static TYPELIB my_locale_typelib_day_names_is_IS = + { array_elements(my_locale_day_names_is_IS)-1, "", my_locale_day_names_is_IS, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_is_IS = + { array_elements(my_locale_ab_day_names_is_IS)-1, "", my_locale_ab_day_names_is_IS, NULL }; +MY_LOCALE my_locale_is_IS +( + 29, + "is_IS", + "Icelandic - Iceland", + FALSE, + &my_locale_typelib_month_names_is_IS, + &my_locale_typelib_ab_month_names_is_IS, + &my_locale_typelib_day_names_is_IS, + &my_locale_typelib_ab_day_names_is_IS, + 9, + 12, + ',', /* decimal point is_IS */ + '.', /* thousands_sep is_IS */ + "\x03\x03", /* grouping is_IS */ + &global_errmsgs[en_US] +); +/***** LOCALE END is_IS *****/ + +/***** LOCALE BEGIN it_CH: Italian - Switzerland *****/ +static const char *my_locale_month_names_it_CH[13] = + {"gennaio","febbraio","marzo","aprile","maggio","giugno","luglio","agosto","settembre","ottobre","novembre","dicembre", NullS }; +static const char *my_locale_ab_month_names_it_CH[13] = + {"gen","feb","mar","apr","mag","giu","lug","ago","set","ott","nov","dic", NullS }; +static const char 
*my_locale_day_names_it_CH[8] = + {"lunedì","martedì","mercoledì","giovedì","venerdì","sabato","domenica", NullS }; +static const char *my_locale_ab_day_names_it_CH[8] = + {"lun","mar","mer","gio","ven","sab","dom", NullS }; +static TYPELIB my_locale_typelib_month_names_it_CH = + { array_elements(my_locale_month_names_it_CH)-1, "", my_locale_month_names_it_CH, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_it_CH = + { array_elements(my_locale_ab_month_names_it_CH)-1, "", my_locale_ab_month_names_it_CH, NULL }; +static TYPELIB my_locale_typelib_day_names_it_CH = + { array_elements(my_locale_day_names_it_CH)-1, "", my_locale_day_names_it_CH, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_it_CH = + { array_elements(my_locale_ab_day_names_it_CH)-1, "", my_locale_ab_day_names_it_CH, NULL }; +MY_LOCALE my_locale_it_CH +( + 30, + "it_CH", + "Italian - Switzerland", + FALSE, + &my_locale_typelib_month_names_it_CH, + &my_locale_typelib_ab_month_names_it_CH, + &my_locale_typelib_day_names_it_CH, + &my_locale_typelib_ab_day_names_it_CH, + 9, + 9, + ',', /* decimal point it_CH */ + '\'', /* thousands_sep it_CH */ + "\x03\x03", /* grouping it_CH */ + &global_errmsgs[it_IT] +); +/***** LOCALE END it_CH *****/ + +/***** LOCALE BEGIN ja_JP: Japanese - Japan *****/ +static const char *my_locale_month_names_ja_JP[13] = + {"1月","2月","3月","4月","5月","6月","7月","8月","9月","10月","11月","12月", NullS }; +static const char *my_locale_ab_month_names_ja_JP[13] = + {" 1月"," 2月"," 3月"," 4月"," 5月"," 6月"," 7月"," 8月"," 9月","10月","11月","12月", NullS }; +static const char *my_locale_day_names_ja_JP[8] = + {"月曜日","火曜日","水曜日","木曜日","金曜日","土曜日","日曜日", NullS }; +static const char *my_locale_ab_day_names_ja_JP[8] = + {"月","火","水","木","金","土","日", NullS }; +static TYPELIB my_locale_typelib_month_names_ja_JP = + { array_elements(my_locale_month_names_ja_JP)-1, "", my_locale_month_names_ja_JP, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_ja_JP = + { 
array_elements(my_locale_ab_month_names_ja_JP)-1, "", my_locale_ab_month_names_ja_JP, NULL }; +static TYPELIB my_locale_typelib_day_names_ja_JP = + { array_elements(my_locale_day_names_ja_JP)-1, "", my_locale_day_names_ja_JP, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_ja_JP = + { array_elements(my_locale_ab_day_names_ja_JP)-1, "", my_locale_ab_day_names_ja_JP, NULL }; +MY_LOCALE my_locale_ja_JP +( + 2, + "ja_JP", + "Japanese - Japan", + FALSE, + &my_locale_typelib_month_names_ja_JP, + &my_locale_typelib_ab_month_names_ja_JP, + &my_locale_typelib_day_names_ja_JP, + &my_locale_typelib_ab_day_names_ja_JP, + 3, + 3, + '.', /* decimal point ja_JP */ + ',', /* thousands_sep ja_JP */ + "\x03", /* grouping ja_JP */ + &global_errmsgs[ja_JP] +); +/***** LOCALE END ja_JP *****/ + +/***** LOCALE BEGIN ko_KR: Korean - Korea *****/ +static const char *my_locale_month_names_ko_KR[13] = + {"일월","이월","삼월","사월","오월","유월","칠월","팔월","구월","시월","십일월","십이월", NullS }; +static const char *my_locale_ab_month_names_ko_KR[13] = + {" 1월"," 2월"," 3월"," 4월"," 5월"," 6월"," 7월"," 8월"," 9월","10월","11월","12월", NullS }; +static const char *my_locale_day_names_ko_KR[8] = + {"월요일","화요일","수요일","목요일","금요일","토요일","일요일", NullS }; +static const char *my_locale_ab_day_names_ko_KR[8] = + {"월","화","수","목","금","토","일", NullS }; +static TYPELIB my_locale_typelib_month_names_ko_KR = + { array_elements(my_locale_month_names_ko_KR)-1, "", my_locale_month_names_ko_KR, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_ko_KR = + { array_elements(my_locale_ab_month_names_ko_KR)-1, "", my_locale_ab_month_names_ko_KR, NULL }; +static TYPELIB my_locale_typelib_day_names_ko_KR = + { array_elements(my_locale_day_names_ko_KR)-1, "", my_locale_day_names_ko_KR, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_ko_KR = + { array_elements(my_locale_ab_day_names_ko_KR)-1, "", my_locale_ab_day_names_ko_KR, NULL }; +MY_LOCALE my_locale_ko_KR +( + 31, + "ko_KR", + "Korean - Korea", + FALSE, + 
&my_locale_typelib_month_names_ko_KR, + &my_locale_typelib_ab_month_names_ko_KR, + &my_locale_typelib_day_names_ko_KR, + &my_locale_typelib_ab_day_names_ko_KR, + 3, + 3, + '.', /* decimal point ko_KR */ + ',', /* thousands_sep ko_KR */ + "\x03\x03", /* grouping ko_KR */ + &global_errmsgs[ko_KR] +); +/***** LOCALE END ko_KR *****/ + +/***** LOCALE BEGIN lt_LT: Lithuanian - Lithuania *****/ +static const char *my_locale_month_names_lt_LT[13] = + {"sausio","vasario","kovo","balandžio","gegužės","birželio","liepos","rugpjūčio","rugsėjo","spalio","lapkričio","gruodžio", NullS }; +static const char *my_locale_ab_month_names_lt_LT[13] = + {"Sau","Vas","Kov","Bal","Geg","Bir","Lie","Rgp","Rgs","Spa","Lap","Grd", NullS }; +static const char *my_locale_day_names_lt_LT[8] = + {"Pirmadienis","Antradienis","Trečiadienis","Ketvirtadienis","Penktadienis","Šeštadienis","Sekmadienis", NullS }; +static const char *my_locale_ab_day_names_lt_LT[8] = + {"Pr","An","Tr","Kt","Pn","Št","Sk", NullS }; +static TYPELIB my_locale_typelib_month_names_lt_LT = + { array_elements(my_locale_month_names_lt_LT)-1, "", my_locale_month_names_lt_LT, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_lt_LT = + { array_elements(my_locale_ab_month_names_lt_LT)-1, "", my_locale_ab_month_names_lt_LT, NULL }; +static TYPELIB my_locale_typelib_day_names_lt_LT = + { array_elements(my_locale_day_names_lt_LT)-1, "", my_locale_day_names_lt_LT, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_lt_LT = + { array_elements(my_locale_ab_day_names_lt_LT)-1, "", my_locale_ab_day_names_lt_LT, NULL }; +MY_LOCALE my_locale_lt_LT +( + 32, + "lt_LT", + "Lithuanian - Lithuania", + FALSE, + &my_locale_typelib_month_names_lt_LT, + &my_locale_typelib_ab_month_names_lt_LT, + &my_locale_typelib_day_names_lt_LT, + &my_locale_typelib_ab_day_names_lt_LT, + 9, + 14, + ',', /* decimal point lt_LT */ + '.', /* thousands_sep lt_LT */ + "\x03\x03", /* grouping lt_LT */ + &global_errmsgs[en_US] +); +/***** LOCALE END lt_LT 
*****/ + +/***** LOCALE BEGIN lv_LV: Latvian - Latvia *****/ +static const char *my_locale_month_names_lv_LV[13] = + {"janvāris","februāris","marts","aprīlis","maijs","jūnijs","jūlijs","augusts","septembris","oktobris","novembris","decembris", NullS }; +static const char *my_locale_ab_month_names_lv_LV[13] = + {"jan","feb","mar","apr","mai","jūn","jūl","aug","sep","okt","nov","dec", NullS }; +static const char *my_locale_day_names_lv_LV[8] = + {"pirmdiena","otrdiena","trešdiena","ceturtdiena","piektdiena","sestdiena","svētdiena", NullS }; +static const char *my_locale_ab_day_names_lv_LV[8] = + {"P ","O ","T ","C ","Pk","S ","Sv", NullS }; +static TYPELIB my_locale_typelib_month_names_lv_LV = + { array_elements(my_locale_month_names_lv_LV)-1, "", my_locale_month_names_lv_LV, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_lv_LV = + { array_elements(my_locale_ab_month_names_lv_LV)-1, "", my_locale_ab_month_names_lv_LV, NULL }; +static TYPELIB my_locale_typelib_day_names_lv_LV = + { array_elements(my_locale_day_names_lv_LV)-1, "", my_locale_day_names_lv_LV, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_lv_LV = + { array_elements(my_locale_ab_day_names_lv_LV)-1, "", my_locale_ab_day_names_lv_LV, NULL }; +MY_LOCALE my_locale_lv_LV +( + 33, + "lv_LV", + "Latvian - Latvia", + FALSE, + &my_locale_typelib_month_names_lv_LV, + &my_locale_typelib_ab_month_names_lv_LV, + &my_locale_typelib_day_names_lv_LV, + &my_locale_typelib_ab_day_names_lv_LV, + 10, + 11, + ',', /* decimal point lv_LV */ + ' ', /* thousands_sep lv_LV */ + "\x03\x03", /* grouping lv_LV */ + &global_errmsgs[en_US] +); +/***** LOCALE END lv_LV *****/ + +/***** LOCALE BEGIN mk_MK: Macedonian - FYROM *****/ +static const char *my_locale_month_names_mk_MK[13] = + {"јануари","февруари","март","април","мај","јуни","јули","август","септември","октомври","ноември","декември", NullS }; +static const char *my_locale_ab_month_names_mk_MK[13] = + 
{"јан","фев","мар","апр","мај","јун","јул","авг","сеп","окт","ное","дек", NullS }; +static const char *my_locale_day_names_mk_MK[8] = + {"понеделник","вторник","среда","четврток","петок","сабота","недела", NullS }; +static const char *my_locale_ab_day_names_mk_MK[8] = + {"пон","вто","сре","чет","пет","саб","нед", NullS }; +static TYPELIB my_locale_typelib_month_names_mk_MK = + { array_elements(my_locale_month_names_mk_MK)-1, "", my_locale_month_names_mk_MK, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_mk_MK = + { array_elements(my_locale_ab_month_names_mk_MK)-1, "", my_locale_ab_month_names_mk_MK, NULL }; +static TYPELIB my_locale_typelib_day_names_mk_MK = + { array_elements(my_locale_day_names_mk_MK)-1, "", my_locale_day_names_mk_MK, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_mk_MK = + { array_elements(my_locale_ab_day_names_mk_MK)-1, "", my_locale_ab_day_names_mk_MK, NULL }; +MY_LOCALE my_locale_mk_MK +( + 34, + "mk_MK", + "Macedonian - FYROM", + FALSE, + &my_locale_typelib_month_names_mk_MK, + &my_locale_typelib_ab_month_names_mk_MK, + &my_locale_typelib_day_names_mk_MK, + &my_locale_typelib_ab_day_names_mk_MK, + 9, + 10, + ',', /* decimal point mk_MK */ + ' ', /* thousands_sep mk_MK */ + "\x03\x03", /* grouping mk_MK */ + &global_errmsgs[en_US] +); +/***** LOCALE END mk_MK *****/ + +/***** LOCALE BEGIN mn_MN: Mongolia - Mongolian *****/ +static const char *my_locale_month_names_mn_MN[13] = + {"Нэгдүгээр сар","Хоёрдугаар сар","Гуравдугаар сар","Дөрөвдүгээр сар","Тавдугаар сар","Зургаадугар сар","Долоодугаар сар","Наймдугаар сар","Есдүгээр сар","Аравдугаар сар","Арваннэгдүгээр сар","Арванхоёрдгаар сар", NullS }; +static const char *my_locale_ab_month_names_mn_MN[13] = + {"1-р","2-р","3-р","4-р","5-р","6-р","7-р","8-р","9-р","10-р","11-р","12-р", NullS }; +static const char *my_locale_day_names_mn_MN[8] = + {"Даваа","Мягмар","Лхагва","Пүрэв","Баасан","Бямба","Ням", NullS }; +static const char *my_locale_ab_day_names_mn_MN[8] = + 
{"Да","Мя","Лх","Пү","Ба","Бя","Ня", NullS }; +static TYPELIB my_locale_typelib_month_names_mn_MN = + { array_elements(my_locale_month_names_mn_MN)-1, "", my_locale_month_names_mn_MN, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_mn_MN = + { array_elements(my_locale_ab_month_names_mn_MN)-1, "", my_locale_ab_month_names_mn_MN, NULL }; +static TYPELIB my_locale_typelib_day_names_mn_MN = + { array_elements(my_locale_day_names_mn_MN)-1, "", my_locale_day_names_mn_MN, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_mn_MN = + { array_elements(my_locale_ab_day_names_mn_MN)-1, "", my_locale_ab_day_names_mn_MN, NULL }; +MY_LOCALE my_locale_mn_MN +( + 35, + "mn_MN", + "Mongolia - Mongolian", + FALSE, + &my_locale_typelib_month_names_mn_MN, + &my_locale_typelib_ab_month_names_mn_MN, + &my_locale_typelib_day_names_mn_MN, + &my_locale_typelib_ab_day_names_mn_MN, + 18, + 6, + ',', /* decimal point mn_MN */ + '.', /* thousands_sep mn_MN */ + "\x03\x03", /* grouping mn_MN */ + &global_errmsgs[en_US] +); +/***** LOCALE END mn_MN *****/ + +/***** LOCALE BEGIN ms_MY: Malay - Malaysia *****/ +static const char *my_locale_month_names_ms_MY[13] = + {"Januari","Februari","Mac","April","Mei","Jun","Julai","Ogos","September","Oktober","November","Disember", NullS }; +static const char *my_locale_ab_month_names_ms_MY[13] = + {"Jan","Feb","Mac","Apr","Mei","Jun","Jul","Ogos","Sep","Okt","Nov","Dis", NullS }; +static const char *my_locale_day_names_ms_MY[8] = + {"Isnin","Selasa","Rabu","Khamis","Jumaat","Sabtu","Ahad", NullS }; +static const char *my_locale_ab_day_names_ms_MY[8] = + {"Isn","Sel","Rab","Kha","Jum","Sab","Ahd", NullS }; +static TYPELIB my_locale_typelib_month_names_ms_MY = + { array_elements(my_locale_month_names_ms_MY)-1, "", my_locale_month_names_ms_MY, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_ms_MY = + { array_elements(my_locale_ab_month_names_ms_MY)-1, "", my_locale_ab_month_names_ms_MY, NULL }; +static TYPELIB 
my_locale_typelib_day_names_ms_MY = + { array_elements(my_locale_day_names_ms_MY)-1, "", my_locale_day_names_ms_MY, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_ms_MY = + { array_elements(my_locale_ab_day_names_ms_MY)-1, "", my_locale_ab_day_names_ms_MY, NULL }; +MY_LOCALE my_locale_ms_MY +( + 36, + "ms_MY", + "Malay - Malaysia", + TRUE, + &my_locale_typelib_month_names_ms_MY, + &my_locale_typelib_ab_month_names_ms_MY, + &my_locale_typelib_day_names_ms_MY, + &my_locale_typelib_ab_day_names_ms_MY, + 9, + 6, + '.', /* decimal point ms_MY */ + ',', /* thousands_sep ms_MY */ + "\x03", /* grouping ms_MY */ + &global_errmsgs[en_US] +); +/***** LOCALE END ms_MY *****/ + +/***** LOCALE BEGIN nb_NO: Norwegian(Bokml) - Norway *****/ +static const char *my_locale_month_names_nb_NO[13] = + {"januar","februar","mars","april","mai","juni","juli","august","september","oktober","november","desember", NullS }; +static const char *my_locale_ab_month_names_nb_NO[13] = + {"jan","feb","mar","apr","mai","jun","jul","aug","sep","okt","nov","des", NullS }; +static const char *my_locale_day_names_nb_NO[8] = + {"mandag","tirsdag","onsdag","torsdag","fredag","lørdag","søndag", NullS }; +static const char *my_locale_ab_day_names_nb_NO[8] = + {"man","tir","ons","tor","fre","lør","søn", NullS }; +static TYPELIB my_locale_typelib_month_names_nb_NO = + { array_elements(my_locale_month_names_nb_NO)-1, "", my_locale_month_names_nb_NO, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_nb_NO = + { array_elements(my_locale_ab_month_names_nb_NO)-1, "", my_locale_ab_month_names_nb_NO, NULL }; +static TYPELIB my_locale_typelib_day_names_nb_NO = + { array_elements(my_locale_day_names_nb_NO)-1, "", my_locale_day_names_nb_NO, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_nb_NO = + { array_elements(my_locale_ab_day_names_nb_NO)-1, "", my_locale_ab_day_names_nb_NO, NULL }; +MY_LOCALE my_locale_nb_NO +( + 37, + "nb_NO", + "Norwegian(Bokml) - Norway", + FALSE, + 
&my_locale_typelib_month_names_nb_NO, + &my_locale_typelib_ab_month_names_nb_NO, + &my_locale_typelib_day_names_nb_NO, + &my_locale_typelib_ab_day_names_nb_NO, + 9, + 7, + ',', /* decimal point nb_NO */ + '.', /* thousands_sep nb_NO */ + "\x03\x03", /* grouping nb_NO */ + &global_errmsgs[no_NO] +); +/***** LOCALE END nb_NO *****/ + +/***** LOCALE BEGIN nl_NL: Dutch - The Netherlands *****/ +static const char *my_locale_month_names_nl_NL[13] = + {"januari","februari","maart","april","mei","juni","juli","augustus","september","oktober","november","december", NullS }; +static const char *my_locale_ab_month_names_nl_NL[13] = + {"jan","feb","mrt","apr","mei","jun","jul","aug","sep","okt","nov","dec", NullS }; +static const char *my_locale_day_names_nl_NL[8] = + {"maandag","dinsdag","woensdag","donderdag","vrijdag","zaterdag","zondag", NullS }; +static const char *my_locale_ab_day_names_nl_NL[8] = + {"ma","di","wo","do","vr","za","zo", NullS }; +static TYPELIB my_locale_typelib_month_names_nl_NL = + { array_elements(my_locale_month_names_nl_NL)-1, "", my_locale_month_names_nl_NL, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_nl_NL = + { array_elements(my_locale_ab_month_names_nl_NL)-1, "", my_locale_ab_month_names_nl_NL, NULL }; +static TYPELIB my_locale_typelib_day_names_nl_NL = + { array_elements(my_locale_day_names_nl_NL)-1, "", my_locale_day_names_nl_NL, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_nl_NL = + { array_elements(my_locale_ab_day_names_nl_NL)-1, "", my_locale_ab_day_names_nl_NL, NULL }; +MY_LOCALE my_locale_nl_NL +( + 38, + "nl_NL", + "Dutch - The Netherlands", + TRUE, + &my_locale_typelib_month_names_nl_NL, + &my_locale_typelib_ab_month_names_nl_NL, + &my_locale_typelib_day_names_nl_NL, + &my_locale_typelib_ab_day_names_nl_NL, + 9, + 9, + ',', /* decimal point nl_NL */ + '\0', /* thousands_sep nl_NL */ + "\x80\x80", /* grouping nl_NL */ + &global_errmsgs[nl_NL] +); +/***** LOCALE END nl_NL *****/ + +/***** LOCALE BEGIN pl_PL: Polish 
- Poland *****/ +static const char *my_locale_month_names_pl_PL[13] = + {"styczeń","luty","marzec","kwiecień","maj","czerwiec","lipiec","sierpień","wrzesień","październik","listopad","grudzień", NullS }; +static const char *my_locale_ab_month_names_pl_PL[13] = + {"sty","lut","mar","kwi","maj","cze","lip","sie","wrz","paź","lis","gru", NullS }; +static const char *my_locale_day_names_pl_PL[8] = + {"poniedziałek","wtorek","środa","czwartek","piątek","sobota","niedziela", NullS }; +static const char *my_locale_ab_day_names_pl_PL[8] = + {"pon","wto","śro","czw","pią","sob","nie", NullS }; +static TYPELIB my_locale_typelib_month_names_pl_PL = + { array_elements(my_locale_month_names_pl_PL)-1, "", my_locale_month_names_pl_PL, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_pl_PL = + { array_elements(my_locale_ab_month_names_pl_PL)-1, "", my_locale_ab_month_names_pl_PL, NULL }; +static TYPELIB my_locale_typelib_day_names_pl_PL = + { array_elements(my_locale_day_names_pl_PL)-1, "", my_locale_day_names_pl_PL, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_pl_PL = + { array_elements(my_locale_ab_day_names_pl_PL)-1, "", my_locale_ab_day_names_pl_PL, NULL }; +MY_LOCALE my_locale_pl_PL +( + 39, + "pl_PL", + "Polish - Poland", + FALSE, + &my_locale_typelib_month_names_pl_PL, + &my_locale_typelib_ab_month_names_pl_PL, + &my_locale_typelib_day_names_pl_PL, + &my_locale_typelib_ab_day_names_pl_PL, + 11, + 12, + ',', /* decimal point pl_PL */ + '\0', /* thousands_sep pl_PL */ + "\x80\x80", /* grouping pl_PL */ + &global_errmsgs[pl_PL] +); +/***** LOCALE END pl_PL *****/ + +/***** LOCALE BEGIN pt_BR: Portuguese - Brazil *****/ +static const char *my_locale_month_names_pt_BR[13] = + {"janeiro","fevereiro","março","abril","maio","junho","julho","agosto","setembro","outubro","novembro","dezembro", NullS }; +static const char *my_locale_ab_month_names_pt_BR[13] = + {"Jan","Fev","Mar","Abr","Mai","Jun","Jul","Ago","Set","Out","Nov","Dez", NullS }; +static const char 
*my_locale_day_names_pt_BR[8] = + {"segunda","terça","quarta","quinta","sexta","sábado","domingo", NullS }; +static const char *my_locale_ab_day_names_pt_BR[8] = + {"Seg","Ter","Qua","Qui","Sex","Sáb","Dom", NullS }; +static TYPELIB my_locale_typelib_month_names_pt_BR = + { array_elements(my_locale_month_names_pt_BR)-1, "", my_locale_month_names_pt_BR, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_pt_BR = + { array_elements(my_locale_ab_month_names_pt_BR)-1, "", my_locale_ab_month_names_pt_BR, NULL }; +static TYPELIB my_locale_typelib_day_names_pt_BR = + { array_elements(my_locale_day_names_pt_BR)-1, "", my_locale_day_names_pt_BR, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_pt_BR = + { array_elements(my_locale_ab_day_names_pt_BR)-1, "", my_locale_ab_day_names_pt_BR, NULL }; +MY_LOCALE my_locale_pt_BR +( + 40, + "pt_BR", + "Portuguese - Brazil", + FALSE, + &my_locale_typelib_month_names_pt_BR, + &my_locale_typelib_ab_month_names_pt_BR, + &my_locale_typelib_day_names_pt_BR, + &my_locale_typelib_ab_day_names_pt_BR, + 9, + 7, + ',', /* decimal point pt_BR */ + '\0', /* thousands_sep pt_BR */ + "\x80\x80", /* grouping pt_BR */ + &global_errmsgs[pt_PT] +); +/***** LOCALE END pt_BR *****/ + +/***** LOCALE BEGIN pt_PT: Portuguese - Portugal *****/ +static const char *my_locale_month_names_pt_PT[13] = + {"Janeiro","Fevereiro","Março","Abril","Maio","Junho","Julho","Agosto","Setembro","Outubro","Novembro","Dezembro", NullS }; +static const char *my_locale_ab_month_names_pt_PT[13] = + {"Jan","Fev","Mar","Abr","Mai","Jun","Jul","Ago","Set","Out","Nov","Dez", NullS }; +static const char *my_locale_day_names_pt_PT[8] = + {"Segunda","Terça","Quarta","Quinta","Sexta","Sábado","Domingo", NullS }; +static const char *my_locale_ab_day_names_pt_PT[8] = + {"Seg","Ter","Qua","Qui","Sex","Sáb","Dom", NullS }; +static TYPELIB my_locale_typelib_month_names_pt_PT = + { array_elements(my_locale_month_names_pt_PT)-1, "", my_locale_month_names_pt_PT, NULL }; +static 
TYPELIB my_locale_typelib_ab_month_names_pt_PT = + { array_elements(my_locale_ab_month_names_pt_PT)-1, "", my_locale_ab_month_names_pt_PT, NULL }; +static TYPELIB my_locale_typelib_day_names_pt_PT = + { array_elements(my_locale_day_names_pt_PT)-1, "", my_locale_day_names_pt_PT, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_pt_PT = + { array_elements(my_locale_ab_day_names_pt_PT)-1, "", my_locale_ab_day_names_pt_PT, NULL }; +MY_LOCALE my_locale_pt_PT +( + 41, + "pt_PT", + "Portuguese - Portugal", + FALSE, + &my_locale_typelib_month_names_pt_PT, + &my_locale_typelib_ab_month_names_pt_PT, + &my_locale_typelib_day_names_pt_PT, + &my_locale_typelib_ab_day_names_pt_PT, + 9, + 7, + ',', /* decimal point pt_PT */ + '\0', /* thousands_sep pt_PT */ + "\x80\x80", /* grouping pt_PT */ + &global_errmsgs[pt_PT] +); +/***** LOCALE END pt_PT *****/ + +/***** LOCALE BEGIN ro_RO: Romanian - Romania *****/ +static const char *my_locale_month_names_ro_RO[13] = + {"Ianuarie","Februarie","Martie","Aprilie","Mai","Iunie","Iulie","August","Septembrie","Octombrie","Noiembrie","Decembrie", NullS }; +static const char *my_locale_ab_month_names_ro_RO[13] = + {"ian","feb","mar","apr","mai","iun","iul","aug","sep","oct","nov","dec", NullS }; +static const char *my_locale_day_names_ro_RO[8] = + {"Luni","Marţi","Miercuri","Joi","Vineri","Sâmbătă","Duminică", NullS }; +static const char *my_locale_ab_day_names_ro_RO[8] = + {"Lu","Ma","Mi","Jo","Vi","Sâ","Du", NullS }; +static TYPELIB my_locale_typelib_month_names_ro_RO = + { array_elements(my_locale_month_names_ro_RO)-1, "", my_locale_month_names_ro_RO, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_ro_RO = + { array_elements(my_locale_ab_month_names_ro_RO)-1, "", my_locale_ab_month_names_ro_RO, NULL }; +static TYPELIB my_locale_typelib_day_names_ro_RO = + { array_elements(my_locale_day_names_ro_RO)-1, "", my_locale_day_names_ro_RO, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_ro_RO = + { 
array_elements(my_locale_ab_day_names_ro_RO)-1, "", my_locale_ab_day_names_ro_RO, NULL }; +MY_LOCALE my_locale_ro_RO +( + 42, + "ro_RO", + "Romanian - Romania", + FALSE, + &my_locale_typelib_month_names_ro_RO, + &my_locale_typelib_ab_month_names_ro_RO, + &my_locale_typelib_day_names_ro_RO, + &my_locale_typelib_ab_day_names_ro_RO, + 10, + 8, + ',', /* decimal point ro_RO */ + '.', /* thousands_sep ro_RO */ + "\x03\x03", /* grouping ro_RO */ + &global_errmsgs[ro_RO] +); +/***** LOCALE END ro_RO *****/ + +/***** LOCALE BEGIN ru_RU: Russian - Russia *****/ +static const char *my_locale_month_names_ru_RU[13] = + {"Января","Февраля","Марта","Апреля","Мая","Июня","Июля","Августа","Сентября","Октября","Ноября","Декабря", NullS }; +static const char *my_locale_ab_month_names_ru_RU[13] = + {"Янв","Фев","Мар","Апр","Май","Июн","Июл","Авг","Сен","Окт","Ноя","Дек", NullS }; +static const char *my_locale_day_names_ru_RU[8] = + {"Понедельник","Вторник","Среда","Четверг","Пятница","Суббота","Воскресенье", NullS }; +static const char *my_locale_ab_day_names_ru_RU[8] = + {"Пнд","Втр","Срд","Чтв","Птн","Сбт","Вск", NullS }; +static TYPELIB my_locale_typelib_month_names_ru_RU = + { array_elements(my_locale_month_names_ru_RU)-1, "", my_locale_month_names_ru_RU, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_ru_RU = + { array_elements(my_locale_ab_month_names_ru_RU)-1, "", my_locale_ab_month_names_ru_RU, NULL }; +static TYPELIB my_locale_typelib_day_names_ru_RU = + { array_elements(my_locale_day_names_ru_RU)-1, "", my_locale_day_names_ru_RU, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_ru_RU = + { array_elements(my_locale_ab_day_names_ru_RU)-1, "", my_locale_ab_day_names_ru_RU, NULL }; +MY_LOCALE my_locale_ru_RU +( + 43, + "ru_RU", + "Russian - Russia", + FALSE, + &my_locale_typelib_month_names_ru_RU, + &my_locale_typelib_ab_month_names_ru_RU, + &my_locale_typelib_day_names_ru_RU, + &my_locale_typelib_ab_day_names_ru_RU, + 8, + 11, + ',', /* decimal point ru_RU */ + 
' ', /* thousands_sep ru_RU */ + "\x03\x03", /* grouping ru_RU */ + &global_errmsgs[ru_RU] +); +/***** LOCALE END ru_RU *****/ + +/***** LOCALE BEGIN ru_UA: Russian - Ukraine *****/ +static const char *my_locale_month_names_ru_UA[13] = + {"Январь","Февраль","Март","Апрель","Май","Июнь","Июль","Август","Сентябрь","Октябрь","Ноябрь","Декабрь", NullS }; +static const char *my_locale_ab_month_names_ru_UA[13] = + {"Янв","Фев","Мар","Апр","Май","Июн","Июл","Авг","Сен","Окт","Ноя","Дек", NullS }; +static const char *my_locale_day_names_ru_UA[8] = + {"Понедельник","Вторник","Среда","Четверг","Пятница","Суббота","Воскресенье", NullS }; +static const char *my_locale_ab_day_names_ru_UA[8] = + {"Пнд","Вто","Срд","Чтв","Птн","Суб","Вск", NullS }; +static TYPELIB my_locale_typelib_month_names_ru_UA = + { array_elements(my_locale_month_names_ru_UA)-1, "", my_locale_month_names_ru_UA, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_ru_UA = + { array_elements(my_locale_ab_month_names_ru_UA)-1, "", my_locale_ab_month_names_ru_UA, NULL }; +static TYPELIB my_locale_typelib_day_names_ru_UA = + { array_elements(my_locale_day_names_ru_UA)-1, "", my_locale_day_names_ru_UA, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_ru_UA = + { array_elements(my_locale_ab_day_names_ru_UA)-1, "", my_locale_ab_day_names_ru_UA, NULL }; +MY_LOCALE my_locale_ru_UA +( + 44, + "ru_UA", + "Russian - Ukraine", + FALSE, + &my_locale_typelib_month_names_ru_UA, + &my_locale_typelib_ab_month_names_ru_UA, + &my_locale_typelib_day_names_ru_UA, + &my_locale_typelib_ab_day_names_ru_UA, + 8, + 11, + ',', /* decimal point ru_UA */ + '.', /* thousands_sep ru_UA */ + "\x03\x03", /* grouping ru_UA */ + &global_errmsgs[ru_RU] +); +/***** LOCALE END ru_UA *****/ + +/***** LOCALE BEGIN sk_SK: Slovak - Slovakia *****/ +static const char *my_locale_month_names_sk_SK[13] = + {"január","február","marec","apríl","máj","jún","júl","august","september","október","november","december", NullS }; +static const char 
*my_locale_ab_month_names_sk_SK[13] = + {"jan","feb","mar","apr","máj","jún","júl","aug","sep","okt","nov","dec", NullS }; +static const char *my_locale_day_names_sk_SK[8] = + {"Pondelok","Utorok","Streda","Štvrtok","Piatok","Sobota","Nedeľa", NullS }; +static const char *my_locale_ab_day_names_sk_SK[8] = + {"Po","Ut","St","Št","Pi","So","Ne", NullS }; +static TYPELIB my_locale_typelib_month_names_sk_SK = + { array_elements(my_locale_month_names_sk_SK)-1, "", my_locale_month_names_sk_SK, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_sk_SK = + { array_elements(my_locale_ab_month_names_sk_SK)-1, "", my_locale_ab_month_names_sk_SK, NULL }; +static TYPELIB my_locale_typelib_day_names_sk_SK = + { array_elements(my_locale_day_names_sk_SK)-1, "", my_locale_day_names_sk_SK, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_sk_SK = + { array_elements(my_locale_ab_day_names_sk_SK)-1, "", my_locale_ab_day_names_sk_SK, NULL }; +MY_LOCALE my_locale_sk_SK +( + 45, + "sk_SK", + "Slovak - Slovakia", + FALSE, + &my_locale_typelib_month_names_sk_SK, + &my_locale_typelib_ab_month_names_sk_SK, + &my_locale_typelib_day_names_sk_SK, + &my_locale_typelib_ab_day_names_sk_SK, + 9, + 8, + ',', /* decimal point sk_SK */ + ' ', /* thousands_sep sk_SK */ + "\x03\x03", /* grouping sk_SK */ + &global_errmsgs[sk_SK] +); +/***** LOCALE END sk_SK *****/ + +/***** LOCALE BEGIN sl_SI: Slovenian - Slovenia *****/ +static const char *my_locale_month_names_sl_SI[13] = + {"januar","februar","marec","april","maj","junij","julij","avgust","september","oktober","november","december", NullS }; +static const char *my_locale_ab_month_names_sl_SI[13] = + {"jan","feb","mar","apr","maj","jun","jul","avg","sep","okt","nov","dec", NullS }; +static const char *my_locale_day_names_sl_SI[8] = + {"ponedeljek","torek","sreda","četrtek","petek","sobota","nedelja", NullS }; +static const char *my_locale_ab_day_names_sl_SI[8] = + {"pon","tor","sre","čet","pet","sob","ned", NullS }; +static TYPELIB 
my_locale_typelib_month_names_sl_SI = + { array_elements(my_locale_month_names_sl_SI)-1, "", my_locale_month_names_sl_SI, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_sl_SI = + { array_elements(my_locale_ab_month_names_sl_SI)-1, "", my_locale_ab_month_names_sl_SI, NULL }; +static TYPELIB my_locale_typelib_day_names_sl_SI = + { array_elements(my_locale_day_names_sl_SI)-1, "", my_locale_day_names_sl_SI, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_sl_SI = + { array_elements(my_locale_ab_day_names_sl_SI)-1, "", my_locale_ab_day_names_sl_SI, NULL }; +MY_LOCALE my_locale_sl_SI +( + 46, + "sl_SI", + "Slovenian - Slovenia", + FALSE, + &my_locale_typelib_month_names_sl_SI, + &my_locale_typelib_ab_month_names_sl_SI, + &my_locale_typelib_day_names_sl_SI, + &my_locale_typelib_ab_day_names_sl_SI, + 9, + 10, + ',', /* decimal point sl_SI */ + ' ', /* thousands_sep sl_SI */ + "\x80\x80", /* grouping sl_SI */ + &global_errmsgs[en_US] +); +/***** LOCALE END sl_SI *****/ + +/***** LOCALE BEGIN sq_AL: Albanian - Albania *****/ +static const char *my_locale_month_names_sq_AL[13] = + {"janar","shkurt","mars","prill","maj","qershor","korrik","gusht","shtator","tetor","nëntor","dhjetor", NullS }; +static const char *my_locale_ab_month_names_sq_AL[13] = + {"Jan","Shk","Mar","Pri","Maj","Qer","Kor","Gsh","Sht","Tet","Nën","Dhj", NullS }; +static const char *my_locale_day_names_sq_AL[8] = + {"e hënë ","e martë ","e mërkurë ","e enjte ","e premte ","e shtunë ","e diel ", NullS }; +static const char *my_locale_ab_day_names_sq_AL[8] = + {"Hën ","Mar ","Mër ","Enj ","Pre ","Sht ","Die ", NullS }; +static TYPELIB my_locale_typelib_month_names_sq_AL = + { array_elements(my_locale_month_names_sq_AL)-1, "", my_locale_month_names_sq_AL, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_sq_AL = + { array_elements(my_locale_ab_month_names_sq_AL)-1, "", my_locale_ab_month_names_sq_AL, NULL }; +static TYPELIB my_locale_typelib_day_names_sq_AL = + { 
array_elements(my_locale_day_names_sq_AL)-1, "", my_locale_day_names_sq_AL, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_sq_AL = + { array_elements(my_locale_ab_day_names_sq_AL)-1, "", my_locale_ab_day_names_sq_AL, NULL }; +MY_LOCALE my_locale_sq_AL +( + 47, + "sq_AL", + "Albanian - Albania", + FALSE, + &my_locale_typelib_month_names_sq_AL, + &my_locale_typelib_ab_month_names_sq_AL, + &my_locale_typelib_day_names_sq_AL, + &my_locale_typelib_ab_day_names_sq_AL, + 7, + 10, + ',', /* decimal point sq_AL */ + '.', /* thousands_sep sq_AL */ + "\x03", /* grouping sq_AL */ + &global_errmsgs[en_US] +); +/***** LOCALE END sq_AL *****/ + +/***** LOCALE BEGIN sr_RS: Serbian - Serbia *****/ +static const char *my_locale_month_names_sr_RS[13] = + {"januar","februar","mart","april","maj","juni","juli","avgust","septembar","oktobar","novembar","decembar", NullS }; +static const char *my_locale_ab_month_names_sr_RS[13] = + {"jan","feb","mar","apr","maj","jun","jul","avg","sep","okt","nov","dec", NullS }; +static const char *my_locale_day_names_sr_RS[8] = + {"ponedeljak","utorak","sreda","četvrtak","petak","subota","nedelja", NullS }; +static const char *my_locale_ab_day_names_sr_RS[8] = + {"pon","uto","sre","čet","pet","sub","ned", NullS }; +static TYPELIB my_locale_typelib_month_names_sr_RS = + { array_elements(my_locale_month_names_sr_RS)-1, "", my_locale_month_names_sr_RS, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_sr_RS = + { array_elements(my_locale_ab_month_names_sr_RS)-1, "", my_locale_ab_month_names_sr_RS, NULL }; +static TYPELIB my_locale_typelib_day_names_sr_RS = + { array_elements(my_locale_day_names_sr_RS)-1, "", my_locale_day_names_sr_RS, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_sr_RS = + { array_elements(my_locale_ab_day_names_sr_RS)-1, "", my_locale_ab_day_names_sr_RS, NULL }; +MY_LOCALE my_locale_sr_YU /* Deprecated, use sr_RS instead */ +( + 48, + "sr_YU", + "Serbian - Yugoslavia", + FALSE, + 
&my_locale_typelib_month_names_sr_RS, + &my_locale_typelib_ab_month_names_sr_RS, + &my_locale_typelib_day_names_sr_RS, + &my_locale_typelib_ab_day_names_sr_RS, + 9, + 10, + '.', /* decimal point sr_RS */ + '\0', /* thousands_sep sr_RS */ + "\x80", /* grouping sr_RS */ + &global_errmsgs[sr_RS] +); + +MY_LOCALE my_locale_sr_RS +( + 48, + "sr_RS", + "Serbian - Serbia", + FALSE, + &my_locale_typelib_month_names_sr_RS, + &my_locale_typelib_ab_month_names_sr_RS, + &my_locale_typelib_day_names_sr_RS, + &my_locale_typelib_ab_day_names_sr_RS, + 9, + 10, + '.', /* decimal point sr_RS */ + '\0', /* thousands_sep sr_RS */ + "\x80", /* grouping sr_RS */ + &global_errmsgs[sr_RS] +); +/***** LOCALE END sr_RS *****/ + +/***** LOCALE BEGIN sv_SE: Swedish - Sweden *****/ +static const char *my_locale_month_names_sv_SE[13] = + {"januari","februari","mars","april","maj","juni","juli","augusti","september","oktober","november","december", NullS }; +static const char *my_locale_ab_month_names_sv_SE[13] = + {"jan","feb","mar","apr","maj","jun","jul","aug","sep","okt","nov","dec", NullS }; +static const char *my_locale_day_names_sv_SE[8] = + {"måndag","tisdag","onsdag","torsdag","fredag","lördag","söndag", NullS }; +static const char *my_locale_ab_day_names_sv_SE[8] = + {"mån","tis","ons","tor","fre","lör","sön", NullS }; +static TYPELIB my_locale_typelib_month_names_sv_SE = + { array_elements(my_locale_month_names_sv_SE)-1, "", my_locale_month_names_sv_SE, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_sv_SE = + { array_elements(my_locale_ab_month_names_sv_SE)-1, "", my_locale_ab_month_names_sv_SE, NULL }; +static TYPELIB my_locale_typelib_day_names_sv_SE = + { array_elements(my_locale_day_names_sv_SE)-1, "", my_locale_day_names_sv_SE, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_sv_SE = + { array_elements(my_locale_ab_day_names_sv_SE)-1, "", my_locale_ab_day_names_sv_SE, NULL }; +MY_LOCALE my_locale_sv_SE +( + 3, + "sv_SE", + "Swedish - Sweden", + FALSE, + 
&my_locale_typelib_month_names_sv_SE, + &my_locale_typelib_ab_month_names_sv_SE, + &my_locale_typelib_day_names_sv_SE, + &my_locale_typelib_ab_day_names_sv_SE, + 9, + 7, + ',', /* decimal point sv_SE */ + ' ', /* thousands_sep sv_SE */ + "\x03\x03", /* grouping sv_SE */ + &global_errmsgs[sv_SE] +); +/***** LOCALE END sv_SE *****/ + +/***** LOCALE BEGIN ta_IN: Tamil - India *****/ +static const char *my_locale_month_names_ta_IN[13] = + {"ஜனவரி","பெப்ரவரி","மார்ச்","ஏப்ரல்","மே","ஜூன்","ஜூலை","ஆகஸ்ட்","செப்டம்பர்","அக்டோபர்","நவம்பர்","டிசம்பர்r", NullS }; +static const char *my_locale_ab_month_names_ta_IN[13] = + {"ஜனவரி","பெப்ரவரி","மார்ச்","ஏப்ரல்","மே","ஜூன்","ஜூலை","ஆகஸ்ட்","செப்டம்பர்","அக்டோபர்","நவம்பர்","டிசம்பர்r", NullS }; +static const char *my_locale_day_names_ta_IN[8] = + {"திங்கள்","செவ்வாய்","புதன்","வியாழன்","வெள்ளி","சனி","ஞாயிறு", NullS }; +static const char *my_locale_ab_day_names_ta_IN[8] = + {"த","ச","ப","வ","வ","ச","ஞ", NullS }; +static TYPELIB my_locale_typelib_month_names_ta_IN = + { array_elements(my_locale_month_names_ta_IN)-1, "", my_locale_month_names_ta_IN, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_ta_IN = + { array_elements(my_locale_ab_month_names_ta_IN)-1, "", my_locale_ab_month_names_ta_IN, NULL }; +static TYPELIB my_locale_typelib_day_names_ta_IN = + { array_elements(my_locale_day_names_ta_IN)-1, "", my_locale_day_names_ta_IN, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_ta_IN = + { array_elements(my_locale_ab_day_names_ta_IN)-1, "", my_locale_ab_day_names_ta_IN, NULL }; +MY_LOCALE my_locale_ta_IN +( + 49, + "ta_IN", + "Tamil - India", + FALSE, + &my_locale_typelib_month_names_ta_IN, + &my_locale_typelib_ab_month_names_ta_IN, + &my_locale_typelib_day_names_ta_IN, + &my_locale_typelib_ab_day_names_ta_IN, + 10, + 8, + '.', /* decimal point ta_IN */ + ',', /* thousands_sep ta_IN */ + "\x03\x02", /* grouping ta_IN */ + &global_errmsgs[en_US] +); +/***** LOCALE END ta_IN *****/ + +/***** LOCALE BEGIN te_IN: 
Telugu - India *****/ +static const char *my_locale_month_names_te_IN[13] = + {"జనవరి","ఫిబ్రవరి","మార్చి","ఏప్రిల్","మే","జూన్","జూలై","ఆగస్టు","సెప్టెంబర్","అక్టోబర్","నవంబర్","డిసెంబర్", NullS }; +static const char *my_locale_ab_month_names_te_IN[13] = + {"జనవరి","ఫిబ్రవరి","మార్చి","ఏప్రిల్","మే","జూన్","జూలై","ఆగస్టు","సెప్టెంబర్","అక్టోబర్","నవంబర్","డిసెంబర్", NullS }; +static const char *my_locale_day_names_te_IN[8] = + {"సోమవారం","మంగళవారం","బుధవారం","గురువారం","శుక్రవారం","శనివారం","ఆదివారం", NullS }; +static const char *my_locale_ab_day_names_te_IN[8] = + {"సోమ","మంగళ","బుధ","గురు","శుక్ర","శని","ఆది", NullS }; +static TYPELIB my_locale_typelib_month_names_te_IN = + { array_elements(my_locale_month_names_te_IN)-1, "", my_locale_month_names_te_IN, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_te_IN = + { array_elements(my_locale_ab_month_names_te_IN)-1, "", my_locale_ab_month_names_te_IN, NULL }; +static TYPELIB my_locale_typelib_day_names_te_IN = + { array_elements(my_locale_day_names_te_IN)-1, "", my_locale_day_names_te_IN, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_te_IN = + { array_elements(my_locale_ab_day_names_te_IN)-1, "", my_locale_ab_day_names_te_IN, NULL }; +MY_LOCALE my_locale_te_IN +( + 50, + "te_IN", + "Telugu - India", + FALSE, + &my_locale_typelib_month_names_te_IN, + &my_locale_typelib_ab_month_names_te_IN, + &my_locale_typelib_day_names_te_IN, + &my_locale_typelib_ab_day_names_te_IN, + 10, + 9, + '.', /* decimal point te_IN */ + ',', /* thousands_sep te_IN */ + "\x03\x02", /* grouping te_IN */ + &global_errmsgs[en_US] +); +/***** LOCALE END te_IN *****/ + +/***** LOCALE BEGIN th_TH: Thai - Thailand *****/ +static const char *my_locale_month_names_th_TH[13] = + {"มกราคม","กุมภาพันธ์","มีนาคม","เมษายน","พฤษภาคม","มิถุนายน","กรกฎาคม","สิงหาคม","กันยายน","ตุลาคม","พฤศจิกายน","ธันวาคม", NullS }; +static const char *my_locale_ab_month_names_th_TH[13] = + 
{"ม.ค.","ก.พ.","มี.ค.","เม.ย.","พ.ค.","มิ.ย.","ก.ค.","ส.ค.","ก.ย.","ต.ค.","พ.ย.","ธ.ค.", NullS }; +static const char *my_locale_day_names_th_TH[8] = + {"จันทร์","อังคาร","พุธ","พฤหัสบดี","ศุกร์","เสาร์","อาทิตย์", NullS }; +static const char *my_locale_ab_day_names_th_TH[8] = + {"จ.","อ.","พ.","พฤ.","ศ.","ส.","อา.", NullS }; +static TYPELIB my_locale_typelib_month_names_th_TH = + { array_elements(my_locale_month_names_th_TH)-1, "", my_locale_month_names_th_TH, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_th_TH = + { array_elements(my_locale_ab_month_names_th_TH)-1, "", my_locale_ab_month_names_th_TH, NULL }; +static TYPELIB my_locale_typelib_day_names_th_TH = + { array_elements(my_locale_day_names_th_TH)-1, "", my_locale_day_names_th_TH, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_th_TH = + { array_elements(my_locale_ab_day_names_th_TH)-1, "", my_locale_ab_day_names_th_TH, NULL }; +MY_LOCALE my_locale_th_TH +( + 51, + "th_TH", + "Thai - Thailand", + FALSE, + &my_locale_typelib_month_names_th_TH, + &my_locale_typelib_ab_month_names_th_TH, + &my_locale_typelib_day_names_th_TH, + &my_locale_typelib_ab_day_names_th_TH, + 10, + 8, + '.', /* decimal point th_TH */ + ',', /* thousands_sep th_TH */ + "\x03", /* grouping th_TH */ + &global_errmsgs[en_US] +); +/***** LOCALE END th_TH *****/ + +/***** LOCALE BEGIN tr_TR: Turkish - Türkiye *****/ +static const char *my_locale_month_names_tr_TR[13] = + {"Ocak","Şubat","Mart","Nisan","Mayıs","Haziran","Temmuz","Ağustos","Eylül","Ekim","Kasım","Aralık", NullS }; +static const char *my_locale_ab_month_names_tr_TR[13] = + {"Oca","Şub","Mar","Nis","May","Haz","Tem","Ağu","Eyl","Eki","Kas","Ara", NullS }; +static const char *my_locale_day_names_tr_TR[8] = + {"Pazartesi","Salı","Çarşamba","Perşembe","Cuma","Cumartesi","Pazar", NullS }; +static const char *my_locale_ab_day_names_tr_TR[8] = + {"Pzt","Sal","Çrş","Prş","Cum","Cts","Paz", NullS }; +static TYPELIB my_locale_typelib_month_names_tr_TR = + { 
array_elements(my_locale_month_names_tr_TR)-1, "", my_locale_month_names_tr_TR, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_tr_TR = + { array_elements(my_locale_ab_month_names_tr_TR)-1, "", my_locale_ab_month_names_tr_TR, NULL }; +static TYPELIB my_locale_typelib_day_names_tr_TR = + { array_elements(my_locale_day_names_tr_TR)-1, "", my_locale_day_names_tr_TR, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_tr_TR = + { array_elements(my_locale_ab_day_names_tr_TR)-1, "", my_locale_ab_day_names_tr_TR, NULL }; +MY_LOCALE my_locale_tr_TR +( + 52, + "tr_TR", + "Turkish - Türkiye", + FALSE, + &my_locale_typelib_month_names_tr_TR, + &my_locale_typelib_ab_month_names_tr_TR, + &my_locale_typelib_day_names_tr_TR, + &my_locale_typelib_ab_day_names_tr_TR, + 7, + 9, + ',', /* decimal point tr_TR */ + '.', /* thousands_sep tr_TR */ + "\x03\x03", /* grouping tr_TR */ + &global_errmsgs[en_US] +); +/***** LOCALE END tr_TR *****/ + +/***** LOCALE BEGIN uk_UA: Ukrainian - Ukraine *****/ +static const char *my_locale_month_names_uk_UA[13] = + {"Січень","Лютий","Березень","Квітень","Травень","Червень","Липень","Серпень","Вересень","Жовтень","Листопад","Грудень", NullS }; +static const char *my_locale_ab_month_names_uk_UA[13] = + {"Січ","Лют","Бер","Кві","Тра","Чер","Лип","Сер","Вер","Жов","Лис","Гру", NullS }; +static const char *my_locale_day_names_uk_UA[8] = + {"Понеділок","Вівторок","Середа","Четвер","П'ятниця","Субота","Неділя", NullS }; +static const char *my_locale_ab_day_names_uk_UA[8] = + {"Пнд","Втр","Срд","Чтв","Птн","Сбт","Ндл", NullS }; +static TYPELIB my_locale_typelib_month_names_uk_UA = + { array_elements(my_locale_month_names_uk_UA)-1, "", my_locale_month_names_uk_UA, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_uk_UA = + { array_elements(my_locale_ab_month_names_uk_UA)-1, "", my_locale_ab_month_names_uk_UA, NULL }; +static TYPELIB my_locale_typelib_day_names_uk_UA = + { array_elements(my_locale_day_names_uk_UA)-1, "", 
my_locale_day_names_uk_UA, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_uk_UA = + { array_elements(my_locale_ab_day_names_uk_UA)-1, "", my_locale_ab_day_names_uk_UA, NULL }; +MY_LOCALE my_locale_uk_UA +( + 53, + "uk_UA", + "Ukrainian - Ukraine", + FALSE, + &my_locale_typelib_month_names_uk_UA, + &my_locale_typelib_ab_month_names_uk_UA, + &my_locale_typelib_day_names_uk_UA, + &my_locale_typelib_ab_day_names_uk_UA, + 8, + 9, + ',', /* decimal point uk_UA */ + '.', /* thousands_sep uk_UA */ + "\x03\x03", /* grouping uk_UA */ + &global_errmsgs[uk_UA] +); +/***** LOCALE END uk_UA *****/ + +/***** LOCALE BEGIN ur_PK: Urdu - Pakistan *****/ +static const char *my_locale_month_names_ur_PK[13] = + {"جنوري","فروري","مارچ","اپريل","مٓی","جون","جولاي","اگست","ستمبر","اكتوبر","نومبر","دسمبر", NullS }; +static const char *my_locale_ab_month_names_ur_PK[13] = + {"جنوري","فروري","مارچ","اپريل","مٓی","جون","جولاي","اگست","ستمبر","اكتوبر","نومبر","دسمبر", NullS }; +static const char *my_locale_day_names_ur_PK[8] = + {"پير","منگل","بدھ","جمعرات","جمعه","هفته","اتوار", NullS }; +static const char *my_locale_ab_day_names_ur_PK[8] = + {"پير","منگل","بدھ","جمعرات","جمعه","هفته","اتوار", NullS }; +static TYPELIB my_locale_typelib_month_names_ur_PK = + { array_elements(my_locale_month_names_ur_PK)-1, "", my_locale_month_names_ur_PK, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_ur_PK = + { array_elements(my_locale_ab_month_names_ur_PK)-1, "", my_locale_ab_month_names_ur_PK, NULL }; +static TYPELIB my_locale_typelib_day_names_ur_PK = + { array_elements(my_locale_day_names_ur_PK)-1, "", my_locale_day_names_ur_PK, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_ur_PK = + { array_elements(my_locale_ab_day_names_ur_PK)-1, "", my_locale_ab_day_names_ur_PK, NULL }; +MY_LOCALE my_locale_ur_PK +( + 54, + "ur_PK", + "Urdu - Pakistan", + FALSE, + &my_locale_typelib_month_names_ur_PK, + &my_locale_typelib_ab_month_names_ur_PK, + &my_locale_typelib_day_names_ur_PK, + 
&my_locale_typelib_ab_day_names_ur_PK, + 6, + 6, + '.', /* decimal point ur_PK */ + ',', /* thousands_sep ur_PK */ + "\x03\x03", /* grouping ur_PK */ + &global_errmsgs[en_US] +); +/***** LOCALE END ur_PK *****/ + +/***** LOCALE BEGIN vi_VN: Vietnamese - Vietnam *****/ +static const char *my_locale_month_names_vi_VN[13] = + {"Tháng một","Tháng hai","Tháng ba","Tháng tư","Tháng năm","Tháng sáu","Tháng bảy","Tháng tám","Tháng chín","Tháng mười","Tháng mười một","Tháng mười hai", NullS }; +static const char *my_locale_ab_month_names_vi_VN[13] = + {"Thg 1","Thg 2","Thg 3","Thg 4","Thg 5","Thg 6","Thg 7","Thg 8","Thg 9","Thg 10","Thg 11","Thg 12", NullS }; +static const char *my_locale_day_names_vi_VN[8] = + {"Thứ hai ","Thứ ba ","Thứ tư ","Thứ năm ","Thứ sáu ","Thứ bảy ","Chủ nhật ", NullS }; +static const char *my_locale_ab_day_names_vi_VN[8] = + {"Th 2 ","Th 3 ","Th 4 ","Th 5 ","Th 6 ","Th 7 ","CN ", NullS }; +static TYPELIB my_locale_typelib_month_names_vi_VN = + { array_elements(my_locale_month_names_vi_VN)-1, "", my_locale_month_names_vi_VN, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_vi_VN = + { array_elements(my_locale_ab_month_names_vi_VN)-1, "", my_locale_ab_month_names_vi_VN, NULL }; +static TYPELIB my_locale_typelib_day_names_vi_VN = + { array_elements(my_locale_day_names_vi_VN)-1, "", my_locale_day_names_vi_VN, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_vi_VN = + { array_elements(my_locale_ab_day_names_vi_VN)-1, "", my_locale_ab_day_names_vi_VN, NULL }; +MY_LOCALE my_locale_vi_VN +( + 55, + "vi_VN", + "Vietnamese - Vietnam", + FALSE, + &my_locale_typelib_month_names_vi_VN, + &my_locale_typelib_ab_month_names_vi_VN, + &my_locale_typelib_day_names_vi_VN, + &my_locale_typelib_ab_day_names_vi_VN, + 16, + 11, + ',', /* decimal point vi_VN */ + '.', /* thousands_sep vi_VN */ + "\x03\x03", /* grouping vi_VN */ + &global_errmsgs[en_US] +); +/***** LOCALE END vi_VN *****/ + +/***** LOCALE BEGIN zh_CN: Chinese - Peoples Republic of China 
*****/ +static const char *my_locale_month_names_zh_CN[13] = + {"一月","二月","三月","四月","五月","六月","七月","八月","九月","十月","十一月","十二月", NullS }; +static const char *my_locale_ab_month_names_zh_CN[13] = + {" 1月"," 2月"," 3月"," 4月"," 5月"," 6月"," 7月"," 8月"," 9月","10月","11月","12月", NullS }; +static const char *my_locale_day_names_zh_CN[8] = + {"星期一","星期二","星期三","星期四","星期五","星期六","星期日", NullS }; +static const char *my_locale_ab_day_names_zh_CN[8] = + {"一","二","三","四","五","六","日", NullS }; +static TYPELIB my_locale_typelib_month_names_zh_CN = + { array_elements(my_locale_month_names_zh_CN)-1, "", my_locale_month_names_zh_CN, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_zh_CN = + { array_elements(my_locale_ab_month_names_zh_CN)-1, "", my_locale_ab_month_names_zh_CN, NULL }; +static TYPELIB my_locale_typelib_day_names_zh_CN = + { array_elements(my_locale_day_names_zh_CN)-1, "", my_locale_day_names_zh_CN, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_zh_CN = + { array_elements(my_locale_ab_day_names_zh_CN)-1, "", my_locale_ab_day_names_zh_CN, NULL }; +MY_LOCALE my_locale_zh_CN +( + 56, + "zh_CN", + "Chinese - Peoples Republic of China", + FALSE, + &my_locale_typelib_month_names_zh_CN, + &my_locale_typelib_ab_month_names_zh_CN, + &my_locale_typelib_day_names_zh_CN, + &my_locale_typelib_ab_day_names_zh_CN, + 3, + 3, + '.', /* decimal point zh_CN */ + ',', /* thousands_sep zh_CN */ + "\x03", /* grouping zh_CN */ + &global_errmsgs[zh_CN] +); +/***** LOCALE END zh_CN *****/ + +/***** LOCALE BEGIN zh_TW: Chinese - Taiwan *****/ +static const char *my_locale_month_names_zh_TW[13] = + {"一月","二月","三月","四月","五月","六月","七月","八月","九月","十月","十一月","十二月", NullS }; +static const char *my_locale_ab_month_names_zh_TW[13] = + {" 1月"," 2月"," 3月"," 4月"," 5月"," 6月"," 7月"," 8月"," 9月","10月","11月","12月", NullS }; +static const char *my_locale_day_names_zh_TW[8] = + {"週一","週二","週三","週四","週五","週六","週日", NullS }; +static const char *my_locale_ab_day_names_zh_TW[8] = + 
{"一","二","三","四","五","六","日", NullS }; +static TYPELIB my_locale_typelib_month_names_zh_TW = + { array_elements(my_locale_month_names_zh_TW)-1, "", my_locale_month_names_zh_TW, NULL }; +static TYPELIB my_locale_typelib_ab_month_names_zh_TW = + { array_elements(my_locale_ab_month_names_zh_TW)-1, "", my_locale_ab_month_names_zh_TW, NULL }; +static TYPELIB my_locale_typelib_day_names_zh_TW = + { array_elements(my_locale_day_names_zh_TW)-1, "", my_locale_day_names_zh_TW, NULL }; +static TYPELIB my_locale_typelib_ab_day_names_zh_TW = + { array_elements(my_locale_ab_day_names_zh_TW)-1, "", my_locale_ab_day_names_zh_TW, NULL }; +MY_LOCALE my_locale_zh_TW +( + 57, + "zh_TW", + "Chinese - Taiwan", + FALSE, + &my_locale_typelib_month_names_zh_TW, + &my_locale_typelib_ab_month_names_zh_TW, + &my_locale_typelib_day_names_zh_TW, + &my_locale_typelib_ab_day_names_zh_TW, + 3, + 2, + '.', /* decimal point zh_TW */ + ',', /* thousands_sep zh_TW */ + "\x03", /* grouping zh_TW */ + &global_errmsgs[en_US] +); +/***** LOCALE END zh_TW *****/ + +/***** LOCALE BEGIN ar_DZ: Arabic - Algeria *****/ +MY_LOCALE my_locale_ar_DZ +( + 58, + "ar_DZ", + "Arabic - Algeria", + FALSE, + &my_locale_typelib_month_names_ar_BH, + &my_locale_typelib_ab_month_names_ar_BH, + &my_locale_typelib_day_names_ar_BH, + &my_locale_typelib_ab_day_names_ar_BH, + 6, + 8, + '.', /* decimal point ar_DZ */ + ',', /* thousands_sep ar_DZ */ + "\x03", /* grouping ar_DZ */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_DZ *****/ + +/***** LOCALE BEGIN ar_EG: Arabic - Egypt *****/ +MY_LOCALE my_locale_ar_EG +( + 59, + "ar_EG", + "Arabic - Egypt", + FALSE, + &my_locale_typelib_month_names_ar_BH, + &my_locale_typelib_ab_month_names_ar_BH, + &my_locale_typelib_day_names_ar_BH, + &my_locale_typelib_ab_day_names_ar_BH, + 6, + 8, + '.', /* decimal point ar_EG */ + ',', /* thousands_sep ar_EG */ + "\x03", /* grouping ar_EG */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_EG *****/ + +/***** LOCALE BEGIN ar_IN: Arabic - 
Iran *****/ +MY_LOCALE my_locale_ar_IN +( + 60, + "ar_IN", + "Arabic - Iran", + FALSE, + &my_locale_typelib_month_names_ar_BH, + &my_locale_typelib_ab_month_names_ar_BH, + &my_locale_typelib_day_names_ar_BH, + &my_locale_typelib_ab_day_names_ar_BH, + 6, + 8, + '.', /* decimal point ar_IN */ + ',', /* thousands_sep ar_IN */ + "\x03", /* grouping ar_IN */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_IN *****/ + +/***** LOCALE BEGIN ar_IQ: Arabic - Iraq *****/ +MY_LOCALE my_locale_ar_IQ +( + 61, + "ar_IQ", + "Arabic - Iraq", + FALSE, + &my_locale_typelib_month_names_ar_BH, + &my_locale_typelib_ab_month_names_ar_BH, + &my_locale_typelib_day_names_ar_BH, + &my_locale_typelib_ab_day_names_ar_BH, + 6, + 8, + '.', /* decimal point ar_IQ */ + ',', /* thousands_sep ar_IQ */ + "\x03", /* grouping ar_IQ */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_IQ *****/ + +/***** LOCALE BEGIN ar_KW: Arabic - Kuwait *****/ +MY_LOCALE my_locale_ar_KW +( + 62, + "ar_KW", + "Arabic - Kuwait", + FALSE, + &my_locale_typelib_month_names_ar_BH, + &my_locale_typelib_ab_month_names_ar_BH, + &my_locale_typelib_day_names_ar_BH, + &my_locale_typelib_ab_day_names_ar_BH, + 6, + 8, + '.', /* decimal point ar_KW */ + ',', /* thousands_sep ar_KW */ + "\x03", /* grouping ar_KW */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_KW *****/ + +/***** LOCALE BEGIN ar_LB: Arabic - Lebanon *****/ +MY_LOCALE my_locale_ar_LB +( + 63, + "ar_LB", + "Arabic - Lebanon", + FALSE, + &my_locale_typelib_month_names_ar_JO, + &my_locale_typelib_ab_month_names_ar_JO, + &my_locale_typelib_day_names_ar_JO, + &my_locale_typelib_ab_day_names_ar_JO, + 12, + 8, + '.', /* decimal point ar_LB */ + ',', /* thousands_sep ar_LB */ + "\x03", /* grouping ar_LB */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_LB *****/ + +/***** LOCALE BEGIN ar_LY: Arabic - Libya *****/ +MY_LOCALE my_locale_ar_LY +( + 64, + "ar_LY", + "Arabic - Libya", + FALSE, + &my_locale_typelib_month_names_ar_BH, + 
&my_locale_typelib_ab_month_names_ar_BH, + &my_locale_typelib_day_names_ar_BH, + &my_locale_typelib_ab_day_names_ar_BH, + 6, + 8, + '.', /* decimal point ar_LY */ + ',', /* thousands_sep ar_LY */ + "\x03", /* grouping ar_LY */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_LY *****/ + +/***** LOCALE BEGIN ar_MA: Arabic - Morocco *****/ +MY_LOCALE my_locale_ar_MA +( + 65, + "ar_MA", + "Arabic - Morocco", + FALSE, + &my_locale_typelib_month_names_ar_BH, + &my_locale_typelib_ab_month_names_ar_BH, + &my_locale_typelib_day_names_ar_BH, + &my_locale_typelib_ab_day_names_ar_BH, + 6, + 8, + '.', /* decimal point ar_MA */ + ',', /* thousands_sep ar_MA */ + "\x03", /* grouping ar_MA */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_MA *****/ + +/***** LOCALE BEGIN ar_OM: Arabic - Oman *****/ +MY_LOCALE my_locale_ar_OM +( + 66, + "ar_OM", + "Arabic - Oman", + FALSE, + &my_locale_typelib_month_names_ar_BH, + &my_locale_typelib_ab_month_names_ar_BH, + &my_locale_typelib_day_names_ar_BH, + &my_locale_typelib_ab_day_names_ar_BH, + 6, + 8, + '.', /* decimal point ar_OM */ + ',', /* thousands_sep ar_OM */ + "\x03", /* grouping ar_OM */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_OM *****/ + +/***** LOCALE BEGIN ar_QA: Arabic - Qatar *****/ +MY_LOCALE my_locale_ar_QA +( + 67, + "ar_QA", + "Arabic - Qatar", + FALSE, + &my_locale_typelib_month_names_ar_BH, + &my_locale_typelib_ab_month_names_ar_BH, + &my_locale_typelib_day_names_ar_BH, + &my_locale_typelib_ab_day_names_ar_BH, + 6, + 8, + '.', /* decimal point ar_QA */ + ',', /* thousands_sep ar_QA */ + "\x03", /* grouping ar_QA */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_QA *****/ + +/***** LOCALE BEGIN ar_SD: Arabic - Sudan *****/ +MY_LOCALE my_locale_ar_SD +( + 68, + "ar_SD", + "Arabic - Sudan", + FALSE, + &my_locale_typelib_month_names_ar_BH, + &my_locale_typelib_ab_month_names_ar_BH, + &my_locale_typelib_day_names_ar_BH, + &my_locale_typelib_ab_day_names_ar_BH, + 6, + 8, + '.', /* decimal point ar_SD */ + 
',', /* thousands_sep ar_SD */ + "\x03", /* grouping ar_SD */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_SD *****/ + +/***** LOCALE BEGIN ar_TN: Arabic - Tunisia *****/ +MY_LOCALE my_locale_ar_TN +( + 69, + "ar_TN", + "Arabic - Tunisia", + FALSE, + &my_locale_typelib_month_names_ar_BH, + &my_locale_typelib_ab_month_names_ar_BH, + &my_locale_typelib_day_names_ar_BH, + &my_locale_typelib_ab_day_names_ar_BH, + 6, + 8, + '.', /* decimal point ar_TN */ + ',', /* thousands_sep ar_TN */ + "\x03", /* grouping ar_TN */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_TN *****/ + +/***** LOCALE BEGIN ar_YE: Arabic - Yemen *****/ +MY_LOCALE my_locale_ar_YE +( + 70, + "ar_YE", + "Arabic - Yemen", + FALSE, + &my_locale_typelib_month_names_ar_BH, + &my_locale_typelib_ab_month_names_ar_BH, + &my_locale_typelib_day_names_ar_BH, + &my_locale_typelib_ab_day_names_ar_BH, + 6, + 8, + '.', /* decimal point ar_YE */ + ',', /* thousands_sep ar_YE */ + "\x03", /* grouping ar_YE */ + &global_errmsgs[en_US] +); +/***** LOCALE END ar_YE *****/ + +/***** LOCALE BEGIN de_BE: German - Belgium *****/ +MY_LOCALE my_locale_de_BE +( + 71, + "de_BE", + "German - Belgium", + FALSE, + &my_locale_typelib_month_names_de_DE, + &my_locale_typelib_ab_month_names_de_DE, + &my_locale_typelib_day_names_de_DE, + &my_locale_typelib_ab_day_names_de_DE, + 9, + 10, + ',', /* decimal point de_BE */ + '.', /* thousands_sep de_BE */ + "\x03\x03", /* grouping de_BE */ + &global_errmsgs[de_DE] +); +/***** LOCALE END de_BE *****/ + +/***** LOCALE BEGIN de_CH: German - Switzerland *****/ +MY_LOCALE my_locale_de_CH +( + 72, + "de_CH", + "German - Switzerland", + FALSE, + &my_locale_typelib_month_names_de_DE, + &my_locale_typelib_ab_month_names_de_DE, + &my_locale_typelib_day_names_de_DE, + &my_locale_typelib_ab_day_names_de_DE, + 9, + 10, + '.', /* decimal point de_CH */ + '\'', /* thousands_sep de_CH */ + "\x03\x03", /* grouping de_CH */ + &global_errmsgs[de_DE] +); +/***** LOCALE END de_CH *****/ + +/***** 
LOCALE BEGIN de_LU: German - Luxembourg *****/ +MY_LOCALE my_locale_de_LU +( + 73, + "de_LU", + "German - Luxembourg", + FALSE, + &my_locale_typelib_month_names_de_DE, + &my_locale_typelib_ab_month_names_de_DE, + &my_locale_typelib_day_names_de_DE, + &my_locale_typelib_ab_day_names_de_DE, + 9, + 10, + ',', /* decimal point de_LU */ + '.', /* thousands_sep de_LU */ + "\x03\x03", /* grouping de_LU */ + &global_errmsgs[de_DE] +); +/***** LOCALE END de_LU *****/ + +/***** LOCALE BEGIN en_AU: English - Australia *****/ +MY_LOCALE my_locale_en_AU +( + 74, + "en_AU", + "English - Australia", + TRUE, + &my_locale_typelib_month_names_en_US, + &my_locale_typelib_ab_month_names_en_US, + &my_locale_typelib_day_names_en_US, + &my_locale_typelib_ab_day_names_en_US, + 9, + 9, + '.', /* decimal point en_AU */ + ',', /* thousands_sep en_AU */ + "\x03\x03", /* grouping en_AU */ + &global_errmsgs[en_US] +); +/***** LOCALE END en_AU *****/ + +/***** LOCALE BEGIN en_CA: English - Canada *****/ +MY_LOCALE my_locale_en_CA +( + 75, + "en_CA", + "English - Canada", + TRUE, + &my_locale_typelib_month_names_en_US, + &my_locale_typelib_ab_month_names_en_US, + &my_locale_typelib_day_names_en_US, + &my_locale_typelib_ab_day_names_en_US, + 9, + 9, + '.', /* decimal point en_CA */ + ',', /* thousands_sep en_CA */ + "\x03\x03", /* grouping en_CA */ + &global_errmsgs[en_US] +); +/***** LOCALE END en_CA *****/ + +/***** LOCALE BEGIN en_GB: English - United Kingdom *****/ +MY_LOCALE my_locale_en_GB +( + 1, + "en_GB", + "English - United Kingdom", + TRUE, + &my_locale_typelib_month_names_en_US, + &my_locale_typelib_ab_month_names_en_US, + &my_locale_typelib_day_names_en_US, + &my_locale_typelib_ab_day_names_en_US, + 9, + 9, + '.', /* decimal point en_GB */ + ',', /* thousands_sep en_GB */ + "\x03\x03", /* grouping en_GB */ + &global_errmsgs[en_US] +); +/***** LOCALE END en_GB *****/ + +/***** LOCALE BEGIN en_IN: English - India *****/ +MY_LOCALE my_locale_en_IN +( + 76, + "en_IN", + "English - India", 
+ TRUE, + &my_locale_typelib_month_names_en_US, + &my_locale_typelib_ab_month_names_en_US, + &my_locale_typelib_day_names_en_US, + &my_locale_typelib_ab_day_names_en_US, + 9, + 9, + '.', /* decimal point en_IN */ + ',', /* thousands_sep en_IN */ + "\x03\x02", /* grouping en_IN */ + &global_errmsgs[en_US] +); +/***** LOCALE END en_IN *****/ + +/***** LOCALE BEGIN en_NZ: English - New Zealand *****/ +MY_LOCALE my_locale_en_NZ +( + 77, + "en_NZ", + "English - New Zealand", + TRUE, + &my_locale_typelib_month_names_en_US, + &my_locale_typelib_ab_month_names_en_US, + &my_locale_typelib_day_names_en_US, + &my_locale_typelib_ab_day_names_en_US, + 9, + 9, + '.', /* decimal point en_NZ */ + ',', /* thousands_sep en_NZ */ + "\x03\x03", /* grouping en_NZ */ + &global_errmsgs[en_US] +); +/***** LOCALE END en_NZ *****/ + +/***** LOCALE BEGIN en_PH: English - Philippines *****/ +MY_LOCALE my_locale_en_PH +( + 78, + "en_PH", + "English - Philippines", + TRUE, + &my_locale_typelib_month_names_en_US, + &my_locale_typelib_ab_month_names_en_US, + &my_locale_typelib_day_names_en_US, + &my_locale_typelib_ab_day_names_en_US, + 9, + 9, + '.', /* decimal point en_PH */ + ',', /* thousands_sep en_PH */ + "\x03", /* grouping en_PH */ + &global_errmsgs[en_US] +); +/***** LOCALE END en_PH *****/ + +/***** LOCALE BEGIN en_ZA: English - South Africa *****/ +MY_LOCALE my_locale_en_ZA +( + 79, + "en_ZA", + "English - South Africa", + TRUE, + &my_locale_typelib_month_names_en_US, + &my_locale_typelib_ab_month_names_en_US, + &my_locale_typelib_day_names_en_US, + &my_locale_typelib_ab_day_names_en_US, + 9, + 9, + '.', /* decimal point en_ZA */ + ',', /* thousands_sep en_ZA */ + "\x03\x03", /* grouping en_ZA */ + &global_errmsgs[en_US] +); +/***** LOCALE END en_ZA *****/ + +/***** LOCALE BEGIN en_ZW: English - Zimbabwe *****/ +MY_LOCALE my_locale_en_ZW +( + 80, + "en_ZW", + "English - Zimbabwe", + TRUE, + &my_locale_typelib_month_names_en_US, + &my_locale_typelib_ab_month_names_en_US, + 
&my_locale_typelib_day_names_en_US, + &my_locale_typelib_ab_day_names_en_US, + 9, + 9, + '.', /* decimal point en_ZW */ + ',', /* thousands_sep en_ZW */ + "\x03\x03", /* grouping en_ZW */ + &global_errmsgs[en_US] +); +/***** LOCALE END en_ZW *****/ + +/***** LOCALE BEGIN es_AR: Spanish - Argentina *****/ +MY_LOCALE my_locale_es_AR +( + 81, + "es_AR", + "Spanish - Argentina", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + ',', /* decimal point es_AR */ + '.', /* thousands_sep es_AR */ + "\x03\x03", /* grouping es_AR */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_AR *****/ + +/***** LOCALE BEGIN es_BO: Spanish - Bolivia *****/ +MY_LOCALE my_locale_es_BO +( + 82, + "es_BO", + "Spanish - Bolivia", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + ',', /* decimal point es_BO */ + '.', /* thousands_sep es_BO */ + "\x03\x03", /* grouping es_BO */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_BO *****/ + +/***** LOCALE BEGIN es_CL: Spanish - Chile *****/ +MY_LOCALE my_locale_es_CL +( + 83, + "es_CL", + "Spanish - Chile", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + ',', /* decimal point es_CL */ + '.', /* thousands_sep es_CL */ + "\x03\x03", /* grouping es_CL */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_CL *****/ + +/***** LOCALE BEGIN es_CO: Spanish - Columbia *****/ +MY_LOCALE my_locale_es_CO +( + 84, + "es_CO", + "Spanish - Columbia", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + ',', /* decimal point es_CO */ + 
'.', /* thousands_sep es_CO */ + "\x03\x03", /* grouping es_CO */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_CO *****/ + +/***** LOCALE BEGIN es_CR: Spanish - Costa Rica *****/ +MY_LOCALE my_locale_es_CR +( + 85, + "es_CR", + "Spanish - Costa Rica", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + ',', /* decimal point es_CR */ + ' ', /* thousands_sep es_CR */ + "\x03\x03", /* grouping es_CR */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_CR *****/ + +/***** LOCALE BEGIN es_DO: Spanish - Dominican Republic *****/ +MY_LOCALE my_locale_es_DO +( + 86, + "es_DO", + "Spanish - Dominican Republic", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + '.', /* decimal point es_DO */ + ',', /* thousands_sep es_DO */ + "\x03\x03", /* grouping es_DO */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_DO *****/ + +/***** LOCALE BEGIN es_EC: Spanish - Ecuador *****/ +MY_LOCALE my_locale_es_EC +( + 87, + "es_EC", + "Spanish - Ecuador", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + ',', /* decimal point es_EC */ + '.', /* thousands_sep es_EC */ + "\x03\x03", /* grouping es_EC */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_EC *****/ + +/***** LOCALE BEGIN es_GT: Spanish - Guatemala *****/ +MY_LOCALE my_locale_es_GT +( + 88, + "es_GT", + "Spanish - Guatemala", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + '.', /* decimal point es_GT */ + ',', /* thousands_sep es_GT */ + "\x03\x03", /* grouping es_GT */ + &global_errmsgs[es_ES] 
+); +/***** LOCALE END es_GT *****/ + +/***** LOCALE BEGIN es_HN: Spanish - Honduras *****/ +MY_LOCALE my_locale_es_HN +( + 89, + "es_HN", + "Spanish - Honduras", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + '.', /* decimal point es_HN */ + ',', /* thousands_sep es_HN */ + "\x03\x03", /* grouping es_HN */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_HN *****/ + +/***** LOCALE BEGIN es_MX: Spanish - Mexico *****/ +MY_LOCALE my_locale_es_MX +( + 90, + "es_MX", + "Spanish - Mexico", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + '.', /* decimal point es_MX */ + ',', /* thousands_sep es_MX */ + "\x03\x03", /* grouping es_MX */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_MX *****/ + +/***** LOCALE BEGIN es_NI: Spanish - Nicaragua *****/ +MY_LOCALE my_locale_es_NI +( + 91, + "es_NI", + "Spanish - Nicaragua", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + '.', /* decimal point es_NI */ + ',', /* thousands_sep es_NI */ + "\x03\x03", /* grouping es_NI */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_NI *****/ + +/***** LOCALE BEGIN es_PA: Spanish - Panama *****/ +MY_LOCALE my_locale_es_PA +( + 92, + "es_PA", + "Spanish - Panama", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + '.', /* decimal point es_PA */ + ',', /* thousands_sep es_PA */ + "\x03\x03", /* grouping es_PA */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_PA *****/ + +/***** LOCALE BEGIN es_PE: Spanish - Peru *****/ +MY_LOCALE my_locale_es_PE +( + 
93, + "es_PE", + "Spanish - Peru", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + '.', /* decimal point es_PE */ + ',', /* thousands_sep es_PE */ + "\x03\x03", /* grouping es_PE */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_PE *****/ + +/***** LOCALE BEGIN es_PR: Spanish - Puerto Rico *****/ +MY_LOCALE my_locale_es_PR +( + 94, + "es_PR", + "Spanish - Puerto Rico", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + '.', /* decimal point es_PR */ + ',', /* thousands_sep es_PR */ + "\x03\x03", /* grouping es_PR */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_PR *****/ + +/***** LOCALE BEGIN es_PY: Spanish - Paraguay *****/ +MY_LOCALE my_locale_es_PY +( + 95, + "es_PY", + "Spanish - Paraguay", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + ',', /* decimal point es_PY */ + '.', /* thousands_sep es_PY */ + "\x03\x03", /* grouping es_PY */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_PY *****/ + +/***** LOCALE BEGIN es_SV: Spanish - El Salvador *****/ +MY_LOCALE my_locale_es_SV +( + 96, + "es_SV", + "Spanish - El Salvador", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + '.', /* decimal point es_SV */ + ',', /* thousands_sep es_SV */ + "\x03\x03", /* grouping es_SV */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_SV *****/ + +/***** LOCALE BEGIN es_US: Spanish - United States *****/ +MY_LOCALE my_locale_es_US +( + 97, + "es_US", + "Spanish - United States", + FALSE, + &my_locale_typelib_month_names_es_ES, + 
&my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + '.', /* decimal point es_US */ + ',', /* thousands_sep es_US */ + "\x03\x03", /* grouping es_US */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_US *****/ + +/***** LOCALE BEGIN es_UY: Spanish - Uruguay *****/ +MY_LOCALE my_locale_es_UY +( + 98, + "es_UY", + "Spanish - Uruguay", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + ',', /* decimal point es_UY */ + '.', /* thousands_sep es_UY */ + "\x03\x03", /* grouping es_UY */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_UY *****/ + +/***** LOCALE BEGIN es_VE: Spanish - Venezuela *****/ +MY_LOCALE my_locale_es_VE +( + 99, + "es_VE", + "Spanish - Venezuela", + FALSE, + &my_locale_typelib_month_names_es_ES, + &my_locale_typelib_ab_month_names_es_ES, + &my_locale_typelib_day_names_es_ES, + &my_locale_typelib_ab_day_names_es_ES, + 10, + 9, + ',', /* decimal point es_VE */ + '.', /* thousands_sep es_VE */ + "\x03\x03", /* grouping es_VE */ + &global_errmsgs[es_ES] +); +/***** LOCALE END es_VE *****/ + +/***** LOCALE BEGIN fr_BE: French - Belgium *****/ +MY_LOCALE my_locale_fr_BE +( + 100, + "fr_BE", + "French - Belgium", + FALSE, + &my_locale_typelib_month_names_fr_FR, + &my_locale_typelib_ab_month_names_fr_FR, + &my_locale_typelib_day_names_fr_FR, + &my_locale_typelib_ab_day_names_fr_FR, + 9, + 8, + ',', /* decimal point fr_BE */ + '.', /* thousands_sep fr_BE */ + "\x80\x80", /* grouping fr_BE */ + &global_errmsgs[fr_FR] +); +/***** LOCALE END fr_BE *****/ + +/***** LOCALE BEGIN fr_CA: French - Canada *****/ +MY_LOCALE my_locale_fr_CA +( + 101, + "fr_CA", + "French - Canada", + FALSE, + &my_locale_typelib_month_names_fr_FR, + &my_locale_typelib_ab_month_names_fr_FR, + &my_locale_typelib_day_names_fr_FR, + &my_locale_typelib_ab_day_names_fr_FR, + 9, 
+ 8, + ',', /* decimal point fr_CA */ + ' ', /* thousands_sep fr_CA */ + "\x80\x80", /* grouping fr_CA */ + &global_errmsgs[fr_FR] +); +/***** LOCALE END fr_CA *****/ + +/***** LOCALE BEGIN fr_CH: French - Switzerland *****/ +MY_LOCALE my_locale_fr_CH +( + 102, + "fr_CH", + "French - Switzerland", + FALSE, + &my_locale_typelib_month_names_fr_FR, + &my_locale_typelib_ab_month_names_fr_FR, + &my_locale_typelib_day_names_fr_FR, + &my_locale_typelib_ab_day_names_fr_FR, + 9, + 8, + ',', /* decimal point fr_CH */ + '\0', /* thousands_sep fr_CH */ + "\x80\x80", /* grouping fr_CH */ + &global_errmsgs[fr_FR] +); +/***** LOCALE END fr_CH *****/ + +/***** LOCALE BEGIN fr_LU: French - Luxembourg *****/ +MY_LOCALE my_locale_fr_LU +( + 103, + "fr_LU", + "French - Luxembourg", + FALSE, + &my_locale_typelib_month_names_fr_FR, + &my_locale_typelib_ab_month_names_fr_FR, + &my_locale_typelib_day_names_fr_FR, + &my_locale_typelib_ab_day_names_fr_FR, + 9, + 8, + ',', /* decimal point fr_LU */ + '\0', /* thousands_sep fr_LU */ + "\x80\x80", /* grouping fr_LU */ + &global_errmsgs[fr_FR] +); +/***** LOCALE END fr_LU *****/ + +/***** LOCALE BEGIN it_IT: Italian - Italy *****/ +MY_LOCALE my_locale_it_IT +( + 104, + "it_IT", + "Italian - Italy", + FALSE, + &my_locale_typelib_month_names_it_CH, + &my_locale_typelib_ab_month_names_it_CH, + &my_locale_typelib_day_names_it_CH, + &my_locale_typelib_ab_day_names_it_CH, + 9, + 9, + ',', /* decimal point it_IT */ + '\0', /* thousands_sep it_IT */ + "\x80\x80", /* grouping it_IT */ + &global_errmsgs[it_IT] +); +/***** LOCALE END it_IT *****/ + +/***** LOCALE BEGIN nl_BE: Dutch - Belgium *****/ +MY_LOCALE my_locale_nl_BE +( + 105, + "nl_BE", + "Dutch - Belgium", + TRUE, + &my_locale_typelib_month_names_nl_NL, + &my_locale_typelib_ab_month_names_nl_NL, + &my_locale_typelib_day_names_nl_NL, + &my_locale_typelib_ab_day_names_nl_NL, + 9, + 9, + ',', /* decimal point nl_BE */ + '.', /* thousands_sep nl_BE */ + "\x80\x80", /* grouping nl_BE */ + 
&global_errmsgs[nl_NL] +); +/***** LOCALE END nl_BE *****/ + +/***** LOCALE BEGIN no_NO: Norwegian - Norway *****/ +MY_LOCALE my_locale_no_NO +( + 106, + "no_NO", + "Norwegian - Norway", + FALSE, + &my_locale_typelib_month_names_nb_NO, + &my_locale_typelib_ab_month_names_nb_NO, + &my_locale_typelib_day_names_nb_NO, + &my_locale_typelib_ab_day_names_nb_NO, + 9, + 7, + ',', /* decimal point no_NO */ + '.', /* thousands_sep no_NO */ + "\x03\x03", /* grouping no_NO */ + &global_errmsgs[no_NO] +); +/***** LOCALE END no_NO *****/ + +/***** LOCALE BEGIN sv_FI: Swedish - Finland *****/ +MY_LOCALE my_locale_sv_FI +( + 107, + "sv_FI", + "Swedish - Finland", + FALSE, + &my_locale_typelib_month_names_sv_SE, + &my_locale_typelib_ab_month_names_sv_SE, + &my_locale_typelib_day_names_sv_SE, + &my_locale_typelib_ab_day_names_sv_SE, + 9, + 7, + ',', /* decimal point sv_FI */ + ' ', /* thousands_sep sv_FI */ + "\x03\x03", /* grouping sv_FI */ + &global_errmsgs[sv_SE] +); +/***** LOCALE END sv_FI *****/ + +/***** LOCALE BEGIN zh_HK: Chinese - Hong Kong SAR *****/ +MY_LOCALE my_locale_zh_HK +( + 108, + "zh_HK", + "Chinese - Hong Kong SAR", + FALSE, + &my_locale_typelib_month_names_zh_CN, + &my_locale_typelib_ab_month_names_zh_CN, + &my_locale_typelib_day_names_zh_CN, + &my_locale_typelib_ab_day_names_zh_CN, + 3, + 3, + '.', /* decimal point zh_HK */ + ',', /* thousands_sep zh_HK */ + "\x03", /* grouping zh_HK */ + &global_errmsgs[en_US] +); +/***** LOCALE END zh_HK *****/ + + +/***** LOCALE BEGIN el_GR: Greek - Greece *****/ +static const char *my_locale_month_names_el_GR[13]= +{ + "Ιανουάριος", "Φεβρουάριος", "Μάρτιος", + "Απρίλιος", "Μάιος", "Ιούνιος", + "Ιούλιος", "Αύγουστος", "Σεπτέμβριος", + "Οκτώβριος", "Νοέμβριος", "Δεκέμβριος", NullS +}; + +static const char *my_locale_ab_month_names_el_GR[13]= +{ + "Ιαν", "Φεβ", "Μάρ", + "Απρ", "Μάι", "Ιούν", + "Ιούλ","Αύγ", "Σεπ", + "Οκτ", "Νοέ", "Δεκ", NullS +}; + +static const char *my_locale_day_names_el_GR[8] = +{ + "Δευτέρα", "Τρίτη", 
"Τετάρτη", "Πέμπτη", + "Παρασκευή", "Σάββατο", "Κυριακή", NullS +}; + +static const char *my_locale_ab_day_names_el_GR[8]= +{ + "Δευ", "Τρί", "Τετ", "Πέμ", + "Παρ", "Σάβ", "Κυρ", NullS +}; + +static TYPELIB my_locale_typelib_month_names_el_GR= +{ + array_elements(my_locale_month_names_el_GR) - 1, + "", my_locale_month_names_el_GR, NULL +}; + +static TYPELIB my_locale_typelib_ab_month_names_el_GR= +{ + array_elements(my_locale_ab_month_names_el_GR)-1, + "", my_locale_ab_month_names_el_GR, NULL +}; + +static TYPELIB my_locale_typelib_day_names_el_GR= +{ + array_elements(my_locale_day_names_el_GR)-1, + "", my_locale_day_names_el_GR, NULL +}; + +static TYPELIB my_locale_typelib_ab_day_names_el_GR= +{ + array_elements(my_locale_ab_day_names_el_GR) - 1, + "", my_locale_ab_day_names_el_GR, NULL +}; + +MY_LOCALE my_locale_el_GR +( + 109, + "el_GR", + "Greek - Greece", + FALSE, + &my_locale_typelib_month_names_el_GR, + &my_locale_typelib_ab_month_names_el_GR, + &my_locale_typelib_day_names_el_GR, + &my_locale_typelib_ab_day_names_el_GR, + 11, /* max mon name length */ + 9, /* max day name length */ + ',', /* decimal point el_GR */ + '.', /* thousands_sep el_GR */ + "\x80", /* grouping el_GR */ + &global_errmsgs[el_GR] +); +/***** LOCALE END el_GR *****/ + + +/***** LOCALE BEGIN rm_CH: Romansh - Switzerland *****/ +static const char *my_locale_month_names_rm_CH[13]= +{ + "schaner", "favrer", "mars", "avrigl", "matg", "zercladur", + "fanadur", "avust", "settember", "october", "november", "december", NullS +}; + +static const char *my_locale_ab_month_names_rm_CH[13]= +{ + "schan", "favr", "mars", "avr", "matg", "zercl", + "fan", "avust", "sett", "oct", "nov", "dec", NullS +}; + +static const char *my_locale_day_names_rm_CH[8]= +{ + "glindesdi", "mardi", "mesemna", "gievgia", + "venderdi", "sonda", "dumengia", NullS +}; + +static const char *my_locale_ab_day_names_rm_CH[8]= +{ + "gli", "ma", "me", "gie", "ve", "so", "du", NullS +}; + +static TYPELIB 
my_locale_typelib_month_names_rm_CH= +{ + array_elements(my_locale_month_names_rm_CH) - 1, + "", my_locale_month_names_rm_CH, NULL +}; + +static TYPELIB my_locale_typelib_ab_month_names_rm_CH= +{ + array_elements(my_locale_ab_month_names_rm_CH) - 1, + "", my_locale_ab_month_names_rm_CH, NULL +}; + +static TYPELIB my_locale_typelib_day_names_rm_CH= +{ + array_elements(my_locale_day_names_rm_CH) - 1, + "", my_locale_day_names_rm_CH, NULL +}; + +static TYPELIB my_locale_typelib_ab_day_names_rm_CH= +{ + array_elements(my_locale_ab_day_names_rm_CH) - 1, + "", my_locale_ab_day_names_rm_CH, NULL +}; + +MY_LOCALE my_locale_rm_CH +( + 110, + "rm_CH", + "Romansh - Switzerland", + FALSE, + &my_locale_typelib_month_names_rm_CH, + &my_locale_typelib_ab_month_names_rm_CH, + &my_locale_typelib_day_names_rm_CH, + &my_locale_typelib_ab_day_names_rm_CH, + 9, /* max mon name length */ + 9, /* max day name length */ + ',', /* decimal point rm_CH */ + '\'', /* thousands_sep rm_CH */ + "\x03\x03", /* grouping rm_CH */ + &global_errmsgs[en_US] +); +/***** LOCALE END rm_CH *****/ + + +/***** LOCALE BEGIN ka_GE: Georgian - Georgia *****/ +static const char *my_locale_month_names_ka_GE[13] = +{ + "იანვარი", // January + "თებერვალი", // February + "მარტი", // March + "აპრილი", // April + "მაისი", // May + "ივნისი", // June + "ივლისი", // July + "აგვისტო", // August + "სექტემბერი", // September + "ოქტომბერი", // October + "ნოემბერი", // November + "დეკემბერი", // December + NullS +}; + +static const char *my_locale_ab_month_names_ka_GE[13] = + {"იან","თებ","მარ","აპრ","მაი","ივნ","ივლ","აგვ","სექტ","ოქტ","ნოე","დეკ", NullS }; + +static const char *my_locale_day_names_ka_GE[8] = + {"ორშაბათი","სამშაბათი","ოთხშაბათი","ხუთშაბათი","პარასკევი","შაბათი","კვირა", NullS }; + +static const char *my_locale_ab_day_names_ka_GE[8] = + {"ორშ","სამშ","ოთხშ","ხუთშ","პარ","შაბ","კვ", NullS }; + +static TYPELIB my_locale_typelib_month_names_ka_GE = + { array_elements(my_locale_month_names_ka_GE)-1, "", 
my_locale_month_names_ka_GE, NULL }; + +static TYPELIB my_locale_typelib_ab_month_names_ka_GE = + { array_elements(my_locale_ab_month_names_ka_GE)-1, "", my_locale_ab_month_names_ka_GE, NULL }; + +static TYPELIB my_locale_typelib_day_names_ka_GE = + { array_elements(my_locale_day_names_ka_GE)-1, "", my_locale_day_names_ka_GE, NULL }; + +static TYPELIB my_locale_typelib_ab_day_names_ka_GE = + { array_elements(my_locale_ab_day_names_ka_GE)-1, "", my_locale_ab_day_names_ka_GE, NULL }; + +MY_LOCALE my_locale_ka_GE +( + 111, + "ka_GE", + "Georgian - Georgia", + FALSE, + &my_locale_typelib_month_names_ka_GE, + &my_locale_typelib_ab_month_names_ka_GE, + &my_locale_typelib_day_names_ka_GE, + &my_locale_typelib_ab_day_names_ka_GE, + 10, + 9, + ',', /* decimal point ka_GE */ + ' ', /* thousands_sep ka_GE */ + "\x03", /* grouping ka_GE */ + &global_errmsgs[ka_GE] +); +/***** LOCALE END ka_GE *****/ + + +/* + The list of all locales. + Note, locales must be ordered according to their + numbers to make my_locale_by_number() work fast. + Some debug asserts below check this. 
+*/ +MY_LOCALE *my_locales[]= + { + &my_locale_en_US, + &my_locale_en_GB, + &my_locale_ja_JP, + &my_locale_sv_SE, + &my_locale_de_DE, + &my_locale_fr_FR, + &my_locale_ar_AE, + &my_locale_ar_BH, + &my_locale_ar_JO, + &my_locale_ar_SA, + &my_locale_ar_SY, + &my_locale_be_BY, + &my_locale_bg_BG, + &my_locale_ca_ES, + &my_locale_cs_CZ, + &my_locale_da_DK, + &my_locale_de_AT, + &my_locale_es_ES, + &my_locale_et_EE, + &my_locale_eu_ES, + &my_locale_fi_FI, + &my_locale_fo_FO, + &my_locale_gl_ES, + &my_locale_gu_IN, + &my_locale_he_IL, + &my_locale_hi_IN, + &my_locale_hr_HR, + &my_locale_hu_HU, + &my_locale_id_ID, + &my_locale_is_IS, + &my_locale_it_CH, + &my_locale_ko_KR, + &my_locale_lt_LT, + &my_locale_lv_LV, + &my_locale_mk_MK, + &my_locale_mn_MN, + &my_locale_ms_MY, + &my_locale_nb_NO, + &my_locale_nl_NL, + &my_locale_pl_PL, + &my_locale_pt_BR, + &my_locale_pt_PT, + &my_locale_ro_RO, + &my_locale_ru_RU, + &my_locale_ru_UA, + &my_locale_sk_SK, + &my_locale_sl_SI, + &my_locale_sq_AL, + &my_locale_sr_RS, + &my_locale_ta_IN, + &my_locale_te_IN, + &my_locale_th_TH, + &my_locale_tr_TR, + &my_locale_uk_UA, + &my_locale_ur_PK, + &my_locale_vi_VN, + &my_locale_zh_CN, + &my_locale_zh_TW, + &my_locale_ar_DZ, + &my_locale_ar_EG, + &my_locale_ar_IN, + &my_locale_ar_IQ, + &my_locale_ar_KW, + &my_locale_ar_LB, + &my_locale_ar_LY, + &my_locale_ar_MA, + &my_locale_ar_OM, + &my_locale_ar_QA, + &my_locale_ar_SD, + &my_locale_ar_TN, + &my_locale_ar_YE, + &my_locale_de_BE, + &my_locale_de_CH, + &my_locale_de_LU, + &my_locale_en_AU, + &my_locale_en_CA, + &my_locale_en_IN, + &my_locale_en_NZ, + &my_locale_en_PH, + &my_locale_en_ZA, + &my_locale_en_ZW, + &my_locale_es_AR, + &my_locale_es_BO, + &my_locale_es_CL, + &my_locale_es_CO, + &my_locale_es_CR, + &my_locale_es_DO, + &my_locale_es_EC, + &my_locale_es_GT, + &my_locale_es_HN, + &my_locale_es_MX, + &my_locale_es_NI, + &my_locale_es_PA, + &my_locale_es_PE, + &my_locale_es_PR, + &my_locale_es_PY, + &my_locale_es_SV, + &my_locale_es_US, + 
&my_locale_es_UY, + &my_locale_es_VE, + &my_locale_fr_BE, + &my_locale_fr_CA, + &my_locale_fr_CH, + &my_locale_fr_LU, + &my_locale_it_IT, + &my_locale_nl_BE, + &my_locale_no_NO, + &my_locale_sv_FI, + &my_locale_zh_HK, + &my_locale_el_GR, + &my_locale_rm_CH, + &my_locale_ka_GE, + NULL + }; + + +MY_LOCALE *my_locales_deprecated[]= +{ + &my_locale_sr_YU, + NULL +}; + + +MY_LOCALE *my_locale_by_number(uint number) +{ + MY_LOCALE *locale; + if (number >= array_elements(my_locales) - 1) + return NULL; + locale= my_locales[number]; + // Check that locale is on its correct position in the array + DBUG_ASSERT(locale == my_locales[locale->number]); + return locale; +} + + +static MY_LOCALE* +my_locale_by_name(MY_LOCALE** locales, const char *name) +{ + MY_LOCALE **locale; + for (locale= locales; *locale != NULL; locale++) + { + if (!my_strcasecmp(&my_charset_latin1, (*locale)->name, name)) + return *locale; + } + return NULL; +} + + +MY_LOCALE *my_locale_by_name(const char *name) +{ + MY_LOCALE *locale; + + if ((locale= my_locale_by_name(my_locales, name))) + { + // Check that locale is on its correct position in the array + DBUG_ASSERT(locale == my_locales[locale->number]); + return locale; + } + else if ((locale= my_locale_by_name(my_locales_deprecated, name))) + { + THD *thd= current_thd; + /* + Replace the deprecated locale to the corresponding + 'fresh' locale with the same ID. + */ + locale= my_locales[locale->number]; + if (thd) + { + // Send a warning to the client + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_DEPRECATED_SYNTAX, + ER_THD(thd, ER_WARN_DEPRECATED_SYNTAX), + name, locale->name); + } + else + { + // Send a warning to mysqld error log + sql_print_warning("The syntax '%s' is deprecated and will be removed. 
" + "Please use %s instead.", + name, locale->name); + } + } + return locale; +} + + +void cleanup_errmsgs() +{ + for (MY_LOCALE_ERRMSGS *msgs= global_errmsgs; msgs->language; msgs++) + { + my_free(msgs->errmsgs); + } +} diff --git a/sql/sql_locale.h b/sql/sql_locale.h new file mode 100644 index 00000000..b7ce9f7b --- /dev/null +++ b/sql/sql_locale.h @@ -0,0 +1,79 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_LOCALE_INCLUDED +#define SQL_LOCALE_INCLUDED + +typedef struct my_locale_errmsgs +{ + const char *language; + const char ***errmsgs; +} MY_LOCALE_ERRMSGS; + + +typedef struct st_typelib TYPELIB; + +class MY_LOCALE +{ +public: + uint number; + const char *name; + const char *description; + const bool is_ascii; + TYPELIB *month_names; + TYPELIB *ab_month_names; + TYPELIB *day_names; + TYPELIB *ab_day_names; + uint max_month_name_length; + uint max_day_name_length; + uint decimal_point; + uint thousand_sep; + const char *grouping; + MY_LOCALE_ERRMSGS *errmsgs; + MY_LOCALE(uint number_par, + const char *name_par, const char *descr_par, bool is_ascii_par, + TYPELIB *month_names_par, TYPELIB *ab_month_names_par, + TYPELIB *day_names_par, TYPELIB *ab_day_names_par, + uint max_month_name_length_par, uint max_day_name_length_par, + uint decimal_point_par, uint 
thousand_sep_par, + const char *grouping_par, MY_LOCALE_ERRMSGS *errmsgs_par) : + number(number_par), + name(name_par), description(descr_par), is_ascii(is_ascii_par), + month_names(month_names_par), ab_month_names(ab_month_names_par), + day_names(day_names_par), ab_day_names(ab_day_names_par), + max_month_name_length(max_month_name_length_par), + max_day_name_length(max_day_name_length_par), + decimal_point(decimal_point_par), + thousand_sep(thousand_sep_par), + grouping(grouping_par), + errmsgs(errmsgs_par) + {} + my_repertoire_t repertoire() const + { return is_ascii ? MY_REPERTOIRE_ASCII : MY_REPERTOIRE_EXTENDED; } +}; +/* Exported variables */ + +extern MY_LOCALE my_locale_en_US; +extern MYSQL_PLUGIN_IMPORT MY_LOCALE *my_locales[]; +extern MY_LOCALE *my_default_lc_messages; +extern MY_LOCALE *my_default_lc_time_names; + +/* Exported functions */ + +MY_LOCALE *my_locale_by_name(const char *name); +MY_LOCALE *my_locale_by_number(uint number); +void cleanup_errmsgs(void); + +#endif /* SQL_LOCALE_INCLUDED */ diff --git a/sql/sql_manager.cc b/sql/sql_manager.cc new file mode 100644 index 00000000..3d3728b9 --- /dev/null +++ b/sql/sql_manager.cc @@ -0,0 +1,164 @@ +/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + * sql_manager.cc + * This thread manages various maintenance tasks. 
+ * + * o Flushing the tables every flush_time seconds. + * o Berkeley DB: removing unneeded log files. + */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_manager.h" +#include "sql_base.h" // flush_tables + +static bool volatile manager_thread_in_use = 0; +static bool abort_manager = false; + +pthread_t manager_thread; +mysql_mutex_t LOCK_manager; +mysql_cond_t COND_manager; + +struct handler_cb { + struct handler_cb *next; + void (*action)(void *); + void *data; +}; + +static struct handler_cb *cb_list; // protected by LOCK_manager + +bool mysql_manager_submit(void (*action)(void *), void *data) +{ + bool result= FALSE; + DBUG_ASSERT(manager_thread_in_use); + struct handler_cb **cb; + mysql_mutex_lock(&LOCK_manager); + cb= &cb_list; + while (*cb) + cb= &(*cb)->next; + *cb= (struct handler_cb *)my_malloc(PSI_INSTRUMENT_ME, + sizeof(struct handler_cb), MYF(MY_WME)); + if (!*cb) + result= TRUE; + else + { + (*cb)->next= NULL; + (*cb)->action= action; + (*cb)->data= data; + } + mysql_cond_signal(&COND_manager); + mysql_mutex_unlock(&LOCK_manager); + return result; +} + +pthread_handler_t handle_manager(void *arg __attribute__((unused))) +{ + int error = 0; + struct timespec abstime; + bool reset_flush_time = TRUE; + my_thread_init(); + DBUG_ENTER("handle_manager"); + + pthread_detach_this_thread(); + manager_thread = pthread_self(); + mysql_mutex_lock(&LOCK_manager); + while (!abort_manager) + { + /* XXX: This will need to be made more general to handle different + * polling needs. 
*/ + if (flush_time) + { + if (reset_flush_time) + { + set_timespec(abstime, flush_time); + reset_flush_time = FALSE; + } + while ((!error || error == EINTR) && !abort_manager && !cb_list) + error= mysql_cond_timedwait(&COND_manager, &LOCK_manager, &abstime); + + if (error == ETIMEDOUT || error == ETIME) + { + tc_purge(); + error = 0; + reset_flush_time = TRUE; + } + } + else + { + while ((!error || error == EINTR) && !abort_manager && !cb_list) + error= mysql_cond_wait(&COND_manager, &LOCK_manager); + } + + struct handler_cb *cb= cb_list; + cb_list= NULL; + mysql_mutex_unlock(&LOCK_manager); + + while (cb) + { + struct handler_cb *next= cb->next; + cb->action(cb->data); + my_free(cb); + cb= next; + } + mysql_mutex_lock(&LOCK_manager); + } + manager_thread_in_use = 0; + mysql_mutex_unlock(&LOCK_manager); + mysql_mutex_destroy(&LOCK_manager); + mysql_cond_destroy(&COND_manager); + DBUG_LEAVE; // Can't use DBUG_RETURN after my_thread_end + my_thread_end(); + return (NULL); +} + + +/* Start handle manager thread */ +void start_handle_manager() +{ + DBUG_ENTER("start_handle_manager"); + abort_manager = false; + { + pthread_t hThread; + int err; + DBUG_EXECUTE_IF("delay_start_handle_manager", my_sleep(1000);); + manager_thread_in_use = 1; + mysql_cond_init(key_COND_manager, &COND_manager,NULL); + mysql_mutex_init(key_LOCK_manager, &LOCK_manager, NULL); + if ((err= mysql_thread_create(key_thread_handle_manager, &hThread, + &connection_attrib, handle_manager, 0))) + sql_print_warning("Can't create handle_manager thread (errno: %M)", err); + } + DBUG_VOID_RETURN; +} + + +/* Initiate shutdown of handle manager thread */ +void stop_handle_manager() +{ + DBUG_ENTER("stop_handle_manager"); + if (manager_thread_in_use) + { + mysql_mutex_lock(&LOCK_manager); + abort_manager = true; + DBUG_PRINT("quit", ("initiate shutdown of handle manager thread: %lu", + (ulong)manager_thread)); + mysql_cond_signal(&COND_manager); + mysql_mutex_unlock(&LOCK_manager); + } + DBUG_VOID_RETURN; +} 
+ diff --git a/sql/sql_manager.h b/sql/sql_manager.h new file mode 100644 index 00000000..f97d4a2c --- /dev/null +++ b/sql/sql_manager.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_MANAGER_INCLUDED +#define SQL_MANAGER_INCLUDED + +void start_handle_manager(); +void stop_handle_manager(); +bool mysql_manager_submit(void (*action)(void *), void *data); + +#endif /* SQL_MANAGER_INCLUDED */ diff --git a/sql/sql_mode.cc b/sql/sql_mode.cc new file mode 100644 index 00000000..6e62fa8f --- /dev/null +++ b/sql/sql_mode.cc @@ -0,0 +1,34 @@ +/* + Copyright (c) 2019, MariaDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "set_var.h" + +void Sql_mode_dependency::push_dependency_warnings(THD *thd) const +{ + sql_mode_t all= m_hard | m_soft; + for (uint i= 0; all ; i++, all >>= 1) + { + if (all & 1) + { + // TODO-10.5: add a new error code + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "Expression depends on the @@%s value %s", + "sql_mode", sql_mode_string_representation(i)); + } + } +} diff --git a/sql/sql_mode.h b/sql/sql_mode.h new file mode 100644 index 00000000..fb2b7cef --- /dev/null +++ b/sql/sql_mode.h @@ -0,0 +1,162 @@ +#ifndef SQL_MODE_H_INCLUDED +#define SQL_MODE_H_INCLUDED +/* + Copyright (c) 2019, MariaDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include "sql_basic_types.h" + +/* + class Sql_mode_dependency + + A combination of hard and soft dependency on sql_mode. + Used to watch if a GENERATED ALWAYS AS expression guarantees consitent + data written to its virtual column. 
+ + A virtual column can appear in an index if: + - the generation expression does not depend on any sql_mode flags, or + - the generation expression has a soft dependency on an sql_mode flag, + and the column knows how to handle this dependeny. + + A virtual column cannot appear in an index if: + - its generation expression has a hard dependency + - its generation expression has a soft dependency, but the column + cannot handle it on store. + An error is reported in such cases. + + How dependencies appear: + - When a column return value depends on some sql_mode flag, + its Item_field adds a corresponding bit to m_soft. For example, + Item_field for a CHAR(N) column adds the PAD_CHAR_TO_FULL_LENGTH flag. + - When an SQL function/operator return value depends on some sql_mode flag, + it adds a corresponding bit to m_soft. For example, Item_func_minus + adds the MODE_NO_UNSIGNED_SUBTRACTION in case of unsigned arguments. + + How dependency are processed (see examples below): + - All SQL functions/operators bit-OR all hard dependencies from all arguments. + - Some soft dependencies can be handled by the underlying Field on store, + e.g. CHAR(N) can handle PAD_CHAR_TO_FULL_LENGTH. + - Some soft dependencies can be handled by SQL functions and operators, + e.g. RTRIM(expr) removes expr's soft dependency on PAD_CHAR_TO_FULL_LENGTH. + If a function or operator handles a soft dependency on a certain sql_mode + flag, it removes the corresponding bit from m_soft (see below). + Note, m_hard is not touched in such cases. + - When an expression with a soft dependency on a certain sql_mode flag + goes as an argument to an SQL function/operator which cannot handle + this flag, the dependency escalates from soft to hard + (by moving the corresponding bit from m_soft to m_hard) and cannot be + handled any more on the upper level, neither by a Field on store, + nor by another SQL function/operator. + + There are four kinds of Items: + 1. 
Items that generate a soft or hard dependency, e.g. + - Item_field for CHAR(N) - generates soft/PAD_CHAR_TO_FULL_LENGTH + - Item_func_minus - generates soft/NO_UNSIGNED_SUBTRACTION + 2. Items that convert a soft dependency to a hard dependency. + This happens e.g. when an Item_func instance gets a soft dependency + from its arguments, and it does not know how to handle this dependency. + Most Item_func descendants do this. + 3. Items that remove soft dependencies, e.g.: + - Item_func_rtrim - removes soft/PAD_CHAR_TO_FULL_LENGTH + that came from args[0] (under certain conditions) + - Item_func_rpad - removes soft/PAD_CJAR_TO_FULL_LENGTH + that came from args[0] (under certain conditions) + 4. Items that repeat soft dependency from its arguments to the caller. + They are not implemented yet. But functions like Item_func_coalesce, + Item_func_case, Item_func_case_abbreviation2 could do this. + + Examples: + + 1. CREATE OR REPLACE TABLE t1 (a CHAR(5), v CHAR(20) AS(a), KEY(v)); + + Here `v` has a soft dependency on `a`. + The value of `a` depends on PAD_CHAR_TO_FULL_LENGTH, it can return: + - 'a' - if PAD_CHAR_TO_FULL_LENGTH is disabled + - 'a' followed by four spaces - if PAD_CHAR_TO_FULL_LENGTH is enabled + But `v` will pad trailing spaces to the full length on store anyway. + So Field_string handles this soft dependency on store. + This combination of the virtial column data type and its generation + expression is safe and provides consistent data in `v`, which is + 'a' followed by four spaces, no matter what PAD_CHAR_TO_FULL_LENGTH is. + + 2. CREATE OR REPLACE TABLE t1 (a CHAR(5), v VARCHAR(20) AS(a), KEY(v)); + + Here `v` has a soft dependency on `a`. But Field_varstring does + not pad spaces on store, so it cannot handle this dependency. + This combination of the virtual column data type and its generation + expression is not safe. An error is returned. + + 3. 
CREATE OR REPLACE TABLE t1 (a CHAR(5), v INT AS(LENGTH(a)), KEY(v)); + + Here `v` has a hard dependency on `a`, because the value of `a` + is wrapped to the function LENGTH(). + The value of `LENGTH(a)` depends on PAD_CHAR_TO_FULL_LENGTH, it can return: + - 1 - if PAD_CHAR_TO_FULL_LENGTH is disabled + - 4 - if PAD_CHAR_TO_FULL_LENGTH is enabled + This combination cannot provide consistent data stored to `v`, + therefore it's disallowed. +*/ +class Sql_mode_dependency +{ + sql_mode_t m_hard; + sql_mode_t m_soft; +public: + Sql_mode_dependency() + :m_hard(0), m_soft(0) + { } + Sql_mode_dependency(sql_mode_t hard, sql_mode_t soft) + :m_hard(hard), m_soft(soft) + { } + sql_mode_t hard() const { return m_hard; } + sql_mode_t soft() const { return m_soft; } + operator bool () const + { + return m_hard > 0 || m_soft > 0; + } + Sql_mode_dependency operator|(const Sql_mode_dependency &other) const + { + return Sql_mode_dependency(m_hard | other.m_hard, m_soft | other.m_soft); + } + Sql_mode_dependency operator&(const Sql_mode_dependency &other) const + { + return Sql_mode_dependency(m_hard & other.m_hard, m_soft & other.m_soft); + } + Sql_mode_dependency &operator|=(const Sql_mode_dependency &other) + { + m_hard|= other.m_hard; + m_soft|= other.m_soft; + return *this; + } + Sql_mode_dependency &operator&=(const Sql_mode_dependency &other) + { + m_hard&= other.m_hard; + m_soft&= other.m_soft; + return *this; + } + Sql_mode_dependency &soft_to_hard() + { + m_hard|= m_soft; + m_soft= 0; + return *this; + } + void push_dependency_warnings(THD *thd) const; +}; + + +#endif // SQL_MODE_H_INCLUDED diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc new file mode 100644 index 00000000..1817f811 --- /dev/null +++ b/sql/sql_parse.cc @@ -0,0 +1,10462 @@ +/* Copyright (c) 2000, 2017, Oracle and/or its affiliates. 
+ Copyright (c) 2008, 2023, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#define MYSQL_LEX 1 +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_parse.h" // sql_kill, *_precheck, *_prepare +#include "lock.h" // try_transactional_lock, + // check_transactional_lock, + // set_handler_table_locks, + // lock_global_read_lock, + // make_global_read_lock_block_commit +#include "sql_base.h" // open_tables, open_and_lock_tables, + // lock_tables, unique_table, + // close_thread_tables, is_temporary_table + // table_cache.h +#include "sql_cache.h" // QUERY_CACHE_FLAGS_SIZE, query_cache_* +#include "sql_show.h" // mysqld_list_*, mysqld_show_*, + // calc_sum_of_all_status +#include "mysqld.h" +#include "sql_locale.h" // my_locale_en_US +#include "log.h" // flush_error_log +#include "sql_view.h" // mysql_create_view, mysql_drop_view +#include "sql_delete.h" // mysql_delete +#include "sql_insert.h" // mysql_insert +#include "sql_update.h" // mysql_update, mysql_multi_update +#include "sql_partition.h" // struct partition_info +#include "sql_db.h" // mysql_change_db, mysql_create_db, + // mysql_rm_db, mysql_upgrade_db, + // mysql_alter_db, + // check_db_dir_existence, + // my_dbopt_cleanup +#include "sql_table.h" // mysql_alter_table, + // mysql_backup_table, + // mysql_restore_table +#include "sql_reload.h" // reload_acl_and_cache +#include "sql_admin.h" // 
mysql_assign_to_keycache +#include "sql_connect.h" // decrease_user_connections, + // check_mqh, + // reset_mqh +#include "sql_rename.h" // mysql_rename_tables +#include "hostname.h" // hostname_cache_refresh +#include "sql_test.h" // mysql_print_status +#include "sql_select.h" // handle_select, mysql_select, + // mysql_explain_union +#include "sql_load.h" // mysql_load +#include "sql_servers.h" // create_servers, alter_servers, + // drop_servers, servers_reload +#include "sql_handler.h" // mysql_ha_open, mysql_ha_close, + // mysql_ha_read +#include "sql_binlog.h" // mysql_client_binlog_statement +#include "sql_do.h" // mysql_do +#include "sql_help.h" // mysqld_help +#include "rpl_constants.h" // Incident, INCIDENT_LOST_EVENTS +#include "log_event.h" +#include "sql_repl.h" +#include "rpl_filter.h" +#include "repl_failsafe.h" +#include +#include +#include +#include "rpl_mi.h" + +#include "sql_digest.h" + +#include "sp_head.h" +#include "sp.h" +#include "sp_cache.h" +#include "events.h" +#include "sql_trigger.h" +#include "transaction.h" +#include "sql_alter.h" +#include "sql_audit.h" +#include "sql_prepare.h" +#include "sql_cte.h" +#include "debug_sync.h" +#include "probes_mysql.h" +#include "set_var.h" +#include "sql_bootstrap.h" +#include "sql_sequence.h" +#include "opt_trace.h" +#include "mysql/psi/mysql_sp.h" + +#include "my_json_writer.h" + +#define FLAGSTR(V,F) ((V)&(F)?#F" ":"") + +#ifdef WITH_ARIA_STORAGE_ENGINE +#include "../storage/maria/ha_maria.h" +#endif + +#include "wsrep.h" +#include "wsrep_mysqld.h" +#ifdef WITH_WSREP +#include "wsrep_thd.h" +#include "wsrep_trans_observer.h" /* wsrep transaction hooks */ + +static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, + Parser_state *parser_state); + +#endif /* WITH_WSREP */ +/** + @defgroup Runtime_Environment Runtime Environment + @{ +*/ + +static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables); +static void sql_kill(THD *thd, my_thread_id id, killed_state state, killed_type 
type); +static void sql_kill_user(THD *thd, LEX_USER *user, killed_state state); +static bool lock_tables_precheck(THD *thd, TABLE_LIST *tables); +static bool execute_show_status(THD *, TABLE_LIST *); +static bool check_rename_table(THD *, TABLE_LIST *, TABLE_LIST *); +static bool generate_incident_event(THD *thd); +static int show_create_db(THD *thd, LEX *lex); +static bool alter_routine(THD *thd, LEX *lex); +static bool drop_routine(THD *thd, LEX *lex); + +const LEX_CSTRING any_db= {STRING_WITH_LEN("*any*")}; + +const LEX_CSTRING command_name[257]={ + { STRING_WITH_LEN("Sleep") }, //0 + { STRING_WITH_LEN("Quit") }, //1 + { STRING_WITH_LEN("Init DB") }, //2 + { STRING_WITH_LEN("Query") }, //3 + { STRING_WITH_LEN("Field List") }, //4 + { STRING_WITH_LEN("Create DB") }, //5 + { STRING_WITH_LEN("Drop DB") }, //6 + { STRING_WITH_LEN("Refresh") }, //7 + { STRING_WITH_LEN("Shutdown") }, //8 + { STRING_WITH_LEN("Statistics") }, //9 + { STRING_WITH_LEN("Processlist") }, //10 + { STRING_WITH_LEN("Connect") }, //11 + { STRING_WITH_LEN("Kill") }, //12 + { STRING_WITH_LEN("Debug") }, //13 + { STRING_WITH_LEN("Ping") }, //14 + { STRING_WITH_LEN("Time") }, //15 + { STRING_WITH_LEN("Delayed insert") }, //16 + { STRING_WITH_LEN("Change user") }, //17 + { STRING_WITH_LEN("Binlog Dump") }, //18 + { STRING_WITH_LEN("Table Dump") }, //19 + { STRING_WITH_LEN("Connect Out") }, //20 + { STRING_WITH_LEN("Register Slave") }, //21 + { STRING_WITH_LEN("Prepare") }, //22 + { STRING_WITH_LEN("Execute") }, //23 + { STRING_WITH_LEN("Long Data") }, //24 + { STRING_WITH_LEN("Close stmt") }, //25 + { STRING_WITH_LEN("Reset stmt") }, //26 + { STRING_WITH_LEN("Set option") }, //27 + { STRING_WITH_LEN("Fetch") }, //28 + { STRING_WITH_LEN("Daemon") }, //29 + { STRING_WITH_LEN("Unimpl get tid") }, //30 + { STRING_WITH_LEN("Reset connection") },//31 + { 0, 0 }, //32 + { 0, 0 }, //33 + { 0, 0 }, //34 + { 0, 0 }, //35 + { 0, 0 }, //36 + { 0, 0 }, //37 + { 0, 0 }, //38 + { 0, 0 }, //39 + { 0, 0 }, //40 + { 
0, 0 }, //41 + { 0, 0 }, //42 + { 0, 0 }, //43 + { 0, 0 }, //44 + { 0, 0 }, //45 + { 0, 0 }, //46 + { 0, 0 }, //47 + { 0, 0 }, //48 + { 0, 0 }, //49 + { 0, 0 }, //50 + { 0, 0 }, //51 + { 0, 0 }, //52 + { 0, 0 }, //53 + { 0, 0 }, //54 + { 0, 0 }, //55 + { 0, 0 }, //56 + { 0, 0 }, //57 + { 0, 0 }, //58 + { 0, 0 }, //59 + { 0, 0 }, //60 + { 0, 0 }, //61 + { 0, 0 }, //62 + { 0, 0 }, //63 + { 0, 0 }, //64 + { 0, 0 }, //65 + { 0, 0 }, //66 + { 0, 0 }, //67 + { 0, 0 }, //68 + { 0, 0 }, //69 + { 0, 0 }, //70 + { 0, 0 }, //71 + { 0, 0 }, //72 + { 0, 0 }, //73 + { 0, 0 }, //74 + { 0, 0 }, //75 + { 0, 0 }, //76 + { 0, 0 }, //77 + { 0, 0 }, //78 + { 0, 0 }, //79 + { 0, 0 }, //80 + { 0, 0 }, //81 + { 0, 0 }, //82 + { 0, 0 }, //83 + { 0, 0 }, //84 + { 0, 0 }, //85 + { 0, 0 }, //86 + { 0, 0 }, //87 + { 0, 0 }, //88 + { 0, 0 }, //89 + { 0, 0 }, //90 + { 0, 0 }, //91 + { 0, 0 }, //92 + { 0, 0 }, //93 + { 0, 0 }, //94 + { 0, 0 }, //95 + { 0, 0 }, //96 + { 0, 0 }, //97 + { 0, 0 }, //98 + { 0, 0 }, //99 + { 0, 0 }, //100 + { 0, 0 }, //101 + { 0, 0 }, //102 + { 0, 0 }, //103 + { 0, 0 }, //104 + { 0, 0 }, //105 + { 0, 0 }, //106 + { 0, 0 }, //107 + { 0, 0 }, //108 + { 0, 0 }, //109 + { 0, 0 }, //110 + { 0, 0 }, //111 + { 0, 0 }, //112 + { 0, 0 }, //113 + { 0, 0 }, //114 + { 0, 0 }, //115 + { 0, 0 }, //116 + { 0, 0 }, //117 + { 0, 0 }, //118 + { 0, 0 }, //119 + { 0, 0 }, //120 + { 0, 0 }, //121 + { 0, 0 }, //122 + { 0, 0 }, //123 + { 0, 0 }, //124 + { 0, 0 }, //125 + { 0, 0 }, //126 + { 0, 0 }, //127 + { 0, 0 }, //128 + { 0, 0 }, //129 + { 0, 0 }, //130 + { 0, 0 }, //131 + { 0, 0 }, //132 + { 0, 0 }, //133 + { 0, 0 }, //134 + { 0, 0 }, //135 + { 0, 0 }, //136 + { 0, 0 }, //137 + { 0, 0 }, //138 + { 0, 0 }, //139 + { 0, 0 }, //140 + { 0, 0 }, //141 + { 0, 0 }, //142 + { 0, 0 }, //143 + { 0, 0 }, //144 + { 0, 0 }, //145 + { 0, 0 }, //146 + { 0, 0 }, //147 + { 0, 0 }, //148 + { 0, 0 }, //149 + { 0, 0 }, //150 + { 0, 0 }, //151 + { 0, 0 }, //152 + { 0, 0 }, //153 + { 0, 0 }, //154 + { 0, 0 
}, //155 + { 0, 0 }, //156 + { 0, 0 }, //157 + { 0, 0 }, //158 + { 0, 0 }, //159 + { 0, 0 }, //160 + { 0, 0 }, //161 + { 0, 0 }, //162 + { 0, 0 }, //163 + { 0, 0 }, //164 + { 0, 0 }, //165 + { 0, 0 }, //166 + { 0, 0 }, //167 + { 0, 0 }, //168 + { 0, 0 }, //169 + { 0, 0 }, //170 + { 0, 0 }, //171 + { 0, 0 }, //172 + { 0, 0 }, //173 + { 0, 0 }, //174 + { 0, 0 }, //175 + { 0, 0 }, //176 + { 0, 0 }, //177 + { 0, 0 }, //178 + { 0, 0 }, //179 + { 0, 0 }, //180 + { 0, 0 }, //181 + { 0, 0 }, //182 + { 0, 0 }, //183 + { 0, 0 }, //184 + { 0, 0 }, //185 + { 0, 0 }, //186 + { 0, 0 }, //187 + { 0, 0 }, //188 + { 0, 0 }, //189 + { 0, 0 }, //190 + { 0, 0 }, //191 + { 0, 0 }, //192 + { 0, 0 }, //193 + { 0, 0 }, //194 + { 0, 0 }, //195 + { 0, 0 }, //196 + { 0, 0 }, //197 + { 0, 0 }, //198 + { 0, 0 }, //199 + { 0, 0 }, //200 + { 0, 0 }, //201 + { 0, 0 }, //202 + { 0, 0 }, //203 + { 0, 0 }, //204 + { 0, 0 }, //205 + { 0, 0 }, //206 + { 0, 0 }, //207 + { 0, 0 }, //208 + { 0, 0 }, //209 + { 0, 0 }, //210 + { 0, 0 }, //211 + { 0, 0 }, //212 + { 0, 0 }, //213 + { 0, 0 }, //214 + { 0, 0 }, //215 + { 0, 0 }, //216 + { 0, 0 }, //217 + { 0, 0 }, //218 + { 0, 0 }, //219 + { 0, 0 }, //220 + { 0, 0 }, //221 + { 0, 0 }, //222 + { 0, 0 }, //223 + { 0, 0 }, //224 + { 0, 0 }, //225 + { 0, 0 }, //226 + { 0, 0 }, //227 + { 0, 0 }, //228 + { 0, 0 }, //229 + { 0, 0 }, //230 + { 0, 0 }, //231 + { 0, 0 }, //232 + { 0, 0 }, //233 + { 0, 0 }, //234 + { 0, 0 }, //235 + { 0, 0 }, //236 + { 0, 0 }, //237 + { 0, 0 }, //238 + { 0, 0 }, //239 + { 0, 0 }, //240 + { 0, 0 }, //241 + { 0, 0 }, //242 + { 0, 0 }, //243 + { 0, 0 }, //244 + { 0, 0 }, //245 + { 0, 0 }, //246 + { 0, 0 }, //247 + { 0, 0 }, //248 + { 0, 0 }, //249 + { STRING_WITH_LEN("Bulk_execute") }, //250 + { STRING_WITH_LEN("Slave_worker") }, //251 + { STRING_WITH_LEN("Slave_IO") }, //252 + { STRING_WITH_LEN("Slave_SQL") }, //253 + { 0, 0}, + { STRING_WITH_LEN("Error") } // Last command number 255 +}; + +#ifdef HAVE_REPLICATION +/** + Returns true if 
all tables should be ignored. +*/ +inline bool all_tables_not_ok(THD *thd, TABLE_LIST *tables) +{ + Rpl_filter *rpl_filter= thd->system_thread_info.rpl_sql_info->rpl_filter; + return rpl_filter->is_on() && tables && !thd->spcont && + !rpl_filter->tables_ok(thd->db.str, tables); +} +#endif + + +static bool some_non_temp_table_to_be_updated(THD *thd, TABLE_LIST *tables) +{ + for (TABLE_LIST *table= tables; table; table= table->next_global) + { + DBUG_ASSERT(table->db.str && table->table_name.str); + if (table->updating && !thd->find_tmp_table_share(table)) + return 1; + } + return 0; +} + + +/* + Check whether the statement implicitly commits an active transaction. + + @param thd Thread handle. + @param mask Bitmask used for the SQL command match. + + @return 0 No implicit commit + @return 1 Do a commit +*/ +bool stmt_causes_implicit_commit(THD *thd, uint mask) +{ + LEX *lex= thd->lex; + bool skip= FALSE; + DBUG_ENTER("stmt_causes_implicit_commit"); + + if (!(sql_command_flags[lex->sql_command] & mask)) + DBUG_RETURN(FALSE); + + switch (lex->sql_command) { + case SQLCOM_ALTER_TABLE: + case SQLCOM_ALTER_SEQUENCE: + /* If ALTER TABLE of non-temporary table, do implicit commit */ + skip= (lex->tmp_table()); + break; + case SQLCOM_DROP_TABLE: + case SQLCOM_DROP_SEQUENCE: + case SQLCOM_CREATE_TABLE: + case SQLCOM_CREATE_SEQUENCE: + /* + If CREATE TABLE of non-temporary table and the table is not part + if a BEGIN GTID ... COMMIT group, do a implicit commit. + This ensures that CREATE ... SELECT will in the same GTID group on the + master and slave. + */ + skip= (lex->tmp_table() || + (thd->variables.option_bits & OPTION_GTID_BEGIN)); + break; + case SQLCOM_SET_OPTION: + skip= lex->autocommit ? FALSE : TRUE; + break; + default: + break; + } + + DBUG_RETURN(!skip); +} + + +/** + Mark all commands that somehow changes a table. + + This is used to check number of updates / hour. 
+ + sql_command is actually set to SQLCOM_END sometimes + so we need the +1 to include it in the array. + + See COMMAND_FLAG_xxx for different type of commands + 2 - query that returns meaningful ROW_COUNT() - + a number of modified rows +*/ + +uint sql_command_flags[SQLCOM_END+1]; +uint server_command_flags[COM_END+1]; + +void init_update_queries(void) +{ + /* Initialize the server command flags array. */ + memset(server_command_flags, 0, sizeof(server_command_flags)); + + server_command_flags[COM_STATISTICS]= CF_SKIP_QUERY_ID | CF_SKIP_QUESTIONS | CF_SKIP_WSREP_CHECK; + server_command_flags[COM_PING]= CF_SKIP_QUERY_ID | CF_SKIP_QUESTIONS | CF_SKIP_WSREP_CHECK; + + server_command_flags[COM_QUIT]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_PROCESS_INFO]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_PROCESS_KILL]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_SHUTDOWN]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_SLEEP]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_TIME]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_INIT_DB]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_END]= CF_SKIP_WSREP_CHECK; + for (uint i= COM_MDB_GAP_BEG; i <= COM_MDB_GAP_END; i++) + { + server_command_flags[i]= CF_SKIP_WSREP_CHECK; + } + + /* + COM_QUERY, COM_SET_OPTION and COM_STMT_XXX are allowed to pass the early + COM_xxx filter, they're checked later in mysql_execute_command(). 
+ */ + server_command_flags[COM_QUERY]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_SET_OPTION]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_STMT_PREPARE]= CF_SKIP_QUESTIONS | CF_SKIP_WSREP_CHECK; + server_command_flags[COM_STMT_EXECUTE]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_STMT_FETCH]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_STMT_CLOSE]= CF_SKIP_QUESTIONS | CF_SKIP_WSREP_CHECK; + server_command_flags[COM_STMT_RESET]= CF_SKIP_QUESTIONS | CF_SKIP_WSREP_CHECK; + server_command_flags[COM_STMT_EXECUTE]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_STMT_SEND_LONG_DATA]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_REGISTER_SLAVE]= CF_SKIP_WSREP_CHECK; + + /* Initialize the sql command flags array. */ + memset(sql_command_flags, 0, sizeof(sql_command_flags)); + + /* + In general, DDL statements do not generate row events and do not go + through a cache before being written to the binary log. However, the + CREATE TABLE...SELECT is an exception because it may generate row + events. For that reason, the SQLCOM_CREATE_TABLE which represents + a CREATE TABLE, including the CREATE TABLE...SELECT, has the + CF_CAN_GENERATE_ROW_EVENTS flag. The distinction between a regular + CREATE TABLE and the CREATE TABLE...SELECT is made in other parts of + the code, in particular in the Query_log_event's constructor. 
+ */ + sql_command_flags[SQLCOM_CREATE_TABLE]= CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE | + CF_AUTO_COMMIT_TRANS | CF_REPORT_PROGRESS | + CF_CAN_GENERATE_ROW_EVENTS | + CF_SCHEMA_CHANGE; + sql_command_flags[SQLCOM_CREATE_SEQUENCE]= (CF_CHANGES_DATA | + CF_REEXECUTION_FRAGILE | + CF_AUTO_COMMIT_TRANS | + CF_SCHEMA_CHANGE); + sql_command_flags[SQLCOM_CREATE_INDEX]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS | + CF_ADMIN_COMMAND | CF_REPORT_PROGRESS; + sql_command_flags[SQLCOM_ALTER_TABLE]= CF_CHANGES_DATA | CF_WRITE_LOGS_COMMAND | + CF_AUTO_COMMIT_TRANS | CF_REPORT_PROGRESS | + CF_INSERTS_DATA | CF_ADMIN_COMMAND; + sql_command_flags[SQLCOM_ALTER_SEQUENCE]= CF_CHANGES_DATA | CF_WRITE_LOGS_COMMAND | + CF_AUTO_COMMIT_TRANS | CF_SCHEMA_CHANGE | + CF_ADMIN_COMMAND; + sql_command_flags[SQLCOM_TRUNCATE]= CF_CHANGES_DATA | CF_WRITE_LOGS_COMMAND | + CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_DROP_TABLE]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS | CF_SCHEMA_CHANGE; + sql_command_flags[SQLCOM_DROP_SEQUENCE]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS | CF_SCHEMA_CHANGE; + sql_command_flags[SQLCOM_LOAD]= CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE | + CF_CAN_GENERATE_ROW_EVENTS | CF_REPORT_PROGRESS | + CF_INSERTS_DATA; + sql_command_flags[SQLCOM_CREATE_DB]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS | CF_DB_CHANGE; + sql_command_flags[SQLCOM_DROP_DB]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS | CF_DB_CHANGE; + sql_command_flags[SQLCOM_CREATE_PACKAGE]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_DROP_PACKAGE]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_CREATE_PACKAGE_BODY]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_DROP_PACKAGE_BODY]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_ALTER_DB_UPGRADE]= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_ALTER_DB]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS | CF_DB_CHANGE; + sql_command_flags[SQLCOM_RENAME_TABLE]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS | 
CF_ADMIN_COMMAND; + sql_command_flags[SQLCOM_DROP_INDEX]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS | + CF_REPORT_PROGRESS | CF_ADMIN_COMMAND; + sql_command_flags[SQLCOM_CREATE_VIEW]= CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE | + CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_DROP_VIEW]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_CREATE_TRIGGER]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_DROP_TRIGGER]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_CREATE_EVENT]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_ALTER_EVENT]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_DROP_EVENT]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + + sql_command_flags[SQLCOM_UPDATE]= CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE | + CF_CAN_GENERATE_ROW_EVENTS | + CF_OPTIMIZER_TRACE | + CF_CAN_BE_EXPLAINED | + CF_UPDATES_DATA | + CF_PS_ARRAY_BINDING_SAFE; + sql_command_flags[SQLCOM_UPDATE_MULTI]= CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE | + CF_CAN_GENERATE_ROW_EVENTS | + CF_OPTIMIZER_TRACE | + CF_CAN_BE_EXPLAINED | + CF_UPDATES_DATA | + CF_PS_ARRAY_BINDING_SAFE; + sql_command_flags[SQLCOM_INSERT]= CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE | + CF_CAN_GENERATE_ROW_EVENTS | + CF_OPTIMIZER_TRACE | + CF_CAN_BE_EXPLAINED | + CF_INSERTS_DATA | + CF_PS_ARRAY_BINDING_SAFE | + CF_PS_ARRAY_BINDING_OPTIMIZED; + sql_command_flags[SQLCOM_INSERT_SELECT]= CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE | + CF_CAN_GENERATE_ROW_EVENTS | + CF_OPTIMIZER_TRACE | + CF_CAN_BE_EXPLAINED | + CF_INSERTS_DATA; + sql_command_flags[SQLCOM_DELETE]= CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE | + CF_CAN_GENERATE_ROW_EVENTS | + CF_OPTIMIZER_TRACE | + CF_CAN_BE_EXPLAINED | + CF_DELETES_DATA | + CF_PS_ARRAY_BINDING_SAFE; + sql_command_flags[SQLCOM_DELETE_MULTI]= CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE | + CF_CAN_GENERATE_ROW_EVENTS | + CF_OPTIMIZER_TRACE | + CF_CAN_BE_EXPLAINED | + CF_DELETES_DATA; + sql_command_flags[SQLCOM_REPLACE]= 
CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE | + CF_CAN_GENERATE_ROW_EVENTS | + CF_OPTIMIZER_TRACE | + CF_CAN_BE_EXPLAINED | + CF_INSERTS_DATA | + CF_PS_ARRAY_BINDING_SAFE | + CF_PS_ARRAY_BINDING_OPTIMIZED; + sql_command_flags[SQLCOM_REPLACE_SELECT]= CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE | + CF_CAN_GENERATE_ROW_EVENTS | + CF_OPTIMIZER_TRACE | + CF_CAN_BE_EXPLAINED | + CF_INSERTS_DATA; + sql_command_flags[SQLCOM_SELECT]= CF_REEXECUTION_FRAGILE | + CF_CAN_GENERATE_ROW_EVENTS | + CF_OPTIMIZER_TRACE | + CF_CAN_BE_EXPLAINED; + // (1) so that subquery is traced when doing "SET @var = (subquery)" + /* + @todo SQLCOM_SET_OPTION should have CF_CAN_GENERATE_ROW_EVENTS + set, because it may invoke a stored function that generates row + events. /Sven + */ + sql_command_flags[SQLCOM_SET_OPTION]= CF_REEXECUTION_FRAGILE | + CF_AUTO_COMMIT_TRANS | + CF_CAN_GENERATE_ROW_EVENTS | + CF_OPTIMIZER_TRACE; // (1) + // (1) so that subquery is traced when doing "DO @var := (subquery)" + sql_command_flags[SQLCOM_DO]= CF_REEXECUTION_FRAGILE | + CF_CAN_GENERATE_ROW_EVENTS | + CF_OPTIMIZER_TRACE; // (1) + + sql_command_flags[SQLCOM_SHOW_STATUS_PROC]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; + sql_command_flags[SQLCOM_SHOW_STATUS_PACKAGE]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; + sql_command_flags[SQLCOM_SHOW_STATUS_PACKAGE_BODY]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; + sql_command_flags[SQLCOM_SHOW_STATUS]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; + sql_command_flags[SQLCOM_SHOW_DATABASES]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; + sql_command_flags[SQLCOM_SHOW_TRIGGERS]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; + sql_command_flags[SQLCOM_SHOW_EVENTS]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; + sql_command_flags[SQLCOM_SHOW_OPEN_TABLES]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; + sql_command_flags[SQLCOM_SHOW_PLUGINS]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_GENERIC]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_FIELDS]= CF_STATUS_COMMAND | 
CF_REEXECUTION_FRAGILE; + sql_command_flags[SQLCOM_SHOW_KEYS]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; + sql_command_flags[SQLCOM_SHOW_VARIABLES]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; + sql_command_flags[SQLCOM_SHOW_CHARSETS]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; + sql_command_flags[SQLCOM_SHOW_COLLATIONS]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; + sql_command_flags[SQLCOM_SHOW_BINLOGS]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_SLAVE_HOSTS]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_BINLOG_EVENTS]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_STORAGE_ENGINES]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_AUTHORS]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_CONTRIBUTORS]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_PRIVILEGES]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_WARNS]= CF_STATUS_COMMAND | CF_DIAGNOSTIC_STMT; + sql_command_flags[SQLCOM_SHOW_ERRORS]= CF_STATUS_COMMAND | CF_DIAGNOSTIC_STMT; + sql_command_flags[SQLCOM_SHOW_ENGINE_STATUS]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_ENGINE_MUTEX]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_ENGINE_LOGS]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_EXPLAIN]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_ANALYZE]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_PROCESSLIST]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_GRANTS]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_CREATE_USER]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_CREATE_DB]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_CREATE]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_BINLOG_STAT]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_SLAVE_STAT]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_CREATE_PROC]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_CREATE_FUNC]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_CREATE_PACKAGE]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_CREATE_PACKAGE_BODY]= 
CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_CREATE_TRIGGER]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_STATUS_FUNC]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; + sql_command_flags[SQLCOM_SHOW_PROC_CODE]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_FUNC_CODE]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_PACKAGE_BODY_CODE]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_CREATE_EVENT]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_PROFILES]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_PROFILE]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_BINLOG_BASE64_EVENT]= CF_STATUS_COMMAND | CF_CAN_GENERATE_ROW_EVENTS; + sql_command_flags[SQLCOM_SHOW_TABLES]= (CF_STATUS_COMMAND | CF_SHOW_TABLE_COMMAND | CF_REEXECUTION_FRAGILE); + sql_command_flags[SQLCOM_SHOW_TABLE_STATUS]= (CF_STATUS_COMMAND | CF_SHOW_TABLE_COMMAND | CF_REEXECUTION_FRAGILE); + + + sql_command_flags[SQLCOM_CREATE_USER]= CF_CHANGES_DATA; + sql_command_flags[SQLCOM_RENAME_USER]= CF_CHANGES_DATA; + sql_command_flags[SQLCOM_DROP_USER]= CF_CHANGES_DATA; + sql_command_flags[SQLCOM_ALTER_USER]= CF_CHANGES_DATA; + sql_command_flags[SQLCOM_CREATE_ROLE]= CF_CHANGES_DATA; + sql_command_flags[SQLCOM_GRANT]= CF_CHANGES_DATA; + sql_command_flags[SQLCOM_GRANT_ROLE]= CF_CHANGES_DATA; + sql_command_flags[SQLCOM_REVOKE]= CF_CHANGES_DATA; + sql_command_flags[SQLCOM_REVOKE_ROLE]= CF_CHANGES_DATA; + sql_command_flags[SQLCOM_OPTIMIZE]= CF_CHANGES_DATA; + sql_command_flags[SQLCOM_CREATE_FUNCTION]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_CREATE_PROCEDURE]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_CREATE_SPFUNCTION]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_DROP_PROCEDURE]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_DROP_FUNCTION]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_ALTER_PROCEDURE]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + 
sql_command_flags[SQLCOM_ALTER_FUNCTION]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_INSTALL_PLUGIN]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_UNINSTALL_PLUGIN]= CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS; + + /* + The following is used to preserver CF_ROW_COUNT during the + a CALL or EXECUTE statement, so the value generated by the + last called (or executed) statement is preserved. + See mysql_execute_command() for how CF_ROW_COUNT is used. + */ + /* + (1): without it, in "CALL some_proc((subq))", subquery would not be + traced. + */ + sql_command_flags[SQLCOM_CALL]= CF_REEXECUTION_FRAGILE | + CF_CAN_GENERATE_ROW_EVENTS | + CF_OPTIMIZER_TRACE; // (1) + sql_command_flags[SQLCOM_EXECUTE]= CF_CAN_GENERATE_ROW_EVENTS; + sql_command_flags[SQLCOM_EXECUTE_IMMEDIATE]= CF_CAN_GENERATE_ROW_EVENTS; + sql_command_flags[SQLCOM_COMPOUND]= CF_CAN_GENERATE_ROW_EVENTS; + + /* + We don't want to change to statement based replication for these commands + */ + sql_command_flags[SQLCOM_ROLLBACK]|= CF_FORCE_ORIGINAL_BINLOG_FORMAT; + /* We don't want to replicate ALTER TABLE for temp tables in row format */ + sql_command_flags[SQLCOM_ALTER_TABLE]|= CF_FORCE_ORIGINAL_BINLOG_FORMAT; + /* We don't want to replicate TRUNCATE for temp tables in row format */ + sql_command_flags[SQLCOM_TRUNCATE]|= CF_FORCE_ORIGINAL_BINLOG_FORMAT; + /* We don't want to replicate DROP for temp tables in row format */ + sql_command_flags[SQLCOM_DROP_TABLE]|= CF_FORCE_ORIGINAL_BINLOG_FORMAT; + sql_command_flags[SQLCOM_DROP_SEQUENCE]|= CF_FORCE_ORIGINAL_BINLOG_FORMAT; + /* We don't want to replicate CREATE/DROP INDEX for temp tables in row format */ + sql_command_flags[SQLCOM_CREATE_INDEX]|= CF_FORCE_ORIGINAL_BINLOG_FORMAT; + sql_command_flags[SQLCOM_DROP_INDEX]|= CF_FORCE_ORIGINAL_BINLOG_FORMAT; + /* One can change replication mode with SET */ + sql_command_flags[SQLCOM_SET_OPTION]|= CF_FORCE_ORIGINAL_BINLOG_FORMAT; + + /* + The following admin table operations are 
allowed + on log tables. + */ + sql_command_flags[SQLCOM_REPAIR]= CF_WRITE_LOGS_COMMAND | CF_AUTO_COMMIT_TRANS | + CF_REPORT_PROGRESS | CF_ADMIN_COMMAND; + sql_command_flags[SQLCOM_OPTIMIZE]|= CF_WRITE_LOGS_COMMAND | CF_AUTO_COMMIT_TRANS | + CF_REPORT_PROGRESS | CF_ADMIN_COMMAND; + sql_command_flags[SQLCOM_ANALYZE]= CF_WRITE_LOGS_COMMAND | CF_AUTO_COMMIT_TRANS | + CF_REPORT_PROGRESS | CF_ADMIN_COMMAND; + sql_command_flags[SQLCOM_CHECK]= CF_WRITE_LOGS_COMMAND | CF_AUTO_COMMIT_TRANS | + CF_REPORT_PROGRESS | CF_ADMIN_COMMAND; + sql_command_flags[SQLCOM_CHECKSUM]= CF_REPORT_PROGRESS; + + sql_command_flags[SQLCOM_CREATE_USER]|= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_ALTER_USER]|= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_DROP_USER]|= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_RENAME_USER]|= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_CREATE_ROLE]|= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_DROP_ROLE]|= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_REVOKE]|= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_REVOKE_ALL]= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_REVOKE_ROLE]|= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_GRANT]|= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_GRANT_ROLE]|= CF_AUTO_COMMIT_TRANS; + + sql_command_flags[SQLCOM_FLUSH]= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_RESET]= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_CREATE_SERVER]= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_ALTER_SERVER]= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_DROP_SERVER]= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_BACKUP]= CF_AUTO_COMMIT_TRANS; + sql_command_flags[SQLCOM_BACKUP_LOCK]= CF_AUTO_COMMIT_TRANS; + + /* + The following statements can deal with temporary tables, + so temporary tables should be pre-opened for those statements to + simplify privilege checking. + + There are other statements that deal with temporary tables and open + them, but which are not listed here. 
The thing is that the order of + pre-opening temporary tables for those statements is somewhat custom. + + Note that SQLCOM_RENAME_TABLE should not be in this list! + */ + sql_command_flags[SQLCOM_CREATE_TABLE]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_CREATE_INDEX]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_ALTER_TABLE]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_TRUNCATE]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_LOAD]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_DROP_INDEX]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_UPDATE]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_UPDATE_MULTI]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_INSERT_SELECT]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_DELETE]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_DELETE_MULTI]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_REPLACE_SELECT]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_SELECT]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_SET_OPTION]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_DO]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_HA_OPEN]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_CALL]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_CHECKSUM]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_ANALYZE]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_CHECK]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_OPTIMIZE]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_REPAIR]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_PRELOAD_KEYS]|= CF_PREOPEN_TMP_TABLES; + sql_command_flags[SQLCOM_ASSIGN_TO_KEYCACHE]|= CF_PREOPEN_TMP_TABLES; + + /* + DDL statements that should start with closing opened handlers. + + We use this flag only for statements for which open HANDLERs + have to be closed before temporary tables are pre-opened. 
+ */ + sql_command_flags[SQLCOM_CREATE_TABLE]|= CF_HA_CLOSE; + sql_command_flags[SQLCOM_CREATE_SEQUENCE]|= CF_HA_CLOSE; + sql_command_flags[SQLCOM_DROP_TABLE]|= CF_HA_CLOSE; + sql_command_flags[SQLCOM_DROP_SEQUENCE]|= CF_HA_CLOSE; + sql_command_flags[SQLCOM_ALTER_TABLE]|= CF_HA_CLOSE; + sql_command_flags[SQLCOM_TRUNCATE]|= CF_HA_CLOSE; + sql_command_flags[SQLCOM_REPAIR]|= CF_HA_CLOSE; + sql_command_flags[SQLCOM_OPTIMIZE]|= CF_HA_CLOSE; + sql_command_flags[SQLCOM_ANALYZE]|= CF_HA_CLOSE; + sql_command_flags[SQLCOM_CHECK]|= CF_HA_CLOSE; + sql_command_flags[SQLCOM_CREATE_INDEX]|= CF_HA_CLOSE; + sql_command_flags[SQLCOM_DROP_INDEX]|= CF_HA_CLOSE; + sql_command_flags[SQLCOM_PRELOAD_KEYS]|= CF_HA_CLOSE; + sql_command_flags[SQLCOM_ASSIGN_TO_KEYCACHE]|= CF_HA_CLOSE; + + /* + Mark statements that always are disallowed in read-only + transactions. Note that according to the SQL standard, + even temporary table DDL should be disallowed. + */ + sql_command_flags[SQLCOM_CREATE_TABLE]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_CREATE_SEQUENCE]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_ALTER_TABLE]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_DROP_TABLE]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_DROP_SEQUENCE]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_RENAME_TABLE]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_CREATE_INDEX]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_DROP_INDEX]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_CREATE_DB]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_DROP_DB]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_CREATE_PACKAGE]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_DROP_PACKAGE]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_CREATE_PACKAGE_BODY]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_DROP_PACKAGE_BODY]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_ALTER_DB_UPGRADE]|= CF_DISALLOW_IN_RO_TRANS; + 
sql_command_flags[SQLCOM_ALTER_DB]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_CREATE_VIEW]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_DROP_VIEW]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_CREATE_TRIGGER]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_DROP_TRIGGER]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_CREATE_EVENT]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_ALTER_EVENT]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_DROP_EVENT]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_CREATE_USER]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_ALTER_USER]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_RENAME_USER]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_DROP_USER]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_CREATE_SERVER]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_ALTER_SERVER]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_DROP_SERVER]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_CREATE_FUNCTION]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_CREATE_PROCEDURE]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_CREATE_SPFUNCTION]|=CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_DROP_PROCEDURE]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_DROP_FUNCTION]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_ALTER_PROCEDURE]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_ALTER_FUNCTION]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_TRUNCATE]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_REPAIR]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_OPTIMIZE]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_GRANT]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_REVOKE]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_REVOKE_ALL]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_INSTALL_PLUGIN]|= CF_DISALLOW_IN_RO_TRANS; + sql_command_flags[SQLCOM_UNINSTALL_PLUGIN]|= 
CF_DISALLOW_IN_RO_TRANS; +#ifdef WITH_WSREP + /* + Statements for which some errors are ignored when + wsrep_ignore_apply_errors = WSREP_IGNORE_ERRORS_ON_RECONCILING_DDL + */ + sql_command_flags[SQLCOM_DROP_DB]|= CF_WSREP_MAY_IGNORE_ERRORS; + sql_command_flags[SQLCOM_DROP_TABLE]|= CF_WSREP_MAY_IGNORE_ERRORS; + sql_command_flags[SQLCOM_DROP_INDEX]|= CF_WSREP_MAY_IGNORE_ERRORS; + sql_command_flags[SQLCOM_ALTER_TABLE]|= CF_WSREP_MAY_IGNORE_ERRORS; +#endif /* WITH_WSREP */ +} + +bool sqlcom_can_generate_row_events(const THD *thd) +{ + return (sql_command_flags[thd->lex->sql_command] & + CF_CAN_GENERATE_ROW_EVENTS); +} + +bool is_update_query(enum enum_sql_command command) +{ + DBUG_ASSERT(command <= SQLCOM_END); + return (sql_command_flags[command] & CF_CHANGES_DATA) != 0; +} + +/** + Check if a sql command is allowed to write to log tables. + @param command The SQL command + @return true if writing is allowed +*/ +bool is_log_table_write_query(enum enum_sql_command command) +{ + DBUG_ASSERT(command <= SQLCOM_END); + return (sql_command_flags[command] & CF_WRITE_LOGS_COMMAND) != 0; +} + +void execute_init_command(THD *thd, LEX_STRING *init_command, + mysql_rwlock_t *var_lock) +{ + Vio* save_vio; + ulonglong save_client_capabilities; + + mysql_rwlock_rdlock(var_lock); + if (!init_command->length) + { + mysql_rwlock_unlock(var_lock); + return; + } + + /* + copy the value under a lock, and release the lock. + init_command has to be executed without a lock held, + as it may try to change itself + */ + size_t len= init_command->length; + char *buf= thd->strmake(init_command->str, len); + mysql_rwlock_unlock(var_lock); + + THD_STAGE_INFO(thd, stage_execution_of_init_command); + save_client_capabilities= thd->client_capabilities; + thd->client_capabilities|= CLIENT_MULTI_QUERIES; + /* + We don't need return result of execution to client side. + To forbid this we should set thd->net.vio to 0. 
+ */ + save_vio= thd->net.vio; + thd->net.vio= 0; + thd->clear_error(1); + dispatch_command(COM_QUERY, thd, buf, (uint)len); + thd->client_capabilities= save_client_capabilities; + thd->net.vio= save_vio; + +} + + +static char *fgets_fn(char *buffer, size_t size, fgets_input_t input, int *error) +{ + MYSQL_FILE *in= static_cast (input); + char *line= mysql_file_fgets(buffer, (int)size, in); + if (unlikely(error)) + *error= (line == NULL) ? ferror(in->m_file) : 0; + return line; +} + + +int bootstrap(MYSQL_FILE *file) +{ + int bootstrap_error= 0; + DBUG_ENTER("handle_bootstrap"); + + THD *thd= new THD(next_thread_id()); + char *buffer= new char[MAX_BOOTSTRAP_QUERY_SIZE]; +#ifdef WITH_WSREP + thd->variables.wsrep_on= 0; +#endif + thd->bootstrap=1; + my_net_init(&thd->net,(st_vio*) 0, thd, MYF(0)); + thd->max_client_packet_length= thd->net.max_packet; + thd->security_ctx->master_access= ALL_KNOWN_ACL; + +#ifndef EMBEDDED_LIBRARY + mysql_thread_set_psi_id(thd->thread_id); +#else + thd->mysql= 0; +#endif + + /* The following must be called before DBUG_ENTER */ + thd->thread_stack= (char*) &thd; + thd->store_globals(); + + thd->security_ctx->user= (char*) my_strdup(key_memory_MPVIO_EXT_auth_info, + "boot", MYF(MY_WME)); + thd->security_ctx->priv_user[0]= thd->security_ctx->priv_host[0]= + thd->security_ctx->priv_role[0]= 0; + /* + Make the "client" handle multiple results. This is necessary + to enable stored procedures with SELECTs and Dynamic SQL + in init-file. + */ + thd->client_capabilities|= CLIENT_MULTI_RESULTS; + + thd->init_for_queries(); + + for ( ; ; ) + { + buffer[0]= 0; + int rc, length; + char *query; + int error= 0; + + rc= read_bootstrap_query(buffer, &length, file, fgets_fn, 0, &error); + + if (rc == READ_BOOTSTRAP_EOF) + break; + /* + Check for bootstrap file errors. SQL syntax errors will be + caught below. + */ + if (rc != READ_BOOTSTRAP_SUCCESS) + { + /* + mysql_parse() may have set a successful error status for the previous + query. 
We must clear the error status to report the bootstrap error. + */ + thd->get_stmt_da()->reset_diagnostics_area(); + + /* Get the nearest query text for reference. */ + char *err_ptr= buffer + (length <= MAX_BOOTSTRAP_ERROR_LEN ? + 0 : (length - MAX_BOOTSTRAP_ERROR_LEN)); + switch (rc) + { + case READ_BOOTSTRAP_ERROR: + my_printf_error(ER_UNKNOWN_ERROR, "Bootstrap file error, return code (%d). " + "Nearest query: '%s'", MYF(0), error, err_ptr); + break; + + case READ_BOOTSTRAP_QUERY_SIZE: + my_printf_error(ER_UNKNOWN_ERROR, "Bootstrap file error. Query size " + "exceeded %d bytes near '%s'.", MYF(0), + MAX_BOOTSTRAP_QUERY_SIZE, err_ptr); + break; + + default: + DBUG_ASSERT(false); + break; + } + + thd->protocol->end_statement(); + bootstrap_error= 1; + break; + } + + query= (char *) thd->memdup_w_gap(buffer, length + 1, + thd->db.length + 1 + + QUERY_CACHE_DB_LENGTH_SIZE + + QUERY_CACHE_FLAGS_SIZE); + size_t db_len= 0; + memcpy(query + length + 1, (char *) &db_len, sizeof(size_t)); + thd->set_query_and_id(query, length, thd->charset(), next_query_id()); + int2store(query + length + 1, 0); // No db in bootstrap + DBUG_PRINT("query",("%-.4096s",thd->query())); +#if defined(ENABLED_PROFILING) + thd->profiling.start_new_query(); + thd->profiling.set_query_source(thd->query(), length); +#endif + + thd->set_time(); + Parser_state parser_state; + if (parser_state.init(thd, thd->query(), length)) + { + thd->protocol->end_statement(); + bootstrap_error= 1; + break; + } + + mysql_parse(thd, thd->query(), length, &parser_state); + + bootstrap_error= thd->is_error(); + thd->protocol->end_statement(); + +#if defined(ENABLED_PROFILING) + thd->profiling.finish_current_query(); +#endif + delete_explain_query(thd->lex); + + if (unlikely(bootstrap_error)) + break; + + thd->reset_kill_query(); /* Ensure that killed_errmsg is released */ + free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); + thd->lex->restore_set_statement_var(); + } + delete thd; + delete[] buffer; + 
DBUG_RETURN(bootstrap_error); +} + + +/* This works because items are allocated on THD::mem_root */ + +void free_items(Item *item) +{ + Item *next; + DBUG_ENTER("free_items"); + for (; item ; item=next) + { + next=item->next; + item->delete_self(); + } + DBUG_VOID_RETURN; +} + +/** + This works because items are allocated on THD::mem_root. + @note The function also handles null pointers (empty list). +*/ +void cleanup_items(Item *item) +{ + DBUG_ENTER("cleanup_items"); + for (; item ; item=item->next) + item->cleanup(); + DBUG_VOID_RETURN; +} + +#ifdef WITH_WSREP +static bool wsrep_tables_accessible_when_detached(const TABLE_LIST *tables) +{ + for (const TABLE_LIST *table= tables; table; table= table->next_global) + { + LEX_CSTRING db= table->db, tn= table->table_name; + if (get_table_category(&db, &tn) < TABLE_CATEGORY_INFORMATION) + return false; + } + return tables != NULL; +} + +static bool wsrep_command_no_result(char command) +{ + return (command == COM_STMT_FETCH || + command == COM_STMT_SEND_LONG_DATA || + command == COM_STMT_CLOSE); +} +#endif /* WITH_WSREP */ +#ifndef EMBEDDED_LIBRARY +static enum enum_server_command fetch_command(THD *thd, char *packet) +{ + enum enum_server_command + command= (enum enum_server_command) (uchar) packet[0]; + DBUG_ENTER("fetch_command"); + + if (command >= COM_END || + (command >= COM_MDB_GAP_BEG && command <= COM_MDB_GAP_END)) + command= COM_END; // Wrong command + + DBUG_PRINT("info",("Command on %s = %d (%s)", + vio_description(thd->net.vio), command, + command_name[command].str)); + DBUG_RETURN(command); +} + +/** + Read one command from connection and execute it (query or simple command). + This function is to be used by different schedulers (one-thread-per-connection, + pool-of-threads) + + For profiling to work, it must never be called recursively. + + @param thd - client connection context + + @param blocking - wait for command to finish. 
+ if false (nonblocking), then the function might + return when command is "half-finished", with + DISPATCH_COMMAND_WOULDBLOCK. + Currenly, this can *only* happen when using + threadpool. The command will resume, after all outstanding + async operations (i.e group commit) finish. + Threadpool scheduler takes care of "resume". + + @retval + DISPATCH_COMMAND_SUCCESS - success + @retval + DISPATCH_COMMAND_CLOSE_CONNECTION request of THD shutdown + (s. dispatch_command() description) + @retval + DISPATCH_COMMAND_WOULDBLOCK - need to wait for asynchronous operations + to finish. Only returned if parameter + 'blocking' is false. +*/ + +dispatch_command_return do_command(THD *thd, bool blocking) +{ + dispatch_command_return return_value; + char *packet= 0; + ulong packet_length; + NET *net= &thd->net; + enum enum_server_command command; + DBUG_ENTER("do_command"); + +#ifdef WITH_WSREP + DBUG_ASSERT(!thd->async_state.pending_ops() || + (WSREP(thd) && + thd->wsrep_trx().state() == wsrep::transaction::s_aborted)); +#else + DBUG_ASSERT(!thd->async_state.pending_ops()); +#endif + + if (thd->async_state.m_state == thd_async_state::enum_async_state::RESUMED) + { + /* + Resuming previously suspended command. + Restore the state + */ + command = thd->async_state.m_command; + packet = thd->async_state.m_packet.str; + packet_length = (ulong)thd->async_state.m_packet.length; + goto resume; + } + + /* + indicator of uninitialized lex => normal flow of errors handling + (see my_message_sql) + */ + thd->lex->current_select= 0; + + /* + This thread will do a blocking read from the client which + will be interrupted when the next command is received from + the client, the connection is closed or "net_wait_timeout" + number of seconds has passed. 
+ */ + if (!thd->skip_wait_timeout) + my_net_set_read_timeout(net, thd->get_net_wait_timeout()); + + /* Errors and diagnostics are cleared once here before query */ + thd->clear_error(1); + + net_new_transaction(net); + + /* Save for user statistics */ + thd->start_bytes_received= thd->status_var.bytes_received; + + /* + Synchronization point for testing of KILL_CONNECTION. + This sync point can wait here, to simulate slow code execution + between the last test of thd->killed and blocking in read(). + + The goal of this test is to verify that a connection does not + hang, if it is killed at this point of execution. + (Bug#37780 - main.kill fails randomly) + + Note that the sync point wait itself will be terminated by a + kill. In this case it consumes a condition broadcast, but does + not change anything else. The consumed broadcast should not + matter here, because the read/recv() below doesn't use it. + */ + DEBUG_SYNC(thd, "before_do_command_net_read"); + + packet_length= my_net_read_packet(net, 1); + + if (unlikely(packet_length == packet_error)) + { + DBUG_PRINT("info",("Got error %d reading command from socket %s", + net->error, + vio_description(net->vio))); + + /* Instrument this broken statement as "statement/com/error" */ + thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi, + com_statement_info[COM_END]. + m_key); + + + /* Check if we can continue without closing the connection */ + + /* The error must be set. */ + DBUG_ASSERT(thd->is_error()); + thd->protocol->end_statement(); + + /* Mark the statement completed. */ + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; + thd->m_digest= NULL; + + if (net->error != 3) + { + return_value= DISPATCH_COMMAND_CLOSE_CONNECTION; // We have to close it. 
+ goto out; + } + + net->error= 0; + return_value= DISPATCH_COMMAND_SUCCESS; + goto out; + } + + packet= (char*) net->read_pos; + /* + 'packet_length' contains length of data, as it was stored in packet + header. In case of malformed header, my_net_read returns zero. + If packet_length is not zero, my_net_read ensures that the returned + number of bytes was actually read from network. + There is also an extra safety measure in my_net_read: + it sets packet[packet_length]= 0, but only for non-zero packets. + */ + if (packet_length == 0) /* safety */ + { + /* Initialize with COM_SLEEP packet */ + packet[0]= (uchar) COM_SLEEP; + packet_length= 1; + } + /* Do not rely on my_net_read, extra safety against programming errors. */ + packet[packet_length]= '\0'; /* safety */ + + + command= fetch_command(thd, packet); + +#ifdef WITH_WSREP + DEBUG_SYNC(thd, "wsrep_before_before_command"); + /* + If this command does not return a result, then we + instruct wsrep_before_command() to skip result handling. + This causes BF aborted transaction to roll back but keep + the error state until next command which is able to return + a result to the client. + */ + if (unlikely(wsrep_service_started) && + wsrep_before_command(thd, wsrep_command_no_result(command))) + { + /* + Aborted by background rollbacker thread. + Handle error here and jump straight to out. + Notice that thd->store_globals() is called + in wsrep_before_command(). + */ + WSREP_LOG_THD(thd, "enter found BF aborted"); + DBUG_ASSERT(!thd->mdl_context.has_transactional_locks()); + DBUG_ASSERT(!thd->get_stmt_da()->is_set()); + /* We let COM_QUIT and COM_STMT_CLOSE to execute even if wsrep aborted. 
*/ + if (command == COM_STMT_EXECUTE) + { + WSREP_DEBUG("PS BF aborted at do_command"); + thd->wsrep_delayed_BF_abort= true; + } + if (command != COM_STMT_CLOSE && + command != COM_STMT_EXECUTE && + command != COM_QUIT) + { + my_error(ER_LOCK_DEADLOCK, MYF(0)); + WSREP_DEBUG("Deadlock error for: %s", thd->query()); + thd->reset_killed(); + thd->mysys_var->abort = 0; + thd->wsrep_retry_counter = 0; + + /* Instrument this broken statement as "statement/com/error" */ + thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi, + com_statement_info[COM_END]. + m_key); + + thd->protocol->end_statement(); + + /* Mark the statement completed. */ + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; + thd->m_digest= NULL; + return_value= DISPATCH_COMMAND_SUCCESS; + + wsrep_after_command_before_result(thd); + goto out; + } + } + + if (WSREP(thd)) + { + /* + * bail out if DB snapshot has not been installed. We however, + * allow queries "SET" and "SHOW", they are trapped later in execute_command + */ + if (!(thd->wsrep_applier) && + (!wsrep_ready_get() || wsrep_reject_queries != WSREP_REJECT_NONE) && + (server_command_flags[command] & CF_SKIP_WSREP_CHECK) == 0) + { + my_message(ER_UNKNOWN_COM_ERROR, + "WSREP has not yet prepared node for application use", MYF(0)); + thd->protocol->end_statement(); + + /* Performance Schema Interface instrumentation end. 
*/ + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; + thd->m_digest= NULL; + + return_value= DISPATCH_COMMAND_SUCCESS; + wsrep_after_command_before_result(thd); + goto out; + } + } +#endif /* WITH_WSREP */ + /* Restore read timeout value */ + my_net_set_read_timeout(net, thd->variables.net_read_timeout); + + DBUG_ASSERT(packet_length); + DBUG_ASSERT(!thd->apc_target.is_enabled()); + +resume: + return_value= dispatch_command(command, thd, packet+1, + (uint) (packet_length-1), blocking); + if (return_value == DISPATCH_COMMAND_WOULDBLOCK) + { + /* Save current state, and resume later.*/ + thd->async_state.m_command= command; + thd->async_state.m_packet={packet,packet_length}; + DBUG_RETURN(return_value); + } + + DBUG_ASSERT(!thd->apc_target.is_enabled()); + +out: + thd->lex->restore_set_statement_var(); + /* The statement instrumentation must be closed in all cases. */ + DBUG_ASSERT(thd->m_digest == NULL); + DBUG_ASSERT(thd->m_statement_psi == NULL); +#ifdef WITH_WSREP + if (packet_length != packet_error) + { + /* there was a command to process, and before_command() has been called */ + if (unlikely(wsrep_service_started)) + wsrep_after_command_after_result(thd); + } + + if (thd->wsrep_delayed_BF_abort) + { + my_error(ER_LOCK_DEADLOCK, MYF(0)); + WSREP_DEBUG("Deadlock error for PS query: %s", thd->query()); + thd->reset_killed(); + thd->mysys_var->abort = 0; + thd->wsrep_retry_counter = 0; + + thd->wsrep_delayed_BF_abort= false; + } +#endif /* WITH_WSREP */ + DBUG_RETURN(return_value); +} +#endif /* EMBEDDED_LIBRARY */ + +/** + @brief Determine if an attempt to update a non-temporary table while the + read-only option was enabled has been made. + + This is a helper function to mysql_execute_command. + + @note SQLCOM_MULTI_UPDATE is an exception and dealt with elsewhere. + + @see mysql_execute_command + @returns Status code + @retval TRUE The statement should be denied. 
+ @retval FALSE The statement isn't updating any relevant tables. +*/ + +static bool deny_updates_if_read_only_option(THD *thd, TABLE_LIST *all_tables) +{ + DBUG_ENTER("deny_updates_if_read_only_option"); + + if (!opt_readonly) + DBUG_RETURN(FALSE); + + LEX *lex= thd->lex; + + /* Super user is allowed to do changes */ + if ((thd->security_ctx->master_access & PRIV_IGNORE_READ_ONLY) != NO_ACL) + DBUG_RETURN(FALSE); + + /* Check if command doesn't update anything */ + if (!(sql_command_flags[lex->sql_command] & CF_CHANGES_DATA)) + DBUG_RETURN(FALSE); + + /* Multi update is an exception and is dealt with later. */ + if (lex->sql_command == SQLCOM_UPDATE_MULTI) + DBUG_RETURN(FALSE); + + /* + a table-to-be-created is not in the temp table list yet, + so CREATE TABLE needs a special treatment + */ + if (lex->sql_command == SQLCOM_CREATE_TABLE) + DBUG_RETURN(!lex->tmp_table()); + + /* + a table-to-be-dropped might not exist (DROP TEMPORARY TABLE IF EXISTS), + cannot use the temp table list either. + */ + if (lex->sql_command == SQLCOM_DROP_TABLE && lex->tmp_table()) + DBUG_RETURN(FALSE); + + /* Check if we created, dropped, or renamed a database */ + if ((sql_command_flags[lex->sql_command] & CF_DB_CHANGE)) + DBUG_RETURN(TRUE); + + if (some_non_temp_table_to_be_updated(thd, all_tables)) + DBUG_RETURN(TRUE); + + /* Assuming that only temporary tables are modified. 
*/ + DBUG_RETURN(FALSE); +} + +#ifdef WITH_WSREP +static void wsrep_copy_query(THD *thd) +{ + thd->wsrep_retry_command = thd->get_command(); + thd->wsrep_retry_query_len = thd->query_length(); + if (thd->wsrep_retry_query) { + my_free(thd->wsrep_retry_query); + } + thd->wsrep_retry_query = (char *)my_malloc(PSI_INSTRUMENT_ME, + thd->wsrep_retry_query_len + 1, MYF(0)); + strncpy(thd->wsrep_retry_query, thd->query(), thd->wsrep_retry_query_len); + thd->wsrep_retry_query[thd->wsrep_retry_query_len] = '\0'; +} +#endif /* WITH_WSREP */ + + + +#if defined(WITH_ARIA_STORAGE_ENGINE) +class Silence_all_errors : public Internal_error_handler +{ + char m_message[MYSQL_ERRMSG_SIZE]; + int error; +public: + Silence_all_errors():error(0) {} + virtual ~Silence_all_errors() {} + + virtual bool handle_condition(THD *thd, + uint sql_errno, + const char* sql_state, + Sql_condition::enum_warning_level *level, + const char* msg, + Sql_condition ** cond_hdl) + { + error= sql_errno; + *cond_hdl= NULL; + strmake_buf(m_message, msg); + return true; // Error handled + } +}; +#endif + + +/** + Perform one connection-level (COM_XXXX) command. + + @param command type of command to perform + @param thd connection handle + @param packet data for the command, packet is always null-terminated + @param packet_length length of packet + 1 (to show that data is + null-terminated) except for COM_SLEEP, where it + can be zero. + @param blocking if false (nonblocking), then the function might + return when command is "half-finished", with + DISPATCH_COMMAND_WOULDBLOCK. + Currenly, this can *only* happen when using threadpool. + The current command will resume, after all outstanding + async operations (i.e group commit) finish. + Threadpool scheduler takes care of "resume". + + @todo + set thd->lex->sql_command to SQLCOM_END here. + @todo + The following has to be changed to an 8 byte integer + + @retval + 0 ok + @retval + 1 request of thread shutdown, i. e. 
    if command is
    COM_QUIT/COM_SHUTDOWN
*/
dispatch_command_return dispatch_command(enum enum_server_command command, THD *thd,
                                         char* packet, uint packet_length, bool blocking)
{
  NET *net= &thd->net;
  bool error= 0;
  bool do_end_of_statement= true;
  DBUG_ENTER("dispatch_command");
  DBUG_PRINT("info", ("command: %d %s", command,
                      (command_name[command].str != 0 ?
                       command_name[command].str :
                       "")));
  bool drop_more_results= 0;

  /* Threadpool re-entry: do_command() restored command/packet, skip setup */
  if (thd->async_state.m_state == thd_async_state::enum_async_state::RESUMED)
  {
    thd->async_state.m_state = thd_async_state::enum_async_state::NONE;
    goto resume;
  }

  /* keep it withing 1 byte */
  compile_time_assert(COM_END == 255);

#if defined(ENABLED_PROFILING)
  thd->profiling.start_new_query();
#endif
  MYSQL_COMMAND_START(thd->thread_id, command,
                      &thd->security_ctx->priv_user[0],
                      (char *) thd->security_ctx->host_or_ip);

  DBUG_EXECUTE_IF("crash_dispatch_command_before",
                  { DBUG_PRINT("crash_dispatch_command_before", ("now"));
                    DBUG_SUICIDE(); });

  /* Performance Schema Interface instrumentation, begin */
  thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi,
                                               com_statement_info[command].
                                               m_key);
  /*
    We should always call reset_for_next_command() before a query.
    mysql_parse() will do this for queries. Ensure it's also done
    for other commands.
  */
  if (command != COM_QUERY)
    thd->reset_for_next_command();
  thd->set_command(command);

  thd->enable_slow_log= true;
  thd->query_plan_flags= QPLAN_INIT;
  thd->lex->sql_command= SQLCOM_END; /* to avoid confusing VIEW detectors */
  thd->reset_kill_query();

  DEBUG_SYNC(thd,"dispatch_command_before_set_time");

  thd->set_time();
  if (!(server_command_flags[command] & CF_SKIP_QUERY_ID))
    thd->set_query_id(next_query_id());
  else
  {
    /*
      ping, get statistics or similar stateless command.
      No reason to increase query id here.
    */
    thd->set_query_id(get_query_id());
  }
#ifdef WITH_WSREP
  if (WSREP(thd) && thd->wsrep_next_trx_id() == WSREP_UNDEFINED_TRX_ID)
  {
    thd->set_wsrep_next_trx_id(thd->query_id);
    WSREP_DEBUG("assigned new next trx id: %" PRIu64, thd->wsrep_next_trx_id());
  }
#endif /* WITH_WSREP */

  if (!(server_command_flags[command] & CF_SKIP_QUESTIONS))
    statistic_increment(thd->status_var.questions, &LOCK_status);

  /* Copy data for user stats */
  if ((thd->userstat_running= opt_userstat_running))
  {
    thd->start_cpu_time= my_getcputime();
    memcpy(&thd->org_status_var, &thd->status_var, sizeof(thd->status_var));
    thd->select_commands= thd->update_commands= thd->other_commands= 0;
  }

  /**
    Clear the set of flags that are expected to be cleared at the
    beginning of each command.
  */
  thd->server_status&= ~SERVER_STATUS_CLEAR_SET;

  /* Expired password: only commands needed to change it are allowed */
  if (unlikely(thd->security_ctx->password_expired &&
               command != COM_QUERY &&
               command != COM_PING &&
               command != COM_QUIT &&
               command != COM_STMT_PREPARE &&
               command != COM_STMT_EXECUTE &&
               command != COM_STMT_CLOSE))
  {
    my_error(ER_MUST_CHANGE_PASSWORD, MYF(0));
    goto dispatch_end;
  }

  switch (command) {
  case COM_INIT_DB:
  {
    LEX_CSTRING tmp;
    status_var_increment(thd->status_var.com_stat[SQLCOM_CHANGE_DB]);
    if (unlikely(thd->copy_with_error(system_charset_info, (LEX_STRING*) &tmp,
                                      thd->charset(), packet, packet_length)))
      break;
    if (!mysql_change_db(thd, &tmp, FALSE))
    {
      general_log_write(thd, command, thd->db.str, thd->db.length);
      my_ok(thd);
    }
    break;
  }
#ifdef HAVE_REPLICATION
  case COM_REGISTER_SLAVE:
  {
    status_var_increment(thd->status_var.com_register_slave);
    if (!thd->register_slave((uchar*) packet, packet_length))
      my_ok(thd);
    break;
  }
#endif
  case COM_RESET_CONNECTION:
  {
    thd->status_var.com_other++;
#ifdef WITH_WSREP
    if (unlikely(wsrep_service_started))
    {
      wsrep_after_command_ignore_result(thd);
      wsrep_close(thd);
    }
#endif /* WITH_WSREP */
    thd->change_user();
    thd->clear_error();                         // if errors from rollback
#ifdef WITH_WSREP
    if (unlikely(wsrep_service_started))
    {
      wsrep_open(thd);
      wsrep_before_command(thd);
    }
#endif /* WITH_WSREP */
    /* Restore original charset from client authentication packet.*/
    if(thd->org_charset)
      thd->update_charset(thd->org_charset,thd->org_charset,thd->org_charset);
    my_ok(thd, 0, 0, 0);
    break;
  }
  case COM_CHANGE_USER:
  {
    int auth_rc;
    status_var_increment(thd->status_var.com_other);

#ifdef WITH_WSREP
    if (unlikely(wsrep_service_started))
    {
      wsrep_after_command_ignore_result(thd);
      wsrep_close(thd);
    }
#endif /* WITH_WSREP */
    thd->change_user();
#ifdef WITH_WSREP
    if (unlikely(wsrep_service_started))
    {
      wsrep_open(thd);
      wsrep_before_command(thd);
    }
#endif /* WITH_WSREP */
    thd->clear_error();                         // if errors from rollback

    /* acl_authenticate() takes the data from net->read_pos */
    net->read_pos= (uchar*)packet;

    /* Saved so the whole change can be reverted if authentication fails */
    LEX_CSTRING save_db= thd->db;
    USER_CONN *save_user_connect= thd->user_connect;
    Security_context save_security_ctx= *thd->security_ctx;
    CHARSET_INFO *save_character_set_client=
      thd->variables.character_set_client;
    CHARSET_INFO *save_collation_connection=
      thd->variables.collation_connection;
    CHARSET_INFO *save_character_set_results=
      thd->variables.character_set_results;

    /* Ensure we don't free security_ctx->user in case we have to revert */
    thd->security_ctx->user= 0;
    thd->user_connect= 0;

    /*
      to limit COM_CHANGE_USER ability to brute-force passwords,
      we only allow three unsuccessful COM_CHANGE_USER per connection.
    */
    if (thd->failed_com_change_user >= 3)
    {
      my_message(ER_UNKNOWN_COM_ERROR, ER_THD(thd,ER_UNKNOWN_COM_ERROR),
                 MYF(0));
      auth_rc= 1;
    }
    else
      auth_rc= acl_authenticate(thd, packet_length);

    mysql_audit_notify_connection_change_user(thd, &save_security_ctx);
    if (auth_rc)
    {
      /* Free user if allocated by acl_authenticate */
      /* NOTE(review): <char*> reconstructed; extraction stripped <...> */
      my_free(const_cast<char*>(thd->security_ctx->user));
      *thd->security_ctx= save_security_ctx;
      if (thd->user_connect)
        decrease_user_connections(thd->user_connect);
      thd->user_connect= save_user_connect;
      thd->reset_db(&save_db);
      thd->update_charset(save_character_set_client, save_collation_connection,
                          save_character_set_results);
      thd->failed_com_change_user++;
      /* throttle brute-force attempts: 1 second penalty per failure */
      my_sleep(1000000);
    }
    else
    {
#ifndef NO_EMBEDDED_ACCESS_CHECKS
      /* we've authenticated new user */
      if (save_user_connect)
        decrease_user_connections(save_user_connect);
#endif /* NO_EMBEDDED_ACCESS_CHECKS */
      my_free((char*) save_db.str);
      my_free(const_cast<char*>(save_security_ctx.user));
    }
    break;
  }
  case COM_STMT_BULK_EXECUTE:
  {
    mysqld_stmt_bulk_execute(thd, packet, packet_length);
#ifdef WITH_WSREP
    if (WSREP(thd))
    {
      (void)wsrep_after_statement(thd);
    }
#endif /* WITH_WSREP */
    break;
  }
  case COM_STMT_EXECUTE:
  {
    mysqld_stmt_execute(thd, packet, packet_length);
#ifdef WITH_WSREP
    if (WSREP(thd))
    {
      (void)wsrep_after_statement(thd);
    }
#endif /* WITH_WSREP */
    break;
  }
  case COM_STMT_FETCH:
  {
    mysqld_stmt_fetch(thd, packet, packet_length);
    break;
  }
  case COM_STMT_SEND_LONG_DATA:
  {
    mysql_stmt_get_longdata(thd, packet, packet_length);
    break;
  }
  case COM_STMT_PREPARE:
  {
    mysqld_stmt_prepare(thd, packet, packet_length);
    break;
  }
  case COM_STMT_CLOSE:
  {
    mysqld_stmt_close(thd, packet);
    break;
  }
  case COM_STMT_RESET:
  {
    mysqld_stmt_reset(thd, packet);
    break;
  }
  case COM_QUERY:
  {
    DBUG_ASSERT(thd->m_digest == NULL);
    thd->m_digest= & thd->m_digest_state;
    thd->m_digest->reset(thd->m_token_array, max_digest_length);

    if (unlikely(alloc_query(thd, packet, packet_length)))
      break;                                    // fatal error is set
    MYSQL_QUERY_START(thd->query(), thd->thread_id,
                      thd->get_db(),
                      &thd->security_ctx->priv_user[0],
                      (char *) thd->security_ctx->host_or_ip);
    char *packet_end= thd->query() + thd->query_length();
    general_log_write(thd, command, thd->query(), thd->query_length());
    DBUG_PRINT("query",("%-.4096s",thd->query()));
#if defined(ENABLED_PROFILING)
    thd->profiling.set_query_source(thd->query(), thd->query_length());
#endif
    MYSQL_SET_STATEMENT_TEXT(thd->m_statement_psi, thd->query(),
                             thd->query_length());

    Parser_state parser_state;
    if (unlikely(parser_state.init(thd, thd->query(), thd->query_length())))
      break;

#ifdef WITH_WSREP
    if (WSREP(thd))
    {
      if (wsrep_mysql_parse(thd, thd->query(), thd->query_length(),
                            &parser_state))
      {
        WSREP_DEBUG("Deadlock error for: %s", thd->query());
        mysql_mutex_lock(&thd->LOCK_thd_data);
        thd->reset_kill_query();
        thd->wsrep_retry_counter = 0;
        mysql_mutex_unlock(&thd->LOCK_thd_data);
        goto dispatch_end;
      }
    }
    else
#endif /* WITH_WSREP */
      mysql_parse(thd, thd->query(), thd->query_length(), &parser_state);

    /* Multi-statement loop: found_semicolon marks the next statement */
    while (!thd->killed && (parser_state.m_lip.found_semicolon != NULL) &&
           ! thd->is_error())
    {
      /*
        Multiple queries exist, execute them individually
      */
      char *beginning_of_next_stmt= (char*) parser_state.m_lip.found_semicolon;

      /* Finalize server status flags after executing a statement. */
      thd->update_server_status();
      thd->protocol->end_statement();
      query_cache_end_of_result(thd);

      mysql_audit_general(thd, MYSQL_AUDIT_GENERAL_STATUS,
                          thd->get_stmt_da()->is_error()
                          ? thd->get_stmt_da()->sql_errno()
                          : 0,
                          command_name[command].str);

      ulong length= (ulong)(packet_end - beginning_of_next_stmt);

      log_slow_statement(thd);
      DBUG_ASSERT(!thd->apc_target.is_enabled());

      /* Remove garbage at start of query */
      while (length > 0 && my_isspace(thd->charset(), *beginning_of_next_stmt))
      {
        beginning_of_next_stmt++;
        length--;
      }

      /* PSI end */
      MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da());
      thd->m_statement_psi= NULL;
      thd->m_digest= NULL;

      /* DTRACE end */
      if (MYSQL_QUERY_DONE_ENABLED())
      {
        MYSQL_QUERY_DONE(thd->is_error());
      }

      thd->lex->restore_set_statement_var();

#if defined(ENABLED_PROFILING)
      thd->profiling.finish_current_query();
      thd->profiling.start_new_query("continuing");
      thd->profiling.set_query_source(beginning_of_next_stmt, length);
#endif

      /* DTRACE begin */
      MYSQL_QUERY_START(beginning_of_next_stmt, thd->thread_id,
                        thd->get_db(),
                        &thd->security_ctx->priv_user[0],
                        (char *) thd->security_ctx->host_or_ip);

      /* PSI begin */
      thd->m_digest= & thd->m_digest_state;

      thd->m_statement_psi= MYSQL_START_STATEMENT(&thd->m_statement_state,
                                                  com_statement_info[command].m_key,
                                                  thd->db.str, thd->db.length,
                                                  thd->charset(), NULL);
      THD_STAGE_INFO(thd, stage_starting);
      MYSQL_SET_STATEMENT_TEXT(thd->m_statement_psi, beginning_of_next_stmt,
                               length);

      thd->set_query_and_id(beginning_of_next_stmt, length,
                            thd->charset(), next_query_id());

      /*
        Count each statement from the client.
      */
      statistic_increment(thd->status_var.questions, &LOCK_status);

      if (!WSREP(thd))
        thd->set_time(); /* Reset the query start time. */

      parser_state.reset(beginning_of_next_stmt, length);

#ifdef WITH_WSREP
      if (WSREP(thd))
      {
        if (wsrep_mysql_parse(thd, beginning_of_next_stmt,
                              length, &parser_state))
        {
          WSREP_DEBUG("Deadlock error for: %s", thd->query());
          mysql_mutex_lock(&thd->LOCK_thd_data);
          thd->reset_kill_query();
          thd->wsrep_retry_counter = 0;
          mysql_mutex_unlock(&thd->LOCK_thd_data);

          goto dispatch_end;
        }
      }
      else
#endif /* WITH_WSREP */
        mysql_parse(thd, beginning_of_next_stmt, length, &parser_state);

    }

    DBUG_PRINT("info",("query ready"));
    break;
  }
  case COM_FIELD_LIST:                          // This isn't actually needed
#ifdef DONT_ALLOW_SHOW_COMMANDS
    my_message(ER_NOT_ALLOWED_COMMAND, ER_THD(thd, ER_NOT_ALLOWED_COMMAND),
               MYF(0));  /* purecov: inspected */
    break;
#else
  {
    char *fields, *packet_end= packet + packet_length, *arg_end;
    /* Locked closure of all tables */
    TABLE_LIST table_list;
    LEX_STRING table_name;
    LEX_CSTRING db;
    /*
      SHOW statements should not add the used tables to the list of tables
      used in a transaction.
    */
    MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint();

    status_var_increment(thd->status_var.com_stat[SQLCOM_SHOW_FIELDS]);
    if (thd->copy_db_to(&db))
      break;
    /*
      We have name + wildcard in packet, separated by endzero
      (The packet is guaranteed to end with an end zero)
    */
    arg_end= strend(packet);
    uint arg_length= (uint)(arg_end - packet);

    /* Check given table name length. */
    if (packet_length - arg_length > NAME_LEN + 1 || arg_length > SAFE_NAME_LEN)
    {
      my_message(ER_UNKNOWN_COM_ERROR, ER_THD(thd, ER_UNKNOWN_COM_ERROR),
                 MYF(0));
      break;
    }
    thd->convert_string(&table_name, system_charset_info,
                        packet, arg_length, thd->charset());
    if (check_table_name(table_name.str, table_name.length, FALSE))
    {
      /* this is OK due to convert_string() null-terminating the string */
      my_error(ER_WRONG_TABLE_NAME, MYF(0), table_name.str);
      break;
    }
    packet= arg_end + 1;

    lex_start(thd);
    /* Must be before we init the table list. */
    if (lower_case_table_names)
    {
      table_name.length= my_casedn_str(files_charset_info, table_name.str);
      db.length= my_casedn_str(files_charset_info, (char*) db.str);
    }
    table_list.init_one_table(&db, (LEX_CSTRING*) &table_name, 0, TL_READ);
    /*
      Init TABLE_LIST members necessary when the undelrying
      table is view.
    */
    table_list.select_lex= thd->lex->first_select_lex();
    thd->lex->
      first_select_lex()->table_list.link_in_list(&table_list,
                                                  &table_list.next_local);
    thd->lex->add_to_query_tables(&table_list);

    if (is_infoschema_db(&table_list.db))
    {
      ST_SCHEMA_TABLE *schema_table= find_schema_table(thd, &table_list.alias);
      if (schema_table)
        table_list.schema_table= schema_table;
    }

    uint query_length= (uint) (packet_end - packet); // Don't count end \0
    if (!(fields= (char *) thd->memdup(packet, query_length + 1)))
      break;
    thd->set_query(fields, query_length);
    general_log_print(thd, command, "%s %s", table_list.table_name.str,
                      fields);

    if (thd->open_temporary_tables(&table_list))
      break;

    if (check_table_access(thd, SELECT_ACL, &table_list,
                           TRUE, UINT_MAX, FALSE))
      break;
    /*
      Turn on an optimization relevant if the underlying table
      is a view: do not fill derived tables.
    */
    thd->lex->sql_command= SQLCOM_SHOW_FIELDS;

    mysqld_list_fields(thd,&table_list,fields);
    thd->lex->unit.cleanup();
    /* No need to rollback statement transaction, it's not started. */
    DBUG_ASSERT(thd->transaction->stmt.is_empty());
    close_thread_tables(thd);
    thd->mdl_context.rollback_to_savepoint(mdl_savepoint);

    if (thd->transaction_rollback_request)
    {
      /*
        Transaction rollback was requested since MDL deadlock was
        discovered while trying to open tables. Rollback transaction
        in all storage engines including binary log and release all
        locks.
      */
      trans_rollback_implicit(thd);
      thd->release_transactional_locks();
    }

    thd->cleanup_after_query();
    break;
  }
#endif
  case COM_QUIT:
    /* Note: We don't calculate statistics for this command */

    /* Ensure that quit works even if max_mem_used is set */
    thd->variables.max_mem_used= LONGLONG_MAX;
    general_log_print(thd, command, NullS);
    net->error=0;                               // Don't give 'abort' message
    thd->get_stmt_da()->disable_status();       // Don't send anything back
    error=TRUE;                                 // End server
    break;
#ifndef EMBEDDED_LIBRARY
  case COM_BINLOG_DUMP:
  {
    ulong pos;
    ushort flags;
    uint32 slave_server_id;

    status_var_increment(thd->status_var.com_other);

    thd->query_plan_flags|= QPLAN_ADMIN;
    if (check_global_access(thd, PRIV_COM_BINLOG_DUMP))
      break;

    /* TODO: The following has to be changed to an 8 byte integer */
    pos = uint4korr(packet);
    flags = uint2korr(packet + 4);
    thd->variables.server_id=0; /* avoid suicide */
    if ((slave_server_id= uint4korr(packet+6))) // mysqlbinlog.server_id==0
      kill_zombie_dump_threads(slave_server_id);
    thd->variables.server_id = slave_server_id;

    const char *name= packet + 10;
    size_t nlen= strlen(name);

    general_log_print(thd, command, "Log: '%s' Pos: %lu", name, pos);
    if (nlen < FN_REFLEN)
      mysql_binlog_send(thd, thd->strmake(name, nlen), (my_off_t)pos, flags);
    thd->unregister_slave(); // todo: can be extraneous
    /* fake COM_QUIT -- if we get here, the thread needs to terminate */
    error = TRUE;
    break;
  }
#endif
  case COM_REFRESH:
  {
    int not_used;

    /*
      Initialize thd->lex since it's used in many base functions, such as
      open_tables(). Otherwise, it remains unitialized and may cause crash
      during execution of COM_REFRESH.
    */
    lex_start(thd);

    status_var_increment(thd->status_var.com_stat[SQLCOM_FLUSH]);
    ulonglong options= (ulonglong) (uchar) packet[0];
    if (trans_commit_implicit(thd))
      break;
    thd->release_transactional_locks();
    if (check_global_access(thd,RELOAD_ACL))
      break;
    general_log_print(thd, command, NullS);
#ifndef DBUG_OFF
    bool debug_simulate= FALSE;
    DBUG_EXECUTE_IF("simulate_detached_thread_refresh", debug_simulate= TRUE;);
    if (debug_simulate)
    {
      /* This code doesn't work under FTWRL */
      DBUG_ASSERT(! (options & REFRESH_READ_LOCK));
      /*
        Simulate a reload without a attached thread session.
        Provides a environment similar to that of when the
        server receives a SIGHUP signal and reloads caches
        and flushes tables.
      */
      bool res;
      set_current_thd(0);
      res= reload_acl_and_cache(NULL, options | REFRESH_FAST,
                                NULL, &not_used);
      set_current_thd(thd);
      if (res)
        break;
    }
    else
#endif
    {
      thd->lex->relay_log_connection_name= empty_clex_str;
      if (reload_acl_and_cache(thd, options, (TABLE_LIST*) 0, &not_used))
        break;
    }
    if (trans_commit_implicit(thd))
      break;
    close_thread_tables(thd);
    thd->release_transactional_locks();
    my_ok(thd);
    break;
  }
#ifndef EMBEDDED_LIBRARY
  case COM_SHUTDOWN:
  {
    status_var_increment(thd->status_var.com_other);
    if (check_global_access(thd,SHUTDOWN_ACL))
      break; /* purecov: inspected */
    /*
      If the client is < 4.1.3, it is going to send us no argument; then
      packet_length is 0, packet[0] is the end 0 of the packet. Note that
      SHUTDOWN_DEFAULT is 0. If client is >= 4.1.3, the shutdown level is in
      packet[0].
    */
    enum mysql_enum_shutdown_level level;
    level= (enum mysql_enum_shutdown_level) (uchar) packet[0];
    thd->lex->is_shutdown_wait_for_slaves= false;  // "deferred" cleanup
    if (level == SHUTDOWN_DEFAULT)
      level= SHUTDOWN_WAIT_ALL_BUFFERS; // soon default will be configurable
    else if (level != SHUTDOWN_WAIT_ALL_BUFFERS)
    {
      my_error(ER_NOT_SUPPORTED_YET, MYF(0), "this shutdown level");
      break;
    }
    DBUG_PRINT("quit",("Got shutdown command for level %u", level));
    general_log_print(thd, command, NullS);
    my_eof(thd);
    kill_mysql(thd);
    error=TRUE;
    break;
  }
#endif
  case COM_STATISTICS:
  {
    STATUS_VAR *current_global_status_var;      // Big; Don't allocate on stack
    ulong uptime;
    ulonglong queries_per_second1000;
    char buff[250];
    uint buff_len= sizeof(buff);

    if (!(current_global_status_var= (STATUS_VAR*)
          thd->alloc(sizeof(STATUS_VAR))))
      break;
    general_log_print(thd, command, NullS);
    status_var_increment(thd->status_var.com_stat[SQLCOM_SHOW_STATUS]);
    *current_global_status_var= global_status_var;
    calc_sum_of_all_status(current_global_status_var);
    if (!(uptime= (ulong) (thd->start_time - server_start_time)))
      queries_per_second1000= 0;
    else
      queries_per_second1000= thd->query_id * 1000 / uptime;
#ifndef EMBEDDED_LIBRARY
    size_t length=
#endif
      my_snprintf(buff, buff_len - 1,
                  "Uptime: %lu Threads: %u Questions: %lu "
                  "Slow queries: %lu Opens: %lu "
                  "Open tables: %u Queries per second avg: %u.%03u",
                  uptime, THD_count::value(), (ulong) thd->query_id,
                  current_global_status_var->long_query_count,
                  current_global_status_var->opened_tables,
                  tc_records(),
                  (uint) (queries_per_second1000 / 1000),
                  (uint) (queries_per_second1000 % 1000));
#ifdef EMBEDDED_LIBRARY
    /* Store the buffer in permanent memory */
    my_ok(thd, 0, 0, buff);
#else
    (void) my_net_write(net, (uchar*) buff, length);
    (void) net_flush(net);
    thd->get_stmt_da()->disable_status();
#endif
    break;
  }
  case COM_PING:
    status_var_increment(thd->status_var.com_other);
    my_ok(thd);                                 // Tell client we are alive
    break;
  case COM_PROCESS_INFO:
    status_var_increment(thd->status_var.com_stat[SQLCOM_SHOW_PROCESSLIST]);
    if (!thd->security_ctx->priv_user[0] &&
        check_global_access(thd, PRIV_COM_PROCESS_INFO))
      break;
    general_log_print(thd, command, NullS);
    mysqld_list_processes(thd,
                          thd->security_ctx->master_access & PRIV_COM_PROCESS_INFO ?
                          NullS : thd->security_ctx->priv_user, 0);
    break;
  case COM_PROCESS_KILL:
  {
    status_var_increment(thd->status_var.com_stat[SQLCOM_KILL]);
    ulong id=(ulong) uint4korr(packet);
    sql_kill(thd, id, KILL_CONNECTION_HARD, KILL_TYPE_ID);
    break;
  }
  case COM_SET_OPTION:
  {
    status_var_increment(thd->status_var.com_stat[SQLCOM_SET_OPTION]);
    uint opt_command= uint2korr(packet);

    switch (opt_command) {
    case (int) MYSQL_OPTION_MULTI_STATEMENTS_ON:
      thd->client_capabilities|= CLIENT_MULTI_STATEMENTS;
      my_eof(thd);
      break;
    case (int) MYSQL_OPTION_MULTI_STATEMENTS_OFF:
      thd->client_capabilities&= ~CLIENT_MULTI_STATEMENTS;
      my_eof(thd);
      break;
    default:
      my_message(ER_UNKNOWN_COM_ERROR, ER_THD(thd, ER_UNKNOWN_COM_ERROR),
                 MYF(0));
      break;
    }
    break;
  }
  case COM_DEBUG:
    status_var_increment(thd->status_var.com_other);
    if (check_global_access(thd, PRIV_DEBUG))
      break;                                    /* purecov: inspected */
    mysql_print_status();
    general_log_print(thd, command, NullS);
    my_eof(thd);
    break;

  case COM_SLEEP:
  case COM_CONNECT:                             // Impossible here
  case COM_TIME:                                // Impossible from client
  case COM_DELAYED_INSERT:
  case COM_END:
  case COM_UNIMPLEMENTED:
  default:
    my_message(ER_UNKNOWN_COM_ERROR, ER_THD(thd, ER_UNKNOWN_COM_ERROR),
               MYF(0));
    break;
  }

dispatch_end:
  /*
    For the threadpool i.e if non-blocking call, if not all async operations
    are finished, return without cleanup. The cleanup will be done on
    later, when command execution is resumed.
  */
  if (!blocking && !error && thd->async_state.pending_ops())
  {
    DBUG_RETURN(DISPATCH_COMMAND_WOULDBLOCK);
  }

resume:

#ifdef WITH_WSREP
  /*
    Next test should really be WSREP(thd), but that causes a failure when doing
    'set WSREP_ON=0'
  */
  if (unlikely(wsrep_service_started))
  {
    if (thd->killed == KILL_QUERY)
    {
      WSREP_DEBUG("THD is killed at dispatch_end");
    }
    wsrep_after_command_before_result(thd);
    if (wsrep_current_error(thd) && !wsrep_command_no_result(command))
    {
      /* todo: Pass wsrep client state current error to override */
      wsrep_override_error(thd, wsrep_current_error(thd),
                           wsrep_current_error_status(thd));
      WSREP_LOG_THD(thd, "leave");
    }
    if (WSREP(thd))
    {
      /*
        MDEV-10812
        In the case of COM_QUIT/COM_STMT_CLOSE thread status should be disabled.
      */
      DBUG_ASSERT((command != COM_QUIT && command != COM_STMT_CLOSE)
                  || thd->get_stmt_da()->is_disabled());
      DBUG_ASSERT(thd->wsrep_trx().state() != wsrep::transaction::s_replaying);
      /* wsrep BF abort in query exec phase */
      mysql_mutex_lock(&thd->LOCK_thd_kill);
      do_end_of_statement= thd_is_connection_alive(thd);
      mysql_mutex_unlock(&thd->LOCK_thd_kill);
    }
  }
#endif /* WITH_WSREP */


  if (do_end_of_statement)
  {
    DBUG_ASSERT(thd->derived_tables == NULL &&
                (thd->open_tables == NULL ||
                 (thd->locked_tables_mode == LTM_LOCK_TABLES)));

    thd_proc_info(thd, "Updating status");
    /* Finalize server status flags after executing a command. */
    thd->update_server_status();
    thd->protocol->end_statement();
    query_cache_end_of_result(thd);
  }
  if (drop_more_results)
    thd->server_status&= ~SERVER_MORE_RESULTS_EXISTS;

  if (likely(!thd->is_error() && !thd->killed_errno()))
    mysql_audit_general(thd, MYSQL_AUDIT_GENERAL_RESULT, 0, 0);

  mysql_audit_general(thd, MYSQL_AUDIT_GENERAL_STATUS,
                      thd->get_stmt_da()->is_error() ?
                      thd->get_stmt_da()->sql_errno() : 0,
                      command_name[command].str);

  thd->update_all_stats();

  /*
    Write to slow query log only those statements that received via the text
    protocol except the EXECUTE statement. The reason we do that way is
    that for statements received via binary protocol and for the EXECUTE
    statement, the slow statements have been already written to slow query log
    inside the method Prepared_statement::execute().
  */
  if(command == COM_QUERY &&
     thd->lex->sql_command != SQLCOM_EXECUTE)
    log_slow_statement(thd);
  else
    delete_explain_query(thd->lex);

  THD_STAGE_INFO(thd, stage_cleaning_up);
  thd->reset_query();

  /* Performance Schema Interface instrumentation, end */
  MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da());
  thd->set_examined_row_count(0);               // For processlist
  thd->set_command(COM_SLEEP);

  thd->m_statement_psi= NULL;
  thd->m_digest= NULL;

  thd->packet.shrink(thd->variables.net_buffer_length); // Reclaim some memory

  thd->reset_kill_query();  /* Ensure that killed_errmsg is released */
  /*
    LEX::m_sql_cmd can point to Sql_cmd allocated on thd->mem_root.
    Unlink it now, before freeing the root.
  */
  thd->lex->m_sql_cmd= NULL;
  free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC));

#if defined(ENABLED_PROFILING)
  thd->profiling.finish_current_query();
#endif
  if (MYSQL_QUERY_DONE_ENABLED() || MYSQL_COMMAND_DONE_ENABLED())
  {
    int res __attribute__((unused));
    res= (int) thd->is_error();
    if (command == COM_QUERY)
    {
      MYSQL_QUERY_DONE(res);
    }
    MYSQL_COMMAND_DONE(res);
  }
  DEBUG_SYNC(thd,"dispatch_command_end");

  /* Check that some variables are reset properly */
  DBUG_ASSERT(thd->abort_on_warning == 0);
  thd->lex->restore_set_statement_var();
  DBUG_RETURN(error?DISPATCH_COMMAND_CLOSE_CONNECTION: DISPATCH_COMMAND_SUCCESS);
}

/*
  Return true when a slow-log filter is configured and the statement's
  query-plan flag bits do not intersect it, i.e. the statement is masked
  (filtered out) from the slow log.
*/
static bool slow_filter_masked(THD *thd, ulonglong mask)
{
  return thd->variables.log_slow_filter && !(thd->variables.log_slow_filter & mask);
}

/*
  Log query to slow queries, if it passes filtering

  @note
    This function must call delete_explain_query().
*/

void log_slow_statement(THD *thd)
{
  DBUG_ENTER("log_slow_statement");

  /*
    The following should never be true with our current code base,
    but better to keep this here so we don't accidently try to log a
    statement in a trigger or stored function
  */
  if (unlikely(thd->in_sub_stmt))
    goto end;                           // Don't set time for sub stmt
  /*
    Skip both long_query_count increment and logging if the current
    statement forces slow log suppression (e.g. an SP statement).

    Note, we don't check for global_system_variables.sql_log_slow here.
    According to the manual, the "Slow_queries" status variable does not require
    sql_log_slow to be ON. So even if sql_log_slow is OFF, we still need to
    continue and increment long_query_count (and skip only logging, see below):
  */
  if (!thd->enable_slow_log)
    goto end;                           // E.g. SP statement

  DBUG_EXECUTE_IF("simulate_slow_query", {
    if (thd->get_command() == COM_QUERY ||
        thd->get_command() == COM_STMT_EXECUTE)
      thd->server_status|= SERVER_QUERY_WAS_SLOW;
  });

  /* Statements doing full scans count as slow when the filter asks for it */
  if ((thd->server_status &
       (SERVER_QUERY_NO_INDEX_USED | SERVER_QUERY_NO_GOOD_INDEX_USED)) &&
      !(thd->query_plan_flags & QPLAN_STATUS) &&
      (thd->variables.log_slow_filter & QPLAN_NOT_USING_INDEX))
  {
    thd->query_plan_flags|= QPLAN_NOT_USING_INDEX;
    /* We are always logging no index queries if enabled in filter */
    thd->server_status|= SERVER_QUERY_WAS_SLOW;
  }

  if ((thd->server_status & SERVER_QUERY_WAS_SLOW) &&
      thd->get_examined_row_count() >= thd->variables.min_examined_row_limit)
  {
    /* Counted even when actual logging below is skipped (see note above) */
    thd->status_var.long_query_count++;

    /*
      until log_slow_disabled_statements=admin is removed, it
      duplicates slow_log_filter=admin
    */
    if ((thd->query_plan_flags & QPLAN_ADMIN) &&
        (thd->variables.log_slow_disabled_statements & LOG_SLOW_DISABLE_ADMIN))
      goto end;

    if (!global_system_variables.sql_log_slow || !thd->variables.sql_log_slow)
      goto end;

    /*
      If rate limiting of slow log writes is enabled, decide whether to log
      this query to the log or not.
    */
    if (thd->variables.log_slow_rate_limit > 1 &&
        (global_query_id % thd->variables.log_slow_rate_limit) != 0)
      goto end;

    /*
      Follow the slow log filter configuration:
      skip logging if the current statement matches the filter.
    */
    if (slow_filter_masked(thd, thd->query_plan_flags))
      goto end;

    THD_STAGE_INFO(thd, stage_logging_slow_query);
    slow_log_print(thd, thd->query(), thd->query_length(),
                   thd->utime_after_query);
  }

end:
  delete_explain_query(thd->lex);
  DBUG_VOID_RETURN;
}


/**
  Create a TABLE_LIST object for an INFORMATION_SCHEMA table.

  This function is used in the parser to convert a SHOW or DESCRIBE
  table_name command to a SELECT from INFORMATION_SCHEMA.
+ It prepares a SELECT_LEX and a TABLE_LIST object to represent the + given command as a SELECT parse tree. + + @param thd thread handle + @param lex current lex + @param table_ident table alias if it's used + @param schema_table_idx the type of the INFORMATION_SCHEMA table to be + created + + @note + Due to the way this function works with memory and LEX it cannot + be used outside the parser (parse tree transformations outside + the parser break PS and SP). + + @retval + 0 success + @retval + 1 out of memory or SHOW commands are not allowed + in this version of the server. +*/ + +int prepare_schema_table(THD *thd, LEX *lex, Table_ident *table_ident, + enum enum_schema_tables schema_table_idx) +{ + SELECT_LEX *schema_select_lex= NULL; + DBUG_ENTER("prepare_schema_table"); + + switch (schema_table_idx) { + case SCH_SCHEMATA: +#if defined(DONT_ALLOW_SHOW_COMMANDS) + my_message(ER_NOT_ALLOWED_COMMAND, + ER_THD(thd, ER_NOT_ALLOWED_COMMAND), MYF(0)); + DBUG_RETURN(1); +#else + break; +#endif + + case SCH_TABLE_NAMES: + case SCH_TABLES: + case SCH_CHECK_CONSTRAINTS: + case SCH_VIEWS: + case SCH_TRIGGERS: + case SCH_EVENTS: +#ifdef DONT_ALLOW_SHOW_COMMANDS + my_message(ER_NOT_ALLOWED_COMMAND, + ER_THD(thd, ER_NOT_ALLOWED_COMMAND), MYF(0)); + DBUG_RETURN(1); +#else + { + if (lex->first_select_lex()->db.str == NULL && + lex->copy_db_to(&lex->first_select_lex()->db)) + { + DBUG_RETURN(1); + } + schema_select_lex= new (thd->mem_root) SELECT_LEX(); + schema_select_lex->table_list.first= NULL; + if (lower_case_table_names == 1) + lex->first_select_lex()->db.str= + thd->strdup(lex->first_select_lex()->db.str); + schema_select_lex->db= lex->first_select_lex()->db; + /* + check_db_name() may change db.str if lower_case_table_names == 1, + but that's ok as the db is allocted above in this case. 
+ */ + if (check_db_name((LEX_STRING*) &lex->first_select_lex()->db)) + { + my_error(ER_WRONG_DB_NAME, MYF(0), lex->first_select_lex()->db.str); + DBUG_RETURN(1); + } + break; + } +#endif + case SCH_COLUMNS: + case SCH_STATISTICS: +#ifdef DONT_ALLOW_SHOW_COMMANDS + my_message(ER_NOT_ALLOWED_COMMAND, + ER_THD(thd, ER_NOT_ALLOWED_COMMAND), MYF(0)); + DBUG_RETURN(1); +#else + { + DBUG_ASSERT(table_ident); + TABLE_LIST **query_tables_last= lex->query_tables_last; + schema_select_lex= new (thd->mem_root) SELECT_LEX(); + /* 'parent_lex' is used in init_query() so it must be before it. */ + schema_select_lex->parent_lex= lex; + schema_select_lex->init_query(); + schema_select_lex->select_number= 0; + if (!schema_select_lex->add_table_to_list(thd, table_ident, 0, 0, TL_READ, + MDL_SHARED_READ)) + DBUG_RETURN(1); + lex->query_tables_last= query_tables_last; + break; +#endif + } + case SCH_PROFILES: + /* + Mark this current profiling record to be discarded. We don't + wish to have SHOW commands show up in profiling. + */ +#if defined(ENABLED_PROFILING) + thd->profiling.discard_current_query(); +#endif + break; + default: + break; + } + if (schema_select_lex) + schema_select_lex->set_master_unit(&lex->unit); + SELECT_LEX *select_lex= lex->current_select; + if (make_schema_select(thd, select_lex, get_schema_table(schema_table_idx))) + DBUG_RETURN(1); + + select_lex->table_list.first->schema_select_lex= schema_select_lex; + DBUG_RETURN(0); +} + + +/** + Read query from packet and store in thd->query. + Used in COM_QUERY and COM_STMT_PREPARE. 
+ + Sets the following THD variables: + - query + - query_length + + @retval + FALSE ok + @retval + TRUE error; In this case thd->fatal_error is set +*/ + +bool alloc_query(THD *thd, const char *packet, size_t packet_length) +{ + char *query; + /* Remove garbage at start and end of query */ + while (packet_length > 0 && my_isspace(thd->charset(), packet[0])) + { + packet++; + packet_length--; + } + const char *pos= packet + packet_length; // Point at end null + while (packet_length > 0 && + (pos[-1] == ';' || my_isspace(thd->charset() ,pos[-1]))) + { + pos--; + packet_length--; + } + /* We must allocate some extra memory for query cache + + The query buffer layout is: + buffer :== + The input statement(s) + '\0' Terminating null char (1 byte) + Length of following current database name (size_t) + Name of current database + Flags struct + */ + if (! (query= (char*) thd->memdup_w_gap(packet, + packet_length, + 1 + thd->db.length + + QUERY_CACHE_DB_LENGTH_SIZE + + QUERY_CACHE_FLAGS_SIZE))) + return TRUE; + query[packet_length]= '\0'; + /* + Space to hold the name of the current database is allocated. We + also store this length, in case current database is changed during + execution. We might need to reallocate the 'query' buffer + */ + int2store(query + packet_length + 1, thd->db.length); + + thd->set_query(query, packet_length); + + /* Reclaim some memory */ + thd->packet.shrink(thd->variables.net_buffer_length); + thd->convert_buffer.shrink(thd->variables.net_buffer_length); + + return FALSE; +} + + +bool sp_process_definer(THD *thd) +{ + DBUG_ENTER("sp_process_definer"); + + LEX *lex= thd->lex; + + /* + If the definer is not specified, this means that CREATE-statement missed + DEFINER-clause. DEFINER-clause can be missed in two cases: + + - The user submitted a statement w/o the clause. This is a normal + case, we should assign CURRENT_USER as definer. + + - Our slave received an updated from the master, that does not + replicate definer for stored rountines. 
We should also assign + CURRENT_USER as definer here, but also we should mark this routine + as NON-SUID. This is essential for the sake of backward + compatibility. + + The problem is the slave thread is running under "special" user (@), + that actually does not exist. In the older versions we do not fail + execution of a stored routine if its definer does not exist and + continue the execution under the authorization of the invoker + (BUG#13198). And now if we try to switch to slave-current-user (@), + we will fail. + + Actually, this leads to the inconsistent state of master and + slave (different definers, different SUID behaviour), but it seems, + this is the best we can do. + */ + + if (!lex->definer) + { + Query_arena original_arena; + Query_arena *ps_arena= thd->activate_stmt_arena_if_needed(&original_arena); + + lex->definer= create_default_definer(thd, false); + + if (ps_arena) + thd->restore_active_arena(ps_arena, &original_arena); + + /* Error has been already reported. */ + if (lex->definer == NULL) + DBUG_RETURN(TRUE); + + if (thd->slave_thread && lex->sphead) + lex->sphead->set_suid(SP_IS_NOT_SUID); + } + else + { + LEX_USER *d= get_current_user(thd, lex->definer); + if (!d) + DBUG_RETURN(TRUE); + if (d->user.str == public_name.str) + { + my_error(ER_INVALID_ROLE, MYF(0), lex->definer->user.str); + DBUG_RETURN(TRUE); + } + thd->change_item_tree((Item**)&lex->definer, (Item*)d); + + /* + If the specified definer differs from the current user or role, we + should check that the current user has SUPER privilege (in order + to create a stored routine under another user one must have + SUPER privilege). 
+ */ + bool curuser= !strcmp(d->user.str, thd->security_ctx->priv_user); + bool currole= !curuser && !strcmp(d->user.str, thd->security_ctx->priv_role); + bool curuserhost= curuser && d->host.str && + !my_strcasecmp(system_charset_info, d->host.str, + thd->security_ctx->priv_host); + if (!curuserhost && !currole && + check_global_access(thd, PRIV_DEFINER_CLAUSE, false)) + DBUG_RETURN(TRUE); + } + + /* Check that the specified definer exists. Emit a warning if not. */ + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (!is_acl_user(lex->definer->host.str, lex->definer->user.str)) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_NO_SUCH_USER, ER_THD(thd, ER_NO_SUCH_USER), + lex->definer->user.str, lex->definer->host.str); + } +#endif /* NO_EMBEDDED_ACCESS_CHECKS */ + + DBUG_RETURN(FALSE); +} + + +/** + Auxiliary call that opens and locks tables for LOCK TABLES statement + and initializes the list of locked tables. + + @param thd Thread context. + @param tables List of tables to be locked. + + @return FALSE in case of success, TRUE in case of error. +*/ + +static bool __attribute__ ((noinline)) +lock_tables_open_and_lock_tables(THD *thd, TABLE_LIST *tables) +{ + Lock_tables_prelocking_strategy lock_tables_prelocking_strategy; + MDL_deadlock_and_lock_abort_error_handler deadlock_handler; + MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint(); + uint counter; + TABLE_LIST *table; + + thd->in_lock_tables= 1; + +retry: + + if (open_tables(thd, &tables, &counter, 0, &lock_tables_prelocking_strategy)) + goto err; + + for (table= tables; table; table= table->next_global) + { + if (!table->placeholder()) + { + if (table->table->s->tmp_table) + { + /* + We allow to change temporary tables even if they were locked for read + by LOCK TABLES. 
To avoid a discrepancy between lock acquired at LOCK + TABLES time and by the statement which is later executed under LOCK + TABLES we ensure that for temporary tables we always request a write + lock (such discrepancy can cause problems for the storage engine). + We don't set TABLE_LIST::lock_type in this case as this might result + in extra warnings from THD::decide_logging_format() even though + binary logging is totally irrelevant for LOCK TABLES. + */ + table->table->reginfo.lock_type= TL_WRITE; + } + else if (table->mdl_request.type == MDL_SHARED_READ && + ! table->prelocking_placeholder && + table->table->file->lock_count() == 0) + { + enum enum_mdl_type lock_type; + /* + In case when LOCK TABLE ... READ LOCAL was issued for table with + storage engine which doesn't support READ LOCAL option and doesn't + use THR_LOCK locks we need to upgrade weak SR metadata lock acquired + in open_tables() to stronger SRO metadata lock. + This is not needed for tables used through stored routines or + triggers as we always acquire SRO (or even stronger SNRW) metadata + lock for them. + */ + deadlock_handler.init(); + thd->push_internal_handler(&deadlock_handler); + + lock_type= table->table->mdl_ticket->get_type() == MDL_SHARED_WRITE ? + MDL_SHARED_NO_READ_WRITE : MDL_SHARED_READ_ONLY; + + bool result= thd->mdl_context.upgrade_shared_lock( + table->table->mdl_ticket, + lock_type, + thd->variables.lock_wait_timeout); + + thd->pop_internal_handler(); + + if (deadlock_handler.need_reopen()) + { + /* + Deadlock occurred during upgrade of metadata lock. + Let us restart acquring and opening tables for LOCK TABLES. 
+ */ + close_tables_for_reopen(thd, &tables, mdl_savepoint); + if (thd->open_temporary_tables(tables)) + goto err; + goto retry; + } + + if (result) + goto err; + } + +#ifdef WITH_WSREP + if (WSREP(thd) && table->table->s->table_type == TABLE_TYPE_SEQUENCE) + { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "LOCK TABLE on SEQUENCES in Galera cluster"); + goto err; + } +#endif + + } + /* + Check privileges of view tables here, after views were opened. + Either definer or invoker has to have PRIV_LOCK_TABLES to be able + to lock view and its tables. For mysqldump (that locks views + before dumping their structures) compatibility we allow locking + views that select from I_S or P_S tables, but downrade the lock + to TL_READ + */ + if (table->belong_to_view && + check_single_table_access(thd, PRIV_LOCK_TABLES, table, 1)) + { + if (table->grant.m_internal.m_schema_access) + table->lock_type= TL_READ; + else + { + bool error= true; + if (Security_context *sctx= table->security_ctx) + { + table->security_ctx= 0; + error= check_single_table_access(thd, PRIV_LOCK_TABLES, table, 1); + table->security_ctx= sctx; + } + if (error) + { + my_error(ER_VIEW_INVALID, MYF(0), table->belong_to_view->view_db.str, + table->belong_to_view->view_name.str); + goto err; + } + } + } + } + + if (lock_tables(thd, tables, counter, 0) || + thd->locked_tables_list.init_locked_tables(thd)) + goto err; + + thd->in_lock_tables= 0; + + return FALSE; + +err: + thd->in_lock_tables= 0; + + trans_rollback_stmt(thd); + /* + Need to end the current transaction, so the storage engine (InnoDB) + can free its locks if LOCK TABLES locked some tables before finding + that it can't lock a table in its list + */ + trans_rollback(thd); + /* Close tables and release metadata locks. 
*/ + close_thread_tables(thd); + DBUG_ASSERT(!thd->locked_tables_mode); + thd->release_transactional_locks(); + return TRUE; +} + + +static bool do_execute_sp(THD *thd, sp_head *sp) +{ + /* bits that should be cleared in thd->server_status */ + uint bits_to_be_cleared= 0; + ulonglong affected_rows; + if (sp->m_flags & sp_head::MULTI_RESULTS) + { + if (!(thd->client_capabilities & CLIENT_MULTI_RESULTS)) + { + /* The client does not support multiple result sets being sent back */ + my_error(ER_SP_BADSELECT, MYF(0), ErrConvDQName(sp).ptr()); + return 1; + } + } + /* + If SERVER_MORE_RESULTS_EXISTS is not set, + then remember that it should be cleared + */ + bits_to_be_cleared= (~thd->server_status & + SERVER_MORE_RESULTS_EXISTS); + thd->server_status|= SERVER_MORE_RESULTS_EXISTS; + ha_rows select_limit= thd->variables.select_limit; + thd->variables.select_limit= HA_POS_ERROR; + + /* + Reset current_select as it may point to random data as a + result of previous parsing. + */ + thd->lex->current_select= NULL; + thd->lex->in_sum_func= 0; // For Item_field::fix_fields() + + /* + We never write CALL statements into binlog: + - If the mode is non-prelocked, each statement will be logged + separately. + - If the mode is prelocked, the invoking statement will care + about writing into binlog. + So just execute the statement. 
+ */ + int res= sp->execute_procedure(thd, &thd->lex->value_list); + + thd->variables.select_limit= select_limit; + thd->server_status&= ~bits_to_be_cleared; + + if (res) + { + DBUG_ASSERT(thd->is_error() || thd->killed); + return 1; // Substatement should already have sent error + } + + affected_rows= thd->affected_rows; // Affected rows for all sub statements + thd->affected_rows= 0; // Reset total, as my_ok() adds to it + my_ok(thd, affected_rows); + return 0; +} + + +static int __attribute__ ((noinline)) +mysql_create_routine(THD *thd, LEX *lex) +{ + DBUG_ASSERT(lex->sphead != 0); + DBUG_ASSERT(lex->sphead->m_db.str); /* Must be initialized in the parser */ + /* + Verify that the database name is allowed, optionally + lowercase it. + */ + if (check_db_name((LEX_STRING*) &lex->sphead->m_db)) + { + my_error(ER_WRONG_DB_NAME, MYF(0), lex->sphead->m_db.str); + return true; + } + + if (check_access(thd, CREATE_PROC_ACL, lex->sphead->m_db.str, + NULL, NULL, 0, 0)) + return true; + + /* Checking the drop permissions if CREATE OR REPLACE is used */ + if (lex->create_info.or_replace()) + { + if (check_routine_access(thd, ALTER_PROC_ACL, &lex->sphead->m_db, + &lex->sphead->m_name, + Sp_handler::handler(lex->sql_command), 0)) + return true; + } + + const LEX_CSTRING *name= lex->sphead->name(); +#ifdef HAVE_DLOPEN + if (lex->sphead->m_handler->type() == SP_TYPE_FUNCTION) + { + udf_func *udf = find_udf(name->str, name->length); + + if (udf) + { + my_error(ER_UDF_EXISTS, MYF(0), name->str); + return true; + } + } +#endif + + if (sp_process_definer(thd)) + return true; + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + if (!lex->sphead->m_handler->sp_create_routine(thd, lex->sphead)) + { +#ifndef NO_EMBEDDED_ACCESS_CHECKS + /* only add privileges if really neccessary */ + + Security_context security_context; + bool restore_backup_context= false; + Security_context *backup= NULL; + LEX_USER *definer= thd->lex->definer; + /* + We're going to issue an implicit GRANT 
statement so we close all + open tables. We have to keep metadata locks as this ensures that + this statement is atomic against concurent FLUSH TABLES WITH READ + LOCK. Deadlocks which can arise due to fact that this implicit + statement takes metadata locks should be detected by a deadlock + detector in MDL subsystem and reported as errors. + + TODO: Long-term we should either ensure that implicit GRANT statement + is written into binary log as a separate statement or make both + creation of routine and implicit GRANT parts of one fully atomic + statement. + */ + if (trans_commit_stmt(thd)) + goto wsrep_error_label; + close_thread_tables(thd); + /* + Check if the definer exists on slave, + then use definer privilege to insert routine privileges to mysql.procs_priv. + + For current user of SQL thread has GLOBAL_ACL privilege, + which doesn't any check routine privileges, + so no routine privilege record will insert into mysql.procs_priv. + */ + if (thd->slave_thread && is_acl_user(definer->host.str, definer->user.str)) + { + security_context.change_security_context(thd, &thd->lex->definer->user, + &thd->lex->definer->host, + &thd->lex->sphead->m_db, + &backup); + restore_backup_context= true; + } + + if (sp_automatic_privileges && !opt_noacl && + check_routine_access(thd, DEFAULT_CREATE_PROC_ACLS, + &lex->sphead->m_db, name, + Sp_handler::handler(lex->sql_command), 1)) + { + if (sp_grant_privileges(thd, lex->sphead->m_db.str, name->str, + Sp_handler::handler(lex->sql_command))) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_PROC_AUTO_GRANT_FAIL, ER_THD(thd, ER_PROC_AUTO_GRANT_FAIL)); + thd->clear_error(); + } + + /* + Restore current user with GLOBAL_ACL privilege of SQL thread + */ + if (restore_backup_context) + { + DBUG_ASSERT(thd->slave_thread == 1); + thd->security_ctx->restore_security_context(thd, backup); + } + +#endif + return false; + } + (void) trans_commit_stmt(thd); + +#if !defined(NO_EMBEDDED_ACCESS_CHECKS) || defined(WITH_WSREP) 
+wsrep_error_label: +#endif + return true; +} + + +/** + Prepare for CREATE DATABASE, ALTER DATABASE, DROP DATABASE. + + @param thd - current THD + @param want_access - access needed + @param dbname - the database name + + @retval false - Ok to proceed with CREATE/ALTER/DROP + @retval true - not OK to proceed (error, or filtered) + + Note, on slave this function returns true if the database + is in the ignore filter. The caller must distinguish this case + from other cases: bad database error, no access error. + This can be done by testing thd->is_error(). +*/ +static bool prepare_db_action(THD *thd, privilege_t want_access, + LEX_CSTRING *dbname) +{ + if (check_db_name((LEX_STRING*)dbname)) + { + my_error(ER_WRONG_DB_NAME, MYF(0), dbname->str); + return true; + } + /* + If in a slave thread : + - CREATE DATABASE DB was certainly not preceded by USE DB. + - ALTER DATABASE DB may not be preceded by USE DB. + - DROP DATABASE DB may not be preceded by USE DB. + For that reason, db_ok() in sql/slave.cc did not check the + do_db/ignore_db. And as this query involves no tables, tables_ok() + was not called. So we have to check rules again here. + */ +#ifdef HAVE_REPLICATION + if (thd->slave_thread) + { + Rpl_filter *rpl_filter; + rpl_filter= thd->system_thread_info.rpl_sql_info->rpl_filter; + if (!rpl_filter->db_ok(dbname->str) || + !rpl_filter->db_ok_with_wild_table(dbname->str)) + { + my_message(ER_SLAVE_IGNORED_TABLE, + ER_THD(thd, ER_SLAVE_IGNORED_TABLE), MYF(0)); + return true; + } + } +#endif + return check_access(thd, want_access, dbname->str, NULL, NULL, 1, 0); +} + + +bool Sql_cmd_call::execute(THD *thd) +{ + TABLE_LIST *all_tables= thd->lex->query_tables; + sp_head *sp; + /* + This will cache all SP and SF and open and lock all tables + required for execution. 
+ */ + if (check_table_access(thd, SELECT_ACL, all_tables, FALSE, + UINT_MAX, FALSE) || + open_and_lock_tables(thd, all_tables, TRUE, 0)) + return true; + + /* + By this moment all needed SPs should be in cache so no need to look + into DB. + */ + if (!(sp= m_handler->sp_find_routine(thd, m_name, true))) + { + /* + If the routine is not found, let's still check EXECUTE_ACL to decide + whether to return "Access denied" or "Routine does not exist". + */ + if (check_routine_access(thd, EXECUTE_ACL, &m_name->m_db, + &m_name->m_name, + &sp_handler_procedure, + false)) + return true; + /* + sp_find_routine can have issued an ER_SP_RECURSION_LIMIT error. + Send message ER_SP_DOES_NOT_EXIST only if procedure is not found in + cache. + */ + if (!sp_cache_lookup(&thd->sp_proc_cache, m_name)) + my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "PROCEDURE", + ErrConvDQName(m_name).ptr()); + return true; + } + else + { + if (sp->check_execute_access(thd)) + return true; + /* + Check that the stored procedure doesn't contain Dynamic SQL + and doesn't return result sets: such stored procedures can't + be called from a function or trigger. + */ + if (thd->in_sub_stmt) + { + const char *where= (thd->in_sub_stmt & SUB_STMT_TRIGGER ? + "trigger" : "function"); + if (sp->is_not_allowed_in_function(where)) + return true; + } + + if (do_execute_sp(thd, sp)) + return true; + + /* + Disable slow log for the above call(), if calls are disabled. + Instead we will log the executed statements to the slow log. + */ + if (thd->variables.log_slow_disabled_statements & LOG_SLOW_DISABLE_CALL) + thd->enable_slow_log= 0; + } + return false; +} + + +/** + Check whether the SQL statement being processed is prepended by + SET STATEMENT clause and handle variables assignment if it is. + + @param thd thread handle + @param lex current lex + + @return false in case of success, true in case of error. 
+*/ + +bool run_set_statement_if_requested(THD *thd, LEX *lex) +{ + if (!lex->stmt_var_list.is_empty() && !thd->slave_thread) + { + Query_arena backup; + DBUG_PRINT("info", ("SET STATEMENT %d vars", lex->stmt_var_list.elements)); + + lex->old_var_list.empty(); + List_iterator_fast it(lex->stmt_var_list); + set_var_base *var; + + if (lex->set_arena_for_set_stmt(&backup)) + return true; + + MEM_ROOT *mem_root= thd->mem_root; + while ((var= it++)) + { + DBUG_ASSERT(var->is_system()); + set_var *o= NULL, *v= (set_var*)var; + if (!v->var->is_set_stmt_ok()) + { + my_error(ER_SET_STATEMENT_NOT_SUPPORTED, MYF(0), v->var->name.str); + lex->reset_arena_for_set_stmt(&backup); + lex->old_var_list.empty(); + lex->free_arena_for_set_stmt(); + return true; + } + if (v->var->session_is_default(thd)) + o= new set_var(thd,v->type, v->var, &v->base, NULL); + else + { + switch (v->var->option.var_type & GET_TYPE_MASK) + { + case GET_BIT: + case GET_BOOL: + case GET_INT: + case GET_LONG: + case GET_LL: + { + bool null_value; + longlong val= v->var->val_int(&null_value, thd, v->type, &v->base); + o= new set_var(thd, v->type, v->var, &v->base, + (null_value ? + (Item *) new (mem_root) Item_null(thd) : + (Item *) new (mem_root) Item_int(thd, val))); + } + break; + case GET_UINT: + case GET_ULONG: + case GET_ULL: + { + bool null_value; + ulonglong val= v->var->val_int(&null_value, thd, v->type, &v->base); + o= new set_var(thd, v->type, v->var, &v->base, + (null_value ? + (Item *) new (mem_root) Item_null(thd) : + (Item *) new (mem_root) Item_uint(thd, val))); + } + break; + case GET_DOUBLE: + { + bool null_value; + double val= v->var->val_real(&null_value, thd, v->type, &v->base); + o= new set_var(thd, v->type, v->var, &v->base, + (null_value ? 
+ (Item *) new (mem_root) Item_null(thd) : + (Item *) new (mem_root) Item_float(thd, val, 1))); + } + break; + default: + case GET_NO_ARG: + case GET_DISABLED: + DBUG_ASSERT(0); + /* fall through */ + case 0: + case GET_FLAGSET: + case GET_ENUM: + case GET_SET: + case GET_STR: + case GET_STR_ALLOC: + { + char buff[STRING_BUFFER_USUAL_SIZE]; + String tmp(buff, sizeof(buff), v->var->charset(thd)),*val; + val= v->var->val_str(&tmp, thd, v->type, &v->base); + if (val) + { + Item_string *str= + new (mem_root) Item_string(thd, v->var->charset(thd), + val->ptr(), val->length()); + o= new set_var(thd, v->type, v->var, &v->base, str); + } + else + o= new set_var(thd, v->type, v->var, &v->base, + new (mem_root) Item_null(thd)); + } + break; + } + } + DBUG_ASSERT(o); + lex->old_var_list.push_back(o, thd->mem_root); + } + lex->reset_arena_for_set_stmt(&backup); + + if (lex->old_var_list.is_empty()) + lex->free_arena_for_set_stmt(); + + if (thd->is_error() || + sql_set_variables(thd, &lex->stmt_var_list, false)) + { + if (!thd->is_error()) + my_error(ER_WRONG_ARGUMENTS, MYF(0), "SET"); + lex->restore_set_statement_var(); + return true; + } + /* + The value of last_insert_id is remembered in THD to be written to binlog + when it's used *the first time* in the statement. But SET STATEMENT + must read the old value of last_insert_id to be able to restore it at + the end. This should not count at "reading of last_insert_id" and + should not remember last_insert_id for binlog. That is, it should clear + stmt_depends_on_first_successful_insert_id_in_prev_stmt flag. + */ + if (!thd->in_sub_stmt) + { + thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt= 0; + } + } + return false; +} + + +/** + Execute command saved in thd and lex->sql_command. + + @param thd Thread handle + + @todo + - Invalidate the table in the query cache if something changed + after unlocking when changes become visible. + TODO: this is workaround. 
right way will be move invalidating in + the unlock procedure. + - TODO: use check_change_password() + + @retval + FALSE OK + @retval + TRUE Error +*/ + +int +mysql_execute_command(THD *thd, bool is_called_from_prepared_stmt) +{ + int res= 0; + int up_result= 0; + LEX *lex= thd->lex; + /* first SELECT_LEX (have special meaning for many of non-SELECTcommands) */ + SELECT_LEX *select_lex= lex->first_select_lex(); + /* first table of first SELECT_LEX */ + TABLE_LIST *first_table= select_lex->table_list.first; + /* list of all tables in query */ + TABLE_LIST *all_tables; + /* most outer SELECT_LEX_UNIT of query */ + SELECT_LEX_UNIT *unit= &lex->unit; +#ifdef HAVE_REPLICATION + /* have table map for update for multi-update statement (BUG#37051) */ + bool have_table_map_for_update= FALSE; + /* */ + Rpl_filter *rpl_filter; +#endif + DBUG_ENTER("mysql_execute_command"); + + // check that we correctly marked first table for data insertion + DBUG_ASSERT(!(sql_command_flags[lex->sql_command] & CF_INSERTS_DATA) || + first_table->for_insert_data); + + if (thd->security_ctx->password_expired && + lex->sql_command != SQLCOM_SET_OPTION && + lex->sql_command != SQLCOM_PREPARE && + lex->sql_command != SQLCOM_EXECUTE && + lex->sql_command != SQLCOM_DEALLOCATE_PREPARE) + { + my_error(ER_MUST_CHANGE_PASSWORD, MYF(0)); + DBUG_RETURN(1); + } + + DBUG_ASSERT(thd->transaction->stmt.is_empty() || thd->in_sub_stmt); + /* + Each statement or replication event which might produce deadlock + should handle transaction rollback on its own. So by the start of + the next statement transaction rollback request should be fulfilled + already. + */ + DBUG_ASSERT(! 
thd->transaction_rollback_request || thd->in_sub_stmt); + /* + In many cases first table of main SELECT_LEX have special meaning => + check that it is first table in global list and relink it first in + queries_tables list if it is necessary (we need such relinking only + for queries with subqueries in select list, in this case tables of + subqueries will go to global list first) + + all_tables will differ from first_table only if most upper SELECT_LEX + do not contain tables. + + Because of above in place where should be at least one table in most + outer SELECT_LEX we have following check: + DBUG_ASSERT(first_table == all_tables); + DBUG_ASSERT(first_table == all_tables && first_table != 0); + */ + lex->first_lists_tables_same(); + lex->fix_first_select_number(); + /* should be assigned after making first tables same */ + all_tables= lex->query_tables; + /* set context for commands which do not use setup_tables */ + select_lex-> + context.resolve_in_table_list_only(select_lex-> + table_list.first); + + /* + Remember last commmand executed, so that we can use it in functions called by + dispatch_command() + */ + thd->last_sql_command= lex->sql_command; + + /* + Reset warning count for each query that uses tables + A better approach would be to reset this for any commands + that is not a SHOW command or a select that only access local + variables, but for now this is probably good enough. + */ + if ((sql_command_flags[lex->sql_command] & CF_DIAGNOSTIC_STMT) != 0) + thd->get_stmt_da()->set_warning_info_read_only(TRUE); + else + { + thd->get_stmt_da()->set_warning_info_read_only(FALSE); + if (all_tables) + thd->get_stmt_da()->opt_clear_warning_info(thd->query_id); + } + +#ifdef HAVE_REPLICATION + if (unlikely(thd->slave_thread)) + { + if (lex->sql_command == SQLCOM_DROP_TRIGGER) + { + /* + When dropping a trigger, we need to load its table name + before checking slave filter rules. 
+ */ + add_table_for_trigger(thd, thd->lex->spname, 1, &all_tables); + + if (!all_tables) + { + /* + If table name cannot be loaded, + it means the trigger does not exists possibly because + CREATE TRIGGER was previously skipped for this trigger + according to slave filtering rules. + Returning success without producing any errors in this case. + */ + if (!thd->lex->create_info.if_exists() && + !(thd->variables.option_bits & OPTION_IF_EXISTS)) + DBUG_RETURN(0); + /* + DROP TRIGGER IF NOT EXISTS will return without an error later + after possibly writing the query to a binlog + */ + } + else // force searching in slave.cc:tables_ok() + all_tables->updating= 1; + } + + /* + For fix of BUG#37051, the master stores the table map for update + in the Query_log_event, and the value is assigned to + thd->variables.table_map_for_update before executing the update + query. + + If thd->variables.table_map_for_update is set, then we are + replicating from a new master, we can use this value to apply + filter rules without opening all the tables. However If + thd->variables.table_map_for_update is not set, then we are + replicating from an old master, so we just skip this and + continue with the old method. And of course, the bug would still + exist for old masters. 
+ */ + if (lex->sql_command == SQLCOM_UPDATE_MULTI && + thd->table_map_for_update) + { + have_table_map_for_update= TRUE; + table_map table_map_for_update= thd->table_map_for_update; + uint nr= 0; + TABLE_LIST *table; + for (table=all_tables; table; table=table->next_global, nr++) + { + if (table_map_for_update & ((table_map)1 << nr)) + table->updating= TRUE; + else + table->updating= FALSE; + } + + if (all_tables_not_ok(thd, all_tables)) + { + /* we warn the slave SQL thread */ + my_message(ER_SLAVE_IGNORED_TABLE, ER_THD(thd, ER_SLAVE_IGNORED_TABLE), + MYF(0)); + } + } + + /* + Check if statment should be skipped because of slave filtering + rules + + Exceptions are: + - UPDATE MULTI: For this statement, we want to check the filtering + rules later in the code + - SET: we always execute it (Not that many SET commands exists in + the binary log anyway -- only 4.1 masters write SET statements, + in 5.0 there are no SET statements in the binary log) + - DROP TEMPORARY TABLE IF EXISTS: we always execute it (otherwise we + have stale files on slave caused by exclusion of one tmp table). + */ + if (!(lex->sql_command == SQLCOM_UPDATE_MULTI) && + !(lex->sql_command == SQLCOM_SET_OPTION) && + !((lex->sql_command == SQLCOM_DROP_TABLE || + lex->sql_command == SQLCOM_DROP_SEQUENCE) && + lex->tmp_table() && lex->if_exists()) && + all_tables_not_ok(thd, all_tables)) + { + /* we warn the slave SQL thread */ + my_message(ER_SLAVE_IGNORED_TABLE, ER_THD(thd, ER_SLAVE_IGNORED_TABLE), + MYF(0)); + DBUG_RETURN(0); + } + /* + Execute deferred events first + */ + if (slave_execute_deferred_events(thd)) + DBUG_RETURN(-1); + } + else + { +#endif /* HAVE_REPLICATION */ + /* + When option readonly is set deny operations which change non-temporary + tables. Except for the replication thread and the 'super' users. 
+ */ + if (deny_updates_if_read_only_option(thd, all_tables)) + { + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only"); + DBUG_RETURN(-1); + } +#ifdef HAVE_REPLICATION + } /* endif unlikely slave */ +#endif + Opt_trace_start ots(thd); + + /* store old value of binlog format */ + enum_binlog_format orig_binlog_format,orig_current_stmt_binlog_format; + + thd->get_binlog_format(&orig_binlog_format, + &orig_current_stmt_binlog_format); +#ifdef WITH_WSREP + if (WSREP(thd)) + { + /* + change LOCK TABLE WRITE to transaction + */ + if (lex->sql_command== SQLCOM_LOCK_TABLES && wsrep_convert_LOCK_to_trx) + { + for (TABLE_LIST *table= all_tables; table; table= table->next_global) + { + if (table->lock_type >= TL_FIRST_WRITE) + { + lex->sql_command= SQLCOM_BEGIN; + thd->wsrep_converted_lock_session= true; + break; + } + } + } + if (lex->sql_command== SQLCOM_UNLOCK_TABLES && + thd->wsrep_converted_lock_session) + { + thd->wsrep_converted_lock_session= false; + lex->sql_command= SQLCOM_COMMIT; + lex->tx_release= TVL_NO; + } + + /* + * Bail out if DB snapshot has not been installed. 
We however, + * allow SET and SHOW queries and reads from information schema + * and dirty reads (if configured) + */ + if (!(thd->wsrep_applier) && + !(wsrep_ready_get() && wsrep_reject_queries == WSREP_REJECT_NONE) && + !(thd->variables.wsrep_dirty_reads && + (sql_command_flags[lex->sql_command] & CF_CHANGES_DATA) == 0) && + !wsrep_tables_accessible_when_detached(all_tables) && + lex->sql_command != SQLCOM_SET_OPTION && + lex->sql_command != SQLCOM_CHANGE_DB && + !(lex->sql_command == SQLCOM_SELECT && !all_tables) && + !wsrep_is_show_query(lex->sql_command)) + { + my_message(ER_UNKNOWN_COM_ERROR, + "WSREP has not yet prepared node for application use", MYF(0)); + goto error; + } + } +#endif /* WITH_WSREP */ + status_var_increment(thd->status_var.com_stat[lex->sql_command]); + thd->progress.report_to_client= MY_TEST(sql_command_flags[lex->sql_command] & + CF_REPORT_PROGRESS); + + DBUG_ASSERT(thd->transaction->stmt.modified_non_trans_table == FALSE); + + /* + Assign system variables with values specified by the clause + SET STATEMENT var1=value1 [, var2=value2, ...] FOR + if they are any. + */ + if (run_set_statement_if_requested(thd, lex)) + goto error; + + /* After SET STATEMENT is done, we can initialize the Optimizer Trace: */ + ots.init(thd, all_tables, lex->sql_command, &lex->var_list, thd->query(), + thd->query_length(), thd->variables.character_set_client); + + if (thd->lex->mi.connection_name.str == NULL) + thd->lex->mi.connection_name= thd->variables.default_master_connection; + + /* + Force statement logging for DDL commands to allow us to update + privilege, system or statistic tables directly without the updates + getting logged. + */ + if (!(sql_command_flags[lex->sql_command] & + (CF_CAN_GENERATE_ROW_EVENTS | CF_FORCE_ORIGINAL_BINLOG_FORMAT | + CF_STATUS_COMMAND))) + thd->set_binlog_format_stmt(); + + /* + End a active transaction so that this command will have it's + own transaction and will also sync the binary log. 
If a DDL is + not run in it's own transaction it may simply never appear on + the slave in case the outside transaction rolls back. + */ + if (stmt_causes_implicit_commit(thd, CF_IMPLICIT_COMMIT_BEGIN)) + { + /* + Note that this should never happen inside of stored functions + or triggers as all such statements prohibited there. + */ + DBUG_ASSERT(! thd->in_sub_stmt); + /* Statement transaction still should not be started. */ + DBUG_ASSERT(thd->transaction->stmt.is_empty()); + if (!(thd->variables.option_bits & OPTION_GTID_BEGIN)) + { + /* Commit the normal transaction if one is active. */ + bool commit_failed= trans_commit_implicit(thd); + /* Release metadata locks acquired in this transaction. */ + thd->release_transactional_locks(); + if (commit_failed) + { + WSREP_DEBUG("implicit commit failed, MDL released: %lld", + (longlong) thd->thread_id); + goto error; + } + } + thd->transaction->stmt.mark_trans_did_ddl(); +#ifdef WITH_WSREP + /* Clean up the previous transaction on implicit commit */ + if (WSREP_NNULL(thd) && wsrep_thd_is_local(thd) && + wsrep_after_statement(thd)) + { + goto error; + } +#endif /* WITH_WSREP */ + } + +#ifndef DBUG_OFF + if (lex->sql_command != SQLCOM_SET_OPTION) + DEBUG_SYNC(thd,"before_execute_sql_command"); +#endif + + /* + Check if we are in a read-only transaction and we're trying to + execute a statement which should always be disallowed in such cases. + + Note that this check is done after any implicit commits. + */ + if (thd->tx_read_only && + (sql_command_flags[lex->sql_command] & CF_DISALLOW_IN_RO_TRANS)) + { + my_error(ER_CANT_EXECUTE_IN_READ_ONLY_TRANSACTION, MYF(0)); + goto error; + } + + /* + Close tables open by HANDLERs before executing DDL statement + which is going to affect those tables. + + This should happen before temporary tables are pre-opened as + otherwise we will get errors about attempt to re-open tables + if table to be changed is open through HANDLER. 
+ + Note that even although this is done before any privilege + checks there is no security problem here as closing open + HANDLER doesn't require any privileges anyway. + */ + if (sql_command_flags[lex->sql_command] & CF_HA_CLOSE) + mysql_ha_rm_tables(thd, all_tables); + + /* + Pre-open temporary tables to simplify privilege checking + for statements which need this. + */ + if (sql_command_flags[lex->sql_command] & CF_PREOPEN_TMP_TABLES) + { + if (thd->open_temporary_tables(all_tables)) + goto error; + } + + if (sql_command_flags[lex->sql_command] & CF_STATUS_COMMAND) + thd->query_plan_flags|= QPLAN_STATUS; + if (sql_command_flags[lex->sql_command] & CF_ADMIN_COMMAND) + thd->query_plan_flags|= QPLAN_ADMIN; + + /* Start timeouts */ + thd->set_query_timer(); + +#ifdef WITH_WSREP + /* Check wsrep_mode rules before command execution. */ + if (WSREP_NNULL(thd) && + wsrep_thd_is_local(thd) && !wsrep_check_mode_before_cmd_execute(thd)) + goto error; + + /* + Always start a new transaction for a wsrep THD unless the + current command is DDL or explicit BEGIN. This will guarantee that + the THD is BF abortable even if it does not generate any + changes and takes only read locks. If the statement does not + start a multi STMT transaction, the wsrep_transaction is + committed as empty at the end of this function. + + Transaction is started for BEGIN in trans_begin(), for DDL the + implicit commit took care of committing previous transaction + above and a new transaction should not be started. + + Do not start transaction for stored procedures, it will be handled + internally in SP processing. 
+ */ + if (WSREP_NNULL(thd) && + wsrep_thd_is_local(thd) && + lex->sql_command != SQLCOM_BEGIN && + lex->sql_command != SQLCOM_CALL && + lex->sql_command != SQLCOM_EXECUTE && + lex->sql_command != SQLCOM_EXECUTE_IMMEDIATE && + !(sql_command_flags[lex->sql_command] & CF_AUTO_COMMIT_TRANS)) + { + wsrep_start_trx_if_not_started(thd); + } +#endif /* WITH_WSREP */ + + switch (lex->sql_command) { + + case SQLCOM_SHOW_EVENTS: +#ifndef HAVE_EVENT_SCHEDULER + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "embedded server"); + break; +#endif + case SQLCOM_SHOW_STATUS: + { + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW); + execute_show_status(thd, all_tables); + + break; + } + case SQLCOM_SHOW_EXPLAIN: + case SQLCOM_SHOW_ANALYZE: + { + if (!thd->security_ctx->priv_user[0] && + check_global_access(thd, PRIV_STMT_SHOW_EXPLAIN)) + break; + + /* + The select should use only one table, it's the SHOW EXPLAIN pseudo-table + */ + if (lex->sroutines.records || lex->query_tables->next_global) + { + my_message(ER_SET_CONSTANTS_ONLY, ER_THD(thd, ER_SET_CONSTANTS_ONLY), + MYF(0)); + goto error; + } + + Item **it= lex->value_list.head_ref(); + if (!(*it)->basic_const_item() || + (*it)->fix_fields_if_needed_for_scalar(lex->thd, it)) + { + my_message(ER_SET_CONSTANTS_ONLY, ER_THD(thd, ER_SET_CONSTANTS_ONLY), + MYF(0)); + goto error; + } + } + /* fall through */ + case SQLCOM_SHOW_STATUS_PROC: + case SQLCOM_SHOW_STATUS_FUNC: + case SQLCOM_SHOW_STATUS_PACKAGE: + case SQLCOM_SHOW_STATUS_PACKAGE_BODY: + case SQLCOM_SHOW_DATABASES: + case SQLCOM_SHOW_TABLES: + case SQLCOM_SHOW_TRIGGERS: + case SQLCOM_SHOW_TABLE_STATUS: + case SQLCOM_SHOW_OPEN_TABLES: + case SQLCOM_SHOW_GENERIC: + case SQLCOM_SHOW_PLUGINS: + case SQLCOM_SHOW_FIELDS: + case SQLCOM_SHOW_KEYS: + case SQLCOM_SHOW_VARIABLES: + case SQLCOM_SHOW_CHARSETS: + case SQLCOM_SHOW_COLLATIONS: + case SQLCOM_SHOW_STORAGE_ENGINES: + case SQLCOM_SHOW_PROFILE: + case SQLCOM_SELECT: + { +#ifdef WITH_WSREP + if (lex->sql_command == SQLCOM_SELECT) + { + 
WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_READ); + } + else + { + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW); +# ifdef ENABLED_PROFILING + if (lex->sql_command == SQLCOM_SHOW_PROFILE) + thd->profiling.discard_current_query(); +# endif + } +#endif /* WITH_WSREP */ + + thd->status_var.last_query_cost= 0.0; + + /* + lex->exchange != NULL implies SELECT .. INTO OUTFILE and this + requires FILE_ACL access. + */ + privilege_t privileges_requested= lex->exchange ? SELECT_ACL | FILE_ACL : + SELECT_ACL; + + if (all_tables) + res= check_table_access(thd, + privileges_requested, + all_tables, FALSE, UINT_MAX, FALSE); + else + res= check_access(thd, privileges_requested, any_db.str, NULL,NULL,0,0); + + if (!res) + res= execute_sqlcom_select(thd, all_tables); + + break; + } + case SQLCOM_EXECUTE_IMMEDIATE: + { + mysql_sql_stmt_execute_immediate(thd); + break; + } + case SQLCOM_PREPARE: + { + mysql_sql_stmt_prepare(thd); + break; + } + case SQLCOM_EXECUTE: + { + mysql_sql_stmt_execute(thd); + break; + } + case SQLCOM_DEALLOCATE_PREPARE: + { + mysql_sql_stmt_close(thd); + break; + } + case SQLCOM_DO: + if (check_table_access(thd, SELECT_ACL, all_tables, FALSE, UINT_MAX, FALSE) + || open_and_lock_tables(thd, all_tables, TRUE, 0)) + goto error; + + res= mysql_do(thd, *lex->insert_list); + break; + + case SQLCOM_EMPTY_QUERY: + my_ok(thd); + break; + + case SQLCOM_HELP: + res= mysqld_help(thd,lex->help_arg); + break; + +#ifndef EMBEDDED_LIBRARY + case SQLCOM_PURGE: + { + if (check_global_access(thd, PRIV_STMT_PURGE_BINLOG)) + goto error; + /* PURGE MASTER LOGS TO 'file' */ + res = purge_master_logs(thd, lex->to_log); + break; + } + case SQLCOM_PURGE_BEFORE: + { + Item *it; + + if (check_global_access(thd, PRIV_STMT_PURGE_BINLOG)) + goto error; + /* PURGE MASTER LOGS BEFORE 'data' */ + it= (Item *)lex->value_list.head(); + if (it->fix_fields_if_needed_for_scalar(lex->thd, &it)) + { + my_error(ER_WRONG_ARGUMENTS, MYF(0), "PURGE LOGS BEFORE"); + goto error; + } + it= new 
(thd->mem_root) Item_func_unix_timestamp(thd, it); + it->fix_fields(thd, &it); + res = purge_master_logs_before_date(thd, (ulong)it->val_int()); + break; + } +#endif + case SQLCOM_SHOW_WARNS: + { + res= mysqld_show_warnings(thd, (ulong) + ((1L << (uint) Sql_condition::WARN_LEVEL_NOTE) | + (1L << (uint) Sql_condition::WARN_LEVEL_WARN) | + (1L << (uint) Sql_condition::WARN_LEVEL_ERROR) + )); + break; + } + case SQLCOM_SHOW_ERRORS: + { + res= mysqld_show_warnings(thd, (ulong) + (1L << (uint) Sql_condition::WARN_LEVEL_ERROR)); + break; + } + case SQLCOM_SHOW_PROFILES: + { +#if defined(ENABLED_PROFILING) + thd->profiling.discard_current_query(); + res= thd->profiling.show_profiles(); + if (res) + goto error; +#else + my_error(ER_FEATURE_DISABLED, MYF(0), "SHOW PROFILES", "enable-profiling"); + goto error; +#endif + break; + } + +#ifdef HAVE_REPLICATION + case SQLCOM_SHOW_SLAVE_HOSTS: + { + if (check_global_access(thd, PRIV_STMT_SHOW_SLAVE_HOSTS)) + goto error; + res = show_slave_hosts(thd); + break; + } + case SQLCOM_SHOW_RELAYLOG_EVENTS: + { + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW); + if (check_global_access(thd, PRIV_STMT_SHOW_RELAYLOG_EVENTS)) + goto error; + res = mysql_show_binlog_events(thd); + break; + } + case SQLCOM_SHOW_BINLOG_EVENTS: + { + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW); + if (check_global_access(thd, PRIV_STMT_SHOW_BINLOG_EVENTS)) + goto error; + res = mysql_show_binlog_events(thd); + break; + } +#endif + + case SQLCOM_ASSIGN_TO_KEYCACHE: + { + DBUG_ASSERT(first_table == all_tables && first_table != 0); + if (check_access(thd, INDEX_ACL, first_table->db.str, + &first_table->grant.privilege, + &first_table->grant.m_internal, + 0, 0)) + goto error; + res= mysql_assign_to_keycache(thd, first_table, &lex->ident); + break; + } + case SQLCOM_PRELOAD_KEYS: + { + DBUG_ASSERT(first_table == all_tables && first_table != 0); + if (check_access(thd, INDEX_ACL, first_table->db.str, + &first_table->grant.privilege, + 
&first_table->grant.m_internal, + 0, 0)) + goto error; + res = mysql_preload_keys(thd, first_table); + break; + } +#ifdef HAVE_REPLICATION + case SQLCOM_CHANGE_MASTER: + { + LEX_MASTER_INFO *lex_mi= &thd->lex->mi; + Master_info *mi; + bool new_master= 0; + bool master_info_added; + + if (check_global_access(thd, PRIV_STMT_CHANGE_MASTER)) + goto error; + /* + In this code it's ok to use LOCK_active_mi as we are adding new things + into master_info_index + */ + mysql_mutex_lock(&LOCK_active_mi); + if (!master_info_index) + { + mysql_mutex_unlock(&LOCK_active_mi); + my_error(ER_SERVER_SHUTDOWN, MYF(0)); + goto error; + } + + mi= master_info_index->get_master_info(&lex_mi->connection_name, + Sql_condition::WARN_LEVEL_NOTE); + + if (mi == NULL) + { + /* New replication created */ + mi= new Master_info(&lex_mi->connection_name, relay_log_recovery); + if (unlikely(!mi || mi->error())) + { + delete mi; + res= 1; + mysql_mutex_unlock(&LOCK_active_mi); + break; + } + new_master= 1; + } + + res= change_master(thd, mi, &master_info_added); + if (res && new_master) + { + /* + If the new master was added by change_master(), remove it as it didn't + work (this will free mi as well). + + If new master was not added, we still need to free mi. 
+ */ + if (master_info_added) + master_info_index->remove_master_info(mi, 1); + else + delete mi; + } + else + { + mi->rpl_filter= get_or_create_rpl_filter(lex_mi->connection_name.str, + lex_mi->connection_name.length); + } + + mysql_mutex_unlock(&LOCK_active_mi); + break; + } + + case SQLCOM_SHOW_BINLOG_STAT: + { + /* Accept one of two privileges */ + if (check_global_access(thd, PRIV_STMT_SHOW_BINLOG_STATUS)) + goto error; + res = show_binlog_info(thd); + break; + } + +#endif /* HAVE_REPLICATION */ + case SQLCOM_SHOW_ENGINE_STATUS: + { + if (check_global_access(thd, PRIV_STMT_SHOW_ENGINE_STATUS)) + goto error; + res = ha_show_status(thd, lex->create_info.db_type, HA_ENGINE_STATUS); + break; + } + case SQLCOM_SHOW_ENGINE_MUTEX: + { + if (check_global_access(thd, PRIV_STMT_SHOW_ENGINE_MUTEX)) + goto error; + res = ha_show_status(thd, lex->create_info.db_type, HA_ENGINE_MUTEX); + break; + } + case SQLCOM_DROP_INDEX: + if (thd->variables.option_bits & OPTION_IF_EXISTS) + lex->create_info.set(DDL_options_st::OPT_IF_EXISTS); + /* fall through */ + case SQLCOM_CREATE_INDEX: + /* + CREATE INDEX and DROP INDEX are implemented by calling ALTER + TABLE with proper arguments. + + In the future ALTER TABLE will notice that the request is to + only add indexes and create these one by one for the existing + table without having to do a full rebuild. 
+ */ + { + /* Prepare stack copies to be re-execution safe */ + Table_specification_st create_info; + Alter_info alter_info(lex->alter_info, thd->mem_root); + + if (unlikely(thd->is_fatal_error)) /* out of memory creating alter_info */ + goto error; + + DBUG_ASSERT(first_table == all_tables && first_table != 0); + if (check_one_table_access(thd, INDEX_ACL, all_tables)) + goto error; /* purecov: inspected */ + + create_info.init(); + create_info.db_type= 0; + create_info.row_type= ROW_TYPE_NOT_USED; + create_info.alter_info= &alter_info; + + WSREP_TO_ISOLATION_BEGIN(first_table->db.str, first_table->table_name.str, NULL); + + Recreate_info recreate_info; + res= mysql_alter_table(thd, &first_table->db, &first_table->table_name, + &create_info, first_table, + &recreate_info, &alter_info, + 0, (ORDER*) 0, 0, lex->if_exists()); + break; + } +#ifdef HAVE_REPLICATION + case SQLCOM_SLAVE_START: + { + LEX_MASTER_INFO* lex_mi= &thd->lex->mi; + Master_info *mi; + int load_error; + + load_error= rpl_load_gtid_slave_state(thd); + + /* + We don't need to ensure that only one user is using master_info + as start_slave is protected against simultaneous usage + */ + if (unlikely((mi= get_master_info(&lex_mi->connection_name, + Sql_condition::WARN_LEVEL_ERROR)))) + { + if (load_error) + { + /* + We cannot start a slave using GTID if we cannot load the + GTID position from the mysql.gtid_slave_pos table. But we + can allow non-GTID replication (useful eg. during upgrade). + */ + if (mi->using_gtid != Master_info::USE_GTID_NO) + { + mi->release(); + break; + } + else + thd->clear_error(); + } + if (!start_slave(thd, mi, 1 /* net report*/)) + my_ok(thd); + mi->release(); + } + break; + } + case SQLCOM_SLAVE_STOP: + { + LEX_MASTER_INFO *lex_mi; + Master_info *mi; + /* + If the client thread has locked tables, a deadlock is possible. + Assume that + - the client thread does LOCK TABLE t READ. + - then the master updates t. 
+ - then the SQL slave thread wants to update t, + so it waits for the client thread because t is locked by it. + - then the client thread does SLAVE STOP. + SLAVE STOP waits for the SQL slave thread to terminate its + update t, which waits for the client thread because t is locked by it. + To prevent that, refuse SLAVE STOP if the + client thread has locked tables + */ + if (thd->locked_tables_mode || + thd->in_active_multi_stmt_transaction() || + thd->global_read_lock.is_acquired()) + { + my_message(ER_LOCK_OR_ACTIVE_TRANSACTION, + ER_THD(thd, ER_LOCK_OR_ACTIVE_TRANSACTION), MYF(0)); + goto error; + } + + lex_mi= &thd->lex->mi; + if ((mi= get_master_info(&lex_mi->connection_name, + Sql_condition::WARN_LEVEL_ERROR))) + { + if (stop_slave(thd, mi, 1/* net report*/)) + res= 1; + mi->release(); + if (rpl_parallel_resize_pool_if_no_slaves()) + res= 1; + if (!res) + my_ok(thd); + } + break; + } + case SQLCOM_SLAVE_ALL_START: + { + mysql_mutex_lock(&LOCK_active_mi); + if (master_info_index && !master_info_index->start_all_slaves(thd)) + my_ok(thd); + mysql_mutex_unlock(&LOCK_active_mi); + break; + } + case SQLCOM_SLAVE_ALL_STOP: + { + if (thd->locked_tables_mode || + thd->in_active_multi_stmt_transaction() || + thd->global_read_lock.is_acquired()) + { + my_message(ER_LOCK_OR_ACTIVE_TRANSACTION, + ER_THD(thd, ER_LOCK_OR_ACTIVE_TRANSACTION), MYF(0)); + goto error; + } + mysql_mutex_lock(&LOCK_active_mi); + if (master_info_index && !master_info_index->stop_all_slaves(thd)) + my_ok(thd); + mysql_mutex_unlock(&LOCK_active_mi); + break; + } +#endif /* HAVE_REPLICATION */ + case SQLCOM_RENAME_TABLE: + { + if (check_rename_table(thd, first_table, all_tables)) + goto error; + + WSREP_TO_ISOLATION_BEGIN(0, 0, first_table); + + if (thd->variables.option_bits & OPTION_IF_EXISTS) + lex->create_info.set(DDL_options_st::OPT_IF_EXISTS); + + if (mysql_rename_tables(thd, first_table, 0, lex->if_exists())) + goto error; + break; + } +#ifndef EMBEDDED_LIBRARY + case SQLCOM_SHOW_BINLOGS: 
+#ifdef DONT_ALLOW_SHOW_COMMANDS + my_message(ER_NOT_ALLOWED_COMMAND, ER_THD(thd, ER_NOT_ALLOWED_COMMAND), + MYF(0)); /* purecov: inspected */ + goto error; +#else + { + if (check_global_access(thd, PRIV_STMT_SHOW_BINARY_LOGS)) + goto error; + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW); + res = show_binlogs(thd); + break; + } +#endif +#endif /* EMBEDDED_LIBRARY */ + case SQLCOM_SHOW_CREATE: + { + DBUG_ASSERT(first_table == all_tables && first_table != 0); +#ifdef DONT_ALLOW_SHOW_COMMANDS + my_message(ER_NOT_ALLOWED_COMMAND, ER_THD(thd, ER_NOT_ALLOWED_COMMAND), + MYF(0)); /* purecov: inspected */ + goto error; +#else + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW); + + /* + Access check: + SHOW CREATE TABLE require any privileges on the table level (ie + effecting all columns in the table). + SHOW CREATE VIEW require the SHOW_VIEW and SELECT ACLs on the table + level. + NOTE: SHOW_VIEW ACL is checked when the view is created. + */ + + DBUG_PRINT("debug", ("lex->only_view: %d, table: %s.%s", + lex->table_type == TABLE_TYPE_VIEW, + first_table->db.str, first_table->table_name.str)); + res= mysqld_show_create(thd, first_table); + break; +#endif + } + case SQLCOM_CHECKSUM: + { + DBUG_ASSERT(first_table == all_tables && first_table != 0); + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_READ); + + if (check_table_access(thd, SELECT_ACL, all_tables, + FALSE, UINT_MAX, FALSE)) + goto error; /* purecov: inspected */ + + res = mysql_checksum_table(thd, first_table, &lex->check_opt); + break; + } + case SQLCOM_UPDATE: + { + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE); + ha_rows found= 0, updated= 0; + DBUG_ASSERT(first_table == all_tables && first_table != 0); + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE); + + if (update_precheck(thd, all_tables)) + break; + + /* + UPDATE IGNORE can be unsafe. We therefore use row based + logging if mixed or row based logging is available. 
+ TODO: Check if the order of the output of the select statement is + deterministic. Waiting for BUG#42415 + */ + if (lex->ignore) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_UPDATE_IGNORE); + + DBUG_ASSERT(select_lex->limit_params.offset_limit == 0); + unit->set_limit(select_lex); + MYSQL_UPDATE_START(thd->query()); + res= up_result= mysql_update(thd, all_tables, + select_lex->item_list, + lex->value_list, + select_lex->where, + select_lex->order_list.elements, + select_lex->order_list.first, + unit->lim.get_select_limit(), + lex->ignore, &found, &updated); + MYSQL_UPDATE_DONE(res, found, updated); + /* mysql_update return 2 if we need to switch to multi-update */ + if (up_result != 2) + break; + if (thd->lex->period_conditions.is_set()) + { + DBUG_ASSERT(0); // Should never happen + goto error; + } + } + /* fall through */ + case SQLCOM_UPDATE_MULTI: + { + DBUG_ASSERT(first_table == all_tables && first_table != 0); + /* if we switched from normal update, rights are checked */ + if (up_result != 2) + { + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE); + if ((res= multi_update_precheck(thd, all_tables))) + break; + } + else + res= 0; + + unit->set_limit(select_lex); + /* + We can not use mysql_explain_union() because of parameters of + mysql_select in mysql_multi_update so just set the option if needed + */ + if (thd->lex->describe) + { + select_lex->set_explain_type(FALSE); + select_lex->options|= SELECT_DESCRIBE; + } + + res= mysql_multi_update_prepare(thd); + +#ifdef HAVE_REPLICATION + /* Check slave filtering rules */ + if (unlikely(thd->slave_thread && !have_table_map_for_update)) + { + if (all_tables_not_ok(thd, all_tables)) + { + if (res!= 0) + { + res= 0; /* don't care of prev failure */ + thd->clear_error(); /* filters are of highest prior */ + } + /* we warn the slave SQL thread */ + my_error(ER_SLAVE_IGNORED_TABLE, MYF(0)); + break; + } + if (res) + break; + } + else + { +#endif /* HAVE_REPLICATION */ + if (res) + break; + if (opt_readonly 
&& + !(thd->security_ctx->master_access & PRIV_IGNORE_READ_ONLY) && + some_non_temp_table_to_be_updated(thd, all_tables)) + { + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only"); + break; + } +#ifdef HAVE_REPLICATION + } /* unlikely */ +#endif + { + multi_update *result_obj; + MYSQL_MULTI_UPDATE_START(thd->query()); + res= mysql_multi_update(thd, all_tables, + &select_lex->item_list, + &lex->value_list, + select_lex->where, + select_lex->options, + lex->duplicates, + lex->ignore, + unit, + select_lex, + &result_obj); + if (result_obj) + { + MYSQL_MULTI_UPDATE_DONE(res, result_obj->num_found(), + result_obj->num_updated()); + res= FALSE; /* Ignore errors here */ + delete result_obj; + } + else + { + MYSQL_MULTI_UPDATE_DONE(1, 0, 0); + } + } + break; + } + case SQLCOM_REPLACE: + if ((res= generate_incident_event(thd))) + break; + /* fall through */ + case SQLCOM_INSERT: + { + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE); + select_result *sel_result= NULL; + DBUG_ASSERT(first_table == all_tables && first_table != 0); + + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE); + + /* + Since INSERT DELAYED doesn't support temporary tables, we could + not pre-open temporary tables for SQLCOM_INSERT / SQLCOM_REPLACE. + Open them here instead. + */ + if (first_table->lock_type != TL_WRITE_DELAYED) + { + res= (thd->open_temporary_tables(all_tables)) ? TRUE : FALSE; + if (res) + break; + } + + if ((res= insert_precheck(thd, all_tables))) + break; + + MYSQL_INSERT_START(thd->query()); + Protocol* save_protocol=NULL; + + if (lex->has_returning()) + { + status_var_increment(thd->status_var.feature_insert_returning); + + /* This is INSERT ... RETURNING. It will return output to the client */ + if (thd->lex->analyze_stmt) + { + /* + Actually, it is ANALYZE .. INSERT .. RETURNING. We need to produce + output and then discard it. 
+ */ + sel_result= new (thd->mem_root) select_send_analyze(thd); + save_protocol= thd->protocol; + thd->protocol= new Protocol_discard(thd); + } + else + { + if (!(sel_result= new (thd->mem_root) select_send(thd))) + goto error; + } + } + + res= mysql_insert(thd, all_tables, lex->field_list, lex->many_values, + lex->update_list, lex->value_list, + lex->duplicates, lex->ignore, sel_result); + if (save_protocol) + { + delete thd->protocol; + thd->protocol= save_protocol; + } + if (!res && thd->lex->analyze_stmt) + { + bool extended= thd->lex->describe & DESCRIBE_EXTENDED; + res= thd->lex->explain->send_explain(thd, extended); + } + delete sel_result; + MYSQL_INSERT_DONE(res, (ulong) thd->get_row_count_func()); + /* + If we have inserted into a VIEW, and the base table has + AUTO_INCREMENT column, but this column is not accessible through + a view, then we should restore LAST_INSERT_ID to the value it + had before the statement. + */ + if (first_table->view && !first_table->contain_auto_increment) + thd->first_successful_insert_id_in_cur_stmt= + thd->first_successful_insert_id_in_prev_stmt; + +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("after_mysql_insert", + { + const char act1[]= "now wait_for signal.continue"; + const char act2[]= "now signal signal.continued"; + DBUG_ASSERT(debug_sync_service); + DBUG_ASSERT(!debug_sync_set_action(thd, + STRING_WITH_LEN(act1))); + DBUG_ASSERT(!debug_sync_set_action(thd, + STRING_WITH_LEN(act2))); + };); + DEBUG_SYNC(thd, "after_mysql_insert"); +#endif + break; + } + case SQLCOM_REPLACE_SELECT: + case SQLCOM_INSERT_SELECT: + { + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE); + select_insert *sel_result; + select_result *result= NULL; + bool explain= MY_TEST(lex->describe); + DBUG_ASSERT(first_table == all_tables && first_table != 0); + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE); + + if ((res= insert_precheck(thd, all_tables))) + break; +#ifdef WITH_WSREP + if (WSREP(thd) && 
thd->wsrep_consistency_check == CONSISTENCY_CHECK_DECLARED) + { + thd->wsrep_consistency_check = CONSISTENCY_CHECK_RUNNING; + WSREP_TO_ISOLATION_BEGIN(first_table->db.str, first_table->table_name.str, NULL); + } +#endif /* WITH_WSREP */ + + /* + INSERT...SELECT...ON DUPLICATE KEY UPDATE/REPLACE SELECT/ + INSERT...IGNORE...SELECT can be unsafe, unless ORDER BY PRIMARY KEY + clause is used in SELECT statement. We therefore use row based + logging if mixed or row based logging is available. + TODO: Check if the order of the output of the select statement is + deterministic. Waiting for BUG#42415 + */ + if (lex->sql_command == SQLCOM_INSERT_SELECT && + lex->duplicates == DUP_UPDATE) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_SELECT_UPDATE); + + if (lex->sql_command == SQLCOM_INSERT_SELECT && lex->ignore) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_IGNORE_SELECT); + + if (lex->sql_command == SQLCOM_REPLACE_SELECT) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_REPLACE_SELECT); + + /* Fix lock for first table */ + if (first_table->lock_type == TL_WRITE_DELAYED) + first_table->lock_type= TL_WRITE; + + /* Don't unlock tables until command is written to binary log */ + select_lex->options|= SELECT_NO_UNLOCK; + + unit->set_limit(select_lex); + + if (!(res=open_and_lock_tables(thd, all_tables, TRUE, 0))) + { + MYSQL_INSERT_SELECT_START(thd->query()); + /* + Only the INSERT table should be merged. Other will be handled by + select. + */ + + Protocol* save_protocol=NULL; + + if (lex->has_returning()) + { + status_var_increment(thd->status_var.feature_insert_returning); + + /* This is INSERT ... RETURNING. It will return output to the client */ + if (thd->lex->analyze_stmt) + { + /* + Actually, it is ANALYZE .. INSERT .. RETURNING. We need to produce + output and then discard it. 
+ */ + result= new (thd->mem_root) select_send_analyze(thd); + save_protocol= thd->protocol; + thd->protocol= new Protocol_discard(thd); + } + else + { + if (!(result= new (thd->mem_root) select_send(thd))) + goto error; + } + } + + /* Skip first table, which is the table we are inserting in */ + TABLE_LIST *second_table= first_table->next_local; + /* + This is a hack: this leaves select_lex->table_list in an inconsistent + state as 'elements' does not contain number of elements in the list. + Moreover, if second_table == NULL then 'next' becomes invalid. + TODO: fix it by removing the front element (restoring of it should + be done properly as well) + */ + select_lex->table_list.first= second_table; + select_lex->context.table_list= + select_lex->context.first_name_resolution_table= second_table; + res= mysql_insert_select_prepare(thd, result); + if (!res && + (sel_result= new (thd->mem_root) + select_insert(thd, first_table, + first_table->table, + &lex->field_list, + &lex->update_list, + &lex->value_list, + lex->duplicates, + lex->ignore, + result))) + { + if (lex->analyze_stmt) + ((select_result_interceptor*)sel_result)->disable_my_ok_calls(); + + if (explain) + res= mysql_explain_union(thd, &thd->lex->unit, sel_result); + else + res= handle_select(thd, lex, sel_result, OPTION_SETUP_TABLES_DONE); + /* + Invalidate the table in the query cache if something changed + after unlocking when changes become visible. + TODO: this is workaround. right way will be move invalidating in + the unlock procedure. + */ + if (!res && first_table->lock_type == TL_WRITE_CONCURRENT_INSERT && + thd->lock) + { + /* INSERT ... SELECT should invalidate only the very first table */ + TABLE_LIST *save_table= first_table->next_local; + first_table->next_local= 0; + query_cache_invalidate3(thd, first_table, 1); + first_table->next_local= save_table; + } + if (explain) + { + /* + sel_result needs to be cleaned up properly. + INSERT... 
SELECT statement will call either send_eof() or + abort_result_set(). EXPLAIN doesn't call either, so we need + to cleanup manually. + */ + sel_result->abort_result_set(); + } + delete sel_result; + } + else if (res < 0) + { + /* + Insert should be ignored but we have to log the query in statement + format in the binary log + */ + res= thd->binlog_current_query_unfiltered(); + } + delete result; + if (save_protocol) + { + delete thd->protocol; + thd->protocol= save_protocol; + } + if (!res && (explain || lex->analyze_stmt)) + { + bool extended= thd->lex->describe & DESCRIBE_EXTENDED; + res= thd->lex->explain->send_explain(thd, extended); + } + + /* revert changes for SP */ + MYSQL_INSERT_SELECT_DONE(res, (ulong) thd->get_row_count_func()); + select_lex->table_list.first= first_table; + } + /* + If we have inserted into a VIEW, and the base table has + AUTO_INCREMENT column, but this column is not accessible through + a view, then we should restore LAST_INSERT_ID to the value it + had before the statement. + */ + if (first_table->view && !first_table->contain_auto_increment) + thd->first_successful_insert_id_in_cur_stmt= + thd->first_successful_insert_id_in_prev_stmt; + + break; + } + case SQLCOM_DELETE: + { + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE); + select_result *sel_result= NULL; + DBUG_ASSERT(first_table == all_tables && first_table != 0); + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE); + + if ((res= delete_precheck(thd, all_tables))) + break; + DBUG_ASSERT(select_lex->limit_params.offset_limit == 0); + unit->set_limit(select_lex); + + MYSQL_DELETE_START(thd->query()); + Protocol *save_protocol= NULL; + + if (lex->has_returning()) + { + /* This is DELETE ... RETURNING. It will return output to the client */ + if (thd->lex->analyze_stmt) + { + /* + Actually, it is ANALYZE .. DELETE .. RETURNING. We need to produce + output and then discard it. 
+ */ + sel_result= new (thd->mem_root) select_send_analyze(thd); + save_protocol= thd->protocol; + thd->protocol= new Protocol_discard(thd); + } + else + { + if (!lex->result && !(sel_result= new (thd->mem_root) select_send(thd))) + goto error; + } + } + + res = mysql_delete(thd, all_tables, + select_lex->where, &select_lex->order_list, + unit->lim.get_select_limit(), select_lex->options, + lex->result ? lex->result : sel_result); + + if (save_protocol) + { + delete thd->protocol; + thd->protocol= save_protocol; + } + + if (thd->lex->analyze_stmt || thd->lex->describe) + { + if (!res) + { + bool extended= thd->lex->describe & DESCRIBE_EXTENDED; + res= thd->lex->explain->send_explain(thd, extended); + } + } + + delete sel_result; + MYSQL_DELETE_DONE(res, (ulong) thd->get_row_count_func()); + break; + } + case SQLCOM_DELETE_MULTI: + { + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE); + DBUG_ASSERT(first_table == all_tables && first_table != 0); + TABLE_LIST *aux_tables= thd->lex->auxiliary_table_list.first; + multi_delete *result; + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE); + + if ((res= multi_delete_precheck(thd, all_tables))) + break; + + /* condition will be TRUE on SP re-excuting */ + if (select_lex->item_list.elements != 0) + select_lex->item_list.empty(); + if (add_item_to_list(thd, new (thd->mem_root) Item_null(thd))) + goto error; + + THD_STAGE_INFO(thd, stage_init); + if ((res= open_and_lock_tables(thd, all_tables, TRUE, 0))) + break; + + MYSQL_MULTI_DELETE_START(thd->query()); + if (unlikely(res= mysql_multi_delete_prepare(thd))) + { + MYSQL_MULTI_DELETE_DONE(1, 0); + goto error; + } + + if (likely(!thd->is_fatal_error)) + { + result= new (thd->mem_root) multi_delete(thd, aux_tables, + lex->table_count_update); + if (likely(result)) + { + if (unlikely(select_lex->vers_setup_conds(thd, aux_tables))) + goto multi_delete_error; + res= mysql_select(thd, + select_lex->get_table_list(), + select_lex->item_list, + select_lex->where, + 
0, (ORDER *)NULL, (ORDER *)NULL, (Item *)NULL, + (ORDER *)NULL, + (select_lex->options | thd->variables.option_bits | + SELECT_NO_JOIN_CACHE | SELECT_NO_UNLOCK | + OPTION_SETUP_TABLES_DONE) & ~OPTION_BUFFER_RESULT, + result, unit, select_lex); + res|= (int)(thd->is_error()); + + MYSQL_MULTI_DELETE_DONE(res, result->num_deleted()); + if (res) + result->abort_result_set(); /* for both DELETE and EXPLAIN DELETE */ + else + { + if (lex->describe || lex->analyze_stmt) + { + bool extended= thd->lex->describe & DESCRIBE_EXTENDED; + res= thd->lex->explain->send_explain(thd, extended); + } + } + multi_delete_error: + delete result; + } + } + else + { + res= TRUE; // Error + MYSQL_MULTI_DELETE_DONE(1, 0); + } + break; + } + case SQLCOM_DROP_SEQUENCE: + case SQLCOM_DROP_TABLE: + { + int result; + DBUG_ASSERT(first_table == all_tables && first_table != 0); + + thd->open_options|= HA_OPEN_FOR_REPAIR; + result= thd->open_temporary_tables(all_tables); + thd->open_options&= ~HA_OPEN_FOR_REPAIR; + if (result) + goto error; + if (!lex->tmp_table()) + { + if (check_table_access(thd, DROP_ACL, all_tables, FALSE, UINT_MAX, FALSE)) + goto error; /* purecov: inspected */ + } + else + { + if (thd->transaction->xid_state.check_has_uncommitted_xa()) + goto error; + status_var_decrement(thd->status_var.com_stat[lex->sql_command]); + status_var_increment(thd->status_var.com_drop_tmp_table); + + /* So that DROP TEMPORARY TABLE gets to binlog at commit/rollback */ + thd->variables.option_bits|= OPTION_BINLOG_THIS_TRX; + } + /* + If we are a slave, we should add IF EXISTS if the query executed + on the master without an error. This will help a slave to + recover from multi-table DROP TABLE that was aborted in the + middle. 
+ */ + if ((thd->slave_thread && !thd->slave_expected_error && + slave_ddl_exec_mode_options == SLAVE_EXEC_MODE_IDEMPOTENT) || + thd->variables.option_bits & OPTION_IF_EXISTS) + lex->create_info.set(DDL_options_st::OPT_IF_EXISTS); + +#ifdef WITH_WSREP + if (WSREP(thd)) + { + for (TABLE_LIST *table= all_tables; table; table= table->next_global) + { + if (!lex->tmp_table() && + (!thd->is_current_stmt_binlog_format_row() || + !is_temporary_table(table))) + { + WSREP_TO_ISOLATION_BEGIN(NULL, NULL, all_tables); + break; + } + } + } +#endif /* WITH_WSREP */ + + /* DDL and binlog write order are protected by metadata locks. */ + res= mysql_rm_table(thd, first_table, lex->if_exists(), lex->tmp_table(), + lex->table_type == TABLE_TYPE_SEQUENCE, 0); + + /* + When dropping temporary tables if @@session_track_state_change is ON + then send the boolean tracker in the OK packet + */ + if(!res && (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE)) + { + thd->session_tracker.state_change.mark_as_changed(thd); + } + break; + } + case SQLCOM_SHOW_PROCESSLIST: + if (!thd->security_ctx->priv_user[0] && + check_global_access(thd, PRIV_STMT_SHOW_PROCESSLIST)) + break; + mysqld_list_processes(thd, + (thd->security_ctx->master_access & PRIV_STMT_SHOW_PROCESSLIST ? 
+ NullS : + thd->security_ctx->priv_user), + lex->verbose); + break; + case SQLCOM_SHOW_AUTHORS: + res= mysqld_show_authors(thd); + break; + case SQLCOM_SHOW_CONTRIBUTORS: + res= mysqld_show_contributors(thd); + break; + case SQLCOM_SHOW_PRIVILEGES: + res= mysqld_show_privileges(thd); + break; + case SQLCOM_SHOW_ENGINE_LOGS: +#ifdef DONT_ALLOW_SHOW_COMMANDS + my_message(ER_NOT_ALLOWED_COMMAND, ER_THD(thd, ER_NOT_ALLOWED_COMMAND), + MYF(0)); /* purecov: inspected */ + goto error; +#else + { + if (check_access(thd, FILE_ACL, any_db.str, NULL, NULL, 0, 0)) + goto error; + res= ha_show_status(thd, lex->create_info.db_type, HA_ENGINE_LOGS); + break; + } +#endif + case SQLCOM_CHANGE_DB: + { + if (!mysql_change_db(thd, &select_lex->db, FALSE)) + my_ok(thd); + + break; + } + + case SQLCOM_LOAD: + { + DBUG_ASSERT(first_table == all_tables && first_table != 0); + privilege_t privilege= (lex->duplicates == DUP_REPLACE ? + INSERT_ACL | DELETE_ACL : INSERT_ACL) | + (lex->local_file ? NO_ACL : FILE_ACL); + + if (lex->local_file) + { + if (!(thd->client_capabilities & CLIENT_LOCAL_FILES) || + !opt_local_infile) + { + my_message(ER_LOAD_INFILE_CAPABILITY_DISABLED, + ER_THD(thd, ER_LOAD_INFILE_CAPABILITY_DISABLED), MYF(0)); + goto error; + } + } + + if (check_one_table_access(thd, privilege, all_tables)) + goto error; + + res= mysql_load(thd, lex->exchange, first_table, lex->field_list, + lex->update_list, lex->value_list, lex->duplicates, + lex->ignore, (bool) lex->local_file); + break; + } + + case SQLCOM_SET_OPTION: + { + List *lex_var_list= &lex->var_list; + + if ((check_table_access(thd, SELECT_ACL, all_tables, FALSE, UINT_MAX, FALSE) + || open_and_lock_tables(thd, all_tables, TRUE, 0))) + goto error; + if (likely(!(res= sql_set_variables(thd, lex_var_list, true)))) + { + if (likely(!thd->is_error())) + my_ok(thd); + } + else + { + /* + We encountered some sort of error, but no message was sent. 
+ Send something semi-generic here since we don't know which + assignment in the list caused the error. + */ + if (!thd->is_error()) + my_error(ER_WRONG_ARGUMENTS,MYF(0),"SET"); + goto error; + } + + break; + } + + case SQLCOM_UNLOCK_TABLES: + /* + It is critical for mysqldump --single-transaction --master-data that + UNLOCK TABLES does not implicitely commit a connection which has only + done FLUSH TABLES WITH READ LOCK + BEGIN. If this assumption becomes + false, mysqldump will not work. + */ + if (thd->variables.option_bits & OPTION_TABLE_LOCK) + { + res= trans_commit_implicit(thd); + if (thd->locked_tables_list.unlock_locked_tables(thd)) + res= 1; + thd->release_transactional_locks(); + thd->variables.option_bits&= ~(OPTION_TABLE_LOCK); + thd->reset_binlog_for_next_statement(); + } + if (thd->global_read_lock.is_acquired() && + thd->current_backup_stage == BACKUP_FINISHED) + thd->global_read_lock.unlock_global_read_lock(thd); + if (res) + goto error; + my_ok(thd); + break; + case SQLCOM_LOCK_TABLES: + /* We must end the transaction first, regardless of anything */ + res= trans_commit_implicit(thd); + if (thd->locked_tables_list.unlock_locked_tables(thd)) + res= 1; + /* Release transactional metadata locks. */ + thd->release_transactional_locks(); + if (res) + goto error; + +#ifdef WITH_WSREP + /* Clean up the previous transaction on implicit commit. */ + if (wsrep_on(thd) && !wsrep_not_committed(thd) && wsrep_after_statement(thd)) + goto error; +#endif + + /* We can't have any kind of table locks while backup is active */ + if (thd->current_backup_stage != BACKUP_FINISHED) + { + my_error(ER_BACKUP_LOCK_IS_ACTIVE, MYF(0)); + goto error; + } + + /* Should not lock tables while BACKUP LOCK is active */ + if (thd->mdl_backup_lock) + { + my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0)); + goto error; + } + + /* + Here we have to pre-open temporary tables for LOCK TABLES. 
+ + CF_PREOPEN_TMP_TABLES is not set for this SQL statement simply + because LOCK TABLES calls close_thread_tables() as a first thing + (it's called from unlock_locked_tables() above). So even if + CF_PREOPEN_TMP_TABLES was set and the tables would be pre-opened + in a usual way, they would have been closed. + */ + if (thd->open_temporary_tables(all_tables)) + goto error; + + if (lock_tables_precheck(thd, all_tables)) + goto error; + + thd->variables.option_bits|= OPTION_TABLE_LOCK; + + res= lock_tables_open_and_lock_tables(thd, all_tables); + + if (res) + { + thd->variables.option_bits&= ~(OPTION_TABLE_LOCK); + } + else + { +#ifdef HAVE_QUERY_CACHE + if (thd->variables.query_cache_wlock_invalidate) + query_cache.invalidate_locked_for_write(thd, first_table); +#endif /*HAVE_QUERY_CACHE*/ + my_ok(thd); + } + break; + case SQLCOM_BACKUP: + if (check_global_access(thd, RELOAD_ACL)) + goto error; + if (!(res= run_backup_stage(thd, lex->backup_stage))) + my_ok(thd); + break; + case SQLCOM_BACKUP_LOCK: + if (check_global_access(thd, RELOAD_ACL)) + goto error; + /* first table is set for lock. For unlock the list is empty */ + if (first_table) + res= backup_lock(thd, first_table); + else + backup_unlock(thd); + if (!res) + my_ok(thd); + break; + case SQLCOM_CREATE_DB: + { + if (prepare_db_action(thd, lex->create_info.or_replace() ? 
+ (CREATE_ACL | DROP_ACL) : CREATE_ACL, + &lex->name)) + break; + + if ((res= lex->create_info.resolve_to_charset_collation_context(thd, + thd->charset_collation_context_create_db()))) + break; + + WSREP_TO_ISOLATION_BEGIN(lex->name.str, NULL, NULL); + + res= mysql_create_db(thd, &lex->name, + lex->create_info, &lex->create_info); + break; + } + case SQLCOM_DROP_DB: + { + if (thd->variables.option_bits & OPTION_IF_EXISTS) + lex->create_info.set(DDL_options_st::OPT_IF_EXISTS); + + if (prepare_db_action(thd, DROP_ACL, &lex->name)) + break; + + WSREP_TO_ISOLATION_BEGIN(lex->name.str, NULL, NULL); + + res= mysql_rm_db(thd, &lex->name, lex->if_exists()); + break; + } + case SQLCOM_ALTER_DB_UPGRADE: + { + LEX_CSTRING *db= &lex->name; +#ifdef HAVE_REPLICATION + if (thd->slave_thread) + { + rpl_filter= thd->system_thread_info.rpl_sql_info->rpl_filter; + if (!rpl_filter->db_ok(db->str) || + !rpl_filter->db_ok_with_wild_table(db->str)) + { + res= 1; + my_message(ER_SLAVE_IGNORED_TABLE, ER_THD(thd, ER_SLAVE_IGNORED_TABLE), MYF(0)); + break; + } + } +#endif + if (check_db_name((LEX_STRING*) db)) + { + my_error(ER_WRONG_DB_NAME, MYF(0), db->str); + break; + } + if (check_access(thd, ALTER_ACL, db->str, NULL, NULL, 1, 0) || + check_access(thd, DROP_ACL, db->str, NULL, NULL, 1, 0) || + check_access(thd, CREATE_ACL, db->str, NULL, NULL, 1, 0)) + { + res= 1; + break; + } + + WSREP_TO_ISOLATION_BEGIN(db->str, NULL, NULL); + + res= mysql_upgrade_db(thd, db); + if (!res) + my_ok(thd); + break; + } + case SQLCOM_ALTER_DB: + { + LEX_CSTRING *db= &lex->name; + if (prepare_db_action(thd, ALTER_ACL, db)) + break; + + if ((res= lex->create_info.resolve_to_charset_collation_context(thd, + thd->charset_collation_context_alter_db(lex->name.str)))) + break; + + WSREP_TO_ISOLATION_BEGIN(db->str, NULL, NULL); + + res= mysql_alter_db(thd, db, &lex->create_info); + break; + } + case SQLCOM_SHOW_CREATE_DB: + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW); + res= show_create_db(thd, lex); + break; 
+ case SQLCOM_CREATE_EVENT: + case SQLCOM_ALTER_EVENT: + #ifdef HAVE_EVENT_SCHEDULER + do + { + DBUG_ASSERT(lex->event_parse_data); + if (lex->table_or_sp_used()) + { + my_error(ER_SUBQUERIES_NOT_SUPPORTED, MYF(0), "CREATE/ALTER EVENT"); + break; + } + + res= sp_process_definer(thd); + if (res) + break; + + switch (lex->sql_command) { + case SQLCOM_CREATE_EVENT: + { + res= Events::create_event(thd, lex->event_parse_data); + break; + } + case SQLCOM_ALTER_EVENT: + res= Events::update_event(thd, lex->event_parse_data, + lex->spname ? &lex->spname->m_db : NULL, + lex->spname ? &lex->spname->m_name : NULL); + break; + default: + DBUG_ASSERT(0); + } + DBUG_PRINT("info",("DDL error code=%d", res)); + if (!res) + my_ok(thd); + + } while (0); + /* Don't do it, if we are inside a SP */ + if (!thd->spcont && !is_called_from_prepared_stmt) + { + sp_head::destroy(lex->sphead); + lex->sphead= NULL; + } + /* lex->unit.cleanup() is called outside, no need to call it here */ + break; + case SQLCOM_SHOW_CREATE_EVENT: + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW); + res= Events::show_create_event(thd, &lex->spname->m_db, + &lex->spname->m_name); + break; + case SQLCOM_DROP_EVENT: + if (!(res= Events::drop_event(thd, + &lex->spname->m_db, &lex->spname->m_name, + lex->if_exists()))) + my_ok(thd); + break; +#else + my_error(ER_NOT_SUPPORTED_YET,MYF(0),"embedded server"); + break; +#endif + case SQLCOM_CREATE_FUNCTION: // UDF function + { + if (check_access(thd, lex->create_info.or_replace() ? + (INSERT_ACL | DELETE_ACL) : INSERT_ACL, + "mysql", NULL, NULL, 1, 0)) + break; +#ifdef HAVE_DLOPEN + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + if (!(res = mysql_create_function(thd, &lex->udf))) + my_ok(thd); +#else + my_error(ER_CANT_OPEN_LIBRARY, MYF(0), lex->udf.dl, 0, "feature disabled"); + res= TRUE; +#endif + break; + } +#ifndef NO_EMBEDDED_ACCESS_CHECKS + case SQLCOM_CREATE_USER: + case SQLCOM_CREATE_ROLE: + { + if (check_access(thd, lex->create_info.or_replace() ? 
+ INSERT_ACL | DELETE_ACL : INSERT_ACL, + "mysql", NULL, NULL, 1, 1) && + check_global_access(thd,CREATE_USER_ACL)) + break; + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + /* Conditionally writes to binlog */ + if (!(res= mysql_create_user(thd, lex->users_list, + lex->sql_command == SQLCOM_CREATE_ROLE))) + my_ok(thd); + break; + } + case SQLCOM_DROP_USER: + case SQLCOM_DROP_ROLE: + { + if (check_access(thd, DELETE_ACL, "mysql", NULL, NULL, 1, 1) && + check_global_access(thd,CREATE_USER_ACL)) + break; + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + /* Conditionally writes to binlog */ + if (!(res= mysql_drop_user(thd, lex->users_list, + lex->sql_command == SQLCOM_DROP_ROLE))) + my_ok(thd); + break; + } + case SQLCOM_ALTER_USER: + case SQLCOM_RENAME_USER: + { + if (check_access(thd, UPDATE_ACL, "mysql", NULL, NULL, 1, 1) && + check_global_access(thd,CREATE_USER_ACL)) + break; + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + /* Conditionally writes to binlog */ + if (lex->sql_command == SQLCOM_ALTER_USER) + res= mysql_alter_user(thd, lex->users_list); + else + res= mysql_rename_user(thd, lex->users_list); + if (!res) + my_ok(thd); + break; + } + case SQLCOM_REVOKE_ALL: + { + if (check_access(thd, UPDATE_ACL, "mysql", NULL, NULL, 1, 1) && + check_global_access(thd,CREATE_USER_ACL)) + break; + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + /* Conditionally writes to binlog */ + if (!(res = mysql_revoke_all(thd, lex->users_list))) + my_ok(thd); + break; + } + + case SQLCOM_REVOKE_ROLE: + case SQLCOM_GRANT_ROLE: + { + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + if (!(res= mysql_grant_role(thd, lex->users_list, + lex->sql_command != SQLCOM_GRANT_ROLE))) + my_ok(thd); + break; + } +#endif /*!NO_EMBEDDED_ACCESS_CHECKS*/ + case SQLCOM_RESET: + /* + RESET commands are never written to the binary log, so we have to + initialize this variable because RESET shares the same code as FLUSH + */ + 
lex->no_write_to_binlog= 1; + /* fall through */ + case SQLCOM_FLUSH: + { + int write_to_binlog; + if (check_global_access(thd,RELOAD_ACL)) + goto error; + + if (first_table && lex->type & (REFRESH_READ_LOCK|REFRESH_FOR_EXPORT)) + { + /* Check table-level privileges. */ + if (check_table_access(thd, PRIV_LOCK_TABLES, all_tables, + FALSE, UINT_MAX, FALSE)) + goto error; + + if (flush_tables_with_read_lock(thd, all_tables)) + goto error; + + my_ok(thd); + break; + } + +#ifdef WITH_WSREP + if (lex->type & ( + REFRESH_GRANT | + REFRESH_HOSTS | +#ifdef HAVE_OPENSSL + REFRESH_DES_KEY_FILE | +#endif + /* + Write all flush log statements except + FLUSH LOGS + FLUSH BINARY LOGS + Check reload_acl_and_cache for why. + */ + REFRESH_RELAY_LOG | + REFRESH_SLOW_LOG | + REFRESH_GENERAL_LOG | + REFRESH_ENGINE_LOG | + REFRESH_ERROR_LOG | +#ifdef HAVE_QUERY_CACHE + REFRESH_QUERY_CACHE_FREE | +#endif /* HAVE_QUERY_CACHE */ + REFRESH_STATUS | + REFRESH_USER_RESOURCES)) + { + WSREP_TO_ISOLATION_BEGIN_WRTCHK(WSREP_MYSQL_DB, NULL, NULL); + } +#endif /* WITH_WSREP*/ + +#ifdef HAVE_REPLICATION + if (lex->type & REFRESH_READ_LOCK) + { + /* + We need to pause any parallel replication slave workers during FLUSH + TABLES WITH READ LOCK. Otherwise we might cause a deadlock, as + worker threads eun run in arbitrary order but need to commit in a + specific given order. + */ + if (rpl_pause_for_ftwrl(thd)) + goto error; + } +#endif + /* + reload_acl_and_cache() will tell us if we are allowed to write to the + binlog or not. + */ + if (!reload_acl_and_cache(thd, lex->type, first_table, &write_to_binlog)) + { +#ifdef WITH_WSREP + if ((lex->type & REFRESH_TABLES) && !(lex->type & (REFRESH_FOR_EXPORT|REFRESH_READ_LOCK))) + { + /* + This is done after reload_acl_and_cache is because + LOCK TABLES is not replicated in galera, the upgrade of which + is checked in reload_acl_and_cache. + Hence, done after/if we are able to upgrade locks. 
+ */ + if (first_table) + { + WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, first_table); + } + else + { + WSREP_TO_ISOLATION_BEGIN_WRTCHK(WSREP_MYSQL_DB, NULL, NULL); + } + } +#endif /* WITH_WSREP */ + /* + We WANT to write and we CAN write. + ! we write after unlocking the table. + */ + /* + Presumably, RESET and binlog writing doesn't require synchronization + */ + + if (write_to_binlog > 0) // we should write + { + if (!lex->no_write_to_binlog) + res= write_bin_log(thd, FALSE, thd->query(), thd->query_length()); + } else if (write_to_binlog < 0) + { + /* + We should not write, but rather report error because + reload_acl_and_cache binlog interactions failed + */ + res= 1; + } + + if (!res) + my_ok(thd); + } + else + res= 1; // reload_acl_and_cache failed +#ifdef HAVE_REPLICATION + if (lex->type & REFRESH_READ_LOCK) + rpl_unpause_after_ftwrl(thd); +#endif + + break; + } + case SQLCOM_KILL: + { + if (lex->table_or_sp_used()) + { + my_error(ER_SUBQUERIES_NOT_SUPPORTED, MYF(0), "KILL"); + break; + } + + if (lex->kill_type == KILL_TYPE_ID || lex->kill_type == KILL_TYPE_QUERY) + { + Item *it= (Item *)lex->value_list.head(); + if (it->fix_fields_if_needed_for_scalar(lex->thd, &it)) + { + my_message(ER_SET_CONSTANTS_ONLY, ER_THD(thd, ER_SET_CONSTANTS_ONLY), + MYF(0)); + goto error; + } + sql_kill(thd, (my_thread_id) it->val_int(), lex->kill_signal, lex->kill_type); + } + else + sql_kill_user(thd, get_current_user(thd, lex->users_list.head()), + lex->kill_signal); + break; + } + case SQLCOM_SHUTDOWN: +#ifndef EMBEDDED_LIBRARY + DBUG_EXECUTE_IF("crash_shutdown", DBUG_SUICIDE();); + if (check_global_access(thd,SHUTDOWN_ACL)) + goto error; + kill_mysql(thd); + my_ok(thd); +#else + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "embedded server"); +#endif + break; + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + case SQLCOM_SHOW_CREATE_USER: + { + LEX_USER *grant_user= lex->grant_user; + if (!grant_user) + goto error; + + res = mysql_show_create_user(thd, grant_user); + break; + } + case 
SQLCOM_SHOW_GRANTS: + { + LEX_USER *grant_user= lex->grant_user; + if (!grant_user) + goto error; + + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW); + res = mysql_show_grants(thd, grant_user); + break; + } +#endif + case SQLCOM_HA_OPEN: + DBUG_ASSERT(first_table == all_tables && first_table != 0); + if (check_table_access(thd, SELECT_ACL, all_tables, FALSE, UINT_MAX, FALSE)) + goto error; + /* Close temporary tables which were pre-opened for privilege checking. */ + close_thread_tables(thd); + all_tables->table= NULL; + res= mysql_ha_open(thd, first_table, 0); + break; + case SQLCOM_HA_CLOSE: + DBUG_ASSERT(first_table == all_tables && first_table != 0); + res= mysql_ha_close(thd, first_table); + break; + case SQLCOM_HA_READ: + DBUG_ASSERT(first_table == all_tables && first_table != 0); + /* + There is no need to check for table permissions here, because + if a user has no permissions to read a table, he won't be + able to open it (with SQLCOM_HA_OPEN) in the first place. + */ + unit->set_limit(select_lex); + + res= mysql_ha_read(thd, first_table, lex->ha_read_mode, lex->ident.str, + lex->insert_list, lex->ha_rkey_mode, select_lex->where, + unit->lim.get_select_limit(), + unit->lim.get_offset_limit()); + break; + + case SQLCOM_BEGIN: + DBUG_PRINT("info", ("Executing SQLCOM_BEGIN thd: %p", thd)); + if (trans_begin(thd, lex->start_transaction_opt)) + { + thd->release_transactional_locks(); + WSREP_DEBUG("BEGIN failed, MDL released: %lld", + (longlong) thd->thread_id); + WSREP_DEBUG("stmt_da, sql_errno: %d", (thd->get_stmt_da()->is_error()) ? 
thd->get_stmt_da()->sql_errno() : 0); + goto error; + } + my_ok(thd); + break; + case SQLCOM_COMMIT: + { + DBUG_ASSERT(thd->lock == NULL || + thd->locked_tables_mode == LTM_LOCK_TABLES); + bool tx_chain= (lex->tx_chain == TVL_YES || + (thd->variables.completion_type == 1 && + lex->tx_chain != TVL_NO)); + bool tx_release= (lex->tx_release == TVL_YES || + (thd->variables.completion_type == 2 && + lex->tx_release != TVL_NO)); + bool commit_failed= trans_commit(thd); + thd->release_transactional_locks(); + if (commit_failed) + { + WSREP_DEBUG("COMMIT failed, MDL released: %lld", + (longlong) thd->thread_id); + goto error; + } + /* Begin transaction with the same isolation level. */ + if (tx_chain) + { + if (trans_begin(thd)) + goto error; + } + else + { + /* Reset the isolation level and access mode if no chaining transaction.*/ + trans_reset_one_shot_chistics(thd); + } + /* Disconnect the current client connection. */ + if (tx_release) + { + thd->set_killed(KILL_CONNECTION); + thd->print_aborted_warning(3, "RELEASE"); + } + my_ok(thd); + break; + } + case SQLCOM_ROLLBACK: + { + DBUG_ASSERT(thd->lock == NULL || + thd->locked_tables_mode == LTM_LOCK_TABLES); + bool tx_chain= (lex->tx_chain == TVL_YES || + (thd->variables.completion_type == 1 && + lex->tx_chain != TVL_NO)); + bool tx_release= (lex->tx_release == TVL_YES || + (thd->variables.completion_type == 2 && + lex->tx_release != TVL_NO)); + bool rollback_failed= trans_rollback(thd); + thd->release_transactional_locks(); + + if (rollback_failed) + { + WSREP_DEBUG("rollback failed, MDL released: %lld", + (longlong) thd->thread_id); + goto error; + } + /* Begin transaction with the same isolation level. 
*/ + if (tx_chain) + { +#ifdef WITH_WSREP + /* If there are pending changes after rollback we should clear them */ + if (wsrep_on(thd) && wsrep_has_changes(thd)) + wsrep_after_statement(thd); +#endif + if (trans_begin(thd)) + goto error; + } + else + { + /* Reset the isolation level and access mode if no chaining transaction.*/ + trans_reset_one_shot_chistics(thd); + } + /* Disconnect the current client connection. */ + if (tx_release) + thd->set_killed(KILL_CONNECTION); + my_ok(thd); + break; + } + case SQLCOM_RELEASE_SAVEPOINT: + if (trans_release_savepoint(thd, lex->ident)) + goto error; + my_ok(thd); + break; + case SQLCOM_ROLLBACK_TO_SAVEPOINT: + if (trans_rollback_to_savepoint(thd, lex->ident)) + goto error; + my_ok(thd); + break; + case SQLCOM_SAVEPOINT: + if (trans_savepoint(thd, lex->ident)) + goto error; + my_ok(thd); + break; + case SQLCOM_CREATE_PROCEDURE: + case SQLCOM_CREATE_SPFUNCTION: + case SQLCOM_CREATE_PACKAGE: + case SQLCOM_CREATE_PACKAGE_BODY: + { + if (mysql_create_routine(thd, lex)) + goto error; + my_ok(thd); + break; /* break super switch */ + } /* end case group bracket */ + case SQLCOM_COMPOUND: + { + sp_head *sp= lex->sphead; + DBUG_ASSERT(all_tables == 0); + DBUG_ASSERT(thd->in_sub_stmt == 0); + sp->m_sql_mode= thd->variables.sql_mode; + sp->m_sp_share= MYSQL_GET_SP_SHARE(sp->m_handler->type(), + sp->m_db.str, static_cast(sp->m_db.length), + sp->m_name.str, static_cast(sp->m_name.length)); + if (do_execute_sp(thd, lex->sphead)) + goto error; + break; + } + + case SQLCOM_ALTER_PROCEDURE: + case SQLCOM_ALTER_FUNCTION: + if (thd->variables.option_bits & OPTION_IF_EXISTS) + lex->create_info.set(DDL_options_st::OPT_IF_EXISTS); + if (alter_routine(thd, lex)) + goto error; + break; + case SQLCOM_DROP_PROCEDURE: + case SQLCOM_DROP_FUNCTION: + case SQLCOM_DROP_PACKAGE: + case SQLCOM_DROP_PACKAGE_BODY: + if (thd->variables.option_bits & OPTION_IF_EXISTS) + lex->create_info.set(DDL_options_st::OPT_IF_EXISTS); + if (drop_routine(thd, lex)) + goto 
error; + break; + case SQLCOM_SHOW_CREATE_PROC: + case SQLCOM_SHOW_CREATE_FUNC: + case SQLCOM_SHOW_CREATE_PACKAGE: + case SQLCOM_SHOW_CREATE_PACKAGE_BODY: + { + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW); + const Sp_handler *sph= Sp_handler::handler(lex->sql_command); + if (sph->sp_show_create_routine(thd, lex->spname)) + goto error; + break; + } + case SQLCOM_SHOW_PROC_CODE: + case SQLCOM_SHOW_FUNC_CODE: + case SQLCOM_SHOW_PACKAGE_BODY_CODE: + { +#ifndef DBUG_OFF + Database_qualified_name pkgname(&null_clex_str, &null_clex_str); + sp_head *sp; + const Sp_handler *sph= Sp_handler::handler(lex->sql_command); + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW); + if (sph->sp_resolve_package_routine(thd, thd->lex->sphead, + lex->spname, &sph, &pkgname)) + return true; + if (sph->sp_cache_routine(thd, lex->spname, false, &sp)) + goto error; + if (!sp || sp->show_routine_code(thd)) + { + /* We don't distinguish between errors for now */ + my_error(ER_SP_DOES_NOT_EXIST, MYF(0), + sph->type_str(), lex->spname->m_name.str); + goto error; + } + break; +#else + my_error(ER_FEATURE_DISABLED, MYF(0), + "SHOW PROCEDURE|FUNCTION CODE", "--with-debug"); + goto error; +#endif // ifndef DBUG_OFF + } + case SQLCOM_SHOW_CREATE_TRIGGER: + { + if (check_ident_length(&lex->spname->m_name)) + goto error; + + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW); + if (show_create_trigger(thd, lex->spname)) + goto error; /* Error has been already logged. */ + + break; + } + case SQLCOM_CREATE_VIEW: + { + /* + Note: SQLCOM_CREATE_VIEW also handles 'ALTER VIEW' commands + as specified through the thd->lex->create_view->mode flag. 
+ */ + res= mysql_create_view(thd, first_table, thd->lex->create_view->mode); + break; + } + case SQLCOM_DROP_VIEW: + { + if (check_table_access(thd, DROP_ACL, all_tables, FALSE, UINT_MAX, FALSE)) + goto error; + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + if (thd->variables.option_bits & OPTION_IF_EXISTS) + lex->create_info.set(DDL_options_st::OPT_IF_EXISTS); + + /* Conditionally writes to binlog. */ + res= mysql_drop_view(thd, first_table, thd->lex->drop_mode); + break; + } + case SQLCOM_CREATE_TRIGGER: + { + /* Conditionally writes to binlog. */ + res= mysql_create_or_drop_trigger(thd, all_tables, 1); + + break; + } + case SQLCOM_DROP_TRIGGER: + { + if (thd->variables.option_bits & OPTION_IF_EXISTS) + lex->create_info.set(DDL_options_st::OPT_IF_EXISTS); + + /* Conditionally writes to binlog. */ + res= mysql_create_or_drop_trigger(thd, all_tables, 0); + break; + } + case SQLCOM_XA_START: +#ifdef WITH_WSREP + if (WSREP_ON) + { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "XA transactions with Galera replication"); + break; + } +#endif /* WITH_WSREP */ + if (trans_xa_start(thd)) + goto error; + my_ok(thd); + break; + case SQLCOM_XA_END: + if (trans_xa_end(thd)) + goto error; + my_ok(thd); + break; + case SQLCOM_XA_PREPARE: + if (trans_xa_prepare(thd)) + goto error; + my_ok(thd); + break; + case SQLCOM_XA_COMMIT: + { + bool commit_failed= trans_xa_commit(thd); + if (commit_failed) + { + WSREP_DEBUG("XA commit failed, MDL released: %lld", + (longlong) thd->thread_id); + goto error; + } + /* + We've just done a commit, reset transaction + isolation level and access mode to the session default. 
+ */ + trans_reset_one_shot_chistics(thd); + my_ok(thd); + break; + } + case SQLCOM_XA_ROLLBACK: + { + bool rollback_failed= trans_xa_rollback(thd); + if (rollback_failed) + { + WSREP_DEBUG("XA rollback failed, MDL released: %lld", + (longlong) thd->thread_id); + goto error; + } + /* + We've just done a rollback, reset transaction + isolation level and access mode to the session default. + */ + trans_reset_one_shot_chistics(thd); + my_ok(thd); + break; + } + case SQLCOM_XA_RECOVER: + res= mysql_xa_recover(thd); + break; + case SQLCOM_INSTALL_PLUGIN: + if (! (res= mysql_install_plugin(thd, &thd->lex->comment, + &thd->lex->ident))) + my_ok(thd); + break; + case SQLCOM_UNINSTALL_PLUGIN: + if (! (res= mysql_uninstall_plugin(thd, &thd->lex->comment, + &thd->lex->ident))) + my_ok(thd); + break; + case SQLCOM_BINLOG_BASE64_EVENT: + { +#ifndef EMBEDDED_LIBRARY + mysql_client_binlog_statement(thd); +#else /* EMBEDDED_LIBRARY */ + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "embedded"); +#endif /* EMBEDDED_LIBRARY */ + break; + } + case SQLCOM_CREATE_SERVER: + { + DBUG_PRINT("info", ("case SQLCOM_CREATE_SERVER")); + + if (check_global_access(thd, PRIV_STMT_CREATE_SERVER)) + break; + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + res= create_server(thd, &lex->server_options); + break; + } + case SQLCOM_ALTER_SERVER: + { + int error; + DBUG_PRINT("info", ("case SQLCOM_ALTER_SERVER")); + + if (check_global_access(thd, PRIV_STMT_ALTER_SERVER)) + break; + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + if (unlikely((error= alter_server(thd, &lex->server_options)))) + { + DBUG_PRINT("info", ("problem altering server <%s>", + lex->server_options.server_name.str)); + my_error(error, MYF(0), lex->server_options.server_name.str); + break; + } + my_ok(thd, 1); + break; + } + case SQLCOM_DROP_SERVER: + { + int err_code; + DBUG_PRINT("info", ("case SQLCOM_DROP_SERVER")); + + if (check_global_access(thd, PRIV_STMT_DROP_SERVER)) + break; + + 
WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + if ((err_code= drop_server(thd, &lex->server_options))) + { + if (! lex->if_exists() && err_code == ER_FOREIGN_SERVER_DOESNT_EXIST) + { + DBUG_PRINT("info", ("problem dropping server %s", + lex->server_options.server_name.str)); + my_error(err_code, MYF(0), lex->server_options.server_name.str); + } + else + { + my_ok(thd, 0); + } + break; + } + my_ok(thd, 1); + break; + } + case SQLCOM_ANALYZE: + case SQLCOM_CHECK: + case SQLCOM_OPTIMIZE: + case SQLCOM_REPAIR: + case SQLCOM_TRUNCATE: + case SQLCOM_CREATE_TABLE: + case SQLCOM_CREATE_SEQUENCE: + case SQLCOM_ALTER_TABLE: + DBUG_ASSERT(first_table == all_tables && first_table != 0); + /* fall through */ + case SQLCOM_ALTER_SEQUENCE: + case SQLCOM_SHOW_SLAVE_STAT: + case SQLCOM_SIGNAL: + case SQLCOM_RESIGNAL: + case SQLCOM_GET_DIAGNOSTICS: + case SQLCOM_CALL: + case SQLCOM_REVOKE: + case SQLCOM_GRANT: + if (thd->variables.option_bits & OPTION_IF_EXISTS) + lex->create_info.set(DDL_options_st::OPT_IF_EXISTS); + DBUG_ASSERT(lex->m_sql_cmd != NULL); + res= lex->m_sql_cmd->execute(thd); + DBUG_PRINT("result", ("res: %d killed: %d is_error(): %d", + res, thd->killed, thd->is_error())); + break; + default: + +#ifndef EMBEDDED_LIBRARY + DBUG_ASSERT(0); /* Impossible */ +#endif + my_ok(thd); + break; + } + THD_STAGE_INFO(thd, stage_query_end); + thd->update_stats(); + + goto finish; + +error: +#ifdef WITH_WSREP +wsrep_error_label: +#endif + res= true; + +finish: + + thd->reset_query_timer(); + DBUG_ASSERT(!thd->in_active_multi_stmt_transaction() || + thd->in_multi_stmt_transaction_mode()); + + lex->unit.cleanup(); + + /* close/reopen tables that were marked to need reopen under LOCK TABLES */ + if (unlikely(thd->locked_tables_list.some_table_marked_for_reopen) && + !thd->lex->requires_prelocking()) + thd->locked_tables_list.reopen_tables(thd, true); + + if (! 
thd->in_sub_stmt) + { + if (thd->killed != NOT_KILLED) + { + /* report error issued during command execution */ + if (thd->killed_errno()) + { + /* If we already sent 'ok', we can ignore any kill query statements */ + if (! thd->get_stmt_da()->is_set()) + thd->send_kill_message(); + } + thd->reset_kill_query(); + } + if (unlikely(thd->is_error()) || + (thd->variables.option_bits & OPTION_MASTER_SQL_ERROR)) + { + THD_STAGE_INFO(thd, stage_rollback); + trans_rollback_stmt(thd); + } + else + { + /* If commit fails, we should be able to reset the OK status. */ + THD_STAGE_INFO(thd, stage_commit); + thd->get_stmt_da()->set_overwrite_status(true); + trans_commit_stmt(thd); + thd->get_stmt_da()->set_overwrite_status(false); + } + } + + /* Free tables. Set stage 'closing tables' */ + close_thread_tables_for_query(thd); + +#ifndef DBUG_OFF + if (lex->sql_command != SQLCOM_SET_OPTION && ! thd->in_sub_stmt) + DEBUG_SYNC(thd, "execute_command_after_close_tables"); +#endif + if (!(sql_command_flags[lex->sql_command] & + (CF_CAN_GENERATE_ROW_EVENTS | CF_FORCE_ORIGINAL_BINLOG_FORMAT | + CF_STATUS_COMMAND))) + thd->set_binlog_format(orig_binlog_format, + orig_current_stmt_binlog_format); + + if (! thd->in_sub_stmt && thd->transaction_rollback_request) + { + /* + We are not in sub-statement and transaction rollback was requested by + one of storage engines (e.g. due to deadlock). Rollback transaction in + all storage engines including binary log. + */ + THD_STAGE_INFO(thd, stage_rollback_implicit); + trans_rollback_implicit(thd); + thd->release_transactional_locks(); + } + else if (stmt_causes_implicit_commit(thd, CF_IMPLICIT_COMMIT_END)) + { + /* No transaction control allowed in sub-statements. */ + DBUG_ASSERT(! thd->in_sub_stmt); + if (!(thd->variables.option_bits & OPTION_GTID_BEGIN)) + { + THD_STAGE_INFO(thd, stage_commit_implicit); + /* If commit fails, we should be able to reset the OK status. 
*/ + thd->get_stmt_da()->set_overwrite_status(true); + /* Commit the normal transaction if one is active. */ + trans_commit_implicit(thd); + thd->get_stmt_da()->set_overwrite_status(false); + thd->release_transactional_locks(); + } + } + else if (! thd->in_sub_stmt && ! thd->in_active_multi_stmt_transaction()) + { + /* + - If inside a multi-statement transaction, + defer the release of metadata locks until the current + transaction is either committed or rolled back. This prevents + other statements from modifying the table for the entire + duration of this transaction. This provides commit ordering + and guarantees serializability across multiple transactions. + - If in autocommit mode, or outside a transactional context, + automatically release metadata locks of the current statement. + */ + thd->release_transactional_locks(); + } + else if (! thd->in_sub_stmt) + { + thd->mdl_context.release_statement_locks(); + } + + THD_STAGE_INFO(thd, stage_starting_cleanup); + + TRANSACT_TRACKER(add_trx_state_from_thd(thd)); + +#ifdef WITH_WSREP + thd->wsrep_consistency_check= NO_CONSISTENCY_CHECK; + + if (wsrep_thd_is_toi(thd) || wsrep_thd_is_in_rsu(thd)) + { + WSREP_DEBUG("mysql_execute_command for %s", wsrep_thd_query(thd)); + THD_STAGE_INFO(thd, stage_waiting_isolation); + wsrep_to_isolation_end(thd); + } + + /* + Force release of transactional locks if not in active MST and wsrep is on. + */ + if (WSREP(thd) && + ! thd->in_sub_stmt && + ! thd->in_active_multi_stmt_transaction() && + thd->mdl_context.has_transactional_locks()) + { + WSREP_DEBUG("Forcing release of transactional locks for thd: %lld", + (longlong) thd->thread_id); + thd->release_transactional_locks(); + } + + /* + Current command did not start multi STMT transaction and the command + did not cause commit to happen (e.g. read only). Commit the wsrep + transaction as empty. 
+ */ + if (!thd->in_active_multi_stmt_transaction() && + !thd->in_sub_stmt && + thd->wsrep_trx().active() && + thd->wsrep_trx().state() == wsrep::transaction::s_executing) + { + wsrep_commit_empty(thd, true); + } + + /* assume PA safety for next transaction */ + thd->wsrep_PA_safe= true; +#endif /* WITH_WSREP */ + + DBUG_RETURN(res || thd->is_error()); + } + +static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables) +{ + LEX *lex= thd->lex; + select_result *result=lex->result; + bool res; + /* assign global limit variable if limit is not given */ + { + SELECT_LEX *param= lex->unit.global_parameters(); + if (!param->limit_params.explicit_limit) + param->limit_params.select_limit= + new (thd->mem_root) Item_int(thd, + (ulonglong) thd->variables.select_limit); + } + + if (!(res= open_and_lock_tables(thd, all_tables, TRUE, 0))) + { + if (lex->describe) + { + /* + We always use select_send for EXPLAIN, even if it's an EXPLAIN + for SELECT ... INTO OUTFILE: a user application should be able + to prepend EXPLAIN to any query and receive output for it, + even if the query itself redirects the output. + */ + if (unlikely(!(result= new (thd->mem_root) select_send(thd)))) + return 1; /* purecov: inspected */ + thd->send_explain_fields(result, lex->describe, lex->analyze_stmt); + + /* + This will call optimize() for all parts of query. The query plan is + printed out below. 
+ */ + res= mysql_explain_union(thd, &lex->unit, result); + + /* Print EXPLAIN only if we don't have an error */ + if (likely(!res)) + { + /* + Do like the original select_describe did: remove OFFSET from the + top-level LIMIT + */ + result->remove_offset_limit(); + if (lex->explain_json) + { + lex->explain->print_explain_json(result, lex->analyze_stmt); + } + else + { + lex->explain->print_explain(result, thd->lex->describe, + thd->lex->analyze_stmt); + if (lex->describe & DESCRIBE_EXTENDED) + { + char buff[1024]; + String str(buff,(uint32) sizeof(buff), system_charset_info); + str.length(0); + /* + The warnings system requires input in utf8, @see + mysqld_show_warnings(). + */ + lex->unit.print(&str, QT_EXPLAIN_EXTENDED); + push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_YES, str.c_ptr_safe()); + } + } + } + + if (res) + result->abort_result_set(); + else + result->send_eof(); + delete result; + } + else + { + Protocol *save_protocol= NULL; + if (lex->analyze_stmt) + { + if (result && result->result_interceptor()) + result->result_interceptor()->disable_my_ok_calls(); + else + { + DBUG_ASSERT(thd->protocol); + result= new (thd->mem_root) select_send_analyze(thd); + save_protocol= thd->protocol; + thd->protocol= new Protocol_discard(thd); + } + } + else + { + if (!result && !(result= new (thd->mem_root) select_send(thd))) + return 1; /* purecov: inspected */ + } + query_cache_store_query(thd, all_tables); + res= handle_select(thd, lex, result, 0); + if (result != lex->result) + delete result; + + if (lex->analyze_stmt) + { + if (save_protocol) + { + delete thd->protocol; + thd->protocol= save_protocol; + } + if (!res) + { + bool extended= thd->lex->describe & DESCRIBE_EXTENDED; + res= thd->lex->explain->send_explain(thd, extended); + } + } + } + } + /* Count number of empty select queries */ + if (!thd->get_sent_row_count() && !res) + status_var_increment(thd->status_var.empty_queries); + else + status_var_add(thd->status_var.rows_sent, 
thd->get_sent_row_count()); + + return res; +} + + +/** + SHOW STATUS + + Notes: This is noinline as we don't want to have system_status_var (> 3K) + to be on the stack of mysql_execute_command() +*/ + +static bool __attribute__ ((noinline)) +execute_show_status(THD *thd, TABLE_LIST *all_tables) +{ + bool res; + system_status_var old_status_var= thd->status_var; + thd->initial_status_var= &old_status_var; + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW); + if (!(res= check_table_access(thd, SELECT_ACL, all_tables, FALSE, + UINT_MAX, FALSE))) + res= execute_sqlcom_select(thd, all_tables); + + thd->initial_status_var= NULL; + /* Don't log SHOW STATUS commands to slow query log */ + thd->server_status&= ~(SERVER_QUERY_NO_INDEX_USED | + SERVER_QUERY_NO_GOOD_INDEX_USED); + /* + restore status variables, as we don't want 'show status' to cause + changes + */ + mysql_mutex_lock(&LOCK_status); + add_diff_to_status(&global_status_var, &thd->status_var, + &old_status_var); + memcpy(&thd->status_var, &old_status_var, + offsetof(STATUS_VAR, last_cleared_system_status_var)); + mysql_mutex_unlock(&LOCK_status); + thd->initial_status_var= NULL; + return res; +#ifdef WITH_WSREP +wsrep_error_label: /* see WSREP_SYNC_WAIT() macro above */ + thd->initial_status_var= NULL; + return true; +#endif /* WITH_WSREP */ +} + + +/* + Find out if a table is a temporary table + + A table is a temporary table if it's a temporary table or + there has been before a temporary table that has been renamed + to the current name. + + Some examples: + A->B B is a temporary table if and only if A is a temp. + A->B, B->C Second B is temp if A is temp + A->B, A->C Second A can't be temp as if A was temp then B is temp + and Second A can only be a normal table. 
C is also not temp +*/ + +static TABLE *find_temporary_table_for_rename(THD *thd, + TABLE_LIST *first_table, + TABLE_LIST *cur_table) +{ + TABLE_LIST *table; + TABLE *res= 0; + bool found= 0; + DBUG_ENTER("find_temporary_table_for_rename"); + + /* Find last instance when cur_table is in TO part */ + for (table= first_table; + table != cur_table; + table= table->next_local->next_local) + { + TABLE_LIST *next= table->next_local; + + if (!strcmp(table->get_db_name(), cur_table->get_db_name()) && + !strcmp(table->get_table_name(), cur_table->get_table_name())) + { + /* Table was moved away, can't be same as 'table' */ + found= 1; + res= 0; // Table can't be a temporary table + } + if (!strcmp(next->get_db_name(), cur_table->get_db_name()) && + !strcmp(next->get_table_name(), cur_table->get_table_name())) + { + /* + Table has matching name with new name of this table. cur_table should + have same temporary type as this table. + */ + found= 1; + res= table->table; + } + } + if (!found) + res= thd->find_temporary_table(table, THD::TMP_TABLE_ANY); + DBUG_RETURN(res); +} + + +static bool __attribute__ ((noinline)) +check_rename_table(THD *thd, TABLE_LIST *first_table, + TABLE_LIST *all_tables) +{ + DBUG_ASSERT(first_table == all_tables && first_table != 0); + TABLE_LIST *table; + for (table= first_table; table; table= table->next_local->next_local) + { + if (check_access(thd, ALTER_ACL | DROP_ACL, table->db.str, + &table->grant.privilege, + &table->grant.m_internal, + 0, 0) || + check_access(thd, INSERT_ACL | CREATE_ACL, table->next_local->db.str, + &table->next_local->grant.privilege, + &table->next_local->grant.m_internal, + 0, 0)) + return 1; + + /* check if these are referring to temporary tables */ + table->table= find_temporary_table_for_rename(thd, first_table, table); + table->next_local->table= table->table; + + TABLE_LIST old_list, new_list; + /* + we do not need initialize old_list and new_list because we will + copy table[0] and table->next[0] there + */ + 
old_list= table[0]; + new_list= table->next_local[0]; + + if (check_grant(thd, ALTER_ACL | DROP_ACL, &old_list, FALSE, 1, FALSE) || + (!test_all_bits(table->next_local->grant.privilege, + INSERT_ACL | CREATE_ACL) && + check_grant(thd, INSERT_ACL | CREATE_ACL, &new_list, FALSE, 1, + FALSE))) + return 1; + } + + return 0; +} + +/* + Generate an incident log event before writing the real event + to the binary log. We put this event is before the statement + since that makes it simpler to check that the statement was + not executed on the slave (since incidents usually stop the + slave). + + Observe that any row events that are generated will be generated before. + + This is only for testing purposes and will not be present in a release build. +*/ + +#ifndef DBUG_OFF +static bool __attribute__ ((noinline)) generate_incident_event(THD *thd) +{ + if (mysql_bin_log.is_open()) + { + + Incident incident= INCIDENT_NONE; + DBUG_PRINT("debug", ("Just before generate_incident()")); + DBUG_EXECUTE_IF("incident_database_resync_on_replace", + incident= INCIDENT_LOST_EVENTS;); + if (incident) + { + Incident_log_event ev(thd, incident); + (void) mysql_bin_log.write(&ev); /* error is ignored */ + if (mysql_bin_log.rotate_and_purge(true)) + return 1; + } + DBUG_PRINT("debug", ("Just after generate_incident()")); + } + return 0; +} +#else +static bool generate_incident_event(THD *thd) +{ + return 0; +} +#endif + + +static int __attribute__ ((noinline)) +show_create_db(THD *thd, LEX *lex) +{ + char db_name_buff[NAME_LEN+1]; + LEX_CSTRING db_name; + DBUG_EXECUTE_IF("4x_server_emul", + my_error(ER_UNKNOWN_ERROR, MYF(0)); return 1;); + + db_name.str= db_name_buff; + db_name.length= lex->name.length; + strmov(db_name_buff, lex->name.str); + + if (check_db_name((LEX_STRING*) &db_name)) + { + my_error(ER_WRONG_DB_NAME, MYF(0), db_name.str); + return 1; + } + return mysqld_show_create_db(thd, &db_name, &lex->name, lex->create_info); +} + + +/** + Called on SQLCOM_ALTER_PROCEDURE and 
SQLCOM_ALTER_FUNCTION +*/ + +static bool __attribute__ ((noinline)) +alter_routine(THD *thd, LEX *lex) +{ + int sp_result; + const Sp_handler *sph= Sp_handler::handler(lex->sql_command); + if (check_routine_access(thd, ALTER_PROC_ACL, &lex->spname->m_db, + &lex->spname->m_name, sph, 0)) + return 1; + /* + Note that if you implement the capability of ALTER FUNCTION to + alter the body of the function, this command should be made to + follow the restrictions that log-bin-trust-function-creators=0 + already puts on CREATE FUNCTION. + */ + /* Conditionally writes to binlog */ + sp_result= sph->sp_update_routine(thd, lex->spname, &lex->sp_chistics); + switch (sp_result) { + case SP_OK: + my_ok(thd); + return 0; + case SP_KEY_NOT_FOUND: + my_error(ER_SP_DOES_NOT_EXIST, MYF(0), + sph->type_str(), ErrConvDQName(lex->spname).ptr()); + return 1; + default: + my_error(ER_SP_CANT_ALTER, MYF(0), + sph->type_str(), ErrConvDQName(lex->spname).ptr()); + return 1; + } + return 0; /* purecov: deadcode */ +} + + +static bool __attribute__ ((noinline)) +drop_routine(THD *thd, LEX *lex) +{ + int sp_result; +#ifdef HAVE_DLOPEN + if (lex->sql_command == SQLCOM_DROP_FUNCTION && + ! 
lex->spname->m_explicit_name) + { + /* DROP FUNCTION */ + enum drop_udf_result rc= mysql_drop_function(thd, &lex->spname->m_name); + switch (rc) { + case UDF_DEL_RESULT_DELETED: + my_ok(thd); + return 0; + case UDF_DEL_RESULT_ERROR: + return 1; + case UDF_DEL_RESULT_ABSENT: + goto absent; + } + + DBUG_ASSERT("wrong return code" == 0); +absent: + // If there was no current database, so it cannot be SP + if (!lex->spname->m_db.str) + { + if (lex->if_exists()) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_SP_DOES_NOT_EXIST, + ER_THD(thd, ER_SP_DOES_NOT_EXIST), + "FUNCTION (UDF)", lex->spname->m_name.str); + my_ok(thd); + return 0; + } + my_error(ER_SP_DOES_NOT_EXIST, MYF(0), + "FUNCTION (UDF)", lex->spname->m_name.str); + return 1; + } + /* Fall trough to test for a stored function */ + } +#endif /* HAVE_DLOPEN */ + + const Sp_handler *sph= Sp_handler::handler(lex->sql_command); + + if (check_routine_access(thd, ALTER_PROC_ACL, &lex->spname->m_db, + &lex->spname->m_name, + Sp_handler::handler(lex->sql_command), 0)) + return 1; + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + /* Conditionally writes to binlog */ + sp_result= sph->sp_drop_routine(thd, lex->spname); + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + /* + We're going to issue an implicit REVOKE statement so we close all + open tables. We have to keep metadata locks as this ensures that + this statement is atomic against concurent FLUSH TABLES WITH READ + LOCK. Deadlocks which can arise due to fact that this implicit + statement takes metadata locks should be detected by a deadlock + detector in MDL subsystem and reported as errors. + + TODO: Long-term we should either ensure that implicit REVOKE statement + is written into binary log as a separate statement or make both + dropping of routine and implicit REVOKE parts of one fully atomic + statement. 
+ */ + if (trans_commit_stmt(thd)) + sp_result= SP_INTERNAL_ERROR; + close_thread_tables(thd); + + if (sp_result != SP_KEY_NOT_FOUND && + sp_automatic_privileges && !opt_noacl && + sp_revoke_privileges(thd, lex->spname->m_db.str, lex->spname->m_name.str, + Sp_handler::handler(lex->sql_command))) + { + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_PROC_AUTO_REVOKE_FAIL, + ER_THD(thd, ER_PROC_AUTO_REVOKE_FAIL)); + /* If this happens, an error should have been reported. */ + return 1; + } +#endif /* NO_EMBEDDED_ACCESS_CHECKS */ + + switch (sp_result) { + case SP_OK: + my_ok(thd); + return 0; + case SP_KEY_NOT_FOUND: + int res; + if (lex->if_exists()) + { + res= write_bin_log(thd, TRUE, thd->query(), thd->query_length()); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_SP_DOES_NOT_EXIST, + ER_THD(thd, ER_SP_DOES_NOT_EXIST), + sph->type_str(), + ErrConvDQName(lex->spname).ptr()); + if (res) + return 1; + my_ok(thd); + return 0; + } + my_error(ER_SP_DOES_NOT_EXIST, MYF(0), + sph->type_str(), ErrConvDQName(lex->spname).ptr()); + return 1; + default: + my_error(ER_SP_DROP_FAILED, MYF(0), + sph->type_str(), ErrConvDQName(lex->spname).ptr()); + return 1; + } + +#ifdef WITH_WSREP +wsrep_error_label: + return 1; +#endif +} + +/** + @brief Compare requested privileges with the privileges acquired from the + User- and Db-tables. + @param thd Thread handler + @param want_access The requested access privileges. + @param db A pointer to the Db name. + @param[out] save_priv A pointer to the granted privileges will be stored. + @param grant_internal_info A pointer to the internal grant cache. + @param dont_check_global_grants True if no global grants are checked. + @param no_error True if no errors should be sent to the client. + + 'save_priv' is used to save the User-table (global) and Db-table grants for + the supplied db name. 
Note that we don't store db level grants if the global + grants is enough to satisfy the request AND the global grants contains a + SELECT grant. + + For internal databases (INFORMATION_SCHEMA, PERFORMANCE_SCHEMA), + additional rules apply, see ACL_internal_schema_access. + + @see check_grant + + @return Status of denial of access by exclusive ACLs. + @retval FALSE Access can't exclusively be denied by Db- and User-table + access unless Column- and Table-grants are checked too. + @retval TRUE Access denied. +*/ + +bool +check_access(THD *thd, privilege_t want_access, + const char *db, privilege_t *save_priv, + GRANT_INTERNAL_INFO *grant_internal_info, + bool dont_check_global_grants, bool no_errors) +{ +#ifdef NO_EMBEDDED_ACCESS_CHECKS + if (save_priv) + *save_priv= GLOBAL_ACLS; + return false; +#else + Security_context *sctx= thd->security_ctx; + privilege_t db_access(NO_ACL); + + /* + GRANT command: + In case of database level grant the database name may be a pattern, + in case of table|column level grant the database name can not be a pattern. + We use 'dont_check_global_grants' as a flag to determine + if it's database level grant command + (see SQLCOM_GRANT case, mysql_execute_command() function) and + set db_is_pattern according to 'dont_check_global_grants' value. + */ + bool db_is_pattern= ((want_access & GRANT_ACL) && dont_check_global_grants); + privilege_t dummy(NO_ACL); + DBUG_ENTER("check_access"); + DBUG_PRINT("enter",("db: %s want_access: %llx master_access: %llx", + db ? db : "", + (longlong) want_access, + (longlong) sctx->master_access)); + + if (save_priv) + *save_priv= NO_ACL; + else + { + save_priv= &dummy; + dummy= NO_ACL; + } + + /* check access may be called twice in a row. 
Don't change to same stage */ + if (thd->proc_info != stage_checking_permissions.m_name) + THD_STAGE_INFO(thd, stage_checking_permissions); + if (unlikely((!db || !db[0]) && !thd->db.str && !dont_check_global_grants)) + { + DBUG_RETURN(FALSE); // CTE reference or an error later + } + + if (likely((db != NULL) && (db != any_db.str))) + { + /* + Check if this is reserved database, like information schema or + performance schema + */ + const ACL_internal_schema_access *access; + access= get_cached_schema_access(grant_internal_info, db); + if (access) + { + switch (access->check(want_access, save_priv)) + { + case ACL_INTERNAL_ACCESS_GRANTED: + /* + All the privileges requested have been granted internally. + [out] *save_privileges= Internal privileges. + */ + DBUG_RETURN(FALSE); + case ACL_INTERNAL_ACCESS_DENIED: + if (! no_errors) + { + status_var_increment(thd->status_var.access_denied_errors); + my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), + sctx->priv_user, sctx->priv_host, db); + } + DBUG_RETURN(TRUE); + case ACL_INTERNAL_ACCESS_CHECK_GRANT: + /* + Only some of the privilege requested have been granted internally, + proceed with the remaining bits of the request (want_access). + */ + want_access&= ~(*save_priv); + break; + } + } + } + + if ((sctx->master_access & want_access) == want_access) + { + /* + 1. If we don't have a global SELECT privilege, we have to get the + database specific access rights to be able to handle queries of type + UPDATE t1 SET a=1 WHERE b > 0 + 2. Change db access if it isn't current db which is being addressed + */ + if (!(sctx->master_access & SELECT_ACL)) + { + if (db && (!thd->db.str || db_is_pattern || strcmp(db, thd->db.str))) + { + db_access= acl_get_all3(sctx, db, db_is_pattern); + } + else + { + /* get access for current db */ + db_access= sctx->db_access; + } + /* + The effective privileges are the union of the global privileges + and the intersection of db- and host-privileges, + plus the internal privileges. 
+ */ + *save_priv|= sctx->master_access | db_access; + } + else + *save_priv|= sctx->master_access; + DBUG_RETURN(FALSE); + } + if (unlikely(((want_access & ~sctx->master_access) & ~DB_ACLS) || + (! db && dont_check_global_grants))) + { // We can never grant this + DBUG_PRINT("error",("No possible access")); + if (!no_errors) + { + status_var_increment(thd->status_var.access_denied_errors); + my_error(access_denied_error_code(thd->password), MYF(0), + sctx->priv_user, + sctx->priv_host, + (thd->password ? + ER_THD(thd, ER_YES) : + ER_THD(thd, ER_NO))); /* purecov: tested */ + } + DBUG_RETURN(TRUE); /* purecov: tested */ + } + + if (unlikely(db == any_db.str)) + { + /* + Access granted; Allow select on *any* db. + [out] *save_privileges= 0 + */ + DBUG_RETURN(FALSE); + } + + if (db && (!thd->db.str || db_is_pattern || strcmp(db, thd->db.str))) + db_access= acl_get_all3(sctx, db, db_is_pattern); + else + db_access= sctx->db_access; + DBUG_PRINT("info",("db_access: %llx want_access: %llx", + (longlong) db_access, (longlong) want_access)); + + /* + Save the union of User-table and the intersection between Db-table and + Host-table privileges, with the already saved internal privileges. + */ + db_access= (db_access | sctx->master_access); + *save_priv|= db_access; + + /* + We need to investigate column- and table access if all requested privileges + belongs to the bit set of . + */ + bool need_table_or_column_check= + (want_access & (TABLE_ACLS | PROC_ACLS | db_access)) == want_access; + + /* + Grant access if the requested access is in the intersection of + host- and db-privileges (as retrieved from the acl cache), + also grant access if all the requested privileges are in the union of + TABLES_ACLS and PROC_ACLS; see check_grant. + */ + if ( (db_access & want_access) == want_access || + (!dont_check_global_grants && + need_table_or_column_check)) + { + /* + Ok; but need to check table- and column privileges. 
+ [out] *save_privileges is (User-priv | (Db-priv & Host-priv) | Internal-priv) + */ + DBUG_RETURN(FALSE); + } + + /* + Access is denied; + [out] *save_privileges is (User-priv | (Db-priv & Host-priv) | Internal-priv) + */ + DBUG_PRINT("error",("Access denied")); + if (!no_errors) + { + status_var_increment(thd->status_var.access_denied_errors); + my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), + sctx->priv_user, sctx->priv_host, + (db ? db : (thd->db.str ? + thd->db.str : + "unknown"))); + } + DBUG_RETURN(TRUE); +#endif // NO_EMBEDDED_ACCESS_CHECKS +} + + +#ifndef NO_EMBEDDED_ACCESS_CHECKS +/** + Check grants for commands which work only with one table. + + @param thd Thread handler + @param privilege requested privilege + @param tables global table list of query + @param no_errors FALSE/TRUE - report/don't report error to + the client (using my_error() call). + + @retval + 0 OK + @retval + 1 access denied, error is sent to client +*/ + +bool check_single_table_access(THD *thd, privilege_t privilege, + TABLE_LIST *tables, bool no_errors) +{ + if (tables->derived) + return 0; + + Switch_to_definer_security_ctx backup_sctx(thd, tables); + + const char *db_name; + if ((tables->view || tables->field_translation) && !tables->schema_table) + db_name= tables->view_db.str; + else + db_name= tables->db.str; + + if (check_access(thd, privilege, db_name, &tables->grant.privilege, + &tables->grant.m_internal, 0, no_errors)) + return 1; + + /* Show only 1 table for check_grant */ + if (!(tables->belong_to_view && + (thd->lex->sql_command == SQLCOM_SHOW_FIELDS)) && + check_grant(thd, privilege, tables, FALSE, 1, no_errors)) + return 1; + + return 0; +} + +/** + Check grants for commands which work only with one table and all other + tables belonging to subselects or implicitly opened tables. 
+ + @param thd Thread handler + @param privilege requested privilege + @param all_tables global table list of query + + @retval + 0 OK + @retval + 1 access denied, error is sent to client +*/ + +bool check_one_table_access(THD *thd, privilege_t privilege, + TABLE_LIST *all_tables) +{ + if (check_single_table_access (thd,privilege,all_tables, FALSE)) + return 1; + + /* Check rights on tables of subselects and implictly opened tables */ + TABLE_LIST *subselects_tables, *view= all_tables->view ? all_tables : 0; + if ((subselects_tables= all_tables->next_global)) + { + /* + Access rights asked for the first table of a view should be the same + as for the view + */ + if (view && subselects_tables->belong_to_view == view) + { + if (check_single_table_access (thd, privilege, subselects_tables, FALSE)) + return 1; + subselects_tables= subselects_tables->next_global; + } + if (subselects_tables && + (check_table_access(thd, SELECT_ACL, subselects_tables, FALSE, + UINT_MAX, FALSE))) + return 1; + } + return 0; +} + + +static bool check_show_access(THD *thd, TABLE_LIST *table) +{ + /* + This is a SHOW command using an INFORMATION_SCHEMA table. + check_access() has not been called for 'table', + and SELECT is currently always granted on the I_S, so we automatically + grant SELECT on table here, to bypass a call to check_access(). + Note that not calling check_access(table) is an optimization, + which needs to be revisited if the INFORMATION_SCHEMA does + not always automatically grant SELECT but use the grant tables. 
+ See Bug#38837 need a way to disable information_schema for security + */ + table->grant.privilege= SELECT_ACL; + + switch (get_schema_table_idx(table->schema_table)) { + case SCH_SCHEMATA: + return (specialflag & SPECIAL_SKIP_SHOW_DB) && + check_global_access(thd, SHOW_DB_ACL); + + case SCH_TABLE_NAMES: + case SCH_TABLES: + case SCH_VIEWS: + case SCH_TRIGGERS: + case SCH_EVENTS: + { + const char *dst_db_name= table->schema_select_lex->db.str; + + DBUG_ASSERT(dst_db_name); + + if (check_access(thd, SELECT_ACL, dst_db_name, + &thd->col_access, NULL, FALSE, FALSE)) + return TRUE; + + if (!thd->col_access && check_grant_db(thd, dst_db_name)) + { + status_var_increment(thd->status_var.access_denied_errors); + my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), + thd->security_ctx->priv_user, + thd->security_ctx->priv_host, + dst_db_name); + return TRUE; + } + + return FALSE; + } + + case SCH_COLUMNS: + case SCH_STATISTICS: + { + TABLE_LIST *dst_table; + dst_table= table->schema_select_lex->table_list.first; + + DBUG_ASSERT(dst_table); + + /* + Open temporary tables to be able to detect them during privilege check. + */ + if (thd->open_temporary_tables(dst_table)) + return TRUE; + + if (check_access(thd, SELECT_ACL, dst_table->db.str, + &dst_table->grant.privilege, + &dst_table->grant.m_internal, + FALSE, FALSE)) + return TRUE; /* Access denied */ + + thd->col_access= dst_table->grant.privilege; // for sql_show.cc + /* + Check_grant will grant access if there is any column privileges on + all of the tables thanks to the fourth parameter (bool show_table). + */ + if (check_grant(thd, SELECT_ACL, dst_table, TRUE, 1, FALSE)) + return TRUE; /* Access denied */ + + close_thread_tables(thd); + dst_table->table= NULL; + + /* Access granted */ + return FALSE; + } + default: + break; + } + + return FALSE; +} + + + +/** + @brief Check if the requested privileges exists in either User-, Host- or + Db-tables. 
+ @param thd Thread context + @param requirements Privileges requested + @param tables List of tables to be compared against + @param any_combination_of_privileges_will_do TRUE if any privileges on any + column combination is enough. + @param number Only the first 'number' tables in the linked list are + relevant. + @param no_errors Don't report error to the client (using my_error() call). + + The suppled table list contains cached privileges. This functions calls the + help functions check_access and check_grant to verify the first three steps + in the privileges check queue: + 1. Global privileges + 2. OR (db privileges AND host privileges) + 3. OR table privileges + 4. OR column privileges (not checked by this function!) + 5. OR routine privileges (not checked by this function!) + + @see check_access + @see check_grant + + @note This functions assumes that table list used and + thd->lex->query_tables_own_last value correspond to each other + (the latter should be either 0 or point to next_global member + of one of elements of this table list). + + @return + @retval FALSE OK + @retval TRUE Access denied; But column or routine privileges might need to + be checked also. +*/ + +bool +check_table_access(THD *thd, privilege_t requirements, TABLE_LIST *tables, + bool any_combination_of_privileges_will_do, + uint number, bool no_errors) +{ + TABLE_LIST *org_tables= tables; + TABLE_LIST *first_not_own_table= thd->lex->first_not_own_table(); + uint i= 0; + /* + The check that first_not_own_table is not reached is for the case when + the given table list refers to the list for prelocking (contains tables + of other queries). For simple queries first_not_own_table is 0. + */ + for (; i < number && tables != first_not_own_table && tables; + tables= tables->next_global, i++) + { + TABLE_LIST *const table_ref= tables->correspondent_table ? 
+ tables->correspondent_table : tables; + Switch_to_definer_security_ctx backup_ctx(thd, table_ref); + + privilege_t want_access(requirements); + + /* + Register access for view underlying table. + Remove SHOW_VIEW_ACL, because it will be checked during making view + */ + table_ref->grant.orig_want_privilege= (want_access & ~SHOW_VIEW_ACL); + + if (table_ref->schema_table_reformed) + { + if (check_show_access(thd, table_ref)) + return 1; + continue; + } + + DBUG_PRINT("info", ("derived: %d view: %d", table_ref->derived != 0, + table_ref->view != 0)); + + if (table_ref->is_anonymous_derived_table()) + continue; + + if (table_ref->sequence) + { + /* We want to have either SELECT or INSERT rights to sequences depending + on how they are accessed + */ + want_access= ((table_ref->lock_type >= TL_FIRST_WRITE) ? + INSERT_ACL : SELECT_ACL); + } + + if (check_access(thd, want_access, + table_ref->get_db_name(), + &table_ref->grant.privilege, + &table_ref->grant.m_internal, + 0, no_errors)) + return 1; + } + return check_grant(thd,requirements,org_tables, + any_combination_of_privileges_will_do, + number, no_errors); +} + + +bool +check_routine_access(THD *thd, privilege_t want_access, const LEX_CSTRING *db, + const LEX_CSTRING *name, + const Sp_handler *sph, bool no_errors) +{ + TABLE_LIST tables[1]; + + bzero((char *)tables, sizeof(TABLE_LIST)); + tables->db= *db; + tables->table_name= tables->alias= *name; + + /* + The following test is just a shortcut for check_access() (to avoid + calculating db_access) under the assumption that it's common to + give persons global right to execute all stored SP (but not + necessary to create them). + Note that this effectively bypasses the ACL_internal_schema_access checks + that are implemented for the INFORMATION_SCHEMA and PERFORMANCE_SCHEMA, + which are located in check_access(). + Since the I_S and P_S do not contain routines, this bypass is ok, + as long as this code path is not abused to create routines. 
+ The assert enforce that. + */ + DBUG_ASSERT((want_access & CREATE_PROC_ACL) == NO_ACL); + if ((thd->security_ctx->master_access & want_access) == want_access) + tables->grant.privilege= want_access; + else if (check_access(thd, want_access, db->str, + &tables->grant.privilege, + &tables->grant.m_internal, + 0, no_errors)) + return TRUE; + + return check_grant_routine(thd, want_access, tables, sph, no_errors); +} + + +/** + Check if the routine has any of the routine privileges. + + @param thd Thread handler + @param db Database name + @param name Routine name + + @retval + 0 ok + @retval + 1 error +*/ + +bool check_some_routine_access(THD *thd, const char *db, const char *name, + const Sp_handler *sph) +{ + privilege_t save_priv(NO_ACL); + /* + The following test is just a shortcut for check_access() (to avoid + calculating db_access) + Note that this effectively bypasses the ACL_internal_schema_access checks + that are implemented for the INFORMATION_SCHEMA and PERFORMANCE_SCHEMA, + which are located in check_access(). + Since the I_S and P_S do not contain routines, this bypass is ok, + as it only opens SHOW_PROC_ACLS. 
+ */
+ if (thd->security_ctx->master_access & SHOW_PROC_ACLS)
+ return FALSE;
+ if (!check_access(thd, SHOW_PROC_ACLS, db, &save_priv, NULL, 0, 1) ||
+ (save_priv & SHOW_PROC_ACLS))
+ return FALSE;
+ return check_routine_level_acl(thd, db, name, sph);
+}
+
+
+/*
+ Check if the given table has any of the asked privileges
+
+ @param thd Thread handler
+ @param want_access Bitmap of possible privileges to check for
+ @param table Table to check; its grant info (privilege, m_internal)
+ is consulted and updated by the check
+
+ @retval
+ 0 ok
+ @retval
+ 1 error
+*/
+
+bool check_some_access(THD *thd, privilege_t want_access, TABLE_LIST *table)
+{
+ DBUG_ENTER("check_some_access");
+
+ for (ulonglong bit= 1; bit < (ulonglong) want_access ; bit<<= 1)
+ {
+ if (bit & want_access)
+ {
+ privilege_t access= ALL_KNOWN_ACL & bit;
+ if (!check_access(thd, access, table->db.str,
+ &table->grant.privilege,
+ &table->grant.m_internal,
+ 0, 1) &&
+ !check_grant(thd, access, table, FALSE, 1, TRUE))
+ DBUG_RETURN(0);
+ }
+ }
+ DBUG_PRINT("exit",("no matching access rights"));
+ DBUG_RETURN(1);
+}
+
+#endif /*NO_EMBEDDED_ACCESS_CHECKS*/
+
+
+/**
+ check for global access and give descriptive error message if it fails.
+
+ @param thd Thread handler
+ @param want_access User should have any of these global rights
+ @param no_errors If set, don't send the access-denied error to the client
+
+ @warning
+ Starting from 10.5.2 only one bit is allowed in want_access.
+ Access denied error is returned if want_access has multiple bits set.
+
+ @retval
+ 0 ok
+ @retval
+ 1 Access denied. In this case an error is sent to the client
+*/
+
+bool check_global_access(THD *thd, privilege_t want_access, bool no_errors)
+{
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+ char command[128];
+ if (thd->security_ctx->master_access & want_access)
+ return 0;
+ if (unlikely(!no_errors))
+ {
+ get_privilege_desc(command, sizeof(command), want_access);
+ my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), command);
+ }
+ status_var_increment(thd->status_var.access_denied_errors);
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+
+/**
+ Checks foreign key's parent table access. 
+ + @param thd [in] Thread handler + @param create_info [in] Create information (like MAX_ROWS, ENGINE or + temporary table flag) + @param alter_info [in] Initial list of columns and indexes for the + table to be created + @param create_db [in] Database of the created table + + @retval + false ok. + @retval + true error or access denied. Error is sent to client in this case. +*/ +bool check_fk_parent_table_access(THD *thd, + HA_CREATE_INFO *create_info, + Alter_info *alter_info, + const char* create_db) +{ + Key *key; + List_iterator key_iterator(alter_info->key_list); + + while ((key= key_iterator++)) + { + if (key->type == Key::FOREIGN_KEY) + { + TABLE_LIST parent_table; + Foreign_key *fk_key= (Foreign_key *)key; + LEX_CSTRING db_name; + LEX_CSTRING table_name= { fk_key->ref_table.str, + fk_key->ref_table.length }; + const privilege_t privileges(COL_DML_ACLS | REFERENCES_ACL); + + // Check if tablename is valid or not. + DBUG_ASSERT(table_name.str != NULL); + if (check_table_name(table_name.str, table_name.length, false)) + { + my_error(ER_WRONG_TABLE_NAME, MYF(0), table_name.str); + return true; + } + + if (fk_key->ref_db.str) + { + if (!(db_name.str= (char *) thd->memdup(fk_key->ref_db.str, + fk_key->ref_db.length+1))) + return true; + db_name.length= fk_key->ref_db.length; + + // Check if database name is valid or not. + if (check_db_name((LEX_STRING*) &db_name)) + { + my_error(ER_WRONG_DB_NAME, MYF(0), db_name.str); + return true; + } + } + else + { + if (!thd->db.str) + { + DBUG_ASSERT(create_db); + db_name.length= strlen(create_db); + if (!(db_name.str= (char *) thd->memdup(create_db, + db_name.length+1))) + return true; + + if (check_db_name((LEX_STRING*) &db_name)) + { + my_error(ER_WRONG_DB_NAME, MYF(0), db_name.str); + return true; + } + } + else + { + if (thd->lex->copy_db_to(&db_name)) + return true; + } + } + + // if lower_case_table_names is set then convert tablename to lower case. 
+ if (lower_case_table_names) + { + char *name; + table_name.str= name= (char *) thd->memdup(fk_key->ref_table.str, + fk_key->ref_table.length+1); + table_name.length= my_casedn_str(files_charset_info, name); + db_name.length= my_casedn_str(files_charset_info, (char*) db_name.str); + } + + parent_table.init_one_table(&db_name, &table_name, 0, TL_IGNORE); + + /* + Check if user has any of the "privileges" at table level on + "parent_table". + Having privilege on any of the parent_table column is not + enough so checking whether user has any of the "privileges" + at table level only here. + */ + if (check_some_access(thd, privileges, &parent_table) || + parent_table.grant.want_privilege) + { + my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0), + "REFERENCES", + thd->security_ctx->priv_user, + thd->security_ctx->host_or_ip, + db_name.str, table_name.str); + return true; + } + } + } + + return false; +} + + +/**************************************************************************** + Check stack size; Send error if there isn't enough stack to continue +****************************************************************************/ + + +#ifndef DBUG_OFF +long max_stack_used; +#endif + +/** + @note + Note: The 'buf' parameter is necessary, even if it is unused here. + - fix_fields functions has a "dummy" buffer large enough for the + corresponding exec. (Thus we only have to check in fix_fields.) + - Passing to check_stack_overrun() prevents the compiler from removing it. +*/ + +bool +#if defined __GNUC__ && !defined __clang__ +/* + Do not optimize the function in order to preserve a stack variable creation. + Otherwise, the variable pointed as "buf" can be removed due to a missing + usage. 
+ */ +__attribute__((optimize("-O0"))) +#endif +check_stack_overrun(THD *thd, long margin, uchar *buf __attribute__((unused))) +{ + long stack_used; + DBUG_ASSERT(thd == current_thd); + if ((stack_used= available_stack_size(thd->thread_stack, &stack_used)) >= + (long) (my_thread_stack_size - margin)) + { + thd->is_fatal_error= 1; + /* + Do not use stack for the message buffer to ensure correct + behaviour in cases we have close to no stack left. + */ + char* ebuff= new char[MYSQL_ERRMSG_SIZE]; + if (ebuff) { + my_snprintf(ebuff, MYSQL_ERRMSG_SIZE, ER_THD(thd, ER_STACK_OVERRUN_NEED_MORE), + stack_used, my_thread_stack_size, margin); + my_message(ER_STACK_OVERRUN_NEED_MORE, ebuff, MYF(ME_FATAL)); + delete [] ebuff; + } + return 1; + } +#ifndef DBUG_OFF + max_stack_used= MY_MAX(max_stack_used, stack_used); +#endif + return 0; +} + + +#define MY_YACC_INIT 1000 // Start with big alloc +#define MY_YACC_MAX 32000 // Because of 'short' + +bool my_yyoverflow(short **yyss, YYSTYPE **yyvs, size_t *yystacksize) +{ + Yacc_state *state= & current_thd->m_parser_state->m_yacc; + size_t old_info=0; + DBUG_ASSERT(state); + if ( *yystacksize >= MY_YACC_MAX) + return 1; + if (!state->yacc_yyvs) + old_info= *yystacksize; + *yystacksize= set_zone((int)(*yystacksize)*2,MY_YACC_INIT,MY_YACC_MAX); + if (!(state->yacc_yyvs= (uchar*) + my_realloc(key_memory_bison_stack, state->yacc_yyvs, + *yystacksize*sizeof(**yyvs), + MYF(MY_ALLOW_ZERO_PTR | MY_FREE_ON_ERROR))) || + !(state->yacc_yyss= (uchar*) + my_realloc(key_memory_bison_stack, state->yacc_yyss, + *yystacksize*sizeof(**yyss), + MYF(MY_ALLOW_ZERO_PTR | MY_FREE_ON_ERROR)))) + return 1; + if (old_info) + { + /* + Only copy the old stack on the first call to my_yyoverflow(), + when replacing a static stack (YYINITDEPTH) by a dynamic stack. + For subsequent calls, my_realloc already did preserve the old stack. 
+ */ + memcpy(state->yacc_yyss, *yyss, old_info*sizeof(**yyss)); + memcpy(state->yacc_yyvs, *yyvs, old_info*sizeof(**yyvs)); + } + *yyss= (short*) state->yacc_yyss; + *yyvs= (YYSTYPE*) state->yacc_yyvs; + return 0; +} + + +/** + Reset the part of THD responsible for the state of command + processing. + + @param do_clear_error Set if we should clear errors + + This needs to be called before execution of every statement + (prepared or conventional). It is not called by substatements of + routines. + + @todo Call it after we use THD for queries, not before. +*/ + +void THD::reset_for_next_command(bool do_clear_error) +{ + DBUG_ENTER("THD::reset_for_next_command"); + DBUG_ASSERT(!spcont); /* not for substatements of routines */ + DBUG_ASSERT(!in_sub_stmt); + /* + Table maps should have been reset after previous statement except in the + case where we have locked tables + */ + DBUG_ASSERT(binlog_table_maps == 0 || + locked_tables_mode == LTM_LOCK_TABLES); + + if (likely(do_clear_error)) + { + clear_error(1); + /* + The following variable can't be reset in clear_error() as + clear_error() is called during auto_repair of table + */ + error_printed_to_log= 0; + } + free_list= 0; + /* + We also assign stmt_lex in lex_start(), but during bootstrap this + code is executed first. + */ + DBUG_ASSERT(lex == &main_lex); + main_lex.stmt_lex= &main_lex; main_lex.current_select_number= 0; + /* + Those two lines below are theoretically unneeded as + THD::cleanup_after_query() should take care of this already. + */ + auto_inc_intervals_in_cur_stmt_for_binlog.empty(); + stmt_depends_on_first_successful_insert_id_in_prev_stmt= 0; + +#ifdef WITH_WSREP + /* + Autoinc variables should be adjusted only for locally executed + transactions. 
Appliers and replayers are either processing ROW
+ events or get autoinc variable values from Query_log_event, and
+ a MySQL slave may be processing STATEMENT format events, but it should
+ use autoinc values passed in binlog events, not the values forced by
+ the cluster.
+ */
+ if (WSREP_NNULL(this) && wsrep_thd_is_local(this) &&
+ !slave_thread && wsrep_auto_increment_control)
+ {
+ variables.auto_increment_offset=
+ global_system_variables.auto_increment_offset;
+ variables.auto_increment_increment=
+ global_system_variables.auto_increment_increment;
+ }
+#endif /* WITH_WSREP */
+
+ used= 0;
+ is_fatal_error= 0;
+ variables.option_bits&= ~OPTION_BINLOG_THIS_STMT;
+
+ /*
+ Clear the status flags that are expected to be cleared at the
+ beginning of each SQL statement.
+ */
+ server_status&= ~SERVER_STATUS_CLEAR_SET;
+ /*
+ If in autocommit mode and not in a transaction, reset
+ OPTION_STATUS_NO_TRANS_UPDATE | OPTION_BINLOG_THIS_TRX to not get warnings
+ in ha_rollback_trans() about some tables that couldn't be rolled back.
+ */
+ if (!in_multi_stmt_transaction_mode())
+ {
+ variables.option_bits&= ~OPTION_BINLOG_THIS_TRX;
+ transaction->all.reset();
+ }
+ DBUG_ASSERT(security_ctx== &main_security_ctx);
+
+ if (opt_bin_log)
+ reset_dynamic(&user_var_events);
+ DBUG_ASSERT(user_var_events_alloc == &main_mem_root);
+ enable_slow_log= true;
+ get_stmt_da()->reset_for_next_command();
+ m_sent_row_count= m_examined_row_count= 0;
+ accessed_rows_and_keys= 0;
+
+ reset_slow_query_state();
+
+ reset_current_stmt_binlog_format_row();
+ binlog_unsafe_warning_flags= 0;
+
+ save_prep_leaf_list= false;
+
+#ifdef WITH_WSREP
+#if !defined(DBUG_OFF)
+ if (mysql_bin_log.is_open())
+#endif
+#endif
+ DBUG_PRINT("debug",
+ ("is_current_stmt_binlog_format_row(): %d",
+ is_current_stmt_binlog_format_row()));
+
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ Used to allocate a new SELECT_LEX object on the current thd mem_root and
+ link it into the relevant lists. 
+ + This function is always followed by mysql_init_select. + + @see mysql_init_select + + @retval TRUE An error occurred + @retval FALSE The new SELECT_LEX was successfully allocated. +*/ + +bool +mysql_new_select(LEX *lex, bool move_down, SELECT_LEX *select_lex) +{ + THD *thd= lex->thd; + bool new_select= select_lex == NULL; + int old_nest_level= lex->current_select->nest_level; + DBUG_ENTER("mysql_new_select"); + + if (new_select) + { + if (!(select_lex= new (thd->mem_root) SELECT_LEX())) + DBUG_RETURN(1); + select_lex->select_number= ++thd->lex->stmt_lex->current_select_number; + select_lex->parent_lex= lex; /* Used in init_query. */ + select_lex->init_query(); + select_lex->init_select(); + } + select_lex->nest_level_base= &thd->lex->unit; + if (move_down) + { + lex->nest_level++; + if (select_lex->set_nest_level(old_nest_level + 1)) + DBUG_RETURN(1); + SELECT_LEX_UNIT *unit; + /* first select_lex of subselect or derived table */ + if (!(unit= lex->alloc_unit())) + DBUG_RETURN(1); + + unit->include_down(lex->current_select); + unit->return_to= lex->current_select; + select_lex->include_down(unit); + /* + By default we assume that it is usual subselect and we have outer name + resolution context, if no we will assign it to 0 later + */ + select_lex->context.outer_context= &select_lex->outer_select()->context; + } + else + { + bool const outer_most= (lex->current_select->master_unit() == &lex->unit); + if (outer_most && lex->result) + { + my_error(ER_WRONG_USAGE, MYF(0), "UNION", "INTO"); + DBUG_RETURN(TRUE); + } + + /* + This type of query is not possible in the grammar: + SELECT 1 FROM t1 PROCEDURE ANALYSE() UNION ... ; + + But this type of query is still possible: + (SELECT 1 FROM t1 PROCEDURE ANALYSE()) UNION ... ; + and it's not easy to disallow this grammatically, + because there can be any parenthesis nest level: + (((SELECT 1 FROM t1 PROCEDURE ANALYSE()))) UNION ... 
; 
+ */
+ if (lex->proc_list.elements!=0)
+ {
+ my_error(ER_WRONG_USAGE, MYF(0), "UNION",
+ "SELECT ... PROCEDURE ANALYSE()");
+ DBUG_RETURN(TRUE);
+ }
+
+ SELECT_LEX_NODE *save_slave= select_lex->slave;
+ select_lex->include_neighbour(lex->current_select);
+ select_lex->slave= save_slave;
+ SELECT_LEX_UNIT *unit= select_lex->master_unit();
+ if (select_lex->set_nest_level(old_nest_level))
+ DBUG_RETURN(1);
+ if (!unit->fake_select_lex && unit->add_fake_select_lex(lex->thd))
+ DBUG_RETURN(1);
+ select_lex->context.outer_context=
+ unit->first_select()->context.outer_context;
+ }
+
+ if (new_select)
+ select_lex->include_global((st_select_lex_node**)&lex->all_selects_list);
+ lex->current_select= select_lex;
+ /*
+ A subquery is a SELECT query, and we allow resolution of names in the
+ SELECT list
+ */
+ select_lex->context.resolve_in_select_list= TRUE;
+ DBUG_RETURN(0);
+}
+
+/**
+ Create a select to return the same output as 'SELECT @@var_name'.
+
+ Used for SHOW COUNT(*) [ WARNINGS | ERROR].
+
+ This will crash with a core dump if the variable doesn't exist.
+
+ @param thd Thread handler
+ @param var_name Variable name
+*/
+
+void create_select_for_variable(THD *thd, LEX_CSTRING *var_name)
+{
+ LEX *lex;
+ Item *var;
+ char buff[MAX_SYS_VAR_LENGTH*2+4+8], *end;
+ DBUG_ENTER("create_select_for_variable");
+
+ lex= thd->lex;
+ lex->init_select();
+ lex->sql_command= SQLCOM_SELECT;
+ /*
+ We set the name of Item to @@session.var_name because that then is used
+ as the column name in the output.
+ */
+ if ((var= get_system_var(thd, OPT_SESSION, var_name, &null_clex_str)))
+ {
+ end= strxmov(buff, "@@session.", var_name->str, NullS);
+ var->set_name(thd, buff, (uint)(end-buff), system_charset_info);
+ add_item_to_list(thd, var);
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+void mysql_init_multi_delete(LEX *lex)
+{
+ lex->sql_command= SQLCOM_DELETE_MULTI;
+ lex->init_select();
+ lex->first_select_lex()->limit_params.clear();
+ lex->unit.lim.clear();
+ lex->first_select_lex()->table_list. 
+ save_and_clear(&lex->auxiliary_table_list); + lex->query_tables= 0; + lex->query_tables_last= &lex->query_tables; +} + +#ifdef WITH_WSREP +static void wsrep_prepare_for_autocommit_retry(THD* thd, + char* rawbuf, + uint length, + Parser_state* parser_state) +{ + thd->clear_error(); + close_thread_tables(thd); + thd->wsrep_retry_counter++; // grow + wsrep_copy_query(thd); + thd->set_time(); + parser_state->reset(rawbuf, length); + + /* PSI end */ + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; + thd->m_digest= NULL; + + /* DTRACE end */ + if (MYSQL_QUERY_DONE_ENABLED()) + { + MYSQL_QUERY_DONE(thd->is_error()); + } + + /* SHOW PROFILE end */ +#if defined(ENABLED_PROFILING) + thd->profiling.finish_current_query(); +#endif + + /* SHOW PROFILE begin */ +#if defined(ENABLED_PROFILING) + thd->profiling.start_new_query("continuing"); + thd->profiling.set_query_source(rawbuf, length); +#endif + + /* DTRACE begin */ + MYSQL_QUERY_START(rawbuf, thd->thread_id, + thd->get_db(), + &thd->security_ctx->priv_user[0], + (char *) thd->security_ctx->host_or_ip); + + /* Performance Schema Interface instrumentation, begin */ + thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi, + com_statement_info[thd->get_command()].m_key); + MYSQL_SET_STATEMENT_TEXT(thd->m_statement_psi, thd->query(), + thd->query_length()); + + DBUG_ASSERT(thd->wsrep_trx().active() == false); + thd->wsrep_cs().reset_error(); + thd->set_query_id(next_query_id()); +} + +static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, + Parser_state *parser_state) +{ + bool is_autocommit= + !thd->in_multi_stmt_transaction_mode() && + !thd->wsrep_applier; + bool retry_autocommit; + do + { + retry_autocommit= false; + mysql_parse(thd, rawbuf, length, parser_state); + + /* + Convert all ER_QUERY_INTERRUPTED errors to ER_LOCK_DEADLOCK + if the transaction was BF aborted. 
This can happen when the + transaction is being BF aborted via thd->awake() while it is + still executing. + + Note that this must be done before wsrep_after_statement() call + since it clears the transaction for autocommit queries. + */ + if (((thd->get_stmt_da()->is_error() && + thd->get_stmt_da()->sql_errno() == ER_QUERY_INTERRUPTED) || + !thd->get_stmt_da()->is_set()) && + thd->wsrep_trx().bf_aborted()) + { + WSREP_DEBUG("overriding error: %d with DEADLOCK", + (thd->get_stmt_da()->is_error()) ? + thd->get_stmt_da()->sql_errno() : 0); + + thd->reset_kill_query(); + wsrep_override_error(thd, ER_LOCK_DEADLOCK); + } + +#ifdef ENABLED_DEBUG_SYNC + /* we need the test otherwise we get stuck in the "SET DEBUG_SYNC" itself */ + if (thd->lex->sql_command != SQLCOM_SET_OPTION) + DEBUG_SYNC(thd, "wsrep_after_statement_enter"); +#endif + + if (wsrep_after_statement(thd) && + is_autocommit && + thd_is_connection_alive(thd)) + { + thd->reset_for_next_command(); + thd->reset_kill_query(); + if (is_autocommit && + thd->lex->sql_command != SQLCOM_SELECT && + thd->wsrep_retry_counter < thd->variables.wsrep_retry_autocommit) + { +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("sync.wsrep_retry_autocommit", + { + const char act[]= + "now " + "SIGNAL wsrep_retry_autocommit_reached " + "WAIT_FOR wsrep_retry_autocommit_continue"; + DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act))); + }); +#endif + WSREP_DEBUG("wsrep retrying AC query: %lu %s", + thd->wsrep_retry_counter, + wsrep_thd_query(thd)); + wsrep_prepare_for_autocommit_retry(thd, rawbuf, length, parser_state); + if (thd->lex->explain) + delete_explain_query(thd->lex); + retry_autocommit= true; + } + else + { + WSREP_DEBUG("%s, thd: %llu is_AC: %d, retry: %lu - %lu SQL: %s", + wsrep_thd_transaction_state_str(thd), + thd->thread_id, + is_autocommit, + thd->wsrep_retry_counter, + thd->variables.wsrep_retry_autocommit, + wsrep_thd_query(thd)); + my_error(ER_LOCK_DEADLOCK, MYF(0)); + thd->reset_kill_query(); + 
thd->wsrep_retry_counter= 0; // reset
+ }
+ }
+ else
+ {
+ set_if_smaller(thd->wsrep_retry_counter, 0); // reset; eventually ok
+ }
+ } while (retry_autocommit);
+
+ if (thd->wsrep_retry_query)
+ {
+ WSREP_DEBUG("releasing retry_query: "
+ "conf %s sent %d kill %d errno %d SQL %s",
+ wsrep_thd_transaction_state_str(thd),
+ thd->get_stmt_da()->is_sent(),
+ thd->killed,
+ thd->get_stmt_da()->is_error() ?
+ thd->get_stmt_da()->sql_errno() : 0,
+ thd->wsrep_retry_query);
+ my_free(thd->wsrep_retry_query);
+ thd->wsrep_retry_query = NULL;
+ thd->wsrep_retry_query_len = 0;
+ thd->wsrep_retry_command = COM_CONNECT;
+ }
+ return false;
+}
+#endif /* WITH_WSREP */
+
+
+/*
+ When you modify mysql_parse(), you may need to modify
+ mysql_test_parse_for_slave() in this same file.
+*/
+
+/**
+ Parse a query.
+
+ @param thd Current thread
+ @param rawbuf Beginning of the query text
+ @param length Length of the query text
+ @param parser_state Parser state for this query
+*/
+
+void mysql_parse(THD *thd, char *rawbuf, uint length,
+ Parser_state *parser_state)
+{
+ DBUG_ENTER("mysql_parse");
+ DBUG_EXECUTE_IF("parser_debug", turn_parser_debug_on_MYSQLparse(););
+ DBUG_EXECUTE_IF("parser_debug", turn_parser_debug_on_ORAparse(););
+
+ /*
+ Warning.
+ The purpose of query_cache_send_result_to_client() is to look up the
+ query in the query cache first, to avoid parsing and executing it.
+ So, the natural implementation would be to:
+ - first, call query_cache_send_result_to_client,
+ - second, if caching failed, initialise the lexical and syntactic parser.
+ The problem is that the query cache depends on a clean initialization
+ of (among others) lex->safe_to_cache_query and thd->server_status,
+ which are reset respectively in
+ - lex_start()
+ - THD::reset_for_next_command()
+ So, initializing the lexical analyser *before* using the query cache
+ is required for the cache to work properly.
+ FIXME: cleanup the dependencies in the code to simplify this. 
+ */ + lex_start(thd); + thd->reset_for_next_command(); + + if (query_cache_send_result_to_client(thd, rawbuf, length) <= 0) + { + LEX *lex= thd->lex; + + bool err= parse_sql(thd, parser_state, NULL, true); + + if (likely(!err)) + { + thd->m_statement_psi= + MYSQL_REFINE_STATEMENT(thd->m_statement_psi, + sql_statement_info[thd->lex->sql_command]. + m_key); +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (mqh_used && thd->user_connect && + check_mqh(thd, lex->sql_command)) + { + thd->net.error = 0; + } + else +#endif + { + if (likely(! thd->is_error())) + { + const char *found_semicolon= parser_state->m_lip.found_semicolon; + /* + Binlog logs a string starting from thd->query and having length + thd->query_length; so we set thd->query_length correctly (to not + log several statements in one event, when we executed only first). + We set it to not see the ';' (otherwise it would get into binlog + and Query_log_event::print() would give ';;' output). + This also helps display only the current query in SHOW + PROCESSLIST. + */ + if (found_semicolon && (ulong) (found_semicolon - thd->query())) + thd->set_query(thd->query(), + (uint32) (found_semicolon - thd->query() - 1), + thd->charset()); + /* Actually execute the query */ + if (found_semicolon) + { + lex->safe_to_cache_query= 0; + thd->server_status|= SERVER_MORE_RESULTS_EXISTS; + } + lex->set_trg_event_type_for_tables(); + MYSQL_QUERY_EXEC_START(thd->query(), + thd->thread_id, + thd->get_db(), + &thd->security_ctx->priv_user[0], + (char *) thd->security_ctx->host_or_ip, + 0); + + int error __attribute__((unused)); + error= mysql_execute_command(thd); + MYSQL_QUERY_EXEC_DONE(error); + } + } + } + else + { + /* Instrument this broken statement as "statement/sql/error" */ + thd->m_statement_psi= + MYSQL_REFINE_STATEMENT(thd->m_statement_psi, + sql_statement_info[SQLCOM_END].m_key); + DBUG_ASSERT(thd->is_error()); + DBUG_PRINT("info",("Command aborted. 
Fatal_error: %d", + thd->is_fatal_error)); + + query_cache_abort(thd, &thd->query_cache_tls); + } + THD_STAGE_INFO(thd, stage_freeing_items); + sp_cache_enforce_limit(thd->sp_proc_cache, stored_program_cache_size); + sp_cache_enforce_limit(thd->sp_func_cache, stored_program_cache_size); + sp_cache_enforce_limit(thd->sp_package_spec_cache, stored_program_cache_size); + sp_cache_enforce_limit(thd->sp_package_body_cache, stored_program_cache_size); + thd->end_statement(); + thd->Item_change_list::rollback_item_tree_changes(); + thd->cleanup_after_query(); + } + else + { + /* Update statistics for getting the query from the cache */ + thd->lex->sql_command= SQLCOM_SELECT; + thd->m_statement_psi= + MYSQL_REFINE_STATEMENT(thd->m_statement_psi, + sql_statement_info[SQLCOM_SELECT].m_key); + status_var_increment(thd->status_var.com_stat[SQLCOM_SELECT]); + thd->update_stats(); +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd)) + { + thd->wsrep_sync_wait_gtid= WSREP_GTID_UNDEFINED; + } +#endif /* WITH_WSREP */ + } + DBUG_VOID_RETURN; +} + + +#ifdef HAVE_REPLICATION +/* + Usable by the replication SQL thread only: just parse a query to know if it + can be ignored because of replicate-*-table rules. 
+ + @retval + 0 cannot be ignored + @retval + 1 can be ignored +*/ + +bool mysql_test_parse_for_slave(THD *thd, char *rawbuf, uint length) +{ + LEX *lex= thd->lex; + bool error= 0; + DBUG_ENTER("mysql_test_parse_for_slave"); + + Parser_state parser_state; + if (likely(!(error= parser_state.init(thd, rawbuf, length)))) + { + lex_start(thd); + thd->reset_for_next_command(); + + if (!parse_sql(thd, & parser_state, NULL, true) && + all_tables_not_ok(thd, lex->first_select_lex()->table_list.first)) + error= 1; /* Ignore question */ + thd->end_statement(); + } + thd->cleanup_after_query(); + DBUG_RETURN(error); +} +#endif + + +bool +add_proc_to_list(THD* thd, Item *item) +{ + ORDER *order; + Item **item_ptr; + + if (unlikely(!(order = (ORDER *) thd->alloc(sizeof(ORDER)+sizeof(Item*))))) + return 1; + item_ptr = (Item**) (order+1); + *item_ptr= item; + order->item=item_ptr; + thd->lex->proc_list.link_in_list(order, &order->next); + return 0; +} + + +/** + save order by and tables in own lists. +*/ + +bool add_to_list(THD *thd, SQL_I_List &list, Item *item,bool asc) +{ + ORDER *order; + DBUG_ENTER("add_to_list"); + if (unlikely(!(order = (ORDER *) thd->alloc(sizeof(ORDER))))) + DBUG_RETURN(1); + order->item_ptr= item; + order->item= &order->item_ptr; + order->direction= (asc ? ORDER::ORDER_ASC : ORDER::ORDER_DESC); + order->used=0; + order->counter_used= 0; + order->fast_field_copier_setup= 0; + list.link_in_list(order, &order->next); + DBUG_RETURN(0); +} + + +/** + Add a table to list of used tables. + + @param table Table to add + @param alias alias for table (or null if no alias) + @param table_options A set of the following bits: + - TL_OPTION_UPDATING : Table will be updated + - TL_OPTION_FORCE_INDEX : Force usage of index + - TL_OPTION_ALIAS : an alias in multi table DELETE + @param lock_type How table should be locked + @param mdl_type Type of metadata lock to acquire on the table. 
+ @param use_index List of indexed used in USE INDEX + @param ignore_index List of indexed used in IGNORE INDEX + + @retval + 0 Error + @retval + \# Pointer to TABLE_LIST element added to the total table list +*/ + +TABLE_LIST *st_select_lex::add_table_to_list(THD *thd, + Table_ident *table, + LEX_CSTRING *alias, + ulong table_options, + thr_lock_type lock_type, + enum_mdl_type mdl_type, + List *index_hints_arg, + List *partition_names, + LEX_STRING *option) +{ + TABLE_LIST *ptr; + TABLE_LIST *UNINIT_VAR(previous_table_ref); /* The table preceding the current one. */ + LEX_CSTRING alias_str; + LEX *lex= thd->lex; + DBUG_ENTER("add_table_to_list"); + DBUG_PRINT("enter", ("Table '%s' (%p) Select %p (%u)", + (alias ? alias->str : table->table.str), + table, + this, select_number)); + DBUG_ASSERT(!is_service_select || (table_options & TL_OPTION_SEQUENCE)); + + if (unlikely(!table)) + DBUG_RETURN(0); // End of memory + alias_str= alias ? *alias : table->table; + DBUG_ASSERT(alias_str.str); + if (!MY_TEST(table_options & TL_OPTION_ALIAS) && + unlikely(check_table_name(table->table.str, table->table.length, FALSE))) + { + my_error(ER_WRONG_TABLE_NAME, MYF(0), table->table.str); + DBUG_RETURN(0); + } + + if (unlikely(table->is_derived_table() == FALSE && table->db.str && + !(table_options & TL_OPTION_TABLE_FUNCTION) && + check_db_name((LEX_STRING*) &table->db))) + { + my_error(ER_WRONG_DB_NAME, MYF(0), table->db.str); + DBUG_RETURN(0); + } + + if (!alias) /* Alias is case sensitive */ + { + if (unlikely(table->sel)) + { + my_message(ER_DERIVED_MUST_HAVE_ALIAS, + ER_THD(thd, ER_DERIVED_MUST_HAVE_ALIAS), MYF(0)); + DBUG_RETURN(0); + } + /* alias_str points to table->table; Let's make a copy */ + if (unlikely(!(alias_str.str= (char*) thd->memdup(alias_str.str, alias_str.length+1)))) + DBUG_RETURN(0); + } + if (unlikely(!(ptr = (TABLE_LIST *) thd->calloc(sizeof(TABLE_LIST))))) + DBUG_RETURN(0); /* purecov: inspected */ + if (table->db.str) + { + ptr->is_fqtn= TRUE; + ptr->db= 
table->db; + } + else if (!lex->with_cte_resolution && lex->copy_db_to(&ptr->db)) + DBUG_RETURN(0); + else + ptr->is_fqtn= FALSE; + + ptr->alias= alias_str; + ptr->is_alias= alias ? TRUE : FALSE; + if (lower_case_table_names) + { + if (table->table.length) + table->table.length= my_casedn_str(files_charset_info, + (char*) table->table.str); + if (ptr->db.length && ptr->db.str != any_db.str) + ptr->db.length= my_casedn_str(files_charset_info, (char*) ptr->db.str); + } + + ptr->table_name= table->table; + ptr->lock_type= lock_type; + ptr->mdl_type= mdl_type; + ptr->table_options= table_options; + ptr->updating= MY_TEST(table_options & TL_OPTION_UPDATING); + /* TODO: remove TL_OPTION_FORCE_INDEX as it looks like it's not used */ + ptr->force_index= MY_TEST(table_options & TL_OPTION_FORCE_INDEX); + ptr->ignore_leaves= MY_TEST(table_options & TL_OPTION_IGNORE_LEAVES); + ptr->sequence= MY_TEST(table_options & TL_OPTION_SEQUENCE); + ptr->derived= table->sel; + if (!ptr->derived && is_infoschema_db(&ptr->db)) + { + if (ptr->updating && + /* Special cases which are processed by commands itself */ + lex->sql_command != SQLCOM_CHECK && + lex->sql_command != SQLCOM_CHECKSUM) + { + my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), + thd->security_ctx->priv_user, + thd->security_ctx->priv_host, + INFORMATION_SCHEMA_NAME.str); + DBUG_RETURN(0); + } + ST_SCHEMA_TABLE *schema_table; + schema_table= find_schema_table(thd, &ptr->table_name); + ptr->schema_table_name= ptr->table_name; + ptr->schema_table= schema_table; + } + ptr->select_lex= this; + /* + We can't cache internal temporary tables between prepares as the + table may be deleted before next exection. + */ + ptr->cacheable_table= !table->is_derived_table(); + ptr->index_hints= index_hints_arg; + ptr->option= option ? option->str : 0; + /* check that used name is unique. 
Sequences are ignored */ + if (lock_type != TL_IGNORE && !ptr->sequence) + { + TABLE_LIST *first_table= table_list.first; + if (lex->sql_command == SQLCOM_CREATE_VIEW) + first_table= first_table ? first_table->next_local : NULL; + for (TABLE_LIST *tables= first_table ; + tables ; + tables=tables->next_local) + { + if (unlikely(!my_strcasecmp(table_alias_charset, alias_str.str, + tables->alias.str) && + (tables->db.str == any_db.str || ptr->db.str == any_db.str || + !cmp(&ptr->db, &tables->db)) && + !tables->sequence)) + { + my_error(ER_NONUNIQ_TABLE, MYF(0), alias_str.str); /* purecov: tested */ + DBUG_RETURN(0); /* purecov: tested */ + } + } + } + /* Store the table reference preceding the current one. */ + if (table_list.elements > 0 && likely(!ptr->sequence)) + { + /* + table_list.next points to the last inserted TABLE_LIST->next_local' + element + We don't use the offsetof() macro here to avoid warnings from gcc + */ + previous_table_ref= (TABLE_LIST*) ((char*) table_list.next - + ((char*) &(ptr->next_local) - + (char*) ptr)); + /* + Set next_name_resolution_table of the previous table reference to point + to the current table reference. In effect the list + TABLE_LIST::next_name_resolution_table coincides with + TABLE_LIST::next_local. Later this may be changed in + store_top_level_join_columns() for NATURAL/USING joins. + */ + previous_table_ref->next_name_resolution_table= ptr; + } + + /* + Link the current table reference in a local list (list for current select). + Notice that as a side effect here we set the next_local field of the + previous table reference to 'ptr'. Here we also add one element to the + list 'table_list'. + We don't store sequences into the local list to hide them from INSERT + and SELECT. 
+ */ + if (likely(!ptr->sequence)) + table_list.link_in_list(ptr, &ptr->next_local); + ptr->next_name_resolution_table= NULL; +#ifdef WITH_PARTITION_STORAGE_ENGINE + ptr->partition_names= partition_names; +#endif /* WITH_PARTITION_STORAGE_ENGINE */ + /* Link table in global list (all used tables) */ + lex->add_to_query_tables(ptr); + + // Pure table aliases do not need to be locked: + if (ptr->db.str && !(table_options & TL_OPTION_ALIAS)) + { + MDL_REQUEST_INIT(&ptr->mdl_request, MDL_key::TABLE, ptr->db.str, + ptr->table_name.str, mdl_type, MDL_TRANSACTION); + } + DBUG_RETURN(ptr); +} + + +/** + Initialize a new table list for a nested join. + + The function initializes a structure of the TABLE_LIST type + for a nested join. It sets up its nested join list as empty. + The created structure is added to the front of the current + join list in the st_select_lex object. Then the function + changes the current nest level for joins to refer to the newly + created empty list after having saved the info on the old level + in the initialized structure. + + @param thd current thread + + @retval + 0 if success + @retval + 1 otherwise +*/ + +bool st_select_lex::init_nested_join(THD *thd) +{ + TABLE_LIST *ptr; + NESTED_JOIN *nested_join; + DBUG_ENTER("init_nested_join"); + + if (unlikely(!(ptr= (TABLE_LIST*) thd->calloc(ALIGN_SIZE(sizeof(TABLE_LIST))+ + sizeof(NESTED_JOIN))))) + DBUG_RETURN(1); + nested_join= ptr->nested_join= + ((NESTED_JOIN*) ((uchar*) ptr + ALIGN_SIZE(sizeof(TABLE_LIST)))); + + ptr->embedding= embedding; + ptr->join_list= join_list; + ptr->alias.str="(nested_join)"; + ptr->alias.length= sizeof("(nested_join)")-1; + embedding= ptr; + join_list= &nested_join->join_list; + join_list->empty(); + DBUG_RETURN(0); +} + + +/** + End a nested join table list. + + The function returns to the previous join nest level. + If the current level contains only one member, the function + moves it one level up, eliminating the nest. 
+ + @param thd current thread + + @return + - Pointer to TABLE_LIST element added to the total table list, if success + - 0, otherwise +*/ + +TABLE_LIST *st_select_lex::end_nested_join(THD *thd) +{ + TABLE_LIST *ptr; + NESTED_JOIN *nested_join; + DBUG_ENTER("end_nested_join"); + + DBUG_ASSERT(embedding); + ptr= embedding; + join_list= ptr->join_list; + embedding= ptr->embedding; + nested_join= ptr->nested_join; + if (nested_join->join_list.elements == 1) + { + TABLE_LIST *embedded= nested_join->join_list.head(); + join_list->pop(); + embedded->join_list= join_list; + embedded->embedding= embedding; + join_list->push_front(embedded, thd->mem_root); + ptr= embedded; + embedded->lifted= 1; + } + else if (nested_join->join_list.elements == 0) + { + join_list->pop(); + ptr= 0; // return value + } + DBUG_RETURN(ptr); +} + + +/** + Nest last join operation. + + The function nest last join operation as if it was enclosed in braces. + + @param thd current thread + + @retval + 0 Error + @retval + \# Pointer to TABLE_LIST element created for the new nested join +*/ + +TABLE_LIST *st_select_lex::nest_last_join(THD *thd) +{ + TABLE_LIST *ptr; + NESTED_JOIN *nested_join; + List *embedded_list; + DBUG_ENTER("nest_last_join"); + + TABLE_LIST *head= join_list->head(); + if (head->nested_join && (head->nested_join->nest_type & REBALANCED_NEST)) + { + head= join_list->pop(); + DBUG_RETURN(head); + } + + if (unlikely(!(ptr= (TABLE_LIST*) thd->calloc(ALIGN_SIZE(sizeof(TABLE_LIST))+ + sizeof(NESTED_JOIN))))) + DBUG_RETURN(0); + nested_join= ptr->nested_join= + ((NESTED_JOIN*) ((uchar*) ptr + ALIGN_SIZE(sizeof(TABLE_LIST)))); + + ptr->embedding= embedding; + ptr->join_list= join_list; + ptr->alias.str= "(nest_last_join)"; + ptr->alias.length= sizeof("(nest_last_join)")-1; + embedded_list= &nested_join->join_list; + embedded_list->empty(); + nested_join->nest_type= JOIN_OP_NEST; + + for (uint i=0; i < 2; i++) + { + TABLE_LIST *table= join_list->pop(); + if (unlikely(!table)) + 
DBUG_RETURN(NULL); + table->join_list= embedded_list; + table->embedding= ptr; + embedded_list->push_back(table); + if (table->natural_join) + { + ptr->is_natural_join= TRUE; + /* + If this is a JOIN ... USING, move the list of joined fields to the + table reference that describes the join. + */ + if (prev_join_using) + ptr->join_using_fields= prev_join_using; + } + } + nested_join->used_tables= nested_join->not_null_tables= (table_map) 0; + DBUG_RETURN(ptr); +} + + +/** + Add a table to the current join list. + + The function puts a table in front of the current join list + of st_select_lex object. + Thus, joined tables are put into this list in the reverse order + (the most outer join operation follows first). + + @param table the table to add + + @return + None +*/ + +void st_select_lex::add_joined_table(TABLE_LIST *table) +{ + DBUG_ENTER("add_joined_table"); + join_list->push_front(table, parent_lex->thd->mem_root); + table->join_list= join_list; + table->embedding= embedding; + DBUG_VOID_RETURN; +} + + +/** + @brief + Create a node for JOIN/INNER JOIN/CROSS JOIN/STRAIGHT_JOIN operation + + @param left_op the node for the left operand constructed by the parser + @param right_op the node for the right operand constructed by the parser + @param straight_fl TRUE if STRAIGHT_JOIN is used + + @retval + false on success + true otherwise + + @details + + JOIN operator can be left-associative with other join operators in one + context and right-associative in another context. + + In this query + SELECT * FROM t1 JOIN t2 LEFT JOIN t3 ON t2.a=t3.a (Q1) + JOIN is left-associative and the query Q1 is interpreted as + SELECT * FROM (t1 JOIN t2) LEFT JOIN t3 ON t2.a=t3.a. + While in this query + SELECT * FROM t1 JOIN t2 LEFT JOIN t3 ON t2.a=t3.a ON t1.b=t2.b (Q2) + JOIN is right-associative and the query Q2 is interpreted as + SELECT * FROM t1 JOIN (t2 LEFT JOIN t3 ON t2.a=t3.a) ON t1.b=t2.b + + JOIN is right-associative if it is used with ON clause or with USING clause. 
+ Otherwise it is left-associative. + When parsing a join expression with JOIN operator we can't determine + whether this operation left or right associative until either we read the + corresponding ON clause or we reach the end of the expression. This creates + a problem for the parser to build a proper internal representation of the + used join expression. + + For Q1 and Q2 the trees representing the used join expressions look like + + LJ - ON J - ON + / \ / \ + J t3 (TQ1) t1 LJ - ON (TQ2) + / \ / \ + t1 t2 t2 t3 + + To build TQ1 the parser has to reduce the expression for JOIN right after + it has read the reference to t2. To build TQ2 the parser reduces JOIN + when he has read the whole join expression. There is no way to determine + whether an early reduction is needed until the whole join expression is + read. + A solution here is always to do a late reduction. In this case the parser + first builds an incorrect tree TQ1* that has to be rebalanced right after + it has been constructed. + + J LJ - ON + / \ / \ + t1 LJ - ON (TQ1*) => J t3 + / \ / \ + t2 t3 t1 t2 + + Actually the transformation is performed over the nodes t1 and LJ before the + node for J is created in the function st_select_lex::add_cross_joined_table. + The function creates a node for J which replaces the node t2. Then it + attaches the nodes t1 and t2 to this newly created node. The node LJ becomes + the top node of the tree. + + For the query + SELECT * FROM t1 JOIN t2 RIGHT JOIN t3 ON t2.a=t3.a (Q3) + the transformation looks slightly differently because the parser + replaces the RIGHT JOIN tree for an equivalent LEFT JOIN tree. 
+ + J LJ - ON + / \ / \ + t1 LJ - ON (TQ3*) => t3 J + / \ / \ + t3 t2 t1 t2 + + With several left associative JOINs + SELECT * FROM t1 JOIN t2 JOIN t3 LEFT JOIN t4 ON t3.a=t4.a (Q4) + the newly created node for JOIN replaces the left most node of the tree: + + J1 LJ - ON + / \ / \ + t1 J2 J2 t4 + / \ => / \ + t2 LJ - ON J1 t3 + / \ / \ + t3 t4 t1 t2 + + Here's another example: + SELECT * + FROM t1 JOIN t2 LEFT JOIN t3 JOIN t4 ON t3.a=t4.a ON t2.b=t3.b (Q5) + + J LJ - ON + / \ / \ + t1 LJ - ON J J - ON + / \ => / \ / \ + t2 J - ON t1 t2 t3 t4 + / \ + t3 t4 + + If the transformed nested join node node is a natural join node like in + the following query + SELECT * FROM t1 JOIN t2 LEFT JOIN t3 USING(a) (Q6) + the transformation additionally has to take care about setting proper + references in the field natural_join for both operands of the natural + join operation. + + The queries that combine comma syntax for join operation with + JOIN expression require a special care. Consider the query + SELECT * FROM t1, t2 JOIN t3 LEFT JOIN t4 ON t3.a=t4.a (Q7) + This query is equivalent to the query + SELECT * FROM (t1, t2) JOIN t3 LEFT JOIN t4 ON t3.a=t4.a + The latter is transformed in the same way as query Q1 + + J LJ - ON + / \ / \ + (t1,t2) LJ - ON => J t4 + / \ / \ + t3 t4 (t1,t2) t3 + + A transformation similar to the transformation for Q3 is done for + the following query with RIGHT JOIN + SELECT * FROM t1, t2 JOIN t3 RIGHT JOIN t4 ON t3.a=t4.a (Q8) + + J LJ - ON + / \ / \ + t3 LJ - ON => t4 J + / \ / \ + t4 (t1,t2) (t1,t2) t3 + + The function also has to change the name resolution context for ON + expressions used in the transformed join expression to take into + account the tables of the left_op node. + + TODO: + A more elegant solution would be to implement the transformation that + eliminates nests for cross join operations. 
For Q7 it would work like this: + + J LJ - ON + / \ / \ + (t1,t2) LJ - ON => (t1,t2,t3) t4 + / \ + t3 t4 + + For Q8 with RIGHT JOIN the transformation would work similarly: + + J LJ - ON + / \ / \ + t3 LJ - ON => t4 (t1,t2,t3) + / \ + t4 (t1,t2) + +*/ + +bool st_select_lex::add_cross_joined_table(TABLE_LIST *left_op, + TABLE_LIST *right_op, + bool straight_fl) +{ + DBUG_ENTER("add_cross_joined_table"); + THD *thd= parent_lex->thd; + if (!(right_op->nested_join && + (right_op->nested_join->nest_type & JOIN_OP_NEST))) + { + /* + This handles the cases when the right operand is not a nested join. + like in queries + SELECT * FROM t1 JOIN t2; + SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.a JOIN t3 + */ + add_joined_table(left_op); + add_joined_table(right_op); + right_op->straight= straight_fl; + DBUG_RETURN(false); + } + + TABLE_LIST *tbl; + List *right_op_jl= right_op->join_list; + TABLE_LIST *cj_nest; + + /* + Create the node NJ for a new nested join for the future inclusion + of left_op in it. Initially the nest is empty. 
+ */ + if (unlikely(!(cj_nest= + (TABLE_LIST*) thd->calloc(ALIGN_SIZE(sizeof(TABLE_LIST))+ + sizeof(NESTED_JOIN))))) + DBUG_RETURN(true); + cj_nest->nested_join= + ((NESTED_JOIN*) ((uchar*) cj_nest + ALIGN_SIZE(sizeof(TABLE_LIST)))); + cj_nest->nested_join->nest_type= JOIN_OP_NEST; + List *cjl= &cj_nest->nested_join->join_list; + cjl->empty(); + + List *jl= &right_op->nested_join->join_list; + DBUG_ASSERT(jl->elements == 2); + /* Look for the left most node tbl of the right_op tree */ + for ( ; ; ) + { + TABLE_LIST *pair_tbl= 0; /* useful only for operands of natural joins */ + + List_iterator li(*jl); + tbl= li++; + + /* Expand name resolution context */ + Name_resolution_context *on_context; + if ((on_context= tbl->on_context)) + { + on_context->first_name_resolution_table= + left_op->first_leaf_for_name_resolution(); + } + + if (!(tbl->outer_join & JOIN_TYPE_RIGHT)) + { + pair_tbl= tbl; + tbl= li++; + } + if (tbl->nested_join && + tbl->nested_join->nest_type & JOIN_OP_NEST) + { + jl= &tbl->nested_join->join_list; + continue; + } + + /* Replace the tbl node in the tree for the newly created NJ node */ + cj_nest->outer_join= tbl->outer_join; + cj_nest->on_expr= tbl->on_expr; + cj_nest->embedding= tbl->embedding; + cj_nest->join_list= jl; + cj_nest->alias.str= "(nest_last_join)"; + cj_nest->alias.length= sizeof("(nest_last_join)")-1; + li.replace(cj_nest); + + /* + If tbl is an operand of a natural join set properly the references + in the fields natural_join for both operands of the operation. 
+ */ + if(tbl->embedding && tbl->embedding->is_natural_join) + { + if (!pair_tbl) + pair_tbl= li++; + pair_tbl->natural_join= cj_nest; + cj_nest->natural_join= pair_tbl; + } + break; + } + + /* Attach tbl as the right operand of NJ */ + if (unlikely(cjl->push_back(tbl, thd->mem_root))) + DBUG_RETURN(true); + tbl->outer_join= 0; + tbl->on_expr= 0; + tbl->straight= straight_fl; + tbl->natural_join= 0; + tbl->embedding= cj_nest; + tbl->join_list= cjl; + + /* Add left_op as the left operand of NJ */ + if (unlikely(cjl->push_back(left_op, thd->mem_root))) + DBUG_RETURN(true); + left_op->embedding= cj_nest; + left_op->join_list= cjl; + + /* + Mark right_op as a rebalanced nested join in order not to + create a new top level nested join node. + */ + right_op->nested_join->nest_type|= REBALANCED_NEST; + if (unlikely(right_op_jl->push_front(right_op))) + DBUG_RETURN(true); + DBUG_RETURN(false); +} + + +/** + Convert a right join into equivalent left join. + + The function takes the current join list t[0],t[1] ... and + effectively converts it into the list t[1],t[0] ... + Although the outer_join flag for the new nested table contains + JOIN_TYPE_RIGHT, it will be handled as the inner table of a left join + operation. 
+ + EXAMPLES + @verbatim + SELECT * FROM t1 RIGHT JOIN t2 ON on_expr => + SELECT * FROM t2 LEFT JOIN t1 ON on_expr + + SELECT * FROM t1,t2 RIGHT JOIN t3 ON on_expr => + SELECT * FROM t1,t3 LEFT JOIN t2 ON on_expr + + SELECT * FROM t1,t2 RIGHT JOIN (t3,t4) ON on_expr => + SELECT * FROM t1,(t3,t4) LEFT JOIN t2 ON on_expr + + SELECT * FROM t1 LEFT JOIN t2 ON on_expr1 RIGHT JOIN t3 ON on_expr2 => + SELECT * FROM t3 LEFT JOIN (t1 LEFT JOIN t2 ON on_expr2) ON on_expr1 + @endverbatim + + @param thd current thread + + @return + - Pointer to the table representing the inner table, if success + - 0, otherwise +*/ + +TABLE_LIST *st_select_lex::convert_right_join() +{ + TABLE_LIST *tab2= join_list->pop(); + TABLE_LIST *tab1= join_list->pop(); + DBUG_ENTER("convert_right_join"); + + join_list->push_front(tab2, parent_lex->thd->mem_root); + join_list->push_front(tab1, parent_lex->thd->mem_root); + tab1->outer_join|= JOIN_TYPE_RIGHT; + + DBUG_RETURN(tab1); +} + + +void st_select_lex::prepare_add_window_spec(THD *thd) +{ + LEX *lex= thd->lex; + save_group_list= group_list; + save_order_list= order_list; + lex->win_ref= NULL; + lex->win_frame= NULL; + lex->frame_top_bound= NULL; + lex->frame_bottom_bound= NULL; + group_list.empty(); + order_list.empty(); +} + +bool st_select_lex::add_window_def(THD *thd, + LEX_CSTRING *win_name, + LEX_CSTRING *win_ref, + SQL_I_List win_partition_list, + SQL_I_List win_order_list, + Window_frame *win_frame) +{ + SQL_I_List *win_part_list_ptr= + new (thd->mem_root) SQL_I_List (win_partition_list); + SQL_I_List *win_order_list_ptr= + new (thd->mem_root) SQL_I_List (win_order_list); + if (!(win_part_list_ptr && win_order_list_ptr)) + return true; + Window_def *win_def= new (thd->mem_root) Window_def(win_name, + win_ref, + win_part_list_ptr, + win_order_list_ptr, + win_frame); + group_list= save_group_list; + order_list= save_order_list; + if (parsing_place != SELECT_LIST) + { + fields_in_window_functions+= win_part_list_ptr->elements + + 
win_order_list_ptr->elements; + } + win_def->win_spec_number= window_specs.elements; + return (win_def == NULL || window_specs.push_back(win_def)); +} + +bool st_select_lex::add_window_spec(THD *thd, + LEX_CSTRING *win_ref, + SQL_I_List win_partition_list, + SQL_I_List win_order_list, + Window_frame *win_frame) +{ + SQL_I_List *win_part_list_ptr= + new (thd->mem_root) SQL_I_List (win_partition_list); + SQL_I_List *win_order_list_ptr= + new (thd->mem_root) SQL_I_List (win_order_list); + if (!(win_part_list_ptr && win_order_list_ptr)) + return true; + Window_spec *win_spec= new (thd->mem_root) Window_spec(win_ref, + win_part_list_ptr, + win_order_list_ptr, + win_frame); + group_list= save_group_list; + order_list= save_order_list; + if (parsing_place != SELECT_LIST) + { + fields_in_window_functions+= win_part_list_ptr->elements + + win_order_list_ptr->elements; + } + thd->lex->win_spec= win_spec; + win_spec->win_spec_number= window_specs.elements; + return (win_spec == NULL || window_specs.push_back(win_spec)); +} + +/** + Set lock for all tables in current select level. + + @param lock_type Lock to set for tables + @param skip_locked (SELECT {FOR UPDATE/LOCK IN SHARED MODE} SKIP LOCKED) + + @note + If lock is a write lock, then tables->updating is set 1 + This is to get tables_ok to know that the table is updated by the + query +*/ + +void st_select_lex::set_lock_for_tables(thr_lock_type lock_type, bool for_update, + bool skip_locked_arg) +{ + DBUG_ENTER("set_lock_for_tables"); + DBUG_PRINT("enter", ("lock_type: %d for_update: %d skip_locked %d", + lock_type, for_update, skip_locked)); + skip_locked= skip_locked_arg; + for (TABLE_LIST *tables= table_list.first; + tables; + tables= tables->next_local) + { + tables->lock_type= lock_type; + tables->skip_locked= skip_locked; + tables->updating= for_update; + + if (tables->db.length) + tables->mdl_request.set_type((lock_type >= TL_FIRST_WRITE) ? 
+ MDL_SHARED_WRITE : MDL_SHARED_READ); + } + DBUG_VOID_RETURN; +} + + +/** + Create a fake SELECT_LEX for a unit. + + The method create a fake SELECT_LEX object for a unit. + This object is created for any union construct containing a union + operation and also for any single select union construct of the form + @verbatim + (SELECT ... ORDER BY order_list [LIMIT n]) ORDER BY ... + @endvarbatim + or of the form + @varbatim + (SELECT ... ORDER BY LIMIT n) ORDER BY ... + @endvarbatim + + @param thd_arg thread handle + + @note + The object is used to retrieve rows from the temporary table + where the result on the union is obtained. + + @retval + 1 on failure to create the object + @retval + 0 on success +*/ + +bool st_select_lex_unit::add_fake_select_lex(THD *thd_arg) +{ + SELECT_LEX *first_sl= first_select(); + DBUG_ENTER("st_select_lex_unit::add_fake_select_lex"); + DBUG_ASSERT(!fake_select_lex); + + if (!(fake_select_lex= new (thd_arg->mem_root) SELECT_LEX())) + DBUG_RETURN(1); + fake_select_lex->include_standalone(this, + (SELECT_LEX_NODE**)&fake_select_lex); + fake_select_lex->select_number= FAKE_SELECT_LEX_ID; + fake_select_lex->parent_lex= thd_arg->lex; /* Used in init_query. */ + fake_select_lex->make_empty_select(); + fake_select_lex->set_linkage(GLOBAL_OPTIONS_TYPE); + + fake_select_lex->no_table_names_allowed= 1; + + fake_select_lex->context.outer_context=first_sl->context.outer_context; + /* allow item list resolving in fake select for ORDER BY */ + fake_select_lex->context.resolve_in_select_list= TRUE; + fake_select_lex->context.select_lex= fake_select_lex; + + fake_select_lex->nest_level_base= first_select()->nest_level_base; + if (fake_select_lex->set_nest_level(first_select()->nest_level)) + DBUG_RETURN(1); + + if (!is_unit_op()) + { + /* + This works only for + (SELECT ... ORDER BY list [LIMIT n]) ORDER BY order_list [LIMIT m], + (SELECT ... 
LIMIT n) ORDER BY order_list [LIMIT m] + just before the parser starts processing order_list + */ + fake_select_lex->no_table_names_allowed= 1; + thd_arg->lex->current_select= fake_select_lex; + } + //thd_arg->lex->pop_context("add fake"); + DBUG_RETURN(0); +} + + +/** + Push a new name resolution context for a JOIN ... ON clause to the + context stack of a query block. + + Create a new name resolution context for a JOIN ... ON clause, + set the first and last leaves of the list of table references + to be used for name resolution, and push the newly created + context to the stack of contexts of the query. + + @param thd pointer to current thread + @param left_op left operand of the JOIN + @param right_op rigth operand of the JOIN + + @seealso + push_table_function_arg_context() serves similar purpose for table + functions + + @retval + FALSE if all is OK + @retval + TRUE if a memory allocation error occurred +*/ + +bool +push_new_name_resolution_context(THD *thd, + TABLE_LIST *left_op, TABLE_LIST *right_op) +{ + Name_resolution_context *on_context; + if (!(on_context= new (thd->mem_root) Name_resolution_context)) + return TRUE; + on_context->first_name_resolution_table= + left_op->first_leaf_for_name_resolution(); + on_context->last_name_resolution_table= + right_op->last_leaf_for_name_resolution(); + LEX *lex= thd->lex; + on_context->select_lex = lex->current_select; + st_select_lex *outer_sel= lex->parser_current_outer_select(); + on_context->outer_context = outer_sel ? 
&outer_sel->context : 0; + return lex->push_context(on_context); +} + + +/** + Fix condition which contains only field (f turns to f <> 0 ) + + @param cond The condition to fix + + @return fixed condition +*/ + +Item *normalize_cond(THD *thd, Item *cond) +{ + if (cond) + { + Item::Type type= cond->type(); + if (type == Item::FIELD_ITEM || type == Item::REF_ITEM) + { + cond= new (thd->mem_root) Item_func_ne(thd, cond, new (thd->mem_root) Item_int(thd, 0)); + } + } + return cond; +} + + +/** + Add an ON condition to the second operand of a JOIN ... ON. + + Add an ON condition to the right operand of a JOIN ... ON clause. + + @param b the second operand of a JOIN ... ON + @param expr the condition to be added to the ON clause + + @retval + FALSE if there was some error + @retval + TRUE if all is OK +*/ + +void add_join_on(THD *thd, TABLE_LIST *b, Item *expr) +{ + if (expr) + { + expr= normalize_cond(thd, expr); + if (!b->on_expr) + b->on_expr= expr; + else + { + /* + If called from the parser, this happens if you have both a + right and left join. If called later, it happens if we add more + than one condition to the ON clause. + */ + b->on_expr= new (thd->mem_root) Item_cond_and(thd, b->on_expr,expr); + } + b->on_expr->top_level_item(); + } +} + + +/** + Mark that there is a NATURAL JOIN or JOIN ... USING between two + tables. + + This function marks that table b should be joined with a either via + a NATURAL JOIN or via JOIN ... USING. Both join types are special + cases of each other, so we treat them together. The function + setup_conds() creates a list of equal condition between all fields + of the same name for NATURAL JOIN or the fields in 'using_fields' + for JOIN ... USING. The list of equality conditions is stored + either in b->on_expr, or in JOIN::conds, depending on whether there + was an outer join. + + EXAMPLE + @verbatim + SELECT * FROM t1 NATURAL LEFT JOIN t2 + <=> + SELECT * FROM t1 LEFT JOIN t2 ON (t1.i=t2.i and t1.j=t2.j ... 
) + + SELECT * FROM t1 NATURAL JOIN t2 WHERE + <=> + SELECT * FROM t1, t2 WHERE (t1.i=t2.i and t1.j=t2.j and ) + + SELECT * FROM t1 JOIN t2 USING(j) WHERE + <=> + SELECT * FROM t1, t2 WHERE (t1.j=t2.j and ) + @endverbatim + + @param a Left join argumentex + @param b Right join argument + @param using_fields Field names from USING clause +*/ + +void add_join_natural(TABLE_LIST *a, TABLE_LIST *b, List *using_fields, + SELECT_LEX *lex) +{ + b->natural_join= a; + lex->prev_join_using= using_fields; +} + + +/** + Find a thread by id and return it, locking it LOCK_thd_kill + + @param id Identifier of the thread we're looking for + @param query_id If true, search by query_id instead of thread_id + + @return NULL - not found + pointer - thread found, and its LOCK_thd_kill is locked. +*/ + +struct find_thread_callback_arg +{ + find_thread_callback_arg(longlong id_arg, bool query_id_arg): + thd(0), id(id_arg), query_id(query_id_arg) {} + THD *thd; + longlong id; + bool query_id; +}; + + +static my_bool find_thread_callback(THD *thd, find_thread_callback_arg *arg) +{ + if (arg->id == (arg->query_id ? thd->query_id : (longlong) thd->thread_id)) + { + mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete + arg->thd= thd; + return 1; + } + return 0; +} + + +THD *find_thread_by_id(longlong id, bool query_id) +{ + find_thread_callback_arg arg(id, query_id); + server_threads.iterate(find_thread_callback, &arg); + return arg.thd; +} + + +/** + kill one thread. + + @param thd Thread class + @param id Thread id or query id + @param kill_signal Should it kill the query or the connection + @param type Type of id: thread id or query id +*/ + +uint +kill_one_thread(THD *thd, my_thread_id id, killed_state kill_signal, killed_type type) +{ + THD *tmp; + uint error= (type == KILL_TYPE_QUERY ? 
ER_NO_SUCH_QUERY : ER_NO_SUCH_THREAD); + DBUG_ENTER("kill_one_thread"); + DBUG_PRINT("enter", ("id: %lld signal: %d", (long long) id, kill_signal)); + tmp= find_thread_by_id(id, type == KILL_TYPE_QUERY); + if (!tmp) + DBUG_RETURN(error); + DEBUG_SYNC(thd, "found_killee"); + if (tmp->get_command() != COM_DAEMON) + { + /* + If we're SUPER, we can KILL anything, including system-threads. + No further checks. + + KILLer: thd->security_ctx->user could in theory be NULL while + we're still in "unauthenticated" state. This is a theoretical + case (the code suggests this could happen, so we play it safe). + + KILLee: tmp->security_ctx->user will be NULL for system threads. + We need to check so Jane Random User doesn't crash the server + when trying to kill a) system threads or b) unauthenticated users' + threads (Bug#43748). + + If user of both killer and killee are non-NULL, proceed with + slayage if both are string-equal. + + It's ok to also kill DELAYED threads with KILL_CONNECTION instead of + KILL_SYSTEM_THREAD; The difference is that KILL_CONNECTION may be + faster and do a harder kill than KILL_SYSTEM_THREAD; + */ + + mysql_mutex_lock(&tmp->LOCK_thd_data); // Lock from concurrent usage + +#ifdef WITH_WSREP + if (((thd->security_ctx->master_access & PRIV_KILL_OTHER_USER_PROCESS) || + thd->security_ctx->user_matches(tmp->security_ctx)) && + !wsrep_thd_is_BF(tmp, false) && !tmp->wsrep_applier) +#else + if ((thd->security_ctx->master_access & PRIV_KILL_OTHER_USER_PROCESS) || + thd->security_ctx->user_matches(tmp->security_ctx)) +#endif /* WITH_WSREP */ + { + { +#ifdef WITH_WSREP + if (WSREP(tmp)) + { + error = wsrep_kill_thd(thd, tmp, kill_signal); + } + else + { +#endif /* WITH_WSREP */ + tmp->awake_no_mutex(kill_signal); + error= 0; +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ + } + } + else + error= (type == KILL_TYPE_QUERY ? 
ER_KILL_QUERY_DENIED_ERROR : + ER_KILL_DENIED_ERROR); + + mysql_mutex_unlock(&tmp->LOCK_thd_data); + } + mysql_mutex_unlock(&tmp->LOCK_thd_kill); + DBUG_PRINT("exit", ("%u", error)); + DBUG_RETURN(error); +} + + +/** + kill all threads from one user + + @param thd Thread class + @param user_name User name for threads we should kill + @param only_kill_query Should it kill the query or the connection + + @note + If we can't kill all threads because of security issues, no threads + are killed. +*/ + +struct kill_threads_callback_arg +{ + kill_threads_callback_arg(THD *thd_arg, LEX_USER *user_arg): + thd(thd_arg), user(user_arg) {} + THD *thd; + LEX_USER *user; + List threads_to_kill; +}; + + +static my_bool kill_threads_callback(THD *thd, kill_threads_callback_arg *arg) +{ + if (thd->security_ctx->user) + { + /* + Check that hostname (if given) and user name matches. + + host.str[0] == '%' means that host name was not given. See sql_yacc.yy + */ + if (((arg->user->host.str[0] == '%' && !arg->user->host.str[1]) || + !strcmp(thd->security_ctx->host_or_ip, arg->user->host.str)) && + !strcmp(thd->security_ctx->user, arg->user->user.str)) + { + if (!(arg->thd->security_ctx->master_access & + PRIV_KILL_OTHER_USER_PROCESS) && + !arg->thd->security_ctx->user_matches(thd->security_ctx)) + { + return MY_TEST(arg->thd->security_ctx->master_access & PROCESS_ACL); + } + if (!arg->threads_to_kill.push_back(thd, arg->thd->mem_root)) + { + mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete + mysql_mutex_lock(&thd->LOCK_thd_data); + } + } + } + return 0; +} + + +static uint kill_threads_for_user(THD *thd, LEX_USER *user, + killed_state kill_signal, ha_rows *rows) +{ + kill_threads_callback_arg arg(thd, user); + DBUG_ENTER("kill_threads_for_user"); + + *rows= 0; + + if (unlikely(thd->is_fatal_error)) // If we run out of memory + DBUG_RETURN(ER_OUT_OF_RESOURCES); + + DBUG_PRINT("enter", ("user: %s signal: %u", user->user.str, + (uint) kill_signal)); + + if 
(server_threads.iterate(kill_threads_callback, &arg)) + DBUG_RETURN(ER_KILL_DENIED_ERROR); + + if (!arg.threads_to_kill.is_empty()) + { + List_iterator_fast it2(arg.threads_to_kill); + THD *next_ptr; + THD *ptr= it2++; + do + { + ptr->awake_no_mutex(kill_signal); + /* + Careful here: The list nodes are allocated on the memroots of the + THDs to be awakened. + But those THDs may be terminated and deleted as soon as we release + LOCK_thd_kill, which will make the list nodes invalid. + Since the operation "it++" dereferences the "next" pointer of the + previous list node, we need to do this while holding LOCK_thd_kill. + */ + next_ptr= it2++; + mysql_mutex_unlock(&ptr->LOCK_thd_kill); + mysql_mutex_unlock(&ptr->LOCK_thd_data); + (*rows)++; + } while ((ptr= next_ptr)); + } + DBUG_RETURN(0); +} + + +/** + kills a thread and sends response. + + @param thd Thread class + @param id Thread id or query id + @param state Should it kill the query or the connection + @param type Type of id: thread id or query id +*/ + +static +void sql_kill(THD *thd, my_thread_id id, killed_state state, killed_type type) +{ + uint error; + if (likely(!(error= kill_one_thread(thd, id, state, type)))) + { + if (!thd->killed) + my_ok(thd); + else + thd->send_kill_message(); + } + else + my_error(error, MYF(0), id); +} + + +static void __attribute__ ((noinline)) +sql_kill_user(THD *thd, LEX_USER *user, killed_state state) +{ + uint error; + ha_rows rows; + switch (error= kill_threads_for_user(thd, user, state, &rows)) + { + case 0: + my_ok(thd, rows); + break; + case ER_KILL_DENIED_ERROR: + char buf[DEFINER_LENGTH+1]; + strxnmov(buf, sizeof(buf), user->user.str, "@", user->host.str, NULL); + my_printf_error(ER_KILL_DENIED_ERROR, ER_THD(thd, ER_CANNOT_USER), MYF(0), + "KILL USER", buf); + break; + case ER_OUT_OF_RESOURCES: + default: + my_error(error, MYF(0)); + } +} + + +/** If pointer is not a null pointer, append filename to it. 
*/ + +bool append_file_to_dir(THD *thd, const char **filename_ptr, + const LEX_CSTRING *table_name) +{ + char buff[FN_REFLEN],*ptr, *end; + if (!*filename_ptr) + return 0; // nothing to do + + /* Check that the filename is not too long and it's a hard path */ + if (strlen(*filename_ptr)+table_name->length >= FN_REFLEN-1 || + !test_if_hard_path(*filename_ptr)) + { + my_error(ER_WRONG_TABLE_NAME, MYF(0), *filename_ptr); + return 1; + } + /* Fix is using unix filename format on dos */ + strmov(buff,*filename_ptr); + end=convert_dirname(buff, *filename_ptr, NullS); + if (unlikely(!(ptr= (char*) thd->alloc((size_t) (end-buff) + + table_name->length + 1)))) + return 1; // End of memory + *filename_ptr=ptr; + strxmov(ptr,buff,table_name->str,NullS); + return 0; +} + + +Comp_creator *comp_eq_creator(bool invert) +{ + return invert?(Comp_creator *)&ne_creator:(Comp_creator *)&eq_creator; +} + + +Comp_creator *comp_ge_creator(bool invert) +{ + return invert?(Comp_creator *)<_creator:(Comp_creator *)&ge_creator; +} + + +Comp_creator *comp_gt_creator(bool invert) +{ + return invert?(Comp_creator *)&le_creator:(Comp_creator *)>_creator; +} + + +Comp_creator *comp_le_creator(bool invert) +{ + return invert?(Comp_creator *)>_creator:(Comp_creator *)&le_creator; +} + + +Comp_creator *comp_lt_creator(bool invert) +{ + return invert?(Comp_creator *)&ge_creator:(Comp_creator *)<_creator; +} + + +Comp_creator *comp_ne_creator(bool invert) +{ + return invert?(Comp_creator *)&eq_creator:(Comp_creator *)&ne_creator; +} + + +/** + Construct ALL/ANY/SOME subquery Item. 
+ + @param left_expr pointer to left expression + @param cmp compare function creator + @param all true if we create ALL subquery + @param select_lex pointer on parsed subquery structure + + @return + constructed Item (or 0 if out of memory) +*/ +Item * all_any_subquery_creator(THD *thd, Item *left_expr, + chooser_compare_func_creator cmp, + bool all, + SELECT_LEX *select_lex) +{ + if ((cmp == &comp_eq_creator) && !all) // = ANY <=> IN + return new (thd->mem_root) Item_in_subselect(thd, left_expr, select_lex); + + if ((cmp == &comp_ne_creator) && all) // <> ALL <=> NOT IN + return new (thd->mem_root) Item_func_not(thd, + new (thd->mem_root) Item_in_subselect(thd, left_expr, select_lex)); + + Item_allany_subselect *it= + new (thd->mem_root) Item_allany_subselect(thd, left_expr, cmp, select_lex, + all); + if (all) /* ALL */ + return it->upper_item= new (thd->mem_root) Item_func_not_all(thd, it); + + /* ANY/SOME */ + return it->upper_item= new (thd->mem_root) Item_func_nop_all(thd, it); +} + + +/** + Multi update query pre-check. 
+ + @param thd Thread handler + @param tables Global/local table list (have to be the same) + + @retval + FALSE OK + @retval + TRUE Error +*/ + +bool multi_update_precheck(THD *thd, TABLE_LIST *tables) +{ + TABLE_LIST *table; + LEX *lex= thd->lex; + SELECT_LEX *select_lex= lex->first_select_lex(); + DBUG_ENTER("multi_update_precheck"); + + if (select_lex->item_list.elements != lex->value_list.elements) + { + my_message(ER_WRONG_VALUE_COUNT, ER_THD(thd, ER_WRONG_VALUE_COUNT), MYF(0)); + DBUG_RETURN(TRUE); + } + /* + Ensure that we have UPDATE or SELECT privilege for each table + The exact privilege is checked in mysql_multi_update() + */ + for (table= tables; table; table= table->next_local) + { + if (table->is_jtbm()) + continue; + if (table->derived) + table->grant.privilege= SELECT_ACL; + else if ((check_access(thd, UPDATE_ACL, table->db.str, + &table->grant.privilege, + &table->grant.m_internal, + 0, 1) || + check_grant(thd, UPDATE_ACL, table, FALSE, 1, TRUE)) && + (check_access(thd, SELECT_ACL, table->db.str, + &table->grant.privilege, + &table->grant.m_internal, + 0, 0) || + check_grant(thd, SELECT_ACL, table, FALSE, 1, FALSE))) + DBUG_RETURN(TRUE); + + table->grant.orig_want_privilege= NO_ACL; + table->table_in_first_from_clause= 1; + } + /* + Is there tables of subqueries? + */ + if (lex->first_select_lex() != lex->all_selects_list) + { + DBUG_PRINT("info",("Checking sub query list")); + for (table= tables; table; table= table->next_global) + { + if (!table->table_in_first_from_clause) + { + if (check_access(thd, SELECT_ACL, table->db.str, + &table->grant.privilege, + &table->grant.m_internal, + 0, 0) || + check_grant(thd, SELECT_ACL, table, FALSE, 1, FALSE)) + DBUG_RETURN(TRUE); + } + } + } + + DBUG_RETURN(FALSE); +} + +/** + Multi delete query pre-check. 
+ + @param thd Thread handler + @param tables Global/local table list + + @retval + FALSE OK + @retval + TRUE error +*/ + +bool multi_delete_precheck(THD *thd, TABLE_LIST *tables) +{ + SELECT_LEX *select_lex= thd->lex->first_select_lex(); + TABLE_LIST *aux_tables= thd->lex->auxiliary_table_list.first; + TABLE_LIST **save_query_tables_own_last= thd->lex->query_tables_own_last; + DBUG_ENTER("multi_delete_precheck"); + + /* + Temporary tables are pre-opened in 'tables' list only. Here we need to + initialize TABLE instances in 'aux_tables' list. + */ + for (TABLE_LIST *tl= aux_tables; tl; tl= tl->next_global) + { + if (tl->table) + continue; + + if (tl->correspondent_table) + tl->table= tl->correspondent_table->table; + } + + /* sql_yacc guarantees that tables and aux_tables are not zero */ + DBUG_ASSERT(aux_tables != 0); + if (check_table_access(thd, SELECT_ACL, tables, FALSE, UINT_MAX, FALSE)) + DBUG_RETURN(TRUE); + + /* + Since aux_tables list is not part of LEX::query_tables list we + have to juggle with LEX::query_tables_own_last value to be able + call check_table_access() safely. + */ + thd->lex->query_tables_own_last= 0; + if (check_table_access(thd, DELETE_ACL, aux_tables, FALSE, UINT_MAX, FALSE)) + { + thd->lex->query_tables_own_last= save_query_tables_own_last; + DBUG_RETURN(TRUE); + } + thd->lex->query_tables_own_last= save_query_tables_own_last; + + if ((thd->variables.option_bits & OPTION_SAFE_UPDATES) && !select_lex->where) + { + my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE, + ER_THD(thd, ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE), MYF(0)); + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +/* + Given a table in the source list, find a correspondent table in the + table references list. + + @param lex Pointer to LEX representing multi-delete. + @param src Source table to match. + @param ref Table references list. 
+ + @remark The source table list (tables listed before the FROM clause + or tables listed in the FROM clause before the USING clause) may + contain table names or aliases that must match unambiguously one, + and only one, table in the target table list (table references list, + after FROM/USING clause). + + @return Matching table, NULL otherwise. +*/ + +static TABLE_LIST *multi_delete_table_match(LEX *lex, TABLE_LIST *tbl, + TABLE_LIST *tables) +{ + TABLE_LIST *match= NULL; + DBUG_ENTER("multi_delete_table_match"); + + for (TABLE_LIST *elem= tables; elem; elem= elem->next_local) + { + int res; + + if (tbl->is_fqtn && elem->is_alias) + continue; /* no match */ + if (tbl->is_fqtn && elem->is_fqtn) + res= (my_strcasecmp(table_alias_charset, tbl->table_name.str, elem->table_name.str) || + cmp(&tbl->db, &elem->db)); + else if (elem->is_alias) + res= my_strcasecmp(table_alias_charset, tbl->alias.str, elem->alias.str); + else + res= (my_strcasecmp(table_alias_charset, tbl->table_name.str, elem->table_name.str) || + cmp(&tbl->db, &elem->db)); + + if (res) + continue; + + if (match) + { + my_error(ER_NONUNIQ_TABLE, MYF(0), elem->alias.str); + DBUG_RETURN(NULL); + } + + match= elem; + } + + if (!match) + my_error(ER_UNKNOWN_TABLE, MYF(0), tbl->table_name.str, "MULTI DELETE"); + + DBUG_RETURN(match); +} + + +/** + Link tables in auxilary table list of multi-delete with corresponding + elements in main table list, and set proper locks for them. 
+ + @param lex pointer to LEX representing multi-delete + + @retval + FALSE success + @retval + TRUE error +*/ + +bool multi_delete_set_locks_and_link_aux_tables(LEX *lex) +{ + TABLE_LIST *tables= lex->first_select_lex()->table_list.first; + TABLE_LIST *target_tbl; + DBUG_ENTER("multi_delete_set_locks_and_link_aux_tables"); + + lex->table_count_update= 0; + + for (target_tbl= lex->auxiliary_table_list.first; + target_tbl; target_tbl= target_tbl->next_local) + { + lex->table_count_update++; + /* All tables in aux_tables must be found in FROM PART */ + TABLE_LIST *walk= multi_delete_table_match(lex, target_tbl, tables); + if (!walk) + DBUG_RETURN(TRUE); + if (!walk->derived) + target_tbl->table_name= walk->table_name; + walk->updating= target_tbl->updating; + walk->lock_type= target_tbl->lock_type; + /* We can assume that tables to be deleted from are locked for write. */ + DBUG_ASSERT(walk->lock_type >= TL_FIRST_WRITE); + walk->mdl_request.set_type(MDL_SHARED_WRITE); + target_tbl->correspondent_table= walk; // Remember corresponding table + } + DBUG_RETURN(FALSE); +} + + +/** + simple UPDATE query pre-check. + + @param thd Thread handler + @param tables Global table list + + @retval + FALSE OK + @retval + TRUE Error +*/ + +bool update_precheck(THD *thd, TABLE_LIST *tables) +{ + DBUG_ENTER("update_precheck"); + if (thd->lex->first_select_lex()->item_list.elements != + thd->lex->value_list.elements) + { + my_message(ER_WRONG_VALUE_COUNT, ER_THD(thd, ER_WRONG_VALUE_COUNT), MYF(0)); + DBUG_RETURN(TRUE); + } + DBUG_RETURN(check_one_table_access(thd, UPDATE_ACL, tables)); +} + + +/** + simple DELETE query pre-check. 
+ + @param thd Thread handler + @param tables Global table list + + @retval + FALSE OK + @retval + TRUE error +*/ + +bool delete_precheck(THD *thd, TABLE_LIST *tables) +{ + DBUG_ENTER("delete_precheck"); + if (tables->vers_conditions.delete_history) + { + if (check_one_table_access(thd, DELETE_HISTORY_ACL, tables)) + DBUG_RETURN(TRUE); + } + else + { + if (check_one_table_access(thd, DELETE_ACL, tables)) + DBUG_RETURN(TRUE); + /* Set privilege for the WHERE clause */ + tables->grant.want_privilege=(SELECT_ACL & ~tables->grant.privilege); + } + DBUG_RETURN(FALSE); +} + + +/** + simple INSERT query pre-check. + + @param thd Thread handler + @param tables Global table list + + @retval + FALSE OK + @retval + TRUE error +*/ + +bool insert_precheck(THD *thd, TABLE_LIST *tables) +{ + LEX *lex= thd->lex; + DBUG_ENTER("insert_precheck"); + + /* + Check that we have modify privileges for the first table and + select privileges for the rest + */ + privilege_t privilege= (INSERT_ACL | + (lex->duplicates == DUP_REPLACE ? DELETE_ACL : NO_ACL) | + (lex->value_list.elements ? UPDATE_ACL : NO_ACL)); + + if (check_one_table_access(thd, privilege, tables)) + DBUG_RETURN(TRUE); + + if (lex->update_list.elements != lex->value_list.elements) + { + my_message(ER_WRONG_VALUE_COUNT, ER_THD(thd, ER_WRONG_VALUE_COUNT), MYF(0)); + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +/** + Set proper open mode and table type for element representing target table + of CREATE TABLE statement, also adjust statement table list if necessary. +*/ + +void create_table_set_open_action_and_adjust_tables(LEX *lex) +{ + TABLE_LIST *create_table= lex->query_tables; + + if (lex->tmp_table()) + create_table->open_type= OT_TEMPORARY_ONLY; + else + create_table->open_type= OT_BASE_ONLY; + + if (!lex->first_select_lex()->item_list.elements) + { + /* + Avoid opening and locking target table for ordinary CREATE TABLE + or CREATE TABLE LIKE for write (unlike in CREATE ... 
SELECT we + won't do any insertions in it anyway). Not doing this causes + problems when running CREATE TABLE IF NOT EXISTS for already + existing log table. + */ + create_table->lock_type= TL_READ; + } +} + + +/** + CREATE TABLE query pre-check. + + @param thd Thread handler + @param tables Global table list + @param create_table Table which will be created + + @retval + FALSE OK + @retval + TRUE Error +*/ + +bool create_table_precheck(THD *thd, TABLE_LIST *tables, + TABLE_LIST *create_table) +{ + LEX *lex= thd->lex; + SELECT_LEX *select_lex= lex->first_select_lex(); + privilege_t want_priv{CREATE_ACL}; + bool error= TRUE; // Error message is given + DBUG_ENTER("create_table_precheck"); + + /* + Require CREATE [TEMPORARY] privilege on new table; for + CREATE TABLE ... SELECT, also require INSERT. + */ + + if (lex->tmp_table()) + want_priv= CREATE_TMP_ACL; + else if (select_lex->item_list.elements || select_lex->tvc) + want_priv|= INSERT_ACL; + + /* CREATE OR REPLACE on not temporary tables require DROP_ACL */ + if (lex->create_info.or_replace() && !lex->tmp_table()) + want_priv|= DROP_ACL; + + if (check_access(thd, want_priv, create_table->db.str, + &create_table->grant.privilege, + &create_table->grant.m_internal, + 0, 0)) + goto err; + + /* If it is a merge table, check privileges for merge children. */ + if (lex->create_info.merge_list) + { + /* + The user must have (SELECT_ACL | UPDATE_ACL | DELETE_ACL) on the + underlying base tables, even if there are temporary tables with the same + names. + + From user's point of view, it might look as if the user must have these + privileges on temporary tables to create a merge table over them. This is + one of two cases when a set of privileges is required for operations on + temporary tables (see also CREATE TABLE). + + The reason for this behavior stems from the following facts: + + - For merge tables, the underlying table privileges are checked only + at CREATE TABLE / ALTER TABLE time. 
+ + In other words, once a merge table is created, the privileges of + the underlying tables can be revoked, but the user will still have + access to the merge table (provided that the user has privileges on + the merge table itself). + + - Temporary tables shadow base tables. + + I.e. there might be temporary and base tables with the same name, and + the temporary table takes the precedence in all operations. + + - For temporary MERGE tables we do not track if their child tables are + base or temporary. As result we can't guarantee that privilege check + which was done in presence of temporary child will stay relevant + later as this temporary table might be removed. + + If SELECT_ACL | UPDATE_ACL | DELETE_ACL privileges were not checked for + the underlying *base* tables, it would create a security breach as in + Bug#12771903. + */ + + if (check_table_access(thd, SELECT_ACL | UPDATE_ACL | DELETE_ACL, + lex->create_info.merge_list, FALSE, UINT_MAX, FALSE)) + goto err; + } + + if (want_priv != CREATE_TMP_ACL && + check_grant(thd, want_priv, create_table, FALSE, 1, FALSE)) + goto err; + + if (select_lex->item_list.elements) + { + /* Check permissions for used tables in CREATE TABLE ... SELECT */ + if (tables && check_table_access(thd, SELECT_ACL, tables, FALSE, + UINT_MAX, FALSE)) + goto err; + } + else if (lex->create_info.like()) + { + if (check_table_access(thd, SELECT_ACL, tables, FALSE, UINT_MAX, FALSE)) + goto err; + } + + if (check_fk_parent_table_access(thd, &lex->create_info, &lex->alter_info, + create_table->db.str)) + goto err; + + error= FALSE; + +err: + DBUG_RETURN(error); +} + + +/** + Check privileges for LOCK TABLES statement. + + @param thd Thread context. + @param tables List of tables to be locked. + + @retval FALSE - Success. + @retval TRUE - Failure. 
+*/ + +static bool lock_tables_precheck(THD *thd, TABLE_LIST *tables) +{ + TABLE_LIST *first_not_own_table= thd->lex->first_not_own_table(); + + for (TABLE_LIST *table= tables; table != first_not_own_table && table; + table= table->next_global) + { + if (is_temporary_table(table)) + continue; + + if (check_table_access(thd, PRIV_LOCK_TABLES, table, + FALSE, 1, FALSE)) + return TRUE; + } + + return FALSE; +} + + +/** + negate given expression. + + @param thd thread handler + @param expr expression for negation + + @return + negated expression +*/ + +Item *negate_expression(THD *thd, Item *expr) +{ + Item *negated; + if (expr->type() == Item::FUNC_ITEM && + ((Item_func *) expr)->functype() == Item_func::NOT_FUNC) + { + /* it is NOT(NOT( ... )) */ + Item *arg= ((Item_func *) expr)->arguments()[0]; + const Type_handler *fh= arg->fixed_type_handler(); + enum_parsing_place place= thd->lex->current_select->parsing_place; + if ((fh && fh->is_bool_type()) || place == IN_WHERE || place == IN_HAVING) + return arg; + /* + if it is not boolean function then we have to emulate value of + not(not(a)), it will be a != 0 + */ + return new (thd->mem_root) Item_func_ne(thd, arg, new (thd->mem_root) Item_int(thd, (char*) "0", 0, 1)); + } + + if ((negated= expr->neg_transformer(thd)) != 0) + return negated; + return new (thd->mem_root) Item_func_not(thd, expr); +} + +/** + Set the specified definer to the default value, which is the + current user in the thread. 
+ + @param[in] thd thread handler + @param[out] definer definer +*/ + +void get_default_definer(THD *thd, LEX_USER *definer, bool role) +{ + const Security_context *sctx= thd->security_ctx; + + if (role) + { + definer->user.str= const_cast(sctx->priv_role); + definer->host= empty_clex_str; + } + else + { + definer->user.str= const_cast(sctx->priv_user); + definer->host.str= const_cast(sctx->priv_host); + definer->host.length= strlen(definer->host.str); + } + definer->user.length= strlen(definer->user.str); + definer->auth= NULL; +} + + +/** + Create default definer for the specified THD. + + @param[in] thd thread handler + + @return + - On success, return a valid pointer to the created and initialized + LEX_USER, which contains definer information. + - On error, return 0. +*/ + +LEX_USER *create_default_definer(THD *thd, bool role) +{ + LEX_USER *definer; + + if (unlikely(! (definer= (LEX_USER*) thd->alloc(sizeof(LEX_USER))))) + return 0; + + thd->get_definer(definer, role); + + if (role && definer->user.length == 0) + { + my_error(ER_MALFORMED_DEFINER, MYF(0)); + return 0; + } + else + return definer; +} + + +/** + Create definer with the given user and host names. + + @param[in] thd thread handler + @param[in] user_name user name + @param[in] host_name host name + + @return + - On success, return a valid pointer to the created and initialized + LEX_USER, which contains definer information. + - On error, return 0. +*/ + +LEX_USER *create_definer(THD *thd, LEX_CSTRING *user_name, + LEX_CSTRING *host_name) +{ + LEX_USER *definer; + + /* Create and initialize. */ + + if (unlikely(!(definer= (LEX_USER*) thd->alloc(sizeof(LEX_USER))))) + return 0; + + definer->user= *user_name; + definer->host= *host_name; + definer->auth= NULL; + + return definer; +} + + +/** + Check that byte length of a string does not exceed some limit. + + @param str string to be checked + @param err_msg Number of error message to be displayed if the string + is too long. 0 if empty error message. 
+ @param max_length max length + + @retval + FALSE the passed string is not longer than max_length + @retval + TRUE the passed string is longer than max_length + + NOTE + The function is not used in existing code but can be useful later? +*/ + +bool check_string_byte_length(const LEX_CSTRING *str, uint err_msg, + size_t max_byte_length) +{ + if (str->length <= max_byte_length) + return FALSE; + + my_error(ER_WRONG_STRING_LENGTH, MYF(0), str->str, + err_msg ? ER(err_msg) : "", max_byte_length); + + return TRUE; +} + + +/* + Check that char length of a string does not exceed some limit. + + SYNOPSIS + check_string_char_length() + str string to be checked + err_msg Number of error message to be displayed if the string + is too long. 0 if empty error message. + max_char_length max length in symbols + cs string charset + + RETURN + FALSE the passed string is not longer than max_char_length + TRUE the passed string is longer than max_char_length +*/ + + +bool check_string_char_length(const LEX_CSTRING *str, uint err_msg, + size_t max_char_length, CHARSET_INFO *cs, + bool no_error) +{ + Well_formed_prefix prefix(cs, str->str, str->length, max_char_length); + if (likely(!prefix.well_formed_error_pos() && + str->length == prefix.length())) + return FALSE; + + if (!no_error) + { + ErrConvString err(str->str, str->length, cs); + my_error(ER_WRONG_STRING_LENGTH, MYF(0), err.ptr(), + err_msg ? 
ER(err_msg) : "", + max_char_length); + } + return TRUE; +} + + +bool check_ident_length(const LEX_CSTRING *ident) +{ + if (check_string_char_length(ident, 0, NAME_CHAR_LEN, system_charset_info, 1)) + { + my_error(ER_TOO_LONG_IDENT, MYF(0), ident->str); + return 1; + } + return 0; +} + + +/* + Check if path does not contain mysql data home directory + + SYNOPSIS + path_starts_from_data_home_dir() + dir directory, with all symlinks resolved + + RETURN VALUES + 0 ok + 1 error ; Given path contains data directory +*/ +extern "C" { + +int path_starts_from_data_home_dir(const char *path) +{ + size_t dir_len= strlen(path); + DBUG_ENTER("path_starts_from_data_home_dir"); + + if (mysql_unpacked_real_data_home_len<= dir_len) + { + if (dir_len > mysql_unpacked_real_data_home_len && + path[mysql_unpacked_real_data_home_len] != FN_LIBCHAR) + DBUG_RETURN(0); + + if (lower_case_file_system) + { + if (!default_charset_info->strnncoll(path, + mysql_unpacked_real_data_home_len, + mysql_unpacked_real_data_home, + mysql_unpacked_real_data_home_len)) + { + DBUG_PRINT("error", ("Path is part of mysql_real_data_home")); + DBUG_RETURN(1); + } + } + else if (!memcmp(path, mysql_unpacked_real_data_home, + mysql_unpacked_real_data_home_len)) + { + DBUG_PRINT("error", ("Path is part of mysql_real_data_home")); + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + +} + +/* + Check if path does not contain mysql data home directory + + SYNOPSIS + test_if_data_home_dir() + dir directory + + RETURN VALUES + 0 ok + 1 error ; Given path contains data directory +*/ + +int test_if_data_home_dir(const char *dir) +{ + char path[FN_REFLEN]; + DBUG_ENTER("test_if_data_home_dir"); + + if (!dir) + DBUG_RETURN(0); + + (void) fn_format(path, dir, "", "", MY_RETURN_REAL_PATH); + DBUG_RETURN(path_starts_from_data_home_dir(path)); +} + + +int error_if_data_home_dir(const char *path, const char *what) +{ + size_t dirlen; + char dirpath[FN_REFLEN]; + if (path) + { + dirname_part(dirpath, path, &dirlen); + if 
(test_if_data_home_dir(dirpath)) + { + my_error(ER_WRONG_ARGUMENTS, MYF(0), what); + return 1; + } + } + return 0; +} + +/** + Check that host name string is valid. + + @param[in] str string to be checked + + @return Operation status + @retval FALSE host name is ok + @retval TRUE host name string is longer than max_length or + has invalid symbols +*/ + +bool check_host_name(LEX_CSTRING *str) +{ + const char *name= str->str; + const char *end= str->str + str->length; + if (check_string_byte_length(str, ER_HOSTNAME, HOSTNAME_LENGTH)) + return TRUE; + + while (name != end) + { + if (*name == '@') + { + my_printf_error(ER_UNKNOWN_ERROR, + "Malformed hostname (illegal symbol: '%c')", MYF(0), + *name); + return TRUE; + } + name++; + } + return FALSE; +} + + +extern int MYSQLparse(THD *thd); // from yy_mariadb.cc +extern int ORAparse(THD *thd); // from yy_oracle.cc + + +/** + This is a wrapper of MYSQLparse(). All the code should call parse_sql() + instead of MYSQLparse(). + + @param thd Thread context. + @param parser_state Parser state. + @param creation_ctx Object creation context. + + @return Error status. + @retval FALSE on success. + @retval TRUE on parsing error. +*/ + +bool parse_sql(THD *thd, Parser_state *parser_state, + Object_creation_ctx *creation_ctx, bool do_pfs_digest) +{ + bool ret_value; + DBUG_ENTER("parse_sql"); + DBUG_ASSERT(thd->m_parser_state == NULL); + DBUG_ASSERT(thd->lex->m_sql_cmd == NULL); + + MYSQL_QUERY_PARSE_START(thd->query()); + /* Backup creation context. */ + + Object_creation_ctx *backup_ctx= NULL; + + if (creation_ctx) + backup_ctx= creation_ctx->set_n_backup(thd); + + /* Set parser state. 
*/ + + thd->m_parser_state= parser_state; + + parser_state->m_digest_psi= NULL; + parser_state->m_lip.m_digest= NULL; + + if (do_pfs_digest) + { + /* Start Digest */ + parser_state->m_digest_psi= MYSQL_DIGEST_START(thd->m_statement_psi); + + if (parser_state->m_digest_psi != NULL) + { + /* + If either: + - the caller wants to compute a digest + - the performance schema wants to compute a digest + set the digest listener in the lexer. + */ + parser_state->m_lip.m_digest= thd->m_digest; + parser_state->m_lip.m_digest->m_digest_storage.m_charset_number= thd->charset()->number; + } + } + + /* Parse the query. */ + + bool mysql_parse_status= thd->variables.sql_mode & MODE_ORACLE + ? ORAparse(thd) : MYSQLparse(thd); + + if (mysql_parse_status) + /* + Restore the original LEX if it was replaced when parsing + a stored procedure. We must ensure that a parsing error + does not leave any side effects in the THD. + */ + LEX::cleanup_lex_after_parse_error(thd); + + DBUG_ASSERT(opt_bootstrap || mysql_parse_status || + thd->lex->select_stack_top == 0); + thd->lex->current_select= thd->lex->first_select_lex(); + + /* + Check that if MYSQLparse() failed either thd->is_error() is set, or an + internal error handler is set. + + The assert will not catch a situation where parsing fails without an + error reported if an error handler exists. The problem is that the + error handler might have intercepted the error, so thd->is_error() is + not set. However, there is no way to be 100% sure here (the error + handler might be for other errors than parsing one). + */ + + DBUG_ASSERT(!mysql_parse_status || + thd->is_error() || + thd->get_internal_handler()); + + /* Reset parser state. */ + + thd->m_parser_state= NULL; + + /* Restore creation context. */ + + if (creation_ctx) + creation_ctx->restore_env(thd, backup_ctx); + + /* That's it. 
*/ + + ret_value= mysql_parse_status || thd->is_fatal_error; + + if ((ret_value == 0) && (parser_state->m_digest_psi != NULL)) + { + /* + On parsing success, record the digest in the performance schema. + */ + DBUG_ASSERT(do_pfs_digest); + DBUG_ASSERT(thd->m_digest != NULL); + MYSQL_DIGEST_END(parser_state->m_digest_psi, + & thd->m_digest->m_digest_storage); + } + + MYSQL_QUERY_PARSE_DONE(ret_value); + DBUG_RETURN(ret_value); +} + +/** + @} (end of group Runtime_Environment) +*/ + + +void LEX::mark_first_table_as_inserting() +{ + TABLE_LIST *t= first_select_lex()->table_list.first; + DBUG_ENTER("Query_tables_list::mark_tables_with_important_flags"); + DBUG_ASSERT(sql_command_flags[sql_command] & CF_INSERTS_DATA); + t->for_insert_data= TRUE; + DBUG_PRINT("info", ("table_list: %p name: %s db: %s command: %u", + t, t->table_name.str,t->db.str, sql_command)); + DBUG_VOID_RETURN; +} diff --git a/sql/sql_parse.h b/sql/sql_parse.h new file mode 100644 index 00000000..eeb7f832 --- /dev/null +++ b/sql/sql_parse.h @@ -0,0 +1,191 @@ +/* Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_PARSE_INCLUDED +#define SQL_PARSE_INCLUDED + +#include "sql_acl.h" /* GLOBAL_ACLS */ + +class Comp_creator; +class Item; +class Object_creation_ctx; +class Parser_state; +struct TABLE_LIST; +class THD; +class Table_ident; +struct LEX; + +enum enum_mysql_completiontype { + ROLLBACK_RELEASE=-2, ROLLBACK=1, ROLLBACK_AND_CHAIN=7, + COMMIT_RELEASE=-1, COMMIT=0, COMMIT_AND_CHAIN=6 +}; + +extern "C" int path_starts_from_data_home_dir(const char *dir); +int test_if_data_home_dir(const char *dir); +int error_if_data_home_dir(const char *path, const char *what); +my_bool net_allocate_new_packet(NET *net, void *thd, uint my_flags); + +bool multi_update_precheck(THD *thd, TABLE_LIST *tables); +bool multi_delete_precheck(THD *thd, TABLE_LIST *tables); +int mysql_multi_update_prepare(THD *thd); +int mysql_multi_delete_prepare(THD *thd); +int mysql_insert_select_prepare(THD *thd,select_result *sel_res); +bool update_precheck(THD *thd, TABLE_LIST *tables); +bool delete_precheck(THD *thd, TABLE_LIST *tables); +bool insert_precheck(THD *thd, TABLE_LIST *tables); +bool create_table_precheck(THD *thd, TABLE_LIST *tables, + TABLE_LIST *create_table); +bool check_fk_parent_table_access(THD *thd, + HA_CREATE_INFO *create_info, + Alter_info *alter_info, + const char* create_db); + +bool parse_sql(THD *thd, Parser_state *parser_state, + Object_creation_ctx *creation_ctx, bool do_pfs_digest=false); + +void free_items(Item *item); +void cleanup_items(Item *item); + +Comp_creator *comp_eq_creator(bool invert); +Comp_creator *comp_ge_creator(bool invert); +Comp_creator *comp_gt_creator(bool invert); +Comp_creator *comp_le_creator(bool invert); +Comp_creator *comp_lt_creator(bool invert); +Comp_creator *comp_ne_creator(bool invert); + +int prepare_schema_table(THD *thd, LEX 
*lex, Table_ident *table_ident, + enum enum_schema_tables schema_table_idx); +void get_default_definer(THD *thd, LEX_USER *definer, bool role); +LEX_USER *create_default_definer(THD *thd, bool role); +LEX_USER *create_definer(THD *thd, LEX_CSTRING *user_name, LEX_CSTRING *host_name); +LEX_USER *get_current_user(THD *thd, LEX_USER *user, bool lock=true); +bool sp_process_definer(THD *thd); +bool check_string_byte_length(const LEX_CSTRING *str, uint err_msg, + size_t max_byte_length); +bool check_string_char_length(const LEX_CSTRING *str, uint err_msg, + size_t max_char_length, CHARSET_INFO *cs, + bool no_error); +bool check_ident_length(const LEX_CSTRING *ident); +bool check_host_name(LEX_CSTRING *str); +bool check_identifier_name(LEX_CSTRING *str, uint max_char_length, + uint err_code, const char *param_for_err_msg); +bool mysql_test_parse_for_slave(THD *thd,char *inBuf,uint length); +bool sqlcom_can_generate_row_events(const THD *thd); +bool stmt_causes_implicit_commit(THD *thd, uint mask); +bool is_update_query(enum enum_sql_command command); +bool is_log_table_write_query(enum enum_sql_command command); +bool alloc_query(THD *thd, const char *packet, size_t packet_length); +void mysql_parse(THD *thd, char *rawbuf, uint length, + Parser_state *parser_state); +bool mysql_new_select(LEX *lex, bool move_down, SELECT_LEX *sel); +void create_select_for_variable(THD *thd, LEX_CSTRING *var_name); +void create_table_set_open_action_and_adjust_tables(LEX *lex); +void mysql_init_multi_delete(LEX *lex); +bool multi_delete_set_locks_and_link_aux_tables(LEX *lex); +void create_table_set_open_action_and_adjust_tables(LEX *lex); +int bootstrap(MYSQL_FILE *file); +bool run_set_statement_if_requested(THD *thd, LEX *lex); +int mysql_execute_command(THD *thd, bool is_called_from_prepared_stmt=false); +enum dispatch_command_return +{ + DISPATCH_COMMAND_SUCCESS=0, + DISPATCH_COMMAND_CLOSE_CONNECTION= 1, + DISPATCH_COMMAND_WOULDBLOCK= 2 +}; + +dispatch_command_return do_command(THD 
*thd, bool blocking = true); +dispatch_command_return dispatch_command(enum enum_server_command command, THD *thd, + char* packet, uint packet_length, bool blocking = true); +void log_slow_statement(THD *thd); +bool append_file_to_dir(THD *thd, const char **filename_ptr, + const LEX_CSTRING *table_name); +void execute_init_command(THD *thd, LEX_STRING *init_command, + mysql_rwlock_t *var_lock); +bool add_to_list(THD *thd, SQL_I_List &list, Item *group, bool asc); +void add_join_on(THD *thd, TABLE_LIST *b, Item *expr); +void add_join_natural(TABLE_LIST *a,TABLE_LIST *b,List *using_fields, + SELECT_LEX *lex); +bool add_proc_to_list(THD *thd, Item *item); +bool push_new_name_resolution_context(THD *thd, + TABLE_LIST *left_op, + TABLE_LIST *right_op); +void init_update_queries(void); +Item *normalize_cond(THD *thd, Item *cond); +Item *negate_expression(THD *thd, Item *expr); +bool check_stack_overrun(THD *thd, long margin, uchar *dummy); + +/* Variables */ + +extern const LEX_CSTRING any_db; +extern uint sql_command_flags[]; +extern uint server_command_flags[]; +extern const LEX_CSTRING command_name[]; +extern uint server_command_flags[]; + +/* Inline functions */ +inline bool check_identifier_name(LEX_CSTRING *str, uint err_code) +{ + return check_identifier_name(str, NAME_CHAR_LEN, err_code, ""); +} + +inline bool check_identifier_name(LEX_CSTRING *str) +{ + return check_identifier_name(str, NAME_CHAR_LEN, 0, ""); +} + +#ifndef NO_EMBEDDED_ACCESS_CHECKS +bool check_one_table_access(THD *thd, privilege_t privilege, TABLE_LIST *tables); +bool check_single_table_access(THD *thd, privilege_t privilege, + TABLE_LIST *tables, bool no_errors); +bool check_routine_access(THD *thd, privilege_t want_access, + const LEX_CSTRING *db, + const LEX_CSTRING *name, + const Sp_handler *sph, bool no_errors); +bool check_some_access(THD *thd, privilege_t want_access, TABLE_LIST *table); +bool check_some_routine_access(THD *thd, const char *db, const char *name, + const Sp_handler *sph); 
+bool check_table_access(THD *thd, privilege_t requirements,TABLE_LIST *tables, + bool any_combination_of_privileges_will_do, + uint number, + bool no_errors); +#else +inline bool check_one_table_access(THD *thd, privilege_t privilege, TABLE_LIST *tables) +{ return false; } +inline bool check_single_table_access(THD *thd, privilege_t privilege, + TABLE_LIST *tables, bool no_errors) +{ return false; } +inline bool check_routine_access(THD *thd, privilege_t want_access, + const LEX_CSTRING *db, + const LEX_CSTRING *name, + const Sp_handler *sph, bool no_errors) +{ return false; } +inline bool check_some_access(THD *thd, privilege_t want_access, TABLE_LIST *table) +{ + table->grant.privilege= want_access; + return false; +} +inline bool check_some_routine_access(THD *thd, const char *db, + const char *name, + const Sp_handler *sph) +{ return false; } +inline bool +check_table_access(THD *thd, privilege_t requirements,TABLE_LIST *tables, + bool any_combination_of_privileges_will_do, + uint number, + bool no_errors) +{ return false; } +#endif /*NO_EMBEDDED_ACCESS_CHECKS*/ + +#endif /* SQL_PARSE_INCLUDED */ diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc new file mode 100644 index 00000000..3c1a803c --- /dev/null +++ b/sql/sql_partition.cc @@ -0,0 +1,9195 @@ +/* Copyright (c) 2005, 2017, Oracle and/or its affiliates. + Copyright (c) 2009, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + This file is a container for general functionality related + to partitioning introduced in MySQL version 5.1. It contains functionality + used by all handlers that support partitioning, such as + the partitioning handler itself and the NDB handler. + (Much of the code in this file has been split into partition_info.cc and + the header files partition_info.h + partition_element.h + sql_partition.h) + + The first version was written by Mikael Ronstrom 2004-2006. + Various parts of the optimizer code was written by Sergey Petrunia. + Code have been maintained by Mattias Jonsson. + The second version was written by Mikael Ronstrom 2006-2007 with some + final fixes for partition pruning in 2008-2009 with assistance from Sergey + Petrunia and Mattias Jonsson. + + The first version supports RANGE partitioning, LIST partitioning, HASH + partitioning and composite partitioning (hereafter called subpartitioning) + where each RANGE/LIST partitioning is HASH partitioned. The hash function + can either be supplied by the user or by only a list of fields (also + called KEY partitioning), where the MySQL server will use an internal + hash function. + There are quite a few defaults that can be used as well. + + The second version introduces a new variant of RANGE and LIST partitioning + which is often referred to as column lists in the code variables. This + enables a user to specify a set of columns and their concatenated value + as the partition value. By comparing the concatenation of these values + the proper partition can be chosen. 
+*/ + +/* Some general useful functions */ + +#define MYSQL_LEX 1 +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_partition.h" +#include "key.h" // key_restore +#include "sql_parse.h" // parse_sql +#include "sql_cache.h" // query_cache_invalidate3 +#include "lock.h" // mysql_lock_remove +#include "sql_show.h" // append_identifier +#include +#include "transaction.h" +#include "debug_sync.h" + +#include "sql_base.h" // close_all_tables_for_name +#include "sql_table.h" // build_table_filename, + // build_table_shadow_filename, + // table_to_filename + // mysql_*_alter_copy_data +#include "opt_range.h" // store_key_image_to_rec +#include "sql_alter.h" // Alter_table_ctx +#include "sql_select.h" +#include "ddl_log.h" +#include "tztime.h" // my_tz_OFFSET0 +#include "create_options.h" // engine_option_value + +#include +using std::max; +using std::min; + +#ifdef WITH_PARTITION_STORAGE_ENGINE +#include "ha_partition.h" + +/* + Partition related functions declarations and some static constants; +*/ +static int get_partition_id_list_col(partition_info *, uint32 *, longlong *); +static int get_partition_id_list(partition_info *, uint32 *, longlong *); +static int get_partition_id_range_col(partition_info *, uint32 *, longlong *); +static int get_partition_id_range(partition_info *, uint32 *, longlong *); +static int vers_get_partition_id(partition_info *, uint32 *, longlong *); +static int get_part_id_charset_func_part(partition_info *, uint32 *, longlong *); +static int get_part_id_charset_func_subpart(partition_info *, uint32 *); +static int get_partition_id_hash_nosub(partition_info *, uint32 *, longlong *); +static int get_partition_id_key_nosub(partition_info *, uint32 *, longlong *); +static int get_partition_id_linear_hash_nosub(partition_info *, uint32 *, longlong *); +static int get_partition_id_linear_key_nosub(partition_info *, uint32 *, longlong *); +static int get_partition_id_with_sub(partition_info *, uint32 *, longlong *); +static int 
get_partition_id_hash_sub(partition_info *part_info, uint32 *part_id); +static int get_partition_id_key_sub(partition_info *part_info, uint32 *part_id); +static int get_partition_id_linear_hash_sub(partition_info *part_info, uint32 *part_id); +static int get_partition_id_linear_key_sub(partition_info *part_info, uint32 *part_id); +static uint32 get_next_partition_via_walking(PARTITION_ITERATOR*); +static void set_up_range_analysis_info(partition_info *part_info); +static uint32 get_next_subpartition_via_walking(PARTITION_ITERATOR*); +#endif + +uint32 get_next_partition_id_range(PARTITION_ITERATOR* part_iter); +uint32 get_next_partition_id_list(PARTITION_ITERATOR* part_iter); + +#ifdef WITH_PARTITION_STORAGE_ENGINE +static int get_part_iter_for_interval_via_mapping(partition_info *, bool, + uint32 *, uchar *, uchar *, uint, uint, uint, PARTITION_ITERATOR *); +static int get_part_iter_for_interval_cols_via_map(partition_info *, bool, + uint32 *, uchar *, uchar *, uint, uint, uint, PARTITION_ITERATOR *); +static int get_part_iter_for_interval_via_walking(partition_info *, bool, + uint32 *, uchar *, uchar *, uint, uint, uint, PARTITION_ITERATOR *); +static int cmp_rec_and_tuple(part_column_list_val *val, uint32 nvals_in_rec); +static int cmp_rec_and_tuple_prune(part_column_list_val *val, + uint32 n_vals_in_rec, + bool is_left_endpoint, + bool include_endpoint); + +/* + Convert constants in VALUES definition to the character set the + corresponding field uses. 
+
+  SYNOPSIS
+    convert_charset_partition_constant()
+    item                Item to convert
+    cs                  Character set to convert to
+
+  RETURN VALUE
+    NULL                Error
+    item                New converted item
+*/
+
+Item* convert_charset_partition_constant(Item *item, CHARSET_INFO *cs)
+{
+  THD *thd= current_thd;
+  Name_resolution_context *context= &thd->lex->current_select->context;
+  TABLE_LIST *save_list= context->table_list;
+  const char *save_where= thd->where;
+
+  /* Wrap the constant in a charset-conversion item, then fix it. */
+  item= item->safe_charset_converter(thd, cs);
+  context->table_list= NULL;
+  thd->where= "convert character set partition constant";
+  if (item && item->fix_fields_if_needed(thd, (Item**)NULL))
+    item= NULL;                        /* fix_fields failed: signal error */
+  thd->where= save_where;
+  context->table_list= save_list;
+  return item;
+}
+
+
+/**
+  A support function to check if a name is in a list of strings.
+
+  @param name        String searched for
+  @param list_names  A list of names searched in
+
+  @return True if the name is in the list.
+    @retval true   String found
+    @retval false  String not found
+*/
+
+static bool is_name_in_list(const char *name, List<const char> list_names)
+{
+  List_iterator<const char> names_it(list_names);
+  const char *list_name;
+
+  /*
+    Iterate until the list iterator is exhausted instead of using a counted
+    do/while: the original loop body executed at least once even when the
+    list was empty, passing a NULL name to my_strcasecmp().
+    NOTE(review): template arguments of List/List_iterator were lost in the
+    extracted text and have been restored as <const char> — confirm against
+    upstream sql_partition.cc.
+  */
+  while ((list_name= names_it++))
+  {
+    if (!my_strcasecmp(system_charset_info, name, list_name))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+
+/*
+  Set-up defaults for partitions.
+ + SYNOPSIS + partition_default_handling() + table Table object + part_info Partition info to set up + is_create_table_ind Is this part of a table creation + normalized_path Normalized path name of table and database + + RETURN VALUES + TRUE Error + FALSE Success +*/ + +bool partition_default_handling(THD *thd, TABLE *table, partition_info *part_info, + bool is_create_table_ind, + const char *normalized_path) +{ + DBUG_ENTER("partition_default_handling"); + + if (!is_create_table_ind) + { + if (part_info->use_default_num_partitions) + { + if (table->file->get_no_parts(normalized_path, &part_info->num_parts)) + { + DBUG_RETURN(TRUE); + } + } + else if (part_info->is_sub_partitioned() && + part_info->use_default_num_subpartitions) + { + uint num_parts; + if (table->file->get_no_parts(normalized_path, &num_parts)) + { + DBUG_RETURN(TRUE); + } + DBUG_ASSERT(part_info->num_parts > 0); + DBUG_ASSERT((num_parts % part_info->num_parts) == 0); + part_info->num_subparts= num_parts / part_info->num_parts; + } + } + part_info->set_up_defaults_for_partitioning(thd, table->file, + NULL, 0U); + DBUG_RETURN(FALSE); +} + + +/* + A useful routine used by update/delete_row for partition handlers to + calculate the partition id. + + SYNOPSIS + get_part_for_buf() + buf Buffer of old record + rec0 Reference to table->record[0] + part_info Reference to partition information + out:part_id The returned partition id to delete from + + RETURN VALUE + 0 Success + > 0 Error code + + DESCRIPTION + Dependent on whether buf is not record[0] we need to prepare the + fields. Then we call the function pointer get_partition_id to + calculate the partition id. 
+*/
+
+int get_part_for_buf(const uchar *buf, const uchar *rec0,
+                     partition_info *part_info, uint32 *part_id)
+{
+  int error;
+  longlong func_value;
+  DBUG_ENTER("get_part_for_buf");
+
+  if (buf == rec0)
+  {
+    /* Partition fields already point at this record; evaluate directly. */
+    error= part_info->get_partition_id(part_info, part_id, &func_value);
+    if (likely(!error))
+      DBUG_PRINT("info", ("Partition %d", *part_id));
+  }
+  else
+  {
+    /*
+      The record lives outside record[0]: retarget the partition fields at
+      buf, evaluate the partition function, then restore the field pointers
+      regardless of the outcome.
+    */
+    Field **part_field_array= part_info->full_part_field_array;
+    part_info->table->move_fields(part_field_array, buf, rec0);
+    error= part_info->get_partition_id(part_info, part_id, &func_value);
+    part_info->table->move_fields(part_field_array, rec0, buf);
+    if (likely(!error))
+      DBUG_PRINT("info", ("Partition %d (path2)", *part_id));
+  }
+  if (unlikely(error))
+  {
+    /* Remember the offending function value for error reporting. */
+    part_info->err_value= func_value;
+    DBUG_RETURN(error);
+  }
+  DBUG_RETURN(0);
+}
+
+
+/*
+  This method is used to set-up both partition and subpartitioning
+  field array and used for all types of partitioning.
+  It is part of the logic around fix_partition_func.
+
+  SYNOPSIS
+    set_up_field_array()
+    table                TABLE object for which partition fields are set-up
+    sub_part             Is the table subpartitioned as well
+
+  RETURN VALUE
+    TRUE                 Error, some field didn't meet requirements
+    FALSE                Ok, partition field array set-up
+
+  DESCRIPTION
+
+    A great number of functions below here is part of the fix_partition_func
+    method. It is used to set up the partition structures for execution from
+    openfrm. It is called at the end of the openfrm when the table struct has
+    been set-up apart from the partition information.
+    It involves:
+    1) Setting arrays of fields for the partition functions.
+    2) Setting up binary search array for LIST partitioning
+    3) Setting up array for binary search for RANGE partitioning
+    4) Setting up key_map's to assist in quick evaluation whether one
+       can deduce anything from a given index of what partition to use
+    5) Checking whether a set of partitions can be derived from a range on
+       a field in the partition function.
+ As part of doing this there is also a great number of error controls. + This is actually the place where most of the things are checked for + partition information when creating a table. + Things that are checked includes + 1) All fields of partition function in Primary keys and unique indexes + (if not supported) + + + Create an array of partition fields (NULL terminated). Before this method + is called fix_fields or find_table_in_sef has been called to set + GET_FIXED_FIELDS_FLAG on all fields that are part of the partition + function. +*/ + +static bool set_up_field_array(THD *thd, TABLE *table, + bool is_sub_part) +{ + Field **ptr, *field, **field_array; + uint num_fields= 0; + uint size_field_array; + uint i= 0; + uint inx; + partition_info *part_info= table->part_info; + int result= FALSE; + DBUG_ENTER("set_up_field_array"); + + ptr= table->field; + while ((field= *(ptr++))) + { + if (field->flags & GET_FIXED_FIELDS_FLAG) + { + if (table->versioned(VERS_TRX_ID) + && unlikely(field->flags & VERS_SYSTEM_FIELD)) + { + my_error(ER_VERS_TRX_PART_HISTORIC_ROW_NOT_SUPPORTED, MYF(0)); + DBUG_RETURN(TRUE); + } + num_fields++; + } + } + if (unlikely(num_fields > MAX_REF_PARTS)) + { + char *err_str; + if (is_sub_part) + err_str= (char*)"subpartition function"; + else + err_str= (char*)"partition function"; + my_error(ER_TOO_MANY_PARTITION_FUNC_FIELDS_ERROR, MYF(0), err_str); + DBUG_RETURN(TRUE); + } + if (num_fields == 0) + { + /* + We are using hidden key as partitioning field + */ + DBUG_ASSERT(!is_sub_part); + DBUG_RETURN(FALSE); + } + size_field_array= (num_fields+1)*sizeof(Field*); + field_array= (Field**) thd->calloc(size_field_array); + if (unlikely(!field_array)) + DBUG_RETURN(TRUE); + + ptr= table->field; + while ((field= *(ptr++))) + { + if (field->flags & GET_FIXED_FIELDS_FLAG) + { + field->flags&= ~GET_FIXED_FIELDS_FLAG; + field->flags|= FIELD_IN_PART_FUNC_FLAG; + if (likely(!result)) + { + if (!is_sub_part && part_info->column_list) + { + List_iterator 
it(part_info->part_field_list); + const char *field_name; + + DBUG_ASSERT(num_fields == part_info->part_field_list.elements); + inx= 0; + do + { + field_name= it++; + if (!my_strcasecmp(system_charset_info, + field_name, + field->field_name.str)) + break; + } while (++inx < num_fields); + if (inx == num_fields) + { + /* + Should not occur since it should already been checked in either + add_column_list_values, handle_list_of_fields, + check_partition_info etc. + */ + DBUG_ASSERT(0); + my_error(ER_FIELD_NOT_FOUND_PART_ERROR, MYF(0)); + result= TRUE; + continue; + } + } + else + inx= i; + field_array[inx]= field; + i++; + + /* + We check that the fields are proper. It is required for each + field in a partition function to: + 1) Not be a BLOB of any type + A BLOB takes too long time to evaluate so we don't want it for + performance reasons. + */ + + if (unlikely(field->flags & BLOB_FLAG)) + { + my_error(ER_BLOB_FIELD_IN_PART_FUNC_ERROR, MYF(0)); + result= TRUE; + } + } + } + } + field_array[num_fields]= 0; + if (!is_sub_part) + { + part_info->part_field_array= field_array; + part_info->num_part_fields= num_fields; + } + else + { + part_info->subpart_field_array= field_array; + part_info->num_subpart_fields= num_fields; + } + DBUG_RETURN(result); +} + + + +/* + Create a field array including all fields of both the partitioning and the + subpartitioning functions. + + SYNOPSIS + create_full_part_field_array() + thd Thread handle + table TABLE object for which partition fields are set-up + part_info Reference to partitioning data structure + + RETURN VALUE + TRUE Memory allocation of field array failed + FALSE Ok + + DESCRIPTION + If there is no subpartitioning then the same array is used as for the + partitioning. Otherwise a new array is built up using the flag + FIELD_IN_PART_FUNC in the field object. 
+ This function is called from fix_partition_func +*/ + +static bool create_full_part_field_array(THD *thd, TABLE *table, + partition_info *part_info) +{ + bool result= FALSE; + Field **ptr; + my_bitmap_map *bitmap_buf; + DBUG_ENTER("create_full_part_field_array"); + + if (!part_info->is_sub_partitioned()) + { + part_info->full_part_field_array= part_info->part_field_array; + part_info->num_full_part_fields= part_info->num_part_fields; + } + else + { + Field *field, **field_array; + uint num_part_fields=0, size_field_array; + ptr= table->field; + while ((field= *(ptr++))) + { + if (field->flags & FIELD_IN_PART_FUNC_FLAG) + num_part_fields++; + } + size_field_array= (num_part_fields+1)*sizeof(Field*); + field_array= (Field**) thd->calloc(size_field_array); + if (unlikely(!field_array)) + { + result= TRUE; + goto end; + } + num_part_fields= 0; + ptr= table->field; + while ((field= *(ptr++))) + { + if (field->flags & FIELD_IN_PART_FUNC_FLAG) + field_array[num_part_fields++]= field; + } + field_array[num_part_fields]=0; + part_info->full_part_field_array= field_array; + part_info->num_full_part_fields= num_part_fields; + } + + /* + Initialize the set of all fields used in partition and subpartition + expression. Required for testing of partition fields in write_set + when updating. We need to set all bits in read_set because the row + may need to be inserted in a different [sub]partition. + */ + if (!(bitmap_buf= (my_bitmap_map*) + thd->alloc(bitmap_buffer_size(table->s->fields)))) + { + result= TRUE; + goto end; + } + if (unlikely(my_bitmap_init(&part_info->full_part_field_set, bitmap_buf, + table->s->fields))) + { + result= TRUE; + goto end; + } + /* + full_part_field_array may be NULL if storage engine supports native + partitioning. 
+ */ + table->read_set= &part_info->full_part_field_set; + if ((ptr= part_info->full_part_field_array)) + for (; *ptr; ptr++) + table->mark_column_with_deps(*ptr); + table->default_column_bitmaps(); + +end: + DBUG_RETURN(result); +} + + +/* + + Clear flag GET_FIXED_FIELDS_FLAG in all fields of a key previously set by + set_indicator_in_key_fields (always used in pairs). + + SYNOPSIS + clear_indicator_in_key_fields() + key_info Reference to find the key fields + + RETURN VALUE + NONE + + DESCRIPTION + These support routines is used to set/reset an indicator of all fields + in a certain key. It is used in conjunction with another support routine + that traverse all fields in the PF to find if all or some fields in the + PF is part of the key. This is used to check primary keys and unique + keys involve all fields in PF (unless supported) and to derive the + key_map's used to quickly decide whether the index can be used to + derive which partitions are needed to scan. +*/ + +static void clear_indicator_in_key_fields(KEY *key_info) +{ + KEY_PART_INFO *key_part; + uint key_parts= key_info->user_defined_key_parts, i; + for (i= 0, key_part=key_info->key_part; i < key_parts; i++, key_part++) + key_part->field->flags&= (~GET_FIXED_FIELDS_FLAG); +} + + +/* + Set flag GET_FIXED_FIELDS_FLAG in all fields of a key. + + SYNOPSIS + set_indicator_in_key_fields + key_info Reference to find the key fields + + RETURN VALUE + NONE +*/ + +static void set_indicator_in_key_fields(KEY *key_info) +{ + KEY_PART_INFO *key_part; + uint key_parts= key_info->user_defined_key_parts, i; + for (i= 0, key_part=key_info->key_part; i < key_parts; i++, key_part++) + key_part->field->flags|= GET_FIXED_FIELDS_FLAG; +} + + +/* + Check if all or some fields in partition field array is part of a key + previously used to tag key fields. 
+ + SYNOPSIS + check_fields_in_PF() + ptr Partition field array + out:all_fields Is all fields of partition field array used in key + out:some_fields Is some fields of partition field array used in key + + RETURN VALUE + all_fields, some_fields +*/ + +static void check_fields_in_PF(Field **ptr, bool *all_fields, + bool *some_fields) +{ + DBUG_ENTER("check_fields_in_PF"); + + *all_fields= TRUE; + *some_fields= FALSE; + if ((!ptr) || !(*ptr)) + { + *all_fields= FALSE; + DBUG_VOID_RETURN; + } + do + { + /* Check if the field of the PF is part of the current key investigated */ + if ((*ptr)->flags & GET_FIXED_FIELDS_FLAG) + *some_fields= TRUE; + else + *all_fields= FALSE; + } while (*(++ptr)); + DBUG_VOID_RETURN; +} + + +/* + Clear flag GET_FIXED_FIELDS_FLAG in all fields of the table. + This routine is used for error handling purposes. + + SYNOPSIS + clear_field_flag() + table TABLE object for which partition fields are set-up + + RETURN VALUE + NONE +*/ + +static void clear_field_flag(TABLE *table) +{ + Field **ptr; + DBUG_ENTER("clear_field_flag"); + + for (ptr= table->field; *ptr; ptr++) + (*ptr)->flags&= (~GET_FIXED_FIELDS_FLAG); + DBUG_VOID_RETURN; +} + + +/* + find_field_in_table_sef finds the field given its name. All fields get + GET_FIXED_FIELDS_FLAG set. + + SYNOPSIS + handle_list_of_fields() + it A list of field names for the partition function + table TABLE object for which partition fields are set-up + part_info Reference to partitioning data structure + sub_part Is the table subpartitioned as well + + RETURN VALUE + TRUE Fields in list of fields not part of table + FALSE All fields ok and array created + + DESCRIPTION + This routine sets-up the partition field array for KEY partitioning, it + also verifies that all fields in the list of fields is actually a part of + the table. 
+ +*/ + + +static bool handle_list_of_fields(THD *thd, List_iterator it, + TABLE *table, + partition_info *part_info, + bool is_sub_part) +{ + Field *field; + bool result; + const char *field_name; + bool is_list_empty= TRUE; + DBUG_ENTER("handle_list_of_fields"); + + while ((field_name= it++)) + { + is_list_empty= FALSE; + field= find_field_in_table_sef(table, field_name); + if (likely(field != 0)) + field->flags|= GET_FIXED_FIELDS_FLAG; + else + { + my_error(ER_FIELD_NOT_FOUND_PART_ERROR, MYF(0)); + clear_field_flag(table); + result= TRUE; + goto end; + } + } + if (is_list_empty && part_info->part_type == HASH_PARTITION) + { + uint primary_key= table->s->primary_key; + if (primary_key != MAX_KEY) + { + uint num_key_parts= table->key_info[primary_key].user_defined_key_parts, i; + /* + In the case of an empty list we use primary key as partition key. + */ + for (i= 0; i < num_key_parts; i++) + { + Field *field= table->key_info[primary_key].key_part[i].field; + field->flags|= GET_FIXED_FIELDS_FLAG; + } + } + else + { + if (table->s->db_type()->partition_flags && + (table->s->db_type()->partition_flags() & HA_USE_AUTO_PARTITION) && + (table->s->db_type()->partition_flags() & HA_CAN_PARTITION)) + { + /* + This engine can handle automatic partitioning and there is no + primary key. In this case we rely on that the engine handles + partitioning based on a hidden key. Thus we allocate no + array for partitioning fields. 
+ */ + DBUG_RETURN(FALSE); + } + else + { + my_error(ER_FIELD_NOT_FOUND_PART_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + } + } + result= set_up_field_array(thd, table, is_sub_part); +end: + DBUG_RETURN(result); +} + + +/* + Support function to check if all VALUES * (expression) is of the + right sign (no signed constants when unsigned partition function) + + SYNOPSIS + check_signed_flag() + part_info Partition info object + + RETURN VALUES + 0 No errors due to sign errors + >0 Sign error +*/ + +int check_signed_flag(partition_info *part_info) +{ + int error= 0; + uint i= 0; + if (part_info->part_type != HASH_PARTITION && + part_info->part_expr->unsigned_flag) + { + List_iterator part_it(part_info->partitions); + do + { + partition_element *part_elem= part_it++; + + if (part_elem->signed_flag) + { + my_error(ER_PARTITION_CONST_DOMAIN_ERROR, MYF(0)); + error= ER_PARTITION_CONST_DOMAIN_ERROR; + break; + } + } while (++i < part_info->num_parts); + } + return error; +} + +/* + init_lex_with_single_table and end_lex_with_single_table + are now in sql_lex.cc +*/ + +/* + The function uses a new feature in fix_fields where the flag + GET_FIXED_FIELDS_FLAG is set for all fields in the item tree. + This field must always be reset before returning from the function + since it is used for other purposes as well. + + SYNOPSIS + fix_fields_part_func() + thd The thread object + func_expr The item tree reference of the partition function + table The table object + part_info Reference to partitioning data structure + is_sub_part Is the table subpartitioned as well + is_create_table_ind Indicator of whether openfrm was called as part of + CREATE or ALTER TABLE + + RETURN VALUE + TRUE An error occurred, something was wrong with the + partition function. + FALSE Ok, a partition field array was created + + DESCRIPTION + This function is used to build an array of partition fields for the + partitioning function and subpartitioning function. 
The partitioning + function is an item tree that must reference at least one field in the + table. This is checked first in the parser that the function doesn't + contain non-cacheable parts (like a random function) and by checking + here that the function isn't a constant function. + + Calculate the number of fields in the partition function. + Use it allocate memory for array of Field pointers. + Initialise array of field pointers. Use information set when + calling fix_fields and reset it immediately after. + The get_fields_in_item_tree activates setting of bit in flags + on the field object. +*/ + +static bool fix_fields_part_func(THD *thd, Item* func_expr, TABLE *table, + bool is_sub_part, bool is_create_table_ind) +{ + partition_info *part_info= table->part_info; + bool result= TRUE; + int error; + LEX *old_lex= thd->lex; + LEX lex; + DBUG_ENTER("fix_fields_part_func"); + + if (init_lex_with_single_table(thd, table, &lex)) + goto end; + table->get_fields_in_item_tree= true; + + func_expr->walk(&Item::change_context_processor, 0, + &lex.first_select_lex()->context); + thd->where= "partition function"; + /* + In execution we must avoid the use of thd->change_item_tree since + we might release memory before statement is completed. We do this + by temporarily setting the stmt_arena->mem_root to be the mem_root + of the table object, this also ensures that any memory allocated + during fix_fields will not be released at end of execution of this + statement. Thus the item tree will remain valid also in subsequent + executions of this table object. We do however not at the moment + support allocations during execution of val_int so any item class + that does this during val_int must be disallowed as partition + function. + SEE Bug #21658 + + This is a tricky call to prepare for since it can have a large number + of interesting side effects, both desirable and undesirable. 
+ */ + { + const bool save_agg_field= thd->lex->current_select->non_agg_field_used(); + const bool save_agg_func= thd->lex->current_select->agg_func_used(); + const nesting_map saved_allow_sum_func= thd->lex->allow_sum_func; + thd->lex->allow_sum_func.clear_all(); + + if (likely(!(error= func_expr->fix_fields_if_needed(thd, (Item**)&func_expr)))) + func_expr->walk(&Item::post_fix_fields_part_expr_processor, 0, NULL); + + /* + Restore agg_field/agg_func and allow_sum_func, + fix_fields should not affect mysql_select later, see Bug#46923. + */ + thd->lex->current_select->set_non_agg_field_used(save_agg_field); + thd->lex->current_select->set_agg_func_used(save_agg_func); + thd->lex->allow_sum_func= saved_allow_sum_func; + } + if (unlikely(error)) + { + DBUG_PRINT("info", ("Field in partition function not part of table")); + clear_field_flag(table); + goto end; + } + if (unlikely(func_expr->const_item())) + { + my_error(ER_WRONG_EXPR_IN_PARTITION_FUNC_ERROR, MYF(0)); + clear_field_flag(table); + goto end; + } + + /* + We don't allow creating partitions with expressions with non matching + arguments as a (sub)partitioning function, + but we want to allow such expressions when opening existing tables for + easier maintenance. This exception should be deprecated at some point + in future so that we always throw an error. 
+ */ + if (func_expr->walk(&Item::check_valid_arguments_processor, 0, NULL)) + { + if (is_create_table_ind) + { + my_error(ER_WRONG_EXPR_IN_PARTITION_FUNC_ERROR, MYF(0)); + goto end; + } + else + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_EXPR_IN_PARTITION_FUNC_ERROR, + ER_THD(thd, ER_WRONG_EXPR_IN_PARTITION_FUNC_ERROR)); + } + + if (unlikely((!is_sub_part) && (error= check_signed_flag(part_info)))) + goto end; + result= set_up_field_array(thd, table, is_sub_part); +end: + end_lex_with_single_table(thd, table, old_lex); + func_expr->walk(&Item::change_context_processor, 0, 0); + DBUG_RETURN(result); +} + + +/* + Check that the primary key contains all partition fields if defined + + SYNOPSIS + check_primary_key() + table TABLE object for which partition fields are set-up + + RETURN VALUES + TRUE Not all fields in partitioning function was part + of primary key + FALSE Ok, all fields of partitioning function were part + of primary key + + DESCRIPTION + This function verifies that if there is a primary key that it contains + all the fields of the partition function. + This is a temporary limitation that will hopefully be removed after a + while. 
+*/ + +static bool check_primary_key(TABLE *table) +{ + uint primary_key= table->s->primary_key; + bool all_fields, some_fields; + bool result= FALSE; + DBUG_ENTER("check_primary_key"); + + if (primary_key < MAX_KEY) + { + set_indicator_in_key_fields(table->key_info+primary_key); + check_fields_in_PF(table->part_info->full_part_field_array, + &all_fields, &some_fields); + clear_indicator_in_key_fields(table->key_info+primary_key); + if (unlikely(!all_fields)) + { + my_error(ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF,MYF(0),"PRIMARY KEY"); + result= TRUE; + } + } + DBUG_RETURN(result); +} + + +/* + Check that unique keys contains all partition fields + + SYNOPSIS + check_unique_keys() + table TABLE object for which partition fields are set-up + + RETURN VALUES + TRUE Not all fields in partitioning function was part + of all unique keys + FALSE Ok, all fields of partitioning function were part + of unique keys + + DESCRIPTION + This function verifies that if there is a unique index that it contains + all the fields of the partition function. + This is a temporary limitation that will hopefully be removed after a + while. +*/ + +static bool check_unique_keys(TABLE *table) +{ + bool all_fields, some_fields; + bool result= FALSE; + uint keys= table->s->keys; + uint i; + DBUG_ENTER("check_unique_keys"); + + for (i= 0; i < keys; i++) + { + if (table->key_info[i].flags & HA_NOSAME) //Unique index + { + set_indicator_in_key_fields(table->key_info+i); + check_fields_in_PF(table->part_info->full_part_field_array, + &all_fields, &some_fields); + clear_indicator_in_key_fields(table->key_info+i); + if (unlikely(!all_fields)) + { + my_error(ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF,MYF(0),"UNIQUE INDEX"); + result= TRUE; + break; + } + } + } + DBUG_RETURN(result); +} + + +/* + An important optimisation is whether a range on a field can select a subset + of the partitions. + A prerequisite for this to happen is that the PF is a growing function OR + a shrinking function. 
+ This can never happen for a multi-dimensional PF. Thus this can only happen + with PF with at most one field involved in the PF. + The idea is that if the function is a growing function and you know that + the field of the PF is 4 <= A <= 6 then we can convert this to a range + in the PF instead by setting the range to PF(4) <= PF(A) <= PF(6). In the + case of RANGE PARTITIONING and LIST PARTITIONING this can be used to + calculate a set of partitions rather than scanning all of them. + Thus the following prerequisites are there to check if sets of partitions + can be found. + 1) Only possible for RANGE and LIST partitioning (not for subpartitioning) + 2) Only possible if PF only contains 1 field + 3) Possible if PF is a growing function of the field + 4) Possible if PF is a shrinking function of the field + OBSERVATION: + 1) IF f1(A) is a growing function AND f2(A) is a growing function THEN + f1(A) + f2(A) is a growing function + f1(A) * f2(A) is a growing function if f1(A) >= 0 and f2(A) >= 0 + 2) IF f1(A) is a growing function and f2(A) is a shrinking function THEN + f1(A) / f2(A) is a growing function if f1(A) >= 0 and f2(A) > 0 + 3) IF A is a growing function then a function f(A) that removes the + least significant portion of A is a growing function + E.g. DATE(datetime) is a growing function + MONTH(datetime) is not a growing/shrinking function + 4) IF f1(A) is a growing function and f2(A) is a growing function THEN + f1(f2(A)) and f2(f1(A)) are also growing functions + 5) IF f1(A) is a shrinking function and f2(A) is a growing function THEN + f1(f2(A)) is a shrinking function and f2(f1(A)) is a shrinking function + 6) f1(A) = A is a growing function + 7) f1(A) = A*a + b (where a and b are constants) is a growing function + + By analysing the item tree of the PF we can use these deducements and + derive whether the PF is a growing function or a shrinking function or + neither of it. 
+ + If the PF is range capable then a flag is set on the table object + indicating this to notify that we can use also ranges on the field + of the PF to deduce a set of partitions if the fields of the PF were + not all fully bound. + + SYNOPSIS + check_range_capable_PF() + table TABLE object for which partition fields are set-up + + DESCRIPTION + Support for this is not implemented yet. +*/ + +void check_range_capable_PF(TABLE *table) +{ + DBUG_ENTER("check_range_capable_PF"); + + DBUG_VOID_RETURN; +} + + +/** + Set up partition bitmaps + + @param thd Thread object + @param part_info Reference to partitioning data structure + + @return Operation status + @retval TRUE Memory allocation failure + @retval FALSE Success + + Allocate memory for bitmaps of the partitioned table + and initialise it. +*/ + +static bool set_up_partition_bitmaps(THD *thd, partition_info *part_info) +{ + uint32 *bitmap_buf; + uint bitmap_bits= part_info->num_subparts? + (part_info->num_subparts* part_info->num_parts): + part_info->num_parts; + uint bitmap_bytes= bitmap_buffer_size(bitmap_bits); + DBUG_ENTER("set_up_partition_bitmaps"); + + DBUG_ASSERT(!part_info->bitmaps_are_initialized); + + /* Allocate for both read and lock_partitions */ + if (unlikely(!(bitmap_buf= + (uint32*) alloc_root(&part_info->table->mem_root, + bitmap_bytes * 2)))) + DBUG_RETURN(TRUE); + + my_bitmap_init(&part_info->read_partitions, bitmap_buf, bitmap_bits); + /* Use the second half of the allocated buffer for lock_partitions */ + my_bitmap_init(&part_info->lock_partitions, bitmap_buf + (bitmap_bytes / 4), + bitmap_bits); + part_info->bitmaps_are_initialized= TRUE; + part_info->set_partition_bitmaps(NULL); + DBUG_RETURN(FALSE); +} + + +/* + Set up partition key maps + + SYNOPSIS + set_up_partition_key_maps() + table TABLE object for which partition fields are set-up + part_info Reference to partitioning data structure + + RETURN VALUES + None + + DESCRIPTION + This function sets up a couple of key maps to be able 
to quickly check + if an index ever can be used to deduce the partition fields or even + a part of the fields of the partition function. + We set up the following key_map's. + PF = Partition Function + 1) All fields of the PF is set even by equal on the first fields in the + key + 2) All fields of the PF is set if all fields of the key is set + 3) At least one field in the PF is set if all fields is set + 4) At least one field in the PF is part of the key +*/ + +static void set_up_partition_key_maps(TABLE *table, + partition_info *part_info) +{ + uint keys= table->s->keys; + uint i; + bool all_fields, some_fields; + DBUG_ENTER("set_up_partition_key_maps"); + + part_info->all_fields_in_PF.clear_all(); + part_info->all_fields_in_PPF.clear_all(); + part_info->all_fields_in_SPF.clear_all(); + part_info->some_fields_in_PF.clear_all(); + for (i= 0; i < keys; i++) + { + set_indicator_in_key_fields(table->key_info+i); + check_fields_in_PF(part_info->full_part_field_array, + &all_fields, &some_fields); + if (all_fields) + part_info->all_fields_in_PF.set_bit(i); + if (some_fields) + part_info->some_fields_in_PF.set_bit(i); + if (part_info->is_sub_partitioned()) + { + check_fields_in_PF(part_info->part_field_array, + &all_fields, &some_fields); + if (all_fields) + part_info->all_fields_in_PPF.set_bit(i); + check_fields_in_PF(part_info->subpart_field_array, + &all_fields, &some_fields); + if (all_fields) + part_info->all_fields_in_SPF.set_bit(i); + } + clear_indicator_in_key_fields(table->key_info+i); + } + DBUG_VOID_RETURN; +} + +static bool check_no_constants(THD *, partition_info*) +{ + return FALSE; +} + +/* + Support routines for check_list_constants used by qsort to sort the + constant list expressions. One routine for integers and one for + column lists. 
+ + SYNOPSIS + list_part_cmp() + a First list constant to compare with + b Second list constant to compare with + + RETURN VALUE + +1 a > b + 0 a == b + -1 a < b +*/ + +extern "C" +int partition_info_list_part_cmp(const void* a, const void* b) +{ + longlong a1= ((LIST_PART_ENTRY*)a)->list_value; + longlong b1= ((LIST_PART_ENTRY*)b)->list_value; + if (a1 < b1) + return -1; + else if (a1 > b1) + return +1; + else + return 0; +} + + +/* + Compare two lists of column values in RANGE/LIST partitioning + SYNOPSIS + partition_info_compare_column_values() + first First column list argument + second Second column list argument + RETURN VALUES + 0 Equal + -1 First argument is smaller + +1 First argument is larger +*/ + +extern "C" +int partition_info_compare_column_values(const void *first_arg, + const void *second_arg) +{ + const part_column_list_val *first= (part_column_list_val*)first_arg; + const part_column_list_val *second= (part_column_list_val*)second_arg; + partition_info *part_info= first->part_info; + Field **field; + + for (field= part_info->part_field_array; *field; + field++, first++, second++) + { + if (first->max_value || second->max_value) + { + if (first->max_value && second->max_value) + return 0; + if (second->max_value) + return -1; + else + return +1; + } + if (first->null_value || second->null_value) + { + if (first->null_value && second->null_value) + continue; + if (second->null_value) + return +1; + else + return -1; + } + int res= (*field)->cmp((const uchar*)first->column_value, + (const uchar*)second->column_value); + if (res) + return res; + } + return 0; +} + + +/* + This routine allocates an array for all range constants to achieve a fast + check what partition a certain value belongs to. At the same time it does + also check that the range constants are defined in increasing order and + that the expressions are constant integer expressions. 
+ + SYNOPSIS + check_range_constants() + thd Thread object + + RETURN VALUE + TRUE An error occurred during creation of range constants + FALSE Successful creation of range constant mapping + + DESCRIPTION + This routine is called from check_partition_info to get a quick error + before we came too far into the CREATE TABLE process. It is also called + from fix_partition_func every time we open the .frm file. It is only + called for RANGE PARTITIONed tables. +*/ + +static bool check_range_constants(THD *thd, partition_info *part_info) +{ + partition_element* part_def; + bool first= TRUE; + uint i; + List_iterator it(part_info->partitions); + bool result= TRUE; + DBUG_ENTER("check_range_constants"); + DBUG_PRINT("enter", ("RANGE with %d parts, column_list = %u", + part_info->num_parts, part_info->column_list)); + + if (part_info->column_list) + { + part_column_list_val *loc_range_col_array; + part_column_list_val *UNINIT_VAR(current_largest_col_val); + uint num_column_values= part_info->part_field_list.elements; + uint size_entries= sizeof(part_column_list_val) * num_column_values; + part_info->range_col_array= (part_column_list_val*) + thd->calloc(part_info->num_parts * size_entries); + if (unlikely(part_info->range_col_array == NULL)) + goto end; + + loc_range_col_array= part_info->range_col_array; + i= 0; + do + { + part_def= it++; + { + List_iterator list_val_it(part_def->list_val_list); + part_elem_value *range_val= list_val_it++; + part_column_list_val *col_val= range_val->col_val_array; + + if (part_info->fix_column_value_functions(thd, range_val, i)) + goto end; + memcpy(loc_range_col_array, (const void*)col_val, size_entries); + loc_range_col_array+= num_column_values; + if (!first) + { + if (partition_info_compare_column_values(current_largest_col_val, + col_val) >= 0) + goto range_not_increasing_error; + } + current_largest_col_val= col_val; + } + first= FALSE; + } while (++i < part_info->num_parts); + } + else + { + longlong UNINIT_VAR(current_largest); + 
longlong part_range_value; + bool signed_flag= !part_info->part_expr->unsigned_flag; + + part_info->range_int_array= (longlong*) + thd->alloc(part_info->num_parts * sizeof(longlong)); + if (unlikely(part_info->range_int_array == NULL)) + goto end; + + i= 0; + do + { + part_def= it++; + if ((i != part_info->num_parts - 1) || !part_info->defined_max_value) + { + part_range_value= part_def->range_value; + if (!signed_flag) + part_range_value-= 0x8000000000000000ULL; + } + else + part_range_value= LONGLONG_MAX; + + if (!first) + { + if (current_largest > part_range_value || + (current_largest == part_range_value && + (part_range_value < LONGLONG_MAX || + i != part_info->num_parts - 1 || + !part_info->defined_max_value))) + goto range_not_increasing_error; + } + part_info->range_int_array[i]= part_range_value; + current_largest= part_range_value; + first= FALSE; + } while (++i < part_info->num_parts); + } + result= FALSE; +end: + DBUG_RETURN(result); + +range_not_increasing_error: + my_error(ER_RANGE_NOT_INCREASING_ERROR, MYF(0)); + goto end; +} + + +/* + This routine allocates an array for all list constants to achieve a fast + check what partition a certain value belongs to. At the same time it does + also check that there are no duplicates among the list constants and that + that the list expressions are constant integer expressions. + + SYNOPSIS + check_list_constants() + thd Thread object + + RETURN VALUE + TRUE An error occurred during creation of list constants + FALSE Successful creation of list constant mapping + + DESCRIPTION + This routine is called from check_partition_info to get a quick error + before we came too far into the CREATE TABLE process. It is also called + from fix_partition_func every time we open the .frm file. It is only + called for LIST PARTITIONed tables. 
+*/ + +static bool check_list_constants(THD *thd, partition_info *part_info) +{ + uint i, size_entries, num_column_values; + uint list_index= 0; + part_elem_value *list_value; + bool result= TRUE; + longlong type_add, calc_value; + void *curr_value; + void *UNINIT_VAR(prev_value); + partition_element* part_def; + bool found_null= FALSE; + qsort_cmp compare_func; + void *ptr; + List_iterator list_func_it(part_info->partitions); + DBUG_ENTER("check_list_constants"); + + DBUG_ASSERT(part_info->part_type == LIST_PARTITION); + + part_info->num_list_values= 0; + /* + We begin by calculating the number of list values that have been + defined in the first step. + + We use this number to allocate a properly sized array of structs + to keep the partition id and the value to use in that partition. + In the second traversal we assign them values in the struct array. + + Finally we sort the array of structs in order of values to enable + a quick binary search for the proper value to discover the + partition id. + After sorting the array we check that there are no duplicates in the + list. + */ + + i= 0; + do + { + part_def= list_func_it++; + if (part_def->has_null_value) + { + if (found_null) + { + my_error(ER_MULTIPLE_DEF_CONST_IN_LIST_PART_ERROR, MYF(0)); + goto end; + } + part_info->has_null_value= TRUE; + part_info->has_null_part_id= i; + found_null= TRUE; + } + part_info->num_list_values+= part_def->list_val_list.elements; + } while (++i < part_info->num_parts); + list_func_it.rewind(); + num_column_values= part_info->part_field_list.elements; + size_entries= part_info->column_list ? 
+ (num_column_values * sizeof(part_column_list_val)) : + sizeof(LIST_PART_ENTRY); + if (!(ptr= thd->calloc((part_info->num_list_values+1) * size_entries))) + goto end; + if (part_info->column_list) + { + part_column_list_val *loc_list_col_array; + loc_list_col_array= (part_column_list_val*)ptr; + part_info->list_col_array= (part_column_list_val*)ptr; + compare_func= partition_info_compare_column_values; + i= 0; + do + { + part_def= list_func_it++; + if (part_def->max_value) + { + // DEFAULT is not a real value so let's exclude it from sorting. + DBUG_ASSERT(part_info->num_list_values); + part_info->num_list_values--; + continue; + } + List_iterator list_val_it2(part_def->list_val_list); + while ((list_value= list_val_it2++)) + { + part_column_list_val *col_val= list_value->col_val_array; + if (part_info->fix_column_value_functions(thd, list_value, i)) + DBUG_RETURN(result); + memcpy(loc_list_col_array, (const void*)col_val, size_entries); + loc_list_col_array+= num_column_values; + } + } while (++i < part_info->num_parts); + } + else + { + compare_func= partition_info_list_part_cmp; + part_info->list_array= (LIST_PART_ENTRY*)ptr; + i= 0; + /* + Fix to be able to reuse signed sort functions also for unsigned + partition functions. + */ + type_add= (longlong)(part_info->part_expr->unsigned_flag ? + 0x8000000000000000ULL : + 0ULL); + + do + { + part_def= list_func_it++; + if (part_def->max_value) + { + // DEFAULT is not a real value so let's exclude it from sorting. 
+ DBUG_ASSERT(part_info->num_list_values); + part_info->num_list_values--; + continue; + } + List_iterator list_val_it2(part_def->list_val_list); + while ((list_value= list_val_it2++)) + { + calc_value= list_value->value ^ type_add; + part_info->list_array[list_index].list_value= calc_value; + part_info->list_array[list_index++].partition_id= i; + } + } while (++i < part_info->num_parts); + } + DBUG_ASSERT(part_info->fixed); + if (part_info->num_list_values) + { + bool first= TRUE; + /* + list_array and list_col_array are unions, so this works for both + variants of LIST partitioning. + */ + my_qsort(part_info->list_array, part_info->num_list_values, size_entries, + compare_func); + + i= 0; + do + { + DBUG_ASSERT(i < part_info->num_list_values); + curr_value= part_info->column_list + ? (void*)&part_info->list_col_array[num_column_values * i] + : (void*)&part_info->list_array[i]; + if (likely(first || compare_func(curr_value, prev_value))) + { + prev_value= curr_value; + first= FALSE; + } + else + { + my_error(ER_MULTIPLE_DEF_CONST_IN_LIST_PART_ERROR, MYF(0)); + goto end; + } + } while (++i < part_info->num_list_values); + } + result= FALSE; +end: + DBUG_RETURN(result); +} + + +/* Set partition boundaries when rotating by INTERVAL */ +static bool check_vers_constants(THD *thd, partition_info *part_info) +{ + uint hist_parts= part_info->num_parts - 1; + Vers_part_info *vers_info= part_info->vers_info; + vers_info->hist_part= part_info->partitions.head(); + vers_info->now_part= part_info->partitions.elem(hist_parts); + + if (!vers_info->interval.is_set()) + return 0; + + part_info->range_int_array= + (longlong*) thd->alloc(part_info->num_parts * sizeof(longlong)); + + MYSQL_TIME ltime; + List_iterator it(part_info->partitions); + partition_element *el; + my_tz_OFFSET0->gmt_sec_to_TIME(<ime, vers_info->interval.start); + while ((el= it++)->id < hist_parts) + { + if (date_add_interval(thd, <ime, vers_info->interval.type, + vers_info->interval.step)) + goto err; + uint 
error= 0; + part_info->range_int_array[el->id]= el->range_value= + my_tz_OFFSET0->TIME_to_gmt_sec(<ime, &error); + if (error) + goto err; + if (vers_info->hist_part->range_value <= thd->query_start()) + vers_info->hist_part= el; + } + DBUG_ASSERT(el == vers_info->now_part); + el->max_value= true; + part_info->range_int_array[el->id]= el->range_value= LONGLONG_MAX; + return 0; +err: + my_error(ER_DATA_OUT_OF_RANGE, MYF(0), "TIMESTAMP", "INTERVAL"); + return 1; +} + + +/* + Set up function pointers for partition function + + SYNOPSIS + set_up_partition_func_pointers() + part_info Reference to partitioning data structure + + RETURN VALUE + NONE + + DESCRIPTION + Set-up all function pointers for calculation of partition id, + subpartition id and the upper part in subpartitioning. This is to speed up + execution of get_partition_id which is executed once every record to be + written and deleted and twice for updates. +*/ + +static void set_up_partition_func_pointers(partition_info *part_info) +{ + DBUG_ENTER("set_up_partition_func_pointers"); + + if (part_info->is_sub_partitioned()) + { + part_info->get_partition_id= get_partition_id_with_sub; + if (part_info->part_type == RANGE_PARTITION) + { + if (part_info->column_list) + part_info->get_part_partition_id= get_partition_id_range_col; + else + part_info->get_part_partition_id= get_partition_id_range; + if (part_info->list_of_subpart_fields) + { + if (part_info->linear_hash_ind) + part_info->get_subpartition_id= get_partition_id_linear_key_sub; + else + part_info->get_subpartition_id= get_partition_id_key_sub; + } + else + { + if (part_info->linear_hash_ind) + part_info->get_subpartition_id= get_partition_id_linear_hash_sub; + else + part_info->get_subpartition_id= get_partition_id_hash_sub; + } + } + else if (part_info->part_type == VERSIONING_PARTITION) + { + part_info->get_part_partition_id= vers_get_partition_id; + if (part_info->list_of_subpart_fields) + { + if (part_info->linear_hash_ind) + 
part_info->get_subpartition_id= get_partition_id_linear_key_sub; + else + part_info->get_subpartition_id= get_partition_id_key_sub; + } + else + { + if (part_info->linear_hash_ind) + part_info->get_subpartition_id= get_partition_id_linear_hash_sub; + else + part_info->get_subpartition_id= get_partition_id_hash_sub; + } + } + else /* LIST Partitioning */ + { + if (part_info->column_list) + part_info->get_part_partition_id= get_partition_id_list_col; + else + part_info->get_part_partition_id= get_partition_id_list; + if (part_info->list_of_subpart_fields) + { + if (part_info->linear_hash_ind) + part_info->get_subpartition_id= get_partition_id_linear_key_sub; + else + part_info->get_subpartition_id= get_partition_id_key_sub; + } + else + { + if (part_info->linear_hash_ind) + part_info->get_subpartition_id= get_partition_id_linear_hash_sub; + else + part_info->get_subpartition_id= get_partition_id_hash_sub; + } + } + } + else /* No subpartitioning */ + { + part_info->get_part_partition_id= NULL; + part_info->get_subpartition_id= NULL; + if (part_info->part_type == RANGE_PARTITION) + { + if (part_info->column_list) + part_info->get_partition_id= get_partition_id_range_col; + else + part_info->get_partition_id= get_partition_id_range; + } + else if (part_info->part_type == LIST_PARTITION) + { + if (part_info->column_list) + part_info->get_partition_id= get_partition_id_list_col; + else + part_info->get_partition_id= get_partition_id_list; + } + else if (part_info->part_type == VERSIONING_PARTITION) + { + part_info->get_partition_id= vers_get_partition_id; + } + else /* HASH partitioning */ + { + if (part_info->list_of_part_fields) + { + if (part_info->linear_hash_ind) + part_info->get_partition_id= get_partition_id_linear_key_nosub; + else + part_info->get_partition_id= get_partition_id_key_nosub; + } + else + { + if (part_info->linear_hash_ind) + part_info->get_partition_id= get_partition_id_linear_hash_nosub; + else + part_info->get_partition_id= 
get_partition_id_hash_nosub; + } + } + } + /* + We need special functions to handle character sets since they require copy + of field pointers and restore afterwards. For subpartitioned tables we do + the copy and restore individually on the part and subpart parts. For non- + subpartitioned tables we use the same functions as used for the parts part + of subpartioning. + Thus for subpartitioned tables the get_partition_id is always + get_partition_id_with_sub, even when character sets exists. + */ + if (part_info->part_charset_field_array) + { + if (part_info->is_sub_partitioned()) + { + DBUG_ASSERT(part_info->get_part_partition_id); + if (!part_info->column_list) + { + part_info->get_part_partition_id_charset= + part_info->get_part_partition_id; + part_info->get_part_partition_id= get_part_id_charset_func_part; + } + } + else + { + DBUG_ASSERT(part_info->get_partition_id); + if (!part_info->column_list) + { + part_info->get_part_partition_id_charset= part_info->get_partition_id; + part_info->get_part_partition_id= get_part_id_charset_func_part; + } + } + } + if (part_info->subpart_charset_field_array) + { + DBUG_ASSERT(part_info->get_subpartition_id); + part_info->get_subpartition_id_charset= + part_info->get_subpartition_id; + part_info->get_subpartition_id= get_part_id_charset_func_subpart; + } + if (part_info->part_type == RANGE_PARTITION) + part_info->check_constants= check_range_constants; + else if (part_info->part_type == LIST_PARTITION) + part_info->check_constants= check_list_constants; + else if (part_info->part_type == VERSIONING_PARTITION) + part_info->check_constants= check_vers_constants; + else + part_info->check_constants= check_no_constants; + DBUG_VOID_RETURN; +} + + +/* + For linear hashing we need a mask which is on the form 2**n - 1 where + 2**n >= num_parts. Thus if num_parts is 6 then mask is 2**3 - 1 = 8 - 1 = 7. 
+ + SYNOPSIS + set_linear_hash_mask() + part_info Reference to partitioning data structure + num_parts Number of parts in linear hash partitioning + + RETURN VALUE + NONE +*/ + +void set_linear_hash_mask(partition_info *part_info, uint num_parts) +{ + uint mask; + + for (mask= 1; mask < num_parts; mask<<=1) + ; + part_info->linear_hash_mask= mask - 1; +} + + +/* + This function calculates the partition id provided the result of the hash + function using linear hashing parameters, mask and number of partitions. + + SYNOPSIS + get_part_id_from_linear_hash() + hash_value Hash value calculated by HASH function or KEY function + mask Mask calculated previously by set_linear_hash_mask + num_parts Number of partitions in HASH partitioned part + + RETURN VALUE + part_id The calculated partition identity (starting at 0) + + DESCRIPTION + The partition is calculated according to the theory of linear hashing. + See e.g. Linear hashing: a new tool for file and table addressing, + Reprinted from VLDB-80 in Readings Database Systems, 2nd ed, M. Stonebraker + (ed.), Morgan Kaufmann 1994. +*/ + +static uint32 get_part_id_from_linear_hash(longlong hash_value, uint mask, + uint num_parts) +{ + uint32 part_id= (uint32)(hash_value & mask); + + if (part_id >= num_parts) + { + uint new_mask= ((mask + 1) >> 1) - 1; + part_id= (uint32)(hash_value & new_mask); + } + return part_id; +} + + +/* + Check if a particular field is in need of character set + handling for partition functions. 
+ + SYNOPSIS + field_is_partition_charset() + field The field to check + + RETURN VALUES + FALSE Not in need of character set handling + TRUE In need of character set handling +*/ + +bool field_is_partition_charset(Field *field) +{ + if (!(field->type() == MYSQL_TYPE_STRING) && + !(field->type() == MYSQL_TYPE_VARCHAR)) + return FALSE; + { + CHARSET_INFO *cs= field->charset(); + if (!(field->type() == MYSQL_TYPE_STRING) || + !(cs->state & MY_CS_BINSORT)) + return TRUE; + return FALSE; + } +} + + +/* + Check that partition function doesn't contain any forbidden + character sets and collations. + + SYNOPSIS + check_part_func_fields() + ptr Array of Field pointers + ok_with_charsets Will we report allowed charset + fields as ok + RETURN VALUES + FALSE Success + TRUE Error + + DESCRIPTION + We will check in this routine that the fields of the partition functions + do not contain unallowed parts. It can also be used to check if there + are fields that require special care by calling strnxfrm before + calling the functions to calculate partition id. +*/ + +bool check_part_func_fields(Field **ptr, bool ok_with_charsets) +{ + Field *field; + DBUG_ENTER("check_part_func_fields"); + + while ((field= *(ptr++))) + { + /* + For CHAR/VARCHAR fields we need to take special precautions. + Binary collation with CHAR is automatically supported. 
Other + types need some kind of standardisation function handling + */ + if (field_is_partition_charset(field)) + { + CHARSET_INFO *cs= field->charset(); + if (!ok_with_charsets || + cs->mbmaxlen > 1 || + cs->strxfrm_multiply > 1) + { + DBUG_RETURN(TRUE); + } + } + } + DBUG_RETURN(FALSE); +} + + +/* + fix partition functions + + SYNOPSIS + fix_partition_func() + thd The thread object + table TABLE object for which partition fields are set-up + is_create_table_ind Indicator of whether openfrm was called as part of + CREATE or ALTER TABLE + + RETURN VALUE + TRUE Error + FALSE Success + + DESCRIPTION + The name parameter contains the full table name and is used to get the + database name of the table which is used to set-up a correct + TABLE_LIST object for use in fix_fields. + +NOTES + This function is called as part of opening the table by opening the .frm + file. It is a part of CREATE TABLE to do this so it is quite permissible + that errors due to erroneus syntax isn't found until we come here. + If the user has used a non-existing field in the table is one such example + of an error that is not discovered until here. +*/ + +bool fix_partition_func(THD *thd, TABLE *table, bool is_create_table_ind) +{ + bool result= TRUE; + partition_info *part_info= table->part_info; + enum_column_usage saved_column_usage= thd->column_usage; + DBUG_ENTER("fix_partition_func"); + + if (part_info->fixed) + { + DBUG_RETURN(FALSE); + } + thd->column_usage= COLUMNS_WRITE; + DBUG_PRINT("info", ("thd->column_usage: %d", thd->column_usage)); + + if (!is_create_table_ind || + thd->lex->sql_command != SQLCOM_CREATE_TABLE) + { + if (partition_default_handling(thd, table, part_info, + is_create_table_ind, + table->s->normalized_path.str)) + { + DBUG_RETURN(TRUE); + } + } + if (part_info->is_sub_partitioned()) + { + DBUG_ASSERT(part_info->subpart_type == HASH_PARTITION); + /* + Subpartition is defined. We need to verify that subpartitioning + function is correct. 
+ */ + if (part_info->linear_hash_ind) + set_linear_hash_mask(part_info, part_info->num_subparts); + if (part_info->list_of_subpart_fields) + { + List_iterator it(part_info->subpart_field_list); + if (unlikely(handle_list_of_fields(thd, it, table, part_info, TRUE))) + goto end; + } + else + { + if (unlikely(fix_fields_part_func(thd, part_info->subpart_expr, + table, TRUE, is_create_table_ind))) + goto end; + if (unlikely(part_info->subpart_expr->result_type() != INT_RESULT)) + { + part_info->report_part_expr_error(TRUE); + goto end; + } + } + } + DBUG_ASSERT(part_info->part_type != NOT_A_PARTITION); + /* + Partition is defined. We need to verify that partitioning + function is correct. + */ + set_up_partition_func_pointers(part_info); + if (part_info->part_type == HASH_PARTITION) + { + if (part_info->linear_hash_ind) + set_linear_hash_mask(part_info, part_info->num_parts); + if (part_info->list_of_part_fields) + { + List_iterator it(part_info->part_field_list); + if (unlikely(handle_list_of_fields(thd, it, table, part_info, FALSE))) + goto end; + } + else + { + if (unlikely(fix_fields_part_func(thd, part_info->part_expr, + table, FALSE, is_create_table_ind))) + goto end; + if (unlikely(part_info->part_expr->result_type() != INT_RESULT)) + { + part_info->report_part_expr_error(FALSE); + goto end; + } + } + part_info->fixed= TRUE; + } + else + { + if (part_info->column_list) + { + List_iterator it(part_info->part_field_list); + if (unlikely(handle_list_of_fields(thd, it, table, part_info, FALSE))) + goto end; + } + else + { + if (part_info->part_type == VERSIONING_PARTITION && + part_info->vers_fix_field_list(thd)) + goto end; + if (unlikely(fix_fields_part_func(thd, part_info->part_expr, + table, FALSE, is_create_table_ind))) + goto end; + } + part_info->fixed= TRUE; + if (part_info->check_constants(thd, part_info)) + goto end; + if (unlikely(part_info->num_parts < 1)) + { + const char *error_str= part_info->part_type == LIST_PARTITION + ? 
"LIST" : "RANGE"; + my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), error_str); + goto end; + } + if (unlikely(!part_info->column_list && + part_info->part_expr->result_type() != INT_RESULT && + part_info->part_expr->result_type() != DECIMAL_RESULT)) + { + part_info->report_part_expr_error(FALSE); + goto end; + } + } + if (((part_info->part_type != HASH_PARTITION || + part_info->list_of_part_fields == FALSE) && + !part_info->column_list && + check_part_func_fields(part_info->part_field_array, TRUE)) || + (part_info->list_of_subpart_fields == FALSE && + part_info->is_sub_partitioned() && + check_part_func_fields(part_info->subpart_field_array, TRUE))) + { + /* + Range/List/HASH (but not KEY) and not COLUMNS or HASH subpartitioning + with columns in the partitioning expression using unallowed charset. + */ + my_error(ER_PARTITION_FUNCTION_IS_NOT_ALLOWED, MYF(0)); + goto end; + } + if (unlikely(create_full_part_field_array(thd, table, part_info))) + goto end; + if (unlikely(check_primary_key(table))) + goto end; + if (unlikely((!(table->s->db_type()->partition_flags && + (table->s->db_type()->partition_flags() & HA_CAN_PARTITION_UNIQUE))) && + check_unique_keys(table))) + goto end; + if (unlikely(set_up_partition_bitmaps(thd, part_info))) + goto end; + if (unlikely(part_info->set_up_charset_field_preps(thd))) + { + my_error(ER_PARTITION_FUNCTION_IS_NOT_ALLOWED, MYF(0)); + goto end; + } + if (unlikely(part_info->check_partition_field_length())) + { + my_error(ER_PARTITION_FIELDS_TOO_LONG, MYF(0)); + goto end; + } + check_range_capable_PF(table); + set_up_partition_key_maps(table, part_info); + set_up_range_analysis_info(part_info); + table->file->set_part_info(part_info); + result= FALSE; +end: + thd->column_usage= saved_column_usage; + DBUG_PRINT("info", ("thd->column_usage: %d", thd->column_usage)); + DBUG_RETURN(result); +} + + +/* + The code below is support routines for the reverse parsing of the + partitioning syntax. 
This feature is very useful to generate syntax for + all default values to avoid all default checking when opening the frm + file. It is also used when altering the partitioning by use of various + ALTER TABLE commands. Finally it is used for SHOW CREATE TABLES. +*/ + +static int add_part_field_list(THD *thd, String *str, List field_list) +{ + int err= 0; + const char *field_name; + List_iterator part_it(field_list); + + err+= str->append('('); + while ((field_name= part_it++)) + { + err+= append_identifier(thd, str, field_name, strlen(field_name)); + err+= str->append(','); + } + if (field_list.elements) + str->length(str->length()-1); + err+= str->append(')'); + return err; +} + +/* + Must escape strings in partitioned tables frm-files, + parsing it later with mysql_unpack_partition will fail otherwise. +*/ + +static int add_keyword_string(String *str, const char *keyword, + bool quoted, const char *keystr) +{ + int err= str->append(' '); + err+= str->append(keyword, strlen(keyword)); + + str->append(STRING_WITH_LEN(" = ")); + if (quoted) + { + err+= str->append('\''); + err+= str->append_for_single_quote(keystr, strlen(keystr)); + err+= str->append('\''); + } + else + err+= str->append(keystr, strlen(keystr)); + return err; +} + + +/** + @brief Truncate the partition file name from a path it it exists. + + @note A partition file name will contian one or more '#' characters. +One of the occurances of '#' will be either "#P#" or "#p#" depending +on whether the storage engine has converted the filename to lower case. 
+*/ +void truncate_partition_filename(char *path) +{ + if (path) + { + char* last_slash= strrchr(path, FN_LIBCHAR); + + if (!last_slash) + last_slash= strrchr(path, FN_LIBCHAR2); + + if (last_slash) + { + /* Look for a partition-type filename */ + for (char* pound= strchr(last_slash, '#'); + pound; pound = strchr(pound + 1, '#')) + { + if ((pound[1] == 'P' || pound[1] == 'p') && pound[2] == '#') + { + last_slash[0] = '\0'; /* truncate the file name */ + break; + } + } + } + } +} + +/** + @brief Output a filepath. Similar to add_keyword_string except it +also converts \ to / on Windows and skips the partition file name at +the end if found. + + @note When Mysql sends a DATA DIRECTORY from SQL for partitions it does +not use a file name, but it does for DATA DIRECTORY on a non-partitioned +table. So when the storage engine is asked for the DATA DIRECTORY string +after a restart through Handler::update_create_options(), the storage +engine may include the filename. +*/ +static int add_keyword_path(String *str, const char *keyword, + const char *path) +{ + char temp_path[FN_REFLEN]; + strcpy(temp_path, path); +#ifdef _WIN32 + /* Convert \ to / to be able to create table on unix */ + char *pos, *end; + size_t length= strlen(temp_path); + for (pos= temp_path, end= pos+length ; pos < end ; pos++) + { + if (*pos == '\\') + *pos = '/'; + } +#endif + + /* + If the partition file name with its "#P#" identifier + is found after the last slash, truncate that filename. 
+ */ + truncate_partition_filename(temp_path); + + return add_keyword_string(str, keyword, true, temp_path); +} + +static int add_keyword_int(String *str, const char *keyword, longlong num) +{ + int err= str->append(' '); + err+= str->append(keyword, strlen(keyword)); + str->append(STRING_WITH_LEN(" = ")); + return err + str->append_longlong(num); +} + +static int add_server_part_options(String *str, partition_element *p_elem) +{ + int err= 0; + + if (p_elem->nodegroup_id != UNDEF_NODEGROUP) + err+= add_keyword_int(str,"NODEGROUP",(longlong)p_elem->nodegroup_id); + if (p_elem->part_max_rows) + err+= add_keyword_int(str,"MAX_ROWS",(longlong)p_elem->part_max_rows); + if (p_elem->part_min_rows) + err+= add_keyword_int(str,"MIN_ROWS",(longlong)p_elem->part_min_rows); + if (!(current_thd->variables.sql_mode & MODE_NO_DIR_IN_CREATE)) + { + if (p_elem->data_file_name) + err+= add_keyword_path(str, "DATA DIRECTORY", p_elem->data_file_name); + if (p_elem->index_file_name) + err+= add_keyword_path(str, "INDEX DIRECTORY", p_elem->index_file_name); + } + if (p_elem->part_comment) + err+= add_keyword_string(str, "COMMENT", true, p_elem->part_comment); + if (p_elem->connect_string.length) + err+= add_keyword_string(str, "CONNECTION", true, + p_elem->connect_string.str); + err += add_keyword_string(str, "ENGINE", false, + ha_resolve_storage_engine_name(p_elem->engine_type)); + return err; +} + +static int add_engine_part_options(String *str, partition_element *p_elem) +{ + engine_option_value *opt= p_elem->option_list; + + for (; opt; opt= opt->next) + { + if (!opt->value.str) + continue; + if ((add_keyword_string(str, opt->name.str, opt->quoted_value, + opt->value.str))) + return 1; + } + return 0; +} + +/* + Find the given field's Create_field object using name of field + + SYNOPSIS + get_sql_field() + field_name Field name + alter_info Info from ALTER TABLE/CREATE TABLE + + RETURN VALUE + sql_field Object filled in by parser about field + NULL No field found +*/ + +static 
Create_field* get_sql_field(const char *field_name, + Alter_info *alter_info) +{ + List_iterator it(alter_info->create_list); + Create_field *sql_field; + DBUG_ENTER("get_sql_field"); + + while ((sql_field= it++)) + { + if (!(my_strcasecmp(system_charset_info, + sql_field->field_name.str, + field_name))) + { + DBUG_RETURN(sql_field); + } + } + DBUG_RETURN(NULL); +} + + +static int add_column_list_values(String *str, partition_info *part_info, + part_elem_value *list_value, + HA_CREATE_INFO *create_info, + Alter_info *alter_info) +{ + int err= 0; + uint i; + List_iterator it(part_info->part_field_list); + uint num_elements= part_info->part_field_list.elements; + bool use_parenthesis= (part_info->part_type == LIST_PARTITION && + part_info->num_columns > 1U); + + if (use_parenthesis) + err+= str->append('('); + for (i= 0; i < num_elements; i++) + { + part_column_list_val *col_val= &list_value->col_val_array[i]; + const char *field_name= it++; + if (col_val->max_value) + err+= str->append(STRING_WITH_LEN("MAXVALUE")); + else if (col_val->null_value) + err+= str->append(NULL_clex_str); + else + { + Item *item_expr= col_val->item_expression; + if (item_expr->null_value) + err+= str->append(NULL_clex_str); + else + { + CHARSET_INFO *field_cs; + const Type_handler *th= NULL; + + /* + This function is called at a very early stage, even before + we have prepared the sql_field objects. Thus we have to + find the proper sql_field object and get the character set + from that object. 
+ */ + if (create_info) + { + const Column_derived_attributes + derived_attr(create_info->default_table_charset); + Create_field *sql_field; + + if (!(sql_field= get_sql_field(field_name, + alter_info))) + { + my_error(ER_FIELD_NOT_FOUND_PART_ERROR, MYF(0)); + return 1; + } + th= sql_field->type_handler(); + if (th->partition_field_check(sql_field->field_name, item_expr)) + return 1; + field_cs= sql_field->explicit_or_derived_charset(&derived_attr); + } + else + { + Field *field= part_info->part_field_array[i]; + th= field->type_handler(); + if (th->partition_field_check(field->field_name, item_expr)) + return 1; + field_cs= field->charset(); + } + if (th->partition_field_append_value(str, item_expr, field_cs, + alter_info == NULL ? + PARTITION_VALUE_PRINT_MODE_SHOW: + PARTITION_VALUE_PRINT_MODE_FRM)) + return 1; + } + } + if (i != (num_elements - 1)) + err+= str->append(','); + } + if (use_parenthesis) + err+= str->append(')'); + return err; +} + +static int add_partition_values(String *str, partition_info *part_info, + partition_element *p_elem, + HA_CREATE_INFO *create_info, + Alter_info *alter_info) +{ + int err= 0; + + if (part_info->part_type == RANGE_PARTITION) + { + err+= str->append(STRING_WITH_LEN(" VALUES LESS THAN ")); + if (part_info->column_list) + { + List_iterator list_val_it(p_elem->list_val_list); + part_elem_value *list_value= list_val_it++; + err+= str->append('('); + err+= add_column_list_values(str, part_info, list_value, + create_info, alter_info); + err+= str->append(')'); + } + else + { + if (!p_elem->max_value) + { + err+= str->append('('); + if (p_elem->signed_flag) + err+= str->append_longlong(p_elem->range_value); + else + err+= str->append_ulonglong(p_elem->range_value); + err+= str->append(')'); + } + else + err+= str->append(STRING_WITH_LEN("MAXVALUE")); + } + } + else if (part_info->part_type == LIST_PARTITION) + { + uint i; + List_iterator list_val_it(p_elem->list_val_list); + + if (p_elem->max_value) + { + 
DBUG_ASSERT(part_info->defined_max_value || + current_thd->lex->sql_command == SQLCOM_ALTER_TABLE); + err+= str->append(STRING_WITH_LEN(" DEFAULT")); + return err; + } + + err+= str->append(STRING_WITH_LEN(" VALUES IN ")); + uint num_items= p_elem->list_val_list.elements; + + err+= str->append('('); + if (p_elem->has_null_value) + { + err+= str->append(NULL_clex_str); + if (num_items == 0) + { + err+= str->append(')'); + goto end; + } + err+= str->append(','); + } + i= 0; + do + { + part_elem_value *list_value= list_val_it++; + + if (part_info->column_list) + err+= add_column_list_values(str, part_info, list_value, + create_info, alter_info); + else + { + if (!list_value->unsigned_flag) + err+= str->append_longlong(list_value->value); + else + err+= str->append_ulonglong(list_value->value); + } + if (i != (num_items-1)) + err+= str->append(','); + } while (++i < num_items); + err+= str->append(')'); + } + else if (part_info->part_type == VERSIONING_PARTITION) + { + switch (p_elem->type) + { + case partition_element::CURRENT: + err+= str->append(STRING_WITH_LEN(" CURRENT")); + break; + case partition_element::HISTORY: + err+= str->append(STRING_WITH_LEN(" HISTORY")); + break; + default: + DBUG_ASSERT(0 && "wrong p_elem->type"); + } + } +end: + return err; +} + + +/** + Add 'KEY' word, with optional 'ALGORTIHM = N'. + + @param str String to write to. + @param part_info partition_info holding the used key_algorithm + + @return Operation status. 
+ @retval 0 Success + @retval != 0 Failure +*/ + +static int add_key_with_algorithm(String *str, const partition_info *part_info) +{ + int err= 0; + err+= str->append(STRING_WITH_LEN("KEY ")); + + if (part_info->key_algorithm == partition_info::KEY_ALGORITHM_51) + { + err+= str->append(STRING_WITH_LEN("ALGORITHM = ")); + err+= str->append_longlong(part_info->key_algorithm); + err+= str->append(' '); + } + return err; +} + +char *generate_partition_syntax_for_frm(THD *thd, partition_info *part_info, + uint *buf_length, + HA_CREATE_INFO *create_info, + Alter_info *alter_info) +{ + Sql_mode_save_for_frm_handling sql_mode_save(thd); + char *res= generate_partition_syntax(thd, part_info, buf_length, + true, create_info, alter_info); + DBUG_EXECUTE_IF("generate_partition_syntax_for_frm", + push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, ER_YES, + ErrConvString(res, (uint32) *buf_length, + system_charset_info).ptr());); + return res; +} + + +/* + Generate the partition type syntax from the partition data structure. + + @return Operation status. 
+ @retval 0 Success + @retval > 0 Failure + @retval -1 Fatal error +*/ + +int partition_info::gen_part_type(THD *thd, String *str) const +{ + int err= 0; + switch (part_type) + { + case RANGE_PARTITION: + err+= str->append(STRING_WITH_LEN("RANGE ")); + break; + case LIST_PARTITION: + err+= str->append(STRING_WITH_LEN("LIST ")); + break; + case HASH_PARTITION: + if (linear_hash_ind) + err+= str->append(STRING_WITH_LEN("LINEAR ")); + if (list_of_part_fields) + { + err+= add_key_with_algorithm(str, this); + err+= add_part_field_list(thd, str, part_field_list); + } + else + err+= str->append(STRING_WITH_LEN("HASH ")); + break; + case VERSIONING_PARTITION: + err+= str->append(STRING_WITH_LEN("SYSTEM_TIME ")); + break; + default: + DBUG_ASSERT(0); + /* We really shouldn't get here, no use in continuing from here */ + my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATAL)); + return -1; + } + return err; +} + + +void part_type_error(THD *thd, partition_info *work_part_info, + const char *part_type, + partition_info *tab_part_info) +{ + StringBuffer<256> tab_part_type; + if (tab_part_info->gen_part_type(thd, &tab_part_type) < 0) + return; + tab_part_type.length(tab_part_type.length() - 1); + if (work_part_info) + { + DBUG_ASSERT(!part_type); + StringBuffer<256> work_part_type; + if (work_part_info->gen_part_type(thd, &work_part_type) < 0) + return; + work_part_type.length(work_part_type.length() - 1); + my_error(ER_PARTITION_WRONG_TYPE, MYF(0), work_part_type.c_ptr(), + tab_part_type.c_ptr()); + } + else + { + DBUG_ASSERT(part_type); + my_error(ER_PARTITION_WRONG_TYPE, MYF(0), part_type, + tab_part_type.c_ptr()); + } +} + + +/* + Generate the partition syntax from the partition data structure. + Useful for support of generating defaults, SHOW CREATE TABLES + and easy partition management. 
+ + SYNOPSIS + generate_partition_syntax() + part_info The partitioning data structure + buf_length A pointer to the returned buffer length + show_partition_options Should we display partition options + create_info Info generated by parser + alter_info Info generated by parser + + RETURN VALUES + NULL error + buf, buf_length Buffer and its length + + DESCRIPTION + Here we will generate the full syntax for the given command where all + defaults have been expanded. By so doing the it is also possible to + make lots of checks of correctness while at it. + This could will also be reused for SHOW CREATE TABLES and also for all + type ALTER TABLE commands focusing on changing the PARTITION structure + in any fashion. + + The code is optimised for minimal code size since it is not used in any + common queries. +*/ + +char *generate_partition_syntax(THD *thd, partition_info *part_info, + uint *buf_length, + bool show_partition_options, + HA_CREATE_INFO *create_info, + Alter_info *alter_info) +{ + uint i,j, tot_num_parts, num_subparts; + partition_element *part_elem; + int err= 0; + List_iterator part_it(part_info->partitions); + StringBuffer<1024> str; + DBUG_ENTER("generate_partition_syntax"); + + err+= str.append(STRING_WITH_LEN(" PARTITION BY ")); + int err2= part_info->gen_part_type(thd, &str); + if (err2 < 0) + DBUG_RETURN(NULL); + err+= err2; + if (part_info->part_type == VERSIONING_PARTITION) + { + Vers_part_info *vers_info= part_info->vers_info; + DBUG_ASSERT(vers_info); + if (vers_info->interval.is_set()) + { + err+= str.append(STRING_WITH_LEN("INTERVAL ")); + err+= append_interval(&str, vers_info->interval.type, + vers_info->interval.step); + err+= str.append(STRING_WITH_LEN(" STARTS ")); + if (create_info) // not SHOW CREATE + { + err+= str.append_ulonglong(vers_info->interval.start); + } + else + { + MYSQL_TIME ltime; + char ctime[MAX_DATETIME_WIDTH + 1]; + thd->variables.time_zone->gmt_sec_to_TIME(<ime, vers_info->interval.start); + uint ctime_len= 
my_datetime_to_str(<ime, ctime, 0); + err+= str.append(STRING_WITH_LEN("TIMESTAMP'")); + err+= str.append(ctime, ctime_len); + err+= str.append('\''); + } + } + else if (vers_info->limit) + { + err+= str.append(STRING_WITH_LEN("LIMIT ")); + err+= str.append_ulonglong(vers_info->limit); + } + if (vers_info->auto_hist) + { + DBUG_ASSERT(vers_info->interval.is_set() || + vers_info->limit); + err+= str.append(STRING_WITH_LEN(" AUTO")); + } + } + else if (part_info->part_expr) + { + err+= str.append('('); + part_info->part_expr->print_for_table_def(&str); + err+= str.append(')'); + } + else if (part_info->column_list) + { + err+= str.append(STRING_WITH_LEN(" COLUMNS")); + err+= add_part_field_list(thd, &str, part_info->part_field_list); + } + if ((!part_info->use_default_num_partitions) && + part_info->use_default_partitions) + { + err+= str.append(STRING_WITH_LEN("\nPARTITIONS ")); + err+= str.append_ulonglong(part_info->num_parts); + } + if (part_info->is_sub_partitioned()) + { + err+= str.append(STRING_WITH_LEN("\nSUBPARTITION BY ")); + /* Must be hash partitioning for subpartitioning */ + if (part_info->linear_hash_ind) + err+= str.append(STRING_WITH_LEN("LINEAR ")); + if (part_info->list_of_subpart_fields) + { + err+= add_key_with_algorithm(&str, part_info); + err+= add_part_field_list(thd, &str, part_info->subpart_field_list); + } + else + err+= str.append(STRING_WITH_LEN("HASH ")); + if (part_info->subpart_expr) + { + err+= str.append('('); + part_info->subpart_expr->print_for_table_def(&str); + err+= str.append(')'); + } + if ((!part_info->use_default_num_subpartitions) && + part_info->use_default_subpartitions) + { + err+= str.append(STRING_WITH_LEN("\nSUBPARTITIONS ")); + err+= str.append_ulonglong(part_info->num_subparts); + } + } + tot_num_parts= part_info->partitions.elements; + num_subparts= part_info->num_subparts; + + if (!part_info->use_default_partitions) + { + bool first= TRUE; + err+= str.append(STRING_WITH_LEN("\n(")); + i= 0; + do + { + part_elem= 
part_it++; + if (part_elem->part_state != PART_TO_BE_DROPPED && + part_elem->part_state != PART_REORGED_DROPPED) + { + if (!first) + err+= str.append(STRING_WITH_LEN(",\n ")); + first= FALSE; + err+= str.append(STRING_WITH_LEN("PARTITION ")); + err+= append_identifier(thd, &str, part_elem->partition_name, + strlen(part_elem->partition_name)); + err+= add_partition_values(&str, part_info, part_elem, + create_info, alter_info); + if (!part_info->is_sub_partitioned() || + part_info->use_default_subpartitions) + { + if (show_partition_options) + { + err+= add_server_part_options(&str, part_elem); + err+= add_engine_part_options(&str, part_elem); + } + } + else + { + err+= str.append(STRING_WITH_LEN("\n (")); + List_iterator sub_it(part_elem->subpartitions); + j= 0; + do + { + part_elem= sub_it++; + err+= str.append(STRING_WITH_LEN("SUBPARTITION ")); + err+= append_identifier(thd, &str, part_elem->partition_name, + strlen(part_elem->partition_name)); + if (show_partition_options) + err+= add_server_part_options(&str, part_elem); + if (j != (num_subparts-1)) + err+= str.append(STRING_WITH_LEN(",\n ")); + else + err+= str.append(')'); + } while (++j < num_subparts); + } + } + if (i == (tot_num_parts-1)) + err+= str.append(')'); + } while (++i < tot_num_parts); + } + if (err) + DBUG_RETURN(NULL); + *buf_length= str.length(); + DBUG_RETURN(thd->strmake(str.ptr(), str.length())); +} + + +/* + Check if partition key fields are modified and if it can be handled by the + underlying storage engine. 
  SYNOPSIS
    partition_key_modified
    table                TABLE object for which partition fields are set-up
    fields               Bitmap representing fields to be modified

  RETURN VALUES
    TRUE                 Need special handling of UPDATE
    FALSE                Normal UPDATE handling is ok
*/

bool partition_key_modified(TABLE *table, const MY_BITMAP *fields)
{
  Field **fld;
  partition_info *part_info= table->part_info;
  DBUG_ENTER("partition_key_modified");

  if (!part_info)
    DBUG_RETURN(FALSE);
  /* Engines that can move rows between partitions need no special UPDATE */
  if (table->s->db_type()->partition_flags &&
      (table->s->db_type()->partition_flags() & HA_CAN_UPDATE_PARTITION_KEY))
    DBUG_RETURN(FALSE);
  for (fld= part_info->full_part_field_array; *fld; fld++)
    if (bitmap_is_set(fields, (*fld)->field_index))
      DBUG_RETURN(TRUE);
  DBUG_RETURN(FALSE);
}


/*
  A function to handle correct handling of NULL values in partition
  functions.
  SYNOPSIS
    part_val_int()
    item_expr                 The item expression to evaluate
    out:result                The value of the partition function,
                              LONGLONG_MIN if any null value in function
  RETURN VALUES
    TRUE      Error in val_int()
    FALSE     ok
*/

static inline int part_val_int(Item *item_expr, longlong *result)
{
  switch (item_expr->cmp_type())
  {
  case DECIMAL_RESULT:
  {
    my_decimal buf;
    my_decimal *val= item_expr->val_decimal(&buf);
    /* Round towards negative infinity; out-of-range decimal is an error */
    if (val && my_decimal2int(E_DEC_FATAL_ERROR, val, item_expr->unsigned_flag,
                              result, FLOOR) != E_DEC_OK)
      return true;
    break;
  }
  case INT_RESULT:
    *result= item_expr->val_int();
    break;
  case STRING_RESULT:
  case REAL_RESULT:
  case ROW_RESULT:
  case TIME_RESULT:
    /* Partition functions are restricted to integer/decimal results */
    DBUG_ASSERT(0);
    break;
  }
  if (item_expr->null_value)
  {
    if (unlikely(current_thd->is_error()))
      return true;
    /* NULL sorts as the lowest possible partition function value */
    *result= LONGLONG_MIN;
  }
  return false;
}


/*
  The next set of functions are used to calculate the partition identity.
  A handler sets up a variable that corresponds to one of these functions
  to be able to quickly call it whenever the partition id needs to be
  calculated based on the record in table->record[0] (or set up to fake
  that).
  There are 4 functions for hash partitioning and 2 for RANGE/LIST partitions.
  In addition there are 4 variants for RANGE subpartitioning and 4 variants
  for LIST subpartitioning thus in total there are 14 variants of this
  function.

  We have a set of support functions for these 14 variants. There are 4
  variants of hash functions and there is a function for each. The KEY
  partitioning uses the function calculate_key_hash_value to calculate the
  hash value based on an array of fields. The linear hash variants uses the
  method get_part_id_from_linear_hash to get the partition id using the
  hash value and some parameters calculated from the number of partitions.
*/

/*
  A simple support function to calculate part_id given local part and
  sub part.

  SYNOPSIS
    get_part_id_for_sub()
    loc_part_id             Local partition id
    sub_part_id             Subpartition id
    num_subparts            Number of subparts
*/

inline
static uint32 get_part_id_for_sub(uint32 loc_part_id, uint32 sub_part_id,
                                  uint num_subparts)
{
  /* Subpartitions are numbered consecutively within each partition */
  return (uint32)((loc_part_id * num_subparts) + sub_part_id);
}


/*
  Calculate part_id for (SUB)PARTITION BY HASH

  SYNOPSIS
    get_part_id_hash()
    num_parts                Number of hash partitions
    part_expr                Item tree of hash function
    out:part_id              The returned partition id
    out:func_value           Value of hash function

  RETURN VALUE
    != 0                     Error code
    FALSE                    Success
*/

static int get_part_id_hash(uint num_parts,
                            Item *part_expr,
                            uint32 *part_id,
                            longlong *func_value)
{
  longlong int_hash_id;
  DBUG_ENTER("get_part_id_hash");

  if (part_val_int(part_expr, func_value))
    DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);

  int_hash_id= *func_value % num_parts;

  /* C's % keeps the dividend's sign; map negative remainders to positive */
  *part_id= int_hash_id < 0 ? (uint32) -int_hash_id : (uint32) int_hash_id;
  DBUG_RETURN(FALSE);
}


/*
  Calculate part_id for (SUB)PARTITION BY LINEAR HASH

  SYNOPSIS
    get_part_id_linear_hash()
    part_info           A reference to the partition_info struct where all the
                        desired information is given
    num_parts           Number of hash partitions
    part_expr           Item tree of hash function
    out:part_id         The returned partition id
    out:func_value      Value of hash function

  RETURN VALUE
    != 0     Error code
    0        OK
*/

static int get_part_id_linear_hash(partition_info *part_info,
                                   uint num_parts,
                                   Item *part_expr,
                                   uint32 *part_id,
                                   longlong *func_value)
{
  DBUG_ENTER("get_part_id_linear_hash");

  if (part_val_int(part_expr, func_value))
    DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);

  *part_id= get_part_id_from_linear_hash(*func_value,
                                         part_info->linear_hash_mask,
                                         num_parts);
  DBUG_RETURN(FALSE);
}


/**
  Calculate part_id for (SUB)PARTITION BY KEY

  @param file                Handler to storage engine
  @param field_array         Array of fields for PARTITION KEY
  @param num_parts           Number of KEY partitions
  @param func_value[out]     Returns calculated hash value

  @return Calculated partition id
*/

inline
static uint32 get_part_id_key(handler *file,
                              Field **field_array,
                              uint num_parts,
                              longlong *func_value)
{
  DBUG_ENTER("get_part_id_key");
  *func_value= ha_partition::calculate_key_hash_value(field_array);
  DBUG_RETURN((uint32) (*func_value % num_parts));
}


/*
  Calculate part_id for (SUB)PARTITION BY LINEAR KEY

  SYNOPSIS
    get_part_id_linear_key()
    part_info           A reference to the partition_info struct where all the
                        desired information is given
    field_array         Array of fields for PARTITION KEY
    num_parts           Number of KEY partitions

  RETURN VALUE
    Calculated partition id
*/

inline
static uint32 get_part_id_linear_key(partition_info *part_info,
                                     Field **field_array,
                                     uint num_parts,
                                     longlong *func_value)
{
  DBUG_ENTER("get_part_id_linear_key");

  *func_value= ha_partition::calculate_key_hash_value(field_array);
  DBUG_RETURN(get_part_id_from_linear_hash(*func_value,
                                           part_info->linear_hash_mask,
                                           num_parts));
}

/*
  Copy to field buffers and set up field pointers

  SYNOPSIS
    copy_to_part_field_buffers()
    ptr                          Array of fields to copy
    field_bufs                   Array of field buffers to copy to
    restore_ptr                  Array of pointers to restore to

  RETURN VALUES
    NONE
  DESCRIPTION
    This routine is used to take the data from field pointer, convert
    it to a standard format and store this format in a field buffer
    allocated for this purpose. Next the field pointers are moved to
    point to the field buffers. There is a separate (routine) to restore
    the field pointers after this call.
*/

static void copy_to_part_field_buffers(Field **ptr,
                                       uchar **field_bufs,
                                       uchar **restore_ptr)
{
  Field *field;
  while ((field= *(ptr++)))
  {
    /* Remember the original data pointer so it can be restored later */
    *restore_ptr= field->ptr;
    restore_ptr++;
    if (!field->maybe_null() || !field->is_null())
    {
      CHARSET_INFO *cs= field->charset();
      uint max_len= field->pack_length();
      uint data_len= field->data_length();
      uchar *field_buf= *field_bufs;
      /*
        We only use the field buffer for VARCHAR and CHAR strings
        which isn't of a binary collation. We also only use the
        field buffer for fields which are not currently NULL.
        The field buffer will store a normalised string. We use
        the strnxfrm method to normalise the string.
      */
      if (field->type() == MYSQL_TYPE_VARCHAR)
      {
        /* VARCHAR: keep the 1- or 2-byte length prefix before the data */
        uint len_bytes= ((Field_varstring*)field)->length_bytes;
        cs->strnxfrm(field_buf + len_bytes, max_len,
                     field->ptr + len_bytes, data_len);
        if (len_bytes == 1)
          *field_buf= (uchar) data_len;
        else
          int2store(field_buf, data_len);
      }
      else
      {
        cs->strnxfrm(field_buf, max_len,
                     field->ptr, max_len);
      }
      /* Point the field at the normalised copy instead of the record */
      field->ptr= field_buf;
    }
    field_bufs++;
  }
  return;
}

/*
  Restore field pointers
  SYNOPSIS
    restore_part_field_pointers()
    ptr                            Array of fields to restore
    restore_ptr                    Array of field pointers to restore to

  RETURN VALUES
*/

static void restore_part_field_pointers(Field **ptr, uchar **restore_ptr)
{
  Field *field;
  while ((field= *(ptr++)))
  {
    field->ptr= *restore_ptr;
    restore_ptr++;
  }
  return;
}

/*
  This function is used to calculate the partition id where all partition
  fields have been prepared to point to a record where the partition field
  values are bound.

  SYNOPSIS
    get_partition_id()
    part_info           A reference to the partition_info struct where all the
                        desired information is given
    out:part_id         The partition id is returned through this pointer
    out:func_value      Value of partition function (longlong)

  RETURN VALUE
    part_id                     Partition id of partition that would contain
                                row with given values of PF-fields
    HA_ERR_NO_PARTITION_FOUND   The fields of the partition function didn't
                                fit into any partition and thus the values of
                                the PF-fields are not allowed.

  DESCRIPTION
    A routine used from write_row, update_row and delete_row from any
    handler supporting partitioning. It is also a support routine for
    get_partition_set used to find the set of partitions needed to scan
    for a certain index scan or full table scan.

    It is actually 9 different variants of this function which are called
    through a function pointer.
    get_partition_id_list
    get_partition_id_list_col
    get_partition_id_range
    get_partition_id_range_col
    get_partition_id_hash_nosub
    get_partition_id_key_nosub
    get_partition_id_linear_hash_nosub
    get_partition_id_linear_key_nosub
    get_partition_id_with_sub
*/

/*
  This function is used to calculate the main partition to use in the case of
  subpartitioning and we don't know enough to get the partition identity in
  total.

  SYNOPSIS
    get_part_partition_id()
    part_info           A reference to the partition_info struct where all the
                        desired information is given
    out:part_id         The partition id is returned through this pointer
    out:func_value      The value calculated by partition function

  RETURN VALUE
    HA_ERR_NO_PARTITION_FOUND   The fields of the partition function didn't
                                fit into any partition and thus the values of
                                the PF-fields are not allowed.
    0                           OK

  DESCRIPTION

    It is actually 8 different variants of this function which are called
    through a function pointer.

    get_partition_id_list
    get_partition_id_list_col
    get_partition_id_range
    get_partition_id_range_col
    get_partition_id_hash_nosub
    get_partition_id_key_nosub
    get_partition_id_linear_hash_nosub
    get_partition_id_linear_key_nosub
*/

/*
  Wrapper that normalises character-set dependent partition fields into
  the pre-allocated field buffers before computing the partition id,
  then restores the original field pointers.
*/
static int get_part_id_charset_func_part(partition_info *part_info,
                                         uint32 *part_id,
                                         longlong *func_value)
{
  int res;
  DBUG_ENTER("get_part_id_charset_func_part");

  copy_to_part_field_buffers(part_info->part_charset_field_array,
                             part_info->part_field_buffers,
                             part_info->restore_part_field_ptrs);
  res= part_info->get_part_partition_id_charset(part_info,
                                                part_id, func_value);
  restore_part_field_pointers(part_info->part_charset_field_array,
                              part_info->restore_part_field_ptrs);
  DBUG_RETURN(res);
}


/*
  Same as get_part_id_charset_func_part but for the subpartition id
  (no partition-function value is produced for subpartitions).
*/
static int get_part_id_charset_func_subpart(partition_info *part_info,
                                            uint32 *part_id)
{
  int res;
  DBUG_ENTER("get_part_id_charset_func_subpart");

  copy_to_part_field_buffers(part_info->subpart_charset_field_array,
                             part_info->subpart_field_buffers,
                             part_info->restore_subpart_field_ptrs);
  res= part_info->get_subpartition_id_charset(part_info, part_id);
  restore_part_field_pointers(part_info->subpart_charset_field_array,
                              part_info->restore_subpart_field_ptrs);
  DBUG_RETURN(res);
}

/*
  Calculate the partition id for LIST COLUMNS partitioning by binary
  search over the sorted tuples in list_col_array.  The current row is
  compared against each tuple with cmp_rec_and_tuple().  Falls back to the
  DEFAULT partition, if one is defined.
*/
int get_partition_id_list_col(partition_info *part_info,
                              uint32 *part_id,
                              longlong *func_value)
{
  part_column_list_val *list_col_array= part_info->list_col_array;
  uint num_columns= part_info->part_field_list.elements;
  int list_index, cmp;
  int min_list_index= 0;
  int max_list_index= part_info->num_list_values - 1;
  DBUG_ENTER("get_partition_id_list_col");

  while (max_list_index >= min_list_index)
  {
    list_index= (max_list_index + min_list_index) >> 1;
    cmp= cmp_rec_and_tuple(list_col_array + list_index*num_columns,
                           num_columns);
    if (cmp > 0)
      min_list_index= list_index + 1;
    else if (cmp < 0)
    {
      if (!list_index)
        goto notfound;
      max_list_index= list_index - 1;
    }
    else
    {
      *part_id= (uint32)list_col_array[list_index*num_columns].partition_id;
      DBUG_RETURN(0);
    }
  }
notfound:
  /* defined_max_value here means a DEFAULT partition exists */
  if (part_info->defined_max_value)
  {
    *part_id= part_info->default_partition_id;
    DBUG_RETURN(0);
  }
  *part_id= 0;
  DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);
}


/*
  Calculate the partition id for LIST partitioning (single integer
  partition function) by binary search over the sorted list_array.
  NULL values map to the designated NULL partition if there is one;
  otherwise, and when no value matches, the DEFAULT partition is used
  if defined, else HA_ERR_NO_PARTITION_FOUND is returned.
*/
int get_partition_id_list(partition_info *part_info,
                          uint32 *part_id,
                          longlong *func_value)
{
  LIST_PART_ENTRY *list_array= part_info->list_array;
  int list_index;
  int min_list_index= 0;
  int max_list_index= part_info->num_list_values - 1;
  longlong part_func_value;
  int error= part_val_int(part_info->part_expr, &part_func_value);
  longlong list_value;
  bool unsigned_flag= part_info->part_expr->unsigned_flag;
  DBUG_ENTER("get_partition_id_list");

  if (error)
    goto notfound;

  if (part_info->part_expr->null_value)
  {
    if (part_info->has_null_value)
    {
      *part_id= part_info->has_null_part_id;
      DBUG_RETURN(0);
    }
    goto notfound;
  }
  *func_value= part_func_value;
  /*
    Bias unsigned values by 2^63 so the whole unsigned domain can be
    binary-searched using signed comparisons (list_array is stored with
    the same bias).
  */
  if (unsigned_flag)
    part_func_value-= 0x8000000000000000ULL;
  while (max_list_index >= min_list_index)
  {
    list_index= (max_list_index + min_list_index) >> 1;
    list_value= list_array[list_index].list_value;
    if (list_value < part_func_value)
      min_list_index= list_index + 1;
    else if (list_value > part_func_value)
    {
      if (!list_index)
        goto notfound;
      max_list_index= list_index - 1;
    }
    else
    {
      *part_id= (uint32)list_array[list_index].partition_id;
      DBUG_RETURN(0);
    }
  }
notfound:
  /* defined_max_value here means a DEFAULT partition exists */
  if (part_info->defined_max_value)
  {
    *part_id= part_info->default_partition_id;
    DBUG_RETURN(0);
  }
  *part_id= 0;
  DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);
}


/*
  Find the first list-columns entry that can match the given interval
  endpoint (used for partition pruning of LIST COLUMNS).  Returns an index
  into the list_col_array tuples; see the endpoint semantics in the
  comment of get_list_array_idx_for_endpoint below.
*/
uint32 get_partition_id_cols_list_for_endpoint(partition_info *part_info,
                                               bool left_endpoint,
                                               bool include_endpoint,
                                               uint32 nparts)
{
  part_column_list_val *list_col_array= part_info->list_col_array;
  uint num_columns= part_info->part_field_list.elements;
  uint list_index;
  uint min_list_index= 0;
  int cmp;
  /* Notice that max_list_index = last_index + 1 here! */
  uint max_list_index= part_info->num_list_values;
  DBUG_ENTER("get_partition_id_cols_list_for_endpoint");

  /* Find the matching partition (including taking endpoint into account). */
  do
  {
    /* Midpoint, adjusted down, so it can never be >= max_list_index. */
    list_index= (max_list_index + min_list_index) >> 1;
    cmp= cmp_rec_and_tuple_prune(list_col_array + list_index*num_columns,
                                 nparts, left_endpoint, include_endpoint);
    if (cmp > 0)
    {
      min_list_index= list_index + 1;
    }
    else
    {
      max_list_index= list_index;
      if (cmp == 0)
        break;
    }
  } while (max_list_index > min_list_index);
  list_index= max_list_index;

  /* Given value must be LESS THAN or EQUAL to the found partition. */
  DBUG_ASSERT(list_index == part_info->num_list_values ||
              (0 >= cmp_rec_and_tuple_prune(list_col_array +
                                              list_index*num_columns,
                                            nparts, left_endpoint,
                                            include_endpoint)));
  /* Given value must be GREATER THAN the previous partition. */
  DBUG_ASSERT(list_index == 0 ||
              (0 < cmp_rec_and_tuple_prune(list_col_array +
                                             (list_index - 1)*num_columns,
                                           nparts, left_endpoint,
                                           include_endpoint)));

  /* Include the right endpoint if not already passed end of array. */
  if (!left_endpoint && include_endpoint && cmp == 0 &&
      list_index < part_info->num_list_values)
    list_index++;

  DBUG_RETURN(list_index);
}


/**
  Find the sub-array part_info->list_array that corresponds to given interval.

  @param part_info         Partitioning info (partitioning type must be LIST)
  @param left_endpoint     TRUE  - the interval is [a; +inf) or (a; +inf)
                           FALSE - the interval is (-inf; a] or (-inf; a)
  @param include_endpoint  TRUE iff the interval includes the endpoint

  This function finds the sub-array of part_info->list_array where values of
  list_array[idx].list_value are contained within the specified interval.
  list_array is ordered by list_value, so
  1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the
     sought sub-array starts at some index idx and continues till array end.
     The function returns first number idx, such that
     list_array[idx].list_value is contained within the passed interval.

  2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the
     sought sub-array starts at array start and continues till some last
     index idx.
     The function returns first number idx, such that
     list_array[idx].list_value is NOT contained within the passed interval.
     If all array elements are contained, part_info->num_list_values is
     returned.

  @note The caller will call this function and then will run along the
  sub-array of list_array to collect partition ids. If the number of list
  values is significantly higher then number of partitions, this could be slow
  and we could invent some other approach. The "run over list array" part is
  already wrapped in a get_next()-like function.

  @return The index of corresponding sub-array of part_info->list_array.
*/

/*
  Charset-aware wrapper around get_list_array_idx_for_endpoint():
  copy the partition fields into work buffers before evaluating the
  endpoint and restore the original field pointers afterwards.
*/
uint32 get_list_array_idx_for_endpoint_charset(partition_info *part_info,
                                               bool left_endpoint,
                                               bool include_endpoint)
{
  uint32 res;
  copy_to_part_field_buffers(part_info->part_field_array,
                             part_info->part_field_buffers,
                             part_info->restore_part_field_ptrs);
  res= get_list_array_idx_for_endpoint(part_info, left_endpoint,
                                       include_endpoint);
  restore_part_field_pointers(part_info->part_field_array,
                              part_info->restore_part_field_ptrs);
  return res;
}

/*
  Binary search in part_info->list_array for the index corresponding to
  the given interval endpoint (see the contract described above).
*/
uint32 get_list_array_idx_for_endpoint(partition_info *part_info,
                                       bool left_endpoint,
                                       bool include_endpoint)
{
  LIST_PART_ENTRY *list_array= part_info->list_array;
  uint list_index;
  uint min_list_index= 0, max_list_index= part_info->num_list_values - 1;
  longlong list_value;
  /* Get the partitioning function value for the endpoint */
  longlong part_func_value=
    part_info->part_expr->val_int_endpoint(left_endpoint, &include_endpoint);
  bool unsigned_flag= part_info->part_expr->unsigned_flag;
  DBUG_ENTER("get_list_array_idx_for_endpoint");

  if (part_info->part_expr->null_value)
  {
    /*
      Special handling for MONOTONIC functions that can return NULL for
      values that are comparable. I.e.
      '2000-00-00' can be compared to '2000-01-01' but TO_DAYS('2000-00-00')
      returns NULL which cannot be compared using <, >, <=, >= etc.

      Otherwise, just return the first index (lowest value).
    */
    enum_monotonicity_info monotonic;
    monotonic= part_info->part_expr->get_monotonicity_info();
    if (monotonic != MONOTONIC_INCREASING_NOT_NULL &&
        monotonic != MONOTONIC_STRICT_INCREASING_NOT_NULL)
    {
      /* F(col) can not return NULL, return index with lowest value */
      DBUG_RETURN(0);
    }
  }

  /* Map unsigned values onto the signed comparison domain */
  if (unsigned_flag)
    part_func_value-= 0x8000000000000000ULL;
  DBUG_ASSERT(part_info->num_list_values);
  do
  {
    list_index= (max_list_index + min_list_index) >> 1;
    list_value= list_array[list_index].list_value;
    if (list_value < part_func_value)
      min_list_index= list_index + 1;
    else if (list_value > part_func_value)
    {
      if (!list_index)
        goto notfound;
      max_list_index= list_index - 1;
    }
    else
    {
      DBUG_RETURN(list_index + MY_TEST(left_endpoint ^ include_endpoint));
    }
  } while (max_list_index >= min_list_index);
notfound:
  if (list_value < part_func_value)
    list_index++;
  DBUG_RETURN(list_index);
}


/*
  Find the RANGE COLUMNS partition for the current record by binary search
  over range_col_array. Returns HA_ERR_NO_PARTITION_FOUND when the record
  compares >= the upper bound of the last partition.
*/
int get_partition_id_range_col(partition_info *part_info,
                               uint32 *part_id,
                               longlong *func_value)
{
  part_column_list_val *range_col_array= part_info->range_col_array;
  uint num_columns= part_info->part_field_list.elements;
  uint max_partition= part_info->num_parts - 1;
  uint min_part_id= 0;
  uint max_part_id= max_partition;
  uint loc_part_id;
  DBUG_ENTER("get_partition_id_range_col");

  while (max_part_id > min_part_id)
  {
    loc_part_id= (max_part_id + min_part_id + 1) >> 1;
    if (cmp_rec_and_tuple(range_col_array + loc_part_id*num_columns,
                          num_columns) >= 0)
      min_part_id= loc_part_id + 1;
    else
      max_part_id= loc_part_id - 1;
  }
  loc_part_id= max_part_id;
  if (loc_part_id != max_partition)
    if (cmp_rec_and_tuple(range_col_array + loc_part_id*num_columns,
                          num_columns) >= 0)
      loc_part_id++;
  *part_id= (uint32)loc_part_id;
  if (loc_part_id == max_partition &&
      (cmp_rec_and_tuple(range_col_array + loc_part_id*num_columns,
                         num_columns) >= 0))
    DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);

  DBUG_PRINT("exit",("partition: %d", *part_id));
  DBUG_RETURN(0);
}


/*
  Find the partition for a row of a system-versioned table: current rows
  (row_end is MAX or NULL) go to the "now" partition, historical rows are
  placed by binary search on the history partition bounds.
*/
int vers_get_partition_id(partition_info *part_info, uint32 *part_id,
                          longlong *func_value)
{
  DBUG_ENTER("vers_get_partition_id");
  Field *row_end= part_info->part_field_array[STAT_TRX_END];
  Vers_part_info *vers_info= part_info->vers_info;

  if (row_end->is_max() || row_end->is_null())
    *part_id= vers_info->now_part->id;
  else // row is historical
  {
    longlong *range_value= part_info->range_int_array;
    uint max_hist_id= part_info->num_parts - 2;
    uint min_hist_id= 0, loc_hist_id= vers_info->hist_part->id;
    ulong unused;
    my_time_t ts;

    if (!range_value)
      goto done; // fastpath

    ts= row_end->get_timestamp(&unused);
    /* Check the cached hist_part first before falling back to search */
    if ((loc_hist_id == 0 || range_value[loc_hist_id - 1] < ts) &&
        (loc_hist_id == max_hist_id || range_value[loc_hist_id] >= ts))
      goto done; // fastpath

    while (max_hist_id > min_hist_id)
    {
      loc_hist_id= (max_hist_id + min_hist_id) / 2;
      if (range_value[loc_hist_id] <= ts)
        min_hist_id= loc_hist_id + 1;
      else
        max_hist_id= loc_hist_id;
    }
    loc_hist_id= max_hist_id;
done:
    *part_id= (uint32)loc_hist_id;
  }
  DBUG_PRINT("exit",("partition: %d", *part_id));
  DBUG_RETURN(0);
}


/*
  Find the RANGE partition for the current record. NULL partition function
  values map to partition 0; values beyond the last bound give
  HA_ERR_NO_PARTITION_FOUND unless MAXVALUE is defined.
*/
int get_partition_id_range(partition_info *part_info,
                           uint32 *part_id,
                           longlong *func_value)
{
  longlong *range_array= part_info->range_int_array;
  uint max_partition= part_info->num_parts - 1;
  uint min_part_id= 0;
  uint max_part_id= max_partition;
  uint loc_part_id;
  longlong part_func_value;
  int error= part_val_int(part_info->part_expr, &part_func_value);
  bool unsigned_flag= part_info->part_expr->unsigned_flag;
  DBUG_ENTER("get_partition_id_range");

  if (unlikely(error))
    DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);

  if (part_info->part_expr->null_value)
  {
    *part_id= 0;
    DBUG_RETURN(0);
  }
  *func_value= part_func_value;
  if (unsigned_flag)
    part_func_value-= 0x8000000000000000ULL;
  /* Search for the partition containing part_func_value */
  while (max_part_id > min_part_id)
  {
    loc_part_id= (max_part_id + min_part_id) / 2;
    if (range_array[loc_part_id] <= part_func_value)
      min_part_id= loc_part_id + 1;
    else
      max_part_id= loc_part_id;
  }
  loc_part_id= max_part_id;
  *part_id= (uint32)loc_part_id;
  if (loc_part_id == max_partition &&
      part_func_value >= range_array[loc_part_id] &&
      !part_info->defined_max_value)
    DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);

  DBUG_PRINT("exit",("partition: %d", *part_id));
  DBUG_RETURN(0);
}


/*
  Find the sub-array of part_info->range_int_array that covers given interval

  SYNOPSIS
    get_partition_id_range_for_endpoint()
      part_info         Partitioning info (partitioning type must be RANGE)
      left_endpoint     TRUE  - the interval is [a; +inf) or (a; +inf)
                        FALSE - the interval is (-inf; a] or (-inf; a).
      include_endpoint  TRUE <=> the endpoint itself is included in the
                        interval

  DESCRIPTION
    This function finds the sub-array of part_info->range_int_array where the
    elements have non-empty intersections with the given interval.

    A range_int_array element at index idx represents the interval

      [range_int_array[idx-1], range_int_array[idx]),

    intervals are disjoint and ordered by their right bound, so

    1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the
       sought sub-array starts at some index idx and continues till array end.
       The function returns first number idx, such that the interval
       represented by range_int_array[idx] has non empty intersection with
       the passed interval.

    2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the
       sought sub-array starts at array start and continues till some last
       index idx.
       The function returns first number idx, such that the interval
       represented by range_int_array[idx] has EMPTY intersection with the
       passed interval.
       If the interval represented by the last array element has non-empty
       intersection with the passed interval, part_info->num_parts is
       returned.

  RETURN
    The edge of corresponding part_info->range_int_array sub-array.
*/

static uint32
get_partition_id_range_for_endpoint_charset(partition_info *part_info,
                                            bool left_endpoint,
                                            bool include_endpoint)
{
  /*
    Charset-aware wrapper: copy partition fields to work buffers before
    the endpoint evaluation, restore the field pointers afterwards.
  */
  uint32 res;
  copy_to_part_field_buffers(part_info->part_field_array,
                             part_info->part_field_buffers,
                             part_info->restore_part_field_ptrs);
  res= get_partition_id_range_for_endpoint(part_info, left_endpoint,
                                           include_endpoint);
  restore_part_field_pointers(part_info->part_field_array,
                              part_info->restore_part_field_ptrs);
  return res;
}

uint32 get_partition_id_range_for_endpoint(partition_info *part_info,
                                           bool left_endpoint,
                                           bool include_endpoint)
{
  longlong *range_array= part_info->range_int_array;
  longlong part_end_val;
  uint max_partition= part_info->num_parts - 1;
  uint min_part_id= 0, max_part_id= max_partition, loc_part_id;
  /* Get the partitioning function value for the endpoint */
  longlong part_func_value=
    part_info->part_expr->val_int_endpoint(left_endpoint, &include_endpoint);

  bool unsigned_flag= part_info->part_expr->unsigned_flag;
  DBUG_ENTER("get_partition_id_range_for_endpoint");

  if (part_info->part_expr->null_value)
  {
    /*
      Special handling for MONOTONIC functions that can return NULL for
      values that are comparable. I.e.
      '2000-00-00' can be compared to '2000-01-01' but TO_DAYS('2000-00-00')
      returns NULL which cannot be compared using <, >, <=, >= etc.

      Otherwise, just return the first partition
      (may be included if not left endpoint)
    */
    enum_monotonicity_info monotonic;
    monotonic= part_info->part_expr->get_monotonicity_info();
    if (monotonic != MONOTONIC_INCREASING_NOT_NULL &&
        monotonic != MONOTONIC_STRICT_INCREASING_NOT_NULL)
    {
      /* F(col) can not return NULL, return partition with lowest value */
      if (!left_endpoint && include_endpoint)
        DBUG_RETURN(1);
      DBUG_RETURN(0);

    }
  }

  /* Map unsigned values onto the signed comparison domain */
  if (unsigned_flag)
    part_func_value-= 0x8000000000000000ULL;
  /* Turn an open left endpoint into a closed one */
  if (left_endpoint && !include_endpoint)
    part_func_value++;

  /*
    Search for the partition containing part_func_value
    (including the right endpoint).
  */
  while (max_part_id > min_part_id)
  {
    loc_part_id= (max_part_id + min_part_id) / 2;
    if (range_array[loc_part_id] < part_func_value)
      min_part_id= loc_part_id + 1;
    else
      max_part_id= loc_part_id;
  }
  loc_part_id= max_part_id;

  /* Adjust for endpoints */
  part_end_val= range_array[loc_part_id];
  if (left_endpoint)
  {
    DBUG_ASSERT(part_func_value > part_end_val ?
                (loc_part_id == max_partition &&
                 !part_info->defined_max_value) :
                1);
    /*
      In case of PARTITION p VALUES LESS THAN MAXVALUE
      the maximum value is in the current (last) partition.
      If value is equal or greater than the endpoint,
      the range starts from the next partition.
    */
    if (part_func_value >= part_end_val &&
        (loc_part_id < max_partition || !part_info->defined_max_value))
      loc_part_id++;
    if (part_info->part_type == VERSIONING_PARTITION &&
        part_func_value < INT_MAX32 &&
        loc_part_id > part_info->vers_info->hist_part->id)
    {
      /*
        Historical query with AS OF point after the last history partition must
        include last history partition because it can be overflown (contain
        history rows out of right endpoint).
      */
      loc_part_id= part_info->vers_info->hist_part->id;
    }
  }
  else
  {
    /* if 'WHERE <= X' and partition is LESS THAN (X) include next partition */
    if (include_endpoint && loc_part_id < max_partition &&
        part_func_value == part_end_val)
      loc_part_id++;

    /* Right endpoint, set end after correct partition */
    loc_part_id++;
  }
  DBUG_RETURN(loc_part_id);
}


/* HASH partitioning without subpartitions */
int get_partition_id_hash_nosub(partition_info *part_info,
                                uint32 *part_id,
                                longlong *func_value)
{
  return get_part_id_hash(part_info->num_parts, part_info->part_expr,
                          part_id, func_value);
}


/* LINEAR HASH partitioning without subpartitions */
int get_partition_id_linear_hash_nosub(partition_info *part_info,
                                       uint32 *part_id,
                                       longlong *func_value)
{
  return get_part_id_linear_hash(part_info, part_info->num_parts,
                                 part_info->part_expr, part_id, func_value);
}


/* KEY partitioning without subpartitions */
int get_partition_id_key_nosub(partition_info *part_info,
                               uint32 *part_id,
                               longlong *func_value)
{
  *part_id= get_part_id_key(part_info->table->file,
                            part_info->part_field_array,
                            part_info->num_parts, func_value);
  return 0;
}


/* LINEAR KEY partitioning without subpartitions */
int get_partition_id_linear_key_nosub(partition_info *part_info,
                                      uint32 *part_id,
                                      longlong *func_value)
{
  *part_id= get_part_id_linear_key(part_info,
                                   part_info->part_field_array,
                                   part_info->num_parts, func_value);
  return 0;
}


/*
  Combine the top-level partition id and the subpartition id into the
  final partition id for a subpartitioned table.
*/
int get_partition_id_with_sub(partition_info *part_info,
                              uint32 *part_id,
                              longlong *func_value)
{
  uint32 loc_part_id, sub_part_id;
  uint num_subparts;
  int error;
  DBUG_ENTER("get_partition_id_with_sub");

  if (unlikely((error= part_info->get_part_partition_id(part_info,
                                                        &loc_part_id,
                                                        func_value))))
  {
    DBUG_RETURN(error);
  }
  num_subparts= part_info->num_subparts;
  if (unlikely((error= part_info->get_subpartition_id(part_info,
                                                      &sub_part_id))))
  {
    DBUG_RETURN(error);
  }
  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, num_subparts);
  DBUG_RETURN(0);
}


/*
  This function is used to calculate the subpartition id

  SYNOPSIS
    get_subpartition_id()
      part_info           A reference to the partition_info struct where all the
                          desired information is given

  RETURN VALUE
    part_id               The subpartition identity

  DESCRIPTION
    A routine used in some SELECT's when only partial knowledge of the
    partitions is known.

    It is actually 4 different variants of this function which are called
    through a function pointer.

    get_partition_id_hash_sub
    get_partition_id_key_sub
    get_partition_id_linear_hash_sub
    get_partition_id_linear_key_sub
*/

int get_partition_id_hash_sub(partition_info *part_info,
                              uint32 *part_id)
{
  longlong func_value;
  return get_part_id_hash(part_info->num_subparts, part_info->subpart_expr,
                          part_id, &func_value);
}


int get_partition_id_linear_hash_sub(partition_info *part_info,
                                     uint32 *part_id)
{
  longlong func_value;
  return get_part_id_linear_hash(part_info, part_info->num_subparts,
                                 part_info->subpart_expr, part_id,
                                 &func_value);
}


int get_partition_id_key_sub(partition_info *part_info,
                             uint32 *part_id)
{
  longlong func_value;
  *part_id= get_part_id_key(part_info->table->file,
                            part_info->subpart_field_array,
                            part_info->num_subparts, &func_value);
  return FALSE;
}


int get_partition_id_linear_key_sub(partition_info *part_info,
                                    uint32 *part_id)
{
  longlong func_value;
  *part_id= get_part_id_linear_key(part_info,
                                   part_info->subpart_field_array,
                                   part_info->num_subparts, &func_value);
  return FALSE;
}


/*
  Set an indicator on all partition fields that are set by the key

  SYNOPSIS
    set_PF_fields_in_key()
    key_info                   Information about the index
    key_length                 Length of key

  RETURN VALUE
    TRUE                       Found partition field set by key
    FALSE                      No partition field set by key
*/

static bool set_PF_fields_in_key(KEY *key_info, uint key_length)
{
  KEY_PART_INFO *key_part;
  bool found_part_field= FALSE;
  DBUG_ENTER("set_PF_fields_in_key");

  /* Walk the key parts while there is key length left to consume */
  for (key_part= key_info->key_part; (int)key_length > 0; key_part++)
  {
    if (key_part->null_bit)
      key_length--;
    if (key_part->type == HA_KEYTYPE_BIT)
    {
      if (((Field_bit*)key_part->field)->bit_len)
        key_length--;
    }
    if (key_part->key_part_flag & (HA_BLOB_PART + HA_VAR_LENGTH_PART))
    {
      /* Variable-length parts carry an extra length prefix in the key */
      key_length-= HA_KEY_BLOB_LENGTH;
    }
    if (key_length < key_part->length)
      break;
    key_length-= key_part->length;
    if (key_part->field->flags & FIELD_IN_PART_FUNC_FLAG)
    {
      found_part_field= TRUE;
      key_part->field->flags|= GET_FIXED_FIELDS_FLAG;
    }
  }
  DBUG_RETURN(found_part_field);
}


/*
  We have found that at least one partition field was set by a key, now
  check if a partition function has all its fields bound or not.

  SYNOPSIS
    check_part_func_bound()
    ptr                     Array of fields NULL terminated (partition fields)

  RETURN VALUE
    TRUE                    All fields in partition function are set
    FALSE                   Not all fields in partition function are set
*/

static bool check_part_func_bound(Field **ptr)
{
  bool result= TRUE;
  DBUG_ENTER("check_part_func_bound");

  for (; *ptr; ptr++)
  {
    if (!((*ptr)->flags & GET_FIXED_FIELDS_FLAG))
    {
      result= FALSE;
      break;
    }
  }
  DBUG_RETURN(result);
}


/*
  Get the id of the subpartitioning part by using the key buffer of the
  index scan.

  SYNOPSIS
    get_sub_part_id_from_key()
    table         The table object
    buf           A buffer that can be used to evaluate the partition function
    key_info      The index object
    key_spec      A key_range containing key and key length
    out:part_id   The returned partition id

  RETURN VALUES
    TRUE                    All fields in partition function are set
    FALSE                   Not all fields in partition function are set

  DESCRIPTION
    Use key buffer to set-up record in buf, move field pointers and
    get the partition identity and restore field pointers afterwards.
*/

static int get_sub_part_id_from_key(const TABLE *table,uchar *buf,
                                    KEY *key_info,
                                    const key_range *key_spec,
                                    uint32 *part_id)
{
  uchar *rec0= table->record[0];
  partition_info *part_info= table->part_info;
  int res;
  DBUG_ENTER("get_sub_part_id_from_key");

  key_restore(buf, (uchar*)key_spec->key, key_info, key_spec->length);
  if (likely(rec0 == buf))
  {
    res= part_info->get_subpartition_id(part_info, part_id);
  }
  else
  {
    /* buf is a separate record buffer; retarget fields onto it */
    Field **part_field_array= part_info->subpart_field_array;
    part_info->table->move_fields(part_field_array, buf, rec0);
    res= part_info->get_subpartition_id(part_info, part_id);
    part_info->table->move_fields(part_field_array, rec0, buf);
  }
  DBUG_RETURN(res);
}

/*
  Get the id of the partitioning part by using the key buffer of the
  index scan.

  SYNOPSIS
    get_part_id_from_key()
    table         The table object
    buf           A buffer that can be used to evaluate the partition function
    key_info      The index object
    key_spec      A key_range containing key and key length
    out:part_id   Partition to use

  RETURN VALUES
    TRUE          Partition to use not found
    FALSE         Ok, part_id indicates partition to use

  DESCRIPTION
    Use key buffer to set-up record in buf, move field pointers and
    get the partition identity and restore field pointers afterwards.
*/

bool get_part_id_from_key(const TABLE *table, uchar *buf, KEY *key_info,
                          const key_range *key_spec, uint32 *part_id)
{
  bool result;
  uchar *rec0= table->record[0];
  partition_info *part_info= table->part_info;
  longlong func_value;
  DBUG_ENTER("get_part_id_from_key");

  key_restore(buf, (uchar*)key_spec->key, key_info, key_spec->length);
  if (likely(rec0 == buf))
  {
    result= part_info->get_part_partition_id(part_info, part_id,
                                             &func_value);
  }
  else
  {
    /* buf is a separate record buffer; retarget fields onto it */
    Field **part_field_array= part_info->part_field_array;
    part_info->table->move_fields(part_field_array, buf, rec0);
    result= part_info->get_part_partition_id(part_info, part_id,
                                             &func_value);
    part_info->table->move_fields(part_field_array, rec0, buf);
  }
  DBUG_RETURN(result);
}

/*
  Get the partitioning id of the full PF by using the key buffer of the
  index scan.

  SYNOPSIS
    get_full_part_id_from_key()
    table         The table object
    buf           A buffer that is used to evaluate the partition function
    key_info      The index object
    key_spec      A key_range containing key and key length
    out:part_spec A partition id containing start part and end part

  RETURN VALUES
    part_spec
    No partitions to scan is indicated by end_part > start_part when returning

  DESCRIPTION
    Use key buffer to set-up record in buf, move field pointers if needed and
    get the partition identity and restore field pointers afterwards.
*/

void get_full_part_id_from_key(const TABLE *table, uchar *buf,
                               KEY *key_info,
                               const key_range *key_spec,
                               part_id_range *part_spec)
{
  bool result;
  partition_info *part_info= table->part_info;
  uchar *rec0= table->record[0];
  longlong func_value;
  DBUG_ENTER("get_full_part_id_from_key");

  key_restore(buf, (uchar*)key_spec->key, key_info, key_spec->length);
  if (likely(rec0 == buf))
  {
    result= part_info->get_partition_id(part_info, &part_spec->start_part,
                                        &func_value);
  }
  else
  {
    /* buf is a separate record buffer; retarget fields onto it */
    Field **part_field_array= part_info->full_part_field_array;
    part_info->table->move_fields(part_field_array, buf, rec0);
    result= part_info->get_partition_id(part_info, &part_spec->start_part,
                                        &func_value);
    part_info->table->move_fields(part_field_array, rec0, buf);
  }
  part_spec->end_part= part_spec->start_part;
  /* On failure make start_part > end_part, i.e. "no partitions to scan" */
  if (unlikely(result))
    part_spec->start_part++;
  DBUG_VOID_RETURN;
}


/**
  @brief Verify that all rows in a table is in the given partition

  @param table      Table which contains the data that will be checked if
                    it is matching the partition definition.
  @param part_table Partitioned table containing the partition to check.
  @param part_id    Which partition to match with.

  @return Operation status
    @retval TRUE                Not all rows match the given partition
    @retval FALSE               OK
*/
bool verify_data_with_partition(TABLE *table, TABLE *part_table,
                                uint32 part_id)
{
  uint32 found_part_id;
  longlong func_value;                     /* Unused */
  handler *file;
  int error;
  uchar *old_rec;
  partition_info *part_info;
  DBUG_ENTER("verify_data_with_partition");
  DBUG_ASSERT(table && table->file && part_table && part_table->part_info &&
              part_table->file);

  /*
    Verify all table rows.
    First implementation uses full scan + evaluates partition functions for
    every row. TODO: add optimization to use index if possible, see WL#5397.

    1) Open both tables (already done) and set the row buffers to use
       the same buffer (to avoid copy).
    2) Init rnd on table.
    3) loop over all rows.
      3.1) verify that partition_id on the row is correct. Break if error.
  */
  file= table->file;
  part_info= part_table->part_info;
  bitmap_union(table->read_set, &part_info->full_part_field_set);
  old_rec= part_table->record[0];
  part_table->record[0]= table->record[0];
  part_info->table->move_fields(part_info->full_part_field_array, table->record[0], old_rec);
  if (unlikely(error= file->ha_rnd_init_with_error(TRUE)))
    goto err;

  do
  {
    if (unlikely((error= file->ha_rnd_next(table->record[0]))))
    {
      if (error == HA_ERR_END_OF_FILE)
        error= 0;
      else
        file->print_error(error, MYF(0));
      break;
    }
    if (unlikely((error= part_info->get_partition_id(part_info, &found_part_id,
                                                     &func_value))))
    {
      part_table->file->print_error(error, MYF(0));
      break;
    }
    DEBUG_SYNC(current_thd, "swap_partition_first_row_read");
    if (found_part_id != part_id)
    {
      my_error(ER_ROW_DOES_NOT_MATCH_PARTITION, MYF(0));
      error= 1;
      break;
    }
  } while (TRUE);
  (void) file->ha_rnd_end();
err:
  /* Restore the field pointers and the original record buffer */
  part_info->table->move_fields(part_info->full_part_field_array, old_rec,
                                table->record[0]);
  part_table->record[0]= old_rec;
  DBUG_RETURN(unlikely(error) ? TRUE : FALSE);
}


/*
  Prune the set of partitions to use in query

  SYNOPSIS
    prune_partition_set()
    table         The table object
    out:part_spec Contains start part, end part

  DESCRIPTION
    This function is called to prune the range of partitions to scan by
    checking the read_partitions bitmap.
    If start_part > end_part at return it means no partition needs to be
    scanned. If start_part == end_part it always means a single partition
    needs to be scanned.

  RETURN VALUE
    part_spec
*/
void prune_partition_set(const TABLE *table, part_id_range *part_spec)
{
  int last_partition= -1;
  uint i;
  partition_info *part_info= table->part_info;

  DBUG_ENTER("prune_partition_set");
  for (i= part_spec->start_part; i <= part_spec->end_part; i++)
  {
    if (bitmap_is_set(&(part_info->read_partitions), i))
    {
      DBUG_PRINT("info", ("Partition %d is set", i));
      if (last_partition == -1)
        /* First partition found in set and pruned bitmap */
        part_spec->start_part= i;
      last_partition= i;
    }
  }
  if (last_partition == -1)
    /* No partition found in pruned bitmap */
    part_spec->start_part= part_spec->end_part + 1;
  else //if (last_partition != -1)
    part_spec->end_part= last_partition;

  DBUG_VOID_RETURN;
}

/*
  Get the set of partitions to use in query.

  SYNOPSIS
    get_partition_set()
    table         The table object
    buf           A buffer that can be used to evaluate the partition function
    index         The index of the key used, if MAX_KEY no index used
    key_spec      A key_range containing key and key length
    out:part_spec Contains start part, end part and indicator if bitmap is
                  used for which partitions to scan

  DESCRIPTION
    This function is called to discover which partitions to use in an index
    scan or a full table scan.
    It returns a range of partitions to scan. If there are holes in this
    range with partitions that are not needed to scan a bit array is used
    to signal which partitions to use and which not to use.
    If start_part > end_part at return it means no partition needs to be
    scanned. If start_part == end_part it always means a single partition
    needs to be scanned.

  RETURN VALUE
    part_spec
*/
void get_partition_set(const TABLE *table, uchar *buf, const uint index,
                       const key_range *key_spec, part_id_range *part_spec)
{
  partition_info *part_info= table->part_info;
  uint num_parts= part_info->get_tot_partitions();
  uint i, part_id;
  uint sub_part= num_parts;
  uint32 part_part= num_parts;
  KEY *key_info= NULL;
  bool found_part_field= FALSE;
  DBUG_ENTER("get_partition_set");

  part_spec->start_part= 0;
  part_spec->end_part= num_parts - 1;
  if ((index < MAX_KEY) &&
       key_spec && key_spec->flag == (uint)HA_READ_KEY_EXACT &&
       part_info->some_fields_in_PF.is_set(index))
  {
    key_info= table->key_info+index;
    /*
      The index can potentially provide at least one PF-field (field in the
      partition function). Thus it is interesting to continue our probe.
    */
    if (key_spec->length == key_info->key_length)
    {
      /*
        The entire key is set so we can check whether we can immediately
        derive either the complete PF or if we can derive either
        the top PF or the subpartitioning PF. This can be established by
        checking precalculated bits on each index.
      */
      if (part_info->all_fields_in_PF.is_set(index))
      {
        /*
          We can derive the exact partition to use, no more than this one
          is needed.
        */
        get_full_part_id_from_key(table,buf,key_info,key_spec,part_spec);
        /*
          Check if range can be adjusted by looking in read_partitions
        */
        prune_partition_set(table, part_spec);
        DBUG_VOID_RETURN;
      }
      else if (part_info->is_sub_partitioned())
      {
        if (part_info->all_fields_in_SPF.is_set(index))
        {
          if (get_sub_part_id_from_key(table, buf, key_info, key_spec, &sub_part))
          {
            part_spec->start_part= num_parts;
            DBUG_VOID_RETURN;
          }
        }
        else if (part_info->all_fields_in_PPF.is_set(index))
        {
          if (get_part_id_from_key(table,buf,key_info,
                                   key_spec,(uint32*)&part_part))
          {
            /*
              The value of the RANGE or LIST partitioning was outside of
              allowed values. Thus it is certain that the result of this
              scan will be empty.
            */
            part_spec->start_part= num_parts;
            DBUG_VOID_RETURN;
          }
        }
      }
    }
    else
    {
      /*
        Set an indicator on all partition fields that are bound.
        If at least one PF-field was bound it pays off to check whether
        the PF or PPF or SPF has been bound.
        (PF = Partition Function, SPF = Subpartition Function and
         PPF = Partition Function part of subpartitioning)
      */
      if ((found_part_field= set_PF_fields_in_key(key_info,
                                                  key_spec->length)))
      {
        if (check_part_func_bound(part_info->full_part_field_array))
        {
          /*
            We were able to bind all fields in the partition function even
            by using only a part of the key. Calculate the partition to use.
          */
          get_full_part_id_from_key(table,buf,key_info,key_spec,part_spec);
          clear_indicator_in_key_fields(key_info);
          /*
            Check if range can be adjusted by looking in read_partitions
          */
          prune_partition_set(table, part_spec);
          DBUG_VOID_RETURN;
        }
        else if (part_info->is_sub_partitioned())
        {
          if (check_part_func_bound(part_info->subpart_field_array))
          {
            if (get_sub_part_id_from_key(table, buf, key_info, key_spec, &sub_part))
            {
              part_spec->start_part= num_parts;
              clear_indicator_in_key_fields(key_info);
              DBUG_VOID_RETURN;
            }
          }
          else if (check_part_func_bound(part_info->part_field_array))
          {
            if (get_part_id_from_key(table,buf,key_info,key_spec,&part_part))
            {
              part_spec->start_part= num_parts;
              clear_indicator_in_key_fields(key_info);
              DBUG_VOID_RETURN;
            }
          }
        }
      }
    }
  }
  {
    /*
      The next step is to analyse the table condition to see whether any
      information about which partitions to scan can be derived from there.
      Currently not implemented.
    */
  }
  /*
    If we come here we have found a range of sorts we have either discovered
    nothing or we have discovered a range of partitions with possible holes
    in it. We need a bitvector to further the work here.
  */
  if (!(part_part == num_parts && sub_part == num_parts))
  {
    /*
      We can only arrive here if we are using subpartitioning.
    */
    if (part_part != num_parts)
    {
      /*
        We know the top partition and need to scan all underlying
        subpartitions. This is a range without holes.
      */
      DBUG_ASSERT(sub_part == num_parts);
      part_spec->start_part= part_part * part_info->num_subparts;
      part_spec->end_part= part_spec->start_part+part_info->num_subparts - 1;
    }
    else
    {
      DBUG_ASSERT(sub_part != num_parts);
      part_spec->start_part= sub_part;
      part_spec->end_part=sub_part+
                           (part_info->num_subparts*(part_info->num_parts-1));
      for (i= 0, part_id= sub_part; i < part_info->num_parts;
           i++, part_id+= part_info->num_subparts)
        ; //Set bit part_id in bit array
    }
  }
  if (found_part_field)
    clear_indicator_in_key_fields(key_info);
  /*
    Check if range can be adjusted by looking in read_partitions
  */
  prune_partition_set(table, part_spec);
  DBUG_VOID_RETURN;
}

/*
   If the table is partitioned we will read the partition info into the
   .frm file here.
   -------------------------------
   |  Fileinfo     64 bytes      |
   -------------------------------
   | Formnames     7 bytes       |
   -------------------------------
   | Not used    4021 bytes      |
   -------------------------------
   | Keyinfo + record            |
   -------------------------------
   | Padded to next multiple     |
   | of IO_SIZE                  |
   -------------------------------
   | Forminfo     288 bytes      |
   -------------------------------
   | Screen buffer, to make      |
   | field names readable        |
   -------------------------------
   | Packed field info           |
   | 17 + 1 + strlen(field_name) |
   | + 1 end of file character   |
   -------------------------------
   | Partition info              |
   -------------------------------
   We provide the length of partition length in Fileinfo[55-58].

  Read the partition syntax from the frm file and parse it to get the
  data structures of the partitioning.

  SYNOPSIS
    mysql_unpack_partition()
    thd                    Thread object
    part_buf               Partition info from frm file
    part_info_len          Length of partition syntax
    table                  Table object of partitioned table
    create_table_ind       Is it called from CREATE TABLE
    default_db_type        What is the default engine of the table
    work_part_info_used    Flag is raised if we don't create new
                           part_info, but used thd->work_part_info

  RETURN VALUE
    TRUE                   Error
    FALSE                  Success

  DESCRIPTION
    Read the partition syntax from the current position in the frm file.
    Initiate a LEX object, save the list of item tree objects to free after
    the query is done. Set-up partition info object such that parser knows
    it is called from internally. Call parser to create data structures
    (best possible recreation of item trees and so forth since there is no
    serialisation of these objects other than in parseable text format).
    We need to save the text of the partition functions since it is not
    possible to retrace this given an item tree.
+*/ + +bool mysql_unpack_partition(THD *thd, + char *part_buf, uint part_info_len, + TABLE* table, bool is_create_table_ind, + handlerton *default_db_type, + bool *work_part_info_used) +{ + bool result= TRUE; + partition_info *part_info; + CHARSET_INFO *old_character_set_client= thd->variables.character_set_client; + LEX *old_lex= thd->lex; + LEX lex; + PSI_statement_locker *parent_locker= thd->m_statement_psi; + DBUG_ENTER("mysql_unpack_partition"); + + thd->variables.character_set_client= system_charset_info; + + Parser_state parser_state; + if (unlikely(parser_state.init(thd, part_buf, part_info_len))) + goto end; + + if (unlikely(init_lex_with_single_table(thd, table, &lex))) + goto end; + + *work_part_info_used= FALSE; + + if (unlikely(!(lex.part_info= new partition_info()))) + goto end; + + lex.part_info->table= table; /* Indicates MYSQLparse from this place */ + part_info= lex.part_info; + DBUG_PRINT("info", ("Parse: %s", part_buf)); + + thd->m_statement_psi= NULL; + if (unlikely(parse_sql(thd, & parser_state, NULL)) || + unlikely(part_info->fix_parser_data(thd))) + { + thd->free_items(); + thd->m_statement_psi= parent_locker; + goto end; + } + thd->m_statement_psi= parent_locker; + /* + The parsed syntax residing in the frm file can still contain defaults. + The reason is that the frm file is sometimes saved outside of this + MySQL Server and used in backup and restore of clusters or partitioned + tables. It is not certain that the restore will restore exactly the + same default partitioning. + + The easiest manner of handling this is to simply continue using the + part_info we already built up during mysql_create_table if we are + in the process of creating a table. If the table already exists we + need to discover the number of partitions for the default parts. Since + the handler object hasn't been created here yet we need to postpone this + to the fix_partition_func method. 
+ */ + + DBUG_PRINT("info", ("Successful parse")); + DBUG_PRINT("info", ("default engine = %s, default_db_type = %s", + ha_resolve_storage_engine_name(part_info->default_engine_type), + ha_resolve_storage_engine_name(default_db_type))); + if (is_create_table_ind && old_lex->sql_command == SQLCOM_CREATE_TABLE) + { + /* + When we come here we are doing a create table. In this case we + have already done some preparatory work on the old part_info + object. We don't really need this new partition_info object. + Thus we go back to the old partition info object. + We need to free any memory objects allocated on item_free_list + by the parser since we are keeping the old info from the first + parser call in CREATE TABLE. + + This table object can not be used any more. However, since + this is CREATE TABLE, we know that it will be destroyed by the + caller, and rely on that. + */ + thd->free_items(); + part_info= thd->work_part_info; + *work_part_info_used= true; + } + table->part_info= part_info; + part_info->table= table; + table->file->set_part_info(part_info); + if (!part_info->default_engine_type) + part_info->default_engine_type= default_db_type; + DBUG_ASSERT(part_info->default_engine_type == default_db_type); + DBUG_ASSERT(part_info->default_engine_type->db_type != DB_TYPE_UNKNOWN); + DBUG_ASSERT(part_info->default_engine_type != partition_hton); + result= FALSE; +end: + end_lex_with_single_table(thd, table, old_lex); + thd->variables.character_set_client= old_character_set_client; + DBUG_RETURN(result); +} + + +/* + Set engine type on all partition element objects + SYNOPSIS + set_engine_all_partitions() + part_info Partition info + engine_type Handlerton reference of engine + RETURN VALUES + NONE +*/ + +static +void +set_engine_all_partitions(partition_info *part_info, + handlerton *engine_type) +{ + uint i= 0; + List_iterator part_it(part_info->partitions); + do + { + partition_element *part_elem= part_it++; + + part_elem->engine_type= engine_type; + if 
(part_info->is_sub_partitioned()) + { + List_iterator sub_it(part_elem->subpartitions); + uint j= 0; + + do + { + partition_element *sub_elem= sub_it++; + + sub_elem->engine_type= engine_type; + } while (++j < part_info->num_subparts); + } + } while (++i < part_info->num_parts); +} + + +/** + Support routine to handle the successful cases for partition management. + + @param thd Thread object + @param copied Number of records copied + @param deleted Number of records deleted + @param table_list Table list with the one table in it + + @return Operation status + @retval FALSE Success + @retval TRUE Failure +*/ + +static int fast_end_partition(THD *thd, ulonglong copied, + ulonglong deleted, + TABLE_LIST *table_list) +{ + char tmp_name[80]; + DBUG_ENTER("fast_end_partition"); + + thd->proc_info="end"; + + query_cache_invalidate3(thd, table_list, 0); + + my_snprintf(tmp_name, sizeof(tmp_name), ER_THD(thd, ER_INSERT_INFO), + (ulong) (copied + deleted), + (ulong) deleted, + (ulong) 0); + my_ok(thd, (ha_rows) (copied+deleted),0L, tmp_name); + DBUG_RETURN(FALSE); +} + + +/* + We need to check if engine used by all partitions can handle + partitioning natively. 
+ + SYNOPSIS + check_native_partitioned() + create_info Create info in CREATE TABLE + out:ret_val Return value + part_info Partition info + thd Thread object + + RETURN VALUES + Value returned in bool ret_value + TRUE Native partitioning supported by engine + FALSE Need to use partition handler + + Return value from function + TRUE Error + FALSE Success +*/ + +static bool check_native_partitioned(HA_CREATE_INFO *create_info,bool *ret_val, + partition_info *part_info, THD *thd) +{ + bool table_engine_set; + handlerton *engine_type= part_info->default_engine_type; + handlerton *old_engine_type= engine_type; + DBUG_ENTER("check_native_partitioned"); + + if (create_info->used_fields & HA_CREATE_USED_ENGINE) + { + table_engine_set= TRUE; + engine_type= create_info->db_type; + } + else + { + table_engine_set= FALSE; + if (thd->lex->sql_command != SQLCOM_CREATE_TABLE) + { + table_engine_set= TRUE; + DBUG_ASSERT(engine_type && engine_type != partition_hton); + } + } + DBUG_PRINT("info", ("engine_type = %s, table_engine_set = %u", + ha_resolve_storage_engine_name(engine_type), + table_engine_set)); + if (part_info->check_engine_mix(engine_type, table_engine_set)) + goto error; + + /* + All engines are of the same type. Check if this engine supports + native partitioning. + */ + + if (!engine_type) + engine_type= old_engine_type; + DBUG_PRINT("info", ("engine_type = %s", + ha_resolve_storage_engine_name(engine_type))); + if (engine_type->partition_flags && + (engine_type->partition_flags() & HA_CAN_PARTITION)) + { + create_info->db_type= engine_type; + DBUG_PRINT("info", ("Changed to native partitioning")); + *ret_val= TRUE; + } + DBUG_RETURN(FALSE); +error: + /* + Mixed engines not yet supported but when supported it will need + the partition handler + */ + my_error(ER_MIX_HANDLER_ERROR, MYF(0)); + *ret_val= FALSE; + DBUG_RETURN(TRUE); +} + + +/** + Sets which partitions to be used in the command. + + @param alter_info Alter_info pointer holding partition names and flags. 
+ @param tab_part_info partition_info holding all partitions. + @param part_state Which state to set for the named partitions. + + @return Operation status + @retval false Success + @retval true Failure +*/ + +bool set_part_state(Alter_info *alter_info, partition_info *tab_part_info, + enum partition_state part_state) +{ + uint part_count= 0; + uint num_parts_found= 0; + List_iterator part_it(tab_part_info->partitions); + + do + { + partition_element *part_elem= part_it++; + if ((alter_info->partition_flags & ALTER_PARTITION_ALL) || + (is_name_in_list(part_elem->partition_name, + alter_info->partition_names))) + { + /* + Mark the partition. + I.e mark the partition as a partition to be "changed" by + analyzing/optimizing/rebuilding/checking/repairing/... + */ + num_parts_found++; + part_elem->part_state= part_state; + DBUG_PRINT("info", ("Setting part_state to %u for partition %s", + part_state, part_elem->partition_name)); + } + else + part_elem->part_state= PART_NORMAL; + } while (++part_count < tab_part_info->num_parts); + + if (num_parts_found != alter_info->partition_names.elements && + !(alter_info->partition_flags & ALTER_PARTITION_ALL)) + { + /* Not all given partitions found, revert and return failure */ + part_it.rewind(); + part_count= 0; + do + { + partition_element *part_elem= part_it++; + part_elem->part_state= PART_NORMAL; + } while (++part_count < tab_part_info->num_parts); + return true; + } + return false; +} + + +/** + @brief Check if partition is exchangable with table by checking table options + + @param table_create_info Table options from table. + @param part_elem All the info of the partition. + + @retval FALSE if they are equal, otherwise TRUE. + + @note Any differens that would cause a change in the frm file is prohibited. + Such options as data_file_name, index_file_name, min_rows, max_rows etc. are + not allowed to differ. But comment is allowed to differ. 
+*/ +bool compare_partition_options(HA_CREATE_INFO *table_create_info, + partition_element *part_elem) +{ +#define MAX_COMPARE_PARTITION_OPTION_ERRORS 5 + const char *option_diffs[MAX_COMPARE_PARTITION_OPTION_ERRORS + 1]; + int i, errors= 0; + DBUG_ENTER("compare_partition_options"); + + /* + Note that there are not yet any engine supporting tablespace together + with partitioning. TODO: when there are, add compare. + */ + if (part_elem->part_max_rows != table_create_info->max_rows) + option_diffs[errors++]= "MAX_ROWS"; + if (part_elem->part_min_rows != table_create_info->min_rows) + option_diffs[errors++]= "MIN_ROWS"; + + for (i= 0; i < errors; i++) + my_error(ER_PARTITION_EXCHANGE_DIFFERENT_OPTION, MYF(0), + option_diffs[i]); + DBUG_RETURN(errors != 0); +} + + +/** + Check if the ALTER command tries to change DATA DIRECTORY + or INDEX DIRECTORY for its partitions and warn if so. + @param thd THD + @param part_elem partition_element to check + */ +static void warn_if_datadir_altered(THD *thd, + const partition_element *part_elem) +{ + DBUG_ASSERT(part_elem); + + if (part_elem->engine_type && + part_elem->engine_type->db_type != DB_TYPE_INNODB) + return; + + if (part_elem->data_file_name) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_INNODB_PARTITION_OPTION_IGNORED, + ER(WARN_INNODB_PARTITION_OPTION_IGNORED), + "DATA DIRECTORY"); + } + if (part_elem->index_file_name) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_INNODB_PARTITION_OPTION_IGNORED, + ER(WARN_INNODB_PARTITION_OPTION_IGNORED), + "INDEX DIRECTORY"); + } +} + + +/** + Currently changing DATA DIRECTORY and INDEX DIRECTORY for InnoDB partitions is + not possible. This function checks it and warns on that case. 
+ @param thd THD + @param tab_part_info old partition info + @param alt_part_info new partition info + */ +static void check_datadir_altered_for_innodb(THD *thd, + partition_info *tab_part_info, + partition_info *alt_part_info) +{ + if (tab_part_info->default_engine_type->db_type != DB_TYPE_INNODB) + return; + + for (List_iterator_fast it(alt_part_info->partitions); + partition_element *part_elem= it++;) + { + if (alt_part_info->is_sub_partitioned()) + { + for (List_iterator_fast it2(part_elem->subpartitions); + const partition_element *sub_part_elem= it2++;) + { + warn_if_datadir_altered(thd, sub_part_elem); + } + } + else + warn_if_datadir_altered(thd, part_elem); + } +} + + +/* + Prepare for ALTER TABLE of partition structure + + @param[in] thd Thread object + @param[in] table Table object + @param[in,out] alter_info Alter information + @param[in,out] create_info Create info for CREATE TABLE + @param[in] alter_ctx ALTER TABLE runtime context + @param[out] partition_changed Boolean indicating whether partition changed + @param[out] fast_alter_table Boolean indicating if fast partition alter is + possible. + @param[out] thd->work_part_info Prepared part_info for the new table + + @return Operation status + @retval TRUE Error + @retval FALSE Success + + @note + This method handles all preparations for ALTER TABLE for partitioned + tables. + We need to handle both partition management command such as Add Partition + and others here as well as an ALTER TABLE that completely changes the + partitioning and yet others that don't change anything at all. We start + by checking the partition management variants and then check the general + change patterns. 
+*/ + +uint prep_alter_part_table(THD *thd, TABLE *table, Alter_info *alter_info, + HA_CREATE_INFO *create_info, + bool *partition_changed, + bool *fast_alter_table) +{ + DBUG_ENTER("prep_alter_part_table"); + + /* Foreign keys on partitioned tables are not supported, waits for WL#148 */ + if (table->part_info && (alter_info->flags & (ALTER_ADD_FOREIGN_KEY | + ALTER_DROP_FOREIGN_KEY))) + { + my_error(ER_FEATURE_NOT_SUPPORTED_WITH_PARTITIONING, MYF(0), "FOREIGN KEY"); + DBUG_RETURN(TRUE); + } + /* Remove partitioning on a not partitioned table is not possible */ + if (!table->part_info && (alter_info->partition_flags & + ALTER_PARTITION_REMOVE)) + { + my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); + DBUG_RETURN(TRUE); + } + + partition_info *alt_part_info= thd->lex->part_info; + /* + This variable is TRUE in very special case when we add only DEFAULT + partition to the existing table + */ + bool only_default_value_added= + (alt_part_info && + alt_part_info->current_partition && + alt_part_info->current_partition->list_val_list.elements == 1 && + alt_part_info->current_partition->list_val_list.head()-> + added_items >= 1 && + alt_part_info->current_partition->list_val_list.head()-> + col_val_array[0].max_value) && + alt_part_info->part_type == LIST_PARTITION && + (alter_info->partition_flags & ALTER_PARTITION_ADD); + if (only_default_value_added && + !thd->lex->part_info->num_columns) + thd->lex->part_info->num_columns= 1; // to make correct clone + + /* + One of these is done in handle_if_exists_option(): + thd->work_part_info= thd->lex->part_info; + or + thd->work_part_info= NULL; + */ + if (thd->work_part_info && + !(thd->work_part_info= thd->work_part_info->get_clone(thd))) + DBUG_RETURN(TRUE); + + /* ALTER_PARTITION_ADMIN is handled in mysql_admin_table */ + DBUG_ASSERT(!(alter_info->partition_flags & ALTER_PARTITION_ADMIN)); + + partition_info *saved_part_info= NULL; + + if (alter_info->partition_flags & + (ALTER_PARTITION_ADD | + ALTER_PARTITION_DROP | 
+ ALTER_PARTITION_CONVERT_OUT | + ALTER_PARTITION_COALESCE | + ALTER_PARTITION_REORGANIZE | + ALTER_PARTITION_TABLE_REORG | + ALTER_PARTITION_REBUILD | + ALTER_PARTITION_CONVERT_IN)) + { + /* + You can't add column when we are doing alter related to partition + */ + DBUG_EXECUTE_IF("test_pseudo_invisible", { + my_error(ER_INTERNAL_ERROR, MYF(0), "Don't to it with test_pseudo_invisible"); + DBUG_RETURN(1); + }); + DBUG_EXECUTE_IF("test_completely_invisible", { + my_error(ER_INTERNAL_ERROR, MYF(0), "Don't to it with test_completely_invisible"); + DBUG_RETURN(1); + }); + partition_info *tab_part_info; + ulonglong flags= 0; + bool is_last_partition_reorged= FALSE; + part_elem_value *tab_max_elem_val= NULL; + part_elem_value *alt_max_elem_val= NULL; + longlong tab_max_range= 0, alt_max_range= 0; + alt_part_info= thd->work_part_info; + + if (!table->part_info) + { + my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); + DBUG_RETURN(TRUE); + } + + /* + Open our intermediate table, we will operate on a temporary instance + of the original table, to be able to skip copying all partitions. + Open it as a copy of the original table, and modify its partition_info + object to allow fast_alter_partition_table to perform the changes. + */ + DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, + table->s->db.str, + table->s->table_name.str, + MDL_INTENTION_EXCLUSIVE)); + + tab_part_info= table->part_info; + + if (alter_info->partition_flags & ALTER_PARTITION_TABLE_REORG) + { + uint new_part_no, curr_part_no; + /* + 'ALTER TABLE t REORG PARTITION' only allowed with auto partition + if default partitioning is used. 
+ */ + + if (tab_part_info->part_type != HASH_PARTITION || + ((table->s->db_type()->partition_flags() & HA_USE_AUTO_PARTITION) && + !tab_part_info->use_default_num_partitions) || + ((!(table->s->db_type()->partition_flags() & HA_USE_AUTO_PARTITION)) && + tab_part_info->use_default_num_partitions)) + { + my_error(ER_REORG_NO_PARAM_ERROR, MYF(0)); + goto err; + } + new_part_no= table->file->get_default_no_partitions(create_info); + curr_part_no= tab_part_info->num_parts; + if (new_part_no == curr_part_no) + { + /* + No change is needed, we will have the same number of partitions + after the change as before. Thus we can reply ok immediately + without any changes at all. + */ + flags= table->file->alter_table_flags(alter_info->flags); + if (flags & (HA_FAST_CHANGE_PARTITION | HA_PARTITION_ONE_PHASE)) + { + *fast_alter_table= true; + /* Force table re-open for consistency with the main case. */ + table->mark_table_for_reopen(); + } + else + { + /* + Create copy of partition_info to avoid modifying original + TABLE::part_info, to keep it safe for later use. 
+ */ + if (!(tab_part_info= tab_part_info->get_clone(thd))) + DBUG_RETURN(TRUE); + } + + thd->work_part_info= tab_part_info; + DBUG_RETURN(FALSE); + } + else if (new_part_no > curr_part_no) + { + /* + We will add more partitions, we use the ADD PARTITION without + setting the flag for no default number of partitions + */ + alter_info->partition_flags|= ALTER_PARTITION_ADD; + thd->work_part_info->num_parts= new_part_no - curr_part_no; + } + else + { + /* + We will remove hash partitions, we use the COALESCE PARTITION + without setting the flag for no default number of partitions + */ + alter_info->partition_flags|= ALTER_PARTITION_COALESCE; + alter_info->num_parts= curr_part_no - new_part_no; + } + } + if (!(flags= table->file->alter_table_flags(alter_info->flags))) + { + my_error(ER_PARTITION_FUNCTION_FAILURE, MYF(0)); + goto err; + } + if ((flags & (HA_FAST_CHANGE_PARTITION | HA_PARTITION_ONE_PHASE)) != 0) + { + /* + "Fast" change of partitioning is supported in this case. + We will change TABLE::part_info (as this is how we pass + information to storage engine in this case), so the table + must be reopened. + */ + *fast_alter_table= true; + table->mark_table_for_reopen(); + } + else + { + /* + "Fast" changing of partitioning is not supported. Create + a copy of TABLE::part_info object, so we can modify it safely. + Modifying original TABLE::part_info will cause problems when + we read data from old version of table using this TABLE object + while copying them to new version of table. 
+ */ + if (!(tab_part_info= tab_part_info->get_clone(thd))) + DBUG_RETURN(TRUE); + } + DBUG_PRINT("info", ("*fast_alter_table flags: 0x%llx", flags)); + if ((alter_info->partition_flags & ALTER_PARTITION_ADD) || + (alter_info->partition_flags & ALTER_PARTITION_REORGANIZE)) + { + if ((alter_info->partition_flags & ALTER_PARTITION_CONVERT_IN) && + !(tab_part_info->part_type == RANGE_PARTITION || + tab_part_info->part_type == LIST_PARTITION)) + { + my_error(ER_ONLY_ON_RANGE_LIST_PARTITION, MYF(0), "CONVERT TABLE TO"); + goto err; + } + if (thd->work_part_info->part_type != tab_part_info->part_type) + { + if (thd->work_part_info->part_type == NOT_A_PARTITION) + { + if (tab_part_info->part_type == RANGE_PARTITION) + { + my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), "RANGE"); + goto err; + } + else if (tab_part_info->part_type == LIST_PARTITION) + { + my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), "LIST"); + goto err; + } + /* + Hash partitions can be altered without parser finds out about + that it is HASH partitioned. So no error here. + */ + } + else + { + if (thd->work_part_info->part_type == RANGE_PARTITION) + { + my_error(ER_PARTITION_WRONG_VALUES_ERROR, MYF(0), + "RANGE", "LESS THAN"); + } + else if (thd->work_part_info->part_type == LIST_PARTITION) + { + DBUG_ASSERT(thd->work_part_info->part_type == LIST_PARTITION); + my_error(ER_PARTITION_WRONG_VALUES_ERROR, MYF(0), + "LIST", "IN"); + } + /* + Adding history partitions to non-history partitioning or + non-history parittions to history partitioning is prohibited. 
+ */ + else if (thd->work_part_info->part_type == VERSIONING_PARTITION || + tab_part_info->part_type == VERSIONING_PARTITION) + { + part_type_error(thd, thd->work_part_info, NULL, tab_part_info); + } + else + { + DBUG_ASSERT(tab_part_info->part_type == RANGE_PARTITION || + tab_part_info->part_type == LIST_PARTITION); + (void) tab_part_info->error_if_requires_values(); + } + goto err; + } + } + if ((tab_part_info->column_list && + alt_part_info->num_columns != tab_part_info->num_columns && + !only_default_value_added) || + (!tab_part_info->column_list && + (tab_part_info->part_type == RANGE_PARTITION || + tab_part_info->part_type == LIST_PARTITION) && + alt_part_info->num_columns != 1U && + !only_default_value_added) || + (!tab_part_info->column_list && + tab_part_info->part_type == HASH_PARTITION && + (alt_part_info->num_columns != 0))) + { + my_error(ER_PARTITION_COLUMN_LIST_ERROR, MYF(0)); + goto err; + } + alt_part_info->column_list= tab_part_info->column_list; + if (alt_part_info->fix_parser_data(thd)) + { + goto err; + } + } + if (alter_info->partition_flags & ALTER_PARTITION_ADD) + { + if (*fast_alter_table && thd->locked_tables_mode) + { + MEM_ROOT *old_root= thd->mem_root; + thd->mem_root= &thd->locked_tables_list.m_locked_tables_root; + saved_part_info= tab_part_info->get_clone(thd); + thd->mem_root= old_root; + saved_part_info->read_partitions= tab_part_info->read_partitions; + saved_part_info->lock_partitions= tab_part_info->lock_partitions; + saved_part_info->bitmaps_are_initialized= tab_part_info->bitmaps_are_initialized; + } + /* + We start by moving the new partitions to the list of temporary + partitions. We will then check that the new partitions fit in the + partitioning scheme as currently set-up. + Partitions are always added at the end in ADD PARTITION. 
+ */ + uint num_new_partitions= alt_part_info->num_parts; + uint num_orig_partitions= tab_part_info->num_parts; + uint check_total_partitions= num_new_partitions + num_orig_partitions; + uint new_total_partitions= check_total_partitions; + /* + We allow quite a lot of values to be supplied by defaults, however we + must know the number of new partitions in this case. + */ + if (thd->lex->no_write_to_binlog && + tab_part_info->part_type != HASH_PARTITION && + tab_part_info->part_type != VERSIONING_PARTITION) + { + my_error(ER_NO_BINLOG_ERROR, MYF(0)); + goto err; + } + if (tab_part_info->defined_max_value && + (tab_part_info->part_type == RANGE_PARTITION || + alt_part_info->defined_max_value)) + { + my_error((tab_part_info->part_type == RANGE_PARTITION ? + ER_PARTITION_MAXVALUE_ERROR : + ER_PARTITION_DEFAULT_ERROR), MYF(0)); + goto err; + } + if (num_new_partitions == 0) + { + my_error(ER_ADD_PARTITION_NO_NEW_PARTITION, MYF(0)); + goto err; + } + if (tab_part_info->is_sub_partitioned()) + { + if (alt_part_info->num_subparts == 0) + alt_part_info->num_subparts= tab_part_info->num_subparts; + else if (alt_part_info->num_subparts != tab_part_info->num_subparts) + { + my_error(ER_ADD_PARTITION_SUBPART_ERROR, MYF(0)); + goto err; + } + check_total_partitions= new_total_partitions* + alt_part_info->num_subparts; + } + if (check_total_partitions > MAX_PARTITIONS) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + goto err; + } + alt_part_info->part_type= tab_part_info->part_type; + alt_part_info->subpart_type= tab_part_info->subpart_type; + if (alt_part_info->set_up_defaults_for_partitioning(thd, table->file, 0, + tab_part_info->next_part_no(num_new_partitions))) + { + goto err; + } +/* +Handling of on-line cases: + +ADD PARTITION for RANGE/LIST PARTITIONING: +------------------------------------------ +For range and list partitions add partition is simply adding a +new empty partition to the table. 
If the handler support this we +will use the simple method of doing this. The figure below shows +an example of this and the states involved in making this change. + +Existing partitions New added partitions +------ ------ ------ ------ | ------ ------ +| | | | | | | | | | | | | +| p0 | | p1 | | p2 | | p3 | | | p4 | | p5 | +------ ------ ------ ------ | ------ ------ +PART_NORMAL PART_NORMAL PART_NORMAL PART_NORMAL PART_TO_BE_ADDED*2 +PART_NORMAL PART_NORMAL PART_NORMAL PART_NORMAL PART_IS_ADDED*2 + +The first line is the states before adding the new partitions and the +second line is after the new partitions are added. All the partitions are +in the partitions list, no partitions are placed in the temp_partitions +list. + +ADD PARTITION for HASH PARTITIONING +----------------------------------- +This little figure tries to show the various partitions involved when +adding two new partitions to a linear hash based partitioned table with +four partitions to start with, which lists are used and the states they +pass through. Adding partitions to a normal hash based is similar except +that it is always all the existing partitions that are reorganised not +only a subset of them. + +Existing partitions New added partitions +------ ------ ------ ------ | ------ ------ +| | | | | | | | | | | | | +| p0 | | p1 | | p2 | | p3 | | | p4 | | p5 | +------ ------ ------ ------ | ------ ------ +PART_CHANGED PART_CHANGED PART_NORMAL PART_NORMAL PART_TO_BE_ADDED +PART_IS_CHANGED*2 PART_NORMAL PART_NORMAL PART_IS_ADDED +PART_NORMAL PART_NORMAL PART_NORMAL PART_NORMAL PART_IS_ADDED + +Reorganised existing partitions +------ ------ +| | | | +| p0'| | p1'| +------ ------ + +p0 - p5 will be in the partitions list of partitions. +p0' and p1' will actually not exist as separate objects, there presence can +be deduced from the state of the partition and also the names of those +partitions can be deduced this way. 
+ +After adding the partitions and copying the partition data to p0', p1', +p4 and p5 from p0 and p1 the states change to adapt for the new situation +where p0 and p1 is dropped and replaced by p0' and p1' and the new p4 and +p5 are in the table again. + +The first line above shows the states of the partitions before we start +adding and copying partitions, the second after completing the adding +and copying and finally the third line after also dropping the partitions +that are reorganised. +*/ + if (*fast_alter_table && tab_part_info->part_type == HASH_PARTITION) + { + uint part_no= 0, start_part= 1, start_sec_part= 1; + uint end_part= 0, end_sec_part= 0; + uint upper_2n= tab_part_info->linear_hash_mask + 1; + uint lower_2n= upper_2n >> 1; + bool all_parts= TRUE; + if (tab_part_info->linear_hash_ind && num_new_partitions < upper_2n) + { + /* + An analysis of which parts needs reorganisation shows that it is + divided into two intervals. The first interval is those parts + that are reorganised up until upper_2n - 1. From upper_2n and + onwards it starts again from partition 0 and goes on until + it reaches p(upper_2n - 1). If the last new partition reaches + beyond upper_2n - 1 then the first interval will end with + p(lower_2n - 1) and start with p(num_orig_partitions - lower_2n). + If lower_2n partitions are added then p0 to p(lower_2n - 1) will + be reorganised which means that the two interval becomes one + interval at this point. Thus only when adding less than + lower_2n partitions and going beyond a total of upper_2n we + actually get two intervals. + + To exemplify this assume we have 6 partitions to start with and + add 1, 2, 3, 5, 6, 7, 8, 9 partitions. + The first to add after p5 is p6 = 110 in bit numbers. Thus we + can see that 10 = p2 will be partition to reorganise if only one + partition. + If 2 partitions are added we reorganise [p2, p3]. Those two + cases are covered by the second if part below. 
+ If 3 partitions are added we reorganise [p2, p3] U [p0,p0]. This + part is covered by the else part below. + If 5 partitions are added we get [p2,p3] U [p0, p2] = [p0, p3]. + This is covered by the first if part where we need the max check + to here use lower_2n - 1. + If 7 partitions are added we get [p2,p3] U [p0, p4] = [p0, p4]. + This is covered by the first if part but here we use the first + calculated end_part. + Finally with 9 new partitions we would also reorganise p6 if we + used the method below but we cannot reorganise more partitions + than what we had from the start and thus we simply set all_parts + to TRUE. In this case we don't get into this if-part at all. + */ + all_parts= FALSE; + if (num_new_partitions >= lower_2n) + { + /* + In this case there is only one interval since the two intervals + overlap and this starts from zero to last_part_no - upper_2n + */ + start_part= 0; + end_part= new_total_partitions - (upper_2n + 1); + end_part= max(lower_2n - 1, end_part); + } + else if (new_total_partitions <= upper_2n) + { + /* + Also in this case there is only one interval since we are not + going over a 2**n boundary + */ + start_part= num_orig_partitions - lower_2n; + end_part= start_part + (num_new_partitions - 1); + } + else + { + /* We have two non-overlapping intervals since we are not + passing a 2**n border and we have not at least lower_2n + new parts that would ensure that the intervals become + overlapping. 
+ */ + start_part= num_orig_partitions - lower_2n; + end_part= upper_2n - 1; + start_sec_part= 0; + end_sec_part= new_total_partitions - (upper_2n + 1); + } + } + List_iterator tab_it(tab_part_info->partitions); + part_no= 0; + do + { + partition_element *p_elem= tab_it++; + if (all_parts || + (part_no >= start_part && part_no <= end_part) || + (part_no >= start_sec_part && part_no <= end_sec_part)) + { + p_elem->part_state= PART_CHANGED; + } + } while (++part_no < num_orig_partitions); + } + /* + Need to concatenate the lists here to make it possible to check the + partition info for correctness using check_partition_info. + For on-line add partition we set the state of this partition to + PART_TO_BE_ADDED to ensure that it is known that it is not yet + usable (becomes usable when partition is created and the switch of + partition configuration is made. + */ + { + partition_element *now_part= NULL; + if (tab_part_info->part_type == VERSIONING_PARTITION) + { + List_iterator it(tab_part_info->partitions); + partition_element *el; + while ((el= it++)) + { + if (el->type == partition_element::CURRENT) + { + /* now_part is always last partition, we add it to the end of partitions list. 
*/ + it.remove(); + now_part= el; + } + } + if (*fast_alter_table && + !(alter_info->partition_flags & ALTER_PARTITION_AUTO_HIST) && + tab_part_info->vers_info->interval.is_set()) + { + partition_element *hist_part= tab_part_info->vers_info->hist_part; + if (hist_part->range_value <= thd->query_start()) + hist_part->part_state= PART_CHANGED; + } + } + List_iterator alt_it(alt_part_info->partitions); + uint part_count= 0; + do + { + partition_element *part_elem= alt_it++; + if (*fast_alter_table) + part_elem->part_state= PART_TO_BE_ADDED; + if (unlikely(tab_part_info->partitions.push_back(part_elem, + thd->mem_root))) + goto err; + } while (++part_count < num_new_partitions); + tab_part_info->num_parts+= num_new_partitions; + if (tab_part_info->part_type == VERSIONING_PARTITION) + { + DBUG_ASSERT(now_part); + if (unlikely(tab_part_info->partitions.push_back(now_part, + thd->mem_root))) + goto err; + } + } + /* + If we specify partitions explicitly we don't use defaults anymore. + Using ADD PARTITION also means that we don't have the default number + of partitions anymore. We use this code also for Table reorganisations + and here we don't set any default flags to FALSE. + */ + if (!(alter_info->partition_flags & ALTER_PARTITION_TABLE_REORG)) + { + if (!alt_part_info->use_default_partitions) + { + DBUG_PRINT("info", ("part_info: %p", tab_part_info)); + tab_part_info->use_default_partitions= FALSE; + } + tab_part_info->use_default_num_partitions= FALSE; + tab_part_info->is_auto_partitioned= FALSE; + } + } + else if ((alter_info->partition_flags & ALTER_PARTITION_DROP) | + (alter_info->partition_flags & ALTER_PARTITION_CONVERT_OUT)) + { + const char * const cmd= + (alter_info->partition_flags & ALTER_PARTITION_CONVERT_OUT) ? + "CONVERT" : "DROP"; + /* + Drop a partition from a range partition and list partitioning is + always safe and can be made more or less immediate. 
It is necessary + however to ensure that the partition to be removed is safely removed + and that REPAIR TABLE can remove the partition if for some reason the + command to drop the partition failed in the middle. + */ + uint part_count= 0; + uint num_parts_dropped= alter_info->partition_names.elements; + uint num_parts_found= 0; + List_iterator part_it(tab_part_info->partitions); + + tab_part_info->is_auto_partitioned= FALSE; + if (tab_part_info->part_type == VERSIONING_PARTITION) + { + if (num_parts_dropped >= tab_part_info->num_parts - 1) + { + DBUG_ASSERT(table && table->s && table->s->table_name.str); + my_error(ER_VERS_WRONG_PARTS, MYF(0), table->s->table_name.str); + goto err; + } + tab_part_info->use_default_partitions= false; + } + else + { + if (!(tab_part_info->part_type == RANGE_PARTITION || + tab_part_info->part_type == LIST_PARTITION)) + { + my_error(ER_ONLY_ON_RANGE_LIST_PARTITION, MYF(0), cmd); + goto err; + } + if (num_parts_dropped >= tab_part_info->num_parts) + { + my_error(ER_DROP_LAST_PARTITION, MYF(0)); + goto err; + } + } + do + { + partition_element *part_elem= part_it++; + if (is_name_in_list(part_elem->partition_name, + alter_info->partition_names)) + { + if (tab_part_info->part_type == VERSIONING_PARTITION) + { + if (part_elem->type == partition_element::CURRENT) + { + my_error(ER_VERS_WRONG_PARTS, MYF(0), table->s->table_name.str); + goto err; + } + if (tab_part_info->vers_info->interval.is_set()) + { + if (num_parts_found < part_count) + { + my_error(ER_VERS_DROP_PARTITION_INTERVAL, MYF(0)); + goto err; + } + tab_part_info->vers_info->interval.start= + (my_time_t)part_elem->range_value; + } + } + /* + Set state to indicate that the partition is to be dropped. 
+ */ + num_parts_found++; + part_elem->part_state= PART_TO_BE_DROPPED; + } + } while (++part_count < tab_part_info->num_parts); + if (num_parts_found != num_parts_dropped) + { + my_error(ER_PARTITION_DOES_NOT_EXIST, MYF(0)); + goto err; + } + if (table->file->is_fk_defined_on_table_or_index(MAX_KEY)) + { + my_error(ER_ROW_IS_REFERENCED, MYF(0)); + goto err; + } + DBUG_ASSERT(!(alter_info->partition_flags & ALTER_PARTITION_CONVERT_OUT) || + num_parts_dropped == 1); + /* NOTE: num_parts is used in generate_partition_syntax() */ + tab_part_info->num_parts-= num_parts_dropped; + if ((alter_info->partition_flags & ALTER_PARTITION_CONVERT_OUT) && + tab_part_info->is_sub_partitioned()) + { + // TODO technically this can be converted to a *partitioned* table + my_error(ER_PARTITION_CONVERT_SUBPARTITIONED, MYF(0)); + goto err; + } + } + else if (alter_info->partition_flags & ALTER_PARTITION_REBUILD) + { + set_engine_all_partitions(tab_part_info, + tab_part_info->default_engine_type); + if (set_part_state(alter_info, tab_part_info, PART_CHANGED)) + { + my_error(ER_PARTITION_DOES_NOT_EXIST, MYF(0)); + goto err; + } + if (!(*fast_alter_table)) + { + table->file->print_error(HA_ERR_WRONG_COMMAND, MYF(0)); + goto err; + } + } + else if (alter_info->partition_flags & ALTER_PARTITION_COALESCE) + { + uint num_parts_coalesced= alter_info->num_parts; + uint num_parts_remain= tab_part_info->num_parts - num_parts_coalesced; + List_iterator part_it(tab_part_info->partitions); + if (tab_part_info->part_type != HASH_PARTITION) + { + my_error(ER_COALESCE_ONLY_ON_HASH_PARTITION, MYF(0)); + goto err; + } + if (num_parts_coalesced == 0) + { + my_error(ER_COALESCE_PARTITION_NO_PARTITION, MYF(0)); + goto err; + } + if (num_parts_coalesced >= tab_part_info->num_parts) + { + my_error(ER_DROP_LAST_PARTITION, MYF(0)); + goto err; + } +/* +Online handling: +COALESCE PARTITION: +------------------- +The figure below shows the manner in which partitions are handled when +performing an on-line coalesce 
partition and which states they go through +at start, after adding and copying partitions and finally after dropping +the partitions to drop. The figure shows an example using four partitions +to start with, using linear hash and coalescing one partition (always the +last partition). + +Using linear hash then all remaining partitions will have a new reorganised +part. + +Existing partitions Coalesced partition +------ ------ ------ | ------ +| | | | | | | | | +| p0 | | p1 | | p2 | | | p3 | +------ ------ ------ | ------ +PART_NORMAL PART_CHANGED PART_NORMAL PART_REORGED_DROPPED +PART_NORMAL PART_IS_CHANGED PART_NORMAL PART_TO_BE_DROPPED +PART_NORMAL PART_NORMAL PART_NORMAL PART_IS_DROPPED + +Reorganised existing partitions + ------ + | | + | p1'| + ------ + +p0 - p3 is in the partitions list. +The p1' partition will actually not be in any list it is deduced from the +state of p1. +*/ + { + uint part_count= 0, start_part= 1, start_sec_part= 1; + uint end_part= 0, end_sec_part= 0; + bool all_parts= TRUE; + if (*fast_alter_table && + tab_part_info->linear_hash_ind) + { + uint upper_2n= tab_part_info->linear_hash_mask + 1; + uint lower_2n= upper_2n >> 1; + all_parts= FALSE; + if (num_parts_coalesced >= lower_2n) + { + all_parts= TRUE; + } + else if (num_parts_remain >= lower_2n) + { + end_part= tab_part_info->num_parts - (lower_2n + 1); + start_part= num_parts_remain - lower_2n; + } + else + { + start_part= 0; + end_part= tab_part_info->num_parts - (lower_2n + 1); + end_sec_part= (lower_2n >> 1) - 1; + start_sec_part= end_sec_part - (lower_2n - (num_parts_remain + 1)); + } + } + do + { + partition_element *p_elem= part_it++; + if (*fast_alter_table && + (all_parts || + (part_count >= start_part && part_count <= end_part) || + (part_count >= start_sec_part && part_count <= end_sec_part))) + p_elem->part_state= PART_CHANGED; + if (++part_count > num_parts_remain) + { + if (*fast_alter_table) + p_elem->part_state= PART_REORGED_DROPPED; + else + part_it.remove(); + } + } 
while (part_count < tab_part_info->num_parts); + tab_part_info->num_parts= num_parts_remain; + } + if (!(alter_info->partition_flags & ALTER_PARTITION_TABLE_REORG)) + { + tab_part_info->use_default_num_partitions= FALSE; + tab_part_info->is_auto_partitioned= FALSE; + } + } + else if (alter_info->partition_flags & ALTER_PARTITION_REORGANIZE) + { + /* + Reorganise partitions takes a number of partitions that are next + to each other (at least for RANGE PARTITIONS) and then uses those + to create a set of new partitions. So data is copied from those + partitions into the new set of partitions. Those new partitions + can have more values in the LIST value specifications or less both + are allowed. The ranges can be different but since they are + changing a set of consecutive partitions they must cover the same + range as those changed from. + This command can be used on RANGE and LIST partitions. + */ + uint num_parts_reorged= alter_info->partition_names.elements; + uint num_parts_new= thd->work_part_info->partitions.elements; + uint check_total_partitions; + + tab_part_info->is_auto_partitioned= FALSE; + if (num_parts_reorged > tab_part_info->num_parts) + { + my_error(ER_REORG_PARTITION_NOT_EXIST, MYF(0)); + goto err; + } + if (!(tab_part_info->part_type == RANGE_PARTITION || + tab_part_info->part_type == LIST_PARTITION) && + (num_parts_new != num_parts_reorged)) + { + my_error(ER_REORG_HASH_ONLY_ON_SAME_NO, MYF(0)); + goto err; + } + if (tab_part_info->is_sub_partitioned() && + alt_part_info->num_subparts && + alt_part_info->num_subparts != tab_part_info->num_subparts) + { + my_error(ER_PARTITION_WRONG_NO_SUBPART_ERROR, MYF(0)); + goto err; + } + check_total_partitions= tab_part_info->num_parts + num_parts_new; + check_total_partitions-= num_parts_reorged; + if (check_total_partitions > MAX_PARTITIONS) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + goto err; + } + alt_part_info->part_type= tab_part_info->part_type; + alt_part_info->subpart_type= 
tab_part_info->subpart_type; + alt_part_info->num_subparts= tab_part_info->num_subparts; + DBUG_ASSERT(!alt_part_info->use_default_partitions); + /* We specified partitions explicitly so don't use defaults anymore. */ + tab_part_info->use_default_partitions= FALSE; + if (alt_part_info->set_up_defaults_for_partitioning(thd, table->file, 0, + 0)) + { + goto err; + } + check_datadir_altered_for_innodb(thd, tab_part_info, alt_part_info); + +/* +Online handling: +REORGANIZE PARTITION: +--------------------- +The figure exemplifies the handling of partitions, their state changes and +how they are organised. It exemplifies four partitions where two of the +partitions are reorganised (p1 and p2) into two new partitions (p4 and p5). +The reason of this change could be to change range limits, change list +values or for hash partitions simply reorganise the partition which could +also involve moving them to new disks or new node groups (MySQL Cluster). + +Existing partitions +------ ------ ------ ------ +| | | | | | | | +| p0 | | p1 | | p2 | | p3 | +------ ------ ------ ------ +PART_NORMAL PART_TO_BE_REORGED PART_NORMAL +PART_NORMAL PART_TO_BE_DROPPED PART_NORMAL +PART_NORMAL PART_IS_DROPPED PART_NORMAL + +Reorganised new partitions (replacing p1 and p2) +------ ------ +| | | | +| p4 | | p5 | +------ ------ +PART_TO_BE_ADDED +PART_IS_ADDED +PART_IS_ADDED + +All unchanged partitions and the new partitions are in the partitions list +in the order they will have when the change is completed. The reorganised +partitions are placed in the temp_partitions list. PART_IS_ADDED is only a +temporary state not written in the frm file. It is used to ensure we write +the generated partition syntax in a correct manner. 
+*/ + { + List_iterator tab_it(tab_part_info->partitions); + uint part_count= 0; + bool found_first= FALSE; + bool found_last= FALSE; + uint drop_count= 0; + do + { + partition_element *part_elem= tab_it++; + is_last_partition_reorged= FALSE; + if (is_name_in_list(part_elem->partition_name, + alter_info->partition_names)) + { + is_last_partition_reorged= TRUE; + drop_count++; + if (tab_part_info->column_list) + { + List_iterator p(part_elem->list_val_list); + tab_max_elem_val= p++; + } + else + tab_max_range= part_elem->range_value; + if (*fast_alter_table && + unlikely(tab_part_info->temp_partitions. + push_back(part_elem, thd->mem_root))) + goto err; + + if (*fast_alter_table) + part_elem->part_state= PART_TO_BE_REORGED; + if (!found_first) + { + uint alt_part_count= 0; + partition_element *alt_part_elem; + List_iterator + alt_it(alt_part_info->partitions); + found_first= TRUE; + do + { + alt_part_elem= alt_it++; + if (tab_part_info->column_list) + { + List_iterator p(alt_part_elem->list_val_list); + alt_max_elem_val= p++; + } + else + alt_max_range= alt_part_elem->range_value; + + if (*fast_alter_table) + alt_part_elem->part_state= PART_TO_BE_ADDED; + if (alt_part_count == 0) + tab_it.replace(alt_part_elem); + else + tab_it.after(alt_part_elem); + } while (++alt_part_count < num_parts_new); + } + else if (found_last) + { + my_error(ER_CONSECUTIVE_REORG_PARTITIONS, MYF(0)); + goto err; + } + else + tab_it.remove(); + } + else + { + if (found_first) + found_last= TRUE; + } + } while (++part_count < tab_part_info->num_parts); + if (drop_count != num_parts_reorged) + { + my_error(ER_PARTITION_DOES_NOT_EXIST, MYF(0)); + goto err; + } + tab_part_info->num_parts= check_total_partitions; + } + } + else + { + DBUG_ASSERT(FALSE); + } + *partition_changed= TRUE; + thd->work_part_info= tab_part_info; + if (alter_info->partition_flags & (ALTER_PARTITION_ADD | + ALTER_PARTITION_REORGANIZE)) + { + if (tab_part_info->use_default_subpartitions && + 
!alt_part_info->use_default_subpartitions) + { + tab_part_info->use_default_subpartitions= FALSE; + tab_part_info->use_default_num_subpartitions= FALSE; + } + + if (tab_part_info->check_partition_info(thd, (handlerton**)NULL, + table->file, 0, alt_part_info)) + { + goto err; + } + /* + The check below needs to be performed after check_partition_info + since this function "fixes" the item trees of the new partitions + to reorganize into + */ + if (alter_info->partition_flags == ALTER_PARTITION_REORGANIZE && + tab_part_info->part_type == RANGE_PARTITION && + ((is_last_partition_reorged && + (tab_part_info->column_list ? + (partition_info_compare_column_values( + alt_max_elem_val->col_val_array, + tab_max_elem_val->col_val_array) < 0) : + alt_max_range < tab_max_range)) || + (!is_last_partition_reorged && + (tab_part_info->column_list ? + (partition_info_compare_column_values( + alt_max_elem_val->col_val_array, + tab_max_elem_val->col_val_array) != 0) : + alt_max_range != tab_max_range)))) + { + /* + For range partitioning the total resulting range before and + after the change must be the same except in one case. This is + when the last partition is reorganised, in this case it is + acceptable to increase the total range. + The reason is that it is not allowed to have "holes" in the + middle of the ranges and thus we should not allow to reorganise + to create "holes". + */ + my_error(ER_REORG_OUTSIDE_RANGE, MYF(0)); + goto err; + } + } + } // ADD, DROP, COALESCE, REORGANIZE, TABLE_REORG, REBUILD, CONVERT + else + { + /* + When thd->lex->part_info has a reference to a partition_info the + ALTER TABLE contained a definition of a partitioning. + + Case I: + If there was a partition before and there is a new one defined. + We use the new partitioning. The new partitioning is already + defined in the correct variable so no work is needed to + accomplish this. 
+ We do however need to update partition_changed to ensure that not + only the frm file is changed in the ALTER TABLE command. + + Case IIa: + There was a partitioning before and there is no new one defined. + Also the user has not specified to remove partitioning explicitly. + + We use the old partitioning also for the new table. We do this + by assigning the partition_info from the table loaded in + open_table to the partition_info struct used by mysql_create_table + later in this method. + + Case IIb: + There was a partitioning before and there is no new one defined. + The user has specified explicitly to remove partitioning + + Since the user has specified explicitly to remove partitioning + we override the old partitioning info and create a new table using + the specified engine. + In this case the partition also is changed. + + Case III: + There was no partitioning before altering the table, there is + partitioning defined in the altered table. Use the new partitioning. + No work needed since the partitioning info is already in the + correct variable. + + In this case we discover one case where the new partitioning is using + the same partition function as the default (PARTITION BY KEY or + PARTITION BY LINEAR KEY with the list of fields equal to the primary + key fields OR PARTITION BY [LINEAR] KEY() for tables without primary + key) + Also here partition has changed and thus a new table must be + created. + + Case IV: + There was no partitioning before and no partitioning defined. + Obviously no work needed. 
+ */ + partition_info *tab_part_info= table->part_info; + + if (tab_part_info) + { + if (alter_info->partition_flags & ALTER_PARTITION_REMOVE) + { + DBUG_PRINT("info", ("Remove partitioning")); + if (!(create_info->used_fields & HA_CREATE_USED_ENGINE)) + { + DBUG_PRINT("info", ("No explicit engine used")); + create_info->db_type= tab_part_info->default_engine_type; + } + DBUG_PRINT("info", ("New engine type: %s", + ha_resolve_storage_engine_name(create_info->db_type))); + thd->work_part_info= NULL; + *partition_changed= TRUE; + } + else if (!thd->work_part_info) + { + /* + Retain partitioning but possibly with a new storage engine + beneath. + + Create a copy of TABLE::part_info to be able to modify it freely. + */ + if (!(tab_part_info= tab_part_info->get_clone(thd))) + DBUG_RETURN(TRUE); + thd->work_part_info= tab_part_info; + if (create_info->used_fields & HA_CREATE_USED_ENGINE && + create_info->db_type != tab_part_info->default_engine_type) + { + /* + Make sure change of engine happens to all partitions. + */ + DBUG_PRINT("info", ("partition changed")); + if (tab_part_info->is_auto_partitioned) + { + /* + If the user originally didn't specify partitioning to be + used we can remove it now. + */ + thd->work_part_info= NULL; + } + else + { + /* + Ensure that all partitions have the proper engine set-up + */ + set_engine_all_partitions(thd->work_part_info, + create_info->db_type); + } + *partition_changed= TRUE; + } + } + /* + Prohibit inplace when partitioned by primary key and the primary key is changed. 
+ */ + if (!*partition_changed && + tab_part_info->part_field_array && + !tab_part_info->part_field_list.elements && + table->s->primary_key != MAX_KEY) + { + + if (alter_info->flags & (ALTER_DROP_SYSTEM_VERSIONING | + ALTER_ADD_SYSTEM_VERSIONING)) + { + *partition_changed= true; + } + else + { + KEY *primary_key= table->key_info + table->s->primary_key; + List_iterator_fast drop_it(alter_info->drop_list); + const char *primary_name= primary_key->name.str; + const Alter_drop *drop; + drop_it.rewind(); + while ((drop= drop_it++)) + { + if (drop->type == Alter_drop::KEY && + 0 == my_strcasecmp(system_charset_info, primary_name, drop->name)) + break; + } + if (drop) + *partition_changed= TRUE; + } + } + } + if (thd->work_part_info) + { + partition_info *part_info= thd->work_part_info; + bool is_native_partitioned= FALSE; + if (tab_part_info && tab_part_info->part_type == VERSIONING_PARTITION && + tab_part_info != part_info && part_info->part_type == VERSIONING_PARTITION && + part_info->num_parts == 0) + { + if (part_info->vers_info->interval.is_set() && ( + !tab_part_info->vers_info->interval.is_set() || + part_info->vers_info->interval == tab_part_info->vers_info->interval)) + { + /* If interval is changed we can not do fast alter */ + tab_part_info= tab_part_info->get_clone(thd); + } + else + { + /* NOTE: fast_alter_partition_table() works on existing TABLE data. */ + *fast_alter_table= true; + table->mark_table_for_reopen(); + } + *tab_part_info->vers_info= *part_info->vers_info; + thd->work_part_info= part_info= tab_part_info; + *partition_changed= true; + } + + /* + Need to cater for engine types that can handle partition without + using the partition handler. + */ + else if (part_info != tab_part_info) + { + if (part_info->fix_parser_data(thd)) + { + goto err; + } + /* + Compare the old and new part_info. If only key_algorithm + change is done, don't consider it as changed partitioning (to avoid + rebuild). 
This is to handle KEY (numeric_cols) partitioned tables + created in 5.1. For more info, see bug#14521864. + */ + if (alter_info->partition_flags != ALTER_PARTITION_INFO || + !table->part_info || + alter_info->algorithm(thd) != + Alter_info::ALTER_TABLE_ALGORITHM_INPLACE || + !table->part_info->has_same_partitioning(part_info)) + { + DBUG_PRINT("info", ("partition changed")); + *partition_changed= true; + } + } + + /* + Set up partition default_engine_type either from the create_info + or from the previus table + */ + if (create_info->used_fields & HA_CREATE_USED_ENGINE) + part_info->default_engine_type= create_info->db_type; + else + { + if (tab_part_info) + part_info->default_engine_type= tab_part_info->default_engine_type; + else + part_info->default_engine_type= create_info->db_type; + } + DBUG_ASSERT(part_info->default_engine_type && + part_info->default_engine_type != partition_hton); + if (check_native_partitioned(create_info, &is_native_partitioned, + part_info, thd)) + { + goto err; + } + if (!is_native_partitioned) + { + DBUG_ASSERT(create_info->db_type); + create_info->db_type= partition_hton; + } + } + } + DBUG_RETURN(FALSE); +err: + *fast_alter_table= false; + if (saved_part_info) + table->part_info= saved_part_info; + DBUG_RETURN(TRUE); +} + + +/* + Change partitions, used to implement ALTER TABLE ADD/REORGANIZE/COALESCE + partitions. This method is used to implement both single-phase and multi- + phase implementations of ADD/REORGANIZE/COALESCE partitions. 
+ + SYNOPSIS + mysql_change_partitions() + lpt Struct containing parameters + + RETURN VALUES + TRUE Failure + FALSE Success + + DESCRIPTION + Request handler to add partitions as set in states of the partition + + Elements of the lpt parameters used: + create_info Create information used to create partitions + db Database name + table_name Table name + copied Output parameter where number of copied + records are added + deleted Output parameter where number of deleted + records are added +*/ + +static bool mysql_change_partitions(ALTER_PARTITION_PARAM_TYPE *lpt, bool copy_data) +{ + char path[FN_REFLEN+1]; + int error; + handler *file= lpt->table->file; + THD *thd= lpt->thd; + DBUG_ENTER("mysql_change_partitions"); + + build_table_filename(path, sizeof(path) - 1, lpt->alter_info->db.str, + lpt->alter_info->table_name.str, "", 0); + + if(copy_data && mysql_trans_prepare_alter_copy_data(thd)) + DBUG_RETURN(TRUE); + + /* TODO: test if bulk_insert would increase the performance */ + + if (unlikely((error= file->ha_change_partitions(lpt->create_info, path, + &lpt->copied, + &lpt->deleted, + lpt->pack_frm_data, + lpt->pack_frm_len)))) + { + file->print_error(error, MYF(error != ER_OUTOFMEMORY ? 
0 : ME_FATAL)); + } + + DBUG_ASSERT(copy_data || (!lpt->copied && !lpt->deleted)); + + if (copy_data && mysql_trans_commit_alter_copy_data(thd)) + error= 1; /* The error has been reported */ + + DBUG_RETURN(MY_TEST(error)); +} + + +/* + Rename partitions in an ALTER TABLE of partitions + + SYNOPSIS + mysql_rename_partitions() + lpt Struct containing parameters + + RETURN VALUES + TRUE Failure + FALSE Success + + DESCRIPTION + Request handler to rename partitions as set in states of the partition + + Parameters used: + db Database name + table_name Table name +*/ + +static bool mysql_rename_partitions(ALTER_PARTITION_PARAM_TYPE *lpt) +{ + char path[FN_REFLEN+1]; + int error; + DBUG_ENTER("mysql_rename_partitions"); + + build_table_filename(path, sizeof(path) - 1, lpt->alter_info->db.str, + lpt->alter_info->table_name.str, "", 0); + if (unlikely((error= lpt->table->file->ha_rename_partitions(path)))) + { + if (error != 1) + lpt->table->file->print_error(error, MYF(0)); + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +/* + Drop partitions in an ALTER TABLE of partitions + + SYNOPSIS + mysql_drop_partitions() + lpt Struct containing parameters + + RETURN VALUES + TRUE Failure + FALSE Success + DESCRIPTION + Drop the partitions marked with PART_TO_BE_DROPPED state and remove + those partitions from the list. 
+ + Parameters used: + table Table object + db Database name + table_name Table name +*/ + +static bool mysql_drop_partitions(ALTER_PARTITION_PARAM_TYPE *lpt) +{ + char path[FN_REFLEN+1]; + partition_info *part_info= lpt->table->part_info; + List_iterator part_it(part_info->partitions); + int error; + DBUG_ENTER("mysql_drop_partitions"); + + DBUG_ASSERT(lpt->thd->mdl_context.is_lock_owner(MDL_key::TABLE, + lpt->table->s->db.str, + lpt->table->s->table_name.str, + MDL_EXCLUSIVE)); + + build_table_filename(path, sizeof(path) - 1, lpt->alter_info->db.str, + lpt->alter_info->table_name.str, "", 0); + if ((error= lpt->table->file->ha_drop_partitions(path))) + { + lpt->table->file->print_error(error, MYF(0)); + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +/* + Convert partition to a table in an ALTER TABLE of partitions + + SYNOPSIS + alter_partition_convert_out() + lpt Struct containing parameters + + RETURN VALUES + TRUE Failure + FALSE Success + + DESCRIPTION + Rename partition table marked with PART_TO_BE_DROPPED into a separate table + under the name lpt->alter_ctx->(new_db, new_name). + + This is ddl-logged by write_log_convert_out_partition(). 
+*/ + +static bool alter_partition_convert_out(ALTER_PARTITION_PARAM_TYPE *lpt) +{ + partition_info *part_info= lpt->table->part_info; + THD *thd= lpt->thd; + int error; + handler *file= get_new_handler(NULL, thd->mem_root, part_info->default_engine_type); + + DBUG_ASSERT(lpt->thd->mdl_context.is_lock_owner(MDL_key::TABLE, + lpt->table->s->db.str, + lpt->table->s->table_name.str, + MDL_EXCLUSIVE)); + + char from_name[FN_REFLEN + 1], to_name[FN_REFLEN + 1]; + const char *path= lpt->table->s->path.str; + + build_table_filename(to_name, sizeof(to_name) - 1, lpt->alter_ctx->new_db.str, + lpt->alter_ctx->new_name.str, "", 0); + + for (const partition_element &e: part_info->partitions) + { + if (e.part_state != PART_TO_BE_DROPPED) + continue; + + if (unlikely((error= create_partition_name(from_name, sizeof(from_name), + path, e.partition_name, + NORMAL_PART_NAME, FALSE)))) + { + DBUG_ASSERT(thd->is_error()); + return true; + } + if (DBUG_IF("error_convert_partition_00") || + unlikely(error= file->ha_rename_table(from_name, to_name))) + { + my_error(ER_ERROR_ON_RENAME, MYF(0), from_name, to_name, my_errno); + lpt->table->file->print_error(error, MYF(0)); + return true; + } + break; + } + + return false; +} + + +/* + Release all log entries for this partition info struct + SYNOPSIS + release_part_info_log_entries() + first_log_entry First log entry in list to release + RETURN VALUES + NONE +*/ + +static void release_part_info_log_entries(DDL_LOG_MEMORY_ENTRY *log_entry) +{ + DBUG_ENTER("release_part_info_log_entries"); + + while (log_entry) + { + DDL_LOG_MEMORY_ENTRY *next= log_entry->next_active_log_entry; + ddl_log_release_memory_entry(log_entry); + log_entry= next; + } + DBUG_VOID_RETURN; +} + + +/* + Log an rename frm file + SYNOPSIS + write_log_replace_frm() + lpt Struct for parameters + next_entry Next reference to use in log record + from_path Name to rename from + to_path Name to rename to + RETURN VALUES + TRUE Error + FALSE Success + DESCRIPTION + Support routine 
that writes a replace of an frm file into the + ddl log. It also inserts an entry that keeps track of used space into + the partition info object +*/ + +bool write_log_replace_frm(ALTER_PARTITION_PARAM_TYPE *lpt, + uint next_entry, + const char *from_path, + const char *to_path) +{ + DDL_LOG_ENTRY ddl_log_entry; + DDL_LOG_MEMORY_ENTRY *log_entry; + DBUG_ENTER("write_log_replace_frm"); + + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + ddl_log_entry.action_type= DDL_LOG_REPLACE_ACTION; + ddl_log_entry.next_entry= next_entry; + lex_string_set(&ddl_log_entry.handler_name, reg_ext); + lex_string_set(&ddl_log_entry.name, to_path); + lex_string_set(&ddl_log_entry.from_name, from_path); + + if (ddl_log_write_entry(&ddl_log_entry, &log_entry)) + { + DBUG_RETURN(true); + } + ddl_log_add_entry(lpt->part_info, log_entry); + DBUG_RETURN(false); +} + + +/* + Log final partition changes in change partition + SYNOPSIS + write_log_changed_partitions() + lpt Struct containing parameters + RETURN VALUES + TRUE Error + FALSE Success + DESCRIPTION + This code is used to perform safe ADD PARTITION for HASH partitions + and COALESCE for HASH partitions and REORGANIZE for any type of + partitions. + We prepare entries for all partitions except the reorganised partitions + in REORGANIZE partition, those are handled by + write_log_dropped_partitions. For those partitions that are replaced + special care is needed to ensure that this is performed correctly and + this requires a two-phased approach with this log as a helper for this. + + This code is closely intertwined with the code in rename_partitions in + the partition handler. 
+*/ + +static bool write_log_changed_partitions(ALTER_PARTITION_PARAM_TYPE *lpt, + uint *next_entry, const char *path) +{ + DDL_LOG_ENTRY ddl_log_entry; + partition_info *part_info= lpt->part_info; + DDL_LOG_MEMORY_ENTRY *log_entry; + char tmp_path[FN_REFLEN + 1]; + char normal_path[FN_REFLEN + 1]; + List_iterator part_it(part_info->partitions); + uint temp_partitions= part_info->temp_partitions.elements; + uint num_elements= part_info->partitions.elements; + uint i= 0; + DBUG_ENTER("write_log_changed_partitions"); + + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_IS_CHANGED || + (part_elem->part_state == PART_IS_ADDED && temp_partitions)) + { + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + if (part_info->is_sub_partitioned()) + { + List_iterator sub_it(part_elem->subpartitions); + uint num_subparts= part_info->num_subparts; + uint j= 0; + do + { + partition_element *sub_elem= sub_it++; + ddl_log_entry.next_entry= *next_entry; + lex_string_set(&ddl_log_entry.handler_name, + ha_resolve_storage_engine_name(sub_elem-> + engine_type)); + if (create_subpartition_name(tmp_path, sizeof(tmp_path), path, + part_elem->partition_name, + sub_elem->partition_name, + TEMP_PART_NAME) || + create_subpartition_name(normal_path, sizeof(normal_path), path, + part_elem->partition_name, + sub_elem->partition_name, + NORMAL_PART_NAME)) + DBUG_RETURN(TRUE); + lex_string_set(&ddl_log_entry.name, normal_path); + lex_string_set(&ddl_log_entry.from_name, tmp_path); + if (part_elem->part_state == PART_IS_CHANGED) + ddl_log_entry.action_type= DDL_LOG_REPLACE_ACTION; + else + ddl_log_entry.action_type= DDL_LOG_RENAME_ACTION; + if (ddl_log_write_entry(&ddl_log_entry, &log_entry)) + DBUG_RETURN(TRUE); + + *next_entry= log_entry->entry_pos; + sub_elem->log_entry= log_entry; + ddl_log_add_entry(part_info, log_entry); + } while (++j < num_subparts); + } + else + { + ddl_log_entry.next_entry= *next_entry; + lex_string_set(&ddl_log_entry.handler_name, + 
ha_resolve_storage_engine_name(part_elem->engine_type)); + if (create_partition_name(tmp_path, sizeof(tmp_path), path, + part_elem->partition_name, TEMP_PART_NAME, + TRUE) || + create_partition_name(normal_path, sizeof(normal_path), path, + part_elem->partition_name, NORMAL_PART_NAME, + TRUE)) + DBUG_RETURN(TRUE); + lex_string_set(&ddl_log_entry.name, normal_path); + lex_string_set(&ddl_log_entry.from_name, tmp_path); + if (part_elem->part_state == PART_IS_CHANGED) + ddl_log_entry.action_type= DDL_LOG_REPLACE_ACTION; + else + ddl_log_entry.action_type= DDL_LOG_RENAME_ACTION; + if (ddl_log_write_entry(&ddl_log_entry, &log_entry)) + { + DBUG_RETURN(TRUE); + } + *next_entry= log_entry->entry_pos; + part_elem->log_entry= log_entry; + ddl_log_add_entry(part_info, log_entry); + } + } + } while (++i < num_elements); + DBUG_RETURN(FALSE); +} + + +/* + Log dropped or converted partitions + SYNOPSIS + log_drop_or_convert_action() + lpt Struct containing parameters + RETURN VALUES + TRUE Error + FALSE Success +*/ + +enum log_action_enum +{ + ACT_DROP = 0, + ACT_CONVERT_IN, + ACT_CONVERT_OUT +}; + +static bool log_drop_or_convert_action(ALTER_PARTITION_PARAM_TYPE *lpt, + uint *next_entry, const char *path, + const char *from_name, bool temp_list, + const log_action_enum convert_action) +{ + DDL_LOG_ENTRY ddl_log_entry; + DBUG_ASSERT(convert_action == ACT_DROP || (from_name != NULL)); + partition_info *part_info= lpt->part_info; + DDL_LOG_MEMORY_ENTRY *log_entry; + char tmp_path[FN_REFLEN + 1]; + List_iterator part_it(part_info->partitions); + List_iterator temp_it(part_info->temp_partitions); + uint num_temp_partitions= part_info->temp_partitions.elements; + uint num_elements= part_info->partitions.elements; + DBUG_ENTER("log_drop_or_convert_action"); + + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + + ddl_log_entry.action_type= convert_action ? 
+ DDL_LOG_RENAME_ACTION : + DDL_LOG_DELETE_ACTION; + if (temp_list) + num_elements= num_temp_partitions; + while (num_elements--) + { + partition_element *part_elem; + if (temp_list) + part_elem= temp_it++; + else + part_elem= part_it++; + if (part_elem->part_state == PART_TO_BE_DROPPED || + part_elem->part_state == PART_TO_BE_ADDED || + part_elem->part_state == PART_CHANGED) + { + uint name_variant; + if (part_elem->part_state == PART_CHANGED || + (part_elem->part_state == PART_TO_BE_ADDED && + num_temp_partitions)) + name_variant= TEMP_PART_NAME; + else + name_variant= NORMAL_PART_NAME; + DBUG_ASSERT(convert_action != ACT_CONVERT_IN || + part_elem->part_state == PART_TO_BE_ADDED); + DBUG_ASSERT(convert_action != ACT_CONVERT_OUT || + part_elem->part_state == PART_TO_BE_DROPPED); + if (part_info->is_sub_partitioned()) + { + DBUG_ASSERT(!convert_action); + List_iterator sub_it(part_elem->subpartitions); + uint num_subparts= part_info->num_subparts; + uint j= 0; + do + { + partition_element *sub_elem= sub_it++; + ddl_log_entry.next_entry= *next_entry; + lex_string_set(&ddl_log_entry.handler_name, + ha_resolve_storage_engine_name(sub_elem-> + engine_type)); + if (create_subpartition_name(tmp_path, sizeof(tmp_path), path, + part_elem->partition_name, + sub_elem->partition_name, name_variant)) + DBUG_RETURN(TRUE); + lex_string_set(&ddl_log_entry.name, tmp_path); + if (ddl_log_write_entry(&ddl_log_entry, &log_entry)) + { + DBUG_RETURN(TRUE); + } + *next_entry= log_entry->entry_pos; + sub_elem->log_entry= log_entry; + ddl_log_add_entry(part_info, log_entry); + } while (++j < num_subparts); + } + else + { + ddl_log_entry.next_entry= *next_entry; + lex_string_set(&ddl_log_entry.handler_name, + ha_resolve_storage_engine_name(part_elem->engine_type)); + if (create_partition_name(tmp_path, sizeof(tmp_path), path, + part_elem->partition_name, name_variant, + TRUE)) + DBUG_RETURN(TRUE); + switch (convert_action) + { + case ACT_CONVERT_OUT: + ddl_log_entry.from_name= { from_name, 
strlen(from_name) }; + /* fall through */ + case ACT_DROP: + ddl_log_entry.name= { tmp_path, strlen(tmp_path) }; + break; + case ACT_CONVERT_IN: + ddl_log_entry.name= { from_name, strlen(from_name) }; + ddl_log_entry.from_name= { tmp_path, strlen(tmp_path) }; + } + if (ddl_log_write_entry(&ddl_log_entry, &log_entry)) + { + DBUG_RETURN(TRUE); + } + *next_entry= log_entry->entry_pos; + part_elem->log_entry= log_entry; + ddl_log_add_entry(part_info, log_entry); + } + } + } + DBUG_RETURN(FALSE); +} + + +inline +static bool write_log_dropped_partitions(ALTER_PARTITION_PARAM_TYPE *lpt, + uint *next_entry, const char *path, + bool temp_list) +{ + return log_drop_or_convert_action(lpt, next_entry, path, NULL, temp_list, + ACT_DROP); +} + +inline +static bool write_log_convert_partition(ALTER_PARTITION_PARAM_TYPE *lpt, + uint *next_entry, const char *path) +{ + char other_table[FN_REFLEN + 1]; + const ulong f= lpt->alter_info->partition_flags; + DBUG_ASSERT((f & ALTER_PARTITION_CONVERT_IN) || (f & ALTER_PARTITION_CONVERT_OUT)); + const log_action_enum convert_action= (f & ALTER_PARTITION_CONVERT_IN) + ? ACT_CONVERT_IN : ACT_CONVERT_OUT; + build_table_filename(other_table, sizeof(other_table) - 1, lpt->alter_ctx->new_db.str, + lpt->alter_ctx->new_name.str, "", 0); + DDL_LOG_MEMORY_ENTRY *main_entry= lpt->part_info->main_entry; + bool res= log_drop_or_convert_action(lpt, next_entry, path, other_table, + false, convert_action); + /* + NOTE: main_entry is "drop shadow frm", we have to keep it like this + because partitioning crash-safety disables it at install shadow FRM phase. + This is needed to avoid spurious drop action when the shadow frm is replaced + by the backup frm and there is nothing to drop. + */ + lpt->part_info->main_entry= main_entry; + return res; +} + + +/* + Write the log entry to ensure that the shadow frm file is removed at + crash. 
+ SYNOPSIS + write_log_drop_frm() + lpt Struct containing parameters + + RETURN VALUES + TRUE Error + FALSE Success + DESCRIPTION + Prepare an entry to the ddl log indicating a drop/install of the shadow frm + file and its corresponding handler file. +*/ + +static bool write_log_drop_frm(ALTER_PARTITION_PARAM_TYPE *lpt, + DDL_LOG_STATE *drop_chain) +{ + char path[FN_REFLEN + 1]; + DBUG_ENTER("write_log_drop_frm"); + const DDL_LOG_STATE *main_chain= lpt->part_info; + const bool drop_backup= (drop_chain != main_chain); + + build_table_shadow_filename(path, sizeof(path) - 1, lpt, drop_backup); + mysql_mutex_lock(&LOCK_gdl); + if (ddl_log_delete_frm(drop_chain, (const char*)path)) + goto error; + + if (drop_backup && (lpt->alter_info->partition_flags & ALTER_PARTITION_CONVERT_IN)) + { + TABLE_LIST *table_from= lpt->table_list->next_local; + build_table_filename(path, sizeof(path) - 1, table_from->db.str, + table_from->table_name.str, "", 0); + + if (ddl_log_delete_frm(drop_chain, (const char*) path)) + goto error; + } + + if (ddl_log_write_execute_entry(drop_chain->list->entry_pos, + drop_backup ? + main_chain->execute_entry->entry_pos : 0, + &drop_chain->execute_entry)) + goto error; + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(FALSE); + +error: + release_part_info_log_entries(drop_chain->list); + mysql_mutex_unlock(&LOCK_gdl); + drop_chain->list= NULL; + my_error(ER_DDL_LOG_ERROR, MYF(0)); + DBUG_RETURN(TRUE); +} + + +static inline +bool write_log_drop_shadow_frm(ALTER_PARTITION_PARAM_TYPE *lpt) +{ + return write_log_drop_frm(lpt, lpt->part_info); +} + + +/* + Log renaming of shadow frm to real frm name and dropping of old frm + SYNOPSIS + write_log_rename_frm() + lpt Struct containing parameters + RETURN VALUES + TRUE Error + FALSE Success + DESCRIPTION + Prepare an entry to ensure that we complete the renaming of the frm + file if failure occurs in the middle of the rename process. 
+*/ + +static bool write_log_rename_frm(ALTER_PARTITION_PARAM_TYPE *lpt) +{ + partition_info *part_info= lpt->part_info; + DDL_LOG_MEMORY_ENTRY *log_entry; + DDL_LOG_MEMORY_ENTRY *exec_log_entry= part_info->execute_entry; + char path[FN_REFLEN + 1]; + char shadow_path[FN_REFLEN + 1]; + DDL_LOG_MEMORY_ENTRY *old_first_log_entry= part_info->list; + DBUG_ENTER("write_log_rename_frm"); + + part_info->list= NULL; + build_table_filename(path, sizeof(path) - 1, lpt->alter_info->db.str, + lpt->alter_info->table_name.str, "", 0); + build_table_shadow_filename(shadow_path, sizeof(shadow_path) - 1, lpt); + mysql_mutex_lock(&LOCK_gdl); + if (write_log_replace_frm(lpt, 0UL, shadow_path, path)) + goto error; + log_entry= part_info->list; + part_info->main_entry= log_entry; + if (ddl_log_write_execute_entry(log_entry->entry_pos, + &exec_log_entry)) + goto error; + release_part_info_log_entries(old_first_log_entry); + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(FALSE); + +error: + release_part_info_log_entries(part_info->list); + mysql_mutex_unlock(&LOCK_gdl); + part_info->list= old_first_log_entry; + part_info->main_entry= NULL; + my_error(ER_DDL_LOG_ERROR, MYF(0)); + DBUG_RETURN(TRUE); +} + + +/* + Write the log entries to ensure that the drop partition command is completed + even in the presence of a crash. + + SYNOPSIS + write_log_drop_partition() + lpt Struct containing parameters + RETURN VALUES + TRUE Error + FALSE Success + DESCRIPTION + Prepare entries to the ddl log indicating all partitions to drop and to + install the shadow frm file and remove the old frm file. 
*/

static bool write_log_drop_partition(ALTER_PARTITION_PARAM_TYPE *lpt)
{
  partition_info *part_info= lpt->part_info;
  DDL_LOG_MEMORY_ENTRY *log_entry;
  DDL_LOG_MEMORY_ENTRY *exec_log_entry= part_info->execute_entry;
  char tmp_path[FN_REFLEN + 1];
  char path[FN_REFLEN + 1];
  uint next_entry= 0;
  /* Save the current chain so it can be restored on failure */
  DDL_LOG_MEMORY_ENTRY *old_first_log_entry= part_info->list;
  DBUG_ENTER("write_log_drop_partition");

  part_info->list= NULL;
  build_table_filename(path, sizeof(path) - 1, lpt->alter_info->db.str,
                       lpt->alter_info->table_name.str, "", 0);
  build_table_shadow_filename(tmp_path, sizeof(tmp_path) - 1, lpt);
  mysql_mutex_lock(&LOCK_gdl);
  /* First log deletion of each dropped partition's files... */
  if (write_log_dropped_partitions(lpt, &next_entry, (const char*)path,
                                   FALSE))
    goto error;
  /* ...then, linked after them, the shadow-frm -> real-frm replacement */
  if (write_log_replace_frm(lpt, next_entry, (const char*)tmp_path,
                            (const char*)path))
    goto error;
  log_entry= part_info->list;
  part_info->main_entry= log_entry;
  if (ddl_log_write_execute_entry(log_entry->entry_pos,
                                  &exec_log_entry))
    goto error;
  release_part_info_log_entries(old_first_log_entry);
  mysql_mutex_unlock(&LOCK_gdl);
  DBUG_RETURN(FALSE);

error:
  /* Roll back to the previously saved chain */
  release_part_info_log_entries(part_info->list);
  mysql_mutex_unlock(&LOCK_gdl);
  part_info->list= old_first_log_entry;
  part_info->main_entry= NULL;
  my_error(ER_DDL_LOG_ERROR, MYF(0));
  DBUG_RETURN(TRUE);
}


/*
  Log the conversion of a partition (CONVERT TABLE TO/FROM PARTITION),
  linking the new entry in front of any existing chain.
  Returns true on error (ER_DDL_LOG_ERROR raised), false on success.
  Note: unlike its siblings this function does not use DBUG_ENTER and does
  not release partially written entries on the error path.
*/
static bool write_log_convert_partition(ALTER_PARTITION_PARAM_TYPE *lpt)
{
  partition_info *part_info= lpt->part_info;
  char tmp_path[FN_REFLEN + 1];
  char path[FN_REFLEN + 1];
  /* Link in front of the existing chain head, if any */
  uint next_entry= part_info->list ? part_info->list->entry_pos : 0;

  build_table_filename(path, sizeof(path) - 1,
                       lpt->alter_info->db.str,
                       lpt->alter_info->table_name.str, "", 0);
  /* NOTE(review): tmp_path appears to be built but never used here — verify */
  build_table_shadow_filename(tmp_path, sizeof(tmp_path) - 1, lpt);

  mysql_mutex_lock(&LOCK_gdl);

  /* Calls the three-argument overload, not itself recursively */
  if (write_log_convert_partition(lpt, &next_entry, (const char*)path))
    goto error;
  DBUG_ASSERT(next_entry == part_info->list->entry_pos);
  if (ddl_log_write_execute_entry(part_info->list->entry_pos,
                                  &part_info->execute_entry))
    goto error;
  mysql_mutex_unlock(&LOCK_gdl);
  return false;

error:
  mysql_mutex_unlock(&LOCK_gdl);
  part_info->main_entry= NULL;
  my_error(ER_DDL_LOG_ERROR, MYF(0));
  return true;
}


/*
  Write the log entries to ensure that the add partition command is not
  executed at all if a crash before it has completed

  SYNOPSIS
    write_log_add_change_partition()
    lpt                      Struct containing parameters
  RETURN VALUES
    TRUE                     Error
    FALSE                    Success
  DESCRIPTION
    Prepare entries to the ddl log indicating all partitions to drop and to
    remove the shadow frm file.
    We always inject entries backwards in the list in the ddl log since we
    don't know the entry position until we have written it.
*/

static bool write_log_add_change_partition(ALTER_PARTITION_PARAM_TYPE *lpt)
{
  partition_info *part_info= lpt->part_info;
  DDL_LOG_MEMORY_ENTRY *log_entry;
  char tmp_path[FN_REFLEN + 1];
  char path[FN_REFLEN + 1];
  uint next_entry= 0;
  DDL_LOG_MEMORY_ENTRY *old_first_log_entry= part_info->list;
  /* write_log_drop_shadow_frm(lpt) must have been run first */
  DBUG_ASSERT(old_first_log_entry);
  DBUG_ENTER("write_log_add_change_partition");

  build_table_filename(path, sizeof(path) - 1, lpt->alter_info->db.str,
                       lpt->alter_info->table_name.str, "", 0);
  build_table_shadow_filename(tmp_path, sizeof(tmp_path) - 1, lpt);
  mysql_mutex_lock(&LOCK_gdl);

  /*
    Relink the previous drop shadow frm entry: the new drop-partition
    entries are chained in front of the existing chain head.
    (The if is redundant given the assert above, but kept defensively.)
  */
  if (old_first_log_entry)
    next_entry= old_first_log_entry->entry_pos;
  if (write_log_dropped_partitions(lpt, &next_entry, (const char*)path,
                                   FALSE))
    goto error;
  log_entry= part_info->list;

  if (ddl_log_write_execute_entry(log_entry->entry_pos,
                                  &part_info->execute_entry))
    goto error;
  mysql_mutex_unlock(&LOCK_gdl);
  DBUG_RETURN(FALSE);

error:
  /* Restore the chain head saved on entry */
  release_part_info_log_entries(part_info->list);
  mysql_mutex_unlock(&LOCK_gdl);
  part_info->list= old_first_log_entry;
  my_error(ER_DDL_LOG_ERROR, MYF(0));
  DBUG_RETURN(TRUE);
}


/*
  Write description of how to complete the operation after first phase of
  change partitions.

  SYNOPSIS
    write_log_final_change_partition()
    lpt                      Struct containing parameters
  RETURN VALUES
    TRUE                     Error
    FALSE                    Success
  DESCRIPTION
    We will write log entries that specify to
    1) Install the shadow frm file.
    2) Remove all partitions reorganized. (To be able to reorganize a partition
       to the same name. Like in REORGANIZE p0 INTO (p0, p1),
       so that the later rename from the new p0-temporary name to p0 don't
       fail because the partition already exists.
    3) Rename others to reflect the new naming scheme.

    Note that it is written in the ddl log in reverse.
*/

static bool write_log_final_change_partition(ALTER_PARTITION_PARAM_TYPE *lpt)
{
  partition_info *part_info= lpt->part_info;
  DDL_LOG_MEMORY_ENTRY *log_entry;
  DDL_LOG_MEMORY_ENTRY *exec_log_entry= part_info->execute_entry;
  char path[FN_REFLEN + 1];
  char shadow_path[FN_REFLEN + 1];
  /* Save the current chain so it can be restored on failure */
  DDL_LOG_MEMORY_ENTRY *old_first_log_entry= part_info->list;
  uint next_entry= 0;
  DBUG_ENTER("write_log_final_change_partition");

  /*
    Do not link any previous log entry.
    Replace the revert operations with forced retry operations.
  */
  part_info->list= NULL;
  build_table_filename(path, sizeof(path) - 1, lpt->alter_info->db.str,
                       lpt->alter_info->table_name.str, "", 0);
  build_table_shadow_filename(shadow_path, sizeof(shadow_path) - 1, lpt);
  mysql_mutex_lock(&LOCK_gdl);
  if (write_log_changed_partitions(lpt, &next_entry, (const char*)path))
    goto error;
  if (write_log_dropped_partitions(lpt, &next_entry, (const char*)path,
                                   lpt->alter_info->partition_flags &
                                   ALTER_PARTITION_REORGANIZE))
    goto error;
  if (write_log_replace_frm(lpt, next_entry, shadow_path, path))
    goto error;
  log_entry= part_info->list;
  part_info->main_entry= log_entry;
  /* Overwrite the revert execute log entry with this retry execute entry */
  if (ddl_log_write_execute_entry(log_entry->entry_pos,
                                  &exec_log_entry))
    goto error;
  release_part_info_log_entries(old_first_log_entry);
  mysql_mutex_unlock(&LOCK_gdl);
  DBUG_RETURN(FALSE);

error:
  /* Roll back to the previously saved chain */
  release_part_info_log_entries(part_info->list);
  mysql_mutex_unlock(&LOCK_gdl);
  part_info->list= old_first_log_entry;
  part_info->main_entry= NULL;
  my_error(ER_DDL_LOG_ERROR, MYF(0));
  DBUG_RETURN(TRUE);
}


/*
  Remove entry from ddl log and release resources for others to use

  SYNOPSIS
    write_log_completed()
    lpt                      Struct containing parameters
    dont_crash               Currently unused by this function
  RETURN VALUES
    NONE (void function; write failures are only reported by the callee)
*/

/*
  TODO: Partitioning atomic DDL refactoring: this should be replaced with
        ddl_log_complete().
*/
static void write_log_completed(ALTER_PARTITION_PARAM_TYPE *lpt,
                                bool dont_crash)
{
  partition_info *part_info= lpt->part_info;
  DDL_LOG_MEMORY_ENTRY *log_entry= part_info->execute_entry;
  DBUG_ENTER("write_log_completed");

  DBUG_ASSERT(log_entry);
  mysql_mutex_lock(&LOCK_gdl);
  if (ddl_log_disable_execute_entry(&log_entry))
  {
    /*
      Failed to write, Bad...
      We have completed the operation but have log records to REMOVE
      stuff that shouldn't be removed. What clever things could one do
      here? An error output was written to the error output by the
      above method so we don't do anything here.
    */
    ;
  }
  release_part_info_log_entries(part_info->list);
  release_part_info_log_entries(part_info->execute_entry);
  mysql_mutex_unlock(&LOCK_gdl);
  part_info->execute_entry= NULL;
  part_info->list= NULL;
  DBUG_VOID_RETURN;
}


/*
  Release all log entries
  SYNOPSIS
    release_log_entries()
    part_info                  Partition info struct
  RETURN VALUES
    NONE
*/

/*
  TODO: Partitioning atomic DDL refactoring: this should be replaced with
        ddl_log_release_entries().
*/
static void release_log_entries(partition_info *part_info)
{
  mysql_mutex_lock(&LOCK_gdl);
  release_part_info_log_entries(part_info->list);
  release_part_info_log_entries(part_info->execute_entry);
  mysql_mutex_unlock(&LOCK_gdl);
  part_info->list= NULL;
  part_info->execute_entry= NULL;
}


/*
  Final part of partition changes to handle things when under
  LOCK TABLES.
  SYNOPSIS
    alter_partition_lock_handling()
    lpt                        Struct carrying parameters
  RETURN VALUES
    true on error
*/
static bool alter_partition_lock_handling(ALTER_PARTITION_PARAM_TYPE *lpt)
{
  THD *thd= lpt->thd;

  if (lpt->table)
  {
    /*
      Remove all instances of the table and its locks and other resources.
    */
    close_all_tables_for_name(thd, lpt->table->s, HA_EXTRA_NOT_USED, NULL);
  }
  lpt->table= 0;
  lpt->table_list->table= 0;
  /* Under LOCK TABLES the tables must be reopened after being closed */
  if (thd->locked_tables_mode)
    return thd->locked_tables_list.reopen_tables(thd, false);

  return false;
}


/**
  Unlock and close table before renaming and dropping partitions.

  Walks thd->open_tables and closes every open instance of the table
  (matched by share db/name), removing its lock and marking the handler
  file closed.

  @param lpt Struct carrying parameters

  @return error code if external_unlock fails
*/

static int alter_close_table(ALTER_PARTITION_PARAM_TYPE *lpt)
{
  THD *thd= lpt->thd;
  TABLE_SHARE *share= lpt->table->s;
  DBUG_ENTER("alter_close_table");

  TABLE *table= thd->open_tables;
  do {
    table= find_locked_table(table, share->db.str, share->table_name.str);
    if (!table)
    {
      /* No more instances of this table are open */
      DBUG_RETURN(0);
    }

    if (table->db_stat)
    {
      if (int error= mysql_lock_remove(thd, thd->lock, table))
      {
        DBUG_RETURN(error);
      }
      if (int error= table->file->ha_close())
      {
        DBUG_RETURN(error);
      }
      table->db_stat= 0;                        // Mark file closed
    }
  } while ((table= table->next));

  DBUG_RETURN(0);
}


/**
  Handle errors for ALTER TABLE for partitioning.

  @param lpt                Struct carrying parameters
  @param action_completed   The action must be completed, NOT reverted
  @param drop_partition     Partitions has not been dropped yet
  @param frm_install        The shadow frm-file has not yet been installed
  @param reopen             If false, only close/invalidate the table
                            instances and return; skip ddl-log recovery and
                            the locked-tables reopen
*/

/*
  TODO: Partitioning atomic DDL refactoring: this should be replaced with
        correct combination of ddl_log_revert() / ddl_log_complete()
*/
static void handle_alter_part_error(ALTER_PARTITION_PARAM_TYPE *lpt,
                                    bool action_completed,
                                    bool drop_partition,
                                    bool frm_install,
                                    bool reopen)
{
  THD *thd= lpt->thd;
  /* Clone part_info: the original may be freed when tables are closed */
  partition_info *part_info= lpt->part_info->get_clone(thd);
  TABLE *table= lpt->table;
  DBUG_ENTER("handle_alter_part_error");
  DBUG_ASSERT(table->needs_reopen());

  /*
    All instances of this table needs to be closed.
    Better to do that here, than leave the cleaning up to others.
    Acquire EXCLUSIVE mdl lock if not already acquired.
  */
  if (!thd->mdl_context.is_lock_owner(MDL_key::TABLE, lpt->alter_info->db.str,
                                      lpt->alter_info->table_name.str,
                                      MDL_EXCLUSIVE) &&
      wait_while_table_is_used(thd, table, HA_EXTRA_FORCE_REOPEN))
  {
    /*
      Did not succeed in getting exclusive access to the table.

      Since we have altered a cached table object (and its part_info) we need
      at least to remove this instance so it will not be reused.

      Temporarily remove it from the locked table list, so that it will get
      reopened.
    */
    thd->locked_tables_list.unlink_from_list(thd,
                                             table->pos_in_locked_tables,
                                             false);
    /*
      Make sure that the table is unlocked, closed and removed from
      the table cache.
    */
    mysql_lock_remove(thd, thd->lock, table);
    close_thread_table(thd, &thd->open_tables);
    lpt->table_list->table= NULL;
  }
  else
  {
    /* Ensure the share is destroyed and reopened. */
    close_all_tables_for_name(thd, table->s, HA_EXTRA_NOT_USED, NULL);
  }

  if (!reopen)
    DBUG_VOID_RETURN;

  /* Try to recover by re-executing the logged ddl actions */
  if (part_info->list &&
      ddl_log_execute_entry(thd, part_info->list->entry_pos))
  {
    /*
      We couldn't recover from error, most likely manual interaction
      is required.
    */
    write_log_completed(lpt, FALSE);
    release_log_entries(part_info);
    if (!action_completed)
    {
      if (drop_partition)
      {
        /* Table is still ok, but we left a shadow frm file behind. */
        push_warning(thd, Sql_condition::WARN_LEVEL_WARN, 1,
                     "Operation was unsuccessful, table is still "
                     "intact, but it is possible that a shadow frm "
                     "file was left behind");
      }
      else
      {
        push_warning(thd, Sql_condition::WARN_LEVEL_WARN, 1,
                     "Operation was unsuccessful, table is still "
                     "intact, but it is possible that a shadow frm "
                     "file was left behind. "
                     "It is also possible that temporary partitions "
                     "are left behind, these could be empty or more "
                     "or less filled with records");
      }
    }
    else
    {
      if (frm_install)
      {
        /*
          Failed during install of shadow frm file, table isn't intact
          and dropped partitions are still there
        */
        push_warning(thd, Sql_condition::WARN_LEVEL_WARN, 1,
                     "Failed during alter of partitions, table is no "
                     "longer intact. "
                     "The frm file is in an unknown state, and a "
                     "backup is required.");
      }
      else if (drop_partition)
      {
        /*
          Table is ok, we have switched to new table but left dropped
          partitions still in their places. We remove the log records and
          ask the user to perform the action manually. We remove the log
          records and ask the user to perform the action manually.
        */
        push_warning(thd, Sql_condition::WARN_LEVEL_WARN, 1,
                     "Failed during drop of partitions, table is "
                     "intact. "
                     "Manual drop of remaining partitions is required");
      }
      else
      {
        /*
          We failed during renaming of partitions. The table is most
          certainly in a very bad state so we give user warning and disable
          the table by writing an ancient frm version into it.
        */
        push_warning(thd, Sql_condition::WARN_LEVEL_WARN, 1,
                     "Failed during renaming of partitions. We are now "
                     "in a position where table is not reusable "
                     "Table is disabled by writing ancient frm file "
                     "version into it");
      }
    }
  }
  else
  {
    release_log_entries(part_info);
    if (!action_completed)
    {
      /*
        We hit an error before things were completed but managed
        to recover from the error. An error occurred and we have
        restored things to original so no need for further action.
      */
      ;
    }
    else
    {
      /*
        We hit an error after we had completed most of the operation
        and were successful in a second attempt so the operation
        actually is successful now. We need to issue a warning that
        even though we reported an error the operation was successfully
        completed.
      */
      push_warning(thd, Sql_condition::WARN_LEVEL_WARN, 1,
                   "Operation was successfully completed by failure "
                   "handling, after failure of normal operation");
    }
  }

  if (thd->locked_tables_mode)
  {
    Diagnostics_area *stmt_da= NULL;
    Diagnostics_area tmp_stmt_da(true);

    if (unlikely(thd->is_error()))
    {
      /* reopen might fail if we have a previous error, use a temporary da. */
      stmt_da= thd->get_stmt_da();
      thd->set_stmt_da(&tmp_stmt_da);
    }

    /* NB: error status is not needed here, the statement fails with
       the original error. */
    if (unlikely(thd->locked_tables_list.reopen_tables(thd, false)))
      sql_print_warning("We failed to reacquire LOCKs in ALTER TABLE");

    if (stmt_da)
      thd->set_stmt_da(stmt_da);
  }

  DBUG_VOID_RETURN;
}


/**
  Downgrade an exclusive MDL lock if under LOCK TABLE.

  If we don't downgrade the lock, it will not be downgraded or released
  until the table is unlocked, resulting in blocking other threads using
  the table.
*/

static void downgrade_mdl_if_lock_tables_mode(THD *thd, MDL_ticket *ticket,
                                              enum_mdl_type type)
{
  if (thd->locked_tables_mode)
    ticket->downgrade_lock(type);
}


/*
  Record the partition ALTER in the backup ddl log (same engine, same
  db/table for old and new; only the tabledef version changes).
  Always returns 0: failures are signalled via backup_log_error by
  backup_log_ddl().
*/
bool log_partition_alter_to_ddl_log(ALTER_PARTITION_PARAM_TYPE *lpt)
{
  backup_log_info ddl_log;
  bzero(&ddl_log, sizeof(ddl_log));
  LEX_CSTRING old_engine_lex;
  lex_string_set(&old_engine_lex, lpt->table->file->real_table_type());

  ddl_log.query=                   { C_STRING_WITH_LEN("ALTER") };
  ddl_log.org_storage_engine_name= old_engine_lex;
  ddl_log.org_partitioned=         true;
  ddl_log.org_database=            lpt->alter_info->db;
  ddl_log.org_table=               lpt->alter_info->table_name;
  ddl_log.org_table_id=            lpt->org_tabledef_version;
  ddl_log.new_storage_engine_name= old_engine_lex;
  ddl_log.new_partitioned=         true;
  ddl_log.new_database=            lpt->alter_info->db;
  ddl_log.new_table=               lpt->alter_info->table_name;
  ddl_log.new_table_id=            lpt->create_info->tabledef_version;
  backup_log_ddl(&ddl_log);   // This sets backup_log_error on failure

  return 0;
}


extern bool alter_partition_convert_in(ALTER_PARTITION_PARAM_TYPE *lpt);

/**
  Check that definition of source table fits definition of partition being
  added and every row stored in the table conforms partition's expression.

  @param lpt  Structure containing parameters required for checking

  @return false on success, true on error
*/

static bool check_table_data(ALTER_PARTITION_PARAM_TYPE *lpt)
{
  /*
    TODO: if destination is partitioned by range(X) and source is indexed by X
    then just get min(X) and max(X) from index.
  */
  THD *thd= lpt->thd;
  TABLE *table_to= lpt->table_list->table;
  TABLE *table_from= lpt->table_list->next_local->table;

  /* Both tables must already be under exclusive MDL at this point */
  DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE,
                                             table_to->s->db.str,
                                             table_to->s->table_name.str,
                                             MDL_EXCLUSIVE));

  DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE,
                                             table_from->s->db.str,
                                             table_from->s->table_name.str,
                                             MDL_EXCLUSIVE));

  uint32 new_part_id;
  partition_element *part_elem;
  const char* partition_name= thd->lex->part_info->curr_part_elem->partition_name;
  part_elem= table_to->part_info->get_part_elem(partition_name,
                                                nullptr, 0, &new_part_id);
  if (unlikely(!part_elem))
    return true;

  if (unlikely(new_part_id == NOT_A_PARTITION_ID))
  {
    /* Named element is a subpartition; conversion targets partitions only */
    DBUG_ASSERT(table_to->part_info->is_sub_partitioned());
    my_error(ER_PARTITION_INSTEAD_OF_SUBPARTITION, MYF(0));
    return true;
  }

  /* Scan the source table rows against the target partition's expression */
  if (verify_data_with_partition(table_from, table_to, new_part_id))
  {
    return true;
  }

  return false;
}


/**
  Actually perform the change requested by ALTER TABLE of partitions
  previously prepared.
+ + @param thd Thread object + @param table Original table object with new part_info + @param alter_info ALTER TABLE info + @param create_info Create info for CREATE TABLE + @param table_list List of the table involved + @param db Database name of new table + @param table_name Table name of new table + + @return Operation status + @retval TRUE Error + @retval FALSE Success + + @note + Perform all ALTER TABLE operations for partitioned tables that can be + performed fast without a full copy of the original table. +*/ + +uint fast_alter_partition_table(THD *thd, TABLE *table, + Alter_info *alter_info, + Alter_table_ctx *alter_ctx, + HA_CREATE_INFO *create_info, + TABLE_LIST *table_list) +{ + /* + TODO: Partitioning atomic DDL refactoring. + + DDL log chain state is stored in partition_info: + + struct st_ddl_log_memory_entry *first_log_entry; + struct st_ddl_log_memory_entry *exec_log_entry; + struct st_ddl_log_memory_entry *frm_log_entry; + + Make it stored and used in DDL_LOG_STATE like it was done in MDEV-17567. + This requires mysql_write_frm() refactoring (see comment there). 
+ */ + + /* Set-up struct used to write frm files */ + partition_info *part_info; + ALTER_PARTITION_PARAM_TYPE lpt_obj, *lpt= &lpt_obj; + bool action_completed= FALSE; + bool frm_install= FALSE; + MDL_ticket *mdl_ticket= table->mdl_ticket; + /* option_bits is used to mark if we should log the query with IF EXISTS */ + ulonglong save_option_bits= thd->variables.option_bits; + DBUG_ENTER("fast_alter_partition_table"); + DBUG_ASSERT(table->needs_reopen()); + + part_info= table->part_info; + lpt->thd= thd; + lpt->table_list= table_list; + lpt->part_info= part_info; + lpt->alter_info= alter_info; + lpt->alter_ctx= alter_ctx; + lpt->create_info= create_info; + lpt->db_options= create_info->table_options_with_row_type(); + lpt->table= table; + lpt->key_info_buffer= 0; + lpt->key_count= 0; + lpt->org_tabledef_version= table->s->tabledef_version; + lpt->copied= 0; + lpt->deleted= 0; + lpt->pack_frm_data= NULL; + lpt->pack_frm_len= 0; + + /* Add IF EXISTS to binlog if shared table */ + if (table->file->partition_ht()->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE) + thd->variables.option_bits|= OPTION_IF_EXISTS; + + if (table->file->alter_table_flags(alter_info->flags) & + HA_PARTITION_ONE_PHASE && + !(alter_info->partition_flags & ALTER_PARTITION_AUTO_HIST)) + { + /* + In the case where the engine supports one phase online partition + changes it is not necessary to have any exclusive locks. The + correctness is upheld instead by transactions being aborted if they + access the table after its partition definition has changed (if they + are still using the old partition definition). + + The handler is in this case responsible to ensure that all users + start using the new frm file after it has changed. To implement + one phase it is necessary for the handler to have the master copy + of the frm file and use discovery mechanisms to renew it. 
Thus + write frm will write the frm, pack the new frm and finally + the frm is deleted and the discovery mechanisms will either restore + back to the old or installing the new after the change is activated. + + Thus all open tables will be discovered that they are old, if not + earlier as soon as they try an operation using the old table. One + should ensure that this is checked already when opening a table, + even if it is found in the cache of open tables. + + change_partitions will perform all operations and it is the duty of + the handler to ensure that the frm files in the system gets updated + in synch with the changes made and if an error occurs that a proper + error handling is done. + + If the MySQL Server crashes at this moment but the handler succeeds + in performing the change then the binlog is not written for the + change. There is no way to solve this as long as the binlog is not + transactional and even then it is hard to solve it completely. + + The first approach here was to downgrade locks. Now a different approach + is decided upon. The idea is that the handler will have access to the + Alter_info when store_lock arrives with TL_WRITE_ALLOW_READ. So if the + handler knows that this functionality can be handled with a lower lock + level it will set the lock level to TL_WRITE_ALLOW_WRITE immediately. + Thus the need to downgrade the lock disappears. + 1) Write the new frm, pack it and then delete it + 2) Perform the change within the handler + */ + if (mysql_write_frm(lpt, WFRM_WRITE_SHADOW) || + mysql_change_partitions(lpt, true)) + { + goto err; + } + } + else if (alter_info->partition_flags & ALTER_PARTITION_DROP) + { + /* + Now after all checks and setting state on dropped partitions we can + start the actual dropping of the partitions. + + Drop partition is actually two things happening. The first is that + a lot of records are deleted. The second is that the behaviour of + subsequent updates and writes and deletes will change. 
The delete + part can be handled without any particular high lock level by + transactional engines whereas non-transactional engines need to + ensure that this change is done with an exclusive lock on the table. + The second part, the change of partitioning does however require + an exclusive lock to install the new partitioning as one atomic + operation. If this is not the case, it is possible for two + transactions to see the change in a different order than their + serialisation order. Thus we need an exclusive lock for both + transactional and non-transactional engines. + + For LIST partitions it could be possible to avoid the exclusive lock + (and for RANGE partitions if they didn't rearrange range definitions + after a DROP PARTITION) if one ensured that failed accesses to the + dropped partitions was aborted for sure (thus only possible for + transactional engines). + + 0) Write an entry that removes the shadow frm file if crash occurs + 1) Write the new frm file as a shadow frm + 2) Get an exclusive metadata lock on the table (waits for all active + transactions using this table). This ensures that we + can release all other locks on the table and since no one can open + the table, there can be no new threads accessing the table. They + will be hanging on this exclusive lock. + 3) Write the ddl log to ensure that the operation is completed + even in the presence of a MySQL Server crash (the log is executed + before any other threads are started, so there are no locking issues). + 4) Close the table that have already been opened but didn't stumble on + the abort locked previously. This is done as part of the + alter_close_table call. 
+ 5) Old place for binary logging + 6) Install the previously written shadow frm file + 7) Prepare handlers for drop of partitions + 8) Drop the partitions + 9) Remove entries from ddl log + 10) Reopen table if under lock tables + 11) Write the bin log + Unfortunately the writing of the binlog is not synchronised with + other logging activities. So no matter in which order the binlog + is written compared to other activities there will always be cases + where crashes make strange things occur. In this placement it can + happen that the ALTER TABLE DROP PARTITION gets performed in the + master but not in the slaves if we have a crash, after writing the + ddl log but before writing the binlog. A solution to this would + require writing the statement first in the ddl log and then + when recovering from the crash read the binlog and insert it into + the binlog if not written already. + 12) Complete query + + We insert Error injections at all places where it could be interesting + to test if recovery is properly done. 
+ */ + if (write_log_drop_shadow_frm(lpt) || + ERROR_INJECT("drop_partition_1") || + mysql_write_frm(lpt, WFRM_WRITE_SHADOW) || + ERROR_INJECT("drop_partition_2") || + wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED) || + ERROR_INJECT("drop_partition_3") || + write_log_drop_partition(lpt) || + (action_completed= TRUE, FALSE) || + ERROR_INJECT("drop_partition_4") || + alter_close_table(lpt) || + ERROR_INJECT("drop_partition_5") || + ERROR_INJECT("drop_partition_6") || + (frm_install= TRUE, FALSE) || + mysql_write_frm(lpt, WFRM_INSTALL_SHADOW) || + log_partition_alter_to_ddl_log(lpt) || + (frm_install= FALSE, FALSE) || + ERROR_INJECT("drop_partition_7") || + mysql_drop_partitions(lpt) || + ERROR_INJECT("drop_partition_8") || + (write_log_completed(lpt, FALSE), FALSE) || + ((!thd->lex->no_write_to_binlog) && + (write_bin_log(thd, FALSE, + thd->query(), thd->query_length()), FALSE)) || + ERROR_INJECT("drop_partition_9")) + { + handle_alter_part_error(lpt, action_completed, TRUE, frm_install, true); + goto err; + } + if (alter_partition_lock_handling(lpt)) + goto err; + } + else if (alter_info->partition_flags & ALTER_PARTITION_CONVERT_OUT) + { + DDL_LOG_STATE chain_drop_backup; + bzero(&chain_drop_backup, sizeof(chain_drop_backup)); + + if (mysql_write_frm(lpt, WFRM_WRITE_CONVERTED_TO) || + ERROR_INJECT("convert_partition_1") || + write_log_drop_shadow_frm(lpt) || + ERROR_INJECT("convert_partition_2") || + mysql_write_frm(lpt, WFRM_WRITE_SHADOW) || + ERROR_INJECT("convert_partition_3") || + wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED) || + ERROR_INJECT("convert_partition_4") || + write_log_convert_partition(lpt) || + ERROR_INJECT("convert_partition_5") || + alter_close_table(lpt) || + ERROR_INJECT("convert_partition_6") || + alter_partition_convert_out(lpt) || + ERROR_INJECT("convert_partition_7") || + write_log_drop_frm(lpt, &chain_drop_backup) || + mysql_write_frm(lpt, WFRM_INSTALL_SHADOW|WFRM_BACKUP_ORIGINAL) || + 
log_partition_alter_to_ddl_log(lpt) || + ERROR_INJECT("convert_partition_8") || + ((!thd->lex->no_write_to_binlog) && + ((thd->binlog_xid= thd->query_id), + ddl_log_update_xid(lpt->part_info, thd->binlog_xid), + write_bin_log(thd, false, thd->query(), thd->query_length()), + (thd->binlog_xid= 0))) || + ERROR_INJECT("convert_partition_9")) + { + DDL_LOG_STATE main_state= *lpt->part_info; + handle_alter_part_error(lpt, true, true, false, false); + ddl_log_complete(&chain_drop_backup); + (void) ddl_log_revert(thd, &main_state); + if (thd->locked_tables_mode) + thd->locked_tables_list.reopen_tables(thd, false); + goto err; + } + ddl_log_complete(lpt->part_info); + ERROR_INJECT("convert_partition_10"); + (void) ddl_log_revert(thd, &chain_drop_backup); + if (alter_partition_lock_handling(lpt) || + ERROR_INJECT("convert_partition_11")) + goto err; + } + else if ((alter_info->partition_flags & ALTER_PARTITION_CONVERT_IN)) + { + DDL_LOG_STATE chain_drop_backup; + bzero(&chain_drop_backup, sizeof(chain_drop_backup)); + TABLE *table_from= table_list->next_local->table; + + if (wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED) || + wait_while_table_is_used(thd, table_from, HA_EXTRA_PREPARE_FOR_RENAME) || + ERROR_INJECT("convert_partition_1") || + compare_table_with_partition(thd, table_from, table, NULL, 0) || + ERROR_INJECT("convert_partition_2") || + check_table_data(lpt)) + goto err; + + if (write_log_drop_shadow_frm(lpt) || + ERROR_INJECT("convert_partition_3") || + mysql_write_frm(lpt, WFRM_WRITE_SHADOW) || + ERROR_INJECT("convert_partition_4") || + alter_close_table(lpt) || + ERROR_INJECT("convert_partition_5") || + write_log_convert_partition(lpt) || + ERROR_INJECT("convert_partition_6") || + alter_partition_convert_in(lpt) || + ERROR_INJECT("convert_partition_7") || + (frm_install= true, false) || + write_log_drop_frm(lpt, &chain_drop_backup) || + mysql_write_frm(lpt, WFRM_INSTALL_SHADOW|WFRM_BACKUP_ORIGINAL) || + log_partition_alter_to_ddl_log(lpt) || + 
(frm_install= false, false) || + ERROR_INJECT("convert_partition_8") || + ((!thd->lex->no_write_to_binlog) && + ((thd->binlog_xid= thd->query_id), + ddl_log_update_xid(lpt->part_info, thd->binlog_xid), + write_bin_log(thd, false, thd->query(), thd->query_length()), + (thd->binlog_xid= 0))) || + ERROR_INJECT("convert_partition_9")) + { + DDL_LOG_STATE main_state= *lpt->part_info; + handle_alter_part_error(lpt, true, true, false, false); + ddl_log_complete(&chain_drop_backup); + (void) ddl_log_revert(thd, &main_state); + if (thd->locked_tables_mode) + thd->locked_tables_list.reopen_tables(thd, false); + goto err; + } + ddl_log_complete(lpt->part_info); + ERROR_INJECT("convert_partition_10"); + (void) ddl_log_revert(thd, &chain_drop_backup); + if (alter_partition_lock_handling(lpt) || + ERROR_INJECT("convert_partition_11")) + goto err; + } + /* + TODO: would be good if adding new empty VERSIONING partitions would always + go this way, auto or not. + */ + else if ((alter_info->partition_flags & ALTER_PARTITION_ADD) && + (part_info->part_type == RANGE_PARTITION || + part_info->part_type == LIST_PARTITION || + alter_info->partition_flags & ALTER_PARTITION_AUTO_HIST)) + { + DBUG_ASSERT(!(alter_info->partition_flags & ALTER_PARTITION_CONVERT_IN)); + /* + ADD RANGE/LIST PARTITIONS + In this case there are no tuples removed and no tuples are added. + Thus the operation is merely adding a new partition. Thus it is + necessary to perform the change as an atomic operation. Otherwise + someone reading without seeing the new partition could potentially + miss updates made by a transaction serialised before it that are + inserted into the new partition. + + 0) Write an entry that removes the shadow frm file if crash occurs + 1) Write the new frm file as a shadow frm file + 2) Get an exclusive metadata lock on the table (waits for all active + transactions using this table). 
This ensures that we + can release all other locks on the table and since no one can open + the table, there can be no new threads accessing the table. They + will be hanging on this exclusive lock. + 3) Write an entry to remove the new parttions if crash occurs + 4) Add the new partitions. + 5) Close all instances of the table and remove them from the table cache. + 6) Old place for write binlog + 7) Now the change is completed except for the installation of the + new frm file. We thus write an action in the log to change to + the shadow frm file + 8) Install the new frm file of the table where the partitions are + added to the table. + 9) Remove entries from ddl log + 10)Reopen tables if under lock tables + 11)Write to binlog + 12)Complete query + */ + if (write_log_drop_shadow_frm(lpt) || + ERROR_INJECT("add_partition_1") || + mysql_write_frm(lpt, WFRM_WRITE_SHADOW) || + ERROR_INJECT("add_partition_2") || + wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED) || + ERROR_INJECT("add_partition_3") || + write_log_add_change_partition(lpt) || + ERROR_INJECT("add_partition_4") || + mysql_change_partitions(lpt, false) || + ERROR_INJECT("add_partition_5") || + alter_close_table(lpt) || + ERROR_INJECT("add_partition_6") || + ERROR_INJECT("add_partition_7") || + write_log_rename_frm(lpt) || + (action_completed= TRUE, FALSE) || + ERROR_INJECT("add_partition_8") || + (frm_install= TRUE, FALSE) || + mysql_write_frm(lpt, WFRM_INSTALL_SHADOW) || + log_partition_alter_to_ddl_log(lpt) || + (frm_install= FALSE, FALSE) || + ERROR_INJECT("add_partition_9") || + (write_log_completed(lpt, FALSE), FALSE) || + ((!thd->lex->no_write_to_binlog) && + (write_bin_log(thd, FALSE, + thd->query(), thd->query_length()), FALSE)) || + ERROR_INJECT("add_partition_10")) + { + handle_alter_part_error(lpt, action_completed, FALSE, frm_install, true); + goto err; + } + if (alter_partition_lock_handling(lpt)) + goto err; + } + else + { + /* + ADD HASH PARTITION/ + COALESCE PARTITION/ + REBUILD 
PARTITION/ + REORGANIZE PARTITION + + In this case all records are still around after the change although + possibly organised into new partitions, thus by ensuring that all + updates go to both the old and the new partitioning scheme we can + actually perform this operation lock-free. The only exception to + this is when REORGANIZE PARTITION adds/drops ranges. In this case + there needs to be an exclusive lock during the time when the range + changes occur. + This is only possible if the handler can ensure double-write for a + period. The double write will ensure that it doesn't matter where the + data is read from since both places are updated for writes. If such + double writing is not performed then it is necessary to perform the + change with the usual exclusive lock. With double writes it is even + possible to perform writes in parallel with the reorganisation of + partitions. + + Without double write procedure we get the following procedure. + The only difference with using double write is that we can downgrade + the lock to TL_WRITE_ALLOW_WRITE. Double write in this case only + double writes from old to new. If we had double writing in both + directions we could perform the change completely without exclusive + lock for HASH partitions. + Handlers that perform double writing during the copy phase can actually + use a lower lock level. This can be handled inside store_lock in the + respective handler. + + 0) Write an entry that removes the shadow frm file if crash occurs. + 1) Write the shadow frm file of new partitioning. + 2) Log such that temporary partitions added in change phase are + removed in a crash situation. + 3) Add the new partitions. + Copy from the reorganised partitions to the new partitions. + 4) Get an exclusive metadata lock on the table (waits for all active + transactions using this table). 
This ensures that we + can release all other locks on the table and since no one can open + the table, there can be no new threads accessing the table. They + will be hanging on this exclusive lock. + 5) Close the table. + 6) Log that operation is completed and log all complete actions + needed to complete operation from here. + 7) Old place for write bin log. + 8) Prepare handlers for rename and delete of partitions. + 9) Rename and drop the reorged partitions such that they are no + longer used and rename those added to their real new names. + 10) Install the shadow frm file. + 11) Reopen the table if under lock tables. + 12) Write to binlog + 13) Complete query. + */ + if (write_log_drop_shadow_frm(lpt) || + ERROR_INJECT("change_partition_1") || + mysql_write_frm(lpt, WFRM_WRITE_SHADOW) || + ERROR_INJECT("change_partition_2") || + write_log_add_change_partition(lpt) || + ERROR_INJECT("change_partition_3") || + mysql_change_partitions(lpt, true) || + ERROR_INJECT("change_partition_4") || + wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED) || + ERROR_INJECT("change_partition_5") || + alter_close_table(lpt) || + ERROR_INJECT("change_partition_6") || + write_log_final_change_partition(lpt) || + (action_completed= TRUE, FALSE) || + ERROR_INJECT("change_partition_7") || + ERROR_INJECT("change_partition_8") || + ((frm_install= TRUE), FALSE) || + mysql_write_frm(lpt, WFRM_INSTALL_SHADOW) || + log_partition_alter_to_ddl_log(lpt) || + (frm_install= FALSE, FALSE) || + ERROR_INJECT("change_partition_9") || + mysql_drop_partitions(lpt) || + ERROR_INJECT("change_partition_10") || + mysql_rename_partitions(lpt) || + ERROR_INJECT("change_partition_11") || + (write_log_completed(lpt, FALSE), FALSE) || + ((!thd->lex->no_write_to_binlog) && + (write_bin_log(thd, FALSE, + thd->query(), thd->query_length()), FALSE)) || + ERROR_INJECT("change_partition_12")) + { + handle_alter_part_error(lpt, action_completed, FALSE, frm_install, true); + goto err; + } + if 
(alter_partition_lock_handling(lpt)) + goto err; + } + thd->variables.option_bits= save_option_bits; + downgrade_mdl_if_lock_tables_mode(thd, mdl_ticket, MDL_SHARED_NO_READ_WRITE); + /* + A final step is to write the query to the binlog and send ok to the + user + */ + DBUG_RETURN(fast_end_partition(thd, lpt->copied, lpt->deleted, table_list)); +err: + thd->variables.option_bits= save_option_bits; + downgrade_mdl_if_lock_tables_mode(thd, mdl_ticket, MDL_SHARED_NO_READ_WRITE); + DBUG_RETURN(TRUE); +} +#endif + + +/* + Prepare for calling val_int on partition function by setting fields to + point to the record where the values of the PF-fields are stored. + + SYNOPSIS + set_field_ptr() + ptr Array of fields to change ptr + new_buf New record pointer + old_buf Old record pointer + + DESCRIPTION + Set ptr in field objects of field array to refer to new_buf record + instead of previously old_buf. Used before calling val_int and after + it is used to restore pointers to table->record[0]. + This routine is placed outside of partition code since it can be useful + also for other programs. +*/ + +void set_field_ptr(Field **ptr, const uchar *new_buf, + const uchar *old_buf) +{ + my_ptrdiff_t diff= (new_buf - old_buf); + DBUG_ENTER("set_field_ptr"); + + do + { + (*ptr)->move_field_offset(diff); + } while (*(++ptr)); + DBUG_VOID_RETURN; +} + + +/* + Prepare for calling val_int on partition function by setting fields to + point to the record where the values of the PF-fields are stored. + This variant works on a key_part reference. + It is not required that all fields are NOT NULL fields. + + SYNOPSIS + set_key_field_ptr() + key_info key info with a set of fields to change ptr + new_buf New record pointer + old_buf Old record pointer + + DESCRIPTION + Set ptr in field objects of field array to refer to new_buf record + instead of previously old_buf. Used before calling val_int and after + it is used to restore pointers to table->record[0]. 
+ This routine is placed outside of partition code since it can be useful + also for other programs. +*/ + +void set_key_field_ptr(KEY *key_info, const uchar *new_buf, + const uchar *old_buf) +{ + KEY_PART_INFO *key_part= key_info->key_part; + uint key_parts= key_info->user_defined_key_parts; + uint i= 0; + my_ptrdiff_t diff= (new_buf - old_buf); + DBUG_ENTER("set_key_field_ptr"); + + do + { + key_part->field->move_field_offset(diff); + key_part++; + } while (++i < key_parts); + DBUG_VOID_RETURN; +} + + +/** + Append all fields in read_set to string + + @param[in,out] str String to append to. + @param[in] row Row to append. + @param[in] table Table containing read_set and fields for the row. +*/ +void append_row_to_str(String &str, const uchar *row, TABLE *table) +{ + Field **fields, **field_ptr; + const uchar *rec; + uint num_fields= bitmap_bits_set(table->read_set); + uint curr_field_index= 0; + bool is_rec0= !row || row == table->record[0]; + if (!row) + rec= table->record[0]; + else + rec= row; + + /* Create a new array of all read fields. */ + fields= (Field**) my_malloc(PSI_INSTRUMENT_ME, sizeof(void*) * (num_fields + 1), + MYF(0)); + if (!fields) + return; + fields[num_fields]= NULL; + for (field_ptr= table->field; + *field_ptr; + field_ptr++) + { + if (!bitmap_is_set(table->read_set, (*field_ptr)->field_index)) + continue; + fields[curr_field_index++]= *field_ptr; + } + + + if (!is_rec0) + set_field_ptr(fields, rec, table->record[0]); + + for (field_ptr= fields; + *field_ptr; + field_ptr++) + { + Field *field= *field_ptr; + str.append(' '); + str.append(&field->field_name); + str.append(':'); + field_unpack(&str, field, rec, 0, false); + } + + if (!is_rec0) + set_field_ptr(fields, table->record[0], rec); + my_free(fields); +} + + +#ifdef WITH_PARTITION_STORAGE_ENGINE +/** + Return comma-separated list of used partitions in the provided given string. 
+ + @param mem_root Where to allocate following list + @param part_info Partitioning info + @param[out] parts The resulting list of string to fill + @param[out] used_partitions_list result list to fill + + Generate a list of used partitions (from bits in part_info->read_partitions + bitmap), and store it into the provided String object. + + @note + The produced string must not be longer then MAX_PARTITIONS * (1 + FN_LEN). + In case of UPDATE, only the partitions read is given, not the partitions + that was written or locked. +*/ + +void make_used_partitions_str(MEM_ROOT *alloc, + partition_info *part_info, + String *parts_str, + String_list &used_partitions_list) +{ + parts_str->length(0); + partition_element *pe; + uint partition_id= 0; + List_iterator it(part_info->partitions); + + if (part_info->is_sub_partitioned()) + { + partition_element *head_pe; + while ((head_pe= it++)) + { + List_iterator it2(head_pe->subpartitions); + while ((pe= it2++)) + { + if (bitmap_is_set(&part_info->read_partitions, partition_id)) + { + if (parts_str->length()) + parts_str->append(','); + uint index= parts_str->length(); + parts_str->append(head_pe->partition_name, + strlen(head_pe->partition_name), + system_charset_info); + parts_str->append('_'); + parts_str->append(pe->partition_name, + strlen(pe->partition_name), + system_charset_info); + used_partitions_list.append_str(alloc, parts_str->ptr() + index); + } + partition_id++; + } + } + } + else + { + while ((pe= it++)) + { + if (bitmap_is_set(&part_info->read_partitions, partition_id)) + { + if (parts_str->length()) + parts_str->append(','); + used_partitions_list.append_str(alloc, pe->partition_name); + parts_str->append(pe->partition_name, strlen(pe->partition_name), + system_charset_info); + } + partition_id++; + } + } +} +#endif + +/**************************************************************************** + * Partition interval analysis support + 
***************************************************************************/ + +/* + Setup partition_info::* members related to partitioning range analysis + + SYNOPSIS + set_up_partition_func_pointers() + part_info Partitioning info structure + + DESCRIPTION + Assuming that passed partition_info structure already has correct values + for members that specify [sub]partitioning type, table fields, and + functions, set up partition_info::* members that are related to + Partitioning Interval Analysis (see get_partitions_in_range_iter for its + definition) + + IMPLEMENTATION + There are three available interval analyzer functions: + (1) get_part_iter_for_interval_via_mapping + (2) get_part_iter_for_interval_cols_via_map + (3) get_part_iter_for_interval_via_walking + + They all have limited applicability: + (1) is applicable for "PARTITION BY (func(t.field))", where + func is a monotonic function. + + (2) is applicable for "PARTITION BY COLUMNS (field_list) + + (3) is applicable for + "[SUB]PARTITION BY (any_func(t.integer_field))" + + If both (1) and (3) are applicable, (1) is preferred over (3). + + This function sets part_info::get_part_iter_for_interval according to + this criteria, and also sets some auxilary fields that the function + uses. 
+*/ +#ifdef WITH_PARTITION_STORAGE_ENGINE +static void set_up_range_analysis_info(partition_info *part_info) +{ + /* Set the catch-all default */ + part_info->get_part_iter_for_interval= NULL; + part_info->get_subpart_iter_for_interval= NULL; + + /* + Check if get_part_iter_for_interval_via_mapping() can be used for + partitioning + */ + switch (part_info->part_type) { + case VERSIONING_PARTITION: + if (!part_info->vers_info->interval.is_set()) + break; + /* Fall through */ + case RANGE_PARTITION: + case LIST_PARTITION: + if (!part_info->column_list) + { + if (part_info->part_expr->get_monotonicity_info() != NON_MONOTONIC) + { + part_info->get_part_iter_for_interval= + get_part_iter_for_interval_via_mapping; + goto setup_subparts; + } + } + else + { + part_info->get_part_iter_for_interval= + get_part_iter_for_interval_cols_via_map; + goto setup_subparts; + } + default: + ; + } + + /* + Check if get_part_iter_for_interval_via_walking() can be used for + partitioning + */ + if (part_info->num_part_fields == 1) + { + Field *field= part_info->part_field_array[0]; + switch (field->type()) { + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_INT24: + case MYSQL_TYPE_LONG: + case MYSQL_TYPE_LONGLONG: + part_info->get_part_iter_for_interval= + get_part_iter_for_interval_via_walking; + break; + default: + ; + } + } + +setup_subparts: + /* + Check if get_part_iter_for_interval_via_walking() can be used for + subpartitioning + */ + if (part_info->num_subpart_fields == 1) + { + Field *field= part_info->subpart_field_array[0]; + switch (field->type()) { + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_LONG: + case MYSQL_TYPE_LONGLONG: + part_info->get_subpart_iter_for_interval= + get_part_iter_for_interval_via_walking; + break; + default: + ; + } + } +} + + +/* + This function takes a memory of packed fields in opt-range format + and stores it in record format. 
To avoid having to worry about how + the length of fields are calculated in opt-range format we send + an array of lengths used for each field in store_length_array. + + SYNOPSIS + store_tuple_to_record() + pfield Field array + store_length_array Array of field lengths + value Memory where fields are stored + value_end End of memory + + RETURN VALUE + nparts Number of fields assigned +*/ +uint32 store_tuple_to_record(Field **pfield, + uint32 *store_length_array, + uchar *value, + uchar *value_end) +{ + /* This function is inspired by store_key_image_rec. */ + uint32 nparts= 0; + uchar *loc_value; + while (value < value_end) + { + loc_value= value; + if ((*pfield)->real_maybe_null()) + { + if (*loc_value) + (*pfield)->set_null(); + else + (*pfield)->set_notnull(); + loc_value++; + } + uint len= (*pfield)->pack_length(); + (*pfield)->set_key_image(loc_value, len); + value+= *store_length_array; + store_length_array++; + nparts++; + pfield++; + } + return nparts; +} + +/** + RANGE(columns) partitioning: compare partition value bound and probe tuple. + + @param val Partition column values. + @param nvals_in_rec Number of (prefix) fields to compare. + + @return Less than/Equal to/Greater than 0 if the record is L/E/G than val. + + @note The partition value bound is always a full tuple (but may include the + MAXVALUE special value). The probe tuple may be a prefix of partitioning + tuple. 
+*/ + +static int cmp_rec_and_tuple(part_column_list_val *val, uint32 nvals_in_rec) +{ + partition_info *part_info= val->part_info; + Field **field= part_info->part_field_array; + Field **fields_end= field + nvals_in_rec; + int res; + + for (; field != fields_end; field++, val++) + { + if (val->max_value) + return -1; + if ((*field)->is_null()) + { + if (val->null_value) + continue; + return -1; + } + if (val->null_value) + return +1; + res= (*field)->cmp((const uchar*)val->column_value); + if (res) + return res; + } + return 0; +} + + +/** + Compare record and columns partition tuple including endpoint handling. + + @param val Columns partition tuple + @param n_vals_in_rec Number of columns to compare + @param is_left_endpoint True if left endpoint (part_tuple < rec or + part_tuple <= rec) + @param include_endpoint If endpoint is included (part_tuple <= rec or + rec <= part_tuple) + + @return Less than/Equal to/Greater than 0 if the record is L/E/G than + the partition tuple. + + @see get_list_array_idx_for_endpoint() and + get_partition_id_range_for_endpoint(). +*/ + +static int cmp_rec_and_tuple_prune(part_column_list_val *val, + uint32 n_vals_in_rec, + bool is_left_endpoint, + bool include_endpoint) +{ + int cmp; + Field **field; + if ((cmp= cmp_rec_and_tuple(val, n_vals_in_rec))) + return cmp; + field= val->part_info->part_field_array + n_vals_in_rec; + if (!(*field)) + { + /* Full match. Only equal if including endpoint. */ + if (include_endpoint) + return 0; + + if (is_left_endpoint) + return +4; /* Start of range, part_tuple < rec, return higher. */ + return -4; /* End of range, rec < part_tupe, return lesser. */ + } + /* + The prefix is equal and there are more partition columns to compare. + + If including left endpoint or not including right endpoint + then the record is considered lesser compared to the partition. + + i.e: + part(10, x) <= rec(10, unknown) and rec(10, unknown) < part(10, x) + part <= rec -> lesser (i.e. 
this or previous partitions) + rec < part -> lesser (i.e. this or previous partitions) + */ + if (is_left_endpoint == include_endpoint) + return -2; + + /* + If right endpoint and the first additional partition value + is MAXVALUE, then the record is lesser. + */ + if (!is_left_endpoint && (val + n_vals_in_rec)->max_value) + return -3; + + /* + Otherwise the record is considered greater. + + rec <= part -> greater (i.e. does not match this partition, seek higher). + part < rec -> greater (i.e. does not match this partition, seek higher). + */ + return 2; +} + + +typedef uint32 (*get_endpoint_func)(partition_info*, bool left_endpoint, + bool include_endpoint); + +typedef uint32 (*get_col_endpoint_func)(partition_info*, bool left_endpoint, + bool include_endpoint, + uint32 num_parts); + +/** + Get partition for RANGE COLUMNS endpoint. + + @param part_info Partitioning metadata. + @param is_left_endpoint True if left endpoint (const <=/< cols) + @param include_endpoint True if range includes the endpoint (<=/>=) + @param nparts Total number of partitions + + @return Partition id of matching partition. + + @see get_partition_id_cols_list_for_endpoint and + get_partition_id_range_for_endpoint. +*/ + +uint32 get_partition_id_cols_range_for_endpoint(partition_info *part_info, + bool is_left_endpoint, + bool include_endpoint, + uint32 nparts) +{ + uint min_part_id= 0, max_part_id= part_info->num_parts, loc_part_id; + part_column_list_val *range_col_array= part_info->range_col_array; + uint num_columns= part_info->part_field_list.elements; + DBUG_ENTER("get_partition_id_cols_range_for_endpoint"); + + /* Find the matching partition (including taking endpoint into account). */ + do + { + /* Midpoint, adjusted down, so it can never be > last partition. 
*/ + loc_part_id= (max_part_id + min_part_id) >> 1; + if (0 <= cmp_rec_and_tuple_prune(range_col_array + + loc_part_id * num_columns, + nparts, + is_left_endpoint, + include_endpoint)) + min_part_id= loc_part_id + 1; + else + max_part_id= loc_part_id; + } while (max_part_id > min_part_id); + loc_part_id= max_part_id; + + /* Given value must be LESS THAN the found partition. */ + DBUG_ASSERT(loc_part_id == part_info->num_parts || + (0 > cmp_rec_and_tuple_prune(range_col_array + + loc_part_id * num_columns, + nparts, is_left_endpoint, + include_endpoint))); + /* Given value must be GREATER THAN or EQUAL to the previous partition. */ + DBUG_ASSERT(loc_part_id == 0 || + (0 <= cmp_rec_and_tuple_prune(range_col_array + + (loc_part_id - 1) * num_columns, + nparts, is_left_endpoint, + include_endpoint))); + + if (!is_left_endpoint) + { + /* Set the end after this partition if not already after the last. */ + if (loc_part_id < part_info->num_parts) + loc_part_id++; + } + DBUG_RETURN(loc_part_id); +} + + +static int get_part_iter_for_interval_cols_via_map(partition_info *part_info, + bool is_subpart, uint32 *store_length_array, + uchar *min_value, uchar *max_value, + uint min_len, uint max_len, + uint flags, PARTITION_ITERATOR *part_iter) +{ + bool can_match_multiple_values; + uint32 nparts; + get_col_endpoint_func UNINIT_VAR(get_col_endpoint); + uint full_length= 0; + DBUG_ENTER("get_part_iter_for_interval_cols_via_map"); + + if (part_info->part_type == RANGE_PARTITION || part_info->part_type == VERSIONING_PARTITION) + { + get_col_endpoint= get_partition_id_cols_range_for_endpoint; + part_iter->get_next= get_next_partition_id_range; + } + else if (part_info->part_type == LIST_PARTITION) + { + if (part_info->has_default_partititon() && + part_info->num_parts == 1) + DBUG_RETURN(-1); //only DEFAULT partition + get_col_endpoint= get_partition_id_cols_list_for_endpoint; + part_iter->get_next= get_next_partition_id_list; + part_iter->part_info= part_info; + 
DBUG_ASSERT(part_info->num_list_values); + } + else + assert(0); + + for (uint32 i= 0; i < part_info->num_columns; i++) + full_length+= store_length_array[i]; + + can_match_multiple_values= ((flags & + (NO_MIN_RANGE | NO_MAX_RANGE | NEAR_MIN | + NEAR_MAX)) || + (min_len != max_len) || + (min_len != full_length) || + memcmp(min_value, max_value, min_len)); + DBUG_ASSERT(can_match_multiple_values || (flags & EQ_RANGE) || flags == 0); + if (can_match_multiple_values && part_info->has_default_partititon()) + part_iter->ret_default_part= part_iter->ret_default_part_orig= TRUE; + + if (flags & NO_MIN_RANGE) + part_iter->part_nums.start= part_iter->part_nums.cur= 0; + else + { + // Copy from min_value to record + nparts= store_tuple_to_record(part_info->part_field_array, + store_length_array, + min_value, + min_value + min_len); + part_iter->part_nums.start= part_iter->part_nums.cur= + get_col_endpoint(part_info, TRUE, !(flags & NEAR_MIN), + nparts); + } + if (flags & NO_MAX_RANGE) + { + if (part_info->part_type == RANGE_PARTITION || part_info->part_type == VERSIONING_PARTITION) + part_iter->part_nums.end= part_info->num_parts; + else /* LIST_PARTITION */ + { + DBUG_ASSERT(part_info->part_type == LIST_PARTITION); + part_iter->part_nums.end= part_info->num_list_values; + } + } + else + { + // Copy from max_value to record + nparts= store_tuple_to_record(part_info->part_field_array, + store_length_array, + max_value, + max_value + max_len); + part_iter->part_nums.end= get_col_endpoint(part_info, FALSE, + !(flags & NEAR_MAX), + nparts); + } + if (part_iter->part_nums.start == part_iter->part_nums.end) + { + // No matching partition found. 
+ if (part_info->has_default_partititon()) + { + part_iter->ret_default_part= part_iter->ret_default_part_orig= TRUE; + DBUG_RETURN(1); + } + DBUG_RETURN(0); + } + DBUG_RETURN(1); +} + + +/** + Partitioning Interval Analysis: Initialize the iterator for "mapping" case + + @param part_info Partition info + @param is_subpart TRUE - act for subpartitioning + FALSE - act for partitioning + @param store_length_array Ignored. + @param min_value minimum field value, in opt_range key format. + @param max_value minimum field value, in opt_range key format. + @param min_len Ignored. + @param max_len Ignored. + @param flags Some combination of NEAR_MIN, NEAR_MAX, NO_MIN_RANGE, + NO_MAX_RANGE. + @param part_iter Iterator structure to be initialized + + @details Initialize partition set iterator to walk over the interval in + ordered-array-of-partitions (for RANGE partitioning) or + ordered-array-of-list-constants (for LIST partitioning) space. + + This function is used when partitioning is done by + (ascending_func(t.field)), and we can map an interval in + t.field space into a sub-array of partition_info::range_int_array or + partition_info::list_array (see get_partition_id_range_for_endpoint, + get_list_array_idx_for_endpoint for details). + + The function performs this interval mapping, and sets the iterator to + traverse the sub-array and return appropriate partitions. + + @return Status of iterator + @retval 0 No matching partitions (iterator not initialized) + @retval 1 Ok, iterator intialized for traversal of matching partitions. 
+ @retval -1 All partitions would match (iterator not initialized) +*/ + +static int get_part_iter_for_interval_via_mapping(partition_info *part_info, + bool is_subpart, + uint32 *store_length_array, /* ignored */ + uchar *min_value, uchar *max_value, + uint min_len, uint max_len, /* ignored */ + uint flags, PARTITION_ITERATOR *part_iter) +{ + Field *field= part_info->part_field_array[0]; + uint32 UNINIT_VAR(max_endpoint_val); + get_endpoint_func UNINIT_VAR(get_endpoint); + bool can_match_multiple_values; /* is not '=' */ + uint field_len= field->pack_length_in_rec(); + MYSQL_TIME start_date; + bool check_zero_dates= false; + bool zero_in_start_date= true; + DBUG_ENTER("get_part_iter_for_interval_via_mapping"); + DBUG_ASSERT(!is_subpart); + (void) store_length_array; + (void)min_len; + (void)max_len; + part_iter->ret_null_part= part_iter->ret_null_part_orig= FALSE; + part_iter->ret_default_part= part_iter->ret_default_part_orig= FALSE; + + if (part_info->part_type == RANGE_PARTITION || + part_info->part_type == VERSIONING_PARTITION) + { + if (part_info->part_charset_field_array) + get_endpoint= get_partition_id_range_for_endpoint_charset; + else + get_endpoint= get_partition_id_range_for_endpoint; + max_endpoint_val= part_info->num_parts; + part_iter->get_next= get_next_partition_id_range; + } + else if (part_info->part_type == LIST_PARTITION) + { + + if (part_info->part_charset_field_array) + get_endpoint= get_list_array_idx_for_endpoint_charset; + else + get_endpoint= get_list_array_idx_for_endpoint; + max_endpoint_val= part_info->num_list_values; + part_iter->get_next= get_next_partition_id_list; + part_iter->part_info= part_info; + if (max_endpoint_val == 0) + { + /* + We handle this special case without optimisations since it is + of little practical value but causes a great number of complex + checks later in the code. 
+ */ + part_iter->part_nums.start= part_iter->part_nums.end= 0; + part_iter->part_nums.cur= 0; + part_iter->ret_null_part= part_iter->ret_null_part_orig= TRUE; + DBUG_RETURN(-1); + } + } + else + MY_ASSERT_UNREACHABLE(); + + can_match_multiple_values= ((flags & + (NO_MIN_RANGE | NO_MAX_RANGE | NEAR_MIN | + NEAR_MAX)) || + memcmp(min_value, max_value, field_len)); + DBUG_ASSERT(can_match_multiple_values || (flags & EQ_RANGE) || flags == 0); + if (can_match_multiple_values && part_info->has_default_partititon()) + part_iter->ret_default_part= part_iter->ret_default_part_orig= TRUE; + if (can_match_multiple_values && + (part_info->part_type == RANGE_PARTITION || + part_info->has_null_value)) + { + /* Range scan on RANGE or LIST partitioned table */ + enum_monotonicity_info monotonic; + monotonic= part_info->part_expr->get_monotonicity_info(); + if (monotonic == MONOTONIC_INCREASING_NOT_NULL || + monotonic == MONOTONIC_STRICT_INCREASING_NOT_NULL) + { + /* col is NOT NULL, but F(col) can return NULL, add NULL partition */ + part_iter->ret_null_part= part_iter->ret_null_part_orig= TRUE; + check_zero_dates= true; + } + } + + /* + Find minimum: Do special handling if the interval has left bound in form + " NULL <= X ": + */ + if (field->real_maybe_null() && part_info->has_null_value && + !(flags & (NO_MIN_RANGE | NEAR_MIN)) && *min_value) + { + part_iter->ret_null_part= part_iter->ret_null_part_orig= TRUE; + part_iter->part_nums.start= part_iter->part_nums.cur= 0; + if (!(flags & NO_MAX_RANGE) && *max_value) + { + /* The right bound is X <= NULL, i.e. 
it is a "X IS NULL" interval */ + part_iter->part_nums.end= 0; + /* + It is something like select * from tbl where col IS NULL + and we have partition with NULL to catch it, so we do not need + DEFAULT partition + */ + part_iter->ret_default_part= part_iter->ret_default_part_orig= FALSE; + DBUG_RETURN(1); + } + } + else + { + if (flags & NO_MIN_RANGE) + part_iter->part_nums.start= part_iter->part_nums.cur= 0; + else + { + /* + Store the interval edge in the record buffer, and call the + function that maps the edge in table-field space to an edge + in ordered-set-of-partitions (for RANGE partitioning) or + index-in-ordered-array-of-list-constants (for LIST) space. + */ + store_key_image_to_rec(field, min_value, field_len); + bool include_endp= !MY_TEST(flags & NEAR_MIN); + part_iter->part_nums.start= get_endpoint(part_info, 1, include_endp); + if (!can_match_multiple_values && part_info->part_expr->null_value) + { + /* col = x and F(x) = NULL -> only search NULL partition */ + part_iter->part_nums.cur= part_iter->part_nums.start= 0; + part_iter->part_nums.end= 0; + /* + if NULL partition exists: + for RANGE it is the first partition (always exists); + for LIST should be indicator that it is present + */ + if (part_info->part_type == RANGE_PARTITION || + part_info->has_null_value) + { + part_iter->ret_null_part= part_iter->ret_null_part_orig= TRUE; + DBUG_RETURN(1); + } + // If no NULL partition look up in DEFAULT or there is no such value + goto not_found; + } + part_iter->part_nums.cur= part_iter->part_nums.start; + if (check_zero_dates && !part_info->part_expr->null_value) + { + if (!(flags & NO_MAX_RANGE) && + (field->type() == MYSQL_TYPE_DATE || + field->type() == MYSQL_TYPE_DATETIME)) + { + /* Monotonic, but return NULL for dates with zeros in month/day. 
*/ + DBUG_ASSERT(field->cmp_type() == TIME_RESULT); // No rounding/truncation + zero_in_start_date= field->get_date(&start_date, date_mode_t(0)); + DBUG_PRINT("info", ("zero start %u %04d-%02d-%02d", + zero_in_start_date, start_date.year, + start_date.month, start_date.day)); + } + } + if (part_iter->part_nums.start == max_endpoint_val) + goto not_found; + } + } + + /* Find maximum, do the same as above but for right interval bound */ + if (flags & NO_MAX_RANGE) + part_iter->part_nums.end= max_endpoint_val; + else + { + store_key_image_to_rec(field, max_value, field_len); + bool include_endp= !MY_TEST(flags & NEAR_MAX); + part_iter->part_nums.end= get_endpoint(part_info, 0, include_endp); + if (check_zero_dates && + !zero_in_start_date && + !part_info->part_expr->null_value) + { + MYSQL_TIME end_date; + DBUG_ASSERT(field->cmp_type() == TIME_RESULT); // No rounding/truncation + bool zero_in_end_date= field->get_date(&end_date, date_mode_t(0)); + /* + This is an optimization for TO_DAYS()/TO_SECONDS() to avoid scanning + the NULL partition for ranges that cannot include a date with 0 as + month/day. 
+ */ + DBUG_PRINT("info", ("zero end %u %04d-%02d-%02d", + zero_in_end_date, + end_date.year, end_date.month, end_date.day)); + DBUG_ASSERT(!memcmp(((Item_func*) part_info->part_expr)->func_name(), + "to_days", 7) || + !memcmp(((Item_func*) part_info->part_expr)->func_name(), + "to_seconds", 10)); + if (!zero_in_end_date && + start_date.month == end_date.month && + start_date.year == end_date.year) + part_iter->ret_null_part= part_iter->ret_null_part_orig= false; + } + if (part_iter->part_nums.start >= part_iter->part_nums.end && + !part_iter->ret_null_part) + goto not_found; + } + DBUG_RETURN(1); /* Ok, iterator initialized */ + +not_found: + if (part_info->has_default_partititon()) + { + part_iter->ret_default_part= part_iter->ret_default_part_orig= TRUE; + DBUG_RETURN(1); + } + DBUG_RETURN(0); /* No partitions */ +} + + +/* See get_part_iter_for_interval_via_walking for definition of what this is */ +#define MAX_RANGE_TO_WALK 32 + + +/* + Partitioning Interval Analysis: Initialize iterator to walk field interval + + SYNOPSIS + get_part_iter_for_interval_via_walking() + part_info Partition info + is_subpart TRUE - act for subpartitioning + FALSE - act for partitioning + min_value minimum field value, in opt_range key format. + max_value minimum field value, in opt_range key format. + flags Some combination of NEAR_MIN, NEAR_MAX, NO_MIN_RANGE, + NO_MAX_RANGE. + part_iter Iterator structure to be initialized + + DESCRIPTION + Initialize partition set iterator to walk over interval in integer field + space. That is, for "const1 <=? t.field <=? const2" interval, initialize + the iterator to return a set of [sub]partitions obtained with the + following procedure: + get partition id for t.field = const1, return it + get partition id for t.field = const1+1, return it + ... t.field = const1+2, ... + ... ... ... + ... t.field = const2 ... + + IMPLEMENTATION + See get_partitions_in_range_iter for general description of interval + analysis. 
We support walking over the following intervals: + "t.field IS NULL" + "c1 <=? t.field <=? c2", where c1 and c2 are finite. + Intervals with +inf/-inf, and [NULL, c1] interval can be processed but + that is more tricky and I don't have time to do it right now. + + RETURN + 0 - No matching partitions, iterator not initialized + 1 - Some partitions would match, iterator intialized for traversing them + -1 - All partitions would match, iterator not initialized +*/ + +static int get_part_iter_for_interval_via_walking(partition_info *part_info, + bool is_subpart, + uint32 *store_length_array, /* ignored */ + uchar *min_value, uchar *max_value, + uint min_len, uint max_len, /* ignored */ + uint flags, PARTITION_ITERATOR *part_iter) +{ + Field *field; + uint total_parts; + partition_iter_func get_next_func; + DBUG_ENTER("get_part_iter_for_interval_via_walking"); + (void)store_length_array; + (void)min_len; + (void)max_len; + + part_iter->ret_null_part= part_iter->ret_null_part_orig= FALSE; + part_iter->ret_default_part= part_iter->ret_default_part_orig= FALSE; + + if (is_subpart) + { + field= part_info->subpart_field_array[0]; + total_parts= part_info->num_subparts; + get_next_func= get_next_subpartition_via_walking; + } + else + { + field= part_info->part_field_array[0]; + total_parts= part_info->num_parts; + get_next_func= get_next_partition_via_walking; + } + + /* Handle the "t.field IS NULL" interval, it is a special case */ + if (field->real_maybe_null() && !(flags & (NO_MIN_RANGE | NO_MAX_RANGE)) && + *min_value && *max_value) + { + /* + We don't have a part_iter->get_next() function that would find which + partition "t.field IS NULL" belongs to, so find partition that contains + NULL right here, and return an iterator over singleton set. 
+ */ + uint32 part_id; + field->set_null(); + if (is_subpart) + { + if (!part_info->get_subpartition_id(part_info, &part_id)) + { + init_single_partition_iterator(part_id, part_iter); + DBUG_RETURN(1); /* Ok, iterator initialized */ + } + } + else + { + longlong dummy; + int res= part_info->is_sub_partitioned() ? + part_info->get_part_partition_id(part_info, &part_id, + &dummy): + part_info->get_partition_id(part_info, &part_id, &dummy); + if (!res) + { + init_single_partition_iterator(part_id, part_iter); + DBUG_RETURN(1); /* Ok, iterator initialized */ + } + } + DBUG_RETURN(0); /* No partitions match */ + } + + if ((field->real_maybe_null() && + ((!(flags & NO_MIN_RANGE) && *min_value) || // NULL pack_length_in_rec(); + store_key_image_to_rec(field, min_value, len); + a= field->val_int(); + + store_key_image_to_rec(field, max_value, len); + b= field->val_int(); + + /* + Handle a special case where the distance between interval bounds is + exactly 4G-1. This interval is too big for range walking, and if it is an + (x,y]-type interval then the following "b +=..." code will convert it to + an empty interval by "wrapping around" a + 4G-1 + 1 = a. + */ + if ((ulonglong)b - (ulonglong)a == ~0ULL) + DBUG_RETURN(-1); + + a+= MY_TEST(flags & NEAR_MIN); + b+= MY_TEST(!(flags & NEAR_MAX)); + ulonglong n_values= b - a; + + /* + Will it pay off to enumerate all values in the [a..b] range and evaluate + the partitioning function for every value? It depends on + 1. whether we'll be able to infer that some partitions are not used + 2. if time savings from not scanning these partitions will be greater + than time spent in enumeration. + We will assume that the cost of accessing one extra partition is greater + than the cost of evaluating the partitioning function O(#partitions). 
+ This means we should jump at any chance to eliminate a partition, which + gives us this logic: + + Do the enumeration if + - the number of values to enumerate is comparable to the number of + partitions, or + - there are not many values to enumerate. + */ + if ((n_values > 2*total_parts) && n_values > MAX_RANGE_TO_WALK) + DBUG_RETURN(-1); + + part_iter->field_vals.start= part_iter->field_vals.cur= a; + part_iter->field_vals.end= b; + part_iter->part_info= part_info; + part_iter->get_next= get_next_func; + DBUG_RETURN(1); +} + + +/* + PARTITION_ITERATOR::get_next implementation: enumerate partitions in range + + SYNOPSIS + get_next_partition_id_range() + part_iter Partition set iterator structure + + DESCRIPTION + This is implementation of PARTITION_ITERATOR::get_next() that returns + [sub]partition ids in [min_partition_id, max_partition_id] range. + The function conforms to partition_iter_func type. + + RETURN + partition id + NOT_A_PARTITION_ID if there are no more partitions +*/ + +uint32 get_next_partition_id_range(PARTITION_ITERATOR* part_iter) +{ + if (part_iter->part_nums.cur >= part_iter->part_nums.end) + { + if (part_iter->ret_null_part) + { + part_iter->ret_null_part= FALSE; + return 0; /* NULL always in first range partition */ + } + // we do not have default partition in RANGE partitioning + DBUG_ASSERT(!part_iter->ret_default_part); + + part_iter->part_nums.cur= part_iter->part_nums.start; + part_iter->ret_null_part= part_iter->ret_null_part_orig; + return NOT_A_PARTITION_ID; + } + else + return part_iter->part_nums.cur++; +} + + +/* + PARTITION_ITERATOR::get_next implementation for LIST partitioning + + SYNOPSIS + get_next_partition_id_list() + part_iter Partition set iterator structure + + DESCRIPTION + This implementation of PARTITION_ITERATOR::get_next() is special for + LIST partitioning: it enumerates partition ids in + part_info->list_array[i] (list_col_array[i*cols] for COLUMNS LIST + partitioning) where i runs over [min_idx, max_idx] 
interval. + The function conforms to partition_iter_func type. + + RETURN + partition id + NOT_A_PARTITION_ID if there are no more partitions +*/ + +uint32 get_next_partition_id_list(PARTITION_ITERATOR *part_iter) +{ + if (part_iter->part_nums.cur >= part_iter->part_nums.end) + { + if (part_iter->ret_null_part) + { + part_iter->ret_null_part= FALSE; + return part_iter->part_info->has_null_part_id; + } + if (part_iter->ret_default_part) + { + part_iter->ret_default_part= FALSE; + return part_iter->part_info->default_partition_id; + } + /* Reset partition for next read */ + part_iter->part_nums.cur= part_iter->part_nums.start; + part_iter->ret_null_part= part_iter->ret_null_part_orig; + part_iter->ret_default_part= part_iter->ret_default_part_orig; + return NOT_A_PARTITION_ID; + } + else + { + partition_info *part_info= part_iter->part_info; + uint32 num_part= part_iter->part_nums.cur++; + if (part_info->column_list) + { + uint num_columns= part_info->part_field_list.elements; + return part_info->list_col_array[num_part*num_columns].partition_id; + } + return part_info->list_array[num_part].partition_id; + } +} + + +/* + PARTITION_ITERATOR::get_next implementation: walk over field-space interval + + SYNOPSIS + get_next_partition_via_walking() + part_iter Partitioning iterator + + DESCRIPTION + This implementation of PARTITION_ITERATOR::get_next() returns ids of + partitions that contain records with partitioning field value within + [start_val, end_val] interval. + The function conforms to partition_iter_func type. + + RETURN + partition id + NOT_A_PARTITION_ID if there are no more partitioning. 
+*/ + +static uint32 get_next_partition_via_walking(PARTITION_ITERATOR *part_iter) +{ + uint32 part_id; + Field *field= part_iter->part_info->part_field_array[0]; + while (part_iter->field_vals.cur != part_iter->field_vals.end) + { + longlong dummy; + field->store(part_iter->field_vals.cur++, field->flags & UNSIGNED_FLAG); + if ((part_iter->part_info->is_sub_partitioned() && + !part_iter->part_info->get_part_partition_id(part_iter->part_info, + &part_id, &dummy)) || + !part_iter->part_info->get_partition_id(part_iter->part_info, + &part_id, &dummy)) + return part_id; + } + part_iter->field_vals.cur= part_iter->field_vals.start; + return NOT_A_PARTITION_ID; +} + + +/* Same as get_next_partition_via_walking, but for subpartitions */ + +static uint32 get_next_subpartition_via_walking(PARTITION_ITERATOR *part_iter) +{ + Field *field= part_iter->part_info->subpart_field_array[0]; + uint32 res; + if (part_iter->field_vals.cur == part_iter->field_vals.end) + { + part_iter->field_vals.cur= part_iter->field_vals.start; + return NOT_A_PARTITION_ID; + } + field->store(part_iter->field_vals.cur++, field->flags & UNSIGNED_FLAG); + if (part_iter->part_info->get_subpartition_id(part_iter->part_info, + &res)) + return NOT_A_PARTITION_ID; + return res; +} + +/* used in error messages below */ +static const char *longest_str(const char *s1, const char *s2, + const char *s3=0) +{ + if (strlen(s2) > strlen(s1)) s1= s2; + if (s3 && strlen(s3) > strlen(s1)) s1= s3; + return s1; +} + + +/* + Create partition names + + SYNOPSIS + create_partition_name() + out:out The buffer for the created partition name string + must be *at least* of FN_REFLEN+1 bytes + in1 First part + in2 Second part + name_variant Normal, temporary or renamed partition name + + RETURN VALUE + 0 if ok, error if name too long + + DESCRIPTION + This method is used to calculate the partition name, service routine to + the del_ren_cre_table method. 
+*/ + +int create_partition_name(char *out, size_t outlen, const char *in1, + const char *in2, uint name_variant, bool translate) +{ + char transl_part_name[FN_REFLEN]; + const char *transl_part, *end; + DBUG_ASSERT(outlen >= FN_REFLEN + 1); // consistency! same limit everywhere + + if (translate) + { + tablename_to_filename(in2, transl_part_name, FN_REFLEN); + transl_part= transl_part_name; + } + else + transl_part= in2; + + if (name_variant == NORMAL_PART_NAME) + end= strxnmov(out, outlen-1, in1, "#P#", transl_part, NullS); + else if (name_variant == TEMP_PART_NAME) + end= strxnmov(out, outlen-1, in1, "#P#", transl_part, "#TMP#", NullS); + else + { + DBUG_ASSERT(name_variant == RENAMED_PART_NAME); + end= strxnmov(out, outlen-1, in1, "#P#", transl_part, "#REN#", NullS); + } + if (end - out == static_cast(outlen-1)) + { + my_error(ER_PATH_LENGTH, MYF(0), longest_str(in1, transl_part)); + return HA_WRONG_CREATE_OPTION; + } + return 0; +} + +/** + Create subpartition name. This method is used to calculate the + subpartition name, service routine to the del_ren_cre_table method. + The output buffer size should be FN_REFLEN + 1(terminating '\0'). + + @param [out] out Created partition name string + @param in1 First part + @param in2 Second part + @param in3 Third part + @param name_variant Normal, temporary or renamed partition name + + @retval true Error. + @retval false Success. +*/ + +int create_subpartition_name(char *out, size_t outlen, + const char *in1, const char *in2, + const char *in3, uint name_variant) +{ + char transl_part_name[FN_REFLEN], transl_subpart_name[FN_REFLEN], *end; + DBUG_ASSERT(outlen >= FN_REFLEN + 1); // consistency! 
same limit everywhere + + tablename_to_filename(in2, transl_part_name, FN_REFLEN); + tablename_to_filename(in3, transl_subpart_name, FN_REFLEN); + + if (name_variant == NORMAL_PART_NAME) + end= strxnmov(out, outlen-1, in1, "#P#", transl_part_name, + "#SP#", transl_subpart_name, NullS); + else if (name_variant == TEMP_PART_NAME) + end= strxnmov(out, outlen-1, in1, "#P#", transl_part_name, + "#SP#", transl_subpart_name, "#TMP#", NullS); + else + { + DBUG_ASSERT(name_variant == RENAMED_PART_NAME); + end= strxnmov(out, outlen-1, in1, "#P#", transl_part_name, + "#SP#", transl_subpart_name, "#REN#", NullS); + } + if (end - out == static_cast(outlen-1)) + { + my_error(ER_PATH_LENGTH, MYF(0), + longest_str(in1, transl_part_name, transl_subpart_name)); + return HA_WRONG_CREATE_OPTION; + } + return 0; +} + +uint get_partition_field_store_length(Field *field) +{ + uint store_length; + + store_length= field->key_length(); + if (field->real_maybe_null()) + store_length+= HA_KEY_NULL_LENGTH; + if (field->real_type() == MYSQL_TYPE_VARCHAR) + store_length+= HA_KEY_BLOB_LENGTH; + return store_length; +} + +#endif diff --git a/sql/sql_partition.h b/sql/sql_partition.h new file mode 100644 index 00000000..cff3214f --- /dev/null +++ b/sql/sql_partition.h @@ -0,0 +1,330 @@ +#ifndef SQL_PARTITION_INCLUDED +#define SQL_PARTITION_INCLUDED + +/* Copyright (c) 2006, 2017, Oracle and/or its affiliates. + Copyright (c) 2011, 2017, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include "sql_list.h" /* List */ +#include "table.h" /* TABLE_LIST */ + +class Alter_info; +class Alter_table_ctx; +class Field; +class String; +class handler; +class partition_info; +struct TABLE; +struct TABLE_LIST; +typedef struct st_bitmap MY_BITMAP; +typedef struct st_key KEY; +typedef struct st_key_range key_range; + +/* Flags for partition handlers */ +#define HA_CAN_PARTITION (1 << 0) /* Partition support */ +#define HA_CAN_UPDATE_PARTITION_KEY (1 << 1) +#define HA_CAN_PARTITION_UNIQUE (1 << 2) +#define HA_USE_AUTO_PARTITION (1 << 3) +#define HA_ONLY_VERS_PARTITION (1 << 4) + +#define NORMAL_PART_NAME 0 +#define TEMP_PART_NAME 1 +#define RENAMED_PART_NAME 2 + +typedef struct st_lock_param_type +{ + TABLE_LIST *table_list; + ulonglong copied; + ulonglong deleted; + THD *thd; + HA_CREATE_INFO *create_info; + Alter_info *alter_info; + Alter_table_ctx *alter_ctx; + TABLE *table; + KEY *key_info_buffer; + LEX_CUSTRING org_tabledef_version; + uchar *pack_frm_data; + uint key_count; + uint db_options; + size_t pack_frm_len; + // TODO: remove duplicate data: part_info can be accessed via table->part_info + partition_info *part_info; +} ALTER_PARTITION_PARAM_TYPE; + +typedef struct { + longlong list_value; + uint32 partition_id; +} LIST_PART_ENTRY; + +typedef struct { + uint32 start_part; + uint32 end_part; +} part_id_range; + +class String_list; +struct st_partition_iter; +#define NOT_A_PARTITION_ID UINT_MAX32 + +bool is_partition_in_list(char *part_name, List list_part_names); +char *are_partitions_in_table(partition_info *new_part_info, + partition_info *old_part_info); +bool check_reorganise_list(partition_info *new_part_info, + partition_info *old_part_info, + List 
list_part_names); +handler *get_ha_partition(partition_info *part_info); +int get_part_for_buf(const uchar *buf, const uchar *rec0, + partition_info *part_info, uint32 *part_id); +void prune_partition_set(const TABLE *table, part_id_range *part_spec); +bool check_partition_info(partition_info *part_info,handlerton **eng_type, + TABLE *table, handler *file, HA_CREATE_INFO *info); +void set_linear_hash_mask(partition_info *part_info, uint num_parts); +bool fix_partition_func(THD *thd, TABLE *table, bool create_table_ind); +void get_partition_set(const TABLE *table, uchar *buf, const uint index, + const key_range *key_spec, + part_id_range *part_spec); +uint get_partition_field_store_length(Field *field); +void get_full_part_id_from_key(const TABLE *table, uchar *buf, + KEY *key_info, + const key_range *key_spec, + part_id_range *part_spec); +bool mysql_unpack_partition(THD *thd, char *part_buf, + uint part_info_len, + TABLE *table, bool is_create_table_ind, + handlerton *default_db_type, + bool *work_part_info_used); +void make_used_partitions_str(MEM_ROOT *mem_root, + partition_info *part_info, String *parts_str, + String_list &used_partitions_list); +uint32 get_list_array_idx_for_endpoint(partition_info *part_info, + bool left_endpoint, + bool include_endpoint); +uint32 get_partition_id_range_for_endpoint(partition_info *part_info, + bool left_endpoint, + bool include_endpoint); +bool check_part_func_fields(Field **ptr, bool ok_with_charsets); +bool field_is_partition_charset(Field *field); +Item* convert_charset_partition_constant(Item *item, CHARSET_INFO *cs); +/** + Append all fields in read_set to string + + @param[in,out] str String to append to. + @param[in] row Row to append. + @param[in] table Table containing read_set and fields for the row. +*/ +void append_row_to_str(String &str, const uchar *row, TABLE *table); +void truncate_partition_filename(char *path); + +/* + A "Get next" function for partition iterator. 
+ + SYNOPSIS + partition_iter_func() + part_iter Partition iterator, you call only "iter.get_next(&iter)" + + DESCRIPTION + Depending on whether partitions or sub-partitions are iterated, the + function returns next subpartition id/partition number. The sequence of + returned numbers is not ordered and may contain duplicates. + + When the end of sequence is reached, NOT_A_PARTITION_ID is returned, and + the iterator resets itself (so next get_next() call will start to + enumerate the set all over again). + + RETURN + NOT_A_PARTITION_ID if there are no more partitions. + [sub]partition_id of the next partition +*/ + +typedef uint32 (*partition_iter_func)(st_partition_iter* part_iter); + + +/* + Partition set iterator. Used to enumerate a set of [sub]partitions + obtained in partition interval analysis (see get_partitions_in_range_iter). + + For the user, the only meaningful field is get_next, which may be used as + follows: + part_iterator.get_next(&part_iterator); + + Initialization is done by any of the following calls: + - get_partitions_in_range_iter-type function call + - init_single_partition_iterator() + - init_all_partitions_iterator() + Cleanup is not needed. +*/ + +typedef struct st_partition_iter +{ + partition_iter_func get_next; + /* + Valid for "Interval mapping" in LIST partitioning: if true, let the + iterator also produce id of the partition that contains NULL value. + */ + bool ret_null_part, ret_null_part_orig; + /* + We should return DEFAULT partition. 
+ */ + bool ret_default_part, ret_default_part_orig; + struct st_part_num_range + { + uint32 start; + uint32 cur; + uint32 end; + }; + + struct st_field_value_range + { + longlong start; + longlong cur; + longlong end; + }; + + union + { + struct st_part_num_range part_nums; + struct st_field_value_range field_vals; + }; + partition_info *part_info; +} PARTITION_ITERATOR; + + +/* + Get an iterator for set of partitions that match given field-space interval + + SYNOPSIS + get_partitions_in_range_iter() + part_info Partitioning info + is_subpart + store_length_array Length of fields packed in opt_range_key format + min_val Left edge, field value in opt_range_key format + max_val Right edge, field value in opt_range_key format + min_len Length of minimum value + max_len Length of maximum value + flags Some combination of NEAR_MIN, NEAR_MAX, NO_MIN_RANGE, + NO_MAX_RANGE + part_iter Iterator structure to be initialized + + DESCRIPTION + Functions with this signature are used to perform "Partitioning Interval + Analysis". This analysis is applicable for any type of [sub]partitioning + by some function of a single fieldX. The idea is as follows: + Given an interval "const1 <=? fieldX <=? const2", find a set of partitions + that may contain records with value of fieldX within the given interval. + + The min_val, max_val and flags parameters specify the interval. 
+ The set of partitions is returned by initializing an iterator in *part_iter + + NOTES + There are currently three functions of this type: + - get_part_iter_for_interval_via_walking + - get_part_iter_for_interval_cols_via_map + - get_part_iter_for_interval_via_mapping + + RETURN + 0 - No matching partitions, iterator not initialized + 1 - Some partitions would match, iterator intialized for traversing them + -1 - All partitions would match, iterator not initialized +*/ + +typedef int (*get_partitions_in_range_iter)(partition_info *part_info, + bool is_subpart, + uint32 *store_length_array, + uchar *min_val, uchar *max_val, + uint min_len, uint max_len, + uint flags, + PARTITION_ITERATOR *part_iter); + +#include "partition_info.h" + +#ifdef WITH_PARTITION_STORAGE_ENGINE +uint fast_alter_partition_table(THD *thd, TABLE *table, + Alter_info *alter_info, + Alter_table_ctx *alter_ctx, + HA_CREATE_INFO *create_info, + TABLE_LIST *table_list); +bool set_part_state(Alter_info *alter_info, partition_info *tab_part_info, + enum partition_state part_state); +uint prep_alter_part_table(THD *thd, TABLE *table, Alter_info *alter_info, + HA_CREATE_INFO *create_info, + bool *partition_changed, + bool *fast_alter_table); +char *generate_partition_syntax(THD *thd, partition_info *part_info, + uint *buf_length, + bool show_partition_options, + HA_CREATE_INFO *create_info, + Alter_info *alter_info); +char *generate_partition_syntax_for_frm(THD *thd, partition_info *part_info, + uint *buf_length, + HA_CREATE_INFO *create_info, + Alter_info *alter_info); +bool verify_data_with_partition(TABLE *table, TABLE *part_table, + uint32 part_id); +bool compare_partition_options(HA_CREATE_INFO *table_create_info, + partition_element *part_elem); +bool compare_table_with_partition(THD *thd, TABLE *table, + TABLE *part_table, + partition_element *part_elem, + uint part_id); +bool partition_key_modified(TABLE *table, const MY_BITMAP *fields); +bool write_log_replace_frm(ALTER_PARTITION_PARAM_TYPE 
*lpt, + uint next_entry, + const char *from_path, + const char *to_path); + +#else +#define partition_key_modified(X,Y) 0 +#endif + +int __attribute__((warn_unused_result)) + create_partition_name(char *out, size_t outlen, const char *in1, const char + *in2, uint name_variant, bool translate); +int __attribute__((warn_unused_result)) + create_subpartition_name(char *out, size_t outlen, const char *in1, const + char *in2, const char *in3, uint name_variant); + +void set_key_field_ptr(KEY *key_info, const uchar *new_buf, + const uchar *old_buf); + +/** Set up table for creating a partition. +Copy info from partition to the table share so the created partition +has the correct info. + @param thd THD object + @param share Table share to be updated. + @param info Create info to be updated. + @param part_elem partition_element containing the info. + + @return status + @retval TRUE Error + @retval FALSE Success + + @details + Set up + 1) Comment on partition + 2) MAX_ROWS, MIN_ROWS on partition + 3) Index file name on partition + 4) Data file name on partition +*/ +bool set_up_table_before_create(THD *thd, + TABLE_SHARE *share, + const char *partition_name_with_path, + HA_CREATE_INFO *info, + partition_element *part_elem); + +#endif /* SQL_PARTITION_INCLUDED */ + diff --git a/sql/sql_partition_admin.cc b/sql/sql_partition_admin.cc new file mode 100644 index 00000000..d290d0f5 --- /dev/null +++ b/sql/sql_partition_admin.cc @@ -0,0 +1,1053 @@ +/* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2014, SkySQL Ab. + Copyright (c) 2016, 2018, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_parse.h" // check_one_table_access + // check_merge_table_access + // check_one_table_access +#include "sql_table.h" // mysql_alter_table, etc. +#include "sql_cmd.h" // Sql_cmd +#include "sql_alter.h" // Sql_cmd_alter_table +#include "sql_partition.h" // struct partition_info, etc. +#include "debug_sync.h" // DEBUG_SYNC +#include "sql_truncate.h" // mysql_truncate_table, + // Sql_cmd_truncate_table +#include "sql_admin.h" // Analyze/Check/.._table_statement +#include "sql_partition_admin.h" // Alter_table_*_partition +#ifdef WITH_PARTITION_STORAGE_ENGINE +#include "ha_partition.h" // ha_partition +#endif +#include "sql_base.h" // open_and_lock_tables +#include "ddl_log.h" +#include "wsrep_mysqld.h" + +#ifndef WITH_PARTITION_STORAGE_ENGINE + +bool Sql_cmd_partition_unsupported::execute(THD *) +{ + DBUG_ENTER("Sql_cmd_partition_unsupported::execute"); + /* error, partitioning support not compiled in... 
*/ + my_error(ER_FEATURE_DISABLED, MYF(0), "partitioning", + "--with-plugin-partition"); + DBUG_RETURN(TRUE); +} + +#else + +static bool return_with_logging(THD *thd) +{ + if (thd->slave_thread && + write_bin_log_with_if_exists(thd, true, false, true)) + return(true); + my_ok(thd); + return(false); +} + + +bool Sql_cmd_alter_table_exchange_partition::execute(THD *thd) +{ + /* Moved from mysql_execute_command */ + LEX *lex= thd->lex; + /* first SELECT_LEX (have special meaning for many of non-SELECTcommands) */ + SELECT_LEX *select_lex= lex->first_select_lex(); + /* first table of first SELECT_LEX */ + TABLE_LIST *first_table= (TABLE_LIST*) select_lex->table_list.first; + /* + Code in mysql_alter_table() may modify its HA_CREATE_INFO argument, + so we have to use a copy of this structure to make execution + prepared statement- safe. A shallow copy is enough as no memory + referenced from this structure will be modified. + @todo move these into constructor... + */ + IF_DBUG(HA_CREATE_INFO create_info(lex->create_info);,) + Alter_info alter_info(lex->alter_info, thd->mem_root); + privilege_t priv_needed(ALTER_ACL | DROP_ACL | INSERT_ACL | CREATE_ACL); + + DBUG_ENTER("Sql_cmd_alter_table_exchange_partition::execute"); + + if (unlikely(thd->is_fatal_error)) + { + /* out of memory creating a copy of alter_info */ + DBUG_RETURN(TRUE); + } + + /* Must be set in the parser */ + DBUG_ASSERT(select_lex->db.str); + /* also check the table to be exchanged with the partition */ + DBUG_ASSERT(alter_info.partition_flags & ALTER_PARTITION_EXCHANGE); + + if (unlikely(check_access(thd, priv_needed, first_table->db.str, + &first_table->grant.privilege, + &first_table->grant.m_internal, + 0, 0)) || + unlikely(check_access(thd, priv_needed, first_table->next_local->db.str, + &first_table->next_local->grant.privilege, + &first_table->next_local->grant.m_internal, + 0, 0))) + DBUG_RETURN(TRUE); + + if (unlikely(check_grant(thd, priv_needed, first_table, FALSE, UINT_MAX, + FALSE))) + 
DBUG_RETURN(TRUE); + + /* Not allowed with EXCHANGE PARTITION */ + DBUG_ASSERT(!create_info.data_file_name && !create_info.index_file_name); + WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, first_table); + + DBUG_RETURN(exchange_partition(thd, first_table, &alter_info)); +#ifdef WITH_WSREP + wsrep_error_label: + /* handle errors in TO_ISOLATION here */ + DBUG_RETURN(true); +#endif /* WITH_WSREP */ +} + + +/** + @brief Checks that the tables will be able to be used for EXCHANGE PARTITION. + @param table Non partitioned table. + @param part_table Partitioned table. + + @retval FALSE if OK, otherwise error is reported and TRUE is returned. +*/ + +static bool check_exchange_partition(TABLE *table, TABLE *part_table) +{ + DBUG_ENTER("check_exchange_partition"); + + /* Both tables must exist */ + if (unlikely(!part_table || !table)) + { + my_error(ER_CHECK_NO_SUCH_TABLE, MYF(0)); + DBUG_RETURN(TRUE); + } + + /* The first table must be partitioned, and the second must not */ + if (unlikely(!part_table->part_info)) + { + my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); + DBUG_RETURN(TRUE); + } + if (unlikely(table->part_info)) + { + my_error(ER_PARTITION_EXCHANGE_PART_TABLE, MYF(0), + table->s->table_name.str); + DBUG_RETURN(TRUE); + } + + if (unlikely(part_table->file->ht != partition_hton)) + { + /* + Only allowed on partitioned tables throught the generic ha_partition + handler, i.e not yet for native partitioning. + */ + my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); + DBUG_RETURN(TRUE); + } + + if (unlikely(table->file->ht != part_table->part_info->default_engine_type)) + { + my_error(ER_MIX_HANDLER_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + + /* Verify that table is not tmp table, partitioned tables cannot be tmp. 
*/ + if (unlikely(table->s->tmp_table != NO_TMP_TABLE)) + { + my_error(ER_PARTITION_EXCHANGE_TEMP_TABLE, MYF(0), + table->s->table_name.str); + DBUG_RETURN(TRUE); + } + + /* The table cannot have foreign keys constraints or be referenced */ + if (unlikely(!table->file->can_switch_engines())) + { + my_error(ER_PARTITION_EXCHANGE_FOREIGN_KEY, MYF(0), + table->s->table_name.str); + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +/** + @brief Compare table structure/options between a non partitioned table + and a specific partition of a partitioned table. + + @param thd Thread object. + @param table Non partitioned table. + @param part_table Partitioned table. + @param part_elem Partition element to use for partition specific compare. +*/ +bool compare_table_with_partition(THD *thd, TABLE *table, TABLE *part_table, + partition_element *part_elem, uint part_id) +{ + HA_CREATE_INFO table_create_info; + Table_specification_st part_create_info; + Alter_info part_alter_info; + Alter_table_ctx part_alter_ctx; // Not used + DBUG_ENTER("compare_table_with_partition"); + + bool metadata_equal= false; + part_create_info.init(); + table_create_info.init(); + + update_create_info_from_table(&table_create_info, table); + /* get the current auto_increment value */ + table->file->update_create_info(&table_create_info); + /* mark all columns used, since they are used when preparing the new table */ + part_table->use_all_columns(); + table->use_all_columns(); + if (unlikely(mysql_prepare_alter_table(thd, part_table, &part_create_info, + &part_alter_info, &part_alter_ctx))) + { + my_error(ER_TABLES_DIFFERENT_METADATA, MYF(0)); + DBUG_RETURN(TRUE); + } + /* db_type is not set in prepare_alter_table */ + part_create_info.db_type= part_table->part_info->default_engine_type; + ((ha_partition*)(part_table->file))->update_part_create_info(&part_create_info, part_id); + /* + Since we exchange the partition with the table, allow exchanging + auto_increment value as well. 
+ */ + part_create_info.auto_increment_value= + table_create_info.auto_increment_value; + + /* Check compatible row_types and set create_info accordingly. */ + { + enum row_type part_row_type= part_table->file->get_row_type(); + enum row_type table_row_type= table->file->get_row_type(); + if (part_row_type != table_row_type) + { + my_error(ER_PARTITION_EXCHANGE_DIFFERENT_OPTION, MYF(0), + "ROW_FORMAT"); + DBUG_RETURN(true); + } + part_create_info.row_type= table->s->row_type; + } + + /* + NOTE: ha_blackhole does not support check_if_compatible_data, + so this always fail for blackhole tables. + ha_myisam compares pointers to verify that DATA/INDEX DIRECTORY is + the same, so any table using data/index_file_name will fail. + */ + if (mysql_compare_tables(table, &part_alter_info, &part_create_info, + &metadata_equal)) + { + my_error(ER_TABLES_DIFFERENT_METADATA, MYF(0)); + DBUG_RETURN(TRUE); + } + + DEBUG_SYNC(thd, "swap_partition_after_compare_tables"); + if (!metadata_equal) + { + my_error(ER_TABLES_DIFFERENT_METADATA, MYF(0)); + DBUG_RETURN(TRUE); + } + + if (table->s->db_create_options != part_table->s->db_create_options) + { + my_error(ER_TABLES_DIFFERENT_METADATA, MYF(0)); + DBUG_RETURN(TRUE); + } + + DBUG_ASSERT(table->s->db_options_in_use == + part_table->s->db_options_in_use); + + if (table_create_info.avg_row_length != part_create_info.avg_row_length) + { + my_error(ER_PARTITION_EXCHANGE_DIFFERENT_OPTION, MYF(0), + "AVG_ROW_LENGTH"); + DBUG_RETURN(TRUE); + } + + if (table_create_info.table_options != part_create_info.table_options) + { + my_error(ER_PARTITION_EXCHANGE_DIFFERENT_OPTION, MYF(0), + "TABLE OPTION"); + DBUG_RETURN(TRUE); + } + + if (table->s->table_charset != part_table->s->table_charset) + { + my_error(ER_PARTITION_EXCHANGE_DIFFERENT_OPTION, MYF(0), + "CHARACTER SET"); + DBUG_RETURN(TRUE); + } + + /* + NOTE: We do not support update of frm-file, i.e. change + max/min_rows, data/index_file_name etc. 
+ The workaround is to use REORGANIZE PARTITION to rewrite + the frm file and then use EXCHANGE PARTITION when they are the same. + */ + if (part_elem && compare_partition_options(&table_create_info, part_elem)) + DBUG_RETURN(TRUE); + + DBUG_RETURN(FALSE); +} + + +/** + @brief Exchange partition/table with ddl log. + + @details How to handle a crash in the middle of the rename (break on error): + 1) register in ddl_log that we are going to exchange swap_table with part. + 2) do the first rename (swap_table -> tmp-name) and sync the ddl_log. + 3) do the second rename (part -> swap_table) and sync the ddl_log. + 4) do the last rename (tmp-name -> part). + 5) mark the entry done. + + Recover by: + 5) is done, All completed. Nothing to recover. + 4) is done see 3). (No mark or sync in the ddl_log...) + 3) is done -> try rename part -> tmp-name (ignore failure) goto 2). + 2) is done -> try rename swap_table -> part (ignore failure) goto 1). + 1) is done -> try rename tmp-name -> swap_table (ignore failure). + before 1) Nothing to recover... + + @param thd Thread handle + @param name name of table/partition 1 (to be exchanged with 2) + @param from_name name of table/partition 2 (to be exchanged with 1) + @param tmp_name temporary name to use while exchaning + @param ht handlerton of the table/partitions + + @return Operation status + @retval TRUE Error + @retval FALSE Success + + @note ha_heap always succeeds in rename (since it is created upon usage). + This is OK when to recover from a crash since all heap are empty and the + recover is done early in the startup of the server (right before + read_init_file which can populate the tables). + + And if no crash we can trust the syncs in the ddl_log. + + What about if the rename is put into a background thread? That will cause + corruption and is avoided by the exlusive metadata lock. 
*/
static bool exchange_name_with_ddl_log(THD *thd,
                                       const char *name,
                                       const char *from_name,
                                       const char *tmp_name,
                                       handlerton *ht)
{
  DDL_LOG_ENTRY exchange_entry;
  DDL_LOG_MEMORY_ENTRY *log_entry= NULL;
  DDL_LOG_MEMORY_ENTRY *exec_log_entry= NULL;
  bool error= TRUE;
  /* Set once my_error() has been called, so the generic error is not added */
  bool error_set= FALSE;
  handler *file= NULL;
  DBUG_ENTER("exchange_name_with_ddl_log");

  /* A plain handler of the target engine performs the three renames below */
  if (unlikely(!(file= get_new_handler(NULL, thd->mem_root, ht))))
    DBUG_RETURN(TRUE);

  /* prepare the action entry */
  bzero(&exchange_entry, sizeof(exchange_entry));
  exchange_entry.entry_type= DDL_LOG_ENTRY_CODE;
  exchange_entry.action_type= DDL_LOG_EXCHANGE_ACTION;
  lex_string_set(&exchange_entry.name, name);
  lex_string_set(&exchange_entry.from_name, from_name);
  lex_string_set(&exchange_entry.tmp_name, tmp_name);
  lex_string_set(&exchange_entry.handler_name,
                 ha_resolve_storage_engine_name(ht));
  exchange_entry.phase= EXCH_PHASE_NAME_TO_TEMP;

  mysql_mutex_lock(&LOCK_gdl);
  /*
    write to the ddl log what to do by:
    1) write the action entry (i.e. which names to be exchanged)
    2) write the execution entry with a link to the action entry
  */
  DBUG_EXECUTE_IF("exchange_partition_fail_1", goto err_no_action_written;);
  DBUG_EXECUTE_IF("exchange_partition_abort_1", DBUG_SUICIDE(););
  if (unlikely(ddl_log_write_entry(&exchange_entry, &log_entry)))
    goto err_no_action_written;

  DBUG_EXECUTE_IF("exchange_partition_fail_2", goto err_no_execute_written;);
  DBUG_EXECUTE_IF("exchange_partition_abort_2", DBUG_SUICIDE(););
  if (unlikely(ddl_log_write_execute_entry(log_entry->entry_pos,
                                           &exec_log_entry)))
    goto err_no_execute_written;
  /* ddl_log is written and synced */

  mysql_mutex_unlock(&LOCK_gdl);
  /*
    Execute the name exchange.
    Do one rename, increase the phase, update the action entry and sync.
    In case of errors in the ddl_log we must fail and let the ddl_log try
    to revert the changes, since otherwise it could revert the command after
    we sent OK to the client.
  */
  /* call rename table from table to tmp-name */
  DBUG_EXECUTE_IF("exchange_partition_fail_3",
                  my_error(ER_ERROR_ON_RENAME, MYF(0), name, tmp_name, 0);
                  error_set= TRUE;
                  goto err_rename;);
  DBUG_EXECUTE_IF("exchange_partition_abort_3", DBUG_SUICIDE(););
  if (unlikely(file->ha_rename_table(name, tmp_name)))
  {
    my_error(ER_ERROR_ON_RENAME, MYF(0), name, tmp_name, my_errno);
    error_set= TRUE;
    goto err_rename;
  }
  DBUG_EXECUTE_IF("exchange_partition_fail_4", goto err_rename;);
  DBUG_EXECUTE_IF("exchange_partition_abort_4", DBUG_SUICIDE(););
  if (unlikely(ddl_log_increment_phase(log_entry->entry_pos)))
    goto err_rename;

  /* call rename table from partition to table */
  DBUG_EXECUTE_IF("exchange_partition_fail_5",
                  my_error(ER_ERROR_ON_RENAME, MYF(0), from_name, name, 0);
                  error_set= TRUE;
                  goto err_rename;);
  DBUG_EXECUTE_IF("exchange_partition_abort_5", DBUG_SUICIDE(););
  if (unlikely(file->ha_rename_table(from_name, name)))
  {
    my_error(ER_ERROR_ON_RENAME, MYF(0), from_name, name, my_errno);
    error_set= TRUE;
    goto err_rename;
  }
  DBUG_EXECUTE_IF("exchange_partition_fail_6", goto err_rename;);
  DBUG_EXECUTE_IF("exchange_partition_abort_6", DBUG_SUICIDE(););
  if (unlikely(ddl_log_increment_phase(log_entry->entry_pos)))
    goto err_rename;

  /* call rename table from tmp-name to partition */
  DBUG_EXECUTE_IF("exchange_partition_fail_7",
                  my_error(ER_ERROR_ON_RENAME, MYF(0), tmp_name, from_name, 0);
                  error_set= TRUE;
                  goto err_rename;);
  DBUG_EXECUTE_IF("exchange_partition_abort_7", DBUG_SUICIDE(););
  if (unlikely(file->ha_rename_table(tmp_name, from_name)))
  {
    my_error(ER_ERROR_ON_RENAME, MYF(0), tmp_name, from_name, my_errno);
    error_set= TRUE;
    goto err_rename;
  }
  DBUG_EXECUTE_IF("exchange_partition_fail_8", goto err_rename;);
  DBUG_EXECUTE_IF("exchange_partition_abort_8", DBUG_SUICIDE(););
  if (unlikely(ddl_log_increment_phase(log_entry->entry_pos)))
    goto err_rename;

  /* The exchange is complete and ddl_log is deactivated */
  DBUG_EXECUTE_IF("exchange_partition_fail_9", goto err_rename;);
  DBUG_EXECUTE_IF("exchange_partition_abort_9", DBUG_SUICIDE(););
  /* all OK */
  error= FALSE;
  delete file;
  DBUG_RETURN(error);
err_rename:
  /*
    Nothing to do if any of these commands fails :( the commands themselves
    will log to the error log about the failures...
  */
  /* execute the ddl log entry to revert the renames */
  (void) ddl_log_execute_entry(current_thd, log_entry->entry_pos);
  mysql_mutex_lock(&LOCK_gdl);
  /* mark the execute log entry done */
  (void) ddl_log_disable_execute_entry(&exec_log_entry);
  /* release the execute log entry */
  (void) ddl_log_release_memory_entry(exec_log_entry);
err_no_execute_written:
  /* release the action log entry */
  (void) ddl_log_release_memory_entry(log_entry);
err_no_action_written:
  mysql_mutex_unlock(&LOCK_gdl);
  delete file;
  if (!error_set)
    my_error(ER_DDL_LOG_ERROR, MYF(0));
  DBUG_RETURN(error);
}


/**
  @brief Swap places between a partition and a table.

  @details Verify that the tables are compatible (same engine, definition etc),
  verify that all rows in the table will fit in the partition,
  if all OK, rename table to tmp name, rename partition to table
  and finally rename tmp name to partition.

  1) Take upgradable mdl, open tables and then lock them (inited in parse)
  2) Verify that metadata matches
  3) verify data
  4) Upgrade to exclusive mdl for both tables
  5) Rename table <-> partition
  6) Rely on close_thread_tables to release mdl and table locks

  @param thd            Thread handle
  @param table_list     Table where the partition exists as first table,
                        Table to swap with the partition as second table
  @param alter_info     Contains partition name to swap

  @note This is a DDL operation so triggers will not be used.
*/
bool Sql_cmd_alter_table_exchange_partition::
  exchange_partition(THD *thd, TABLE_LIST *table_list, Alter_info *alter_info)
{
  TABLE *part_table, *swap_table;
  TABLE_LIST *swap_table_list;
  handlerton *table_hton;
  partition_element *part_elem;
  const char *partition_name;
  char temp_name[FN_REFLEN+1];
  char part_file_name[2*FN_REFLEN+1];
  char swap_file_name[FN_REFLEN+1];
  char temp_file_name[FN_REFLEN+1];
  /* Local copies of names/ids for the backup (DDL) log record built below */
  char part_table_name[NAME_LEN + 1];
  char part_db[NAME_LEN + 1];
  char swap_table_name[NAME_LEN + 1];
  char swap_db[NAME_LEN + 1];
  uchar part_tabledef_version[MY_UUID_SIZE];
  uchar swap_tabledef_version[MY_UUID_SIZE];

  backup_log_info ddl_log;
  bzero(&ddl_log, sizeof(ddl_log));

  uint swap_part_id;
  uint part_file_name_len;
  Alter_table_prelocking_strategy alter_prelocking_strategy;
  MDL_ticket *swap_table_mdl_ticket= NULL;
  MDL_ticket *part_table_mdl_ticket= NULL;
  uint table_counter;
  bool error= TRUE, force_if_exists= 0;
  ulonglong save_option_bits= thd->variables.option_bits;
  DBUG_ENTER("mysql_exchange_partition");
  DBUG_ASSERT(alter_info->partition_flags & ALTER_PARTITION_EXCHANGE);

  /* Don't allow to exchange with log table */
  swap_table_list= table_list->next_local;
  if (check_if_log_table(swap_table_list, FALSE, "ALTER PARTITION"))
    DBUG_RETURN(TRUE);

  /*
    Currently no MDL lock that allows both read and write and is upgradeable
    to exclusive, so leave the lock type to TL_WRITE_ALLOW_READ also on the
    partitioned table.

    TODO: add MDL lock that allows both read and write and is upgradable to
    exclusive lock. This would allow to continue using the partitioned table
    also with update/insert/delete while the verification of the swap table
    is running.
  */

  /*
    NOTE: It is not possible to exchange a crashed partition/table since
    we need some info from the engine, which we can only access after open,
    to be able to verify the structure/metadata.
  */
  table_list->mdl_request.set_type(MDL_SHARED_NO_WRITE);
  if (unlikely(open_tables(thd, &table_list, &table_counter, 0,
                           &alter_prelocking_strategy)))
  {
    if (thd->lex->if_exists() &&
        thd->get_stmt_da()->sql_errno() == ER_NO_SUCH_TABLE)
    {
      /*
        ALTER TABLE IF EXISTS was used on not existing table
        We have to log the query on a slave as the table may be a shared one
        from the master and we need to ensure that the next slave can see
        the statement as this slave may not have the table shared
      */
      thd->clear_error();
      if (thd->slave_thread &&
          write_bin_log(thd, true, thd->query(), thd->query_length()))
        DBUG_RETURN(true);
      my_ok(thd);
      DBUG_RETURN(false);
    }
    DBUG_RETURN(true);
  }

  part_table= table_list->table;
  swap_table= swap_table_list->table;

  /* Don't allow to exchange with a VIEW */
  if (unlikely(swap_table_list->view))
  {
    my_error(ER_WRONG_OBJECT, MYF(0), table_list->db.str,
             swap_table_list->table_name.str, "BASE TABLE");
    DBUG_RETURN(TRUE);
  }

  if (unlikely(check_exchange_partition(swap_table, part_table)))
    DBUG_RETURN(TRUE);

  if (part_table->file->check_if_updates_are_ignored("ALTER"))
    DBUG_RETURN(return_with_logging(thd));

  /* Add IF EXISTS to binlog if shared table */
  if (part_table->file->partition_ht()->flags &
      HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE)
    force_if_exists= 1;

  /* Fill in the backup log record describing the exchange */
  ddl_log.org_table.str= part_table_name;
  DBUG_ASSERT(part_table->s->table_name.length <= NAME_LEN);
  ddl_log.org_table.length= part_table->s->table_name.length;
  strmake(part_table_name, part_table->s->table_name.str, NAME_LEN);

  ddl_log.org_database.str= part_db;
  DBUG_ASSERT(part_table->s->db.length <= NAME_LEN);
  ddl_log.org_database.length= part_table->s->db.length;
  strmake(part_db, part_table->s->db.str, NAME_LEN);

  ddl_log.new_table.str= swap_table_name;
  DBUG_ASSERT(swap_table->s->table_name.length <= NAME_LEN);
  ddl_log.new_table.length= swap_table->s->table_name.length;
  strmake(swap_table_name, swap_table->s->table_name.str, NAME_LEN);

  ddl_log.new_database.str= swap_db;
  DBUG_ASSERT(swap_table->s->db.length <= NAME_LEN);
  ddl_log.new_database.length= swap_table->s->db.length;
  strmake(swap_db, swap_table->s->db.str, NAME_LEN);

  memcpy(part_tabledef_version, part_table->s->tabledef_version.str,
         MY_UUID_SIZE);
  ddl_log.org_table_id.str= part_tabledef_version;
  ddl_log.org_table_id.length= MY_UUID_SIZE;
  memcpy(swap_tabledef_version, swap_table->s->tabledef_version.str,
         MY_UUID_SIZE);
  ddl_log.new_table_id.str= swap_tabledef_version;
  ddl_log.new_table_id.length= MY_UUID_SIZE;

  /* set lock pruning on first table */
  partition_name= alter_info->partition_names.head();
  if (unlikely(table_list->table->part_info->
               set_named_partition_bitmap(partition_name,
                                          strlen(partition_name))))
    DBUG_RETURN(true);

  if (unlikely(lock_tables(thd, table_list, table_counter, 0)))
    DBUG_RETURN(true);

  table_hton= swap_table->file->ht;

  THD_STAGE_INFO(thd, stage_verifying_table);

  /* Will append the partition name later in part_info->get_part_elem() */
  part_file_name_len= build_table_filename(part_file_name,
                                           sizeof(part_file_name),
                                           table_list->db.str,
                                           table_list->table_name.str,
                                           "", 0);
  build_table_filename(swap_file_name,
                       sizeof(swap_file_name),
                       swap_table_list->db.str,
                       swap_table_list->table_name.str,
                       "", 0);
  /* create a unique temp name */
  my_snprintf(temp_name, sizeof(temp_name), "%s-exchange-%lx-%llx",
              tmp_file_prefix, current_pid, thd->thread_id);
  if (lower_case_table_names)
    my_casedn_str(files_charset_info, temp_name);
  build_table_filename(temp_file_name, sizeof(temp_file_name),
                       table_list->next_local->db.str,
                       temp_name, "", FN_IS_TMP);

  if (unlikely(!(part_elem=
                 part_table->part_info->get_part_elem(partition_name,
                                                      part_file_name +
                                                      part_file_name_len,
                                                      sizeof(part_file_name) -
                                                      part_file_name_len,
                                                      &swap_part_id))))
  {
    DBUG_RETURN(TRUE);
  }

  if (unlikely(swap_part_id == NOT_A_PARTITION_ID))
  {
    DBUG_ASSERT(part_table->part_info->is_sub_partitioned());
    my_error(ER_PARTITION_INSTEAD_OF_SUBPARTITION, MYF(0));
    DBUG_RETURN(TRUE);
  }

  if (unlikely(compare_table_with_partition(thd, swap_table, part_table,
                                            part_elem,
                                            swap_part_id)))
    DBUG_RETURN(TRUE);

  /* Table and partition have the same structure/options, OK to exchange */

  thd_proc_info(thd, "Verifying data with partition");

  if (unlikely(verify_data_with_partition(swap_table, part_table,
                                          swap_part_id)))
    DBUG_RETURN(TRUE);

  /*
    Get exclusive mdl lock on both tables, always the non-partitioned table
    first. Remember the tickets for downgrading locks later.
  */
  swap_table_mdl_ticket= swap_table->mdl_ticket;
  part_table_mdl_ticket= part_table->mdl_ticket;

  /*
    No need to set used_partitions to only propagate
    HA_EXTRA_PREPARE_FOR_RENAME to one part since no built in engine uses
    that flag. And the action would probably be to force close all other
    instances which is what we are doing any way.
  */
  if (wait_while_table_is_used(thd, swap_table, HA_EXTRA_PREPARE_FOR_RENAME) ||
      wait_while_table_is_used(thd, part_table, HA_EXTRA_PREPARE_FOR_RENAME))
    goto err;

  DEBUG_SYNC(thd, "swap_partition_after_wait");

  close_all_tables_for_name(thd, swap_table->s, HA_EXTRA_NOT_USED, NULL);
  close_all_tables_for_name(thd, part_table->s, HA_EXTRA_NOT_USED, NULL);

  DEBUG_SYNC(thd, "swap_partition_before_rename");

  if (unlikely(exchange_name_with_ddl_log(thd, swap_file_name, part_file_name,
                                          temp_file_name, table_hton)))
    goto err;

  /*
    Reopen tables under LOCK TABLES. Ignore the return value for now. It's
    better to keep master/slave in consistent state. Alternative would be to
    try to revert the exchange operation and issue error.
  */
  (void) thd->locked_tables_list.reopen_tables(thd, false);

  if (force_if_exists)
    thd->variables.option_bits|= OPTION_IF_EXISTS;

  if (unlikely((error= write_bin_log(thd, TRUE, thd->query(),
                                     thd->query_length()))))
  {
    /*
      The error is reported in write_bin_log().
      We try to revert to make it easier to keep the master/slave in sync.
+ */ + (void) exchange_name_with_ddl_log(thd, part_file_name, swap_file_name, + temp_file_name, table_hton); + } + else + { + ddl_log.query= { C_STRING_WITH_LEN("EXCHANGE_PARTITION") }; + ddl_log.org_partitioned= true; + ddl_log.new_partitioned= false; + ddl_log.org_storage_engine_name= *hton_name(table_hton); + ddl_log.new_storage_engine_name= *hton_name(table_hton); + backup_log_ddl(&ddl_log); + } + thd->variables.option_bits= save_option_bits; + +err: + if (thd->locked_tables_mode) + { + if (swap_table_mdl_ticket) + swap_table_mdl_ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE); + if (part_table_mdl_ticket) + part_table_mdl_ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE); + } + + if (unlikely(!error)) + my_ok(thd); + + // For query cache + table_list->table= NULL; + table_list->next_local->table= NULL; + query_cache_invalidate3(thd, table_list, FALSE); + + DBUG_RETURN(error); +} + +bool Sql_cmd_alter_table_analyze_partition::execute(THD *thd) +{ + bool res; + DBUG_ENTER("Sql_cmd_alter_table_analyze_partition::execute"); + + /* + Flag that it is an ALTER command which administrates partitions, used + by ha_partition + */ + thd->lex->alter_info.partition_flags|= ALTER_PARTITION_ADMIN; + + res= Sql_cmd_analyze_table::execute(thd); + + DBUG_RETURN(res); +} + + +bool Sql_cmd_alter_table_check_partition::execute(THD *thd) +{ + bool res; + DBUG_ENTER("Sql_cmd_alter_table_check_partition::execute"); + + /* + Flag that it is an ALTER command which administrates partitions, used + by ha_partition + */ + thd->lex->alter_info.partition_flags|= ALTER_PARTITION_ADMIN; + + res= Sql_cmd_check_table::execute(thd); + + DBUG_RETURN(res); +} + + +bool Sql_cmd_alter_table_optimize_partition::execute(THD *thd) +{ + bool res; + DBUG_ENTER("Alter_table_optimize_partition_statement::execute"); + + /* + Flag that it is an ALTER command which administrates partitions, used + by ha_partition + */ + thd->lex->alter_info.partition_flags|= ALTER_PARTITION_ADMIN; + + res= 
Sql_cmd_optimize_table::execute(thd); + + DBUG_RETURN(res); +} + + +bool Sql_cmd_alter_table_repair_partition::execute(THD *thd) +{ + bool res; + DBUG_ENTER("Sql_cmd_alter_table_repair_partition::execute"); + + /* + Flag that it is an ALTER command which administrates partitions, used + by ha_partition + */ + thd->lex->alter_info.partition_flags|= ALTER_PARTITION_ADMIN; + + res= Sql_cmd_repair_table::execute(thd); + + DBUG_RETURN(res); +} + + +bool Sql_cmd_alter_table_truncate_partition::execute(THD *thd) +{ + int error; + ha_partition *partition; + ulong timeout= thd->variables.lock_wait_timeout; + TABLE_LIST *first_table= thd->lex->first_select_lex()->table_list.first; + Alter_info *alter_info= &thd->lex->alter_info; + uint table_counter, i; + List partition_names_list; + bool binlog_stmt, force_if_exists= 0; + DBUG_ENTER("Sql_cmd_alter_table_truncate_partition::execute"); + + /* + Flag that it is an ALTER command which administrates partitions, used + by ha_partition. + */ + thd->lex->alter_info.partition_flags|= (ALTER_PARTITION_ADMIN | + ALTER_PARTITION_TRUNCATE); + + /* Fix the lock types (not the same as ordinary ALTER TABLE). */ + first_table->lock_type= TL_WRITE; + first_table->mdl_request.set_type(MDL_EXCLUSIVE); + + /* + Check table permissions and open it with a exclusive lock. + Ensure it is a partitioned table and finally, upcast the + handler and invoke the partition truncate method. Lastly, + write the statement to the binary log if necessary. 
  */

  if (check_one_table_access(thd, DROP_ACL, first_table))
    DBUG_RETURN(TRUE);

#ifdef WITH_WSREP
  /* Replicate the truncate to the cluster via total-order isolation */
  if (WSREP(thd) &&
      (!thd->is_current_stmt_binlog_format_row() ||
       !thd->find_temporary_table(first_table)) &&
      wsrep_to_isolation_begin(
        thd, first_table->db.str, first_table->table_name.str, NULL)
      )
  {
    WSREP_WARN("ALTER TABLE TRUNCATE PARTITION isolation failure");
    DBUG_RETURN(TRUE);
  }
#endif /* WITH_WSREP */

  if (open_tables(thd, &first_table, &table_counter, 0))
  {
    if (thd->lex->if_exists() &&
        thd->get_stmt_da()->sql_errno() == ER_NO_SUCH_TABLE)
    {
      /*
        ALTER TABLE IF EXISTS was used on not existing table
        We have to log the query on a slave as the table may be a shared one
        from the master and we need to ensure that the next slave can see
        the statement as this slave may not have the table shared
      */
      thd->clear_error();
      DBUG_RETURN(return_with_logging(thd));
    }
    DBUG_RETURN(TRUE);
  }

  if (!first_table->table || first_table->view)
  {
    my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0));
    DBUG_RETURN(TRUE);
  }

  if (first_table->table->file->check_if_updates_are_ignored("ALTER"))
    DBUG_RETURN(return_with_logging(thd));

  if (first_table->table->s->db_type() != partition_hton)
  {
    my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0));
    DBUG_RETURN(TRUE);
  }

  if (first_table->table->file->partition_ht()->flags &
      HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE)
    force_if_exists= 1;

  /*
    Prune all, but named partitions,
    to avoid excessive calls to external_lock().
  */
  List_iterator<char> partition_names_it(alter_info->partition_names);
  uint num_names= alter_info->partition_names.elements;
  for (i= 0; i < num_names; i++)
  {
    const char *partition_name= partition_names_it++;
    String *str_partition_name= new (thd->mem_root)
                                  String(partition_name,
                                         strlen(partition_name),
                                         system_charset_info);
    if (!str_partition_name)
      DBUG_RETURN(true);
    partition_names_list.push_back(str_partition_name, thd->mem_root);
  }
  if (first_table->table->
        part_info->set_partition_bitmaps(&partition_names_list))
    DBUG_RETURN(true);

  if (lock_tables(thd, first_table, table_counter, 0))
    DBUG_RETURN(true);

  /*
    Under locked table modes this might still not be an exclusive
    lock. Hence, upgrade the lock since the handler truncate method
    mandates an exclusive metadata lock.
  */
  MDL_ticket *ticket= first_table->table->mdl_ticket;
  if (thd->mdl_context.upgrade_shared_lock(ticket, MDL_EXCLUSIVE, timeout))
    DBUG_RETURN(TRUE);

  /* Force all other instances of the table to be closed/reopened */
  first_table->table->s->tdc->flush(thd, true);

  partition= (ha_partition*) first_table->table->file;
  /* Invoke the handler method responsible for truncating the partition. */
  if (unlikely(error= partition->truncate_partition(alter_info,
                                                    &binlog_stmt)))
    partition->print_error(error, MYF(0));

  /*
    All effects of a truncate operation are committed even if the
    operation fails. Thus, the query must be written to the binary
    log. The exception is an unimplemented truncate method or failure
    before any call to handler::truncate() is done.
    Also, it is logged in statement format, regardless of the binlog format.

    Since we've changed data within the table, we also have to invalidate
    the query cache for it.
  */
  if (likely(error != HA_ERR_WRONG_COMMAND))
  {
    ulonglong save_option_bits= thd->variables.option_bits;
    if (force_if_exists)
      thd->variables.option_bits|= OPTION_IF_EXISTS;

    query_cache_invalidate3(thd, first_table, FALSE);
    if (binlog_stmt)
      error|= write_bin_log(thd, !error, thd->query(), thd->query_length());
    thd->variables.option_bits= save_option_bits;
  }

  /*
    A locked table ticket was upgraded to an exclusive lock. After the
    query has been written to the binary log, downgrade the lock
    to a shared one.
  */
  if (thd->locked_tables_mode)
    ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE);

  if (likely(!error))
    my_ok(thd);

  // Invalidate query cache
  DBUG_ASSERT(!first_table->next_local);
  query_cache_invalidate3(thd, first_table, FALSE);

  DBUG_RETURN(error);
}


/**
  Move a table specified in the CONVERT TABLE TO PARTITION ...
  to the new partition.

  @param lpt  A structure containing parameters regarding to the statement
              ALTER TABLE ... TO PARTITION ...

  @return false on success, true on error
*/

bool alter_partition_convert_in(ALTER_PARTITION_PARAM_TYPE *lpt)
{
  /* File name of the partition being added, built from the table path */
  char part_file_name[2*FN_REFLEN+1];
  THD *thd= lpt->thd;
  const char *path= lpt->table_list->table->s->path.str;
  TABLE_LIST *table_from= lpt->table_list->next_local;

  const char *partition_name=
    thd->lex->part_info->curr_part_elem->partition_name;

  if (create_partition_name(part_file_name, sizeof(part_file_name), path,
                            partition_name, NORMAL_PART_NAME, false))
    return true;

  char from_file_name[FN_REFLEN+1];

  build_table_filename(from_file_name, sizeof(from_file_name),
                       table_from->db.str, table_from->table_name.str, "", 0);

  /* A plain handler of the source engine performs the rename */
  handler *file= get_new_handler(nullptr, thd->mem_root,
                                 table_from->table->file->ht);
  if (unlikely(!file))
    return true;

  close_all_tables_for_name(thd, table_from->table->s,
                            HA_EXTRA_PREPARE_FOR_RENAME, nullptr);

  bool res= file->ha_rename_table(from_file_name, part_file_name);

  if (res)
    my_error(ER_ERROR_ON_RENAME, MYF(0), from_file_name,
             part_file_name, my_errno);

  delete file;
  return res;
}

#endif /* WITH_PARTITION_STORAGE_ENGINE */
diff --git a/sql/sql_partition_admin.h b/sql/sql_partition_admin.h
new file mode 100644
index 00000000..b50c3555
--- /dev/null
+++ b/sql/sql_partition_admin.h
@@ -0,0 +1,262 @@
/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */

#ifndef SQL_PARTITION_ADMIN_H
#define SQL_PARTITION_ADMIN_H

#ifndef WITH_PARTITION_STORAGE_ENGINE

/**
  Stub class that returns an error if the partition storage engine is
  not supported.
*/
class Sql_cmd_partition_unsupported : public Sql_cmd
{
public:
  Sql_cmd_partition_unsupported()
  {}

  ~Sql_cmd_partition_unsupported()
  {}

  /* Override SQLCOM_*, since it is an ALTER command */
  virtual enum_sql_command sql_command_code() const
  {
    return SQLCOM_ALTER_TABLE;
  }

  bool execute(THD *thd);
};


class Sql_cmd_alter_table_exchange_partition :
  public Sql_cmd_partition_unsupported
{
public:
  Sql_cmd_alter_table_exchange_partition()
  {}

  ~Sql_cmd_alter_table_exchange_partition()
  {}
};


class Sql_cmd_alter_table_analyze_partition :
  public Sql_cmd_partition_unsupported
{
public:
  Sql_cmd_alter_table_analyze_partition()
  {}

  ~Sql_cmd_alter_table_analyze_partition()
  {}
};


class Sql_cmd_alter_table_check_partition :
  public Sql_cmd_partition_unsupported
{
public:
  Sql_cmd_alter_table_check_partition()
  {}

  ~Sql_cmd_alter_table_check_partition()
  {}
};


class Sql_cmd_alter_table_optimize_partition :
  public Sql_cmd_partition_unsupported
{
public:
  Sql_cmd_alter_table_optimize_partition()
  {}

  ~Sql_cmd_alter_table_optimize_partition()
  {}
};


class Sql_cmd_alter_table_repair_partition :
  public Sql_cmd_partition_unsupported
{
public:
  Sql_cmd_alter_table_repair_partition()
  {}

  ~Sql_cmd_alter_table_repair_partition()
  {}
};


class Sql_cmd_alter_table_truncate_partition :
  public Sql_cmd_partition_unsupported
{
public:
  Sql_cmd_alter_table_truncate_partition()
  {}

  ~Sql_cmd_alter_table_truncate_partition()
  {}
};

#else

/**
  Class that represents the
ALTER TABLE t1 EXCHANGE PARTITION p statement.
*/
class Sql_cmd_alter_table_exchange_partition : public Sql_cmd_common_alter_table
{
public:
  /**
    Constructor, used to represent an ALTER TABLE EXCHANGE PARTITION statement.
  */
  Sql_cmd_alter_table_exchange_partition()
    : Sql_cmd_common_alter_table()
  {}

  ~Sql_cmd_alter_table_exchange_partition() = default;

  bool execute(THD *thd);

private:
  bool exchange_partition(THD *thd, TABLE_LIST *, Alter_info *);
};


/**
  Class that represents the ALTER TABLE t1 ANALYZE PARTITION p statement.
*/
class Sql_cmd_alter_table_analyze_partition : public Sql_cmd_analyze_table
{
public:
  /**
    Constructor, used to represent an ALTER TABLE ANALYZE PARTITION statement.
  */
  Sql_cmd_alter_table_analyze_partition()
    : Sql_cmd_analyze_table()
  {}

  ~Sql_cmd_alter_table_analyze_partition() = default;

  bool execute(THD *thd);

  /* Override SQLCOM_ANALYZE, since it is an ALTER command */
  virtual enum_sql_command sql_command_code() const
  {
    return SQLCOM_ALTER_TABLE;
  }
};


/**
  Class that represents the ALTER TABLE t1 CHECK PARTITION p statement.
*/
class Sql_cmd_alter_table_check_partition : public Sql_cmd_check_table
{
public:
  /**
    Constructor, used to represent an ALTER TABLE CHECK PARTITION statement.
  */
  Sql_cmd_alter_table_check_partition()
    : Sql_cmd_check_table()
  {}

  ~Sql_cmd_alter_table_check_partition() = default;

  bool execute(THD *thd);

  /* Override SQLCOM_CHECK, since it is an ALTER command */
  virtual enum_sql_command sql_command_code() const
  {
    return SQLCOM_ALTER_TABLE;
  }
};


/**
  Class that represents the ALTER TABLE t1 OPTIMIZE PARTITION p statement.
*/
class Sql_cmd_alter_table_optimize_partition : public Sql_cmd_optimize_table
{
public:
  /**
    Constructor, used to represent an ALTER TABLE OPTIMIZE PARTITION statement.
  */
  Sql_cmd_alter_table_optimize_partition()
    : Sql_cmd_optimize_table()
  {}

  ~Sql_cmd_alter_table_optimize_partition() = default;

  bool execute(THD *thd);

  /* Override SQLCOM_OPTIMIZE, since it is an ALTER command */
  virtual enum_sql_command sql_command_code() const
  {
    return SQLCOM_ALTER_TABLE;
  }
};


/**
  Class that represents the ALTER TABLE t1 REPAIR PARTITION p statement.
*/
class Sql_cmd_alter_table_repair_partition : public Sql_cmd_repair_table
{
public:
  /**
    Constructor, used to represent an ALTER TABLE REPAIR PARTITION statement.
  */
  Sql_cmd_alter_table_repair_partition()
    : Sql_cmd_repair_table()
  {}

  ~Sql_cmd_alter_table_repair_partition() = default;

  bool execute(THD *thd);

  /* Override SQLCOM_REPAIR, since it is an ALTER command */
  virtual enum_sql_command sql_command_code() const
  {
    return SQLCOM_ALTER_TABLE;
  }
};


/**
  Class that represents the ALTER TABLE t1 TRUNCATE PARTITION p statement.
*/
class Sql_cmd_alter_table_truncate_partition : public Sql_cmd_truncate_table
{
public:
  /**
    Constructor, used to represent an ALTER TABLE TRUNCATE PARTITION statement.
  */
  Sql_cmd_alter_table_truncate_partition() = default;

  virtual ~Sql_cmd_alter_table_truncate_partition() = default;

  bool execute(THD *thd);

  /* Override SQLCOM_TRUNCATE, since it is an ALTER command */
  virtual enum_sql_command sql_command_code() const
  {
    return SQLCOM_ALTER_TABLE;
  }
};

#endif /* WITH_PARTITION_STORAGE_ENGINE */
#endif /* SQL_PARTITION_ADMIN_H */
diff --git a/sql/sql_plist.h b/sql/sql_plist.h
new file mode 100644
index 00000000..7f75208c
--- /dev/null
+++ b/sql/sql_plist.h
@@ -0,0 +1,297 @@
#ifndef SQL_PLIST_H
#define SQL_PLIST_H
/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */


template <typename T, typename L>
class I_P_List_iterator;
class I_P_List_null_counter;
template <typename T> class I_P_List_no_push_back;


/**
  Intrusive parameterized list.

  Unlike I_List does not require its elements to be descendant of ilink
  class and therefore allows them to participate in several such lists
  simultaneously.

  Unlike List, it is a doubly-linked list and thus supports efficient
  deletion of an element without an iterator.

  @param T Type of elements which will belong to list.
  @param B Class which via its methods specifies which members
           of T should be used for participating in this list.
           Here is typical layout of such class:

           struct B
           {
             static inline T **next_ptr(T *el)
             {
               return &el->next;
             }
             static inline T ***prev_ptr(T *el)
             {
               return &el->prev;
             }
           };
  @param C Policy class specifying how counting of elements in the list
           should be done. Instance of this class is also used as a place
           where information about number of list elements is stored.
           @sa I_P_List_null_counter, I_P_List_counter
  @param I Policy class specifying whether I_P_List should support
           efficient push_back() operation. Instance of this class
           is used as place where we store information to support
           this operation.
           @sa I_P_List_no_push_back, I_P_List_fast_push_back.
*/

template <typename T, typename B,
          typename C = I_P_List_null_counter,
          typename I = I_P_List_no_push_back<T> >
class I_P_List : public C, public I
{
  T *m_first;

  /*
    Do not prohibit copying of I_P_List object to simplify their usage in
    backup/restore scenarios. Note that performing any operations on such
    is a bad idea.
  */
public:
  I_P_List() : I(&m_first), m_first(NULL) {};
  /*
    empty() is used in many places in the code instead of a constructor, to
    initialize a bzero-ed I_P_List instance.
  */

  inline void empty() { m_first= NULL; C::reset(); I::set_last(&m_first); }
  inline bool is_empty() const { return (m_first == NULL); }
  /* Link a in before the current head; prev pointers point at next fields */
  inline void push_front(T* a)
  {
    *B::next_ptr(a)= m_first;
    if (m_first)
      *B::prev_ptr(m_first)= B::next_ptr(a);
    else
      I::set_last(B::next_ptr(a));
    m_first= a;
    *B::prev_ptr(a)= &m_first;
    C::inc();
  }
  /* Requires a push-back-capable I policy (one providing get_last()) */
  inline void push_back(T *a)
  {
    T **last= I::get_last();
    *B::next_ptr(a)= *last;
    *last= a;
    *B::prev_ptr(a)= last;
    I::set_last(B::next_ptr(a));
    C::inc();
  }
  /* Insert a after pos; pos == NULL means insert at the front */
  inline void insert_after(T *pos, T *a)
  {
    if (pos == NULL)
      push_front(a);
    else
    {
      *B::next_ptr(a)= *B::next_ptr(pos);
      *B::prev_ptr(a)= B::next_ptr(pos);
      *B::next_ptr(pos)= a;
      if (*B::next_ptr(a))
      {
        T *old_next= *B::next_ptr(a);
        *B::prev_ptr(old_next)= B::next_ptr(a);
      }
      else
        I::set_last(B::next_ptr(a));
      C::inc();
    }
  }
  /* O(1) unlink of a without needing an iterator */
  inline void remove(T *a)
  {
    T *next= *B::next_ptr(a);
    if (next)
      *B::prev_ptr(next)= *B::prev_ptr(a);
    else
      I::set_last(*B::prev_ptr(a));
    **B::prev_ptr(a)= next;
    C::dec();
  }
  inline T* front() { return m_first; }
  inline const T *front() const { return m_first; }
  inline T* pop_front()
  {
    T *result= front();

    if (result)
      remove(result);

    return result;
  }
  /* Exchange contents with rhs, re-anchoring head prev pointers afterwards */
  void swap(I_P_List<T, B, C, I> &rhs)
  {
    swap_variables(T *, m_first, rhs.m_first);
    I::swap(rhs);
    if (m_first)
      *B::prev_ptr(m_first)= &m_first;
    else
      I::set_last(&m_first);
    if (rhs.m_first)
      *B::prev_ptr(rhs.m_first)= &rhs.m_first;
    else
      I::set_last(&rhs.m_first);
    C::swap(rhs);
  }
  typedef B Adapter;
  typedef I_P_List<T, B, C, I> Base;
  typedef I_P_List_iterator<T, Base> Iterator;
  typedef I_P_List_iterator<const T, Base> Const_Iterator;
#ifndef _lint
  friend class I_P_List_iterator<T, Base>;
  friend class I_P_List_iterator<const T, Base>;
#endif
};


/**
  Iterator for I_P_List.
*/

template <typename T, typename L>
class I_P_List_iterator
{
  const L *list;
  T *current;
public:
  I_P_List_iterator(const L &a)
    : list(&a), current(a.m_first) {}
  I_P_List_iterator(const L &a, T* current_arg)
    : list(&a), current(current_arg) {}
  inline void init(const L &a)
  {
    list= &a;
    current= a.m_first;
  }
  /**
    Operator for it++

    @note since we save next element pointer, caller may remove current element.
    Such modification doesn't invalidate iterator.
  */
  inline T* operator++(int)
  {
    T *result= current;
    if (result)
      current= *L::Adapter::next_ptr(current);
    return result;
  }
  /* Operator for ++it */
  inline T* operator++()
  {
    current= *L::Adapter::next_ptr(current);
    return current;
  }
  inline void rewind()
  {
    current= list->m_first;
  }
};


/**
  Hook class which via its methods specifies which members
  of T should be used for participating in an intrusive list.
*/

template <typename T, T* T::*next, T** T::*prev>
struct I_P_List_adapter
{
  static inline T **next_ptr(T *el) { return &(el->*next); }
  static inline const T* const* next_ptr(const T *el) { return &(el->*next); }
  static inline T ***prev_ptr(T *el) { return &(el->*prev); }
};


/**
  Element counting policy class for I_P_List to be used in
  cases when no element counting should be done.
*/

class I_P_List_null_counter
{
protected:
  void reset() {}
  void inc() {}
  void dec() {}
  void swap(I_P_List_null_counter &) {}
};


/**
  Element counting policy class for I_P_List which provides
  basic element counting.
*/

class I_P_List_counter
{
  uint m_counter;
protected:
  I_P_List_counter() : m_counter (0) {}
  void reset() {m_counter= 0;}
  void inc() {m_counter++;}
  void dec() {m_counter--;}
  void swap(I_P_List_counter &rhs)
  { swap_variables(uint, m_counter, rhs.m_counter); }
public:
  uint elements() const { return m_counter; }
};


/**
  A null insertion policy class for I_P_List to be used
  in cases when push_back() operation is not necessary.
*/

template <typename T> class I_P_List_no_push_back
{
protected:
  I_P_List_no_push_back(T **) {}
  void set_last(T **) {}
  /*
    T** get_last() const method is intentionally left unimplemented
    in order to prohibit usage of push_back() method in lists which
    use this policy.
  */
  void swap(I_P_List_no_push_back &) {}
};


/**
  An insertion policy class for I_P_List which can
  be used when fast push_back() operation is required.
*/

template <typename T> class I_P_List_fast_push_back
{
  T **m_last;
protected:
  I_P_List_fast_push_back(T **a) : m_last(a) { };
  void set_last(T **a) { m_last= a; }
  T** get_last() const { return m_last; }
  void swap(I_P_List_fast_push_back &rhs)
  { swap_variables(T**, m_last, rhs.m_last); }
};

#endif
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
new file mode 100644
index 00000000..35767307
--- /dev/null
+++ b/sql/sql_plugin.cc
@@ -0,0 +1,4534 @@
/*
  Copyright (c) 2005, 2018, Oracle and/or its affiliates.
  Copyright (c) 2010, 2020, MariaDB Corporation.

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 of the License.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "sql_plugin.h" // SHOW_MY_BOOL +#include "sql_priv.h" +#include "unireg.h" +#include "sql_class.h" // set_var.h: THD +#include "sys_vars_shared.h" +#include "sql_locale.h" +#include "sql_plugin.h" +#include "sql_parse.h" // check_table_access +#include "sql_base.h" // close_mysql_tables +#include "key.h" // key_copy +#include "sql_table.h" +#include "sql_show.h" // remove_status_vars, add_status_vars +#include "strfunc.h" // find_set +#include "records.h" // init_read_record, end_read_record +#include +#include +#include "sql_audit.h" +#include +#include "lock.h" // MYSQL_LOCK_IGNORE_TIMEOUT +#include +#include +#include +#include +#include +#include "sql_plugin_compat.h" +#include "wsrep_mysqld.h" + +static PSI_memory_key key_memory_plugin_mem_root; +static PSI_memory_key key_memory_plugin_int_mem_root; +static PSI_memory_key key_memory_mysql_plugin; +static PSI_memory_key key_memory_mysql_plugin_dl; +static PSI_memory_key key_memory_plugin_bookmark; + +#ifdef HAVE_LINK_H +#include +#endif + +extern struct st_maria_plugin *mysql_optional_plugins[]; +extern struct st_maria_plugin *mysql_mandatory_plugins[]; + +/** + @note The order of the enumeration is critical. 
+ @see construct_options +*/ +const char *global_plugin_typelib_names[]= + { "OFF", "ON", "FORCE", "FORCE_PLUS_PERMANENT", NULL }; +static TYPELIB global_plugin_typelib= + { array_elements(global_plugin_typelib_names)-1, + "", global_plugin_typelib_names, NULL }; + +static I_List opt_plugin_load_list; +I_List *opt_plugin_load_list_ptr= &opt_plugin_load_list; +char *opt_plugin_dir_ptr; +char opt_plugin_dir[FN_REFLEN]; +ulong plugin_maturity; + +static LEX_CSTRING MYSQL_PLUGIN_NAME= {STRING_WITH_LEN("plugin") }; + +/* + not really needed now, this map will become essential when we add more + maturity levels. We cannot change existing maturity constants, + so the next value - even if it will be MariaDB_PLUGIN_MATURITY_VERY_BUGGY - + will inevitably be larger than MariaDB_PLUGIN_MATURITY_STABLE. + To be able to compare them we use this mapping array +*/ +uint plugin_maturity_map[]= +{ 0, 1, 2, 3, 4, 5, 6 }; + +/* + When you add a new plugin type, add both a string and make sure that the + init and deinit array are correctly updated. 
+*/ +const LEX_CSTRING plugin_type_names[MYSQL_MAX_PLUGIN_TYPE_NUM]= +{ + { STRING_WITH_LEN("UDF") }, + { STRING_WITH_LEN("STORAGE ENGINE") }, + { STRING_WITH_LEN("FTPARSER") }, + { STRING_WITH_LEN("DAEMON") }, + { STRING_WITH_LEN("INFORMATION SCHEMA") }, + { STRING_WITH_LEN("AUDIT") }, + { STRING_WITH_LEN("REPLICATION") }, + { STRING_WITH_LEN("AUTHENTICATION") }, + { STRING_WITH_LEN("PASSWORD VALIDATION") }, + { STRING_WITH_LEN("ENCRYPTION") }, + { STRING_WITH_LEN("DATA TYPE") }, + { STRING_WITH_LEN("FUNCTION") } +}; + +extern int initialize_schema_table(st_plugin_int *plugin); +extern int finalize_schema_table(st_plugin_int *plugin); + +extern int initialize_audit_plugin(st_plugin_int *plugin); +extern int finalize_audit_plugin(st_plugin_int *plugin); + +extern int initialize_encryption_plugin(st_plugin_int *plugin); +extern int finalize_encryption_plugin(st_plugin_int *plugin); + +extern int initialize_data_type_plugin(st_plugin_int *plugin); + +/* + The number of elements in both plugin_type_initialize and + plugin_type_deinitialize should equal to the number of plugins + defined. +*/ +plugin_type_init plugin_type_initialize[MYSQL_MAX_PLUGIN_TYPE_NUM]= +{ + 0, ha_initialize_handlerton, 0, 0,initialize_schema_table, + initialize_audit_plugin, 0, 0, 0, initialize_encryption_plugin, + initialize_data_type_plugin, 0 +}; + +plugin_type_init plugin_type_deinitialize[MYSQL_MAX_PLUGIN_TYPE_NUM]= +{ + 0, ha_finalize_handlerton, 0, 0, finalize_schema_table, + finalize_audit_plugin, 0, 0, 0, finalize_encryption_plugin, 0, + 0 // FUNCTION +}; + +/* + Defines in which order plugin types have to be initialized. 
+ Essentially, we want to initialize MYSQL_KEY_MANAGEMENT_PLUGIN before + MYSQL_STORAGE_ENGINE_PLUGIN, and that before MYSQL_INFORMATION_SCHEMA_PLUGIN +*/ +static int plugin_type_initialization_order[MYSQL_MAX_PLUGIN_TYPE_NUM]= +{ + MYSQL_DAEMON_PLUGIN, + MariaDB_ENCRYPTION_PLUGIN, + MariaDB_DATA_TYPE_PLUGIN, + MariaDB_FUNCTION_PLUGIN, + MYSQL_STORAGE_ENGINE_PLUGIN, + MYSQL_INFORMATION_SCHEMA_PLUGIN, + MYSQL_FTPARSER_PLUGIN, + MYSQL_AUTHENTICATION_PLUGIN, + MariaDB_PASSWORD_VALIDATION_PLUGIN, + MYSQL_AUDIT_PLUGIN, + MYSQL_REPLICATION_PLUGIN, + MYSQL_UDF_PLUGIN +}; + +#ifdef HAVE_DLOPEN +static const char *plugin_interface_version_sym= + "_mysql_plugin_interface_version_"; +static const char *sizeof_st_plugin_sym= + "_mysql_sizeof_struct_st_plugin_"; +static const char *plugin_declarations_sym= "_mysql_plugin_declarations_"; +static int min_plugin_interface_version= MYSQL_PLUGIN_INTERFACE_VERSION & ~0xFF; +static const char *maria_plugin_interface_version_sym= + "_maria_plugin_interface_version_"; +static const char *maria_sizeof_st_plugin_sym= + "_maria_sizeof_struct_st_plugin_"; +static const char *maria_plugin_declarations_sym= + "_maria_plugin_declarations_"; +static int min_maria_plugin_interface_version= + MARIA_PLUGIN_INTERFACE_VERSION & ~0xFF; +#endif + +/* Note that 'int version' must be the first field of every plugin + sub-structure (plugin->info). 
+*/ +static int min_plugin_info_interface_version[MYSQL_MAX_PLUGIN_TYPE_NUM]= +{ + 0x0000, + MYSQL_HANDLERTON_INTERFACE_VERSION, + MYSQL_FTPARSER_INTERFACE_VERSION, + MYSQL_DAEMON_INTERFACE_VERSION, + MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION, + MYSQL_AUDIT_INTERFACE_VERSION, + MYSQL_REPLICATION_INTERFACE_VERSION, + MIN_AUTHENTICATION_INTERFACE_VERSION, + MariaDB_PASSWORD_VALIDATION_INTERFACE_VERSION, + MariaDB_ENCRYPTION_INTERFACE_VERSION, + MariaDB_DATA_TYPE_INTERFACE_VERSION, + MariaDB_FUNCTION_INTERFACE_VERSION +}; +static int cur_plugin_info_interface_version[MYSQL_MAX_PLUGIN_TYPE_NUM]= +{ + 0x0000, /* UDF: not implemented */ + MYSQL_HANDLERTON_INTERFACE_VERSION, + MYSQL_FTPARSER_INTERFACE_VERSION, + MYSQL_DAEMON_INTERFACE_VERSION, + MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION, + MYSQL_AUDIT_INTERFACE_VERSION, + MYSQL_REPLICATION_INTERFACE_VERSION, + MYSQL_AUTHENTICATION_INTERFACE_VERSION, + MariaDB_PASSWORD_VALIDATION_INTERFACE_VERSION, + MariaDB_ENCRYPTION_INTERFACE_VERSION, + MariaDB_DATA_TYPE_INTERFACE_VERSION, + MariaDB_FUNCTION_INTERFACE_VERSION +}; + +static struct +{ + const char *plugin_name; + enum enum_plugin_load_option override; +} override_plugin_load_policy[]={ + /* + If the performance schema is compiled in, + treat the storage engine plugin as 'mandatory', + to suppress any plugin-level options such as '--performance-schema'. + This is specific to the performance schema, and is done on purpose: + the server-level option '--performance-schema' controls the overall + performance schema initialization, which consists of much more that + the underlying storage engine initialization. + See mysqld.cc, set_vars.cc. 
+ Suppressing ways to interfere directly with the storage engine alone + prevents awkward situations where: + - the user wants the performance schema functionality, by using + '--enable-performance-schema' (the server option), + - yet disable explicitly a component needed for the functionality + to work, by using '--skip-performance-schema' (the plugin) + */ + { "performance_schema", PLUGIN_FORCE } + + /* we disable few other plugins by default */ + ,{ "feedback", PLUGIN_OFF } +}; + +/* support for Services */ + +#include "sql_plugin_services.inl" + +/* + A mutex LOCK_plugin must be acquired before accessing the + following variables/structures. + We are always manipulating ref count, so a rwlock here is unneccessary. +*/ +mysql_mutex_t LOCK_plugin; +static DYNAMIC_ARRAY plugin_dl_array; +static DYNAMIC_ARRAY plugin_array; +static HASH plugin_hash[MYSQL_MAX_PLUGIN_TYPE_NUM]; +static MEM_ROOT plugin_mem_root; +static bool reap_needed= false; +volatile int global_plugin_version= 1; + +static bool initialized= 0; +ulong dlopen_count; + + +/* + write-lock on LOCK_system_variables_hash is required before modifying + the following variables/structures +*/ +static MEM_ROOT plugin_vars_mem_root; +static size_t global_variables_dynamic_size= 0; +static HASH bookmark_hash; + + +/* + hidden part of opaque value passed to variable check functions. + Used to provide a object-like structure to non C++ consumers. +*/ +struct st_item_value_holder : public st_mysql_value +{ + Item *item; +}; + + +/* + stored in bookmark_hash, this structure is never removed from the + hash and is used to mark a single offset for a thd local variable + even if plugins have been uninstalled and reinstalled, repeatedly. + This structure is allocated from plugin_mem_root. 
+ + The key format is as follows: + 1 byte - variable type code + name_len bytes - variable name + '\0' - end of key +*/ +struct st_bookmark +{ + uint name_len; + int offset; + uint version; + bool loaded; + char key[1]; +}; + + +/* + skeleton of a plugin variable - portion of structure common to all. +*/ +struct st_mysql_sys_var +{ + MYSQL_PLUGIN_VAR_HEADER; +}; + +enum install_status { INSTALL_GOOD, INSTALL_FAIL_WARN_OK, INSTALL_FAIL_NOT_OK }; +/* + sys_var class for access to all plugin variables visible to the user +*/ +class sys_var_pluginvar: public sys_var, public Sql_alloc +{ +public: + struct st_plugin_int *plugin; + struct st_mysql_sys_var *plugin_var; + + sys_var_pluginvar(sys_var_chain *chain, const char *name_arg, + st_plugin_int *p, st_mysql_sys_var *plugin_var_arg, + const char *substitute); + sys_var_pluginvar *cast_pluginvar() { return this; } + uchar* real_value_ptr(THD *thd, enum_var_type type) const; + TYPELIB* plugin_var_typelib(void) const; + const uchar* do_value_ptr(THD *thd, enum_var_type type, const LEX_CSTRING *base) const; + const uchar* session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return do_value_ptr(thd, OPT_SESSION, base); } + const uchar* global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return do_value_ptr(thd, OPT_GLOBAL, base); } + const uchar *default_value_ptr(THD *thd) const + { return do_value_ptr(thd, OPT_DEFAULT, 0); } + bool do_check(THD *thd, set_var *var); + virtual void session_save_default(THD *thd, set_var *var) {} + virtual void global_save_default(THD *thd, set_var *var) {} + bool session_update(THD *thd, set_var *var); + bool global_update(THD *thd, set_var *var); + bool session_is_default(THD *thd); +}; + + +/* prototypes */ +static void plugin_load(MEM_ROOT *tmp_root); +static bool plugin_load_list(MEM_ROOT *, const char *); +static int test_plugin_options(MEM_ROOT *, struct st_plugin_int *, + int *, char **); +static bool register_builtin(struct st_maria_plugin *, struct 
st_plugin_int *, + struct st_plugin_int **); +static void unlock_variables(THD *thd, struct system_variables *vars); +static void cleanup_variables(struct system_variables *vars); +static void plugin_vars_free_values(st_mysql_sys_var **vars); +static void restore_ptr_backup(uint n, st_ptr_backup *backup); +static void intern_plugin_unlock(LEX *lex, plugin_ref plugin); +static void reap_plugins(void); + +bool plugin_is_forced(struct st_plugin_int *p) +{ + return p->load_option == PLUGIN_FORCE || + p->load_option == PLUGIN_FORCE_PLUS_PERMANENT; +} + +/** + Check if the provided path is valid in the sense that it does cause + a relative reference outside the directory. + + @note Currently, this function only check if there are any + characters in FN_DIRSEP in the string, but it might change in the + future. + + @code + check_valid_path("../foo.so") -> true + check_valid_path("foo.so") -> false + @endcode + */ +bool check_valid_path(const char *path, size_t len) +{ + size_t prefix= my_strcspn(files_charset_info, path, path + len, FN_DIRSEP); + return prefix < len; +} + +static void fix_dl_name(MEM_ROOT *root, LEX_CSTRING *dl) +{ + const size_t so_ext_len= sizeof(SO_EXT) - 1; + if (dl->length < so_ext_len || + my_strcasecmp(&my_charset_latin1, dl->str + dl->length - so_ext_len, + SO_EXT)) + { + char *s= (char*)alloc_root(root, dl->length + so_ext_len + 1); + memcpy(s, dl->str, dl->length); + strcpy(s + dl->length, SO_EXT); + dl->str= s; + dl->length+= so_ext_len; + } +} + + +/**************************************************************************** + Value type thunks, allows the C world to play in the C++ world +****************************************************************************/ + +static int item_value_type(struct st_mysql_value *value) +{ + switch (((st_item_value_holder*)value)->item->result_type()) { + case INT_RESULT: + return MYSQL_VALUE_TYPE_INT; + case REAL_RESULT: + return MYSQL_VALUE_TYPE_REAL; + default: + return MYSQL_VALUE_TYPE_STRING; + } +} 
+ +static const char *item_val_str(struct st_mysql_value *value, + char *buffer, int *length) +{ + size_t org_length= *length; + String str(buffer, org_length, system_charset_info), *res; + if (!(res= ((st_item_value_holder*)value)->item->val_str(&str))) + return NULL; + *length= res->length(); + if (res->ptr() == buffer && res->length() < org_length) + { + buffer[res->length()]= 0; + return buffer; + } + + /* + Lets be nice and create a temporary string since the + buffer was too small + */ + return current_thd->strmake(res->ptr(), res->length()); +} + + +static int item_val_int(struct st_mysql_value *value, long long *buf) +{ + Item *item= ((st_item_value_holder*)value)->item; + *buf= item->val_int(); + if (item->is_null()) + return 1; + return 0; +} + +static int item_is_unsigned(struct st_mysql_value *value) +{ + Item *item= ((st_item_value_holder*)value)->item; + return item->unsigned_flag; +} + +static int item_val_real(struct st_mysql_value *value, double *buf) +{ + Item *item= ((st_item_value_holder*)value)->item; + *buf= item->val_real(); + if (item->is_null()) + return 1; + return 0; +} + + +/**************************************************************************** + Plugin support code +****************************************************************************/ + +#ifdef HAVE_DLOPEN + +static struct st_plugin_dl *plugin_dl_find(const LEX_CSTRING *dl) +{ + size_t i; + struct st_plugin_dl *tmp; + DBUG_ENTER("plugin_dl_find"); + for (i= 0; i < plugin_dl_array.elements; i++) + { + tmp= *dynamic_element(&plugin_dl_array, i, struct st_plugin_dl **); + if (tmp->ref_count && + ! 
files_charset_info->strnncoll(dl->str, dl->length, + tmp->dl.str, tmp->dl.length)) + DBUG_RETURN(tmp); + } + DBUG_RETURN(0); +} + + +static st_plugin_dl *plugin_dl_insert_or_reuse(struct st_plugin_dl *plugin_dl) +{ + size_t i; + struct st_plugin_dl *tmp; + DBUG_ENTER("plugin_dl_insert_or_reuse"); + for (i= 0; i < plugin_dl_array.elements; i++) + { + tmp= *dynamic_element(&plugin_dl_array, i, struct st_plugin_dl **); + if (! tmp->ref_count) + { + memcpy(tmp, plugin_dl, sizeof(struct st_plugin_dl)); + DBUG_RETURN(tmp); + } + } + if (insert_dynamic(&plugin_dl_array, (uchar*)&plugin_dl)) + DBUG_RETURN(0); + tmp= *dynamic_element(&plugin_dl_array, plugin_dl_array.elements - 1, + struct st_plugin_dl **)= + (struct st_plugin_dl *) memdup_root(&plugin_mem_root, (uchar*)plugin_dl, + sizeof(struct st_plugin_dl)); + DBUG_RETURN(tmp); +} +#else +static struct st_plugin_dl *plugin_dl_find(const LEX_STRING *) +{ + return 0; +} +#endif /* HAVE_DLOPEN */ + + +static void free_plugin_mem(struct st_plugin_dl *p) +{ +#ifdef HAVE_DLOPEN + if (p->ptr_backup) + { + DBUG_ASSERT(p->nbackups); + DBUG_ASSERT(p->handle); + restore_ptr_backup(p->nbackups, p->ptr_backup); + my_free(p->ptr_backup); + } + if (p->handle) + dlclose(p->handle); +#endif + my_free(const_cast(p->dl.str)); + if (p->allocated) + my_free(p->plugins); +} + + +/** + Reads data from mysql plugin interface + + @param plugin_dl Structure where the data should be put + @param sym Reverence on version info + @param dlpath Path to the module + @param MyFlags Where errors should be reported (0 or ME_ERROR_LOG) + + @retval FALSE OK + @retval TRUE ERROR +*/ + +#ifdef HAVE_DLOPEN +static my_bool read_mysql_plugin_info(struct st_plugin_dl *plugin_dl, + void *sym, char *dlpath, myf MyFlags) +{ + DBUG_ENTER("read_maria_plugin_info"); + /* Determine interface version */ + if (!sym) + { + my_error(ER_CANT_FIND_DL_ENTRY, MyFlags, plugin_interface_version_sym); + DBUG_RETURN(TRUE); + } + plugin_dl->mariaversion= 0; + 
plugin_dl->mysqlversion= *(int *)sym; + /* Versioning */ + if (plugin_dl->mysqlversion < min_plugin_interface_version || + (plugin_dl->mysqlversion >> 8) > (MYSQL_PLUGIN_INTERFACE_VERSION >> 8)) + { + my_error(ER_CANT_OPEN_LIBRARY, MyFlags, dlpath, ENOEXEC, + "plugin interface version mismatch"); + DBUG_RETURN(TRUE); + } + /* Find plugin declarations */ + if (!(sym= dlsym(plugin_dl->handle, plugin_declarations_sym))) + { + my_error(ER_CANT_FIND_DL_ENTRY, MyFlags, plugin_declarations_sym); + DBUG_RETURN(TRUE); + } + + /* convert mysql declaration to maria one */ + { + int i; + uint sizeof_st_plugin; + struct st_mysql_plugin *old; + struct st_maria_plugin *cur; + char *ptr= (char *)sym; + + if ((sym= dlsym(plugin_dl->handle, sizeof_st_plugin_sym))) + sizeof_st_plugin= *(int *)sym; + else + { + DBUG_ASSERT(min_plugin_interface_version == 0); + sizeof_st_plugin= (int)offsetof(struct st_mysql_plugin, version); + } + + for (i= 0; + ((struct st_mysql_plugin *)(ptr + i * sizeof_st_plugin))->info; + i++) + /* no op */; + + cur= (struct st_maria_plugin*) + my_malloc(key_memory_mysql_plugin, (i + 1) * sizeof(struct st_maria_plugin), + MYF(MY_ZEROFILL|MY_WME)); + if (!cur) + { + my_error(ER_OUTOFMEMORY, MyFlags, + static_cast(plugin_dl->dl.length)); + DBUG_RETURN(TRUE); + } + /* + All st_plugin fields not initialized in the plugin explicitly, are + set to 0. It matches C standard behaviour for struct initializers that + have less values than the struct definition. 
+ */ + for (i=0; + (old= (struct st_mysql_plugin *)(ptr + i * sizeof_st_plugin))->info; + i++) + { + + cur[i].type= old->type; + cur[i].info= old->info; + cur[i].name= old->name; + cur[i].author= old->author; + cur[i].descr= old->descr; + cur[i].license= old->license; + cur[i].init= old->init; + cur[i].deinit= old->deinit; + cur[i].version= old->version; + cur[i].status_vars= old->status_vars; + cur[i].system_vars= old->system_vars; + /* + Something like this should be added to process + new mysql plugin versions: + if (plugin_dl->mysqlversion > 0x0101) + { + cur[i].newfield= CONSTANT_MEANS_UNKNOWN; + } + else + { + cur[i].newfield= old->newfield; + } + */ + /* Maria only fields */ + cur[i].version_info= "Unknown"; + cur[i].maturity= MariaDB_PLUGIN_MATURITY_UNKNOWN; + } + plugin_dl->allocated= true; + plugin_dl->plugins= (struct st_maria_plugin *)cur; + } + + DBUG_RETURN(FALSE); +} + + +/** + Reads data from maria plugin interface + + @param plugin_dl Structure where the data should be put + @param sym Reverence on version info + @param dlpath Path to the module + @param MyFlags Where errors should be reported (0 or ME_ERROR_LOG) + + @retval FALSE OK + @retval TRUE ERROR +*/ + +static my_bool read_maria_plugin_info(struct st_plugin_dl *plugin_dl, + void *sym, char *dlpath, myf MyFlags) +{ + DBUG_ENTER("read_maria_plugin_info"); + + /* Determine interface version */ + if (!(sym)) + { + /* + Actually this branch impossible because in case of absence of maria + version we try mysql version. 
+ */ + my_error(ER_CANT_FIND_DL_ENTRY, MyFlags, + maria_plugin_interface_version_sym); + DBUG_RETURN(TRUE); + } + plugin_dl->mariaversion= *(int *)sym; + plugin_dl->mysqlversion= 0; + /* Versioning */ + if (plugin_dl->mariaversion < min_maria_plugin_interface_version || + (plugin_dl->mariaversion >> 8) > (MARIA_PLUGIN_INTERFACE_VERSION >> 8)) + { + my_error(ER_CANT_OPEN_LIBRARY, MyFlags, dlpath, ENOEXEC, + "plugin interface version mismatch"); + DBUG_RETURN(TRUE); + } + /* Find plugin declarations */ + if (!(sym= dlsym(plugin_dl->handle, maria_plugin_declarations_sym))) + { + my_error(ER_CANT_FIND_DL_ENTRY, MyFlags, maria_plugin_declarations_sym); + DBUG_RETURN(TRUE); + } + if (plugin_dl->mariaversion != MARIA_PLUGIN_INTERFACE_VERSION) + { + uint sizeof_st_plugin; + struct st_maria_plugin *old, *cur; + char *ptr= (char *)sym; + + if ((sym= dlsym(plugin_dl->handle, maria_sizeof_st_plugin_sym))) + sizeof_st_plugin= *(int *)sym; + else + { + my_error(ER_CANT_FIND_DL_ENTRY, MyFlags, maria_sizeof_st_plugin_sym); + DBUG_RETURN(TRUE); + } + + if (sizeof_st_plugin != sizeof(st_mysql_plugin)) + { + int i; + for (i= 0; + ((struct st_maria_plugin *)(ptr + i * sizeof_st_plugin))->info; + i++) + /* no op */; + + cur= (struct st_maria_plugin*) + my_malloc(key_memory_mysql_plugin, (i + 1) * sizeof(struct st_maria_plugin), + MYF(MY_ZEROFILL|MY_WME)); + if (!cur) + { + my_error(ER_OUTOFMEMORY, MyFlags, + static_cast(plugin_dl->dl.length)); + DBUG_RETURN(TRUE); + } + /* + All st_plugin fields not initialized in the plugin explicitly, are + set to 0. It matches C standard behaviour for struct initializers that + have less values than the struct definition. 
+ */ + for (i=0; + (old= (struct st_maria_plugin *)(ptr + i * sizeof_st_plugin))->info; + i++) + memcpy(cur + i, old, MY_MIN(sizeof(cur[i]), sizeof_st_plugin)); + + sym= cur; + plugin_dl->allocated= true; + } + else + sym= ptr; + } + plugin_dl->plugins= (struct st_maria_plugin *)sym; + + DBUG_RETURN(FALSE); +} +#endif /* HAVE_DLOPEN */ + +static st_plugin_dl *plugin_dl_add(const LEX_CSTRING *dl, myf MyFlags) +{ +#ifdef HAVE_DLOPEN + char dlpath[FN_REFLEN]; + size_t plugin_dir_len,i; + uint dummy_errors; + struct st_plugin_dl *tmp= 0, plugin_dl; + void *sym; + st_ptr_backup tmp_backup[array_elements(list_of_services)]; + DBUG_ENTER("plugin_dl_add"); + DBUG_PRINT("enter", ("dl->str: '%s', dl->length: %d", + dl->str, (int) dl->length)); + mysql_mutex_assert_owner(&LOCK_plugin); + plugin_dir_len= strlen(opt_plugin_dir); + /* + Ensure that the dll doesn't have a path. + This is done to ensure that only approved libraries from the + plugin directory are used (to make this even remotely secure). + */ + if (check_string_char_length((LEX_CSTRING *) dl, 0, NAME_CHAR_LEN, + system_charset_info, 1) || + check_valid_path(dl->str, dl->length) || + plugin_dir_len + dl->length + 1 >= FN_REFLEN) + { + my_error(ER_UDF_NO_PATHS, MyFlags); + DBUG_RETURN(0); + } + /* If this dll is already loaded just increase ref_count. 
*/ + if ((tmp= plugin_dl_find(dl))) + { + tmp->ref_count++; + DBUG_RETURN(tmp); + } + bzero(&plugin_dl, sizeof(plugin_dl)); + /* Compile dll path */ + strxnmov(dlpath, sizeof(dlpath) - 1, opt_plugin_dir, "/", dl->str, NullS); + (void) unpack_filename(dlpath, dlpath); + plugin_dl.ref_count= 1; + /* Open new dll handle */ + if (!(plugin_dl.handle= dlopen(dlpath, RTLD_NOW))) + { + my_error(ER_CANT_OPEN_LIBRARY, MyFlags, dlpath, errno, my_dlerror(dlpath)); + goto ret; + } + dlopen_count++; + +#ifdef HAVE_LINK_H + if (global_system_variables.log_warnings > 2) + { + struct link_map *lm = (struct link_map*) plugin_dl.handle; + sql_print_information("Loaded '%s' with offset 0x%zx", dl->str, (size_t)lm->l_addr); + } +#endif + + /* Checks which plugin interface present and reads info */ + if (!(sym= dlsym(plugin_dl.handle, maria_plugin_interface_version_sym))) + { + if (read_mysql_plugin_info(&plugin_dl, + dlsym(plugin_dl.handle, + plugin_interface_version_sym), + dlpath, + MyFlags)) + goto ret; + } + else + { + if (read_maria_plugin_info(&plugin_dl, sym, dlpath, MyFlags)) + goto ret; + } + + /* link the services in */ + for (i= 0; i < array_elements(list_of_services); i++) + { + if ((sym= dlsym(plugin_dl.handle, list_of_services[i].name))) + { + void **ptr= (void **)sym; + uint ver= (uint)(intptr)*ptr; + if (ver > list_of_services[i].version || + (ver >> 8) < (list_of_services[i].version >> 8)) + { + char buf[MYSQL_ERRMSG_SIZE]; + my_snprintf(buf, sizeof(buf), + "service '%s' interface version mismatch", + list_of_services[i].name); + my_error(ER_CANT_OPEN_LIBRARY, MyFlags, dlpath, ENOEXEC, buf); + goto ret; + } + tmp_backup[plugin_dl.nbackups++].save(ptr); + *ptr= list_of_services[i].service; + } + } + + if (plugin_dl.nbackups) + { + size_t bytes= plugin_dl.nbackups * sizeof(plugin_dl.ptr_backup[0]); + plugin_dl.ptr_backup= (st_ptr_backup *)my_malloc(key_memory_mysql_plugin_dl, + bytes, MYF(0)); + if (!plugin_dl.ptr_backup) + { + restore_ptr_backup(plugin_dl.nbackups, 
tmp_backup); + my_error(ER_OUTOFMEMORY, MyFlags, bytes); + goto ret; + } + memcpy(plugin_dl.ptr_backup, tmp_backup, bytes); + } + + /* Duplicate and convert dll name */ + plugin_dl.dl.length= dl->length * files_charset_info->mbmaxlen + 1; + if (! (plugin_dl.dl.str= (char*) my_malloc(key_memory_mysql_plugin_dl, + plugin_dl.dl.length, MYF(0)))) + { + my_error(ER_OUTOFMEMORY, MyFlags, + static_cast(plugin_dl.dl.length)); + goto ret; + } + plugin_dl.dl.length= copy_and_convert((char*) plugin_dl.dl.str, + plugin_dl.dl.length, + files_charset_info, dl->str, + dl->length, system_charset_info, + &dummy_errors); + ((char*) plugin_dl.dl.str)[plugin_dl.dl.length]= 0; + /* Add this dll to array */ + if (! (tmp= plugin_dl_insert_or_reuse(&plugin_dl))) + { + my_error(ER_OUTOFMEMORY, MyFlags, + static_cast(sizeof(struct st_plugin_dl))); + goto ret; + } + +ret: + if (!tmp) + free_plugin_mem(&plugin_dl); + + DBUG_RETURN(tmp); + +#else + DBUG_ENTER("plugin_dl_add"); + my_error(ER_FEATURE_DISABLED, MyFlags, "plugin", "HAVE_DLOPEN"); + DBUG_RETURN(0); +#endif +} + + +static void plugin_dl_del(struct st_plugin_dl *plugin_dl) +{ + DBUG_ENTER("plugin_dl_del"); + + if (!plugin_dl) + DBUG_VOID_RETURN; + + mysql_mutex_assert_owner(&LOCK_plugin); + + /* Do not remove this element, unless no other plugin uses this dll. */ + if (! --plugin_dl->ref_count) + { + free_plugin_mem(plugin_dl); + bzero(plugin_dl, sizeof(struct st_plugin_dl)); + } + + DBUG_VOID_RETURN; +} + + +static struct st_plugin_int *plugin_find_internal(const LEX_CSTRING *name, + int type) +{ + uint i; + DBUG_ENTER("plugin_find_internal"); + if (! 
initialized) + DBUG_RETURN(0); + + mysql_mutex_assert_owner(&LOCK_plugin); + + if (type == MYSQL_ANY_PLUGIN) + { + for (i= 0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++) + { + struct st_plugin_int *plugin= (st_plugin_int *) + my_hash_search(&plugin_hash[i], (const uchar *)name->str, name->length); + if (plugin) + DBUG_RETURN(plugin); + } + } + else + DBUG_RETURN((st_plugin_int *) + my_hash_search(&plugin_hash[type], (const uchar *)name->str, + name->length)); + DBUG_RETURN(0); +} + + +static SHOW_COMP_OPTION plugin_status(const LEX_CSTRING *name, int type) +{ + SHOW_COMP_OPTION rc= SHOW_OPTION_NO; + struct st_plugin_int *plugin; + DBUG_ENTER("plugin_is_ready"); + mysql_mutex_lock(&LOCK_plugin); + if ((plugin= plugin_find_internal(name, type))) + { + rc= SHOW_OPTION_DISABLED; + if (plugin->state == PLUGIN_IS_READY) + rc= SHOW_OPTION_YES; + } + mysql_mutex_unlock(&LOCK_plugin); + DBUG_RETURN(rc); +} + + +bool plugin_is_ready(const LEX_CSTRING *name, int type) +{ + bool rc= FALSE; + if (plugin_status(name, type) == SHOW_OPTION_YES) + rc= TRUE; + return rc; +} + + +SHOW_COMP_OPTION plugin_status(const char *name, size_t len, int type) +{ + LEX_CSTRING plugin_name= { name, len }; + return plugin_status(&plugin_name, type); +} + + +/* + If LEX is passed non-NULL, an automatic unlock of the plugin will happen + in the LEX destructor. +*/ +static plugin_ref intern_plugin_lock(LEX *lex, plugin_ref rc, + uint state_mask= PLUGIN_IS_READY | + PLUGIN_IS_UNINITIALIZED | + PLUGIN_IS_DELETED) +{ + st_plugin_int *pi= plugin_ref_to_int(rc); + DBUG_ENTER("intern_plugin_lock"); + + mysql_mutex_assert_owner(&LOCK_plugin); + + if (pi->state & state_mask) + { + plugin_ref plugin; +#ifdef DBUG_OFF + /* + In optimized builds we don't do reference counting for built-in + (plugin->plugin_dl == 0) plugins. 
    */
    if (!pi->plugin_dl)
      DBUG_RETURN(pi);

    plugin= pi;
#else
    /*
      For debugging, we do an additional malloc which allows the
      memory manager and/or valgrind to track locked references and
      double unlocks to aid resolving reference counting problems.
    */
    if (!(plugin= (plugin_ref) my_malloc(PSI_NOT_INSTRUMENTED, sizeof(pi),
                                         MYF(MY_WME))))
      DBUG_RETURN(NULL);

    *plugin= pi;
#endif
    pi->ref_count++;
    DBUG_PRINT("lock",("thd: %p plugin: \"%s\" LOCK ref_count: %d",
                       current_thd, pi->name.str, pi->ref_count));

    /* Record the lock in the LEX so its destructor unlocks automatically. */
    if (lex)
      insert_dynamic(&lex->plugins, (uchar*)&plugin);
    DBUG_RETURN(plugin);
  }
  DBUG_RETURN(NULL);
}


/*
  Notes on lifetime:

  If THD is passed as non-NULL (and with a non-NULL thd->lex), an entry is made
  in the thd->lex which will cause an automatic unlock of the plugin in the LEX
  destructor. In this case, no manual unlock must be done.

  Otherwise, when passing a NULL THD, the caller must arrange that plugin
  unlock happens later.
*/
plugin_ref plugin_lock(THD *thd, plugin_ref ptr)
{
  LEX *lex= thd ? thd->lex : 0;
  plugin_ref rc;
  DBUG_ENTER("plugin_lock");

#ifdef DBUG_OFF
  /*
    In optimized builds we don't do reference counting for built-in
    (plugin->plugin_dl == 0) plugins.

    Note that we access plugin->plugin_dl outside of LOCK_plugin, and for
    dynamic plugins a 'plugin' could correspond to plugin that was unloaded
    meanwhile!  But because st_plugin_int is always allocated on
    plugin_mem_root, the pointer can never be invalid - the memory is never
    freed.
    Of course, the memory that 'plugin' points to can be overwritten by
    another plugin being loaded, but plugin->plugin_dl can never change
    from zero to non-zero or vice versa.
    That is, it's always safe to check for plugin->plugin_dl==0 even
    without a mutex.
  */
  if (! plugin_dlib(ptr))
  {
    plugin_ref_to_int(ptr)->locks_total++;
    DBUG_RETURN(ptr);
  }
#endif
  mysql_mutex_lock(&LOCK_plugin);
  plugin_ref_to_int(ptr)->locks_total++;
  rc= intern_plugin_lock(lex, ptr);
  mysql_mutex_unlock(&LOCK_plugin);
  DBUG_RETURN(rc);
}


/*
  Notes on lifetime:

  If THD is passed as non-NULL (and with a non-NULL thd->lex), an entry is made
  in the thd->lex which will cause an automatic unlock of the plugin in the LEX
  destructor. In this case, no manual unlock must be done.

  Otherwise, when passing a NULL THD, the caller must arrange that plugin
  unlock happens later.
*/
plugin_ref plugin_lock_by_name(THD *thd, const LEX_CSTRING *name, int type)
{
  LEX *lex= thd ? thd->lex : 0;
  plugin_ref rc= NULL;
  st_plugin_int *plugin;
  DBUG_ENTER("plugin_lock_by_name");
  if (!name->length)
    DBUG_RETURN(NULL);
  mysql_mutex_lock(&LOCK_plugin);
  if ((plugin= plugin_find_internal(name, type)))
    rc= intern_plugin_lock(lex, plugin_int_to_ref(plugin));
  mysql_mutex_unlock(&LOCK_plugin);
  DBUG_RETURN(rc);
}


/*
  Store a plugin descriptor in plugin_array: reuse a PLUGIN_IS_FREED slot
  if one exists, otherwise append a copy allocated on plugin_mem_root
  (st_plugin_int memory is never freed - see plugin_lock() above).
*/
static st_plugin_int *plugin_insert_or_reuse(struct st_plugin_int *plugin)
{
  size_t i;
  struct st_plugin_int *tmp;
  DBUG_ENTER("plugin_insert_or_reuse");
  for (i= 0; i < plugin_array.elements; i++)
  {
    tmp= *dynamic_element(&plugin_array, i, struct st_plugin_int **);
    if (tmp->state == PLUGIN_IS_FREED)
    {
      memcpy(tmp, plugin, sizeof(struct st_plugin_int));
      DBUG_RETURN(tmp);
    }
  }
  if (insert_dynamic(&plugin_array, (uchar*)&plugin))
    DBUG_RETURN(0);
  tmp= *dynamic_element(&plugin_array, plugin_array.elements - 1,
                        struct st_plugin_int **)=
       (struct st_plugin_int *) memdup_root(&plugin_mem_root, (uchar*)plugin,
                                            sizeof(struct st_plugin_int));
  DBUG_RETURN(tmp);
}


/*
  NOTE
    Requires that a write-lock is held on LOCK_system_variables_hash
*/
static enum install_status plugin_add(MEM_ROOT *tmp_root, bool if_not_exists,
                                      const LEX_CSTRING *name,
                                      LEX_CSTRING *dl, myf MyFlags)
{
  struct
         st_plugin_int tmp, *maybe_dupe;
  struct st_maria_plugin *plugin;
  uint oks= 0, errs= 0, dupes= 0;
  DBUG_ENTER("plugin_add");
  DBUG_PRINT("enter", ("name: %s dl: %s", name->str, dl->str));

  /* name->str == 0 means "install every plugin found in the library". */
  if (name->str && plugin_find_internal(name, MYSQL_ANY_PLUGIN))
  {
    if (if_not_exists)
      MyFlags|= ME_NOTE;
    my_error(ER_PLUGIN_INSTALLED, MyFlags, name->str);
    DBUG_RETURN(if_not_exists ? INSTALL_FAIL_WARN_OK : INSTALL_FAIL_NOT_OK);
  }
  /* Clear the whole struct to catch future extensions. */
  bzero((char*) &tmp, sizeof(tmp));
  fix_dl_name(tmp_root, dl);
  if (! (tmp.plugin_dl= plugin_dl_add(dl, MyFlags)))
    DBUG_RETURN(INSTALL_FAIL_NOT_OK);
  /* Find plugin by name */
  for (plugin= tmp.plugin_dl->plugins; plugin->info; plugin++)
  {
    tmp.name.str= (char *)plugin->name;
    tmp.name.length= strlen(plugin->name);

    if (plugin->type < 0 || plugin->type >= MYSQL_MAX_PLUGIN_TYPE_NUM)
      continue; // invalid plugin type

    if (plugin->type == MYSQL_UDF_PLUGIN ||
        (plugin->type == MariaDB_PASSWORD_VALIDATION_PLUGIN &&
         tmp.plugin_dl->mariaversion == 0))
      continue; // unsupported plugin type

    if (name->str && system_charset_info->strnncoll(name->str, name->length,
                                                    tmp.name.str,
                                                    tmp.name.length))
      continue; // plugin name doesn't match

    if (!name->str &&
        (maybe_dupe= plugin_find_internal(&tmp.name, MYSQL_ANY_PLUGIN)))
    {
      /* Same name from a different descriptor is a genuine conflict. */
      if (plugin->name != maybe_dupe->plugin->name)
      {
        my_error(ER_UDF_EXISTS, MyFlags, plugin->name);
        DBUG_RETURN(INSTALL_FAIL_NOT_OK);
      }
      dupes++;
      continue; // already installed
    }
    struct st_plugin_int *tmp_plugin_ptr;
    /* Reject plugins compiled against an incompatible API version range. */
    if (*(int*)plugin->info <
        min_plugin_info_interface_version[plugin->type] ||
        ((*(int*)plugin->info) >> 8) >
        (cur_plugin_info_interface_version[plugin->type] >> 8))
    {
      char buf[256];
      strxnmov(buf, sizeof(buf) - 1, "API version for ",
               plugin_type_names[plugin->type].str,
               " plugin ", tmp.name.str,
               " not supported by this version of the server", NullS);
      my_error(ER_CANT_OPEN_LIBRARY, MyFlags, dl->str, ENOEXEC, buf);
      goto err;
    }

    if (plugin_maturity_map[plugin->maturity] < plugin_maturity)
    {
      char buf[256];
      strxnmov(buf, sizeof(buf) - 1, "Loading of ",
               plugin_maturity_names[plugin->maturity],
               " plugin ", tmp.name.str,
               " is prohibited by --plugin-maturity=",
               plugin_maturity_names[plugin_maturity],
               NullS);
      my_error(ER_CANT_OPEN_LIBRARY, MyFlags, dl->str, EPERM, buf);
      goto err;
    }
    else if (plugin_maturity_map[plugin->maturity] < SERVER_MATURITY_LEVEL)
    {
      sql_print_warning("Plugin '%s' is of maturity level %s while the server is %s",
                        tmp.name.str,
                        plugin_maturity_names[plugin->maturity],
                        plugin_maturity_names[SERVER_MATURITY_LEVEL]);
    }

    tmp.plugin= plugin;
    tmp.ref_count= 0;
    tmp.state= PLUGIN_IS_UNINITIALIZED;
    tmp.load_option= PLUGIN_ON;

    if (!(tmp_plugin_ptr= plugin_insert_or_reuse(&tmp)))
      goto err;
    if (my_hash_insert(&plugin_hash[plugin->type], (uchar*)tmp_plugin_ptr))
      tmp_plugin_ptr->state= PLUGIN_IS_FREED;
    init_alloc_root(key_memory_plugin_int_mem_root, &tmp_plugin_ptr->mem_root,
                    4096, 4096, MYF(0));

    if (name->str)
      DBUG_RETURN(INSTALL_GOOD); // all done

    oks++;
    tmp.plugin_dl->ref_count++;
    continue; // otherwise - go on

err:
    errs++;
    if (name->str)
      break;
  }

  DBUG_ASSERT(!name->str || !dupes); // dupes is ONLY for name->str == 0

  if (errs == 0 && oks == 0 && !dupes) // no plugin was found
    my_error(ER_CANT_FIND_DL_ENTRY, MyFlags, name->str);

  plugin_dl_del(tmp.plugin_dl);
  if (errs > 0 || oks + dupes == 0)
    DBUG_RETURN(INSTALL_FAIL_NOT_OK);
  DBUG_RETURN(INSTALL_GOOD);
}

/* Unregister the plugin's system variables and mark them as not loaded. */
static void plugin_variables_deinit(struct st_plugin_int *plugin)
{

  for (sys_var *var= plugin->system_vars; var; var= var->next)
    (*var->test_load)= FALSE;
  mysql_del_sys_var_chain(plugin->system_vars);
}

static void plugin_deinitialize(struct st_plugin_int *plugin, bool ref_check)
{
  /*
    we don't want to hold the LOCK_plugin mutex as it may cause
    deinitialization to deadlock
    if plugins have worker threads
    with plugin locks
  */
  mysql_mutex_assert_not_owner(&LOCK_plugin);

  if (plugin->plugin->status_vars)
  {
    /*
      historical ndb behavior caused MySQL plugins to specify
      status var names in full, with the plugin name prefix.
      this was never fixed in MySQL.
      MariaDB fixes that but supports MySQL style too.
    */
    SHOW_VAR *show_vars= plugin->plugin->status_vars;
    SHOW_VAR tmp_array[2]= {
      {plugin->plugin->name, (char*)plugin->plugin->status_vars, SHOW_ARRAY},
      {0, 0, SHOW_UNDEF}
    };
    if (strncasecmp(show_vars->name, plugin->name.str, plugin->name.length))
      show_vars= tmp_array;

    remove_status_vars(show_vars);
  }

  /* Per-type deinit hook takes precedence over the plugin's own deinit. */
  plugin_type_init deinit= plugin_type_deinitialize[plugin->plugin->type];
  if (!deinit)
    deinit= (plugin_type_init)(plugin->plugin->deinit);

  if (deinit && deinit(plugin))
  {
    if (THD *thd= current_thd)
      push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
                   WARN_PLUGIN_BUSY, ER_THD(thd, WARN_PLUGIN_BUSY));
  }
  else
    plugin->state= PLUGIN_IS_UNINITIALIZED; // free to unload

  if (ref_check && plugin->ref_count)
    sql_print_error("Plugin '%s' has ref_count=%d after deinitialization.",
                    plugin->name.str, plugin->ref_count);
  plugin_variables_deinit(plugin);
}

/* Remove a deinitialized plugin from the registry if its state allows it. */
static void plugin_del(struct st_plugin_int *plugin, uint del_mask)
{
  DBUG_ENTER("plugin_del");
  mysql_mutex_assert_owner(&LOCK_plugin);
  del_mask|= PLUGIN_IS_UNINITIALIZED | PLUGIN_IS_DISABLED; // always use these
  if (!(plugin->state & del_mask))
    DBUG_VOID_RETURN;
  /* Free allocated strings before deleting the plugin. */
  plugin_vars_free_values(plugin->plugin->system_vars);
  restore_ptr_backup(plugin->nbackups, plugin->ptr_backup);
  if (plugin->plugin_dl)
  {
    my_hash_delete(&plugin_hash[plugin->plugin->type], (uchar*)plugin);
    plugin_dl_del(plugin->plugin_dl);
    plugin->state= PLUGIN_IS_FREED;
    free_root(&plugin->mem_root, MYF(0));
  }
  else
    plugin->state= PLUGIN_IS_UNINITIALIZED;
  DBUG_VOID_RETURN;
}

/*
  Deinitialize and delete all plugins in PLUGIN_IS_DELETED state with no
  remaining references.  Temporarily releases LOCK_plugin around the
  deinit calls (see plugin_deinitialize).
*/
static void reap_plugins(void)
{
  size_t count;
  struct st_plugin_int *plugin, **reap, **list;

  mysql_mutex_assert_owner(&LOCK_plugin);

  if (!reap_needed)
    return;

  reap_needed= false;
  count= plugin_array.elements;
  /* NULL sentinel first; list is walked backwards below. */
  reap= (struct st_plugin_int **)my_alloca(sizeof(plugin)*(count+1));
  *(reap++)= NULL;

  for (uint i=0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++)
  {
    HASH *hash= plugin_hash + plugin_type_initialization_order[i];
    for (uint j= 0; j < hash->records; j++)
    {
      plugin= (struct st_plugin_int *) my_hash_element(hash, j);
      if (plugin->state == PLUGIN_IS_DELETED && !plugin->ref_count)
      {
        /* change the status flag to prevent reaping by another thread */
        plugin->state= PLUGIN_IS_DYING;
        *(reap++)= plugin;
      }
    }
  }

  mysql_mutex_unlock(&LOCK_plugin);

  list= reap;
  while ((plugin= *(--list)))
    plugin_deinitialize(plugin, true);

  mysql_mutex_lock(&LOCK_plugin);

  while ((plugin= *(--reap)))
    plugin_del(plugin, 0);

  my_afree(reap);
}

/* Drop one reference taken by intern_plugin_lock(); LOCK_plugin held. */
static void intern_plugin_unlock(LEX *lex, plugin_ref plugin)
{
  ssize_t i;
  st_plugin_int *pi;
  DBUG_ENTER("intern_plugin_unlock");

  mysql_mutex_assert_owner(&LOCK_plugin);

  if (!plugin)
    DBUG_VOID_RETURN;

  pi= plugin_ref_to_int(plugin);

#ifdef DBUG_OFF
  if (!pi->plugin_dl)
    DBUG_VOID_RETURN;
#else
  /* Debug builds malloc'ed the ref in intern_plugin_lock(); release it. */
  my_free(plugin);
#endif

  if (lex)
  {
    /*
      Remove one instance of this plugin from the use list.
      We are searching backwards so that plugins locked last
      could be unlocked faster - optimizing for LIFO semantics.
    */
    for (i= lex->plugins.elements - 1; i >= 0; i--)
      if (plugin == *dynamic_element(&lex->plugins, i, plugin_ref*))
      {
        delete_dynamic_element(&lex->plugins, i);
        break;
      }
    DBUG_ASSERT(i >= 0);
  }

  DBUG_ASSERT(pi->ref_count);
  pi->ref_count--;

  DBUG_PRINT("lock",("thd: %p plugin: \"%s\" UNLOCK ref_count: %d",
                     current_thd, pi->name.str, pi->ref_count));

  if (pi->state == PLUGIN_IS_DELETED && !pi->ref_count)
    reap_needed= true;

  DBUG_VOID_RETURN;
}


void plugin_unlock(THD *thd, plugin_ref plugin)
{
  LEX *lex= thd ? thd->lex : 0;
  DBUG_ENTER("plugin_unlock");
  if (!plugin)
    DBUG_VOID_RETURN;
#ifdef DBUG_OFF
  /* built-in plugins don't need ref counting */
  if (!plugin_dlib(plugin))
    DBUG_VOID_RETURN;
#endif
  mysql_mutex_lock(&LOCK_plugin);
  intern_plugin_unlock(lex, plugin);
  reap_plugins();
  mysql_mutex_unlock(&LOCK_plugin);
  DBUG_VOID_RETURN;
}


/* Unlock an array of plugin references under a single LOCK_plugin cycle. */
void plugin_unlock_list(THD *thd, plugin_ref *list, size_t count)
{
  LEX *lex= thd ? thd->lex : 0;
  DBUG_ENTER("plugin_unlock_list");
  if (count == 0)
    DBUG_VOID_RETURN;

  DBUG_ASSERT(list);
  mysql_mutex_lock(&LOCK_plugin);
  while (count--)
    intern_plugin_unlock(lex, *list++);
  reap_plugins();
  mysql_mutex_unlock(&LOCK_plugin);
  DBUG_VOID_RETURN;
}

static void print_init_failed_error(st_plugin_int *p)
{
  sql_print_error("Plugin '%s' registration as a %s failed.",
                  p->name.str,
                  plugin_type_names[p->plugin->type].str);
}

/*
  Run the plugin's init hook and register its status variables.
  Returns 0 on success, HA_ERR_RETRY_INIT if the plugin asked to be
  retried later, any other non-zero value on failure.
*/
static int plugin_do_initialize(struct st_plugin_int *plugin, uint &state)
{
  DBUG_ENTER("plugin_do_initialize");
  mysql_mutex_assert_not_owner(&LOCK_plugin);
  /* Per-type init hook takes precedence over the plugin's own init. */
  plugin_type_init init= plugin_type_initialize[plugin->plugin->type];
  if (!init)
    init= (plugin_type_init) plugin->plugin->init;
  if (init)
    if (int ret= init(plugin))
    {
      /* Plugin init failed and did not requested a retry */
      if (ret != HA_ERR_RETRY_INIT)
        print_init_failed_error(plugin);
      DBUG_RETURN(ret);
    }
  state= PLUGIN_IS_READY; // plugin->init() succeeded

  if (plugin->plugin->status_vars)
  {
    /*
      historical ndb behavior caused MySQL plugins to specify
      status var names in full, with the plugin name prefix.
      this was never fixed in MySQL.
      MariaDB fixes that but supports MySQL style too.
    */
    SHOW_VAR *show_vars= plugin->plugin->status_vars;
    SHOW_VAR tmp_array[2]= {{plugin->plugin->name,
                             (char *) plugin->plugin->status_vars, SHOW_ARRAY},
                            {0, 0, SHOW_UNDEF}};
    if (strncasecmp(show_vars->name, plugin->name.str, plugin->name.length))
      show_vars= tmp_array;

    if (add_status_vars(show_vars))
      DBUG_RETURN(1);
  }
  DBUG_RETURN(0);
}

/*
  Process the plugin's options and, unless options_only or disabled,
  initialize it.  Enters and leaves with LOCK_plugin held, but releases
  it around option processing and the init call.
*/
static int plugin_initialize(MEM_ROOT *tmp_root, struct st_plugin_int *plugin,
                             int *argc, char **argv, bool options_only)
{
  int ret= 1;
  DBUG_ENTER("plugin_initialize");

  mysql_mutex_assert_owner(&LOCK_plugin);
  uint state= plugin->state;
  DBUG_ASSERT(state == PLUGIN_IS_UNINITIALIZED);

  mysql_mutex_unlock(&LOCK_plugin);

  mysql_prlock_wrlock(&LOCK_system_variables_hash);
  if (test_plugin_options(tmp_root, plugin, argc, argv))
    state= PLUGIN_IS_DISABLED;
  mysql_prlock_unlock(&LOCK_system_variables_hash);

  if (options_only || state == PLUGIN_IS_DISABLED)
  {
    /* A disabled mandatory (forced) plugin is still a hard error. */
    ret= !options_only && plugin_is_forced(plugin);
    state= PLUGIN_IS_DISABLED;
  }
  else
    ret= plugin_do_initialize(plugin, state);

  if (ret)
    plugin_variables_deinit(plugin);

  mysql_mutex_lock(&LOCK_plugin);
  plugin->state= state;

  DBUG_RETURN(ret);
}


extern "C" uchar *get_plugin_hash_key(const uchar *, size_t *, my_bool);
extern "C" uchar *get_bookmark_hash_key(const uchar *, size_t *, my_bool);


/* Hash callback: key a st_plugin_int entry by its name. */
uchar *get_plugin_hash_key(const uchar *buff, size_t *length,
                           my_bool not_used __attribute__((unused)))
{
  struct st_plugin_int *plugin= (st_plugin_int *)buff;
  *length= (uint)plugin->name.length;
  return((uchar *)plugin->name.str);
}


/* Hash callback: key a st_bookmark entry by its key buffer. */
uchar *get_bookmark_hash_key(const uchar *buff, size_t *length,
                             my_bool not_used __attribute__((unused)))
{
  struct st_bookmark *var= (st_bookmark *)buff;
  *length= var->name_len + 1;
  return (uchar*) var->key;
}

/* Note: the loop deliberately runs through index len (the terminator). */
static inline void convert_dash_to_underscore(char *str, size_t len)
{
  for (char *p= str; p <= str+len; p++)
    if (*p == '-')
      *p= '_';
}

static inline void convert_underscore_to_dash(char *str, size_t len)
{
  for (char *p= str; p <= str+len; p++)
    if (*p == '_')
      *p= '-';
}

#ifdef HAVE_PSI_INTERFACE
static PSI_mutex_key key_LOCK_plugin;

static PSI_mutex_info all_plugin_mutexes[]=
{
  { &key_LOCK_plugin, "LOCK_plugin", PSI_FLAG_GLOBAL}
};

static PSI_memory_info all_plugin_memory[]=
{
  { &key_memory_plugin_mem_root, "plugin_mem_root", PSI_FLAG_GLOBAL},
  { &key_memory_plugin_int_mem_root, "plugin_int_mem_root", 0},
  { &key_memory_mysql_plugin_dl, "mysql_plugin_dl", 0},
  { &key_memory_mysql_plugin, "mysql_plugin", 0},
  { &key_memory_plugin_bookmark, "plugin_bookmark", PSI_FLAG_GLOBAL}
};

/* Register this module's mutex and memory keys with performance schema. */
static void init_plugin_psi_keys(void)
{
  const char* category= "sql";
  int count;

  if (PSI_server == NULL)
    return;

  count= array_elements(all_plugin_mutexes);
  PSI_server->register_mutex(category, all_plugin_mutexes, count);

  count= array_elements(all_plugin_memory);
  mysql_memory_register(category, all_plugin_memory, count);
}
#else
static void init_plugin_psi_keys(void) {}
#endif /* HAVE_PSI_INTERFACE */

/*
  The logic is that we first load and initialize all compiled in plugins.
  From there we load up the dynamic types (assuming we have not been told to
  skip this part).

  Finally we initialize everything, aka the dynamic that have yet to initialize.
*/
int plugin_init(int *argc, char **argv, int flags)
{
  size_t i;
  struct st_maria_plugin **builtins;
  struct st_maria_plugin *plugin;
  struct st_plugin_int tmp, *plugin_ptr, **reap, **retry_end, **retry_start;
  MEM_ROOT tmp_root;
  bool reaped_mandatory_plugin= false;
  bool mandatory= true;
  I_List_iterator<i_string> opt_plugin_load_list_iter(opt_plugin_load_list);
  char plugin_table_engine_name_buf[NAME_CHAR_LEN + 1];
  LEX_CSTRING plugin_table_engine_name= { plugin_table_engine_name_buf, 0 };
  LEX_CSTRING MyISAM= { STRING_WITH_LEN("MyISAM") };
  DBUG_ENTER("plugin_init");

  if (initialized)
    DBUG_RETURN(0);

  dlopen_count =0;

  init_plugin_psi_keys();

  init_alloc_root(key_memory_plugin_mem_root, &plugin_mem_root, 4096, 4096, MYF(0));
  init_alloc_root(key_memory_plugin_mem_root, &plugin_vars_mem_root, 4096, 4096, MYF(0));
  init_alloc_root(PSI_NOT_INSTRUMENTED, &tmp_root, 4096, 4096, MYF(0));

  if (my_hash_init(key_memory_plugin_bookmark, &bookmark_hash, &my_charset_bin, 32, 0, 0,
                   get_bookmark_hash_key, NULL, HASH_UNIQUE))
    goto err;

  /*
    The 80 is from 2016-04-27 when we had 71 default plugins
    Big enough to avoid many mallocs even in future
  */
  if (my_init_dynamic_array(key_memory_mysql_plugin_dl, &plugin_dl_array,
                            sizeof(struct st_plugin_dl *), 16, 16, MYF(0)) ||
      my_init_dynamic_array(key_memory_mysql_plugin, &plugin_array,
                            sizeof(struct st_plugin_int *), 80, 32, MYF(0)))
    goto err;

  for (i= 0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++)
  {
    if (my_hash_init(key_memory_plugin_mem_root, &plugin_hash[i], system_charset_info, 32, 0, 0,
                     get_plugin_hash_key, NULL, HASH_UNIQUE))
      goto err;
  }

  /* prepare debug_sync service */
  DBUG_ASSERT(strcmp(list_of_services[1].name, "debug_sync_service") == 0);
  list_of_services[1].service= *(void**)&debug_sync_C_callback_ptr;

  /* prepare encryption_keys service */
  finalize_encryption_plugin(0);

  mysql_mutex_lock(&LOCK_plugin);

  initialized= 1;

  /*
    First we register builtin plugins
  */
  if (global_system_variables.log_warnings >= 9)
    sql_print_information("Initializing built-in plugins");

  /* mysql_mandatory_plugins first, then mysql_optional_plugins. */
  for (builtins= mysql_mandatory_plugins; *builtins || mandatory; builtins++)
  {
    if (!*builtins)
    {
      builtins= mysql_optional_plugins;
      mandatory= false;
      if (!*builtins)
        break;
    }
    for (plugin= *builtins; plugin->info; plugin++)
    {
      if (opt_ignore_builtin_innodb &&
          !my_charset_latin1.strnncoll(plugin->name, 6, "InnoDB", 6))
        continue;

      bzero(&tmp, sizeof(tmp));
      tmp.plugin= plugin;
      tmp.name.str= (char *)plugin->name;
      tmp.name.length= strlen(plugin->name);
      tmp.state= 0;
      tmp.load_option= mandatory ? PLUGIN_FORCE : PLUGIN_ON;

      for (i=0; i < array_elements(override_plugin_load_policy); i++)
      {
        if (!my_strcasecmp(&my_charset_latin1, plugin->name,
                           override_plugin_load_policy[i].plugin_name))
        {
          tmp.load_option= override_plugin_load_policy[i].override;
          break;
        }
      }

      free_root(&tmp_root, MYF(MY_MARK_BLOCKS_FREE));
      tmp.state= PLUGIN_IS_UNINITIALIZED;
      if (register_builtin(plugin, &tmp, &plugin_ptr))
        goto err_unlock;
    }
  }

  /*
    First, we initialize only MyISAM - that should almost always succeed
    (almost always, because plugins can be loaded outside of the server, too).
  */
  plugin_ptr= plugin_find_internal(&MyISAM, MYSQL_STORAGE_ENGINE_PLUGIN);
  DBUG_ASSERT(plugin_ptr || !mysql_mandatory_plugins[0]);
  if (plugin_ptr)
  {
    DBUG_ASSERT(plugin_ptr->load_option == PLUGIN_FORCE);

    if (plugin_initialize(&tmp_root, plugin_ptr, argc, argv, false))
      goto err_unlock;

    /*
      set the global default storage engine variable so that it will
      not be null in any child thread.
    */
    global_system_variables.table_plugin =
      intern_plugin_lock(NULL, plugin_int_to_ref(plugin_ptr));
    DBUG_SLOW_ASSERT(plugin_ptr->ref_count == 1);
  }
  mysql_mutex_unlock(&LOCK_plugin);

  /* Register (not initialize!) all dynamic plugins */
  if (global_system_variables.log_warnings >= 9)
    sql_print_information("Initializing plugins specified on the command line");
  while (i_string *item= opt_plugin_load_list_iter++)
    plugin_load_list(&tmp_root, item->ptr);

  if (!(flags & PLUGIN_INIT_SKIP_PLUGIN_TABLE))
  {
    /* Find out which engine hosts mysql.plugin, so it can be initialized
       before the table is read below. */
    char path[FN_REFLEN + 1];
    build_table_filename(path, sizeof(path) - 1, "mysql", "plugin", reg_ext, 0);
    Table_type ttype= dd_frm_type(0, path, &plugin_table_engine_name,
                                  NULL, NULL);
    if (ttype != TABLE_TYPE_NORMAL)
      plugin_table_engine_name=empty_clex_str;
  }

  /*
    Now we initialize all remaining plugins
  */

  mysql_mutex_lock(&LOCK_plugin);
  /* List of plugins to reap */
  reap= (st_plugin_int **) my_alloca((plugin_array.elements+1) * sizeof(void*));
  *(reap++)= NULL;
  /* List of plugins to retry */
  retry_start= retry_end=
    (st_plugin_int **) my_alloca((plugin_array.elements+1) * sizeof(void*));

  for(;;)
  {
    int error;
    for (i=0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++)
    {
      HASH *hash= plugin_hash + plugin_type_initialization_order[i];
      for (uint idx= 0; idx < hash->records; idx++)
      {
        plugin_ptr= (struct st_plugin_int *) my_hash_element(hash, idx);
        if (plugin_ptr->state == PLUGIN_IS_UNINITIALIZED)
        {
          bool plugin_table_engine= lex_string_eq(&plugin_table_engine_name,
                                                  &plugin_ptr->name);
          bool opts_only= flags & PLUGIN_INIT_SKIP_INITIALIZATION &&
                          (flags & PLUGIN_INIT_SKIP_PLUGIN_TABLE ||
                           !plugin_table_engine);
          error= plugin_initialize(&tmp_root, plugin_ptr, argc, argv,
                                   opts_only);
          if (error)
          {
            plugin_ptr->state= PLUGIN_IS_DYING;
            /* The plugin wants a retry of the initialisation,
               possibly due to dependency on other plugins */
            if (unlikely(error == HA_ERR_RETRY_INIT))
              *(retry_end++)= plugin_ptr;
            else
              *(reap++)= plugin_ptr;
          }
        }
      }
    }
    /* Retry plugins that asked for it */
    while (retry_start < retry_end)
    {
      st_plugin_int **to_re_retry, **retrying;
      for (to_re_retry= retrying= retry_start; retrying < retry_end;
           retrying++)
      {
        plugin_ptr= *retrying;
        uint state= plugin_ptr->state;
        mysql_mutex_unlock(&LOCK_plugin);
        error= plugin_do_initialize(plugin_ptr, state);
        mysql_mutex_lock(&LOCK_plugin);
        plugin_ptr->state= state;
        if (error == HA_ERR_RETRY_INIT)
          *(to_re_retry++)= plugin_ptr;
        else if (error)
          *(reap++)= plugin_ptr;
      }
      /* If the retry list has not changed, i.e. if all retry attempts
         result in another retry request, empty the retry list */
      if (to_re_retry == retry_end)
        while (to_re_retry > retry_start)
        {
          plugin_ptr= *(--to_re_retry);
          *(reap++)= plugin_ptr;
          /** `plugin_do_initialize()' did not print any error in this
              case, so we do it here. */
          print_init_failed_error(plugin_ptr);
        }
      retry_end= to_re_retry;
    }

    /* load and init plugins from the plugin table (unless done already) */
    if (flags & PLUGIN_INIT_SKIP_PLUGIN_TABLE)
      break;

    mysql_mutex_unlock(&LOCK_plugin);
    plugin_load(&tmp_root);
    flags|= PLUGIN_INIT_SKIP_PLUGIN_TABLE;
    mysql_mutex_lock(&LOCK_plugin);
  }

  /*
    Check if any plugins have to be reaped
  */
  while ((plugin_ptr= *(--reap)))
  {
    mysql_mutex_unlock(&LOCK_plugin);
    if (plugin_is_forced(plugin_ptr))
      reaped_mandatory_plugin= TRUE;
    plugin_deinitialize(plugin_ptr, true);
    mysql_mutex_lock(&LOCK_plugin);
    plugin_del(plugin_ptr, 0);
  }

  mysql_mutex_unlock(&LOCK_plugin);
  my_afree(retry_start);
  my_afree(reap);
  /* A mandatory plugin that failed to start is fatal (except for --help). */
  if (reaped_mandatory_plugin && !opt_help)
    goto err;

  free_root(&tmp_root, MYF(0));

  DBUG_RETURN(0);

err_unlock:
  mysql_mutex_unlock(&LOCK_plugin);
err:
  free_root(&tmp_root, MYF(0));
  DBUG_RETURN(1);
}


/* Register one built-in plugin in plugin_array and the type hash. */
static bool register_builtin(struct st_maria_plugin *plugin,
                             struct st_plugin_int *tmp,
                             struct st_plugin_int **ptr)
{
  DBUG_ENTER("register_builtin");
  tmp->ref_count= 0;
  tmp->plugin_dl= 0;

  if (insert_dynamic(&plugin_array, (uchar*)&tmp))
    DBUG_RETURN(1);

  *ptr= *dynamic_element(&plugin_array, plugin_array.elements - 1,
                         struct
                         st_plugin_int **)=
       (struct st_plugin_int *) memdup_root(&plugin_mem_root, (uchar*)tmp,
                                            sizeof(struct st_plugin_int));

  if (my_hash_insert(&plugin_hash[plugin->type],(uchar*) *ptr))
    DBUG_RETURN(1);

  DBUG_RETURN(0);
}


/*
  called only by plugin_init()
*/
static void plugin_load(MEM_ROOT *tmp_root)
{
  TABLE_LIST tables;
  TABLE *table;
  READ_RECORD read_record_info;
  int error;
  /* A temporary THD is needed to open and read the mysql.plugin table. */
  THD *new_thd= new THD(0);
  bool result;
  unsigned long event_class_mask[MYSQL_AUDIT_CLASS_MASK_SIZE] =
                                { MYSQL_AUDIT_GENERAL_CLASSMASK };
  DBUG_ENTER("plugin_load");

  if (global_system_variables.log_warnings >= 9)
    sql_print_information("Initializing installed plugins");

  new_thd->thread_stack= (char*) &tables;
  new_thd->store_globals();
  new_thd->db= MYSQL_SCHEMA_NAME;
  bzero((char*) &new_thd->net, sizeof(new_thd->net));
  tables.init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_PLUGIN_NAME, 0, TL_READ);
  tables.open_strategy= TABLE_LIST::OPEN_NORMAL;

  result= open_and_lock_tables(new_thd, &tables, FALSE, MYSQL_LOCK_IGNORE_TIMEOUT);

  table= tables.table;
  if (result)
  {
    DBUG_PRINT("error",("Can't open plugin table"));
    if (!opt_help)
      sql_print_error("Could not open mysql.plugin table: \"%s\". "
                      "Some plugins may be not loaded",
                      new_thd->get_stmt_da()->message());
    else
      sql_print_warning("Could not open mysql.plugin table: \"%s\". "
                        "Some options may be missing from the help text",
                        new_thd->get_stmt_da()->message());
    goto end;
  }

  if (init_read_record(&read_record_info, new_thd, table, NULL, NULL, 1, 0,
                       FALSE))
  {
    sql_print_error("Could not initialize init_read_record; Plugins not "
                    "loaded");
    goto end;
  }
  table->use_all_columns();
  while (!(error= read_record_info.read_record()))
  {
    DBUG_PRINT("info", ("init plugin record"));
    String str_name, str_dl;
    get_field(tmp_root, table->field[0], &str_name);
    get_field(tmp_root, table->field[1], &str_dl);

    LEX_CSTRING name= {str_name.ptr(), str_name.length()};
    LEX_CSTRING dl= {str_dl.ptr(), str_dl.length()};

    if (!name.length || !dl.length)
      continue;

    /*
      Pre-acquire audit plugins for events that may potentially occur
      during [UN]INSTALL PLUGIN.

      When audit event is triggered, audit subsystem acquires interested
      plugins by walking through plugin list. Evidently plugin list
      iterator protects plugin list by acquiring LOCK_plugin, see
      plugin_foreach_with_mask().

      On the other hand plugin_load is acquiring LOCK_plugin
      rather for a long time.

      When audit event is triggered during plugin_load plugin
      list iterator acquires the same lock (within the same thread)
      second time.

      This hack should be removed when LOCK_plugin is fixed so it
      protects only what it supposed to protect.

      See also mysql_install_plugin(), mysql_uninstall_plugin() and
      initialize_audit_plugin()
    */
    if (mysql_audit_general_enabled())
      mysql_audit_acquire_plugins(new_thd, event_class_mask);

    /*
      there're no other threads running yet, so we don't need a mutex.
      but plugin_add() before is designed to work in multi-threaded
      environment, and it uses mysql_mutex_assert_owner(), so we lock
      the mutex here to satisfy the assert
    */
    mysql_mutex_lock(&LOCK_plugin);
    plugin_add(tmp_root, false, &name, &dl, MYF(ME_ERROR_LOG));
    free_root(tmp_root, MYF(MY_MARK_BLOCKS_FREE));
    mysql_mutex_unlock(&LOCK_plugin);
  }
  if (unlikely(error > 0))
    sql_print_error(ER_THD(new_thd, ER_GET_ERRNO), my_errno,
                    table->file->table_type());
  end_read_record(&read_record_info);
  table->mark_table_for_reopen();
  close_mysql_tables(new_thd);
end:
  new_thd->db= null_clex_str; // Avoid free on thd->db
  delete new_thd;
  DBUG_VOID_RETURN;
}


/*
  called only by plugin_init()
*/
static bool plugin_load_list(MEM_ROOT *tmp_root, const char *list)
{
  char buffer[FN_REFLEN];
  LEX_CSTRING name= {buffer, 0}, dl= {NULL, 0}, *str= &name;
  char *p= buffer;
  DBUG_ENTER("plugin_load_list");
  /* Parse "name=dl;name=dl;..." (';' or, outside Windows, ':' separated). */
  while (list)
  {
    if (p == buffer + sizeof(buffer) - 1)
    {
      sql_print_error("plugin-load parameter too long");
      DBUG_RETURN(TRUE);
    }

    switch ((*(p++)= *(list++))) {
    case '\0':
      list= NULL; /* terminate the loop */
      /* fall through */
    case ';':
#ifndef _WIN32
    case ':':     /* can't use this as delimiter as it may be drive letter */
#endif
      p[-1]= 0;
      if (str == &name)  // load all plugins in named module
      {
        if (!name.length)
        {
          p--;  /* reset pointer */
          continue;
        }

        dl= name;
        mysql_mutex_lock(&LOCK_plugin);
        free_root(tmp_root, MYF(MY_MARK_BLOCKS_FREE));
        name.str= 0; // load everything
        if (plugin_add(tmp_root, false, &name, &dl,
                       MYF(ME_ERROR_LOG)) != INSTALL_GOOD)
          goto error;
      }
      else
      {
        free_root(tmp_root, MYF(MY_MARK_BLOCKS_FREE));
        mysql_mutex_lock(&LOCK_plugin);
        if (plugin_add(tmp_root, false, &name, &dl,
                       MYF(ME_ERROR_LOG)) != INSTALL_GOOD)
          goto error;
      }
      mysql_mutex_unlock(&LOCK_plugin);
      name.length= dl.length= 0;
      dl.str= NULL; name.str= p= buffer;
      str= &name;
      continue;
    case '=':
    case '#':
      if (str == &name)
      {
        p[-1]= 0;
        str= &dl;
        str->str= p;
        continue;
      }
      /* fall through */
    default:
      str->length++;
      continue;
    }
  }
  DBUG_RETURN(FALSE);
error:
  mysql_mutex_unlock(&LOCK_plugin);
  if (name.str)
    sql_print_error("Couldn't load plugin '%s' from '%s'.",
                    name.str, dl.str);
  else
    sql_print_error("Couldn't load plugins from '%s'.", dl.str);
  DBUG_RETURN(TRUE);
}


void plugin_shutdown(void)
{
  size_t i, count= plugin_array.elements;
  struct st_plugin_int **plugins, *plugin;
  struct st_plugin_dl **dl;
  DBUG_ENTER("plugin_shutdown");

  if (initialized)
  {
    if (opt_gtid_pos_auto_plugins)
    {
      free_engine_list(opt_gtid_pos_auto_plugins);
      opt_gtid_pos_auto_plugins= NULL;
    }

    mysql_mutex_lock(&LOCK_plugin);

    reap_needed= true;

    /*
      We want to shut down plugins in a reasonable order, this will
      become important when we have plugins which depend upon each other.
      Circular references cannot be reaped so they are forced afterwards.
      TODO: Have an additional step here to notify all active plugins that
      shutdown is requested to allow plugins to deinitialize in parallel.
    */
    while (reap_needed && (count= plugin_array.elements))
    {
      reap_plugins();
      for (i= 0; i < count; i++)
      {
        plugin= *dynamic_element(&plugin_array, i, struct st_plugin_int **);
        if (plugin->state == PLUGIN_IS_READY)
        {
          plugin->state= PLUGIN_IS_DELETED;
          reap_needed= true;
        }
      }
      if (!reap_needed)
      {
        /*
          release any plugin references held.
        */
        unlock_variables(NULL, &global_system_variables);
        unlock_variables(NULL, &max_system_variables);
      }
    }

    plugins= (struct st_plugin_int **) my_alloca(sizeof(void*) * (count+1));

    /*
      If we have any plugins which did not die cleanly, we force shutdown.
      Don't re-deinit() plugins that failed deinit() earlier (already dying)
    */
    for (i= 0; i < count; i++)
    {
      plugins[i]= *dynamic_element(&plugin_array, i, struct st_plugin_int **);
      if (plugins[i]->state == PLUGIN_IS_DYING)
        plugins[i]->state= PLUGIN_IS_UNINITIALIZED;
      if (plugins[i]->state == PLUGIN_IS_DELETED)
        plugins[i]->state= PLUGIN_IS_DYING;
    }
    mysql_mutex_unlock(&LOCK_plugin);

    /*
      We loop through all plugins and call deinit() if they have one.
    */
    for (i= 0; i < count; i++)
      if (!(plugins[i]->state & (PLUGIN_IS_UNINITIALIZED | PLUGIN_IS_FREED |
                                 PLUGIN_IS_DISABLED)))
      {
        /*
          We are forcing deinit on plugins so we don't want to do a ref_count
          check until we have processed all the plugins.
        */
        plugin_deinitialize(plugins[i], false);
      }

    /*
      It's perfectly safe not to lock LOCK_plugin, as there're no
      concurrent threads anymore. But some functions called from here
      use mysql_mutex_assert_owner(), so we lock the mutex to satisfy it
    */
    mysql_mutex_lock(&LOCK_plugin);

    /*
      We defer checking ref_counts until after all plugins are deinitialized
      as some may have worker threads holding on to plugin references.
    */
    for (i= 0; i < count; i++)
    {
      if (plugins[i]->ref_count)
        sql_print_error("Plugin '%s' has ref_count=%d after shutdown.",
                        plugins[i]->name.str, plugins[i]->ref_count);
      plugin_del(plugins[i], PLUGIN_IS_DYING);
    }

    /*
      Now we can deallocate all memory.
    */

    cleanup_variables(&global_system_variables);
    cleanup_variables(&max_system_variables);
    mysql_mutex_unlock(&LOCK_plugin);

    initialized= 0;
    mysql_mutex_destroy(&LOCK_plugin);

    my_afree(plugins);
  }

  /* Dispose of the memory */

  for (i= 0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++)
    my_hash_free(&plugin_hash[i]);
  delete_dynamic(&plugin_array);

  count= plugin_dl_array.elements;
  dl= (struct st_plugin_dl **)my_alloca(sizeof(void*) * count);
  for (i= 0; i < count; i++)
    dl[i]= *dynamic_element(&plugin_dl_array, i, struct st_plugin_dl **);
  for (i= 0; i < plugin_dl_array.elements; i++)
    free_plugin_mem(dl[i]);
  my_afree(dl);
  delete_dynamic(&plugin_dl_array);

  my_hash_free(&bookmark_hash);
  free_root(&plugin_mem_root, MYF(0));
  free_root(&plugin_vars_mem_root, MYF(0));

  global_variables_dynamic_size= 0;

  DBUG_VOID_RETURN;
}

/**
  complete plugin installation (after plugin_add).

  That is, initialize it, and update mysql.plugin table
*/
static bool finalize_install(THD *thd, TABLE *table, const LEX_CSTRING *name,
                             int *argc, char **argv)
{
  struct st_plugin_int *tmp= plugin_find_internal(name, MYSQL_ANY_PLUGIN);
  int error;
  DBUG_ASSERT(tmp);
  mysql_mutex_assert_owner(&LOCK_plugin); // because of tmp->state

  if (tmp->state != PLUGIN_IS_UNINITIALIZED)
  {
    /* already installed */
    return 0;
  }
  else
  {
    if (plugin_initialize(thd->mem_root, tmp, argc, argv, false))
    {
      my_error(ER_CANT_INITIALIZE_UDF, MYF(0), name->str,
               "Plugin initialization function failed.");
      tmp->state= PLUGIN_IS_DELETED;
      return 1;
    }
  }
  if (tmp->state == PLUGIN_IS_DISABLED)
  {
    if (global_system_variables.log_warnings)
      push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                          ER_CANT_INITIALIZE_UDF,
                          ER_THD(thd, ER_CANT_INITIALIZE_UDF),
                          name->str, "Plugin is disabled");
  }

  /*
    We do not replicate the INSTALL PLUGIN statement.
Disable binlogging + of the insert into the plugin table, so that it is not replicated in + row based mode. + */ + DBUG_ASSERT(!table->file->row_logging); + table->use_all_columns(); + restore_record(table, s->default_values); + table->field[0]->store(name->str, name->length, system_charset_info); + table->field[1]->store(tmp->plugin_dl->dl.str, tmp->plugin_dl->dl.length, + files_charset_info); + error= table->file->ha_write_row(table->record[0]); + if (unlikely(error)) + { + table->file->print_error(error, MYF(0)); + tmp->state= PLUGIN_IS_DELETED; + return 1; + } + return 0; +} + +bool mysql_install_plugin(THD *thd, const LEX_CSTRING *name, + const LEX_CSTRING *dl_arg) +{ + TABLE_LIST tables; + TABLE *table; + LEX_CSTRING dl= *dl_arg; + enum install_status error; + int argc=orig_argc; + char **argv=orig_argv; + unsigned long event_class_mask[MYSQL_AUDIT_CLASS_MASK_SIZE] = + { MYSQL_AUDIT_GENERAL_CLASSMASK }; + DBUG_ENTER("mysql_install_plugin"); + + tables.init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_PLUGIN_NAME, 0, TL_WRITE); + if (!opt_noacl && check_table_access(thd, INSERT_ACL, &tables, FALSE, 1, FALSE)) + DBUG_RETURN(TRUE); + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + /* need to open before acquiring LOCK_plugin or it will deadlock */ + if (! (table = open_ltable(thd, &tables, TL_WRITE, + MYSQL_LOCK_IGNORE_TIMEOUT))) + DBUG_RETURN(TRUE); + + if (my_load_defaults(MYSQL_CONFIG_NAME, load_default_groups, &argc, &argv, NULL)) + { + my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), name->str); + DBUG_RETURN(TRUE); + } + + /* + Pre-acquire audit plugins for events that may potentially occur + during [UN]INSTALL PLUGIN. + + When audit event is triggered, audit subsystem acquires interested + plugins by walking through plugin list. Evidently plugin list + iterator protects plugin list by acquiring LOCK_plugin, see + plugin_foreach_with_mask(). + + On the other hand [UN]INSTALL PLUGIN is acquiring LOCK_plugin + rather for a long time. 
+ + When audit event is triggered during [UN]INSTALL PLUGIN, plugin + list iterator acquires the same lock (within the same thread) + second time. + + This hack should be removed when LOCK_plugin is fixed so it + protects only what it supposed to protect. + + See also mysql_uninstall_plugin() and initialize_audit_plugin() + */ + if (mysql_audit_general_enabled()) + mysql_audit_acquire_plugins(thd, event_class_mask); + + mysql_mutex_lock(&LOCK_plugin); + DEBUG_SYNC(thd, "acquired_LOCK_plugin"); + error= plugin_add(thd->mem_root, thd->lex->create_info.if_not_exists(), + name, &dl, MYF(0)); + if (unlikely(error != INSTALL_GOOD)) + goto err; + + if (name->str) + error= finalize_install(thd, table, name, &argc, argv) + ? INSTALL_FAIL_NOT_OK : INSTALL_GOOD; + else + { + st_plugin_dl *plugin_dl= plugin_dl_find(&dl); + struct st_maria_plugin *plugin; + for (plugin= plugin_dl->plugins; plugin->info; plugin++) + { + LEX_CSTRING str= { plugin->name, strlen(plugin->name) }; + if (finalize_install(thd, table, &str, &argc, argv)) + error= INSTALL_FAIL_NOT_OK; + } + } + + if (unlikely(error != INSTALL_GOOD)) + { + reap_needed= true; + reap_plugins(); + } +err: + global_plugin_version++; + mysql_mutex_unlock(&LOCK_plugin); + if (argv) + free_defaults(argv); + DBUG_RETURN(error == INSTALL_FAIL_NOT_OK); +#ifdef WITH_WSREP +wsrep_error_label: + DBUG_RETURN(true); +#endif +} + + +static bool do_uninstall(THD *thd, TABLE *table, const LEX_CSTRING *name) +{ + struct st_plugin_int *plugin; + mysql_mutex_assert_owner(&LOCK_plugin); + + if (!(plugin= plugin_find_internal(name, MYSQL_ANY_PLUGIN)) || + plugin->state & (PLUGIN_IS_UNINITIALIZED | PLUGIN_IS_DYING)) + { + // maybe plugin is present in mysql.plugin; postpone the error + plugin= nullptr; + } + + if (plugin) + { + if (!plugin->plugin_dl) + { + my_error(ER_PLUGIN_DELETE_BUILTIN, MYF(0)); + return 1; + } + if (plugin->load_option == PLUGIN_FORCE_PLUS_PERMANENT) + { + my_error(ER_PLUGIN_IS_PERMANENT, MYF(0), name->str); + return 1; + 
} + + plugin->state= PLUGIN_IS_DELETED; + if (plugin->ref_count) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_PLUGIN_BUSY, ER_THD(thd, WARN_PLUGIN_BUSY)); + else + reap_needed= true; + } + + uchar user_key[MAX_KEY_LENGTH]; + table->use_all_columns(); + table->field[0]->store(name->str, name->length, system_charset_info); + key_copy(user_key, table->record[0], table->key_info, + table->key_info->key_length); + if (! table->file->ha_index_read_idx_map(table->record[0], 0, user_key, + HA_WHOLE_KEY, HA_READ_KEY_EXACT)) + { + int error; + /* + We do not replicate the UNINSTALL PLUGIN statement. Disable binlogging + of the delete from the plugin table, so that it is not replicated in + row based mode. + */ + table->file->row_logging= 0; // No logging + error= table->file->ha_delete_row(table->record[0]); + if (unlikely(error)) + { + table->file->print_error(error, MYF(0)); + return 1; + } + } + else if (!plugin) + { + const myf MyFlags= thd->lex->if_exists() ? ME_NOTE : 0; + my_error(ER_SP_DOES_NOT_EXIST, MyFlags, "PLUGIN", name->str); + return !MyFlags; + } + return 0; +} + + +bool mysql_uninstall_plugin(THD *thd, const LEX_CSTRING *name, + const LEX_CSTRING *dl_arg) +{ + TABLE *table; + TABLE_LIST tables; + LEX_CSTRING dl= *dl_arg; + bool error= false; + unsigned long event_class_mask[MYSQL_AUDIT_CLASS_MASK_SIZE] = + { MYSQL_AUDIT_GENERAL_CLASSMASK }; + DBUG_ENTER("mysql_uninstall_plugin"); + + tables.init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_PLUGIN_NAME, 0, TL_WRITE); + + if (!opt_noacl && check_table_access(thd, DELETE_ACL, &tables, FALSE, 1, FALSE)) + DBUG_RETURN(TRUE); + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + /* need to open before acquiring LOCK_plugin or it will deadlock */ + if (! (table= open_ltable(thd, &tables, TL_WRITE, MYSQL_LOCK_IGNORE_TIMEOUT))) + DBUG_RETURN(TRUE); + + if (!table->key_info) + { + my_printf_error(ER_UNKNOWN_ERROR, + "The table %s.%s has no primary key. 
" + "Please check the table definition and " + "create the primary key accordingly.", MYF(0), + table->s->db.str, table->s->table_name.str); + DBUG_RETURN(TRUE); + } + + /* + Pre-acquire audit plugins for events that may potentially occur + during [UN]INSTALL PLUGIN. + + When audit event is triggered, audit subsystem acquires interested + plugins by walking through plugin list. Evidently plugin list + iterator protects plugin list by acquiring LOCK_plugin, see + plugin_foreach_with_mask(). + + On the other hand [UN]INSTALL PLUGIN is acquiring LOCK_plugin + rather for a long time. + + When audit event is triggered during [UN]INSTALL PLUGIN, plugin + list iterator acquires the same lock (within the same thread) + second time. + + This hack should be removed when LOCK_plugin is fixed so it + protects only what it supposed to protect. + + See also mysql_install_plugin() and initialize_audit_plugin() + */ + if (mysql_audit_general_enabled()) + mysql_audit_acquire_plugins(thd, event_class_mask); + + mysql_mutex_lock(&LOCK_plugin); + + if (name->str) + error= do_uninstall(thd, table, name); + else + { + fix_dl_name(thd->mem_root, &dl); + st_plugin_dl *plugin_dl= plugin_dl_find(&dl); + if (plugin_dl) + { + for (struct st_maria_plugin *plugin= plugin_dl->plugins; + plugin->info; plugin++) + { + LEX_CSTRING str= { plugin->name, strlen(plugin->name) }; + error|= do_uninstall(thd, table, &str); + } + } + else + { + myf MyFlags= thd->lex->if_exists() ? 
ME_NOTE : 0; + my_error(ER_SP_DOES_NOT_EXIST, MyFlags, "SONAME", dl.str); + error|= !MyFlags; + } + } + reap_plugins(); + + global_plugin_version++; + mysql_mutex_unlock(&LOCK_plugin); + DBUG_RETURN(error); +#ifdef WITH_WSREP +wsrep_error_label: + DBUG_RETURN(true); +#endif +} + + +bool plugin_foreach_with_mask(THD *thd, plugin_foreach_func *func, + int type, uint state_mask, void *arg) +{ + size_t idx, total= 0; + struct st_plugin_int *plugin; + plugin_ref *plugins; + my_bool res= FALSE; + DBUG_ENTER("plugin_foreach_with_mask"); + + if (!initialized) + DBUG_RETURN(FALSE); + + mysql_mutex_lock(&LOCK_plugin); + /* + Do the alloca out here in case we do have a working alloca: + leaving the nested stack frame invalidates alloca allocation. + */ + if (type == MYSQL_ANY_PLUGIN) + { + plugins= (plugin_ref*) my_alloca(plugin_array.elements * sizeof(plugin_ref)); + for (idx= 0; idx < plugin_array.elements; idx++) + { + plugin= *dynamic_element(&plugin_array, idx, struct st_plugin_int **); + if ((plugins[total]= intern_plugin_lock(0, plugin_int_to_ref(plugin), + state_mask))) + total++; + } + } + else + { + HASH *hash= plugin_hash + type; + plugins= (plugin_ref*) my_alloca(hash->records * sizeof(plugin_ref)); + for (idx= 0; idx < hash->records; idx++) + { + plugin= (struct st_plugin_int *) my_hash_element(hash, idx); + if ((plugins[total]= intern_plugin_lock(0, plugin_int_to_ref(plugin), + state_mask))) + total++; + } + } + mysql_mutex_unlock(&LOCK_plugin); + + for (idx= 0; idx < total; idx++) + { + /* It will stop iterating on first engine error when "func" returns TRUE */ + if ((res= func(thd, plugins[idx], arg))) + break; + } + + plugin_unlock_list(0, plugins, total); + my_afree(plugins); + DBUG_RETURN(res); +} + + +static bool plugin_dl_foreach_internal(THD *thd, st_plugin_dl *plugin_dl, + st_maria_plugin *plug, + plugin_foreach_func *func, void *arg) +{ + for (; plug->name; plug++) + { + st_plugin_int tmp, *plugin; + + tmp.name.str= const_cast(plug->name); + 
tmp.name.length= strlen(plug->name); + tmp.plugin= plug; + tmp.plugin_dl= plugin_dl; + + mysql_mutex_lock(&LOCK_plugin); + if ((plugin= plugin_find_internal(&tmp.name, plug->type)) && + plugin->plugin == plug) + + { + tmp.state= plugin->state; + tmp.load_option= plugin->load_option; + } + else + { + tmp.state= PLUGIN_IS_FREED; + tmp.load_option= PLUGIN_OFF; + } + mysql_mutex_unlock(&LOCK_plugin); + + plugin= &tmp; + if (func(thd, plugin_int_to_ref(plugin), arg)) + return 1; + } + return 0; +} + +bool plugin_dl_foreach(THD *thd, const LEX_CSTRING *dl, + plugin_foreach_func *func, void *arg) +{ + bool err= 0; + + if (dl) + { + mysql_mutex_lock(&LOCK_plugin); + st_plugin_dl *plugin_dl= plugin_dl_add(dl, MYF(0)); + mysql_mutex_unlock(&LOCK_plugin); + + if (!plugin_dl) + return 1; + + err= plugin_dl_foreach_internal(thd, plugin_dl, plugin_dl->plugins, + func, arg); + + mysql_mutex_lock(&LOCK_plugin); + plugin_dl_del(plugin_dl); + mysql_mutex_unlock(&LOCK_plugin); + } + else + { + struct st_maria_plugin **builtins; + for (builtins= mysql_mandatory_plugins; !err && *builtins; builtins++) + err= plugin_dl_foreach_internal(thd, 0, *builtins, func, arg); + for (builtins= mysql_optional_plugins; !err && *builtins; builtins++) + err= plugin_dl_foreach_internal(thd, 0, *builtins, func, arg); + } + return err; +} + + +/**************************************************************************** + Internal type declarations for variables support +****************************************************************************/ + +#undef MYSQL_SYSVAR_NAME +#define MYSQL_SYSVAR_NAME(name) name +#define PLUGIN_VAR_TYPEMASK 0x7f +#define BOOKMARK_MEMALLOC 0x80 + +static inline char plugin_var_bookmark_key(uint flags) +{ + return (flags & PLUGIN_VAR_TYPEMASK) | + (flags & PLUGIN_VAR_MEMALLOC ? 
BOOKMARK_MEMALLOC : 0); +} + +#define EXTRA_OPTIONS 3 /* options for: 'foo', 'plugin-foo' and NULL */ + +typedef DECLARE_MYSQL_SYSVAR_BASIC(sysvar_bool_t, my_bool); +typedef DECLARE_MYSQL_THDVAR_BASIC(thdvar_bool_t, my_bool); +typedef DECLARE_MYSQL_SYSVAR_BASIC(sysvar_str_t, char *); +typedef DECLARE_MYSQL_THDVAR_BASIC(thdvar_str_t, char *); + +typedef DECLARE_MYSQL_SYSVAR_TYPELIB(sysvar_enum_t, unsigned long); +typedef DECLARE_MYSQL_THDVAR_TYPELIB(thdvar_enum_t, unsigned long); +typedef DECLARE_MYSQL_SYSVAR_TYPELIB(sysvar_set_t, ulonglong); +typedef DECLARE_MYSQL_THDVAR_TYPELIB(thdvar_set_t, ulonglong); + +typedef DECLARE_MYSQL_SYSVAR_SIMPLE(sysvar_int_t, int); +typedef DECLARE_MYSQL_SYSVAR_SIMPLE(sysvar_long_t, long); +typedef DECLARE_MYSQL_SYSVAR_SIMPLE(sysvar_longlong_t, longlong); +typedef DECLARE_MYSQL_SYSVAR_SIMPLE(sysvar_uint_t, uint); +typedef DECLARE_MYSQL_SYSVAR_SIMPLE(sysvar_ulong_t, ulong); +typedef DECLARE_MYSQL_SYSVAR_SIMPLE(sysvar_ulonglong_t, ulonglong); +typedef DECLARE_MYSQL_SYSVAR_SIMPLE(sysvar_double_t, double); + +typedef DECLARE_MYSQL_THDVAR_SIMPLE(thdvar_int_t, int); +typedef DECLARE_MYSQL_THDVAR_SIMPLE(thdvar_long_t, long); +typedef DECLARE_MYSQL_THDVAR_SIMPLE(thdvar_longlong_t, longlong); +typedef DECLARE_MYSQL_THDVAR_SIMPLE(thdvar_uint_t, uint); +typedef DECLARE_MYSQL_THDVAR_SIMPLE(thdvar_ulong_t, ulong); +typedef DECLARE_MYSQL_THDVAR_SIMPLE(thdvar_ulonglong_t, ulonglong); +typedef DECLARE_MYSQL_THDVAR_SIMPLE(thdvar_double_t, double); + + +/**************************************************************************** + default variable data check and update functions +****************************************************************************/ + +static int check_func_bool(THD *thd, struct st_mysql_sys_var *var, + void *save, st_mysql_value *value) +{ + char buff[STRING_BUFFER_USUAL_SIZE]; + const char *str; + int result, length; + long long tmp; + + if (value->value_type(value) == MYSQL_VALUE_TYPE_STRING) + { + length= sizeof(buff); + if 
(!(str= value->val_str(value, buff, &length)) || + (result= find_type(&bool_typelib, str, length, 1)-1) < 0) + goto err; + } + else + { + if (value->val_int(value, &tmp) < 0) + goto err; + if (tmp != 0 && tmp != 1) + goto err; + result= (int) tmp; + } + *(my_bool *) save= result ? 1 : 0; + return 0; +err: + return 1; +} + + +static int check_func_int(THD *thd, struct st_mysql_sys_var *var, + void *save, st_mysql_value *value) +{ + my_bool fixed1, fixed2; + long long orig, val; + struct my_option options; + value->val_int(value, &orig); + val= orig; + plugin_opt_set_limits(&options, var); + + if (var->flags & PLUGIN_VAR_UNSIGNED) + { + if ((fixed1= (!value->is_unsigned(value) && val < 0))) + val=0; + *(uint *)save= (uint) getopt_ull_limit_value((ulonglong) val, &options, + &fixed2); + } + else + { + if ((fixed1= (value->is_unsigned(value) && val < 0))) + val=LONGLONG_MAX; + *(int *)save= (int) getopt_ll_limit_value(val, &options, &fixed2); + } + + return throw_bounds_warning(thd, var->name, fixed1 || fixed2, + value->is_unsigned(value), (longlong) orig); +} + + +static int check_func_long(THD *thd, struct st_mysql_sys_var *var, + void *save, st_mysql_value *value) +{ + my_bool fixed1, fixed2; + long long orig, val; + struct my_option options; + value->val_int(value, &orig); + val= orig; + plugin_opt_set_limits(&options, var); + + if (var->flags & PLUGIN_VAR_UNSIGNED) + { + if ((fixed1= (!value->is_unsigned(value) && val < 0))) + val=0; + *(ulong *)save= (ulong) getopt_ull_limit_value((ulonglong) val, &options, + &fixed2); + } + else + { + if ((fixed1= (value->is_unsigned(value) && val < 0))) + val=LONGLONG_MAX; + *(long *)save= (long) getopt_ll_limit_value(val, &options, &fixed2); + } + + return throw_bounds_warning(thd, var->name, fixed1 || fixed2, + value->is_unsigned(value), (longlong) orig); +} + + +static int check_func_longlong(THD *thd, struct st_mysql_sys_var *var, + void *save, st_mysql_value *value) +{ + my_bool fixed1, fixed2; + long long orig, val; + 
struct my_option options; + value->val_int(value, &orig); + val= orig; + plugin_opt_set_limits(&options, var); + + if (var->flags & PLUGIN_VAR_UNSIGNED) + { + if ((fixed1= (!value->is_unsigned(value) && val < 0))) + val=0; + *(ulonglong *)save= getopt_ull_limit_value((ulonglong) val, &options, + &fixed2); + } + else + { + if ((fixed1= (value->is_unsigned(value) && val < 0))) + val=LONGLONG_MAX; + *(longlong *)save= getopt_ll_limit_value(val, &options, &fixed2); + } + + return throw_bounds_warning(thd, var->name, fixed1 || fixed2, + value->is_unsigned(value), (longlong) orig); +} + +static int check_func_str(THD *thd, struct st_mysql_sys_var *var, + void *save, st_mysql_value *value) +{ + char buff[STRING_BUFFER_USUAL_SIZE]; + const char *str; + int length; + + length= sizeof(buff); + if ((str= value->val_str(value, buff, &length))) + str= thd->strmake(str, length); + *(const char**)save= str; + return 0; +} + + +static int check_func_enum(THD *thd, struct st_mysql_sys_var *var, + void *save, st_mysql_value *value) +{ + char buff[STRING_BUFFER_USUAL_SIZE]; + const char *str; + TYPELIB *typelib; + long long tmp; + long result; + int length; + + if (var->flags & PLUGIN_VAR_THDLOCAL) + typelib= ((thdvar_enum_t*) var)->typelib; + else + typelib= ((sysvar_enum_t*) var)->typelib; + + if (value->value_type(value) == MYSQL_VALUE_TYPE_STRING) + { + length= sizeof(buff); + if (!(str= value->val_str(value, buff, &length))) + goto err; + if ((result= (long)find_type(typelib, str, length, 0) - 1) < 0) + goto err; + } + else + { + if (value->val_int(value, &tmp)) + goto err; + if (tmp < 0 || tmp >= typelib->count) + goto err; + result= (long) tmp; + } + *(long*)save= result; + return 0; +err: + return 1; +} + + +static int check_func_set(THD *thd, struct st_mysql_sys_var *var, + void *save, st_mysql_value *value) +{ + char buff[STRING_BUFFER_USUAL_SIZE], *error= 0; + const char *str; + TYPELIB *typelib; + ulonglong result; + uint error_len= 0; // init as only set on error + bool 
not_used; + int length; + + if (var->flags & PLUGIN_VAR_THDLOCAL) + typelib= ((thdvar_set_t*) var)->typelib; + else + typelib= ((sysvar_set_t*)var)->typelib; + + if (value->value_type(value) == MYSQL_VALUE_TYPE_STRING) + { + length= sizeof(buff); + if (!(str= value->val_str(value, buff, &length))) + goto err; + result= find_set(typelib, str, length, NULL, + &error, &error_len, ¬_used); + if (unlikely(error_len)) + goto err; + } + else + { + if (value->val_int(value, (long long *)&result)) + goto err; + if (unlikely((result >= (1ULL << typelib->count)) && + (typelib->count < sizeof(long)*8))) + goto err; + } + *(ulonglong*)save= result; + return 0; +err: + return 1; +} + +static int check_func_double(THD *thd, struct st_mysql_sys_var *var, + void *save, st_mysql_value *value) +{ + double v; + my_bool fixed; + struct my_option option; + + value->val_real(value, &v); + plugin_opt_set_limits(&option, var); + *(double *) save= getopt_double_limit_value(v, &option, &fixed); + + return throw_bounds_warning(thd, var->name, fixed, v); +} + + +static void update_func_bool(THD *thd, struct st_mysql_sys_var *var, + void *tgt, const void *save) +{ + *(my_bool *) tgt= *(my_bool *) save ? 
1 : 0; +} + + +static void update_func_int(THD *thd, struct st_mysql_sys_var *var, + void *tgt, const void *save) +{ + *(int *)tgt= *(int *) save; +} + + +static void update_func_long(THD *thd, struct st_mysql_sys_var *var, + void *tgt, const void *save) +{ + *(long *)tgt= *(long *) save; +} + + +static void update_func_longlong(THD *thd, struct st_mysql_sys_var *var, + void *tgt, const void *save) +{ + *(longlong *)tgt= *(ulonglong *) save; +} + + +static void update_func_str(THD *thd, struct st_mysql_sys_var *var, + void *tgt, const void *save) +{ + char *value= *(char**) save; + if (var->flags & PLUGIN_VAR_MEMALLOC) + { + char *old= *(char**) tgt; + if (value) + *(char**) tgt= my_strdup(key_memory_global_system_variables, + value, MYF(0)); + else + *(char**) tgt= 0; + my_free(old); + } + else + *(char**) tgt= value; +} + +static void update_func_double(THD *thd, struct st_mysql_sys_var *var, + void *tgt, const void *save) +{ + *(double *) tgt= *(double *) save; +} + +/**************************************************************************** + System Variables support +****************************************************************************/ + +sys_var *find_sys_var(THD *thd, const char *str, size_t length, + bool throw_error) +{ + sys_var *var; + sys_var_pluginvar *pi; + DBUG_ENTER("find_sys_var"); + DBUG_PRINT("enter", ("var '%.*s'", (int)length, str)); + + mysql_prlock_rdlock(&LOCK_system_variables_hash); + if ((var= intern_find_sys_var(str, length)) && + (pi= var->cast_pluginvar())) + { + mysql_mutex_lock(&LOCK_plugin); + if (!intern_plugin_lock(thd ? thd->lex : 0, plugin_int_to_ref(pi->plugin), + PLUGIN_IS_READY)) + var= NULL; /* failed to lock it, it must be uninstalling */ + mysql_mutex_unlock(&LOCK_plugin); + } + mysql_prlock_unlock(&LOCK_system_variables_hash); + + if (unlikely(!throw_error && !var)) + my_error(ER_UNKNOWN_SYSTEM_VARIABLE, MYF(0), + (int) (length ? 
length : strlen(str)), (char*) str); + DBUG_RETURN(var); +} + + +/* + called by register_var, construct_options and test_plugin_options. + Returns the 'bookmark' for the named variable. + returns null for non thd-local variables. + LOCK_system_variables_hash should be at least read locked +*/ +static st_bookmark *find_bookmark(const char *plugin, const char *name, + int flags) +{ + st_bookmark *result= NULL; + size_t namelen, length, pluginlen= 0; + char *varname, *p; + + if (!(flags & PLUGIN_VAR_THDLOCAL)) + return NULL; + + namelen= strlen(name); + if (plugin) + pluginlen= strlen(plugin) + 1; + length= namelen + pluginlen + 2; + varname= (char*) my_alloca(length); + + if (plugin) + { + strxmov(varname + 1, plugin, "_", name, NullS); + for (p= varname + 1; *p; p++) + if (*p == '-') + *p= '_'; + } + else + memcpy(varname + 1, name, namelen + 1); + + varname[0]= plugin_var_bookmark_key(flags); + + result= (st_bookmark*) my_hash_search(&bookmark_hash, + (const uchar*) varname, length - 1); + + my_afree(varname); + return result; +} + + +static size_t var_storage_size(int flags) +{ + switch (flags & PLUGIN_VAR_TYPEMASK) { + case PLUGIN_VAR_BOOL: return sizeof(my_bool); + case PLUGIN_VAR_INT: return sizeof(int); + case PLUGIN_VAR_LONG: return sizeof(long); + case PLUGIN_VAR_ENUM: return sizeof(long); + case PLUGIN_VAR_LONGLONG: return sizeof(ulonglong); + case PLUGIN_VAR_SET: return sizeof(ulonglong); + case PLUGIN_VAR_STR: return sizeof(char*); + case PLUGIN_VAR_DOUBLE: return sizeof(double); + default: DBUG_ASSERT(0); return 0; + } +} + + +/* + returns a bookmark for thd-local variables, creating if neccessary. 
+ Requires that a write lock is obtained on LOCK_system_variables_hash +*/ +static st_bookmark *register_var(const char *plugin, const char *name, + int flags) +{ + size_t length= strlen(plugin) + strlen(name) + 3, size, offset, new_size; + st_bookmark *result; + char *varname, *p; + + DBUG_ASSERT(flags & PLUGIN_VAR_THDLOCAL); + + size= var_storage_size(flags); + varname= ((char*) my_alloca(length)); + strxmov(varname + 1, plugin, "_", name, NullS); + for (p= varname + 1; *p; p++) + if (*p == '-') + *p= '_'; + + if (!(result= find_bookmark(NULL, varname + 1, flags))) + { + result= (st_bookmark*) alloc_root(&plugin_vars_mem_root, + sizeof(struct st_bookmark) + length-1); + varname[0]= plugin_var_bookmark_key(flags); + memcpy(result->key, varname, length); + result->name_len= (uint)(length - 2); + result->offset= -1; + + DBUG_ASSERT(size && !(size & (size-1))); /* must be power of 2 */ + + offset= global_system_variables.dynamic_variables_size; + offset= (offset + size - 1) & ~(size - 1); + result->offset= (int) offset; + + new_size= (offset + size + 63) & ~63; + + if (new_size > global_variables_dynamic_size) + { + global_system_variables.dynamic_variables_ptr= (char*) + my_realloc(key_memory_global_system_variables, + global_system_variables.dynamic_variables_ptr, new_size, + MYF(MY_WME | MY_FAE | MY_ALLOW_ZERO_PTR)); + max_system_variables.dynamic_variables_ptr= (char*) + my_realloc(key_memory_global_system_variables, + max_system_variables.dynamic_variables_ptr, new_size, + MYF(MY_WME | MY_FAE | MY_ALLOW_ZERO_PTR)); + /* + Clear the new variable value space. This is required for string + variables. If their value is non-NULL, it must point to a valid + string. 
+ */ + bzero(global_system_variables.dynamic_variables_ptr + + global_variables_dynamic_size, + new_size - global_variables_dynamic_size); + bzero(max_system_variables.dynamic_variables_ptr + + global_variables_dynamic_size, + new_size - global_variables_dynamic_size); + global_variables_dynamic_size= new_size; + } + + global_system_variables.dynamic_variables_head= (uint)offset; + max_system_variables.dynamic_variables_head= (uint)offset; + global_system_variables.dynamic_variables_size= (uint)(offset + size); + max_system_variables.dynamic_variables_size= (uint)(offset + size); + global_system_variables.dynamic_variables_version++; + max_system_variables.dynamic_variables_version++; + + result->version= global_system_variables.dynamic_variables_version; + + /* this should succeed because we have already checked if a dup exists */ + if (my_hash_insert(&bookmark_hash, (uchar*) result)) + { + fprintf(stderr, "failed to add placeholder to hash"); + DBUG_ASSERT(0); + } + } + my_afree(varname); + return result; +} + + +void sync_dynamic_session_variables(THD* thd, bool global_lock) +{ + uint idx; + + thd->variables.dynamic_variables_ptr= (char*) + my_realloc(key_memory_THD_variables, + thd->variables.dynamic_variables_ptr, + global_variables_dynamic_size, + MYF(MY_WME | MY_FAE | MY_ALLOW_ZERO_PTR)); + + if (global_lock) + mysql_mutex_lock(&LOCK_global_system_variables); + + mysql_mutex_assert_owner(&LOCK_global_system_variables); + + memcpy(thd->variables.dynamic_variables_ptr + + thd->variables.dynamic_variables_size, + global_system_variables.dynamic_variables_ptr + + thd->variables.dynamic_variables_size, + global_system_variables.dynamic_variables_size - + thd->variables.dynamic_variables_size); + + /* + now we need to iterate through any newly copied 'defaults' + and if it is a string type with MEMALLOC flag, we need to strdup + */ + for (idx= 0; idx < bookmark_hash.records; idx++) + { + st_bookmark *v= (st_bookmark*) my_hash_element(&bookmark_hash,idx); + + if 
(v->version <= thd->variables.dynamic_variables_version) + continue; /* already in thd->variables */ + + /* Here we do anything special that may be required of the data types */ + + if ((v->key[0] & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR && + v->key[0] & BOOKMARK_MEMALLOC) + { + char **pp= (char**) (thd->variables.dynamic_variables_ptr + v->offset); + if (*pp) + *pp= my_strdup(key_memory_THD_variables, *pp, MYF(MY_WME|MY_FAE)); + } + } + + if (global_lock) + mysql_mutex_unlock(&LOCK_global_system_variables); + + thd->variables.dynamic_variables_version= + global_system_variables.dynamic_variables_version; + thd->variables.dynamic_variables_head= + global_system_variables.dynamic_variables_head; + thd->variables.dynamic_variables_size= + global_system_variables.dynamic_variables_size; +} + + +/* + returns a pointer to the memory which holds the thd-local variable or + a pointer to the global variable if thd==null. + If required, will sync with global variables if the requested variable + has not yet been allocated in the current thread. +*/ +static uchar *intern_sys_var_ptr(THD* thd, int offset, bool global_lock) +{ + DBUG_ENTER("intern_sys_var_ptr"); + DBUG_ASSERT(offset >= 0); + DBUG_ASSERT((uint)offset <= global_system_variables.dynamic_variables_head); + + if (!thd) + DBUG_RETURN((uchar*) global_system_variables.dynamic_variables_ptr + offset); + + /* + dynamic_variables_head points to the largest valid offset + */ + if (!thd->variables.dynamic_variables_ptr || + (uint)offset > thd->variables.dynamic_variables_head) + { + mysql_prlock_rdlock(&LOCK_system_variables_hash); + sync_dynamic_session_variables(thd, global_lock); + mysql_prlock_unlock(&LOCK_system_variables_hash); + } + DBUG_RETURN((uchar*)thd->variables.dynamic_variables_ptr + offset); +} + + +/** + For correctness and simplicity's sake, a pointer to a function + must be compatible with pointed-to type, that is, the return and + parameters types must be the same. 
Thus, a callback function is + defined for each scalar type. The functions are assigned in + construct_options to their respective types. +*/ + +static char *mysql_sys_var_char(THD* thd, int offset) +{ + return (char *) intern_sys_var_ptr(thd, offset, true); +} + +static int *mysql_sys_var_int(THD* thd, int offset) +{ + return (int *) intern_sys_var_ptr(thd, offset, true); +} + +static long *mysql_sys_var_long(THD* thd, int offset) +{ + return (long *) intern_sys_var_ptr(thd, offset, true); +} + +static unsigned long *mysql_sys_var_ulong(THD* thd, int offset) +{ + return (unsigned long *) intern_sys_var_ptr(thd, offset, true); +} + +static long long *mysql_sys_var_longlong(THD* thd, int offset) +{ + return (long long *) intern_sys_var_ptr(thd, offset, true); +} + +static unsigned long long *mysql_sys_var_ulonglong(THD* thd, int offset) +{ + return (unsigned long long *) intern_sys_var_ptr(thd, offset, true); +} + +static char **mysql_sys_var_str(THD* thd, int offset) +{ + return (char **) intern_sys_var_ptr(thd, offset, true); +} + +static double *mysql_sys_var_double(THD* thd, int offset) +{ + return (double *) intern_sys_var_ptr(thd, offset, true); +} + +void plugin_thdvar_init(THD *thd) +{ + plugin_ref old_table_plugin= thd->variables.table_plugin; + plugin_ref old_tmp_table_plugin= thd->variables.tmp_table_plugin; + plugin_ref old_enforced_table_plugin= thd->variables.enforced_table_plugin; + DBUG_ENTER("plugin_thdvar_init"); + + // This function may be called many times per THD (e.g. 
on COM_CHANGE_USER) + thd->variables.table_plugin= NULL; + thd->variables.tmp_table_plugin= NULL; + thd->variables.enforced_table_plugin= NULL; + cleanup_variables(&thd->variables); + + /* This and all other variable cleanups are here for COM_CHANGE_USER :( */ +#ifndef EMBEDDED_LIBRARY + thd->session_tracker.sysvars.deinit(thd); +#endif + + thd->variables= global_system_variables; + + /* we are going to allocate these lazily */ + thd->variables.dynamic_variables_version= 0; + thd->variables.dynamic_variables_size= 0; + thd->variables.dynamic_variables_ptr= 0; + + mysql_mutex_lock(&LOCK_plugin); + thd->variables.table_plugin= + intern_plugin_lock(NULL, global_system_variables.table_plugin); + if (global_system_variables.tmp_table_plugin) + thd->variables.tmp_table_plugin= + intern_plugin_lock(NULL, global_system_variables.tmp_table_plugin); + if (global_system_variables.enforced_table_plugin) + thd->variables.enforced_table_plugin= + intern_plugin_lock(NULL, global_system_variables.enforced_table_plugin); + intern_plugin_unlock(NULL, old_table_plugin); + intern_plugin_unlock(NULL, old_tmp_table_plugin); + intern_plugin_unlock(NULL, old_enforced_table_plugin); + mysql_mutex_unlock(&LOCK_plugin); + +#ifndef EMBEDDED_LIBRARY + thd->session_tracker.sysvars.init(thd); +#endif + DBUG_VOID_RETURN; +} + + +/* + Unlocks all system variables which hold a reference +*/ +static void unlock_variables(THD *thd, struct system_variables *vars) +{ + intern_plugin_unlock(NULL, vars->table_plugin); + intern_plugin_unlock(NULL, vars->tmp_table_plugin); + intern_plugin_unlock(NULL, vars->enforced_table_plugin); + vars->table_plugin= vars->tmp_table_plugin= vars->enforced_table_plugin= NULL; +} + + +/* + Frees memory used by system variables + + Unlike plugin_vars_free_values() it frees all variables of all plugins, + it's used on shutdown. 
+*/ +static void cleanup_variables(struct system_variables *vars) +{ + st_bookmark *v; + uint idx; + + mysql_prlock_rdlock(&LOCK_system_variables_hash); + for (idx= 0; idx < bookmark_hash.records; idx++) + { + v= (st_bookmark*) my_hash_element(&bookmark_hash, idx); + + if (v->version > vars->dynamic_variables_version) + continue; /* not in vars */ + + DBUG_ASSERT((uint)v->offset <= vars->dynamic_variables_head); + + /* free allocated strings (PLUGIN_VAR_STR | PLUGIN_VAR_MEMALLOC) */ + if ((v->key[0] & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR && + v->key[0] & BOOKMARK_MEMALLOC) + { + char **ptr= (char**)(vars->dynamic_variables_ptr + v->offset); + my_free(*ptr); + *ptr= NULL; + } + } + mysql_prlock_unlock(&LOCK_system_variables_hash); + + DBUG_ASSERT(vars->table_plugin == NULL); + DBUG_ASSERT(vars->tmp_table_plugin == NULL); + DBUG_ASSERT(vars->enforced_table_plugin == NULL); + + my_free(vars->dynamic_variables_ptr); + vars->dynamic_variables_ptr= NULL; + vars->dynamic_variables_size= 0; + vars->dynamic_variables_version= 0; +} + + +void plugin_thdvar_cleanup(THD *thd) +{ + size_t idx; + plugin_ref *list; + DBUG_ENTER("plugin_thdvar_cleanup"); + +#ifndef EMBEDDED_LIBRARY + thd->session_tracker.sysvars.deinit(thd); +#endif + + mysql_mutex_lock(&LOCK_plugin); + + unlock_variables(thd, &thd->variables); + cleanup_variables(&thd->variables); + + if ((idx= thd->lex->plugins.elements)) + { + list= ((plugin_ref*) thd->lex->plugins.buffer) + idx - 1; + DBUG_PRINT("info",("unlocking %d plugins", idx)); + while ((uchar*) list >= thd->lex->plugins.buffer) + intern_plugin_unlock(NULL, *list--); + } + + reap_plugins(); + mysql_mutex_unlock(&LOCK_plugin); + + reset_dynamic(&thd->lex->plugins); + + DBUG_VOID_RETURN; +} + + +/** + @brief Free values of thread variables of a plugin. + + This must be called before a plugin is deleted. Otherwise its + variables are no longer accessible and the value space is lost. 
Note + that only string values with PLUGIN_VAR_MEMALLOC are allocated and + must be freed. +*/ + +static void plugin_vars_free_values(st_mysql_sys_var **vars) +{ + DBUG_ENTER("plugin_vars_free_values"); + + if (!vars) + DBUG_VOID_RETURN; + + while(st_mysql_sys_var *var= *vars++) + { + if ((var->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR && + var->flags & PLUGIN_VAR_MEMALLOC) + { + char **val; + if (var->flags & PLUGIN_VAR_THDLOCAL) + { + st_bookmark *v= find_bookmark(0, var->name, var->flags); + if (!v) + continue; + val= (char**)(global_system_variables.dynamic_variables_ptr + v->offset); + } + else + val= *(char***) (var + 1); + + DBUG_PRINT("plugin", ("freeing value for: '%s' addr: %p", + var->name, val)); + my_free(*val); + *val= NULL; + } + } + DBUG_VOID_RETURN; +} + +static SHOW_TYPE pluginvar_show_type(const st_mysql_sys_var *plugin_var) +{ + switch (plugin_var->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_UNSIGNED)) { + case PLUGIN_VAR_BOOL: + return SHOW_MY_BOOL; + case PLUGIN_VAR_INT: + return SHOW_SINT; + case PLUGIN_VAR_INT | PLUGIN_VAR_UNSIGNED: + return SHOW_UINT; + case PLUGIN_VAR_LONG: + return SHOW_SLONG; + case PLUGIN_VAR_LONG | PLUGIN_VAR_UNSIGNED: + return SHOW_ULONG; + case PLUGIN_VAR_LONGLONG: + return SHOW_SLONGLONG; + case PLUGIN_VAR_LONGLONG | PLUGIN_VAR_UNSIGNED: + return SHOW_ULONGLONG; + case PLUGIN_VAR_STR: + return SHOW_CHAR_PTR; + case PLUGIN_VAR_ENUM: + case PLUGIN_VAR_SET: + return SHOW_CHAR; + case PLUGIN_VAR_DOUBLE: + return SHOW_DOUBLE; + default: + DBUG_ASSERT(0); + return SHOW_UNDEF; + } +} + + +static int pluginvar_sysvar_flags(const st_mysql_sys_var *p) +{ + return (p->flags & PLUGIN_VAR_THDLOCAL ? sys_var::SESSION : sys_var::GLOBAL) + | (p->flags & PLUGIN_VAR_READONLY ? 
sys_var::READONLY : 0); +} + +sys_var_pluginvar::sys_var_pluginvar(sys_var_chain *chain, const char *name_arg, + st_plugin_int *p, st_mysql_sys_var *pv, const char *substitute) + : sys_var(chain, name_arg, pv->comment, pluginvar_sysvar_flags(pv), + 0, pv->flags & PLUGIN_VAR_NOCMDOPT ? -1 : 0, NO_ARG, + pluginvar_show_type(pv), 0, + NULL, VARIABLE_NOT_IN_BINLOG, NULL, NULL, substitute), + plugin(p), plugin_var(pv) +{ + plugin_var->name= name_arg; + plugin_opt_set_limits(&option, pv); +} + +uchar* sys_var_pluginvar::real_value_ptr(THD *thd, enum_var_type type) const +{ + if (type == OPT_DEFAULT) + { + switch (plugin_var->flags & PLUGIN_VAR_TYPEMASK) { + case PLUGIN_VAR_BOOL: + thd->sys_var_tmp.my_bool_value= (my_bool)option.def_value; + return (uchar*) &thd->sys_var_tmp.my_bool_value; + case PLUGIN_VAR_INT: + thd->sys_var_tmp.int_value= (int)option.def_value; + return (uchar*) &thd->sys_var_tmp.int_value; + case PLUGIN_VAR_LONG: + case PLUGIN_VAR_ENUM: + thd->sys_var_tmp.long_value= (long)option.def_value; + return (uchar*) &thd->sys_var_tmp.long_value; + case PLUGIN_VAR_LONGLONG: + case PLUGIN_VAR_SET: + return (uchar*) &option.def_value; + case PLUGIN_VAR_STR: + thd->sys_var_tmp.ptr_value= (void*) option.def_value; + return (uchar*) &thd->sys_var_tmp.ptr_value; + case PLUGIN_VAR_DOUBLE: + thd->sys_var_tmp.double_value= getopt_ulonglong2double(option.def_value); + return (uchar*) &thd->sys_var_tmp.double_value; + default: + DBUG_ASSERT(0); + } + } + + DBUG_ASSERT(thd || (type == OPT_GLOBAL)); + if (plugin_var->flags & PLUGIN_VAR_THDLOCAL) + { + if (type == OPT_GLOBAL) + thd= NULL; + + return intern_sys_var_ptr(thd, *(int*) (plugin_var+1), false); + } + return *(uchar**) (plugin_var+1); +} + + +bool sys_var_pluginvar::session_is_default(THD *thd) +{ + uchar *value= plugin_var->flags & PLUGIN_VAR_THDLOCAL + ? 
intern_sys_var_ptr(thd, *(int*) (plugin_var+1), true) + : *(uchar**) (plugin_var+1); + + real_value_ptr(thd, OPT_SESSION); + + switch (plugin_var->flags & PLUGIN_VAR_TYPEMASK) { + case PLUGIN_VAR_BOOL: + return option.def_value == *(my_bool*)value; + case PLUGIN_VAR_INT: + return option.def_value == *(int*)value; + case PLUGIN_VAR_LONG: + case PLUGIN_VAR_ENUM: + return option.def_value == *(long*)value; + case PLUGIN_VAR_LONGLONG: + case PLUGIN_VAR_SET: + return option.def_value == *(longlong*)value; + case PLUGIN_VAR_STR: + { + const char *a=(char*)option.def_value; + const char *b=(char*)value; + return (!a && !b) || (a && b && strcmp(a,b)); + } + case PLUGIN_VAR_DOUBLE: + return getopt_ulonglong2double(option.def_value) == *(double*)value; + } + DBUG_ASSERT(0); + return 0; +} + + +TYPELIB* sys_var_pluginvar::plugin_var_typelib(void) const +{ + switch (plugin_var->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) { + case PLUGIN_VAR_ENUM: + return ((sysvar_enum_t *)plugin_var)->typelib; + case PLUGIN_VAR_SET: + return ((sysvar_set_t *)plugin_var)->typelib; + case PLUGIN_VAR_ENUM | PLUGIN_VAR_THDLOCAL: + return ((thdvar_enum_t *)plugin_var)->typelib; + case PLUGIN_VAR_SET | PLUGIN_VAR_THDLOCAL: + return ((thdvar_set_t *)plugin_var)->typelib; + default: + return NULL; + } + return NULL; /* Keep compiler happy */ +} + + +const uchar* sys_var_pluginvar::do_value_ptr(THD *thd, enum_var_type type, + const LEX_CSTRING *base) const +{ + const uchar* result= real_value_ptr(thd, type); + + if ((plugin_var->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_ENUM) + result= (uchar*) get_type(plugin_var_typelib(), *(ulong*)result); + else if ((plugin_var->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_SET) + result= (uchar*) set_to_string(thd, 0, *(ulonglong*) result, + plugin_var_typelib()->type_names); + return result; +} + +bool sys_var_pluginvar::do_check(THD *thd, set_var *var) +{ + st_item_value_holder value; + DBUG_ASSERT(!is_readonly()); + DBUG_ASSERT(plugin_var->check); + + 
value.value_type= item_value_type; + value.val_str= item_val_str; + value.val_int= item_val_int; + value.val_real= item_val_real; + value.is_unsigned= item_is_unsigned; + value.item= var->value; + + return plugin_var->check(thd, plugin_var, &var->save_result, &value); +} + +bool sys_var_pluginvar::session_update(THD *thd, set_var *var) +{ + DBUG_ASSERT(!is_readonly()); + DBUG_ASSERT(plugin_var->flags & PLUGIN_VAR_THDLOCAL); + DBUG_ASSERT(thd == current_thd); + + mysql_mutex_lock(&LOCK_global_system_variables); + void *tgt= real_value_ptr(thd, OPT_SESSION); + const void *src= var->value ? (void*)&var->save_result + : (void*)real_value_ptr(thd, OPT_GLOBAL); + mysql_mutex_unlock(&LOCK_global_system_variables); + + plugin_var->update(thd, plugin_var, tgt, src); + + return false; +} + +static const void *var_def_ptr(st_mysql_sys_var *pv) +{ + switch (pv->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) { + case PLUGIN_VAR_INT: + return &((sysvar_uint_t*) pv)->def_val; + case PLUGIN_VAR_LONG: + return &((sysvar_ulong_t*) pv)->def_val; + case PLUGIN_VAR_LONGLONG: + return &((sysvar_ulonglong_t*) pv)->def_val; + case PLUGIN_VAR_ENUM: + return &((sysvar_enum_t*) pv)->def_val; + case PLUGIN_VAR_SET: + return &((sysvar_set_t*) pv)->def_val; + case PLUGIN_VAR_BOOL: + return &((sysvar_bool_t*) pv)->def_val; + case PLUGIN_VAR_STR: + return &((sysvar_str_t*) pv)->def_val; + case PLUGIN_VAR_DOUBLE: + return &((sysvar_double_t*) pv)->def_val; + case PLUGIN_VAR_INT | PLUGIN_VAR_THDLOCAL: + return &((thdvar_uint_t*) pv)->def_val; + case PLUGIN_VAR_LONG | PLUGIN_VAR_THDLOCAL: + return &((thdvar_ulong_t*) pv)->def_val; + case PLUGIN_VAR_LONGLONG | PLUGIN_VAR_THDLOCAL: + return &((thdvar_ulonglong_t*) pv)->def_val; + case PLUGIN_VAR_ENUM | PLUGIN_VAR_THDLOCAL: + return &((thdvar_enum_t*) pv)->def_val; + case PLUGIN_VAR_SET | PLUGIN_VAR_THDLOCAL: + return &((thdvar_set_t*) pv)->def_val; + case PLUGIN_VAR_BOOL | PLUGIN_VAR_THDLOCAL: + return &((thdvar_bool_t*) pv)->def_val; + case 
PLUGIN_VAR_STR | PLUGIN_VAR_THDLOCAL: + return &((thdvar_str_t*) pv)->def_val; + case PLUGIN_VAR_DOUBLE | PLUGIN_VAR_THDLOCAL: + return &((thdvar_double_t*) pv)->def_val; + default: + DBUG_ASSERT(0); + return NULL; + } +} + + +bool sys_var_pluginvar::global_update(THD *thd, set_var *var) +{ + DBUG_ASSERT(!is_readonly()); + mysql_mutex_assert_owner(&LOCK_global_system_variables); + + void *tgt= real_value_ptr(thd, OPT_GLOBAL); + const void *src= &var->save_result; + + if (!var->value) + src= var_def_ptr(plugin_var); + + plugin_var->update(thd, plugin_var, tgt, src); + return false; +} + + +#define OPTION_SET_LIMITS(type, options, opt) \ + options->var_type= type; \ + options->def_value= (opt)->def_val; \ + options->min_value= (opt)->min_val; \ + options->max_value= (opt)->max_val; \ + options->block_size= (long) (opt)->blk_sz + +#define OPTION_SET_LIMITS_DOUBLE(options, opt) \ + options->var_type= GET_DOUBLE; \ + options->def_value= (longlong) getopt_double2ulonglong((opt)->def_val); \ + options->min_value= (longlong) getopt_double2ulonglong((opt)->min_val); \ + options->max_value= getopt_double2ulonglong((opt)->max_val); \ + options->block_size= (long) (opt)->blk_sz; + +void plugin_opt_set_limits(struct my_option *options, + const struct st_mysql_sys_var *opt) +{ + options->sub_size= 0; + + switch (opt->flags & (PLUGIN_VAR_TYPEMASK | + PLUGIN_VAR_UNSIGNED | PLUGIN_VAR_THDLOCAL)) { + /* global system variables */ + case PLUGIN_VAR_INT: + OPTION_SET_LIMITS(GET_INT, options, (sysvar_int_t*) opt); + break; + case PLUGIN_VAR_INT | PLUGIN_VAR_UNSIGNED: + OPTION_SET_LIMITS(GET_UINT, options, (sysvar_uint_t*) opt); + break; + case PLUGIN_VAR_LONG: + OPTION_SET_LIMITS(GET_LONG, options, (sysvar_long_t*) opt); + break; + case PLUGIN_VAR_LONG | PLUGIN_VAR_UNSIGNED: + OPTION_SET_LIMITS(GET_ULONG, options, (sysvar_ulong_t*) opt); + break; + case PLUGIN_VAR_LONGLONG: + OPTION_SET_LIMITS(GET_LL, options, (sysvar_longlong_t*) opt); + break; + case PLUGIN_VAR_LONGLONG | 
PLUGIN_VAR_UNSIGNED: + OPTION_SET_LIMITS(GET_ULL, options, (sysvar_ulonglong_t*) opt); + break; + case PLUGIN_VAR_ENUM: + options->var_type= GET_ENUM; + options->typelib= ((sysvar_enum_t*) opt)->typelib; + options->def_value= ((sysvar_enum_t*) opt)->def_val; + options->min_value= options->block_size= 0; + options->max_value= options->typelib->count - 1; + break; + case PLUGIN_VAR_SET: + options->var_type= GET_SET; + options->typelib= ((sysvar_set_t*) opt)->typelib; + options->def_value= ((sysvar_set_t*) opt)->def_val; + options->min_value= options->block_size= 0; + options->max_value= (1ULL << options->typelib->count) - 1; + break; + case PLUGIN_VAR_BOOL: + options->var_type= GET_BOOL; + options->def_value= ((sysvar_bool_t*) opt)->def_val; + options->typelib= &bool_typelib; + break; + case PLUGIN_VAR_STR: + options->var_type= ((opt->flags & PLUGIN_VAR_MEMALLOC) ? + GET_STR_ALLOC : GET_STR); + options->def_value= (intptr) ((sysvar_str_t*) opt)->def_val; + break; + case PLUGIN_VAR_DOUBLE: + OPTION_SET_LIMITS_DOUBLE(options, (sysvar_double_t*) opt); + break; + /* threadlocal variables */ + case PLUGIN_VAR_INT | PLUGIN_VAR_THDLOCAL: + OPTION_SET_LIMITS(GET_INT, options, (thdvar_int_t*) opt); + break; + case PLUGIN_VAR_INT | PLUGIN_VAR_UNSIGNED | PLUGIN_VAR_THDLOCAL: + OPTION_SET_LIMITS(GET_UINT, options, (thdvar_uint_t*) opt); + break; + case PLUGIN_VAR_LONG | PLUGIN_VAR_THDLOCAL: + OPTION_SET_LIMITS(GET_LONG, options, (thdvar_long_t*) opt); + break; + case PLUGIN_VAR_LONG | PLUGIN_VAR_UNSIGNED | PLUGIN_VAR_THDLOCAL: + OPTION_SET_LIMITS(GET_ULONG, options, (thdvar_ulong_t*) opt); + break; + case PLUGIN_VAR_LONGLONG | PLUGIN_VAR_THDLOCAL: + OPTION_SET_LIMITS(GET_LL, options, (thdvar_longlong_t*) opt); + break; + case PLUGIN_VAR_LONGLONG | PLUGIN_VAR_UNSIGNED | PLUGIN_VAR_THDLOCAL: + OPTION_SET_LIMITS(GET_ULL, options, (thdvar_ulonglong_t*) opt); + break; + case PLUGIN_VAR_DOUBLE | PLUGIN_VAR_THDLOCAL: + OPTION_SET_LIMITS_DOUBLE(options, (thdvar_double_t*) opt); + break; 
+ case PLUGIN_VAR_ENUM | PLUGIN_VAR_THDLOCAL: + options->var_type= GET_ENUM; + options->typelib= ((thdvar_enum_t*) opt)->typelib; + options->def_value= ((thdvar_enum_t*) opt)->def_val; + options->min_value= options->block_size= 0; + options->max_value= options->typelib->count - 1; + break; + case PLUGIN_VAR_SET | PLUGIN_VAR_THDLOCAL: + options->var_type= GET_SET; + options->typelib= ((thdvar_set_t*) opt)->typelib; + options->def_value= ((thdvar_set_t*) opt)->def_val; + options->min_value= options->block_size= 0; + options->max_value= (1ULL << options->typelib->count) - 1; + break; + case PLUGIN_VAR_BOOL | PLUGIN_VAR_THDLOCAL: + options->var_type= GET_BOOL; + options->def_value= ((thdvar_bool_t*) opt)->def_val; + options->typelib= &bool_typelib; + break; + case PLUGIN_VAR_STR | PLUGIN_VAR_THDLOCAL: + options->var_type= ((opt->flags & PLUGIN_VAR_MEMALLOC) ? + GET_STR_ALLOC : GET_STR); + options->def_value= (intptr) ((thdvar_str_t*) opt)->def_val; + break; + default: + DBUG_ASSERT(0); + } + options->arg_type= REQUIRED_ARG; + if (opt->flags & PLUGIN_VAR_NOCMDARG) + options->arg_type= NO_ARG; + if (opt->flags & PLUGIN_VAR_OPCMDARG) + options->arg_type= OPT_ARG; +} + +/** + Creates a set of my_option objects associated with a specified plugin- + handle. + + @param mem_root Memory allocator to be used. + @param tmp A pointer to a plugin handle + @param[out] options A pointer to a pre-allocated static array + + The set is stored in the pre-allocated static array supplied to the function. + The size of the array is calculated as (number_of_plugin_varaibles*2+3). The + reason is that each option can have a prefix '--plugin-' in addtion to the + shorter form '--<plugin-name>'. There is also space allocated for + terminating NULL pointers. 
+ + @return + @retval -1 An error occurred + @retval 0 Success +*/ + +static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp, + my_option *options) +{ + const char *plugin_name= tmp->plugin->name; + const LEX_CSTRING plugin_dash = { STRING_WITH_LEN("plugin-") }; + size_t plugin_name_len= strlen(plugin_name); + size_t optnamelen; + const int max_comment_len= 255; + char *comment= (char *) alloc_root(mem_root, max_comment_len + 1); + char *optname; + + int index= 0, UNINIT_VAR(offset); + st_mysql_sys_var *opt, **plugin_option; + st_bookmark *v; + + /** Used to circumvent the const attribute on my_option::name */ + char *plugin_name_ptr, *plugin_name_with_prefix_ptr; + + DBUG_ENTER("construct_options"); + + plugin_name_ptr= (char*) alloc_root(mem_root, plugin_name_len + 1); + strcpy(plugin_name_ptr, plugin_name); + my_casedn_str(&my_charset_latin1, plugin_name_ptr); + convert_underscore_to_dash(plugin_name_ptr, plugin_name_len); + plugin_name_with_prefix_ptr= (char*) alloc_root(mem_root, + plugin_name_len + + plugin_dash.length + 1); + strxmov(plugin_name_with_prefix_ptr, plugin_dash.str, plugin_name_ptr, NullS); + + if (!plugin_is_forced(tmp)) + { + /* support --skip-plugin-foo syntax */ + options[0].name= plugin_name_ptr; + options[1].name= plugin_name_with_prefix_ptr; + options[0].id= options[1].id= 0; + options[0].var_type= options[1].var_type= GET_ENUM; + options[0].arg_type= options[1].arg_type= OPT_ARG; + options[0].def_value= options[1].def_value= 1; /* ON */ + options[0].typelib= options[1].typelib= &global_plugin_typelib; + + strxnmov(comment, max_comment_len, "Enable or disable ", plugin_name, + " plugin. One of: ON, OFF, FORCE (don't start if the plugin" + " fails to load), FORCE_PLUS_PERMANENT (like FORCE, but the" + " plugin can not be uninstalled).", NullS); + options[0].comment= comment; + /* + Allocate temporary space for the value of the tristate. 
+ This option will have a limited lifetime and is not used beyond + server initialization. + GET_ENUM value is an unsigned long integer. + */ + options[0].value= options[1].value= + (uchar **)alloc_root(mem_root, sizeof(ulong)); + *((ulong*) options[0].value)= (ulong) options[0].def_value; + + options+= 2; + } + + /* + Two passes as the 2nd pass will take pointer addresses for use + by my_getopt and register_var() in the first pass uses realloc + */ + + for (plugin_option= tmp->plugin->system_vars; + plugin_option && *plugin_option; plugin_option++, index++) + { + opt= *plugin_option; + + if (!opt->name) + { + sql_print_error("Missing variable name in plugin '%s'.", + plugin_name); + DBUG_RETURN(-1); + } + + if (!(opt->flags & PLUGIN_VAR_THDLOCAL)) + continue; + if (!(register_var(plugin_name_ptr, opt->name, opt->flags))) + continue; + switch (opt->flags & PLUGIN_VAR_TYPEMASK) { + case PLUGIN_VAR_BOOL: + ((thdvar_bool_t *) opt)->resolve= mysql_sys_var_char; + break; + case PLUGIN_VAR_INT: + ((thdvar_int_t *) opt)->resolve= mysql_sys_var_int; + break; + case PLUGIN_VAR_LONG: + ((thdvar_long_t *) opt)->resolve= mysql_sys_var_long; + break; + case PLUGIN_VAR_LONGLONG: + ((thdvar_longlong_t *) opt)->resolve= mysql_sys_var_longlong; + break; + case PLUGIN_VAR_STR: + ((thdvar_str_t *) opt)->resolve= mysql_sys_var_str; + break; + case PLUGIN_VAR_ENUM: + ((thdvar_enum_t *) opt)->resolve= mysql_sys_var_ulong; + break; + case PLUGIN_VAR_SET: + ((thdvar_set_t *) opt)->resolve= mysql_sys_var_ulonglong; + break; + case PLUGIN_VAR_DOUBLE: + ((thdvar_double_t *) opt)->resolve= mysql_sys_var_double; + break; + default: + sql_print_error("Unknown variable type code 0x%x in plugin '%s'.", + opt->flags, plugin_name); + DBUG_RETURN(-1); + }; + } + + for (plugin_option= tmp->plugin->system_vars; + plugin_option && *plugin_option; plugin_option++, index++) + { + switch ((opt= *plugin_option)->flags & PLUGIN_VAR_TYPEMASK) { + case PLUGIN_VAR_BOOL: + if (!opt->check) + opt->check= 
check_func_bool; + if (!opt->update) + opt->update= update_func_bool; + break; + case PLUGIN_VAR_INT: + if (!opt->check) + opt->check= check_func_int; + if (!opt->update) + opt->update= update_func_int; + break; + case PLUGIN_VAR_LONG: + if (!opt->check) + opt->check= check_func_long; + if (!opt->update) + opt->update= update_func_long; + break; + case PLUGIN_VAR_LONGLONG: + if (!opt->check) + opt->check= check_func_longlong; + if (!opt->update) + opt->update= update_func_longlong; + break; + case PLUGIN_VAR_STR: + if (!opt->check) + opt->check= check_func_str; + if (!opt->update) + { + opt->update= update_func_str; + if (!(opt->flags & (PLUGIN_VAR_MEMALLOC | PLUGIN_VAR_READONLY))) + { + opt->flags|= PLUGIN_VAR_READONLY; + sql_print_warning("Server variable %s of plugin %s was forced " + "to be read-only: string variable without " + "update_func and PLUGIN_VAR_MEMALLOC flag", + opt->name, plugin_name); + } + } + break; + case PLUGIN_VAR_ENUM: + if (!opt->check) + opt->check= check_func_enum; + if (!opt->update) + opt->update= update_func_long; + break; + case PLUGIN_VAR_SET: + if (!opt->check) + opt->check= check_func_set; + if (!opt->update) + opt->update= update_func_longlong; + break; + case PLUGIN_VAR_DOUBLE: + if (!opt->check) + opt->check= check_func_double; + if (!opt->update) + opt->update= update_func_double; + break; + default: + sql_print_error("Unknown variable type code 0x%x in plugin '%s'.", + opt->flags, plugin_name); + DBUG_RETURN(-1); + } + + if ((opt->flags & (PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_THDLOCAL)) + == PLUGIN_VAR_NOCMDOPT) + continue; + + if (!(opt->flags & PLUGIN_VAR_THDLOCAL)) + { + optnamelen= strlen(opt->name); + optname= (char*) alloc_root(mem_root, plugin_name_len + optnamelen + 2); + strxmov(optname, plugin_name_ptr, "-", opt->name, NullS); + optnamelen= plugin_name_len + optnamelen + 1; + } + else + { + /* this should not fail because register_var should create entry */ + if (!(v= find_bookmark(plugin_name_ptr, opt->name, 
opt->flags))) + { + sql_print_error("Thread local variable '%s' not allocated " + "in plugin '%s'.", opt->name, plugin_name); + DBUG_RETURN(-1); + } + + *(int*)(opt + 1)= offset= v->offset; + + if (opt->flags & PLUGIN_VAR_NOCMDOPT) + { + char *val= global_system_variables.dynamic_variables_ptr + offset; + if (((opt->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR) && + (opt->flags & PLUGIN_VAR_MEMALLOC)) + { + char *def_val= *(char**)var_def_ptr(opt); + *(char**)val= def_val ? my_strdup(PSI_INSTRUMENT_ME, def_val, MYF(0)) : NULL; + } + else + memcpy(val, var_def_ptr(opt), var_storage_size(opt->flags)); + continue; + } + + optname= (char*) memdup_root(mem_root, v->key + 1, + (optnamelen= v->name_len) + 1); + } + + convert_underscore_to_dash(optname, optnamelen); + + options->name= optname; + options->comment= opt->comment; + options->app_type= (opt->flags & PLUGIN_VAR_NOSYSVAR) ? NULL : opt; + options->id= 0; + + plugin_opt_set_limits(options, opt); + + if (opt->flags & PLUGIN_VAR_THDLOCAL) + options->value= options->u_max_value= (uchar**) + (global_system_variables.dynamic_variables_ptr + offset); + else + options->value= options->u_max_value= *(uchar***) (opt + 1); + + char *option_name_ptr; + options[1]= options[0]; + options[1].name= option_name_ptr= (char*) alloc_root(mem_root, + plugin_dash.length + + optnamelen + 1); + options[1].comment= 0; /* Hidden from the help text */ + strxmov(option_name_ptr, plugin_dash.str, optname, NullS); + + options+= 2; + } + + DBUG_RETURN(0); +} + + +static my_option *construct_help_options(MEM_ROOT *mem_root, + struct st_plugin_int *p) +{ + st_mysql_sys_var **opt; + my_option *opts; + uint count= EXTRA_OPTIONS; + DBUG_ENTER("construct_help_options"); + + for (opt= p->plugin->system_vars; opt && *opt; opt++, count+= 2) + ; + + if (!(opts= (my_option*) alloc_root(mem_root, sizeof(my_option) * count))) + DBUG_RETURN(NULL); + + bzero(opts, sizeof(my_option) * count); + + /** + some plugin variables + have their names prefixed with 
the plugin name. Restore the names here + to get the correct (not double-prefixed) help text. + We won't need @@sysvars anymore and don't care about their proper names. + */ + restore_ptr_backup(p->nbackups, p->ptr_backup); + + if (construct_options(mem_root, p, opts)) + DBUG_RETURN(NULL); + + DBUG_RETURN(opts); +} + +extern "C" my_bool mark_changed(const struct my_option *, const char *, + const char *); +my_bool mark_changed(const struct my_option *opt, const char *, + const char *filename) +{ + if (opt->app_type) + { + sys_var *var= (sys_var*) opt->app_type; + if (*filename) + { + var->origin_filename= filename; + var->value_origin= sys_var::CONFIG; + } + else + var->value_origin= sys_var::COMMAND_LINE; + } + return 0; +} + +/** + It is always false to mark global plugin variable unloaded just to be + safe because we have no way now to know truth about them. + + TODO: make correct mechanism for global plugin variables +*/ +static bool static_unload= FALSE; + +/** + Create and register system variables supplied from the plugin and + assigns initial values from corresponding command line arguments. + + @param tmp_root Temporary scratch space + @param[out] plugin Internal plugin structure + @param argc Number of command line arguments + @param argv Command line argument vector + + The plugin will be updated with a policy on how to handle errors during + initialization. + + @note Requires that a write-lock is held on LOCK_system_variables_hash + + @return How initialization of the plugin should be handled. + @retval 0 Initialization should proceed. + @retval 1 Plugin is disabled. + @retval -1 An error has occurred. +*/ + +static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp, + int *argc, char **argv) +{ + struct sys_var_chain chain= { NULL, NULL }; + bool disable_plugin; + enum_plugin_load_option plugin_load_option= tmp->load_option; + + MEM_ROOT *mem_root= alloc_root_inited(&tmp->mem_root) ? 
+ &tmp->mem_root : &plugin_vars_mem_root; + st_mysql_sys_var **opt; + my_option *opts= NULL; + int error= 1; + struct st_bookmark *var; + size_t len=0, count= EXTRA_OPTIONS; + st_ptr_backup *tmp_backup= 0; + DBUG_ENTER("test_plugin_options"); + DBUG_ASSERT(tmp->plugin && tmp->name.str); + + if (tmp->plugin->system_vars || (*argc > 1)) + { + for (opt= tmp->plugin->system_vars; opt && *opt; opt++) + { + len++; + if (!((*opt)->flags & PLUGIN_VAR_NOCMDOPT)) + count+= 2; /* --{plugin}-{optname} and --plugin-{plugin}-{optname} */ + } + + if (!(opts= (my_option*) alloc_root(tmp_root, sizeof(my_option) * count))) + { + sql_print_error("Out of memory for plugin '%s'.", tmp->name.str); + DBUG_RETURN(-1); + } + bzero(opts, sizeof(my_option) * count); + + if (construct_options(tmp_root, tmp, opts)) + { + sql_print_error("Bad options for plugin '%s'.", tmp->name.str); + DBUG_RETURN(-1); + } + + if (tmp->plugin->system_vars) + { + tmp_backup= (st_ptr_backup *)my_alloca(len * sizeof(tmp_backup[0])); + DBUG_ASSERT(tmp->nbackups == 0); + DBUG_ASSERT(tmp->ptr_backup == 0); + + for (opt= tmp->plugin->system_vars; *opt; opt++) + { + st_mysql_sys_var *o= *opt; + char *varname; + sys_var *v; + + tmp_backup[tmp->nbackups++].save(&o->name); + if ((var= find_bookmark(tmp->name.str, o->name, o->flags))) + { + varname= var->key + 1; + var->loaded= TRUE; + } + else + { + var= NULL; + len= tmp->name.length + strlen(o->name) + 2; + varname= (char*) alloc_root(mem_root, len); + strxmov(varname, tmp->name.str, "-", o->name, NullS); + my_casedn_str(&my_charset_latin1, varname); + convert_dash_to_underscore(varname, len-1); + } + if (o->flags & PLUGIN_VAR_NOSYSVAR) + { + o->name= varname; + continue; + } + + const char *s= o->flags & PLUGIN_VAR_DEPRECATED ? "" : NULL; + v= new (mem_root) sys_var_pluginvar(&chain, varname, tmp, o, s); + v->test_load= (var ? 
&var->loaded : &static_unload); + DBUG_ASSERT(static_unload == FALSE); + + if (!(o->flags & PLUGIN_VAR_NOCMDOPT)) + { + // update app_type, used for I_S.SYSTEM_VARIABLES + for (my_option *mo=opts; mo->name; mo++) + if (mo->app_type == o) + mo->app_type= v; + } + } + + if (tmp->nbackups) + { + size_t bytes= tmp->nbackups * sizeof(tmp->ptr_backup[0]); + tmp->ptr_backup= (st_ptr_backup *)alloc_root(mem_root, bytes); + if (!tmp->ptr_backup) + { + restore_ptr_backup(tmp->nbackups, tmp_backup); + my_afree(tmp_backup); + goto err; + } + memcpy(tmp->ptr_backup, tmp_backup, bytes); + } + my_afree(tmp_backup); + } + + /* + We adjust the default value to account for the hardcoded exceptions + we have set for the federated and ndbcluster storage engines. + */ + if (!plugin_is_forced(tmp)) + opts[0].def_value= opts[1].def_value= plugin_load_option; + + error= handle_options(argc, &argv, opts, mark_changed); + (*argc)++; /* add back one for the program name */ + + if (unlikely(error)) + { + sql_print_error("Parsing options for plugin '%s' failed.", + tmp->name.str); + goto err; + } + /* + Set plugin loading policy from option value. First element in the option + list is always the option value. + */ + if (!plugin_is_forced(tmp)) + plugin_load_option= (enum_plugin_load_option) *(ulong*) opts[0].value; + } + + disable_plugin= (plugin_load_option == PLUGIN_OFF); + tmp->load_option= plugin_load_option; + + error= 1; + + /* + If the plugin is disabled it should not be initialized. + */ + if (disable_plugin) + { + if (global_system_variables.log_warnings && !opt_help) + sql_print_information("Plugin '%s' is disabled.", + tmp->name.str); + goto err; + } + + if (tmp->plugin->system_vars) + { + if (mysqld_server_started) + { + /* + PLUGIN_VAR_STR command-line options without PLUGIN_VAR_MEMALLOC, point + directly to values in the argv[] array. For plugins started at the + server startup, argv[] array is allocated with load_defaults(), and + freed when the server is shut down. 
But for plugins loaded with + INSTALL PLUGIN, the memory allocated with load_defaults() is freed with + free() at the end of mysql_install_plugin(). Which means we cannot + allow any pointers into that area. + Thus, for all plugins loaded after the server was started, + we copy string values to a plugin's memroot. + */ + for (opt= tmp->plugin->system_vars; *opt; opt++) + { + if ((((*opt)->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_NOCMDOPT | + PLUGIN_VAR_MEMALLOC)) == PLUGIN_VAR_STR)) + { + sysvar_str_t* str= (sysvar_str_t *)*opt; + if (*str->value) + *str->value= strdup_root(mem_root, *str->value); + } + } + /* same issue with config file names */ + for (my_option *mo=opts; mo->name; mo++) + { + sys_var *var= (sys_var*) mo->app_type; + if (var && var->value_origin == sys_var::CONFIG) + var->origin_filename= strdup_root(mem_root, var->origin_filename); + } + } + + if (chain.first) + { + chain.last->next = NULL; + if (mysql_add_sys_var_chain(chain.first)) + { + sql_print_error("Plugin '%s' has conflicting system variables", + tmp->name.str); + goto err; + } + tmp->system_vars= chain.first; + } + } + + DBUG_RETURN(0); + +err: + if (opts) + my_cleanup_options(opts); + DBUG_RETURN(error); +} + + +/**************************************************************************** + Help Verbose text with Plugin System Variables +****************************************************************************/ + + +void add_plugin_options(DYNAMIC_ARRAY *options, MEM_ROOT *mem_root) +{ + struct st_plugin_int *p; + my_option *opt; + + if (!initialized) + return; + + for (size_t idx= 0; idx < plugin_array.elements; idx++) + { + p= *dynamic_element(&plugin_array, idx, struct st_plugin_int **); + + if (!(opt= construct_help_options(mem_root, p))) + continue; + + /* Only options with a non-NULL comment are displayed in help text */ + for (;opt->name; opt++) + if (opt->comment) + insert_dynamic(options, (uchar*) opt); + } +} + + +/** + Returns a sys_var corresponding to a particular 
MYSQL_SYSVAR(...) +*/ +sys_var *find_plugin_sysvar(st_plugin_int *plugin, st_mysql_sys_var *plugin_var) +{ + for (sys_var *var= plugin->system_vars; var; var= var->next) + { + sys_var_pluginvar *pvar=var->cast_pluginvar(); + if (pvar->plugin_var == plugin_var) + return var; + } + return 0; +} + +/* + On dlclose() we need to restore values of all symbols that we've modified in + the DSO. The reason is - the DSO might not actually be unloaded, so on the + next dlopen() these symbols will have old values, they won't be + reinitialized. + + Perhaps, there can be many reason, why a DSO won't be unloaded. Strictly + speaking, it's implementation defined whether to unload an unused DSO or to + keep it in memory. + + In particular, this happens for some plugins: In 2009 a new ELF stub was + introduced, see Ulrich Drepper's email "Unique symbols for C++" + http://www.redhat.com/archives/posix-c++-wg/2009-August/msg00002.html + + DSO that has objects with this stub (STB_GNU_UNIQUE) cannot be unloaded + (this is mentioned in the email, see the url above). + + These "unique" objects are, for example, static variables in templates, + in inline functions, in classes. So any DSO that uses them can + only be loaded once. And because Boost has them, any DSO that uses Boost + almost certainly cannot be unloaded. + + To know whether a particular DSO has these objects, one can use + + readelf -s /path/to/plugin.so|grep UNIQUE + + There's nothing we can do about it, but to reset the DSO to its initial + state before dlclose(). 
+*/ +static void restore_ptr_backup(uint n, st_ptr_backup *backup) +{ + while (n--) + (backup++)->restore(); +} + +/**************************************************************************** + thd specifics service, see include/mysql/service_thd_specifics.h +****************************************************************************/ +static const int INVALID_THD_KEY= -1; +static uint thd_key_no = 42; + +int thd_key_create(MYSQL_THD_KEY_T *key) +{ + int flags= PLUGIN_VAR_THDLOCAL | PLUGIN_VAR_STR | + PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_NOCMDOPT; + char namebuf[256]; + snprintf(namebuf, sizeof(namebuf), "%u", thd_key_no++); + mysql_prlock_wrlock(&LOCK_system_variables_hash); + // non-letters in the name as an extra safety + st_bookmark *bookmark= register_var("\a\v\a\t\a\r", namebuf, flags); + mysql_prlock_unlock(&LOCK_system_variables_hash); + if (bookmark) + { + *key= bookmark->offset; + return 0; + } + return ENOMEM; +} + +void thd_key_delete(MYSQL_THD_KEY_T *key) +{ + *key= INVALID_THD_KEY; +} + +void* thd_getspecific(MYSQL_THD thd, MYSQL_THD_KEY_T key) +{ + DBUG_ASSERT(key != INVALID_THD_KEY); + if (key == INVALID_THD_KEY || (!thd && !(thd= current_thd))) + return 0; + + return *(void**)(intern_sys_var_ptr(thd, key, true)); +} + +int thd_setspecific(MYSQL_THD thd, MYSQL_THD_KEY_T key, void *value) +{ + DBUG_ASSERT(key != INVALID_THD_KEY); + if (key == INVALID_THD_KEY || (!thd && !(thd= current_thd))) + return EINVAL; + + memcpy(intern_sys_var_ptr(thd, key, true), &value, sizeof(void*)); + return 0; +} + +void plugin_mutex_init() +{ + init_plugin_psi_keys(); + mysql_mutex_init(key_LOCK_plugin, &LOCK_plugin, MY_MUTEX_INIT_FAST); +} + +#ifdef WITH_WSREP + +/* + Placeholder for global_system_variables.table_plugin required during + initialization of startup wsrep threads. +*/ +static st_plugin_int wsrep_dummy_plugin; +static st_plugin_int *wsrep_dummy_plugin_ptr; + +/* + Initialize wsrep_dummy_plugin and assign it to + global_system_variables.table_plugin. 
+*/ +void wsrep_plugins_pre_init() +{ + wsrep_dummy_plugin_ptr= &wsrep_dummy_plugin; + wsrep_dummy_plugin.state= PLUGIN_IS_DISABLED; + global_system_variables.table_plugin= + plugin_int_to_ref(wsrep_dummy_plugin_ptr); +} + +/* + This function is intended to be called after the plugins and related + global system variables are initialized. It re-initializes some data + members of wsrep startup threads with correct values, as these value + were not available at the time these threads were created. +*/ + +my_bool post_init_callback(THD *thd, void *) +{ + DBUG_ASSERT(!current_thd); + if (thd->wsrep_applier) + { + // Save options_bits as it will get overwritten in + // plugin_thdvar_init() (verified) + ulonglong option_bits_saved= thd->variables.option_bits; + + set_current_thd(thd); + plugin_thdvar_init(thd); + + // Restore option_bits + thd->variables.option_bits= option_bits_saved; + } + set_current_thd(0); + return 0; +} + + +void wsrep_plugins_post_init() +{ + mysql_mutex_lock(&LOCK_global_system_variables); + server_threads.iterate(post_init_callback); + mysql_mutex_unlock(&LOCK_global_system_variables); +} +#endif /* WITH_WSREP */ diff --git a/sql/sql_plugin.h b/sql/sql_plugin.h new file mode 100644 index 00000000..d4df8c64 --- /dev/null +++ b/sql/sql_plugin.h @@ -0,0 +1,206 @@ +/* Copyright (c) 2005, 2012, Oracle and/or its affiliates. + Copyright (c) 2009, 2017, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef _sql_plugin_h +#define _sql_plugin_h + +/* + the following #define adds server-only members to enum_mysql_show_type, + that is defined in plugin.h +*/ +#define SHOW_always_last SHOW_KEY_CACHE_LONG, \ + SHOW_HAVE, SHOW_MY_BOOL, SHOW_HA_ROWS, SHOW_SYS, \ + SHOW_LONG_NOFLUSH, SHOW_LEX_STRING, SHOW_ATOMIC_COUNTER_UINT32_T, \ + /* SHOW_*_STATUS must be at the end, SHOW_LONG_STATUS being first */ \ + SHOW_LONG_STATUS, SHOW_DOUBLE_STATUS, SHOW_LONGLONG_STATUS, \ + SHOW_UINT32_STATUS +#include "mariadb.h" +#undef SHOW_always_last + +#include "m_string.h" /* LEX_STRING */ +#include "my_alloc.h" /* MEM_ROOT */ + +class sys_var; +enum SHOW_COMP_OPTION { SHOW_OPTION_YES, SHOW_OPTION_NO, SHOW_OPTION_DISABLED}; +enum enum_plugin_load_option { PLUGIN_OFF, PLUGIN_ON, PLUGIN_FORCE, + PLUGIN_FORCE_PLUS_PERMANENT }; +extern const char *global_plugin_typelib_names[]; + +extern volatile int global_plugin_version; +extern ulong dlopen_count; + +#include +#include "sql_list.h" + +#ifdef DBUG_OFF +#define plugin_ref_to_int(A) A +#define plugin_int_to_ref(A) A +#else +#define plugin_ref_to_int(A) (A ? A[0] : NULL) +#define plugin_int_to_ref(A) &(A) +#endif + +/* + the following flags are valid for plugin_init() +*/ +#define PLUGIN_INIT_SKIP_PLUGIN_TABLE 1U +#define PLUGIN_INIT_SKIP_INITIALIZATION 2U + +#define INITIAL_LEX_PLUGIN_LIST_SIZE 16 + +typedef enum enum_mysql_show_type SHOW_TYPE; +typedef struct st_mysql_show_var SHOW_VAR; + +#define MYSQL_ANY_PLUGIN -1 + +/* + different values of st_plugin_int::state + though they look like a bitmap, plugin may only + be in one of those eigenstates, not in a superposition of them :) + It's a bitmap, because it makes it easier to test + "whether the state is one of those..." 
+*/ +#define PLUGIN_IS_FREED 1U +#define PLUGIN_IS_DELETED 2U +#define PLUGIN_IS_UNINITIALIZED 4U +#define PLUGIN_IS_READY 8U +#define PLUGIN_IS_DYING 16U +#define PLUGIN_IS_DISABLED 32U + +struct st_ptr_backup { + void **ptr; + void *value; + void save(void **p) { ptr= p; value= *p; } + void save(const char **p) { save((void**)p); } + void restore() { *ptr= value; } +}; + +/* A handle for the dynamic library containing a plugin or plugins. */ + +struct st_plugin_dl +{ + LEX_CSTRING dl; + void *handle; + struct st_maria_plugin *plugins; + st_ptr_backup *ptr_backup; + uint nbackups; + uint ref_count; /* number of plugins loaded from the library */ + int mysqlversion; + int mariaversion; + bool allocated; +}; + +/* A handle of a plugin */ + +struct st_plugin_int +{ + LEX_CSTRING name; + struct st_maria_plugin *plugin; + struct st_plugin_dl *plugin_dl; + st_ptr_backup *ptr_backup; + uint nbackups; + uint state; + uint ref_count; /* number of threads using the plugin */ + uint locks_total; /* how many times the plugin was locked */ + void *data; /* plugin type specific, e.g. handlerton */ + MEM_ROOT mem_root; /* memory for dynamic plugin structures */ + sys_var *system_vars; /* server variables for this plugin */ + enum enum_plugin_load_option load_option; /* OFF, ON, FORCE, F+PERMANENT */ +}; + + +extern mysql_mutex_t LOCK_plugin; + +/* + See intern_plugin_lock() for the explanation for the + conditionally defined plugin_ref type +*/ +#ifdef DBUG_OFF +typedef struct st_plugin_int *plugin_ref; +#define plugin_ref_to_int(A) A +#define plugin_int_to_ref(A) A +#define plugin_decl(pi) ((pi)->plugin) +#define plugin_dlib(pi) ((pi)->plugin_dl) +#define plugin_data(pi,cast) ((cast)((pi)->data)) +#define plugin_name(pi) (&((pi)->name)) +#define plugin_state(pi) ((pi)->state) +#define plugin_load_option(pi) ((pi)->load_option) +#define plugin_equals(p1,p2) ((p1) == (p2)) +#else +typedef struct st_plugin_int **plugin_ref; +#define plugin_ref_to_int(A) (A ? 
A[0] : NULL) +#define plugin_int_to_ref(A) &(A) +#define plugin_decl(pi) ((pi)[0]->plugin) +#define plugin_dlib(pi) ((pi)[0]->plugin_dl) +#define plugin_data(pi,cast) ((cast)((pi)[0]->data)) +#define plugin_name(pi) (&((pi)[0]->name)) +#define plugin_state(pi) ((pi)[0]->state) +#define plugin_load_option(pi) ((pi)[0]->load_option) +#define plugin_equals(p1,p2) ((p1) && (p2) && (p1)[0] == (p2)[0]) +#endif + +typedef int (*plugin_type_init)(struct st_plugin_int *); + +extern I_List *opt_plugin_load_list_ptr; +extern char *opt_plugin_dir_ptr; +extern MYSQL_PLUGIN_IMPORT char opt_plugin_dir[FN_REFLEN]; +extern const LEX_CSTRING plugin_type_names[]; +extern ulong plugin_maturity; +extern TYPELIB plugin_maturity_values; +extern const char *plugin_maturity_names[]; + +extern int plugin_init(int *argc, char **argv, int init_flags); +extern void plugin_shutdown(void); +void add_plugin_options(DYNAMIC_ARRAY *options, MEM_ROOT *mem_root); +extern bool plugin_is_ready(const LEX_CSTRING *name, int type); +#define my_plugin_lock_by_name(A,B,C) plugin_lock_by_name(A,B,C) +#define my_plugin_lock(A,B) plugin_lock(A,B) +extern plugin_ref plugin_lock(THD *thd, plugin_ref ptr); +extern plugin_ref plugin_lock_by_name(THD *thd, const LEX_CSTRING *name, + int type); +extern void plugin_unlock(THD *thd, plugin_ref plugin); +extern void plugin_unlock_list(THD *thd, plugin_ref *list, size_t count); +extern bool mysql_install_plugin(THD *thd, const LEX_CSTRING *name, + const LEX_CSTRING *dl); +extern bool mysql_uninstall_plugin(THD *thd, const LEX_CSTRING *name, + const LEX_CSTRING *dl); +extern bool plugin_register_builtin(struct st_mysql_plugin *plugin); +extern void plugin_thdvar_init(THD *thd); +extern void plugin_thdvar_cleanup(THD *thd); +sys_var *find_plugin_sysvar(st_plugin_int *plugin, st_mysql_sys_var *var); +void plugin_opt_set_limits(struct my_option *, const struct st_mysql_sys_var *); +extern SHOW_COMP_OPTION plugin_status(const char *name, size_t len, int type); +extern bool 
check_valid_path(const char *path, size_t length); +extern void plugin_mutex_init(); + +typedef my_bool (plugin_foreach_func)(THD *thd, + plugin_ref plugin, + void *arg); +#define plugin_foreach(A,B,C,D) plugin_foreach_with_mask(A,B,C,PLUGIN_IS_READY,D) +extern bool plugin_foreach_with_mask(THD *thd, plugin_foreach_func *func, + int type, uint state_mask, void *arg); +extern void sync_dynamic_session_variables(THD* thd, bool global_lock); + +extern bool plugin_dl_foreach(THD *thd, const LEX_CSTRING *dl, + plugin_foreach_func *func, void *arg); + +extern void sync_dynamic_session_variables(THD* thd, bool global_lock); +#endif + +#ifdef WITH_WSREP +extern void wsrep_plugins_pre_init(); +extern void wsrep_plugins_post_init(); +#endif /* WITH_WSREP */ diff --git a/sql/sql_plugin_compat.h b/sql/sql_plugin_compat.h new file mode 100644 index 00000000..01b79c5e --- /dev/null +++ b/sql/sql_plugin_compat.h @@ -0,0 +1,65 @@ +/* Copyright (C) 2013 Sergei Golubchik and Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* old plugin api structures, used for backward compatibility */ + +#define upgrade_var(X) latest->X= X +#define upgrade_str(X) strmake_buf(latest->X, X) +#define downgrade_var(X) X= latest->X +#define downgrade_str(X) strmake_buf(X, latest->X) + +/**************************************************************/ +/* Authentication API, version 0x0100 *************************/ +#define MIN_AUTHENTICATION_INTERFACE_VERSION 0x0100 + +struct MYSQL_SERVER_AUTH_INFO_0x0100 { + const char *user_name; + unsigned int user_name_length; + const char *auth_string; + unsigned long auth_string_length; + char authenticated_as[49]; + char external_user[512]; + int password_used; + const char *host_or_ip; + unsigned int host_or_ip_length; + + void upgrade(MYSQL_SERVER_AUTH_INFO *latest) + { + upgrade_var(user_name); + upgrade_var(user_name_length); + upgrade_var(auth_string); + upgrade_var(auth_string_length); + upgrade_str(authenticated_as); + upgrade_str(external_user); + upgrade_var(password_used); + upgrade_var(host_or_ip); + upgrade_var(host_or_ip_length); + } + void downgrade(MYSQL_SERVER_AUTH_INFO *latest) + { + downgrade_var(user_name); + downgrade_var(user_name_length); + downgrade_var(auth_string); + downgrade_var(auth_string_length); + downgrade_str(authenticated_as); + downgrade_str(external_user); + downgrade_var(password_used); + downgrade_var(host_or_ip); + downgrade_var(host_or_ip_length); + } +}; + +/**************************************************************/ + diff --git a/sql/sql_plugin_services.inl b/sql/sql_plugin_services.inl new file mode 100644 index 00000000..f2b2d08d --- /dev/null +++ b/sql/sql_plugin_services.inl @@ -0,0 +1,358 @@ +/* Copyright (c) 2009, 2010, Oracle and/or its affiliates. + Copyright (c) 2012, 2020, MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* support for Services */ +#include +#include +#include + +struct st_service_ref { + const char *name; + uint version; + void *service; +}; + +static struct my_snprintf_service_st my_snprintf_handler = { + my_snprintf, + my_vsnprintf +}; + +static struct thd_alloc_service_st thd_alloc_handler= { + thd_alloc, + thd_calloc, + thd_strdup, + thd_strmake, + thd_memdup, + thd_make_lex_string +}; + +static struct thd_wait_service_st thd_wait_handler= { + thd_wait_begin, + thd_wait_end +}; + +static struct progress_report_service_st progress_report_handler= { + thd_progress_init, + thd_progress_report, + thd_progress_next_stage, + thd_progress_end, + set_thd_proc_info +}; + +static struct kill_statement_service_st thd_kill_statement_handler= { + thd_kill_level +}; + +static struct thd_timezone_service_st thd_timezone_handler= { + thd_TIME_to_gmt_sec, + thd_gmt_sec_to_TIME +}; + +static struct my_sha2_service_st my_sha2_handler = { + my_sha224, + my_sha224_multi, + my_sha224_context_size, + my_sha224_init, + my_sha224_input, + my_sha224_result, + my_sha256, + my_sha256_multi, + my_sha256_context_size, + my_sha256_init, + my_sha256_input, + my_sha256_result, + my_sha384, + my_sha384_multi, + my_sha384_context_size, + my_sha384_init, + my_sha384_input, + my_sha384_result, + my_sha512, + my_sha512_multi, + my_sha512_context_size, 
+ my_sha512_init, + my_sha512_input, + my_sha512_result, +}; + +static struct my_sha1_service_st my_sha1_handler = { + my_sha1, + my_sha1_multi, + my_sha1_context_size, + my_sha1_init, + my_sha1_input, + my_sha1_result +}; + +static struct my_md5_service_st my_md5_handler = { + my_md5, + my_md5_multi, + my_md5_context_size, + my_md5_init, + my_md5_input, + my_md5_result +}; + +static struct logger_service_st logger_service_handler= { + logger_init_mutexes, + logger_open, + logger_close, + logger_vprintf, + logger_printf, + logger_write, + logger_rotate +}; + +static struct thd_autoinc_service_st thd_autoinc_handler= { + thd_get_autoinc +}; + +static struct thd_rnd_service_st thd_rnd_handler= { + thd_rnd, + thd_create_random_password +}; + +static struct base64_service_st base64_handler= { + my_base64_needed_encoded_length, + my_base64_encode_max_arg_length, + my_base64_needed_decoded_length, + my_base64_decode_max_arg_length, + my_base64_encode, + my_base64_decode +}; + +static struct thd_error_context_service_st thd_error_context_handler= { + thd_get_error_message, + thd_get_error_number, + thd_get_error_row, + thd_inc_error_row, + thd_get_error_context_description +}; + +static struct wsrep_service_st wsrep_handler = { + get_wsrep_recovery, + wsrep_consistency_check, + wsrep_is_wsrep_xid, + wsrep_xid_seqno, + wsrep_xid_uuid, + wsrep_on, + wsrep_prepare_key_for_innodb, + wsrep_thd_LOCK, + wsrep_thd_TRYLOCK, + wsrep_thd_UNLOCK, + wsrep_thd_query, + wsrep_thd_retry_counter, + wsrep_thd_ignore_table, + wsrep_thd_trx_seqno, + wsrep_thd_is_aborting, + wsrep_set_data_home_dir, + wsrep_thd_is_BF, + wsrep_thd_is_local, + wsrep_thd_self_abort, + wsrep_thd_append_key, + wsrep_thd_append_table_key, + wsrep_thd_is_local_transaction, + wsrep_thd_client_state_str, + wsrep_thd_client_mode_str, + wsrep_thd_transaction_state_str, + wsrep_thd_transaction_id, + wsrep_thd_bf_abort, + wsrep_thd_order_before, + wsrep_handle_SR_rollback, + wsrep_thd_skip_locking, + 
wsrep_get_sr_table_name, + wsrep_get_debug, + wsrep_commit_ordered, + wsrep_thd_is_applying, + wsrep_OSU_method_get, + wsrep_thd_has_ignored_error, + wsrep_thd_set_ignored_error, + wsrep_report_bf_lock_wait, + wsrep_thd_kill_LOCK, + wsrep_thd_kill_UNLOCK, + wsrep_thd_set_PA_unsafe +}; + +static struct thd_specifics_service_st thd_specifics_handler= +{ + thd_key_create, + thd_key_delete, + thd_getspecific, + thd_setspecific +}; + +static struct encryption_scheme_service_st encryption_scheme_handler= +{ + encryption_scheme_encrypt, + encryption_scheme_decrypt +}; + +static struct my_crypt_service_st crypt_handler= +{ + my_aes_crypt_init, + my_aes_crypt_update, + my_aes_crypt_finish, + my_aes_crypt, + my_aes_get_size, + my_aes_ctx_size, + my_random_bytes +}; + +static struct my_print_error_service_st my_print_error_handler= +{ + my_error, + my_printf_error, + my_printv_error +}; + +static struct json_service_st json_handler= +{ + json_type, + json_get_array_item, + json_get_object_key, + json_get_object_nkey, + json_escape_string, + json_unescape_json +}; + +static struct thd_mdl_service_st thd_mdl_handler= +{ + thd_mdl_context +}; + +struct sql_service_st sql_service_handler= +{ + mysql_init, + mysql_real_connect_local, + mysql_real_connect, + mysql_errno, + mysql_error, + mysql_real_query, + mysql_affected_rows, + mysql_num_rows, + mysql_store_result, + mysql_free_result, + mysql_fetch_row, + mysql_close, + mysql_options, + mysql_fetch_lengths, + mysql_set_character_set, + mysql_num_fields, + mysql_select_db +}; + +#define DEFINE_warning_function(name, ret) { \ + static query_id_t last_query_id= -1; \ + THD *thd= current_thd; \ + if((thd ? thd->query_id : 0) != last_query_id) \ + { \ + my_error(ER_PROVIDER_NOT_LOADED, MYF(ME_ERROR_LOG|ME_WARNING), name); \ + last_query_id= thd ? 
thd->query_id : 0; \ + } \ + return ret; \ +} + +#include +static struct provider_service_lzma_st provider_handler_lzma= +{ + DEFINE_lzma_stream_buffer_decode([]) DEFINE_warning_function("LZMA compression", LZMA_PROG_ERROR), + DEFINE_lzma_easy_buffer_encode([]) DEFINE_warning_function("LZMA compression", LZMA_PROG_ERROR), + + false // .is_loaded +}; +struct provider_service_lzma_st *provider_service_lzma= &provider_handler_lzma; + +#include +static struct provider_service_lzo_st provider_handler_lzo= +{ + DEFINE_lzo1x_1_15_compress([]) DEFINE_warning_function("LZO compression", LZO_E_INTERNAL_ERROR), + DEFINE_lzo1x_decompress_safe([]) DEFINE_warning_function("LZO compression", LZO_E_INTERNAL_ERROR), + + false // .is_loaded +}; +struct provider_service_lzo_st *provider_service_lzo= &provider_handler_lzo; + +#include +static struct provider_service_bzip2_st provider_handler_bzip2= +{ + DEFINE_BZ2_bzBuffToBuffCompress([]) DEFINE_warning_function("BZip2 compression", -1), + DEFINE_BZ2_bzBuffToBuffDecompress([]) DEFINE_warning_function("BZip2 compression", -1), + DEFINE_BZ2_bzCompress([]) DEFINE_warning_function("BZip2 compression", -1), + DEFINE_BZ2_bzCompressEnd([]) DEFINE_warning_function("BZip2 compression", -1), + DEFINE_BZ2_bzCompressInit([]) DEFINE_warning_function("BZip2 compression", -1), + DEFINE_BZ2_bzDecompress([]) DEFINE_warning_function("BZip2 compression", -1), + DEFINE_BZ2_bzDecompressEnd([]) DEFINE_warning_function("BZip2 compression", -1), + DEFINE_BZ2_bzDecompressInit([]) DEFINE_warning_function("BZip2 compression", -1), + + false // .is_loaded +}; +struct provider_service_bzip2_st *provider_service_bzip2= &provider_handler_bzip2; + +#include +static struct provider_service_snappy_st provider_handler_snappy= +{ + DEFINE_snappy_max_compressed_length([]) -> size_t DEFINE_warning_function("Snappy compression", 0), + DEFINE_snappy_compress([]) DEFINE_warning_function("Snappy compression", SNAPPY_INVALID_INPUT), + DEFINE_snappy_uncompressed_length([]) 
DEFINE_warning_function("Snappy compression", SNAPPY_INVALID_INPUT), + DEFINE_snappy_uncompress([]) DEFINE_warning_function("Snappy compression", SNAPPY_INVALID_INPUT), + + false // .is_loaded +}; +struct provider_service_snappy_st *provider_service_snappy= &provider_handler_snappy; + +#include +static struct provider_service_lz4_st provider_handler_lz4= +{ + DEFINE_LZ4_compressBound([]) DEFINE_warning_function("LZ4 compression", 0), + DEFINE_LZ4_compress_default([]) DEFINE_warning_function("LZ4 compression", 0), + DEFINE_LZ4_decompress_safe([]) DEFINE_warning_function("LZ4 compression", -1), + + false // .is_loaded +}; +struct provider_service_lz4_st *provider_service_lz4= &provider_handler_lz4; + +static struct st_service_ref list_of_services[]= +{ + { "base64_service", VERSION_base64, &base64_handler }, + { "debug_sync_service", VERSION_debug_sync, 0 }, // updated in plugin_init() + { "encryption_scheme_service", VERSION_encryption_scheme, &encryption_scheme_handler }, + { "encryption_service", VERSION_encryption, &encryption_handler }, + { "logger_service", VERSION_logger, &logger_service_handler }, + { "my_crypt_service", VERSION_my_crypt, &crypt_handler}, + { "my_md5_service", VERSION_my_md5, &my_md5_handler}, + { "my_print_error_service", VERSION_my_print_error, &my_print_error_handler}, + { "my_sha1_service", VERSION_my_sha1, &my_sha1_handler}, + { "my_sha2_service", VERSION_my_sha2, &my_sha2_handler}, + { "my_snprintf_service", VERSION_my_snprintf, &my_snprintf_handler }, + { "progress_report_service", VERSION_progress_report, &progress_report_handler }, + { "thd_alloc_service", VERSION_thd_alloc, &thd_alloc_handler }, + { "thd_autoinc_service", VERSION_thd_autoinc, &thd_autoinc_handler }, + { "thd_error_context_service", VERSION_thd_error_context, &thd_error_context_handler }, + { "thd_kill_statement_service", VERSION_kill_statement, &thd_kill_statement_handler }, + { "thd_rnd_service", VERSION_thd_rnd, &thd_rnd_handler }, + { "thd_specifics_service", 
VERSION_thd_specifics, &thd_specifics_handler }, + { "thd_timezone_service", VERSION_thd_timezone, &thd_timezone_handler }, + { "thd_wait_service", VERSION_thd_wait, &thd_wait_handler }, + { "wsrep_service", VERSION_wsrep, &wsrep_handler }, + { "json_service", VERSION_json, &json_handler }, + { "thd_mdl_service", VERSION_thd_mdl, &thd_mdl_handler }, + { "sql_service", VERSION_sql_service, &sql_service_handler }, + { "provider_service_bzip2", VERSION_provider_bzip2, &provider_handler_bzip2 }, + { "provider_service_lz4", VERSION_provider_lz4, &provider_handler_lz4 }, + { "provider_service_lzma", VERSION_provider_lzma, &provider_handler_lzma }, + { "provider_service_lzo", VERSION_provider_lzo, &provider_handler_lzo }, + { "provider_service_snappy", VERSION_provider_snappy, &provider_handler_snappy } +}; diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc new file mode 100644 index 00000000..bf3c7cbb --- /dev/null +++ b/sql/sql_prepare.cc @@ -0,0 +1,6510 @@ +/* Copyright (c) 2002, 2015, Oracle and/or its affiliates. + Copyright (c) 2008, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + +This file contains the implementation of prepared statements. 
+ +When one prepares a statement: + + - Server gets the query from client with command 'COM_STMT_PREPARE'; + in the following format: + [COM_STMT_PREPARE:1] [query] + - Parse the query and recognize any parameter markers '?' and + store its information list in lex->param_list + - Allocate a new statement for this prepare; and keep this in + 'thd->stmt_map'. + - Without executing the query, return back to client the total + number of parameters along with result-set metadata information + (if any) in the following format: + @verbatim + [STMT_ID:4] + [Column_count:2] + [Param_count:2] + [Params meta info (stubs only for now)] (if Param_count > 0) + [Columns meta info] (if Column_count > 0) + @endverbatim + + During prepare the tables used in a statement are opened, but no + locks are acquired. Table opening will block any DDL during the + operation, and we do not need any locks as we neither read nor + modify any data during prepare. Tables are closed after prepare + finishes. + +When one executes a statement: + + - Server gets the command 'COM_STMT_EXECUTE' to execute the + previously prepared query. If there are any parameter markers, then the + client will send the data in the following format: + @verbatim + [COM_STMT_EXECUTE:1] + [STMT_ID:4] + [NULL_BITS:(param_count+7)/8)] + [TYPES_SUPPLIED_BY_CLIENT(0/1):1] + [[length]data] + [[length]data] .. [[length]data]. + @endverbatim + (Note: Except for string/binary types; all other types will not be + supplied with length field) + - If it is a first execute or types of parameters were altered by client, + then setup the conversion routines. + - Assign parameter items from the supplied data. + - Execute the query without re-parsing and send back the results + to client + + During execution of prepared statement tables are opened and locked + the same way they would for normal (non-prepared) statement + execution. Tables are unlocked and closed after the execution. 
+ +When one supplies long data for a placeholder: + + - Server gets the long data in pieces with command type + 'COM_STMT_SEND_LONG_DATA'. + - The packet received will have the format as: + [COM_STMT_SEND_LONG_DATA:1][STMT_ID:4][parameter_number:2][data] + - data from the packet is appended to the long data value buffer for this + placeholder. + - It's up to the client to stop supplying data chunks at any point. The + server doesn't care; also, the server doesn't notify the client whether + it got the data or not; if there is any error, then it will be returned + at statement execute. +*/ + +#include "mariadb.h" /* NO_EMBEDDED_ACCESS_CHECKS */ +#include "sql_priv.h" +#include "unireg.h" +#include "sql_class.h" // set_var.h: THD +#include "set_var.h" +#include "sql_admin.h" // fill_check_table_metadata_fields +#include "sql_prepare.h" +#include "sql_parse.h" // insert_precheck, update_precheck, delete_precheck +#include "sql_base.h" // open_normal_and_derived_tables +#include "sql_cache.h" // query_cache_* +#include "sql_view.h" // create_view_precheck +#include "sql_delete.h" // mysql_prepare_delete +#include "sql_select.h" // for JOIN +#include "sql_insert.h" // upgrade_lock_type_for_insert, mysql_prepare_insert +#include "sql_update.h" // mysql_prepare_update +#include "sql_db.h" // mysql_opt_change_db, mysql_change_db +#include "sql_derived.h" // mysql_derived_prepare, + // mysql_handle_derived +#include "sql_cte.h" +#include "sql_cursor.h" +#include "sql_show.h" +#include "sql_repl.h" +#include "sql_help.h" // mysqld_help_prepare +#include "sql_table.h" // fill_checksum_table_metadata_fields +#include "slave.h" +#include "sp_head.h" +#include "sp.h" +#include "sp_cache.h" +#include "sql_handler.h" // mysql_ha_rm_tables +#include "probes_mysql.h" +#include "opt_trace.h" +#ifdef EMBEDDED_LIBRARY +/* include MYSQL_BIND headers */ +#include +#else +#include +/* Constants defining bits in parameter type flags. 
Flags are read from high byte of short value */ +static const uint PARAMETER_FLAG_UNSIGNED= 128U << 8; +#endif +#include "lock.h" // MYSQL_OPEN_FORCE_SHARED_MDL +#include "log_event.h" // class Log_event +#include "sql_handler.h" +#include "transaction.h" // trans_rollback_implicit +#include "mysql/psi/mysql_ps.h" // MYSQL_EXECUTE_PS +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#include "wsrep_trans_observer.h" +#endif /* WITH_WSREP */ +#include "xa.h" // xa_recover_get_fields +#include "sql_audit.h" // mysql_audit_release + +/** + A result class used to send cursor rows using the binary protocol. +*/ + +class Select_fetch_protocol_binary: public select_send +{ + Protocol_binary protocol; +public: + Select_fetch_protocol_binary(THD *thd); + virtual bool send_result_set_metadata(List &list, uint flags); + virtual int send_data(List &items); + virtual bool send_eof(); +#ifdef EMBEDDED_LIBRARY + void begin_dataset() + { + protocol.begin_dataset(); + } +#endif +}; + +/****************************************************************************/ + +/** + Prepared_statement: a statement that can contain placeholders. +*/ + +class Prepared_statement: public Statement +{ +public: + enum flag_values + { + IS_IN_USE= 1, + IS_SQL_PREPARE= 2 + }; + + THD *thd; + PSI_prepared_stmt* m_prepared_stmt; + Select_fetch_protocol_binary result; + Item_param **param_array; + Server_side_cursor *cursor; + uchar *packet; + uchar *packet_end; +#ifdef PROTECT_STATEMENT_MEMROOT + /* + The following data member is wholly for debugging purpose. + It can be used for possible crash analysis to determine how many times + the stored routine was executed before the mem_root marked read_only + was requested for a memory chunk. Additionally, a value of this data + member is output to the log with DBUG_PRINT. 
+ */ + ulong executed_counter; +#endif + uint param_count; + uint last_errno; + uint flags; + char last_error[MYSQL_ERRMSG_SIZE]; + my_bool iterations; + my_bool start_param; + my_bool read_types; + +#ifndef EMBEDDED_LIBRARY + bool (*set_params)(Prepared_statement *st, uchar *data, uchar *data_end, + uchar *read_pos, String *expanded_query); + bool (*set_bulk_params)(Prepared_statement *st, + uchar **read_pos, uchar *data_end, bool reset); +#else + bool (*set_params_data)(Prepared_statement *st, String *expanded_query); + /*TODO: add bulk support for builtin server */ +#endif + bool (*set_params_from_actual_params)(Prepared_statement *stmt, + List &list, + String *expanded_query); +public: + Prepared_statement(THD *thd_arg); + virtual ~Prepared_statement(); + void setup_set_params(); + Query_arena::Type type() const override; + bool cleanup_stmt(bool restore_set_statement_vars) override; + bool set_name(const LEX_CSTRING *name); + inline void close_cursor() { delete cursor; cursor= 0; } + inline bool is_in_use() { return flags & (uint) IS_IN_USE; } + inline bool is_sql_prepare() const { return flags & (uint) IS_SQL_PREPARE; } + void set_sql_prepare() { flags|= (uint) IS_SQL_PREPARE; } + bool prepare(const char *packet, uint packet_length); + bool execute_loop(String *expanded_query, + bool open_cursor, + uchar *packet_arg, uchar *packet_end_arg); + bool execute_bulk_loop(String *expanded_query, + bool open_cursor, + uchar *packet_arg, uchar *packet_end_arg); + bool execute_server_runnable(Server_runnable *server_runnable); + my_bool set_bulk_parameters(bool reset); + bool bulk_iterations() { return iterations; }; + /* Destroy this statement */ + void deallocate(); + bool execute_immediate(const char *query, uint query_length); +private: + /** + The memory root to allocate parsed tree elements (instances of Item, + SELECT_LEX and other classes). 
+ */ + MEM_ROOT main_mem_root; + sql_mode_t m_sql_mode; +private: + bool set_db(const LEX_CSTRING *db); + bool set_parameters(String *expanded_query, + uchar *packet, uchar *packet_end); + bool execute(String *expanded_query, bool open_cursor); + void deallocate_immediate(); + bool reprepare(); + bool validate_metadata(Prepared_statement *copy); + void swap_prepared_statement(Prepared_statement *copy); +}; + +/** + Execute one SQL statement in an isolated context. +*/ + +class Execute_sql_statement: public Server_runnable +{ +public: + Execute_sql_statement(LEX_STRING sql_text); + virtual bool execute_server_code(THD *thd); +private: + LEX_STRING m_sql_text; +}; + + +class Ed_connection; + + +/****************************************************************************** + Implementation +******************************************************************************/ + + +inline bool is_param_null(const uchar *pos, ulong param_no) +{ + return pos[param_no/8] & (1 << (param_no & 7)); +} + +/** + Find a prepared statement in the statement map by id. + + Try to find a prepared statement and set THD error if it's not found. + + @param thd thread handle + @param id statement id + @param where the place from which this function is called (for + error reporting). + + @return + 0 if the statement was not found, a pointer otherwise. +*/ + +static Prepared_statement * +find_prepared_statement(THD *thd, ulong id) +{ + /* + To strictly separate namespaces of SQL prepared statements and C API + prepared statements find() will return 0 if there is a named prepared + statement with such id. + + LAST_STMT_ID is special value which mean last prepared statement ID + (it was made for COM_MULTI to allow prepare and execute a statement + in the same command but usage is not limited by COM_MULTI only). + */ + Statement *stmt= ((id == LAST_STMT_ID) ? 
+ thd->last_stmt : + thd->stmt_map.find(id)); + + if (stmt == 0 || stmt->type() != Query_arena::PREPARED_STATEMENT) + return NULL; + + return (Prepared_statement *) stmt; +} + + +/** + Send prepared statement id and metadata to the client after prepare. + + @todo + Fix this nasty upcast from List to List + + @return + 0 in case of success, 1 otherwise +*/ + +#ifndef EMBEDDED_LIBRARY +static bool send_prep_stmt(Prepared_statement *stmt, uint columns) +{ + NET *net= &stmt->thd->net; + uchar buff[12]; + uint tmp; + int error; + THD *thd= stmt->thd; + DBUG_ENTER("send_prep_stmt"); + DBUG_PRINT("enter",("stmt->id: %lu columns: %d param_count: %d", + stmt->id, columns, stmt->param_count)); + + buff[0]= 0; /* OK packet indicator */ + int4store(buff+1, stmt->id); + int2store(buff+5, columns); + int2store(buff+7, stmt->param_count); + buff[9]= 0; // Guard against a 4.1 client + tmp= MY_MIN(stmt->thd->get_stmt_da()->current_statement_warn_count(), 65535); + int2store(buff+10, tmp); + + /* + Send types and names of placeholders to the client + XXX: fix this nasty upcast from List to List + */ + error= my_net_write(net, buff, sizeof(buff)); + if (stmt->param_count && likely(!error)) + { + /* + Force the column info to be written + (in this case PS parameter type info). 
+ */ + error= thd->protocol_text.send_result_set_metadata( + (List *)&stmt->lex->param_list, + Protocol::SEND_EOF | Protocol::SEND_FORCE_COLUMN_INFO); + } + + if (likely(!error)) + { + /* Flag that a response has already been sent */ + thd->get_stmt_da()->disable_status(); + } + + DBUG_RETURN(error); +} +#else +static bool send_prep_stmt(Prepared_statement *stmt, + uint columns __attribute__((unused))) +{ + THD *thd= stmt->thd; + + thd->client_stmt_id= stmt->id; + thd->client_param_count= stmt->param_count; + thd->clear_error(); + thd->get_stmt_da()->disable_status(); + + return 0; +} +#endif /*!EMBEDDED_LIBRARY*/ + + +#ifndef EMBEDDED_LIBRARY + +/** + Read the length of the parameter data and return it back to + the caller. + + Read data length, position the packet to the first byte after it, + and return the length to the caller. + + @param packet a pointer to the data + @param len remaining packet length + + @return + Length of data piece. +*/ + +static ulong get_param_length(uchar **packet, ulong len) +{ + uchar *pos= *packet; + if (len < 1) + return 0; + if (*pos < 251) + { + (*packet)++; + return (ulong) *pos; + } + if (len < 3) + return 0; + if (*pos == 252) + { + (*packet)+=3; + return (ulong) uint2korr(pos+1); + } + if (len < 4) + return 0; + if (*pos == 253) + { + (*packet)+=4; + return (ulong) uint3korr(pos+1); + } + if (len < 5) + return 0; + (*packet)+=9; // Must be 254 when here + /* + In our client-server protocol all numbers bigger than 2^24 + stored as 8 bytes with uint8korr. Here we always know that + parameter length is less than 2^4 so don't look at the second + 4 bytes. But still we need to obey the protocol hence 9 in the + assignment above. + */ + return (ulong) uint4korr(pos+1); +} +#else +#define get_param_length(packet, len) len +#endif /*!EMBEDDED_LIBRARY*/ + +/** + Data conversion routines. + + All these functions read the data from pos, convert it to requested + type and assign to param; pos is advanced to predefined length. 
+ + Make a note that the NULL handling is examined at first execution + (i.e. when input types altered) and for all subsequent executions + we don't read any values for this. + + @param pos input data buffer + @param len length of data in the buffer +*/ + +void Item_param::set_param_tiny(uchar **pos, ulong len) +{ +#ifndef EMBEDDED_LIBRARY + if (len < 1) + return; +#endif + int8 value= (int8) **pos; + set_int(unsigned_flag ? (longlong) ((uint8) value) : + (longlong) value, 4); + *pos+= 1; +} + +void Item_param::set_param_short(uchar **pos, ulong len) +{ + int16 value; +#ifndef EMBEDDED_LIBRARY + if (len < 2) + return; + value= sint2korr(*pos); +#else + shortget(value, *pos); +#endif + set_int(unsigned_flag ? (longlong) ((uint16) value) : + (longlong) value, 6); + *pos+= 2; +} + +void Item_param::set_param_int32(uchar **pos, ulong len) +{ + int32 value; +#ifndef EMBEDDED_LIBRARY + if (len < 4) + return; + value= sint4korr(*pos); +#else + longget(value, *pos); +#endif + set_int(unsigned_flag ? 
(longlong) ((uint32) value) : + (longlong) value, 11); + *pos+= 4; +} + +void Item_param::set_param_int64(uchar **pos, ulong len) +{ + longlong value; +#ifndef EMBEDDED_LIBRARY + if (len < 8) + return; + value= (longlong) sint8korr(*pos); +#else + longlongget(value, *pos); +#endif + set_int(value, 21); + *pos+= 8; +} + +void Item_param::set_param_float(uchar **pos, ulong len) +{ + float data; +#ifndef EMBEDDED_LIBRARY + if (len < 4) + return; + float4get(data,*pos); +#else + floatget(data, *pos); +#endif + set_double((double) data); + *pos+= 4; +} + +void Item_param::set_param_double(uchar **pos, ulong len) +{ + double data; +#ifndef EMBEDDED_LIBRARY + if (len < 8) + return; + float8get(data,*pos); +#else + doubleget(data, *pos); +#endif + set_double((double) data); + *pos+= 8; +} + +void Item_param::set_param_decimal(uchar **pos, ulong len) +{ + ulong length= get_param_length(pos, len); + set_decimal((char*)*pos, length); + *pos+= length; +} + +#ifndef EMBEDDED_LIBRARY + +/* + Read date/time/datetime parameter values from network (binary + protocol). See writing counterparts of these functions in + libmysql.c (store_param_{time,date,datetime}). +*/ + +/** + @todo + Add warning 'Data truncated' here +*/ +void Item_param::set_param_time(uchar **pos, ulong len) +{ + MYSQL_TIME tm; + ulong length= get_param_length(pos, len); + + if (length >= 8) + { + uchar *to= *pos; + uint day; + + tm.neg= (bool) to[0]; + day= (uint) sint4korr(to+1); + tm.hour= (uint) to[5] + day * 24; + tm.minute= (uint) to[6]; + tm.second= (uint) to[7]; + tm.second_part= (length > 8) ? 
(ulong) sint4korr(to+8) : 0; + if (tm.hour > 838) + { + /* TODO: add warning 'Data truncated' here */ + tm.hour= 838; + tm.minute= 59; + tm.second= 59; + } + tm.day= tm.year= tm.month= 0; + } + else + set_zero_time(&tm, MYSQL_TIMESTAMP_TIME); + set_time(&tm, MYSQL_TIMESTAMP_TIME, MAX_TIME_FULL_WIDTH); + *pos+= length; +} + +void Item_param::set_param_datetime(uchar **pos, ulong len) +{ + MYSQL_TIME tm; + ulong length= get_param_length(pos, len); + + if (length >= 4) + { + uchar *to= *pos; + + tm.neg= 0; + tm.year= (uint) sint2korr(to); + tm.month= (uint) to[2]; + tm.day= (uint) to[3]; + if (length > 4) + { + tm.hour= (uint) to[4]; + tm.minute= (uint) to[5]; + tm.second= (uint) to[6]; + } + else + tm.hour= tm.minute= tm.second= 0; + + tm.second_part= (length > 7) ? (ulong) sint4korr(to+7) : 0; + } + else + set_zero_time(&tm, MYSQL_TIMESTAMP_DATETIME); + set_time(&tm, MYSQL_TIMESTAMP_DATETIME, MAX_DATETIME_WIDTH); + *pos+= length; +} + + +void Item_param::set_param_date(uchar **pos, ulong len) +{ + MYSQL_TIME tm; + ulong length= get_param_length(pos, len); + + if (length >= 4) + { + uchar *to= *pos; + + tm.year= (uint) sint2korr(to); + tm.month= (uint) to[2]; + tm.day= (uint) to[3]; + + tm.hour= tm.minute= tm.second= 0; + tm.second_part= 0; + tm.neg= 0; + } + else + set_zero_time(&tm, MYSQL_TIMESTAMP_DATE); + set_time(&tm, MYSQL_TIMESTAMP_DATE, MAX_DATE_WIDTH); + *pos+= length; +} + +#else/*!EMBEDDED_LIBRARY*/ +/** + @todo + Add warning 'Data truncated' here +*/ +void Item_param::set_param_time(uchar **pos, ulong len) +{ + MYSQL_TIME tm= *((MYSQL_TIME*)*pos); + tm.hour+= tm.day * 24; + tm.day= tm.year= tm.month= 0; + if (tm.hour > 838) + { + /* TODO: add warning 'Data truncated' here */ + tm.hour= 838; + tm.minute= 59; + tm.second= 59; + } + set_time(&tm, MYSQL_TIMESTAMP_TIME, MAX_TIME_WIDTH); +} + +void Item_param::set_param_datetime(uchar **pos, ulong len) +{ + MYSQL_TIME tm= *((MYSQL_TIME*)*pos); + tm.neg= 0; + set_time(&tm, MYSQL_TIMESTAMP_DATETIME, 
MAX_DATETIME_WIDTH); +} + +void Item_param::set_param_date(uchar **pos, ulong len) +{ + MYSQL_TIME *to= (MYSQL_TIME*)*pos; + set_time(to, MYSQL_TIMESTAMP_DATE, MAX_DATE_WIDTH); +} +#endif /*!EMBEDDED_LIBRARY*/ + + +void Item_param::set_param_str(uchar **pos, ulong len) +{ + ulong length= get_param_length(pos, len); + if (length == 0 && m_empty_string_is_null) + set_null(); + else + { + if (length > len) + length= len; + /* + We use &my_charset_bin here. Conversion and setting real character + sets will be done in Item_param::convert_str_value(), after the + original value is appended to the query used for logging. + */ + set_str((const char *) *pos, length, &my_charset_bin, &my_charset_bin); + *pos+= length; + } +} + + +#undef get_param_length + + +void Item_param::setup_conversion(THD *thd, uchar param_type) +{ + const Type_handler *h= + Type_handler::get_handler_by_field_type((enum_field_types) param_type); + /* + The client library ensures that we won't get any unexpected typecodes + in the bound parameter. Translating unknown typecodes to + &type_handler_string lets us to handle malformed packets as well. 
+ */ + if (!h) + h= &type_handler_string; + else if (unsigned_flag) + h= h->type_handler_unsigned(); + set_handler(h); + h->Item_param_setup_conversion(thd, this); +} + + +void Item_param::setup_conversion_blob(THD *thd) +{ + value.cs_info.character_set_of_placeholder= &my_charset_bin; + value.cs_info.character_set_client= thd->variables.character_set_client; + DBUG_ASSERT(thd->variables.character_set_client); + value.cs_info.final_character_set_of_str_value= &my_charset_bin; + m_empty_string_is_null= thd->variables.sql_mode & MODE_EMPTY_STRING_IS_NULL; +} + + +void Item_param::setup_conversion_string(THD *thd, CHARSET_INFO *fromcs) +{ + value.cs_info.set(thd, fromcs); + m_empty_string_is_null= thd->variables.sql_mode & MODE_EMPTY_STRING_IS_NULL; + /* + Exact value of max_length is not known unless data is converted to + charset of connection, so we have to set it later. + */ +} + +#ifndef EMBEDDED_LIBRARY + +/** + Routines to assign parameters from data supplied by the client. + + Update the parameter markers by reading data from the packet and + and generate a valid query for logging. + + @note + This function, along with other _with_log functions is called when one of + binary, slow or general logs is open. Logging of prepared statements in + all cases is performed by means of conventional queries: if parameter + data was supplied from C API, each placeholder in the query is + replaced with its actual value; if we're logging a [Dynamic] SQL + prepared statement, parameter markers are replaced with variable names. + Example: + @verbatim + mysqld_stmt_prepare("UPDATE t1 SET a=a*1.25 WHERE a=?") + --> general logs gets [Prepare] UPDATE t1 SET a*1.25 WHERE a=?" + mysqld_stmt_execute(stmt); + --> general and binary logs get + [Execute] UPDATE t1 SET a*1.25 WHERE a=1" + @endverbatim + + If a statement has been prepared using SQL syntax: + @verbatim + PREPARE stmt FROM "UPDATE t1 SET a=a*1.25 WHERE a=?" + --> general log gets + [Query] PREPARE stmt FROM "UPDATE ..." 
+ EXECUTE stmt USING @a + --> general log gets + [Query] EXECUTE stmt USING @a; + @endverbatim + + @retval + 0 if success + @retval + 1 otherwise +*/ + +static bool insert_params_with_log(Prepared_statement *stmt, uchar *null_array, + uchar *read_pos, uchar *data_end, + String *query) +{ + THD *thd= stmt->thd; + Item_param **begin= stmt->param_array; + Item_param **end= begin + stmt->param_count; + Copy_query_with_rewrite acc(thd, stmt->query(), stmt->query_length(), query); + DBUG_ENTER("insert_params_with_log"); + + for (Item_param **it= begin; it < end; ++it) + { + Item_param *param= *it; + if (!param->has_long_data_value()) + { + if (is_param_null(null_array, (uint) (it - begin))) + param->set_null(); + else + { + if (read_pos >= data_end) + DBUG_RETURN(1); + param->set_param_func(&read_pos, (uint) (data_end - read_pos)); + if (param->has_no_value()) + DBUG_RETURN(1); + + if (param->limit_clause_param && !param->has_int_value()) + { + if (param->set_limit_clause_param(param->val_int())) + DBUG_RETURN(1); + } + } + } + /* + A long data stream was supplied for this parameter marker. + This was done after prepare, prior to providing a placeholder + type (the types are supplied at execute). Check that the + supplied type of placeholder can accept a data stream. 
+ */ + else if (!param->type_handler()->is_param_long_data_type()) + DBUG_RETURN(1); + + if (acc.append(param)) + DBUG_RETURN(1); + + if (param->convert_str_value(thd)) + DBUG_RETURN(1); /* out of memory */ + + param->sync_clones(); + } + if (acc.finalize()) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +static bool insert_params(Prepared_statement *stmt, uchar *null_array, + uchar *read_pos, uchar *data_end, + String *expanded_query) +{ + Item_param **begin= stmt->param_array; + Item_param **end= begin + stmt->param_count; + + DBUG_ENTER("insert_params"); + + for (Item_param **it= begin; it < end; ++it) + { + Item_param *param= *it; + param->indicator= STMT_INDICATOR_NONE; // only for bulk parameters + if (!param->has_long_data_value()) + { + if (is_param_null(null_array, (uint) (it - begin))) + param->set_null(); + else + { + if (read_pos >= data_end) + DBUG_RETURN(1); + param->set_param_func(&read_pos, (uint) (data_end - read_pos)); + if (param->has_no_value()) + DBUG_RETURN(1); + } + } + /* + A long data stream was supplied for this parameter marker. + This was done after prepare, prior to providing a placeholder + type (the types are supplied at execute). Check that the + supplied type of placeholder can accept a data stream. 
+ */ + else if (!param->type_handler()->is_param_long_data_type()) + DBUG_RETURN(1); + if (param->convert_str_value(stmt->thd)) + DBUG_RETURN(1); /* out of memory */ + param->sync_clones(); + } + DBUG_RETURN(0); +} + + +static bool insert_bulk_params(Prepared_statement *stmt, + uchar **read_pos, uchar *data_end, + bool reset) +{ + Item_param **begin= stmt->param_array; + Item_param **end= begin + stmt->param_count; + + DBUG_ENTER("insert_params"); + + for (Item_param **it= begin; it < end; ++it) + { + Item_param *param= *it; + if (reset) + param->reset(); + if (!param->has_long_data_value()) + { + param->indicator= (enum_indicator_type) *((*read_pos)++); + if ((*read_pos) > data_end) + DBUG_RETURN(1); + switch (param->indicator) + { + case STMT_INDICATOR_NONE: + if ((*read_pos) >= data_end) + DBUG_RETURN(1); + param->set_param_func(read_pos, (uint) (data_end - (*read_pos))); + if (param->has_no_value()) + DBUG_RETURN(1); + if (param->convert_str_value(stmt->thd)) + DBUG_RETURN(1); /* out of memory */ + break; + case STMT_INDICATOR_NULL: + param->set_null(); + break; + case STMT_INDICATOR_DEFAULT: + param->set_default(); + break; + case STMT_INDICATOR_IGNORE: + param->set_ignore(); + break; + default: + DBUG_ASSERT(0); + DBUG_RETURN(1); + } + } + else + DBUG_RETURN(1); // long is not supported here + param->sync_clones(); + } + DBUG_RETURN(0); +} + + +/** + Checking if parameter type and flags are valid + + @param typecode ushort value with type in low byte, and flags in high byte + + @retval true this parameter is wrong + @retval false this parameter is OK +*/ + +static bool +parameter_type_sanity_check(ushort typecode) +{ + /* Checking if type in lower byte is valid */ + switch (typecode & 0xff) { + case MYSQL_TYPE_DECIMAL: + case MYSQL_TYPE_NEWDECIMAL: + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_LONG: + case MYSQL_TYPE_LONGLONG: + case MYSQL_TYPE_INT24: + case MYSQL_TYPE_YEAR: + case MYSQL_TYPE_BIT: + case MYSQL_TYPE_FLOAT: + case 
MYSQL_TYPE_DOUBLE: + case MYSQL_TYPE_NULL: + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_STRING: + case MYSQL_TYPE_ENUM: + case MYSQL_TYPE_SET: + case MYSQL_TYPE_GEOMETRY: + case MYSQL_TYPE_TIMESTAMP: + case MYSQL_TYPE_DATE: + case MYSQL_TYPE_TIME: + case MYSQL_TYPE_DATETIME: + case MYSQL_TYPE_NEWDATE: + break; + /* + This types normally cannot be sent by client, so maybe it'd be + better to treat them like an error here. + */ + case MYSQL_TYPE_TIMESTAMP2: + case MYSQL_TYPE_TIME2: + case MYSQL_TYPE_DATETIME2: + default: + return true; + }; + + // In Flags in high byte only unsigned bit may be set + if (typecode & ((~PARAMETER_FLAG_UNSIGNED) & 0x0000ff00)) + { + return true; + } + return false; +} + +static bool +set_conversion_functions(Prepared_statement *stmt, uchar **data) +{ + uchar *read_pos= *data; + + DBUG_ENTER("set_conversion_functions"); + /* + First execute or types altered by the client, setup the + conversion routines for all parameters (one time) + */ + Item_param **it= stmt->param_array; + Item_param **end= it + stmt->param_count; + THD *thd= stmt->thd; + for (; it < end; ++it) + { + ushort typecode; + + /* + stmt_execute_packet_sanity_check has already verified, that there + are enough data in the packet for data types + */ + typecode= sint2korr(read_pos); + read_pos+= 2; + if (parameter_type_sanity_check(typecode)) + { + DBUG_RETURN(1); + } + (**it).unsigned_flag= MY_TEST(typecode & PARAMETER_FLAG_UNSIGNED); + (*it)->setup_conversion(thd, (uchar) (typecode & 0xff)); + (*it)->sync_clones(); + } + *data= read_pos; + DBUG_RETURN(0); +} + + +static bool setup_conversion_functions(Prepared_statement *stmt, + uchar **data, + bool bulk_protocol= 0) +{ + /* skip null bits */ + uchar *read_pos= *data; + if (!bulk_protocol) + read_pos+= (stmt->param_count+7) / 8; + + DBUG_ENTER("setup_conversion_functions"); + + if 
(*read_pos++) //types supplied / first execute + { + *data= read_pos; + bool res= set_conversion_functions(stmt, data); + DBUG_RETURN(res); + } + *data= read_pos; + DBUG_RETURN(0); +} + +#else + +//TODO: support bulk parameters + +/** + Embedded counterparts of parameter assignment routines. + + The main difference between the embedded library and the server is + that in embedded case we don't serialize/deserialize parameters data. + + Additionally, for unknown reason, the client-side flag raised for + changed types of placeholders is ignored and we simply setup conversion + functions at each execute (TODO: fix). +*/ + +static bool emb_insert_params(Prepared_statement *stmt, String *expanded_query) +{ + THD *thd= stmt->thd; + Item_param **it= stmt->param_array; + Item_param **end= it + stmt->param_count; + MYSQL_BIND *client_param= stmt->thd->client_params; + + DBUG_ENTER("emb_insert_params"); + + for (; it < end; ++it, ++client_param) + { + Item_param *param= *it; + param->setup_conversion(thd, client_param->buffer_type); + if (!param->has_long_data_value()) + { + if (*client_param->is_null) + param->set_null(); + else + { + uchar *buff= (uchar*) client_param->buffer; + param->unsigned_flag= client_param->is_unsigned; + param->set_param_func(&buff, + client_param->length ? 
+ *client_param->length : + client_param->buffer_length); + if (param->has_no_value()) + DBUG_RETURN(1); + } + param->sync_clones(); + } + if (param->convert_str_value(thd)) + DBUG_RETURN(1); /* out of memory */ + } + DBUG_RETURN(0); +} + + +static bool emb_insert_params_with_log(Prepared_statement *stmt, String *query) +{ + THD *thd= stmt->thd; + Item_param **it= stmt->param_array; + Item_param **end= it + stmt->param_count; + MYSQL_BIND *client_param= thd->client_params; + Copy_query_with_rewrite acc(thd, stmt->query(), stmt->query_length(), query); + DBUG_ENTER("emb_insert_params_with_log"); + + for (; it < end; ++it, ++client_param) + { + Item_param *param= *it; + param->setup_conversion(thd, client_param->buffer_type); + if (!param->has_long_data_value()) + { + if (*client_param->is_null) + param->set_null(); + else + { + uchar *buff= (uchar*)client_param->buffer; + param->unsigned_flag= client_param->is_unsigned; + param->set_param_func(&buff, + client_param->length ? + *client_param->length : + client_param->buffer_length); + if (param->has_no_value()) + DBUG_RETURN(1); + } + } + if (acc.append(param)) + DBUG_RETURN(1); + + if (param->convert_str_value(thd)) + DBUG_RETURN(1); /* out of memory */ + param->sync_clones(); + } + if (acc.finalize()) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + +#endif /*!EMBEDDED_LIBRARY*/ + +/** + Setup data conversion routines using an array of parameter + markers from the original prepared statement. + Swap the parameter data of the original prepared + statement to the new one. + + Used only when we re-prepare a prepared statement. + There are two reasons for this function to exist: + + 1) In the binary client/server protocol, parameter metadata + is sent only at first execute. Consequently, if we need to + reprepare a prepared statement at a subsequent execution, + we may not have metadata information in the packet. 
+ In that case we use the parameter array of the original + prepared statement to setup parameter types of the new + prepared statement. + + 2) In the binary client/server protocol, we may supply + long data in pieces. When the last piece is supplied, + we assemble the pieces and convert them from client + character set to the connection character set. After + that the parameter value is only available inside + the parameter, the original pieces are lost, and thus + we can only assign the corresponding parameter of the + reprepared statement from the original value. + + @param[out] param_array_dst parameter markers of the new statement + @param[in] param_array_src parameter markers of the original + statement + @param[in] param_count total number of parameters. Is the + same in src and dst arrays, since + the statement query is the same + + @return this function never fails +*/ + +static void +swap_parameter_array(Item_param **param_array_dst, + Item_param **param_array_src, + uint param_count) +{ + Item_param **dst= param_array_dst; + Item_param **src= param_array_src; + Item_param **end= param_array_dst + param_count; + + for (; dst < end; ++src, ++dst) + { + (*dst)->set_param_type_and_swap_value(*src); + (*dst)->sync_clones(); + (*src)->sync_clones(); + } +} + + +/** + Assign prepared statement parameters from user variables. + + @param stmt Statement + @param params A list of parameters. 
Caller must ensure that number + of parameters in the list is equal to number of statement + parameters + @param query Ignored +*/ + +static bool +insert_params_from_actual_params(Prepared_statement *stmt, + List ¶ms, + String *query __attribute__((unused))) +{ + Item_param **begin= stmt->param_array; + Item_param **end= begin + stmt->param_count; + List_iterator param_it(params); + DBUG_ENTER("insert_params_from_actual_params"); + + for (Item_param **it= begin; it < end; ++it) + { + Item_param *param= *it; + Item *ps_param= param_it++; + if (ps_param->save_in_param(stmt->thd, param) || + param->convert_str_value(stmt->thd)) + DBUG_RETURN(1); + param->sync_clones(); + } + DBUG_RETURN(0); +} + + +/** + Do the same as insert_params_from_actual_params + but also construct query text for binary log. + + @param stmt Prepared statement + @param params A list of parameters. Caller must ensure that number of + parameters in the list is equal to number of statement + parameters + @param query The query with parameter markers replaced with corresponding + user variables that were used to execute the query. +*/ + +static bool +insert_params_from_actual_params_with_log(Prepared_statement *stmt, + List ¶ms, + String *query) +{ + Item_param **begin= stmt->param_array; + Item_param **end= begin + stmt->param_count; + List_iterator param_it(params); + THD *thd= stmt->thd; + Copy_query_with_rewrite acc(thd, stmt->query(), stmt->query_length(), query); + + DBUG_ENTER("insert_params_from_actual_params_with_log"); + + for (Item_param **it= begin; it < end; ++it) + { + Item_param *param= *it; + Item *ps_param= param_it++; + if (ps_param->save_in_param(thd, param)) + DBUG_RETURN(1); + + if (acc.append(param)) + DBUG_RETURN(1); + + if (param->convert_str_value(thd)) + DBUG_RETURN(1); + + param->sync_clones(); + } + if (acc.finalize()) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +/* + Validate INSERT statement. 
+ + @param stmt prepared statement + @param table_list global/local table list + @param fields list of the table's fields to insert values + @param values_list values to be inserted into the table + @param update_fields the update fields. + @param update_values the update values. + @param duplic a way to handle duplicates + + @retval + FALSE success + @retval + TRUE error, error message is set in THD +*/ + +static bool mysql_test_insert_common(Prepared_statement *stmt, + TABLE_LIST *table_list, + List &fields, + List &values_list, + List &update_fields, + List &update_values, + enum_duplicates duplic, + bool ignore) +{ + THD *thd= stmt->thd; + List_iterator_fast its(values_list); + List_item *values; + DBUG_ENTER("mysql_test_insert_common"); + + if (insert_precheck(thd, table_list)) + goto error; + + //upgrade_lock_type_for_insert(thd, &table_list->lock_type, duplic, + // values_list.elements > 1); + /* + open temporary memory pool for temporary data allocated by derived + tables & preparation procedure + Note that this is done without locks (should not be needed as we will not + access any data here) + If we would use locks, then we have to ensure we are not using + TL_WRITE_DELAYED as having two such locks can cause table corruption. + */ + if (open_normal_and_derived_tables(thd, table_list, + MYSQL_OPEN_FORCE_SHARED_MDL, DT_INIT)) + goto error; + + if ((values= its++)) + { + uint value_count; + Item *unused_conds= 0; + + if (table_list->table) + { + // don't allocate insert_values + table_list->table->insert_values=(uchar *)1; + } + + if (mysql_prepare_insert(thd, table_list, fields, values, update_fields, + update_values, duplic, ignore, + &unused_conds, FALSE)) + goto error; + + value_count= values->elements; + its.rewind(); + + if (table_list->lock_type == TL_WRITE_DELAYED && + !(table_list->table->file->ha_table_flags() & HA_CAN_INSERT_DELAYED)) + { + my_error(ER_DELAYED_NOT_SUPPORTED, MYF(0), (table_list->view ? 
+ table_list->view_name.str : + table_list->table_name.str)); + goto error; + } + while ((values= its++)) + { + if (values->elements != value_count) + { + my_error(ER_WRONG_VALUE_COUNT_ON_ROW, MYF(0), + thd->get_stmt_da()->current_row_for_warning()); + goto error; + } + if (setup_fields(thd, Ref_ptr_array(), + *values, COLUMNS_READ, 0, NULL, 0)) + goto error; + thd->get_stmt_da()->inc_current_row_for_warning(); + } + thd->get_stmt_da()->reset_current_row_for_warning(1); + } + DBUG_RETURN(FALSE); + +error: + /* insert_values is cleared in open_table */ + DBUG_RETURN(TRUE); +} + + +/** + Open temporary tables if required and validate INSERT statement. + + @param stmt prepared statement + @param tables global/local table list + + @retval + FALSE success + @retval + TRUE error, error message is set in THD +*/ + +static bool mysql_test_insert(Prepared_statement *stmt, + TABLE_LIST *table_list, + List &fields, + List &values_list, + List &update_fields, + List &update_values, + enum_duplicates duplic, bool ignore) +{ + THD *thd= stmt->thd; + + /* + Since INSERT DELAYED doesn't support temporary tables, we could + not pre-open temporary tables for SQLCOM_INSERT / SQLCOM_REPLACE. + Open them here instead. + */ + if (table_list->lock_type != TL_WRITE_DELAYED) + { + if (thd->open_temporary_tables(table_list)) + return true; + } + + return mysql_test_insert_common(stmt, table_list, fields, values_list, + update_fields, update_values, duplic, ignore); +} + + +/** + Validate UPDATE statement. + + @param stmt prepared statement + @param tables list of tables used in this query + + @todo + - here we should send types of placeholders to the client. 
+ + @retval + 0 success + @retval + 1 error, error message is set in THD + @retval + 2 convert to multi_update +*/ + +static int mysql_test_update(Prepared_statement *stmt, + TABLE_LIST *table_list) +{ + int res; + THD *thd= stmt->thd; + uint table_count= 0; + TABLE_LIST *update_source_table; + SELECT_LEX *select= stmt->lex->first_select_lex(); +#ifndef NO_EMBEDDED_ACCESS_CHECKS + privilege_t want_privilege(NO_ACL); +#endif + DBUG_ENTER("mysql_test_update"); + + if (update_precheck(thd, table_list) || + open_tables(thd, &table_list, &table_count, MYSQL_OPEN_FORCE_SHARED_MDL)) + goto error; + + if (mysql_handle_derived(thd->lex, DT_INIT)) + goto error; + + if (((update_source_table= unique_table(thd, table_list, + table_list->next_global, 0)) || + table_list->is_multitable())) + { + DBUG_ASSERT(update_source_table || table_list->view != 0); + DBUG_PRINT("info", ("Switch to multi-update")); + /* pass counter value */ + thd->lex->table_count_update= table_count; + /* convert to multiupdate */ + DBUG_RETURN(2); + } + + /* + thd->fill_derived_tables() is false here for sure (because it is + preparation of PS, so we even do not check it). + */ + if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT)) + goto error; + if (table_list->handle_derived(thd->lex, DT_PREPARE)) + goto error; + + if (!table_list->single_table_updatable()) + { + my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias.str, "UPDATE"); + goto error; + } + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + /* Force privilege re-checking for views after they have been opened. */ + want_privilege= (table_list->view ? 
UPDATE_ACL : + table_list->grant.want_privilege); +#endif + + if (mysql_prepare_update(thd, table_list, &select->where, + select->order_list.elements, + select->order_list.first)) + goto error; + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + table_list->grant.want_privilege= want_privilege; + table_list->table->grant.want_privilege= want_privilege; + table_list->register_want_access(want_privilege); +#endif + thd->lex->first_select_lex()->no_wrap_view_item= TRUE; + res= setup_fields(thd, Ref_ptr_array(), + select->item_list, MARK_COLUMNS_READ, 0, NULL, 0); + thd->lex->first_select_lex()->no_wrap_view_item= FALSE; + if (res) + goto error; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + /* Check values */ + table_list->grant.want_privilege= + table_list->table->grant.want_privilege= + (SELECT_ACL & ~table_list->table->grant.privilege); + table_list->register_want_access(SELECT_ACL); +#endif + if (setup_fields(thd, Ref_ptr_array(), + stmt->lex->value_list, COLUMNS_READ, 0, NULL, 0) || + check_unique_table(thd, table_list)) + goto error; + /* TODO: here we should send types of placeholders to the client. */ + DBUG_RETURN(0); +error: + DBUG_RETURN(1); +} + + +/** + Validate DELETE statement. 
+ + @param stmt prepared statement + @param tables list of tables used in this query + + @retval + FALSE success + @retval + TRUE error, error message is set in THD +*/ + +static bool mysql_test_delete(Prepared_statement *stmt, + TABLE_LIST *table_list) +{ + uint table_count= 0; + THD *thd= stmt->thd; + LEX *lex= stmt->lex; + bool delete_while_scanning; + DBUG_ENTER("mysql_test_delete"); + + if (delete_precheck(thd, table_list) || + open_tables(thd, &table_list, &table_count, MYSQL_OPEN_FORCE_SHARED_MDL)) + goto error; + + if (mysql_handle_derived(thd->lex, DT_INIT)) + goto error; + if (mysql_handle_derived(thd->lex, DT_MERGE_FOR_INSERT)) + goto error; + if (mysql_handle_derived(thd->lex, DT_PREPARE)) + goto error; + + if (!table_list->single_table_updatable()) + { + my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias.str, "DELETE"); + goto error; + } + if (!table_list->table || !table_list->table->is_created()) + { + my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0), + table_list->view_db.str, table_list->view_name.str); + goto error; + } + + DBUG_RETURN(mysql_prepare_delete(thd, table_list, + &lex->first_select_lex()->where, + &delete_while_scanning)); +error: + DBUG_RETURN(TRUE); +} + + +/** + Validate SELECT statement. + + In case of success, if this query is not EXPLAIN, send column list info + back to the client. + + @param stmt prepared statement + @param tables list of tables used in the query + + @retval + 0 success + @retval + 1 error, error message is set in THD + @retval + 2 success, and statement metadata has been sent +*/ + +static int mysql_test_select(Prepared_statement *stmt, + TABLE_LIST *tables) +{ + THD *thd= stmt->thd; + LEX *lex= stmt->lex; + SELECT_LEX_UNIT *unit= &lex->unit; + DBUG_ENTER("mysql_test_select"); + + lex->first_select_lex()->context.resolve_in_select_list= TRUE; + + privilege_t privilege(lex->exchange ? 
SELECT_ACL | FILE_ACL : SELECT_ACL); + if (tables) + { + if (check_table_access(thd, privilege, tables, FALSE, UINT_MAX, FALSE)) + goto error; + } + else if (check_access(thd, privilege, any_db.str, NULL, NULL, 0, 0)) + goto error; + + if (!lex->result && !(lex->result= new (stmt->mem_root) select_send(thd))) + { + my_error(ER_OUTOFMEMORY, MYF(ME_FATAL), + static_cast(sizeof(select_send))); + goto error; + } + + if (open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL, + DT_INIT | DT_PREPARE)) + goto error; + + thd->lex->used_tables= 0; // Updated by setup_fields + + /* + JOIN::prepare calls + It is not SELECT COMMAND for sure, so setup_tables will be called as + usual, and we pass 0 as setup_tables_done_option + */ + if (unit->prepare(unit->derived, 0, 0)) + goto error; + if (!lex->describe && !thd->lex->analyze_stmt && !stmt->is_sql_prepare()) + { + /* Make copy of item list, as change_columns may change it */ + SELECT_LEX_UNIT* master_unit= unit->first_select()->master_unit(); + bool is_union_op= + master_unit->is_unit_op() || master_unit->fake_select_lex; + + List fields(is_union_op ? unit->item_list : + lex->first_select_lex()->item_list); + + /* Change columns if a procedure like analyse() */ + if (unit->last_procedure && unit->last_procedure->change_columns(thd, fields)) + goto error; + + /* + We can use lex->result as it should've been prepared in + unit->prepare call above. + */ + if (send_prep_stmt(stmt, lex->result->field_count(fields)) || + lex->result->send_result_set_metadata(fields, Protocol::SEND_EOF) || + thd->protocol->flush()) + goto error; + DBUG_RETURN(2); + } + DBUG_RETURN(0); +error: + DBUG_RETURN(1); +} + + +/** + Validate and prepare for execution DO statement expressions. 
+ + @param stmt prepared statement + @param tables list of tables used in this query + @param values list of expressions + + @retval + FALSE success + @retval + TRUE error, error message is set in THD +*/ + +static bool mysql_test_do_fields(Prepared_statement *stmt, + TABLE_LIST *tables, + List *values) +{ + THD *thd= stmt->thd; + + DBUG_ENTER("mysql_test_do_fields"); + if (tables && check_table_access(thd, SELECT_ACL, tables, FALSE, + UINT_MAX, FALSE)) + DBUG_RETURN(TRUE); + + if (open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL, + DT_INIT | DT_PREPARE)) + DBUG_RETURN(TRUE); + DBUG_RETURN(setup_fields(thd, Ref_ptr_array(), + *values, COLUMNS_READ, 0, NULL, 0)); +} + + +/** + Validate and prepare for execution SET statement expressions. + + @param stmt prepared statement + @param tables list of tables used in this query + @param values list of expressions + + @retval + FALSE success + @retval + TRUE error, error message is set in THD +*/ + +static bool mysql_test_set_fields(Prepared_statement *stmt, + TABLE_LIST *tables, + List *var_list) +{ + DBUG_ENTER("mysql_test_set_fields"); + List_iterator_fast it(*var_list); + THD *thd= stmt->thd; + set_var_base *var; + + if ((tables && + check_table_access(thd, SELECT_ACL, tables, FALSE, UINT_MAX, FALSE)) || + open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL, + DT_INIT | DT_PREPARE)) + goto error; + + while ((var= it++)) + { + if (var->light_check(thd)) + goto error; + } + DBUG_RETURN(FALSE); +error: + DBUG_RETURN(TRUE); +} + + +/** + Validate and prepare for execution CALL statement expressions. 
+ + @param stmt prepared statement + @param tables list of tables used in this query + @param value_list list of expressions + + @retval FALSE success + @retval TRUE error, error message is set in THD +*/ + +static bool mysql_test_call_fields(Prepared_statement *stmt, + TABLE_LIST *tables, + List *value_list) +{ + DBUG_ENTER("mysql_test_call_fields"); + + List_iterator it(*value_list); + THD *thd= stmt->thd; + Item *item; + + if ((tables && + check_table_access(thd, SELECT_ACL, tables, FALSE, UINT_MAX, FALSE)) || + open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL, + DT_INIT | DT_PREPARE)) + goto err; + + while ((item= it++)) + { + if (item->fix_fields_if_needed(thd, it.ref())) + goto err; + } + DBUG_RETURN(FALSE); +err: + DBUG_RETURN(TRUE); +} + + +/** + Check internal SELECT of the prepared command. + + @param stmt prepared statement + @param specific_prepare function of command specific prepare + @param setup_tables_done_option options to be passed to LEX::unit.prepare() + + @note + This function won't directly open tables used in select. They should + be opened either by calling function (and in this case you probably + should use select_like_stmt_test_with_open()) or by + "specific_prepare" call (like this happens in case of multi-update). + + @retval + FALSE success + @retval + TRUE error, error message is set in THD +*/ + +static bool select_like_stmt_test(Prepared_statement *stmt, + int (*specific_prepare)(THD *thd), + ulonglong setup_tables_done_option) +{ + DBUG_ENTER("select_like_stmt_test"); + THD *thd= stmt->thd; + LEX *lex= stmt->lex; + + lex->first_select_lex()->context.resolve_in_select_list= TRUE; + + if (specific_prepare && (*specific_prepare)(thd)) + DBUG_RETURN(TRUE); + + thd->lex->used_tables= 0; // Updated by setup_fields + + /* Calls JOIN::prepare */ + DBUG_RETURN(lex->unit.prepare(lex->unit.derived, 0, setup_tables_done_option)); +} + +/** + Check internal SELECT of the prepared command (with opening of used + tables). 
+ + @param stmt prepared statement + @param tables list of tables to be opened + before calling specific_prepare function + @param specific_prepare function of command specific prepare + @param setup_tables_done_option options to be passed to LEX::unit.prepare() + + @retval + FALSE success + @retval + TRUE error +*/ + +static bool +select_like_stmt_test_with_open(Prepared_statement *stmt, + TABLE_LIST *tables, + int (*specific_prepare)(THD *thd), + ulonglong setup_tables_done_option) +{ + uint table_count= 0; + DBUG_ENTER("select_like_stmt_test_with_open"); + + /* + We should not call LEX::unit.cleanup() after this + open_normal_and_derived_tables() call because we don't allow + prepared EXPLAIN yet so derived tables will clean up after + themself. + */ + THD *thd= stmt->thd; + if (open_tables(thd, &tables, &table_count, MYSQL_OPEN_FORCE_SHARED_MDL)) + DBUG_RETURN(TRUE); + + DBUG_RETURN(select_like_stmt_test(stmt, specific_prepare, + setup_tables_done_option)); +} + + +/** + Validate and prepare for execution CREATE TABLE statement. + + @param stmt prepared statement + @param tables list of tables used in this query + + @retval + FALSE success + @retval + TRUE error, error message is set in THD +*/ + +static bool mysql_test_create_table(Prepared_statement *stmt) +{ + DBUG_ENTER("mysql_test_create_table"); + THD *thd= stmt->thd; + LEX *lex= stmt->lex; + SELECT_LEX *select_lex= lex->first_select_lex(); + bool res= FALSE; + bool link_to_local; + TABLE_LIST *create_table= lex->query_tables; + TABLE_LIST *tables= lex->create_last_non_select_table->next_global; + + if (create_table_precheck(thd, tables, create_table)) + DBUG_RETURN(TRUE); + + if (select_lex->item_list.elements) + { + /* Base table and temporary table are not in the same name space. 
*/ + if (!lex->create_info.tmp_table()) + create_table->open_type= OT_BASE_ONLY; + + if (open_normal_and_derived_tables(stmt->thd, lex->query_tables, + MYSQL_OPEN_FORCE_SHARED_MDL, + DT_INIT | DT_PREPARE)) + DBUG_RETURN(TRUE); + + select_lex->context.resolve_in_select_list= TRUE; + + lex->unlink_first_table(&link_to_local); + + res= select_like_stmt_test(stmt, 0, 0); + + lex->link_first_table_back(create_table, link_to_local); + } + else + { + /* + Check that the source table exist, and also record + its metadata version. Even though not strictly necessary, + we validate metadata of all CREATE TABLE statements, + which keeps metadata validation code simple. + */ + if (open_normal_and_derived_tables(stmt->thd, lex->query_tables, + MYSQL_OPEN_FORCE_SHARED_MDL, + DT_INIT | DT_PREPARE)) + DBUG_RETURN(TRUE); + } + + DBUG_RETURN(res); +} + + +static int send_stmt_metadata(THD *thd, Prepared_statement *stmt, List *fields) +{ + if (stmt->is_sql_prepare()) + return 0; + + if (send_prep_stmt(stmt, fields->elements) || + thd->protocol->send_result_set_metadata(fields, Protocol::SEND_EOF) || + thd->protocol->flush()) + return 1; + + return 2; +} + + +/** + Validate and prepare for execution SHOW CREATE TABLE statement. + + @param stmt prepared statement + @param tables list of tables used in this query + + @retval + FALSE success + @retval + TRUE error, error message is set in THD +*/ + +static int mysql_test_show_create_table(Prepared_statement *stmt, + TABLE_LIST *tables) +{ + DBUG_ENTER("mysql_test_show_create_table"); + THD *thd= stmt->thd; + List fields; + char buff[2048]; + String buffer(buff, sizeof(buff), system_charset_info); + + if (mysqld_show_create_get_fields(thd, tables, &fields, &buffer)) + DBUG_RETURN(1); + + DBUG_RETURN(send_stmt_metadata(thd, stmt, &fields)); +} + + +/** + Validate and prepare for execution SHOW CREATE DATABASE statement. 
+ + @param stmt prepared statement + + @retval + FALSE success + @retval + TRUE error, error message is set in THD +*/ + +static int mysql_test_show_create_db(Prepared_statement *stmt) +{ + DBUG_ENTER("mysql_test_show_create_db"); + THD *thd= stmt->thd; + List fields; + + mysqld_show_create_db_get_fields(thd, &fields); + + DBUG_RETURN(send_stmt_metadata(thd, stmt, &fields)); +} + + +#ifndef NO_EMBEDDED_ACCESS_CHECKS +/** + Validate and prepare for execution SHOW GRANTS statement. + + @param stmt prepared statement + + @retval + FALSE success + @retval + TRUE error, error message is set in THD +*/ + +static int mysql_test_show_grants(Prepared_statement *stmt) +{ + DBUG_ENTER("mysql_test_show_grants"); + THD *thd= stmt->thd; + List fields; + char buff[1024]; + const char *username= NULL, *hostname= NULL, *rolename= NULL, *end; + + if (get_show_user(thd, thd->lex->grant_user, &username, &hostname, &rolename)) + DBUG_RETURN(1); + + if (username) + end= strxmov(buff,"Grants for ",username,"@",hostname, NullS); + else if (rolename) + end= strxmov(buff,"Grants for ",rolename, NullS); + else + DBUG_RETURN(1); + + mysql_show_grants_get_fields(thd, &fields, buff, (uint)(end - buff)); + DBUG_RETURN(send_stmt_metadata(thd, stmt, &fields)); +} +#endif /*NO_EMBEDDED_ACCESS_CHECKS*/ + + +#ifndef EMBEDDED_LIBRARY +/** + Validate and prepare for execution SHOW SLAVE STATUS statement. + + @param stmt prepared statement + + @retval + FALSE success + @retval + TRUE error, error message is set in THD +*/ + +static int mysql_test_show_slave_status(Prepared_statement *stmt, + bool show_all_slaves_stat) +{ + DBUG_ENTER("mysql_test_show_slave_status"); + THD *thd= stmt->thd; + List fields; + + show_master_info_get_fields(thd, &fields, show_all_slaves_stat, 0); + + DBUG_RETURN(send_stmt_metadata(thd, stmt, &fields)); +} + + +/** + Validate and prepare for execution SHOW BINLOG STATUS statement. 
+ + @param stmt prepared statement + + @retval + FALSE success + @retval + TRUE error, error message is set in THD +*/ + +static int mysql_test_show_binlog_status(Prepared_statement *stmt) +{ + DBUG_ENTER("mysql_test_show_binlog_status"); + THD *thd= stmt->thd; + List fields; + + show_binlog_info_get_fields(thd, &fields); + + DBUG_RETURN(send_stmt_metadata(thd, stmt, &fields)); +} + + +/** + Validate and prepare for execution SHOW BINLOGS statement. + + @param stmt prepared statement + + @retval + FALSE success + @retval + TRUE error, error message is set in THD +*/ + +static int mysql_test_show_binlogs(Prepared_statement *stmt) +{ + DBUG_ENTER("mysql_test_show_binlogs"); + THD *thd= stmt->thd; + List fields; + + show_binlogs_get_fields(thd, &fields); + + DBUG_RETURN(send_stmt_metadata(thd, stmt, &fields)); +} + +#endif /* EMBEDDED_LIBRARY */ + + +/** + Validate and prepare for execution SHOW CREATE PROC/FUNC statement. + + @param stmt prepared statement + + @retval + FALSE success + @retval + TRUE error, error message is set in THD +*/ + +static int mysql_test_show_create_routine(Prepared_statement *stmt, + const Sp_handler *sph) +{ + DBUG_ENTER("mysql_test_show_binlogs"); + THD *thd= stmt->thd; + List fields; + + sp_head::show_create_routine_get_fields(thd, sph, &fields); + + DBUG_RETURN(send_stmt_metadata(thd, stmt, &fields)); +} + + +/** + @brief Validate and prepare for execution CREATE VIEW statement + + @param stmt prepared statement + + @note This function handles create view commands. + + @retval FALSE Operation was a success. + @retval TRUE An error occurred. 
+*/ + +static bool mysql_test_create_view(Prepared_statement *stmt) +{ + DBUG_ENTER("mysql_test_create_view"); + THD *thd= stmt->thd; + LEX *lex= stmt->lex; + bool res= TRUE; + /* Skip first table, which is the view we are creating */ + bool link_to_local; + TABLE_LIST *view= lex->unlink_first_table(&link_to_local); + TABLE_LIST *tables= lex->query_tables; + + if (create_view_precheck(thd, tables, view, lex->create_view->mode)) + goto err; + + /* + Since we can't pre-open temporary tables for SQLCOM_CREATE_VIEW, + (see mysql_create_view) we have to do it here instead. + */ + if (thd->open_temporary_tables(tables)) + goto err; + + lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_VIEW; + if (open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL, + DT_INIT | DT_PREPARE)) + goto err; + + res= select_like_stmt_test(stmt, 0, 0); + +err: + /* put view back for PS rexecuting */ + lex->link_first_table_back(view, link_to_local); + DBUG_RETURN(res); +} + + +/* + Validate and prepare for execution a multi update statement. + + @param stmt prepared statement + @param tables list of tables used in this query + @param converted converted to multi-update from usual update + + @retval + FALSE success + @retval + TRUE error, error message is set in THD +*/ + +static bool mysql_test_multiupdate(Prepared_statement *stmt, + TABLE_LIST *tables, + bool converted) +{ + /* if we switched from normal update, rights are checked */ + if (!converted && multi_update_precheck(stmt->thd, tables)) + return TRUE; + + return select_like_stmt_test(stmt, &mysql_multi_update_prepare, + OPTION_SETUP_TABLES_DONE); +} + + +/** + Validate and prepare for execution a multi delete statement. + + @param stmt prepared statement + @param tables list of tables used in this query + + @retval + FALSE success + @retval + TRUE error, error message in THD is set. 
+*/ + +static bool mysql_test_multidelete(Prepared_statement *stmt, + TABLE_LIST *tables) +{ + THD *thd= stmt->thd; + + thd->lex->current_select= thd->lex->first_select_lex(); + if (add_item_to_list(thd, new (thd->mem_root) + Item_null(thd))) + { + my_error(ER_OUTOFMEMORY, MYF(ME_FATAL), 0); + goto error; + } + + if (multi_delete_precheck(thd, tables) || + select_like_stmt_test_with_open(stmt, tables, + &mysql_multi_delete_prepare, + OPTION_SETUP_TABLES_DONE)) + goto error; + if (!tables->table) + { + my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0), + tables->view_db.str, tables->view_name.str); + goto error; + } + return FALSE; +error: + return TRUE; +} + + +/** + Wrapper for mysql_insert_select_prepare, to make change of local tables + after open_normal_and_derived_tables() call. + + @param thd thread handle + + @note + We need to remove the first local table after + open_normal_and_derived_tables(), because mysql_handle_derived + uses local tables lists. +*/ + +static int mysql_insert_select_prepare_tester(THD *thd) +{ + SELECT_LEX *first_select= thd->lex->first_select_lex(); + TABLE_LIST *second_table= first_select->table_list.first->next_local; + + /* Skip first table, which is the table we are inserting in */ + first_select->table_list.first= second_table; + thd->lex->first_select_lex()->context.table_list= + thd->lex->first_select_lex()->context.first_name_resolution_table= + second_table; + + return mysql_insert_select_prepare(thd, NULL); +} + + +/** + Validate and prepare for execution INSERT ... SELECT statement. 

  @param stmt               prepared statement
  @param tables             list of tables used in this query

  @retval
    FALSE             success
  @retval
    TRUE              error, error message is set in THD
*/

static bool mysql_test_insert_select(Prepared_statement *stmt,
                                     TABLE_LIST *tables)
{
  int res;
  LEX *lex= stmt->lex;
  TABLE_LIST *first_local_table;

  if (tables->table)
  {
    // don't allocate insert_values
    tables->table->insert_values=(uchar *)1;
  }

  if (insert_precheck(stmt->thd, tables))
    return 1;

  /* store it, because mysql_insert_select_prepare_tester change it */
  first_local_table= lex->first_select_lex()->table_list.first;
  DBUG_ASSERT(first_local_table != 0);

  res=
    select_like_stmt_test_with_open(stmt, tables,
                                    &mysql_insert_select_prepare_tester,
                                    OPTION_SETUP_TABLES_DONE);
  /* revert changes made by mysql_insert_select_prepare_tester */
  lex->first_select_lex()->table_list.first= first_local_table;
  return res;
}

/**
  Validate SELECT statement.

  In case of success, if this query is not EXPLAIN, send column list info
  back to the client.
+ + @param stmt prepared statement + @param tables list of tables used in the query + + @retval 0 success + @retval 1 error, error message is set in THD + @retval 2 success, and statement metadata has been sent +*/ + +static int mysql_test_handler_read(Prepared_statement *stmt, + TABLE_LIST *tables) +{ + THD *thd= stmt->thd; + LEX *lex= stmt->lex; + SQL_HANDLER *ha_table; + DBUG_ENTER("mysql_test_handler_read"); + + lex->first_select_lex()->context.resolve_in_select_list= TRUE; + + /* + We don't have to test for permissions as this is already done during + HANDLER OPEN + */ + if (!(ha_table= mysql_ha_read_prepare(thd, tables, lex->ha_read_mode, + lex->ident.str, + lex->insert_list, + lex->ha_rkey_mode, + lex->first_select_lex()->where))) + DBUG_RETURN(1); + + if (!stmt->is_sql_prepare()) + { + if (!lex->result && !(lex->result= new (stmt->mem_root) select_send(thd))) + DBUG_RETURN(1); + + if (send_prep_stmt(stmt, ha_table->fields.elements) || + lex->result->send_result_set_metadata(ha_table->fields, Protocol::SEND_EOF) || + thd->protocol->flush()) + DBUG_RETURN(1); + DBUG_RETURN(2); + } + DBUG_RETURN(0); +} + + +/** + Send metadata to a client on PREPARE phase of XA RECOVER statement + processing + + @param stmt prepared statement + + @return 0 on success, 1 on failure, 2 in case metadata was already sent +*/ + +static int mysql_test_xa_recover(Prepared_statement *stmt) +{ + THD *thd= stmt->thd; + List field_list; + + xa_recover_get_fields(thd, &field_list, nullptr); + return send_stmt_metadata(thd, stmt, &field_list); +} + + +/** + Send metadata to a client on PREPARE phase of HELP statement processing + + @param stmt prepared statement + + @return 0 on success, 1 on failure, 2 in case metadata was already sent +*/ + +static int mysql_test_help(Prepared_statement *stmt) +{ + THD *thd= stmt->thd; + List fields; + + if (mysqld_help_prepare(thd, stmt->lex->help_arg, &fields)) + return 1; + + return send_stmt_metadata(thd, stmt, &fields); +} + + +/** + Send metadata 
to a client on PREPARE phase of admin related statements + processing + + @param stmt prepared statement + + @return 0 on success, 1 on failure, 2 in case metadata was already sent +*/ + +static int mysql_test_admin_table(Prepared_statement *stmt) +{ + THD *thd= stmt->thd; + List fields; + + fill_check_table_metadata_fields(thd, &fields); + return send_stmt_metadata(thd, stmt, &fields); +} + + +/** + Send metadata to a client on PREPARE phase of CHECKSUM TABLE statement + processing + + @param stmt prepared statement + + @return 0 on success, 1 on failure, 2 in case metadata was already sent +*/ + +static int mysql_test_checksum_table(Prepared_statement *stmt) +{ + THD *thd= stmt->thd; + List fields; + + fill_checksum_table_metadata_fields(thd, &fields); + return send_stmt_metadata(thd, stmt, &fields); +} + + +/** + Perform semantic analysis of the parsed tree and send a response packet + to the client. + + This function + - opens all tables and checks access rights + - validates semantics of statement columns and SQL functions + by calling fix_fields. 
+ + @param stmt prepared statement + + @retval + FALSE success, statement metadata is sent to client + @retval + TRUE error, error message is set in THD (but not sent) +*/ + +static bool check_prepared_statement(Prepared_statement *stmt) +{ + THD *thd= stmt->thd; + LEX *lex= stmt->lex; + SELECT_LEX *select_lex= lex->first_select_lex(); + TABLE_LIST *tables; + enum enum_sql_command sql_command= lex->sql_command; + int res= 0; + DBUG_ENTER("check_prepared_statement"); + DBUG_PRINT("enter",("command: %d param_count: %u", + sql_command, stmt->param_count)); + + lex->first_lists_tables_same(); + lex->fix_first_select_number(); + tables= lex->query_tables; + + /* set context for commands which do not use setup_tables */ + lex->first_select_lex()->context.resolve_in_table_list_only(select_lex-> + get_table_list()); + + /* + For the optimizer trace, this is the symmetric, for statement preparation, + of what is done at statement execution (in mysql_execute_command()). + */ + Opt_trace_start ots(thd); + ots.init(thd, tables, lex->sql_command, &lex->var_list, thd->query(), + thd->query_length(), thd->variables.character_set_client); + + Json_writer_object trace_command(thd); + Json_writer_array trace_command_steps(thd, "steps"); + + /* Reset warning count for each query that uses tables */ + if (tables) + thd->get_stmt_da()->opt_clear_warning_info(thd->query_id); + + if (sql_command_flags[sql_command] & CF_HA_CLOSE) + mysql_ha_rm_tables(thd, tables); + + /* + Open temporary tables that are known now. Temporary tables added by + prelocking will be opened afterwards (during open_tables()). 
+ */ + if (sql_command_flags[sql_command] & CF_PREOPEN_TMP_TABLES) + { + if (thd->open_temporary_tables(tables)) + goto error; + } + +#ifdef WITH_WSREP + if (wsrep_sync_wait(thd, sql_command)) + goto error; +#endif + switch (sql_command) { + case SQLCOM_REPLACE: + case SQLCOM_INSERT: + res= mysql_test_insert(stmt, tables, lex->field_list, + lex->many_values, + lex->update_list, lex->value_list, + lex->duplicates, lex->ignore); + break; + + case SQLCOM_LOAD: + res= mysql_test_insert_common(stmt, tables, lex->field_list, + lex->many_values, + lex->update_list, lex->value_list, + lex->duplicates, lex->ignore); + break; + + case SQLCOM_UPDATE: + res= mysql_test_update(stmt, tables); + /* mysql_test_update returns 2 if we need to switch to multi-update */ + if (res != 2) + break; + /* fall through */ + case SQLCOM_UPDATE_MULTI: + res= mysql_test_multiupdate(stmt, tables, res == 2); + break; + + case SQLCOM_DELETE: + res= mysql_test_delete(stmt, tables); + break; + /* The following allow WHERE clause, so they must be tested like SELECT */ + case SQLCOM_SHOW_DATABASES: + case SQLCOM_SHOW_TABLES: + case SQLCOM_SHOW_TRIGGERS: + case SQLCOM_SHOW_EVENTS: + case SQLCOM_SHOW_OPEN_TABLES: + case SQLCOM_SHOW_FIELDS: + case SQLCOM_SHOW_KEYS: + case SQLCOM_SHOW_COLLATIONS: + case SQLCOM_SHOW_CHARSETS: + case SQLCOM_SHOW_VARIABLES: + case SQLCOM_SHOW_STATUS: + case SQLCOM_SHOW_TABLE_STATUS: + case SQLCOM_SHOW_STATUS_PROC: + case SQLCOM_SHOW_STATUS_FUNC: + case SQLCOM_SHOW_STATUS_PACKAGE: + case SQLCOM_SHOW_STATUS_PACKAGE_BODY: + case SQLCOM_SELECT: + res= mysql_test_select(stmt, tables); + if (res == 2) + { + /* Statement and field info has already been sent */ + DBUG_RETURN(FALSE); + } + break; + case SQLCOM_CREATE_TABLE: + case SQLCOM_CREATE_SEQUENCE: + res= mysql_test_create_table(stmt); + break; + case SQLCOM_SHOW_CREATE: + if ((res= mysql_test_show_create_table(stmt, tables)) == 2) + { + /* Statement and field info has already been sent */ + DBUG_RETURN(FALSE); + } + break; + 
case SQLCOM_SHOW_CREATE_DB: + if ((res= mysql_test_show_create_db(stmt)) == 2) + { + /* Statement and field info has already been sent */ + DBUG_RETURN(FALSE); + } + break; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + case SQLCOM_SHOW_GRANTS: + if ((res= mysql_test_show_grants(stmt)) == 2) + { + /* Statement and field info has already been sent */ + DBUG_RETURN(FALSE); + } + break; +#endif /* NO_EMBEDDED_ACCESS_CHECKS */ +#ifndef EMBEDDED_LIBRARY + case SQLCOM_SHOW_SLAVE_STAT: + { + DBUG_ASSERT(thd->lex->m_sql_cmd); + Sql_cmd_show_slave_status *cmd; + cmd= dynamic_cast(thd->lex->m_sql_cmd); + DBUG_ASSERT(cmd); + if ((res= mysql_test_show_slave_status(stmt, + cmd->is_show_all_slaves_stat())) + == 2) + { + /* Statement and field info has already been sent */ + DBUG_RETURN(FALSE); + } + break; + } + case SQLCOM_SHOW_BINLOG_STAT: + if ((res= mysql_test_show_binlog_status(stmt)) == 2) + { + /* Statement and field info has already been sent */ + DBUG_RETURN(FALSE); + } + break; + case SQLCOM_SHOW_BINLOGS: + if ((res= mysql_test_show_binlogs(stmt)) == 2) + { + /* Statement and field info has already been sent */ + DBUG_RETURN(FALSE); + } + break; + case SQLCOM_SHOW_BINLOG_EVENTS: + case SQLCOM_SHOW_RELAYLOG_EVENTS: + { + List field_list; + Log_event::init_show_field_list(thd, &field_list); + + if ((res= send_stmt_metadata(thd, stmt, &field_list)) == 2) + DBUG_RETURN(FALSE); + } + break; +#endif /* EMBEDDED_LIBRARY */ + case SQLCOM_SHOW_CREATE_PROC: + if ((res= mysql_test_show_create_routine(stmt, &sp_handler_procedure)) == 2) + { + /* Statement and field info has already been sent */ + DBUG_RETURN(FALSE); + } + break; + case SQLCOM_SHOW_CREATE_FUNC: + if ((res= mysql_test_show_create_routine(stmt, &sp_handler_function)) == 2) + { + /* Statement and field info has already been sent */ + DBUG_RETURN(FALSE); + } + break; + case SQLCOM_SHOW_CREATE_PACKAGE: + if ((res= mysql_test_show_create_routine(stmt, &sp_handler_package_spec)) == 2) + { + /* Statement and field info has already 
been sent */ + DBUG_RETURN(FALSE); + } + break; + case SQLCOM_SHOW_CREATE_PACKAGE_BODY: + if ((res= mysql_test_show_create_routine(stmt, + &sp_handler_package_body)) == 2) + { + /* Statement and field info has already been sent */ + DBUG_RETURN(FALSE); + } + break; + case SQLCOM_CREATE_VIEW: + res= mysql_test_create_view(stmt); + break; + case SQLCOM_DO: + res= mysql_test_do_fields(stmt, tables, lex->insert_list); + break; + + case SQLCOM_CALL: + res= mysql_test_call_fields(stmt, tables, &lex->value_list); + break; + case SQLCOM_SET_OPTION: + res= mysql_test_set_fields(stmt, tables, &lex->var_list); + break; + + case SQLCOM_DELETE_MULTI: + res= mysql_test_multidelete(stmt, tables); + break; + + case SQLCOM_INSERT_SELECT: + case SQLCOM_REPLACE_SELECT: + res= mysql_test_insert_select(stmt, tables); + break; + + case SQLCOM_HA_READ: + res= mysql_test_handler_read(stmt, tables); + /* Statement and field info has already been sent */ + DBUG_RETURN(res == 1 ? TRUE : FALSE); + + case SQLCOM_XA_RECOVER: + res= mysql_test_xa_recover(stmt); + if (res == 2) + /* Statement and field info has already been sent */ + DBUG_RETURN(false); + break; + + case SQLCOM_HELP: + res= mysql_test_help(stmt); + if (res == 2) + /* Statement and field info has already been sent */ + DBUG_RETURN(false); + break; + + case SQLCOM_ANALYZE: + case SQLCOM_ASSIGN_TO_KEYCACHE: + case SQLCOM_CHECK: + case SQLCOM_OPTIMIZE: + case SQLCOM_PRELOAD_KEYS: + case SQLCOM_REPAIR: + res= mysql_test_admin_table(stmt); + if (res == 2) + /* Statement and field info has already been sent */ + DBUG_RETURN(false); + break; + + case SQLCOM_CHECKSUM: + res= mysql_test_checksum_table(stmt); + if (res == 2) + /* Statement and field info has already been sent */ + DBUG_RETURN(false); + break; + + case SQLCOM_PREPARE: + case SQLCOM_EXECUTE: + case SQLCOM_EXECUTE_IMMEDIATE: + case SQLCOM_DEALLOCATE_PREPARE: + my_message(ER_UNSUPPORTED_PS, ER_THD(thd, ER_UNSUPPORTED_PS), MYF(0)); + goto error; + + default: + break; + } + if 
(res == 0) + { + if (!stmt->is_sql_prepare()) + { + if (lex->describe || lex->analyze_stmt) + { + select_send result(thd); + List field_list; + res= thd->prepare_explain_fields(&result, &field_list, + lex->describe, lex->analyze_stmt) || + send_prep_stmt(stmt, result.field_count(field_list)) || + result.send_result_set_metadata(field_list, + Protocol::SEND_EOF); + } + else + res= send_prep_stmt(stmt, 0); + if (!res) + thd->protocol->flush(); + } + DBUG_RETURN(FALSE); + } +error: + DBUG_RETURN(TRUE); +} + +/** + Initialize array of parameters in statement from LEX. + (We need to have quick access to items by number in mysql_stmt_get_longdata). + This is to avoid using malloc/realloc in the parser. +*/ + +static bool init_param_array(Prepared_statement *stmt) +{ + LEX *lex= stmt->lex; + if ((stmt->param_count= lex->param_list.elements)) + { + if (stmt->param_count > (uint) UINT_MAX16) + { + /* Error code to be defined in 5.0 */ + my_message(ER_PS_MANY_PARAM, ER_THD(stmt->thd, ER_PS_MANY_PARAM), + MYF(0)); + return TRUE; + } + Item_param **to; + List_iterator param_iterator(lex->param_list); + /* Use thd->mem_root as it points at statement mem_root */ + stmt->param_array= (Item_param **) + alloc_root(stmt->thd->mem_root, + sizeof(Item_param*) * stmt->param_count); + if (!stmt->param_array) + return TRUE; + for (to= stmt->param_array; + to < stmt->param_array + stmt->param_count; + ++to) + { + *to= param_iterator++; + } + } + return FALSE; +} + + +/** + COM_STMT_PREPARE handler. + + Given a query string with parameter markers, create a prepared + statement from it and send PS info back to the client. + + If parameter markers are found in the query, then store the information + using Item_param along with maintaining a list in lex->param_array, so + that a fast and direct retrieval can be made without going through all + field items. + + @param packet query to be prepared + @param packet_length query string length, including ignored + trailing NULL or quote char. 

  @note
    This function parses the query and sends the total number of parameters
    and resultset metadata information back to client (if any), without
    executing the query i.e. without any log/disk writes. This allows the
    queries to be re-executed without re-parsing during execute.

  @return
    none: in case of success a new statement id and metadata is sent
    to the client, otherwise an error message is set in THD.
*/

void mysqld_stmt_prepare(THD *thd, const char *packet, uint packet_length)
{
  Protocol *save_protocol= thd->protocol;
  Prepared_statement *stmt;
  DBUG_ENTER("mysqld_stmt_prepare");
  DBUG_PRINT("prep_query", ("%s", packet));

  /* First of all clear possible warnings from the previous command */
  thd->reset_for_next_command();

  if (! (stmt= new Prepared_statement(thd)))
    goto end;           /* out of memory: error is set in Sql_alloc */

  if (thd->stmt_map.insert(thd, stmt))
  {
    /*
      The error is set in the insert. The statement itself
      will be also deleted there (this is how the hash works).
    */
    goto end;
  }

  thd->protocol= &thd->protocol_binary;

  /* Create PS table entry, set query text after rewrite. */
  stmt->m_prepared_stmt= MYSQL_CREATE_PS(stmt, stmt->id,
                                         thd->m_statement_psi,
                                         stmt->name.str, stmt->name.length);

  if (stmt->prepare(packet, packet_length))
  {
    /*
      Prepare failed and stmt will be freed.
      Now we have to save the query_string so that the
      audit plugin later gets the meaningful notification.
    */
    if (alloc_query(thd, stmt->query_string.str(), stmt->query_string.length()))
    {
      thd->set_query(0, 0);
    }
    /* Statement map deletes statement on erase */
    thd->stmt_map.erase(stmt);
    thd->clear_last_stmt();
  }
  else
    thd->set_last_stmt(stmt);

  thd->protocol= save_protocol;

  sp_cache_enforce_limit(thd->sp_proc_cache, stored_program_cache_size);
  sp_cache_enforce_limit(thd->sp_func_cache, stored_program_cache_size);
  sp_cache_enforce_limit(thd->sp_package_spec_cache, stored_program_cache_size);
  sp_cache_enforce_limit(thd->sp_package_body_cache, stored_program_cache_size);

  /* check_prepared_statement sends the metadata packet in case of success */
end:
  DBUG_VOID_RETURN;
}

/**
  Get an SQL statement from an item in m_code.

  This function can return pointers to very different memory classes:
  - a static string "NULL", if the item returned NULL
  - the result of prepare_stmt_code->val_str(), if no conversion was needed
  - a thd->mem_root allocated string with the result of
    prepare_stmt_code->val_str() converted to @@collation_connection,
    if conversion was needed

  The caller must dispose the result before the life cycle of "buffer" ends.
  As soon as buffer's destructor is called, the value is not valid any more!

  mysql_sql_stmt_prepare() and mysql_sql_stmt_execute_immediate()
  call get_dynamic_sql_string() and then call respectively
  Prepared_statement::prepare() and Prepared_statement::execute_immediate(),
  who store the returned result into its permanent location using
  alloc_query(). "buffer" is still not destructed at that time.
+ + @param[out] dst the result is stored here + @param[inout] buffer + + @retval false on success + @retval true on error (out of memory) +*/ + +bool Lex_prepared_stmt::get_dynamic_sql_string(THD *thd, + LEX_CSTRING *dst, + String *buffer) +{ + if (m_code->fix_fields_if_needed_for_scalar(thd, NULL)) + return true; + + const String *str= m_code->val_str(buffer); + if (m_code->null_value) + { + /* + Prepare source was NULL, so we need to set "str" to + something reasonable to get a readable error message during parsing + */ + dst->str= "NULL"; + dst->length= 4; + return false; + } + + /* + Character set conversion notes: + + 1) When PREPARE or EXECUTE IMMEDIATE are used with string literals: + PREPARE stmt FROM 'SELECT ''str'''; + EXECUTE IMMEDIATE 'SELECT ''str'''; + it's very unlikely that any conversion will happen below, because + @@character_set_client and @@collation_connection are normally + set to the same CHARSET_INFO pointer. + + In tricky environments when @@collation_connection is set to something + different from @@character_set_client, double conversion may happen: + - When the parser scans the string literal + (sql_yacc.yy rules "prepare_src" -> "expr" -> ... -> "text_literal") + it will convert 'str' from @@character_set_client to + @@collation_connection. + - Then in the code below will convert 'str' from @@collation_connection + back to @@character_set_client. + + 2) When PREPARE or EXECUTE IMMEDIATE is used with a user variable, + it should work about the same way, because user variables are usually + assigned like this: + SET @str='str'; + and thus have the same character set with string literals. + + 3) When PREPARE or EXECUTE IMMEDIATE is used with some + more complex expression, conversion will depend on this expression. + For example, a concatenation of string literals: + EXECUTE IMMEDIATE 'SELECT * FROM'||'t1'; + should work the same way with just a single literal, + so no conversion normally. 
+ */ + CHARSET_INFO *to_cs= thd->variables.character_set_client; + + uint32 unused; + if (String::needs_conversion(str->length(), str->charset(), to_cs, &unused)) + { + if (!(dst->str= sql_strmake_with_convert(thd, str->ptr(), str->length(), + str->charset(), UINT_MAX32, + to_cs, &dst->length))) + { + dst->length= 0; + return true; + } + DBUG_ASSERT(dst->length <= UINT_MAX32); + return false; + } + dst->str= str->ptr(); + dst->length= str->length(); + return false; +} + + +/** + SQLCOM_PREPARE implementation. + + Prepare an SQL prepared statement. This is called from + mysql_execute_command and should therefore behave like an + ordinary query (e.g. should not reset any global THD data). + + @param thd thread handle + + @return + none: in case of success, OK packet is sent to the client, + otherwise an error message is set in THD +*/ + +void mysql_sql_stmt_prepare(THD *thd) +{ + LEX *lex= thd->lex; + CSET_STRING orig_query= thd->query_string; + const LEX_CSTRING *name= &lex->prepared_stmt.name(); + Prepared_statement *stmt; + LEX_CSTRING query; + DBUG_ENTER("mysql_sql_stmt_prepare"); + + if ((stmt= (Prepared_statement*) thd->stmt_map.find_by_name(name))) + { + /* + If there is a statement with the same name, remove it. It is ok to + remove old and fail to insert a new one at the same time. + */ + if (stmt->is_in_use()) + { + my_error(ER_PS_NO_RECURSION, MYF(0)); + DBUG_VOID_RETURN; + } + + stmt->deallocate(); + } + + /* + It's important for "buffer" not to be destructed before stmt->prepare()! + See comments in get_dynamic_sql_string(). + */ + StringBuffer<256> buffer; + if (lex->prepared_stmt.get_dynamic_sql_string(thd, &query, &buffer) || + ! 
(stmt= new Prepared_statement(thd))) + { + DBUG_VOID_RETURN; /* out of memory */ + } + + stmt->set_sql_prepare(); + + /* Set the name first, insert should know that this statement has a name */ + if (stmt->set_name(name)) + { + delete stmt; + DBUG_VOID_RETURN; + } + + if (thd->stmt_map.insert(thd, stmt)) + { + /* The statement is deleted and an error is set if insert fails */ + DBUG_VOID_RETURN; + } + + /* + Make sure we call Prepared_statement::prepare() with an empty + THD::change_list. It can be non-empty as LEX::get_dynamic_sql_string() + calls fix_fields() for the Item containing the PS source, + e.g. on character set conversion: + + SET NAMES utf8; + DELIMITER $$ + CREATE PROCEDURE p1() + BEGIN + PREPARE stmt FROM CONCAT('SELECT ',CONVERT(RAND() USING latin1)); + EXECUTE stmt; + END; + $$ + DELIMITER ; + CALL p1(); + */ + Item_change_list_savepoint change_list_savepoint(thd); + + /* Create PS table entry, set query text after rewrite. */ + stmt->m_prepared_stmt= MYSQL_CREATE_PS(stmt, stmt->id, + thd->m_statement_psi, + stmt->name.str, stmt->name.length); + + bool res= stmt->prepare(query.str, (uint) query.length); + /* + stmt->prepare() sets thd->query_string with the prepared + query, so the audit plugin gets adequate notification with the + mysqld_stmt_* set of functions. + But here we should restore the original query so it's mentioned in + logs properly. 
+ */ + thd->set_query(orig_query); + if (res) + { + /* Statement map deletes the statement on erase */ + thd->stmt_map.erase(stmt); + } + else + { + thd->session_tracker.state_change.mark_as_changed(thd); + my_ok(thd, 0L, 0L, "Statement prepared"); + } + change_list_savepoint.rollback(thd); + + DBUG_VOID_RETURN; +} + + +void mysql_sql_stmt_execute_immediate(THD *thd) +{ + LEX *lex= thd->lex; + CSET_STRING orig_query= thd->query_string; + Prepared_statement *stmt; + LEX_CSTRING query; + DBUG_ENTER("mysql_sql_stmt_execute_immediate"); + + if (lex->prepared_stmt.params_fix_fields(thd)) + DBUG_VOID_RETURN; + + /* + Prepared_statement is quite large, + let's allocate it on the heap rather than on the stack. + + It's important for "buffer" not to be destructed + before stmt->execute_immediate(). + See comments in get_dynamic_sql_string(). + */ + StringBuffer<256> buffer; + if (lex->prepared_stmt.get_dynamic_sql_string(thd, &query, &buffer) || + !(stmt= new Prepared_statement(thd))) + DBUG_VOID_RETURN; // out of memory + + // See comments on thd->free_list in mysql_sql_stmt_execute() + Item *free_list_backup= thd->free_list; + thd->free_list= NULL; + /* + Make sure we call Prepared_statement::execute_immediate() + with an empty THD::change_list. It can be non empty as the above + LEX::prepared_stmt_params_fix_fields() and LEX::get_dynamic_str_string() + call fix_fields() for the PS source and PS parameter Items and + can do Item tree changes, e.g. on character set conversion: + + - Example #1: Item tree changes in get_dynamic_str_string() + SET NAMES utf8; + CREATE PROCEDURE p1() + EXECUTE IMMEDIATE CONCAT('SELECT ',CONVERT(RAND() USING latin1)); + CALL p1(); + + - Example #2: Item tree changes in prepared_stmt_param_fix_fields(): + SET NAMES utf8; + CREATE PROCEDURE p1(a VARCHAR(10) CHARACTER SET utf8) + EXECUTE IMMEDIATE 'SELECT ?' 
USING CONCAT(a, CONVERT(RAND() USING latin1)); + CALL p1('x'); + */ + Item_change_list_savepoint change_list_savepoint(thd); + (void) stmt->execute_immediate(query.str, (uint) query.length); + change_list_savepoint.rollback(thd); + thd->free_items(); + thd->free_list= free_list_backup; + + /* + stmt->execute_immediately() sets thd->query_string with the executed + query, so the audit plugin gets adequate notification with the + mysqld_stmt_* set of functions. + But here we should restore the original query so it's mentioned in + logs properly. + */ + thd->set_query_inner(orig_query); + stmt->lex->restore_set_statement_var(); + delete stmt; + DBUG_VOID_RETURN; +} + + +/** + Reinit prepared statement/stored procedure before execution. + + @todo + When the new table structure is ready, then have a status bit + to indicate the table is altered, and re-do the setup_* + and open the tables back. +*/ + +void reinit_stmt_before_use(THD *thd, LEX *lex) +{ + SELECT_LEX *sl= lex->all_selects_list; + DBUG_ENTER("reinit_stmt_before_use"); + Window_spec *win_spec; + + /* + We have to update "thd" pointer in LEX, all its units and in LEX::result, + since statements which belong to trigger body are associated with TABLE + object and because of this can be used in different threads. 
+ */ + lex->thd= thd; + DBUG_ASSERT(!lex->explain); + + if (lex->empty_field_list_on_rset) + { + lex->empty_field_list_on_rset= 0; + lex->field_list.empty(); + } + for (; sl; sl= sl->next_select_in_list()) + { + if (sl->changed_elements & TOUCHED_SEL_COND) + { + /* remove option which was put by mysql_explain_union() */ + sl->options&= ~SELECT_DESCRIBE; + + /* see unique_table() */ + sl->exclude_from_table_unique_test= FALSE; + + /* + Copy WHERE, HAVING clause pointers to avoid damaging them + by optimisation + */ + if (sl->prep_where) + { + /* + We need this rollback because memory allocated in + copy_andor_structure() will be freed + */ + thd->change_item_tree((Item**)&sl->where, + sl->prep_where->copy_andor_structure(thd)); + sl->where->cleanup(); + } + else + sl->where= NULL; + if (sl->prep_having) + { + /* + We need this rollback because memory allocated in + copy_andor_structure() will be freed + */ + thd->change_item_tree((Item**)&sl->having, + sl->prep_having->copy_andor_structure(thd)); + sl->having->cleanup(); + } + else + sl->having= NULL; + DBUG_ASSERT(sl->join == 0); + ORDER *order; + /* Fix GROUP list */ + if (sl->group_list_ptrs && sl->group_list_ptrs->size() > 0) + { + for (uint ix= 0; ix < sl->group_list_ptrs->size() - 1; ++ix) + { + order= sl->group_list_ptrs->at(ix); + order->next= sl->group_list_ptrs->at(ix+1); + } + } + } + { // no harm to do it (item_ptr set on parsing) + ORDER *order; + for (order= sl->group_list.first; order; order= order->next) + { + order->item= &order->item_ptr; + } + /* Fix ORDER list */ + for (order= sl->order_list.first; order; order= order->next) + order->item= &order->item_ptr; + /* Fix window functions too */ + List_iterator it(sl->window_specs); + + while ((win_spec= it++)) + { + for (order= win_spec->partition_list->first; order; order= order->next) + order->item= &order->item_ptr; + for (order= win_spec->order_list->first; order; order= order->next) + order->item= &order->item_ptr; + } + + // Reinit Pushdown + 
sl->cond_pushed_into_where= NULL; + sl->cond_pushed_into_having= NULL; + } + if (sl->changed_elements & TOUCHED_SEL_DERIVED) + { +#ifdef DBUG_ASSERT_EXISTS + bool res= +#endif + sl->handle_derived(lex, DT_REINIT); + DBUG_ASSERT(res == 0); + } + + { + SELECT_LEX_UNIT *unit= sl->master_unit(); + unit->unclean(); + unit->types.empty(); + /* for derived tables & PS (which can't be reset by Item_subselect) */ + unit->reinit_exec_mechanism(); + unit->set_thd(thd); + } + } + + /* + TODO: When the new table structure is ready, then have a status bit + to indicate the table is altered, and re-do the setup_* + and open the tables back. + */ + /* + NOTE: We should reset whole table list here including all tables added + by prelocking algorithm (it is not a problem for substatements since + they have their own table list). + */ + for (TABLE_LIST *tables= lex->query_tables; + tables; + tables= tables->next_global) + { + tables->reinit_before_use(thd); + } + + /* Reset MDL tickets for procedures/functions */ + for (Sroutine_hash_entry *rt= + (Sroutine_hash_entry*)thd->lex->sroutines_list.first; + rt; rt= rt->next) + rt->mdl_request.ticket= NULL; + + /* + Cleanup of the special case of DELETE t1, t2 FROM t1, t2, t3 ... + (multi-delete). We do a full clean up, although at the moment all we + need to clean in the tables of MULTI-DELETE list is 'table' member. + */ + for (TABLE_LIST *tables= lex->auxiliary_table_list.first; + tables; + tables= tables->next_global) + { + tables->reinit_before_use(thd); + } + lex->current_select= lex->first_select_lex(); + + + if (lex->result) + { + lex->result->cleanup(); + lex->result->set_thd(thd); + } + lex->allow_sum_func.clear_all(); + lex->in_sum_func= NULL; + DBUG_VOID_RETURN; +} + + +/** + Clears parameters from data left from previous execution or long data. 
+ + @param stmt prepared statement for which parameters should + be reset +*/ + +static void reset_stmt_params(Prepared_statement *stmt) +{ + Item_param **item= stmt->param_array; + Item_param **end= item + stmt->param_count; + for (;item < end ; ++item) + { + (**item).reset(); + (**item).sync_clones(); + } +} + + +static void mysql_stmt_execute_common(THD *thd, + ulong stmt_id, + uchar *packet, + uchar *packet_end, + ulong cursor_flags, + bool iteration, + bool types); + +/** + COM_STMT_EXECUTE handler: execute a previously prepared statement. + + If there are any parameters, then replace parameter markers with the + data supplied from the client, and then execute the statement. + This function uses binary protocol to send a possible result set + to the client. + + @param thd current thread + @param packet_arg parameter types and data, if any + @param packet_length packet length, including the terminator character. + + @return + none: in case of success OK packet or a result set is sent to the + client, otherwise an error message is set in THD. +*/ + +void mysqld_stmt_execute(THD *thd, char *packet_arg, uint packet_length) +{ + const uint packet_min_lenght= 9; + uchar *packet= (uchar*)packet_arg; // GCC 4.0.1 workaround + + DBUG_ENTER("mysqld_stmt_execute"); + + if (packet_length < packet_min_lenght) + { + my_error(ER_MALFORMED_PACKET, MYF(0)); + DBUG_VOID_RETURN; + } + ulong stmt_id= uint4korr(packet); + ulong flags= (ulong) packet[4]; + uchar *packet_end= packet + packet_length; + + packet+= 9; /* stmt_id + 5 bytes of flags */ + + mysql_stmt_execute_common(thd, stmt_id, packet, packet_end, flags, FALSE, + FALSE); + DBUG_VOID_RETURN; +} + + +/** + COM_STMT_BULK_EXECUTE handler: execute a previously prepared statement. + + If there are any parameters, then replace parameter markers with the + data supplied from the client, and then execute the statement. + This function uses binary protocol to send a possible result set + to the client. 
+ + @param thd current thread + @param packet_arg parameter types and data, if any + @param packet_length packet length, including the terminator character. + + @return + none: in case of success OK packet or a result set is sent to the + client, otherwise an error message is set in THD. +*/ + +void mysqld_stmt_bulk_execute(THD *thd, char *packet_arg, uint packet_length) +{ + uchar *packet= (uchar*)packet_arg; // GCC 4.0.1 workaround + DBUG_ENTER("mysqld_stmt_execute_bulk"); + + const uint packet_header_lenght= 4 + 2; //ID & 2 bytes of flags + + if (packet_length < packet_header_lenght) + { + my_error(ER_MALFORMED_PACKET, MYF(0)); + DBUG_VOID_RETURN; + } + + ulong stmt_id= uint4korr(packet); + uint flags= (uint) uint2korr(packet + 4); + uchar *packet_end= packet + packet_length; + + if (!(thd->client_capabilities & + MARIADB_CLIENT_STMT_BULK_OPERATIONS)) + { + DBUG_PRINT("error", + ("An attempt to execute bulk operation without support")); + my_error(ER_UNSUPPORTED_PS, MYF(0)); + DBUG_VOID_RETURN; + } + /* Check for implemented parameters */ + if (flags & (~STMT_BULK_FLAG_CLIENT_SEND_TYPES)) + { + DBUG_PRINT("error", ("unsupported bulk execute flags %x", flags)); + my_error(ER_UNSUPPORTED_PS, MYF(0)); + DBUG_VOID_RETURN; + } + + /* stmt id and two bytes of flags */ + packet+= packet_header_lenght; + mysql_stmt_execute_common(thd, stmt_id, packet, packet_end, 0, TRUE, + (flags & STMT_BULK_FLAG_CLIENT_SEND_TYPES)); + DBUG_VOID_RETURN; +} + +/** + Additional packet checks for direct execution + + @param thd THD handle + @param stmt prepared statement being directly executed + @param paket packet with parameters to bind + @param packet_end pointer to the byte after parameters end + @param bulk_op is it bulk operation + @param direct_exec is it direct execution + @param read_bytes need to read types (only with bulk_op) + + @retval true this parameter is wrong + @retval false this parameter is OK +*/ + +static bool +stmt_execute_packet_sanity_check(Prepared_statement 
*stmt, + uchar *packet, uchar *packet_end, + bool bulk_op, bool direct_exec, + bool read_types) +{ + + DBUG_ASSERT((!read_types) || (read_types && bulk_op)); + if (stmt->param_count > 0) + { + uint packet_length= static_cast(packet_end - packet); + uint null_bitmap_bytes= (bulk_op ? 0 : (stmt->param_count + 7)/8); + uint min_len_for_param_count = null_bitmap_bytes + + (bulk_op ? 0 : 1); /* sent types byte */ + + if (!bulk_op && packet_length >= min_len_for_param_count) + { + if ((read_types= packet[null_bitmap_bytes])) + { + /* + Should be 0 or 1. If the byte is not 1, that could mean, + e.g. that we read incorrect byte due to incorrect number + of sent parameters for direct execution (i.e. null bitmap + is shorter or longer, than it should be) + */ + if (packet[null_bitmap_bytes] != '\1') + { + return true; + } + } + } + + if (read_types) + { + /* 2 bytes per parameter of the type and flags */ + min_len_for_param_count+= 2*stmt->param_count; + } + else + { + /* + If types are not sent, there is nothing to do here. + But for direct execution types should always be sent + */ + return direct_exec; + } + + /* + If true, the packet is guaranteed too short for the number of + parameters in the PS + */ + return (packet_length < min_len_for_param_count); + } + else + { + /* + If there is no parameters, this should be normally already end + of the packet, but it is not a problem if something left (popular + mistake in protocol implementation) because we will not read anymore + from the buffer. 
+ */ + return false; + } + return false; +} + + +/** + Common part of prepared statement execution + + @param thd THD handle + @param stmt_id id of the prepared statement + @param paket packet with parameters to bind + @param packet_end pointer to the byte after parameters end + @param cursor_flags cursor flags + @param bulk_op id it bulk operation + @param read_types flag say that types muast been read +*/ + +static void mysql_stmt_execute_common(THD *thd, + ulong stmt_id, + uchar *packet, + uchar *packet_end, + ulong cursor_flags, + bool bulk_op, + bool read_types) +{ + /* Query text for binary, general or slow log, if any of them is open */ + String expanded_query; + Prepared_statement *stmt; + Protocol *save_protocol= thd->protocol; + bool open_cursor; + DBUG_ENTER("mysqld_stmt_execute_common"); + DBUG_ASSERT((!read_types) || (read_types && bulk_op)); + + /* First of all clear possible warnings from the previous command */ + thd->reset_for_next_command(); + + if (!(stmt= find_prepared_statement(thd, stmt_id))) + { + char llbuf[22]; + size_t length; + /* + Did not find the statement with the provided stmt_id. + Set thd->query_string with the stmt_id so the + audit plugin gets the meaningful notification. + */ + length= (size_t) (longlong10_to_str(stmt_id, llbuf, 10) - llbuf); + if (alloc_query(thd, llbuf, length + 1)) + thd->set_query(0, 0); + my_error(ER_UNKNOWN_STMT_HANDLER, MYF(0), (int) length, llbuf, + "mysqld_stmt_execute"); + DBUG_VOID_RETURN; + } + + /* + In case of direct execution application decides how many parameters + to send. + + Thus extra checks are required to prevent crashes caused by incorrect + interpretation of the packet data. Plus there can be always a broken + evil client. + */ + if (stmt_execute_packet_sanity_check(stmt, packet, packet_end, bulk_op, + stmt_id == LAST_STMT_ID, read_types)) + { + my_error(ER_MALFORMED_PACKET, MYF(0)); + /* + Let's set the thd->query_string so the audit plugin + can report the executed query that failed. 
+ */ + thd->set_query_inner(stmt->query_string); + DBUG_VOID_RETURN; + } + + stmt->read_types= read_types; + +#if defined(ENABLED_PROFILING) + thd->profiling.set_query_source(stmt->query(), stmt->query_length()); +#endif + DBUG_PRINT("exec_query", ("%s", stmt->query())); + DBUG_PRINT("info",("stmt: %p bulk_op %d", stmt, bulk_op)); + + open_cursor= MY_TEST(cursor_flags & (ulong) CURSOR_TYPE_READ_ONLY); + + thd->protocol= &thd->protocol_binary; + MYSQL_EXECUTE_PS(thd->m_statement_psi, stmt->m_prepared_stmt); + + auto save_cur_stmt= thd->cur_stmt; + thd->cur_stmt= stmt; + + if (!bulk_op) + stmt->execute_loop(&expanded_query, open_cursor, packet, packet_end); + else + stmt->execute_bulk_loop(&expanded_query, open_cursor, packet, packet_end); + + thd->cur_stmt= save_cur_stmt; + thd->protocol= save_protocol; + + sp_cache_enforce_limit(thd->sp_proc_cache, stored_program_cache_size); + sp_cache_enforce_limit(thd->sp_func_cache, stored_program_cache_size); + sp_cache_enforce_limit(thd->sp_package_spec_cache, stored_program_cache_size); + sp_cache_enforce_limit(thd->sp_package_body_cache, stored_program_cache_size); + + /* Close connection socket; for use with client testing (Bug#43560). */ + DBUG_EXECUTE_IF("close_conn_after_stmt_execute", vio_shutdown(thd->net.vio,SHUT_RD);); + + DBUG_VOID_RETURN; +} + + +/** + SQLCOM_EXECUTE implementation. + + Execute prepared statement using parameter values from + lex->prepared_stmt.params() and send result to the client using + text protocol. This is called from mysql_execute_command and + therefore should behave like an ordinary query (e.g. not change + global THD data, such as warning count, server status, etc). + This function uses text protocol to send a possible result set. 
+ + @param thd thread handle + + @return + none: in case of success, OK (or result set) packet is sent to the + client, otherwise an error is set in THD +*/ + +void mysql_sql_stmt_execute(THD *thd) +{ + LEX *lex= thd->lex; + Prepared_statement *stmt; + const LEX_CSTRING *name= &lex->prepared_stmt.name(); + /* Query text for binary, general or slow log, if any of them is open */ + String expanded_query; + DBUG_ENTER("mysql_sql_stmt_execute"); + DBUG_PRINT("info", ("EXECUTE: %.*s", (int) name->length, name->str)); + + if (!(stmt= (Prepared_statement*) thd->stmt_map.find_by_name(name))) + { + my_error(ER_UNKNOWN_STMT_HANDLER, MYF(0), + static_cast(name->length), name->str, "EXECUTE"); + DBUG_VOID_RETURN; + } + + if (stmt->param_count != lex->prepared_stmt.param_count()) + { + my_error(ER_WRONG_ARGUMENTS, MYF(0), "EXECUTE"); + DBUG_VOID_RETURN; + } + + DBUG_PRINT("info",("stmt: %p", stmt)); + + if (lex->prepared_stmt.params_fix_fields(thd)) + DBUG_VOID_RETURN; + + /* + thd->free_list can already have some Items. + + Example queries: + - SET STATEMENT var=expr FOR EXECUTE stmt; + - EXECUTE stmt USING expr; + + E.g. for a query like this: + PREPARE stmt FROM 'INSERT INTO t1 VALUES (@@max_sort_length)'; + SET STATEMENT max_sort_length=2048 FOR EXECUTE stmt; + thd->free_list contains a pointer to Item_int corresponding to 2048. + + If Prepared_statement::execute() notices that the table metadata for "t1" + has changed since PREPARE, it returns an error asking the calling + Prepared_statement::execute_loop() to re-prepare the statement. + Before returning the error, Prepared_statement::execute() + calls Prepared_statement::cleanup_stmt(), + which calls thd->cleanup_after_query(), + which calls Query_arena::free_items(). + + We hide "external" Items, e.g. those created while parsing the + "SET STATEMENT" or "USING" parts of the query, + so they don't get freed in case of re-prepare. 
+ See MDEV-10702 Crash in SET STATEMENT FOR EXECUTE + */ + Item *free_list_backup= thd->free_list; + thd->free_list= NULL; // Hide the external (e.g. "SET STATEMENT") Items + /* + Make sure we call Prepared_statement::execute_loop() with an empty + THD::change_list. It can be non-empty because the above + LEX::prepared_stmt_params_fix_fields() calls fix_fields() for + the PS parameter Items and can do some Item tree changes, + e.g. on character set conversion: + + SET NAMES utf8; + DELIMITER $$ + CREATE PROCEDURE p1(a VARCHAR(10) CHARACTER SET utf8) + BEGIN + PREPARE stmt FROM 'SELECT ?'; + EXECUTE stmt USING CONCAT(a, CONVERT(RAND() USING latin1)); + END; + $$ + DELIMITER ; + CALL p1('x'); + */ + Item_change_list_savepoint change_list_savepoint(thd); + MYSQL_EXECUTE_PS(thd->m_statement_psi, stmt->m_prepared_stmt); + + (void) stmt->execute_loop(&expanded_query, FALSE, NULL, NULL); + change_list_savepoint.rollback(thd); + thd->free_items(); // Free items created by execute_loop() + /* + Now restore the "external" (e.g. "SET STATEMENT") Item list. + It will be freed normaly in THD::cleanup_after_query(). + */ + thd->free_list= free_list_backup; + + stmt->lex->restore_set_statement_var(); + DBUG_VOID_RETURN; +} + + +/** + COM_STMT_FETCH handler: fetches requested amount of rows from cursor. 
+ + @param thd Thread handle + @param packet Packet from client (with stmt_id & num_rows) + @param packet_length Length of packet +*/ + +void mysqld_stmt_fetch(THD *thd, char *packet, uint packet_length) +{ + /* assume there is always place for 8-16 bytes */ + ulong stmt_id= uint4korr(packet); + ulong num_rows= uint4korr(packet+4); + Prepared_statement *stmt; + Statement stmt_backup; + Server_side_cursor *cursor; + DBUG_ENTER("mysqld_stmt_fetch"); + + /* First of all clear possible warnings from the previous command */ + thd->reset_for_next_command(); + + status_var_increment(thd->status_var.com_stmt_fetch); + if (!(stmt= find_prepared_statement(thd, stmt_id))) + { + char llbuf[22]; + my_error(ER_UNKNOWN_STMT_HANDLER, MYF(0), static_cast(sizeof(llbuf)), + llstr(stmt_id, llbuf), "mysqld_stmt_fetch"); + DBUG_VOID_RETURN; + } + + cursor= stmt->cursor; + if (!cursor) + { + my_error(ER_STMT_HAS_NO_OPEN_CURSOR, MYF(0), stmt_id); + DBUG_VOID_RETURN; + } + + thd->stmt_arena= stmt; + thd->set_n_backup_statement(stmt, &stmt_backup); + + cursor->fetch(num_rows); + + if (!cursor->is_open()) + { + stmt->close_cursor(); + reset_stmt_params(stmt); + } + + thd->restore_backup_statement(stmt, &stmt_backup); + thd->stmt_arena= thd; + + DBUG_VOID_RETURN; +} + + +/** + Reset a prepared statement in case there was a recoverable error. + + This function resets statement to the state it was right after prepare. + It can be used to: + - clear an error happened during mysqld_stmt_send_long_data + - cancel long data stream for all placeholders without + having to call mysqld_stmt_execute. + - close an open cursor + Sends 'OK' packet in case of success (statement was reset) + or 'ERROR' packet (unrecoverable error/statement not found/etc). 
+
+  @param thd                Thread handle
+  @param packet             Packet with stmt id
+*/
+
+void mysqld_stmt_reset(THD *thd, char *packet)
+{
+  /* There is always space for 4 bytes in buffer */
+  ulong stmt_id= uint4korr(packet);
+  Prepared_statement *stmt;
+  DBUG_ENTER("mysqld_stmt_reset");
+
+  /* First of all clear possible warnings from the previous command */
+  thd->reset_for_next_command();
+
+  status_var_increment(thd->status_var.com_stmt_reset);
+  if (!(stmt= find_prepared_statement(thd, stmt_id)))
+  {
+    char llbuf[22];
+    my_error(ER_UNKNOWN_STMT_HANDLER, MYF(0), static_cast<int>(sizeof(llbuf)),
+             llstr(stmt_id, llbuf), "mysqld_stmt_reset");
+    DBUG_VOID_RETURN;
+  }
+
+  stmt->close_cursor();
+
+  /*
+    Clear parameters from data which could be set by
+    mysqld_stmt_send_long_data() call.
+  */
+  reset_stmt_params(stmt);
+
+  stmt->state= Query_arena::STMT_PREPARED;
+
+  general_log_print(thd, thd->get_command(), NullS);
+
+  my_ok(thd);
+
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Delete a prepared statement from memory.
+
+  @note
+    we don't send any reply to this command.
+*/
+
+void mysqld_stmt_close(THD *thd, char *packet)
+{
+  /* There is always space for 4 bytes in packet buffer */
+  ulong stmt_id= uint4korr(packet);
+  Prepared_statement *stmt;
+  DBUG_ENTER("mysqld_stmt_close");
+
+  thd->get_stmt_da()->disable_status();
+
+  if (!(stmt= find_prepared_statement(thd, stmt_id)))
+    DBUG_VOID_RETURN;
+
+  /*
+    The only way currently a statement can be deallocated when it's
+    in use is from within Dynamic SQL.
+  */
+  DBUG_ASSERT(! stmt->is_in_use());
+  stmt->deallocate();
+  general_log_print(thd, thd->get_command(), NullS);
+
+  if (thd->last_stmt == stmt)
+    thd->clear_last_stmt();
+
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  SQLCOM_DEALLOCATE implementation.
+
+  Close an SQL prepared statement. As this can be called from Dynamic
+  SQL, we should be careful to not close a statement that is currently
+  being executed.
+
+  @return
+    none: OK packet is sent in case of success, otherwise an error
+    message is set in THD
+*/
+
+void mysql_sql_stmt_close(THD *thd)
+{
+  Prepared_statement* stmt;
+  const LEX_CSTRING *name= &thd->lex->prepared_stmt.name();
+  DBUG_PRINT("info", ("DEALLOCATE PREPARE: %.*s", (int) name->length,
+                      name->str));
+
+  if (! (stmt= (Prepared_statement*) thd->stmt_map.find_by_name(name)))
+    my_error(ER_UNKNOWN_STMT_HANDLER, MYF(0),
+             static_cast<int>(name->length), name->str, "DEALLOCATE PREPARE");
+  else if (stmt->is_in_use())
+    my_error(ER_PS_NO_RECURSION, MYF(0));
+  else
+  {
+    stmt->deallocate();
+    thd->session_tracker.state_change.mark_as_changed(thd);
+    my_ok(thd);
+  }
+}
+
+
+/**
+  Handle long data in pieces from client.
+
+  Get a part of a long data. To make the protocol efficient, we are
+  not sending any return packets here. If something goes wrong, then
+  we will send the error on 'execute' We assume that the client takes
+  care of checking that all parts are sent to the server. (No checking
+  that we get a 'end of column' in the server is performed).
+
+  @param thd Thread handle
+  @param packet String to append
+  @param packet_length Length of string (including end \\0)
+*/
+
+void mysql_stmt_get_longdata(THD *thd, char *packet, ulong packet_length)
+{
+  ulong stmt_id;
+  uint param_number;
+  Prepared_statement *stmt;
+  Item_param *param;
+#ifndef EMBEDDED_LIBRARY
+  char *packet_end= packet + packet_length;
+#endif
+  DBUG_ENTER("mysql_stmt_get_longdata");
+
+  status_var_increment(thd->status_var.com_stmt_send_long_data);
+
+  /* No reply is sent for this command: suppress the status packet */
+  thd->get_stmt_da()->disable_status();
+#ifndef EMBEDDED_LIBRARY
+  /* Minimal size of long data packet is 6 bytes */
+  if (packet_length < MYSQL_LONG_DATA_HEADER)
+    DBUG_VOID_RETURN;
+#endif
+
+  stmt_id= uint4korr(packet);
+  packet+= 4;
+
+  if (!(stmt=find_prepared_statement(thd, stmt_id)))
+    DBUG_VOID_RETURN;
+
+  param_number= uint2korr(packet);
+  packet+= 2;
+#ifndef EMBEDDED_LIBRARY
+  if (param_number >= stmt->param_count)
+  {
+    /* Error will be sent in execute call */
+    stmt->state= Query_arena::STMT_ERROR;
+    stmt->last_errno= ER_WRONG_ARGUMENTS;
+    sprintf(stmt->last_error, ER_THD(thd, ER_WRONG_ARGUMENTS),
+            "mysqld_stmt_send_long_data");
+    DBUG_VOID_RETURN;
+  }
+#endif
+
+  param= stmt->param_array[param_number];
+
+  /*
+    Swap in a local Diagnostics_area so that any error raised while
+    appending the data is captured here and stored on the statement
+    (reported later, at execute time), not sent to the client now.
+  */
+  Diagnostics_area new_stmt_da(thd->query_id, false, true);
+  Diagnostics_area *save_stmt_da= thd->get_stmt_da();
+
+  thd->set_stmt_da(&new_stmt_da);
+
+#ifndef EMBEDDED_LIBRARY
+  param->set_longdata(packet, (ulong) (packet_end - packet));
+#else
+  param->set_longdata(thd->extra_data, thd->extra_length);
+#endif
+  if (unlikely(thd->get_stmt_da()->is_error()))
+  {
+    stmt->state= Query_arena::STMT_ERROR;
+    stmt->last_errno= thd->get_stmt_da()->sql_errno();
+    strmake_buf(stmt->last_error, thd->get_stmt_da()->message());
+  }
+  thd->set_stmt_da(save_stmt_da);
+
+  general_log_print(thd, thd->get_command(), NullS);
+
+  DBUG_VOID_RETURN;
+}
+
+
+/***************************************************************************
+ Select_fetch_protocol_binary
+****************************************************************************/
+
+Select_fetch_protocol_binary::Select_fetch_protocol_binary(THD *thd_arg):
+  select_send(thd_arg), protocol(thd_arg)
+{}
+
+bool Select_fetch_protocol_binary::send_result_set_metadata(List<Item> &list, uint flags)
+{
+  bool rc;
+  Protocol *save_protocol= thd->protocol;
+
+  /*
+    Protocol::send_result_set_metadata caches the information about column types:
+    this information is later used to send data. Therefore, the same
+    dedicated Protocol object must be used for all operations with
+    a cursor.
+  */
+  thd->protocol= &protocol;
+  rc= select_send::send_result_set_metadata(list, flags);
+  thd->protocol= save_protocol;
+
+  return rc;
+}
+
+bool Select_fetch_protocol_binary::send_eof()
+{
+  /*
+    Don't send EOF if we're in error condition (which implies we've already
+    sent or are sending an error)
+  */
+  if (unlikely(thd->is_error()))
+    return true;
+
+  ::my_eof(thd);
+  return false;
+}
+
+
+int
+Select_fetch_protocol_binary::send_data(List<Item> &fields)
+{
+  Protocol *save_protocol= thd->protocol;
+  int rc;
+
+  /* Rows are sent through the cursor's dedicated binary protocol */
+  thd->protocol= &protocol;
+  rc= select_send::send_data(fields);
+  thd->protocol= save_protocol;
+  return rc;
+}
+
+/*******************************************************************
+* Reprepare_observer
+*******************************************************************/
+/** Push an error to the error stack and return TRUE for now. */
+
+bool
+Reprepare_observer::report_error(THD *thd)
+{
+  /*
+    This 'error' is purely internal to the server:
+    - No exception handler is invoked,
+    - No condition is added in the condition area (warn_list).
+    The diagnostics area is set to an error status to enforce
+    that this thread execution stops and returns to the caller,
+    backtracking all the way to Prepared_statement::execute_loop().
+  */
+  thd->get_stmt_da()->set_error_status(ER_NEED_REPREPARE);
+  m_invalidated= TRUE;
+
+  return TRUE;
+}
+
+
+/*******************************************************************
+* Server_runnable
+*******************************************************************/
+
+Server_runnable::~Server_runnable() = default;
+
+///////////////////////////////////////////////////////////////////////////
+
+Execute_sql_statement::
+Execute_sql_statement(LEX_STRING sql_text)
+  :m_sql_text(sql_text)
+{}
+
+
+/**
+  Parse and execute a statement. Does not prepare the query.
+
+  Allows to execute a statement from within another statement.
+  The main property of the implementation is that it does not
+  affect the environment -- i.e. you can run many
+  executions without having to cleanup/reset THD in between.
+*/
+
+static bool execute_server_code(THD *thd,
+                                const char *sql_text, size_t sql_len)
+{
+  PSI_statement_locker *parent_locker;
+  bool error;
+  /* Save the caller's query id; it is restored before returning */
+  query_id_t save_query_id= thd->query_id;
+  query_id_t next_id= next_query_id();
+
+  if (alloc_query(thd, sql_text, sql_len))
+    return TRUE;
+
+  Parser_state parser_state;
+  if (parser_state.init(thd, thd->query(), thd->query_length()))
+    return TRUE;
+
+  thd->query_id= next_id;
+  parser_state.m_lip.multi_statements= FALSE;
+  lex_start(thd);
+
+  error= parse_sql(thd, &parser_state, NULL) || thd->is_error();
+
+  if (unlikely(error))
+    goto end;
+
+  thd->lex->set_trg_event_type_for_tables();
+
+  /* Hide the PSI statement locker so the inner command is not instrumented */
+  parent_locker= thd->m_statement_psi;
+  thd->m_statement_psi= NULL;
+  error= mysql_execute_command(thd);
+  thd->m_statement_psi= parent_locker;
+
+  /* report error issued during command execution */
+  if (likely(error == 0) && thd->spcont == NULL)
+    general_log_write(thd, COM_QUERY,
+                      thd->query(), thd->query_length());
+
+end:
+  thd->lex->restore_set_statement_var();
+  thd->query_id= save_query_id;
+  delete_explain_query(thd->lex);
+  lex_end(thd->lex);
+
+  return error;
+}
+
+bool Execute_sql_statement::execute_server_code(THD *thd)
+{
+  
return ::execute_server_code(thd, m_sql_text.str, m_sql_text.length);
+}
+
+/***************************************************************************
+ Prepared_statement
+****************************************************************************/
+
+Prepared_statement::Prepared_statement(THD *thd_arg)
+  :Statement(NULL, &main_mem_root,
+             STMT_INITIALIZED,
+             ((++thd_arg->statement_id_counter) & STMT_ID_MASK)),
+  thd(thd_arg),
+  m_prepared_stmt(NULL),
+  result(thd_arg),
+  param_array(0),
+  cursor(0),
+  packet(0),
+  packet_end(0),
+#ifdef PROTECT_STATEMENT_MEMROOT
+  executed_counter(0),
+#endif
+  param_count(0),
+  last_errno(0),
+  flags((uint) IS_IN_USE),
+  iterations(0),
+  start_param(0),
+  read_types(0),
+  m_sql_mode(thd->variables.sql_mode)
+{
+  /* The statement owns its own memroot, sized from the session settings */
+  init_sql_alloc(key_memory_prepared_statement_main_mem_root,
+                 &main_mem_root, thd_arg->variables.query_alloc_block_size,
+                 thd_arg->variables.query_prealloc_size, MYF(MY_THREAD_SPECIFIC));
+  *last_error= '\0';
+}
+
+
+/**
+  Decide how parameter markers are substituted at execute time and
+  install the corresponding set_params* handlers on this statement.
+*/
+void Prepared_statement::setup_set_params()
+{
+  /*
+    Note: BUG#25843 applies here too (query cache lookup uses thd->db, not
+    db from "prepare" time).
+  */
+  if (query_cache_maybe_disabled(thd)) // we won't expand the query
+    lex->safe_to_cache_query= FALSE;   // so don't cache it at Execution
+
+  /*
+    Decide if we have to expand the query (because we must write it to logs or
+    because we want to look it up in the query cache) or not.
+  */
+  bool replace_params_with_values= false;
+  // binlog
+  replace_params_with_values|= mysql_bin_log.is_open() && is_update_query(lex->sql_command);
+  // general or slow log
+  replace_params_with_values|= opt_log || thd->variables.sql_log_slow;
+  // query cache
+  replace_params_with_values|= query_cache_is_cacheable_query(lex);
+  // but never for compound statements
+  replace_params_with_values&= lex->sql_command != SQLCOM_COMPOUND;
+
+  if (replace_params_with_values)
+  {
+    set_params_from_actual_params= insert_params_from_actual_params_with_log;
+#ifndef EMBEDDED_LIBRARY
+    set_params= insert_params_with_log;
+    set_bulk_params= insert_bulk_params; // RBR is on for bulk operation
+#else
+    //TODO: add bulk support for bulk parameters
+    set_params_data= emb_insert_params_with_log;
+#endif
+  }
+  else
+  {
+    set_params_from_actual_params= insert_params_from_actual_params;
+#ifndef EMBEDDED_LIBRARY
+    set_params= insert_params;
+    set_bulk_params= insert_bulk_params;
+#else
+    //TODO: add bulk support for bulk parameters
+    set_params_data= emb_insert_params;
+#endif
+  }
+}
+
+
+/**
+  Destroy this prepared statement, cleaning up all used memory
+  and resources.
+
+  This is called from ::deallocate() to handle COM_STMT_CLOSE and
+  DEALLOCATE PREPARE or when THD ends and all prepared statements are freed.
+*/
+
+Prepared_statement::~Prepared_statement()
+{
+  DBUG_ENTER("Prepared_statement::~Prepared_statement");
+#ifdef PROTECT_STATEMENT_MEMROOT
+  DBUG_PRINT("enter",("stmt: %p cursor: %p executed_counter: %lu",
+                      this, cursor, executed_counter));
+#else
+  DBUG_PRINT("enter",("stmt: %p cursor: %p",
+                      this, cursor));
+#endif
+
+  MYSQL_DESTROY_PS(m_prepared_stmt);
+
+  delete cursor;
+  /*
+    We have to call free on the items even if cleanup is called as some items,
+    like Item_param, don't free everything until free_items()
+  */
+  free_items();
+  if (lex)
+  {
+    sp_head::destroy(lex->sphead);
+    delete lex->result;
+    delete (st_lex_local *) lex;
+  }
+  free_root(&main_mem_root, MYF(0));
+  DBUG_VOID_RETURN;
+}
+
+
+Query_arena::Type Prepared_statement::type() const
+{
+  return PREPARED_STATEMENT;
+}
+
+
+/**
+  Undo per-execution state: roll back Item tree changes, free items
+  created during execution and optionally restore SET STATEMENT vars.
+*/
+bool Prepared_statement::cleanup_stmt(bool restore_set_statement_vars)
+{
+  bool error= false;
+  DBUG_ENTER("Prepared_statement::cleanup_stmt");
+  DBUG_PRINT("enter",("stmt: %p", this));
+
+  if (restore_set_statement_vars)
+    error= lex->restore_set_statement_var();
+
+  thd->rollback_item_tree_changes();
+  cleanup_items(free_list);
+  thd->cleanup_after_query();
+
+  DBUG_RETURN(error);
+}
+
+
+bool Prepared_statement::set_name(const LEX_CSTRING *name_arg)
+{
+  /* Own copy of the name, allocated on the statement's memroot */
+  name.length= name_arg->length;
+  name.str= (char*) memdup_root(mem_root, name_arg->str, name_arg->length);
+  return name.str == 0;
+}
+
+
+/**
+  Remember the current database.
+
+  We must reset/restore the current database during execution of
+  a prepared statement since it affects execution environment:
+  privileges, @@character_set_database, and other.
+
+  @return 1 if out of memory.
+*/
+
+bool
+Prepared_statement::set_db(const LEX_CSTRING *db_arg)
+{
+  /* Remember the current database.
+  */
+  if (db_arg->length)
+  {
+    if (!(db.str= this->strmake(db_arg->str, db_arg->length)))
+      return 1;
+    db.length= db_arg->length;
+  }
+  else
+    db= null_clex_str;
+  return 0;
+}
+
+/**************************************************************************
+  Common parts of mysql_[sql]_stmt_prepare, mysql_[sql]_stmt_execute.
+  Essentially, these functions do all the magic of preparing/executing
+  a statement, leaving network communication, input data handling and
+  global THD state management to the caller.
+***************************************************************************/
+
+/**
+  Parse statement text, validate the statement, and prepare it for execution.
+
+  You should not change global THD state in this function, if at all
+  possible: it may be called from any context, e.g. when executing
+  a COM_* command, and SQLCOM_* command, or a stored procedure.
+
+  @param packet             statement text
+  @param packet_len
+
+  @note
+    Precondition:
+    The caller must ensure that thd->change_list and thd->free_list
+    is empty: this function will not back them up but will free
+    in the end of its execution.
+
+  @note
+    Postcondition:
+    thd->mem_root contains unused memory allocated during validation.
+*/
+
+bool Prepared_statement::prepare(const char *packet, uint packet_len)
+{
+  bool error;
+  Statement stmt_backup;
+  Query_arena *old_stmt_arena;
+  DBUG_ENTER("Prepared_statement::prepare");
+  DBUG_ASSERT(m_sql_mode == thd->variables.sql_mode);
+  /*
+    If this is an SQLCOM_PREPARE, we also increase Com_prepare_sql.
+    However, it seems handy if com_stmt_prepare is increased always,
+    no matter what kind of prepare is processed.
+  */
+  status_var_increment(thd->status_var.com_stmt_prepare);
+
+  if (! (lex= new (mem_root) st_lex_local))
+    DBUG_RETURN(TRUE);
+  lex->stmt_lex= lex;
+
+  if (set_db(&thd->db))
+    DBUG_RETURN(TRUE);
+
+  /*
+    alloc_query() uses thd->mem_root && thd->query, so we should call
+    both of backup_statement() and backup_query_arena() here.
+  */
+  thd->set_n_backup_statement(this, &stmt_backup);
+  thd->set_n_backup_active_arena(this, &stmt_backup);
+
+  if (alloc_query(thd, packet, packet_len))
+  {
+    thd->restore_backup_statement(this, &stmt_backup);
+    thd->restore_active_arena(this, &stmt_backup);
+    DBUG_RETURN(TRUE);
+  }
+
+  /*
+    We'd like to have thd->query to be set to the actual query
+    after the function ends.
+    This value will be sent to audit plugins later.
+    As the statement is created, the query will be stored
+    in statement's arena. Normally the statement lives longer than
+    the end of this query, so we can just set thd->query_string to
+    be the stmt->query_string.
+    Though errors can result in statement to be freed. These cases
+    should be handled appropriately.
+  */
+  stmt_backup.query_string= thd->query_string;
+
+  old_stmt_arena= thd->stmt_arena;
+  thd->stmt_arena= this;
+  auto save_cur_stmt= thd->cur_stmt;
+  thd->cur_stmt= this;
+
+  Parser_state parser_state;
+  if (parser_state.init(thd, thd->query(), thd->query_length()))
+  {
+    thd->restore_backup_statement(this, &stmt_backup);
+    thd->restore_active_arena(this, &stmt_backup);
+    thd->stmt_arena= old_stmt_arena;
+    thd->cur_stmt = save_cur_stmt;
+    DBUG_RETURN(TRUE);
+  }
+
+  parser_state.m_lip.stmt_prepare_mode= TRUE;
+  parser_state.m_lip.multi_statements= FALSE;
+
+  lex_start(thd);
+  lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_PREPARE;
+
+
+  error= (parse_sql(thd, & parser_state, NULL) ||
+          thd->is_error() ||
+          init_param_array(this));
+
+  /* An expired password blocks everything except changing the password */
+  if (thd->security_ctx->password_expired &&
+      lex->sql_command != SQLCOM_SET_OPTION &&
+      lex->sql_command != SQLCOM_PREPARE &&
+      lex->sql_command != SQLCOM_EXECUTE &&
+      lex->sql_command != SQLCOM_DEALLOCATE_PREPARE)
+  {
+    thd->restore_backup_statement(this, &stmt_backup);
+    thd->restore_active_arena(this, &stmt_backup);
+    thd->stmt_arena= old_stmt_arena;
+    thd->cur_stmt = save_cur_stmt;
+    my_error(ER_MUST_CHANGE_PASSWORD, MYF(0));
+    DBUG_RETURN(true);
+  }
+  lex->set_trg_event_type_for_tables();
+
+#ifdef PROTECT_STATEMENT_MEMROOT
+  executed_counter= 0;
+#endif
+
+  /*
+    While doing context analysis of the query (in check_prepared_statement)
+    we allocate a lot of additional memory: for open tables, JOINs, derived
+    tables, etc. Let's save a snapshot of current parse tree to the
+    statement and restore original THD. In cases when some tree
+    transformation can be reused on execute, we set again thd->mem_root from
+    stmt->mem_root (see setup_wild for one place where we do that).
+  */
+  thd->restore_active_arena(this, &stmt_backup);
+
+  /*
+    If called from a stored procedure, ensure that we won't rollback
+    external changes when cleaning up after validation.
+  */
+  DBUG_ASSERT(thd->Item_change_list::is_empty());
+
+  /*
+    Marker used to release metadata locks acquired while the prepared
+    statement is being checked.
+  */
+  MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint();
+
+  /*
+    Set variables specified by
+      SET STATEMENT var1=value1 [, var2=value2, ...] FOR
+    clause for duration of prepare phase. Original values of variable
+    listed in the SET STATEMENT clause is restored right after return
+    from the function check_prepared_statement()
+  */
+  if (likely(error == 0))
+    error= run_set_statement_if_requested(thd, lex);
+
+  /*
+    The only case where we should have items in the thd->free_list is
+    after stmt->set_params_from_vars(), which may in some cases create
+    Item_null objects.
+  */
+
+  if (likely(error == 0))
+    error= check_prepared_statement(this);
+
+  if (unlikely(error))
+  {
+    /*
+      let the following code know we're not in PS anymore,
+      there won't be any EXECUTE, so we need a full cleanup
+    */
+    lex->context_analysis_only&= ~CONTEXT_ANALYSIS_ONLY_PREPARE;
+  }
+
+  /* The order is important */
+  lex->unit.cleanup();
+
+  /* No need to commit statement transaction, it's not started.
+  */
+  DBUG_ASSERT(thd->transaction->stmt.is_empty());
+
+  close_thread_tables_for_query(thd);
+  thd->mdl_context.rollback_to_savepoint(mdl_savepoint);
+
+  /*
+    Transaction rollback was requested since MDL deadlock was discovered
+    while trying to open tables. Rollback transaction in all storage
+    engines including binary log and release all locks.
+
+    Once dynamic SQL is allowed as substatements the below if-statement
+    has to be adjusted to not do rollback in substatement.
+  */
+  DBUG_ASSERT(! thd->in_sub_stmt);
+  if (thd->transaction_rollback_request)
+  {
+    trans_rollback_implicit(thd);
+    thd->release_transactional_locks();
+  }
+
+  /* Preserve locked plugins for SET */
+  if (lex->sql_command != SQLCOM_SET_OPTION)
+    lex_unlock_plugins(lex);
+
+  /*
+    Pass the value true to restore original values of variables modified
+    on handling SET STATEMENT clause.
+  */
+  error|= cleanup_stmt(true);
+
+  thd->restore_backup_statement(this, &stmt_backup);
+  thd->stmt_arena= old_stmt_arena;
+  thd->cur_stmt= save_cur_stmt;
+
+  if (likely(error == 0))
+  {
+    setup_set_params();
+    lex->context_analysis_only&= ~CONTEXT_ANALYSIS_ONLY_PREPARE;
+    state= Query_arena::STMT_PREPARED;
+    flags&= ~ (uint) IS_IN_USE;
+
+    MYSQL_SET_PS_TEXT(m_prepared_stmt, query(), query_length());
+
+    /*
+      Log COM_EXECUTE to the general log. Note, that in case of SQL
+      prepared statements this causes two records to be output:
+
+      Query       PREPARE stmt from @user_variable
+      Prepare     <statement SQL text>
+
+      This is considered user-friendly, since in the
+      second log entry we output the actual statement text.
+
+      Do not print anything if this is an SQL prepared statement and
+      we're inside a stored procedure (also called Dynamic SQL) --
+      sub-statements inside stored procedures are not logged into
+      the general log.
+    */
+    if (thd->spcont == NULL)
+      general_log_write(thd, COM_STMT_PREPARE, query(), query_length());
+  }
+  // The same format as for triggers to compare
+  hr_prepare_time= my_hrtime();
+  DBUG_RETURN(error);
+}
+
+
+/**
+  Assign parameter values either from variables, in case of SQL PS
+  or from the execute packet.
+
+  @param expanded_query  a container with the original SQL statement.
+                         '?' placeholders will be replaced with
+                         their values in case of success.
+                         The result is used for logging and replication
+  @param packet          pointer to execute packet.
+                         NULL in case of SQL PS
+  @param packet_end      end of the packet. NULL in case of SQL PS
+
+  @todo Use a parameter source class family instead of 'if's, and
+  support stored procedure variables.
+
+  @retval TRUE  an error occurred when assigning a parameter (likely
+          a conversion error or out of memory, or malformed packet)
+  @retval FALSE success
+*/
+
+bool
+Prepared_statement::set_parameters(String *expanded_query,
+                                   uchar *packet, uchar *packet_end)
+{
+  bool is_sql_ps= packet == NULL;
+  bool res= FALSE;
+
+  if (is_sql_ps)
+  {
+    /* SQL prepared statement */
+    res= set_params_from_actual_params(this, thd->lex->prepared_stmt.params(),
+                                       expanded_query);
+  }
+  else if (param_count)
+  {
+#ifndef EMBEDDED_LIBRARY
+    uchar *null_array= packet;
+    res= (setup_conversion_functions(this, &packet) ||
+          set_params(this, null_array, packet, packet_end, expanded_query));
+#else
+    /*
+      In embedded library we re-install conversion routines each time
+      we set parameters, and also we don't need to parse packet.
+      So we do it in one function.
+    */
+    res= set_params_data(this, expanded_query);
+#endif
+  }
+  if (res)
+  {
+    my_error(ER_WRONG_ARGUMENTS, MYF(0),
+             is_sql_ps ? "EXECUTE" : "mysqld_stmt_execute");
+    reset_stmt_params(this);
+  }
+  return res;
+}
+
+
+/**
+  Execute a prepared statement. Re-prepare it a limited number
+  of times if necessary.
+
+  Try to execute a prepared statement.
If there is a metadata + validation error, prepare a new copy of the prepared statement, + swap the old and the new statements, and try again. + If there is a validation error again, repeat the above, but + perform no more than MAX_REPREPARE_ATTEMPTS. + + @note We have to try several times in a loop since we + release metadata locks on tables after prepared statement + prepare. Therefore, a DDL statement may sneak in between prepare + and execute of a new statement. If this happens repeatedly + more than MAX_REPREPARE_ATTEMPTS times, we give up. + + @return TRUE if an error, FALSE if success + @retval TRUE either MAX_REPREPARE_ATTEMPTS has been reached, + or some general error + @retval FALSE successfully executed the statement, perhaps + after having reprepared it a few times. +*/ +const static int MAX_REPREPARE_ATTEMPTS= 3; + +bool +Prepared_statement::execute_loop(String *expanded_query, + bool open_cursor, + uchar *packet, + uchar *packet_end) +{ + Reprepare_observer reprepare_observer; + int reprepare_attempt= 0; + bool error; + iterations= FALSE; + + /* + - In mysql_sql_stmt_execute() we hide all "external" Items + e.g. those created in the "SET STATEMENT" part of the "EXECUTE" query. + - In case of mysqld_stmt_execute() there should not be "external" Items. + */ + DBUG_ASSERT(thd->free_list == NULL); + + /* Check if we got an error when sending long data */ + if (unlikely(state == Query_arena::STMT_ERROR)) + { + my_message(last_errno, last_error, MYF(0)); + return TRUE; + } + + if (set_parameters(expanded_query, packet, packet_end)) + return TRUE; +#ifdef WITH_WSREP + if (thd->wsrep_delayed_BF_abort) + { + WSREP_DEBUG("delayed BF abort, quitting execute_loop, stmt: %d", id); + return TRUE; + } +#endif /* WITH_WSREP */ +reexecute: + // Make sure that reprepare() did not create any new Items. + DBUG_ASSERT(thd->free_list == NULL); + + /* + Install the metadata observer. 
If some metadata version is + different from prepare time and an observer is installed, + the observer method will be invoked to push an error into + the error stack. + */ + + if (sql_command_flags[lex->sql_command] & CF_REEXECUTION_FRAGILE) + { + reprepare_observer.reset_reprepare_observer(); + DBUG_ASSERT(thd->m_reprepare_observer == NULL); + thd->m_reprepare_observer= &reprepare_observer; + } + + error= execute(expanded_query, open_cursor) || thd->is_error(); + + thd->m_reprepare_observer= NULL; + + if (unlikely(error) && + (sql_command_flags[lex->sql_command] & CF_REEXECUTION_FRAGILE) && + !thd->is_fatal_error && !thd->killed && + reprepare_observer.is_invalidated() && + reprepare_attempt++ < MAX_REPREPARE_ATTEMPTS) + { + DBUG_ASSERT(thd->get_stmt_da()->sql_errno() == ER_NEED_REPREPARE); + thd->clear_error(); + + error= reprepare(); + + if (likely(!error)) /* Success */ + { +#ifdef PROTECT_STATEMENT_MEMROOT + // There was reprepare so the counter of runs should be reset + executed_counter= 0; + mem_root->read_only= 0; +#endif + goto reexecute; + } + } + reset_stmt_params(this); +#ifdef PROTECT_STATEMENT_MEMROOT + if (!error) + { + mem_root->read_only= 1; + ++executed_counter; + + DBUG_PRINT("info", ("execute counter: %lu", executed_counter)); + } + else + { + // Error on call shouldn't be counted as a normal run + executed_counter= 0; + mem_root->read_only= 0; + } +#endif + + return error; +} + +my_bool bulk_parameters_set(THD *thd) +{ + DBUG_ENTER("bulk_parameters_set"); + Prepared_statement *stmt= (Prepared_statement *) thd->bulk_param; + + if (stmt && unlikely(stmt->set_bulk_parameters(FALSE))) + DBUG_RETURN(TRUE); + DBUG_RETURN(FALSE); +} + +my_bool bulk_parameters_iterations(THD *thd) +{ + Prepared_statement *stmt= (Prepared_statement *) thd->bulk_param; + if (!stmt) + return FALSE; + return stmt->bulk_iterations(); +} + + +my_bool Prepared_statement::set_bulk_parameters(bool reset) +{ + DBUG_ENTER("Prepared_statement::set_bulk_parameters"); + 
DBUG_PRINT("info", ("iteration: %d", iterations)); + + if (iterations) + { +#ifndef EMBEDDED_LIBRARY + if ((*set_bulk_params)(this, &packet, packet_end, reset)) +#else + // bulk parameters are not supported for embedded, so it will an error +#endif + { + my_error(ER_WRONG_ARGUMENTS, MYF(0), + "mysqld_stmt_bulk_execute"); + reset_stmt_params(this); + DBUG_RETURN(true); + } + if (packet >= packet_end) + iterations= FALSE; + } + start_param= 0; + DBUG_RETURN(false); +} + +bool +Prepared_statement::execute_bulk_loop(String *expanded_query, + bool open_cursor, + uchar *packet_arg, + uchar *packet_end_arg) +{ + Reprepare_observer reprepare_observer; + unsigned char *readbuff= NULL; + bool error= 0; + packet= packet_arg; + packet_end= packet_end_arg; + iterations= TRUE; + start_param= true; +#ifdef DBUG_ASSERT_EXISTS + Item *free_list_state= thd->free_list; +#endif + thd->set_bulk_execution((void *)this); + /* Check if we got an error when sending long data */ + if (state == Query_arena::STMT_ERROR) + { + my_message(last_errno, last_error, MYF(0)); + goto err; + } + /* Check for non zero parameter count*/ + if (param_count == 0) + { + DBUG_PRINT("error", ("Statement with no parameters for bulk execution.")); + my_error(ER_UNSUPPORTED_PS, MYF(0)); + goto err; + } + + if (!(sql_command_flags[lex->sql_command] & CF_PS_ARRAY_BINDING_SAFE)) + { + DBUG_PRINT("error", ("Command is not supported in bulk execution.")); + my_error(ER_UNSUPPORTED_PS, MYF(0)); + goto err; + } + /* + Here second buffer for not optimized commands, + optimized commands do it inside thier internal loop. 
+ */ + if (!(sql_command_flags[lex->sql_command] & CF_PS_ARRAY_BINDING_OPTIMIZED) && + this->lex->has_returning()) + { + // Above check can be true for SELECT in future + DBUG_ASSERT(lex->sql_command != SQLCOM_SELECT); + readbuff= thd->net.buff; // old buffer + if (net_allocate_new_packet(&thd->net, thd, MYF(MY_THREAD_SPECIFIC))) + { + readbuff= NULL; // failure, net_allocate_new_packet keeps old buffer + goto err; + } + } + +#ifndef EMBEDDED_LIBRARY + if (read_types && + set_conversion_functions(this, &packet)) +#else + // bulk parameters are not supported for embedded, so it will an error +#endif + { + my_error(ER_WRONG_ARGUMENTS, MYF(0), + "mysqld_stmt_bulk_execute"); + goto err; + } + read_types= FALSE; + + // iterations changed by set_bulk_parameters + while ((iterations || start_param) && !error && !thd->is_error()) + { + int reprepare_attempt= 0; + + /* + Here we set parameters for not optimized commands, + optimized commands do it inside thier internal loop. + */ + if (!(sql_command_flags[lex->sql_command] & CF_PS_ARRAY_BINDING_OPTIMIZED)) + { + if (set_bulk_parameters(TRUE)) + { + goto err; + } + } + +reexecute: + /* + If the free_list is not empty, we'll wrongly free some externally + allocated items when cleaning up after validation of the prepared + statement. + */ + DBUG_ASSERT(thd->free_list == free_list_state); + + /* + Install the metadata observer. If some metadata version is + different from prepare time and an observer is installed, + the observer method will be invoked to push an error into + the error stack. 
+ */ + + if (sql_command_flags[lex->sql_command] & CF_REEXECUTION_FRAGILE) + { + reprepare_observer.reset_reprepare_observer(); + DBUG_ASSERT(thd->m_reprepare_observer == NULL); + thd->m_reprepare_observer= &reprepare_observer; + } + + error= execute(expanded_query, open_cursor) || thd->is_error(); + + thd->m_reprepare_observer= NULL; + +#ifdef WITH_WSREP + if (!(sql_command_flags[lex->sql_command] & CF_PS_ARRAY_BINDING_OPTIMIZED) && + WSREP(thd)) + { + if (wsrep_after_statement(thd)) + { + /* + Re-execution success is unlikely after an error from + wsrep_after_statement(), so retrun error immediately. + */ + thd->get_stmt_da()->reset_diagnostics_area(); + wsrep_override_error(thd, thd->wsrep_cs().current_error(), + thd->wsrep_cs().current_error_status()); + } + } + else +#endif /* WITH_WSREP */ + if (unlikely(error) && + (sql_command_flags[lex->sql_command] & CF_REEXECUTION_FRAGILE) && + !thd->is_fatal_error && !thd->killed && + reprepare_observer.is_invalidated() && + reprepare_attempt++ < MAX_REPREPARE_ATTEMPTS) + { + DBUG_ASSERT(thd->get_stmt_da()->sql_errno() == ER_NEED_REPREPARE); + thd->clear_error(); + + error= reprepare(); + + if (likely(!error)) /* Success */ + goto reexecute; + } + } + reset_stmt_params(this); + thd->set_bulk_execution(0); + if (readbuff) + my_free(readbuff); + return error; + +err: + reset_stmt_params(this); + thd->set_bulk_execution(0); + if (readbuff) + my_free(readbuff); + return true; +} + + +bool +Prepared_statement::execute_server_runnable(Server_runnable *server_runnable) +{ + Statement stmt_backup; + bool error; + Query_arena *save_stmt_arena= thd->stmt_arena; + Reprepare_observer *save_reprepare_observer= thd->m_reprepare_observer; + Item_change_list save_change_list; + + thd->Item_change_list::move_elements_to(&save_change_list); + + state= STMT_CONVENTIONAL_EXECUTION; + + if (!(lex= new (mem_root) st_lex_local)) + return TRUE; + + thd->set_n_backup_statement(this, &stmt_backup); + thd->set_n_backup_active_arena(this, 
&stmt_backup); + + thd->stmt_arena= this; + thd->m_reprepare_observer= 0; + + error= server_runnable->execute_server_code(thd); + + thd->cleanup_after_query(); + + thd->m_reprepare_observer= save_reprepare_observer; + thd->restore_active_arena(this, &stmt_backup); + thd->restore_backup_statement(this, &stmt_backup); + thd->stmt_arena= save_stmt_arena; + + save_change_list.move_elements_to(thd); + + /* Items and memory will freed in destructor */ + + return error; +} + + +/** + Reprepare this prepared statement. + + Currently this is implemented by creating a new prepared + statement, preparing it with the original query and then + swapping the new statement and the original one. + + @retval TRUE an error occurred. Possible errors include + incompatibility of new and old result set + metadata + @retval FALSE success, the statement has been reprepared +*/ + +bool +Prepared_statement::reprepare() +{ + char saved_cur_db_name_buf[SAFE_NAME_LEN+1]; + LEX_STRING saved_cur_db_name= + { saved_cur_db_name_buf, sizeof(saved_cur_db_name_buf) }; + LEX_CSTRING stmt_db_name= db; + bool cur_db_changed; + bool error; + + Prepared_statement copy(thd); + copy.m_sql_mode= m_sql_mode; + + copy.set_sql_prepare(); /* To suppress sending metadata to the client. 
*/
+
+  status_var_increment(thd->status_var.com_stmt_reprepare);
+
+  if (unlikely(mysql_opt_change_db(thd, &stmt_db_name, &saved_cur_db_name,
+                                   TRUE, &cur_db_changed)))
+    return TRUE;
+
+  Sql_mode_instant_set sms(thd, m_sql_mode);
+
+  error= ((name.str && copy.set_name(&name)) ||
+          copy.prepare(query(), query_length()) ||
+          validate_metadata(&copy));
+
+  if (cur_db_changed)
+    mysql_change_db(thd, (LEX_CSTRING*) &saved_cur_db_name, TRUE);
+
+  if (likely(!error))
+  {
+    MYSQL_REPREPARE_PS(m_prepared_stmt);
+    swap_prepared_statement(&copy);
+    swap_parameter_array(param_array, copy.param_array, param_count);
+#ifdef DBUG_ASSERT_EXISTS
+    is_reprepared= TRUE;
+#endif
+    /*
+      Clear possible warnings during reprepare, it has to be completely
+      transparent to the user. We use clear_warning_info() since
+      there were no separate query id issued for re-prepare.
+      Sic: we can't simply silence warnings during reprepare, because if
+      it's failed, we need to return all the warnings to the user.
+    */
+    thd->get_stmt_da()->clear_warning_info(thd->query_id);
+    column_info_state.reset();
+  }
+  else
+  {
+    /*
+      Prepare failed and the 'copy' will be freed.
+      Now we have to restore the query_string in the statement so the
+      audit plugin later gets the meaningful notification.
+    */
+    thd->set_query(query(), query_length());
+  }
+  return error;
+}
+
+
+/**
+  Validate statement result set metadata (if the statement returns
+  a result set).
+
+  Currently we only check that the number of columns of the result
+  set did not change.
+  This is a helper method used during re-prepare.
+
+  @param[in]  copy  the re-prepared prepared statement to verify
+                    the metadata of
+
+  @retval TRUE  error, ER_PS_REBIND is reported
+  @retval FALSE statement return no or compatible metadata
+*/
+
+
+bool Prepared_statement::validate_metadata(Prepared_statement *copy)
+{
+  /**
+    If this is an SQL prepared statement or EXPLAIN,
+    return FALSE -- the metadata of the original SELECT,
+    if any, has not been sent to the client.
+ */ + if (is_sql_prepare() || lex->describe) + return FALSE; + + if (lex->first_select_lex()->item_list.elements != + copy->lex->first_select_lex()->item_list.elements) + { + /** Column counts mismatch, update the client */ + thd->server_status|= SERVER_STATUS_METADATA_CHANGED; + } + + return FALSE; +} + + +/** + Replace the original prepared statement with a prepared copy. + + This is a private helper that is used as part of statement + reprepare + + @return This function does not return any errors. +*/ + +void +Prepared_statement::swap_prepared_statement(Prepared_statement *copy) +{ + Statement tmp_stmt; + + /* Swap memory roots. */ + swap_variables(MEM_ROOT, main_mem_root, copy->main_mem_root); + + /* Swap the arenas */ + tmp_stmt.set_query_arena(this); + set_query_arena(copy); + copy->set_query_arena(&tmp_stmt); + + /* Swap the statement parent classes */ + tmp_stmt.set_statement(this); + set_statement(copy); + copy->set_statement(&tmp_stmt); + + /* Swap ids back, we need the original id */ + swap_variables(ulong, id, copy->id); + /* Swap mem_roots back, they must continue pointing at the main_mem_roots */ + swap_variables(MEM_ROOT *, mem_root, copy->mem_root); + /* + Swap the old and the new parameters array. The old array + is allocated in the old arena. + */ + swap_variables(Item_param **, param_array, copy->param_array); + /* Don't swap flags: the copy has IS_SQL_PREPARE always set. */ + /* swap_variables(uint, flags, copy->flags); */ + /* Swap names, the old name is allocated in the wrong memory root */ + swap_variables(LEX_CSTRING, name, copy->name); + /* Ditto */ + swap_variables(LEX_CSTRING, db, copy->db); + + DBUG_ASSERT(param_count == copy->param_count); + DBUG_ASSERT(thd == copy->thd); + last_error[0]= '\0'; + last_errno= 0; +} + + +/** + Execute a prepared statement. + + You should not change global THD state in this function, if at all + possible: it may be called from any context, e.g. 
when executing + a COM_* command, and SQLCOM_* command, or a stored procedure. + + @param expanded_query A query for binlogging which has all parameter + markers ('?') replaced with their actual values. + @param open_cursor True if an attempt to open a cursor should be made. + Currenlty used only in the binary protocol. + + @note + Preconditions, postconditions. + - See the comment for Prepared_statement::prepare(). + + @retval + FALSE ok + @retval + TRUE Error +*/ + +bool Prepared_statement::execute(String *expanded_query, bool open_cursor) +{ + Statement stmt_backup; + Query_arena *old_stmt_arena; + bool error= TRUE; + bool qc_executed= FALSE; + + char saved_cur_db_name_buf[SAFE_NAME_LEN+1]; + LEX_STRING saved_cur_db_name= + { saved_cur_db_name_buf, sizeof(saved_cur_db_name_buf) }; + bool cur_db_changed; + + LEX_CSTRING stmt_db_name= db; + + status_var_increment(thd->status_var.com_stmt_execute); + + if (flags & (uint) IS_IN_USE) + { + my_error(ER_PS_NO_RECURSION, MYF(0)); + return TRUE; + } + + /* + For SHOW VARIABLES lex->result is NULL, as it's a non-SELECT + command. For such queries we don't return an error and don't + open a cursor -- the client library will recognize this case and + materialize the result set. + For SELECT statements lex->result is created in + check_prepared_statement. lex->result->simple_select() is FALSE + in INSERT ... SELECT and similar commands. + */ + + if (open_cursor && lex->result && lex->result->check_simple_select()) + { + DBUG_PRINT("info",("Cursor asked for not SELECT stmt")); + return TRUE; + } + + /* In case the command has a call to SP which re-uses this statement name */ + flags|= IS_IN_USE; + + close_cursor(); + + /* + If the free_list is not empty, we'll wrongly free some externally + allocated items when cleaning up after execution of this statement. 
+ */ + DBUG_ASSERT(thd->Item_change_list::is_empty()); + + /* + The only case where we should have items in the thd->free_list is + after stmt->set_params_from_vars(), which may in some cases create + Item_null objects. + */ + + thd->set_n_backup_statement(this, &stmt_backup); + + /* + Change the current database (if needed). + + Force switching, because the database of the prepared statement may be + NULL (prepared statements can be created while no current database + selected). + */ + + if (mysql_opt_change_db(thd, &stmt_db_name, &saved_cur_db_name, TRUE, + &cur_db_changed)) + goto error; + + /* Allocate query. */ + + if (expanded_query->length() && + alloc_query(thd, expanded_query->ptr(), expanded_query->length())) + { + my_error(ER_OUTOFMEMORY, MYF(ME_FATAL), expanded_query->length()); + goto error; + } + /* + Expanded query is needed for slow logging, so we want thd->query + to point at it even after we restore from backup. This is ok, as + expanded query was allocated in thd->mem_root. + */ + stmt_backup.set_query_inner(thd->query_string); + + /* + At first execution of prepared statement we may perform logical + transformations of the query tree. Such changes should be performed + on the parse tree of current prepared statement and new items should + be allocated in its memory root. Set the appropriate pointer in THD + to the arena of the statement. + */ + old_stmt_arena= thd->stmt_arena; + thd->stmt_arena= this; + reinit_stmt_before_use(thd, lex); + + /* Go! */ + + /* + Log COM_EXECUTE to the general log. Note, that in case of SQL + prepared statements this causes two records to be output: + + Query EXECUTE + Execute + + This is considered user-friendly, since in the + second log entry we output values of parameter markers. + + Do not print anything if this is an SQL prepared statement and + we're inside a stored procedure (also called Dynamic SQL) -- + sub-statements inside stored procedures are not logged into + the general log. 
+ */ + + if (thd->spcont == nullptr) + general_log_write(thd, COM_STMT_EXECUTE, thd->query(), thd->query_length()); + + if (open_cursor) + error= mysql_open_cursor(thd, &result, &cursor); + else + { + /* + Try to find it in the query cache, if not, execute it. + Note that multi-statements cannot exist here (they are not supported in + prepared statements). + */ + if (query_cache_send_result_to_client(thd, thd->query(), + thd->query_length()) <= 0) + { + MYSQL_QUERY_EXEC_START(thd->query(), thd->thread_id, thd->get_db(), + &thd->security_ctx->priv_user[0], + (char *) thd->security_ctx->host_or_ip, 1); + error= mysql_execute_command(thd, true); + MYSQL_QUERY_EXEC_DONE(error); + thd->update_server_status(); + } + else + { + thd->lex->sql_command= SQLCOM_SELECT; + status_var_increment(thd->status_var.com_stat[SQLCOM_SELECT]); + thd->update_stats(); + qc_executed= TRUE; + } + } + + /* + Restore the current database (if changed). + + Force switching back to the saved current database (if changed), + because it may be NULL. In this case, mysql_change_db() would generate + an error. + */ + + if (cur_db_changed) + mysql_change_db(thd, (LEX_CSTRING*) &saved_cur_db_name, TRUE); + + /* Assert that if an error, no cursor is open */ + DBUG_ASSERT(! (error && cursor)); + + if (! cursor) + /* + Pass the value false to don't restore set statement variables. + See the next comment block for more details. + */ + cleanup_stmt(false); + + /* + Log the statement to slow query log if it passes filtering. + We do it here for prepared statements despite of the fact that the function + log_slow_statement() is also called upper the stack from the function + dispatch_command(). The reason for logging slow queries here is that + the function log_slow_statement() must be called before restoring system + variables that could be set on execution of SET STATEMENT clause. 
Since
+    for prepared statement restoring of system variables set on execution of
+    SET STATEMENT clause is performed on return from the method
+    Prepared_statement::execute(), by the time the function log_slow_statement()
+    is invoked from the function dispatch_command() all variables set by
+    the SET STATEMENT clause would be already reset to their original values
+    that break semantic of the SET STATEMENT clause.
+
+    E.g., let's consider the following statements
+      SET slow_query_log= 1;
+      SET @@long_query_time=0.01;
+      PREPARE stmt FROM 'set statement slow_query_log=0 for select sleep(0.1)';
+      EXECUTE stmt;
+
+    It's expected that the above statements don't write any record
+    to slow query log since the system variable slow_query_log is set to 0
+    during execution of the whole statement
+      'set statement slow_query_log=0 for select sleep(0.1)'
+
+    However, if the function log_slow_statement wasn't called here the record
+    for the statement would be written to slow query log since the variable
+    slow_query_log is restored to its original value by the time the function
+    log_slow_statement is called from dispatch_command() to write a record
+    into slow query log.
+  */
+  log_slow_statement(thd);
+
+  error|= lex->restore_set_statement_var();
+
+
+  /*
+    EXECUTE command has its own dummy "explain data". We don't need it,
+    instead, we want to keep the query plan of the statement that was
+    executed.
+ */ + if (!stmt_backup.lex->explain || + !stmt_backup.lex->explain->have_query_plan()) + { + delete_explain_query(stmt_backup.lex); + stmt_backup.lex->explain = thd->lex->explain; + thd->lex->explain= NULL; + } + else + delete_explain_query(thd->lex); + + thd->set_statement(&stmt_backup); + thd->stmt_arena= old_stmt_arena; + + if (state == Query_arena::STMT_PREPARED && !qc_executed) + state= Query_arena::STMT_EXECUTED; + + if (likely(error == 0) && this->lex->sql_command == SQLCOM_CALL) + { + if (is_sql_prepare()) + { + /* + Here we have the diagnostics area status already set to DA_OK. + sent_out_parameters() can raise errors when assigning OUT parameters: + DECLARE a DATETIME; + EXECUTE IMMEDIATE 'CALL p1(?)' USING a; + when the procedure p1 assigns a DATETIME-incompatible value (e.g. 10) + to the out parameter. Allow to overwrite status (to DA_ERROR). + */ + thd->get_stmt_da()->set_overwrite_status(true); + thd->protocol_text.send_out_parameters(&this->lex->param_list); + thd->get_stmt_da()->set_overwrite_status(false); + } + else + thd->protocol->send_out_parameters(&this->lex->param_list); + } + +error: + error|= thd->lex->restore_set_statement_var(); + flags&= ~ (uint) IS_IN_USE; + return error; +} + + +/** + Prepare, execute and clean-up a statement. + @param query - query text + @param length - query text length + @retval true - the query was not executed (parse error, wrong parameters) + @retval false - the query was prepared and executed + + Note, if some error happened during execution, it still returns "false". 
+*/ +bool Prepared_statement::execute_immediate(const char *query, uint query_len) +{ + DBUG_ENTER("Prepared_statement::execute_immediate"); + String expanded_query; + static LEX_CSTRING execute_immediate_stmt_name= + {STRING_WITH_LEN("(immediate)") }; + + set_sql_prepare(); + name= execute_immediate_stmt_name; // for DBUG_PRINT etc + + m_prepared_stmt= MYSQL_CREATE_PS(this, id, thd->m_statement_psi, + name.str, name.length); + + if (prepare(query, query_len)) + DBUG_RETURN(true); + + if (param_count != thd->lex->prepared_stmt.param_count()) + { + my_error(ER_WRONG_ARGUMENTS, MYF(0), "EXECUTE"); + deallocate_immediate(); + DBUG_RETURN(true); + } + + MYSQL_EXECUTE_PS(thd->m_statement_psi, m_prepared_stmt); + (void) execute_loop(&expanded_query, FALSE, NULL, NULL); + deallocate_immediate(); + DBUG_RETURN(false); +} + + +/** + Common part of DEALLOCATE PREPARE, EXECUTE IMMEDIATE, mysqld_stmt_close. +*/ +void Prepared_statement::deallocate_immediate() +{ + /* We account deallocate in the same manner as mysqld_stmt_close */ + status_var_increment(thd->status_var.com_stmt_close); + + /* It should now be safe to reset CHANGE MASTER parameters */ + lex_end(lex); +} + + +/** Common part of DEALLOCATE PREPARE and mysqld_stmt_close. */ + +void Prepared_statement::deallocate() +{ + deallocate_immediate(); + /* Statement map calls delete stmt on erase */ + thd->stmt_map.erase(this); +} + + +/*************************************************************************** +* Ed_result_set +***************************************************************************/ +/** + Use operator delete to free memory of Ed_result_set. + Accessing members of a class after the class has been destroyed + is a violation of the C++ standard but is commonly used in the + server code. +*/ + +void Ed_result_set::operator delete(void *ptr, size_t size) throw () +{ + if (ptr) + { + /* + Make a stack copy, otherwise free_root() will attempt to + write to freed memory. 
*/
+    MEM_ROOT own_root= ((Ed_result_set*) ptr)->m_mem_root;
+    free_root(&own_root, MYF(0));
+  }
+}
+
+
+/**
+  Initialize an instance of Ed_result_set.
+
+  Instances of the class, as well as all result set rows, are
+  always allocated in the memory root passed over as the second
+  argument. In the constructor, we take over ownership of the
+  memory root. It will be freed when the class is destroyed.
+
+  sic: Ed_result_set is not designed to be allocated on stack.
+*/
+
+Ed_result_set::Ed_result_set(List<Ed_row> *rows_arg,
+                             size_t column_count_arg,
+                             MEM_ROOT *mem_root_arg)
+  :m_mem_root(*mem_root_arg),
+  m_column_count(column_count_arg),
+  m_rows(rows_arg),
+  m_next_rset(NULL)
+{
+  /* Take over responsibility for the memory */
+  clear_alloc_root(mem_root_arg);
+}
+
+/***************************************************************************
+* Ed_connection
+***************************************************************************/
+
+/**
+  Create a new "execute direct" connection.
+*/
+
+Ed_connection::Ed_connection(THD *thd)
+  :m_diagnostics_area(thd->query_id, false, true),
+  m_thd(thd),
+  m_rsets(0),
+  m_current_rset(0)
+{
+}
+
+
+/**
+  Free all result sets of the previous statement, if any,
+  and reset warnings and errors.
+
+  Called before execution of the next query.
+*/
+
+void
+Ed_connection::free_old_result()
+{
+  while (m_rsets)
+  {
+    Ed_result_set *rset= m_rsets->m_next_rset;
+    delete m_rsets;
+    m_rsets= rset;
+  }
+  m_current_rset= m_rsets;
+  m_diagnostics_area.reset_diagnostics_area();
+  m_diagnostics_area.clear_warning_info(m_thd->query_id);
+}
+
+
+/**
+  A simple wrapper that uses a helper class to execute SQL statements.
+*/ + +bool +Ed_connection::execute_direct(Protocol *p, LEX_STRING sql_text) +{ + Execute_sql_statement execute_sql_statement(sql_text); + DBUG_PRINT("ed_query", ("%s", sql_text.str)); + + return execute_direct(p, &execute_sql_statement); +} + + +/** + Execute a fragment of server functionality without an effect on + thd, and store results in memory. + + Conventions: + - the code fragment must finish with OK, EOF or ERROR. + - the code fragment doesn't have to close thread tables, + free memory, commit statement transaction or do any other + cleanup that is normally done in the end of dispatch_command(). + + @param server_runnable A code fragment to execute. +*/ + +bool Ed_connection::execute_direct(Protocol *p, Server_runnable *server_runnable) +{ + bool rc= FALSE; + Prepared_statement stmt(m_thd); + Protocol *save_protocol= m_thd->protocol; + Diagnostics_area *save_diagnostics_area= m_thd->get_stmt_da(); + + DBUG_ENTER("Ed_connection::execute_direct"); + + free_old_result(); /* Delete all data from previous execution, if any */ + + m_thd->protocol= p; + m_thd->set_stmt_da(&m_diagnostics_area); + + rc= stmt.execute_server_runnable(server_runnable); + m_thd->protocol->end_statement(); + + m_thd->protocol= save_protocol; + m_thd->set_stmt_da(save_diagnostics_area); + /* + Protocol_local makes use of m_current_rset to keep + track of the last result set, while adding result sets to the end. + Reset it to point to the first result set instead. + */ + m_current_rset= m_rsets; + + DBUG_RETURN(rc); +} + + +/** + A helper method that is called only during execution. + + Although Ed_connection doesn't support multi-statements, + a statement may generate many result sets. All subsequent + result sets are appended to the end. + + @pre This is called only by Protocol_local. 
+*/ + +void +Ed_connection::add_result_set(Ed_result_set *ed_result_set) +{ + if (m_rsets) + { + m_current_rset->m_next_rset= ed_result_set; + /* While appending, use m_current_rset as a pointer to the tail. */ + m_current_rset= ed_result_set; + } + else + m_current_rset= m_rsets= ed_result_set; +} + + +/** + Release ownership of the current result set to the client. + + Since we use a simple linked list for result sets, + this method uses a linear search of the previous result + set to exclude the released instance from the list. + + @todo Use double-linked list, when this is really used. + + XXX: This has never been tested with more than one result set! + + @pre There must be a result set. +*/ + +Ed_result_set * +Ed_connection::store_result_set() +{ + Ed_result_set *ed_result_set; + + DBUG_ASSERT(m_current_rset); + + if (m_current_rset == m_rsets) + { + /* Assign the return value */ + ed_result_set= m_current_rset; + /* Exclude the return value from the list. */ + m_current_rset= m_rsets= m_rsets->m_next_rset; + } + else + { + Ed_result_set *prev_rset= m_rsets; + /* Assign the return value. 
*/
+    ed_result_set= m_current_rset;
+
+    /* Exclude the return value from the list */
+    while (prev_rset->m_next_rset != m_current_rset)
+      prev_rset= prev_rset->m_next_rset;  /* advance the walker, not a copy of the tail */
+    m_current_rset= prev_rset->m_next_rset= m_current_rset->m_next_rset;
+  }
+  ed_result_set->m_next_rset= NULL; /* safety */
+
+  return ed_result_set;
+}
+
+
+#include <mysql.h>
+#include "../libmysqld/embedded_priv.h"
+
+class Protocol_local : public Protocol_text
+{
+public:
+  struct st_mysql_data *cur_data;
+  struct st_mysql_data *first_data;
+  struct st_mysql_data **data_tail;
+  void clear_data_list();
+  struct st_mysql_data *alloc_new_dataset();
+  char **next_field;
+  MYSQL_FIELD *next_mysql_field;
+  MEM_ROOT *alloc;
+  THD *new_thd;
+  Security_context empty_ctx;
+  ulonglong client_capabilities;
+
+  my_bool do_log_bin;
+
+  Protocol_local(THD *thd_arg, THD *new_thd_arg, ulong prealloc) :
+    Protocol_text(thd_arg, prealloc),
+    cur_data(0), first_data(0), data_tail(&first_data), alloc(0),
+    new_thd(new_thd_arg), do_log_bin(FALSE)
+  {}
+
+  void set_binlog_vars(my_bool *sav_log_bin)
+  {
+    *sav_log_bin= thd->variables.sql_log_bin;
+    thd->variables.sql_log_bin= do_log_bin;
+    thd->set_binlog_bit();
+  }
+  void restore_binlog_vars(my_bool sav_log_bin)
+  {
+    do_log_bin= thd->variables.sql_log_bin;
+    thd->variables.sql_log_bin= sav_log_bin;
+    thd->set_binlog_bit();
+  }
+protected:
+  bool net_store_data(const uchar *from, size_t length);
+  bool net_store_data_cs(const uchar *from, size_t length,
+                         CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
+  bool net_send_eof(THD *thd, uint server_status, uint statement_warn_count);
+  bool net_send_ok(THD *, uint, uint, ulonglong, ulonglong, const char *,
+                   bool);
+  bool net_send_error_packet(THD *, uint, const char *, const char *);
+  bool begin_dataset();
+  bool begin_dataset(THD *thd, uint numfields);
+
+  bool write();
+  bool flush();
+
+  bool store_field_metadata(const THD *thd, const Send_field &field,
+                            CHARSET_INFO *charset_for_protocol,
+                            uint pos);
+  bool
send_result_set_metadata(List *list, uint flags); + void remove_last_row(); + bool store_null(); + void prepare_for_resend(); + bool send_list_fields(List *list, const TABLE_LIST *table_list); + + enum enum_protocol_type type() { return PROTOCOL_LOCAL; }; +}; + +static +bool +write_eof_packet_local(THD *thd, + Protocol_local *p, uint server_status, uint statement_warn_count) +{ +// if (!thd->mysql) // bootstrap file handling +// return FALSE; + /* + The following test should never be true, but it's better to do it + because if 'is_fatal_error' is set the server is not going to execute + other queries (see the if test in dispatch_command / COM_QUERY) + */ + if (thd->is_fatal_error) + thd->server_status&= ~SERVER_MORE_RESULTS_EXISTS; + p->cur_data->embedded_info->server_status= server_status; + /* + Don't send warn count during SP execution, as the warn_list + is cleared between substatements, and mysqltest gets confused + */ + p->cur_data->embedded_info->warning_count= + (thd->spcont ? 0 : MY_MIN(statement_warn_count, 65535)); + return FALSE; +} + + +MYSQL_DATA *Protocol_local::alloc_new_dataset() +{ + MYSQL_DATA *data; + struct embedded_query_result *emb_data; + if (!my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME | MY_ZEROFILL), + &data, sizeof(*data), + &emb_data, sizeof(*emb_data), + NULL)) + return NULL; + + emb_data->prev_ptr= &data->data; + cur_data= data; + *data_tail= data; + data_tail= &emb_data->next; + data->embedded_info= emb_data; + return data; +} + + +void Protocol_local::clear_data_list() +{ + while (first_data) + { + MYSQL_DATA *data= first_data; + first_data= data->embedded_info->next; + free_rows(data); + } + data_tail= &first_data; + free_rows(cur_data); + cur_data= 0; +} + + +static char *dup_str_aux(MEM_ROOT *root, const char *from, uint length, + CHARSET_INFO *fromcs, CHARSET_INFO *tocs) +{ + uint32 dummy32; + uint dummy_err; + char *result; + + /* 'tocs' is set 0 when client issues SET character_set_results=NULL */ + if (tocs && 
String::needs_conversion(0, fromcs, tocs, &dummy32)) + { + uint new_len= (tocs->mbmaxlen * length) / fromcs->mbminlen + 1; + result= (char *)alloc_root(root, new_len); + length= copy_and_convert(result, new_len, + tocs, from, length, fromcs, &dummy_err); + } + else + { + result= (char *)alloc_root(root, length + 1); + memcpy(result, from, length); + } + + result[length]= 0; + return result; +} + + +static char *dup_str_aux(MEM_ROOT *root, const LEX_CSTRING &from, + CHARSET_INFO *fromcs, CHARSET_INFO *tocs) +{ + return dup_str_aux(root, from.str, (uint) from.length, fromcs, tocs); +} + + +bool Protocol_local::net_store_data(const uchar *from, size_t length) +{ + char *field_buf; +// if (!thd->mysql) // bootstrap file handling +// return FALSE; + + if (!(field_buf= (char*) alloc_root(alloc, length + sizeof(uint) + 1))) + return TRUE; + *(uint *)field_buf= (uint) length; + *next_field= field_buf + sizeof(uint); + memcpy((uchar*) *next_field, from, length); + (*next_field)[length]= 0; + if (next_mysql_field->max_length < length) + next_mysql_field->max_length= (unsigned long) length; + ++next_field; + ++next_mysql_field; + return FALSE; +} + + +bool Protocol_local::net_store_data_cs(const uchar *from, size_t length, + CHARSET_INFO *from_cs, CHARSET_INFO *to_cs) +{ + uint conv_length= (uint) (to_cs->mbmaxlen * length / from_cs->mbminlen); + uint dummy_error; + char *field_buf; +// if (!thd->mysql) // bootstrap file handling +// return false; + + if (!(field_buf= (char*) alloc_root(alloc, conv_length + sizeof(uint) + 1))) + return true; + *next_field= field_buf + sizeof(uint); + length= copy_and_convert(*next_field, conv_length, to_cs, + (const char*) from, length, from_cs, &dummy_error); + *(uint *) field_buf= (uint) length; + (*next_field)[length]= 0; + if (next_mysql_field->max_length < length) + next_mysql_field->max_length= (unsigned long) length; + ++next_field; + ++next_mysql_field; + return false; +} + + +/** + Embedded library implementation of OK response. 
+ + This function is used by the server to write 'OK' packet to + the "network" when the server is compiled as an embedded library. + Since there is no network in the embedded configuration, + a different implementation is necessary. + Instead of marshalling response parameters to a network representation + and then writing it to the socket, here we simply copy the data to the + corresponding client-side connection structures. + + @sa Server implementation of net_send_ok in protocol.cc for + description of the arguments. + + @return + @retval TRUE An error occurred + @retval FALSE Success +*/ + +bool +Protocol_local::net_send_ok(THD *thd, + uint server_status, uint statement_warn_count, + ulonglong affected_rows, ulonglong id, const char *message, bool) +{ + DBUG_ENTER("emb_net_send_ok"); + MYSQL_DATA *data; +// MYSQL *mysql= thd->mysql; + +// if (!mysql) // bootstrap file handling +// DBUG_RETURN(FALSE); + if (!(data= alloc_new_dataset())) + DBUG_RETURN(TRUE); + data->embedded_info->affected_rows= affected_rows; + data->embedded_info->insert_id= id; + if (message) + strmake_buf(data->embedded_info->info, message); + + bool error= write_eof_packet_local(thd, this, + server_status, statement_warn_count); + cur_data= 0; + DBUG_RETURN(error); +} + + +/** + Embedded library implementation of EOF response. 
+ + @sa net_send_ok + + @return + @retval TRUE An error occurred + @retval FALSE Success +*/ + +bool +Protocol_local::net_send_eof(THD *thd, uint server_status, + uint statement_warn_count) +{ + bool error= write_eof_packet_local(thd, this, server_status, + statement_warn_count); + cur_data= 0; + return error; +} + + +bool Protocol_local::net_send_error_packet(THD *thd, uint sql_errno, + const char *err, const char *sqlstate) +{ + uint error; + char converted_err[MYSQL_ERRMSG_SIZE]; + MYSQL_DATA *data= cur_data; + struct embedded_query_result *ei; + +// if (!thd->mysql) // bootstrap file handling +// { +// fprintf(stderr, "ERROR: %d %s\n", sql_errno, err); +// return TRUE; +// } + if (!data) + data= alloc_new_dataset(); + + ei= data->embedded_info; + ei->last_errno= sql_errno; + convert_error_message(converted_err, sizeof(converted_err), + thd->variables.character_set_results, + err, strlen(err), + system_charset_info, &error); + /* Converted error message is always null-terminated. */ + strmake_buf(ei->info, converted_err); + strmov(ei->sqlstate, sqlstate); + ei->server_status= thd->server_status; + cur_data= 0; + return FALSE; +} + + +bool Protocol_local::begin_dataset() +{ + MYSQL_DATA *data= alloc_new_dataset(); + if (!data) + return 1; + alloc= &data->alloc; + /* Assume rowlength < 8192 */ + init_alloc_root(PSI_INSTRUMENT_ME, alloc, 8192, 0, MYF(0)); + alloc->min_malloc= sizeof(MYSQL_ROWS); + return 0; +} + + +bool Protocol_local::begin_dataset(THD *thd, uint numfields) +{ + if (begin_dataset()) + return true; + MYSQL_DATA *data= cur_data; + data->fields= field_count= numfields; + if (!(data->embedded_info->fields_list= + (MYSQL_FIELD*)alloc_root(&data->alloc, sizeof(MYSQL_FIELD)*field_count))) + return true; + return false; +} + + +bool Protocol_local::write() +{ +// if (!thd->mysql) // bootstrap file handling +// return false; + + *next_field= 0; + return false; +} + + +bool Protocol_local::flush() +{ + return 0; +} + + +bool 
Protocol_local::store_field_metadata(const THD * thd, + const Send_field &server_field, + CHARSET_INFO *charset_for_protocol, + uint pos) +{ + CHARSET_INFO *cs= system_charset_info; + CHARSET_INFO *thd_cs= thd->variables.character_set_results; + MYSQL_DATA *data= cur_data; + MEM_ROOT *field_alloc= &data->alloc; + MYSQL_FIELD *client_field= &cur_data->embedded_info->fields_list[pos]; + DBUG_ASSERT(server_field.is_sane()); + + client_field->db= dup_str_aux(field_alloc, server_field.db_name, + cs, thd_cs); + client_field->table= dup_str_aux(field_alloc, server_field.table_name, + cs, thd_cs); + client_field->name= dup_str_aux(field_alloc, server_field.col_name, + cs, thd_cs); + client_field->org_table= dup_str_aux(field_alloc, server_field.org_table_name, + cs, thd_cs); + client_field->org_name= dup_str_aux(field_alloc, server_field.org_col_name, + cs, thd_cs); + if (charset_for_protocol == &my_charset_bin || thd_cs == NULL) + { + /* No conversion */ + client_field->charsetnr= charset_for_protocol->number; + client_field->length= server_field.length; + } + else + { + /* With conversion */ + client_field->charsetnr= thd_cs->number; + client_field->length= server_field.max_octet_length(charset_for_protocol, + thd_cs); + } + client_field->type= server_field.type_handler()->type_code_for_protocol(); + client_field->flags= (uint16) server_field.flags; + client_field->decimals= server_field.decimals; + + client_field->db_length= (unsigned int) strlen(client_field->db); + client_field->table_length= (unsigned int) strlen(client_field->table); + client_field->name_length= (unsigned int) strlen(client_field->name); + client_field->org_name_length= (unsigned int) strlen(client_field->org_name); + client_field->org_table_length= (unsigned int) strlen(client_field->org_table); + + client_field->catalog= dup_str_aux(field_alloc, "def", 3, cs, thd_cs); + client_field->catalog_length= 3; + + if (IS_NUM(client_field->type)) + client_field->flags|= NUM_FLAG; + + 
client_field->max_length= 0; + client_field->def= 0; + return false; +} + + +void Protocol_local::remove_last_row() +{ + MYSQL_DATA *data= cur_data; + MYSQL_ROWS **last_row_hook= &data->data; + my_ulonglong count= data->rows; + DBUG_ENTER("Protocol_text::remove_last_row"); + while (--count) + last_row_hook= &(*last_row_hook)->next; + + *last_row_hook= 0; + data->embedded_info->prev_ptr= last_row_hook; + data->rows--; + + DBUG_VOID_RETURN; +} + + +bool Protocol_local::send_result_set_metadata(List *list, uint flags) +{ + List_iterator_fast it(*list); + Item *item; + DBUG_ENTER("send_result_set_metadata"); + +// if (!thd->mysql) // bootstrap file handling +// DBUG_RETURN(0); + + if (begin_dataset(thd, list->elements)) + goto err; + + for (uint pos= 0 ; (item= it++); pos++) + { + if (store_item_metadata(thd, item, pos)) + goto err; + } + + if (flags & SEND_EOF) + write_eof_packet_local(thd, this, thd->server_status, + thd->get_stmt_da()->current_statement_warn_count()); + + DBUG_RETURN(prepare_for_send(list->elements)); + err: + my_error(ER_OUT_OF_RESOURCES, MYF(0)); /* purecov: inspected */ + DBUG_RETURN(1); /* purecov: inspected */ +} + + +static void +list_fields_send_default(THD *thd, Protocol_local *p, Field *fld, uint pos) +{ + char buff[80]; + String tmp(buff, sizeof(buff), default_charset_info), *res; + MYSQL_FIELD *client_field= &p->cur_data->embedded_info->fields_list[pos]; + + if (fld->is_null() || !(res= fld->val_str(&tmp))) + { + client_field->def_length= 0; + client_field->def= strmake_root(&p->cur_data->alloc, "", 0); + } + else + { + client_field->def_length= res->length(); + client_field->def= strmake_root(&p->cur_data->alloc, res->ptr(), + client_field->def_length); + } +} + + +bool Protocol_local::send_list_fields(List *list, const TABLE_LIST *table_list) +{ + DBUG_ENTER("send_result_set_metadata"); + Protocol_text prot(thd); + List_iterator_fast it(*list); + Field *fld; + +// if (!thd->mysql) // bootstrap file handling +// DBUG_RETURN(0); + + if 
(begin_dataset(thd, list->elements)) + goto err; + + for (uint pos= 0 ; (fld= it++); pos++) + { + if (prot.store_field_metadata_for_list_fields(thd, fld, table_list, pos)) + goto err; + list_fields_send_default(thd, this, fld, pos); + } + + DBUG_RETURN(prepare_for_send(list->elements)); +err: + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + DBUG_RETURN(1); +} + + +void Protocol_local::prepare_for_resend() +{ + MYSQL_ROWS *cur; + MYSQL_DATA *data= cur_data; + DBUG_ENTER("send_data"); + +// if (!thd->mysql) // bootstrap file handling +// DBUG_VOID_RETURN; + + data->rows++; + if (!(cur= (MYSQL_ROWS *)alloc_root(alloc, sizeof(MYSQL_ROWS)+(field_count + 1) * sizeof(char *)))) + { + my_error(ER_OUT_OF_RESOURCES,MYF(0)); + DBUG_VOID_RETURN; + } + cur->data= (MYSQL_ROW)(((char *)cur) + sizeof(MYSQL_ROWS)); + + *data->embedded_info->prev_ptr= cur; + data->embedded_info->prev_ptr= &cur->next; + next_field=cur->data; + next_mysql_field= data->embedded_info->fields_list; +#ifndef DBUG_OFF + field_pos= 0; +#endif + + DBUG_VOID_RETURN; +} + +bool Protocol_local::store_null() +{ + *(next_field++)= NULL; + ++next_mysql_field; + return false; +} + + +#include +#include + +static void embedded_get_error(MYSQL *mysql, MYSQL_DATA *data) +{ + NET *net= &mysql->net; + struct embedded_query_result *ei= data->embedded_info; + net->last_errno= ei->last_errno; + strmake_buf(net->last_error, ei->info); + memcpy(net->sqlstate, ei->sqlstate, sizeof(net->sqlstate)); + mysql->server_status= ei->server_status; + my_free(data); +} + + +static my_bool loc_read_query_result(MYSQL *mysql) +{ + Protocol_local *p= (Protocol_local *) mysql->thd; + + MYSQL_DATA *res= p->first_data; + DBUG_ASSERT(!p->cur_data); + p->first_data= res->embedded_info->next; + if (res->embedded_info->last_errno && + !res->embedded_info->fields_list) + { + embedded_get_error(mysql, res); + return 1; + } + + mysql->warning_count= res->embedded_info->warning_count; + mysql->server_status= res->embedded_info->server_status; + 
mysql->field_count= res->fields; + if (!(mysql->fields= res->embedded_info->fields_list)) + { + mysql->affected_rows= res->embedded_info->affected_rows; + mysql->insert_id= res->embedded_info->insert_id; + } + net_clear_error(&mysql->net); + mysql->info= 0; + + if (res->embedded_info->info[0]) + { + strmake(mysql->info_buffer, res->embedded_info->info, MYSQL_ERRMSG_SIZE-1); + mysql->info= mysql->info_buffer; + } + + if (res->embedded_info->fields_list) + { + mysql->status=MYSQL_STATUS_GET_RESULT; + p->cur_data= res; + } + else + my_free(res); + + return 0; +} + + +static my_bool +loc_advanced_command(MYSQL *mysql, enum enum_server_command command, + const uchar *header, ulong header_length, + const uchar *arg, ulong arg_length, my_bool skip_check, + MYSQL_STMT *stmt) +{ + my_bool result= 1; + Protocol_local *p= (Protocol_local *) mysql->thd; + NET *net= &mysql->net; + + if (p->thd && p->thd->killed != NOT_KILLED) + { + if (p->thd->killed < KILL_CONNECTION) + p->thd->killed= NOT_KILLED; + else + return 1; + } + + p->clear_data_list(); + /* Check that we are calling the client functions in right order */ + if (mysql->status != MYSQL_STATUS_READY) + { + set_mysql_error(mysql, CR_COMMANDS_OUT_OF_SYNC, unknown_sqlstate); + goto end; + } + + /* Clear result variables */ + p->thd->clear_error(1); + mysql->affected_rows= ~(my_ulonglong) 0; + mysql->field_count= 0; + net_clear_error(net); + + /* + We have to call free_old_query before we start to fill mysql->fields + for new query. In the case of embedded server we collect field data + during query execution (not during data retrieval as it is in remote + client). 
So we have to call free_old_query here + */ + free_old_query(mysql); + + if (header) + { + arg= header; + arg_length= header_length; + } + + if (p->new_thd) + { + THD *thd_orig= current_thd; + set_current_thd(p->thd); + p->thd->thread_stack= (char*) &result; + p->thd->set_time(); + result= execute_server_code(p->thd, (const char *)arg, arg_length); + p->thd->cleanup_after_query(); + mysql_audit_release(p->thd); + p->end_statement(); + set_current_thd(thd_orig); + } + else + { + Ed_connection con(p->thd); + Security_context *ctx_orig= p->thd->security_ctx; + ulonglong cap_orig= p->thd->client_capabilities; + MYSQL_LEX_STRING sql_text; + my_bool log_bin_orig; + p->set_binlog_vars(&log_bin_orig); + + DBUG_ASSERT(current_thd == p->thd); + sql_text.str= (char *) arg; + sql_text.length= arg_length; + p->thd->security_ctx= &p->empty_ctx; + p->thd->client_capabilities= p->client_capabilities; + result= con.execute_direct(p, sql_text); + p->thd->client_capabilities= cap_orig; + p->thd->security_ctx= ctx_orig; + p->restore_binlog_vars(log_bin_orig); + } + if (skip_check) + result= 0; + p->cur_data= 0; + +end: + return result; +} + + +/* + reads dataset from the next query result + + SYNOPSIS + loc_read_rows() + mysql connection handle + other parameters are not used + + NOTES + It just gets next MYSQL_DATA from the result's queue + + RETURN + pointer to MYSQL_DATA with the coming recordset +*/ + +static MYSQL_DATA * +loc_read_rows(MYSQL *mysql, MYSQL_FIELD *mysql_fields __attribute__((unused)), + unsigned int fields __attribute__((unused))) +{ + MYSQL_DATA *result= ((Protocol_local *)mysql->thd)->cur_data; + ((Protocol_local *)mysql->thd)->cur_data= 0; + if (result->embedded_info->last_errno) + { + embedded_get_error(mysql, result); + return NULL; + } + *result->embedded_info->prev_ptr= NULL; + return result; +} + + +/************************************************************************** + Get column lengths of the current row + If one uses mysql_use_result, res->lengths 
contains the length information, + else the lengths are calculated from the offset between pointers. +**************************************************************************/ + +static void loc_fetch_lengths(ulong *to, MYSQL_ROW column, + unsigned int field_count) +{ + MYSQL_ROW end; + + for (end=column + field_count; column != end ; column++,to++) + *to= *column ? *(uint *)((*column) - sizeof(uint)) : 0; +} + + +static void loc_flush_use_result(MYSQL *mysql, my_bool) +{ + Protocol_local *p= (Protocol_local *) mysql->thd; + if (p->cur_data) + { + free_rows(p->cur_data); + p->cur_data= 0; + } + else if (p->first_data) + { + MYSQL_DATA *data= p->first_data; + p->first_data= data->embedded_info->next; + free_rows(data); + } +} + + +static void loc_on_close_free(MYSQL *mysql) +{ + Protocol_local *p= (Protocol_local *) mysql->thd; + THD *thd= p->new_thd; + delete p; + if (thd) + { + delete thd; + local_connection_thread_count--; + } + my_free(mysql->info_buffer); + mysql->info_buffer= 0; +} + +static MYSQL_RES *loc_use_result(MYSQL *mysql) +{ + return mysql_store_result(mysql); +} + +static MYSQL_METHODS local_methods= +{ + loc_read_query_result, /* read_query_result */ + loc_advanced_command, /* advanced_command */ + loc_read_rows, /* read_rows */ + loc_use_result, /* use_result */ + loc_fetch_lengths, /* fetch_lengths */ + loc_flush_use_result, /* flush_use_result */ + NULL, /* read_change_user_result */ + loc_on_close_free /* on_close_free */ +#ifdef EMBEDDED_LIBRARY + ,NULL, /* list_fields */ + NULL, /* read_prepare_result */ + NULL, /* stmt_execute */ + NULL, /* read_binary_rows */ + NULL, /* unbuffered_fetch */ + NULL, /* read_statistics */ + NULL, /* next_result */ + NULL /* read_rows_from_cursor */ +#endif +}; + + +Atomic_counter local_connection_thread_count; + +extern "C" MYSQL *mysql_real_connect_local(MYSQL *mysql) +{ + THD *thd_orig= current_thd; + THD *new_thd; + Protocol_local *p; + ulonglong client_flag; + DBUG_ENTER("mysql_real_connect_local"); + + 
/* Test whether we're already connected */ + if (mysql->server_version) + { + set_mysql_error(mysql, CR_ALREADY_CONNECTED, unknown_sqlstate); + DBUG_RETURN(0); + } + + mysql->methods= &local_methods; + mysql->user= NULL; + client_flag= mysql->options.client_flag; + client_flag|= CLIENT_MULTI_RESULTS;; + client_flag&= ~(CLIENT_COMPRESS | CLIENT_PLUGIN_AUTH); + + mysql->info_buffer= (char *) my_malloc(PSI_INSTRUMENT_ME, + MYSQL_ERRMSG_SIZE, MYF(0)); + if (!thd_orig || thd_orig->lock) + { + /* + When we start with the empty current_thd (that happens when plugins + are loaded during the server start) or when some tables are locked + with the current_thd already (that happens when INSTALL PLUGIN + calls the plugin_init or with queries), we create the new THD for + the local connection. So queries with this MYSQL will be run with + it rather than the current THD. + */ + + new_thd= new THD(0); + local_connection_thread_count++; + new_thd->thread_stack= (char*) &thd_orig; + new_thd->store_globals(); + new_thd->security_ctx->skip_grants(); + new_thd->query_cache_is_applicable= 0; + new_thd->variables.wsrep_on= 0; + new_thd->variables.sql_log_bin= 0; + new_thd->set_binlog_bit(); + new_thd->client_capabilities= client_flag; + + /* + TOSO: decide if we should turn the auditing off + for such threads. 
+ We can do it like this: + new_thd->audit_class_mask[0]= ~0; + */ + bzero((char*) &new_thd->net, sizeof(new_thd->net)); + set_current_thd(thd_orig); + thd_orig= new_thd; + } + else + new_thd= NULL; + + p= new Protocol_local(thd_orig, new_thd, 0); + if (new_thd) + new_thd->protocol= p; + else + { + p->empty_ctx.init(); + p->empty_ctx.skip_grants(); + p->client_capabilities= client_flag; + } + + mysql->thd= p; + mysql->server_status= SERVER_STATUS_AUTOCOMMIT; + + + DBUG_PRINT("exit",("Mysql handler: %p", mysql)); + DBUG_RETURN(mysql); +} + diff --git a/sql/sql_prepare.h b/sql/sql_prepare.h new file mode 100644 index 00000000..ff6e986e --- /dev/null +++ b/sql/sql_prepare.h @@ -0,0 +1,358 @@ +#ifndef SQL_PREPARE_H +#define SQL_PREPARE_H +/* Copyright (c) 1995-2008 MySQL AB, 2009 Sun Microsystems, Inc. + Use is subject to license terms. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "sql_error.h" + + +#define LAST_STMT_ID 0xFFFFFFFF +#define STMT_ID_MASK 0x7FFFFFFF + +class THD; +struct LEX; + +/** + An interface that is used to take an action when + the locking module notices that a table version has changed + since the last execution. "Table" here may refer to any kind of + table -- a base table, a temporary table, a view or an + information schema table. 
+ + When we open and lock tables for execution of a prepared + statement, we must verify that they did not change + since statement prepare. If some table did change, the statement + parse tree *may* be no longer valid, e.g. in case it contains + optimizations that depend on table metadata. + + This class provides an interface (a method) that is + invoked when such a situation takes place. + The implementation of the method simply reports an error, but + the exact details depend on the nature of the SQL statement. + + At most 1 instance of this class is active at a time, in which + case THD::m_reprepare_observer is not NULL. + + @sa check_and_update_table_version() for details of the + version tracking algorithm + + @sa Open_tables_state::m_reprepare_observer for the life cycle + of metadata observers. +*/ + +class Reprepare_observer +{ +public: + /** + Check if a change of metadata is OK. In future + the signature of this method may be extended to accept the old + and the new versions, but since currently the check is very + simple, we only need the THD to report an error. 
+ */ + bool report_error(THD *thd); + bool is_invalidated() const { return m_invalidated; } + void reset_reprepare_observer() { m_invalidated= FALSE; } +private: + bool m_invalidated; +}; + + +void mysqld_stmt_prepare(THD *thd, const char *packet, uint packet_length); +void mysqld_stmt_execute(THD *thd, char *packet, uint packet_length); +void mysqld_stmt_execute_bulk(THD *thd, char *packet, uint packet_length); +void mysqld_stmt_bulk_execute(THD *thd, char *packet, uint packet_length); +void mysqld_stmt_close(THD *thd, char *packet); +void mysql_sql_stmt_prepare(THD *thd); +void mysql_sql_stmt_execute(THD *thd); +void mysql_sql_stmt_execute_immediate(THD *thd); +void mysql_sql_stmt_close(THD *thd); +void mysqld_stmt_fetch(THD *thd, char *packet, uint packet_length); +void mysqld_stmt_reset(THD *thd, char *packet); +void mysql_stmt_get_longdata(THD *thd, char *pos, ulong packet_length); +void reinit_stmt_before_use(THD *thd, LEX *lex); + +my_bool bulk_parameters_iterations(THD *thd); +my_bool bulk_parameters_set(THD *thd); +/** + Execute a fragment of server code in an isolated context, so that + it doesn't leave any effect on THD. THD must have no open tables. + The code must not leave any open tables around. + The result of execution (if any) is stored in Ed_result. +*/ + +class Server_runnable +{ +public: + virtual bool execute_server_code(THD *thd)= 0; + virtual ~Server_runnable(); +}; + + +/** + Execute direct interface. + + @todo Implement support for prelocked mode. +*/ + +class Ed_row; + +/** + Ed_result_set -- a container with result set rows. + @todo Implement support for result set metadata and + automatic type conversion. +*/ + +class Ed_result_set +{ +public: + operator List&() { return *m_rows; } + unsigned int size() const { return m_rows->elements; } + + Ed_result_set(List *rows_arg, size_t column_count, + MEM_ROOT *mem_root_arg); + + /** We don't call member destructors, they all are POD types. 
*/ + ~Ed_result_set() = default; + + size_t get_field_count() const { return m_column_count; } + + static void *operator new(size_t size, MEM_ROOT *mem_root) + { return alloc_root(mem_root, size); } + static void operator delete(void *ptr, size_t size) throw (); + static void operator delete(void *, MEM_ROOT *){} +private: + Ed_result_set(const Ed_result_set &); /* not implemented */ + Ed_result_set &operator=(Ed_result_set &); /* not implemented */ +private: + MEM_ROOT m_mem_root; + size_t m_column_count; + List *m_rows; + Ed_result_set *m_next_rset; + friend class Ed_connection; +}; + + +class Ed_connection +{ +public: + /** + Construct a new "execute direct" connection. + + The connection can be used to execute SQL statements. + If the connection failed to initialize, the error + will be returned on the attempt to execute a statement. + + @pre thd must have no open tables + while the connection is used. However, + Ed_connection works okay in LOCK TABLES mode. + Other properties of THD, such as the current warning + information, errors, etc. do not matter and are + preserved by Ed_connection. One thread may have many + Ed_connections created for it. + */ + Ed_connection(THD *thd); + + /** + Execute one SQL statement. + + Until this method is executed, no other methods of + Ed_connection can be used. Life cycle of Ed_connection is: + + Initialized -> a statement has been executed -> + look at result, move to next result -> + look at result, move to next result -> + ... + moved beyond the last result == Initialized. + + This method can be called repeatedly. Once it's invoked, + results of the previous execution are lost. + + A result of execute_direct() can be either: + + - success, no result set rows. In this case get_field_count() + returns 0. This happens after execution of INSERT, UPDATE, + DELETE, DROP and similar statements. Some other methods, such + as get_affected_rows() can be used to retrieve additional + result information. 
+ + - success, there are some result set rows (maybe 0). E.g. + happens after SELECT. In this case get_field_count() returns + the number of columns in a result set and store_result() + can be used to retrieve a result set.. + + - an error, methods to retrieve error information can + be used. + + @return execution status + @retval FALSE success, use get_field_count() + to determine what to do next. + @retval TRUE error, use get_last_error() + to see the error number. + */ + bool execute_direct(Protocol *p, LEX_STRING sql_text); + + /** + Same as the previous, but takes an instance of Server_runnable + instead of SQL statement text. + + @return execution status + + @retval FALSE success, use get_field_count() + if your code fragment is supposed to + return a result set + @retval TRUE failure + */ + bool execute_direct(Protocol *p, Server_runnable *server_runnable); + + /** + Get the number of affected (deleted, updated) + rows for the current statement. Can be + used for statements with get_field_count() == 0. + + @sa Documentation for C API function + mysql_affected_rows(). + */ + ulonglong get_affected_rows() const + { + return m_diagnostics_area.affected_rows(); + } + + /** + Get the last insert id, if any. + + @sa Documentation for mysql_insert_id(). + */ + ulonglong get_last_insert_id() const + { + return m_diagnostics_area.last_insert_id(); + } + + /** + Get the total number of warnings for the last executed + statement. Note, that there is only one warning list even + if a statement returns multiple results. + + @sa Documentation for C API function + mysql_num_warnings(). + */ + ulong get_warn_count() const + { + return m_diagnostics_area.warn_count(); + } + + /** + The following members are only valid if execute_direct() + or move_to_next_result() returned an error. + They never fail, but if they are called when there is no + result, or no error, the result is not defined. 
+ */ + const char *get_last_error() const { return m_diagnostics_area.message(); } + unsigned int get_last_errno() const { return m_diagnostics_area.sql_errno(); } + const char *get_last_sqlstate() const { return m_diagnostics_area.get_sqlstate(); } + + /** + Provided get_field_count() is not 0, this never fails. You don't + need to free the result set, this is done automatically when + you advance to the next result set or destroy the connection. + Not returning const because of List iterator not accepting + Should be used when you would like Ed_connection to manage + result set memory for you. + */ + Ed_result_set *use_result_set() { return m_current_rset; } + /** + Provided get_field_count() is not 0, this never fails. You + must free the returned result set. This can be called only + once after execute_direct(). + Should be used when you would like to get the results + and destroy the connection. + */ + Ed_result_set *store_result_set(); + + /** + If the query returns multiple results, this method + can be checked if there is another result beyond the next + one. + Never fails. + */ + bool has_next_result() const { return MY_TEST(m_current_rset->m_next_rset); } + /** + Only valid to call if has_next_result() returned true. + Otherwise the result is undefined. + */ + bool move_to_next_result() + { + m_current_rset= m_current_rset->m_next_rset; + return MY_TEST(m_current_rset); + } + + ~Ed_connection() { free_old_result(); } +private: + Diagnostics_area m_diagnostics_area; + /** + Execute direct interface does not support multi-statements, only + multi-results. So we never have a situation when we have + a mix of result sets and OK or error packets. We either + have a single result set, a single error, or a single OK, + or we have a series of result sets, followed by an OK or error. 
+ */ + THD *m_thd; + Ed_result_set *m_rsets; + Ed_result_set *m_current_rset; +private: + void free_old_result(); + void add_result_set(Ed_result_set *ed_result_set); +private: + Ed_connection(const Ed_connection &); /* not implemented */ + Ed_connection &operator=(Ed_connection &); /* not implemented */ +}; + + +/** One result set column. */ + +struct Ed_column: public LEX_STRING +{ + /** Implementation note: destructor for this class is never called. */ +}; + + +/** One result set record. */ + +class Ed_row: public Sql_alloc +{ +public: + const Ed_column &operator[](const unsigned int column_index) const + { + return *get_column(column_index); + } + const Ed_column *get_column(const unsigned int column_index) const + { + DBUG_ASSERT(column_index < size()); + return m_column_array + column_index; + } + size_t size() const { return m_column_count; } + + Ed_row(Ed_column *column_array_arg, size_t column_count_arg) + :m_column_array(column_array_arg), + m_column_count(column_count_arg) + {} +private: + Ed_column *m_column_array; + size_t m_column_count; /* TODO: change to point to metadata */ +}; + +extern Atomic_counter local_connection_thread_count; + +#endif // SQL_PREPARE_H diff --git a/sql/sql_priv.h b/sql/sql_priv.h new file mode 100644 index 00000000..99e1d65d --- /dev/null +++ b/sql/sql_priv.h @@ -0,0 +1,434 @@ +/* Copyright (c) 2000, 2018, Oracle and/or its affiliates. + Copyright (c) 2010, 2019, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + @details + Mostly this file is used in the server. But a little part of it is used in + mysqlbinlog too (definition of SELECT_DISTINCT and others). + The consequence is that 90% of the file is wrapped in \#ifndef MYSQL_CLIENT, + except the part which must be in the server and in the client. +*/ + +#ifndef SQL_PRIV_INCLUDED +#define SQL_PRIV_INCLUDED + +#ifndef MYSQL_CLIENT + +/* + Generates a warning that a feature is deprecated. After a specified + version asserts that the feature is removed. + + Using it as + + WARN_DEPRECATED(thd, 6,2, "BAD", "'GOOD'"); + + Will result in a warning + + "The syntax 'BAD' is deprecated and will be removed in MySQL 6.2. Please + use 'GOOD' instead" + + Note that in macro arguments BAD is not quoted, while 'GOOD' is. + Note that the version is TWO numbers, separated with a comma + (two macro arguments, that is) +*/ +#define WARN_DEPRECATED(Thd,VerHi,VerLo,Old,New) \ + do { \ + compile_time_assert(MYSQL_VERSION_ID < VerHi * 10000 + VerLo * 100); \ + if (((THD *) Thd) != NULL) \ + push_warning_printf(((THD *) Thd), Sql_condition::WARN_LEVEL_WARN, \ + ER_WARN_DEPRECATED_SYNTAX, \ + ER_THD(((THD *) Thd), ER_WARN_DEPRECATED_SYNTAX), \ + (Old), (New)); \ + else \ + sql_print_warning("The syntax '%s' is deprecated and will be removed " \ + "in a future release. Please use %s instead.", \ + (Old), (New)); \ + } while(0) + + +/* + Generates a warning that a feature is deprecated and there is no replacement. + + Using it as + + WARN_DEPRECATED_NO_REPLACEMENT(thd, "BAD"); + + Will result in a warning + + "'BAD' is deprecated and will be removed in a future release." + + Note that in macro arguments BAD is not quoted. 
+*/ + +#define WARN_DEPRECATED_NO_REPLACEMENT(Thd,Old) \ + do { \ + THD *thd_= ((THD*) Thd); \ + if (thd_ != NULL) \ + push_warning_printf(thd_, Sql_condition::WARN_LEVEL_WARN, \ + ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT, \ + ER_THD(thd_, ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT), \ + (Old)); \ + else \ + sql_print_warning("'%s' is deprecated and will be removed " \ + "in a future release.", (Old)); \ + } while(0) + +/*************************************************************************/ + +#endif + +/* + This is included in the server and in the client. + Options for select set by the yacc parser (stored in lex->options). + + NOTE + log_event.h defines OPTIONS_WRITTEN_TO_BIN_LOG to specify what THD + options list are written into binlog. These options can NOT change their + values, or it will break replication between version. + + context is encoded as following: + SELECT - SELECT_LEX_NODE::options + THD - THD::options + intern - neither. used only as + func(..., select_node->options | thd->options | OPTION_XXX, ...) + + TODO: separate three contexts above, move them to separate bitfields. 
+*/ + +#define SELECT_DISTINCT (1ULL << 0) // SELECT, user +#define SELECT_STRAIGHT_JOIN (1ULL << 1) // SELECT, user +#define SELECT_DESCRIBE (1ULL << 2) // SELECT, user +#define SELECT_SMALL_RESULT (1ULL << 3) // SELECT, user +#define SELECT_BIG_RESULT (1ULL << 4) // SELECT, user +#define OPTION_FOUND_ROWS (1ULL << 5) // SELECT, user +#define OPTION_TO_QUERY_CACHE (1ULL << 6) // SELECT, user +#define SELECT_NO_JOIN_CACHE (1ULL << 7) // intern +/** always the opposite of OPTION_NOT_AUTOCOMMIT except when in fix_autocommit() */ +#define OPTION_AUTOCOMMIT (1ULL << 8) // THD, user +#define OPTION_BIG_SELECTS (1ULL << 9) // THD, user +#define OPTION_LOG_OFF (1ULL << 10) // THD, user +#define OPTION_QUOTE_SHOW_CREATE (1ULL << 11) // THD, user +#define TMP_TABLE_ALL_COLUMNS (1ULL << 12) // SELECT, intern +#define OPTION_WARNINGS (1ULL << 13) // THD, user +#define OPTION_AUTO_IS_NULL (1ULL << 14) // THD, user, binlog +#define OPTION_NO_CHECK_CONSTRAINT_CHECKS (1ULL << 15) +#define OPTION_SAFE_UPDATES (1ULL << 16) // THD, user +#define OPTION_BUFFER_RESULT (1ULL << 17) // SELECT, user +#define OPTION_BIN_LOG (1ULL << 18) // THD, user +#define OPTION_NOT_AUTOCOMMIT (1ULL << 19) // THD, user +#define OPTION_BEGIN (1ULL << 20) // THD, intern +#define OPTION_TABLE_LOCK (1ULL << 21) // THD, intern +#define OPTION_QUICK (1ULL << 22) // SELECT (for DELETE) +#define OPTION_BINLOG_THIS_TRX (1ULL << 23) // THD + +#define OPTION_EXPLICIT_DEF_TIMESTAMP (1ULL << 24) // THD, user +#define OPTION_GTID_BEGIN (1ULL << 25) // GTID BEGIN found in log + +/** The following can be set when importing tables in a 'wrong order' + to suppress foreign key checks */ +#define OPTION_NO_FOREIGN_KEY_CHECKS (1ULL << 26) // THD, user, binlog +/** The following speeds up inserts to InnoDB tables by suppressing unique + key checks in some cases */ +#define OPTION_RELAXED_UNIQUE_CHECKS (1ULL << 27) // THD, user, binlog +#define OPTION_IF_EXISTS (1ULL << 28) // binlog +#define OPTION_SCHEMA_TABLE (1ULL << 29) 
// SELECT, intern +#define OPTION_INSERT_HISTORY (1ULL << 30) +/** If not set then the thread will ignore all warnings with level notes. */ +#define OPTION_SQL_NOTES (1ULL << 31) // THD, user +/** + Force the used temporary table to be a MyISAM table (because we will use + fulltext functions when reading from it. +*/ +#define TMP_TABLE_FORCE_MYISAM (1ULL << 32) +#define OPTION_PROFILING (1ULL << 33) +/** + Indicates that this is a HIGH_PRIORITY SELECT. + Currently used only for printing of such selects. + Type of locks to be acquired is specified directly. +*/ +#define SELECT_HIGH_PRIORITY (1ULL << 34) // SELECT, user +/** + Is set in slave SQL thread when there was an + error on master, which, when is not reproducible + on slave (i.e. the query succeeds on slave), + is not terminal to the state of repliation, + and should be ignored. The slave SQL thread, + however, needs to rollback the effects of the + succeeded statement to keep replication consistent. +*/ +#define OPTION_MASTER_SQL_ERROR (1ULL << 35) + +#define OPTION_BINLOG_THIS_STMT (1ULL << 36) // THD +#define OPTION_BINLOG_THIS (OPTION_BINLOG_THIS_STMT | OPTION_BINLOG_THIS_TRX) + +#define OPTION_SKIP_REPLICATION (1ULL << 37) // THD, user +#define OPTION_RPL_SKIP_PARALLEL (1ULL << 38) +#define OPTION_NO_QUERY_CACHE (1ULL << 39) // SELECT, user +#define OPTION_PROCEDURE_CLAUSE (1ULL << 40) // Internal usage +#define SELECT_NO_UNLOCK (1ULL << 41) // SELECT, intern +#define OPTION_BIN_TMP_LOG_OFF (1ULL << 42) // disable binlog, intern +/* Disable commit of binlog. 
Used to combine many DDL's and DML's as one */ +#define OPTION_BIN_COMMIT_OFF (1ULL << 43) +/* The following is used to detect a conflict with DISTINCT */ +#define SELECT_ALL (1ULL << 44) // SELECT, user, parser +/** Flag set if setup_tables already done */ +#define OPTION_SETUP_TABLES_DONE (1ULL << 45) // intern + +#define OPTION_LEX_FOUND_COMMENT (1ULL << 0) // intern, parser + +/* The rest of the file is included in the server only */ +#ifndef MYSQL_CLIENT + +/* @@optimizer_switch flags. These must be in sync with optimizer_switch_names */ +#define OPTIMIZER_SWITCH_INDEX_MERGE (1ULL << 0) +#define OPTIMIZER_SWITCH_INDEX_MERGE_UNION (1ULL << 1) +#define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION (1ULL << 2) +#define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT (1ULL << 3) +#define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_INTERSECT (1ULL << 4) +#define deprecated_ENGINE_CONDITION_PUSHDOWN (1ULL << 5) +#define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN (1ULL << 6) +#define OPTIMIZER_SWITCH_DERIVED_MERGE (1ULL << 7) +#define OPTIMIZER_SWITCH_DERIVED_WITH_KEYS (1ULL << 8) +#define OPTIMIZER_SWITCH_FIRSTMATCH (1ULL << 9) +#define OPTIMIZER_SWITCH_LOOSE_SCAN (1ULL << 10) +#define OPTIMIZER_SWITCH_MATERIALIZATION (1ULL << 11) +#define OPTIMIZER_SWITCH_IN_TO_EXISTS (1ULL << 12) +#define OPTIMIZER_SWITCH_SEMIJOIN (1ULL << 13) +#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE (1ULL << 14) +#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN (1ULL << 15) +#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1ULL << 16) +/** If this is off, MRR is never used. */ +#define OPTIMIZER_SWITCH_MRR (1ULL << 17) +/** + If OPTIMIZER_SWITCH_MRR is on and this is on, MRR is used depending on a + cost-based choice ("automatic"). If OPTIMIZER_SWITCH_MRR is on and this is + off, MRR is "forced" (i.e. used as long as the storage engine is capable of + doing it). 
+*/ +#define OPTIMIZER_SWITCH_MRR_COST_BASED (1ULL << 18) +#define OPTIMIZER_SWITCH_MRR_SORT_KEYS (1ULL << 19) +#define OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE (1ULL << 20) +#define OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE (1ULL << 21) +#define OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL (1ULL << 22) +#define OPTIMIZER_SWITCH_JOIN_CACHE_HASHED (1ULL << 23) +#define OPTIMIZER_SWITCH_JOIN_CACHE_BKA (1ULL << 24) +#define OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE (1ULL << 25) +#define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1ULL << 26) +#define OPTIMIZER_SWITCH_EXTENDED_KEYS (1ULL << 27) +#define OPTIMIZER_SWITCH_EXISTS_TO_IN (1ULL << 28) +#define OPTIMIZER_SWITCH_ORDERBY_EQ_PROP (1ULL << 29) +#define OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_DERIVED (1ULL << 30) +#define OPTIMIZER_SWITCH_SPLIT_MATERIALIZED (1ULL << 31) +#define OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_SUBQUERY (1ULL << 32) +#define OPTIMIZER_SWITCH_USE_ROWID_FILTER (1ULL << 33) +#define OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING (1ULL << 34) +#define OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN (1ULL << 35) +#define OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY (1ULL << 36) +#define OPTIMIZER_SWITCH_CSET_NARROWING (1ULL << 37) + +#define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \ + OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \ + OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \ + OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT | \ + OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN | \ + OPTIMIZER_SWITCH_DERIVED_MERGE | \ + OPTIMIZER_SWITCH_DERIVED_WITH_KEYS | \ + OPTIMIZER_SWITCH_TABLE_ELIMINATION | \ + OPTIMIZER_SWITCH_EXTENDED_KEYS | \ + OPTIMIZER_SWITCH_IN_TO_EXISTS | \ + OPTIMIZER_SWITCH_MATERIALIZATION | \ + OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\ + OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\ + OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE | \ + OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE | \ + OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL | \ + OPTIMIZER_SWITCH_JOIN_CACHE_HASHED | \ + OPTIMIZER_SWITCH_JOIN_CACHE_BKA | \ + OPTIMIZER_SWITCH_SUBQUERY_CACHE | \ + 
OPTIMIZER_SWITCH_SEMIJOIN | \ + OPTIMIZER_SWITCH_FIRSTMATCH | \ + OPTIMIZER_SWITCH_LOOSE_SCAN | \ + OPTIMIZER_SWITCH_EXISTS_TO_IN | \ + OPTIMIZER_SWITCH_ORDERBY_EQ_PROP | \ + OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_DERIVED | \ + OPTIMIZER_SWITCH_SPLIT_MATERIALIZED | \ + OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_SUBQUERY | \ + OPTIMIZER_SWITCH_USE_ROWID_FILTER | \ + OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING | \ + OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE) + +/* + Replication uses 8 bytes to store SQL_MODE in the binary log. The day you + use strictly more than 64 bits by adding one more define above, you should + contact the replication team because the replication code should then be + updated (to store more bytes on disk). + + NOTE: When adding new SQL_MODE types, make sure to also add them to + the scripts used for creating the MySQL system tables + in scripts/mysql_system_tables.sql and scripts/mysql_system_tables_fix.sql + +*/ + +/* + Flags below are set when we perform + context analysis of the statement and make + subqueries non-const. It prevents subquery + evaluation at context analysis stage. +*/ + +/* + Don't evaluate this subquery during statement prepare even if + it's a constant one. The flag is switched off in the end of + mysqld_stmt_prepare. +*/ +#define CONTEXT_ANALYSIS_ONLY_PREPARE 1 +/* + Special JOIN::prepare mode: changing of query is prohibited. + When creating a view, we need to just check its syntax omitting + any optimizations: afterwards definition of the view will be + reconstructed by means of ::print() methods and written to + to an .frm file. We need this definition to stay untouched. +*/ +#define CONTEXT_ANALYSIS_ONLY_VIEW 2 +/* + Don't evaluate this subquery during derived table prepare even if + it's a constant one. 
+*/ +#define CONTEXT_ANALYSIS_ONLY_DERIVED 4 +/* + Don't evaluate constant sub-expressions of virtual column + expressions when opening tables +*/ +#define CONTEXT_ANALYSIS_ONLY_VCOL_EXPR 8 + + +/* + Uncachable causes: +*/ +/* This subquery has fields from outer query (put by user) */ +#define UNCACHEABLE_DEPENDENT_GENERATED 1 +/* This subquery contains functions with random result */ +#define UNCACHEABLE_RAND 2 +/* This subquery contains functions with side effect */ +#define UNCACHEABLE_SIDEEFFECT 4 +/* Forcing to save JOIN tables for explain */ +#define UNCACHEABLE_EXPLAIN 8 +/* For uncorrelated SELECT in an UNION with some correlated SELECTs */ +#define UNCACHEABLE_UNITED 16 +#define UNCACHEABLE_CHECKOPTION 32 +/* + This subquery has fields from outer query injected during + transformation process +*/ +#define UNCACHEABLE_DEPENDENT_INJECTED 64 +/* This subquery has fields from outer query (any nature) */ +#define UNCACHEABLE_DEPENDENT (UNCACHEABLE_DEPENDENT_GENERATED | \ + UNCACHEABLE_DEPENDENT_INJECTED) + +#define FAKE_SELECT_LEX_ID UINT_MAX + +/* Used to check GROUP BY list in the MODE_ONLY_FULL_GROUP_BY mode */ +#define UNDEF_POS (-1) + +#define IN_SUBQUERY_CONVERSION_THRESHOLD 1000 + +#endif /* !MYSQL_CLIENT */ + +/* BINLOG_DUMP options */ + +#define BINLOG_DUMP_NON_BLOCK 1 +#define BINLOG_SEND_ANNOTATE_ROWS_EVENT 2 + +#ifndef MYSQL_CLIENT + +/* + Field::is_equal() return codes. 
+*/ +#define IS_EQUAL_NO 0 +#define IS_EQUAL_YES 1 +/** + new_field has compatible packed representation with old type, + so it is theoretically possible to perform change by only updating + data dictionary without changing table rows +*/ +#define IS_EQUAL_PACK_LENGTH 2 + +enum enum_parsing_place +{ + NO_MATTER, + IN_HAVING, + SELECT_LIST, + IN_WHERE, + IN_ON, + IN_GROUP_BY, + IN_ORDER_BY, + IN_UPDATE_ON_DUP_KEY, + IN_PART_FUNC, + BEFORE_OPT_LIST, + AFTER_LIST, + FOR_LOOP_BOUND, + IN_RETURNING, + PARSING_PLACE_SIZE /* always should be the last */ +}; + + +class sys_var; + +enum enum_yes_no_unknown +{ + TVL_YES, TVL_NO, TVL_UNKNOWN +}; + +#ifdef MYSQL_SERVER + +/* + External variables +*/ + + +/* yy_*.cc */ +#ifndef DBUG_OFF +extern void turn_parser_debug_on_MYSQLparse(); +extern void turn_parser_debug_on_ORAparse(); + +#endif + +/** + convert a hex digit into number. +*/ + +inline int hexchar_to_int(char c) +{ + if (c <= '9' && c >= '0') + return c-'0'; + c|=32; + if (c <= 'f' && c >= 'a') + return c-'a'+10; + return -1; +} + +/* This must match the path length limit in the ER_NOT_RW_DIR error msg. */ +#define ER_NOT_RW_DIR_PATHSIZE 200 + +bool db_name_is_in_ignore_db_dirs_list(const char *dbase); + +#endif /* MYSQL_SERVER */ + +#endif /* MYSQL_CLIENT */ + +#endif /* SQL_PRIV_INCLUDED */ diff --git a/sql/sql_profile.cc b/sql/sql_profile.cc new file mode 100644 index 00000000..f576e693 --- /dev/null +++ b/sql/sql_profile.cc @@ -0,0 +1,691 @@ +/* Copyright (c) 2007, 2012, Oracle and/or its affiliates. + Copyright (c) 2008, 2012, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/** + @file + + Implement query profiling as as list of metaphorical fences, with one fence + per query, and each fencepost a change of thd->proc_info state (with a + snapshot of system statistics). When asked, we can then iterate over the + fenceposts and calculate the distance between them, to inform the user what + happened during a particular query or thd->proc_info state. + + User variables that inform profiling behavior: + - "profiling", boolean, session only, "Are queries profiled?" + - "profiling_history_size", integer, session + global, "Num queries stored?" +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_profile.h" +#include "sql_i_s.h" // schema_table_store_record +#include "sql_class.h" // THD + +#ifdef _WIN32 +#pragma comment(lib,"psapi.lib") +#endif + +#define TIME_FLOAT_DIGITS 9 +/** two vals encoded: (len*100)+dec */ +#define TIME_I_S_DECIMAL_SIZE (TIME_FLOAT_DIGITS*100)+(TIME_FLOAT_DIGITS-3) + +#define MAX_QUERY_LENGTH 300 +#define MAX_QUERY_HISTORY 101 + +/** + Connects Information_Schema and Profiling. 
+*/ +int fill_query_profile_statistics_info(THD *thd, TABLE_LIST *tables, + Item *cond) +{ +#if defined(ENABLED_PROFILING) + return(thd->profiling.fill_statistics_info(thd, tables, cond)); +#else + my_error(ER_FEATURE_DISABLED, MYF(0), "SHOW PROFILE", "enable-profiling"); + return(1); +#endif +} + +namespace Show { + +ST_FIELD_INFO query_profile_statistics_info[]= +{ + Column("QUERY_ID", SLong(20), NOT_NULL, "Query_id"), + Column("SEQ", SLong(20), NOT_NULL, "Seq"), + Column("STATE", Varchar(30), NOT_NULL, "Status"), + Column("DURATION", Decimal(TIME_I_S_DECIMAL_SIZE), NOT_NULL, "Duration"), + Column("CPU_USER", Decimal(TIME_I_S_DECIMAL_SIZE), NULLABLE, "CPU_user"), + Column("CPU_SYSTEM", Decimal(TIME_I_S_DECIMAL_SIZE), NULLABLE, "CPU_system"), + Column("CONTEXT_VOLUNTARY", SLong(20), NULLABLE, "Context_voluntary"), + Column("CONTEXT_INVOLUNTARY", SLong(20), NULLABLE, "Context_involuntary"), + Column("BLOCK_OPS_IN", SLong(20), NULLABLE, "Block_ops_in"), + Column("BLOCK_OPS_OUT", SLong(20), NULLABLE, "Block_ops_out"), + Column("MESSAGES_SENT", SLong(20), NULLABLE, "Messages_sent"), + Column("MESSAGES_RECEIVED", SLong(20), NULLABLE, "Messages_received"), + Column("PAGE_FAULTS_MAJOR", SLong(20), NULLABLE, "Page_faults_major"), + Column("PAGE_FAULTS_MINOR", SLong(20), NULLABLE, "Page_faults_minor"), + Column("SWAPS", SLong(20), NULLABLE, "Swaps"), + Column("SOURCE_FUNCTION", Varchar(30), NULLABLE, "Source_function"), + Column("SOURCE_FILE", Varchar(20), NULLABLE, "Source_file"), + Column("SOURCE_LINE", SLong(20), NULLABLE, "Source_line"), + CEnd() +}; + +} // namespace Show + +int make_profile_table_for_show(THD *thd, ST_SCHEMA_TABLE *schema_table) +{ + uint profile_options = thd->lex->profile_options; + uint fields_include_condition_truth_values[]= { + FALSE, /* Query_id */ + FALSE, /* Seq */ + TRUE, /* Status */ + TRUE, /* Duration */ + profile_options & PROFILE_CPU, /* CPU_user */ + profile_options & PROFILE_CPU, /* CPU_system */ + profile_options & PROFILE_CONTEXT, 
/* Context_voluntary */ + profile_options & PROFILE_CONTEXT, /* Context_involuntary */ + profile_options & PROFILE_BLOCK_IO, /* Block_ops_in */ + profile_options & PROFILE_BLOCK_IO, /* Block_ops_out */ + profile_options & PROFILE_IPC, /* Messages_sent */ + profile_options & PROFILE_IPC, /* Messages_received */ + profile_options & PROFILE_PAGE_FAULTS, /* Page_faults_major */ + profile_options & PROFILE_PAGE_FAULTS, /* Page_faults_minor */ + profile_options & PROFILE_SWAPS, /* Swaps */ + profile_options & PROFILE_SOURCE, /* Source_function */ + profile_options & PROFILE_SOURCE, /* Source_file */ + profile_options & PROFILE_SOURCE, /* Source_line */ + }; + + ST_FIELD_INFO *field_info; + Name_resolution_context *context= &thd->lex->first_select_lex()->context; + int i; + + for (i= 0; !schema_table->fields_info[i].end_marker(); i++) + { + if (! fields_include_condition_truth_values[i]) + continue; + + field_info= &schema_table->fields_info[i]; + Item_field *field= new (thd->mem_root) Item_field(thd, context, + field_info->name()); + if (field) + { + field->set_name(thd, field_info->old_name()); + if (add_item_to_list(thd, field)) + return 1; + } + } + return 0; +} + + +#if defined(ENABLED_PROFILING) + +#define RUSAGE_USEC(tv) ((tv).tv_sec*1000*1000 + (tv).tv_usec) +#define RUSAGE_DIFF_USEC(tv1, tv2) (RUSAGE_USEC((tv1))-RUSAGE_USEC((tv2))) + +#ifdef _WIN32 +static ULONGLONG FileTimeToQuadWord(FILETIME *ft) +{ + // Overlay FILETIME onto a ULONGLONG. + union { + ULONGLONG qwTime; + FILETIME ft; + } u; + + u.ft = *ft; + return u.qwTime; +} + + +// Get time difference between to FILETIME objects in seconds. 
+static double GetTimeDiffInSeconds(FILETIME *a, FILETIME *b) +{ + return ((FileTimeToQuadWord(a) - FileTimeToQuadWord(b)) / 1e7); +} +#endif + +PROF_MEASUREMENT::PROF_MEASUREMENT(QUERY_PROFILE *profile_arg, const char + *status_arg) + :profile(profile_arg) +{ + collect(); + set_label(status_arg, NULL, NULL, 0); +} + +PROF_MEASUREMENT::PROF_MEASUREMENT(QUERY_PROFILE *profile_arg, + const char *status_arg, + const char *function_arg, + const char *file_arg, + unsigned int line_arg) + :profile(profile_arg) +{ + collect(); + set_label(status_arg, function_arg, file_arg, line_arg); +} + +PROF_MEASUREMENT::~PROF_MEASUREMENT() +{ + my_free(allocated_status_memory); + status= function= file= NULL; +} + +void PROF_MEASUREMENT::set_label(const char *status_arg, + const char *function_arg, + const char *file_arg, unsigned int line_arg) +{ + size_t sizes[3]; /* 3 == status+function+file */ + char *cursor; + + /* + Compute all the space we'll need to allocate one block for everything + we'll need, instead of N mallocs. + */ + sizes[0]= (status_arg == NULL) ? 0 : strlen(status_arg) + 1; + sizes[1]= (function_arg == NULL) ? 0 : strlen(function_arg) + 1; + sizes[2]= (file_arg == NULL) ? 0 : strlen(file_arg) + 1; + + allocated_status_memory= (char *) my_malloc(key_memory_PROFILE, sizes[0] + + sizes[1] + sizes[2], MYF(0)); + DBUG_ASSERT(allocated_status_memory != NULL); + + cursor= allocated_status_memory; + + if (status_arg != NULL) + { + strcpy(cursor, status_arg); + status= cursor; + cursor+= sizes[0]; + } + else + status= NULL; + + if (function_arg != NULL) + { + strcpy(cursor, function_arg); + function= cursor; + cursor+= sizes[1]; + } + else + function= NULL; + + if (file_arg != NULL) + { + strcpy(cursor, file_arg); + file= cursor; + cursor+= sizes[2]; + } + else + file= NULL; + + line= line_arg; +} + +/** + This updates the statistics for this moment of time. 
It captures the state + of the running system, so later we can compare points in time and infer what + happened in the mean time. It should only be called immediately upon + instantiation of this PROF_MEASUREMENT. + + @todo Implement resource capture for OSes not like BSD. +*/ +void PROF_MEASUREMENT::collect() +{ + time_usecs= my_interval_timer() / 1e3; /* ns to us */ +#ifdef HAVE_GETRUSAGE + getrusage(RUSAGE_SELF, &rusage); +#elif defined(_WIN32) + FILETIME ftDummy; + // NOTE: Get{Process|Thread}Times has a granularity of the clock interval, + // which is typically ~15ms. So intervals shorter than that will not be + // measurable by this function. + GetProcessTimes(GetCurrentProcess(), &ftDummy, &ftDummy, &ftKernel, &ftUser); + GetProcessIoCounters(GetCurrentProcess(), &io_count); + GetProcessMemoryInfo(GetCurrentProcess(), &mem_count, sizeof(mem_count)); +#endif +} + + +QUERY_PROFILE::QUERY_PROFILE(PROFILING *profiling_arg, const char *status_arg) + :profiling(profiling_arg), profiling_query_id(0), query_source(NULL) +{ + m_seq_counter= 1; + PROF_MEASUREMENT *prof= new PROF_MEASUREMENT(this, status_arg); + prof->m_seq= m_seq_counter++; + m_start_time_usecs= prof->time_usecs; + m_end_time_usecs= m_start_time_usecs; + entries.push_back(prof); +} + +QUERY_PROFILE::~QUERY_PROFILE() +{ + while (! entries.is_empty()) + delete entries.pop(); + + my_free(query_source); +} + +/** + @todo Provide a way to include the full text, as in SHOW PROCESSLIST. +*/ +void QUERY_PROFILE::set_query_source(char *query_source_arg, size_t query_length_arg) +{ + /* Truncate to avoid DoS attacks. 
*/ + size_t length= MY_MIN(MAX_QUERY_LENGTH, query_length_arg); + + DBUG_ASSERT(query_source == NULL); /* we don't leak memory */ + if (query_source_arg != NULL) + query_source= my_strndup(key_memory_PROFILE, query_source_arg, length, MYF(0)); +} + +void QUERY_PROFILE::new_status(const char *status_arg, + const char *function_arg, const char *file_arg, + unsigned int line_arg) +{ + PROF_MEASUREMENT *prof; + DBUG_ENTER("QUERY_PROFILE::status"); + + if (!status_arg) + DBUG_VOID_RETURN; + + if ((function_arg != NULL) && (file_arg != NULL)) + prof= new PROF_MEASUREMENT(this, status_arg, function_arg, base_name(file_arg), line_arg); + else + prof= new PROF_MEASUREMENT(this, status_arg); + + prof->m_seq= m_seq_counter++; + m_end_time_usecs= prof->time_usecs; + entries.push_back(prof); + + /* Maintain the query history size. */ + while (entries.elements > MAX_QUERY_HISTORY) + delete entries.pop(); + + DBUG_VOID_RETURN; +} + + + +PROFILING::PROFILING() + :profile_id_counter(1), current(NULL), last(NULL) +{ +} + +PROFILING::~PROFILING() +{ + restart(); +} + +/* + Restart profiling from scratch +*/ + +void PROFILING::restart() +{ + while (! history.is_empty()) + delete history.pop(); + + if (current != NULL) + delete current; + /* Ensure that profiling object can be reused */ + profile_id_counter= 1; + current= NULL; + last= NULL; +} + + +/** + Throw away the current profile, because it's useless or unwanted + or corrupted. +*/ +void PROFILING::discard_current_query() +{ + DBUG_ENTER("PROFILING::discard_current_profile"); + + delete current; + current= NULL; + + DBUG_VOID_RETURN; +} + +/** + Try to save the current profile entry, clean up the data if it shouldn't be + saved, and maintain the profile history size. Naturally, this may not + succeed if the profile was previously discarded, and that's expected. 
+*/ +void PROFILING::finish_current_query_impl() +{ + DBUG_ENTER("PROFILING::finish_current_profile"); + DBUG_ASSERT(current); + + /* The last fence-post, so we can support the span before this. */ + status_change("ending", NULL, NULL, 0); + + if (enabled && /* ON at end? */ + (current->query_source != NULL) && + (! current->entries.is_empty())) + { + current->profiling_query_id= next_profile_id(); /* assign an id */ + + history.push_back(current); + last= current; /* never contains something that is not in the history. */ + + /* Maintain the history size. */ + while (history.elements > thd->variables.profiling_history_size) + delete history.pop(); + } + else + delete current; + + current= NULL; + DBUG_VOID_RETURN; +} + +bool PROFILING::show_profiles() +{ + QUERY_PROFILE *prof; + List field_list; + MEM_ROOT *mem_root= thd->mem_root; + SELECT_LEX *sel= thd->lex->first_select_lex(); + SELECT_LEX_UNIT *unit= &thd->lex->unit; + ha_rows idx; + Protocol *protocol= thd->protocol; + void *iterator; + DBUG_ENTER("PROFILING::show_profiles"); + + field_list.push_back(new (mem_root) + Item_return_int(thd, "Query_ID", 10, + MYSQL_TYPE_LONG), + mem_root); + field_list.push_back(new (mem_root) + Item_return_int(thd, "Duration", + TIME_FLOAT_DIGITS - 1, + MYSQL_TYPE_DOUBLE), + mem_root); + field_list.push_back(new (mem_root) Item_empty_string(thd, "Query", 40), + mem_root); + + if (protocol->send_result_set_metadata(&field_list, + Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF)) + DBUG_RETURN(TRUE); + + unit->set_limit(sel); + + for (iterator= history.new_iterator(), idx= 1; + iterator != NULL; + iterator= history.iterator_next(iterator), idx++) + { + prof= history.iterator_value(iterator); + + double query_time_usecs= prof->m_end_time_usecs - prof->m_start_time_usecs; + + if (unit->lim.check_offset(idx)) + continue; + if (idx > unit->lim.get_select_limit()) + break; + + protocol->prepare_for_resend(); + protocol->store((uint32)(prof->profiling_query_id)); + 
protocol->store_double(query_time_usecs/(1000.0*1000), + (uint32) TIME_FLOAT_DIGITS-1); + if (prof->query_source != NULL) + protocol->store(prof->query_source, strlen(prof->query_source), + system_charset_info); + else + protocol->store_null(); + + if (protocol->write()) + DBUG_RETURN(TRUE); + } + my_eof(thd); + DBUG_RETURN(FALSE); +} + +/** + Fill the information schema table, "query_profile", as defined in show.cc . + There are two ways to get to this function: Selecting from the information + schema, and a SHOW command. +*/ +int PROFILING::fill_statistics_info(THD *thd_arg, TABLE_LIST *tables, Item *cond) +{ + DBUG_ENTER("PROFILING::fill_statistics_info"); + TABLE *table= tables->table; + ulonglong row_number= 0; + + QUERY_PROFILE *query; + /* Go through each query in this thread's stored history... */ + void *history_iterator; + for (history_iterator= history.new_iterator(); + history_iterator != NULL; + history_iterator= history.iterator_next(history_iterator)) + { + query= history.iterator_value(history_iterator); + + /* + Because we put all profiling info into a table that may be reordered, let + us also include a numbering of each state per query. The query_id and + the "seq" together are unique. + */ + ulong seq; + + void *entry_iterator; + PROF_MEASUREMENT *entry, *previous= NULL; + /* ...and for each query, go through all its state-change steps. */ + for (entry_iterator= query->entries.new_iterator(); + entry_iterator != NULL; + entry_iterator= query->entries.iterator_next(entry_iterator), + previous=entry, row_number++) + { + entry= query->entries.iterator_value(entry_iterator); + seq= entry->m_seq; + + /* Skip the first. We count spans of fence, not fence-posts. */ + if (previous == NULL) continue; + + if (thd_arg->lex->sql_command == SQLCOM_SHOW_PROFILE) + { + /* + We got here via a SHOW command. 
That means that we stored + information about the query we wish to show and that isn't + in a WHERE clause at a higher level to filter out rows we + wish to exclude. + + Because that functionality isn't available in the server yet, + we must filter here, at the wrong level. Once one can con- + struct where and having conditions at the SQL layer, then this + condition should be ripped out. + */ + if (thd_arg->lex->profile_query_id == 0) /* 0 == show final query */ + { + if (query != last) + continue; + } + else + { + if (thd_arg->lex->profile_query_id != query->profiling_query_id) + continue; + } + } + + /* Set default values for this row. */ + restore_record(table, s->default_values); + + /* + The order of these fields is set by the query_profile_statistics_info + array. + */ + table->field[0]->store((ulonglong) query->profiling_query_id, TRUE); + table->field[1]->store((ulonglong) seq, TRUE); /* the step in the sequence */ + /* + This entry, n, has a point in time, T(n), and a status phrase, S(n). + The status phrase S(n) describes the period of time that begins at + T(n). The previous status phrase S(n-1) describes the period of time + that starts at T(n-1) and ends at T(n). Since we want to describe the + time that a status phrase took T(n)-T(n-1), this line must describe the + previous status. 
+ */ + table->field[2]->store(previous->status, strlen(previous->status), + system_charset_info); + + my_decimal duration_decimal; + double2my_decimal(E_DEC_FATAL_ERROR, + (entry->time_usecs-previous->time_usecs)/(1000.0*1000), + &duration_decimal); + + table->field[3]->store_decimal(&duration_decimal); + + +#ifdef HAVE_GETRUSAGE + + my_decimal cpu_utime_decimal, cpu_stime_decimal; + + double2my_decimal(E_DEC_FATAL_ERROR, + RUSAGE_DIFF_USEC(entry->rusage.ru_utime, + previous->rusage.ru_utime) / + (1000.0*1000), + &cpu_utime_decimal); + + double2my_decimal(E_DEC_FATAL_ERROR, + RUSAGE_DIFF_USEC(entry->rusage.ru_stime, + previous->rusage.ru_stime) / + (1000.0*1000), + &cpu_stime_decimal); + + table->field[4]->store_decimal(&cpu_utime_decimal); + table->field[5]->store_decimal(&cpu_stime_decimal); + table->field[4]->set_notnull(); + table->field[5]->set_notnull(); +#elif defined(_WIN32) + my_decimal cpu_utime_decimal, cpu_stime_decimal; + + double2my_decimal(E_DEC_FATAL_ERROR, + GetTimeDiffInSeconds(&entry->ftUser, + &previous->ftUser), + &cpu_utime_decimal); + double2my_decimal(E_DEC_FATAL_ERROR, + GetTimeDiffInSeconds(&entry->ftKernel, + &previous->ftKernel), + &cpu_stime_decimal); + + // Store the result. 
+ table->field[4]->store_decimal(&cpu_utime_decimal); + table->field[5]->store_decimal(&cpu_stime_decimal); + table->field[4]->set_notnull(); + table->field[5]->set_notnull(); +#else + /* TODO: Add CPU-usage info for non-BSD systems */ +#endif + +#ifdef HAVE_GETRUSAGE + table->field[6]->store((uint32)(entry->rusage.ru_nvcsw - + previous->rusage.ru_nvcsw)); + table->field[6]->set_notnull(); + table->field[7]->store((uint32)(entry->rusage.ru_nivcsw - + previous->rusage.ru_nivcsw)); + table->field[7]->set_notnull(); +#else + /* TODO: Add context switch info for non-BSD systems */ +#endif + +#ifdef HAVE_GETRUSAGE + table->field[8]->store((uint32)(entry->rusage.ru_inblock - + previous->rusage.ru_inblock)); + table->field[8]->set_notnull(); + table->field[9]->store((uint32)(entry->rusage.ru_oublock - + previous->rusage.ru_oublock)); + table->field[9]->set_notnull(); +#elif defined(_WIN32) + ULONGLONG reads_delta = entry->io_count.ReadOperationCount - + previous->io_count.ReadOperationCount; + ULONGLONG writes_delta = entry->io_count.WriteOperationCount - + previous->io_count.WriteOperationCount; + + table->field[8]->store((uint32)reads_delta); + table->field[8]->set_notnull(); + + table->field[9]->store((uint32)writes_delta); + table->field[9]->set_notnull(); +#else + /* TODO: Add block IO info for non-BSD systems */ +#endif + +#ifdef HAVE_GETRUSAGE + table->field[10]->store((uint32)(entry->rusage.ru_msgsnd - + previous->rusage.ru_msgsnd), true); + table->field[10]->set_notnull(); + table->field[11]->store((uint32)(entry->rusage.ru_msgrcv - + previous->rusage.ru_msgrcv), true); + table->field[11]->set_notnull(); +#else + /* TODO: Add message info for non-BSD systems */ +#endif + +#ifdef HAVE_GETRUSAGE + table->field[12]->store((uint32)(entry->rusage.ru_majflt - + previous->rusage.ru_majflt), true); + table->field[12]->set_notnull(); + table->field[13]->store((uint32)(entry->rusage.ru_minflt - + previous->rusage.ru_minflt), true); + table->field[13]->set_notnull(); +#elif 
defined(_WIN32) + /* Windows APIs don't easily distinguish between hard and soft page + faults, so we just fill the 'major' column and leave the second NULL. + */ + table->field[12]->store((uint32)(entry->mem_count.PageFaultCount - + previous->mem_count.PageFaultCount), true); + table->field[12]->set_notnull(); +#else + /* TODO: Add page fault info for non-BSD systems */ +#endif + +#ifdef HAVE_GETRUSAGE + table->field[14]->store((uint32)(entry->rusage.ru_nswap - + previous->rusage.ru_nswap), true); + table->field[14]->set_notnull(); +#else + /* TODO: Add swap info for non-BSD systems */ +#endif + + /* Emit the location that started this step, not that ended it. */ + if ((previous->function != NULL) && (previous->file != NULL)) + { + table->field[15]->store(previous->function, strlen(previous->function), + system_charset_info); + table->field[15]->set_notnull(); + table->field[16]->store(previous->file, strlen(previous->file), system_charset_info); + table->field[16]->set_notnull(); + table->field[17]->store(previous->line, true); + table->field[17]->set_notnull(); + } + + if (schema_table_store_record(thd_arg, table)) + DBUG_RETURN(1); + + } + } + + DBUG_RETURN(0); +} + + +void PROFILING::reset() +{ + enabled= (thd->variables.option_bits & OPTION_PROFILING) != 0; +} +#endif /* ENABLED_PROFILING */ diff --git a/sql/sql_profile.h b/sql/sql_profile.h new file mode 100644 index 00000000..88136559 --- /dev/null +++ b/sql/sql_profile.h @@ -0,0 +1,336 @@ +/* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef _SQL_PROFILE_H +#define _SQL_PROFILE_H + +class Item; +struct TABLE_LIST; +class THD; +class ST_FIELD_INFO; +typedef struct st_schema_table ST_SCHEMA_TABLE; + +namespace Show { +extern ST_FIELD_INFO query_profile_statistics_info[]; +} // namespace Show + +int fill_query_profile_statistics_info(THD *thd, TABLE_LIST *tables, Item *cond); +int make_profile_table_for_show(THD *thd, ST_SCHEMA_TABLE *schema_table); + + +#define PROFILE_NONE (uint)0 +#define PROFILE_CPU (uint)(1<<0) +#define PROFILE_MEMORY (uint)(1<<1) +#define PROFILE_BLOCK_IO (uint)(1<<2) +#define PROFILE_CONTEXT (uint)(1<<3) +#define PROFILE_PAGE_FAULTS (uint)(1<<4) +#define PROFILE_IPC (uint)(1<<5) +#define PROFILE_SWAPS (uint)(1<<6) +#define PROFILE_SOURCE (uint)(1<<16) +#define PROFILE_ALL (uint)(~0) + + +#if defined(ENABLED_PROFILING) +#include "sql_priv.h" +#include "unireg.h" + +#ifdef _WIN32 +#include +#endif + +#ifdef HAVE_SYS_RESOURCE_H +#include +#endif + +extern PSI_memory_key key_memory_queue_item; + +class PROF_MEASUREMENT; +class QUERY_PROFILE; +class PROFILING; + + +/** + Implements a persistent FIFO using server List method names. Not + thread-safe. Intended to be used on thread-local data only. 
+*/
+template <class T> class Queue
+{
+private:
+
+  /* Doubly-linked list node; owns neither predecessor nor payload. */
+  struct queue_item
+  {
+    T *payload;
+    struct queue_item *next, *previous;
+  };
+
+  struct queue_item *first, *last;
+
+public:
+  Queue()
+  {
+    elements= 0;
+    first= last= NULL;
+  }
+
+  /**
+    Free all list nodes (not the payloads — those belong to the caller)
+    and return the queue to its empty state.
+  */
+  void empty()
+  {
+    struct queue_item *i, *after_i;
+    for (i= first; i != NULL; i= after_i)
+    {
+      after_i= i->next;
+      my_free(i);
+    }
+    /*
+      Reset the head/tail pointers too.  The original code left them
+      dangling, so a push_back() after empty() would dereference freed
+      memory through 'last'.
+    */
+    first= last= NULL;
+    elements= 0;
+  }
+
+  ulong elements;                       /* The count of items in the Queue */
+
+  /** Append payload at the tail.  Payload ownership stays with the caller. */
+  void push_back(T *payload)
+  {
+    struct queue_item *new_item;
+
+    new_item= (struct queue_item *) my_malloc(key_memory_queue_item,
+                                              sizeof(struct queue_item), MYF(0));
+
+    new_item->payload= payload;
+
+    if (first == NULL)
+      first= new_item;
+    if (last != NULL)
+    {
+      DBUG_ASSERT(last->next == NULL);
+      last->next= new_item;
+    }
+    new_item->previous= last;
+    new_item->next= NULL;
+    last= new_item;
+
+    elements++;
+  }
+
+  /**
+    Detach and return the head payload, or NULL if the queue is empty.
+    The list node is freed; the payload is handed back to the caller.
+  */
+  T *pop()
+  {
+    struct queue_item *old_item= first;
+    T *ret= NULL;
+
+    if (first == NULL)
+    {
+      DBUG_PRINT("warning", ("tried to pop nonexistent item from Queue"));
+      return NULL;
+    }
+
+    ret= old_item->payload;
+    if (first->next != NULL)
+      first->next->previous= NULL;
+    else
+      last= NULL;
+    first= first->next;
+
+    my_free(old_item);
+    elements--;
+
+    return ret;
+  }
+
+  bool is_empty()
+  {
+    /*
+      Invariant: a non-empty count implies a non-NULL head, and an empty
+      count implies a NULL head.  The original assertion used '||' in the
+      second disjunct, making the whole expression a tautology that could
+      never fire; fixed to '&&'.
+    */
+    DBUG_ASSERT(((elements > 0) && (first != NULL)) ||
+                ((elements == 0) && (first == NULL)));
+    return (elements == 0);
+  }
+
+  /* Opaque cursor API: new_iterator() yields the head, NULL means done. */
+  void *new_iterator()
+  {
+    return first;
+  }
+
+  void *iterator_next(void *current)
+  {
+    return ((struct queue_item *) current)->next;
+  }
+
+  T *iterator_value(void *current)
+  {
+    return ((struct queue_item *) current)->payload;
+  }
+
+};
+
+
+/**
+  A single entry in a single profile.
+*/ +class PROF_MEASUREMENT +{ +private: + friend class QUERY_PROFILE; + friend class PROFILING; + + QUERY_PROFILE *profile; + char *status; +#ifdef HAVE_GETRUSAGE + struct rusage rusage; +#elif defined(_WIN32) + FILETIME ftKernel, ftUser; + IO_COUNTERS io_count; + PROCESS_MEMORY_COUNTERS mem_count; +#endif + + char *function; + char *file; + unsigned int line; + + ulong m_seq; + double time_usecs; + char *allocated_status_memory; + + void set_label(const char *status_arg, const char *function_arg, + const char *file_arg, unsigned int line_arg); + void clean_up(); + + PROF_MEASUREMENT(); + PROF_MEASUREMENT(QUERY_PROFILE *profile_arg, const char *status_arg); + PROF_MEASUREMENT(QUERY_PROFILE *profile_arg, const char *status_arg, + const char *function_arg, + const char *file_arg, unsigned int line_arg); + ~PROF_MEASUREMENT(); + void collect(); +}; + + +/** + The full profile for a single query, and includes multiple PROF_MEASUREMENT + objects. +*/ +class QUERY_PROFILE +{ +private: + friend class PROFILING; + + PROFILING *profiling; + + query_id_t profiling_query_id; /* Session-specific id. */ + char *query_source; + + double m_start_time_usecs; + double m_end_time_usecs; + ulong m_seq_counter; + Queue entries; + + + QUERY_PROFILE(PROFILING *profiling_arg, const char *status_arg); + ~QUERY_PROFILE(); + + void set_query_source(char *query_source_arg, size_t query_length_arg); + + /* Add a profile status change to the current profile. */ + void new_status(const char *status_arg, + const char *function_arg, + const char *file_arg, unsigned int line_arg); + + /* Reset the contents of this profile entry. */ + void reset(); + + /* Show this profile. This is called by PROFILING. */ + bool show(uint options); +}; + + +/** + Profiling state for a single THD; contains multiple QUERY_PROFILE objects. +*/ +class PROFILING +{ +private: + friend class PROF_MEASUREMENT; + friend class QUERY_PROFILE; + + /* + Not the system query_id, but a counter unique to profiling. 
+ */ + query_id_t profile_id_counter; + THD *thd; + bool keeping; + bool enabled; + + QUERY_PROFILE *current; + QUERY_PROFILE *last; + Queue history; + + query_id_t next_profile_id() { return(profile_id_counter++); } + +public: + PROFILING(); + ~PROFILING(); + + /** + At a point in execution where we know the query source, save the text + of it in the query profile. + + This must be called exactly once per descrete statement. + */ + void set_query_source(char *query_source_arg, size_t query_length_arg) + { + if (unlikely(current)) + current->set_query_source(query_source_arg, query_length_arg); + } + + /** + Prepare to start processing a new query. It is an error to do this + if there's a query already in process; nesting is not supported. + + @param initial_state (optional) name of period before first state change + */ + void start_new_query(const char *initial_state= "Starting") + { + DBUG_ASSERT(!current); + if (unlikely(enabled)) + current= new QUERY_PROFILE(this, initial_state); + } + + void discard_current_query(); + + void finish_current_query() + { + if (unlikely(current)) + finish_current_query_impl(); + } + + void finish_current_query_impl(); + + void status_change(const char *status_arg, + const char *function_arg, + const char *file_arg, unsigned int line_arg) + { + if (unlikely(current)) + current->new_status(status_arg, function_arg, file_arg, line_arg); + } + + inline void set_thd(THD *thd_arg) + { + thd= thd_arg; + reset(); + } + + /* SHOW PROFILES */ + bool show_profiles(); + + /* ... from INFORMATION_SCHEMA.PROFILING ... */ + int fill_statistics_info(THD *thd, TABLE_LIST *tables, Item *cond); + void reset(); + void restart(); +}; + +# endif /* ENABLED_PROFILING */ +#endif /* _SQL_PROFILE_H */ diff --git a/sql/sql_reload.cc b/sql/sql_reload.cc new file mode 100644 index 00000000..8f0f15a9 --- /dev/null +++ b/sql/sql_reload.cc @@ -0,0 +1,656 @@ +/* Copyright (c) 2010, 2016, Oracle and/or its affiliates. 
+ Copyright (c) 2011, 2016, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_reload.h" +#include "sql_priv.h" +#include "mysqld.h" // select_errors +#include "sql_class.h" // THD +#include "sql_acl.h" // acl_reload +#include "sql_servers.h" // servers_reload +#include "sql_connect.h" // reset_mqh +#include "thread_cache.h" +#include "sql_base.h" // close_cached_tables +#include "sql_parse.h" // check_single_table_access +#include "sql_db.h" // my_dbopt_cleanup +#include "hostname.h" // hostname_cache_refresh +#include "sql_repl.h" // reset_master, reset_slave +#include "rpl_mi.h" // Master_info::data_lock +#include "sql_show.h" +#include "debug_sync.h" +#include "des_key_file.h" +#include "transaction.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif + +static void disable_checkpoints(THD *thd); + +/** + Reload/resets privileges and the different caches. + + @param thd Thread handler (can be NULL!) + @param options What should be reset/reloaded (tables, privileges, slave...) + @param tables Tables to flush (if any) + @param write_to_binlog < 0 if there was an error while interacting with the binary log inside + reload_acl_and_cache, + 0 if we should not write to the binary log, + > 0 if we can write to the binlog. + + + @note Depending on 'options', it may be very bad to write the + query to the binlog (e.g. 
FLUSH SLAVE); this is a + pointer where reload_acl_and_cache() will put 0 if + it thinks we really should not write to the binlog. + Otherwise it will put 1. + + @return Error status code + @retval 0 Ok + @retval !=0 Error; thd->killed is set or thd->is_error() is true +*/ + +bool reload_acl_and_cache(THD *thd, unsigned long long options, + TABLE_LIST *tables, int *write_to_binlog) +{ + bool result=0; + select_errors=0; /* Write if more errors */ + int tmp_write_to_binlog= *write_to_binlog= 1; + + DBUG_ASSERT(!thd || !thd->in_sub_stmt); + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (options & REFRESH_GRANT) + { + THD *tmp_thd= 0; + /* + If reload_acl_and_cache() is called from SIGHUP handler we have to + allocate temporary THD for execution of acl_reload()/grant_reload(). + */ + if (unlikely(!thd) && (thd= (tmp_thd= new THD(0)))) + { + thd->thread_stack= (char*) &tmp_thd; + thd->store_globals(); + } + + if (likely(thd)) + { + bool reload_acl_failed= acl_reload(thd); + bool reload_grants_failed= grant_reload(thd); + bool reload_servers_failed= servers_reload(thd); + + if (reload_acl_failed || reload_grants_failed || reload_servers_failed) + { + result= 1; + /* + When an error is returned, my_message may have not been called and + the client will hang waiting for a response. + */ + my_error(ER_UNKNOWN_ERROR, MYF(0)); + } + } + opt_noacl= 0; + + if (unlikely(tmp_thd)) + { + delete tmp_thd; + thd= 0; + } + reset_mqh((LEX_USER *)NULL, TRUE); + } +#endif + if (options & REFRESH_LOG) + { + /* + Flush the normal query log, the update log, the binary log, + the slow query log, the relay log (if it exists) and the log + tables. 
+ */ + + options|= REFRESH_BINARY_LOG; + options|= REFRESH_RELAY_LOG; + options|= REFRESH_SLOW_LOG; + options|= REFRESH_GENERAL_LOG; + options|= REFRESH_ENGINE_LOG; + options|= REFRESH_ERROR_LOG; + } + + if (options & REFRESH_ERROR_LOG) + if (unlikely(flush_error_log())) + result= 1; + + if ((options & REFRESH_SLOW_LOG) && global_system_variables.sql_log_slow) + logger.flush_slow_log(); + + if ((options & REFRESH_GENERAL_LOG) && opt_log) + logger.flush_general_log(); + + if (options & REFRESH_ENGINE_LOG) + if (ha_flush_logs()) + result= 1; + + if (options & REFRESH_BINARY_LOG) + { + /* + Writing this command to the binlog may result in infinite loops + when doing mysqlbinlog|mysql, and anyway it does not really make + sense to log it automatically (would cause more trouble to users + than it would help them) + */ + tmp_write_to_binlog= 0; + if (mysql_bin_log.is_open()) + { + DYNAMIC_ARRAY *drop_gtid_domain= + (thd && (thd->lex->delete_gtid_domain.elements > 0)) ? + &thd->lex->delete_gtid_domain : NULL; + if (mysql_bin_log.rotate_and_purge(true, drop_gtid_domain)) + *write_to_binlog= -1; + + /* Note that WSREP(thd) might not be true here e.g. during + SST. */ + if (WSREP_ON) + { + /* Wait for last binlog checkpoint event to be logged. */ + mysql_bin_log.wait_for_last_checkpoint_event(); + } + } + } + if (options & REFRESH_RELAY_LOG) + { +#ifdef HAVE_REPLICATION + LEX_CSTRING connection_name; + Master_info *mi; + if (thd) + connection_name= thd->lex->relay_log_connection_name; + else + { + connection_name.str= (char*) ""; + connection_name.length= 0; + } + + /* + Writing this command to the binlog may cause problems as the + slave is not likely to have the same connection names. 
+ */ + tmp_write_to_binlog= 0; + if (connection_name.length == 0) + { + if (master_info_index->flush_all_relay_logs()) + *write_to_binlog= -1; + } + else if (!(mi= (get_master_info(&connection_name, + Sql_condition::WARN_LEVEL_ERROR)))) + { + result= 1; + } + else + { + mysql_mutex_lock(&mi->data_lock); + if (rotate_relay_log(mi)) + *write_to_binlog= -1; + mysql_mutex_unlock(&mi->data_lock); + mi->release(); + } +#endif + } +#ifdef HAVE_QUERY_CACHE + if (options & REFRESH_QUERY_CACHE_FREE) + { + query_cache.pack(thd); // FLUSH QUERY CACHE + options &= ~REFRESH_QUERY_CACHE; // Don't flush cache, just free memory + } + if (options & (REFRESH_TABLES | REFRESH_QUERY_CACHE)) + { + query_cache.flush(); // RESET QUERY CACHE + } +#endif /*HAVE_QUERY_CACHE*/ + + DBUG_ASSERT(!thd || thd->locked_tables_mode || + !thd->mdl_context.has_locks() || + thd->handler_tables_hash.records || + thd->ull_hash.records || + thd->global_read_lock.is_acquired() || + thd->mdl_backup_lock || + thd->current_backup_stage != BACKUP_FINISHED + ); + + /* + Note that if REFRESH_READ_LOCK bit is set then REFRESH_TABLES is set too + (see sql_yacc.yy) + */ + if (options & (REFRESH_TABLES | REFRESH_READ_LOCK)) + { + if ((options & REFRESH_READ_LOCK) && thd) + { + DBUG_ASSERT(!(options & REFRESH_FAST) && !tables); + /* + On the first hand we need write lock on the tables to be flushed, + on the other hand we must not try to aspire a global read lock + if we have a write locked table as this would lead to a deadlock + when trying to reopen (and re-lock) the table after the flush. 
+ */ + if (thd->locked_tables_mode) + { + my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0)); + return 1; + } + + /* + Writing to the binlog could cause deadlocks, as we don't log + UNLOCK TABLES + */ + tmp_write_to_binlog= 0; + if (thd->global_read_lock.lock_global_read_lock(thd)) + return 1; // Killed + if (flush_tables(thd, FLUSH_ALL)) + { + /* + NOTE: my_error() has been already called by reopen_tables() within + close_cached_tables(). + */ + thd->global_read_lock.unlock_global_read_lock(thd); + return 1; + } + + if (thd->global_read_lock.make_global_read_lock_block_commit(thd)) // Killed + { + /* Don't leave things in a half-locked state */ + thd->global_read_lock.unlock_global_read_lock(thd); + return 1; + } + if (options & REFRESH_CHECKPOINT) + disable_checkpoints(thd); + /* + We need to do it second time after wsrep appliers were blocked in + make_global_read_lock_block_commit(thd) above since they could have + modified the tables too. + */ + if (WSREP(thd) && flush_tables(thd, FLUSH_ALL)) + result= 1; + } + else + { + if (thd && thd->locked_tables_mode) + { + /* + If we are under LOCK TABLES we should have a write + lock on tables which we are going to flush. + */ + if (tables) + { + int err; + for (TABLE_LIST *t= tables; t; t= t->next_local) + if (!find_table_for_mdl_upgrade(thd, t->db.str, t->table_name.str, &err)) + { + if (is_locked_view(thd, t)) + t->next_local= t->next_global; + else + { + my_error(err, MYF(0), t->table_name.str); + return 1; + } + } + } + else + { + /* + It is not safe to upgrade the metadata lock without GLOBAL IX lock. + This can happen with FLUSH TABLES WITH READ LOCK as we in + these cases don't take a GLOBAL IX lock in order to be compatible + with global read lock. 
+ */ + if (thd->open_tables && + !thd->mdl_context.is_lock_owner(MDL_key::BACKUP, "", "", + MDL_BACKUP_DDL)) + { + my_error(ER_TABLE_NOT_LOCKED_FOR_WRITE, MYF(0), + thd->open_tables->s->table_name.str); + return true; + } + + for (TABLE *tab= thd->open_tables; tab; tab= tab->next) + { + if (! tab->mdl_ticket->is_upgradable_or_exclusive()) + { + my_error(ER_TABLE_NOT_LOCKED_FOR_WRITE, MYF(0), + tab->s->table_name.str); + return 1; + } + } + } + } + +#ifdef WITH_WSREP + /* In case of applier thread, do not call flush tables */ + if (!thd || !thd->wsrep_applier) +#endif /* WITH_WSREP */ + { + if (close_cached_tables(thd, tables, + ((options & REFRESH_FAST) ? FALSE : TRUE), + (thd ? thd->variables.lock_wait_timeout : + LONG_TIMEOUT))) + { + /* + NOTE: my_error() has been already called by reopen_tables() within + close_cached_tables(). + */ + result= 1; + } + } + } + my_dbopt_cleanup(); + } + if (options & REFRESH_HOSTS) + hostname_cache_refresh(); + if (thd && (options & REFRESH_STATUS)) + refresh_status(thd); + if (options & REFRESH_THREADS) + thread_cache.flush(); +#ifdef HAVE_REPLICATION + if (options & REFRESH_MASTER) + { + DBUG_ASSERT(thd); + tmp_write_to_binlog= 0; + if (reset_master(thd, NULL, 0, thd->lex->next_binlog_file_number)) + { + /* NOTE: my_error() has been already called by reset_master(). */ + result= 1; + } + } +#endif +#ifdef HAVE_OPENSSL + if (options & REFRESH_DES_KEY_FILE) + { + if (des_key_file && load_des_key_file(des_key_file)) + { + /* NOTE: my_error() has been already called by load_des_key_file(). 
*/ + result= 1; + } + } +#endif +#ifdef HAVE_REPLICATION + if (options & REFRESH_SLAVE) + { + LEX_MASTER_INFO* lex_mi= &thd->lex->mi; + Master_info *mi; + tmp_write_to_binlog= 0; + + if (!(mi= get_master_info(&lex_mi->connection_name, + Sql_condition::WARN_LEVEL_ERROR))) + { + result= 1; + } + else + { + /* The following will fail if slave is running */ + if (reset_slave(thd, mi)) + { + mi->release(); + /* NOTE: my_error() has been already called by reset_slave(). */ + result= 1; + } + else if (mi->connection_name.length && thd->lex->reset_slave_info.all) + { + /* If not default connection and 'all' is used */ + mi->release(); + mysql_mutex_lock(&LOCK_active_mi); + if (master_info_index->remove_master_info(mi, 0)) + result= 1; + mysql_mutex_unlock(&LOCK_active_mi); + } + else + mi->release(); + } + } +#endif + if (options & REFRESH_USER_RESOURCES) + reset_mqh((LEX_USER *) NULL, 0); /* purecov: inspected */ + if (options & REFRESH_SSL) + { + if (reinit_ssl()) + result= 1; +#ifdef WITH_WSREP + if (!result && + WSREP_ON && wsrep_reload_ssl()) + { + my_message(ER_UNKNOWN_ERROR, "Failed to refresh WSREP SSL.", MYF(0)); + result= 1; + } +#endif + } + if (options & REFRESH_GENERIC) + { + List_iterator_fast li(thd->lex->view_list); + LEX_CSTRING *ls; + while ((ls= li++)) + { + ST_SCHEMA_TABLE *table= find_schema_table(thd, ls); + if (table->reset_table()) + result= 1; + } + } + if (*write_to_binlog != -1) + *write_to_binlog= tmp_write_to_binlog; + /* + If the query was killed then this function must fail. + */ + return result || (thd ? thd->killed : 0); +} + + +/** + Implementation of FLUSH TABLES WITH READ LOCK + and FLUSH TABLES FOR EXPORT + + In brief: take exclusive locks, expel tables from the table + cache, reopen the tables, enter the 'LOCKED TABLES' mode, + downgrade the locks. + Note: the function is written to be called from + mysql_execute_command(), it is not reusable in arbitrary + execution context. 
+ + Required privileges + ------------------- + Since the statement implicitly enters LOCK TABLES mode, + it requires LOCK TABLES privilege on every table. + But since the rest of FLUSH commands require + the global RELOAD_ACL, it also requires RELOAD_ACL. + + Compatibility with the global read lock + --------------------------------------- + We don't wait for the GRL, since neither the + 5.1 combination that this new statement is intended to + replace (LOCK TABLE WRITE; FLUSH TABLES;), + nor FLUSH TABLES WITH READ LOCK do. + @todo: this is not implemented, Dmitry disagrees. + Currently we wait for GRL in another connection, + but are compatible with a GRL in our own connection. + + Behaviour under LOCK TABLES + --------------------------- + Bail out: i.e. don't perform an implicit UNLOCK TABLES. + This is not consistent with LOCK TABLES statement, but is + in line with behaviour of FLUSH TABLES WITH READ LOCK, and we + try to not introduce any new statements with implicit + semantics. + + Compatibility with parallel updates + ----------------------------------- + As a result, we will wait for all open transactions + against the tables to complete. After the lock downgrade, + new transactions will be able to read the tables, but not + write to them. + + Differences from FLUSH TABLES + ------------------------------------- + - you can't flush WITH READ LOCK a non-existent table + - you can't flush WITH READ LOCK under LOCK TABLES + + Effect on views and temporary tables. + ------------------------------------ + You can only apply this command to existing base tables. + If a view with such name exists, ER_WRONG_OBJECT is returned. + If a temporary table with such name exists, it's ignored: + if there is a base table, it's used, otherwise ER_NO_SUCH_TABLE + is returned. + + Handling of MERGE tables + ------------------------ + For MERGE table this statement will open and lock child tables + for read (it is impossible to lock parent table without it). 
+ Child tables won't be flushed unless they are explicitly present + in the statement's table list. + + Implicit commit + --------------- + This statement causes an implicit commit before and + after it. + + HANDLER SQL + ----------- + If this connection has HANDLERs open against + some of the tables being FLUSHed, these handlers + are implicitly flushed (lose their position). +*/ + +bool flush_tables_with_read_lock(THD *thd, TABLE_LIST *all_tables) +{ + Lock_tables_prelocking_strategy lock_tables_prelocking_strategy; + + /* + This is called from SQLCOM_FLUSH, the transaction has + been committed implicitly. + */ + + if (thd->locked_tables_mode) + { + my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0)); + goto error; + } + + if (thd->current_backup_stage != BACKUP_FINISHED) + { + my_error(ER_BACKUP_LOCK_IS_ACTIVE, MYF(0)); + goto error; + } + + /* Should not flush tables while BACKUP LOCK is active */ + if (thd->mdl_backup_lock) + { + my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0)); + goto error; + } + + if (thd->lex->type & REFRESH_READ_LOCK) + { + /* + Acquire SNW locks on tables to be flushed. Don't acquire global + IX and database-scope IX locks on the tables as this will make + this statement incompatible with FLUSH TABLES WITH READ LOCK. + */ + if (lock_table_names(thd, all_tables, NULL, + thd->variables.lock_wait_timeout, + MYSQL_OPEN_SKIP_SCOPED_MDL_LOCK)) + goto error; + + DEBUG_SYNC(thd,"flush_tables_with_read_lock_after_acquire_locks"); + + /* Reset ticket to satisfy asserts in open_tables(). */ + for (auto table_list= all_tables; table_list; + table_list= table_list->next_global) + table_list->mdl_request.ticket= NULL; + } + + thd->variables.option_bits|= OPTION_TABLE_LOCK; + + /* + Before opening and locking tables the below call also waits + for old shares to go away, so the fact that we don't pass + MYSQL_OPEN_IGNORE_FLUSH flag to it is important. 
+ Also we don't pass MYSQL_OPEN_HAS_MDL_LOCK flag as we want + to open underlying tables if merge table is flushed. + For underlying tables of the merge the below call has to + acquire SNW locks to ensure that they can be locked for + read without further waiting. + */ + if (open_and_lock_tables(thd, all_tables, FALSE, + MYSQL_OPEN_SKIP_SCOPED_MDL_LOCK, + &lock_tables_prelocking_strategy)) + goto error_reset_bits; + + if (thd->lex->type & (REFRESH_FOR_EXPORT|REFRESH_READ_LOCK)) + { + for (TABLE_LIST *table_list= all_tables; table_list; + table_list= table_list->next_global) + { + if (table_list->belong_to_view && + check_single_table_access(thd, PRIV_LOCK_TABLES, table_list, FALSE)) + { + table_list->hide_view_error(thd); + goto error_reset_bits; + } + if (table_list->is_view_or_derived()) + continue; + if (thd->lex->type & REFRESH_FOR_EXPORT && + table_list->table && + !(table_list->table->file->ha_table_flags() & HA_CAN_EXPORT)) + { + my_error(ER_ILLEGAL_HA, MYF(0),table_list->table->file->table_type(), + table_list->db.str, table_list->table_name.str); + goto error_reset_bits; + } + if (thd->lex->type & REFRESH_READ_LOCK && + table_list->table && + table_list->table->file->extra(HA_EXTRA_FLUSH)) + goto error_reset_bits; + } + } + + if (thd->locked_tables_list.init_locked_tables(thd)) + goto error_reset_bits; + + + /* + We don't downgrade MDL_SHARED_NO_WRITE here as the intended + post effect of this call is identical to LOCK TABLES <...> READ, + and we didn't use thd->in_lock_talbes and + thd->sql_command= SQLCOM_LOCK_TABLES hacks to enter the LTM. 
+ */ + + return FALSE; + +error_reset_bits: + trans_rollback_stmt(thd); + close_thread_tables(thd); + thd->variables.option_bits&= ~OPTION_TABLE_LOCK; +error: + return TRUE; +} + + +/** + Disable checkpoints for all handlers + This is released in unlock_global_read_lock() +*/ + +static void disable_checkpoints(THD *thd) +{ + if (!thd->global_disable_checkpoint) + { + thd->global_disable_checkpoint= 1; + if (!global_disable_checkpoint++) + ha_checkpoint_state(1); // Disable checkpoints + } +} diff --git a/sql/sql_reload.h b/sql/sql_reload.h new file mode 100644 index 00000000..699912e0 --- /dev/null +++ b/sql/sql_reload.h @@ -0,0 +1,26 @@ +#ifndef SQL_RELOAD_INCLUDED +#define SQL_RELOAD_INCLUDED +/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +class THD; +struct TABLE_LIST; + +bool reload_acl_and_cache(THD *thd, unsigned long long options, + TABLE_LIST *tables, int *write_to_binlog); + +bool flush_tables_with_read_lock(THD *thd, TABLE_LIST *all_tables); + +#endif diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc new file mode 100644 index 00000000..1137e0e1 --- /dev/null +++ b/sql/sql_rename.cc @@ -0,0 +1,549 @@ +/* + Copyright (c) 2000, 2013, Oracle and/or its affiliates. + Copyright (c) 2011, 2021, Monty Program Ab. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Atomic rename of table; RENAME TABLE t1 to t2, tmp to t1 [,...] +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "sql_rename.h" +#include "sql_cache.h" // query_cache_* +#include "sql_table.h" // write_bin_log +#include "sql_view.h" // mysql_frm_type, mysql_rename_view +#include "sql_trigger.h" +#include "sql_base.h" // tdc_remove_table, lock_table_names, +#include "sql_handler.h" // mysql_ha_rm_tables +#include "sql_statistics.h" +#include "ddl_log.h" +#include "wsrep_mysqld.h" +#include "debug.h" + +/* used to hold table entries for as part of list of renamed temporary tables */ +struct TABLE_PAIR +{ + TABLE_LIST *from, *to; +}; + + +static bool rename_tables(THD *thd, TABLE_LIST *table_list, + DDL_LOG_STATE *ddl_log_state, + bool skip_error, bool if_exits, + bool *force_if_exists); + +/* + Every two entries in the table_list form a pair of original name and + the new name. 
+*/ + +bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list, bool silent, + bool if_exists) +{ + bool error= 1; + bool binlog_error= 0, force_if_exists; + TABLE_LIST *ren_table= 0; + int to_table; + const char *rename_log_table[2]= {NULL, NULL}; + DDL_LOG_STATE ddl_log_state; + DBUG_ENTER("mysql_rename_tables"); + + /* + Avoid problems with a rename on a table that we have locked or + if the user is trying to to do this in a transcation context + */ + + if (thd->locked_tables_mode || thd->in_active_multi_stmt_transaction()) + { + my_message(ER_LOCK_OR_ACTIVE_TRANSACTION, + ER_THD(thd, ER_LOCK_OR_ACTIVE_TRANSACTION), MYF(0)); + DBUG_RETURN(1); + } + + mysql_ha_rm_tables(thd, table_list); + + if (logger.is_log_table_enabled(QUERY_LOG_GENERAL) || + logger.is_log_table_enabled(QUERY_LOG_SLOW)) + { + + /* + Rules for rename of a log table: + + IF 1. Log tables are enabled + AND 2. Rename operates on the log table and nothing is being + renamed to the log table. + DO 3. Throw an error message. + ELSE 4. Perform rename. + */ + + for (to_table= 0, ren_table= table_list; ren_table; + to_table= 1 - to_table, ren_table= ren_table->next_local) + { + int log_table_rename; + if ((log_table_rename= check_if_log_table(ren_table, TRUE, NullS))) + { + /* + as we use log_table_rename as an array index, we need it to start + with 0, while QUERY_LOG_SLOW == 1 and QUERY_LOG_GENERAL == 2. + So, we shift the value to start with 0; + */ + log_table_rename--; + if (rename_log_table[log_table_rename]) + { + if (to_table) + rename_log_table[log_table_rename]= NULL; + else + { + /* + Two renames of "log_table TO" w/o rename "TO log_table" in + between. + */ + my_error(ER_CANT_RENAME_LOG_TABLE, MYF(0), + ren_table->table_name.str, + ren_table->table_name.str); + goto err; + } + } + else + { + if (to_table) + { + /* + Attempt to rename a table TO log_table w/o renaming + log_table TO some table. 
+ */ + my_error(ER_CANT_RENAME_LOG_TABLE, MYF(0), + ren_table->table_name.str, + ren_table->table_name.str); + goto err; + } + else + { + /* save the name of the log table to report an error */ + rename_log_table[log_table_rename]= ren_table->table_name.str; + } + } + } + } + if (rename_log_table[0] || rename_log_table[1]) + { + if (rename_log_table[0]) + my_error(ER_CANT_RENAME_LOG_TABLE, MYF(0), rename_log_table[0], + rename_log_table[0]); + else + my_error(ER_CANT_RENAME_LOG_TABLE, MYF(0), rename_log_table[1], + rename_log_table[1]); + goto err; + } + } + + if (lock_table_names(thd, table_list, 0, thd->variables.lock_wait_timeout, + 0)) + goto err; + + error=0; + bzero(&ddl_log_state, sizeof(ddl_log_state)); + + /* + An exclusive lock on table names is satisfactory to ensure + no other thread accesses this table. + */ + error= rename_tables(thd, table_list, &ddl_log_state, + 0, if_exists, &force_if_exists); + + if (likely(!silent && !error)) + { + ulonglong save_option_bits= thd->variables.option_bits; + if (force_if_exists && ! 
if_exists) + { + /* Add IF EXISTS to binary log */ + thd->variables.option_bits|= OPTION_IF_EXISTS; + } + + debug_crash_here("ddl_log_rename_before_binlog"); + /* + Store xid in ddl log and binary log so that we can check on ddl recovery + if the item is in the binary log (and thus the operation was complete + */ + thd->binlog_xid= thd->query_id; + ddl_log_update_xid(&ddl_log_state, thd->binlog_xid); + binlog_error= write_bin_log(thd, TRUE, thd->query(), thd->query_length()); + if (binlog_error) + error= 1; + thd->binlog_xid= 0; + thd->variables.option_bits= save_option_bits; + debug_crash_here("ddl_log_rename_after_binlog"); + + if (likely(!binlog_error)) + my_ok(thd); + } + + if (likely(!error)) + { + query_cache_invalidate3(thd, table_list, 0); + ddl_log_complete(&ddl_log_state); + } + else + { + /* Revert the renames of normal tables with the help of the ddl log */ + ddl_log_revert(thd, &ddl_log_state); + } + +err: + DBUG_RETURN(error || binlog_error); +} + + +static bool +do_rename_temporary(THD *thd, TABLE_LIST *ren_table, TABLE_LIST *new_table) +{ + LEX_CSTRING *new_alias; + DBUG_ENTER("do_rename_temporary"); + + new_alias= (lower_case_table_names == 2) ? &new_table->alias : + &new_table->table_name; + + if (thd->find_temporary_table(new_table, THD::TMP_TABLE_ANY)) + { + my_error(ER_TABLE_EXISTS_ERROR, MYF(0), new_alias->str); + DBUG_RETURN(1); // This can't be skipped + } + + DBUG_RETURN(thd->rename_temporary_table(ren_table->table, + &new_table->db, new_alias)); +} + + +/** + Parameters for do_rename +*/ + +struct rename_param +{ + LEX_CSTRING old_alias, new_alias; + LEX_CUSTRING old_version; + handlerton *from_table_hton; +}; + + +/** + check_rename() + + Check pre-conditions for rename + - From table should exists + - To table should not exists. + + SYNOPSIS + @param new_table_name The new table/view name + @param new_table_alias The new table/view alias + @param if_exists If not set, give an error if the table does not + exists. 
If set, just give a warning in this case. + @return + @retval 0 ok + @retval >0 Error (from table doesn't exists or to table exists) + @retval <0 Can't do rename, but no error +*/ + +static int +check_rename(THD *thd, rename_param *param, + TABLE_LIST *ren_table, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_table_name, + const LEX_CSTRING *new_table_alias, + bool if_exists) +{ + DBUG_ENTER("check_rename"); + DBUG_PRINT("enter", ("if_exists: %d", (int) if_exists)); + + + if (lower_case_table_names == 2) + { + param->old_alias= ren_table->alias; + param->new_alias= *new_table_alias; + } + else + { + param->old_alias= ren_table->table_name; + param->new_alias= *new_table_name; + } + DBUG_ASSERT(param->new_alias.str); + + if (!ha_table_exists(thd, &ren_table->db, ¶m->old_alias, + ¶m->old_version, NULL, + ¶m->from_table_hton) || + !param->from_table_hton) + { + my_error(ER_NO_SUCH_TABLE, MYF(if_exists ? ME_NOTE : 0), + ren_table->db.str, param->old_alias.str); + DBUG_RETURN(if_exists ? -1 : 1); + } + + if (param->from_table_hton != view_pseudo_hton && + ha_check_if_updates_are_ignored(thd, param->from_table_hton, "RENAME")) + { + /* + Shared table. 
Just drop the old .frm as it's not correct anymore + Discovery will find the old table when it's accessed + */ + tdc_remove_table(thd, ren_table->db.str, ren_table->table_name.str); + quick_rm_table(thd, 0, &ren_table->db, ¶m->old_alias, FRM_ONLY, 0); + DBUG_RETURN(-1); + } + + if (ha_table_exists(thd, new_db, ¶m->new_alias, NULL, NULL, 0)) + { + my_error(ER_TABLE_EXISTS_ERROR, MYF(0), param->new_alias.str); + DBUG_RETURN(1); // This can't be skipped + } + DBUG_RETURN(0); +} + + +/* + Rename a single table or a view + + SYNPOSIS + do_rename() + thd Thread handle + ren_table A table/view to be renamed + new_db The database to which the table to be moved to + skip_error Skip error, but only if the table didn't exists + force_if_exists Set to 1 if we have to log the query with 'IF EXISTS' + Otherwise don't touch the value + + DESCRIPTION + Rename a single table or a view. + In case of failure, all changes will be reverted + + RETURN + false Ok + true rename failed +*/ + +static bool +do_rename(THD *thd, rename_param *param, DDL_LOG_STATE *ddl_log_state, + TABLE_LIST *ren_table, const LEX_CSTRING *new_db, + bool skip_error, bool *force_if_exists) +{ + int rc= 1; + handlerton *hton; + LEX_CSTRING *old_alias, *new_alias; + TRIGGER_RENAME_PARAM rename_param; + DBUG_ENTER("do_rename"); + DBUG_PRINT("enter", ("skip_error: %d", (int) skip_error)); + + old_alias= ¶m->old_alias; + new_alias= ¶m->new_alias; + hton= param->from_table_hton; + + DBUG_ASSERT(!thd->locked_tables_mode); + +#ifdef WITH_WSREP + if (WSREP(thd) && hton && hton != view_pseudo_hton && + !wsrep_should_replicate_ddl(thd, hton)) + DBUG_RETURN(1); +#endif + + tdc_remove_table(thd, ren_table->db.str, ren_table->table_name.str); + + if (hton != view_pseudo_hton) + { + if (hton->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE) + *force_if_exists= 1; + + /* Check if we can rename triggers */ + if (Table_triggers_list::prepare_for_rename(thd, &rename_param, + &ren_table->db, + old_alias, + &ren_table->table_name, + 
new_db, + new_alias)) + DBUG_RETURN(!skip_error); + + thd->replication_flags= 0; + + if (ddl_log_rename_table(ddl_log_state, hton, + &ren_table->db, old_alias, new_db, new_alias)) + DBUG_RETURN(1); + + debug_crash_here("ddl_log_rename_before_rename_table"); + if (!(rc= mysql_rename_table(hton, &ren_table->db, old_alias, + new_db, new_alias, ¶m->old_version, 0))) + { + /* Table rename succeded. + It's safe to start recovery at rename trigger phase + */ + debug_crash_here("ddl_log_rename_before_phase_trigger"); + ddl_log_update_phase(ddl_log_state, DDL_RENAME_PHASE_TRIGGER); + + debug_crash_here("ddl_log_rename_before_rename_trigger"); + + if (!(rc= Table_triggers_list::change_table_name(thd, + &rename_param, + &ren_table->db, + old_alias, + &ren_table->table_name, + new_db, + new_alias))) + { + debug_crash_here("ddl_log_rename_before_stat_tables"); + (void) rename_table_in_stat_tables(thd, &ren_table->db, + &ren_table->table_name, + new_db, new_alias); + debug_crash_here("ddl_log_rename_after_stat_tables"); + } + else + { + /* + We've succeeded in renaming table's .frm and in updating + corresponding handler data, but have failed to update table's + triggers appropriately. So let us revert operations on .frm + and handler's data and report about failure to rename table. + */ + debug_crash_here("ddl_log_rename_after_failed_rename_trigger"); + (void) mysql_rename_table(hton, new_db, new_alias, + &ren_table->db, old_alias, ¶m->old_version, + NO_FK_CHECKS); + debug_crash_here("ddl_log_rename_after_revert_rename_table"); + ddl_log_disable_entry(ddl_log_state); + debug_crash_here("ddl_log_rename_after_disable_entry"); + } + } + if (thd->replication_flags & OPTION_IF_EXISTS) + *force_if_exists= 1; + } + else + { + /* + Change of schema is not allowed + except of ALTER ...UPGRADE DATA DIRECTORY NAME command + because a view has valid internal db&table names in this case. 
+ */ + if (thd->lex->sql_command != SQLCOM_ALTER_DB_UPGRADE && + cmp(&ren_table->db, new_db)) + { + my_error(ER_FORBID_SCHEMA_CHANGE, MYF(0), ren_table->db.str, new_db->str); + DBUG_RETURN(1); + } + + ddl_log_rename_view(ddl_log_state, &ren_table->db, + &ren_table->table_name, new_db, new_alias); + debug_crash_here("ddl_log_rename_before_rename_view"); + rc= mysql_rename_view(thd, new_db, new_alias, &ren_table->db, + &ren_table->table_name); + debug_crash_here("ddl_log_rename_after_rename_view"); + if (rc) + { + /* + On error mysql_rename_view() will leave things as such. + */ + ddl_log_disable_entry(ddl_log_state); + debug_crash_here("ddl_log_rename_after_disable_entry"); + } + } + DBUG_RETURN(rc && !skip_error ? 1 : 0); +} + + +/* + Rename all tables in list; Return pointer to wrong entry if something goes + wrong. Note that the table_list may be empty! +*/ + +/* + Rename tables/views in the list + + SYNPOSIS + rename_tables() + thd Thread handle + table_list List of tables to rename + ddl_log_state ddl logging + skip_error Whether to skip errors + if_exists Don't give an error if table doesn't exists + force_if_exists Set to 1 if we have to log the query with 'IF EXISTS' + Otherwise set it to 0 + + DESCRIPTION + Take a table/view name from and odd list element and rename it to a + the name taken from list element+1. Note that the table_list may be + empty. + + RETURN + 0 Ok + 1 error + All tables are reverted to their original names +*/ + +static bool +rename_tables(THD *thd, TABLE_LIST *table_list, DDL_LOG_STATE *ddl_log_state, + bool skip_error, bool if_exists, bool *force_if_exists) +{ + TABLE_LIST *ren_table, *new_table; + List tmp_tables; + DBUG_ENTER("rename_tables"); + + *force_if_exists= 0; + + for (ren_table= table_list; ren_table; ren_table= new_table->next_local) + { + new_table= ren_table->next_local; + + if (is_temporary_table(ren_table)) + { + /* + Store renamed temporary tables into a list. 
+ We don't store these in the ddl log to avoid writes and syncs + when only using temporary tables. We don't need the log as + all temporary tables will disappear anyway in a crash. + */ + TABLE_PAIR *pair= (TABLE_PAIR*) thd->alloc(sizeof(*pair)); + if (! pair || tmp_tables.push_front(pair, thd->mem_root)) + goto revert_rename; + pair->from= ren_table; + pair->to= new_table; + + if (do_rename_temporary(thd, ren_table, new_table)) + goto revert_rename; + } + else + { + int error; + rename_param param; + error= check_rename(thd, ¶m, ren_table, &new_table->db, + &new_table->table_name, + &new_table->alias, (skip_error || if_exists)); + if (error < 0) + continue; // Ignore rename (if exists) + if (error > 0) + goto revert_rename; + + if (do_rename(thd, ¶m, ddl_log_state, + ren_table, &new_table->db, + skip_error, force_if_exists)) + goto revert_rename; + } + } + DBUG_RETURN(0); + +revert_rename: + /* Revert temporary tables. Normal tables are reverted in the caller */ + List_iterator_fast it(tmp_tables); + while (TABLE_PAIR *pair= it++) + do_rename_temporary(thd, pair->to, pair->from); + + DBUG_RETURN(1); +} diff --git a/sql/sql_rename.h b/sql/sql_rename.h new file mode 100644 index 00000000..1f5f94b0 --- /dev/null +++ b/sql/sql_rename.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_RENAME_INCLUDED +#define SQL_RENAME_INCLUDED + +class THD; +struct TABLE_LIST; + +bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list, bool silent, + bool if_exists); + +#endif /* SQL_RENAME_INCLUDED */ diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc new file mode 100644 index 00000000..8bde0f3b --- /dev/null +++ b/sql/sql_repl.cc @@ -0,0 +1,4851 @@ +/* Copyright (c) 2000, 2018, Oracle and/or its affiliates. + Copyright (c) 2008, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "sql_base.h" +#include "sql_parse.h" // check_access +#ifdef HAVE_REPLICATION + +#include "rpl_mi.h" +#include "rpl_rli.h" +#include "sql_repl.h" +#include "log_event.h" +#include "rpl_filter.h" +#include +#include "debug_sync.h" +#include "semisync_master.h" +#include "semisync_slave.h" +#include "mysys_err.h" + + +enum enum_gtid_until_state { + GTID_UNTIL_NOT_DONE, + GTID_UNTIL_STOP_AFTER_STANDALONE, + GTID_UNTIL_STOP_AFTER_TRANSACTION +}; + + +int max_binlog_dump_events = 0; // unlimited +my_bool opt_sporadic_binlog_dump_fail = 0; +#ifndef DBUG_OFF +static int binlog_dump_count = 0; +#endif + +extern TYPELIB binlog_checksum_typelib; + + +static int +fake_event_header(String* packet, Log_event_type event_type, ulong extra_len, + my_bool *do_checksum, ha_checksum *crc, const char** errmsg, + enum enum_binlog_checksum_alg checksum_alg_arg, uint32 end_pos) +{ + char header[LOG_EVENT_HEADER_LEN]; + ulong event_len; + + *do_checksum= checksum_alg_arg != BINLOG_CHECKSUM_ALG_OFF && + checksum_alg_arg != BINLOG_CHECKSUM_ALG_UNDEF; + + /* + 'when' (the timestamp) is set to 0 so that slave could distinguish between + real and fake Rotate events (if necessary) + */ + memset(header, 0, 4); + header[EVENT_TYPE_OFFSET] = (uchar)event_type; + event_len= LOG_EVENT_HEADER_LEN + extra_len + + (*do_checksum ? 
BINLOG_CHECKSUM_LEN : 0); + int4store(header + SERVER_ID_OFFSET, global_system_variables.server_id); + int4store(header + EVENT_LEN_OFFSET, event_len); + int2store(header + FLAGS_OFFSET, LOG_EVENT_ARTIFICIAL_F); + // TODO: check what problems this may cause and fix them + int4store(header + LOG_POS_OFFSET, end_pos); + if (packet->append(header, sizeof(header))) + { + *errmsg= "Failed due to out-of-memory writing event"; + return -1; + } + if (*do_checksum) + { + *crc= my_checksum(0, (uchar*)header, sizeof(header)); + } + return 0; +} + + +static int +fake_event_footer(String *packet, my_bool do_checksum, ha_checksum crc, const char **errmsg) +{ + if (do_checksum) + { + char b[BINLOG_CHECKSUM_LEN]; + int4store(b, crc); + if (packet->append(b, sizeof(b))) + { + *errmsg= "Failed due to out-of-memory writing event checksum"; + return -1; + } + } + return 0; +} + + +static int +fake_event_write(NET *net, String *packet, const char **errmsg) +{ + if (my_net_write(net, (uchar*) packet->ptr(), packet->length())) + { + *errmsg = "failed on my_net_write()"; + return -1; + } + return 0; +} + + +/* + Helper structure, used to pass miscellaneous info from mysql_binlog_send() + into the helper functions that it calls. 
+*/ +struct binlog_send_info { + rpl_binlog_state until_binlog_state; + slave_connection_state gtid_state; + THD *thd; + NET *net; + String *packet; + char *const log_file_name; // ptr/alias to linfo.log_file_name + slave_connection_state *until_gtid_state; + slave_connection_state until_gtid_state_obj; + Format_description_log_event *fdev; + int mariadb_slave_capability; + enum_gtid_skip_type gtid_skip_group; + enum_gtid_until_state gtid_until_group; + ushort flags; + enum enum_binlog_checksum_alg current_checksum_alg; + bool slave_gtid_strict_mode; + bool send_fake_gtid_list; + bool slave_gtid_ignore_duplicates; + bool using_gtid_state; + + int error; + const char *errmsg; + char error_text[MAX_SLAVE_ERRMSG]; + rpl_gtid error_gtid; + + ulonglong heartbeat_period; + + /** start file/pos as requested by slave, for error message */ + char start_log_file_name[FN_REFLEN]; + my_off_t start_pos; + + /** last pos for error message */ + my_off_t last_pos; + +#ifndef DBUG_OFF + int left_events; + uint dbug_reconnect_counter; + ulong hb_info_counter; +#endif + + bool clear_initial_log_pos; + bool should_stop; + size_t dirlen; + + binlog_send_info(THD *thd_arg, String *packet_arg, ushort flags_arg, + char *lfn) + : thd(thd_arg), net(&thd_arg->net), packet(packet_arg), + log_file_name(lfn), until_gtid_state(NULL), fdev(NULL), + gtid_skip_group(GTID_SKIP_NOT), gtid_until_group(GTID_UNTIL_NOT_DONE), + flags(flags_arg), current_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF), + slave_gtid_strict_mode(false), send_fake_gtid_list(false), + slave_gtid_ignore_duplicates(false), + error(0), + errmsg("Unknown error"), + heartbeat_period(0), +#ifndef DBUG_OFF + left_events(max_binlog_dump_events), + dbug_reconnect_counter(0), + hb_info_counter(0), +#endif + clear_initial_log_pos(false), + should_stop(false) + { + error_text[0] = 0; + bzero(&error_gtid, sizeof(error_gtid)); + until_binlog_state.init(); + } +}; + +// prototype +static int reset_transmit_packet(struct binlog_send_info *info, 
ushort flags, + ulong *ev_offset, const char **errmsg); + +/* + fake_rotate_event() builds a fake (=which does not exist physically in any + binlog) Rotate event, which contains the name of the binlog we are going to + send to the slave (because the slave may not know it if it just asked for + MASTER_LOG_FILE='', MASTER_LOG_POS=4). + < 4.0.14, fake_rotate_event() was called only if the requested pos was 4. + After this version we always call it, so that a 3.23.58 slave can rely on + it to detect if the master is 4.0 (and stop) (the _fake_ Rotate event has + zeros in the good positions which, by chance, make it possible for the 3.23 + slave to detect that this event is unexpected) (this is luck which happens + because the master and slave disagree on the size of the header of + Log_event). + + Relying on the event length of the Rotate event instead of these + well-placed zeros was not possible as Rotate events have a variable-length + part. +*/ + +static int fake_rotate_event(binlog_send_info *info, ulonglong position, + const char** errmsg, enum enum_binlog_checksum_alg checksum_alg_arg) +{ + DBUG_ENTER("fake_rotate_event"); + ulong ev_offset; + char buf[ROTATE_HEADER_LEN+100]; + my_bool do_checksum; + int err; + char* p = info->log_file_name+dirname_length(info->log_file_name); + uint ident_len = (uint) strlen(p); + String *packet= info->packet; + ha_checksum crc; + + /* reset transmit packet for the fake rotate event below */ + if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg)) + DBUG_RETURN(1); + + if ((err= fake_event_header(packet, ROTATE_EVENT, + ident_len + ROTATE_HEADER_LEN, &do_checksum, + &crc, + errmsg, checksum_alg_arg, 0))) + { + info->error= ER_UNKNOWN_ERROR; + DBUG_RETURN(err); + } + + int8store(buf+R_POS_OFFSET,position); + packet->append(buf, ROTATE_HEADER_LEN); + packet->append(p, ident_len); + + if (do_checksum) + { + crc= my_checksum(crc, (uchar*)buf, ROTATE_HEADER_LEN); + crc= my_checksum(crc, (uchar*)p, ident_len); + } + 
+ if ((err= fake_event_footer(packet, do_checksum, crc, errmsg)) || + (err= fake_event_write(info->net, packet, errmsg))) + { + info->error= ER_UNKNOWN_ERROR; + DBUG_RETURN(err); + } + DBUG_RETURN(0); +} + + +static int fake_gtid_list_event(binlog_send_info *info, + Gtid_list_log_event *glev, const char** errmsg, + uint32 current_pos) +{ + my_bool do_checksum; + int err; + ha_checksum crc; + char buf[128]; + String str(buf, sizeof(buf), system_charset_info); + String* packet= info->packet; + + str.length(0); + if (glev->to_packet(&str)) + { + info->error= ER_UNKNOWN_ERROR; + *errmsg= "Failed due to out-of-memory writing Gtid_list event"; + return -1; + } + if ((err= fake_event_header(packet, GTID_LIST_EVENT, + str.length(), &do_checksum, &crc, + errmsg, info->current_checksum_alg, current_pos))) + { + info->error= ER_UNKNOWN_ERROR; + return err; + } + + packet->append(str); + if (do_checksum) + { + crc= my_checksum(crc, (uchar*)str.ptr(), str.length()); + } + + if ((err= fake_event_footer(packet, do_checksum, crc, errmsg)) || + (err= fake_event_write(info->net, packet, errmsg))) + { + info->error= ER_UNKNOWN_ERROR; + return err; + } + + return 0; +} + + +/* + Reset thread transmit packet buffer for event sending + + This function allocates header bytes for event transmission, and + should be called before store the event data to the packet buffer. 
+*/ +static int reset_transmit_packet(binlog_send_info *info, ushort flags, + ulong *ev_offset, const char **errmsg) +{ + int ret= 0; + String *packet= &info->thd->packet; + + /* reserve and set default header */ + packet->length(0); + packet->set("\0", 1, &my_charset_bin); + + if (info->thd->semi_sync_slave) + { + if (repl_semisync_master.reserve_sync_header(packet)) + { + info->error= ER_UNKNOWN_ERROR; + *errmsg= "Failed to run hook 'reserve_header'"; + ret= 1; + } + } + + *ev_offset= packet->length(); + return ret; +} + +int get_user_var_int(const char *name, + long long int *value, int *null_value) +{ + bool null_val; + user_var_entry *entry= + (user_var_entry*) my_hash_search(¤t_thd->user_vars, + (uchar*) name, strlen(name)); + if (!entry) + return 1; + *value= entry->val_int(&null_val); + if (null_value) + *null_value= null_val; + return 0; +} + +inline bool is_semi_sync_slave() +{ + int null_value; + long long val= 0; + get_user_var_int("rpl_semi_sync_slave", &val, &null_value); + return val; +} + +static int send_file(THD *thd) +{ + NET* net = &thd->net; + int fd = -1, error = 1; + size_t bytes; + char fname[FN_REFLEN+1]; + const char *errmsg = 0; + int old_timeout; + unsigned long packet_len; + uchar buf[IO_SIZE]; // It's safe to alloc this + DBUG_ENTER("send_file"); + + /* + The client might be slow loading the data, give him wait_timeout to do + the job + */ + old_timeout= net->read_timeout; + my_net_set_read_timeout(net, thd->variables.net_wait_timeout); + + /* + We need net_flush here because the client will not know it needs to send + us the file name until it has processed the load event entry + */ + if (unlikely(net_flush(net))) + { + read_error: + errmsg = "while reading file name"; + goto err; + } + packet_len= my_net_read(net); + if (unlikely(packet_len == packet_error)) + goto read_error; + + // terminate with \0 for fn_format + *((char*)net->read_pos + packet_len) = 0; + fn_format(fname, (char*) net->read_pos + 1, "", "", 4); + // this is 
needed to make replicate-ignore-db + if (!strcmp(fname,"/dev/null")) + goto end; + + if ((fd= mysql_file_open(key_file_send_file, + fname, O_RDONLY, MYF(0))) < 0) + { + errmsg = "on open of file"; + goto err; + } + + while ((long) (bytes= mysql_file_read(fd, buf, IO_SIZE, MYF(0))) > 0) + { + if (my_net_write(net, buf, bytes)) + { + errmsg = "while writing data to client"; + goto err; + } + } + + end: + if (my_net_write(net, (uchar*) "", 0) || net_flush(net) || + (my_net_read(net) == packet_error)) + { + errmsg = "while negotiating file transfer close"; + goto err; + } + error = 0; + + err: + my_net_set_read_timeout(net, old_timeout); + if (fd >= 0) + mysql_file_close(fd, MYF(0)); + if (errmsg) + { + sql_print_error("Failed in send_file() %s", errmsg); + DBUG_PRINT("error", ("%s", errmsg)); + } + DBUG_RETURN(error); +} + + +/** + Internal to mysql_binlog_send() routine that recalculates checksum for + 1. FD event (asserted) that needs additional arrangement prior sending to slave. + 2. Start_encryption_log_event whose Ignored flag is set +TODO DBUG_ASSERT can be removed if this function is used for more general cases +*/ + +inline void fix_checksum(enum_binlog_checksum_alg checksum_alg, String *packet, + ulong ev_offset) +{ + if (checksum_alg == BINLOG_CHECKSUM_ALG_OFF || + checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF) + return; + /* recalculate the crc for this event */ + uint data_len = uint4korr(packet->ptr() + ev_offset + EVENT_LEN_OFFSET); + ha_checksum crc; + DBUG_ASSERT((data_len == + LOG_EVENT_MINIMAL_HEADER_LEN + FORMAT_DESCRIPTION_HEADER_LEN + + BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN) || + (data_len == + LOG_EVENT_MINIMAL_HEADER_LEN + BINLOG_CRYPTO_SCHEME_LENGTH + + BINLOG_KEY_VERSION_LENGTH + BINLOG_NONCE_LENGTH + + BINLOG_CHECKSUM_LEN)); + crc= my_checksum(0, (uchar *)packet->ptr() + ev_offset, data_len - + BINLOG_CHECKSUM_LEN); + int4store(packet->ptr() + ev_offset + data_len - BINLOG_CHECKSUM_LEN, crc); +} + + +static user_var_entry * 
get_binlog_checksum_uservar(THD * thd) +{ + LEX_CSTRING name= { STRING_WITH_LEN("master_binlog_checksum")}; + user_var_entry *entry= + (user_var_entry*) my_hash_search(&thd->user_vars, (uchar*) name.str, + name.length); + return entry; +} + +/** + Function for calling in mysql_binlog_send + to check if slave initiated checksum-handshake. + + @param[in] thd THD to access a user variable + + @return TRUE if handshake took place, FALSE otherwise +*/ + +static bool is_slave_checksum_aware(THD * thd) +{ + DBUG_ENTER("is_slave_checksum_aware"); + user_var_entry *entry= get_binlog_checksum_uservar(thd); + DBUG_RETURN(entry? true : false); +} + +/** + Function for calling in mysql_binlog_send + to get the value of @@binlog_checksum of the master at + time of checksum-handshake. + + The value tells the master whether to compute or not, and the slave + to verify or not the first artificial Rotate event's checksum. + + @param[in] thd THD to access a user variable + + @return value of @@binlog_checksum alg according to + @c enum enum_binlog_checksum_alg +*/ + +static enum enum_binlog_checksum_alg get_binlog_checksum_value_at_connect(THD * thd) +{ + enum enum_binlog_checksum_alg ret; + + DBUG_ENTER("get_binlog_checksum_value_at_connect"); + user_var_entry *entry= get_binlog_checksum_uservar(thd); + if (!entry) + { + ret= BINLOG_CHECKSUM_ALG_UNDEF; + } + else + { + DBUG_ASSERT(entry->type == STRING_RESULT); + String str; + uint dummy_errors; + str.copy(entry->value, entry->length, &my_charset_bin, &my_charset_bin, + &dummy_errors); + ret= (enum_binlog_checksum_alg) + (find_type ((char*) str.ptr(), &binlog_checksum_typelib, 1) - 1); + DBUG_ASSERT(ret <= BINLOG_CHECKSUM_ALG_CRC32); // while it's just on CRC32 alg + } + DBUG_RETURN(ret); +} + + +/** + Set current_linfo + + Setting current_linfo needs to be done with LOCK_thd_data to ensure that + adjust_linfo_offsets doesn't use a structure that may be deleted. 
+*/ + +void THD::set_current_linfo(LOG_INFO *linfo) +{ + mysql_mutex_lock(&LOCK_thd_data); + current_linfo= linfo; + mysql_mutex_unlock(&LOCK_thd_data); +} + + +/* + Adjust the position pointer in the binary log file for all running slaves + + SYNOPSIS + adjust_linfo_offsets() + purge_offset Number of bytes removed from start of log index file + + NOTES + - This is called when doing a PURGE when we delete lines from the + index log file + + REQUIREMENTS + - Before calling this function, we have to ensure that no threads are + using any binary log file before purge_offset.a + + TODO + - Inform the slave threads that they should sync the position + in the binary log file with Relay_log_info::flush(). + Now they sync is done for next read. +*/ + +static my_bool adjust_callback(THD *thd, my_off_t *purge_offset) +{ + mysql_mutex_lock(&thd->LOCK_thd_data); + if (auto linfo= thd->current_linfo) + { + /* + Index file offset can be less that purge offset only if + we just started reading the index file. 
In that case + we have nothing to adjust + */ + if (linfo->index_file_offset < *purge_offset) + linfo->fatal= (linfo->index_file_offset != 0); + else + linfo->index_file_offset-= *purge_offset; + } + mysql_mutex_unlock(&thd->LOCK_thd_data); + return 0; +} + + +void adjust_linfo_offsets(my_off_t purge_offset) +{ + server_threads.iterate(adjust_callback, &purge_offset); +} + + +static my_bool log_in_use_callback(THD *thd, const char *log_name) +{ + my_bool result= 0; + mysql_mutex_lock(&thd->LOCK_thd_data); + if (auto linfo= thd->current_linfo) + result= !strcmp(log_name, linfo->log_file_name); + mysql_mutex_unlock(&thd->LOCK_thd_data); + return result; +} + + +bool log_in_use(const char* log_name) +{ + return server_threads.iterate(log_in_use_callback, log_name); +} + +bool purge_error_message(THD* thd, int res) +{ + uint errcode; + + if ((errcode= purge_log_get_error_code(res)) != 0) + { + my_message(errcode, ER_THD(thd, errcode), MYF(0)); + return TRUE; + } + my_ok(thd); + return FALSE; +} + + +/** + Execute a PURGE BINARY LOGS TO command. + + @param thd Pointer to THD object for the client thread executing the + statement. + + @param to_log Name of the last log to purge. + + @retval FALSE success + @retval TRUE failure +*/ +bool purge_master_logs(THD* thd, const char* to_log) +{ + char search_file_name[FN_REFLEN]; + if (!mysql_bin_log.is_open()) + { + my_ok(thd); + return FALSE; + } + + mysql_bin_log.make_log_name(search_file_name, to_log); + return purge_error_message(thd, + mysql_bin_log.purge_logs(search_file_name, 0, 1, + 1, NULL)); +} + + +/** + Execute a PURGE BINARY LOGS BEFORE command. + + @param thd Pointer to THD object for the client thread executing the + statement. + + @param purge_time Date before which logs should be purged. 
+ + @retval FALSE success + @retval TRUE failure +*/ +bool purge_master_logs_before_date(THD* thd, time_t purge_time) +{ + if (!mysql_bin_log.is_open()) + { + my_ok(thd); + return 0; + } + return purge_error_message(thd, + mysql_bin_log.purge_logs_before_date(purge_time)); +} + +void set_read_error(binlog_send_info *info, int error) +{ + if (error == LOG_READ_EOF) + { + return; + } + info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG; + switch (error) { + case LOG_READ_BOGUS: + info->errmsg= "bogus data in log event"; + break; + case LOG_READ_TOO_LARGE: + info->errmsg= "log event entry exceeded max_allowed_packet; " + "Increase max_allowed_packet on master"; + break; + case LOG_READ_IO: + info->errmsg= "I/O error reading log event"; + break; + case LOG_READ_MEM: + info->errmsg= "memory allocation failed reading log event"; + break; + case LOG_READ_TRUNC: + info->errmsg= "binlog truncated in the middle of event; " + "consider out of disk space on master"; + break; + case LOG_READ_CHECKSUM_FAILURE: + info->errmsg= "event read from binlog did not pass crc check"; + break; + case LOG_READ_DECRYPT: + info->errmsg= "event decryption failure"; + break; + default: + info->errmsg= "unknown error reading log event on the master"; + break; + } +} + + +/** + An auxiliary function for calling in mysql_binlog_send + to initialize the heartbeat timeout in waiting for a binlogged event. + + @param[in] thd THD to access a user variable + + @return heartbeat period an ulonglong of nanoseconds + or zero if heartbeat was not demanded by slave +*/ +static ulonglong get_heartbeat_period(THD * thd) +{ + bool null_value; + LEX_CSTRING name= { STRING_WITH_LEN("master_heartbeat_period")}; + user_var_entry *entry= + (user_var_entry*) my_hash_search(&thd->user_vars, (uchar*) name.str, + name.length); + return entry? entry->val_int(&null_value) : 0; +} + +/* + Lookup the capabilities of the slave, which it announces by setting a value + MARIA_SLAVE_CAPABILITY_XXX in @mariadb_slave_capability. 
+ + Older MariaDB slaves, and other MySQL slaves, do not set + @mariadb_slave_capability, corresponding to a capability of + MARIA_SLAVE_CAPABILITY_UNKNOWN (0). +*/ +static int +get_mariadb_slave_capability(THD *thd) +{ + bool null_value; + const LEX_CSTRING name= { STRING_WITH_LEN("mariadb_slave_capability") }; + const user_var_entry *entry= + (user_var_entry*) my_hash_search(&thd->user_vars, (uchar*) name.str, + name.length); + return entry ? + (int)(entry->val_int(&null_value)) : MARIA_SLAVE_CAPABILITY_UNKNOWN; +} + + +/* + Get the value of the @slave_connect_state user variable into the supplied + String (this is the GTID connect state requested by the connecting slave). + + Returns false if error (ie. slave did not set the variable and does not + want to use GTID to set start position), true if success. +*/ +static bool +get_slave_connect_state(THD *thd, String *out_str) +{ + bool null_value; + + const LEX_CSTRING name= { STRING_WITH_LEN("slave_connect_state") }; + user_var_entry *entry= + (user_var_entry*) my_hash_search(&thd->user_vars, (uchar*) name.str, + name.length); + return entry && entry->val_str(&null_value, out_str, 0) && !null_value; +} + + +static bool +get_slave_gtid_strict_mode(THD *thd) +{ + bool null_value; + + const LEX_CSTRING name= { STRING_WITH_LEN("slave_gtid_strict_mode") }; + user_var_entry *entry= + (user_var_entry*) my_hash_search(&thd->user_vars, (uchar*) name.str, + name.length); + return entry && entry->val_int(&null_value) && !null_value; +} + + +static bool +get_slave_gtid_ignore_duplicates(THD *thd) +{ + bool null_value; + + const LEX_CSTRING name= { STRING_WITH_LEN("slave_gtid_ignore_duplicates") }; + user_var_entry *entry= + (user_var_entry*) my_hash_search(&thd->user_vars, (uchar*) name.str, + name.length); + return entry && entry->val_int(&null_value) && !null_value; +} + + +/* + Get the value of the @slave_until_gtid user variable into the supplied + String (this is the GTID position specified for START SLAVE UNTIL + 
master_gtid_pos='xxx'). + + Returns false if error (ie. slave did not set the variable and is not doing + START SLAVE UNTIL mater_gtid_pos='xxx'), true if success. +*/ +static bool +get_slave_until_gtid(THD *thd, String *out_str) +{ + bool null_value; + + const LEX_CSTRING name= { STRING_WITH_LEN("slave_until_gtid") }; + user_var_entry *entry= + (user_var_entry*) my_hash_search(&thd->user_vars, (uchar*) name.str, + name.length); + return entry && entry->val_str(&null_value, out_str, 0) && !null_value; +} + + +/* + Function prepares and sends repliation heartbeat event. + + @param net net object of THD + @param packet buffer to store the heartbeat instance + @param event_coordinates binlog file name and position of the last + real event master sent from binlog + + @note + Among three essential pieces of heartbeat data Log_event::when + is computed locally. + The error to send is serious and should force terminating + the dump thread. +*/ +static int send_heartbeat_event(binlog_send_info *info, + NET* net, String* packet, + const struct event_coordinates *coord, + enum enum_binlog_checksum_alg checksum_alg_arg) +{ + DBUG_ENTER("send_heartbeat_event"); + + ulong ev_offset; + char sub_header_buf[HB_SUB_HEADER_LEN]; + bool sub_header_in_use=false; + if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg)) + DBUG_RETURN(1); + + char header[LOG_EVENT_HEADER_LEN]; + my_bool do_checksum= checksum_alg_arg != BINLOG_CHECKSUM_ALG_OFF && + checksum_alg_arg != BINLOG_CHECKSUM_ALG_UNDEF; + /* + 'when' (the timestamp) is set to 0 so that slave could distinguish between + real and fake Rotate events (if necessary) + */ + memset(header, 0, 4); // when + + header[EVENT_TYPE_OFFSET] = HEARTBEAT_LOG_EVENT; + + char* p= coord->file_name + dirname_length(coord->file_name); + + size_t ident_len = strlen(p); + size_t event_len = ident_len + LOG_EVENT_HEADER_LEN + + (do_checksum ? 
BINLOG_CHECKSUM_LEN : 0); + int4store(header + SERVER_ID_OFFSET, global_system_variables.server_id); + DBUG_EXECUTE_IF("simulate_pos_4G", + { + const_cast(coord)->pos= (UINT_MAX32 + (ulong)1); + DBUG_SET("-d, simulate_pos_4G"); + };); + if (coord->pos <= UINT_MAX32) + { + int4store(header + LOG_POS_OFFSET, coord->pos); // log_pos + } + else + { + // Set common_header.log_pos=0 to indicate its overflow + int4store(header + LOG_POS_OFFSET, 0); + sub_header_in_use= true; + int8store(sub_header_buf, coord->pos); + event_len+= HB_SUB_HEADER_LEN; + } + + int4store(header + EVENT_LEN_OFFSET, event_len); + int2store(header + FLAGS_OFFSET, 0); + + packet->append(header, sizeof(header)); + if (sub_header_in_use) + packet->append(sub_header_buf, sizeof(sub_header_buf)); + packet->append(p, ident_len); // log_file_name + + if (do_checksum) + { + char b[BINLOG_CHECKSUM_LEN]; + ha_checksum crc= my_checksum(0, (uchar*) header, sizeof(header)); + if (sub_header_in_use) + crc= my_checksum(crc, (uchar*) sub_header_buf, sizeof(sub_header_buf)); + crc= my_checksum(crc, (uchar*) p, ident_len); + int4store(b, crc); + packet->append(b, sizeof(b)); + } + + if (my_net_write(net, (uchar*) packet->ptr(), packet->length()) || + net_flush(net)) + { + info->error= ER_UNKNOWN_ERROR; + DBUG_RETURN(-1); + } + + DBUG_RETURN(0); +} + + +struct binlog_file_entry +{ + binlog_file_entry *next; + LEX_CSTRING name; + my_off_t size; +}; + +/** + Read all binary logs and return as a list + + @param memroot Use this for mem_root calls + @param reverse If set filenames returned in latest first order (reverse + order than in the index file) + @param already_locked If set, index file is already locked. 
+ + @return 0 error + # pointer to list + + @notes + index_file is always unlocked at return +*/ + +static binlog_file_entry * +get_binlog_list(MEM_ROOT *memroot, bool reverse= true, + bool already_locked= false) +{ + IO_CACHE *index_file; + char *fname, *buff, *end_pos; + binlog_file_entry *current_list= NULL, *current_link= NULL, *e; + DBUG_ENTER("get_binlog_list"); + + if (!mysql_bin_log.is_open()) + { + if (already_locked) + mysql_bin_log.unlock_index(); + my_error(ER_NO_BINARY_LOGGING, MYF(0)); + DBUG_RETURN(NULL); + } + if (!already_locked) + mysql_bin_log.lock_index(); + index_file=mysql_bin_log.get_index_file(); + reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 0); + + if (!(buff= (char*) alloc_root(memroot, + (size_t) (index_file->end_of_file+1)))) + goto err; + if (my_b_read(index_file, (uchar*) buff, (size_t) index_file->end_of_file)) + { + my_error(EE_READ, MYF(ME_ERROR_LOG), my_filename(index_file->file), + my_errno); + goto err; + } + buff[index_file->end_of_file]= 0; // For strchr + mysql_bin_log.unlock_index(); + + /* The file ends with EOF or empty line */ + for (fname= buff; + (end_pos= strchr(fname, '\n')) && (end_pos - fname) > 1; + fname= end_pos+1) + { + end_pos[0]= '\0'; // remove the newline + if (!(e= (binlog_file_entry *) alloc_root(memroot, sizeof(*e)))) + DBUG_RETURN(NULL); + if (reverse) + { + e->next= current_list; + current_list= e; + } + else + { + e->next= NULL; + if (!current_link) + current_list= e; + else + current_link->next= e; + current_link= e; + } + e->name.str= fname; + e->name.length= (size_t) (end_pos - fname); + } + DBUG_RETURN(current_list); + +err: + mysql_bin_log.unlock_index(); + DBUG_RETURN(0); +} + + +/* + Check if every GTID requested by the slave is contained in this (or a later) + binlog file. Return true if so, false if not. + + We do the check with a single scan of the list of GTIDs, avoiding the need + to build an in-memory hash or stuff like that. 
+ + We need to check that slave did not request GTID D-S-N1, when the + Gtid_list_log_event for this binlog file has D-S-N2 with N2 >= N1. + (Because this means that requested GTID is in an earlier binlog). + However, if the Gtid_list_log_event indicates that D-S-N1 is the very last + GTID for domain D in prior binlog files, then it is ok to start from the + very start of this binlog file. This special case is important, as it + allows to purge old logs even if some domain is unused for long. + + In addition, we need to check that we do not have a GTID D-S-N3 in the + Gtid_list_log_event where D is not present in the requested slave state at + all. Since if D is not in requested slave state, it means that slave needs + to start at the very first GTID in domain D. +*/ +static bool +contains_all_slave_gtid(slave_connection_state *st, Gtid_list_log_event *glev) +{ + uint32 i; + + for (i= 0; i < glev->count; ++i) + { + uint32 gl_domain_id= glev->list[i].domain_id; + const rpl_gtid *gtid= st->find(gl_domain_id); + if (!gtid) + { + /* + The slave needs to start from the very beginning of this domain, which + is in an earlier binlog file. So we need to search back further. + */ + return false; + } + if (gtid->server_id == glev->list[i].server_id && + gtid->seq_no <= glev->list[i].seq_no) + { + /* + The slave needs to start after gtid, but it is contained in an earlier + binlog file. So we need to search back further, unless it was the very + last gtid logged for the domain in earlier binlog files. + */ + if (gtid->seq_no < glev->list[i].seq_no) + return false; + + /* + The slave requested D-S-N1, which happens to be the last GTID logged + in prior binlog files with same domain id D and server id S. + + The Gtid_list is kept sorted on domain_id, with the last GTID in each + domain_id group being the last one logged. 
So if this is the last GTID + within the domain_id group, then it is ok to start from the very + beginning of this group, per the special case explained in comment at + the start of this function. If not, then we need to search back further. + */ + if (i+1 < glev->count && gl_domain_id == glev->list[i+1].domain_id) + return false; + } + } + + return true; +} + + +static void +give_error_start_pos_missing_in_binlog(int *err, const char **errormsg, + rpl_gtid *error_gtid) +{ + rpl_gtid binlog_gtid; + + if (mysql_bin_log.lookup_domain_in_binlog_state(error_gtid->domain_id, + &binlog_gtid) && + binlog_gtid.seq_no >= error_gtid->seq_no) + { + *errormsg= "Requested slave GTID state not found in binlog. The slave has " + "probably diverged due to executing erroneous transactions"; + *err= ER_GTID_POSITION_NOT_FOUND_IN_BINLOG2; + } + else + { + *errormsg= "Requested slave GTID state not found in binlog"; + *err= ER_GTID_POSITION_NOT_FOUND_IN_BINLOG; + } +} + + +/* + Check the start GTID state requested by the slave against our binlog state. + + Give an error if the slave requests something that we do not have in our + binlog. 
*/

/*
  Returns 0 on success. On failure, returns a non-zero error code, sets
  *errormsg, and — when the requested position is missing from the binlog —
  fills in *error_gtid with the offending GTID.
*/
static int
check_slave_start_position(binlog_send_info *info, const char **errormsg,
                           rpl_gtid *error_gtid)
{
  uint32 i;
  int err;
  /* Deletions are queued here and applied after the loop (see below). */
  slave_connection_state::entry **delete_list= NULL;
  uint32 delete_idx= 0;
  slave_connection_state *st= &info->gtid_state;

  if (rpl_load_gtid_slave_state(info->thd))
  {
    *errormsg= "Failed to load replication slave GTID state";
    err= ER_CANNOT_LOAD_SLAVE_GTID_STATE;
    goto end;
  }

  for (i= 0; i < st->hash.records; ++i)
  {
    slave_connection_state::entry *slave_gtid_entry=
      (slave_connection_state::entry *)my_hash_element(&st->hash, i);
    rpl_gtid *slave_gtid= &slave_gtid_entry->gtid;
    rpl_gtid master_gtid;
    rpl_gtid master_replication_gtid;
    rpl_gtid start_gtid;
    /*
      True when the connecting slave asks to start at exactly the GTID that
      we ourselves last applied in this domain while acting as a slave.
    */
    bool start_at_own_slave_pos=
      rpl_global_gtid_slave_state->domain_to_gtid(slave_gtid->domain_id,
                                                  &master_replication_gtid) &&
      slave_gtid->server_id == master_replication_gtid.server_id &&
      slave_gtid->seq_no == master_replication_gtid.seq_no;

    /* Common case: the requested GTID is present in our binlog state. */
    if (mysql_bin_log.find_in_binlog_state(slave_gtid->domain_id,
                                           slave_gtid->server_id,
                                           &master_gtid) &&
        master_gtid.seq_no >= slave_gtid->seq_no)
    {
      /*
        If connecting slave requests to start at the GTID we last applied when
        we were ourselves a slave, then this GTID may not exist in our binlog
        (in case of --log-slave-updates=0). So set the flag to disable the
        error about missing GTID in the binlog in this case.
      */
      if (start_at_own_slave_pos)
        slave_gtid_entry->flags|= slave_connection_state::START_OWN_SLAVE_POS;
      continue;
    }

    if (!start_at_own_slave_pos)
    {
      rpl_gtid domain_gtid;
      slave_connection_state *until_gtid_state= info->until_gtid_state;
      rpl_gtid *until_gtid;

      if (!mysql_bin_log.lookup_domain_in_binlog_state(slave_gtid->domain_id,
                                                       &domain_gtid))
      {
        /*
          We do not have anything in this domain, neither in the binlog nor
          in the slave state. So we are probably one master in a multi-master
          setup, and this domain is served by a different master.

          But set a flag so that if we then ever _do_ happen to encounter
          anything in this domain, then we will re-check that the requested
          slave position exists, and give the error at that time if not.
        */
        slave_gtid_entry->flags|= slave_connection_state::START_ON_EMPTY_DOMAIN;
        continue;
      }

      if (info->slave_gtid_ignore_duplicates &&
          domain_gtid.seq_no < slave_gtid->seq_no)
      {
        /*
          When --gtid-ignore-duplicates, it is ok for the slave to request
          something that we do not have (yet) - they might already have gotten
          it through another path in a multi-path replication hierarchy.
        */
        continue;
      }

      if (until_gtid_state &&
          ( !(until_gtid= until_gtid_state->find(slave_gtid->domain_id)) ||
            (mysql_bin_log.find_in_binlog_state(until_gtid->domain_id,
                                                until_gtid->server_id,
                                                &master_gtid) &&
             master_gtid.seq_no >= until_gtid->seq_no)))
      {
        /*
          The slave requested to start from a position that is not (yet) in
          our binlog, but it also specified an UNTIL condition that _is_ in
          our binlog (or a missing UNTIL, which means stop at the very
          beginning). So the stop position is before the start position, and
          we just delete the entry from the UNTIL hash to mark that this
          domain has already reached the UNTIL condition.
        */
        if(until_gtid)
          until_gtid_state->remove(until_gtid);
        continue;
      }

      /* Genuinely missing position: report and bail out. */
      *error_gtid= *slave_gtid;
      give_error_start_pos_missing_in_binlog(&err, errormsg, error_gtid);
      goto end;
    }

    /*
      Ok, so connecting slave asked to start at a GTID that we do not have in
      our binlog, but it was in fact the last GTID we applied earlier, when we
      were acting as a replication slave.

      So this means that we were running as a replication slave without
      --log-slave-updates, but now we switched to be a master. It is worth it
      to handle this special case, as it allows users to run a simple
      master -> slave without --log-slave-updates, and then exchange slave and
      master, as long as they make sure the slave is caught up before switching.
    */

    /*
      First check if we logged something ourselves as a master after being a
      slave. This will be seen as a GTID with our own server_id and bigger
      seq_no than what is in the slave state.

      If we did not log anything ourselves, then start the connecting slave
      replicating from the current binlog end position, which in this case
      corresponds to our replication slave state and hence what the connecting
      slave is requesting.
    */
    if (mysql_bin_log.find_in_binlog_state(slave_gtid->domain_id,
                                           global_system_variables.server_id,
                                           &start_gtid) &&
        start_gtid.seq_no > slave_gtid->seq_no)
    {
      /*
        Start replication within this domain at the first GTID that we logged
        ourselves after becoming a master.

        Remember that this starting point is in fact a "fake" GTID which may
        not exists in the binlog, so that we do not complain about it in
        --gtid-strict-mode.
      */
      slave_gtid->server_id= global_system_variables.server_id;
      slave_gtid_entry->flags|= slave_connection_state::START_OWN_SLAVE_POS;
    }
    else if (mysql_bin_log.lookup_domain_in_binlog_state(slave_gtid->domain_id,
                                                         &start_gtid))
    {
      /* Start from the current end of the binlog for this domain. */
      slave_gtid->server_id= start_gtid.server_id;
      slave_gtid->seq_no= start_gtid.seq_no;
    }
    else
    {
      /*
        We do not have _anything_ in our own binlog for this domain. Just
        delete the entry in the slave connection state, then it will pick up
        anything new that arrives.

        We just queue up the deletion and do it later, after the loop, so that
        we do not mess up the iteration over the hash.
      */
      if (!delete_list)
      {
        /* Lazily allocate worst-case room: one slot per hash entry. */
        if (!(delete_list= (slave_connection_state::entry **)
              my_malloc(PSI_INSTRUMENT_ME,
                        sizeof(*delete_list) * st->hash.records, MYF(MY_WME))))
        {
          *errormsg= "Out of memory while checking slave start position";
          err= ER_OUT_OF_RESOURCES;
          goto end;
        }
      }
      delete_list[delete_idx++]= slave_gtid_entry;
    }
  }

  /* Do any delayed deletes from the hash. */
  if (delete_list)
  {
    for (i= 0; i < delete_idx; ++i)
      st->remove(&(delete_list[i]->gtid));
  }
  err= 0;

end:
  if (delete_list)
    my_free(delete_list);
  return err;
}

/*
  Find the name of the binlog file to start reading for a slave that connects
  using GTID state.

  Returns the file name in out_name, which must be of size at least FN_REFLEN.

  Returns NULL on ok, error message on error.

  In case of non-error return, the returned binlog file is guaranteed to
  contain the first event to be transmitted to the slave for every domain
  present in our binlogs. It is still necessary to skip all GTIDs up to
  and including the GTID requested by slave within each domain.

  However, as a special case, if the event to be sent to the slave is the very
  first event (within that domain) in the returned binlog, then nothing should
  be skipped, so that domain is deleted from the passed in slave connection
  state.

  This is necessary in case the slave requests a GTID within a replication
  domain that has long been inactive. The binlog file containing that GTID may
  have been long since purged. However, as long as no GTIDs after that have
  been purged, we have the GTID requested by slave in the Gtid_list_log_event
  of the latest binlog. So we can start from there, as long as we delete the
  corresponding entry in the slave state so we do not wrongly skip any events
  that might turn up if that domain becomes active again, vainly looking for
  the requested GTID that was already purged.
*/
static const char *
gtid_find_binlog_file(slave_connection_state *state, char *out_name,
                      slave_connection_state *until_gtid_state)
{
  MEM_ROOT memroot;
  binlog_file_entry *list;
  Gtid_list_log_event *glev= NULL;
  const char *errormsg= NULL;
  char buf[FN_REFLEN];

  init_alloc_root(PSI_INSTRUMENT_ME, &memroot,
                  10*(FN_REFLEN+sizeof(binlog_file_entry)), 0,
                  MYF(MY_THREAD_SPECIFIC));
  /*
    get_binlog_list() defaults to reverse (latest-first) order, so the scan
    below walks from the newest binlog file backwards in time.
  */
  if (!(list= get_binlog_list(&memroot)))
  {
    errormsg= "Out of memory while looking for GTID position in binlog";
    goto end;
  }

  while (list)
  {
    File file;
    IO_CACHE cache;

    if (!list->next)
    {
      /*
        It should be safe to read the currently used binlog, as we will only
        read the header part that is already written.

        But if that does not work on windows, then we will need to cache the
        event somewhere in memory I suppose - that could work too.

        NOTE(review): with the latest-first list order, !list->next is the
        oldest file rather than the current one; this empty block looks
        vestigial — confirm against upstream history.
      */
    }
    /*
      Read the Gtid_list_log_event at the start of the binlog file to
      get the binlog state.
    */
    if (normalize_binlog_name(buf, list->name.str, false))
    {
      errormsg= "Failed to determine binlog file name while looking for "
        "GTID position in binlog";
      goto end;
    }
    bzero((char*) &cache, sizeof(cache));
    if (unlikely((file= open_binlog(&cache, buf, &errormsg)) == (File)-1))
      goto end;
    errormsg= get_gtid_list_event(&cache, &glev);
    end_io_cache(&cache);
    mysql_file_close(file, MYF(MY_WME));
    if (unlikely(errormsg))
      goto end;

    /* No Gtid_list event means an old pre-GTID binlog: accept it as-is. */
    if (!glev || contains_all_slave_gtid(state, glev))
    {
      strmake(out_name, buf, FN_REFLEN);

      if (glev)
      {
        uint32 i;

        /*
          As a special case, we allow to start from binlog file N if the
          requested GTID is the last event (in the corresponding domain) in
          binlog file (N-1), but then we need to remove that GTID from the slave
          state, rather than skipping events waiting for it to turn up.

          If slave is doing START SLAVE UNTIL, check for any UNTIL conditions
          that are already included in a previous binlog file. Delete any such
          from the UNTIL hash, to mark that such domains have already reached
          their UNTIL condition.
        */
        for (i= 0; i < glev->count; ++i)
        {
          const rpl_gtid *gtid= state->find(glev->list[i].domain_id);
          if (!gtid)
          {
            /*
              Contains_all_slave_gtid() returns false if there is any domain in
              Gtid_list_event which is not in the requested slave position.

              We may delete a domain from the slave state inside this loop, but
              we only do this when it is the very last GTID logged for that
              domain in earlier binlogs, and then we can not encounter it in any
              further GTIDs in the Gtid_list.
            */
            DBUG_ASSERT(0);
          } else if (gtid->server_id == glev->list[i].server_id &&
                     gtid->seq_no == glev->list[i].seq_no)
          {
            /*
              The slave requested to start from the very beginning of this
              domain in this binlog file. So delete the entry from the state,
              we do not need to skip anything.
            */
            state->remove(gtid);
          }

          if (until_gtid_state &&
              (gtid= until_gtid_state->find(glev->list[i].domain_id)) &&
              gtid->server_id == glev->list[i].server_id &&
              gtid->seq_no <= glev->list[i].seq_no)
          {
            /*
              We've already reached the stop position in UNTIL for this domain,
              since it is before the start position.
            */
            until_gtid_state->remove(gtid);
          }
        }
      }

      goto end;
    }
    /* This file is too new; free its Gtid_list and try the next-older one. */
    delete glev;
    glev= NULL;
    list= list->next;
  }

  /* We reached the end without finding anything. */
  errormsg= "Could not find GTID state requested by slave in any binlog "
    "files. Probably the slave state is too old and required binlog files "
    "have been purged.";

end:
  if (glev)
    delete glev;

  free_root(&memroot, MYF(0));
  return errormsg;
}


/*
  Given an old-style binlog position with file name and file offset, find the
  corresponding gtid position. If the offset is not at an event boundary, give
  an error.

  Return NULL on ok, error message string on error.

  ToDo: Improve the performance of this by using binlog index files.
+*/ +static const char * +gtid_state_from_pos(const char *name, uint32 offset, + slave_connection_state *gtid_state) +{ + IO_CACHE cache; + File file; + const char *errormsg= NULL; + bool found_gtid_list_event= false; + bool found_format_description_event= false; + bool valid_pos= false; + enum enum_binlog_checksum_alg current_checksum_alg= BINLOG_CHECKSUM_ALG_UNDEF; + int err; + String packet; + Format_description_log_event *fdev= NULL; + + if (unlikely(gtid_state->load((const rpl_gtid *)NULL, 0))) + { + errormsg= "Internal error (out of memory?) initializing slave state " + "while scanning binlog to find start position"; + return errormsg; + } + + if (unlikely((file= open_binlog(&cache, name, &errormsg)) == (File)-1)) + return errormsg; + + if (!(fdev= new Format_description_log_event(3))) + { + errormsg= "Out of memory initializing format_description event " + "while scanning binlog to find start position"; + goto end; + } + + /* + First we need to find the initial GTID_LIST_EVENT. We need this even + if the offset is at the very start of the binlog file. + + But if we do not find any GTID_LIST_EVENT, then this is an old binlog + with no GTID information, so we return empty GTID state. + */ + for (;;) + { + Log_event_type typ; + uint32 cur_pos; + + cur_pos= (uint32)my_b_tell(&cache); + if (cur_pos == offset) + valid_pos= true; + if (found_format_description_event && found_gtid_list_event && + cur_pos >= offset) + break; + + packet.length(0); + err= Log_event::read_log_event(&cache, &packet, fdev, + opt_master_verify_checksum ? current_checksum_alg + : BINLOG_CHECKSUM_ALG_OFF); + if (unlikely(err)) + { + errormsg= "Could not read binlog while searching for slave start " + "position on master"; + goto end; + } + /* + The cast to uchar is needed to avoid a signed char being converted to a + negative number. 
+ */ + typ= (Log_event_type)(uchar)packet[EVENT_TYPE_OFFSET]; + if (typ == FORMAT_DESCRIPTION_EVENT) + { + Format_description_log_event *tmp; + + if (unlikely(found_format_description_event)) + { + errormsg= "Duplicate format description log event found while " + "searching for old-style position in binlog"; + goto end; + } + + current_checksum_alg= get_checksum_alg((uchar*) packet.ptr(), + packet.length()); + found_format_description_event= true; + if (unlikely(!(tmp= new Format_description_log_event((uchar*) packet.ptr(), + packet.length(), + fdev)))) + { + errormsg= "Corrupt Format_description event found or out-of-memory " + "while searching for old-style position in binlog"; + goto end; + } + delete fdev; + fdev= tmp; + } + else if (typ == START_ENCRYPTION_EVENT) + { + uint sele_len = packet.length(); + if (current_checksum_alg == BINLOG_CHECKSUM_ALG_CRC32) + { + sele_len -= BINLOG_CHECKSUM_LEN; + } + Start_encryption_log_event sele((uchar*) packet.ptr(), sele_len, fdev); + if (fdev->start_decryption(&sele)) + { + errormsg= "Could not start decryption of binlog."; + goto end; + } + } + else if (unlikely(typ != FORMAT_DESCRIPTION_EVENT && + !found_format_description_event)) + { + errormsg= "Did not find format description log event while searching " + "for old-style position in binlog"; + goto end; + } + else if (typ == ROTATE_EVENT || typ == STOP_EVENT || + typ == BINLOG_CHECKPOINT_EVENT) + continue; /* Continue looking */ + else if (typ == GTID_LIST_EVENT) + { + rpl_gtid *gtid_list; + bool status; + uint32 list_len; + + if (unlikely(found_gtid_list_event)) + { + errormsg= "Found duplicate Gtid_list_log_event while scanning binlog " + "to find slave start position"; + goto end; + } + status= Gtid_list_log_event::peek(packet.ptr(), packet.length(), + current_checksum_alg, + >id_list, &list_len, fdev); + if (unlikely(status)) + { + errormsg= "Error reading Gtid_list_log_event while searching " + "for old-style position in binlog"; + goto end; + } + err= 
gtid_state->load(gtid_list, list_len); + my_free(gtid_list); + if (unlikely(err)) + { + errormsg= "Internal error (out of memory?) initialising slave state " + "while scanning binlog to find start position"; + goto end; + } + found_gtid_list_event= true; + } + else if (unlikely(!found_gtid_list_event)) + { + /* We did not find any Gtid_list_log_event, must be old binlog. */ + goto end; + } + else if (typ == GTID_EVENT) + { + rpl_gtid gtid; + uchar flags2; + if (unlikely(Gtid_log_event::peek((uchar*) packet.ptr(), packet.length(), + current_checksum_alg, >id.domain_id, + >id.server_id, >id.seq_no, &flags2, + fdev))) + { + errormsg= "Corrupt gtid_log_event found while scanning binlog to find " + "initial slave position"; + goto end; + } + if (unlikely(gtid_state->update(>id))) + { + errormsg= "Internal error (out of memory?) updating slave state while " + "scanning binlog to find start position"; + goto end; + } + } + } + + if (unlikely(!valid_pos)) + { + errormsg= "Slave requested incorrect position in master binlog. 
" + "Requested position %u in file '%s', but this position does not " + "correspond to the location of any binlog event."; + } + +end: + delete fdev; + end_io_cache(&cache); + mysql_file_close(file, MYF(MY_WME)); + + return errormsg; +} + + +int +gtid_state_from_binlog_pos(const char *in_name, uint32 pos, String *out_str) +{ + slave_connection_state gtid_state; + const char *lookup_name; + char name_buf[FN_REFLEN]; + LOG_INFO linfo; + + if (!mysql_bin_log.is_open()) + { + my_error(ER_NO_BINARY_LOGGING, MYF(0)); + return 1; + } + + if (in_name && in_name[0]) + { + mysql_bin_log.make_log_name(name_buf, in_name); + lookup_name= name_buf; + } + else + lookup_name= NULL; + linfo.index_file_offset= 0; + if (mysql_bin_log.find_log_pos(&linfo, lookup_name, 1)) + return 1; + + if (pos < 4) + pos= 4; + + if (gtid_state_from_pos(linfo.log_file_name, pos, >id_state) || + gtid_state.to_string(out_str)) + return 1; + return 0; +} + + +static bool +is_until_reached(binlog_send_info *info, ulong *ev_offset, + Log_event_type event_type, const char **errmsg, + uint32 current_pos) +{ + switch (info->gtid_until_group) + { + case GTID_UNTIL_NOT_DONE: + return false; + case GTID_UNTIL_STOP_AFTER_STANDALONE: + if (Log_event::is_part_of_group(event_type)) + return false; + break; + case GTID_UNTIL_STOP_AFTER_TRANSACTION: + if (event_type != XID_EVENT && event_type != XA_PREPARE_LOG_EVENT && + (event_type != QUERY_EVENT || /* QUERY_COMPRESSED_EVENT would never be commmit or rollback */ + !Query_log_event::peek_is_commit_rollback + ((uchar*) info->packet->ptr() + *ev_offset, + info->packet->length() - *ev_offset, + info->current_checksum_alg))) + return false; + break; + } + + /* + The last event group has been sent, now the START SLAVE UNTIL condition + has been reached. + + Send a last fake Gtid_list_log_event with a flag set to mark that we + stop due to UNTIL condition. 
+ */ + if (reset_transmit_packet(info, info->flags, ev_offset, errmsg)) + return true; + Gtid_list_log_event glev(&info->until_binlog_state, + Gtid_list_log_event::FLAG_UNTIL_REACHED); + if (fake_gtid_list_event(info, &glev, errmsg, current_pos)) + return true; + *errmsg= NULL; + return true; +} + + +/* + Helper function for mysql_binlog_send() to write an event down the slave + connection. + + Returns NULL on success, error message string on error. +*/ +static const char * +send_event_to_slave(binlog_send_info *info, Log_event_type event_type, + IO_CACHE *log, ulong ev_offset, rpl_gtid *error_gtid) +{ + my_off_t pos; + String* const packet= info->packet; + size_t len= packet->length(); + int mariadb_slave_capability= info->mariadb_slave_capability; + enum enum_binlog_checksum_alg current_checksum_alg= info->current_checksum_alg; + slave_connection_state *gtid_state= &info->gtid_state; + slave_connection_state *until_gtid_state= info->until_gtid_state; + bool need_sync= false; + + if (event_type == GTID_LIST_EVENT && + info->using_gtid_state && until_gtid_state) + { + rpl_gtid *gtid_list; + uint32 list_len; + bool err; + + if (ev_offset > len || + Gtid_list_log_event::peek(packet->ptr()+ev_offset, len - ev_offset, + current_checksum_alg, + >id_list, &list_len, info->fdev)) + { + info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG; + return "Failed to read Gtid_list_log_event: corrupt binlog"; + } + err= info->until_binlog_state.load(gtid_list, list_len); + my_free(gtid_list); + if (err) + { + info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG; + return "Failed in internal GTID book-keeping: Out of memory"; + } + } + + /* Skip GTID event groups until we reach slave position within a domain_id. 
*/ + if (event_type == GTID_EVENT && info->using_gtid_state) + { + uchar flags2; + slave_connection_state::entry *gtid_entry; + rpl_gtid *gtid; + + if (gtid_state->count() > 0 || until_gtid_state) + { + rpl_gtid event_gtid; + + if (ev_offset > len || + Gtid_log_event::peek((uchar*) packet->ptr()+ev_offset, len - ev_offset, + current_checksum_alg, + &event_gtid.domain_id, &event_gtid.server_id, + &event_gtid.seq_no, &flags2, info->fdev)) + { + info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG; + return "Failed to read Gtid_log_event: corrupt binlog"; + } + + DBUG_EXECUTE_IF("gtid_force_reconnect_at_10_1_100", + { + rpl_gtid *dbug_gtid; + if ((dbug_gtid= info->until_binlog_state.find_nolock(10,1)) && + dbug_gtid->seq_no == 100) + { + DBUG_SET("-d,gtid_force_reconnect_at_10_1_100"); + DBUG_SET_INITIAL("-d,gtid_force_reconnect_at_10_1_100"); + info->error= ER_UNKNOWN_ERROR; + return "DBUG-injected forced reconnect"; + } + }); + + if (info->until_binlog_state.update_nolock(&event_gtid, false)) + { + info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG; + return "Failed in internal GTID book-keeping: Out of memory"; + } + + if (gtid_state->count() > 0) + { + gtid_entry= gtid_state->find_entry(event_gtid.domain_id); + if (gtid_entry != NULL) + { + gtid= >id_entry->gtid; + if (gtid_entry->flags & slave_connection_state::START_ON_EMPTY_DOMAIN) + { + rpl_gtid master_gtid; + if (!mysql_bin_log.find_in_binlog_state(gtid->domain_id, + gtid->server_id, + &master_gtid) || + master_gtid.seq_no < gtid->seq_no) + { + int err; + const char *errormsg; + *error_gtid= *gtid; + give_error_start_pos_missing_in_binlog(&err, &errormsg, error_gtid); + info->error= err; + return errormsg; + } + gtid_entry->flags&= ~(uint32)slave_connection_state::START_ON_EMPTY_DOMAIN; + } + + /* Skip this event group if we have not yet reached slave start pos. 
*/ + if (event_gtid.server_id != gtid->server_id || + event_gtid.seq_no <= gtid->seq_no) + info->gtid_skip_group= (flags2 & Gtid_log_event::FL_STANDALONE ? + GTID_SKIP_STANDALONE : GTID_SKIP_TRANSACTION); + if (event_gtid.server_id == gtid->server_id && + event_gtid.seq_no >= gtid->seq_no) + { + if (info->slave_gtid_strict_mode && + event_gtid.seq_no > gtid->seq_no && + !(gtid_entry->flags & slave_connection_state::START_OWN_SLAVE_POS)) + { + /* + In strict mode, it is an error if the slave requests to start + in a "hole" in the master's binlog: a GTID that does not + exist, even though both the prior and subsequent seq_no exists + for same domain_id and server_id. + */ + info->error= ER_GTID_START_FROM_BINLOG_HOLE; + *error_gtid= *gtid; + return "The binlog on the master is missing the GTID requested " + "by the slave (even though both a prior and a subsequent " + "sequence number does exist), and GTID strict mode is enabled."; + } + + /* + Send a fake Gtid_list event to the slave. + This allows the slave to update its current binlog position + so MASTER_POS_WAIT() and MASTER_GTID_WAIT() can work. + The fake event will be sent at the end of this event group. + */ + info->send_fake_gtid_list= true; + + /* + Delete this entry if we have reached slave start position (so we + will not skip subsequent events and won't have to look them up + and check). + */ + gtid_state->remove(gtid); + } + } + } + + if (until_gtid_state) + { + gtid= until_gtid_state->find(event_gtid.domain_id); + if (gtid == NULL) + { + /* + This domain already reached the START SLAVE UNTIL stop condition, + so skip this event group. + */ + info->gtid_skip_group = (flags2 & Gtid_log_event::FL_STANDALONE ? + GTID_SKIP_STANDALONE : GTID_SKIP_TRANSACTION); + } + else if (event_gtid.server_id == gtid->server_id && + event_gtid.seq_no >= gtid->seq_no) + { + /* + We have reached the stop condition. 
+ Delete this domain_id from the hash, so we will skip all further + events in this domain and eventually stop when all domains are + done. + */ + uint64 until_seq_no= gtid->seq_no; + until_gtid_state->remove(gtid); + if (until_gtid_state->count() == 0) + info->gtid_until_group= (flags2 & Gtid_log_event::FL_STANDALONE ? + GTID_UNTIL_STOP_AFTER_STANDALONE : + GTID_UNTIL_STOP_AFTER_TRANSACTION); + if (event_gtid.seq_no > until_seq_no) + { + /* + The GTID in START SLAVE UNTIL condition is missing in our binlog. + This should normally not happen (user error), but since we can be + sure that we are now beyond the position that the UNTIL condition + should be in, we can just stop now. And we also need to skip this + event group (as it is beyond the UNTIL condition). + */ + info->gtid_skip_group = (flags2 & Gtid_log_event::FL_STANDALONE ? + GTID_SKIP_STANDALONE : GTID_SKIP_TRANSACTION); + } + } + } + } + } + + /* + Skip event group if we have not yet reached the correct slave GTID position. + + Note that slave that understands GTID can also tolerate holes, so there is + no need to supply dummy event. + */ + switch (info->gtid_skip_group) + { + case GTID_SKIP_STANDALONE: + if (!Log_event::is_part_of_group(event_type)) + info->gtid_skip_group= GTID_SKIP_NOT; + return NULL; + case GTID_SKIP_TRANSACTION: + if (event_type == XID_EVENT || event_type == XA_PREPARE_LOG_EVENT || + (event_type == QUERY_EVENT && /* QUERY_COMPRESSED_EVENT would never be commmit or rollback */ + Query_log_event::peek_is_commit_rollback((uchar*) packet->ptr() + + ev_offset, + len - ev_offset, + current_checksum_alg))) + info->gtid_skip_group= GTID_SKIP_NOT; + return NULL; + case GTID_SKIP_NOT: + break; + } + + /* Do not send annotate_rows events unless slave requested it. 
*/ + if (event_type == ANNOTATE_ROWS_EVENT && + !(info->flags & BINLOG_SEND_ANNOTATE_ROWS_EVENT)) + { + if (mariadb_slave_capability >= MARIA_SLAVE_CAPABILITY_TOLERATE_HOLES) + { + /* This slave can tolerate events omitted from the binlog stream. */ + return NULL; + } + else if (mariadb_slave_capability >= MARIA_SLAVE_CAPABILITY_ANNOTATE) + { + /* + The slave did not request ANNOTATE_ROWS_EVENT (it does not need them as + it will not log them in its own binary log). However, it understands the + event and will just ignore it, and it would break if we omitted it, + leaving a hole in the binlog stream. So just send the event as-is. + */ + } + else + { + /* + The slave does not understand ANNOTATE_ROWS_EVENT. + + Older MariaDB slaves (and MySQL slaves) will break replication if there + are holes in the binlog stream (they will miscompute the binlog offset + and request the wrong position when reconnecting). + + So replace the event with a dummy event of the same size that will be + a no-operation on the slave. + */ + if (Query_log_event::dummy_event(packet, ev_offset, current_checksum_alg)) + { + info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG; + return "Failed to replace row annotate event with dummy: too small event."; + } + } + } + + /* + Replace GTID events with old-style BEGIN events for slaves that do not + understand global transaction IDs. For stand-alone events, where there is + no terminating COMMIT query event, omit the GTID event or replace it with + a dummy event, as appropriate. 
+ */ + if (event_type == GTID_EVENT && + mariadb_slave_capability < MARIA_SLAVE_CAPABILITY_GTID) + { + bool need_dummy= + mariadb_slave_capability < MARIA_SLAVE_CAPABILITY_TOLERATE_HOLES; + bool err= Gtid_log_event::make_compatible_event(packet, &need_dummy, + ev_offset, + current_checksum_alg); + if (err) + { + info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG; + return "Failed to replace GTID event with backwards-compatible event: " + "corrupt event."; + } + if (!need_dummy) + return NULL; + } + + /* + Do not send binlog checkpoint or gtid list events to a slave that does not + understand it. + */ + if ((unlikely(event_type == BINLOG_CHECKPOINT_EVENT) && + mariadb_slave_capability < MARIA_SLAVE_CAPABILITY_BINLOG_CHECKPOINT) || + (unlikely(event_type == GTID_LIST_EVENT) && + mariadb_slave_capability < MARIA_SLAVE_CAPABILITY_GTID)) + { + if (mariadb_slave_capability >= MARIA_SLAVE_CAPABILITY_TOLERATE_HOLES) + { + /* This slave can tolerate events omitted from the binlog stream. */ + return NULL; + } + else + { + /* + The slave does not understand BINLOG_CHECKPOINT_EVENT. Send a dummy + event instead, with same length so slave does not get confused about + binlog positions. + */ + if (Query_log_event::dummy_event(packet, ev_offset, current_checksum_alg)) + { + info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG; + return "Failed to replace binlog checkpoint or gtid list event with " + "dummy: too small event."; + } + } + } + + /* + Skip events with the @@skip_replication flag set, if slave requested + skipping of such events. 
/**
  Validate that the slave-requested start position lies inside the given
  binlog file.

  Opens the file, checks BIN_LOG_HEADER_SIZE <= pos <= file length, and on
  violation formats a detailed message directly into info->error_text and
  sets info->error to ER_MASTER_FATAL_ERROR_READING_BINLOG.

  @param info          per-dump-thread state; error fields are written on failure
  @param log_file_name binlog file to check
  @param pos           requested start offset within that file

  @return info->error (0 if the offset is acceptable, non-zero otherwise)
*/
static int check_start_offset(binlog_send_info *info,
                              const char *log_file_name,
                              my_off_t pos)
{
  IO_CACHE log;
  File file= -1;

  /** check that requested position is inside of file */
  if ((file=open_binlog(&log, log_file_name, &info->errmsg)) < 0)
  {
    info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
    return 1;
  }

  if (pos < BIN_LOG_HEADER_SIZE || pos > my_b_filelength(&log))
  {
    const char* msg= "Client requested master to start replication from "
        "impossible position";

    info->errmsg= NULL; // don't do further modifications of error_text
    /*
      NOTE(review): pos is my_off_t (unsigned) printed with "%lld";
      harmless for realistic binlog sizes but technically a sign
      mismatch — confirm against my_snprintf conventions.
    */
    snprintf(info->error_text, sizeof(info->error_text),
             "%s; the first event '%s' at %lld, "
             "the last event read from '%s' at %d, "
             "the last byte read from '%s' at %d.",
             msg,
             my_basename(info->start_log_file_name), pos,
             my_basename(info->start_log_file_name), BIN_LOG_HEADER_SIZE,
             my_basename(info->start_log_file_name), BIN_LOG_HEADER_SIZE);
    info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
    goto err;  /* NOTE(review): falls straight into err: — kept for symmetry */
  }

err:
  end_io_cache(&log);
  mysql_file_close(file, MYF(MY_WME));
  return info->error;
}
/**
  Initialize the binlog dump session state before streaming begins.

  Records the slave-requested start file/position, reads per-connection
  options from user variables (checksum algorithm, MariaDB slave capability,
  GTID connect state, UNTIL GTID), locates the first binlog file to send
  (by GTID state or by name), and validates the start offset.

  @param info      per-dump-thread state, filled in here
  @param linfo     log file coordinates, positioned on the first file to send
  @param log_ident slave-requested binlog file name ("" = first log)
  @param pos       in: requested offset; out: may be reset to 4 for GTID mode

  @return 0 on success; 1 on error with info->error/info->errmsg set
*/
static int init_binlog_sender(binlog_send_info *info,
                              LOG_INFO *linfo,
                              const char *log_ident,
                              my_off_t *pos)
{
  THD *thd= info->thd;
  int error;
  char str_buf[128];
  String connect_gtid_state(str_buf, sizeof(str_buf), system_charset_info);
  char str_buf2[128];
  String slave_until_gtid_str(str_buf2, sizeof(str_buf2), system_charset_info);
  connect_gtid_state.length(0);

  /** save start file/pos that was requested by slave */
  strmake(info->start_log_file_name, log_ident,
          sizeof(info->start_log_file_name));
  info->start_pos= *pos;

  /** init last pos */
  info->last_pos= *pos;

  /* Per-connection options are communicated via slave-set user variables */
  info->current_checksum_alg= get_binlog_checksum_value_at_connect(thd);
  info->mariadb_slave_capability= get_mariadb_slave_capability(thd);
  info->using_gtid_state= get_slave_connect_state(thd, &connect_gtid_state);
  DBUG_EXECUTE_IF("simulate_non_gtid_aware_master",
                  info->using_gtid_state= false;);

  if (info->using_gtid_state)
  {
    info->slave_gtid_strict_mode= get_slave_gtid_strict_mode(thd);
    info->slave_gtid_ignore_duplicates= get_slave_gtid_ignore_duplicates(thd);
    if (get_slave_until_gtid(thd, &slave_until_gtid_str))
      info->until_gtid_state= &info->until_gtid_state_obj;
  }

  DBUG_EXECUTE_IF("binlog_force_reconnect_after_22_events",
                  {
                    DBUG_SET("-d,binlog_force_reconnect_after_22_events");
                    DBUG_SET_INITIAL("-d,binlog_force_reconnect_after_22_events");
                    info->dbug_reconnect_counter= 22;
                  });

  if (global_system_variables.log_warnings > 1)
  {
    sql_print_information(
        "Start binlog_dump to slave_server(%lu), pos(%s, %lu), "
        "using_gtid(%d), gtid('%s')", thd->variables.server_id,
        log_ident, (ulong)*pos, info->using_gtid_state,
        connect_gtid_state.c_ptr_safe());
  }

#ifndef DBUG_OFF
  /* Test hook: make every second dump request fail to exercise slave retry */
  if (opt_sporadic_binlog_dump_fail && (binlog_dump_count++ % 2))
  {
    info->errmsg= "Master failed COM_BINLOG_DUMP to test if slave can recover";
    info->error= ER_UNKNOWN_ERROR;
    return 1;
  }
#endif

  if (!mysql_bin_log.is_open())
  {
    info->errmsg= "Binary log is not open";
    info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
    return 1;
  }

  char search_file_name[FN_REFLEN];
  const char *name=search_file_name;
  if (info->using_gtid_state)
  {
    /* GTID mode: derive the start file from the slave's GTID state */
    if (info->gtid_state.load(connect_gtid_state.ptr(),
                              connect_gtid_state.length()))
    {
      info->errmsg= "Out of memory or malformed slave request when obtaining "
          "start position from GTID state";
      info->error= ER_UNKNOWN_ERROR;
      return 1;
    }
    if (info->until_gtid_state &&
        info->until_gtid_state->load(slave_until_gtid_str.ptr(),
                                     slave_until_gtid_str.length()))
    {
      info->errmsg= "Out of memory or malformed slave request when "
          "obtaining UNTIL position sent from slave";
      info->error= ER_UNKNOWN_ERROR;
      return 1;
    }
    if (unlikely((error= check_slave_start_position(info, &info->errmsg,
                                                    &info->error_gtid))))
    {
      info->error= error;
      return 1;
    }
    if ((info->errmsg= gtid_find_binlog_file(&info->gtid_state,
                                             search_file_name,
                                             info->until_gtid_state)))
    {
      info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
      return 1;
    }

    /* start from beginning of binlog file */
    *pos = 4;
  }
  else
  {
    if (log_ident[0])
      mysql_bin_log.make_log_name(search_file_name, log_ident);
    else
      name=0; // Find first log
  }
  linfo->index_file_offset= 0;

  if (mysql_bin_log.find_log_pos(linfo, name, 1))
  {
    info->errmsg= "Could not find first log file name in binary "
        "log index file";
    info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
    return 1;
  }

  // set current pos too
  linfo->pos= *pos;
  // note: publish that we use file, before we open it
  thd->set_current_linfo(linfo);

  if (check_start_offset(info, linfo->log_file_name, *pos))
    return 1;

  if (*pos > BIN_LOG_HEADER_SIZE)
  {
    /*
      mark that first format descriptor with "log_pos=0", so the slave
      should not increment master's binlog position
      (rli->group_master_log_pos)
    */
    info->clear_initial_log_pos= true;
  }

  return 0;
}
/**
  Send the format descriptor event (and an optional following
  Start_encryption event) for one binlog file to the slave.

  Steps: reset info->fdev, read the first event (must be a
  FORMAT_DESCRIPTION_EVENT), patch its header as needed (clear in-use flag,
  zero log_pos / `created` where required), send it; then peek at the next
  event — if it is START_ENCRYPTION_EVENT, start decryption and forward it
  as an ignorable event, otherwise seek back.

  @param info      per-dump-thread state; fdev is replaced here
  @param log       open IO_CACHE over the binlog file
  @param linfo     file coordinates, advanced past what was read
  @param start_pos requested start offset (controls the seek-back)

  @return 0 on success, 1 on error with info->error/info->errmsg set
*/
static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
                                        LOG_INFO *linfo, my_off_t start_pos)
{
  int error;
  ulong ev_offset;
  THD *thd= info->thd;
  String *packet= info->packet;
  Log_event_type event_type;
  bool initial_log_pos= info->clear_initial_log_pos;
  DBUG_ENTER("send_format_descriptor_event");

  /**
   * 1) reset fdev before each log-file
   * 2) read first event, should be the format descriptor
   * 3) read second event, *might* be start encryption event
   *    if it's isn't, seek back to undo this read
   */
  if (info->fdev != NULL)
    delete info->fdev;

  if (!(info->fdev= new Format_description_log_event(3)))
  {
    info->errmsg= "Out of memory initializing format_description event";
    info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
    DBUG_RETURN(1);
  }

  /* reset transmit packet for the event read from binary log file */
  if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg))
    DBUG_RETURN(1);

  /*
    Try to find a Format_description_log_event at the beginning of
    the binlog
  */
  info->last_pos= my_b_tell(log);
  error= Log_event::read_log_event(log, packet, info->fdev,
                                   opt_master_verify_checksum
                                   ? info->current_checksum_alg
                                   : BINLOG_CHECKSUM_ALG_OFF);
  linfo->pos= my_b_tell(log);

  if (unlikely(error))
  {
    set_read_error(info, error);
    DBUG_RETURN(1);
  }

  event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET+ev_offset]);

  /*
    The packet has offsets equal to the normal offsets in a
    binlog event + ev_offset (the first ev_offset characters are
    the header (default \0)).
  */
  DBUG_PRINT("info",
             ("Looked for a Format_description_log_event, "
              "found event type %d", (int)event_type));

  if (event_type != FORMAT_DESCRIPTION_EVENT)
  {
    info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
    info->errmsg= "Failed to find format descriptor event in start of binlog";
    sql_print_warning("Failed to find format descriptor event in "
                      "start of binlog: %s",
                      info->log_file_name);
    DBUG_RETURN(1);
  }

  /* The FD event itself tells us which checksum algorithm this file uses */
  info->current_checksum_alg= get_checksum_alg((uchar*) packet->ptr() +
                                               ev_offset,
                                               packet->length() - ev_offset);

  DBUG_ASSERT(info->current_checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
              info->current_checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF ||
              info->current_checksum_alg == BINLOG_CHECKSUM_ALG_CRC32);

  if (!is_slave_checksum_aware(thd) &&
      info->current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
      info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
  {
    info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
    info->errmsg= "Slave can not handle replication events with the "
        "checksum that master is configured to log";
    sql_print_warning("Master is configured to log replication events "
                      "with checksum, but will not send such events to "
                      "slaves that cannot process them");
    DBUG_RETURN(1);
  }

  uint ev_len= packet->length() - ev_offset;
  if (info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
    ev_len-= BINLOG_CHECKSUM_LEN;

  /* Replace the minimal v3 fdev with the real one parsed from the file */
  Format_description_log_event *tmp;
  if (!(tmp= new Format_description_log_event((uchar*) packet->ptr() + ev_offset,
                                              ev_len, info->fdev)))
  {
    info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
    info->errmsg= "Corrupt Format_description event found "
        "or out-of-memory";
    DBUG_RETURN(1);
  }
  delete info->fdev;
  info->fdev= tmp;

  (*packet)[FLAGS_OFFSET+ev_offset] &= ~LOG_EVENT_BINLOG_IN_USE_F;

  if (initial_log_pos)
  {
    info->clear_initial_log_pos= false;
    /*
      mark that this event with "log_pos=0", so the slave
      should not increment master's binlog position
      (rli->group_master_log_pos)
    */
    int4store((char*) packet->ptr()+LOG_POS_OFFSET+ev_offset, (ulong) 0);

    /*
      if reconnect master sends FD event with `created' as 0
      to avoid destroying temp tables.
    */
    int4store((char*) packet->ptr()+LOG_EVENT_MINIMAL_HEADER_LEN+
              ST_CREATED_OFFSET+ev_offset, (ulong) 0);

    /* fix the checksum due to latest changes in header */
    fix_checksum(info->current_checksum_alg, packet, ev_offset);
  }
  else if (info->using_gtid_state)
  {
    /*
      If this event has the field `created' set, then it will cause the
      slave to delete all active temporary tables. This must not happen
      if the slave received any later GTIDs in a previous connect, as
      those GTIDs might have created new temporary tables that are still
      needed.

      So here, we check if the starting GTID position was already
      reached before this format description event. If not, we clear the
      `created' flag to preserve temporary tables on the slave. (If the
      slave connects at a position past this event, it means that it
      already received and handled it in a previous connect).
    */
    if (!info->gtid_state.is_pos_reached())
    {
      int4store((char*) packet->ptr()+LOG_EVENT_MINIMAL_HEADER_LEN+
                ST_CREATED_OFFSET+ev_offset, (ulong) 0);
      fix_checksum(info->current_checksum_alg, packet, ev_offset);
    }
  }

  /* send it */
  if (my_net_write(info->net, (uchar*) packet->ptr(), packet->length()))
  {
    info->errmsg= "Failed on my_net_write()";
    info->error= ER_UNKNOWN_ERROR;
    DBUG_RETURN(1);
  }

  /*
    Read the following Start_encryption_log_event and send it to slave as
    Ignorable_log_event. Although Slave doesn't need to know whether master's
    binlog is encrypted but it needs to update slave log pos (for mysqlbinlog).

    If slave want to encrypt its logs, it should generate its own
    random nonce, it should not use the one from the master.
  */
  /* reset transmit packet for the event read from binary log file */
  if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg))
    DBUG_RETURN(1);
  info->last_pos= linfo->pos;
  error= Log_event::read_log_event(log, packet, info->fdev,
                                   opt_master_verify_checksum
                                   ? info->current_checksum_alg
                                   : BINLOG_CHECKSUM_ALG_OFF);
  linfo->pos= my_b_tell(log);

  if (unlikely(error))
  {
    set_read_error(info, error);
    DBUG_RETURN(1);
  }

  event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET + ev_offset]);
  if (event_type == START_ENCRYPTION_EVENT)
  {
    Start_encryption_log_event *sele= (Start_encryption_log_event *)
        Log_event::read_log_event((uchar*) packet->ptr() + ev_offset,
                                  packet->length()
                                  - ev_offset, &info->errmsg, info->fdev,
                                  BINLOG_CHECKSUM_ALG_OFF);
    if (!sele)
    {
      info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
      DBUG_RETURN(1);
    }

    if (info->fdev->start_decryption(sele))
    {
      info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
      info->errmsg= "Could not decrypt binlog: encryption key error";
      delete sele;
      DBUG_RETURN(1);
    }
    /* Make it Ignorable_log_event and send it */
    (*packet)[FLAGS_OFFSET+ev_offset] |= LOG_EVENT_IGNORABLE_F;
    if (initial_log_pos)
      int4store((char*) packet->ptr()+LOG_POS_OFFSET+ev_offset, (ulong) 0);
    /* fix the checksum due to latest changes in header */
    fix_checksum(info->current_checksum_alg, packet, ev_offset);
    if (my_net_write(info->net, (uchar*) packet->ptr(), packet->length()))
    {
      info->errmsg= "Failed on my_net_write()";
      info->error= ER_UNKNOWN_ERROR;
      DBUG_RETURN(1);
    }
    delete sele;
  }
  else if (start_pos == BIN_LOG_HEADER_SIZE)
  {
    /*
      not Start_encryption_log_event - seek back. But only if
      send_one_binlog_file() isn't going to seek anyway
    */
    my_b_seek(log, info->last_pos);
    linfo->pos= info->last_pos;
  }


  /** all done */
  DBUG_RETURN(0);
}
/**
  Decide whether the dump loop should terminate.

  True when the client connection is gone/errored, the THD is killed
  (KILL_SERVER only counts when kill_server_check is set, so normal
  streaming outlives server shutdown long enough to flush), an error was
  recorded, or a stop was explicitly requested.
*/
static bool should_stop(binlog_send_info *info, bool kill_server_check= false)
{
  return
    info->net->error ||
    info->net->vio == NULL ||
    (info->thd->killed &&
     (info->thd->killed != KILL_SERVER || kill_server_check)) ||
    info->error != 0 ||
    info->should_stop;
}

/**
 * wait for new events to enter binlog
 * this function will send heartbeats while waiting if so configured
 *
 * @param info                    per-dump-thread state (heartbeat period,
 *                                THD, net)                             [in]
 * @param linfo                   current file/position being streamed  [in]
 * @param binlog_end_pos_filename name of the active binlog file        [out]
 * @param end_pos_ptr             new binlog end position               [out]
 *
 * @return 0 when new data or a file switch is available, 1 on error/stop
 */
static int wait_new_events(binlog_send_info *info,         /* in */
                           LOG_INFO* linfo,                /* in */
                           char binlog_end_pos_filename[], /* out */
                           my_off_t *end_pos_ptr)          /* out */
{
  int ret= 1;
  PSI_stage_info old_stage;

  /* Must hold the end-pos lock around the condition wait (released by
     ENTER_COND/EXIT_COND protocol) */
  mysql_bin_log.lock_binlog_end_pos();
  info->thd->ENTER_COND(mysql_bin_log.get_bin_log_cond(),
                        mysql_bin_log.get_binlog_end_pos_lock(),
                        &stage_master_has_sent_all_binlog_to_slave,
                        &old_stage);

  while (!should_stop(info, true))
  {
    *end_pos_ptr= mysql_bin_log.get_binlog_end_pos(binlog_end_pos_filename);
    if (strcmp(linfo->log_file_name, binlog_end_pos_filename) != 0)
    {
      /* there has been a log file switch, we don't need to wait */
      ret= 0;
      break;
    }

    if (linfo->pos < *end_pos_ptr)
    {
      /* there is data to read, we don't need to wait */
      ret= 0;
      break;
    }

    if (info->heartbeat_period)
    {
      /* Timed wait; on timeout send a heartbeat so the slave knows the
         connection is alive */
      struct timespec ts;
      set_timespec_nsec(ts, info->heartbeat_period);
      ret= mysql_bin_log.wait_for_update_binlog_end_pos(info->thd, &ts);
      if (ret == ETIMEDOUT || ret == ETIME)
      {
        struct event_coordinates coord = { linfo->log_file_name, linfo->pos };
#ifndef DBUG_OFF
        const ulong hb_info_counter_limit = 3;
        if (info->hb_info_counter < hb_info_counter_limit)
        {
          sql_print_information("master sends heartbeat message %s:%llu",
                                linfo->log_file_name, linfo->pos);
          info->hb_info_counter++;
          if (info->hb_info_counter == hb_info_counter_limit)
            sql_print_information("the rest of heartbeat info skipped ...");
        }
#endif
        /* Drop the lock while writing to the network, then re-take it */
        mysql_bin_log.unlock_binlog_end_pos();
        ret= send_heartbeat_event(info,
                                  info->net, info->packet, &coord,
                                  info->current_checksum_alg);
        mysql_bin_log.lock_binlog_end_pos();

        if (ret)
        {
          ret= 1; // error
          break;
        }
        /**
         * re-read heartbeat period after each sent
         */
        info->heartbeat_period= get_heartbeat_period(info->thd);
      }
      else if (ret != 0)
      {
        ret= 1; // error
        break;
      }
    }
    else
    {
      /* No heartbeat configured: wait indefinitely for a binlog update */
      ret= mysql_bin_log.wait_for_update_binlog_end_pos(info->thd, NULL);
      if (ret != 0 && ret != ETIMEDOUT && ret != ETIME)
      {
        ret= 1; // error
        break;
      }
    }
  }

  /* it releases the lock set in ENTER_COND */
  info->thd->EXIT_COND(&old_stage);
  return ret;
}
/**
 * get end pos of current log file, this function
 * will wait if there is nothing available
 *
 * @return the end position to read up to; 0 means end-of-file (or a
 *         requested stop) and 1 means error.
 *         NOTE(review): 1 doubles as an error sentinel here — the caller
 *         (send_one_binlog_file) treats end_pos <= 1 as eof-or-error;
 *         valid positions are always >= BIN_LOG_HEADER_SIZE so this does
 *         not collide with real offsets.
 */
static my_off_t get_binlog_end_pos(binlog_send_info *info,
                                   IO_CACHE* log,
                                   LOG_INFO* linfo)
{
  my_off_t log_pos= my_b_tell(log);

  /**
   * get current binlog end pos
   */
  mysql_bin_log.lock_binlog_end_pos();
  char binlog_end_pos_filename[FN_REFLEN];
  my_off_t end_pos= mysql_bin_log.get_binlog_end_pos(binlog_end_pos_filename);
  mysql_bin_log.unlock_binlog_end_pos();

  do
  {
    if (strcmp(binlog_end_pos_filename, linfo->log_file_name) != 0)
    {
      /**
       * this file is not active, since it's not written to again,
       * it safe to check file length and use that as end_pos
       */
      end_pos= my_b_filelength(log);

      if (log_pos == end_pos)
        return 0; // already at end of file inactive file
      else
        return end_pos; // return size of inactive file
    }
    else
    {
      /**
       * this is the active file
       */

      if (log_pos < end_pos)
      {
        /**
         * there is data available to read
         */
        return end_pos;
      }

      /**
       * check if we should wait for more data
       */
      if ((info->flags & BINLOG_DUMP_NON_BLOCK) ||
          (info->thd->variables.server_id == 0))
      {
        /* non-blocking dump (e.g. mysqlbinlog) — stop instead of waiting */
        info->should_stop= true;
        return 0;
      }

      /**
       * flush data before waiting
       */
      if (net_flush(info->net))
      {
        info->errmsg= "failed on net_flush()";
        info->error= ER_UNKNOWN_ERROR;
        return 1;
      }

      if (wait_new_events(info, linfo, binlog_end_pos_filename, &end_pos))
        return 1;
    }
  } while (!should_stop(info));

  return 0;
}
/**
 * This function sends events from one binlog file
 * but only up until end_pos
 *
 * Each event is read into info->packet, filtered/rewritten by
 * send_event_to_slave() (GTID/annotate/checkpoint compatibility), and may
 * be followed by a fake Gtid_list event or terminate the dump when an
 * UNTIL GTID condition is reached.
 *
 * return 0 - OK
 *        else NOK
 */
static int send_events(binlog_send_info *info, IO_CACHE* log, LOG_INFO* linfo,
                       my_off_t end_pos)
{
  int error;
  ulong ev_offset;

  String *packet= info->packet;
  linfo->pos= my_b_tell(log);
  info->last_pos= my_b_tell(log);

  /* cap reads so we never read past the known-valid end of the file */
  log->end_of_file= end_pos;
  while (linfo->pos < end_pos)
  {
    if (should_stop(info))
      return 0;

    /* reset the transmit packet for the event read from binary log
       file */
    if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg))
      return 1;

    info->last_pos= linfo->pos;
    error= Log_event::read_log_event(log, packet, info->fdev,
                                     opt_master_verify_checksum ? info->current_checksum_alg
                                     : BINLOG_CHECKSUM_ALG_OFF);
    linfo->pos= my_b_tell(log);

    if (unlikely(error))
    {
      set_read_error(info, error);
      return 1;
    }

    Log_event_type event_type=
      (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET+ev_offset]);

#ifndef DBUG_OFF
    /* Test hook: force a disconnect after N events to exercise reconnect */
    if (info->dbug_reconnect_counter > 0)
    {
      --info->dbug_reconnect_counter;
      if (info->dbug_reconnect_counter == 0)
      {
        info->errmsg= "DBUG-injected forced reconnect";
        info->error= ER_UNKNOWN_ERROR;
        return 1;
      }
    }
#endif

#ifdef ENABLED_DEBUG_SYNC
    DBUG_EXECUTE_IF("dump_thread_wait_before_send_xid",
                    {
                      if (event_type == XID_EVENT)
                      {
                        net_flush(info->net);
                        const char act[]=
                          "now "
                          "wait_for signal.continue";
                        DBUG_ASSERT(debug_sync_service);
                        DBUG_ASSERT(!debug_sync_set_action(
                          info->thd,
                          STRING_WITH_LEN(act)));

                        const char act2[]=
                          "now "
                          "signal signal.continued";
                        DBUG_ASSERT(!debug_sync_set_action(
                          info->thd,
                          STRING_WITH_LEN(act2)));
                      }
                    });
#endif

    /* START_ENCRYPTION_EVENT was already handled when the file was opened */
    if (event_type != START_ENCRYPTION_EVENT &&
        ((info->errmsg= send_event_to_slave(info, event_type, log,
                                            ev_offset, &info->error_gtid))))
      return 1;

    if (unlikely(info->send_fake_gtid_list) &&
        info->gtid_skip_group == GTID_SKIP_NOT)
    {
      Gtid_list_log_event glev(&info->until_binlog_state, 0);

      if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg) ||
          fake_gtid_list_event(info, &glev, &info->errmsg, (uint32)my_b_tell(log)))
      {
        info->error= ER_UNKNOWN_ERROR;
        return 1;
      }
      info->send_fake_gtid_list= false;
    }

    if (info->until_gtid_state &&
        is_until_reached(info, &ev_offset, event_type, &info->errmsg,
                         (uint32)my_b_tell(log)))
    {
      if (info->errmsg)
      {
        info->error= ER_UNKNOWN_ERROR;
        return 1;
      }
      info->should_stop= true;
      return 0;
    }

    /* Abort server before it sends the XID_EVENT */
    DBUG_EXECUTE_IF("crash_before_send_xid",
                    {
                      if (event_type == XID_EVENT)
                      {
                        my_sleep(2000000);
                        DBUG_SUICIDE();
                      }
                    });
  }

  return 0;
}
/**
 * This function sends one binlog file to slave
 *
 * Seeks to start_pos when needed, then loops: compute/wait for the file's
 * end position and stream events up to it, until the file goes inactive
 * (rotate), a stop is requested, or an error occurs.
 *
 * return 0 - OK (end of this file reached; caller switches to next log)
 *        1 - NOK
 */
static int send_one_binlog_file(binlog_send_info *info,
                                IO_CACHE* log,
                                LOG_INFO* linfo,
                                my_off_t start_pos)
{
  mysql_mutex_assert_not_owner(mysql_bin_log.get_log_lock());

  /* seek to the requested position, to start the requested dump */
  if (start_pos != BIN_LOG_HEADER_SIZE)
  {
    my_b_seek(log, start_pos);
    linfo->pos= start_pos;
  }

  while (!should_stop(info))
  {
    /**
     * get end pos of current log file, this function
     * will wait if there is nothing available
     */
    my_off_t end_pos= get_binlog_end_pos(info, log, linfo);
    if (end_pos <= 1)
    {
      /** end of file or error (0 = EOF/stop, 1 = error sentinel) */
      return (int)end_pos;
    }
    info->dirlen= dirname_length(info->log_file_name);
    /**
     * send events from current position up to end_pos
     */
    if (send_events(info, log, linfo, end_pos))
      return 1;
    DBUG_EXECUTE_IF("Notify_binlog_EOF",
                    {
                      const char act[]= "now signal eof_reached";
                      DBUG_ASSERT(!debug_sync_set_action(current_thd,
                                                         STRING_WITH_LEN(act)));
                    };);
  }

  return 1;
}
/**
  Top-level handler for COM_BINLOG_DUMP: stream the binary log to a
  connected slave (or mysqlbinlog client).

  Initializes the dump session, runs the semisync transmit_start hook, then
  for each binlog file sends a fake Rotate event, the format descriptor, and
  all events, switching to the next log on rotation. On exit, maps any
  recorded error to a detailed client-facing message.

  @param thd       connection THD of the dump thread
  @param log_ident slave-requested start binlog file name
  @param pos       slave-requested start offset
  @param flags     BINLOG_DUMP_* flags from the dump request
*/
void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
                       ushort flags)
{
  LOG_INFO linfo;

  IO_CACHE log;
  File file = -1;
  String* const packet= &thd->packet;

  binlog_send_info infoobj(thd, packet, flags, linfo.log_file_name);
  binlog_send_info *info= &infoobj;
  bool has_transmit_started= false;

  /* Lift the packet limit for the duration of the dump; restored on exit */
  int old_max_allowed_packet= thd->variables.max_allowed_packet;
  thd->variables.max_allowed_packet= MAX_MAX_ALLOWED_PACKET;

  DBUG_ENTER("mysql_binlog_send");
  DBUG_PRINT("enter",("log_ident: '%s' pos: %ld", log_ident, (long) pos));

  bzero((char*) &log,sizeof(log));

  if (init_binlog_sender(info, &linfo, log_ident, &pos))
    goto err;

  has_transmit_started= true;

  /* Check if the dump thread is created by a slave with semisync enabled. */
  thd->semi_sync_slave = is_semi_sync_slave();

  DBUG_ASSERT(pos == linfo.pos);

  if (repl_semisync_master.dump_start(thd, linfo.log_file_name, linfo.pos))
  {
    info->errmsg= "Failed to run hook 'transmit_start'";
    info->error= ER_UNKNOWN_ERROR;
    goto err;
  }
#ifdef ENABLED_DEBUG_SYNC
  DBUG_EXECUTE_IF("simulate_delay_at_shutdown",
                  {
                    const char act[]=
                      "now "
                      "WAIT_FOR greetings_from_kill_mysql";
                    DBUG_ASSERT(!debug_sync_set_action(thd,
                                                       STRING_WITH_LEN(act)));
                  };);
#endif

  /*
    heartbeat_period from @master_heartbeat_period user variable
    NOTE: this is initialized after transmit_start-hook so that
    the hook can affect value of heartbeat period
  */
  info->heartbeat_period= get_heartbeat_period(thd);

  while (!should_stop(info))
  {
    /*
      Tell the client about the log name with a fake Rotate event; needed
      even though a Format_description_log_event follows, because that
      event does not carry the binlog's name. The fake Rotate is sent
      before the FD event, so its header length is FROZEN (the slave has
      no format info yet). We always send it, even when the slave already
      knows the log name — harmless, and it lets very old (3.23.58+)
      slaves detect an incompatible master as soon as replication starts
      (BUG#198). The normal from-binlog Rotate events are still sent too:
      they are not equivalent (normal one precedes the Stop event, the
      fake one follows it), and old slaves would misread a missing normal
      Rotate as "the master stopped".
    */
    if (fake_rotate_event(info, pos, &info->errmsg, info->current_checksum_alg))
    {
      /*
        This error code is not perfect, as fake_rotate_event() does not
        read anything from the binlog; if it fails it's because of an
        error in my_net_write(), fortunately it will say so in errmsg.
      */
      info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
      goto err;
    }

    if ((file=open_binlog(&log, linfo.log_file_name, &info->errmsg)) < 0)
    {
      info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
      goto err;
    }

    if (send_format_descriptor_event(info, &log, &linfo, pos))
    {
      info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
      goto err;
    }

    /*
      We want to corrupt the first event that will be sent to the slave.
      But we do not want the corruption to happen early, eg. when client does
      BINLOG_GTID_POS(). So test case sets a DBUG trigger which causes us to
      set the real DBUG injection here.
    */
    DBUG_EXECUTE_IF("corrupt_read_log_event2_set",
                    {
                      DBUG_SET("-d,corrupt_read_log_event2_set");
                      DBUG_SET("+d,corrupt_read_log_event2");
                    });

    /*
      Handle the case of START SLAVE UNTIL with an UNTIL condition already
      fulfilled at the start position.

      We will send one event, the format_description, and then stop.
    */
    if (info->until_gtid_state && info->until_gtid_state->count() == 0)
      info->gtid_until_group= GTID_UNTIL_STOP_AFTER_STANDALONE;

    THD_STAGE_INFO(thd, stage_sending_binlog_event_to_slave);
    if (send_one_binlog_file(info, &log, &linfo, pos))
      break;

    if (should_stop(info))
      break;

#ifdef ENABLED_DEBUG_SYNC
    DBUG_EXECUTE_IF("wait_after_binlog_EOF",
                    {
                      const char act[]= "now wait_for signal.rotate_finished";
                      DBUG_ASSERT(!debug_sync_set_action(current_thd,
                                                         STRING_WITH_LEN(act)));
                    };);
#endif

    THD_STAGE_INFO(thd,
                   stage_finished_reading_one_binlog_switching_to_next_binlog);
    if (mysql_bin_log.find_next_log(&linfo, 1))
    {
      info->errmsg= "could not find next log";
      info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
      break;
    }

    /** start from start of next file */
    pos= BIN_LOG_HEADER_SIZE;

    /** close current cache/file */
    end_io_cache(&log);
    mysql_file_close(file, MYF(MY_WME));
    file= -1;
  }

err:
  THD_STAGE_INFO(thd, stage_waiting_to_finalize_termination);
  if (has_transmit_started)
  {
    repl_semisync_master.dump_end(thd);
  }

  if (info->thd->killed == KILL_SLAVE_SAME_ID)
  {
    info->errmsg= "A slave with the same server_uuid/server_id as this slave "
      "has connected to the master";
    info->error= ER_SLAVE_SAME_ID;
  }

  const bool binlog_open = my_b_inited(&log);
  if (file >= 0)
  {
    end_io_cache(&log);
    mysql_file_close(file, MYF(MY_WME));
  }

  thd->reset_current_linfo();
  thd->variables.max_allowed_packet= old_max_allowed_packet;
  delete info->fdev;

  if (likely(info->error == 0))
  {
    my_eof(thd);
    DBUG_VOID_RETURN;
  }

  /* Error reporting: turn the recorded error code into a detailed message */
  if ((info->error == ER_MASTER_FATAL_ERROR_READING_BINLOG ||
       info->error == ER_SLAVE_SAME_ID) && binlog_open)
  {
    /*
      detailing the fatal error message with coordinates
      of the last position read.
    */
    my_snprintf(info->error_text, sizeof(info->error_text),
                "%s; the first event '%s' at %lld, "
                "the last event read from '%s' at %lld, "
                "the last byte read from '%s' at %lld.",
                info->errmsg,
                my_basename(info->start_log_file_name), info->start_pos,
                my_basename(info->log_file_name), info->last_pos,
                my_basename(info->log_file_name), linfo.pos);
  }
  else if (info->error == ER_GTID_POSITION_NOT_FOUND_IN_BINLOG)
  {
    my_snprintf(info->error_text, sizeof(info->error_text),
                "Error: connecting slave requested to start from GTID "
                "%u-%u-%llu, which is not in the master's binlog",
                info->error_gtid.domain_id,
                info->error_gtid.server_id,
                info->error_gtid.seq_no);
    /* Use this error code so slave will know not to try reconnect. */
    info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
  }
  else if (info->error == ER_GTID_POSITION_NOT_FOUND_IN_BINLOG2)
  {
    my_snprintf(info->error_text, sizeof(info->error_text),
                "Error: connecting slave requested to start from GTID "
                "%u-%u-%llu, which is not in the master's binlog. Since the "
                "master's binlog contains GTIDs with higher sequence numbers, "
                "it probably means that the slave has diverged due to "
                "executing extra erroneous transactions",
                info->error_gtid.domain_id,
                info->error_gtid.server_id,
                info->error_gtid.seq_no);
    /* Use this error code so slave will know not to try reconnect. */
    info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
  }
  else if (info->error == ER_GTID_START_FROM_BINLOG_HOLE)
  {
    my_snprintf(info->error_text, sizeof(info->error_text),
                "The binlog on the master is missing the GTID %u-%u-%llu "
                "requested by the slave (even though both a prior and a "
                "subsequent sequence number does exist), and GTID strict mode "
                "is enabled",
                info->error_gtid.domain_id,
                info->error_gtid.server_id,
                info->error_gtid.seq_no);
    /* Use this error code so slave will know not to try reconnect. */
    info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
  }
  else if (info->error == ER_CANNOT_LOAD_SLAVE_GTID_STATE)
  {
    my_snprintf(info->error_text, sizeof(info->error_text),
                "Failed to load replication slave GTID state from table %s.%s",
                "mysql", rpl_gtid_slave_state_table_name.str);
    info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
  }
  else if (info->errmsg != NULL)
    strcpy(info->error_text, info->errmsg);

  my_message(info->error, info->error_text, MYF(0));

  DBUG_VOID_RETURN;
}
/**
  Execute a START SLAVE statement.

  Validates privileges and the connection's state, parses any UNTIL
  condition (master pos / relay pos / GTID) into mi->rli, and starts the
  stopped IO/SQL threads. rli.data_lock is held while writing UNTIL state.

  @param thd Pointer to THD object for the client thread executing the
             statement.

  @param mi Pointer to Master_info object for the slave's IO thread.

  @param net_report If true, saves the exit status into thd->stmt_da.

  @retval 0 success
  @retval 1 error
  @retval -1 fatal error
*/
int start_slave(THD* thd , Master_info* mi, bool net_report)
{
  int slave_errno= 0;
  int thread_mask;
  char master_info_file_tmp[FN_REFLEN];
  char relay_log_info_file_tmp[FN_REFLEN];
  DBUG_ENTER("start_slave");

  if (check_global_access(thd, PRIV_STMT_START_SLAVE))
    DBUG_RETURN(-1);

  /* Per-connection-name variants of the info files (multi-source setup) */
  create_logfile_name_with_suffix(master_info_file_tmp,
                                  sizeof(master_info_file_tmp),
                                  master_info_file, 0,
                                  &mi->cmp_connection_name);
  create_logfile_name_with_suffix(relay_log_info_file_tmp,
                                  sizeof(relay_log_info_file_tmp),
                                  relay_log_info_file, 0,
                                  &mi->cmp_connection_name);

  mi->lock_slave_threads();
  if (mi->killed)
  {
    /* connection was deleted while we waited for lock_slave_threads */
    mi->unlock_slave_threads();
    my_error(WARN_NO_MASTER_INFO, MYF(0), (int) mi->connection_name.length,
             mi->connection_name.str);
    DBUG_RETURN(-1);
  }

  // Get a mask of _stopped_ threads
  init_thread_mask(&thread_mask,mi,1 /* inverse */);

  /* UNTIL master_gtid_pos requires both threads stopped, no thread option,
     and GTID mode enabled */
  if (thd->lex->mi.gtid_pos_str.str)
  {
    if (thread_mask != (SLAVE_IO|SLAVE_SQL))
    {
      slave_errno= ER_SLAVE_WAS_RUNNING;
      goto err;
    }
    if (thd->lex->slave_thd_opt)
    {
      slave_errno= ER_BAD_SLAVE_UNTIL_COND;
      goto err;
    }
    if (mi->using_gtid == Master_info::USE_GTID_NO)
    {
      slave_errno= ER_UNTIL_REQUIRES_USING_GTID;
      goto err;
    }
  }

  /*
    Below we will start all stopped threads. But if the user wants to
    start only one thread, do as if the other thread was running (as we
    don't wan't to touch the other thread), so set the bit to 0 for the
    other thread
  */
  if (thd->lex->slave_thd_opt)
    thread_mask&= thd->lex->slave_thd_opt;
  if (thread_mask) //some threads are stopped, start them
  {
    if (init_master_info(mi,master_info_file_tmp,relay_log_info_file_tmp, 0,
                         thread_mask))
      slave_errno=ER_MASTER_INFO;
    else if (!*mi->host)
    {
      slave_errno= ER_BAD_SLAVE; net_report= 0;
      my_message(slave_errno, "Misconfigured slave: MASTER_HOST was not set; Fix in config file or with CHANGE MASTER TO",
                 MYF(0));
    }
    else
    {
      /*
        If we will start SQL thread we will care about UNTIL options If
        not and they are specified we will ignore them and warn user
        about this fact.
      */
      if (thread_mask & SLAVE_SQL)
      {
        mysql_mutex_lock(&mi->rli.data_lock);

        if (thd->lex->mi.pos)
        {
          if (thd->lex->mi.relay_log_pos)
            slave_errno=ER_BAD_SLAVE_UNTIL_COND;
          mi->rli.until_condition= Relay_log_info::UNTIL_MASTER_POS;
          mi->rli.until_log_pos= thd->lex->mi.pos;
          /*
            We don't check thd->lex->mi.log_file_name for NULL here
            since it is checked in sql_yacc.yy
          */
          strmake_buf(mi->rli.until_log_name, thd->lex->mi.log_file_name);
        }
        else if (thd->lex->mi.relay_log_pos)
        {
          mi->rli.until_condition= Relay_log_info::UNTIL_RELAY_POS;
          mi->rli.until_log_pos= thd->lex->mi.relay_log_pos;
          strmake_buf(mi->rli.until_log_name, thd->lex->mi.relay_log_name);
        }
        else if (thd->lex->mi.gtid_pos_str.str)
        {
          if (mi->rli.until_gtid_pos.load(thd->lex->mi.gtid_pos_str.str,
                                          thd->lex->mi.gtid_pos_str.length))
          {
            slave_errno= ER_INCORRECT_GTID_STATE;
            mysql_mutex_unlock(&mi->rli.data_lock);
            goto err;
          }
          mi->rli.until_condition= Relay_log_info::UNTIL_GTID;
        }
        else
          mi->rli.clear_until_condition();

        if (mi->rli.until_condition == Relay_log_info::UNTIL_MASTER_POS ||
            mi->rli.until_condition == Relay_log_info::UNTIL_RELAY_POS)
        {
          /* Preparing members for effective until condition checking */
          const char *p= fn_ext(mi->rli.until_log_name);
          char *p_end;
          if (*p)
          {
            //p points to '.'
            mi->rli.until_log_name_extension= strtoul(++p,&p_end, 10);
            /*
              p_end points to the first invalid character. If it equals
              to p, no digits were found, error. If it contains '\0' it
              means conversion went ok.
            */
            if (p_end==p || *p_end)
              slave_errno=ER_BAD_SLAVE_UNTIL_COND;
          }
          else
            slave_errno=ER_BAD_SLAVE_UNTIL_COND;

          /* mark the cached result of the UNTIL comparison as "undefined" */
          mi->rli.until_log_names_cmp_result=
            Relay_log_info::UNTIL_LOG_NAMES_CMP_UNKNOWN;
        }

        if (mi->rli.until_condition != Relay_log_info::UNTIL_NONE)
        {
          /* Issuing warning then started without --skip-slave-start */
          if (!opt_skip_slave_start)
            push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
                         ER_MISSING_SKIP_SLAVE,
                         ER_THD(thd, ER_MISSING_SKIP_SLAVE));
        }

        mysql_mutex_unlock(&mi->rli.data_lock);
      }
      else if (thd->lex->mi.pos || thd->lex->mi.relay_log_pos)
        push_warning(thd,
                     Sql_condition::WARN_LEVEL_NOTE, ER_UNTIL_COND_IGNORED,
                     ER_THD(thd, ER_UNTIL_COND_IGNORED));

      if (!slave_errno)
        slave_errno = start_slave_threads(thd,
                                          1,
                                          1 /* wait for start */,
                                          mi,
                                          master_info_file_tmp,
                                          relay_log_info_file_tmp,
                                          thread_mask);
    }
  }
  else
  {
    /* no error if all threads are already started, only a warning */
    push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, ER_SLAVE_WAS_RUNNING,
                 ER_THD(thd, ER_SLAVE_WAS_RUNNING));
  }

err:
  mi->unlock_slave_threads();
  thd_proc_info(thd, 0);

  if (slave_errno)
  {
    if (net_report)
      my_error(slave_errno, MYF(0),
               (int) mi->connection_name.length,
               mi->connection_name.str);
    DBUG_RETURN(slave_errno == ER_BAD_SLAVE ? -1 : 1);
  }

  DBUG_RETURN(0);
}
+ + @retval 0 success + @retval 1 error + @retval -1 error +*/ + +int stop_slave(THD* thd, Master_info* mi, bool net_report ) +{ + int slave_errno; + DBUG_ENTER("stop_slave"); + DBUG_PRINT("enter",("Connection: %s", mi->connection_name.str)); + + if (check_global_access(thd, PRIV_STMT_STOP_SLAVE)) + DBUG_RETURN(-1); + THD_STAGE_INFO(thd, stage_killing_slave); + int thread_mask; + mi->lock_slave_threads(); + /* + Get a mask of _running_ threads. + We don't have to test for mi->killed as the thread_mask will take care + of checking if threads exists + */ + init_thread_mask(&thread_mask,mi,0 /* not inverse*/); + /* + Below we will stop all running threads. + But if the user wants to stop only one thread, do as if the other thread + was stopped (as we don't wan't to touch the other thread), so set the + bit to 0 for the other thread + */ + if (thd->lex->slave_thd_opt) + thread_mask &= thd->lex->slave_thd_opt; + + if (thread_mask) + { + slave_errno= terminate_slave_threads(mi,thread_mask, 0 /* get lock */); + } + else + { + //no error if both threads are already stopped, only a warning + slave_errno= 0; + push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, ER_SLAVE_WAS_NOT_RUNNING, + ER_THD(thd, ER_SLAVE_WAS_NOT_RUNNING)); + } + + mi->unlock_slave_threads(); + + if (slave_errno) + { + if (net_report) + my_message(slave_errno, ER_THD(thd, slave_errno), MYF(0)); + DBUG_RETURN(1); + } + + DBUG_RETURN(0); +} + + +/** + Execute a RESET SLAVE statement. + + @param thd Pointer to THD object of the client thread executing the + statement. + + @param mi Pointer to Master_info object for the slave. 
+ + @retval 0 success + @retval 1 error +*/ +int reset_slave(THD *thd, Master_info* mi) +{ + MY_STAT stat_area; + char fname[FN_REFLEN]; + int thread_mask= 0, error= 0; + uint sql_errno=ER_UNKNOWN_ERROR; + const char* errmsg= "Unknown error occurred while resetting slave"; + char master_info_file_tmp[FN_REFLEN]; + char relay_log_info_file_tmp[FN_REFLEN]; + DBUG_ENTER("reset_slave"); + + mi->lock_slave_threads(); + if (mi->killed) + { + /* connection was deleted while we waited for lock_slave_threads */ + mi->unlock_slave_threads(); + my_error(WARN_NO_MASTER_INFO, MYF(0), (int) mi->connection_name.length, + mi->connection_name.str); + DBUG_RETURN(-1); + } + + init_thread_mask(&thread_mask,mi,0 /* not inverse */); + if (thread_mask) // We refuse if any slave thread is running + { + mi->unlock_slave_threads(); + my_error(ER_SLAVE_MUST_STOP, MYF(0), (int) mi->connection_name.length, + mi->connection_name.str); + DBUG_RETURN(ER_SLAVE_MUST_STOP); + } + + // delete relay logs, clear relay log coordinates + if (unlikely((error= purge_relay_logs(&mi->rli, thd, + 1 /* just reset */, + &errmsg)))) + { + sql_errno= ER_RELAY_LOG_FAIL; + goto err; + } + + if (mi->using_gtid != Master_info::USE_GTID_SLAVE_POS && + mi->master_supports_gtid) + { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_NOTE, WARN_OPTION_CHANGING, + ER_THD(thd, WARN_OPTION_CHANGING), "RESET SLAVE", "Using_Gtid", + mi->using_gtid_astext(mi->using_gtid), + mi->using_gtid_astext(Master_info::USE_GTID_SLAVE_POS)); + } + + /* Clear master's log coordinates and associated information */ + mi->clear_in_memory_info(thd->lex->reset_slave_info.all); + + /* + Reset errors (the idea is that we forget about the + old master). 
+ */ + mi->clear_error(); + mi->rli.clear_error(); + mi->rli.clear_until_condition(); + mi->rli.clear_sql_delay(); + mi->rli.slave_skip_counter= 0; + + // close master_info_file, relay_log_info_file, set mi->inited=rli->inited=0 + end_master_info(mi); + + end_relay_log_info(&mi->rli); + // and delete these two files + create_logfile_name_with_suffix(master_info_file_tmp, + sizeof(master_info_file_tmp), + master_info_file, 0, + &mi->cmp_connection_name); + create_logfile_name_with_suffix(relay_log_info_file_tmp, + sizeof(relay_log_info_file_tmp), + relay_log_info_file, 0, + &mi->cmp_connection_name); + + fn_format(fname, master_info_file_tmp, mysql_data_home, "", 4+32); + if (mysql_file_stat(key_file_master_info, fname, &stat_area, MYF(0)) && + mysql_file_delete(key_file_master_info, fname, MYF(MY_WME))) + { + error=1; + goto err; + } + else if (global_system_variables.log_warnings > 1) + sql_print_information("Deleted Master_info file '%s'.", fname); + + // delete relay_log_info_file + fn_format(fname, relay_log_info_file_tmp, mysql_data_home, "", 4+32); + if (mysql_file_stat(key_file_relay_log_info, fname, &stat_area, MYF(0)) && + mysql_file_delete(key_file_relay_log_info, fname, MYF(MY_WME))) + { + error=1; + goto err; + } + else if (global_system_variables.log_warnings > 1) + sql_print_information("Deleted Master_info file '%s'.", fname); + + if (rpl_semi_sync_slave_enabled) + repl_semisync_slave.reset_slave(mi); +err: + mi->unlock_slave_threads(); + if (unlikely(error)) + my_error(sql_errno, MYF(0), errmsg); + DBUG_RETURN(error); +} + +/* + + Kill all Binlog_dump threads which previously talked to the same slave + ("same" means with the same server id). Indeed, if the slave stops, if the + Binlog_dump thread is waiting (mysql_cond_wait) for binlog update, then it + will keep existing until a query is written to the binlog. 
If the master is + idle, then this could last long, and if the slave reconnects, we could have 2 + Binlog_dump threads in SHOW PROCESSLIST, until a query is written to the + binlog. To avoid this, when the slave reconnects and sends COM_BINLOG_DUMP, + the master kills any existing thread with the slave's server id (if this id + is not zero; it will be true for real slaves, but false for mysqlbinlog when + it sends COM_BINLOG_DUMP to get a remote binlog dump). + + SYNOPSIS + kill_zombie_dump_threads() + slave_server_id the slave's server id +*/ + +struct kill_callback_arg +{ + kill_callback_arg(uint32 id): slave_server_id(id), thd(0) {} + uint32 slave_server_id; + THD *thd; +}; + +static my_bool kill_callback(THD *thd, kill_callback_arg *arg) +{ + if (thd->get_command() == COM_BINLOG_DUMP && + thd->variables.server_id == arg->slave_server_id) + { + arg->thd= thd; + mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete + mysql_mutex_lock(&thd->LOCK_thd_data); + return 1; + } + return 0; +} + + +void kill_zombie_dump_threads(uint32 slave_server_id) +{ + kill_callback_arg arg(slave_server_id); + server_threads.iterate(kill_callback, &arg); + + if (arg.thd) + { + /* + Here we do not call kill_one_thread() as + it will be slow because it will iterate through the list + again. We just to do kill the thread ourselves. + */ + arg.thd->awake_no_mutex(KILL_SLAVE_SAME_ID); + mysql_mutex_unlock(&arg.thd->LOCK_thd_kill); + mysql_mutex_unlock(&arg.thd->LOCK_thd_data); + } +} + +/** + Get value for a string parameter with error checking + + Note that in case of error the original string should not be updated! 
+ + @ret 0 ok + @ret 1 error +*/ + +static bool get_string_parameter(char *to, const char *from, size_t length, + const char *name, CHARSET_INFO *cs) +{ + if (from) // Empty paramaters allowed + { + size_t from_length= strlen(from); + size_t from_numchars= cs->numchars(from, from + from_length); + if (from_numchars > length / cs->mbmaxlen) + { + my_error(ER_WRONG_STRING_LENGTH, MYF(0), from, name, + (int) (length / cs->mbmaxlen)); + return 1; + } + memcpy(to, from, from_length+1); + } + return 0; +} + + +/** + Execute a CHANGE MASTER statement. + + @param thd Pointer to THD object for the client thread executing the + statement. + + @param mi Pointer to Master_info object belonging to the slave's IO + thread. + + @param master_info_added Out parameter saying if the Master_info *mi was + added to the global list of masters. This is useful in error conditions + to know if caller should free Master_info *mi. + + @retval FALSE success + @retval TRUE error +*/ +bool change_master(THD* thd, Master_info* mi, bool *master_info_added) +{ + int thread_mask; + const char* errmsg= 0; + bool need_relay_log_purge= 1; + bool ret= FALSE; + char saved_host[HOSTNAME_LENGTH + 1]; + uint saved_port; + char saved_log_name[FN_REFLEN]; + Master_info::enum_using_gtid saved_using_gtid; + char master_info_file_tmp[FN_REFLEN]; + char relay_log_info_file_tmp[FN_REFLEN]; + my_off_t saved_log_pos; + LEX_MASTER_INFO* lex_mi= &thd->lex->mi; + DYNAMIC_ARRAY *do_ids, *ignore_ids; + + DBUG_ENTER("change_master"); + + DBUG_ASSERT(master_info_index); + mysql_mutex_assert_owner(&LOCK_active_mi); + + *master_info_added= false; + /* + We need to check if there is an empty master_host. Otherwise + change master succeeds, a master.info file is created containing + empty master_host string and when issuing: start slave; an error + is thrown stating that the server is not configured as slave. + (See BUG#28796). 
  */
  if (lex_mi->host && !*lex_mi->host)
  {
    my_error(ER_WRONG_ARGUMENTS, MYF(0), "MASTER_HOST");
    DBUG_RETURN(TRUE);
  }
  if (master_info_index->check_duplicate_master_info(&lex_mi->connection_name,
                                                     lex_mi->host,
                                                     lex_mi->port))
    DBUG_RETURN(TRUE);

  mi->lock_slave_threads();
  if (mi->killed)
  {
    /* connection was deleted while we waited for lock_slave_threads */
    mi->unlock_slave_threads();
    my_error(WARN_NO_MASTER_INFO, MYF(0), (int) mi->connection_name.length,
             mi->connection_name.str);
    DBUG_RETURN(TRUE);
  }

  init_thread_mask(&thread_mask,mi,0 /*not inverse*/);
  if (thread_mask) // We refuse if any slave thread is running
  {
    my_error(ER_SLAVE_MUST_STOP, MYF(0), (int) mi->connection_name.length,
             mi->connection_name.str);
    ret= TRUE;
    goto err;
  }

  THD_STAGE_INFO(thd, stage_changing_master);

  /* Per-connection master.info / relay-log.info file names */
  create_logfile_name_with_suffix(master_info_file_tmp,
                                  sizeof(master_info_file_tmp),
                                  master_info_file, 0,
                                  &mi->cmp_connection_name);
  create_logfile_name_with_suffix(relay_log_info_file_tmp,
                                  sizeof(relay_log_info_file_tmp),
                                  relay_log_info_file, 0,
                                  &mi->cmp_connection_name);

  /* if new Master_info doesn't exists, add it */
  if (!master_info_index->get_master_info(&mi->connection_name,
                                          Sql_condition::WARN_LEVEL_NOTE))
  {
    if (master_info_index->add_master_info(mi, TRUE))
    {
      my_error(ER_MASTER_INFO, MYF(0),
               (int) lex_mi->connection_name.length,
               lex_mi->connection_name.str);
      ret= TRUE;
      goto err;
    }
    *master_info_added= true;
  }
  if (global_system_variables.log_warnings > 1)
    sql_print_information("Master connection name: '%.*s' "
                          "Master_info_file: '%s' "
                          "Relay_info_file: '%s'",
                          (int) mi->connection_name.length,
                          mi->connection_name.str,
                          master_info_file_tmp, relay_log_info_file_tmp);

  if (init_master_info(mi, master_info_file_tmp, relay_log_info_file_tmp, 0,
                       thread_mask))
  {
    my_error(ER_MASTER_INFO, MYF(0),
             (int) lex_mi->connection_name.length,
             lex_mi->connection_name.str);
    ret= TRUE;
    goto err;
  }

  /*
    Data lock not needed since we have already stopped the running threads,
    and we have the hold on the run locks which will keep all threads that
    could possibly modify the data structures from running
  */

  /*
    Before processing the command, save the previous state
    (used for the informational log message at the end).
  */
  strmake_buf(saved_host, mi->host);
  saved_port= mi->port;
  strmake_buf(saved_log_name, mi->master_log_name);
  saved_log_pos= mi->master_log_pos;
  saved_using_gtid= mi->using_gtid;

  /*
    If the user specified host or port without binlog or position,
    reset binlog's name to FIRST and position to 4.
  */

  if ((lex_mi->host || lex_mi->port) && !lex_mi->log_file_name && !lex_mi->pos)
  {
    mi->master_log_name[0] = 0;
    mi->master_log_pos= BIN_LOG_HEADER_SIZE;
  }

  if (lex_mi->log_file_name)
    strmake_buf(mi->master_log_name, lex_mi->log_file_name);
  if (lex_mi->pos)
  {
    mi->master_log_pos= lex_mi->pos;
  }
  DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));

  /* On error the original mi->host/user/password are left untouched */
  if (get_string_parameter(mi->host, lex_mi->host, sizeof(mi->host)-1,
                           "MASTER_HOST", system_charset_info) ||
      get_string_parameter(mi->user, lex_mi->user, sizeof(mi->user)-1,
                           "MASTER_USER", system_charset_info) ||
      get_string_parameter(mi->password, lex_mi->password,
                           sizeof(mi->password)-1, "MASTER_PASSWORD",
                           &my_charset_bin))
  {
    ret= TRUE;
    goto err;
  }

  if (lex_mi->port)
    mi->port = lex_mi->port;
  if (lex_mi->connect_retry)
    mi->connect_retry = lex_mi->connect_retry;
  if (lex_mi->heartbeat_opt != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
    mi->heartbeat_period = lex_mi->heartbeat_period;
  else
    /* Default heartbeat period: half of slave_net_timeout, capped */
    mi->heartbeat_period= (float) MY_MIN(SLAVE_MAX_HEARTBEAT_PERIOD,
                                         (slave_net_timeout/2.0));
  mi->received_heartbeats= 0; // counter lives until master is CHANGEd

  /*
    Reset the last time server_id list if the current CHANGE MASTER
    is mentioning IGNORE_SERVER_IDS= (...)
  */
  if (lex_mi->repl_ignore_server_ids_opt == LEX_MASTER_INFO::LEX_MI_ENABLE)
  {
    /* Check if the list contains replicate_same_server_id */
    for (uint i= 0; i < lex_mi->repl_ignore_server_ids.elements; i ++)
    {
      ulong s_id;
      get_dynamic(&lex_mi->repl_ignore_server_ids, (uchar*) &s_id, i);
      if (s_id == global_system_variables.server_id && replicate_same_server_id)
      {
        my_error(ER_SLAVE_IGNORE_SERVER_IDS, MYF(0), static_cast<int>(s_id));
        ret= TRUE;
        goto err;
      }
    }

    /* All ok. Update the old server ids with the new ones. */
    update_change_master_ids(&lex_mi->repl_ignore_server_ids,
                             &mi->ignore_server_ids);
  }

  if (lex_mi->ssl != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
    mi->ssl= (lex_mi->ssl == LEX_MASTER_INFO::LEX_MI_ENABLE);

  if (lex_mi->sql_delay != -1)
    mi->rli.set_sql_delay(lex_mi->sql_delay);

  if (lex_mi->ssl_verify_server_cert != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
    mi->ssl_verify_server_cert=
      (lex_mi->ssl_verify_server_cert == LEX_MASTER_INFO::LEX_MI_ENABLE);

  if (lex_mi->ssl_ca)
    strmake_buf(mi->ssl_ca, lex_mi->ssl_ca);
  if (lex_mi->ssl_capath)
    strmake_buf(mi->ssl_capath, lex_mi->ssl_capath);
  if (lex_mi->ssl_cert)
    strmake_buf(mi->ssl_cert, lex_mi->ssl_cert);
  if (lex_mi->ssl_cipher)
    strmake_buf(mi->ssl_cipher, lex_mi->ssl_cipher);
  if (lex_mi->ssl_key)
    strmake_buf(mi->ssl_key, lex_mi->ssl_key);
  if (lex_mi->ssl_crl)
    strmake_buf(mi->ssl_crl, lex_mi->ssl_crl);
  if (lex_mi->ssl_crlpath)
    strmake_buf(mi->ssl_crlpath, lex_mi->ssl_crlpath);

#ifndef HAVE_OPENSSL
  /* SSL options are accepted but ignored in builds without OpenSSL */
  if (lex_mi->ssl || lex_mi->ssl_ca || lex_mi->ssl_capath ||
      lex_mi->ssl_cert || lex_mi->ssl_cipher || lex_mi->ssl_key ||
      lex_mi->ssl_verify_server_cert || lex_mi->ssl_crl || lex_mi->ssl_crlpath)
    push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
                 ER_SLAVE_IGNORED_SSL_PARAMS,
                 ER_THD(thd, ER_SLAVE_IGNORED_SSL_PARAMS));
#endif

  if (lex_mi->relay_log_name)
  {
    need_relay_log_purge= 0;
    char relay_log_name[FN_REFLEN];
    mi->rli.relay_log.make_log_name(relay_log_name, lex_mi->relay_log_name);
    strmake_buf(mi->rli.group_relay_log_name, relay_log_name);
    strmake_buf(mi->rli.event_relay_log_name, relay_log_name);
  }

  if (lex_mi->relay_log_pos)
  {
    need_relay_log_purge= 0;
    mi->rli.group_relay_log_pos= mi->rli.event_relay_log_pos= lex_mi->relay_log_pos;
  }

  if (lex_mi->use_gtid_opt == LEX_MASTER_INFO::LEX_GTID_SLAVE_POS)
    mi->using_gtid= Master_info::USE_GTID_SLAVE_POS;
  else if (lex_mi->use_gtid_opt == LEX_MASTER_INFO::LEX_GTID_CURRENT_POS)
  {
    /* current_pos is deprecated; suggest master_demote_to_slave=1 instead */
    mi->using_gtid= Master_info::USE_GTID_CURRENT_POS;
    push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                        ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT,
                        ER_THD(thd, ER_WARN_DEPRECATED_SYNTAX),
                        "master_use_gtid=current_pos", "master_demote_to_slave=1");
  }
  else if (lex_mi->use_gtid_opt == LEX_MASTER_INFO::LEX_GTID_NO ||
           lex_mi->log_file_name || lex_mi->pos ||
           lex_mi->relay_log_name || lex_mi->relay_log_pos)
  {
    /* Explicit log coordinates implicitly switch GTID mode off */
    if (lex_mi->use_gtid_opt != LEX_MASTER_INFO::LEX_GTID_NO)
    {
      push_warning_printf(
          thd, Sql_condition::WARN_LEVEL_NOTE, WARN_OPTION_CHANGING,
          ER_THD(thd, WARN_OPTION_CHANGING), "CHANGE MASTER TO", "Using_Gtid",
          mi->using_gtid_astext(mi->using_gtid),
          mi->using_gtid_astext(Master_info::USE_GTID_NO));
    }
    mi->using_gtid= Master_info::USE_GTID_NO;
  }

  /*
    Warn about ignored options if there are GTID/log coordinate option
    conflicts
  */
  if (mi->using_gtid != Master_info::USE_GTID_NO)
  {
    if (lex_mi->log_file_name)
      push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
                          WARN_OPTION_IGNORED,
                          ER_THD(thd, WARN_OPTION_IGNORED), "MASTER_LOG_FILE");
    if (lex_mi->pos)
      push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
                          WARN_OPTION_IGNORED,
                          ER_THD(thd, WARN_OPTION_IGNORED), "MASTER_LOG_POS");
    if (lex_mi->relay_log_name)
      push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
                          WARN_OPTION_IGNORED,
                          ER_THD(thd, WARN_OPTION_IGNORED), "RELAY_LOG_FILE");
    if (lex_mi->relay_log_pos)
      push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
                          WARN_OPTION_IGNORED,
                          ER_THD(thd, WARN_OPTION_IGNORED), "RELAY_LOG_POS");
  }

  do_ids= ((lex_mi->repl_do_domain_ids_opt ==
            LEX_MASTER_INFO::LEX_MI_ENABLE) ?
           &lex_mi->repl_do_domain_ids : NULL);

  ignore_ids= ((lex_mi->repl_ignore_domain_ids_opt ==
                LEX_MASTER_INFO::LEX_MI_ENABLE) ?
               &lex_mi->repl_ignore_domain_ids : NULL);

  /*
    Note: mi->using_gtid stores the previous state in case no MASTER_USE_GTID
    is specified.
  */
  if (mi->domain_id_filter.update_ids(do_ids, ignore_ids, mi->using_gtid))
  {
    my_error(ER_MASTER_INFO, MYF(0),
             (int) lex_mi->connection_name.length,
             lex_mi->connection_name.str);
    ret= TRUE;
    goto err;
  }

  /*
    If user did specify neither host nor port nor any log name nor any log
    pos, i.e. he specified only user/password/master_connect_retry, he probably
    wants replication to resume from where it had left, i.e. from the
    coordinates of the **SQL** thread (imagine the case where the I/O is ahead
    of the SQL; restarting from the coordinates of the I/O would lose some
    events which is probably unwanted when you are just doing minor changes
    like changing master_connect_retry).
    A side-effect is that if only the I/O thread was started, this thread may
    restart from ''/4 after the CHANGE MASTER. That's a minor problem (it is a
    much more unlikely situation than the one we are fixing here).
    Note: coordinates of the SQL thread must be read here, before the
    'if (need_relay_log_purge)' block which resets them.
  */
  if (!lex_mi->host && !lex_mi->port &&
      !lex_mi->log_file_name && !lex_mi->pos &&
      need_relay_log_purge)
  {
    /*
      Sometimes mi->rli.master_log_pos == 0 (it happens when the SQL thread is
      not initialized), so we use a MY_MAX().
      What happens to mi->rli.master_log_pos during the initialization stages
      of replication is not 100% clear, so we guard against problems using
      MY_MAX().
    */
    mi->master_log_pos = MY_MAX(BIN_LOG_HEADER_SIZE,
                                mi->rli.group_master_log_pos);
    strmake_buf(mi->master_log_name, mi->rli.group_master_log_name);
  }

  /*
    MASTER_DEMOTE_TO_SLAVE is set. Merge gtid_binlog_pos into gtid_slave_pos.
  */
  if (lex_mi->is_demotion_opt)
  {
    String new_gtid_state;

    /* Demotion requires Using_Gtid=Slave_Pos and an open binlog */
    if (mi->using_gtid != Master_info::USE_GTID_SLAVE_POS)
    {
      my_error(ER_CM_OPTION_MISSING_REQUIREMENT, MYF(0),
               "MASTER_DEMOTE_TO_SLAVE", "TRUE", "Using_Gtid=Slave_Pos");
      ret= TRUE;
      goto err;
    }

    if (!mysql_bin_log.is_open())
    {
      my_error(ER_NO_BINARY_LOGGING, MYF(0));
      ret= TRUE;
      goto err;
    }

    if ((ret= rpl_append_gtid_state(&new_gtid_state, true)))
      goto err;

    if (rpl_global_gtid_slave_state->load(
            thd, new_gtid_state.ptr(), new_gtid_state.length(), true, true))
    {
      my_error(ER_FAILED_GTID_STATE_INIT, MYF(0));
      ret= TRUE;
      goto err;
    }
  }

  /*
    Relay log's IO_CACHE may not be inited, if rli->inited==0 (server was never
    a slave before).
  */
  if (flush_master_info(mi, FALSE, FALSE))
  {
    my_error(ER_RELAY_LOG_INIT, MYF(0), "Failed to flush master info file");
    ret= TRUE;
    goto err;
  }
  if (need_relay_log_purge)
  {
    THD_STAGE_INFO(thd, stage_purging_old_relay_logs);
    if (purge_relay_logs(&mi->rli, thd,
                         0 /* not only reset, but also reinit */,
                         &errmsg))
    {
      my_error(ER_RELAY_LOG_FAIL, MYF(0), errmsg);
      ret= TRUE;
      goto err;
    }
  }
  else
  {
    const char* msg;
    /* Relay log is already initialized */
    if (init_relay_log_pos(&mi->rli,
                           mi->rli.group_relay_log_name,
                           mi->rli.group_relay_log_pos,
                           0 /*no data lock*/,
                           &msg, 0))
    {
      my_error(ER_RELAY_LOG_INIT, MYF(0), msg);
      ret= TRUE;
      goto err;
    }
  }
  /*
    Coordinates in rli were spoilt by the 'if (need_relay_log_purge)' block,
    so restore them to good values. If we left them to ''/0, that would work;
    but that would fail in the case of 2 successive CHANGE MASTER (without a
    START SLAVE in between): because first one would set the coords in mi to
    the good values of those in rli, the set those in rli to ''/0, then
    second CHANGE MASTER would set the coords in mi to those of rli, i.e. to
    ''/0: we have lost all copies of the original good coordinates.
    That's why we always save good coords in rli.
  */
  mi->rli.group_master_log_pos= mi->master_log_pos;
  DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
  strmake_buf(mi->rli.group_master_log_name,mi->master_log_name);

  if (!mi->rli.group_master_log_name[0]) // uninitialized case
    mi->rli.group_master_log_pos=0;

  mysql_mutex_lock(&mi->rli.data_lock);
  mi->rli.abort_pos_wait++; /* for MASTER_POS_WAIT() to abort */
  /* Clear the errors, for a clean start */
  mi->rli.clear_error();
  mi->rli.clear_until_condition();
  mi->rli.slave_skip_counter= 0;

  sql_print_information("'CHANGE MASTER TO executed'. "
    "Previous state master_host='%s', master_port='%u', master_log_file='%s', "
    "master_log_pos='%ld'. "
    "New state master_host='%s', master_port='%u', master_log_file='%s', "
    "master_log_pos='%ld'.", saved_host, saved_port, saved_log_name,
    (ulong) saved_log_pos, mi->host, mi->port, mi->master_log_name,
    (ulong) mi->master_log_pos);
  if (saved_using_gtid != Master_info::USE_GTID_NO ||
      mi->using_gtid != Master_info::USE_GTID_NO)
    sql_print_information("Previous Using_Gtid=%s. New Using_Gtid=%s",
                          mi->using_gtid_astext(saved_using_gtid),
                          mi->using_gtid_astext(mi->using_gtid));

  /*
    If we don't write new coordinates to disk now, then old will remain in
    relay-log.info until START SLAVE is issued; but if mysqld is shutdown
    before START SLAVE, then old will remain in relay-log.info, and will be the
    in-memory value at restart (thus causing errors, as the old relay log does
    not exist anymore).
  */
  if (mi->rli.flush())
    ret= 1;
  mysql_cond_broadcast(&mi->data_cond);
  mysql_mutex_unlock(&mi->rli.data_lock);

err:
  mi->unlock_slave_threads();
  if (ret == FALSE)
    my_ok(thd);
  else
  {
    /*
      Depending on where CHANGE MASTER failed, the logs may be waiting to be
      reopened. This would break future log updates and CHANGE MASTER calls.
      `try_fix_log_state()` allows the relay log to fix its state to no longer
      expect to be reopened.
    */
    mi->rli.relay_log.try_fix_log_state();
  }
  DBUG_RETURN(ret);
}


/**
  Execute a RESET MASTER statement.

  @param thd Pointer to THD object of the client thread executing the
  statement.

  @retval 0 success
  @retval 1 error
*/
int reset_master(THD* thd, rpl_gtid *init_state, uint32 init_state_len,
                 ulong next_log_number)
{
  if (!mysql_bin_log.is_open())
  {
    my_message(ER_FLUSH_MASTER_BINLOG_CLOSED,
               ER_THD(thd, ER_FLUSH_MASTER_BINLOG_CLOSED), MYF(0));
    return 1;
  }

#ifdef WITH_WSREP
  if (WSREP(thd))
  {
    /* RESET MASTER will initialize GTID sequence, and that would happen locally
       in this node, so better reject it
    */
    my_message(ER_NOT_ALLOWED_COMMAND,
               "RESET MASTER not allowed when node is in cluster", MYF(0));
    return 1;
  }
#endif /* WITH_WSREP */
  bool ret= 0;
  /* Temporarily disable master semisync before resetting master. */
  repl_semisync_master.before_reset_master();
  ret= mysql_bin_log.reset_logs(thd, 1, init_state, init_state_len,
                                next_log_number);
  repl_semisync_master.after_reset_master();
  DBUG_EXECUTE_IF("crash_after_reset_master", DBUG_SUICIDE(););

  return ret;
}


/**
  Execute a SHOW BINLOG EVENTS statement.

  @param thd Pointer to THD object for the client thread executing the
  statement.

  @retval FALSE success
  @retval TRUE failure
*/
bool mysql_show_binlog_events(THD* thd)
{
  Protocol *protocol= thd->protocol;
  List<Item> field_list;
  char errmsg_buf[MYSYS_ERRMSG_SIZE];
  const char *errmsg = 0;
  bool ret = TRUE;
  /*
    Using checksum validate the correctness of event pos specified in show
    binlog events command.
  */
  bool verify_checksum_once= false;
  IO_CACHE log;
  File file = -1;
  MYSQL_BIN_LOG *binary_log= NULL;
  int old_max_allowed_packet= thd->variables.max_allowed_packet;
  Master_info *mi= 0;
  LOG_INFO linfo;
  LEX_MASTER_INFO *lex_mi= &thd->lex->mi;
  enum enum_binlog_checksum_alg checksum_alg;
  my_off_t binlog_size;
  MY_STAT s;

  DBUG_ENTER("mysql_show_binlog_events");

  Log_event::init_show_field_list(thd, &field_list);
  if (protocol->send_result_set_metadata(&field_list,
                                         Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
    DBUG_RETURN(TRUE);

  DBUG_ASSERT(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS ||
              thd->lex->sql_command == SQLCOM_SHOW_RELAYLOG_EVENTS);

  /* select which binary log to use: binlog or relay */
  if ( thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS )
  {
    binary_log= &mysql_bin_log;
  }
  else /* showing relay log contents */
  {
    if (!lex_mi->connection_name.str)
      lex_mi->connection_name= thd->variables.default_master_connection;
    if (!(mi= get_master_info(&lex_mi->connection_name,
                              Sql_condition::WARN_LEVEL_ERROR)))
    {
      DBUG_RETURN(TRUE);
    }
    binary_log= &(mi->rli.relay_log);
  }

  Format_description_log_event *description_event= new
    Format_description_log_event(3); /* MySQL 4.0 by default */

  if (binary_log->is_open())
  {
    SELECT_LEX_UNIT *unit= &thd->lex->unit;
    ha_rows event_count;
    my_off_t pos = MY_MAX(BIN_LOG_HEADER_SIZE, lex_mi->pos); // user-friendly
    char search_file_name[FN_REFLEN], *name;
    const char *log_file_name = lex_mi->log_file_name;
    mysql_mutex_t *log_lock = binary_log->get_log_lock();
    Log_event* ev;

    if (mi)
    {
      /* We can unlock the mutex as we have a lock on the file */
      mi->release();
      mi= 0;
    }

    unit->set_limit(thd->lex->current_select);

    name= search_file_name;
    if (log_file_name)
      binary_log->make_log_name(search_file_name, log_file_name);
    else
      name=0;                                   // Find first log

    linfo.index_file_offset = 0;

    if (binary_log->find_log_pos(&linfo, name, 1))
    {
      errmsg = "Could not find target log";
      goto err;
    }

    thd->set_current_linfo(&linfo);

    if ((file=open_binlog(&log, linfo.log_file_name, &errmsg)) < 0)
      goto err;

    /* Reject positions past the end of the file before scanning */
    my_stat(linfo.log_file_name, &s, MYF(0));
    binlog_size= s.st_size;
    if (lex_mi->pos > binlog_size)
    {
      snprintf(errmsg_buf, sizeof(errmsg_buf), "Invalid pos specified. Requested from pos:%llu is "
               "greater than actual file size:%lu\n", lex_mi->pos,
               (ulong)s.st_size);
      errmsg= errmsg_buf;
      goto err;
    }

    /*
      to account binlog event header size
    */
    thd->variables.max_allowed_packet += MAX_LOG_EVENT_HEADER;

    mysql_mutex_lock(log_lock);

    /*
      open_binlog() sought to position 4.
      Read the first event in case it's a Format_description_log_event, to
      know the format. If there's no such event, we are 3.23 or 4.x. This
      code, like before, can't read 3.23 binlogs.
      Also read the second event, in case it's a Start_encryption_log_event.
      This code will fail on a mixed relay log (one which has Format_desc then
      Rotate then Format_desc).
    */

    my_off_t scan_pos = BIN_LOG_HEADER_SIZE;
    while (scan_pos < pos)
    {
      ev= Log_event::read_log_event(&log, description_event,
                                    opt_master_verify_checksum);
      scan_pos = my_b_tell(&log);
      if (ev == NULL || !ev->is_valid())
      {
        mysql_mutex_unlock(log_lock);
        errmsg = "Wrong offset or I/O error";
        goto err;
      }
      if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
      {
        delete description_event;
        description_event= (Format_description_log_event*) ev;
      }
      else
      {
        if (ev->get_type_code() == START_ENCRYPTION_EVENT)
        {
          if (description_event->start_decryption((Start_encryption_log_event*) ev))
          {
            delete ev;
            mysql_mutex_unlock(log_lock);
            errmsg = "Could not initialize decryption of binlog.";
            goto err;
          }
        }
        delete ev;
        break;
      }
    }

    if (lex_mi->pos > BIN_LOG_HEADER_SIZE)
    {
      checksum_alg= description_event->checksum_alg;
      /* Validate user given position using checksum */
      if (checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
          checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
      {
        if (!opt_master_verify_checksum)
          verify_checksum_once= true;
        my_b_seek(&log, pos);
      }
      else
      {
        /*
          No checksums: walk event lengths from the current position to
          verify that pos lands exactly on an event boundary.
        */
        my_off_t cur_pos= my_b_tell(&log);
        ulong next_event_len= 0;
        uchar buff[IO_SIZE];
        while (cur_pos < pos)
        {
          my_b_seek(&log, cur_pos + EVENT_LEN_OFFSET);
          if (my_b_read(&log, (uchar *)buff, sizeof(next_event_len)))
          {
            mysql_mutex_unlock(log_lock);
            errmsg = "Could not read event_length";
            goto err;
          }
          next_event_len= uint4korr(buff);
          cur_pos= cur_pos + next_event_len;
        }
        if (cur_pos > pos)
        {
          mysql_mutex_unlock(log_lock);
          errmsg= "Invalid input pos specified please provide valid one.";
          goto err;
        }
        my_b_seek(&log, cur_pos);
      }
    }

    for (event_count = 0;
         (ev = Log_event::read_log_event(&log,
                                         description_event,
                                         (opt_master_verify_checksum ||
                                          verify_checksum_once))); )
    {
      if (!unit->lim.check_offset(event_count) &&
          ev->net_send(protocol, linfo.log_file_name, pos))
      {
        errmsg = "Net error";
        delete ev;
        mysql_mutex_unlock(log_lock);
        goto err;
      }

      if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
      {
        /* Switch to the new format description, keeping crypto state */
        Format_description_log_event* new_fdle=
          (Format_description_log_event*) ev;
        new_fdle->copy_crypto_data(description_event);
        delete description_event;
        description_event= new_fdle;
      }
      else
      {
        if (ev->get_type_code() == START_ENCRYPTION_EVENT)
        {
          if (description_event->start_decryption((Start_encryption_log_event*) ev))
          {
            errmsg = "Error starting decryption";
            delete ev;
            mysql_mutex_unlock(log_lock);
            goto err;
          }
        }
        delete ev;
      }

      verify_checksum_once= false;
      pos = my_b_tell(&log);

      if (++event_count >= unit->lim.get_select_limit())
        break;
    }

    if (unlikely(event_count < unit->lim.get_select_limit() && log.error))
    {
      errmsg = "Wrong offset or I/O error";
      mysql_mutex_unlock(log_lock);
      goto err;
    }

    mysql_mutex_unlock(log_lock);
  }
  else if (mi)
    mi->release();

  // Check that linfo is still on the function scope.
  DEBUG_SYNC(thd, "after_show_binlog_events");

  ret= FALSE;

err:
  delete description_event;
  if (file >= 0)
  {
    end_io_cache(&log);
    mysql_file_close(file, MYF(MY_WME));
  }

  if (errmsg)
    my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0),
             "SHOW BINLOG EVENTS", errmsg);
  else
    my_eof(thd);

  thd->reset_current_linfo();
  thd->variables.max_allowed_packet= old_max_allowed_packet;
  DBUG_RETURN(ret);
}


/* Build the result-set field list for SHOW BINLOG STATUS */
void show_binlog_info_get_fields(THD *thd, List<Item> *field_list)
{
  MEM_ROOT *mem_root= thd->mem_root;
  field_list->push_back(new (mem_root)
                        Item_empty_string(thd, "File", FN_REFLEN),
                        mem_root);
  field_list->push_back(new (mem_root)
                        Item_return_int(thd, "Position", 20,
                                        MYSQL_TYPE_LONGLONG),
                        mem_root);
  field_list->push_back(new (mem_root)
                        Item_empty_string(thd, "Binlog_Do_DB", 255),
                        mem_root);
  field_list->push_back(new (mem_root)
                        Item_empty_string(thd, "Binlog_Ignore_DB", 255),
                        mem_root);
}


/**
  Execute a SHOW BINLOG STATUS statement.
+ + @param thd Pointer to THD object for the client thread executing the + statement. + + @retval FALSE success + @retval TRUE failure +*/ +bool show_binlog_info(THD* thd) +{ + Protocol *protocol= thd->protocol; + DBUG_ENTER("show_binlog_info"); + + List field_list; + show_binlog_info_get_fields(thd, &field_list); + + if (protocol->send_result_set_metadata(&field_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) + DBUG_RETURN(TRUE); + protocol->prepare_for_resend(); + + if (mysql_bin_log.is_open()) + { + LOG_INFO li; + mysql_bin_log.get_current_log(&li); + size_t dir_len = dirname_length(li.log_file_name); + const char *base= li.log_file_name + dir_len; + + protocol->store(base, strlen(base), &my_charset_bin); + protocol->store((ulonglong) li.pos); + protocol->store(binlog_filter->get_do_db()); + protocol->store(binlog_filter->get_ignore_db()); + if (protocol->write()) + DBUG_RETURN(TRUE); + } + my_eof(thd); + DBUG_RETURN(FALSE); +} + + +void show_binlogs_get_fields(THD *thd, List *field_list) +{ + MEM_ROOT *mem_root= thd->mem_root; + field_list->push_back(new (mem_root) + Item_empty_string(thd, "Log_name", 255), + mem_root); + field_list->push_back(new (mem_root) + Item_return_int(thd, "File_size", 20, + MYSQL_TYPE_LONGLONG), + mem_root); +} + + +/** + Execute a SHOW BINARY LOGS statement. + + @param thd Pointer to THD object for the client thread executing the + statement. + + @retval FALSE success + @retval TRUE failure + + @notes + We only keep the index locked while reading all file names as + if there are 1000+ binary logs, there can be a serious impact + as getting the file sizes can take some notable time (up to 20 seconds + has been reported) and we don't want to block log rotations for that long. 
+*/
+
+#define BINLOG_INDEX_RETRY_COUNT 5
+
+bool show_binlogs(THD* thd)
+{
+  LOG_INFO cur;
+  MEM_ROOT mem_root;
+  binlog_file_entry *list;
+  List<Item> field_list;
+  Protocol *protocol= thd->protocol;
+  uint retry_count= 0;
+  size_t cur_dir_len;
+  uint64 expected_reset_masters;
+  DBUG_ENTER("show_binlogs");
+
+  if (!mysql_bin_log.is_open())
+  {
+    my_error(ER_NO_BINARY_LOGGING, MYF(0));
+    DBUG_RETURN(TRUE);
+  }
+
+  show_binlogs_get_fields(thd, &field_list);
+
+  if (protocol->send_result_set_metadata(&field_list,
+                                         Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
+    DBUG_RETURN(TRUE);
+
+  init_alloc_root(PSI_INSTRUMENT_ME, &mem_root, 8192, 0, MYF(MY_THREAD_SPECIFIC));
+retry:
+  /*
+    The current mutex handling here is to ensure we get the current log position
+    and all the log files from the index in sync without any index rotation
+    in between.
+  */
+  mysql_mutex_lock(mysql_bin_log.get_log_lock());
+  mysql_bin_log.lock_index();
+  mysql_bin_log.raw_get_current_log(&cur);
+  expected_reset_masters= mysql_bin_log.get_reset_master_count();
+  mysql_mutex_unlock(mysql_bin_log.get_log_lock());
+
+  /* The following call unlocks lock_index */
+  if ((!(list= get_binlog_list(&mem_root, false, true))))
+    goto err;
+
+  DEBUG_SYNC(thd, "at_after_lock_index");
+
+  // the 1st loop computes the sizes; If stat() fails, then retry
+  cur_dir_len= dirname_length(cur.log_file_name);
+  for (binlog_file_entry *cur_link= list; cur_link; cur_link= cur_link->next)
+  {
+    const char *fname= cur_link->name.str;
+    size_t dir_len= dirname_length(fname);
+    size_t length= cur_link->name.length- dir_len;
+
+    /* Skip directory name as we shouldn't include this in the result */
+    cur_link->name.str+= dir_len;
+    cur_link->name.length-= dir_len;
+
+    if (mysql_bin_log.get_reset_master_count() > expected_reset_masters)
+    {
+      /*
+        Reset master was called after we cached filenames.
+        Reinitialize the cache.
+      */
+      free_root(&mem_root, MYF(MY_MARK_BLOCKS_FREE));
+      goto retry;
+    }
+
+    if (!(strncmp(fname+dir_len, cur.log_file_name+cur_dir_len, length)))
+      cur_link->size= cur.pos;  /* The active log, use the active position */
+    else
+    {
+      MY_STAT stat_info;
+      if (mysql_file_stat(key_file_binlog, fname, &stat_info, MYF(0)))
+        cur_link->size= stat_info.st_size;
+      else
+      {
+        if (retry_count++ < BINLOG_INDEX_RETRY_COUNT)
+        {
+          free_root(&mem_root, MYF(MY_MARK_BLOCKS_FREE));
+          goto retry;
+        }
+        cur_link->size= 0;
+      }
+    }
+  }
+
+  for (binlog_file_entry *cur_link= list; cur_link; cur_link= cur_link->next)
+  {
+    protocol->prepare_for_resend();
+    protocol->store(cur_link->name.str, cur_link->name.length, &my_charset_bin);
+    protocol->store((ulonglong) cur_link->size);
+    if (protocol->write())
+      goto err;
+  }
+  free_root(&mem_root, MYF(0));
+  my_eof(thd);
+  DBUG_RETURN(FALSE);
+
+err:
+  free_root(&mem_root, MYF(0));
+  DBUG_RETURN(TRUE);
+}
+
+/**
+  Load data's io cache specific hook to be executed
+  before a chunk of data is being read into the cache's buffer
+  The fuction instantianates and writes into the binlog
+  replication events along LOAD DATA processing.
+ + @param file pointer to io-cache + @retval 0 success + @retval 1 failure +*/ +int log_loaded_block(IO_CACHE* file, uchar *Buffer, size_t Count) +{ + DBUG_ENTER("log_loaded_block"); + LOAD_FILE_IO_CACHE *lf_info= static_cast(file); + uint block_len; + /* buffer contains position where we started last read */ + uchar* buffer= (uchar*) my_b_get_buffer_start(file); + uint max_event_size= lf_info->thd->variables.max_allowed_packet; + + if (lf_info->thd->is_current_stmt_binlog_format_row()) + goto ret; + if (lf_info->last_pos_in_file != HA_POS_ERROR && + lf_info->last_pos_in_file >= my_b_get_pos_in_file(file)) + goto ret; + + for (block_len= (uint) (my_b_get_bytes_in_buffer(file)); block_len > 0; + buffer += MY_MIN(block_len, max_event_size), + block_len -= MY_MIN(block_len, max_event_size)) + { + lf_info->last_pos_in_file= my_b_get_pos_in_file(file); + if (lf_info->wrote_create_file) + { + Append_block_log_event a(lf_info->thd, lf_info->thd->db.str, buffer, + MY_MIN(block_len, max_event_size), + lf_info->log_delayed); + if (mysql_bin_log.write(&a)) + DBUG_RETURN(1); + } + else + { + Begin_load_query_log_event b(lf_info->thd, lf_info->thd->db.str, + buffer, + MY_MIN(block_len, max_event_size), + lf_info->log_delayed); + if (mysql_bin_log.write(&b)) + DBUG_RETURN(1); + lf_info->wrote_create_file= 1; + } + } +ret: + int res= Buffer ? lf_info->real_read_function(file, Buffer, Count) : 0; + DBUG_RETURN(res); +} + + +/** + Initialise the slave replication state from the mysql.gtid_slave_pos table. + + This is called each time an SQL thread starts, but the data is only actually + loaded on the first call. + + The slave state is the last GTID applied on the slave within each + replication domain. + + To avoid row lock contention, there are multiple rows for each domain_id. + The one containing the current slave state is the one with the maximal + sub_id value, within each domain_id. 
+
+    CREATE TABLE mysql.gtid_slave_pos (
+      domain_id INT UNSIGNED NOT NULL,
+      sub_id BIGINT UNSIGNED NOT NULL,
+      server_id INT UNSIGNED NOT NULL,
+      seq_no BIGINT UNSIGNED NOT NULL,
+      PRIMARY KEY (domain_id, sub_id))
+*/
+
+void
+rpl_init_gtid_slave_state()
+{
+  rpl_global_gtid_slave_state= new rpl_slave_state;
+}
+
+
+void
+rpl_deinit_gtid_slave_state()
+{
+  delete rpl_global_gtid_slave_state;
+}
+
+
+void
+rpl_init_gtid_waiting()
+{
+  rpl_global_gtid_waiting.init();
+}
+
+
+void
+rpl_deinit_gtid_waiting()
+{
+  rpl_global_gtid_waiting.destroy();
+}
+
+
+/*
+  Format the current GTID state as a string, for returning the value of
+  @@global.gtid_slave_pos.
+
+  If the flag use_binlog is true, then the contents of the binary log (if
+  enabled) is merged into the current GTID state (@@global.gtid_current_pos).
+*/
+int
+rpl_append_gtid_state(String *dest, bool use_binlog)
+{
+  int err;
+  rpl_gtid *gtid_list= NULL;
+  uint32 num_gtids= 0;
+
+  if (use_binlog && opt_bin_log &&
+      (err= mysql_bin_log.get_most_recent_gtid_list(&gtid_list, &num_gtids)))
+    return err;
+
+  err= rpl_global_gtid_slave_state->tostring(dest, gtid_list, num_gtids);
+  my_free(gtid_list);
+
+  return err;
+}
+
+
+/*
+  Load the current GTID position into a slave_connection_state, for use when
+  connecting to a master server with GTID.
+
+  If the flag use_binlog is true, then the contents of the binary log (if
+  enabled) is merged into the current GTID state (master_use_gtid=current_pos).
+*/
+int
+rpl_load_gtid_state(slave_connection_state *state, bool use_binlog)
+{
+  int err;
+  rpl_gtid *gtid_list= NULL;
+  uint32 num_gtids= 0;
+
+  if (use_binlog && opt_bin_log &&
+      (err= mysql_bin_log.get_most_recent_gtid_list(&gtid_list, &num_gtids)))
+    return err;
+
+  err= state->load(rpl_global_gtid_slave_state, gtid_list, num_gtids);
+  my_free(gtid_list);
+
+  return err;
+}
+
+
+bool
+rpl_gtid_pos_check(THD *thd, char *str, size_t len)
+{
+  slave_connection_state tmp_slave_state;
+  bool gave_conflict_warning= false, gave_missing_warning= false;
+
+  /* Check that we can parse the supplied string. */
+  if (tmp_slave_state.load(str, len))
+    return true;
+
+  /*
+    Check our own binlog for any of our own transactions that are newer
+    than the GTID state the user is requesting. Any such transactions would
+    result in an out-of-order binlog, which could break anyone replicating
+    with us as master.
+
+    So give an error if this is found, requesting the user to do a
+    RESET MASTER (to clean up the binlog) if they really want this.
+ */ + if (mysql_bin_log.is_open()) + { + rpl_gtid *binlog_gtid_list= NULL; + uint32 num_binlog_gtids= 0; + uint32 i; + + if (mysql_bin_log.get_most_recent_gtid_list(&binlog_gtid_list, + &num_binlog_gtids)) + { + my_error(ER_OUT_OF_RESOURCES, MYF(MY_WME)); + return true; + } + for (i= 0; i < num_binlog_gtids; ++i) + { + rpl_gtid *binlog_gtid= &binlog_gtid_list[i]; + rpl_gtid *slave_gtid; + if (binlog_gtid->server_id != global_system_variables.server_id) + continue; + if (!(slave_gtid= tmp_slave_state.find(binlog_gtid->domain_id))) + { + if (opt_gtid_strict_mode) + { + my_error(ER_MASTER_GTID_POS_MISSING_DOMAIN, MYF(0), + binlog_gtid->domain_id, binlog_gtid->domain_id, + binlog_gtid->server_id, binlog_gtid->seq_no); + break; + } + else if (!gave_missing_warning) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_MASTER_GTID_POS_MISSING_DOMAIN, + ER_THD(thd, ER_MASTER_GTID_POS_MISSING_DOMAIN), + binlog_gtid->domain_id, binlog_gtid->domain_id, + binlog_gtid->server_id, binlog_gtid->seq_no); + gave_missing_warning= true; + } + } + else if (slave_gtid->seq_no < binlog_gtid->seq_no) + { + if (opt_gtid_strict_mode) + { + my_error(ER_MASTER_GTID_POS_CONFLICTS_WITH_BINLOG, MYF(0), + slave_gtid->domain_id, slave_gtid->server_id, + slave_gtid->seq_no, binlog_gtid->domain_id, + binlog_gtid->server_id, binlog_gtid->seq_no); + break; + } + else if (!gave_conflict_warning) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_MASTER_GTID_POS_CONFLICTS_WITH_BINLOG, + ER_THD(thd, ER_MASTER_GTID_POS_CONFLICTS_WITH_BINLOG), + slave_gtid->domain_id, slave_gtid->server_id, + slave_gtid->seq_no, binlog_gtid->domain_id, + binlog_gtid->server_id, binlog_gtid->seq_no); + gave_conflict_warning= true; + } + } + } + my_free(binlog_gtid_list); + if (i != num_binlog_gtids) + return true; + } + + return false; +} + + +bool +rpl_gtid_pos_update(THD *thd, char *str, size_t len) +{ + if (rpl_global_gtid_slave_state->load(thd, str, len, true, true)) + { + 
my_error(ER_FAILED_GTID_STATE_INIT, MYF(0)); + return true; + } + else + return false; +} + +int compare_log_name(const char *log_1, const char *log_2) { + int res= 1; + const char *ext1_str= strrchr(log_1, '.'); + const char *ext2_str= strrchr(log_2, '.'); + char file_name_1[255], file_name_2[255]; + strmake(file_name_1, log_1, (ext1_str - log_1)); + strmake(file_name_2, log_2, (ext2_str - log_2)); + char *endptr = NULL; + res= strcmp(file_name_1, file_name_2); + if (!res) + { + ulong ext1= strtoul(++ext1_str, &endptr, 10); + ulong ext2= strtoul(++ext2_str, &endptr, 10); + res= (ext1 > ext2 ? 1 : ((ext1 == ext2) ? 0 : -1)); + } + return res; +} + +#endif /* HAVE_REPLICATION */ diff --git a/sql/sql_repl.h b/sql/sql_repl.h new file mode 100644 index 00000000..95916e31 --- /dev/null +++ b/sql/sql_repl.h @@ -0,0 +1,80 @@ +/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_REPL_INCLUDED +#define SQL_REPL_INCLUDED + +#include "rpl_filter.h" + +#ifdef HAVE_REPLICATION +#include "slave.h" + +struct slave_connection_state; + +extern my_bool opt_show_slave_auth_info; +extern char *master_host, *master_info_file; + +extern int max_binlog_dump_events; +extern my_bool opt_sporadic_binlog_dump_fail; + +int start_slave(THD* thd, Master_info* mi, bool net_report); +int stop_slave(THD* thd, Master_info* mi, bool net_report); +bool change_master(THD* thd, Master_info* mi, bool *master_info_added); +bool mysql_show_binlog_events(THD* thd); +int reset_slave(THD *thd, Master_info* mi); +int reset_master(THD* thd, rpl_gtid *init_state, uint32 init_state_len, + ulong next_log_number); +bool purge_master_logs(THD* thd, const char* to_log); +bool purge_master_logs_before_date(THD* thd, time_t purge_time); +bool log_in_use(const char* log_name); +void adjust_linfo_offsets(my_off_t purge_offset); +void show_binlogs_get_fields(THD *thd, List *field_list); +bool show_binlogs(THD* thd); +extern int init_master_info(Master_info* mi); +void kill_zombie_dump_threads(uint32 slave_server_id); +int check_binlog_magic(IO_CACHE* log, const char** errmsg); +int compare_log_name(const char *log_1, const char *log_2); + +struct LOAD_FILE_IO_CACHE : public IO_CACHE +{ + THD* thd; + my_off_t last_pos_in_file; + bool wrote_create_file, log_delayed; + int (*real_read_function)(struct st_io_cache *,uchar *,size_t); +}; + +int log_loaded_block(IO_CACHE* file, uchar *Buffer, size_t Count); +int init_replication_sys_vars(); +void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos, ushort flags); + +#ifdef HAVE_PSI_INTERFACE +extern PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state; +#endif +void rpl_init_gtid_slave_state(); +void 
rpl_deinit_gtid_slave_state(); +void rpl_init_gtid_waiting(); +void rpl_deinit_gtid_waiting(); +int gtid_state_from_binlog_pos(const char *name, uint32 pos, String *out_str); +int rpl_append_gtid_state(String *dest, bool use_binlog); +int rpl_load_gtid_state(slave_connection_state *state, bool use_binlog); +bool rpl_gtid_pos_check(THD *thd, char *str, size_t len); +bool rpl_gtid_pos_update(THD *thd, char *str, size_t len); +#else + +struct LOAD_FILE_IO_CACHE : public IO_CACHE { }; + +#endif /* HAVE_REPLICATION */ + +#endif /* SQL_REPL_INCLUDED */ diff --git a/sql/sql_schema.cc b/sql/sql_schema.cc new file mode 100644 index 00000000..f08204d2 --- /dev/null +++ b/sql/sql_schema.cc @@ -0,0 +1,141 @@ +/* + Copyright (c) 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_type.h" +#include "sql_schema.h" +#include "sql_class.h" + +class Schema_oracle: public Schema +{ +public: + Schema_oracle(const LEX_CSTRING &name) + :Schema(name) + { } + const Type_handler *map_data_type(THD *thd, const Type_handler *src) + const + { + if (src == &type_handler_newdate) + return thd->type_handler_for_datetime(); + return src; + } + + Item *make_item_func_replace(THD *thd, + Item *subj, + Item *find, + Item *replace) const; + Item *make_item_func_substr(THD *thd, + const Lex_substring_spec_st &spec) const; + Item *make_item_func_trim(THD *thd, const Lex_trim_st &spec) const; +}; + + +class Schema_maxdb: public Schema +{ +public: + Schema_maxdb(const LEX_CSTRING &name) + :Schema(name) + { } + const Type_handler *map_data_type(THD *thd, const Type_handler *src) + const + { + if (src == &type_handler_timestamp || + src == &type_handler_timestamp2) + return thd->type_handler_for_datetime(); + return src; + } +}; + + +Schema mariadb_schema(Lex_cstring(STRING_WITH_LEN("mariadb_schema"))); +Schema_oracle oracle_schema(Lex_cstring(STRING_WITH_LEN("oracle_schema"))); +Schema_maxdb maxdb_schema(Lex_cstring(STRING_WITH_LEN("maxdb_schema"))); + + +Schema *Schema::find_by_name(const LEX_CSTRING &name) +{ + DBUG_ASSERT(name.str); + if (mariadb_schema.eq_name(name)) + return &mariadb_schema; + if (oracle_schema.eq_name(name)) + return &oracle_schema; + if (maxdb_schema.eq_name(name)) + return &maxdb_schema; + return NULL; +} + + +Schema *Schema::find_implied(THD *thd) +{ + if (thd->variables.sql_mode & MODE_ORACLE) + return &oracle_schema; + if (thd->variables.sql_mode & MODE_MAXDB) + return &maxdb_schema; + return &mariadb_schema; +} + + +Item *Schema::make_item_func_replace(THD *thd, + Item *subj, + Item *find, + 
Item *replace) const +{ + return new (thd->mem_root) Item_func_replace(thd, subj, find, replace); +} + + +Item *Schema::make_item_func_substr(THD *thd, + const Lex_substring_spec_st &spec) const +{ + return spec.m_for ? + new (thd->mem_root) Item_func_substr(thd, spec.m_subject, spec.m_from, + spec.m_for) : + new (thd->mem_root) Item_func_substr(thd, spec.m_subject, spec.m_from); +} + + +Item *Schema::make_item_func_trim(THD *thd, const Lex_trim_st &spec) const +{ + return spec.make_item_func_trim_std(thd); +} + + +Item *Schema_oracle::make_item_func_replace(THD *thd, + Item *subj, + Item *find, + Item *replace) const +{ + return new (thd->mem_root) Item_func_replace_oracle(thd, subj, find, replace); +} + + +Item *Schema_oracle::make_item_func_substr(THD *thd, + const Lex_substring_spec_st &spec) const +{ + return spec.m_for ? + new (thd->mem_root) Item_func_substr_oracle(thd, spec.m_subject, + spec.m_from, + spec.m_for) : + new (thd->mem_root) Item_func_substr_oracle(thd, spec.m_subject, + spec.m_from); +} + + +Item *Schema_oracle::make_item_func_trim(THD *thd, + const Lex_trim_st &spec) const +{ + return spec.make_item_func_trim_oracle(thd); +} diff --git a/sql/sql_schema.h b/sql/sql_schema.h new file mode 100644 index 00000000..0258ff2d --- /dev/null +++ b/sql/sql_schema.h @@ -0,0 +1,71 @@ +#ifndef SQL_SCHEMA_H_INCLUDED +#define SQL_SCHEMA_H_INCLUDED +/* + Copyright (c) 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysqld.h" +#include "lex_string.h" + +class Schema +{ + LEX_CSTRING m_name; +public: + Schema(const LEX_CSTRING &name) + :m_name(name) + { } + virtual ~Schema() = default; + const LEX_CSTRING &name() const { return m_name; } + virtual const Type_handler *map_data_type(THD *thd, const Type_handler *src) + const + { + return src; + } + + // Builders for native SQL function with a special syntax in sql_yacc.yy + virtual Item *make_item_func_replace(THD *thd, + Item *subj, + Item *find, + Item *replace) const; + virtual Item *make_item_func_substr(THD *thd, + const Lex_substring_spec_st &spec) const; + + virtual Item *make_item_func_trim(THD *thd, const Lex_trim_st &spec) const; + + /* + For now we have *hard-coded* compatibility schemas: + schema_mariadb, schema_oracle, schema_maxdb. + But eventually we'll turn then into real databases on disk. + So the code below compares names according to the filesystem + case sensitivity, like it is done for regular databases. + + Note, this is different to information_schema, whose name + is always case insensitive. This is intentional! + The assymetry will be gone when we'll implement SQL standard + regular and delimited identifiers. + */ + bool eq_name(const LEX_CSTRING &name) const + { + return !table_alias_charset->strnncoll(m_name.str, m_name.length, + name.str, name.length); + } + static Schema *find_by_name(const LEX_CSTRING &name); + static Schema *find_implied(THD *thd); +}; + + +extern Schema mariadb_schema; + +#endif // SQL_SCHEMA_H_INCLUDED diff --git a/sql/sql_select.cc b/sql/sql_select.cc new file mode 100644 index 00000000..f4cbed58 --- /dev/null +++ b/sql/sql_select.cc @@ -0,0 +1,32034 @@ +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. + Copyright (c) 2009, 2022, MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + @brief + mysql_select and join optimization + + + @defgroup Query_Optimizer Query Optimizer + @{ +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "sql_select.h" +#include "sql_cache.h" // query_cache_* +#include "sql_table.h" // primary_key_name +#include "probes_mysql.h" +#include "key.h" // key_copy, key_cmp, key_cmp_if_same +#include "lock.h" // mysql_unlock_some_tables, + // mysql_unlock_read_tables +#include "sql_show.h" // append_identifier +#include "sql_base.h" // setup_wild, setup_fields, fill_record +#include "sql_parse.h" // check_stack_overrun +#include "sql_partition.h" // make_used_partitions_str +#include "sql_test.h" // print_where, print_keyuse_array, + // print_sjm, print_plan, TEST_join +#include "records.h" // init_read_record, end_read_record +#include "filesort.h" // filesort_free_buffers +#include "sql_union.h" // mysql_union +#include "opt_subselect.h" +#include "sql_derived.h" +#include "sql_statistics.h" +#include "sql_cte.h" +#include "sql_window.h" +#include "tztime.h" + +#include "debug_sync.h" // DEBUG_SYNC +#include +#include +#include +#include +#include "sys_vars_shared.h" +#include "sp_head.h" +#include "sp_rcontext.h" +#include 
"rowid_filter.h" +#include "select_handler.h" +#include "my_json_writer.h" +#include "opt_trace.h" +#include "derived_handler.h" +#include "create_tmp_table.h" + +/* + A key part number that means we're using a fulltext scan. + + In order not to confuse it with regular equalities, we need to pick + a number that's greater than MAX_REF_PARTS. + + Hash Join code stores field->field_index in KEYUSE::keypart, so the + number needs to be bigger than MAX_FIELDS, also. + + CAUTION: sql_test.cc has its own definition of FT_KEYPART. +*/ +#define FT_KEYPART (MAX_FIELDS+10) + +const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref", + "MAYBE_REF","ALL","range","index","fulltext", + "ref_or_null","unique_subquery","index_subquery", + "index_merge", "hash_ALL", "hash_range", + "hash_index", "hash_index_merge" }; + +LEX_CSTRING group_key= {STRING_WITH_LEN("group_key")}; +LEX_CSTRING distinct_key= {STRING_WITH_LEN("distinct_key")}; + +struct st_sargable_param; + +static bool make_join_statistics(JOIN *join, List &leaves, + DYNAMIC_ARRAY *keyuse); +static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse, + JOIN_TAB *join_tab, + uint tables, COND *conds, + table_map table_map, SELECT_LEX *select_lex, + SARGABLE_PARAM **sargables); +static int sort_keyuse(KEYUSE *a,KEYUSE *b); +static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables); +static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse, + bool allow_full_scan, table_map used_tables); +static bool get_quick_record_count(THD *thd, SQL_SELECT *select, + TABLE *table, + const key_map *keys,ha_rows limit, + ha_rows *quick_count); +static void optimize_straight_join(JOIN *join, table_map join_tables); +static bool greedy_search(JOIN *join, table_map remaining_tables, + uint depth, uint use_cond_selectivity); + +enum enum_best_search { + SEARCH_ABORT= -2, + SEARCH_ERROR= -1, + SEARCH_OK= 0, + SEARCH_FOUND_EDGE=1 +}; + +static enum_best_search +best_extension_by_limited_search(JOIN 
*join, + table_map remaining_tables, + uint idx, double record_count, + double read_time, uint depth, + uint use_cond_selectivity, + table_map *processed_eq_ref_tables); +static uint determine_search_depth(JOIN* join); +C_MODE_START +static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2); +static int join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2); +static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void *ptr2); +C_MODE_END +static uint cache_record_length(JOIN *join,uint index); +static store_key *get_store_key(THD *thd, + KEYUSE *keyuse, table_map used_tables, + KEY_PART_INFO *key_part, uchar *key_buff, + uint maybe_null); +static bool make_outerjoin_info(JOIN *join); +static Item* +make_cond_after_sjm(THD *thd, Item *root_cond, Item *cond, table_map tables, + table_map sjm_tables, bool inside_or_clause); +static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *item); +static void revise_cache_usage(JOIN_TAB *join_tab); +static bool make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after); +static bool only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables); +static void update_depend_map(JOIN *join); +static void update_depend_map_for_order(JOIN *join, ORDER *order); +static ORDER *remove_const(JOIN *join,ORDER *first_order,COND *cond, + bool change_list, bool *simple_order); +static int return_zero_rows(JOIN *join, select_result *res, + List *tables, + List *fields, bool send_row, + ulonglong select_options, const char *info, + Item *having, List *all_fields); +static COND *build_equal_items(JOIN *join, COND *cond, + COND_EQUAL *inherited, + List *join_list, + bool ignore_on_conds, + COND_EQUAL **cond_equal_ref, + bool link_equal_fields= FALSE); +static COND* substitute_for_best_equal_field(THD *thd, JOIN_TAB *context_tab, + COND *cond, + COND_EQUAL *cond_equal, + void *table_join_idx, + bool do_substitution); +static COND *simplify_joins(JOIN *join, 
List *join_list, + COND *conds, bool top, bool in_sj); +static bool check_interleaving_with_nj(JOIN_TAB *next); +static void restore_prev_nj_state(JOIN_TAB *last); +static uint reset_nj_counters(JOIN *join, List *join_list); +static uint build_bitmap_for_nested_joins(List *join_list, + uint first_unused); + +static COND *optimize_cond(JOIN *join, COND *conds, + List *join_list, + bool ignore_on_conds, + Item::cond_result *cond_value, + COND_EQUAL **cond_equal, + int flags= 0); +bool const_expression_in_where(COND *conds,Item *item, Item **comp_item); +static int do_select(JOIN *join, Procedure *procedure); + +static enum_nested_loop_state evaluate_join_record(JOIN *, JOIN_TAB *, int); +static enum_nested_loop_state +evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab); +static enum_nested_loop_state +end_send(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); +static enum_nested_loop_state +end_write(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); +static enum_nested_loop_state +end_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); +static enum_nested_loop_state +end_unique_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); + +static int join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos); +static int join_read_system(JOIN_TAB *tab); +static int join_read_const(JOIN_TAB *tab); +static int join_read_key(JOIN_TAB *tab); +static void join_read_key_unlock_row(st_join_table *tab); +static void join_const_unlock_row(JOIN_TAB *tab); +static int join_read_always_key(JOIN_TAB *tab); +static int join_read_last_key(JOIN_TAB *tab); +static int join_no_more_records(READ_RECORD *info); +static int join_read_next(READ_RECORD *info); +static int join_init_quick_read_record(JOIN_TAB *tab); +static quick_select_return test_if_quick_select(JOIN_TAB *tab); +static int test_if_use_dynamic_range_scan(JOIN_TAB *join_tab); +static int join_read_first(JOIN_TAB *tab); +static int join_read_next(READ_RECORD *info); +static int 
join_read_next_same(READ_RECORD *info); +static int join_read_last(JOIN_TAB *tab); +static int join_read_prev_same(READ_RECORD *info); +static int join_read_prev(READ_RECORD *info); +static int join_ft_read_first(JOIN_TAB *tab); +static int join_ft_read_next(READ_RECORD *info); +int join_read_always_key_or_null(JOIN_TAB *tab); +int join_read_next_same_or_null(READ_RECORD *info); +static COND *make_cond_for_table(THD *thd, Item *cond,table_map table, + table_map used_table, + int join_tab_idx_arg, + bool exclude_expensive_cond, + bool retain_ref_cond); +static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond, + Item *cond, + table_map tables, + table_map used_table, + int join_tab_idx_arg, + bool exclude_expensive_cond, + bool retain_ref_cond, + bool is_top_and_level); + +static Item* part_of_refkey(TABLE *form,Field *field); +uint find_shortest_key(TABLE *table, const key_map *usable_keys); +static bool test_if_cheaper_ordering(const JOIN_TAB *tab, + ORDER *order, TABLE *table, + key_map usable_keys, int key, + ha_rows select_limit, + int *new_key, int *new_key_direction, + ha_rows *new_select_limit, + uint *new_used_key_parts= NULL, + uint *saved_best_key_parts= NULL); +static int test_if_order_by_key(JOIN *, ORDER *, TABLE *, uint, uint *); +static bool test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order, + ha_rows select_limit, bool no_changes, + const key_map *map, + bool *fatal_error); +static bool list_contains_unique_index(TABLE *table, + bool (*find_func) (Field *, void *), void *data); +static bool find_field_in_item_list (Field *field, void *data); +static bool find_field_in_order_list (Field *field, void *data); +int create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort); +static int remove_dup_with_compare(THD *thd, TABLE *entry, Field **field, + SORT_FIELD *sortorder, ulong keylength, + Item *having); +static int remove_dup_with_hash_index(THD *thd,TABLE *table, + uint field_count, Field **first_field, + SORT_FIELD 
*sortorder, + ulong key_length,Item *having); +static bool cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref); +static bool setup_new_fields(THD *thd, List &fields, + List &all_fields, ORDER *new_order); +static ORDER *create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array, + ORDER *order, List &fields, + List &all_fields, + bool *all_order_by_fields_used); +static bool test_if_subpart(ORDER *group_by, ORDER *order_by); +static TABLE *get_sort_by_table(ORDER *a,ORDER *b,List &tables, + table_map const_tables); +static void calc_group_buffer(JOIN *join, ORDER *group); +static bool make_group_fields(JOIN *main_join, JOIN *curr_join); +static bool alloc_group_fields(JOIN *join, ORDER *group); +static bool alloc_order_fields(JOIN *join, ORDER *group, + uint max_number_of_elements); +// Create list for using with tempory table +static bool change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, + List &new_list1, + List &new_list2, + uint elements, List &items); +// Create list for using with tempory table +static bool change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, + List &new_list1, + List &new_list2, + uint elements, List &items); +static void init_tmptable_sum_functions(Item_sum **func); +static void update_tmptable_sum_func(Item_sum **func,TABLE *tmp_table); +static void copy_sum_funcs(Item_sum **func_ptr, Item_sum **end); +static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab); +static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr); +static bool prepare_sum_aggregators(THD *thd, Item_sum **func_ptr, + bool need_distinct); +static bool init_sum_functions(Item_sum **func, Item_sum **end); +static bool update_sum_func(Item_sum **func); +static void select_describe(JOIN *join, bool need_tmp_table,bool need_order, + bool distinct, const char *message=NullS); +static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab); +static uint make_join_orderinfo(JOIN *join); +static bool 
generate_derived_keys(DYNAMIC_ARRAY *keyuse_array); + +Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field, + bool *inherited_fl); +JOIN_TAB *first_depth_first_tab(JOIN* join); +JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab); + +static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab, + uint n_top_tabs_count, JOIN_TAB *tab); +static bool find_order_in_list(THD *, Ref_ptr_array, TABLE_LIST *, ORDER *, + List &, List &, bool, bool, bool); + +static double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, + table_map rem_tables); +void set_postjoin_aggr_write_func(JOIN_TAB *tab); + +static Item **get_sargable_cond(JOIN *join, TABLE *table); + +bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item); + +void print_list_item(String *str, List_item *list, + enum_query_type query_type); + +static +bool build_notnull_conds_for_range_scans(JOIN *join, COND *cond, + table_map allowed); +static +void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join, + TABLE_LIST *nest_tbl); +static void fix_items_after_optimize(THD *thd, SELECT_LEX *select_lex); +static void optimize_rownum(THD *thd, SELECT_LEX_UNIT *unit, Item *cond); +static bool process_direct_rownum_comparison(THD *thd, SELECT_LEX_UNIT *unit, + Item *cond); + +#ifndef DBUG_OFF + +/* + SHOW EXPLAIN testing: wait for, and serve n_calls APC requests. 
+*/ +void dbug_serve_apcs(THD *thd, int n_calls) +{ + const char *save_proc_info= thd->proc_info; + + /* Busy-wait for n_calls APC requests to arrive and be processed */ + int n_apcs= thd->apc_target.n_calls_processed + n_calls; + while (thd->apc_target.n_calls_processed < n_apcs) + { + /* This is so that mysqltest knows we're ready to serve requests: */ + thd_proc_info(thd, "show_explain_trap"); + my_sleep(30000); + thd_proc_info(thd, save_proc_info); + if (unlikely(thd->check_killed(1))) + break; + } +} + + +/* + Debugging: check if @name=value, comparing as integer + + Intended usage: + + DBUG_EXECUTE_IF("show_explain_probe_2", + if (dbug_user_var_equals_int(thd, "select_id", select_id)) + dbug_serve_apcs(thd, 1); + ); + +*/ + +bool dbug_user_var_equals_int(THD *thd, const char *name, int value) +{ + user_var_entry *var; + LEX_CSTRING varname= { name, strlen(name)}; + if ((var= get_variable(&thd->user_vars, &varname, FALSE))) + { + bool null_value; + longlong var_value= var->val_int(&null_value); + if (!null_value && var_value == value) + return TRUE; + } + return FALSE; +} + +/* + Debugging : check if @name= value, comparing as string + + Intended usage : + + DBUG_EXECUTE_IF("log_slow_statement_end", + if (dbug_user_var_equals_str(thd, "show_explain_probe_query", + thd->query())) + dbug_serve_apcs(thd, 1); + ); +*/ + +bool dbug_user_var_equals_str(THD *thd, const char *name, const char* value) +{ + user_var_entry *var; + LEX_CSTRING varname= {name, strlen(name)}; + if ((var= get_variable(&thd->user_vars, &varname, FALSE))) + { + bool null_value; + String str; + auto var_value= var->val_str(&null_value, &str, 10)->ptr(); + if (!null_value && !strncmp(var_value, value, strlen(value))) + return TRUE; + } + return FALSE; +} +#endif /* DBUG_OFF */ + +/* + Intialize POSITION structure. 
+*/ + +POSITION::POSITION() +{ + table= 0; + records_read= cond_selectivity= read_time= 0.0; + prefix_record_count= 0.0; + key= 0; + use_join_buffer= 0; + sj_strategy= SJ_OPT_NONE; + n_sj_tables= 0; + spl_plan= 0; + range_rowid_filter_info= 0; + ref_depend_map= dups_producing_tables= 0; + inner_tables_handled_with_other_sjs= 0; + type= JT_UNKNOWN; + key_dependent= 0; + dups_weedout_picker.set_empty(); + firstmatch_picker.set_empty(); + loosescan_picker.set_empty(); + sjmat_picker.set_empty(); +} + + +void JOIN::init(THD *thd_arg, List &fields_arg, + ulonglong select_options_arg, select_result *result_arg) +{ + join_tab= 0; + table= 0; + table_count= 0; + top_join_tab_count= 0; + const_tables= 0; + const_table_map= found_const_table_map= not_usable_rowid_map= 0; + aggr_tables= 0; + eliminated_tables= 0; + join_list= 0; + implicit_grouping= FALSE; + sort_and_group= 0; + first_record= 0; + do_send_rows= 1; + duplicate_rows= send_records= 0; + found_records= accepted_rows= 0; + fetch_limit= HA_POS_ERROR; + thd= thd_arg; + sum_funcs= sum_funcs2= 0; + procedure= 0; + having= tmp_having= having_history= 0; + having_is_correlated= false; + group_list_for_estimates= 0; + select_options= select_options_arg; + result= result_arg; + lock= thd_arg->lock; + select_lex= 0; //for safety + select_distinct= MY_TEST(select_options & SELECT_DISTINCT); + no_order= 0; + simple_order= 0; + simple_group= 0; + ordered_index_usage= ordered_index_void; + need_distinct= 0; + skip_sort_order= 0; + with_two_phase_optimization= 0; + save_qep= 0; + spl_opt_info= 0; + ext_keyuses_for_splitting= 0; + spl_opt_info= 0; + need_tmp= 0; + hidden_group_fields= 0; /*safety*/ + error= 0; + select= 0; + return_tab= 0; + ref_ptrs.reset(); + items0.reset(); + items1.reset(); + items2.reset(); + items3.reset(); + zero_result_cause= 0; + optimization_state= JOIN::NOT_OPTIMIZED; + have_query_plan= QEP_NOT_PRESENT_YET; + initialized= 0; + cleaned= 0; + cond_equal= 0; + having_equal= 0; + exec_const_cond= 0; + 
group_optimized_away= 0; + no_rows_in_result_called= 0; + positions= best_positions= 0; + pushdown_query= 0; + original_join_tab= 0; + explain= NULL; + tmp_table_keep_current_rowid= 0; + allowed_top_level_tables= 0; + + all_fields= fields_arg; + if (&fields_list != &fields_arg) /* Avoid valgrind-warning */ + fields_list= fields_arg; + non_agg_fields.empty(); + bzero((char*) &keyuse,sizeof(keyuse)); + having_value= Item::COND_UNDEF; + tmp_table_param.init(); + tmp_table_param.end_write_records= HA_POS_ERROR; + rollup.state= ROLLUP::STATE_NONE; + + no_const_tables= FALSE; + first_select= sub_select; + set_group_rpa= false; + group_sent= 0; + + outer_ref_cond= pseudo_bits_cond= NULL; + in_to_exists_where= NULL; + in_to_exists_having= NULL; + emb_sjm_nest= NULL; + sjm_lookup_tables= 0; + sjm_scan_tables= 0; + is_orig_degenerated= false; + with_ties_order_count= 0; +}; + + +static void trace_table_dependencies(THD *thd, + JOIN_TAB *join_tabs, uint table_count) +{ + DBUG_ASSERT(thd->trace_started()); + Json_writer_object trace_wrapper(thd); + Json_writer_array trace_dep(thd, "table_dependencies"); + + for (uint i= 0; i < table_count; i++) + { + TABLE_LIST *table_ref= join_tabs[i].tab_list; + Json_writer_object trace_one_table(thd); + trace_one_table.add_table_name(&join_tabs[i]); + trace_one_table.add("row_may_be_null", + (bool)table_ref->table->maybe_null); + const table_map map= table_ref->get_map(); + DBUG_ASSERT(map < (1ULL << table_count)); + for (uint j= 0; j < table_count; j++) + { + if (map & (1ULL << j)) + { + trace_one_table.add("map_bit", j); + break; + } + } + Json_writer_array depends_on(thd, "depends_on_map_bits"); + Table_map_iterator it(join_tabs[i].dependent); + uint dep_bit; + while ((dep_bit= it++) != Table_map_iterator::BITMAP_END) + depends_on.add(static_cast(dep_bit)); + } +} + + +/** + This handles SELECT with and without UNION. 
+*/ + +bool handle_select(THD *thd, LEX *lex, select_result *result, + ulonglong setup_tables_done_option) +{ + bool res; + SELECT_LEX *select_lex= lex->first_select_lex(); + DBUG_ENTER("handle_select"); + MYSQL_SELECT_START(thd->query()); + + if (select_lex->master_unit()->is_unit_op() || + select_lex->master_unit()->fake_select_lex) + res= mysql_union(thd, lex, result, &lex->unit, setup_tables_done_option); + else + { + SELECT_LEX_UNIT *unit= &lex->unit; + unit->set_limit(unit->global_parameters()); + /* + 'options' of mysql_select will be set in JOIN, as far as JOIN for + every PS/SP execution new, we will not need reset this flag if + setup_tables_done_option changed for next rexecution + */ + res= mysql_select(thd, + select_lex->table_list.first, + select_lex->item_list, + select_lex->where, + select_lex->order_list.elements + + select_lex->group_list.elements, + select_lex->order_list.first, + select_lex->group_list.first, + select_lex->having, + lex->proc_list.first, + select_lex->options | thd->variables.option_bits | + setup_tables_done_option, + result, unit, select_lex); + } + DBUG_PRINT("info",("res: %d is_error(): %d", res, + thd->is_error())); + res|= thd->is_error(); + if (unlikely(res)) + result->abort_result_set(); + if (unlikely(thd->killed == ABORT_QUERY && !thd->no_errors)) + { + /* + If LIMIT ROWS EXAMINED interrupted query execution, issue a warning, + continue with normal processing and produce an incomplete query result. + */ + bool saved_abort_on_warning= thd->abort_on_warning; + thd->abort_on_warning= false; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_QUERY_RESULT_INCOMPLETE, + ER_THD(thd, ER_QUERY_RESULT_INCOMPLETE), + "LIMIT ROWS EXAMINED", + thd->lex->limit_rows_examined->val_uint()); + thd->abort_on_warning= saved_abort_on_warning; + thd->reset_killed(); + } + /* Disable LIMIT ROWS EXAMINED after query execution. 
*/ + thd->lex->limit_rows_examined_cnt= ULONGLONG_MAX; + + MYSQL_SELECT_DONE((int) res, (ulong) thd->limit_found_rows); + DBUG_RETURN(res); +} + + +/** + Fix fields referenced from inner selects. + + @param thd Thread handle + @param all_fields List of all fields used in select + @param select Current select + @param ref_pointer_array Array of references to Items used in current select + @param group_list GROUP BY list (is NULL by default) + + @details + The function serves 3 purposes + + - adds fields referenced from inner query blocks to the current select list + + - Decides which class to use to reference the items (Item_ref or + Item_direct_ref) + + - fixes references (Item_ref objects) to these fields. + + If a field isn't already on the select list and the ref_pointer_array + is provided then it is added to the all_fields list and the pointer to + it is saved in the ref_pointer_array. + + The class to access the outer field is determined by the following rules: + + -#. If the outer field isn't used under an aggregate function then the + Item_ref class should be used. + + -#. If the outer field is used under an aggregate function and this + function is, in turn, aggregated in the query block where the outer + field was resolved or some query nested therein, then the + Item_direct_ref class should be used. Also it should be used if we are + grouping by a subquery that references this outer field. + + The resolution is done here and not at the fix_fields() stage as + it can be done only after aggregate functions are fixed and pulled up to + selects where they are to be aggregated. + + When the class is chosen it substitutes the original field in the + Item_outer_ref object. + + After this we proceed with fixing references (Item_outer_ref objects) to + this field from inner subqueries. + + @return Status + @retval true An error occurred. + @retval false OK. 
+ */ + +bool +fix_inner_refs(THD *thd, List &all_fields, SELECT_LEX *select, + Ref_ptr_array ref_pointer_array) +{ + Item_outer_ref *ref; + + /* + Mark the references from the inner_refs_list that are occurred in + the group by expressions. Those references will contain direct + references to the referred fields. The markers are set in + the found_in_group_by field of the references from the list. + */ + List_iterator_fast ref_it(select->inner_refs_list); + for (ORDER *group= select->join->group_list; group; group= group->next) + { + (*group->item)->walk(&Item::check_inner_refs_processor, TRUE, &ref_it); + } + + while ((ref= ref_it++)) + { + bool direct_ref= false; + Item *item= ref->outer_ref; + Item **item_ref= ref->ref; + Item_ref *new_ref; + /* + TODO: this field item already might be present in the select list. + In this case instead of adding new field item we could use an + existing one. The change will lead to less operations for copying fields, + smaller temporary tables and less data passed through filesort. + */ + if (!ref_pointer_array.is_null() && !ref->found_in_select_list) + { + int el= all_fields.elements; + ref_pointer_array[el]= item; + /* Add the field item to the select list of the current select. */ + all_fields.push_front(item, thd->mem_root); + /* + If it's needed reset each Item_ref item that refers this field with + a new reference taken from ref_pointer_array. + */ + item_ref= &ref_pointer_array[el]; + } + + if (ref->in_sum_func) + { + Item_sum *sum_func; + if (ref->in_sum_func->nest_level > select->nest_level) + direct_ref= TRUE; + else + { + for (sum_func= ref->in_sum_func; sum_func && + sum_func->aggr_level >= select->nest_level; + sum_func= sum_func->in_sum_func) + { + if (sum_func->aggr_level == select->nest_level) + { + direct_ref= TRUE; + break; + } + } + } + } + else if (ref->found_in_group_by) + direct_ref= TRUE; + + new_ref= direct_ref ? 
+ new (thd->mem_root) Item_direct_ref(thd, ref->context, item_ref, ref->table_name, + ref->field_name, ref->alias_name_used) : + new (thd->mem_root) Item_ref(thd, ref->context, item_ref, ref->table_name, + ref->field_name, ref->alias_name_used); + if (!new_ref) + return TRUE; + ref->outer_ref= new_ref; + ref->ref= &ref->outer_ref; + + if (ref->fix_fields_if_needed(thd, 0)) + return TRUE; + thd->lex->used_tables|= item->used_tables(); + thd->lex->current_select->select_list_tables|= item->used_tables(); + } + return false; +} + +/** + The following clauses are redundant for subqueries: + + DISTINCT + GROUP BY if there are no aggregate functions and no HAVING + clause + + Because redundant clauses are removed both from JOIN and + select_lex, the removal is permanent. Thus, it only makes sense to + call this function for normal queries and on first execution of + SP/PS + + @param subq_select_lex select_lex that is part of a subquery + predicate. This object and the associated + join is modified. +*/ + +static +void remove_redundant_subquery_clauses(st_select_lex *subq_select_lex) +{ + DBUG_ENTER("remove_redundant_subquery_clauses"); + Item_subselect *subq_predicate= subq_select_lex->master_unit()->item; + /* + The removal should happen for IN, ALL, ANY and EXISTS subqueries, + which means all but single row subqueries. Example single row + subqueries: + a) SELECT * FROM t1 WHERE t1.a = () + b) SELECT a, (substype() == Item_subselect::SINGLEROW_SUBS) + DBUG_VOID_RETURN; + + /* A subquery that is not single row should be one of IN/ALL/ANY/EXISTS. 
*/ + DBUG_ASSERT (subq_predicate->substype() == Item_subselect::EXISTS_SUBS || + subq_predicate->is_in_predicate()); + + if (subq_select_lex->options & SELECT_DISTINCT) + { + subq_select_lex->join->select_distinct= false; + subq_select_lex->options&= ~SELECT_DISTINCT; + DBUG_PRINT("info", ("DISTINCT removed")); + } + + /* + Remove GROUP BY if there are no aggregate functions and no HAVING + clause + */ + if (subq_select_lex->group_list.elements && + !subq_select_lex->with_sum_func && !subq_select_lex->join->having) + { + for (ORDER *ord= subq_select_lex->group_list.first; ord; ord= ord->next) + { + /* + Do not remove the item if it is used in select list and then referred + from GROUP BY clause by its name or number. Example: + + select (select ... ) as SUBQ ... group by SUBQ + + Here SUBQ cannot be removed. + */ + if (!ord->in_field_list) + { + (*ord->item)->walk(&Item::eliminate_subselect_processor, FALSE, NULL); + /* + Remove from the JOIN::all_fields list any reference to the elements + of the eliminated GROUP BY list unless it is 'in_field_list'. + This is needed in order not to confuse JOIN::make_aggr_tables_info() + when it constructs different structure for execution phase. + */ + List_iterator li(subq_select_lex->join->all_fields); + Item *item; + while ((item= li++)) + { + if (item == *ord->item) + li.remove(); + } + } + } + subq_select_lex->join->group_list= NULL; + subq_select_lex->group_list.empty(); + DBUG_PRINT("info", ("GROUP BY removed")); + } + + /* + TODO: This would prevent processing quries with ORDER BY ... LIMIT + therefore we disable this optimization for now. + Remove GROUP BY if there are no aggregate functions and no HAVING + clause + if (subq_select_lex->group_list.elements && + !subq_select_lex->with_sum_func && !subq_select_lex->join->having) + { + subq_select_lex->join->group_list= NULL; + subq_select_lex->group_list.empty(); + } + */ + DBUG_VOID_RETURN; +} + + +/** + Function to setup clauses without sum functions. 
+*/ +static inline int +setup_without_group(THD *thd, Ref_ptr_array ref_pointer_array, + TABLE_LIST *tables, + List &leaves, + List &fields, + List &all_fields, + COND **conds, + ORDER *order, + ORDER *group, + List &win_specs, + List &win_funcs, + bool *hidden_group_fields) +{ + int res; + enum_parsing_place save_place; + st_select_lex *const select= thd->lex->current_select; + nesting_map save_allow_sum_func= thd->lex->allow_sum_func; + /* + Need to stave the value, so we can turn off only any new non_agg_field_used + additions coming from the WHERE + */ + const bool saved_non_agg_field_used= select->non_agg_field_used(); + DBUG_ENTER("setup_without_group"); + + thd->lex->allow_sum_func.clear_bit(select->nest_level); + res= setup_conds(thd, tables, leaves, conds); + + /* it's not wrong to have non-aggregated columns in a WHERE */ + select->set_non_agg_field_used(saved_non_agg_field_used); + + thd->lex->allow_sum_func.set_bit(select->nest_level); + + save_place= thd->lex->current_select->context_analysis_place; + thd->lex->current_select->context_analysis_place= IN_ORDER_BY; + res= res || setup_order(thd, ref_pointer_array, tables, fields, all_fields, + order); + thd->lex->allow_sum_func.clear_bit(select->nest_level); + thd->lex->current_select->context_analysis_place= IN_GROUP_BY; + res= res || setup_group(thd, ref_pointer_array, tables, fields, all_fields, + group, hidden_group_fields); + thd->lex->current_select->context_analysis_place= save_place; + thd->lex->allow_sum_func.set_bit(select->nest_level); + res= res || setup_windows(thd, ref_pointer_array, tables, fields, all_fields, + win_specs, win_funcs); + thd->lex->allow_sum_func= save_allow_sum_func; + DBUG_RETURN(res); +} + +bool vers_select_conds_t::init_from_sysvar(THD *thd) +{ + vers_asof_timestamp_t &in= thd->variables.vers_asof_timestamp; + type= (vers_system_time_t) in.type; + delete_history= false; + start.unit= VERS_TIMESTAMP; + if (type != SYSTEM_TIME_UNSPECIFIED && type != SYSTEM_TIME_ALL) + { + 
DBUG_ASSERT(type == SYSTEM_TIME_AS_OF); + Datetime dt(in.unix_time, in.second_part, thd->variables.time_zone); + + start.item= new (thd->mem_root) + Item_datetime_literal(thd, &dt, TIME_SECOND_PART_DIGITS); + if (!start.item) + return true; + } + else + start.item= NULL; + end.empty(); + return false; +} + +void vers_select_conds_t::print(String *str, enum_query_type query_type) const +{ + switch (orig_type) { + case SYSTEM_TIME_UNSPECIFIED: + break; + case SYSTEM_TIME_AS_OF: + start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME AS OF ")); + break; + case SYSTEM_TIME_FROM_TO: + start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME FROM ")); + end.print(str, query_type, STRING_WITH_LEN(" TO ")); + break; + case SYSTEM_TIME_BETWEEN: + start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME BETWEEN ")); + end.print(str, query_type, STRING_WITH_LEN(" AND ")); + break; + case SYSTEM_TIME_BEFORE: + start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME BEFORE ")); + break; + case SYSTEM_TIME_HISTORY: + // nothing to add + break; + case SYSTEM_TIME_ALL: + str->append(STRING_WITH_LEN(" FOR SYSTEM_TIME ALL")); + break; + } +} + +static +Item* period_get_condition(THD *thd, TABLE_LIST *table, SELECT_LEX *select, + vers_select_conds_t *conds, bool timestamp) +{ + DBUG_ASSERT(table); + DBUG_ASSERT(table->table); +#define newx new (thd->mem_root) + TABLE_SHARE *share= table->table->s; + const TABLE_SHARE::period_info_t *period= conds->period; + + const LEX_CSTRING &fstart= period->start_field(share)->field_name; + const LEX_CSTRING &fend= period->end_field(share)->field_name; + + conds->field_start= newx Item_field(thd, &select->context, + table->db, table->alias, + thd->strmake_lex_cstring(fstart)); + conds->field_end= newx Item_field(thd, &select->context, + table->db, table->alias, + thd->strmake_lex_cstring(fend)); + + Item *cond1= NULL, *cond2= NULL, *cond3= NULL, *curr= NULL; + if (timestamp) + { + MYSQL_TIME max_time; + switch 
(conds->type) + { + case SYSTEM_TIME_UNSPECIFIED: + case SYSTEM_TIME_HISTORY: + { + thd->variables.time_zone->gmt_sec_to_TIME(&max_time, TIMESTAMP_MAX_VALUE); + max_time.second_part= TIME_MAX_SECOND_PART; + Datetime dt(&max_time); + curr= newx Item_datetime_literal(thd, &dt, TIME_SECOND_PART_DIGITS); + if (conds->type == SYSTEM_TIME_UNSPECIFIED) + cond1= newx Item_func_eq(thd, conds->field_end, curr); + else + cond1= newx Item_func_lt(thd, conds->field_end, curr); + break; + } + case SYSTEM_TIME_AS_OF: + cond1= newx Item_func_le(thd, conds->field_start, conds->start.item); + cond2= newx Item_func_gt(thd, conds->field_end, conds->start.item); + break; + case SYSTEM_TIME_FROM_TO: + cond1= newx Item_func_lt(thd, conds->field_start, conds->end.item); + cond2= newx Item_func_gt(thd, conds->field_end, conds->start.item); + cond3= newx Item_func_lt(thd, conds->start.item, conds->end.item); + break; + case SYSTEM_TIME_BETWEEN: + cond1= newx Item_func_le(thd, conds->field_start, conds->end.item); + cond2= newx Item_func_gt(thd, conds->field_end, conds->start.item); + cond3= newx Item_func_le(thd, conds->start.item, conds->end.item); + break; + case SYSTEM_TIME_BEFORE: + cond1= newx Item_func_history(thd, conds->field_end); + cond2= newx Item_func_lt(thd, conds->field_end, conds->start.item); + break; + default: + DBUG_ASSERT(0); + } + } + else + { + DBUG_ASSERT(table->table->s && table->table->s->db_plugin); + + Item *trx_id0= conds->start.item; + Item *trx_id1= conds->end.item; + if (conds->start.item && conds->start.unit == VERS_TIMESTAMP) + { + bool backwards= conds->type != SYSTEM_TIME_AS_OF; + trx_id0= newx Item_func_trt_id(thd, conds->start.item, + TR_table::FLD_TRX_ID, backwards); + } + if (conds->end.item && conds->end.unit == VERS_TIMESTAMP) + { + trx_id1= newx Item_func_trt_id(thd, conds->end.item, + TR_table::FLD_TRX_ID, false); + } + + switch (conds->type) + { + case SYSTEM_TIME_UNSPECIFIED: + case SYSTEM_TIME_HISTORY: + curr= newx Item_int(thd, ULONGLONG_MAX); 
+ if (conds->type == SYSTEM_TIME_UNSPECIFIED) + cond1= newx Item_func_eq(thd, conds->field_end, curr); + else + cond1= newx Item_func_lt(thd, conds->field_end, curr); + break; + DBUG_ASSERT(!conds->start.item); + DBUG_ASSERT(!conds->end.item); + break; + case SYSTEM_TIME_AS_OF: + cond1= newx Item_func_trt_trx_sees_eq(thd, trx_id0, conds->field_start); + cond2= newx Item_func_trt_trx_sees(thd, conds->field_end, trx_id0); + DBUG_ASSERT(!conds->end.item); + break; + case SYSTEM_TIME_FROM_TO: + cond1= newx Item_func_trt_trx_sees(thd, trx_id1, conds->field_start); + cond2= newx Item_func_trt_trx_sees_eq(thd, conds->field_end, trx_id0); + cond3= newx Item_func_lt(thd, conds->start.item, conds->end.item); + break; + case SYSTEM_TIME_BETWEEN: + cond1= newx Item_func_trt_trx_sees_eq(thd, trx_id1, conds->field_start); + cond2= newx Item_func_trt_trx_sees_eq(thd, conds->field_end, trx_id0); + cond3= newx Item_func_le(thd, conds->start.item, conds->end.item); + break; + case SYSTEM_TIME_BEFORE: + cond1= newx Item_func_history(thd, conds->field_end); + cond2= newx Item_func_trt_trx_sees(thd, trx_id0, conds->field_end); + break; + default: + DBUG_ASSERT(0); + } + } + + if (cond1) + { + cond1= and_items(thd, cond2, cond1); + cond1= and_items(thd, cond3, cond1); + } + return cond1; +} + +static +bool skip_setup_conds(THD *thd) +{ + return (!thd->stmt_arena->is_conventional() + && !thd->stmt_arena->is_stmt_prepare_or_first_sp_execute()) + || thd->lex->is_view_context_analysis(); +} + +int SELECT_LEX::period_setup_conds(THD *thd, TABLE_LIST *tables) +{ + DBUG_ENTER("SELECT_LEX::period_setup_conds"); + const bool update_conds= !skip_setup_conds(thd); + + Query_arena backup; + Query_arena *arena= thd->activate_stmt_arena_if_needed(&backup); + + DBUG_ASSERT(!tables->next_local && tables->table); + + Item *result= NULL; + for (TABLE_LIST *table= tables; table; table= table->next_local) + { + if (!table->table) + continue; + vers_select_conds_t &conds= table->period_conditions; + if 
(!table->table->s->period.name.streq(conds.name)) + { + my_error(ER_PERIOD_NOT_FOUND, MYF(0), conds.name.str); + if (arena) + thd->restore_active_arena(arena, &backup); + DBUG_RETURN(-1); + } + + if (update_conds) + { + conds.period= &table->table->s->period; + result= and_items(thd, result, + period_get_condition(thd, table, this, &conds, true)); + } + } + if (update_conds) + where= and_items(thd, where, result); + + if (arena) + thd->restore_active_arena(arena, &backup); + + DBUG_RETURN(0); +} + +int SELECT_LEX::vers_setup_conds(THD *thd, TABLE_LIST *tables) +{ + DBUG_ENTER("SELECT_LEX::vers_setup_conds"); + const bool update_conds= !skip_setup_conds(thd); + + if (!versioned_tables) + { + for (TABLE_LIST *table= tables; table; table= table->next_local) + { + if (table->table && table->table->versioned()) + versioned_tables++; + else if (table->vers_conditions.is_set() && + (table->is_non_derived() || !table->vers_conditions.used)) + { + my_error(ER_VERS_NOT_VERSIONED, MYF(0), table->alias.str); + DBUG_RETURN(-1); + } + } + } + + if (versioned_tables == 0) + DBUG_RETURN(0); + + /* For prepared statements we create items on statement arena, + because they must outlive execution phase for multiple executions. 
*/ + Query_arena_stmt on_stmt_arena(thd); + + // find outer system_time + SELECT_LEX *outer_slex= outer_select(); + TABLE_LIST* outer_table= NULL; + + if (outer_slex) + { + TABLE_LIST* derived= master_unit()->derived; + // inner SELECT may not be a derived table (derived == NULL) + while (derived && outer_slex && !derived->vers_conditions.is_set()) + { + derived= outer_slex->master_unit()->derived; + outer_slex= outer_slex->outer_select(); + } + if (derived && outer_slex) + { + DBUG_ASSERT(derived->vers_conditions.is_set()); + outer_table= derived; + } + } + + bool is_select= false; + bool use_sysvar= false; + switch (thd->lex->sql_command) + { + case SQLCOM_SELECT: + use_sysvar= true; + /* fall through */ + case SQLCOM_CREATE_TABLE: + case SQLCOM_INSERT_SELECT: + case SQLCOM_REPLACE_SELECT: + case SQLCOM_DELETE_MULTI: + case SQLCOM_UPDATE_MULTI: + is_select= true; + default: + break; + } + + for (TABLE_LIST *table= tables; table; table= table->next_local) + { + if (!table->table || table->is_view() || !table->table->versioned()) + continue; + + vers_select_conds_t &vers_conditions= table->vers_conditions; + +#ifdef WITH_PARTITION_STORAGE_ENGINE + /* + if the history is stored in partitions, then partitions + themselves are not versioned + */ + if (table->partition_names && table->table->part_info->vers_info) + { + /* If the history is stored in partitions, then partitions + themselves are not versioned. 
*/ + if (vers_conditions.was_set()) + { + my_error(ER_VERS_QUERY_IN_PARTITION, MYF(0), table->alias.str); + DBUG_RETURN(-1); + } + else if (!vers_conditions.is_set()) + vers_conditions.set_all(); + } +#endif + + if (outer_table && !vers_conditions.is_set()) + { + // propagate system_time from nearest outer SELECT_LEX + vers_conditions= outer_table->vers_conditions; + outer_table->vers_conditions.used= true; + } + + // propagate system_time from sysvar + if (!vers_conditions.is_set() && use_sysvar) + { + if (vers_conditions.init_from_sysvar(thd)) + DBUG_RETURN(-1); + } + + if (vers_conditions.is_set()) + { + if (vers_conditions.was_set() && + table->lock_type >= TL_FIRST_WRITE && + !vers_conditions.delete_history) + { + my_error(ER_TABLE_NOT_LOCKED_FOR_WRITE, MYF(0), table->alias.str); + DBUG_RETURN(-1); + } + + if (vers_conditions.type == SYSTEM_TIME_ALL) + continue; + } + + bool timestamps_only= table->table->versioned(VERS_TIMESTAMP); + + if (vers_conditions.is_set() && vers_conditions.type != SYSTEM_TIME_HISTORY) + { + thd->where= "FOR SYSTEM_TIME"; + /* TODO: do resolve fix_length_and_dec(), fix_fields(). This requires + storing vers_conditions as Item and make some magic related to + vers_system_time_t/VERS_TRX_ID at stage of fix_fields() + (this is large refactoring). 
*/ + if (vers_conditions.check_units(thd)) + DBUG_RETURN(-1); + if (timestamps_only && (vers_conditions.start.unit == VERS_TRX_ID || + vers_conditions.end.unit == VERS_TRX_ID)) + { + my_error(ER_VERS_ENGINE_UNSUPPORTED, MYF(0), table->table_name.str); + DBUG_RETURN(-1); + } + } + + if (update_conds) + { + vers_conditions.period = &table->table->s->vers; + Item *cond= period_get_condition(thd, table, this, &vers_conditions, + timestamps_only); + if (is_select) + table->on_expr= and_items(thd, table->on_expr, cond); + else + { + if (join) + { + where= and_items(thd, join->conds, cond); + join->conds= where; + } + else + where= and_items(thd, where, cond); + table->where= and_items(thd, table->where, cond); + } + + table->vers_conditions.set_all(); + } + } // for (table= tables; ...) + + DBUG_RETURN(0); +} + + +/***************************************************************************** + Check fields, find best join, do the select and output fields. + mysql_select assumes that all tables are already opened +*****************************************************************************/ + +/* + Check if we have a field reference. If yes, we have to use + mixed_implicit_grouping. +*/ + +static bool check_list_for_field(List *items) +{ + List_iterator_fast select_it(*items); + Item *select_el; + + while ((select_el= select_it++)) + { + if (select_el->with_field()) + return true; + } + return false; +} + +static bool check_list_for_field(ORDER *order) +{ + for (; order; order= order->next) + { + if (order->item[0]->with_field()) + return true; + } + return false; +} + + +/** + Prepare of whole select (including sub queries in future). 
+ + @todo + Add check of calculation of GROUP functions and fields: + SELECT COUNT(*)+table.col1 from table1; + + @retval + -1 on error + @retval + 0 on success +*/ +int +JOIN::prepare(TABLE_LIST *tables_init, COND *conds_init, uint og_num, + ORDER *order_init, bool skip_order_by, + ORDER *group_init, Item *having_init, + ORDER *proc_param_init, SELECT_LEX *select_lex_arg, + SELECT_LEX_UNIT *unit_arg) +{ + DBUG_ENTER("JOIN::prepare"); + + // to prevent double initialization on EXPLAIN + if (optimization_state != JOIN::NOT_OPTIMIZED) + DBUG_RETURN(0); + + conds= conds_init; + order= order_init; + group_list= group_init; + having= having_init; + proc_param= proc_param_init; + tables_list= tables_init; + select_lex= select_lex_arg; + DBUG_PRINT("info", ("select %p (%u) = JOIN %p", + select_lex, select_lex->select_number, this)); + select_lex->join= this; + join_list= &select_lex->top_join_list; + union_part= unit_arg->is_unit_op(); + + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_prepare(thd, "join_preparation"); + trace_prepare.add_select_number(select_lex->select_number); + Json_writer_array trace_steps(thd, "steps"); + + // simple check that we got usable conds + dbug_print_item(conds); + + /* Fix items that requires the join structure to exist */ + fix_items_after_optimize(thd, select_lex); + + /* + It is hack which force creating EXPLAIN object always on runt-time arena + (because very top JOIN::prepare executes always with runtime arena, but + constant subquery like (SELECT 'x') can be called with statement arena + during prepare phase of top SELECT). 
+ */ + if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_PREPARE)) + create_explain_query_if_not_exists(thd->lex, thd->mem_root); + + if (select_lex->handle_derived(thd->lex, DT_PREPARE)) + DBUG_RETURN(-1); + + thd->lex->current_select->context_analysis_place= NO_MATTER; + thd->lex->current_select->is_item_list_lookup= 1; + /* + If we have already executed SELECT, then it have not sense to prevent + its table from update (see unique_table()) + Affects only materialized derived tables. + */ + /* Check that all tables, fields, conds and order are ok */ + if (!(select_options & OPTION_SETUP_TABLES_DONE) && + setup_tables_and_check_access(thd, &select_lex->context, join_list, + tables_list, select_lex->leaf_tables, + FALSE, SELECT_ACL, SELECT_ACL, FALSE)) + DBUG_RETURN(-1); + + /* System Versioning: handle FOR SYSTEM_TIME clause. */ + if (select_lex->vers_setup_conds(thd, tables_list) < 0) + DBUG_RETURN(-1); + + /* + mixed_implicit_grouping will be set to TRUE if the SELECT list + mixes elements with and without grouping, and there is no GROUP BY + clause. + Mixing non-aggregated fields with aggregate functions in the + SELECT list or HAVING is a MySQL extension that is allowed only if + the ONLY_FULL_GROUP_BY sql mode is not set. + */ + mixed_implicit_grouping= false; + if ((~thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY) && + select_lex->with_sum_func && !group_list) + { + if (check_list_for_field(&fields_list) || + check_list_for_field(order)) + { + List_iterator_fast li(select_lex->leaf_tables); + + mixed_implicit_grouping= true; // mark for future + + while (TABLE_LIST *tbl= li++) + { + /* + If the query uses implicit grouping where the select list + contains both aggregate functions and non-aggregate fields, + any non-aggregated field may produce a NULL value. Set all + fields of each table as nullable before semantic analysis to + take into account this change of nullability. 
+ + Note: this loop doesn't touch tables inside merged + semi-joins, because subquery-to-semijoin conversion has not + been done yet. This is intended. + */ + if (tbl->table) + tbl->table->maybe_null= 1; + } + } + } + table_count= select_lex->leaf_tables.elements; + + uint real_og_num= og_num; + if (skip_order_by && + select_lex != select_lex->master_unit()->global_parameters()) + real_og_num+= select_lex->order_list.elements; + + DBUG_ASSERT(select_lex->hidden_bit_fields == 0); + if (setup_wild(thd, tables_list, fields_list, &all_fields, select_lex, false)) + DBUG_RETURN(-1); + + if (thd->lex->current_select->first_cond_optimization) + { + if ( conds && ! thd->lex->current_select->merged_into) + select_lex->select_n_reserved= conds->exists2in_reserved_items(); + else + select_lex->select_n_reserved= 0; + } + + if (select_lex->setup_ref_array(thd, real_og_num)) + DBUG_RETURN(-1); + + ref_ptrs= ref_ptr_array_slice(0); + + enum_parsing_place save_place= + thd->lex->current_select->context_analysis_place; + thd->lex->current_select->context_analysis_place= SELECT_LIST; + + { + List_iterator_fast it(select_lex->leaf_tables); + while (TABLE_LIST *tbl= it++) + { + if (tbl->table_function && + tbl->table_function->setup(thd, tbl, select_lex_arg)) + DBUG_RETURN(-1); + } + } + + if (setup_fields(thd, ref_ptrs, fields_list, MARK_COLUMNS_READ, + &all_fields, &select_lex->pre_fix, 1)) + DBUG_RETURN(-1); + thd->lex->current_select->context_analysis_place= save_place; + + if (setup_without_group(thd, ref_ptrs, tables_list, + select_lex->leaf_tables, fields_list, + all_fields, &conds, order, group_list, + select_lex->window_specs, + select_lex->window_funcs, + &hidden_group_fields)) + DBUG_RETURN(-1); + + /* + Permanently remove redundant parts from the query if + 1) This is a subquery + 2) This is the first time this query is optimized (since the + transformation is permanent + 3) Not normalizing a view. 
Removal should take place when a + query involving a view is optimized, not when the view + is created + */ + if (select_lex->master_unit()->item && // 1) + select_lex->first_cond_optimization && // 2) + !thd->lex->is_view_context_analysis()) // 3) + { + remove_redundant_subquery_clauses(select_lex); + } + + /* Resolve the ORDER BY that was skipped, then remove it. */ + if (skip_order_by && select_lex != + select_lex->master_unit()->global_parameters()) + { + nesting_map save_allow_sum_func= thd->lex->allow_sum_func; + thd->lex->allow_sum_func.set_bit(select_lex->nest_level); + thd->where= "order clause"; + for (ORDER *order= select_lex->order_list.first; order; order= order->next) + { + /* Don't add the order items to all fields. Just resolve them to ensure + the query is valid, we'll drop them immediately after. */ + if (find_order_in_list(thd, ref_ptrs, tables_list, order, + fields_list, all_fields, false, false, false)) + DBUG_RETURN(-1); + } + thd->lex->allow_sum_func= save_allow_sum_func; + select_lex->order_list.empty(); + } + + if (having) + { + nesting_map save_allow_sum_func= thd->lex->allow_sum_func; + thd->where="having clause"; + thd->lex->allow_sum_func.set_bit(select_lex_arg->nest_level); + select_lex->having_fix_field= 1; + /* + Wrap alone field in HAVING clause in case it will be outer field + of subquery which need persistent pointer on it, but having + could be changed by optimizer + */ + if (having->type() == Item::REF_ITEM && + ((Item_ref *)having)->ref_type() == Item_ref::REF) + wrap_ident(thd, &having); + bool having_fix_rc= having->fix_fields_if_needed_for_bool(thd, &having); + select_lex->having_fix_field= 0; + + if (unlikely(having_fix_rc || thd->is_error())) + DBUG_RETURN(-1); /* purecov: inspected */ + thd->lex->allow_sum_func= save_allow_sum_func; + + if (having->with_window_func()) + { + my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0)); + DBUG_RETURN(-1); + } + } + + /* + After setting up window functions, we may have discovered 
additional + used tables from the PARTITION BY and ORDER BY list. Update all items + that contain window functions. + */ + if (select_lex->have_window_funcs()) + { + List_iterator_fast it(select_lex->item_list); + Item *item; + while ((item= it++)) + { + if (item->with_window_func()) + item->update_used_tables(); + } + } + + With_clause *with_clause=select_lex->get_with_clause(); + if (with_clause && with_clause->prepare_unreferenced_elements(thd)) + DBUG_RETURN(1); + + With_element *with_elem= select_lex->get_with_element(); + if (with_elem && + select_lex->check_unrestricted_recursive( + thd->variables.only_standard_compliant_cte)) + DBUG_RETURN(-1); + if (!(select_lex->changed_elements & TOUCHED_SEL_COND)) + select_lex->check_subqueries_with_recursive_references(); + + int res= check_and_do_in_subquery_rewrites(this); + + select_lex->fix_prepare_information(thd, &conds, &having); + + if (res) + DBUG_RETURN(res); + + if (order) + { + bool requires_sorting= FALSE; + /* + WITH TIES forces the results to be sorted, even if it's not sanely + sortable. + */ + if (select_lex->limit_params.with_ties) + requires_sorting= true; + + /* + Go through each ORDER BY item and perform the following: + 1. Detect if none of the items contain meaningful data, which means we + can drop the sorting altogether. + 2. Split any columns with aggregation functions or window functions into + their base components and store them as separate fields. + (see split_sum_func) for more details. + */ + for (ORDER *ord= order; ord; ord= ord->next) + { + Item *item= *ord->item; + /* + Disregard sort order if there's only + zero length NOT NULL fields (e.g. {VAR}CHAR(0) NOT NULL") or + zero length NOT NULL string functions there. + Such tuples don't contain any data to sort. 
+ */ + if (!requires_sorting && + /* Not a zero length NOT NULL field */ + ((item->type() != Item::FIELD_ITEM || + ((Item_field *) item)->field->maybe_null() || + ((Item_field *) item)->field->sort_length()) && + /* AND not a zero length NOT NULL string function. */ + (item->type() != Item::FUNC_ITEM || + item->maybe_null() || + item->result_type() != STRING_RESULT || + item->max_length))) + requires_sorting= TRUE; + + if ((item->with_sum_func() && item->type() != Item::SUM_FUNC_ITEM) || + item->with_window_func()) + item->split_sum_func(thd, ref_ptrs, all_fields, SPLIT_SUM_SELECT); + } + /* Drop the ORDER BY clause if none of the columns contain any data that + can produce a meaningful sorted set. */ + if (!requires_sorting) + order= NULL; + } + else + { + /* The current select does not have an ORDER BY */ + if (select_lex->limit_params.with_ties) + { + my_error(ER_WITH_TIES_NEEDS_ORDER, MYF(0)); + DBUG_RETURN(-1); + } + } + + if (having && (having->with_sum_func() || having->with_rownum_func())) + having->split_sum_func2(thd, ref_ptrs, all_fields, + &having, SPLIT_SUM_SKIP_REGISTERED); + if (select_lex->inner_sum_func_list) + { + Item_sum *end=select_lex->inner_sum_func_list; + Item_sum *item_sum= end; + do + { + item_sum= item_sum->next; + item_sum->split_sum_func2(thd, ref_ptrs, + all_fields, item_sum->ref_by, 0); + } while (item_sum != end); + } + + if (select_lex->inner_refs_list.elements && + fix_inner_refs(thd, all_fields, select_lex, ref_ptrs)) + DBUG_RETURN(-1); + + if (group_list) + { + /* + Because HEAP tables can't index BIT fields we need to use an + additional hidden field for grouping because later it will be + converted to a LONG field. Original field will remain of the + BIT type and will be returned to a client. 
+ */ + for (ORDER *ord= group_list; ord; ord= ord->next) + { + if ((*ord->item)->type() == Item::FIELD_ITEM && + (*ord->item)->field_type() == MYSQL_TYPE_BIT) + { + Item_field *field= new (thd->mem_root) Item_field(thd, *(Item_field**)ord->item); + if (!field) + DBUG_RETURN(-1); + int el= all_fields.elements; + ref_ptrs[el]= field; + all_fields.push_front(field, thd->mem_root); + ord->item= &ref_ptrs[el]; + } + } + } + + /* + Check if there are references to un-aggregated columns when computing + aggregate functions with implicit grouping (there is no GROUP BY). + */ + if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY && !group_list && + !(select_lex->master_unit()->item && + select_lex->master_unit()->item->is_in_predicate() && + select_lex->master_unit()->item->get_IN_subquery()-> + test_set_strategy(SUBS_MAXMIN_INJECTED)) && + select_lex->non_agg_field_used() && + select_lex->agg_func_used()) + { + my_message(ER_MIX_OF_GROUP_FUNC_AND_FIELDS, + ER_THD(thd, ER_MIX_OF_GROUP_FUNC_AND_FIELDS), MYF(0)); + DBUG_RETURN(-1); + } + { + /* Caclulate the number of groups */ + send_group_parts= 0; + for (ORDER *group_tmp= group_list ; group_tmp ; group_tmp= group_tmp->next) + send_group_parts++; + } + + procedure= setup_procedure(thd, proc_param, result, fields_list, &error); + if (unlikely(error)) + goto err; /* purecov: inspected */ + if (procedure) + { + if (setup_new_fields(thd, fields_list, all_fields, + procedure->param_fields)) + goto err; /* purecov: inspected */ + if (procedure->group) + { + if (!test_if_subpart(procedure->group,group_list)) + { /* purecov: inspected */ + my_message(ER_DIFF_GROUPS_PROC, ER_THD(thd, ER_DIFF_GROUPS_PROC), + MYF(0)); /* purecov: inspected */ + goto err; /* purecov: inspected */ + } + } + if (order && (procedure->flags & PROC_NO_SORT)) + { /* purecov: inspected */ + my_message(ER_ORDER_WITH_PROC, ER_THD(thd, ER_ORDER_WITH_PROC), + MYF(0)); /* purecov: inspected */ + goto err; /* purecov: inspected */ + } + if 
(thd->lex->derived_tables) + { + /* + Queries with derived tables and PROCEDURE are not allowed. + Many of such queries are disallowed grammatically, but there + are still some complex cases: + SELECT 1 FROM (SELECT 1) a PROCEDURE ANALYSE() + */ + my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", + thd->lex->derived_tables & DERIVED_VIEW ? + "view" : "subquery"); + goto err; + } + if (thd->lex->sql_command != SQLCOM_SELECT) + { + // EXPLAIN SELECT * FROM t1 PROCEDURE ANALYSE() + my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", "non-SELECT"); + goto err; + } + } + + if (thd->trace_started()) + { + Json_writer_object trace_wrapper(thd); + opt_trace_print_expanded_query(thd, select_lex, &trace_wrapper); + } + + if (!procedure && result && result->prepare(fields_list, unit_arg)) + goto err; /* purecov: inspected */ + + unit= unit_arg; + if (prepare_stage2()) + goto err; + + DBUG_RETURN(0); // All OK + +err: + delete procedure; /* purecov: inspected */ + procedure= 0; + DBUG_RETURN(-1); /* purecov: inspected */ +} + + +/** + Second phase of prepare where we collect some statistic. + + @details + We made this part separate to be able recalculate some statistic after + transforming subquery on optimization phase. 
+*/ + +bool JOIN::prepare_stage2() +{ + bool res= TRUE; + DBUG_ENTER("JOIN::prepare_stage2"); + + /* Init join struct */ + count_field_types(select_lex, &tmp_table_param, all_fields, 0); + this->group= group_list != 0; + + if (tmp_table_param.sum_func_count && !group_list) + { + implicit_grouping= TRUE; + // Result will contain zero or one row - ordering is meaningless + order= NULL; + } + +#ifdef RESTRICTED_GROUP + if (implicit_grouping) + { + my_message(ER_WRONG_SUM_SELECT,ER_THD(thd, ER_WRONG_SUM_SELECT),MYF(0)); + goto err; + } +#endif + if (select_lex->olap == ROLLUP_TYPE && rollup_init()) + goto err; + if (alloc_func_list() || + make_sum_func_list(all_fields, fields_list, false)) + goto err; + + res= FALSE; +err: + DBUG_RETURN(res); /* purecov: inspected */ +} + + +bool JOIN::build_explain() +{ + DBUG_ENTER("JOIN::build_explain"); + have_query_plan= QEP_AVAILABLE; + + /* + explain data must be created on the Explain_query::mem_root. Because it's + just a memroot, not an arena, explain data must not contain any Items + */ + MEM_ROOT *old_mem_root= thd->mem_root; + Item *old_free_list __attribute__((unused))= thd->free_list; + thd->mem_root= thd->lex->explain->mem_root; + bool res= save_explain_data(thd->lex->explain, false /* can overwrite */, + need_tmp, + !skip_sort_order && !no_order && (order || group_list), + select_distinct); + thd->mem_root= old_mem_root; + DBUG_ASSERT(thd->free_list == old_free_list); // no Items were created + if (res) + DBUG_RETURN(1); + uint select_nr= select_lex->select_number; + JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt(); + for (uint i= 0; i < aggr_tables; i++, curr_tab++) + { + if (select_nr == FAKE_SELECT_LEX_ID) + { + /* this is a fake_select_lex of a union */ + select_nr= select_lex->master_unit()->first_select()->select_number; + curr_tab->tracker= thd->lex->explain->get_union(select_nr)-> + get_tmptable_read_tracker(); + } + else if (select_nr < INT_MAX) + { + Explain_select *tmp= 
thd->lex->explain->get_select(select_nr); + if (tmp) + curr_tab->tracker= tmp->get_using_temporary_read_tracker(); + } + } + DBUG_RETURN(0); +} + + +int JOIN::optimize() +{ + int res= 0; + join_optimization_state init_state= optimization_state; + if (select_lex->pushdown_select) + { + // Do same as JOIN::optimize_inner does: + fields= &select_lex->item_list; + + if (!(select_options & SELECT_DESCRIBE)) + { + /* Prepare to execute the query pushed into a foreign engine */ + res= select_lex->pushdown_select->prepare(); + } + with_two_phase_optimization= false; + } + else if (optimization_state == JOIN::OPTIMIZATION_PHASE_1_DONE) + res= optimize_stage2(); + else + { + // to prevent double initialization on EXPLAIN + if (optimization_state != JOIN::NOT_OPTIMIZED) + return FALSE; + optimization_state= JOIN::OPTIMIZATION_IN_PROGRESS; + res= optimize_inner(); + } + if (!with_two_phase_optimization || + init_state == JOIN::OPTIMIZATION_PHASE_1_DONE) + { + if (!res && have_query_plan != QEP_DELETED) + res= build_explain(); + optimization_state= JOIN::OPTIMIZATION_DONE; + } + return res; +} + + +/** + @brief + Create range filters objects needed in execution for all join tables + + @details + For each join table from the chosen execution plan such that a range filter + is used when joining this table the function creates a Rowid_filter object + for this range filter. In order to do this the function first constructs + a quick select to scan the range for this range filter. Then it creates + a container for the range filter and finally constructs a Range_rowid_filter + object a pointer to which is set in the field JOIN_TAB::rowid_filter of + the joined table. + + @retval false Ok + @retval true Error +*/ + +bool JOIN::make_range_rowid_filters() +{ + DBUG_ENTER("make_range_rowid_filters"); + + /* + Do not build range filters with detected impossible WHERE. + Anyway conditions cannot be used anymore to extract ranges for filters. 
+ */ + if (const_table_map != found_const_table_map) + DBUG_RETURN(0); + + JOIN_TAB *tab; + + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + if (!tab->range_rowid_filter_info) + continue; + + DBUG_ASSERT(!(tab->ref.key >= 0 && + tab->ref.key == (int) tab->range_rowid_filter_info->key_no)); + DBUG_ASSERT(!(tab->ref.key == -1 && tab->quick && + tab->quick->index == tab->range_rowid_filter_info->key_no)); + + int err; + SQL_SELECT *sel= NULL; + Rowid_filter_container *filter_container= NULL; + Item **sargable_cond= get_sargable_cond(this, tab->table); + sel= make_select(tab->table, const_table_map, const_table_map, + *sargable_cond, (SORT_INFO*) 0, 1, &err); + if (!sel) + continue; + + key_map filter_map; + filter_map.clear_all(); + filter_map.set_bit(tab->range_rowid_filter_info->key_no); + filter_map.merge(tab->table->with_impossible_ranges); + bool force_index_save= tab->table->force_index; + tab->table->force_index= true; + quick_select_return rc; + rc= sel->test_quick_select(thd, filter_map, (table_map) 0, + (ha_rows) HA_POS_ERROR, true, false, true, + true); + tab->table->force_index= force_index_save; + if (rc == SQL_SELECT::ERROR || thd->is_error()) + { + DBUG_RETURN(true); /* Fatal error */ + } + /* + If SUBS_IN_TO_EXISTS strtrategy is chosen for the subquery then + additional conditions are injected into WHERE/ON/HAVING and it may + happen that the call of test_quick_select() discovers impossible range. 
+ */ + if (rc == SQL_SELECT::IMPOSSIBLE_RANGE) + { + const_table_map|= tab->table->map; + goto no_filter; + } + DBUG_ASSERT(sel->quick); + filter_container= + tab->range_rowid_filter_info->create_container(); + if (filter_container) + { + tab->rowid_filter= + new (thd->mem_root) Range_rowid_filter(tab->table, + tab->range_rowid_filter_info, + filter_container, sel); + if (tab->rowid_filter) + continue; + } + no_filter: + if (sel->quick) + delete sel->quick; + delete sel; + } + + DBUG_RETURN(0); +} + + +/** + @brief + Allocate memory the rowid containers of the used the range filters + + @details + For each join table from the chosen execution plan such that a range filter + is used when joining this table the function allocate memory for the + rowid container employed by the filter. On success it lets the table engine + know that what rowid filter will be used when accessing the table rows. + + @retval false always +*/ + +bool +JOIN::init_range_rowid_filters() +{ + DBUG_ENTER("init_range_rowid_filters"); + + JOIN_TAB *tab; + + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + if (!tab->rowid_filter) + continue; + if (tab->rowid_filter->get_container()->alloc()) + { + delete tab->rowid_filter; + tab->rowid_filter= 0; + continue; + } + tab->table->file->rowid_filter_push(tab->rowid_filter); + tab->is_rowid_filter_built= false; + } + DBUG_RETURN(0); +} + +/** + global select optimisation. 
+ + @note + error code saved in field 'error' + + @retval + 0 success + @retval + 1 error +*/ + +int +JOIN::optimize_inner() +{ + DBUG_ENTER("JOIN::optimize_inner"); + subq_exit_fl= false; + + DEBUG_SYNC(thd, "before_join_optimize"); + THD_STAGE_INFO(thd, stage_optimizing); +#ifndef DBUG_OFF + dbug_join_tab_array_size= 0; +#endif + + // rownum used somewhere in query, no limits and it is derived + if (unlikely(thd->lex->with_rownum && + select_lex->first_cond_optimization && + select_lex->master_unit()->derived)) + optimize_upper_rownum_func(); + + do_send_rows = (unit->lim.get_select_limit()) ? 1 : 0; + + set_allowed_join_cache_types(); + need_distinct= TRUE; + + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_prepare(thd, "join_optimization"); + trace_prepare.add_select_number(select_lex->select_number); + Json_writer_array trace_steps(thd, "steps"); + + /* + Needed in case optimizer short-cuts, + set properly in make_aggr_tables_info() + */ + fields= &select_lex->item_list; + + if (select_lex->first_cond_optimization) + { + //Do it only for the first execution + /* Merge all mergeable derived tables/views in this SELECT. */ + if (select_lex->handle_derived(thd->lex, DT_MERGE)) + DBUG_RETURN(TRUE); + } + + if (select_lex->first_cond_optimization && + transform_in_predicates_into_in_subq(thd)) + DBUG_RETURN(1); + + /* + Update used tables after all handling derived table procedures + After this call, select_lex->select_list_tables contains the table + bits of all items in the select list (but not bits from WHERE clause or + other items). + */ + select_lex->update_used_tables(); + + /* + In fact we transform underlying subqueries after their 'prepare' phase and + before 'optimize' from upper query 'optimize' to allow semijoin + conversion happened (which done in the same way. 
+ */ + if (select_lex->first_cond_optimization && + conds && conds->walk(&Item::exists2in_processor, 0, thd)) + DBUG_RETURN(1); + /* + TODO + make view to decide if it is possible to write to WHERE directly or make Semi-Joins able to process ON condition if it is possible + for (TABLE_LIST *tbl= tables_list; tbl; tbl= tbl->next_local) + { + if (tbl->on_expr && + tbl->on_expr->walk(&Item::exists2in_processor, 0, thd)) + DBUG_RETURN(1); + } + */ + + if (transform_max_min_subquery()) + DBUG_RETURN(1); /* purecov: inspected */ + + if (select_lex->first_cond_optimization) + { + /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */ + if (convert_join_subqueries_to_semijoins(this)) + DBUG_RETURN(1); /* purecov: inspected */ + /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */ + select_lex->update_used_tables(); + } + + eval_select_list_used_tables(); + + if (select_lex->options & OPTION_SCHEMA_TABLE && + optimize_schema_tables_memory_usage(select_lex->leaf_tables)) + DBUG_RETURN(1); + + if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */ + DBUG_RETURN(-1); + + row_limit= ((select_distinct || order || group_list) ? 
HA_POS_ERROR : + unit->lim.get_select_limit()); + /* select_limit is used to decide if we are likely to scan the whole table */ + select_limit= unit->lim.get_select_limit(); + if (having || (select_options & OPTION_FOUND_ROWS)) + select_limit= HA_POS_ERROR; +#ifdef HAVE_REF_TO_FIELDS // Not done yet + /* Add HAVING to WHERE if possible */ + if (having && !group_list && !sum_func_count) + { + if (!conds) + { + conds= having; + having= 0; + } + else if ((conds=new (thd->mem_root) Item_cond_and(conds,having))) + { + /* + Item_cond_and can't be fixed after creation, so we do not check + conds->fixed() + */ + conds->fix_fields(thd, &conds); + conds->change_ref_to_fields(thd, tables_list); + conds->top_level_item(); + having= 0; + } + } +#endif + + SELECT_LEX *sel= select_lex; + if (sel->first_cond_optimization) + { + /* + The following code will allocate the new items in a permanent + MEMROOT for prepared statements and stored procedures. + + But first we need to ensure that thd->lex->explain is allocated + in the execution arena + */ + create_explain_query_if_not_exists(thd->lex, thd->mem_root); + + Query_arena *arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); + + sel->first_cond_optimization= 0; + + /* Convert all outer joins to inner joins if possible */ + conds= simplify_joins(this, join_list, conds, TRUE, FALSE); + + add_table_function_dependencies(join_list, table_map(-1)); + + if (thd->is_error() || select_lex->save_leaf_tables(thd)) + { + if (arena) + thd->restore_active_arena(arena, &backup); + DBUG_RETURN(1); + } + build_bitmap_for_nested_joins(join_list, 0); + + sel->prep_where= conds ? 
conds->copy_andor_structure(thd) : 0; + + sel->where= conds; + + select_lex->update_used_tables(); + + if (arena) + thd->restore_active_arena(arena, &backup); + } + + if (!allowed_top_level_tables) + calc_allowed_top_level_tables(select_lex); + + if (optimize_constant_subqueries()) + DBUG_RETURN(1); + + if (conds && conds->with_subquery()) + (void) conds->walk(&Item::cleanup_is_expensive_cache_processor, + 0, (void *) 0); + if (having && having->with_subquery()) + (void) having->walk(&Item::cleanup_is_expensive_cache_processor, + 0, (void *) 0); + + List eq_list; + + if (setup_degenerate_jtbm_semi_joins(this, join_list, eq_list)) + DBUG_RETURN(1); + + if (eq_list.elements != 0) + { + Item *new_cond; + + if (eq_list.elements == 1) + new_cond= eq_list.pop(); + else + new_cond= new (thd->mem_root) Item_cond_and(thd, eq_list); + + if (new_cond && + ((new_cond->fix_fields(thd, &new_cond) || + !(conds= and_items(thd, conds, new_cond)) || + conds->fix_fields(thd, &conds)))) + DBUG_RETURN(TRUE); + } + eq_list.empty(); + + if (select_lex->cond_pushed_into_where) + { + conds= and_conds(thd, conds, select_lex->cond_pushed_into_where); + if (conds && conds->fix_fields(thd, &conds)) + DBUG_RETURN(1); + } + if (select_lex->cond_pushed_into_having) + { + having= and_conds(thd, having, select_lex->cond_pushed_into_having); + if (having) + { + select_lex->having_fix_field= 1; + select_lex->having_fix_field_for_pushed_cond= 1; + if (having->fix_fields(thd, &having)) + DBUG_RETURN(1); + select_lex->having_fix_field= 0; + select_lex->having_fix_field_for_pushed_cond= 0; + } + } + + bool ignore_on_expr= false; + /* + PS/SP note: on_expr of versioned table can not be reallocated + (see build_equal_items() below) because it can be not rebuilt + at second invocation. 
+ */ + if (!thd->stmt_arena->is_conventional() && thd->mem_root != thd->stmt_arena->mem_root) + for (TABLE_LIST *tbl= tables_list; tbl; tbl= tbl->next_local) + if (tbl->table && tbl->on_expr && tbl->table->versioned()) + { + ignore_on_expr= true; + break; + } + + transform_in_predicates_into_equalities(thd); + + conds= optimize_cond(this, conds, join_list, ignore_on_expr, + &cond_value, &cond_equal, OPT_LINK_EQUAL_FIELDS); + + if (thd->is_error()) + { + error= 1; + DBUG_PRINT("error",("Error from optimize_cond")); + DBUG_RETURN(1); + } + if (select_lex->with_rownum && ! order && ! group_list && + !select_distinct && conds && select_lex == unit->global_parameters()) + optimize_rownum(thd, unit, conds); + + having= optimize_cond(this, having, join_list, TRUE, + &having_value, &having_equal); + + if (thd->is_error()) + { + error= 1; + DBUG_PRINT("error",("Error from optimize_cond")); + DBUG_RETURN(1); + } + + /* Do not push into WHERE from HAVING if cond_value == Item::COND_FALSE */ + + if (thd->lex->sql_command == SQLCOM_SELECT && + optimizer_flag(thd, OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING) && + cond_value != Item::COND_FALSE) + { + having= + select_lex->pushdown_from_having_into_where(thd, having); + if (select_lex->attach_to_conds.elements != 0) + { + conds= and_new_conditions_to_optimized_cond(thd, conds, &cond_equal, + select_lex->attach_to_conds, + &cond_value); + sel->attach_to_conds.empty(); + } + } + + if (optimizer_flag(thd, OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_SUBQUERY)) + { + TABLE_LIST *tbl; + List_iterator_fast li(select_lex->leaf_tables); + while ((tbl= li++)) + if (tbl->jtbm_subselect) + { + if (tbl->jtbm_subselect->pushdown_cond_for_in_subquery(thd, conds)) + DBUG_RETURN(1); + } + } + + if (setup_jtbm_semi_joins(this, join_list, eq_list)) + DBUG_RETURN(1); + + if (eq_list.elements != 0) + { + conds= and_new_conditions_to_optimized_cond(thd, conds, &cond_equal, + eq_list, &cond_value); + + if (!conds && + cond_value != Item::COND_FALSE && cond_value 
!= Item::COND_TRUE) + DBUG_RETURN(TRUE); + } + + if (optimizer_flag(thd, OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_DERIVED)) + { + TABLE_LIST *tbl; + List_iterator_fast li(select_lex->leaf_tables); + while ((tbl= li++)) + { + /* + Do not push conditions from where into materialized inner tables + of outer joins: this is not valid. + */ + if (tbl->is_materialized_derived()) + { + JOIN *join= tbl->get_unit()->first_select()->join; + if (join && + join->optimization_state == JOIN::OPTIMIZATION_PHASE_1_DONE && + join->with_two_phase_optimization) + continue; + /* + Do not push conditions from where into materialized inner tables + of outer joins: this is not valid. + */ + if (!tbl->is_inner_table_of_outer_join()) + { + if (pushdown_cond_for_derived(thd, conds, tbl)) + DBUG_RETURN(1); + } + if (mysql_handle_single_derived(thd->lex, tbl, DT_OPTIMIZE)) + DBUG_RETURN(1); + } + } + } + else + { + /* Run optimize phase for all derived tables/views used in this SELECT. */ + if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE)) + DBUG_RETURN(1); + } + { + if (select_lex->where) + { + select_lex->cond_value= cond_value; + if (sel->where != conds && cond_value == Item::COND_OK) + thd->change_item_tree(&sel->where, conds); + } + if (select_lex->having) + { + select_lex->having_value= having_value; + if (sel->having != having && having_value == Item::COND_OK) + thd->change_item_tree(&sel->having, having); + } + if (cond_value == Item::COND_FALSE || having_value == Item::COND_FALSE || + (!unit->lim.get_select_limit() && + !(select_options & OPTION_FOUND_ROWS))) + { /* Impossible cond */ + if (unit->lim.get_select_limit()) + { + DBUG_PRINT("info", (having_value == Item::COND_FALSE ? + "Impossible HAVING" : "Impossible WHERE")); + zero_result_cause= having_value == Item::COND_FALSE ? 
+ "Impossible HAVING" : "Impossible WHERE"; + } + else + { + DBUG_PRINT("info", ("Zero limit")); + zero_result_cause= "Zero limit"; + } + table_count= top_join_tab_count= 0; + handle_implicit_grouping_with_window_funcs(); + error= 0; + subq_exit_fl= true; + goto setup_subq_exit; + } + } + +#ifdef WITH_PARTITION_STORAGE_ENGINE + { + TABLE_LIST *tbl; + List_iterator_fast li(select_lex->leaf_tables); + while ((tbl= li++)) + { + Item **prune_cond= get_sargable_cond(this, tbl->table); + tbl->table->all_partitions_pruned_away= + prune_partitions(thd, tbl->table, *prune_cond); + } + } +#endif + + /* + Try to optimize count(*), MY_MIN() and MY_MAX() to const fields if + there is implicit grouping (aggregate functions but no + group_list). In this case, the result set shall only contain one + row. + */ + if (tables_list && implicit_grouping) + { + int res; + /* + opt_sum_query() returns HA_ERR_KEY_NOT_FOUND if no rows match + to the WHERE conditions, + or 1 if all items were resolved (optimized away), + or 0, or an error number HA_ERR_... + + If all items were resolved by opt_sum_query, there is no need to + open any tables. + */ + + /* + The following resetting and restoring of sum_funcs is needed to + go around a bug in spider where it assumes that + make_sum_func_list() has not been called yet and do logical + choices based on this if special handling of min/max functions should + be done. We disable this special handling while we are trying to find + out if we can replace MIN/MAX values with constants. 
+ */ + Item_sum **save_func_sums= sum_funcs, *tmp_sum_funcs= 0; + sum_funcs= &tmp_sum_funcs; + res= opt_sum_query(thd, select_lex->leaf_tables, all_fields, conds); + sum_funcs= save_func_sums; + + if (res) + { + DBUG_ASSERT(res >= 0); + if (res == HA_ERR_KEY_NOT_FOUND) + { + DBUG_PRINT("info",("No matching min/max row")); + zero_result_cause= "No matching min/max row"; + table_count= top_join_tab_count= 0; + error=0; + subq_exit_fl= true; + handle_implicit_grouping_with_window_funcs(); + goto setup_subq_exit; + } + if (res > 1) + { + error= res; + DBUG_PRINT("error",("Error from opt_sum_query")); + DBUG_RETURN(1); + } + + DBUG_PRINT("info",("Select tables optimized away")); + if (!select_lex->have_window_funcs()) + zero_result_cause= "Select tables optimized away"; + tables_list= 0; // All tables resolved + select_lex->min_max_opt_list.empty(); + const_tables= top_join_tab_count= table_count; + handle_implicit_grouping_with_window_funcs(); + /* + Extract all table-independent conditions and replace the WHERE + clause with them. All other conditions were computed by opt_sum_query + and the MIN/MAX/COUNT function(s) have been replaced by constants, + so there is no need to compute the whole WHERE clause again. + Notice that make_cond_for_table() will always succeed to remove all + computed conditions, because opt_sum_query() is applicable only to + conjunctions. + Preserve conditions for EXPLAIN. 
+ */ + if (conds && !(thd->lex->describe & DESCRIBE_EXTENDED)) + { + COND *table_independent_conds= + make_cond_for_table(thd, conds, PSEUDO_TABLE_BITS, 0, -1, + FALSE, FALSE); + if (!table_independent_conds && thd->is_error()) + DBUG_RETURN(1); + DBUG_EXECUTE("where", + print_where(table_independent_conds, + "where after opt_sum_query()", + QT_ORDINARY);); + conds= table_independent_conds; + } + } + } + if (!tables_list) + { + DBUG_PRINT("info",("No tables")); + error= 0; + subq_exit_fl= true; + goto setup_subq_exit; + } + error= -1; // Error is sent to client + /* get_sort_by_table() call used to be here: */ + MEM_UNDEFINED(&sort_by_table, sizeof(sort_by_table)); + + /* + We have to remove constants and duplicates from group_list before + calling make_join_statistics() as this may call get_best_group_min_max() + which needs a simplified group_list. + */ + if (group_list && table_count == 1) + { + group_list= remove_const(this, group_list, conds, + rollup.state == ROLLUP::STATE_NONE, + &simple_group); + if (unlikely(thd->is_error())) + { + error= 1; + DBUG_RETURN(1); + } + if (!group_list) + { + /* The output has only one row */ + order=0; + simple_order=1; + group_optimized_away= 1; + select_distinct=0; + } + } + + /* Calculate how to do the join */ + THD_STAGE_INFO(thd, stage_statistics); + result->prepare_to_read_rows(); + if (unlikely(make_join_statistics(this, select_lex->leaf_tables, + &keyuse)) || + unlikely(thd->is_error())) + { + DBUG_PRINT("error",("Error: make_join_statistics() failed")); + DBUG_RETURN(1); + } + + /* + If a splittable materialized derived/view dt_i is embedded into + into another splittable materialized derived/view dt_o then + splitting plans for dt_i and dt_o are evaluated independently. + First the optimizer looks for the best splitting plan sp_i for dt_i. + It happens when non-splitting plans for dt_o are evaluated. + The cost of sp_i is considered as the cost of materialization of dt_i + when evaluating any splitting plan for dt_o. 
+ */ + if (fix_all_splittings_in_plan()) + DBUG_RETURN(1); + +setup_subq_exit: + with_two_phase_optimization= check_two_phase_optimization(thd); + if (with_two_phase_optimization) + optimization_state= JOIN::OPTIMIZATION_PHASE_1_DONE; + else + { + if (optimize_stage2()) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +int JOIN::optimize_stage2() +{ + ulonglong select_opts_for_readinfo; + uint no_jbuf_after; + JOIN_TAB *tab; + DBUG_ENTER("JOIN::optimize_stage2"); + + if (subq_exit_fl) + goto setup_subq_exit; + + if (unlikely(thd->check_killed())) + DBUG_RETURN(1); + + /* Generate an execution plan from the found optimal join order. */ + if (get_best_combination()) + DBUG_RETURN(1); + + if (make_range_rowid_filters()) + DBUG_RETURN(1); + + if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE)) + DBUG_RETURN(1); + + if (optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_WITH_KEYS)) + drop_unused_derived_keys(); + + if (rollup.state != ROLLUP::STATE_NONE) + { + if (rollup_process_const_fields()) + { + DBUG_PRINT("error", ("Error: rollup_process_fields() failed")); + DBUG_RETURN(1); + } + } + else + { + /* Remove distinct if only const tables */ + select_distinct= select_distinct && (const_tables != table_count); + } + + THD_STAGE_INFO(thd, stage_preparing); + if (result->initialize_tables(this)) + { + DBUG_PRINT("error",("Error: initialize_tables() failed")); + DBUG_RETURN(1); // error == -1 + } + if (const_table_map != found_const_table_map && + !(select_options & SELECT_DESCRIBE)) + { + // There is at least one empty const table + zero_result_cause= "no matching row in const table"; + DBUG_PRINT("error",("Error: %s", zero_result_cause)); + error= 0; + handle_implicit_grouping_with_window_funcs(); + goto setup_subq_exit; + } + if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) && + best_read > (double) thd->variables.max_join_size && + !(select_options & SELECT_DESCRIBE)) + { /* purecov: inspected */ + my_message(ER_TOO_BIG_SELECT, ER_THD(thd, ER_TOO_BIG_SELECT), 
MYF(0)); + error= -1; + DBUG_RETURN(1); + } + if (const_tables && !thd->locked_tables_mode && + !(select_options & SELECT_NO_UNLOCK)) + { + /* + Unlock all tables, except sequences, as accessing these may still + require table updates. It's safe to ignore result code as all + tables where opened for read only. + */ + (void) mysql_unlock_some_tables(thd, table, const_tables, + GET_LOCK_SKIP_SEQUENCES); + } + if (!conds && outer_join) + { + /* Handle the case where we have an OUTER JOIN without a WHERE */ + conds= (Item*) Item_true; + } + + if (impossible_where) + { + zero_result_cause= + "Impossible WHERE noticed after reading const tables"; + select_lex->mark_const_derived(zero_result_cause); + handle_implicit_grouping_with_window_funcs(); + goto setup_subq_exit; + } + + select= make_select(*table, const_table_map, + const_table_map, conds, (SORT_INFO*) 0, 1, &error); + if (unlikely(error)) + { /* purecov: inspected */ + error= -1; /* purecov: inspected */ + DBUG_PRINT("error",("Error: make_select() failed")); + DBUG_RETURN(1); + } + + reset_nj_counters(this, join_list); + if (make_outerjoin_info(this)) + { + DBUG_RETURN(1); + } + + /* + Among the equal fields belonging to the same multiple equality + choose the one that is to be retrieved first and substitute + all references to these in where condition for a reference for + the selected field. 
+ */ + if (conds) + { + conds= substitute_for_best_equal_field(thd, NO_PARTICULAR_TAB, conds, + cond_equal, map2table, true); + if (unlikely(thd->is_error())) + { + error= 1; + DBUG_PRINT("error",("Error from substitute_for_best_equal")); + DBUG_RETURN(1); + } + conds->update_used_tables(); + + if (unlikely(thd->trace_started())) + trace_condition(thd, "WHERE", "substitute_best_equal", conds); + + DBUG_EXECUTE("where", + print_where(conds, + "after substitute_best_equal", + QT_ORDINARY);); + } + if (having) + { + having= substitute_for_best_equal_field(thd, NO_PARTICULAR_TAB, having, + having_equal, map2table, false); + if (thd->is_error()) + { + error= 1; + DBUG_PRINT("error",("Error from substitute_for_best_equal")); + DBUG_RETURN(1); + } + if (having) + { + having->update_used_tables(); + if (unlikely(thd->trace_started())) + trace_condition(thd, "HAVING", "substitute_best_equal", having); + } + + DBUG_EXECUTE("having", + print_where(having, + "after substitute_best_equal", + QT_ORDINARY);); + } + + /* + Perform the optimization on fields evaluation mentioned above + for all on expressions. + */ + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + if (*tab->on_expr_ref) + { + *tab->on_expr_ref= substitute_for_best_equal_field(thd, NO_PARTICULAR_TAB, + *tab->on_expr_ref, + tab->cond_equal, + map2table, true); + if (unlikely(thd->is_error())) + { + error= 1; + DBUG_PRINT("error",("Error from substitute_for_best_equal")); + DBUG_RETURN(1); + } + (*tab->on_expr_ref)->update_used_tables(); + if (unlikely(thd->trace_started())) + { + trace_condition(thd, "ON expr", "substitute_best_equal", + (*tab->on_expr_ref), tab->table->alias.c_ptr()); + } + } + } + + /* + Perform the optimization on fields evaliation mentioned above + for all used ref items. 
+ */ + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + uint key_copy_index=0; + for (uint i=0; i < tab->ref.key_parts; i++) + { + Item **ref_item_ptr= tab->ref.items+i; + Item *ref_item= *ref_item_ptr; + if (!ref_item->used_tables() && !(select_options & SELECT_DESCRIBE)) + continue; + COND_EQUAL *equals= cond_equal; + JOIN_TAB *first_inner= tab->first_inner; + while (equals) + { + ref_item= substitute_for_best_equal_field(thd, tab, ref_item, + equals, map2table, true); + if (unlikely(thd->is_error())) + DBUG_RETURN(1); + + if (first_inner) + { + equals= first_inner->cond_equal; + first_inner= first_inner->first_upper; + } + else + equals= 0; + } + ref_item->update_used_tables(); + if (*ref_item_ptr != ref_item) + { + *ref_item_ptr= ref_item; + Item *item= ref_item->real_item(); + store_key *key_copy= tab->ref.key_copy[key_copy_index]; + if (key_copy->type() == store_key::FIELD_STORE_KEY) + { + if (item->basic_const_item()) + { + /* It is constant propagated here */ + tab->ref.key_copy[key_copy_index]= + new store_key_const_item(*tab->ref.key_copy[key_copy_index], + item); + } + else if (item->const_item()) + { + tab->ref.key_copy[key_copy_index]= + new store_key_item(*tab->ref.key_copy[key_copy_index], + item, TRUE); + } + else + { + store_key_field *field_copy= ((store_key_field *)key_copy); + DBUG_ASSERT(item->type() == Item::FIELD_ITEM); + field_copy->change_source_field((Item_field *) item); + } + } + } + key_copy_index++; + } + } + + if (conds && const_table_map != found_const_table_map && + (select_options & SELECT_DESCRIBE)) + conds= (Item*) Item_false; + + /* Cache constant expressions in WHERE, HAVING, ON clauses. 
*/ + cache_const_exprs(); + + if (setup_semijoin_loosescan(this)) + DBUG_RETURN(1); + + if (make_join_select(this, select, conds)) + { + if (thd->is_error()) + DBUG_RETURN(1); + zero_result_cause= + "Impossible WHERE noticed after reading const tables"; + select_lex->mark_const_derived(zero_result_cause); + handle_implicit_grouping_with_window_funcs(); + goto setup_subq_exit; + } + + error= -1; /* if goto err */ + + /* Optimize distinct away if possible */ + { + ORDER *org_order= order; + order=remove_const(this, order,conds,1, &simple_order); + if (unlikely(thd->is_error())) + { + error= 1; + DBUG_RETURN(1); + } + + /* + If we are using ORDER BY NULL or ORDER BY const_expression, + return result in any order (even if we are using a GROUP BY) + */ + if (!order && org_order) + skip_sort_order= 1; + } + + /* + For FETCH ... WITH TIES save how many items order by had, after we've + removed constant items that have no relevance on the final sorting. + */ + if (unit->lim.is_with_ties()) + { + DBUG_ASSERT(with_ties_order_count == 0); + for (ORDER *it= order; it; it= it->next) + with_ties_order_count+= 1; + } + + + /* + Check if we can optimize away GROUP BY/DISTINCT. + We can do that if there are no aggregate functions, the + fields in DISTINCT clause (if present) and/or columns in GROUP BY + (if present) contain direct references to all key parts of + an unique index (in whatever order) and if the key parts of the + unique index cannot contain NULLs. + Note that the unique keys for DISTINCT and GROUP BY should not + be the same (as long as they are unique). + + The FROM clause must contain a single non-constant table. 
+ */ + if (table_count - const_tables == 1 && (group || select_distinct) && + !tmp_table_param.sum_func_count && + (!join_tab[const_tables].select || + !join_tab[const_tables].select->quick || + join_tab[const_tables].select->quick->get_type() != + QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) && + !select_lex->have_window_funcs()) + { + if (group && rollup.state == ROLLUP::STATE_NONE && + list_contains_unique_index(join_tab[const_tables].table, + find_field_in_order_list, + (void *) group_list)) + { + /* + We have found that grouping can be removed since groups correspond to + only one row anyway, but we still have to guarantee correct result + order. The line below effectively rewrites the query from GROUP BY + to ORDER BY . There are three exceptions: + - if skip_sort_order is set (see above), then we can simply skip + GROUP BY; + - if we are in a subquery, we don't have to maintain order unless there + is a limit clause in the subquery. + - we can only rewrite ORDER BY if the ORDER BY fields are 'compatible' + with the GROUP BY ones, i.e. either one is a prefix of another. + We only check if the ORDER BY is a prefix of GROUP BY. In this case + test_if_subpart() copies the ASC/DESC attributes from the original + ORDER BY fields. + If GROUP BY is a prefix of ORDER BY, then it is safe to leave + 'order' as is. + */ + if (!order || test_if_subpart(group_list, order)) + { + if (skip_sort_order || + (select_lex->master_unit()->item && select_limit == HA_POS_ERROR)) // This is a subquery + order= NULL; + else + order= group_list; + } + /* + If we have an IGNORE INDEX FOR GROUP BY(fields) clause, this must be + rewritten to IGNORE INDEX FOR ORDER BY(fields). 
+ */ + join_tab->table->keys_in_use_for_order_by= + join_tab->table->keys_in_use_for_group_by; + group_list= 0; + group= 0; + } + if (select_distinct && + list_contains_unique_index(join_tab[const_tables].table, + find_field_in_item_list, + (void *) &fields_list)) + { + select_distinct= 0; + } + } + if (group || tmp_table_param.sum_func_count) + { + if (! hidden_group_fields && rollup.state == ROLLUP::STATE_NONE + && !select_lex->have_window_funcs()) + select_distinct=0; + } + else if (select_distinct && table_count - const_tables == 1 && + rollup.state == ROLLUP::STATE_NONE && + !select_lex->have_window_funcs()) + { + /* + We are only using one table. In this case we change DISTINCT to a + GROUP BY query if: + - The GROUP BY can be done through indexes (no sort) and the ORDER + BY only uses selected fields. + (In this case we can later optimize away GROUP BY and ORDER BY) + - We are scanning the whole table without LIMIT + This can happen if: + - We are using CALC_FOUND_ROWS + - We are using an ORDER BY that can't be optimized away. + + We don't want to use this optimization when we are using LIMIT + because in this case we can just create a temporary table that + holds LIMIT rows and stop when this table is full. 
+ */ + bool all_order_fields_used; + + tab= &join_tab[const_tables]; + if (order) + { + bool fatal_err; + skip_sort_order= + test_if_skip_sort_order(tab, order, select_limit, + true, // no_changes + &tab->table->keys_in_use_for_order_by, + &fatal_err); + if (fatal_err) + DBUG_RETURN(1); + } + if ((group_list=create_distinct_group(thd, select_lex->ref_pointer_array, + order, fields_list, all_fields, + &all_order_fields_used))) + { + bool fatal_err= 0; + const bool skip_group= + skip_sort_order && + test_if_skip_sort_order(tab, group_list, select_limit, + true, // no_changes + &tab->table->keys_in_use_for_group_by, + &fatal_err); + if (fatal_err) + DBUG_RETURN(1); + + count_field_types(select_lex, &tmp_table_param, all_fields, 0); + if ((skip_group && all_order_fields_used) || + select_limit == HA_POS_ERROR || + (order && !skip_sort_order)) + { + /* Change DISTINCT to GROUP BY */ + select_distinct= 0; + no_order= !order; + if (all_order_fields_used) + { + if (order && skip_sort_order) + { + /* + Force MySQL to read the table in sorted order to get result in + ORDER BY order. + */ + tmp_table_param.quick_group=0; + } + order=0; + } + group=1; // For end_write_group + } + else + group_list= 0; + } + else if (thd->is_error()) // End of memory + DBUG_RETURN(1); + } + simple_group= rollup.state == ROLLUP::STATE_NONE; + if (group) + { + /* + Update simple_group and group_list as we now have more information, like + which tables or columns are constant. 
+ */ + group_list= remove_const(this, group_list, conds, + rollup.state == ROLLUP::STATE_NONE, + &simple_group); + if (unlikely(thd->is_error())) + { + error= 1; + DBUG_RETURN(1); + } + if (!group_list) + { + /* The output has only one row */ + order=0; + simple_order=1; + select_distinct= 0; + group_optimized_away= 1; + } + } + + calc_group_buffer(this, group_list); + send_group_parts= tmp_table_param.group_parts; /* Save org parts */ + if (procedure && procedure->group) + { + group_list= procedure->group= remove_const(this, procedure->group, conds, + 1, &simple_group); + if (unlikely(thd->is_error())) + { + error= 1; + DBUG_RETURN(1); + } + calc_group_buffer(this, group_list); + } + + /* + We can ignore ORDER BY if it's a prefix of the GROUP BY list + (as MariaDB is by default sorting on GROUP BY) or + if there is no GROUP BY and aggregate functions are used + (as the result will only contain one row). + */ + if (order && (test_if_subpart(group_list, order) || + (!group_list && tmp_table_param.sum_func_count))) + order=0; + + // Can't use sort on head table if using join buffering + if (full_join || hash_join) + { + TABLE *stable= (sort_by_table == (TABLE *) 1 ? + join_tab[const_tables].table : sort_by_table); + /* + FORCE INDEX FOR ORDER BY can be used to prevent join buffering when + sorting on the first table. + */ + if (!stable || (!stable->force_index_order && + !map2table[stable->tablenr]->keep_current_rowid)) + { + if (group_list) + simple_group= 0; + if (order) + simple_order= 0; + } + } + + need_tmp= test_if_need_tmp_table(); + + /* + If window functions are present then we can't have simple_order set to + TRUE as the window function needs a temp table for computation. + ORDER BY is computed after the window function computation is done, so + the sort will be done on the temp table. 
+ */ + if (select_lex->have_window_funcs()) + simple_order= FALSE; + + /* + If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the table + whose columns are required to be returned in a sorted order, then + the proper value for no_jbuf_after should be yielded by a call to + the make_join_orderinfo function. + Yet the current implementation of FORCE INDEX hints does not + allow us to do it in a clean manner. + */ + no_jbuf_after= 1 ? table_count : make_join_orderinfo(this); + + // Don't use join buffering when we use MATCH + select_opts_for_readinfo= + (select_options & (SELECT_DESCRIBE | SELECT_NO_JOIN_CACHE)) | + (select_lex->ftfunc_list->elements ? SELECT_NO_JOIN_CACHE : 0); + + if (select_lex->options & OPTION_SCHEMA_TABLE && + optimize_schema_tables_reads(this)) + DBUG_RETURN(1); + + if (make_join_readinfo(this, select_opts_for_readinfo, no_jbuf_after)) + DBUG_RETURN(1); + + /* Perform FULLTEXT search before all regular searches */ + if (!(select_options & SELECT_DESCRIBE)) + if (init_ftfuncs(thd, select_lex, MY_TEST(order))) + DBUG_RETURN(1); + + /* + It's necessary to check const part of HAVING cond as + there is a chance that some cond parts may become + const items after make_join_statistics(for example + when Item is a reference to cost table field from + outer join). + This check is performed only for those conditions + which do not use aggregate functions. In such case + temporary table may not be used and const condition + elements may be lost during further having + condition transformation in JOIN::exec. 
+ */ + if (having && const_table_map && !having->with_sum_func()) + { + having->update_used_tables(); + having= having->remove_eq_conds(thd, &select_lex->having_value, true); + if (select_lex->having_value == Item::COND_FALSE) + { + having= (Item*) Item_false; + zero_result_cause= "Impossible HAVING noticed after reading const tables"; + error= 0; + select_lex->mark_const_derived(zero_result_cause); + goto setup_subq_exit; + } + } + + if (optimize_unflattened_subqueries()) + DBUG_RETURN(1); + + int res; + if ((res= rewrite_to_index_subquery_engine(this)) != -1) + DBUG_RETURN(res); + if (setup_subquery_caches()) + DBUG_RETURN(-1); + + /* + Need to tell handlers that to play it safe, it should fetch all + columns of the primary key of the tables: this is because MySQL may + build row pointers for the rows, and for all columns of the primary key + the read set has not necessarily been set by the server code. + */ + if (need_tmp || select_distinct || group_list || order) + { + for (uint i= 0; i < table_count; i++) + { + if (!(table[i]->map & const_table_map)) + table[i]->prepare_for_position(); + } + } + + DBUG_EXECUTE("info",TEST_join(this);); + + if (!only_const_tables()) + { + JOIN_TAB *tab= &join_tab[const_tables]; + + if (order && !need_tmp) + { + /* + Force using of tmp table if sorting by a SP or UDF function due to + their expensive and probably non-deterministic nature. + */ + for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next) + { + Item *item= *tmp_order->item; + if (item->is_expensive()) + { + /* Force tmp table without sort */ + need_tmp=1; simple_order=simple_group=0; + break; + } + } + } + + /* + Because filesort always does a full table scan or a quick range scan + we must add the removed reference to the select for the table. + We only need to do this when we have a simple_order or simple_group + as in other cases the join is done before the sort. 
+ */ + if ((order || group_list) && + tab->type != JT_ALL && + tab->type != JT_FT && + tab->type != JT_REF_OR_NULL && + ((order && simple_order) || (group_list && simple_group))) + { + if (add_ref_to_table_cond(thd,tab)) { + DBUG_RETURN(1); + } + } + /* + Investigate whether we may use an ordered index as part of either + DISTINCT, GROUP BY or ORDER BY execution. An ordered index may be + used for only the first of any of these terms to be executed. This + is reflected in the order which we check for test_if_skip_sort_order() + below. However we do not check for DISTINCT here, as it would have + been transformed to a GROUP BY at this stage if it is a candidate for + ordered index optimization. + If a decision was made to use an ordered index, the availability + of such an access path is stored in 'ordered_index_usage' for later + use by 'execute' or 'explain' + */ + DBUG_ASSERT(ordered_index_usage == ordered_index_void); + + if (group_list) // GROUP BY honoured first + // (DISTINCT was rewritten to GROUP BY if skippable) + { + /* + When there is SQL_BIG_RESULT do not sort using index for GROUP BY, + and thus force sorting on disk unless a group min-max optimization + is going to be used as it is applied now only for one table queries + with covering indexes. + */ + if (!(select_options & SELECT_BIG_RESULT) || + (tab->select && + tab->select->quick && + tab->select->quick->get_type() == + QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)) + { + if (simple_group && // GROUP BY is possibly skippable + !select_distinct) // .. if not preceded by a DISTINCT + { + /* + Calculate a possible 'limit' of table rows for 'GROUP BY': + A specified 'LIMIT' is relative to the final resultset. + 'need_tmp' implies that there will be more postprocessing + so the specified 'limit' should not be enforced yet. + */ + bool fatal_err; + const ha_rows limit = need_tmp ? 
HA_POS_ERROR : select_limit; + if (test_if_skip_sort_order(tab, group_list, limit, false, + &tab->table->keys_in_use_for_group_by, + &fatal_err)) + { + ordered_index_usage= ordered_index_group_by; + } + if (fatal_err) + DBUG_RETURN(1); + } + + /* + If we are going to use semi-join LooseScan, it will depend + on the selected index scan to be used. If index is not used + for the GROUP BY, we risk that sorting is put on the LooseScan + table. In order to avoid this, force use of temporary table. + TODO: Explain the quick_group part of the test below. + */ + if ((ordered_index_usage != ordered_index_group_by) && + ((tmp_table_param.quick_group && !procedure) || + (tab->emb_sj_nest && + best_positions[const_tables].sj_strategy == SJ_OPT_LOOSE_SCAN))) + { + need_tmp=1; + simple_order= simple_group= false; // Force tmp table without sort + } + } + } + else if (order && // ORDER BY wo/ preceding GROUP BY + (simple_order || skip_sort_order)) // which is possibly skippable + { + bool fatal_err; + if (test_if_skip_sort_order(tab, order, select_limit, false, + &tab->table->keys_in_use_for_order_by, + &fatal_err)) + { + ordered_index_usage= ordered_index_order_by; + } + if (fatal_err) + DBUG_RETURN(1); + } + } + + if (having) + having_is_correlated= MY_TEST(having->used_tables() & OUTER_REF_TABLE_BIT); + tmp_having= having; + + if (unlikely(thd->is_error())) + DBUG_RETURN(TRUE); + + /* + The loose index scan access method guarantees that all grouping or + duplicate row elimination (for distinct) is already performed + during data retrieval, and that all MIN/MAX functions are already + computed for each group. Thus all MIN/MAX functions should be + treated as regular functions, and there is no need to perform + grouping in the main execution loop. + Notice that currently loose index scan is applicable only for + single table queries, thus it is sufficient to test only the first + join_tab element of the plan for its access method. 
+ */ + if (join_tab->is_using_loose_index_scan()) + { + tmp_table_param.precomputed_group_by= TRUE; + if (join_tab->is_using_agg_loose_index_scan()) + { + need_distinct= FALSE; + tmp_table_param.precomputed_group_by= FALSE; + } + } + + if (make_aggr_tables_info()) + DBUG_RETURN(1); + + init_join_cache_and_keyread(); + + if (init_range_rowid_filters()) + DBUG_RETURN(1); + + error= 0; + + if (select_options & SELECT_DESCRIBE) + goto derived_exit; + + DBUG_RETURN(0); + +setup_subq_exit: + /* Choose an execution strategy for this JOIN. */ + if (!tables_list || !table_count) + { + choose_tableless_subquery_plan(); + + /* The output has atmost one row */ + if (group_list) + { + group_list= NULL; + group_optimized_away= 1; + rollup.state= ROLLUP::STATE_NONE; + } + order= NULL; + simple_order= TRUE; + select_distinct= FALSE; + + if (select_lex->have_window_funcs()) + { + if (!(join_tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)))) + DBUG_RETURN(1); +#ifndef DBUG_OFF + dbug_join_tab_array_size= 1; +#endif + need_tmp= 1; + } + if (make_aggr_tables_info()) + DBUG_RETURN(1); + + /* + It could be that we've only done optimization stage 1 for + some of the derived tables, and never did stage 2. + Do it now, otherwise Explain data structure will not be complete. + */ + if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE)) + DBUG_RETURN(1); + } + /* + Even with zero matching rows, subqueries in the HAVING clause may + need to be evaluated if there are aggregate functions in the query. + */ + if (optimize_unflattened_subqueries()) + DBUG_RETURN(1); + error= 0; + +derived_exit: + + select_lex->mark_const_derived(zero_result_cause); + DBUG_RETURN(0); +} + +/** + Add having condition as a where clause condition of the given temp table. + + @param tab Table to which having condition is added. + + @returns false if success, true if error. 
*/

bool JOIN::add_having_as_table_cond(JOIN_TAB *tab)
{
  tmp_having->update_used_tables();
  table_map used_tables= tab->table->map | OUTER_REF_TABLE_BIT;

  /* If tmp table is not used then consider conditions of const table also */
  if (!need_tmp)
    used_tables|= const_table_map;

  DBUG_ENTER("JOIN::add_having_as_table_cond");

  /*
    Split tmp_having: extract the part that depends only on 'used_tables'
    (plus outer references) so it can be checked while reading 'tab',
    instead of after the temporary-table/sort stage.
  */
  Item* sort_table_cond= make_cond_for_table(thd, tmp_having, used_tables,
                                             (table_map) 0, 0, false, false);
  if (sort_table_cond)
  {
    /* Lazily create the SQL_SELECT that will carry the pushed condition. */
    if (!tab->select)
    {
      if (!(tab->select= new SQL_SELECT))
        DBUG_RETURN(true);
      tab->select->head= tab->table;
    }
    /* AND the extracted part into any existing table condition. */
    if (!tab->select->cond)
      tab->select->cond= sort_table_cond;
    else
    {
      if (!(tab->select->cond=
              new (thd->mem_root) Item_cond_and(thd,
                                                tab->select->cond,
                                                sort_table_cond)))
        DBUG_RETURN(true);
    }
    if (tab->pre_idx_push_select_cond)
    {
      /*
        The same condition is also ANDed into the pre-index-pushdown
        condition; copy the AND/OR structure first so the two condition
        trees do not share Item_cond nodes (COND_ITEMs only — leaf items
        are safe to share).
      */
      if (sort_table_cond->type() == Item::COND_ITEM)
        sort_table_cond= sort_table_cond->copy_andor_structure(thd);
      if (!(tab->pre_idx_push_select_cond=
              new (thd->mem_root) Item_cond_and(thd,
                                                tab->pre_idx_push_select_cond,
                                                sort_table_cond)))
        DBUG_RETURN(true);
    }
    /* Newly built AND nodes may be unfixed; fix them in place. */
    if (tab->select->cond)
      tab->select->cond->fix_fields_if_needed(thd, 0);
    if (tab->pre_idx_push_select_cond)
      tab->pre_idx_push_select_cond->fix_fields_if_needed(thd, 0);
    tab->select->pre_idx_push_select_cond= tab->pre_idx_push_select_cond;
    tab->set_select_cond(tab->select->cond, __LINE__);
    tab->select_cond->top_level_item();
    DBUG_EXECUTE("where",print_where(tab->select->cond,
                                     "select and having",
                                     QT_ORDINARY););

    /*
      Keep as HAVING only the remainder of tmp_having that could not be
      pushed down (the part referring to tables outside 'used_tables').
    */
    having= make_cond_for_table(thd, tmp_having, ~ (table_map) 0,
                                ~used_tables, 0, false, false);
    if (!having && thd->is_error())
      DBUG_RETURN(true);
    DBUG_EXECUTE("where",
                 print_where(having, "having after sort", QT_ORDINARY););
  }
  else if (thd->is_error())
    DBUG_RETURN(true);

  DBUG_RETURN(false);
}


/**
  Add Item_temptable_rowid fields for every preceding join tab that has
  keep_current_rowid set, so the rowids are materialized as extra columns
  of 'cur'-s temporary table.

  @param cur           Aggregation tab whose tmp table receives the fields.
  @param table_fields  Field list of that tmp table; rowid items appended.

  @returns always false (0)

  NOTE(review): results of operator new, fix_fields() and push_back() are
  not checked here — presumably allocation failure is handled through
  thd->mem_root / thd->is_error() by the caller; confirm.
*/
bool JOIN::add_fields_for_current_rowid(JOIN_TAB *cur, List *table_fields)
{
  /*
    this will not walk into semi-join materialization nests but this is ok
    because we will never need to save current rowids for those.
  */
  for (JOIN_TAB *tab=join_tab; tab < cur; tab++)
  {
    if (!tab->keep_current_rowid)
      continue;
    Item *item= new (thd->mem_root) Item_temptable_rowid(tab->table);
    item->fix_fields(thd, 0);
    table_fields->push_back(item, thd->mem_root);
    /* Each added rowid column counts as a function field in the tmp table. */
    cur->tmp_table_param->func_count++;
  }
  return 0;
}


/**
  Set info for aggregation tables

  @details
  This function finalizes execution plan by taking following actions:
    .) aggregation temporary tables are created, but not instantiated
       (this is done during execution).
       JOIN_TABs for aggregation tables are set appropriately
       (see JOIN::create_postjoin_aggr_table).
    .) prepare fields lists (fields, all_fields, ref_pointer_array slices) for
       each required stage of execution. These fields lists are set for
       working tables' tabs and for the tab of last table in the join.
    .) info for sorting/grouping/dups removal is prepared and saved in
       appropriate tabs. Here is an example:

  @returns
  false - Ok
  true  - Error
*/

bool JOIN::make_aggr_tables_info()
{
  List *curr_all_fields= &all_fields;
  List *curr_fields_list= &fields_list;
  JOIN_TAB *curr_tab= join_tab + const_tables;
  TABLE *exec_tmp_table= NULL;
  bool distinct= false;
  const bool has_group_by= this->group;
  bool keep_row_order= thd->lex->with_rownum && (group_list || order);
  bool is_having_added_as_table_cond= false;
  DBUG_ENTER("JOIN::make_aggr_tables_info");


  sort_and_group_aggr_tab= NULL;

  if (group_optimized_away)
    implicit_grouping= true;

  bool implicit_grouping_with_window_funcs= implicit_grouping &&
                                            select_lex->have_window_funcs();
  bool implicit_grouping_without_tables= implicit_grouping &&
                                         !tables_list;

  /*
    Setup last table to provide fields and all_fields lists to the next
    node in the plan.
+ */ + if (join_tab && top_join_tab_count && tables_list) + { + join_tab[top_join_tab_count - 1].fields= &fields_list; + join_tab[top_join_tab_count - 1].all_fields= &all_fields; + } + + /* + All optimization is done. Check if we can use the storage engines + group by handler to evaluate the group by. + Some storage engines, like spider can also do joins, group by and + distinct in the engine, so we do this for all queries, not only + GROUP BY queries. + */ + if (tables_list && top_join_tab_count && !procedure) + { + /* + At the moment we only support push down for queries where + all tables are in the same storage engine + */ + TABLE_LIST *tbl= tables_list; + handlerton *ht= tbl && tbl->table ? tbl->table->file->partition_ht() : 0; + for (tbl= tbl->next_local; ht && tbl; tbl= tbl->next_local) + { + if (!tbl->table || tbl->table->file->partition_ht() != ht) + ht= 0; + } + + if (ht && ht->create_group_by) + { + /* + Check if the storage engine can intercept the query + + JOIN::optimize_stage2() might convert DISTINCT into GROUP BY and then + optimize away GROUP BY (group_list). In such a case, we need to notify + a storage engine supporting a group by handler of the existence of the + original DISTINCT. Thus, we set select_distinct || group_optimized_away + to Query::distinct. + */ + Query query= {&all_fields, select_distinct || group_optimized_away, + tables_list, conds, + group_list, order ? order : group_list, having, + &select_lex->master_unit()->lim}; + group_by_handler *gbh= ht->create_group_by(thd, &query); + + if (gbh) + { + if (!(pushdown_query= new (thd->mem_root) Pushdown_query(select_lex, + gbh))) + DBUG_RETURN(1); + /* + We must store rows in the tmp table if we need to do an ORDER BY + or DISTINCT and the storage handler can't handle it. 
+ */ + need_tmp= query.order_by || query.group_by || query.distinct; + distinct= query.distinct; + keep_row_order= query.order_by || query.group_by; + + order= query.order_by; + + aggr_tables++; + curr_tab= join_tab + exec_join_tab_cnt(); + bzero((void*)curr_tab, sizeof(JOIN_TAB)); + curr_tab->ref.key= -1; + curr_tab->join= this; + + if (!(curr_tab->tmp_table_param= new TMP_TABLE_PARAM(tmp_table_param))) + DBUG_RETURN(1); + curr_tab->tmp_table_param->func_count= all_fields.elements; + TABLE* table= create_tmp_table(thd, curr_tab->tmp_table_param, + all_fields, + NULL, distinct, + TRUE, select_options, HA_POS_ERROR, + &empty_clex_str, !need_tmp, + keep_row_order); + if (!table) + DBUG_RETURN(1); + + if (!(curr_tab->aggr= new (thd->mem_root) AGGR_OP(curr_tab))) + DBUG_RETURN(1); + curr_tab->aggr->set_write_func(::end_send); + curr_tab->table= table; + /* + Setup reference fields, used by summary functions and group by fields, + to point to the temporary table. + The actual switching to the temporary tables fields for HAVING + and ORDER BY is done in do_select() by calling + set_items_ref_array(items1). + */ + init_items_ref_array(); + items1= ref_ptr_array_slice(2); + //items1= items0 + all_fields.elements; + if (change_to_use_tmp_fields(thd, items1, + tmp_fields_list1, tmp_all_fields1, + fields_list.elements, all_fields)) + DBUG_RETURN(1); + + /* Give storage engine access to temporary table */ + gbh->table= table; + pushdown_query->store_data_in_temp_table= need_tmp; + pushdown_query->having= having; + + /* + Group by and having is calculated by the group_by handler. 
+ Reset the group by and having + */ + DBUG_ASSERT(query.group_by == NULL); + group= 0; group_list= 0; + having= tmp_having= 0; + /* + Select distinct is handled by handler or by creating an unique index + over all fields in the temporary table + */ + select_distinct= 0; + order= query.order_by; + tmp_table_param.field_count+= tmp_table_param.sum_func_count; + tmp_table_param.sum_func_count= 0; + + fields= curr_fields_list; + + //todo: new: + curr_tab->ref_array= &items1; + curr_tab->all_fields= &tmp_all_fields1; + curr_tab->fields= &tmp_fields_list1; + + DBUG_RETURN(thd->is_error()); + } + } + } + + + /* + The loose index scan access method guarantees that all grouping or + duplicate row elimination (for distinct) is already performed + during data retrieval, and that all MIN/MAX functions are already + computed for each group. Thus all MIN/MAX functions should be + treated as regular functions, and there is no need to perform + grouping in the main execution loop. + Notice that currently loose index scan is applicable only for + single table queries, thus it is sufficient to test only the first + join_tab element of the plan for its access method. + */ + if (join_tab && top_join_tab_count && tables_list && + join_tab->is_using_loose_index_scan()) + tmp_table_param.precomputed_group_by= + !join_tab->is_using_agg_loose_index_scan(); + + group_list_for_estimates= group_list; + /* Create a tmp table if distinct or if the sort is too complicated */ + if (need_tmp) + { + aggr_tables++; + curr_tab= join_tab + exec_join_tab_cnt(); + DBUG_ASSERT(curr_tab - join_tab < dbug_join_tab_array_size); + bzero((void*)curr_tab, sizeof(JOIN_TAB)); + curr_tab->ref.key= -1; + if (only_const_tables()) + first_select= sub_select_postjoin_aggr; + + /* + Create temporary table on first execution of this join. + (Will be reused if this is a subquery that is executed several times.) 
+ */ + init_items_ref_array(); + + ORDER *tmp_group= (ORDER *) 0; + if (!simple_group && !procedure && !(test_flags & TEST_NO_KEY_GROUP)) + tmp_group= group_list; + + tmp_table_param.hidden_field_count= + all_fields.elements - fields_list.elements; + + distinct= select_distinct && !group_list && + !select_lex->have_window_funcs(); + keep_row_order= thd->lex->with_rownum && (group_list || order); + bool save_sum_fields= (group_list && simple_group) || + implicit_grouping_with_window_funcs; + if (create_postjoin_aggr_table(curr_tab, + &all_fields, tmp_group, + save_sum_fields, + distinct, keep_row_order)) + DBUG_RETURN(true); + exec_tmp_table= curr_tab->table; + + if (exec_tmp_table->distinct) + optimize_distinct(); + + /* Change sum_fields reference to calculated fields in tmp_table */ + items1= ref_ptr_array_slice(2); + if ((sort_and_group || curr_tab->table->group || + tmp_table_param.precomputed_group_by) && + !implicit_grouping_without_tables) + { + if (change_to_use_tmp_fields(thd, items1, + tmp_fields_list1, tmp_all_fields1, + fields_list.elements, all_fields)) + DBUG_RETURN(true); + } + else + { + if (change_refs_to_tmp_fields(thd, items1, + tmp_fields_list1, tmp_all_fields1, + fields_list.elements, all_fields)) + DBUG_RETURN(true); + } + curr_all_fields= &tmp_all_fields1; + curr_fields_list= &tmp_fields_list1; + // Need to set them now for correct group_fields setup, reset at the end. + set_items_ref_array(items1); + curr_tab->ref_array= &items1; + curr_tab->all_fields= &tmp_all_fields1; + curr_tab->fields= &tmp_fields_list1; + set_postjoin_aggr_write_func(curr_tab); + + /* + If having is not handled here, it will be checked before the row is sent + to the client. + */ + if (tmp_having && + (sort_and_group || (exec_tmp_table->distinct && !group_list) || + select_lex->have_window_funcs())) + { + /* + If there is no select distinct and there are no window functions + then move the having to table conds of tmp table. 
+ NOTE : We cannot apply having after distinct or window functions + If columns of having are not part of select distinct, + then distinct may remove rows which can satisfy having. + In the case of window functions we *must* make sure to not + store any rows which don't match HAVING within the temp table, + as rows will end up being used during their computation. + */ + if (!select_distinct && !select_lex->have_window_funcs() && + add_having_as_table_cond(curr_tab)) + DBUG_RETURN(true); + is_having_added_as_table_cond= tmp_having != having; + + /* + Having condition which we are not able to add as tmp table conds are + kept as before. And, this will be applied before storing the rows in + tmp table. + */ + curr_tab->having= having; + having= NULL; // Already done + } + + tmp_table_param.func_count= 0; + tmp_table_param.field_count+= tmp_table_param.func_count; + if (sort_and_group || curr_tab->table->group) + { + tmp_table_param.field_count+= tmp_table_param.sum_func_count; + tmp_table_param.sum_func_count= 0; + } + + if (exec_tmp_table->group) + { // Already grouped + if (!order && !no_order && !skip_sort_order) + order= group_list; /* order by group */ + group_list= NULL; + } + + /* + If we have different sort & group then we must sort the data by group + and copy it to another tmp table. + + This code is also used if we are using distinct something + we haven't been able to store in the temporary table yet + like SEC_TO_TIME(SUM(...)). + + 3. Also, this is used when + - the query has Window functions, + - the GROUP BY operation is done with OrderedGroupBy algorithm. + In this case, the first temptable will contain pre-GROUP-BY data. Force + the creation of the second temporary table. Post-GROUP-BY dataset will be + written there, and then Window Function processing code will be able to + process it. 
+ */ + if ((group_list && + (!test_if_subpart(group_list, order) || select_distinct)) || + (select_distinct && tmp_table_param.using_outer_summary_function) || + (group_list && !tmp_table_param.quick_group && // (3) + select_lex->have_window_funcs())) // (3) + { /* Must copy to another table */ + DBUG_PRINT("info",("Creating group table")); + + calc_group_buffer(this, group_list); + count_field_types(select_lex, &tmp_table_param, tmp_all_fields1, + select_distinct && !group_list); + tmp_table_param.hidden_field_count= + tmp_all_fields1.elements - tmp_fields_list1.elements; + + curr_tab++; + aggr_tables++; + DBUG_ASSERT(curr_tab - join_tab < dbug_join_tab_array_size); + bzero((void*)curr_tab, sizeof(JOIN_TAB)); + curr_tab->ref.key= -1; + + /* group data to new table */ + /* + If the access method is loose index scan then all MIN/MAX + functions are precomputed, and should be treated as regular + functions. See extended comment above. + */ + if (join_tab->is_using_loose_index_scan()) + tmp_table_param.precomputed_group_by= TRUE; + + tmp_table_param.hidden_field_count= + curr_all_fields->elements - curr_fields_list->elements; + ORDER *dummy= NULL; //TODO can use table->group here also + + if (create_postjoin_aggr_table(curr_tab, curr_all_fields, dummy, true, + distinct, keep_row_order)) + DBUG_RETURN(true); + + if (group_list) + { + if (!only_const_tables()) // No need to sort a single row + { + if (add_sorting_to_table(curr_tab - 1, group_list)) + DBUG_RETURN(true); + } + + if (make_group_fields(this, this)) + DBUG_RETURN(true); + } + + // Setup sum funcs only when necessary, otherwise we might break info + // for the first table + if (group_list || tmp_table_param.sum_func_count) + { + if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true)) + DBUG_RETURN(true); + if (prepare_sum_aggregators(thd, sum_funcs, + !join_tab->is_using_agg_loose_index_scan())) + DBUG_RETURN(true); + group_list= NULL; + if (setup_sum_funcs(thd, sum_funcs)) + DBUG_RETURN(true); + } 
+ // No sum funcs anymore + DBUG_ASSERT(items2.is_null()); + + items2= ref_ptr_array_slice(3); + if (change_to_use_tmp_fields(thd, items2, + tmp_fields_list2, tmp_all_fields2, + fields_list.elements, tmp_all_fields1)) + DBUG_RETURN(true); + + curr_fields_list= &tmp_fields_list2; + curr_all_fields= &tmp_all_fields2; + set_items_ref_array(items2); + curr_tab->ref_array= &items2; + curr_tab->all_fields= &tmp_all_fields2; + curr_tab->fields= &tmp_fields_list2; + set_postjoin_aggr_write_func(curr_tab); + + tmp_table_param.field_count+= tmp_table_param.sum_func_count; + tmp_table_param.sum_func_count= 0; + } + if (curr_tab->table->distinct) + select_distinct= false; /* Each row is unique */ + + if (select_distinct && !group_list) + { + if (having) + { + curr_tab->having= having; + having->update_used_tables(); + } + /* + We only need DISTINCT operation if the join is not degenerate. + If it is, we must not request DISTINCT processing, because + remove_duplicates() assumes there is a preceding computation step (and + in the degenerate join, there's none) + */ + if (top_join_tab_count && tables_list) + curr_tab->distinct= true; + + having= NULL; + select_distinct= false; + } + /* Clean tmp_table_param for the next tmp table. */ + tmp_table_param.field_count= tmp_table_param.sum_func_count= + tmp_table_param.func_count= 0; + + tmp_table_param.copy_field= tmp_table_param.copy_field_end=0; + first_record= sort_and_group=0; + + if (!group_optimized_away || implicit_grouping_with_window_funcs) + { + group= false; + } + else + { + /* + If grouping has been optimized away, a temporary table is + normally not needed unless we're explicitly requested to create + one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT). + + In this case (grouping was optimized away), temp_table was + created without a grouping expression and JOIN::exec() will not + perform the necessary grouping (by the use of end_send_group() + or end_write_group()) if JOIN::group is set to false. 
+ */ + // the temporary table was explicitly requested + DBUG_ASSERT(select_options & OPTION_BUFFER_RESULT); + // the temporary table does not have a grouping expression + DBUG_ASSERT(!curr_tab->table->group); + } + calc_group_buffer(this, group_list); + count_field_types(select_lex, &tmp_table_param, *curr_all_fields, false); + } + + if (group || + (implicit_grouping && !implicit_grouping_with_window_funcs) || + tmp_table_param.sum_func_count) + { + if (make_group_fields(this, this)) + DBUG_RETURN(true); + + DBUG_ASSERT(items3.is_null()); + + if (items0.is_null()) + init_items_ref_array(); + items3= ref_ptr_array_slice(4); + setup_copy_fields(thd, &tmp_table_param, + items3, tmp_fields_list3, tmp_all_fields3, + curr_fields_list->elements, *curr_all_fields); + + curr_fields_list= &tmp_fields_list3; + curr_all_fields= &tmp_all_fields3; + set_items_ref_array(items3); + if (join_tab) + { + JOIN_TAB *last_tab= join_tab + top_join_tab_count + aggr_tables - 1; + // Set grouped fields on the last table + last_tab->ref_array= &items3; + last_tab->all_fields= &tmp_all_fields3; + last_tab->fields= &tmp_fields_list3; + } + if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true)) + DBUG_RETURN(true); + if (prepare_sum_aggregators(thd, sum_funcs, + !join_tab || + !join_tab-> is_using_agg_loose_index_scan())) + DBUG_RETURN(true); + if (unlikely(setup_sum_funcs(thd, sum_funcs) || thd->is_error())) + DBUG_RETURN(true); + } + if (group_list || order) + { + DBUG_PRINT("info",("Sorting for send_result_set_metadata")); + THD_STAGE_INFO(thd, stage_sorting_result); + /* If we have already done the group, add HAVING to sorted table */ + if (tmp_having && !is_having_added_as_table_cond && + !group_list && !sort_and_group) + { + if (add_having_as_table_cond(curr_tab)) + DBUG_RETURN(true); + } + + if (group) + select_limit= HA_POS_ERROR; + else if (!need_tmp) + { + /* + We can abort sorting after thd->select_limit rows if there are no + filter conditions for any tables after the 
sorted one. + Filter conditions come in several forms: + 1. as a condition item attached to the join_tab, or + 2. as a keyuse attached to the join_tab (ref access). + */ + for (uint i= const_tables + 1; i < top_join_tab_count; i++) + { + JOIN_TAB *const tab= join_tab + i; + if (tab->select_cond || // 1 + (tab->keyuse && !tab->first_inner)) // 2 + { + /* We have to sort all rows */ + select_limit= HA_POS_ERROR; + break; + } + } + } + /* + Here we add sorting stage for ORDER BY/GROUP BY clause, if the + optimiser chose FILESORT to be faster than INDEX SCAN or there is + no suitable index present. + OPTION_FOUND_ROWS supersedes LIMIT and is taken into account. + */ + DBUG_PRINT("info",("Sorting for order by/group by")); + ORDER *order_arg= group_list ? group_list : order; + if (top_join_tab_count + aggr_tables > const_tables && + ordered_index_usage != + (group_list ? ordered_index_group_by : ordered_index_order_by) && + curr_tab->type != JT_CONST && + curr_tab->type != JT_EQ_REF) // Don't sort 1 row + { + // Sort either first non-const table or the last tmp table + JOIN_TAB *sort_tab= curr_tab; + + if (add_sorting_to_table(sort_tab, order_arg)) + DBUG_RETURN(true); + /* + filesort_limit: Return only this many rows from filesort(). + We can use select_limit_cnt only if we have no group_by and 1 table. + This allows us to use Bounded_queue for queries like: + "select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;" + m_select_limit == HA_POS_ERROR (we need a full table scan) + unit->select_limit_cnt == 1 (we only need one row in the result set) + */ + sort_tab->filesort->limit= + (has_group_by || (join_tab + top_join_tab_count > curr_tab + 1)) ? 
+ select_limit : unit->lim.get_select_limit(); + + if (unit->lim.is_with_ties()) + sort_tab->filesort->limit= HA_POS_ERROR; + } + if (!only_const_tables() && + !join_tab[const_tables].filesort && + !(select_options & SELECT_DESCRIBE)) + { + /* + If no IO cache exists for the first table then we are using an + INDEX SCAN and no filesort. Thus we should not remove the sorted + attribute on the INDEX SCAN. + */ + skip_sort_order= true; + } + } + + /* + Window functions computation step should be attached to the last join_tab + that's doing aggregation. + The last join_tab reads the data from the temp. table. It also may do + - sorting + - duplicate value removal + Both of these operations are done after window function computation step. + */ + curr_tab= join_tab + total_join_tab_cnt(); + if (select_lex->window_funcs.elements) + { + if (!(curr_tab->window_funcs_step= new Window_funcs_computation)) + DBUG_RETURN(true); + if (curr_tab->window_funcs_step->setup(thd, &select_lex->window_funcs, + curr_tab)) + DBUG_RETURN(true); + /* Count that we're using window functions. */ + status_var_increment(thd->status_var.feature_window_functions); + } + if (select_lex->custom_agg_func_used()) + status_var_increment(thd->status_var.feature_custom_aggregate_functions); + + /* + Allocate Cached_items of ORDER BY for FETCH FIRST .. WITH TIES. + The order list might have been modified prior to this, but we are + only interested in the initial order by columns, after all const + elements are removed. 
+ */ + if (unit->lim.is_with_ties()) + { + if (alloc_order_fields(this, order, with_ties_order_count)) + DBUG_RETURN(true); + } + + fields= curr_fields_list; + // Reset before execution + set_items_ref_array(items0); + if (join_tab) + join_tab[exec_join_tab_cnt() + aggr_tables - 1].next_select= + setup_end_select_func(this); + group= has_group_by; + + DBUG_RETURN(false); +} + + + +bool +JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List *table_fields, + ORDER *table_group, + bool save_sum_fields, + bool distinct, + bool keep_row_order) +{ + DBUG_ENTER("JOIN::create_postjoin_aggr_table"); + THD_STAGE_INFO(thd, stage_creating_tmp_table); + + /* + Pushing LIMIT to the post-join temporary table creation is not applicable + when there is ORDER BY or GROUP BY or there is no GROUP BY, but + there are aggregate functions, because in all these cases we need + all result rows. + + We also can not push limit if the limit is WITH TIES, as we do not know + how many rows we will actually have. This can happen if ORDER BY was + a constant and removed (during remove_const), thus we have an "unlimited" + WITH TIES. + */ + ha_rows table_rows_limit= ((order == NULL || skip_sort_order) && + !table_group && + !select_lex->with_sum_func && + !unit->lim.is_with_ties()) ? 
select_limit + : HA_POS_ERROR; + + if (!(tab->tmp_table_param= new TMP_TABLE_PARAM(tmp_table_param))) + DBUG_RETURN(true); + if (tmp_table_keep_current_rowid) + add_fields_for_current_rowid(tab, table_fields); + tab->tmp_table_param->skip_create_table= true; + TABLE* table= create_tmp_table(thd, tab->tmp_table_param, *table_fields, + table_group, distinct, + save_sum_fields, select_options, + table_rows_limit, + &empty_clex_str, true, keep_row_order); + if (!table) + DBUG_RETURN(true); + tmp_table_param.using_outer_summary_function= + tab->tmp_table_param->using_outer_summary_function; + tab->join= this; + DBUG_ASSERT(tab > tab->join->join_tab || !top_join_tab_count || + !tables_list); + tab->table= table; + if (tab > join_tab) + (tab - 1)->next_select= sub_select_postjoin_aggr; + + /* if group or order on first table, sort first */ + if ((group_list && simple_group) || + (implicit_grouping && select_lex->have_window_funcs())) + { + DBUG_PRINT("info",("Sorting for group")); + THD_STAGE_INFO(thd, stage_sorting_for_group); + + if (ordered_index_usage != ordered_index_group_by && + !only_const_tables() && + (join_tab + const_tables)->type != JT_CONST && // Don't sort 1 row + !implicit_grouping && + add_sorting_to_table(join_tab + const_tables, group_list)) + goto err; + + if (alloc_group_fields(this, group_list)) + goto err; + if (make_sum_func_list(all_fields, fields_list, true)) + goto err; + if (prepare_sum_aggregators(thd, sum_funcs, + !(tables_list && + join_tab->is_using_agg_loose_index_scan()))) + goto err; + if (setup_sum_funcs(thd, sum_funcs)) + goto err; + group_list= NULL; + } + else + { + if (prepare_sum_aggregators(thd, sum_funcs, + !join_tab->is_using_agg_loose_index_scan())) + goto err; + if (setup_sum_funcs(thd, sum_funcs)) + goto err; + + if (!group_list && !table->distinct && order && simple_order && + tab == join_tab + const_tables) + { + DBUG_PRINT("info",("Sorting for order")); + THD_STAGE_INFO(thd, stage_sorting_for_order); + + if 
(ordered_index_usage != ordered_index_order_by && + !only_const_tables() && + add_sorting_to_table(join_tab + const_tables, order)) + goto err; + order= NULL; + } + } + if (!(tab->aggr= new (thd->mem_root) AGGR_OP(tab))) + goto err; + table->reginfo.join_tab= tab; + DBUG_RETURN(false); + +err: + if (table != NULL) + free_tmp_table(thd, table); + tab->table= NULL; + DBUG_RETURN(true); +} + + +void +JOIN::optimize_distinct() +{ + for (JOIN_TAB *last_join_tab= join_tab + top_join_tab_count - 1; ;) + { + if (select_lex->select_list_tables & last_join_tab->table->map || + last_join_tab->use_join_cache) + break; + last_join_tab->shortcut_for_distinct= true; + if (last_join_tab == join_tab) + break; + --last_join_tab; + } + + /* Optimize "select distinct b from t1 order by key_part_1 limit #" */ + if (order && skip_sort_order && !unit->lim.is_with_ties()) + { + /* Should already have been optimized away */ + DBUG_ASSERT(ordered_index_usage == ordered_index_order_by); + if (ordered_index_usage == ordered_index_order_by) + { + order= NULL; + } + } +} + + +/** + @brief Add Filesort object to the given table to sort if with filesort + + @param tab the JOIN_TAB object to attach created Filesort object to + @param order List of expressions to sort the table by + + @note This function moves tab->select, if any, to filesort->select + + @return false on success, true on OOM +*/ + +bool +JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order) +{ + tab->filesort= + new (thd->mem_root) Filesort(order, HA_POS_ERROR, tab->keep_current_rowid, + tab->select); + if (!tab->filesort) + return true; + + TABLE *table= tab->table; + if ((tab == join_tab + const_tables) && + table->pos_in_table_list && + table->pos_in_table_list->is_sjm_scan_table()) + { + tab->filesort->set_all_read_bits= TRUE; + tab->filesort->unpack= unpack_to_base_table_fields; + } + + /* + Select was moved to filesort->select to force join_init_read_record to use + sorted result instead of reading table through select. 
+ */ + if (tab->select) + { + tab->select= NULL; + tab->set_select_cond(NULL, __LINE__); + } + tab->read_first_record= join_init_read_record; + return false; +} + + + + +/** + Setup expression caches for subqueries that need them + + @details + The function wraps correlated subquery expressions that return one value + into objects of the class Item_cache_wrapper setting up an expression + cache for each of them. The result values of the subqueries are to be + cached together with the corresponding sets of the parameters - outer + references of the subqueries. + + @retval FALSE OK + @retval TRUE Error +*/ + +bool JOIN::setup_subquery_caches() +{ + DBUG_ENTER("JOIN::setup_subquery_caches"); + + /* + We have to check all this condition together because items created in + one of this clauses can be moved to another one by optimizer + */ + if (select_lex->expr_cache_may_be_used[IN_WHERE] || + select_lex->expr_cache_may_be_used[IN_HAVING] || + select_lex->expr_cache_may_be_used[IN_ON] || + select_lex->expr_cache_may_be_used[NO_MATTER]) + { + JOIN_TAB *tab; + if (conds && + !(conds= conds->transform(thd, &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + if (tab->select_cond && + !(tab->select_cond= + tab->select_cond->transform(thd, + &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + if (tab->cache_select && tab->cache_select->cond) + if (!(tab->cache_select->cond= + tab->cache_select-> + cond->transform(thd, &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + } + + if (having && + !(having= having->transform(thd, + &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + + if (tmp_having) + { + DBUG_ASSERT(having == NULL); + if (!(tmp_having= + tmp_having->transform(thd, + &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + } + } + if 
(select_lex->expr_cache_may_be_used[SELECT_LIST] || + select_lex->expr_cache_may_be_used[IN_GROUP_BY] || + select_lex->expr_cache_may_be_used[NO_MATTER]) + { + List_iterator li(all_fields); + Item *item; + while ((item= li++)) + { + Item *new_item; + if (!(new_item= + item->transform(thd, &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + if (new_item != item) + { + thd->change_item_tree(li.ref(), new_item); + } + } + for (ORDER *tmp_group= group_list; tmp_group ; tmp_group= tmp_group->next) + { + if (!(*tmp_group->item= + (*tmp_group->item)->transform(thd, + &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + } + } + if (select_lex->expr_cache_may_be_used[NO_MATTER]) + { + for (ORDER *ord= order; ord; ord= ord->next) + { + if (!(*ord->item= + (*ord->item)->transform(thd, + &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + } + } + DBUG_RETURN(FALSE); +} + + +/* + Shrink join buffers used for preceding tables to reduce the occupied space + + SYNOPSIS + shrink_join_buffers() + jt table up to which the buffers are to be shrunk + curr_space the size of the space used by the buffers for tables 1..jt + needed_space the size of the space that has to be used by these buffers + + DESCRIPTION + The function makes an attempt to shrink all join buffers used for the + tables starting from the first up to jt to reduce the total size of the + space occupied by the buffers used for tables 1,...,jt from curr_space + to needed_space. + The function assumes that the buffer for the table jt has not been + allocated yet. 
+ + RETURN + FALSE if all buffer have been successfully shrunk + TRUE otherwise +*/ + +bool JOIN::shrink_join_buffers(JOIN_TAB *jt, + ulonglong curr_space, + ulonglong needed_space) +{ + JOIN_TAB *tab; + JOIN_CACHE *cache; + for (tab= first_linear_tab(this, WITHOUT_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab != jt; + tab= next_linear_tab(this, tab, WITHOUT_BUSH_ROOTS)) + { + cache= tab->cache; + if (cache) + { + size_t buff_size; + if (needed_space < cache->get_min_join_buffer_size()) + return TRUE; + if (cache->shrink_join_buffer_in_ratio(curr_space, needed_space)) + { + revise_cache_usage(tab); + return TRUE; + } + buff_size= cache->get_join_buffer_size(); + curr_space-= buff_size; + needed_space-= buff_size; + } + } + + cache= jt->cache; + DBUG_ASSERT(cache); + if (needed_space < cache->get_min_join_buffer_size()) + return TRUE; + cache->set_join_buffer_size((size_t)needed_space); + + return FALSE; +} + + +int +JOIN::reinit() +{ + DBUG_ENTER("JOIN::reinit"); + + first_record= false; + group_sent= false; + cleaned= false; + accepted_rows= 0; + + if (aggr_tables) + { + JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt(); + JOIN_TAB *end_tab= curr_tab + aggr_tables; + for ( ; curr_tab < end_tab; curr_tab++) + { + TABLE *tmp_table= curr_tab->table; + if (!tmp_table->is_created()) + continue; + tmp_table->file->extra(HA_EXTRA_RESET_STATE); + tmp_table->file->ha_delete_all_rows(); + } + } + clear_sj_tmp_tables(this); + if (current_ref_ptrs != items0) + { + set_items_ref_array(items0); + set_group_rpa= false; + } + + /* need to reset ref access state (see join_read_key) */ + if (join_tab) + { + JOIN_TAB *tab; + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES); tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + tab->ref.key_err= TRUE; + } + } + + clear_sum_funcs(); + + if (no_rows_in_result_called) + { + /* Reset effect of possible no_rows_in_result() */ + List_iterator_fast it(fields_list); + Item *item; + no_rows_in_result_called= 0; + 
while ((item= it++)) + item->restore_to_before_no_rows_in_result(); + } + + if (!(select_options & SELECT_DESCRIBE)) + if (init_ftfuncs(thd, select_lex, MY_TEST(order))) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +/** + Prepare join result. + + @details Prepare join result prior to join execution or describing. + Instantiate derived tables and get schema tables result if necessary. + + @return + TRUE An error during derived or schema tables instantiation. + FALSE Ok +*/ + +bool JOIN::prepare_result(List **columns_list) +{ + DBUG_ENTER("JOIN::prepare_result"); + + error= 0; + /* Create result tables for materialized views. */ + if (!zero_result_cause && + select_lex->handle_derived(thd->lex, DT_CREATE)) + goto err; + + if (result->prepare2(this)) + goto err; + + if ((select_lex->options & OPTION_SCHEMA_TABLE) && + get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC)) + goto err; + + DBUG_RETURN(FALSE); + +err: + error= 1; + DBUG_RETURN(TRUE); +} + + +/** + @retval + 0 ok + 1 error +*/ + + +bool JOIN::save_explain_data(Explain_query *output, bool can_overwrite, + bool need_tmp_table, bool need_order, + bool distinct) +{ + DBUG_ENTER("JOIN::save_explain_data"); + DBUG_PRINT("enter", ("Save explain Select_lex: %u (%p) parent lex: %p stmt_lex: %p present select: %u (%p)", + select_lex->select_number, select_lex, + select_lex->parent_lex, thd->lex->stmt_lex, + (output->get_select(select_lex->select_number) ? + select_lex->select_number : 0), + (output->get_select(select_lex->select_number) ? 
+ output->get_select(select_lex->select_number) + ->select_lex : NULL))); + /* + If there is SELECT in this statement with the same number it must be the + same SELECT + */ + DBUG_ASSERT(select_lex->select_number == FAKE_SELECT_LEX_ID || !output || + !output->get_select(select_lex->select_number) || + output->get_select(select_lex->select_number)->select_lex == + select_lex); + + if (select_lex->select_number != FAKE_SELECT_LEX_ID && + have_query_plan != JOIN::QEP_NOT_PRESENT_YET && + have_query_plan != JOIN::QEP_DELETED && // this happens when there was + // no QEP ever, but then + //cleanup() is called multiple times + output && // for "SET" command in SPs. + (can_overwrite? true: !output->get_select(select_lex->select_number))) + { + const char *message= NULL; + if (!table_count || !tables_list || zero_result_cause) + { + /* It's a degenerate join */ + message= zero_result_cause ? zero_result_cause : "No tables used"; + } + bool rc= save_explain_data_intern(thd->lex->explain, need_tmp_table, + need_order, distinct, message); + DBUG_RETURN(rc); + } + + /* + Can have join_tab==NULL for degenerate cases (e.g. SELECT .. UNION ... SELECT LIMIT 0) + */ + if (select_lex == select_lex->master_unit()->fake_select_lex && join_tab) + { + /* + This is fake_select_lex. 
It has no query plan, but we need to set up a + tracker for ANALYZE + */ + uint nr= select_lex->master_unit()->first_select()->select_number; + Explain_union *eu= output->get_union(nr); + explain= &eu->fake_select_lex_explain; + join_tab[0].tracker= eu->get_fake_select_lex_tracker(); + for (uint i=0 ; i < exec_join_tab_cnt() + aggr_tables; i++) + { + if (join_tab[i].filesort) + { + if (!(join_tab[i].filesort->tracker= + new Filesort_tracker(thd->lex->analyze_stmt))) + DBUG_RETURN(1); + } + } + } + DBUG_RETURN(0); +} + + +void JOIN::exec() +{ + DBUG_EXECUTE_IF("show_explain_probe_join_exec_start", + if (dbug_user_var_equals_int(thd, + "show_explain_probe_select_id", + select_lex->select_number)) + dbug_serve_apcs(thd, 1); + ); + ANALYZE_START_TRACKING(thd, &explain->time_tracker); + exec_inner(); + ANALYZE_STOP_TRACKING(thd, &explain->time_tracker); + + DBUG_EXECUTE_IF("show_explain_probe_join_exec_end", + if (dbug_user_var_equals_int(thd, + "show_explain_probe_select_id", + select_lex->select_number)) + dbug_serve_apcs(thd, 1); + ); +} + + +void JOIN::exec_inner() +{ + List *columns_list= &fields_list; + DBUG_ENTER("JOIN::exec_inner"); + DBUG_ASSERT(optimization_state == JOIN::OPTIMIZATION_DONE); + + THD_STAGE_INFO(thd, stage_executing); + + /* + Enable LIMIT ROWS EXAMINED during query execution if: + (1) This JOIN is the outermost query (not a subquery or derived table) + This ensures that the limit is enabled when actual execution begins, + and not if a subquery is evaluated during optimization of the outer + query. + (2) This JOIN is not the result of a UNION. In this case do not apply the + limit in order to produce the partial query result stored in the + UNION temp table. 
+ */ + + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_exec(thd, "join_execution"); + trace_exec.add_select_number(select_lex->select_number); + Json_writer_array trace_steps(thd, "steps"); + + if (!select_lex->outer_select() && // (1) + select_lex != select_lex->master_unit()->fake_select_lex) // (2) + thd->lex->set_limit_rows_examined(); + + if (procedure) + { + procedure_fields_list= fields_list; + if (procedure->change_columns(thd, procedure_fields_list) || + result->prepare(procedure_fields_list, unit)) + { + thd->set_examined_row_count(0); + thd->limit_found_rows= 0; + DBUG_VOID_RETURN; + } + columns_list= &procedure_fields_list; + } + if (result->prepare2(this)) + DBUG_VOID_RETURN; + + if (!tables_list && (table_count || !select_lex->with_sum_func) && + !select_lex->have_window_funcs()) + { // Only test of functions + if (select_options & SELECT_DESCRIBE) + select_describe(this, FALSE, FALSE, FALSE, + (zero_result_cause?zero_result_cause:"No tables used")); + else + { + if (result->send_result_set_metadata(*columns_list, + Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF)) + { + DBUG_VOID_RETURN; + } + + /* + We have to test for 'conds' here as the WHERE may not be constant + even if we don't have any tables for prepared statements or if + conds uses something like 'rand()'. + If the HAVING clause is either impossible or always true, then + JOIN::having is set to NULL by optimize_cond. + In this case JOIN::exec must check for JOIN::having_value, in the + same way it checks for JOIN::cond_value. + */ + DBUG_ASSERT(error == 0); + if (cond_value != Item::COND_FALSE && + having_value != Item::COND_FALSE && + (!conds || conds->val_int()) && + (!having || having->val_int())) + { + if (do_send_rows && + (procedure ? (procedure->send_row(procedure_fields_list) || + procedure->end_of_records()): + result->send_data_with_check(fields_list, unit, 0)> 0)) + error= 1; + else + send_records= ((select_options & OPTION_FOUND_ROWS) ? 
1 : + thd->get_sent_row_count()); + } + else + send_records= 0; + if (likely(!error)) + { + join_free(); // Unlock all cursors + error= (int) result->send_eof(); + } + } + /* Single select (without union) always returns 0 or 1 row */ + thd->limit_found_rows= send_records; + thd->set_examined_row_count(0); + DBUG_VOID_RETURN; + } + + /* + Evaluate expensive constant conditions that were not evaluated during + optimization. Do not evaluate them for EXPLAIN statements as these + condtions may be arbitrarily costly, and because the optimize phase + might not have produced a complete executable plan for EXPLAINs. + */ + if (!zero_result_cause && + exec_const_cond && !(select_options & SELECT_DESCRIBE) && + !exec_const_cond->val_int()) + zero_result_cause= "Impossible WHERE noticed after reading const tables"; + + /* + We've called exec_const_cond->val_int(). This may have caused an error. + */ + if (unlikely(thd->is_error())) + { + error= thd->is_error(); + DBUG_VOID_RETURN; + } + + if (zero_result_cause) + { + if (select_lex->have_window_funcs() && send_row_on_empty_set()) + { + /* + The query produces just one row but it has window functions. + + The only way to compute the value of window function(s) is to + run the entire window function computation step (there is no shortcut). + */ + const_tables= table_count; + first_select= sub_select_postjoin_aggr; + } + else + { + (void) return_zero_rows(this, result, &select_lex->leaf_tables, + columns_list, + send_row_on_empty_set(), + select_options, + zero_result_cause, + having ? having : tmp_having, &all_fields); + DBUG_VOID_RETURN; + } + } + + /* + Evaluate all constant expressions with subqueries in the + ORDER/GROUP clauses to make sure that all subqueries return a + single row. The evaluation itself will trigger an error if that is + not the case. 
+ */ + if (exec_const_order_group_cond.elements && + !(select_options & SELECT_DESCRIBE) && + !select_lex->pushdown_select) + { + List_iterator_fast const_item_it(exec_const_order_group_cond); + Item *cur_const_item; + StringBuffer tmp; + while ((cur_const_item= const_item_it++)) + { + tmp.set_buffer_if_not_allocated(&my_charset_bin); + cur_const_item->val_str(&tmp); + if (unlikely(thd->is_error())) + { + error= thd->is_error(); + DBUG_VOID_RETURN; + } + } + } + + if ((this->select_lex->options & OPTION_SCHEMA_TABLE) && + get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC)) + DBUG_VOID_RETURN; + + if (select_options & SELECT_DESCRIBE) + { + select_describe(this, need_tmp, + order != 0 && !skip_sort_order, + select_distinct, + !table_count ? "No tables used" : NullS); + DBUG_VOID_RETURN; + } + else if (select_lex->pushdown_select) + { + /* Execute the query pushed into a foreign engine */ + error= select_lex->pushdown_select->execute(); + DBUG_VOID_RETURN; + } + else + { + /* it's a const select, materialize it. */ + select_lex->mark_const_derived(zero_result_cause); + } + + /* + Initialize examined rows here because the values from all join parts + must be accumulated in examined_row_count. Hence every join + iteration must count from zero. + */ + join_examined_rows= 0; + + /* XXX: When can we have here thd->is_error() not zero? */ + if (unlikely(thd->is_error())) + { + error= thd->is_error(); + DBUG_VOID_RETURN; + } + + THD_STAGE_INFO(thd, stage_sending_data); + DBUG_PRINT("info", ("%s", thd->proc_info)); + result->send_result_set_metadata( + procedure ? procedure_fields_list : *fields, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF); + + error= result->view_structure_only() ? false : do_select(this, procedure); + /* Accumulate the counts from all join iterations of all join parts. 
*/ + thd->inc_examined_row_count(join_examined_rows); + DBUG_PRINT("counts", ("thd->examined_row_count: %lu", + (ulong) thd->get_examined_row_count())); + + DBUG_VOID_RETURN; +} + + +/** + Clean up join. + + @return + Return error that hold JOIN. +*/ + +int +JOIN::destroy() +{ + DBUG_ENTER("JOIN::destroy"); + + DBUG_PRINT("info", ("select %p (%u) <> JOIN %p", + select_lex, select_lex->select_number, this)); + select_lex->join= 0; + + cond_equal= 0; + having_equal= 0; + + cleanup(1); + + if (join_tab) + { + for (JOIN_TAB *tab= first_linear_tab(this, WITH_BUSH_ROOTS, + WITH_CONST_TABLES); + tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + if (tab->aggr) + { + free_tmp_table(thd, tab->table); + delete tab->tmp_table_param; + tab->tmp_table_param= NULL; + tab->aggr= NULL; + } + tab->table= NULL; + } + } + + /* Cleanup items referencing temporary table columns */ + cleanup_item_list(tmp_all_fields1); + cleanup_item_list(tmp_all_fields3); + destroy_sj_tmp_tables(this); + delete_dynamic(&keyuse); + if (save_qep) + delete(save_qep); + if (ext_keyuses_for_splitting) + delete(ext_keyuses_for_splitting); + delete procedure; + DBUG_RETURN(error); +} + + +void JOIN::cleanup_item_list(List &items) const +{ + DBUG_ENTER("JOIN::cleanup_item_list"); + if (!items.is_empty()) + { + List_iterator_fast it(items); + Item *item; + while ((item= it++)) + item->cleanup(); + } + DBUG_VOID_RETURN; +} + + +/** + @brief + Look for provision of the select_handler interface by a foreign engine + + @param thd The thread handler + + @details + The function checks that this is an upper level select and if so looks + through its tables searching for one whose handlerton owns a + create_select call-back function. If the call of this function returns + a select_handler interface object then the server will push the select + query into this engine. + This is a responsibility of the create_select call-back function to + check whether the engine can execute the query. 
+ + @retval the found select_handler if the search is successful + 0 otherwise +*/ + +select_handler *find_select_handler(THD *thd, + SELECT_LEX* select_lex) +{ + if (select_lex->next_select()) + return 0; + if (select_lex->master_unit()->outer_select()) + return 0; + + TABLE_LIST *tbl= nullptr; + // For SQLCOM_INSERT_SELECT the server takes TABLE_LIST + // from thd->lex->query_tables and skips its first table + // b/c it is the target table for the INSERT..SELECT. + if (thd->lex->sql_command != SQLCOM_INSERT_SELECT) + { + tbl= select_lex->join->tables_list; + } + else if (thd->lex->query_tables && + thd->lex->query_tables->next_global) + { + tbl= thd->lex->query_tables->next_global; + } + else + return 0; + + for (;tbl; tbl= tbl->next_global) + { + if (!tbl->table) + continue; + handlerton *ht= tbl->table->file->partition_ht(); + if (!ht->create_select) + continue; + select_handler *sh= ht->create_select(thd, select_lex); + return sh; + } + return 0; +} + + +/** + An entry point to single-unit select (a select without UNION). + + @param thd thread handler + @param rref_pointer_array a reference to ref_pointer_array of + the top-level select_lex for this query + @param tables list of all tables used in this query. + The tables have been pre-opened. + @param fields list of items in SELECT list of the top-level + select + e.g. SELECT a, b, c FROM t1 will have Item_field + for a, b and c in this list. + @param conds top level item of an expression representing + WHERE clause of the top level select + @param og_num total number of ORDER BY and GROUP BY clauses + arguments + @param order linked list of ORDER BY agruments + @param group linked list of GROUP BY arguments + @param having top level item of HAVING expression + @param proc_param list of PROCEDUREs + @param select_options select options (BIG_RESULT, etc) + @param result an instance of result set handling class. 
+ This object is responsible for send result + set rows to the client or inserting them + into a table. + @param select_lex the only SELECT_LEX of this query + @param unit top-level UNIT of this query + UNIT is an artificial object created by the + parser for every SELECT clause. + e.g. + SELECT * FROM t1 WHERE a1 IN (SELECT * FROM t2) + has 2 unions. + + @retval + FALSE success + @retval + TRUE an error +*/ + +bool +mysql_select(THD *thd, TABLE_LIST *tables, List &fields, COND *conds, + uint og_num, ORDER *order, ORDER *group, Item *having, + ORDER *proc_param, ulonglong select_options, select_result *result, + SELECT_LEX_UNIT *unit, SELECT_LEX *select_lex) +{ + int err= 0; + bool free_join= 1; + DBUG_ENTER("mysql_select"); + + if (!fields.is_empty()) + select_lex->context.resolve_in_select_list= true; + JOIN *join; + if (select_lex->join != 0) + { + join= select_lex->join; + /* + is it single SELECT in derived table, called in derived table + creation + */ + if (select_lex->get_linkage() != DERIVED_TABLE_TYPE || + (select_options & SELECT_DESCRIBE)) + { + if (select_lex->get_linkage() != GLOBAL_OPTIONS_TYPE) + { + /* + Original join tabs might be overwritten at first + subselect execution. So we need to restore them. + */ + Item_subselect *subselect= select_lex->master_unit()->item; + if (subselect && subselect->is_uncacheable() && join->reinit()) + DBUG_RETURN(TRUE); + } + else + { + if ((err= join->prepare(tables, conds, og_num, order, false, group, + having, proc_param, select_lex, unit))) + { + goto err; + } + } + } + free_join= 0; + join->select_options= select_options; + } + else + { + if (thd->lex->describe) + select_options|= SELECT_DESCRIBE; + + /* + When in EXPLAIN, delay deleting the joins so that they are still + available when we're producing EXPLAIN EXTENDED warning text. 
+ */ + if (select_options & SELECT_DESCRIBE) + free_join= 0; + + if (!(join= new (thd->mem_root) JOIN(thd, fields, select_options, result))) + DBUG_RETURN(TRUE); + THD_STAGE_INFO(thd, stage_init); + thd->lex->used_tables=0; + if ((err= join->prepare(tables, conds, og_num, order, false, group, having, + proc_param, select_lex, unit))) + { + goto err; + } + } + + thd->get_stmt_da()->reset_current_row_for_warning(1); + /* Look for a table owned by an engine with the select_handler interface */ + select_lex->pushdown_select= find_select_handler(thd, select_lex); + + if ((err= join->optimize())) + { + goto err; // 1 + } + + if (thd->lex->describe & DESCRIBE_EXTENDED) + { + join->conds_history= join->conds; + join->having_history= (join->having?join->having:join->tmp_having); + } + + if (unlikely(thd->is_error())) + goto err; + + join->exec(); + + if (thd->lex->describe & DESCRIBE_EXTENDED) + { + select_lex->where= join->conds_history; + select_lex->having= join->having_history; + } + +err: + + if (select_lex->pushdown_select) + { + delete select_lex->pushdown_select; + select_lex->pushdown_select= NULL; + } + + if (free_join) + { + THD_STAGE_INFO(thd, stage_end); + err|= (int)(select_lex->cleanup()); + DBUG_RETURN(err || thd->is_error()); + } + DBUG_RETURN(join->error ? join->error: err); +} + + +/** + Approximate how many records are going to be returned by this table in this + select with this key. 
+ + @param thd Thread handle + @param select Select to be examined + @param table The table of interest + @param keys The keys of interest + @param limit Maximum number of rows of interest + @param quick_count Pointer to where we want the estimate written + + @return Status + @retval false Success + @retval true Error + +*/ + +static bool get_quick_record_count(THD *thd, SQL_SELECT *select, + TABLE *table, + const key_map *keys,ha_rows limit, + ha_rows *quick_count) +{ + quick_select_return error; + DBUG_ENTER("get_quick_record_count"); + uchar buff[STACK_BUFF_ALLOC]; + if (unlikely(check_stack_overrun(thd, STACK_MIN_SIZE, buff))) + DBUG_RETURN(false); // Fatal error flag is set + if (select) + { + select->head=table; + table->reginfo.impossible_range=0; + error= select->test_quick_select(thd, *(key_map *)keys, (table_map) 0, + limit, 0, FALSE, + TRUE, /* remove_where_parts*/ + FALSE, TRUE); + + if (error == SQL_SELECT::OK && select->quick) + { + *quick_count= select->quick->records; + DBUG_RETURN(false); + } + if (error == SQL_SELECT::IMPOSSIBLE_RANGE) + { + table->reginfo.impossible_range=1; + *quick_count= 0; + DBUG_RETURN(false); + } + if (unlikely(error == SQL_SELECT::ERROR)) + DBUG_RETURN(true); + + DBUG_PRINT("warning",("Couldn't use record count on const keypart")); + } + *quick_count= HA_POS_ERROR; + DBUG_RETURN(false); /* This shouldn't happen */ +} + +/* + This structure is used to collect info on potentially sargable + predicates in order to check whether they become sargable after + reading const tables. + We form a bitmap of indexes that can be used for sargable predicates. + Only such indexes are involved in range analysis. +*/ +struct SARGABLE_PARAM +{ + Field *field; /* field against which to check sargability */ + Item **arg_value; /* values of potential keys for lookups */ + uint num_values; /* number of values in the above array */ +}; + + +/* + Mark all tables inside a join nest as constant. 
+ + @detail This is called when there is a local "Impossible WHERE" inside + a multi-table LEFT JOIN. +*/ + +void mark_join_nest_as_const(JOIN *join, + TABLE_LIST *join_nest, + table_map *found_const_table_map, + uint *const_count) +{ + List_iterator it(join_nest->nested_join->join_list); + TABLE_LIST *tbl; + Json_writer_object emb_obj(join->thd); + Json_writer_object trace_obj(join->thd, "mark_join_nest_as_const"); + Json_writer_array trace_array(join->thd, "members"); + + while ((tbl= it++)) + { + if (tbl->nested_join) + { + mark_join_nest_as_const(join, tbl, found_const_table_map, const_count); + continue; + } + JOIN_TAB *tab= tbl->table->reginfo.join_tab; + + if (!(join->const_table_map & tab->table->map)) + { + tab->type= JT_CONST; + tab->info= ET_IMPOSSIBLE_ON_CONDITION; + tab->table->const_table= 1; + + join->const_table_map|= tab->table->map; + *found_const_table_map|= tab->table->map; + set_position(join,(*const_count)++,tab,(KEYUSE*) 0); + mark_as_null_row(tab->table); // All fields are NULL + + trace_array.add_table_name(tab->table); + } + } +} + + +/* + @brief Get the condition that can be used to do range analysis/partition + pruning/etc + + @detail + Figure out which condition we can use: + - For INNER JOIN, we use the WHERE, + - "t1 LEFT JOIN t2 ON ..." uses t2's ON expression + - "t1 LEFT JOIN (...) ON ..." uses the join nest's ON expression. +*/ + +static Item **get_sargable_cond(JOIN *join, TABLE *table) +{ + Item **retval; + if (table->pos_in_table_list->on_expr) + { + /* + This is an inner table from a single-table LEFT JOIN, "t1 LEFT JOIN + t2 ON cond". Use the condition cond. + */ + retval= &table->pos_in_table_list->on_expr; + } + else if (table->pos_in_table_list->embedding && + !table->pos_in_table_list->embedding->sj_on_expr) + { + /* + This is the inner side of a multi-table outer join. Use the + appropriate ON expression. 
+ */ + retval= &(table->pos_in_table_list->embedding->on_expr); + } + else + { + /* The table is not inner wrt some LEFT JOIN. Use the WHERE clause */ + retval= &join->conds; + } + return retval; +} + + +/** + Calculate the best possible join and initialize the join structure. + + @retval + 0 ok + @retval + 1 Fatal error +*/ + +static bool +make_join_statistics(JOIN *join, List &tables_list, + DYNAMIC_ARRAY *keyuse_array) +{ + int error= 0; + TABLE *UNINIT_VAR(table); /* inited in all loops */ + uint i,table_count,const_count,key; + uint sort_space; + table_map found_const_table_map, all_table_map; + key_map const_ref, eq_part; + bool has_expensive_keyparts; + TABLE **table_vector; + JOIN_TAB *stat,*stat_end,*s,**stat_ref, **stat_vector; + KEYUSE *keyuse,*start_keyuse; + table_map outer_join=0; + table_map no_rows_const_tables= 0; + SARGABLE_PARAM *sargables= 0; + List_iterator ti(tables_list); + TABLE_LIST *tables; + THD *thd= join->thd; + DBUG_ENTER("make_join_statistics"); + + table_count=join->table_count; + + /* + best_extension_by_limited_search need sort space for 2POSITIION + objects per remaining table, which gives us + 2*(T + T-1 + T-2 + T-3...1 POSITIONS) = 2*(T+1)/2*T = (T*T+T) + */ + join->sort_space= sort_space= (table_count*table_count + table_count); + + /* + best_positions is ok to allocate with alloc() as we copy things to it with + memcpy() + */ + + if (!multi_alloc_root(join->thd->mem_root, + &stat, sizeof(JOIN_TAB)*(table_count), + &stat_ref, sizeof(JOIN_TAB*)* MAX_TABLES, + &stat_vector, sizeof(JOIN_TAB*)* (table_count +1), + &table_vector, sizeof(TABLE*)*(table_count*2), + &join->positions, sizeof(POSITION)*(table_count + 1), + &join->sort_positions, sizeof(POSITION)*(sort_space), + &join->best_positions, + sizeof(POSITION)*(table_count + 1), + NullS)) + DBUG_RETURN(1); + + /* The following should be optimized to only clear critical things */ + bzero((void*)stat, sizeof(JOIN_TAB)* table_count); + join->top_join_tab_count= table_count; + + /* 
Initialize POSITION objects */ + for (i=0 ; i <= table_count ; i++) + (void) new ((char*) (join->positions + i)) POSITION; + for (i=0 ; i < sort_space ; i++) + (void) new ((char*) (join->sort_positions + i)) POSITION; + + join->best_ref= stat_vector; + + stat_end=stat+table_count; + found_const_table_map= all_table_map=0; + const_count=0; + + for (s= stat, i= 0; (tables= ti++); s++, i++) + { + TABLE_LIST *embedding= tables->embedding; + stat_vector[i]=s; + table_vector[i]=s->table=table=tables->table; + s->tab_list= tables; + table->pos_in_table_list= tables; + error= tables->fetch_number_of_rows(); + set_statistics_for_table(join->thd, table); + bitmap_clear_all(&table->cond_set); + +#ifdef WITH_PARTITION_STORAGE_ENGINE + const bool all_partitions_pruned_away= table->all_partitions_pruned_away; +#else + const bool all_partitions_pruned_away= FALSE; +#endif + + DBUG_EXECUTE_IF("bug11747970_raise_error", + { join->thd->set_killed(KILL_QUERY_HARD); }); + if (unlikely(error)) + { + table->file->print_error(error, MYF(0)); + goto error; + } + table->opt_range_keys.clear_all(); + table->intersect_keys.clear_all(); + table->reginfo.join_tab=s; + table->reginfo.not_exists_optimize=0; + bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys); + all_table_map|= table->map; + s->preread_init_done= FALSE; + s->join=join; + + s->dependent= tables->dep_tables; + if (tables->schema_table) + table->file->stats.records= table->used_stat_records= 2; + table->opt_range_condition_rows= table->stat_records(); + + s->on_expr_ref= &tables->on_expr; + if (*s->on_expr_ref) + { + /* s is the only inner table of an outer join */ + if (!table->is_filled_at_execution() && + ((!table->file->stats.records && + (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)) || + all_partitions_pruned_away) && !embedding) + { // Empty table + s->dependent= 0; // Ignore LEFT JOIN depend. 
+ no_rows_const_tables |= table->map; + set_position(join,const_count++,s,(KEYUSE*) 0); + continue; + } + outer_join|= table->map; + s->embedding_map= 0; + for (;embedding; embedding= embedding->embedding) + s->embedding_map|= embedding->nested_join->nj_map; + continue; + } + if (embedding) + { + /* s belongs to a nested join, maybe to several embedded joins */ + s->embedding_map= 0; + bool inside_an_outer_join= FALSE; + do + { + /* + If this is a semi-join nest, skip it, and proceed upwards. Maybe + we're in some outer join nest + */ + if (embedding->sj_on_expr) + { + embedding= embedding->embedding; + continue; + } + inside_an_outer_join= TRUE; + NESTED_JOIN *nested_join= embedding->nested_join; + s->embedding_map|=nested_join->nj_map; + s->dependent|= embedding->dep_tables; + embedding= embedding->embedding; + outer_join|= nested_join->used_tables; + } + while (embedding); + if (inside_an_outer_join) + continue; + } + if (!table->is_filled_at_execution() && + (table->s->system || + (table->file->stats.records <= 1 && + (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)) || + all_partitions_pruned_away) && + !s->dependent && + !table->fulltext_searched && !join->no_const_tables) + { + set_position(join,const_count++,s,(KEYUSE*) 0); + no_rows_const_tables |= table->map; + } + + /* SJ-Materialization handling: */ + if (table->pos_in_table_list->jtbm_subselect && + table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab) + { + set_position(join,const_count++,s,(KEYUSE*) 0); + no_rows_const_tables |= table->map; + } + } + + stat_vector[i]=0; + join->outer_join=outer_join; + + if (join->outer_join) + { + /* + Build transitive closure for relation 'to be dependent on'. + This will speed up the plan search for many cases with outer joins, + as well as allow us to catch illegal cross references/ + Warshall's algorithm is used to build the transitive closure. 
+ As we use bitmaps to represent the relation the complexity + of the algorithm is O((number of tables)^2). + + The classic form of the Warshall's algorithm would look like: + for (i= 0; i < table_count; i++) + { + for (j= 0; j < table_count; j++) + { + for (k= 0; k < table_count; k++) + { + if (bitmap_is_set(stat[j].dependent, i) && + bitmap_is_set(stat[i].dependent, k)) + bitmap_set_bit(stat[j].dependent, k); + } + } + } + */ + + for (s= stat ; s < stat_end ; s++) + { + table= s->table; + for (JOIN_TAB *t= stat ; t < stat_end ; t++) + { + if (t->dependent & table->map) + t->dependent |= table->reginfo.join_tab->dependent; + } + if (outer_join & s->table->map) + s->table->maybe_null= 1; + } + /* Catch illegal cross references for outer joins */ + for (i= 0, s= stat ; i < table_count ; i++, s++) + { + if (s->dependent & s->table->map) + { + join->table_count=0; // Don't use join->table + my_message(ER_WRONG_OUTER_JOIN, + ER_THD(join->thd, ER_WRONG_OUTER_JOIN), MYF(0)); + goto error; + } + s->key_dependent= s->dependent; + } + } + + { + for (JOIN_TAB *s= stat ; s < stat_end ; s++) + { + TABLE_LIST *tl= s->table->pos_in_table_list; + if (tl->embedding && tl->embedding->sj_subq_pred) + { + s->embedded_dependent= tl->embedding->original_subq_pred_used_tables; + } + } + } + + if (thd->trace_started()) + trace_table_dependencies(thd, stat, join->table_count); + + if (join->conds || outer_join) + { + if (update_ref_and_keys(thd, keyuse_array, stat, join->table_count, + join->conds, ~outer_join, join->select_lex, &sargables)) + goto error; + /* + Keyparts without prefixes may be useful if this JOIN is a subquery, and + if the subquery may be executed via the IN-EXISTS strategy. 
+ */ + bool skip_unprefixed_keyparts= + !(join->is_in_subquery() && + join->unit->item->get_IN_subquery()->test_strategy(SUBS_IN_TO_EXISTS)); + + if (keyuse_array->elements && + sort_and_filter_keyuse(join, keyuse_array, + skip_unprefixed_keyparts)) + goto error; + DBUG_EXECUTE("opt", print_keyuse_array(keyuse_array);); + if (thd->trace_started()) + print_keyuse_array_for_trace(thd, keyuse_array); + } + + join->const_table_map= no_rows_const_tables; + join->const_tables= const_count; + eliminate_tables(join); + join->const_table_map &= ~no_rows_const_tables; + const_count= join->const_tables; + found_const_table_map= join->const_table_map; + + /* Read tables with 0 or 1 rows (system tables) */ + for (POSITION *p_pos=join->positions, *p_end=p_pos+const_count; + p_pos < p_end ; + p_pos++) + { + s= p_pos->table; + if (! (s->table->map & join->eliminated_tables)) + { + int tmp; + s->type=JT_SYSTEM; + join->const_table_map|=s->table->map; + if ((tmp=join_read_const_table(join->thd, s, p_pos))) + { + if (tmp > 0) + goto error; // Fatal error + } + else + { + found_const_table_map|= s->table->map; + s->table->pos_in_table_list->optimized_away= TRUE; + } + } + } + + /* loop until no more const tables are found */ + int ref_changed; + do + { + ref_changed = 0; + more_const_tables_found: + + /* + We only have to loop from stat_vector + const_count as + set_position() will move all const_tables first in stat_vector + */ + + for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++) + { + table=s->table; + + if (table->is_filled_at_execution()) + continue; + + /* + If equi-join condition by a key is null rejecting and after a + substitution of a const table the key value happens to be null + then we can state that there are no matches for this equi-join. 
+ */ + if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map && + !(table->map & join->eliminated_tables)) + { + /* + When performing an outer join operation if there are no matching rows + for the single row of the outer table all the inner tables are to be + null complemented and thus considered as constant tables. + Here we apply this consideration to the case of outer join operations + with a single inner table only because the case with nested tables + would require a more thorough analysis. + TODO. Apply single row substitution to null complemented inner tables + for nested outer join operations. + */ + while (keyuse->table == table) + { + if (!keyuse->is_for_hash_join() && + !(keyuse->val->used_tables() & ~join->const_table_map) && + keyuse->val->is_null() && keyuse->null_rejecting) + { + s->type= JT_CONST; + s->table->const_table= 1; + mark_as_null_row(table); + found_const_table_map|= table->map; + join->const_table_map|= table->map; + set_position(join,const_count++,s,(KEYUSE*) 0); + goto more_const_tables_found; + } + keyuse++; + } + } + + if (s->dependent) // If dependent on some table + { + // All dep. 
must be constants + if (s->dependent & ~(found_const_table_map)) + continue; + if (table->file->stats.records <= 1L && + (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && + !table->pos_in_table_list->embedding && + !((outer_join & table->map) && + (*s->on_expr_ref)->is_expensive())) + { // system table + int tmp= 0; + s->type=JT_SYSTEM; + join->const_table_map|=table->map; + set_position(join,const_count++,s,(KEYUSE*) 0); + if ((tmp= join_read_const_table(join->thd, s, join->positions+const_count-1))) + { + if (tmp > 0) + goto error; // Fatal error + } + else + found_const_table_map|= table->map; + continue; + } + } + /* check if table can be read by key or table only uses const refs */ + if ((keyuse=s->keyuse)) + { + s->type= JT_REF; + while (keyuse->table == table) + { + if (keyuse->is_for_hash_join()) + { + keyuse++; + continue; + } + start_keyuse=keyuse; + key=keyuse->key; + s->keys.set_bit(key); // TODO: remove this ? + + const_ref.clear_all(); + eq_part.clear_all(); + has_expensive_keyparts= false; + do + { + if (keyuse->val->type() != Item::NULL_ITEM && + !keyuse->optimize && + keyuse->keypart != FT_KEYPART) + { + if (!((~found_const_table_map) & keyuse->used_tables)) + { + const_ref.set_bit(keyuse->keypart); + if (keyuse->val->is_expensive()) + has_expensive_keyparts= true; + } + eq_part.set_bit(keyuse->keypart); + } + keyuse++; + } while (keyuse->table == table && keyuse->key == key); + + TABLE_LIST *embedding= table->pos_in_table_list->embedding; + /* + TODO (low priority): currently we ignore the const tables that + are within a semi-join nest which is within an outer join nest. + The effect of this is that we don't do const substitution for + such tables. 
+ */ + KEY *keyinfo= table->key_info + key; + uint key_parts= table->actual_n_key_parts(keyinfo); + if (eq_part.is_prefix(key_parts) && + !table->fulltext_searched && + (!embedding || (embedding->sj_on_expr && !embedding->embedding))) + { + key_map base_part, base_const_ref, base_eq_part; + base_part.set_prefix(keyinfo->user_defined_key_parts); + base_const_ref= const_ref; + base_const_ref.intersect(base_part); + base_eq_part= eq_part; + base_eq_part.intersect(base_part); + if (table->actual_key_flags(keyinfo) & HA_NOSAME) + { + + if (base_const_ref == base_eq_part && + !has_expensive_keyparts && + !((outer_join & table->map) && + (*s->on_expr_ref)->is_expensive())) + { // Found everything for ref. + int tmp; + ref_changed = 1; + s->type= JT_CONST; + join->const_table_map|=table->map; + set_position(join,const_count++,s,start_keyuse); + /* create_ref_for_key will set s->table->const_table */ + if (create_ref_for_key(join, s, start_keyuse, FALSE, + found_const_table_map)) + goto error; + if ((tmp=join_read_const_table(join->thd, s, + join->positions+const_count-1))) + { + if (tmp > 0) + goto error; // Fatal error + } + else + found_const_table_map|= table->map; + break; + } + } + else if (base_const_ref == base_eq_part) + s->const_keys.set_bit(key); + } + } + } + } + } while (ref_changed); + + join->sort_by_table= get_sort_by_table(join->order, join->group_list, + join->select_lex->leaf_tables, + join->const_table_map); + /* + Update info on indexes that can be used for search lookups as + reading const tables may has added new sargable predicates. 
+ */ + if (const_count && sargables) + { + for( ; sargables->field ; sargables++) + { + Field *field= sargables->field; + JOIN_TAB *join_tab= field->table->reginfo.join_tab; + key_map possible_keys= field->key_start; + possible_keys.intersect(field->table->keys_in_use_for_query); + bool is_const= 1; + for (uint j=0; j < sargables->num_values; j++) + is_const&= sargables->arg_value[j]->const_item(); + if (is_const) + join_tab[0].const_keys.merge(possible_keys); + } + } + + join->impossible_where= false; + if (join->conds && const_count) + { + Item* &conds= join->conds; + COND_EQUAL *orig_cond_equal = join->cond_equal; + + conds->update_used_tables(); + conds= conds->remove_eq_conds(join->thd, &join->cond_value, true); + if (conds && conds->type() == Item::COND_ITEM && + ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC) + join->cond_equal= &((Item_cond_and*) conds)->m_cond_equal; + join->select_lex->where= conds; + if (join->cond_value == Item::COND_FALSE) + { + join->impossible_where= true; + conds= (Item*) Item_false; + } + + join->cond_equal= NULL; + if (conds) + { + if (conds->type() == Item::COND_ITEM && + ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC) + join->cond_equal= (&((Item_cond_and *) conds)->m_cond_equal); + else if (conds->type() == Item::FUNC_ITEM && + ((Item_func*) conds)->functype() == Item_func::MULT_EQUAL_FUNC) + { + if (!join->cond_equal) + join->cond_equal= new COND_EQUAL; + join->cond_equal->current_level.empty(); + join->cond_equal->current_level.push_back((Item_equal*) conds, + join->thd->mem_root); + } + } + + if (orig_cond_equal != join->cond_equal) + { + /* + If join->cond_equal has changed all references to it from COND_EQUAL + objects associated with ON expressions must be updated. 
+ */ + for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++) + { + if (*s->on_expr_ref && s->cond_equal && + s->cond_equal->upper_levels == orig_cond_equal) + s->cond_equal->upper_levels= join->cond_equal; + } + } + } + + join->join_tab= stat; + join->make_notnull_conds_for_range_scans(); + + /* Calc how many (possible) matched records in each table */ + + /* + Todo: add a function so that we can add these Json_writer_objects + easily. + Another way would be to enclose them in a scope {}; + */ + { + Json_writer_object rows_estimation_wrapper(thd); + Json_writer_array rows_estimation(thd, "rows_estimation"); + + for (s=stat ; s < stat_end ; s++) + { + s->startup_cost= 0; + if (s->type == JT_SYSTEM || s->type == JT_CONST) + { + + Json_writer_object table_records(thd); + /* Only one matching row */ + s->found_records= s->records= 1; + s->read_time=1.0; + s->worst_seeks=1.0; + table_records.add_table_name(s) + .add("rows", s->found_records) + .add("cost", s->read_time) + .add("table_type", s->type == JT_CONST ? + "const" : + "system"); + continue; + } + /* Approximate found rows and time to read them */ + if (s->table->is_filled_at_execution()) + { + get_delayed_table_estimates(s->table, &s->records, &s->read_time, + &s->startup_cost); + s->found_records= s->records; + s->table->opt_range_condition_rows=s->records; + } + else + s->scan_time(); + + if (s->table->is_splittable()) + s->add_keyuses_for_splitting(); + + /* + Set a max range of how many seeks we can expect when using keys + This is can't be to high as otherwise we are likely to use + table scan. + */ + s->worst_seeks= MY_MIN((double) s->found_records / 10, + (double) s->read_time*3); + if (s->worst_seeks < 2.0) // Fix for small tables + s->worst_seeks=2.0; + + /* + Add to stat->const_keys those indexes for which all group fields or + all select distinct fields participate in one index. 
+ */ + add_group_and_distinct_keys(join, s); + + s->table->cond_selectivity= 1.0; + + /* + Perform range analysis if there are keys it could use (1). + Don't do range analysis for materialized subqueries (2). + Don't do range analysis for materialized derived tables/views (3) + */ + if ((!s->const_keys.is_clear_all() || + !bitmap_is_clear_all(&s->table->cond_set)) && // (1) + !s->table->is_filled_at_execution() && // (2) + !(s->table->pos_in_table_list->derived && // (3) + s->table->pos_in_table_list->is_materialized_derived())) // (3) + { + bool impossible_range= FALSE; + ha_rows records= HA_POS_ERROR; + SQL_SELECT *select= 0; + Item **sargable_cond= NULL; + if (!s->const_keys.is_clear_all()) + { + sargable_cond= get_sargable_cond(join, s->table); + bool is_sargable_cond_of_where= sargable_cond == &join->conds; + + select= make_select(s->table, found_const_table_map, + found_const_table_map, + *sargable_cond, + (SORT_INFO*) 0, 1, &error); + if (!select) + goto error; + if (get_quick_record_count(join->thd, select, s->table, + &s->const_keys, join->row_limit, &records)) + { + /* There was an error in test_quick_select */ + delete select; + goto error; + } + /* + Range analyzer might have modified the condition. Put it the new + condition to where we got it from. 
+ */ + *sargable_cond= select->cond; + + if (is_sargable_cond_of_where && + join->conds && join->conds->type() == Item::COND_ITEM && + ((Item_cond*) (join->conds))->functype() == + Item_func::COND_AND_FUNC) + join->cond_equal= &((Item_cond_and*) (join->conds))->m_cond_equal; + + s->quick=select->quick; + select->quick=0; + s->needed_reg=select->needed_reg; + impossible_range= records == 0 && s->table->reginfo.impossible_range; + if (join->thd->lex->sql_command == SQLCOM_SELECT && + optimizer_flag(join->thd, OPTIMIZER_SWITCH_USE_ROWID_FILTER)) + s->table->init_cost_info_for_usable_range_rowid_filters(join->thd); + } + if (!impossible_range) + { + if (!sargable_cond) + sargable_cond= get_sargable_cond(join, s->table); + if (join->thd->variables.optimizer_use_condition_selectivity > 1) + calculate_cond_selectivity_for_table(join->thd, s->table, + sargable_cond); + if (s->table->reginfo.impossible_range) + { + impossible_range= TRUE; + records= 0; + } + } + if (impossible_range) + { + /* + Impossible WHERE or ON expression + In case of ON, we mark that the we match one empty NULL row. + In case of WHERE, don't set found_const_table_map to get the + caller to abort with a zero row result. + */ + TABLE_LIST *emb= s->table->pos_in_table_list->embedding; + if (emb && !emb->sj_on_expr && !*s->on_expr_ref) + { + /* Mark all tables in a multi-table join nest as const */ + mark_join_nest_as_const(join, emb, &found_const_table_map, + &const_count); + } + else + { + join->const_table_map|= s->table->map; + set_position(join,const_count++,s,(KEYUSE*) 0); + s->type= JT_CONST; + s->table->const_table= 1; + if (*s->on_expr_ref) + { + /* Generate empty row */ + s->info= ET_IMPOSSIBLE_ON_CONDITION; + found_const_table_map|= s->table->map; + mark_as_null_row(s->table); // All fields are NULL + } + } + } + if (records != HA_POS_ERROR) + { + s->found_records=records; + s->read_time= s->quick ? 
s->quick->read_time : 0.0; + } + if (select) + delete select; + else + { + if (thd->trace_started()) + add_table_scan_values_to_trace(thd, s); + } + } + else + { + if (thd->trace_started()) + add_table_scan_values_to_trace(thd, s); + } + } + } + + if (pull_out_semijoin_tables(join)) + DBUG_RETURN(TRUE); + + join->join_tab=stat; + join->top_join_tab_count= table_count; + join->map2table=stat_ref; + join->table= table_vector; + join->const_tables=const_count; + join->found_const_table_map=found_const_table_map; + + if (join->const_tables != join->table_count) + optimize_keyuse(join, keyuse_array); + + DBUG_ASSERT(!join->conds || !join->cond_equal || + !join->cond_equal->current_level.elements || + (join->conds->type() == Item::COND_ITEM && + ((Item_cond*) (join->conds))->functype() == + Item_func::COND_AND_FUNC && + join->cond_equal == + &((Item_cond_and *) (join->conds))->m_cond_equal) || + (join->conds->type() == Item::FUNC_ITEM && + ((Item_func*) (join->conds))->functype() == + Item_func::MULT_EQUAL_FUNC && + join->cond_equal->current_level.elements == 1 && + join->cond_equal->current_level.head() == join->conds)); + + if (optimize_semijoin_nests(join, all_table_map)) + DBUG_RETURN(TRUE); /* purecov: inspected */ + + { + double records= 1; + SELECT_LEX_UNIT *unit= join->select_lex->master_unit(); + + /* Find an optimal join order of the non-constant tables. */ + if (join->const_tables != join->table_count) + { + if (choose_plan(join, all_table_map & ~join->const_table_map)) + goto error; + +#ifdef HAVE_valgrind + // JOIN::positions holds the current query plan. 
We've already + // made the plan choice, so we should only use JOIN::best_positions + for (uint k=join->const_tables; k < join->table_count; k++) + MEM_UNDEFINED(&join->positions[k], sizeof(join->positions[k])); +#endif + } + else + { + memcpy((uchar*) join->best_positions,(uchar*) join->positions, + sizeof(POSITION)*join->const_tables); + join->join_record_count= 1.0; + join->best_read=1.0; + } + + if (!(join->select_options & SELECT_DESCRIBE) && + unit->derived && unit->derived->is_materialized_derived()) + { + /* + Calculate estimated number of rows for materialized derived + table/view. + */ + for (i= 0; i < join->table_count ; i++) + if (double rr= join->best_positions[i].records_read) + records= COST_MULT(records, rr); + ha_rows rows= records > (double) HA_ROWS_MAX ? HA_ROWS_MAX : (ha_rows) records; + set_if_smaller(rows, unit->lim.get_select_limit()); + join->select_lex->increase_derived_records(rows); + } + } + + if (join->choose_subquery_plan(all_table_map & ~join->const_table_map)) + goto error; + + DEBUG_SYNC(join->thd, "inside_make_join_statistics"); + + DBUG_RETURN(0); + +error: + /* + Need to clean up join_tab from TABLEs in case of error. + They won't get cleaned up by JOIN::cleanup() because JOIN::join_tab + may not be assigned yet by this function (which is building join_tab). + Dangling TABLE::reginfo.join_tab may cause part_of_refkey to choke. 
+ */ + { + TABLE_LIST *tmp_table; + List_iterator ti2(tables_list); + while ((tmp_table= ti2++)) + tmp_table->table->reginfo.join_tab= NULL; + } + DBUG_RETURN (1); +} + + +/***************************************************************************** + Check with keys are used and with tables references with tables + Updates in stat: + keys Bitmap of all used keys + const_keys Bitmap of all keys with may be used with quick_select + keyuse Pointer to possible keys +*****************************************************************************/ + + +/** + Merge new key definitions to old ones, remove those not used in both. + + This is called for OR between different levels. + + That is, the function operates on an array of KEY_FIELD elements which has + two parts: + + $LEFT_PART $RIGHT_PART + +-----------------------+-----------------------+ + start new_fields end + + $LEFT_PART and $RIGHT_PART are arrays that have KEY_FIELD elements for two + parts of the OR condition. Our task is to produce an array of KEY_FIELD + elements that would correspond to "$LEFT_PART OR $RIGHT_PART". + + The rules for combining elements are as follows: + + (keyfieldA1 AND keyfieldA2 AND ...) OR (keyfieldB1 AND keyfieldB2 AND ...)= + + = AND_ij (keyfieldA_i OR keyfieldB_j) + + We discard all (keyfieldA_i OR keyfieldB_j) that refer to different + fields. For those referring to the same field, the logic is as follows: + + t.keycol=expr1 OR t.keycol=expr2 -> (since expr1 and expr2 are different + we can't produce a single equality, + so produce nothing) + + t.keycol=expr1 OR t.keycol=expr1 -> t.keycol=expr1 + + t.keycol=expr1 OR t.keycol IS NULL -> t.keycol=expr1, and also set + KEY_OPTIMIZE_REF_OR_NULL flag + + The last one is for ref_or_null access. 
  We have special handling for this because it's needed for evaluating IN
  subqueries that are internally transformed into

  @code
  EXISTS(SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL)
  @endcode

  See add_key_fields() for discussion of what is and_level.

  KEY_FIELD::null_rejecting is processed as follows: @n
  result has null_rejecting=true if it is set for both ORed references.
  for example:
  -  (t2.key = t1.field OR t2.key  =  t1.field) -> null_rejecting=true
  -  (t2.key = t1.field OR t2.key <=> t1.field) -> null_rejecting=false

  @todo
    The result of this is that we're missing some 'ref' accesses.
    OptimizerTeam: Fix this
*/

static KEY_FIELD *
merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end,
                 uint and_level)
{
  /* If one side of the OR produced no key fields, no merge is possible. */
  if (start == new_fields)
    return start;                                // Impossible or
  if (new_fields == end)
    return start;                                // No new fields, skip all

  KEY_FIELD *first_free=new_fields;

  /*
    Mark all found fields in old array: an element of $LEFT_PART survives
    only if a compatible element for the same field exists in $RIGHT_PART.
  */
  for (; new_fields != end ; new_fields++)
  {
    for (KEY_FIELD *old=start ; old != first_free ; old++)
    {
      if (old->field == new_fields->field)
      {
        /*
          NOTE: below const_item() call really works as "!used_tables()", i.e.
          it can return FALSE where it is feasible to make it return TRUE.

          The cause is as follows: Some of the tables are already known to be
          const tables (the detection code is in make_join_statistics(),
          above the update_ref_and_keys() call), but we didn't propagate
          information about this: TABLE::const_table is not set to TRUE, and
          Item::update_used_tables() hasn't been called for each item.
          The result of this is that we're missing some 'ref' accesses.
          TODO: OptimizerTeam: Fix this
        */
        if (!new_fields->val->const_item())
        {
          /*
            If the value matches, we can use the key reference.
            If not, we keep it until we have examined all new values
          */
          if (old->val->eq(new_fields->val, old->field->binary()))
          {
            old->level= and_level;
            old->optimize= ((old->optimize & new_fields->optimize &
                             KEY_OPTIMIZE_EXISTS) |
                            ((old->optimize | new_fields->optimize) &
                             KEY_OPTIMIZE_REF_OR_NULL));
            old->null_rejecting= (old->null_rejecting &&
                                  new_fields->null_rejecting);
          }
        }
        else if (old->eq_func && new_fields->eq_func &&
                 old->val->eq_by_collation(new_fields->val,
                                           old->field->binary(),
                                           old->field->charset()))

        {
          /* Same constant on both sides (collation-aware comparison). */
          old->level= and_level;
          old->optimize= ((old->optimize & new_fields->optimize &
                           KEY_OPTIMIZE_EXISTS) |
                          ((old->optimize | new_fields->optimize) &
                           KEY_OPTIMIZE_REF_OR_NULL));
          old->null_rejecting= (old->null_rejecting &&
                                new_fields->null_rejecting);
        }
        else if (old->eq_func && new_fields->eq_func &&
                 ((old->val->can_eval_in_optimize() && old->val->is_null()) ||
                  (!new_fields->val->is_expensive() &&
                   new_fields->val->is_null())))
        {
          /* field = expression OR field IS NULL -> ref_or_null candidate */
          old->level= and_level;
          if (old->field->maybe_null())
          {
            old->optimize= KEY_OPTIMIZE_REF_OR_NULL;
            /* The referred expression can be NULL: */
            old->null_rejecting= 0;
          }
          /*
            Remember the NOT NULL value unless the value does not depend
            on other tables.
          */
          if (!old->val->used_tables() && !old->val->is_expensive() &&
              old->val->is_null())
            old->val= new_fields->val;
        }
        else
        {
          /*
            We are comparing two different const.  In this case we can't
            use a key-lookup on this so it's better to remove the value
            and let the range optimizer handle it
          */
          if (old == --first_free)              // If last item
            break;
          *old= *first_free;                    // Remove old value
          old--;                                // Retry this value
        }
      }
    }
  }
  /*
    Remove all not used items: anything not stamped with the current
    and_level was present on only one side of the OR.
  */
  for (KEY_FIELD *old=start ; old != first_free ;)
  {
    if (old->level != and_level)
    {                                           // Not used in all levels
      if (old == --first_free)
        break;
      *old= *first_free;                        // Remove old value
      continue;
    }
    old++;
  }
  return first_free;
}


/*
  Given a field, return its index in semi-join's select list, or UINT_MAX

  DESCRIPTION
    Given a field, we find its table; then see if the table is within a
    semi-join nest and if the field was in select list of the subselect.
    If it was, we return field's index in the select list. The value is used
    by LooseScan strategy.
*/

static uint get_semi_join_select_list_index(Field *field)
{
  uint res= UINT_MAX;
  TABLE_LIST *emb_sj_nest;
  /* Only fields of tables embedded in a semi-join nest are of interest. */
  if ((emb_sj_nest= field->table->pos_in_table_list->embedding) &&
      emb_sj_nest->sj_on_expr)
  {
    Item_in_subselect *subq_pred= emb_sj_nest->sj_subq_pred;
    st_select_lex *subq_lex= subq_pred->unit->first_select();
    uint ncols= subq_pred->left_exp()->cols();
    if (ncols == 1)
    {
      /* Scalar IN predicate: only select-list element 0 can match. */
      Item *sel_item= subq_lex->ref_pointer_array[0];
      if (sel_item->type() == Item::FIELD_ITEM &&
          ((Item_field*)sel_item)->field->eq(field))
      {
        res= 0;
      }
    }
    else
    {
      /* Row IN predicate: search all components of the select list. */
      for (uint i= 0; i < ncols; i++)
      {
        Item *sel_item= subq_lex->ref_pointer_array[i];
        if (sel_item->type() == Item::FIELD_ITEM &&
            ((Item_field*)sel_item)->field->eq(field))
        {
          res= i;
          break;
        }
      }
    }
  }
  return res;
}


/**
  Add a possible key to array of possible keys if it's usable as a key

  @param key_fields      Pointer to add key, if usable
  @param and_level       And level, to be stored in KEY_FIELD
  @param cond            Condition predicate
  @param field           Field used in comparision
  @param eq_func         True if we used =, <=>
or IS NULL + @param value Value used for comparison with field + @param num_values Number of values[] that we are comparing against + @param usable_tables Tables which can be used for key optimization + @param sargables IN/OUT Array of found sargable candidates + @param row_col_no if = n that > 0 then field is compared only + against the n-th component of row values + + @note + If we are doing a NOT NULL comparison on a NOT NULL field in a outer join + table, we store this to be able to do not exists optimization later. + + @returns + *key_fields is incremented if we stored a key in the array +*/ + +static void +add_key_field(JOIN *join, + KEY_FIELD **key_fields,uint and_level, Item_bool_func *cond, + Field *field, bool eq_func, Item **value, uint num_values, + table_map usable_tables, SARGABLE_PARAM **sargables, + uint row_col_no= 0) +{ + uint optimize= 0; + if (eq_func && + ((join->is_allowed_hash_join_access() && + field->hash_join_is_possible() && + !(field->table->pos_in_table_list->is_materialized_derived() && + field->table->is_created())) || + (field->table->pos_in_table_list->is_materialized_derived() && + !field->table->is_created() && !(field->flags & BLOB_FLAG)))) + { + optimize= KEY_OPTIMIZE_EQ; + } + else if (!(field->flags & PART_KEY_FLAG)) + { + // Don't remove column IS NULL on a LEFT JOIN table + if (eq_func && (*value)->type() == Item::NULL_ITEM && + field->table->maybe_null && !field->null_ptr) + { + optimize= KEY_OPTIMIZE_EXISTS; + DBUG_ASSERT(num_values == 1); + } + } + if (optimize != KEY_OPTIMIZE_EXISTS) + { + table_map used_tables=0; + bool optimizable=0; + for (uint i=0; ireal_item()->type() == Item::ROW_ITEM) + { + Item_row *value_tuple= (Item_row *) (value[i]->real_item()); + curr_val= value_tuple->element_index(row_col_no - 1); + } + else + curr_val= value[i]; + table_map value_used_tables= curr_val->used_tables(); + used_tables|= value_used_tables; + if (!(value_used_tables & (field->table->map | RAND_TABLE_BIT))) + optimizable=1; + } 
+ if (!optimizable) + return; + if (!(usable_tables & field->table->map)) + { + if (!eq_func || (*value)->type() != Item::NULL_ITEM || + !field->table->maybe_null || field->null_ptr) + return; // Can't use left join optimize + optimize= KEY_OPTIMIZE_EXISTS; + } + else + { + JOIN_TAB *stat=field->table->reginfo.join_tab; + key_map possible_keys=field->get_possible_keys(); + possible_keys.intersect(field->table->keys_in_use_for_query); + stat[0].keys.merge(possible_keys); // Add possible keys + + /* + Save the following cases: + Field op constant + Field LIKE constant where constant doesn't start with a wildcard + Field = field2 where field2 is in a different table + Field op formula + Field IS NULL + Field IS NOT NULL + Field BETWEEN ... + Field IN ... + */ + if (field->flags & PART_KEY_FLAG) + { + stat[0].key_dependent|= used_tables; + if (field->key_start.bits_set()) + stat[0].key_start_dependent= 1; + } + + bool is_const=1; + for (uint i=0; ireal_item()->type() == Item::ROW_ITEM) + { + Item_row *value_tuple= (Item_row *) (value[i]->real_item()); + curr_val= value_tuple->element_index(row_col_no - 1); + } + else + curr_val= value[i]; + if (!(is_const&= curr_val->const_item())) + break; + } + if (is_const) + { + stat[0].const_keys.merge(possible_keys); + bitmap_set_bit(&field->table->cond_set, field->field_index); + } + else if (!eq_func) + { + /* + Save info to be able check whether this predicate can be + considered as sargable for range analysis after reading const tables. + We do not save info about equalities as update_const_equal_items + will take care of updating info on keys from sargable equalities. + */ + (*sargables)--; + (*sargables)->field= field; + (*sargables)->arg_value= value; + (*sargables)->num_values= num_values; + } + if (!eq_func) // eq_func is NEVER true when num_values > 1 + return; + } + } + /* + For the moment eq_func is always true. 
This slot is reserved for future + extensions where we want to remembers other things than just eq comparisons + */ + DBUG_ASSERT(eq_func); + /* Store possible eq field */ + (*key_fields)->field= field; + (*key_fields)->eq_func= eq_func; + (*key_fields)->val= *value; + (*key_fields)->cond= cond; + (*key_fields)->level= and_level; + (*key_fields)->optimize= optimize; + /* + If the condition we are analyzing is NULL-rejecting and at least + one side of the equalities is NULLable, mark the KEY_FIELD object as + null-rejecting. This property is used by: + - add_not_null_conds() to add "column IS NOT NULL" conditions + - best_access_path() to produce better estimates for NULL-able unique keys. + */ + { + if ((cond->functype() == Item_func::EQ_FUNC || + cond->functype() == Item_func::MULT_EQUAL_FUNC) && + ((*value)->maybe_null() || field->real_maybe_null())) + (*key_fields)->null_rejecting= true; + else + (*key_fields)->null_rejecting= false; + } + (*key_fields)->cond_guard= NULL; + + (*key_fields)->sj_pred_no= get_semi_join_select_list_index(field); + (*key_fields)++; +} + +/** + Add possible keys to array of possible keys originated from a simple + predicate. + + @param key_fields Pointer to add key, if usable + @param and_level And level, to be stored in KEY_FIELD + @param cond Condition predicate + @param field_item Field item used for comparison + @param eq_func True if we used =, <=> or IS NULL + @param value Value used for comparison with field_item + @param num_values Number of values[] that we are comparing against + @param usable_tables Tables which can be used for key optimization + @param sargables IN/OUT Array of found sargable candidates + @param row_col_no if = n that > 0 then field is compared only + against the n-th component of row values + + @note + If field items f1 and f2 belong to the same multiple equality and + a key is added for f1, the the same key is added for f2. 
  @returns
    *key_fields is incremented if we stored a key in the array
*/

static void
add_key_equal_fields(JOIN *join, KEY_FIELD **key_fields, uint and_level,
                     Item_bool_func *cond, Item *field_item,
                     bool eq_func, Item **val,
                     uint num_values, table_map usable_tables,
                     SARGABLE_PARAM **sargables, uint row_col_no= 0)
{
  /* Add the key candidate for the field itself ... */
  Field *field= ((Item_field *) (field_item->real_item()))->field;
  add_key_field(join, key_fields, and_level, cond, field,
                eq_func, val, num_values, usable_tables, sargables,
                row_col_no);
  Item_equal *item_equal= field_item->get_item_equal();
  if (item_equal)
  {
    /*
      Add to the set of possible key values every substitution of
      the field for an equal field included into item_equal
    */
    Item_equal_fields_iterator it(*item_equal);
    while (it++)
    {
      Field *equal_field= it.get_curr_field();
      if (!field->eq(equal_field))
      {
        add_key_field(join, key_fields, and_level, cond, equal_field,
                      eq_func, val, num_values, usable_tables,
                      sargables, row_col_no);
      }
    }
  }
}


/**
  Check if an expression is a non-outer field.

  Checks if an expression is a field and belongs to the current select.

  @param  field  Item expression to check

  @return boolean
     @retval TRUE   the expression is a local field
     @retval FALSE  it's something else
*/

static bool
is_local_field (Item *field)
{
  return field->real_item()->type() == Item::FIELD_ITEM
    && !(field->used_tables() & OUTER_REF_TABLE_BIT)
    && !((Item_field *)field->real_item())->get_depended_from();
}


/*
  In this and other functions, and_level is a number that is ever-growing
  and is different for the contents of every AND or OR clause. For example,
  when processing clause

    (a AND b AND c) OR (x AND y)

  we'll have
   * KEY_FIELD elements for (a AND b AND c) are assigned and_level=1
   * KEY_FIELD elements for (x AND y) are assigned and_level=2
   * OR operation is performed, and whatever elements are left after it are
     assigned and_level=3.
+ + The primary reason for having and_level attribute is the OR operation which + uses and_level to mark KEY_FIELDs that should get into the result of the OR + operation +*/ + + +void +Item_cond_and::add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables) +{ + List_iterator_fast li(*argument_list()); + KEY_FIELD *org_key_fields= *key_fields; + + Item *item; + while ((item=li++)) + item->add_key_fields(join, key_fields, and_level, usable_tables, + sargables); + for (; org_key_fields != *key_fields ; org_key_fields++) + org_key_fields->level= *and_level; +} + + +void +Item_cond::add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables) +{ + List_iterator_fast li(*argument_list()); + KEY_FIELD *org_key_fields= *key_fields; + + (*and_level)++; + (li++)->add_key_fields(join, key_fields, and_level, usable_tables, + sargables); + Item *item; + while ((item=li++)) + { + KEY_FIELD *start_key_fields= *key_fields; + (*and_level)++; + item->add_key_fields(join, key_fields, and_level, usable_tables, + sargables); + *key_fields= merge_key_fields(org_key_fields,start_key_fields, + *key_fields, ++(*and_level)); + } +} + + +void +Item_func_trig_cond::add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables) +{ + /* + Subquery optimization: Conditions that are pushed down into subqueries + are wrapped into Item_func_trig_cond. We process the wrapped condition + but need to set cond_guard for KEYUSE elements generated from it. 
+ */ + if (!join->group_list && !join->order && + join->unit->item && + join->unit->item->substype() == Item_subselect::IN_SUBS && + !join->unit->is_unit_op()) + { + KEY_FIELD *save= *key_fields; + args[0]->add_key_fields(join, key_fields, and_level, usable_tables, + sargables); + // Indicate that this ref access candidate is for subquery lookup: + for (; save != *key_fields; save++) + save->cond_guard= get_trig_var(); + } +} + + +void +Item_func_between::add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables) +{ + /* + Build list of possible keys for 'a BETWEEN low AND high'. + It is handled similar to the equivalent condition + 'a >= low AND a <= high': + */ + Item_field *field_item; + bool equal_func= false; + uint num_values= 2; + + bool binary_cmp= (args[0]->real_item()->type() == Item::FIELD_ITEM) + ? ((Item_field*) args[0]->real_item())->field->binary() + : true; + /* + Additional optimization: If 'low = high': + Handle as if the condition was "t.key = low". 
+ */ + if (!negated && args[1]->eq(args[2], binary_cmp)) + { + equal_func= true; + num_values= 1; + } + + /* + Append keys for 'field value[]' if the + condition is of the form:: + ' BETWEEN value[1] AND value[2]' + */ + if (is_local_field(args[0])) + { + field_item= (Item_field *) (args[0]->real_item()); + add_key_equal_fields(join, key_fields, *and_level, this, + field_item, equal_func, &args[1], + num_values, usable_tables, sargables); + } + /* + Append keys for 'value[0] field' if the + condition is of the form: + 'value[0] BETWEEN field1 AND field2' + */ + for (uint i= 1; i <= num_values; i++) + { + if (is_local_field(args[i])) + { + field_item= (Item_field *) (args[i]->real_item()); + add_key_equal_fields(join, key_fields, *and_level, this, + field_item, equal_func, args, + 1, usable_tables, sargables); + } + } +} + + +void +Item_func_in::add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables) +{ + if (is_local_field(args[0]) && !(used_tables() & OUTER_REF_TABLE_BIT)) + { + DBUG_ASSERT(arg_count != 2); + add_key_equal_fields(join, key_fields, *and_level, this, + (Item_field*) (args[0]->real_item()), false, + args + 1, arg_count - 1, usable_tables, sargables); + } + else if (key_item()->type() == Item::ROW_ITEM && + !(used_tables() & OUTER_REF_TABLE_BIT)) + { + Item_row *key_row= (Item_row *) key_item(); + Item **key_col= key_row->addr(0); + uint row_cols= key_row->cols(); + for (uint i= 0; i < row_cols; i++, key_col++) + { + if (is_local_field(*key_col)) + { + Item_field *field_item= (Item_field *)((*key_col)->real_item()); + add_key_equal_fields(join, key_fields, *and_level, this, + field_item, false, args + 1, arg_count - 1, + usable_tables, sargables, i + 1); + } + } + } + +} + + +void +Item_func_ne::add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables) +{ + if (!(used_tables() & OUTER_REF_TABLE_BIT)) + { + /* + QQ: 
perhaps test for !is_local_field(args[1]) is not really needed here. + Other comparison functions, e.g. Item_func_le, Item_func_gt, etc, + do not have this test. See Item_bool_func2::add_key_fieldoptimize_op(). + Check with the optimizer team. + */ + if (is_local_field(args[0]) && !is_local_field(args[1])) + add_key_equal_fields(join, key_fields, *and_level, this, + (Item_field*) (args[0]->real_item()), false, + &args[1], 1, usable_tables, sargables); + /* + QQ: perhaps test for !is_local_field(args[0]) is not really needed here. + */ + if (is_local_field(args[1]) && !is_local_field(args[0])) + add_key_equal_fields(join, key_fields, *and_level, this, + (Item_field*) (args[1]->real_item()), false, + &args[0], 1, usable_tables, sargables); + } +} + + +void +Item_func_like::add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables) +{ + if (is_local_field(args[0]) && with_sargable_pattern()) + { + /* + SELECT * FROM t1 WHERE field LIKE const_pattern + const_pattern starts with a non-wildcard character + */ + add_key_equal_fields(join, key_fields, *and_level, this, + (Item_field*) args[0]->real_item(), false, + args + 1, 1, usable_tables, sargables); + } +} + + +void +Item_bool_func2::add_key_fields_optimize_op(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, + table_map usable_tables, + SARGABLE_PARAM **sargables, + bool equal_func) +{ + /* If item is of type 'field op field/constant' add it to key_fields */ + if (is_local_field(args[0])) + { + add_key_equal_fields(join, key_fields, *and_level, this, + (Item_field*) args[0]->real_item(), equal_func, + args + 1, 1, usable_tables, sargables); + } + if (is_local_field(args[1])) + { + add_key_equal_fields(join, key_fields, *and_level, this, + (Item_field*) args[1]->real_item(), equal_func, + args, 1, usable_tables, sargables); + } +} + + +void +Item_func_null_predicate::add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, + table_map 
usable_tables, + SARGABLE_PARAM **sargables) +{ + /* column_name IS [NOT] NULL */ + if (is_local_field(args[0]) && !(used_tables() & OUTER_REF_TABLE_BIT)) + { + Item *tmp= new (join->thd->mem_root) Item_null(join->thd); + if (unlikely(!tmp)) // Should never be true + return; + add_key_equal_fields(join, key_fields, *and_level, this, + (Item_field*) args[0]->real_item(), + functype() == Item_func::ISNULL_FUNC, + &tmp, 1, usable_tables, sargables); + } +} + + +void +Item_equal::add_key_fields(JOIN *join, KEY_FIELD **key_fields, + uint *and_level, table_map usable_tables, + SARGABLE_PARAM **sargables) +{ + Item *const_item2= get_const(); + Item_equal_fields_iterator it(*this); + if (const_item2) + { + + /* + For each field field1 from item_equal consider the equality + field1=const_item as a condition allowing an index access of the table + with field1 by the keys value of field1. + */ + while (it++) + { + Field *equal_field= it.get_curr_field(); + add_key_field(join, key_fields, *and_level, this, equal_field, + TRUE, &const_item2, 1, usable_tables, sargables); + } + } + else + { + /* + Consider all pairs of different fields included into item_equal. + For each of them (field1, field1) consider the equality + field1=field2 as a condition allowing an index access of the table + with field1 by the keys value of field2. 
+ */ + Item_equal_fields_iterator fi(*this); + while (fi++) + { + Field *field= fi.get_curr_field(); + Item *item; + while ((item= it++)) + { + Field *equal_field= it.get_curr_field(); + if (!field->eq(equal_field)) + { + add_key_field(join, key_fields, *and_level, this, field, + TRUE, &item, 1, usable_tables, + sargables); + } + } + it.rewind(); + } + } +} + + +static uint +max_part_bit(key_part_map bits) +{ + uint found; + for (found=0; bits & 1 ; found++,bits>>=1) ; + return found; +} + + +/** + Add a new keuse to the specified array of KEYUSE objects + + @param[in,out] keyuse_array array of keyuses to be extended + @param[in] key_field info on the key use occurrence + @param[in] key key number for the keyse to be added + @param[in] part key part for the keyuse to be added + + @note + The function builds a new KEYUSE object for a key use utilizing the info + on the left and right parts of the given key use extracted from the + structure key_field, the key number and key part for this key use. + The built object is added to the dynamic array keyuse_array. 
+ + @retval 0 the built object is successfully added + @retval 1 otherwise +*/ + +static bool +add_keyuse(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field, + uint key, uint part) +{ + KEYUSE keyuse; + Field *field= key_field->field; + + keyuse.table= field->table; + keyuse.val= key_field->val; + keyuse.key= key; + if (!is_hash_join_key_no(key)) + { + keyuse.keypart=part; + keyuse.keypart_map= (key_part_map) 1 << part; + } + else + { + keyuse.keypart= field->field_index; + keyuse.keypart_map= (key_part_map) 0; + } + keyuse.used_tables= key_field->val->used_tables(); + keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL; + keyuse.ref_table_rows= 0; + keyuse.null_rejecting= key_field->null_rejecting; + keyuse.cond_guard= key_field->cond_guard; + keyuse.sj_pred_no= key_field->sj_pred_no; + keyuse.validity_ref= 0; + return (insert_dynamic(keyuse_array,(uchar*) &keyuse)); +} + + +/* + Add all keys with uses 'field' for some keypart + If field->and_level != and_level then only mark key_part as const_part + + RETURN + 0 - OK + 1 - Out of memory. +*/ + +static LEX_CSTRING equal_str= { STRING_WITH_LEN("=") }; + +static bool +add_key_part(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field) +{ + Field *field=key_field->field; + TABLE *form= field->table; + THD *thd= form->in_use; + + if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS)) + { + for (uint key=0 ; key < form->s->keys ; key++) + { + if (!(form->keys_in_use_for_query.is_set(key))) + continue; + if (form->key_info[key].flags & (HA_FULLTEXT | HA_SPATIAL)) + continue; // ToDo: ft-keys in non-ft queries. 
SerG + + KEY *keyinfo= form->key_info+key; + uint key_parts= form->actual_n_key_parts(keyinfo); + for (uint part=0 ; part < key_parts ; part++) + { + if (field->eq(form->key_info[key].key_part[part].field)) + { + Data_type_compatibility compat= + field->can_optimize_keypart_ref(key_field->cond, key_field->val); + if (compat == Data_type_compatibility::OK) + { + if (add_keyuse(keyuse_array, key_field, key, part)) + return TRUE; + } + else if (thd->give_notes_for_unusable_keys()) + { + field->raise_note_cannot_use_key_part(thd, key, part, + equal_str, + key_field->val, + compat); + } + } + } + } + if (field->hash_join_is_possible() && + (key_field->optimize & KEY_OPTIMIZE_EQ) && + key_field->val->used_tables()) + { + if (field->can_optimize_hash_join(key_field->cond, key_field->val) != + Data_type_compatibility::OK) + return false; + if (form->is_splittable()) + form->add_splitting_info_for_key_field(key_field); + /* + If a key use is extracted from an equi-join predicate then it is + added not only as a key use for every index whose component can + be evalusted utilizing this key use, but also as a key use for + hash join. Such key uses are marked with a special key number. 
+ */ + if (add_keyuse(keyuse_array, key_field, get_hash_join_key_no(), 0)) + return TRUE; + } + } + return FALSE; +} + +static bool +add_ft_keys(DYNAMIC_ARRAY *keyuse_array, + JOIN_TAB *stat,COND *cond,table_map usable_tables) +{ + Item_func_match *cond_func=NULL; + + if (!cond) + return FALSE; + + if (cond->type() == Item::FUNC_ITEM) + { + Item_func *func=(Item_func *)cond; + Item_func::Functype functype= func->functype(); + if (functype == Item_func::FT_FUNC) + cond_func=(Item_func_match *)cond; + else if (func->argument_count() == 2) + { + Item *arg0=(Item *)(func->arguments()[0]), + *arg1=(Item *)(func->arguments()[1]); + if (arg1->const_item() && arg1->cols() == 1 && + arg0->type() == Item::FUNC_ITEM && + ((Item_func *) arg0)->functype() == Item_func::FT_FUNC && + ((functype == Item_func::GE_FUNC && arg1->val_real() > 0) || + (functype == Item_func::GT_FUNC && arg1->val_real() >=0))) + cond_func= (Item_func_match *) arg0; + else if (arg0->const_item() && arg0->cols() == 1 && + arg1->type() == Item::FUNC_ITEM && + ((Item_func *) arg1)->functype() == Item_func::FT_FUNC && + ((functype == Item_func::LE_FUNC && arg0->val_real() > 0) || + (functype == Item_func::LT_FUNC && arg0->val_real() >=0))) + cond_func= (Item_func_match *) arg1; + } + } + else if (cond->type() == Item::COND_ITEM) + { + List_iterator_fast li(*((Item_cond*) cond)->argument_list()); + + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + Item *item; + while ((item=li++)) + { + if (add_ft_keys(keyuse_array,stat,item,usable_tables)) + return TRUE; + } + } + } + + if (!cond_func || cond_func->key == NO_SUCH_KEY || + !(usable_tables & cond_func->table->map)) + return FALSE; + + KEYUSE keyuse; + keyuse.table= cond_func->table; + keyuse.val = cond_func; + keyuse.key = cond_func->key; + keyuse.keypart= FT_KEYPART; + keyuse.used_tables=cond_func->key_item()->used_tables(); + keyuse.optimize= 0; + keyuse.ref_table_rows= 0; + keyuse.keypart_map= 0; + keyuse.sj_pred_no= UINT_MAX; + 
keyuse.validity_ref= 0; + keyuse.null_rejecting= FALSE; + return insert_dynamic(keyuse_array,(uchar*) &keyuse); +} + + +static int +sort_keyuse(KEYUSE *a,KEYUSE *b) +{ + int res; + if (a->table->tablenr != b->table->tablenr) + return (int) (a->table->tablenr - b->table->tablenr); + if (a->key != b->key) + return (int) (a->key - b->key); + if (a->key == MAX_KEY && b->key == MAX_KEY && + a->used_tables != b->used_tables) + return (int) ((ulong) a->used_tables - (ulong) b->used_tables); + if (a->keypart != b->keypart) + return (int) (a->keypart - b->keypart); + // Place const values before other ones + if ((res= MY_TEST((a->used_tables & ~OUTER_REF_TABLE_BIT)) - + MY_TEST((b->used_tables & ~OUTER_REF_TABLE_BIT)))) + return res; + /* Place rows that are not 'OPTIMIZE_REF_OR_NULL' first */ + return (int) ((a->optimize & KEY_OPTIMIZE_REF_OR_NULL) - + (b->optimize & KEY_OPTIMIZE_REF_OR_NULL)); +} + + +/* + Add to KEY_FIELD array all 'ref' access candidates within nested join. + + This function populates KEY_FIELD array with entries generated from the + ON condition of the given nested join, and does the same for nested joins + contained within this nested join. + + @param[in] nested_join_table Nested join pseudo-table to process + @param[in,out] end End of the key field array + @param[in,out] and_level And-level + @param[in,out] sargables Array of found sargable candidates + + + @note + We can add accesses to the tables that are direct children of this nested + join (1), and are not inner tables w.r.t their neighbours (2). + + Example for #1 (outer brackets pair denotes nested join this function is + invoked for): + @code + ... LEFT JOIN (t1 LEFT JOIN (t2 ... ) ) ON cond + @endcode + Example for #2: + @code + ... LEFT JOIN (t1 LEFT JOIN t2 ) ON cond + @endcode + In examples 1-2 for condition cond, we can add 'ref' access candidates to + t1 only. + Example #3: + @code + ... 
LEFT JOIN (t1, t2 LEFT JOIN t3 ON inner_cond) ON cond + @endcode + Here we can add 'ref' access candidates for t1 and t2, but not for t3. +*/ + +static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table, + KEY_FIELD **end, uint *and_level, + SARGABLE_PARAM **sargables) +{ + List_iterator li(nested_join_table->nested_join->join_list); + List_iterator li2(nested_join_table->nested_join->join_list); + bool have_another = FALSE; + table_map tables= 0; + TABLE_LIST *table; + DBUG_ASSERT(nested_join_table->nested_join); + + while ((table= li++) || (have_another && (li=li2, have_another=FALSE, + (table= li++)))) + { + if (table->nested_join) + { + if (!table->on_expr) + { + /* It's a semi-join nest. Walk into it as if it wasn't a nest */ + have_another= TRUE; + li2= li; + li= List_iterator(table->nested_join->join_list); + } + else + add_key_fields_for_nj(join, table, end, and_level, sargables); + } + else + if (!table->on_expr) + tables |= table->table->map; + } + if (nested_join_table->on_expr) + nested_join_table->on_expr->add_key_fields(join, end, and_level, tables, + sargables); +} + + +void count_cond_for_nj(SELECT_LEX *sel, TABLE_LIST *nested_join_table) +{ + List_iterator li(nested_join_table->nested_join->join_list); + List_iterator li2(nested_join_table->nested_join->join_list); + bool have_another = FALSE; + TABLE_LIST *table; + + while ((table= li++) || (have_another && (li=li2, have_another=FALSE, + (table= li++)))) + if (table->nested_join) + { + if (!table->on_expr) + { + /* It's a semi-join nest. Walk into it as if it wasn't a nest */ + have_another= TRUE; + li2= li; + li= List_iterator(table->nested_join->join_list); + } + else + count_cond_for_nj(sel, table); + } + if (nested_join_table->on_expr) + nested_join_table->on_expr->walk(&Item::count_sargable_conds, 0, sel); + +} + +/** + Update keyuse array with all possible keys we can use to fetch rows. 
+ + @param thd + @param[out] keyuse Put here ordered array of KEYUSE structures + @param join_tab Array in tablenr_order + @param tables Number of tables in join + @param cond WHERE condition (note that the function analyzes + join_tab[i]->on_expr too) + @param normal_tables Tables not inner w.r.t some outer join (ones + for which we can make ref access based the WHERE + clause) + @param select_lex current SELECT + @param[out] sargables Array of found sargable candidates + + @retval + 0 OK + @retval + 1 Out of memory. +*/ + +static bool +update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab, + uint tables, COND *cond, table_map normal_tables, + SELECT_LEX *select_lex, SARGABLE_PARAM **sargables) +{ + uint and_level,i; + KEY_FIELD *key_fields, *end, *field; + uint sz; + uint m= MY_MAX(select_lex->max_equal_elems,1); + DBUG_ENTER("update_ref_and_keys"); + DBUG_PRINT("enter", ("normal_tables: %llx", normal_tables)); + + SELECT_LEX *sel=thd->lex->current_select; + sel->cond_count= 0; + sel->between_count= 0; + if (cond) + cond->walk(&Item::count_sargable_conds, 0, sel); + for (i=0 ; i < tables ; i++) + { + if (*join_tab[i].on_expr_ref) + (*join_tab[i].on_expr_ref)->walk(&Item::count_sargable_conds, 0, sel); + } + { + List_iterator li(*join_tab->join->join_list); + TABLE_LIST *table; + while ((table= li++)) + { + if (table->nested_join) + count_cond_for_nj(sel, table); + } + } + + /* + We use the same piece of memory to store both KEY_FIELD + and SARGABLE_PARAM structure. + KEY_FIELD values are placed at the beginning this memory + while SARGABLE_PARAM values are put at the end. + All predicates that are used to fill arrays of KEY_FIELD + and SARGABLE_PARAM structures have at most 2 arguments + except BETWEEN predicates that have 3 arguments and + IN predicates. + This any predicate if it's not BETWEEN/IN can be used + directly to fill at most 2 array elements, either of KEY_FIELD + or SARGABLE_PARAM type. 
For a BETWEEN predicate 3 elements + can be filled as this predicate is considered as + saragable with respect to each of its argument. + An IN predicate can require at most 1 element as currently + it is considered as sargable only for its first argument. + Multiple equality can add elements that are filled after + substitution of field arguments by equal fields. There + can be not more than select_lex->max_equal_elems such + substitutions. + */ + sz= MY_MAX(sizeof(KEY_FIELD),sizeof(SARGABLE_PARAM))* + ((sel->cond_count*2 + sel->between_count)*m+1); + if (!(key_fields=(KEY_FIELD*) thd->alloc(sz))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + and_level= 0; + field= end= key_fields; + *sargables= (SARGABLE_PARAM *) key_fields + + (sz - sizeof((*sargables)[0].field))/sizeof(SARGABLE_PARAM); + /* set a barrier for the array of SARGABLE_PARAM */ + (*sargables)[0].field= 0; + + if (my_init_dynamic_array2(thd->mem_root->psi_key, keyuse, sizeof(KEYUSE), + thd->alloc(sizeof(KEYUSE) * 20), 20, 64, + MYF(MY_THREAD_SPECIFIC))) + DBUG_RETURN(TRUE); + + if (cond) + { + KEY_FIELD *saved_field= field; + cond->add_key_fields(join_tab->join, &end, &and_level, normal_tables, + sargables); + for (; field != end ; field++) + { + + /* Mark that we can optimize LEFT JOIN */ + if (field->val->type() == Item::NULL_ITEM && + !field->field->real_maybe_null()) + field->field->table->reginfo.not_exists_optimize=1; + } + field= saved_field; + } + for (i=0 ; i < tables ; i++) + { + /* + Block the creation of keys for inner tables of outer joins. + Here only the outer joins that can not be converted to + inner joins are left and all nests that can be eliminated + are flattened. + In the future when we introduce conditional accesses + for inner tables in outer joins these keys will be taken + into account as well. 
+ */ + if (*join_tab[i].on_expr_ref) + (*join_tab[i].on_expr_ref)->add_key_fields(join_tab->join, &end, + &and_level, + join_tab[i].table->map, + sargables); + } + + /* Process ON conditions for the nested joins */ + { + List_iterator li(*join_tab->join->join_list); + TABLE_LIST *table; + while ((table= li++)) + { + if (table->nested_join) + add_key_fields_for_nj(join_tab->join, table, &end, &and_level, + sargables); + } + } + + /* fill keyuse with found key parts */ + for ( ; field != end ; field++) + { + if (add_key_part(keyuse,field)) + DBUG_RETURN(TRUE); + } + + if (select_lex->ftfunc_list->elements) + { + if (add_ft_keys(keyuse,join_tab,cond,normal_tables)) + DBUG_RETURN(TRUE); + } + + DBUG_RETURN(FALSE); +} + +/* + check if key could be used with eq_ref + + The assumption is that all previous key parts where used +*/ + +static void remember_if_eq_ref_key(JOIN *join, KEYUSE *use) +{ + DBUG_ASSERT(use->keypart != FT_KEYPART && use->key != MAX_KEY); + TABLE *table= use->table; + KEY *key= table->key_info+use->key; + ulong key_flags= table->actual_key_flags(key); + + /* + Check if possible eq_ref key + This may include keys that does not have HA_NULL_PART_KEY + set, but this is ok as best_access_path will resolve this. + */ + if ((key_flags & (HA_NOSAME | HA_EXT_NOSAME))) + { + uint key_parts= table->actual_n_key_parts(key); + if (use->keypart+1 == key_parts) + join->eq_ref_tables|= table->map; + } +} + + +/** + Sort the array of possible keys and remove the following key parts: + - ref if there is a keypart which is a ref and a const. + (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d, + then we skip the key part corresponding to b=t2.d) + - keyparts without previous keyparts + (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is + used in the query, we drop the partial key parts from consideration). + Special treatment for ft-keys. + Update join->eq_ref_tables with a bitmap of all tables that can possible + have a EQ_REF key. 
+*/ + +bool sort_and_filter_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse, + bool skip_unprefixed_keyparts) +{ + THD *thd= join->thd; + KEYUSE key_end, *prev, *save_pos, *use; + uint found_eq_constant, i; + bool found_unprefixed_key_part= 0; + + join->eq_ref_tables= 0; + DBUG_ASSERT(keyuse->elements); + + my_qsort(keyuse->buffer, keyuse->elements, sizeof(KEYUSE), + (qsort_cmp) sort_keyuse); + + bzero((char*) &key_end, sizeof(key_end)); /* Add for easy testing */ + if (insert_dynamic(keyuse, (uchar*) &key_end)) + return TRUE; + + if (optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_WITH_KEYS)) + generate_derived_keys(keyuse); + + use= save_pos= dynamic_element(keyuse,0,KEYUSE*); + prev= &key_end; + found_eq_constant= 0; + /* Loop over all elements except the last 'key_end' */ + for (i=0 ; i < keyuse->elements-1 ; i++,use++) + { + if (!use->is_for_hash_join()) + { + if (!(use->used_tables & ~OUTER_REF_TABLE_BIT) && + use->optimize != KEY_OPTIMIZE_REF_OR_NULL) + use->table->const_key_parts[use->key]|= use->keypart_map; + if (use->keypart != FT_KEYPART) + { + if (use->key == prev->key && use->table == prev->table) + { + if (prev->keypart == use->keypart && found_eq_constant) + continue; + if (prev->keypart+1 < use->keypart) + { + found_unprefixed_key_part= 1; + if (skip_unprefixed_keyparts) + continue; /* remove */ + } + } + else + { + /* Key changed, check if previous key was a primary/unique key lookup */ + if (prev != &key_end && !found_unprefixed_key_part) + remember_if_eq_ref_key(join, prev); + found_unprefixed_key_part= 0; + if (use->keypart != 0) + { + found_unprefixed_key_part= 1; + if (skip_unprefixed_keyparts) + continue; /* remove - first found key part must be 0 */ + } + } + } + else /* FT_KEY_PART */ + { + if (prev != &key_end && !found_unprefixed_key_part) + remember_if_eq_ref_key(join, prev); + found_unprefixed_key_part= 1; // This key cannot be EQ_REF + } + prev= use; + found_eq_constant= !use->used_tables; + 
use->table->reginfo.join_tab->checked_keys.set_bit(use->key); + } + else + { + if (prev != &key_end && !found_unprefixed_key_part) + remember_if_eq_ref_key(join, prev); + prev= &key_end; + } + /* + Old gcc used a memcpy(), which is undefined if save_pos==use: + http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410 + http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480 + This also disables a valgrind warning, so better to have the test. + */ + if (save_pos != use) + *save_pos= *use; + /* Save ptr to first use */ + if (!use->table->reginfo.join_tab->keyuse) + use->table->reginfo.join_tab->keyuse= save_pos; + save_pos++; + } + if (prev != &key_end && !found_unprefixed_key_part) + remember_if_eq_ref_key(join, prev); + i= (uint) (save_pos-(KEYUSE*) keyuse->buffer); + (void) set_dynamic(keyuse,(uchar*) &key_end,i); + keyuse->elements= i; + + return FALSE; +} + + +/** + Update some values in keyuse for faster choose_plan() loop. +*/ + +void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array) +{ + KEYUSE *end,*keyuse= dynamic_element(keyuse_array, 0, KEYUSE*); + + for (end= keyuse+ keyuse_array->elements ; keyuse < end ; keyuse++) + { + table_map map; + /* + If we find a ref, assume this table matches a proportional + part of this table. + For example 100 records matching a table with 5000 records + gives 5000/100 = 50 records per key + Constant tables are ignored. + To avoid bad matches, we don't make ref_table_rows less than 100. 
+ */ + keyuse->ref_table_rows= ~(ha_rows) 0; // If no ref + if (keyuse->used_tables & + (map= (keyuse->used_tables & ~join->const_table_map & + ~OUTER_REF_TABLE_BIT))) + { + uint n_tables= my_count_bits(map); + if (n_tables == 1) // Only one table + { + DBUG_ASSERT(!(map & PSEUDO_TABLE_BITS)); // Must be a real table + Table_map_iterator it(map); + int tablenr= it.next_bit(); + DBUG_ASSERT(tablenr != Table_map_iterator::BITMAP_END); + TABLE *tmp_table=join->table[tablenr]; + if (tmp_table) // already created + keyuse->ref_table_rows= MY_MAX(tmp_table->file->stats.records, 100); + } + } + /* + Outer reference (external field) is constant for single executing + of subquery + */ + if (keyuse->used_tables == OUTER_REF_TABLE_BIT) + keyuse->ref_table_rows= 1; + } +} + +/** + Check for the presence of AGGFN(DISTINCT a) queries that may be subject + to loose index scan. + + Check if the query is a subject to AGGFN(DISTINCT) using loose index scan + (QUICK_GROUP_MIN_MAX_SELECT). + Optionally (if out_args is supplied) will push the arguments of + AGGFN(DISTINCT) to the list + + Check for every COUNT(DISTINCT), AVG(DISTINCT) or + SUM(DISTINCT). These can be resolved by Loose Index Scan as long + as all the aggregate distinct functions refer to the same + fields. Thus: + + SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT b, a)... => can use LIS + SELECT AGGFN(DISTINCT a), AGGFN(DISTINCT a) ... => can use LIS + SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT a) ... => cannot use LIS + SELECT AGGFN(DISTINCT a), AGGFN(DISTINCT b) ... => cannot use LIS + etc. + + @param join the join to check + @param[out] out_args Collect the arguments of the aggregate functions + to a list. We don't worry about duplicates as + these will be sorted out later in + get_best_group_min_max. + + @return does the query qualify for indexed AGGFN(DISTINCT) + @retval true it does + @retval false AGGFN(DISTINCT) must apply distinct in it. 
+*/ + +bool +is_indexed_agg_distinct(JOIN *join, List *out_args) +{ + Item_sum **sum_item_ptr; + bool result= false; + + if (join->table_count != 1 || /* reference more than 1 table */ + join->select_distinct || /* or a DISTINCT */ + join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */ + return false; + + Bitmap first_aggdistinct_fields; + bool first_aggdistinct_fields_initialized= false; + for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++) + { + Item_sum *sum_item= *sum_item_ptr; + Item *expr; + /* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */ + switch (sum_item->sum_func()) + { + case Item_sum::MIN_FUNC: + case Item_sum::MAX_FUNC: + continue; + case Item_sum::COUNT_DISTINCT_FUNC: + break; + case Item_sum::AVG_DISTINCT_FUNC: + case Item_sum::SUM_DISTINCT_FUNC: + if (sum_item->get_arg_count() == 1) + break; + /* fall through */ + default: return false; + } + /* + We arrive here for every COUNT(DISTINCT),AVG(DISTINCT) or SUM(DISTINCT). + Collect the arguments of the aggregate functions to a list. + We don't worry about duplicates as these will be sorted out later in + get_best_group_min_max + */ + Bitmap cur_aggdistinct_fields; + cur_aggdistinct_fields.clear_all(); + for (uint i= 0; i < sum_item->get_arg_count(); i++) + { + expr= sum_item->get_arg(i); + /* The AGGFN(DISTINCT) arg is not an attribute? */ + if (expr->real_item()->type() != Item::FIELD_ITEM) + return false; + + Item_field* item= static_cast(expr->real_item()); + if (out_args) + out_args->push_back(item, join->thd->mem_root); + + cur_aggdistinct_fields.set_bit(item->field->field_index); + result= true; + } + /* + If there are multiple aggregate functions, make sure that they all + refer to exactly the same set of columns. 
+ */ + if (!first_aggdistinct_fields_initialized) + { + first_aggdistinct_fields= cur_aggdistinct_fields; + first_aggdistinct_fields_initialized=true; + } + else if (first_aggdistinct_fields != cur_aggdistinct_fields) + return false; + } + + return result; +} + + +/** + Discover the indexes that can be used for GROUP BY or DISTINCT queries. + + If the query has a GROUP BY clause, find all indexes that contain all + GROUP BY fields, and add those indexes to join->const_keys. + + If the query has a DISTINCT clause, find all indexes that contain all + SELECT fields, and add those indexes to join->const_keys. + This allows later on such queries to be processed by a + QUICK_GROUP_MIN_MAX_SELECT. + + @param join + @param join_tab + + @return + None +*/ + +static void +add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab) +{ + List indexed_fields; + List_iterator indexed_fields_it(indexed_fields); + ORDER *cur_group; + Item_field *cur_item; + key_map possible_keys(0); + + if (join->group_list) + { /* Collect all query fields referenced in the GROUP clause. */ + for (cur_group= join->group_list; cur_group; cur_group= cur_group->next) + (*cur_group->item)->walk(&Item::collect_item_field_processor, 0, + &indexed_fields); + } + else if (join->select_distinct) + { /* Collect all query fields referenced in the SELECT clause. */ + List &select_items= join->fields_list; + List_iterator select_items_it(select_items); + Item *item; + while ((item= select_items_it++)) + item->walk(&Item::collect_item_field_processor, 0, &indexed_fields); + } + else if (!join->tmp_table_param.sum_func_count || + !is_indexed_agg_distinct(join, &indexed_fields)) + { + /* + There where no GROUP BY fields and also either no aggregate + functions or not all aggregate functions where used with the + same DISTINCT (or MIN() / MAX() that works similarly). + Nothing to do there. 
+ */ + return; + } + + if (indexed_fields.elements == 0) + { + /* There where no index we could use to satisfy the GROUP BY */ + return; + } + + /* Intersect the keys of all group fields. */ + cur_item= indexed_fields_it++; + possible_keys.merge(cur_item->field->part_of_key); + while ((cur_item= indexed_fields_it++)) + { + possible_keys.intersect(cur_item->field->part_of_key); + } + + if (!possible_keys.is_clear_all()) + join_tab->const_keys.merge(possible_keys); +} + + +/***************************************************************************** + Go through all combinations of not marked tables and find the one + which uses least records +*****************************************************************************/ + +/** Save const tables first as used tables. */ + +void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) +{ + join->positions[idx].table= table; + join->positions[idx].key=key; + join->positions[idx].records_read=1.0; /* This is a const table */ + join->positions[idx].cond_selectivity= 1.0; + join->positions[idx].ref_depend_map= 0; + join->positions[idx].partial_join_cardinality= 1; + +// join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */ + join->positions[idx].sj_strategy= SJ_OPT_NONE; + join->positions[idx].use_join_buffer= FALSE; + join->positions[idx].range_rowid_filter_info= 0; + + /* Move the const table as down as possible in best_ref */ + JOIN_TAB **pos=join->best_ref+idx+1; + JOIN_TAB *next=join->best_ref[idx]; + for (;next != table ; pos++) + { + JOIN_TAB *tmp=pos[0]; + pos[0]=next; + next=tmp; + } + join->best_ref[idx]=table; + join->positions[idx].spl_plan= 0; + join->positions[idx].spl_pd_boundary= 0; +} + + +/* + Estimate how many records we will get if we read just this table and apply + a part of WHERE that can be checked for it. + + @param s Current JOIN_TAB + @param use_cond_selectivity Value of optimizer_use_condition_selectivity. + If > 1 then use table->cond_selecitivity. 
+ @param force_estimate Set to 1 if we should not call + use_found_constraint. To be deleted in 11.0 + @return 0.0 No matching rows + @return >= 1.0 Number of expected matching rows + + @detail + Estimate how many records we will get if we + - read the given table with its "independent" access method (either quick + select or full table/index scan), + - apply the part of WHERE that refers only to this table. + + @see also + table_cond_selectivity() produces selectivity of condition that is checked + after joining rows from this table to rows from preceding tables. +*/ + +static double apply_selectivity_for_table(JOIN_TAB *s, + uint use_cond_selectivity, + bool *force_estimate) +{ + ha_rows records; + double dbl_records; + + if (use_cond_selectivity > 1) + { + TABLE *table= s->table; + double sel= table->cond_selectivity; + double table_records= rows2double(s->records); + dbl_records= table_records * sel; + *force_estimate= 1; // Don't call use_found_constraint() + return dbl_records; + } + + records = s->found_records; + + /* + If applicable, get a more accurate estimate. + */ + DBUG_ASSERT(s->table->opt_range_condition_rows <= s->found_records); + if (s->table->opt_range_condition_rows != s->found_records) + { + *force_estimate= 1; // Don't call use_found_constraint() + records= s->table->opt_range_condition_rows; + } + + dbl_records= (double)records; + return dbl_records; +} + +/* + Take into account that the table's WHERE clause has conditions on earlier + tables that can reduce the number of accepted rows. + + @param records Number of original rows (after selectivity) + + If there is a filtering condition on the table (i.e. ref analyzer found + at least one "table.keyXpartY= exprZ", where exprZ refers only to tables + preceding this table in the join order we're now considering), then + assume that 25% of the rows will be filtered out by this condition. + + This heuristic is supposed to force tables used in exprZ to be before + this table in join order. 
+*/ + +inline double use_found_constraint(double records) +{ + records-= records/4; + return records; +} + + +/* + Calculate the cost of reading a set of rows through an index + + Logically this is identical to the code in multi_range_read_info_const() + except that the function also takes into account io_blocks and multiple + ranges. + + One main difference between the functions is that + multi_range_read_info_const() adds a very small cost per range + (IDX_LOOKUP_COST) and also MULTI_RANGE_READ_SETUP_COST, to ensure that + 'ref' is preferred slightly over ranges. +*/ + +double cost_for_index_read(const THD *thd, const TABLE *table, uint key, + ha_rows records, ha_rows worst_seeks) +{ + DBUG_ENTER("cost_for_index_read"); + double cost; + handler *file= table->file; + + set_if_smaller(records, (ha_rows) thd->variables.max_seeks_for_key); + if (file->is_clustering_key(key)) + cost= file->read_time(key, 1, records); + else + if (table->covering_keys.is_set(key)) + cost= file->keyread_time(key, 1, records); + else + cost= ((file->keyread_time(key, 0, records) + + file->read_time(key, 1, MY_MIN(records, worst_seeks)))); + + DBUG_PRINT("statistics", ("cost: %.3f", cost)); + DBUG_RETURN(cost); +} + + +/* + Adjust cost from table->quick_costs calculated by + multi_range_read_info_const() to be comparable with cost_for_index_read() + + This function is needed because best_access_path() doesn't add + TIME_FOR_COMPARE to its costs until very late. + Preferably we should fix so that all costs are comparable. + (All compared costs should include TIME_FOR_COMPARE for all found + rows). 
+*/ + +double adjust_quick_cost(double quick_cost, ha_rows records) +{ + double cost= (quick_cost - MULTI_RANGE_READ_SETUP_COST - + rows2double(records)/TIME_FOR_COMPARE); + DBUG_ASSERT(cost > 0.0); + return cost; +} + + +/* + @brief + Compute the fanout of hash join operation using EITS data +*/ + +double hash_join_fanout(JOIN *join, JOIN_TAB *s, table_map remaining_tables, + double rnd_records, KEYUSE *hj_start_key, + bool *stats_found) +{ + THD *thd= join->thd; + /* + Before doing the hash join, we will scan the table and apply the local part + of the WHERE condition. This will produce rnd_records. + + The EITS statistics describes the entire table. Calling + + table->field[N]->get_avg_frequency() + + produces average #rows in the table with some value. + + What happens if we filter out rows so that rnd_records rows are left? + Something between the two outcomes: + A. filtering removes a fraction of rows for each value: + avg_frequency=avg_frequency * condition_selectivity + + B. filtering removes entire groups of rows with the same value, but + the remaining groups remain of the same size. + + We make pessimistic assumption and assume B. + We also handle an edge case: if rnd_records is less than avg_frequency, + assume we'll get rnd_records rows with the same value, and return + rnd_records as the fanout estimate. + */ + double min_freq= rnd_records; + + Json_writer_object trace_obj(thd, "hash_join_cardinality"); + /* + There can be multiple KEYUSE referring to same or different columns + + KEYUSE(tbl.col1 = ...) + KEYUSE(tbl.col1 = ...) + KEYUSE(tbl.col2 = ...) + + Hash join code can use multiple columns: (col1, col2) for joining. + We need n_distinct({col1, col2}). + + EITS only has statistics on individual columns: n_distinct(col1), + n_distinct(col2). + + Our current solution is to be very conservative and use selectivity + of one column with the lowest avg_frequency. 
+ + In the future, we should an approach that cautiosly takes into account + multiple KEYUSEs either multiply by number of equalities or by sqrt + of the second most selective equality. + */ + Json_writer_array trace_arr(thd, "hash_join_columns"); + for (KEYUSE *keyuse= hj_start_key; + keyuse->table == s->table && is_hash_join_key_no(keyuse->key); + keyuse++) + { + if (!(remaining_tables & keyuse->used_tables) && + (!keyuse->validity_ref || *keyuse->validity_ref) && + s->access_from_tables_is_allowed(keyuse->used_tables, + join->sjm_lookup_tables)) + { + Field *field= s->table->field[keyuse->keypart]; + if (is_eits_usable(field)) + { + double freq= field->read_stats->get_avg_frequency(); + + Json_writer_object trace_field(thd); + trace_field.add("field",field->field_name.str). + add("avg_frequency", freq); + if (freq < min_freq) + min_freq= freq; + *stats_found= 1; + } + } + } + trace_arr.end(); + trace_obj.add("rows", min_freq); + return min_freq; +} + + +/** + Find the best access path for an extension of a partial execution + plan and add this path to the plan. + + The function finds the best access path to table 's' from the passed + partial plan where an access path is the general term for any means to + access the data in 's'. An access path may use either an index or a scan, + whichever is cheaper. The input partial plan is passed via the array + 'join->positions' of length 'idx'. The chosen access method for 's' and its + cost are stored in 'join->positions[idx]'. 
+ + @param join pointer to the structure providing all context info + for the query + @param s the table to be joined by the function + @param thd thread for the connection that submitted the query + @param remaining_tables set of tables not included into the partial plan yet + @param idx the length of the partial plan + @param disable_jbuf TRUE<=> Don't use join buffering + @param record_count estimate for the number of records returned by the + partial plan + @param pos OUT Table access plan + @param loose_scan_pos OUT Table plan that uses loosescan, or set cost to + DBL_MAX if not possible. + + @return + None +*/ + +void +best_access_path(JOIN *join, + JOIN_TAB *s, + table_map remaining_tables, + const POSITION *join_positions, + uint idx, + bool disable_jbuf, + double record_count, + POSITION *pos, + POSITION *loose_scan_pos) +{ + THD *thd= join->thd; + uint use_cond_selectivity= thd->variables.optimizer_use_condition_selectivity; + KEYUSE *best_key= 0; + uint best_max_key_part= 0; + my_bool found_constraint= 0; + double best= DBL_MAX; + double best_time= DBL_MAX; + double records= DBL_MAX; + ha_rows records_for_key= 0; + table_map best_ref_depends_map= 0; + /* + key_dependent is 0 if all key parts could be used or if there was an + EQ_REF table found (which uses all key parts). In other words, we cannot + find a better key for the table even if remaining_tables is reduced. + Otherwise it's a bitmap of tables that could improve key usage. 
+ */ + table_map key_dependent= 0; + Range_rowid_filter_cost_info *best_filter= 0; + double tmp; + double keyread_tmp= 0; + ha_rows rec; + bool best_uses_jbuf= FALSE; + MY_BITMAP *eq_join_set= &s->table->eq_join_set; + KEYUSE *hj_start_key= 0; + SplM_plan_info *spl_plan= 0; + table_map spl_pd_boundary= 0; + Range_rowid_filter_cost_info *filter= 0; + const char* cause= NULL; + enum join_type best_type= JT_UNKNOWN, type= JT_UNKNOWN; + + disable_jbuf= disable_jbuf || idx == join->const_tables; + + Loose_scan_opt loose_scan_opt; + DBUG_ENTER("best_access_path"); + + Json_writer_object trace_wrapper(thd, "best_access_path"); + + trace_wrapper.add_table_name(s); + + bitmap_clear_all(eq_join_set); + + loose_scan_opt.init(join, s, remaining_tables); + + if (s->table->is_splittable()) + spl_plan= s->choose_best_splitting(idx, + remaining_tables, + join_positions, + &spl_pd_boundary); + + Json_writer_array trace_paths(thd, "considered_access_paths"); + if (s->keyuse) + { /* Use key if possible */ + KEYUSE *keyuse; + KEYUSE *start_key=0; + TABLE *table= s->table; + double best_records= DBL_MAX; + uint max_key_part=0; + + /* Test how we can use keys */ + rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key + for (keyuse=s->keyuse ; keyuse->table == table ;) + { + KEY *keyinfo; + ulong key_flags; + uint key_parts; + key_part_map found_part= 0; + key_part_map notnull_part=0; // key parts which won't have NULL in lookup tuple. 
+ table_map found_ref= 0; + uint key= keyuse->key; + filter= 0; + bool ft_key= (keyuse->keypart == FT_KEYPART); + /* Bitmap of keyparts where the ref access is over 'keypart=const': */ + key_part_map const_part= 0; + /* The or-null keypart in ref-or-null access: */ + key_part_map ref_or_null_part= 0; + key_part_map all_parts= 0; + + if (is_hash_join_key_no(key)) + { + /* + Hash join as any join employing join buffer can be used to join + only those tables that are joined after the first non const table + */ + if (!(remaining_tables & keyuse->used_tables) && + idx > join->const_tables) + { + if (!hj_start_key) + hj_start_key= keyuse; + bitmap_set_bit(eq_join_set, keyuse->keypart); + } + keyuse++; + continue; + } + + keyinfo= table->key_info+key; + key_parts= table->actual_n_key_parts(keyinfo); + key_flags= table->actual_key_flags(keyinfo); + + /* Calculate how many key segments of the current key we can use */ + start_key= keyuse; + + loose_scan_opt.next_ref_key(); + DBUG_PRINT("info", ("Considering ref access on key %s", + keyuse->table->key_info[keyuse->key].name.str)); + + do /* For each keypart */ + { + uint keypart= keyuse->keypart; + table_map best_part_found_ref= 0, key_parts_dependent= 0; + double best_prev_record_reads= DBL_MAX; + + do /* For each way to access the keypart */ + { + /* + if 1. expression doesn't refer to forward tables + 2. 
we won't get two ref-or-null's + */ + all_parts|= keyuse->keypart_map; + if (!(remaining_tables & keyuse->used_tables) && + (!keyuse->validity_ref || *keyuse->validity_ref) && + s->access_from_tables_is_allowed(keyuse->used_tables, + join->sjm_lookup_tables) && + !(ref_or_null_part && (keyuse->optimize & + KEY_OPTIMIZE_REF_OR_NULL))) + { + found_part|= keyuse->keypart_map; + key_parts_dependent= 0; + if (!(keyuse->used_tables & ~join->const_table_map)) + const_part|= keyuse->keypart_map; + + if (!keyuse->val->maybe_null() || keyuse->null_rejecting) + notnull_part|=keyuse->keypart_map; + + double tmp2= prev_record_reads(join_positions, idx, + (found_ref | keyuse->used_tables)); + if (tmp2 < best_prev_record_reads) + { + best_part_found_ref= keyuse->used_tables & ~join->const_table_map; + best_prev_record_reads= tmp2; + } + if (rec > keyuse->ref_table_rows) + rec= keyuse->ref_table_rows; + /* + If there is one 'key_column IS NULL' expression, we can + use this ref_or_null optimisation of this field + */ + if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) + ref_or_null_part |= keyuse->keypart_map; + } + else if (!(found_part & keyuse->keypart_map)) + key_parts_dependent|= keyuse->used_tables; + + loose_scan_opt.add_keyuse(remaining_tables, keyuse); + keyuse++; + } while (keyuse->table == table && keyuse->key == key && + keyuse->keypart == keypart); + /* If we found a usable key, remember the dependent tables */ + if (all_parts & 1) + key_dependent|= key_parts_dependent; + found_ref|= best_part_found_ref; + } while (keyuse->table == table && keyuse->key == key); + + /* + Assume that that each key matches a proportional part of table. 
+ */ + if (!found_part && !ft_key && !loose_scan_opt.have_a_case()) + continue; // Nothing usable found + + if (rec < MATCHING_ROWS_IN_OTHER_TABLE) + rec= MATCHING_ROWS_IN_OTHER_TABLE; // Fix for small tables + + Json_writer_object trace_access_idx(thd); + /* + full text keys require special treatment + */ + if (ft_key) + { + /* + Really, there should be records=0.0 (yes!) + but 1.0 would be probably safer + */ + tmp= prev_record_reads(join_positions, idx, found_ref); + records= 1.0; + type= JT_FT; + trace_access_idx.add("access_type", join_type_str[type]) + .add("full-text index", keyinfo->name); + } + else + { + found_constraint= MY_TEST(found_part); + loose_scan_opt.check_ref_access_part1(s, key, start_key, found_part); + + /* Check if we found full key */ + const key_part_map all_key_parts= PREV_BITS(uint, key_parts); + if (found_part == all_key_parts && !ref_or_null_part) + { /* use eq key */ + max_key_part= (uint) ~0; + /* + If the index is a unique index (1), and + - all its columns are not null (2), or + - equalities we are using reject NULLs (3) + then the estimate is rows=1. 
+ */ + if ((key_flags & (HA_NOSAME | HA_EXT_NOSAME)) && // (1) + (!(key_flags & HA_NULL_PART_KEY) || // (2) + all_key_parts == notnull_part)) // (3) + { + /* Check that eq_ref_tables are correctly updated */ + DBUG_ASSERT(join->eq_ref_tables & table->map); + /* TODO: Adjust cost for covering and clustering key */ + type= JT_EQ_REF; + trace_access_idx.add("access_type", join_type_str[type]) + .add("index", keyinfo->name); + + if (!found_ref && table->opt_range_keys.is_set(key)) + tmp= adjust_quick_cost(table->opt_range[key].cost, 1); + else + tmp= table->file->avg_io_cost(); + tmp*= prev_record_reads(join_positions, idx, + found_ref); + records=1.0; + } + else + { + type= JT_REF; + trace_access_idx.add("access_type", join_type_str[type]) + .add("index", keyinfo->name); + if (!found_ref) + { /* We found a const key */ + /* + ReuseRangeEstimateForRef-1: + We get here if we've found a ref(const) (c_i are constants): + "(keypart1=c1) AND ... AND (keypartN=cN)" [ref_const_cond] + + If range optimizer was able to construct a "range" + access on this index, then its condition "quick_cond" was + eqivalent to ref_const_cond (*), and we can re-use E(#rows) + from the range optimizer. + + Proof of (*): By properties of range and ref optimizers + quick_cond will be equal or tighther than ref_const_cond. + ref_const_cond already covers "smallest" possible interval - + a singlepoint interval over all keyparts. Therefore, + quick_cond is equivalent to ref_const_cond (if it was an + empty interval we wouldn't have got here). + */ + if (table->opt_range_keys.is_set(key)) + { + records= (double) table->opt_range[key].rows; + trace_access_idx.add("used_range_estimates", true); + tmp= adjust_quick_cost(table->opt_range[key].cost, + table->opt_range[key].rows); + goto got_cost; + } + else + { + /* quick_range couldn't use key! 
*/ + records= (double) s->records/rec; + trace_access_idx.add("used_range_estimates", false) + .add("reason", "not available"); + } + } + else + { + if (!(records= keyinfo->actual_rec_per_key(key_parts-1))) + { /* Prefer longer keys */ + trace_access_idx.add("rec_per_key_stats_missing", true); + records= + ((double) s->records / (double) rec * + (1.0 + + ((double) (table->s->max_key_length-keyinfo->key_length) / + (double) table->s->max_key_length))); + if (records < 2.0) + records=2.0; /* Can't be as good as a unique */ + } + /* + ReuseRangeEstimateForRef-2: We get here if we could not reuse + E(#rows) from range optimizer. Make another try: + + If range optimizer produced E(#rows) for a prefix of the ref + access we're considering, and that E(#rows) is lower then our + current estimate, make an adjustment. The criteria of when we + can make an adjustment is a special case of the criteria used + in ReuseRangeEstimateForRef-3. + */ + if (table->opt_range_keys.is_set(key) && + (const_part & + (((key_part_map)1 << table->opt_range[key].key_parts)-1)) == + (((key_part_map)1 << table->opt_range[key].key_parts)-1) && + table->opt_range[key].ranges == 1 && + records > (double) table->opt_range[key].rows) + { + records= (double) table->opt_range[key].rows; + trace_access_idx.add("used_range_estimates", "clipped down"); + } + else + { + trace_access_idx.add("used_range_estimates", false); + if (table->opt_range_keys.is_set(key)) + { + trace_access_idx.add("reason", "not better than ref estimates"); + } + else + { + trace_access_idx.add("reason", "not available"); + } + } + } + /* Limit the number of matched rows */ + tmp= cost_for_index_read(thd, table, key, (ha_rows) records, + (ha_rows) s->worst_seeks); + records_for_key= (ha_rows) records; + set_if_smaller(records_for_key, thd->variables.max_seeks_for_key); + keyread_tmp= table->file->keyread_time(key, 1, records_for_key); + got_cost: + tmp= COST_MULT(tmp, record_count); + keyread_tmp= COST_MULT(keyread_tmp, 
record_count); + } + } + else + { + type = ref_or_null_part ? JT_REF_OR_NULL : JT_REF; + trace_access_idx.add("access_type", join_type_str[type]) + .add("index", keyinfo->name); + /* + Use as much key-parts as possible and a uniq key is better + than a not unique key + Set tmp to (previous record count) * (records / combination) + */ + if ((found_part & 1) && + (!(table->file->index_flags(key, 0, 0) & HA_ONLY_WHOLE_INDEX) || + found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts))) + { + max_key_part= max_part_bit(found_part); + /* + ReuseRangeEstimateForRef-3: + We're now considering a ref[or_null] access via + (t.keypart1=e1 AND ... AND t.keypartK=eK) [ OR + (same-as-above but with one cond replaced + with "t.keypart_i IS NULL")] (**) + + Try re-using E(#rows) from "range" optimizer: + We can do so if "range" optimizer used the same intervals as + in (**). The intervals used by range optimizer may be not + available at this point (as "range" access might have chosen to + create quick select over another index), so we can't compare + them to (**). We'll make indirect judgements instead. + The sufficient conditions for re-use are: + (C1) All e_i in (**) are constants, i.e. found_ref==FALSE. (if + this is not satisfied we have no way to know which ranges + will be actually scanned by 'ref' until we execute the + join) + (C2) max #key parts in 'range' access == K == max_key_part (this + is apparently a necessary requirement) + + We also have a property that "range optimizer produces equal or + tighter set of scan intervals than ref(const) optimizer". Each + of the intervals in (**) are "tightest possible" intervals when + one limits itself to using keyparts 1..K (which we do in #2). + From here it follows that range access used either one, or + both of the (I1) and (I2) intervals: + + (t.keypart1=c1 AND ... 
AND t.keypartK=eK) (I1) + (same-as-above but with one cond replaced + with "t.keypart_i IS NULL") (I2) + + The remaining part is to exclude the situation where range + optimizer used one interval while we're considering + ref-or-null and looking for estimate for two intervals. This + is done by last limitation: + + (C3) "range optimizer used (have ref_or_null?2:1) intervals" + */ + if (table->opt_range_keys.is_set(key) && !found_ref && //(C1) + table->opt_range[key].key_parts == max_key_part && //(C2) + table->opt_range[key].ranges == 1 + MY_TEST(ref_or_null_part)) //(C3) + { + records= (double) table->opt_range[key].rows; + tmp= adjust_quick_cost(table->opt_range[key].cost, + table->opt_range[key].rows); + trace_access_idx.add("used_range_estimates", true); + goto got_cost2; + } + else + { + /* Check if we have statistic about the distribution */ + if ((records= keyinfo->actual_rec_per_key(max_key_part-1))) + { + /* + Fix for the case where the index statistics is too + optimistic: If + (1) We're considering ref(const) and there is quick select + on the same index, + (2) and that quick select uses more keyparts (i.e. it will + scan equal/smaller interval then this ref(const)) + (3) and E(#rows) for quick select is higher then our + estimate, + Then + We'll use E(#rows) from quick select. + + Q: Why do we choose to use 'ref'? Won't quick select be + cheaper in some cases ? + TODO: figure this out and adjust the plan choice if needed. + */ + if (table->opt_range_keys.is_set(key)) + { + if (table->opt_range[key].key_parts >= max_key_part) // (2) + { + double rows= (double) table->opt_range[key].rows; + if (!found_ref && // (1) + records < rows) // (3) + { + trace_access_idx.add("used_range_estimates", "clipped up"); + records= rows; + } + } + } + } + else + { + trace_access_idx.add("rec_per_key_stats_missing", true); + /* + Assume that the first key part matches 1% of the file + and that the whole key matches 10 (duplicates) or 1 + (unique) records. 
+ Assume also that more key matches proportionally more + records + This gives the formula: + records = (x * (b-a) + a*c-b)/(c-1) + + b = records matched by whole key + a = records matched by first key part (1% of all records?) + c = number of key parts in key + x = used key parts (1 <= x <= c) + */ + double rec_per_key; + if (!(rec_per_key=(double) + keyinfo->rec_per_key[keyinfo->user_defined_key_parts-1])) + rec_per_key=(double) s->records/rec+1; + + if (!s->records) + records= 0; + else if (rec_per_key/(double) s->records >= 0.01) + records= rec_per_key; + else + { + double a=s->records*0.01; + if (keyinfo->user_defined_key_parts > 1) + records= (max_key_part * (rec_per_key - a) + + a*keyinfo->user_defined_key_parts - rec_per_key)/ + (keyinfo->user_defined_key_parts-1); + else + records= a; + set_if_bigger(records, 1.0); + } + } + + if (ref_or_null_part) + { + /* We need to do two key searches to find row */ + records *= 2.0; + } + + /* + ReuseRangeEstimateForRef-4: We get here if we could not reuse + E(#rows) from range optimizer. Make another try: + + If range optimizer produced E(#rows) for a prefix of the ref + access we're considering, and that E(#rows) is lower then our + current estimate, make the adjustment. + + The decision whether we can re-use the estimate from the range + optimizer is the same as in ReuseRangeEstimateForRef-3, + applied to first table->quick_key_parts[key] key parts. 
+ */ + if (table->opt_range_keys.is_set(key) && + table->opt_range[key].key_parts <= max_key_part && + const_part & + ((key_part_map)1 << table->opt_range[key].key_parts) && + table->opt_range[key].ranges == (1 + + MY_TEST(ref_or_null_part & + const_part)) && + records > (double) table->opt_range[key].rows) + { + trace_access_idx.add("used_range_estimates", true); + records= (double) table->opt_range[key].rows; + } + } + + /* Limit the number of matched rows */ + tmp= cost_for_index_read(thd, table, key, (ha_rows) records, + (ha_rows) s->worst_seeks); + records_for_key= (ha_rows) records; + set_if_smaller(records_for_key, thd->variables.max_seeks_for_key); + keyread_tmp= table->file->keyread_time(key, 1, records_for_key); + got_cost2: + tmp= COST_MULT(tmp, record_count); + keyread_tmp= COST_MULT(keyread_tmp, record_count); + } + else + { + if (!(found_part & 1)) + cause= "no predicate for first keypart"; + tmp= best_time; // Do nothing + } + } + + tmp= COST_ADD(tmp, s->startup_cost); + loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp, + found_ref); + } /* not ft_key */ + + if (records < DBL_MAX && + (found_part & 1) && // start_key->key can be used for index access + (table->file->index_flags(start_key->key,0,1) & + HA_DO_RANGE_FILTER_PUSHDOWN)) + { + double rows= record_count * records; + /* + If we use filter F with selectivity s the the cost of fetching data + by key using this filter will be + cost_of_fetching_1_row * rows * s + + cost_of_fetching_1_key_tuple * rows * (1 - s) + + cost_of_1_lookup_into_filter * rows + Without using any filter the cost would be just + cost_of_fetching_1_row * rows + + So the gain in access cost per row will be + cost_of_fetching_1_row * (1 - s) - + cost_of_fetching_1_key_tuple * (1 - s) - + cost_of_1_lookup_into_filter + = + (cost_of_fetching_1_row - cost_of_fetching_1_key_tuple) * (1 - s) + - cost_of_1_lookup_into_filter + + Here we have: + cost_of_fetching_1_row = tmp/rows + cost_of_fetching_1_key_tuple = 
keyread_tmp/rows + + Here's a more detailed explanation that uses the formulas behind + the function the call filter->get_adjusted_gain(). The function + takes as a parameter the number of probes/look-ups into the filter + that is equal to the number of fetched key entries that is equal to + the number of row fetches when no filter is used (assuming no + index condition pushdown is employed for the used key access). + Let this number be N. Then the total gain from using the filter is + N*a_adj - b where b is the cost of building the filter and + a_adj is calcilated as follows: + a - (1-access_cost_factor)*(1-s) = + (1+1_cond_eval_cost)*(1-s)-1_probe_cost - (1-access_cost_factor)*(1-s) + = (1-s)*(1_cond_eval_cost+access_cost_factor) - 1_probe_cost. + Here ((1-s)*(1_cond_eval_cost) * N is the gain from checking less + conditions pushed into the table, 1_probe_cost*N is the cost of the + probes and (1*s) * access_cost_factor * N must be the gain from + accessing less rows. + It does not matter how we calculate the cost of N full row fetches + cost_of_fetching_N_rows or + how we calculate the cost of fetching N key entries + cost_of_fetching_N_key_entries + the gain from less row fetches will be + (cost_of_fetching_N_rows - cost_of_fetching_N_key_entries) * (1-s) + and this should be equal to (1*s) * access_cost_factor * N. + Thus access_cost_factor must be calculated as + (cost_of_fetching_N_rows - cost_of_fetching_N_key_entries) / N. + + For safety we clip cost_of_fetching_N_key_entries by the value + of cost_of_fetching_N_row though formally it's not necessary. + */ + /* + For eq_ref access we assume that the cost of fetching N key entries + is equal to the half of fetching N rows + */ + double key_access_cost= + type == JT_EQ_REF ? 
0.5 * tmp : MY_MIN(tmp, keyread_tmp); + double access_cost_factor= MY_MIN((tmp - key_access_cost) / rows, 1.0); + + if (!(records < s->worst_seeks && + records <= thd->variables.max_seeks_for_key)) + { + // Don't use rowid filter + trace_access_idx.add("rowid_filter_skipped", "worst/max seeks clipping"); + filter= NULL; + } + else + { + filter= + table->best_range_rowid_filter_for_partial_join(start_key->key, + rows, + access_cost_factor); + } + if (filter) + { + tmp-= filter->get_adjusted_gain(rows) - filter->get_cmp_gain(rows); + DBUG_ASSERT(tmp >= 0); + trace_access_idx.add("rowid_filter_key", + table->key_info[filter->key_no].name); + } + } + trace_access_idx.add("rows", records).add("cost", tmp); + + if (tmp + 0.0001 < best_time - records/TIME_FOR_COMPARE) + { + trace_access_idx.add("chosen", true); + best_time= COST_ADD(tmp, records/TIME_FOR_COMPARE); + best= tmp; + best_records= records; + best_key= start_key; + best_max_key_part= max_key_part; + best_ref_depends_map= found_ref; + best_filter= filter; + best_type= type; + } + else + { + trace_access_idx.add("chosen", false) + .add("cause", cause ? cause : "cost"); + } + cause= nullptr; + } /* for each key */ + records= best_records; + } + else + { + /* + No usable keys found. However, there may still be an option to use + "Range checked for each record" when all depending tables has + been read. s->key_dependent tells us which tables these could be and + s->key_start_dependent tells us if a first key part was used. + s->key_dependent may include more tables than could be used, + but this is ok as not having any usable keys is a rare thing and + the performance penalty for extra table bits is that + best_extension_by_limited_search() would not be able to prune tables + earlier. 
+ Example query: + SELECT * FROM t1,t2 where t1.key1=t2.key1 OR t2.key2<1 + */ + if (s->key_start_dependent) + key_dependent= s->key_dependent; + /* Add dependencey for sub queries */ + key_dependent|= s->embedded_dependent; + } + /* Check that s->key_dependent contains all used_tables found in s->keyuse */ + key_dependent&= ~PSEUDO_TABLE_BITS; + DBUG_ASSERT((key_dependent & (s->key_dependent | s->embedded_dependent)) == + key_dependent); + + /* + If there is no key to access the table, but there is an equi-join + predicate connecting the table with the privious tables then we + consider the possibility of using hash join. + We need also to check that: + (1) s is inner table of semi-join -> join cache is allowed for semijoins + (2) s is inner table of outer join -> join cache is allowed for outer joins + */ + if (idx > join->const_tables && best_key == 0 && + (join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) && + join->max_allowed_join_cache_level > 2 && + !bitmap_is_clear_all(eq_join_set) && !disable_jbuf && + (!s->emb_sj_nest || + join->allowed_semijoin_with_cache) && // (1) + (!(s->table->map & join->outer_join) || + join->allowed_outer_join_with_cache)) // (2) + { + double fanout; + double join_sel; + bool stats_found= 0, force_estimate= 0; + Json_writer_object trace_access_hash(thd); + trace_access_hash.add("type", "hash"); + trace_access_hash.add("index", "hj-key"); + /* Estimate the cost of the hash join access to the table */ + double rnd_records= apply_selectivity_for_table(s, use_cond_selectivity, + &force_estimate); + + DBUG_ASSERT(hj_start_key); + if (optimizer_flag(thd, OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY)) + { + /* + Starting from this point, rnd_records should not be used anymore. + Use "fanout" for an estimate of # matching records. 
+ */ + fanout= hash_join_fanout(join, s, remaining_tables, rnd_records, + hj_start_key, &stats_found); + join_sel= 1.0; // Don't do the "10% heuristic" + if (stats_found) + goto fanout_computed; + } + + /* + No OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY or no field statistics + found. + + Take into account if there is non constant constraints used with + earlier tables in the where expression. + If yes, this will set fanout to rnd_records/4. + We estimate that there will be HASH_FANOUT (10%) + hash matches / row. + */ + if (found_constraint && !force_estimate) + rnd_records= use_found_constraint(rnd_records); + fanout= rnd_records; + join_sel= 0.1; + + fanout_computed: + tmp= s->quick ? s->quick->read_time : s->scan_time(); + double cmp_time= (s->records - rnd_records)/TIME_FOR_COMPARE; + tmp= COST_ADD(tmp, cmp_time); + + /* We read the table as many times as join buffer becomes full. */ + + double refills= (1.0 + floor((double) cache_record_length(join,idx) * + record_count / + (double) thd->variables.join_buff_size)); + tmp= COST_MULT(tmp, refills); + + // Add cost of reading/writing the join buffer + if (optimizer_flag(thd, OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY)) + { + /* Set it to be 1/10th of TIME_FOR_COMPARE */ + double row_copy_cost= 1.0 / (10*TIME_FOR_COMPARE); + double join_buffer_operations= + COST_ADD( + COST_MULT(record_count, row_copy_cost), + COST_MULT(record_count, fanout * (idx - join->const_tables)) + ); + double jbuf_use_cost= row_copy_cost * join_buffer_operations; + trace_access_hash.add("jbuf_use_cost", jbuf_use_cost); + tmp= COST_ADD(tmp, jbuf_use_cost); + } + + double where_cost= COST_MULT((fanout*join_sel) / TIME_FOR_COMPARE, + record_count); + trace_access_hash.add("extra_cond_check_cost", where_cost); + + best_time= COST_ADD(tmp, where_cost); + + best= tmp; + records= fanout; + best_key= hj_start_key; + best_ref_depends_map= 0; + best_uses_jbuf= TRUE; + best_filter= 0; + best_type= JT_HASH; + trace_access_hash.add("rnd_records", rnd_records); 
+ trace_access_hash.add("records", records); + trace_access_hash.add("cost", best); + trace_access_hash.add("chosen", true); + } + + /* + Don't test table scan if it can't be better. + Prefer key lookup if we would use the same key for scanning. + + Don't do a table scan on InnoDB tables, if we can read the used + parts of the row from any of the used index. + This is because table scans uses index and we would not win + anything by using a table scan. + + A word for word translation of the below if-statement in sergefp's + understanding: we check if we should use table scan if: + (1) The found 'ref' access produces more records than a table scan + (or index scan, or quick select), or 'ref' is more expensive than + any of them. + (2) This doesn't hold: the best way to perform table scan is to to perform + 'range' access using index IDX, and the best way to perform 'ref' + access is to use the same index IDX, with the same or more key parts. + (note: it is not clear how this rule is/should be extended to + index_merge quick selects). Also if we have a hash join we prefer that + over a table scan. This heuristic doesn't apply if the quick select + uses the group-by min-max optimization. + (3) See above note about InnoDB. + (4) NOT ("FORCE INDEX(...)" is used for table and there is 'ref' access + path, but there is no quick select) + If the condition in the above brackets holds, then the only possible + "table scan" access method is ALL/index (there is no quick select). + Since we have a 'ref' access path, and FORCE INDEX instructs us to + choose it over ALL/index, there is no need to consider a full table + scan. + (5) Non-flattenable semi-joins: don't consider doing a scan of temporary + table if we had an option to make lookups into it. In real-world cases, + lookups are cheaper than full scans, but when the table is small, they + can be [considered to be] more expensive, which causes lookups not to + be used for cases with small datasets, which is annoying. 
+ */ + Json_writer_object trace_access_scan(thd); + if ((records >= s->found_records || best > s->read_time) && // (1) + !(best_key && best_key->key == MAX_KEY) && // (2) + !(s->quick && + s->quick->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX && // (2) + best_key && s->quick->index == best_key->key && // (2) + s->table->opt_range_keys.is_set(best_key->key) && // (2) + best_max_key_part >= s->table->opt_range[best_key->key].key_parts) &&// (2) + !((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) + ! s->table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3) + !(s->table->force_index && best_key && !s->quick) && // (4) + !(best_key && s->table->pos_in_table_list->jtbm_subselect)) // (5) + { // Check full join + bool force_estimate= 0; + double rnd_records= apply_selectivity_for_table(s, + use_cond_selectivity, + &force_estimate); + rnd_records= ((found_constraint && !force_estimate) ? + use_found_constraint(rnd_records) : + rnd_records); + /* + Range optimizer never proposes a RANGE if it isn't better + than FULL: so if RANGE is present, it's always preferred to FULL. + Here we estimate its cost. + */ + + filter= 0; + if (s->quick) + { + /* + For each record we: + - read record range through 'quick' + - skip rows which does not satisfy WHERE constraints + TODO: + We take into account possible use of join cache for ALL/index + access (see first else-branch below), but we don't take it into + account here for range/index_merge access. Find out why this is so. 
+ */ + double cmp_time= (s->found_records - rnd_records) / TIME_FOR_COMPARE; + tmp= COST_MULT(record_count, + COST_ADD(s->quick->read_time, cmp_time)); + + if ( s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) + { + double rows= record_count * s->found_records; + uint key_no= s->quick->index; + + /* See the comment concerning using rowid filter for with ref access */ + double row_access_cost= s->quick->read_time * record_count; + double key_access_cost= + MY_MIN(row_access_cost, + s->table->opt_range[key_no].index_only_cost * record_count); + double access_cost_factor= MY_MIN((row_access_cost - key_access_cost) / + rows, 1.0); + filter= + s->table->best_range_rowid_filter_for_partial_join(key_no, rows, + access_cost_factor); + if (filter) + { + tmp-= filter->get_adjusted_gain(rows); + DBUG_ASSERT(tmp >= 0); + } + + type= JT_RANGE; + } + else + { + type= JT_INDEX_MERGE; + best_filter= 0; + } + loose_scan_opt.check_range_access(join, idx, s->quick); + } + else + { + /* Estimate cost of reading table. 
*/ + if (s->table->force_index && !best_key) // index scan + { + type= JT_NEXT; + tmp= s->table->file->read_time(s->ref.key, 1, s->records); + } + else // table scan + { + tmp= s->scan_time(); + type= JT_ALL; + } + + if ((s->table->map & join->outer_join) || disable_jbuf) // Can't use join cache + { + /* + For each record we have to: + - read the whole table record + - skip rows which does not satisfy join condition + */ + double cmp_time= (s->records - rnd_records)/TIME_FOR_COMPARE; + tmp= COST_MULT(record_count, COST_ADD(tmp,cmp_time)); + } + else + { + double refills= (1.0 + floor((double) cache_record_length(join,idx) * + (record_count / + (double) thd->variables.join_buff_size))); + tmp= COST_MULT(tmp, refills); + /* + We don't make full cartesian product between rows in the scanned + table and existing records because we skip all rows from the + scanned table, which does not satisfy join condition when + we read the table (see flush_cached_records for details). Here we + take into account cost to read and skip these records. + */ + double cmp_time= (s->records - rnd_records)/TIME_FOR_COMPARE; + tmp= COST_ADD(tmp, cmp_time); + } + } + + trace_access_scan.add("access_type", type == JT_ALL ? + "scan" : + join_type_str[type]); + /* Splitting technique cannot be used with join cache */ + if (s->table->is_splittable()) + tmp+= s->table->get_materialization_cost(); + else + tmp+= s->startup_cost; + + /* + We estimate the cost of evaluating WHERE clause for found records + as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus + tmp give us total cost of using TABLE SCAN + */ + + const double best_filter_cmp_gain= best_filter + ? best_filter->get_cmp_gain(record_count * records) + : 0; + trace_access_scan.add("resulting_rows", rnd_records); + trace_access_scan.add("cost", tmp); + + if (best == DBL_MAX || + COST_ADD(tmp, record_count/TIME_FOR_COMPARE*rnd_records) < + (best_key->is_for_hash_join() ? 
best_time : + COST_ADD(best - best_filter_cmp_gain, + record_count/TIME_FOR_COMPARE*records))) + { + /* + If the table has a range (s->quick is set) make_join_select() + will ensure that this will be used + */ + best= tmp; + records= rnd_records; + best_key= 0; + best_filter= 0; + if (s->quick && s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) + best_filter= filter; + /* range/index_merge/ALL/index access method are "independent", so: */ + best_ref_depends_map= 0; + best_uses_jbuf= MY_TEST(!disable_jbuf && + (join->allowed_outer_join_with_cache || + !(s->table->map & join->outer_join))); + spl_plan= 0; + best_type= type; + } + trace_access_scan.add("chosen", best_key == NULL); + } + else + { + trace_access_scan.add("type", "scan"); + trace_access_scan.add("chosen", false); + trace_access_scan.add("cause", "cost"); + } + + /* Update the cost information for the current partial plan */ + pos->records_read= records; + pos->read_time= best; + pos->key= best_key; + pos->type= best_type; + pos->table= s; + pos->ref_depend_map= best_ref_depends_map; + pos->loosescan_picker.loosescan_key= MAX_KEY; + pos->use_join_buffer= best_uses_jbuf; + pos->spl_plan= spl_plan; + pos->spl_pd_boundary= !spl_plan ? 0 : spl_pd_boundary; + pos->range_rowid_filter_info= best_filter; + pos->key_dependent= (best_type == JT_EQ_REF ? (table_map) 0 : + key_dependent & remaining_tables); + + loose_scan_opt.save_to_position(s, loose_scan_pos); + + if (!best_key && + idx == join->const_tables && + s->table == join->sort_by_table && + join->unit->lim.get_select_limit() >= records) + { + trace_access_scan.add("use_tmp_table", true); + join->sort_by_table= (TABLE*) 1; // Must use temporary table + } + trace_access_scan.end(); + trace_paths.end(); + + if (unlikely(thd->trace_started())) + print_best_access_for_table(thd, pos, best_type); + + DBUG_VOID_RETURN; +} + + +/* + Find JOIN_TAB's embedding (i.e, parent) subquery. 
+ - For merged semi-joins, tables inside the semi-join nest have their + semi-join nest as parent. We intentionally ignore results of table + pullout action here. + - For non-merged semi-joins (JTBM tabs), the embedding subquery is the + JTBM join tab itself. +*/ + +static TABLE_LIST* get_emb_subq(JOIN_TAB *tab) +{ + TABLE_LIST *tlist= tab->table->pos_in_table_list; + if (tlist->jtbm_subselect) + return tlist; + TABLE_LIST *embedding= tlist->embedding; + if (!embedding || !embedding->sj_subq_pred) + return NULL; + return embedding; +} + + +/* + Choose initial table order that "helps" semi-join optimizations. + + The idea is that we should start with the order that is the same as the one + we would have had if we had semijoin=off: + - Top-level tables go first + - subquery tables are grouped together by the subquery they are in, + - subquery tables are attached where the subquery predicate would have been + attached if we had semi-join off. + + This function relies on join_tab_cmp()/join_tab_cmp_straight() to produce + certain pre-liminary ordering, see compare_embedding_subqueries() for its + description. 
*/

static void choose_initial_table_order(JOIN *join)
{
  TABLE_LIST *emb_subq;
  /* Only the non-constant tail of best_ref is considered for reordering */
  JOIN_TAB **tab= join->best_ref + join->const_tables;
  JOIN_TAB **tabs_end= tab + join->table_count - join->const_tables;
  DBUG_ENTER("choose_initial_table_order");
  /* Find where the top-level JOIN_TABs end and subquery JOIN_TABs start */
  for (; tab != tabs_end; tab++)
  {
    if ((emb_subq= get_emb_subq(*tab)))
      break;
  }
  uint n_subquery_tabs= (uint)(tabs_end - tab);

  if (!n_subquery_tabs)
    DBUG_VOID_RETURN;  /* No subquery tables: nothing to reorder */

  /* Copy the subquery JOIN_TABs to a separate array */
  JOIN_TAB *subquery_tabs[MAX_TABLES];
  memcpy(subquery_tabs, tab, sizeof(JOIN_TAB*) * n_subquery_tabs);

  /*
    last_top_level_tab marks one-past the last top-level entry in best_ref;
    it grows as subquery groups are spliced back into the top-level part.
  */
  JOIN_TAB **last_top_level_tab= tab;
  JOIN_TAB **subq_tab= subquery_tabs;
  JOIN_TAB **subq_tabs_end= subquery_tabs + n_subquery_tabs;
  TABLE_LIST *cur_subq_nest= NULL;
  for (; subq_tab < subq_tabs_end; subq_tab++)
  {
    if (get_emb_subq(*subq_tab)!= cur_subq_nest)
    {
      /*
        Reached the part of subquery_tabs that covers tables in some subquery.
      */
      cur_subq_nest= get_emb_subq(*subq_tab);

      /* Determine how many tables the subquery has */
      JOIN_TAB **last_tab_for_subq;
      for (last_tab_for_subq= subq_tab;
           last_tab_for_subq < subq_tabs_end &&
           get_emb_subq(*last_tab_for_subq) == cur_subq_nest;
           last_tab_for_subq++) {}
      uint n_subquery_tables= (uint)(last_tab_for_subq - subq_tab);

      /*
        Walk the original array and find where this subquery would have been
        attached to
      */
      table_map need_tables= cur_subq_nest->original_subq_pred_used_tables;
      need_tables &= ~(join->const_table_map | PSEUDO_TABLE_BITS);
      for (JOIN_TAB **top_level_tab= join->best_ref + join->const_tables;
           top_level_tab < last_top_level_tab;
           //top_level_tab < join->best_ref + join->table_count;
           top_level_tab++)
      {
        /*
          Clear the bit of each top-level table we pass; when need_tables
          becomes 0, all tables the subquery predicate depends on precede
          this point, so the subquery group attaches right after it.
        */
        need_tables &= ~(*top_level_tab)->table->map;
        /* Check if this is the place where subquery should be attached */
        if (!need_tables)
        {
          /* Move away the top-level tables that are after top_level_tab */
          /*
            memmove (not memcpy): source and destination ranges inside
            best_ref overlap when making room for the subquery group.
          */
          size_t top_tail_len= last_top_level_tab - top_level_tab - 1;
          memmove(top_level_tab + 1 + n_subquery_tables, top_level_tab + 1,
                  sizeof(JOIN_TAB*)*top_tail_len);
          last_top_level_tab += n_subquery_tables;
          /* The group itself is copied from the saved snapshot, which is
             unaffected by the memmove above */
          memcpy(top_level_tab + 1, subq_tab, sizeof(JOIN_TAB*)*n_subquery_tables);
          break;
        }
      }
      DBUG_ASSERT(!need_tables);
      /* Skip over the rest of this subquery's tables (loop's ++ adds 1) */
      subq_tab += n_subquery_tables - 1;
    }
  }
  DBUG_VOID_RETURN;
}


/**
  Selects and invokes a search strategy for an optimal query plan.

  The function checks user-configurable parameters that control the search
  strategy for an optimal plan, selects the search method and then invokes
  it. Each specific optimization procedure stores the final optimal plan in
  the array 'join->best_positions', and the cost of the plan in
  'join->best_read'.
  @param join         pointer to the structure providing all context info for
                      the query
  @param join_tables  set of the tables in the query

  @retval
    FALSE       ok
  @retval
    TRUE        Fatal error
*/

bool
choose_plan(JOIN *join, table_map join_tables)
{
  uint search_depth= join->thd->variables.optimizer_search_depth;
  uint use_cond_selectivity=
    join->thd->variables.optimizer_use_condition_selectivity;
  bool straight_join= MY_TEST(join->select_options & SELECT_STRAIGHT_JOIN);
  THD *thd= join->thd;
  DBUG_ENTER("choose_plan");

  join->cur_embedding_map= 0;
  join->extra_heuristic_pruning= false;
  join->prune_level= join->thd->variables.optimizer_prune_level;

  reset_nj_counters(join, join->join_list);
  /* Comparator used to pre-sort the non-const join tabs below. */
  qsort2_cmp jtab_sort_func;

  if (join->emb_sjm_nest)
  {
    /*
      We're optimizing a semi-join materialization nest, so put the
      tables from this semi-join first.
    */
    jtab_sort_func= join_tab_cmp_embedded_first;
  }
  else
  {
    /*
      if (SELECT_STRAIGHT_JOIN option is set)
        reorder tables so dependent tables come after tables they depend
        on, otherwise keep tables in the order they were specified in the query
      else
        Apply heuristic: pre-sort all access plans with respect to the number
        of records accessed.
    */
    jtab_sort_func= straight_join ? join_tab_cmp_straight : join_tab_cmp;
  }

  /*
    psergey-todo: if we're not optimizing an SJM nest,
    - sort that outer tables are first, and each sjm nest follows
    - then, put each [sjm_table1, ... sjm_tableN] sub-array right where
      WHERE clause pushdown would have put it.
  */
  my_qsort2(join->best_ref + join->const_tables,
            join->table_count - join->const_tables, sizeof(JOIN_TAB*),
            jtab_sort_func, (void*)join->emb_sjm_nest);

  /* All trace output below goes under this optimizer-trace JSON array. */
  Json_writer_object wrapper(thd);
  Json_writer_array trace_plan(thd,"considered_execution_plans");

  if (!join->emb_sjm_nest)
  {
    choose_initial_table_order(join);
  }
  /*
    Note: constant tables are already in the join prefix. We don't
    put them into the cur_sj_inner_tables, though.
  */
  join->cur_sj_inner_tables= 0;

  if (straight_join)
  {
    optimize_straight_join(join, join_tables);
  }
  else
  {
    DBUG_ASSERT(search_depth <= MAX_TABLES + 1);
    if (search_depth == 0)
      /* Automatically determine a reasonable value for 'search_depth' */
      search_depth= determine_search_depth(join);

    /* Extra heuristic pruning is only enabled for deep enough searches. */
    if (join->prune_level >= 1 &&
        search_depth >= thd->variables.optimizer_extra_pruning_depth)
    {
      join->extra_heuristic_pruning= true;
    }

    if (greedy_search(join, join_tables, search_depth, use_cond_selectivity))
      DBUG_RETURN(TRUE);
  }

  /*
    Store the cost of this query into a user variable.
    Don't update last_query_cost for statements that are not "flat joins",
    i.e. they have subqueries, unions or call stored procedures.
    TODO: calculate a correct cost for a query with subqueries and UNIONs.
  */
  if (join->thd->lex->is_single_level_stmt())
    join->thd->status_var.last_query_cost= join->best_read;
  DBUG_RETURN(FALSE);
}


/*
  Compare two join tabs based on the subqueries they are from.
+ - top-level join tabs go first + - then subqueries are ordered by their select_id (we're using this + criteria because we need a cross-platform, deterministic ordering) + + @return + 0 - equal + -1 - jt1 < jt2 + 1 - jt1 > jt2 +*/ + +static int compare_embedding_subqueries(JOIN_TAB *jt1, JOIN_TAB *jt2) +{ + /* Determine if the first table is originally from a subquery */ + TABLE_LIST *tbl1= jt1->table->pos_in_table_list; + uint tbl1_select_no; + if (tbl1->jtbm_subselect) + { + tbl1_select_no= + tbl1->jtbm_subselect->unit->first_select()->select_number; + } + else if (tbl1->embedding && tbl1->embedding->sj_subq_pred) + { + tbl1_select_no= + tbl1->embedding->sj_subq_pred->unit->first_select()->select_number; + } + else + tbl1_select_no= 1; /* Top-level */ + + /* Same for the second table */ + TABLE_LIST *tbl2= jt2->table->pos_in_table_list; + uint tbl2_select_no; + if (tbl2->jtbm_subselect) + { + tbl2_select_no= + tbl2->jtbm_subselect->unit->first_select()->select_number; + } + else if (tbl2->embedding && tbl2->embedding->sj_subq_pred) + { + tbl2_select_no= + tbl2->embedding->sj_subq_pred->unit->first_select()->select_number; + } + else + tbl2_select_no= 1; /* Top-level */ + + /* + Put top-level tables in front. Tables from within subqueries must follow, + grouped by their owner subquery. We don't care about the order that + subquery groups are in, because choose_initial_table_order() will re-order + the groups. + */ + if (tbl1_select_no != tbl2_select_no) + return tbl1_select_no > tbl2_select_no ? 1 : -1; + return 0; +} + + +/** + Compare two JOIN_TAB objects based on the number of accessed records. + + @param ptr1 pointer to first JOIN_TAB object + @param ptr2 pointer to second JOIN_TAB object + + NOTES + The order relation implemented by join_tab_cmp() is not transitive, + i.e. it is possible to choose such a, b and c that (a < b) && (b < c) + but (c < a). 
This implies that result of a sort using the relation + implemented by join_tab_cmp() depends on the order in which + elements are compared, i.e. the result is implementation-specific. + Example: + a: dependent = 0x0 table->map = 0x1 found_records = 3 ptr = 0x907e6b0 + b: dependent = 0x0 table->map = 0x2 found_records = 3 ptr = 0x907e838 + c: dependent = 0x6 table->map = 0x10 found_records = 2 ptr = 0x907ecd0 + + As for subqueries, this function must produce order that can be fed to + choose_initial_table_order(). + + @retval + 1 if first is bigger + @retval + -1 if second is bigger + @retval + 0 if equal +*/ + +static int +join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2) +{ + JOIN_TAB *jt1= *(JOIN_TAB**) ptr1; + JOIN_TAB *jt2= *(JOIN_TAB**) ptr2; + int cmp; + + if ((cmp= compare_embedding_subqueries(jt1, jt2)) != 0) + return cmp; + /* + After that do ordering according to numbers of + records in the table. + */ + if (jt1->found_records > jt2->found_records) + return 1; + if (jt1->found_records < jt2->found_records) + return -1; + return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0); +} + + +/** + Same as join_tab_cmp, but for use with SELECT_STRAIGHT_JOIN. +*/ + +static int +join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2) +{ + JOIN_TAB *jt1= *(JOIN_TAB**) ptr1; + JOIN_TAB *jt2= *(JOIN_TAB**) ptr2; + + /* + We don't do subquery flattening if the parent or child select has + STRAIGHT_JOIN modifier. It is complicated to implement and the semantics + is hardly useful. + */ + DBUG_ASSERT(!jt1->emb_sj_nest); + DBUG_ASSERT(!jt2->emb_sj_nest); + + int cmp; + if ((cmp= compare_embedding_subqueries(jt1, jt2)) != 0) + return cmp; + + /* + We have to check dependency with straight_join as we don't reorder + later as we do for other plans in best_extension_by_limited_search(). + */ + if (jt1->dependent & jt2->table->map) + return 1; + if (jt2->dependent & jt1->table->map) + return -1; + + return jt1 > jt2 ? 1 : (jt1 < jt2 ? 
-1 : 0); +} + + +/* + Same as join_tab_cmp but tables from within the given semi-join nest go + first. Used when the optimizing semi-join materialization nests. +*/ + +static int +join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void* ptr2) +{ + const TABLE_LIST *emb_nest= (TABLE_LIST*) emb; + JOIN_TAB *jt1= *(JOIN_TAB**) ptr1; + JOIN_TAB *jt2= *(JOIN_TAB**) ptr2; + + if (jt1->emb_sj_nest == emb_nest && jt2->emb_sj_nest != emb_nest) + return -1; + if (jt1->emb_sj_nest != emb_nest && jt2->emb_sj_nest == emb_nest) + return 1; + + if (jt1->found_records > jt2->found_records) + return 1; + if (jt1->found_records < jt2->found_records) + return -1; + + return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0); +} + + +/** + Heuristic procedure to automatically guess a reasonable degree of + exhaustiveness for the greedy search procedure. + + The procedure estimates the optimization time and selects a search depth + big enough to result in a near-optimal QEP, that doesn't take too long to + find. If the number of tables in the query exceeds some constant, then + search_depth is set to this constant. + + @param join pointer to the structure providing all context info for + the query + + @note + This is an extremely simplistic implementation that serves as a stub for a + more advanced analysis of the join. Ideally the search depth should be + determined by learning from previous query optimizations, because it will + depend on the CPU power (and other factors). + + @todo + this value should be determined dynamically, based on statistics: + uint max_tables_for_exhaustive_opt= 7; + + @todo + this value could be determined by some mapping of the form: + depth : table_count -> [max_tables_for_exhaustive_opt..MAX_EXHAUSTIVE] + + @return + A positive integer that specifies the search depth (and thus the + exhaustiveness) of the depth-first search algorithm used by + 'greedy_search'. 
*/

static uint
determine_search_depth(JOIN *join)
{
  uint table_count= join->table_count - join->const_tables;
  uint search_depth;
  /* TODO: this value should be determined dynamically, based on statistics: */
  uint max_tables_for_exhaustive_opt= 7;

  if (table_count <= max_tables_for_exhaustive_opt)
    search_depth= table_count+1; // use exhaustive for small number of tables
  else
    /*
      TODO: this value could be determined by some mapping of the form:
      depth : table_count -> [max_tables_for_exhaustive_opt..MAX_EXHAUSTIVE]
    */
    search_depth= max_tables_for_exhaustive_opt; // use greedy search

  return search_depth;
}


/**
  Select the best ways to access the tables in a query without reordering them.

  Find the best access paths for each query table and compute their costs
  according to their order in the array 'join->best_ref' (thus without
  reordering the join tables). The function calls sequentially
  'best_access_path' for each table in the query to select the best table
  access method. The final optimal plan is stored in the array
  'join->best_positions', and the corresponding cost in 'join->best_read'.

  @param join              pointer to the structure providing all context info
                           for the query
  @param remaining_tables  set of the tables in the query

  @note
    This function can be applied to:
    - queries with STRAIGHT_JOIN
    - internally to compute the cost of an arbitrary QEP
  @par
    Thus 'optimize_straight_join' can be used at any stage of the query
    optimization process to finalize a QEP as it is.
*/

static void
optimize_straight_join(JOIN *join, table_map remaining_tables)
{
  JOIN_TAB *s;
  uint idx= join->const_tables;
  bool disable_jbuf= join->thd->variables.join_cache_level == 0;
  double record_count= 1.0;   /* fanout of the partial plan built so far */
  double read_time= 0.0;      /* accumulated cost of the partial plan */
  uint use_cond_selectivity=
    join->thd->variables.optimizer_use_condition_selectivity;
  POSITION loose_scan_pos;
  THD *thd= join->thd;

  /* Visit the non-const tables in their fixed 'best_ref' order. */
  for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
  {
    POSITION *position= join->positions + idx;
    Json_writer_object trace_one_table(thd);
    if (unlikely(thd->trace_started()))
      trace_plan_prefix(join, idx, remaining_tables);
    /* Find the best access method from 's' to the current partial plan */
    best_access_path(join, s, remaining_tables, join->positions, idx,
                     disable_jbuf, record_count,
                     position, &loose_scan_pos);

    /* Compute the cost of the new plan extended with 's' */
    record_count= COST_MULT(record_count, position->records_read);
    /* A rowid filter saves some row-comparison work; credit that back. */
    const double filter_cmp_gain= position->range_rowid_filter_info
      ? position->range_rowid_filter_info->get_cmp_gain(record_count)
      : 0;
    read_time= COST_ADD(read_time,
                        COST_ADD(position->read_time -
                                 filter_cmp_gain,
                                 record_count /
                                 TIME_FOR_COMPARE));
    optimize_semi_joins(join, remaining_tables, idx, &record_count, &read_time,
                        &loose_scan_pos);

    remaining_tables&= ~(s->table->map);
    double pushdown_cond_selectivity= 1.0;
    if (use_cond_selectivity > 1)
      pushdown_cond_selectivity= table_cond_selectivity(join, idx, s,
                                                        remaining_tables);
    position->cond_selectivity= pushdown_cond_selectivity;
    double partial_join_cardinality= record_count *
                                     pushdown_cond_selectivity;
    join->positions[idx].partial_join_cardinality= partial_join_cardinality;
    ++idx;
  }

  if (join->sort_by_table &&
      join->sort_by_table != join->positions[join->const_tables].table->table)
    read_time+= record_count;  // We have to make a temp table
  memcpy((uchar*) join->best_positions, (uchar*) join->positions,
         sizeof(POSITION)*idx);
  join->join_record_count= record_count;
  join->best_read= read_time - COST_EPS;
}


/**
  Find a good, possibly optimal, query execution plan (QEP) by a greedy search.

  The search procedure uses a hybrid greedy/exhaustive search with controlled
  exhaustiveness. The search is performed in N = card(remaining_tables)
  steps. Each step evaluates how promising is each of the unoptimized tables,
  selects the most promising table, and extends the current partial QEP with
  that table. Currently the most 'promising' table is the one with least
  expensive extension.

  There are two extreme cases:
  -# When (card(remaining_tables) < search_depth), the estimate finds the
    best complete continuation of the partial QEP. This continuation can be
    used directly as a result of the search.
  -# When (search_depth == 1) the 'best_extension_by_limited_search'
    considers the extension of the current QEP with each of the remaining
    unoptimized tables.

  All other cases are in-between these two extremes.
  Thus the parameter
  'search_depth' controls the exhaustiveness of the search. The higher the
  value, the longer the optimization time and possibly the better the
  resulting plan. The lower the value, the fewer alternative plans are
  estimated, but the more likely to get a bad QEP.

  All intermediate and final results of the procedure are stored in 'join':
  - join->positions     : modified for every partial QEP that is explored
  - join->best_positions: modified for the current best complete QEP
  - join->best_read     : modified for the current best complete QEP
  - join->best_ref      : might be partially reordered

  The final optimal plan is stored in 'join->best_positions', and its
  corresponding cost in 'join->best_read'.

  @note
  The following pseudocode describes the algorithm of 'greedy_search':

  @code
  procedure greedy_search
  input: remaining_tables
  output: pplan;
  {
    pplan = <>;
    do {
      (t, a) = best_extension(pplan, remaining_tables);
      pplan = concat(pplan, (t, a));
      remaining_tables = remaining_tables - t;
    } while (remaining_tables != {})
    return pplan;
  }
  @endcode
  where 'best_extension' is a placeholder for a procedure that selects the
  most "promising" of all tables in 'remaining_tables'.
  Currently this estimate is performed by calling
  'best_extension_by_limited_search' to evaluate all extensions of the
  current QEP of size 'search_depth', thus the complexity of 'greedy_search'
  mainly depends on that of 'best_extension_by_limited_search'.

  @par
  If 'best_extension()' == 'best_extension_by_limited_search()', then the
  worst-case complexity of this algorithm is <=
  O(N*N^search_depth/search_depth). When search_depth >= N, then the
  complexity of greedy_search is O(N!).

  @par
  In the future, 'greedy_search' might be extended to support other
  implementations of 'best_extension', e.g. some simpler quadratic procedure.

  @param join             pointer to the structure providing all context info
                          for the query
  @param remaining_tables set of tables not included into the partial plan yet
  @param search_depth     controls the exhaustiveness of the search
  @param use_cond_selectivity  specifies how the selectivity of the conditions
                          pushed to a table should be taken into account

  @retval
    FALSE       ok
  @retval
    TRUE        Fatal error
*/

static bool
greedy_search(JOIN      *join,
              table_map remaining_tables,
              uint      search_depth,
              uint      use_cond_selectivity)
{
  double    record_count= 1.0;
  double    read_time=    0.0;
  uint      idx= join->const_tables; // index into 'join->best_ref'
  uint      best_idx;
  uint      size_remain;    // cardinality of remaining_tables
  table_map usable_tables, eq_ref_tables;
  POSITION  best_pos;
  JOIN_TAB  *best_table; // the next plan node to be added to the curr QEP
  // ==join->tables or # tables in the sj-mat nest we're optimizing
  uint      n_tables __attribute__((unused));
  DBUG_ENTER("greedy_search");

  /* number of tables that remain to be optimized */
  usable_tables= (join->emb_sjm_nest ?
                  (join->emb_sjm_nest->sj_inner_tables &
                   ~join->const_table_map & remaining_tables):
                  remaining_tables);
  n_tables= size_remain= my_count_bits(usable_tables);

  join->next_sort_position= join->sort_positions;
  do {
    /*
      Find the extension of the current QEP with the lowest cost.
      We are using remaining_tables instead of usable_tables here as
      in case of an emb_sjm_nest, we want to be able to check if
      an embedded table is depending on an outer table.
    */
    join->best_read= DBL_MAX;
    if ((int) best_extension_by_limited_search(join, remaining_tables, idx,
                                               record_count,
                                               read_time, search_depth,
                                               use_cond_selectivity,
                                               &eq_ref_tables) <
        (int) SEARCH_OK)
      DBUG_RETURN(TRUE);
    /*
      'best_read < DBL_MAX' means that optimizer managed to find
      some plan and updated 'best_positions' array accordingly.
    */
    DBUG_ASSERT(join->best_read < DBL_MAX);

    if (size_remain <= search_depth)
    {
      /*
        'join->best_positions' contains a complete optimal extension of the
        current partial QEP.
      */
      DBUG_EXECUTE("opt", print_plan(join, n_tables,
                                     record_count, read_time, read_time,
                                     "optimal"););
      DBUG_RETURN(FALSE);
    }

    /* select the first table in the optimal extension as most promising */
    best_pos= join->best_positions[idx];
    best_table= best_pos.table;
    /*
      Each subsequent loop of 'best_extension_by_limited_search' uses
      'join->positions' for cost estimates, therefore we have to update its
      value.
    */
    join->positions[idx]= best_pos;

    /*
      Update the interleaving state after extending the current partial plan
      with a new table.
      We are doing this here because best_extension_by_limited_search reverts
      the interleaving state to the one of the non-extended partial plan
      on exit.
    */
    bool is_interleave_error __attribute__((unused))=
      check_interleaving_with_nj (best_table);
    /* This has been already checked by best_extension_by_limited_search */
    DBUG_ASSERT(!is_interleave_error);

    /*
      Also, update the semi-join optimization state. Information about the
      picked semi-join operation is in best_pos->...picker, but we need to
      update the global state in the JOIN object, too.
    */
    if (!join->emb_sjm_nest)
      update_sj_state(join, best_table, idx, remaining_tables);

    /* find the position of 'best_table' in 'join->best_ref' */
    best_idx= idx;
    JOIN_TAB *pos= join->best_ref[best_idx];
    while (pos && best_table != pos)
      pos= join->best_ref[++best_idx];
    DBUG_ASSERT((pos != NULL)); // should always find 'best_table'

    /*
      Move 'best_table' at the first free position in the array of joins.
      We don't need to keep the array sorted as
      best_extension_by_limited_search() will sort them.
    */
    swap_variables(JOIN_TAB*, join->best_ref[idx], join->best_ref[best_idx]);

    /* compute the cost of the new plan extended with 'best_table' */
    record_count= COST_MULT(record_count, join->positions[idx].records_read);
    read_time= COST_ADD(read_time,
                        COST_ADD(join->positions[idx].read_time,
                                 record_count / TIME_FOR_COMPARE));

    remaining_tables&= ~(best_table->table->map);
    --size_remain;
    ++idx;

    DBUG_EXECUTE("opt", print_plan(join, idx,
                                   record_count, read_time, read_time,
                                   "extended"););
  } while (TRUE);
}


/**
  Get cost of execution and fanout produced by selected tables in the join
  prefix (where prefix is defined as prefix in depth-first traversal)

  @param end_tab_idx               The number of last tab to be taken into
                                   account (in depth-first traversal prefix)
  @param filter_map                Bitmap of tables whose cost/fanout are to
                                   be taken into account.
  @param read_time_arg     [out]   store read time here
  @param record_count_arg  [out]   store record count here

  @returns
    read_time_arg and record_count_arg contain the computed cost and fanout
*/

void JOIN::get_partial_cost_and_fanout(int end_tab_idx,
                                       table_map filter_map,
                                       double *read_time_arg,
                                       double *record_count_arg)
{
  double record_count= 1;
  double read_time= 0.0;
  double sj_inner_fanout= 1.0;
  JOIN_TAB *end_tab= NULL;
  JOIN_TAB *tab;
  int i;
  int last_sj_table= MAX_TABLES;

  /*
    Handle a special case where the join is degenerate, and produces no
    records
  */
  if (table_count == const_tables)
  {
    *read_time_arg= 0.0;
    /*
      We return 1, because
       - it is the pessimistic estimate (there might be grouping)
       - it's safer, as we're less likely to hit the edge cases in
         calculations.
    */
    *record_count_arg=1.0;
    return;
  }

  /* First pass: locate the JOIN_TAB at depth-first position end_tab_idx. */
  for (tab= first_depth_first_tab(this), i= const_tables;
       tab;
       tab= next_depth_first_tab(this, tab), i++)
  {
    end_tab= tab;
    if (i == end_tab_idx)
      break;
  }

  /* Second pass: accumulate cost/fanout up to and including end_tab. */
  for (tab= first_depth_first_tab(this), i= const_tables;
       ;
       tab= next_depth_first_tab(this, tab), i++)
  {
    if (end_tab->bush_root_tab && end_tab->bush_root_tab == tab)
    {
      /*
        We've entered the SJM nest that contains the end_tab. The caller is
        - interested in fanout inside the nest (because that's how many times
          we'll invoke the attached WHERE conditions)
        - not interested in cost
      */
      record_count= 1.0;
      read_time= 0.0;
    }

    /*
      Ignore fanout (but not cost) from sj-inner tables, as long as
      the range that processes them finishes before the end_tab
    */
    if (tab->sj_strategy != SJ_OPT_NONE)
    {
      sj_inner_fanout= 1.0;
      last_sj_table= i + tab->n_sj_tables;
    }

    table_map cur_table_map;
    if (tab->table)
      cur_table_map= tab->table->map;
    else
    {
      /* This is a SJ-Materialization nest. Check all of its tables */
      TABLE *first_child= tab->bush_children->start->table;
      TABLE_LIST *sjm_nest= first_child->pos_in_table_list->embedding;
      cur_table_map= sjm_nest->nested_join->used_tables;
    }
    if (tab->records_read && (cur_table_map & filter_map))
    {
      record_count= COST_MULT(record_count, tab->records_read);
      read_time= COST_ADD(read_time,
                          COST_ADD(tab->read_time,
                                   record_count / TIME_FOR_COMPARE));
      if (tab->emb_sj_nest)
        sj_inner_fanout= COST_MULT(sj_inner_fanout, tab->records_read);
    }

    if (i == last_sj_table)
    {
      /* The semi-join range has ended: cancel its fanout contribution. */
      record_count /= sj_inner_fanout;
      sj_inner_fanout= 1.0;
      last_sj_table= MAX_TABLES;
    }

    if (tab == end_tab)
      break;
  }
  *read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE;
  *record_count_arg= record_count;
}


/*
  Get prefix cost and fanout.
This function is different from + get_partial_cost_and_fanout: + - it operates on a JOIN that haven't yet finished its optimization phase (in + particular, fix_semijoin_strategies_for_picked_join_order() and + get_best_combination() haven't been called) + - it assumes the the join prefix doesn't have any semi-join plans + + These assumptions are met by the caller of the function. +*/ + +void JOIN::get_prefix_cost_and_fanout(uint n_tables, + double *read_time_arg, + double *record_count_arg) +{ + double record_count= 1; + double read_time= 0.0; + for (uint i= const_tables; i < n_tables + const_tables ; i++) + { + if (best_positions[i].records_read) + { + record_count= COST_MULT(record_count, best_positions[i].records_read); + read_time= COST_ADD(read_time, best_positions[i].read_time); + } + /* TODO: Take into account condition selectivities here */ + } + *read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE; + *record_count_arg= record_count; +} + + +/** + Estimate the number of rows that query execution will read. + + @todo This is a very pessimistic upper bound. Use join selectivity + when available to produce a more realistic number. +*/ + +double JOIN::get_examined_rows() +{ + double examined_rows; + double prev_fanout= 1; + double records; + JOIN_TAB *tab= first_breadth_first_tab(); + JOIN_TAB *prev_tab= tab; + + records= (double)tab->get_examined_rows(); + + while ((tab= next_breadth_first_tab(first_breadth_first_tab(), + top_join_tab_count, tab))) + { + prev_fanout= COST_MULT(prev_fanout, prev_tab->records_read); + records= + COST_ADD(records, + COST_MULT((double) (tab->get_examined_rows()), prev_fanout)); + prev_tab= tab; + } + examined_rows= (double) + (records > (double) HA_ROWS_MAX ? 
     HA_ROWS_MAX : (ha_rows) records);
  return examined_rows;
}


/**
  @brief
  Get the selectivity of equalities between columns when joining a table

  @param join       The optimized join
  @param idx        The number of tables in the evaluated partial join
  @param s          The table to be joined for evaluation
  @param rem_tables The bitmap of tables to be joined later
  @param keyparts   The number of key parts to be used when joining s
  @param ref_keyuse_steps Array of references to keyuses employed to join s
*/

static
double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
                                       table_map rem_tables, uint keyparts,
                                       uint16 *ref_keyuse_steps)
{
  double sel= 1.0;
  COND_EQUAL *cond_equal= join->cond_equal;

  /* Nothing to do without multiple equalities or keyuse info for 's'. */
  if (!cond_equal || !cond_equal->current_level.elements || !s->keyuse)
    return sel;

  Item_equal *item_equal;
  List_iterator_fast<Item_equal> it(cond_equal->current_level);
  TABLE *table= s->table;
  table_map table_bit= table->map;
  POSITION *pos= &join->positions[idx];

  while ((item_equal= it++))
  {
    /*
      Check whether we need to take into account the selectivity of
      multiple equality item_equal. If this is the case multiply
      the current value of sel by this selectivity
    */
    table_map used_tables= item_equal->used_tables();
    if (!(used_tables & table_bit))
      continue;
    if (item_equal->get_const())
      continue;

    bool adjust_sel= FALSE;
    Item_equal_fields_iterator fi(*item_equal);
    while((fi++) && !adjust_sel)
    {
      Field *fld= fi.get_curr_field();
      if (fld->table->map != table_bit)
        continue;
      if (pos->key == 0)
        adjust_sel= TRUE;
      else
      {
        uint i;
        KEYUSE *keyuse= pos->key;
        uint key= keyuse->key;
        /* Check whether fld is one of the ref access key parts. */
        for (i= 0; i < keyparts; i++)
        {
          if (i > 0)
            keyuse+= ref_keyuse_steps[i-1];
          uint fldno;
          if (is_hash_join_key_no(key))
            fldno= keyuse->keypart;
          else
            fldno= table->key_info[key].key_part[i].fieldnr - 1;
          if (fld->field_index == fldno)
            break;
        }
        keyuse= pos->key;

        if (i == keyparts)
        {
          /*
            Field fld is included in multiple equality item_equal
            and is not a part of the ref key.
            The selectivity of the multiple equality must be taken
            into account unless one of the ref arguments is
            equal to fld.
          */
          adjust_sel= TRUE;
          for (uint j= 0; j < keyparts && adjust_sel; j++)
          {
            if (j > 0)
              keyuse+= ref_keyuse_steps[j-1];
            Item *ref_item= keyuse->val;
            if (ref_item->real_item()->type() == Item::FIELD_ITEM)
            {
              Item_field *field_item= (Item_field *) (ref_item->real_item());
              if (item_equal->contains(field_item->field))
                adjust_sel= FALSE;
            }
          }
        }
      }
    }
    if (adjust_sel)
    {
      /*
        If ref == 0 and there are no fields in the multiple equality
        item_equal that belong to the tables joined prior to s
        then the selectivity of multiple equality will be set to 1.0.
      */
      double eq_fld_sel= 1.0;
      fi.rewind();
      while ((fi++))
      {
        double curr_eq_fld_sel;
        Field *fld= fi.get_curr_field();
        if (!(fld->table->map & ~(table_bit | rem_tables)))
          continue;
        curr_eq_fld_sel= get_column_avg_frequency(fld) /
                         fld->table->stat_records();
        /*
          NOTE(review): eq_fld_sel starts at 1.0 and curr_eq_fld_sel < 1.0
          here, so set_if_bigger() can never change eq_fld_sel; verify
          against upstream whether set_if_smaller() was intended.
        */
        if (curr_eq_fld_sel < 1.0)
          set_if_bigger(eq_fld_sel, curr_eq_fld_sel);
      }
      sel*= eq_fld_sel;
    }
  }
  return sel;
}


/**
  @brief
  Get the selectivity of conditions when joining a table

  @param join       The optimized join
  @param s          The table to be joined for evaluation
  @param rem_tables The bitmap of tables to be joined later

  @detail
  Get selectivity of conditions that can be applied when joining this table
  with previous tables.

  For quick selects and full table scans, selectivity of COND(this_table)
  is accounted for in apply_selectivity_for_table(). Here, we only count
  selectivity of COND(this_table, previous_tables).

  For other access methods, we need to calculate selectivity of the whole
  condition, "COND(this_table) AND COND(this_table, previous_tables)".

  @retval
  selectivity of the conditions imposed on the rows of s
*/

static
double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
                              table_map rem_tables)
{
  /* Stack buffer for keyuse step offsets; grown on the heap if needed. */
  uint16 ref_keyuse_steps_buf[MAX_REF_PARTS];
  uint   ref_keyuse_size= MAX_REF_PARTS;
  uint16 *ref_keyuse_steps= ref_keyuse_steps_buf;
  Field *field;
  TABLE *table= s->table;
  MY_BITMAP *read_set= table->read_set;
  double sel= s->table->cond_selectivity;
  POSITION *pos= &join->positions[idx];
  uint keyparts= 0;
  uint found_part_ref_or_null= 0;

  if (pos->key != 0)
  {
    /*
      A ref access or hash join is used for this table. ref access is created
      from

        tbl.keypart1=expr1 AND tbl.keypart2=expr2 AND ...

      and it will only return rows for which this condition is satisfied.
      Suppose, certain expr{i} is a constant. Since ref access only returns
      rows that satisfy

         tbl.keypart{i}=const       (*)

      then selectivity of this equality should not be counted in return value
      of this function. This function uses the value of

         table->cond_selectivity=selectivity(COND(tbl)) (**)

      as a starting point. This value includes selectivity of equality (*). We
      should somehow discount it.

      Looking at calculate_cond_selectivity_for_table(), one can see that
      the value is not necessarily a direct multiplicand in
      table->cond_selectivity

      There are three possible ways to discount
      1. There is a potential range access on t.keypart{i}=const.
         (an important special case: the used ref access has a const prefix for
          which a range estimate is available)

      2. The field has a histogram. field[x]->cond_selectivity has the data.

      3. Use index stats on this index:
         rec_per_key[key_part+1]/rec_per_key[key_part]

      (TODO: more details about the "t.key=othertable.col" case)
    */
    KEYUSE *keyuse= pos->key;
    KEYUSE *prev_ref_keyuse= keyuse;
    uint key= keyuse->key;
    bool used_range_selectivity= false;

    /*
      Check if we have a prefix of key=const that matches a quick select.
    */
    if (!is_hash_join_key_no(key) && table->opt_range_keys.is_set(key))
    {
      key_part_map quick_key_map= (key_part_map(1) <<
                                   table->opt_range[key].key_parts) - 1;
      if (table->opt_range[key].rows &&
          !(quick_key_map & ~table->const_key_parts[key]))
      {
        /*
          Ok, there is an equality for each of the key parts used by the
          quick select. This means, quick select's estimate can be reused to
          discount the selectivity of a prefix of a ref access.
        */
        for (; quick_key_map & 1 ; quick_key_map>>= 1)
        {
          while (keyuse->table == table && keyuse->key == key &&
                 keyuse->keypart == keyparts)
          {
            keyuse++;
          }
          keyparts++;
        }
        /*
          Here we discount selectivity of the constant range CR. To calculate
          this selectivity we use elements from the quick_rows[] array.
          If we have indexes i1,...,ik with the same prefix compatible
          with CR any of the estimate quick_rows[i1], ... quick_rows[ik] could
          be used for this calculation but here we don't know which one was
          actually used. So sel could be greater than 1 and we have to cap it.
          However if sel becomes greater than 2 then with high probability
          something went wrong.
        */
        sel /= (double)table->opt_range[key].rows / (double) table->stat_records();
        set_if_smaller(sel, 1.0);
        used_range_selectivity= true;
      }
    }

    /*
      Go through the "keypart{N}=..." equalities and find those that were
      already taken into account in table->cond_selectivity.
    */
    keyuse= pos->key;
    keyparts=0;
    while (keyuse->table == table && keyuse->key == key)
    {
      if (!(keyuse->used_tables & (rem_tables | table->map)))
      {
        if (are_tables_local(s, keyuse->val->used_tables()))
        {
          if (is_hash_join_key_no(key))
          {
            if (keyparts == keyuse->keypart)
              keyparts++;
          }
          else
          {
            if (keyparts == keyuse->keypart &&
                !((keyuse->val->used_tables()) & ~pos->ref_depend_map) &&
                !(found_part_ref_or_null & keyuse->optimize))
            {
              /* Found a KEYUSE object that will be used by ref access */
              keyparts++;
              found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ;
            }
          }

          if (keyparts > keyuse->keypart)
          {
            /* Ok this is the keyuse that will be used for ref access */
            if (!used_range_selectivity && keyuse->val->const_item())
            {
              uint fldno;
              if (is_hash_join_key_no(key))
                fldno= keyuse->keypart;
              else
                fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1;

              if (table->field[fldno]->cond_selectivity > 0)
              {
                sel /= table->field[fldno]->cond_selectivity;
                set_if_smaller(sel, 1.0);
              }
              /*
               TODO: we could do better here:
                 1. cond_selectivity might be =1 (the default) because quick
                    select on some index prevented us from analyzing
                    histogram for this column.
                 2. we could get an estimate through this?
                     rec_per_key[key_part-1] / rec_per_key[key_part]
              */
            }
            if (keyparts > 1)
            {
              /*
                Prepare to set ref_keyuse_steps[keyparts-2]: resize the array
                if it is not large enough
              */
              if (keyparts - 2 >= ref_keyuse_size)
              {
                uint new_size= MY_MAX(ref_keyuse_size*2, keyparts);
                void *new_buf;
                if (!(new_buf= my_malloc(PSI_INSTRUMENT_ME,
                                         sizeof(*ref_keyuse_steps)*new_size,
                                         MYF(0))))
                {
                  sel= 1.0; // As if no selectivity was computed
                  goto exit;
                }
                memcpy(new_buf, ref_keyuse_steps,
                       sizeof(*ref_keyuse_steps)*ref_keyuse_size);
                if (ref_keyuse_steps != ref_keyuse_steps_buf)
                  my_free(ref_keyuse_steps);

                ref_keyuse_steps= (uint16*)new_buf;
                ref_keyuse_size= new_size;
              }

              ref_keyuse_steps[keyparts-2]= (uint16)(keyuse - prev_ref_keyuse);
              prev_ref_keyuse= keyuse;
            }
          }
        }
      }
      keyuse++;
    }
  }
  else
  {
    /*
      The table is accessed with full table scan, or quick select.
      Selectivity of COND(table) is already accounted for in
      apply_selectivity_for_table().
    */
    sel= 1;
  }

  /*
    If the field f from the table is equal to a field from one of the
    earlier joined tables then the selectivity of the range conditions
    over the field f must be discounted.

    We need to discount selectivity only if we're using ref-based
    access method (and have sel!=1).
    If we use ALL/range/index_merge, then sel==1, and no need to discount.
+ */ + if (pos->key != NULL) + { + for (Field **f_ptr=table->field ; (field= *f_ptr) ; f_ptr++) + { + if (!bitmap_is_set(read_set, field->field_index) || + !field->next_equal_field) + continue; + for (Field *next_field= field->next_equal_field; + next_field != field; + next_field= next_field->next_equal_field) + { + if (!(next_field->table->map & rem_tables) && next_field->table != table) + { + if (field->cond_selectivity > 0) + { + sel/= field->cond_selectivity; + set_if_smaller(sel, 1.0); + } + break; + } + } + } + } + + sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables, + keyparts, ref_keyuse_steps); +exit: + if (ref_keyuse_steps != ref_keyuse_steps_buf) + my_free(ref_keyuse_steps); + return sel; +} + + +/* + Check if the table is an EQ_REF or similar table and there is no cost + to gain by moveing it to a later stage. + We call such a table a edge table (or hanging leaf) as it will read at + most one row and will not add to the number of row combinations in the join. +*/ + +static inline enum_best_search +check_if_edge_table(POSITION *pos, + double pushdown_cond_selectivity) +{ + + if ((pos->type == JT_EQ_REF || + (pos->type == JT_REF && + pos->records_read == 1 && + !pos->range_rowid_filter_info)) && + pushdown_cond_selectivity >= 0.999) + return SEARCH_FOUND_EDGE; + return SEARCH_OK; +} + + +struct SORT_POSITION +{ + JOIN_TAB **join_tab; + POSITION *position; +}; + + +/* + Sort SORT_POSITIONS according to expected number of rows found + If number of combinations are the same sort according to join_tab order + (same table order as used in the original SQL query) +*/ + +static int +sort_positions(SORT_POSITION *a, SORT_POSITION *b) +{ + int cmp; + if ((cmp= compare_embedding_subqueries(*a->join_tab, *b->join_tab)) != 0) + return cmp; + + if (a->position->records_read > b->position->records_read) + return 1; + if (a->position->records_read < b->position->records_read) + return -1; + return CMP_NUM(*a->join_tab, *b->join_tab); +} + + +/* + Call 
best_access_path() for a set of tables and collect results + + @param join JOIN object + @param trace_one_table Current optimizer_trace + @param pos Pointer to remanining tables + @param allowed_tables bitmap of allowed tables. On return set to + the collected tables. + @param store_poisition Points to where to store next found SORT_POSITION. + Will be updated to next free position. + @param stop_on_eq_ref Stop searching for more tables if we found an EQ_REF + table. + + @return + 0 Normal + 1 Eq_ref table found (only if stop_on_eq_ref is used) + + join->next_sort_position will be update to next free position. +*/ + +static bool +get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx, + double record_count, + Json_writer_object *trace_one_table, + JOIN_TAB **pos, SORT_POSITION **store_position, + table_map *allowed_tables, + bool stop_on_eq_ref) +{ + THD *thd= join->thd; + POSITION *sort_position= join->next_sort_position; + SORT_POSITION *sort_end= *store_position; + JOIN_TAB *s; + table_map found_tables= 0; + bool found_eq_ref= 0; + bool disable_jbuf= join->thd->variables.join_cache_level == 0; + DBUG_ENTER("get_plans_for_tables"); + + s= *pos; + do + { + table_map real_table_bit= s->table->map; + if ((*allowed_tables & real_table_bit) && + !(remaining_tables & s->dependent)) + { +#ifdef DBUG_ASSERT_EXISTS + DBUG_ASSERT(!check_interleaving_with_nj(s)); + restore_prev_nj_state(s); // Revert effect of check_... call +#endif + sort_end->join_tab= pos; + sort_end->position= sort_position; + + + Json_writer_object wrapper(thd); + /* Find the best access method from 's' to the current partial plan */ + best_access_path(join, s, remaining_tables, join->positions, idx, + disable_jbuf, record_count, + sort_position, sort_position + 1); + found_tables|= s->table->map; + sort_end++; + sort_position+= 2; + if (unlikely(stop_on_eq_ref) && sort_position[-2].type == JT_EQ_REF) + { + /* Found an eq_ref tables. 
Use this, ignoring the other tables */ + found_eq_ref= 1; + if (found_tables == s->table->map) + break; // First table + + /* Store the found eq_ref table first in store_position */ + sort_position-= 2; + *allowed_tables= s->table->map; + (*store_position)->join_tab= pos; + (*store_position)->position= sort_position; + (*store_position)++; + join->next_sort_position[0]= sort_position[0]; + join->next_sort_position[1]= sort_position[1]; + join->next_sort_position+= 2; + DBUG_RETURN(1); + } + } + else + { + /* Verify that 'allowed_current_tables' was calculated correctly */ + DBUG_ASSERT((remaining_tables & s->dependent) || + !(remaining_tables & real_table_bit) || + !(*allowed_tables & real_table_bit) || + check_interleaving_with_nj(s)); + } + } while ((s= *++pos)); + + *allowed_tables= found_tables; + *store_position= sort_end; + join->next_sort_position= sort_position; + DBUG_RETURN(found_eq_ref); +} + +/** + Find a good, possibly optimal, query execution plan (QEP) by a possibly + exhaustive search. + + The procedure searches for the optimal ordering of the query tables in set + 'remaining_tables' of size N, and the corresponding optimal access paths to + each table. The choice of a table order and an access path for each table + constitutes a query execution plan (QEP) that fully specifies how to + execute the query. + + The maximal size of the found plan is controlled by the parameter + 'search_depth'. When search_depth == N, the resulting plan is complete and + can be used directly as a QEP. If search_depth < N, the found plan consists + of only some of the query tables. Such "partial" optimal plans are useful + only as input to query optimization procedures, and cannot be used directly + to execute a query. + + The algorithm begins with an empty partial plan stored in 'join->positions' + and a set of N tables - 'remaining_tables'. 
Each step of the algorithm + evaluates the cost of the partial plan extended by all access plans for + each of the relations in 'remaining_tables', expands the current partial + plan with the access plan that results in lowest cost of the expanded + partial plan, and removes the corresponding relation from + 'remaining_tables'. The algorithm continues until it either constructs a + complete optimal plan, or constructs an optimal plartial plan with size = + search_depth. + + The final optimal plan is stored in 'join->best_positions'. The + corresponding cost of the optimal plan is in 'join->best_read'. + + @note + The procedure uses a recursive depth-first search where the depth of the + recursion (and thus the exhaustiveness of the search) is controlled by the + parameter 'search_depth'. + + @note + The pseudocode below describes the algorithm of + 'best_extension_by_limited_search'. The worst-case complexity of this + algorithm is O(N*N^search_depth/search_depth). When serch_depth >= N, then + the complexity of greedy_search is O(N!). + + @code + procedure best_extension_by_limited_search( + pplan in, // in, partial plan of tables-joined-so-far + pplan_cost, // in, cost of pplan + remaining_tables, // in, set of tables not referenced in pplan + best_plan_so_far, // in/out, best plan found so far + best_plan_so_far_cost,// in/out, cost of best_plan_so_far + search_depth) // in, maximum size of the plans being considered + { + for each table T from remaining_tables + { + // Calculate the cost of using table T as above + cost = complex-series-of-calculations; + + // Add the cost to the cost so far. 
+ pplan_cost+= cost; + + if (pplan_cost >= best_plan_so_far_cost) + // pplan_cost already too great, stop search + continue; + + pplan= expand pplan by best_access_method; + remaining_tables= remaining_tables - table T; + if (remaining_tables is not an empty set + and + search_depth > 1) + { + best_extension_by_limited_search(pplan, pplan_cost, + remaining_tables, + best_plan_so_far, + best_plan_so_far_cost, + search_depth - 1); + } + else + { + best_plan_so_far_cost= pplan_cost; + best_plan_so_far= pplan; + } + } + } + @endcode + + @note + When 'best_extension_by_limited_search' is called for the first time, + 'join->best_read' must be set to the largest possible value (e.g. DBL_MAX). + The actual implementation provides a way to optionally use pruning + heuristic to reduce the search space by skipping some partial plans. + + @note + The parameter 'search_depth' provides control over the recursion + depth, and thus the size of the resulting optimal plan. + + @param join pointer to the structure providing all context info + for the query + @param remaining_tables set of tables not included into the partial plan yet + @param idx length of the partial QEP in 'join->positions'; + since a depth-first search is used, also corresponds + to the current depth of the search tree; + also an index in the array 'join->best_ref'; + @param record_count estimate for the number of records returned by the + best partial plan + @param read_time the cost of the best partial plan + @param search_depth maximum depth of the recursion and thus size of the + found optimal plan + (0 < search_depth <= join->tables+1). 
+ (values: 0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS) + @param use_cond_selectivity specifies how the selectivity of the conditions + pushed to a table should be taken into account + + @retval + enum_best_search::SEARCH_OK All fine + @retval + enum_best_search::SEARCH_FOUND_EDGE All remaning tables are edge tables + @retval + enum_best_search::SEARCH_ABORT Killed by user + @retval + enum_best_search::SEARCH_ERROR Fatal error +*/ + + +static enum_best_search +best_extension_by_limited_search(JOIN *join, + table_map remaining_tables, + uint idx, + double record_count, + double read_time, + uint search_depth, + uint use_cond_selectivity, + table_map *processed_eq_ref_tables) +{ + THD *thd= join->thd; + /* + 'join' is a partial plan with lower cost than the best plan so far, + so continue expanding it further with the tables in 'remaining_tables'. + */ + JOIN_TAB *s; + double best_record_count= DBL_MAX; + double best_read_time= DBL_MAX; + enum_best_search best_res; + uint tables_left= join->table_count - idx, found_tables; + uint accepted_tables __attribute__((unused)); + table_map found_eq_ref_tables= 0, used_eq_ref_table= 0; + table_map allowed_tables, allowed_current_tables; + SORT_POSITION *sort= (SORT_POSITION*) alloca(sizeof(SORT_POSITION)*tables_left); + SORT_POSITION *sort_end; + DBUG_ENTER("best_extension_by_limited_search"); + + DBUG_EXECUTE_IF("show_explain_probe_best_ext_lim_search", + if (dbug_user_var_equals_int(thd, + "show_explain_probe_select_id", + join->select_lex->select_number)) + dbug_serve_apcs(thd, 1); + ); + + if (unlikely(thd->check_killed())) // Abort + DBUG_RETURN(SEARCH_ABORT); + + DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time, + "part_plan");); + status_var_increment(thd->status_var.optimizer_join_prefixes_check_calls); + + if (join->emb_sjm_nest) + { + /* + If we are searching for the execution plan of a materialized semi-join nest + then allowed_tables contains bits only for the tables from this nest. 
+ */ + allowed_tables= (join->emb_sjm_nest->sj_inner_tables & remaining_tables); + allowed_current_tables= join->get_allowed_nj_tables(idx) & remaining_tables; + } + else + { + /* + allowed_tables is used to check if there are tables left that can improve + a key search and to see if there are more tables to add in next iteration. + + allowed_current_tables tells us which tables we can add to the current + plan at this stage. + */ + allowed_tables= remaining_tables; + allowed_current_tables= join->get_allowed_nj_tables(idx) & remaining_tables; + } + DBUG_ASSERT(allowed_tables & remaining_tables); + + sort_end= sort; + { + Json_writer_object trace_one_table(thd); + JOIN_TAB **best_ref= join->best_ref + idx; + if (unlikely(thd->trace_started())) + trace_plan_prefix(join, idx, remaining_tables); + + Json_writer_array arr(thd, "get_costs_for_tables"); + + if (idx > join->const_tables && join->prune_level >= 2 && + join->positions[idx-1].type == JT_EQ_REF && + (join->eq_ref_tables & allowed_current_tables)) + { + /* Previous table was an EQ REF table, only add other possible EQ_REF + tables to the chain, stop after first one is found. 
+ */ + table_map table_map= join->eq_ref_tables & allowed_current_tables; + if (get_costs_for_tables(join, remaining_tables, idx, record_count, + &trace_one_table, best_ref, &sort_end, + &table_map, 1)) + used_eq_ref_table= (*sort->join_tab)->table->map; + else + { + /* We didn't find another EQ_REF table, add remaining tables */ + if ((table_map= allowed_current_tables & ~table_map)) + get_costs_for_tables(join, remaining_tables, idx, record_count, + &trace_one_table, best_ref, &sort_end, &table_map, + 0); + } + } + else + { + table_map table_map= allowed_current_tables; + get_costs_for_tables(join, remaining_tables, idx, record_count, + &trace_one_table, best_ref, &sort_end, &table_map, + 0); + } + found_tables= (uint) (sort_end - sort); + DBUG_ASSERT(found_tables > 0); + + /* + Sort tables in ascending order of generated row combinations + */ + if (found_tables > 1) + my_qsort(sort, found_tables, sizeof(SORT_POSITION), + (qsort_cmp) sort_positions); + } + DBUG_ASSERT(join->next_sort_position <= + join->sort_positions + join->sort_space); + + accepted_tables= 0; + double min_rec_count= DBL_MAX; + double min_rec_count_read_time= DBL_MAX; + + double min_cost= DBL_MAX; + double min_cost_record_count= DBL_MAX; + + for (SORT_POSITION *pos= sort ; pos < sort_end ; pos++) + { + s= *pos->join_tab; + if (!(found_eq_ref_tables & s->table->map) && + !check_interleaving_with_nj(s)) + { + table_map real_table_bit= s->table->map; + double current_record_count, current_read_time; + double partial_join_cardinality; + POSITION *position= join->positions + idx, *loose_scan_pos; + Json_writer_object trace_one_table(thd); + + if (unlikely(thd->trace_started())) + { + trace_plan_prefix(join, idx, remaining_tables); + trace_one_table.add_table_name(s); + } + + accepted_tables++; + *position= *pos->position; // Get stored result + loose_scan_pos= pos->position+1; + + /* Compute the cost of the new plan extended with 's' */ + current_record_count= COST_MULT(record_count, 
position->records_read); + const double filter_cmp_gain= position->range_rowid_filter_info + ? position->range_rowid_filter_info->get_cmp_gain(current_record_count) + : 0; + current_read_time= COST_ADD(read_time, + COST_ADD(position->read_time - + filter_cmp_gain, + current_record_count / + TIME_FOR_COMPARE)); + + if (unlikely(thd->trace_started())) + { + trace_one_table.add("rows_for_plan", current_record_count); + trace_one_table.add("cost_for_plan", current_read_time); + } + optimize_semi_joins(join, remaining_tables, idx, ¤t_record_count, + ¤t_read_time, loose_scan_pos); + + /* Expand only partial plans with lower cost than the best QEP so far */ + if (current_read_time >= join->best_read) + { + DBUG_EXECUTE("opt", print_plan(join, idx+1, + current_record_count, + read_time, + current_read_time, + "prune_by_cost");); + trace_one_table + .add("pruned_by_cost", true) + .add("current_cost", current_read_time) + .add("best_cost", join->best_read + COST_EPS); + + restore_prev_nj_state(s); + restore_prev_sj_state(remaining_tables, s, idx); + continue; + } + + /* + Prune some less promising partial plans. This heuristic may miss + the optimal QEPs, thus it results in a non-exhaustive search. + */ + if (join->prune_level >= 1) + { + // Collect the members with min_cost and min_read_time. 
+ bool min_rec_hit= false; + bool min_cost_hit= false; + + if (join->extra_heuristic_pruning && + (!(position->key_dependent & allowed_tables) || + position->records_read < 2.0)) + { + if (current_record_count < min_rec_count) + { + min_rec_count= current_record_count; + min_rec_count_read_time= current_read_time; + min_rec_hit= true; + } + + if (current_read_time < min_cost) + { + min_cost_record_count= current_record_count; + min_cost= current_read_time; + min_cost_hit= true; + } + } + + if (best_record_count > current_record_count || + best_read_time > current_read_time || + (idx == join->const_tables && // 's' is the first table in the QEP + s->table == join->sort_by_table)) + { + /* + Store the current record count and cost as the best + possible cost at this level if the following holds: + - It's the lowest record number and cost so far + - There is no remaing table that could improve index usage + or we found an EQ_REF or REF key with less than 2 + matching records (good enough). + */ + if (best_record_count >= current_record_count && + best_read_time >= current_read_time && + (!(position->key_dependent & allowed_tables) || + position->records_read < 2.0)) + { + best_record_count= current_record_count; + best_read_time= current_read_time; + } + } + else + { + /* + Typically, we get here if: + best_record_count < current_record_count && + best_read_time < current_read_time + That is, both record_count and read_time are worse than the best_ + ones. This plan doesn't look promising, prune it away. 
+ */ + DBUG_EXECUTE("opt", print_plan(join, idx+1, + current_record_count, + read_time, + current_read_time, + "pruned_by_heuristic");); + trace_one_table.add("pruned_by_heuristic", true); + restore_prev_nj_state(s); + restore_prev_sj_state(remaining_tables, s, idx); + continue; + } + + const char* prune_reason= NULL; + if (!min_rec_hit && + current_record_count >= min_rec_count && + current_read_time >= min_rec_count_read_time) + prune_reason= "min_record_count"; + + if (!min_cost_hit && + current_record_count >= min_cost_record_count && + current_read_time >= min_cost) + prune_reason= "min_read_time"; + + if (prune_reason) + { + trace_one_table.add("pruned_by_heuristic", prune_reason); + restore_prev_nj_state(s); + restore_prev_sj_state(remaining_tables, s, idx); + continue; + } + } + + double pushdown_cond_selectivity= 1.0; + if (use_cond_selectivity > 1) + pushdown_cond_selectivity= table_cond_selectivity(join, idx, s, + remaining_tables & + ~real_table_bit); + join->positions[idx].cond_selectivity= pushdown_cond_selectivity; + + partial_join_cardinality= (current_record_count * + pushdown_cond_selectivity); + + if (unlikely(thd->trace_started())) + { + if (pushdown_cond_selectivity < 1.0) + { + trace_one_table.add("selectivity", pushdown_cond_selectivity); + trace_one_table.add("estimated_join_cardinality", + partial_join_cardinality); + } + } + + join->positions[idx].partial_join_cardinality= partial_join_cardinality; + + if ((search_depth > 1) && (remaining_tables & ~real_table_bit) & + allowed_tables) + { + /* Recursively expand the current partial plan */ + Json_writer_array trace_rest(thd, "rest_of_plan"); + + swap_variables(JOIN_TAB*, join->best_ref[idx], *pos->join_tab); + best_res= + best_extension_by_limited_search(join, + remaining_tables & + ~real_table_bit, + idx + 1, + partial_join_cardinality, + current_read_time, + search_depth - 1, + use_cond_selectivity, + &found_eq_ref_tables); + swap_variables(JOIN_TAB*, join->best_ref[idx], *pos->join_tab); 
+ + if ((int) best_res < (int) SEARCH_OK) + goto end; // Return best_res + if (best_res == SEARCH_FOUND_EDGE && + check_if_edge_table(join->positions+ idx, + pushdown_cond_selectivity) != + SEARCH_FOUND_EDGE) + best_res= SEARCH_OK; + } + else + { + /* + 'join' is either the best partial QEP with 'search_depth' relations, + or the best complete QEP so far, whichever is smaller. + */ + if (join->sort_by_table && + join->sort_by_table != + join->positions[join->const_tables].table->table) + { + /* + We may have to make a temp table, note that this is only a + heuristic since we cannot know for sure at this point. + Hence it may be wrong. + */ + trace_one_table.add("cost_for_sorting", current_record_count); + current_read_time= COST_ADD(current_read_time, current_record_count); + } + if (current_read_time < join->best_read) + { + memcpy((uchar*) join->best_positions, (uchar*) join->positions, + sizeof(POSITION) * (idx + 1)); + join->join_record_count= partial_join_cardinality; + join->best_read= current_read_time - COST_EPS; + } + DBUG_EXECUTE("opt", print_plan(join, idx+1, + current_record_count, + read_time, + current_read_time, + "full_plan");); + best_res= check_if_edge_table(join->positions + idx, + pushdown_cond_selectivity); + } + restore_prev_nj_state(s); + restore_prev_sj_state(remaining_tables, s, idx); + if (best_res == SEARCH_FOUND_EDGE) + { + if (pos+1 < sort_end) // If not last table + trace_one_table.add("pruned_by_hanging_leaf", true); + goto end; + } + } + } + DBUG_ASSERT(accepted_tables > 0); + best_res= SEARCH_OK; + +end: + join->next_sort_position-= found_tables*2; + if (used_eq_ref_table) + *processed_eq_ref_tables|= used_eq_ref_table | found_eq_ref_tables; + else + *processed_eq_ref_tables= 0; + DBUG_RETURN(best_res); +} + + +/** + Find how much space the prevous read not const tables takes in cache. 
*/

void JOIN_TAB::calc_used_field_length(bool max_fl)
{
  uint null_fields,blobs,fields;
  ulong rec_length;
  Field **f_ptr,*field;
  uint uneven_bit_fields;
  MY_BITMAP *read_set= table->read_set;

  /* Sum up the packed length of every field that is read from this table */
  uneven_bit_fields= null_fields= blobs= fields= rec_length=0;
  for (f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
  {
    if (bitmap_is_set(read_set, field->field_index))
    {
      uint flags=field->flags;
      fields++;
      rec_length+=field->pack_length();
      if (flags & BLOB_FLAG)
        blobs++;
      if (!(flags & NOT_NULL_FLAG))
        null_fields++;
      /* BIT fields may keep some bits outside the byte-aligned record part */
      if (field->type() == MYSQL_TYPE_BIT &&
          ((Field_bit*)field)->bit_len)
        uneven_bit_fields++;
    }
  }
  /* Add room for the NULL bitmap if any read column needs it */
  if (null_fields || uneven_bit_fields)
    rec_length+=(table->s->null_fields+7)/8;
  if (table->maybe_null)
    rec_length+=sizeof(my_bool);

  /* Take into account that DuplicateElimination may need to store rowid */
  uint rowid_add_size= 0;
  if (keep_current_rowid)
  {
    rowid_add_size= table->file->ref_length;
    rec_length += rowid_add_size;
    fields++;
  }

  if (max_fl)
  {
    // TODO: to improve this estimate for max expected length
    if (blobs)
    {
      /*
        Use the handler's mean record length as the blob-size estimate,
        guarding the addition against ulong overflow.
      */
      ulong blob_length= table->file->stats.mean_rec_length;
      if (ULONG_MAX - rec_length > blob_length)
        rec_length+= blob_length;
      else
        rec_length= ULONG_MAX;
    }
    max_used_fieldlength= rec_length;
  }
  else if (table->file->stats.mean_rec_length)
    set_if_smaller(rec_length, table->file->stats.mean_rec_length + rowid_add_size);

  /* Publish the results in the JOIN_TAB members */
  used_fields=fields;
  used_fieldlength=rec_length;
  used_blobs=blobs;
  used_null_fields= null_fields;
  used_uneven_bit_fields= uneven_bit_fields;
}


/*
  @brief
  Extract pushdown conditions for a table scan

  @details
  This function extracts pushdown conditions usable when this table is scanned.
  The conditions are extracted either from WHERE or from ON expressions.
  The conditions are attached to the field cache_select of this table.

  @note
  Currently the extracted conditions are used only by BNL and BNLH join
  algorithms.

  @retval 0 on success
          1 otherwise
*/

int JOIN_TAB::make_scan_filter()
{
  COND *tmp;
  DBUG_ENTER("make_scan_filter");

  /*
    For an inner table of an outer join the candidate conditions come from
    the ON expression of the embedding join nest; otherwise from WHERE.
  */
  Item *cond= is_inner_table_of_outer_join() ?
              *get_first_inner_table()->on_expr_ref : join->conds;

  if (cond)
  {
    if ((tmp= make_cond_for_table(join->thd, cond,
                                  join->const_table_map | table->map,
                                  table->map, -1, FALSE, TRUE)))
    {
      DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY););
      /* Clone the current select so the extracted condition can be attached */
      if (!(cache_select=
            (SQL_SELECT*) join->thd->memdup((uchar*) select,
                                            sizeof(SQL_SELECT))))
        DBUG_RETURN(1);
      cache_select->cond= tmp;
      cache_select->read_tables=join->const_table_map;
    }
    else if (join->thd->is_error())
      DBUG_RETURN(1);
  }
  DBUG_RETURN(0);
}


/**
  @brief
  Check whether hash join algorithm can be used to join this table

  @details
  This function finds out whether the ref items that have been chosen
  by the planner to access this table can be used for hash join algorithms.
  The answer depends on a certain property of the fields of the
  joined tables on which the hash join key is built.

  @note
  At present the function is supposed to be called only after the function
  get_best_combination has been called.

  @retval TRUE it's possible to use hash join to join this table
  @retval FALSE otherwise
*/

bool JOIN_TAB::hash_join_is_possible()
{
  if (type != JT_REF && type != JT_EQ_REF)
    return FALSE;
  if (!is_ref_for_hash_join())
  {
    KEY *keyinfo= table->key_info + ref.key;
    /* The first key part's field decides whether a hash key can be built */
    return keyinfo->key_part[0].field->hash_join_is_possible();
  }
  return TRUE;
}


/**
  @brief
  Check whether a KEYUSE can be really used for access this join table

  @param join Join structure with the best join order
              for which the check is performed
  @param keyuse Evaluated KEYUSE structure

  @details
  This function is supposed to be used after the best execution plan has been
  already chosen and the JOIN_TAB array for the best join order has been set.
  For a given KEYUSE to access this JOIN_TAB in the best execution plan the
  function checks whether it really can be used. The function first performs
  the check with access_from_tables_is_allowed(). If it succeeds it checks
  whether the keyuse->val does not use some fields of a materialized semijoin
  nest that cannot be used to build keys to access outer tables.
  Such KEYUSEs exist for a query like this:
    select * from ot
    where ot.c in (select it1.c from it1, it2 where it1.c=f(it2.c))
  Here we have two KEYUSEs to access table ot: with val=it1.c and val=f(it2.c).
  However if the subquery was materialized the second KEYUSE cannot be employed
  to access ot.

  @retval true the given keyuse can be used for ref access of this JOIN_TAB
  @retval false otherwise
*/

bool JOIN_TAB::keyuse_is_valid_for_access_in_chosen_plan(JOIN *join,
                                                         KEYUSE *keyuse)
{
  if (!access_from_tables_is_allowed(keyuse->used_tables,
                                     join->sjm_lookup_tables))
    return false;
  /* The materialization check below is irrelevant for sjm-scan tables */
  if (join->sjm_scan_tables & table->map)
    return true;
  table_map keyuse_sjm_scan_tables= keyuse->used_tables &
                                    join->sjm_scan_tables;
  if (!keyuse_sjm_scan_tables)
    return true;
  /* Find the first sjm-scan table that the keyuse value depends on */
  uint sjm_tab_nr= 0;
  while (!(keyuse_sjm_scan_tables & table_map(1) << sjm_tab_nr))
    sjm_tab_nr++;
  JOIN_TAB *sjm_tab= join->map2table[sjm_tab_nr];
  TABLE_LIST *emb_sj_nest= sjm_tab->emb_sj_nest;
  if (!(emb_sj_nest->sj_mat_info && emb_sj_nest->sj_mat_info->is_used &&
        emb_sj_nest->sj_mat_info->is_sj_scan))
    return true;
  /*
    The nest is materialized and scanned: the keyuse is usable only if its
    value is one of the fields of the materialized result set.
  */
  st_select_lex *sjm_sel= emb_sj_nest->sj_subq_pred->unit->first_select();
  for (uint i= 0; i < sjm_sel->item_list.elements; i++)
  {
    DBUG_ASSERT(sjm_sel->ref_pointer_array[i]->real_item()->type() == Item::FIELD_ITEM);
    if (keyuse->val->real_item()->type() == Item::FIELD_ITEM)
    {
      Field *field = ((Item_field*)sjm_sel->ref_pointer_array[i]->real_item())->field;
      if (field->eq(((Item_field*)keyuse->val->real_item())->field))
        return true;
    }
  }
  return false;
}


/*
  Sum of get_used_fieldlength() over the non-const tables before position
  'idx' in the chosen join order, i.e. the record length of one cached
  row combination.
*/

static uint
cache_record_length(JOIN *join,uint idx)
{
  uint length=0;
  JOIN_TAB **pos,**end;

  for (pos=join->best_ref+join->const_tables,end=join->best_ref+idx ;
       pos != end ;
       pos++)
  {
    JOIN_TAB *join_tab= *pos;
    length+= join_tab->get_used_fieldlength();
  }
  return length;
}


/*
  Get the number of different row combinations for subset of partial join

  SYNOPSIS
    prev_record_reads()
      join       The join structure
      idx        Number of tables in the partial join order (i.e. the
                 partial join order is in join->positions[0..idx-1])
      found_ref  Bitmap of tables for which we need to find # of distinct
                 row combinations.

  DESCRIPTION
    Given a partial join order (in join->positions[0..idx-1]) and a subset of
    tables within that join order (specified in found_ref), find out how many
    distinct row combinations of subset tables will be in the result of the
    partial join order.

    This is used as follows: Suppose we have a table accessed with a ref-based
    method. The ref access depends on current rows of tables in found_ref.
    We want to count # of different ref accesses. We assume two ref accesses
    will be different if at least one of access parameters is different.
    Example: consider a query

    SELECT * FROM t1, t2, t3 WHERE t1.key=c1 AND t2.key=c2 AND t3.key=t1.field

    and a join order:
      t1,  ref access on t1.key=c1
      t2,  ref access on t2.key=c2
      t3,  ref access on t3.key=t1.field

    For t1: n_ref_scans = 1, n_distinct_ref_scans = 1
    For t2: n_ref_scans = records_read(t1), n_distinct_ref_scans=1
    For t3: n_ref_scans = records_read(t1)*records_read(t2)
            n_distinct_ref_scans = #records_read(t1)

    The reason for having this function (at least the latest version of it)
    is that we need to account for buffering in join execution.

    An edge-case example: if we have a non-first table in join accessed via
    ref(const) or ref(param) where there is a small number of different
    values of param, then the access will likely hit the disk cache and will
    not require any disk seeks.

    The proper solution would be to assume an LRU disk cache of some size,
    calculate probability of cache hits, etc. For now we just count
    identical ref accesses as one.

  RETURN
    Expected number of row combinations
*/

double
prev_record_reads(const POSITION *positions, uint idx, table_map found_ref)
{
  double found=1.0;
  const POSITION *pos_end= positions - 1;
  /*
    Walk the join prefix backwards. Every table in found_ref contributes
    its row estimate, and its own ref dependencies are merged into
    found_ref so transitively-depended-on tables are counted too.
  */
  for (const POSITION *pos= positions + idx - 1; pos != pos_end; pos--)
  {
    if (pos->table->table->map & found_ref)
    {
      found_ref|= pos->ref_depend_map;
      /*
        For the case of "t1 LEFT JOIN t2 ON ..." where t2 is a const table
        with no matching row we will get position[t2].records_read==0.
        Actually the size of output is one null-complemented row, therefore
        we will use value of 1 whenever we get records_read==0.

        Note
        - the above case can't occur if inner part of outer join has more
          than one table: table with no matches will not be marked as const.

        - Ideally we should add 1 to records_read for every possible null-
          complemented row. We're not doing it because: 1. it will require
          non-trivial code and add overhead. 2. The value of records_read
          is an imprecise estimate and adding 1 (or, in the worst case,
          #max_nested_outer_joins=64-1) will not make it any more precise.
      */
      if (pos->records_read)
      {
        found= COST_MULT(found, pos->records_read);
        found*= pos->cond_selectivity;
      }
    }
  }
  return found;
}


/*
  Enumerate join tabs in breadth-first fashion, including const tables.
*/

static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab,
                                        uint n_top_tabs_count, JOIN_TAB *tab)
{
  /* Aggregation tabs are appended after the top-level join tabs */
  n_top_tabs_count += tab->join->aggr_tables;
  if (!tab->bush_root_tab)
  {
    /* We're at top level.
Get the next top-level tab */
    tab++;
    if (tab < first_top_tab + n_top_tabs_count)
      return tab;

    /* No more top-level tabs. Switch to enumerating SJM nest children */
    tab= first_top_tab;
  }
  else
  {
    /* We're inside of an SJM nest */
    if (!tab->last_leaf_in_bush)
    {
      /* There's one more table in the nest, return it. */
      return ++tab;
    }
    else
    {
      /*
        There are no more tables in this nest. Get out of it and then we'll
        proceed to the next nest.
      */
      tab= tab->bush_root_tab + 1;
    }
  }

  /*
    Ok, "tab" points to a top-level table, and we need to find the next SJM
    nest and enter it.
  */
  for (; tab < first_top_tab + n_top_tabs_count; tab++)
  {
    if (tab->bush_children)
      return tab->bush_children->start;
  }
  return NULL;
}


/*
  Enumerate JOIN_TABs in "EXPLAIN order". This order
    - const tabs are included
    - we enumerate "optimization tabs"
*/

JOIN_TAB *first_explain_order_tab(JOIN* join)
{
  JOIN_TAB* tab;
  tab= join->join_tab;
  if (!tab)
    return NULL; /* Can happen when the tables were optimized away */
  /* If the first tab is an SJM nest, start with its first child */
  return (tab->bush_children) ?
tab->bush_children->start : tab;
}


/*
  Return the JOIN_TAB that follows 'tab' in EXPLAIN order, descending into
  SJM nests and returning to the top level when a nest is exhausted.
  Returns NULL when the enumeration is complete.
*/
JOIN_TAB *next_explain_order_tab(JOIN* join, JOIN_TAB* tab)
{
  /* If we're inside SJM nest and have reached its end, get out */
  if (tab->last_leaf_in_bush)
    return tab->bush_root_tab;

  /* Move to next tab in the array we're traversing */
  tab++;

  if (tab == join->join_tab + join->top_join_tab_count)
    return NULL;                    /* Outside SJM nest and reached EOF */

  if (tab->bush_children)
    return tab->bush_children->start;

  return tab;
}


/*
  Return the first top-level JOIN_TAB, optionally skipping the const tables.
  Returns NULL if there is nothing to enumerate.
*/
JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables const_tbls)
{
  JOIN_TAB *tab= join->join_tab;
  if (const_tbls == WITHOUT_CONST_TABLES)
  {
    if (join->const_tables == join->table_count || !tab)
      return NULL;
    tab += join->const_tables;
  }
  return tab;
}


/*
  Return the next top-level JOIN_TAB (SJM nest children are not returned:
  a tab inside a bush terminates the enumeration).
*/
JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab)
{
  tab= next_breadth_first_tab(join->first_breadth_first_tab(),
                              join->top_join_tab_count, tab);
  if (tab && tab->bush_root_tab)
    tab= NULL;
  return tab;
}


JOIN_TAB *first_linear_tab(JOIN *join,
                           enum enum_with_bush_roots include_bush_roots,
                           enum enum_with_const_tables const_tbls)
{
  JOIN_TAB *first= join->join_tab;

  if (!first)
    return NULL;

  if (const_tbls == WITHOUT_CONST_TABLES)
    first+= join->const_tables;

  if (first >= join->join_tab + join->top_join_tab_count)
    return NULL;                    /* All are const tables */

  if (first->bush_children && include_bush_roots == WITHOUT_BUSH_ROOTS)
  {
    /* This JOIN_TAB is a SJM nest; Start from first table in nest */
    return first->bush_children->start;
  }

  return first;
}


/*
  A helper function to loop over all join's join_tab in sequential fashion

  DESCRIPTION
    Depending on include_bush_roots parameter, JOIN_TABs that represent
    SJM-scan/lookups are either returned or omitted.

    SJM-Bush children are returned right after (or in place of) their container
    join tab (TODO: does anybody depend on this?
A: make_join_readinfo() seems
    to)

  For example, if we have this structure:

     ot1--ot2--sjm1----------------ot3-...
                |
                +--it1--it2--it3

  calls to next_linear_tab( include_bush_roots=TRUE) will return:

    ot1 ot2 sjm1 it1 it2 it3 ot3 ...

  while calls to next_linear_tab( include_bush_roots=FALSE) will return:

    ot1 ot2 it1 it2 it3 ot3 ...

  (note that sjm1 won't be returned).
*/

JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab,
                          enum enum_with_bush_roots include_bush_roots)
{
  if (include_bush_roots == WITH_BUSH_ROOTS && tab->bush_children)
  {
    /* This JOIN_TAB is a SJM nest; Start from first table in nest */
    return tab->bush_children->start;
  }

  /* Only a tab inside an SJM nest may be marked as the nest's last leaf */
  DBUG_ASSERT(!tab->last_leaf_in_bush || tab->bush_root_tab);

  if (tab->bush_root_tab)       /* Are we inside an SJM nest */
  {
    /* Inside SJM nest */
    if (!tab->last_leaf_in_bush)
      return tab+1;              /* Return next in nest */
    /* Continue from the sjm on the top level */
    tab= tab->bush_root_tab;
  }

  /* If no more JOIN_TAB's on the top level */
  if (++tab >= join->join_tab + join->exec_join_tab_cnt() + join->aggr_tables)
    return NULL;

  if (include_bush_roots == WITHOUT_BUSH_ROOTS && tab->bush_children)
  {
    /* This JOIN_TAB is a SJM nest; Start from first table in nest */
    tab= tab->bush_children->start;
  }
  return tab;
}


/*
  Start to iterate over all join tables in bush-children-first order, excluding
  the const tables (see next_depth_first_tab() comment for details)

  @return first JOIN_TAB of the enumeration, or NULL when all tables are
          const or there are no join tabs
*/
JOIN_TAB *first_depth_first_tab(JOIN* join)
{
  JOIN_TAB* tab;
  /* This means we're starting the enumeration */
  if (join->const_tables == join->top_join_tab_count || !join->join_tab)
    return NULL;

  tab= join->join_tab + join->const_tables;

  /* If the first tab is an SJM nest root, descend into its children first */
  return (tab->bush_children) ? tab->bush_children->start : tab;
}


/*
  A helper function to iterate over all join tables in bush-children-first order

  DESCRIPTION

    For example, for this join plan

    ot1--ot2--sjm1------------ot3-...
                |
                |
               it1--it2--it3

  call to first_depth_first_tab() will return ot1, and subsequent calls to
  next_depth_first_tab() will return:

    ot2 it1 it2 it3 sjm ot3 ...
*/

JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab)
{
  /* If we're inside SJM nest and have reached its end, get out */
  if (tab->last_leaf_in_bush)
    return tab->bush_root_tab;

  /* Move to next tab in the array we're traversing */
  tab++;

  if (tab == join->join_tab +join->top_join_tab_count)
    return NULL; /* Outside SJM nest and reached EOF */

  if (tab->bush_children)
    return tab->bush_children->start;

  return tab;
}


/*
  Decide whether this join should use the two-phase optimization.

  Currently this only delegates to check_for_splittable_materialized();
  presumably it detects a splittable materialized entity in the join —
  confirm against that function's definition.

  @return true  - two-phase optimization applies
          false - otherwise
*/
bool JOIN::check_two_phase_optimization(THD *thd)
{
  if (check_for_splittable_materialized())
    return true;
  return false;
}


/*
  AND 'injected_cond' into this join's WHERE clause.

  The injected condition is ANDed with the current 'conds', fixed if
  needed, and installed both as select_lex->where and as this->conds.
  If the current WHERE is an AND whose argument list shares its tail with
  cond_equal's multiple equalities, those equalities are detached before
  the merge and re-appended afterwards so they remain the last arguments
  of the AND.

  @return false - OK
          true  - fix_fields error
*/
bool JOIN::inject_cond_into_where(Item *injected_cond)
{
  Item *where_item= injected_cond;
  /* NOTE(review): template argument (likely List<Item>) appears lost in
     extraction of this chunk — verify against upstream source */
  List *and_args= NULL;
  if (conds && conds->type() == Item::COND_ITEM &&
      ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
  {
    and_args= ((Item_cond*) conds)->argument_list();
    if (cond_equal)
      and_args->disjoin((List *) &cond_equal->current_level);
  }

  where_item= and_items(thd, conds, where_item);
  if (where_item->fix_fields_if_needed(thd, 0))
    return true;
  thd->change_item_tree(&select_lex->where, where_item);
  select_lex->where->top_level_item();
  conds= select_lex->where;

  if (and_args && cond_equal)
  {
    /* Re-attach the multiple equalities at the end of the AND arguments */
    and_args= ((Item_cond*) conds)->argument_list();
    List_iterator li(cond_equal->current_level);
    Item_equal *elem;
    while ((elem= li++))
    {
      and_args->push_back(elem, thd->mem_root);
    }
  }

  return false;

}


/* Shared NULL Item*, used as a dummy target for on_expr_ref of SJM tabs */
static Item * const null_ptr= NULL;


/*
  Set up join struct according to the picked join order in

  SYNOPSIS
    get_best_combination()
      join  The join to process (the picked join order is mainly in
            join->best_positions)

  DESCRIPTION
    Setup join structures according the picked join order
    - finalize semi-join strategy choices (see
fix_semijoin_strategies_for_picked_join_order) + - create join->join_tab array and put there the JOIN_TABs in the join order + - create data structures describing ref access methods. + + NOTE + In this function we switch from pre-join-optimization JOIN_TABs to + post-join-optimization JOIN_TABs. This is achieved by copying the entire + JOIN_TAB objects. + + RETURN + FALSE OK + TRUE Out of memory +*/ + +bool JOIN::get_best_combination() +{ + uint tablenr; + table_map used_tables; + JOIN_TAB *j; + KEYUSE *keyuse; + JOIN_TAB *sjm_nest_end= NULL; + JOIN_TAB *sjm_nest_root= NULL; + DBUG_ENTER("get_best_combination"); + + /* + Additional plan nodes for postjoin tmp tables: + 1? + // For GROUP BY + 1? + // For DISTINCT + 1? + // For aggregation functions aggregated in outer query + // when used with distinct + 1? + // For ORDER BY + 1? // buffer result + Up to 2 tmp tables are actually used, but it's hard to tell exact number + at this stage. + */ + uint aggr_tables= (group_list ? 1 : 0) + + (select_distinct ? + (tmp_table_param.using_outer_summary_function ? 2 : 1) : 0) + + (order ? 1 : 0) + + (select_options & (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT) ? 1 : 0) ; + + if (aggr_tables == 0) + aggr_tables= 1; /* For group by pushdown */ + + if (select_lex->window_specs.elements) + aggr_tables++; + + if (aggr_tables > 2) + aggr_tables= 2; + + full_join=0; + hash_join= FALSE; + + fix_semijoin_strategies_for_picked_join_order(this); + top_join_tab_count= get_number_of_tables_at_top_level(this); + +#ifndef DBUG_OFF + dbug_join_tab_array_size= top_join_tab_count + aggr_tables; +#endif + /* + NOTE: The above computation of aggr_tables can produce wrong result because some + of the variables it uses may change their values after we leave this function. + Known examples: + - Dangerous: using_outer_summary_function=false at this point. Added + DBUG_ASSERT below to demonstrate. Can this cause us to allocate less + space than we would need? 
+ - Not dangerous: select_distinct can be true here but be assigned false + afterwards. + */ + aggr_tables= 2; + DBUG_ASSERT(!tmp_table_param.using_outer_summary_function); + if (!(join_tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)* + (top_join_tab_count + aggr_tables)))) + DBUG_RETURN(TRUE); + + if (inject_splitting_cond_for_all_tables_with_split_opt()) + goto error; + + JOIN_TAB_RANGE *root_range; + if (!(root_range= new (thd->mem_root) JOIN_TAB_RANGE)) + goto error; + root_range->start= join_tab; + /* root_range->end will be set later */ + join_tab_ranges.empty(); + + if (join_tab_ranges.push_back(root_range, thd->mem_root)) + goto error; + + for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++) + { + TABLE *form; + POSITION *cur_pos= &best_positions[tablenr]; + if (cur_pos->sj_strategy == SJ_OPT_MATERIALIZE || + cur_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN) + { + /* + Ok, we've entered an SJ-Materialization semi-join (note that this can't + be done recursively, semi-joins are not allowed to be nested). + 1. Put into main join order a JOIN_TAB that represents a lookup or scan + in the temptable. + */ + bzero((void*)j, sizeof(JOIN_TAB)); + j->join= this; + j->table= NULL; //temporary way to tell SJM tables from others. + j->ref.key = -1; + j->on_expr_ref= (Item**) &null_ptr; + j->keys= key_map(1); /* The unique index is always in 'possible keys' in EXPLAIN */ + + /* + 2. Proceed with processing SJM nest's join tabs, putting them into the + sub-order + */ + SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info; + j->records_read= (sjm->is_sj_scan? 
sjm->rows : 1); + j->records= (ha_rows) j->records_read; + j->cond_selectivity= 1.0; + JOIN_TAB *jt; + JOIN_TAB_RANGE *jt_range; + if (!(jt= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*sjm->tables)) || + !(jt_range= new JOIN_TAB_RANGE)) + goto error; + jt_range->start= jt; + jt_range->end= jt + sjm->tables; + join_tab_ranges.push_back(jt_range, thd->mem_root); + j->bush_children= jt_range; + sjm_nest_end= jt + sjm->tables; + sjm_nest_root= j; + + j= jt; + } + + *j= *best_positions[tablenr].table; + + j->bush_root_tab= sjm_nest_root; + + form= table[tablenr]= j->table; + form->reginfo.join_tab=j; + DBUG_PRINT("info",("type: %d", j->type)); + if (j->type == JT_CONST) + goto loop_end; // Handled in make_join_stat.. + + j->loosescan_match_tab= NULL; //non-nulls will be set later + j->inside_loosescan_range= FALSE; + j->ref.key = -1; + j->ref.key_parts=0; + + if (j->type == JT_SYSTEM) + goto loop_end; + if ( !(keyuse= best_positions[tablenr].key)) + { + j->type=JT_ALL; + if (best_positions[tablenr].use_join_buffer && + tablenr != const_tables) + full_join= 1; + } + + /*if (best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN) + { + DBUG_ASSERT(!keyuse || keyuse->key == + best_positions[tablenr].loosescan_picker.loosescan_key); + j->index= best_positions[tablenr].loosescan_picker.loosescan_key; + }*/ + + if ((j->type == JT_REF || j->type == JT_EQ_REF) && + is_hash_join_key_no(j->ref.key)) + hash_join= TRUE; + + j->range_rowid_filter_info= best_positions[tablenr].range_rowid_filter_info; + + loop_end: + /* + Save records_read in JOIN_TAB so that select_describe()/etc don't have + to access join->best_positions[]. 
+ */ + j->records_read= best_positions[tablenr].records_read; + j->cond_selectivity= best_positions[tablenr].cond_selectivity; + map2table[j->table->tablenr]= j; + + /* If we've reached the end of sjm nest, switch back to main sequence */ + if (j + 1 == sjm_nest_end) + { + j->last_leaf_in_bush= TRUE; + j= sjm_nest_root; + sjm_nest_root= NULL; + sjm_nest_end= NULL; + } + } + root_range->end= j; + + used_tables= OUTER_REF_TABLE_BIT; // Outer row is already read + for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++) + { + if (j->bush_children) + j= j->bush_children->start; + + used_tables|= j->table->map; + if (j->type != JT_CONST && j->type != JT_SYSTEM) + { + if ((keyuse= best_positions[tablenr].key) && + create_ref_for_key(this, j, keyuse, TRUE, used_tables)) + goto error; // Something went wrong + } + if (j->last_leaf_in_bush) + j= j->bush_root_tab; + } + + top_join_tab_count= (uint)(join_tab_ranges.head()->end - + join_tab_ranges.head()->start); + + if (unlikely(thd->trace_started())) + print_final_join_order(this); + + update_depend_map(this); + DBUG_RETURN(0); + +error: + /* join_tab was not correctly setup. Don't use it */ + join_tab= 0; + DBUG_RETURN(1); +} + +/** + Create a descriptor of hash join key to access a given join table + + @param join join which the join table belongs to + @param join_tab the join table to access + @param org_keyuse beginning of the key uses to join this table + @param used_tables bitmap of the previous tables + + @details + This function first finds key uses that can be utilized by the hash join + algorithm to join join_tab to the previous tables marked in the bitmap + used_tables. The tested key uses are taken from the array of all key uses + for 'join' starting from the position org_keyuse. After all interesting key + uses have been found the function builds a descriptor of the corresponding + key that is used by the hash join algorithm would it be chosen to join + the table join_tab. 
+ + @retval FALSE the descriptor for a hash join key is successfully created + @retval TRUE otherwise +*/ + +static bool create_hj_key_for_table(JOIN *join, JOIN_TAB *join_tab, + KEYUSE *org_keyuse, table_map used_tables) +{ + KEY *keyinfo; + KEY_PART_INFO *key_part_info; + KEYUSE *keyuse= org_keyuse; + uint key_parts= 0; + THD *thd= join->thd; + TABLE *table= join_tab->table; + bool first_keyuse= TRUE; + DBUG_ENTER("create_hj_key_for_table"); + + do + { + if (!(~used_tables & keyuse->used_tables) && + join_tab->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse) && + are_tables_local(join_tab, keyuse->used_tables)) + { + if (first_keyuse) + { + key_parts++; + } + else + { + KEYUSE *curr= org_keyuse; + for( ; curr < keyuse; curr++) + { + if (curr->keypart == keyuse->keypart && + !(~used_tables & curr->used_tables) && + join_tab->keyuse_is_valid_for_access_in_chosen_plan(join, + curr) && + are_tables_local(join_tab, curr->used_tables)) + break; + } + if (curr == keyuse) + key_parts++; + } + } + first_keyuse= FALSE; + keyuse++; + } while (keyuse->table == table && keyuse->is_for_hash_join()); + if (!key_parts) + DBUG_RETURN(TRUE); + /* This memory is allocated only once for the joined table join_tab */ + if (!(keyinfo= (KEY *) thd->alloc(sizeof(KEY))) || + !(key_part_info = (KEY_PART_INFO *) thd->alloc(sizeof(KEY_PART_INFO)* + key_parts))) + DBUG_RETURN(TRUE); + keyinfo->usable_key_parts= keyinfo->user_defined_key_parts = key_parts; + keyinfo->ext_key_parts= keyinfo->user_defined_key_parts; + keyinfo->key_part= key_part_info; + keyinfo->key_length=0; + keyinfo->algorithm= HA_KEY_ALG_UNDEF; + keyinfo->flags= HA_GENERATED_KEY; + keyinfo->is_statistics_from_stat_tables= FALSE; + keyinfo->name.str= "$hj"; + keyinfo->name.length= 3; + keyinfo->rec_per_key= (ulong*) thd->calloc(sizeof(ulong)*key_parts); + if (!keyinfo->rec_per_key) + DBUG_RETURN(TRUE); + keyinfo->key_part= key_part_info; + + first_keyuse= TRUE; + keyuse= org_keyuse; + do + { + if (!(~used_tables & 
keyuse->used_tables) && + join_tab->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse) && + are_tables_local(join_tab, keyuse->used_tables)) + { + bool add_key_part= TRUE; + if (!first_keyuse) + { + for(KEYUSE *curr= org_keyuse; curr < keyuse; curr++) + { + if (curr->keypart == keyuse->keypart && + !(~used_tables & curr->used_tables) && + join_tab->keyuse_is_valid_for_access_in_chosen_plan(join, + curr) && + are_tables_local(join_tab, curr->used_tables)) + { + keyuse->keypart= NO_KEYPART; + add_key_part= FALSE; + break; + } + } + } + if (add_key_part) + { + Field *field= table->field[keyuse->keypart]; + uint fieldnr= keyuse->keypart+1; + table->create_key_part_by_field(key_part_info, field, fieldnr); + keyinfo->key_length += key_part_info->store_length; + key_part_info++; + } + } + first_keyuse= FALSE; + keyuse++; + } while (keyuse->table == table && keyuse->is_for_hash_join()); + + keyinfo->ext_key_parts= keyinfo->user_defined_key_parts; + keyinfo->ext_key_flags= keyinfo->flags; + keyinfo->ext_key_part_map= 0; + + join_tab->hj_key= keyinfo; + + DBUG_RETURN(FALSE); +} + +/* + Check if a set of tables specified by used_tables can be accessed when + we're doing scan on join_tab jtab. +*/ +static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables) +{ + if (jtab->bush_root_tab) + { + /* + jtab is inside execution join nest. We may not refer to outside tables, + except the const tables. + */ + table_map local_tables= jtab->emb_sj_nest->nested_join->used_tables | + jtab->join->const_table_map | + OUTER_REF_TABLE_BIT; + return !MY_TEST(used_tables & ~local_tables); + } + + /* + If we got here then jtab is at top level. + - all other tables at top level are accessible, + - tables in join nests are accessible too, because all their columns that + are needed at top level will be unpacked when scanning the + materialization table. 
+ */ + return TRUE; +} + +static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, + KEYUSE *org_keyuse, bool allow_full_scan, + table_map used_tables) +{ + uint keyparts, length, key; + TABLE *table; + KEY *keyinfo; + KEYUSE *keyuse= org_keyuse; + bool ftkey= (keyuse->keypart == FT_KEYPART); + THD *thd= join->thd; + DBUG_ENTER("create_ref_for_key"); + + /* Use best key from find_best */ + table= j->table; + key= keyuse->key; + if (!is_hash_join_key_no(key)) + keyinfo= table->key_info+key; + else + { + if (create_hj_key_for_table(join, j, org_keyuse, used_tables)) + DBUG_RETURN(TRUE); + keyinfo= j->hj_key; + } + + if (ftkey) + { + Item_func_match *ifm=(Item_func_match *)keyuse->val; + + length=0; + keyparts=1; + ifm->join_key=1; + } + else + { + keyparts=length=0; + uint found_part_ref_or_null= 0; + /* + Calculate length for the used key + Stop if there is a missing key part or when we find second key_part + with KEY_OPTIMIZE_REF_OR_NULL + */ + do + { + if (!(~used_tables & keyuse->used_tables) && + (!keyuse->validity_ref || *keyuse->validity_ref) && + j->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse)) + { + if (are_tables_local(j, keyuse->val->used_tables())) + { + if ((is_hash_join_key_no(key) && keyuse->keypart != NO_KEYPART) || + (!is_hash_join_key_no(key) && keyparts == keyuse->keypart && + !(found_part_ref_or_null & keyuse->optimize))) + { + length+= keyinfo->key_part[keyparts].store_length; + keyparts++; + found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ; + } + } + } + keyuse++; + } while (keyuse->table == table && keyuse->key == key); + + if (!keyparts && allow_full_scan) + { + /* It's a LooseIndexScan strategy scanning whole index */ + j->type= JT_ALL; + j->index= key; + DBUG_RETURN(FALSE); + } + + DBUG_ASSERT(length > 0); + DBUG_ASSERT(keyparts != 0); + } /* not ftkey */ + + /* set up fieldref */ + j->ref.key_parts= keyparts; + j->ref.key_length= length; + j->ref.key= (int) key; + if (!(j->ref.key_buff= (uchar*) 
thd->calloc(ALIGN_SIZE(length)*2)) || + !(j->ref.key_copy= (store_key**) thd->alloc((sizeof(store_key*) * + (keyparts+1)))) || + !(j->ref.items=(Item**) thd->alloc(sizeof(Item*)*keyparts)) || + !(j->ref.cond_guards= (bool**) thd->alloc(sizeof(uint*)*keyparts))) + { + DBUG_RETURN(TRUE); + } + j->ref.key_buff2=j->ref.key_buff+ALIGN_SIZE(length); + j->ref.key_err=1; + j->ref.has_record= FALSE; + j->ref.null_rejecting= 0; + j->ref.disable_cache= FALSE; + j->ref.null_ref_part= NO_REF_PART; + j->ref.const_ref_part_map= 0; + j->ref.uses_splitting= FALSE; + keyuse=org_keyuse; + + store_key **ref_key= j->ref.key_copy; + uchar *key_buff=j->ref.key_buff, *null_ref_key= 0; + uint null_ref_part= NO_REF_PART; + bool keyuse_uses_no_tables= TRUE; + uint not_null_keyparts= 0; + if (ftkey) + { + j->ref.items[0]=((Item_func*)(keyuse->val))->key_item(); + /* Predicates pushed down into subquery can't be used FT access */ + j->ref.cond_guards[0]= NULL; + if (keyuse->used_tables) + DBUG_RETURN(TRUE); // not supported yet. SerG + + j->type=JT_FT; + } + else + { + uint i; + for (i=0 ; i < keyparts ; keyuse++,i++) + { + while (((~used_tables) & keyuse->used_tables) || + (keyuse->validity_ref && !(*keyuse->validity_ref)) || + !j->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse) || + keyuse->keypart == NO_KEYPART || + (keyuse->keypart != + (is_hash_join_key_no(key) ? + keyinfo->key_part[i].field->field_index : i)) || + !are_tables_local(j, keyuse->val->used_tables())) + keyuse++; /* Skip other parts */ + + uint maybe_null= MY_TEST(keyinfo->key_part[i].null_bit); + j->ref.items[i]=keyuse->val; // Save for cond removal + j->ref.cond_guards[i]= keyuse->cond_guard; + + if (!keyuse->val->maybe_null() || keyuse->null_rejecting) + not_null_keyparts++; + /* + Set ref.null_rejecting to true only if we are going to inject a + "keyuse->val IS NOT NULL" predicate. 
+ */ + Item *real= (keyuse->val)->real_item(); + if (keyuse->null_rejecting && (real->type() == Item::FIELD_ITEM) && + ((Item_field*)real)->field->maybe_null()) + j->ref.null_rejecting|= (key_part_map)1 << i; + + keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables; + j->ref.uses_splitting |= (keyuse->validity_ref != NULL); + /* + We don't want to compute heavy expressions in EXPLAIN, an example would + select * from t1 where t1.key=(select thats very heavy); + + (select thats very heavy) => is a constant here + eg: (select avg(order_cost) from orders) => constant but expensive + */ + if (!keyuse->val->used_tables() && !thd->lex->describe) + { // Compare against constant + store_key_item tmp(thd, + keyinfo->key_part[i].field, + key_buff + maybe_null, + maybe_null ? key_buff : 0, + keyinfo->key_part[i].length, + keyuse->val, + FALSE); + if (unlikely(thd->is_error())) + DBUG_RETURN(TRUE); + tmp.copy(thd); + j->ref.const_ref_part_map |= key_part_map(1) << i ; + } + else + { + *ref_key++= get_store_key(thd, + keyuse,join->const_table_map, + &keyinfo->key_part[i], + key_buff, maybe_null); + if (!keyuse->val->used_tables()) + j->ref.const_ref_part_map |= key_part_map(1) << i ; + } + /* + Remember if we are going to use REF_OR_NULL + But only if field _really_ can be null i.e. 
we force JT_REF + instead of JT_REF_OR_NULL in case if field can't be null + */ + if ((keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) && maybe_null) + { + null_ref_key= key_buff; + null_ref_part= i; + } + key_buff+= keyinfo->key_part[i].store_length; + } + } /* not ftkey */ + *ref_key=0; // end_marker + if (j->type == JT_FT) + DBUG_RETURN(0); + ulong key_flags= j->table->actual_key_flags(keyinfo); + if (j->type == JT_CONST) + j->table->const_table= 1; + else if (!((keyparts == keyinfo->user_defined_key_parts && + ( + (key_flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME || + /* Unique key and all keyparts are NULL rejecting */ + ((key_flags & HA_NOSAME) && keyparts == not_null_keyparts) + )) || + /* true only for extended keys */ + (keyparts > keyinfo->user_defined_key_parts && + MY_TEST(key_flags & HA_EXT_NOSAME) && + keyparts == keyinfo->ext_key_parts) + ) || + null_ref_key) + { + /* Must read with repeat */ + j->type= null_ref_key ? JT_REF_OR_NULL : JT_REF; + j->ref.null_ref_key= null_ref_key; + j->ref.null_ref_part= null_ref_part; + } + else if (keyuse_uses_no_tables) + { + /* + This happen if we are using a constant expression in the ON part + of an LEFT JOIN. + SELECT * FROM a LEFT JOIN b ON b.key=30 + Here we should not mark the table as a 'const' as a field may + have a 'normal' value or a NULL value. + */ + j->type=JT_CONST; + } + else + j->type=JT_EQ_REF; + + if (j->type == JT_EQ_REF) + j->read_record.unlock_row= join_read_key_unlock_row; + else if (j->type == JT_CONST) + j->read_record.unlock_row= join_const_unlock_row; + else + j->read_record.unlock_row= rr_unlock_row; + DBUG_RETURN(0); +} + + + +static store_key * +get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables, + KEY_PART_INFO *key_part, uchar *key_buff, uint maybe_null) +{ + if (!((~used_tables) & keyuse->used_tables)) // if const item + { + return new store_key_const_item(thd, + key_part->field, + key_buff + maybe_null, + maybe_null ? 
                                    key_buff : 0,
                                    key_part->length,
                                    keyuse->val);
  }
  else if (keyuse->val->type() == Item::FIELD_ITEM ||
           (keyuse->val->type() == Item::REF_ITEM &&
            ((((Item_ref*)keyuse->val)->ref_type() == Item_ref::OUTER_REF &&
              (*(Item_ref**)((Item_ref*)keyuse->val)->ref)->ref_type() ==
              Item_ref::DIRECT_REF) ||
             ((Item_ref*)keyuse->val)->ref_type() == Item_ref::VIEW_REF) &&
            keyuse->val->real_item()->type() == Item::FIELD_ITEM))
    /* A field, or a direct/view reference that resolves to a field:
       copy field-to-field */
    return new store_key_field(thd,
                               key_part->field,
                               key_buff + maybe_null,
                               maybe_null ? key_buff : 0,
                               key_part->length,
                               ((Item_field*) keyuse->val->real_item())->field,
                               keyuse->val->real_item()->full_name());

  /* Generic case: evaluate the item and store its value */
  return new store_key_item(thd,
                            key_part->field,
                            key_buff + maybe_null,
                            maybe_null ? key_buff : 0,
                            key_part->length,
                            keyuse->val, FALSE);
}


/*
  AND expression e2 onto *e1, in place.

  - If *e1 is NULL, *e1 simply becomes e2.
  - If e2 is NULL, *e1 is left unchanged.
  - Otherwise *e1 is replaced by a fixed Item_cond_and(*e1, e2); on
    allocation failure *e1 is left unchanged.
*/
inline void add_cond_and_fix(THD *thd, Item **e1, Item *e2)
{
  if (*e1)
  {
    if (!e2)
      return;
    Item *res;
    if ((res= new (thd->mem_root) Item_cond_and(thd, *e1, e2)))
    {
      res->fix_fields(thd, 0);
      res->update_used_tables();
      *e1= res;
    }
  }
  else
    *e1= e2;
}


/**
  Add to join_tab->select_cond[i] "table.field IS NOT NULL" conditions
  we've inferred from ref/eq_ref access performed.

  This function is a part of "Early NULL-values filtering for ref access"
  optimization.

  Example of this optimization:
  For query SELECT * FROM t1,t2 WHERE t2.key=t1.field @n
  and plan " any-access(t1), ref(t2.key=t1.field) " @n
  add "t1.field IS NOT NULL" to t1's table condition. @n

  Description of the optimization:

    We look through equalities chosen to perform ref/eq_ref access,
    pick equalities that have form "tbl.part_of_key = othertbl.field"
    (where othertbl is a non-const table and othertbl.field may be NULL)
    and add them to conditions on corresponding tables (othertbl in this
    example).

    Exception from that is the case when referred_tab->join != join.
    I.e. don't add NOT NULL constraints from any embedded subquery.
+ Consider this query: + @code + SELECT A.f2 FROM t1 LEFT JOIN t2 A ON A.f2 = f1 + WHERE A.f3=(SELECT MIN(f3) FROM t2 C WHERE A.f4 = C.f4) OR A.f3 IS NULL; + @endocde + Here condition A.f3 IS NOT NULL is going to be added to the WHERE + condition of the embedding query. + Another example: + SELECT * FROM t10, t11 WHERE (t10.a < 10 OR t10.a IS NULL) + AND t11.b <=> t10.b AND (t11.a = (SELECT MAX(a) FROM t12 + WHERE t12.b = t10.a )); + Here condition t10.a IS NOT NULL is going to be added. + In both cases addition of NOT NULL condition will erroneously reject + some rows of the result set. + referred_tab->join != join constraint would disallow such additions. + + This optimization doesn't affect the choices that ref, range, or join + optimizer make. This was intentional because this was added after 4.1 + was GA. + + Implementation overview + 1. update_ref_and_keys() accumulates info about null-rejecting + predicates in in KEY_FIELD::null_rejecting + 1.1 add_key_part saves these to KEYUSE. + 2. create_ref_for_key copies them to TABLE_REF. + 3. add_not_null_conds adds "x IS NOT NULL" to join_tab->select_cond of + appropiate JOIN_TAB members. +*/ + +static void add_not_null_conds(JOIN *join) +{ + JOIN_TAB *tab; + DBUG_ENTER("add_not_null_conds"); + + for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) + { + if (tab->type == JT_REF || tab->type == JT_EQ_REF || + tab->type == JT_REF_OR_NULL) + { + for (uint keypart= 0; keypart < tab->ref.key_parts; keypart++) + { + if (tab->ref.null_rejecting & ((key_part_map)1 << keypart)) + { + Item *item= tab->ref.items[keypart]; + Item *notnull; + Item *real= item->real_item(); + if (real->can_eval_in_optimize() && real->type() != Item::FIELD_ITEM) + { + /* + It could be constant instead of field after constant + propagation. 
+ */ + continue; + } + DBUG_ASSERT(real->type() == Item::FIELD_ITEM); + Item_field *not_null_item= (Item_field*)real; + JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab; + /* + For UPDATE queries such as: + UPDATE t1 SET t1.f2=(SELECT MAX(t2.f4) FROM t2 WHERE t2.f3=t1.f1); + not_null_item is the t1.f1, but it's referred_tab is 0. + */ + if (!(notnull= new (join->thd->mem_root) + Item_func_isnotnull(join->thd, item))) + DBUG_VOID_RETURN; + /* + We need to do full fix_fields() call here in order to have correct + notnull->const_item(). This is needed e.g. by test_quick_select + when it is called from make_join_select after this function is + called. + */ + if (notnull->fix_fields(join->thd, ¬null)) + DBUG_VOID_RETURN; + + DBUG_EXECUTE("where",print_where(notnull, + (referred_tab ? + referred_tab->table->alias.c_ptr() : + "outer_ref_cond"), + QT_ORDINARY);); + if (!tab->first_inner) + { + COND *new_cond= (referred_tab && referred_tab->join == join) ? + referred_tab->select_cond : + join->outer_ref_cond; + add_cond_and_fix(join->thd, &new_cond, notnull); + if (referred_tab && referred_tab->join == join) + referred_tab->set_select_cond(new_cond, __LINE__); + else + join->outer_ref_cond= new_cond; + } + else + add_cond_and_fix(join->thd, tab->first_inner->on_expr_ref, notnull); + } + } + } + } + DBUG_VOID_RETURN; +} + +/** + Build a predicate guarded by match variables for embedding outer joins. + The function recursively adds guards for predicate cond + assending from tab to the first inner table next embedding + nested outer join and so on until it reaches root_tab + (root_tab can be 0). + + In other words: + add_found_match_trig_cond(tab->first_inner_tab, y, 0) is the way one should + wrap parts of WHERE. The idea is that the part of WHERE should be only + evaluated after we've finished figuring out whether outer joins. + ^^^ is the above correct? 

  @param tab       the first inner table for most nested outer join
  @param cond      the predicate to be guarded (must be set)
  @param root_tab  the first inner table to stop

  @return
    -  pointer to the guarded predicate, if success
    -  0, otherwise
*/

static COND*
add_found_match_trig_cond(THD *thd, JOIN_TAB *tab, COND *cond,
                          JOIN_TAB *root_tab)
{
  COND *tmp;
  DBUG_ASSERT(cond != 0);
  if (tab == root_tab)
    return cond;
  /*
    Recurse up the chain of embedding outer joins via first_upper, then
    wrap the result in a trigger condition guarded by this tab's 'found'
    match flag.  A NULL from the recursion (allocation failure) is
    propagated unchanged.
  */
  if ((tmp= add_found_match_trig_cond(thd, tab->first_upper, cond, root_tab)))
    tmp= new (thd->mem_root) Item_func_trig_cond(thd, tmp, &tab->found);
  if (tmp)
  {
    tmp->quick_fix_field();
    tmp->update_used_tables();
  }
  return tmp;
}


/*
  @return true if this table reference is a semi-join materialization nest
          that is actually used (materialized) in the chosen plan
*/
bool TABLE_LIST::is_active_sjm()
{
  return sj_mat_info && sj_mat_info->is_used;
}


/**
  Fill in outer join related info for the execution plan structure.

    For each outer join operation left after simplification of the
    original query the function set up the following pointers in the linear
    structure join->join_tab representing the selected execution plan.
    The first inner table t0 for the operation is set to refer to the last
    inner table tk through the field t0->last_inner.
    Any inner table ti for the operation are set to refer to the first
    inner table ti->first_inner.
    The first inner table t0 for the operation is set to refer to the
    first inner table of the embedding outer join operation, if there is any,
    through the field t0->first_upper.
    The on expression for the outer join operation is attached to the
    corresponding first inner table through the field t0->on_expr_ref.
    Here ti are structures of the JOIN_TAB type.

    In other words, for each join tab, set
    - first_inner
    - last_inner
    - first_upper
    - on_expr_ref, cond_equal

  EXAMPLE.
For the query: + @code + SELECT * FROM t1 + LEFT JOIN + (t2, t3 LEFT JOIN t4 ON t3.a=t4.a) + ON (t1.a=t2.a AND t1.b=t3.b) + WHERE t1.c > 5, + @endcode + + given the execution plan with the table order t1,t2,t3,t4 + is selected, the following references will be set; + t4->last_inner=[t4], t4->first_inner=[t4], t4->first_upper=[t2] + t2->last_inner=[t4], t2->first_inner=t3->first_inner=[t2], + on expression (t1.a=t2.a AND t1.b=t3.b) will be attached to + *t2->on_expr_ref, while t3.a=t4.a will be attached to *t4->on_expr_ref. + + @param join reference to the info fully describing the query + + @note + The function assumes that the simplification procedure has been + already applied to the join query (see simplify_joins). + This function can be called only after the execution plan + has been chosen. +*/ + +static bool +make_outerjoin_info(JOIN *join) +{ + DBUG_ENTER("make_outerjoin_info"); + + /* + Create temp. tables for merged SJ-Materialization nests. We need to do + this now, because further code relies on tab->table and + tab->table->pos_in_table_list being set. + */ + JOIN_TAB *tab; + for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) + { + if (tab->bush_children) + { + if (setup_sj_materialization_part1(tab)) + DBUG_RETURN(TRUE); + tab->table->reginfo.join_tab= tab; + } + } + + for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) + { + TABLE *table= tab->table; + TABLE_LIST *tbl= table->pos_in_table_list; + TABLE_LIST *embedding= tbl->embedding; + + if (tbl->outer_join & (JOIN_TYPE_LEFT | JOIN_TYPE_RIGHT)) + { + /* + Table tab is the only one inner table for outer join. + (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a + is in the query above.) 
+ */ + tab->last_inner= tab->first_inner= tab; + tab->on_expr_ref= &tbl->on_expr; + tab->cond_equal= tbl->cond_equal; + if (embedding && !embedding->is_active_sjm()) + tab->first_upper= embedding->nested_join->first_nested; + } + else if (!embedding) + tab->table->reginfo.not_exists_optimize= 0; + + for ( ; embedding ; embedding= embedding->embedding) + { + if (embedding->is_active_sjm()) + { + /* We're trying to walk out of an SJ-Materialization nest. Don't do this. */ + break; + } + /* Ignore sj-nests: */ + if (!(embedding->on_expr && embedding->outer_join)) + { + tab->table->reginfo.not_exists_optimize= 0; + continue; + } + NESTED_JOIN *nested_join= embedding->nested_join; + if (!nested_join->counter) + { + /* + Table tab is the first inner table for nested_join. + Save reference to it in the nested join structure. + */ + nested_join->first_nested= tab; + tab->on_expr_ref= &embedding->on_expr; + tab->cond_equal= tbl->cond_equal; + if (embedding->embedding) + tab->first_upper= embedding->embedding->nested_join->first_nested; + } + if (!tab->first_inner) + tab->first_inner= nested_join->first_nested; + if (++nested_join->counter < nested_join->n_tables) + break; + /* Table tab is the last inner table for nested join. */ + nested_join->first_nested->last_inner= tab; + } + } + DBUG_RETURN(FALSE); +} + + +/* + @brief + Build a temporary join prefix condition for JOIN_TABs up to the last tab + + @param ret OUT the condition is returned here + + @return + false OK + true Out of memory + + @detail + Walk through the join prefix (from the first table to the last_tab) and + build a condition: + + join_tab_1_cond AND join_tab_2_cond AND ... AND last_tab_conds + + The condition is only intended to be used by the range optimizer, so: + - it is not normalized (can have Item_cond_and inside another + Item_cond_and) + - it does not include join->exec_const_cond and other similar conditions. 
+*/ + +bool build_tmp_join_prefix_cond(JOIN *join, JOIN_TAB *last_tab, Item **ret) +{ + THD *const thd= join->thd; + Item_cond_and *all_conds= NULL; + + Item *res= NULL; + + // Pick the ON-expression. Use the same logic as in get_sargable_cond(): + if (last_tab->on_expr_ref) + res= *last_tab->on_expr_ref; + else if (last_tab->table->pos_in_table_list && + last_tab->table->pos_in_table_list->embedding && + !last_tab->table->pos_in_table_list->embedding->sj_on_expr) + { + res= last_tab->table->pos_in_table_list->embedding->on_expr; + } + + for (JOIN_TAB *tab= first_depth_first_tab(join); + tab; + tab= next_depth_first_tab(join, tab)) + { + if (tab->select_cond) + { + if (!res) + res= tab->select_cond; + else + { + if (!all_conds) + { + if (!(all_conds= new (thd->mem_root)Item_cond_and(thd, res, + tab->select_cond))) + return true; + res= all_conds; + } + else + all_conds->add(tab->select_cond, thd->mem_root); + } + } + if (tab == last_tab) + break; + } + *ret= all_conds? all_conds: res; + return false; +} + + +static bool +make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) +{ + THD *thd= join->thd; + DBUG_ENTER("make_join_select"); + if (select) + { + add_not_null_conds(join); + table_map used_tables; + /* + Step #1: Extract constant condition + - Extract and check the constant part of the WHERE + - Extract constant parts of ON expressions from outer + joins and attach them appropriately. + */ + if (cond) /* Because of QUICK_GROUP_MIN_MAX_SELECT */ + { /* there may be a select without a cond. */ + if (join->table_count > 1) + cond->update_used_tables(); // Tablenr may have changed + + /* + Extract expressions that depend on constant tables + 1. Const part of the join's WHERE clause can be checked immediately + and if it is not satisfied then the join has empty result + 2. Constant parts of outer joins' ON expressions must be attached + there inside the triggers. 
+ */ + { // Check const tables + Item* const_cond= NULL; + const_cond= make_cond_for_table(thd, cond, + join->const_table_map, + (table_map) 0, -1, FALSE, FALSE); + if (!const_cond && thd->is_error()) + DBUG_RETURN(1); + + /* Add conditions added by add_not_null_conds(). */ + for (uint i= 0 ; i < join->const_tables ; i++) + add_cond_and_fix(thd, &const_cond, + join->join_tab[i].select_cond); + + DBUG_EXECUTE("where",print_where(const_cond,"constants", + QT_ORDINARY);); + + if (const_cond) + { + Json_writer_object trace_const_cond(thd); + trace_const_cond.add("condition_on_constant_tables", const_cond); + if (const_cond->is_expensive()) + { + trace_const_cond.add("evaluated", "false") + .add("cause", "expensive cond"); + } + else + { + bool const_cond_result; + { + Json_writer_array a(thd, "computing_condition"); + const_cond_result= const_cond->val_int() != 0; + } + if (!const_cond_result) + { + DBUG_PRINT("info",("Found impossible WHERE condition")); + trace_const_cond.add("evaluated", "true") + .add("found", "impossible where"); + join->exec_const_cond= NULL; + DBUG_RETURN(1); + } + } + join->exec_const_cond= const_cond; + } + + if (join->table_count != join->const_tables) + { + COND *outer_ref_cond= make_cond_for_table(thd, cond, + join->const_table_map | + OUTER_REF_TABLE_BIT, + OUTER_REF_TABLE_BIT, + -1, FALSE, FALSE); + if (outer_ref_cond) + { + add_cond_and_fix(thd, &outer_ref_cond, join->outer_ref_cond); + join->outer_ref_cond= outer_ref_cond; + } + else if (thd->is_error()) + DBUG_RETURN(1); + } + else + { + COND *pseudo_bits_cond= + make_cond_for_table(thd, cond, + join->const_table_map | + PSEUDO_TABLE_BITS, + PSEUDO_TABLE_BITS, + -1, FALSE, FALSE); + if (pseudo_bits_cond) + { + add_cond_and_fix(thd, &pseudo_bits_cond, + join->pseudo_bits_cond); + join->pseudo_bits_cond= pseudo_bits_cond; + } + else if (thd->is_error()) + DBUG_RETURN(1); + } + } + } + + /* + Step #2: Extract WHERE/ON parts + */ + Json_writer_object trace_wrapper(thd); + 
Json_writer_object trace_conditions(thd, "attaching_conditions_to_tables"); + Json_writer_array trace_attached_comp(thd, + "attached_conditions_computation"); + uint i; + for (i= join->top_join_tab_count - 1; i >= join->const_tables; i--) + { + if (!join->join_tab[i].bush_children) + break; + } + uint last_top_base_tab_idx= i; + + table_map save_used_tables= 0; + used_tables=((select->const_tables=join->const_table_map) | + OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); + JOIN_TAB *tab; + table_map current_map; + i= join->const_tables; + for (tab= first_depth_first_tab(join); tab; + tab= next_depth_first_tab(join, tab)) + { + bool is_hj; + + /* + first_inner is the X in queries like: + SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X + */ + JOIN_TAB *first_inner_tab= tab->first_inner; + + if (!tab->bush_children) + current_map= tab->table->map; + else + current_map= tab->bush_children->start->emb_sj_nest->sj_inner_tables; + + bool use_quick_range=0; + COND *tmp; + + /* + Tables that are within SJ-Materialization nests cannot have their + conditions referring to preceding non-const tables. 
+ - If we're looking at the first SJM table, reset used_tables + to refer to only allowed tables + */ + if (tab->emb_sj_nest && tab->emb_sj_nest->sj_mat_info && + tab->emb_sj_nest->sj_mat_info->is_used && + !(used_tables & tab->emb_sj_nest->sj_inner_tables)) + { + save_used_tables= used_tables; + used_tables= join->const_table_map | OUTER_REF_TABLE_BIT | + RAND_TABLE_BIT; + } + + used_tables|=current_map; + + if (tab->type == JT_REF && tab->quick && + (((uint) tab->ref.key == tab->quick->index && + tab->ref.key_length < tab->quick->max_used_key_length) || + (!is_hash_join_key_no(tab->ref.key) && + tab->table->intersect_keys.is_set(tab->ref.key)))) + { + /* Range uses longer key; Use this instead of ref on key */ + Json_writer_object ref_to_range(thd); + ref_to_range.add("ref_to_range", true); + ref_to_range.add("cause", "range uses longer key"); + tab->type=JT_ALL; + use_quick_range=1; + tab->use_quick=1; + tab->ref.key= -1; + tab->ref.key_parts=0; // Don't use ref key. + join->best_positions[i].records_read= rows2double(tab->quick->records); + /* + We will use join cache here : prevent sorting of the first + table only and sort at the end. 
+ */ + if (i != join->const_tables && + join->table_count > join->const_tables + 1 && + join->best_positions[i].use_join_buffer) + join->full_join= 1; + } + + tmp= NULL; + + if (cond) + { + if (tab->bush_children) + { + // Reached the materialization tab + tmp= make_cond_after_sjm(thd, cond, cond, save_used_tables, + used_tables, /*inside_or_clause=*/FALSE); + used_tables= save_used_tables | used_tables; + save_used_tables= 0; + } + else + { + tmp= make_cond_for_table(thd, cond, used_tables, current_map, i, + FALSE, FALSE); + if (!tmp && thd->is_error()) + DBUG_RETURN(1); + + if (tab == join->join_tab + last_top_base_tab_idx) + { + /* + This pushes conjunctive conditions of WHERE condition such that: + - their used_tables() contain RAND_TABLE_BIT + - the conditions does not refer to any fields + (such like rand() > 0.5) + */ + table_map rand_table_bit= (table_map) RAND_TABLE_BIT; + COND *rand_cond= make_cond_for_table(thd, cond, used_tables, + rand_table_bit, -1, + FALSE, FALSE); + if (rand_cond) + add_cond_and_fix(thd, &tmp, rand_cond); + else if (thd->is_error()) + DBUG_RETURN(1); + } + } + /* Add conditions added by add_not_null_conds(). 
*/ + if (tab->select_cond) + add_cond_and_fix(thd, &tmp, tab->select_cond); + } + + is_hj= (tab->type == JT_REF || tab->type == JT_EQ_REF) && + (join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) && + ((join->max_allowed_join_cache_level+1)/2 == 2 || + ((join->max_allowed_join_cache_level+1)/2 > 2 && + is_hash_join_key_no(tab->ref.key))) && + (!tab->emb_sj_nest || + join->allowed_semijoin_with_cache) && + (!(tab->table->map & join->outer_join) || + join->allowed_outer_join_with_cache); + + if (cond && !tmp && tab->quick) + { // Outer join + if (tab->type != JT_ALL && !is_hj) + { + /* + Don't use the quick method + We come here in the case where we have 'key=constant' and + the test is removed by make_cond_for_table() + */ + delete tab->quick; + tab->quick= 0; + } + else + { + /* + Hack to handle the case where we only refer to a table + in the ON part of an OUTER JOIN. In this case we want the code + below to check if we should use 'quick' instead. + */ + DBUG_PRINT("info", ("Item_int")); + tmp= (Item*) Item_true; + } + + } + if (tmp || !cond || tab->type == JT_REF || tab->type == JT_REF_OR_NULL || + tab->type == JT_EQ_REF || first_inner_tab) + { + DBUG_EXECUTE("where",print_where(tmp, + tab->table? tab->table->alias.c_ptr() :"sjm-nest", + QT_ORDINARY);); + SQL_SELECT *sel= tab->select= ((SQL_SELECT*) + thd->memdup((uchar*) select, + sizeof(*select))); + if (!sel) + DBUG_RETURN(1); // End of memory + /* + If tab is an inner table of an outer join operation, + add a match guard to the pushed down predicate. + The guard will turn the predicate on only after + the first match for outer tables is encountered. + */ + if (cond && tmp) + { + /* + Because of QUICK_GROUP_MIN_MAX_SELECT there may be a select without + a cond, so neutralize the hack above. 
+ */ + COND *tmp_cond; + if (!(tmp_cond= add_found_match_trig_cond(thd, first_inner_tab, tmp, + 0))) + DBUG_RETURN(1); + sel->cond= tmp_cond; + tab->set_select_cond(tmp_cond, __LINE__); + /* Push condition to storage engine if this is enabled + and the condition is not guarded */ + if (tab->table) + { + tab->table->file->pushed_cond= NULL; + if ((tab->table->file->ha_table_flags() & + HA_CAN_TABLE_CONDITION_PUSHDOWN) && + !first_inner_tab) + { + Json_writer_object wrap(thd); + Json_writer_object trace_cp(thd, "table_condition_pushdown"); + trace_cp.add_table_name(tab->table); + + COND *push_cond= + make_cond_for_table(thd, tmp_cond, current_map, current_map, + -1, FALSE, FALSE); + if (push_cond) + { + trace_cp.add("push_cond", push_cond); + /* Push condition to handler */ + if (!tab->table->file->cond_push(push_cond)) + tab->table->file->pushed_cond= push_cond; + } + else if (thd->is_error()) + DBUG_RETURN(1); + } + } + } + else + { + sel->cond= NULL; + tab->set_select_cond(NULL, __LINE__); + } + + sel->head=tab->table; + DBUG_EXECUTE("where", + print_where(tmp, + tab->table ? tab->table->alias.c_ptr() : + "(sjm-nest)", + QT_ORDINARY);); + if (tab->quick) + { + /* Use quick key read if it's a constant and it's not used + with key reading */ + if ((tab->needed_reg.is_clear_all() && tab->type != JT_EQ_REF && + tab->type != JT_FT && + ((tab->type != JT_CONST && tab->type != JT_REF) || + (uint) tab->ref.key == tab->quick->index)) || is_hj) + { + DBUG_ASSERT(tab->quick->is_valid()); + sel->quick=tab->quick; // Use value from get_quick_... + sel->quick_keys.clear_all(); + sel->needed_reg.clear_all(); + if (is_hj && tab->rowid_filter) + { + delete tab->rowid_filter; + tab->rowid_filter= 0; + } + } + else + { + delete tab->quick; + } + tab->quick=0; + } + uint ref_key= sel->head? 
(uint) sel->head->reginfo.join_tab->ref.key+1 : 0; + if (i == join->const_tables && ref_key) + { + if (!tab->const_keys.is_clear_all() && + tab->table->reginfo.impossible_range) + DBUG_RETURN(1); + } + else if (tab->type == JT_ALL && ! use_quick_range) + { + if (!tab->const_keys.is_clear_all() && + tab->table->reginfo.impossible_range) + DBUG_RETURN(1); // Impossible range + /* + We plan to scan all rows. + Check again if we should use an index. + + There are two cases: + 1) There could be an index usage the refers to a previous + table that we didn't consider before, but could be consider + now as a "last resort". For example + SELECT * from t1,t2 where t1.a between t2.a and t2.b; + 2) If the current table is the first non const table + and there is a limit it still possibly beneficial + to use the index even if the index range is big as + we can stop when we've found limit rows. + + (1) - Don't switch the used index if we are using semi-join + LooseScan on this table. Using different index will not + produce the desired ordering and de-duplication. + */ + + if (!tab->table->is_filled_at_execution() && + !tab->loosescan_match_tab && // (1) + ((cond && (!tab->keys.is_subset(tab->const_keys) && i > 0)) || + (!tab->const_keys.is_clear_all() && i == join->const_tables && + join->unit->lim.get_select_limit() < + join->best_positions[i].records_read && + !(join->select_options & OPTION_FOUND_ROWS)))) + { + /* Join with outer join condition */ + COND *orig_cond=sel->cond; + + if (build_tmp_join_prefix_cond(join, tab, &sel->cond)) + return true; + + /* + We can't call sel->cond->fix_fields, + as it will break tab->on_expr if it's AND condition + (fix_fields currently removes extra AND/OR levels). + Yet attributes of the just built condition are not needed. + Thus we call sel->cond->quick_fix_field for safety. 
+ */ + if (sel->cond && !sel->cond->fixed()) + sel->cond->quick_fix_field(); + quick_select_return res; + + if ((res= sel->test_quick_select(thd, tab->keys, + ((used_tables & ~ current_map) | + OUTER_REF_TABLE_BIT), + (join->select_options & + OPTION_FOUND_ROWS ? + HA_POS_ERROR : + join->unit->lim.get_select_limit()), + 0, + FALSE, FALSE, FALSE)) == + SQL_SELECT::IMPOSSIBLE_RANGE) + { + /* + Before reporting "Impossible WHERE" for the whole query + we have to check isn't it only "impossible ON" instead + */ + sel->cond=orig_cond; + if (!*tab->on_expr_ref || + (res= sel->test_quick_select(thd, tab->keys, + used_tables & ~ current_map, + (join->select_options & + OPTION_FOUND_ROWS ? + HA_POS_ERROR : + join->unit->lim.get_select_limit()), + 0, FALSE, FALSE, FALSE, TRUE)) == + SQL_SELECT::IMPOSSIBLE_RANGE) + DBUG_RETURN(1); // Impossible WHERE + } + else + sel->cond=orig_cond; + + if (res == SQL_SELECT::ERROR) + DBUG_RETURN(1); /* Some error in one of test_quick_select calls */ + + /* Fix for EXPLAIN */ + if (sel->quick) + join->best_positions[i].records_read= (double)sel->quick->records; + } + else + { + sel->needed_reg=tab->needed_reg; + } + sel->quick_keys= tab->table->opt_range_keys; + if (!sel->quick_keys.is_subset(tab->checked_keys) || + !sel->needed_reg.is_subset(tab->checked_keys)) + { + /* + "Range checked for each record" is a "last resort" access method + that should only be used when the other option is a cross-product + join. + + We use the following condition (it's approximate): + 1. There are potential keys for (sel->needed_reg) + 2. There were no possible ways to construct a quick select, or + the quick select would be more expensive than the full table + scan. + */ + tab->use_quick= (!sel->needed_reg.is_clear_all() && + (sel->quick_keys.is_clear_all() || + (sel->quick && + sel->quick->read_time > + tab->table->file->scan_time() + + tab->table->file->stats.records/TIME_FOR_COMPARE + ))) ? 
+ 2 : 1; + sel->read_tables= used_tables & ~current_map; + sel->quick_keys.clear_all(); + } + if (i != join->const_tables && tab->use_quick != 2 && + !tab->first_inner) + { /* Read with cache */ + /* + TODO: the execution also gets here when we will not be using + join buffer. Review these cases and perhaps, remove this call. + (The final decision whether to use join buffer is made in + check_join_cache_usage, so we should only call make_scan_filter() + there, too). + */ + if (tab->make_scan_filter()) + DBUG_RETURN(1); + } + } + } + + /* + Push down conditions from all ON expressions. + Each of these conditions are guarded by a variable + that turns if off just before null complemented row for + outer joins is formed. Thus, the condition from an + 'on expression' are guaranteed not to be checked for + the null complemented row. + */ + + /* + First push down constant conditions from ON expressions. + - Each pushed-down condition is wrapped into trigger which is + enabled only for non-NULL-complemented record + - The condition is attached to the first_inner_table. + + With regards to join nests: + - if we start at top level, don't walk into nests + - if we start inside a nest, stay within that nest. + */ + JOIN_TAB *start_from= tab->bush_root_tab? + tab->bush_root_tab->bush_children->start : + join->join_tab + join->const_tables; + JOIN_TAB *end_with= tab->bush_root_tab? 
+ tab->bush_root_tab->bush_children->end : + join->join_tab + join->top_join_tab_count; + for (JOIN_TAB *join_tab= start_from; + join_tab != end_with; + join_tab++) + { + if (*join_tab->on_expr_ref) + { + JOIN_TAB *cond_tab= join_tab->first_inner; + COND *tmp_cond= make_cond_for_table(thd, *join_tab->on_expr_ref, + join->const_table_map, + (table_map) 0, -1, FALSE, FALSE); + if (!tmp_cond) + { + if (!thd->is_error()) + continue; + DBUG_RETURN(1); + } + tmp_cond= new (thd->mem_root) Item_func_trig_cond(thd, tmp_cond, + &cond_tab->not_null_compl); + if (!tmp_cond) + DBUG_RETURN(1); + tmp_cond->quick_fix_field(); + cond_tab->select_cond= !cond_tab->select_cond ? tmp_cond : + new (thd->mem_root) Item_cond_and(thd, cond_tab->select_cond, + tmp_cond); + if (!cond_tab->select_cond) + DBUG_RETURN(1); + cond_tab->select_cond->quick_fix_field(); + cond_tab->select_cond->update_used_tables(); + if (cond_tab->select) + cond_tab->select->cond= cond_tab->select_cond; + } + } + + + /* Push down non-constant conditions from ON expressions */ + JOIN_TAB *last_tab= tab; + + /* + while we're inside of an outer join and last_tab is + the last of its tables ... + */ + while (first_inner_tab && first_inner_tab->last_inner == last_tab) + { + /* + Table tab is the last inner table of an outer join. + An on expression is always attached to it. + */ + COND *on_expr= *first_inner_tab->on_expr_ref; + + table_map used_tables2= (join->const_table_map | + OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); + + start_from= tab->bush_root_tab? + tab->bush_root_tab->bush_children->start : + join->join_tab + join->const_tables; + for (JOIN_TAB *inner_tab= start_from; + inner_tab <= last_tab; + inner_tab++) + { + DBUG_ASSERT(inner_tab->table); + current_map= inner_tab->table->map; + used_tables2|= current_map; + /* + psergey: have put the -1 below. It's bad, will need to fix it. 
+ */ + COND *tmp_cond= make_cond_for_table(thd, on_expr, used_tables2, + current_map, + /*(inner_tab - first_tab)*/ -1, + FALSE, FALSE); + if (!tmp_cond && thd->is_error()) + DBUG_RETURN(1); + if (tab == last_tab) + { + /* + This pushes conjunctive conditions of ON expression of an outer + join such that: + - their used_tables() contain RAND_TABLE_BIT + - the conditions does not refer to any fields + (such like rand() > 0.5) + */ + table_map rand_table_bit= (table_map) RAND_TABLE_BIT; + COND *rand_cond= make_cond_for_table(thd, on_expr, used_tables2, + rand_table_bit, -1, + FALSE, FALSE); + if (rand_cond) + add_cond_and_fix(thd, &tmp_cond, rand_cond); + else if (thd->is_error()) + DBUG_RETURN(1); + } + bool is_sjm_lookup_tab= FALSE; + if (inner_tab->bush_children) + { + /* + 'inner_tab' is an SJ-Materialization tab, i.e. we have a join + order like this: + + ot1 sjm_tab LEFT JOIN ot2 ot3 + ^ ^ + 'tab'-+ +--- left join we're adding triggers for + + LEFT JOIN's ON expression may not have references to subquery + columns. The subquery was in the WHERE clause, so IN-equality + is in the WHERE clause, also. + However, equality propagation code may have propagated the + IN-equality into ON expression, and we may get things like + + subquery_inner_table=const + + in the ON expression. We must not check such conditions during + SJM-lookup, because 1) subquery_inner_table has no valid current + row (materialization temp.table has it instead), and 2) they + would be true anyway. + */ + SJ_MATERIALIZATION_INFO *sjm= + inner_tab->bush_children->start->emb_sj_nest->sj_mat_info; + if (sjm->is_used && !sjm->is_sj_scan) + is_sjm_lookup_tab= TRUE; + } + + if (inner_tab == first_inner_tab && inner_tab->on_precond && + !is_sjm_lookup_tab) + add_cond_and_fix(thd, &tmp_cond, inner_tab->on_precond); + if (tmp_cond && !is_sjm_lookup_tab) + { + JOIN_TAB *cond_tab= (inner_tab < first_inner_tab ? + first_inner_tab : inner_tab); + Item **sel_cond_ref= (inner_tab < first_inner_tab ? 
+ &first_inner_tab->on_precond : + &inner_tab->select_cond); + /* + First add the guards for match variables of + all embedding outer join operations. + */ + if (!(tmp_cond= add_found_match_trig_cond(thd, + cond_tab->first_inner, + tmp_cond, + first_inner_tab))) + DBUG_RETURN(1); + /* + Now add the guard turning the predicate off for + the null complemented row. + */ + DBUG_PRINT("info", ("Item_func_trig_cond")); + tmp_cond= new (thd->mem_root) Item_func_trig_cond(thd, tmp_cond, + &first_inner_tab-> + not_null_compl); + DBUG_PRINT("info", ("Item_func_trig_cond %p", + tmp_cond)); + if (tmp_cond) + tmp_cond->quick_fix_field(); + /* Add the predicate to other pushed down predicates */ + DBUG_PRINT("info", ("Item_cond_and")); + *sel_cond_ref= !(*sel_cond_ref) ? + tmp_cond : + new (thd->mem_root) Item_cond_and(thd, *sel_cond_ref, tmp_cond); + DBUG_PRINT("info", ("Item_cond_and %p", + (*sel_cond_ref))); + if (!(*sel_cond_ref)) + DBUG_RETURN(1); + (*sel_cond_ref)->quick_fix_field(); + (*sel_cond_ref)->update_used_tables(); + if (cond_tab->select) + cond_tab->select->cond= cond_tab->select_cond; + } + } + first_inner_tab= first_inner_tab->first_upper; + } + if (!tab->bush_children) + i++; + } + + if (unlikely(thd->trace_started())) + { + trace_attached_comp.end(); + Json_writer_array trace_attached_summary(thd, + "attached_conditions_summary"); + for (tab= first_depth_first_tab(join); tab; + tab= next_depth_first_tab(join, tab)) + { + if (!tab->table) + continue; + Item *const cond = tab->select_cond; + Json_writer_object trace_one_table(thd); + trace_one_table.add_table_name(tab); + trace_one_table.add("attached", cond); + } + } + } + DBUG_RETURN(0); +} + + +static +uint get_next_field_for_derived_key(uchar *arg) +{ + KEYUSE *keyuse= *(KEYUSE **) arg; + if (!keyuse) + return (uint) (-1); + TABLE *table= keyuse->table; + uint key= keyuse->key; + uint fldno= keyuse->keypart; + uint keypart= keyuse->keypart_map == (key_part_map) 1 ? 
+ 0 : (keyuse-1)->keypart+1; + for ( ; + keyuse->table == table && keyuse->key == key && keyuse->keypart == fldno; + keyuse++) + keyuse->keypart= keypart; + if (keyuse->key != key) + keyuse= 0; + *((KEYUSE **) arg)= keyuse; + return fldno; +} + + +static +uint get_next_field_for_derived_key_simple(uchar *arg) +{ + KEYUSE *keyuse= *(KEYUSE **) arg; + if (!keyuse) + return (uint) (-1); + TABLE *table= keyuse->table; + uint key= keyuse->key; + uint fldno= keyuse->keypart; + for ( ; + keyuse->table == table && keyuse->key == key && keyuse->keypart == fldno; + keyuse++) + ; + if (keyuse->key != key) + keyuse= 0; + *((KEYUSE **) arg)= keyuse; + return fldno; +} + +static +bool generate_derived_keys_for_table(KEYUSE *keyuse, uint count, uint keys) +{ + TABLE *table= keyuse->table; + if (table->alloc_keys(keys)) + return TRUE; + uint key_count= 0; + KEYUSE *first_keyuse= keyuse; + uint prev_part= keyuse->keypart; + uint parts= 0; + uint i= 0; + + for ( ; i < count && key_count < keys; ) + { + do + { + keyuse->key= table->s->keys; + keyuse->keypart_map= (key_part_map) (1 << parts); + keyuse++; + i++; + } + while (i < count && keyuse->used_tables == first_keyuse->used_tables && + keyuse->keypart == prev_part); + parts++; + if (i < count && keyuse->used_tables == first_keyuse->used_tables) + { + prev_part= keyuse->keypart; + } + else + { + KEYUSE *save_first_keyuse= first_keyuse; + if (table->check_tmp_key(table->s->keys, parts, + get_next_field_for_derived_key_simple, + (uchar *) &first_keyuse)) + + { + JOIN_TAB *tab; + first_keyuse= save_first_keyuse; + if (table->add_tmp_key(table->s->keys, parts, + get_next_field_for_derived_key, + (uchar *) &first_keyuse, + FALSE)) + return TRUE; + table->reginfo.join_tab->keys.set_bit(table->s->keys); + tab= table->reginfo.join_tab; + for (uint i=0; i < parts; i++) + tab->key_dependent|= save_first_keyuse[i].used_tables; + } + else + { + /* Mark keyuses for this key to be excluded */ + for (KEYUSE *curr=save_first_keyuse; curr < keyuse; 
curr++) + { + curr->key= MAX_KEY; + } + } + first_keyuse= keyuse; + key_count++; + parts= 0; + prev_part= keyuse->keypart; + } + } + + return FALSE; +} + + +static +bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array) +{ + KEYUSE *keyuse= dynamic_element(keyuse_array, 0, KEYUSE*); + size_t elements= keyuse_array->elements; + TABLE *prev_table= 0; + for (size_t i= 0; i < elements; i++, keyuse++) + { + if (!keyuse->table) + break; + KEYUSE *first_table_keyuse= NULL; + table_map last_used_tables= 0; + uint count= 0; + uint keys= 0; + TABLE_LIST *derived= NULL; + if (keyuse->table != prev_table) + derived= keyuse->table->pos_in_table_list; + while (derived && derived->is_materialized_derived()) + { + if (keyuse->table != prev_table) + { + prev_table= keyuse->table; + while (keyuse->table == prev_table && keyuse->key != MAX_KEY) + { + keyuse++; + i++; + } + if (keyuse->table != prev_table) + { + keyuse--; + i--; + derived= NULL; + continue; + } + first_table_keyuse= keyuse; + last_used_tables= keyuse->used_tables; + count= 0; + keys= 0; + } + else if (keyuse->used_tables != last_used_tables) + { + keys++; + last_used_tables= keyuse->used_tables; + } + count++; + keyuse++; + i++; + if (keyuse->table != prev_table) + { + if (generate_derived_keys_for_table(first_table_keyuse, count, ++keys)) + return TRUE; + keyuse--; + i--; + derived= NULL; + } + } + } + return FALSE; +} + + +/* + @brief + Drops unused keys for each materialized derived table/view + + @details + For materialized derived tables only ref access can be used, it employs + only one index, thus we don't need the rest. For each materialized derived + table/view call TABLE::use_index to save one index chosen by the optimizer + and free others. No key is chosen then all keys will be dropped. 
+*/ + +void JOIN::drop_unused_derived_keys() +{ + JOIN_TAB *tab; + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + + TABLE *tmp_tbl= tab->table; + if (!tmp_tbl) + continue; + if (!tmp_tbl->pos_in_table_list->is_materialized_derived()) + continue; + if (tmp_tbl->max_keys > 1 && !tab->is_ref_for_hash_join()) + tmp_tbl->use_index(tab->ref.key); + if (tmp_tbl->s->keys) + { + if (tab->ref.key >= 0 && tab->ref.key < MAX_KEY) + tab->ref.key= 0; + else + tmp_tbl->s->keys= 0; + } + tab->keys= (key_map) (tmp_tbl->s->keys ? 1 : 0); + } +} + + +/* + Evaluate the bitmap of used tables for items from the select list +*/ + +inline void JOIN::eval_select_list_used_tables() +{ + select_list_used_tables= 0; + Item *item; + List_iterator_fast it(fields_list); + while ((item= it++)) + { + select_list_used_tables|= item->used_tables(); + } + Item_outer_ref *ref; + List_iterator_fast ref_it(select_lex->inner_refs_list); + while ((ref= ref_it++)) + { + item= ref->outer_ref; + select_list_used_tables|= item->used_tables(); + } +} + + +/* + Determine {after which table we'll produce ordered set} + + SYNOPSIS + make_join_orderinfo() + join + + + DESCRIPTION + Determine if the set is already ordered for ORDER BY, so it can + disable join cache because it will change the ordering of the results. + Code handles sort table that is at any location (not only first after + the const tables) despite the fact that it's currently prohibited. + We must disable join cache if the first non-const table alone is + ordered. If there is a temp table the ordering is done as a last + operation and doesn't prevent join cache usage. + + RETURN + Number of table after which the set will be ordered + join->tables if we don't need an ordered set +*/ + +static uint make_join_orderinfo(JOIN *join) +{ + /* + This function needs to be fixed to take into account that we now have SJM + nests. 
+ */ + DBUG_ASSERT(0); + + JOIN_TAB *tab; + if (join->need_tmp) + return join->table_count; + tab= join->get_sort_by_join_tab(); + return tab ? (uint)(tab-join->join_tab) : join->table_count; +} + +/* + Deny usage of join buffer for the specified table + + SYNOPSIS + set_join_cache_denial() + tab join table for which join buffer usage is to be denied + + DESCRIPTION + The function denies usage of join buffer when joining the table 'tab'. + The table is marked as not employing any join buffer. If a join cache + object has been already allocated for the table this object is destroyed. + + RETURN + none +*/ + +static +void set_join_cache_denial(JOIN_TAB *join_tab) +{ + if (join_tab->cache) + { + /* + If there is a previous cache linked to this cache through the + next_cache pointer: remove the link. + */ + if (join_tab->cache->prev_cache) + join_tab->cache->prev_cache->next_cache= 0; + /* + Same for the next_cache + */ + if (join_tab->cache->next_cache) + join_tab->cache->next_cache->prev_cache= 0; + + join_tab->cache->free(); + join_tab->cache= 0; + } + if (join_tab->use_join_cache) + { + join_tab->use_join_cache= FALSE; + join_tab->used_join_cache_level= 0; + /* + It could be only sub_select(). It could not be sub_seject_sjm because we + don't do join buffering for the first table in sjm nest. + */ + join_tab[-1].next_select= sub_select; + if (join_tab->type == JT_REF && join_tab->is_ref_for_hash_join()) + { + join_tab->type= JT_ALL; + join_tab->ref.key_parts= 0; + } + join_tab->join->return_tab= join_tab; + } +} + + +/** + The default implementation of unlock-row method of READ_RECORD, + used in all access methods. 
*/

void rr_unlock_row(st_join_table *tab)
{
  READ_RECORD *info= &tab->read_record;
  info->table->file->unlock_row();
}


/**
  Pick the appropriate access method functions

  Sets the functions for the selected table access method

  @param tab Table reference to put access method
*/

static void
pick_table_access_method(JOIN_TAB *tab)
{
  switch (tab->type)
  {
  case JT_REF:
    tab->read_first_record= join_read_always_key;
    tab->read_record.read_record_func= join_read_next_same;
    break;

  case JT_REF_OR_NULL:
    tab->read_first_record= join_read_always_key_or_null;
    tab->read_record.read_record_func= join_read_next_same_or_null;
    break;

  case JT_CONST:
    tab->read_first_record= join_read_const;
    tab->read_record.read_record_func= join_no_more_records;
    break;

  case JT_EQ_REF:
    tab->read_first_record= join_read_key;
    tab->read_record.read_record_func= join_no_more_records;
    break;

  case JT_FT:
    tab->read_first_record= join_ft_read_first;
    tab->read_record.read_record_func= join_ft_read_next;
    break;

  case JT_SYSTEM:
    tab->read_first_record= join_read_system;
    tab->read_record.read_record_func= join_no_more_records;
    break;

  /* keep gcc happy */
  default:
    break;
  }
}


/*
  Revise usage of join buffer for the specified table and the whole nest

  SYNOPSIS
    revise_cache_usage()
      tab    join table for which join buffer usage is to be revised

  DESCRIPTION
    The function revises the decision to use a join buffer for the table 'tab'.
    If this table happened to be among the inner tables of a nested outer join/
    semi-join the function denies usage of join buffers for all of them

  RETURN
    none
*/

static
void revise_cache_usage(JOIN_TAB *join_tab)
{
  JOIN_TAB *tab;
  JOIN_TAB *first_inner;

  if (join_tab->first_inner)
  {
    /*
      Walk outwards through the nest of outer joins this table belongs to,
      denying the join cache for every inner table of every enclosing nest.
    */
    JOIN_TAB *end_tab= join_tab;
    for (first_inner= join_tab->first_inner;
         first_inner;
         first_inner= first_inner->first_upper)
    {
      for (tab= end_tab; tab >= first_inner; tab--)
        set_join_cache_denial(tab);
      end_tab= first_inner;
    }
  }
  else if (join_tab->first_sj_inner_tab)
  {
    /* Same for a semi-join nest: deny cache for all of its inner tables. */
    first_inner= join_tab->first_sj_inner_tab;
    for (tab= join_tab; tab >= first_inner; tab--)
    {
      set_join_cache_denial(tab);
    }
  }
  else set_join_cache_denial(join_tab);
}


/*
  end_select-compatible function that writes the record into a sjm temptable

  SYNOPSIS
    end_sj_materialize()
      join            The join
      join_tab        Points to right after the last join_tab in materialization bush
      end_of_records  FALSE <=> This call is made to pass another record
                                combination
                      TRUE  <=> EOF (no action)

  DESCRIPTION
    This function is used by semi-join materialization to capture subquery's
    resultset and write it into the temptable (that is, materialize it).

  NOTE
    This function is used only for semi-join materialization. Non-semijoin
    materialization uses different mechanism.

  RETURN
    NESTED_LOOP_OK
    NESTED_LOOP_ERROR
*/

enum_nested_loop_state
end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
{
  int error;
  THD *thd= join->thd;
  SJ_MATERIALIZATION_INFO *sjm= join_tab[-1].emb_sj_nest->sj_mat_info;
  DBUG_ENTER("end_sj_materialize");
  if (!end_of_records)
  {
    TABLE *table= sjm->table;

    List_iterator<Item> it(sjm->sjm_table_cols);
    Item *item;
    /* Rows with a NULL in any column cannot match an IN predicate: skip. */
    while ((item= it++))
    {
      if (item->is_null())
        DBUG_RETURN(NESTED_LOOP_OK);
    }
    fill_record(thd, table, table->field, sjm->sjm_table_cols, TRUE, FALSE);
    if (unlikely(thd->is_error()))
      DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
    if (unlikely((error= table->file->ha_write_tmp_row(table->record[0]))))
    {
      /* create_myisam_from_heap will generate error if needed */
      if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
          create_internal_tmp_table_from_heap(thd, table,
                                              sjm->sjm_table_param.start_recinfo,
                                              &sjm->sjm_table_param.recinfo, error, 1, NULL))
        DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
    }
  }
  DBUG_RETURN(NESTED_LOOP_OK);
}


/*
  Check whether a join buffer can be used to join the specified table

  SYNOPSIS
    check_join_cache_usage()
      tab            joined table to check join buffer usage for
      options        options of the join
      no_jbuf_after  don't use join buffering after table with this number
      prev_tab       previous join table

  DESCRIPTION
    The function finds out whether the table 'tab' can be joined using a join
    buffer. This check is performed after the best execution plan for 'join'
    has been chosen. If the function decides that a join buffer can be employed
    then it selects the most appropriate join cache object that contains this
    join buffer.
    The result of the check and the type of the join buffer to be used
    depend on:
      - the access method to access rows of the joined table
      - whether the join table is an inner table of an outer join or semi-join
      - whether the optimizer switches
          outer_join_with_cache, semijoin_with_cache, join_cache_incremental,
          join_cache_hashed, join_cache_bka,
        are set on or off
      - the join cache level set for the query
      - the join 'options'.

    In any case join buffer is not used if the number of the joined table is
    greater than 'no_jbuf_after'. It's also never used if the value of
    join_cache_level is equal to 0.
    If the optimizer switch outer_join_with_cache is off no join buffer is
    used for outer join operations.
    If the optimizer switch semijoin_with_cache is off no join buffer is used
    for semi-join operations.
    If the optimizer switch join_cache_incremental is off no incremental join
    buffers are used.
    If the optimizer switch join_cache_hashed is off then the optimizer uses
    neither BNLH algorithm, nor BKAH algorithm to perform join operations.

    If the optimizer switch join_cache_bka is off then the optimizer uses
    neither BKA algorithm, nor BKAH algorithm to perform join operation.
    The valid settings for join_cache_level lay in the interval 0..8.
    If it is set to 0 no join buffers are used to perform join operations.
    Currently we differentiate between join caches of 8 levels:
      1 : non-incremental join cache used for BNL join algorithm
      2 : incremental join cache used for BNL join algorithm
      3 : non-incremental join cache used for BNLH join algorithm
      4 : incremental join cache used for BNLH join algorithm
      5 : non-incremental join cache used for BKA join algorithm
      6 : incremental join cache used for BKA join algorithm
      7 : non-incremental join cache used for BKAH join algorithm
      8 : incremental join cache used for BKAH join algorithm
    If the value of join_cache_level is set to n then no join caches of
    levels higher than n can be employed.

    If the optimizer switches outer_join_with_cache, semijoin_with_cache,
    join_cache_incremental, join_cache_hashed, join_cache_bka are all on
    the following rules are applied.
    If join_cache_level==1|2 then join buffer is used for inner joins, outer
    joins and semi-joins with 'JT_ALL' access method. In this case a
    JOIN_CACHE_BNL object is employed.
    If join_cache_level==3|4 then, for a join operation (inner join, outer
    join, semi-join) with 'JT_REF'/'JT_EQREF' access method to the joined
    table, a JOIN_CACHE_BNLH object is employed.
    If an index is used to access rows of the joined table and the value of
    join_cache_level==5|6 then a JOIN_CACHE_BKA object is employed.
    If an index is used to access rows of the joined table and the value of
    join_cache_level==7|8 then a JOIN_CACHE_BKAH object is employed.
    If the value of join_cache_level is odd then creation of a non-linked
    join cache is forced.

    Currently for any join operation a join cache of the level of the
    highest allowed and applicable level is used.
    For example, if join_cache_level is set to 6 and the optimizer switch
    join_cache_bka is off, while the optimizer switch join_cache_hashed is
    on then for any inner join operation with JT_REF/JT_EQREF access method
    to the joined table the BNLH join algorithm will be used, while for
    the table accessed by the JT_ALL methods the BNL algorithm will be used.

    If the function decides that a join buffer can be used to join the table
    'tab' then it sets the value of tab->use_join_buffer to TRUE and assigns
    the selected join cache object to the field 'cache' of the previous
    join table.
    If the function creates a join cache object it tries to initialize it. The
    failure to do this results in an invocation of the function that destructs
    the created object.
    If the function decides that for some reasons no join buffer can be used
    for a table it calls the function revise_cache_usage that checks
    whether join cache should be denied for some previous tables. In this case
    a pointer to the first table for which join cache usage has been denied
    is passed in join->return_val (see the function set_join_cache_denial).

    The function changes the values of the fields tab->icp_other_tables_ok and
    tab->idx_cond_fact_out to FALSE if the chosen join cache algorithm
    requires it.

  NOTES
    An inner table of a nested outer join or a nested semi-join can be currently
    joined only when a linked cache object is employed. In these cases setting
    join_cache_incremental to 'off' results in denial of usage of any join
    buffer when joining the table.
    For a nested outer join/semi-join, currently, we either use join buffers for
    all inner tables or for none of them.
    Some engines (e.g. Falcon) currently allow to use only a join cache
    of the type JOIN_CACHE_BKAH when the joined table is accessed through
    an index. For these engines setting the value of join_cache_level to 5 or 6
    results in no join buffer being used to join the table.
+ + RETURN VALUE + cache level if cache is used, otherwise returns 0 + + TODO + Support BKA inside SJ-Materialization nests. When doing this, we'll need + to only store sj-inner tables in the join buffer. +#if 0 + JOIN_TAB *first_tab= join->join_tab+join->const_tables; + uint n_tables= i-join->const_tables; + / * + We normally put all preceding tables into the join buffer, except + for the constant tables. + If we're inside a semi-join materialization nest, e.g. + + outer_tbl1 outer_tbl2 ( inner_tbl1, inner_tbl2 ) ... + ^-- we're here + + then we need to put into the join buffer only the tables from + within the nest. + * / + if (i >= first_sjm_table && i < last_sjm_table) + { + n_tables= i - first_sjm_table; // will be >0 if we got here + first_tab= join->join_tab + first_sjm_table; + } +#endif +*/ + +static +uint check_join_cache_usage(JOIN_TAB *tab, + ulonglong options, + uint no_jbuf_after, + uint table_index, + JOIN_TAB *prev_tab) +{ + Cost_estimate cost; + uint flags= 0; + ha_rows rows= 0; + uint bufsz= 4096; + JOIN_CACHE *prev_cache=0; + JOIN *join= tab->join; + MEM_ROOT *root= join->thd->mem_root; + uint cache_level= tab->used_join_cache_level; + bool force_unlinked_cache= + !(join->allowed_join_cache_types & JOIN_CACHE_INCREMENTAL_BIT); + bool no_hashed_cache= + !(join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT); + bool no_bka_cache= + !(join->allowed_join_cache_types & JOIN_CACHE_BKA_BIT); + + join->return_tab= 0; + + if (tab->no_forced_join_cache) + goto no_join_cache; + + /* + Don't use join cache if @@join_cache_level==0 or this table is the first + one join suborder (either at top level or inside a bush) + */ + if (cache_level == 0 || !prev_tab) + return 0; + + if (force_unlinked_cache && (cache_level%2 == 0)) + cache_level--; + + if (options & SELECT_NO_JOIN_CACHE) + goto no_join_cache; + + if (tab->use_quick == 2) + goto no_join_cache; + + if (tab->table->map & join->complex_firstmatch_tables) + goto no_join_cache; + + /* + Don't use join 
cache if we're inside a join tab range covered by LooseScan + strategy (TODO: LooseScan is very similar to FirstMatch so theoretically it + should be possible to use join buffering in the same way we're using it for + multi-table firstmatch ranges). + */ + if (tab->inside_loosescan_range) + goto no_join_cache; + + if (tab->is_inner_table_of_semijoin() && + !join->allowed_semijoin_with_cache) + goto no_join_cache; + if (tab->is_inner_table_of_outer_join() && + !join->allowed_outer_join_with_cache) + goto no_join_cache; + + if (tab->table->pos_in_table_list->table_function && + !tab->table->pos_in_table_list->table_function->join_cache_allowed()) + goto no_join_cache; + + /* + Non-linked join buffers can't guarantee one match + */ + if (tab->is_nested_inner()) + { + if (force_unlinked_cache || cache_level == 1) + goto no_join_cache; + if (cache_level & 1) + cache_level--; + } + + /* + Don't use BKA for materialized tables. We could actually have a + meaningful use of BKA when linked join buffers are used. + + The problem is, the temp.table is not filled (actually not even opened + properly) yet, and this doesn't let us call + handler->multi_range_read_info(). It is possible to come up with + estimates, etc. without acessing the table, but it seems not to worth the + effort now. + */ + if (tab->table->pos_in_table_list->is_materialized_derived()) + { + no_bka_cache= true; + /* + Don't use hash join algorithm if the temporary table for the rows + of the derived table will be created with an equi-join key. + */ + if (tab->table->s->keys) + no_hashed_cache= true; + } + + /* + Don't use join buffering if we're dictated not to by no_jbuf_after + (This is not meaningfully used currently) + */ + if (table_index > no_jbuf_after) + goto no_join_cache; + + /* + TODO: BNL join buffer should be perfectly ok with tab->bush_children. 
+ */ + if (tab->loosescan_match_tab || tab->bush_children) + goto no_join_cache; + + for (JOIN_TAB *first_inner= tab->first_inner; first_inner; + first_inner= first_inner->first_upper) + { + if (first_inner != tab && + (!first_inner->use_join_cache || !(tab-1)->use_join_cache)) + goto no_join_cache; + } + if (tab->first_sj_inner_tab && tab->first_sj_inner_tab != tab && + (!tab->first_sj_inner_tab->use_join_cache || !(tab-1)->use_join_cache)) + goto no_join_cache; + if (!prev_tab->use_join_cache) + { + /* + Check whether table tab and the previous one belong to the same nest of + inner tables and if so do not use join buffer when joining table tab. + */ + if (tab->first_inner && tab != tab->first_inner) + { + for (JOIN_TAB *first_inner= tab[-1].first_inner; + first_inner; + first_inner= first_inner->first_upper) + { + if (first_inner == tab->first_inner) + goto no_join_cache; + } + } + else if (tab->first_sj_inner_tab && tab != tab->first_sj_inner_tab && + tab->first_sj_inner_tab == tab[-1].first_sj_inner_tab) + goto no_join_cache; + } + + prev_cache= prev_tab->cache; + + switch (tab->type) { + case JT_ALL: + if (cache_level == 1) + prev_cache= 0; + if ((tab->cache= new (root) JOIN_CACHE_BNL(join, tab, prev_cache))) + { + tab->icp_other_tables_ok= FALSE; + /* If make_join_select() hasn't called make_scan_filter(), do it now */ + if (!tab->cache_select && tab->make_scan_filter()) + goto no_join_cache; + return (2 - MY_TEST(!prev_cache)); + } + goto no_join_cache; + case JT_SYSTEM: + case JT_CONST: + case JT_REF: + case JT_EQ_REF: + if (cache_level <=2 || (no_hashed_cache && no_bka_cache)) + goto no_join_cache; + if (tab->ref.is_access_triggered()) + goto no_join_cache; + + if (!tab->is_ref_for_hash_join() && !no_bka_cache) + { + flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT; + if (tab->table->covering_keys.is_set(tab->ref.key)) + flags|= HA_MRR_INDEX_ONLY; + rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20, + tab->ref.key_parts, + &bufsz, 
&flags, &cost); + } + + if ((cache_level <=4 && !no_hashed_cache) || no_bka_cache || + tab->is_ref_for_hash_join() || + ((flags & HA_MRR_NO_ASSOCIATION) && cache_level <=6)) + { + if (!tab->hash_join_is_possible() || + tab->make_scan_filter()) + goto no_join_cache; + if (cache_level == 3) + prev_cache= 0; + if ((tab->cache= new (root) JOIN_CACHE_BNLH(join, tab, prev_cache))) + { + tab->icp_other_tables_ok= FALSE; + return (4 - MY_TEST(!prev_cache)); + } + goto no_join_cache; + } + if (cache_level > 4 && no_bka_cache) + goto no_join_cache; + + if ((flags & HA_MRR_NO_ASSOCIATION) && + (cache_level <= 6 || no_hashed_cache)) + goto no_join_cache; + + if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL)) + { + if (cache_level <= 6 || no_hashed_cache) + { + if (cache_level == 5) + prev_cache= 0; + if ((tab->cache= new (root) JOIN_CACHE_BKA(join, tab, flags, prev_cache))) + return (6 - MY_TEST(!prev_cache)); + goto no_join_cache; + } + else + { + if (cache_level == 7) + prev_cache= 0; + if ((tab->cache= new (root) JOIN_CACHE_BKAH(join, tab, flags, prev_cache))) + { + tab->idx_cond_fact_out= FALSE; + return (8 - MY_TEST(!prev_cache)); + } + goto no_join_cache; + } + } + goto no_join_cache; + default : ; + } + +no_join_cache: + if (tab->type != JT_ALL && tab->is_ref_for_hash_join()) + { + tab->type= JT_ALL; + tab->ref.key_parts= 0; + } + revise_cache_usage(tab); + return 0; +} + + +/* + Check whether join buffers can be used to join tables of a join + + SYNOPSIS + check_join_cache_usage() + join join whose tables are to be checked + options options of the join + no_jbuf_after don't use join buffering after table with this number + (The tables are assumed to be numbered in + first_linear_tab(join, WITHOUT_CONST_TABLES), + next_linear_tab(join, WITH_CONST_TABLES) order). + + DESCRIPTION + For each table after the first non-constant table the function checks + whether the table can be joined using a join buffer. 
If the function decides + that a join buffer can be employed then it selects the most appropriate join + cache object that contains this join buffer whose level is not greater + than join_cache_level set for the join. To make this check the function + calls the function check_join_cache_usage for every non-constant table. + + NOTES + In some situations (e.g. for nested outer joins, for nested semi-joins) only + incremental buffers can be used. If it turns out that for some inner table + no join buffer can be used then any inner table of an outer/semi-join nest + cannot use join buffer. In the case when already chosen buffer must be + denied for a table the function recalls check_join_cache_usage() + starting from this table. The pointer to the table from which the check + has to be restarted is returned in join->return_val (see the description + of check_join_cache_usage). +*/ + +void check_join_cache_usage_for_tables(JOIN *join, ulonglong options, + uint no_jbuf_after) +{ + JOIN_TAB *tab; + JOIN_TAB *prev_tab; + + for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) + { + tab->used_join_cache_level= join->max_allowed_join_cache_level; + } + + uint idx= join->const_tables; + for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) + { +restart: + tab->icp_other_tables_ok= TRUE; + tab->idx_cond_fact_out= TRUE; + + /* + Check if we have a preceding join_tab, as something that will feed us + records that we could buffer. We don't have it, if + - this is the first non-const table in the join order, + - this is the first table inside an SJM nest. 
+ */ + prev_tab= tab - 1; + if (tab == join->join_tab + join->const_tables || + (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab)) + prev_tab= NULL; + + switch (tab->type) { + case JT_SYSTEM: + case JT_CONST: + case JT_EQ_REF: + case JT_REF: + case JT_REF_OR_NULL: + case JT_ALL: + tab->used_join_cache_level= check_join_cache_usage(tab, options, + no_jbuf_after, + idx, + prev_tab); + tab->use_join_cache= MY_TEST(tab->used_join_cache_level); + /* + psergey-merge: todo: raise the question that this is really stupid that + we can first allocate a join buffer, then decide not to use it and free + it. + */ + if (join->return_tab) + { + tab= join->return_tab; + goto restart; + } + break; + default: + tab->used_join_cache_level= 0; + } + if (!tab->bush_children) + idx++; + } +} + +/** + Remove pushdown conditions that are already checked by the scan phase + of BNL/BNLH joins. + + @note + If the single-table condition for this table will be used by a + blocked join to pre-filter this table's rows, there is no need + to re-check the same single-table condition for each joined record. + + This method removes from JOIN_TAB::select_cond and JOIN_TAB::select::cond + all top-level conjuncts that also appear in in JOIN_TAB::cache_select::cond. +*/ + +void JOIN_TAB::remove_redundant_bnl_scan_conds() +{ + if (!(select_cond && cache_select && cache && + (cache->get_join_alg() == JOIN_CACHE::BNL_JOIN_ALG || + cache->get_join_alg() == JOIN_CACHE::BNLH_JOIN_ALG))) + return; + + /* + select->cond is not processed separately. This method assumes it is always + the same as select_cond. 
+ */ + if (select && select->cond != select_cond) + return; + + if (is_cond_and(select_cond)) + { + List_iterator pushed_cond_li(*((Item_cond*) select_cond)->argument_list()); + Item *pushed_item; + Item_cond_and *reduced_select_cond= new (join->thd->mem_root) + Item_cond_and(join->thd); + + if (is_cond_and(cache_select->cond)) + { + List_iterator scan_cond_li(*((Item_cond*) cache_select->cond)->argument_list()); + Item *scan_item; + while ((pushed_item= pushed_cond_li++)) + { + bool found_cond= false; + scan_cond_li.rewind(); + while ((scan_item= scan_cond_li++)) + { + if (pushed_item->eq(scan_item, 0)) + { + found_cond= true; + break; + } + } + if (!found_cond) + reduced_select_cond->add(pushed_item, join->thd->mem_root); + } + } + else + { + while ((pushed_item= pushed_cond_li++)) + { + if (!pushed_item->eq(cache_select->cond, 0)) + reduced_select_cond->add(pushed_item, join->thd->mem_root); + } + } + + /* + JOIN_CACHE::check_match uses JOIN_TAB::select->cond instead of + JOIN_TAB::select_cond. set_cond() sets both pointers. + */ + if (reduced_select_cond->argument_list()->is_empty()) + set_cond(NULL); + else if (reduced_select_cond->argument_list()->elements == 1) + set_cond(reduced_select_cond->argument_list()->head()); + else + { + reduced_select_cond->quick_fix_field(); + set_cond(reduced_select_cond); + } + } + else if (select_cond->eq(cache_select->cond, 0)) + set_cond(NULL); +} + + +/* + Plan refinement stage: do various setup things for the executor + + SYNOPSIS + make_join_readinfo() + join Join being processed + options Join's options (checking for SELECT_DESCRIBE, + SELECT_NO_JOIN_CACHE) + no_jbuf_after Don't use join buffering after table with this number. 
+ + DESCRIPTION + Plan refinement stage: do various set ups for the executioner + - set up use of join buffering + - push index conditions + - increment relevant counters + - etc + + RETURN + FALSE - OK + TRUE - Out of memory +*/ + +static bool +make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) +{ + JOIN_TAB *tab; + uint i; + DBUG_ENTER("make_join_readinfo"); + + bool statistics= MY_TEST(!(join->select_options & SELECT_DESCRIBE)); + bool sorted= 1; + + join->complex_firstmatch_tables= table_map(0); + + if (!join->select_lex->sj_nests.is_empty() && + setup_semijoin_dups_elimination(join, options, no_jbuf_after)) + DBUG_RETURN(TRUE); /* purecov: inspected */ + + /* For const tables, set partial_join_cardinality to 1. */ + for (tab= join->join_tab; tab != join->join_tab + join->const_tables; tab++) + tab->partial_join_cardinality= 1; + + JOIN_TAB *prev_tab= NULL; + i= join->const_tables; + for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + prev_tab=tab, tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) + { + /* + The approximation below for partial join cardinality is not good because + - it does not take into account some pushdown predicates + - it does not differentiate between inner joins, outer joins and + semi-joins. + Later it should be improved. + */ + + if (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab) + prev_tab= NULL; + DBUG_ASSERT(tab->bush_children || tab->table == join->best_positions[i].table->table); + + tab->partial_join_cardinality= join->best_positions[i].records_read * + (prev_tab? 
prev_tab->partial_join_cardinality : 1); + if (!tab->bush_children) + i++; + } + + check_join_cache_usage_for_tables(join, options, no_jbuf_after); + + JOIN_TAB *first_tab; + for (tab= first_tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) + { + if (tab->bush_children) + { + if (setup_sj_materialization_part2(tab)) + return TRUE; + } + + TABLE *table=tab->table; + uint jcl= tab->used_join_cache_level; + tab->read_record.table= table; + tab->read_record.unlock_row= rr_unlock_row; + tab->read_record.print_error= true; + tab->sorted= sorted; + sorted= 0; // only first must be sorted + + + /* + We should not set tab->next_select for the last table in the + SMJ-nest, as setup_sj_materialization() has already set it to + end_sj_materialize. + */ + if (!(tab->bush_root_tab && + tab->bush_root_tab->bush_children->end == tab + 1)) + { + tab->next_select=sub_select; /* normal select */ + } + + + if (tab->loosescan_match_tab) + { + if (!(tab->loosescan_buf= (uchar*)join->thd->alloc(tab-> + loosescan_key_len))) + return TRUE; /* purecov: inspected */ + tab->sorted= TRUE; + } + table->status=STATUS_NO_RECORD; + pick_table_access_method (tab); + + if (jcl) + tab[-1].next_select=sub_select_cache; + + if (tab->cache && tab->cache->get_join_alg() == JOIN_CACHE::BNLH_JOIN_ALG) + tab->type= JT_HASH; + + switch (tab->type) { + case JT_SYSTEM: // Only happens with left join + case JT_CONST: // Only happens with left join + /* Only happens with outer joins */ + tab->read_first_record= tab->type == JT_SYSTEM ? 
join_read_system + : join_read_const; + tab->read_record.unlock_row= join_const_unlock_row; + if (!(table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) && + (!jcl || jcl > 4) && !tab->ref.is_access_triggered()) + push_index_cond(tab, tab->ref.key); + break; + case JT_EQ_REF: + tab->read_record.unlock_row= join_read_key_unlock_row; + /* fall through */ + if (!(table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) && + (!jcl || jcl > 4) && !tab->ref.is_access_triggered()) + push_index_cond(tab, tab->ref.key); + break; + case JT_REF_OR_NULL: + case JT_REF: + if (tab->select) + { + delete tab->select->quick; + tab->select->quick=0; + } + delete tab->quick; + tab->quick=0; + if (!(table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) && + (!jcl || jcl > 4) && !tab->ref.is_access_triggered()) + push_index_cond(tab, tab->ref.key); + break; + case JT_ALL: + case JT_HASH: + /* + If previous table use cache + If the incoming data set is already sorted don't use cache. + Also don't use cache if this is the first table in semi-join + materialization nest. 
+ */ + /* These init changes read_record */ + if (tab->use_quick == 2) + { + join->thd->set_status_no_good_index_used(); + tab->read_first_record= join_init_quick_read_record; + if (statistics) + join->thd->inc_status_select_range_check(); + } + else + { + if (!tab->bush_children) + tab->read_first_record= join_init_read_record; + if (tab == first_tab) + { + if (tab->select && tab->select->quick) + { + if (statistics) + join->thd->inc_status_select_range(); + } + else + { + join->thd->set_status_no_index_used(); + if (statistics) + { + join->thd->inc_status_select_scan(); + join->thd->query_plan_flags|= QPLAN_FULL_SCAN; + } + } + } + else + { + if (tab->select && tab->select->quick) + { + if (statistics) + join->thd->inc_status_select_full_range_join(); + } + else + { + join->thd->set_status_no_index_used(); + if (statistics) + { + join->thd->inc_status_select_full_join(); + join->thd->query_plan_flags|= QPLAN_FULL_JOIN; + } + } + } + if (!table->no_keyread) + { + if (!(tab->select && tab->select->quick && + tab->select->quick->index != MAX_KEY && //not index_merge + table->covering_keys.is_set(tab->select->quick->index)) && + (!table->covering_keys.is_clear_all() && + !(tab->select && tab->select->quick))) + { // Only read index tree + if (tab->loosescan_match_tab) + tab->index= tab->loosescan_key; + else + { +#ifdef BAD_OPTIMIZATION + /* + It has turned out that the below change, while speeding things + up for disk-bound loads, slows them down for cases when the data + is in disk cache (see BUG#35850): + See bug #26447: "Using the clustered index for a table scan + is always faster than using a secondary index". + */ + if (table->file->pk_is_clustering_key(table->s->primary_key)) + tab->index= table->s->primary_key; + else +#endif + tab->index=find_shortest_key(table, & table->covering_keys); + } + tab->read_first_record= join_read_first; + /* Read with index_first / index_next */ + tab->type= tab->type == JT_ALL ? 
JT_NEXT : JT_HASH_NEXT; + } + } + if (tab->select && tab->select->quick && + tab->select->quick->index != MAX_KEY && + !tab->table->covering_keys.is_set(tab->select->quick->index)) + push_index_cond(tab, tab->select->quick->index); + } + break; + case JT_FT: + break; + /* purecov: begin deadcode */ + default: + DBUG_PRINT("error",("Table type %d found",tab->type)); + break; + case JT_UNKNOWN: + case JT_MAYBE_REF: + abort(); + /* purecov: end */ + } + + DBUG_EXECUTE("where", + char buff[256]; + String str(buff,sizeof(buff),system_charset_info); + str.length(0); + if (tab->table) + str.append(tab->table->alias); + else + str.append(STRING_WITH_LEN("")); + str.append(STRING_WITH_LEN(" final_pushdown_cond")); + print_where(tab->select_cond, str.c_ptr_safe(), QT_ORDINARY);); + } + uint n_top_tables= (uint)(join->join_tab_ranges.head()->end - + join->join_tab_ranges.head()->start); + + join->join_tab[n_top_tables - 1].next_select=0; /* Set by do_select */ + + /* + If a join buffer is used to join a table the ordering by an index + for the first non-constant table cannot be employed anymore. + */ + for (tab= join->join_tab + join->const_tables ; + tab != join->join_tab + n_top_tables ; tab++) + { + if (tab->use_join_cache) + { + JOIN_TAB *sort_by_tab= join->group && join->simple_group && + join->group_list ? + join->join_tab+join->const_tables : + join->get_sort_by_join_tab(); + /* + It could be that sort_by_tab==NULL, and the plan is to use filesort() + on the first table. 
+ */ + if (join->order) + { + join->simple_order= 0; + join->need_tmp= 1; + } + + if (join->group && !join->group_optimized_away) + { + join->need_tmp= 1; + join->simple_group= 0; + } + + if (sort_by_tab) + { + join->need_tmp= 1; + join->simple_order= join->simple_group= 0; + if (sort_by_tab->type == JT_NEXT && + !sort_by_tab->table->covering_keys.is_set(sort_by_tab->index)) + { + sort_by_tab->type= JT_ALL; + sort_by_tab->read_first_record= join_init_read_record; + } + else if (sort_by_tab->type == JT_HASH_NEXT && + !sort_by_tab->table->covering_keys.is_set(sort_by_tab->index)) + { + sort_by_tab->type= JT_HASH; + sort_by_tab->read_first_record= join_init_read_record; + } + } + break; + } + } + + DBUG_RETURN(FALSE); +} + + +/** + Give error if we some tables are done with a full join. + + This is used by multi_table_update and multi_table_delete when running + in safe mode. + + @param join Join condition + + @retval + 0 ok + @retval + 1 Error (full join used) +*/ + +bool error_if_full_join(JOIN *join) +{ + for (JOIN_TAB *tab=first_top_level_tab(join, WITH_CONST_TABLES); tab; + tab= next_top_level_tab(join, tab)) + { + if (tab->type == JT_ALL && (!tab->select || !tab->select->quick)) + { + my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE, + ER_THD(join->thd, + ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE), MYF(0)); + return(1); + } + } + return(0); +} + + +/** + Build rowid filter. + + @retval + 0 ok + @retval + 1 Error, transaction should be rolled back +*/ + +bool JOIN_TAB::build_range_rowid_filter_if_needed() +{ + bool result= false; + if (rowid_filter && !is_rowid_filter_built) + { + /** + The same handler object (table->file) is used to build a filter + and to perfom a primary table access (by the main query). + + To estimate the time for filter building tracker should be changed + and after building of the filter has been finished it should be + switched back to the previos tracker. 
    */
    Exec_time_tracker *table_tracker= table->file->get_time_tracker();
    Rowid_filter_tracker *rowid_tracker= rowid_filter->get_tracker();
    table->file->set_time_tracker(rowid_tracker->get_time_tracker());
    rowid_tracker->start_tracking(join->thd);
    Rowid_filter::build_return_code build_rc= rowid_filter->build();
    if (build_rc == Rowid_filter::SUCCESS)
      is_rowid_filter_built= true;
    else
    {
      /* Filter could not be built (or not usefully): drop it and fall back. */
      delete rowid_filter;
      rowid_filter= 0;
    }
    rowid_tracker->stop_tracking(join->thd);
    /* Restore the tracker used for the main query's table access */
    table->file->set_time_tracker(table_tracker);
    result= (build_rc == Rowid_filter::FATAL_ERROR);
  }
  return result;
}


/**
  cleanup JOIN_TAB.

  DESCRIPTION
    This is invoked when we've finished all join executions.
*/

void JOIN_TAB::cleanup()
{
  DBUG_ENTER("JOIN_TAB::cleanup");

  DBUG_PRINT("enter", ("tab: %p  table %s.%s",
                       this,
                       (table ? table->s->db.str : "?"),
                       (table ? table->s->table_name.str : "?")));
  delete select;
  select= 0;
  delete quick;
  quick= 0;
  if (rowid_filter)
  {
    delete rowid_filter;
    rowid_filter= 0;
  }
  if (cache)
  {
    cache->free();
    cache= 0;
  }
  limit= 0;
  // Free select that was created for filesort outside of create_sort_index
  if (filesort && filesort->select && !filesort->own_select)
    delete filesort->select;
  delete filesort;
  filesort= NULL;
  /* Skip non-existing derived tables/views result tables */
  if (table &&
      (table->s->tmp_table != INTERNAL_TMP_TABLE || table->is_created()))
  {
    table->file->ha_end_keyread();
    table->file->ha_index_or_rnd_end();
  }
  if (table)
  {
    table->file->ha_end_keyread();
    if (type == JT_FT)
      table->file->ha_ft_end();
    else
      table->file->ha_index_or_rnd_end();
    preread_init_done= FALSE;
    if (table->pos_in_table_list &&
        table->pos_in_table_list->jtbm_subselect)
    {
      if (table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
      {
        /*
          Set this to NULL so that cleanup_empty_jtbm_semi_joins() doesn't
          attempt to make another free_tmp_table call.
        */
        table->pos_in_table_list->table= NULL;
        free_tmp_table(join->thd, table);
      }
      else
      {
        TABLE_LIST *tmp= table->pos_in_table_list;
        end_read_record(&read_record);
        tmp->jtbm_subselect->cleanup();
        /*
          The above call freed the materialized temptable. Set it to NULL so
          that we don't attempt to touch it if JOIN_TAB::cleanup() is invoked
          multiple times (it may be)
        */
        tmp->table= NULL;
      }
      table= NULL;
      DBUG_VOID_RETURN;
    }
    /*
      We need to reset this for next select
      (Tested in part_of_refkey)
    */
    table->reginfo.join_tab= 0;
  }
  end_read_record(&read_record);
  explain_plan= NULL;
  DBUG_VOID_RETURN;
}


/**
  Estimate the time to get rows of the joined table
*/

double JOIN_TAB::scan_time()
{
  double res;
  if (table->is_created())
  {
    if (table->is_filled_at_execution())
    {
      get_delayed_table_estimates(table, &records, &read_time,
                                  &startup_cost);
      found_records= records;
      table->opt_range_condition_rows= records;
    }
    else
    {
      found_records= records= table->stat_records();
      read_time= table->file->scan_time();
      /*
        table->opt_range_condition_rows has already been set to
        table->file->stats.records
      */
    }
    res= read_time;
  }
  else
  {
    /* Table not created yet: no engine stats, use a rough placeholder */
    found_records= records= table->stat_records();
    read_time= found_records ? (double)found_records: 10.0;// TODO:fix this stub
    res= read_time;
  }
  return res;
}


/**
  Estimate the number of rows that an access method will read from a table.

  @todo: why not use JOIN_TAB::found_records
*/

ha_rows JOIN_TAB::get_examined_rows()
{
  double examined_rows;
  const SQL_SELECT *sel= get_sql_select();

  if (sel && sel->quick && use_quick != 2)
    examined_rows= (double)sel->quick->records;
  else if (type == JT_NEXT || type == JT_ALL ||
           type == JT_HASH || type == JT_HASH_NEXT)
  {
    if (limit)
    {
      /*
        @todo This estimate is wrong, a LIMIT query may examine much more rows
        than the LIMIT itself.
+ */ + examined_rows= (double)limit; + } + else + { + if (table->is_filled_at_execution()) + examined_rows= (double)records; + else + { + /* + handler->info(HA_STATUS_VARIABLE) has been called in + make_join_statistics() + */ + examined_rows= (double)table->stat_records(); + } + } + } + else + examined_rows= records_read; + + if (examined_rows >= (double) HA_ROWS_MAX) + return HA_ROWS_MAX; + return (ha_rows) examined_rows; +} + + +/** + Initialize the join_tab before reading. + Currently only derived table/view materialization is done here. + + TODO: consider moving this together with join_tab_execution_startup +*/ +bool JOIN_TAB::preread_init() +{ + TABLE_LIST *derived= table->pos_in_table_list; + DBUG_ENTER("JOIN_TAB::preread_init"); + + if (!derived || !derived->is_materialized_derived()) + { + preread_init_done= TRUE; + DBUG_RETURN(FALSE); + } + + /* Materialize derived table/view. */ + if ((!derived->get_unit()->executed || + derived->is_recursive_with_table() || + derived->get_unit()->uncacheable) && + mysql_handle_single_derived(join->thd->lex, + derived, DT_CREATE | DT_FILL)) + DBUG_RETURN(TRUE); + + if (!(derived->get_unit()->uncacheable & UNCACHEABLE_DEPENDENT) || + derived->is_nonrecursive_derived_with_rec_ref() || + is_split_derived) + preread_init_done= TRUE; + if (select && select->quick) + select->quick->replace_handler(table->file); + + DBUG_EXECUTE_IF("show_explain_probe_join_tab_preread", + if (dbug_user_var_equals_int(join->thd, + "show_explain_probe_select_id", + join->select_lex->select_number)) + dbug_serve_apcs(join->thd, 1); + ); + + /* init ftfuns for just initialized derived table */ + if (table->fulltext_searched) + if (init_ftfuncs(join->thd, join->select_lex, MY_TEST(join->order))) + DBUG_RETURN(TRUE); + + DBUG_RETURN(FALSE); +} + + +bool JOIN_TAB::pfs_batch_update(JOIN *join) +{ + /* + Use PFS batch mode if + 1. tab is an inner-most table, or + 2. will read more than one row (not eq_ref or const access type) + 3. 
no subqueries + */ + + return join->join_tab + join->table_count - 1 == this && // 1 + type != JT_EQ_REF && type != JT_CONST && type != JT_SYSTEM && // 2 + (!select_cond || !select_cond->with_subquery()); // 3 +} + + +/** + Build a TABLE_REF structure for index lookup in the temporary table + + @param thd Thread handle + @param tmp_key The temporary table key + @param it The iterator of items for lookup in the key + @param skip Number of fields from the beginning to skip + + @details + Build TABLE_REF object for lookup in the key 'tmp_key' using items + accessible via item iterator 'it'. + + @retval TRUE Error + @retval FALSE OK +*/ + +bool TABLE_REF::tmp_table_index_lookup_init(THD *thd, + KEY *tmp_key, + Item_iterator &it, + bool value, + uint skip) +{ + uint tmp_key_parts= tmp_key->user_defined_key_parts; + uint i; + DBUG_ENTER("TABLE_REF::tmp_table_index_lookup_init"); + + key= 0; /* The only temp table index. */ + key_length= tmp_key->key_length; + if (!(key_buff= + (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) || + !(key_copy= + (store_key**) thd->alloc((sizeof(store_key*) * + (tmp_key_parts + 1)))) || + !(items= + (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts))) + DBUG_RETURN(TRUE); + + key_buff2= key_buff + ALIGN_SIZE(tmp_key->key_length); + + KEY_PART_INFO *cur_key_part= tmp_key->key_part; + store_key **ref_key= key_copy; + uchar *cur_ref_buff= key_buff; + + it.open(); + for (i= 0; i < skip; i++) it.next(); + for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++) + { + Item *item= it.next(); + DBUG_ASSERT(item); + items[i]= item; + int null_count= MY_TEST(cur_key_part->field->real_maybe_null()); + *ref_key= new store_key_item(thd, cur_key_part->field, + /* TIMOUR: + the NULL byte is taken into account in + cur_key_part->store_length, so instead of + cur_ref_buff + MY_TEST(maybe_null), we could + use that information instead. + */ + cur_ref_buff + null_count, + null_count ? 
cur_ref_buff : 0, + cur_key_part->length, items[i], value); + cur_ref_buff+= cur_key_part->store_length; + } + *ref_key= NULL; /* End marker. */ + key_err= 1; + key_parts= tmp_key_parts; + DBUG_RETURN(FALSE); +} + + +/* + Check if ref access uses "Full scan on NULL key" (i.e. it actually alternates + between ref access and full table scan) +*/ + +bool TABLE_REF::is_access_triggered() +{ + for (uint i = 0; i < key_parts; i++) + { + if (cond_guards[i]) + return TRUE; + } + return FALSE; +} + + +/** + Partially cleanup JOIN after it has executed: close index or rnd read + (table cursors), free quick selects. + + This function is called in the end of execution of a JOIN, before the used + tables are unlocked and closed. + + For a join that is resolved using a temporary table, the first sweep is + performed against actual tables and an intermediate result is inserted + into the temprorary table. + The last sweep is performed against the temporary table. Therefore, + the base tables and associated buffers used to fill the temporary table + are no longer needed, and this function is called to free them. + + For a join that is performed without a temporary table, this function + is called after all rows are sent, but before EOF packet is sent. + + For a simple SELECT with no subqueries this function performs a full + cleanup of the JOIN and calls mysql_unlock_read_tables to free used base + tables. + + If a JOIN is executed for a subquery or if it has a subquery, we can't + do the full cleanup and need to do a partial cleanup only. + - If a JOIN is not the top level join, we must not unlock the tables + because the outer select may not have been evaluated yet, and we + can't unlock only selected tables of a query. + - Additionally, if this JOIN corresponds to a correlated subquery, we + should not free quick selects and join buffers because they will be + needed for the next execution of the correlated subquery. 
+ - However, if this is a JOIN for a [sub]select, which is not + a correlated subquery itself, but has subqueries, we can free it + fully and also free JOINs of all its subqueries. The exception + is a subquery in SELECT list, e.g: @n + SELECT a, (select MY_MAX(b) from t1) group by c @n + This subquery will not be evaluated at first sweep and its value will + not be inserted into the temporary table. Instead, it's evaluated + when selecting from the temporary table. Therefore, it can't be freed + here even though it's not correlated. + + @todo + Unlock tables even if the join isn't top level select in the tree +*/ + +void JOIN::join_free() +{ + SELECT_LEX_UNIT *tmp_unit; + SELECT_LEX *sl; + /* + Optimization: if not EXPLAIN and we are done with the JOIN, + free all tables. + */ + bool full= !(select_lex->uncacheable) && !(thd->lex->describe); + bool can_unlock= full; + DBUG_ENTER("JOIN::join_free"); + + cleanup(full); + + for (tmp_unit= select_lex->first_inner_unit(); + tmp_unit; + tmp_unit= tmp_unit->next_unit()) + { + if (tmp_unit->with_element && tmp_unit->with_element->is_recursive) + continue; + for (sl= tmp_unit->first_select(); sl; sl= sl->next_select()) + { + Item_subselect *subselect= sl->master_unit()->item; + bool full_local= full && (!subselect || subselect->is_evaluated()); + /* + If this join is evaluated, we can fully clean it up and clean up all + its underlying joins even if they are correlated -- they will not be + used any more anyway. + If this join is not yet evaluated, we still must clean it up to + close its table cursors -- it may never get evaluated, as in case of + ... HAVING FALSE OR a IN (SELECT ...)) + but all table cursors must be closed before the unlock. + */ + sl->cleanup_all_joins(full_local); + /* Can't unlock if at least one JOIN is still needed */ + can_unlock= can_unlock && full_local; + } + } + /* + We are not using tables anymore + Unlock all tables. We may be in an INSERT .... SELECT statement. 
+ */ + if (can_unlock && lock && thd->lock && ! thd->locked_tables_mode && + !(select_options & SELECT_NO_UNLOCK) && + !select_lex->subquery_in_having && + (select_lex == (thd->lex->unit.fake_select_lex ? + thd->lex->unit.fake_select_lex : + thd->lex->first_select_lex()))) + { + /* + TODO: unlock tables even if the join isn't top level select in the + tree. + */ + mysql_unlock_read_tables(thd, lock); // Don't free join->lock + lock= 0; + } + + DBUG_VOID_RETURN; +} + + +/** + Free resources of given join. + + @param full true if we should free all resources, call with full==1 + should be last, before it this function can be called with + full==0 + + @note + With subquery this function definitely will be called several times, + but even for simple query it can be called several times. +*/ + +void JOIN::cleanup(bool full) +{ + DBUG_ENTER("JOIN::cleanup"); + DBUG_PRINT("enter", ("select: %d (%p) join: %p full: %u", + select_lex->select_number, select_lex, this, + (uint) full)); + + if (full) + have_query_plan= QEP_DELETED; + + if (original_join_tab) + { + /* Free the original optimized join created for the group_by_handler */ + join_tab= original_join_tab; + original_join_tab= 0; + } + + if (join_tab) + { + JOIN_TAB *tab; + + if (full) + { + /* + Call cleanup() on join tabs used by the join optimization + (join->join_tab may now be pointing to result of make_simple_join + reading from the temporary table) + + We also need to check table_count to handle various degenerate joins + w/o tables: they don't have some members initialized and + WALK_OPTIMIZATION_TABS may not work correctly for them. + */ + if (top_join_tab_count && tables_list) + { + for (tab= first_breadth_first_tab(); tab; + tab= next_breadth_first_tab(first_breadth_first_tab(), + top_join_tab_count, tab)) + { + tab->cleanup(); + delete tab->filesort_result; + tab->filesort_result= NULL; + } + } + cleaned= true; + //psergey2: added (Q: why not in the above loop?) 
+ { + JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt(); + for (uint i= 0; i < aggr_tables; i++, curr_tab++) + { + if (curr_tab->aggr) + { + free_tmp_table(thd, curr_tab->table); + curr_tab->table= NULL; + delete curr_tab->tmp_table_param; + curr_tab->tmp_table_param= NULL; + curr_tab->aggr= NULL; + + delete curr_tab->filesort_result; + curr_tab->filesort_result= NULL; + } + } + aggr_tables= 0; // psergey3 + } + } + else + { + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES); tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + tab->partial_cleanup(); + } + } + } + if (full) + { + cleanup_empty_jtbm_semi_joins(this, join_list); + + // Run Cached_item DTORs! + group_fields.delete_elements(); + order_fields.delete_elements(); + + /* + We can't call delete_elements() on copy_funcs as this will cause + problems in free_elements() as some of the elements are then deleted. + */ + tmp_table_param.copy_funcs.empty(); + /* + If we have tmp_join and 'this' JOIN is not tmp_join and + tmp_table_param.copy_field's of them are equal then we have to remove + pointer to tmp_table_param.copy_field from tmp_join, because it will + be removed in tmp_table_param.cleanup(). + */ + tmp_table_param.cleanup(); + + delete pushdown_query; + pushdown_query= 0; + + if (!join_tab) + { + List_iterator li(*join_list); + TABLE_LIST *table_ref; + while ((table_ref= li++)) + { + if (table_ref->table && + table_ref->jtbm_subselect && + table_ref->jtbm_subselect->is_jtbm_const_tab) + { + free_tmp_table(thd, table_ref->table); + table_ref->table= NULL; + } + } + } + free_pushdown_handlers(*join_list); + } + /* Restore ref array to original state */ + if (current_ref_ptrs != items0) + { + set_items_ref_array(items0); + set_group_rpa= false; + } + DBUG_VOID_RETURN; +} + +/** + Clean up all derived pushdown handlers in this join. + + @detail + Note that dt_handler is picked at the prepare stage (as opposed + to optimization stage where one could expect this). 
+ Because of that, we have to do cleanups in this function that is called + from JOIN::cleanup() and not in JOIN_TAB::cleanup. + */ +void JOIN::free_pushdown_handlers(List& join_list) +{ + List_iterator li(join_list); + TABLE_LIST *table_ref; + while ((table_ref= li++)) + { + if (table_ref->nested_join) + free_pushdown_handlers(table_ref->nested_join->join_list); + if (table_ref->pushdown_derived) + { + delete table_ref->pushdown_derived; + table_ref->pushdown_derived= NULL; + } + delete table_ref->dt_handler; + table_ref->dt_handler= NULL; + } +} + +/** + Remove the following expressions from ORDER BY and GROUP BY: + Constant expressions @n + Expression that only uses tables that are of type EQ_REF and the reference + is in the ORDER list or if all refereed tables are of the above type. + + In the following, the X field can be removed: + @code + SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t1.a,t2.X + SELECT * FROM t1,t2,t3 WHERE t1.a=t2.a AND t2.b=t3.b ORDER BY t1.a,t3.X + @endcode + + These can't be optimized: + @code + SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.X,t1.a + SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c + SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a + @endcode + + TODO: this function checks ORDER::used, which can only have a value of 0. +*/ + +static bool +eq_ref_table(JOIN *join, ORDER *start_order, JOIN_TAB *tab) +{ + if (tab->cached_eq_ref_table) // If cached + return tab->eq_ref_table; + tab->cached_eq_ref_table=1; + /* We can skip const tables only if not an outer table */ + if (tab->type == JT_CONST && !tab->first_inner) + return (tab->eq_ref_table=1); /* purecov: inspected */ + if (tab->type != JT_EQ_REF || tab->table->maybe_null) + return (tab->eq_ref_table=0); // We must use this + Item **ref_item=tab->ref.items; + Item **end=ref_item+tab->ref.key_parts; + uint found=0; + table_map map=tab->table->map; + + for (; ref_item != end ; ref_item++) + { + if (! 
(*ref_item)->const_item()) + { // Not a const ref + ORDER *order; + for (order=start_order ; order ; order=order->next) + { + if ((*ref_item)->eq(order->item[0],0)) + break; + } + if (order) + { + if (!(order->used & map)) + { + found++; + order->used|= map; + } + continue; // Used in ORDER BY + } + if (!only_eq_ref_tables(join,start_order, (*ref_item)->used_tables())) + return (tab->eq_ref_table=0); + } + } + /* Check that there was no reference to table before sort order */ + for (; found && start_order ; start_order=start_order->next) + { + if (start_order->used & map) + { + found--; + continue; + } + if (start_order->depend_map & map) + return (tab->eq_ref_table=0); + } + return tab->eq_ref_table=1; +} + + +static bool +only_eq_ref_tables(JOIN *join,ORDER *order,table_map tables) +{ + tables&= ~PSEUDO_TABLE_BITS; + for (JOIN_TAB **tab=join->map2table ; tables ; tab++, tables>>=1) + { + if (tables & 1 && !eq_ref_table(join, order, *tab)) + return 0; + } + return 1; +} + + +/** Update the dependency map for the tables. */ + +static void update_depend_map(JOIN *join) +{ + JOIN_TAB *join_tab; + for (join_tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITH_CONST_TABLES); + join_tab; + join_tab= next_linear_tab(join, join_tab, WITH_BUSH_ROOTS)) + { + TABLE_REF *ref= &join_tab->ref; + table_map depend_map=0; + Item **item=ref->items; + uint i; + for (i=0 ; i < ref->key_parts ; i++,item++) + depend_map|=(*item)->used_tables(); + depend_map&= ~OUTER_REF_TABLE_BIT; + ref->depend_map= depend_map; + for (JOIN_TAB **tab=join->map2table; + depend_map ; + tab++,depend_map>>=1 ) + { + if (depend_map & 1) + ref->depend_map|=(*tab)->ref.depend_map; + } + } +} + + +/** Update the dependency map for the sort order. 
*/ + +static void update_depend_map_for_order(JOIN *join, ORDER *order) +{ + for (; order ; order=order->next) + { + table_map depend_map; + order->item[0]->update_used_tables(); + order->depend_map=depend_map=order->item[0]->used_tables(); + order->used= 0; + // Not item_sum(), RAND() and no reference to table outside of sub select + if (!(order->depend_map & (OUTER_REF_TABLE_BIT | RAND_TABLE_BIT)) + && !order->item[0]->with_sum_func() && + join->join_tab) + { + for (JOIN_TAB **tab=join->map2table; + depend_map ; + tab++, depend_map>>=1) + { + if (depend_map & 1) + order->depend_map|=(*tab)->ref.depend_map; + } + } + } +} + + +/** + Remove all constants from ORDER and check if ORDER only contains simple + expressions. + + We also remove all duplicate expressions, keeping only the first one. + + simple_order is set to 1 if sort_order only uses fields from head table + and the head table is not a LEFT JOIN table. + + @param join Join handler + @param first_order List of SORT or GROUP order + @param cond WHERE statement + @param change_list Set to 1 if we should remove things from list. + If this is not set, then only simple_order is + calculated. This is not set when we + are using ROLLUP + @param simple_order Set to 1 if we are only using simple + expressions. + + @return + Returns new sort order +*/ + +static ORDER * +remove_const(JOIN *join,ORDER *first_order, COND *cond, + bool change_list, bool *simple_order) +{ + /* + We can't do ORDER BY using filesort if the select list contains a non + deterministic value like RAND() or ROWNUM(). + For example: + SELECT a,ROWNUM() FROM t1 ORDER BY a; + + If we would first sort the table 't1', the ROWNUM() column would be + generated during end_send() and the order would be wrong. + + Previously we had here also a test of ROLLUP: + 'join->rollup.state == ROLLUP::STATE_NONE' + + I deleted this because the ROLLUP was never enforced because of a + bug where the inital value of simple_order was ignored. 
Having + ROLLUP tested now when the code is fixed, causes many test failure + and some wrong results so better to leave the code as it was + related to ROLLUP. + */ + *simple_order= !join->select_lex->rownum_in_field_list; + if (join->only_const_tables()) + return change_list ? 0 : first_order; // No need to sort + + ORDER *order,**prev_ptr, *tmp_order; + table_map UNINIT_VAR(first_table); /* protected by first_is_base_table */ + table_map not_const_tables= ~join->const_table_map; + table_map ref; + bool first_is_base_table= FALSE; + DBUG_ENTER("remove_const"); + + /* + Join tab is set after make_join_statistics() has been called. + In case of one table with GROUP BY this function is called before + join_tab is set for the GROUP_BY expression + */ + if (join->join_tab) + { + if (join->join_tab[join->const_tables].table) + { + first_table= join->join_tab[join->const_tables].table->map; + first_is_base_table= TRUE; + } + + /* + Cleanup to avoid interference of calls of this function for + ORDER BY and GROUP BY + */ + for (JOIN_TAB *tab= join->join_tab + join->const_tables; + tab < join->join_tab + join->top_join_tab_count; + tab++) + tab->cached_eq_ref_table= FALSE; + + JOIN_TAB *head= join->join_tab + join->const_tables; + *simple_order&= head->on_expr_ref[0] == NULL; + if (*simple_order && head->table->file->ha_table_flags() & HA_SLOW_RND_POS) + { + uint u1, u2, u3, u4; + /* + normally the condition is (see filesort_use_addons()) + + length + sortlength <= max_length_for_sort_data + + but for HA_SLOW_RND_POS tables we relax it a bit, as the alternative + is to use a temporary table, which is rather expensive. 
+ + TODO proper cost estimations + */ + *simple_order= filesort_use_addons(head->table, 0, &u1, &u2, &u3, &u4); + } + } + else + { + first_is_base_table= FALSE; + first_table= 0; // Not used, for gcc + } + + prev_ptr= &first_order; + + /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */ + + update_depend_map_for_order(join, first_order); + for (order=first_order; order ; order=order->next) + { + table_map order_tables=order->item[0]->used_tables(); + if (order->item[0]->with_sum_func() || + order->item[0]->with_window_func() || + /* + If the outer table of an outer join is const (either by itself or + after applying WHERE condition), grouping on a field from such a + table will be optimized away and filesort without temporary table + will be used unless we prevent that now. Filesort is not fit to + handle joins and the join condition is not applied. We can't detect + the case without an expensive test, however, so we force temporary + table for all queries containing more than one table, ROLLUP, and an + outer join. + */ + (join->table_count > 1 && join->rollup.state == ROLLUP::STATE_INITED && + join->outer_join)) + *simple_order=0; // Must do a temp table to sort + else if (!(order_tables & not_const_tables)) + { + if (order->item[0]->with_subquery()) + { + /* + Delay the evaluation of constant ORDER and/or GROUP expressions that + contain subqueries until the execution phase. 
+ */ + join->exec_const_order_group_cond.push_back(order->item[0], + join->thd->mem_root); + } + DBUG_PRINT("info",("removing: %s", order->item[0]->full_name())); + continue; + } + else + { + if (order_tables & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)) + *simple_order=0; + else + { + if (cond && const_expression_in_where(cond,order->item[0])) + { + DBUG_PRINT("info",("removing: %s", order->item[0]->full_name())); + continue; + } + if (first_is_base_table && + (ref=order_tables & (not_const_tables ^ first_table))) + { + if (!(order_tables & first_table) && + only_eq_ref_tables(join,first_order, ref)) + { + DBUG_PRINT("info",("removing: %s", order->item[0]->full_name())); + continue; + } + /* + UseMultipleEqualitiesToRemoveTempTable: + Can use multiple-equalities here to check that ORDER BY columns + can be used without tmp. table. + */ + bool can_subst_to_first_table= false; + if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) && + first_is_base_table && + order->item[0]->real_item()->type() == Item::FIELD_ITEM && + join->cond_equal) + { + table_map first_table_bit= + join->join_tab[join->const_tables].table->map; + + Item *item= order->item[0]; + + /* + TODO: equality substitution in the context of ORDER BY is + sometimes allowed when it is not allowed in the general case. + + We make the below call for its side effect: it will locate the + multiple equality the item belongs to and set item->item_equal + accordingly. 
+ */ + Item *res= item->propagate_equal_fields(join->thd, + Value_source:: + Context_identity(), + join->cond_equal); + Item_equal *item_eq; + if ((item_eq= res->get_item_equal())) + { + Item *first= item_eq->get_first(NO_PARTICULAR_TAB, NULL); + if (first->const_item() || first->used_tables() == + first_table_bit) + { + can_subst_to_first_table= true; + } + } + } + + if (!can_subst_to_first_table) + { + *simple_order=0; // Must do a temp table to sort + } + } + } + } + /* Remove ORDER BY entries that we have seen before */ + for (tmp_order= first_order; + tmp_order != order; + tmp_order= tmp_order->next) + { + if (tmp_order->item[0]->eq(order->item[0],1)) + break; + } + if (tmp_order != order) + continue; // Duplicate order by. Remove + + if (change_list) + *prev_ptr= order; // use this entry + prev_ptr= &order->next; + } + if (change_list) + *prev_ptr=0; + if (prev_ptr == &first_order) // Nothing to sort/group + *simple_order=1; +#ifndef DBUG_OFF + if (unlikely(join->thd->is_error())) + DBUG_PRINT("error",("Error from remove_const")); +#endif + DBUG_PRINT("exit",("simple_order: %d",(int) *simple_order)); + DBUG_RETURN(first_order); +} + + +/** + Filter out ORDER items those are equal to constants in WHERE + + This function is a limited version of remove_const() for use + with non-JOIN statements (i.e. single-table UPDATE and DELETE). + + + @param order Linked list of ORDER BY arguments + @param cond WHERE expression + + @return pointer to new filtered ORDER list or NULL if whole list eliminated + + @note + This function overwrites input order list. 
+*/ + +ORDER *simple_remove_const(ORDER *order, COND *where) +{ + if (!order || !where) + return order; + + ORDER *first= NULL, *prev= NULL; + for (; order; order= order->next) + { + DBUG_ASSERT(!order->item[0]->with_sum_func()); // should never happen + if (!const_expression_in_where(where, order->item[0])) + { + if (!first) + first= order; + if (prev) + prev->next= order; + prev= order; + } + } + if (prev) + prev->next= NULL; + return first; +} + + +/* + Set all fields in the table to have a null value + + @param tables Table list +*/ + +static void make_tables_null_complemented(List *tables) +{ + List_iterator ti(*tables); + TABLE_LIST *table; + while ((table= ti++)) + { + /* + Don't touch semi-join materialization tables, as the a join_free() + call may have freed them (and HAVING clause can't have references to + them anyway). + */ + if (!table->is_jtbm()) + { + TABLE *tbl= table->table; + mark_as_null_row(tbl); // Set fields to NULL + } + } +} + + +static int +return_zero_rows(JOIN *join, select_result *result, List *tables, + List *fields, bool send_row, ulonglong select_options, + const char *info, Item *having, List *all_fields) +{ + DBUG_ENTER("return_zero_rows"); + + if (select_options & SELECT_DESCRIBE) + { + select_describe(join, FALSE, FALSE, FALSE, info); + DBUG_RETURN(0); + } + + if (send_row) + { + /* + Set all tables to have NULL row. This is needed as we will be evaluating + HAVING condition. + */ + make_tables_null_complemented(tables); + + List_iterator_fast it(*all_fields); + Item *item; + /* + Inform all items (especially aggregating) to calculate HAVING correctly, + also we will need it for sending results. 
+ */ + join->no_rows_in_result_called= 1; + while ((item= it++)) + item->no_rows_in_result(); + if (having && having->val_int() == 0) + send_row=0; + } + + /* Update results for FOUND_ROWS */ + if (!join->send_row_on_empty_set()) + { + join->thd->set_examined_row_count(0); + join->thd->limit_found_rows= 0; + } + + if (!(result->send_result_set_metadata(*fields, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))) + { + bool send_error= FALSE; + if (send_row) + send_error= result->send_data_with_check(*fields, join->unit, 0) > 0; + if (likely(!send_error)) + result->send_eof(); // Should be safe + } + /* + JOIN::join_free() must be called after the virtual method + select::send_result_set_metadata() returned control since + implementation of this method could use data strutcures + that are released by the method JOIN::join_free(). + */ + join->join_free(); + + DBUG_RETURN(0); +} + +/** + Reset table rows to contain a null-complement row (all fields are null) + + Used only in JOIN::clear() and in do_select() if there where no matching rows. + + @param join JOIN + @param cleared_tables Used to mark all cleared tables in the map. Needed for + unclear_tables() to know which tables to restore to + their original state. +*/ + +static void clear_tables(JOIN *join, table_map *cleared_tables) +{ + DBUG_ASSERT(cleared_tables); + for (uint i= 0 ; i < join->table_count ; i++) + { + TABLE *table= join->table[i]; + + if (table->null_row) + continue; // Nothing more to do + (*cleared_tables)|= (((table_map) 1) << i); + if (table->s->null_bytes) + { + /* + Remember null bits for the record so that we can restore the + original const record in unclear_tables() + */ + memcpy(table->record[1], table->null_flags, table->s->null_bytes); + } + mark_as_null_row(table); // All fields are NULL + } +} + + +/** + Reverse null marking for tables and restore null bits. + This return the tables to the state of before clear_tables(). 
+ + We have to do this because the tables may be re-used in a sub query + and the subquery will assume that the const tables contain the original + data before clear_tables(). +*/ + +static void unclear_tables(JOIN *join, table_map *cleared_tables) +{ + for (uint i= 0 ; i < join->table_count ; i++) + { + if ((*cleared_tables) & (((table_map) 1) << i)) + { + TABLE *table= join->table[i]; + if (table->s->null_bytes) + memcpy(table->null_flags, table->record[1], table->s->null_bytes); + unmark_as_null_row(table); + } + } +} + + +/***************************************************************************** + Make some simple condition optimizations: + If there is a test 'field = const' change all refs to 'field' to 'const' + Remove all dummy tests 'item = item', 'const op const'. + Remove all 'item is NULL', when item can never be null! + item->marker should be 0 for all items on entry + Return in cond_value FALSE if condition is impossible (1 = 2) +*****************************************************************************/ + +class COND_CMP :public ilink { +public: + static void *operator new(size_t size, MEM_ROOT *mem_root) + { + return alloc_root(mem_root, size); + } + static void operator delete(void *ptr __attribute__((unused)), + size_t size __attribute__((unused))) + { TRASH_FREE(ptr, size); } + + static void operator delete(void *, MEM_ROOT*) {} + + Item *and_level; + Item_bool_func2 *cmp_func; + COND_CMP(Item *a,Item_bool_func2 *b) :and_level(a),cmp_func(b) {} +}; + +/** + Find the multiple equality predicate containing a field. + + The function retrieves the multiple equalities accessed through + the cond_equal structure from current level and up looking for + an equality containing field. It stops retrieval as soon as the equality + is found and sets up inherited_fl to TRUE if it's found on upper levels. 
+ + @param cond_equal multiple equalities to search in + @param field field to look for + @param[out] inherited_fl set up to TRUE if multiple equality is found + on upper levels (not on current level of + cond_equal) + + @return + - Item_equal for the found multiple equality predicate if a success; + - NULL otherwise. +*/ + +Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field, + bool *inherited_fl) +{ + Item_equal *item= 0; + bool in_upper_level= FALSE; + while (cond_equal) + { + List_iterator_fast li(cond_equal->current_level); + while ((item= li++)) + { + if (item->contains(field)) + goto finish; + } + in_upper_level= TRUE; + cond_equal= cond_equal->upper_levels; + } + in_upper_level= FALSE; +finish: + *inherited_fl= in_upper_level; + return item; +} + + +/** + Check whether an equality can be used to build multiple equalities. + + This function first checks whether the equality (left_item=right_item) + is a simple equality i.e. the one that equates a field with another field + or a constant (field=field_item or field=const_item). + If this is the case the function looks for a multiple equality + in the lists referenced directly or indirectly by cond_equal inferring + the given simple equality. If it doesn't find any, it builds a multiple + equality that covers the predicate, i.e. the predicate can be inferred + from this multiple equality. + The built multiple equality could be obtained in such a way: + create a binary multiple equality equivalent to the predicate, then + merge it, if possible, with one of old multiple equalities. + This guarantees that the set of multiple equalities covering equality + predicates will be minimal. + + EXAMPLE: + For the where condition + @code + WHERE a=b AND b=c AND + (b=2 OR f=e) + @endcode + the check_equality will be called for the following equality + predicates a=b, b=c, b=2 and f=e. + - For a=b it will be called with *cond_equal=(0,[]) and will transform + *cond_equal into (0,[Item_equal(a,b)]). 
+ - For b=c it will be called with *cond_equal=(0,[Item_equal(a,b)]) + and will transform *cond_equal into CE=(0,[Item_equal(a,b,c)]). + - For b=2 it will be called with *cond_equal=(ptr(CE),[]) + and will transform *cond_equal into (ptr(CE),[Item_equal(2,a,b,c)]). + - For f=e it will be called with *cond_equal=(ptr(CE), []) + and will transform *cond_equal into (ptr(CE),[Item_equal(f,e)]). + + @note + Now only fields that have the same type definitions (verified by + the Field::eq_def method) are placed to the same multiple equalities. + Because of this some equality predicates are not eliminated and + can be used in the constant propagation procedure. + We could weaken the equality test as soon as at least one of the + equal fields is to be equal to a constant. It would require a + more complicated implementation: we would have to store, in + general case, its own constant for each field from the multiple + equality. But at the same time it would allow us to get rid + of constant propagation completely: it would be done by the call + to cond->build_equal_items(). + + + The implementation does not follow exactly the above rules to + build a new multiple equality for the equality predicate. + If it processes the equality of the form field1=field2, it + looks for multiple equalities me1 containing field1 and me2 containing + field2. If only one of them is found the function expands it with + the lacking field. If multiple equalities for both fields are + found they are merged. If both searches fail a new multiple equality + containing just field1 and field2 is added to the existing + multiple equalities. + If the function processes the predicate of the form field1=const, + it looks for a multiple equality containing field1. If found, the + function checks the constant of the multiple equality. If the value + is unknown, it is set up to const. Otherwise the value is compared with + const and the evaluation of the equality predicate is performed. 
    When expanding/merging equality predicates from the upper levels
    the function first copies them for the current level. It looks
    acceptable, as this happens rarely. The implementation without
    copying would be much more complicated.

    For description of how equality propagation works with SJM nests, grep
    for EqualityPropagationAndSjmNests.

  @param left_item   left term of the equality to be checked
  @param right_item  right term of the equality to be checked
  @param item        equality item if the equality originates from a condition
                     predicate, 0 if the equality is the result of row
                     elimination
  @param cond_equal  multiple equalities that must hold together with the
                     equality

  @retval
    TRUE    if the predicate is a simple equality predicate to be used
            for building multiple equalities
  @retval
    FALSE   otherwise
*/

bool check_simple_equality(THD *thd, const Item::Context &ctx,
                           Item *left_item, Item *right_item,
                           COND_EQUAL *cond_equal)
{
  Item *orig_left_item= left_item;
  Item *orig_right_item= right_item;
  /* Strip view/outer references down to the underlying items where safe */
  if (left_item->type() == Item::REF_ITEM)
  {
    Item_ref::Ref_Type left_ref= ((Item_ref*)left_item)->ref_type();

    if (left_ref == Item_ref::VIEW_REF ||
        left_ref == Item_ref::REF)
    {
      if (((Item_ref*)left_item)->get_depended_from())
        return FALSE;
      if (left_ref == Item_ref::VIEW_REF &&
          ((Item_direct_view_ref*)left_item)->get_null_ref_table() !=
          NO_NULL_TABLE &&
          !left_item->real_item()->used_tables())
        return FALSE;
      left_item= left_item->real_item();
    }
  }
  if (right_item->type() == Item::REF_ITEM)
  {
    Item_ref::Ref_Type right_ref= ((Item_ref*)right_item)->ref_type();
    if (right_ref == Item_ref::VIEW_REF ||
        (right_ref == Item_ref::REF))
    {
      if (((Item_ref*)right_item)->get_depended_from())
        return FALSE;
      if (right_ref == Item_ref::VIEW_REF &&
          ((Item_direct_view_ref*)right_item)->get_null_ref_table() !=
          NO_NULL_TABLE &&
          !right_item->real_item()->used_tables())
        return FALSE;
      right_item= right_item->real_item();
    }
  }
  if (left_item->type() == Item::FIELD_ITEM &&
      right_item->type() == Item::FIELD_ITEM &&
      !((Item_field*)left_item)->get_depended_from() &&
      !((Item_field*)right_item)->get_depended_from())
  {
    /* The predicate of the form field1=field2 is processed */

    Field *left_field= ((Item_field*) left_item)->field;
    Field *right_field= ((Item_field*) right_item)->field;

    if (!left_field->eq_def(right_field) &&
        !fields_equal_using_narrowing(thd, left_field, right_field))
      return FALSE;

    /* Search for multiple equalities containing field1 and/or field2 */
    bool left_copyfl, right_copyfl;
    Item_equal *left_item_equal=
               find_item_equal(cond_equal, left_field, &left_copyfl);
    Item_equal *right_item_equal=
               find_item_equal(cond_equal, right_field, &right_copyfl);

    /* As (NULL=NULL) != TRUE we can't just remove the predicate f=f */
    if (left_field->eq(right_field)) /* f = f */
      return (!(left_field->maybe_null() && !left_item_equal));

    if (left_item_equal && left_item_equal == right_item_equal)
    {
      /*
        The equality predicate is inference of one of the existing
        multiple equalities, i.e the condition is already covered
        by upper level equalities
      */
      return TRUE;
    }

    /* Copy the found multiple equalities at the current level if needed */
    if (left_copyfl)
    {
      /* left_item_equal of an upper level contains left_item */
      left_item_equal= new (thd->mem_root) Item_equal(thd, left_item_equal);
      left_item_equal->set_context_field(((Item_field*) left_item));
      cond_equal->current_level.push_back(left_item_equal, thd->mem_root);
    }
    if (right_copyfl)
    {
      /* right_item_equal of an upper level contains right_item */
      right_item_equal= new (thd->mem_root) Item_equal(thd, right_item_equal);
      right_item_equal->set_context_field(((Item_field*) right_item));
      cond_equal->current_level.push_back(right_item_equal, thd->mem_root);
    }

    if (left_item_equal)
    {
      /* left item was found in the current or one of the upper levels */
      if (! right_item_equal)
        left_item_equal->add(orig_right_item, thd->mem_root);
      else
      {
        /* Merge two multiple equalities forming a new one */
        left_item_equal->merge(thd, right_item_equal);
        /* Remove the merged multiple equality from the list */
        List_iterator<Item_equal> li(cond_equal->current_level);
        while ((li++) != right_item_equal) ;
        li.remove();
      }
    }
    else
    {
      /* left item was not found neither the current nor in upper levels */
      if (right_item_equal)
        right_item_equal->add(orig_left_item, thd->mem_root);
      else
      {
        /* None of the fields was found in multiple equalities */
        Type_handler_hybrid_field_type
          tmp(orig_left_item->type_handler_for_comparison());
        if (tmp.aggregate_for_comparison(orig_right_item->
                                         type_handler_for_comparison()))
          return false;
        Item_equal *item_equal=
          new (thd->mem_root) Item_equal(thd, tmp.type_handler(),
                                         orig_left_item, orig_right_item,
                                         false);
        item_equal->set_context_field((Item_field*)left_item);
        cond_equal->current_level.push_back(item_equal, thd->mem_root);
      }
    }
    return TRUE;
  }

  {
    /* The predicate of the form field=const/const=field is processed */
    Item *const_item= 0;
    Item_field *field_item= 0;
    Item *orig_field_item= 0;
    if (left_item->type() == Item::FIELD_ITEM &&
        !((Item_field*)left_item)->get_depended_from() &&
        right_item->can_eval_in_optimize())
    {
      orig_field_item= orig_left_item;
      field_item= (Item_field *) left_item;
      const_item= right_item;
    }
    else if (right_item->type() == Item::FIELD_ITEM &&
             !((Item_field*)right_item)->get_depended_from() &&
             left_item->can_eval_in_optimize())
    {
      orig_field_item= orig_right_item;
      field_item= (Item_field *) right_item;
      const_item= left_item;
    }

    if (const_item &&
        field_item->field->test_if_equality_guarantees_uniqueness(const_item))
    {
      /*
        field_item and const_item are arguments of a scalar or a row
        comparison function:
          WHERE column=constant
          WHERE (column, ...) = (constant, ...)

        The owner comparison function has previously called fix_fields(),
        so field_item and const_item should be directly comparable items,
        field_item->cmp_context and const_item->cmp_context should be set.
        In case of string comparison, charsets and collations of
        field_item and const_item should have already be aggregated
        for comparison, all necessary character set converters installed
        and fixed.

        In case of string comparison, const_item can be either:
        - a weaker constant that does not need to be converted to field_item:
            WHERE latin1_field = 'latin1_const'
            WHERE varbinary_field = 'latin1_const'
            WHERE latin1_bin_field = 'latin1_general_ci_const'
        - a stronger constant that does not need to be converted to field_item:
            WHERE latin1_field = binary 0xDF
            WHERE latin1_field = 'a' COLLATE latin1_bin
        - a result of conversion (e.g. from the session character set)
          to the character set of field_item:
            WHERE latin1_field = 'utf8_string_with_latin1_repertoire'
      */
      bool copyfl;

      Item_equal *item_equal = find_item_equal(cond_equal,
                                               field_item->field, &copyfl);
      if (copyfl)
      {
        item_equal= new (thd->mem_root) Item_equal(thd, item_equal);
        cond_equal->current_level.push_back(item_equal, thd->mem_root);
        item_equal->set_context_field(field_item);
      }
      Item *const_item2= field_item->field->get_equal_const_item(thd, ctx,
                                                                 const_item);
      if (!const_item2)
        return false;

      if (item_equal)
      {
        /*
          The flag cond_false will be set to 1 after this, if item_equal
          already contains a constant and its value is not equal to
          the value of const_item.
        */
        item_equal->add_const(thd, const_item2);
      }
      else
      {
        Type_handler_hybrid_field_type
          tmp(orig_left_item->type_handler_for_comparison());
        if (tmp.aggregate_for_comparison(orig_right_item->
                                         type_handler_for_comparison()))
          return false;
        item_equal= new (thd->mem_root) Item_equal(thd, tmp.type_handler(),
                                                   const_item2,
                                                   orig_field_item, true);
        item_equal->set_context_field(field_item);
        cond_equal->current_level.push_back(item_equal, thd->mem_root);
      }
      return TRUE;
    }
  }
  return FALSE;
}


/**
  Convert row equalities into a conjunction of regular equalities.

  The function converts a row equality of the form (E1,...,En)=(E'1,...,E'n)
  into a list of equalities E1=E'1,...,En=E'n. For each of these equalities
  Ei=E'i the function checks whether it is a simple equality or a row
  equality. If it is a simple equality it is used to expand multiple
  equalities of cond_equal. If it is a row equality it is converted to a
  sequence of equalities between row elements. If Ei=E'i is neither a
  simple equality nor a row equality the item for this predicate is added
  to eq_list.

  @param thd        thread handle
  @param left_row   left term of the row equality to be processed
  @param right_row  right term of the row equality to be processed
  @param cond_equal multiple equalities that must hold together with the
                    predicate
  @param eq_list    results of conversions of row equalities that are not
                    simple enough to form multiple equalities

  @retval
    TRUE    if conversion has succeeded (no fatal error)
  @retval
    FALSE   otherwise
*/

static bool check_row_equality(THD *thd, const Arg_comparator *comparators,
                               Item *left_row, Item_row *right_row,
                               COND_EQUAL *cond_equal, List<Item>* eq_list)
{
  uint n= left_row->cols();
  /* Process the row equality column by column */
  for (uint i= 0 ; i < n; i++)
  {
    bool is_converted;
    Item *left_item= left_row->element_index(i);
    Item *right_item= right_row->element_index(i);
    if (left_item->type() == Item::ROW_ITEM &&
        right_item->type() == Item::ROW_ITEM)
    {
      /*
        Item_splocal for ROW SP variables return Item::ROW_ITEM.
        Here we know that left_item and right_item are not Item_splocal,
        because ROW SP variables with nested ROWs are not supported yet.
        It's safe to cast left_item and right_item to Item_row.
      */
      DBUG_ASSERT(!left_item->get_item_splocal());
      DBUG_ASSERT(!right_item->get_item_splocal());
      /* Nested row: recurse with the per-column sub-comparators */
      is_converted= check_row_equality(thd,
                                       comparators[i].subcomparators(),
                                       (Item_row *) left_item,
                                       (Item_row *) right_item,
                                       cond_equal, eq_list);
    }
    else
    {
      const Arg_comparator *tmp= &comparators[i];
      is_converted= check_simple_equality(thd,
                                          Item::Context(Item::ANY_SUBST,
                                                  tmp->compare_type_handler(),
                                                  tmp->compare_collation()),
                                          left_item, right_item,
                                          cond_equal);
    }

    if (!is_converted)
    {
      /* Not foldable into a multiple equality: keep it as a plain Ei=E'i */
      Item_func_eq *eq_item;
      if (!(eq_item= new (thd->mem_root) Item_func_eq(thd, left_item, right_item)) ||
          eq_item->set_cmp_func(thd))
        return FALSE;
      eq_item->quick_fix_field();
      eq_list->push_back(eq_item, thd->mem_root);
    }
  }
  return TRUE;
}


/**
  Eliminate row equalities and form multiple equalities predicates.

  This function checks whether the item is a simple equality
  i.e. the one that equates a field with another field or a constant
  (field=field_item or field=constant_item), or, a row equality.
  For a simple equality the function looks for a multiple equality
  in the lists referenced directly or indirectly by cond_equal inferring
  the given simple equality. If it doesn't find any, it builds/expands
  multiple equality that covers the predicate.
  Row equalities are eliminated, substituted by conjunctive regular
  equalities which are treated in the same way as original equality
  predicates.

  @param thd        thread handle
  @param item       predicate to process
  @param cond_equal multiple equalities that must hold together with the
                    predicate
  @param eq_list    results of conversions of row equalities that are not
                    simple enough to form multiple equalities

  @retval
    TRUE   if re-writing rules have been applied
  @retval
    FALSE  otherwise, i.e.
           if the predicate is not an equality,
           or, if the equality is neither a simple one nor a row equality,
           or, if the procedure fails by a fatal error.
*/

bool Item_func_eq::check_equality(THD *thd, COND_EQUAL *cond_equal,
                                  List<Item> *eq_list)
{
  Item *left_item= arguments()[0];
  Item *right_item= arguments()[1];

  if (left_item->type() == Item::ROW_ITEM &&
      right_item->type() == Item::ROW_ITEM)
  {
    /*
      Item_splocal::type() for ROW variables returns Item::ROW_ITEM.
      Distinguish ROW-type Item_splocal from Item_row.
      Example query:
        SELECT 1 FROM DUAL WHERE row_sp_variable=ROW(100,200);
    */
    if (left_item->get_item_splocal() ||
        right_item->get_item_splocal())
      return false;
    return check_row_equality(thd,
                              cmp.subcomparators(),
                              (Item_row *) left_item,
                              (Item_row *) right_item,
                              cond_equal, eq_list);
  }
  return check_simple_equality(thd,
                               Context(ANY_SUBST,
                                       compare_type_handler(),
                                       compare_collation()),
                               left_item, right_item, cond_equal);
}


/**
  Item_xxx::build_equal_items()

  Replace all equality predicates in a condition referenced by "this"
  by multiple equality items.

  At each 'and' level the function detects items for equality predicates
  and replaces them by a set of multiple equality items of class Item_equal,
  taking into account inherited equalities from upper levels.
  If an equality predicate is used not in a conjunction it's just
  replaced by a multiple equality predicate.
  For each 'and' level the function sets a pointer to the inherited
  multiple equalities in the cond_equal field of the associated
  object of the type Item_cond_and.
  The function also traverses the cond tree and for each field reference
  sets a pointer to the multiple equality item containing the field, if there
  is any. If this multiple equality equates fields to a constant the
  function replaces the field reference by the constant in the cases
  when the field is not of a string type or when the field reference is
  just an argument of a comparison predicate.
  The function also determines the maximum number of members in
  equality lists of each Item_cond_and object assigning it to
  thd->lex->current_select->max_equal_elems.

  @note
    Multiple equality predicate =(f1,..fn) is equivalent to the conjunction of
    f1=f2, .., fn-1=fn. It substitutes any inference from these
    equality predicates that is equivalent to the conjunction.
    Thus, =(a1,a2,a3) can substitute for ((a1=a3) AND (a2=a3) AND (a2=a1)) as
    it is equivalent to ((a1=a2) AND (a2=a3)).
    The function always makes a substitution of all equality predicates
    occurring in a conjunction for a minimal set of multiple equality
    predicates.
    This set can be considered as a canonical representation of the
    sub-conjunction of the equality predicates.
    E.g. (t1.a=t2.b AND t2.b>5 AND t1.a=t3.c) is replaced by
    (=(t1.a,t2.b,t3.c) AND t2.b>5), not by
    (=(t1.a,t2.b) AND =(t1.a,t3.c) AND t2.b>5);
    while (t1.a=t2.b AND t2.b>5 AND t3.c=t4.d) is replaced by
    (=(t1.a,t2.b) AND =(t3.c,t4.d) AND t2.b>5),
    but if additionally =(t4.d,t2.b) is inherited, it
    will be replaced by (=(t1.a,t2.b,t3.c,t4.d) AND t2.b>5)

    The function performs the substitution in a recursive descent by
    the condition tree, passing to the next AND level a chain of multiple
    equality predicates which have been built at the upper levels.
    The Item_equal items built at the level are attached to other
    non-equality conjuncts as a sublist. The pointer to the inherited
    multiple equalities is saved in the and condition object (Item_cond_and).
    This chain allows us for any field reference occurrence to easily find a
    multiple equality that must hold for this occurrence.
    For each AND level we do the following:
    - scan it for all equality predicate (=) items
    - join them into disjoint Item_equal() groups
    - process the included OR conditions recursively to do the same for
      lower AND levels.

    We need to do things in this order as lower AND levels need to know about
    all possible Item_equal objects in upper levels.

  @param thd        thread handle
  @param inherited  path to all inherited multiple equality items

  @return
    pointer to the transformed condition,
    whose Used_tables_and_const_cache is up to date,
    so no additional update_used_tables() is needed on the result.
*/

COND *Item_cond_and::build_equal_items(THD *thd,
                                       COND_EQUAL *inherited,
                                       bool link_item_fields,
                                       COND_EQUAL **cond_equal_ref)
{
  Item_equal *item_equal;
  COND_EQUAL cond_equal;
  cond_equal.upper_levels= inherited;

  if (check_stack_overrun(thd, STACK_MIN_SIZE, NULL))
    return this;                          // Fatal error flag is set!

  List<Item> eq_list;
  List<Item> *cond_args= argument_list();

  List_iterator<Item> li(*cond_args);
  Item *item;

  DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);
  /*
    Retrieve all conjuncts of this level detecting the equalities
    that are subject to substitution by multiple equality items and
    removing each such predicate from the conjunction after having
    found/created a multiple equality whose inference the predicate is.
  */
  while ((item= li++))
  {
    /*
      PS/SP note: we can safely remove a node from AND-OR
      structure here because it's restored before each
      re-execution of any prepared statement/stored procedure.
    */
    if (item->check_equality(thd, &cond_equal, &eq_list))
      li.remove();
  }

  /*
    Check if we eliminated all the predicates of the level, e.g.
    (a=a AND b=b AND a=a).
  */
  if (!cond_args->elements &&
      !cond_equal.current_level.elements &&
      !eq_list.elements)
    return (Item*) Item_true;

  /* Fix and account all multiple equalities built for this level */
  List_iterator_fast<Item_equal> it(cond_equal.current_level);
  while ((item_equal= it++))
  {
    item_equal->set_link_equal_fields(link_item_fields);
    item_equal->fix_fields(thd, NULL);
    item_equal->update_used_tables();
    set_if_bigger(thd->lex->current_select->max_equal_elems,
                  item_equal->n_field_items());
  }

  m_cond_equal.copy(cond_equal);
  cond_equal.current_level= m_cond_equal.current_level;
  inherited= &m_cond_equal;

  /*
    Make replacement of equality predicates for lower levels
    of the condition expression.
  */
  li.rewind();
  while ((item= li++))
  {
    Item *new_item;
    if ((new_item= item->build_equal_items(thd, inherited, false, NULL))
        != item)
    {
      /* This replacement happens only for standalone equalities */
      /*
        This is ok with PS/SP as the replacement is done for
        cond_args of an AND/OR item, which are restored for each
        execution of PS/SP.
      */
      li.replace(new_item);
    }
  }
  cond_args->append(&eq_list);
  cond_args->append((List<Item> *)&cond_equal.current_level);
  update_used_tables();
  if (cond_equal_ref)
    *cond_equal_ref= &m_cond_equal;
  return this;
}


COND *Item_cond::build_equal_items(THD *thd,
                                   COND_EQUAL *inherited,
                                   bool link_item_fields,
                                   COND_EQUAL **cond_equal_ref)
{
  List<Item> *cond_args= argument_list();

  List_iterator<Item> li(*cond_args);
  Item *item;

  DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);
  /*
    Make replacement of equality predicates for lower levels
    of the condition expression.
    Update used_tables_cache and const_item_cache on the way.
  */
  used_tables_and_const_cache_init();
  while ((item= li++))
  {
    Item *new_item;
    if ((new_item= item->build_equal_items(thd, inherited, false, NULL))
        != item)
    {
      /* This replacement happens only for standalone equalities */
      /*
        This is ok with PS/SP as the replacement is done for
        arguments of an AND/OR item, which are restored for each
        execution of PS/SP.
      */
      li.replace(new_item);
    }
    used_tables_and_const_cache_join(new_item);
  }
  return this;
}


COND *Item_func_eq::build_equal_items(THD *thd,
                                      COND_EQUAL *inherited,
                                      bool link_item_fields,
                                      COND_EQUAL **cond_equal_ref)
{
  COND_EQUAL cond_equal;
  cond_equal.upper_levels= inherited;
  List<Item> eq_list;

  DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);
  /*
    If an equality predicate forms the whole and level,
    we call it standalone equality and it's processed here.
    E.g. in the following where condition
    WHERE a=5 AND (b=5 or a=c)
    (b=5) and (a=c) are standalone equalities.
    In general we can't leave alone standalone equalities:
    for WHERE a=b AND c=d AND (b=c OR d=5)
    b=c is replaced by =(a,b,c,d).
  */
  if (Item_func_eq::check_equality(thd, &cond_equal, &eq_list))
  {
    Item_equal *item_equal;
    int n= cond_equal.current_level.elements + eq_list.elements;
    if (n == 0)
      return (Item*) Item_true;
    else if (n == 1)
    {
      if ((item_equal= cond_equal.current_level.pop()))
      {
        item_equal->fix_fields(thd, NULL);
        item_equal->update_used_tables();
        set_if_bigger(thd->lex->current_select->max_equal_elems,
                      item_equal->n_field_items());
        item_equal->upper_levels= inherited;
        if (cond_equal_ref)
          *cond_equal_ref= new (thd->mem_root) COND_EQUAL(item_equal,
                                                          thd->mem_root);
        return item_equal;
      }
      Item *res= eq_list.pop();
      res->update_used_tables();
      DBUG_ASSERT(res->type() == FUNC_ITEM);
      return res;
    }
    else
    {
      /*
        Here a new AND level must be created. It can happen only
        when a row equality is processed as a standalone predicate.
      */
      Item_cond_and *and_cond= new (thd->mem_root) Item_cond_and(thd, eq_list);
      and_cond->quick_fix_field();
      List<Item> *cond_args= and_cond->argument_list();
      List_iterator_fast<Item_equal> it(cond_equal.current_level);
      while ((item_equal= it++))
      {
        if (item_equal->fix_length_and_dec(thd))
          return NULL;
        item_equal->update_used_tables();
        set_if_bigger(thd->lex->current_select->max_equal_elems,
                      item_equal->n_field_items());
      }
      and_cond->m_cond_equal.copy(cond_equal);
      cond_equal.current_level= and_cond->m_cond_equal.current_level;
      cond_args->append((List<Item> *)&cond_equal.current_level);
      and_cond->update_used_tables();
      if (cond_equal_ref)
        *cond_equal_ref= &and_cond->m_cond_equal;
      return and_cond;
    }
  }
  return Item_func::build_equal_items(thd, inherited, link_item_fields,
                                      cond_equal_ref);
}


COND *Item_func::build_equal_items(THD *thd, COND_EQUAL *inherited,
                                   bool link_item_fields,
                                   COND_EQUAL **cond_equal_ref)
{
  /*
    For each field reference in cond, not from equal item predicates,
    set a pointer to the multiple equality it belongs to (if there is any)
    as soon the field is not of a string type or the field reference is
    an argument of a comparison predicate.
  */
  COND *cond= propagate_equal_fields(thd, Context_boolean(), inherited);
  cond->update_used_tables();
  DBUG_ASSERT(cond == this);
  DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);
  return cond;
}


COND *Item_equal::build_equal_items(THD *thd, COND_EQUAL *inherited,
                                    bool link_item_fields,
                                    COND_EQUAL **cond_equal_ref)
{
  COND *cond= Item_func::build_equal_items(thd, inherited, link_item_fields,
                                           cond_equal_ref);
  if (cond_equal_ref)
    *cond_equal_ref= new (thd->mem_root) COND_EQUAL(this, thd->mem_root);
  return cond;
}


/**
  Build multiple equalities for a condition and all on expressions that
  inherit these multiple equalities.

  The function first applies the cond->build_equal_items() method
  to build all multiple equalities for condition cond utilizing equalities
  referred through the parameter inherited. The extended set of
  equalities is returned in the structure referred by the cond_equal_ref
  parameter. After this the function calls itself recursively for
  all on expressions whose direct references can be found in join_list
  and who inherit directly the multiple equalities just having built.

  @note
    The on expression used in an outer join operation inherits all equalities
    from the on expression of the embedding join, if there is any, or
    otherwise - from the where condition.
    This fact is not obvious, but presumably can be proved.
    Consider the following query:
    @code
      SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t2.a=t4.a
        WHERE t1.a=t2.a;
    @endcode
    If the on expression in the query inherits =(t1.a,t2.a), then we
    can build the multiple equality =(t1.a,t2.a,t3.a,t4.a) that infers
    the equality t3.a=t4.a.
Although the on expression
    t1.a=t3.a AND t2.a=t4.a AND t3.a=t4.a is not equivalent to the one
    in the query the latter can be replaced by the former: the new query
    will return the same result set as the original one.

    It is interesting that the multiple equality =(t1.a,t2.a,t3.a,t4.a)
    allows us to use t1.a=t3.a AND t3.a=t4.a under the on condition:
    @code
      SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a
        WHERE t1.a=t2.a
    @endcode
    This query is equivalent to:
    @code
      SELECT * FROM (t1 LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a),t2
        WHERE t1.a=t2.a
    @endcode
    Similarly the original query can be rewritten to the query:
    @code
      SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t2.a=t4.a AND t3.a=t4.a
        WHERE t1.a=t2.a
    @endcode
    that is equivalent to:
    @code
      SELECT * FROM (t2 LEFT JOIN (t3,t4) ON t2.a=t4.a AND t3.a=t4.a), t1
        WHERE t1.a=t2.a
    @endcode
    Thus, applying equalities from the where condition we basically
    can get more freedom in performing join operations.
    Although we don't use this property now, it probably makes sense to use
    it in the future.
  @param thd             Thread handler
  @param cond            condition to build the multiple equalities for
  @param inherited       path to all inherited multiple equality items
  @param join_list       list of join tables to which the condition
                         refers to
  @param ignore_on_conds TRUE <-> do not build multiple equalities
                         for on expressions
  @param[out] cond_equal_ref  pointer to the structure to place built
                              equalities in
  @param link_equal_fields    equal fields are to be linked

  @return
    pointer to the transformed condition containing multiple equalities
*/

static COND *build_equal_items(JOIN *join, COND *cond,
                               COND_EQUAL *inherited,
                               List<TABLE_LIST> *join_list,
                               bool ignore_on_conds,
                               COND_EQUAL **cond_equal_ref,
                               bool link_equal_fields)
{
  THD *thd= join->thd;

  *cond_equal_ref= NULL;

  if (cond)
  {
    cond= cond->build_equal_items(thd, inherited, link_equal_fields,
                                  cond_equal_ref);
    if (*cond_equal_ref)
    {
      /* Equalities built here become the inherited set for ON expressions */
      (*cond_equal_ref)->upper_levels= inherited;
      inherited= *cond_equal_ref;
    }
  }

  if (join_list && !ignore_on_conds)
  {
    TABLE_LIST *table;
    List_iterator<TABLE_LIST> li(*join_list);

    while ((table= li++))
    {
      if (table->on_expr)
      {
        List<TABLE_LIST> *nested_join_list= table->nested_join ?
          &table->nested_join->join_list : NULL;
        /*
          We can modify table->on_expr because its old value will
          be restored before re-execution of PS/SP.
          NOTE(review): the recursive call omits link_equal_fields —
          presumably it defaults via the forward declaration; confirm.
        */
        table->on_expr= build_equal_items(join, table->on_expr, inherited,
                                          nested_join_list, ignore_on_conds,
                                          &table->cond_equal);
        if (unlikely(join->thd->trace_started()))
        {
          const char *table_name;
          if (table->nested_join)
            table_name= table->nested_join->join_list.head()->alias.str;
          else
            table_name= table->alias.str;
          trace_condition(join->thd, "ON expr", "build_equal_items",
                          table->on_expr, table_name);
        }
      }
    }
  }

  return cond;
}


/**
  Compare field items by table order in the execution plan.

  If field1 and field2 belong to different tables then
  field1 is considered better than field2 if the table containing
  field1 is accessed earlier than the table containing field2.
  The function finds out which of the two fields is better according
  to this criterion.
  If field1 and field2 belong to the same table then the result
  of comparison depends on whether the fields are parts of
  the key that are used to access this table.

  @param field1          first field item to compare
  @param field2          second field item to compare
  @param table_join_idx  index to tables determining table order

  @retval
    1  if field1 is better than field2
  @retval
    -1  if field2 is better than field1
  @retval
    0  otherwise
*/

static int compare_fields_by_table_order(Item *field1,
                                         Item *field2,
                                         void *table_join_idx)
{
  int cmp= 0;
  bool outer_ref= 0;
  Item *field1_real= field1->real_item();
  Item *field2_real= field2->real_item();

  /* A constant is always preferred over a field reference */
  if (field1->const_item() || field1_real->const_item())
    return -1;
  if (field2->const_item() || field2_real->const_item())
    return 1;
  Item_field *f1= (Item_field *) field1_real;
  Item_field *f2= (Item_field *) field2_real;
  /* Outer references are preferred over fields of the current query */
  if (f1->used_tables() & OUTER_REF_TABLE_BIT)
  {
    outer_ref= 1;
    cmp= -1;
  }
  if (f2->used_tables() & OUTER_REF_TABLE_BIT)
  {
    outer_ref= 1;
    cmp++;
  }
  if (outer_ref)
    return cmp;
  JOIN_TAB **idx= (JOIN_TAB **) table_join_idx;

  JOIN_TAB *tab1= idx[f1->field->table->tablenr];
  JOIN_TAB *tab2= idx[f2->field->table->tablenr];

  /*
    if one of the tables is inside a merged SJM nest and another one isn't,
    compare SJM bush roots of the tables.
  */
  if (tab1->bush_root_tab != tab2->bush_root_tab)
  {
    if (tab1->bush_root_tab)
      tab1= tab1->bush_root_tab;

    if (tab2->bush_root_tab)
      tab2= tab2->bush_root_tab;
  }

  /* Earlier position in the join order wins */
  cmp= (int)(tab1 - tab2);

  if (!cmp)
  {
    /* Fields f1, f2 belong to the same table */

    JOIN_TAB *tab= idx[f1->field->table->tablenr];
    uint keyno= MAX_KEY;
    if (tab->ref.key_parts)
      keyno= tab->ref.key;
    else if (tab->select && tab->select->quick)
      keyno = tab->select->quick->index;
    if (keyno != MAX_KEY)
    {
      if (f1->field->part_of_key.is_set(keyno))
        cmp= -1;
      if (f2->field->part_of_key.is_set(keyno))
        cmp++;
      /*
        Here:
        if both f1, f2 are components of the key tab->ref.key then cmp==0,
        if only f1 is a component of the key then cmp==-1 (f1 is better),
        if only f2 is a component of the key then cmp==1, (f2 is better),
        if neither f1 nor f2 is a component of the key then cmp==0.
      */
      if (!cmp)
      {
        /* Tie-break by position within the key: earlier key part wins */
        KEY *key_info= tab->table->key_info + keyno;
        for (uint i= 0; i < key_info->user_defined_key_parts; i++)
        {
          Field *fld= key_info->key_part[i].field;
          if (fld->eq(f1->field))
          {
            cmp= -1; // f1 is better
            break;
          }
          if (fld->eq(f2->field))
          {
            cmp= 1;  // f2 is better
            break;
          }
        }
      }
    }
    /* Final tie-break: field position within the table definition */
    if (!cmp)
      cmp= f1->field->field_index-f2->field->field_index;
  }
  return cmp < 0 ? -1 : (cmp ? 1 : 0);
}


/*
  Return the semi-join materialization nest the item's table is embedded in,
  or NULL if the table is not inside a used SJ-materialization nest.
*/
static TABLE_LIST* embedding_sjm(Item *item)
{
  Item_field *item_field= (Item_field *) (item->real_item());
  TABLE_LIST *nest= item_field->field->table->pos_in_table_list->embedding;
  if (nest && nest->sj_mat_info && nest->sj_mat_info->is_used)
    return nest;
  else
    return NULL;
}

/**
  Generate minimal set of simple equalities equivalent to a multiple equality.

  The function retrieves the fields of the multiple equality item
  item_equal and for each field f:
  - if item_equal contains const it generates the equality f=const_item;
  - otherwise, if f is not the first field, generates the equality
    f=item_equal->get_first().
  All generated equalities are added to the cond conjunction.

  @param cond            condition to add the generated equality to
  @param upper_levels    structure to access multiple equality of upper levels
  @param item_equal      multiple equality to generate simple equality from

  @note
    Before generating an equality the function checks that it has not
    been generated for multiple equalities of the upper levels.
    E.g. for the following where condition
    WHERE a=5 AND ((a=b AND b=c) OR  c>4)
    the upper level AND condition will contain =(5,a),
    while the lower level AND condition will contain =(5,a,b,c).
    When splitting =(5,a,b,c) into separate equality predicates
    we should omit 5=a, as we have it already in the upper level.
    The following where condition gives us a more complicated case:
    WHERE t1.a=t2.b AND t3.c=t4.d AND (t2.b=t3.c OR t4.e>5 ...) AND ...
    Given the tables are accessed in the order t1->t2->t3->t4 for
    the selected query execution plan the lower level multiple
    equality =(t1.a,t2.b,t3.c,t4.d) formally should be converted to
    t1.a=t2.b AND t1.a=t3.c AND t1.a=t4.d. But t1.a=t2.b will be
    generated for the upper level. Also t3.c=t4.d will be generated there.
    So only t1.a=t3.c should be left in the lower level.
    If cond is equal to 0, then no more than one equality is generated
    and a pointer to it is returned as the result of the function.

    Equality substitution and semi-join materialization nests:

       In case join order looks like this:

          outer_tbl1 outer_tbl2 SJM (inner_tbl1 inner_tbl2) outer_tbl3

        We must not construct equalities like

           outer_tbl1.col = inner_tbl1.col

        because they would get attached to inner_tbl1 and will get evaluated
        during materialization phase, when we don't have current value of
        outer_tbl1.col.

        Item_equal::get_first() also takes similar measures for dealing with
        equality substitution in presence of SJM nests.

    Grep for EqualityPropagationAndSjmNests for a more verbose description.
+ + @return + - The condition with generated simple equalities or + a pointer to the simple generated equality, if success. + - 0, otherwise. +*/ + +Item *eliminate_item_equal(THD *thd, COND *cond, COND_EQUAL *upper_levels, + Item_equal *item_equal) +{ + List eq_list; + Item_func_eq *eq_item= 0; + if (((Item *) item_equal)->const_item() && !item_equal->val_int()) + return (Item*) Item_false; + Item *item_const= item_equal->get_const(); + Item_equal_fields_iterator it(*item_equal); + Item *head; + TABLE_LIST *current_sjm= NULL; + Item *current_sjm_head= NULL; + + DBUG_ASSERT(!cond || + cond->is_bool_literal() || + (cond->type() == Item::FUNC_ITEM && + ((Item_func *) cond)->functype() == Item_func::EQ_FUNC) || + (cond->type() == Item::COND_ITEM && + ((Item_func *) cond)->functype() == Item_func::COND_AND_FUNC)); + + /* + Pick the "head" item: the constant one or the first in the join order + (if the first in the join order happends to be inside an SJM nest, that's + ok, because this is where the value will be unpacked after + materialization). + */ + if (item_const) + head= item_const; + else + { + TABLE_LIST *emb_nest; + head= item_equal->get_first(NO_PARTICULAR_TAB, NULL); + it++; + if ((emb_nest= embedding_sjm(head))) + { + current_sjm= emb_nest; + current_sjm_head= head; + } + } + + Item *field_item; + /* + For each other item, generate "item=head" equality (except the tables that + are within SJ-Materialization nests, for those "head" is defined + differently) + */ + while ((field_item= it++)) + { + Item_equal *upper= field_item->find_item_equal(upper_levels); + Item *item= field_item; + TABLE_LIST *field_sjm= embedding_sjm(field_item); + if (!field_sjm) + { + current_sjm= NULL; + current_sjm_head= NULL; + } + + /* + Check if "field_item=head" equality is already guaranteed to be true + on upper AND-levels. 
+ */ + if (upper) + { + TABLE_LIST *native_sjm= embedding_sjm(item_equal->context_field); + Item *upper_const= upper->get_const(); + if (item_const && upper_const) + { + /* + Upper item also has "field_item=const". + Don't produce equality if const is equal to item_const. + */ + Item_func_eq *func= new (thd->mem_root) Item_func_eq(thd, item_const, upper_const); + func->set_cmp_func(thd); + func->quick_fix_field(); + if (func->val_int()) + item= 0; + } + else + { + Item_equal_fields_iterator li(*item_equal); + while ((item= li++) != field_item) + { + if (embedding_sjm(item) == field_sjm && + item->find_item_equal(upper_levels) == upper) + break; + } + } + if (embedding_sjm(field_item) != native_sjm) + item= NULL; /* Don't produce equality */ + } + + bool produce_equality= MY_TEST(item == field_item); + if (!item_const && field_sjm && field_sjm != current_sjm) + { + /* Entering an SJM nest */ + current_sjm_head= field_item; + if (!field_sjm->sj_mat_info->is_sj_scan) + produce_equality= FALSE; + } + + if (produce_equality) + { + if (eq_item && eq_list.push_back(eq_item, thd->mem_root)) + return 0; + + /* + If we're inside an SJM-nest (current_sjm!=NULL), and the multi-equality + doesn't include a constant, we should produce equality with the first + of the equal items in this SJM (except for the first element inside the + SJM. For that, we produce the equality with the "head" item). + + In other cases, get the "head" item, which is either first of the + equals on top level, or the constant. + */ + Item *head_item= (!item_const && current_sjm && + current_sjm_head != field_item) ? current_sjm_head: head; + eq_item= new (thd->mem_root) Item_func_eq(thd, + field_item->remove_item_direct_ref(), + head_item->remove_item_direct_ref()); + + if (!eq_item || eq_item->set_cmp_func(thd)) + return 0; + eq_item->quick_fix_field(); + } + current_sjm= field_sjm; + } + + /* + We have produced zero, one, or more pair-wise equalities eq_i. 
We want to + return an expression in form: + + cond AND eq_1 AND eq_2 AND eq_3 AND ... + + 'cond' is a parameter for this function, which may be NULL, an Item_bool(1), + or an Item_func_eq or an Item_cond_and. + + We want to return a well-formed condition: no nested Item_cond_and objects, + or Item_cond_and with a single child: + - if 'cond' is an Item_cond_and, we add eq_i as its tail + - if 'cond' is Item_bool(1), we return eq_i + - otherwise, we create our own Item_cond_and and put 'cond' at the front of + it. + - if we have only one condition to return, we don't create an Item_cond_and + */ + + if (eq_item && eq_list.push_back(eq_item, thd->mem_root)) + return 0; + COND *res= 0; + switch (eq_list.elements) + { + case 0: + res= cond ? cond : (Item*) Item_true; + break; + case 1: + if (!cond || cond->is_bool_literal()) + res= eq_item; + break; + default: + break; + } + if (!res) + { + if (cond) + { + if (cond->type() == Item::COND_ITEM) + { + res= cond; + ((Item_cond *) res)->add_at_end(&eq_list); + } + else if (eq_list.push_front(cond, thd->mem_root)) + return 0; + } + } + if (!res) + res= new (thd->mem_root) Item_cond_and(thd, eq_list); + if (res) + { + res->quick_fix_field(); + res->update_used_tables(); + } + + return res; +} + + +/** + Substitute every field reference in a condition by the best equal field + and eliminate all multiple equality predicates. + + The function retrieves the cond condition and for each encountered + multiple equality predicate it sorts the field references in it + according to the order of tables specified by the table_join_idx + parameter. Then it eliminates the multiple equality predicate it + replacing it by the conjunction of simple equality predicates + equating every field from the multiple equality to the first + field in it, or to the constant, if there is any. 
+ After this the function retrieves all other conjuncted + predicates substitute every field reference by the field reference + to the first equal field or equal constant if there are any. + + @param context_tab Join tab that 'cond' will be attached to, or + NO_PARTICULAR_TAB. See notes above. + @param cond condition to process + @param cond_equal multiple equalities to take into consideration + @param table_join_idx index to tables determining field preference + @param do_substitution if false: do not do any field substitution + + @note + At the first glance full sort of fields in multiple equality + seems to be an overkill. Yet it's not the case due to possible + new fields in multiple equality item of lower levels. We want + the order in them to comply with the order of upper levels. + + context_tab may be used to specify which join tab `cond` will be + attached to. There are two possible cases: + + 1. context_tab != NO_PARTICULAR_TAB + We're doing substitution for an Item which will be evaluated in the + context of a particular item. For example, if the optimizer does a + ref access on "tbl1.key= expr" then + = equality substitution will be perfomed on 'expr' + = it is known in advance that 'expr' will be evaluated when + table t1 is accessed. + Note that in this kind of substution we never have to replace Item_equal + objects. For example, for + + t.key= func(col1=col2 AND col2=const) + + we will not build Item_equal or do equality substution (if we decide to, + this function will need to be fixed to handle it) + + 2. context_tab == NO_PARTICULAR_TAB + We're doing substitution in WHERE/ON condition, which is not yet + attached to any particular join_tab. We will use information about the + chosen join order to make "optimal" substitions, i.e. those that allow + to apply filtering as soon as possible. See eliminate_item_equal() and + Item_equal::get_first() for details. 
+ + @return + The transformed condition, or NULL in case of error +*/ + +static COND* substitute_for_best_equal_field(THD *thd, JOIN_TAB *context_tab, + COND *cond, + COND_EQUAL *cond_equal, + void *table_join_idx, + bool do_substitution) +{ + Item_equal *item_equal; + COND *org_cond= cond; // Return this in case of fatal error + + if (cond->type() == Item::COND_ITEM) + { + List *cond_list= ((Item_cond*) cond)->argument_list(); + + bool and_level= ((Item_cond*) cond)->functype() == + Item_func::COND_AND_FUNC; + if (and_level) + { + cond_equal= &((Item_cond_and *) cond)->m_cond_equal; + cond_list->disjoin((List *) &cond_equal->current_level);/* remove Item_equal objects from the AND. */ + + List_iterator_fast it(cond_equal->current_level); + while ((item_equal= it++)) + { + item_equal->sort(&compare_fields_by_table_order, table_join_idx); + } + } + + List_iterator li(*cond_list); + Item *item; + while ((item= li++)) + { + Item *new_item= substitute_for_best_equal_field(thd, context_tab, + item, cond_equal, + table_join_idx, + do_substitution); + /* + This works OK with PS/SP re-execution as changes are made to + the arguments of AND/OR items only + */ + if (new_item && new_item != item) + li.replace(new_item); + } + + if (and_level) + { + COND *eq_cond= 0; + List_iterator_fast it(cond_equal->current_level); + bool false_eq_cond= FALSE; + bool all_deleted= true; + while ((item_equal= it++)) + { + if (item_equal->get_extraction_flag() == MARKER_DELETION) + continue; + all_deleted= false; + eq_cond= eliminate_item_equal(thd, eq_cond, cond_equal->upper_levels, + item_equal); + if (!eq_cond) + { + eq_cond= 0; + break; + } + else if (eq_cond->is_bool_literal() && !eq_cond->val_bool()) + { + /* + This occurs when eliminate_item_equal() founds that cond is + always false and substitutes it with Item_int 0. + Due to this, value of item_equal will be 0, so just return it. 
+ */ + cond= eq_cond; + false_eq_cond= TRUE; + break; + } + } + if (eq_cond && !false_eq_cond) + { + /* Insert the generated equalities before all other conditions */ + if (eq_cond->type() == Item::COND_ITEM) + ((Item_cond *) cond)->add_at_head( + ((Item_cond *) eq_cond)->argument_list()); + else + { + if (cond_list->is_empty()) + cond= eq_cond; + else + { + /* Do not add an equality condition if it's always true */ + if (!eq_cond->is_bool_literal() && + cond_list->push_front(eq_cond, thd->mem_root)) + eq_cond= 0; + } + } + } + if (!eq_cond && !all_deleted) + { + /* + We are out of memory doing the transformation. + This is a fatal error now. However we bail out by returning the + original condition that we had before we started the transformation. + */ + cond_list->append((List *) &cond_equal->current_level); + } + } + } + else if (cond->type() == Item::FUNC_ITEM && + ((Item_func*) cond)->functype() == Item_func::MULT_EQUAL_FUNC) + { + item_equal= (Item_equal *) cond; + item_equal->sort(&compare_fields_by_table_order, table_join_idx); + cond_equal= item_equal->upper_levels; + if (cond_equal && cond_equal->current_level.head() == item_equal) + cond_equal= cond_equal->upper_levels; + if (item_equal->get_extraction_flag() == MARKER_DELETION) + return 0; + cond= eliminate_item_equal(thd, 0, cond_equal, item_equal); + return cond ? cond : org_cond; + } + else if (do_substitution) + { + while (cond_equal) + { + List_iterator_fast it(cond_equal->current_level); + while((item_equal= it++)) + { + REPLACE_EQUAL_FIELD_ARG arg= {item_equal, context_tab}; + if (!(cond= cond->transform(thd, &Item::replace_equal_field, + (uchar *) &arg))) + return 0; + } + cond_equal= cond_equal->upper_levels; + } + } + return cond; +} + + +/** + Check appearance of new constant items in multiple equalities + of a condition after reading a constant table. 
+ + The function retrieves the cond condition and for each encountered + multiple equality checks whether new constants have appeared after + reading the constant (single row) table tab. If so it adjusts + the multiple equality appropriately. + + @param cond condition whose multiple equalities are to be checked + @param table constant table that has been read + @param const_key mark key parts as constant +*/ + +static void update_const_equal_items(THD *thd, COND *cond, JOIN_TAB *tab, + bool const_key) +{ + if (!(cond->used_tables() & tab->table->map)) + return; + + if (cond->type() == Item::COND_ITEM) + { + List *cond_list= ((Item_cond*) cond)->argument_list(); + List_iterator_fast li(*cond_list); + Item *item; + while ((item= li++)) + update_const_equal_items(thd, item, tab, + cond->is_top_level_item() && + ((Item_cond*) cond)->functype() == + Item_func::COND_AND_FUNC); + } + else if (cond->type() == Item::FUNC_ITEM && + ((Item_func*) cond)->functype() == Item_func::MULT_EQUAL_FUNC) + { + Item_equal *item_equal= (Item_equal *) cond; + bool contained_const= item_equal->get_const() != NULL; + item_equal->update_const(thd); + if (!contained_const && item_equal->get_const()) + { + /* Update keys for range analysis */ + Item_equal_fields_iterator it(*item_equal); + while (it++) + { + Field *field= it.get_curr_field(); + JOIN_TAB *stat= field->table->reginfo.join_tab; + key_map possible_keys= field->key_start; + possible_keys.intersect(field->table->keys_in_use_for_query); + stat[0].const_keys.merge(possible_keys); + + /* + For each field in the multiple equality (for which we know that it + is a constant) we have to find its corresponding key part, and set + that key part in const_key_parts. 
+ */ + if (!possible_keys.is_clear_all()) + { + TABLE *field_tab= field->table; + KEYUSE *use; + for (use= stat->keyuse; use && use->table == field_tab; use++) + if (const_key && + !use->is_for_hash_join() && possible_keys.is_set(use->key) && + field_tab->key_info[use->key].key_part[use->keypart].field == + field) + field_tab->const_key_parts[use->key]|= use->keypart_map; + } + } + } + } +} + + +/** + Check if + WHERE expr=value AND expr=const + can be rewritten as: + WHERE const=value AND expr=const + + @param target - the target operator whose "expr" argument will be + replaced to "const". + @param target_expr - the target's "expr" which will be replaced to "const". + @param target_value - the target's second argument, it will remain unchanged. + @param source - the equality expression ("=" or "<=>") that + can be used to rewrite the "target" part + (under certain conditions, see the code). + @param source_expr - the source's "expr". It should be exactly equal to + the target's "expr" to make condition rewrite possible. + @param source_const - the source's "const" argument, it will be inserted + into "target" instead of "expr". 
+*/ +static bool +can_change_cond_ref_to_const(Item_bool_func2 *target, + Item *target_expr, Item *target_value, + Item_bool_func2 *source, + Item *source_expr, Item *source_const) +{ + return target_expr->eq(source_expr,0) && + target_value != source_const && + target->compare_type_handler()-> + can_change_cond_ref_to_const(target, target_expr, target_value, + source, source_expr, source_const); +} + + +/* + change field = field to field = const for each found field = const in the + and_level +*/ + +static void +change_cond_ref_to_const(THD *thd, I_List *save_list, + Item *and_father, Item *cond, + Item_bool_func2 *field_value_owner, + Item *field, Item *value) +{ + if (cond->type() == Item::COND_ITEM) + { + bool and_level= ((Item_cond*) cond)->functype() == + Item_func::COND_AND_FUNC; + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + change_cond_ref_to_const(thd, save_list,and_level ? cond : item, item, + field_value_owner, field, value); + return; + } + if (cond->eq_cmp_result() == Item::COND_OK) + return; // Not a boolean function + + Item_bool_func2 *func= (Item_bool_func2*) cond; + Item **args= func->arguments(); + Item *left_item= args[0]; + Item *right_item= args[1]; + Item_func::Functype functype= func->functype(); + + if (can_change_cond_ref_to_const(func, right_item, left_item, + field_value_owner, field, value)) + { + Item *tmp=value->clone_item(thd); + if (tmp) + { + tmp->collation.set(right_item->collation); + thd->change_item_tree(args + 1, tmp); + func->update_used_tables(); + if ((functype == Item_func::EQ_FUNC || functype == Item_func::EQUAL_FUNC) + && and_father != cond && !left_item->const_item()) + { + cond->marker= MARKER_CHANGE_COND; + COND_CMP *tmp2; + /* Will work, even if malloc would fail */ + if ((tmp2= new (thd->mem_root) COND_CMP(and_father, func))) + save_list->push_back(tmp2); + } + /* + LIKE can be optimized for BINARY/VARBINARY/BLOB columns, e.g.: + + from: WHERE CONCAT(c1)='const1' 
AND CONCAT(c1) LIKE 'const2' + to: WHERE CONCAT(c1)='const1' AND 'const1' LIKE 'const2' + + So make sure to use set_cmp_func() only for non-LIKE operators. + */ + if (functype != Item_func::LIKE_FUNC) + ((Item_bool_rowready_func2*) func)->set_cmp_func(thd); + } + } + else if (can_change_cond_ref_to_const(func, left_item, right_item, + field_value_owner, field, value)) + { + Item *tmp= value->clone_item(thd); + if (tmp) + { + tmp->collation.set(left_item->collation); + thd->change_item_tree(args, tmp); + value= tmp; + func->update_used_tables(); + if ((functype == Item_func::EQ_FUNC || functype == Item_func::EQUAL_FUNC) + && and_father != cond && !right_item->const_item()) + { + args[0]= args[1]; // For easy check + thd->change_item_tree(args + 1, value); + cond->marker= MARKER_CHANGE_COND; + COND_CMP *tmp2; + /* Will work, even if malloc would fail */ + if ((tmp2=new (thd->mem_root) COND_CMP(and_father, func))) + save_list->push_back(tmp2); + } + if (functype != Item_func::LIKE_FUNC) + ((Item_bool_rowready_func2*) func)->set_cmp_func(thd); + } + } +} + + +static void +propagate_cond_constants(THD *thd, I_List *save_list, + COND *and_father, COND *cond) +{ + if (cond->type() == Item::COND_ITEM) + { + bool and_level= ((Item_cond*) cond)->functype() == + Item_func::COND_AND_FUNC; + List_iterator_fast li(*((Item_cond*) cond)->argument_list()); + Item *item; + I_List save; + while ((item=li++)) + { + propagate_cond_constants(thd, &save,and_level ? 
cond : item, item); + } + if (and_level) + { // Handle other found items + I_List_iterator cond_itr(save); + COND_CMP *cond_cmp; + while ((cond_cmp=cond_itr++)) + { + Item **args= cond_cmp->cmp_func->arguments(); + if (!args[0]->const_item()) + change_cond_ref_to_const(thd, &save,cond_cmp->and_level, + cond_cmp->and_level, + cond_cmp->cmp_func, args[0], args[1]); + } + } + } + else if (and_father != cond && cond->marker == MARKER_UNUSED) // In a AND group + { + if (cond->type() == Item::FUNC_ITEM && + (((Item_func*) cond)->functype() == Item_func::EQ_FUNC || + ((Item_func*) cond)->functype() == Item_func::EQUAL_FUNC)) + { + Item_bool_func2 *func= dynamic_cast(cond); + Item **args= func->arguments(); + bool left_const= args[0]->can_eval_in_optimize(); + bool right_const= args[1]->can_eval_in_optimize(); + if (!(left_const && right_const) && + args[0]->cmp_type() == args[1]->cmp_type()) + { + if (right_const) + { + resolve_const_item(thd, &args[1], args[0]); + func->update_used_tables(); + change_cond_ref_to_const(thd, save_list, and_father, and_father, + func, args[0], args[1]); + } + else if (left_const) + { + resolve_const_item(thd, &args[0], args[1]); + func->update_used_tables(); + change_cond_ref_to_const(thd, save_list, and_father, and_father, + func, args[1], args[0]); + } + } + } + } +} + +/** + Simplify joins replacing outer joins by inner joins whenever it's + possible. + + The function, during a retrieval of join_list, eliminates those + outer joins that can be converted into inner join, possibly nested. + It also moves the on expressions for the converted outer joins + and from inner joins to conds. + The function also calculates some attributes for nested joins: + - used_tables + - not_null_tables + - dep_tables. + - on_expr_dep_tables + The first two attributes are used to test whether an outer join can + be substituted for an inner join. The third attribute represents the + relation 'to be dependent on' for tables. 
If table t2 is dependent + on table t1, then in any evaluated execution plan table access to + table t2 must precede access to table t2. This relation is used also + to check whether the query contains invalid cross-references. + The forth attribute is an auxiliary one and is used to calculate + dep_tables. + As the attribute dep_tables qualifies possibles orders of tables in the + execution plan, the dependencies required by the straight join + modifiers are reflected in this attribute as well. + The function also removes all braces that can be removed from the join + expression without changing its meaning. + + @note + An outer join can be replaced by an inner join if the where condition + or the on expression for an embedding nested join contains a conjunctive + predicate rejecting null values for some attribute of the inner tables. + + E.g. in the query: + @code + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a WHERE t2.b < 5 + @endcode + the predicate t2.b < 5 rejects nulls. + The query is converted first to: + @code + SELECT * FROM t1 INNER JOIN t2 ON t2.a=t1.a WHERE t2.b < 5 + @endcode + then to the equivalent form: + @code + SELECT * FROM t1, t2 ON t2.a=t1.a WHERE t2.b < 5 AND t2.a=t1.a + @endcode + + + Similarly the following query: + @code + SELECT * from t1 LEFT JOIN (t2, t3) ON t2.a=t1.a t3.b=t1.b + WHERE t2.c < 5 + @endcode + is converted to: + @code + SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a t3.b=t1.b + + @endcode + + One conversion might trigger another: + @code + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a + LEFT JOIN t3 ON t3.b=t2.b + WHERE t3 IS NOT NULL => + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a, t3 + WHERE t3 IS NOT NULL AND t3.b=t2.b => + SELECT * FROM t1, t2, t3 + WHERE t3 IS NOT NULL AND t3.b=t2.b AND t2.a=t1.a + @endcode + + The function removes all unnecessary braces from the expression + produced by the conversions. + E.g. 
+ @code + SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b + @endcode + finally is converted to: + @code + SELECT * FROM t1, t2, t3 WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b + + @endcode + + + It also will remove braces from the following queries: + @code + SELECT * from (t1 LEFT JOIN t2 ON t2.a=t1.a) LEFT JOIN t3 ON t3.b=t2.b + SELECT * from (t1, (t2,t3)) WHERE t1.a=t2.a AND t2.b=t3.b. + @endcode + + The benefit of this simplification procedure is that it might return + a query for which the optimizer can evaluate execution plan with more + join orders. With a left join operation the optimizer does not + consider any plan where one of the inner tables is before some of outer + tables. + + IMPLEMENTATION + The function is implemented by a recursive procedure. On the recursive + ascent all attributes are calculated, all outer joins that can be + converted are replaced and then all unnecessary braces are removed. + As join list contains join tables in the reverse order sequential + elimination of outer joins does not require extra recursive calls. + + SEMI-JOIN NOTES + Remove all semi-joins that have are within another semi-join (i.e. 
have + an "ancestor" semi-join nest) + + EXAMPLES + Here is an example of a join query with invalid cross references: + @code + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b + @endcode + + @param join reference to the query info + @param join_list list representation of the join to be converted + @param conds conditions to add on expressions for converted joins + @param top true <=> conds is the where condition + @param in_sj TRUE <=> processing semi-join nest's children + @return + - The new condition, if success + - 0, otherwise +*/ + +static COND * +simplify_joins(JOIN *join, List *join_list, COND *conds, bool top, + bool in_sj) +{ + TABLE_LIST *table; + NESTED_JOIN *nested_join; + TABLE_LIST *prev_table= 0; + List_iterator li(*join_list); + bool straight_join= MY_TEST(join->select_options & SELECT_STRAIGHT_JOIN); + DBUG_ENTER("simplify_joins"); + + /* + Try to simplify join operations from join_list. + The most outer join operation is checked for conversion first. + */ + while ((table= li++)) + { + table_map used_tables; + table_map not_null_tables= (table_map) 0; + + if ((nested_join= table->nested_join)) + { + /* + If the element of join_list is a nested join apply + the procedure to its nested join list first. + */ + if (table->on_expr) + { + Item *expr= table->on_expr; + /* + If an on expression E is attached to the table, + check all null rejected predicates in this expression. + If such a predicate over an attribute belonging to + an inner table of an embedded outer join is found, + the outer join is converted to an inner join and + the corresponding on expression is added to E. 
+ */ + expr= simplify_joins(join, &nested_join->join_list, + expr, FALSE, in_sj || table->sj_on_expr); + + if (!table->prep_on_expr || expr != table->on_expr) + { + DBUG_ASSERT(expr); + + table->on_expr= expr; + table->prep_on_expr= expr->copy_andor_structure(join->thd); + } + } + nested_join->used_tables= (table_map) 0; + nested_join->not_null_tables=(table_map) 0; + conds= simplify_joins(join, &nested_join->join_list, conds, top, + in_sj || table->sj_on_expr); + used_tables= nested_join->used_tables; + not_null_tables= nested_join->not_null_tables; + /* The following two might become unequal after table elimination: */ + nested_join->n_tables= nested_join->join_list.elements; + } + else + { + if (!table->prep_on_expr) + table->prep_on_expr= table->on_expr; + used_tables= table->get_map(); + if (conds) + not_null_tables= conds->not_null_tables(); + } + + if (table->embedding) + { + table->embedding->nested_join->used_tables|= used_tables; + table->embedding->nested_join->not_null_tables|= not_null_tables; + } + + if (!(table->outer_join & (JOIN_TYPE_LEFT | JOIN_TYPE_RIGHT)) || + (used_tables & not_null_tables)) + { + /* + For some of the inner tables there are conjunctive predicates + that reject nulls => the outer join can be replaced by an inner join. + */ + if (table->outer_join && !table->embedding && table->table) + table->table->maybe_null= FALSE; + table->outer_join= 0; + if (!(straight_join || table->straight)) + { + table->dep_tables= 0; + TABLE_LIST *embedding= table->embedding; + while (embedding) + { + if (embedding->nested_join->join_list.head()->outer_join) + { + if (!embedding->sj_subq_pred) + table->dep_tables= embedding->dep_tables; + break; + } + embedding= embedding->embedding; + } + } + if (table->on_expr) + { + /* Add ON expression to the WHERE or upper-level ON condition. 
*/ + if (conds) + { + conds= and_conds(join->thd, conds, table->on_expr); + conds->top_level_item(); + /* conds is always a new item as both cond and on_expr existed */ + DBUG_ASSERT(!conds->fixed()); + conds->fix_fields(join->thd, &conds); + } + else + conds= table->on_expr; + table->prep_on_expr= table->on_expr= 0; + } + } + + /* + Only inner tables of non-convertible outer joins + remain with on_expr. + */ + if (table->on_expr) + { + table_map table_on_expr_used_tables= table->on_expr->used_tables(); + table->dep_tables|= table_on_expr_used_tables; + if (table->embedding) + { + table->dep_tables&= ~table->embedding->nested_join->used_tables; + /* + Embedding table depends on tables used + in embedded on expressions. + */ + table->embedding->on_expr_dep_tables|= table_on_expr_used_tables; + } + else + table->dep_tables&= ~table->get_map(); + } + + if (prev_table) + { + /* The order of tables is reverse: prev_table follows table */ + if (prev_table->straight || straight_join) + prev_table->dep_tables|= used_tables; + if (prev_table->on_expr) + { + prev_table->dep_tables|= table->on_expr_dep_tables; + table_map prev_used_tables= prev_table->nested_join ? + prev_table->nested_join->used_tables : + prev_table->get_map(); + /* + If on expression contains only references to inner tables + we still make the inner tables dependent on the outer tables. + It would be enough to set dependency only on one outer table + for them. Yet this is really a rare case. + Note: + RAND_TABLE_BIT mask should not be counted as it + prevents update of inner table dependences. + For example it might happen if RAND() function + is used in JOIN ON clause. + */ + if (!((prev_table->on_expr->used_tables() & + ~(OUTER_REF_TABLE_BIT | RAND_TABLE_BIT)) & + ~prev_used_tables)) + prev_table->dep_tables|= used_tables; + } + } + prev_table= table; + } + + /* + Flatten nested joins that can be flattened. + no ON expression and not a semi-join => can be flattened. 
+ */ + li.rewind(); + while ((table= li++)) + { + nested_join= table->nested_join; + if (table->sj_on_expr && !in_sj) + { + /* + If this is a semi-join that is not contained within another semi-join + leave it intact (otherwise it is flattened) + */ + /* + Make sure that any semi-join appear in + the join->select_lex->sj_nests list only once + */ + List_iterator_fast sj_it(join->select_lex->sj_nests); + TABLE_LIST *sj_nest; + while ((sj_nest= sj_it++)) + { + if (table == sj_nest) + break; + } + if (sj_nest) + continue; + join->select_lex->sj_nests.push_back(table, join->thd->mem_root); + + /* + Also, walk through semi-join children and mark those that are now + top-level + */ + TABLE_LIST *tbl; + List_iterator it(nested_join->join_list); + while ((tbl= it++)) + { + if (!tbl->on_expr && tbl->table) + tbl->table->maybe_null= FALSE; + } + } + else if (nested_join && !table->on_expr) + { + TABLE_LIST *tbl; + List_iterator it(nested_join->join_list); + List repl_list; + while ((tbl= it++)) + { + tbl->embedding= table->embedding; + if (!tbl->embedding && !tbl->on_expr && tbl->table) + tbl->table->maybe_null= FALSE; + tbl->join_list= table->join_list; + repl_list.push_back(tbl, join->thd->mem_root); + tbl->dep_tables|= table->dep_tables; + } + li.replace(repl_list); + } + } + DBUG_RETURN(conds); +} + + +/** + Assign each nested join structure a bit in nested_join_map. + + Assign each nested join structure (except ones that embed only one element + and so are redundant) a bit in nested_join_map. + + @param join Join being processed + @param join_list List of tables + @param first_unused Number of first unused bit in nested_join_map before the + call + + @note + This function is called after simplify_joins(), when there are no + redundant nested joins, #non_redundant_nested_joins <= #tables_in_join so + we will not run out of bits in nested_join_map. + + @return + First unused bit in nested_join_map after the call. 
+*/ + +static uint build_bitmap_for_nested_joins(List *join_list, + uint first_unused) +{ + List_iterator li(*join_list); + TABLE_LIST *table; + DBUG_ENTER("build_bitmap_for_nested_joins"); + while ((table= li++)) + { + NESTED_JOIN *nested_join; + if ((nested_join= table->nested_join)) + { + /* + It is guaranteed by simplify_joins() function that a nested join + that has only one child represents a single table VIEW (and the child + is an underlying table). We don't assign bits to such nested join + structures because + 1. it is redundant (a "sequence" of one table cannot be interleaved + with anything) + 2. we could run out bits in nested_join_map otherwise. + */ + if (nested_join->n_tables != 1) + { + /* Don't assign bits to sj-nests */ + if (table->on_expr) + nested_join->nj_map= (nested_join_map) 1 << first_unused++; + first_unused= build_bitmap_for_nested_joins(&nested_join->join_list, + first_unused); + } + } + } + DBUG_RETURN(first_unused); +} + + +/** + Set NESTED_JOIN::counter and n_tables in all nested joins in passed list. + + For all nested joins contained in the passed join_list (including its + children), set: + - nested_join->counter=0 + - nested_join->n_tables= {number of non-degenerate direct children}. + + Non-degenerate means non-const base table or a join nest that has a + non-degenerate child. + + @param join_list List of nested joins to process. It may also contain base + tables which will be ignored. 
+*/
+
+static uint reset_nj_counters(JOIN *join, List *join_list)
+{
+  List_iterator li(*join_list);
+  TABLE_LIST *table;
+  DBUG_ENTER("reset_nj_counters");
+  uint n=0;                      // number of non-degenerate direct children
+  while ((table= li++))
+  {
+    NESTED_JOIN *nested_join;
+    bool is_eliminated_nest= FALSE;
+    if ((nested_join= table->nested_join))
+    {
+      nested_join->counter= 0;
+      nested_join->n_tables= reset_nj_counters(join, &nested_join->join_list);
+      /* A nest with no remaining (non-degenerate) children is degenerate */
+      if (!nested_join->n_tables)
+        is_eliminated_nest= TRUE;
+    }
+    /* Tables removed by table elimination or made constant do not count */
+    const table_map removed_tables= join->eliminated_tables |
+                                    join->const_table_map;
+
+    if ((table->nested_join && !is_eliminated_nest) ||
+        (!table->nested_join && (table->table->map & ~removed_tables)))
+      n++;
+  }
+  DBUG_RETURN(n);
+}
+
+
+/**
+  Check interleaving with inner tables of an outer join for
+  extension table.
+
+  Check if table next_tab can be added to current partial join order, and
+  if yes, record that it has been added.
+
+  The function assumes that both current partial join order and its
+  extension with next_tab are valid wrt table dependencies.
+
+  @verbatim
+     IMPLEMENTATION
+       LIMITATIONS ON JOIN ORDER
+         The nested [outer] joins executioner algorithm imposes these
+         limitations on join order:
+         1. "Outer tables first" - any "outer" table must be before any
+            corresponding "inner" table.
+         2. "No interleaving" - tables inside a nested join must form a
+            continuous sequence in join order (i.e. the sequence must not be
+            interrupted by tables that are outside of this nested join).
+
+         #1 is checked elsewhere, this function checks #2 provided that #1 has
+         been already checked.
+
+       WHY NEED NON-INTERLEAVING
+         Consider an example:
+
+           select * from t0 join t1 left join (t2 join t3) on cond1
+
+         The join order "t1 t2 t0 t3" is invalid:
+
+         table t0 is outside of the nested join, so WHERE condition
+         for t0 is attached directly to t0 (without triggers, and it
+         may be used to access t0). Applying WHERE(t0) to (t2,t0,t3)
+         record is invalid as we may miss combinations of (t1, t2, t3)
+         that satisfy condition cond1, and produce a null-complemented
+         (t1, t2.NULLs, t3.NULLs) row, which should not have been
+         produced.
+
+         If table t0 is not between t2 and t3, the problem doesn't exist:
+          If t0 is located after (t2,t3), WHERE(t0) is applied after nested
+           join processing has finished.
+          If t0 is located before (t2,t3), predicates like WHERE_cond(t0, t2)
+           are wrapped into condition triggers, which takes care of correct
+           nested join processing.
+
+       HOW IT IS IMPLEMENTED
+         The limitations on join order can be rephrased as follows: for valid
+         join order one must be able to:
+           1. write down the used tables in the join order on one line.
+           2. for each nested join, put one '(' and one ')' on the said line
+           3. write "LEFT JOIN" and "ON (...)" where appropriate
+           4. get a query equivalent to the query we're trying to execute.
+
+         Calls to check_interleaving_with_nj() are equivalent to writing the
+         above described line from left to right.
+
+         A single check_interleaving_with_nj(A,B) call is equivalent
+         to writing table B and appropriate brackets on condition that
+         table A and appropriate brackets is the last what was
+         written. Graphically the transition is as follows:
+
+                              +---- current position
+                              |
+             ... last_tab ))) | ( next_tab ) )..) | ...
+                           X     Y          Z     |
+                                                  +- need to move to this
+                                                     position.
+
+         Notes about the position:
+           The caller guarantees that there is no more than one X-bracket by
+           checking "!(remaining_tables & s->dependent)" before calling this
+           function. X-bracket may have a pair in Y-bracket.
+
+         When "writing" we store/update this auxiliary info about the current
+         position:
+          1. join->cur_embedding_map - bitmap of pairs of brackets (aka nested
+             joins) we've opened but didn't close.
+          2. {each NESTED_JOIN structure not simplified away}->counter - number
+             of this nested join's children that have already been added to
+             the partial join order.
+  @endverbatim
+
+  @param next_tab   Table we're going to extend the current partial join with
+
+  @retval
+    FALSE  Join order extended, nested joins info about current join
+    order (see NOTE section) updated.
+  @retval
+    TRUE   Requested join order extension not allowed.
+*/
+
+static bool check_interleaving_with_nj(JOIN_TAB *next_tab)
+{
+  JOIN *join= next_tab->join;
+
+  if (join->cur_embedding_map & ~next_tab->embedding_map)
+  {
+    /*
+      next_tab is outside of the "pair of brackets" we're currently in.
+      Cannot add it.
+    */
+    return TRUE;
+  }
+
+  TABLE_LIST *next_emb= next_tab->table->pos_in_table_list->embedding;
+  /*
+    Do update counters for "pairs of brackets" that we've left (marked as
+    X,Y,Z in the above picture)
+  */
+  for (;next_emb && next_emb != join->emb_sjm_nest; next_emb= next_emb->embedding)
+  {
+    /* Semi-join nests are not numbered and are skipped here */
+    if (!next_emb->sj_on_expr)
+    {
+      next_emb->nested_join->counter++;
+      if (next_emb->nested_join->counter == 1)
+      {
+        /*
+          next_emb is the first table inside a nested join we've "entered". In
+          the picture above, we're looking at the 'X' bracket. Don't exit yet as
+          X bracket might have Y pair bracket.
+        */
+        join->cur_embedding_map |= next_emb->nested_join->nj_map;
+      }
+
+      if (next_emb->nested_join->n_tables !=
+          next_emb->nested_join->counter)
+        break;
+
+      /*
+        We're currently at Y or Z-bracket as depicted in the above picture.
+        Mark that we've left it and continue walking up the brackets hierarchy.
+      */
+      join->cur_embedding_map &= ~next_emb->nested_join->nj_map;
+    }
+  }
+  return FALSE;
+}
+
+
+/**
+  Nested joins perspective: Remove the last table from the join order.
+
+  The algorithm is the reciprocal of check_interleaving_with_nj(), hence
+  parent join nest nodes are updated only when the last table in its child
+  node is removed. The ASCII graphic below will clarify.
+
+  %A table nesting such as t1 x [ ( t2 x t3 ) x ( t4 x t5 ) ] is
+  represented by the below join nest tree.
+
+  @verbatim
+                     NJ1
+                  _/ /  \
+                _/  /    NJ2
+              _/   /    /  \
+             /    /    /    \
+            t1 x [ (t2 x t3) x (t4 x t5) ]
+  @endverbatim
+
+  At the point in time when check_interleaving_with_nj() adds the table t5 to
+  the query execution plan, QEP, it also directs the node named NJ2 to mark
+  the table as covered. NJ2 does so by incrementing its @c counter
+  member. Since all of NJ2's tables are now covered by the QEP, the algorithm
+  proceeds up the tree to NJ1, incrementing its counter as well. All join
+  nests are now completely covered by the QEP.
+
+  restore_prev_nj_state() does the above in reverse. As seen above, the node
+  NJ1 contains the nodes t2, t3, and NJ2. Its counter being equal to 3 means
+  that the plan covers t2, t3, and NJ2, @e and that the sub-plan (t4 x t5)
+  completely covers NJ2. The removal of t5 from the partial plan will first
+  decrement NJ2's counter to 1. It will then detect that NJ2 went from being
+  completely to partially covered, and hence the algorithm must continue
+  upwards to NJ1 and decrement its counter to 2. %A subsequent removal of t4
+  will however not influence NJ1 since it did not un-cover the last table in
+  NJ2.
+
+  SYNOPSIS
+    restore_prev_nj_state()
+      last  join table to remove, it is assumed to be the last in current
+            partial join order.
+
+  DESCRIPTION
+
+    Remove the last table from the partial join order and update the nested
+    joins counters and join->cur_embedding_map. It is ok to call this
+    function for the first table in join order (for which
+    check_interleaving_with_nj has not been called)
+
+  @param last  join table to remove, it is assumed to be the last in current
+               partial join order.
+*/
+
+static void restore_prev_nj_state(JOIN_TAB *last)
+{
+  TABLE_LIST *last_emb= last->table->pos_in_table_list->embedding;
+  JOIN *join= last->join;
+  for (;last_emb != NULL && last_emb != join->emb_sjm_nest;
+       last_emb= last_emb->embedding)
+  {
+    /* Semi-join nests have no nj_map bits assigned and are skipped */
+    if (!last_emb->sj_on_expr)
+    {
+      NESTED_JOIN *nest= last_emb->nested_join;
+      DBUG_ASSERT(nest->counter > 0);
+
+      bool was_fully_covered= nest->is_fully_covered();
+
+      /* Re-open the bracket: this nest is (again) only partially covered */
+      join->cur_embedding_map|= nest->nj_map;
+
+      if (--nest->counter == 0)
+        join->cur_embedding_map&= ~nest->nj_map;
+
+      /*
+        Parent nests are affected only when the removed table un-covered the
+        last table of this nest; otherwise stop walking up.
+      */
+      if (!was_fully_covered)
+        break;
+    }
+  }
+}
+
+
+/*
+  Compute allowed_top_level_tables - a bitmap of tables one can put into the
+  join order if the last table in the join prefix is not inside any outer
+  join nest.
+
+  NESTED_JOIN::direct_children_map - a bitmap of tables ... if the last
+  table in the join prefix is inside the join nest.
+
+  Note: it looks like a sensible way to do this is a top-down descent on
+  JOIN::join_list, but apparently that list is missing I_S tables.
+  e.g. for SHOW TABLES WHERE col IN (SELECT ...) it will just have a
+  semi-join nest.
+*/
+
+void JOIN::calc_allowed_top_level_tables(SELECT_LEX *lex)
+{
+  TABLE_LIST *tl;
+  List_iterator ti(lex->leaf_tables);
+  DBUG_ENTER("JOIN::calc_allowed_top_level_tables");
+  DBUG_ASSERT(allowed_top_level_tables == 0); // Should only be called once
+
+  while ((tl= ti++))
+  {
+    table_map map;
+    TABLE_LIST *embedding= tl->embedding;
+
+    if (tl->table)
+      map= tl->table->map;
+    else
+    {
+      /* A materialized non-merged subquery (jtbm) has no TABLE object */
+      DBUG_ASSERT(tl->jtbm_subselect);
+      map= table_map(1) << tl->jtbm_table_no;
+    }
+
+    if (!(embedding= tl->embedding))
+    {
+      allowed_top_level_tables |= map;
+      continue;
+    }
+
+    // Walk out of any semi-join nests
+    while (embedding && !embedding->on_expr)
+    {
+      // semi-join nest or an INSERT-INTO view...
+      embedding->nested_join->direct_children_map |= map;
+      embedding= embedding->embedding;
+    }
+
+    // Ok we are in the parent nested outer join nest.
+    if (!embedding)
+    {
+      allowed_top_level_tables |= map;
+      continue;
+    }
+    embedding->nested_join->direct_children_map |= map;
+
+    // Walk to grand-parent join nest.
+    embedding= embedding->embedding;
+
+    // Walk out of any semi-join nests
+    while (embedding && !embedding->on_expr)
+    {
+      DBUG_ASSERT(embedding->sj_on_expr);
+      embedding->nested_join->direct_children_map |= map;
+      embedding= embedding->embedding;
+    }
+
+    if (embedding)
+    {
+      DBUG_ASSERT(embedding->on_expr); // Impossible, see above
+      embedding->nested_join->direct_children_map |= map;
+    }
+    else
+      allowed_top_level_tables |= map;
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Get the tables that one is allowed to have as the next table in the
+  current plan
+*/
+
+table_map JOIN::get_allowed_nj_tables(uint idx)
+{
+  TABLE_LIST *last_emb;
+  if (idx > const_tables &&
+      (last_emb= positions[idx-1].table->table->pos_in_table_list->embedding))
+  {
+    /* Walk up from the last table's innermost nest looking for an open one */
+    for (;last_emb && last_emb != emb_sjm_nest;
+         last_emb= last_emb->embedding)
+    {
+      if (!last_emb->sj_on_expr)
+      {
+        NESTED_JOIN *nest= last_emb->nested_join;
+        if (!nest->is_fully_covered())
+        {
+          // Return tables that are direct members of this join nest
+          return nest->direct_children_map;
+        }
+      }
+    }
+  }
+  // Return bitmap of tables not in any join nest
+  if (emb_sjm_nest)
+    return emb_sjm_nest->nested_join->direct_children_map;
+  return allowed_top_level_tables;
+}
+
+
+/*
+  Change access methods not to use join buffering and adjust costs accordingly
+
+  SYNOPSIS
+    optimize_wo_join_buffering()
+      join
+      first_tab               The first tab to do re-optimization for
+      last_tab                The last tab to do re-optimization for
+      last_remaining_tables   Bitmap of tables that are not in the
+                              [0...last_tab] join prefix
+      first_alt               TRUE <=> Use the LooseScan plan for the first_tab
+      no_jbuf_before          Don't allow to use join buffering before this
+                              table
+      reopt_rec_count     OUT New output record count
+      reopt_cost          OUT New join prefix cost
+
+  DESCRIPTION
+    Given a join prefix [0; ... first_tab], change the access to the tables
+    in the [first_tab; last_tab] not to use join buffering. This is needed
+    because some semi-join strategies cannot be used together with the join
+    buffering.
+    In general case the best table order in [first_tab; last_tab] range with
+    join buffering is different from the best order without join buffering but
+    we don't try finding a better join order. (TODO ask Igor why did we
+    chose not to do this in the end. that's actually the difference from the
+    forking approach)
+*/
+
+void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
+                                table_map last_remaining_tables,
+                                bool first_alt, uint no_jbuf_before,
+                                double *outer_rec_count, double *reopt_cost)
+{
+  double cost, rec_count;
+  table_map reopt_remaining_tables= last_remaining_tables;
+  uint i;
+  THD *thd= join->thd;
+  /* This re-optimization must not pollute the optimizer trace */
+  Json_writer_temp_disable trace_wo_join_buffering(thd);
+
+  if (first_tab > join->const_tables)
+  {
+    cost= join->positions[first_tab - 1].prefix_cost;
+    rec_count= join->positions[first_tab - 1].prefix_record_count;
+  }
+  else
+  {
+    cost= 0.0;
+    rec_count= 1;
+  }
+
+  *outer_rec_count= rec_count;
+  for (i= first_tab; i <= last_tab; i++)
+    reopt_remaining_tables |= join->positions[i].table->table->map;
+
+  /*
+    best_access_path() optimization depends on the value of
+    join->cur_sj_inner_tables. Our goal in this function is to do a
+    re-optimization with disabled join buffering, but no other changes.
+    In order to achieve this, cur_sj_inner_tables needs to have the same
+    value it had during the original invocations of best_access_path.
+
+    We know that this function, optimize_wo_join_buffering() is called to
+    re-optimize semi-join join order range, which allows to conclude that
+    the "original" value of cur_sj_inner_tables was 0.
+  */
+  table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables;
+  join->cur_sj_inner_tables= 0;
+
+  for (i= first_tab; i <= last_tab; i++)
+  {
+    JOIN_TAB *rs= join->positions[i].table;
+    POSITION pos, loose_scan_pos;
+
+    if ((i == first_tab && first_alt) || join->positions[i].use_join_buffer)
+    {
+      /* Find the best access method that would not use join buffering */
+      best_access_path(join, rs, reopt_remaining_tables,
+                       join->positions, i,
+                       TRUE, rec_count,
+                       &pos, &loose_scan_pos);
+    }
+    else
+      pos= join->positions[i];
+
+    if ((i == first_tab && first_alt))
+      pos= loose_scan_pos;
+
+    reopt_remaining_tables &= ~rs->table->map;
+    rec_count= COST_MULT(rec_count, pos.records_read);
+    cost= COST_ADD(cost, pos.read_time);
+    cost= COST_ADD(cost, rec_count / TIME_FOR_COMPARE);
+    //TODO: take into account join condition selectivity here
+    double pushdown_cond_selectivity= 1.0;
+    table_map real_table_bit= rs->table->map;
+    if (join->thd->variables.optimizer_use_condition_selectivity > 1)
+    {
+      pushdown_cond_selectivity= table_cond_selectivity(join, i, rs,
+                                                        reopt_remaining_tables &
+                                                        ~real_table_bit);
+    }
+    double partial_join_cardinality= rec_count *
+                                     pushdown_cond_selectivity;
+    join->positions[i].partial_join_cardinality= partial_join_cardinality;
+    (*outer_rec_count) *= pushdown_cond_selectivity;
+    /* Only non-semi-join tables contribute to the outer record count */
+    if (!rs->emb_sj_nest)
+      *outer_rec_count= COST_MULT(*outer_rec_count, pos.records_read);
+
+  }
+  join->cur_sj_inner_tables= save_cur_sj_inner_tables;
+
+  *reopt_cost= cost;
+}
+
+
+static COND *
+optimize_cond(JOIN *join, COND *conds,
+              List *join_list, bool ignore_on_conds,
+              Item::cond_result *cond_value, COND_EQUAL **cond_equal,
+              int flags)
+{
+  THD *thd= join->thd;
+  DBUG_ENTER("optimize_cond");
+
+  if (!conds)
+  {
+    *cond_value= Item::COND_TRUE;
+    if (!ignore_on_conds)
+      build_equal_items(join, NULL, NULL, join_list, ignore_on_conds,
+                        cond_equal);
+  }
+  else
+  {
+    /*
+      Build all multiple equality predicates and eliminate equality
+      predicates that can be inferred from these multiple equalities.
+      For each reference of a field included into a multiple equality
+      that occurs in a function set a pointer to the multiple equality
+      predicate. Substitute a constant instead of this field if the
+      multiple equality contains a constant.
+    */
+
+    Json_writer_object trace_wrapper(thd);
+    Json_writer_object trace_cond(thd, "condition_processing");
+    trace_cond.add("condition", join->conds == conds ? "WHERE" : "HAVING")
+              .add("original_condition", conds);
+
+    Json_writer_array trace_steps(thd, "steps");
+    DBUG_EXECUTE("where", print_where(conds, "original", QT_ORDINARY););
+    conds= build_equal_items(join, conds, NULL, join_list,
+                             ignore_on_conds, cond_equal,
+                             MY_TEST(flags & OPT_LINK_EQUAL_FIELDS));
+    DBUG_EXECUTE("where",print_where(conds,"after equal_items", QT_ORDINARY););
+    {
+      Json_writer_object equal_prop_wrapper(thd);
+      equal_prop_wrapper.add("transformation", "equality_propagation")
+                        .add("resulting_condition", conds);
+    }
+
+    /* change field = field to field = const for each found field = const */
+    propagate_cond_constants(thd, (I_List *) 0, conds, conds);
+    /*
+      Remove all instances of item == item
+      Remove all and-levels where CONST item != CONST item
+    */
+    DBUG_EXECUTE("where",print_where(conds,"after const change", QT_ORDINARY););
+    {
+      Json_writer_object const_prop_wrapper(thd);
+      const_prop_wrapper.add("transformation", "constant_propagation")
+                        .add("resulting_condition", conds);
+    }
+    conds= conds->remove_eq_conds(thd, cond_value, true);
+    /* The surviving top-level AND's multiple equalities become COND_EQUAL */
+    if (conds && conds->type() == Item::COND_ITEM &&
+        ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
+      *cond_equal= &((Item_cond_and*) conds)->m_cond_equal;
+
+    {
+      Json_writer_object cond_removal_wrapper(thd);
+      cond_removal_wrapper.add("transformation", "trivial_condition_removal")
+                          .add("resulting_condition", conds);
+    }
+    DBUG_EXECUTE("info",print_where(conds,"after remove", QT_ORDINARY););
+  }
+  DBUG_RETURN(conds);
+}
+
+
+/**
+  @brief
+  Propagate multiple equalities to the sub-expressions of a condition
+
+  @param thd        thread handle
+  @param cond       the condition where equalities are to be propagated
+  @param *new_equalities  the multiple equalities to be propagated
+  @param inherited  path to all inherited multiple equality items
+  @param[out] is_simplifiable_cond  'cond' may be simplified after the
+                                    propagation of the equalities
+
+  @details
+  The function recursively traverses the tree of the condition 'cond' and
+  for each its AND sub-level of any depth the function merges the multiple
+  equalities from the list 'new_equalities' into the multiple equalities
+  attached to the AND item created for this sub-level.
+  The function also [re]sets references to the equalities formed by the
+  merges of multiple equalities in all field items occurred in 'cond'
+  that are encountered in the equalities.
+  If the result of any merge of multiple equalities is an impossible
+  condition the function returns TRUE in the parameter is_simplifiable_cond.
+*/
+
+void propagate_new_equalities(THD *thd, Item *cond,
+                              List *new_equalities,
+                              COND_EQUAL *inherited,
+                              bool *is_simplifiable_cond)
+{
+  if (cond->type() == Item::COND_ITEM)
+  {
+    bool and_level= ((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC;
+    if (and_level)
+    {
+      Item_cond_and *cond_and= (Item_cond_and *) cond;
+      List *cond_equalities= &cond_and->m_cond_equal.current_level;
+      cond_and->m_cond_equal.upper_levels= inherited;
+      if (!cond_equalities->is_empty() && cond_equalities != new_equalities)
+      {
+        Item_equal *equal_item;
+        List_iterator it(*new_equalities);
+        while ((equal_item= it++))
+        {
+          equal_item->merge_into_list(thd, cond_equalities, true, true);
+        }
+        /* A merged equality that is constant FALSE makes cond simplifiable */
+        List_iterator ei(*cond_equalities);
+        while ((equal_item= ei++))
+        {
+          if (equal_item->const_item() && !equal_item->val_int())
+          {
+            *is_simplifiable_cond= true;
+            return;
+          }
+        }
+      }
+    }
+
+    Item *item;
+    List_iterator li(*((Item_cond*) cond)->argument_list());
+    while ((item= li++))
+    {
+      COND_EQUAL *new_inherited= and_level && item->type() == Item::COND_ITEM ?
+                                 &((Item_cond_and *) cond)->m_cond_equal :
+                                 inherited;
+      propagate_new_equalities(thd, item, new_equalities, new_inherited,
+                               is_simplifiable_cond);
+    }
+  }
+  else if (cond->type() == Item::FUNC_ITEM &&
+           ((Item_func*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
+  {
+    Item_equal *equal_item;
+    List_iterator it(*new_equalities);
+    Item_equal *equality= (Item_equal *) cond;
+    equality->upper_levels= inherited;
+    while ((equal_item= it++))
+    {
+      equality->merge_with_check(thd, equal_item, true);
+    }
+    /* The merged equality itself may have become constant FALSE */
+    if (equality->const_item() && !equality->val_int())
+      *is_simplifiable_cond= true;
+  }
+  else
+  {
+    cond= cond->propagate_equal_fields(thd,
+                                       Item::Context_boolean(), inherited);
+    cond->update_used_tables();
+  }
+}
+
+/*
+  Check if cond_is_datetime_is_null() is true for the condition cond, or
+  for any of its AND/OR-children
+*/
+bool cond_has_datetime_is_null(Item *cond)
+{
+  if (cond_is_datetime_is_null(cond))
+    return true;
+
+  if (cond->type() == Item::COND_ITEM)
+  {
+    List *cond_arg_list= ((Item_cond*) cond)->argument_list();
+    List_iterator li(*cond_arg_list);
+    Item *item;
+    while ((item= li++))
+    {
+      if (cond_has_datetime_is_null(item))
+        return true;
+    }
+  }
+  return false;
+}
+
+/*
+  Check if the passed condition has the form
+
+    not_null_date_col IS NULL
+
+  where not_null_date_col has a date or datetime type
+*/
+
+bool cond_is_datetime_is_null(Item *cond)
+{
+  if (cond->type() == Item::FUNC_ITEM &&
+      ((Item_func*) cond)->functype() == Item_func::ISNULL_FUNC)
+  {
+    return ((Item_func_isnull*) cond)->arg_is_datetime_notnull_field();
+  }
+  return false;
+}
+
+
+/**
+  @brief
+  Evaluate all constant boolean sub-expressions in a condition
+
+  @param thd        thread handle
+  @param cond       condition where to evaluate constant sub-expressions
+  @param[out] cond_value : the returned value of the condition
+                           (TRUE/FALSE/UNKNOWN:
+                            Item::COND_TRUE/Item::COND_FALSE/Item::COND_OK)
+  @return
+   the item that is the result of the substitution of all inexpensive constant
+   boolean sub-expressions into cond, or,
+   NULL if the condition is constant and is evaluated to FALSE.
+
+  @details
+  This function looks for all inexpensive constant boolean sub-expressions in
+  the given condition 'cond' and substitutes them for their values.
+  For example, the condition 2 > (5 + 1) or a < (10 / 2)
+  will be transformed to the condition a < (10 / 2).
+  Note that a constant sub-expression is evaluated only if it is constant and
+  inexpensive. A sub-expression with an uncorrelated subquery may be evaluated
+  only if the subquery is considered as inexpensive.
+  The function does not evaluate a constant sub-expression if it is not on one
+  of AND/OR levels of the condition 'cond'. For example, the subquery in the
+  condition a > (select max(b) from t1 where b > 5) will never be evaluated
+  by this function.
+  If a constant boolean sub-expression is evaluated to TRUE then:
+  - when the sub-expression is a conjunct of an AND formula it is simply
+    removed from this formula
+  - when the sub-expression is a disjunct of an OR formula the whole OR
+    formula is converted to TRUE
+  If a constant boolean sub-expression is evaluated to FALSE then:
+  - when the sub-expression is a disjunct of an OR formula it is simply
+    removed from this formula
+  - when the sub-expression is a conjunct of an AND formula the whole AND
+    formula is converted to FALSE
+  When a disjunct/conjunct is removed from an OR/AND formula it might happen
+  that there is only one conjunct/disjunct remaining. In this case this
+  remaining disjunct/conjunct must be merged into underlying AND/OR formula,
+  because AND/OR levels must alternate in the same way as they alternate
+  after fix_fields() is called for the original condition.
+  The specifics of merging a formula f into an AND formula A appears
+  when A contains multiple equalities and f contains multiple equalities.
+  In this case the multiple equalities from f and A have to be merged.
+  After this the resulting multiple equalities have to be propagated into
+  the all AND/OR levels of the formula A (see propagate_new_equalities()).
+  The propagation of multiple equalities might result in forming multiple
+  equalities that are always FALSE. This, in its turn, might trigger further
+  simplification of the condition.
+
+  @note
+  EXAMPLE 1:
+  SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5 AND a = 5 OR 1 != 1);
+  First 1 != 1 will be removed from the second conjunct:
+  => SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5 AND a = 5);
+  Then (b = 5 AND a = 5) will be merged into the top level condition:
+  => SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5) AND (a = 5);
+  Then (b = 5), (a = 5) will be propagated into the disjuncts of
+  (b = 1 OR a = 1):
+  => SELECT * FROM t1 WHERE ((b = 1) AND (b = 5) AND (a = 5) OR
+                             (a = 1) AND (b = 5) AND (a = 5)) AND
+                            (b = 5) AND (a = 5)
+  => SELECT * FROM t1 WHERE ((FALSE AND (a = 5)) OR
+                             (FALSE AND (b = 5))) AND
+                            (b = 5) AND (a = 5)
+  After this an additional call of remove_eq_conds() converts it
+  to FALSE
+
+  EXAMPLE 2:
+  SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5 AND a = 5 OR 1 != 1);
+  => SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5 AND a = 5);
+  => SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5) AND (a = 5);
+  => SELECT * FROM t1 WHERE ((b = 1) AND (b = 5) AND (a = 5) OR
+                             (a = 5) AND (b = 5) AND (a = 5)) AND
+                            (b = 5) AND (a = 5)
+  => SELECT * FROM t1 WHERE ((FALSE AND (a = 5)) OR
+                             ((b = 5) AND (a = 5))) AND
+                            (b = 5) AND (a = 5)
+  After this an additional call of remove_eq_conds() converts it to
+  =>  SELECT * FROM t1 WHERE (b = 5) AND (a = 5)
+*/
+
+
+COND *
+Item_cond::remove_eq_conds(THD *thd, Item::cond_result *cond_value,
+                           bool top_level_arg)
+{
+  bool and_level= functype() == Item_func::COND_AND_FUNC;
+  List *cond_arg_list= argument_list();
+
+  if (and_level)
+  {
+    /*
+      Remove multiple equalities that became always true (e.g. after
+      constant row substitution).
+      They would be removed later in the function anyway, but the list of
+      them cond_equal.current_level also must be adjusted correspondingly.
+      So it's easier to do it at one pass through the list of the equalities.
+    */
+    List *cond_equalities=
+      &((Item_cond_and *) this)->m_cond_equal.current_level;
+    cond_arg_list->disjoin((List *) cond_equalities);
+    List_iterator it(*cond_equalities);
+    Item_equal *eq_item;
+    while ((eq_item= it++))
+    {
+      if (eq_item->const_item() && eq_item->val_int())
+        it.remove();
+    }
+    cond_arg_list->append((List *) cond_equalities);
+  }
+
+  List new_equalities;
+  List_iterator li(*cond_arg_list);
+  bool should_fix_fields= 0;
+  Item::cond_result tmp_cond_value;
+  Item *item;
+
+  /*
+    If the list cond_arg_list became empty then it consisted only
+    of always true multiple equalities.
+  */
+  *cond_value= cond_arg_list->elements ? Item::COND_UNDEF : Item::COND_TRUE;
+
+  while ((item=li++))
+  {
+    Item *new_item= item->remove_eq_conds(thd, &tmp_cond_value, false);
+    if (!new_item)
+    {
+      /* This can happen only when item is converted to TRUE or FALSE */
+      li.remove();
+    }
+    else if (item != new_item)
+    {
+      /*
+        This can happen when:
+        - item was an OR formula converted to one disjunct
+        - item was an AND formula converted to one conjunct
+        In these cases the disjunct/conjunct must be merged into the
+        argument list of cond.
+      */
+      if (new_item->type() == Item::COND_ITEM &&
+          item->type() == Item::COND_ITEM)
+      {
+        DBUG_ASSERT(functype() == ((Item_cond *) new_item)->functype());
+        List *new_item_arg_list=
+          ((Item_cond *) new_item)->argument_list();
+        if (and_level)
+        {
+          /*
+            If new_item is an AND formula then multiple equalities
+            of new_item_arg_list must be merged into multiple equalities
+            of cond_arg_list.
+          */
+          List *new_item_equalities=
+            &((Item_cond_and *) new_item)->m_cond_equal.current_level;
+          if (!new_item_equalities->is_empty())
+          {
+            /*
+              Cut the multiple equalities from the new_item_arg_list and
+              append them on the list new_equalities. Later the equalities
+              from this list will be merged into the multiple equalities
+              of cond_arg_list all together.
+            */
+            new_item_arg_list->disjoin((List *) new_item_equalities);
+            new_equalities.append(new_item_equalities);
+          }
+        }
+        if (new_item_arg_list->is_empty())
+          li.remove();
+        else
+        {
+          uint cnt= new_item_arg_list->elements;
+          li.replace(*new_item_arg_list);
+          /* Make iterator li ignore new items */
+          for (cnt--; cnt; cnt--)
+            li++;
+          should_fix_fields= 1;
+        }
+      }
+      else if (and_level &&
+               new_item->type() == Item::FUNC_ITEM &&
+               ((Item_func*) new_item)->functype() ==
+                Item_func::MULT_EQUAL_FUNC)
+      {
+        li.remove();
+        new_equalities.push_back((Item_equal *) new_item, thd->mem_root);
+      }
+      else
+      {
+        if (new_item->type() == Item::COND_ITEM &&
+            ((Item_cond*) new_item)->functype() == functype())
+        {
+          List *new_item_arg_list=
+            ((Item_cond *) new_item)->argument_list();
+          uint cnt= new_item_arg_list->elements;
+          li.replace(*new_item_arg_list);
+          /* Make iterator li ignore new items */
+          for (cnt--; cnt; cnt--)
+            li++;
+        }
+        else
+          li.replace(new_item);
+        should_fix_fields= 1;
+      }
+    }
+    if (*cond_value == Item::COND_UNDEF)
+      *cond_value= tmp_cond_value;
+    switch (tmp_cond_value) {
+    case Item::COND_OK:                        // Not TRUE or FALSE
+      if (and_level || *cond_value == Item::COND_FALSE)
+        *cond_value=tmp_cond_value;
+      break;
+    case Item::COND_FALSE:
+      if (and_level)
+      {
+        *cond_value= tmp_cond_value;
+        return (COND*) 0;                      // Always false
+      }
+      break;
+    case Item::COND_TRUE:
+      if (!and_level)
+      {
+        *cond_value= tmp_cond_value;
+        return (COND*) 0;                      // Always true
+      }
+      break;
+    case Item::COND_UNDEF:                     // Impossible
+      break; /* purecov: deadcode */
+    }
+  }
+  COND *cond= this;
+  if (!new_equalities.is_empty())
+  {
+    DBUG_ASSERT(and_level);
+    /*
+      Merge multiple equalities that were cut from the results of
+      simplification of OR formulas converted into AND formulas.
+      These multiple equalities are to be merged into the
+      multiple equalities of cond_arg_list.
+    */
+    COND_EQUAL *cond_equal= &((Item_cond_and *) this)->m_cond_equal;
+    List *cond_equalities= &cond_equal->current_level;
+    cond_arg_list->disjoin((List *) cond_equalities);
+    Item_equal *equality;
+    List_iterator_fast it(new_equalities);
+    while ((equality= it++))
+    {
+      equality->upper_levels= cond_equal->upper_levels;
+      equality->merge_into_list(thd, cond_equalities, false, false);
+      List_iterator_fast ei(*cond_equalities);
+      while ((equality= ei++))
+      {
+        /* Any equality that became constant FALSE falsifies the whole AND */
+        if (equality->const_item() && !equality->val_int())
+        {
+          *cond_value= Item::COND_FALSE;
+          return (COND*) 0;
+        }
+      }
+    }
+    cond_arg_list->append((List *) cond_equalities);
+    /*
+      Propagate the newly formed multiple equalities to
+      the all AND/OR levels of cond
+    */
+    bool is_simplifiable_cond= false;
+    propagate_new_equalities(thd, this, cond_equalities,
+                             cond_equal->upper_levels,
+                             &is_simplifiable_cond);
+    /*
+      If the above propagation of multiple equalities brings us
+      to multiple equalities that are always FALSE then try to
+      simplify the condition with remove_eq_cond() again.
+    */
+    if (is_simplifiable_cond)
+    {
+      if (!(cond= cond->remove_eq_conds(thd, cond_value, false)))
+        return cond;
+    }
+    should_fix_fields= 1;
+  }
+  if (should_fix_fields)
+    cond->update_used_tables();
+
+  if (!((Item_cond*) cond)->argument_list()->elements ||
+      *cond_value != Item::COND_OK)
+    return (COND*) 0;
+  if (((Item_cond*) cond)->argument_list()->elements == 1)
+  {                                            // Remove list
+    item= ((Item_cond*) cond)->argument_list()->head();
+    ((Item_cond*) cond)->argument_list()->empty();
+    return item;
+  }
+  *cond_value= Item::COND_OK;
+  return cond;
+}
+
+
+COND *
+Item::remove_eq_conds(THD *thd, Item::cond_result *cond_value, bool top_level_arg)
+{
+  if (can_eval_in_optimize())
+  {
+    *cond_value= eval_const_cond() ? Item::COND_TRUE : Item::COND_FALSE;
+    return (COND*) 0;
+  }
+  *cond_value= Item::COND_OK;
+  return this;                                 // Point at next and level
+}
+
+
+COND *
+Item_bool_func2::remove_eq_conds(THD *thd, Item::cond_result *cond_value,
+                                 bool top_level_arg)
+{
+  if (can_eval_in_optimize())
+  {
+    *cond_value= eval_const_cond() ? Item::COND_TRUE : Item::COND_FALSE;
+    return (COND*) 0;
+  }
+  if ((*cond_value= eq_cmp_result()) != Item::COND_OK)
+  {
+    if (args[0]->eq(args[1], true))
+    {
+      /* NULL-able non-NULL-safe comparisons must be kept (may yield NULL) */
+      if (*cond_value == Item::COND_FALSE ||
+          !args[0]->maybe_null() || functype() == Item_func::EQUAL_FUNC)
+        return (COND*) 0;                      // Compare of identical items
+    }
+  }
+  *cond_value= Item::COND_OK;
+  return this;                                 // Point at next and level
+}
+
+
+/**
+  Remove const and eq items. Return new item, or NULL if no condition
+  cond_value is set to according:
+  COND_OK    query is possible (field = constant)
+  COND_TRUE  always true ( 1 = 1 )
+  COND_FALSE always false ( 1 = 2 )
+
+  SYNOPSIS
+    remove_eq_conds()
+    thd         THD environment
+    cond        the condition to handle
+    cond_value  the resulting value of the condition
+
+  NOTES
+    calls the inner_remove_eq_conds to check all the tree recursively
+
+  RETURN
+    *COND with the simplified condition
+*/
+
+COND *
+Item_func_isnull::remove_eq_conds(THD *thd, Item::cond_result *cond_value,
+                                  bool top_level_arg)
+{
+  Item *real_item= args[0]->real_item();
+  if (real_item->type() == Item::FIELD_ITEM)
+  {
+    Field *field= ((Item_field*) real_item)->field;
+
+    if ((field->flags & NOT_NULL_FLAG) &&
+        field->type_handler()->cond_notnull_field_isnull_to_field_eq_zero())
+    {
+      /* fix to replace 'NULL' dates with '0' (shreeve@uci.edu) */
+      /*
+        See BUG#12594011
+        Documentation says that
+        SELECT datetime_notnull d FROM t1 WHERE d IS NULL
+        shall return rows where d=='0000-00-00'
+
+        Thus, for DATE and DATETIME columns defined as NOT NULL,
+        "date_notnull IS NULL" has to be modified to
+        "date_notnull IS NULL OR date_notnull == 0" (if outer join)
+        "date_notnull == 0" (otherwise)
+
+      */
+
+      Item *item0= (Item*) Item_false;
+      Item *eq_cond= new(thd->mem_root) Item_func_eq(thd, args[0], item0);
+      if (!eq_cond)
+        return this;
+
+      COND *cond= this;
+      if (field->table->pos_in_table_list->is_inner_table_of_outer_join())
+      {
+        // outer join: transform "col IS NULL" to "col IS NULL or col=0"
+        Item *or_cond= new(thd->mem_root) Item_cond_or(thd, eq_cond, this);
+        if (!or_cond)
+          return this;
+        cond= or_cond;
+      }
+      else
+      {
+        // not outer join: transform "col IS NULL" to "col=0"
+        cond= eq_cond;
+      }
+
+      cond->fix_fields(thd, &cond);
+      /*
+        Note: although args[0] is a field, cond can still be a constant
+        (in case field is a part of a dependent subquery).
+
+        Note: we call cond->Item::remove_eq_conds() non-virtually (statically)
+        for performance purpose.
+        A non-qualified call, i.e. just cond->remove_eq_conds(),
+        would call Item_bool_func2::remove_eq_conds() instead, which would
+        try to do some extra job to detect if args[0] and args[1] are
+        equivalent items. We know they are not (we have field=0 here).
+      */
+      return cond->Item::remove_eq_conds(thd, cond_value, false);
+    }
+
+    /*
+      Handles this special case for some ODBC applications:
+      They are requesting the row that was just updated with an auto_increment
+      value with this construct:
+
+      SELECT * from table_name where auto_increment_column IS NULL
+      This will be changed to:
+      SELECT * from table_name where auto_increment_column = LAST_INSERT_ID
+
+      Note, this substitution is done if the NULL test is the only condition!
+      If the NULL test is a part of a more complex condition, it is not
+      substituted and is treated normally:
+        WHERE auto_increment IS NULL AND something_else
+    */
+
+    if (top_level_arg) // "auto_increment_column IS NULL" is the only condition
+    {
+      if (field->flags & AUTO_INCREMENT_FLAG && !field->table->maybe_null &&
+          (thd->variables.option_bits & OPTION_AUTO_IS_NULL) &&
+          (thd->first_successful_insert_id_in_prev_stmt > 0 &&
+           thd->substitute_null_with_insert_id))
+      {
+#ifdef HAVE_QUERY_CACHE
+        query_cache_abort(thd, &thd->query_cache_tls);
+#endif
+        COND *new_cond, *cond= this;
+        /* If this fails, we will catch it later before executing query */
+        if ((new_cond= new (thd->mem_root) Item_func_eq(thd, args[0],
+                                        new (thd->mem_root) Item_int(thd, "last_insert_id()",
+                                                     thd->read_first_successful_insert_id_in_prev_stmt(),
+                                                     MY_INT64_NUM_DECIMAL_DIGITS))))
+        {
+          cond= new_cond;
+          /*
+            Item_func_eq can't be fixed after creation so we do not check
+            cond->fixed(), also it do not need tables so we use 0 as second
+            argument.
+          */
+          cond->fix_fields(thd, &cond);
+        }
+        /*
+          IS NULL should be mapped to LAST_INSERT_ID only for first row, so
+          clear for next row
+        */
+        thd->substitute_null_with_insert_id= FALSE;
+
+        *cond_value= Item::COND_OK;
+        return cond;
+      }
+    }
+  }
+  return Item::remove_eq_conds(thd, cond_value, top_level_arg);
+}
+
+
+/**
+  Check if equality can be used in removing components of GROUP BY/DISTINCT
+
+  @param    l          the left comparison argument (a field if any)
+  @param    r          the right comparison argument (a const of any)
+
+  @details
+  Checks if an equality predicate can be used to take away
+  DISTINCT/GROUP BY because it is known to be true for exactly one
+  distinct value (e.g. <expr> == <const>).
+  Arguments must be compared in the native type of the left argument
+  and (for strings) in the native collation of the left argument.
+  Otherwise, for example,
+  <string_field> = <int_const> may match more than 1 distinct value or
+  the <string_field>.
+
+  @note We don't need to aggregate l and r collations here, because r -
+  the constant item - has already been converted to a proper collation
+  for comparison. We only need to compare this collation with field's collation.
+
+  @retval true    can be used
+  @retval false   cannot be used
+*/
+
+/*
+  psergey-todo: this returns false for int_column='1234' (here '1234' is a
+  constant. Need to discuss this with Bar).
+
+  See also Field::test_if_equality_guaranees_uniqueness(const Item *item);
+*/
+static bool
+test_if_equality_guarantees_uniqueness(Item *l, Item *r)
+{
+  return (r->const_item() || !(r->used_tables() & ~OUTER_REF_TABLE_BIT)) &&
+         item_cmp_type(l, r) == l->cmp_type() &&
+         (l->cmp_type() != STRING_RESULT ||
+          l->collation.collation == r->collation.collation);
+}
+
+
+/*
+  Return TRUE if i1 and i2 (if any) are equal items,
+  or if i1 is a wrapper item around the f2 field.
+*/ + +static bool equal(Item *i1, Item *i2, Field *f2) +{ + DBUG_ASSERT((i2 == NULL) ^ (f2 == NULL)); + + if (i2 != NULL) + return i1->eq(i2, 1); + else if (i1->type() == Item::FIELD_ITEM) + return f2->eq(((Item_field *) i1)->field); + else + return FALSE; +} + + +/** + Test if a field or an item is equal to a constant value in WHERE + + @param cond WHERE clause expression + @param comp_item Item to find in WHERE expression + (if comp_field != NULL) + @param comp_field Field to find in WHERE expression + (if comp_item != NULL) + @param[out] const_item intermediate arg, set to Item pointer to NULL + + @return TRUE if the field is a constant value in WHERE + + @note + comp_item and comp_field parameters are mutually exclusive. +*/ +bool +const_expression_in_where(COND *cond, Item *comp_item, Field *comp_field, + Item **const_item) +{ + DBUG_ASSERT((comp_item == NULL) ^ (comp_field == NULL)); + + Item *intermediate= NULL; + if (const_item == NULL) + const_item= &intermediate; + + if (cond->type() == Item::COND_ITEM) + { + bool and_level= (((Item_cond*) cond)->functype() + == Item_func::COND_AND_FUNC); + List_iterator_fast li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + bool res=const_expression_in_where(item, comp_item, comp_field, + const_item); + if (res) // Is a const value + { + if (and_level) + return 1; + } + else if (!and_level) + return 0; + } + return and_level ? 
0 : 1; + } + else if (cond->eq_cmp_result() != Item::COND_OK) + { // boolean compare function + Item_func* func= (Item_func*) cond; + if (func->functype() != Item_func::EQUAL_FUNC && + func->functype() != Item_func::EQ_FUNC) + return 0; + Item *left_item= ((Item_func*) cond)->arguments()[0]; + Item *right_item= ((Item_func*) cond)->arguments()[1]; + if (equal(left_item, comp_item, comp_field)) + { + if (test_if_equality_guarantees_uniqueness (left_item, right_item)) + { + if (*const_item) + return right_item->eq(*const_item, 1); + *const_item=right_item; + return 1; + } + } + else if (equal(right_item, comp_item, comp_field)) + { + if (test_if_equality_guarantees_uniqueness (right_item, left_item)) + { + if (*const_item) + return left_item->eq(*const_item, 1); + *const_item=left_item; + return 1; + } + } + } + return 0; +} + + +/**************************************************************************** + Create internal temporary table +****************************************************************************/ + +Field *Item::create_tmp_field_int(MEM_ROOT *root, TABLE *table, + uint convert_int_length) +{ + const Type_handler *h= &type_handler_slong; + if (max_char_length() > convert_int_length) + h= &type_handler_slonglong; + if (unsigned_flag) + h= h->type_handler_unsigned(); + return h->make_and_init_table_field(root, &name, Record_addr(maybe_null()), + *this, table); +} + +Field *Item::tmp_table_field_from_field_type_maybe_null(MEM_ROOT *root, + TABLE *table, + Tmp_field_src *src, + const Tmp_field_param *param, + bool is_explicit_null) +{ + /* + item->type() == CONST_ITEM excluded due to making fields for counter + With help of Item_uint + */ + DBUG_ASSERT(!param->make_copy_field() || type() == CONST_ITEM); + DBUG_ASSERT(!is_result_field()); + Field *result; + if ((result= tmp_table_field_from_field_type(root, table))) + { + if (result && is_explicit_null) + result->is_created_from_null_item= true; + } + return result; +} + + +Field 
*Item_sum::create_tmp_field(MEM_ROOT *root, bool group, TABLE *table)
{
  /*
    Aggregate functions get a tmp field matching their comparison type;
    only REAL_RESULT is special-cased to a Field_double.
  */
  Field *UNINIT_VAR(new_field);

  switch (cmp_type()) {
  case REAL_RESULT:
  {
    new_field= new (root)
      Field_double(max_char_length(), maybe_null(), &name, decimals, TRUE);
    break;
  }
  case INT_RESULT:
  case TIME_RESULT:
  case DECIMAL_RESULT:
  case STRING_RESULT:
    new_field= tmp_table_field_from_field_type(root, table);
    break;
  case ROW_RESULT:
    // This case should never be chosen
    DBUG_ASSERT(0);
    new_field= 0;
    break;
  }
  if (new_field)
    new_field->init(table);
  return new_field;
}


/**
  Create a temporary field for Item_field (or its descendant),
  either direct or referenced by an Item_ref.

  param->modify_item is set when we create a field for an internal temporary
  table. In this case we have to ensure the new field name is identical to
  the original field name as the field info will be sent to the client.
  In other cases, the field name is set from orig_item or name if orig_item is
  not set.
*/

Field *
Item_field::create_tmp_field_from_item_field(MEM_ROOT *root, TABLE *new_table,
                                             Item_ref *orig_item,
                                             const Tmp_field_param *param)
{
  DBUG_ASSERT(!is_result_field());
  Field *result;
  LEX_CSTRING *new_name= (orig_item ? &orig_item->name :
                          !param->modify_item() ? &name :
                          &field->field_name);

  /*
    If the item has to be able to store NULLs but the underlying field
    can't do it, create_tmp_field_from_field() can't be used for tmp field
    creation.
  */
  if (((maybe_null() && in_rollup()) ||
       (new_table->in_use->create_tmp_table_for_derived && /* for mat. view/dt */
        orig_item && orig_item->maybe_null())) &&
      !field->maybe_null())
  {
    /*
      The item the ref points to may have maybe_null flag set while
      the ref doesn't have it. This may happen for outer fields
      when the outer query decided at some point after name resolution phase
      that this field might be null. Take this into account here.
    */
    Record_addr rec(orig_item ? orig_item->maybe_null() : maybe_null());
    const Type_handler *handler= type_handler()->
                                   type_handler_for_tmp_table(this);
    result= handler->make_and_init_table_field(root, new_name,
                                               rec, *this, new_table);
  }
  else if (param->table_cant_handle_bit_fields() &&
           field->type() == MYSQL_TYPE_BIT)
  {
    /* BIT can't be indexed by HEAP: widen to an integer field instead */
    const Type_handler *handler=
      Type_handler::type_handler_long_or_longlong(max_char_length(), true);
    result= handler->make_and_init_table_field(root, new_name,
                                               Record_addr(maybe_null()),
                                               *this, new_table);
  }
  else
  {
    bool tmp_maybe_null= param->modify_item() ? maybe_null() :
                                                field->maybe_null();
    result= field->create_tmp_field(root, new_table, tmp_maybe_null);
    if (result && ! param->modify_item())
      result->field_name= *new_name;
  }
  if (result && param->modify_item())
    result_field= result;
  return result;
}


/*
  Create a tmp field for a plain column reference; records the source field
  in 'src' and, when the definition matches, also the default-value field.
*/
Field *Item_field::create_tmp_field_ex(MEM_ROOT *root, TABLE *table,
                                       Tmp_field_src *src,
                                       const Tmp_field_param *param)
{
  DBUG_ASSERT(!is_result_field());
  Field *result;
  src->set_field(field);
  if (!(result= create_tmp_field_from_item_field(root, table, NULL, param)))
    return NULL;
  if (!(field->flags & NO_DEFAULT_VALUE_FLAG) &&
      field->eq_def(result))
    src->set_default_field(field);
  return result;
}


Field *Item_default_value::create_tmp_field_ex(MEM_ROOT *root, TABLE *table,
                                               Tmp_field_src *src,
                                               const Tmp_field_param *param)
{
  if (field->default_value || (field->flags & BLOB_FLAG))
  {
    /*
      We have to use a copy function when using a blob with default value
      as we have to calculate the default value before we can use it.
+ */ + get_tmp_field_src(src, param); + Field *result= tmp_table_field_from_field_type(root, table); + if (result && param->modify_item()) + result_field= result; + return result; + } + /* + Same code as in Item_field::create_tmp_field_ex, except no default field + handling + */ + src->set_field(field); + return create_tmp_field_from_item_field(root, table, nullptr, param); +} + + +Field *Item_ref::create_tmp_field_ex(MEM_ROOT *root, TABLE *table, + Tmp_field_src *src, + const Tmp_field_param *param) +{ + Item *item= real_item(); + DBUG_ASSERT(is_result_field()); + if (item->type() == Item::FIELD_ITEM) + { + Field *result; + Item_field *field= (Item_field*) item; + Tmp_field_param prm2(*param); + prm2.set_modify_item(false); + src->set_field(field->field); + if (!(result= field->create_tmp_field_from_item_field(root, table, + this, &prm2))) + return NULL; + if (param->modify_item()) + result_field= result; + return result; + } + return Item_result_field::create_tmp_field_ex(root, table, src, param); +} + + +void Item_result_field::get_tmp_field_src(Tmp_field_src *src, + const Tmp_field_param *param) +{ + if (param->make_copy_field()) + { + DBUG_ASSERT(result_field); + src->set_field(result_field); + } + else + { + src->set_item_result_field(this); // Save for copy_funcs + } +} + + +Field * +Item_result_field::create_tmp_field_ex_from_handler( + MEM_ROOT *root, + TABLE *table, + Tmp_field_src *src, + const Tmp_field_param *param, + const Type_handler *h) +{ + /* + Possible Item types: + - Item_cache_wrapper (only for CREATE..SELECT ?) 
+ - Item_func + - Item_subselect + */ + DBUG_ASSERT(fixed()); + DBUG_ASSERT(is_result_field()); + DBUG_ASSERT(type() != NULL_ITEM); + get_tmp_field_src(src, param); + Field *result; + if ((result= h->make_and_init_table_field(root, &name, + Record_addr(maybe_null()), + *this, table)) && + param->modify_item()) + result_field= result; + return result; +} + + +Field *Item_func_sp::create_tmp_field_ex(MEM_ROOT *root, TABLE *table, + Tmp_field_src *src, + const Tmp_field_param *param) +{ + Field *result; + get_tmp_field_src(src, param); + if ((result= sp_result_field->create_tmp_field(root, table))) + { + result->field_name= name; + if (param->modify_item()) + result_field= result; + } + return result; +} + + +static bool make_json_valid_expr(TABLE *table, Field *field) +{ + THD *thd= table->in_use; + Query_arena backup_arena; + Item *expr, *item_field; + + if (!table->expr_arena && table->init_expr_arena(thd->mem_root)) + return 1; + + thd->set_n_backup_active_arena(table->expr_arena, &backup_arena); + if ((item_field= new (thd->mem_root) Item_field(thd, field)) && + (expr= new (thd->mem_root) Item_func_json_valid(thd, item_field))) + field->check_constraint= add_virtual_expression(thd, expr); + thd->restore_active_arena(table->expr_arena, &backup_arena); + return field->check_constraint == NULL; +} + + +/** + Create field for temporary table. + + @param table Temporary table + @param item Item to create a field for + @param type Type of item (normally item->type) + @param copy_func If set and item is a function, store copy of item + in this array + @param from_field if field will be created using other field as example, + pointer example field will be written here + @param default_field If field has a default value field, store it here + @param group 1 if we are going to do a relative group by on result + @param modify_item 1 if item->result_field should point to new item. 
                        This is relevant for how fill_record() is going to
                        work:
                        If modify_item is 1 then fill_record() will update
                        the record in the original table.
                        If modify_item is 0 then fill_record() will update
                        the temporary table
  @param table_cant_handle_bit_fields
                        Set to 1 if the temporary table cannot handle bit
                        fields. Only set for heap tables when the bit field
                        is part of an index.
  @param make_copy_field
                        Set when using with rollup when we want to have
                        an exact copy of the field.
  @retval
    0                   on error
  @retval
    new_created field
  Create a temporary field for Item_field (or its descendant),
  either direct or referenced by an Item_ref.
*/
Field *create_tmp_field(TABLE *table, Item *item,
                        Item ***copy_func, Field **from_field,
                        Field **default_field,
                        bool group, bool modify_item,
                        bool table_cant_handle_bit_fields,
                        bool make_copy_field)
{
  Tmp_field_src src;
  Tmp_field_param prm(group, modify_item, table_cant_handle_bit_fields,
                      make_copy_field);
  Field *result= item->create_tmp_field_ex(table->in_use->mem_root,
                                           table, &src, &prm);
  /*
    NOTE(review): if create_tmp_field_ex() returned NULL for a JSON-typed
    item, make_json_valid_expr() would receive a NULL field — presumably
    that combination is unreachable here; verify against callers.
  */
  if (is_json_type(item) && make_json_valid_expr(table, result))
    result= NULL;

  *from_field= src.field();
  *default_field= src.default_field();
  if (src.item_result_field())
    *((*copy_func)++)= src.item_result_field();
  return result;
}

/*
  Set up column usage bitmaps for a temporary table

  IMPLEMENTATION
    For temporary tables, we need one bitmap with all columns set and
    a tmp_set bitmap to be used by things like filesort.
*/

void
setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps, uint field_count)
{
  uint bitmap_size= bitmap_buffer_size(field_count);

  DBUG_ASSERT(table->s->virtual_fields == 0);

  /* Five consecutive bitmaps are carved out of the 'bitmaps' buffer */
  my_bitmap_init(&table->def_read_set, (my_bitmap_map*) bitmaps, field_count);
  bitmaps+= bitmap_size;
  my_bitmap_init(&table->tmp_set,
                 (my_bitmap_map*) bitmaps, field_count);
  bitmaps+= bitmap_size;
  my_bitmap_init(&table->eq_join_set,
                 (my_bitmap_map*) bitmaps, field_count);
  bitmaps+= bitmap_size;
  my_bitmap_init(&table->cond_set,
                 (my_bitmap_map*) bitmaps, field_count);
  bitmaps+= bitmap_size;
  my_bitmap_init(&table->has_value_set,
                 (my_bitmap_map*) bitmaps, field_count);
  /* write_set and all_set are copies of read_set */
  table->def_write_set= table->def_read_set;
  table->s->all_set= table->def_read_set;
  bitmap_set_all(&table->s->all_set);
  table->default_column_bitmaps();
}


Create_tmp_table::Create_tmp_table(ORDER *group, bool distinct,
                                   bool save_sum_fields,
                                   ulonglong select_options,
                                   ha_rows rows_limit)
  :m_alloced_field_count(0),
   m_using_unique_constraint(false),
   m_temp_pool_slot(MY_BIT_NONE),
   m_group(group),
   m_distinct(distinct),
   m_save_sum_fields(save_sum_fields),
   m_with_cycle(false),
   m_select_options(select_options),
   m_rows_limit(rows_limit),
   m_group_null_items(0),
   current_counter(other)
{
  /* Per-counter statistics are kept separately for distinct/other columns */
  m_field_count[Create_tmp_table::distinct]= 0;
  m_field_count[Create_tmp_table::other]= 0;
  m_null_count[Create_tmp_table::distinct]= 0;
  m_null_count[Create_tmp_table::other]= 0;
  m_blobs_count[Create_tmp_table::distinct]= 0;
  m_blobs_count[Create_tmp_table::other]= 0;
  m_uneven_bit[Create_tmp_table::distinct]= 0;
  m_uneven_bit[Create_tmp_table::other]= 0;
}


void Create_tmp_table::add_field(TABLE *table, Field *field, uint fieldnr,
                                 bool force_not_null_cols)
{
  DBUG_ASSERT(!field->field_name.str ||
              strlen(field->field_name.str) == field->field_name.length);

  if (force_not_null_cols)
  {
field->flags|= NOT_NULL_FLAG; + field->null_ptr= NULL; + } + + if (!(field->flags & NOT_NULL_FLAG)) + m_null_count[current_counter]++; + + table->s->reclength+= field->pack_length(); + + // Assign it here, before update_data_type_statistics() changes m_blob_count + if (field->flags & BLOB_FLAG) + { + table->s->blob_field[m_blob_count]= fieldnr; + m_blobs_count[current_counter]++; + } + + table->field[fieldnr]= field; + field->field_index= fieldnr; + + field->update_data_type_statistics(this); +} + + +/** + Create a temp table according to a field list. + + Given field pointers are changed to point at tmp_table for + send_result_set_metadata. The table object is self contained: it's + allocated in its own memory root, as well as Field objects + created for table columns. + This function will replace Item_sum items in 'fields' list with + corresponding Item_field items, pointing at the fields in the + temporary table, unless this was prohibited by TRUE + value of argument save_sum_fields. The Item_field objects + are created in THD memory root. + + @param thd thread handle + @param param a description used as input to create the table + @param fields list of items that will be used to define + column types of the table (also see NOTES) + @param group Create an unique key over all group by fields. + This is used to retrive the row during + end_write_group() and update them. + @param distinct should table rows be distinct + @param save_sum_fields see NOTES + @param select_options Optiions for how the select is run. + See sql_priv.h for a list of options. + @param rows_limit Maximum number of rows to insert into the + temporary table + @param table_alias possible name of the temporary table that can + be used for name resolving; can be "". 
+ @param do_not_open only create the TABLE object, do not + open the table in the engine + @param keep_row_order rows need to be read in the order they were + inserted, the engine should preserve this order +*/ + +TABLE *Create_tmp_table::start(THD *thd, + TMP_TABLE_PARAM *param, + const LEX_CSTRING *table_alias) +{ + MEM_ROOT *mem_root_save, own_root; + TABLE *table; + TABLE_SHARE *share; + uint copy_func_count= param->func_count; + char *tmpname,path[FN_REFLEN]; + Field **reg_field; + uint *blob_field; + key_part_map *const_key_parts; + /* Treat sum functions as normal ones when loose index scan is used. */ + m_save_sum_fields|= param->precomputed_group_by; + DBUG_ENTER("Create_tmp_table::start"); + DBUG_PRINT("enter", + ("table_alias: '%s' distinct: %d save_sum_fields: %d " + "rows_limit: %lu group: %d", table_alias->str, + (int) m_distinct, (int) m_save_sum_fields, + (ulong) m_rows_limit, MY_TEST(m_group))); + + if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES)) + m_temp_pool_slot = temp_pool_set_next(); + + if (m_temp_pool_slot != MY_BIT_NONE) // we got a slot + sprintf(path, "%s-%s-%lx-%i", tmp_file_prefix, param->tmp_name, + current_pid, m_temp_pool_slot); + else + { + /* if we run out of slots or we are not using tempool */ + sprintf(path, "%s-%s-%lx-%llx-%x", tmp_file_prefix, param->tmp_name, + current_pid, thd->thread_id, thd->tmp_table++); + } + + /* + No need to change table name to lower case as we are only creating + MyISAM, Aria or HEAP tables here + */ + fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME); + + if (m_group) + { + ORDER **prev= &m_group; + if (!param->quick_group) + m_group= 0; // Can't use group key + else for (ORDER *tmp= m_group ; tmp ; tmp= tmp->next) + { + /* Exclude found constant from the list */ + if ((*tmp->item)->const_item()) + { + *prev= tmp->next; + param->group_parts--; + continue; + } + else + prev= &(tmp->next); + /* + marker == 4 means two things: + - store NULLs in the key, and + - 
convert BIT fields to 64-bit long, needed because MEMORY tables + can't index BIT fields. + */ + (*tmp->item)->marker= MARKER_NULL_KEY; // Store null in key + if ((*tmp->item)->too_big_for_varchar()) + m_using_unique_constraint= true; + } + if (param->group_length >= MAX_BLOB_WIDTH) + m_using_unique_constraint= true; + if (m_group) + m_distinct= 0; // Can't use distinct + } + + m_alloced_field_count= param->field_count+param->func_count+param->sum_func_count; + DBUG_ASSERT(m_alloced_field_count); + const uint field_count= m_alloced_field_count; + + /* + When loose index scan is employed as access method, it already + computes all groups and the result of all aggregate functions. We + make space for the items of the aggregate function in the list of + functions TMP_TABLE_PARAM::items_to_copy, so that the values of + these items are stored in the temporary table. + */ + if (param->precomputed_group_by) + copy_func_count+= param->sum_func_count; + param->copy_func_count= copy_func_count; + + init_sql_alloc(key_memory_TABLE, &own_root, TABLE_ALLOC_BLOCK_SIZE, 0, + MYF(MY_THREAD_SPECIFIC)); + + if (!multi_alloc_root(&own_root, + &table, sizeof(*table), + &share, sizeof(*share), + ®_field, sizeof(Field*) * (field_count+1), + &m_default_field, sizeof(Field*) * (field_count), + &blob_field, sizeof(uint)*(field_count+1), + &m_from_field, sizeof(Field*)*field_count, + ¶m->items_to_copy, + sizeof(param->items_to_copy[0])*(copy_func_count+1), + ¶m->keyinfo, sizeof(*param->keyinfo), + &m_key_part_info, + sizeof(*m_key_part_info)*(param->group_parts+1), + ¶m->start_recinfo, + sizeof(*param->recinfo)*(field_count*2+4), + &tmpname, (uint) strlen(path)+1, + &m_group_buff, (m_group && ! m_using_unique_constraint ? 
+ param->group_length : 0), + &m_bitmaps, bitmap_buffer_size(field_count)*6, + &const_key_parts, sizeof(*const_key_parts), + NullS)) + { + DBUG_RETURN(NULL); /* purecov: inspected */ + } + /* Copy_field belongs to TMP_TABLE_PARAM, allocate it in THD mem_root */ + if (!(param->copy_field= new (thd->mem_root) Copy_field[field_count])) + { + free_root(&own_root, MYF(0)); /* purecov: inspected */ + DBUG_RETURN(NULL); /* purecov: inspected */ + } + strmov(tmpname, path); + /* make table according to fields */ + + bzero((char*) table,sizeof(*table)); + bzero((char*) reg_field, sizeof(Field*) * (field_count+1)); + bzero((char*) m_default_field, sizeof(Field*) * (field_count)); + bzero((char*) m_from_field, sizeof(Field*) * field_count); + /* const_key_parts is used in sort_and_filter_keyuse */ + bzero((char*) const_key_parts, sizeof(*const_key_parts)); + + table->mem_root= own_root; + mem_root_save= thd->mem_root; + thd->mem_root= &table->mem_root; + + table->field=reg_field; + table->const_key_parts= const_key_parts; + table->alias.set(table_alias->str, table_alias->length, table_alias_charset); + + table->reginfo.lock_type=TL_WRITE; /* Will be updated */ + table->map=1; + table->temp_pool_slot= m_temp_pool_slot; + table->copy_blobs= 1; + table->in_use= thd; + table->no_rows_with_nulls= param->force_not_null_cols; + table->expr_arena= thd; + + table->s= share; + init_tmp_table_share(thd, share, "", 0, "(temporary)", tmpname); + share->blob_field= blob_field; + share->table_charset= param->table_charset; + share->primary_key= MAX_KEY; // Indicate no primary key + if (param->schema_table) + share->db= INFORMATION_SCHEMA_NAME; + + param->using_outer_summary_function= 0; + thd->mem_root= mem_root_save; + DBUG_RETURN(table); +} + + +bool Create_tmp_table::add_fields(THD *thd, + TABLE *table, + TMP_TABLE_PARAM *param, + List &fields) +{ + DBUG_ENTER("Create_tmp_table::add_fields"); + DBUG_ASSERT(table); + DBUG_ASSERT(table->field); + DBUG_ASSERT(table->s->blob_field); + 
DBUG_ASSERT(table->s->reclength == 0); + DBUG_ASSERT(table->s->fields == 0); + DBUG_ASSERT(table->s->blob_fields == 0); + + const bool not_all_columns= !(m_select_options & TMP_TABLE_ALL_COLUMNS); + bool distinct_record_structure= m_distinct; + uint fieldnr= 0; + TABLE_SHARE *share= table->s; + Item **copy_func= param->items_to_copy; + + MEM_ROOT *mem_root_save= thd->mem_root; + thd->mem_root= &table->mem_root; + + List_iterator_fast li(fields); + Item *item; + Field **tmp_from_field= m_from_field; + while (!m_with_cycle && (item= li++)) + if (item->is_in_with_cycle()) + { + m_with_cycle= true; + /* + Following distinct_record_structure is (m_distinct || m_with_cycle) + + Note: distinct_record_structure can be true even if m_distinct is + false, for example for incr_table in recursive CTE + (see select_union_recursive::create_result_table) + */ + distinct_record_structure= true; + } + li.rewind(); + while ((item=li++)) + { + uint uneven_delta; + current_counter= (((param->hidden_field_count < (fieldnr + 1)) && + distinct_record_structure && + (!m_with_cycle || item->is_in_with_cycle())) ? + distinct : + other); + Item::Type type= item->type(); + if (type == Item::COPY_STR_ITEM) + { + item= ((Item_copy *)item)->get_item(); + type= item->type(); + } + if (not_all_columns) + { + if (item->with_sum_func() && type != Item::SUM_FUNC_ITEM) + { + if (item->used_tables() & OUTER_REF_TABLE_BIT) + item->update_used_tables(); + if ((item->real_type() == Item::SUBSELECT_ITEM) || + (item->used_tables() & ~OUTER_REF_TABLE_BIT)) + { + /* + Mark that the we have ignored an item that refers to a summary + function. We need to know this if someone is going to use + DISTINCT on the result. 
+ */ + param->using_outer_summary_function=1; + continue; + } + } + if (item->const_item() && + param->hidden_field_count < (fieldnr + 1)) + continue; // We don't have to store this + } + if (type == Item::SUM_FUNC_ITEM && !m_group && !m_save_sum_fields) + { /* Can't calc group yet */ + Item_sum *sum_item= (Item_sum *) item; + sum_item->result_field=0; + for (uint i= 0 ; i < sum_item->get_arg_count() ; i++) + { + Item *arg= sum_item->get_arg(i); + if (!arg->const_item()) + { + Item *tmp_item; + Field *new_field= + create_tmp_field(table, arg, ©_func, + tmp_from_field, &m_default_field[fieldnr], + m_group != 0, not_all_columns, + distinct_record_structure , false); + if (!new_field) + goto err; // Should be OOM + tmp_from_field++; + + thd->mem_root= mem_root_save; + if (!(tmp_item= new (thd->mem_root) + Item_field(thd, new_field))) + goto err; + ((Item_field*) tmp_item)->set_refers_to_temp_table(); + arg= sum_item->set_arg(i, thd, tmp_item); + thd->mem_root= &table->mem_root; + + uneven_delta= m_uneven_bit_length; + add_field(table, new_field, fieldnr++, param->force_not_null_cols); + m_field_count[current_counter]++; + m_uneven_bit[current_counter]+= (m_uneven_bit_length - uneven_delta); + + if (!(new_field->flags & NOT_NULL_FLAG)) + { + /* + new_field->maybe_null() is still false, it will be + changed below. But we have to setup Item_field correctly + */ + arg->set_maybe_null(); + } + if (current_counter == distinct) + new_field->flags|= FIELD_PART_OF_TMP_UNIQUE; + } + } + } + else + { + /* + The last parameter to create_tmp_field_ex() is a bit tricky: + + We need to set it to 0 in union, to get fill_record() to modify the + temporary table. + We need to set it to 1 on multi-table-update and in select to + write rows to the temporary table. + We here distinguish between UNION and multi-table-updates by the fact + that in the later case group is set to the row pointer. 
+ + The test for item->marker == MARKER_NULL_KEY is ensure we + don't create a group-by key over a bit field as heap tables + can't handle that. + */ + DBUG_ASSERT(!param->schema_table); + Field *new_field= + create_tmp_field(table, item, ©_func, + tmp_from_field, &m_default_field[fieldnr], + m_group != 0, + !param->force_copy_fields && + (not_all_columns || m_group !=0), + /* + If item->marker == MARKER_NULL_KEY then we + force create_tmp_field to create a 64-bit + longs for BIT fields because HEAP tables + can't index BIT fields directly. We do the + same for distinct, as we want the distinct + index to be usable in this case too. + */ + item->marker == MARKER_NULL_KEY || + param->bit_fields_as_long, + param->force_copy_fields); + if (unlikely(!new_field)) + { + if (unlikely(thd->is_fatal_error)) + goto err; // Got OOM + continue; // Some kind of const item + } + if (type == Item::SUM_FUNC_ITEM) + { + Item_sum *agg_item= (Item_sum *) item; + /* + Update the result field only if it has never been set, or if the + created temporary table is not to be used for subquery + materialization. + + The reason is that for subqueries that require + materialization as part of their plan, we create the + 'external' temporary table needed for IN execution, after + the 'internal' temporary table needed for grouping. Since + both the external and the internal temporary tables are + created for the same list of SELECT fields of the subquery, + setting 'result_field' for each invocation of + create_tmp_table overrides the previous value of + 'result_field'. + + The condition below prevents the creation of the external + temp table to override the 'result_field' that was set for + the internal temp table. 
+ */ + if (!agg_item->result_field || !param->materialized_subquery) + agg_item->result_field= new_field; + } + tmp_from_field++; + + uneven_delta= m_uneven_bit_length; + add_field(table, new_field, fieldnr++, param->force_not_null_cols); + m_field_count[current_counter]++; + m_uneven_bit[current_counter]+= (m_uneven_bit_length - uneven_delta); + + if (item->marker == MARKER_NULL_KEY && item->maybe_null()) + { + m_group_null_items++; + new_field->flags|= GROUP_FLAG; + } + if (current_counter == distinct) + new_field->flags|= FIELD_PART_OF_TMP_UNIQUE; + } + } + + DBUG_ASSERT(fieldnr == m_field_count[other] + m_field_count[distinct]); + DBUG_ASSERT(m_blob_count == m_blobs_count[other] + m_blobs_count[distinct]); + share->fields= fieldnr; + share->blob_fields= m_blob_count; + table->field[fieldnr]= 0; // End marker + share->blob_field[m_blob_count]= 0; // End marker + copy_func[0]= 0; // End marker + param->func_count= (uint) (copy_func - param->items_to_copy); + DBUG_ASSERT(param->func_count <= param->copy_func_count); + + share->column_bitmap_size= bitmap_buffer_size(share->fields); + + thd->mem_root= mem_root_save; + DBUG_RETURN(false); + +err: + thd->mem_root= mem_root_save; + DBUG_RETURN(true); +} + + +bool Create_tmp_table::choose_engine(THD *thd, TABLE *table, + TMP_TABLE_PARAM *param) +{ + TABLE_SHARE *share= table->s; + DBUG_ENTER("Create_tmp_table::choose_engine"); + /* + If result table is small; use a heap, otherwise TMP_TABLE_HTON (Aria) + In the future we should try making storage engine selection more dynamic + */ + + if (share->blob_fields || m_using_unique_constraint || + (thd->variables.big_tables && + !(m_select_options & SELECT_SMALL_RESULT)) || + (m_select_options & TMP_TABLE_FORCE_MYISAM) || + thd->variables.tmp_memory_table_size == 0) + { + share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON); + table->file= get_new_handler(share, &table->mem_root, + share->db_type()); + if (m_group && + (param->group_parts > table->file->max_key_parts() || + 
param->group_length > table->file->max_key_length())) + m_using_unique_constraint= true; + } + else + { + share->db_plugin= ha_lock_engine(0, heap_hton); + table->file= get_new_handler(share, &table->mem_root, + share->db_type()); + } + DBUG_RETURN(!table->file); +} + + +bool Create_tmp_table::finalize(THD *thd, + TABLE *table, + TMP_TABLE_PARAM *param, + bool do_not_open, bool keep_row_order) +{ + DBUG_ENTER("Create_tmp_table::finalize"); + DBUG_ASSERT(table); + + uint null_pack_length[2]; + uint null_pack_base[2]; + uint null_counter[2]= {0, 0}; + uint whole_null_pack_length; + bool use_packed_rows= false; + bool save_abort_on_warning; + uchar *pos; + uchar *null_flags; + KEY *keyinfo; + TMP_ENGINE_COLUMNDEF *recinfo; + TABLE_SHARE *share= table->s; + Copy_field *copy= param->copy_field; + MEM_ROOT *mem_root_save= thd->mem_root; + thd->mem_root= &table->mem_root; + + DBUG_ASSERT(m_alloced_field_count >= share->fields); + DBUG_ASSERT(m_alloced_field_count >= share->blob_fields); + + if (choose_engine(thd, table, param)) + goto err; + + if (table->file->set_ha_share_ref(&share->ha_share)) + { + delete table->file; + table->file= 0; + goto err; + } + table->file->set_table(table); + + if (!m_using_unique_constraint) + share->reclength+= m_group_null_items; // null flag is stored separately + + if (share->blob_fields == 0) + { + /* We need to ensure that first byte is not 0 for the delete link */ + if (m_field_count[other]) + m_null_count[other]++; + else + m_null_count[distinct]++; + } + + null_pack_length[other]= (m_null_count[other] + 7 + + m_uneven_bit[other]) / 8; + null_pack_base[other]= 0; + null_pack_length[distinct]= (m_null_count[distinct] + 7 + + m_uneven_bit[distinct]) / 8; + null_pack_base[distinct]= null_pack_length[other]; + whole_null_pack_length= null_pack_length[other] + + null_pack_length[distinct]; + share->reclength+= whole_null_pack_length; + if (!share->reclength) + share->reclength= 1; // Dummy select + share->stored_rec_length= 
share->reclength; + /* Use packed rows if there is blobs or a lot of space to gain */ + if (share->blob_fields || + (string_total_length() >= STRING_TOTAL_LENGTH_TO_PACK_ROWS && + (share->reclength / string_total_length() <= RATIO_TO_PACK_ROWS || + string_total_length() / string_count() >= AVG_STRING_LENGTH_TO_PACK_ROWS))) + use_packed_rows= 1; + + { + uint alloc_length= ALIGN_SIZE(share->reclength + MI_UNIQUE_HASH_LENGTH+1); + share->rec_buff_length= alloc_length; + if (!(table->record[0]= (uchar*) + alloc_root(&table->mem_root, alloc_length*3))) + goto err; + table->record[1]= table->record[0]+alloc_length; + share->default_values= table->record[1]+alloc_length; + } + + setup_tmp_table_column_bitmaps(table, m_bitmaps, table->s->fields); + + recinfo=param->start_recinfo; + null_flags=(uchar*) table->record[0]; + pos=table->record[0]+ whole_null_pack_length; + if (whole_null_pack_length) + { + bzero((uchar*) recinfo,sizeof(*recinfo)); + recinfo->type=FIELD_NORMAL; + recinfo->length= whole_null_pack_length; + recinfo++; + bfill(null_flags, whole_null_pack_length, 255); // Set null fields + + table->null_flags= (uchar*) table->record[0]; + share->null_fields= m_null_count[other] + m_null_count[distinct]; + share->null_bytes= share->null_bytes_for_compare= whole_null_pack_length; + } + + if (share->blob_fields == 0) + { + null_counter[(m_field_count[other] ? other : distinct)]++; + } + + /* Protect against warnings in field_conv() in the next loop*/ + save_abort_on_warning= thd->abort_on_warning; + thd->abort_on_warning= 0; + + for (uint i= 0; i < share->fields; i++, recinfo++) + { + Field *field= table->field[i]; + uint length; + bzero((uchar*) recinfo,sizeof(*recinfo)); + + current_counter= ((field->flags & FIELD_PART_OF_TMP_UNIQUE) ? 
+ distinct : + other); + + if (!(field->flags & NOT_NULL_FLAG)) + { + recinfo->null_bit= (uint8)1 << (null_counter[current_counter] & 7); + recinfo->null_pos= (null_pack_base[current_counter] + + null_counter[current_counter]/8); + field->move_field(pos, null_flags + recinfo->null_pos, recinfo->null_bit); + null_counter[current_counter]++; + } + else + field->move_field(pos,(uchar*) 0,0); + if (field->type() == MYSQL_TYPE_BIT) + { + /* We have to reserve place for extra bits among null bits */ + ((Field_bit*) field)->set_bit_ptr(null_flags + + null_pack_base[current_counter] + + null_counter[current_counter]/8, + null_counter[current_counter] & 7); + null_counter[current_counter]+= (field->field_length & 7); + } + field->reset(); + + /* + Test if there is a default field value. The test for ->ptr is to skip + 'offset' fields generated by initialize_tables + */ + if (m_default_field[i] && m_default_field[i]->ptr) + { + /* + default_field[i] is set only in the cases when 'field' can + inherit the default value that is defined for the field referred + by the Item_field object from which 'field' has been created. + */ + Field *orig_field= m_default_field[i]; + /* Get the value from default_values */ + if (orig_field->is_null_in_record(orig_field->table->s->default_values)) + field->set_null(); + else + { + /* + Copy default value. We have to use field_conv() for copy, instead of + memcpy(), because bit_fields may be stored differently. 
+ But otherwise we copy as is, in particular, ignore NO_ZERO_DATE, etc + */ + Use_relaxed_field_copy urfc(thd); + my_ptrdiff_t ptr_diff= (orig_field->table->s->default_values - + orig_field->table->record[0]); + field->set_notnull(); + orig_field->move_field_offset(ptr_diff); + field_conv(field, orig_field); + orig_field->move_field_offset(-ptr_diff); + } + } + + if (m_from_field[i]) + { /* Not a table Item */ + copy->set(field, m_from_field[i], m_save_sum_fields); + copy++; + } + length=field->pack_length_in_rec(); + pos+= length; + + /* Make entry for create table */ + recinfo->length=length; + recinfo->type= field->tmp_engine_column_type(use_packed_rows); + + // fix table name in field entry + field->set_table_name(&table->alias); + } + /* Handle group_null_items */ + bzero(pos, table->s->reclength - (pos - table->record[0])); + MEM_CHECK_DEFINED(table->record[0], table->s->reclength); + + thd->abort_on_warning= save_abort_on_warning; + param->copy_field_end= copy; + param->recinfo= recinfo; // Pointer to after last field + store_record(table,s->default_values); // Make empty default record + + if (thd->variables.tmp_memory_table_size == ~ (ulonglong) 0) // No limit + share->max_rows= ~(ha_rows) 0; + else + share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ? + MY_MIN(thd->variables.tmp_memory_table_size, + thd->variables.max_heap_table_size) : + thd->variables.tmp_disk_table_size) / + share->reclength); + set_if_bigger(share->max_rows,1); // For dummy start options + /* + Push the LIMIT clause to the temporary table creation, so that we + materialize only up to 'rows_limit' records instead of all result records. 
+ */ + set_if_smaller(share->max_rows, m_rows_limit); + param->end_write_records= m_rows_limit; + + keyinfo= param->keyinfo; + + if (m_group) + { + DBUG_PRINT("info",("Creating group key in temporary table")); + table->group= m_group; /* Table is grouped by key */ + param->group_buff= m_group_buff; + share->keys=1; + share->uniques= MY_TEST(m_using_unique_constraint); + table->key_info= table->s->key_info= keyinfo; + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); + keyinfo->key_part= m_key_part_info; + keyinfo->flags=HA_NOSAME | HA_BINARY_PACK_KEY | HA_PACK_KEY; + keyinfo->ext_key_flags= keyinfo->flags; + keyinfo->usable_key_parts=keyinfo->user_defined_key_parts= param->group_parts; + keyinfo->ext_key_parts= keyinfo->user_defined_key_parts; + keyinfo->key_length=0; + keyinfo->rec_per_key=NULL; + keyinfo->read_stats= NULL; + keyinfo->collected_stats= NULL; + keyinfo->algorithm= HA_KEY_ALG_UNDEF; + keyinfo->is_statistics_from_stat_tables= FALSE; + keyinfo->name= group_key; + ORDER *cur_group= m_group; + for (; cur_group ; cur_group= cur_group->next, m_key_part_info++) + { + Field *field=(*cur_group->item)->get_tmp_table_field(); + DBUG_ASSERT(field->table == table); + bool maybe_null=(*cur_group->item)->maybe_null(); + m_key_part_info->null_bit=0; + m_key_part_info->field= field; + m_key_part_info->fieldnr= field->field_index + 1; + if (cur_group == m_group) + field->key_start.set_bit(0); + m_key_part_info->offset= field->offset(table->record[0]); + m_key_part_info->length= (uint16) field->key_length(); + m_key_part_info->type= (uint8) field->key_type(); + m_key_part_info->key_type = + ((ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_TEXT || + (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT1 || + (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT2) ? 
+ 0 : FIELDFLAG_BINARY; + m_key_part_info->key_part_flag= 0; + if (!m_using_unique_constraint) + { + cur_group->buff=(char*) m_group_buff; + + if (maybe_null && !field->null_bit) + { + /* + This can only happen in the unusual case where an outer join + table was found to be not-nullable by the optimizer and we + the item can't really be null. + We solve this by marking the item as !maybe_null to ensure + that the key,field and item definition match. + */ + maybe_null= 0; + (*cur_group->item)->base_flags&= ~item_base_t::MAYBE_NULL; + } + + if (!(cur_group->field= field->new_key_field(thd->mem_root,table, + m_group_buff + + MY_TEST(maybe_null), + m_key_part_info->length, + field->null_ptr, + field->null_bit))) + goto err; /* purecov: inspected */ + + if (maybe_null) + { + /* + To be able to group on NULL, we reserved place in group_buff + for the NULL flag just before the column. (see above). + The field data is after this flag. + The NULL flag is updated in 'end_update()' and 'end_write()' + */ + keyinfo->flags|= HA_NULL_ARE_EQUAL; // def. that NULL == NULL + m_key_part_info->null_bit=field->null_bit; + m_key_part_info->null_offset= (uint) (field->null_ptr - + (uchar*) table->record[0]); + cur_group->buff++; // Pointer to field data + m_group_buff++; // Skipp null flag + } + m_group_buff+= cur_group->field->pack_length(); + } + keyinfo->key_length+= m_key_part_info->length; + } + /* + Ensure we didn't overrun the group buffer. The < is only true when + some maybe_null fields was changed to be not null fields. + */ + DBUG_ASSERT(m_using_unique_constraint || + m_group_buff <= param->group_buff + param->group_length); + } + + if (m_distinct && (share->fields != param->hidden_field_count || + m_with_cycle)) + { + uint i; + Field **reg_field; + /* + Create an unique key or an unique constraint over all columns + that should be in the result. 
In the temporary table, there are + 'param->hidden_field_count' extra columns, whose null bits are stored + in the first 'hidden_null_pack_length' bytes of the row. + */ + DBUG_PRINT("info",("hidden_field_count: %d", param->hidden_field_count)); + + if (m_blobs_count[distinct]) + { + /* + Special mode for index creation in MyISAM used to support unique + indexes on blobs with arbitrary length. Such indexes cannot be + used for lookups. + */ + share->uniques= 1; + } + keyinfo->user_defined_key_parts= m_field_count[distinct] + + (share->uniques ? MY_TEST(null_pack_length[distinct]) : 0); + keyinfo->ext_key_parts= keyinfo->user_defined_key_parts; + keyinfo->usable_key_parts= keyinfo->user_defined_key_parts; + table->distinct= 1; + share->keys= 1; + if (!(m_key_part_info= (KEY_PART_INFO*) + alloc_root(&table->mem_root, + keyinfo->user_defined_key_parts * sizeof(KEY_PART_INFO)))) + goto err; + bzero((void*) m_key_part_info, keyinfo->user_defined_key_parts * sizeof(KEY_PART_INFO)); + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); + table->key_info= table->s->key_info= keyinfo; + keyinfo->key_part= m_key_part_info; + keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL | HA_BINARY_PACK_KEY | HA_PACK_KEY; + keyinfo->ext_key_flags= keyinfo->flags; + keyinfo->key_length= 0; // Will compute the sum of the parts below. + keyinfo->name= distinct_key; + keyinfo->algorithm= HA_KEY_ALG_UNDEF; + keyinfo->is_statistics_from_stat_tables= FALSE; + keyinfo->read_stats= NULL; + keyinfo->collected_stats= NULL; + + /* + Needed by non-merged semi-joins: SJ-Materialized table must have a valid + rec_per_key array, because it participates in join optimization. Since + the table has no data, the only statistics we can provide is "unknown", + i.e. zero values. + + (For table record count, we calculate and set JOIN_TAB::found_records, + see get_delayed_table_estimates()). 
+ */ + size_t rpk_size= keyinfo->user_defined_key_parts * sizeof(keyinfo->rec_per_key[0]); + if (!(keyinfo->rec_per_key= (ulong*) alloc_root(&table->mem_root, + rpk_size))) + goto err; + bzero(keyinfo->rec_per_key, rpk_size); + + /* + Create an extra field to hold NULL bits so that unique indexes on + blobs can distinguish NULL from 0. This extra field is not needed + when we do not use UNIQUE indexes for blobs. + */ + if (null_pack_length[distinct] && share->uniques) + { + m_key_part_info->null_bit=0; + m_key_part_info->offset= null_pack_base[distinct]; + m_key_part_info->length= null_pack_length[distinct]; + m_key_part_info->field= new Field_string(table->record[0], + (uint32) m_key_part_info->length, + (uchar*) 0, + (uint) 0, + Field::NONE, + &null_clex_str, &my_charset_bin); + if (!m_key_part_info->field) + goto err; + m_key_part_info->field->init(table); + m_key_part_info->key_type=FIELDFLAG_BINARY; + m_key_part_info->type= HA_KEYTYPE_BINARY; + m_key_part_info->fieldnr= m_key_part_info->field->field_index + 1; + m_key_part_info++; + } + /* Create a distinct key over the columns we are going to return */ + for (i= param->hidden_field_count, reg_field= table->field + i ; + i < share->fields; + i++, reg_field++) + { + if (!((*reg_field)->flags & FIELD_PART_OF_TMP_UNIQUE)) + continue; + m_key_part_info->field= *reg_field; + (*reg_field)->flags |= PART_KEY_FLAG; + if (m_key_part_info == keyinfo->key_part) + (*reg_field)->key_start.set_bit(0); + m_key_part_info->null_bit= (*reg_field)->null_bit; + m_key_part_info->null_offset= (uint) ((*reg_field)->null_ptr - + (uchar*) table->record[0]); + + m_key_part_info->offset= (*reg_field)->offset(table->record[0]); + m_key_part_info->length= (uint16) (*reg_field)->pack_length(); + m_key_part_info->fieldnr= (*reg_field)->field_index + 1; + /* TODO: + The below method of computing the key format length of the + key part is a copy/paste from opt_range.cc, and table.cc. + This should be factored out, e.g. as a method of Field. 
+ In addition it is not clear if any of the Field::*_length + methods is supposed to compute the same length. If so, it + might be reused. + */ + m_key_part_info->store_length= m_key_part_info->length; + + if ((*reg_field)->real_maybe_null()) + { + m_key_part_info->store_length+= HA_KEY_NULL_LENGTH; + m_key_part_info->key_part_flag |= HA_NULL_PART; + } + m_key_part_info->key_part_flag|= (*reg_field)->key_part_flag(); + m_key_part_info->store_length+= (*reg_field)->key_part_length_bytes(); + keyinfo->key_length+= m_key_part_info->store_length; + + m_key_part_info->type= (uint8) (*reg_field)->key_type(); + m_key_part_info->key_type = + ((ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_TEXT || + (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT1 || + (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT2) ? + 0 : FIELDFLAG_BINARY; + + m_key_part_info++; + } + } + + if (unlikely(thd->is_fatal_error)) // If end of memory + goto err; /* purecov: inspected */ + share->db_record_offset= 1; + table->used_for_duplicate_elimination= (param->sum_func_count == 0 && + (table->group || table->distinct)); + table->keep_row_order= keep_row_order; + + if (!do_not_open) + { + if (instantiate_tmp_table(table, param->keyinfo, param->start_recinfo, + ¶m->recinfo, m_select_options)) + goto err; + } + + /* record[0] and share->default_values should now have been set up */ + MEM_CHECK_DEFINED(table->record[0], table->s->reclength); + MEM_CHECK_DEFINED(share->default_values, table->s->reclength); + + empty_record(table); + table->status= STATUS_NO_RECORD; + thd->mem_root= mem_root_save; + + DBUG_RETURN(false); + +err: + thd->mem_root= mem_root_save; + DBUG_RETURN(true); /* purecov: inspected */ +} + + +bool Create_tmp_table::add_schema_fields(THD *thd, TABLE *table, + TMP_TABLE_PARAM *param, + const ST_SCHEMA_TABLE &schema_table) +{ + DBUG_ENTER("Create_tmp_table::add_schema_fields"); + DBUG_ASSERT(table); + DBUG_ASSERT(table->field); + 
DBUG_ASSERT(table->s->blob_field); + DBUG_ASSERT(table->s->reclength == 0); + DBUG_ASSERT(table->s->fields == 0); + DBUG_ASSERT(table->s->blob_fields == 0); + + TABLE_SHARE *share= table->s; + ST_FIELD_INFO *defs= schema_table.fields_info; + uint fieldnr; + MEM_ROOT *mem_root_save= thd->mem_root; + thd->mem_root= &table->mem_root; + + for (fieldnr= 0; !defs[fieldnr].end_marker(); fieldnr++) + { + const ST_FIELD_INFO &def= defs[fieldnr]; + Record_addr addr(def.nullable()); + const Type_handler *h= def.type_handler(); + Field *field= h->make_schema_field(&table->mem_root, table, addr, def); + if (!field) + { + thd->mem_root= mem_root_save; + DBUG_RETURN(true); // EOM + } + field->init(table); + field->flags|= NO_DEFAULT_VALUE_FLAG; + add_field(table, field, fieldnr, param->force_not_null_cols); + } + + share->fields= fieldnr; + share->blob_fields= m_blob_count; + table->field[fieldnr]= 0; // End marker + share->blob_field[m_blob_count]= 0; // End marker + param->func_count= 0; + share->column_bitmap_size= bitmap_buffer_size(share->fields); + + thd->mem_root= mem_root_save; + DBUG_RETURN(false); +} + + +void Create_tmp_table::cleanup_on_failure(THD *thd, TABLE *table) +{ + if (table) + free_tmp_table(thd, table); + if (m_temp_pool_slot != MY_BIT_NONE) + temp_pool_clear_bit(m_temp_pool_slot); +} + + +TABLE *create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List &fields, + ORDER *group, bool distinct, bool save_sum_fields, + ulonglong select_options, ha_rows rows_limit, + const LEX_CSTRING *table_alias, bool do_not_open, + bool keep_row_order) +{ + TABLE *table; + Create_tmp_table maker(group, distinct, save_sum_fields, select_options, + rows_limit); + if (!(table= maker.start(thd, param, table_alias)) || + maker.add_fields(thd, table, param, fields) || + maker.finalize(thd, table, param, do_not_open, keep_row_order)) + { + maker.cleanup_on_failure(thd, table); + return NULL; + } + return table; +} + + +TABLE *create_tmp_table_for_schema(THD *thd, TMP_TABLE_PARAM 
*param, + const ST_SCHEMA_TABLE &schema_table, + longlong select_options, + const LEX_CSTRING &table_alias, + bool do_not_open, bool keep_row_order) +{ + TABLE *table; + Create_tmp_table maker((ORDER *) NULL, false, false, + select_options, HA_POS_ERROR); + if (!(table= maker.start(thd, param, &table_alias)) || + maker.add_schema_fields(thd, table, param, schema_table) || + maker.finalize(thd, table, param, do_not_open, keep_row_order)) + { + maker.cleanup_on_failure(thd, table); + return NULL; + } + return table; +} + + +/****************************************************************************/ + +void *Virtual_tmp_table::operator new(size_t size, THD *thd) throw() +{ + return (Virtual_tmp_table *) alloc_root(thd->mem_root, size); +} + + +bool Virtual_tmp_table::init(uint field_count) +{ + uint *blob_field; + uchar *bitmaps; + DBUG_ENTER("Virtual_tmp_table::init"); + if (!multi_alloc_root(in_use->mem_root, + &s, sizeof(*s), + &field, (field_count + 1) * sizeof(Field*), + &blob_field, (field_count + 1) * sizeof(uint), + &bitmaps, bitmap_buffer_size(field_count) * 6, + NullS)) + DBUG_RETURN(true); + s->reset(); + s->blob_field= blob_field; + setup_tmp_table_column_bitmaps(this, bitmaps, field_count); + m_alloced_field_count= field_count; + DBUG_RETURN(false); +}; + + +bool Virtual_tmp_table::add(List &field_list) +{ + /* Create all fields and calculate the total length of record */ + Spvar_definition *cdef; /* column definition */ + List_iterator_fast it(field_list); + DBUG_ENTER("Virtual_tmp_table::add"); + while ((cdef= it++)) + { + Field *tmp; + Record_addr addr(f_maybe_null(cdef->pack_flag)); + if (!(tmp= cdef->make_field(s, in_use->mem_root, &addr, &cdef->field_name))) + DBUG_RETURN(true); + add(tmp); + } + DBUG_RETURN(false); +} + + +void Virtual_tmp_table::setup_field_pointers() +{ + uchar *null_pos= record[0]; + uchar *field_pos= null_pos + s->null_bytes; + uint null_bit= 1; + + for (Field **cur_ptr= field; *cur_ptr; ++cur_ptr) + { + Field *cur_field= 
*cur_ptr; + if ((cur_field->flags & NOT_NULL_FLAG)) + cur_field->move_field(field_pos); + else + { + cur_field->move_field(field_pos, (uchar*) null_pos, null_bit); + null_bit<<= 1; + if (null_bit == (uint)1 << 8) + { + ++null_pos; + null_bit= 1; + } + } + if (cur_field->type() == MYSQL_TYPE_BIT && + cur_field->key_type() == HA_KEYTYPE_BIT) + { + /* This is a Field_bit since key_type is HA_KEYTYPE_BIT */ + static_cast(cur_field)->set_bit_ptr(null_pos, null_bit); + null_bit+= cur_field->field_length & 7; + if (null_bit > 7) + { + null_pos++; + null_bit-= 8; + } + } + cur_field->reset(); + field_pos+= cur_field->pack_length(); + } +} + + +bool Virtual_tmp_table::open() +{ + // Make sure that we added all the fields we planned to: + DBUG_ASSERT(s->fields == m_alloced_field_count); + field[s->fields]= NULL; // mark the end of the list + s->blob_field[s->blob_fields]= 0; // mark the end of the list + + uint null_pack_length= (s->null_fields + 7) / 8; // NULL-bit array length + s->reclength+= null_pack_length; + s->rec_buff_length= ALIGN_SIZE(s->reclength + 1); + if (!(record[0]= (uchar*) in_use->alloc(s->rec_buff_length))) + return true; + if (null_pack_length) + { + null_flags= (uchar*) record[0]; + s->null_bytes= s->null_bytes_for_compare= null_pack_length; + } + setup_field_pointers(); + return false; +} + + +bool Virtual_tmp_table::sp_find_field_by_name(uint *idx, + const LEX_CSTRING &name) const +{ + Field *f; + for (uint i= 0; (f= field[i]); i++) + { + // Use the same comparison style with sp_context::find_variable() + if (!system_charset_info->strnncoll(f->field_name.str, f->field_name.length, + name.str, name.length)) + { + *idx= i; + return false; + } + } + return true; +} + + +bool +Virtual_tmp_table::sp_find_field_by_name_or_error(uint *idx, + const LEX_CSTRING &var_name, + const LEX_CSTRING &field_name) + const +{ + if (sp_find_field_by_name(idx, field_name)) + { + my_error(ER_ROW_VARIABLE_DOES_NOT_HAVE_FIELD, MYF(0), + var_name.str, field_name.str); + return 
true; + } + return false; +} + + +bool Virtual_tmp_table::sp_set_all_fields_from_item_list(THD *thd, + List &items) +{ + DBUG_ASSERT(s->fields == items.elements); + List_iterator it(items); + Item *item; + for (uint i= 0 ; (item= it++) ; i++) + { + if (field[i]->sp_prepare_and_store_item(thd, &item)) + return true; + } + return false; +} + + +bool Virtual_tmp_table::sp_set_all_fields_from_item(THD *thd, Item *value) +{ + DBUG_ASSERT(value->fixed()); + DBUG_ASSERT(value->cols() == s->fields); + for (uint i= 0; i < value->cols(); i++) + { + if (field[i]->sp_prepare_and_store_item(thd, value->addr(i))) + return true; + } + return false; +} + + +bool open_tmp_table(TABLE *table) +{ + int error; + if (unlikely((error= table->file->ha_open(table, table->s->path.str, O_RDWR, + HA_OPEN_TMP_TABLE | + HA_OPEN_INTERNAL_TABLE)))) + { + table->file->print_error(error, MYF(0)); /* purecov: inspected */ + table->db_stat= 0; + return 1; + } + table->db_stat= HA_OPEN_KEYFILE; + (void) table->file->extra(HA_EXTRA_QUICK); /* Faster */ + if (!table->is_created()) + { + table->set_created(); + table->in_use->inc_status_created_tmp_tables(); + } + + return 0; +} + + +#ifdef USE_ARIA_FOR_TMP_TABLES +/* + Create internal (MyISAM or Maria) temporary table + + SYNOPSIS + create_internal_tmp_table() + table Table object that describes the table to be created + keyinfo Description of the index (there is always one index) + start_recinfo engine's column descriptions + recinfo INOUT End of engine's column descriptions + options Option bits + + DESCRIPTION + Create an internal temporary table according to passed description. The table is + assumed to have one unique index or constraint. + + The passed array of TMP_ENGINE_COLUMNDEF structures must have this form: + + 1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte + when there are many nullable columns) + 2. Table columns + 3. 
One free TMP_ENGINE_COLUMNDEF element (*recinfo points here) + + This function may use the free element to create hash column for unique + constraint. + + RETURN + FALSE - OK + TRUE - Error +*/ + + +bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, + TMP_ENGINE_COLUMNDEF *start_recinfo, + TMP_ENGINE_COLUMNDEF **recinfo, + ulonglong options) +{ + int error; + MARIA_KEYDEF keydef; + MARIA_UNIQUEDEF uniquedef; + TABLE_SHARE *share= table->s; + MARIA_CREATE_INFO create_info; + DBUG_ENTER("create_internal_tmp_table"); + + if (share->keys) + { // Get keys for ni_create + bool using_unique_constraint=0; + HA_KEYSEG *seg= (HA_KEYSEG*) alloc_root(&table->mem_root, + sizeof(*seg) * keyinfo->user_defined_key_parts); + if (!seg) + goto err; + + bzero(seg, sizeof(*seg) * keyinfo->user_defined_key_parts); + /* + Note that a similar check is performed during + subquery_types_allow_materialization. See MDEV-7122 for more details as + to why. Whenever this changes, it must be updated there as well, for + all tmp_table engines. 
+ */ + if (keyinfo->key_length > table->file->max_key_length() || + keyinfo->user_defined_key_parts > table->file->max_key_parts() || + share->uniques) + { + if (!share->uniques && !(keyinfo->flags & HA_NOSAME)) + { + my_error(ER_INTERNAL_ERROR, MYF(0), + "Using too big key for internal temp tables"); + DBUG_RETURN(1); + } + + /* Can't create a key; Make a unique constraint instead of a key */ + share->keys= 0; + share->uniques= 1; + using_unique_constraint=1; + bzero((char*) &uniquedef,sizeof(uniquedef)); + uniquedef.keysegs=keyinfo->user_defined_key_parts; + uniquedef.seg=seg; + uniquedef.null_are_equal=1; + + /* Create extra column for hash value */ + bzero((uchar*) *recinfo,sizeof(**recinfo)); + (*recinfo)->type= FIELD_CHECK; + (*recinfo)->length= MARIA_UNIQUE_HASH_LENGTH; + (*recinfo)++; + + /* Avoid warnings from valgrind */ + bzero(table->record[0]+ share->reclength, MARIA_UNIQUE_HASH_LENGTH); + bzero(share->default_values+ share->reclength, MARIA_UNIQUE_HASH_LENGTH); + share->reclength+= MARIA_UNIQUE_HASH_LENGTH; + } + else + { + /* Create a key */ + bzero((char*) &keydef,sizeof(keydef)); + keydef.flag= keyinfo->flags & HA_NOSAME; + keydef.keysegs= keyinfo->user_defined_key_parts; + keydef.seg= seg; + } + for (uint i=0; i < keyinfo->user_defined_key_parts ; i++,seg++) + { + Field *field=keyinfo->key_part[i].field; + seg->flag= 0; + seg->language= field->charset()->number; + seg->length= keyinfo->key_part[i].length; + seg->start= keyinfo->key_part[i].offset; + if (field->flags & BLOB_FLAG) + { + seg->type= + ((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ? 
+ HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2); + seg->bit_start= (uint8)(field->pack_length() - + portable_sizeof_char_ptr); + seg->flag= HA_BLOB_PART; + seg->length=0; // Whole blob in unique constraint + } + else + { + seg->type= keyinfo->key_part[i].type; + /* Tell handler if it can do suffix space compression */ + if (field->real_type() == MYSQL_TYPE_STRING && + keyinfo->key_part[i].length > 32) + seg->flag|= HA_SPACE_PACK; + } + if (!(field->flags & NOT_NULL_FLAG)) + { + seg->null_bit= field->null_bit; + seg->null_pos= (uint) (field->null_ptr - (uchar*) table->record[0]); + /* + We are using a GROUP BY on something that contains NULL + In this case we have to tell Aria that two NULL should + on INSERT be regarded as the same value + */ + if (!using_unique_constraint) + keydef.flag|= HA_NULL_ARE_EQUAL; + } + } + } + bzero((char*) &create_info,sizeof(create_info)); + create_info.data_file_length= table->in_use->variables.tmp_disk_table_size; + + /* + The logic for choosing the record format: + The STATIC_RECORD format is the fastest one, because it's so simple, + so we use this by default for short rows. + BLOCK_RECORD caches both row and data, so this is generally faster than + DYNAMIC_RECORD. The one exception is when we write to tmp table and + want to use keys for duplicate elimination as with BLOCK RECORD + we first write the row, then check for key conflicts and then we have to + delete the row. The cases when this can happen is when there is + a group by and no sum functions or if distinct is used. + */ + { + enum data_file_type file_type= table->no_rows ? NO_RECORD : + (share->reclength < 64 && !share->blob_fields ? STATIC_RECORD : + table->used_for_duplicate_elimination ? DYNAMIC_RECORD : BLOCK_RECORD); + uint create_flags= HA_CREATE_TMP_TABLE | HA_CREATE_INTERNAL_TABLE | + (table->keep_row_order ? 
HA_PRESERVE_INSERT_ORDER : 0); + + if (file_type != NO_RECORD && encrypt_tmp_disk_tables) + { + /* encryption is only supported for BLOCK_RECORD */ + file_type= BLOCK_RECORD; + if (table->used_for_duplicate_elimination) + { + /* + sql-layer expect the last column to be stored/restored also + when it's null. + + This is probably a bug (that sql-layer doesn't annotate + the column as not-null) but both heap, aria-static, aria-dynamic and + myisam has this property. aria-block_record does not since it + does not store null-columns at all. + Emulate behaviour by making column not-nullable when creating the + table. + */ + uint cols= (uint)(*recinfo-start_recinfo); + start_recinfo[cols-1].null_bit= 0; + } + } + + if (unlikely((error= maria_create(share->path.str, file_type, share->keys, + &keydef, (uint) (*recinfo-start_recinfo), + start_recinfo, share->uniques, &uniquedef, + &create_info, create_flags)))) + { + table->file->print_error(error,MYF(0)); /* purecov: inspected */ + table->db_stat=0; + goto err; + } + } + + table->in_use->inc_status_created_tmp_disk_tables(); + table->in_use->inc_status_created_tmp_tables(); + share->db_record_offset= 1; + table->set_created(); + DBUG_RETURN(0); + err: + DBUG_RETURN(1); +} + +#else + +/* + Create internal (MyISAM or Maria) temporary table + + SYNOPSIS + create_internal_tmp_table() + table Table object that describes the table to be created + keyinfo Description of the index (there is always one index) + start_recinfo engine's column descriptions + recinfo INOUT End of engine's column descriptions + options Option bits + + DESCRIPTION + Create an internal temporary table according to passed description. The table is + assumed to have one unique index or constraint. + + The passed array of TMP_ENGINE_COLUMNDEF structures must have this form: + + 1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte + when there are many nullable columns) + 2. Table columns + 3. 
One free TMP_ENGINE_COLUMNDEF element (*recinfo points here) + + This function may use the free element to create hash column for unique + constraint. + + RETURN + FALSE - OK + TRUE - Error +*/ + +/* Create internal MyISAM temporary table */ + +bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, + TMP_ENGINE_COLUMNDEF *start_recinfo, + TMP_ENGINE_COLUMNDEF **recinfo, + ulonglong options) +{ + int error; + MI_KEYDEF keydef; + MI_UNIQUEDEF uniquedef; + TABLE_SHARE *share= table->s; + DBUG_ENTER("create_internal_tmp_table"); + + if (share->keys) + { // Get keys for ni_create + bool using_unique_constraint=0; + HA_KEYSEG *seg= (HA_KEYSEG*) alloc_root(&table->mem_root, + sizeof(*seg) * keyinfo->user_defined_key_parts); + if (!seg) + goto err; + + bzero(seg, sizeof(*seg) * keyinfo->user_defined_key_parts); + /* + Note that a similar check is performed during + subquery_types_allow_materialization. See MDEV-7122 for more details as + to why. Whenever this changes, it must be updated there as well, for + all tmp_table engines. 
+ */ + if (keyinfo->key_length > table->file->max_key_length() || + keyinfo->user_defined_key_parts > table->file->max_key_parts() || + share->uniques) + { + /* Can't create a key; Make a unique constraint instead of a key */ + share->keys= 0; + share->uniques= 1; + using_unique_constraint=1; + bzero((char*) &uniquedef,sizeof(uniquedef)); + uniquedef.keysegs=keyinfo->user_defined_key_parts; + uniquedef.seg=seg; + uniquedef.null_are_equal=1; + + /* Create extra column for hash value */ + bzero((uchar*) *recinfo,sizeof(**recinfo)); + (*recinfo)->type= FIELD_CHECK; + (*recinfo)->length=MI_UNIQUE_HASH_LENGTH; + (*recinfo)++; + /* Avoid warnings from valgrind */ + bzero(table->record[0]+ share->reclength, MI_UNIQUE_HASH_LENGTH); + bzero(share->default_values+ share->reclength, MI_UNIQUE_HASH_LENGTH); + share->reclength+= MI_UNIQUE_HASH_LENGTH; + } + else + { + /* Create an unique key */ + bzero((char*) &keydef,sizeof(keydef)); + keydef.flag= ((keyinfo->flags & HA_NOSAME) | HA_BINARY_PACK_KEY | + HA_PACK_KEY); + keydef.keysegs= keyinfo->user_defined_key_parts; + keydef.seg= seg; + } + for (uint i=0; i < keyinfo->user_defined_key_parts ; i++,seg++) + { + Field *field=keyinfo->key_part[i].field; + seg->flag= 0; + seg->language= field->charset()->number; + seg->length= keyinfo->key_part[i].length; + seg->start= keyinfo->key_part[i].offset; + if (field->flags & BLOB_FLAG) + { + seg->type= + ((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ? 
+ HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2); + seg->bit_start= (uint8)(field->pack_length() - portable_sizeof_char_ptr); + seg->flag= HA_BLOB_PART; + seg->length=0; // Whole blob in unique constraint + } + else + { + seg->type= keyinfo->key_part[i].type; + /* Tell handler if it can do suffix space compression */ + if (field->real_type() == MYSQL_TYPE_STRING && + keyinfo->key_part[i].length > 4) + seg->flag|= HA_SPACE_PACK; + } + if (!(field->flags & NOT_NULL_FLAG)) + { + seg->null_bit= field->null_bit; + seg->null_pos= (uint) (field->null_ptr - (uchar*) table->record[0]); + /* + We are using a GROUP BY on something that contains NULL + In this case we have to tell MyISAM that two NULL should + on INSERT be regarded as the same value + */ + if (!using_unique_constraint) + keydef.flag|= HA_NULL_ARE_EQUAL; + } + } + } + MI_CREATE_INFO create_info; + bzero((char*) &create_info,sizeof(create_info)); + create_info.data_file_length= table->in_use->variables.tmp_disk_table_size; + + if (unlikely((error= mi_create(share->path.str, share->keys, &keydef, + (uint) (*recinfo-start_recinfo), + start_recinfo, + share->uniques, &uniquedef, + &create_info, + HA_CREATE_TMP_TABLE | + HA_CREATE_INTERNAL_TABLE | + ((share->db_create_options & + HA_OPTION_PACK_RECORD) ? 
                                 HA_PACK_RECORD : 0)
                                 ))))
  {
    /* mi_create() failed: report the storage-engine error and mark the
       table as unusable before bailing out */
    table->file->print_error(error,MYF(0));	/* purecov: inspected */
    table->db_stat=0;
    goto err;
  }
  table->in_use->inc_status_created_tmp_disk_tables();
  table->in_use->inc_status_created_tmp_tables();
  share->db_record_offset= 1;
  table->set_created();
  DBUG_RETURN(0);
 err:
  DBUG_RETURN(1);
}

#endif /* USE_ARIA_FOR_TMP_TABLES */


/*
  If a HEAP table gets full, create an internal table in MyISAM or Maria
  and copy all rows to this

  @param thd                         Connection handle
  @param table                       In-memory HEAP table that became full
  @param start_recinfo               Column descriptions for the new table
  @param recinfo              IN/OUT End of column descriptions
  @param error                       Error that triggered the conversion;
                                     anything but HA_ERR_RECORD_FILE_FULL on
                                     a HEAP table is treated as fatal
  @param ignore_last_dupp_key_error  Do not fail on a duplicate-key error for
                                     the row that filled the HEAP table
  @param is_duplicate         OUT    If non-NULL, set to TRUE when that last
                                     row turned out to be a duplicate

  @return 0 ok (table now points at the on-disk table), 1 error
*/


bool
create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
                                    TMP_ENGINE_COLUMNDEF *start_recinfo,
                                    TMP_ENGINE_COLUMNDEF **recinfo,
                                    int error,
                                    bool ignore_last_dupp_key_error,
                                    bool *is_duplicate)
{
  TABLE new_table;
  TABLE_SHARE share;
  const char *save_proc_info;
  int write_err= 0;
  DBUG_ENTER("create_internal_tmp_table_from_heap");
  if (is_duplicate)
    *is_duplicate= FALSE;

  if (table->s->db_type() != heap_hton || error != HA_ERR_RECORD_FILE_FULL)
  {
    /*
      We don't want this error to be converted to a warning, e.g. in case of
      INSERT IGNORE ... SELECT.
    */
    table->file->print_error(error, MYF(ME_FATAL));
    DBUG_RETURN(1);
  }

  /*
    Work on local copies of the TABLE and TABLE_SHARE so the on-disk table
    can be fully built and filled before being swapped into place; the
    original HEAP table stays intact until the very end.
  */
  new_table= *table;
  share= *table->s;
  new_table.s= &share;
  new_table.s->db_plugin= ha_lock_engine(thd, TMP_ENGINE_HTON);
  if (unlikely(!(new_table.file= get_new_handler(&share, &new_table.mem_root,
                                                 TMP_ENGINE_HTON))))
    DBUG_RETURN(1);                             // End of memory

  if (unlikely(new_table.file->set_ha_share_ref(&share.ha_share)))
  {
    delete new_table.file;
    DBUG_RETURN(1);
  }

  save_proc_info=thd->proc_info;
  THD_STAGE_INFO(thd, stage_converting_heap_to_myisam);

  new_table.no_rows= table->no_rows;
  if (create_internal_tmp_table(&new_table, table->key_info, start_recinfo,
                                recinfo,
                                thd->lex->first_select_lex()->options |
                                thd->variables.option_bits))
    goto err2;
  if (open_tmp_table(&new_table))
    goto err1;
  if (table->file->indexes_are_disabled())
    new_table.file->ha_disable_indexes(HA_KEY_SWITCH_ALL);
  table->file->ha_index_or_rnd_end();
  if (table->file->ha_rnd_init_with_error(1))
    DBUG_RETURN(1);
  if (new_table.no_rows)
    new_table.file->extra(HA_EXTRA_NO_ROWS);
  else
  {
    /* update table->file->stats.records */
    table->file->info(HA_STATUS_VARIABLE);
    new_table.file->ha_start_bulk_insert(table->file->stats.records);
  }

  /*
    copy all old rows from heap table to MyISAM table
    This is the only code that uses record[1] to read/write but this
    is safe as this is a temporary MyISAM table without timestamp/autoincrement
    or partitioning.
  */
  while (!table->file->ha_rnd_next(new_table.record[1]))
  {
    write_err= new_table.file->ha_write_tmp_row(new_table.record[1]);
    DBUG_EXECUTE_IF("raise_error", write_err= HA_ERR_FOUND_DUPP_KEY ;);
    if (write_err)
      goto err;
    if (unlikely(thd->check_killed()))
      goto err_killed;
  }
  if (!new_table.no_rows && new_table.file->ha_end_bulk_insert())
    goto err;
  /* copy row that filled HEAP table */
  if (unlikely((write_err=new_table.file->ha_write_tmp_row(table->record[0]))))
  {
    if (new_table.file->is_fatal_error(write_err, HA_CHECK_DUP) ||
        !ignore_last_dupp_key_error)
      goto err;
    if (is_duplicate)
      *is_duplicate= TRUE;
  }
  else
  {
    if (is_duplicate)
      *is_duplicate= FALSE;
  }

  /* remove heap table and change to use myisam table */
  (void) table->file->ha_rnd_end();
  (void) table->file->ha_close();              // This deletes the table !
  delete table->file;
  table->file=0;
  plugin_unlock(0, table->s->db_plugin);
  share.db_plugin= my_plugin_lock(0, share.db_plugin);
  new_table.s= table->s;                       // Keep old share
  *table= new_table;
  *table->s= share;

  table->file->change_table_ptr(table, table->s);
  table->use_all_columns();
  if (save_proc_info)
    thd_proc_info(thd, (!strcmp(save_proc_info,"Copying to tmp table") ?
                        "Copying to tmp table on disk" : save_proc_info));
  DBUG_RETURN(0);

 err:
  DBUG_PRINT("error",("Got error: %d",write_err));
  table->file->print_error(write_err, MYF(0));
err_killed:
  (void) table->file->ha_rnd_end();
  (void) new_table.file->ha_close();
 err1:
  TMP_ENGINE_HTON->drop_table(TMP_ENGINE_HTON, new_table.s->path.str);
 err2:
  delete new_table.file;
  thd_proc_info(thd, save_proc_info);
  table->mem_root= new_table.mem_root;
  DBUG_RETURN(1);
}


/*
  Free a temporary table created by the tmp-table machinery.

  Drops the underlying storage if the table was actually created,
  accumulates its on-disk sizes into THD::tmp_tables_size, frees the
  per-field blob buffers, releases the temp-pool slot and engine plugin
  reference, and finally frees the MEM_ROOT everything was allocated in.
*/
void
free_tmp_table(THD *thd, TABLE *entry)
{
  /* Copy the root first: the TABLE itself lives inside this MEM_ROOT */
  MEM_ROOT own_root= entry->mem_root;
  const char *save_proc_info;
  DBUG_ENTER("free_tmp_table");
  DBUG_PRINT("enter",("table: %s  alias: %s",entry->s->table_name.str,
                      entry->alias.c_ptr()));

  save_proc_info=thd->proc_info;
  THD_STAGE_INFO(thd, stage_removing_tmp_table);

  if (entry->file && entry->is_created())
  {
    if (entry->db_stat)
    {
      /* The table was properly opened in open_tmp_table() */
      entry->file->ha_index_or_rnd_end();
      entry->file->info(HA_STATUS_VARIABLE);
      thd->tmp_tables_size+= (entry->file->stats.data_file_length +
                              entry->file->stats.index_file_length);
    }
    entry->file->ha_drop_table(entry->s->path.str);
    delete entry->file;
    entry->file= NULL;
    entry->reset_created();
  }

  /* free blobs */
  for (Field **ptr=entry->field ; *ptr ; ptr++)
    (*ptr)->free();

  if (entry->temp_pool_slot != MY_BIT_NONE)
    temp_pool_clear_bit(entry->temp_pool_slot);

  plugin_unlock(0, entry->s->db_plugin);
  entry->alias.free();

  if (entry->pos_in_table_list && entry->pos_in_table_list->table)
  {
    DBUG_ASSERT(entry->pos_in_table_list->table == entry);
    entry->pos_in_table_list->table= NULL;
  }

  free_root(&own_root, MYF(0)); /* the table is allocated in its own root */
  thd_proc_info(thd, save_proc_info);

  DBUG_VOID_RETURN;
}


/**
  @brief
  Set write_func of AGGR_OP object

  @param join_tab JOIN_TAB of the corresponding tmp table

  @details
  Function sets up write_func according to how AGGR_OP object
 that
  is attached to the given join_tab will be used in the query.
*/

void set_postjoin_aggr_write_func(JOIN_TAB *tab)
{
  JOIN *join= tab->join;
  TABLE *table= tab->table;
  AGGR_OP *aggr= tab->aggr;
  TMP_TABLE_PARAM *tmp_tbl= tab->tmp_table_param;

  DBUG_ASSERT(table && aggr);

  /*
    Grouped query materialized into a keyed tmp table: existing group rows
    are updated in place (end_update / end_unique_update).
  */
  if (table->group && tmp_tbl->sum_func_count &&
      !tmp_tbl->precomputed_group_by)
  {
    /*
      Note for MyISAM tmp tables: if uniques is true keys won't be
      created.
    */
    if (table->s->keys && !table->s->uniques)
    {
      DBUG_PRINT("info",("Using end_update"));
      aggr->set_write_func(end_update);
    }
    else
    {
      DBUG_PRINT("info",("Using end_unique_update"));
      aggr->set_write_func(end_unique_update);
    }
  }
  else if (join->sort_and_group && !tmp_tbl->precomputed_group_by &&
           !join->sort_and_group_aggr_tab && join->tables_list &&
           join->top_join_tab_count)
  {
    /* Input arrives in group order; only one tab may own this role */
    DBUG_PRINT("info",("Using end_write_group"));
    aggr->set_write_func(end_write_group);
    join->sort_and_group_aggr_tab= tab;
  }
  else
  {
    DBUG_PRINT("info",("Using end_write"));
    aggr->set_write_func(end_write);
    if (tmp_tbl->precomputed_group_by)
    {
      /*
        A preceding call to create_tmp_table in the case when loose
        index scan is used guarantees that
        TMP_TABLE_PARAM::items_to_copy has enough space for the group
        by functions. It is OK here to use memcpy since we copy
        Item_sum pointers into an array of Item pointers.
      */
      memcpy(tmp_tbl->items_to_copy + tmp_tbl->func_count,
             join->sum_funcs,
             sizeof(Item*)*tmp_tbl->sum_func_count);
      tmp_tbl->items_to_copy[tmp_tbl->func_count+tmp_tbl->sum_func_count]= 0;
    }
  }
}


/**
  @details
  Rows produced by a join sweep may end up in a temporary table or be sent
  to a client. Set the function of the nested loop join algorithm which
  handles final fully constructed and matched records.

  @param join   join to setup the function for.

  @return
    end_select function to use. This function can't fail.
*/

Next_select_func setup_end_select_func(JOIN *join)
{
  TMP_TABLE_PARAM *tmp_tbl= &join->tmp_table_param;

  /*
    Choose method for presenting result to user. Use end_send_group
    if the query requires grouping (has a GROUP BY clause and/or one or
    more aggregate functions). Use end_send if the query should not
    be grouped.
  */
  if (join->sort_and_group && !tmp_tbl->precomputed_group_by)
  {
    DBUG_PRINT("info",("Using end_send_group"));
    return end_send_group;
  }
  DBUG_PRINT("info",("Using end_send"));
  return end_send;
}


/**
  Make a join of all tables and write it on socket or to table.

  @retval
    0  if ok
  @retval
    1  if error is sent
  @retval
    -1  if error should be sent
*/

static int
do_select(JOIN *join, Procedure *procedure)
{
  int rc= 0;
  enum_nested_loop_state error= NESTED_LOOP_OK;
  DBUG_ENTER("do_select");

  /* Whole query was pushed down to the storage engine */
  if (join->pushdown_query)
  {
    /* Select fields are in the temporary table */
    join->fields= &join->tmp_fields_list1;
    /* Setup HAVING to work with fields in temporary table */
    join->set_items_ref_array(join->items1);
    /* The storage engine will take care of the group by query result */
    int res= join->pushdown_query->execute(join);

    if (res)
      DBUG_RETURN(res);

    if (join->pushdown_query->store_data_in_temp_table)
    {
      JOIN_TAB *last_tab= join->join_tab + join->exec_join_tab_cnt();
      last_tab->next_select= end_send;

      enum_nested_loop_state state= last_tab->aggr->end_send();
      if (state >= NESTED_LOOP_OK)
        state= sub_select(join, last_tab, true);

      if (state < NESTED_LOOP_OK)
        res= 1;

      if (join->result->send_eof())
        res= 1;
    }
    DBUG_RETURN(res);
  }

  join->procedure= procedure;
  join->duplicate_rows= join->send_records=0;
  if (join->only_const_tables() && !join->need_tmp)
  {
    /* Degenerate case: at most one row, produced without a nested loop */
    Next_select_func end_select= setup_end_select_func(join);

    /*
      HAVING will be checked after processing aggregate functions,
      But WHERE should checked here (we alredy have read tables).
      Notice that make_join_select() splits all conditions in this case
      into two groups exec_const_cond and outer_ref_cond.
      If join->table_count == join->const_tables then it is
      sufficient to check only the condition pseudo_bits_cond.
    */
    DBUG_ASSERT(join->outer_ref_cond == NULL);
    if (!join->pseudo_bits_cond || join->pseudo_bits_cond->val_int())
    {
      // HAVING will be checked by end_select
      error= (*end_select)(join, 0, 0);
      if (error >= NESTED_LOOP_OK)
        error= (*end_select)(join, 0, 1);

      /*
        If we don't go through evaluate_join_record(), do the counting
        here.  join->send_records is increased on success in end_send(),
        so we don't touch it here.
      */
      join->join_examined_rows++;
      DBUG_ASSERT(join->join_examined_rows <= 1);
    }
    else if (join->send_row_on_empty_set())
    {
      table_map cleared_tables= (table_map) 0;
      if (end_select == end_send_group)
      {
        /*
          Was a grouping query but we did not find any rows. In this case
          we clear all tables to get null in any referenced fields,
          like in case of:
          SELECT MAX(a) AS f1, a AS f2 FROM t1 WHERE VALUE(a) IS NOT NULL
        */
        clear_tables(join, &cleared_tables);
      }
      if (!join->having || join->having->val_int())
      {
        /* NOTE(review): template argument lost in extraction; upstream
           declares this as List<Item> — confirm against the original file */
        List *columns_list= (procedure ? &join->procedure_fields_list :
                             join->fields);
        rc= join->result->send_data_with_check(*columns_list,
                                               join->unit, 0) > 0;
      }
      /*
        We have to remove the null markings from the tables as this table
        may be part of a sub query that is re-evaluated
      */
      if (cleared_tables)
        unclear_tables(join, &cleared_tables);
    }
    /*
      An error can happen when evaluating the conds
      (the join condition and piece of where clause
      relevant to this join table).
    */
    if (unlikely(join->thd->is_error()))
      error= NESTED_LOOP_ERROR;
  }
  else
  {
    DBUG_EXECUTE_IF("show_explain_probe_do_select",
                    if (dbug_user_var_equals_int(join->thd,
                                                 "show_explain_probe_select_id",
                                                 join->select_lex->select_number))
                          dbug_serve_apcs(join->thd, 1);
                   );

    /* Start the nested-loop execution at the first non-const table */
    JOIN_TAB *join_tab= join->join_tab +
                        (join->tables_list ? join->const_tables : 0);
    if (join->outer_ref_cond && !join->outer_ref_cond->val_int())
      error= NESTED_LOOP_NO_MORE_ROWS;
    else
      error= join->first_select(join,join_tab,0);
    if (error >= NESTED_LOOP_OK && likely(join->thd->killed != ABORT_QUERY))
      error= join->first_select(join,join_tab,1);
  }

  join->thd->limit_found_rows= join->send_records - join->duplicate_rows;

  if (error == NESTED_LOOP_NO_MORE_ROWS ||
      unlikely(join->thd->killed == ABORT_QUERY))
    error= NESTED_LOOP_OK;

  /*
    For "order by with limit", we cannot rely on send_records, but need
    to use the rowcount read originally into the join_tab applying the
    filesort. There cannot be any post-filtering conditions, nor any
    following join_tabs in this case, so this rowcount properly represents
    the correct number of qualifying rows.
  */
  if (join->order)
  {
    // Save # of found records prior to cleanup
    JOIN_TAB *sort_tab;
    JOIN_TAB *join_tab= join->join_tab;
    uint const_tables= join->const_tables;

    // Take record count from first non constant table or from last tmp table
    if (join->aggr_tables > 0)
      sort_tab= join_tab + join->top_join_tab_count + join->aggr_tables - 1;
    else
    {
      DBUG_ASSERT(!join->only_const_tables());
      sort_tab= join_tab + const_tables;
    }
    if (sort_tab->filesort &&
        join->select_options & OPTION_FOUND_ROWS &&
        sort_tab->filesort->sortorder &&
        sort_tab->filesort->limit != HA_POS_ERROR)
    {
      join->thd->limit_found_rows= sort_tab->records;
    }
  }

  {
    /*
      The following will unlock all cursors if the command wasn't an
      update command
    */
    join->join_free();                          // Unlock all cursors
  }
  if (error == NESTED_LOOP_OK)
  {
    /*
      Sic: this branch works even if rc != 0, e.g. when
      send_data above returns an error.
    */
    if (unlikely(join->result->send_eof()))
      rc= 1;                                    // Don't send error
    DBUG_PRINT("info",("%ld records output", (long) join->send_records));
  }
  else
    rc= -1;
#ifndef DBUG_OFF
  if (rc)
  {
    DBUG_PRINT("error",("Error: do_select() failed"));
  }
#endif
  rc= join->thd->is_error() ? -1 : rc;
  DBUG_RETURN(rc);
}


/**
  @brief
  Instantiates temporary table

  @param  table           Table object that describes the table to be
                          instantiated
  @param  keyinfo         Description of the index (there is always one index)
  @param  start_recinfo   Column descriptions
  @param  recinfo INOUT   End of column descriptions
  @param  options         Option bits

  @details
  Creates tmp table and opens it.

  @return
     FALSE - OK
     TRUE  - Error
*/

bool instantiate_tmp_table(TABLE *table, KEY *keyinfo,
                           TMP_ENGINE_COLUMNDEF *start_recinfo,
                           TMP_ENGINE_COLUMNDEF **recinfo,
                           ulonglong options)
{
  if (table->s->db_type() == TMP_ENGINE_HTON)
  {
    /*
      If it is not heap (in-memory) table then convert index to unique
      constrain.
    */
    MEM_CHECK_DEFINED(table->record[0], table->s->reclength);
    if (create_internal_tmp_table(table, keyinfo, start_recinfo, recinfo,
                                  options))
      return TRUE;
    // Make empty record so random data is not written to disk
    empty_record(table);
    table->status= STATUS_NO_RECORD;
  }
  if (open_tmp_table(table))
    return TRUE;

  return FALSE;
}


/**
  @brief
  Accumulate rows of the result of an aggregation operation in a tmp table

  @param join  pointer to the structure providing all context info for the query
  @param join_tab the JOIN_TAB object to which the operation is attached
  @param end_records  TRUE <=> all records were accumulated, send them further

  @details
  This function accumulates records of the aggreagation operation for
  the node join_tab from the execution plan in a tmp table. To add a new
  record the function calls join_tab->aggr->put_records.
  When there is no more records to save, in this
  case the end_of_records argument == true, function tells the operation to
  send records further by calling aggr->send_records().
  When all records are sent this function passes 'end_of_records' signal
  further by calling sub_select() with end_of_records argument set to
  true. After that aggr->end_send() is called to tell the operation that
  it could end internal buffer scan.

  @note
  This function is not expected to be called when dynamic range scan is
  used to scan join_tab because range scans aren't used for tmp tables.

  @return
    return one of enum_nested_loop_state.
*/

enum_nested_loop_state
sub_select_postjoin_aggr(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
{
  enum_nested_loop_state rc;
  AGGR_OP *aggr= join_tab->aggr;

  /* This function cannot be called if join_tab has no associated aggregation */
  DBUG_ASSERT(aggr != NULL);

  DBUG_ENTER("sub_select_aggr_tab");

  if (join->thd->killed)
  {
    /* The user has aborted the execution of the query */
    join->thd->send_kill_message();
    DBUG_RETURN(NESTED_LOOP_KILLED);
  }

  if (end_of_records)
  {
    /* Flush accumulated rows downstream, then propagate the EOF signal */
    rc= aggr->end_send();
    if (rc >= NESTED_LOOP_OK)
      rc= sub_select(join, join_tab, end_of_records);
    DBUG_RETURN(rc);
  }

  /* Normal case: buffer the current partial row in the aggregation op */
  rc= aggr->put_record();

  DBUG_RETURN(rc);
}


/*
  Fill the join buffer with partial records, retrieve all full  matches for them

  SYNOPSIS
    sub_select_cache()
      join     pointer to the structure providing all context info for the query
      join_tab the first next table of the execution plan to be retrieved
      end_records  true when we need to perform final steps of the retrieval

  DESCRIPTION
    For a given table Ti= join_tab from the sequence of tables of the chosen
    execution plan T1,...,Ti,...,Tn the function just put the partial record
    t1,...,t[i-1] into the join buffer associated with table Ti unless this
    is the last record added into the buffer. In this case,  the function
    additionally finds all matching full records for all partial
    records accumulated in the buffer, after which it cleans the buffer up.
    If a partial join record t1,...,ti is extended utilizing a dynamic
    range scan then it is not put into the join buffer. Rather all matching
    records are found for it at once by the function sub_select.

  NOTES
    The function implements the algorithmic schema for both Blocked Nested
    Loop Join and Batched Key Access Join. The difference can be seen only at
    the level of of the implementation of the put_record and join_records
    virtual methods for the cache object associated with the join_tab.
    The put_record method accumulates records in the cache, while the
    join_records method builds all matching join records and send them into
    the output stream.

  RETURN
    return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
*/

enum_nested_loop_state
sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
{
  enum_nested_loop_state rc;
  JOIN_CACHE *cache= join_tab->cache;
  int err;
  DBUG_ENTER("sub_select_cache");

  /*
    This function cannot be called if join_tab has no associated join
    buffer
  */
  DBUG_ASSERT(cache != NULL);

  join_tab->cache->reset_join(join);

  if (end_of_records)
  {
    /* Drain the buffer: join everything accumulated so far */
    rc= cache->join_records(FALSE);
    if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS ||
        rc == NESTED_LOOP_QUERY_LIMIT)
      rc= sub_select(join, join_tab, end_of_records);
    DBUG_RETURN(rc);
  }
  if (unlikely(join->thd->check_killed()))
  {
    /* The user has aborted the execution of the query */
    DBUG_RETURN(NESTED_LOOP_KILLED);
  }
  join_tab->jbuf_loops_tracker->on_scan_init();

  if (!(err= test_if_use_dynamic_range_scan(join_tab)))
  {
    if (!cache->put_record())
      DBUG_RETURN(NESTED_LOOP_OK);
    /*
      We has decided that after the record we've just put into the buffer
      won't add any more records. Now try to find all the matching
      extensions for all records in the buffer.
    */
    rc= cache->join_records(FALSE);
    DBUG_RETURN(rc);
  }

  if (err < 0)
    DBUG_RETURN(NESTED_LOOP_ERROR);

  /*
    TODO: Check whether we really need the call below and we can't do
          without it. If it's not the case remove it.
  */
  rc= cache->join_records(TRUE);
  if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS ||
      rc == NESTED_LOOP_QUERY_LIMIT)
    rc= sub_select(join, join_tab, end_of_records);
  DBUG_RETURN(rc);
}

/**
  Retrieve records ends with a given beginning from the result of a join.
+ + For a given partial join record consisting of records from the tables + preceding the table join_tab in the execution plan, the function + retrieves all matching full records from the result set and + send them to the result set stream. + + @note + The function effectively implements the final (n-k) nested loops + of nested loops join algorithm, where k is the ordinal number of + the join_tab table and n is the total number of tables in the join query. + It performs nested loops joins with all conjunctive predicates from + the where condition pushed as low to the tables as possible. + E.g. for the query + @code + SELECT * FROM t1,t2,t3 + WHERE t1.a=t2.a AND t2.b=t3.b AND t1.a BETWEEN 5 AND 9 + @endcode + the predicate (t1.a BETWEEN 5 AND 9) will be pushed to table t1, + given the selected plan prescribes to nest retrievals of the + joined tables in the following order: t1,t2,t3. + A pushed down predicate are attached to the table which it pushed to, + at the field join_tab->select_cond. + When executing a nested loop of level k the function runs through + the rows of 'join_tab' and for each row checks the pushed condition + attached to the table. + If it is false the function moves to the next row of the + table. If the condition is true the function recursively executes (n-k-1) + remaining embedded nested loops. + The situation becomes more complicated if outer joins are involved in + the execution plan. In this case the pushed down predicates can be + checked only at certain conditions. + Suppose for the query + @code + SELECT * FROM t1 LEFT JOIN (t2,t3) ON t3.a=t1.a + WHERE t1>2 AND (t2.b>5 OR t2.b IS NULL) + @endcode + the optimizer has chosen a plan with the table order t1,t2,t3. + The predicate P1=t1>2 will be pushed down to the table t1, while the + predicate P2=(t2.b>5 OR t2.b IS NULL) will be attached to the table + t2. But the second predicate can not be unconditionally tested right + after a row from t2 has been read. 
This can be done only after the + first row with t3.a=t1.a has been encountered. + Thus, the second predicate P2 is supplied with a guarded value that are + stored in the field 'found' of the first inner table for the outer join + (table t2). When the first row with t3.a=t1.a for the current row + of table t1 appears, the value becomes true. For now on the predicate + is evaluated immediately after the row of table t2 has been read. + When the first row with t3.a=t1.a has been encountered all + conditions attached to the inner tables t2,t3 must be evaluated. + Only when all of them are true the row is sent to the output stream. + If not, the function returns to the lowest nest level that has a false + attached condition. + The predicates from on expressions are also pushed down. If in the + the above example the on expression were (t3.a=t1.a AND t2.a=t1.a), + then t1.a=t2.a would be pushed down to table t2, and without any + guard. + If after the run through all rows of table t2, the first inner table + for the outer join operation, it turns out that no matches are + found for the current row of t1, then current row from table t1 + is complemented by nulls for t2 and t3. Then the pushed down predicates + are checked for the composed row almost in the same way as it had + been done for the first row with a match. The only difference is + the predicates from on expressions are not checked. + + @par + @b IMPLEMENTATION + @par + The function forms output rows for a current partial join of k + tables tables recursively. + For each partial join record ending with a certain row from + join_tab it calls sub_select that builds all possible matching + tails from the result set. + To be able check predicates conditionally items of the class + Item_func_trig_cond are employed. + An object of this class is constructed from an item of class COND + and a pointer to a guarding boolean variable. 
  When the value of the guard variable is true the value of the object
  is the same as the value of the predicate, otherwise it just returns
  true.
  To carry out a return to a nested loop level of join table t the pointer
  to t is remembered in the field 'return_tab' of the join structure.
  Consider the following query:
  @code
        SELECT * FROM t1,
                      LEFT JOIN
                      (t2, t3 LEFT JOIN (t4,t5) ON t5.a=t3.a)
                      ON t4.a=t2.a
           WHERE (t2.b=5 OR t2.b IS NULL) AND (t4.b=2 OR t4.b IS NULL)
  @endcode
  Suppose the chosen execution plan dictates the order t1,t2,t3,t4,t5
  and suppose for a given joined rows from tables t1,t2,t3 there are
  no rows in the result set yet.
  When first row from t5 that satisfies the on condition
  t5.a=t3.a is found, the pushed down predicate t4.b=2 OR t4.b IS NULL
  becomes 'activated', as well as the predicate t4.a=t2.a. But
  the predicate (t2.b=5 OR t2.b IS NULL) can not be checked until
  t4.a=t2.a becomes true.
  In order not to re-evaluate the predicates that were already evaluated
  as attached pushed down predicates, a pointer to the first
  most inner unmatched table is maintained in join_tab->first_unmatched.
  Thus, when the first row from t5 with t5.a=t3.a is found
  this pointer for t5 is changed from t4 to t2.

    @par
    @b STRUCTURE @b NOTES
    @par
    join_tab->first_unmatched points always backwards to the first inner
    table of the embedding nested join, if any.

  @param join      pointer to the structure providing all context info for
                   the query
  @param join_tab  the first next table of the execution plan to be retrieved
  @param end_records  true when we need to perform final steps of retrieval

  @return
    return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
*/

enum_nested_loop_state
sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
{
  DBUG_ENTER("sub_select");

  /* Force re-initialization of lazily pre-read derived tables that must be
     refreshed for every new row combination (split materialization) */
  if (join_tab->split_derived_to_update && !end_of_records)
  {
    table_map tab_map= join_tab->split_derived_to_update;
    for (uint i= 0; tab_map; i++, tab_map>>= 1)
    {
      if (tab_map & 1)
        join->map2table[i]->preread_init_done= false;
    }
  }

  /* Restore state if mark_as_null_row() have been called */
  if (join_tab->last_inner)
  {
    JOIN_TAB *last_inner_tab= join_tab->last_inner;
    for (JOIN_TAB *jt= join_tab; jt <= last_inner_tab; jt++)
      jt->table->null_row= 0;
  }
  else
    join_tab->table->null_row=0;

  if (end_of_records)
  {
    /* Just propagate the end-of-records signal down the plan */
    enum_nested_loop_state nls=
      (*join_tab->next_select)(join,join_tab+1,end_of_records);
    DBUG_RETURN(nls);
  }
  join_tab->tracker->r_scans++;

  int error;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  READ_RECORD *info= &join_tab->read_record;


  /* Flush semi-join duplicate-weedout tmp tables attached to this tab */
  for (SJ_TMP_TABLE *flush_dups_table= join_tab->flush_weedout_table;
       flush_dups_table;
       flush_dups_table= flush_dups_table->next_flush_table)
  {
    flush_dups_table->sj_weedout_delete_rows();
  }

  if (!join_tab->preread_init_done && join_tab->preread_init())
    DBUG_RETURN(NESTED_LOOP_ERROR);

  if (join_tab->build_range_rowid_filter_if_needed())
    DBUG_RETURN(NESTED_LOOP_ERROR);

  /* An empty rowid filter means no row of this table can match */
  if (join_tab->rowid_filter && join_tab->rowid_filter->is_empty())
    rc= NESTED_LOOP_NO_MORE_ROWS;

  join->return_tab= join_tab;

  if (join_tab->last_inner)
  {
    /* join_tab is the first inner table for an outer join operation. */

    /* Set initial state of guard variables for this table.*/
    join_tab->found=0;
    join_tab->not_null_compl= 1;

    /* Set first_unmatched for the last inner table of this group */
    join_tab->last_inner->first_unmatched= join_tab;
    if (join_tab->on_precond && !join_tab->on_precond->val_int())
      rc= NESTED_LOOP_NO_MORE_ROWS;
  }
  join->thd->get_stmt_da()->reset_current_row_for_warning(1);

  if (rc != NESTED_LOOP_NO_MORE_ROWS &&
      (rc= join_tab_execution_startup(join_tab)) < 0)
    DBUG_RETURN(rc);

  if (join_tab->loosescan_match_tab)
    join_tab->loosescan_match_tab->found_match= FALSE;

  const bool pfs_batch_update= join_tab->pfs_batch_update(join);
  if (pfs_batch_update)
    join_tab->table->file->start_psi_batch_mode();

  if (rc != NESTED_LOOP_NO_MORE_ROWS)
  {
    error= (*join_tab->read_first_record)(join_tab);
    if (!error && join_tab->keep_current_rowid)
      join_tab->table->file->position(join_tab->table->record[0]);
    rc= evaluate_join_record(join, join_tab, error);
  }

  /*
    Note: psergey has added the 2nd part of the following condition; the
    change should probably be made in 5.1, too.
  */
  bool skip_over= FALSE;
  while (rc == NESTED_LOOP_OK && join->return_tab >= join_tab)
  {
    if (join_tab->loosescan_match_tab &&
        join_tab->loosescan_match_tab->found_match)
    {
      /* Remember the matched key prefix so equal-key rows can be skipped */
      KEY *key= join_tab->table->key_info + join_tab->loosescan_key;
      key_copy(join_tab->loosescan_buf, join_tab->table->record[0], key,
               join_tab->loosescan_key_len);
      skip_over= TRUE;
    }

    error= info->read_record();

    if (skip_over && likely(!error))
    {
      if (!key_cmp(join_tab->table->key_info[join_tab->loosescan_key].key_part,
                   join_tab->loosescan_buf, join_tab->loosescan_key_len))
      {
        /*
          This is the LooseScan action: skip over records with the same key
          value if we already had a match for them.
        */
        continue;
      }
      join_tab->loosescan_match_tab->found_match= FALSE;
      skip_over= FALSE;
    }

    if (join_tab->keep_current_rowid && likely(!error))
      join_tab->table->file->position(join_tab->table->record[0]);

    rc= evaluate_join_record(join, join_tab, error);
  }

  /* Outer join with no match: emit the NULL-complemented row */
  if (rc == NESTED_LOOP_NO_MORE_ROWS &&
      join_tab->last_inner && !join_tab->found)
    rc= evaluate_null_complemented_join_record(join, join_tab);

  if (pfs_batch_update)
    join_tab->table->file->end_psi_batch_mode();

  if (rc == NESTED_LOOP_NO_MORE_ROWS)
    rc= NESTED_LOOP_OK;
  DBUG_RETURN(rc);
}

/**
  @brief Process one row of the nested loop join.

  This function will evaluate parts of WHERE/ON clauses that are
  applicable to the partial row on hand and in case of success
  submit this row to the next level of the nested loop.

  @param  join     - The join object
  @param  join_tab - The most inner join_tab being processed
  @param  error > 0: Error, terminate processing
                = 0: (Partial) row is available
                < 0: No more rows available at this level
  @return Nested loop state (Ok, No_more_rows, Error, Killed)
*/

static enum_nested_loop_state
evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
                     int error)
{
  bool shortcut_for_distinct= join_tab->shortcut_for_distinct;
  ha_rows found_records=join->found_records;
  COND *select_cond= join_tab->select_cond;
  bool select_cond_result= TRUE;

  DBUG_ENTER("evaluate_join_record");
  DBUG_PRINT("enter",
             ("evaluate_join_record join: %p join_tab: %p "
              "cond: %p abort: %d  alias %s",
              join, join_tab, select_cond, error,
              join_tab->table->alias.ptr()));

  if (error > 0 || unlikely(join->thd->is_error())) // Fatal error
    DBUG_RETURN(NESTED_LOOP_ERROR);
  if (error < 0)
    DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
  if (unlikely(join->thd->check_killed()))        // Aborted by user
  {
    DBUG_RETURN(NESTED_LOOP_KILLED);              /* purecov: inspected */
  }

  join_tab->tracker->r_rows++;

  if (select_cond)
  {
    select_cond_result= MY_TEST(select_cond->val_int());

    /* check for errors evaluating the condition */
    if (unlikely(join->thd->is_error()))
      DBUG_RETURN(NESTED_LOOP_ERROR);
  }

  if (!select_cond || select_cond_result)
  {
    /*
      There is no select condition or the attached pushed down
      condition is true => a match is found.
    */
    join_tab->tracker->r_rows_after_where++;

    bool found= 1;
    while (join_tab->first_unmatched && found)
    {
      /*
        The while condition is always false if join_tab is not
        the last inner join table of an outer join operation.
      */
      JOIN_TAB *first_unmatched= join_tab->first_unmatched;
      /*
        Mark that a match for current outer table is found.
        This activates push down conditional predicates attached
        to the all inner tables of the outer join.
      */
      first_unmatched->found= 1;
      for (JOIN_TAB *tab= first_unmatched; tab <= join_tab; tab++)
      {
        /*
          Check whether 'not exists' optimization can be used here.
          If tab->table->reginfo.not_exists_optimize is set to true
          then WHERE contains a conjunctive predicate IS NULL over
          a non-nullable field of tab. When activated this predicate
          will filter out all records with matches for the left part
          of the outer join whose inner tables start from the
          first_unmatched table and include table tab. To safely use
          'not exists' optimization we have to check that the
          IS NULL predicate is really activated, i.e. all guards
          that wrap it are in the 'open' state.
        */
        bool not_exists_opt_is_applicable=
          tab->table->reginfo.not_exists_optimize;
        for (JOIN_TAB *first_upper= first_unmatched->first_upper;
             not_exists_opt_is_applicable && first_upper;
             first_upper= first_upper->first_upper)
        {
          if (!first_upper->found)
            not_exists_opt_is_applicable= false;
        }
        /* Check all predicates that has just been activated. */
        /*
          Actually all predicates non-guarded by first_unmatched->found
          will be re-evaluated again. It could be fixed, but, probably,
          it's not worth doing now.
        */
        if (tab->select_cond)
        {
          const longlong res= tab->select_cond->val_int();
          if (join->thd->is_error())
            DBUG_RETURN(NESTED_LOOP_ERROR);

          if (!res)
          {
            /* The condition attached to table tab is false */
            if (tab == join_tab)
            {
              found= 0;
              if (not_exists_opt_is_applicable)
                DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
            }
            else
            {
              /*
                Set a return point if rejected predicate is attached
                not to the last table of the current nest level.
              */
              join->return_tab= tab;
              if (not_exists_opt_is_applicable)
                DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
              else
                DBUG_RETURN(NESTED_LOOP_OK);
            }
          }
        }
      }
      /*
        Check whether join_tab is not the last inner table
        for another embedding outer join.
      */
      if ((first_unmatched= first_unmatched->first_upper) &&
          first_unmatched->last_inner != join_tab)
        first_unmatched= 0;
      join_tab->first_unmatched= first_unmatched;
    }

    JOIN_TAB *return_tab= join->return_tab;
    join_tab->found_match= TRUE;

    if (join_tab->check_weed_out_table && found)
    {
      /* Semi-join duplicate weedout: drop rows already seen */
      int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd);
      DBUG_PRINT("info", ("weedout_check: %d", res));
      if (res == -1)
        DBUG_RETURN(NESTED_LOOP_ERROR);
      else if (res == 1)
        found= FALSE;
    }
    else if (join_tab->do_firstmatch)
    {
      /*
        We should return to the join_tab->do_firstmatch after we have
        enumerated all the suffixes for current prefix row combination
      */
      return_tab= join_tab->do_firstmatch;
    }

    /*
      It was not just a return to lower loop level when one
      of the newly activated predicates is evaluated as false
      (See above join->return_tab= tab).
    */
    join->join_examined_rows++;
    DBUG_PRINT("counts", ("join->examined_rows++: %lu  found: %d",
                          (ulong) join->join_examined_rows, (int) found));

    if (found)
    {
      enum enum_nested_loop_state rc;
      /* A match from join_tab is found for the current partial join. */
      rc= (*join_tab->next_select)(join, join_tab+1, 0);
      join->thd->get_stmt_da()->inc_current_row_for_warning();
      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
        DBUG_RETURN(rc);
      if (return_tab < join->return_tab)
        join->return_tab= return_tab;

      /* check for errors evaluating the condition */
      if (unlikely(join->thd->is_error()))
        DBUG_RETURN(NESTED_LOOP_ERROR);

      if (join->return_tab < join_tab)
        DBUG_RETURN(NESTED_LOOP_OK);
      /*
        Test if this was a SELECT DISTINCT query on a table that
        was not in the field list;  In this case we can abort if
        we found a row, as no new rows can be added to the result.
      */
      if (shortcut_for_distinct && found_records != join->found_records)
        DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);

      DBUG_RETURN(NESTED_LOOP_OK);
    }
  }
  else
  {
    /*
      The condition pushed down to the table join_tab rejects all rows
      with the beginning coinciding with the current partial join.
    */
    join->join_examined_rows++;
  }

  join->thd->get_stmt_da()->inc_current_row_for_warning();
  join_tab->read_record.unlock_row(join_tab);

  DBUG_RETURN(NESTED_LOOP_OK);
}

/**

  @details
  Construct a NULL complimented partial join record and feed it to the next
  level of the nested loop. This function is used in case we have
  an OUTER  join and no matching record was found.
*/

static enum_nested_loop_state
evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab)
{
  /*
    The table join_tab is the first inner table of a outer join operation
    and no matches has been found for the current outer row.
  */
  JOIN_TAB *last_inner_tab= join_tab->last_inner;
  /* Cache variables for faster loop */
  COND *select_cond;
  for ( ; join_tab <= last_inner_tab ; join_tab++)
  {
    /* Change the values of guard predicate variables.
*/ + join_tab->found= 1; + join_tab->not_null_compl= 0; + /* The outer row is complemented by nulls for each inner tables */ + restore_record(join_tab->table,s->default_values); // Make empty record + mark_as_null_row(join_tab->table); // For group by without error + select_cond= join_tab->select_cond; + /* Check all attached conditions for inner table rows. */ + if (select_cond && !select_cond->val_int()) + return NESTED_LOOP_OK; + } + join_tab--; + /* + The row complemented by nulls might be the first row + of embedding outer joins. + If so, perform the same actions as in the code + for the first regular outer join row above. + */ + for ( ; ; ) + { + JOIN_TAB *first_unmatched= join_tab->first_unmatched; + if ((first_unmatched= first_unmatched->first_upper) && + first_unmatched->last_inner != join_tab) + first_unmatched= 0; + join_tab->first_unmatched= first_unmatched; + if (!first_unmatched) + break; + first_unmatched->found= 1; + for (JOIN_TAB *tab= first_unmatched; tab <= join_tab; tab++) + { + if (tab->select_cond && !tab->select_cond->val_int()) + { + join->return_tab= tab; + return NESTED_LOOP_OK; + } + } + } + /* + The row complemented by nulls satisfies all conditions + attached to inner tables. + */ + if (join_tab->check_weed_out_table) + { + int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd); + if (res == -1) + return NESTED_LOOP_ERROR; + else if (res == 1) + return NESTED_LOOP_OK; + } + else if (join_tab->do_firstmatch) + { + /* + We should return to the join_tab->do_firstmatch after we have + enumerated all the suffixes for current prefix row combination + */ + if (join_tab->do_firstmatch < join->return_tab) + join->return_tab= join_tab->do_firstmatch; + } + + /* + Send the row complemented by nulls to be joined with the + remaining tables. 
+ */ + return (*join_tab->next_select)(join, join_tab+1, 0); +} + +/***************************************************************************** + The different ways to read a record + Returns -1 if row was not found, 0 if row was found and 1 on errors +*****************************************************************************/ + +/** Help function when we get some an error from the table handler. */ + +int report_error(TABLE *table, int error) +{ + if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND) + { + table->status= STATUS_GARBAGE; + return -1; // key not found; ok + } + /* + Locking reads can legally return also these errors, do not + print them to the .err log + */ + if (error != HA_ERR_LOCK_DEADLOCK && error != HA_ERR_LOCK_WAIT_TIMEOUT + && error != HA_ERR_TABLE_DEF_CHANGED && !table->in_use->killed) + sql_print_error("Got error %d when reading table '%s'", + error, table->s->path.str); + table->file->print_error(error,MYF(0)); + return 1; +} + + +int safe_index_read(JOIN_TAB *tab) +{ + int error; + TABLE *table= tab->table; + if (unlikely((error= + table->file->ha_index_read_map(table->record[0], + tab->ref.key_buff, + make_prev_keypart_map(tab->ref.key_parts), + HA_READ_KEY_EXACT)))) + return report_error(table, error); + return 0; +} + + +/** + Reads content of constant table + + @param tab table + @param pos position of table in query plan + + @retval 0 ok, one row was found or one NULL-complemented row was created + @retval -1 ok, no row was found and no NULL-complemented row was created + @retval 1 error +*/ + +static int +join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos) +{ + int error; + TABLE_LIST *tbl; + DBUG_ENTER("join_read_const_table"); + TABLE *table=tab->table; + table->const_table=1; + table->null_row=0; + table->status=STATUS_NO_RECORD; + + if (tab->table->pos_in_table_list->is_materialized_derived() && + !tab->table->pos_in_table_list->fill_me) + { + //TODO: don't get here at all + /* Skip materialized 
derived tables/views. */ + DBUG_RETURN(0); + } + else if (tab->table->pos_in_table_list->jtbm_subselect && + tab->table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab) + { + /* Row will not be found */ + int res; + if (tab->table->pos_in_table_list->jtbm_subselect->jtbm_const_row_found) + res= 0; + else + res= -1; + DBUG_RETURN(res); + } + else if (tab->type == JT_SYSTEM) + { + if (unlikely((error=join_read_system(tab)))) + { // Info for DESCRIBE + tab->info= ET_CONST_ROW_NOT_FOUND; + /* Mark for EXPLAIN that the row was not found */ + pos->records_read=0.0; + pos->ref_depend_map= 0; + if (!table->pos_in_table_list->outer_join || error > 0) + DBUG_RETURN(error); + } + /* + The optimizer trust the engine that when stats.records is 0, there + was no found rows + */ + DBUG_ASSERT(table->file->stats.records > 0 || error); + } + else + { + if (/*!table->file->key_read && */ + table->covering_keys.is_set(tab->ref.key) && !table->no_keyread && + (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY) + { + table->file->ha_start_keyread(tab->ref.key); + tab->index= tab->ref.key; + } + error=join_read_const(tab); + table->file->ha_end_keyread(); + if (unlikely(error)) + { + tab->info= ET_UNIQUE_ROW_NOT_FOUND; + /* Mark for EXPLAIN that the row was not found */ + pos->records_read=0.0; + pos->ref_depend_map= 0; + if (!table->pos_in_table_list->outer_join || error > 0) + DBUG_RETURN(error); + } + } + /* + Evaluate an on-expression only if it is not considered expensive. + This mainly prevents executing subqueries in optimization phase. + This is necessary since proper setup for such execution has not been + done at this stage. + */ + if (*tab->on_expr_ref && !table->null_row && + !(*tab->on_expr_ref)->is_expensive()) + { +#if !defined(DBUG_OFF) && defined(NOT_USING_ITEM_EQUAL) + /* + This test could be very useful to find bugs in the optimizer + where we would call this function with an expression that can't be + evaluated yet. 
We can't have this enabled by default as long as + have items like Item_equal, that doesn't report they are const but + they can still be called even if they contain not const items. + */ + (*tab->on_expr_ref)->update_used_tables(); + DBUG_ASSERT((*tab->on_expr_ref)->const_item()); +#endif + if ((table->null_row= MY_TEST((*tab->on_expr_ref)->val_int() == 0))) + mark_as_null_row(table); + } + if (!table->null_row && ! tab->join->mixed_implicit_grouping) + table->maybe_null= 0; + + { + JOIN *join= tab->join; + List_iterator ti(join->select_lex->leaf_tables); + /* Check appearance of new constant items in Item_equal objects */ + if (join->conds) + update_const_equal_items(thd, join->conds, tab, TRUE); + while ((tbl= ti++)) + { + TABLE_LIST *embedded; + TABLE_LIST *embedding= tbl; + do + { + embedded= embedding; + if (embedded->on_expr) + update_const_equal_items(thd, embedded->on_expr, tab, TRUE); + embedding= embedded->embedding; + } + while (embedding && + embedding->nested_join->join_list.head() == embedded); + } + } + DBUG_RETURN(0); +} + + +/** + Read a constant table when there is at most one matching row, using a table + scan. + + @param tab Table to read + + @retval 0 Row was found + @retval -1 Row was not found + @retval 1 Got an error (other than row not found) during read +*/ +static int +join_read_system(JOIN_TAB *tab) +{ + TABLE *table= tab->table; + int error; + if (table->status & STATUS_GARBAGE) // If first read + { + if (unlikely((error= + table->file->ha_read_first_row(table->record[0], + table->s->primary_key)))) + { + if (error != HA_ERR_END_OF_FILE) + return report_error(table, error); + table->const_table= 1; + mark_as_null_row(tab->table); + empty_record(table); // Make empty record + return -1; + } + store_record(table,record[1]); + } + else if (!table->status) // Only happens with left join + restore_record(table,record[1]); // restore old record + table->null_row=0; + return table->status ? 
-1 : 0; +} + + +/** + Read a table when there is at most one matching row. + + @param tab Table to read + + @retval 0 Row was found + @retval -1 Row was not found + @retval 1 Got an error (other than row not found) during read +*/ + +static int +join_read_const(JOIN_TAB *tab) +{ + int error; + TABLE *table= tab->table; + if (table->status & STATUS_GARBAGE) // If first read + { + table->status= 0; + if (cp_buffer_from_ref(tab->join->thd, table, &tab->ref)) + error=HA_ERR_KEY_NOT_FOUND; + else + { + error= table->file->ha_index_read_idx_map(table->record[0],tab->ref.key, + (uchar*) tab->ref.key_buff, + make_prev_keypart_map(tab->ref.key_parts), + HA_READ_KEY_EXACT); + } + if (unlikely(error)) + { + table->status= STATUS_NOT_FOUND; + mark_as_null_row(tab->table); + empty_record(table); + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + return report_error(table, error); + return -1; + } + store_record(table,record[1]); + } + else if (!(table->status & ~STATUS_NULL_ROW)) // Only happens with left join + { + table->status=0; + restore_record(table,record[1]); // restore old record + } + table->null_row=0; + return table->status ? -1 : 0; +} + +/* + eq_ref access method implementation: "read_first" function + + SYNOPSIS + join_read_key() + tab JOIN_TAB of the accessed table + + DESCRIPTION + This is "read_fist" function for the eq_ref access method. The difference + from ref access function is that is that it has a one-element lookup + cache (see cmp_buffer_with_ref) + + RETURN + 0 - Ok + -1 - Row not found + 1 - Error +*/ + + +static int +join_read_key(JOIN_TAB *tab) +{ + return join_read_key2(tab->join->thd, tab, tab->table, &tab->ref); +} + + +/* + eq_ref access handler but generalized a bit to support TABLE and TABLE_REF + not from the join_tab. See join_read_key for detailed synopsis. 
+*/ +int join_read_key2(THD *thd, JOIN_TAB *tab, TABLE *table, TABLE_REF *table_ref) +{ + int error; + if (!table->file->inited) + { + error= table->file->ha_index_init(table_ref->key, tab ? tab->sorted : TRUE); + if (unlikely(error)) + { + (void) report_error(table, error); + return 1; + } + } + + /* + The following is needed when one makes ref (or eq_ref) access from row + comparisons: one must call row->bring_value() to get the new values. + */ + if (tab && tab->bush_children) + { + TABLE_LIST *emb_sj_nest= tab->bush_children->start->emb_sj_nest; + emb_sj_nest->sj_subq_pred->left_exp()->bring_value(); + } + + /* TODO: Why don't we do "Late NULLs Filtering" here? */ + + if (cmp_buffer_with_ref(thd, table, table_ref) || + (table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW))) + { + if (table_ref->key_err) + { + table->status=STATUS_NOT_FOUND; + return -1; + } + /* + Moving away from the current record. Unlock the row + in the handler if it did not match the partial WHERE. + */ + if (tab && tab->ref.has_record && tab->ref.use_count == 0) + { + tab->read_record.table->file->unlock_row(); + table_ref->has_record= FALSE; + } + error=table->file->ha_index_read_map(table->record[0], + table_ref->key_buff, + make_prev_keypart_map(table_ref->key_parts), + HA_READ_KEY_EXACT); + if (unlikely(error) && + error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + return report_error(table, error); + + if (likely(!error)) + { + table_ref->has_record= TRUE; + table_ref->use_count= 1; + } + } + else if (table->status == 0) + { + DBUG_ASSERT(table_ref->has_record); + table_ref->use_count++; + } + table->null_row=0; + return table->status ? -1 : 0; +} + + +/** + Since join_read_key may buffer a record, do not unlock + it if it was not used in this invocation of join_read_key(). + Only count locks, thus remembering if the record was left unused, + and unlock already when pruning the current value of + TABLE_REF buffer. 
+ @sa join_read_key() +*/ + +static void +join_read_key_unlock_row(st_join_table *tab) +{ + DBUG_ASSERT(tab->ref.use_count); + if (tab->ref.use_count) + tab->ref.use_count--; +} + +/** + Rows from const tables are read once but potentially used + multiple times during execution of a query. + Ensure such rows are never unlocked during query execution. +*/ + +void +join_const_unlock_row(JOIN_TAB *tab) +{ + DBUG_ASSERT(tab->type == JT_CONST); +} + + +/* + ref access method implementation: "read_first" function + + SYNOPSIS + join_read_always_key() + tab JOIN_TAB of the accessed table + + DESCRIPTION + This is "read_fist" function for the "ref" access method. + + The functon must leave the index initialized when it returns. + ref_or_null access implementation depends on that. + + RETURN + 0 - Ok + -1 - Row not found + 1 - Error +*/ + +static int +join_read_always_key(JOIN_TAB *tab) +{ + int error; + TABLE *table= tab->table; + + /* Initialize the index first */ + if (!table->file->inited) + { + if (unlikely((error= table->file->ha_index_init(tab->ref.key, + tab->sorted)))) + { + (void) report_error(table, error); + return 1; + } + } + + if (unlikely(cp_buffer_from_ref(tab->join->thd, table, &tab->ref))) + return -1; + if (unlikely((error= + table->file->prepare_index_key_scan_map(tab->ref.key_buff, + make_prev_keypart_map(tab->ref.key_parts))))) + { + report_error(table,error); + return -1; + } + if ((error= table->file->ha_index_read_map(table->record[0], + tab->ref.key_buff, + make_prev_keypart_map(tab->ref.key_parts), + HA_READ_KEY_EXACT))) + { + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + return report_error(table, error); + return -1; /* purecov: inspected */ + } + return 0; +} + + +/** + This function is used when optimizing away ORDER BY in + SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC. 
+*/ + +static int +join_read_last_key(JOIN_TAB *tab) +{ + int error; + TABLE *table= tab->table; + + if (!table->file->inited && + unlikely((error= table->file->ha_index_init(tab->ref.key, tab->sorted)))) + { + (void) report_error(table, error); + return 1; + } + + if (unlikely(cp_buffer_from_ref(tab->join->thd, table, &tab->ref))) + return -1; + if (unlikely((error= + table->file->prepare_index_key_scan_map(tab->ref.key_buff, + make_prev_keypart_map(tab->ref.key_parts)))) ) + { + report_error(table,error); + return -1; + } + if (unlikely((error= + table->file->ha_index_read_map(table->record[0], + tab->ref.key_buff, + make_prev_keypart_map(tab->ref.key_parts), + HA_READ_PREFIX_LAST)))) + { + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + return report_error(table, error); + return -1; /* purecov: inspected */ + } + return 0; +} + + + /* ARGSUSED */ +static int +join_no_more_records(READ_RECORD *info __attribute__((unused))) +{ + return -1; +} + + +static int +join_read_next_same(READ_RECORD *info) +{ + int error; + TABLE *table= info->table; + JOIN_TAB *tab=table->reginfo.join_tab; + + if (unlikely((error= table->file->ha_index_next_same(table->record[0], + tab->ref.key_buff, + tab->ref.key_length)))) + { + if (error != HA_ERR_END_OF_FILE) + return report_error(table, error); + table->status= STATUS_GARBAGE; + return -1; + } + return 0; +} + + +static int +join_read_prev_same(READ_RECORD *info) +{ + int error; + TABLE *table= info->table; + JOIN_TAB *tab=table->reginfo.join_tab; + + if (unlikely((error= table->file->ha_index_prev(table->record[0])))) + return report_error(table, error); + if (key_cmp_if_same(table, tab->ref.key_buff, tab->ref.key, + tab->ref.key_length)) + { + table->status=STATUS_NOT_FOUND; + error= -1; + } + return error; +} + + +static int +join_init_quick_read_record(JOIN_TAB *tab) +{ + quick_select_return res= test_if_quick_select(tab); + + if (res == SQL_SELECT::ERROR) + return 1; /* Fatal error */ + + if (res == 
SQL_SELECT::IMPOSSIBLE_RANGE) + return -1; /* No possible records */ + + /* + Proceed to read rows. If we've created a quick select, use it, otherwise + do a full scan. + */ + return join_init_read_record(tab); +} + + +int read_first_record_seq(JOIN_TAB *tab) +{ + if (unlikely(tab->read_record.table->file->ha_rnd_init_with_error(1))) + return 1; + return tab->read_record.read_record(); +} + + +/* + @brief + Create a new (dynamic) quick select. +*/ + +static quick_select_return +test_if_quick_select(JOIN_TAB *tab) +{ + DBUG_EXECUTE_IF("show_explain_probe_test_if_quick_select", + if (dbug_user_var_equals_int(tab->join->thd, + "show_explain_probe_select_id", + tab->join->select_lex->select_number)) + dbug_serve_apcs(tab->join->thd, 1); + ); + + + delete tab->select->quick; + tab->select->quick=0; + + if (tab->table->file->inited != handler::NONE) + tab->table->file->ha_index_or_rnd_end(); + + quick_select_return res; + res= tab->select->test_quick_select(tab->join->thd, tab->keys, + (table_map) 0, HA_POS_ERROR, 0, + FALSE, /*remove where parts*/FALSE, + FALSE, /* no warnings */ TRUE); + if (tab->explain_plan && tab->explain_plan->range_checked_fer) + tab->explain_plan->range_checked_fer->collect_data(tab->select->quick); + + return res; +} + + +/* + @return + 1 - Yes, use dynamically built range + 0 - No, don't use dynamic range (but there's no error) + -1 - Fatal error +*/ + +static +int test_if_use_dynamic_range_scan(JOIN_TAB *join_tab) +{ + if (unlikely(join_tab->use_quick == 2)) + { + quick_select_return res= test_if_quick_select(join_tab); + if (res == SQL_SELECT::ERROR) + return -1; + else + { + /* Both OK and IMPOSSIBLE_RANGE go here */ + return join_tab->select->quick ? 1 : 0; + } + } + else + return 0; +} + +int join_init_read_record(JOIN_TAB *tab) +{ + bool need_unpacking= FALSE; + JOIN *join= tab->join; + /* + Note: the query plan tree for the below operations is constructed in + save_agg_explain_data. 
+ */ + if (tab->distinct && tab->remove_duplicates()) // Remove duplicates. + return 1; + + if (join->top_join_tab_count != join->const_tables) + { + TABLE_LIST *tbl= tab->table->pos_in_table_list; + need_unpacking= tbl ? tbl->is_sjm_scan_table() : FALSE; + } + + if (tab->build_range_rowid_filter_if_needed()) + return 1; + + if (tab->filesort && tab->sort_table()) // Sort table. + return 1; + + DBUG_EXECUTE_IF("kill_join_init_read_record", + tab->join->thd->set_killed(KILL_QUERY);); + + + if (!tab->preread_init_done && tab->preread_init()) + return 1; + + if (tab->select && tab->select->quick && tab->select->quick->reset()) + { + /* Ensures error status is propagated back to client */ + report_error(tab->table, + tab->join->thd->killed ? HA_ERR_QUERY_INTERRUPTED : HA_ERR_OUT_OF_MEM); + return 1; + } + /* make sure we won't get ER_QUERY_INTERRUPTED from any code below */ + DBUG_EXECUTE_IF("kill_join_init_read_record", + tab->join->thd->reset_killed();); + + Copy_field *save_copy, *save_copy_end; + + /* + init_read_record resets all elements of tab->read_record(). + Remember things that we don't want to have reset. + */ + save_copy= tab->read_record.copy_field; + save_copy_end= tab->read_record.copy_field_end; + + if (init_read_record(&tab->read_record, tab->join->thd, tab->table, + tab->select, tab->filesort_result, 1, 1, FALSE)) + return 1; + + tab->read_record.copy_field= save_copy; + tab->read_record.copy_field_end= save_copy_end; + + if (need_unpacking) + { + tab->read_record.read_record_func_and_unpack_calls= + tab->read_record.read_record_func; + tab->read_record.read_record_func = read_record_func_for_rr_and_unpack; + } + + return tab->read_record.read_record(); +} + + +/* + Helper function for sorting table with filesort. +*/ + +bool +JOIN_TAB::sort_table() +{ + int rc; + DBUG_PRINT("info",("Sorting for index")); + THD_STAGE_INFO(join->thd, stage_creating_sort_index); + DBUG_ASSERT(join->ordered_index_usage != (filesort->order == join->order ? 
+ JOIN::ordered_index_order_by : + JOIN::ordered_index_group_by)); + rc= create_sort_index(join->thd, join, this, NULL); + /* Disactivate rowid filter if it was used when creating sort index */ + if (rowid_filter) + table->file->rowid_filter_is_active= false; + return (rc != 0); +} + + +static int +join_read_first(JOIN_TAB *tab) +{ + int error= 0; + TABLE *table=tab->table; + DBUG_ENTER("join_read_first"); + + DBUG_ASSERT(table->no_keyread || + !table->covering_keys.is_set(tab->index) || + table->file->keyread == tab->index); + tab->table->status=0; + tab->read_record.read_record_func= join_read_next; + tab->read_record.table=table; + if (!table->file->inited) + error= table->file->ha_index_init(tab->index, tab->sorted); + if (likely(!error)) + error= table->file->prepare_index_scan(); + if (unlikely(error) || + unlikely(error= tab->table->file->ha_index_first(tab->table->record[0]))) + { + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + report_error(table, error); + DBUG_RETURN(-1); + } + DBUG_RETURN(0); +} + + +static int +join_read_next(READ_RECORD *info) +{ + int error; + if (unlikely((error= info->table->file->ha_index_next(info->record())))) + return report_error(info->table, error); + + return 0; +} + + +static int +join_read_last(JOIN_TAB *tab) +{ + TABLE *table=tab->table; + int error= 0; + DBUG_ENTER("join_read_last"); + + DBUG_ASSERT(table->no_keyread || + !table->covering_keys.is_set(tab->index) || + table->file->keyread == tab->index); + tab->table->status=0; + tab->read_record.read_record_func= join_read_prev; + tab->read_record.table=table; + if (!table->file->inited) + error= table->file->ha_index_init(tab->index, 1); + if (likely(!error)) + error= table->file->prepare_index_scan(); + if (unlikely(error) || + unlikely(error= tab->table->file->ha_index_last(tab->table->record[0]))) + DBUG_RETURN(report_error(table, error)); + + DBUG_RETURN(0); +} + + +static int +join_read_prev(READ_RECORD *info) +{ + int error; + if 
(unlikely((error= info->table->file->ha_index_prev(info->record())))) + return report_error(info->table, error); + return 0; +} + + +static int +join_ft_read_first(JOIN_TAB *tab) +{ + int error; + TABLE *table= tab->table; + + if (!table->file->inited && + (error= table->file->ha_index_init(tab->ref.key, 1))) + { + (void) report_error(table, error); + return 1; + } + + table->file->ft_init(); + + if (unlikely((error= table->file->ha_ft_read(table->record[0])))) + return report_error(table, error); + return 0; +} + +static int +join_ft_read_next(READ_RECORD *info) +{ + int error; + if (unlikely((error= info->table->file->ha_ft_read(info->record())))) + return report_error(info->table, error); + return 0; +} + + +/** + Reading of key with key reference and one part that may be NULL. +*/ + +int +join_read_always_key_or_null(JOIN_TAB *tab) +{ + int res; + + /* First read according to key which is NOT NULL */ + *tab->ref.null_ref_key= 0; // Clear null byte + if ((res= join_read_always_key(tab)) >= 0) + return res; + + /* Then read key with null value */ + *tab->ref.null_ref_key= 1; // Set null byte + return safe_index_read(tab); +} + + +int +join_read_next_same_or_null(READ_RECORD *info) +{ + int error; + if (unlikely((error= join_read_next_same(info)) >= 0)) + return error; + JOIN_TAB *tab= info->table->reginfo.join_tab; + + /* Test if we have already done a read after null key */ + if (*tab->ref.null_ref_key) + return -1; // All keys read + *tab->ref.null_ref_key= 1; // Set null byte + return safe_index_read(tab); // then read null keys +} + + +/***************************************************************************** + DESCRIPTION + Functions that end one nested loop iteration. Different functions + are used to support GROUP BY clause and to redirect records + to a table (e.g. in case of SELECT into a temporary table) or to the + network client. 
+ + RETURN VALUES + NESTED_LOOP_OK - the record has been successfully handled + NESTED_LOOP_ERROR - a fatal error (like table corruption) + was detected + NESTED_LOOP_KILLED - thread shutdown was requested while processing + the record + NESTED_LOOP_QUERY_LIMIT - the record has been successfully handled; + additionally, the nested loop produced the + number of rows specified in the LIMIT clause + for the query + NESTED_LOOP_CURSOR_LIMIT - the record has been successfully handled; + additionally, there is a cursor and the nested + loop algorithm produced the number of rows + that is specified for current cursor fetch + operation. + All return values except NESTED_LOOP_OK abort the nested loop. +*****************************************************************************/ + +/* ARGSUSED */ +static enum_nested_loop_state +end_send(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) +{ + DBUG_ENTER("end_send"); + /* + When all tables are const this function is called with jointab == NULL. + This function shouldn't be called for the first join_tab as it needs + to get fields from previous tab. + */ + DBUG_ASSERT(join_tab == NULL || join_tab != join->join_tab); + //TODO pass fields via argument + List *fields= join_tab ? (join_tab-1)->fields : join->fields; + + if (end_of_records) + { + if (join->procedure && join->procedure->end_of_records()) + DBUG_RETURN(NESTED_LOOP_ERROR); + DBUG_RETURN(NESTED_LOOP_OK); + } + + if (join->table_count && + join->join_tab->is_using_loose_index_scan()) + { + /* Copy non-aggregated fields when loose index scan is used. 
*/ + copy_fields(&join->tmp_table_param); + } + if (join->having && join->having->val_int() == 0) + DBUG_RETURN(NESTED_LOOP_OK); // Didn't match having + if (join->procedure) + { + if (join->procedure->send_row(join->procedure_fields_list)) + DBUG_RETURN(NESTED_LOOP_ERROR); + DBUG_RETURN(NESTED_LOOP_OK); + } + + if (join->send_records >= join->unit->lim.get_select_limit() && + join->unit->lim.is_with_ties()) + { + /* + Stop sending rows if the order fields corresponding to WITH TIES + have changed. + */ + int idx= test_if_item_cache_changed(join->order_fields); + if (idx >= 0) + join->do_send_rows= false; + } + + if (join->do_send_rows) + { + int error; + /* result < 0 if row was not accepted and should not be counted */ + if (unlikely((error= join->result->send_data_with_check(*fields, + join->unit, + join->send_records)))) + { + if (error > 0) + DBUG_RETURN(NESTED_LOOP_ERROR); + // error < 0 => duplicate row + join->duplicate_rows++; + } + } + + join->send_records++; + join->accepted_rows++; + if (join->send_records >= join->unit->lim.get_select_limit()) + { + if (!join->do_send_rows) + { + /* + If we have used Priority Queue for optimizing order by with limit, + then stop here, there are no more records to consume. + When this optimization is used, end_send is called on the next + join_tab. + */ + if (join->order && + join->select_options & OPTION_FOUND_ROWS && + join_tab > join->join_tab && + (join_tab - 1)->filesort && (join_tab - 1)->filesort->using_pq) + { + DBUG_PRINT("info", ("filesort NESTED_LOOP_QUERY_LIMIT")); + DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); + } + DBUG_RETURN(NESTED_LOOP_OK); + } + + /* For WITH TIES we keep sending rows until a group has changed. */ + if (join->unit->lim.is_with_ties()) + { + /* Prepare the order_fields comparison for with ties. */ + if (join->send_records == join->unit->lim.get_select_limit()) + (void) test_if_group_changed(join->order_fields); + /* One more loop, to check if the next row matches with_ties or not. 
*/ + DBUG_RETURN(NESTED_LOOP_OK); + } + if (join->select_options & OPTION_FOUND_ROWS) + { + JOIN_TAB *jt=join->join_tab; + if ((join->table_count == 1) && !join->sort_and_group + && !join->send_group_parts && !join->having && !jt->select_cond && + !(jt->select && jt->select->quick) && + (jt->table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && + (jt->ref.key < 0)) + { + /* Join over all rows in table; Return number of found rows */ + TABLE *table=jt->table; + + if (jt->filesort_result) // If filesort was used + { + join->send_records= jt->filesort_result->found_rows; + } + else + { + table->file->info(HA_STATUS_VARIABLE); + join->send_records= table->file->stats.records; + } + } + else + { + join->do_send_rows= 0; + if (join->unit->fake_select_lex) + join->unit->fake_select_lex->limit_params.select_limit= 0; + DBUG_RETURN(NESTED_LOOP_OK); + } + } + DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); // Abort nicely + } + else if (join->send_records >= join->fetch_limit) + { + /* + There is a server side cursor and all rows for + this fetch request are sent. + */ + DBUG_RETURN(NESTED_LOOP_CURSOR_LIMIT); + } + DBUG_RETURN(NESTED_LOOP_OK); +} + + +/* + @brief + Perform OrderedGroupBy operation and write the output into join->result. + + @detail + The input stream is ordered by the GROUP BY expression, so groups come + one after another. We only need to accumulate the aggregate value, when + a GROUP BY group ends, check the HAVING and send the group. + + Note that the output comes in the GROUP BY order, which is required by + the MySQL's GROUP BY semantics. No further sorting is needed. + + @seealso end_write_group() also implements SortAndGroup +*/ + +enum_nested_loop_state +end_send_group(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) +{ + int idx= -1; + enum_nested_loop_state ok_code= NESTED_LOOP_OK; + /* + join_tab can be 0 in the case all tables are const tables and we did not + need a temporary table to store the result. 
+ In this case we use the original given fields, which is stored in + join->fields. + */ + List *fields= join_tab ? (join_tab-1)->fields : join->fields; + DBUG_ENTER("end_send_group"); + + if (!join->items3.is_null() && !join->set_group_rpa) + { + /* Move ref_pointer_array to points to items3 */ + join->set_group_rpa= true; + join->set_items_ref_array(join->items3); + } + + if (!join->first_record || end_of_records || + (idx=test_if_group_changed(join->group_fields)) >= 0) + { + + if (!join->group_sent && + (join->first_record || + (end_of_records && !join->group && !join->group_optimized_away))) + { + table_map cleared_tables= (table_map) 0; + if (join->procedure) + join->procedure->end_group(); + /* Test if there was a group change. */ + if (idx < (int) join->send_group_parts) + { + int error=0; + if (join->procedure) + { + if (join->having && join->having->val_int() == 0) + error= -1; // Didn't satisfy having + else + { + if (join->do_send_rows) + error=join->procedure->send_row(*fields) ? 1 : 0; + join->send_records++; + } + if (end_of_records && join->procedure->end_of_records()) + error= 1; // Fatal error + } + else + { + /* Reset all sum functions on group change. 
*/ + if (!join->first_record) + { + /* No matching rows for group function */ + + List_iterator_fast it(*fields); + Item *item; + join->no_rows_in_result_called= 1; + + join->clear(&cleared_tables); + while ((item= it++)) + item->no_rows_in_result(); + } + if (join->having && join->having->val_int() == 0) + error= -1; // Didn't satisfy having + else + { + if (join->do_send_rows) + { + error= join->result->send_data_with_check(*fields, + join->unit, + join->send_records); + if (unlikely(error < 0)) + { + /* Duplicate row, don't count */ + join->duplicate_rows++; + error= 0; + } + } + join->send_records++; + join->group_sent= true; + } + if (unlikely(join->rollup.state != ROLLUP::STATE_NONE && error <= 0)) + { + if (join->rollup_send_data((uint) (idx+1))) + error= 1; + } + if (join->no_rows_in_result_called) + { + /* Restore null tables to original state */ + join->no_rows_in_result_called= 0; + if (cleared_tables) + unclear_tables(join, &cleared_tables); + } + } + if (unlikely(error > 0)) + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + if (end_of_records) + DBUG_RETURN(NESTED_LOOP_OK); + if (join->send_records >= join->unit->lim.get_select_limit() && + join->do_send_rows) + { + /* WITH TIES can be computed during end_send_group if + the order by is a subset of group by and we had an index + available to compute group by order directly. */ + if (!join->unit->lim.is_with_ties() || + idx < (int)join->with_ties_order_count) + { + if (!(join->select_options & OPTION_FOUND_ROWS)) + DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); // Abort nicely + join->do_send_rows= 0; + join->unit->lim.set_unlimited(); + } + } + else if (join->send_records >= join->fetch_limit) + { + /* + There is a server side cursor and all rows + for this fetch request are sent. + + Preventing code duplication. When finished with the group reset + the group functions and copy_fields. We fall through. 
bug #11904 + */ + ok_code= NESTED_LOOP_CURSOR_LIMIT; + } + } + } + else + { + if (end_of_records) + DBUG_RETURN(NESTED_LOOP_OK); + join->first_record=1; + (void) test_if_group_changed(join->group_fields); + } + if (idx < (int) join->send_group_parts) + { + /* + This branch is executed also for cursors which have finished their + fetch limit - the reason for ok_code. + */ + copy_fields(&join->tmp_table_param); + if (init_sum_functions(join->sum_funcs, join->sum_funcs_end[idx+1])) + DBUG_RETURN(NESTED_LOOP_ERROR); + if (join->procedure) + join->procedure->add(); + join->group_sent= false; + join->accepted_rows++; + DBUG_RETURN(ok_code); + } + } + if (update_sum_func(join->sum_funcs)) + DBUG_RETURN(NESTED_LOOP_ERROR); + join->accepted_rows++; + if (join->procedure) + join->procedure->add(); + DBUG_RETURN(NESTED_LOOP_OK); +} + + + /* ARGSUSED */ +static enum_nested_loop_state +end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), + bool end_of_records) +{ + TABLE *const table= join_tab->table; + DBUG_ENTER("end_write"); + + if (!end_of_records) + { + copy_fields(join_tab->tmp_table_param); + if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd)) + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + + if (likely(!join_tab->having || join_tab->having->val_int())) + { + int error; + join->found_records++; + join->accepted_rows++; + if ((error= table->file->ha_write_tmp_row(table->record[0]))) + { + if (likely(!table->file->is_fatal_error(error, HA_CHECK_DUP))) + goto end; // Ignore duplicate keys + bool is_duplicate; + if (create_internal_tmp_table_from_heap(join->thd, table, + join_tab->tmp_table_param->start_recinfo, + &join_tab->tmp_table_param->recinfo, + error, 1, &is_duplicate)) + DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error + if (is_duplicate) + goto end; + table->s->uniques=0; // To ensure rows are the same + } + if (++join_tab->send_records >= + join_tab->tmp_table_param->end_write_records && + join->do_send_rows) 
+ { + if (!(join->select_options & OPTION_FOUND_ROWS)) + DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); + join->do_send_rows=0; + join->unit->lim.set_unlimited(); + } + } + } +end: + if (unlikely(join->thd->check_killed())) + { + DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */ + } + DBUG_RETURN(NESTED_LOOP_OK); +} + + +/* + @brief + Perform GROUP BY operation over rows coming in arbitrary order: use + TemporaryTableWithPartialSums algorithm. + + @detail + The TemporaryTableWithPartialSums algorithm is: + + CREATE TEMPORARY TABLE tmp ( + group_by_columns PRIMARY KEY, + partial_sum + ); + + for each row R in join output { + INSERT INTO tmp (R.group_by_columns, R.sum_value) + ON DUPLICATE KEY UPDATE partial_sum=partial_sum + R.sum_value; + } + + @detail + Also applies HAVING, etc. + + @seealso end_unique_update() +*/ + +static enum_nested_loop_state +end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), + bool end_of_records) +{ + TABLE *const table= join_tab->table; + ORDER *group; + int error; + DBUG_ENTER("end_update"); + + if (end_of_records) + DBUG_RETURN(NESTED_LOOP_OK); + + join->found_records++; + copy_fields(join_tab->tmp_table_param); // Groups are copied twice. 
+ /* Make a key of group index */ + for (group=table->group ; group ; group=group->next) + { + Item *item= *group->item; + if (group->fast_field_copier_setup != group->field) + { + DBUG_PRINT("info", ("new setup %p -> %p", + group->fast_field_copier_setup, + group->field)); + group->fast_field_copier_setup= group->field; + group->fast_field_copier_func= + item->setup_fast_field_copier(group->field); + } + item->save_org_in_field(group->field, group->fast_field_copier_func); + /* Store in the used key if the field was 0 */ + if (item->maybe_null()) + group->buff[-1]= (char) group->field->is_null(); + } + if (!table->file->ha_index_read_map(table->record[1], + join_tab->tmp_table_param->group_buff, + HA_WHOLE_KEY, + HA_READ_KEY_EXACT)) + { /* Update old record */ + restore_record(table,record[1]); + update_tmptable_sum_func(join->sum_funcs,table); + if (unlikely((error= table->file->ha_update_tmp_row(table->record[1], + table->record[0])))) + { + table->file->print_error(error,MYF(0)); /* purecov: inspected */ + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + } + goto end; + } + + init_tmptable_sum_functions(join->sum_funcs); + if (unlikely(copy_funcs(join_tab->tmp_table_param->items_to_copy, + join->thd))) + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + if (unlikely((error= table->file->ha_write_tmp_row(table->record[0])))) + { + if (create_internal_tmp_table_from_heap(join->thd, table, + join_tab->tmp_table_param->start_recinfo, + &join_tab->tmp_table_param->recinfo, + error, 0, NULL)) + DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error + /* Change method to update rows */ + if (unlikely((error= table->file->ha_index_init(0, 0)))) + { + table->file->print_error(error, MYF(0)); + DBUG_RETURN(NESTED_LOOP_ERROR); + } + + join_tab->aggr->set_write_func(end_unique_update); + } + join_tab->send_records++; +end: + join->accepted_rows++; // For rownum() + if (unlikely(join->thd->check_killed())) + { + DBUG_RETURN(NESTED_LOOP_KILLED); /* 
purecov: inspected */ + } + DBUG_RETURN(NESTED_LOOP_OK); +} + + +/** + Like end_update, but this is done with unique constraints instead of keys. +*/ + +static enum_nested_loop_state +end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), + bool end_of_records) +{ + TABLE *table= join_tab->table; + int error; + DBUG_ENTER("end_unique_update"); + + if (end_of_records) + DBUG_RETURN(NESTED_LOOP_OK); + + init_tmptable_sum_functions(join->sum_funcs); + copy_fields(join_tab->tmp_table_param); // Groups are copied twice. + if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd)) + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + + join->accepted_rows++; + if (likely(!(error= table->file->ha_write_tmp_row(table->record[0])))) + join_tab->send_records++; // New group + else + { + if (unlikely((int) table->file->get_dup_key(error) < 0)) + { + table->file->print_error(error,MYF(0)); /* purecov: inspected */ + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + } + /* Prepare table for random positioning */ + bool rnd_inited= (table->file->inited == handler::RND); + if (!rnd_inited && + ((error= table->file->ha_index_end()) || + (error= table->file->ha_rnd_init(0)))) + { + table->file->print_error(error, MYF(0)); + DBUG_RETURN(NESTED_LOOP_ERROR); + } + if (unlikely(table->file->ha_rnd_pos(table->record[1],table->file->dup_ref))) + { + table->file->print_error(error,MYF(0)); /* purecov: inspected */ + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + } + restore_record(table,record[1]); + update_tmptable_sum_func(join->sum_funcs,table); + if (unlikely((error= table->file->ha_update_tmp_row(table->record[1], + table->record[0])))) + { + table->file->print_error(error,MYF(0)); /* purecov: inspected */ + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + } + if (!rnd_inited && + ((error= table->file->ha_rnd_end()) || + (error= table->file->ha_index_init(0, 0)))) + { + table->file->print_error(error, MYF(0)); + 
DBUG_RETURN(NESTED_LOOP_ERROR); + } + } + if (unlikely(join->thd->check_killed())) + { + DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */ + } + join->accepted_rows++; // For rownum() + DBUG_RETURN(NESTED_LOOP_OK); +} + + +/* + @brief + Perform OrderedGroupBy operation and write the output into the temporary + table (join_tab->table). + + @detail + The input stream is ordered by the GROUP BY expression, so groups come + one after another. We only need to accumulate the aggregate value, when + a GROUP BY group ends, check the HAVING and write the group. + + @seealso end_send_group() also implements OrderedGroupBy +*/ + +enum_nested_loop_state +end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), + bool end_of_records) +{ + TABLE *table= join_tab->table; + int idx= -1; + DBUG_ENTER("end_write_group"); + + join->accepted_rows++; + if (!join->first_record || end_of_records || + (idx=test_if_group_changed(join->group_fields)) >= 0) + { + if (join->first_record || (end_of_records && !join->group)) + { + table_map cleared_tables= (table_map) 0; + if (join->procedure) + join->procedure->end_group(); + int send_group_parts= join->send_group_parts; + if (idx < send_group_parts) + { + if (!join->first_record) + { + /* No matching rows for group function */ + join->clear(&cleared_tables); + } + copy_sum_funcs(join->sum_funcs, + join->sum_funcs_end[send_group_parts]); + if (!join_tab->having || join_tab->having->val_int()) + { + int error= table->file->ha_write_tmp_row(table->record[0]); + if (unlikely(error) && + create_internal_tmp_table_from_heap(join->thd, table, + join_tab->tmp_table_param->start_recinfo, + &join_tab->tmp_table_param->recinfo, + error, 0, NULL)) + DBUG_RETURN(NESTED_LOOP_ERROR); + } + if (unlikely(join->rollup.state != ROLLUP::STATE_NONE)) + { + if (unlikely(join->rollup_write_data((uint) (idx+1), + join_tab->tmp_table_param, + table))) + { + DBUG_RETURN(NESTED_LOOP_ERROR); + } + } + if (cleared_tables) + unclear_tables(join, 
&cleared_tables); + if (end_of_records) + goto end; + } + } + else + { + if (end_of_records) + goto end; + join->first_record=1; + (void) test_if_group_changed(join->group_fields); + } + if (idx < (int) join->send_group_parts) + { + copy_fields(join_tab->tmp_table_param); + if (unlikely(copy_funcs(join_tab->tmp_table_param->items_to_copy, + join->thd))) + DBUG_RETURN(NESTED_LOOP_ERROR); + if (unlikely(init_sum_functions(join->sum_funcs, + join->sum_funcs_end[idx+1]))) + DBUG_RETURN(NESTED_LOOP_ERROR); + if (unlikely(join->procedure)) + join->procedure->add(); + goto end; + } + } + if (unlikely(update_sum_func(join->sum_funcs))) + DBUG_RETURN(NESTED_LOOP_ERROR); + if (unlikely(join->procedure)) + join->procedure->add(); +end: + if (unlikely(join->thd->check_killed())) + { + DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */ + } + DBUG_RETURN(NESTED_LOOP_OK); +} + + +/***************************************************************************** + Remove calculation with tables that aren't yet read. Remove also tests + against fields that are read through key where the table is not a + outer join table. + We can't remove tests that are made against columns which are stored + in sorted order. +*****************************************************************************/ + +/** + Check if "left_item=right_item" equality is guaranteed to be true by use of + [eq]ref access on left_item->field->table. + + SYNOPSIS + test_if_ref() + root_cond + left_item + right_item + + DESCRIPTION + Check if the given "left_item = right_item" equality is guaranteed to be + true by use of [eq_]ref access method. + + We need root_cond as we can't remove ON expressions even if employed ref + access guarantees that they are true. 
This is because TODO + + RETURN + TRUE if right_item is used removable reference key on left_item + FALSE Otherwise + +*/ + +bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item) +{ + Field *field=left_item->field; + JOIN_TAB *join_tab= field->table->reginfo.join_tab; + // No need to change const test + if (!field->table->const_table && join_tab && + !join_tab->is_ref_for_hash_join() && + (!join_tab->first_inner || + *join_tab->first_inner->on_expr_ref == root_cond)) + { + /* + If ref access uses "Full scan on NULL key" (i.e. it actually alternates + between ref access and full table scan), then no equality can be + guaranteed to be true. + */ + if (join_tab->ref.is_access_triggered()) + return FALSE; + + Item *ref_item=part_of_refkey(field->table,field); + if (ref_item && (ref_item->eq(right_item,1) || + ref_item->real_item()->eq(right_item,1))) + { + right_item= right_item->real_item(); + if (right_item->type() == Item::FIELD_ITEM) + return (field->eq_def(((Item_field *) right_item)->field)); + /* remove equalities injected by IN->EXISTS transformation */ + else if (right_item->type() == Item::CACHE_ITEM) + return ((Item_cache *)right_item)->eq_def (field); + if (right_item->const_item() && !(right_item->is_null())) + { + /* + We can remove binary fields and numerical fields except float, + as float comparison isn't 100 % safe + We have to keep normal strings to be able to check for end spaces + */ + if (field->binary() && + field->real_type() != MYSQL_TYPE_STRING && + field->real_type() != MYSQL_TYPE_VARCHAR && + (field->type() != MYSQL_TYPE_FLOAT || field->decimals() == 0)) + { + return !right_item->save_in_field_no_warnings(field, 1); + } + } + } + } + return 0; // keep test +} + + +/** + Extract a condition that can be checked after reading given table + @fn make_cond_for_table() + + @param cond Condition to analyze + @param tables Tables for which "current field values" are available + @param used_table Table that we're extracting the 
condition for + tables Tables for which "current field values" are available (this + includes used_table) + (may also include PSEUDO_TABLE_BITS, and may be zero) + @param join_tab_idx_arg + The index of the JOIN_TAB this Item is being extracted + for. MAX_TABLES if there is no corresponding JOIN_TAB. + @param exclude_expensive_cond + Do not push expensive conditions + @param retain_ref_cond + Retain ref conditions + + @retval <>NULL Generated condition + @retval =NULL Already checked, OR error + + @details + Extract the condition that can be checked after reading the table + specified in 'used_table', given that current-field values for tables + specified in 'tables' bitmap are available. + If 'used_table' is 0 + - extract conditions for all tables in 'tables'. + - extract conditions are unrelated to any tables + in the same query block/level(i.e. conditions + which have used_tables == 0). + + The function assumes that + - Constant parts of the condition has already been checked. + - Condition that could be checked for tables in 'tables' has already + been checked. + + The function takes into account that some parts of the condition are + guaranteed to be true by employed 'ref' access methods (the code that + does this is located at the end, search down for "EQ_FUNC"). + + @note + Make sure to keep the implementations of make_cond_for_table() and + make_cond_after_sjm() synchronized. + make_cond_for_info_schema() uses similar algorithm as well. 
+*/ + +static Item * +make_cond_for_table(THD *thd, Item *cond, table_map tables, + table_map used_table, + int join_tab_idx_arg, + bool exclude_expensive_cond __attribute__((unused)), + bool retain_ref_cond) +{ + return make_cond_for_table_from_pred(thd, cond, cond, tables, used_table, + join_tab_idx_arg, + exclude_expensive_cond, + retain_ref_cond, true); +} + + +static Item * +make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond, + table_map tables, table_map used_table, + int join_tab_idx_arg, + bool exclude_expensive_cond __attribute__ + ((unused)), + bool retain_ref_cond, + bool is_top_and_level) + +{ + table_map rand_table_bit= (table_map) RAND_TABLE_BIT; + + if (used_table && !(cond->used_tables() & used_table)) + return (COND*) 0; // Already checked + + if (cond->type() == Item::COND_ITEM) + { + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + /* Create new top level AND item */ + Item_cond_and *new_cond=new (thd->mem_root) Item_cond_and(thd); + if (!new_cond) + return (COND*) 0; // OOM /* purecov: inspected */ + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + /* + Special handling of top level conjuncts with RAND_TABLE_BIT: + if such a conjunct contains a reference to a field that is not + an outer field then it is pushed to the corresponding table by + the same rule as all other conjuncts. Otherwise, if the conjunct + is used in WHERE is is pushed to the last joined table, if is it + is used in ON condition of an outer join it is pushed into the + last inner table of the outer join. Such conjuncts are pushed in + a call of make_cond_for_table_from_pred() with the + parameter 'used_table' equal to PSEUDO_TABLE_BITS. 
+ */ + if (is_top_and_level && used_table == rand_table_bit && + (item->used_tables() & ~OUTER_REF_TABLE_BIT) != rand_table_bit) + { + /* The conjunct with RAND_TABLE_BIT has been allready pushed */ + continue; + } + Item *fix=make_cond_for_table_from_pred(thd, root_cond, item, + tables, used_table, + join_tab_idx_arg, + exclude_expensive_cond, + retain_ref_cond, false); + if (fix) + new_cond->argument_list()->push_back(fix, thd->mem_root); + else if (thd->is_error()) + return ((COND*) 0); + } + switch (new_cond->argument_list()->elements) { + case 0: + return (COND*) 0; // Always true + case 1: + return new_cond->argument_list()->head(); + default: + /* + Call fix_fields to propagate all properties of the children to + the new parent Item. This should not be expensive because all + children of Item_cond_and should be fixed by now. + */ + if (new_cond->fix_fields(thd, 0)) + return (COND*) 0; + new_cond->used_tables_cache= + ((Item_cond_and*) cond)->used_tables_cache & + tables; + return new_cond; + } + } + else + { // Or list + if (is_top_and_level && used_table == rand_table_bit && + (cond->used_tables() & ~OUTER_REF_TABLE_BIT) != rand_table_bit) + { + /* This top level formula with RAND_TABLE_BIT has been already pushed */ + return (COND*) 0; + } + + Item_cond_or *new_cond=new (thd->mem_root) Item_cond_or(thd); + if (!new_cond) + return (COND*) 0; // OOM /* purecov: inspected */ + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + Item *fix=make_cond_for_table_from_pred(thd, root_cond, item, + tables, 0L, + join_tab_idx_arg, + exclude_expensive_cond, + retain_ref_cond, false); + if (!fix) + return (COND*) 0; // Always true or error + new_cond->argument_list()->push_back(fix, thd->mem_root); + } + /* + Call fix_fields to propagate all properties of the children to + the new parent Item. This should not be expensive because all + children of Item_cond_and should be fixed by now. 
+ */ + if (new_cond->fix_fields(thd, 0)) + return (COND*) 0; + new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache; + new_cond->top_level_item(); + return new_cond; + } + } + else if (cond->basic_const_item()) + return cond; + + if (is_top_and_level && used_table == rand_table_bit && + (cond->used_tables() & ~OUTER_REF_TABLE_BIT) != rand_table_bit) + { + /* This top level formula with RAND_TABLE_BIT has been already pushed */ + return (COND*) 0; + } + + /* + Because the following test takes a while and it can be done + table_count times, we mark each item that we have examined with the result + of the test + */ + if ((cond->marker == MARKER_CHECK_ON_READ && !retain_ref_cond) || + (cond->used_tables() & ~tables)) + return (COND*) 0; // Can't check this yet + + if (cond->marker == MARKER_PROCESSED || cond->eq_cmp_result() == Item::COND_OK) + { + cond->set_join_tab_idx((uint8) join_tab_idx_arg); + return cond; // Not boolean op + } + + if (cond->type() == Item::FUNC_ITEM && + ((Item_func*) cond)->functype() == Item_func::EQ_FUNC) + { + Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item(); + Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item(); + if (left_item->type() == Item::FIELD_ITEM && !retain_ref_cond && + test_if_ref(root_cond, (Item_field*) left_item,right_item)) + { + cond->marker= MARKER_CHECK_ON_READ; // Checked when read + return (COND*) 0; + } + if (right_item->type() == Item::FIELD_ITEM && !retain_ref_cond && + test_if_ref(root_cond, (Item_field*) right_item,left_item)) + { + cond->marker= MARKER_CHECK_ON_READ; // Checked when read + return (COND*) 0; + } + } + cond->marker= MARKER_PROCESSED; + cond->set_join_tab_idx((uint8) join_tab_idx_arg); + return cond; +} + + +/* + The difference of this from make_cond_for_table() is that we're in the + following state: + 1. conditions referring to 'tables' have been checked + 2. conditions referring to sjm_tables have been checked, too + 3. 
We need condition that couldn't be checked in #1 or #2 but + can be checked when we get both (tables | sjm_tables). + +*/ +static COND * +make_cond_after_sjm(THD *thd, Item *root_cond, Item *cond, table_map tables, + table_map sjm_tables, bool inside_or_clause) +{ + /* + We assume that conditions that refer to only join prefix tables or + sjm_tables have already been checked. + */ + if (!inside_or_clause) + { + table_map cond_used_tables= cond->used_tables(); + if((!(cond_used_tables & ~tables) || + !(cond_used_tables & ~sjm_tables))) + return (COND*) 0; // Already checked + } + + /* AND/OR recursive descent */ + if (cond->type() == Item::COND_ITEM) + { + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + /* Create new top level AND item */ + Item_cond_and *new_cond= new (thd->mem_root) Item_cond_and(thd); + if (!new_cond) + return (COND*) 0; // OOM /* purecov: inspected */ + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + Item *fix=make_cond_after_sjm(thd, root_cond, item, tables, sjm_tables, + inside_or_clause); + if (fix) + new_cond->argument_list()->push_back(fix, thd->mem_root); + } + switch (new_cond->argument_list()->elements) { + case 0: + return (COND*) 0; // Always true + case 1: + return new_cond->argument_list()->head(); + default: + /* + Item_cond_and do not need fix_fields for execution, its parameters + are fixed or do not need fix_fields, too + */ + new_cond->quick_fix_field(); + new_cond->used_tables_cache= + ((Item_cond_and*) cond)->used_tables_cache & + tables; + return new_cond; + } + } + else + { // Or list + Item_cond_or *new_cond= new (thd->mem_root) Item_cond_or(thd); + if (!new_cond) + return (COND*) 0; // OOM /* purecov: inspected */ + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + Item *fix= make_cond_after_sjm(thd, root_cond, item, tables, sjm_tables, + /*inside_or_clause= */TRUE); + if (!fix) + return (COND*) 0; // 
Always true + new_cond->argument_list()->push_back(fix, thd->mem_root); + } + /* + Item_cond_or do not need fix_fields for execution, its parameters + are fixed or do not need fix_fields, too + */ + new_cond->quick_fix_field(); + new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache; + new_cond->top_level_item(); + return new_cond; + } + } + + /* + Because the following test takes a while and it can be done + table_count times, we mark each item that we have examined with the result + of the test + */ + + if (cond->marker == MARKER_CHECK_ON_READ || + (cond->used_tables() & ~(tables | sjm_tables))) + return (COND*) 0; // Can't check this yet + if (cond->marker == MARKER_PROCESSED || cond->eq_cmp_result() == Item::COND_OK) + return cond; // Not boolean op + + /* + Remove equalities that are guaranteed to be true by use of 'ref' access + method + */ + if (((Item_func*) cond)->functype() == Item_func::EQ_FUNC) + { + Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item(); + Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item(); + if (left_item->type() == Item::FIELD_ITEM && + test_if_ref(root_cond, (Item_field*) left_item,right_item)) + { + cond->marker= MARKER_CHECK_ON_READ; + return (COND*) 0; + } + if (right_item->type() == Item::FIELD_ITEM && + test_if_ref(root_cond, (Item_field*) right_item,left_item)) + { + cond->marker= MARKER_CHECK_ON_READ; + return (COND*) 0; + } + } + cond->marker= MARKER_PROCESSED; + return cond; +} + + +/* + @brief + + Check if + - @table uses "ref"-like access + - it is based on "@field=certain_item" equality + - the equality will be true for any record returned by the access method + and return the certain_item if yes. + + @detail + + Equality won't necessarily hold if: + - the used index covers only part of the @field. + Suppose, we have a CHAR(5) field and INDEX(field(3)). if you make a lookup + for 'abc', you will get both record with 'abc' and with 'abcde'. 
+ - The type of access is actually ref_or_null, and so @field can be either + a value or NULL. + + @return + Item that the field will be equal to + NULL if no such item +*/ + +static Item * +part_of_refkey(TABLE *table,Field *field) +{ + JOIN_TAB *join_tab= table->reginfo.join_tab; + if (!join_tab) + return (Item*) 0; // field from outer non-select (UPDATE,...) + + uint ref_parts= join_tab->ref.key_parts; + if (ref_parts) /* if it's ref/eq_ref/ref_or_null */ + { + uint key= join_tab->ref.key; + KEY *key_info= join_tab->get_keyinfo_by_key_no(key); + KEY_PART_INFO *key_part= key_info->key_part; + + for (uint part=0 ; part < ref_parts ; part++,key_part++) + { + if (field->eq(key_part->field)) + { + /* + Found the field in the key. Check that + 1. ref_or_null doesn't alternate this component between a value and + a NULL + 2. index fully covers the key + */ + if (part != join_tab->ref.null_ref_part && // (1) + !(key_part->key_part_flag & HA_PART_KEY_SEG)) // (2) + { + return join_tab->ref.items[part]; + } + break; + } + } + } + return (Item*) 0; +} + + +/** + Test if one can use the key to resolve ORDER BY. + + @param join if not NULL, can use the join's top-level + multiple-equalities. + @param order Sort order + @param table Table to sort + @param idx Index to check + @param used_key_parts [out] NULL by default, otherwise return value for + used key parts. + + + @note + used_key_parts is set to correct key parts used if return value != 0 + (On other cases, used_key_part may be changed) + Note that the value may actually be greater than the number of index + key parts. This can happen for storage engines that have the primary + key parts as a suffix for every secondary key. + + @retval + 1 key is ok. 
+ @retval + 0 Key can't be used + @retval + -1 Reverse key can be used +*/ + +static int test_if_order_by_key(JOIN *join, + ORDER *order, TABLE *table, uint idx, + uint *used_key_parts) +{ + KEY_PART_INFO *key_part,*key_part_end; + key_part=table->key_info[idx].key_part; + key_part_end=key_part + table->key_info[idx].ext_key_parts; + key_part_map const_key_parts=table->const_key_parts[idx]; + uint user_defined_kp= table->key_info[idx].user_defined_key_parts; + int reverse=0; + uint key_parts; + bool have_pk_suffix= false; + uint pk= table->s->primary_key; + DBUG_ENTER("test_if_order_by_key"); + + if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) && + table->key_info[idx].ext_key_part_map && + pk != MAX_KEY && pk != idx) + { + have_pk_suffix= true; + } + + for (; order ; order=order->next, const_key_parts>>=1) + { + Item_field *item_field= ((Item_field*) (*order->item)->real_item()); + Field *field= item_field->field; + int flag; + + /* + Skip key parts that are constants in the WHERE clause. + These are already skipped in the ORDER BY by const_expression_in_where() + */ + for (; const_key_parts & 1 ; const_key_parts>>= 1) + key_part++; + + /* + This check was in this function historically (although I think it's + better to check it outside of this function): + + "Test if the primary key parts were all const (i.e. there's one row). + The sorting doesn't matter" + + So, we're checking that + (1) this is an extended key + (2) we've reached its end + */ + key_parts= (uint)(key_part - table->key_info[idx].key_part); + if (have_pk_suffix && + reverse == 0 && // all were =const so far + key_parts == table->key_info[idx].ext_key_parts && + table->const_key_parts[pk] == PREV_BITS(uint, + table->key_info[pk]. 
+ user_defined_key_parts)) + { + key_parts= 0; + reverse= 1; // Key is ok to use + goto ok; + } + + if (key_part == key_part_end) + { + /* + There are some items left in ORDER BY that we don't + */ + DBUG_RETURN(0); + } + + if (key_part->field != field) + { + /* + Check if there is a multiple equality that allows to infer that field + and key_part->field are equal + (see also: compute_part_of_sort_key_for_equals) + */ + if (item_field->item_equal && + item_field->item_equal->contains(key_part->field)) + field= key_part->field; + } + if (key_part->field != field || !field->part_of_sortkey.is_set(idx)) + DBUG_RETURN(0); + + const ORDER::enum_order keypart_order= + (key_part->key_part_flag & HA_REVERSE_SORT) ? + ORDER::ORDER_DESC : ORDER::ORDER_ASC; + /* set flag to 1 if we can use read-next on key, else to -1 */ + flag= (order->direction == keypart_order) ? 1 : -1; + if (reverse && flag != reverse) + DBUG_RETURN(0); + reverse=flag; // Remember if reverse + if (key_part < key_part_end) + key_part++; + } + + key_parts= (uint) (key_part - table->key_info[idx].key_part); + + if (reverse == -1 && + !(table->file->index_flags(idx, user_defined_kp-1, 1) & HA_READ_PREV)) + reverse= 0; // Index can't be used + + if (have_pk_suffix && reverse == -1) + { + uint pk_parts= table->key_info[pk].user_defined_key_parts; + if (!(table->file->index_flags(pk, pk_parts-1, 1) & HA_READ_PREV)) + reverse= 0; // Index can't be used + } + +ok: + *used_key_parts= key_parts; + DBUG_RETURN(reverse); +} + + +/** + Find shortest key suitable for full table scan. 
+ + @param table Table to scan + @param usable_keys Allowed keys + + @return + MAX_KEY no suitable key found + key index otherwise +*/ + +uint find_shortest_key(TABLE *table, const key_map *usable_keys) +{ + double min_cost= DBL_MAX; + uint best= MAX_KEY; + if (!usable_keys->is_clear_all()) + { + for (uint nr=0; nr < table->s->keys ; nr++) + { + if (usable_keys->is_set(nr)) + { + double cost= table->file->keyread_time(nr, 1, table->file->records()); + if (cost < min_cost) + { + min_cost= cost; + best=nr; + } + DBUG_ASSERT(best < MAX_KEY); + } + } + } + return best; +} + +/** + Test if a second key is the subkey of the first one. + + @param key_part First key parts + @param ref_key_part Second key parts + @param ref_key_part_end Last+1 part of the second key + + @note + Second key MUST be shorter than the first one. + + @retval + 1 is a subkey + @retval + 0 no sub key +*/ + +inline bool +is_subkey(KEY_PART_INFO *key_part, KEY_PART_INFO *ref_key_part, + KEY_PART_INFO *ref_key_part_end) +{ + for (; ref_key_part < ref_key_part_end; key_part++, ref_key_part++) + if (!key_part->field->eq(ref_key_part->field)) + return 0; + return 1; +} + +/** + Test if we can use one of the 'usable_keys' instead of 'ref' key + for sorting. + + @param ref Number of key, used for WHERE clause + @param usable_keys Keys for testing + + @return + - MAX_KEY If we can't use other key + - the number of found key Otherwise +*/ + +static uint +test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts, + const key_map *usable_keys) +{ + uint nr; + uint min_length= (uint) ~0; + uint best= MAX_KEY; + KEY_PART_INFO *ref_key_part= table->key_info[ref].key_part; + KEY_PART_INFO *ref_key_part_end= ref_key_part + ref_key_parts; + + /* + Find the shortest key that + - produces the required ordering + - has key #ref (up to ref_key_parts) as its subkey. 
+ */ + for (nr= 0 ; nr < table->s->keys ; nr++) + { + uint not_used; + if (usable_keys->is_set(nr) && + table->key_info[nr].key_length < min_length && + table->key_info[nr].user_defined_key_parts >= ref_key_parts && + is_subkey(table->key_info[nr].key_part, ref_key_part, + ref_key_part_end) && + test_if_order_by_key(NULL, order, table, nr, ¬_used)) + { + min_length= table->key_info[nr].key_length; + best= nr; + } + } + return best; +} + + +/** + Check if GROUP BY/DISTINCT can be optimized away because the set is + already known to be distinct. + + Used in removing the GROUP BY/DISTINCT of the following types of + statements: + @code + SELECT [DISTINCT] ... FROM + [GROUP BY ,...] + @endcode + + If (a,b,c is distinct) + then ,{whatever} is also distinct + + This function checks if all the key parts of any of the unique keys + of the table are referenced by a list : either the select list + through find_field_in_item_list or GROUP BY list through + find_field_in_order_list. + If the above holds and the key parts cannot contain NULLs then we + can safely remove the GROUP BY/DISTINCT, + as no result set can be more distinct than an unique key. + + @param table The table to operate on. + @param find_func function to iterate over the list and search + for a field + + @retval + 1 found + @retval + 0 not found. 
+*/
+
+static bool
+list_contains_unique_index(TABLE *table,
+ bool (*find_func) (Field *, void *), void *data)
+{
+ for (uint keynr= 0; keynr < table->s->keys; keynr++)
+ {
+ /* Only the primary key or a UNIQUE key can guarantee distinctness */
+ if (keynr == table->s->primary_key ||
+ (table->key_info[keynr].flags & HA_NOSAME))
+ {
+ KEY *keyinfo= table->key_info + keynr;
+ KEY_PART_INFO *key_part, *key_part_end;
+
+ /*
+ Every user-defined part of the key must be NOT NULL and must be
+ found in the list by find_func; bail out on the first miss.
+ */
+ for (key_part=keyinfo->key_part,
+ key_part_end=key_part+ keyinfo->user_defined_key_parts;
+ key_part < key_part_end;
+ key_part++)
+ {
+ if (key_part->field->maybe_null() ||
+ !find_func(key_part->field, data))
+ break;
+ }
+ /* Loop ran to completion: all parts covered -> rows are distinct */
+ if (key_part == key_part_end)
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
+/**
+ Helper function for list_contains_unique_index.
+ Find a field reference in a list of ORDER structures.
+ Finds a direct reference of the Field in the list.
+
+ @param field The field to search for.
+ @param data ORDER *. The list to search in
+
+ @retval
+ 1 found
+ @retval
+ 0 not found.
+*/
+
+static bool
+find_field_in_order_list (Field *field, void *data)
+{
+ ORDER *group= (ORDER *) data;
+ bool part_found= 0;
+ for (ORDER *tmp_group= group; tmp_group; tmp_group=tmp_group->next)
+ {
+ /* Only a direct column reference counts, not an expression over it */
+ Item *item= (*tmp_group->item)->real_item();
+ if (item->type() == Item::FIELD_ITEM &&
+ ((Item_field*) item)->field->eq(field))
+ {
+ part_found= 1;
+ break;
+ }
+ }
+ return part_found;
+}
+
+
+/**
+ Helper function for list_contains_unique_index.
+ Find a field reference in a dynamic list of Items.
+ Finds a direct reference of the Field in the list.
+
+ @param[in] field The field to search for.
+ @param[in] data List *.The list to search in
+
+ @retval
+ 1 found
+ @retval
+ 0 not found.
+*/ + +static bool +find_field_in_item_list (Field *field, void *data) +{ + List *fields= (List *) data; + bool part_found= 0; + List_iterator li(*fields); + Item *item; + + while ((item= li++)) + { + if (item->real_item()->type() == Item::FIELD_ITEM && + ((Item_field*) (item->real_item()))->field->eq(field)) + { + part_found= 1; + break; + } + } + return part_found; +} + + +/* + Fill *col_keys with a union of Field::part_of_sortkey of all fields + that belong to 'table' and are equal to 'item_field'. +*/ + +void compute_part_of_sort_key_for_equals(JOIN *join, TABLE *table, + Item_field *item_field, + key_map *col_keys) +{ + col_keys->clear_all(); + col_keys->merge(item_field->field->part_of_sortkey); + + if (!optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP)) + return; + + Item_equal *item_eq= NULL; + + if (item_field->item_equal) + { + /* + The item_field is from ORDER structure, but it already has an item_equal + pointer set (UseMultipleEqualitiesToRemoveTempTable code have set it) + */ + item_eq= item_field->item_equal; + } + else + { + /* + Walk through join's muliple equalities and find the one that contains + item_field. + */ + if (!join->cond_equal) + return; + table_map needed_tbl_map= item_field->used_tables() | table->map; + List_iterator li(join->cond_equal->current_level); + Item_equal *cur_item_eq; + while ((cur_item_eq= li++)) + { + if ((cur_item_eq->used_tables() & needed_tbl_map) && + cur_item_eq->contains(item_field->field)) + { + item_eq= cur_item_eq; + item_field->item_equal= item_eq; // Save the pointer to our Item_equal. 
+ break; + } + } + } + + if (item_eq) + { + Item_equal_fields_iterator it(*item_eq); + Item *item; + /* Loop through other members that belong to table table */ + while ((item= it++)) + { + if (item->type() == Item::FIELD_ITEM && + ((Item_field*)item)->field->table == table) + { + col_keys->merge(((Item_field*)item)->field->part_of_sortkey); + } + } + } +} + + +/** + Test if we can skip the ORDER BY by using an index. + + If we can use an index, the JOIN_TAB / tab->select struct + is changed to use the index. + + The index must cover all fields in , or it will not be considered. + + @param no_changes No changes will be made to the query plan. + @param fatal_error OUT A fatal error occurred + + @todo + - sergeyp: Results of all index merge selects actually are ordered + by clustered PK values. + + @retval + 0 We have to use filesort to do the sorting + @retval + 1 We can use an index. +*/ + +static bool +test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, + bool no_changes, const key_map *map, bool *fatal_error) +{ + int ref_key; + uint UNINIT_VAR(ref_key_parts); + int order_direction= 0; + uint used_key_parts= 0; + TABLE *table=tab->table; + SQL_SELECT *select=tab->select; + key_map usable_keys; + QUICK_SELECT_I *save_quick= select ? 
select->quick : 0; + Item *orig_cond= 0; + bool orig_cond_saved= false; + int best_key= -1; + bool changed_key= false; + THD *thd= tab->join->thd; + Json_writer_object trace_wrapper(thd); + Json_writer_array trace_arr(thd, "test_if_skip_sort_order"); + DBUG_ENTER("test_if_skip_sort_order"); + + *fatal_error= false; + /* Check that we are always called with first non-const table */ + DBUG_ASSERT(tab == tab->join->join_tab + tab->join->const_tables); + + /* Sorting a single row can always be skipped */ + if (tab->type == JT_EQ_REF || + tab->type == JT_CONST || + tab->type == JT_SYSTEM) + { + Json_writer_object trace_skip(thd); + trace_skip.add("skipped", "single row access method"); + DBUG_RETURN(1); + } + + /* + Keys disabled by ALTER TABLE ... DISABLE KEYS should have already + been taken into account. + */ + usable_keys= *map; + + /* Find indexes that cover all ORDER/GROUP BY fields */ + for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next) + { + Item *item= (*tmp_order->item)->real_item(); + if (item->type() != Item::FIELD_ITEM) + { + usable_keys.clear_all(); + DBUG_RETURN(0); + } + + /* + Take multiple-equalities into account. Suppose we have + ORDER BY col1, col10 + and there are + multiple-equal(col1, col2, col3), + multiple-equal(col10, col11). + + Then, + - when item=col1, we find the set of indexes that cover one of {col1, + col2, col3} + - when item=col10, we find the set of indexes that cover one of {col10, + col11} + + And we compute an intersection of these sets to find set of indexes that + cover all ORDER BY components. 
+ */ + key_map col_keys; + compute_part_of_sort_key_for_equals(tab->join, table, (Item_field*)item, + &col_keys); + usable_keys.intersect(col_keys); + if (usable_keys.is_clear_all()) + goto use_filesort; // No usable keys + } + + ref_key= -1; + /* Test if constant range in WHERE */ + if (tab->ref.key >= 0 && tab->ref.key_parts) + { + ref_key= tab->ref.key; + ref_key_parts= tab->ref.key_parts; + /* + todo: why does JT_REF_OR_NULL mean filesort? We could find another index + that satisfies the ordering. I would just set ref_key=MAX_KEY here... + */ + if (tab->type == JT_REF_OR_NULL || tab->type == JT_FT || + tab->ref.uses_splitting) + goto use_filesort; + } + else if (select && select->quick) // Range found by opt_range + { + int quick_type= select->quick->get_type(); + /* + assume results are not ordered when index merge is used + TODO: sergeyp: Results of all index merge selects actually are ordered + by clustered PK values. + */ + + if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) + { + /* + we set ref_key=MAX_KEY instead of -1, because test_if_cheaper ordering + assumes that "ref_key==-1" means doing full index scan. + (This is not very straightforward and we got into this situation for + historical reasons. Should be fixed at some point). 
+ */ + ref_key= MAX_KEY; + } + else + { + ref_key= select->quick->index; + ref_key_parts= select->quick->used_key_parts; + } + } + + if (ref_key >= 0 && ref_key != MAX_KEY) + { + /* Current access method uses index ref_key with ref_key_parts parts */ + if (!usable_keys.is_set(ref_key)) + { + /* However, ref_key doesn't match the needed ordering */ + uint new_ref_key; + + /* + If using index only read, only consider other possible index only + keys + */ + if (table->covering_keys.is_set(ref_key)) + usable_keys.intersect(table->covering_keys); + if (tab->pre_idx_push_select_cond) + { + orig_cond= tab->set_cond(tab->pre_idx_push_select_cond); + orig_cond_saved= true; + } + + if ((new_ref_key= test_if_subkey(order, table, ref_key, ref_key_parts, + &usable_keys)) < MAX_KEY) + { + /* + Index new_ref_key + - produces the required ordering, + - also has the same columns as ref_key for #ref_key_parts (this + means we will read the same number of rows as with ref_key). + */ + + /* + If new_ref_key allows to construct a quick select which uses more key + parts than ref(new_ref_key) would, do that. + + Otherwise, construct a ref access (todo: it's not clear what is the + win in using ref access when we could use quick select also?) + */ + if ((table->opt_range_keys.is_set(new_ref_key) && + table->opt_range[new_ref_key].key_parts > ref_key_parts) || + !(tab->ref.key >= 0)) + { + /* + The range optimizer constructed QUICK_RANGE for ref_key, and + we want to use instead new_ref_key as the index. We can't + just change the index of the quick select, because this may + result in an inconsistent QUICK_SELECT object. Below we + create a new QUICK_SELECT from scratch so that all its + parameters are set correctly by the range optimizer. + */ + key_map new_ref_key_map; + COND *save_cond; + quick_select_return res; + new_ref_key_map.clear_all(); // Force the creation of quick select + new_ref_key_map.set_bit(new_ref_key); // only for new_ref_key. 
+ + /* Reset quick; This will be restored in 'use_filesort' if needed */ + select->quick= 0; + save_cond= select->cond; + if (select->pre_idx_push_select_cond) + select->cond= select->pre_idx_push_select_cond; + res= select->test_quick_select(tab->join->thd, new_ref_key_map, 0, + (tab->join->select_options & + OPTION_FOUND_ROWS) ? + HA_POS_ERROR : + tab->join->unit-> + lim.get_select_limit(), + TRUE, TRUE, FALSE, FALSE); + // if we cannot use quick select + if (res != SQL_SELECT::OK || !tab->select->quick) + { + if (res == SQL_SELECT::ERROR) + *fatal_error= true; + select->cond= save_cond; + goto use_filesort; + } + tab->type= JT_ALL; + tab->ref.key= -1; + tab->ref.key_parts= 0; + tab->use_quick= 1; + best_key= new_ref_key; + /* + We don't restore select->cond as we want to use the + original condition as index condition pushdown is not + active for the new index. + todo: why not perform index condition pushdown for the new index? + */ + } + else + { + /* + We'll use ref access method on key new_ref_key. In general case + the index search tuple for new_ref_key will be different (e.g. + when one index is defined as (part1, part2, ...) and another as + (part1, part2(N), ...) and the WHERE clause contains + "part1 = const1 AND part2=const2". + So we build tab->ref from scratch here. 
+ */ + KEYUSE *keyuse= tab->keyuse; + while (keyuse->key != new_ref_key && keyuse->table == tab->table) + keyuse++; + if (create_ref_for_key(tab->join, tab, keyuse, FALSE, + (tab->join->const_table_map | + OUTER_REF_TABLE_BIT))) + goto use_filesort; + + pick_table_access_method(tab); + } + + ref_key= new_ref_key; + changed_key= true; + } + } + /* Check if we get the rows in requested sorted order by using the key */ + if (usable_keys.is_set(ref_key) && + (order_direction= test_if_order_by_key(tab->join, order,table,ref_key, + &used_key_parts))) + goto check_reverse_order; + } + { + uint UNINIT_VAR(best_key_parts); + uint saved_best_key_parts= 0; + int best_key_direction= 0; + JOIN *join= tab->join; + ha_rows table_records= table->stat_records(); + + test_if_cheaper_ordering(tab, order, table, usable_keys, + ref_key, select_limit, + &best_key, &best_key_direction, + &select_limit, &best_key_parts, + &saved_best_key_parts); + + /* + filesort() and join cache are usually faster than reading in + index order and not using join cache, except in case that chosen + index is clustered key. + */ + if (best_key < 0 || + ((select_limit >= table_records) && + (tab->type == JT_ALL && + tab->join->table_count > tab->join->const_tables + 1) && + !(table->file->index_flags(best_key, 0, 1) & HA_CLUSTERED_INDEX))) + goto use_filesort; + + if (select && // psergey: why doesn't this use a quick? + table->opt_range_keys.is_set(best_key) && best_key != ref_key) + { + key_map tmp_map; + tmp_map.clear_all(); // Force the creation of quick select + tmp_map.set_bit(best_key); // only best_key. + select->quick= 0; + + bool cond_saved= false; + Item *saved_cond; + + /* + Index Condition Pushdown may have removed parts of the condition for + this table. Temporarily put them back because we want the whole + condition for the range analysis. 
+ */ + if (select->pre_idx_push_select_cond) + { + saved_cond= select->cond; + select->cond= select->pre_idx_push_select_cond; + cond_saved= true; + } + + quick_select_return res; + res = select->test_quick_select(join->thd, tmp_map, 0, + join->select_options & OPTION_FOUND_ROWS ? + HA_POS_ERROR : + join->unit->lim.get_select_limit(), + TRUE, FALSE, FALSE, FALSE); + if (res == SQL_SELECT::ERROR) + { + *fatal_error= true; + goto use_filesort; + } + + if (cond_saved) + select->cond= saved_cond; + } + order_direction= best_key_direction; + /* + saved_best_key_parts is actual number of used keyparts found by the + test_if_order_by_key function. It could differ from keyinfo->user_defined_key_parts, + thus we have to restore it in case of desc order as it affects + QUICK_SELECT_DESC behaviour. + */ + used_key_parts= (order_direction == -1) ? + saved_best_key_parts : best_key_parts; + changed_key= true; + } + +check_reverse_order: + DBUG_ASSERT(order_direction != 0); + + if (order_direction == -1) // If ORDER BY ... DESC + { + int quick_type; + if (select && select->quick) + { + /* + Don't reverse the sort order, if it's already done. + (In some cases test_if_order_by_key() can be called multiple times + */ + if (select->quick->reverse_sorted()) + goto skipped_filesort; + + quick_type= select->quick->get_type(); + if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || + quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) + { + tab->limit= 0; + goto use_filesort; // Use filesort + } + } + } + + /* + Update query plan with access pattern for doing ordered access + according to what we have decided above. 
+ */ + if (!no_changes) // We are allowed to update QEP + { + if (best_key >= 0) + { + bool quick_created= + (select && select->quick && select->quick!=save_quick); + + if (!quick_created) + { + if (select) // Throw any existing quick select + select->quick= 0; // Cleanup either reset to save_quick, + // or 'delete save_quick' + tab->index= best_key; + tab->read_first_record= order_direction > 0 ? + join_read_first:join_read_last; + tab->type=JT_NEXT; // Read with index_first(), index_next() + + /* + Currently usage of rowid filters is not supported in InnoDB + if the table is accessed by the primary key + */ + if (tab->rowid_filter && + table->file->is_clustering_key(tab->index)) + { + tab->range_rowid_filter_info= 0; + delete tab->rowid_filter; + tab->rowid_filter= 0; + } + + if (tab->pre_idx_push_select_cond) + { + tab->set_cond(tab->pre_idx_push_select_cond); + /* + orig_cond is a part of pre_idx_push_cond, + no need to restore it. + */ + orig_cond= 0; + orig_cond_saved= false; + } + + table->file->ha_index_or_rnd_end(); + if (tab->join->select_options & SELECT_DESCRIBE) + { + tab->ref.key= -1; + tab->ref.key_parts= 0; + if (select_limit < table->stat_records()) + tab->limit= select_limit; + } + } + else if (tab->type != JT_ALL || tab->select->quick) + { + /* + We're about to use a quick access to the table. + We need to change the access method so as the quick access + method is actually used. + */ + DBUG_ASSERT(tab->select->quick); + tab->type=JT_ALL; + tab->use_quick=1; + tab->ref.key= -1; + tab->ref.key_parts=0; // Don't use ref key. 
+ tab->range_rowid_filter_info= 0; + if (tab->rowid_filter) + { + delete tab->rowid_filter; + tab->rowid_filter= 0; + } + tab->read_first_record= join_init_read_record; + if (tab->is_using_loose_index_scan()) + tab->join->tmp_table_param.precomputed_group_by= TRUE; + + /* + Restore the original condition as changes done by pushdown + condition are not relevant anymore + */ + if (tab->select && tab->select->pre_idx_push_select_cond) + { + tab->set_cond(tab->select->pre_idx_push_select_cond); + tab->table->file->cancel_pushed_idx_cond(); + } + /* + TODO: update the number of records in join->best_positions[tablenr] + */ + } + } // best_key >= 0 + + if (order_direction == -1) // If ORDER BY ... DESC + { + if (select && select->quick) + { + /* ORDER BY range_key DESC */ + QUICK_SELECT_I *tmp= select->quick->make_reverse(used_key_parts); + if (!tmp) + { + tab->limit= 0; + goto use_filesort; // Reverse sort failed -> filesort + } + /* + Cancel Pushed Index Condition, as it doesn't work for reverse scans. + */ + if (tab->select && tab->select->pre_idx_push_select_cond) + { + tab->set_cond(tab->select->pre_idx_push_select_cond); + tab->table->file->cancel_pushed_idx_cond(); + } + if (select->quick == save_quick) + save_quick= 0; // make_reverse() consumed it + select->set_quick(tmp); + /* Cancel "Range checked for each record" */ + if (tab->use_quick == 2) + { + tab->use_quick= 1; + tab->read_first_record= join_init_read_record; + } + } + else if (tab->type != JT_NEXT && tab->type != JT_REF_OR_NULL && + tab->ref.key >= 0 && tab->ref.key_parts <= used_key_parts) + { + /* + SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC + + Use a traversal function that starts by reading the last row + with key part (A) and then traverse the index backwards. 
+ */ + tab->read_first_record= join_read_last_key; + tab->read_record.read_record_func= join_read_prev_same; + /* Cancel "Range checked for each record" */ + if (tab->use_quick == 2) + { + tab->use_quick= 1; + tab->read_first_record= join_init_read_record; + } + /* + Cancel Pushed Index Condition, as it doesn't work for reverse scans. + */ + if (tab->select && tab->select->pre_idx_push_select_cond) + { + tab->set_cond(tab->select->pre_idx_push_select_cond); + tab->table->file->cancel_pushed_idx_cond(); + } + } + } + else if (select && select->quick) + { + /* Cancel "Range checked for each record" */ + if (tab->use_quick == 2) + { + tab->use_quick= 1; + tab->read_first_record= join_init_read_record; + } + select->quick->need_sorted_output(); + } + + if (tab->type == JT_EQ_REF) + tab->read_record.unlock_row= join_read_key_unlock_row; + else if (tab->type == JT_CONST) + tab->read_record.unlock_row= join_const_unlock_row; + else + tab->read_record.unlock_row= rr_unlock_row; + + } // QEP has been modified + + /* + Cleanup: + We may have both a 'select->quick' and 'save_quick' (original) + at this point. Delete the one that we wan't use. + */ + +skipped_filesort: + // Keep current (ordered) select->quick + if (select && save_quick != select->quick) + { + delete save_quick; + save_quick= NULL; + } + if (orig_cond_saved && !changed_key) + tab->set_cond(orig_cond); + if (!no_changes && changed_key && table->file->pushed_idx_cond) + table->file->cancel_pushed_idx_cond(); + + DBUG_RETURN(1); + +use_filesort: + // Restore original save_quick + if (select && select->quick != save_quick) + { + delete select->quick; + select->quick= save_quick; + } + if (orig_cond_saved) + tab->set_cond(orig_cond); + + DBUG_RETURN(0); +} + + +/* + If not selecting by given key, create an index how records should be read + + SYNOPSIS + create_sort_index() + thd Thread handler + join Join with table to sort + join_tab What table to sort + fsort Filesort object. NULL means "use tab->filesort". 
+ + IMPLEMENTATION + - If there is an index that can be used, the first non-const join_tab in + 'join' is modified to use this index. + - If no index, create with filesort() an index file that can be used to + retrieve rows in order (should be done with 'read_record'). + The sorted data is stored in tab->filesort + + RETURN VALUES + 0 ok + -1 Some fatal error + 1 No records +*/ + +int +create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort) +{ + TABLE *table; + SQL_SELECT *select; + bool quick_created= FALSE; + SORT_INFO *file_sort= 0; + DBUG_ENTER("create_sort_index"); + + if (fsort == NULL) + fsort= tab->filesort; + + table= tab->table; + select= fsort->select; + + table->status=0; // May be wrong if quick_select + + if (!tab->preread_init_done && tab->preread_init()) + goto err; + + // If table has a range, move it to select + if (select && tab->ref.key >= 0) + { + if (!select->quick) + { + if (tab->quick) + { + select->quick= tab->quick; + tab->quick= NULL; + /* + We can only use 'Only index' if quick key is same as ref_key + and in index_merge 'Only index' cannot be used + */ + if (((uint) tab->ref.key != select->quick->index)) + table->file->ha_end_keyread(); + } + else + { + /* + We have a ref on a const; Change this to a range that filesort + can use. + For impossible ranges (like when doing a lookup on NULL on a NOT NULL + field, quick will contain an empty record set. + */ + if (!(select->quick= (tab->type == JT_FT ? 
+ get_ft_select(thd, table, tab->ref.key) : + get_quick_select_for_ref(thd, table, &tab->ref, + tab->found_records)))) + goto err; + quick_created= TRUE; + } + fsort->own_select= true; + } + else + { + DBUG_ASSERT(tab->type == JT_REF || tab->type == JT_EQ_REF); + // Update ref value + if (unlikely(cp_buffer_from_ref(thd, table, &tab->ref) && + thd->is_error())) + goto err; // out of memory + } + } + + + /* Fill schema tables with data before filesort if it's necessary */ + if ((join->select_lex->options & OPTION_SCHEMA_TABLE) && + unlikely(get_schema_tables_result(join, PROCESSED_BY_CREATE_SORT_INDEX))) + goto err; + + if (table->s->tmp_table) + table->file->info(HA_STATUS_VARIABLE); // Get record count + fsort->accepted_rows= &join->accepted_rows; // For ROWNUM + file_sort= filesort(thd, table, fsort, fsort->tracker, join, tab->table->map); + DBUG_ASSERT(tab->filesort_result == 0); + tab->filesort_result= file_sort; + tab->records= 0; + if (file_sort) + { + tab->records= join->select_options & OPTION_FOUND_ROWS ? + file_sort->found_rows : file_sort->return_rows; + tab->join->join_examined_rows+= file_sort->examined_rows; + } + + if (quick_created) + { + /* This will delete the quick select. */ + select->cleanup(); + } + + table->file->ha_end_keyread(); + if (tab->type == JT_FT) + table->file->ha_ft_end(); + else + table->file->ha_index_or_rnd_end(); + + DBUG_RETURN(file_sort == 0); +err: + DBUG_RETURN(-1); +} + + +/** + Compare fields from table->record[0] and table->record[1], + possibly skipping few first fields. 
+
+ @param table Table whose record buffers are compared; record[1] lives
+ at offset table->s->rec_buff_length from record[0]
+ @param ptr field to start the comparison from,
+ somewhere in the table->field[] array
+
+ @retval 1 different
+ @retval 0 identical
+*/
+static bool compare_record(TABLE *table, Field **ptr)
+{
+ for (; *ptr ; ptr++)
+ {
+ Field *f= *ptr;
+ /* Differ if NULL-ness differs, or both non-NULL with unequal values */
+ if (f->is_null() != f->is_null(table->s->rec_buff_length) ||
+ (!f->is_null() && f->cmp_offset(table->s->rec_buff_length)))
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ Make private copies of the blob values in record[0] for the given
+ (null-terminated) field array, so they survive a subsequent row read.
+
+ @retval 1 a blob copy failed (error)
+ @retval 0 ok
+*/
+static bool copy_blobs(Field **ptr)
+{
+ for (; *ptr ; ptr++)
+ {
+ if ((*ptr)->flags & BLOB_FLAG)
+ if (((Field_blob *) (*ptr))->copy())
+ return 1; // Error
+ }
+ return 0;
+}
+
+/**
+ Free the blob buffers owned by the fields in the given
+ (null-terminated) field array. Counterpart of copy_blobs().
+*/
+static void free_blobs(Field **ptr)
+{
+ for (; *ptr ; ptr++)
+ {
+ if ((*ptr)->flags & BLOB_FLAG)
+ ((Field_blob *) (*ptr))->free();
+ }
+}
+
+
+/*
+ @brief
+ Remove duplicates from a temporary table.
+
+ @detail
+ Remove duplicate rows from a temporary table. This is used for e.g. queries
+ like
+
+ select distinct count(*) as CNT from tbl group by col
+
+ Here, we get a group table with count(*) values. It is not possible to
+ prevent duplicates from appearing in the table (as we don't know the values
+ before we've done the grouping). Because of that, we have this function to
+ scan the temptable (maybe, multiple times) and remove the duplicate rows
+
+ Rows that do not satisfy 'having' condition are also removed.
+*/ + +bool +JOIN_TAB::remove_duplicates() + +{ + bool error; + ulong keylength= 0, sort_field_keylength= 0; + uint field_count, item_count; + List *fields= (this-1)->fields; + Item *item; + THD *thd= join->thd; + SORT_FIELD *sortorder, *sorder; + DBUG_ENTER("remove_duplicates"); + + DBUG_ASSERT(join->aggr_tables > 0 && table->s->tmp_table != NO_TMP_TABLE); + THD_STAGE_INFO(join->thd, stage_removing_duplicates); + + if (!(sortorder= (SORT_FIELD*) my_malloc(PSI_INSTRUMENT_ME, + (fields->elements+1) * + sizeof(SORT_FIELD), + MYF(MY_WME | MY_ZEROFILL)))) + DBUG_RETURN(TRUE); + + /* Calculate how many saved fields there is in list */ + field_count= item_count= 0; + + List_iterator it(*fields); + for (sorder= sortorder ; (item=it++) ;) + { + if (!item->const_item()) + { + if (item->get_tmp_table_field()) + { + /* Field is stored in temporary table, skipp */ + field_count++; + } + else + { + /* Item is not stored in temporary table, remember it */ + sorder->item= item; + sorder->type= sorder->item->type_handler()->is_packable() ? + SORT_FIELD_ATTR::VARIABLE_SIZE : + SORT_FIELD_ATTR::FIXED_SIZE; + /* Calculate sorder->length */ + item->type_handler()->sort_length(thd, item, sorder); + sorder++; + item_count++; + } + } + } + sorder->item= 0; // End marker + + if ((field_count + item_count == 0) && ! having && + !(join->select_options & OPTION_FOUND_ROWS)) + { + // only const items with no OPTION_FOUND_ROWS + join->unit->lim.send_first_row(); // Only send first row + my_free(sortorder); + DBUG_RETURN(false); + } + + /* + The table contains first fields that will be in the output, then + temporary results pointed to by the fields list. + Example: SELECT DISTINCT sum(a), sum(d) > 2 FROM ... + In this case the temporary table contains sum(a), sum(d). 
+ */ + + Field **first_field=table->field+table->s->fields - field_count; + for (Field **ptr=first_field; *ptr; ptr++) + keylength+= (*ptr)->sort_length() + (*ptr)->maybe_null(); + for (SORT_FIELD *ptr= sortorder ; ptr->item ; ptr++) + sort_field_keylength+= ptr->length + (ptr->item->maybe_null() ? 1 : 0); + + /* + Disable LIMIT ROWS EXAMINED in order to avoid interrupting prematurely + duplicate removal, and produce a possibly incomplete query result. + */ + thd->lex->limit_rows_examined_cnt= ULONGLONG_MAX; + if (thd->killed == ABORT_QUERY) + thd->reset_killed(); + + table->file->info(HA_STATUS_VARIABLE); + table->reginfo.lock_type=TL_WRITE; + + if (table->s->db_type() == heap_hton || + (!table->s->blob_fields && + ((ALIGN_SIZE(keylength) + HASH_OVERHEAD) * table->file->stats.records < + thd->variables.sortbuff_size))) + error= remove_dup_with_hash_index(join->thd, table, field_count, + first_field, sortorder, + keylength + sort_field_keylength, having); + else + error=remove_dup_with_compare(join->thd, table, first_field, sortorder, + sort_field_keylength, having); + + if (join->select_lex != join->select_lex->master_unit()->fake_select_lex) + thd->lex->set_limit_rows_examined(); + free_blobs(first_field); + my_free(sortorder); + DBUG_RETURN(error); +} + + +/* + Create a sort/compare key from items + + Key is of fixed length and binary comparable +*/ + +static uchar *make_sort_key(SORT_FIELD *sortorder, uchar *key_buffer, + String *tmp_value) +{ + for (SORT_FIELD *ptr= sortorder ; ptr->item ; ptr++) + { + ptr->item->type_handler()->make_sort_key_part(key_buffer, + ptr->item, + ptr, tmp_value); + key_buffer+= (ptr->item->maybe_null() ? 
1 : 0) + ptr->length; + } + return key_buffer; +} + + +/* + Remove duplicates by comparing all rows with all other rows + + @param thd THD + @param table Temporary table + @param first_field Pointer to fields in temporary table that are part of + distinct, ends with null pointer + @param sortorder An array of Items part of distsinct. Terminated with an + element N with sortorder[N]->item=NULL. + @param keylength Length of key produced by sortorder + @param having Having expression (NULL if no having) +*/ + +static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field, + SORT_FIELD *sortorder, ulong keylength, + Item *having) +{ + handler *file=table->file; + uchar *record=table->record[0], *key_buffer, *key_buffer2; + char *tmp_buffer; + int error; + String tmp_value; + DBUG_ENTER("remove_dup_with_compare"); + + if (unlikely(!my_multi_malloc(PSI_INSTRUMENT_ME, + MYF(MY_WME), + &key_buffer, keylength, + &key_buffer2, keylength, + &tmp_buffer, keylength+1, + NullS))) + DBUG_RETURN(1); + tmp_value.set(tmp_buffer, keylength, &my_charset_bin); + + if (unlikely(file->ha_rnd_init_with_error(1))) + DBUG_RETURN(1); + + error= file->ha_rnd_next(record); + for (;;) + { + if (unlikely(thd->check_killed())) + { + error= 1; + goto end; + } + if (unlikely(error)) + { + if (error == HA_ERR_END_OF_FILE) + break; + goto err; + } + if (having && !having->val_int()) + { + if (unlikely((error= file->ha_delete_row(record)))) + goto err; + error= file->ha_rnd_next(record); + continue; + } + if (unlikely(copy_blobs(first_field))) + { + my_message(ER_OUTOFMEMORY, ER_THD(thd,ER_OUTOFMEMORY), + MYF(ME_FATAL)); + error= 1; + goto end; + } + make_sort_key(sortorder, key_buffer, &tmp_value); + store_record(table,record[1]); + + /* Read through rest of file and mark duplicated rows deleted */ + bool found=0; + for (;;) + { + if (unlikely((error= file->ha_rnd_next(record)))) + { + if (error == HA_ERR_END_OF_FILE) + break; + goto err; + } + make_sort_key(sortorder, key_buffer2, 
&tmp_value); + if (compare_record(table, first_field) == 0 && + (!keylength || + memcmp(key_buffer, key_buffer2, keylength) == 0)) + { + if (unlikely((error= file->ha_delete_row(record)))) + goto err; + } + else if (!found) + { + found=1; + if (unlikely((error= file->remember_rnd_pos()))) + goto err; + } + } + if (!found) + break; // End of file + /* Restart search on saved row */ + if (unlikely((error= file->restart_rnd_next(record)))) + goto err; + } + + error= 0; +end: + my_free(key_buffer); + file->extra(HA_EXTRA_NO_CACHE); + (void) file->ha_rnd_end(); + DBUG_RETURN(error); + +err: + DBUG_ASSERT(error); + file->print_error(error,MYF(0)); + goto end; +} + + +/** + Generate a hash index for each row to quickly find duplicate rows. + + @param thd THD + @param table Temporary table + @param field_count Number of fields part of distinct + @param first_field Pointer to fields in temporary table that are part of + distinct, ends with null pointer + @param sortorder An array of Items part of distsinct. Terminated with an + element N with sortorder[N]->item=NULL. + @param keylength Length of hash key + @param having Having expression (NULL if no having) + + @note + Note that this will not work on tables with blobs! 
+*/ + +static int remove_dup_with_hash_index(THD *thd, TABLE *table, + uint field_count, + Field **first_field, + SORT_FIELD *sortorder, + ulong key_length, + Item *having) +{ + uchar *key_buffer, *key_pos, *record=table->record[0]; + char *tmp_buffer; + int error; + handler *file= table->file; + ulong extra_length= ALIGN_SIZE(key_length)-key_length; + uint *field_lengths, *field_length; + HASH hash; + String tmp_value; + DBUG_ENTER("remove_dup_with_hash_index"); + + if (!my_multi_malloc(key_memory_hash_index_key_buffer, MYF(MY_WME), + &key_buffer, + (uint) ((key_length + extra_length) * + (long) file->stats.records), + &field_lengths, + (uint) (field_count*sizeof(*field_lengths)), + &tmp_buffer, key_length+1, + NullS)) + DBUG_RETURN(1); + + tmp_value.set(tmp_buffer, key_length, &my_charset_bin); + field_length= field_lengths; + for (Field **ptr= first_field ; *ptr ; ptr++) + (*field_length++)= (*ptr)->sort_length(); + + if (my_hash_init(key_memory_hash_index_key_buffer, &hash, &my_charset_bin, + (uint) file->stats.records, 0, key_length, + (my_hash_get_key) 0, 0, 0)) + { + my_free(key_buffer); + DBUG_RETURN(1); + } + + if (unlikely((error= file->ha_rnd_init(1)))) + goto err; + + key_pos= key_buffer; + for (;;) + { + uchar *org_key_pos; + if (unlikely(thd->check_killed())) + { + error=0; + goto err; + } + if (unlikely((error= file->ha_rnd_next(record)))) + { + if (error == HA_ERR_END_OF_FILE) + break; + goto err; + } + if (having && !having->val_int()) + { + if (unlikely((error= file->ha_delete_row(record)))) + goto err; + continue; + } + + /* copy fields to key buffer */ + org_key_pos= key_pos; + field_length=field_lengths; + for (Field **ptr= first_field ; *ptr ; ptr++) + { + (*ptr)->make_sort_key_part(key_pos, *field_length); + key_pos+= (*ptr)->maybe_null() + *field_length++; + } + /* Copy result fields not stored in table to key buffer */ + key_pos= make_sort_key(sortorder, key_pos, &tmp_value); + + /* Check if it exists before */ + if (my_hash_search(&hash, 
org_key_pos, key_length)) + { + /* Duplicated found ; Remove the row */ + if (unlikely((error= file->ha_delete_row(record)))) + goto err; + } + else + { + if (my_hash_insert(&hash, org_key_pos)) + goto err; + } + key_pos+=extra_length; + } + my_free(key_buffer); + my_hash_free(&hash); + file->extra(HA_EXTRA_NO_CACHE); + (void) file->ha_rnd_end(); + DBUG_RETURN(0); + +err: + my_free(key_buffer); + my_hash_free(&hash); + file->extra(HA_EXTRA_NO_CACHE); + (void) file->ha_rnd_end(); + if (unlikely(error)) + file->print_error(error,MYF(0)); + DBUG_RETURN(1); +} + + +/* + eq_ref: Create the lookup key and check if it is the same as saved key + + SYNOPSIS + cmp_buffer_with_ref() + tab Join tab of the accessed table + table The table to read. This is usually tab->table, except for + semi-join when we might need to make a lookup in a temptable + instead. + tab_ref The structure with methods to collect index lookup tuple. + This is usually table->ref, except for the case of when we're + doing lookup into semi-join materialization table. + + DESCRIPTION + Used by eq_ref access method: create the index lookup key and check if + we've used this key at previous lookup (If yes, we don't need to repeat + the lookup - the record has been already fetched) + + RETURN + TRUE No cached record for the key, or failed to create the key (due to + out-of-domain error) + FALSE The created key is the same as the previous one (and the record + is already in table->record) +*/ + +static bool +cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref) +{ + bool no_prev_key; + if (!tab_ref->disable_cache) + { + if (!(no_prev_key= tab_ref->key_err)) + { + /* Previous access found a row. 
         Copy its key */
      memcpy(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length);
    }
  }
  else
    no_prev_key= TRUE;
  if ((tab_ref->key_err= cp_buffer_from_ref(thd, table, tab_ref)) ||
      no_prev_key)
    return 1;
  /* Key built successfully; report whether it differs from the cached one */
  return memcmp(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length)
    != 0;
}


/**
  Build the index lookup key in ref->key_buff from the ref's store_key parts.

  Walks ref->key_copy (a NULL-terminated array of store_key objects, one per
  non-constant key part) and lets each one copy its value into the key
  buffer.  Constant key parts (bits set in ref->const_ref_part_map) have no
  store_key object and are skipped by advancing 'map'.

  Field-truncation warnings are suppressed for the duration of the copy:
  we are building an internal lookup tuple, not storing user data.

  @param thd    Current thread
  @param table  Table the lookup targets (columns are temporarily marked
                writable via dbug_tmp_use_all_columns)
  @param ref    Lookup descriptor whose key_buff is filled in

  @retval false  Key was built successfully
  @retval true   A part was out of domain (store_key::copy() reported an
                 error) or a null-rejecting part turned out to be NULL,
                 i.e. the lookup cannot match any row
*/

bool
cp_buffer_from_ref(THD *thd, TABLE *table, TABLE_REF *ref)
{
  enum_check_fields org_count_cuted_fields= thd->count_cuted_fields;
  MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, &table->write_set);
  bool result= 0;
  key_part_map map= 1;

  thd->count_cuted_fields= CHECK_FIELD_IGNORE;
  for (store_key **copy=ref->key_copy ; *copy ; copy++, map <<= 1)
  {
    while (map & ref->const_ref_part_map)       // skip const ref parts
      map <<= 1;                                // no store_key objects for them
    if ((*copy)->copy(thd) & 1 ||
        ((ref->null_rejecting & map) && (*copy)->null_key))
    {
      /* Out-of-domain value, or NULL in a null-rejecting key part */
      result= 1;
      break;
    }
  }
  /* Restore thread state regardless of success */
  thd->count_cuted_fields= org_count_cuted_fields;
  dbug_tmp_restore_column_map(&table->write_set, old_map);
  return result;
}


/*****************************************************************************
  Group and order functions
*****************************************************************************/

/**
  Resolve an ORDER BY or GROUP BY column reference.

  Given a column reference (represented by 'order') from a GROUP BY or ORDER
  BY clause, find the actual column it represents. If the column being
  resolved is from the GROUP BY clause, the procedure searches the SELECT
  list 'fields' and the columns in the FROM list 'tables'. If 'order' is from
  the ORDER BY clause, only the SELECT list is being searched.

  If 'order' is resolved to an Item, then order->item is set to the found
  Item. If there is no item for the found column (that is, it was resolved
  into a table field), order->item is 'fixed' and is added to all_fields and
  ref_pointer_array.

  ref_pointer_array and all_fields are updated.
+ + @param[in] thd Pointer to current thread structure + @param[in,out] ref_pointer_array All select, group and order by fields + @param[in] tables List of tables to search in (usually + FROM clause) + @param[in] order Column reference to be resolved + @param[in] fields List of fields to search in (usually + SELECT list) + @param[in,out] all_fields All select, group and order by fields + @param[in] is_group_field True if order is a GROUP field, false if + ORDER by field + @param[in] add_to_all_fields If the item is to be added to all_fields and + ref_pointer_array, this flag can be set to + false to stop the automatic insertion. + @param[in] from_window_spec If true then order is from a window spec + + @retval + FALSE if OK + @retval + TRUE if error occurred +*/ + +static bool +find_order_in_list(THD *thd, Ref_ptr_array ref_pointer_array, + TABLE_LIST *tables, + ORDER *order, List &fields, List &all_fields, + bool is_group_field, bool add_to_all_fields, + bool from_window_spec) +{ + Item *order_item= *order->item; /* The item from the GROUP/ORDER caluse. */ + Item::Type order_item_type; + Item **select_item; /* The corresponding item from the SELECT clause. */ + Field *from_field; /* The corresponding field from the FROM clause. */ + uint counter; + enum_resolution_type resolution; + + if (order_item->is_order_clause_position() && !from_window_spec) + { /* Order by position */ + uint count; + if (order->counter_used) + count= order->counter; // counter was once resolved + else + count= (uint) order_item->val_int(); + if (!count || count > fields.elements) + { + my_error(ER_BAD_FIELD_ERROR, MYF(0), + order_item->full_name(), thd->where); + return TRUE; + } + thd->change_item_tree((Item **)&order->item, (Item *)&ref_pointer_array[count - 1]); + order->in_field_list= 1; + order->counter= count; + order->counter_used= 1; + return FALSE; + } + /* Lookup the current GROUP/ORDER field in the SELECT clause. 
*/ + select_item= find_item_in_list(order_item, fields, &counter, + REPORT_EXCEPT_NOT_FOUND, &resolution); + if (!select_item) + return TRUE; /* The item is not unique, or some other error occurred. */ + + + /* Check whether the resolved field is not ambiguos. */ + if (select_item != not_found_item) + { + Item *view_ref= NULL; + /* + If we have found field not by its alias in select list but by its + original field name, we should additionally check if we have conflict + for this name (in case if we would perform lookup in all tables). + */ + if (resolution == RESOLVED_BEHIND_ALIAS && + order_item->fix_fields_if_needed_for_order_by(thd, order->item)) + return TRUE; + + /* Lookup the current GROUP field in the FROM clause. */ + order_item_type= order_item->type(); + from_field= (Field*) not_found_field; + if ((is_group_field && order_item_type == Item::FIELD_ITEM) || + order_item_type == Item::REF_ITEM) + { + from_field= find_field_in_tables(thd, (Item_ident*) order_item, tables, + NULL, ignored_tables_list_t(NULL), + &view_ref, IGNORE_ERRORS, FALSE, FALSE); + if (!from_field) + from_field= (Field*) not_found_field; + } + + if (from_field == not_found_field || + (from_field != view_ref_found ? + /* it is field of base table => check that fields are same */ + ((*select_item)->type() == Item::FIELD_ITEM && + ((Item_field*) (*select_item))->field->eq(from_field)) : + /* + in is field of view table => check that references on translation + table are same + */ + ((*select_item)->type() == Item::REF_ITEM && + view_ref->type() == Item::REF_ITEM && + ((Item_ref *) (*select_item))->ref == + ((Item_ref *) view_ref)->ref))) + { + /* + If there is no such field in the FROM clause, or it is the same field + as the one found in the SELECT clause, then use the Item created for + the SELECT field. As a result if there was a derived field that + 'shadowed' a table field with the same name, the table field will be + chosen over the derived field. 
+ */ + order->item= &ref_pointer_array[counter]; + order->in_field_list=1; + return FALSE; + } + else + { + /* + There is a field with the same name in the FROM clause. This + is the field that will be chosen. In this case we issue a + warning so the user knows that the field from the FROM clause + overshadows the column reference from the SELECT list. + */ + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_NON_UNIQ_ERROR, + ER_THD(thd, ER_NON_UNIQ_ERROR), + ((Item_ident*) order_item)->field_name.str, + thd->where); + } + } + else if (from_window_spec) + { + Item **found_item= find_item_in_list(order_item, all_fields, &counter, + REPORT_EXCEPT_NOT_FOUND, &resolution, + all_fields.elements - fields.elements); + if (found_item != not_found_item) + { + order->item= &ref_pointer_array[all_fields.elements-1-counter]; + order->in_field_list= 0; + return FALSE; + } + } + + order->in_field_list=0; + /* + The call to order_item->fix_fields() means that here we resolve + 'order_item' to a column from a table in the list 'tables', or to + a column in some outer query. Exactly because of the second case + we come to this point even if (select_item == not_found_item), + inspite of that fix_fields() calls find_item_in_list() one more + time. + + We check order_item->fixed() because Item_func_group_concat can put + arguments for which fix_fields already was called. + */ + if (order_item->fix_fields_if_needed_for_order_by(thd, order->item) || + thd->is_error()) + return TRUE; /* Wrong field. */ + order_item= *order->item; // Item can change during fix_fields() + + if (!add_to_all_fields) + return FALSE; + + uint el= all_fields.elements; + /* Add new field to field list. */ + all_fields.push_front(order_item, thd->mem_root); + ref_pointer_array[el]= order_item; + /* + If the order_item is a SUM_FUNC_ITEM, when fix_fields is called + ref_by is set to order->item which is the address of order_item. + But this needs to be address of order_item in the all_fields list. 
+ As a result, when it gets replaced with Item_aggregate_ref + object in Item::split_sum_func2, we will be able to retrieve the + newly created object. + */ + if (order_item->type() == Item::SUM_FUNC_ITEM) + ((Item_sum *)order_item)->ref_by= all_fields.head_ref(); + + order->item= &ref_pointer_array[el]; + return FALSE; +} + + +/** + Change order to point at item in select list. + + If item isn't a number and doesn't exits in the select list, add it the + the field list. +*/ + +int setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, + List &fields, List &all_fields, ORDER *order, + bool from_window_spec) +{ + SELECT_LEX *select = thd->lex->current_select; + enum_parsing_place context_analysis_place= + thd->lex->current_select->context_analysis_place; + thd->where="order clause"; + const bool for_union= select->master_unit()->is_unit_op() && + select == select->master_unit()->fake_select_lex; + for (uint number = 1; order; order=order->next, number++) + { + if (find_order_in_list(thd, ref_pointer_array, tables, order, fields, + all_fields, false, true, from_window_spec)) + return 1; + Item * const item= *order->item; + if (item->with_window_func() && context_analysis_place != IN_ORDER_BY) + { + my_error(ER_WINDOW_FUNCTION_IN_WINDOW_SPEC, MYF(0)); + return 1; + } + + /* + UNION queries cannot be used with an aggregate function in + an ORDER BY clause + */ + + if (for_union && (item->with_sum_func() || item->with_window_func())) + { + my_error(ER_AGGREGATE_ORDER_FOR_UNION, MYF(0), number); + return 1; + } + + if ((from_window_spec && item->with_sum_func() && + item->type() != Item::SUM_FUNC_ITEM) || item->with_window_func()) + { + item->split_sum_func(thd, ref_pointer_array, + all_fields, SPLIT_SUM_SELECT); + } + } + return 0; +} + + +/** + Intitialize the GROUP BY list. + + @param thd Thread handler + @param ref_pointer_array We store references to all fields that was + not in 'fields' here. + @param fields All fields in the select part. 
Any item in + 'order' that is part of these list is replaced + by a pointer to this fields. + @param all_fields Total list of all unique fields used by the + select. All items in 'order' that was not part + of fields will be added first to this list. + @param order The fields we should do GROUP/PARTITION BY on + @param hidden_group_fields Pointer to flag that is set to 1 if we added + any fields to all_fields. + @param from_window_spec If true then list is from a window spec + + @todo + change ER_WRONG_FIELD_WITH_GROUP to more detailed + ER_NON_GROUPING_FIELD_USED + + @retval + 0 ok + @retval + 1 error (probably out of memory) +*/ + +int +setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, + List &fields, List &all_fields, ORDER *order, + bool *hidden_group_fields, bool from_window_spec) +{ + enum_parsing_place context_analysis_place= + thd->lex->current_select->context_analysis_place; + *hidden_group_fields=0; + ORDER *ord; + + if (!order) + return 0; /* Everything is ok */ + + uint org_fields=all_fields.elements; + + thd->where="group statement"; + for (ord= order; ord; ord= ord->next) + { + if (find_order_in_list(thd, ref_pointer_array, tables, ord, fields, + all_fields, true, true, from_window_spec)) + return 1; + (*ord->item)->marker= MARKER_UNDEF_POS; /* Mark found */ + if ((*ord->item)->with_sum_func() && context_analysis_place == IN_GROUP_BY) + { + my_error(ER_WRONG_GROUP_FIELD, MYF(0), (*ord->item)->full_name()); + return 1; + } + if ((*ord->item)->with_window_func()) + { + if (context_analysis_place == IN_GROUP_BY) + my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0)); + else + my_error(ER_WINDOW_FUNCTION_IN_WINDOW_SPEC, MYF(0)); + return 1; + } + if (from_window_spec && (*ord->item)->with_sum_func() && + (*ord->item)->type() != Item::SUM_FUNC_ITEM) + (*ord->item)->split_sum_func(thd, ref_pointer_array, + all_fields, SPLIT_SUM_SELECT); + } + if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY && + context_analysis_place == 
IN_GROUP_BY) + { + /* + Don't allow one to use fields that is not used in GROUP BY + For each select a list of field references that aren't under an + aggregate function is created. Each field in this list keeps the + position of the select list expression which it belongs to. + + First we check an expression from the select list against the GROUP BY + list. If it's found there then it's ok. It's also ok if this expression + is a constant or an aggregate function. Otherwise we scan the list + of non-aggregated fields and if we'll find at least one field reference + that belongs to this expression and doesn't occur in the GROUP BY list + we throw an error. If there are no fields in the created list for a + select list expression this means that all fields in it are used under + aggregate functions. + + Note that for items in the select list (fields), Item_field->markers + contains the position of the field in the select list. + */ + Item *item; + Item_field *field; + int cur_pos_in_select_list= 0; + List_iterator li(fields); + List_iterator naf_it(thd->lex->current_select->join->non_agg_fields); + + field= naf_it++; + while (field && (item=li++)) + { + if (item->type() != Item::SUM_FUNC_ITEM && + item->marker != MARKER_UNDEF_POS && + !item->const_item() && + !(item->real_item()->type() == Item::FIELD_ITEM && + item->used_tables() & OUTER_REF_TABLE_BIT)) + { + while (field) + { + /* Skip fields from previous expressions. */ + if (field->marker < cur_pos_in_select_list) + goto next_field; + /* Found a field from the next expression. */ + if (field->marker > cur_pos_in_select_list) + break; + /* + Check whether the field occur in the GROUP BY list. + Throw the error later if the field isn't found. 
+ */ + for (ord= order; ord; ord= ord->next) + if ((*ord->item)->eq((Item*)field, 0)) + goto next_field; + /* + TODO: change ER_WRONG_FIELD_WITH_GROUP to more detailed + ER_NON_GROUPING_FIELD_USED + */ + my_error(ER_WRONG_FIELD_WITH_GROUP, MYF(0), field->full_name()); + return 1; +next_field: + field= naf_it++; + } + } + cur_pos_in_select_list++; + } + } + if (org_fields != all_fields.elements) + *hidden_group_fields=1; // group fields is not used + return 0; +} + +/** + Add fields with aren't used at start of field list. + + @return + FALSE if ok +*/ + +static bool +setup_new_fields(THD *thd, List &fields, + List &all_fields, ORDER *new_field) +{ + Item **item; + uint counter; + enum_resolution_type not_used; + DBUG_ENTER("setup_new_fields"); + + thd->column_usage= MARK_COLUMNS_READ; // Not really needed, but... + for (; new_field ; new_field= new_field->next) + { + if ((item= find_item_in_list(*new_field->item, fields, &counter, + IGNORE_ERRORS, ¬_used))) + new_field->item=item; /* Change to shared Item */ + else + { + thd->where="procedure list"; + if ((*new_field->item)->fix_fields(thd, new_field->item)) + DBUG_RETURN(1); /* purecov: inspected */ + all_fields.push_front(*new_field->item, thd->mem_root); + new_field->item=all_fields.head_ref(); + } + } + DBUG_RETURN(0); +} + +/** + Create a group by that consist of all non const fields. + + Try to use the fields in the order given by 'order' to allow one to + optimize away 'order by'. + + @retval + 0 OOM error if thd->is_fatal_error is set. 
Otherwise group was eliminated + # Pointer to new group +*/ + +ORDER * +create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array, + ORDER *order_list, List &fields, + List &all_fields, + bool *all_order_by_fields_used) +{ + List_iterator li(fields); + Item *item; + Ref_ptr_array orig_ref_pointer_array= ref_pointer_array; + ORDER *order,*group,**prev; + uint idx= 0; + + *all_order_by_fields_used= 1; + while ((item=li++)) + item->marker= MARKER_UNUSED; /* Marker that field is not used */ + + prev= &group; group=0; + for (order=order_list ; order; order=order->next) + { + if (order->in_field_list) + { + ORDER *ord=(ORDER*) thd->memdup((char*) order,sizeof(ORDER)); + if (!ord) + return 0; + *prev=ord; + prev= &ord->next; + (*ord->item)->marker= MARKER_FOUND_IN_ORDER; + } + else + *all_order_by_fields_used= 0; + } + + li.rewind(); + while ((item=li++)) + { + if (!item->const_item() && !item->with_sum_func() && + item->marker == MARKER_UNUSED) + { + /* + Don't put duplicate columns from the SELECT list into the + GROUP BY list. + */ + ORDER *ord_iter; + for (ord_iter= group; ord_iter; ord_iter= ord_iter->next) + if ((*ord_iter->item)->eq(item, 1)) + goto next_item; + + ORDER *ord=(ORDER*) thd->calloc(sizeof(ORDER)); + if (!ord) + return 0; + + if (item->type() == Item::FIELD_ITEM && + item->field_type() == MYSQL_TYPE_BIT) + { + /* + Because HEAP tables can't index BIT fields we need to use an + additional hidden field for grouping because later it will be + converted to a LONG field. Original field will remain of the + BIT type and will be returned [el]client. 
+ */ + Item_field *new_item= new (thd->mem_root) Item_field(thd, (Item_field*)item); + if (!new_item) + return 0; + int el= all_fields.elements; + orig_ref_pointer_array[el]= new_item; + all_fields.push_front(new_item, thd->mem_root); + ord->item=&orig_ref_pointer_array[el]; + } + else + { + /* + We have here only field_list (not all_field_list), so we can use + simple indexing of ref_pointer_array (order in the array and in the + list are same) + */ + ord->item= &ref_pointer_array[idx]; + } + ord->direction= ORDER::ORDER_ASC; + *prev=ord; + prev= &ord->next; + } +next_item: + idx++; + } + *prev=0; + return group; +} + + +/** + Update join with count of the different type of fields. +*/ + +void +count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param, + List &fields, bool reset_with_sum_func) +{ + List_iterator li(fields); + Item *field; + + param->field_count=param->sum_func_count=param->func_count= + param->hidden_field_count=0; + param->quick_group=1; + while ((field=li++)) + { + Item::Type real_type= field->real_item()->type(); + if (real_type == Item::FIELD_ITEM) + param->field_count++; + else if (real_type == Item::SUM_FUNC_ITEM) + { + if (! field->const_item()) + { + Item_sum *sum_item=(Item_sum*) field->real_item(); + if (!sum_item->depended_from() || + sum_item->depended_from() == select_lex) + { + if (!sum_item->quick_group) + param->quick_group=0; // UDF SUM function + param->sum_func_count++; + + for (uint i=0 ; i < sum_item->get_arg_count() ; i++) + { + if (sum_item->get_arg(i)->real_item()->type() == Item::FIELD_ITEM) + param->field_count++; + else + param->func_count++; + } + } + param->func_count++; + } + } + else + { + param->func_count++; + if (reset_with_sum_func) + field->with_flags&= ~item_with_t::SUM_FUNC; + } + } +} + + +/** + Return 1 if second is a subpart of first argument. 

  SIDE EFFECT:
  For all the first items in the group by list that match, the sort
  direction of the GROUP BY items are set to the same as those given by the
  ORDER BY.
  The direction of the group does not matter if the ORDER BY clause overrides
  it anyway.
*/

static bool
test_if_subpart(ORDER *group_by, ORDER *order_by)
{
  while (group_by && order_by)
  {
    if ((*group_by->item)->eq(*order_by->item, 1))
      group_by->direction= order_by->direction;   /* align GROUP direction */
    else
      return 0;                                   /* mismatch: not a subpart */
    group_by= group_by->next;
    order_by= order_by->next;
  }
  /* Subpart only if the whole ORDER BY list was consumed */
  return MY_TEST(!order_by);
}

/**
  Return table number if there is only one table in sort order
  and group and order is compatible, else return 0.
*/

/*
  NOTE(review): the template argument of List/List_iterator below appears to
  have been stripped by text extraction; upstream declares
  List<TABLE_LIST> — confirm against the repository before building.
*/
static TABLE *
get_sort_by_table(ORDER *a,ORDER *b, List &tables,
                  table_map const_tables)
{
  TABLE_LIST *table;
  List_iterator ti(tables);
  table_map map= (table_map) 0;
  DBUG_ENTER("get_sort_by_table");

  if (!a)
    a=b;                                        // Only one need to be given
  else if (!b)
    b=a;

  /* Walk both lists in lock step, accumulating used tables into 'map' */
  for (; a && b; a=a->next,b=b->next)
  {
    /* Skip elements of a that are constant */
    while (!((*a->item)->used_tables() & ~const_tables))
    {
      if (!(a= a->next))
        break;
    }

    /* Skip elements of b that are constant */
    while (!((*b->item)->used_tables() & ~const_tables))
    {
      if (!(b= b->next))
        break;
    }

    if (!a || !b)
      break;

    /* Corresponding elements must be identical for compatibility */
    if (!(*a->item)->eq(*b->item,1))
      DBUG_RETURN(0);
    map|=a->item[0]->used_tables();
  }
  /* Reject random/outer references: sorting cannot be pushed to one table */
  if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)))
    DBUG_RETURN(0);

  map&= ~const_tables;
  /* Find the table whose map bit is set (a match exists since map != 0) */
  while ((table= ti++) && !(map & table->table->map)) ;
  if (map != table->table->map)
    DBUG_RETURN(0);                             // More than one table
  DBUG_PRINT("exit",("sort by table: %d",table->table->tablenr));
  DBUG_RETURN(table->table);
}


/**
  calc how big buffer we need for comparing group entries.
+*/ + +void calc_group_buffer(TMP_TABLE_PARAM *param, ORDER *group) +{ + uint key_length=0, parts=0, null_parts=0; + + for (; group ; group=group->next) + { + Item *group_item= *group->item; + Field *field= group_item->get_tmp_table_field(); + if (field) + { + enum_field_types type; + if ((type= field->type()) == MYSQL_TYPE_BLOB) + key_length+=MAX_BLOB_WIDTH; // Can't be used as a key + else if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_VAR_STRING) + key_length+= field->field_length + HA_KEY_BLOB_LENGTH; + else if (type == MYSQL_TYPE_BIT) + { + /* Bit is usually stored as a longlong key for group fields */ + key_length+= 8; // Big enough + } + else + key_length+= field->pack_length(); + } + else + { + switch (group_item->cmp_type()) { + case REAL_RESULT: + key_length+= sizeof(double); + break; + case INT_RESULT: + key_length+= sizeof(longlong); + break; + case DECIMAL_RESULT: + key_length+= my_decimal_get_binary_size(group_item->max_length - + (group_item->decimals ? 1 : 0), + group_item->decimals); + break; + case TIME_RESULT: + { + /* + As items represented as DATE/TIME fields in the group buffer + have STRING_RESULT result type, we increase the length + by 8 as maximum pack length of such fields. + */ + key_length+= 8; + break; + } + case STRING_RESULT: + { + enum enum_field_types type= group_item->field_type(); + if (type == MYSQL_TYPE_BLOB) + key_length+= MAX_BLOB_WIDTH; // Can't be used as a key + else + { + /* + Group strings are taken as varstrings and require an length field. + A field is not yet created by create_tmp_field_ex() + and the sizes should match up. 
+ */ + key_length+= group_item->max_length + HA_KEY_BLOB_LENGTH; + } + break; + } + default: + /* This case should never be chosen */ + DBUG_ASSERT(0); + my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATAL)); + } + } + parts++; + if (group_item->maybe_null()) + null_parts++; + } + param->group_length= key_length + null_parts; + param->group_parts= parts; + param->group_null_parts= null_parts; +} + +static void calc_group_buffer(JOIN *join, ORDER *group) +{ + if (group) + join->group= 1; + calc_group_buffer(&join->tmp_table_param, group); +} + + +/** + allocate group fields or take prepared (cached). + + @param main_join join of current select + @param curr_join current join (join of current select or temporary copy + of it) + + @retval + 0 ok + @retval + 1 failed +*/ + +static bool +make_group_fields(JOIN *main_join, JOIN *curr_join) +{ + if (main_join->group_fields_cache.elements) + { + curr_join->group_fields= main_join->group_fields_cache; + curr_join->sort_and_group= 1; + } + else + { + if (alloc_group_fields(curr_join, curr_join->group_list)) + return (1); + main_join->group_fields_cache= curr_join->group_fields; + } + return (0); +} + +static bool +fill_cached_item_list(THD *thd, List *list, ORDER *order, + uint max_number_of_elements = UINT_MAX) +{ + for (; order && max_number_of_elements ; + order= order->next, max_number_of_elements--) + { + Cached_item *tmp= new_Cached_item(thd, *order->item, true); + if (!tmp || list->push_front(tmp)) + return true; + } + return false; +} + +/** + Get a list of buffers for saving last group. + + Groups are saved in reverse order for easier check loop. 
*/

static bool
alloc_group_fields(JOIN *join, ORDER *group)
{
  /* One Cached_item per GROUP BY element, pushed in reverse list order */
  if (fill_cached_item_list(join->thd, &join->group_fields, group))
    return true;
  join->sort_and_group=1;                       /* Mark for do_select */
  return false;
}

static bool
alloc_order_fields(JOIN *join, ORDER *order, uint max_number_of_elements)
{
  /* Same mechanism as alloc_group_fields, but capped and without
     setting sort_and_group */
  return fill_cached_item_list(join->thd, &join->order_fields, order,
                               max_number_of_elements);
}


/*
  Test if a single-row cache of items changed, and update the cache.

  @details Test if a list of items that typically represents a result
  row has changed. If the value of some item changed, update the cached
  value for this item.

  @param list list of <item, cached_value> pairs stored as Cached_item.

  @return -1 if no item changed
  @return index of the first item that changed
*/

/*
  NOTE(review): the template argument of List/List_iterator below appears to
  have been stripped by text extraction; upstream declares
  List<Cached_item> — confirm against the repository before building.
*/
int test_if_item_cache_changed(List &list)
{
  DBUG_ENTER("test_if_item_cache_changed");
  List_iterator li(list);
  int idx= -1,i;
  Cached_item *buff;

  /* Elements are stored in reverse order (see alloc_group_fields), so the
     countdown index 'i' maps back to the original element position */
  for (i=(int) list.elements-1 ; (buff=li++) ; i--)
  {
    if (buff->cmp())    /* cmp() also refreshes the cached value */
      idx=i;
  }
  DBUG_PRINT("info", ("idx: %d", idx));
  DBUG_RETURN(idx);
}


/*
  @return
    -1	- Group not changed
    value>=0 - Number of the component where the group changed
*/

/*
  NOTE(review): this function is byte-identical to
  test_if_item_cache_changed() above; presumably kept separate for distinct
  DBUG trace names — consolidation candidate, left unchanged here.
*/
int
test_if_group_changed(List &list)
{
  DBUG_ENTER("test_if_group_changed");
  List_iterator li(list);
  int idx= -1,i;
  Cached_item *buff;

  for (i=(int) list.elements-1 ; (buff=li++) ; i--)
  {
    if (buff->cmp())
      idx=i;
  }
  DBUG_PRINT("info", ("idx: %d", idx));
  DBUG_RETURN(idx);
}


/**
  Setup copy_fields to save fields at start of new group.

  Setup copy_fields to save fields at start of new group

  Only FIELD_ITEM:s and FUNC_ITEM:s needs to be saved between groups.
  Change old item_field to use a new field with points at saved fieldvalue
  This function is only called before use of send_result_set_metadata.
+ + @param thd THD pointer + @param param temporary table parameters + @param ref_pointer_array array of pointers to top elements of filed list + @param res_selected_fields new list of items of select item list + @param res_all_fields new list of all items + @param elements number of elements in select item list + @param all_fields all fields list + + @todo + In most cases this result will be sent to the user. + This should be changed to use copy_int or copy_real depending + on how the value is to be used: In some cases this may be an + argument in a group function, like: IF(ISNULL(col),0,COUNT(*)) + + @retval + 0 ok + @retval + !=0 error +*/ + +bool +setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, + Ref_ptr_array ref_pointer_array, + List &res_selected_fields, List &res_all_fields, + uint elements, List &all_fields) +{ + Item *pos; + List_iterator_fast li(all_fields); + Copy_field *copy= NULL; + Copy_field *copy_start __attribute__((unused)); + res_selected_fields.empty(); + res_all_fields.empty(); + List_iterator_fast itr(res_all_fields); + List extra_funcs; + uint i, border= all_fields.elements - elements; + DBUG_ENTER("setup_copy_fields"); + + if (param->field_count && + !(copy=param->copy_field= new (thd->mem_root) Copy_field[param->field_count])) + goto err2; + + param->copy_funcs.empty(); + copy_start= copy; + for (i= 0; (pos= li++); i++) + { + Field *field; + uchar *tmp; + Item *real_pos= pos->real_item(); + /* + Aggregate functions can be substituted for fields (by e.g. temp tables). + We need to filter those substituted fields out. 
+ */ + if (real_pos->type() == Item::FIELD_ITEM && + !(real_pos != pos && + ((Item_ref *)pos)->ref_type() == Item_ref::AGGREGATE_REF)) + { + Item_field *item; + if (!(item= new (thd->mem_root) Item_field(thd, ((Item_field*) real_pos)))) + goto err; + if (pos->type() == Item::REF_ITEM) + { + /* preserve the names of the ref when dereferncing */ + Item_ref *ref= (Item_ref *) pos; + item->db_name= ref->db_name; + item->table_name= ref->table_name; + item->name= ref->name; + } + pos= item; + if (item->field->flags & BLOB_FLAG) + { + if (!(pos= new (thd->mem_root) Item_copy_string(thd, pos))) + goto err; + /* + Item_copy_string::copy for function can call + Item_copy_string::val_int for blob via Item_ref. + But if Item_copy_string::copy for blob isn't called before, + it's value will be wrong + so let's insert Item_copy_string for blobs in the beginning of + copy_funcs + (to see full test case look at having.test, BUG #4358) + */ + if (param->copy_funcs.push_front(pos, thd->mem_root)) + goto err; + } + else + { + /* + set up save buffer and change result_field to point at + saved value + */ + field= item->field; + item->result_field=field->make_new_field(thd->mem_root, + field->table, 1); + /* + We need to allocate one extra byte for null handling and + another extra byte to not get warnings from purify in + Field_string::val_int + */ + if (!(tmp= (uchar*) thd->alloc(field->pack_length()+2))) + goto err; + if (copy) + { + DBUG_ASSERT (param->field_count > (uint) (copy - copy_start)); + copy->set(tmp, item->result_field); + item->result_field->move_field(copy->to_ptr,copy->to_null_ptr,1); +#ifdef HAVE_valgrind + copy->to_ptr[copy->from_length]= 0; +#endif + copy++; + } + } + } + else if ((real_pos->type() == Item::FUNC_ITEM || + real_pos->real_type() == Item::SUBSELECT_ITEM || + real_pos->type() == Item::CACHE_ITEM || + real_pos->type() == Item::COND_ITEM) && + !real_pos->with_sum_func()) + { // Save for send fields + LEX_CSTRING real_name= pos->name; + pos= real_pos; + 
pos->name= real_name; + /* TODO: + In most cases this result will be sent to the user. + This should be changed to use copy_int or copy_real depending + on how the value is to be used: In some cases this may be an + argument in a group function, like: IF(ISNULL(col),0,COUNT(*)) + */ + if (!(pos= pos->type_handler()->create_item_copy(thd, pos))) + goto err; + if (i < border) // HAVING, ORDER and GROUP BY + { + if (extra_funcs.push_back(pos, thd->mem_root)) + goto err; + } + else if (param->copy_funcs.push_back(pos, thd->mem_root)) + goto err; + } + res_all_fields.push_back(pos, thd->mem_root); + ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]= + pos; + } + param->copy_field_end= copy; + + for (i= 0; i < border; i++) + itr++; + itr.sublist(res_selected_fields, elements); + /* + Put elements from HAVING, ORDER BY and GROUP BY last to ensure that any + reference used in these will resolve to a item that is already calculated + */ + param->copy_funcs.append(&extra_funcs); + + DBUG_RETURN(0); + + err: + if (copy) + delete [] param->copy_field; // This is never 0 + param->copy_field= 0; +err2: + DBUG_RETURN(TRUE); +} + + +/** + Make a copy of all simple SELECT'ed items. + + This is done at the start of a new group so that we can retrieve + these later when the group changes. +*/ + +void +copy_fields(TMP_TABLE_PARAM *param) +{ + Copy_field *ptr=param->copy_field; + Copy_field *end=param->copy_field_end; + + DBUG_ASSERT((ptr != NULL && end >= ptr) || (ptr == NULL && end == NULL)); + + for (; ptr != end; ptr++) + (*ptr->do_copy)(ptr); + + List_iterator_fast it(param->copy_funcs); + Item_copy *item; + while ((item= (Item_copy*) it++)) + item->copy(); +} + + +/** + Make an array of pointers to sum_functions to speed up + sum_func calculation. 
+ + @retval + 0 ok + @retval + 1 Error +*/ + +bool JOIN::alloc_func_list() +{ + uint func_count, group_parts; + DBUG_ENTER("alloc_func_list"); + + func_count= tmp_table_param.sum_func_count; + /* + If we are using rollup, we need a copy of the summary functions for + each level + */ + if (rollup.state != ROLLUP::STATE_NONE) + func_count*= (send_group_parts+1); + + group_parts= send_group_parts; + /* + If distinct, reserve memory for possible + disctinct->group_by optimization + */ + if (select_distinct) + { + group_parts+= fields_list.elements; + /* + If the ORDER clause is specified then it's possible that + it also will be optimized, so reserve space for it too + */ + if (order) + { + ORDER *ord; + for (ord= order; ord; ord= ord->next) + group_parts++; + } + } + + /* This must use calloc() as rollup_make_fields depends on this */ + sum_funcs= (Item_sum**) thd->calloc(sizeof(Item_sum**) * (func_count+1) + + sizeof(Item_sum***) * (group_parts+1)); + sum_funcs_end= (Item_sum***) (sum_funcs+func_count+1); + DBUG_RETURN(sum_funcs == 0); +} + + +/** + Initialize 'sum_funcs' array with all Item_sum objects. 
+ + @param field_list All items + @param send_result_set_metadata Items in select list + @param before_group_by Set to 1 if this is called before GROUP BY handling + + @retval + 0 ok + @retval + 1 error +*/ + +bool JOIN::make_sum_func_list(List &field_list, + List &send_result_set_metadata, + bool before_group_by) +{ + List_iterator_fast it(field_list); + Item_sum **func; + Item *item; + DBUG_ENTER("make_sum_func_list"); + + func= sum_funcs; + while ((item=it++)) + { + if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() && + (!((Item_sum*) item)->depended_from() || + ((Item_sum *)item)->depended_from() == select_lex)) + *func++= (Item_sum*) item; + } + if (before_group_by && rollup.state == ROLLUP::STATE_INITED) + { + rollup.state= ROLLUP::STATE_READY; + if (rollup_make_fields(field_list, send_result_set_metadata, &func)) + DBUG_RETURN(TRUE); // Should never happen + } + else if (rollup.state == ROLLUP::STATE_NONE) + { + for (uint i=0 ; i <= send_group_parts ;i++) + sum_funcs_end[i]= func; + } + else if (rollup.state == ROLLUP::STATE_READY) + DBUG_RETURN(FALSE); // Don't put end marker + *func=0; // End marker + DBUG_RETURN(FALSE); +} + + +/** + Change all funcs and sum_funcs to fields in tmp table, and create + new list of all items. 
+ + @param thd THD pointer + @param ref_pointer_array array of pointers to top elements of filed list + @param res_selected_fields new list of items of select item list + @param res_all_fields new list of all items + @param elements number of elements in select item list + @param all_fields all fields list + + @retval + 0 ok + @retval + !=0 error +*/ + +static bool +change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, + List &res_selected_fields, + List &res_all_fields, + uint elements, List &all_fields) +{ + List_iterator_fast it(all_fields); + Item *item_field,*item; + DBUG_ENTER("change_to_use_tmp_fields"); + + res_selected_fields.empty(); + res_all_fields.empty(); + + uint border= all_fields.elements - elements; + for (uint i= 0; (item= it++); i++) + { + Field *field; + enum Item::Type item_type= item->type(); + if ((item->with_sum_func() && item_type != Item::SUM_FUNC_ITEM) || + item->with_window_func()) + item_field= item; + else if (item_type == Item::FIELD_ITEM || + item_type == Item::DEFAULT_VALUE_ITEM) + { + if (!(item_field= item->get_tmp_table_item(thd))) + DBUG_RETURN(true); + } + else if (item_type == Item::FUNC_ITEM && + ((Item_func*)item)->functype() == Item_func::SUSERVAR_FUNC) + { + field= item->get_tmp_table_field(); + if (field != NULL) + { + /* + Replace "@:=" with "@:=". Otherwise, we would re-evaluate , and + if expression were a subquery, this would access + already-unlocked tables. 
+ */ + Item_func_set_user_var* suv= + new (thd->mem_root) Item_func_set_user_var(thd, (Item_func_set_user_var*) item); + Item_field *new_field= new (thd->mem_root) Item_field(thd, field); + if (!suv || !new_field) + DBUG_RETURN(true); // Fatal error + new_field->set_refers_to_temp_table(); + List list; + list.push_back(new_field, thd->mem_root); + suv->set_arguments(thd, list); + item_field= suv; + } + else + item_field= item; + } + else if ((field= item->get_tmp_table_field())) + { + if (item->type() == Item::SUM_FUNC_ITEM && field->table->group) + { + item_field= ((Item_sum*) item)->result_item(thd, field); + } + else + { + item_field= (Item*) new (thd->mem_root) Item_field(thd, field); + if (item_field) + ((Item_field*) item_field)->set_refers_to_temp_table(); + } + if (!item_field) + DBUG_RETURN(true); // Fatal error + + if (item->real_item()->type() != Item::FIELD_ITEM) + field->orig_table= 0; + item_field->name= item->name; + if (item->type() == Item::REF_ITEM) + { + Item_field *ifield= (Item_field *) item_field; + Item_ref *iref= (Item_ref *) item; + ifield->table_name= iref->table_name; + ifield->db_name= iref->db_name; + } +#ifndef DBUG_OFF + if (!item_field->name.str) + { + char buff[256]; + String str(buff,sizeof(buff),&my_charset_bin); + str.length(0); + str.extra_allocation(1024); + item->print(&str, QT_ORDINARY); + item_field->name.str= thd->strmake(str.ptr(), str.length()); + item_field->name.length= str.length(); + } +#endif + } + else + item_field= item; + + res_all_fields.push_back(item_field, thd->mem_root); + ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]= + item_field; + } + + List_iterator_fast itr(res_all_fields); + for (uint i= 0; i < border; i++) + itr++; + itr.sublist(res_selected_fields, elements); + DBUG_RETURN(false); +} + + +/** + Change all sum_func refs to fields to point at fields in tmp table. + Change all funcs to be fields in tmp table. 
+ + @param thd THD pointer + @param ref_pointer_array array of pointers to top elements of field list + @param res_selected_fields new list of items of select item list + @param res_all_fields new list of all items + @param elements number of elements in select item list + @param all_fields all fields list + + @retval + 0 ok + @retval + 1 error +*/ + +static bool +change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, + List &res_selected_fields, + List &res_all_fields, uint elements, + List &all_fields) +{ + List_iterator_fast it(all_fields); + Item *item, *new_item; + res_selected_fields.empty(); + res_all_fields.empty(); + + uint i, border= all_fields.elements - elements; + for (i= 0; (item= it++); i++) + { + if (item->type() == Item::SUM_FUNC_ITEM && item->const_item()) + new_item= item; + else + { + if (!(new_item= item->get_tmp_table_item(thd))) + return 1; + } + + if (res_all_fields.push_back(new_item, thd->mem_root)) + return 1; + ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]= + new_item; + } + + List_iterator_fast itr(res_all_fields); + for (i= 0; i < border; i++) + itr++; + itr.sublist(res_selected_fields, elements); + + return thd->is_error(); +} + + + +/****************************************************************************** + Code for calculating functions +******************************************************************************/ + + +/** + Call ::setup for all sum functions. 
+ + @param thd thread handler + @param func_ptr sum function list + + @retval + FALSE ok + @retval + TRUE error +*/ + +static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr) +{ + Item_sum *func; + DBUG_ENTER("setup_sum_funcs"); + while ((func= *(func_ptr++))) + { + if (func->aggregator_setup(thd)) + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +static bool prepare_sum_aggregators(THD *thd,Item_sum **func_ptr, + bool need_distinct) +{ + Item_sum *func; + DBUG_ENTER("prepare_sum_aggregators"); + while ((func= *(func_ptr++))) + { + if (func->set_aggregator(thd, + need_distinct && func->has_with_distinct() ? + Aggregator::DISTINCT_AGGREGATOR : + Aggregator::SIMPLE_AGGREGATOR)) + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +static void +init_tmptable_sum_functions(Item_sum **func_ptr) +{ + Item_sum *func; + while ((func= *(func_ptr++))) + func->reset_field(); +} + + +/** Update record 0 in tmp_table from record 1. */ + +static void +update_tmptable_sum_func(Item_sum **func_ptr, + TABLE *tmp_table __attribute__((unused))) +{ + Item_sum *func; + while ((func= *(func_ptr++))) + func->update_field(); +} + + +/** Copy result of sum functions to record in tmp_table. */ + +static void +copy_sum_funcs(Item_sum **func_ptr, Item_sum **end_ptr) +{ + for (; func_ptr != end_ptr ; func_ptr++) + (void) (*func_ptr)->save_in_result_field(1); + return; +} + + +static bool +init_sum_functions(Item_sum **func_ptr, Item_sum **end_ptr) +{ + for (; func_ptr != end_ptr ;func_ptr++) + { + if ((*func_ptr)->reset_and_add()) + return 1; + } + /* If rollup, calculate the upper sum levels */ + for ( ; *func_ptr ; func_ptr++) + { + if ((*func_ptr)->aggregator_add()) + return 1; + } + return 0; +} + + +static bool +update_sum_func(Item_sum **func_ptr) +{ + Item_sum *func; + for (; (func= (Item_sum*) *func_ptr) ; func_ptr++) + if (func->aggregator_add()) + return 1; + return 0; +} + +/** + Copy result of functions to record in tmp_table. 
+ + Uses the thread pointer to check for errors in + some of the val_xxx() methods called by the + save_in_result_field() function. + TODO: make the Item::val_xxx() return error code + + @param func_ptr array of the function Items to copy to the tmp table + @param thd pointer to the current thread for error checking + @retval + FALSE if OK + @retval + TRUE on error +*/ + +bool +copy_funcs(Item **func_ptr, const THD *thd) +{ + Item *func; + for (; (func = *func_ptr) ; func_ptr++) + { + if (func->type() == Item::FUNC_ITEM && + ((Item_func *) func)->with_window_func()) + continue; + func->save_in_result_field(1); + /* + Need to check the THD error state because Item::val_xxx() don't + return error code, but can generate errors + TODO: change it for a real status check when Item::val_xxx() + are extended to return status code. + */ + if (unlikely(thd->is_error())) + return TRUE; + } + return FALSE; +} + + +/** + Create a condition for a const reference and add this to the + currenct select for the table. +*/ + +static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab) +{ + DBUG_ENTER("add_ref_to_table_cond"); + if (!join_tab->ref.key_parts) + DBUG_RETURN(FALSE); + + Item_cond_and *cond= new (thd->mem_root) Item_cond_and(thd); + TABLE *table=join_tab->table; + int error= 0; + if (!cond) + DBUG_RETURN(TRUE); + + for (uint i=0 ; i < join_tab->ref.key_parts ; i++) + { + Field *field=table->field[table->key_info[join_tab->ref.key].key_part[i]. 
+ fieldnr-1]; + Item *value=join_tab->ref.items[i]; + cond->add(new (thd->mem_root) + Item_func_equal(thd, new (thd->mem_root) Item_field(thd, field), + value), + thd->mem_root); + } + if (unlikely(thd->is_error())) + DBUG_RETURN(TRUE); + if (!cond->fixed()) + { + Item *tmp_item= (Item*) cond; + cond->fix_fields(thd, &tmp_item); + DBUG_ASSERT(cond == tmp_item); + } + if (join_tab->select) + { + Item *UNINIT_VAR(cond_copy); + if (join_tab->select->pre_idx_push_select_cond) + cond_copy= cond->copy_andor_structure(thd); + if (join_tab->select->cond) + error=(int) cond->add(join_tab->select->cond, thd->mem_root); + join_tab->select->cond= cond; + if (join_tab->select->pre_idx_push_select_cond) + { + Item *new_cond= and_conds(thd, cond_copy, + join_tab->select->pre_idx_push_select_cond); + if (new_cond->fix_fields_if_needed(thd, &new_cond)) + error= 1; + join_tab->pre_idx_push_select_cond= + join_tab->select->pre_idx_push_select_cond= new_cond; + } + join_tab->set_select_cond(cond, __LINE__); + } + else if ((join_tab->select= make_select(join_tab->table, 0, 0, cond, + (SORT_INFO*) 0, 0, &error))) + join_tab->set_select_cond(cond, __LINE__); + + DBUG_RETURN(error ? TRUE : FALSE); +} + + +/** + Free joins of subselect of this select. + + @param thd THD pointer + @param select pointer to st_select_lex which subselects joins we will free +*/ + +void free_underlaid_joins(THD *thd, SELECT_LEX *select) +{ + for (SELECT_LEX_UNIT *unit= select->first_inner_unit(); + unit; + unit= unit->next_unit()) + unit->cleanup(); +} + +/**************************************************************************** + ROLLUP handling +****************************************************************************/ + +/** + Replace occurrences of group by fields in an expression by ref items. + + The function replaces occurrences of group by fields in expr + by ref objects for these fields unless they are under aggregate + functions. 
+ The function also corrects value of the the maybe_null attribute + for the items of all subexpressions containing group by fields. + + @b EXAMPLES + @code + SELECT a+1 FROM t1 GROUP BY a WITH ROLLUP + SELECT SUM(a)+a FROM t1 GROUP BY a WITH ROLLUP + @endcode + + @b IMPLEMENTATION + + The function recursively traverses the tree of the expr expression, + looks for occurrences of the group by fields that are not under + aggregate functions and replaces them for the corresponding ref items. + + @note + This substitution is needed GROUP BY queries with ROLLUP if + SELECT list contains expressions over group by attributes. + + @param thd reference to the context + @param expr expression to make replacement + @param group_list list of references to group by items + @param changed out: returns 1 if item contains a replaced field item + + @todo + - TODO: Some functions are not null-preserving. For those functions + updating of the maybe_null attribute is an overkill. + + @retval + 0 if ok + @retval + 1 on error +*/ + +static bool change_group_ref(THD *thd, Item_func *expr, ORDER *group_list, + bool *changed) +{ + if (expr->argument_count()) + { + Name_resolution_context *context= &thd->lex->current_select->context; + Item **arg,**arg_end; + bool arg_changed= FALSE; + for (arg= expr->arguments(), + arg_end= expr->arguments() + expr->argument_count(); + arg != arg_end; arg++) + { + Item *item= *arg; + if (item->type() == Item::FIELD_ITEM || item->type() == Item::REF_ITEM) + { + ORDER *group_tmp; + for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next) + { + if (item->eq(*group_tmp->item,0)) + { + Item *new_item; + if (!(new_item= new (thd->mem_root) Item_ref(thd, context, + group_tmp->item, + null_clex_str, + item->name))) + return 1; // fatal_error is set + thd->change_item_tree(arg, new_item); + arg_changed= TRUE; + } + } + } + else if (item->type() == Item::FUNC_ITEM) + { + if (change_group_ref(thd, (Item_func *) item, group_list, &arg_changed)) + return 1; + 
} + } + if (arg_changed) + { + expr->base_flags|= item_base_t::MAYBE_NULL | item_base_t::IN_ROLLUP; + *changed= TRUE; + } + } + return 0; +} + + +/** Allocate memory needed for other rollup functions. */ + +bool JOIN::rollup_init() +{ + uint i,j; + Item **ref_array; + + tmp_table_param.quick_group= 0; // Can't create groups in tmp table + /* + Each group can potentially be replaced with Item_func_rollup_const() which + needs a copy_func placeholder. + */ + tmp_table_param.func_count+= send_group_parts; + rollup.state= ROLLUP::STATE_INITED; + + /* + Create pointers to the different sum function groups + These are updated by rollup_make_fields() + */ + tmp_table_param.group_parts= send_group_parts; + + Item_null_result **null_items= + static_cast(thd->alloc(sizeof(Item*)*send_group_parts)); + + rollup.null_items= Item_null_array(null_items, send_group_parts); + rollup.ref_pointer_arrays= + static_cast + (thd->alloc((sizeof(Ref_ptr_array) + + all_fields.elements * sizeof(Item*)) * send_group_parts)); + rollup.fields= + static_cast*>(thd->alloc(sizeof(List) * send_group_parts)); + + if (!null_items || !rollup.ref_pointer_arrays || !rollup.fields) + return true; + + ref_array= (Item**) (rollup.ref_pointer_arrays+send_group_parts); + + /* + Prepare space for field list for the different levels + These will be filled up in rollup_make_fields() + */ + for (i= 0 ; i < send_group_parts ; i++) + { + if (!(rollup.null_items[i]= new (thd->mem_root) Item_null_result(thd))) + return true; + + List *rollup_fields= &rollup.fields[i]; + rollup_fields->empty(); + rollup.ref_pointer_arrays[i]= Ref_ptr_array(ref_array, all_fields.elements); + ref_array+= all_fields.elements; + } + for (i= 0 ; i < send_group_parts; i++) + { + for (j=0 ; j < fields_list.elements ; j++) + rollup.fields[i].push_back(rollup.null_items[i], thd->mem_root); + } + List_iterator it(all_fields); + Item *item; + while ((item= it++)) + { + ORDER *group_tmp; + bool found_in_group= 0; + + for (group_tmp= group_list; 
group_tmp; group_tmp= group_tmp->next) + { + if (*group_tmp->item == item) + { + item->base_flags|= item_base_t::MAYBE_NULL | item_base_t::IN_ROLLUP; + found_in_group= 1; + break; + } + } + if (item->type() == Item::FUNC_ITEM && !found_in_group) + { + bool changed= FALSE; + if (change_group_ref(thd, (Item_func *) item, group_list, &changed)) + return 1; + /* + We have to prevent creation of a field in a temporary table for + an expression that contains GROUP BY attributes. + Marking the expression item as 'with_sum_func' will ensure this. + */ + if (changed) + item->with_flags|= item_with_t::SUM_FUNC; + } + } + return 0; +} + +/** + Wrap all constant Items in GROUP BY list. + + For ROLLUP queries each constant item referenced in GROUP BY list + is wrapped up into an Item_func object yielding the same value + as the constant item. The objects of the wrapper class are never + considered as constant items and besides they inherit all + properties of the Item_result_field class. + This wrapping allows us to ensure writing constant items + into temporary tables whenever the result of the ROLLUP + operation has to be written into a temporary table, e.g. when + ROLLUP is used together with DISTINCT in the SELECT list. + Usually when creating temporary tables for a intermidiate + result we do not include fields for constant expressions. 
+ + @retval + 0 if ok + @retval + 1 on error +*/ + +bool JOIN::rollup_process_const_fields() +{ + ORDER *group_tmp; + Item *item; + List_iterator it(all_fields); + + for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next) + { + if (!(*group_tmp->item)->const_item()) + continue; + while ((item= it++)) + { + if (*group_tmp->item == item) + { + Item* new_item= new (thd->mem_root) Item_func_rollup_const(thd, item); + if (!new_item) + return 1; + new_item->fix_fields(thd, (Item **) 0); + thd->change_item_tree(it.ref(), new_item); + for (ORDER *tmp= group_tmp; tmp; tmp= tmp->next) + { + if (*tmp->item == item) + thd->change_item_tree(tmp->item, new_item); + } + break; + } + } + it.rewind(); + } + return 0; +} + + +/** + Fill up rollup structures with pointers to fields to use. + + Creates copies of item_sum items for each sum level. + + @param fields_arg List of all fields (hidden and real ones) + @param sel_fields Pointer to selected fields + @param func Store here a pointer to all fields + + @retval + 0 if ok; + In this case func is pointing to next not used element. + @retval + 1 on error +*/ + +bool JOIN::rollup_make_fields(List &fields_arg, List &sel_fields, + Item_sum ***func) +{ + List_iterator_fast it(fields_arg); + Item *first_field= sel_fields.head(); + uint level; + + /* + Create field lists for the different levels + + The idea here is to have a separate field list for each rollup level to + avoid all runtime checks of which columns should be NULL. + + The list is stored in reverse order to get sum function in such an order + in func that it makes it easy to reset them with init_sum_functions() + + Assuming: SELECT a, b, c SUM(b) FROM t1 GROUP BY a,b WITH ROLLUP + + rollup.fields[0] will contain list where a,b,c is NULL + rollup.fields[1] will contain list where b,c is NULL + ... + rollup.ref_pointer_array[#] points to fields for rollup.fields[#] + ... 
+ sum_funcs_end[0] points to all sum functions + sum_funcs_end[1] points to all sum functions, except grand totals + ... + */ + + for (level=0 ; level < send_group_parts ; level++) + { + uint i; + uint pos= send_group_parts - level -1; + bool real_fields= 0; + Item *item; + List_iterator new_it(rollup.fields[pos]); + Ref_ptr_array ref_array_start= rollup.ref_pointer_arrays[pos]; + ORDER *start_group; + + /* Point to first hidden field */ + uint ref_array_ix= fields_arg.elements-1; + + /* Remember where the sum functions ends for the previous level */ + sum_funcs_end[pos+1]= *func; + + /* Find the start of the group for this level */ + for (i= 0, start_group= group_list ; + i++ < pos ; + start_group= start_group->next) + ; + + it.rewind(); + while ((item= it++)) + { + if (item == first_field) + { + real_fields= 1; // End of hidden fields + ref_array_ix= 0; + } + + if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() && + (!((Item_sum*) item)->depended_from() || + ((Item_sum *)item)->depended_from() == select_lex)) + + { + /* + This is a top level summary function that must be replaced with + a sum function that is reset for this level. + + NOTE: This code creates an object which is not that nice in a + sub select. Fortunately it's not common to have rollup in + sub selects. 
+ */ + item= item->copy_or_same(thd); + ((Item_sum*) item)->make_unique(); + *(*func)= (Item_sum*) item; + (*func)++; + } + else + { + /* Check if this is something that is part of this group by */ + ORDER *group_tmp; + for (group_tmp= start_group, i= pos ; + group_tmp ; group_tmp= group_tmp->next, i++) + { + if (*group_tmp->item == item) + { + /* + This is an element that is used by the GROUP BY and should be + set to NULL in this level + */ + Item_null_result *null_item= new (thd->mem_root) Item_null_result(thd); + if (!null_item) + return 1; + // Value will be null sometimes + item->set_maybe_null(); + null_item->result_field= item->get_tmp_table_field(); + item= null_item; + break; + } + } + } + ref_array_start[ref_array_ix]= item; + if (real_fields) + { + (void) new_it++; // Point to next item + new_it.replace(item); // Replace previous + ref_array_ix++; + } + else + ref_array_ix--; + } + } + sum_funcs_end[0]= *func; // Point to last function + return 0; +} + +/** + Send all rollup levels higher than the current one to the client. + + @b SAMPLE + @code + SELECT a, b, c SUM(b) FROM t1 GROUP BY a,b WITH ROLLUP + @endcode + + @param idx Level we are on: + - 0 = Total sum level + - 1 = First group changed (a) + - 2 = Second group changed (a,b) + + @retval + 0 ok + @retval + 1 If send_data_failed() +*/ + +int JOIN::rollup_send_data(uint idx) +{ + uint i; + for (i= send_group_parts ; i-- > idx ; ) + { + int res= 0; + /* Get reference pointers to sum functions in place */ + copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]); + if ((!having || having->val_int())) + { + if (send_records < unit->lim.get_select_limit() && do_send_rows && + (res= result->send_data_with_check(rollup.fields[i], + unit, send_records)) > 0) + return 1; + if (!res) + send_records++; + } + } + /* Restore ref_pointer_array */ + set_items_ref_array(current_ref_ptrs); + return 0; +} + +/** + Write all rollup levels higher than the current one to a temp table. 
+ + @b SAMPLE + @code + SELECT a, b, SUM(c) FROM t1 GROUP BY a,b WITH ROLLUP + @endcode + + @param idx Level we are on: + - 0 = Total sum level + - 1 = First group changed (a) + - 2 = Second group changed (a,b) + @param table reference to temp table + + @retval + 0 ok + @retval + 1 if write_data_failed() +*/ + +int JOIN::rollup_write_data(uint idx, TMP_TABLE_PARAM *tmp_table_param_arg, + TABLE *table_arg) +{ + uint i; + for (i= send_group_parts ; i-- > idx ; ) + { + /* Get reference pointers to sum functions in place */ + copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]); + if ((!having || having->val_int())) + { + int write_error; + Item *item; + List_iterator_fast it(rollup.fields[i]); + while ((item= it++)) + { + if (item->type() == Item::NULL_ITEM && item->is_result_field()) + item->save_in_result_field(1); + } + copy_sum_funcs(sum_funcs_end[i+1], sum_funcs_end[i]); + if (unlikely((write_error= + table_arg->file->ha_write_tmp_row(table_arg->record[0])))) + { + if (create_internal_tmp_table_from_heap(thd, table_arg, + tmp_table_param_arg->start_recinfo, + &tmp_table_param_arg->recinfo, + write_error, 0, NULL)) + return 1; + } + } + } + /* Restore ref_pointer_array */ + set_items_ref_array(current_ref_ptrs); + return 0; +} + +/** + clear results if there are not rows found for group + (end_send_group/end_write_group) +*/ + +void inline JOIN::clear_sum_funcs() +{ + if (sum_funcs) + { + Item_sum *func, **func_ptr= sum_funcs; + while ((func= *(func_ptr++))) + func->clear(); + } +} + + +/* + Prepare for returning 'empty row' when there is no matching row. + + - Mark all tables with mark_as_null_row() + - Make a copy of of all simple SELECT items + - Reset all sum functions to NULL or 0. 
+*/ + +void JOIN::clear(table_map *cleared_tables) +{ + clear_tables(this, cleared_tables); + copy_fields(&tmp_table_param); + clear_sum_funcs(); +} + + +/** + Print an EXPLAIN line with all NULLs and given message in the 'Extra' column + + @retval + 0 ok + 1 OOM error or error from send_data() +*/ + +int print_explain_message_line(select_result_sink *result, + uint8 options, bool is_analyze, + uint select_number, + const char *select_type, + ha_rows *rows, + const char *message) +{ + /* Note: for SHOW EXPLAIN, this is caller thread's THD */ + THD *thd= result->thd; + MEM_ROOT *mem_root= thd->mem_root; + Item *item_null= new (mem_root) Item_null(thd); + List item_list; + + item_list.push_back(new (mem_root) Item_int(thd, (int32) select_number), + mem_root); + item_list.push_back(new (mem_root) Item_string_sys(thd, select_type), + mem_root); + /* `table` */ + item_list.push_back(item_null, mem_root); + + /* `partitions` */ + if (options & DESCRIBE_PARTITIONS) + item_list.push_back(item_null, mem_root); + + /* type, possible_keys, key, key_len, ref */ + for (uint i=0 ; i < 5; i++) + item_list.push_back(item_null, mem_root); + + /* `rows` */ + StringBuffer<64> rows_str; + if (rows) + { + rows_str.append_ulonglong((ulonglong)(*rows)); + item_list.push_back(new (mem_root) + Item_string_sys(thd, rows_str.ptr(), + rows_str.length()), mem_root); + } + else + item_list.push_back(item_null, mem_root); + + /* `r_rows` */ + if (is_analyze) + item_list.push_back(item_null, mem_root); + + /* `filtered` */ + if (is_analyze || options & DESCRIBE_EXTENDED) + item_list.push_back(item_null, mem_root); + + /* `r_filtered` */ + if (is_analyze) + item_list.push_back(item_null, mem_root); + + /* `Extra` */ + if (message) + item_list.push_back(new (mem_root) Item_string_sys(thd, message), + mem_root); + else + item_list.push_back(item_null, mem_root); + + if (unlikely(thd->is_error()) || unlikely(result->send_data(item_list))) + return 1; + return 0; +} + + +/* + Append MRR information 
from quick select to the given string +*/ + +void explain_append_mrr_info(QUICK_RANGE_SELECT *quick, String *res) +{ + char mrr_str_buf[128]; + mrr_str_buf[0]=0; + int len; + handler *h= quick->head->file; + len= h->multi_range_read_explain_info(quick->mrr_flags, mrr_str_buf, + sizeof(mrr_str_buf)); + if (len > 0) + { + //res->append(STRING_WITH_LEN("; ")); + res->append(mrr_str_buf, len); + } +} + + +/////////////////////////////////////////////////////////////////////////////// +int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table, + key_map possible_keys) +{ + uint j; + for (j=0 ; j < table->s->keys ; j++) + { + if (possible_keys.is_set(j)) + if (!(list.append_str(alloc, table->key_info[j].name.str))) + return 1; + } + return 0; +} + + +bool JOIN_TAB::save_explain_data(Explain_table_access *eta, + table_map prefix_tables, + bool distinct_arg, JOIN_TAB *first_top_tab) +{ + int quick_type; + CHARSET_INFO *cs= system_charset_info; + THD *thd= join->thd; + TABLE_LIST *table_list= table->pos_in_table_list; + QUICK_SELECT_I *cur_quick= NULL; + my_bool key_read; + char table_name_buffer[SAFE_NAME_LEN]; + KEY *key_info= 0; + uint key_len= 0; + quick_type= -1; + + explain_plan= eta; + eta->key.clear(); + eta->quick_info= NULL; + + /* + We assume that if this table does pre-sorting, then it doesn't do filtering + with SQL_SELECT. + */ + DBUG_ASSERT(!(select && filesort)); + const SQL_SELECT *tab_select= get_sql_select(); + + if (filesort) + { + if (!(eta->pre_join_sort= + new (thd->mem_root) Explain_aggr_filesort(thd->mem_root, + thd->lex->analyze_stmt, + filesort))) + return 1; + } + // psergey-todo: data for filtering! 
+ tracker= &eta->tracker; + jbuf_tracker= &eta->jbuf_tracker; + jbuf_loops_tracker= &eta->jbuf_loops_tracker; + jbuf_unpack_tracker= &eta->jbuf_unpack_tracker; + + /* Enable the table access time tracker only for "ANALYZE stmt" */ + if (unlikely(thd->lex->analyze_stmt || + thd->variables.log_slow_verbosity & + LOG_SLOW_VERBOSITY_ENGINE)) + { + table->file->set_time_tracker(&eta->op_tracker); + + /* + Set handler_for_stats even if we are not running an ANALYZE command. + There's no harm, and in case somebody runs a SHOW ANALYZE command we'll + be able to print the engine statistics. + */ + if (table->file->handler_stats && + table->s->tmp_table != INTERNAL_TMP_TABLE) + eta->handler_for_stats= table->file; + + if (likely(thd->lex->analyze_stmt)) + { + eta->op_tracker.set_gap_tracker(&eta->extra_time_tracker); + eta->jbuf_unpack_tracker.set_gap_tracker(&eta->jbuf_extra_time_tracker); + } + } + /* No need to save id and select_type here, they are kept in Explain_select */ + + /* table */ + if (table->derived_select_number) + { + /* Derived table name generation */ + size_t len= my_snprintf(table_name_buffer, sizeof(table_name_buffer)-1, + "<derived%u>", + table->derived_select_number); + eta->table_name.copy(table_name_buffer, len, cs); + } + else if (bush_children) + { + JOIN_TAB *ctab= bush_children->start; + /* table */ + size_t len= my_snprintf(table_name_buffer, + sizeof(table_name_buffer)-1, + "<subquery%d>", + ctab->emb_sj_nest->sj_subq_pred->get_identifier()); + eta->table_name.copy(table_name_buffer, len, cs); + } + else + { + TABLE_LIST *real_table= table->pos_in_table_list; + /* + When multi-table UPDATE/DELETE does updates/deletes to a VIEW, the view + is merged in a certain particular way (grep for DT_MERGE_FOR_INSERT). + + As a result, view's underlying tables have $tbl->pos_in_table_list={view}. + We don't want to print view name in EXPLAIN, we want underlying table's + alias (like specified in the view definition). 
+ */ + if (real_table->merged_for_insert) + { + TABLE_LIST *view_child= + real_table->view->first_select_lex()->table_list.first; + for (;view_child; view_child= view_child->next_local) + { + if (view_child->table == table) + { + real_table= view_child; + break; + } + } + } + eta->table_name.copy(real_table->alias.str, real_table->alias.length, cs); + } + + /* "partitions" column */ + { +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info; + if (!table->derived_select_number && + (part_info= table->part_info)) + { //TODO: all thd->mem_root here should be fixed + make_used_partitions_str(thd->mem_root, part_info, &eta->used_partitions, + eta->used_partitions_list); + eta->used_partitions_set= true; + } + else + eta->used_partitions_set= false; +#else + /* just produce empty column if partitioning is not compiled in */ + eta->used_partitions_set= false; +#endif + } + + /* "type" column */ + enum join_type tab_type= type; + if ((type == JT_ALL || type == JT_HASH) && + tab_select && tab_select->quick && use_quick != 2) + { + cur_quick= tab_select->quick; + quick_type= cur_quick->get_type(); + if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) || + (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT) || + (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) || + (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION)) + tab_type= type == JT_ALL ? JT_INDEX_MERGE : JT_HASH_INDEX_MERGE; + else + tab_type= type == JT_ALL ? JT_RANGE : JT_HASH_RANGE; + } + eta->type= tab_type; + + /* Build "possible_keys" value */ + // psergey-todo: why does this use thd MEM_ROOT??? Doesn't this + // break ANALYZE ? thd->mem_root will be freed, and after that we will + // attempt to print the query plan? 
+ if (append_possible_keys(thd->mem_root, eta->possible_keys, table, keys)) + return 1; + // psergey-todo: ^ check for error return code + + /* Build "key", "key_len", and "ref" */ + + if (rowid_filter) + { + Range_rowid_filter *range_filter= (Range_rowid_filter *) rowid_filter; + QUICK_SELECT_I *quick= range_filter->get_select()->quick; + + Explain_rowid_filter *erf= new (thd->mem_root) Explain_rowid_filter; + erf->quick= quick->get_explain(thd->mem_root); + erf->selectivity= range_rowid_filter_info->selectivity; + erf->rows= quick->records; + if (!(erf->tracker= new Rowid_filter_tracker(thd->lex->analyze_stmt))) + return 1; + rowid_filter->set_tracker(erf->tracker); + eta->rowid_filter= erf; + } + + if (tab_type == JT_NEXT) + { + key_info= table->key_info+index; + key_len= key_info->key_length; + } + else if (ref.key_parts) + { + key_info= get_keyinfo_by_key_no(ref.key); + key_len= ref.key_length; + } + + /* + In STRAIGHT_JOIN queries, there can be join tabs with JT_CONST type + that still have quick selects. + */ + if (tab_select && tab_select->quick && tab_type != JT_CONST) + { + if (!(eta->quick_info= tab_select->quick->get_explain(thd->mem_root))) + return 1; + } + + if (key_info) /* 'index' or 'ref' access */ + { + eta->key.set(thd->mem_root, key_info, key_len); + + if (ref.key_parts && tab_type != JT_FT) + { + store_key **key_ref= ref.key_copy; + for (uint kp= 0; kp < ref.key_parts; kp++) + { + if ((key_part_map(1) << kp) & ref.const_ref_part_map) + { + if (!(eta->ref_list.append_str(thd->mem_root, "const"))) + return 1; + /* + create_ref_for_key() handles keypart=const equalities as follows: + - non-EXPLAIN execution will copy the "const" to lookup tuple + immediately and will not add an element to ref.key_copy + - EXPLAIN will put an element into ref.key_copy. 
Since we've + just printed "const" for it, we should skip it here + */ + if (thd->lex->describe) + key_ref++; + } + else + { + if (!(eta->ref_list.append_str(thd->mem_root, (*key_ref)->name()))) + return 1; + key_ref++; + } + } + } + } + + if (tab_type == JT_HASH_NEXT) /* full index scan + hash join */ + { + eta->hash_next_key.set(thd->mem_root, + & table->key_info[index], + table->key_info[index].key_length); + // psergey-todo: ^ is the above correct? are we necessarily joining on all + // columns? + } + + if (!key_info) + { + if (table_list && /* SJM bushes don't have table_list */ + table_list->schema_table && + table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE) + { + IS_table_read_plan *is_table_read_plan= table_list->is_table_read_plan; + StringBuffer<64> key_name_buf; + if (is_table_read_plan->trivial_show_command || + is_table_read_plan->has_db_lookup_value()) + { + /* The "key" has the name of the column referring to the database */ + int f_idx= table_list->schema_table->idx_field1; + LEX_CSTRING tmp= table_list->schema_table->fields_info[f_idx].name(); + key_name_buf.append(tmp, cs); + } + if (is_table_read_plan->trivial_show_command || + is_table_read_plan->has_table_lookup_value()) + { + if (is_table_read_plan->trivial_show_command || + is_table_read_plan->has_db_lookup_value()) + key_name_buf.append(','); + + int f_idx= table_list->schema_table->idx_field2; + LEX_CSTRING tmp= table_list->schema_table->fields_info[f_idx].name(); + key_name_buf.append(tmp, cs); + } + + if (key_name_buf.length()) + eta->key.set_pseudo_key(thd->mem_root, key_name_buf.c_ptr_safe()); + } + } + + /* "rows" */ + if (table_list /* SJM bushes don't have table_list */ && + table_list->schema_table) + { + /* I_S tables have rows=extra=NULL */ + eta->rows_set= false; + eta->filtered_set= false; + } + else + { + ha_rows examined_rows= get_examined_rows(); + + eta->rows_set= true; + eta->rows= examined_rows; + + /* "filtered" */ + float f= 0.0; + if (examined_rows) + 
{ + double pushdown_cond_selectivity= cond_selectivity; + if (pushdown_cond_selectivity == 1.0) + f= (float) (100.0 * records_read / examined_rows); + else + f= (float) (100.0 * pushdown_cond_selectivity); + } + set_if_smaller(f, 100.0); + eta->filtered_set= true; + eta->filtered= f; + } + + /* Build "Extra" field and save it */ + key_read= table->file->keyread_enabled(); + if ((tab_type == JT_NEXT || tab_type == JT_CONST) && + table->covering_keys.is_set(index)) + key_read=1; + if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT && + !((QUICK_ROR_INTERSECT_SELECT*)cur_quick)->need_to_fetch_row) + key_read=1; + + if (table_list->table_function) + eta->push_extra(ET_TABLE_FUNCTION); + + if (info) + { + eta->push_extra(info); + } + else if (packed_info & TAB_INFO_HAVE_VALUE) + { + if (packed_info & TAB_INFO_USING_INDEX) + eta->push_extra(ET_USING_INDEX); + if (packed_info & TAB_INFO_USING_WHERE) + eta->push_extra(ET_USING_WHERE); + if (packed_info & TAB_INFO_FULL_SCAN_ON_NULL) + eta->push_extra(ET_FULL_SCAN_ON_NULL_KEY); + } + else + { + uint keyno= MAX_KEY; + if (ref.key_parts) + keyno= ref.key; + else if (tab_select && cur_quick) + keyno = cur_quick->index; + + if (keyno != MAX_KEY && keyno == table->file->pushed_idx_cond_keyno && + table->file->pushed_idx_cond) + { + eta->push_extra(ET_USING_INDEX_CONDITION); + eta->pushed_index_cond= table->file->pushed_idx_cond; + } + else if (cache_idx_cond) + { + eta->push_extra(ET_USING_INDEX_CONDITION_BKA); + eta->pushed_index_cond= cache_idx_cond; + } + + if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) + { + eta->push_extra(ET_USING); + } + if (tab_select) + { + if (use_quick == 2) + { + eta->push_extra(ET_RANGE_CHECKED_FOR_EACH_RECORD); + eta->range_checked_fer= new (thd->mem_root) Explain_range_checked_fer; + if (eta->range_checked_fer) + 
eta->range_checked_fer-> + append_possible_keys_stat(thd->mem_root, table, keys); + } + else if (tab_select->cond || + (cache_select && cache_select->cond)) + { + const COND *pushed_cond= table->file->pushed_cond; + + if ((table->file->ha_table_flags() & + HA_CAN_TABLE_CONDITION_PUSHDOWN) && + pushed_cond) + { + eta->push_extra(ET_USING_WHERE_WITH_PUSHED_CONDITION); + } + else + { + eta->where_cond= tab_select->cond; + eta->cache_cond= cache_select? cache_select->cond : NULL; + eta->push_extra(ET_USING_WHERE); + } + } + } + if (table_list /* SJM bushes don't have table_list */ && + table_list->schema_table && + table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE) + { + if (!table_list->table_open_method) + eta->push_extra(ET_SKIP_OPEN_TABLE); + else if (table_list->table_open_method == OPEN_FRM_ONLY) + eta->push_extra(ET_OPEN_FRM_ONLY); + else + eta->push_extra(ET_OPEN_FULL_TABLE); + /* psergey-note: the following has a bug.*/ + if (table_list->is_table_read_plan->trivial_show_command || + (table_list->is_table_read_plan->has_db_lookup_value() && + table_list->is_table_read_plan->has_table_lookup_value())) + eta->push_extra(ET_SCANNED_0_DATABASES); + else if (table_list->is_table_read_plan->has_db_lookup_value() || + table_list->is_table_read_plan->has_table_lookup_value()) + eta->push_extra(ET_SCANNED_1_DATABASE); + else + eta->push_extra(ET_SCANNED_ALL_DATABASES); + } + if (key_read) + { + if (quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) + { + QUICK_GROUP_MIN_MAX_SELECT *qgs= + (QUICK_GROUP_MIN_MAX_SELECT *) tab_select->quick; + eta->push_extra(ET_USING_INDEX_FOR_GROUP_BY); + eta->loose_scan_is_scanning= qgs->loose_scan_is_scanning(); + } + else + eta->push_extra(ET_USING_INDEX); + } + if (table->reginfo.not_exists_optimize) + eta->push_extra(ET_NOT_EXISTS); + + if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE) + { + explain_append_mrr_info((QUICK_RANGE_SELECT*)(tab_select->quick), + &eta->mrr_type); + if (eta->mrr_type.length() > 0) + 
eta->push_extra(ET_USING_MRR); + } + + if (shortcut_for_distinct) + eta->push_extra(ET_DISTINCT); + + if (loosescan_match_tab) + { + eta->push_extra(ET_LOOSESCAN); + } + + if (first_weedout_table) + { + eta->start_dups_weedout= true; + eta->push_extra(ET_START_TEMPORARY); + } + if (check_weed_out_table) + { + eta->push_extra(ET_END_TEMPORARY); + eta->end_dups_weedout= true; + } + + else if (do_firstmatch) + { + if (do_firstmatch == /*join->join_tab*/ first_top_tab - 1) + eta->push_extra(ET_FIRST_MATCH); + else + { + eta->push_extra(ET_FIRST_MATCH); + TABLE *prev_table=do_firstmatch->table; + if (prev_table->derived_select_number) + { + char namebuf[NAME_LEN]; + /* Derived table name generation */ + size_t len= my_snprintf(namebuf, sizeof(namebuf)-1, + "<derived%u>", + prev_table->derived_select_number); + eta->firstmatch_table_name.append(namebuf, len); + } + else + eta->firstmatch_table_name.append(&prev_table->pos_in_table_list->alias); + } + } + + for (uint part= 0; part < ref.key_parts; part++) + { + if (ref.cond_guards[part]) + { + eta->push_extra(ET_FULL_SCAN_ON_NULL_KEY); + eta->full_scan_on_null_key= true; + break; + } + } + + if (cache) + { + eta->push_extra(ET_USING_JOIN_BUFFER); + if (cache->save_explain_data(&eta->bka_type)) + return 1; + } + } + + /* + In case this is a derived table, here we remember the number of + subselect that used to produce it. 
+ */ + if (!(table_list && table_list->is_with_table_recursive_reference())) + eta->derived_select_number= table->derived_select_number; + + /* The same for non-merged semi-joins */ + eta->non_merged_sjm_number = get_non_merged_semijoin_select(); + + return 0; +} + + +/* + Walk through join->aggr_tables and save aggregation/grouping query plan into + an Explain_select object + + @retval + 0 ok + 1 error +*/ + +bool save_agg_explain_data(JOIN *join, Explain_select *xpl_sel) +{ + JOIN_TAB *join_tab=join->join_tab + join->exec_join_tab_cnt(); + Explain_aggr_node *prev_node; + Explain_aggr_node *node= xpl_sel->aggr_tree; + bool is_analyze= join->thd->lex->analyze_stmt; + THD *thd= join->thd; + + for (uint i= 0; i < join->aggr_tables; i++, join_tab++) + { + // Each aggregate means a temp.table + prev_node= node; + if (!(node= new (thd->mem_root) Explain_aggr_tmp_table)) + return 1; + node->child= prev_node; + + if (join_tab->window_funcs_step) + { + Explain_aggr_node *new_node= + join_tab->window_funcs_step->save_explain_plan(thd->mem_root, + is_analyze); + if (!new_node) + return 1; + + prev_node=node; + node= new_node; + node->child= prev_node; + } + + /* The below matches execution in join_init_read_record() */ + if (join_tab->distinct) + { + prev_node= node; + if (!(node= new (thd->mem_root) Explain_aggr_remove_dups)) + return 1; + node->child= prev_node; + } + + if (join_tab->filesort) + { + Explain_aggr_filesort *eaf = + new (thd->mem_root) Explain_aggr_filesort(thd->mem_root, is_analyze, join_tab->filesort); + if (!eaf) + return 1; + prev_node= node; + node= eaf; + node->child= prev_node; + } + } + xpl_sel->aggr_tree= node; + return 0; +} + + +/** + Save Query Plan Footprint + + @note + Currently, this function may be called multiple times + + @retval + 0 ok + 1 error +*/ + +int JOIN::save_explain_data_intern(Explain_query *output, + bool need_tmp_table_arg, + bool need_order_arg, bool distinct_arg, + const char *message) +{ + JOIN *join= this; /* Legacy: this 
code used to be a non-member function */ + DBUG_ENTER("JOIN::save_explain_data_intern"); + DBUG_PRINT("info", ("Select %p (%u), type %s, message %s", + join->select_lex, join->select_lex->select_number, + join->select_lex->type, + message ? message : "NULL")); + DBUG_ASSERT(have_query_plan == QEP_AVAILABLE); + /* fake_select_lex is created/printed by Explain_union */ + DBUG_ASSERT(join->select_lex != join->unit->fake_select_lex); + + /* There should be no attempts to save query plans for merged selects */ + DBUG_ASSERT(!join->select_lex->master_unit()->derived || + join->select_lex->master_unit()->derived->is_materialized_derived() || + join->select_lex->master_unit()->derived->is_with_table()); + + /* Don't log this into the slow query log */ + + if (message) + { + if (!(explain= new (output->mem_root) + Explain_select(output->mem_root, + thd->lex->analyze_stmt))) + DBUG_RETURN(1); +#ifndef DBUG_OFF + explain->select_lex= select_lex; +#endif + join->select_lex->set_explain_type(true); + + explain->select_id= join->select_lex->select_number; + explain->select_type= join->select_lex->type; + explain->linkage= select_lex->get_linkage(); + explain->using_temporary= need_tmp; + explain->using_filesort= need_order_arg; + /* Setting explain->message means that all other members are invalid */ + explain->message= message; + + if (select_lex->master_unit()->derived) + explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED; + if (save_agg_explain_data(this, explain)) + DBUG_RETURN(1); + + output->add_node(explain); + } + else if (pushdown_query) + { + if (!(explain= new (output->mem_root) + Explain_select(output->mem_root, + thd->lex->analyze_stmt))) + DBUG_RETURN(1); + select_lex->set_explain_type(true); + + explain->select_id= select_lex->select_number; + explain->select_type= select_lex->type; + explain->linkage= select_lex->get_linkage(); + explain->using_temporary= need_tmp; + explain->using_filesort= need_order_arg; + explain->message= "Storage engine handles 
GROUP BY"; + + if (select_lex->master_unit()->derived) + explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED; + output->add_node(explain); + } + else + { + Explain_select *xpl_sel; + explain= xpl_sel= + new (output->mem_root) Explain_select(output->mem_root, + thd->lex->analyze_stmt); + if (!explain) + DBUG_RETURN(1); + + table_map used_tables=0; + + join->select_lex->set_explain_type(true); + xpl_sel->select_id= join->select_lex->select_number; + xpl_sel->select_type= join->select_lex->type; + xpl_sel->linkage= select_lex->get_linkage(); + xpl_sel->is_lateral= ((select_lex->get_linkage() == DERIVED_TABLE_TYPE) && + (select_lex->uncacheable & UNCACHEABLE_DEPENDENT)); + if (select_lex->master_unit()->derived) + xpl_sel->connection_type= Explain_node::EXPLAIN_NODE_DERIVED; + + if (save_agg_explain_data(this, xpl_sel)) + DBUG_RETURN(1); + + xpl_sel->exec_const_cond= exec_const_cond; + xpl_sel->outer_ref_cond= outer_ref_cond; + xpl_sel->pseudo_bits_cond= pseudo_bits_cond; + if (tmp_having) + xpl_sel->having= tmp_having; + else + xpl_sel->having= having; + xpl_sel->having_value= having_value; + + JOIN_TAB* const first_top_tab= join->first_breadth_first_tab(); + JOIN_TAB* prev_bush_root_tab= NULL; + + Explain_basic_join *cur_parent= xpl_sel; + + for (JOIN_TAB *tab= first_explain_order_tab(join); tab; + tab= next_explain_order_tab(join, tab)) + { + JOIN_TAB *saved_join_tab= NULL; + TABLE *cur_table= tab->table; + + /* Don't show eliminated tables */ + if (cur_table->map & join->eliminated_tables) + { + used_tables|= cur_table->map; + continue; + } + + + Explain_table_access *eta= (new (output->mem_root) + Explain_table_access(output->mem_root, + thd->lex->analyze_stmt)); + + if (!eta) + DBUG_RETURN(1); + if (tab->bush_root_tab != prev_bush_root_tab) + { + if (tab->bush_root_tab) + { + /* + We've entered an SJ-Materialization nest. Create an object for it. 
+ */ + if (!(cur_parent= + new (output->mem_root) Explain_basic_join(output->mem_root))) + DBUG_RETURN(1); + + JOIN_TAB *first_child= tab->bush_root_tab->bush_children->start; + cur_parent->select_id= + first_child->emb_sj_nest->sj_subq_pred->get_identifier(); + } + else + { + /* + We've just left an SJ-Materialization nest. We are at the join tab + that 'embeds the nest' + */ + DBUG_ASSERT(tab->bush_children); + eta->sjm_nest= cur_parent; + cur_parent= xpl_sel; + } + } + prev_bush_root_tab= tab->bush_root_tab; + + cur_parent->add_table(eta, output); + if (tab->save_explain_data(eta, used_tables, distinct_arg, first_top_tab)) + DBUG_RETURN(1); + + if (saved_join_tab) + tab= saved_join_tab; + + // For next iteration + used_tables|= cur_table->map; + } + output->add_node(xpl_sel); + } + + /* + Don't try to add query plans for child selects if this select was pushed + down into a Smart Storage Engine: + - the entire statement was pushed down ("PUSHED SELECT"), or + - this derived table was pushed down ("PUSHED DERIVED") + */ + if (!select_lex->pushdown_select && select_lex->type != pushed_derived_text) + for (SELECT_LEX_UNIT *tmp_unit= join->select_lex->first_inner_unit(); + tmp_unit; + tmp_unit= tmp_unit->next_unit()) + if (tmp_unit->explainable()) + explain->add_child(tmp_unit->first_select()->select_number); + + if (select_lex->is_top_level_node()) + output->query_plan_ready(); + + DBUG_RETURN(0); +} + + +/* + This function serves as "shortcut point" for EXPLAIN queries. + + The EXPLAIN statement executes just like its SELECT counterpart would + execute, except that JOIN::exec() will call select_describe() instead of + actually executing the query. + + Inside select_describe(): + - Query plan is updated with latest QEP choices made at the start of + JOIN::exec(). + - the process of "almost execution" is invoked for the children subqueries. + + Overall, select_describe() is a legacy of old EXPLAIN implementation and + should be removed. 
+*/ + +static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, + bool distinct,const char *message) +{ + THD *thd=join->thd; + select_result *result=join->result; + DBUG_ENTER("select_describe"); + + if (join->select_lex->pushdown_select) + { + /* + The whole statement was pushed down to a Smart Storage Engine. Do not + attempt to produce a query plan locally. + */ + DBUG_VOID_RETURN; + } + + /* Update the QPF with latest values of using_temporary, using_filesort */ + for (SELECT_LEX_UNIT *unit= join->select_lex->first_inner_unit(); + unit; + unit= unit->next_unit()) + { + /* + This fix_fields() call is to handle an edge case like this: + + SELECT ... UNION SELECT ... ORDER BY (SELECT ...) + + for such queries, we'll get here before having called + subquery_expr->fix_fields(), which will cause failure to + */ + if (unit->item && !unit->item->fixed()) + { + Item *ref= unit->item; + if (unit->item->fix_fields(thd, &ref)) + DBUG_VOID_RETURN; + DBUG_ASSERT(ref == unit->item); + } + + if (unit->explainable()) + { + if (mysql_explain_union(thd, unit, result)) + DBUG_VOID_RETURN; + } + } + DBUG_VOID_RETURN; +} + + +bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result) +{ + DBUG_ENTER("mysql_explain_union"); + bool res= 0; + SELECT_LEX *first= unit->first_select(); + bool is_pushed_union= unit->derived && unit->derived->pushdown_derived; + + for (SELECT_LEX *sl= first; sl; sl= sl->next_select()) + { + sl->set_explain_type(FALSE); + sl->options|= SELECT_DESCRIBE; + } + + if (unit->is_unit_op() || unit->fake_select_lex) + { + ulonglong save_options= 0; + + if (unit->union_needs_tmp_table() && unit->fake_select_lex) + { + save_options= unit->fake_select_lex->options; + unit->fake_select_lex->select_number= FAKE_SELECT_LEX_ID; // just for initialization + unit->fake_select_lex->type= unit_operation_text[unit->common_op()]; + unit->fake_select_lex->options|= SELECT_DESCRIBE; + } + if (!(res= unit->prepare(unit->derived, result, + 
SELECT_NO_UNLOCK | SELECT_DESCRIBE))) + { + if (!is_pushed_union) + res= unit->exec(); + } + + if (unit->union_needs_tmp_table() && unit->fake_select_lex) + unit->fake_select_lex->options= save_options; + } + else + { + thd->lex->current_select= first; + unit->set_limit(unit->global_parameters()); + res= mysql_select(thd, first->table_list.first, first->item_list, + first->where, + first->order_list.elements + first->group_list.elements, + first->order_list.first, first->group_list.first, + first->having, thd->lex->proc_list.first, + first->options | thd->variables.option_bits | SELECT_DESCRIBE, + result, unit, first); + } + + DBUG_RETURN(res || thd->is_error()); +} + + +static void print_table_array(THD *thd, + table_map eliminated_tables, + String *str, TABLE_LIST **table, + TABLE_LIST **end, + enum_query_type query_type) +{ + (*table)->print(thd, eliminated_tables, str, query_type); + + for (TABLE_LIST **tbl= table + 1; tbl < end; tbl++) + { + TABLE_LIST *curr= *tbl; + + /* + The "eliminated_tables &&" check guards against the case of + printing the query for CREATE VIEW. We do that without having run + JOIN::optimize() and so will have nested_join->used_tables==0. 
+ */ + if (eliminated_tables && + ((curr->table && (curr->table->map & eliminated_tables)) || + (curr->nested_join && !(curr->nested_join->used_tables & + ~eliminated_tables)))) + { + /* as of 5.5, print_join doesn't put eliminated elements into array */ + DBUG_ASSERT(0); + continue; + } + + /* JOIN_TYPE_OUTER is just a marker unrelated to real join */ + if (curr->outer_join & (JOIN_TYPE_LEFT|JOIN_TYPE_RIGHT)) + { + /* MySQL converts right to left joins */ + str->append(STRING_WITH_LEN(" left join ")); + } + else if (curr->straight) + str->append(STRING_WITH_LEN(" straight_join ")); + else if (curr->sj_inner_tables) + str->append(STRING_WITH_LEN(" semi join ")); + else + str->append(STRING_WITH_LEN(" join ")); + + curr->print(thd, eliminated_tables, str, query_type); + if (curr->on_expr) + { + str->append(STRING_WITH_LEN(" on(")); + curr->on_expr->print(str, query_type); + str->append(')'); + } + } +} + + +/* + Check if the passed table is + - a base table which was eliminated, or + - a join nest which only contained eliminated tables (and so was eliminated, + too) +*/ + +bool is_eliminated_table(table_map eliminated_tables, TABLE_LIST *tbl) +{ + return eliminated_tables && + ((tbl->table && (tbl->table->map & eliminated_tables)) || + (tbl->nested_join && !(tbl->nested_join->used_tables & + ~eliminated_tables))); +} + +/** + Print joins from the FROM clause. 
+ + @param thd thread handler + @param str string where table should be printed + @param tables list of tables in join + @query_type type of the query is being generated +*/ + +static void print_join(THD *thd, + table_map eliminated_tables, + String *str, + List<TABLE_LIST> *tables, + enum_query_type query_type) +{ + /* List is reversed => we should reverse it before using */ + List_iterator_fast<TABLE_LIST> ti(*tables); + TABLE_LIST **table; + DBUG_ENTER("print_join"); + + /* + If the QT_NO_DATA_EXPANSION flag is specified, we print the + original table list, including constant tables that have been + optimized away, as the constant tables may be referenced in the + expression printed by Item_field::print() when this flag is given. + Otherwise, only non-const tables are printed. + + Example: + + Original SQL: + select * from (select 1) t + + Printed without QT_NO_DATA_EXPANSION: + select '1' AS `1` from dual + + Printed with QT_NO_DATA_EXPANSION: + select `t`.`1` from (select 1 AS `1`) `t` + */ + const bool print_const_tables= (query_type & QT_NO_DATA_EXPANSION); + size_t tables_to_print= 0; + + for (TABLE_LIST *t= ti++; t ; t= ti++) + { + /* See comment in print_table_array() about the second condition */ + if (print_const_tables || !t->optimized_away) + if (!is_eliminated_table(eliminated_tables, t)) + tables_to_print++; + } + if (tables_to_print == 0) + { + str->append(STRING_WITH_LEN("dual")); + DBUG_VOID_RETURN; // all tables were optimized away + } + ti.rewind(); + + if (!(table= static_cast<TABLE_LIST **>(thd->alloc(sizeof(TABLE_LIST*) * + tables_to_print)))) + DBUG_VOID_RETURN; // out of memory + + TABLE_LIST *tmp, **t= table + (tables_to_print - 1); + while ((tmp= ti++)) + { + if (tmp->optimized_away && !print_const_tables) + continue; + if (is_eliminated_table(eliminated_tables, tmp)) + continue; + *t--= tmp; + } + + DBUG_ASSERT(tables->elements >= 1); + /* + Assert that the first table in the list isn't eliminated. 
This comes from + the fact that the first table can't be inner table of an outer join. + */ + DBUG_ASSERT(!eliminated_tables || + !(((*table)->table && ((*table)->table->map & eliminated_tables)) || + ((*table)->nested_join && !((*table)->nested_join->used_tables & + ~eliminated_tables)))); + /* + If the first table is a semi-join nest, swap it with something that is + not a semi-join nest. + */ + if ((*table)->sj_inner_tables) + { + TABLE_LIST **end= table + tables_to_print; + for (TABLE_LIST **t2= table; t2!=end; t2++) + { + if (!(*t2)->sj_inner_tables) + { + tmp= *t2; + *t2= *table; + *table= tmp; + break; + } + } + } + print_table_array(thd, eliminated_tables, str, table, + table + tables_to_print, query_type); + DBUG_VOID_RETURN; +} + +/** + @brief Print an index hint + + @details Prints out the USE|FORCE|IGNORE index hint. + + @param thd the current thread + @param[out] str appends the index hint here + @param hint what the hint is (as string : "USE INDEX"| + "FORCE INDEX"|"IGNORE INDEX") + @param hint_length the length of the string in 'hint' + @param indexes a list of index names for the hint +*/ + +void +Index_hint::print(THD *thd, String *str) +{ + switch (type) + { + case INDEX_HINT_IGNORE: str->append(STRING_WITH_LEN("IGNORE INDEX")); break; + case INDEX_HINT_USE: str->append(STRING_WITH_LEN("USE INDEX")); break; + case INDEX_HINT_FORCE: str->append(STRING_WITH_LEN("FORCE INDEX")); break; + } + str->append(STRING_WITH_LEN(" (")); + if (key_name.length) + { + if (thd && !system_charset_info->strnncoll( + (const uchar *)key_name.str, key_name.length, + (const uchar *)primary_key_name.str, + primary_key_name.length)) + str->append(primary_key_name); + else + append_identifier(thd, str, &key_name); +} + str->append(')'); +} + + +/** + Print table as it should be in join list. 
+ + @param str string where table should be printed +*/ + +void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str, + enum_query_type query_type) +{ + if (nested_join) + { + str->append('('); + print_join(thd, eliminated_tables, str, &nested_join->join_list, query_type); + str->append(')'); + } + else if (jtbm_subselect) + { + if (jtbm_subselect->engine->engine_type() == + subselect_engine::SINGLE_SELECT_ENGINE) + { + /* + We get here when conversion into materialization didn't finish (this + happens when + - The subquery is a degenerate case which produces 0 or 1 record + - subquery's optimization didn't finish because of @@max_join_size + limits + - ... maybe some other cases like this + */ + str->append(STRING_WITH_LEN(" (")); + jtbm_subselect->engine->print(str, query_type); + str->append(')'); + } + else + { + str->append(STRING_WITH_LEN(" (")); + subselect_hash_sj_engine *hash_engine; + hash_engine= (subselect_hash_sj_engine*)jtbm_subselect->engine; + hash_engine->materialize_engine->print(str, query_type); + str->append(')'); + } + } + else + { + const char *cmp_name; // Name to compare with alias + if (view_name.str) + { + // A view + + if (!(belong_to_view && + belong_to_view->compact_view_format) && + !(query_type & QT_ITEM_IDENT_SKIP_DB_NAMES)) + { + append_identifier(thd, str, &view_db); + str->append('.'); + } + append_identifier(thd, str, &view_name); + cmp_name= view_name.str; + } + else if (derived) + { + if (!is_with_table()) + { + // A derived table + str->append('('); + derived->print(str, query_type); + str->append(')'); + cmp_name= ""; // Force printing of alias + } + else + { + append_identifier(thd, str, &table_name); + cmp_name= table_name.str; + } + } + else if (table_function) + { + /* A table function. 
*/ + (void) table_function->print(thd, this, str, query_type); + str->append(' '); + append_identifier(thd, str, &alias); + cmp_name= alias.str; + } + else + { + // A normal table + + if (!(belong_to_view && + belong_to_view->compact_view_format) && + !(query_type & QT_ITEM_IDENT_SKIP_DB_NAMES)) + { + append_identifier(thd, str, &db); + str->append('.'); + } + if (schema_table) + { + append_identifier(thd, str, &schema_table_name); + cmp_name= schema_table_name.str; + } + else + { + append_identifier(thd, str, &table_name); + cmp_name= table_name.str; + } +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (partition_names && partition_names->elements) + { + int i, num_parts= partition_names->elements; + List_iterator name_it(*(partition_names)); + str->append(STRING_WITH_LEN(" PARTITION (")); + for (i= 1; i <= num_parts; i++) + { + String *name= name_it++; + append_identifier(thd, str, name->ptr(), name->length()); + if (i != num_parts) + str->append(','); + } + str->append(')'); + } +#endif /* WITH_PARTITION_STORAGE_ENGINE */ + } + if (table && table->versioned()) + vers_conditions.print(str, query_type); + + if (my_strcasecmp(table_alias_charset, cmp_name, alias.str)) + { + char t_alias_buff[MAX_ALIAS_NAME]; + LEX_CSTRING t_alias= alias; + + str->append(' '); + if (lower_case_table_names == 1) + { + if (alias.str && alias.str[0]) + { + strmov(t_alias_buff, alias.str); + t_alias.length= my_casedn_str(files_charset_info, t_alias_buff); + t_alias.str= t_alias_buff; + } + } + + append_identifier(thd, str, &t_alias); + } + + if (index_hints) + { + List_iterator it(*index_hints); + Index_hint *hint; + + while ((hint= it++)) + { + str->append(' '); + hint->print(thd, str); + } + } + } +} + +enum explainable_cmd_type +{ + SELECT_CMD, INSERT_CMD, REPLACE_CMD, UPDATE_CMD, DELETE_CMD, NO_CMD +}; + +static +const LEX_CSTRING explainable_cmd_name []= +{ + {STRING_WITH_LEN("select ")}, + {STRING_WITH_LEN("insert ")}, + {STRING_WITH_LEN("replace ")}, + {STRING_WITH_LEN("update ")}, + 
{STRING_WITH_LEN("delete ")}, +}; + +static +const LEX_CSTRING* get_explainable_cmd_name(enum explainable_cmd_type cmd) +{ + return explainable_cmd_name + cmd; +} + +static +enum explainable_cmd_type get_explainable_cmd_type(THD *thd) +{ + switch (thd->lex->sql_command) { + case SQLCOM_SELECT: + return SELECT_CMD; + case SQLCOM_INSERT: + case SQLCOM_INSERT_SELECT: + return INSERT_CMD; + case SQLCOM_REPLACE: + case SQLCOM_REPLACE_SELECT: + return REPLACE_CMD; + case SQLCOM_UPDATE: + case SQLCOM_UPDATE_MULTI: + return UPDATE_CMD; + case SQLCOM_DELETE: + case SQLCOM_DELETE_MULTI: + return DELETE_CMD; + default: + return SELECT_CMD; + } +} + + +void TABLE_LIST::print_leaf_tables(THD *thd, String *str, + enum_query_type query_type) +{ + if (merge_underlying_list) + { + for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local) + tbl->print_leaf_tables(thd, str, query_type); + } + else + print(thd, 0, str, query_type); +} + + +void st_select_lex::print_item_list(THD *thd, String *str, + enum_query_type query_type) +{ + bool first= 1; + /* + outer_select() can not be used here because it is for name resolution + and will return NULL at any end of name resolution chain (view/derived) + */ + bool top_level= is_query_topmost(thd); + List_iterator_fast it(item_list); + Item *item; + while ((item= it++)) + { + if (first) + first= 0; + else + str->append(','); + + if ((is_subquery_function() && !item->is_explicit_name()) || + !item->name.str) + { + /* + Do not print auto-generated aliases in subqueries. It has no purpose + in a view definition or other contexts where the query is printed. + */ + item->print(str, query_type); + } + else + { + /* + Do not print illegal names (if it is not top level SELECT). + Top level view checked (and correct name are assigned), + other cases of top level SELECT are not important, because + it is not "table field". 
+ */ + if (top_level || + item->is_explicit_name() || + !check_column_name(item->name.str)) + item->print_item_w_name(str, query_type); + else + item->print(str, query_type); + } + } +} + + +void st_select_lex::print_set_clause(THD *thd, String *str, + enum_query_type query_type) +{ + bool first= 1; + /* + outer_select() can not be used here because it is for name resolution + and will return NULL at any end of name resolution chain (view/derived) + */ + List_iterator_fast it(item_list); + List_iterator_fast vt(thd->lex->value_list); + Item *item; + Item *val; + while ((item= it++, val= vt++ )) + { + if (first) + { + str->append(STRING_WITH_LEN(" set ")); + first= 0; + } + else + str->append(','); + + item->print(str, (enum_query_type) (query_type | QT_NO_DATA_EXPANSION)); + str->append(STRING_WITH_LEN(" = ")); + val->print(str, query_type); + } +} + + +void st_select_lex::print_on_duplicate_key_clause(THD *thd, String *str, + enum_query_type query_type) +{ + bool first= 1; + List_iterator_fast it(thd->lex->update_list); + List_iterator_fast vt(thd->lex->value_list); + Item *item; + Item *val; + while ((item= it++, val= vt++ )) + { + if (first) + { + str->append(STRING_WITH_LEN(" on duplicate key update ")); + first= 0; + } + else + str->append(','); + + item->print(str, query_type); + str->append(STRING_WITH_LEN(" = ")); + val->print(str, query_type); + } +} + +void st_select_lex::print(THD *thd, String *str, enum_query_type query_type) +{ + DBUG_ASSERT(thd); + + if (tvc) + { + tvc->print(thd, str, query_type); + return; + } + + if (is_tvc_wrapper && (query_type & QT_NO_WRAPPERS_FOR_TVC_IN_VIEW)) + { + first_inner_unit()->first_select()->print(thd, str, query_type); + return; + } + + bool top_level= is_query_topmost(thd); + enum explainable_cmd_type sel_type= SELECT_CMD; + if (top_level) + sel_type= get_explainable_cmd_type(thd); + + if (sel_type == INSERT_CMD || sel_type == REPLACE_CMD) + { + str->append(get_explainable_cmd_name(sel_type)); + 
str->append(STRING_WITH_LEN("into ")); + TABLE_LIST *tbl= thd->lex->query_tables; + while (tbl->merge_underlying_list) + tbl= tbl->merge_underlying_list; + tbl->print(thd, 0, str, query_type); + if (thd->lex->field_list.elements) + { + str->append ('('); + List_iterator_fast it(thd->lex->field_list); + Item *item; + bool first= true; + while ((item= it++)) + { + if (first) + first= false; + else + str->append(','); + str->append(item->name); + } + str->append(')'); + } + + str->append(' '); + + if (thd->lex->sql_command == SQLCOM_INSERT || + thd->lex->sql_command == SQLCOM_REPLACE) + { + str->append(STRING_WITH_LEN("values ")); + bool is_first_elem= true; + List_iterator_fast li(thd->lex->many_values); + List_item *list; + + while ((list= li++)) + { + if (is_first_elem) + is_first_elem= false; + else + str->append(','); + + print_list_item(str, list, query_type); + } + if (thd->lex->update_list.elements) + print_on_duplicate_key_clause(thd, str, query_type); + return; + } + } + + if ((query_type & QT_SHOW_SELECT_NUMBER) && + thd->lex->all_selects_list && + thd->lex->all_selects_list->link_next && + select_number != FAKE_SELECT_LEX_ID) + { + str->append(STRING_WITH_LEN("/* select#")); + str->append_ulonglong(select_number); + if (thd->lex->describe & DESCRIBE_EXTENDED2) + { + str->append('/'); + str->append_ulonglong(nest_level); + + if (master_unit()->fake_select_lex && + master_unit()->first_select() == this) + { + str->append(STRING_WITH_LEN(" Filter Select: ")); + master_unit()->fake_select_lex->print(thd, str, query_type); + } + } + str->append(STRING_WITH_LEN(" */ ")); + } + + if (sel_type == SELECT_CMD || + sel_type == INSERT_CMD || + sel_type == REPLACE_CMD) + str->append(STRING_WITH_LEN("select ")); + + if (join && join->cleaned) + { + /* + JOIN already cleaned up so it is dangerous to print items + because temporary tables they pointed on could be freed. 
+ */ + str->append('#'); + str->append(select_number); + return; + } + + /* First add options */ + if (options & SELECT_STRAIGHT_JOIN) + str->append(STRING_WITH_LEN("straight_join ")); + if (options & SELECT_HIGH_PRIORITY) + str->append(STRING_WITH_LEN("high_priority ")); + if (options & SELECT_DISTINCT) + str->append(STRING_WITH_LEN("distinct ")); + if (options & SELECT_SMALL_RESULT) + str->append(STRING_WITH_LEN("sql_small_result ")); + if (options & SELECT_BIG_RESULT) + str->append(STRING_WITH_LEN("sql_big_result ")); + if (options & OPTION_BUFFER_RESULT) + str->append(STRING_WITH_LEN("sql_buffer_result ")); + if (options & OPTION_FOUND_ROWS) + str->append(STRING_WITH_LEN("sql_calc_found_rows ")); + if (this == parent_lex->first_select_lex()) + { + switch (parent_lex->sql_cache) + { + case LEX::SQL_NO_CACHE: + str->append(STRING_WITH_LEN("sql_no_cache ")); + break; + case LEX::SQL_CACHE: + str->append(STRING_WITH_LEN("sql_cache ")); + break; + case LEX::SQL_CACHE_UNSPECIFIED: + break; + default: + DBUG_ASSERT(0); + } + } + + //Item List + if (sel_type == SELECT_CMD || + sel_type == INSERT_CMD || + sel_type == REPLACE_CMD) + print_item_list(thd, str, query_type); + /* + from clause + TODO: support USING/FORCE/IGNORE index + */ + if (table_list.elements) + { + if (sel_type == SELECT_CMD || + sel_type == INSERT_CMD || + sel_type == REPLACE_CMD) + { + str->append(STRING_WITH_LEN(" from ")); + /* go through join tree */ + print_join(thd, join? 
join->eliminated_tables: 0, str, &top_join_list, + query_type); + } + if (sel_type == UPDATE_CMD || sel_type == DELETE_CMD) + str->append(get_explainable_cmd_name(sel_type)); + if (sel_type == DELETE_CMD) + { + str->append(STRING_WITH_LEN(" from ")); + bool first= true; + for (TABLE_LIST *target_tbl= thd->lex->auxiliary_table_list.first; + target_tbl; + target_tbl= target_tbl->next_local) + { + if (first) + first= false; + else + str->append(','); + target_tbl->correspondent_table->print_leaf_tables(thd, str, + query_type); + } + + if (!first) + str->append(STRING_WITH_LEN(" using ")); + } + if (sel_type == UPDATE_CMD || sel_type == DELETE_CMD) + { + if (join) + print_join(thd, 0, str, &top_join_list, query_type); + else + { + bool first= true; + List_iterator_fast li(leaf_tables); + TABLE_LIST *tbl; + while ((tbl= li++)) + { + if (first) + first= false; + else + str->append(','); + tbl->print(thd, 0, str, query_type); + } + } + } + } + else if (where) + { + /* + "SELECT 1 FROM DUAL WHERE 2" should not be printed as + "SELECT 1 WHERE 2": the 1st syntax is valid, but the 2nd is not. + */ + str->append(STRING_WITH_LEN(" from DUAL ")); + } + + if (sel_type == UPDATE_CMD) + print_set_clause(thd, str, query_type); + + // Where + Item *cur_where= where; + if (join) + cur_where= join->conds; + else if (sel_type == UPDATE_CMD || sel_type == DELETE_CMD) + cur_where= thd->lex->upd_del_where; + if (cur_where || cond_value != Item::COND_UNDEF) + { + str->append(STRING_WITH_LEN(" where ")); + if (cur_where) + cur_where->print(str, query_type); + else + str->append(cond_value != Item::COND_FALSE ? 
'1' : '0'); + } + + // group by & olap + if (group_list.elements) + { + str->append(STRING_WITH_LEN(" group by ")); + print_order(str, group_list.first, query_type); + switch (olap) + { + case CUBE_TYPE: + str->append(STRING_WITH_LEN(" with cube")); + break; + case ROLLUP_TYPE: + str->append(STRING_WITH_LEN(" with rollup")); + break; + default: + ; //satisfy compiler + } + } + + // having + Item *cur_having= having; + if (join) + cur_having= join->having; + + if (cur_having || having_value != Item::COND_UNDEF) + { + str->append(STRING_WITH_LEN(" having ")); + if (cur_having) + cur_having->print(str, query_type); + else + str->append(having_value != Item::COND_FALSE ? '1' : '0'); + } + + if (order_list.elements) + { + str->append(STRING_WITH_LEN(" order by ")); + print_order(str, order_list.first, query_type); + } + + // limit + print_limit(thd, str, query_type); + + // lock type + if (select_lock == select_lock_type::IN_SHARE_MODE) + str->append(STRING_WITH_LEN(" lock in share mode")); + else if (select_lock == select_lock_type::FOR_UPDATE) + str->append(STRING_WITH_LEN(" for update")); + if (unlikely(skip_locked)) + str->append(STRING_WITH_LEN(" skip locked")); + + if ((sel_type == INSERT_CMD || sel_type == REPLACE_CMD) && + thd->lex->update_list.elements) + print_on_duplicate_key_clause(thd, str, query_type); + + // returning clause + if (sel_type == DELETE_CMD && !item_list.elements) + { + print_item_list(thd, str, query_type); + } + // PROCEDURE unsupported here +} + + +/** + Change the select_result object of the JOIN. + + If old_result is not used, forward the call to the current + select_result in case it is a wrapper around old_result. + + Call prepare() and prepare2() on the new select_result if we decide + to use it. 
+ + @param new_result New select_result object + @param old_result Old select_result object (NULL to force change) + + @retval false Success + @retval true Error +*/ + +bool JOIN::change_result(select_result *new_result, select_result *old_result) +{ + DBUG_ENTER("JOIN::change_result"); + if (old_result == NULL || result == old_result) + { + result= new_result; + if (result->prepare(fields_list, select_lex->master_unit()) || + result->prepare2(this)) + DBUG_RETURN(true); /* purecov: inspected */ + DBUG_RETURN(false); + } + DBUG_RETURN(result->change_result(new_result)); +} + + +/** + @brief + Set allowed types of join caches that can be used for join operations + + @details + The function sets a bitmap of allowed join buffers types in the field + allowed_join_cache_types of this JOIN structure: + bit 1 is set if the join buffers are allowed to be incremental + bit 2 is set if the join buffers are allowed to be hashed + bit 3 is set if the join buffers are allowed to be used for BKA + join algorithms. + The allowed types are read from system variables. + Besides the function sets maximum allowed join cache level that is + also read from a system variable. 
+*/ + +void JOIN::set_allowed_join_cache_types() +{ + allowed_join_cache_types= 0; + if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL)) + allowed_join_cache_types|= JOIN_CACHE_INCREMENTAL_BIT; + if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_HASHED)) + allowed_join_cache_types|= JOIN_CACHE_HASHED_BIT; + if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_BKA)) + allowed_join_cache_types|= JOIN_CACHE_BKA_BIT; + allowed_semijoin_with_cache= + optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE); + allowed_outer_join_with_cache= + optimizer_flag(thd, OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE); + max_allowed_join_cache_level= thd->variables.join_cache_level; +} + + +/** + Save a query execution plan so that the caller can revert to it if needed, + and reset the current query plan so that it can be reoptimized. + + @param save_to The object into which the current query plan state is saved +*/ + +void JOIN::save_query_plan(Join_plan_state *save_to) +{ + DYNAMIC_ARRAY tmp_keyuse; + /* Swap the current and the backup keyuse internal arrays. */ + tmp_keyuse= keyuse; + keyuse= save_to->keyuse; /* keyuse is reset to an empty array. */ + save_to->keyuse= tmp_keyuse; + + for (uint i= 0; i < table_count; i++) + { + save_to->join_tab_keyuse[i]= join_tab[i].keyuse; + join_tab[i].keyuse= NULL; + save_to->join_tab_checked_keys[i]= join_tab[i].checked_keys; + join_tab[i].checked_keys.clear_all(); + } + memcpy((uchar*) save_to->best_positions, (uchar*) best_positions, + sizeof(POSITION) * (table_count + 1)); + memset((uchar*) best_positions, 0, sizeof(POSITION) * (table_count + 1)); + + /* Save SJM nests */ + List_iterator it(select_lex->sj_nests); + TABLE_LIST *tlist; + SJ_MATERIALIZATION_INFO **p_info= save_to->sj_mat_info; + while ((tlist= it++)) + { + *(p_info++)= tlist->sj_mat_info; + } +} + + +/** + Reset a query execution plan so that it can be reoptimized in-place. 
+*/ +void JOIN::reset_query_plan() +{ + for (uint i= 0; i < table_count; i++) + { + join_tab[i].keyuse= NULL; + join_tab[i].checked_keys.clear_all(); + } +} + + +/** + Restore a query execution plan previously saved by the caller. + + @param restore_from The object from which the current query plan state is restored. +*/ + +void JOIN::restore_query_plan(Join_plan_state *restore_from) +{ + DYNAMIC_ARRAY tmp_keyuse; + tmp_keyuse= keyuse; + keyuse= restore_from->keyuse; + restore_from->keyuse= tmp_keyuse; + + for (uint i= 0; i < table_count; i++) + { + join_tab[i].keyuse= restore_from->join_tab_keyuse[i]; + join_tab[i].checked_keys= restore_from->join_tab_checked_keys[i]; + } + + memcpy((uchar*) best_positions, (uchar*) restore_from->best_positions, + sizeof(POSITION) * (table_count + 1)); + /* Restore SJM nests */ + List_iterator it(select_lex->sj_nests); + TABLE_LIST *tlist; + SJ_MATERIALIZATION_INFO **p_info= restore_from->sj_mat_info; + while ((tlist= it++)) + { + tlist->sj_mat_info= *(p_info++); + } +} + + +/** + Reoptimize a query plan taking into account an additional conjunct to the + WHERE clause. + + @param added_where An extra conjunct to the WHERE clause to reoptimize with + @param join_tables The set of tables to reoptimize + @param save_to If != NULL, save here the state of the current query plan, + otherwise reuse the existing query plan structures. + + @notes + Given a query plan that was already optimized taking into account some WHERE + clause 'C', reoptimize this plan with a new WHERE clause 'C AND added_where'. + The reoptimization works as follows: + + 1. Call update_ref_and_keys *only* for the new conditions 'added_where' + that are about to be injected into the query. + 2. Expand if necessary the original KEYUSE array JOIN::keyuse to + accommodate the new REF accesses computed for the 'added_where' condition. + 3. Add the new KEYUSEs into JOIN::keyuse. + 4. Re-sort and re-filter the JOIN::keyuse array with the newly added + KEYUSE elements. 
+ + @retval REOPT_NEW_PLAN there is a new plan. + @retval REOPT_OLD_PLAN no new improved plan was produced, use the old one. + @retval REOPT_ERROR an irrecoverable error occurred during reoptimization. +*/ + +JOIN::enum_reopt_result +JOIN::reoptimize(Item *added_where, table_map join_tables, + Join_plan_state *save_to) +{ + DYNAMIC_ARRAY added_keyuse; + SARGABLE_PARAM *sargables= 0; /* Used only as a dummy parameter. */ + size_t org_keyuse_elements; + + /* Re-run the REF optimizer to take into account the new conditions. */ + if (update_ref_and_keys(thd, &added_keyuse, join_tab, table_count, added_where, + ~outer_join, select_lex, &sargables)) + { + delete_dynamic(&added_keyuse); + return REOPT_ERROR; + } + + if (!added_keyuse.elements) + { + delete_dynamic(&added_keyuse); + return REOPT_OLD_PLAN; + } + + if (save_to) + save_query_plan(save_to); + else + reset_query_plan(); + + if (!keyuse.buffer && + my_init_dynamic_array(thd->mem_root->psi_key, &keyuse, sizeof(KEYUSE), + 20, 64, MYF(MY_THREAD_SPECIFIC))) + { + delete_dynamic(&added_keyuse); + return REOPT_ERROR; + } + + org_keyuse_elements= save_to ? save_to->keyuse.elements : keyuse.elements; + allocate_dynamic(&keyuse, org_keyuse_elements + added_keyuse.elements); + + /* If needed, add the access methods from the original query plan. */ + if (save_to) + { + DBUG_ASSERT(!keyuse.elements); + keyuse.elements= save_to->keyuse.elements; + if (size_t e= keyuse.elements) + memcpy(keyuse.buffer, + save_to->keyuse.buffer, e * keyuse.size_of_element); + } + + /* Add the new access methods to the keyuse array. */ + memcpy(keyuse.buffer + keyuse.elements * keyuse.size_of_element, + added_keyuse.buffer, + (size_t) added_keyuse.elements * added_keyuse.size_of_element); + keyuse.elements+= added_keyuse.elements; + /* added_keyuse contents is copied, and it is no longer needed. 
*/ + delete_dynamic(&added_keyuse); + + if (sort_and_filter_keyuse(this, &keyuse, true)) + return REOPT_ERROR; + optimize_keyuse(this, &keyuse); + + if (optimize_semijoin_nests(this, join_tables)) + return REOPT_ERROR; + + /* Re-run the join optimizer to compute a new query plan. */ + if (choose_plan(this, join_tables)) + return REOPT_ERROR; + + return REOPT_NEW_PLAN; +} + + +/** + Cache constant expressions in WHERE, HAVING, ON conditions. +*/ + +void JOIN::cache_const_exprs() +{ + uchar cache_flag= FALSE; + uchar *analyzer_arg= &cache_flag; + + /* No need in cache if all tables are constant. */ + if (const_tables == table_count) + return; + + if (conds) + conds->top_level_compile(thd, &Item::cache_const_expr_analyzer, &analyzer_arg, + &Item::cache_const_expr_transformer, &cache_flag); + cache_flag= FALSE; + if (having) + having->top_level_compile(thd, &Item::cache_const_expr_analyzer, + &analyzer_arg, &Item::cache_const_expr_transformer, &cache_flag); + + for (JOIN_TAB *tab= first_depth_first_tab(this); tab; + tab= next_depth_first_tab(this, tab)) + { + if (*tab->on_expr_ref) + { + cache_flag= FALSE; + (*tab->on_expr_ref)->top_level_compile(thd, &Item::cache_const_expr_analyzer, + &analyzer_arg, &Item::cache_const_expr_transformer, &cache_flag); + } + } +} + + +/* + Get the cost of using index keynr to read #LIMIT matching rows + + @detail + - If there is a quick select, we try to use it. + - if there is a ref(const) access, we try to use it, too. + - quick and ref(const) use different cost formulas, so if both are possible + we should make a cost-based choice. + + rows_limit is the number of rows we would need to read when using a full + index scan. 
This is generally higher than the N from "LIMIT N" clause, + because there's a WHERE condition (a part of which is used to construct a + range access we are considering using here) + + @param tab JOIN_TAB with table access (is NULL for single-table + UPDATE/DELETE) + @param rows_limit See explanation above + @param read_time OUT Cost of reading using quick or ref(const) access. + + + @return + true There was a possible quick or ref access, its cost is in the OUT + parameters. + false No quick or ref(const) possible (and so, the caller will attempt + to use a full index scan on this index). +*/ + +static bool get_range_limit_read_cost(const JOIN_TAB *tab, + const TABLE *table, + ha_rows table_records, + uint keynr, + ha_rows rows_limit, + double *read_time) +{ + bool res= false; + /* + We need to adjust the estimates if we had a quick select (or ref(const)) on + index keynr. + */ + if (table->opt_range_keys.is_set(keynr)) + { + /* + Start from quick select's rows and cost. These are always cheaper than + full index scan/cost. + */ + double best_rows= (double) table->opt_range[keynr].rows; + double best_cost= (double) table->opt_range[keynr].cost; + + /* + Check if ref(const) access was possible on this index. + */ + if (tab) + { + key_part_map map= 1; + uint kp; + /* Find how many key parts would be used by ref(const) */ + for (kp=0; kp < MAX_REF_PARTS; map=map << 1, kp++) + { + if (!(table->const_key_parts[keynr] & map)) + break; + } + + if (kp > 0) + { + ha_rows ref_rows; + /* + Two possible cases: + 1. ref(const) uses the same #key parts as range access. + 2. ref(const) uses fewer key parts, because there is a + range_cond(key_part+1). 
+ */ + if (kp == table->opt_range[keynr].key_parts) + ref_rows= table->opt_range[keynr].rows; + else + ref_rows= (ha_rows) table->key_info[keynr].actual_rec_per_key(kp-1); + + if (ref_rows > 0) + { + double tmp= cost_for_index_read(tab->join->thd, table, keynr, + ref_rows, + (ha_rows) tab->worst_seeks); + if (tmp < best_cost) + { + best_cost= tmp; + best_rows= (double)ref_rows; + } + } + } + } + + /* + Consider an example: + + SELECT * + FROM t1 + WHERE key1 BETWEEN 10 AND 20 AND col2='foo' + ORDER BY key1 LIMIT 10 + + If we were using a full index scan on key1, we would need to read this + many rows to get 10 matches: + + 10 / selectivity(key1 BETWEEN 10 AND 20 AND col2='foo') + + This is the number we get in rows_limit. + But we intend to use range access on key1. The rows returned by quick + select will satisfy the range part of the condition, + "key1 BETWEEN 10 and 20". We will still need to filter them with + the remainder condition, (col2='foo'). + + The selectivity of the range access is (best_rows/table_records). We need + to discount it from the rows_limit: + */ + double rows_limit_for_quick= rows_limit * (best_rows / table_records); + + if (best_rows > rows_limit_for_quick) + { + /* + LIMIT clause specifies that we will need to read fewer records than + quick select will return. Assume that quick select's cost is + proportional to the number of records we need to return (e.g. 
if we + only need 1/3rd of records, it will cost us 1/3rd of quick select's + read time) + */ + best_cost *= rows_limit_for_quick / best_rows; + } + *read_time= best_cost; + res= true; + } + return res; +} + + +/** + Find a cheaper access key than a given @a key + + @param tab NULL or JOIN_TAB of the accessed table + @param order Linked list of ORDER BY arguments + @param table Table if tab == NULL or tab->table + @param usable_keys Key map to find a cheaper key in + @param ref_key + 0 <= key < MAX_KEY - Key that is currently used for finding + row + MAX_KEY - means index_merge is used + -1 - means we're currently not using an + index to find rows. + + @param select_limit LIMIT value + @param [out] new_key Key number if success, otherwise undefined + @param [out] new_key_direction Return -1 (reverse) or +1 if success, + otherwise undefined + @param [out] new_select_limit Return adjusted LIMIT + @param [out] new_used_key_parts NULL by default, otherwise return number + of new_key prefix columns if success + or undefined if the function fails + @param [out] saved_best_key_parts NULL by default, otherwise preserve the + value for further use in QUICK_SELECT_DESC + + @note + This function takes into account table->opt_range_condition_rows statistic + (that is calculated by the make_join_statistics function). + However, single table procedures such as mysql_update() and mysql_delete() + never call make_join_statistics, so they have to update it manually + (@see get_index_for_order()). +*/ + +static bool +test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, + key_map usable_keys, int ref_key, + ha_rows select_limit_arg, + int *new_key, int *new_key_direction, + ha_rows *new_select_limit, uint *new_used_key_parts, + uint *saved_best_key_parts) +{ + DBUG_ENTER("test_if_cheaper_ordering"); + /* + Check whether there is an index compatible with the given order + usage of which is cheaper than usage of the ref_key index (ref_key>=0) + or a table scan. 
+ It may be the case if ORDER/GROUP BY is used with LIMIT. + */ + ha_rows best_select_limit= HA_POS_ERROR; + JOIN *join= tab ? tab->join : NULL; + uint nr; + key_map keys; + uint best_key_parts= 0; + int best_key_direction= 0; + ha_rows best_records= 0; + double read_time; + int best_key= -1; + bool is_best_covering= FALSE; + double fanout= 1; + ha_rows table_records= table->stat_records(); + bool group= join && join->group && order == join->group_list; + ha_rows refkey_rows_estimate= table->opt_range_condition_rows; + const bool has_limit= (select_limit_arg != HA_POS_ERROR); + THD* thd= join ? join->thd : table->in_use; + + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_cheaper_ordering( + thd, "reconsidering_access_paths_for_index_ordering"); + trace_cheaper_ordering.add("clause", group ? "GROUP BY" : "ORDER BY"); + + /* + If not used with LIMIT, only use keys if the whole query can be + resolved with a key; This is because filesort() is usually faster than + retrieving all rows through an index. + */ + if (select_limit_arg >= table_records) + { + keys= *table->file->keys_to_use_for_scanning(); + keys.merge(table->covering_keys); + + /* + We are adding here also the index specified in FORCE INDEX clause, + if any. + This is to allow users to use index in ORDER BY. + */ + if (table->force_index) + keys.merge(group ? table->keys_in_use_for_group_by : + table->keys_in_use_for_order_by); + keys.intersect(usable_keys); + } + else + keys= usable_keys; + + if (join) + { + uint tablenr= (uint)(tab - join->join_tab); + read_time= join->best_positions[tablenr].read_time; + for (uint i= tablenr+1; i < join->table_count; i++) + { + fanout*= join->best_positions[i].records_read; // fanout is always >= 1 + // But selectivity is =< 1 : + fanout*= join->best_positions[i].cond_selectivity; + } + } + else + read_time= table->file->scan_time(); + + trace_cheaper_ordering.add("fanout", fanout); + /* + TODO: add cost of sorting here. 
+ */ + read_time += COST_EPS; + trace_cheaper_ordering.add("read_time", read_time); + /* + Calculate the selectivity of the ref_key for REF_ACCESS. For + RANGE_ACCESS we use table->opt_range_condition_rows. + */ + if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF) + { + /* + If ref access uses keypart=const for all its key parts, + and quick select uses the same # of key parts, then they are equivalent. + Reuse #rows estimate from quick select as it is more precise. + */ + if (tab->ref.const_ref_part_map == + make_prev_keypart_map(tab->ref.key_parts) && + table->opt_range_keys.is_set(ref_key) && + table->opt_range[ref_key].key_parts == tab->ref.key_parts) + refkey_rows_estimate= table->opt_range[ref_key].rows; + else + { + const KEY *ref_keyinfo= table->key_info + ref_key; + refkey_rows_estimate= ref_keyinfo->rec_per_key[tab->ref.key_parts - 1]; + } + set_if_bigger(refkey_rows_estimate, 1); + } + + if (tab) + trace_cheaper_ordering.add_table_name(tab); + else + trace_cheaper_ordering.add_table_name(table); + trace_cheaper_ordering.add("rows_estimation", refkey_rows_estimate); + + Json_writer_array possible_keys(thd,"possible_keys"); + for (nr=0; nr < table->s->keys ; nr++) + { + int direction; + ha_rows select_limit= select_limit_arg; + uint used_key_parts= 0; + Json_writer_object possible_key(thd); + possible_key.add("index", table->key_info[nr].name); + + if (keys.is_set(nr) && + (direction= test_if_order_by_key(join, order, table, nr, + &used_key_parts))) + { + /* + At this point we are sure that ref_key is a non-ordering + key (where "ordering key" is a key that will return rows + in the order required by ORDER BY). + */ + DBUG_ASSERT (ref_key != (int) nr); + + possible_key.add("can_resolve_order", true); + possible_key.add("direction", direction); + bool is_covering= (table->covering_keys.is_set(nr) || + (table->file->index_flags(nr, 0, 1) & + HA_CLUSTERED_INDEX)); + /* + Don't use an index scan with ORDER BY without limit. 
+ For GROUP BY without limit always use index scan + if there is a suitable index. + Why we hold to this asymmetry hardly can be explained + rationally. It's easy to demonstrate that using + temporary table + filesort could be cheaper for grouping + queries too. + */ + if (is_covering || + select_limit != HA_POS_ERROR || + (ref_key < 0 && (group || table->force_index))) + { + double rec_per_key; + double index_scan_time; + KEY *keyinfo= table->key_info+nr; + if (select_limit == HA_POS_ERROR) + select_limit= table_records; + if (group) + { + /* + Used_key_parts can be larger than keyinfo->user_defined_key_parts + when using a secondary index clustered with a primary + key (e.g. as in Innodb). + See Bug #28591 for details. + */ + uint used_index_parts= keyinfo->user_defined_key_parts; + uint used_pk_parts= 0; + if (used_key_parts > used_index_parts) + used_pk_parts= used_key_parts-used_index_parts; + rec_per_key= used_key_parts ? + keyinfo->actual_rec_per_key(used_key_parts-1) : 1; + /* Take into account the selectivity of the used pk prefix */ + if (used_pk_parts) + { + KEY *pkinfo=tab->table->key_info+table->s->primary_key; + /* + If the values of of records per key for the prefixes + of the primary key are considered unknown we assume + they are equal to 1. + */ + if (used_key_parts == pkinfo->user_defined_key_parts || + pkinfo->rec_per_key[0] == 0) + rec_per_key= 1; + if (rec_per_key > 1) + { + rec_per_key*= pkinfo->actual_rec_per_key(used_pk_parts-1); + rec_per_key/= pkinfo->actual_rec_per_key(0); + /* + The value of rec_per_key for the extended key has + to be adjusted accordingly if some components of + the secondary key are included in the primary key. + */ + for(uint i= 1; i < used_pk_parts; i++) + { + if (pkinfo->key_part[i].field->key_start.is_set(nr)) + { + /* + We presume here that for any index rec_per_key[i] != 0 + if rec_per_key[0] != 0. 
+ */ + DBUG_ASSERT(pkinfo->actual_rec_per_key(i)); + rec_per_key*= pkinfo->actual_rec_per_key(i-1); + rec_per_key/= pkinfo->actual_rec_per_key(i); + } + } + } + } + set_if_bigger(rec_per_key, 1); + /* + With a grouping query each group containing on average + rec_per_key records produces only one row that will + be included into the result set. + */ + if (select_limit > table_records/rec_per_key) + select_limit= table_records; + else + select_limit= (ha_rows) (select_limit*rec_per_key); + } /* group */ + + /* + If tab=tk is not the last joined table tn then to get first + L records from the result set we can expect to retrieve + only L/fanout(tk,tn) where fanout(tk,tn) says how many + rows in the record set on average will match each row tk. + Usually our estimates for fanouts are too pessimistic. + So the estimate for L/fanout(tk,tn) will be too optimistic + and as result we'll choose an index scan when using ref/range + access + filesort will be cheaper. + */ + select_limit= (ha_rows) (select_limit < fanout ? + 1 : select_limit/fanout); + + /* + refkey_rows_estimate is E(#rows) produced by the table access + strategy that was picked without regard to ORDER BY ... LIMIT. + + It will be used as the source of selectivity data. + Use table->cond_selectivity as a better estimate which includes + condition selectivity too. + */ + { + // we use MIN(...), because "Using LooseScan" queries have + // cond_selectivity=1 while refkey_rows_estimate has a better + // estimate. + refkey_rows_estimate= MY_MIN(refkey_rows_estimate, + ha_rows(table_records * + table->cond_selectivity)); + } + + /* + We assume that each of the tested indexes is not correlated + with ref_key. Thus, to select first N records we have to scan + N/selectivity(ref_key) index entries. + selectivity(ref_key) = #scanned_records/#table_records = + refkey_rows_estimate/table_records. + In any case we can't select more than #table_records. 
+ N/(refkey_rows_estimate/table_records) > table_records + <=> N > refkey_rows_estimate. + */ + + if (select_limit > refkey_rows_estimate) + select_limit= table_records; + else + select_limit= (ha_rows) (select_limit * + (double) table_records / + refkey_rows_estimate); + possible_key.add("updated_limit", select_limit); + rec_per_key= keyinfo->actual_rec_per_key(keyinfo->user_defined_key_parts-1); + set_if_bigger(rec_per_key, 1); + /* + Here we take into account the fact that rows are + accessed in sequences rec_per_key records in each. + Rows in such a sequence are supposed to be ordered + by rowid/primary key. When reading the data + in a sequence we'll touch not more pages than the + table file contains. + TODO. Use the formula for a disk sweep sequential access + to calculate the cost of accessing data rows for one + index entry. + */ + index_scan_time= select_limit/rec_per_key * + MY_MIN(rec_per_key, table->file->scan_time()); + double range_scan_time; + if (get_range_limit_read_cost(tab, table, table_records, nr, + select_limit, &range_scan_time)) + { + possible_key.add("range_scan_time", range_scan_time); + if (range_scan_time < index_scan_time) + index_scan_time= range_scan_time; + } + possible_key.add("index_scan_time", index_scan_time); + + if ((ref_key < 0 && (group || table->force_index || is_covering)) || + index_scan_time < read_time) + { + ha_rows quick_records= table_records; + ha_rows refkey_select_limit= (ref_key >= 0 && + !is_hash_join_key_no(ref_key) && + table->covering_keys.is_set(ref_key)) ? 
+ refkey_rows_estimate : + HA_POS_ERROR; + if (is_best_covering && !is_covering) + { + possible_key.add("chosen", false); + possible_key.add("cause", "covering index already found"); + continue; + } + + if (is_covering && refkey_select_limit < select_limit) + { + possible_key.add("chosen", false); + possible_key.add("cause", "ref estimates better"); + continue; + } + if (table->opt_range_keys.is_set(nr)) + quick_records= table->opt_range[nr].rows; + possible_key.add("records", quick_records); + if (best_key < 0 || + (select_limit <= MY_MIN(quick_records,best_records) ? + keyinfo->user_defined_key_parts < best_key_parts : + quick_records < best_records) || + (!is_best_covering && is_covering)) + { + possible_key.add("chosen", true); + best_key= nr; + best_key_parts= keyinfo->user_defined_key_parts; + if (saved_best_key_parts) + *saved_best_key_parts= used_key_parts; + best_records= quick_records; + is_best_covering= is_covering; + best_key_direction= direction; + best_select_limit= select_limit; + } + else + { + char const *cause; + possible_key.add("chosen", false); + if (is_covering) + cause= "covering index already found"; + else + { + if (select_limit <= MY_MIN(quick_records,best_records)) + cause= "keyparts greater than the current best keyparts"; + else + cause= "rows estimation greater"; + } + possible_key.add("cause", cause); + } + } + else + { + possible_key.add("usable", false); + possible_key.add("cause", "cost"); + } + } + else + { + possible_key.add("usable", false); + if (!group && select_limit == HA_POS_ERROR) + possible_key.add("cause", "order by without limit"); + } + } + else + { + if (keys.is_set(nr)) + { + possible_key.add("can_resolve_order", false); + possible_key.add("cause", "order can not be resolved by key"); + } + else + { + possible_key.add("can_resolve_order", false); + possible_key.add("cause", "not usable index for the query"); + } + } + } + + if (best_key < 0 || best_key == ref_key) + DBUG_RETURN(FALSE); + + *new_key= best_key; + 
*new_key_direction= best_key_direction; + *new_select_limit= has_limit ? best_select_limit : table_records; + if (new_used_key_parts != NULL) + *new_used_key_parts= best_key_parts; + DBUG_RETURN(TRUE); +} + + +/** + Find a key to apply single table UPDATE/DELETE by a given ORDER + + @param order Linked list of ORDER BY arguments + @param table Table to find a key + @param select Pointer to access/update select->quick (if any) + @param limit LIMIT clause parameter + @param [out] scanned_limit How many records we expect to scan + Valid if *need_sort=FALSE. + @param [out] need_sort TRUE if filesort needed + @param [out] reverse + TRUE if the key is reversed again given ORDER (undefined if key == MAX_KEY) + + @return + - MAX_KEY if no key found (need_sort == TRUE) + - MAX_KEY if quick select result order is OK (need_sort == FALSE) + - key number (either index scan or quick select) (need_sort == FALSE) + + @note + Side effects: + - may deallocate or deallocate and replace select->quick; + - may set table->opt_range_condition_rows and table->quick_rows[...] + to table->file->stats.records. 
+*/ + +uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select, + ha_rows limit, ha_rows *scanned_limit, + bool *need_sort, bool *reverse) +{ + if (!order) + { + *need_sort= FALSE; + if (select && select->quick) + return select->quick->index; // index or MAX_KEY, use quick select as is + else + return table->file->key_used_on_scan; // MAX_KEY or index for some engines + } + + if (!is_simple_order(order)) // just to cut further expensive checks + { + *need_sort= TRUE; + return MAX_KEY; + } + + if (select && select->quick) + { + if (select->quick->index == MAX_KEY) + { + *need_sort= TRUE; + return MAX_KEY; + } + + uint used_key_parts; + switch (test_if_order_by_key(NULL, order, table, select->quick->index, + &used_key_parts)) { + case 1: // desired order + *need_sort= FALSE; + *scanned_limit= MY_MIN(limit, select->quick->records); + return select->quick->index; + case 0: // unacceptable order + *need_sort= TRUE; + return MAX_KEY; + case -1: // desired order, but opposite direction + { + QUICK_SELECT_I *reverse_quick; + if ((reverse_quick= + select->quick->make_reverse(used_key_parts))) + { + select->set_quick(reverse_quick); + *need_sort= FALSE; + *scanned_limit= MY_MIN(limit, select->quick->records); + return select->quick->index; + } + else + { + *need_sort= TRUE; + return MAX_KEY; + } + } + } + DBUG_ASSERT(0); + } + else if (limit != HA_POS_ERROR) + { // check if some index scan & LIMIT is more efficient than filesort + + /* + Update opt_range_condition_rows since single table UPDATE/DELETE + procedures don't call make_join_statistics() and leave this + variable uninitialized. 
+ */ + table->opt_range_condition_rows= table->stat_records(); + + int key, direction; + if (test_if_cheaper_ordering(NULL, order, table, + table->keys_in_use_for_order_by, -1, + limit, + &key, &direction, &limit) && + !is_key_used(table, key, table->write_set)) + { + *need_sort= FALSE; + *scanned_limit= limit; + *reverse= (direction < 0); + return key; + } + } + *need_sort= TRUE; + return MAX_KEY; +} + + +/* + Count how many times the specified conditions are true for first rows_to_read + rows of the table. + + @param thd Thread handle + @param rows_to_read How many rows to sample + @param table Table to use + @conds conds INOUT List of conditions and counters for them + + @return Number of we've checked. It can be equal or less than rows_to_read. + 0 is returned for error or when the table had no rows. +*/ + +ulong check_selectivity(THD *thd, + ulong rows_to_read, + TABLE *table, + List *conds) +{ + ulong count= 0; + COND_STATISTIC *cond; + List_iterator_fast it(*conds); + handler *file= table->file; + uchar *record= table->record[0]; + int error= 0; + DBUG_ENTER("check_selectivity"); + + DBUG_ASSERT(rows_to_read > 0); + while ((cond= it++)) + { + DBUG_ASSERT(cond->cond); + DBUG_ASSERT(cond->cond->used_tables() == table->map); + cond->positive= 0; + } + it.rewind(); + + if (unlikely(file->ha_rnd_init_with_error(1))) + DBUG_RETURN(0); + do + { + error= file->ha_rnd_next(record); + + if (unlikely(thd->killed)) + { + thd->send_kill_message(); + count= 0; + goto err; + } + if (unlikely(error)) + { + if (error == HA_ERR_END_OF_FILE) + break; + goto err; + } + + count++; + while ((cond= it++)) + { + if (cond->cond->val_bool()) + cond->positive++; + } + it.rewind(); + + } while (count < rows_to_read); + + file->ha_rnd_end(); + DBUG_RETURN(count); + +err: + DBUG_PRINT("error", ("error %d", error)); + file->ha_rnd_end(); + DBUG_RETURN(0); +} + +/**************************************************************************** + AGGR_OP implementation 
+****************************************************************************/
+
+/**
+  @brief Instantiate tmp table for aggregation and start index scan if needed
+  @todo Tmp table always would be created, even for empty result. Extend
+        executor to avoid tmp table creation when no rows were written
+        into tmp table.
+  @return
+    true  error
+    false ok
+*/
+
+bool
+AGGR_OP::prepare_tmp_table()
+{
+  TABLE *table= join_tab->table;
+  JOIN *join= join_tab->join;
+  int rc= 0;
+
+  if (!join_tab->table->is_created())
+  {
+    if (instantiate_tmp_table(table, join_tab->tmp_table_param->keyinfo,
+                              join_tab->tmp_table_param->start_recinfo,
+                              &join_tab->tmp_table_param->recinfo,
+                              join->select_options))
+      return true;
+    /* Buffer writes into the freshly created tmp table */
+    (void) table->file->extra(HA_EXTRA_WRITE_CACHE);
+  }
+  /* If it wasn't already, start index scan for grouping using table index. */
+  if (!table->file->inited && table->group &&
+      join_tab->tmp_table_param->sum_func_count && table->s->keys)
+    rc= table->file->ha_index_init(0, 0);
+  else
+  {
+    /* Not grouping via an index: start a sequential (rnd) table scan */
+    rc= table->file->ha_rnd_init(true);
+  }
+  if (rc)
+  {
+    table->file->print_error(rc, MYF(0));
+    return true;
+  }
+  return false;
+}
+
+
+/**
+  @brief Prepare table if necessary and call write_func to save record
+
+  @param end_of_records  the end_of_record signal to pass to the writer
+
+  @return return one of enum_nested_loop_state.
+*/
+
+enum_nested_loop_state
+AGGR_OP::put_record(bool end_of_records)
+{
+  // Lazy tmp table creation/initialization
+  if (!join_tab->table->file->inited)
+    if (prepare_tmp_table())
+      return NESTED_LOOP_ERROR;
+  enum_nested_loop_state rc= (*write_func)(join_tab->join, join_tab,
+                                           end_of_records);
+  return rc;
+}
+
+
+/**
+  @brief Finish rnd/index scan after accumulating records, switch ref_array,
+         and send accumulated records further.
+  @return return one of enum_nested_loop_state.
+*/ + +enum_nested_loop_state +AGGR_OP::end_send() +{ + enum_nested_loop_state rc= NESTED_LOOP_OK; + TABLE *table= join_tab->table; + JOIN *join= join_tab->join; + + // All records were stored, send them further + int tmp, new_errno= 0; + + if ((rc= put_record(true)) < NESTED_LOOP_OK) + return rc; + + if ((tmp= table->file->extra(HA_EXTRA_NO_CACHE))) + { + DBUG_PRINT("error",("extra(HA_EXTRA_NO_CACHE) failed")); + new_errno= tmp; + } + if ((tmp= table->file->ha_index_or_rnd_end())) + { + DBUG_PRINT("error",("ha_index_or_rnd_end() failed")); + new_errno= tmp; + } + if (new_errno) + { + table->file->print_error(new_errno,MYF(0)); + return NESTED_LOOP_ERROR; + } + + // Update ref array + join_tab->join->set_items_ref_array(*join_tab->ref_array); + bool keep_last_filesort_result = join_tab->filesort ? false : true; + if (join_tab->window_funcs_step) + { + if (join_tab->window_funcs_step->exec(join, keep_last_filesort_result)) + return NESTED_LOOP_ERROR; + } + + table->reginfo.lock_type= TL_UNLOCK; + + bool in_first_read= true; + + /* + Reset the counter before copying rows from internal temporary table to + INSERT table. 
+ */ + join_tab->join->thd->get_stmt_da()->reset_current_row_for_warning(1); + while (rc == NESTED_LOOP_OK) + { + int error; + if (in_first_read) + { + in_first_read= false; + error= join_init_read_record(join_tab); + } + else + error= join_tab->read_record.read_record(); + + if (unlikely(error > 0 || (join->thd->is_error()))) // Fatal error + rc= NESTED_LOOP_ERROR; + else if (error < 0) + break; + else if (unlikely(join->thd->killed)) // Aborted by user + { + join->thd->send_kill_message(); + rc= NESTED_LOOP_KILLED; + } + else + { + rc= evaluate_join_record(join, join_tab, 0); + } + } + + if (keep_last_filesort_result) + { + delete join_tab->filesort_result; + join_tab->filesort_result= NULL; + } + + // Finish rnd scn after sending records + if (join_tab->table->file->inited) + join_tab->table->file->ha_rnd_end(); + + return rc; +} + + +/** + @brief + Remove marked top conjuncts of a condition + + @param thd The thread handle + @param cond The condition which subformulas are to be removed + + @details + The function removes all top conjuncts marked with the flag + MARKER_FULL_EXTRACTION from the condition 'cond'. The resulting + formula is returned a the result of the function + If 'cond' s marked with such flag the function returns 0. 
+  The function clears the extraction flag of every conjunct it removes.
+
+  @retval
+    condition without removed subformulas
+    0 if the whole 'cond' is removed
+*/
+
+Item *remove_pushed_top_conjuncts(THD *thd, Item *cond)
+{
+  /* The whole condition was marked for extraction: drop it entirely */
+  if (cond->get_extraction_flag() == MARKER_FULL_EXTRACTION)
+  {
+    cond->clear_extraction_flag();
+    return 0;
+  }
+
+  /* Only a top-level AND conjunction can be partially removed */
+  if (cond->type() != Item::COND_ITEM ||
+      ((Item_cond*) cond)->functype() != Item_func::COND_AND_FUNC)
+    return cond;
+
+  List<Item> *conjuncts= ((Item_cond*) cond)->argument_list();
+  List_iterator<Item> it(*conjuncts);
+  Item *conjunct;
+  while ((conjunct= it++))
+  {
+    if (conjunct->get_extraction_flag() != MARKER_FULL_EXTRACTION)
+      continue;
+    conjunct->clear_extraction_flag();
+    it.remove();
+  }
+
+  /* Collapse a degenerate AND list (0 or 1 remaining conjunct) */
+  if (conjuncts->elements == 0)
+    return 0;
+  if (conjuncts->elements == 1)
+    return conjuncts->head();
+  return cond;
+}
+
+
+/*
+  There are several cases in which we shortcut the join optimization process
+  because we conclude that the join would be a degenerate one:
+    1) IMPOSSIBLE WHERE
+    2) MIN/MAX optimization (@see opt_sum_query)
+    3) EMPTY CONST TABLE
+  If a window function is present in any of the above cases then to get the
+  result of the window function, we need to execute it. So we need to
+  create a temporary table for its execution. Here we need to take in mind
+  that aggregate functions and non-aggregate function need not be executed.
+*/
+
+void JOIN::handle_implicit_grouping_with_window_funcs()
+{
+  if (select_lex->have_window_funcs() && send_row_on_empty_set())
+    const_tables= top_join_tab_count= table_count= 0;
+}
+
+
+
+/*
+  @brief
+    Perform a partial cleanup for the JOIN_TAB structure
+
+  @note
+    this is used to cleanup resources for the re-execution of correlated
+    subqueries.
+*/ +void JOIN_TAB::partial_cleanup() +{ + if (!table) + return; + + if (table->is_created()) + { + table->file->ha_index_or_rnd_end(); + DBUG_PRINT("info", ("close index: %s.%s alias: %s", + table->s->db.str, + table->s->table_name.str, + table->alias.c_ptr())); + if (aggr) + { + int tmp= 0; + if ((tmp= table->file->extra(HA_EXTRA_NO_CACHE))) + table->file->print_error(tmp, MYF(0)); + } + } + delete filesort_result; + filesort_result= NULL; + free_cache(&read_record); +} + +/** + @brief + Construct not null conditions for provingly not nullable fields + + @details + For each non-constant joined table the function creates a conjunction + of IS NOT NULL predicates containing a predicate for each field used + in the WHERE clause or an OR expression such that + - is declared as nullable + - for which it can proved be that it is null-rejected + - is a part of some index. + This conjunction could be anded with either the WHERE condition or with + an ON expression and the modified join query would produce the same + result set as the original one. + If a conjunction of IS NOT NULL predicates is constructed for an inner + table of an outer join OJ that is not an inner table of embedded outer + joins then it is to be anded with the ON expression of OJ. + The constructed conjunctions of IS NOT NULL predicates are attached + to the corresponding tables. They used for range analysis complementary + to other sargable range conditions. + + @note + Let f be a field of the joined table t. 
In the context of the upper + paragraph field f is called null-rejected if any the following holds: + + - t is a table of a top inner join and a conjunctive formula that rejects + rows with null values for f can be extracted from the WHERE condition + + - t is an outer table of a top outer join operation and a conjunctive + formula over the outer tables of the outer join that rejects rows with + null values for can be extracted from the WHERE condition + + - t is an outer table of a non-top outer join operation and a conjunctive + formula over the outer tables of the outer join that rejects rows with + null values for f can be extracted from the ON expression of the + embedding outer join + + - the joined table is an inner table of a outer join operation and + a conjunctive formula over inner tables of the outer join that rejects + rows with null values for f can be extracted from the ON expression of + the outer join operation. + + It is assumed above that all inner join nests have been eliminated and + that all possible conversions of outer joins into inner joins have been + already done. 
+*/ + +void JOIN::make_notnull_conds_for_range_scans() +{ + DBUG_ENTER("JOIN::make_notnull_conds_for_range_scans"); + + if (impossible_where || + !optimizer_flag(thd, OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN)) + { + /* Complementary range analysis is not needed */ + DBUG_VOID_RETURN; + } + + if (conds && build_notnull_conds_for_range_scans(this, conds, + conds->used_tables())) + { + /* + Found a IS NULL conjunctive predicate for a null-rejected field + in the WHERE clause + */ + conds= (Item*) Item_false; + cond_equal= 0; + impossible_where= true; + DBUG_VOID_RETURN; + } + + List_iterator li(*join_list); + TABLE_LIST *tbl; + while ((tbl= li++)) + { + if (tbl->on_expr) + { + if (tbl->nested_join) + { + build_notnull_conds_for_inner_nest_of_outer_join(this, tbl); + } + else if (build_notnull_conds_for_range_scans(this, tbl->on_expr, + tbl->table->map)) + { + /* + Found a IS NULL conjunctive predicate for a null-rejected field + of the inner table of an outer join with ON expression tbl->on_expr + */ + tbl->on_expr= (Item*) Item_false; + } + } + } + DBUG_VOID_RETURN; +} + + +/** + @brief + Build not null conditions for range scans of given join tables + + @param join the join for whose tables not null conditions are to be built + @param cond the condition from which not null predicates are to be inferred + @param allowed the bit map of join tables to be taken into account + + @details + For each join table t from the 'allowed' set of tables the function finds + all fields whose null-rejectedness can be inferred from null-rejectedness + of the condition cond. For each found field f from table t such that it + participates at least in one index on table t a NOT NULL predicate is + constructed and a conjunction of all such predicates is attached to t. + If when looking for null-rejecting fields of t it is discovered one of its + fields has to be null-rejected and there is IS NULL conjunctive top level + predicate for this field then the function immediately returns true. 
+  The function uses the bitmap TABLE::tmp_set to mark found null-rejected
+  fields of table t.
+
+  @note
+    Currently only top level conjuncts without disjunctive sub-formulas
+    are taken into account when looking for null-rejected fields.
+
+  @retval
+    true    if a contradiction is inferred
+    false   otherwise
+*/
+
+static
+bool build_notnull_conds_for_range_scans(JOIN *join, Item *cond,
+                                         table_map allowed)
+{
+  THD *thd= join->thd;
+  DBUG_ENTER("build_notnull_conds_for_range_scans");
+
+  for (JOIN_TAB *s= join->join_tab;
+       s < join->join_tab + join->table_count ; s++)
+  {
+    /* Clear all needed bitmaps to mark found fields */
+    if ((allowed & s->table->map) &&
+        !(s->table->map & join->const_table_map))
+      bitmap_clear_all(&s->table->tmp_set);
+  }
+
+  /*
+    Find all null-rejected fields assuming that cond is null-rejected and
+    only formulas over tables from 'allowed' are to be taken into account
+  */
+  if (cond->find_not_null_fields(allowed))
+    DBUG_RETURN(true);
+
+  /*
+    For each table t from 'allowed' build a conjunction of NOT NULL predicates
+    constructed for all found fields if they are included in some indexes.
+    If the construction of the conjunction succeeds attach the formula to
+    t->table->notnull_cond. The condition will be used to look for
+    complementary range scans.
+  */
+  for (JOIN_TAB *s= join->join_tab ;
+       s < join->join_tab + join->table_count ; s++)
+  {
+    TABLE *tab= s->table;
+    List<Item> notnull_list;
+    Item *notnull_cond= 0;
+
+    /*
+      Bug fix: the original used the logical operator '&&' in the second
+      test below, which made the condition true for every table as soon as
+      the join had any const table, so no NOT NULL conjunction was ever
+      built. Use bitwise '&' to skip only const tables, matching the
+      bitmap-clearing loop above.
+    */
+    if (!(allowed & tab->map) ||
+        (s->table->map & join->const_table_map))
+      continue;
+
+    for (Field** field_ptr= tab->field; *field_ptr; field_ptr++)
+    {
+      Field *field= *field_ptr;
+      /* Only fields that participate in some index are of interest */
+      if (field->part_of_key.is_clear_all())
+        continue;
+      if (!bitmap_is_set(&tab->tmp_set, field->field_index))
+        continue;
+      Item_field *field_item= new (thd->mem_root) Item_field(thd, field);
+      if (!field_item)
+        continue;
+      Item *isnotnull_item=
+        new (thd->mem_root) Item_func_isnotnull(thd, field_item);
+      if (!isnotnull_item)
+        continue;
+      if (notnull_list.push_back(isnotnull_item, thd->mem_root))
+        continue;
+      s->const_keys.merge(field->part_of_key);
+    }
+
+    switch (notnull_list.elements) {
+    case 0:
+      break;
+    case 1:
+      notnull_cond= notnull_list.head();
+      break;
+    default:
+      notnull_cond=
+        new (thd->mem_root) Item_cond_and(thd, notnull_list);
+    }
+    if (notnull_cond && !notnull_cond->fix_fields(thd, 0))
+    {
+      tab->notnull_cond= notnull_cond;
+    }
+  }
+  DBUG_RETURN(false);
+}
+
+
+/**
+  @brief
+    Build not null conditions for inner nest tables of an outer join
+
+  @param join the join for whose table nest not null conditions are to be built
+  @param nest_tbl the nest of the inner tables of an outer join
+
+  @details
+    The function assumes that nest_tbl is the nest of the inner tables of an
+    outer join and so an ON expression for this outer join is attached to
+    nest_tbl.
+    The function selects the tables of the nest_tbl that are not inner tables of
+    embedded outer joins and then it calls build_notnull_conds_for_range_scans()
+    for nest_tbl->on_expr and the bitmap for the selected tables. This call
+    finds all fields belonging to the selected tables whose null-rejectedness
+    can be inferred from the null-rejectedness of nest_tbl->on_expr.
After this + the function recursively finds all null_rejected fields for the remaining + tables from the nest of nest_tbl. +*/ + +static +void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join, + TABLE_LIST *nest_tbl) +{ + TABLE_LIST *tbl; + table_map used_tables= 0; + List_iterator li(nest_tbl->nested_join->join_list); + + while ((tbl= li++)) + { + if (!tbl->on_expr) + used_tables|= tbl->table->map; + } + if (used_tables && + build_notnull_conds_for_range_scans(join, nest_tbl->on_expr, used_tables)) + { + nest_tbl->on_expr= (Item*) Item_false; + } + + li.rewind(); + while ((tbl= li++)) + { + if (tbl->on_expr) + { + if (tbl->nested_join) + { + build_notnull_conds_for_inner_nest_of_outer_join(join, tbl); + } + else if (build_notnull_conds_for_range_scans(join, tbl->on_expr, + tbl->table->map)) + tbl->on_expr= (Item*) Item_false; + } + } +} + + +/* + @brief + Initialize join cache and enable keyread +*/ +void JOIN::init_join_cache_and_keyread() +{ + JOIN_TAB *tab; + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + TABLE *table= tab->table; + switch (tab->type) { + case JT_SYSTEM: + case JT_CONST: + case JT_FT: + case JT_UNKNOWN: + case JT_MAYBE_REF: + break; + case JT_EQ_REF: + case JT_REF_OR_NULL: + case JT_REF: + if (table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) + table->file->ha_start_keyread(tab->ref.key); + break; + case JT_HASH: + case JT_ALL: + SQL_SELECT *select; + select= tab->select ? tab->select : + (tab->filesort ? 
tab->filesort->select : NULL); + if (select && select->quick && select->quick->index != MAX_KEY && + table->covering_keys.is_set(select->quick->index) && + !table->no_keyread) + table->file->ha_start_keyread(select->quick->index); + break; + case JT_HASH_NEXT: + case JT_NEXT: + if ((tab->read_first_record == join_read_first || + tab->read_first_record == join_read_last) && + table->covering_keys.is_set(tab->index) && + !table->no_keyread) + { + DBUG_ASSERT(!tab->filesort); + table->file->ha_start_keyread(tab->index); + } + break; + default: + break; + /* purecov: end */ + } + + if (table->file->keyread_enabled()) + { + /* + Here we set the read_set bitmap for all covering keys + except CLUSTERED indexes, with all the key-parts inside the key. + This is needed specifically for an index that contains virtual column. + + Example: + Lets say we have this query + SELECT b FROM t1; + + and the table definition is like + CREATE TABLE t1( + a varchar(10) DEFAULT NULL, + b varchar(255) GENERATED ALWAYS AS (a) VIRTUAL, + KEY key1 (b)); + + So we a virtual column b and an index key1 defined on the virtual + column. So if a query uses a vcol, base columns that it + depends on are automatically added to the read_set - because they're + needed to calculate the vcol. + But if we're doing keyread, vcol is taken + from the index, not calculated, and base columns do not need to be + in the read set. To ensure this we try to set the read_set to only + the key-parts of the indexes. + + Another side effect of this is + Lets say you have a query + select a, b from t1 + and there is an index key1 (a,b,c) + then as key1 is covering and we would have the keyread enable for + this key, so the below call will also set the read_set for column + c, which is not a problem as we read all the columns from the index + tuple. 
+ */ + if (!(table->file->index_flags(table->file->keyread, 0, 1) & HA_CLUSTERED_INDEX)) + table->mark_index_columns(table->file->keyread, table->read_set); + } + if (tab->cache && tab->cache->init(select_options & SELECT_DESCRIBE)) + revise_cache_usage(tab); + else + tab->remove_redundant_bnl_scan_conds(); + } +} + + +/* + @brief + Unpack temp table fields to base table fields. +*/ + +void unpack_to_base_table_fields(TABLE *table) +{ + JOIN_TAB *tab= table->reginfo.join_tab; + for (Copy_field *cp= tab->read_record.copy_field; + cp != tab->read_record.copy_field_end; cp++) + (*cp->do_copy)(cp); +} + +/* + Call item->fix_after_optimize for all items registered in + lex->fix_after_optimize + + This is needed for items like ROWNUM(), which needs access to structures + created by the early optimizer pass, like JOIN +*/ + +static void fix_items_after_optimize(THD *thd, SELECT_LEX *select_lex) +{ + List_iterator li(select_lex->fix_after_optimize); + + while (Item *item= li++) + item->fix_after_optimize(thd); +} + + +/* + Set a limit for the SELECT_LEX_UNIT based on ROWNUM usage. 
+ The limit is shown in EXPLAIN +*/ + +static bool set_limit_for_unit(THD *thd, SELECT_LEX_UNIT *unit, ha_rows lim) +{ + SELECT_LEX *gpar= unit->global_parameters(); + if (gpar->limit_params.select_limit != 0 && + // limit can not be an expression but can be parameter + (!gpar->limit_params.select_limit->basic_const_item() || + ((ha_rows)gpar->limit_params.select_limit->val_int()) < lim)) + return false; + + Query_arena *arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); + + gpar->limit_params.select_limit= + new (thd->mem_root) Item_int(thd, lim, MAX_BIGINT_WIDTH); + if (gpar->limit_params.select_limit == 0) + return true; // EOM + + unit->set_limit(gpar); + + gpar->limit_params.explicit_limit= true; // to show in EXPLAIN + + if (arena) + thd->restore_active_arena(arena, &backup); + + return false; +} + + +/** + Check possibility of LIMIT setting by rownum() of upper SELECT and do it + + @note Ideal is to convert something like + SELECT ... + FROM (SELECT ...) table + WHERE rownum() < ; + to + SELECT ... + FROM (SELECT ... 
LIMIT ) table + WHERE rownum() < ; + + @retval true EOM + @retval false no errors +*/ + +bool JOIN::optimize_upper_rownum_func() +{ + DBUG_ASSERT(select_lex->master_unit()->derived); + + if (select_lex->master_unit()->first_select() != select_lex) + return false; // first will set parameter + + if (select_lex->master_unit()->global_parameters()-> + limit_params.offset_limit != NULL) + return false; // offset is set, we cannot set limit + + SELECT_LEX *outer_select= select_lex->master_unit()->outer_select(); + /* + Check that it is safe to use rownum-limit from the outer query + (the one that has 'WHERE rownum()...') + */ + if (outer_select == NULL || + !outer_select->with_rownum || + (outer_select->options & SELECT_DISTINCT) || + outer_select->table_list.elements != 1 || + outer_select->where == NULL || + outer_select->where->type() != Item::FUNC_ITEM) + return false; + + return process_direct_rownum_comparison(thd, unit, outer_select->where); +} + + +/** + Test if the predicate compares rownum() with a constant + + @return 1 No or invalid rownum() compare + @return 0 rownum() is compared with a constant. + In this case *args contains the constant and + *inv_order constains 1 if the rownum() was the right + argument, like in 'WHERE 2 >= rownum()'. 
+*/ + +static bool check_rownum_usage(Item_func *func_item, longlong *limit, + bool *inv_order) +{ + Item *arg1, *arg2; + *inv_order= 0; + DBUG_ASSERT(func_item->argument_count() == 2); + + /* 'rownum op const' or 'const op field' */ + arg1= func_item->arguments()[0]->real_item(); + if (arg1->type() == Item::FUNC_ITEM && + ((Item_func*) arg1)->functype() == Item_func::ROWNUM_FUNC) + { + arg2= func_item->arguments()[1]->real_item(); + if (arg2->can_eval_in_optimize()) + { + *limit= arg2->val_int(); + return *limit <= 0 || (ulonglong) *limit >= HA_POS_ERROR; + } + } + else if (arg1->can_eval_in_optimize()) + { + arg2= func_item->arguments()[1]->real_item(); + if (arg2->type() == Item::FUNC_ITEM && + ((Item_func*) arg2)->functype() == Item_func::ROWNUM_FUNC) + { + *limit= arg1->val_int(); + *inv_order= 1; + return *limit <= 0 || (ulonglong) *limit >= HA_POS_ERROR; + } + } + return 1; +} + + +/* + Limit optimization for ROWNUM() + + Go through the WHERE clause and find out if there are any of the following + constructs on the top level: + rownum() <= integer_constant + rownum() < integer_constant + rownum() = 1 + + If yes, then threat the select as if 'LIMIT integer_constant' would + have been used +*/ + +static void optimize_rownum(THD *thd, SELECT_LEX_UNIT *unit, + Item *cond) +{ + DBUG_ENTER("optimize_rownum"); + + if (cond->type() == Item::COND_ITEM) + { + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item= li++)) + optimize_rownum(thd, unit, item); + } + DBUG_VOID_RETURN; + } + + process_direct_rownum_comparison(thd, unit, cond); + DBUG_VOID_RETURN; +} + + +static bool process_direct_rownum_comparison(THD *thd, SELECT_LEX_UNIT *unit, + Item *cond) +{ + DBUG_ENTER("process_direct_rownum_comparison"); + if (cond->real_type() == Item::FUNC_ITEM) + { + Item_func *pred= (Item_func*) cond; + longlong limit; + bool inv; + + if (pred->argument_count() != 2) + 
DBUG_RETURN(false); // Not a compare functions + if (check_rownum_usage(pred, &limit, &inv)) + DBUG_RETURN(false); + + Item_func::Functype pred_type= pred->functype(); + + if (inv && pred_type != Item_func::EQ_FUNC) + { + if (pred_type == Item_func::GT_FUNC) // # > rownum() + pred_type= Item_func::LT_FUNC; + else if (pred_type == Item_func::GE_FUNC) // # >= rownum() + pred_type= Item_func::LE_FUNC; + else + DBUG_RETURN(false); + } + switch (pred_type) { + case Item_func::LT_FUNC: // rownum() < # + { + if (limit <= 0) + DBUG_RETURN(false); + DBUG_RETURN(set_limit_for_unit(thd, unit, limit - 1)); + case Item_func::LE_FUNC: + DBUG_RETURN(set_limit_for_unit(thd, unit, limit)); + case Item_func::EQ_FUNC: + if (limit == 1) + DBUG_RETURN(set_limit_for_unit(thd, unit, limit)); + break; + default: + break; + } + } + } + DBUG_RETURN(false); +} + +/** + @brief + Transform IN predicates having equal constant elements to equalities + + @param thd The context of the statement + + @details + If all elements in an IN predicate are constant and equal to each other + then clause + - "a IN (e1,..,en)" can be transformed to "a = e1" + - "a NOT IN (e1,..,en)" can be transformed to "a != e1". + This means an object of Item_func_in can be replaced with an object of + Item_func_eq for IN (e1,..,en) clause or Item_func_ne for + NOT IN (e1,...,en). + Such a replacement allows the optimizer to choose a better execution plan. 
+ + This methods applies such transformation for each IN predicate of the WHERE + condition and ON expressions of this join where possible + + @retval + false success + true failure +*/ +bool JOIN::transform_in_predicates_into_equalities(THD *thd) +{ + DBUG_ENTER("JOIN::transform_in_predicates_into_equalities"); + DBUG_RETURN(transform_all_conds_and_on_exprs( + thd, &Item::in_predicate_to_equality_transformer)); +} + + +/** + @brief + Transform all items in WHERE and ON expressions using a given transformer + + @param thd The context of the statement + transformer Pointer to the transformation function + + @details + For each item of the WHERE condition and ON expressions of the SELECT + for this join the method performs the intransformation using the given + transformation function + + @retval + false success + true failure +*/ +bool JOIN::transform_all_conds_and_on_exprs(THD *thd, + Item_transformer transformer) +{ + if (conds) + { + conds= conds->top_level_transform(thd, transformer, (uchar *) 0); + if (!conds) + return true; + } + if (join_list) + { + if (transform_all_conds_and_on_exprs_in_join_list(thd, join_list, + transformer)) + return true; + } + return false; +} + + +bool JOIN::transform_all_conds_and_on_exprs_in_join_list( + THD *thd, List *join_list, Item_transformer transformer) +{ + TABLE_LIST *table; + List_iterator li(*join_list); + + while ((table= li++)) + { + if (table->nested_join) + { + if (transform_all_conds_and_on_exprs_in_join_list( + thd, &table->nested_join->join_list, transformer)) + return true; + } + if (table->on_expr) + { + table->on_expr= table->on_expr->top_level_transform(thd, transformer, 0); + if (!table->on_expr) + return true; + } + } + return false; +} + + +/** + @} (end of group Query_Optimizer) +*/ diff --git a/sql/sql_select.h b/sql/sql_select.h new file mode 100644 index 00000000..4f2719f1 --- /dev/null +++ b/sql/sql_select.h @@ -0,0 +1,2595 @@ +#ifndef SQL_SELECT_INCLUDED +#define SQL_SELECT_INCLUDED + +/* Copyright (c) 
2000, 2013, Oracle and/or its affiliates. + Copyright (c) 2008, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + @brief + classes to use when handling where clause +*/ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include "procedure.h" +#include "sql_array.h" /* Array */ +#include "records.h" /* READ_RECORD */ +#include "opt_range.h" /* SQL_SELECT, QUICK_SELECT_I */ +#include "filesort.h" + +#include "cset_narrowing.h" + +typedef struct st_join_table JOIN_TAB; +/* Values in optimize */ +#define KEY_OPTIMIZE_EXISTS 1U +#define KEY_OPTIMIZE_REF_OR_NULL 2U +#define KEY_OPTIMIZE_EQ 4U + +inline uint get_hash_join_key_no() { return MAX_KEY; } + +inline bool is_hash_join_key_no(uint key) { return key == MAX_KEY; } + +typedef struct keyuse_t { + TABLE *table; + Item *val; /**< or value if no field */ + table_map used_tables; + uint key, keypart, optimize; + key_part_map keypart_map; + ha_rows ref_table_rows; + /** + If true, the comparison this value was created from will not be + satisfied if val has NULL 'value'. + */ + bool null_rejecting; + /* + !NULL - This KEYUSE was created from an equality that was wrapped into + an Item_func_trig_cond. This means the equality (and validity of + this KEYUSE element) can be turned on and off. 
The on/off state + is indicted by the pointed value: + *cond_guard == TRUE <=> equality condition is on + *cond_guard == FALSE <=> equality condition is off + + NULL - Otherwise (the source equality can't be turned off) + */ + bool *cond_guard; + /* + 0..64 <=> This was created from semi-join IN-equality # sj_pred_no. + MAX_UINT Otherwise + */ + uint sj_pred_no; + + /* + If this is NULL than KEYUSE is always enabled. + Otherwise it points to the enabling flag for this keyuse (true <=> enabled) + */ + bool *validity_ref; + + bool is_for_hash_join() { return is_hash_join_key_no(key); } +} KEYUSE; + + +struct KEYUSE_EXT: public KEYUSE +{ + /* + This keyuse can be used only when the partial join being extended + contains the tables from this table map + */ + table_map needed_in_prefix; + /* The enabling flag for keyuses usable for splitting */ + bool validity_var; +}; + +/// Used when finding key fields +struct KEY_FIELD { + Field *field; + Item_bool_func *cond; + Item *val; ///< May be empty if diff constant + uint level; + uint optimize; + bool eq_func; + /** + If true, the condition this struct represents will not be satisfied + when val IS NULL. + */ + bool null_rejecting; + bool *cond_guard; /* See KEYUSE::cond_guard */ + uint sj_pred_no; /* See KEYUSE::sj_pred_no */ +}; + + +#define NO_KEYPART ((uint)(-1)) + +class store_key; + +const int NO_REF_PART= uint(-1); + +typedef struct st_table_ref +{ + bool key_err; + /** True if something was read into buffer in join_read_key. */ + bool has_record; + uint key_parts; ///< num of ... + uint key_length; ///< length of key_buff + int key; ///< key no + uchar *key_buff; ///< value to look for with key + uchar *key_buff2; ///< key_buff+key_length + store_key **key_copy; // + + /* + Bitmap of key parts which refer to constants. key_copy only has copiers for + non-const key parts. + */ + key_part_map const_ref_part_map; + + Item **items; ///< val()'s for each keypart + /* + Array of pointers to trigger variables. 
Some/all of the pointers may be + NULL. The ref access can be used iff + + for each used key part i, (!cond_guards[i] || *cond_guards[i]) + + This array is used by subquery code. The subquery code may inject + triggered conditions, i.e. conditions that can be 'switched off'. A ref + access created from such condition is not valid when at least one of the + underlying conditions is switched off (see subquery code for more details) + */ + bool **cond_guards; + /** + (null_rejecting & (1< disable the "cache" as doing lookup with the same key value may + produce different results (because of Index Condition Pushdown) + + */ + bool disable_cache; + + /* + If true, this ref access was constructed from equalities generated by + LATERAL DERIVED (aka GROUP BY splitting) optimization + */ + bool uses_splitting; + + bool tmp_table_index_lookup_init(THD *thd, KEY *tmp_key, Item_iterator &it, + bool value, uint skip= 0); + bool is_access_triggered(); +} TABLE_REF; + + +/* + The structs which holds the join connections and join states +*/ +enum join_type { JT_UNKNOWN,JT_SYSTEM,JT_CONST,JT_EQ_REF,JT_REF,JT_MAYBE_REF, + JT_ALL, JT_RANGE, JT_NEXT, JT_FT, JT_REF_OR_NULL, + JT_UNIQUE_SUBQUERY, JT_INDEX_SUBQUERY, JT_INDEX_MERGE, + JT_HASH, JT_HASH_RANGE, JT_HASH_NEXT, JT_HASH_INDEX_MERGE}; + +class JOIN; + +enum enum_nested_loop_state +{ + NESTED_LOOP_KILLED= -2, NESTED_LOOP_ERROR= -1, + NESTED_LOOP_OK= 0, NESTED_LOOP_NO_MORE_ROWS= 1, + NESTED_LOOP_QUERY_LIMIT= 3, NESTED_LOOP_CURSOR_LIMIT= 4 +}; + + +/* Possible sj_strategy values */ +enum sj_strategy_enum +{ + SJ_OPT_NONE=0, + SJ_OPT_DUPS_WEEDOUT=1, + SJ_OPT_LOOSE_SCAN =2, + SJ_OPT_FIRST_MATCH =3, + SJ_OPT_MATERIALIZE =4, + SJ_OPT_MATERIALIZE_SCAN=5 +}; + +/* Values for JOIN_TAB::packed_info */ +#define TAB_INFO_HAVE_VALUE 1U +#define TAB_INFO_USING_INDEX 2U +#define TAB_INFO_USING_WHERE 4U +#define TAB_INFO_FULL_SCAN_ON_NULL 8U + +typedef enum_nested_loop_state +(*Next_select_func)(JOIN *, struct st_join_table *, bool); 
+Next_select_func setup_end_select_func(JOIN *join); +int rr_sequential(READ_RECORD *info); +int read_record_func_for_rr_and_unpack(READ_RECORD *info); +Item *remove_pushed_top_conjuncts(THD *thd, Item *cond); +Item *and_new_conditions_to_optimized_cond(THD *thd, Item *cond, + COND_EQUAL **cond_eq, + List &new_conds, + Item::cond_result *cond_value); + +#include "sql_explain.h" + +/************************************************************************************** + * New EXPLAIN structures END + *************************************************************************************/ + +class JOIN_CACHE; +class SJ_TMP_TABLE; +class JOIN_TAB_RANGE; +class AGGR_OP; +class Filesort; +struct SplM_plan_info; +class SplM_opt_info; + +typedef struct st_join_table { + TABLE *table; + TABLE_LIST *tab_list; + KEYUSE *keyuse; /**< pointer to first used key */ + KEY *hj_key; /**< descriptor of the used best hash join key + not supported by any index */ + SQL_SELECT *select; + COND *select_cond; + COND *on_precond; /**< part of on condition to check before + accessing the first inner table */ + QUICK_SELECT_I *quick; + /* + The value of select_cond before we've attempted to do Index Condition + Pushdown. We may need to restore everything back if we first choose one + index but then reconsider (see test_if_skip_sort_order() for such + scenarios). + NULL means no index condition pushdown was performed. + */ + Item *pre_idx_push_select_cond; + /* + Pointer to the associated ON expression. on_expr_ref=!NULL except for + degenerate joins. + + Optimization phase: *on_expr_ref!=NULL for tables that are the single + tables on the inner side of the outer join (t1 LEFT JOIN t2 ON...) 
+ + Execution phase: *on_expr_ref!=NULL for tables that are first inner tables + within an outer join (which may have multiple tables) + */ + Item **on_expr_ref; + COND_EQUAL *cond_equal; /**< multiple equalities for the on expression */ + st_join_table *first_inner; /**< first inner table for including outerjoin */ + bool found; /**< true after all matches or null complement */ + bool not_null_compl;/**< true before null complement is added */ + st_join_table *last_inner; /**< last table table for embedding outer join */ + st_join_table *first_upper; /**< first inner table for embedding outer join */ + st_join_table *first_unmatched; /**< used for optimization purposes only */ + + /* + For join tabs that are inside an SJM bush: root of the bush + */ + st_join_table *bush_root_tab; + + /* TRUE <=> This join_tab is inside an SJM bush and is the last leaf tab here */ + bool last_leaf_in_bush; + + /* + ptr - this is a bush, and ptr points to description of child join_tab + range + NULL - this join tab has no bush children + */ + JOIN_TAB_RANGE *bush_children; + + /* Special content for EXPLAIN 'Extra' column or NULL if none */ + enum explain_extra_tag info; + + Table_access_tracker *tracker; + + Table_access_tracker *jbuf_tracker; + Time_and_counter_tracker *jbuf_unpack_tracker; + Counter_tracker *jbuf_loops_tracker; + /* + Bitmap of TAB_INFO_* bits that encodes special line for EXPLAIN 'Extra' + column, or 0 if there is no info. + */ + uint packed_info; + + // READ_RECORD::Setup_func materialize_table; + READ_RECORD::Setup_func read_first_record; + Next_select_func next_select; + READ_RECORD read_record; + /* + Currently the following two fields are used only for a [NOT] IN subquery + if it is executed by an alternative full table scan when the left operand of + the subquery predicate is evaluated to NULL. 
+ */ + READ_RECORD::Setup_func save_read_first_record;/* to save read_first_record */ + READ_RECORD::Read_func save_read_record;/* to save read_record.read_record */ + double worst_seeks; + key_map const_keys; /**< Keys with constant part */ + key_map checked_keys; /**< Keys checked in find_best */ + key_map needed_reg; + key_map keys; /**< all keys with can be used */ + + /* Either #rows in the table or 1 for const table. */ + ha_rows records; + /* + Number of records that will be scanned (yes scanned, not returned) by the + best 'independent' access method, i.e. table scan or QUICK_*_SELECT) + */ + ha_rows found_records; + /* + Cost of accessing the table using "ALL" or range/index_merge access + method (but not 'index' for some reason), i.e. this matches method which + E(#records) is in found_records. + */ + double read_time; + + /* Copy of POSITION::records_read, set by get_best_combination() */ + double records_read; + + /* The selectivity of the conditions that can be pushed to the table */ + double cond_selectivity; + + /* Startup cost for execution */ + double startup_cost; + + double partial_join_cardinality; + + table_map dependent,key_dependent; + /* + This is set for embedded sub queries. It contains the table map of + the outer expression, like 'A' in the following expression: + WHERE A in (SELECT ....) + */ + table_map embedded_dependent; + + /* + 1 - use quick select + 2 - use "Range checked for each record" + */ + uint use_quick; + /* + Index to use. Note: this is valid only for 'index' access, but not range or + ref access. 
+ */ + uint index; + uint status; ///< Save status for cache + uint used_fields; + ulong used_fieldlength; + ulong max_used_fieldlength; + uint used_blobs; + uint used_null_fields; + uint used_uneven_bit_fields; + enum join_type type; + /* If first key part is used for any key in 'key_dependent' */ + bool key_start_dependent; + bool cached_eq_ref_table,eq_ref_table; + bool shortcut_for_distinct; + bool sorted; + /* + If it's not 0 the number stored this field indicates that the index + scan has been chosen to access the table data and we expect to scan + this number of rows for the table. + */ + ha_rows limit; + TABLE_REF ref; + /* TRUE <=> condition pushdown supports other tables presence */ + bool icp_other_tables_ok; + /* + TRUE <=> condition pushed to the index has to be factored out of + the condition pushed to the table + */ + bool idx_cond_fact_out; + bool use_join_cache; + /* TRUE <=> it is prohibited to join this table using join buffer */ + bool no_forced_join_cache; + uint used_join_cache_level; + JOIN_CACHE *cache; + /* + Index condition for BKA access join + */ + Item *cache_idx_cond; + SQL_SELECT *cache_select; + AGGR_OP *aggr; + JOIN *join; + /* + Embedding SJ-nest (may be not the direct parent), or NULL if none. + This variable holds the result of table pullout. + */ + TABLE_LIST *emb_sj_nest; + + /* FirstMatch variables (final QEP) */ + struct st_join_table *first_sj_inner_tab; + struct st_join_table *last_sj_inner_tab; + + /* Variables for semi-join duplicate elimination */ + SJ_TMP_TABLE *flush_weedout_table; + SJ_TMP_TABLE *check_weed_out_table; + /* for EXPLAIN only: */ + SJ_TMP_TABLE *first_weedout_table; + + /** + reference to saved plan and execution statistics + */ + Explain_table_access *explain_plan; + + /* + If set, means we should stop join enumeration after we've got the first + match and return to the specified join tab. May point to + join->join_tab[-1] which means stop join execution after the first + match. 
+ */ + struct st_join_table *do_firstmatch; + + /* + ptr - We're doing a LooseScan, this join tab is the first (i.e. + "driving") join tab), and ptr points to the last join tab + handled by the strategy. loosescan_match_tab->found_match + should be checked to see if the current value group had a match. + NULL - Not doing a loose scan on this join tab. + */ + struct st_join_table *loosescan_match_tab; + + /* TRUE <=> we are inside LooseScan range */ + bool inside_loosescan_range; + + /* Buffer to save index tuple to be able to skip duplicates */ + uchar *loosescan_buf; + + /* + Index used by LooseScan (we store it here separately because ref access + stores it in tab->ref.key, while range scan stores it in tab->index, etc) + */ + uint loosescan_key; + + /* Length of key tuple (depends on #keyparts used) to store in the above */ + uint loosescan_key_len; + + /* Used by LooseScan. TRUE<=> there has been a matching record combination */ + bool found_match; + + /* + Used by DuplicateElimination. tab->table->ref must have the rowid + whenever we have a current record. + */ + int keep_current_rowid; + + /* NestedOuterJoins: Bitmap of nested joins this table is part of */ + nested_join_map embedding_map; + + /* Tmp table info */ + TMP_TABLE_PARAM *tmp_table_param; + + /* Sorting related info */ + Filesort *filesort; + SORT_INFO *filesort_result; + + /* + Non-NULL value means this join_tab must do window function computation + before reading. + */ + Window_funcs_computation* window_funcs_step; + + /** + List of topmost expressions in the select list. The *next* JOIN_TAB + in the plan should use it to obtain correct values. Same applicable to + all_fields. These lists are needed because after tmp tables functions + will be turned to fields. These variables are pointing to + tmp_fields_list[123]. Valid only for tmp tables and the last non-tmp + table in the query plan. 
+ @see JOIN::make_aggr_tables_info() + */ + List *fields; + /** List of all expressions in the select list */ + List *all_fields; + /* + Pointer to the ref array slice which to switch to before sending + records. Valid only for tmp tables. + */ + Ref_ptr_array *ref_array; + + /** Number of records saved in tmp table */ + ha_rows send_records; + + /** HAVING condition for checking prior saving a record into tmp table*/ + Item *having; + + /** TRUE <=> remove duplicates on this table. */ + bool distinct; + + /* + Semi-join strategy to be used for this join table. This is a copy of + POSITION::sj_strategy field. This field is set up by the + fix_semijoin_strategies_for_picked_join_order. + */ + enum sj_strategy_enum sj_strategy; + + uint n_sj_tables; + + bool preread_init_done; + + /* true <=> split optimization has been applied to this materialized table */ + bool is_split_derived; + + /* + Bitmap of split materialized derived tables that can be filled just before + this join table is to be joined. All parameters of the split derived tables + belong to tables preceding this join table. 
+ */ + table_map split_derived_to_update; + + /* + Cost info to the range filter used when joining this join table + (Defined when the best join order has been already chosen) + */ + Range_rowid_filter_cost_info *range_rowid_filter_info; + /* Rowid filter to be used when joining this join table */ + Rowid_filter *rowid_filter; + /* Becomes true just after the used range filter has been built / filled */ + bool is_rowid_filter_built; + + bool build_range_rowid_filter_if_needed(); + + void cleanup(); + inline bool is_using_loose_index_scan() + { + const SQL_SELECT *sel= get_sql_select(); + return (sel && sel->quick && + (sel->quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)); + } + bool is_using_agg_loose_index_scan () + { + const SQL_SELECT *sel= get_sql_select(); + return (is_using_loose_index_scan() && + ((QUICK_GROUP_MIN_MAX_SELECT *)sel->quick)->is_agg_distinct()); + } + const SQL_SELECT *get_sql_select() + { + return filesort ? filesort->select : select; + } + bool is_inner_table_of_semi_join_with_first_match() + { + return first_sj_inner_tab != NULL; + } + bool is_inner_table_of_semijoin() + { + return emb_sj_nest != NULL; + } + bool is_inner_table_of_outer_join() + { + return first_inner != NULL; + } + bool is_single_inner_of_semi_join_with_first_match() + { + return first_sj_inner_tab == this && last_sj_inner_tab == this; + } + bool is_single_inner_of_outer_join() + { + return first_inner == this && first_inner->last_inner == this; + } + bool is_first_inner_for_outer_join() + { + return first_inner == this; + } + bool use_match_flag() + { + return is_first_inner_for_outer_join() || first_sj_inner_tab == this ; + } + bool check_only_first_match() + { + return is_inner_table_of_semi_join_with_first_match() || + (is_inner_table_of_outer_join() && + table->reginfo.not_exists_optimize); + } + bool is_last_inner_table() + { + return (first_inner && first_inner->last_inner == this) || + last_sj_inner_tab == this; + } + /* + Check whether the table 
belongs to a nest of inner tables of an + outer join or to a nest of inner tables of a semi-join + */ + bool is_nested_inner() + { + if (first_inner && + (first_inner != first_inner->last_inner || first_inner->first_upper)) + return TRUE; + if (first_sj_inner_tab && first_sj_inner_tab != last_sj_inner_tab) + return TRUE; + return FALSE; + } + struct st_join_table *get_first_inner_table() + { + if (first_inner) + return first_inner; + return first_sj_inner_tab; + } + void set_select_cond(COND *to, uint line) + { + DBUG_PRINT("info", ("select_cond changes %p -> %p at line %u tab %p", + select_cond, to, line, this)); + select_cond= to; + } + COND *set_cond(COND *new_cond) + { + COND *tmp_select_cond= select_cond; + set_select_cond(new_cond, __LINE__); + if (select) + select->cond= new_cond; + return tmp_select_cond; + } + void calc_used_field_length(bool max_fl); + ulong get_used_fieldlength() + { + if (!used_fieldlength) + calc_used_field_length(FALSE); + return used_fieldlength; + } + ulong get_max_used_fieldlength() + { + if (!max_used_fieldlength) + calc_used_field_length(TRUE); + return max_used_fieldlength; + } + double get_partial_join_cardinality() { return partial_join_cardinality; } + bool hash_join_is_possible(); + int make_scan_filter(); + bool is_ref_for_hash_join() { return is_hash_join_key_no(ref.key); } + KEY *get_keyinfo_by_key_no(uint key) + { + return (is_hash_join_key_no(key) ? hj_key : table->key_info+key); + } + double scan_time(); + ha_rows get_examined_rows(); + bool preread_init(); + + bool pfs_batch_update(JOIN *join); + + bool is_sjm_nest() { return MY_TEST(bush_children); } + + /* + If this join_tab reads a non-merged semi-join (also called jtbm), return + the select's number. Otherwise, return 0. 
+ */ + int get_non_merged_semijoin_select() const + { + Item_in_subselect *subq; + if (table->pos_in_table_list && + (subq= table->pos_in_table_list->jtbm_subselect)) + { + return subq->unit->first_select()->select_number; + } + return 0; /* Not a merged semi-join */ + } + + bool access_from_tables_is_allowed(table_map used_tables, + table_map sjm_lookup_tables) + { + table_map used_sjm_lookup_tables= used_tables & sjm_lookup_tables; + return !used_sjm_lookup_tables || + (emb_sj_nest && + !(used_sjm_lookup_tables & ~emb_sj_nest->sj_inner_tables)); + } + + bool keyuse_is_valid_for_access_in_chosen_plan(JOIN *join, KEYUSE *keyuse); + + void remove_redundant_bnl_scan_conds(); + + bool save_explain_data(Explain_table_access *eta, table_map prefix_tables, + bool distinct, struct st_join_table *first_top_tab); + + bool use_order() const; ///< Use ordering provided by chosen index? + bool sort_table(); + bool remove_duplicates(); + + void partial_cleanup(); + void add_keyuses_for_splitting(); + SplM_plan_info *choose_best_splitting(uint idx, + table_map remaining_tables, + const POSITION *join_positions, + table_map *spl_pd_boundary); + bool fix_splitting(SplM_plan_info *spl_plan, table_map excluded_tables, + bool is_const_table); +} JOIN_TAB; + + +#include "sql_join_cache.h" + +enum_nested_loop_state +sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); +enum_nested_loop_state +sub_select(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); +enum_nested_loop_state +sub_select_postjoin_aggr(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); + +enum_nested_loop_state +end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), + bool end_of_records); +enum_nested_loop_state +end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), + bool end_of_records); + + +class Semi_join_strategy_picker +{ +public: + /* Called when starting to build a new join prefix */ + virtual void set_empty() = 0; + + /* + Update internal state after 
another table has been added to the join + prefix + */ + virtual void set_from_prev(POSITION *prev) = 0; + + virtual bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos) = 0; + + virtual void mark_used() = 0; + + virtual ~Semi_join_strategy_picker() = default; +}; + + +/* + Duplicate Weedout strategy optimization state +*/ + +class Duplicate_weedout_picker : public Semi_join_strategy_picker +{ + /* The first table that the strategy will need to handle */ + uint first_dupsweedout_table; + + /* + Tables that we will need to have in the prefix to do the weedout step + (all inner and all outer that the involved semi-joins are correlated with) + */ + table_map dupsweedout_tables; + + bool is_used; +public: + void set_empty() + { + dupsweedout_tables= 0; + first_dupsweedout_table= MAX_TABLES; + is_used= FALSE; + } + void set_from_prev(POSITION *prev); + + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *stratey, + POSITION *loose_scan_pos); + + void mark_used() { is_used= TRUE; } + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + + +class Firstmatch_picker : public Semi_join_strategy_picker +{ + /* + Index of the first inner table that we intend to handle with this + strategy + */ + uint first_firstmatch_table; + /* + Tables that were not in the join prefix when we've started considering + FirstMatch strategy. + */ + table_map first_firstmatch_rtbl; + /* + Tables that need to be in the prefix before we can calculate the cost + of using FirstMatch strategy. 
+ */ + table_map firstmatch_need_tables; + + bool is_used; + + bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); } + void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; } +public: + void set_empty() + { + invalidate_firstmatch_prefix(); + is_used= FALSE; + } + + void set_from_prev(POSITION *prev); + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos); + + void mark_used() { is_used= TRUE; } + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + + +class LooseScan_picker : public Semi_join_strategy_picker +{ +public: + /* The first (i.e. driving) table we're doing loose scan for */ + uint first_loosescan_table; + /* + Tables that need to be in the prefix before we can calculate the cost + of using LooseScan strategy. + */ + table_map loosescan_need_tables; + + /* + keyno - Planning to do LooseScan on this key. 
If keyuse is NULL then + this is a full index scan, otherwise this is a ref+loosescan + scan (and keyno matches the KEUSE's) + MAX_KEY - Not doing a LooseScan + */ + uint loosescan_key; // final (one for strategy instance ) + uint loosescan_parts; /* Number of keyparts to be kept distinct */ + + bool is_used; + void set_empty() + { + first_loosescan_table= MAX_TABLES; + is_used= FALSE; + } + + void set_from_prev(POSITION *prev); + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos); + void mark_used() { is_used= TRUE; } + + friend class Loose_scan_opt; + friend void best_access_path(JOIN *join, + JOIN_TAB *s, + table_map remaining_tables, + const POSITION *join_positions, + uint idx, + bool disable_jbuf, + double record_count, + POSITION *pos, + POSITION *loose_scan_pos); + friend bool get_best_combination(JOIN *join); + friend int setup_semijoin_loosescan(JOIN *join); + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + + +class Sj_materialization_picker : public Semi_join_strategy_picker +{ + bool is_used; + + /* The last inner table (valid once we're after it) */ + uint sjm_scan_last_inner; + /* + Tables that we need to have in the prefix to calculate the correct cost. + Basically, we need all inner tables and outer tables mentioned in the + semi-join's ON expression so we can correctly account for fanout. 
+ */ + table_map sjm_scan_need_tables; + +public: + void set_empty() + { + sjm_scan_need_tables= 0; + sjm_scan_last_inner= 0; + is_used= FALSE; + } + void set_from_prev(POSITION *prev); + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos); + void mark_used() { is_used= TRUE; } + + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + + +class Range_rowid_filter_cost_info; +class Rowid_filter; + + +/** + Information about a position of table within a join order. Used in join + optimization. +*/ +class POSITION +{ +public: + /* The table that's put into join order */ + JOIN_TAB *table; + + /* + The "fanout": number of output rows that will be produced (after + pushed down selection condition is applied) per each row combination of + previous tables. + */ + double records_read; + + /* The selectivity of the pushed down conditions */ + double cond_selectivity; + + /* + Cost accessing the table in course of the entire complete join execution, + i.e. cost of one access method use (e.g. 'range' or 'ref' scan ) times + number the access method will be invoked. + */ + double read_time; + + double prefix_record_count; + + /* Cost for the join prefix */ + double prefix_cost; + + /* + NULL - 'index' or 'range' or 'index_merge' or 'ALL' access is used. + Other - [eq_]ref[_or_null] access is used. Pointer to {t.keypart1 = expr} + */ + KEYUSE *key; + + /* Cardinality of current partial join ending with this position */ + double partial_join_cardinality; + + /* Info on splitting plan used at this position */ + SplM_plan_info *spl_plan; + + /* + If spl_plan is NULL the value of spl_pd_boundary is 0. 
Otherwise + spl_pd_boundary contains the bitmap of the table from the current + partial join ending at this position that starts the sub-sequence of + tables S from which no conditions are allowed to be used in the plan + spl_plan for the split table joined at this position. + */ + table_map spl_pd_boundary; + + /* Cost info for the range filter used at this position */ + Range_rowid_filter_cost_info *range_rowid_filter_info; + + /* If ref-based access is used: bitmap of tables this table depends on */ + table_map ref_depend_map; + + /* tables that may help best_access_path() to find a better key */ + table_map key_dependent; + /* + Bitmap of semi-join inner tables that are in the join prefix and for + which there's no provision for how to eliminate semi-join duplicates + they produce. + */ + table_map dups_producing_tables; + + table_map inner_tables_handled_with_other_sjs; + + Duplicate_weedout_picker dups_weedout_picker; + Firstmatch_picker firstmatch_picker; + LooseScan_picker loosescan_picker; + Sj_materialization_picker sjmat_picker; + + /* + Current optimization state: Semi-join strategy to be used for this + and preceding join tables. + + Join optimizer sets this for the *last* join_tab in the + duplicate-generating range. That is, in order to interpret this field, + one needs to traverse join->[best_]positions array from right to left. + When you see a join table with sj_strategy!= SJ_OPT_NONE, some other + field (depending on the strategy) tells how many preceding positions + this applies to. The values of covered_preceding_positions->sj_strategy + must be ignored. + */ + enum sj_strategy_enum sj_strategy; + + /* Type of join (EQ_REF, REF etc) */ + enum join_type type; + /* + Valid only after fix_semijoin_strategies_for_picked_join_order() call: + if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that + are covered by the specified semi-join strategy + */ + uint n_sj_tables; + + /* + TRUE <=> join buffering will be used. 
At the moment this is based on + *very* imprecise guesses made in best_access_path(). + */ + bool use_join_buffer; + POSITION(); +}; + +typedef Bounds_checked_array Item_null_array; + +typedef struct st_rollup +{ + enum State { STATE_NONE, STATE_INITED, STATE_READY }; + State state; + Item_null_array null_items; + Ref_ptr_array *ref_pointer_arrays; + List *fields; +} ROLLUP; + + +class JOIN_TAB_RANGE: public Sql_alloc +{ +public: + JOIN_TAB *start; + JOIN_TAB *end; +}; + +class Pushdown_query; + +/** + @brief + Class to perform postjoin aggregation operations + + @details + The result records are obtained on the put_record() call. + The aggrgation process is determined by the write_func, it could be: + end_write Simply store all records in tmp table. + end_write_group Perform grouping using join->group_fields, + records are expected to be sorted. + end_update Perform grouping using the key generated on tmp + table. Input records aren't expected to be sorted. + Tmp table uses the heap engine + end_update_unique Same as above, but the engine is myisam. + + Lazy table initialization is used - the table will be instantiated and + rnd/index scan started on the first put_record() call. + +*/ + +class AGGR_OP :public Sql_alloc +{ +public: + JOIN_TAB *join_tab; + + AGGR_OP(JOIN_TAB *tab) : join_tab(tab), write_func(NULL) + {}; + + enum_nested_loop_state put_record() { return put_record(false); }; + /* + Send the result of operation further (to a next operation/client) + This function is called after all records were put into tmp table. + + @return return one of enum_nested_loop_state values. + */ + enum_nested_loop_state end_send(); + /** write_func setter */ + void set_write_func(Next_select_func new_write_func) + { + write_func= new_write_func; + } + +private: + /** Write function that would be used for saving records in tmp table. 
*/ + Next_select_func write_func; + enum_nested_loop_state put_record(bool end_of_records); + bool prepare_tmp_table(); +}; + + +class JOIN :public Sql_alloc +{ +private: + JOIN(const JOIN &rhs); /**< not implemented */ + JOIN& operator=(const JOIN &rhs); /**< not implemented */ + +protected: + + /** + The subset of the state of a JOIN that represents an optimized query + execution plan. Allows saving/restoring different JOIN plans for the same + query. + */ + class Join_plan_state { + public: + DYNAMIC_ARRAY keyuse; /* Copy of the JOIN::keyuse array. */ + POSITION *best_positions; /* Copy of JOIN::best_positions */ + /* Copies of the JOIN_TAB::keyuse pointers for each JOIN_TAB. */ + KEYUSE **join_tab_keyuse; + /* Copies of JOIN_TAB::checked_keys for each JOIN_TAB. */ + key_map *join_tab_checked_keys; + SJ_MATERIALIZATION_INFO **sj_mat_info; + my_bool error; + public: + Join_plan_state(uint tables) : error(0) + { + keyuse.elements= 0; + keyuse.buffer= NULL; + keyuse.malloc_flags= 0; + best_positions= 0; /* To detect errors */ + error= my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME), + &best_positions, + sizeof(*best_positions) * (tables + 1), + &join_tab_keyuse, + sizeof(*join_tab_keyuse) * tables, + &join_tab_checked_keys, + sizeof(*join_tab_checked_keys) * tables, + &sj_mat_info, + sizeof(sj_mat_info) * tables, + NullS) == 0; + } + Join_plan_state(JOIN *join); + ~Join_plan_state() + { + delete_dynamic(&keyuse); + my_free(best_positions); + } + }; + + /* Results of reoptimizing a JOIN via JOIN::reoptimize(). */ + enum enum_reopt_result { + REOPT_NEW_PLAN, /* there is a new reoptimized plan */ + REOPT_OLD_PLAN, /* no new improved plan can be found, use the old one */ + REOPT_ERROR, /* an irrecovarable error occurred during reoptimization */ + REOPT_NONE /* not yet reoptimized */ + }; + + /* Support for plan reoptimization with rewritten conditions. 
*/ + enum_reopt_result reoptimize(Item *added_where, table_map join_tables, + Join_plan_state *save_to); + /* Choose a subquery plan for a table-less subquery. */ + bool choose_tableless_subquery_plan(); + void handle_implicit_grouping_with_window_funcs(); + +public: + void save_query_plan(Join_plan_state *save_to); + void reset_query_plan(); + void restore_query_plan(Join_plan_state *restore_from); + +public: + JOIN_TAB *join_tab, **best_ref; + + /* List of fields that aren't under an aggregate function */ + List non_agg_fields; + + JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs + List join_tab_ranges; + + /* + Base tables participating in the join. After join optimization is done, the + tables are stored in the join order (but the only really important part is + that const tables are first). + */ + TABLE **table; + /** + The table which has an index that allows to produce the requried ordering. + A special value of 0x1 means that the ordering will be produced by + passing 1st non-const table to filesort(). NULL means no such table exists. + */ + TABLE *sort_by_table; + /* + Number of tables in the join. + (In MySQL, it is named 'tables' and is also the number of elements in + join->join_tab array. In MariaDB, the latter is not true, so we've renamed + the variable) + */ + uint table_count; + uint outer_tables; /**< Number of tables that are not inside semijoin */ + uint const_tables; + /* + Number of tables in the top join_tab array. Normally this matches + (join_tab_ranges.head()->end - join_tab_ranges.head()->start). + + We keep it here so that it is saved/restored with JOIN::restore_tmp. + */ + uint top_join_tab_count; + uint aggr_tables; ///< Number of post-join tmp tables + uint send_group_parts; + /* + This represents the number of items in ORDER BY *after* removing + all const items. 
This is computed before other optimizations take place, + such as removal of ORDER BY when it is a prefix of GROUP BY, for example: + GROUP BY a, b ORDER BY a + + This is used when deciding to send rows, by examining the correct number + of items in the group_fields list when ORDER BY was previously eliminated. + */ + uint with_ties_order_count; + /* + True if the query has GROUP BY. + (that is, if group_by != NULL. when DISTINCT is converted into GROUP BY, it + will set this, too. It is not clear why we need a separate var from + group_list) + */ + bool group; + bool need_distinct; + + /** + Indicates that grouping will be performed on the result set during + query execution. This field belongs to query execution. + + If 'sort_and_group' is set, then the optimizer is going to use on of + the following algorithms to resolve GROUP BY. + + - If one table, sort the table and then calculate groups on the fly. + - If more than one table, create a temporary table to hold the join, + sort it and then resolve group by on the fly. + + The 'on the fly' calculation is done in end_send_group() + + @see make_group_fields, alloc_group_fields, JOIN::exec, + setup_end_select_func + */ + bool sort_and_group; + bool first_record,full_join, no_field_update; + bool hash_join; + bool do_send_rows; + table_map const_table_map; + /** + Bitmap of semijoin tables that the current partial plan decided + to materialize and access by lookups + */ + table_map sjm_lookup_tables; + /** + Bitmap of semijoin tables that the chosen plan decided + to materialize to scan the results of materialization + */ + table_map sjm_scan_tables; + /* + Constant tables for which we have found a row (as opposed to those for + which we didn't). + */ + table_map found_const_table_map; + + /* Tables removed by table elimination. Set to 0 before the elimination. 
*/ + table_map eliminated_tables; + /* + Bitmap of all inner tables from outer joins (set at start of + make_join_statistics) + */ + table_map outer_join; + /* Bitmap of tables used in the select list items */ + table_map select_list_used_tables; + /* Tables that has HA_NON_COMPARABLE_ROWID (does not support rowid) set */ + table_map not_usable_rowid_map; + /* Tables that have a possiblity to use EQ_ref */ + table_map eq_ref_tables; + + table_map allowed_top_level_tables; + ha_rows send_records,found_records,join_examined_rows, accepted_rows; + + /* + LIMIT for the JOIN operation. When not using aggregation or DISITNCT, this + is the same as select's LIMIT clause specifies. + Note that this doesn't take sql_calc_found_rows into account. + */ + ha_rows row_limit; + + /* + How many output rows should be produced after GROUP BY. + (if sql_calc_found_rows is used, LIMIT is ignored) + */ + ha_rows select_limit; + /* + Number of duplicate rows found in UNION. + */ + ha_rows duplicate_rows; + /** + Used to fetch no more than given amount of rows per one + fetch operation of server side cursor. + The value is checked in end_send and end_send_group in fashion, similar + to offset_limit_cnt: + - fetch_limit= HA_POS_ERROR if there is no cursor. + - when we open a cursor, we set fetch_limit to 0, + - on each fetch iteration we add num_rows to fetch to fetch_limit + NOTE: currently always HA_POS_ERROR. + */ + ha_rows fetch_limit; + + /* Finally picked QEP. This is result of join optimization */ + POSITION *best_positions; + POSITION *sort_positions; /* Temporary space used by greedy_search */ + POSITION *next_sort_position; /* Next free space in sort_positions */ + + Pushdown_query *pushdown_query; + JOIN_TAB *original_join_tab; + uint sort_space; + +/******* Join optimization state members start *******/ + /* + pointer - we're doing optimization for a semi-join materialization nest. 
+ NULL - otherwise + */ + TABLE_LIST *emb_sjm_nest; + + /* Current join optimization state */ + POSITION *positions; + + /* + Bitmap of nested joins embedding the position at the end of the current + partial join (valid only during join optimizer run). + */ + nested_join_map cur_embedding_map; + + /* + Bitmap of inner tables of semi-join nests that have a proper subset of + their tables in the current join prefix. That is, of those semi-join + nests that have their tables both in and outside of the join prefix. + (Note: tables that are constants but have not been pulled out of semi-join + nests are not considered part of semi-join nests) + */ + table_map cur_sj_inner_tables; + + /* A copy of thd->variables.optimizer_prune_level */ + uint prune_level; + /* + If true, do extra heuristic pruning (enabled based on + optimizer_extra_pruning_depth) + */ + bool extra_heuristic_pruning; +#ifndef DBUG_OFF + void dbug_verify_sj_inner_tables(uint n_positions) const; + int dbug_join_tab_array_size; +#endif + + /* We also maintain a stack of join optimization states in * join->positions[] */ +/******* Join optimization state members end *******/ + + /* + Tables within complex firstmatch ranges (i.e. those where inner tables are + interleaved with outer tables). Join buffering cannot be used for these. + */ + table_map complex_firstmatch_tables; + + Next_select_func first_select; + /* + The cost of best complete join plan found so far during optimization, + after optimization phase - cost of picked join order (not taking into + account the changes made by test_if_skip_sort_order()). + */ + double best_read; + /* + Estimated result rows (fanout) of the join operation. If this is a subquery + that is reexecuted multiple times, this value includes the estiamted # of + reexecutions. This value is equal to the multiplication of all + join->positions[i].records_read of a JOIN. + */ + double join_record_count; + List *fields; + + /* Used only for FETCH ... WITH TIES to identify peers. 
*/ + List order_fields; + /* Used during GROUP BY operations to identify when a group has changed. */ + List group_fields, group_fields_cache; + THD *thd; + Item_sum **sum_funcs, ***sum_funcs_end; + /** second copy of sumfuncs (for queries with 2 temporary tables */ + Item_sum **sum_funcs2, ***sum_funcs_end2; + Procedure *procedure; + Item *having; + Item *tmp_having; ///< To store having when processed temporary table + Item *having_history; ///< Store having for explain + ORDER *group_list_for_estimates; + bool having_is_correlated; + ulonglong select_options; + /* + Bitmap of allowed types of the join caches that + can be used for join operations + */ + uint allowed_join_cache_types; + bool allowed_semijoin_with_cache; + bool allowed_outer_join_with_cache; + /* Maximum level of the join caches that can be used for join operations */ + uint max_allowed_join_cache_level; + select_result *result; + TMP_TABLE_PARAM tmp_table_param; + MYSQL_LOCK *lock; + /// unit structure (with global parameters) for this select + SELECT_LEX_UNIT *unit; + /// select that processed + SELECT_LEX *select_lex; + /** + TRUE <=> optimizer must not mark any table as a constant table. + This is needed for subqueries in form "a IN (SELECT .. UNION SELECT ..): + when we optimize the select that reads the results of the union from a + temporary table, we must not mark the temp. table as constant because + the number of rows in it may vary from one subquery execution to another. + */ + bool no_const_tables; + /* + This flag is set if we call no_rows_in_result() as par of end_group(). 
+ This is used as a simple speed optimization to avoiding calling + restore_no_rows_in_result() in ::reinit() + */ + bool no_rows_in_result_called; + + /** + This is set if SQL_CALC_ROWS was calculated by filesort() + and should be taken from the appropriate JOIN_TAB + */ + bool filesort_found_rows; + + bool subq_exit_fl; + + ROLLUP rollup; ///< Used with rollup + + bool mixed_implicit_grouping; + bool select_distinct; ///< Set if SELECT DISTINCT + /** + If we have the GROUP BY statement in the query, + but the group_list was emptied by optimizer, this + flag is TRUE. + It happens when fields in the GROUP BY are from + constant table + */ + bool group_optimized_away; + + /* + simple_xxxxx is set if ORDER/GROUP BY doesn't include any references + to other tables than the first non-constant table in the JOIN. + It's also set if ORDER/GROUP BY is empty. + Used for deciding for or against using a temporary table to compute + GROUP/ORDER BY. + */ + bool simple_order, simple_group; + + /* + ordered_index_usage is set if an ordered index access + should be used instead of a filesort when computing + ORDER/GROUP BY. + */ + enum + { + ordered_index_void, // No ordered index avail. + ordered_index_group_by, // Use index for GROUP BY + ordered_index_order_by // Use index for ORDER BY + } ordered_index_usage; + + /** + Is set only in case if we have a GROUP BY clause + and no ORDER BY after constant elimination of 'order'. + */ + bool no_order; + /** Is set if we have a GROUP BY and we have ORDER BY on a constant. */ + bool skip_sort_order; + + bool need_tmp; + bool hidden_group_fields; + /* TRUE if there was full cleunap of the JOIN */ + bool cleaned; + DYNAMIC_ARRAY keyuse; + Item::cond_result cond_value, having_value; + /** + Impossible where after reading const tables + (set in make_join_statistics()) + */ + bool impossible_where; + + /* + All fields used in the query processing. + + Initially this is a list of fields from the query's SQL text. 
+ + Then, ORDER/GROUP BY and Window Function code add columns that need to + be saved to be available in the post-group-by context. These extra columns + are added to the front, because this->all_fields points to the suffix of + this list. + */ + List all_fields; + ///Above list changed to use temporary table + List tmp_all_fields1, tmp_all_fields2, tmp_all_fields3; + ///Part, shared with list above, emulate following list + List tmp_fields_list1, tmp_fields_list2, tmp_fields_list3; + + /* + The original field list as it was passed to mysql_select(). This refers + to select_lex->item_list. + CAUTION: this list is a suffix of this->all_fields list, that is, it shares + elements with that list! + */ + List &fields_list; + List procedure_fields_list; + int error; + + ORDER *order, *group_list, *proc_param; //hold parameters of mysql_select + COND *conds; // ---"--- + Item *conds_history; // store WHERE for explain + COND *outer_ref_cond; /// *join_list; ///< list of joined tables in reverse order + COND_EQUAL *cond_equal; + COND_EQUAL *having_equal; + /* + Constant codition computed during optimization, but evaluated during + join execution. Typically expensive conditions that should not be + evaluated at optimization time. + */ + Item *exec_const_cond; + /* + Constant ORDER and/or GROUP expressions that contain subqueries. Such + expressions need to evaluated to verify that the subquery indeed + returns a single row. The evaluation of such expressions is delayed + until query execution. + */ + List exec_const_order_group_cond; + SQL_SELECT *select; ///ref_pointer_array contains five "slices" of the same length: + |========|========|========|========|========| + ref_ptrs items0 items1 items2 items3 + */ + Ref_ptr_array ref_ptrs; + // Copy of the initial slice above, to be used with different lists + Ref_ptr_array items0, items1, items2, items3; + // Used by rollup, to restore ref_ptrs after overwriting it. 
+ Ref_ptr_array current_ref_ptrs; + + const char *zero_result_cause; ///< not 0 if exec must return zero result + + bool union_part; ///< this subselect is part of union + + enum join_optimization_state { NOT_OPTIMIZED=0, + OPTIMIZATION_IN_PROGRESS=1, + OPTIMIZATION_PHASE_1_DONE=2, + OPTIMIZATION_DONE=3}; + // state of JOIN optimization + enum join_optimization_state optimization_state; + bool initialized; ///< flag to avoid double init_execution calls + + Explain_select *explain; + + enum { QEP_NOT_PRESENT_YET, QEP_AVAILABLE, QEP_DELETED} have_query_plan; + + // if keep_current_rowid=true, whether they should be saved in temporary table + bool tmp_table_keep_current_rowid; + + /* + Additional WHERE and HAVING predicates to be considered for IN=>EXISTS + subquery transformation of a JOIN object. + */ + Item *in_to_exists_where; + Item *in_to_exists_having; + + /* Temporary tables used to weed-out semi-join duplicates */ + List
sj_tmp_tables; + /* SJM nests that are executed with SJ-Materialization strategy */ + List sjm_info_list; + + /** TRUE <=> ref_pointer_array is set to items3. */ + bool set_group_rpa; + /** Exec time only: TRUE <=> current group has been sent */ + bool group_sent; + /** + TRUE if the query contains an aggregate function but has no GROUP + BY clause. + */ + bool implicit_grouping; + + bool with_two_phase_optimization; + + /* Saved execution plan for this join */ + Join_plan_state *save_qep; + /* Info on splittability of the table materialized by this plan*/ + SplM_opt_info *spl_opt_info; + /* Contains info on keyuses usable for splitting */ + Dynamic_array *ext_keyuses_for_splitting; + + JOIN_TAB *sort_and_group_aggr_tab; + /* + Flag is set to true if select_lex was found to be degenerated before + the optimize_cond() call in JOIN::optimize_inner() method. + */ + bool is_orig_degenerated; + + JOIN(THD *thd_arg, List &fields_arg, ulonglong select_options_arg, + select_result *result_arg) + :fields_list(fields_arg) + { + init(thd_arg, fields_arg, select_options_arg, result_arg); + } + + void init(THD *thd_arg, List &fields_arg, ulonglong select_options_arg, + select_result *result_arg); + + /* True if the plan guarantees that it will be returned zero or one row */ + bool only_const_tables() { return const_tables == table_count; } + /* Number of tables actually joined at the top level */ + uint exec_join_tab_cnt() { return tables_list ? top_join_tab_count : 0; } + + /* + Number of tables in the join which also includes the temporary tables + created for GROUP BY, DISTINCT , WINDOW FUNCTION etc. 
+ */ + uint total_join_tab_cnt() + { + return exec_join_tab_cnt() + aggr_tables - 1; + } + + int prepare(TABLE_LIST *tables, COND *conds, uint og_num, ORDER *order, + bool skip_order_by, ORDER *group, Item *having, + ORDER *proc_param, SELECT_LEX *select, SELECT_LEX_UNIT *unit); + bool prepare_stage2(); + int optimize(); + int optimize_inner(); + int optimize_stage2(); + bool build_explain(); + int reinit(); + int init_execution(); + void exec(); + + void exec_inner(); + bool prepare_result(List **columns_list); + int destroy(); + void restore_tmp(); + bool alloc_func_list(); + bool flatten_subqueries(); + bool optimize_unflattened_subqueries(); + bool optimize_constant_subqueries(); + bool make_range_rowid_filters(); + bool init_range_rowid_filters(); + bool make_sum_func_list(List &all_fields, List &send_fields, + bool before_group_by); + + /// Initialzes a slice, see comments for ref_ptrs above. + Ref_ptr_array ref_ptr_array_slice(size_t slice_num) + { + size_t slice_sz= select_lex->ref_pointer_array.size() / 5U; + DBUG_ASSERT(select_lex->ref_pointer_array.size() % 5 == 0); + DBUG_ASSERT(slice_num < 5U); + return Ref_ptr_array(&select_lex->ref_pointer_array[slice_num * slice_sz], + slice_sz); + } + + /** + Overwrites one slice with the contents of another slice. + In the normal case, dst and src have the same size(). + However: the rollup slices may have smaller size than slice_sz. + */ + void copy_ref_ptr_array(Ref_ptr_array dst_arr, Ref_ptr_array src_arr) + { + DBUG_ASSERT(dst_arr.size() >= src_arr.size()); + if (src_arr.size() == 0) + return; + + void *dest= dst_arr.array(); + const void *src= src_arr.array(); + memcpy(dest, src, src_arr.size() * src_arr.element_size()); + } + + /// Overwrites 'ref_ptrs' and remembers the the source as 'current'. + void set_items_ref_array(Ref_ptr_array src_arr) + { + copy_ref_ptr_array(ref_ptrs, src_arr); + current_ref_ptrs= src_arr; + } + + /// Initializes 'items0' and remembers that it is 'current'. 
+ void init_items_ref_array() + { + items0= ref_ptr_array_slice(1); + copy_ref_ptr_array(items0, ref_ptrs); + current_ref_ptrs= items0; + } + + bool rollup_init(); + bool rollup_process_const_fields(); + bool rollup_make_fields(List &all_fields, List &fields, + Item_sum ***func); + int rollup_send_data(uint idx); + int rollup_write_data(uint idx, TMP_TABLE_PARAM *tmp_table_param, TABLE *table); + void join_free(); + /** Cleanup this JOIN, possibly for reuse */ + void cleanup(bool full); + void clear(table_map *cleared_tables); + void inline clear_sum_funcs(); + bool send_row_on_empty_set() + { + return (do_send_rows && implicit_grouping && !group_optimized_away && + having_value != Item::COND_FALSE); + } + bool empty_result() { return (zero_result_cause && !implicit_grouping); } + bool change_result(select_result *new_result, select_result *old_result); + bool is_top_level_join() const + { + return (unit == &thd->lex->unit && (unit->fake_select_lex == 0 || + select_lex == unit->fake_select_lex)); + } + void cache_const_exprs(); + inline table_map all_tables_map() + { + return (table_map(1) << table_count) - 1; + } + void drop_unused_derived_keys(); + bool get_best_combination(); + bool add_sorting_to_table(JOIN_TAB *tab, ORDER *order); + inline void eval_select_list_used_tables(); + /* + Return the table for which an index scan can be used to satisfy + the sort order needed by the ORDER BY/(implicit) GROUP BY clause + */ + JOIN_TAB *get_sort_by_join_tab() + { + return (need_tmp || !sort_by_table || skip_sort_order || + ((group || tmp_table_param.sum_func_count) && !group_list)) ? 
+ NULL : join_tab+const_tables; + } + bool setup_subquery_caches(); + bool shrink_join_buffers(JOIN_TAB *jt, + ulonglong curr_space, + ulonglong needed_space); + void set_allowed_join_cache_types(); + bool is_allowed_hash_join_access() + { + return MY_TEST(allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) && + max_allowed_join_cache_level > JOIN_CACHE_HASHED_BIT; + } + /* + Check if we need to create a temporary table. + This has to be done if all tables are not already read (const tables) + and one of the following conditions holds: + - We are using DISTINCT (simple distinct's are already optimized away) + - We are using an ORDER BY or GROUP BY on fields not in the first table + - We are using different ORDER BY and GROUP BY orders + - The user wants us to buffer the result. + - We are using WINDOW functions. + When the WITH ROLLUP modifier is present, we cannot skip temporary table + creation for the DISTINCT clause just because there are only const tables. + */ + bool test_if_need_tmp_table() + { + return ((const_tables != table_count && + ((select_distinct || !simple_order || !simple_group) || + (group_list && order) || + MY_TEST(select_options & OPTION_BUFFER_RESULT))) || + (rollup.state != ROLLUP::STATE_NONE && select_distinct) || + select_lex->have_window_funcs()); + } + bool choose_subquery_plan(table_map join_tables); + void get_partial_cost_and_fanout(int end_tab_idx, + table_map filter_map, + double *read_time_arg, + double *record_count_arg); + void get_prefix_cost_and_fanout(uint n_tables, + double *read_time_arg, + double *record_count_arg); + double get_examined_rows(); + /* defined in opt_subselect.cc */ + bool transform_max_min_subquery(); + /* True if this JOIN is a subquery under an IN predicate. 
*/ + bool is_in_subquery() + { + return (unit->item && unit->item->is_in_predicate()); + } + bool save_explain_data(Explain_query *output, bool can_overwrite, + bool need_tmp_table, bool need_order, bool distinct); + int save_explain_data_intern(Explain_query *output, bool need_tmp_table, + bool need_order, bool distinct, + const char *message); + JOIN_TAB *first_breadth_first_tab() { return join_tab; } + bool check_two_phase_optimization(THD *thd); + bool inject_cond_into_where(Item *injected_cond); + bool check_for_splittable_materialized(); + void add_keyuses_for_splitting(); + bool inject_best_splitting_cond(table_map remaining_tables); + bool fix_all_splittings_in_plan(); + bool inject_splitting_cond_for_all_tables_with_split_opt(); + void make_notnull_conds_for_range_scans(); + + bool transform_in_predicates_into_in_subq(THD *thd); + + bool optimize_upper_rownum_func(); + void calc_allowed_top_level_tables(SELECT_LEX *lex); + table_map get_allowed_nj_tables(uint idx); + +private: + /** + Create a temporary table to be used for processing DISTINCT/ORDER + BY/GROUP BY. + + @note Will modify JOIN object wrt sort/group attributes + + @param tab the JOIN_TAB object to attach created table to + @param tmp_table_fields List of items that will be used to define + column types of the table. + @param tmp_table_group Group key to use for temporary table, NULL if none. + @param save_sum_fields If true, do not replace Item_sum items in + @c tmp_fields list with Item_field items referring + to fields in temporary table. + + @returns false on success, true on failure + */ + bool create_postjoin_aggr_table(JOIN_TAB *tab, List *tmp_table_fields, + ORDER *tmp_table_group, + bool save_sum_fields, + bool distinct, + bool keep_row_ordermake); + /** + Optimize distinct when used on a subset of the tables. 
+ + E.g.,: SELECT DISTINCT t1.a FROM t1,t2 WHERE t1.b=t2.b + In this case we can stop scanning t2 when we have found one t1.a + */ + void optimize_distinct(); + + void cleanup_item_list(List &items) const; + bool add_having_as_table_cond(JOIN_TAB *tab); + bool make_aggr_tables_info(); + bool add_fields_for_current_rowid(JOIN_TAB *cur, List *fields); + void free_pushdown_handlers(List& join_list); + void init_join_cache_and_keyread(); + bool transform_in_predicates_into_equalities(THD *thd); + bool transform_all_conds_and_on_exprs(THD *thd, + Item_transformer transformer); + bool transform_all_conds_and_on_exprs_in_join_list(THD *thd, + List *join_list, + Item_transformer transformer); +}; + +enum enum_with_bush_roots { WITH_BUSH_ROOTS, WITHOUT_BUSH_ROOTS}; +enum enum_with_const_tables { WITH_CONST_TABLES, WITHOUT_CONST_TABLES}; + +JOIN_TAB *first_linear_tab(JOIN *join, + enum enum_with_bush_roots include_bush_roots, + enum enum_with_const_tables const_tbls); +JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, + enum enum_with_bush_roots include_bush_roots); + +JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const); +JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab); + +typedef struct st_select_check { + uint const_ref,reg_ref; +} SELECT_CHECK; + +extern const char *join_type_str[]; + +/* Extern functions in sql_select.cc */ +void count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param, + List &fields, bool reset_with_sum_func); +bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, + Ref_ptr_array ref_pointer_array, + List &new_list1, List &new_list2, + uint elements, List &fields); +void copy_fields(TMP_TABLE_PARAM *param); +bool copy_funcs(Item **func_ptr, const THD *thd); +uint find_shortest_key(TABLE *table, const key_map *usable_keys); +bool is_indexed_agg_distinct(JOIN *join, List *out_args); + +/* functions from opt_sum.cc */ +bool simple_pred(Item_func *func_item, Item **args, bool *inv_order); +int 
opt_sum_query(THD* thd, + List &tables, List &all_fields, COND *conds); + +/* from sql_delete.cc, used by opt_range.cc */ +extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b); + +/** class to copying an field/item to a key struct */ + +class store_key :public Sql_alloc +{ +public: + bool null_key; /* TRUE <=> the value of the key has a null part */ + enum store_key_result { STORE_KEY_OK, STORE_KEY_FATAL, STORE_KEY_CONV }; + enum Type { FIELD_STORE_KEY, ITEM_STORE_KEY, CONST_ITEM_STORE_KEY }; + store_key(THD *thd, Field *field_arg, uchar *ptr, uchar *null, uint length) + :null_key(0), null_ptr(null), err(0) + { + to_field=field_arg->new_key_field(thd->mem_root, field_arg->table, + ptr, length, null, 1); + } + store_key(store_key &arg) + :Sql_alloc(), null_key(arg.null_key), to_field(arg.to_field), + null_ptr(arg.null_ptr), err(arg.err) + + {} + virtual ~store_key() = default; /** Not actually needed */ + virtual enum Type type() const=0; + virtual const char *name() const=0; + virtual bool store_key_is_const() { return false; } + + /** + @brief sets ignore truncation warnings mode and calls the real copy method + + @details this function makes sure truncation warnings when preparing the + key buffers don't end up as errors (because of an enclosing INSERT/UPDATE). + */ + enum store_key_result copy(THD *thd) + { + enum_check_fields org_count_cuted_fields= thd->count_cuted_fields; + Use_relaxed_field_copy urfc(to_field->table->in_use); + + /* If needed, perform CharsetNarrowing for making ref access lookup keys. */ + Utf8_narrow do_narrow(to_field, do_cset_narrowing); + + store_key_result result= copy_inner(); + + do_narrow.stop(); + + thd->count_cuted_fields= org_count_cuted_fields; + return result; + } + + protected: + Field *to_field; // Store data here + uchar *null_ptr; + uchar err; + + /* + This is set to true if we need to do Charset Narrowing when making a lookup + key. 
+ */ + bool do_cset_narrowing= false; + + virtual enum store_key_result copy_inner()=0; +}; + + +class store_key_field: public store_key +{ + Copy_field copy_field; + const char *field_name; + public: + store_key_field(THD *thd, Field *to_field_arg, uchar *ptr, + uchar *null_ptr_arg, + uint length, Field *from_field, const char *name_arg) + :store_key(thd, to_field_arg,ptr, + null_ptr_arg ? null_ptr_arg : from_field->maybe_null() ? &err + : (uchar*) 0, length), field_name(name_arg) + { + if (to_field) + { + copy_field.set(to_field,from_field,0); + setup_charset_narrowing(); + } + } + + enum Type type() const override { return FIELD_STORE_KEY; } + const char *name() const override { return field_name; } + + void change_source_field(Item_field *fld_item) + { + copy_field.set(to_field, fld_item->field, 0); + field_name= fld_item->full_name(); + setup_charset_narrowing(); + } + + /* Setup CharsetNarrowing if necessary */ + void setup_charset_narrowing() + { + do_cset_narrowing= + Utf8_narrow::should_do_narrowing(copy_field.to_field, + copy_field.from_field->charset()); + } + + protected: + enum store_key_result copy_inner() override + { + TABLE *table= copy_field.to_field->table; + MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, + &table->write_set); + + /* + It looks like the next statement is needed only for a simplified + hash function over key values used now in BNLH join. + When the implementation of this function will be replaced for a proper + full version this statement probably should be removed. + */ + bzero(copy_field.to_ptr,copy_field.to_length); + + copy_field.do_copy(©_field); + dbug_tmp_restore_column_map(&table->write_set, old_map); + null_key= to_field->is_null(); + return err != 0 ? STORE_KEY_FATAL : STORE_KEY_OK; + } +}; + + +class store_key_item :public store_key +{ + protected: + Item *item; + /* + Flag that forces usage of save_val() method which save value of the + item instead of save_in_field() method which saves result. 
+ */ + bool use_value; +public: + store_key_item(THD *thd, Field *to_field_arg, uchar *ptr, + uchar *null_ptr_arg, uint length, Item *item_arg, bool val) + :store_key(thd, to_field_arg, ptr, + null_ptr_arg ? null_ptr_arg : item_arg->maybe_null() ? + &err : (uchar*) 0, length), item(item_arg), use_value(val) + { + /* Setup CharsetNarrowing to be done if necessary */ + do_cset_narrowing= + Utf8_narrow::should_do_narrowing(to_field, + item->collation.collation); + } + store_key_item(store_key &arg, Item *new_item, bool val) + :store_key(arg), item(new_item), use_value(val) + {} + + + enum Type type() const override { return ITEM_STORE_KEY; } + const char *name() const override { return "func"; } + + protected: + enum store_key_result copy_inner() override + { + TABLE *table= to_field->table; + MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, + &table->write_set); + int res= FALSE; + + /* + It looks like the next statement is needed only for a simplified + hash function over key values used now in BNLH join. + When the implementation of this function will be replaced for a proper + full version this statement probably should be removed. + */ + to_field->reset(); + + if (use_value) + item->save_val(to_field); + else + res= item->save_in_field(to_field, 1); + /* + Item::save_in_field() may call Item::val_xxx(). And if this is a subquery + we need to check for errors executing it and react accordingly + */ + if (!res && table->in_use->is_error()) + res= 1; /* STORE_KEY_FATAL */ + dbug_tmp_restore_column_map(&table->write_set, old_map); + null_key= to_field->is_null() || item->null_value; + return ((err != 0 || res < 0 || res > 2) ? STORE_KEY_FATAL : + (store_key_result) res); + } +}; + + +class store_key_const_item :public store_key_item +{ + bool inited; +public: + store_key_const_item(THD *thd, Field *to_field_arg, uchar *ptr, + uchar *null_ptr_arg, uint length, + Item *item_arg) + :store_key_item(thd, to_field_arg, ptr, + null_ptr_arg ? 
null_ptr_arg : item_arg->maybe_null() ? + &err : (uchar*) 0, length, item_arg, FALSE), inited(0) + { + } + store_key_const_item(store_key &arg, Item *new_item) + :store_key_item(arg, new_item, FALSE), inited(0) + {} + + enum Type type() const override { return CONST_ITEM_STORE_KEY; } + const char *name() const override { return "const"; } + bool store_key_is_const() override { return true; } + +protected: + enum store_key_result copy_inner() override + { + int res; + if (!inited) + { + inited=1; + TABLE *table= to_field->table; + MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, + &table->write_set); + if ((res= item->save_in_field(to_field, 1))) + { + if (!err) + err= res < 0 ? 1 : res; /* 1=STORE_KEY_FATAL */ + } + /* + Item::save_in_field() may call Item::val_xxx(). And if this is a subquery + we need to check for errors executing it and react accordingly + */ + if (!err && to_field->table->in_use->is_error()) + err= 1; /* STORE_KEY_FATAL */ + dbug_tmp_restore_column_map(&table->write_set, old_map); + } + null_key= to_field->is_null() || item->null_value; + return (err > 2 ? 
STORE_KEY_FATAL : (store_key_result) err); + } +}; + +void best_access_path(JOIN *join, JOIN_TAB *s, + table_map remaining_tables, + const POSITION *join_positions, uint idx, + bool disable_jbuf, double record_count, + POSITION *pos, POSITION *loose_scan_pos); +bool cp_buffer_from_ref(THD *thd, TABLE *table, TABLE_REF *ref); +bool error_if_full_join(JOIN *join); +int report_error(TABLE *table, int error); +int safe_index_read(JOIN_TAB *tab); +int get_quick_record(SQL_SELECT *select); +int setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, + List &fields, List &all_fields, ORDER *order, + bool from_window_spec= false); +int setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, + List &fields, List &all_fields, ORDER *order, + bool *hidden_group_fields, bool from_window_spec= false); +bool fix_inner_refs(THD *thd, List &all_fields, SELECT_LEX *select, + Ref_ptr_array ref_pointer_array); +int join_read_key2(THD *thd, struct st_join_table *tab, TABLE *table, + struct st_table_ref *table_ref); + +bool handle_select(THD *thd, LEX *lex, select_result *result, + ulonglong setup_tables_done_option); +bool mysql_select(THD *thd, TABLE_LIST *tables, List &list, + COND *conds, uint og_num, ORDER *order, ORDER *group, + Item *having, ORDER *proc_param, ulonglong select_type, + select_result *result, SELECT_LEX_UNIT *unit, + SELECT_LEX *select_lex); +void free_underlaid_joins(THD *thd, SELECT_LEX *select); +bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, + select_result *result); + +/* + General routine to change field->ptr of a NULL-terminated array of Field + objects. Useful when needed to call val_int, val_str or similar and the + field data is not in table->record[0] but in some other structure. + set_key_field_ptr changes all fields of an index using a key_info object. + All methods presume that there is at least one field to change. +*/ + + +class Virtual_tmp_table: public TABLE +{ + /** + Destruct collected fields. 
This method can be called on errors, + when we could not make the virtual temporary table completely, + e.g. when some of the fields could not be created or added. + + This is needed to avoid memory leaks, as some fields can be BLOB + variants and thus can have String onboard. Strings must be destructed + as they store data on the heap (not on MEM_ROOT). + */ + void destruct_fields() + { + for (uint i= 0; i < s->fields; i++) + { + field[i]->free(); + delete field[i]; // to invoke the field destructor + } + s->fields= 0; // safety + } + +protected: + /** + The number of the fields that are going to be in the table. + We remember the number of the fields at init() time, and + at open() we check that all of the fields were really added. + */ + uint m_alloced_field_count; + + /** + Setup field pointers and null-bit pointers. + */ + void setup_field_pointers(); + +public: + /** + Create a new empty virtual temporary table on the thread mem_root. + After creation, the caller must: + - call init() + - populate the table with new fields using add(). + - call open(). + @param thd - Current thread. + */ + static void *operator new(size_t size, THD *thd) throw(); + static void operator delete(void *ptr, size_t size) { TRASH_FREE(ptr, size); } + static void operator delete(void *, THD *) throw(){} + + Virtual_tmp_table(THD *thd) : m_alloced_field_count(0) + { + reset(); + temp_pool_slot= MY_BIT_NONE; + in_use= thd; + copy_blobs= true; + alias.set("", 0, &my_charset_bin); + } + + ~Virtual_tmp_table() + { + if (s) + destruct_fields(); + } + + /** + Allocate components for the given number of fields. + - fields[] + - s->blob_fields[], + - bitmaps: def_read_set, def_write_set, tmp_set, eq_join_set, cond_set. + @param field_count - The number of fields we plan to add to the table. + @returns false - on success. + @returns true - on error. 
+ */ + bool init(uint field_count); + + /** + Add one Field to the end of the field array, update members: + s->reclength, s->fields, s->blob_fields, s->null_fuelds. + */ + bool add(Field *new_field) + { + DBUG_ASSERT(s->fields < m_alloced_field_count); + new_field->init(this); + field[s->fields]= new_field; + s->reclength+= new_field->pack_length(); + if (!(new_field->flags & NOT_NULL_FLAG)) + s->null_fields++; + if (new_field->flags & BLOB_FLAG) + { + // Note, s->blob_fields was incremented in Field_blob::Field_blob + DBUG_ASSERT(s->blob_fields); + DBUG_ASSERT(s->blob_fields <= m_alloced_field_count); + s->blob_field[s->blob_fields - 1]= s->fields; + } + new_field->field_index= s->fields++; + return false; + } + + /** + Add fields from a Spvar_definition list + @returns false - on success. + @returns true - on error. + */ + bool add(List &field_list); + + /** + Open a virtual table for read/write: + - Setup end markers in TABLE::field and TABLE_SHARE::blob_fields, + - Allocate a buffer in TABLE::record[0]. + - Set field pointers (Field::ptr, Field::null_pos, Field::null_bit) to + the allocated record. + This method is called when all of the fields have been added to the table. + After calling this method the table is ready for read and write operations. + @return false - on success + @return true - on error (e.g. could not allocate the record buffer). + */ + bool open(); + + void set_all_fields_to_null() + { + for (uint i= 0; i < s->fields; i++) + field[i]->set_null(); + } + /** + Set all fields from a compatible item list. + The number of fields in "this" must be equal to the number + of elements in "value". + */ + bool sp_set_all_fields_from_item_list(THD *thd, List &items); + + /** + Set all fields from a compatible item. + The number of fields in "this" must be the same with the number + of elements in "value". 
+ */ + bool sp_set_all_fields_from_item(THD *thd, Item *value); + + /** + Find a ROW element index by its name + Assumes that "this" is used as a storage for a ROW-type SP variable. + @param [OUT] idx - the index of the found field is returned here + @param [IN] field_name - find a field with this name + @return true - on error (the field was not found) + @return false - on success (idx[0] was set to the field index) + */ + bool sp_find_field_by_name(uint *idx, const LEX_CSTRING &name) const; + + /** + Find a ROW element index by its name. + If the element is not found, and error is issued. + @param [OUT] idx - the index of the found field is returned here + @param [IN] var_name - the name of the ROW variable (for error reporting) + @param [IN] field_name - find a field with this name + @return true - on error (the field was not found) + @return false - on success (idx[0] was set to the field index) + */ + bool sp_find_field_by_name_or_error(uint *idx, + const LEX_CSTRING &var_name, + const LEX_CSTRING &field_name) const; +}; + + +/** + Create a reduced TABLE object with properly set up Field list from a + list of field definitions. + + The created table doesn't have a table handler associated with + it, has no keys, no group/distinct, no copy_funcs array. + The sole purpose of this TABLE object is to use the power of Field + class to read/write data to/from table->record[0]. Then one can store + the record in any container (RB tree, hash, etc). + The table is created in THD mem_root, so are the table's fields. + Consequently, if you don't BLOB fields, you don't need to free it. 
+ + @param thd connection handle + @param field_list list of column definitions + + @return + 0 if out of memory, or a + TABLE object ready for read and write in case of success +*/ + +inline Virtual_tmp_table * +create_virtual_tmp_table(THD *thd, List &field_list) +{ + Virtual_tmp_table *table; + if (!(table= new(thd) Virtual_tmp_table(thd))) + return NULL; + + /* + If "simulate_create_virtual_tmp_table_out_of_memory" debug option + is enabled, we now enable "simulate_out_of_memory". This effectively + makes table->init() fail on OOM inside multi_alloc_root(). + This is done to test that ~Virtual_tmp_table() called from the "delete" + below correcly handles OOM. + */ + DBUG_EXECUTE_IF("simulate_create_virtual_tmp_table_out_of_memory", + DBUG_SET("+d,simulate_out_of_memory");); + + if (table->init(field_list.elements) || + table->add(field_list) || + table->open()) + { + delete table; + return NULL; + } + return table; +} + + +/** + Create a new virtual temporary table consisting of a single field. + SUM(DISTINCT expr) and similar numeric aggregate functions use this. + @param thd - Current thread + @param field - The field that will be added into the table. + @return NULL - On error. + @return !NULL - A pointer to the created table that is ready + for read and write. +*/ +inline TABLE * +create_virtual_tmp_table(THD *thd, Field *field) +{ + Virtual_tmp_table *table; + DBUG_ASSERT(field); + if (!(table= new(thd) Virtual_tmp_table(thd))) + return NULL; + if (table->init(1) || + table->add(field) || + table->open()) + { + delete table; + return NULL; + } + return table; +} + + +int test_if_item_cache_changed(List &list); +int join_init_read_record(JOIN_TAB *tab); +void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key); +inline Item * and_items(THD *thd, Item* cond, Item *item) +{ + return (cond ? (new (thd->mem_root) Item_cond_and(thd, cond, item)) : item); +} +inline Item * or_items(THD *thd, Item* cond, Item *item) +{ + return (cond ? 
(new (thd->mem_root) Item_cond_or(thd, cond, item)) : item); +} +bool choose_plan(JOIN *join, table_map join_tables); +void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, + table_map last_remaining_tables, + bool first_alt, uint no_jbuf_before, + double *outer_rec_count, double *reopt_cost); +Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field, + bool *inherited_fl); +extern bool test_if_ref(Item *, + Item_field *left_item,Item *right_item); + +inline bool optimizer_flag(const THD *thd, ulonglong flag) +{ + return (thd->variables.optimizer_switch & flag); +} + +/* +int print_fake_select_lex_join(select_result_sink *result, bool on_the_fly, + SELECT_LEX *select_lex, uint8 select_options); +*/ + +uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select, + ha_rows limit, ha_rows *scanned_limit, + bool *need_sort, bool *reverse); +ORDER *simple_remove_const(ORDER *order, COND *where); +bool const_expression_in_where(COND *cond, Item *comp_item, + Field *comp_field= NULL, + Item **const_item= NULL); +bool cond_is_datetime_is_null(Item *cond); +bool cond_has_datetime_is_null(Item *cond); + +/* Table elimination entry point function */ +void eliminate_tables(JOIN *join); + +/* Index Condition Pushdown entry point function */ +void push_index_cond(JOIN_TAB *tab, uint keyno); + +#define OPT_LINK_EQUAL_FIELDS 1 + +/* EXPLAIN-related utility functions */ +int print_explain_message_line(select_result_sink *result, + uint8 options, bool is_analyze, + uint select_number, + const char *select_type, + ha_rows *rows, + const char *message); +void explain_append_mrr_info(QUICK_RANGE_SELECT *quick, String *res); +int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table, + key_map possible_keys); +void unpack_to_base_table_fields(TABLE *table); + +/**************************************************************************** + Temporary table support for SQL Runtime + 
***************************************************************************/ + +#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128 +#define AVG_STRING_LENGTH_TO_PACK_ROWS 64 +#define RATIO_TO_PACK_ROWS 2 +#define MIN_STRING_LENGTH_TO_PACK_ROWS 10 + +void calc_group_buffer(TMP_TABLE_PARAM *param, ORDER *group); +TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List &fields, + ORDER *group, bool distinct, bool save_sum_fields, + ulonglong select_options, ha_rows rows_limit, + const LEX_CSTRING *alias, bool do_not_open=FALSE, + bool keep_row_order= FALSE); +TABLE *create_tmp_table_for_schema(THD *thd, TMP_TABLE_PARAM *param, + const ST_SCHEMA_TABLE &schema_table, + longlong select_options, + const LEX_CSTRING &alias, + bool do_not_open, bool keep_row_order); + +void free_tmp_table(THD *thd, TABLE *entry); +bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table, + TMP_ENGINE_COLUMNDEF *start_recinfo, + TMP_ENGINE_COLUMNDEF **recinfo, + int error, bool ignore_last_dupp_key_error, + bool *is_duplicate); +bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, + TMP_ENGINE_COLUMNDEF *start_recinfo, + TMP_ENGINE_COLUMNDEF **recinfo, + ulonglong options); +bool instantiate_tmp_table(TABLE *table, KEY *keyinfo, + TMP_ENGINE_COLUMNDEF *start_recinfo, + TMP_ENGINE_COLUMNDEF **recinfo, + ulonglong options); +bool open_tmp_table(TABLE *table); +double prev_record_reads(const POSITION *positions, uint idx, table_map found_ref); +void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List *tlist); +double get_tmp_table_lookup_cost(THD *thd, double row_count, uint row_size); +double get_tmp_table_write_cost(THD *thd, double row_count, uint row_size); +void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array); +bool sort_and_filter_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse, + bool skip_unprefixed_keyparts); + +struct st_cond_statistic +{ + Item *cond; + Field *field_arg; + ulong positive; +}; +typedef struct st_cond_statistic COND_STATISTIC; + +ulong 
check_selectivity(THD *thd, + ulong rows_to_read, + TABLE *table, + List *conds); + +class Pushdown_query: public Sql_alloc +{ +public: + SELECT_LEX *select_lex; + bool store_data_in_temp_table; + group_by_handler *handler; + Item *having; + + Pushdown_query(SELECT_LEX *select_lex_arg, group_by_handler *handler_arg) + : select_lex(select_lex_arg), store_data_in_temp_table(0), + handler(handler_arg), having(0) {} + + ~Pushdown_query() { delete handler; } + + /* Function that calls the above scan functions */ + int execute(JOIN *); +}; + +class derived_handler; + +class Pushdown_derived: public Sql_alloc +{ +public: + TABLE_LIST *derived; + derived_handler *handler; + + Pushdown_derived(TABLE_LIST *tbl, derived_handler *h); + + int execute(); +}; + + +class select_handler; + + +bool test_if_order_compatible(SQL_I_List &a, SQL_I_List &b); +int test_if_group_changed(List &list); +int create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort); + +JOIN_TAB *first_explain_order_tab(JOIN* join); +JOIN_TAB *next_explain_order_tab(JOIN* join, JOIN_TAB* tab); + +bool is_eliminated_table(table_map eliminated_tables, TABLE_LIST *tbl); + +bool check_simple_equality(THD *thd, const Item::Context &ctx, + Item *left_item, Item *right_item, + COND_EQUAL *cond_equal); + +void propagate_new_equalities(THD *thd, Item *cond, + List *new_equalities, + COND_EQUAL *inherited, + bool *is_simplifiable_cond); + +bool dbug_user_var_equals_str(THD *thd, const char *name, const char *value); +#endif /* SQL_SELECT_INCLUDED */ diff --git a/sql/sql_sequence.cc b/sql/sql_sequence.cc new file mode 100644 index 00000000..f5652bd8 --- /dev/null +++ b/sql/sql_sequence.cc @@ -0,0 +1,1046 @@ +/* + Copyright (c) 2017, MariaDB Corporation, Alibaba Corporation + Copyrgiht (c) 2020, MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "mariadb.h" +#include "sql_class.h" +#include "sql_list.h" +#include "sql_sequence.h" +#include "ha_sequence.h" +#include "sql_base.h" +#include "sql_table.h" // write_bin_log +#include "transaction.h" +#include "lock.h" +#include "sql_acl.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif + +struct Field_definition +{ + const char *field_name; + uint length; + const Type_handler *type_handler; + LEX_CSTRING comment; + ulong flags; +}; + +/* + Structure for all SEQUENCE tables + + Note that the first field is named "next_val" to all us to have + NEXTVAL a reserved word that will on access be changed to + NEXTVAL(sequence_table). For this to work, the table can't have + a column named NEXTVAL. 
+*/ + +#define FL (NOT_NULL_FLAG | NO_DEFAULT_VALUE_FLAG) + +static Field_definition sequence_structure[]= +{ + {"next_not_cached_value", 21, &type_handler_slonglong, + {STRING_WITH_LEN("")}, FL}, + {"minimum_value", 21, &type_handler_slonglong, {STRING_WITH_LEN("")}, FL}, + {"maximum_value", 21, &type_handler_slonglong, {STRING_WITH_LEN("")}, FL}, + {"start_value", 21, &type_handler_slonglong, {STRING_WITH_LEN("start value when sequences is created or value if RESTART is used")}, FL}, + {"increment", 21, &type_handler_slonglong, + {STRING_WITH_LEN("increment value")}, FL}, + {"cache_size", 21, &type_handler_ulonglong, {STRING_WITH_LEN("")}, + FL | UNSIGNED_FLAG}, + {"cycle_option", 1, &type_handler_utiny, {STRING_WITH_LEN("0 if no cycles are allowed, 1 if the sequence should begin a new cycle when maximum_value is passed")}, + FL | UNSIGNED_FLAG }, + {"cycle_count", 21, &type_handler_slonglong, + {STRING_WITH_LEN("How many cycles have been done")}, FL}, + {NULL, 0, &type_handler_slonglong, {STRING_WITH_LEN("")}, 0} +}; + +#undef FL + + +#define MAX_AUTO_INCREMENT_VALUE 65535 + +/* + Check whether sequence values are valid. + Sets default values for fields that are not used, according to Oracle spec. + + RETURN VALUES + false valid + true invalid +*/ + +bool sequence_definition::check_and_adjust(bool set_reserved_until) +{ + longlong max_increment; + DBUG_ENTER("sequence_definition::check"); + + if (!(real_increment= increment)) + real_increment= global_system_variables.auto_increment_increment; + + /* + If min_value is not set, set it to LONGLONG_MIN or 1, depending on + real_increment + */ + if (!(used_fields & seq_field_used_min_value)) + min_value= real_increment < 0 ? LONGLONG_MIN+1 : 1; + + /* + If max_value is not set, set it to LONGLONG_MAX or -1, depending on + real_increment + */ + if (!(used_fields & seq_field_used_max_value)) + max_value= real_increment < 0 ? 
-1 : LONGLONG_MAX-1; + + if (!(used_fields & seq_field_used_start)) + { + /* Use min_value or max_value for start depending on real_increment */ + start= real_increment < 0 ? max_value : min_value; + } + + if (set_reserved_until) + reserved_until= start; + + adjust_values(reserved_until); + + /* To ensure that cache * real_increment will never overflow */ + max_increment= (real_increment ? + llabs(real_increment) : + MAX_AUTO_INCREMENT_VALUE); + + if (max_value >= start && + max_value > min_value && + start >= min_value && + max_value != LONGLONG_MAX && + min_value != LONGLONG_MIN && + cache >= 0 && cache < (LONGLONG_MAX - max_increment) / max_increment && + ((real_increment > 0 && reserved_until >= min_value) || + (real_increment < 0 && reserved_until <= max_value))) + DBUG_RETURN(FALSE); + + DBUG_RETURN(TRUE); // Error +} + + +/* + Read sequence values from a table +*/ + +void sequence_definition::read_fields(TABLE *table) +{ + MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, &table->read_set); + reserved_until= table->field[0]->val_int(); + min_value= table->field[1]->val_int(); + max_value= table->field[2]->val_int(); + start= table->field[3]->val_int(); + increment= table->field[4]->val_int(); + cache= table->field[5]->val_int(); + cycle= table->field[6]->val_int(); + round= table->field[7]->val_int(); + dbug_tmp_restore_column_map(&table->read_set, old_map); + used_fields= ~(uint) 0; + print_dbug(); +} + + +/* + Store sequence into a table row +*/ + +void sequence_definition::store_fields(TABLE *table) +{ + MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, &table->write_set); + + /* zero possible delete markers & null bits */ + memcpy(table->record[0], table->s->default_values, table->s->null_bytes); + table->field[0]->store(reserved_until, 0); + table->field[1]->store(min_value, 0); + table->field[2]->store(max_value, 0); + table->field[3]->store(start, 0); + table->field[4]->store(increment, 0); + table->field[5]->store(cache, 0); + 
table->field[6]->store((longlong) cycle != 0, 0); + table->field[7]->store((longlong) round, 1); + + dbug_tmp_restore_column_map(&table->write_set, old_map); + print_dbug(); +} + + +/* + Check the sequence fields through seq_fields when creating a sequence. + + RETURN VALUES + false Success + true Failure +*/ + +bool check_sequence_fields(LEX *lex, List *fields, + const LEX_CSTRING db, const LEX_CSTRING table_name) +{ + Create_field *field; + List_iterator_fast it(*fields); + uint field_count; + uint field_no; + const char *reason; + DBUG_ENTER("check_sequence_fields"); + + field_count= fields->elements; + if (field_count != array_elements(sequence_structure)-1) + { + reason= "Wrong number of columns"; + goto err; + } + if (lex->alter_info.key_list.elements > 0) + { + reason= "Sequence tables cannot have any keys"; + goto err; + } + if (lex->alter_info.check_constraint_list.elements > 0) + { + reason= "Sequence tables cannot have any constraints"; + goto err; + } + if (lex->alter_info.flags & ALTER_ORDER) + { + reason= "ORDER BY"; + goto err; + } + + for (field_no= 0; (field= it++); field_no++) + { + Field_definition *field_def= &sequence_structure[field_no]; + if (my_strcasecmp(system_charset_info, field_def->field_name, + field->field_name.str) || + field->flags != field_def->flags || + field->type_handler() != field_def->type_handler || + field->check_constraint || field->vcol_info) + { + reason= field->field_name.str; + goto err; + } + } + DBUG_RETURN(FALSE); + +err: + my_error(ER_SEQUENCE_INVALID_TABLE_STRUCTURE, MYF(0), + db.str, table_name.str, reason); + DBUG_RETURN(TRUE); +} + + +/* + Create the fields for a SEQUENCE TABLE + + RETURN VALUES + false Success + true Failure (out of memory) +*/ + +bool prepare_sequence_fields(THD *thd, List *fields) +{ + Field_definition *field_info; + DBUG_ENTER("prepare_sequence_fields"); + + for (field_info= sequence_structure; field_info->field_name ; field_info++) + { + Create_field *new_field; + LEX_CSTRING field_name= 
{field_info->field_name, + strlen(field_info->field_name)}; + + if (unlikely(!(new_field= new Create_field()))) + DBUG_RETURN(TRUE); /* purify inspected */ + + new_field->field_name= field_name; + new_field->set_handler(field_info->type_handler); + new_field->length= field_info->length; + new_field->char_length= field_info->length; + new_field->comment= field_info->comment; + new_field->flags= field_info->flags; + if (unlikely(fields->push_back(new_field))) + DBUG_RETURN(TRUE); /* purify inspected */ + } + DBUG_RETURN(FALSE); +} + +/* + Initialize the sequence table record as part of CREATE SEQUENCE + + Store one row with sequence information. + + RETURN VALUES + false Success + true Failure. Error reported. + + NOTES + This function is called as part of CREATE SEQUENCE. When called + there are now active transactions and no open tables. + There is also a MDL lock on the table. +*/ + +bool sequence_insert(THD *thd, LEX *lex, TABLE_LIST *org_table_list) +{ + int error; + TABLE *table; + Reprepare_observer *save_reprepare_observer; + sequence_definition *seq= lex->create_info.seq_create_info; + bool temporary_table= org_table_list->table != 0; + Open_tables_backup open_tables_backup; + Query_tables_list query_tables_list_backup; + TABLE_LIST table_list; // For sequence table + DBUG_ENTER("sequence_insert"); + DBUG_EXECUTE_IF("kill_query_on_sequence_insert", + thd->set_killed(KILL_QUERY);); + /* + seq is 0 if sequence was created with CREATE TABLE instead of + CREATE SEQUENCE + */ + if (!seq) + { + if (!(seq= new (thd->mem_root) sequence_definition)) + DBUG_RETURN(TRUE); + } + + /* If not temporary table */ + if (!temporary_table) + { + /* + The following code works like open_system_tables_for_read() + The idea is: + - Copy the table_list object for the sequence that was created + - Backup the current state of open tables and create a new + environment for open tables without any tables opened + - open the newly sequence table for write + This is safe as the sequence 
table has a mdl lock thanks to the + create sequence statement that is calling this function + */ + + table_list.init_one_table(&org_table_list->db, + &org_table_list->table_name, + NULL, TL_WRITE_DEFAULT); + table_list.updating= 1; + table_list.open_strategy= TABLE_LIST::OPEN_IF_EXISTS; + table_list.open_type= OT_BASE_ONLY; + + DBUG_ASSERT(!thd->locked_tables_mode || + (thd->variables.option_bits & OPTION_TABLE_LOCK)); + lex->reset_n_backup_query_tables_list(&query_tables_list_backup); + thd->reset_n_backup_open_tables_state(&open_tables_backup); + + /* + The FOR CREATE flag is needed to ensure that ha_open() doesn't try to + read the not yet existing row in the sequence table + */ + thd->open_options|= HA_OPEN_FOR_CREATE; + /* + We have to reset the reprepare observer to be able to open the + table under prepared statements. + */ + save_reprepare_observer= thd->m_reprepare_observer; + thd->m_reprepare_observer= 0; + lex->sql_command= SQLCOM_CREATE_SEQUENCE; + error= open_and_lock_tables(thd, &table_list, FALSE, + MYSQL_LOCK_IGNORE_TIMEOUT | + MYSQL_OPEN_HAS_MDL_LOCK); + thd->open_options&= ~HA_OPEN_FOR_CREATE; + thd->m_reprepare_observer= save_reprepare_observer; + if (unlikely(error)) + { + lex->restore_backup_query_tables_list(&query_tables_list_backup); + thd->restore_backup_open_tables_state(&open_tables_backup); + DBUG_RETURN(error); + } + table= table_list.table; + } + else + table= org_table_list->table; + + seq->reserved_until= seq->start; + error= seq->write_initial_sequence(table); + { + uint save_unsafe_rollback_flags= + thd->transaction->stmt.m_unsafe_rollback_flags; + if (trans_commit_stmt(thd)) + error= 1; + thd->transaction->stmt.m_unsafe_rollback_flags= + save_unsafe_rollback_flags; + } + if (trans_commit_implicit(thd)) + error= 1; + + if (!temporary_table) + { + close_thread_tables(thd); + lex->restore_backup_query_tables_list(&query_tables_list_backup); + thd->restore_backup_open_tables_state(&open_tables_backup); + + /* OPTION_TABLE_LOCK was 
reset in trans_commit_implicit */
+  if (thd->locked_tables_mode)
+    thd->variables.option_bits|= OPTION_TABLE_LOCK;
+  }
+  DBUG_RETURN(error);
+}
+
+
+/* Create a SQUENCE object */
+
+SEQUENCE::SEQUENCE() :all_values_used(0), initialized(SEQ_UNINTIALIZED)
+{
+  mysql_rwlock_init(key_LOCK_SEQUENCE, &mutex);
+}
+
+SEQUENCE::~SEQUENCE()
+{
+  mysql_rwlock_destroy(&mutex);
+}
+
+/*
+  The following functions is to ensure that we when reserve new values
+  trough sequence object sequence we have only one writer at at time.
+  A sequence table can have many readers (trough normal SELECT's).
+
+  We mark that we have a write lock in the table object so that
+  ha_sequence::ha_write() can check if we have a lock. If already locked, then
+  ha_write() knows that we are running a sequence operation. If not, then
+  ha_write() knows that it's an INSERT.
+*/
+
+void SEQUENCE::write_lock(TABLE *table)
+{
+  DBUG_ASSERT(((ha_sequence*) table->file)->is_locked() == 0);
+  mysql_rwlock_wrlock(&mutex);
+  ((ha_sequence*) table->file)->write_lock();
+}
+void SEQUENCE::write_unlock(TABLE *table)
+{
+  ((ha_sequence*) table->file)->unlock();
+  mysql_rwlock_unlock(&mutex);
+}
+void SEQUENCE::read_lock(TABLE *table)
+{
+  if (!((ha_sequence*) table->file)->is_locked())
+    mysql_rwlock_rdlock(&mutex);
+}
+void SEQUENCE::read_unlock(TABLE *table)
+{
+  if (!((ha_sequence*) table->file)->is_locked())
+    mysql_rwlock_unlock(&mutex);
+}
+
+/**
+  Read values from the sequence tables to table_share->sequence.
+  This is called from ha_open() when the table is not yet locked
+*/
+
+int SEQUENCE::read_initial_values(TABLE *table)
+{
+  int error= 0;
+  enum thr_lock_type save_lock_type;
+  MDL_request mdl_request;                      // Empty constructor!
+  DBUG_ENTER("SEQUENCE::read_initial_values");
+
+  if (likely(initialized != SEQ_UNINTIALIZED))
+    DBUG_RETURN(0);
+  write_lock(table);
+  if (likely(initialized == SEQ_UNINTIALIZED))
+  {
+    MYSQL_LOCK *lock;
+    bool mdl_lock_used= 0;
+    THD *thd= table->in_use;
+    bool has_active_transaction= !thd->transaction->stmt.is_empty();
+    /*
+      There is already a mdl_ticket for this table. However, for list_fields
+      the MDL lock is of type MDL_SHARED_HIGH_PRIO which is not usable
+      for doing a table lock. Get a proper read lock to solve this.
+    */
+    if (table->mdl_ticket == 0)
+    {
+      MDL_request_list mdl_requests;
+      mdl_lock_used= 1;
+      /*
+        This happens if first request is SHOW CREATE TABLE or LIST FIELDS
+        where we don't have a mdl lock on the table
+      */
+
+      MDL_REQUEST_INIT(&mdl_request, MDL_key::TABLE, table->s->db.str,
+                       table->s->table_name.str, MDL_SHARED_READ,
+                       MDL_EXPLICIT);
+      mdl_requests.push_front(&mdl_request);
+      if (thd->mdl_context.acquire_locks(&mdl_requests,
+                                         thd->variables.lock_wait_timeout))
+      {
+        write_unlock(table);
+        DBUG_RETURN(HA_ERR_LOCK_WAIT_TIMEOUT);
+      }
+    }
+    save_lock_type= table->reginfo.lock_type;
+    table->reginfo.lock_type= TL_READ;
+    if (!(lock= mysql_lock_tables(thd, &table, 1,
+                                  MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY)))
+    {
+      if (mdl_lock_used)
+        thd->mdl_context.release_lock(mdl_request.ticket);
+      write_unlock(table);
+
+      if (!has_active_transaction && !thd->transaction->stmt.is_empty() &&
+          !thd->in_sub_stmt)
+        trans_commit_stmt(thd);
+      DBUG_RETURN(HA_ERR_LOCK_WAIT_TIMEOUT);
+    }
+    DBUG_ASSERT(table->reginfo.lock_type == TL_READ);
+    if (likely(!(error= read_stored_values(table))))
+      initialized= SEQ_READY_TO_USE;
+    mysql_unlock_tables(thd, lock);
+    if (mdl_lock_used)
+      thd->mdl_context.release_lock(mdl_request.ticket);
+
+    /* Reset value to default */
+    table->reginfo.lock_type= save_lock_type;
+    /*
+      Doing mysql_lock_tables() may have started a read only transaction.
+      If that happend, it's better that we commit it now, as a lot of
+      code assumes that there is no active stmt transaction directly after
+      open_tables().
+      But we also don't want to commit the stmt transaction while in a
+      substatement, see MDEV-15977.
+    */
+    if (!has_active_transaction && !thd->transaction->stmt.is_empty() &&
+        !thd->in_sub_stmt)
+      trans_commit_stmt(thd);
+  }
+  write_unlock(table);
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Do the actiual reading of data from sequence table and
+  update values in the sequence object.
+
+  Called once from when table is opened
+*/
+
+int SEQUENCE::read_stored_values(TABLE *table)
+{
+  int error;
+  DBUG_ENTER("SEQUENCE::read_stored_values");
+
+  MY_BITMAP *save_read_set= tmp_use_all_columns(table, &table->read_set);
+  error= table->file->ha_read_first_row(table->record[0], MAX_KEY);
+  tmp_restore_column_map(&table->read_set, save_read_set);
+
+  if (unlikely(error))
+  {
+    table->file->print_error(error, MYF(0));
+    DBUG_RETURN(error);
+  }
+  read_fields(table);
+  adjust_values(reserved_until);
+
+  all_values_used= 0;
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Adjust values after reading a the stored state
+*/
+
+void sequence_definition::adjust_values(longlong next_value)
+{
+  next_free_value= next_value;
+  if (!(real_increment= increment))
+  {
+    longlong offset= 0;
+    longlong off, to_add;
+    /* Use auto_increment_increment and auto_increment_offset */
+
+    if ((real_increment= global_system_variables.auto_increment_increment)
+        != 1)
+      offset= (global_system_variables.auto_increment_offset %
+               global_system_variables.auto_increment_increment);
+
+    /*
+      Ensure that next_free_value has the right offset, so that we
+      can generate a serie by just adding real_increment.
+    */
+    off= next_free_value % real_increment;
+    if (off < 0)
+      off+= real_increment;
+    to_add= (real_increment + offset - off) % real_increment;
+
+    /*
+      Check if add will make next_free_value bigger than max_value,
+      taken into account that next_free_value or max_value addition
+      may overflow
+    */
+    if (next_free_value > max_value - to_add ||
+        next_free_value + to_add > max_value)
+      next_free_value= max_value+1;
+    else
+    {
+      next_free_value+= to_add;
+      DBUG_ASSERT(llabs(next_free_value % real_increment) == offset);
+    }
+  }
+}
+
+
+/**
+   Write initial sequence information for CREATE and ALTER to sequence table
+*/
+
+int sequence_definition::write_initial_sequence(TABLE *table)
+{
+  int error;
+  MY_BITMAP *save_write_set;
+
+  store_fields(table);
+  /* Store the sequence values in table share */
+  table->s->sequence->copy(this);
+  /*
+    Sequence values will be replicated as a statement
+    like 'create sequence'. So disable row logging for this table & statement
+  */
+  table->file->row_logging= table->file->row_logging_init= 0;
+  save_write_set= table->write_set;
+  table->write_set= &table->s->all_set;
+  table->s->sequence->initialized= SEQUENCE::SEQ_IN_PREPARE;
+  error= table->file->ha_write_row(table->record[0]);
+  table->s->sequence->initialized= SEQUENCE::SEQ_UNINTIALIZED;
+  table->write_set= save_write_set;
+  if (unlikely(error))
+    table->file->print_error(error, MYF(0));
+  else
+  {
+    /*
+      Sequence structure is up to date and table has one row,
+      sequence is now usable
+    */
+    table->s->sequence->initialized= SEQUENCE::SEQ_READY_TO_USE;
+  }
+  return error;
+}
+
+
+/**
+   Store current sequence values into the sequence table
+*/
+
+int sequence_definition::write(TABLE *table, bool all_fields)
+{
+  int error;
+  MY_BITMAP *save_rpl_write_set, *save_write_set, *save_read_set;
+  DBUG_ASSERT(((ha_sequence*) table->file)->is_locked());
+
+  save_rpl_write_set= table->rpl_write_set;
+  if (likely(!all_fields))
+  {
+    /* Only write next_value and round to binary log */
+
+    table->rpl_write_set= &table->def_rpl_write_set;
+    bitmap_clear_all(table->rpl_write_set);
+    bitmap_set_bit(table->rpl_write_set, NEXT_FIELD_NO);
+    bitmap_set_bit(table->rpl_write_set, ROUND_FIELD_NO);
+  }
+  else
+    table->rpl_write_set= &table->s->all_set;
+
+  /* Update table */
+  save_write_set= table->write_set;
+  save_read_set= table->read_set;
+  table->read_set= table->write_set= &table->s->all_set;
+  table->file->column_bitmaps_signal();
+  store_fields(table);
+  if (unlikely((error= table->file->ha_write_row(table->record[0]))))
+    table->file->print_error(error, MYF(0));
+  table->rpl_write_set= save_rpl_write_set;
+  table->read_set= save_read_set;
+  table->write_set= save_write_set;
+  table->file->column_bitmaps_signal();
+  return error;
+}
+
+
+/**
+  Get next value for sequence
+
+  @param in     table  Sequence table
+  @param in     second_round
+                       1 if recursive call (out of values once)
+  @param out    error  Set this to <> 0 in case of error
+                       push_warning_printf(WARN_LEVEL_WARN) has been called
+
+
+  @retval     0      Next number or error. Check error variable
+              #      Next sequence number
+
+  NOTES:
+    Return next_free_value and increment next_free_value to next allowed
+    value or reserved_value if out of range
+    if next_free_value >= reserved_value reserve a new range by writing
+    a record to the sequence table.
+
+  The state of the variables:
+    next_free_value contains next value to use. It may be
+    bigger than max_value or less than min_value if end of sequence.
+    reserved_until contains the last value written to the file. All
+    values up to this one can be used.
+    If next_free_value >= reserved_until we have to reserve new
+    values from the sequence.
+*/
+
+longlong SEQUENCE::next_value(TABLE *table, bool second_round, int *error)
+{
+  longlong res_value, org_reserved_until, add_to;
+  bool out_of_values;
+  THD *thd= table->in_use;
+  DBUG_ENTER("SEQUENCE::next_value");
+  DBUG_ASSERT(thd);
+
+  *error= 0;
+  if (!second_round)
+    write_lock(table);
+
+  res_value= next_free_value;
+  next_free_value= increment_value(next_free_value);
+
+  if ((real_increment > 0 && res_value < reserved_until) ||
+      (real_increment < 0 && res_value > reserved_until))
+  {
+    write_unlock(table);
+    DBUG_RETURN(res_value);
+  }
+
+  if (all_values_used)
+    goto err;
+
+  org_reserved_until= reserved_until;
+
+  /*
+    Out of cached values, reserve 'cache' new ones
+    The cache value is checked on insert so the following can't
+    overflow
+  */
+  add_to= cache ? real_increment * cache : real_increment;
+  out_of_values= 0;
+
+  if (real_increment > 0)
+  {
+    if (reserved_until > max_value - add_to ||
+        reserved_until + add_to > max_value)
+    {
+      reserved_until= max_value + 1;
+      out_of_values= res_value >= reserved_until;
+    }
+    else
+      reserved_until+= add_to;
+  }
+  else
+  {
+    if (reserved_until + add_to < min_value ||
+        reserved_until < min_value - add_to)
+    {
+      reserved_until= min_value - 1;
+      out_of_values= res_value <= reserved_until;
+    }
+    else
+      reserved_until+= add_to;
+  }
+  if (out_of_values)
+  {
+    if (!cycle || second_round)
+      goto err;
+    round++;
+    reserved_until= real_increment >0 ?
+      min_value : max_value;
+    adjust_values(reserved_until);              // Fix next_free_value
+    /*
+      We have to do everything again to ensure that the given range was
+      not empty, which could happen if increment == 0
+    */
+    DBUG_RETURN(next_value(table, 1, error));
+  }
+
+  if (unlikely((*error= write(table, thd->variables.binlog_row_image !=
+                              BINLOG_ROW_IMAGE_MINIMAL))))
+  {
+    reserved_until= org_reserved_until;
+    next_free_value= res_value;
+  }
+
+  write_unlock(table);
+  DBUG_RETURN(res_value);
+
+err:
+  write_unlock(table);
+  my_error(ER_SEQUENCE_RUN_OUT, MYF(0), table->s->db.str,
+           table->s->table_name.str);
+  *error= ER_SEQUENCE_RUN_OUT;
+  all_values_used= 1;
+  DBUG_RETURN(0);
+}
+
+
+/*
+  The following functions is to detect if a table has been dropped
+  and re-created since last call to PREVIOUS VALUE.
+
+  This is needed as we don't delete dropped sequences from THD->sequence
+  for DROP TABLE.
+*/
+
+bool SEQUENCE_LAST_VALUE::check_version(TABLE *table)
+{
+  DBUG_ASSERT(table->s->tabledef_version.length == MY_UUID_SIZE);
+  return memcmp(table->s->tabledef_version.str, table_version,
+                MY_UUID_SIZE) != 0;
+}
+
+void SEQUENCE_LAST_VALUE::set_version(TABLE *table)
+{
+  memcpy(table_version, table->s->tabledef_version.str, MY_UUID_SIZE);
+}
+
+/**
+  Set the next value for sequence
+
+  @param in     table      Sequence table
+  @param in     next_val   Next free value
+  @param in     next_round Round for 'next_value' (in case of cycles)
+  @param in     is_used    1 if next_val is already used
+
+  @retval     0      ok, value adjusted
+              -1     value was less than current value
+              1      error when storing value
+
+  @comment
+  A new value is set only if "nextval,next_round" is less than
+  "next_free_value,round". This is needed as in replication
+  setvalue() calls may come out to the slave out-of-order.
+  Storing only the highest value ensures that sequence object will always
+  contain the highest used value when the slave is promoted to a master.
+*/
+
+int SEQUENCE::set_value(TABLE *table, longlong next_val, ulonglong next_round,
+                        bool is_used)
+{
+  int error= -1;
+  bool needs_to_be_stored= 0;
+  longlong org_reserved_until= reserved_until;
+  longlong org_next_free_value= next_free_value;
+  ulonglong org_round= round;
+  THD *thd= table->in_use;
+  DBUG_ENTER("SEQUENCE::set_value");
+  DBUG_ASSERT(thd);
+
+  write_lock(table);
+  if (is_used)
+    next_val= increment_value(next_val);
+
+  if (round > next_round)
+    goto end;                                   // error = -1
+  if (round == next_round)
+  {
+    if (real_increment > 0 ?
+        next_val < next_free_value :
+        next_val > next_free_value)
+      goto end;                                 // error = -1
+    if (next_val == next_free_value)
+    {
+      error= 0;
+      goto end;
+    }
+  }
+  else if (cycle == 0)
+  {
+    // round < next_round && no cycles, which is impossible
+    my_error(ER_SEQUENCE_RUN_OUT, MYF(0), table->s->db.str,
+             table->s->table_name.str);
+    error= 1;
+    goto end;
+  }
+  else
+    needs_to_be_stored= 1;
+
+  round= next_round;
+  adjust_values(next_val);
+  if ((real_increment > 0 ?
+       next_free_value > reserved_until :
+       next_free_value < reserved_until) ||
+      needs_to_be_stored)
+  {
+    reserved_until= next_free_value;
+    if (write(table,
+              thd->variables.binlog_row_image != BINLOG_ROW_IMAGE_MINIMAL))
+    {
+      reserved_until= org_reserved_until;
+      next_free_value= org_next_free_value;
+      round= org_round;
+      error= 1;
+      goto end;
+    }
+  }
+  error= 0;
+
+end:
+  write_unlock(table);
+  DBUG_RETURN(error);
+}
+
+#if defined(HAVE_REPLICATION)
+class wait_for_commit_raii
+{
+private:
+  THD *m_thd;
+  wait_for_commit *m_wfc;
+
+public:
+  wait_for_commit_raii(THD* thd) :
+    m_thd(thd), m_wfc(thd->suspend_subsequent_commits())
+  {}
+  ~wait_for_commit_raii() { m_thd->resume_subsequent_commits(m_wfc); }
+};
+#endif
+
+bool Sql_cmd_alter_sequence::execute(THD *thd)
+{
+  int error= 0;
+  int trapped_errors= 0;
+  LEX *lex= thd->lex;
+  TABLE_LIST *first_table= lex->query_tables;
+  TABLE *table;
+  sequence_definition *new_seq= lex->create_info.seq_create_info;
+  SEQUENCE *seq;
+  No_such_table_error_handler no_such_table_handler;
+  DBUG_ENTER("Sql_cmd_alter_sequence::execute");
+#if defined(HAVE_REPLICATION)
+  /* No wakeup():s of subsequent commits is allowed in this function.
*/
+  wait_for_commit_raii suspend_wfc(thd);
+#endif
+
+  if (check_access(thd, ALTER_ACL, first_table->db.str,
+                   &first_table->grant.privilege,
+                   &first_table->grant.m_internal,
+                   0, 0))
+    DBUG_RETURN(TRUE);                  /* purecov: inspected */
+
+  if (check_grant(thd, ALTER_ACL, first_table, FALSE, 1, FALSE))
+    DBUG_RETURN(TRUE);                  /* purecov: inspected */
+
+#ifdef WITH_WSREP
+  if (WSREP(thd) && wsrep_thd_is_local(thd))
+  {
+    if (wsrep_check_sequence(thd, new_seq))
+      DBUG_RETURN(TRUE);
+
+    if (wsrep_to_isolation_begin(thd, first_table->db.str,
+                                 first_table->table_name.str,
+                                 first_table))
+    {
+      DBUG_RETURN(TRUE);
+    }
+  }
+#endif /* WITH_WSREP */
+
+  if (if_exists())
+    thd->push_internal_handler(&no_such_table_handler);
+  error= open_and_lock_tables(thd, first_table, FALSE, 0);
+  if (if_exists())
+  {
+    trapped_errors= no_such_table_handler.safely_trapped_errors();
+    thd->pop_internal_handler();
+  }
+  if (unlikely(error))
+  {
+    if (trapped_errors)
+    {
+      StringBuffer tbl_name;
+      tbl_name.append(&first_table->db);
+      tbl_name.append('.');
+      tbl_name.append(&first_table->table_name);
+      push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
+                          ER_UNKNOWN_SEQUENCES,
+                          ER_THD(thd, ER_UNKNOWN_SEQUENCES),
+                          tbl_name.c_ptr_safe());
+      my_ok(thd);
+      DBUG_RETURN(FALSE);
+    }
+    DBUG_RETURN(TRUE);
+  }
+
+  table= first_table->table;
+  seq= table->s->sequence;
+
+  seq->write_lock(table);
+  new_seq->reserved_until= seq->reserved_until;
+
+  /* Copy from old sequence those fields that the user didn't specified */
+  if (!(new_seq->used_fields & seq_field_used_increment))
+    new_seq->increment= seq->increment;
+  if (!(new_seq->used_fields & seq_field_used_min_value))
+    new_seq->min_value= seq->min_value;
+  if (!(new_seq->used_fields & seq_field_used_max_value))
+    new_seq->max_value= seq->max_value;
+  if (!(new_seq->used_fields & seq_field_used_start))
+    new_seq->start= seq->start;
+  if (!(new_seq->used_fields & seq_field_used_cache))
+    new_seq->cache= seq->cache;
+  if (!(new_seq->used_fields &
+        seq_field_used_cycle))
+    new_seq->cycle= seq->cycle;
+
+  /* If we should restart from a new value */
+  if (new_seq->used_fields & seq_field_used_restart)
+  {
+    if (!(new_seq->used_fields & seq_field_used_restart_value))
+      new_seq->restart= new_seq->start;
+    new_seq->reserved_until= new_seq->restart;
+  }
+
+  /* Let check_and_adjust think all fields are used */
+  new_seq->used_fields= ~0;
+  if (new_seq->check_and_adjust(0))
+  {
+    my_error(ER_SEQUENCE_INVALID_DATA, MYF(0),
+             first_table->db.str,
+             first_table->table_name.str);
+    error= 1;
+    seq->write_unlock(table);
+    goto end;
+  }
+
+  if (likely(!(error= new_seq->write(table, 1))))
+  {
+    /* Store the sequence values in table share */
+    seq->copy(new_seq);
+  }
+  else
+    table->file->print_error(error, MYF(0));
+  seq->write_unlock(table);
+  if (trans_commit_stmt(thd))
+    error= 1;
+  if (trans_commit_implicit(thd))
+    error= 1;
+  DBUG_EXECUTE_IF("hold_worker_on_schedule",
+                  {
+                    /* delay binlogging of a parent trx in rpl_parallel_seq */
+                    my_sleep(100000);
+                  });
+  if (likely(!error))
+    error= write_bin_log(thd, 1, thd->query(), thd->query_length());
+  if (likely(!error))
+    my_ok(thd);
+
+end:
+  DBUG_RETURN(error);
+}
diff --git a/sql/sql_sequence.h b/sql/sql_sequence.h
new file mode 100644
index 00000000..fba04686
--- /dev/null
+++ b/sql/sql_sequence.h
@@ -0,0 +1,169 @@
+/* Copyright (c) 2017, MariaDB corporation
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef SQL_SEQUENCE_INCLUDED
+#define SQL_SEQUENCE_INCLUDED
+
+#define seq_field_used_min_value 1
+#define seq_field_used_max_value 2
+#define seq_field_used_start 4
+#define seq_field_used_increment 8
+#define seq_field_used_cache 16
+#define seq_field_used_cycle 32
+#define seq_field_used_restart 64
+#define seq_field_used_restart_value 128
+
+/* Field position in sequence table for some fields we refer to directly */
+#define NEXT_FIELD_NO 0
+#define MIN_VALUE_FIELD_NO 1
+#define ROUND_FIELD_NO 7
+
+/**
+  sequence_definition is used when defining a sequence as part of create
+*/
+
+class sequence_definition :public Sql_alloc
+{
+public:
+  sequence_definition():
+    min_value(1), max_value(LONGLONG_MAX-1), start(1), increment(1),
+    cache(1000), round(0), restart(0), cycle(0), used_fields(0)
+  {}
+  longlong reserved_until;
+  longlong min_value;
+  longlong max_value;
+  longlong start;
+  longlong increment;
+  longlong cache;
+  ulonglong round;
+  longlong restart;              // alter sequence restart value
+  bool cycle;
+  uint used_fields;              // Which fields where used in CREATE
+
+  bool check_and_adjust(bool set_reserved_until);
+  void store_fields(TABLE *table);
+  void read_fields(TABLE *table);
+  int write_initial_sequence(TABLE *table);
+  int write(TABLE *table, bool all_fields);
+  /* This must be called after sequence data has been updated */
+  void adjust_values(longlong next_value);
+  inline void print_dbug()
+  {
+    DBUG_PRINT("sequence", ("reserved: %lld start: %lld increment: %lld min_value: %lld max_value: %lld cache: %lld round: %lld",
+                            reserved_until, start, increment, min_value,
+                            max_value, cache, round));
+  }
+protected:
+  /*
+    The following values are the values from sequence_definition
+    merged with global auto_increment_offset and
+    auto_increment_increment
+  */
+  longlong real_increment;
+  longlong next_free_value;
+};
+
+/**
+  SEQUENCE is in charge of managing the sequence values.
+  It's also responsible to generate new values and updating the sequence
+  table (engine=SQL_SEQUENCE) trough it's specialized handler interface.
+
+  If increment is 0 then the sequence will be be using
+  auto_increment_increment and auto_increment_offset variables, just like
+  AUTO_INCREMENT is using.
+*/
+
+class SEQUENCE :public sequence_definition
+{
+public:
+  enum seq_init { SEQ_UNINTIALIZED, SEQ_IN_PREPARE, SEQ_IN_ALTER,
+                  SEQ_READY_TO_USE };
+  SEQUENCE();
+  ~SEQUENCE();
+  int read_initial_values(TABLE *table);
+  int read_stored_values(TABLE *table);
+  void write_lock(TABLE *table);
+  void write_unlock(TABLE *table);
+  void read_lock(TABLE *table);
+  void read_unlock(TABLE *table);
+  void copy(sequence_definition *seq)
+  {
+    sequence_definition::operator= (*seq);
+    adjust_values(reserved_until);
+    all_values_used= 0;
+  }
+  longlong next_value(TABLE *table, bool second_round, int *error);
+  int set_value(TABLE *table, longlong next_value, ulonglong round_arg,
+                bool is_used);
+  longlong increment_value(longlong value)
+  {
+    if (real_increment > 0)
+    {
+      if (value > max_value - real_increment ||
+          value + real_increment > max_value)
+        value= max_value + 1;
+      else
+        value+= real_increment;
+    }
+    else
+    {
+      if (value + real_increment < min_value ||
+          value < min_value - real_increment)
+        value= min_value - 1;
+      else
+        value+= real_increment;
+    }
+    return value;
+  }
+
+  bool all_values_used;
+  seq_init initialized;
+
+private:
+  mysql_rwlock_t mutex;
+};
+
+
+/**
+  Class to cache last value of NEXT VALUE from the sequence
+*/
+
+class SEQUENCE_LAST_VALUE
+{
+public:
+  SEQUENCE_LAST_VALUE(uchar *key_arg, uint length_arg)
+    :key(key_arg), length(length_arg)
+  {}
+  ~SEQUENCE_LAST_VALUE()
+  { my_free((void*) key); }
+  /* Returns 1 if table hasn't been dropped or re-created */
+  bool check_version(TABLE *table);
+
void set_version(TABLE *table); + + const uchar *key; + uint length; + bool null_value; + longlong value; + uchar table_version[MY_UUID_SIZE]; +}; + + +class Create_field; +extern bool prepare_sequence_fields(THD *thd, List *fields); +extern bool check_sequence_fields(LEX *lex, List *fields, + const LEX_CSTRING db, + const LEX_CSTRING table_name); +extern bool sequence_insert(THD *thd, LEX *lex, TABLE_LIST *table_list); +#endif /* SQL_SEQUENCE_INCLUDED */ diff --git a/sql/sql_servers.cc b/sql/sql_servers.cc new file mode 100644 index 00000000..d52d6071 --- /dev/null +++ b/sql/sql_servers.cc @@ -0,0 +1,1428 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* + The servers are saved in the system table "servers" + + Currently, when the user performs an ALTER SERVER or a DROP SERVER + operation, it will cause all open tables which refer to the named + server connection to be flushed. This may cause some undesirable + behaviour with regard to currently running transactions. It is + expected that the DBA knows what s/he is doing when s/he performs + the ALTER SERVER or DROP SERVER operation. + + TODO: + It is desirable for us to implement a callback mechanism instead where + callbacks can be registered for specific server protocols. 
The callback + will be fired when such a server name has been created/altered/dropped + or when statistics are to be gathered such as how many actual connections. + Storage engines etc will be able to make use of the callback so that + currently running transactions etc will not be disrupted. +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_servers.h" +#include "unireg.h" +#include "sql_base.h" // close_mysql_tables +#include "records.h" // init_read_record, end_read_record +#include +#include +#include "sp_head.h" +#include "sp.h" +#include "transaction.h" +#include "lock.h" // MYSQL_LOCK_IGNORE_TIMEOUT + +/* + We only use 1 mutex to guard the data structures - THR_LOCK_servers. + Read locked when only reading data and write-locked for all other access. +*/ + +static HASH servers_cache; +static MEM_ROOT mem; +static mysql_rwlock_t THR_LOCK_servers; +static LEX_CSTRING MYSQL_SERVERS_NAME= {STRING_WITH_LEN("servers") }; + + +static bool get_server_from_table_to_cache(TABLE *table); + +/* insert functions */ +static int insert_server(THD *thd, FOREIGN_SERVER *server_options); +static int insert_server_record(TABLE *table, FOREIGN_SERVER *server); +static int insert_server_record_into_cache(FOREIGN_SERVER *server); +static FOREIGN_SERVER * +prepare_server_struct_for_insert(LEX_SERVER_OPTIONS *server_options); +/* drop functions */ +static int delete_server_record(TABLE *table, LEX_CSTRING *name); +static int delete_server_record_in_cache(LEX_SERVER_OPTIONS *server_options); + +/* update functions */ +static void prepare_server_struct_for_update(LEX_SERVER_OPTIONS *server_options, + FOREIGN_SERVER *existing, + FOREIGN_SERVER *altered); +static int update_server(THD *thd, FOREIGN_SERVER *existing, + FOREIGN_SERVER *altered); +static int update_server_record(TABLE *table, FOREIGN_SERVER *server); +static int update_server_record_in_cache(FOREIGN_SERVER *existing, + FOREIGN_SERVER *altered); +/* utility functions */ +static void 
merge_server_struct(FOREIGN_SERVER *from, FOREIGN_SERVER *to); + +static uchar *servers_cache_get_key(FOREIGN_SERVER *server, size_t *length, + my_bool not_used __attribute__((unused))) +{ + DBUG_ENTER("servers_cache_get_key"); + DBUG_PRINT("info", ("server_name_length %zd server_name %s", + server->server_name_length, + server->server_name)); + + *length= (uint) server->server_name_length; + DBUG_RETURN((uchar*) server->server_name); +} + +static PSI_memory_key key_memory_servers; + +#ifdef HAVE_PSI_INTERFACE +static PSI_rwlock_key key_rwlock_THR_LOCK_servers; + +static PSI_rwlock_info all_servers_cache_rwlocks[]= +{ + { &key_rwlock_THR_LOCK_servers, "THR_LOCK_servers", PSI_FLAG_GLOBAL} +}; + +static PSI_memory_info all_servers_cache_memory[]= +{ + { &key_memory_servers, "servers_cache", PSI_FLAG_GLOBAL} +}; + +static void init_servers_cache_psi_keys(void) +{ + const char* category= "sql"; + int count; + + if (PSI_server == NULL) + return; + + count= array_elements(all_servers_cache_rwlocks); + PSI_server->register_rwlock(category, all_servers_cache_rwlocks, count); + + count= array_elements(all_servers_cache_memory); + mysql_memory_register(category, all_servers_cache_memory, count); +} +#endif /* HAVE_PSI_INTERFACE */ + + +struct close_cached_connection_tables_arg +{ + THD *thd; + LEX_CSTRING *connection; + TABLE_LIST *tables; +}; + + +static my_bool close_cached_connection_tables_callback( + TDC_element *element, close_cached_connection_tables_arg *arg) +{ + TABLE_LIST *tmp; + + mysql_mutex_lock(&element->LOCK_table_share); + /* Ignore if table is not open or does not have a connect_string */ + if (!element->share || !element->share->connect_string.length || + !element->ref_count) + goto end; + + /* Compare the connection string */ + if (arg->connection && + (arg->connection->length > element->share->connect_string.length || + (arg->connection->length < element->share->connect_string.length && + (element->share->connect_string.str[arg->connection->length] != 
'/' && + element->share->connect_string.str[arg->connection->length] != '\\')) || + strncasecmp(arg->connection->str, element->share->connect_string.str, + arg->connection->length))) + goto end; + + /* close_cached_tables() only uses these elements */ + if (!(tmp= (TABLE_LIST*) alloc_root(arg->thd->mem_root, sizeof(TABLE_LIST))) || + !(arg->thd->make_lex_string(&tmp->db, element->share->db.str, element->share->db.length)) || + !(arg->thd->make_lex_string(&tmp->table_name, element->share->table_name.str, + element->share->table_name.length))) + { + mysql_mutex_unlock(&element->LOCK_table_share); + return TRUE; + } + + tmp->next_global= tmp->next_local= arg->tables; + MDL_REQUEST_INIT(&tmp->mdl_request, MDL_key::TABLE, tmp->db.str, + tmp->table_name.str, MDL_EXCLUSIVE, MDL_TRANSACTION); + arg->tables= tmp; + +end: + mysql_mutex_unlock(&element->LOCK_table_share); + return FALSE; +} + + +/** + Close all tables which match specified connection string or + if specified string is NULL, then any table with a connection string. + + @return false ok + @return true error, some tables may keep using old server info +*/ + +static bool close_cached_connection_tables(THD *thd, LEX_CSTRING *connection) +{ + close_cached_connection_tables_arg argument= { thd, connection, 0 }; + DBUG_ENTER("close_cached_connections"); + + if (tdc_iterate(thd, + (my_hash_walk_action) close_cached_connection_tables_callback, + &argument)) + DBUG_RETURN(true); + + DBUG_RETURN(argument.tables ? + close_cached_tables(thd, argument.tables, true, + thd->variables.lock_wait_timeout) : false); +} + + +/* + Initialize structures responsible for servers used in federated + server scheme information for them from the server + table in the 'mysql' database. + + SYNOPSIS + servers_init() + dont_read_server_table TRUE if we want to skip loading data from + server table and disable privilege checking. 
+ + NOTES + This function is mostly responsible for preparatory steps, main work + on initialization and grants loading is done in servers_reload(). + + RETURN VALUES + 0 ok + 1 Could not initialize servers +*/ + +bool servers_init(bool dont_read_servers_table) +{ + THD *thd; + bool return_val= FALSE; + DBUG_ENTER("servers_init"); + +#ifdef HAVE_PSI_INTERFACE + init_servers_cache_psi_keys(); +#endif + + /* init the mutex */ + if (mysql_rwlock_init(key_rwlock_THR_LOCK_servers, &THR_LOCK_servers)) + DBUG_RETURN(TRUE); + + /* initialise our servers cache */ + if (my_hash_init(key_memory_servers, &servers_cache, system_charset_info, 32, 0, 0, + (my_hash_get_key) servers_cache_get_key, 0, 0)) + { + return_val= TRUE; /* we failed, out of memory? */ + goto end; + } + + /* Initialize the mem root for data */ + init_sql_alloc(key_memory_servers, &mem, ACL_ALLOC_BLOCK_SIZE, 0, + MYF(MY_THREAD_SPECIFIC)); + + if (dont_read_servers_table) + goto end; + + /* + To be able to run this from boot, we allocate a temporary THD + */ + if (!(thd=new THD(0))) + DBUG_RETURN(TRUE); + thd->thread_stack= (char*) &thd; + thd->store_globals(); + /* + It is safe to call servers_reload() since servers_* arrays and hashes which + will be freed there are global static objects and thus are initialized + by zeros at startup. + */ + return_val= servers_reload(thd); + delete thd; + +end: + DBUG_RETURN(return_val); +} + +/* + Initialize server structures + + SYNOPSIS + servers_load() + thd Current thread + tables List containing open "mysql.servers" + + RETURN VALUES + FALSE Success + TRUE Error + + TODO + Revert back to old list if we failed to load new one. 
+*/ + +static bool servers_load(THD *thd, TABLE_LIST *tables) +{ + TABLE *table; + READ_RECORD read_record_info; + bool return_val= TRUE; + DBUG_ENTER("servers_load"); + + my_hash_reset(&servers_cache); + free_root(&mem, MYF(0)); + init_sql_alloc(key_memory_servers, &mem, ACL_ALLOC_BLOCK_SIZE, 0, MYF(0)); + + if (init_read_record(&read_record_info,thd,table=tables[0].table, NULL, NULL, + 1,0, FALSE)) + DBUG_RETURN(1); + while (!(read_record_info.read_record())) + { + /* return_val is already TRUE, so no need to set */ + if ((get_server_from_table_to_cache(table))) + goto end; + } + + return_val= FALSE; + +end: + end_read_record(&read_record_info); + DBUG_RETURN(return_val); +} + + +/* + Forget current servers cache and read new servers + from the conneciton table. + + SYNOPSIS + servers_reload() + thd Current thread + + NOTE + All tables of calling thread which were open and locked by LOCK TABLES + statement will be unlocked and closed. + This function is also used for initialization of structures responsible + for user/db-level privilege checking. + + RETURN VALUE + FALSE Success + TRUE Failure +*/ + +bool servers_reload(THD *thd) +{ + TABLE_LIST tables[1]; + bool return_val= TRUE; + DBUG_ENTER("servers_reload"); + + DBUG_PRINT("info", ("locking servers_cache")); + mysql_rwlock_wrlock(&THR_LOCK_servers); + + tables[0].init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_SERVERS_NAME, 0, TL_READ); + + if (unlikely(open_and_lock_tables(thd, tables, FALSE, + MYSQL_LOCK_IGNORE_TIMEOUT))) + { + /* + Execution might have been interrupted; only print the error message + if an error condition has been raised. + */ + if (thd->get_stmt_da()->is_error()) + sql_print_error("Can't open and lock privilege tables: %s", + thd->get_stmt_da()->message()); + return_val= FALSE; + goto end; + } + + if ((return_val= servers_load(thd, tables))) + { // Error. 
Revert to old list + /* blast, for now, we have no servers, discuss later way to preserve */ + + DBUG_PRINT("error",("Reverting to old privileges")); + servers_free(); + } + +end: + close_mysql_tables(thd); + DBUG_PRINT("info", ("unlocking servers_cache")); + mysql_rwlock_unlock(&THR_LOCK_servers); + DBUG_RETURN(return_val); +} + + +/* + Initialize structures responsible for servers used in federated + server scheme information for them from the server + table in the 'mysql' database. + + SYNOPSIS + get_server_from_table_to_cache() + TABLE *table open table pointer + + + NOTES + This function takes a TABLE pointer (pointing to an opened + table). With this open table, a FOREIGN_SERVER struct pointer + is allocated into root memory, then each member of the FOREIGN_SERVER + struct is populated. A char pointer takes the return value of get_field + for each column we're interested in obtaining, and if that pointer + isn't 0x0, the FOREIGN_SERVER member is set to that value, otherwise, + is set to the value of an empty string, since get_field would set it to + 0x0 if the column's value is empty, even if the default value for that + column is NOT NULL. + + RETURN VALUES + 0 ok + 1 could not insert server struct into global servers cache +*/ + +static bool +get_server_from_table_to_cache(TABLE *table) +{ + /* alloc a server struct */ + char *ptr; + char * const blank= (char*)""; + FOREIGN_SERVER *server= (FOREIGN_SERVER *)alloc_root(&mem, + sizeof(FOREIGN_SERVER)); + DBUG_ENTER("get_server_from_table_to_cache"); + table->use_all_columns(); + + /* get each field into the server struct ptr */ + ptr= get_field(&mem, table->field[0]); + server->server_name= ptr ? ptr : blank; + server->server_name_length= (uint) strlen(server->server_name); + ptr= get_field(&mem, table->field[1]); + server->host= ptr ? ptr : blank; + ptr= get_field(&mem, table->field[2]); + server->db= ptr ? ptr : blank; + ptr= get_field(&mem, table->field[3]); + server->username= ptr ? 
ptr : blank; + ptr= get_field(&mem, table->field[4]); + server->password= ptr ? ptr : blank; + ptr= get_field(&mem, table->field[5]); + server->sport= ptr ? ptr : blank; + + server->port= server->sport ? atoi(server->sport) : 0; + + ptr= get_field(&mem, table->field[6]); + server->socket= ptr && strlen(ptr) ? ptr : blank; + ptr= get_field(&mem, table->field[7]); + server->scheme= ptr ? ptr : blank; + ptr= get_field(&mem, table->field[8]); + server->owner= ptr ? ptr : blank; + DBUG_PRINT("info", ("server->server_name %s", server->server_name)); + DBUG_PRINT("info", ("server->host %s", server->host)); + DBUG_PRINT("info", ("server->db %s", server->db)); + DBUG_PRINT("info", ("server->username %s", server->username)); + DBUG_PRINT("info", ("server->password %s", server->password)); + DBUG_PRINT("info", ("server->socket %s", server->socket)); + if (my_hash_insert(&servers_cache, (uchar*) server)) + { + DBUG_PRINT("info", ("had a problem inserting server %s at %p", + server->server_name, server)); + // error handling needed here + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +/* + SYNOPSIS + insert_server() + THD *thd - thread pointer + FOREIGN_SERVER *server - pointer to prepared FOREIGN_SERVER struct + + NOTES + This function takes a server object that is has all members properly + prepared, ready to be inserted both into the mysql.servers table and + the servers cache. + + THR_LOCK_servers must be write locked. + + RETURN VALUES + 0 - no error + other - error code +*/ + +static int +insert_server(THD *thd, FOREIGN_SERVER *server) +{ + int error= -1; + TABLE_LIST tables; + TABLE *table; + DBUG_ENTER("insert_server"); + + tables.init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_SERVERS_NAME, 0, TL_WRITE); + + /* need to open before acquiring THR_LOCK_plugin or it will deadlock */ + if (! 
(table= open_ltable(thd, &tables, TL_WRITE, MYSQL_LOCK_IGNORE_TIMEOUT))) + goto end; + table->file->row_logging= 0; // Don't log to binary log + + /* insert the server into the table */ + if (unlikely(error= insert_server_record(table, server))) + goto end; + + /* insert the server into the cache */ + if (unlikely((error= insert_server_record_into_cache(server)))) + goto end; + +end: + DBUG_RETURN(error); +} + + +/* + SYNOPSIS + int insert_server_record_into_cache() + FOREIGN_SERVER *server + + NOTES + This function takes a FOREIGN_SERVER pointer to an allocated (root mem) + and inserts it into the global servers cache + + THR_LOCK_servers must be write locked. + + RETURN VALUE + 0 - no error + >0 - error code + +*/ + +static int +insert_server_record_into_cache(FOREIGN_SERVER *server) +{ + int error=0; + DBUG_ENTER("insert_server_record_into_cache"); + /* + We succeeded in insertion of the server to the table, now insert + the server to the cache + */ + DBUG_PRINT("info", ("inserting server %s at %p, length %zd", + server->server_name, server, + server->server_name_length)); + if (my_hash_insert(&servers_cache, (uchar*) server)) + { + DBUG_PRINT("info", ("had a problem inserting server %s at %p", + server->server_name, server)); + // error handling needed here + error= 1; + } + DBUG_RETURN(error); +} + + +/* + SYNOPSIS + store_server_fields() + TABLE *table + FOREIGN_SERVER *server + + NOTES + This function takes an opened table object, and a pointer to an + allocated FOREIGN_SERVER struct, and then stores each member of + the FOREIGN_SERVER to the appropriate fields in the table, in + advance of insertion into the mysql.servers table + + RETURN VALUE + VOID + +*/ + +static void +store_server_fields(TABLE *table, FOREIGN_SERVER *server) +{ + + table->use_all_columns(); + /* + "server" has already been prepped by prepare_server_struct_for_<> + so, all we need to do is check if the value is set (> -1 for port) + + If this happens to be an update, only the server 
members that + have changed will be set. If an insert, then all will be set, + even if with empty strings + */ + if (server->host) + table->field[1]->store(server->host, + (uint) strlen(server->host), system_charset_info); + if (server->db) + table->field[2]->store(server->db, + (uint) strlen(server->db), system_charset_info); + if (server->username) + table->field[3]->store(server->username, + (uint) strlen(server->username), system_charset_info); + if (server->password) + table->field[4]->store(server->password, + (uint) strlen(server->password), system_charset_info); + if (server->port > -1) + table->field[5]->store(server->port); + + if (server->socket) + table->field[6]->store(server->socket, + (uint) strlen(server->socket), system_charset_info); + if (server->scheme) + table->field[7]->store(server->scheme, + (uint) strlen(server->scheme), system_charset_info); + if (server->owner) + table->field[8]->store(server->owner, + (uint) strlen(server->owner), system_charset_info); +} + +/* + SYNOPSIS + insert_server_record() + TABLE *table + FOREIGN_SERVER *server + + NOTES + This function takes the arguments of an open table object and a pointer + to an allocated FOREIGN_SERVER struct. It stores the server_name into + the first field of the table (the primary key, server_name column). With + this, index_read_idx is called, if the record is found, an error is set + to ER_FOREIGN_SERVER_EXISTS (the server with that server name exists in the + table), if not, then store_server_fields stores all fields of the + FOREIGN_SERVER to the table, then ha_write_row is inserted. 
If an error + is encountered in either index_read_idx or ha_write_row, then that error + is returned + + RETURN VALUE + 0 - no errors + >0 - error code + + */ + +static +int insert_server_record(TABLE *table, FOREIGN_SERVER *server) +{ + int error; + DBUG_ENTER("insert_server_record"); + DBUG_ASSERT(!table->file->row_logging); + + table->use_all_columns(); + empty_record(table); + + /* set the field that's the PK to the value we're looking for */ + table->field[0]->store(server->server_name, + server->server_name_length, + system_charset_info); + + /* read index until record is that specified in server_name */ + if (unlikely((error= + table->file->ha_index_read_idx_map(table->record[0], 0, + (uchar *)table->field[0]-> + ptr, + HA_WHOLE_KEY, + HA_READ_KEY_EXACT)))) + { + /* if not found, err */ + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + { + table->file->print_error(error, MYF(0)); + error= 1; + } + /* store each field to be inserted */ + store_server_fields(table, server); + + DBUG_PRINT("info",("record for server '%s' not found!", + server->server_name)); + /* write/insert the new server */ + if (unlikely(error=table->file->ha_write_row(table->record[0]))) + table->file->print_error(error, MYF(0)); + } + else + error= ER_FOREIGN_SERVER_EXISTS; + DBUG_RETURN(error); +} + +/* + SYNOPSIS + drop_server() + THD *thd + LEX_SERVER_OPTIONS *server_options + + NOTES + This function takes as its arguments a THD object pointer and a pointer + to a LEX_SERVER_OPTIONS struct from the parser. The member 'server_name' + of this LEX_SERVER_OPTIONS struct contains the value of the server to be + deleted. The mysql.servers table is opened via open_ltable, + a table object returned, then delete_server_record is + called with this table object and LEX_SERVER_OPTIONS server_name and + server_name_length passed, containing the name of the server to be + dropped/deleted, then delete_server_record_in_cache is called to delete + the server from the servers cache. 
+ + RETURN VALUE + 0 - no error + > 0 - error code +*/ + +static int drop_server_internal(THD *thd, LEX_SERVER_OPTIONS *server_options) +{ + int error; + TABLE_LIST tables; + TABLE *table; + + DBUG_ENTER("drop_server_internal"); + DBUG_PRINT("info", ("server name server->server_name %s", + server_options->server_name.str)); + + tables.init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_SERVERS_NAME, 0, TL_WRITE); + + /* hit the memory hit first */ + if (unlikely((error= delete_server_record_in_cache(server_options)))) + goto end; + + if (unlikely(!(table= open_ltable(thd, &tables, TL_WRITE, + MYSQL_LOCK_IGNORE_TIMEOUT)))) + { + error= my_errno; + goto end; + } + + error= delete_server_record(table, &server_options->server_name); + + /* close the servers table before we call closed_cached_connection_tables */ + close_mysql_tables(thd); + + if (close_cached_connection_tables(thd, &server_options->server_name)) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_UNKNOWN_ERROR, "Server connection in use"); + } + +end: + DBUG_RETURN(error); +} + + +/** + Drop a server with servers cache mutex lock. +*/ +int drop_server(THD *thd, LEX_SERVER_OPTIONS *server_options) +{ + mysql_rwlock_wrlock(&THR_LOCK_servers); + int rc= drop_server_internal(thd, server_options); + mysql_rwlock_unlock(&THR_LOCK_servers); + return rc; +} + + +/* + + SYNOPSIS + delete_server_record_in_cache() + LEX_SERVER_OPTIONS *server_options + + NOTES + This function's argument is a LEX_SERVER_OPTIONS struct pointer. This + function uses the "server_name" and "server_name_length" members of the + lex->server_options to search for the server in the servers_cache. Upon + returned the server (pointer to a FOREIGN_SERVER struct), it then deletes + that server from the servers_cache hash. 
+ + RETURN VALUE + 0 - no error + +*/ + +static int +delete_server_record_in_cache(LEX_SERVER_OPTIONS *server_options) +{ + int error= ER_FOREIGN_SERVER_DOESNT_EXIST; + FOREIGN_SERVER *server; + DBUG_ENTER("delete_server_record_in_cache"); + + DBUG_PRINT("info",("trying to obtain server name %s length %zu", + server_options->server_name.str, + server_options->server_name.length)); + + + if (!(server= (FOREIGN_SERVER *) + my_hash_search(&servers_cache, + (uchar*) server_options->server_name.str, + server_options->server_name.length))) + { + DBUG_PRINT("info", ("server_name %s length %zu not found!", + server_options->server_name.str, + server_options->server_name.length)); + goto end; + } + /* + We succeeded in deletion of the server to the table, now delete + the server from the cache + */ + DBUG_PRINT("info",("deleting server %s length %zd", + server->server_name, + server->server_name_length)); + + my_hash_delete(&servers_cache, (uchar*) server); + + error= 0; + +end: + DBUG_RETURN(error); +} + + +/* + + SYNOPSIS + update_server() + THD *thd + FOREIGN_SERVER *existing + FOREIGN_SERVER *altered + + NOTES + This function takes as arguments a THD object pointer, and two pointers, + one pointing to the existing FOREIGN_SERVER struct "existing" (which is + the current record as it is) and another pointer pointing to the + FOREIGN_SERVER struct with the members containing the modified/altered + values that need to be updated in both the mysql.servers table and the + servers_cache. It opens a table, passes the table and the altered + FOREIGN_SERVER pointer, which will be used to update the mysql.servers + table for the particular server via the call to update_server_record, + and in the servers_cache via update_server_record_in_cache. + + THR_LOCK_servers must be write locked. 
+ + RETURN VALUE + 0 - no error + >0 - error code + +*/ + +int update_server(THD *thd, FOREIGN_SERVER *existing, FOREIGN_SERVER *altered) +{ + int error; + TABLE *table; + TABLE_LIST tables; + DBUG_ENTER("update_server"); + + tables.init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_SERVERS_NAME, 0, TL_WRITE); + + if (!(table= open_ltable(thd, &tables, TL_WRITE, MYSQL_LOCK_IGNORE_TIMEOUT))) + { + error= my_errno; + goto end; + } + + if (unlikely((error= update_server_record(table, altered)))) + goto end; + + error= update_server_record_in_cache(existing, altered); + + /* + Perform a reload so we don't have a 'hole' in our mem_root + */ + servers_load(thd, &tables); + +end: + DBUG_RETURN(error); +} + + +/* + + SYNOPSIS + update_server_record_in_cache() + FOREIGN_SERVER *existing + FOREIGN_SERVER *altered + + NOTES + This function takes as an argument the FOREIGN_SERVER structi pointer + for the existing server and the FOREIGN_SERVER struct populated with only + the members which have been updated. It then "merges" the "altered" struct + members to the existing server, the existing server then represents an + updated server. Then, the existing record is deleted from the servers_cache + HASH, then the updated record inserted, in essence replacing the old + record. + + THR_LOCK_servers must be write locked. 
+ + RETURN VALUE + 0 - no error + 1 - error + +*/ + +int update_server_record_in_cache(FOREIGN_SERVER *existing, + FOREIGN_SERVER *altered) +{ + int error= 0; + DBUG_ENTER("update_server_record_in_cache"); + + /* + update the members that haven't been change in the altered server struct + with the values of the existing server struct + */ + merge_server_struct(existing, altered); + + /* + delete the existing server struct from the server cache + */ + my_hash_delete(&servers_cache, (uchar*)existing); + + /* + Insert the altered server struct into the server cache + */ + if (my_hash_insert(&servers_cache, (uchar*)altered)) + { + DBUG_PRINT("info", ("had a problem inserting server %s at %p", + altered->server_name,altered)); + error= ER_OUT_OF_RESOURCES; + } + + DBUG_RETURN(error); +} + + +/* + + SYNOPSIS + merge_server_struct() + FOREIGN_SERVER *from + FOREIGN_SERVER *to + + NOTES + This function takes as its arguments two pointers each to an allocated + FOREIGN_SERVER struct. The first FOREIGN_SERVER struct represents the struct + that we will obtain values from (hence the name "from"), the second + FOREIGN_SERVER struct represents which FOREIGN_SERVER struct we will be + "copying" any members that have a value to (hence the name "to") + + RETURN VALUE + VOID + +*/ + +void merge_server_struct(FOREIGN_SERVER *from, FOREIGN_SERVER *to) +{ + DBUG_ENTER("merge_server_struct"); + if (!to->host) + to->host= strdup_root(&mem, from->host); + if (!to->db) + to->db= strdup_root(&mem, from->db); + if (!to->username) + to->username= strdup_root(&mem, from->username); + if (!to->password) + to->password= strdup_root(&mem, from->password); + if (to->port == -1) + to->port= from->port; + if (!to->socket && from->socket) + to->socket= strdup_root(&mem, from->socket); + if (!to->scheme && from->scheme) + to->scheme= strdup_root(&mem, from->scheme); + if (!to->owner) + to->owner= strdup_root(&mem, from->owner); + + DBUG_VOID_RETURN; +} + + +/* + + SYNOPSIS + update_server_record() + 
TABLE *table + FOREIGN_SERVER *server + + NOTES + This function takes as its arguments an open TABLE pointer, and a pointer + to an allocated FOREIGN_SERVER structure representing an updated record + which needs to be inserted. The primary key, server_name is stored to field + 0, then index_read_idx is called to read the index to that record, the + record then being ready to be updated, if found. If not found an error is + set and error message printed. If the record is found, store_record is + called, then store_server_fields stores each field from the the members of + the updated FOREIGN_SERVER struct. + + RETURN VALUE + 0 - no error + +*/ + + +static int +update_server_record(TABLE *table, FOREIGN_SERVER *server) +{ + int error=0; + DBUG_ENTER("update_server_record"); + DBUG_ASSERT(!table->file->row_logging); + + table->use_all_columns(); + /* set the field that's the PK to the value we're looking for */ + table->field[0]->store(server->server_name, + server->server_name_length, + system_charset_info); + + if (unlikely((error= + table->file->ha_index_read_idx_map(table->record[0], 0, + (uchar *)table->field[0]-> + ptr, + ~(longlong)0, + HA_READ_KEY_EXACT)))) + { + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + table->file->print_error(error, MYF(0)); + DBUG_PRINT("info",("server not found!")); + error= ER_FOREIGN_SERVER_DOESNT_EXIST; + } + else + { + /* ok, so we can update since the record exists in the table */ + store_record(table,record[1]); + store_server_fields(table, server); + if (unlikely((error=table->file->ha_update_row(table->record[1], + table->record[0])) && + error != HA_ERR_RECORD_IS_THE_SAME)) + { + DBUG_PRINT("info",("problems with ha_update_row %d", error)); + goto end; + } + else + error= 0; + } + +end: + DBUG_RETURN(error); +} + + +/* + + SYNOPSIS + delete_server_record() + TABLE *table + char *server_name + int server_name_length + + NOTES + + RETURN VALUE + 0 - no error + +*/ + +static int +delete_server_record(TABLE 
*table, LEX_CSTRING *name) +{ + int error; + DBUG_ENTER("delete_server_record"); + DBUG_ASSERT(!table->file->row_logging); + + table->use_all_columns(); + + /* set the field that's the PK to the value we're looking for */ + table->field[0]->store(name->str, name->length, system_charset_info); + + if (unlikely((error= + table->file->ha_index_read_idx_map(table->record[0], 0, + (uchar *)table->field[0]-> + ptr, + HA_WHOLE_KEY, + HA_READ_KEY_EXACT)))) + { + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + table->file->print_error(error, MYF(0)); + DBUG_PRINT("info",("server not found!")); + error= ER_FOREIGN_SERVER_DOESNT_EXIST; + } + else + { + if (unlikely((error= table->file->ha_delete_row(table->record[0])))) + table->file->print_error(error, MYF(0)); + } + + DBUG_RETURN(error); +} + +/* + + SYNOPSIS + create_server() + THD *thd + LEX_SERVER_OPTIONS *server_options + + NOTES + + RETURN VALUE + 0 - no error + +*/ + +int create_server(THD *thd, LEX_SERVER_OPTIONS *server_options) +{ + int error= ER_FOREIGN_SERVER_EXISTS; + FOREIGN_SERVER *server; + + DBUG_ENTER("create_server"); + DBUG_PRINT("info", ("server_options->server_name %s", + server_options->server_name.str)); + + mysql_rwlock_wrlock(&THR_LOCK_servers); + + /* hit the memory first */ + if (my_hash_search(&servers_cache, (uchar*) server_options->server_name.str, + server_options->server_name.length)) + { + if (thd->lex->create_info.or_replace()) + { + if (unlikely((error= drop_server_internal(thd, server_options)))) + goto end; + } + else if (thd->lex->create_info.if_not_exists()) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_FOREIGN_SERVER_EXISTS, + ER_THD(thd, ER_FOREIGN_SERVER_EXISTS), + server_options->server_name.str); + error= 0; + goto end; + } + else + goto end; + } + + if (!(server= prepare_server_struct_for_insert(server_options))) + { + /* purecov: begin inspected */ + error= ER_OUT_OF_RESOURCES; + goto end; + /* purecov: end */ + } + + error= 
insert_server(thd, server); + + DBUG_PRINT("info", ("error returned %d", error)); + +end: + mysql_rwlock_unlock(&THR_LOCK_servers); + + if (unlikely(error)) + { + DBUG_PRINT("info", ("problem creating server <%s>", + server_options->server_name.str)); + my_error(error, MYF(0), server_options->server_name.str); + } + else + my_ok(thd); + + DBUG_RETURN(error); +} + + +/* + + SYNOPSIS + alter_server() + THD *thd + LEX_SERVER_OPTIONS *server_options + + NOTES + + RETURN VALUE + 0 - no error + +*/ + +int alter_server(THD *thd, LEX_SERVER_OPTIONS *server_options) +{ + int error= ER_FOREIGN_SERVER_DOESNT_EXIST; + FOREIGN_SERVER altered, *existing; + DBUG_ENTER("alter_server"); + DBUG_PRINT("info", ("server_options->server_name %s", + server_options->server_name.str)); + + mysql_rwlock_wrlock(&THR_LOCK_servers); + + if (!(existing= (FOREIGN_SERVER *) my_hash_search(&servers_cache, + (uchar*) server_options->server_name.str, + server_options->server_name.length))) + goto end; + + prepare_server_struct_for_update(server_options, existing, &altered); + + error= update_server(thd, existing, &altered); + + /* close the servers table before we call closed_cached_connection_tables */ + close_mysql_tables(thd); + + if (close_cached_connection_tables(thd, &server_options->server_name)) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_UNKNOWN_ERROR, "Server connection in use"); + } + +end: + DBUG_PRINT("info", ("error returned %d", error)); + mysql_rwlock_unlock(&THR_LOCK_servers); + DBUG_RETURN(error); +} + + +/* + + SYNOPSIS + prepare_server_struct_for_insert() + LEX_SERVER_OPTIONS *server_options + + NOTES + As FOREIGN_SERVER members are allocated on mem_root, we do not need to + free them in case of error. + + RETURN VALUE + On success filled FOREIGN_SERVER, or NULL in case out of memory. 
+ +*/ + +static FOREIGN_SERVER * +prepare_server_struct_for_insert(LEX_SERVER_OPTIONS *server_options) +{ + FOREIGN_SERVER *server; + ulong default_port= 0; + DBUG_ENTER("prepare_server_struct"); + + if (!(server= (FOREIGN_SERVER *)alloc_root(&mem, sizeof(FOREIGN_SERVER)))) + DBUG_RETURN(NULL); /* purecov: inspected */ + +#define SET_SERVER_OR_RETURN(X, DEFAULT) \ + do { \ + if (!(server->X= server_options->X.str ? \ + strmake_root(&mem, server_options->X.str, \ + server_options->X.length) : "")) \ + DBUG_RETURN(NULL); \ + } while(0) + + /* name and scheme are always set (the parser guarantees it) */ + SET_SERVER_OR_RETURN(server_name, NULL); + SET_SERVER_OR_RETURN(scheme, NULL); + + /* scheme-specific checks */ + if (!strcasecmp(server->scheme, "mysql")) + { + default_port= MYSQL_PORT; + if (!server_options->host.str && !server_options->socket.str) + { + my_error(ER_CANT_CREATE_FEDERATED_TABLE, MYF(0), + "either HOST or SOCKET must be set"); + DBUG_RETURN(NULL); + } + } + + SET_SERVER_OR_RETURN(host, ""); + SET_SERVER_OR_RETURN(db, ""); + SET_SERVER_OR_RETURN(username, ""); + SET_SERVER_OR_RETURN(password, ""); + SET_SERVER_OR_RETURN(socket, ""); + SET_SERVER_OR_RETURN(owner, ""); + + server->server_name_length= server_options->server_name.length; + + /* set to default_port if not specified */ + server->port= server_options->port > -1 ? 
+ server_options->port : default_port; + + DBUG_RETURN(server); +} + +/* + + SYNOPSIS + prepare_server_struct_for_update() + LEX_SERVER_OPTIONS *server_options + + NOTES + + RETURN VALUE + 0 - no error + +*/ + +static void +prepare_server_struct_for_update(LEX_SERVER_OPTIONS *server_options, + FOREIGN_SERVER *existing, + FOREIGN_SERVER *altered) +{ + DBUG_ENTER("prepare_server_struct_for_update"); + + altered->server_name= existing->server_name; + altered->server_name_length= existing->server_name_length; + DBUG_PRINT("info", ("existing name %s altered name %s", + existing->server_name, altered->server_name)); + + /* + The logic here is this: is this value set AND is it different + than the existing value? + */ +#define SET_ALTERED(X) \ + do { \ + altered->X= \ + (server_options->X.str && strcmp(server_options->X.str, existing->X)) \ + ? strmake_root(&mem, server_options->X.str, server_options->X.length) \ + : 0; \ + } while(0) + + SET_ALTERED(host); + SET_ALTERED(db); + SET_ALTERED(username); + SET_ALTERED(password); + SET_ALTERED(socket); + SET_ALTERED(scheme); + SET_ALTERED(owner); + + /* + port is initialised to -1, so if unset, it will be -1 + */ + altered->port= (server_options->port > -1 && + server_options->port != existing->port) ? + server_options->port : -1; + + DBUG_VOID_RETURN; +} + +/* + + SYNOPSIS + servers_free() + bool end + + NOTES + + RETURN VALUE + void + +*/ + +void servers_free(bool end) +{ + DBUG_ENTER("servers_free"); + if (!my_hash_inited(&servers_cache)) + DBUG_VOID_RETURN; + if (!end) + { + free_root(&mem, MYF(MY_MARK_BLOCKS_FREE)); + my_hash_reset(&servers_cache); + DBUG_VOID_RETURN; + } + mysql_rwlock_destroy(&THR_LOCK_servers); + free_root(&mem,MYF(0)); + my_hash_free(&servers_cache); + DBUG_VOID_RETURN; +} + + +/* + SYNOPSIS + + clone_server(MEM_ROOT *mem_root, FOREIGN_SERVER *orig, FOREIGN_SERVER *buff) + + Create a clone of FOREIGN_SERVER. 
If the supplied mem_root is of + thd->mem_root then the copy is automatically disposed at end of statement. + + NOTES + + ARGS + MEM_ROOT pointer (strings are copied into this mem root) + FOREIGN_SERVER pointer (made a copy of) + FOREIGN_SERVER buffer (if not-NULL, this pointer is returned) + + RETURN VALUE + FOREIGN_SEVER pointer (copy of one supplied FOREIGN_SERVER) +*/ + +static FOREIGN_SERVER *clone_server(MEM_ROOT *mem, const FOREIGN_SERVER *server, + FOREIGN_SERVER *buffer) +{ + DBUG_ENTER("sql_server.cc:clone_server"); + + if (!buffer) + buffer= (FOREIGN_SERVER *) alloc_root(mem, sizeof(FOREIGN_SERVER)); + + buffer->server_name= strmake_root(mem, server->server_name, + server->server_name_length); + buffer->port= server->port; + buffer->server_name_length= server->server_name_length; + + /* TODO: We need to examine which of these can really be NULL */ + buffer->db= safe_strdup_root(mem, server->db); + buffer->scheme= safe_strdup_root(mem, server->scheme); + buffer->username= safe_strdup_root(mem, server->username); + buffer->password= safe_strdup_root(mem, server->password); + buffer->socket= safe_strdup_root(mem, server->socket); + buffer->owner= safe_strdup_root(mem, server->owner); + buffer->host= safe_strdup_root(mem, server->host); + + DBUG_RETURN(buffer); +} + + +/* + + SYNOPSIS + get_server_by_name() + const char *server_name + + NOTES + + RETURN VALUE + FOREIGN_SERVER * + +*/ + +FOREIGN_SERVER *get_server_by_name(MEM_ROOT *mem, const char *server_name, + FOREIGN_SERVER *buff) +{ + size_t server_name_length; + FOREIGN_SERVER *server; + DBUG_ENTER("get_server_by_name"); + DBUG_PRINT("info", ("server_name %s", server_name)); + + server_name_length= strlen(server_name); + + if (! 
server_name || !strlen(server_name)) + { + DBUG_PRINT("info", ("server_name not defined!")); + DBUG_RETURN((FOREIGN_SERVER *)NULL); + } + + DBUG_PRINT("info", ("locking servers_cache")); + mysql_rwlock_rdlock(&THR_LOCK_servers); + if (!(server= (FOREIGN_SERVER *) my_hash_search(&servers_cache, + (uchar*) server_name, + server_name_length))) + { + DBUG_PRINT("info", ("server_name %s length %u not found!", + server_name, (unsigned) server_name_length)); + server= (FOREIGN_SERVER *) NULL; + } + /* otherwise, make copy of server */ + else + server= clone_server(mem, server, buff); + + DBUG_PRINT("info", ("unlocking servers_cache")); + mysql_rwlock_unlock(&THR_LOCK_servers); + DBUG_RETURN(server); + +} diff --git a/sql/sql_servers.h b/sql/sql_servers.h new file mode 100644 index 00000000..cb5703ef --- /dev/null +++ b/sql/sql_servers.h @@ -0,0 +1,52 @@ +#ifndef SQL_SERVERS_INCLUDED +#define SQL_SERVERS_INCLUDED + +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "slave.h" // for tables_ok(), rpl_filter + +class THD; +typedef struct st_lex_server_options LEX_SERVER_OPTIONS; +typedef struct st_mem_root MEM_ROOT; + +/* structs */ +typedef struct st_federated_server +{ + const char *server_name; + long port; + size_t server_name_length; + const char *db, *scheme, *username, *password, *socket, *owner, *host, *sport; +} FOREIGN_SERVER; + +/* cache handlers */ +bool servers_init(bool dont_read_server_table); +bool servers_reload(THD *thd); +void servers_free(bool end=0); + +/* insert functions */ +int create_server(THD *thd, LEX_SERVER_OPTIONS *server_options); + +/* drop functions */ +int drop_server(THD *thd, LEX_SERVER_OPTIONS *server_options); + +/* update functions */ +int alter_server(THD *thd, LEX_SERVER_OPTIONS *server_options); + +/* lookup functions */ +FOREIGN_SERVER *get_server_by_name(MEM_ROOT *mem, const char *server_name, + FOREIGN_SERVER *server_buffer); + +#endif /* SQL_SERVERS_INCLUDED */ diff --git a/sql/sql_show.cc b/sql/sql_show.cc new file mode 100644 index 00000000..32b29468 --- /dev/null +++ b/sql/sql_show.cc @@ -0,0 +1,10669 @@ +/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. + Copyright (c) 2009, 2023, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* Function with list databases, tables or fields */ + +#include "sql_plugin.h" // SHOW_MY_BOOL +#include "sql_priv.h" +#include "unireg.h" +#include "sql_acl.h" // fill_schema_*_privileges +#include "sql_select.h" // For select_describe +#include "sql_base.h" // close_tables_for_reopen +#include "create_options.h" +#include "sql_show.h" +#include "sql_table.h" // filename_to_tablename, + // primary_key_name, + // build_table_filename +#include "sql_view.h" +#include "repl_failsafe.h" +#include "sql_parse.h" // check_access, check_table_access +#include "sql_partition.h" // partition_element +#include "sql_derived.h" // mysql_derived_prepare, + // mysql_handle_derived, +#include "sql_db.h" // check_db_dir_existence, load_db_opt_by_name +#include "sql_time.h" // interval_type_to_name +#include "tztime.h" // struct Time_zone +#include "sql_acl.h" // TABLE_ACLS, check_grant, DB_ACLS, acl_get, + // check_grant_db +#include "sp.h" +#include "sp_head.h" +#include "sp_pcontext.h" +#include "set_var.h" +#include "sql_trigger.h" +#include "sql_derived.h" +#include "sql_statistics.h" +#include "sql_connect.h" +#include "authors.h" +#include "contributors.h" +#include "sql_partition.h" +#ifdef HAVE_EVENT_SCHEDULER +#include "events.h" +#include "event_data_objects.h" +#endif +#include +#include "lock.h" // MYSQL_OPEN_IGNORE_FLUSH +#include "debug_sync.h" +#include "keycaches.h" +#include "ha_sequence.h" +#ifdef WITH_PARTITION_STORAGE_ENGINE +#include "ha_partition.h" +#endif +#include "transaction.h" +#include "opt_trace.h" +#include "my_cpu.h" +#include "key.h" + +#include "lex_symbol.h" +#define KEYWORD_SIZE 64 + +extern SYMBOL symbols[]; +extern size_t symbols_length; + +extern SYMBOL sql_functions[]; +extern size_t sql_functions_length; + +extern 
Native_func_registry_array native_func_registry_array; + +enum enum_i_s_events_fields +{ + ISE_EVENT_CATALOG= 0, + ISE_EVENT_SCHEMA, + ISE_EVENT_NAME, + ISE_DEFINER, + ISE_TIME_ZONE, + ISE_EVENT_BODY, + ISE_EVENT_DEFINITION, + ISE_EVENT_TYPE, + ISE_EXECUTE_AT, + ISE_INTERVAL_VALUE, + ISE_INTERVAL_FIELD, + ISE_SQL_MODE, + ISE_STARTS, + ISE_ENDS, + ISE_STATUS, + ISE_ON_COMPLETION, + ISE_CREATED, + ISE_LAST_ALTERED, + ISE_LAST_EXECUTED, + ISE_EVENT_COMMENT, + ISE_ORIGINATOR, + ISE_CLIENT_CS, + ISE_CONNECTION_CL, + ISE_DB_CL +}; + + +static const LEX_CSTRING trg_action_time_type_names[]= +{ + { STRING_WITH_LEN("BEFORE") }, + { STRING_WITH_LEN("AFTER") } +}; + +static const LEX_CSTRING trg_event_type_names[]= +{ + { STRING_WITH_LEN("INSERT") }, + { STRING_WITH_LEN("UPDATE") }, + { STRING_WITH_LEN("DELETE") } +}; + + +LEX_CSTRING DATA_clex_str= { STRING_WITH_LEN("DATA") }; +LEX_CSTRING INDEX_clex_str= { STRING_WITH_LEN("INDEX") }; + +#ifndef NO_EMBEDDED_ACCESS_CHECKS +static const char *grant_names[]={ + "select","insert","update","delete","create","drop","reload","shutdown", + "process","file","grant","references","index","alter"}; + +static TYPELIB grant_types = { sizeof(grant_names)/sizeof(char **), + "grant_types", + grant_names, NULL}; +#endif + +/* Match the values of enum ha_choice */ +static const LEX_CSTRING ha_choice_values[]= +{ + { STRING_WITH_LEN("") }, + { STRING_WITH_LEN("0") }, + { STRING_WITH_LEN("1") } +}; + +static void store_key_options(THD *, String *, TABLE *, KEY *); + +static int show_create_view(THD *thd, TABLE_LIST *table, String *buff); +static int show_create_sequence(THD *thd, TABLE_LIST *table_list, + String *packet); + +static const LEX_CSTRING *view_algorithm(TABLE_LIST *table); + +bool get_lookup_field_values(THD *, COND *, bool, TABLE_LIST *, + LOOKUP_FIELD_VALUES *); + +/** + Try to lock a mutex, but give up after a short while to not cause deadlocks + + The loop is short, as the mutex we are trying to lock are mutex the should + never 
be locked a long time, just over a few instructions. + + @return 0 ok + @return 1 error +*/ + +static bool trylock_short(mysql_mutex_t *mutex) +{ + uint i; + for (i= 0 ; i < 100 ; i++) + { + if (!mysql_mutex_trylock(mutex)) + return 0; + LF_BACKOFF(); + } + return 1; +} + + +/*************************************************************************** +** List all table types supported +***************************************************************************/ + + +static bool is_show_command(THD *thd) +{ + return sql_command_flags[thd->lex->sql_command] & CF_STATUS_COMMAND; +} + +static int make_version_string(char *buf, int buf_length, uint version) +{ + return (int)my_snprintf(buf, buf_length, "%d.%d", version>>8,version&0xff); +} + + +static const LEX_CSTRING maturity_name[]={ + { STRING_WITH_LEN("Unknown") }, + { STRING_WITH_LEN("Experimental") }, + { STRING_WITH_LEN("Alpha") }, + { STRING_WITH_LEN("Beta") }, + { STRING_WITH_LEN("Gamma") }, + { STRING_WITH_LEN("Stable") }}; + + +static my_bool show_plugins(THD *thd, plugin_ref plugin, + void *arg) +{ + TABLE *table= (TABLE*) arg; + struct st_maria_plugin *plug= plugin_decl(plugin); + struct st_plugin_dl *plugin_dl= plugin_dlib(plugin); + CHARSET_INFO *cs= system_charset_info; + char version_buf[20]; + + restore_record(table, s->default_values); + + table->field[0]->store(plugin_name(plugin)->str, + plugin_name(plugin)->length, cs); + + table->field[1]->store(version_buf, + make_version_string(version_buf, sizeof(version_buf), plug->version), + cs); + + switch (plugin_state(plugin)) { + case PLUGIN_IS_DELETED: + table->field[2]->store(STRING_WITH_LEN("DELETED"), cs); + break; + case PLUGIN_IS_UNINITIALIZED: + table->field[2]->store(STRING_WITH_LEN("INACTIVE"), cs); + break; + case PLUGIN_IS_READY: + table->field[2]->store(STRING_WITH_LEN("ACTIVE"), cs); + break; + case PLUGIN_IS_DISABLED: + table->field[2]->store(STRING_WITH_LEN("DISABLED"), cs); + break; + case PLUGIN_IS_DYING: + 
table->field[2]->store(STRING_WITH_LEN("INACTIVE"), cs); + break; + case PLUGIN_IS_FREED: // filtered in fill_plugins, used in fill_all_plugins + table->field[2]->store(STRING_WITH_LEN("NOT INSTALLED"), cs); + break; + default: + DBUG_ASSERT(0); + } + + table->field[3]->store(plugin_type_names[plug->type].str, + plugin_type_names[plug->type].length, + cs); + table->field[4]->store(version_buf, + make_version_string(version_buf, sizeof(version_buf), + *(uint *)plug->info), cs); + + if (plugin_dl) + { + table->field[5]->store(plugin_dl->dl.str, plugin_dl->dl.length, cs); + table->field[5]->set_notnull(); + table->field[6]->store(version_buf, + make_version_string(version_buf, sizeof(version_buf), + plugin_dl->mariaversion), + cs); + table->field[6]->set_notnull(); + } + else + { + table->field[5]->set_null(); + table->field[6]->set_null(); + } + + + if (plug->author) + { + table->field[7]->store(plug->author, strlen(plug->author), cs); + table->field[7]->set_notnull(); + } + else + table->field[7]->set_null(); + + if (plug->descr) + { + table->field[8]->store(plug->descr, strlen(plug->descr), cs); + table->field[8]->set_notnull(); + } + else + table->field[8]->set_null(); + + switch (plug->license) { + case PLUGIN_LICENSE_GPL: + table->field[9]->store(PLUGIN_LICENSE_GPL_STRING, + strlen(PLUGIN_LICENSE_GPL_STRING), cs); + break; + case PLUGIN_LICENSE_BSD: + table->field[9]->store(PLUGIN_LICENSE_BSD_STRING, + strlen(PLUGIN_LICENSE_BSD_STRING), cs); + break; + default: + table->field[9]->store(PLUGIN_LICENSE_PROPRIETARY_STRING, + strlen(PLUGIN_LICENSE_PROPRIETARY_STRING), cs); + break; + } + + table->field[10]->store( + global_plugin_typelib_names[plugin_load_option(plugin)], + strlen(global_plugin_typelib_names[plugin_load_option(plugin)]), + cs); + + if (plug->maturity <= MariaDB_PLUGIN_MATURITY_STABLE) + table->field[11]->store(maturity_name[plug->maturity].str, + maturity_name[plug->maturity].length, + cs); + else + table->field[11]->store("Unknown", 7, cs); + + if 
(plug->version_info) + { + table->field[12]->store(plug->version_info, + strlen(plug->version_info), cs); + table->field[12]->set_notnull(); + } + else + table->field[12]->set_null(); + + return schema_table_store_record(thd, table); +} + + +int fill_plugins(THD *thd, TABLE_LIST *tables, COND *cond) +{ + DBUG_ENTER("fill_plugins"); + TABLE *table= tables->table; + + if (plugin_foreach_with_mask(thd, show_plugins, MYSQL_ANY_PLUGIN, + ~PLUGIN_IS_FREED, table)) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +int fill_all_plugins(THD *thd, TABLE_LIST *tables, COND *cond) +{ + DBUG_ENTER("fill_all_plugins"); + TABLE *table= tables->table; + LOOKUP_FIELD_VALUES lookup; + + if (get_lookup_field_values(thd, cond, true, tables, &lookup)) + DBUG_RETURN(0); + + if (lookup.db_value.str && !lookup.db_value.str[0]) + DBUG_RETURN(0); // empty string never matches a valid SONAME + + MY_DIR *dirp= my_dir(opt_plugin_dir, MY_THREAD_SPECIFIC); + if (!dirp) + { + my_error(ER_CANT_READ_DIR, MYF(0), opt_plugin_dir, my_errno); + DBUG_RETURN(1); + } + + if (!lookup.db_value.str) + plugin_dl_foreach(thd, 0, show_plugins, table); + + const char *wstr= lookup.db_value.str, *wend= wstr + lookup.db_value.length; + for (size_t i=0; i < dirp->number_of_files; i++) + { + FILEINFO *file= dirp->dir_entry+i; + LEX_CSTRING dl= { file->name, strlen(file->name) }; + const char *dlend= dl.str + dl.length; + const size_t so_ext_len= sizeof(SO_EXT) - 1; + + if (strcasecmp(dlend - so_ext_len, SO_EXT)) + continue; + + if (lookup.db_value.str) + { + if (lookup.wild_db_value) + { + if (files_charset_info->wildcmp(dl.str, dlend, wstr, wend, + wild_prefix, wild_one, wild_many)) + continue; + } + else + { + if (files_charset_info->strnncoll(dl.str, dl.length, + lookup.db_value.str, lookup.db_value.length)) + continue; + } + } + + plugin_dl_foreach(thd, &dl, show_plugins, table); + thd->clear_error(); + } + + my_dirend(dirp); + DBUG_RETURN(0); +} + + 
+/*************************************************************************** +** List all Authors. +** If you can update it, you get to be in it :) +***************************************************************************/ + +bool mysqld_show_authors(THD *thd) +{ + List field_list; + Protocol *protocol= thd->protocol; + MEM_ROOT *mem_root= thd->mem_root; + DBUG_ENTER("mysqld_show_authors"); + + field_list.push_back(new (mem_root) Item_empty_string(thd, "Name", 40), + mem_root); + field_list.push_back(new (mem_root) Item_empty_string(thd, "Location", 40), + mem_root); + field_list.push_back(new (mem_root) Item_empty_string(thd, "Comment", 512), + mem_root); + + if (protocol->send_result_set_metadata(&field_list, + Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF)) + DBUG_RETURN(TRUE); + + show_table_authors_st *authors; + for (authors= show_table_authors; authors->name; authors++) + { + protocol->prepare_for_resend(); + protocol->store(authors->name, strlen(authors->name), system_charset_info); + protocol->store(authors->location, strlen(authors->location), + system_charset_info); + protocol->store(authors->comment, strlen(authors->comment), + system_charset_info); + if (protocol->write()) + DBUG_RETURN(TRUE); + } + my_eof(thd); + DBUG_RETURN(FALSE); +} + + +/*************************************************************************** +** List all Contributors. 
+** Please get permission before updating +***************************************************************************/ + +bool mysqld_show_contributors(THD *thd) +{ + List field_list; + Protocol *protocol= thd->protocol; + MEM_ROOT *mem_root= thd->mem_root; + DBUG_ENTER("mysqld_show_contributors"); + + field_list.push_back(new (mem_root) Item_empty_string(thd, "Name", 40), + mem_root); + field_list.push_back(new (mem_root) Item_empty_string(thd, "Location", 40), + mem_root); + field_list.push_back(new (mem_root) Item_empty_string(thd, "Comment", 512), + mem_root); + + if (protocol->send_result_set_metadata(&field_list, + Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF)) + DBUG_RETURN(TRUE); + + show_table_contributors_st *contributors; + for (contributors= show_table_contributors; contributors->name; contributors++) + { + protocol->prepare_for_resend(); + protocol->store(contributors->name, strlen(contributors->name), + system_charset_info); + protocol->store(contributors->location, strlen(contributors->location), + system_charset_info); + protocol->store(contributors->comment, strlen(contributors->comment), + system_charset_info); + if (protocol->write()) + DBUG_RETURN(TRUE); + } + my_eof(thd); + DBUG_RETURN(FALSE); +} + + +/*************************************************************************** + List all privileges supported +***************************************************************************/ + +struct show_privileges_st { + const char *privilege; + const char *context; + const char *comment; +}; + +static struct show_privileges_st sys_privileges[]= +{ + {"Alter", "Tables", "To alter the table"}, + {"Alter routine", "Functions,Procedures", "To alter or drop stored functions/procedures"}, + {"Create", "Databases,Tables,Indexes", "To create new databases and tables"}, + {"Create routine","Databases","To use CREATE FUNCTION/PROCEDURE"}, + {"Create temporary tables","Databases","To use CREATE TEMPORARY TABLE"}, + {"Create view", "Tables", "To create new 
views"}, + {"Create user", "Server Admin", "To create new users"}, + {"Delete", "Tables", "To delete existing rows"}, + {"Delete history", "Tables", "To delete versioning table historical rows"}, + {"Drop", "Databases,Tables", "To drop databases, tables, and views"}, +#ifdef HAVE_EVENT_SCHEDULER + {"Event","Server Admin","To create, alter, drop and execute events"}, +#endif + {"Execute", "Functions,Procedures", "To execute stored routines"}, + {"File", "File access on server", "To read and write files on the server"}, + {"Grant option", "Databases,Tables,Functions,Procedures", "To give to other users those privileges you possess"}, + {"Index", "Tables", "To create or drop indexes"}, + {"Insert", "Tables", "To insert data into tables"}, + {"Lock tables","Databases","To use LOCK TABLES (together with SELECT privilege)"}, + {"Process", "Server Admin", "To view the plain text of currently executing queries"}, + {"Proxy", "Server Admin", "To make proxy user possible"}, + {"References", "Databases,Tables", "To have references on tables"}, + {"Reload", "Server Admin", "To reload or refresh tables, logs and privileges"}, + {"Binlog admin", "Server", "To purge binary logs"}, + {"Binlog monitor", "Server", "To use SHOW BINLOG STATUS and SHOW BINARY LOG"}, + {"Binlog replay", "Server", "To use BINLOG (generated by mariadb-binlog)"}, + {"Replication master admin", "Server", "To monitor connected slaves"}, + {"Replication slave admin", "Server", "To start/stop slave and apply binlog events"}, + {"Slave monitor", "Server", "To use SHOW SLAVE STATUS and SHOW RELAYLOG EVENTS"}, + {"Replication slave","Server Admin","To read binary log events from the master"}, + {"Select", "Tables", "To retrieve rows from table"}, + {"Show databases","Server Admin","To see all databases with SHOW DATABASES"}, + {"Show view","Tables","To see views with SHOW CREATE VIEW"}, + {"Shutdown","Server Admin", "To shut down the server"}, + {"Super","Server Admin","To use KILL thread, SET GLOBAL, CHANGE 
MASTER, etc."}, + {"Trigger","Tables", "To use triggers"}, + {"Create tablespace", "Server Admin", "To create/alter/drop tablespaces"}, + {"Update", "Tables", "To update existing rows"}, + {"Set user","Server", "To create views and stored routines with a different definer"}, + {"Federated admin", "Server", "To execute the CREATE SERVER, ALTER SERVER, DROP SERVER statements"}, + {"Connection admin", "Server", "To bypass connection limits and kill other users' connections"}, + {"Read_only admin", "Server", "To perform write operations even if @@read_only=ON"}, + {"Usage","Server Admin","No privileges - allow connect only"}, + {NullS, NullS, NullS} +}; + +bool mysqld_show_privileges(THD *thd) +{ + List field_list; + Protocol *protocol= thd->protocol; + MEM_ROOT *mem_root= thd->mem_root; + DBUG_ENTER("mysqld_show_privileges"); + + field_list.push_back(new (mem_root) Item_empty_string(thd, "Privilege", 10), + mem_root); + field_list.push_back(new (mem_root) Item_empty_string(thd, "Context", 15), + mem_root); + field_list.push_back(new (mem_root) Item_empty_string(thd, "Comment", + NAME_CHAR_LEN), + mem_root); + + if (protocol->send_result_set_metadata(&field_list, + Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF)) + DBUG_RETURN(TRUE); + + show_privileges_st *privilege= sys_privileges; + for (privilege= sys_privileges; privilege->privilege ; privilege++) + { + protocol->prepare_for_resend(); + protocol->store(privilege->privilege, strlen(privilege->privilege), + system_charset_info); + protocol->store(privilege->context, strlen(privilege->context), + system_charset_info); + protocol->store(privilege->comment, strlen(privilege->comment), + system_charset_info); + if (protocol->write()) + DBUG_RETURN(TRUE); + } + my_eof(thd); + DBUG_RETURN(FALSE); +} + + +/** Hash of LEX_STRINGs used to search for ignored db directories. */ +static HASH ignore_db_dirs_hash; + +/** + An array of LEX_STRING pointers to collect the options at + option parsing time. 
+*/ +static DYNAMIC_ARRAY ignore_db_dirs_array; + +/** + A value for the read only system variable to show a list of + ignored directories. +*/ +char *opt_ignore_db_dirs= NULL; + +/** + This flag is ON if: + - the list of ignored directories is not empty + + - and some of the ignored directory names + need no tablename-to-filename conversion. + Otherwise, if the name of the directory contains + unconditional characters like '+' or '.', they + never can match the database directory name. So the + db_name_is_in_ignore_db_dirs_list() can just return at once. +*/ +static bool skip_ignored_dir_check= TRUE; + +/** + Sets up the data structures for collection of directories at option + processing time. + We need to collect the directories in an array first, because + we need the character sets initialized before setting up the hash. + + @return state + @retval TRUE failed + @retval FALSE success +*/ + +bool +ignore_db_dirs_init() +{ + return my_init_dynamic_array(key_memory_ignored_db, &ignore_db_dirs_array, + sizeof(LEX_STRING *), 0, 0, MYF(0)); +} + + +/** + Retrieves the key (the string itself) from the LEX_STRING hash members. + + Needed by hash_init(). + + @param data the data element from the hash + @param out len_ret Placeholder to return the length of the key + @param unused + @return a pointer to the key +*/ + +static uchar * +db_dirs_hash_get_key(const uchar *data, size_t *len_ret, + my_bool __attribute__((unused))) +{ + LEX_CSTRING *e= (LEX_CSTRING *) data; + + *len_ret= e->length; + return (uchar *) e->str; +} + + +/** + Wrap a directory name into a LEX_STRING and push it to the array. + + Called at option processing time for each --ignore-db-dir option. 
+ + @param path the name of the directory to push + @return state + @retval TRUE failed + @retval FALSE success +*/ + +bool push_ignored_db_dir(const char *path) +{ + LEX_CSTRING *new_elt; + char *new_elt_buffer; + size_t path_len= strlen(path); + + if (!path_len || path_len >= FN_REFLEN) + return true; + + // No need to normalize, it's only a directory name, not a path. + if (!my_multi_malloc(key_memory_ignored_db, MYF(0), + &new_elt, sizeof(LEX_STRING), + &new_elt_buffer, path_len + 1, + NullS)) + return true; + new_elt->str= new_elt_buffer; + memcpy(new_elt_buffer, path, path_len); + new_elt_buffer[path_len]= 0; + new_elt->length= path_len; + return insert_dynamic(&ignore_db_dirs_array, (uchar*) &new_elt); +} + + +/** + Clean up the directory ignore options accumulated so far. + + Called at option processing time for each --ignore-db-dir option + with an empty argument. +*/ + +void +ignore_db_dirs_reset() +{ + LEX_CSTRING **elt; + while (NULL!= (elt= (LEX_CSTRING **) pop_dynamic(&ignore_db_dirs_array))) + if (elt && *elt) + my_free(*elt); +} + + +/** + Free the directory ignore option variables. + + Called at server shutdown. +*/ + +void +ignore_db_dirs_free() +{ + if (opt_ignore_db_dirs) + { + my_free(opt_ignore_db_dirs); + opt_ignore_db_dirs= NULL; + } + ignore_db_dirs_reset(); + delete_dynamic(&ignore_db_dirs_array); + my_hash_free(&ignore_db_dirs_hash); +} + + +/** + Initialize the ignore db directories hash and status variable from + the options collected in the array. + + Called when option processing is over and the server's in-memory + structures are fully initialized. + + @return state + @retval TRUE failed + @retval FALSE success +*/ + +static void dispose_db_dir(void *ptr) +{ + my_free(ptr); +} + + +/* + Append an element into @@ignore_db_dirs + + This is a function to be called after regular option processing has been + finalized. 
+*/ + +void ignore_db_dirs_append(const char *dirname_arg) +{ + char *new_entry_buf; + LEX_STRING *new_entry; + size_t len= strlen(dirname_arg); + + if (!my_multi_malloc(PSI_INSTRUMENT_ME, MYF(0), + &new_entry, sizeof(LEX_STRING), + &new_entry_buf, len + 1, + NullS)) + return; + + memcpy(new_entry_buf, dirname_arg, len+1); + new_entry->str = new_entry_buf; + new_entry->length= len; + + if (my_hash_insert(&ignore_db_dirs_hash, (uchar *)new_entry)) + { + // Either the name is already there or out-of-memory. + my_free(new_entry); + return; + } + + // Append the name to the option string. + size_t curlen= strlen(opt_ignore_db_dirs); + // Add one for comma and one for \0. + size_t newlen= curlen + len + 1 + 1; + char *new_db_dirs; + if (!(new_db_dirs= (char*)my_malloc(PSI_INSTRUMENT_ME, newlen, MYF(0)))) + { + // This is not a critical condition + return; + } + + memcpy(new_db_dirs, opt_ignore_db_dirs, curlen); + if (curlen != 0) + new_db_dirs[curlen]=','; + memcpy(new_db_dirs + (curlen + ((curlen!=0)?1:0)), dirname_arg, len+1); + + if (opt_ignore_db_dirs) + my_free(opt_ignore_db_dirs); + opt_ignore_db_dirs= new_db_dirs; +} + +bool +ignore_db_dirs_process_additions() +{ + ulong i; + size_t len; + char *ptr; + LEX_CSTRING *dir; + + skip_ignored_dir_check= TRUE; + + if (my_hash_init(key_memory_ignored_db, &ignore_db_dirs_hash, + lower_case_table_names ? character_set_filesystem : + &my_charset_bin, 0, 0, 0, db_dirs_hash_get_key, + dispose_db_dir, HASH_UNIQUE)) + return true; + + /* len starts from 1 because of the terminating zero. */ + len= 1; + for (i= 0; i < ignore_db_dirs_array.elements; i++) + { + get_dynamic(&ignore_db_dirs_array, (uchar *) &dir, i); + len+= dir->length + 1; // +1 for the comma + if (skip_ignored_dir_check) + { + char buff[FN_REFLEN]; + (void) tablename_to_filename(dir->str, buff, sizeof(buff)); + skip_ignored_dir_check= strcmp(dir->str, buff) != 0; + } + } + + /* No delimiter for the last directory. 
*/ + if (len > 1) + len--; + + /* +1 the terminating zero */ + ptr= opt_ignore_db_dirs= (char *) my_malloc(key_memory_ignored_db, len + 1, + MYF(0)); + if (!ptr) + return true; + + /* Make sure we have an empty string to start with. */ + *ptr= 0; + + for (i= 0; i < ignore_db_dirs_array.elements; i++) + { + get_dynamic(&ignore_db_dirs_array, (uchar *) &dir, i); + if (my_hash_insert(&ignore_db_dirs_hash, (uchar *)dir)) + { + /* ignore duplicates from the config file */ + if (my_hash_search(&ignore_db_dirs_hash, (uchar *)dir->str, dir->length)) + { + sql_print_warning("Duplicate ignore-db-dir directory name '%.*s' " + "found in the config file(s). Ignoring the duplicate.", + (int) dir->length, dir->str); + my_free(dir); + goto continue_loop; + } + + return true; + } + ptr= strnmov(ptr, dir->str, dir->length); + *(ptr++)= ','; + +continue_loop: + /* + Set the transferred array element to NULL to avoid double free + in case of error. + */ + dir= NULL; + set_dynamic(&ignore_db_dirs_array, (uchar *) &dir, i); + } + + if (ptr > opt_ignore_db_dirs) + { + ptr--; + DBUG_ASSERT(*ptr == ','); + } + + /* make sure the string is terminated */ + DBUG_ASSERT(ptr - opt_ignore_db_dirs <= (ptrdiff_t) len); + *ptr= 0; + + /* + It's OK to empty the array here as the allocated elements are + referenced through the hash now. + */ + reset_dynamic(&ignore_db_dirs_array); + + return false; +} + + +/** + Check if a directory name is in the hash of ignored directories. + + @return search result + @retval TRUE found + @retval FALSE not found +*/ + +static inline bool +is_in_ignore_db_dirs_list(const char *directory) +{ + return ignore_db_dirs_hash.records && + NULL != my_hash_search(&ignore_db_dirs_hash, (const uchar *) directory, + strlen(directory)); +} + + +/** + Check if a database name is in the hash of ignored directories. 
  @return search result
  @retval TRUE  found
  @retval FALSE not found
*/

bool
db_name_is_in_ignore_db_dirs_list(const char *directory)
{
  char buff[FN_REFLEN];
  uint buff_len;

  /*
    Fast path: if no configured name changes under filename conversion,
    the check was already done via the plain hash lookup elsewhere.
  */
  if (skip_ignored_dir_check)
    return 0;

  /* Compare in filename (on-disk) form, like the hash keys. */
  buff_len= tablename_to_filename(directory, buff, sizeof(buff));

  return my_hash_search(&ignore_db_dirs_hash, (uchar *) buff, buff_len)!=NULL;
}

enum find_files_result {
  FIND_FILES_OK,
  FIND_FILES_OOM,
  FIND_FILES_DIR
};

/*
  find_files() - find files in a given directory.

  SYNOPSIS
    find_files()
    thd                 thread handler
    files               put found files in this list
    db                  database name to search tables in
                        or NULL to search for databases
    path                path to database
    wild                filter for found files

  RETURN
    FIND_FILES_OK       success
    FIND_FILES_OOM      out of memory error
    FIND_FILES_DIR      no such directory, or directory can't be read
*/

/*
  NOTE(review): the template argument of Dynamic_array was stripped by
  extraction; restored as Dynamic_array<LEX_CSTRING*> — confirm against
  the original file.
*/
static find_files_result
find_files(THD *thd, Dynamic_array<LEX_CSTRING*> *files, LEX_CSTRING *db,
           const char *path, const LEX_CSTRING *wild)
{
  MY_DIR *dirp;
  Discovered_table_list tl(thd, files, wild);
  DBUG_ENTER("find_files");

  /* stat() info is only needed when listing databases (directories). */
  if (!(dirp = my_dir(path, MY_THREAD_SPECIFIC | (db ? 0 : MY_WANT_STAT))))
  {
    if (my_errno == ENOENT && db)
      my_error(ER_BAD_DB_ERROR, MYF(0), db->str);
    else
      my_error(ER_CANT_READ_DIR, MYF(0), path, my_errno);
    DBUG_RETURN(FIND_FILES_DIR);
  }

  if (!db)                                      /* Return databases */
  {
    for (size_t i=0; i < dirp->number_of_files; i++)
    {
      FILEINFO *file= dirp->dir_entry+i;
#ifdef USE_SYMDIR
      char *ext;
      char buff[FN_REFLEN];
      if (my_use_symdir && !strcmp(ext=fn_ext(file->name), ".sym"))
      {
        /* Only show the sym file if it points to a directory */
        char *end;
        *ext=0;                                 /* Remove extension */
        unpack_dirname(buff, file->name);
        end= strend(buff);
        if (end != buff && end[-1] == FN_LIBCHAR)
          end[-1]= 0;                           // Remove end FN_LIBCHAR
        if (!mysql_file_stat(key_file_misc, buff, file->mystat, MYF(0)))
          continue;
      }
#endif
      if (!MY_S_ISDIR(file->mystat->st_mode))
        continue;

      if (is_in_ignore_db_dirs_list(file->name))
        continue;

      if (tl.add_file(file->name))
        goto err;
    }
  }
  else
  {
    /* Listing tables: let the storage engines discover the names. */
    if (ha_discover_table_names(thd, db, dirp, &tl, false))
      goto err;
  }
  if (is_show_command(thd))
    tl.sort();
#ifndef DBUG_OFF
  else
  {
    /*
      sort_desc() is used to find easier unstable mtr tests that query
      INFORMATION_SCHEMA.{SCHEMATA|TABLES} without a proper ORDER BY.
      This can be removed in some release after 10.3 (e.g. in 10.4).
    */
    tl.sort_desc();
  }
#endif

  DBUG_PRINT("info",("found: %zu files", files->elements()));
  my_dirend(dirp);

  DBUG_RETURN(FIND_FILES_OK);

err:
  my_dirend(dirp);
  DBUG_RETURN(FIND_FILES_OOM);
}


/**
  An Internal_error_handler that suppresses errors regarding views'
  underlying tables that occur during privilege checking within SHOW CREATE
  VIEW commands. This happens in the cases when

  - A view's underlying table (e.g. referenced in its SELECT list) does not
    exist. There should not be an error as no attempt was made to access it
    per se.

  - Access is denied for some table, column, function or stored procedure
    such as mentioned above.
    This error gets raised automatically, since we
    can't untangle its access checking from that of the view itself.
 */
class Show_create_error_handler : public Internal_error_handler {

  TABLE_LIST *m_top_view;
  bool m_handling;                // re-entrancy guard: true while handling
  Security_context *m_sctx;       // context used to build the denied message

  char m_view_access_denied_message[MYSQL_ERRMSG_SIZE];
  char *m_view_access_denied_message_ptr;   // NULL until lazily formatted

public:

  /**
     Creates a new Show_create_error_handler for the particular security
     context and view.

     @thd Thread context, used for security context information if needed.
     @top_view The view. We do not verify at this point that top_view is in
     fact a view since, alas, these things do not stay constant.
  */
  explicit Show_create_error_handler(THD *thd, TABLE_LIST *top_view) :
    m_top_view(top_view), m_handling(FALSE),
    m_view_access_denied_message_ptr(NULL)
  {

    m_sctx= MY_TEST(m_top_view->security_ctx) ?
      m_top_view->security_ctx : thd->security_ctx;
  }

  /**
     Lazy instantiation of 'view access denied' message. The purpose of the
     Show_create_error_handler is to hide details of underlying tables for
     which we have no privileges behind ER_VIEW_INVALID messages. But this
     obviously does not apply if we lack privileges on the view itself.
     Unfortunately the information about for which table privilege checking
     failed is not available at this point. The only way for us to check is by
     reconstructing the actual error message and see if it's the same.
  */
  char* get_view_access_denied_message(THD *thd)
  {
    if (!m_view_access_denied_message_ptr)
    {
      m_view_access_denied_message_ptr= m_view_access_denied_message;
      my_snprintf(m_view_access_denied_message, MYSQL_ERRMSG_SIZE,
                  ER_THD(thd, ER_TABLEACCESS_DENIED_ERROR), "SHOW VIEW",
                  m_sctx->priv_user,
                  m_sctx->host_or_ip,
                  m_top_view->get_db_name(), m_top_view->get_table_name());
    }
    return m_view_access_denied_message_ptr;
  }

  /*
    Decide whether a condition raised while opening the view's underlying
    objects should be swallowed (possibly replaced by an ER_VIEW_INVALID
    warning).  Returns TRUE when the condition was handled here.
  */
  bool handle_condition(THD *thd, uint sql_errno, const char * /* sqlstate */,
                        Sql_condition::enum_warning_level *level,
                        const char *message, Sql_condition ** /* cond_hdl */)
  {
    /*
      The handler does not handle the errors raised by itself.
      At this point we know if top_view is really a view.
    */
    if (m_handling || !m_top_view->view)
      return FALSE;

    m_handling= TRUE;

    bool is_handled;

    switch (sql_errno)
    {
    case ER_TABLEACCESS_DENIED_ERROR:
      if (!strcmp(get_view_access_denied_message(thd), message))
      {
        /* Access to top view is not granted, don't interfere. */
        is_handled= FALSE;
        break;
      }
      /* fall through */
    case ER_COLUMNACCESS_DENIED_ERROR:
    case ER_VIEW_NO_EXPLAIN: /* Error was anonymized, ignore all the same. */
    case ER_PROCACCESS_DENIED_ERROR:
      is_handled= TRUE;
      break;

    case ER_BAD_FIELD_ERROR:
    case ER_SP_DOES_NOT_EXIST:
    case ER_NO_SUCH_TABLE:
    case ER_NO_SUCH_TABLE_IN_ENGINE:
      /*
        Established behavior: warn if underlying tables, columns, or
        functions are missing.
      */
      push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                          ER_VIEW_INVALID,
                          ER_THD(thd, ER_VIEW_INVALID),
                          m_top_view->get_db_name(),
                          m_top_view->get_table_name());
      is_handled= TRUE;
      break;

    default:
      is_handled= FALSE;
    }

    m_handling= FALSE;
    return is_handled;
  }
};


/*
  Return metadata for CREATE command for table or view

  @param thd          Thread handler
  @param table_list   Table / view
  @param field_list   resulting list of fields
  @param buffer       resulting CREATE statement

  @return
  @retval 0      OK
  @retval 1      Error

  NOTE(review): template arguments were stripped by extraction; the list
  parameter is restored as List<Item> — confirm against the original file.
*/

bool
mysqld_show_create_get_fields(THD *thd, TABLE_LIST *table_list,
                              List<Item> *field_list, String *buffer)
{
  bool error= TRUE;
  LEX *lex= thd->lex;
  MEM_ROOT *mem_root= thd->mem_root;
  DBUG_ENTER("mysqld_show_create_get_fields");
  DBUG_PRINT("enter",("db: %s table: %s",table_list->db.str,
                      table_list->table_name.str));

  if (lex->table_type == TABLE_TYPE_VIEW)
  {
    if (check_table_access(thd, SELECT_ACL, table_list, FALSE, 1, FALSE))
    {
      DBUG_PRINT("debug", ("check_table_access failed"));
      my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0),
               "SHOW", thd->security_ctx->priv_user,
               thd->security_ctx->host_or_ip,
               table_list->db.str, table_list->alias.str);
      goto exit;
    }
    DBUG_PRINT("debug", ("check_table_access succeeded"));

    /* Ignore temporary tables if this is "SHOW CREATE VIEW" */
    table_list->open_type= OT_BASE_ONLY;
  }
  else
  {
    /*
      Temporary tables should be opened for SHOW CREATE TABLE, but not
      for SHOW CREATE VIEW.
    */
    if (thd->open_temporary_tables(table_list))
      goto exit;

    /*
      The fact that check_some_access() returned FALSE does not mean that
      access is granted. We need to check if table_list->grant.privilege
      contains any table-specific privilege.
    */
    DBUG_PRINT("debug", ("table_list->grant.privilege: %llx",
                         (longlong) (table_list->grant.privilege)));
    if (check_some_access(thd, SHOW_CREATE_TABLE_ACLS, table_list) ||
        (table_list->grant.privilege & SHOW_CREATE_TABLE_ACLS) == NO_ACL)
    {
      my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0),
               "SHOW", thd->security_ctx->priv_user,
               thd->security_ctx->host_or_ip,
               table_list->db.str, table_list->alias.str);
      goto exit;
    }
  }
  /* Access is granted. Execute the command.  */

  /* We want to preserve the tree for views. */
  lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_VIEW;

  {
    /*
      Use open_tables() directly rather than
      open_normal_and_derived_tables().  This ensures that
      close_thread_tables() is not called if open tables fails and the
      error is ignored. This allows us to handle broken views nicely.
    */
    uint counter;
    Show_create_error_handler view_error_suppressor(thd, table_list);
    thd->push_internal_handler(&view_error_suppressor);
    bool open_error=
      open_tables(thd, &table_list, &counter,
                  MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL) ||
      mysql_handle_derived(lex, DT_INIT | DT_PREPARE);
    thd->pop_internal_handler();
    if (unlikely(open_error && (thd->killed || thd->is_error())))
      goto exit;
  }

  /* TODO: add environment variables show when it become possible */
  if (lex->table_type == TABLE_TYPE_VIEW && !table_list->view)
  {
    my_error(ER_WRONG_OBJECT, MYF(0),
             table_list->db.str, table_list->table_name.str, "VIEW");
    goto exit;
  }
  else if (lex->table_type == TABLE_TYPE_SEQUENCE &&
           (!table_list->table ||
            table_list->table->s->table_type != TABLE_TYPE_SEQUENCE))
  {
    my_error(ER_NOT_SEQUENCE, MYF(0),
             table_list->db.str, table_list->table_name.str);
    goto exit;
  }

  buffer->length(0);

  if (table_list->view)
    buffer->set_charset(table_list->view_creation_ctx->get_client_cs());

  if ((table_list->view ?
       show_create_view(thd, table_list, buffer) :
       lex->table_type == TABLE_TYPE_SEQUENCE ?
       show_create_sequence(thd, table_list, buffer) :
       show_create_table(thd, table_list, buffer, NULL, WITHOUT_DB_NAME)))
    goto exit;

  if (table_list->view)
  {
    field_list->push_back(new (mem_root)
                          Item_empty_string(thd, "View", NAME_CHAR_LEN),
                          mem_root);
    field_list->push_back(new (mem_root)
                          Item_empty_string(thd, "Create View",
                                            MY_MAX(buffer->length(),1024)),
                          mem_root);
    field_list->push_back(new (mem_root)
                          Item_empty_string(thd, "character_set_client",
                                            MY_CS_CHARACTER_SET_NAME_SIZE),
                          mem_root);
    field_list->push_back(new (mem_root)
                          Item_empty_string(thd, "collation_connection",
                                            MY_CS_COLLATION_NAME_SIZE),
                          mem_root);
  }
  else
  {
    field_list->push_back(new (mem_root)
                          Item_empty_string(thd, "Table", NAME_CHAR_LEN),
                          mem_root);
    // 1024 is for not to confuse old clients
    field_list->push_back(new (mem_root)
                          Item_empty_string(thd, "Create Table",
                                            MY_MAX(buffer->length(),1024)),
                          mem_root);
  }
  error= FALSE;

exit:
  DBUG_RETURN(error);
}


/*
  Return CREATE command for table or view

  @param thd          Thread handler
  @param table_list   Table / view

  @return
  @retval 0      OK
  @retval 1      Error

  @notes
  table_list->db and table_list->table_name are kept unchanged to
  not cause problems with SP.
*/

bool
mysqld_show_create(THD *thd, TABLE_LIST *table_list)
{
  Protocol *protocol= thd->protocol;
  char buff[2048];
  String buffer(buff, sizeof(buff), system_charset_info);
  List<Item> field_list;
  bool error= TRUE;
  DBUG_ENTER("mysqld_show_create");
  DBUG_PRINT("enter",("db: %s table: %s",table_list->db.str,
                      table_list->table_name.str));

  /*
    Metadata locks taken during SHOW CREATE should be released when
    the statement completes as it is an information statement.
  */
  MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint();

  /* NOTE(review): 'archive' appears unused in the visible code — confirm. */
  TABLE_LIST archive;

  if (mysqld_show_create_get_fields(thd, table_list, &field_list, &buffer))
    goto exit;

  if (protocol->send_result_set_metadata(&field_list,
                                         Protocol::SEND_NUM_ROWS |
                                         Protocol::SEND_EOF))
    goto exit;

  protocol->prepare_for_resend();
  if (table_list->view)
    protocol->store(&table_list->view_name, system_charset_info);
  else
  {
    if (table_list->schema_table)
      protocol->store(table_list->schema_table->table_name,
                      strlen(table_list->schema_table->table_name),
                      system_charset_info);
    else
      protocol->store(table_list->table->alias.ptr(),
                      table_list->table->alias.length(),
                      system_charset_info);
  }

  if (table_list->view)
  {
    /* Views also report the client charset/collation they were created with */
    buffer.set_charset(table_list->view_creation_ctx->get_client_cs());
    protocol->store(&buffer);

    protocol->store(&table_list->view_creation_ctx->get_client_cs()->cs_name,
                    system_charset_info);

    protocol->store(&table_list->view_creation_ctx->get_connection_cl()->
                    coll_name, system_charset_info);
  }
  else
    protocol->store(&buffer);

  if (protocol->write())
    goto exit;

  error= FALSE;
  my_eof(thd);

exit:
  close_thread_tables(thd);
  /* Release any metadata locks taken during SHOW CREATE. */
  thd->mdl_context.rollback_to_savepoint(mdl_savepoint);
  DBUG_RETURN(error);
}


/* Push the two result-set columns used by SHOW CREATE DATABASE. */
void mysqld_show_create_db_get_fields(THD *thd, List<Item> *field_list)
{
  MEM_ROOT *mem_root= thd->mem_root;
  field_list->push_back(new (mem_root)
                        Item_empty_string(thd, "Database", NAME_CHAR_LEN),
                        mem_root);
  field_list->push_back(new (mem_root)
                        Item_empty_string(thd, "Create Database", 1024),
                        mem_root);
}


/*
  Send the result of SHOW CREATE DATABASE to the client.

  @return TRUE on error, FALSE on success.
*/
bool mysqld_show_create_db(THD *thd, LEX_CSTRING *dbname,
                           LEX_CSTRING *orig_dbname,
                           const DDL_options_st &options)
{
  char buff[2048+DATABASE_COMMENT_MAXLEN];
  String buffer(buff, sizeof(buff), system_charset_info);
#ifndef NO_EMBEDDED_ACCESS_CHECKS
  Security_context *sctx= thd->security_ctx;
  privilege_t db_access(NO_ACL);
#endif
  Schema_specification_st create;
  Protocol *protocol=thd->protocol;
  List<Item> field_list;
  /* NOTE(review): DBUG tag differs from function name ("mysql_" vs "mysqld_") */
  DBUG_ENTER("mysql_show_create_db");

#ifndef NO_EMBEDDED_ACCESS_CHECKS
  if (test_all_bits(sctx->master_access, DB_ACLS))
    db_access=DB_ACLS;
  else
    db_access= acl_get_all3(sctx, dbname->str, FALSE) |
               sctx->master_access;

  if (!(db_access & DB_ACLS) && check_grant_db(thd,dbname->str))
  {
    status_var_increment(thd->status_var.access_denied_errors);
    my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
             sctx->priv_user, sctx->host_or_ip, dbname->str);
    general_log_print(thd,COM_INIT_DB,ER_THD(thd, ER_DBACCESS_DENIED_ERROR),
                      sctx->priv_user, sctx->host_or_ip, orig_dbname->str);
    DBUG_RETURN(TRUE);
  }
#endif
  if (is_infoschema_db(dbname))
  {
    *dbname= INFORMATION_SCHEMA_NAME;
    create.default_table_charset= system_charset_info;
    create.schema_comment= NULL;
  }
  else
  {
    if (check_db_dir_existence(dbname->str))
    {
      my_error(ER_BAD_DB_ERROR, MYF(0), dbname->str);
      DBUG_RETURN(TRUE);
    }

    load_db_opt_by_name(thd, dbname->str, &create);
  }

  mysqld_show_create_db_get_fields(thd, &field_list);

  if (protocol->send_result_set_metadata(&field_list,
                                         Protocol::SEND_NUM_ROWS |
                                         Protocol::SEND_EOF))
    DBUG_RETURN(TRUE);

  protocol->prepare_for_resend();
  protocol->store(orig_dbname->str, orig_dbname->length, system_charset_info);
  buffer.length(0);
  buffer.append(STRING_WITH_LEN("CREATE DATABASE "));
  if (options.if_not_exists())
    buffer.append(STRING_WITH_LEN("/*!32312 IF NOT EXISTS*/ "));
  append_identifier(thd, &buffer, dbname);

  if (create.default_table_charset)
  {
    buffer.append(STRING_WITH_LEN(" /*!40100"));
    buffer.append(STRING_WITH_LEN(" DEFAULT CHARACTER SET "));
    buffer.append(create.default_table_charset->cs_name);
    if (Charset(create.default_table_charset).can_have_collate_clause())
    {
      buffer.append(STRING_WITH_LEN(" COLLATE "));
      buffer.append(create.default_table_charset->coll_name);
    }
    buffer.append(STRING_WITH_LEN(" */"));
  }

  if (create.schema_comment)
  {
    buffer.append(STRING_WITH_LEN(" COMMENT "));
    append_unescaped(&buffer, create.schema_comment->str,
                     create.schema_comment->length);
  }
  protocol->store(buffer.ptr(), buffer.length(), buffer.charset());

  if (protocol->write())
    DBUG_RETURN(TRUE);
  my_eof(thd);
  DBUG_RETURN(FALSE);
}



/****************************************************************************
  Return only fields for API mysql_list_fields
  Use "show table wildcard" in mysql instead of this
****************************************************************************/

void
mysqld_list_fields(THD *thd, TABLE_LIST *table_list, const char *wild)
{
  TABLE *table;
  DBUG_ENTER("mysqld_list_fields");
  DBUG_PRINT("enter",("table: %s", table_list->table_name.str));

  if (open_normal_and_derived_tables(thd, table_list,
                                     MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL,
                                     DT_INIT | DT_PREPARE))
    DBUG_VOID_RETURN;
  table= table_list->table;

  /* NOTE(review): template argument restored as List<Field> — confirm. */
  List<Field> field_list;

  Field **ptr,*field;
  for (ptr=table->field ; (field= *ptr); ptr++)
  {
    /* Apply the optional LIKE pattern to the column name. */
    if (!wild || !wild[0] ||
        !wild_case_compare(system_charset_info, field->field_name.str,wild))
      field_list.push_back(field);
  }
  restore_record(table, s->default_values);     // Get empty record
  table->use_all_columns();
  if (thd->protocol->send_list_fields(&field_list, table_list))
    DBUG_VOID_RETURN;
  my_eof(thd);
  DBUG_VOID_RETURN;
}

/*
  Go through all character combinations and ensure that sql_lex.cc can
  parse it as an identifier.

  SYNOPSIS
  require_quotes()
  name                 attribute name
  name_length          length of name

  RETURN
    #  Pointer to conflicting character
    0  No conflicting character
*/

static const char *require_quotes(const char *name, uint name_length)
{
  bool pure_digit= TRUE;
  const char *end= name + name_length;

  for (; name < end ; name++)
  {
    uchar chr= (uchar) *name;
    int length= system_charset_info->charlen(name, end);
    /* Single-byte characters outside the identifier map force quoting. */
    if (length == 1 && !system_charset_info->ident_map[chr])
      return name;
    if (length == 1 && (chr < '0' || chr > '9'))
      pure_digit= FALSE;
  }
  /* An all-digit name would parse as a number, so it needs quoting too. */
  if (pure_digit)
    return name;
  return 0;
}


/**
  Convert and quote the given identifier if needed and append it to the
  target string. If the given identifier is empty, it will be quoted.
  @thd                         thread handler
  @packet                      target string
  @name                        the identifier to be appended
  @length                      length of the appending identifier

  @return
    0             success
    1             error

  NOTE(review): "&quote_char" below was entity-garbled in extraction
  ("e_char); restored — confirm against the original file.
*/

bool
append_identifier(THD *thd, String *packet, const char *name, size_t length)
{
  const char *name_end;
  char quote_char;
  int q= get_quote_char_for_identifier(thd, name, length);

  if (q == EOF)
    return packet->append(name, length, packet->charset());

  /*
    The identifier must be quoted as it includes a quote character or
    it's a keyword
  */

  /*
    Special code for swe7. It encodes the letter "E WITH ACUTE" on
    the position 0x60, where backtick normally resides.
    In swe7 we cannot append 0x60 using system_charset_info,
    because it cannot be converted to swe7 and will be replaced to
    question mark '?'. Use &my_charset_bin to avoid this.
    It will prevent conversion and will append the backtick as is.
  */
  CHARSET_INFO *quote_charset= q == 0x60 &&
                               (packet->charset()->state & MY_CS_NONASCII) &&
                               packet->charset()->mbmaxlen == 1 ?
                               &my_charset_bin : system_charset_info;

  (void) packet->reserve(length*2 + 2);
  quote_char= (char) q;
  if (packet->append(&quote_char, 1, quote_charset))
    return true;

  for (name_end= name+length ; name < name_end ; )
  {
    uchar chr= (uchar) *name;
    int char_length= system_charset_info->charlen(name, name_end);
    /*
      charlen can return 0 and negative numbers on a wrong multibyte
      sequence. It is possible when upgrading from 4.0,
      and identifier contains some accented characters.
      The manual says it does not work. So we'll just
      change char_length to 1 not to hang in the endless loop.
    */
    if (char_length <= 0)
      char_length= 1;
    /* Double any embedded quote character to escape it. */
    if (char_length == 1 && chr == (uchar) quote_char &&
        packet->append(&quote_char, 1, quote_charset))
      return true;
    if (packet->append(name, char_length, system_charset_info))
      return true;
    name+= char_length;
  }
  return packet->append(&quote_char, 1, quote_charset);
}


/*
  Get the quote character for displaying an identifier.

  SYNOPSIS
    get_quote_char_for_identifier()
    thd         Thread handler
    name        name to quote
    length      length of name

  IMPLEMENTATION
    Force quoting in the following cases:
      - name is empty (for one, it is possible when we use this function for
        quoting user and host names for DEFINER clause);
      - name is a keyword;
      - name includes a special character;
    Otherwise identifier is quoted only if the option OPTION_QUOTE_SHOW_CREATE
    is set.
+ + RETURN + EOF No quote character is needed + # Quote character +*/ + +int get_quote_char_for_identifier(THD *thd, const char *name, size_t length) +{ + if (length && + !is_keyword(name,(uint)length) && + !require_quotes(name, (uint)length) && + !(thd->variables.option_bits & OPTION_QUOTE_SHOW_CREATE)) + return EOF; + if (thd->variables.sql_mode & MODE_ANSI_QUOTES) + return '"'; + return '`'; +} + + +/* Append directory name (if exists) to CREATE INFO */ + +static void append_directory(THD *thd, String *packet, LEX_CSTRING *dir_type, + const char *filename) +{ + if (filename && !(thd->variables.sql_mode & MODE_NO_DIR_IN_CREATE)) + { + size_t length= dirname_length(filename); + packet->append(' '); + packet->append(dir_type); + packet->append(STRING_WITH_LEN(" DIRECTORY='")); +#ifdef _WIN32 + /* Convert \ to / to be able to create table on unix */ + char *winfilename= (char*) thd->memdup(filename, length); + char *pos, *end; + for (pos= winfilename, end= pos+length ; pos < end ; pos++) + { + if (*pos == '\\') + *pos = '/'; + } + filename= winfilename; +#endif + packet->append(filename, length); + packet->append('\''); + } +} + + +#define LIST_PROCESS_HOST_LEN 64 + + +/** + Print "ON UPDATE" clause of a field into a string. + + @param timestamp_field Pointer to timestamp field of a table. + @param field The field to generate ON UPDATE clause for. + @bool lcase Whether to print in lower case. + @return false on success, true on error. 
*/
static bool print_on_update_clause(Field *field, String *val, bool lcase)
{
  DBUG_ASSERT(val->charset()->mbminlen == 1);
  val->length(0);
  /* NOTE(review): returns true when a clause was emitted, not an error flag */
  if (field->has_update_default_function())
  {
    if (lcase)
      val->append(STRING_WITH_LEN("on update "));
    else
      val->append(STRING_WITH_LEN("ON UPDATE "));
    val->append(STRING_WITH_LEN("current_timestamp"));
    if (field->decimals() > 0)
      val->append_parenthesized(field->decimals());
    else
      val->append(STRING_WITH_LEN("()"));
    return true;
  }
  return false;
}


/*
  Format a column's DEFAULT for SHOW CREATE TABLE into def_value.

  @param quoted  whether the value should be printed as a quoted literal
  @return whether the column has a printable default at all.

  NOTE(review): StringBuffer template arguments were stripped by extraction;
  restored as StringBuffer<MAX_FIELD_WIDTH> — confirm against the original.
*/
static bool get_field_default_value(THD *thd, Field *field, String *def_value,
                                    bool quoted)
{
  bool has_default;
  enum enum_field_types field_type= field->type();

  has_default= (field->default_value ||
                (!(field->flags & NO_DEFAULT_VALUE_FLAG) &&
                 !field->vers_sys_field() &&
                 field->unireg_check != Field::NEXT_NUMBER));

  def_value->length(0);
  if (has_default)
  {
    StringBuffer<MAX_FIELD_WIDTH> str(field->charset());
    if (field->default_value)
    {
      /* DEFAULT is an expression: print it, parenthesized if required. */
      field->default_value->print(&str);
      if (field->default_value->expr->need_parentheses_in_default())
      {
        def_value->set_charset(&my_charset_utf8mb4_general_ci);
        def_value->append('(');
        def_value->append(str);
        def_value->append(')');
      }
      else
        def_value->append(str);
    }
    else if (!field->is_null())
    {                                           // Not null by default
      if (field_type == MYSQL_TYPE_BIT)
      {
        /* BIT defaults print as b'...' binary literals, unquoted. */
        str.qs_append('b');
        str.qs_append('\'');
        str.qs_append(field->val_int(), 2);
        str.qs_append('\'');
        quoted= 0;
      }
      else
      {
        field->val_str(&str);
        if (!field->str_needs_quotes())
          quoted= 0;
      }
      if (str.length())
      {
        StringBuffer<MAX_FIELD_WIDTH> def_val;
        uint dummy_errors;
        /* convert to system_charset_info == utf8 */
        def_val.copy(str.ptr(), str.length(), field->charset(),
                     system_charset_info, &dummy_errors);
        if (quoted)
          append_unescaped(def_value, def_val.ptr(), def_val.length());
        else
          def_value->append(def_val);
      }
      else if (quoted)
        def_value->set(STRING_WITH_LEN("''"), system_charset_info);
    }
    else if (field->maybe_null() && quoted)
      def_value->set(STRING_WITH_LEN("NULL"), system_charset_info); // Null as default
    else
      return 0;

  }
  return has_default;
}


/**
  Appends list of options to string

  @param thd             thread handler
  @param packet          string to append
  @param opt             list of options
  @param check_options   print all used options
  @param rules           list of known options
*/

static void append_create_options(THD *thd, String *packet,
                                  engine_option_value *opt,
                                  bool check_options,
                                  ha_create_table_option *rules)
{
  bool in_comment= false;
  for(; opt; opt= opt->next)
  {
    if (check_options)
    {
      /*
        Options unknown to the current engine are wrapped in a comment
        so the statement still replays; consecutive unknown options share
        one comment block.
      */
      if (is_engine_option_known(opt, rules))
      {
        if (in_comment)
          packet->append(STRING_WITH_LEN(" */"));
        in_comment= false;
      }
      else
      {
        if (!in_comment)
          packet->append(STRING_WITH_LEN(" /*"));
        in_comment= true;
      }
    }

    DBUG_ASSERT(opt->value.str);
    packet->append(' ');
    append_identifier(thd, packet, &opt->name);
    packet->append('=');
    if (opt->quoted_value)
      append_unescaped(packet, opt->value.str, opt->value.length);
    else
      packet->append(&opt->value);
  }
  if (in_comment)
    packet->append(STRING_WITH_LEN(" */"));
}

/**
  Add table options to end of CREATE statement

  @param schema_table  1 if schema table
  @param sequence      1 if sequence. If sequence, we flush out options
                       not relevant for sequences.
+*/ + +static void add_table_options(THD *thd, TABLE *table, + Table_specification_st *create_info_arg, + bool schema_table, bool sequence, + String *packet) +{ + sql_mode_t sql_mode= thd->variables.sql_mode; + TABLE_SHARE *share= table->s; + handlerton *hton; + HA_CREATE_INFO create_info; + bool check_options= (!(sql_mode & MODE_IGNORE_BAD_TABLE_OPTIONS) && + (!create_info_arg || + create_info_arg->used_fields & + HA_CREATE_PRINT_ALL_OPTIONS)); + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (table->part_info) + hton= table->part_info->default_engine_type; + else +#endif + hton= table->file->ht; + + bzero((char*) &create_info, sizeof(create_info)); + /* Allow update_create_info to update row type, page checksums and options */ + create_info.row_type= share->row_type; + create_info.page_checksum= share->page_checksum; + create_info.options= share->db_create_options; + table->file->update_create_info(&create_info); + + /* + IF check_create_info + THEN add ENGINE only if it was used when creating the table + */ + if (!create_info_arg || + (create_info_arg->used_fields & HA_CREATE_USED_ENGINE)) + { + LEX_CSTRING *engine_name= table->file->engine_name(); + + if (sql_mode & (MODE_MYSQL323 | MODE_MYSQL40)) + packet->append(STRING_WITH_LEN(" TYPE=")); + else + packet->append(STRING_WITH_LEN(" ENGINE=")); + + packet->append(engine_name->str, engine_name->length); + } + + if (sequence) + goto end_options; + + /* + Add AUTO_INCREMENT=... if there is an AUTO_INCREMENT column, + and NEXT_ID > 1 (the default). We must not print the clause + for engines that do not support this as it would break the + import of dumps, but as of this writing, the test for whether + AUTO_INCREMENT columns are allowed and wether AUTO_INCREMENT=... + is supported is identical, !(file->table_flags() & HA_NO_AUTO_INCREMENT)) + Because of that, we do not explicitly test for the feature, + but may extrapolate its existence from that of an AUTO_INCREMENT column. 
+ */ + + if (create_info.auto_increment_value > 1) + { + packet->append(STRING_WITH_LEN(" AUTO_INCREMENT=")); + packet->append_ulonglong(create_info.auto_increment_value); + } + + if (share->table_charset && !(sql_mode & (MODE_MYSQL323 | MODE_MYSQL40)) && + share->table_type != TABLE_TYPE_SEQUENCE) + { + /* + IF check_create_info + THEN add DEFAULT CHARSET only if it was used when creating the table + */ + if (!create_info_arg || + (create_info_arg->used_fields & HA_CREATE_USED_DEFAULT_CHARSET)) + { + packet->append(STRING_WITH_LEN(" DEFAULT CHARSET=")); + packet->append(share->table_charset->cs_name); + if (Charset(table->s->table_charset).can_have_collate_clause()) + { + packet->append(STRING_WITH_LEN(" COLLATE=")); + packet->append(table->s->table_charset->coll_name); + } + } + } + + if (share->min_rows) + { + packet->append(STRING_WITH_LEN(" MIN_ROWS=")); + packet->append_ulonglong(share->min_rows); + } + + if (share->max_rows && !schema_table && !sequence) + { + packet->append(STRING_WITH_LEN(" MAX_ROWS=")); + packet->append_ulonglong(share->max_rows); + } + + if (share->avg_row_length) + { + packet->append(STRING_WITH_LEN(" AVG_ROW_LENGTH=")); + packet->append_ulonglong(share->avg_row_length); + } + + if (create_info.options & HA_OPTION_PACK_KEYS) + packet->append(STRING_WITH_LEN(" PACK_KEYS=1")); + if (create_info.options & HA_OPTION_NO_PACK_KEYS) + packet->append(STRING_WITH_LEN(" PACK_KEYS=0")); + if (share->db_create_options & HA_OPTION_STATS_PERSISTENT) + packet->append(STRING_WITH_LEN(" STATS_PERSISTENT=1")); + if (share->db_create_options & HA_OPTION_NO_STATS_PERSISTENT) + packet->append(STRING_WITH_LEN(" STATS_PERSISTENT=0")); + if (share->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON) + packet->append(STRING_WITH_LEN(" STATS_AUTO_RECALC=1")); + else if (share->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF) + packet->append(STRING_WITH_LEN(" STATS_AUTO_RECALC=0")); + if (share->stats_sample_pages != 0) + { + packet->append(STRING_WITH_LEN(" 
STATS_SAMPLE_PAGES=")); + packet->append_ulonglong(share->stats_sample_pages); + } + + /* We use CHECKSUM, instead of TABLE_CHECKSUM, for backward compatibility */ + if (create_info.options & HA_OPTION_CHECKSUM) + packet->append(STRING_WITH_LEN(" CHECKSUM=1")); + if (create_info.page_checksum != HA_CHOICE_UNDEF) + { + packet->append(STRING_WITH_LEN(" PAGE_CHECKSUM=")); + packet->append(ha_choice_values[create_info.page_checksum]); + } + if (create_info.options & HA_OPTION_DELAY_KEY_WRITE) + packet->append(STRING_WITH_LEN(" DELAY_KEY_WRITE=1")); + if (create_info.row_type != ROW_TYPE_DEFAULT) + { + packet->append(STRING_WITH_LEN(" ROW_FORMAT=")); + packet->append(&ha_row_type[(uint) create_info.row_type]); + } + if (share->transactional != HA_CHOICE_UNDEF) + { + bool do_comment= !table->file->has_transactional_option() && check_options; + if (do_comment) + packet->append(STRING_WITH_LEN(" /*")); + packet->append(STRING_WITH_LEN(" TRANSACTIONAL=")); + packet->append(ha_choice_values[(uint) share->transactional]); + if (do_comment) + packet->append(STRING_WITH_LEN(" */")); + } + if (share->table_type == TABLE_TYPE_SEQUENCE) + packet->append(STRING_WITH_LEN(" SEQUENCE=1")); + if (table->s->key_block_size) + { + packet->append(STRING_WITH_LEN(" KEY_BLOCK_SIZE=")); + packet->append_ulonglong(table->s->key_block_size); + } + table->file->append_create_info(packet); + +end_options: + if (share->comment.length) + { + packet->append(STRING_WITH_LEN(" COMMENT=")); + append_unescaped(packet, share->comment.str, share->comment.length); + } + if (share->connect_string.length) + { + packet->append(STRING_WITH_LEN(" CONNECTION=")); + append_unescaped(packet, share->connect_string.str, share->connect_string.length); + } + append_create_options(thd, packet, share->option_list, check_options, + hton->table_options); + append_directory(thd, packet, &DATA_clex_str, create_info.data_file_name); + append_directory(thd, packet, &INDEX_clex_str, create_info.index_file_name); +} + +static 
/*
  Append a ",\n  PERIOD FOR <name> (<start>, <end>)" clause to a CREATE
  TABLE statement being built in 'packet'.

  @param thd     current thread (identifier quoting depends on its sql_mode)
  @param packet  output buffer the clause is appended to
  @param start   name of the period's start column
  @param end     name of the period's end column
  @param period  name of the period itself
  @param ident   true:  quote the period name as an identifier
                 false: append it verbatim (used for the implicit
                        SYSTEM_TIME period name)
*/
void append_period(THD *thd, String *packet, const LEX_CSTRING &start,
                   const LEX_CSTRING &end, const LEX_CSTRING &period,
                   bool ident)
{
  packet->append(STRING_WITH_LEN(",\n  PERIOD FOR "));
  if (ident)
    append_identifier(thd, packet, period.str, period.length);
  else
    packet->append(period);
  packet->append(STRING_WITH_LEN(" ("));
  append_identifier(thd, packet, start.str, start.length);
  packet->append(STRING_WITH_LEN(", "));
  append_identifier(thd, packet, end.str, end.length);
  packet->append(STRING_WITH_LEN(")"));
}

/*
  Public entry point: build CREATE TABLE without overriding the database
  or table name (delegates to show_create_table_ex with NULL overrides).
*/
int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet,
                      Table_specification_st *create_info_arg,
                      enum_with_db_name with_db_name)
{
  return show_create_table_ex(thd, table_list, NULL, NULL, packet,
                              create_info_arg, with_db_name);
}

/*
  Build a CREATE TABLE statement for a table.

  SYNOPSIS
    show_create_table()
    thd               The thread
    table_list        A list containing one table to write statement
                      for.
    force_db          If not NULL, database name to use in the CREATE
                      TABLE statement.
    force_name        If not NULL, table name to use in the CREATE TABLE
                      statement. if NULL, the name from table_list will be
                      used.
    packet            Pointer to a string where statement will be
                      written.
    create_info_arg   Pointer to create information that can be used
                      to tailor the format of the statement. Can be
                      NULL, in which case only SQL_MODE is considered
                      when building the statement.
    with_db_name      Add database name to table name

  NOTE
    Currently always return 0, but might return error code in the
    future.
+ + RETURN + 0 OK + */ + +int show_create_table_ex(THD *thd, TABLE_LIST *table_list, + const char *force_db, const char *force_name, + String *packet, + Table_specification_st *create_info_arg, + enum_with_db_name with_db_name) +{ + List field_list; + char tmp[MAX_FIELD_WIDTH], *for_str, def_value_buf[MAX_FIELD_WIDTH]; + LEX_CSTRING alias; + String type; + String def_value; + Field **ptr,*field; + uint primary_key; + KEY *key_info; + TABLE *table= table_list->table; + TABLE_SHARE *share= table->s; + TABLE_SHARE::period_info_t &period= share->period; + sql_mode_t sql_mode= thd->variables.sql_mode; + bool explicit_fields= false; + bool foreign_db_mode= sql_mode & (MODE_POSTGRESQL | MODE_ORACLE | + MODE_MSSQL | MODE_DB2 | + MODE_MAXDB | MODE_ANSI); + bool limited_mysql_mode= sql_mode & (MODE_NO_FIELD_OPTIONS | MODE_MYSQL323 | + MODE_MYSQL40); + bool show_table_options= !(sql_mode & MODE_NO_TABLE_OPTIONS) && + !foreign_db_mode; + bool check_options= !(sql_mode & MODE_IGNORE_BAD_TABLE_OPTIONS) && + !create_info_arg; + handlerton *hton; + int error= 0; + DBUG_ENTER("show_create_table"); + DBUG_PRINT("enter",("table: %s", table->s->table_name.str)); + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (table->part_info) + hton= table->part_info->default_engine_type; + else +#endif + hton= table->file->ht; + + restore_record(table, s->default_values); // Get empty record + + packet->append(STRING_WITH_LEN("CREATE ")); + if (create_info_arg && + ((create_info_arg->or_replace() && + !create_info_arg->or_replace_slave_generated()) || + create_info_arg->table_was_deleted)) + packet->append(STRING_WITH_LEN("OR REPLACE ")); + if (share->tmp_table) + packet->append(STRING_WITH_LEN("TEMPORARY ")); + packet->append(STRING_WITH_LEN("TABLE ")); + if (create_info_arg && create_info_arg->if_not_exists()) + packet->append(STRING_WITH_LEN("IF NOT EXISTS ")); + + if (force_name) + { + if (force_db) + { + append_identifier(thd, packet, force_db, strlen(force_db)); + 
packet->append(STRING_WITH_LEN(".")); + } + append_identifier(thd, packet, force_name, strlen(force_name)); + } + else + { + if (table_list->schema_table) + { + alias.str= table_list->schema_table->table_name; + alias.length= strlen(alias.str); + } + else + { + if (lower_case_table_names == 2) + { + alias.str= table->alias.c_ptr(); + alias.length= table->alias.length(); + } + else + alias= share->table_name; + } + + /* + Print the database before the table name if told to do that. The + database name is only printed in the event that it is different + from the current database. The main reason for doing this is to + avoid having to update gazillions of tests and result files, but + it also saves a few bytes of the binary log. + */ + if (with_db_name == WITH_DB_NAME) + { + const LEX_CSTRING *const db= + table_list->schema_table ? &INFORMATION_SCHEMA_NAME : &table->s->db; + if (!thd->db.str || cmp(db, &thd->db)) + { + append_identifier(thd, packet, db); + packet->append(STRING_WITH_LEN(".")); + } + } + + append_identifier(thd, packet, &alias); + } + + packet->append(STRING_WITH_LEN(" (\n")); + /* + We need this to get default values from the table + We have to restore the read_set if we are called from insert in case + of row based replication. 
+ */ + MY_BITMAP *old_map= tmp_use_all_columns(table, &table->read_set); + + bool not_the_first_field= false; + for (ptr=table->field ; (field= *ptr); ptr++) + { + + uint flags = field->flags; + + if (field->invisible > INVISIBLE_USER) + continue; + if (not_the_first_field) + packet->append(STRING_WITH_LEN(",\n")); + + not_the_first_field= true; + packet->append(STRING_WITH_LEN(" ")); + append_identifier(thd, packet, &field->field_name); + packet->append(' '); + + const Type_handler *th= field->type_handler(); + const Schema *implied_schema= Schema::find_implied(thd); + if (th != implied_schema->map_data_type(thd, th)) + { + packet->append(th->schema()->name(), system_charset_info); + packet->append(STRING_WITH_LEN("."), system_charset_info); + } + type.set(tmp, sizeof(tmp), system_charset_info); + field->sql_type(type); + packet->append(type.ptr(), type.length(), system_charset_info); + + if (field->has_charset() && !(sql_mode & (MODE_MYSQL323 | MODE_MYSQL40))) + { + if (field->charset() != share->table_charset) + { + packet->append(STRING_WITH_LEN(" CHARACTER SET ")); + packet->append(field->charset()->cs_name); + if (Charset(field->charset()).can_have_collate_clause()) + { + packet->append(STRING_WITH_LEN(" COLLATE ")); + packet->append(field->charset()->coll_name); + } + } + } + + if (field->vcol_info) + { + StringBuffer str(&my_charset_utf8mb4_general_ci); + field->vcol_info->print(&str); + packet->append(STRING_WITH_LEN(" GENERATED ALWAYS AS (")); + packet->append(str); + packet->append(STRING_WITH_LEN(")")); + if (field->vcol_info->stored_in_db) + packet->append(STRING_WITH_LEN(" STORED")); + else + packet->append(STRING_WITH_LEN(" VIRTUAL")); + if (field->invisible == INVISIBLE_USER) + { + packet->append(STRING_WITH_LEN(" INVISIBLE")); + } + } + else + { + if (field->flags & VERS_ROW_START) + { + packet->append(STRING_WITH_LEN(" GENERATED ALWAYS AS ROW START")); + } + else if (field->flags & VERS_ROW_END) + { + packet->append(STRING_WITH_LEN(" GENERATED 
ALWAYS AS ROW END")); + } + else if (flags & NOT_NULL_FLAG) + packet->append(STRING_WITH_LEN(" NOT NULL")); + else if (field->type() == MYSQL_TYPE_TIMESTAMP) + { + /* + TIMESTAMP field require explicit NULL flag, because unlike + all other fields they are treated as NOT NULL by default. + */ + packet->append(STRING_WITH_LEN(" NULL")); + } + + if (field->invisible == INVISIBLE_USER) + { + packet->append(STRING_WITH_LEN(" INVISIBLE")); + } + def_value.set(def_value_buf, sizeof(def_value_buf), system_charset_info); + if (get_field_default_value(thd, field, &def_value, 1)) + { + packet->append(STRING_WITH_LEN(" DEFAULT ")); + packet->append(def_value.ptr(), def_value.length(), system_charset_info); + } + + if (field->vers_update_unversioned()) + { + packet->append(STRING_WITH_LEN(" WITHOUT SYSTEM VERSIONING")); + } + + if (!limited_mysql_mode && + print_on_update_clause(field, &def_value, false)) + { + packet->append(STRING_WITH_LEN(" ")); + packet->append(def_value); + } + + if (field->unireg_check == Field::NEXT_NUMBER && + !(sql_mode & MODE_NO_FIELD_OPTIONS)) + packet->append(STRING_WITH_LEN(" AUTO_INCREMENT")); + } + + if (field->comment.length) + { + packet->append(STRING_WITH_LEN(" COMMENT ")); + append_unescaped(packet, field->comment.str, field->comment.length); + } + + append_create_options(thd, packet, field->option_list, check_options, + hton->field_options); + + if (field->check_constraint) + { + StringBuffer str(&my_charset_utf8mb4_general_ci); + field->check_constraint->print(&str); + packet->append(STRING_WITH_LEN(" CHECK (")); + packet->append(str); + packet->append(STRING_WITH_LEN(")")); + } + + } + + if (period.name) + { + append_period(thd, packet, + period.start_field(share)->field_name, + period.end_field(share)->field_name, + period.name, true); + } + + key_info= table->s->key_info; + primary_key= share->primary_key; + + for (uint i=0 ; i < share->keys ; i++,key_info++) + { + if (key_info->flags & HA_INVISIBLE_KEY) + continue; + KEY_PART_INFO 
*key_part= key_info->key_part; + bool found_primary=0; + packet->append(STRING_WITH_LEN(",\n ")); + + if (i == primary_key && !strcmp(key_info->name.str, primary_key_name.str)) + { + found_primary=1; + /* + No space at end, because a space will be added after where the + identifier would go, but that is not added for primary key. + */ + packet->append(STRING_WITH_LEN("PRIMARY KEY")); + } + else if (key_info->flags & HA_NOSAME) + packet->append(STRING_WITH_LEN("UNIQUE KEY ")); + else if (key_info->flags & HA_FULLTEXT) + packet->append(STRING_WITH_LEN("FULLTEXT KEY ")); + else if (key_info->flags & HA_SPATIAL) + packet->append(STRING_WITH_LEN("SPATIAL KEY ")); + else + packet->append(STRING_WITH_LEN("KEY ")); + + if (!found_primary) + append_identifier(thd, packet, &key_info->name); + + packet->append(STRING_WITH_LEN(" (")); + + uint key_parts= key_info->user_defined_key_parts; + if (key_info->without_overlaps) + key_parts-= 2; + + for (uint j=0 ; j < key_parts ; j++,key_part++) + { + Field *field= key_part->field; + if (field->invisible > INVISIBLE_USER) + continue; + + if (j) + packet->append(','); + + if (key_part->field) + append_identifier(thd, packet, &key_part->field->field_name); + if (key_part->field && + (key_part->length != + table->field[key_part->fieldnr-1]->key_length() && + !(key_info->flags & (HA_FULLTEXT | HA_SPATIAL)))) + { + packet->append_parenthesized((long) key_part->length / + key_part->field->charset()->mbmaxlen); + } + if (table->file->index_flags(i, j, 0) & HA_READ_ORDER && + key_part->key_part_flag & HA_REVERSE_SORT) /* same in SHOW KEYS */ + packet->append(STRING_WITH_LEN(" DESC")); + } + + if (key_info->without_overlaps) + { + packet->append(','); + append_identifier(thd, packet, &share->period.name); + packet->append(STRING_WITH_LEN(" WITHOUT OVERLAPS")); + } + + packet->append(')'); + store_key_options(thd, packet, table, &table->key_info[i]); + if (key_info->parser) + { + LEX_CSTRING *parser_name= plugin_name(key_info->parser); + 
packet->append(STRING_WITH_LEN(" /*!50100 WITH PARSER ")); + append_identifier(thd, packet, parser_name); + packet->append(STRING_WITH_LEN(" */ ")); + } + append_create_options(thd, packet, key_info->option_list, check_options, + hton->index_options); + } + + if (table->versioned()) + { + const Field *fs = table->vers_start_field(); + const Field *fe = table->vers_end_field(); + DBUG_ASSERT(fs); + DBUG_ASSERT(fe); + explicit_fields= fs->invisible < INVISIBLE_SYSTEM; + DBUG_ASSERT(!explicit_fields || fe->invisible < INVISIBLE_SYSTEM); + if (explicit_fields) + { + append_period(thd, packet, fs->field_name, fe->field_name, + table->s->vers.name, false); + } + else + { + DBUG_ASSERT(fs->invisible == INVISIBLE_SYSTEM); + DBUG_ASSERT(fe->invisible == INVISIBLE_SYSTEM); + } + } + + /* + Get possible foreign key definitions stored in InnoDB and append them + to the CREATE TABLE statement + */ + + if ((for_str= table->file->get_foreign_key_create_info())) + { + packet->append(for_str, strlen(for_str)); + table->file->free_foreign_key_create_info(for_str); + } + + /* Add table level check constraints */ + if (share->table_check_constraints) + { + StringBuffer str(&my_charset_utf8mb4_general_ci); + for (uint i= share->field_check_constraints; + i < share->table_check_constraints ; i++) + { + Virtual_column_info *check= table->check_constraints[i]; + // period constraint is implicit + if (share->period.constr_name.streq(check->name)) + continue; + + str.set_buffer_if_not_allocated(&my_charset_utf8mb4_general_ci); + str.length(0); // Print appends to str + check->print(&str); + + packet->append(STRING_WITH_LEN(",\n ")); + if (check->name.str) + { + packet->append(STRING_WITH_LEN("CONSTRAINT ")); + append_identifier(thd, packet, &check->name); + } + packet->append(STRING_WITH_LEN(" CHECK (")); + packet->append(str); + packet->append(STRING_WITH_LEN(")")); + } + } + + packet->append(STRING_WITH_LEN("\n)")); + if (show_table_options) + add_table_options(thd, table, 
create_info_arg, + table_list->schema_table != 0, 0, packet); + + if (!DBUG_IF("sysvers_hide") && table->versioned()) + packet->append(STRING_WITH_LEN(" WITH SYSTEM VERSIONING")); + +#ifdef WITH_PARTITION_STORAGE_ENGINE + { + if (table->part_info && + !((table->s->db_type()->partition_flags() & HA_USE_AUTO_PARTITION) && + table->part_info->is_auto_partitioned)) + { + /* + Partition syntax for CREATE TABLE is at the end of the syntax. + */ + uint part_syntax_len; + char *part_syntax; + if ((part_syntax= generate_partition_syntax(thd, table->part_info, + &part_syntax_len, + show_table_options, + NULL, NULL))) + { + packet->append('\n'); + if (packet->append(part_syntax, part_syntax_len)) + error= 1; + } + } + } +#endif + tmp_restore_column_map(&table->read_set, old_map); + DBUG_RETURN(error); +} + + +static void store_key_options(THD *thd, String *packet, TABLE *table, + KEY *key_info) +{ + bool limited_mysql_mode= (thd->variables.sql_mode & + (MODE_NO_FIELD_OPTIONS | MODE_MYSQL323 | + MODE_MYSQL40)) != 0; + bool foreign_db_mode= (thd->variables.sql_mode & (MODE_POSTGRESQL | + MODE_ORACLE | + MODE_MSSQL | + MODE_DB2 | + MODE_MAXDB | + MODE_ANSI)) != 0; + char *end, buff[32]; + + if (!(thd->variables.sql_mode & MODE_NO_KEY_OPTIONS) && + !limited_mysql_mode && !foreign_db_mode) + { + + if (key_info->algorithm == HA_KEY_ALG_BTREE) + packet->append(STRING_WITH_LEN(" USING BTREE")); + + if (key_info->algorithm == HA_KEY_ALG_HASH || + key_info->algorithm == HA_KEY_ALG_LONG_HASH) + packet->append(STRING_WITH_LEN(" USING HASH")); + + /* send USING only in non-default case: non-spatial rtree */ + if ((key_info->algorithm == HA_KEY_ALG_RTREE) && + !(key_info->flags & HA_SPATIAL)) + packet->append(STRING_WITH_LEN(" USING RTREE")); + + if ((key_info->flags & HA_USES_BLOCK_SIZE) && + table->s->key_block_size != key_info->block_size) + { + packet->append(STRING_WITH_LEN(" KEY_BLOCK_SIZE=")); + end= longlong10_to_str(key_info->block_size, buff, 10); + packet->append(buff, (uint) 
                     (end - buff));
    }
    /* The HA_USES_COMMENT flag and the stored comment text must agree. */
    DBUG_ASSERT(MY_TEST(key_info->flags & HA_USES_COMMENT) ==
                (key_info->comment.length > 0));
    if (key_info->flags & HA_USES_COMMENT)
    {
      packet->append(STRING_WITH_LEN(" COMMENT "));
      append_unescaped(packet, key_info->comment.str,
                       key_info->comment.length);
    }

    if (key_info->is_ignored)
      packet->append(STRING_WITH_LEN(" IGNORED"));
  }
}


/*
  Append the ALGORITHM, DEFINER and SQL SECURITY clauses of a view
  to 'buff' (used when printing SHOW CREATE VIEW).
*/
void view_store_options(THD *thd, TABLE_LIST *table, String *buff)
{
  /* ALGORITHM is only printed when it was given explicitly. */
  if (table->algorithm != VIEW_ALGORITHM_INHERIT)
  {
    buff->append(STRING_WITH_LEN("ALGORITHM="));
    buff->append(view_algorithm(table));
  }
  buff->append(' ');
  append_definer(thd, buff, &table->definer.user, &table->definer.host);
  if (table->view_suid)
    buff->append(STRING_WITH_LEN("SQL SECURITY DEFINER "));
  else
    buff->append(STRING_WITH_LEN("SQL SECURITY INVOKER "));
}


/**
  Returns ALGORITHM clause of a view
*/

static const LEX_CSTRING *view_algorithm(TABLE_LIST *table)
{
  static const LEX_CSTRING undefined= { STRING_WITH_LEN("UNDEFINED") };
  static const LEX_CSTRING merge=     { STRING_WITH_LEN("MERGE") };
  static const LEX_CSTRING temptable= { STRING_WITH_LEN("TEMPTABLE") };
  switch (table->algorithm) {
  case VIEW_ALGORITHM_TMPTABLE:
    return &temptable;
  case VIEW_ALGORITHM_MERGE:
    return &merge;
  default:
    DBUG_ASSERT(0); // never should happen
    /* fall through */
  case VIEW_ALGORITHM_UNDEFINED:
    return &undefined;
  }
}


/*
  Append "@<host>" to 'buffer'; no-op (returns false) when the host part
  is empty.  Returns true on append/quoting failure.
*/
static bool append_at_host(THD *thd, String *buffer, const LEX_CSTRING *host)
{
  if (!host->str || !host->str[0])
    return false;
  return
    buffer->append('@') ||
    append_identifier(thd, buffer, host);
}


/*
  Append DEFINER clause to the given buffer.

  SYNOPSIS
    append_definer()
    thd           [in] thread handle
    buffer        [inout] buffer to hold DEFINER clause
    definer_user  [in] user name part of definer
    definer_host  [in] host name part of definer
*/

bool append_definer(THD *thd, String *buffer, const LEX_CSTRING *definer_user,
                    const LEX_CSTRING *definer_host)
{
  /* Short-circuit ||: stop at the first append that fails (returns true). */
  return
    buffer->append(STRING_WITH_LEN("DEFINER=")) ||
    append_identifier(thd, buffer, definer_user) ||
    append_at_host(thd, buffer, definer_host) ||
    buffer->append(' ');
}


/*
  Build the CREATE VIEW statement for SHOW CREATE VIEW into 'buff'.
  Decides whether the view name (and names in the view body) can be
  printed without a database qualifier.
*/
static int show_create_view(THD *thd, TABLE_LIST *table, String *buff)
{
  my_bool compact_view_name= TRUE;
  my_bool foreign_db_mode= (thd->variables.sql_mode & (MODE_POSTGRESQL |
                                                       MODE_ORACLE |
                                                       MODE_MSSQL |
                                                       MODE_DB2 |
                                                       MODE_MAXDB |
                                                       MODE_ANSI)) != 0;

  if (!thd->db.str || cmp(&thd->db, &table->view_db))
    /*
      print compact view name if the view belongs to the current database
    */
    compact_view_name= table->compact_view_format= FALSE;
  else
  {
    /*
      Compact output format for view body can be used
      if this view only references table inside it's own db
    */
    TABLE_LIST *tbl;
    table->compact_view_format= TRUE;
    for (tbl= thd->lex->query_tables;
         tbl;
         tbl= tbl->next_global)
    {
      if (!tbl->is_derived() &&
          cmp(&table->view_db, tbl->view ? &tbl->view_db : &tbl->db))
      {
        table->compact_view_format= FALSE;
        break;
      }
    }
  }

  buff->append(STRING_WITH_LEN("CREATE "));
  if (!foreign_db_mode)
  {
    view_store_options(thd, table, buff);
  }
  buff->append(STRING_WITH_LEN("VIEW "));
  if (!compact_view_name)
  {
    append_identifier(thd, buff, &table->view_db);
    buff->append('.');
  }
  append_identifier(thd, buff, &table->view_name);
  buff->append(STRING_WITH_LEN(" AS "));

  /*
    We can't just use table->query, because our SQL_MODE may trigger
    a different syntax, like when ANSI_QUOTES is defined.
  */
  table->view->unit.print(buff, enum_query_type(QT_VIEW_INTERNAL |
                                                QT_ITEM_ORIGINAL_FUNC_NULLIF |
                                                QT_NO_WRAPPERS_FOR_TVC_IN_VIEW));

  if (table->with_check != VIEW_CHECK_NONE)
  {
    if (table->with_check == VIEW_CHECK_LOCAL)
      buff->append(STRING_WITH_LEN(" WITH LOCAL CHECK OPTION"));
    else
      buff->append(STRING_WITH_LEN(" WITH CASCADED CHECK OPTION"));
  }
  return 0;
}


/*
  Build the CREATE SEQUENCE statement for SHOW CREATE SEQUENCE into
  'packet', from the cached SEQUENCE metadata of the table share.
  Always returns 0.
*/
static int show_create_sequence(THD *thd, TABLE_LIST *table_list,
                                String *packet)
{
  TABLE *table= table_list->table;
  SEQUENCE *seq= table->s->sequence;
  LEX_CSTRING alias;
  sql_mode_t sql_mode= thd->variables.sql_mode;
  bool foreign_db_mode= sql_mode & (MODE_POSTGRESQL | MODE_ORACLE |
                                    MODE_MSSQL | MODE_DB2 |
                                    MODE_MAXDB | MODE_ANSI);
  bool show_table_options= !(sql_mode & MODE_NO_TABLE_OPTIONS) &&
                           !foreign_db_mode;

  /* lower_case_table_names == 2: use the alias to preserve original case. */
  if (lower_case_table_names == 2)
  {
    alias.str= table->alias.c_ptr();
    alias.length= table->alias.length();
  }
  else
    alias= table->s->table_name;

  packet->append(STRING_WITH_LEN("CREATE SEQUENCE "));
  append_identifier(thd, packet, &alias);
  packet->append(STRING_WITH_LEN(" start with "));
  packet->append_longlong(seq->start);
  packet->append(STRING_WITH_LEN(" minvalue "));
  packet->append_longlong(seq->min_value);
  packet->append(STRING_WITH_LEN(" maxvalue "));
  packet->append_longlong(seq->max_value);
  packet->append(STRING_WITH_LEN(" increment by "));
  packet->append_longlong(seq->increment);
  if (seq->cache)
  {
    packet->append(STRING_WITH_LEN(" cache "));
    packet->append_longlong(seq->cache);
  }
  else
    packet->append(STRING_WITH_LEN(" nocache"));
  if (seq->cycle)
    packet->append(STRING_WITH_LEN(" cycle"));
  else
    packet->append(STRING_WITH_LEN(" nocycle"));

  if (show_table_options)
    add_table_options(thd, table, 0, 0, 1, packet);
  return 0;
}


/****************************************************************************
  Return info about all processes
  returns for each thread: thread id, user, host, db,
  command, info
****************************************************************************/

/*
  Snapshot of one connection for SHOW PROCESSLIST output.  Instances are
  allocated on the requesting thread's MEM_ROOT (placement new below), so
  regular delete only poisons the memory; the MEM_ROOT owns it.
*/
class thread_info :public ilink {
public:
  static void *operator new(size_t size, MEM_ROOT *mem_root) throw ()
  { return alloc_root(mem_root, size); }
  static void operator delete(void *ptr __attribute__((unused)),
                              size_t size __attribute__((unused)))
  { TRASH_FREE(ptr, size); }
  static void operator delete(void *, MEM_ROOT *){}

  my_thread_id thread_id;
  uint32 os_thread_id;
  ulonglong start_time;
  uint   command;
  const char *user,*host,*db,*proc_info,*state_info;
  CSET_STRING query_string;
  double progress;
};

/*
  Return a short state description for 'tmp', or NULL when no state is
  known.  Network I/O state takes priority over proc_info.
*/
static const char *thread_state_info(THD *tmp)
{
#ifndef EMBEDDED_LIBRARY
  if (tmp->net.reading_or_writing)
  {
    if (tmp->net.reading_or_writing == 2)
      return "Writing to net";
    if (tmp->get_command() == COM_SLEEP)
      return "";
    return "Reading from net";
  }
#else
  if (tmp->get_command() == COM_SLEEP)
    return "";
#endif

  if (tmp->proc_info)
    return tmp->proc_info;

  /* Check if we are waiting on a condition */
  if (!trylock_short(&tmp->LOCK_thd_kill))
  {
    /* mysys_var is protected by above mutex */
    bool cond= tmp->mysys_var && tmp->mysys_var->current_cond;
    mysql_mutex_unlock(&tmp->LOCK_thd_kill);
    if (cond)
      return "Waiting on cond";
  }
  return NULL;
}


/* Context passed through server_threads.iterate() to list_callback(). */
struct list_callback_arg
{
  list_callback_arg(const char *u, THD *t, ulong m):
    user(u), thd(t), max_query_length(m) {}
  I_List<thread_info> thread_infos;  // collected rows, drained by caller
  const char *user;                  // filter: only this user, or NULL
  THD *thd;                          // requesting thread (owns allocations)
  ulong max_query_length;            // truncation limit for Info column
};


/*
  Per-thread callback for SHOW PROCESSLIST: snapshot 'tmp' into a
  thread_info appended to arg->thread_infos.  Skips threads the
  requesting user may not see.
*/
static my_bool list_callback(THD *tmp, list_callback_arg *arg)
{

  Security_context *tmp_sctx= tmp->security_ctx;
  bool got_thd_data;
  if ((tmp->vio_ok() || tmp->system_thread) &&
      (!arg->user || (!tmp->system_thread &&
                      tmp_sctx->user && !strcmp(tmp_sctx->user, arg->user))))
  {
    thread_info *thd_info= new (arg->thd->mem_root) thread_info;

    thd_info->thread_id=tmp->thread_id;
    thd_info->os_thread_id=tmp->os_thread_id;
    thd_info->user=
arg->thd->strdup(tmp_sctx->user ? tmp_sctx->user : + (tmp->system_thread ? + "system user" : "unauthenticated user")); + if (tmp->peer_port && (tmp_sctx->host || tmp_sctx->ip) && + arg->thd->security_ctx->host_or_ip[0]) + { + if ((thd_info->host= (char*) arg->thd->alloc(LIST_PROCESS_HOST_LEN+1))) + my_snprintf((char *) thd_info->host, LIST_PROCESS_HOST_LEN, + "%s:%u", tmp_sctx->host_or_ip, tmp->peer_port); + } + else + thd_info->host= arg->thd->strdup(tmp_sctx->host_or_ip[0] ? + tmp_sctx->host_or_ip : + tmp_sctx->host ? tmp_sctx->host : ""); + thd_info->command=(int) tmp->get_command(); + + if ((got_thd_data= !trylock_short(&tmp->LOCK_thd_data))) + { + /* This is an approximation */ + thd_info->proc_info= (char*) (tmp->killed >= KILL_QUERY ? + "Killed" : 0); + + /* The following variables are only safe to access under a lock */ + thd_info->db= 0; + if (tmp->db.str) + thd_info->db= arg->thd->strmake(tmp->db.str, tmp->db.length); + + if (tmp->query()) + { + uint length= MY_MIN(arg->max_query_length, tmp->query_length()); + char *q= arg->thd->strmake(tmp->query(),length); + /* Safety: in case strmake failed, we set length to 0. */ + thd_info->query_string= + CSET_STRING(q, q ? length : 0, tmp->query_charset()); + } + + /* + Progress report. We need to do this under a lock to ensure that all + is from the same stage. 
+ */ + if (tmp->progress.max_counter) + { + uint max_stage= MY_MAX(tmp->progress.max_stage, 1); + thd_info->progress= (((tmp->progress.stage / (double) max_stage) + + ((tmp->progress.counter / + (double) tmp->progress.max_counter) / + (double) max_stage)) * + 100.0); + set_if_smaller(thd_info->progress, 100); + } + else + thd_info->progress= 0.0; + } + else + { + thd_info->proc_info= "Busy"; + thd_info->progress= 0.0; + thd_info->db= ""; + } + + thd_info->state_info= thread_state_info(tmp); + thd_info->start_time= tmp->start_utime; + ulonglong utime_after_query_snapshot= tmp->utime_after_query; + if (thd_info->start_time < utime_after_query_snapshot) + thd_info->start_time= utime_after_query_snapshot; // COM_SLEEP + + if (got_thd_data) + mysql_mutex_unlock(&tmp->LOCK_thd_data); + arg->thread_infos.append(thd_info); + } + return 0; +} + + +void mysqld_list_processes(THD *thd,const char *user, bool verbose) +{ + Item *field; + List field_list; + list_callback_arg arg(user, thd, + verbose ? 
thd->variables.max_allowed_packet : + PROCESS_LIST_WIDTH); + Protocol *protocol= thd->protocol; + MEM_ROOT *mem_root= thd->mem_root; + DBUG_ENTER("mysqld_list_processes"); + + field_list.push_back(new (mem_root) + Item_int(thd, "Id", 0, MY_INT32_NUM_DECIMAL_DIGITS), + mem_root); + field_list.push_back(new (mem_root) + Item_empty_string(thd, "User", + USERNAME_CHAR_LENGTH), + mem_root); + field_list.push_back(new (mem_root) + Item_empty_string(thd, "Host", + LIST_PROCESS_HOST_LEN), + mem_root); + field_list.push_back(field=new (mem_root) + Item_empty_string(thd, "db", NAME_CHAR_LEN), + mem_root); + field->set_maybe_null();; + field_list.push_back(new (mem_root) Item_empty_string(thd, "Command", 16), + mem_root); + field_list.push_back(field= new (mem_root) + Item_return_int(thd, "Time", 7, MYSQL_TYPE_LONG), + mem_root); + field->unsigned_flag= 0; + field_list.push_back(field=new (mem_root) + Item_empty_string(thd, "State", 30), + mem_root); + field->set_maybe_null();; + field_list.push_back(field=new (mem_root) + Item_empty_string(thd, "Info", arg.max_query_length), + mem_root); + field->set_maybe_null();; + if (!(thd->variables.old_behavior & OLD_MODE_NO_PROGRESS_INFO)) + { + field_list.push_back(field= new (mem_root) + Item_float(thd, "Progress", 0.0, 3, 7), + mem_root); + field->base_flags&= ~item_base_t::MAYBE_NULL; + } + if (protocol->send_result_set_metadata(&field_list, + Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF)) + DBUG_VOID_RETURN; + + if (thd->killed) + DBUG_VOID_RETURN; + + server_threads.iterate(list_callback, &arg); + + ulonglong now= microsecond_interval_timer(); + + while (thread_info *thd_info= arg.thread_infos.get()) + { + protocol->prepare_for_resend(); + protocol->store(thd_info->thread_id); + protocol->store(thd_info->user, strlen(thd_info->user), system_charset_info); + protocol->store(thd_info->host, strlen(thd_info->host), system_charset_info); + protocol->store_string_or_null(thd_info->db, system_charset_info); + if 
(thd_info->proc_info) + protocol->store(thd_info->proc_info, strlen(thd_info->proc_info), + system_charset_info); + else + protocol->store(&command_name[thd_info->command], system_charset_info); + if (thd_info->start_time && now > thd_info->start_time) + protocol->store_long((now - thd_info->start_time) / HRTIME_RESOLUTION); + else + protocol->store_null(); + protocol->store_string_or_null(thd_info->state_info, system_charset_info); + if (thd_info->query_string.length()) + protocol->store(thd_info->query_string.str(), + thd_info->query_string.length(), + thd_info->query_string.charset()); + else + protocol->store_null(); + if (!(thd->variables.old_behavior & OLD_MODE_NO_PROGRESS_INFO)) + protocol->store_double(thd_info->progress, 3); + if (protocol->write()) + break; /* purecov: inspected */ + } + my_eof(thd); + DBUG_VOID_RETURN; +} + + +/* + Produce EXPLAIN data. + + This function is APC-scheduled to be run in the context of the thread that + we're producing EXPLAIN for. +*/ + +void Show_explain_request::call_in_target_thread() +{ + Query_arena backup_arena; + bool printed_anything= FALSE; + + /* + Change the arena because JOIN::print_explain and co. are going to allocate + items. Let them allocate them on our arena. + */ + target_thd->set_n_backup_active_arena((Query_arena*)request_thd, + &backup_arena); + + query_str.copy(target_thd->query(), + target_thd->query_length(), + target_thd->query_charset()); + + DBUG_ASSERT(current_thd == target_thd); + + /* + When producing JSON output, one should not change current_thd. + (If one does that, they will hit an assert when printing constant item + fields). 
+ */ + if (target_thd->lex->print_explain(explain_buf, 0 /* explain flags*/, + is_analyze, is_json_format, + &printed_anything)) + { + failed_to_produce= TRUE; + } + + if (!printed_anything) + failed_to_produce= TRUE; + + target_thd->restore_active_arena((Query_arena*)request_thd, &backup_arena); +} + + +int select_result_explain_buffer::send_data(List &items) +{ + int res; + THD *cur_thd= current_thd; + DBUG_ENTER("select_result_explain_buffer::send_data"); + + /* + Switch to the receiveing thread, so that we correctly count memory used + by it. This is needed as it's the receiving thread that will free the + memory. + (TODO: Now that we don't change current_thd in + Show_explain_request::call_in_target_thread, is this necessary anymore?) + */ + set_current_thd(thd); + fill_record(thd, dst_table, dst_table->field, items, TRUE, FALSE); + res= dst_table->file->ha_write_tmp_row(dst_table->record[0]); + set_current_thd(cur_thd); + DBUG_RETURN(MY_TEST(res)); +} + +bool select_result_text_buffer::send_result_set_metadata(List &fields, + uint flag) +{ + n_columns= fields.elements; + return append_row(fields, true /*send item names */); +} + + +int select_result_text_buffer::send_data(List &items) +{ + return append_row(items, false /*send item values */); +} + +int select_result_text_buffer::append_row(List &items, bool send_names) +{ + List_iterator it(items); + Item *item; + char **row; + int column= 0; + + if (!(row= (char**) thd->alloc(sizeof(char*) * n_columns)) || + rows.push_back(row, thd->mem_root)) + return true; + + StringBuffer<32> buf; + + while ((item= it++)) + { + DBUG_ASSERT(column < n_columns); + const char *data_ptr; + char *ptr; + size_t data_len; + + buf.set_buffer_if_not_allocated(&my_charset_bin); + if (send_names) + { + DBUG_ASSERT(strlen(item->name.str) == item->name.length); + data_ptr= item->name.str; + data_len= item->name.length; + } + else + { + String *res; + res= item->val_str(&buf); + if (item->null_value) + { + data_ptr= "NULL"; + 
        data_len=4;
      }
      else
      {
        data_ptr= res->c_ptr_safe();
        data_len= res->length();
      }
    }

    /* +1 copies the terminating NUL along with the data. */
    if (!(ptr= (char*) thd->memdup(data_ptr, data_len + 1)))
      return true;
    row[column]= ptr;

    column++;
  }
  return false;
}


/*
  Dump the collected rows into 'res' as "# explain: ..." comment lines,
  one row per line, columns separated by tabs (used to embed EXPLAIN
  output into the slow query log).
*/
void select_result_text_buffer::save_to(String *res)
{
  List_iterator<char*> it(rows);
  char **row;
  res->append(STRING_WITH_LEN("#\n"));
  while ((row= it++))
  {
    res->append(STRING_WITH_LEN("# explain: "));
    for (int i=0; i < n_columns; i++)
    {
      if (i)
        res->append('\t');
      res->append(row[i], strlen(row[i]));
    }
    res->append('\n');
  }
  res->append(STRING_WITH_LEN("#\n"));
}


/*
  Store the SHOW EXPLAIN/SHOW ANALYZE output in the temporary table.
*/

int fill_show_explain_or_analyze(THD *thd, TABLE_LIST *table, COND *cond,
                                 bool json_format, bool is_analyze)
{
  const char *calling_user;
  THD *tmp;
  my_thread_id thread_id;
  DBUG_ENTER("fill_show_explain_or_analyze");

  DBUG_ASSERT(cond==NULL);
  thread_id= thd->lex->value_list.head()->val_int();
  /* NULL calling_user means the caller may inspect any thread. */
  calling_user= (thd->security_ctx->master_access & PRIV_STMT_SHOW_EXPLAIN) ?
                 NullS : thd->security_ctx->priv_user;

  /* find_thread_by_id() returns with the target's LOCK_thd_kill held. */
  if ((tmp= find_thread_by_id(thread_id)))
  {
    Security_context *tmp_sctx= tmp->security_ctx;
    MEM_ROOT explain_mem_root, *save_mem_root;

    /*
      If calling_user==NULL, calling thread has SUPER or PROCESS
      privilege, and so can do SHOW EXPLAIN/SHOW ANALYZE on any user.

      if calling_user!=NULL, he's only allowed to view
      SHOW EXPLAIN/SHOW ANALYZE on his own threads.
    */
    if (calling_user && (!tmp_sctx->user || strcmp(calling_user,
                                                   tmp_sctx->user)))
    {
      my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), "PROCESS");
      mysql_mutex_unlock(&tmp->LOCK_thd_kill);
      DBUG_RETURN(1);
    }

    if (tmp == thd)
    {
      mysql_mutex_unlock(&tmp->LOCK_thd_kill);
      my_error(ER_TARGET_NOT_EXPLAINABLE, MYF(0));
      DBUG_RETURN(1);
    }

    bool bres;
    /*
      Ok we've found the thread of interest and it won't go away because
      we're holding its LOCK_thd_kill.
Post it a SHOW EXPLAIN/SHOW ANALYZE + request. + */ + bool timed_out; + int timeout_sec= 30; + Show_explain_request explain_req; + explain_req.is_json_format= json_format; + select_result_explain_buffer *explain_buf; + + if (!(explain_buf= new (thd->mem_root) + select_result_explain_buffer(thd, table->table))) + DBUG_RETURN(1); + + explain_req.is_analyze= is_analyze; + explain_req.explain_buf= explain_buf; + explain_req.target_thd= tmp; + explain_req.request_thd= thd; + explain_req.failed_to_produce= FALSE; + + /* + Do not use default memroot as this is only to be used by the + target thread (It's marked as thread MY_THREAD_SPECIFIC). + */ + init_sql_alloc(key_memory_thd_main_mem_root, + &explain_mem_root, 0, 8000, MYF(0)); + save_mem_root= thd->mem_root; + thd->mem_root= &explain_mem_root; + + /* Ok, we have a lock on target->LOCK_thd_kill, can call: */ + bres= tmp->apc_target.make_apc_call(thd, &explain_req, timeout_sec, + &timed_out); + thd->mem_root= save_mem_root; + + if (bres || explain_req.failed_to_produce) + { + if (thd->killed) + thd->send_kill_message(); + else if (timed_out) + my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0)); + else + my_error(ER_TARGET_NOT_EXPLAINABLE, MYF(0)); + + bres= TRUE; + } + else + { + /* + Push the query string as a warning. The query may be in a different + charset than the charset that's used for error messages, so, convert it + if needed. 
+ */ + CHARSET_INFO *fromcs= explain_req.query_str.charset(); + CHARSET_INFO *tocs= error_message_charset_info; + char *warning_text; + if (!my_charset_same(fromcs, tocs)) + { + uint conv_length= 1 + tocs->mbmaxlen * explain_req.query_str.length() / + fromcs->mbminlen; + uint dummy_errors; + char *to, *p; + if (!(to= (char*)thd->alloc(conv_length + 1))) + DBUG_RETURN(1); + p= to; + p+= copy_and_convert(to, conv_length, tocs, + explain_req.query_str.c_ptr(), + explain_req.query_str.length(), fromcs, + &dummy_errors); + *p= 0; + warning_text= to; + } + else + warning_text= explain_req.query_str.c_ptr_safe(); + + push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_YES, warning_text); + } + free_root(&explain_mem_root, MYF(0)); + DBUG_RETURN(bres); + } + my_error(ER_NO_SUCH_THREAD, MYF(0), (ulong) thread_id); + DBUG_RETURN(1); +} + + +int fill_show_explain_tabular(THD *thd, TABLE_LIST *table, COND *cond) +{ + return fill_show_explain_or_analyze( + thd, table, cond, FALSE /* json_format */, FALSE /* is_analyze */); +} + + +int fill_show_explain_json(THD *thd, TABLE_LIST *table, COND *cond) +{ + return fill_show_explain_or_analyze( + thd, table, cond, TRUE /* json_format */, FALSE /* is_analyze */); +} + + +int fill_show_analyze_tabular(THD * thd, TABLE_LIST * table, COND * cond) +{ + return fill_show_explain_or_analyze( + thd, table, cond, FALSE /* json_format */, TRUE /* is_analyze */); +} + + +int fill_show_analyze_json(THD * thd, TABLE_LIST * table, COND * cond) +{ + return fill_show_explain_or_analyze( + thd, table, cond, TRUE /* json_format */, TRUE /* is_analyze */); +} + + +struct processlist_callback_arg +{ + processlist_callback_arg(THD *thd_arg, TABLE *table_arg): + thd(thd_arg), table(table_arg), unow(microsecond_interval_timer()) {} + THD *thd; + TABLE *table; + ulonglong unow; +}; + + +static my_bool processlist_callback(THD *tmp, processlist_callback_arg *arg) +{ + Security_context *tmp_sctx= tmp->security_ctx; + CHARSET_INFO *cs= system_charset_info; + 
const char *val; + ulonglong max_counter; + bool got_thd_data; + char *user= + arg->thd->security_ctx->master_access & PRIV_STMT_SHOW_PROCESSLIST ? + NullS : arg->thd->security_ctx->priv_user; + + if ((!tmp->vio_ok() && !tmp->system_thread) || + (user && (tmp->system_thread || !tmp_sctx->user || + strcmp(tmp_sctx->user, user)))) + return 0; + + restore_record(arg->table, s->default_values); + /* ID */ + arg->table->field[0]->store((longlong) tmp->thread_id, TRUE); + /* USER */ + val= tmp_sctx->user ? tmp_sctx->user : + (tmp->system_thread ? "system user" : "unauthenticated user"); + arg->table->field[1]->store(val, strlen(val), cs); + /* HOST */ + if (tmp->peer_port && (tmp_sctx->host || tmp_sctx->ip) && + arg->thd->security_ctx->host_or_ip[0]) + { + char host[LIST_PROCESS_HOST_LEN + 1]; + my_snprintf(host, LIST_PROCESS_HOST_LEN, "%s:%u", + tmp_sctx->host_or_ip, tmp->peer_port); + arg->table->field[2]->store(host, strlen(host), cs); + } + else + arg->table->field[2]->store(tmp_sctx->host_or_ip, + strlen(tmp_sctx->host_or_ip), cs); + + if ((got_thd_data= !trylock_short(&tmp->LOCK_thd_data))) + { + /* DB */ + if (tmp->db.str) + { + arg->table->field[3]->store(tmp->db.str, tmp->db.length, cs); + arg->table->field[3]->set_notnull(); + } + } + + /* COMMAND */ + if ((val= (char *) (!got_thd_data ? "Busy" : + (tmp->killed >= KILL_QUERY ? + "Killed" : 0)))) + arg->table->field[4]->store(val, strlen(val), cs); + else + arg->table->field[4]->store(command_name[tmp->get_command()].str, + command_name[tmp->get_command()].length, cs); + + /* MYSQL_TIME */ + ulonglong utime= tmp->start_utime; + ulonglong utime_after_query_snapshot= tmp->utime_after_query; + if (utime < utime_after_query_snapshot) + utime= utime_after_query_snapshot; // COM_SLEEP + utime= utime && utime < arg->unow ? 
arg->unow - utime : 0; + + arg->table->field[5]->store(utime / HRTIME_RESOLUTION, TRUE); + + if (got_thd_data) + { + if (tmp->query()) + { + arg->table->field[7]->store(tmp->query(), + MY_MIN(PROCESS_LIST_INFO_WIDTH, + tmp->query_length()), cs); + arg->table->field[7]->set_notnull(); + + /* INFO_BINARY */ + arg->table->field[16]->store(tmp->query(), + MY_MIN(PROCESS_LIST_INFO_WIDTH, + tmp->query_length()), + &my_charset_bin); + arg->table->field[16]->set_notnull(); + } + + /* + Progress report. We need to do this under a lock to ensure that all + is from the same stage. + */ + if ((max_counter= tmp->progress.max_counter)) + { + arg->table->field[9]->store((longlong) tmp->progress.stage + 1, 1); + arg->table->field[10]->store((longlong) tmp->progress.max_stage, 1); + arg->table->field[11]->store((double) tmp->progress.counter / + (double) max_counter*100.0); + } + mysql_mutex_unlock(&tmp->LOCK_thd_data); + } + + /* STATE */ + if ((val= thread_state_info(tmp))) + { + arg->table->field[6]->store(val, strlen(val), cs); + arg->table->field[6]->set_notnull(); + } + + /* TIME_MS */ + arg->table->field[8]->store((double)(utime / (HRTIME_RESOLUTION / 1000.0))); + + /* + This may become negative if we free a memory allocated by another + thread in this thread. However it's better that we notice it eventually + than hide it. 
+ */ + arg->table->field[12]->store((longlong) tmp->status_var.local_memory_used, + FALSE); + arg->table->field[13]->store((longlong) tmp->status_var.max_local_memory_used, + FALSE); + arg->table->field[14]->store((longlong) tmp->get_examined_row_count(), TRUE); + + /* QUERY_ID */ + arg->table->field[15]->store(tmp->query_id, TRUE); + + arg->table->field[17]->store(tmp->os_thread_id); + + if (schema_table_store_record(arg->thd, arg->table)) + return 1; + return 0; +} + + +int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond) +{ + processlist_callback_arg arg(thd, tables->table); + DBUG_ENTER("fill_schema_processlist"); + DEBUG_SYNC(thd,"fill_schema_processlist_after_unow"); + if (!thd->killed && + server_threads.iterate(processlist_callback, &arg)) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + +/***************************************************************************** + Status functions +*****************************************************************************/ + +DYNAMIC_ARRAY all_status_vars; +static bool status_vars_inited= 0; +ulonglong status_var_array_version= 0; + +C_MODE_START +static int show_var_cmp(const void *var1, const void *var2) +{ + return strcasecmp(((SHOW_VAR*)var1)->name, ((SHOW_VAR*)var2)->name); +} +C_MODE_END + +/* + deletes all the SHOW_UNDEF elements from the array and calls + delete_dynamic() if it's completely empty. 
+*/ +static void shrink_var_array(DYNAMIC_ARRAY *array) +{ + uint a,b; + SHOW_VAR *all= dynamic_element(array, 0, SHOW_VAR *); + + for (a= b= 0; b < array->elements; b++) + if (all[b].type != SHOW_UNDEF) + all[a++]= all[b]; + if (a) + { + bzero(all+a, sizeof(SHOW_VAR)); // writing NULL-element to the end + array->elements= a; + } + else // array is completely empty - delete it + delete_dynamic(array); + status_var_array_version++; +} + +/* + Adds an array of SHOW_VAR entries to the output of SHOW STATUS + + SYNOPSIS + add_status_vars(SHOW_VAR *list) + list - an array of SHOW_VAR entries to add to all_status_vars + the last entry must be {0,0,SHOW_UNDEF} + + NOTE + The handling of all_status_vars[] is completely internal, it's allocated + automatically when something is added to it, and deleted completely when + the last entry is removed. + + As a special optimization, if add_status_vars() is called before + init_status_vars(), it assumes "startup mode" - neither concurrent access + to the array nor SHOW STATUS are possible (thus it skips locks and qsort) + + The last entry of the all_status_vars[] should always be {0,0,SHOW_UNDEF} +*/ +int add_status_vars(SHOW_VAR *list) +{ + int res= 0; + if (status_vars_inited) + mysql_rwlock_wrlock(&LOCK_all_status_vars); + if (!all_status_vars.buffer && // array is not allocated yet - do it now + my_init_dynamic_array(PSI_INSTRUMENT_ME, &all_status_vars, + sizeof(SHOW_VAR), 250, 50, MYF(0))) + { + res= 1; + goto err; + } + while (list->name) + res|= insert_dynamic(&all_status_vars, (uchar*)list++); + res|= insert_dynamic(&all_status_vars, (uchar*)list); // appending NULL-element + all_status_vars.elements--; // but next insert_dynamic should overwite it + if (status_vars_inited) + sort_dynamic(&all_status_vars, show_var_cmp); + status_var_array_version++; +err: + if (status_vars_inited) + mysql_rwlock_unlock(&LOCK_all_status_vars); + return res; +} + +/* + Make all_status_vars[] usable for SHOW STATUS + + NOTE + See 
add_status_vars(). Before init_status_vars() call, add_status_vars() + works in a special fast "startup" mode. Thus init_status_vars() + should be called as late as possible but before enabling multi-threading. +*/ +void init_status_vars() +{ + status_vars_inited=1; + sort_dynamic(&all_status_vars, show_var_cmp); + status_var_array_version++; +} + +void reset_status_vars() +{ + SHOW_VAR *ptr= (SHOW_VAR*) all_status_vars.buffer; + SHOW_VAR *last= ptr + all_status_vars.elements; + for (; ptr < last; ptr++) + { + /* Note that SHOW_LONG_NOFLUSH variables are not reset */ + if (ptr->type == SHOW_LONG) + *(ulong*) ptr->value= 0; + } +} + +/* + catch-all cleanup function, cleans up everything no matter what + + DESCRIPTION + This function is not strictly required if all add_status_vars/ + remove_status_vars are properly paired, but it's a safety measure that + deletes everything from the all_status_vars[] even if some + remove_status_vars were forgotten +*/ +void free_status_vars() +{ + delete_dynamic(&all_status_vars); + status_var_array_version++; +} + +/* + Removes an array of SHOW_VAR entries from the output of SHOW STATUS + + SYNOPSIS + remove_status_vars(SHOW_VAR *list) + list - an array of SHOW_VAR entries to remove to all_status_vars + the last entry must be {0,0,SHOW_UNDEF} + + NOTE + there's lots of room for optimizing this, especially in non-sorted mode, + but nobody cares - it may be called only in case of failed plugin + initialization in the mysqld startup. 
+*/ + +void remove_status_vars(SHOW_VAR *list) +{ + if (status_vars_inited) + { + mysql_rwlock_wrlock(&LOCK_all_status_vars); + SHOW_VAR *all= dynamic_element(&all_status_vars, 0, SHOW_VAR *); + + for (; list->name; list++) + { + int first= 0, last= ((int) all_status_vars.elements) - 1; + for ( ; first <= last; ) + { + int res, middle= (first + last) / 2; + if ((res= show_var_cmp(list, all + middle)) < 0) + last= middle - 1; + else if (res > 0) + first= middle + 1; + else + { + all[middle].type= SHOW_UNDEF; + break; + } + } + } + shrink_var_array(&all_status_vars); + mysql_rwlock_unlock(&LOCK_all_status_vars); + } + else + { + SHOW_VAR *all= dynamic_element(&all_status_vars, 0, SHOW_VAR *); + uint i; + for (; list->name; list++) + { + for (i= 0; i < all_status_vars.elements; i++) + { + if (show_var_cmp(list, all+i)) + continue; + all[i].type= SHOW_UNDEF; + break; + } + } + shrink_var_array(&all_status_vars); + } +} + +/* Current version of the all_status_vars. */ +ulonglong get_status_vars_version(void) +{ + return status_var_array_version; +} + +/** + A union holding a pointer to a type that can be referred by a status variable. + */ +union Any_pointer { + const void *as_void; + const uchar *as_uchar; + const char *as_char; + const char ** as_charptr; + const double *as_double; + const int * as_int; + const uint * as_uint; + const long *as_long; + const longlong *as_longlong; + const bool *as_bool; + const my_bool *as_my_bool; + const sys_var *as_sys_var; + const system_status_var *as_system_status_var; + const ha_rows *as_ha_rows; + const LEX_STRING *as_lex_cstring; + const SHOW_COMP_OPTION *as_show_comp_options; + intptr as_intptr; + Atomic_counter* as_atomic_counter; +}; + +/** + @brief Returns the value of a system or a status variable. + + @param thd [in] The handle of the current THD. + @param variable [in] Details of the variable. + @param value_type [in] Variable type. + @param show_type [in] Variable show type. 
+ @param status_var [in] Status variable pointer + @param charset [out] Character set of the value. + @param buff [in,out] Buffer to store the value. + (Needs to have enough memory + to hold the value of variable.) + @param length [out] Length of the value. + + @return Pointer to the value buffer. +*/ + +const char* get_one_variable(THD *thd, + const SHOW_VAR *variable, + enum_var_type value_type, SHOW_TYPE show_type, + system_status_var *status_var, + const CHARSET_INFO **charset, char *buff, + size_t *length) +{ + Any_pointer value, status_var_value; + value.as_void= variable->value; + status_var_value.as_system_status_var= status_var; + const char *pos= buff; + const char *end= buff; + + + if (show_type == SHOW_SYS) + { + const sys_var *var= value.as_sys_var; + show_type= var->show_type(); + value.as_uchar= var->value_ptr(thd, value_type, &null_clex_str); + *charset= var->charset(thd); + } + + /* + note that value may be == buff. All SHOW_xxx code below + should still work in this case + */ + switch (show_type) { + case SHOW_DOUBLE_STATUS: + value.as_char= status_var_value.as_char + value.as_intptr; + /* fall through */ + case SHOW_DOUBLE: + /* 6 is the default precision for '%f' in sprintf() */ + end= buff + my_fcvt(*value.as_double, 6, buff, NULL); + break; + case SHOW_LONG_STATUS: + value.as_char= status_var_value.as_char + value.as_intptr; + /* fall through */ + case SHOW_ULONG: + case SHOW_LONG_NOFLUSH: // the difference lies in refresh_status() +#ifndef _WIN64 + case SHOW_SIZE_T: +#endif + end= int10_to_str(*value.as_long, buff, 10); + break; + case SHOW_LONGLONG_STATUS: + value.as_char= status_var_value.as_char + value.as_intptr; + /* fall through */ + case SHOW_ULONGLONG: +#ifdef _WIN64 + case SHOW_SIZE_T: +#endif + end= longlong10_to_str(*value.as_longlong, buff, 10); + break; + case SHOW_HA_ROWS: + end= longlong10_to_str((longlong) *value.as_ha_rows, buff, 10); + break; + case SHOW_BOOL: + end= strmov(buff, *value.as_bool ? 
"ON" : "OFF"); + break; + case SHOW_MY_BOOL: + end= strmov(buff, *value.as_my_bool ? "ON" : "OFF"); + break; + case SHOW_UINT32_STATUS: + value.as_char= status_var_value.as_char + value.as_intptr; + /* fall through */ + case SHOW_UINT: + end= int10_to_str((long) *value.as_uint, buff, 10); + break; + case SHOW_SINT: + end= int10_to_str((long) *value.as_int, buff, -10); + break; + case SHOW_SLONG: + end= int10_to_str(*value.as_long, buff, -10); + break; + case SHOW_SLONGLONG: + end= longlong10_to_str(*value.as_longlong, buff, -10); + break; + case SHOW_HAVE: + { + pos= show_comp_option_name[(int) *value.as_show_comp_options]; + end= strend(pos); + break; + } + case SHOW_CHAR: + { + if (!(pos= value.as_char)) + pos= ""; + end= strend(pos); + break; + } + case SHOW_CHAR_PTR: + { + if (!(pos= *value.as_charptr)) + pos= ""; + + end= strend(pos); + break; + } + case SHOW_LEX_STRING: + { + if (!(pos= value.as_lex_cstring->str)) + end= pos= ""; + else + end= pos + value.as_lex_cstring->length; + break; + } + case SHOW_ATOMIC_COUNTER_UINT32_T: + end= int10_to_str(static_cast(*value.as_atomic_counter), buff, 10); + break; + case SHOW_UNDEF: + break; // Return empty string + case SHOW_SYS: // Cannot happen + default: + DBUG_ASSERT(0); + break; + } + + *length= (size_t) (end - pos); + return pos; +} + + +static bool show_status_array(THD *thd, const char *wild, + SHOW_VAR *variables, + enum enum_var_type scope, + struct system_status_var *status_var, + const char *prefix, TABLE *table, + bool ucase_names, + COND *cond) +{ + my_aligned_storage buffer; + char * const buff= buffer.data; + char *prefix_end; + char name_buffer[NAME_CHAR_LEN]; + int len; + SHOW_VAR tmp, *var; + bool res= FALSE; + CHARSET_INFO *charset= system_charset_info; + DBUG_ENTER("show_status_array"); + + prefix_end=strnmov(name_buffer, prefix, sizeof(name_buffer)-1); + if (*prefix) + *prefix_end++= '_'; + len=(int)(name_buffer + sizeof(name_buffer) - prefix_end); + +#ifdef WITH_WSREP + bool is_wsrep_var= 
FALSE; + /* + This is a workaround for lp:1306875 (PBX) to skip switching of wsrep + status variable name's first letter to uppercase. This is an optimization + for status variables defined under wsrep plugin. + TODO: remove once lp:1306875 has been addressed. + */ + if (*prefix && !my_strcasecmp(system_charset_info, prefix, "wsrep")) + { + is_wsrep_var= TRUE; + } +#endif /* WITH_WSREP */ + + for (; variables->name; variables++) + { + bool wild_checked= false; + strnmov(prefix_end, variables->name, len); + name_buffer[sizeof(name_buffer)-1]=0; /* Safety */ + +#ifdef WITH_WSREP + /* + If the prefix is NULL, that means we are looking into the status variables + defined directly under mysqld.cc. Do not capitalize wsrep status variable + names until lp:1306875 has been fixed. + TODO: remove once lp:1306875 has been addressed. + */ + if (!(*prefix) && !strncasecmp(name_buffer, "wsrep", strlen("wsrep"))) + { + is_wsrep_var= TRUE; + } +#endif /* WITH_WSREP */ + + if (ucase_names) + my_caseup_str(system_charset_info, name_buffer); + else + { + my_casedn_str(system_charset_info, name_buffer); + DBUG_ASSERT(name_buffer[0] >= 'a'); + DBUG_ASSERT(name_buffer[0] <= 'z'); + + // WSREP_TODO: remove once lp:1306875 has been addressed. + if (IF_WSREP(is_wsrep_var == FALSE, 1) && + status_var) + name_buffer[0]-= 'a' - 'A'; + } + + + restore_record(table, s->default_values); + table->field[0]->store(name_buffer, strlen(name_buffer), + system_charset_info); + + /* + Compare name for types that can't return arrays. We do this to not + calculate the value for function variables that we will not access + */ + if ((variables->type != SHOW_FUNC && variables->type != SHOW_ARRAY)) + { + if (wild && wild[0] && wild_case_compare(system_charset_info, + name_buffer, wild)) + continue; + wild_checked= 1; // Avoid checking it again + } + + /* + if var->type is SHOW_FUNC or SHOW_SIMPLE_FUNC, call the function. 
+ Repeat as necessary, if new var is again one of the above + */ + for (var=variables; var->type == SHOW_FUNC || + var->type == SHOW_SIMPLE_FUNC; var= &tmp) + ((mysql_show_var_func)(var->value))(thd, &tmp, buff, + status_var, scope); + + SHOW_TYPE show_type=var->type; + if (show_type == SHOW_ARRAY) + { + show_status_array(thd, wild, (SHOW_VAR *) var->value, scope, + status_var, name_buffer, table, ucase_names, cond); + } + else + { + if ((wild_checked || + !(wild && wild[0] && wild_case_compare(system_charset_info, + name_buffer, wild))) && + (!cond || cond->val_int())) + { + const char *pos; // We assign a lot of const's + size_t length; + + if (show_type == SHOW_SYS) + mysql_mutex_lock(&LOCK_global_system_variables); + else if (show_type >= SHOW_LONG_STATUS && scope == OPT_GLOBAL) + calc_sum_of_all_status_if_needed(status_var); + + pos= get_one_variable(thd, var, scope, show_type, status_var, + &charset, buff, &length); + + if (table->field[1]->field_length) + thd->count_cuted_fields= CHECK_FIELD_WARN; + table->field[1]->store(pos, (uint32) length, charset); + thd->count_cuted_fields= CHECK_FIELD_IGNORE; + table->field[1]->set_notnull(); + if (show_type == SHOW_SYS) + mysql_mutex_unlock(&LOCK_global_system_variables); + + + if (schema_table_store_record(thd, table)) + { + res= TRUE; + goto end; + } + thd->get_stmt_da()->inc_current_row_for_warning(); + } + } + } +end: + DBUG_RETURN(res); +} + +/* + collect status for all running threads + Return number of threads used +*/ + +struct calc_sum_callback_arg +{ + calc_sum_callback_arg(STATUS_VAR *to_arg): to(to_arg), count(0) {} + STATUS_VAR *to; + uint count; +}; + + +static my_bool calc_sum_callback(THD *thd, calc_sum_callback_arg *arg) +{ + arg->count++; + if (!thd->status_in_global) + { + add_to_status(arg->to, &thd->status_var); + arg->to->local_memory_used+= thd->status_var.local_memory_used; + } + if (thd->get_command() != COM_SLEEP) + arg->to->threads_running++; + return 0; +} + + +uint 
calc_sum_of_all_status(STATUS_VAR *to) +{ + calc_sum_callback_arg arg(to); + DBUG_ENTER("calc_sum_of_all_status"); + + to->local_memory_used= 0; + /* Add to this status from existing threads */ + server_threads.iterate(calc_sum_callback, &arg); + DBUG_RETURN(arg.count); +} + + +/* This is only used internally, but we need it here as a forward reference */ +extern ST_SCHEMA_TABLE schema_tables[]; + +/* + Store record to I_S table, convert HEAP table + to MyISAM if necessary + + SYNOPSIS + schema_table_store_record() + thd thread handler + table Information schema table to be updated + + RETURN + 0 success + 1 error +*/ + +bool schema_table_store_record(THD *thd, TABLE *table) +{ + int error; + + if (unlikely(thd->killed)) + { + thd->send_kill_message(); + return 1; + } + + if (unlikely((error= table->file->ha_write_tmp_row(table->record[0])))) + { + TMP_TABLE_PARAM *param= table->pos_in_table_list->schema_table_param; + if (unlikely(create_internal_tmp_table_from_heap(thd, table, + param->start_recinfo, + ¶m->recinfo, error, 0, + NULL))) + + return 1; + } + return 0; +} + + +static int make_table_list(THD *thd, SELECT_LEX *sel, + LEX_CSTRING *db_name, LEX_CSTRING *table_name) +{ + Table_ident *table_ident; + table_ident= new Table_ident(thd, db_name, table_name, 1); + if (!sel->add_table_to_list(thd, table_ident, 0, 0, TL_READ, MDL_SHARED_READ)) + return 1; + return 0; +} + + +/** + @brief Get lookup value from the part of 'WHERE' condition + + @details This function gets lookup value from + the part of 'WHERE' condition if it's possible and + fill appropriate lookup_field_vals struct field + with this value. 
+ + @param[in] thd thread handler + @param[in] item_func part of WHERE condition + @param[in] table I_S table + @param[in, out] lookup_field_vals Struct which holds lookup values + + @return + 0 success + 1 error, there can be no matching records for the condition +*/ + +bool get_lookup_value(THD *thd, Item_func *item_func, + TABLE_LIST *table, + LOOKUP_FIELD_VALUES *lookup_field_vals) +{ + ST_SCHEMA_TABLE *schema_table= table->schema_table; + ST_FIELD_INFO *field_info= schema_table->fields_info; + const char *field_name1= schema_table->idx_field1 >= 0 ? + field_info[schema_table->idx_field1].name().str : ""; + const char *field_name2= schema_table->idx_field2 >= 0 ? + field_info[schema_table->idx_field2].name().str : ""; + + if (item_func->functype() == Item_func::EQ_FUNC || + item_func->functype() == Item_func::EQUAL_FUNC) + { + int idx_field, idx_val; + char tmp[MAX_FIELD_WIDTH]; + String *tmp_str, str_buff(tmp, sizeof(tmp), system_charset_info); + Item_field *item_field; + CHARSET_INFO *cs= system_charset_info; + + if (item_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && + item_func->arguments()[1]->const_item()) + { + idx_field= 0; + idx_val= 1; + } + else if (item_func->arguments()[1]->real_item()->type() == Item::FIELD_ITEM && + item_func->arguments()[0]->const_item()) + { + idx_field= 1; + idx_val= 0; + } + else + return 0; + + item_field= (Item_field*) item_func->arguments()[idx_field]->real_item(); + if (table->table != item_field->field->table) + return 0; + tmp_str= item_func->arguments()[idx_val]->val_str(&str_buff); + + /* impossible value */ + if (!tmp_str) + return 1; + + /* Lookup value is database name */ + if (!cs->strnncollsp(field_name1, strlen(field_name1), + item_field->field_name.str, + item_field->field_name.length)) + { + thd->make_lex_string(&lookup_field_vals->db_value, + tmp_str->ptr(), tmp_str->length()); + } + /* Lookup value is table name */ + else if (!cs->strnncollsp(field_name2, + strlen(field_name2), + 
item_field->field_name.str, + item_field->field_name.length)) + { + thd->make_lex_string(&lookup_field_vals->table_value, + tmp_str->ptr(), tmp_str->length()); + } + } + return 0; +} + + +/** + @brief Calculates lookup values from 'WHERE' condition + + @details This function calculates lookup value(database name, table name) + from 'WHERE' condition if it's possible and + fill lookup_field_vals struct fields with these values. + + @param[in] thd thread handler + @param[in] cond WHERE condition + @param[in] table I_S table + @param[in, out] lookup_field_vals Struct which holds lookup values + + @return + 0 success + 1 error, there can be no matching records for the condition +*/ + +bool calc_lookup_values_from_cond(THD *thd, COND *cond, TABLE_LIST *table, + LOOKUP_FIELD_VALUES *lookup_field_vals) +{ + if (!cond) + return 0; + + if (cond->type() == Item::COND_ITEM) + { + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item= li++)) + { + if (item->type() == Item::FUNC_ITEM) + { + if (get_lookup_value(thd, (Item_func*)item, table, lookup_field_vals)) + return 1; + } + else + { + if (calc_lookup_values_from_cond(thd, item, table, lookup_field_vals)) + return 1; + } + } + } + return 0; + } + else if (cond->type() == Item::FUNC_ITEM && + get_lookup_value(thd, (Item_func*) cond, table, lookup_field_vals)) + return 1; + return 0; +} + + +bool uses_only_table_name_fields(Item *item, TABLE_LIST *table) +{ + if (item->type() == Item::FUNC_ITEM) + { + Item_func *item_func= (Item_func*)item; + for (uint i=0; iargument_count(); i++) + { + if (!uses_only_table_name_fields(item_func->arguments()[i], table)) + return 0; + } + } + else if (item->type() == Item::ROW_ITEM) + { + Item_row *item_row= static_cast(item); + for (uint i= 0; i < item_row->cols(); i++) + { + if (!uses_only_table_name_fields(item_row->element_index(i), table)) + return 0; + } + } + else if (item->type() == 
Item::FIELD_ITEM) + { + Item_field *item_field= (Item_field*)item; + CHARSET_INFO *cs= system_charset_info; + ST_SCHEMA_TABLE *schema_table= table->schema_table; + ST_FIELD_INFO *field_info= schema_table->fields_info; + const char *field_name1= schema_table->idx_field1 >= 0 ? + field_info[schema_table->idx_field1].name().str : ""; + const char *field_name2= schema_table->idx_field2 >= 0 ? + field_info[schema_table->idx_field2].name().str : ""; + if (table->table != item_field->field->table || + (cs->strnncollsp(field_name1, strlen(field_name1), + item_field->field_name.str, + item_field->field_name.length) && + cs->strnncollsp(field_name2, strlen(field_name2), + item_field->field_name.str, + item_field->field_name.length))) + return 0; + } + else if (item->type() == Item::EXPR_CACHE_ITEM) + { + Item_cache_wrapper *tmp= static_cast(item); + return uses_only_table_name_fields(tmp->get_orig_item(), table); + } + else if (item->type() == Item::REF_ITEM) + return uses_only_table_name_fields(item->real_item(), table); + + if (item->real_type() == Item::SUBSELECT_ITEM && !item->const_item()) + return 0; + + return 1; +} + + +COND *make_cond_for_info_schema(THD *thd, COND *cond, TABLE_LIST *table) +{ + if (!cond) + return (COND*) 0; + if (cond->type() == Item::COND_ITEM) + { + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + /* Create new top level AND item */ + Item_cond_and *new_cond=new (thd->mem_root) Item_cond_and(thd); + if (!new_cond) + return (COND*) 0; + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + Item *fix= make_cond_for_info_schema(thd, item, table); + if (fix) + new_cond->argument_list()->push_back(fix, thd->mem_root); + } + switch (new_cond->argument_list()->elements) { + case 0: + return (COND*) 0; + case 1: + return new_cond->argument_list()->head(); + default: + new_cond->quick_fix_field(); + return new_cond; + } + } + else + { // Or list + Item_cond_or *new_cond= new 
(thd->mem_root) Item_cond_or(thd); + if (!new_cond) + return (COND*) 0; + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + Item *fix=make_cond_for_info_schema(thd, item, table); + if (!fix) + return (COND*) 0; + new_cond->argument_list()->push_back(fix, thd->mem_root); + } + new_cond->quick_fix_field(); + new_cond->top_level_item(); + return new_cond; + } + } + + if (!uses_only_table_name_fields(cond, table)) + return (COND*) 0; + return cond; +} + + +/** + @brief Calculate lookup values(database name, table name) + + @details This function calculates lookup values(database name, table name) + from 'WHERE' condition or wild values (for 'SHOW' commands only) + from LEX struct and fill lookup_field_vals struct field + with these values. + + @param[in] thd thread handler + @param[in] cond WHERE condition + @param[in] tables I_S table + @param[in, out] lookup_field_values Struct which holds lookup values + + @return + 0 success + 1 error, there can be no matching records for the condition +*/ + +bool get_lookup_field_values(THD *thd, COND *cond, bool fix_table_name_case, + TABLE_LIST *tables, + LOOKUP_FIELD_VALUES *lookup_field_values) +{ + LEX *lex= thd->lex; + String *wild= lex->wild; + bool rc= 0; + + bzero((char*) lookup_field_values, sizeof(LOOKUP_FIELD_VALUES)); + switch (lex->sql_command) { + case SQLCOM_SHOW_PLUGINS: + if (lex->ident.str) + { + thd->make_lex_string(&lookup_field_values->db_value, + lex->ident.str, lex->ident.length); + break; + } + /* fall through */ + case SQLCOM_SHOW_GENERIC: + case SQLCOM_SHOW_DATABASES: + if (wild) + { + thd->make_lex_string(&lookup_field_values->db_value, + wild->ptr(), wild->length()); + lookup_field_values->wild_db_value= 1; + } + break; + case SQLCOM_SHOW_TABLES: + case SQLCOM_SHOW_TABLE_STATUS: + case SQLCOM_SHOW_TRIGGERS: + case SQLCOM_SHOW_EVENTS: + thd->make_lex_string(&lookup_field_values->db_value, + lex->first_select_lex()->db.str, + 
lex->first_select_lex()->db.length); + if (wild) + { + thd->make_lex_string(&lookup_field_values->table_value, + wild->ptr(), wild->length()); + lookup_field_values->wild_table_value= 1; + } + break; + default: + /* + The "default" is for queries over I_S. + All previous cases handle SHOW commands. + */ + rc= calc_lookup_values_from_cond(thd, cond, tables, lookup_field_values); + break; + } + + if (lower_case_table_names && !rc) + { + /* + We can safely do in-place upgrades here since all of the above cases + are allocating a new memory buffer for these strings. + */ + if (lookup_field_values->db_value.str && lookup_field_values->db_value.str[0]) + my_casedn_str(system_charset_info, + (char*) lookup_field_values->db_value.str); + if (fix_table_name_case && + lookup_field_values->table_value.str && + lookup_field_values->table_value.str[0]) + my_casedn_str(system_charset_info, + (char*) lookup_field_values->table_value.str); + } + + return rc; +} + + +enum enum_schema_tables get_schema_table_idx(ST_SCHEMA_TABLE *schema_table) +{ + return (enum enum_schema_tables) (schema_table - &schema_tables[0]); +} + + +/* + Create db names list. Information schema name always is first in list + + SYNOPSIS + make_db_list() + thd thread handler + files list of db names + wild wild string + idx_field_vals idx_field_vals->db_name contains db name or + wild string + + RETURN + zero success + non-zero error +*/ + +static int make_db_list(THD *thd, Dynamic_array *files, + LOOKUP_FIELD_VALUES *lookup_field_vals) +{ + if (lookup_field_vals->wild_db_value) + { + /* + This part of code is only for SHOW DATABASES command. 
+ idx_field_vals->db_value can be 0 when we don't use + LIKE clause (see also get_index_field_values() function) + */ + if (!lookup_field_vals->db_value.str || + !wild_case_compare(system_charset_info, + INFORMATION_SCHEMA_NAME.str, + lookup_field_vals->db_value.str)) + { + if (files->append_val(&INFORMATION_SCHEMA_NAME)) + return 1; + } + return find_files(thd, files, 0, mysql_data_home, + &lookup_field_vals->db_value); + } + + + /* + If we have db lookup value we just add it to list and + exit from the function. + We don't do this for database names longer than the maximum + name length. + */ + if (lookup_field_vals->db_value.str) + { + if (lookup_field_vals->db_value.length > NAME_LEN) + { + /* + Impossible value for a database name, + found in a WHERE DATABASE_NAME = 'xxx' clause. + */ + return 0; + } + + if (is_infoschema_db(&lookup_field_vals->db_value)) + { + if (files->append_val(&INFORMATION_SCHEMA_NAME)) + return 1; + return 0; + } + if (files->append_val(&lookup_field_vals->db_value)) + return 1; + return 0; + } + + /* + Create list of existing databases. 
It is used in case + of select from information schema table + */ + if (files->append_val(&INFORMATION_SCHEMA_NAME)) + return 1; + return find_files(thd, files, 0, mysql_data_home, &null_clex_str); +} + + +struct st_add_schema_table +{ + Dynamic_array *files; + const char *wild; +}; + + +static my_bool add_schema_table(THD *thd, plugin_ref plugin, + void* p_data) +{ + LEX_CSTRING *file_name= 0; + st_add_schema_table *data= (st_add_schema_table *)p_data; + Dynamic_array *file_list= data->files; + const char *wild= data->wild; + ST_SCHEMA_TABLE *schema_table= plugin_data(plugin, ST_SCHEMA_TABLE *); + DBUG_ENTER("add_schema_table"); + + if (schema_table->hidden) + DBUG_RETURN(0); + if (wild) + { + if (lower_case_table_names) + { + if (wild_case_compare(files_charset_info, + schema_table->table_name, + wild)) + DBUG_RETURN(0); + } + else if (wild_compare(schema_table->table_name, wild, 0)) + DBUG_RETURN(0); + } + + if ((file_name= thd->make_clex_string(schema_table->table_name, + strlen(schema_table->table_name))) && + !file_list->append(file_name)) + DBUG_RETURN(0); + DBUG_RETURN(1); +} + + +int schema_tables_add(THD *thd, Dynamic_array *files, + const char *wild) +{ + LEX_CSTRING *file_name; + ST_SCHEMA_TABLE *tmp_schema_table= schema_tables; + st_add_schema_table add_data; + DBUG_ENTER("schema_tables_add"); + + for (; tmp_schema_table->table_name; tmp_schema_table++) + { + if (tmp_schema_table->hidden) + continue; + if (wild) + { + if (lower_case_table_names) + { + if (wild_case_compare(files_charset_info, + tmp_schema_table->table_name, + wild)) + continue; + } + else if (wild_compare(tmp_schema_table->table_name, wild, 0)) + continue; + } + if ((file_name= + thd->make_clex_string(tmp_schema_table->table_name, + strlen(tmp_schema_table->table_name))) && + !files->append(file_name)) + continue; + DBUG_RETURN(1); + } + + add_data.files= files; + add_data.wild= wild; + if (plugin_foreach(thd, add_schema_table, + MYSQL_INFORMATION_SCHEMA_PLUGIN, &add_data)) + 
DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +/** + @brief Create table names list + + @details The function creates the list of table names in + database + + @param[in] thd thread handler + @param[in] table_names List of table names in database + @param[in] lex pointer to LEX struct + @param[in] lookup_field_vals pointer to LOOKUP_FIELD_VALUE struct + @param[in] db_name database name + + @return Operation status + @retval 0 ok + @retval 1 fatal error + @retval 2 Not fatal error; Safe to ignore this file list +*/ + +static int +make_table_name_list(THD *thd, Dynamic_array *table_names, + LEX *lex, LOOKUP_FIELD_VALUES *lookup_field_vals, + LEX_CSTRING *db_name) +{ + char path[FN_REFLEN + 1]; + build_table_filename(path, sizeof(path) - 1, db_name->str, "", "", 0); + if (!lookup_field_vals->wild_table_value && + lookup_field_vals->table_value.str) + { + if (check_table_name(lookup_field_vals->table_value.str, + lookup_field_vals->table_value.length, + false)) + { + /* + Impossible value for a table name, + found in a WHERE TABLE_NAME = 'xxx' clause. 
+ */ + return 0; + } + if (db_name == &INFORMATION_SCHEMA_NAME) + { + LEX_CSTRING *name; + ST_SCHEMA_TABLE *schema_table= + find_schema_table(thd, &lookup_field_vals->table_value); + if (schema_table && !schema_table->hidden) + { + if (!(name= thd->make_clex_string(schema_table->table_name, + strlen(schema_table->table_name))) || + table_names->append(name)) + return 1; + } + } + else + { + if (table_names->append_val(&lookup_field_vals->table_value)) + return 1; + } + return 0; + } + + /* + This call will add all matching the wildcards (if specified) IS tables + to the list + */ + if (db_name == &INFORMATION_SCHEMA_NAME) + return (schema_tables_add(thd, table_names, + lookup_field_vals->table_value.str)); + + if (check_db_name((LEX_STRING*)db_name)) + return 0; // Impossible TABLE_SCHEMA name + + find_files_result res= find_files(thd, table_names, db_name, path, + &lookup_field_vals->table_value); + if (res != FIND_FILES_OK) + { + /* + Downgrade errors about problems with database directory to + warnings if this is not a 'SHOW' command. Another thread + may have dropped database, and we may still have a name + for that directory. + */ + if (res == FIND_FILES_DIR) + { + if (is_show_command(thd)) + return 1; + thd->clear_error(); + return 2; + } + return 1; + } + return 0; +} + + +static void get_table_engine_for_i_s(THD *thd, char *buf, TABLE_LIST *tl, + LEX_CSTRING *db, LEX_CSTRING *table) +{ + LEX_CSTRING engine_name= { buf, 0 }; + + if (thd->get_stmt_da()->sql_errno() == ER_UNKNOWN_STORAGE_ENGINE) + { + char path[FN_REFLEN]; + build_table_filename(path, sizeof(path) - 1, + db->str, table->str, reg_ext, 0); + if (dd_frm_type(thd, path, &engine_name, NULL, NULL) == TABLE_TYPE_NORMAL) + tl->option= engine_name.str; + } +} + + +/** + Fill I_S table with data obtained by performing full-blown table open. + + @param thd Thread handler. + @param is_show_fields_or_keys Indicates whether it is a legacy SHOW + COLUMNS or SHOW KEYS statement. 
+ @param table TABLE object for I_S table to be filled. + @param schema_table I_S table description structure. + @param orig_db_name Database name. + @param orig_table_name Table name. + @param open_tables_state_backup Open_tables_state object which is used + to save/restore original status of + variables related to open tables state. + @param can_deadlock Indicates that deadlocks are possible + due to metadata locks, so to avoid + them we should not wait in case if + conflicting lock is present. + + @retval FALSE - Success. + @retval TRUE - Failure. +*/ +static bool +fill_schema_table_by_open(THD *thd, MEM_ROOT *mem_root, + bool is_show_fields_or_keys, + TABLE *table, ST_SCHEMA_TABLE *schema_table, + LEX_CSTRING *orig_db_name, + LEX_CSTRING *orig_table_name, + Open_tables_backup *open_tables_state_backup, + bool can_deadlock) +{ + Query_arena i_s_arena(mem_root, Query_arena::STMT_CONVENTIONAL_EXECUTION), + backup_arena, *old_arena; + LEX *old_lex= thd->lex, temp_lex, *lex; + LEX_CSTRING db_name, table_name; + TABLE_LIST *table_list; + bool result= true; + DBUG_ENTER("fill_schema_table_by_open"); + + /* + When a view is opened its structures are allocated on a permanent + statement arena and linked into the LEX tree for the current statement + (this happens even in cases when view is handled through TEMPTABLE + algorithm). + + To prevent this process from unnecessary hogging of memory in the permanent + arena of our I_S query and to avoid damaging its LEX we use temporary + arena and LEX for table/view opening. + + Use temporary arena instead of statement permanent arena. Also make + it active arena and save original one for successive restoring. + */ + old_arena= thd->stmt_arena; + thd->stmt_arena= &i_s_arena; + thd->set_n_backup_active_arena(&i_s_arena, &backup_arena); + + /* Prepare temporary LEX. */ + thd->lex= lex= &temp_lex; + lex_start(thd); + lex->sql_command= old_lex->sql_command; + + /* Disable constant subquery evaluation as we won't be locking tables. 
*/ + lex->context_analysis_only= CONTEXT_ANALYSIS_ONLY_VIEW; + + /* + Some of process_table() functions rely on wildcard being passed from + old LEX (or at least being initialized). + */ + lex->wild= old_lex->wild; + + /* + Since make_table_list() might change database and table name passed + to it (if lower_case_table_names) we create copies of orig_db_name and + orig_table_name here. These copies are used for make_table_list() + while unaltered values are passed to process_table() functions. + */ + if (!thd->make_lex_string(&db_name, + orig_db_name->str, orig_db_name->length) || + !thd->make_lex_string(&table_name, + orig_table_name->str, orig_table_name->length)) + goto end; + + /* + Create table list element for table to be open. Link it with the + temporary LEX. The latter is required to correctly open views and + produce table describing their structure. + */ + if (make_table_list(thd, lex->first_select_lex(), &db_name, &table_name)) + goto end; + + table_list= lex->first_select_lex()->table_list.first; + + if (is_show_fields_or_keys) + { + /* + Restore thd->temporary_tables to be able to process + temporary tables (only for 'show index' & 'show columns'). + This should be changed when processing of temporary tables for + I_S tables will be done. + */ + thd->temporary_tables= open_tables_state_backup->temporary_tables; + } + else + { + /* + Apply optimization flags for table opening which are relevant for + this I_S table. We can't do this for SHOW COLUMNS/KEYS because of + backward compatibility. + */ + table_list->i_s_requested_object= schema_table->i_s_requested_object; + } + + DBUG_ASSERT(thd->lex == lex); + result= open_tables_only_view_structure(thd, table_list, can_deadlock); + + DEBUG_SYNC(thd, "after_open_table_ignore_flush"); + + /* + XXX: show_table_list has a flag i_is_requested, + and when it's set, open_normal_and_derived_tables() + can return an error without setting an error message + in THD, which is a hack. 
This is why we have to + check for res, then for thd->is_error() and only then + for thd->main_da.sql_errno(). + + Again we don't do this for SHOW COLUMNS/KEYS because + of backward compatibility. + */ + if (!is_show_fields_or_keys && result && thd->is_error() && + (thd->get_stmt_da()->sql_errno() == ER_NO_SUCH_TABLE || + thd->get_stmt_da()->sql_errno() == ER_WRONG_OBJECT || + thd->get_stmt_da()->sql_errno() == ER_NOT_SEQUENCE)) + { + /* + Hide error for a non-existing table. + For example, this error can occur when we use a where condition + with a db name and table, but the table does not exist or + there is a view with the same name. + */ + result= false; + thd->clear_error(); + } + else + { + char buf[NAME_CHAR_LEN + 1]; + if (unlikely(thd->is_error())) + get_table_engine_for_i_s(thd, buf, table_list, &db_name, &table_name); + + result= schema_table->process_table(thd, table_list, + table, result, + orig_db_name, + orig_table_name); + } + + +end: + lex->unit.cleanup(); + + /* Restore original LEX value, statement's arena and THD arena values. */ + lex_end(thd->lex); + + // Free items, before restoring backup_arena below. + DBUG_ASSERT(i_s_arena.free_list == NULL); + thd->free_items(); + + /* + For safety reset list of open temporary tables before closing + all tables open within this Open_tables_state. + */ + thd->temporary_tables= NULL; + + close_thread_tables(thd); + /* + Release metadata lock we might have acquired. + See comment in fill_schema_table_from_frm() for details. 
+ */ + thd->mdl_context.rollback_to_savepoint(open_tables_state_backup->mdl_system_tables_svp); + + thd->lex= old_lex; + + thd->stmt_arena= old_arena; + thd->restore_active_arena(&i_s_arena, &backup_arena); + + DBUG_RETURN(result); +} + + +/** + @brief Fill I_S table for SHOW TABLE NAMES commands + + @param[in] thd thread handler + @param[in] table TABLE struct for I_S table + @param[in] db_name database name + @param[in] table_name table name + + @return Operation status + @retval 0 success + @retval 1 error +*/ + +static int fill_schema_table_names(THD *thd, TABLE_LIST *tables, + LEX_CSTRING *db_name, + LEX_CSTRING *table_name) +{ + TABLE *table= tables->table; + if (db_name == &INFORMATION_SCHEMA_NAME) + { + table->field[3]->store(STRING_WITH_LEN("SYSTEM VIEW"), + system_charset_info); + } + else if (tables->table_open_method != SKIP_OPEN_TABLE) + { + CHARSET_INFO *cs= system_charset_info; + handlerton *hton; + bool is_sequence; + + if (ha_table_exists(thd, db_name, table_name, NULL, NULL, + &hton, &is_sequence)) + { + if (hton == view_pseudo_hton) + table->field[3]->store(STRING_WITH_LEN("VIEW"), cs); + else if (is_sequence) + table->field[3]->store(STRING_WITH_LEN("SEQUENCE"), cs); + else + table->field[3]->store(STRING_WITH_LEN("BASE TABLE"), cs); + } + else + table->field[3]->store(STRING_WITH_LEN("ERROR"), cs); + + if (unlikely(thd->is_error() && + thd->get_stmt_da()->sql_errno() == ER_NO_SUCH_TABLE)) + { + thd->clear_error(); + return 0; + } + } + if (unlikely(schema_table_store_record(thd, table))) + return 1; + return 0; +} + + +/** + @brief Get open table method + + @details The function calculates the method which will be used + for table opening: + SKIP_OPEN_TABLE - do not open table + OPEN_FRM_ONLY - open FRM file only + OPEN_FULL_TABLE - open FRM, data, index files + @param[in] tables I_S table table_list + @param[in] schema_table I_S table struct + @param[in] schema_table_idx I_S table index + + @return return a set of flags + @retval 
SKIP_OPEN_TABLE | OPEN_FRM_ONLY | OPEN_FULL_TABLE +*/ + +uint get_table_open_method(TABLE_LIST *tables, + ST_SCHEMA_TABLE *schema_table, + enum enum_schema_tables schema_table_idx) +{ + /* + determine which method will be used for table opening + */ + if (schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE) + { + Field **ptr, *field; + uint table_open_method= 0, field_indx= 0; + uint star_table_open_method= OPEN_FULL_TABLE; + bool used_star= true; // true if '*' is used in select + for (ptr=tables->table->field; (field= *ptr) ; ptr++) + { + const ST_FIELD_INFO &def= schema_table->fields_info[field_indx]; + star_table_open_method= + MY_MIN(star_table_open_method, (uint) def.open_method()); + if (bitmap_is_set(tables->table->read_set, field->field_index)) + { + used_star= false; + table_open_method|= (uint) def.open_method(); + } + field_indx++; + } + if (used_star) + return star_table_open_method; + return table_open_method; + } + /* I_S tables which use get_all_tables but can not be optimized */ + return (uint) OPEN_FULL_TABLE; +} + + +/** + Try acquire high priority share metadata lock on a table (with + optional wait for conflicting locks to go away). + + @param thd Thread context. + @param mdl_request Pointer to memory to be used for MDL_request + object for a lock request. + @param table Table list element for the table + @param can_deadlock Indicates that deadlocks are possible due to + metadata locks, so to avoid them we should not + wait in case if conflicting lock is present. + + @note This is an auxiliary function to be used in cases when we want to + access table's description by looking up info in TABLE_SHARE without + going through full-blown table open. + @note This function assumes that there are no other metadata lock requests + in the current metadata locking context. + + @retval FALSE No error, if lock was obtained TABLE_LIST::mdl_request::ticket + is set to non-NULL value. + @retval TRUE Some error occurred (probably thread was killed). 
+*/ + +static bool +try_acquire_high_prio_shared_mdl_lock(THD *thd, TABLE_LIST *table, + bool can_deadlock) +{ + bool error; + MDL_REQUEST_INIT(&table->mdl_request, MDL_key::TABLE, table->db.str, + table->table_name.str, MDL_SHARED_HIGH_PRIO, + MDL_TRANSACTION); + + if (can_deadlock) + { + /* + When .FRM is being open in order to get data for an I_S table, + we might have some tables not only open but also locked. + E.g. this happens when a SHOW or I_S statement is run + under LOCK TABLES or inside a stored function. + By waiting for the conflicting metadata lock to go away we + might create a deadlock which won't entirely belong to the + MDL subsystem and thus won't be detectable by this subsystem's + deadlock detector. To avoid such situation, when there are + other locked tables, we prefer not to wait on a conflicting + lock. + */ + error= thd->mdl_context.try_acquire_lock(&table->mdl_request); + } + else + error= thd->mdl_context.acquire_lock(&table->mdl_request, + thd->variables.lock_wait_timeout); + + return error; +} + + +/** + @brief Fill I_S table with data from FRM file only + + @param[in] thd thread handler + @param[in] table TABLE struct for I_S table + @param[in] schema_table I_S table struct + @param[in] db_name database name + @param[in] table_name table name + @param[in] schema_table_idx I_S table index + @param[in] open_tables_state_backup Open_tables_state object which is used + to save/restore original state of metadata + locks. + @param[in] can_deadlock Indicates that deadlocks are possible + due to metadata locks, so to avoid + them we should not wait in case if + conflicting lock is present. 
+ + @return Operation status + @retval 0 Table is processed and we can continue + with new table + @retval 1 It's view and we have to use + open_tables function for this table +*/ + +static int fill_schema_table_from_frm(THD *thd, MEM_ROOT *mem_root, + TABLE *table, + ST_SCHEMA_TABLE *schema_table, + LEX_CSTRING *db_name, + LEX_CSTRING *table_name, + Open_tables_backup *open_tables_state_backup, + bool can_deadlock) +{ + TABLE_SHARE *share; + TABLE tbl; + TABLE_LIST table_list; + uint res= 0; + char db_name_buff[NAME_LEN + 1], table_name_buff[NAME_LEN + 1]; + Query_arena i_s_arena(mem_root, Query_arena::STMT_CONVENTIONAL_EXECUTION); + Query_arena backup_arena, *old_arena; + bool i_s_arena_active= false; + + bzero((char*) &table_list, sizeof(TABLE_LIST)); + bzero((char*) &tbl, sizeof(TABLE)); + + DBUG_ASSERT(db_name->length <= NAME_LEN); + DBUG_ASSERT(table_name->length <= NAME_LEN); + + if (lower_case_table_names) + { + /* + In lower_case_table_names > 0 metadata locking and table definition + cache subsystems require normalized (lowercased) database and table + names as input. + */ + strmov(db_name_buff, db_name->str); + strmov(table_name_buff, table_name->str); + table_list.db.length= my_casedn_str(files_charset_info, db_name_buff); + table_list.table_name.length= my_casedn_str(files_charset_info, table_name_buff); + table_list.db.str= db_name_buff; + table_list.table_name.str= table_name_buff; + } + else + { + table_list.table_name= *table_name; + table_list.db= *db_name; + } + + /* + TODO: investigate if in this particular situation we can get by + simply obtaining internal lock of the data-dictionary + instead of obtaining full-blown metadata lock. + */ + if (try_acquire_high_prio_shared_mdl_lock(thd, &table_list, can_deadlock)) + { + /* + Some error occurred (most probably we have been killed while + waiting for conflicting locks to go away), let the caller to + handle the situation. + */ + return 1; + } + + if (! 
table_list.mdl_request.ticket) + { + /* + We are in situation when we have encountered conflicting metadata + lock and deadlocks can occur due to waiting for it to go away. + So instead of waiting skip this table with an appropriate warning. + */ + DBUG_ASSERT(can_deadlock); + + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_I_S_SKIPPED_TABLE, + ER_THD(thd, ER_WARN_I_S_SKIPPED_TABLE), + table_list.db.str, table_list.table_name.str); + return 0; + } + + if (schema_table->i_s_requested_object & OPEN_TRIGGER_ONLY) + { + init_sql_alloc(key_memory_table_triggers_list, + &tbl.mem_root, TABLE_ALLOC_BLOCK_SIZE, 0, MYF(0)); + if (!Table_triggers_list::check_n_load(thd, db_name, + table_name, &tbl, 1)) + { + table_list.table= &tbl; + res= schema_table->process_table(thd, &table_list, table, + res, db_name, table_name); + delete tbl.triggers; + } + free_root(&tbl.mem_root, MYF(0)); + goto end; + } + + old_arena= thd->stmt_arena; + thd->stmt_arena= &i_s_arena; + thd->set_n_backup_active_arena(&i_s_arena, &backup_arena); + i_s_arena_active= true; + + share= tdc_acquire_share(thd, &table_list, GTS_TABLE | GTS_VIEW); + if (!share) + { + if (thd->get_stmt_da()->sql_errno() == ER_NO_SUCH_TABLE || + thd->get_stmt_da()->sql_errno() == ER_WRONG_OBJECT || + thd->get_stmt_da()->sql_errno() == ER_NOT_SEQUENCE) + { + res= 0; + } + else + { + char buf[NAME_CHAR_LEN + 1]; + get_table_engine_for_i_s(thd, buf, &table_list, db_name, table_name); + + res= schema_table->process_table(thd, &table_list, table, + true, db_name, table_name); + } + goto end; + } + + if (share->is_view) + { + if (schema_table->i_s_requested_object & OPEN_TABLE_ONLY) + { + /* skip view processing */ + res= 0; + goto end_share; + } + else if (schema_table->i_s_requested_object & OPEN_VIEW_FULL) + { + /* + tell get_all_tables() to fall back to + open_normal_and_derived_tables() + */ + res= 1; + goto end_share; + } + + if (mysql_make_view(thd, share, &table_list, true)) + goto end_share; + 
table_list.view= (LEX*) share->is_view; + res= schema_table->process_table(thd, &table_list, table, + res, db_name, table_name); + goto end_share; + } + + if (!open_table_from_share(thd, share, table_name, 0, + (EXTRA_RECORD | OPEN_FRM_FILE_ONLY), + thd->open_options, &tbl, FALSE)) + { + tbl.s= share; + table_list.table= &tbl; + table_list.view= (LEX*) share->is_view; + res= schema_table->process_table(thd, &table_list, table, + res, db_name, table_name); + closefrm(&tbl); + } + + +end_share: + tdc_release_share(share); + +end: + /* + Release metadata lock we might have acquired. + + Without this step metadata locks acquired for each table processed + will be accumulated. In situation when a lot of tables are processed + by I_S query this will result in transaction with too many metadata + locks. As result performance of acquisition of new lock will suffer. + + Of course, the fact that we don't hold metadata lock on tables which + were processed till the end of I_S query makes execution less isolated + from concurrent DDL. Consequently one might get 'dirty' results from + such a query. But we have never promised serializability of I_S queries + anyway. + + We don't have any tables open since we took backup, so rolling back to + savepoint is safe. 
+ */ + DBUG_ASSERT(thd->open_tables == NULL); + + thd->mdl_context.rollback_to_savepoint(open_tables_state_backup->mdl_system_tables_svp); + + if (i_s_arena_active) + { + thd->stmt_arena= old_arena; + thd->restore_active_arena(&i_s_arena, &backup_arena); + i_s_arena.free_items(); + } + + if (!thd->is_fatal_error) + thd->clear_error(); + return res; +} + + +class Warnings_only_error_handler : public Internal_error_handler +{ +public: + bool handle_condition(THD *thd, uint sql_errno, const char* sqlstate, + Sql_condition::enum_warning_level *level, + const char* msg, Sql_condition ** cond_hdl) + { + if (sql_errno == ER_TRG_NO_DEFINER || sql_errno == ER_TRG_NO_CREATION_CTX + || sql_errno == ER_PARSE_ERROR) + return true; + + if (*level != Sql_condition::WARN_LEVEL_ERROR) + return false; + + if (likely(!thd->get_stmt_da()->is_error())) + thd->get_stmt_da()->set_error_status(sql_errno, msg, sqlstate, *cond_hdl); + return true; // handled! + } +}; + +/** + @brief Fill I_S tables whose data are retrieved + from frm files and storage engine + + @details The information schema tables are internally represented as + temporary tables that are filled at query execution time. + Those I_S tables whose data are retrieved + from frm files and storage engine are filled by the function + get_all_tables(). + + @note This function assumes optimize_for_get_all_tables() has been + run for the table and produced a "read plan" in + tables->is_table_read_plan. 
+ + @param[in] thd thread handler + @param[in] tables I_S table + @param[in] cond 'WHERE' condition + + @return Operation status + @retval 0 success + @retval 1 error +*/ + +int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) +{ + DBUG_ENTER("get_all_tables"); + LEX *lex= thd->lex; + TABLE *table= tables->table; + TABLE_LIST table_acl_check; + SELECT_LEX *lsel= tables->schema_select_lex; + ST_SCHEMA_TABLE *schema_table= tables->schema_table; + IS_table_read_plan *plan= tables->is_table_read_plan; + enum enum_schema_tables schema_table_idx; + Dynamic_array db_names(PSI_INSTRUMENT_MEM); + Item *partial_cond= plan->partial_cond; + int error= 1; + Open_tables_backup open_tables_state_backup; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + Security_context *sctx= thd->security_ctx; +#endif + uint table_open_method= tables->table_open_method; + bool can_deadlock; + MEM_ROOT tmp_mem_root; + /* + We're going to open FRM files for tables. + In case of VIEWs that contain stored function calls, + these stored functions will be parsed and put to the SP cache. + + Suppose we have a view containing a stored function call: + CREATE VIEW v1 AS SELECT f1() AS c1; + and now we're running: + SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME=f1(); + If a parallel thread invalidates the cache, + e.g. by creating or dropping some stored routine, + the SELECT query will re-parse f1() when processing "v1" + and replace the outdated cached version of f1() to a new one. + But the old version of f1() is referenced from the m_sp member + of the Item_func_sp instances used in the WHERE condition. + We cannot destroy it. To avoid such clashes, let's remember + all old routines into a temporary SP cache collection + and process tables with a new empty temporary SP cache collection. + Then restore to the old SP cache collection at the end. 
+ */ + Sp_caches old_sp_caches; + + old_sp_caches.sp_caches_swap(*thd); + + bzero(&tmp_mem_root, sizeof(tmp_mem_root)); + + /* + In cases when SELECT from I_S table being filled by this call is + part of statement which also uses other tables or is being executed + under LOCK TABLES or is part of transaction which also uses other + tables waiting for metadata locks which happens below might result + in deadlocks. + To avoid them we don't wait if conflicting metadata lock is + encountered and skip table with emitting an appropriate warning. + */ + can_deadlock= thd->mdl_context.has_locks(); + + /* + We should not introduce deadlocks even if we already have some + tables open and locked, since we won't lock tables which we will + open and will ignore pending exclusive metadata locks for these + tables by using high-priority requests for shared metadata locks. + */ + thd->reset_n_backup_open_tables_state(&open_tables_state_backup); + + schema_table_idx= get_schema_table_idx(schema_table); + /* + this branch processes SHOW FIELDS, SHOW INDEXES commands. 
+ see sql_parse.cc, prepare_schema_table() function where + this values are initialized + */ + if (lsel && lsel->table_list.first) + { + error= fill_schema_table_by_open(thd, thd->mem_root, TRUE, + table, schema_table, + &lsel->table_list.first->db, + &lsel->table_list.first->table_name, + &open_tables_state_backup, + can_deadlock); + goto err; + } + + if (plan->no_rows) + { + error= 0; + goto err; + } + + if (lex->describe) + { + /* EXPLAIN SELECT */ + error= 0; + goto err; + } + + bzero((char*) &table_acl_check, sizeof(table_acl_check)); + + if (make_db_list(thd, &db_names, &plan->lookup_field_vals)) + goto err; + + /* Use tmp_mem_root to allocate data for opened tables */ + init_alloc_root(PSI_INSTRUMENT_ME, &tmp_mem_root, SHOW_ALLOC_BLOCK_SIZE, + SHOW_ALLOC_BLOCK_SIZE, MY_THREAD_SPECIFIC); + + for (size_t i=0; i < db_names.elements(); i++) + { + LEX_CSTRING *db_name= db_names.at(i); + DBUG_ASSERT(db_name->length <= NAME_LEN); +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (!(check_access(thd, SELECT_ACL, db_name->str, + &thd->col_access, NULL, 0, 1) || + (!thd->col_access && check_grant_db(thd, db_name->str))) || + sctx->master_access & (DB_ACLS | SHOW_DB_ACL) || + acl_get_all3(sctx, db_name->str, 0)) +#endif + { + Dynamic_array table_names(PSI_INSTRUMENT_MEM); + int res= make_table_name_list(thd, &table_names, lex, + &plan->lookup_field_vals, db_name); + if (unlikely(res == 2)) /* Not fatal error, continue */ + continue; + if (unlikely(res)) + goto err; + + for (size_t i=0; i < table_names.elements(); i++) + { + LEX_CSTRING *table_name= table_names.at(i); + DBUG_ASSERT(table_name->length <= NAME_LEN); + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (!(thd->col_access & TABLE_ACLS)) + { + table_acl_check.db= *db_name; + table_acl_check.table_name= *table_name; + table_acl_check.grant.privilege= thd->col_access; + if (check_grant(thd, TABLE_ACLS, &table_acl_check, TRUE, 1, TRUE)) + continue; + } +#endif + restore_record(table, s->default_values); + 
table->field[schema_table->idx_field1]-> + store(db_name->str, db_name->length, system_charset_info); + table->field[schema_table->idx_field2]-> + store(table_name->str, table_name->length, system_charset_info); + + if (!partial_cond || partial_cond->val_int()) + { + /* + If table is I_S.tables and open_table_method is 0 (eg SKIP_OPEN) + we can skip table opening and we don't have lookup value for + table name or lookup value is wild string(table name list is + already created by make_table_name_list() function). + */ + if (!table_open_method && schema_table_idx == SCH_TABLES && + (!plan->lookup_field_vals.table_value.length || + plan->lookup_field_vals.wild_table_value)) + { + table->field[0]->store(STRING_WITH_LEN("def"), system_charset_info); + if (schema_table_store_record(thd, table)) + goto err; /* Out of space in temporary table */ + continue; + } + + /* SHOW TABLE NAMES command */ + if (schema_table_idx == SCH_TABLE_NAMES) + { + if (fill_schema_table_names(thd, tables, db_name, table_name)) + continue; + } + else if (schema_table_idx == SCH_TRIGGERS && + db_name == &INFORMATION_SCHEMA_NAME) + { + continue; + } + else + { + if (!(table_open_method & ~OPEN_FRM_ONLY) && + db_name != &INFORMATION_SCHEMA_NAME) + { + if (!fill_schema_table_from_frm(thd, &tmp_mem_root, + table, schema_table, + db_name, table_name, + &open_tables_state_backup, + can_deadlock)) + continue; + } + + if (thd->killed == ABORT_QUERY) + { + error= 0; + goto err; + } + if (thd->is_fatal_error) + goto err; + + DEBUG_SYNC(thd, "before_open_in_get_all_tables"); + if (fill_schema_table_by_open(thd, &tmp_mem_root, FALSE, + table, schema_table, + db_name, table_name, + &open_tables_state_backup, + can_deadlock)) + goto err; + free_root(&tmp_mem_root, MY_MARK_BLOCKS_FREE); + } + } + if (thd->killed == ABORT_QUERY) + { + error= 0; + goto err; + } + } + } + } + + error= 0; +err: + thd->restore_backup_open_tables_state(&open_tables_state_backup); + free_root(&tmp_mem_root, 0); + + /* + Now restore 
to the saved SP cache collection + and clear the temporary SP cache collection. + */ + old_sp_caches.sp_caches_swap(*thd); + old_sp_caches.sp_caches_clear(); + + DBUG_RETURN(error); +} + + +bool store_schema_schemata(THD* thd, TABLE *table, LEX_CSTRING *db_name, + CHARSET_INFO *cs, LEX_CSTRING *schema_comment= NULL) +{ + restore_record(table, s->default_values); + table->field[0]->store(STRING_WITH_LEN("def"), system_charset_info); + table->field[1]->store(db_name, system_charset_info); + table->field[2]->store(&cs->cs_name, system_charset_info); + table->field[3]->store(&cs->coll_name, system_charset_info); + if (schema_comment) + table->field[5]->store(schema_comment->str, schema_comment->length, + system_charset_info); + return schema_table_store_record(thd, table); +} + + +/* + Check if the specified database exists on disk. + + @param dbname - the database name + @retval true - on error, the database directory does not exists + @retval false - on success, the database directory exists +*/ +static bool verify_database_directory_exists(const LEX_CSTRING &dbname) +{ + DBUG_ENTER("verify_database_directory_exists"); + char path[FN_REFLEN + 16]; + uint path_len; + MY_STAT stat_info; + if (!dbname.str[0]) + DBUG_RETURN(true); // Empty database name: does not exist. + path_len= build_table_filename(path, sizeof(path) - 1, dbname.str, "", "", 0); + path[path_len - 1]= 0; + if (!mysql_file_stat(key_file_misc, path, &stat_info, MYF(0))) + DBUG_RETURN(true); // The database directory was not found: does not exist. + DBUG_RETURN(false); // The database directory was found. +} + + +int fill_schema_schemata(THD *thd, TABLE_LIST *tables, COND *cond) +{ + /* + TODO: fill_schema_shemata() is called when new client is connected. + Returning error status in this case leads to client hangup. 
+ */ + + LOOKUP_FIELD_VALUES lookup_field_vals; + Dynamic_array db_names(PSI_INSTRUMENT_MEM); + Schema_specification_st create; + TABLE *table= tables->table; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + Security_context *sctx= thd->security_ctx; +#endif + DBUG_ENTER("fill_schema_shemata"); + + if (get_lookup_field_values(thd, cond, true, tables, &lookup_field_vals)) + DBUG_RETURN(0); + DBUG_PRINT("INDEX VALUES",("db_name: %s table_name: %s", + lookup_field_vals.db_value.str, + lookup_field_vals.table_value.str)); + if (make_db_list(thd, &db_names, &lookup_field_vals)) + DBUG_RETURN(1); + + /* + If we have lookup db value we should check that the database exists + */ + if(lookup_field_vals.db_value.str && !lookup_field_vals.wild_db_value && + (!db_names.elements() /* The database name was too long */|| + (db_names.at(0) != &INFORMATION_SCHEMA_NAME && + verify_database_directory_exists(lookup_field_vals.db_value)))) + DBUG_RETURN(0); + + for (size_t i=0; i < db_names.elements(); i++) + { + LEX_CSTRING *db_name= db_names.at(i); + DBUG_ASSERT(db_name->length <= NAME_LEN); + if (db_name == &INFORMATION_SCHEMA_NAME) + { + if (store_schema_schemata(thd, table, db_name, + system_charset_info)) + DBUG_RETURN(1); + continue; + } +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (sctx->master_access & (DB_ACLS | SHOW_DB_ACL) || + acl_get_all3(sctx, db_name->str, false) || + !check_grant_db(thd, db_name->str)) +#endif + { + load_db_opt_by_name(thd, db_name->str, &create); + if (store_schema_schemata(thd, table, db_name, + create.default_table_charset, + create.schema_comment)) + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +static int get_schema_tables_record(THD *thd, TABLE_LIST *tables, + TABLE *table, bool res, + const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name) +{ + const char *tmp_buff; + MYSQL_TIME time; + int info_error= 0; + CHARSET_INFO *cs= system_charset_info; + DBUG_ENTER("get_schema_tables_record"); + + restore_record(table, s->default_values); + 
table->field[0]->store(STRING_WITH_LEN("def"), cs); + table->field[1]->store(db_name->str, db_name->length, cs); + table->field[2]->store(table_name->str, table_name->length, cs); + + if (res) + { + /* There was a table open error, so set the table type and return */ + if (tables->view) + table->field[3]->store(STRING_WITH_LEN("VIEW"), cs); + else if (tables->schema_table) + table->field[3]->store(STRING_WITH_LEN("SYSTEM VIEW"), cs); + else + table->field[3]->store(STRING_WITH_LEN("BASE TABLE"), cs); + + if (tables->option) + { + table->field[4]->store(tables->option, strlen(tables->option), cs); + table->field[4]->set_notnull(); + } + goto err; + } + + if (tables->view) + { + table->field[3]->store(STRING_WITH_LEN("VIEW"), cs); + table->field[20]->store(STRING_WITH_LEN("VIEW"), cs); + } + else + { + char option_buff[512]; + String str(option_buff,sizeof(option_buff), system_charset_info); + TABLE *show_table= tables->table; + TABLE_SHARE *share= show_table->s; + handler *file= show_table->db_stat ? 
show_table->file : 0; + handlerton *tmp_db_type= share->db_type(); +#ifdef WITH_PARTITION_STORAGE_ENGINE + bool is_partitioned= FALSE; +#endif + + if (share->tmp_table == SYSTEM_TMP_TABLE) + table->field[3]->store(STRING_WITH_LEN("SYSTEM VIEW"), cs); + else if (share->table_type == TABLE_TYPE_SEQUENCE) + table->field[3]->store(STRING_WITH_LEN("SEQUENCE"), cs); + else + { + DBUG_ASSERT(share->tmp_table == NO_TMP_TABLE); + if (share->versioned) + table->field[3]->store(STRING_WITH_LEN("SYSTEM VERSIONED"), cs); + else + table->field[3]->store(STRING_WITH_LEN("BASE TABLE"), cs); + } + + for (uint i= 4; i < table->s->fields; i++) + { + if (i == 7 || (i > 12 && i < 17) || i == 18) + continue; + table->field[i]->set_notnull(); + } + + /* Collect table info from the table share */ + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (share->db_type() == partition_hton && + share->partition_info_str_len) + { + tmp_db_type= plugin_hton(share->default_part_plugin); + is_partitioned= TRUE; + } +#endif + + tmp_buff= (char *) ha_resolve_storage_engine_name(tmp_db_type); + table->field[4]->store(tmp_buff, strlen(tmp_buff), cs); + table->field[5]->store((longlong) share->frm_version, TRUE); + + str.length(0); + + if (share->min_rows) + { + str.qs_append(STRING_WITH_LEN(" min_rows=")); + str.qs_append(share->min_rows); + } + + if (share->max_rows) + { + str.qs_append(STRING_WITH_LEN(" max_rows=")); + str.qs_append(share->max_rows); + } + + if (share->avg_row_length) + { + str.qs_append(STRING_WITH_LEN(" avg_row_length=")); + str.qs_append(share->avg_row_length); + } + + if (share->db_create_options & HA_OPTION_PACK_KEYS) + str.qs_append(STRING_WITH_LEN(" pack_keys=1")); + + if (share->db_create_options & HA_OPTION_NO_PACK_KEYS) + str.qs_append(STRING_WITH_LEN(" pack_keys=0")); + + if (share->db_create_options & HA_OPTION_STATS_PERSISTENT) + str.qs_append(STRING_WITH_LEN(" stats_persistent=1")); + + if (share->db_create_options & HA_OPTION_NO_STATS_PERSISTENT) + 
str.qs_append(STRING_WITH_LEN(" stats_persistent=0")); + + if (share->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON) + str.qs_append(STRING_WITH_LEN(" stats_auto_recalc=1")); + else if (share->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF) + str.qs_append(STRING_WITH_LEN(" stats_auto_recalc=0")); + + if (share->stats_sample_pages != 0) + { + str.qs_append(STRING_WITH_LEN(" stats_sample_pages=")); + str.qs_append(share->stats_sample_pages); + } + + /* We use CHECKSUM, instead of TABLE_CHECKSUM, for backward compatibility */ + if (share->db_create_options & HA_OPTION_CHECKSUM) + str.qs_append(STRING_WITH_LEN(" checksum=1")); + + if (share->page_checksum != HA_CHOICE_UNDEF) + { + str.qs_append(STRING_WITH_LEN(" page_checksum=")); + str.qs_append(&ha_choice_values[(uint) share->page_checksum]); + } + + if (share->db_create_options & HA_OPTION_DELAY_KEY_WRITE) + str.qs_append(STRING_WITH_LEN(" delay_key_write=1")); + + if (share->row_type != ROW_TYPE_DEFAULT) + { + str.qs_append(STRING_WITH_LEN(" row_format=")); + str.qs_append(&ha_row_type[(uint) share->row_type]); + } + + if (share->key_block_size) + { + str.qs_append(STRING_WITH_LEN(" key_block_size=")); + str.qs_append(share->key_block_size); + } + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (is_partitioned) + str.qs_append(STRING_WITH_LEN(" partitioned")); +#endif + + /* + Write transactional=0|1 for tables where the user has specified the + option or for tables that supports both transactional and non + transactional tables + */ + if (share->transactional != HA_CHOICE_UNDEF || + (share->db_type() && + share->db_type()->flags & HTON_TRANSACTIONAL_AND_NON_TRANSACTIONAL && + file)) + { + uint choice= share->transactional; + if (choice == HA_CHOICE_UNDEF) + choice= ((file->ha_table_flags() & + (HA_NO_TRANSACTIONS | HA_CRASH_SAFE)) == + HA_NO_TRANSACTIONS ? 
+ HA_CHOICE_NO : HA_CHOICE_YES); + + str.qs_append(STRING_WITH_LEN(" transactional=")); + str.qs_append(&ha_choice_values[choice]); + } + append_create_options(thd, &str, share->option_list, false, 0); + + if (file) + { + HA_CREATE_INFO create_info; + create_info.init(); + file->update_create_info(&create_info); + append_directory(thd, &str, &DATA_clex_str, create_info.data_file_name); + append_directory(thd, &str, &INDEX_clex_str, create_info.index_file_name); + } + + if (str.length()) + table->field[19]->store(str.ptr()+1, str.length()-1, cs); + + LEX_CSTRING tmp_str; + if (share->table_charset) + tmp_str= share->table_charset->coll_name; + else + tmp_str= { STRING_WITH_LEN("default") }; + table->field[17]->store(&tmp_str, cs); + + if (share->comment.str) + table->field[20]->store(&share->comment, cs); + + /* Collect table info from the storage engine */ + + if (file) + { + /* If info() fails, then there's nothing else to do */ + if (unlikely((info_error= file->info(HA_STATUS_VARIABLE | + HA_STATUS_TIME | + HA_STATUS_VARIABLE_EXTRA | + HA_STATUS_AUTO)) != 0)) + { + file->print_error(info_error, MYF(0)); + goto err; + } + + enum row_type row_type = file->get_row_type(); + switch (row_type) { + case ROW_TYPE_NOT_USED: + case ROW_TYPE_DEFAULT: + tmp_buff= ((share->db_options_in_use & + HA_OPTION_COMPRESS_RECORD) ? "Compressed" : + (share->db_options_in_use & HA_OPTION_PACK_RECORD) ? 
+ "Dynamic" : "Fixed"); + break; + case ROW_TYPE_FIXED: + tmp_buff= "Fixed"; + break; + case ROW_TYPE_DYNAMIC: + tmp_buff= "Dynamic"; + break; + case ROW_TYPE_COMPRESSED: + tmp_buff= "Compressed"; + break; + case ROW_TYPE_REDUNDANT: + tmp_buff= "Redundant"; + break; + case ROW_TYPE_COMPACT: + tmp_buff= "Compact"; + break; + case ROW_TYPE_PAGE: + tmp_buff= "Page"; + break; + } + + table->field[6]->store(tmp_buff, strlen(tmp_buff), cs); + + if (!tables->schema_table) + { + table->field[7]->store((longlong) file->stats.records, TRUE); + table->field[7]->set_notnull(); + } + table->field[8]->store((longlong) file->stats.mean_rec_length, TRUE); + table->field[9]->store((longlong) file->stats.data_file_length, TRUE); + if (file->stats.max_data_file_length) + { + table->field[10]->store((longlong) file->stats.max_data_file_length, + TRUE); + table->field[10]->set_notnull(); + } + table->field[11]->store((longlong) file->stats.index_file_length, TRUE); + if (file->stats.max_index_file_length) + { + table->field[21]->store((longlong) file->stats.max_index_file_length, + TRUE); + table->field[21]->set_notnull(); + } + table->field[12]->store((longlong) file->stats.delete_length, TRUE); + if (show_table->found_next_number_field) + { + table->field[13]->store((longlong) file->stats.auto_increment_value, + TRUE); + table->field[13]->set_notnull(); + } + if (file->stats.create_time) + { + thd->variables.time_zone->gmt_sec_to_TIME(&time, + (my_time_t) file->stats.create_time); + table->field[14]->store_time(&time); + table->field[14]->set_notnull(); + } + if (file->stats.update_time) + { + thd->variables.time_zone->gmt_sec_to_TIME(&time, + (my_time_t) file->stats.update_time); + table->field[15]->store_time(&time); + table->field[15]->set_notnull(); + } + if (file->stats.check_time) + { + thd->variables.time_zone->gmt_sec_to_TIME(&time, + (my_time_t) file->stats.check_time); + table->field[16]->store_time(&time); + table->field[16]->set_notnull(); + } + if 
((file->ha_table_flags() & + (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM)) && + !file->stats.checksum_null) + { + table->field[18]->store((longlong) file->stats.checksum, TRUE); + table->field[18]->set_notnull(); + } + } + /* If table is a temporary table */ + LEX_CSTRING tmp= { STRING_WITH_LEN("N") }; + if (show_table->s->tmp_table != NO_TMP_TABLE) + tmp.str= "Y"; + table->field[22]->store(tmp.str, tmp.length, cs); + } + +err: + if (unlikely(res || info_error)) + { + /* + If an error was encountered, push a warning, set the TABLE COMMENT + column with the error text, and clear the error so that the operation + can continue. + */ + const char *error= thd->get_stmt_da()->message(); + table->field[20]->store(error, strlen(error), cs); + + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + thd->get_stmt_da()->sql_errno(), error); + thd->clear_error(); + } + + DBUG_RETURN(schema_table_store_record(thd, table)); +} + + +/** + @brief Store field characteristics into appropriate I_S table columns + + @param[in] table I_S table + @param[in] field processed field + @param[in] cs I_S table charset + @param[in] offset offset from beginning of table + to DATE_TYPE column in I_S table + + @return void +*/ + +static void store_column_type(TABLE *table, Field *field, CHARSET_INFO *cs, + uint offset) +{ + const char *tmp_buff; + char column_type_buff[MAX_FIELD_WIDTH]; + String column_type(column_type_buff, sizeof(column_type_buff), cs); + + field->sql_type(column_type); + /* DTD_IDENTIFIER column */ + table->field[offset + 8]->store(column_type.ptr(), column_type.length(), cs); + table->field[offset + 8]->set_notnull(); + /* + DATA_TYPE column: + MySQL column type has the following format: + base_type [(dimension)] [unsigned] [zerofill]. + For DATA_TYPE column we extract only base type. + */ + tmp_buff= strchr(column_type.c_ptr_safe(), '('); + if (!tmp_buff) + /* + if there is no dimention part then check the presence of + [unsigned] [zerofill] attributes and cut them of if exist. 
+ */ + tmp_buff= strchr(column_type.c_ptr_safe(), ' '); + table->field[offset]->store(column_type.ptr(), + (tmp_buff ? (uint)(tmp_buff - column_type.ptr()) : + column_type.length()), cs); + + Information_schema_character_attributes cattr= + field->information_schema_character_attributes(); + if (cattr.has_char_length()) + { + /* CHARACTER_MAXIMUM_LENGTH column*/ + table->field[offset + 1]->store((longlong) cattr.char_length(), true); + table->field[offset + 1]->set_notnull(); + } + if (cattr.has_octet_length()) + { + /* CHARACTER_OCTET_LENGTH column */ + table->field[offset + 2]->store((longlong) cattr.octet_length(), true); + table->field[offset + 2]->set_notnull(); + } + + /* + Calculate field_length and decimals. + They are set to -1 if they should not be set (we should return NULL) + */ + + Information_schema_numeric_attributes num= + field->information_schema_numeric_attributes(); + + switch (field->type()) { + case MYSQL_TYPE_TIME: + case MYSQL_TYPE_TIMESTAMP: + case MYSQL_TYPE_DATETIME: + /* DATETIME_PRECISION column */ + table->field[offset + 5]->store((longlong) field->decimals(), TRUE); + table->field[offset + 5]->set_notnull(); + break; + default: + break; + } + + /* NUMERIC_PRECISION column */ + if (num.has_precision()) + { + table->field[offset + 3]->store((longlong) num.precision(), true); + table->field[offset + 3]->set_notnull(); + + /* NUMERIC_SCALE column */ + if (num.has_scale()) + { + table->field[offset + 4]->store((longlong) num.scale(), true); + table->field[offset + 4]->set_notnull(); + } + } + if (field->has_charset()) + { + /* CHARACTER_SET_NAME column*/ + table->field[offset + 6]->store(&field->charset()->cs_name, cs); + table->field[offset + 6]->set_notnull(); + /* COLLATION_NAME column */ + table->field[offset + 7]->store(&field->charset()->coll_name, cs); + table->field[offset + 7]->set_notnull(); + } +} + + +/* + Print DATA_TYPE independently from sql_mode. 
  It's only a brief human-readable description, without attributes,
  so it should not be used by client programs to generate SQL scripts.
*/
static bool print_anchor_data_type(const Spvar_definition *def,
                                   String *data_type)
{
  /* Variable anchored to a table column: print the brief "TYPE OF" keyword */
  if (def->column_type_ref())
    return data_type->append(STRING_WITH_LEN("TYPE OF"));
  /* Variable anchored to a table row type: print "ROW TYPE OF" */
  if (def->is_table_rowtype_ref())
    return data_type->append(STRING_WITH_LEN("ROW TYPE OF"));
  /*
    "ROW TYPE OF cursor" is not possible yet.
    May become possible when we add package-wide cursors.
  */
  DBUG_ASSERT(0);
  /* NOTE(review): append() presumably returns true on failure (OOM) — the
     true/false return here follows that convention; confirm with String API */
  return false;
}


/*
  DTD_IDENTIFIER is the full data type description with attributes.
  It can be used by client programs to generate SQL scripts.
  Let's print it according to the current sql_mode.
  It will make output in line with the value in mysql.proc.param_list,
  so both I_S.XXX.DTD_IDENTIFIER and mysql.proc.param_list use the same notation:
  default or Oracle, according to the sql_mode at the SP creation time.
  The caller must make sure to set thd->variables.sql_mode to the routine sql_mode.
*/
static bool print_anchor_dtd_identifier(THD *thd, const Spvar_definition *def,
                                        String *dtd_identifier)
{
  /*
    Column anchor: Oracle mode appends the reference followed by "%TYPE";
    default mode prints "TYPE OF " followed by the reference.
  */
  if (def->column_type_ref())
    return (thd->variables.sql_mode & MODE_ORACLE) ?
           def->column_type_ref()->append_to(thd, dtd_identifier) ||
           dtd_identifier->append(STRING_WITH_LEN("%TYPE")) :
           dtd_identifier->append(STRING_WITH_LEN("TYPE OF ")) ||
           def->column_type_ref()->append_to(thd, dtd_identifier);
  /*
    Table row anchor: Oracle mode appends the reference followed by
    "%ROWTYPE"; default mode prints "ROW TYPE OF " then the reference.
  */
  if (def->is_table_rowtype_ref())
    return (thd->variables.sql_mode & MODE_ORACLE) ?
           def->table_rowtype_ref()->append_to(thd, dtd_identifier) ||
           dtd_identifier->append(STRING_WITH_LEN("%ROWTYPE")) :
           dtd_identifier->append(STRING_WITH_LEN("ROW TYPE OF ")) ||
           def->table_rowtype_ref()->append_to(thd, dtd_identifier);
  DBUG_ASSERT(0); // See comments in print_anchor_data_type()
  return false;
}


/*
  Set columns DATA_TYPE and DTD_IDENTIFIER from an SP variable definition.

  DATA_TYPE goes to table->field[offset], DTD_IDENTIFIER to
  table->field[offset + 8] (same layout as store_column_type()).
*/
static void store_variable_type(THD *thd, const sp_variable *spvar,
                                TABLE *tmptbl,
                                TABLE_SHARE *tmpshare,
                                CHARSET_INFO *cs,
                                TABLE *table, uint offset)
{
  if (spvar->field_def.is_explicit_data_type())
  {
    if (spvar->field_def.is_row())
    {
      // Explicit ROW
      table->field[offset]->store(STRING_WITH_LEN("ROW"), cs);
      table->field[offset]->set_notnull();
      // Perhaps eventually we need to print all ROW elements in DTD_IDENTIFIER
      table->field[offset + 8]->store(STRING_WITH_LEN("ROW"), cs);
      table->field[offset + 8]->set_notnull();
    }
    else
    {
      // Explicit scalar data type: materialize a Field to reuse
      // store_column_type() for all type/charset/precision columns
      Field *field= spvar->field_def.make_field(tmpshare, thd->mem_root,
                                                &spvar->name);
      field->table= tmptbl;
      tmptbl->in_use= thd;
      store_column_type(table, field, cs, offset);
    }
  }
  else
  {
    /* Anchored (TYPE OF / ROW TYPE OF) variable: print textual description */
    StringBuffer<128> data_type(cs), dtd_identifier(cs);

    if (print_anchor_data_type(&spvar->field_def, &data_type))
    {
      table->field[offset]->store(STRING_WITH_LEN("ERROR"), cs); // EOM?
      table->field[offset]->set_notnull();
    }
    else
    {
      DBUG_ASSERT(data_type.length());
      table->field[offset]->store(data_type.ptr(), data_type.length(), cs);
      table->field[offset]->set_notnull();
    }

    if (print_anchor_dtd_identifier(thd, &spvar->field_def, &dtd_identifier))
    {
      table->field[offset + 8]->store(STRING_WITH_LEN("ERROR"), cs); // EOM?
+ table->field[offset + 8]->set_notnull(); + } + else + { + DBUG_ASSERT(dtd_identifier.length()); + table->field[offset + 8]->store(dtd_identifier.ptr(), + dtd_identifier.length(), cs); + table->field[offset + 8]->set_notnull(); + } + } +} + + +static int get_schema_column_record(THD *thd, TABLE_LIST *tables, + TABLE *table, bool res, + const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name) +{ + LEX *lex= thd->lex; + const char *wild= lex->wild ? lex->wild->ptr() : NullS; + CHARSET_INFO *cs= system_charset_info; + TABLE *show_table; + Field **ptr, *field; + int count; + bool quoted_defaults= lex->sql_command != SQLCOM_SHOW_FIELDS; + DBUG_ENTER("get_schema_column_record"); + + if (res) + { + if (lex->sql_command != SQLCOM_SHOW_FIELDS) + { + /* + I.e. we are in SELECT FROM INFORMATION_SCHEMA.COLUMS + rather than in SHOW COLUMNS + */ + if (thd->is_error()) + convert_error_to_warning(thd); + res= 0; + } + DBUG_RETURN(res); + } + show_table= tables->table; + count= 0; + ptr= show_table->field; + show_table->use_all_columns(); // Required for default + restore_record(show_table, s->default_values); + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + check_access(thd, SELECT_ACL, db_name->str, + &tables->grant.privilege, 0, 0, MY_TEST(tables->schema_table)); + if (is_temporary_table(tables)) + { + tables->grant.privilege|= TMP_TABLE_ACLS; + } +#endif + + for (; (field= *ptr) ; ptr++) + { + if(field->invisible > INVISIBLE_USER) + continue; + uchar *pos; + char tmp[MAX_FIELD_WIDTH]; + String type(tmp,sizeof(tmp), system_charset_info); + + DEBUG_SYNC(thd, "get_schema_column"); + + if (wild && wild[0] && + wild_case_compare(system_charset_info, field->field_name.str, wild)) + continue; + + count++; + /* Get default row, with all NULL fields set to NULL */ + restore_record(table, s->default_values); + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + ulonglong col_access= + get_column_grant(thd, &tables->grant, db_name->str, table_name->str, + field->field_name.str) & COL_ACLS; + + if 
(!col_access && !tables->schema_table) + continue; + + char *end= tmp; + for (uint bitnr=0; col_access ; col_access>>=1,bitnr++) + { + if (col_access & 1) + { + *end++=','; + end=strmov(end,grant_types.type_names[bitnr]); + } + } + table->field[18]->store(tmp+1,end == tmp ? 0 : (uint) (end-tmp-1), cs); + +#endif + table->field[0]->store(STRING_WITH_LEN("def"), cs); + table->field[1]->store(db_name->str, db_name->length, cs); + table->field[2]->store(table_name->str, table_name->length, cs); + table->field[3]->store(field->field_name.str, field->field_name.length, + cs); + table->field[4]->store((longlong) count, TRUE); + + if (get_field_default_value(thd, field, &type, quoted_defaults)) + { + table->field[5]->store(type.ptr(), type.length(), cs); + table->field[5]->set_notnull(); + } + pos=(uchar*) ((field->flags & NOT_NULL_FLAG) ? "NO" : "YES"); + table->field[6]->store((const char*) pos, + strlen((const char*) pos), cs); + store_column_type(table, field, cs, 7); + pos=(uchar*) ((field->flags & PRI_KEY_FLAG) ? "PRI" : + (field->flags & UNIQUE_KEY_FLAG) ? "UNI" : + (field->flags & MULTIPLE_KEY_FLAG) ? 
"MUL":""); + table->field[16]->store((const char*) pos, + strlen((const char*) pos), cs); + + StringBuffer<256> buf; + if (field->unireg_check == Field::NEXT_NUMBER) + buf.set(STRING_WITH_LEN("auto_increment"),cs); + if (print_on_update_clause(field, &type, true)) + buf.set(type.ptr(), type.length(),cs); + if (field->vcol_info) + { + String gen_s(tmp,sizeof(tmp), system_charset_info); + gen_s.length(0); + field->vcol_info->print(&gen_s); + table->field[21]->store(gen_s.ptr(), gen_s.length(), cs); + table->field[21]->set_notnull(); + table->field[20]->store(STRING_WITH_LEN("ALWAYS"), cs); + + if (field->vcol_info->stored_in_db) + buf.set(STRING_WITH_LEN("STORED GENERATED"), cs); + else + buf.set(STRING_WITH_LEN("VIRTUAL GENERATED"), cs); + } + else if (field->flags & VERS_SYSTEM_FIELD) + { + if (field->flags & VERS_ROW_START) + { + table->field[21]->store(STRING_WITH_LEN("ROW START"), cs); + buf.set(STRING_WITH_LEN("STORED GENERATED"), cs); + } + else + { + table->field[21]->store(STRING_WITH_LEN("ROW END"), cs); + buf.set(STRING_WITH_LEN("STORED GENERATED"), cs); + } + table->field[21]->set_notnull(); + table->field[20]->store(STRING_WITH_LEN("ALWAYS"), cs); + } + else + table->field[20]->store(STRING_WITH_LEN("NEVER"), cs); + /*Invisible can coexist with auto_increment and virtual */ + if (field->invisible == INVISIBLE_USER) + { + if (buf.length()) + buf.append(STRING_WITH_LEN(", ")); + buf.append(STRING_WITH_LEN("INVISIBLE"),cs); + } + if (field->vers_update_unversioned()) + { + if (buf.length()) + buf.append(STRING_WITH_LEN(", ")); + buf.append(STRING_WITH_LEN("WITHOUT SYSTEM VERSIONING"), cs); + } + table->field[17]->store(buf.ptr(), buf.length(), cs); + table->field[19]->store(field->comment.str, field->comment.length, cs); + if (schema_table_store_record(thd, table)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +int fill_schema_charsets(THD *thd, TABLE_LIST *tables, COND *cond) +{ + CHARSET_INFO **cs; + const char *wild= thd->lex->wild ? 
thd->lex->wild->ptr() : NullS; + TABLE *table= tables->table; + CHARSET_INFO *scs= system_charset_info; + + for (cs= all_charsets ; + cs < all_charsets + array_elements(all_charsets) ; + cs++) + { + CHARSET_INFO *tmp_cs= cs[0]; + if (tmp_cs && (tmp_cs->state & MY_CS_PRIMARY) && + (tmp_cs->state & MY_CS_AVAILABLE) && + !(tmp_cs->state & MY_CS_HIDDEN) && + !(wild && wild[0] && + wild_case_compare(scs, tmp_cs->cs_name.str,wild))) + { + const char *comment; + restore_record(table, s->default_values); + table->field[0]->store(&tmp_cs->cs_name, scs); + table->field[1]->store(&tmp_cs->coll_name, scs); + comment= tmp_cs->comment ? tmp_cs->comment : ""; + table->field[2]->store(comment, strlen(comment), scs); + table->field[3]->store((longlong) tmp_cs->mbmaxlen, TRUE); + if (schema_table_store_record(thd, table)) + return 1; + } + } + return 0; +} + + +static my_bool iter_schema_engines(THD *thd, plugin_ref plugin, + void *ptable) +{ + TABLE *table= (TABLE *) ptable; + handlerton *hton= plugin_hton(plugin); + const char *wild= thd->lex->wild ? 
thd->lex->wild->ptr() : NullS; + CHARSET_INFO *scs= system_charset_info; + handlerton *default_type= ha_default_handlerton(thd); + DBUG_ENTER("iter_schema_engines"); + + + /* Disabled plugins */ + if (plugin_state(plugin) != PLUGIN_IS_READY) + { + + struct st_maria_plugin *plug= plugin_decl(plugin); + if (!(wild && wild[0] && + wild_case_compare(scs, plug->name,wild))) + { + restore_record(table, s->default_values); + table->field[0]->store(plug->name, strlen(plug->name), scs); + table->field[1]->store(STRING_WITH_LEN("NO"), scs); + table->field[2]->store(plug->descr, strlen(plug->descr), scs); + if (schema_table_store_record(thd, table)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); + } + + if (!(hton->flags & HTON_HIDDEN)) + { + LEX_CSTRING *name= plugin_name(plugin); + if (!(wild && wild[0] && + wild_case_compare(scs, name->str,wild))) + { + LEX_CSTRING yesno[2]= {{ STRING_WITH_LEN("NO") }, + { STRING_WITH_LEN("YES") }}; + LEX_CSTRING *tmp; + const char *option_name= default_type != hton ? 
yesno[1].str
                                               : "DEFAULT";
      restore_record(table, s->default_values);

      /* fields 0..5: presumably ENGINE, SUPPORT, COMMENT, TRANSACTIONS,
         XA, SAVEPOINTS (I_S.ENGINES) — verify against the table definition */
      table->field[0]->store(name->str, name->length, scs);
      table->field[1]->store(option_name, strlen(option_name), scs);
      table->field[2]->store(plugin_decl(plugin)->descr,
                             strlen(plugin_decl(plugin)->descr), scs);
      /* "YES" if the engine supports commit and allows rollback */
      tmp= &yesno[MY_TEST(hton->commit && !(hton->flags & HTON_NO_ROLLBACK))];
      table->field[3]->store(tmp->str, tmp->length, scs);
      table->field[3]->set_notnull();
      /* "YES" if the engine implements two-phase commit (XA prepare) */
      tmp= &yesno[MY_TEST(hton->prepare)];
      table->field[4]->store(tmp->str, tmp->length, scs);
      table->field[4]->set_notnull();
      /* "YES" if the engine supports savepoints */
      tmp= &yesno[MY_TEST(hton->savepoint_set)];
      table->field[5]->store(tmp->str, tmp->length, scs);
      table->field[5]->set_notnull();

      if (schema_table_store_record(thd, table))
        DBUG_RETURN(1);
    }
  }
  DBUG_RETURN(0);
}

/**
  Fill the storage-engines information schema table.

  Iterates over all storage-engine plugins that are neither freed nor
  dying, storing one row per engine via iter_schema_engines().

  @return 0 on success, 1 on error
*/
int fill_schema_engines(THD *thd, TABLE_LIST *tables, COND *cond)
{
  DBUG_ENTER("fill_schema_engines");
  if (plugin_foreach_with_mask(thd, iter_schema_engines,
                               MYSQL_STORAGE_ENGINE_PLUGIN,
                               ~(PLUGIN_IS_FREED | PLUGIN_IS_DYING),
                               tables->table))
    DBUG_RETURN(1);
  DBUG_RETURN(0);
}


int fill_schema_collation(THD *thd, TABLE_LIST *tables, COND *cond)
{
  CHARSET_INFO **cs;
  /* Optional LIKE pattern (e.g. from SHOW COLLATION LIKE ...) */
  const char *wild= thd->lex->wild ?
thd->lex->wild->ptr() : NullS; + TABLE *table= tables->table; + CHARSET_INFO *scs= system_charset_info; + for (cs= all_charsets ; + cs < all_charsets + array_elements(all_charsets) ; + cs++ ) + { + CHARSET_INFO **cl; + CHARSET_INFO *tmp_cs= cs[0]; + if (!tmp_cs || !(tmp_cs->state & MY_CS_AVAILABLE) || + (tmp_cs->state & MY_CS_HIDDEN) || + !(tmp_cs->state & MY_CS_PRIMARY)) + continue; + for (cl= all_charsets; + cl < all_charsets + array_elements(all_charsets) ; + cl ++) + { + CHARSET_INFO *tmp_cl= cl[0]; + if (!tmp_cl || !(tmp_cl->state & MY_CS_AVAILABLE) || + !my_charset_same(tmp_cs, tmp_cl)) + continue; + if (!(wild && wild[0] && + wild_case_compare(scs, tmp_cl->coll_name.str, wild))) + { + LEX_CSTRING context_collation_name= + tmp_cl->get_collation_name(MY_COLLATION_NAME_MODE_CONTEXT); + LEX_CSTRING full_collation_name= + tmp_cl->get_collation_name(MY_COLLATION_NAME_MODE_FULL); + bool is_context= cmp(context_collation_name, full_collation_name); + /* + Some collations are applicable to multiple character sets. + Display them only once, with the short name (without the + character set prefix). 
+ */ + if (is_context && + cmp(tmp_cl->cs_name, Lex_cstring(STRING_WITH_LEN("utf8mb4")))) + continue; + restore_record(table, s->default_values); + table->field[0]->store(context_collation_name, scs); + if (is_context) + { + table->field[1]->set_null(); // CHARACTER_SET_NAME + table->field[2]->set_null(); // ID + table->field[3]->set_null(); // IS_DEFAULT + } + else + { + table->field[1]->set_notnull(); // CHARACTER_SET_NAME + table->field[1]->store(tmp_cl->cs_name, scs); + table->field[2]->set_notnull(); // ID + table->field[2]->store((longlong) tmp_cl->number, TRUE); + table->field[3]->set_notnull(); // IS_DEFAULT + table->field[3]->store( + Show::Yes_or_empty::value(tmp_cl->default_flag()), scs); + } + table->field[4]->store( + Show::Yes_or_empty::value(tmp_cl->compiled_flag()), scs); + table->field[5]->store((longlong) tmp_cl->strxfrm_multiply, TRUE); + if (schema_table_store_record(thd, table)) + return 1; + } + } + } + return 0; +} + + +int fill_schema_coll_charset_app(THD *thd, TABLE_LIST *tables, COND *cond) +{ + CHARSET_INFO **cs; + TABLE *table= tables->table; + CHARSET_INFO *scs= system_charset_info; + for (cs= all_charsets ; + cs < all_charsets + array_elements(all_charsets) ; + cs++ ) + { + CHARSET_INFO **cl; + CHARSET_INFO *tmp_cs= cs[0]; + if (!tmp_cs || !(tmp_cs->state & MY_CS_AVAILABLE) || + !(tmp_cs->state & MY_CS_PRIMARY)) + continue; + for (cl= all_charsets; + cl < all_charsets + array_elements(all_charsets) ; + cl ++) + { + CHARSET_INFO *tmp_cl= cl[0]; + if (!tmp_cl || !(tmp_cl->state & MY_CS_AVAILABLE) || + (tmp_cl->state & MY_CS_HIDDEN) || + !my_charset_same(tmp_cs,tmp_cl)) + continue; + restore_record(table, s->default_values); + LEX_CSTRING context_collation_name= + tmp_cl->get_collation_name(MY_COLLATION_NAME_MODE_CONTEXT); + LEX_CSTRING full_collation_name= + tmp_cl->get_collation_name(MY_COLLATION_NAME_MODE_FULL); + table->field[0]->store(context_collation_name, scs); + table->field[1]->store(&tmp_cl->cs_name, scs); + 
      table->field[2]->store(full_collation_name, scs);
      table->field[3]->store(tmp_cl->number); // collation ID
      table->field[4]->store(
        Show::Yes_or_empty::value(tmp_cl->default_flag()), scs);
      if (schema_table_store_record(thd, table))
        return 1;
    }
  }
  return 0;
}


/* Copy one field's value into another field, going through a string
   representation (stack buffer, truncated at MAX_FIELD_WIDTH) */
static inline void copy_field_as_string(Field *to_field, Field *from_field)
{
  char buff[MAX_FIELD_WIDTH];
  String tmp_str(buff, sizeof(buff), system_charset_info);
  from_field->val_str(&tmp_str);
  to_field->store(tmp_str.ptr(), tmp_str.length(), system_charset_info);
}


/**
  @brief When scanning mysql.proc check if we should skip this record or even
         stop the scan

  @param name_field_charset  mysql.proc.name field charset info
  @param lookup              values from the WHERE clause which are
                             used for the index lookup
  @param db                  mysql.proc.db field value of
                             the current record
  @param name                mysql.proc.name field value of
                             the current record

  @return Result
    @retval -1                  The record is a match (do further processing)
    @retval 0                   Skip this record, it doesn't match.
    @retval HA_ERR_END_OF_FILE  Stop scanning, no further matches possible
*/

int check_proc_record(const CHARSET_INFO *name_field_charset,
                      const LOOKUP_FIELD_VALUES *lookup,
                      const LEX_CSTRING &db,
                      const LEX_CSTRING &name)
{
  if (lookup->db_value.str && cmp(lookup->db_value, db))
  {
    /*
      We have the name of target database. If we got a non-matching
      record, this means we've finished reading matching mysql.proc records
    */
    return HA_ERR_END_OF_FILE;
  }

  if (lookup->table_value.str)
  {
    /* Routine names must be compared with the mysql.proc.name column's
       own collation, not with system_charset_info */
    if ((my_ci_strnncoll(name_field_charset,
                         (const uchar *) lookup->table_value.str,
                         lookup->table_value.length,
                         (const uchar *) name.str, name.length, 0)))
    {
      /* Routine name doesn't match. */
      if (lookup->db_value.str)
      {
        /*
          We're using index lookup. A non-matching record means we've
          finished reading matches.
        */
        return HA_ERR_END_OF_FILE;
      }
      else
      {
        /* The routine name doesn't match, but we're scanning all databases */
        return 0; /* Continue scanning */
      }
    }
  }
  return -1; /* This is a match */
}

/**
  @brief Store record into I_S.PARAMETERS table

  @param[in] thd         thread handler
  @param[in] table       I_S table
  @param[in] proc_table  'mysql.proc' table
  @param[in] lookup      WHERE-clause values used for the index lookup
  @param[in] full_access if 1 user has privileges on the routine
  @param[in] sp_user     user in 'user@host' format

  @return Operation status
    @retval 0     ok
    @retval != 0  error / HA_ERR_END_OF_FILE
                  (if there are no more matching records)
*/

int store_schema_params(THD *thd, TABLE *table, TABLE *proc_table,
                        LOOKUP_FIELD_VALUES *lookup, bool full_access,
                        const char *sp_user)
{
  TABLE_SHARE share;
  TABLE tbl;
  CHARSET_INFO *cs= system_charset_info;
  LEX_CSTRING definer, params, returns= empty_clex_str;
  LEX_CSTRING db, name;
  char path[FN_REFLEN];
  sp_head *sp;
  const Sp_handler *sph;
  bool free_sp_head;
  bool error= 0;
  sql_mode_t sql_mode;
  int rc;
  DBUG_ENTER("store_schema_params");

  /* Dummy share/table: used further down to materialize Field objects for
     the routine's return value and parameters */
  bzero((char*) &tbl, sizeof(TABLE));
  (void) build_table_filename(path, sizeof(path), "", "", "", 0);
  init_tmp_table_share(thd, &share, "", 0, "", path);

  proc_table->field[MYSQL_PROC_FIELD_DB]->val_str_nopad(thd->mem_root, &db);
  proc_table->field[MYSQL_PROC_FIELD_NAME]->val_str_nopad(thd->mem_root, &name);

  CHARSET_INFO *name_cs= proc_table->field[MYSQL_PROC_FIELD_NAME]->charset();

  if ((rc= check_proc_record(name_cs, lookup, db, name)) != -1)
    DBUG_RETURN(rc); /* either HA_ERR_END_OF_FILE or 0 if name didn't match */

  proc_table->field[MYSQL_PROC_FIELD_DEFINER]->val_str_nopad(thd->mem_root, &definer);
  sql_mode= (sql_mode_t) proc_table->field[MYSQL_PROC_FIELD_SQL_MODE]->val_int();
  sph= Sp_handler::handler_mysql_proc((enum_sp_type)
                                      proc_table->field[MYSQL_PROC_MYSQL_TYPE]->
                                      val_int());
  /* Packages and package bodies have no rows in I_S.PARAMETERS */
  if (!sph || sph->type() == SP_TYPE_PACKAGE ||
      sph->type() ==
SP_TYPE_PACKAGE_BODY) + DBUG_RETURN(0); + + if (!full_access) + full_access= !strcmp(sp_user, definer.str); + if (!full_access && + check_some_routine_access(thd, db.str, name.str, sph)) + DBUG_RETURN(0); + + proc_table->field[MYSQL_PROC_FIELD_PARAM_LIST]->val_str_nopad(thd->mem_root, + ¶ms); + if (sph->type() == SP_TYPE_FUNCTION) + proc_table->field[MYSQL_PROC_FIELD_RETURNS]->val_str_nopad(thd->mem_root, + &returns); + sp= sph->sp_load_for_information_schema(thd, proc_table, db, name, + params, returns, sql_mode, + &free_sp_head); + if (sp) + { + Field *field; + LEX_CSTRING tmp_string; + Sql_mode_save sql_mode_backup(thd); + thd->variables.sql_mode= sql_mode; + + if (sph->type() == SP_TYPE_FUNCTION) + { + restore_record(table, s->default_values); + table->field[0]->store(STRING_WITH_LEN("def"), cs); + table->field[1]->store(db, cs); + table->field[2]->store(name, cs); + table->field[3]->store((longlong) 0, TRUE); + proc_table->field[MYSQL_PROC_MYSQL_TYPE]->val_str_nopad(thd->mem_root, + &tmp_string); + table->field[15]->store(tmp_string, cs); + field= sp->m_return_field_def.make_field(&share, thd->mem_root, + &empty_clex_str); + field->table= &tbl; + tbl.in_use= thd; + store_column_type(table, field, cs, 6); + if (schema_table_store_record(thd, table)) + { + free_table_share(&share); + if (free_sp_head) + sp_head::destroy(sp); + DBUG_RETURN(1); + } + } + + sp_pcontext *spcont= sp->get_parse_context(); + uint params= spcont->context_var_count(); + for (uint i= 0 ; i < params ; i++) + { + const char *tmp_buff; + sp_variable *spvar= spcont->find_variable(i); + switch (spvar->mode) { + case sp_variable::MODE_IN: + tmp_buff= "IN"; + break; + case sp_variable::MODE_OUT: + tmp_buff= "OUT"; + break; + case sp_variable::MODE_INOUT: + tmp_buff= "INOUT"; + break; + default: + tmp_buff= ""; + break; + } + + restore_record(table, s->default_values); + table->field[0]->store(STRING_WITH_LEN("def"), cs); + table->field[1]->store(db, cs); + table->field[2]->store(name, cs); + 
table->field[3]->store((longlong) i + 1, TRUE); + table->field[4]->store(tmp_buff, strlen(tmp_buff), cs); + table->field[4]->set_notnull(); + table->field[5]->store(spvar->name.str, spvar->name.length, cs); + table->field[5]->set_notnull(); + proc_table->field[MYSQL_PROC_MYSQL_TYPE]->val_str_nopad(thd->mem_root, + &tmp_string); + table->field[15]->store(tmp_string, cs); + + store_variable_type(thd, spvar, &tbl, &share, cs, table, 6); + if (schema_table_store_record(thd, table)) + { + error= 1; + break; + } + } + if (free_sp_head) + sp_head::destroy(sp); + } + free_table_share(&share); + DBUG_RETURN(error); +} + + +int store_schema_proc(THD *thd, TABLE *table, TABLE *proc_table, + LOOKUP_FIELD_VALUES *lookup, bool full_access, + const char *sp_user) +{ + LEX *lex= thd->lex; + CHARSET_INFO *cs= system_charset_info; + const Sp_handler *sph; + LEX_CSTRING db, name, definer, returns= empty_clex_str; + const char *wild= thd->lex->wild ? thd->lex->wild->ptr() : NullS; + int rc; + + proc_table->field[MYSQL_PROC_FIELD_DB]->val_str_nopad(thd->mem_root, &db); + proc_table->field[MYSQL_PROC_FIELD_NAME]->val_str_nopad(thd->mem_root, &name); + + CHARSET_INFO *name_cs= proc_table->field[MYSQL_PROC_FIELD_NAME]->charset(); + if ((rc= check_proc_record(name_cs, lookup, db, name)) != -1) + return rc; /* either HA_ERR_END_OF_FILE or 0 if name didn't match */ + + proc_table->field[MYSQL_PROC_FIELD_DEFINER]->val_str_nopad(thd->mem_root, &definer); + sph= Sp_handler::handler_mysql_proc((enum_sp_type) + proc_table->field[MYSQL_PROC_MYSQL_TYPE]-> + val_int()); + if (!sph) + return 0; + + if (!full_access) + full_access= !strcmp(sp_user, definer.str); + if (!full_access && + check_some_routine_access(thd, db.str, name.str, sph)) + return 0; + + if (!is_show_command(thd) || + sph == Sp_handler::handler(lex->sql_command)) + { + restore_record(table, s->default_values); + if (!wild || !wild[0] || !wild_case_compare(system_charset_info, + name.str, wild)) + { + int enum_idx= (int) 
proc_table->field[MYSQL_PROC_FIELD_ACCESS]->val_int(); + table->field[3]->store(name, cs); + + copy_field_as_string(table->field[0], + proc_table->field[MYSQL_PROC_FIELD_SPECIFIC_NAME]); + table->field[1]->store(STRING_WITH_LEN("def"), cs); + table->field[2]->store(db, cs); + copy_field_as_string(table->field[4], + proc_table->field[MYSQL_PROC_MYSQL_TYPE]); + + if (sph->type() == SP_TYPE_FUNCTION) + { + sp_head *sp; + bool free_sp_head; + proc_table->field[MYSQL_PROC_FIELD_RETURNS]->val_str_nopad(thd->mem_root, + &returns); + sp= sph->sp_load_for_information_schema(thd, proc_table, + db, name, + empty_clex_str /*params*/, + returns, + (ulong) proc_table-> + field[MYSQL_PROC_FIELD_SQL_MODE]-> + val_int(), + &free_sp_head); + if (sp) + { + char path[FN_REFLEN]; + TABLE_SHARE share; + TABLE tbl; + Field *field; + + bzero((char*) &tbl, sizeof(TABLE)); + (void) build_table_filename(path, sizeof(path), "", "", "", 0); + init_tmp_table_share(thd, &share, "", 0, "", path); + field= sp->m_return_field_def.make_field(&share, thd->mem_root, + &empty_clex_str); + field->table= &tbl; + tbl.in_use= thd; + store_column_type(table, field, cs, 5); + free_table_share(&share); + if (free_sp_head) + sp_head::destroy(sp); + } + } + + if (full_access) + { + copy_field_as_string(table->field[15], + proc_table->field[MYSQL_PROC_FIELD_BODY_UTF8]); + table->field[15]->set_notnull(); + } + table->field[14]->store(STRING_WITH_LEN("SQL"), cs); + table->field[18]->store(STRING_WITH_LEN("SQL"), cs); + copy_field_as_string(table->field[19], + proc_table->field[MYSQL_PROC_FIELD_DETERMINISTIC]); + table->field[20]->store(sp_data_access_name[enum_idx].str, + sp_data_access_name[enum_idx].length , cs); + copy_field_as_string(table->field[22], + proc_table->field[MYSQL_PROC_FIELD_SECURITY_TYPE]); + + proc_table->field[MYSQL_PROC_FIELD_CREATED]-> + save_in_field(table->field[23]); + proc_table->field[MYSQL_PROC_FIELD_MODIFIED]-> + save_in_field(table->field[24]); + + 
copy_field_as_string(table->field[25], + proc_table->field[MYSQL_PROC_FIELD_SQL_MODE]); + copy_field_as_string(table->field[26], + proc_table->field[MYSQL_PROC_FIELD_COMMENT]); + + table->field[27]->store(definer, cs); + copy_field_as_string(table->field[28], + proc_table-> + field[MYSQL_PROC_FIELD_CHARACTER_SET_CLIENT]); + copy_field_as_string(table->field[29], + proc_table-> + field[MYSQL_PROC_FIELD_COLLATION_CONNECTION]); + copy_field_as_string(table->field[30], + proc_table->field[MYSQL_PROC_FIELD_DB_COLLATION]); + + return schema_table_store_record(thd, table); + } + } + return 0; +} + + +int fill_schema_proc(THD *thd, TABLE_LIST *tables, COND *cond) +{ + TABLE *proc_table; + TABLE_LIST proc_tables; + int res= 0; + TABLE *table= tables->table; + bool full_access; + char definer[USER_HOST_BUFF_SIZE]; + enum enum_schema_tables schema_table_idx= + get_schema_table_idx(tables->schema_table); + DBUG_ENTER("fill_schema_proc"); + + strxmov(definer, thd->security_ctx->priv_user, "@", + thd->security_ctx->priv_host, NullS); + /* We use this TABLE_LIST instance only for checking of privileges. 
*/ + bzero((char*) &proc_tables,sizeof(proc_tables)); + proc_tables.db= MYSQL_SCHEMA_NAME; + proc_tables.table_name= MYSQL_PROC_NAME; + proc_tables.alias= MYSQL_PROC_NAME; + proc_tables.lock_type= TL_READ; + full_access= !check_table_access(thd, SELECT_ACL, &proc_tables, FALSE, + 1, TRUE); + + LOOKUP_FIELD_VALUES lookup; + if (get_lookup_field_values(thd, cond, false, tables, &lookup)) + { + // There can be no matching records for the condition + DBUG_RETURN(0); + } + + start_new_trans new_trans(thd); + + if (!(proc_table= open_proc_table_for_read(thd))) + { + new_trans.restore_old_transaction(); + DBUG_RETURN(1); + } + + /* Disable padding temporarily so it doesn't break the query */ + ulonglong sql_mode_was = thd->variables.sql_mode; + thd->variables.sql_mode &= ~MODE_PAD_CHAR_TO_FULL_LENGTH; + + if (proc_table->file->ha_index_init(0, 1)) + { + res= 1; + goto err; + } + + if (lookup.db_value.str) + { + KEY *keyinfo= proc_table->key_info; + uint keylen= keyinfo->key_part[0].length; + key_part_map keypart_map= 1; + enum ha_rkey_function find_flag= HA_READ_PREFIX; + const auto sp_name_len= NAME_LEN * 2 + 1 /*for type*/; + StringBuffer keybuf; + keybuf.alloc(proc_table->key_info->key_length); + keybuf.length(proc_table->key_info->key_length); + proc_table->field[0]->store(lookup.db_value.str, lookup.db_value.length, + system_charset_info); + if (lookup.table_value.str) + { + proc_table->field[1]->store(lookup.table_value.str, + lookup.table_value.length, + system_charset_info); + keylen+= keyinfo->key_part[1].length; + keypart_map= 3; + find_flag= HA_READ_KEY_EXACT; + } + key_copy((uchar*)keybuf.ptr(), proc_table->record[0], keyinfo, keylen, 0); + res= proc_table->file->ha_index_read_map(proc_table->record[0], + (const uchar*) keybuf.ptr(), + keypart_map, find_flag); + } + else + res= proc_table->file->ha_index_first(proc_table->record[0]); + + if (res) + goto err; + + res= schema_table_idx == SCH_PROCEDURES ? 
+ store_schema_proc(thd, table, proc_table, &lookup, full_access,definer) : + store_schema_params(thd, table, proc_table, &lookup, full_access, definer); + while (!res && !proc_table->file->ha_index_next(proc_table->record[0])) + { + res= schema_table_idx == SCH_PROCEDURES ? + store_schema_proc(thd, table, proc_table, &lookup, full_access, definer) : + store_schema_params(thd, table, proc_table, &lookup, full_access, definer); + } + +err: + if (proc_table->file->inited) + (void) proc_table->file->ha_index_end(); + + if (res == HA_ERR_END_OF_FILE || res == HA_ERR_KEY_NOT_FOUND) + res= 0; + + thd->commit_whole_transaction_and_close_tables(); + new_trans.restore_old_transaction(); + + thd->variables.sql_mode = sql_mode_was; + DBUG_RETURN(res); +} + + +static int get_schema_stat_record(THD *thd, TABLE_LIST *tables, + TABLE *table, bool res, + const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name) +{ + CHARSET_INFO *cs= system_charset_info; + DBUG_ENTER("get_schema_stat_record"); + if (res) + { + if (thd->lex->sql_command != SQLCOM_SHOW_KEYS) + { + /* + I.e. 
we are in SELECT FROM INFORMATION_SCHEMA.STATISTICS + rather than in SHOW KEYS + */ + if (unlikely(thd->is_error())) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + thd->get_stmt_da()->sql_errno(), + thd->get_stmt_da()->message()); + thd->clear_error(); + res= 0; + } + DBUG_RETURN(res); + } + else if (!tables->view) + { + TABLE *show_table= tables->table; + KEY *key_info=show_table->s->key_info; + if (show_table->file) + { + (void) read_statistics_for_tables(thd, tables, false); + show_table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK | + HA_STATUS_CONST | HA_STATUS_TIME); + set_statistics_for_table(thd, show_table); + } + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + bool need_column_checks= false; + /* we know that the table or at least some of the columns have + necessary privileges, but the caller didn't pass down the GRANT_INFO + object, so we have to rediscover everything again :( */ + if (!(thd->col_access & TABLE_ACLS)) + { + check_grant(thd, SELECT_ACL, tables, 0, 1, 1); + + if (!(tables->grant.privilege & TABLE_ACLS)) + need_column_checks= true; + } +#endif + + for (uint i=0 ; i < show_table->s->keys ; i++,key_info++) + { + if ((key_info->flags & HA_INVISIBLE_KEY) && + !DBUG_IF("test_invisible_index")) + continue; + KEY_PART_INFO *key_part= key_info->key_part; + LEX_CSTRING *str; + LEX_CSTRING unknown= {STRING_WITH_LEN("?unknown field?") }; + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (need_column_checks) + { + uint j; + for (j=0 ; j < key_info->user_defined_key_parts ; j++,key_part++) + { + uint access= get_column_grant(thd, &tables->grant, db_name->str, + table_name->str, + key_part->field->field_name.str); + + if (!access) + break; + } + if (j != key_info->user_defined_key_parts) + continue; + key_part= key_info->key_part; + } +#endif + + for (uint j=0 ; j < key_info->user_defined_key_parts ; j++,key_part++) + { + if (key_part->field->invisible >= INVISIBLE_SYSTEM && + !DBUG_IF("test_completely_invisible")) + { + /* + NOTE: we will get SEQ_IN_INDEX 
gap inside the result if this key_part + is not last (currently not possible). Though nothing is wrong with + that probably. + */ + continue; + } + restore_record(table, s->default_values); + table->field[0]->store(STRING_WITH_LEN("def"), cs); + table->field[1]->store(db_name->str, db_name->length, cs); + table->field[2]->store(table_name->str, table_name->length, cs); + table->field[3]->store((longlong) ((key_info->flags & + HA_NOSAME) ? 0 : 1), TRUE); + table->field[4]->store(db_name->str, db_name->length, cs); + table->field[5]->store(key_info->name.str, key_info->name.length, cs); + table->field[6]->store((longlong) (j+1), TRUE); + str= (key_part->field ? &key_part->field->field_name : + &unknown); + table->field[7]->store(str->str, str->length, cs); + if (show_table->file) + { + if (show_table->file->index_flags(i, j, 0) & HA_READ_ORDER) + { + table->field[8]->store(((key_part->key_part_flag & + HA_REVERSE_SORT) ? + "D" : "A"), 1, cs); + table->field[8]->set_notnull(); + } + if (key_info->algorithm == HA_KEY_ALG_LONG_HASH) + table->field[13]->store(STRING_WITH_LEN("HASH"), cs); + else + { + /* + We have to use table key information to get the key statistics + from table as key_info points to TABLE_SHARE which has no + statistics. + */ + KEY *key_info= show_table->key_info + i; + if (key_info->rec_per_key[j]) + { + ha_rows records= (ha_rows) ((double) show_table->stat_records() / + key_info->actual_rec_per_key(j)); + table->field[9]->store((longlong) records, TRUE); + table->field[9]->set_notnull(); + } + const char *tmp= show_table->file->index_type(i); + table->field[13]->store(tmp, strlen(tmp), cs); + } + } + if (!(key_info->flags & HA_FULLTEXT) && + (key_part->field && + key_part->length != + show_table->s->field[key_part->fieldnr-1]->key_length())) + { + table->field[10]->store((longlong) key_part->length / + key_part->field->charset()->mbmaxlen, TRUE); + table->field[10]->set_notnull(); + } + uint flags= key_part->field ? 
key_part->field->flags : 0; + const char *pos=(char*) ((flags & NOT_NULL_FLAG) ? "" : "YES"); + table->field[12]->store(pos, strlen(pos), cs); + if (!show_table->s->keys_in_use.is_set(i)) + table->field[14]->store(STRING_WITH_LEN("disabled"), cs); + else + table->field[14]->store("", 0, cs); + table->field[14]->set_notnull(); + DBUG_ASSERT(MY_TEST(key_info->flags & HA_USES_COMMENT) == + (key_info->comment.length > 0)); + if (key_info->flags & HA_USES_COMMENT) + table->field[15]->store(key_info->comment.str, + key_info->comment.length, cs); + + // IGNORED column + const char *is_ignored= key_info->is_ignored ? "YES" : "NO"; + table->field[16]->store(is_ignored, strlen(is_ignored), cs); + table->field[16]->set_notnull(); + + if (schema_table_store_record(thd, table)) + DBUG_RETURN(1); + } + } + } + DBUG_RETURN(res); +} + + +static int get_schema_views_record(THD *thd, TABLE_LIST *tables, + TABLE *table, bool res, + const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name) +{ + CHARSET_INFO *cs= system_charset_info; + char definer[USER_HOST_BUFF_SIZE]; + uint definer_len; + bool updatable_view; + DBUG_ENTER("get_schema_views_record"); + + if (tables->view) + { + Security_context *sctx= thd->security_ctx; + if (!tables->allowed_show) + { + if (!my_strcasecmp(system_charset_info, tables->definer.user.str, + sctx->priv_user) && + !my_strcasecmp(system_charset_info, tables->definer.host.str, + sctx->priv_host)) + tables->allowed_show= TRUE; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + else + { + if ((thd->col_access & (SHOW_VIEW_ACL|SELECT_ACL)) == + (SHOW_VIEW_ACL|SELECT_ACL)) + tables->allowed_show= TRUE; + else + { + TABLE_LIST table_list; + table_list.reset(); + table_list.db= tables->db; + table_list.table_name= tables->table_name; + table_list.grant.privilege= thd->col_access; + privilege_t view_access(get_table_grant(thd, &table_list)); + if ((view_access & (SHOW_VIEW_ACL|SELECT_ACL)) == + (SHOW_VIEW_ACL|SELECT_ACL)) + tables->allowed_show= TRUE; + } + } +#endif + } + 
restore_record(table, s->default_values); + table->field[0]->store(STRING_WITH_LEN("def"), cs); + table->field[1]->store(db_name->str, db_name->length, cs); + table->field[2]->store(table_name->str, table_name->length, cs); + + if (tables->allowed_show) + { + table->field[3]->store(tables->view_body_utf8.str, + tables->view_body_utf8.length, + cs); + } + + if (tables->with_check != VIEW_CHECK_NONE) + { + if (tables->with_check == VIEW_CHECK_LOCAL) + table->field[4]->store(STRING_WITH_LEN("LOCAL"), cs); + else + table->field[4]->store(STRING_WITH_LEN("CASCADED"), cs); + } + else + table->field[4]->store(STRING_WITH_LEN("NONE"), cs); + + /* + Only try to fill in the information about view updatability + if it is requested as part of the top-level query (i.e. + it's select * from i_s.views, as opposed to, say, select + security_type from i_s.views). Do not try to access the + underlying tables if there was an error when opening the + view: all underlying tables are released back to the table + definition cache on error inside open_normal_and_derived_tables(). + If a field is not assigned explicitly, it defaults to NULL. + */ + if (res == FALSE && + table->pos_in_table_list->table_open_method & OPEN_FULL_TABLE) + { + updatable_view= 0; + if (tables->algorithm != VIEW_ALGORITHM_TMPTABLE) + { + /* + We should use tables->view->select_lex.item_list here + and can not use Field_iterator_view because the view + always uses temporary algorithm during opening for I_S + and TABLE_LIST fields 'field_translation' + & 'field_translation_end' are uninitialized is this + case. 
+ */ + List *fields= &tables->view->first_select_lex()->item_list; + List_iterator it(*fields); + Item *item; + Item_field *field; + /* + check that at least one column in view is updatable + */ + while ((item= it++)) + { + if ((field= item->field_for_view_update()) && field->field && + !field->field->table->pos_in_table_list->schema_table) + { + updatable_view= 1; + break; + } + } + if (updatable_view && !tables->view->can_be_merged()) + updatable_view= 0; + } + if (updatable_view) + table->field[5]->store(STRING_WITH_LEN("YES"), cs); + else + table->field[5]->store(STRING_WITH_LEN("NO"), cs); + } + + definer_len= (uint)(strxmov(definer, tables->definer.user.str, "@", + tables->definer.host.str, NullS) - definer); + table->field[6]->store(definer, definer_len, cs); + if (tables->view_suid) + table->field[7]->store(STRING_WITH_LEN("DEFINER"), cs); + else + table->field[7]->store(STRING_WITH_LEN("INVOKER"), cs); + + table->field[8]->store(&tables->view_creation_ctx->get_client_cs()->cs_name, + cs); + table->field[9]->store(&tables->view_creation_ctx-> + get_connection_cl()->coll_name, cs); + table->field[10]->store(view_algorithm(tables), cs); + + if (schema_table_store_record(thd, table)) + DBUG_RETURN(1); + if (unlikely(res && thd->is_error())) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + thd->get_stmt_da()->sql_errno(), + thd->get_stmt_da()->message()); + } + if (res) + thd->clear_error(); + DBUG_RETURN(0); +} + + +static bool +store_constraints(THD *thd, TABLE *table, const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name, const char *key_name, + size_t key_len, const char *con_type, size_t con_len) +{ + CHARSET_INFO *cs= system_charset_info; + restore_record(table, s->default_values); + table->field[0]->store(STRING_WITH_LEN("def"), cs); + table->field[1]->store(db_name->str, db_name->length, cs); + table->field[2]->store(key_name, key_len, cs); + table->field[3]->store(db_name->str, db_name->length, cs); + table->field[4]->store(table_name->str, 
table_name->length, cs); + table->field[5]->store(con_type, con_len, cs); + return schema_table_store_record(thd, table); +} + +static int get_check_constraints_record(THD *thd, TABLE_LIST *tables, + TABLE *table, bool res, + const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name) +{ + DBUG_ENTER("get_check_constraints_record"); + if (res) + { + if (thd->is_error()) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + thd->get_stmt_da()->sql_errno(), + thd->get_stmt_da()->message()); + thd->clear_error(); + DBUG_RETURN(0); + } + if (!tables->view) + { + StringBuffer str(system_charset_info); +#ifndef NO_EMBEDDED_ACCESS_CHECKS + TABLE_LIST table_acl_check; + bzero((char*) &table_acl_check, sizeof(table_acl_check)); + + if (!(thd->col_access & TABLE_ACLS)) + { + table_acl_check.db= *db_name; + table_acl_check.table_name= *table_name; + table_acl_check.grant.privilege= thd->col_access; + if (check_grant(thd, TABLE_ACLS, &table_acl_check, FALSE, 1, TRUE)) + DBUG_RETURN(res); + } +#endif + for (uint i= 0; i < tables->table->s->table_check_constraints; i++) + { + Virtual_column_info *check= tables->table->check_constraints[i]; + table->field[0]->store(STRING_WITH_LEN("def"), system_charset_info); + table->field[3]->store(check->name.str, check->name.length, + system_charset_info); + const char *tmp_buff; + tmp_buff= (check->get_vcol_type() == VCOL_CHECK_FIELD ? + "Column" : "Table"); + table->field[4]->store(tmp_buff, strlen(tmp_buff), system_charset_info); + /* Make sure the string is empty between each print. 
*/ + str.length(0); + check->print(&str); + table->field[5]->store(str.ptr(), str.length(), system_charset_info); + if (schema_table_store_record(thd, table)) + DBUG_RETURN(1); + } + } + DBUG_RETURN(res); +} + +static int get_schema_constraints_record(THD *thd, TABLE_LIST *tables, + TABLE *table, bool res, + const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name) +{ + DBUG_ENTER("get_schema_constraints_record"); + if (res) + { + if (unlikely(thd->is_error())) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + thd->get_stmt_da()->sql_errno(), + thd->get_stmt_da()->message()); + thd->clear_error(); + DBUG_RETURN(0); + } + else if (!tables->view) + { +#ifndef NO_EMBEDDED_ACCESS_CHECKS + /* need any non-SELECT privilege on the table or any of its columns */ + const privilege_t need= TABLE_ACLS & ~SELECT_ACL; + if (!(thd->col_access & need)) + { + /* we know that the table or at least some of the columns have + necessary privileges, but the caller didn't pass down the GRANT_INFO + object, so we have to rediscover everything again :( */ + check_grant(thd, SELECT_ACL, tables, 0, 1, 1); + + if (!(tables->grant.all_privilege() & need)) + DBUG_RETURN(0); + } +#endif + + List f_key_list; + TABLE *show_table= tables->table; + KEY *key_info=show_table->s->key_info; + uint primary_key= show_table->s->primary_key; + show_table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK | + HA_STATUS_TIME); + for (uint i=0 ; i < show_table->s->keys ; i++, key_info++) + { + if (i == primary_key && !strcmp(key_info->name.str, + primary_key_name.str)) + { + if (store_constraints(thd, table, db_name, table_name, + key_info->name.str, key_info->name.length, + STRING_WITH_LEN("PRIMARY KEY"))) + DBUG_RETURN(1); + } + else if (key_info->flags & HA_NOSAME) + { + if (store_constraints(thd, table, db_name, table_name, + key_info->name.str, key_info->name.length, + STRING_WITH_LEN("UNIQUE"))) + DBUG_RETURN(1); + } + } + + // Table check constraints + for (uint i = 0; i < 
show_table->s->table_check_constraints; i++) + { + Virtual_column_info *check = show_table->check_constraints[i]; + + if (store_constraints(thd, table, db_name, table_name, + check->name.str, check->name.length, + STRING_WITH_LEN("CHECK"))) + DBUG_RETURN(1); + } + + show_table->file->get_foreign_key_list(thd, &f_key_list); + FOREIGN_KEY_INFO *f_key_info; + List_iterator_fast it(f_key_list); + while ((f_key_info=it++)) + { + if (store_constraints(thd, table, db_name, table_name, + f_key_info->foreign_id->str, + strlen(f_key_info->foreign_id->str), + STRING_WITH_LEN("FOREIGN KEY"))) + DBUG_RETURN(1); + } + } + DBUG_RETURN(res); +} + + +static bool store_trigger(THD *thd, Trigger *trigger, + TABLE *table, const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name) +{ + CHARSET_INFO *cs= system_charset_info; + LEX_CSTRING sql_mode_rep; + MYSQL_TIME timestamp; + char definer_holder[USER_HOST_BUFF_SIZE]; + LEX_STRING definer_buffer; + LEX_CSTRING trigger_stmt, trigger_body; + definer_buffer.str= definer_holder; + + trigger->get_trigger_info(&trigger_stmt, &trigger_body, &definer_buffer); + + restore_record(table, s->default_values); + table->field[0]->store(STRING_WITH_LEN("def"), cs); + table->field[1]->store(db_name->str, db_name->length, cs); + table->field[2]->store(trigger->name.str, trigger->name.length, cs); + table->field[3]->store(trg_event_type_names[trigger->event].str, + trg_event_type_names[trigger->event].length, cs); + table->field[4]->store(STRING_WITH_LEN("def"), cs); + table->field[5]->store(db_name->str, db_name->length, cs); + table->field[6]->store(table_name->str, table_name->length, cs); + table->field[7]->store(trigger->action_order); + table->field[9]->store(trigger_body.str, trigger_body.length, cs); + table->field[10]->store(STRING_WITH_LEN("ROW"), cs); + table->field[11]->store(trg_action_time_type_names[trigger->action_time].str, + trg_action_time_type_names[trigger->action_time].length, cs); + table->field[14]->store(STRING_WITH_LEN("OLD"), 
cs); + table->field[15]->store(STRING_WITH_LEN("NEW"), cs); + + if (trigger->hr_create_time.val) + { + /* timestamp is in microseconds */ + table->field[16]->set_notnull(); + thd->variables.time_zone-> + gmt_sec_to_TIME(×tamp, + (my_time_t) hrtime_to_time(trigger->hr_create_time)); + timestamp.second_part= hrtime_sec_part(trigger->hr_create_time); + table->field[16]->store_time_dec(×tamp, 2); + } + + sql_mode_string_representation(thd, trigger->sql_mode, &sql_mode_rep); + table->field[17]->store(sql_mode_rep.str, sql_mode_rep.length, cs); + table->field[18]->store(definer_buffer.str, definer_buffer.length, cs); + table->field[19]->store(&trigger->client_cs_name, cs); + table->field[20]->store(&trigger->connection_cl_name, cs); + table->field[21]->store(&trigger->db_cl_name, cs); + + return schema_table_store_record(thd, table); +} + + +static int get_schema_triggers_record(THD *thd, TABLE_LIST *tables, + TABLE *table, bool res, + const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name) +{ + DBUG_ENTER("get_schema_triggers_record"); + /* + res can be non zero value when processed table is a view or + error happened during opening of processed table. 
+ */ + if (res) + { + if (unlikely(thd->is_error())) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + thd->get_stmt_da()->sql_errno(), + thd->get_stmt_da()->message()); + thd->clear_error(); + DBUG_RETURN(0); + } + if (!tables->view && tables->table->triggers) + { + Table_triggers_list *triggers= tables->table->triggers; + int event, timing; + + if (check_table_access(thd, TRIGGER_ACL, tables, FALSE, 1, TRUE)) + goto ret; + + for (event= 0; event < (int)TRG_EVENT_MAX; event++) + { + for (timing= 0; timing < (int)TRG_ACTION_MAX; timing++) + { + Trigger *trigger; + for (trigger= triggers-> + get_trigger((enum trg_event_type) event, + (enum trg_action_time_type) timing) ; + trigger; + trigger= trigger->next) + { + if (store_trigger(thd, trigger, table, db_name, table_name)) + DBUG_RETURN(1); + } + } + } + } +ret: + DBUG_RETURN(0); +} + + +static void +store_key_column_usage(TABLE *table, const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name, const char *key_name, + size_t key_len, const char *con_type, size_t con_len, + longlong idx) +{ + CHARSET_INFO *cs= system_charset_info; + table->field[0]->store(STRING_WITH_LEN("def"), cs); + table->field[1]->store(db_name->str, db_name->length, cs); + table->field[2]->store(key_name, key_len, cs); + table->field[3]->store(STRING_WITH_LEN("def"), cs); + table->field[4]->store(db_name->str, db_name->length, cs); + table->field[5]->store(table_name->str, table_name->length, cs); + table->field[6]->store(con_type, con_len, cs); + table->field[7]->store((longlong) idx, TRUE); +} + + +static int get_schema_key_column_usage_record(THD *thd, TABLE_LIST *tables, + TABLE *table, bool res, + const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name) +{ + DBUG_ENTER("get_schema_key_column_usage_record"); + if (res) + { + if (unlikely(thd->is_error())) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + thd->get_stmt_da()->sql_errno(), + thd->get_stmt_da()->message()); + thd->clear_error(); + DBUG_RETURN(0); + } + else if 
(!tables->view) + { + List f_key_list; + TABLE *show_table= tables->table; + KEY *key_info=show_table->s->key_info; + uint primary_key= show_table->s->primary_key; + show_table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK | + HA_STATUS_TIME); + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + bool need_column_checks= false; + /* we know that the table or at least some of the columns have + necessary privileges, but the caller didn't pass down the GRANT_INFO + object, so we have to rediscover everything again :( */ + if (!(thd->col_access & TABLE_ACLS)) + { + check_grant(thd, SELECT_ACL, tables, 0, 1, 1); + + if (!(tables->grant.privilege & TABLE_ACLS)) + need_column_checks= true; + } +#endif + + for (uint i=0 ; i < show_table->s->keys ; i++, key_info++) + { + if (i != primary_key && !(key_info->flags & HA_NOSAME)) + continue; + uint f_idx= 0; + KEY_PART_INFO *key_part= key_info->key_part; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (need_column_checks) + { + uint j; + for (j=0 ; j < key_info->user_defined_key_parts ; j++,key_part++) + { + uint access= get_column_grant(thd, &tables->grant, db_name->str, + table_name->str, + key_part->field->field_name.str); + + if (!access) + break; + } + if (j != key_info->user_defined_key_parts) + continue; + key_part= key_info->key_part; + } +#endif + + for (uint j=0 ; j < key_info->user_defined_key_parts ; j++,key_part++) + { + f_idx++; + restore_record(table, s->default_values); + store_key_column_usage(table, db_name, table_name, + key_info->name.str, key_info->name.length, + key_part->field->field_name.str, + key_part->field->field_name.length, + (longlong) f_idx); + if (schema_table_store_record(thd, table)) + DBUG_RETURN(1); + } + } + + show_table->file->get_foreign_key_list(thd, &f_key_list); + FOREIGN_KEY_INFO *f_key_info; + List_iterator_fast fkey_it(f_key_list); + while ((f_key_info= fkey_it++)) + { + LEX_CSTRING *f_info; + LEX_CSTRING *r_info; + List_iterator_fast it(f_key_info->foreign_fields), + 
it1(f_key_info->referenced_fields); + uint f_idx= 0; + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (need_column_checks) + { + while ((r_info= it1++)) + { + uint access= get_column_grant(thd, &tables->grant, db_name->str, + table_name->str, r_info->str); + + if (!access) + break; + } + if (!it1.at_end()) + continue; + it1.rewind(); + } +#endif + while ((f_info= it++)) + { + r_info= it1++; + f_idx++; + restore_record(table, s->default_values); + store_key_column_usage(table, db_name, table_name, + f_key_info->foreign_id->str, + f_key_info->foreign_id->length, + f_info->str, f_info->length, + (longlong) f_idx); + table->field[8]->store((longlong) f_idx, TRUE); + table->field[8]->set_notnull(); + table->field[9]->store(f_key_info->referenced_db->str, + f_key_info->referenced_db->length, + system_charset_info); + table->field[9]->set_notnull(); + table->field[10]->store(f_key_info->referenced_table->str, + f_key_info->referenced_table->length, + system_charset_info); + table->field[10]->set_notnull(); + table->field[11]->store(r_info->str, r_info->length, + system_charset_info); + table->field[11]->set_notnull(); + if (schema_table_store_record(thd, table)) + DBUG_RETURN(1); + } + } + } + DBUG_RETURN(res); +} + + +#ifdef WITH_PARTITION_STORAGE_ENGINE +static void collect_partition_expr(THD *thd, List &field_list, + String *str) +{ + List_iterator part_it(field_list); + ulong no_fields= field_list.elements; + const char *field_str; + str->length(0); + while ((field_str= part_it++)) + { + append_identifier(thd, str, field_str, strlen(field_str)); + if (--no_fields != 0) + str->append(','); + } + return; +} + +#endif + + +static void store_schema_partitions_record(THD *thd, TABLE *schema_table, + TABLE *showing_table, + partition_element *part_elem, + handler *file, uint part_id) +{ + TABLE* table= schema_table; + CHARSET_INFO *cs= system_charset_info; + PARTITION_STATS stat_info; + MYSQL_TIME time; + file->get_dynamic_partition_info(&stat_info, part_id); + 
table->field[0]->store(STRING_WITH_LEN("def"), cs); + table->field[12]->store((longlong) stat_info.records, TRUE); + table->field[13]->store((longlong) stat_info.mean_rec_length, TRUE); + table->field[14]->store((longlong) stat_info.data_file_length, TRUE); + if (stat_info.max_data_file_length) + { + table->field[15]->store((longlong) stat_info.max_data_file_length, TRUE); + table->field[15]->set_notnull(); + } + table->field[16]->store((longlong) stat_info.index_file_length, TRUE); + table->field[17]->store((longlong) stat_info.delete_length, TRUE); + if (stat_info.create_time) + { + thd->variables.time_zone->gmt_sec_to_TIME(&time, + (my_time_t)stat_info.create_time); + table->field[18]->store_time(&time); + table->field[18]->set_notnull(); + } + if (stat_info.update_time) + { + thd->variables.time_zone->gmt_sec_to_TIME(&time, + (my_time_t)stat_info.update_time); + table->field[19]->store_time(&time); + table->field[19]->set_notnull(); + } + if (stat_info.check_time) + { + thd->variables.time_zone->gmt_sec_to_TIME(&time, + (my_time_t)stat_info.check_time); + table->field[20]->store_time(&time); + table->field[20]->set_notnull(); + } + if (file->ha_table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM)) + { + table->field[21]->store((longlong) stat_info.check_sum, TRUE); + table->field[21]->set_notnull(); + } + if (part_elem) + { + if (part_elem->part_comment) + table->field[22]->store(part_elem->part_comment, + strlen(part_elem->part_comment), cs); + else + table->field[22]->store(STRING_WITH_LEN(""), cs); + if (part_elem->nodegroup_id != UNDEF_NODEGROUP) + table->field[23]->store((longlong) part_elem->nodegroup_id, TRUE); + else + table->field[23]->store(STRING_WITH_LEN("default"), cs); + + table->field[24]->set_notnull(); + table->field[24]->set_null(); // Tablespace + } + return; +} + +#ifdef WITH_PARTITION_STORAGE_ENGINE +static int get_partition_column_description(THD *thd, partition_info *part_info, + part_elem_value *list_value, String &tmp_str) +{ + 
uint num_elements= part_info->part_field_list.elements; + uint i; + DBUG_ENTER("get_partition_column_description"); + + for (i= 0; i < num_elements; i++) + { + part_column_list_val *col_val= &list_value->col_val_array[i]; + if (col_val->max_value) + tmp_str.append(STRING_WITH_LEN("MAXVALUE")); + else if (col_val->null_value) + tmp_str.append(NULL_clex_str); + else + { + Item *item= col_val->item_expression; + StringBuffer val; + const Field *field= part_info->part_field_array[i]; + const Type_handler *th= field->type_handler(); + th->partition_field_append_value(&val, item, + field->charset(), + PARTITION_VALUE_PRINT_MODE_SHOW); + tmp_str.append(val); + } + if (i != num_elements - 1) + tmp_str.append(','); + } + DBUG_RETURN(0); +} +#endif /* WITH_PARTITION_STORAGE_ENGINE */ + +static int get_schema_partitions_record(THD *thd, TABLE_LIST *tables, + TABLE *table, bool res, + const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name) +{ + CHARSET_INFO *cs= system_charset_info; + char buff[61]; + String tmp_res(buff, sizeof(buff), cs); + String tmp_str; + TABLE *show_table= tables->table; + handler *file; +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info; +#endif + DBUG_ENTER("get_schema_partitions_record"); + + if (res) + { + if (unlikely(thd->is_error())) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + thd->get_stmt_da()->sql_errno(), + thd->get_stmt_da()->message()); + thd->clear_error(); + DBUG_RETURN(0); + } + file= show_table->file; +#ifdef WITH_PARTITION_STORAGE_ENGINE + part_info= show_table->part_info; + if (part_info) + { + partition_element *part_elem; + List_iterator part_it(part_info->partitions); + uint part_pos= 0, part_id= 0; + + restore_record(table, s->default_values); + table->field[0]->store(STRING_WITH_LEN("def"), cs); + table->field[1]->store(db_name->str, db_name->length, cs); + table->field[2]->store(table_name->str, table_name->length, cs); + + + /* Partition method*/ + switch (part_info->part_type) { + case 
RANGE_PARTITION: + case LIST_PARTITION: + tmp_res.length(0); + if (part_info->part_type == RANGE_PARTITION) + tmp_res.append(STRING_WITH_LEN("RANGE")); + else + tmp_res.append(STRING_WITH_LEN("LIST")); + if (part_info->column_list) + tmp_res.append(STRING_WITH_LEN(" COLUMNS")); + table->field[7]->store(tmp_res.ptr(), tmp_res.length(), cs); + break; + case HASH_PARTITION: + tmp_res.length(0); + if (part_info->linear_hash_ind) + tmp_res.append(STRING_WITH_LEN("LINEAR ")); + if (part_info->list_of_part_fields) + tmp_res.append(STRING_WITH_LEN("KEY")); + else + tmp_res.append(STRING_WITH_LEN("HASH")); + table->field[7]->store(tmp_res.ptr(), tmp_res.length(), cs); + break; + case VERSIONING_PARTITION: + table->field[7]->store(STRING_WITH_LEN("SYSTEM_TIME"), cs); + break; + default: + DBUG_ASSERT(0); + my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATAL)); + DBUG_RETURN(1); + } + table->field[7]->set_notnull(); + + /* Partition expression */ + if (part_info->part_expr) + { + StringBuffer str(cs); + part_info->part_expr->print_for_table_def(&str); + table->field[9]->store(str.ptr(), str.length(), str.charset()); + } + else if (part_info->list_of_part_fields) + { + collect_partition_expr(thd, part_info->part_field_list, &tmp_str); + table->field[9]->store(tmp_str.ptr(), tmp_str.length(), cs); + } + table->field[9]->set_notnull(); + + if (part_info->is_sub_partitioned()) + { + /* Subpartition method */ + tmp_res.length(0); + if (part_info->linear_hash_ind) + tmp_res.append(STRING_WITH_LEN("LINEAR ")); + if (part_info->list_of_subpart_fields) + tmp_res.append(STRING_WITH_LEN("KEY")); + else + tmp_res.append(STRING_WITH_LEN("HASH")); + table->field[8]->store(tmp_res.ptr(), tmp_res.length(), cs); + table->field[8]->set_notnull(); + + /* Subpartition expression */ + if (part_info->subpart_expr) + { + StringBuffer str(cs); + part_info->subpart_expr->print_for_table_def(&str); + table->field[10]->store(str.ptr(), str.length(), str.charset()); + } + else if 
(part_info->list_of_subpart_fields) + { + collect_partition_expr(thd, part_info->subpart_field_list, &tmp_str); + table->field[10]->store(tmp_str.ptr(), tmp_str.length(), cs); + } + table->field[10]->set_notnull(); + } + + while ((part_elem= part_it++)) + { + table->field[3]->store(part_elem->partition_name, + strlen(part_elem->partition_name), cs); + table->field[3]->set_notnull(); + /* PARTITION_ORDINAL_POSITION */ + table->field[5]->store((longlong) ++part_pos, TRUE); + table->field[5]->set_notnull(); + + /* Partition description */ + if (part_info->part_type == RANGE_PARTITION) + { + if (part_info->column_list) + { + List_iterator list_val_it(part_elem->list_val_list); + part_elem_value *list_value= list_val_it++; + tmp_str.length(0); + if (get_partition_column_description(thd, part_info, list_value, + tmp_str)) + DBUG_RETURN(1); + table->field[11]->store(tmp_str.ptr(), tmp_str.length(), cs); + } + else + { + if (part_elem->range_value != LONGLONG_MAX) + table->field[11]->store((longlong) part_elem->range_value, FALSE); + else + table->field[11]->store(STRING_WITH_LEN("MAXVALUE"), cs); + } + table->field[11]->set_notnull(); + } + else if (part_info->part_type == LIST_PARTITION) + { + List_iterator list_val_it(part_elem->list_val_list); + part_elem_value *list_value; + uint num_items= part_elem->list_val_list.elements; + tmp_str.length(0); + tmp_res.length(0); + if (part_elem->has_null_value) + { + tmp_str.append(NULL_clex_str); + if (num_items > 0) + tmp_str.append(','); + } + while ((list_value= list_val_it++)) + { + if (part_info->column_list) + { + if (part_info->part_field_list.elements > 1U) + tmp_str.append(STRING_WITH_LEN("(")); + if (get_partition_column_description(thd, part_info, list_value, + tmp_str)) + DBUG_RETURN(1); + if (part_info->part_field_list.elements > 1U) + tmp_str.append(')'); + } + else + { + if (!list_value->unsigned_flag) + tmp_res.set(list_value->value, cs); + else + tmp_res.set((ulonglong)list_value->value, cs); + 
tmp_str.append(tmp_res); + } + if (--num_items != 0) + tmp_str.append(','); + } + table->field[11]->store(tmp_str.ptr(), tmp_str.length(), cs); + table->field[11]->set_notnull(); + } + else if (part_info->part_type == VERSIONING_PARTITION) + { + if (part_elem == part_info->vers_info->now_part) + { + table->field[11]->store(STRING_WITH_LEN("CURRENT"), cs); + table->field[11]->set_notnull(); + } + else if (part_info->vers_info->interval.is_set()) + { + Timeval tv((my_time_t) part_elem->range_value, 0); + table->field[11]->store_timestamp_dec(tv, AUTO_SEC_PART_DIGITS); + table->field[11]->set_notnull(); + } + } + + if (part_elem->subpartitions.elements) + { + List_iterator sub_it(part_elem->subpartitions); + partition_element *subpart_elem; + uint subpart_pos= 0; + + while ((subpart_elem= sub_it++)) + { + table->field[4]->store(subpart_elem->partition_name, + strlen(subpart_elem->partition_name), cs); + table->field[4]->set_notnull(); + /* SUBPARTITION_ORDINAL_POSITION */ + table->field[6]->store((longlong) ++subpart_pos, TRUE); + table->field[6]->set_notnull(); + + store_schema_partitions_record(thd, table, show_table, subpart_elem, + file, part_id); + part_id++; + if(schema_table_store_record(thd, table)) + DBUG_RETURN(1); + } + } + else + { + store_schema_partitions_record(thd, table, show_table, part_elem, + file, part_id); + part_id++; + if(schema_table_store_record(thd, table)) + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); + } + else +#endif + { + store_schema_partitions_record(thd, table, show_table, 0, file, 0); + if(schema_table_store_record(thd, table)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +#ifdef HAVE_EVENT_SCHEDULER +/* + Loads an event from mysql.event and copies it's data to a row of + I_S.EVENTS + + Synopsis + copy_event_to_schema_table() + thd Thread + sch_table The schema table (information_schema.event) + event_table The event table to use for loading (mysql.event). 
+ + Returns + 0 OK + 1 Error +*/ + +int +copy_event_to_schema_table(THD *thd, TABLE *sch_table, TABLE *event_table) +{ + const char *wild= thd->lex->wild ? thd->lex->wild->ptr() : NullS; + CHARSET_INFO *scs= system_charset_info; + MYSQL_TIME time; + Event_timed et; + DBUG_ENTER("copy_event_to_schema_table"); + + restore_record(sch_table, s->default_values); + + if (et.load_from_row(thd, event_table)) + { + my_error(ER_CANNOT_LOAD_FROM_TABLE_V2, MYF(0), "mysql", "event"); + DBUG_RETURN(1); + } + + if (!(!wild || !wild[0] || !wild_case_compare(scs, et.name.str, wild))) + DBUG_RETURN(0); + + /* + Skip events in schemas one does not have access to. The check is + optimized. It's guaranteed in case of SHOW EVENTS that the user + has access. + */ + if (thd->lex->sql_command != SQLCOM_SHOW_EVENTS && + check_access(thd, EVENT_ACL, et.dbname.str, NULL, NULL, 0, 1)) + DBUG_RETURN(0); + + sch_table->field[ISE_EVENT_CATALOG]->store(STRING_WITH_LEN("def"), scs); + sch_table->field[ISE_EVENT_SCHEMA]-> + store(et.dbname.str, et.dbname.length,scs); + sch_table->field[ISE_EVENT_NAME]-> + store(et.name.str, et.name.length, scs); + sch_table->field[ISE_DEFINER]-> + store(et.definer.str, et.definer.length, scs); + const String *tz_name= et.time_zone->get_name(); + sch_table->field[ISE_TIME_ZONE]-> + store(tz_name->ptr(), tz_name->length(), scs); + sch_table->field[ISE_EVENT_BODY]-> + store(STRING_WITH_LEN("SQL"), scs); + sch_table->field[ISE_EVENT_DEFINITION]->store( + et.body_utf8.str, et.body_utf8.length, scs); + + /* SQL_MODE */ + { + LEX_CSTRING sql_mode; + sql_mode_string_representation(thd, et.sql_mode, &sql_mode); + sch_table->field[ISE_SQL_MODE]-> + store(sql_mode.str, sql_mode.length, scs); + } + + int not_used=0; + + if (et.expression) + { + String show_str; + /* type */ + sch_table->field[ISE_EVENT_TYPE]->store(STRING_WITH_LEN("RECURRING"), scs); + + if (Events::reconstruct_interval_expression(&show_str, et.interval, + et.expression)) + DBUG_RETURN(1); + + 
sch_table->field[ISE_INTERVAL_VALUE]->set_notnull(); + sch_table->field[ISE_INTERVAL_VALUE]-> + store(show_str.ptr(), show_str.length(), scs); + + LEX_CSTRING *ival= &interval_type_to_name[et.interval]; + sch_table->field[ISE_INTERVAL_FIELD]->set_notnull(); + sch_table->field[ISE_INTERVAL_FIELD]->store(ival->str, ival->length, scs); + + /* starts & ends . STARTS is always set - see sql_yacc.yy */ + et.time_zone->gmt_sec_to_TIME(&time, et.starts); + sch_table->field[ISE_STARTS]->set_notnull(); + sch_table->field[ISE_STARTS]->store_time(&time); + + if (!et.ends_null) + { + et.time_zone->gmt_sec_to_TIME(&time, et.ends); + sch_table->field[ISE_ENDS]->set_notnull(); + sch_table->field[ISE_ENDS]->store_time(&time); + } + } + else + { + /* type */ + sch_table->field[ISE_EVENT_TYPE]->store(STRING_WITH_LEN("ONE TIME"), scs); + + et.time_zone->gmt_sec_to_TIME(&time, et.execute_at); + sch_table->field[ISE_EXECUTE_AT]->set_notnull(); + sch_table->field[ISE_EXECUTE_AT]->store_time(&time); + } + + /* status */ + + switch (et.status) + { + case Event_parse_data::ENABLED: + sch_table->field[ISE_STATUS]->store(STRING_WITH_LEN("ENABLED"), scs); + break; + case Event_parse_data::SLAVESIDE_DISABLED: + sch_table->field[ISE_STATUS]->store(STRING_WITH_LEN("SLAVESIDE_DISABLED"), + scs); + break; + case Event_parse_data::DISABLED: + sch_table->field[ISE_STATUS]->store(STRING_WITH_LEN("DISABLED"), scs); + break; + default: + DBUG_ASSERT(0); + } + sch_table->field[ISE_ORIGINATOR]->store(et.originator, TRUE); + + /* on_completion */ + if (et.on_completion == Event_parse_data::ON_COMPLETION_DROP) + sch_table->field[ISE_ON_COMPLETION]-> + store(STRING_WITH_LEN("NOT PRESERVE"), scs); + else + sch_table->field[ISE_ON_COMPLETION]-> + store(STRING_WITH_LEN("PRESERVE"), scs); + + number_to_datetime_or_date(et.created, 0, &time, 0, ¬_used); + DBUG_ASSERT(not_used==0); + sch_table->field[ISE_CREATED]->store_time(&time); + + number_to_datetime_or_date(et.modified, 0, &time, 0, ¬_used); + 
DBUG_ASSERT(not_used==0); + sch_table->field[ISE_LAST_ALTERED]->store_time(&time); + + if (et.last_executed) + { + et.time_zone->gmt_sec_to_TIME(&time, et.last_executed); + sch_table->field[ISE_LAST_EXECUTED]->set_notnull(); + sch_table->field[ISE_LAST_EXECUTED]->store_time(&time); + } + + sch_table->field[ISE_EVENT_COMMENT]-> + store(et.comment.str, et.comment.length, scs); + + sch_table->field[ISE_CLIENT_CS]->set_notnull(); + sch_table->field[ISE_CLIENT_CS]->store(&et.creation_ctx->get_client_cs()-> + cs_name, scs); + sch_table->field[ISE_CONNECTION_CL]->set_notnull(); + sch_table->field[ISE_CONNECTION_CL]->store(&et.creation_ctx-> + get_connection_cl()->coll_name, + scs); + sch_table->field[ISE_DB_CL]->set_notnull(); + sch_table->field[ISE_DB_CL]->store(&et.creation_ctx->get_db_cl()->coll_name, + scs); + + if (schema_table_store_record(thd, sch_table)) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} +#endif + +int fill_open_tables(THD *thd, TABLE_LIST *tables, COND *cond) +{ + DBUG_ENTER("fill_open_tables"); + const char *wild= thd->lex->wild ? thd->lex->wild->ptr() : NullS; + TABLE *table= tables->table; + CHARSET_INFO *cs= system_charset_info; + OPEN_TABLE_LIST *open_list; + if (!(open_list= list_open_tables(thd, thd->lex->first_select_lex()->db.str, + wild)) + && thd->is_fatal_error) + DBUG_RETURN(1); + + for (; open_list ; open_list=open_list->next) + { + restore_record(table, s->default_values); + table->field[0]->store(open_list->db, strlen(open_list->db), cs); + table->field[1]->store(open_list->table, strlen(open_list->table), cs); + table->field[2]->store((longlong) open_list->in_use, TRUE); + table->field[3]->store((longlong) open_list->locked, TRUE); + if (unlikely(schema_table_store_record(thd, table))) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +int fill_variables(THD *thd, TABLE_LIST *tables, COND *cond) +{ + DBUG_ENTER("fill_variables"); + int res= 0; + LEX *lex= thd->lex; + const char *wild= lex->wild ? 
lex->wild->ptr() : NullS; + enum enum_schema_tables schema_table_idx= + get_schema_table_idx(tables->schema_table); + enum enum_var_type scope= OPT_SESSION; + bool upper_case_names= lex->sql_command != SQLCOM_SHOW_VARIABLES; + bool sorted_vars= lex->sql_command == SQLCOM_SHOW_VARIABLES; + + if ((sorted_vars && lex->option_type == OPT_GLOBAL) || + schema_table_idx == SCH_GLOBAL_VARIABLES) + scope= OPT_GLOBAL; + + COND *partial_cond= make_cond_for_info_schema(thd, cond, tables); + + mysql_prlock_rdlock(&LOCK_system_variables_hash); + + /* + Avoid recursive LOCK_system_variables_hash acquisition in + intern_sys_var_ptr() by pre-syncing dynamic session variables. + */ + if (scope == OPT_SESSION && + (!thd->variables.dynamic_variables_ptr || + global_system_variables.dynamic_variables_head > + thd->variables.dynamic_variables_head)) + sync_dynamic_session_variables(thd, true); + + res= show_status_array(thd, wild, enumerate_sys_vars(thd, sorted_vars, scope), + scope, NULL, "", tables->table, + upper_case_names, partial_cond); + mysql_prlock_unlock(&LOCK_system_variables_hash); + DBUG_RETURN(res); +} + +int add_symbol_to_table(const char* name, TABLE* table){ + DBUG_ENTER("add_symbol_to_table"); + + size_t length= strlen(name); + + // If you've added a new SQL keyword longer than KEYWORD_SIZE, + // please increase the defined max length + DBUG_ASSERT(length < KEYWORD_SIZE); + + restore_record(table, s->default_values); + table->field[0]->set_notnull(); + table->field[0]->store(name, length, + system_charset_info); + if (schema_table_store_record(table->in_use, table)) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + +int fill_i_s_keywords(THD *thd, TABLE_LIST *tables, COND *cond) +{ + DBUG_ENTER("fill_i_s_keywords"); + + TABLE *table= tables->table; + + for (uint i= 0; i < symbols_length; i++){ + const char *name= symbols[i].name; + if (add_symbol_to_table(name, table)) + DBUG_RETURN(1); + } + + DBUG_RETURN(0); +} + +int fill_i_s_sql_functions(THD *thd, TABLE_LIST *tables, 
COND *cond) +{ + DBUG_ENTER("fill_i_s_sql_functions"); + + TABLE *table= tables->table; + + for (uint i= 0; i < sql_functions_length; i++) + if (add_symbol_to_table(sql_functions[i].name, table)) + DBUG_RETURN(1); + + for (uint i= 0; i < native_func_registry_array.count(); i++) + if (add_symbol_to_table(native_func_registry_array.element(i).name.str, + table)) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +int fill_status(THD *thd, TABLE_LIST *tables, COND *cond) +{ + DBUG_ENTER("fill_status"); + LEX *lex= thd->lex; + const char *wild= lex->wild ? lex->wild->ptr() : NullS; + int res= 0; + STATUS_VAR *tmp1, tmp; + enum enum_schema_tables schema_table_idx= + get_schema_table_idx(tables->schema_table); + enum enum_var_type scope; + bool upper_case_names= lex->sql_command != SQLCOM_SHOW_STATUS; + + if (lex->sql_command == SQLCOM_SHOW_STATUS) + { + scope= lex->option_type; + if (scope == OPT_GLOBAL) + tmp1= &tmp; + else + tmp1= thd->initial_status_var; + } + else if (schema_table_idx == SCH_GLOBAL_STATUS) + { + scope= OPT_GLOBAL; + tmp1= &tmp; + } + else + { + scope= OPT_SESSION; + tmp1= &thd->status_var; + } + + COND *partial_cond= make_cond_for_info_schema(thd, cond, tables); + // Evaluate and cache const subqueries now, before the mutex. 
+ if (partial_cond) + partial_cond->val_int(); + + tmp.local_memory_used= 0; // meaning tmp was not populated yet + + mysql_rwlock_rdlock(&LOCK_all_status_vars); + res= show_status_array(thd, wild, + (SHOW_VAR *)all_status_vars.buffer, + scope, tmp1, "", tables->table, + upper_case_names, partial_cond); + mysql_rwlock_unlock(&LOCK_all_status_vars); + DBUG_RETURN(res); +} + + +/* + Fill and store records into I_S.referential_constraints table + + SYNOPSIS + get_referential_constraints_record() + thd thread handle + tables table list struct(processed table) + table I_S table + res 1 means the error during opening of the processed table + 0 means processed table is opened without error + base_name db name + file_name table name + + RETURN + 0 ok + # error +*/ + +static int +get_referential_constraints_record(THD *thd, TABLE_LIST *tables, + TABLE *table, bool res, + const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name) +{ + CHARSET_INFO *cs= system_charset_info; + LEX_CSTRING *s; + DBUG_ENTER("get_referential_constraints_record"); + + if (res) + { + if (unlikely(thd->is_error())) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + thd->get_stmt_da()->sql_errno(), + thd->get_stmt_da()->message()); + thd->clear_error(); + DBUG_RETURN(0); + } + if (!tables->view) + { + List f_key_list; + TABLE *show_table= tables->table; + show_table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK | + HA_STATUS_TIME); + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + /* need any non-SELECT privilege on the table or any of its columns */ + const privilege_t need= TABLE_ACLS & ~SELECT_ACL; + if (!(thd->col_access & need)) + { + /* we know that the table or at least some of the columns have + necessary privileges, but the caller didn't pass down the GRANT_INFO + object, so we have to rediscover everything again :( */ + check_grant(thd, SELECT_ACL, tables, 0, 1, 1); + + if (!(tables->grant.all_privilege() & need)) + DBUG_RETURN(0); + } +#endif + + show_table->file->get_foreign_key_list(thd, 
&f_key_list); + FOREIGN_KEY_INFO *f_key_info; + List_iterator_fast it(f_key_list); + while ((f_key_info= it++)) + { + restore_record(table, s->default_values); + table->field[0]->store(STRING_WITH_LEN("def"), cs); + table->field[1]->store(db_name->str, db_name->length, cs); + table->field[9]->store(table_name->str, table_name->length, cs); + table->field[2]->store(f_key_info->foreign_id->str, + f_key_info->foreign_id->length, cs); + table->field[3]->store(STRING_WITH_LEN("def"), cs); + table->field[4]->store(f_key_info->referenced_db->str, + f_key_info->referenced_db->length, cs); + bool show_ref_table= true; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + /* need any non-SELECT privilege on the table or any of its columns */ + if (!(thd->col_access & need)) + { + TABLE_LIST table_acl_check; + bzero((char*) &table_acl_check, sizeof(table_acl_check)); + table_acl_check.db= *f_key_info->referenced_db; + table_acl_check.table_name= *f_key_info->referenced_table; + table_acl_check.grant.privilege= thd->col_access; + check_grant(thd, SELECT_ACL, &table_acl_check, 0, 1, 1); + + if (!(table_acl_check.grant.all_privilege() & need)) + show_ref_table= false; + } +#endif + if (show_ref_table) + { + table->field[10]->set_notnull(); + table->field[10]->store(f_key_info->referenced_table->str, + f_key_info->referenced_table->length, cs); + } + if (f_key_info->referenced_key_name) + { + table->field[5]->store(f_key_info->referenced_key_name->str, + f_key_info->referenced_key_name->length, cs); + table->field[5]->set_notnull(); + } + else + table->field[5]->set_null(); + table->field[6]->store(STRING_WITH_LEN("NONE"), cs); + s= fk_option_name(f_key_info->update_method); + table->field[7]->store(s->str, s->length, cs); + s= fk_option_name(f_key_info->delete_method); + table->field[8]->store(s->str, s->length, cs); + if (schema_table_store_record(thd, table)) + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + +struct schema_table_ref +{ + const char *table_name; + ST_SCHEMA_TABLE *schema_table; 
+}; + +/* + Find schema_tables elment by name + + SYNOPSIS + find_schema_table_in_plugin() + thd thread handler + plugin plugin + table_name table name + + RETURN + 0 table not found + 1 found the schema table +*/ +static my_bool find_schema_table_in_plugin(THD *thd, plugin_ref plugin, + void* p_table) +{ + schema_table_ref *p_schema_table= (schema_table_ref *)p_table; + const char* table_name= p_schema_table->table_name; + ST_SCHEMA_TABLE *schema_table= plugin_data(plugin, ST_SCHEMA_TABLE *); + DBUG_ENTER("find_schema_table_in_plugin"); + + if (!my_strcasecmp(system_charset_info, + schema_table->table_name, + table_name)) + { + my_plugin_lock(thd, plugin); + p_schema_table->schema_table= schema_table; + DBUG_RETURN(1); + } + + DBUG_RETURN(0); +} + + +/* + Find schema_tables element by name + + SYNOPSIS + find_schema_table() + thd thread handler + table_name table name + + RETURN + 0 table not found + # pointer to 'schema_tables' element +*/ + +ST_SCHEMA_TABLE *find_schema_table(THD *thd, const LEX_CSTRING *table_name, + bool *in_plugin) +{ + schema_table_ref schema_table_a; + ST_SCHEMA_TABLE *schema_table= schema_tables; + DBUG_ENTER("find_schema_table"); + + *in_plugin= false; + for (; schema_table->table_name; schema_table++) + { + if (!my_strcasecmp(system_charset_info, + schema_table->table_name, + table_name->str)) + DBUG_RETURN(schema_table); + } + + *in_plugin= true; + schema_table_a.table_name= table_name->str; + if (plugin_foreach(thd, find_schema_table_in_plugin, + MYSQL_INFORMATION_SCHEMA_PLUGIN, &schema_table_a)) + DBUG_RETURN(schema_table_a.schema_table); + + DBUG_RETURN(NULL); +} + + +ST_SCHEMA_TABLE *get_schema_table(enum enum_schema_tables schema_table_idx) +{ + return &schema_tables[schema_table_idx]; +} + +/** + Create information_schema table using schema_table data. + + @note + For MYSQL_TYPE_DECIMAL fields only, the field_length member has encoded + into it two numbers, based on modulus of base-10 numbers. 
In the ones + position is the number of decimals. Tens position is unused. In the + hundreds and thousands position is a two-digit decimal number representing + length. Encode this value with (length*100)+decimals , where + 0schema_table; + ST_FIELD_INFO *fields= schema_table->fields_info; + bool need_all_fields= table_list->schema_table_reformed || // SHOW command + thd->lex->only_view_structure(); // need table structure + bool keep_row_order; + TMP_TABLE_PARAM *tmp_table_param; + SELECT_LEX *select_lex; + DBUG_ENTER("create_schema_table"); + + for (; !fields->end_marker(); fields++) + field_count++; + + tmp_table_param = new (thd->mem_root) TMP_TABLE_PARAM; + tmp_table_param->init(); + tmp_table_param->table_charset= system_charset_info; + tmp_table_param->field_count= field_count; + tmp_table_param->schema_table= 1; + select_lex= table_list->select_lex; + keep_row_order= is_show_command(thd); + if (!(table= + create_tmp_table_for_schema(thd, tmp_table_param, *schema_table, + (select_lex->options | + thd->variables.option_bits | + TMP_TABLE_ALL_COLUMNS), + table_list->alias, !need_all_fields, + keep_row_order))) + DBUG_RETURN(0); + my_bitmap_map* bitmaps= + (my_bitmap_map*) thd->alloc(bitmap_buffer_size(field_count)); + my_bitmap_init(&table->def_read_set, bitmaps, field_count); + table->read_set= &table->def_read_set; + bitmap_clear_all(table->read_set); + table_list->schema_table_param= tmp_table_param; + DBUG_RETURN(table); +} + + +/* + For old SHOW compatibility. 
It is used when + old SHOW doesn't have generated column names + Make list of fields for SHOW + + SYNOPSIS + make_old_format() + thd thread handler + schema_table pointer to 'schema_tables' element + + RETURN + 1 error + 0 success +*/ + +static int make_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table) +{ + ST_FIELD_INFO *field_info= schema_table->fields_info; + Name_resolution_context *context= &thd->lex->first_select_lex()->context; + for (; !field_info->end_marker(); field_info++) + { + if (field_info->old_name().str) + { + LEX_CSTRING field_name= field_info->name(); + Item_field *field= new (thd->mem_root) + Item_field(thd, context, field_name); + if (field) + { + field->set_name(thd, field_info->old_name()); + if (add_item_to_list(thd, field)) + return 1; + } + } + } + return 0; +} + + +int make_schemata_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table) +{ + char tmp[128]; + LEX *lex= thd->lex; + SELECT_LEX *sel= lex->current_select; + Name_resolution_context *context= &sel->context; + + if (!sel->item_list.elements) + { + ST_FIELD_INFO *field_info= &schema_table->fields_info[1]; + String buffer(tmp,sizeof(tmp), system_charset_info); + Item_field *field= new (thd->mem_root) Item_field(thd, context, + field_info->name()); + if (!field || add_item_to_list(thd, field)) + return 1; + buffer.length(0); + buffer.append(field_info->old_name()); + if (lex->wild && lex->wild->ptr()) + { + buffer.append(STRING_WITH_LEN(" (")); + buffer.append(*lex->wild); + buffer.append(')'); + } + field->set_name(thd, &buffer); + } + return 0; +} + + +int make_table_names_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table) +{ + char tmp[128]; + String buffer(tmp, sizeof(tmp), system_charset_info); + LEX *lex= thd->lex; + Name_resolution_context *context= &lex->first_select_lex()->context; + ST_FIELD_INFO *field_info= &schema_table->fields_info[2]; + LEX_CSTRING field_name= field_info->name(); + + buffer.length(0); + buffer.append(field_info->old_name()); + 
buffer.append(&lex->first_select_lex()->db); + if (lex->wild && lex->wild->ptr()) + { + buffer.append(STRING_WITH_LEN(" (")); + buffer.append(*lex->wild); + buffer.append(')'); + } + Item_field *field= new (thd->mem_root) Item_field(thd, context, field_name); + if (add_item_to_list(thd, field)) + return 1; + field->set_name(thd, &buffer); + if (thd->lex->verbose) + { + field_info= &schema_table->fields_info[3]; + field= new (thd->mem_root) Item_field(thd, context, field_info->name()); + if (add_item_to_list(thd, field)) + return 1; + field->set_name(thd, field_info->old_name()); + } + return 0; +} + + +int make_columns_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table) +{ + int fields_arr[]= {3, 15, 14, 6, 16, 5, 17, 18, 19, -1}; + int *field_num= fields_arr; + ST_FIELD_INFO *field_info; + Name_resolution_context *context= &thd->lex->first_select_lex()->context; + + for (; *field_num >= 0; field_num++) + { + field_info= &schema_table->fields_info[*field_num]; + if (!thd->lex->verbose && (*field_num == 14 || + *field_num == 18 || + *field_num == 19)) + continue; + Item_field *field= new (thd->mem_root) Item_field(thd, context, + field_info->name()); + if (field) + { + field->set_name(thd, field_info->old_name()); + if (add_item_to_list(thd, field)) + return 1; + } + } + return 0; +} + + +int make_character_sets_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table) +{ + int fields_arr[]= {0, 2, 1, 3, -1}; + int *field_num= fields_arr; + ST_FIELD_INFO *field_info; + Name_resolution_context *context= &thd->lex->first_select_lex()->context; + + for (; *field_num >= 0; field_num++) + { + field_info= &schema_table->fields_info[*field_num]; + Item_field *field= new (thd->mem_root) Item_field(thd, context, + field_info->name()); + if (field) + { + field->set_name(thd, field_info->old_name()); + if (add_item_to_list(thd, field)) + return 1; + } + } + return 0; +} + + +int make_proc_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table) +{ + int fields_arr[]= {2, 3, 4, 27, 24, 
23, 22, 26, 28, 29, 30, -1}; + int *field_num= fields_arr; + ST_FIELD_INFO *field_info; + Name_resolution_context *context= &thd->lex->first_select_lex()->context; + + for (; *field_num >= 0; field_num++) + { + field_info= &schema_table->fields_info[*field_num]; + Item_field *field= new (thd->mem_root) Item_field(thd, context, + field_info->name()); + if (field) + { + field->set_name(thd, field_info->old_name()); + if (add_item_to_list(thd, field)) + return 1; + } + } + return 0; +} + + +/* + Create information_schema table + + SYNOPSIS + mysql_schema_table() + thd thread handler + lex pointer to LEX + table_list pointer to table_list + + RETURN + 0 success + 1 error +*/ + +int mysql_schema_table(THD *thd, LEX *lex, TABLE_LIST *table_list) +{ + TABLE *table; + DBUG_ENTER("mysql_schema_table"); + if (!(table= create_schema_table(thd, table_list))) + DBUG_RETURN(1); + table->s->tmp_table= SYSTEM_TMP_TABLE; + table->grant.privilege= SELECT_ACL; + /* + This test is necessary to make + case insensitive file systems + + upper case table names(information schema tables) + + views + working correctly + */ + if (table_list->schema_table_name.str) + table->alias_name_used= my_strcasecmp(table_alias_charset, + table_list->schema_table_name.str, + table_list->alias.str); + table_list->table= table; + table->next= thd->derived_tables; + thd->derived_tables= table; + table_list->select_lex->options |= OPTION_SCHEMA_TABLE; + lex->safe_to_cache_query= 0; + + if (table_list->schema_table_reformed) // show command + { + SELECT_LEX *sel= lex->current_select; + Item *item; + Field_translator *transl, *org_transl; + + if (table_list->field_translation) + { + Field_translator *end= table_list->field_translation_end; + for (transl= table_list->field_translation; transl < end; transl++) + { + if (transl->item->fix_fields_if_needed(thd, &transl->item)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); + } + List_iterator_fast it(sel->item_list); + if (!(transl= + 
(Field_translator*)(thd->stmt_arena-> + alloc(sel->item_list.elements * + sizeof(Field_translator))))) + { + DBUG_RETURN(1); + } + for (org_transl= transl; (item= it++); transl++) + { + transl->item= item; + transl->name= item->name; + if (item->fix_fields_if_needed(thd, &transl->item)) + DBUG_RETURN(1); + } + table_list->field_translation= org_transl; + table_list->field_translation_end= transl; + } + + DBUG_RETURN(0); +} + + +/* + Generate select from information_schema table + + SYNOPSIS + make_schema_select() + thd thread handler + sel pointer to SELECT_LEX + schema_table_idx index of 'schema_tables' element + + RETURN + 0 success + 1 error +*/ + +int make_schema_select(THD *thd, SELECT_LEX *sel, + ST_SCHEMA_TABLE *schema_table) +{ + LEX_CSTRING db, table; + DBUG_ENTER("make_schema_select"); + DBUG_PRINT("enter", ("mysql_schema_select: %s", schema_table->table_name)); + /* + We have to make non const db_name & table_name + because of lower_case_table_names + */ + if (!thd->make_lex_string(&db, INFORMATION_SCHEMA_NAME.str, + INFORMATION_SCHEMA_NAME.length)) + DBUG_RETURN(1); + + if (!thd->make_lex_string(&table, schema_table->table_name, + strlen(schema_table->table_name))) + DBUG_RETURN(1); + + if (schema_table->old_format(thd, schema_table)) + DBUG_RETURN(1); + + if (!sel->add_table_to_list(thd, new Table_ident(thd, &db, &table, 0), + 0, 0, TL_READ, MDL_SHARED_READ)) + DBUG_RETURN(1); + + sel->table_list.first->schema_table_reformed= 1; + DBUG_RETURN(0); +} + + +/* + Optimize reading from an I_S table. + + @detail + This function prepares a plan for populating an I_S table with + get_all_tables(). + + The plan is in IS_table_read_plan structure, it is saved in + tables->is_table_read_plan. 
+ + @return + false - Ok + true - Out Of Memory + +*/ + +static bool optimize_for_get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) +{ + SELECT_LEX *lsel= tables->schema_select_lex; + ST_SCHEMA_TABLE *schema_table= tables->schema_table; + enum enum_schema_tables schema_table_idx; + IS_table_read_plan *plan; + DBUG_ENTER("get_all_tables"); + + if (!(plan= new IS_table_read_plan())) + DBUG_RETURN(1); + + tables->is_table_read_plan= plan; + + schema_table_idx= get_schema_table_idx(schema_table); + tables->table_open_method= get_table_open_method(tables, schema_table, + schema_table_idx); + DBUG_PRINT("open_method", ("%d", tables->table_open_method)); + + /* + this branch processes SHOW FIELDS, SHOW INDEXES commands. + see sql_parse.cc, prepare_schema_table() function where + this values are initialized + */ + if (lsel && lsel->table_list.first) + { + /* These do not need to have a query plan */ + plan->trivial_show_command= true; + goto end; + } + + if (get_lookup_field_values(thd, cond, true, tables, + &plan->lookup_field_vals)) + { + plan->no_rows= true; + goto end; + } + + DBUG_PRINT("info",("db_name='%s', table_name='%s'", + plan->lookup_field_vals.db_value.str, + plan->lookup_field_vals.table_value.str)); + + if (!plan->lookup_field_vals.wild_db_value && + !plan->lookup_field_vals.wild_table_value) + { + /* + if lookup value is empty string then + it's impossible table name or db name + */ + if ((plan->lookup_field_vals.db_value.str && + !plan->lookup_field_vals.db_value.str[0]) || + (plan->lookup_field_vals.table_value.str && + !plan->lookup_field_vals.table_value.str[0])) + { + plan->no_rows= true; + goto end; + } + } + + if (plan->has_db_lookup_value() && plan->has_table_lookup_value()) + plan->partial_cond= 0; + else + plan->partial_cond= make_cond_for_info_schema(thd, cond, tables); + +end: + DBUG_RETURN(0); +} + + +bool optimize_schema_tables_memory_usage(List &tables) +{ + DBUG_ENTER("optimize_schema_tables_memory_usage"); + + List_iterator 
tli(tables); + + while (TABLE_LIST *table_list= tli++) + { + if (!table_list->schema_table) + continue; + + TABLE *table= table_list->table; + THD *thd=table->in_use; + + if (!thd->fill_information_schema_tables()) + continue; + + if (!table->is_created()) + { + TMP_TABLE_PARAM *p= table_list->schema_table_param; + TMP_ENGINE_COLUMNDEF *from_recinfo, *to_recinfo; + DBUG_ASSERT(table->s->keys == 0); + DBUG_ASSERT(table->s->uniques == 0); + + uchar *cur= table->field[0]->ptr; + /* first recinfo could be a NULL bitmap, not an actual Field */ + from_recinfo= to_recinfo= p->start_recinfo + (cur != table->record[0]); + for (uint i=0; i < table->s->fields; i++, from_recinfo++) + { + Field *field= table->field[i]; + DBUG_ASSERT(field->vcol_info == 0); + DBUG_ASSERT(from_recinfo->length); + DBUG_ASSERT(from_recinfo->length == field->pack_length_in_rec()); + if (bitmap_is_set(table->read_set, i)) + { + field->move_field(cur); + field->reset(); + *to_recinfo++= *from_recinfo; + cur+= from_recinfo->length; + } + else + { + field= new (thd->mem_root) Field_string(cur, 0, field->null_ptr, + field->null_bit, Field::NONE, + &field->field_name, field->dtcollation()); + field->init(table); + field->field_index= i; + DBUG_ASSERT(field->pack_length_in_rec() == 0); + table->field[i]= field; + } + } + if ((table->s->reclength= (ulong)(cur - table->record[0])) == 0) + { + /* all fields were optimized away. Force a non-0-length row */ + table->s->reclength= to_recinfo->length= 1; + to_recinfo->type= FIELD_NORMAL; + to_recinfo++; + } + store_record(table, s->default_values); + p->recinfo= to_recinfo; + + // TODO switch from Aria to Memory if all blobs were optimized away? + if (instantiate_tmp_table(table, p->keyinfo, p->start_recinfo, &p->recinfo, + table_list->select_lex->options | thd->variables.option_bits)) + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +/* + This is the optimizer part of get_schema_tables_result(). 
+*/ + +bool optimize_schema_tables_reads(JOIN *join) +{ + THD *thd= join->thd; + DBUG_ENTER("optimize_schema_tables_reads"); + + JOIN_TAB *tab; + for (tab= first_linear_tab(join, WITHOUT_BUSH_ROOTS, WITH_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) + { + if (!tab->table || !tab->table->pos_in_table_list) + continue; + + TABLE_LIST *table_list= tab->table->pos_in_table_list; + if (table_list->schema_table && thd->fill_information_schema_tables()) + { + /* A value of 0 indicates a dummy implementation */ + if (table_list->schema_table->fill_table == 0) + continue; + + /* skip I_S optimizations specific to get_all_tables */ + if (table_list->schema_table->fill_table != get_all_tables) + continue; + + Item *cond= tab->select_cond; + if (tab->cache_select && tab->cache_select->cond) + { + /* + If join buffering is used, we should use the condition that is + attached to the join cache. Cache condition has a part of WHERE that + can be checked when we're populating this table. + join_tab->select_cond is of no interest, because it only has + conditions that depend on both this table and previous tables in the + join order. + */ + cond= tab->cache_select->cond; + } + if (optimize_for_get_all_tables(thd, table_list, cond)) + DBUG_RETURN(1); // Handle OOM + } + } + DBUG_RETURN(0); +} + + +/* + Fill temporary schema tables before SELECT + + SYNOPSIS + get_schema_tables_result() + join join which use schema tables + executed_place place where I_S table processed + + SEE ALSO + The optimization part is done by get_schema_tables_result(). This function + is run on query execution. 
+ + RETURN + FALSE success + TRUE error +*/ + +bool get_schema_tables_result(JOIN *join, + enum enum_schema_table_state executed_place) +{ + THD *thd= join->thd; + LEX *lex= thd->lex; + bool result= 0; + PSI_stage_info org_stage; + DBUG_ENTER("get_schema_tables_result"); + + Warnings_only_error_handler err_handler; + thd->push_internal_handler(&err_handler); + thd->backup_stage(&org_stage); + THD_STAGE_INFO(thd, stage_filling_schema_table); + + JOIN_TAB *tab; + for (tab= first_linear_tab(join, WITHOUT_BUSH_ROOTS, WITH_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) + { + if (!tab->table || !tab->table->pos_in_table_list) + break; + + TABLE_LIST *table_list= tab->table->pos_in_table_list; + if (table_list->schema_table && thd->fill_information_schema_tables()) + { + /* + I_S tables only need to be re-populated if make_cond_for_info_schema() + preserves outer fields + */ + bool is_subselect= &lex->unit != lex->current_select->master_unit() && + lex->current_select->master_unit()->item && + tab->select_cond && + tab->select_cond->used_tables() & OUTER_REF_TABLE_BIT; + + /* A value of 0 indicates a dummy implementation */ + if (table_list->schema_table->fill_table == 0) + continue; + + /* + Do not fill in tables thare are marked as JT_CONST as these will never + be read and they also don't have a tab->read_record.table set! + This can happen with queries like + SELECT * FROM t1 LEFT JOIN (t1 AS t1b JOIN INFORMATION_SCHEMA.ROUTINES) + ON (t1b.a IS NULL); + */ + if (tab->type == JT_CONST) + continue; + + /* skip I_S optimizations specific to get_all_tables */ + if (lex->describe && + (table_list->schema_table->fill_table != get_all_tables)) + continue; + + /* + If schema table is already processed and the statement is not a + subselect then we don't need to fill this table again. If schema table + is already processed and schema_table_state != executed_place then + table is already processed and we should skip second data processing. 
+ */ + if (table_list->schema_table_state && + (!is_subselect || table_list->schema_table_state != executed_place)) + continue; + + /* + if table is used in a subselect and + table has been processed earlier with the same + 'executed_place' value then we should refresh the table. + */ + if (table_list->schema_table_state && is_subselect) + { + table_list->table->file->extra(HA_EXTRA_NO_CACHE); + table_list->table->file->extra(HA_EXTRA_RESET_STATE); + table_list->table->file->ha_delete_all_rows(); + table_list->table->null_row= 0; + } + else + table_list->table->file->stats.records= 0; + + Item *cond= tab->select_cond; + if (tab->cache_select && tab->cache_select->cond) + { + /* + If join buffering is used, we should use the condition that is + attached to the join cache. Cache condition has a part of WHERE that + can be checked when we're populating this table. + join_tab->select_cond is of no interest, because it only has + conditions that depend on both this table and previous tables in the + join order. + */ + cond= tab->cache_select->cond; + } + + Switch_to_definer_security_ctx backup_ctx(thd, table_list); + Check_level_instant_set check_level_save(thd, CHECK_FIELD_IGNORE); + if (table_list->schema_table->fill_table(thd, table_list, cond)) + { + result= 1; + join->error= 1; + tab->read_record.table->file= table_list->table->file; + table_list->schema_table_state= executed_place; + break; + } + tab->read_record.table->file= table_list->table->file; + table_list->schema_table_state= executed_place; + } + } + thd->pop_internal_handler(); + if (unlikely(thd->is_error())) + { + /* + This hack is here, because I_S code uses thd->clear_error() a lot. + Which means, a Warnings_only_error_handler cannot handle the error + corectly as it does not know whether an error is real (e.g. caused + by tab->select_cond->val_int()) or will be cleared later. + Thus it ignores all errors, and the real one (that is, the error + that was not cleared) is pushed now. 
+ + It also means that an audit plugin cannot process the error correctly + either. See also thd->clear_error() + */ + thd->get_stmt_da()->push_warning(thd, thd->get_stmt_da()->sql_errno(), + thd->get_stmt_da()->get_sqlstate(), + Sql_condition::WARN_LEVEL_ERROR, + thd->get_stmt_da()->message()); + } + else if (result) + my_error(ER_UNKNOWN_ERROR, MYF(0)); + THD_STAGE_INFO(thd, org_stage); + DBUG_RETURN(result); +} + +struct run_hton_fill_schema_table_args +{ + TABLE_LIST *tables; + COND *cond; +}; + +static my_bool run_hton_fill_schema_table(THD *thd, plugin_ref plugin, + void *arg) +{ + struct run_hton_fill_schema_table_args *args= + (run_hton_fill_schema_table_args *) arg; + handlerton *hton= plugin_hton(plugin); + if (hton->fill_is_table) + hton->fill_is_table(hton, thd, args->tables, args->cond, + get_schema_table_idx(args->tables->schema_table)); + return false; +} + +int hton_fill_schema_table(THD *thd, TABLE_LIST *tables, COND *cond) +{ + DBUG_ENTER("hton_fill_schema_table"); + + struct run_hton_fill_schema_table_args args; + args.tables= tables; + args.cond= cond; + + plugin_foreach(thd, run_hton_fill_schema_table, + MYSQL_STORAGE_ENGINE_PLUGIN, &args); + + DBUG_RETURN(0); +} + + +static +int store_key_cache_table_record(THD *thd, TABLE *table, + const char *name, size_t name_length, + KEY_CACHE *key_cache, + uint partitions, uint partition_no) +{ + KEY_CACHE_STATISTICS keycache_stats; + uint err; + DBUG_ENTER("store_key_cache_table_record"); + + get_key_cache_statistics(key_cache, partition_no, &keycache_stats); + + if (!key_cache->key_cache_inited || keycache_stats.mem_size == 0) + DBUG_RETURN(0); + + restore_record(table, s->default_values); + table->field[0]->store(name, name_length, system_charset_info); + if (partitions == 0) + table->field[1]->set_null(); + else + { + table->field[1]->set_notnull(); + table->field[1]->store((long) partitions, TRUE); + } + + if (partition_no == 0) + table->field[2]->set_null(); + else + { + 
table->field[2]->set_notnull(); + table->field[2]->store((long) partition_no, TRUE); + } + table->field[3]->store(keycache_stats.mem_size, TRUE); + table->field[4]->store(keycache_stats.block_size, TRUE); + table->field[5]->store(keycache_stats.blocks_used, TRUE); + table->field[6]->store(keycache_stats.blocks_unused, TRUE); + table->field[7]->store(keycache_stats.blocks_changed, TRUE); + table->field[8]->store(keycache_stats.read_requests, TRUE); + table->field[9]->store(keycache_stats.reads, TRUE); + table->field[10]->store(keycache_stats.write_requests, TRUE); + table->field[11]->store(keycache_stats.writes, TRUE); + + err= schema_table_store_record(thd, table); + DBUG_RETURN(err); +} + +int run_fill_key_cache_tables(const char *name, KEY_CACHE *key_cache, void *p) +{ + DBUG_ENTER("run_fill_key_cache_tables"); + + if (!key_cache->key_cache_inited) + DBUG_RETURN(0); + + TABLE *table= (TABLE *)p; + THD *thd= table->in_use; + uint partitions= key_cache->partitions; + size_t namelen= strlen(name); + DBUG_ASSERT(partitions <= MAX_KEY_CACHE_PARTITIONS); + + if (partitions) + { + for (uint i= 0; i < partitions; i++) + { + if (store_key_cache_table_record(thd, table, name, namelen, + key_cache, partitions, i+1)) + DBUG_RETURN(1); + } + } + + if (store_key_cache_table_record(thd, table, name, namelen, + key_cache, partitions, 0)) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + +int fill_key_cache_tables(THD *thd, TABLE_LIST *tables, COND *cond) +{ + DBUG_ENTER("fill_key_cache_tables"); + + int res= process_key_caches(run_fill_key_cache_tables, tables->table); + + DBUG_RETURN(res); +} + + +namespace Show { + +ST_FIELD_INFO schema_fields_info[]= +{ + Column("CATALOG_NAME", Catalog(), NOT_NULL), + Column("SCHEMA_NAME", Name(), NOT_NULL, "Database"), + Column("DEFAULT_CHARACTER_SET_NAME", CSName(), NOT_NULL), + Column("DEFAULT_COLLATION_NAME", CLName(), NOT_NULL), + Column("SQL_PATH", Varchar(FN_REFLEN), NULLABLE), + Column("SCHEMA_COMMENT", Varchar(DATABASE_COMMENT_MAXLEN), 
NOT_NULL), + CEnd() +}; + + +ST_FIELD_INFO tables_fields_info[]= +{ + Column("TABLE_CATALOG", Catalog(), NOT_NULL), + Column("TABLE_SCHEMA", Name(), NOT_NULL), + Column("TABLE_NAME", Name(), NOT_NULL, "Name"), + Column("TABLE_TYPE", Name(), NOT_NULL, OPEN_FRM_ONLY), + Column("ENGINE", Name(), NULLABLE, "Engine", OPEN_FRM_ONLY), + Column("VERSION", ULonglong(), NULLABLE, "Version", OPEN_FRM_ONLY), + Column("ROW_FORMAT", Varchar(10), NULLABLE, "Row_format", OPEN_FULL_TABLE), + Column("TABLE_ROWS", ULonglong(), NULLABLE, "Rows", OPEN_FULL_TABLE), + Column("AVG_ROW_LENGTH", ULonglong(), NULLABLE, "Avg_row_length", + OPEN_FULL_TABLE), + Column("DATA_LENGTH", ULonglong(), NULLABLE, "Data_length",OPEN_FULL_TABLE), + Column("MAX_DATA_LENGTH", ULonglong(), NULLABLE, "Max_data_length", + OPEN_FULL_TABLE), + Column("INDEX_LENGTH", ULonglong(), NULLABLE, "Index_length",OPEN_FULL_TABLE), + Column("DATA_FREE", ULonglong(), NULLABLE, "Data_free", OPEN_FULL_TABLE), + Column("AUTO_INCREMENT", ULonglong(), NULLABLE, "Auto_increment", + OPEN_FULL_TABLE), + Column("CREATE_TIME", Datetime(0), NULLABLE, "Create_time",OPEN_FULL_TABLE), + Column("UPDATE_TIME", Datetime(0), NULLABLE, "Update_time",OPEN_FULL_TABLE), + Column("CHECK_TIME", Datetime(0), NULLABLE, "Check_time", OPEN_FULL_TABLE), + Column("TABLE_COLLATION", CLName(), NULLABLE, "Collation", OPEN_FRM_ONLY), + Column("CHECKSUM", ULonglong(), NULLABLE, "Checksum", OPEN_FULL_TABLE), + Column("CREATE_OPTIONS", Varchar(2048),NULLABLE, "Create_options", + OPEN_FULL_TABLE), + Column("TABLE_COMMENT", Varchar(TABLE_COMMENT_MAXLEN), + NOT_NULL, "Comment", OPEN_FRM_ONLY), + Column("MAX_INDEX_LENGTH",ULonglong(), NULLABLE, "Max_index_length", + OPEN_FULL_TABLE), + Column("TEMPORARY", Varchar(1), NULLABLE, "Temporary", OPEN_FRM_ONLY), + CEnd() +}; + + +ST_FIELD_INFO columns_fields_info[]= +{ + Column("TABLE_CATALOG", Catalog(), NOT_NULL, OPEN_FRM_ONLY), + Column("TABLE_SCHEMA", Name(), NOT_NULL, OPEN_FRM_ONLY), + Column("TABLE_NAME", Name(), 
NOT_NULL, OPEN_FRM_ONLY), + Column("COLUMN_NAME", Name(), NOT_NULL, "Field", OPEN_FRM_ONLY), + Column("ORDINAL_POSITION", ULonglong(), NOT_NULL, OPEN_FRM_ONLY), + Column("COLUMN_DEFAULT", Longtext(MAX_FIELD_VARCHARLENGTH), + NULLABLE, "Default",OPEN_FRM_ONLY), + Column("IS_NULLABLE", Yes_or_empty(), NOT_NULL, "Null", OPEN_FRM_ONLY), + Column("DATA_TYPE", Name(), NOT_NULL, OPEN_FRM_ONLY), + Column("CHARACTER_MAXIMUM_LENGTH",ULonglong(), NULLABLE, OPEN_FRM_ONLY), + Column("CHARACTER_OCTET_LENGTH", ULonglong(), NULLABLE, OPEN_FRM_ONLY), + Column("NUMERIC_PRECISION", ULonglong(), NULLABLE, OPEN_FRM_ONLY), + Column("NUMERIC_SCALE", ULonglong(), NULLABLE, OPEN_FRM_ONLY), + Column("DATETIME_PRECISION", ULonglong(), NULLABLE, OPEN_FRM_ONLY), + Column("CHARACTER_SET_NAME", CSName(), NULLABLE, OPEN_FRM_ONLY), + Column("COLLATION_NAME", CLName(), NULLABLE, "Collation", OPEN_FRM_ONLY), + Column("COLUMN_TYPE", Longtext(65535), NOT_NULL, "Type", OPEN_FRM_ONLY), + Column("COLUMN_KEY", Varchar(3), NOT_NULL, "Key", OPEN_FRM_ONLY), + Column("EXTRA", Varchar(80), NOT_NULL, "Extra", OPEN_FRM_ONLY), + Column("PRIVILEGES", Varchar(80), NOT_NULL, "Privileges", OPEN_FRM_ONLY), + Column("COLUMN_COMMENT", Varchar(COLUMN_COMMENT_MAXLEN), NOT_NULL, "Comment", + OPEN_FRM_ONLY), + Column("IS_GENERATED", Varchar(6), NOT_NULL, OPEN_FRM_ONLY), + Column("GENERATION_EXPRESSION", Longtext(MAX_FIELD_VARCHARLENGTH), + NULLABLE, OPEN_FRM_ONLY), + CEnd() +}; + + +ST_FIELD_INFO charsets_fields_info[]= +{ + Column("CHARACTER_SET_NAME", CSName(), NOT_NULL, "Charset"), + Column("DEFAULT_COLLATE_NAME", CLName(), NOT_NULL, "Default collation"), + Column("DESCRIPTION", Varchar(60), NOT_NULL, "Description"), + Column("MAXLEN", SLonglong(3), NOT_NULL, "Maxlen"), + CEnd() +}; + + +ST_FIELD_INFO collation_fields_info[]= +{ + Column("COLLATION_NAME", CLName(), NOT_NULL, "Collation"), + Column("CHARACTER_SET_NAME", CSName(), NULLABLE, "Charset"), + Column("ID", SLonglong(MY_INT32_NUM_DECIMAL_DIGITS), NULLABLE, "Id"), 
+ Column("IS_DEFAULT", Yes_or_empty(), NULLABLE, "Default"), + Column("IS_COMPILED", Yes_or_empty(), NOT_NULL, "Compiled"), + Column("SORTLEN", SLonglong(3), NOT_NULL, "Sortlen"), + CEnd() +}; + + +ST_FIELD_INFO applicable_roles_fields_info[]= +{ + Column("GRANTEE", Userhost(), NOT_NULL), + Column("ROLE_NAME", Varchar(USERNAME_CHAR_LENGTH), NOT_NULL), + Column("IS_GRANTABLE", Yes_or_empty(), NOT_NULL), + Column("IS_DEFAULT", Yes_or_empty(), NULLABLE), + CEnd() +}; + + +ST_FIELD_INFO enabled_roles_fields_info[]= +{ + Column("ROLE_NAME", Varchar(USERNAME_CHAR_LENGTH), NULLABLE), + CEnd() +}; + +ST_FIELD_INFO keywords_field_info[]= +{ + Column("WORD", Varchar(KEYWORD_SIZE), NULLABLE), + CEnd() +}; + +ST_FIELD_INFO sql_functions_field_info[]= +{ + Column("FUNCTION", Varchar(KEYWORD_SIZE), NULLABLE), + CEnd() +}; + + +ST_FIELD_INFO engines_fields_info[]= +{ + Column("ENGINE", Varchar(64), NOT_NULL, "Engine"), + Column("SUPPORT", Varchar(8), NOT_NULL, "Support"), + Column("COMMENT", Varchar(160), NOT_NULL, "Comment"), + Column("TRANSACTIONS", Varchar(3), NULLABLE, "Transactions"), + Column("XA", Varchar(3), NULLABLE, "XA"), + Column("SAVEPOINTS", Varchar(3), NULLABLE, "Savepoints"), + CEnd() +}; + + +ST_FIELD_INFO events_fields_info[]= +{ + // QQ: shouldn't EVENT_CATALOG be Catalog() like in all other places? 
+ Column("EVENT_CATALOG", Name(), NOT_NULL), + Column("EVENT_SCHEMA", Name(), NOT_NULL, "Db"), + Column("EVENT_NAME", Name(), NOT_NULL, "Name"), + Column("DEFINER", Definer(), NOT_NULL, "Definer"), + Column("TIME_ZONE", Varchar(64), NOT_NULL, "Time zone"), + Column("EVENT_BODY", Varchar(8), NOT_NULL), + Column("EVENT_DEFINITION", Longtext(65535), NOT_NULL), + Column("EVENT_TYPE", Varchar(9), NOT_NULL, "Type"), + Column("EXECUTE_AT", Datetime(0), NULLABLE, "Execute at"), + Column("INTERVAL_VALUE", Varchar(256),NULLABLE, "Interval value"), + Column("INTERVAL_FIELD", Varchar(18), NULLABLE, "Interval field"), + Column("SQL_MODE", SQLMode(), NOT_NULL), + Column("STARTS", Datetime(0), NULLABLE, "Starts"), + Column("ENDS", Datetime(0), NULLABLE, "Ends"), + Column("STATUS", Varchar(18), NOT_NULL, "Status"), + Column("ON_COMPLETION", Varchar(12), NOT_NULL), + Column("CREATED", Datetime(0), NOT_NULL), + Column("LAST_ALTERED", Datetime(0), NOT_NULL), + Column("LAST_EXECUTED", Datetime(0), NULLABLE), + Column("EVENT_COMMENT", Name(), NOT_NULL), + Column("ORIGINATOR", SLonglong(10),NOT_NULL,"Originator"), + Column("CHARACTER_SET_CLIENT", CSName(), NOT_NULL, "character_set_client"), + Column("COLLATION_CONNECTION", CLName(), NOT_NULL, "collation_connection"), + Column("DATABASE_COLLATION", CLName(), NOT_NULL, "Database Collation"), + CEnd() +}; + + + +ST_FIELD_INFO coll_charset_app_fields_info[]= +{ + Column("COLLATION_NAME", CLName(), NOT_NULL), + Column("CHARACTER_SET_NAME", CSName(), NOT_NULL), + Column("FULL_COLLATION_NAME",CLName(), NOT_NULL), + Column("ID", SLonglong(MY_INT32_NUM_DECIMAL_DIGITS), NOT_NULL), + Column("IS_DEFAULT", Yes_or_empty(), NOT_NULL), + CEnd() +}; + + +ST_FIELD_INFO proc_fields_info[]= +{ + Column("SPECIFIC_NAME", Name(), NOT_NULL), + Column("ROUTINE_CATALOG", Catalog(), NOT_NULL), + Column("ROUTINE_SCHEMA", Name(), NOT_NULL, "Db"), + Column("ROUTINE_NAME", Name(), NOT_NULL, "Name"), + Column("ROUTINE_TYPE", Varchar(13),NOT_NULL, "Type"), + 
Column("DATA_TYPE", Name(), NOT_NULL), + Column("CHARACTER_MAXIMUM_LENGTH",SLong(21), NULLABLE), + Column("CHARACTER_OCTET_LENGTH", SLong(21), NULLABLE), + Column("NUMERIC_PRECISION", SLong(21), NULLABLE), + Column("NUMERIC_SCALE", SLong(21), NULLABLE), + Column("DATETIME_PRECISION", ULonglong(), NULLABLE, OPEN_FRM_ONLY), + Column("CHARACTER_SET_NAME", Varchar(64),NULLABLE), + Column("COLLATION_NAME", Varchar(64),NULLABLE), + Column("DTD_IDENTIFIER", Longtext(65535), NULLABLE), + Column("ROUTINE_BODY", Varchar(8), NOT_NULL), + Column("ROUTINE_DEFINITION", Longtext(65535), NULLABLE), + Column("EXTERNAL_NAME", Name(), NULLABLE), + Column("EXTERNAL_LANGUAGE", Name(), NULLABLE), + Column("PARAMETER_STYLE", Varchar(8), NOT_NULL), + Column("IS_DETERMINISTIC", Varchar(3), NOT_NULL), + Column("SQL_DATA_ACCESS", Name(), NOT_NULL), + Column("SQL_PATH", Name(), NULLABLE), + Column("SECURITY_TYPE", Varchar(7), NOT_NULL, "Security_type"), + Column("CREATED", Datetime(0), NOT_NULL, "Created"), + Column("LAST_ALTERED", Datetime(0), NOT_NULL, "Modified"), + Column("SQL_MODE", SQLMode(), NOT_NULL), + Column("ROUTINE_COMMENT", Longtext(65535), NOT_NULL, "Comment"), + Column("DEFINER", Definer(), NOT_NULL, "Definer"), + Column("CHARACTER_SET_CLIENT", CSName(), NOT_NULL, "character_set_client"), + Column("COLLATION_CONNECTION", CLName(), NOT_NULL, "collation_connection"), + Column("DATABASE_COLLATION", CLName(), NOT_NULL, "Database Collation"), + CEnd() +}; + + +ST_FIELD_INFO stat_fields_info[]= +{ + Column("TABLE_CATALOG", Catalog(), NOT_NULL, OPEN_FRM_ONLY), + Column("TABLE_SCHEMA", Name(), NOT_NULL, OPEN_FRM_ONLY), + Column("TABLE_NAME", Name(), NOT_NULL, "Table", OPEN_FRM_ONLY), + Column("NON_UNIQUE", SLonglong(1),NOT_NULL, "Non_unique", OPEN_FRM_ONLY), + Column("INDEX_SCHEMA", Name(), NOT_NULL, OPEN_FRM_ONLY), + Column("INDEX_NAME", Name(), NOT_NULL, "Key_name", OPEN_FRM_ONLY), + Column("SEQ_IN_INDEX", SLonglong(2),NOT_NULL, "Seq_in_index",OPEN_FRM_ONLY), + Column("COLUMN_NAME", 
Name(), NOT_NULL, "Column_name", OPEN_FRM_ONLY), + Column("COLLATION", Varchar(1), NULLABLE, "Collation", OPEN_FULL_TABLE), + Column("CARDINALITY", SLonglong(), NULLABLE, "Cardinality", OPEN_FULL_TABLE), + Column("SUB_PART", SLonglong(3),NULLABLE, "Sub_part", OPEN_FRM_ONLY), + Column("PACKED", Varchar(10), NULLABLE, "Packed", OPEN_FRM_ONLY), + Column("NULLABLE", Varchar(3), NOT_NULL, "Null", OPEN_FRM_ONLY), + Column("INDEX_TYPE", Varchar(16), NOT_NULL, "Index_type", OPEN_FULL_TABLE), + Column("COMMENT", Varchar(16), NULLABLE, "Comment", OPEN_FRM_ONLY), + Column("INDEX_COMMENT", Varchar(INDEX_COMMENT_MAXLEN), + NOT_NULL, "Index_comment",OPEN_FRM_ONLY), + Column("IGNORED", Varchar(3), NOT_NULL, "Ignored", OPEN_FRM_ONLY), + CEnd() +}; + + +ST_FIELD_INFO view_fields_info[]= +{ + Column("TABLE_CATALOG", Catalog(), NOT_NULL, OPEN_FRM_ONLY), + Column("TABLE_SCHEMA", Name(), NOT_NULL, OPEN_FRM_ONLY), + Column("TABLE_NAME", Name(), NOT_NULL, OPEN_FRM_ONLY), + Column("VIEW_DEFINITION", Longtext(65535), NOT_NULL, OPEN_FRM_ONLY), + Column("CHECK_OPTION", Varchar(8), NOT_NULL, OPEN_FRM_ONLY), + Column("IS_UPDATABLE", Yes_or_empty(), NOT_NULL, OPEN_FULL_TABLE), + Column("DEFINER", Definer(), NOT_NULL, OPEN_FRM_ONLY), + Column("SECURITY_TYPE", Varchar(7), NOT_NULL, OPEN_FRM_ONLY), + Column("CHARACTER_SET_CLIENT", CSName(), NOT_NULL, OPEN_FRM_ONLY), + Column("COLLATION_CONNECTION", CLName(), NOT_NULL, OPEN_FRM_ONLY), + Column("ALGORITHM", Varchar(10),NOT_NULL, OPEN_FRM_ONLY), + CEnd() +}; + + +ST_FIELD_INFO user_privileges_fields_info[]= +{ + Column("GRANTEE", Userhost(), NOT_NULL), + Column("TABLE_CATALOG", Catalog(), NOT_NULL), + Column("PRIVILEGE_TYPE", Name(), NOT_NULL), + Column("IS_GRANTABLE", Yes_or_empty(), NOT_NULL), + CEnd() +}; + + +ST_FIELD_INFO schema_privileges_fields_info[]= +{ + Column("GRANTEE", Userhost(), NOT_NULL), + Column("TABLE_CATALOG", Catalog(), NOT_NULL), + Column("TABLE_SCHEMA", Name(), NOT_NULL), + Column("PRIVILEGE_TYPE", Name(), NOT_NULL), + 
Column("IS_GRANTABLE", Yes_or_empty(), NOT_NULL), + CEnd() +}; + + +ST_FIELD_INFO table_privileges_fields_info[]= +{ + Column("GRANTEE", Userhost(), NOT_NULL), + Column("TABLE_CATALOG", Catalog(), NOT_NULL), + Column("TABLE_SCHEMA", Name(), NOT_NULL), + Column("TABLE_NAME", Name(), NOT_NULL), + Column("PRIVILEGE_TYPE", Name(), NOT_NULL), + Column("IS_GRANTABLE", Yes_or_empty(), NOT_NULL), + CEnd() +}; + + +ST_FIELD_INFO column_privileges_fields_info[]= +{ + Column("GRANTEE", Userhost(), NOT_NULL), + Column("TABLE_CATALOG", Catalog(), NOT_NULL), + Column("TABLE_SCHEMA", Name(), NOT_NULL), + Column("TABLE_NAME", Name(), NOT_NULL), + Column("COLUMN_NAME", Name(), NOT_NULL), + Column("PRIVILEGE_TYPE", Name(), NOT_NULL), + Column("IS_GRANTABLE", Yes_or_empty(), NOT_NULL), + CEnd() +}; + + +ST_FIELD_INFO table_constraints_fields_info[]= +{ + Column("CONSTRAINT_CATALOG", Catalog(), NOT_NULL, OPEN_FULL_TABLE), + Column("CONSTRAINT_SCHEMA", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("CONSTRAINT_NAME", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("TABLE_SCHEMA", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("TABLE_NAME", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("CONSTRAINT_TYPE", Name(), NOT_NULL, OPEN_FULL_TABLE), + CEnd() +}; + + +ST_FIELD_INFO key_column_usage_fields_info[]= +{ + Column("CONSTRAINT_CATALOG", Catalog(), NOT_NULL, OPEN_FULL_TABLE), + Column("CONSTRAINT_SCHEMA", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("CONSTRAINT_NAME", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("TABLE_CATALOG", Catalog(), NOT_NULL, OPEN_FULL_TABLE), + Column("TABLE_SCHEMA", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("TABLE_NAME", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("COLUMN_NAME", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("ORDINAL_POSITION", SLonglong(10), NOT_NULL, OPEN_FULL_TABLE), + Column("POSITION_IN_UNIQUE_CONSTRAINT", SLonglong(10), NULLABLE, OPEN_FULL_TABLE), + Column("REFERENCED_TABLE_SCHEMA", Name(), NULLABLE, OPEN_FULL_TABLE), + 
Column("REFERENCED_TABLE_NAME", Name(), NULLABLE, OPEN_FULL_TABLE), + Column("REFERENCED_COLUMN_NAME", Name(), NULLABLE, OPEN_FULL_TABLE), + CEnd() +}; + + +ST_FIELD_INFO table_names_fields_info[]= +{ + Column("TABLE_CATALOG", Catalog(), NOT_NULL), + Column("TABLE_SCHEMA", Name(), NOT_NULL), + Column("TABLE_NAME", Varchar(NAME_CHAR_LEN + MYSQL50_TABLE_NAME_PREFIX_LENGTH), + NOT_NULL, "Tables_in_"), + Column("TABLE_TYPE", Name(), NOT_NULL, "Table_type", OPEN_FRM_ONLY), + CEnd() +}; + + +ST_FIELD_INFO open_tables_fields_info[]= +{ + Column("Database", Name(), NOT_NULL, "Database"), + Column("Table", Name(), NOT_NULL, "Table"), + Column("In_use", SLonglong(1), NOT_NULL, "In_use"), + Column("Name_locked", SLonglong(4), NOT_NULL, "Name_locked"), + CEnd() +}; + + +ST_FIELD_INFO triggers_fields_info[]= +{ + Column("TRIGGER_CATALOG", Catalog(), NOT_NULL, OPEN_FRM_ONLY), + Column("TRIGGER_SCHEMA", Name(), NOT_NULL, OPEN_FRM_ONLY), + Column("TRIGGER_NAME", Name(), NOT_NULL, "Trigger", OPEN_FRM_ONLY), + Column("EVENT_MANIPULATION", Varchar(6), NOT_NULL, "Event", OPEN_FRM_ONLY), + Column("EVENT_OBJECT_CATALOG", Catalog(), NOT_NULL, OPEN_FRM_ONLY), + Column("EVENT_OBJECT_SCHEMA", Name(), NOT_NULL, OPEN_FRM_ONLY), + Column("EVENT_OBJECT_TABLE", Name(), NOT_NULL, "Table", OPEN_FRM_ONLY), + Column("ACTION_ORDER", SLonglong(4), NOT_NULL, OPEN_FRM_ONLY), + Column("ACTION_CONDITION", Longtext(65535), NULLABLE, OPEN_FRM_ONLY), + Column("ACTION_STATEMENT", Longtext(65535), NOT_NULL, "Statement",OPEN_FRM_ONLY), + Column("ACTION_ORIENTATION", Varchar(9), NOT_NULL, OPEN_FRM_ONLY), + Column("ACTION_TIMING", Varchar(6), NOT_NULL, "Timing", OPEN_FRM_ONLY), + Column("ACTION_REFERENCE_OLD_TABLE",Name(), NULLABLE, OPEN_FRM_ONLY), + Column("ACTION_REFERENCE_NEW_TABLE",Name(), NULLABLE, OPEN_FRM_ONLY), + Column("ACTION_REFERENCE_OLD_ROW",Varchar(3),NOT_NULL, OPEN_FRM_ONLY), + Column("ACTION_REFERENCE_NEW_ROW",Varchar(3),NOT_NULL, OPEN_FRM_ONLY), + /* 2 here indicates 2 decimals */ + 
Column("CREATED", Datetime(2), NULLABLE, "Created", OPEN_FRM_ONLY), + Column("SQL_MODE", SQLMode(), NOT_NULL, "sql_mode", OPEN_FRM_ONLY), + Column("DEFINER", Definer(), NOT_NULL, "Definer", OPEN_FRM_ONLY), + Column("CHARACTER_SET_CLIENT", CSName(), NOT_NULL, "character_set_client", + OPEN_FRM_ONLY), + Column("COLLATION_CONNECTION", CLName(), NOT_NULL, "collation_connection", + OPEN_FRM_ONLY), + Column("DATABASE_COLLATION", CLName(), NOT_NULL, "Database Collation", + OPEN_FRM_ONLY), + CEnd() +}; + + +ST_FIELD_INFO partitions_fields_info[]= +{ + Column("TABLE_CATALOG", Catalog(), NOT_NULL, OPEN_FULL_TABLE), + Column("TABLE_SCHEMA", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("TABLE_NAME", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("PARTITION_NAME", Name(), NULLABLE, OPEN_FULL_TABLE), + Column("SUBPARTITION_NAME", Name(), NULLABLE, OPEN_FULL_TABLE), + Column("PARTITION_ORDINAL_POSITION", ULonglong(), NULLABLE, OPEN_FULL_TABLE), + Column("SUBPARTITION_ORDINAL_POSITION",ULonglong(),NULLABLE, OPEN_FULL_TABLE), + Column("PARTITION_METHOD", Varchar(18), NULLABLE, OPEN_FULL_TABLE), + Column("SUBPARTITION_METHOD", Varchar(12), NULLABLE, OPEN_FULL_TABLE), + Column("PARTITION_EXPRESSION", Longtext(65535), NULLABLE, OPEN_FULL_TABLE), + Column("SUBPARTITION_EXPRESSION", Longtext(65535), NULLABLE, OPEN_FULL_TABLE), + Column("PARTITION_DESCRIPTION", Longtext(65535), NULLABLE, OPEN_FULL_TABLE), + Column("TABLE_ROWS", ULonglong(), NOT_NULL, OPEN_FULL_TABLE), + Column("AVG_ROW_LENGTH", ULonglong(), NOT_NULL, OPEN_FULL_TABLE), + Column("DATA_LENGTH", ULonglong(), NOT_NULL, OPEN_FULL_TABLE), + Column("MAX_DATA_LENGTH", ULonglong(), NULLABLE, OPEN_FULL_TABLE), + Column("INDEX_LENGTH", ULonglong(), NOT_NULL, OPEN_FULL_TABLE), + Column("DATA_FREE", ULonglong(), NOT_NULL, OPEN_FULL_TABLE), + Column("CREATE_TIME", Datetime(0), NULLABLE, OPEN_FULL_TABLE), + Column("UPDATE_TIME", Datetime(0), NULLABLE, OPEN_FULL_TABLE), + Column("CHECK_TIME", Datetime(0), NULLABLE, OPEN_FULL_TABLE), + 
Column("CHECKSUM", ULonglong(), NULLABLE, OPEN_FULL_TABLE), + Column("PARTITION_COMMENT", Varchar(80), NOT_NULL, OPEN_FULL_TABLE), + Column("NODEGROUP", Varchar(12), NOT_NULL, OPEN_FULL_TABLE), + Column("TABLESPACE_NAME", Name(), NULLABLE, OPEN_FULL_TABLE), + CEnd() +}; + + +ST_FIELD_INFO variables_fields_info[]= +{ + Column("VARIABLE_NAME", Varchar(64), NOT_NULL, "Variable_name"), + Column("VARIABLE_VALUE", Varchar(2048), NOT_NULL, "Value"), + CEnd() +}; + + +ST_FIELD_INFO sysvars_fields_info[]= +{ + Column("VARIABLE_NAME", Name(), NOT_NULL), + Column("SESSION_VALUE", Varchar(2048), NULLABLE), + Column("GLOBAL_VALUE", Varchar(2048), NULLABLE), + Column("GLOBAL_VALUE_ORIGIN", Name(), NOT_NULL), + Column("DEFAULT_VALUE", Varchar(2048), NULLABLE), + Column("VARIABLE_SCOPE", Name(), NOT_NULL), + Column("VARIABLE_TYPE", Name(), NOT_NULL), + Column("VARIABLE_COMMENT", Varchar(TABLE_COMMENT_MAXLEN), NOT_NULL), + Column("NUMERIC_MIN_VALUE", Varchar(MY_INT64_NUM_DECIMAL_DIGITS), NULLABLE), + Column("NUMERIC_MAX_VALUE", Varchar(MY_INT64_NUM_DECIMAL_DIGITS), NULLABLE), + Column("NUMERIC_BLOCK_SIZE", Varchar(MY_INT64_NUM_DECIMAL_DIGITS), NULLABLE), + Column("ENUM_VALUE_LIST", Longtext(65535), NULLABLE), + Column("READ_ONLY", Yes_or_empty(), NOT_NULL), + Column("COMMAND_LINE_ARGUMENT",Name(), NULLABLE), + Column("GLOBAL_VALUE_PATH", Varchar(2048), NULLABLE), + CEnd() +}; + + +ST_FIELD_INFO processlist_fields_info[]= +{ + Column("ID", SLonglong(4), NOT_NULL, "Id"), + Column("USER", Varchar(USERNAME_CHAR_LENGTH), NOT_NULL, "User"), + Column("HOST", Varchar(LIST_PROCESS_HOST_LEN),NOT_NULL, "Host"), + Column("DB", Name(), NULLABLE, "Db"), + Column("COMMAND", Varchar(16), NOT_NULL, "Command"), + Column("TIME", SLong(7), NOT_NULL, "Time"), + Column("STATE", Varchar(64), NULLABLE, "State"), + Column("INFO", Longtext(PROCESS_LIST_INFO_WIDTH), + NULLABLE, "Info"), + Column("TIME_MS", Decimal(100 * (MY_INT64_NUM_DECIMAL_DIGITS + 1) + 3), + NOT_NULL, "Time_ms"), + Column("STAGE", 
STiny(2), NOT_NULL, "Stage"), + Column("MAX_STAGE", STiny(2), NOT_NULL, "Max_stage"), + Column("PROGRESS", Decimal(703), NOT_NULL, "Progress"), + Column("MEMORY_USED", SLonglong(7), NOT_NULL, "Memory_used"), + Column("MAX_MEMORY_USED",SLonglong(7), NOT_NULL, "Max_memory_used"), + Column("EXAMINED_ROWS", SLong(7), NOT_NULL, "Examined_rows"), + Column("QUERY_ID", SLonglong(4), NOT_NULL), + Column("INFO_BINARY",Blob(PROCESS_LIST_INFO_WIDTH),NULLABLE, "Info_binary"), + Column("TID", SLonglong(4), NOT_NULL, "Tid"), + CEnd() +}; + + +ST_FIELD_INFO plugin_fields_info[]= +{ + Column("PLUGIN_NAME", Name(), NOT_NULL, "Name"), + Column("PLUGIN_VERSION", Varchar(20), NOT_NULL), + Column("PLUGIN_STATUS", Varchar(16), NOT_NULL, "Status"), + Column("PLUGIN_TYPE", Varchar(80), NOT_NULL, "Type"), + Column("PLUGIN_TYPE_VERSION", Varchar(20), NOT_NULL), + Column("PLUGIN_LIBRARY", Name(), NULLABLE, "Library"), + Column("PLUGIN_LIBRARY_VERSION", Varchar(20), NULLABLE), + Column("PLUGIN_AUTHOR", Name(), NULLABLE), + Column("PLUGIN_DESCRIPTION", Longtext(65535), NULLABLE), + Column("PLUGIN_LICENSE", Varchar(80), NOT_NULL, "License"), + Column("LOAD_OPTION", Varchar(64), NOT_NULL), + Column("PLUGIN_MATURITY", Varchar(12), NOT_NULL), + Column("PLUGIN_AUTH_VERSION", Varchar(80), NULLABLE), + CEnd() +}; + +ST_FIELD_INFO files_fields_info[]= +{ + Column("FILE_ID", SLonglong(4), NOT_NULL), + Column("FILE_NAME", Varchar(FN_REFLEN),NULLABLE), + Column("FILE_TYPE", Varchar(20), NOT_NULL), + Column("TABLESPACE_NAME", Name(), NULLABLE), + Column("TABLE_CATALOG", Name(), NOT_NULL), + Column("TABLE_SCHEMA", Name(), NULLABLE), + Column("TABLE_NAME", Name(), NULLABLE), + Column("LOGFILE_GROUP_NAME", Name(), NULLABLE), + Column("LOGFILE_GROUP_NUMBER",SLonglong(4), NULLABLE), + Column("ENGINE", Name(), NOT_NULL), + Column("FULLTEXT_KEYS", Name(), NULLABLE), + Column("DELETED_ROWS", SLonglong(4), NULLABLE), + Column("UPDATE_COUNT", SLonglong(4), NULLABLE), + Column("FREE_EXTENTS", SLonglong(4), NULLABLE), 
+ Column("TOTAL_EXTENTS", SLonglong(4), NULLABLE), + Column("EXTENT_SIZE", SLonglong(4), NOT_NULL), + Column("INITIAL_SIZE", ULonglong(), NULLABLE), + Column("MAXIMUM_SIZE", ULonglong(), NULLABLE), + Column("AUTOEXTEND_SIZE", ULonglong(), NULLABLE), + Column("CREATION_TIME", Datetime(0), NULLABLE), + Column("LAST_UPDATE_TIME", Datetime(0), NULLABLE), + Column("LAST_ACCESS_TIME", Datetime(0), NULLABLE), + Column("RECOVER_TIME", SLonglong(4), NULLABLE), + Column("TRANSACTION_COUNTER", SLonglong(4), NULLABLE), + Column("VERSION", ULonglong(), NULLABLE, "Version"), + Column("ROW_FORMAT", Varchar(10), NULLABLE, "Row_format"), + Column("TABLE_ROWS", ULonglong(), NULLABLE, "Rows"), + Column("AVG_ROW_LENGTH", ULonglong(), NULLABLE, "Avg_row_length"), + Column("DATA_LENGTH", ULonglong(), NULLABLE, "Data_length"), + Column("MAX_DATA_LENGTH", ULonglong(), NULLABLE, "Max_data_length"), + Column("INDEX_LENGTH", ULonglong(), NULLABLE, "Index_length"), + Column("DATA_FREE", ULonglong(), NULLABLE, "Data_free"), + Column("CREATE_TIME", Datetime(0), NULLABLE, "Create_time"), + Column("UPDATE_TIME", Datetime(0), NULLABLE, "Update_time"), + Column("CHECK_TIME", Datetime(0), NULLABLE, "Check_time"), + Column("CHECKSUM", ULonglong(), NULLABLE, "Checksum"), + Column("STATUS", Varchar(20), NOT_NULL), + Column("EXTRA", Varchar(255), NULLABLE), + CEnd() +}; + +}; // namespace Show + + +void init_fill_schema_files_row(TABLE* table) +{ + int i; + for(i=0; !Show::files_fields_info[i].end_marker(); i++) + table->field[i]->set_null(); + + table->field[IS_FILES_STATUS]->set_notnull(); + table->field[IS_FILES_STATUS]->store("NORMAL", 6, system_charset_info); +} + + +namespace Show { + +ST_FIELD_INFO referential_constraints_fields_info[]= +{ + Column("CONSTRAINT_CATALOG", Catalog(), NOT_NULL, OPEN_FULL_TABLE), + Column("CONSTRAINT_SCHEMA", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("CONSTRAINT_NAME", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("UNIQUE_CONSTRAINT_CATALOG", Catalog(), NOT_NULL, 
OPEN_FULL_TABLE), + Column("UNIQUE_CONSTRAINT_SCHEMA", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("UNIQUE_CONSTRAINT_NAME", Name(), NULLABLE, OPEN_FULL_TABLE), + Column("MATCH_OPTION", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("UPDATE_RULE", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("DELETE_RULE", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("TABLE_NAME", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("REFERENCED_TABLE_NAME", Name(), NULLABLE, OPEN_FULL_TABLE), + CEnd() +}; + + +ST_FIELD_INFO parameters_fields_info[]= +{ + Column("SPECIFIC_CATALOG", Catalog(), NOT_NULL, OPEN_FULL_TABLE), + Column("SPECIFIC_SCHEMA", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("SPECIFIC_NAME", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("ORDINAL_POSITION", SLong(21), NOT_NULL, OPEN_FULL_TABLE), + Column("PARAMETER_MODE", Varchar(5), NULLABLE, OPEN_FULL_TABLE), + Column("PARAMETER_NAME", Name(), NULLABLE, OPEN_FULL_TABLE), + Column("DATA_TYPE", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("CHARACTER_MAXIMUM_LENGTH",SLong(21), NULLABLE, OPEN_FULL_TABLE), + Column("CHARACTER_OCTET_LENGTH", SLong(21), NULLABLE, OPEN_FULL_TABLE), + Column("NUMERIC_PRECISION", SLong(21), NULLABLE, OPEN_FULL_TABLE), + Column("NUMERIC_SCALE", SLong(21), NULLABLE, OPEN_FULL_TABLE), + Column("DATETIME_PRECISION", ULonglong(), NULLABLE, OPEN_FRM_ONLY), + Column("CHARACTER_SET_NAME", Varchar(64), NULLABLE, OPEN_FULL_TABLE), + Column("COLLATION_NAME", Varchar(64), NULLABLE, OPEN_FULL_TABLE), + Column("DTD_IDENTIFIER", Longtext(65535), NOT_NULL, OPEN_FULL_TABLE), + Column("ROUTINE_TYPE", Varchar(9), NOT_NULL, OPEN_FULL_TABLE), + CEnd() +}; + + +ST_FIELD_INFO tablespaces_fields_info[]= +{ + Column("TABLESPACE_NAME", Name(), NOT_NULL), + Column("ENGINE", Name(), NOT_NULL), + Column("TABLESPACE_TYPE", Name(), NULLABLE), + Column("LOGFILE_GROUP_NAME", Name(), NULLABLE), + Column("EXTENT_SIZE", ULonglong(), NULLABLE), + Column("AUTOEXTEND_SIZE", ULonglong(), NULLABLE), + Column("MAXIMUM_SIZE", ULonglong(), 
NULLABLE), + Column("NODEGROUP_ID", ULonglong(), NULLABLE), + Column("TABLESPACE_COMMENT", Varchar(2048), NULLABLE), + CEnd() +}; + + +ST_FIELD_INFO keycache_fields_info[]= +{ + Column("KEY_CACHE_NAME",Varchar(NAME_LEN),NOT_NULL), + Column("SEGMENTS", ULong(3), NULLABLE), + Column("SEGMENT_NUMBER", ULong(3), NULLABLE), + Column("FULL_SIZE", ULonglong(), NOT_NULL), + Column("BLOCK_SIZE", ULonglong(), NOT_NULL), + Column("USED_BLOCKS", ULonglong(), NOT_NULL, "Key_blocks_used"), + Column("UNUSED_BLOCKS", ULonglong(), NOT_NULL, "Key_blocks_unused"), + Column("DIRTY_BLOCKS", ULonglong(), NOT_NULL, "Key_blocks_not_flushed"), + Column("READ_REQUESTS", ULonglong(), NOT_NULL, "Key_read_requests"), + Column("READS", ULonglong(), NOT_NULL, "Key_reads"), + Column("WRITE_REQUESTS", ULonglong(), NOT_NULL, "Key_write_requests"), + Column("WRITES", ULonglong(), NOT_NULL, "Key_writes"), + CEnd() +}; + + +ST_FIELD_INFO show_explain_tabular_fields_info[]= +{ + Column("id", SLonglong(3), NULLABLE, "id"), + Column("select_type", Varchar(19), NOT_NULL, "select_type"), + Column("table", Name(), NULLABLE, "table"), + Column("type", Varchar(15), NULLABLE, "type"), + Column("possible_keys",Varchar(NAME_CHAR_LEN*MAX_KEY), NULLABLE, "possible_keys"), + Column("key", Varchar(NAME_CHAR_LEN*MAX_KEY), NULLABLE, "key"), + Column("key_len", Varchar(NAME_CHAR_LEN*MAX_KEY), NULLABLE, "key_len"), + Column("ref", Varchar(NAME_CHAR_LEN*MAX_REF_PARTS),NULLABLE, "ref"), + Column("rows", SLonglong(10), NULLABLE, "rows"), + Column("Extra", Varchar(255), NOT_NULL, "Extra"), + CEnd() +}; + + +ST_FIELD_INFO show_explain_json_fields_info[]= +{ + Column("EXPLAIN", Longtext(MAX_FIELD_VARCHARLENGTH), NOT_NULL, "SHOW EXPLAIN"), + CEnd() +}; + + +ST_FIELD_INFO show_analyze_tabular_fields_info[]= +{ + Column("id", SLonglong(3), NULLABLE, "id"), + Column("select_type", Varchar(19), NOT_NULL, "select_type"), + Column("table", Name(), NULLABLE, "table"), + Column("type", Varchar(15), NULLABLE, "type"), + 
Column("possible_keys",Varchar(NAME_CHAR_LEN*MAX_KEY), NULLABLE, "possible_keys"), + Column("key", Varchar(NAME_CHAR_LEN*MAX_KEY), NULLABLE, "key"), + Column("key_len", Varchar(NAME_CHAR_LEN*MAX_KEY), NULLABLE, "key_len"), + Column("ref", Varchar(NAME_CHAR_LEN*MAX_REF_PARTS),NULLABLE, "ref"), + Column("rows", SLonglong(10), NULLABLE, "rows"), + Column("r_rows", Varchar(NAME_CHAR_LEN), NULLABLE, "r_rows"), + + /* Fields of type DECIMAL(5,2) to represent percentage. + See Show::Type::decimal_precision() and Show::Type::decimal_scale() to learn + how 502 converts to precision and scale (5 and 2)*/ + Column("filtered", Decimal(502), NULLABLE, "filtered"), + Column("r_filtered", Decimal(502), NULLABLE, "r_filtered"), + Column("Extra", Varchar(255), NOT_NULL, "Extra"), + CEnd() +}; + + +ST_FIELD_INFO show_analyze_json_fields_info[]= { + Column("ANALYZE", Longtext(MAX_FIELD_VARCHARLENGTH), NOT_NULL, "SHOW ANALYZE"), + CEnd()}; + + +ST_FIELD_INFO check_constraints_fields_info[]= +{ + Column("CONSTRAINT_CATALOG", Catalog(), NOT_NULL, OPEN_FULL_TABLE), + Column("CONSTRAINT_SCHEMA", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("TABLE_NAME", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("CONSTRAINT_NAME", Name(), NOT_NULL, OPEN_FULL_TABLE), + Column("LEVEL", Varchar(6),NOT_NULL, OPEN_FULL_TABLE), + Column("CHECK_CLAUSE", Longtext(MAX_FIELD_VARCHARLENGTH), + NOT_NULL, OPEN_FULL_TABLE), + CEnd() +}; + +}; // namespace Show + + +namespace Show { + +/** For creating fields of information_schema.OPTIMIZER_TRACE */ +extern ST_FIELD_INFO optimizer_trace_info[]; + +} //namespace Show + +/* + Description of ST_FIELD_INFO in sql_i_s.h + + Make sure that the order of schema_tables and enum_schema_tables are the same. 
+ +*/ + +ST_SCHEMA_TABLE schema_tables[]= +{ + {"ALL_PLUGINS", Show::plugin_fields_info, 0, + fill_all_plugins, make_old_format, 0, 5, -1, 0, 0}, + {"APPLICABLE_ROLES", Show::applicable_roles_fields_info, 0, + fill_schema_applicable_roles, 0, 0, -1, -1, 0, 0}, + {"CHARACTER_SETS", Show::charsets_fields_info, 0, + fill_schema_charsets, make_character_sets_old_format, 0, -1, -1, 0, 0}, + {"CHECK_CONSTRAINTS", Show::check_constraints_fields_info, 0, + get_all_tables, 0, + get_check_constraints_record, 1, 2, 0, OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY}, + {"COLLATIONS", Show::collation_fields_info, 0, + fill_schema_collation, make_old_format, 0, -1, -1, 0, 0}, + {"COLLATION_CHARACTER_SET_APPLICABILITY", Show::coll_charset_app_fields_info, + 0, fill_schema_coll_charset_app, 0, 0, -1, -1, 0, 0}, + {"COLUMNS", Show::columns_fields_info, 0, + get_all_tables, make_columns_old_format, get_schema_column_record, 1, 2, 0, + OPTIMIZE_I_S_TABLE|OPEN_VIEW_FULL}, + {"COLUMN_PRIVILEGES", Show::column_privileges_fields_info, 0, + fill_schema_column_privileges, 0, 0, -1, -1, 0, 0}, + {"ENABLED_ROLES", Show::enabled_roles_fields_info, 0, + fill_schema_enabled_roles, 0, 0, -1, -1, 0, 0}, + {"ENGINES", Show::engines_fields_info, 0, + fill_schema_engines, make_old_format, 0, -1, -1, 0, 0}, +#ifdef HAVE_EVENT_SCHEDULER + {"EVENTS", Show::events_fields_info, 0, + Events::fill_schema_events, make_old_format, 0, -1, -1, 0, 0}, +#else + {"EVENTS", Show::events_fields_info, 0, + 0, make_old_format, 0, -1, -1, 0, 0}, +#endif + {"EXPLAIN", Show::show_explain_tabular_fields_info, 0, + fill_show_explain_tabular, make_old_format, 0, -1, -1, + TRUE /*hidden*/ , 0}, + {"EXPLAIN_JSON", Show::show_explain_json_fields_info, 0, + fill_show_explain_json, make_old_format, 0, -1, -1, + TRUE /*hidden*/ , 0}, + {"ANALYZE", Show::show_analyze_tabular_fields_info, 0, + fill_show_analyze_tabular, make_old_format, 0, -1, -1, + TRUE /*hidden*/, 0}, + {"ANALYZE_JSON", Show::show_analyze_json_fields_info, 0, + 
fill_show_analyze_json, make_old_format, 0, -1, -1, + TRUE /*hidden*/, 0}, + {"FILES", Show::files_fields_info, 0, + hton_fill_schema_table, 0, 0, -1, -1, 0, 0}, + {"GLOBAL_STATUS", Show::variables_fields_info, 0, + fill_status, make_old_format, 0, 0, -1, 0, 0}, + {"GLOBAL_VARIABLES", Show::variables_fields_info, 0, + fill_variables, make_old_format, 0, 0, -1, 0, 0}, + {"KEYWORDS", Show::keywords_field_info, 0, + fill_i_s_keywords, 0, 0, -1, -1, 0, 0}, + {"KEY_CACHES", Show::keycache_fields_info, 0, + fill_key_cache_tables, 0, 0, -1,-1, 0, 0}, + {"KEY_COLUMN_USAGE", Show::key_column_usage_fields_info, 0, + get_all_tables, 0, get_schema_key_column_usage_record, 4, 5, 0, + OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY}, + {"OPEN_TABLES", Show::open_tables_fields_info, 0, + fill_open_tables, make_old_format, 0, -1, -1, 1, 0}, + {"OPTIMIZER_TRACE", Show::optimizer_trace_info, 0, + fill_optimizer_trace_info, NULL, NULL, -1, -1, false, 0}, + {"PARAMETERS", Show::parameters_fields_info, 0, + fill_schema_proc, 0, 0, 1, 2, 0, 0}, + {"PARTITIONS", Show::partitions_fields_info, 0, + get_all_tables, 0, get_schema_partitions_record, 1, 2, 0, + OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY}, + {"PLUGINS", Show::plugin_fields_info, 0, + fill_plugins, make_old_format, 0, -1, -1, 0, 0}, + {"PROCESSLIST", Show::processlist_fields_info, 0, + fill_schema_processlist, make_old_format, 0, -1, -1, 0, 0}, + {"PROFILING", Show::query_profile_statistics_info, 0, + fill_query_profile_statistics_info, make_profile_table_for_show, + NULL, -1, -1, false, 0}, + {"REFERENTIAL_CONSTRAINTS", Show::referential_constraints_fields_info, + 0, get_all_tables, 0, get_referential_constraints_record, + 1, 9, 0, OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY}, + {"ROUTINES", Show::proc_fields_info, 0, + fill_schema_proc, make_proc_old_format, 0, 2, 3, 0, 0}, + {"SCHEMATA", Show::schema_fields_info, 0, + fill_schema_schemata, make_schemata_old_format, 0, 1, -1, 0, 0}, + {"SCHEMA_PRIVILEGES", Show::schema_privileges_fields_info, 0, + 
fill_schema_schema_privileges, 0, 0, -1, -1, 0, 0}, + {"SESSION_STATUS", Show::variables_fields_info, 0, + fill_status, make_old_format, 0, 0, -1, 0, 0}, + {"SESSION_VARIABLES", Show::variables_fields_info, 0, + fill_variables, make_old_format, 0, 0, -1, 0, 0}, + {"STATISTICS", Show::stat_fields_info, 0, + get_all_tables, make_old_format, get_schema_stat_record, 1, 2, 0, + OPEN_TABLE_ONLY|OPTIMIZE_I_S_TABLE}, + {"SQL_FUNCTIONS", Show::sql_functions_field_info, 0, + fill_i_s_sql_functions, 0, 0, -1, -1, 0, 0}, + {"SYSTEM_VARIABLES", Show::sysvars_fields_info, 0, + fill_sysvars, make_old_format, 0, 0, -1, 0, 0}, + {"TABLES", Show::tables_fields_info, 0, + get_all_tables, make_old_format, get_schema_tables_record, 1, 2, 0, + OPTIMIZE_I_S_TABLE}, + {"TABLESPACES", Show::tablespaces_fields_info, 0, + hton_fill_schema_table, 0, 0, -1, -1, 0, 0}, + {"TABLE_CONSTRAINTS", Show::table_constraints_fields_info, 0, + get_all_tables, 0, get_schema_constraints_record, 3, 4, 0, + OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY}, + {"TABLE_NAMES", Show::table_names_fields_info, 0, + get_all_tables, make_table_names_old_format, 0, 1, 2, 1, OPTIMIZE_I_S_TABLE}, + {"TABLE_PRIVILEGES", Show::table_privileges_fields_info, 0, + fill_schema_table_privileges, 0, 0, -1, -1, 0, 0}, + {"TRIGGERS", Show::triggers_fields_info, 0, + get_all_tables, make_old_format, get_schema_triggers_record, 5, 6, 0, + OPEN_TRIGGER_ONLY|OPTIMIZE_I_S_TABLE}, + {"USER_PRIVILEGES", Show::user_privileges_fields_info, 0, + fill_schema_user_privileges, 0, 0, -1, -1, 0, 0}, + {"VIEWS", Show::view_fields_info, 0, + get_all_tables, 0, get_schema_views_record, 1, 2, 0, + OPEN_VIEW_ONLY|OPTIMIZE_I_S_TABLE}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +}; + + +int initialize_schema_table(st_plugin_int *plugin) +{ + ST_SCHEMA_TABLE *schema_table; + DBUG_ENTER("initialize_schema_table"); + + if (!(schema_table= (ST_SCHEMA_TABLE *)my_malloc(key_memory_ST_SCHEMA_TABLE, + sizeof(ST_SCHEMA_TABLE), + MYF(MY_WME | MY_ZEROFILL)))) + DBUG_RETURN(1); + /* 
Historical Requirement */ + plugin->data= schema_table; // shortcut for the future + if (plugin->plugin->init) + { + schema_table->idx_field1= -1, + schema_table->idx_field2= -1; + + /* Make the name available to the init() function. */ + schema_table->table_name= plugin->name.str; + + if (plugin->plugin->init(schema_table)) + { + sql_print_error("Plugin '%s' init function returned error.", + plugin->name.str); + plugin->data= NULL; + my_free(schema_table); + DBUG_RETURN(1); + } + + if (!schema_table->old_format) + for (ST_FIELD_INFO *f= schema_table->fields_info; !f->end_marker(); f++) + if (f->old_name().str && f->old_name().str[0]) + { + schema_table->old_format= make_old_format; + break; + } + + /* Make sure the plugin name is not set inside the init() function. */ + schema_table->table_name= plugin->name.str; + } + DBUG_RETURN(0); +} + +int finalize_schema_table(st_plugin_int *plugin) +{ + int deinit_status= 0; + ST_SCHEMA_TABLE *schema_table= (ST_SCHEMA_TABLE *)plugin->data; + DBUG_ENTER("finalize_schema_table"); + + if (schema_table) + { + if (plugin->plugin->deinit) + deinit_status= plugin->plugin->deinit(NULL); + my_free(schema_table); + } + DBUG_RETURN(deinit_status); +} + + +/** + Output trigger information (SHOW CREATE TRIGGER) to the client. + + @param thd Thread context. + @param trigger Trigger to dump + + @return Operation status + @retval TRUE Error. + @retval FALSE Success. +*/ + +static bool show_create_trigger_impl(THD *thd, Trigger *trigger) +{ + int ret_code; + Protocol *p= thd->protocol; + List fields; + LEX_CSTRING trg_sql_mode_str, trg_body; + LEX_CSTRING trg_sql_original_stmt; + LEX_STRING trg_definer; + CHARSET_INFO *trg_client_cs; + MEM_ROOT *mem_root= thd->mem_root; + char definer_holder[USER_HOST_BUFF_SIZE]; + trg_definer.str= definer_holder; + + /* + TODO: Check privileges here. 
This functionality will be added by + implementation of the following WL items: + - WL#2227: New privileges for new objects + - WL#3482: Protect SHOW CREATE PROCEDURE | FUNCTION | VIEW | TRIGGER + properly + + SHOW TRIGGERS and I_S.TRIGGERS will be affected too. + */ + + /* Prepare trigger "object". */ + + trigger->get_trigger_info(&trg_sql_original_stmt, &trg_body, &trg_definer); + sql_mode_string_representation(thd, trigger->sql_mode, &trg_sql_mode_str); + + /* Resolve trigger client character set. */ + myf utf8_flag= thd->get_utf8_flag(); + if (resolve_charset(trigger->client_cs_name.str, NULL, &trg_client_cs, + MYF(utf8_flag))) + return TRUE; + + /* Send header. */ + + fields.push_back(new (mem_root) Item_empty_string(thd, "Trigger", NAME_LEN), + mem_root); + fields.push_back(new (mem_root) + Item_empty_string(thd, "sql_mode", (uint)trg_sql_mode_str.length), + mem_root); + + { + /* + NOTE: SQL statement field must be not less than 1024 in order not to + confuse old clients. + */ + + Item_empty_string *stmt_fld= + new (mem_root) Item_empty_string(thd, "SQL Original Statement", + (uint)MY_MAX(trg_sql_original_stmt.length, + 1024)); + + stmt_fld->set_maybe_null(); + + fields.push_back(stmt_fld, mem_root); + } + + fields.push_back(new (mem_root) + Item_empty_string(thd, "character_set_client", + MY_CS_CHARACTER_SET_NAME_SIZE), + mem_root); + + fields.push_back(new (mem_root) + Item_empty_string(thd, "collation_connection", + MY_CS_COLLATION_NAME_SIZE), + mem_root); + + fields.push_back(new (mem_root) + Item_empty_string(thd, "Database Collation", + MY_CS_COLLATION_NAME_SIZE), + mem_root); + + static const Datetime zero_datetime(Datetime::zero()); + Item_datetime_literal *tmp= (new (mem_root) + Item_datetime_literal(thd, &zero_datetime, 2)); + tmp->set_name(thd, Lex_cstring(STRING_WITH_LEN("Created"))); + fields.push_back(tmp, mem_root); + + if (p->send_result_set_metadata(&fields, + Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF)) + return TRUE; + + /* Send data. 
*/ + + p->prepare_for_resend(); + + p->store(trigger->name.str, + trigger->name.length, + system_charset_info); + + p->store(trg_sql_mode_str.str, + trg_sql_mode_str.length, + system_charset_info); + + p->store(trg_sql_original_stmt.str, + trg_sql_original_stmt.length, + trg_client_cs); + + p->store(&trigger->client_cs_name, system_charset_info); + + p->store(&trigger->connection_cl_name, system_charset_info); + + p->store(&trigger->db_cl_name, system_charset_info); + + if (trigger->hr_create_time.val) + { + MYSQL_TIME timestamp; + thd->variables.time_zone-> + gmt_sec_to_TIME(×tamp, + (my_time_t) + hrtime_to_time(trigger->hr_create_time)); + timestamp.second_part= hrtime_sec_part(trigger->hr_create_time); + p->store_datetime(×tamp, 2); + } + else + p->store_null(); + + + ret_code= p->write(); + + if (!ret_code) + my_eof(thd); + + return ret_code != 0; +} + + +/** + Read TRN and TRG files to obtain base table name for the specified + trigger name and construct TABE_LIST object for the base table. + + @param thd Thread context. + @param trg_name Trigger name. + + @return TABLE_LIST object corresponding to the base table. + + TODO: This function is a copy&paste from add_table_to_list() and + sp_add_to_query_tables(). The problem is that in order to be compatible + with Stored Programs (Prepared Statements), we should not touch thd->lex. + The "source" functions also add created TABLE_LIST object to the + thd->lex->query_tables. + + The plan to eliminate this copy&paste is to: + + - get rid of sp_add_to_query_tables() and use Lex::add_table_to_list(). + Only add_table_to_list() must be used to add tables from the parser + into Lex::query_tables list. + + - do not update Lex::query_tables in add_table_to_list(). 
+*/ + +static +TABLE_LIST *get_trigger_table(THD *thd, const sp_name *trg_name) +{ + char trn_path_buff[FN_REFLEN]; + LEX_CSTRING trn_path= { trn_path_buff, 0 }; + LEX_CSTRING db; + LEX_CSTRING tbl_name; + TABLE_LIST *table; + + build_trn_path(thd, trg_name, (LEX_STRING*) &trn_path); + + if (check_trn_exists(&trn_path)) + { + my_error(ER_TRG_DOES_NOT_EXIST, MYF(0)); + return NULL; + } + + if (load_table_name_for_trigger(thd, trg_name, &trn_path, &tbl_name)) + return NULL; + + /* We need to reset statement table list to be PS/SP friendly. */ + if (!(table= (TABLE_LIST*) thd->alloc(sizeof(TABLE_LIST)))) + return NULL; + + db= trg_name->m_db; + + db.str= thd->strmake(db.str, db.length); + if (lower_case_table_names) + db.length= my_casedn_str(files_charset_info, (char*) db.str); + + tbl_name.str= thd->strmake(tbl_name.str, tbl_name.length); + + if (db.str == NULL || tbl_name.str == NULL) + return NULL; + + table->init_one_table(&db, &tbl_name, 0, TL_IGNORE); + + return table; +} + + +/** + SHOW CREATE TRIGGER high-level implementation. + + @param thd Thread context. + @param trg_name Trigger name. + + @return Operation status + @retval TRUE Error. + @retval FALSE Success. +*/ + +bool show_create_trigger(THD *thd, const sp_name *trg_name) +{ + TABLE_LIST *lst= get_trigger_table(thd, trg_name); + uint num_tables; /* NOTE: unused, only to pass to open_tables(). */ + Table_triggers_list *triggers; + Trigger *trigger; + bool error= TRUE; + + if (!lst) + return TRUE; + + if (check_table_access(thd, TRIGGER_ACL, lst, FALSE, 1, TRUE)) + { + my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), "TRIGGER"); + return TRUE; + } + + /* + Metadata locks taken during SHOW CREATE TRIGGER should be released when + the statement completes as it is an information statement. + */ + MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint(); + + /* + Open the table by name in order to load Table_triggers_list object. 
+ */ + if (open_tables(thd, &lst, &num_tables, + MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL)) + { + my_error(ER_TRG_CANT_OPEN_TABLE, MYF(0), + (const char *) trg_name->m_db.str, + (const char *) lst->table_name.str); + + goto exit; + + /* Perform closing actions and return error status. */ + } + + triggers= lst->table->triggers; + + if (!triggers) + { + my_error(ER_TRG_DOES_NOT_EXIST, MYF(0)); + goto exit; + } + + trigger= triggers->find_trigger(&trg_name->m_name, 0); + + if (!trigger) + { + my_error(ER_TRG_CORRUPTED_FILE, MYF(0), + (const char *) trg_name->m_db.str, + (const char *) lst->table_name.str); + goto exit; + } + + error= show_create_trigger_impl(thd, trigger); + + /* + NOTE: if show_create_trigger_impl() failed, that means we could not + send data to the client. In this case we simply raise the error + status and client connection will be closed. + */ + +exit: + close_thread_tables(thd); + /* Release any metadata locks taken during SHOW CREATE TRIGGER. */ + thd->mdl_context.rollback_to_savepoint(mdl_savepoint); + return error; +} + +class IS_internal_schema_access : public ACL_internal_schema_access +{ +public: + IS_internal_schema_access() = default; + + ~IS_internal_schema_access() = default; + + ACL_internal_access_result check(privilege_t want_access, + privilege_t *save_priv) const; + + const ACL_internal_table_access *lookup(const char *name) const; +}; + +ACL_internal_access_result +IS_internal_schema_access::check(privilege_t want_access, + privilege_t *save_priv) const +{ + want_access &= ~SELECT_ACL; + + /* + We don't allow any simple privileges but SELECT_ACL on + the information_schema database. + */ + if (unlikely((want_access & DB_ACLS) != NO_ACL)) + return ACL_INTERNAL_ACCESS_DENIED; + + /* Always grant SELECT for the information schema. */ + *save_priv|= SELECT_ACL; + + return want_access ? 
ACL_INTERNAL_ACCESS_CHECK_GRANT : + ACL_INTERNAL_ACCESS_GRANTED; +} + +const ACL_internal_table_access * +IS_internal_schema_access::lookup(const char *name) const +{ + /* There are no per table rules for the information schema. */ + return NULL; +} + +static IS_internal_schema_access is_internal_schema_access; + +void initialize_information_schema_acl() +{ + ACL_internal_schema_registry::register_schema(&INFORMATION_SCHEMA_NAME, + &is_internal_schema_access); +} + + +/** + Dumps a text description of a thread, its security context + (user, host) and the current query. + + @param thd thread context + @param buffer pointer to preferred result buffer + @param length length of buffer + @param max_query_len how many chars of query to copy (0 for all) + + @return Pointer to string +*/ + +extern "C" +char *thd_get_error_context_description(THD *thd, char *buffer, + unsigned int length, + unsigned int max_query_len) +{ + String str(buffer, length, &my_charset_latin1); + const Security_context *sctx= &thd->main_security_ctx; + char header[256]; + size_t len; + + len= my_snprintf(header, sizeof(header), + "MariaDB thread id %u, OS thread handle %lu, query id %llu", + (uint)thd->thread_id, (ulong) thd->real_id, (ulonglong) thd->query_id); + str.length(0); + str.append(header, len); + + if (sctx->host) + { + str.append(' '); + str.append(sctx->host, strlen(sctx->host)); + } + + if (sctx->ip) + { + str.append(' '); + str.append(sctx->ip, strlen(sctx->ip)); + } + + if (sctx->user) + { + str.append(' '); + str.append(sctx->user, strlen(sctx->user)); + } + + /* Don't wait if LOCK_thd_data is used as this could cause a deadlock */ + if (!mysql_mutex_trylock(&thd->LOCK_thd_data)) + { + if (const char *info= thread_state_info(thd)) + { + str.append(' '); + str.append(info, strlen(info)); + } + + if (thd->query()) + { + if (max_query_len < 1) + len= thd->query_length(); + else + len= MY_MIN(thd->query_length(), max_query_len); + str.append('\n'); + str.append(thd->query(), len); + } 
+ mysql_mutex_unlock(&thd->LOCK_thd_data); + } + + if (str.c_ptr_safe() == buffer) + return buffer; + + /* + We have to copy the new string to the destination buffer because the string + was reallocated to a larger buffer to be able to fit. + */ + DBUG_ASSERT(buffer != NULL); + length= MY_MIN(str.length(), length-1); + memcpy(buffer, str.ptr(), length); + /* Make sure that the new string is null terminated */ + buffer[length]= '\0'; + return buffer; +} diff --git a/sql/sql_show.h b/sql/sql_show.h new file mode 100644 index 00000000..9a269b49 --- /dev/null +++ b/sql/sql_show.h @@ -0,0 +1,268 @@ +/* Copyright (c) 2005, 2010, Oracle and/or its affiliates. + Copyright (c) 2012, 2016, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_SHOW_H +#define SQL_SHOW_H + +#include "sql_list.h" /* List */ +#include "handler.h" /* enum_schema_tables */ +#include "table.h" /* enum_schema_table_state */ +#include "my_apc.h" + +/* Forward declarations */ +class JOIN; +class String; +class THD; +class sp_name; +struct TABLE_LIST; +typedef class st_select_lex SELECT_LEX; +struct LEX; +typedef struct st_mysql_show_var SHOW_VAR; +typedef struct st_schema_table ST_SCHEMA_TABLE; +struct TABLE; +typedef struct system_status_var STATUS_VAR; + +/* Used by handlers to store things in schema tables */ +#define IS_FILES_FILE_ID 0 +#define IS_FILES_FILE_NAME 1 +#define IS_FILES_FILE_TYPE 2 +#define IS_FILES_TABLESPACE_NAME 3 +#define IS_FILES_TABLE_CATALOG 4 +#define IS_FILES_TABLE_SCHEMA 5 +#define IS_FILES_TABLE_NAME 6 +#define IS_FILES_LOGFILE_GROUP_NAME 7 +#define IS_FILES_LOGFILE_GROUP_NUMBER 8 +#define IS_FILES_ENGINE 9 +#define IS_FILES_FULLTEXT_KEYS 10 +#define IS_FILES_DELETED_ROWS 11 +#define IS_FILES_UPDATE_COUNT 12 +#define IS_FILES_FREE_EXTENTS 13 +#define IS_FILES_TOTAL_EXTENTS 14 +#define IS_FILES_EXTENT_SIZE 15 +#define IS_FILES_INITIAL_SIZE 16 +#define IS_FILES_MAXIMUM_SIZE 17 +#define IS_FILES_AUTOEXTEND_SIZE 18 +#define IS_FILES_CREATION_TIME 19 +#define IS_FILES_LAST_UPDATE_TIME 20 +#define IS_FILES_LAST_ACCESS_TIME 21 +#define IS_FILES_RECOVER_TIME 22 +#define IS_FILES_TRANSACTION_COUNTER 23 +#define IS_FILES_VERSION 24 +#define IS_FILES_ROW_FORMAT 25 +#define IS_FILES_TABLE_ROWS 26 +#define IS_FILES_AVG_ROW_LENGTH 27 +#define IS_FILES_DATA_LENGTH 28 +#define IS_FILES_MAX_DATA_LENGTH 29 +#define IS_FILES_INDEX_LENGTH 30 +#define IS_FILES_DATA_FREE 31 +#define IS_FILES_CREATE_TIME 32 +#define IS_FILES_UPDATE_TIME 33 +#define IS_FILES_CHECK_TIME 34 +#define IS_FILES_CHECKSUM 35 
+#define IS_FILES_STATUS 36 +#define IS_FILES_EXTRA 37 + +typedef enum { WITHOUT_DB_NAME, WITH_DB_NAME } enum_with_db_name; + +int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond); + +int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, + Table_specification_st *create_info_arg, + enum_with_db_name with_db_name); + +int show_create_table_ex(THD *thd, TABLE_LIST *table_list, + const char * forced_db, const char *forced_name, + String *packet, + Table_specification_st *create_info_arg, + enum_with_db_name with_db_name); + +int copy_event_to_schema_table(THD *thd, TABLE *sch_table, TABLE *event_table); + +bool append_identifier(THD *thd, String *packet, const char *name, size_t length); +static inline bool append_identifier(THD *thd, String *packet, const LEX_CSTRING *name) +{ + return append_identifier(thd, packet, name->str, name->length); +} +void mysqld_list_fields(THD *thd,TABLE_LIST *table, const char *wild); +int mysqld_dump_create_info(THD *thd, TABLE_LIST *table_list, int fd); +bool mysqld_show_create_get_fields(THD *thd, TABLE_LIST *table_list, + List *field_list, String *buffer); +bool mysqld_show_create(THD *thd, TABLE_LIST *table_list); +void mysqld_show_create_db_get_fields(THD *thd, List *field_list); +bool mysqld_show_create_db(THD *thd, LEX_CSTRING *db_name, + LEX_CSTRING *orig_db_name, + const DDL_options_st &options); + +void mysqld_list_processes(THD *thd,const char *user,bool verbose); +int mysqld_show_status(THD *thd); +int mysqld_show_variables(THD *thd,const char *wild); +bool mysqld_show_storage_engines(THD *thd); +bool mysqld_show_authors(THD *thd); +bool mysqld_show_contributors(THD *thd); +bool mysqld_show_privileges(THD *thd); +char *make_backup_log_name(char *buff, const char *name, const char* log_ext); +uint calc_sum_of_all_status(STATUS_VAR *to); +bool append_definer(THD *thd, String *buffer, const LEX_CSTRING *definer_user, + const LEX_CSTRING *definer_host); +int add_status_vars(SHOW_VAR *list); +void 
remove_status_vars(SHOW_VAR *list); +ulonglong get_status_vars_version(void); +void init_status_vars(); +void free_status_vars(); +void reset_status_vars(); +bool show_create_trigger(THD *thd, const sp_name *trg_name); +void view_store_options(THD *thd, TABLE_LIST *table, String *buff); + +void init_fill_schema_files_row(TABLE* table); +void initialize_information_schema_acl(); + +ST_SCHEMA_TABLE *find_schema_table(THD *thd, const LEX_CSTRING *table_name, + bool *in_plugin); +static inline ST_SCHEMA_TABLE *find_schema_table(THD *thd, const LEX_CSTRING *table_name) +{ bool unused; return find_schema_table(thd, table_name, &unused); } + +ST_SCHEMA_TABLE *get_schema_table(enum enum_schema_tables schema_table_idx); +int make_schema_select(THD *thd, SELECT_LEX *sel, + ST_SCHEMA_TABLE *schema_table); +int mysql_schema_table(THD *thd, LEX *lex, TABLE_LIST *table_list); +bool get_schema_tables_result(JOIN *join, + enum enum_schema_table_state executed_place); +enum enum_schema_tables get_schema_table_idx(ST_SCHEMA_TABLE *schema_table); +TABLE *create_schema_table(THD *thd, TABLE_LIST *table_list); + +const char* get_one_variable(THD *thd, const SHOW_VAR *variable, + enum_var_type value_type, SHOW_TYPE show_type, + system_status_var *status_var, + const CHARSET_INFO **charset, char *buff, + size_t *length); + +/* These functions were under INNODB_COMPATIBILITY_HOOKS */ +int get_quote_char_for_identifier(THD *thd, const char *name, size_t length); +THD *find_thread_by_id(longlong id, bool query_id= false); + +class select_result_explain_buffer; +/* + SHOW EXPLAIN/SHOW ANALYZE request object. 
+*/ + +class Show_explain_request : public Apc_target::Apc_call +{ +public: + THD *target_thd; /* thd that we're running SHOW EXPLAIN/ANALYZE for */ + THD *request_thd; /* thd that run SHOW EXPLAIN/ANALYZE command */ + + /* + Set to TRUE if you need the result in JSON format, + FALSE - in traditional tabular + */ + bool is_json_format= false; + + /* FALSE for SHOW EXPLAIN, TRUE - for SHOW ANALYZE*/ + bool is_analyze; + + /* If true, there was some error when producing EXPLAIN output. */ + bool failed_to_produce; + + /* SHOW EXPLAIN/ANALYZE will be stored here */ + select_result_explain_buffer *explain_buf; + + /* Query that we've got SHOW EXPLAIN/ANALYZE for */ + String query_str; + + void call_in_target_thread() override; +}; + + +/** + Condition pushdown used for INFORMATION_SCHEMA / SHOW queries. + This structure is to implement an optimization when + accessing data dictionary data in the INFORMATION_SCHEMA + or SHOW commands. + When the query contain a TABLE_SCHEMA or TABLE_NAME clause, + narrow the search for data based on the constraints given. +*/ +typedef struct st_lookup_field_values +{ + /** + Value of a TABLE_SCHEMA clause. + Note that this value length may exceed @c NAME_LEN. + @sa wild_db_value + */ + LEX_CSTRING db_value; + /** + Value of a TABLE_NAME clause. + Note that this value length may exceed @c NAME_LEN. + @sa wild_table_value + */ + LEX_CSTRING table_value; + /** + True when @c db_value is a LIKE clause, + false when @c db_value is an '=' clause. + */ + bool wild_db_value; + /** + True when @c table_value is a LIKE clause, + false when @c table_value is an '=' clause. 
+ */ + bool wild_table_value; +} LOOKUP_FIELD_VALUES; + +/* + INFORMATION_SCHEMA: Execution plan for get_all_tables() call +*/ + +class IS_table_read_plan : public Sql_alloc +{ +public: + IS_table_read_plan() : no_rows(false), trivial_show_command(FALSE) {} + + bool no_rows; + /* + For EXPLAIN only: For SHOW KEYS and SHOW COLUMNS, we know which + db_name.table_name will be read, however for some reason we don't + set the fields in this->lookup_field_vals. + In order to not have JOIN::save_explain_data() walking over uninitialized + data, we set trivial_show_command=true. + */ + bool trivial_show_command; + + LOOKUP_FIELD_VALUES lookup_field_vals; + Item *partial_cond; + + bool has_db_lookup_value() + { + return (lookup_field_vals.db_value.length && + !lookup_field_vals.wild_db_value); + } + bool has_table_lookup_value() + { + return (lookup_field_vals.table_value.length && + !lookup_field_vals.wild_table_value); + } +}; + +bool optimize_schema_tables_reads(JOIN *join); +bool optimize_schema_tables_memory_usage(List &tables); + +/* Handle the ignored database directories list for SHOW/I_S. */ +bool ignore_db_dirs_init(); +void ignore_db_dirs_free(); +void ignore_db_dirs_reset(); +bool ignore_db_dirs_process_additions(); +bool push_ignored_db_dir(const char *path); +extern char *opt_ignore_db_dirs; + +#endif /* SQL_SHOW_H */ diff --git a/sql/sql_signal.cc b/sql/sql_signal.cc new file mode 100644 index 00000000..4e86cc4d --- /dev/null +++ b/sql/sql_signal.cc @@ -0,0 +1,480 @@ +/* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2009, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "sp_head.h" +#include "sp_pcontext.h" +#include "sp_rcontext.h" +#include "sql_signal.h" + +/* + The parser accepts any error code (desired) + The runtime internally supports any error code (desired) + The client server protocol is limited to 16 bits error codes (restriction), + and the value of 65535 is reserved for progress reporting. + Enforcing the 65534 limit in the runtime until the protocol can change. +*/ +#define MAX_MYSQL_ERRNO 65534 + +const LEX_CSTRING Diag_condition_item_names[]= +{ + { STRING_WITH_LEN("CLASS_ORIGIN") }, + { STRING_WITH_LEN("SUBCLASS_ORIGIN") }, + { STRING_WITH_LEN("CONSTRAINT_CATALOG") }, + { STRING_WITH_LEN("CONSTRAINT_SCHEMA") }, + { STRING_WITH_LEN("CONSTRAINT_NAME") }, + { STRING_WITH_LEN("CATALOG_NAME") }, + { STRING_WITH_LEN("SCHEMA_NAME") }, + { STRING_WITH_LEN("TABLE_NAME") }, + { STRING_WITH_LEN("COLUMN_NAME") }, + { STRING_WITH_LEN("CURSOR_NAME") }, + { STRING_WITH_LEN("MESSAGE_TEXT") }, + { STRING_WITH_LEN("MYSQL_ERRNO") }, + { STRING_WITH_LEN("ROW_NUMBER") }, + + { STRING_WITH_LEN("CONDITION_IDENTIFIER") }, + { STRING_WITH_LEN("CONDITION_NUMBER") }, + { STRING_WITH_LEN("CONNECTION_NAME") }, + { STRING_WITH_LEN("MESSAGE_LENGTH") }, + { STRING_WITH_LEN("MESSAGE_OCTET_LENGTH") }, + { STRING_WITH_LEN("PARAMETER_MODE") }, + { STRING_WITH_LEN("PARAMETER_NAME") }, + { STRING_WITH_LEN("PARAMETER_ORDINAL_POSITION") }, + { STRING_WITH_LEN("RETURNED_SQLSTATE") }, + { STRING_WITH_LEN("ROUTINE_CATALOG") }, + { 
STRING_WITH_LEN("ROUTINE_NAME") }, + { STRING_WITH_LEN("ROUTINE_SCHEMA") }, + { STRING_WITH_LEN("SERVER_NAME") }, + { STRING_WITH_LEN("SPECIFIC_NAME") }, + { STRING_WITH_LEN("TRIGGER_CATALOG") }, + { STRING_WITH_LEN("TRIGGER_NAME") }, + { STRING_WITH_LEN("TRIGGER_SCHEMA") } +}; + + +Set_signal_information::Set_signal_information( + const Set_signal_information& set) +{ + memcpy(m_item, set.m_item, sizeof(m_item)); +} + +void Set_signal_information::clear() +{ + memset(m_item, 0, sizeof(m_item)); +} + + +static bool assign_fixed_string(MEM_ROOT *mem_root, + CHARSET_INFO *dst_cs, + size_t max_char, + String *dst, + const String* src) +{ + bool truncated; + size_t numchars; + CHARSET_INFO *src_cs; + const char* src_str; + const char* src_end; + size_t src_len; + size_t to_copy; + char* dst_str; + size_t dst_len; + size_t dst_copied; + uint32 dummy_offset; + + src_str= src->ptr(); + if (src_str == NULL) + { + dst->set((const char*) NULL, 0, dst_cs); + return false; + } + + src_cs= src->charset(); + src_len= src->length(); + src_end= src_str + src_len; + numchars= src_cs->numchars(src_str, src_end); + + if (numchars <= max_char) + { + to_copy= src->length(); + truncated= false; + } + else + { + numchars= max_char; + to_copy= dst_cs->charpos(src_str, src_end, numchars); + truncated= true; + } + + if (String::needs_conversion(to_copy, src_cs, dst_cs, & dummy_offset)) + { + dst_len= numchars * dst_cs->mbmaxlen; + dst_str= (char*) alloc_root(mem_root, dst_len + 1); + if (dst_str) + { + dst_copied= String_copier().well_formed_copy(dst_cs, dst_str, dst_len, + src_cs, src_str, src_len, + numchars); + DBUG_ASSERT(dst_copied <= dst_len); + dst_len= dst_copied; /* In case the copy truncated the data */ + dst_str[dst_copied]= '\0'; + } + } + else + { + dst_len= to_copy; + dst_str= (char*) alloc_root(mem_root, dst_len + 1); + if (dst_str) + { + memcpy(dst_str, src_str, to_copy); + dst_str[to_copy]= '\0'; + } + } + dst->set(dst_str, dst_len, dst_cs); + + return truncated; +} + 
+static int assign_condition_item(MEM_ROOT *mem_root, const char* name, THD *thd, + Item *set, String *ci) +{ + char str_buff[(64+1)*4]; /* Room for a null terminated UTF8 String 64 */ + String str_value(str_buff, sizeof(str_buff), & my_charset_utf8mb3_bin); + String *str; + bool truncated; + + DBUG_ENTER("assign_condition_item"); + + if (set->is_null()) + { + thd->raise_error_printf(ER_WRONG_VALUE_FOR_VAR, name, "NULL"); + DBUG_RETURN(1); + } + + str= set->val_str(& str_value); + truncated= assign_fixed_string(mem_root, & my_charset_utf8mb3_bin, 64, ci, str); + if (truncated) + { + if (thd->is_strict_mode()) + { + thd->raise_error_printf(ER_COND_ITEM_TOO_LONG, name); + DBUG_RETURN(1); + } + + thd->raise_warning_printf(WARN_COND_ITEM_TRUNCATED, name); + } + + DBUG_RETURN(0); +} + + +int Sql_cmd_common_signal::eval_signal_informations(THD *thd, Sql_condition *cond) +{ + struct cond_item_map + { + enum enum_diag_condition_item_name m_item; + String Sql_condition::*m_member; + }; + + static cond_item_map map[]= + { + { DIAG_CLASS_ORIGIN, & Sql_condition::m_class_origin }, + { DIAG_SUBCLASS_ORIGIN, & Sql_condition::m_subclass_origin }, + { DIAG_CONSTRAINT_CATALOG, & Sql_condition::m_constraint_catalog }, + { DIAG_CONSTRAINT_SCHEMA, & Sql_condition::m_constraint_schema }, + { DIAG_CONSTRAINT_NAME, & Sql_condition::m_constraint_name }, + { DIAG_CATALOG_NAME, & Sql_condition::m_catalog_name }, + { DIAG_SCHEMA_NAME, & Sql_condition::m_schema_name }, + { DIAG_TABLE_NAME, & Sql_condition::m_table_name }, + { DIAG_COLUMN_NAME, & Sql_condition::m_column_name }, + { DIAG_CURSOR_NAME, & Sql_condition::m_cursor_name } + }; + + Item *set; + String str_value; + String *str; + int i; + uint j; + int result= 1; + enum enum_diag_condition_item_name item_enum; + String *member; + const LEX_CSTRING *name; + + DBUG_ENTER("Sql_cmd_common_signal::eval_signal_informations"); + + for (i= FIRST_DIAG_SET_PROPERTY; + i <= LAST_DIAG_SET_PROPERTY; + i++) + { + if ((set= 
m_set_signal_information.m_item[i]) && + set->fix_fields_if_needed(thd, &m_set_signal_information.m_item[i])) + goto end; + } + + /* + Generically assign all the UTF8 String 64 condition items + described in the map. + */ + for (j= 0; j < array_elements(map); j++) + { + item_enum= map[j].m_item; + set= m_set_signal_information.m_item[item_enum]; + if (set != NULL) + { + member= & (cond->* map[j].m_member); + name= & Diag_condition_item_names[item_enum]; + if (assign_condition_item(cond->m_mem_root, name->str, thd, set, member)) + goto end; + } + } + + /* + Assign the remaining attributes. + */ + + set= m_set_signal_information.m_item[DIAG_MESSAGE_TEXT]; + if (set != NULL) + { + if (set->is_null()) + { + thd->raise_error_printf(ER_WRONG_VALUE_FOR_VAR, + "MESSAGE_TEXT", "NULL"); + goto end; + } + /* + Enforce that SET MESSAGE_TEXT = evaluates the value + as VARCHAR(MYSQL_ERRMSG_SIZE) CHARACTER SET UTF8. + */ + bool truncated; + String utf8_text; + str= set->val_str(& str_value); + truncated= assign_fixed_string(thd->mem_root, & my_charset_utf8mb3_bin, + MYSQL_ERRMSG_SIZE, + & utf8_text, str); + if (truncated) + { + if (thd->is_strict_mode()) + { + thd->raise_error_printf(ER_COND_ITEM_TOO_LONG, + "MESSAGE_TEXT"); + goto end; + } + + thd->raise_warning_printf(WARN_COND_ITEM_TRUNCATED, + "MESSAGE_TEXT"); + } + + /* + See the comments + "Design notes about Sql_condition::m_message_text." 
+ in file sql_error.cc + */ + String converted_text; + converted_text.set_charset(error_message_charset_info); + converted_text.append(utf8_text.ptr(), utf8_text.length(), + utf8_text.charset()); + cond->set_builtin_message_text(converted_text.c_ptr_safe()); + } + + set= m_set_signal_information.m_item[DIAG_MYSQL_ERRNO]; + if (set != NULL) + { + if (set->is_null()) + { + thd->raise_error_printf(ER_WRONG_VALUE_FOR_VAR, + "MYSQL_ERRNO", "NULL"); + goto end; + } + longlong code= set->val_int(); + if ((code <= 0) || (code > MAX_MYSQL_ERRNO)) + { + str= set->val_str(& str_value); + thd->raise_error_printf(ER_WRONG_VALUE_FOR_VAR, + "MYSQL_ERRNO", str->c_ptr_safe()); + goto end; + } + cond->m_sql_errno= (int) code; + } + + set= m_set_signal_information.m_item[DIAG_ROW_NUMBER]; + if (set != NULL) + { + if (set->is_null()) + { + thd->raise_error_printf(ER_WRONG_VALUE_FOR_VAR, + "ROW_NUMBER", "NULL"); + goto end; + } + longlong row_number_value= set->val_int(); + if (row_number_value < 0) + { + str= set->val_str(& str_value); + thd->raise_error_printf(ER_WRONG_VALUE_FOR_VAR, + "ROW_NUMBER", str->c_ptr_safe()); + goto end; + } + cond->m_row_number= (ulong) row_number_value; + } + + /* + The various item->val_xxx() methods don't return an error code, + but flag thd in case of failure. 
+ */ + if (likely(!thd->is_error())) + result= 0; + +end: + for (i= FIRST_DIAG_SET_PROPERTY; + i <= LAST_DIAG_SET_PROPERTY; + i++) + { + set= m_set_signal_information.m_item[i]; + if (set) + { + if (set->fixed()) + set->cleanup(); + } + } + + DBUG_RETURN(result); +} + +bool Sql_cmd_common_signal::raise_condition(THD *thd, Sql_condition *cond) +{ + bool result= TRUE; + + DBUG_ENTER("Sql_cmd_common_signal::raise_condition"); + + DBUG_ASSERT(thd->lex->query_tables == NULL); + + cond->assign_defaults(thd, m_cond); + if (eval_signal_informations(thd, cond)) + DBUG_RETURN(result); + + /* SIGNAL should not signal WARN_LEVEL_NOTE, but RESIGNAL can */ + DBUG_ASSERT(cond->m_level == Sql_condition::WARN_LEVEL_ERROR || + cond->m_level != Sql_condition::WARN_LEVEL_NOTE || + sql_command_code() == SQLCOM_RESIGNAL); + + (void) thd->raise_condition(cond); + + if (cond->m_level == Sql_condition::WARN_LEVEL_WARN || + cond->m_level == Sql_condition::WARN_LEVEL_NOTE) + { + my_ok(thd); + result= FALSE; + } + + DBUG_RETURN(result); +} + +bool Sql_cmd_signal::execute(THD *thd) +{ + bool result= TRUE; + DBUG_ASSERT(m_cond); + Sql_condition cond(thd->mem_root, m_cond->get_user_condition_identity()); + + DBUG_ENTER("Sql_cmd_signal::execute"); + + /* + WL#2110 SIGNAL specification says: + + When SIGNAL is executed, it has five effects, in the following order: + + (1) First, the diagnostics area is completely cleared. So if the + SIGNAL is in a DECLARE HANDLER then any pending errors or warnings + are gone. So is 'row count'. + + This has roots in the SQL standard specification for SIGNAL. + */ + + thd->get_stmt_da()->reset_diagnostics_area(); + thd->set_row_count_func(0); + thd->get_stmt_da()->clear_warning_info(thd->query_id); + + result= raise_condition(thd, &cond); + + DBUG_RETURN(result); +} + + +/** + Execute RESIGNAL SQL-statement. + + @param thd Thread context. 
+ + @return Error status + @retval true in case of error + @retval false on success +*/ + +bool Sql_cmd_resignal::execute(THD *thd) +{ + Diagnostics_area *da= thd->get_stmt_da(); + const sp_rcontext::Sql_condition_info *signaled; + int result= TRUE; + + DBUG_ENTER("Resignal_statement::execute"); + + // This is a way to force sql_conditions from the current Warning_info to be + // passed to the caller's Warning_info. + da->set_warning_info_id(thd->query_id); + + if (! thd->spcont || ! (signaled= thd->spcont->raised_condition())) + { + thd->raise_error(ER_RESIGNAL_WITHOUT_ACTIVE_HANDLER); + DBUG_RETURN(result); + } + + Sql_condition signaled_err(thd->mem_root, *signaled, signaled->message, + signaled->m_row_number); + + if (m_cond) + { + query_cache_abort(thd, &thd->query_cache_tls); + + /* Keep handled conditions. */ + da->unmark_sql_conditions_from_removal(); + + /* Check if the old condition still exists. */ + if (da->has_sql_condition(signaled->message, strlen(signaled->message))) + { + /* + Make room for the new RESIGNAL condition and one for the stack trace + note. + */ + da->reserve_space(thd, 2); + } + else + { + /* + Make room for old condition + the new RESIGNAL condition + the stack + trace note. + */ + da->reserve_space(thd, 3); + + da->push_warning(thd, &signaled_err); + } + } + + /* RESIGNAL with signal_value */ + result= raise_condition(thd, &signaled_err); + + DBUG_RETURN(result); + +} + diff --git a/sql/sql_signal.h b/sql/sql_signal.h new file mode 100644 index 00000000..abc9905a --- /dev/null +++ b/sql/sql_signal.h @@ -0,0 +1,123 @@ +/* Copyright (c) 2008 MySQL AB, 2009 Sun Microsystems, Inc. + Use is subject to license terms. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_SIGNAL_H +#define SQL_SIGNAL_H + +/** + Sql_cmd_common_signal represents the common properties of the + SIGNAL and RESIGNAL statements. +*/ +class Sql_cmd_common_signal : public Sql_cmd +{ +protected: + /** + Constructor. + @param cond the condition signaled if any, or NULL. + @param set collection of signal condition item assignments. + */ + Sql_cmd_common_signal(const sp_condition_value *cond, + const Set_signal_information& set) + : Sql_cmd(), + m_cond(cond), + m_set_signal_information(set) + {} + + virtual ~Sql_cmd_common_signal() = default; + + /** + Evaluate each signal condition items for this statement. + @param thd the current thread. + @param cond the condition to update. + @return 0 on success. + */ + int eval_signal_informations(THD *thd, Sql_condition *cond); + + /** + Raise a SQL condition. + @param thd the current thread. + @param cond the condition to raise. + @return false on success. + */ + bool raise_condition(THD *thd, Sql_condition *cond); + + /** + The condition to signal or resignal. + This member is optional and can be NULL (RESIGNAL). + */ + const sp_condition_value *m_cond; + + /** + Collection of 'SET item = value' assignments in the + SIGNAL/RESIGNAL statement. + */ + Set_signal_information m_set_signal_information; +}; + +/** + Sql_cmd_signal represents a SIGNAL statement. +*/ +class Sql_cmd_signal : public Sql_cmd_common_signal +{ +public: + /** + Constructor, used to represent a SIGNAL statement. + @param cond the SQL condition to signal (required). 
+ @param set the collection of signal information to signal. + */ + Sql_cmd_signal(const sp_condition_value *cond, + const Set_signal_information& set) + : Sql_cmd_common_signal(cond, set) + {} + + virtual ~Sql_cmd_signal() = default; + + virtual enum_sql_command sql_command_code() const + { + return SQLCOM_SIGNAL; + } + + virtual bool execute(THD *thd); +}; + +/** + Sql_cmd_resignal represents a RESIGNAL statement. +*/ +class Sql_cmd_resignal : public Sql_cmd_common_signal +{ +public: + /** + Constructor, used to represent a RESIGNAL statement. + @param cond the SQL condition to resignal (optional, may be NULL). + @param set the collection of signal information to resignal. + */ + Sql_cmd_resignal(const sp_condition_value *cond, + const Set_signal_information& set) + : Sql_cmd_common_signal(cond, set) + {} + + virtual ~Sql_cmd_resignal() = default; + + virtual enum_sql_command sql_command_code() const + { + return SQLCOM_RESIGNAL; + } + + virtual bool execute(THD *thd); +}; + +#endif + diff --git a/sql/sql_sort.h b/sql/sql_sort.h new file mode 100644 index 00000000..82e1f492 --- /dev/null +++ b/sql/sql_sort.h @@ -0,0 +1,717 @@ +#ifndef SQL_SORT_INCLUDED +#define SQL_SORT_INCLUDED + +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "my_base.h" /* ha_rows */ +#include /* qsort2_cmp */ +#include "queues.h" +#include "sql_string.h" +#include "sql_class.h" + +class Field; +struct TABLE; + +/* Defines used by filesort and uniques */ + +#define MERGEBUFF 7 +#define MERGEBUFF2 15 + +/* + The structure SORT_ADDON_FIELD describes a fixed layout + for field values appended to sorted values in records to be sorted + in the sort buffer. + Only fixed layout is supported now. + Null bit maps for the appended values are placed before the values + themselves. Offsets are from the last sorted field, that is from the + record reference, which is still the last component of sorted records. + It is preserved for backward compatibility. + The structure is used to store values of the additional fields + in the sort buffer. It is used also when these values are read + from a temporary file/buffer. As the reading procedures are beyond the + scope of the 'filesort' code the values have to be retrieved via + the callback function 'unpack_addon_fields'. +*/ + +typedef struct st_sort_addon_field +{ + /* Sort addon packed field */ + Field *field; /* Original field */ + uint offset; /* Offset from the last sorted field */ + uint null_offset; /* Offset to null bit from the last sorted field */ + uint length; /* Length in the sort buffer */ + uint8 null_bit; /* Null bit mask for the field */ +} SORT_ADDON_FIELD; + +struct BUFFPEK_COMPARE_CONTEXT +{ + qsort_cmp2 key_compare; + void *key_compare_arg; +}; + + +/** + Descriptor for a merge chunk to be sort-merged. + A merge chunk is a sequence of pre-sorted records, written to a + temporary file. A Merge_chunk instance describes where this chunk is stored + in the file, and where it is located when it is in memory. 
+ + It is a POD because + - we read/write them from/to files. + + We have accessors (getters/setters) for all struct members. + */ + +struct Merge_chunk { +public: + my_off_t file_position() const { return m_file_position; } + void set_file_position(my_off_t val) { m_file_position= val; } + void advance_file_position(my_off_t val) { m_file_position+= val; } + + uchar *buffer_start() { return m_buffer_start; } + const uchar *buffer_end() const { return m_buffer_end; } + + void set_buffer(uchar *start, uchar *end) + { + m_buffer_start= start; + m_buffer_end= end; + } + void set_buffer_start(uchar *start) + { + m_buffer_start= start; + } + void set_buffer_end(uchar *end) + { + DBUG_ASSERT(m_buffer_end == NULL || end <= m_buffer_end); + m_buffer_end= end; + } + + void init_current_key() { m_current_key= m_buffer_start; } + uchar *current_key() { return m_current_key; } + void advance_current_key(uint val) { m_current_key+= val; } + + void decrement_rowcount(ha_rows val) { m_rowcount-= val; } + void set_rowcount(ha_rows val) { m_rowcount= val; } + ha_rows rowcount() const { return m_rowcount; } + + ha_rows mem_count() const { return m_mem_count; } + void set_mem_count(ha_rows val) { m_mem_count= val; } + ha_rows decrement_mem_count() { return --m_mem_count; } + + ha_rows max_keys() const { return m_max_keys; } + void set_max_keys(ha_rows val) { m_max_keys= val; } + + size_t buffer_size() const { return m_buffer_end - m_buffer_start; } + + /** + Tries to merge *this with *mc, returns true if successful. + The assumption is that *this is no longer in use, + and the space it has been allocated can be handed over to a + buffer which is adjacent to it. 
+ */ + bool merge_freed_buff(Merge_chunk *mc) const + { + if (mc->m_buffer_end == m_buffer_start) + { + mc->m_buffer_end= m_buffer_end; + mc->m_max_keys+= m_max_keys; + return true; + } + else if (mc->m_buffer_start == m_buffer_end) + { + mc->m_buffer_start= m_buffer_start; + mc->m_max_keys+= m_max_keys; + return true; + } + return false; + } + + /// The current key for this chunk + uchar *m_current_key= nullptr; + /// Current position in the file to be sorted. + my_off_t m_file_position= 0; + /// Start of main-memory buffer for this chunk. + uchar *m_buffer_start= nullptr; + /// End of main-memory buffer for this chunk. + uchar *m_buffer_end= nullptr; + /// Number of unread rows in this chunk. + ha_rows m_rowcount= 0; + /// Number of rows in the main-memory buffer. + ha_rows m_mem_count= 0; + /// If we have fixed-size rows: max number of rows in buffer. + ha_rows m_max_keys= 0; +}; + +typedef Bounds_checked_array Addon_fields_array; +typedef Bounds_checked_array Sort_keys_array; + +/** + This class wraps information about usage of addon fields. + An Addon_fields object is used both during packing of data in the filesort + buffer, and later during unpacking in 'Filesort_info::unpack_addon_fields'. + + @see documentation for the Sort_addon_field struct. + @see documentation for get_addon_fields() + */ +class Addon_fields { +public: + Addon_fields(Addon_fields_array arr) + : m_field_descriptors(arr), + m_addon_buf(), + m_addon_buf_length(), + m_using_packed_addons(false) + { + DBUG_ASSERT(!arr.is_null()); + } + + SORT_ADDON_FIELD *begin() { return m_field_descriptors.begin(); } + SORT_ADDON_FIELD *end() { return m_field_descriptors.end(); } + + /// rr_unpack_from_tempfile needs an extra buffer when unpacking. 
+ uchar *allocate_addon_buf(uint sz) + { + m_addon_buf= (uchar *)my_malloc(PSI_INSTRUMENT_ME, sz, MYF(MY_WME | MY_THREAD_SPECIFIC)); + if (m_addon_buf) + m_addon_buf_length= sz; + return m_addon_buf; + } + + void free_addon_buff() + { + my_free(m_addon_buf); + m_addon_buf= NULL; + m_addon_buf_length= 0; + } + + uchar *get_addon_buf() { return m_addon_buf; } + uint get_addon_buf_length() const { return m_addon_buf_length; } + + void set_using_packed_addons(bool val) + { + m_using_packed_addons= val; + } + + bool using_packed_addons() const + { + return m_using_packed_addons; + } + + static bool can_pack_addon_fields(uint record_length) + { + return (record_length <= (0xFFFF)); + } + + /** + @returns Total number of bytes used for packed addon fields. + the size of the length field + size of null bits + sum of field sizes. + */ + static uint read_addon_length(uchar *p) + { + return size_of_length_field + uint2korr(p); + } + + /** + Stores the number of bytes used for packed addon fields. + */ + static void store_addon_length(uchar *p, uint sz) + { + // We actually store the length of everything *after* the length field. + int2store(p, sz - size_of_length_field); + } + + static const uint size_of_length_field= 2; + +private: + Addon_fields_array m_field_descriptors; + + uchar *m_addon_buf; ///< Buffer for unpacking addon fields. + uint m_addon_buf_length; ///< Length of the buffer. + bool m_using_packed_addons; ///< Are we packing the addon fields? +}; + +/** + This class wraps information about usage of sort keys. + A Sort_keys object is used both during packing of data in the filesort + buffer, and later during unpacking in 'Filesort_info::unpack_addon_fields'. + + @see SORT_FIELD struct. 
+*/ + +class Sort_keys :public Sql_alloc, + public Sort_keys_array +{ +public: + Sort_keys(SORT_FIELD* arr, size_t count): + Sort_keys_array(arr, count), + m_using_packed_sortkeys(false), + size_of_packable_fields(0), + sort_length_with_original_values(0), + sort_length_with_memcmp_values(0), + parameters_computed(false) + { + DBUG_ASSERT(!is_null()); + } + + bool using_packed_sortkeys() const + { return m_using_packed_sortkeys; } + + void set_using_packed_sortkeys(bool val) + { + m_using_packed_sortkeys= val; + } + void set_size_of_packable_fields(uint len) + { + size_of_packable_fields= len; + } + + uint get_size_of_packable_fields() + { + return size_of_packable_fields; + } + + void set_sort_length_with_original_values(uint len) + { + sort_length_with_original_values= len; + } + + uint get_sort_length_with_original_values() + { + return sort_length_with_original_values; + } + + void set_sort_length_with_memcmp_values(uint len) + { + sort_length_with_memcmp_values= len; + } + + uint get_sort_length_with_memcmp_values() + { + return sort_length_with_memcmp_values; + } + + static void store_sortkey_length(uchar *p, uint sz) + { + int4store(p, sz - size_of_length_field); + } + + static uint read_sortkey_length(uchar *p) + { + return size_of_length_field + uint4korr(p); + } + + void increment_size_of_packable_fields(uint len) + { + size_of_packable_fields+= len; + } + + void increment_original_sort_length(uint len) + { + sort_length_with_original_values+= len; + } + + bool is_parameters_computed() { return parameters_computed; } + void set_parameters_computed(bool val) { parameters_computed= val; } + + static const uint size_of_length_field= 4; + +private: + bool m_using_packed_sortkeys; // Are we packing sort keys + uint size_of_packable_fields; // Total length bytes for packable columns + + /* + The sort length for all the keyparts storing the original values + */ + uint sort_length_with_original_values; + + /* + The sort length for all the keyparts storing the 
mem-comparable images + */ + uint sort_length_with_memcmp_values; + + /* + TRUE parameters(like sort_length_* , size_of_packable_field) + are computed + FALSE otherwise. + */ + bool parameters_computed; +}; + + +/** +PACKED SORT KEYS + +Description + +In this optimization we would like to pack the values of the sort key +inside the sort buffer for each record. + +Contents: +1. Background +1.1 Implementation details +2. Solution : Packed Sort Keys +2.1 Packed key format +2.2 Which format to use +3. Special cases +3.1 Handling very long strings +3.2 Handling for long binary strings +3.3 Handling very long strings with Packed sort keys +4. Sort key columns in addon_fields + +1. Background +Before this optimization of using packed sort keys, filesort() sorted the +data using mem-comparable keys. + +That is, if we wanted to sort by + + ORDER BY col1, col2, ... colN +then the filesort code would for each row generate one "Sort Key" +and then sort the rows by their Sort Keys. + +The Sort Keys are mem-comparable (that is, are compared by memcmp()) and +they are of FIXED SIZE. The sort key has the same length regardless of +what value it represents. This causes INEFFICIENT MEMORY USAGE. + +1.1 Implementation details + +make_sortkey() is the function that produces a sort key +from a record. + +The function treats Field and Item objects differently. + +class Field has: + +a) void make_sort_key(uchar *buff, uint length); + make_sort_key is a non-virtual function which handles encoding of + SQL null values. + +b) virtual void sort_string(uchar *buff,uint length)=0; + sort_string produces mem-comparable image of the field value + for each datatype. + +For Items, Type_handler has a virtual function: + + virtual void make_sort_key(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const= 0; + which various datatypes overload. + + +2. SOLUTION: PACKED SORT KEYS + +Note that one can have mem-comparable keys that are not fixed-size. 
+MyRocks uses such encoding for example. + +However for this optimization it was decided to store the original +(non-mem-comparable) values instead and use a datatype-aware +key comparison function. + +2.1 Packed key format +The keys are stored in a new variable-size data format called "packed". + +The format is as follows: + + ....... + + format for a n-part sort key + + is the length of the whole key. +Each packed value is encoded as follows: + + // This is a an SQL NULL + [] // this a non-NULL value +null_byte is present if the field/item is NULLable. +SQL NULL is encoded as just one NULL-indicator byte. The value itself is omitted. + +The format of the packed_value depends on the datatype. +For "non-packable" datatypes it is just their mem-comparable form, as before. + +The "packable" datatypes are currently variable-length strings and the +packed format for them is (for binary blobs, see a note below): + + +2.2 Which format to use + +The advantage of Packed Key Format is potential space savings for +variable-length fields. + +The disadvantages are: + +a) It may actually take more space, because of sort_key_length and + length fields. +b) The comparison function is more expensive. + +Currently the logic is: use Packed Key Format if we would save 128 or more +bytes when constructing a sort key from values that have empty string +for each packable component. + +3. SPECIAL CASES +3.1 HANDLING VERY LONG STRINGS +the size of sort key part was limited by @@max_sort_length variable. +It is defined as: + +The number of bytes to use when sorting data values. The server uses only the +first max_sort_length bytes of each value and ignores the rest. + +3.2 HANDLING VERY LONG BINARY STRINGS +Long binary strings receive special treatment. A sort key for the long +binary string is truncated at max_sort_length bytes like described above, +but then a "suffix" is appended which contains the total length of the +value before the truncation. 
+ +3.3 HANDLING VERY LONG STRINGS WITH PACKED SORT KEY +Truncating multi-byte string at N bytes is not safe because one can cut in the +middle of a character. One is tempted to solve this by discarding the partial +character but that's also not a good idea as in some collations multiple +characters may produce one weight (this is called "contraction"). + +This combination of circumstances: + +The string value is very long, so truncation is necessary +The collation is "complex", so truncation is dangerous +is deemed to be relatively rare so it was decided to just use +the non-packed sort keys in this case. + +4. SORT KEY COLUMNS IN ADDON FIELDS +Currently, each sort key column is actually stored twice +1. as part of the sort key +2. in the addon_fields +This made total sense when sort key stored the mem-comparable image +(from which one cannot restore the original value in general case). +But since we now store the original value, we could also remove it from the +addon_fields and further save space. This is still a limitation and needs +to be fixed later + +@see Sort_keys + +**/ + +/** + The sort record format may use one of two formats for the non-sorted part of + the record: + + 1. Use the rowid + + || | + / / ref_length / + + 2. Use "addon fields" + + |||...| + / / addon_length / + + The packed format for "addon fields" + + ||||...| + / / addon_length / + + The key may use one of the two formats: + A. fixed-size mem-comparable form. The record is always + sort_length bytes long. + B. "PackedKeyFormat" - the records are variable-size. + + Fields are fixed-size, specially encoded with + Field::make_sort_key() so we can do byte-by-byte compare. + + Contains the *actual* packed length (after packing) of + everything after the sort keys. + The size of the length field is 2 bytes, + which should cover most use cases: addon data <= 65535 bytes. + This is the same as max record size in MySQL. 
+ One bit for each nullable field, indicating whether the field + is null or not. May have size zero if no fields are nullable. + Are stored with field->pack(), and retrieved with + field->unpack(). Addon fields within a record are stored + consecutively, with no "holes" or padding. They will have zero + size for NULL values. + +*/ + +class Sort_param { +public: + uint rec_length; // Length of sorted records. + uint sort_length; // Length of sorted columns. + uint ref_length; // Length of record ref. + uint addon_length; // Length of addon_fields + uint res_length; // Length of records in final sorted file/buffer. + uint max_keys_per_buffer; // Max keys / buffer. + uint min_dupl_count; + ha_rows max_rows; // Select limit, or HA_POS_ERROR if unlimited. + ha_rows examined_rows; // Number of examined rows. + TABLE *sort_form; // For quicker make_sortkey. + /** + ORDER BY list with some precalculated info for filesort. + Array is created and owned by a Filesort instance. + */ + Bounds_checked_array local_sortorder; + Addon_fields *addon_fields; // Descriptors for companion fields. + Sort_keys *sort_keys; + ha_rows *accepted_rows; /* For ROWNUM */ + bool using_pq; + bool set_all_read_bits; + + uchar *unique_buff; + bool not_killable; + String tmp_buffer; + // The fields below are used only by Unique class. + qsort2_cmp compare; + BUFFPEK_COMPARE_CONTEXT cmp_context; + + Sort_param() + { + memset(reinterpret_cast(this), 0, sizeof(*this)); + tmp_buffer.set_thread_specific(); + /* + Fix memset() clearing the charset. + TODO: The constructor should be eventually rewritten not to use memset(). + */ + tmp_buffer.set_charset(&my_charset_bin); + } + void init_for_filesort(uint sortlen, TABLE *table, + ha_rows maxrows, Filesort *filesort); + + void (*unpack)(TABLE *); + /// Enables the packing of addons if possible. + void try_to_pack_addons(ulong max_length_for_sort_data); + + /// Are we packing the "addon fields"? 
+ bool using_packed_addons() const + { + DBUG_ASSERT(m_using_packed_addons == + (addon_fields != NULL && + addon_fields->using_packed_addons())); + return m_using_packed_addons; + } + + bool using_packed_sortkeys() const + { + DBUG_ASSERT(m_using_packed_sortkeys == + (sort_keys != NULL && sort_keys->using_packed_sortkeys())); + return m_using_packed_sortkeys; + } + + /// Are we using "addon fields"? + bool using_addon_fields() const + { + return addon_fields != NULL; + } + + uint32 get_result_length(uchar *plen) + { + if (!m_using_packed_addons) + return res_length; + return Addon_fields::read_addon_length(plen); + } + + uint32 get_addon_length(uchar *plen) + { + if (using_packed_addons()) + return Addon_fields::read_addon_length(plen); + else + return addon_length; + } + + uint32 get_sort_length(uchar *plen) + { + if (using_packed_sortkeys()) + return Sort_keys::read_sortkey_length(plen) + + /* + when addon fields are not present, then the sort_length also + includes the res_length. For packed keys here we add + the res_length + */ + (using_addon_fields() ? 0: res_length); + else + return sort_length; + } + + uint get_record_length(uchar *plen) + { + if (m_packed_format) + { + uint sort_len= get_sort_length(plen); + return sort_len + get_addon_length(plen + sort_len); + } + else + return rec_length; + } + + /** + Getter for record length and result length. + @param record_start Pointer to record. + @param [out] recl Store record length here. + @param [out] resl Store result length here. + */ + void get_rec_and_res_len(uchar *record_start, uint *recl, uint *resl) + { + if (m_packed_format) + { + uint sort_len= get_sort_length(record_start); + uint addon_len= get_addon_length(record_start + sort_len); + *recl= sort_len + addon_len; + *resl= using_addon_fields() ? addon_len : res_length; + } + else + { + *recl= rec_length; + *resl= res_length; + } + } + + void try_to_pack_sortkeys(); + + qsort2_cmp get_compare_function() const + { + return using_packed_sortkeys() ? 
+ get_packed_keys_compare_ptr() : + get_ptr_compare(sort_length); + } + void* get_compare_argument(size_t *sort_len) const + { + return using_packed_sortkeys() ? + (void*) this : + (void*) sort_len; + } + + bool is_packed_format() const + { + return m_packed_format; + } + +private: + uint m_packable_length; + bool m_using_packed_addons; ///< caches the value of using_packed_addons() + /* caches the value of using_packed_sortkeys() */ + bool m_using_packed_sortkeys; + bool m_packed_format; +}; + +typedef Bounds_checked_array Sort_buffer; + +int merge_many_buff(Sort_param *param, Sort_buffer sort_buffer, + Merge_chunk *buffpek, uint *maxbuffer, IO_CACHE *t_file); +ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek, + Sort_param *param, bool packing_format); +bool merge_buffers(Sort_param *param,IO_CACHE *from_file, + IO_CACHE *to_file, Sort_buffer sort_buffer, + Merge_chunk *lastbuff, Merge_chunk *Fb, + Merge_chunk *Tb, int flag); +int merge_index(Sort_param *param, Sort_buffer sort_buffer, + Merge_chunk *buffpek, uint maxbuffer, + IO_CACHE *tempfile, IO_CACHE *outfile); +void reuse_freed_buff(QUEUE *queue, Merge_chunk *reuse, uint key_length); + +#endif /* SQL_SORT_INCLUDED */ diff --git a/sql/sql_state.c b/sql/sql_state.c new file mode 100644 index 00000000..66c63dca --- /dev/null +++ b/sql/sql_state.c @@ -0,0 +1,55 @@ +/* Copyright (C) 2000-2003 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* Functions to map mysqld errno to sql_state */ + +#include "mariadb.h" +#include +#include + +struct st_map_errno_to_sqlstate +{ + uint mysql_errno; + const char *odbc_state; + const char *jdbc_state; +}; + +struct st_map_errno_to_sqlstate sqlstate_map[]= +{ +#include +#include +}; + +const char *mysql_errno_to_sqlstate(uint mysql_errno) +{ + uint first=0, end= array_elements(sqlstate_map)-1; + struct st_map_errno_to_sqlstate *map; + + /* Do binary search in the sorted array */ + while (first != end) + { + uint mid= (first+end)/2; + map= sqlstate_map+mid; + if (map->mysql_errno < mysql_errno) + first= mid+1; + else + end= mid; + } + map= sqlstate_map+first; + if (map->mysql_errno == mysql_errno) + return map->odbc_state; + return "HY000"; /* General error */ +} diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc new file mode 100644 index 00000000..ec369006 --- /dev/null +++ b/sql/sql_statistics.cc @@ -0,0 +1,4493 @@ +/* Copyright (C) 2009 MySQL AB + Copyright (c) 2019, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/**
+  @file
+
+  @brief
+    functions to update persistent statistical tables and to read from them
+
+  @defgroup Query_Optimizer Query Optimizer
+  @{
+*/
+
+#include "mariadb.h"
+#include "sql_base.h"
+#include "key.h"
+#include "sql_statistics.h"
+#include "opt_histogram_json.h"
+#include "opt_range.h"
+#include "uniques.h"
+#include "sql_show.h"
+#include "sql_partition.h"
+#include "sql_alter.h"                         // RENAME_STAT_PARAMS
+
+#include <vector>
+#include <string>
+
+/*
+  The system variable 'use_stat_tables' can take one of the
+  following values:
+  "never", "complementary", "preferably".
+  If the value of the variable 'use_stat_tables' is set to
+  "never" then any statistical data from the persistent statistical tables
+  is ignored by the optimizer.
+  If the value of the variable 'use_stat_tables' is set to
+  "complementary" then a particular statistical characteristic is used
+  by the optimizer only if the database engine does not provide similar
+  statistics. For example, 'nulls_ratio' for table columns currently
+  are not provided by any engine. So optimizer uses this statistical data
+  from the statistical tables. At the same time it does not use
+  'avg_frequency' for any index prefix from the statistical tables since
+  a similar statistical characteristic 'records_per_key' can be
+  requested from the database engine.
+  If the value of the variable 'use_stat_tables' is set to
+  "preferably" the optimizer prefers a particular statistical
+  characteristic from the statistical tables to a similar one provided
+  by the database engine.
+  If an ANALYZE command is executed then it results in collecting
+  statistical data for the tables specified by the command and storing
+  the collected statistics in the persistent statistical tables only
+  when the value of the variable 'use_stat_tables' is not
+  equal to "never".
+*/ + +Histogram_base *create_histogram(MEM_ROOT *mem_root, Histogram_type hist_type); + +/* Currently there are only 3 persistent statistical tables */ +static const uint STATISTICS_TABLES= 3; + +/* + The names of the statistical tables in this array must correspond the + definitions of the tables in the file ../scripts/mysql_system_tables.sql +*/ +static const LEX_CSTRING stat_table_name[STATISTICS_TABLES]= +{ + { STRING_WITH_LEN("table_stats") }, + { STRING_WITH_LEN("column_stats") }, + { STRING_WITH_LEN("index_stats") } +}; + + +TABLE_STATISTICS_CB::TABLE_STATISTICS_CB(): + usage_count(0), table_stats(0), + stats_available(TABLE_STAT_NO_STATS), histograms_exists_on_disk(0) +{ + init_sql_alloc(PSI_INSTRUMENT_ME, &mem_root, TABLE_ALLOC_BLOCK_SIZE, 0, + MYF(0)); +} + +TABLE_STATISTICS_CB::~TABLE_STATISTICS_CB() +{ + Column_statistics *column_stats= table_stats->column_stats; + Column_statistics *column_stats_end= column_stats + table_stats->columns; + DBUG_ASSERT(usage_count == 0); + + /* Free json histograms */ + for (; column_stats < column_stats_end ; column_stats++) + { + delete column_stats->histogram; + /* + Protect against possible other free in free_statistics_for_table() + */ + column_stats->histogram= 0; + } + free_root(&mem_root, MYF(0)); +} + + +/** + @details + The function builds a list of TABLE_LIST elements for system statistical + tables using array of TABLE_LIST passed as a parameter. + The lock type of each element is set to TL_READ if for_write = FALSE, + otherwise it is set to TL_WRITE. +*/ + +static +inline void init_table_list_for_stat_tables(TABLE_LIST *tables, bool for_write) +{ + uint i; + + memset((char *) &tables[0], 0, sizeof(TABLE_LIST) * STATISTICS_TABLES); + + for (i= 0; i < STATISTICS_TABLES; i++) + { + tables[i].db= MYSQL_SCHEMA_NAME; + tables[i].table_name= stat_table_name[i]; + tables[i].alias= stat_table_name[i]; + tables[i].lock_type= for_write ? 
TL_WRITE : TL_READ; + if (i < STATISTICS_TABLES - 1) + tables[i].next_global= tables[i].next_local= + tables[i].next_name_resolution_table= &tables[i+1]; + if (i != 0) + tables[i].prev_global= &tables[i-1].next_global; + } +} + +static Table_check_intact_log_error stat_table_intact; + +static const +TABLE_FIELD_TYPE table_stat_fields[TABLE_STAT_N_FIELDS] = +{ + { + { STRING_WITH_LEN("db_name") }, + { STRING_WITH_LEN("varchar(64)") }, + { STRING_WITH_LEN("utf8mb3") } + }, + { + { STRING_WITH_LEN("table_name") }, + { STRING_WITH_LEN("varchar(64)") }, + { STRING_WITH_LEN("utf8mb3") } + }, + { + { STRING_WITH_LEN("cardinality") }, + { STRING_WITH_LEN("bigint(21)") }, + { NULL, 0 } + }, +}; +static const uint table_stat_pk_col[]= {0,1}; +static const TABLE_FIELD_DEF +table_stat_def= {TABLE_STAT_N_FIELDS, table_stat_fields, 2, table_stat_pk_col }; + +static const +TABLE_FIELD_TYPE column_stat_fields[COLUMN_STAT_N_FIELDS] = +{ + { + { STRING_WITH_LEN("db_name") }, + { STRING_WITH_LEN("varchar(64)") }, + { STRING_WITH_LEN("utf8mb3") } + }, + { + { STRING_WITH_LEN("table_name") }, + { STRING_WITH_LEN("varchar(64)") }, + { STRING_WITH_LEN("utf8mb3") } + }, + { + { STRING_WITH_LEN("column_name") }, + { STRING_WITH_LEN("varchar(64)") }, + { STRING_WITH_LEN("utf8mb3") } + }, + { + { STRING_WITH_LEN("min_value") }, + { STRING_WITH_LEN("varbinary(255)") }, + { NULL, 0 } + }, + { + { STRING_WITH_LEN("max_value") }, + { STRING_WITH_LEN("varbinary(255)") }, + { NULL, 0 } + }, + { + { STRING_WITH_LEN("nulls_ratio") }, + { STRING_WITH_LEN("decimal(12,4)") }, + { NULL, 0 } + }, + { + { STRING_WITH_LEN("avg_length") }, + { STRING_WITH_LEN("decimal(12,4)") }, + { NULL, 0 } + }, + { + { STRING_WITH_LEN("avg_frequency") }, + { STRING_WITH_LEN("decimal(12,4)") }, + { NULL, 0 } + }, + { + { STRING_WITH_LEN("hist_size") }, + { STRING_WITH_LEN("tinyint(3)") }, + { NULL, 0 } + }, + { + { STRING_WITH_LEN("hist_type") }, + { STRING_WITH_LEN("enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB')") }, 
+ { STRING_WITH_LEN("utf8mb3") } + }, + { + { STRING_WITH_LEN("histogram") }, + { STRING_WITH_LEN("longblob") }, + { NULL, 0 } + } +}; +static const uint column_stat_pk_col[]= {0,1,2}; +static const TABLE_FIELD_DEF +column_stat_def= {COLUMN_STAT_N_FIELDS, column_stat_fields, 3, column_stat_pk_col}; + +static const +TABLE_FIELD_TYPE index_stat_fields[INDEX_STAT_N_FIELDS] = +{ + { + { STRING_WITH_LEN("db_name") }, + { STRING_WITH_LEN("varchar(64)") }, + { STRING_WITH_LEN("utf8mb3") } + }, + { + { STRING_WITH_LEN("table_name") }, + { STRING_WITH_LEN("varchar(64)") }, + { STRING_WITH_LEN("utf8mb3") } + }, + { + { STRING_WITH_LEN("index") }, + { STRING_WITH_LEN("varchar(64)") }, + { STRING_WITH_LEN("utf8mb3") } + }, + { + { STRING_WITH_LEN("prefix_arity") }, + { STRING_WITH_LEN("int(11)") }, + { NULL, 0 } + }, + { + { STRING_WITH_LEN("avg_frequency") }, + { STRING_WITH_LEN("decimal(12,4)") }, + { NULL, 0 } + } +}; +static const uint index_stat_pk_col[]= {0,1,2,3}; +static const TABLE_FIELD_DEF +index_stat_def= {INDEX_STAT_N_FIELDS, index_stat_fields, 4, index_stat_pk_col}; + + +/** + @brief + Open all statistical tables and lock them +*/ + +static int open_stat_tables(THD *thd, TABLE_LIST *tables, bool for_write) +{ + int rc; + Dummy_error_handler deh; // suppress errors + DBUG_ASSERT(thd->internal_transaction()); + + thd->push_internal_handler(&deh); + init_table_list_for_stat_tables(tables, for_write); + init_mdl_requests(tables); + thd->in_sub_stmt|= SUB_STMT_STAT_TABLES; + rc= open_system_tables_for_read(thd, tables); + thd->in_sub_stmt&= ~SUB_STMT_STAT_TABLES; + thd->pop_internal_handler(); + + + /* If the number of tables changes, we should revise the check below. 
*/ + compile_time_assert(STATISTICS_TABLES == 3); + + if (!rc && + (stat_table_intact.check(tables[TABLE_STAT].table, &table_stat_def) || + stat_table_intact.check(tables[COLUMN_STAT].table, &column_stat_def) || + stat_table_intact.check(tables[INDEX_STAT].table, &index_stat_def))) + { + close_thread_tables(thd); + rc= 1; + } + + return rc; +} + + +/** + @brief + Open a statistical table and lock it + + @details + This is used by DDLs. When a column or index is dropped or renamed, + stat tables need to be adjusted accordingly. + + This function should not generate any errors as the callers are not checking + the result of delete_statistics_for_table() + +*/ +static inline int open_stat_table_for_ddl(THD *thd, TABLE_LIST *table, + const LEX_CSTRING *stat_tab_name) +{ + table->init_one_table(&MYSQL_SCHEMA_NAME, stat_tab_name, NULL, TL_WRITE); + Dummy_error_handler error_handler; + thd->push_internal_handler(&error_handler); + int res= open_system_tables_for_read(thd, table); + thd->pop_internal_handler(); + if (res && error_handler.any_error()) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_CHECK_NO_SUCH_TABLE, + "Got error %d when trying to open statistics " + "table %`s for updating statistics", + error_handler.got_error(), stat_table_name->str); + } + return res; +} + + +/* + The class Column_statistics_collected is a helper class used to collect + statistics on a table column. The class is derived directly from + the class Column_statistics, and, additionally to the fields of the + latter, it contains the fields to accumulate the results of aggregation + for the number of nulls in the column and for the size of the column + values. There is also a container for distinct column values used + to calculate the average number of records per distinct column value. 
+*/ + +class Column_statistics_collected :public Column_statistics +{ + +private: + Field *column; /* The column to collect statistics on */ + ha_rows nulls; /* To accumulate the number of nulls in the column */ + ulonglong column_total_length; /* To accumulate the size of column values */ + Count_distinct_field *count_distinct; /* The container for distinct + column values */ + + bool is_single_pk_col; /* TRUE <-> the only column of the primary key */ + +public: + + inline void init(THD *thd, Field * table_field); + inline bool add(); + inline bool finish(MEM_ROOT *mem_root, ha_rows rows, double sample_fraction); + inline void cleanup(); +}; + + +/** + Stat_table is the base class for classes Table_stat, Column_stat and + Index_stat. The methods of these classes allow us to read statistical + data from statistical tables, write collected statistical data into + statistical tables and update statistical data in these tables + as well as update access fields belonging to the primary key and + delete records by prefixes of the primary key. + Objects of the classes Table_stat, Column_stat and Index stat are used + for reading/writing statistics from/into persistent tables table_stats, + column_stats and index_stats correspondingly. These tables are stored in + the system database 'mysql'. + + Statistics is read and written always for a given database table t. When + an object of any of these classes is created a pointer to the TABLE + structure for this database table is passed as a parameter to the constructor + of the object. The other parameter is a pointer to the TABLE structure for + the corresponding statistical table st. So construction of an object to + read/write statistical data on table t from/into statistical table st + requires both table t and st to be opened. + In some cases the TABLE structure for table t may be undefined. 
Then + the objects of the classes Table_stat, Column_stat and Index stat are + created by the alternative constructor that require only the name + of the table t and the name of the database it belongs to. Currently the + alternative constructors are used only in the cases when some records + belonging to the table are to be deleted, or its keys are to be updated + + Reading/writing statistical data from/into a statistical table is always + performed by a key. At the moment there is only one key defined for each + statistical table and this key is primary. + The primary key for the table table_stats is built as (db_name, table_name). + The primary key for the table column_stats is built as (db_name, table_name, + column_name). + The primary key for the table index_stats is built as (db_name, table_name, + index_name, prefix_arity). + + Reading statistical data from a statistical table is performed by the + following pattern. First a table dependent method sets the values of the + the fields that comprise the lookup key. Then, get_stat_values(...) call + finds the row from the statistical table by the set key. If the row is + found the values of statistical fields are read from this row and are + distributed in the internal structures. + + Let's assume the statistical data is read for table t from database db. + + When statistical data is searched in the table table_stats first + Table_stat::set_key_fields() should set the fields of db_name and + table_name. Then get_stat_values looks for a row by the set key value, + and, if the row is found, reads the value from the column + table_stats.cardinality into the field read_stat.cardinality of the TABLE + structure for table t and sets the value of read_stat.cardinality_is_null + from this structure to FALSE. If the value of the 'cardinality' column + in the row is null or if no row is found read_stat.cardinality_is_null + is set to TRUE. 
+ + When statistical data is searched in the table column_stats first + Column_stat::set_key_fields() should set the fields of db_name, table_name + and column_name with column_name taken out of the only parameter f of the + Field* type passed to this method. After this get_stat_values looks + for a row by the set key value. If the row is found the values of statistical + data columns min_value, max_value, nulls_ratio, avg_length, avg_frequency, + hist_size, hist_type, histogram are read into internal structures. Values + of nulls_ratio, avg_length, avg_frequency, hist_size, hist_type, histogram + are read into the corresponding fields of the read_stat structure from + the Field object f, while values from min_value and max_value are copied + into the min_value and max_value record buffers attached to the TABLE + structure for table t. + If the value of a statistical column in the found row is null, then the + corresponding flag in the f->read_stat.column_stat_nulls bitmap is set off. + Otherwise the flag is set on. If no row is found for the column the all flags + in f->column_stat_nulls are set off. + + When statistical data is searched in the table index_stats first + Index_stat::set_key_fields() has to be called to set the fields of db_name, + table_name, index_name and prefix_arity. The value of index_name is extracted + from the first parameter key_info of the KEY* type passed to the method. + This parameter specifies the index of interest idx. The second parameter + passed to the method specifies the arity k of the index prefix for which + statistical data is to be read. E.g. if the index idx consists of 3 + components (p1,p2,p3) the table index_stats usually will contain 3 rows for + this index: the first - for the prefix (p1), the second - for the prefix + (p1,p2), and the third - for the the prefix (p1,p2,p3). After the key fields + has been set a call of get_stat_value looks for a row by the set key value. 
+ If the row is found and the value of the avg_frequency column is not null + then this value is assigned to key_info->read_stat.avg_frequency[k]. + Otherwise 0 is assigned to this element. + + The method Stat_table::update_stat is used to write statistical data + collected in the internal structures into a statistical table st. + It is assumed that before any invocation of this method a call of the + function st.set_key_fields has set the values of the primary key fields + that serve to locate the row from the statistical table st where the + the collected statistical data from internal structures are to be written + to. The statistical data is written from the counterparts of the + statistical fields of internal structures into which it would be read + by the functions get_stat_values. The counterpart fields are used + only when statistics is collected + When updating/inserting a row from the statistical table st the method + Stat_table::update_stat calls the implementation of the pure virtual + method store_field_values to transfer statistical data from the fields + of internal structures to the fields of record buffer used for updates + of the statistical table st. 
+*/ + +class Stat_table +{ + +private: + + /* Handler used for the retrieval of the statistical table stat_table */ + handler *stat_file; + + uint stat_key_length; /* Length of the key to access stat_table */ + uchar *record[2]; /* Record buffers used to access/update stat_table */ + + + /* This is a helper function used only by the Stat_table constructors */ + void common_init_stat_table() + { + stat_file= stat_table->file; + /* Currently any statistical table has only one key */ + stat_key_idx= 0; + stat_key_info= &stat_table->key_info[stat_key_idx]; + stat_key_length= stat_key_info->key_length; + last_key_length= last_prefix_parts= 0; + record[0]= stat_table->record[0]; + record[1]= stat_table->record[1]; + } + +protected: + + /* Statistical table to read statistics from or to update/delete */ + TABLE *stat_table; + KEY *stat_key_info; /* Structure for the index to access stat_table */ + + /* Table for which statistical data is read / updated */ + const TABLE *table; + const TABLE_SHARE *table_share; /* Table share for 'table */ + const LEX_CSTRING *db_name; /* Name of the database containing 'table' */ + const LEX_CSTRING *table_name; /* Name of the table 'table' */ + + uchar last_key[MAX_KEY_LENGTH]; + uint last_key_length; + uint last_prefix_parts; + + void store_record_for_lookup() + { + DBUG_ASSERT(record[0] == stat_table->record[0]); + } + + int update_record() + { + int err; + if ((err= stat_file->ha_update_row(record[1], record[0])) && + err != HA_ERR_RECORD_IS_THE_SAME) + return err; + return 0; + } + +public: + + uint stat_key_idx; /* The number of the key to access stat_table */ + void store_record_for_update() + { + store_record(stat_table, record[1]); + } + + /** + @details + This constructor has to be called by any constructor of the derived + classes. 
The constructor 'tunes' the private and protected members of + the constructed object to the statistical table 'stat_table' with the + statistical data of our interest and to the table 'tab' for which this + statistics has been collected. + */ + + Stat_table(TABLE *stat, const TABLE *tab) + :stat_table(stat), table(tab) + { + table_share= tab->s; + common_init_stat_table(); + db_name= &table_share->db; + table_name= &table_share->table_name; + } + + + /** + @details + This constructor has to be called by any constructor of the derived + classes. The constructor 'tunes' the private and protected members of + the constructed object to the statistical table 'stat_table' with the + statistical data of our interest and to the table t for which this + statistics has been collected. The table t is uniquely specified + by the database name 'db' and the table name 'tab'. + */ + + Stat_table(TABLE *stat, const LEX_CSTRING *db, const LEX_CSTRING *tab) + :stat_table(stat), table_share(NULL),db_name(db), table_name(tab) + { + common_init_stat_table(); + } + + + virtual ~Stat_table() = default; + + /** + @brief + Store the given values of fields for database name and table name + + @details + This is a purely virtual method. + The implementation for any derived class shall store the given + values of the database name and table name in the corresponding + fields of stat_table. + + @note + The method is called by the update_table_name_key_parts function. + */ + + virtual void change_full_table_name(const LEX_CSTRING *db, + const LEX_CSTRING *tab)= 0; + + + /** + @brief + Store statistical data into fields of the statistical table + + @details + This is a purely virtual method. + The implementation for any derived class shall put the appropriate + statistical data into the corresponding fields of stat_table. + + @note + The method is called by the update_stat function. 
+ */ + + virtual void store_stat_fields()= 0; + + + /** + @brief + Find a record in the statistical table by a primary key + + @details + The function looks for a record in stat_table by its primary key. + It assumes that the key fields have been already stored in the record + buffer of stat_table. + + @retval + FALSE the record is not found + @retval + TRUE the record is found + */ + + bool find_stat() + { + last_key_length= stat_key_length; + key_copy(last_key, record[0], stat_key_info, stat_key_length); + return !stat_file->ha_index_read_idx_map(record[0], stat_key_idx, last_key, + HA_WHOLE_KEY, HA_READ_KEY_EXACT); + } + + void create_key_for_read(uint prefix_parts) + { + last_key_length= 0; + last_prefix_parts= prefix_parts; + for (uint i= 0; i < prefix_parts; i++) + last_key_length+= stat_key_info->key_part[i].store_length; + key_copy(last_key, record[0], stat_key_info, last_key_length); + } + + + /** + @brief + Find a record in the statistical table by a key prefix value + + @details + The function looks for a record in stat_table by the key value consisting + of 'prefix_parts' major components for the primary index. + It assumes that the key prefix fields have been already stored in the record + buffer of stat_table. 
+ + @retval + FALSE the record is not found + @retval + TRUE the record is found + */ + + bool find_next_stat_for_prefix(uint prefix_parts) + { + create_key_for_read(prefix_parts); + key_part_map prefix_map= (key_part_map) ((1 << prefix_parts) - 1); + return !stat_file->ha_index_read_idx_map(record[0], stat_key_idx, last_key, + prefix_map, HA_READ_KEY_EXACT); + } + + bool find_next_stat_for_prefix_with_next(uint prefix_parts) + { + create_key_for_read(prefix_parts); + key_part_map prefix_map= (key_part_map) ((1 << prefix_parts) - 1); + return !stat_file->ha_index_read_map(record[0], last_key, + prefix_map, + HA_READ_KEY_EXACT); + } + + /* + Read row with same key parts as last find_next_stat_for_prefix_with_next() + */ + + bool find_stat_with_next() + { + key_copy(last_key, record[0], stat_key_info, last_key_length); + key_part_map prefix_map= (key_part_map) ((1 << last_prefix_parts) - 1); + return !stat_file->ha_index_read_map(record[0], last_key, + prefix_map, HA_READ_KEY_EXACT); + } + + /** + @brief + Update/insert a record in the statistical table with new statistics + + @details + The function first looks for a record by its primary key in the statistical + table stat_table. If the record is found the function updates statistical + fields of the records. The data for these fields are taken from internal + structures containing info on the table 'table'. If the record is not + found the function inserts a new record with the primary key set to the + search key and the statistical data taken from the internal structures. + The function assumes that the key fields have been already stored in + the record buffer of stat_table. + + @retval + FALSE success with the update/insert of the record + @retval + TRUE failure with the update/insert of the record + + @note + The function calls the virtual method store_stat_fields to populate the + statistical fields of the updated/inserted row with new statistics. 
+ */ + + bool update_stat() + { + if (find_stat()) + { + bool res; + store_record_for_update(); + store_stat_fields(); + res= update_record() != 0; + DBUG_ASSERT(res == 0); + return res; + } + else + { + int err; + store_stat_fields(); + if ((err= stat_file->ha_write_row(record[0]))) + { + DBUG_ASSERT(0); + return TRUE; + } + } + return FALSE; + } + + /** + @brief + Update the table name fields in the current record of stat_table + + @details + The function updates the fields containing database name and table name + for the last found record in the statistical table stat_table. + The corresponding names for update is taken from the parameters + db and tab. + + @retval + FALSE success with the update of the record + @retval + TRUE failure with the update of the record + + @note + The function calls the virtual method change_full_table_name + to store the new names in the record buffer used for updates. + */ + + bool update_table_name_key_parts(const LEX_CSTRING *db, const LEX_CSTRING *tab) + { + store_record_for_update(); + change_full_table_name(db, tab); + bool rc= update_record() != 0; + store_record_for_lookup(); + return rc; + } + + + /** + @brief + Delete the current record of the statistical table stat_table + + @details + The function deletes the last found record from the statistical + table stat_table. + + @retval + FALSE success with the deletion of the record + @retval + TRUE failure with the deletion of the record + */ + + bool delete_stat() + { + int err; + if ((err= stat_file->ha_delete_row(record[0]))) + return TRUE; + return FALSE; + } + + void flush() + { + stat_file->extra(HA_EXTRA_FLUSH); + } + + friend class Stat_table_write_iter; +}; + + +/* + An object of the class Table_stat is created to read statistical + data on tables from the statistical table table_stats, to update + table_stats with such statistical data, or to update columns + of the primary key, or to delete the record by its primary key or + its prefix. 
+ Rows from the statistical table are read and updated always by + primary key. +*/ + +class Table_stat: public Stat_table +{ + +private: + + Field *db_name_field; /* Field for the column table_stats.db_name */ + Field *table_name_field; /* Field for the column table_stats.table_name */ + + void common_init_table_stat() + { + db_name_field= stat_table->field[TABLE_STAT_DB_NAME]; + table_name_field= stat_table->field[TABLE_STAT_TABLE_NAME]; + } + + void change_full_table_name(const LEX_CSTRING *db, + const LEX_CSTRING *tab) override + { + db_name_field->store(db, system_charset_info); + table_name_field->store(tab, system_charset_info); + } + +public: + + /** + @details + The constructor 'tunes' the private and protected members of the + constructed object for the statistical table table_stats to read/update + statistics on table 'tab'. The TABLE structure for the table table_stat + must be passed as a value for the parameter 'stat'. + */ + + Table_stat(TABLE *stat, const TABLE *tab) :Stat_table(stat, tab) + { + common_init_table_stat(); + } + + + /** + @details + The constructor 'tunes' the private and protected members of the + object constructed for the statistical table table_stat for + the future updates/deletes of the record concerning the table 'tab' + from the database 'db'. + */ + + Table_stat(TABLE *stat, const LEX_CSTRING *db, const LEX_CSTRING *tab) + :Stat_table(stat, db, tab) + { + common_init_table_stat(); + } + + + /** + @brief + Set the key fields for the statistical table table_stat + + @details + The function sets the values of the fields db_name and table_name + in the record buffer for the statistical table table_stat. + These fields comprise the primary key for the table. + + @note + The function is supposed to be called before any use of the + method find_stat for an object of the Table_stat class. 
+ */ + + void set_key_fields() + { + db_name_field->store(db_name, system_charset_info); + table_name_field->store(table_name, system_charset_info); + } + + + /** + @brief + Store statistical data into statistical fields of table_stat + + @details + This implementation of a purely virtual method sets the value of the + column 'cardinality' of the statistical table table_stat according to + the value of the flag write_stat.cardinality_is_null and the value of + the field write_stat.cardinality' from the TABLE structure for 'table'. + */ + + void store_stat_fields() override + { + Field *stat_field= stat_table->field[TABLE_STAT_CARDINALITY]; + if (table->collected_stats->cardinality_is_null) + stat_field->set_null(); + else + { + stat_field->set_notnull(); + stat_field->store(table->collected_stats->cardinality,true); + } + } + + + /** + @brief + Read statistical data from statistical fields of table_stat + + @details + Find a record in mysql.table_stat that has statistics for this table. + We search for record using a PK lookup. The lookup values are in the stat + table's record buffer, they were put there by Table_stat::set_key_fields. + + The result is stored in *read_stats. + */ + + bool get_stat_values(Table_statistics *read_stats) + { + bool res; + read_stats->cardinality_is_null= TRUE; + read_stats->cardinality= 0; + if ((res= find_stat())) + { + Field *stat_field= stat_table->field[TABLE_STAT_CARDINALITY]; + if (!stat_field->is_null()) + { + read_stats->cardinality_is_null= FALSE; + read_stats->cardinality= stat_field->val_int(); + } + } + return res; + } +}; + + +/* + An object of the class Column_stat is created to read statistical data + on table columns from the statistical table column_stats, to update + column_stats with such statistical data, or to update columns + of the primary key, or to delete the record by its primary key or + its prefix. + Rows from the statistical table are read and updated always by + primary key. 
+*/ + +class Column_stat: public Stat_table +{ + +private: + + Field *db_name_field; /* Field for the column column_stats.db_name */ + Field *table_name_field; /* Field for the column column_stats.table_name */ + Field *column_name_field; /* Field for the column column_stats.column_name */ + + Field *table_field; /* Field from 'table' to read /update statistics on */ + + void common_init_column_stat_table() + { + db_name_field= stat_table->field[COLUMN_STAT_DB_NAME]; + table_name_field= stat_table->field[COLUMN_STAT_TABLE_NAME]; + column_name_field= stat_table->field[COLUMN_STAT_COLUMN_NAME]; + } + + void change_full_table_name(const LEX_CSTRING *db, + const LEX_CSTRING *tab) override + { + db_name_field->store(db, system_charset_info); + table_name_field->store(tab, system_charset_info); + } + +public: + + /** + @details + The constructor 'tunes' the private and protected members of the + constructed object for the statistical table column_stats to read/update + statistics on fields of the table 'tab'. The TABLE structure for the table + column_stats must be passed as a value for the parameter 'stat'. + */ + + Column_stat(TABLE *stat, const TABLE *tab) :Stat_table(stat, tab) + { + common_init_column_stat_table(); + } + + + /** + @details + The constructor 'tunes' the private and protected members of the + object constructed for the statistical table column_stats for + the future updates/deletes of the record concerning the table 'tab' + from the database 'db'. + */ + + Column_stat(TABLE *stat, const LEX_CSTRING *db, const LEX_CSTRING *tab) + :Stat_table(stat, db, tab) + { + common_init_column_stat_table(); + } + + /** + @brief + Set table name fields for the statistical table column_stats + + @details + The function stores the values of the fields db_name and table_name + of the statistical table column_stats in the record buffer. 
+ */ + + void set_full_table_name() + { + db_name_field->store(db_name, system_charset_info); + table_name_field->store(table_name, system_charset_info); + } + + + /** + @brief + Set the key fields for the statistical table column_stats + + @param + col Field for the 'table' column to read/update statistics on + + @details + The function stores the values of the fields db_name, table_name and + column_name in the record buffer for the statistical table column_stats. + These fields comprise the primary key for the table. + It also sets table_field to the passed parameter. + + @note + The function is supposed to be called before any use of the + method find_stat for an object of the Column_stat class. + */ + + void set_key_fields(Field *col) + { + set_full_table_name(); + column_name_field->store(&col->field_name, system_charset_info); + table_field= col; + } + + void set_key_fields(LEX_CSTRING *field_name) + { + set_full_table_name(); + column_name_field->store(field_name, system_charset_info); + table_field= 0; // Safety + } + + + /** + @brief + Update the table name fields in the current record of stat_table + + @details + The function updates the primary key fields containing database name, + table name, and column name for the last found record in the statistical + table column_stats. 
+ + @retval + 0 success with the update of the record + @retval + # handler error in case of failure + */ + + int update_column_key_part(LEX_CSTRING *col) + { + int rc; + store_record_for_update(); + rc= update_column(col); + store_record_for_lookup(); + return rc; + } + + int update_column(LEX_CSTRING *col) + { + column_name_field->store(col, system_charset_info); + return update_record(); + } + + + /** + @brief + Store statistical data into statistical fields of column_stats + + @details + This implementation of a purely virtual method sets the value of the + columns 'min_value', 'max_value', 'nulls_ratio', 'avg_length', + 'avg_frequency', 'hist_size', 'hist_type' and 'histogram' of the + stistical table columns_stat according to the contents of the bitmap + write_stat.column_stat_nulls and the values of the fields min_value, + max_value, nulls_ratio, avg_length, avg_frequency, hist_size, hist_type + and histogram of the structure write_stat from the Field structure + for the field 'table_field'. + The value of the k-th column in the table columns_stat is set to NULL + if the k-th bit in the bitmap 'column_stat_nulls' is set to 1. + + @note + A value from the field min_value/max_value is always converted + into a varbinary string. If the length of the column 'min_value'/'max_value' + is less than the length of the string the string is trimmed to fit the + length of the column. 
+ */ + + void store_stat_fields() override + { + StringBuffer val; + + MY_BITMAP *old_map= dbug_tmp_use_all_columns(stat_table, + &stat_table->read_set); + for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_HISTOGRAM; i++) + { + Field *stat_field= stat_table->field[i]; + Column_statistics *stats= table_field->collected_stats; + if (stats->is_null(i)) + stat_field->set_null(); + else + { + stat_field->set_notnull(); + switch (i) { + case COLUMN_STAT_MIN_VALUE: + { + /* + TODO varun: After MDEV-22583 is fixed, add a function in Field_bit + and move this implementation there + */ + if (table_field->type() == MYSQL_TYPE_BIT) + stat_field->store(stats->min_value->val_int(),true); + else + stats->min_value->store_to_statistical_minmax_field(stat_field, &val); + break; + } + case COLUMN_STAT_MAX_VALUE: + { + if (table_field->type() == MYSQL_TYPE_BIT) + stat_field->store(stats->max_value->val_int(),true); + else + stats->max_value->store_to_statistical_minmax_field(stat_field, &val); + break; + } + case COLUMN_STAT_NULLS_RATIO: + stat_field->store(stats->get_nulls_ratio()); + break; + case COLUMN_STAT_AVG_LENGTH: + stat_field->store(stats->get_avg_length()); + break; + case COLUMN_STAT_AVG_FREQUENCY: + stat_field->store(stats->get_avg_frequency()); + break; + case COLUMN_STAT_HIST_SIZE: + /* + This is only here so that one can see the size when selecting + from the table. It is not used. + */ + stat_field->store(stats->histogram ? 
+ stats->histogram->get_size() : 0); + break; + case COLUMN_STAT_HIST_TYPE: + if (stats->histogram) + stat_field->store(stats->histogram->get_type() + 1); + else + stat_field->set_null(); + break; + case COLUMN_STAT_HISTOGRAM: + if (stats->histogram) + stats->histogram->serialize(stat_field); + else + stat_field->set_null(); + break; + } + } + } + dbug_tmp_restore_column_map(&stat_table->read_set, old_map); + } + + + /** + @brief + Read statistical data from statistical fields of column_stats + + @details + Find a record in mysql.column_stats that has statistics for this column. + We search for record using a PK lookup. The lookup values are in the stat + table's record buffer. Then, if the row is + found, the function reads the values of the columns 'min_value', + 'max_value', 'nulls_ratio', 'avg_length', 'avg_frequency', 'hist_size' and + 'hist_type" of the table column_stat and sets the members of *read_stats + accordingly. + */ + + bool get_stat_values(Column_statistics *read_stats, MEM_ROOT *mem_root, + bool want_histograms) + { + bool res; + read_stats->set_all_nulls(); + + if (read_stats->min_value) + read_stats->min_value->set_null(); + if (read_stats->max_value) + read_stats->max_value->set_null(); + read_stats->histogram= 0; + + if ((res= find_stat())) + { + char buff[MAX_FIELD_WIDTH]; + String val(buff, sizeof(buff), &my_charset_bin); + Histogram_type hist_type= INVALID_HISTOGRAM; + + for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_HISTOGRAM; i++) + { + Field *stat_field= stat_table->field[i]; + + if (!stat_field->is_null() && + (i > COLUMN_STAT_MAX_VALUE || + (i == COLUMN_STAT_MIN_VALUE && + read_stats->min_value) || + (i == COLUMN_STAT_MAX_VALUE && + read_stats->max_value))) + { + read_stats->set_not_null(i); + + switch (i) { + case COLUMN_STAT_MIN_VALUE: + { + Field *field= read_stats->min_value; + field->set_notnull(); + if (table_field->type() == MYSQL_TYPE_BIT) + field->store(stat_field->val_int(), true); + else + 
field->store_from_statistical_minmax_field(stat_field, &val, + mem_root); + break; + } + case COLUMN_STAT_MAX_VALUE: + { + Field *field= read_stats->max_value; + field->set_notnull(); + if (table_field->type() == MYSQL_TYPE_BIT) + field->store(stat_field->val_int(), true); + else + field->store_from_statistical_minmax_field(stat_field, &val, + mem_root); + break; + } + case COLUMN_STAT_NULLS_RATIO: + read_stats->set_nulls_ratio(stat_field->val_real()); + break; + case COLUMN_STAT_AVG_LENGTH: + read_stats->set_avg_length(stat_field->val_real()); + break; + case COLUMN_STAT_AVG_FREQUENCY: + read_stats->set_avg_frequency(stat_field->val_real()); + break; + case COLUMN_STAT_HIST_SIZE: + /* + Ignore the contents of mysql.column_stats.hist_size. We take the + size from the mysql.column_stats.histogram column, itself. + */ + break; + case COLUMN_STAT_HIST_TYPE: + hist_type= (Histogram_type) (stat_field->val_int() - 1); + break; + case COLUMN_STAT_HISTOGRAM: + { + Histogram_base *hist= 0; + read_stats->histogram_exists= 0; + if (hist_type != INVALID_HISTOGRAM) + { + if (want_histograms) + { + char buff[MAX_FIELD_WIDTH]; + String val(buff, sizeof(buff), &my_charset_bin), *result; + result= stat_field->val_str(&val); + if (result->length()) + { + MY_BITMAP *old_sets[2]; + TABLE *tbl= (TABLE *) table; + dbug_tmp_use_all_columns(tbl, old_sets, + &tbl->read_set, &tbl->write_set); + + if ((hist= create_histogram(mem_root, hist_type))) + { + if (hist->parse(mem_root, db_name->str, table_name->str, + table->field[table_field->field_index], + result->ptr(), result->length())) + { + delete hist; + } + else + { + read_stats->histogram= hist; + read_stats->histogram_exists= 1; + } + } + dbug_tmp_restore_column_maps(&tbl->read_set, + &tbl->write_set, + old_sets); + } + } + else + read_stats->histogram_exists= 1; + } + if (!hist) + read_stats->set_null(COLUMN_STAT_HISTOGRAM); + break; + } + } + } + } + } + return res; + } +}; + + +bool Histogram_binary::parse(MEM_ROOT *mem_root, const 
char*, const char*, + Field*, + const char *hist_data, size_t hist_data_len) +{ + size= hist_data_len; // 'size' holds the size of histogram in bytes + if (!(values= (uchar*)alloc_root(mem_root, hist_data_len))) + return true; + + memcpy(values, hist_data, hist_data_len); + return false; +} + +/* + Save the histogram data info a table field. +*/ +void Histogram_binary::serialize(Field *field) +{ + field->store((char*)values, size, &my_charset_bin); +} + +void Histogram_binary::init_for_collection(MEM_ROOT *mem_root, + Histogram_type htype_arg, + ulonglong size_arg) +{ + type= htype_arg; + values= (uchar*)alloc_root(mem_root, (size_t)size_arg); + size= (uint8) size_arg; +} + + +/* + An object of the class Index_stat is created to read statistical + data on tables from the statistical table table_stat, to update + index_stats with such statistical data, or to update columns + of the primary key, or to delete the record by its primary key or + its prefix. + Rows from the statistical table are read and updated always by + primary key. 
+*/ + +class Index_stat: public Stat_table +{ + +private: + + Field *db_name_field; /* Field for the column index_stats.db_name */ + Field *table_name_field; /* Field for the column index_stats.table_name */ + Field *index_name_field; /* Field for the column index_stats.table_name */ + Field *prefix_arity_field; /* Field for the column index_stats.prefix_arity */ + const KEY *table_key_info; /* Info on the index to read/update statistics on */ + uint prefix_arity; /* Number of components of the index prefix of interest */ + + void common_init_index_stat_table() + { + db_name_field= stat_table->field[INDEX_STAT_DB_NAME]; + table_name_field= stat_table->field[INDEX_STAT_TABLE_NAME]; + index_name_field= stat_table->field[INDEX_STAT_INDEX_NAME]; + prefix_arity_field= stat_table->field[INDEX_STAT_PREFIX_ARITY]; + } + + void change_full_table_name(const LEX_CSTRING *db, + const LEX_CSTRING *tab) override + { + db_name_field->store(db, system_charset_info); + table_name_field->store(tab, system_charset_info); + } + +public: + + + /** + @details + The constructor 'tunes' the private and protected members of the + constructed object for the statistical table index_stats to read/update + statistics on prefixes of different indexes of the table 'tab'. + The TABLE structure for the table index_stats must be passed as a value + for the parameter 'stat'. + */ + + Index_stat(TABLE *stat, const TABLE *tab) :Stat_table(stat, tab) + { + common_init_index_stat_table(); + } + + + /** + @details + The constructor 'tunes' the private and protected members of the + object constructed for the statistical table index_stats for + the future updates/deletes of the record concerning the table 'tab' + from the database 'db'. 
+ */ + + Index_stat(TABLE *stat, const LEX_CSTRING *db, const LEX_CSTRING *tab) + :Stat_table(stat, db, tab) + { + common_init_index_stat_table(); + } + + + /** + @brief + Set table name fields for the statistical table index_stats + + @details + The function stores the values of the fields db_name and table_name + of the statistical table index_stats in the record buffer. + */ + + void set_full_table_name() + { + db_name_field->store(db_name, system_charset_info); + table_name_field->store(table_name, system_charset_info); + } + + inline void set_index_name(const LEX_CSTRING *name) + { + index_name_field->store(name, system_charset_info); + } + + /** + @brief + Set the key fields of index_stats used to access records for index prefixes + + @param + index_info Info for the index of 'table' to read/update statistics on + + @details + The function sets the values of the fields db_name, table_name and + index_name in the record buffer for the statistical table index_stats. + It also sets table_key_info to the passed parameter. + + @note + The function is supposed to be called before any use of the method + find_next_stat_for_prefix for an object of the Index_stat class. + */ + + void set_index_prefix_key_fields(const KEY *index_info) + { + set_full_table_name(); + set_index_name(&index_info->name); + table_key_info= index_info; + } + + + /** + @brief + Set the key fields for the statistical table index_stats + + @param + index_info Info for the index of 'table' to read/update statistics on + @param + index_prefix_arity Number of components in the index prefix of interest + + @details + The function sets the values of the fields db_name, table_name and + index_name, prefix_arity in the record buffer for the statistical + table index_stats. These fields comprise the primary key for the table. + + @note + The function is supposed to be called before any use of the + method find_stat for an object of the Index_stat class. 
+ */ + + void set_key_fields(KEY *index_info, uint index_prefix_arity) + { + set_index_prefix_key_fields(index_info); + prefix_arity= index_prefix_arity; + prefix_arity_field->store(index_prefix_arity, TRUE); + } + + + int update_index_name(const LEX_CSTRING *name) + { + index_name_field->store(name, system_charset_info); + return update_record(); + } + + + int read_next() + { + return stat_table->file->ha_index_next_same(stat_table->record[0], + last_key, + last_key_length); + } + + /** + @brief + Store statistical data into statistical fields of table index_stats + + @details + This implementation of a purely virtual method sets the value of the + column 'avg_frequency' of the statistical table index_stats according to + the value of write_stat.avg_frequency[Index_stat::prefix_arity] + from the KEY_INFO structure 'table_key_info'. + If the value of write_stat. avg_frequency[Index_stat::prefix_arity] is + equal to 0, the value of the column is set to NULL. + */ + + void store_stat_fields() override + { + Field *stat_field= stat_table->field[INDEX_STAT_AVG_FREQUENCY]; + double avg_frequency= + table_key_info->collected_stats->get_avg_frequency(prefix_arity-1); + if (avg_frequency == 0) + stat_field->set_null(); + else + { + stat_field->set_notnull(); + stat_field->store(avg_frequency); + } + } + + + /** + @brief + Read statistical data from statistical fields of index_stats + + @details + Find a record in mysql.index_stats that has statistics for the index prefix + of interest (the prefix length is in this->prefix_arity). + We search for record using a PK lookup. The lookup values are in the stat + table's record buffer. + + The result is stored in read_stats->avg_frequency[this->prefix_arity]. + If mysql.index_stats doesn't have the value or has SQL NULL, we store the + value of 0. 
+ */ + + bool get_stat_values(Index_statistics *read_stats) + { + double avg_frequency= 0; + bool res; + if ((res= find_stat())) + { + Field *stat_field= stat_table->field[INDEX_STAT_AVG_FREQUENCY]; + if (!stat_field->is_null()) + avg_frequency= stat_field->val_real(); + } + read_stats->set_avg_frequency(prefix_arity-1, avg_frequency); + return res; + } +}; + + +/* + An iterator to enumerate statistics table rows which allows to modify + the rows while reading them. + + Used by RENAME TABLE handling to assign new dbname.tablename to statistic + rows. +*/ +class Stat_table_write_iter +{ + Stat_table *owner; + IO_CACHE io_cache; + uchar *rowid_buf; + uint rowid_size; + +public: + Stat_table_write_iter(Stat_table *stat_table_arg) + : owner(stat_table_arg), rowid_buf(NULL), + rowid_size(owner->stat_file->ref_length) + { + my_b_clear(&io_cache); + } + + /* + Initialize the iterator. It will return rows with n_keyparts matching the + curernt values. + + @return false - OK + true - Error + */ + bool init(uint n_keyparts) + { + if (!(rowid_buf= (uchar*)my_malloc(PSI_INSTRUMENT_ME, rowid_size, MYF(0)))) + return true; + + if (open_cached_file(&io_cache, mysql_tmpdir, TEMP_PREFIX, + 1024, MYF(MY_WME))) + return true; + + handler *h= owner->stat_file; + uchar key[MAX_KEY_LENGTH]; + uint prefix_len= 0; + for (uint i= 0; i < n_keyparts; i++) + prefix_len += owner->stat_key_info->key_part[i].store_length; + + key_copy(key, owner->record[0], owner->stat_key_info, + prefix_len); + key_part_map prefix_map= (key_part_map) ((1 << n_keyparts) - 1); + h->ha_index_init(owner->stat_key_idx, false); + int res= h->ha_index_read_map(owner->record[0], key, prefix_map, + HA_READ_KEY_EXACT); + if (res) + { + reinit_io_cache(&io_cache, READ_CACHE, 0L, 0, 0); + /* "Key not found" is not considered an error */ + return (res == HA_ERR_KEY_NOT_FOUND)? 
false: true; + } + + do { + h->position(owner->record[0]); + my_b_write(&io_cache, h->ref, rowid_size); + + } while (!h->ha_index_next_same(owner->record[0], key, prefix_len)); + + /* Prepare for reading */ + reinit_io_cache(&io_cache, READ_CACHE, 0L, 0, 0); + h->ha_index_or_rnd_end(); + if (h->ha_rnd_init(false)) + return true; + + return false; + } + + /* + Read the next row. + + @return + false OK + true No more rows or error. + */ + bool get_next_row() + { + if (!my_b_inited(&io_cache) || my_b_read(&io_cache, rowid_buf, rowid_size)) + return true; /* No more data */ + + handler *h= owner->stat_file; + /* + We should normally be able to find the row that we have rowid for. If we + don't, let's consider this an error. + */ + int res= h->ha_rnd_pos(owner->record[0], rowid_buf); + + return (res==0)? false : true; + } + + void cleanup() + { + if (rowid_buf) + my_free(rowid_buf); + rowid_buf= NULL; + owner->stat_file->ha_index_or_rnd_end(); + close_cached_file(&io_cache); + my_b_clear(&io_cache); + } + + ~Stat_table_write_iter() + { + /* Ensure that cleanup has been run */ + DBUG_ASSERT(rowid_buf == 0); + } +}; + +class Histogram_binary_builder : public Histogram_builder +{ + Field *min_value; /* pointer to the minimal value for the field */ + Field *max_value; /* pointer to the maximal value for the field */ + Histogram_binary *histogram; /* the histogram location */ + uint hist_width; /* the number of points in the histogram */ + double bucket_capacity; /* number of rows in a bucket of the histogram */ + uint curr_bucket; /* number of the current bucket to be built */ + +public: + Histogram_binary_builder(Field *col, uint col_len, ha_rows rows) + : Histogram_builder(col, col_len, rows) + { + Column_statistics *col_stats= col->collected_stats; + min_value= col_stats->min_value; + max_value= col_stats->max_value; + histogram= (Histogram_binary*)col_stats->histogram; + hist_width= histogram->get_width(); + bucket_capacity= (double) records / (hist_width + 1); + 
curr_bucket= 0; + } + + int next(void *elem, element_count elem_cnt) override + { + counters.next(elem, elem_cnt); + ulonglong count= counters.get_count(); + if (curr_bucket == hist_width) + return 0; + if (count > bucket_capacity * (curr_bucket + 1)) + { + column->store_field_value((uchar *) elem, col_length); + histogram->set_value(curr_bucket, + column->pos_in_interval(min_value, max_value)); + curr_bucket++; + while (curr_bucket != hist_width && + count > bucket_capacity * (curr_bucket + 1)) + { + histogram->set_prev_value(curr_bucket); + curr_bucket++; + } + } + return 0; + } + void finalize() override {} +}; + + +Histogram_builder *Histogram_binary::create_builder(Field *col, uint col_len, + ha_rows rows) +{ + return new Histogram_binary_builder(col, col_len, rows); +} + + +Histogram_base *create_histogram(MEM_ROOT *mem_root, Histogram_type hist_type) +{ + Histogram_base *res= NULL; + switch (hist_type) { + case SINGLE_PREC_HB: + case DOUBLE_PREC_HB: + res= new (mem_root) Histogram_binary(hist_type); + break; + case JSON_HB: + res= new (mem_root) Histogram_json_hb(); + break; + default: + DBUG_ASSERT(0); + } + return res; +} + + +C_MODE_START + +static int histogram_build_walk(void *elem, element_count elem_cnt, void *arg) +{ + Histogram_builder *hist_builder= (Histogram_builder *) arg; + return hist_builder->next(elem, elem_cnt); +} + +int basic_stats_collector_walk(void *elem, element_count count, + void *arg) +{ + ((Basic_stats_collector*)arg)->next(elem, count); + return 0; +} + +C_MODE_END +/* + The class Count_distinct_field is a helper class used to calculate + the number of distinct values for a column. The class employs the + Unique class for this purpose. + The class Count_distinct_field is used only by the function + collect_statistics_for_table to calculate the values for + column avg_frequency of the statistical table column_stats. 
+*/ + +class Count_distinct_field: public Sql_alloc +{ +protected: + + /* Field for which the number of distinct values is to be find out */ + Field *table_field; + Unique *tree; /* The helper object to contain distinct values */ + uint tree_key_length; /* The length of the keys for the elements of 'tree */ + + ulonglong distincts; + ulonglong distincts_single_occurence; + +public: + + Count_distinct_field() = default; + + /** + @param + field Field for which the number of distinct values is + to be find out + @param + max_heap_table_size The limit for the memory used by the RB tree container + of the constructed Unique object 'tree' + + @details + The constructor sets the values of 'table_field' and 'tree_key_length', + and then calls the 'new' operation to create a Unique object for 'tree'. + The type of 'field' and the value max_heap_table_size of determine the set + of the parameters to be passed to the constructor of the Unique object. + */ + + Count_distinct_field(Field *field, size_t max_heap_table_size) + { + table_field= field; + tree_key_length= field->pack_length(); + + tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) field, + tree_key_length, max_heap_table_size, 1); + } + + virtual ~Count_distinct_field() + { + delete tree; + tree= NULL; + } + + /* + @brief + Check whether the Unique object tree has been successfully created + */ + bool exists() + { + return (tree != NULL); + } + + /* + @brief + Add the value of 'field' to the container of the Unique object 'tree' + */ + virtual bool add() + { + table_field->mark_unused_memory_as_defined(); + return tree->unique_add(table_field->ptr); + } + + /* + @brief + Calculate the number of elements accumulated in the container of 'tree' + */ + void walk_tree() + { + Basic_stats_collector stats_collector; + tree->walk(table_field->table, basic_stats_collector_walk, + (void*)&stats_collector ); + distincts= stats_collector.get_count_distinct(); + distincts_single_occurence= 
stats_collector.get_count_single_occurence(); + } + + /* + @brief + Calculate a histogram of the tree + */ + bool walk_tree_with_histogram(ha_rows rows) + { + Histogram_base *hist= table_field->collected_stats->histogram; + Histogram_builder *hist_builder= + hist->create_builder(table_field, tree_key_length, rows); + + if (tree->walk(table_field->table, histogram_build_walk, + (void*)hist_builder)) + { + delete hist_builder; + return true; // Error + } + hist_builder->finalize(); + distincts= hist_builder->counters.get_count_distinct(); + distincts_single_occurence= hist_builder->counters. + get_count_single_occurence(); + delete hist_builder; + return false; + } + + ulonglong get_count_distinct() + { + return distincts; + } + + ulonglong get_count_distinct_single_occurence() + { + return distincts_single_occurence; + } + + /* + @brief + Get the pointer to the histogram built for table_field + */ + Histogram_base *get_histogram() + { + return table_field->collected_stats->histogram; + } +}; + + +static +int simple_ulonglong_key_cmp(void* arg, uchar* key1, uchar* key2) +{ + ulonglong *val1= (ulonglong *) key1; + ulonglong *val2= (ulonglong *) key2; + return *val1 > *val2 ? 1 : *val1 == *val2 ? 0 : -1; +} + + +/* + The class Count_distinct_field_bit is derived from the class + Count_distinct_field to be used only for fields of the MYSQL_TYPE_BIT type. 
+ The class provides a different implementation for the method add +*/ + +class Count_distinct_field_bit: public Count_distinct_field +{ +public: + + Count_distinct_field_bit(Field *field, size_t max_heap_table_size) + { + table_field= field; + tree_key_length= sizeof(ulonglong); + + tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp, + (void*) &tree_key_length, + tree_key_length, max_heap_table_size, 1); + } + + bool add() + { + longlong val= table_field->val_int(); + return tree->unique_add(&val); + } +}; + + +/* + The class Index_prefix_calc is a helper class used to calculate the values + for the column 'avg_frequency' of the statistical table index_stats. + For any table t from the database db and any k-component prefix of the + index i for this table the row from index_stats with the primary key + (db,t,i,k) must contain in the column 'avg_frequency' either NULL or + the number that is the ratio of N and V, where N is the number of index + entries without NULL values in the first k components of the index i, + and V is the number of distinct tuples composed of the first k components + encountered among these index entries. + Currently the objects of this class are used only by the function + collect_statistics_for_index. 
+*/ + +class Index_prefix_calc: public Sql_alloc +{ + +private: + + /* Info for the index i for whose prefix 'avg_frequency' is calculated */ + KEY *index_info; + /* The maximum number of the components in the prefixes of interest */ + uint prefixes; + bool empty; + + /* This structure is created for every k components of the index i */ + class Prefix_calc_state + { + public: + /* + The number of the scanned index entries without nulls + in the first k components + */ + ulonglong entry_count; + /* + The number if the scanned index entries without nulls with + the last encountered k-component prefix + */ + ulonglong prefix_count; + /* The values of the last encountered k-component prefix */ + Cached_item *last_prefix; + }; + + /* + Array of structures used to calculate 'avg_frequency' for different + prefixes of the index i + */ + Prefix_calc_state *calc_state; + +public: + + bool is_single_comp_pk; + bool is_partial_fields_present; + + Index_prefix_calc(THD *thd, TABLE *table, KEY *key_info) + : index_info(key_info), prefixes(0), empty(true), + calc_state(NULL), is_single_comp_pk(false), is_partial_fields_present(false) + { + uint i; + Prefix_calc_state *state; + uint key_parts= table->actual_n_key_parts(key_info); + + uint pk= table->s->primary_key; + if ((uint) (table->key_info - key_info) == pk && + table->key_info[pk].user_defined_key_parts == 1) + { + prefixes= 1; + is_single_comp_pk= TRUE; + return; + } + + if ((calc_state= + (Prefix_calc_state *) thd->alloc(sizeof(Prefix_calc_state)*key_parts))) + { + uint keyno= (uint)(key_info-table->key_info); + for (i= 0, state= calc_state; i < key_parts; i++, state++) + { + /* + Do not consider prefixes containing a component that is only part + of the field. This limitation is set to avoid fetching data when + calculating the values of 'avg_frequency' for prefixes. 
+      */
+      if (!key_info->key_part[i].field->part_of_key.is_set(keyno))
+      {
+        is_partial_fields_present= TRUE;
+        break;
+      }
+
+      if (!(state->last_prefix=
+              new (thd->mem_root)
+              Cached_item_field(thd, key_info->key_part[i].field)))
+        break;
+      state->entry_count= state->prefix_count= 0;
+      prefixes++;
+    }
+  }
+
+
+  /**
+    @brief
+    Change the elements of calc_state after reading the next index entry
+
+    @details
+    This function is to be called at the index scan each time the next
+    index entry has been read into the record buffer.
+    For each of the index prefixes the function checks whether nulls
+    are encountered in any of the k components of the prefix.
+    If this is not the case the value of calc_state[k-1].entry_count
+    is incremented by 1. Then the function checks whether the value of
+    any of these k components has changed. If so, the value of
+    calc_state[k-1].prefix_count is incremented by 1.
+  */
+
+  void add()
+  {
+    uint i;
+    Prefix_calc_state *state;
+    uint first_changed= prefixes;
+    for (i= prefixes, state= calc_state+prefixes-1; i; i--, state--)
+    {
+      if (state->last_prefix->cmp())
+        first_changed= i-1;
+    }
+    if (empty)
+    {
+      first_changed= 0;
+      empty= FALSE;
+    }
+    for (i= 0, state= calc_state; i < prefixes; i++, state++)
+    {
+      if (state->last_prefix->null_value)
+        break;
+      if (i >= first_changed)
+        state->prefix_count++;
+      state->entry_count++;
+    }
+  }
+
+  /**
+    @brief
+    Calculate the values of avg_frequency for all prefixes of an index
+
+    @details
+    This function is to be called after the index scan to count the number
+    of distinct index prefixes has been done. The function calculates
+    the value of avg_frequency for the index prefix with k components
+    as calc_state[k-1].entry_count/calc_state[k-1].prefix_count.
+    If calc_state[k-1].prefix_count happens to be 0, the value of
+    avg_frequency[k-1] is set to 0, i.e. is considered as unknown. 
+ */ + + void get_avg_frequency() + { + uint i; + Prefix_calc_state *state; + + if (is_single_comp_pk) + { + index_info->collected_stats->set_avg_frequency(0, 1.0); + return; + } + + for (i= 0, state= calc_state; i < prefixes; i++, state++) + { + if (i < prefixes) + { + double val= state->prefix_count == 0 ? + 0 : (double) state->entry_count / state->prefix_count; + index_info->collected_stats->set_avg_frequency(i, val); + } + } + } +}; + + +/** + @brief + Create fields for min/max values to collect column statistics + + @param thd The thread handle + @param table Table the fields are created for + @param fields Fields for which we want to have statistics + + @details + The function first allocates record buffers to store min/max values + for 'table's fields. Then for each table field f it creates Field structures + that points to these buffers rather that to the record buffer as the + Field object for f does. The pointers of the created fields are placed + in the collected_stats structure of the Field object for f. + The function allocates the buffers for min/max values in the table + memory. + + @note + The buffers allocated when min/max values are used to read statistics + from the persistent statistical tables differ from those buffers that + are used when statistics on min/max values for column is collected + as they are allocated in different mem_roots. + The same is true for the fields created for min/max values. 
+*/ + +static +void create_min_max_statistical_fields_for_table(THD *thd, TABLE *table, + MY_BITMAP *fields) +{ + uint rec_buff_length= table->s->rec_buff_length; + + if ((table->collected_stats->min_max_record_buffers= + (uchar *) alloc_root(thd->mem_root, 2*rec_buff_length))) + { + uchar *record= table->collected_stats->min_max_record_buffers; + memset(record, 0, 2*rec_buff_length); + + for (uint i=0; i < 2; i++, record+= rec_buff_length) + { + for (Field **field_ptr= table->field; *field_ptr; field_ptr++) + { + Field *fld; + Field *table_field= *field_ptr; + my_ptrdiff_t diff= record-table->record[0]; + if (!bitmap_is_set(fields, table_field->field_index)) + continue; + if (!(fld= table_field->clone(thd->mem_root, table, diff))) + continue; + if (i == 0) + table_field->collected_stats->min_value= fld; + else + table_field->collected_stats->max_value= fld; + } + } + } +} + + +/** + @brief + Create fields for min/max values to read column statistics + + @param + thd Thread handler + @param + table_share Table share the fields are created for + @param + stats_cb TABLE_STATISTICS_CB object whose mem_root is used for allocations + + @details + The function first allocates record buffers to store min/max values for + fields in the table. For each field f it creates Field structures + that points to these buffers rather that to the record buffer as the + Field object for f does. The pointers of the created fields are placed + in the read_stats structure of the Field object for f. + The function allocates the buffers for min/max values in the stats_cb + memory. + + @note + The buffers allocated when min/max values are used to collect statistics + from the persistent statistical tables differ from those buffers that + are used when statistics on min/max values for column is read as they + are allocated in different mem_roots. + The same is true for the fields created for min/max values. 
+*/ + +static void +create_min_max_statistical_fields(THD *thd, + const TABLE_SHARE *table_share, + TABLE_STATISTICS_CB *stats_cb) +{ + Table_statistics *stats= stats_cb->table_stats; + + if (stats->min_max_record_buffers) + return; + + uint rec_buff_length= table_share->rec_buff_length; + + if ((stats->min_max_record_buffers= + (uchar *) alloc_root(&stats_cb->mem_root, 2*rec_buff_length))) + { + uchar *record= stats->min_max_record_buffers; + memset(record, 0, 2*rec_buff_length); + + for (uint i=0; i < 2; i++, record+= rec_buff_length) + { + Column_statistics *column_stats= stats_cb->table_stats->column_stats; + for (Field **field_ptr= table_share->field; + *field_ptr; + field_ptr++, column_stats++) + { + Field *fld; + Field *table_field= *field_ptr; + my_ptrdiff_t diff= record - table_share->default_values; + if (!(fld= table_field->clone(&stats_cb->mem_root, NULL, diff))) + continue; + if (i == 0) + column_stats->min_value= fld; + else + column_stats->max_value= fld; + } + } + } + +} + + +/** + @brief + Allocate memory for the table's statistical data to be collected + + @param thd The thread handle + @param table Table for which we should allocate statistical data + @param stat_fields Fields for which we want to have statistics + + @note + The function allocates the memory for the statistical data on 'table' with + the intention to collect the data there. The memory is allocated for + the statistics on the table, on the table's columns, and on the table's + indexes. The memory is allocated in the thd's mem_root. + + @retval + 0 If the memory for all statistical data has been successfully allocated + @retval + 1 Otherwise + + @note + Each thread allocates its own memory to collect statistics on the table + It allows us, for example, to collect statistics on the different indexes + of the same table in parallel. 
+*/ + +int alloc_statistics_for_table(THD* thd, TABLE *table, MY_BITMAP *stat_fields) +{ + Field **field_ptr; + uint fields= bitmap_bits_set(stat_fields); + uint keys= table->s->keys; + uint key_parts= table->s->ext_key_parts; + uint hist_size= thd->variables.histogram_size; + Table_statistics *table_stats; + Column_statistics_collected *column_stats; + Index_statistics *index_stats; + ulonglong *idx_avg_frequency; + uchar *histogram; + DBUG_ENTER("alloc_statistics_for_table"); + + if (!multi_alloc_root(thd->mem_root, + &table_stats, sizeof(*table_stats), + &column_stats, sizeof(*column_stats) * fields, + &index_stats, sizeof(*index_stats) * keys, + &idx_avg_frequency, + sizeof(*idx_avg_frequency) * key_parts, + &histogram, hist_size * fields, + NullS)) + DBUG_RETURN(1); + + if (hist_size > 0) + bzero(histogram, hist_size * fields); + else + histogram= 0; + + table->collected_stats= table_stats; + table_stats->column_stats= column_stats; + table_stats->index_stats= index_stats; + table_stats->idx_avg_frequency= idx_avg_frequency; + + bzero((void*) column_stats, sizeof(Column_statistics) * fields); + + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + if (bitmap_is_set(stat_fields, (*field_ptr)->field_index)) + { + column_stats->histogram = NULL; + (*field_ptr)->collected_stats= column_stats++; + } + else + (*field_ptr)->collected_stats= 0; + } + + memset(idx_avg_frequency, 0, sizeof(ulonglong) * key_parts); + + KEY *key_info, *end; + for (key_info= table->key_info, end= key_info + table->s->keys; + key_info < end; + key_info++, index_stats++) + { + key_info->collected_stats= index_stats; + key_info->collected_stats->init_avg_frequency(idx_avg_frequency); + idx_avg_frequency+= key_info->ext_key_parts; + } + /* + idx_avg_frequency can be less than + table_stats->idx_avg_frequency + key_parts + in the case of LONG_UNIQUE_HASH_FIELD as these has a hidden + ext_key_part which is counted in table_share->ext_keyparts but not + in keyinfo->ext_key_parts. 
+ */ + DBUG_ASSERT(idx_avg_frequency <= table_stats->idx_avg_frequency + key_parts); + + create_min_max_statistical_fields_for_table(thd, table, stat_fields); + + DBUG_RETURN(0); +} + +/* + Free the "local" statistics for table allocated during getting statistics +*/ + +void free_statistics_for_table(TABLE *table) +{ + for (Field **field_ptr= table->field; *field_ptr; field_ptr++) + { + delete (*field_ptr)->collected_stats; + (*field_ptr)->collected_stats= 0; + } +} + +/** + @brief + Allocate memory for the statistical data used by a table share + + @param + thd Thread handler + @param + table_share Table share for which the memory for statistical data is allocated + @param + stats_cb TABLE_STATISTICS_CB object for storing the statistical data + + @note + The function allocates the memory for the statistical data on a table in the + table's share memory with the intention to read the statistics there from + the system persistent statistical tables mysql.table_stat, mysql.column_stats, + mysql.index_stats. The memory is allocated for the statistics on the table, + on the tables's columns, and on the table's indexes. The memory is allocated + in the table_share's mem_root. + + @retval + 0 If the memory for all statistical data has been successfully allocated + @retval + 1 Otherwise + + @note + The situation when more than one thread try to allocate memory for + statistical data is rare. It happens under the following scenario: + 1. One thread executes a query over table t with the system variable + 'use_stat_tables' set to 'never'. + 2. After this the second thread sets 'use_stat_tables' to 'preferably' + and executes a query over table t. + 3. Simultaneously the third thread sets 'use_stat_tables' to 'preferably' + and executes a query over table t. + Here the second and the third threads try to allocate the memory for + statistical data at the same time. The precautions are taken to + guarantee the correctness of the allocation. 
+*/
+
+static int
+alloc_engine_independent_statistics(THD *thd, const TABLE_SHARE *table_share,
+                                    TABLE_STATISTICS_CB *stats_cb)
+{
+  /* Set by multi_alloc_root below; no point initializing it beforehand. */
+  Table_statistics *table_stats;
+  uint fields= table_share->fields;
+  uint keys= table_share->keys;
+  uint key_parts= table_share->ext_key_parts;
+  Index_statistics *index_stats;
+  ulonglong *idx_avg_frequency;
+  DBUG_ENTER("alloc_engine_independent_statistics");
+
+  Column_statistics *column_stats;
+  if (!multi_alloc_root(&stats_cb->mem_root,
+                        &table_stats, sizeof(Table_statistics),
+                        &column_stats, sizeof(Column_statistics) * fields,
+                        &index_stats, sizeof(Index_statistics) * keys,
+                        &idx_avg_frequency,
+                        sizeof(*idx_avg_frequency) * key_parts,
+                        NullS))
+    DBUG_RETURN(1);
+
+  /* Zero variables but not the gaps between them */
+  bzero(table_stats, sizeof(Table_statistics));
+  bzero((void*) column_stats, sizeof(Column_statistics) * fields);
+  bzero(index_stats, sizeof(Index_statistics) * keys);
+  /*
+    Note: sizeof(*idx_avg_frequency), not sizeof(idx_avg_frequency).
+    The latter is the size of the pointer and would under-clear the
+    array on platforms where sizeof(ulonglong*) != sizeof(ulonglong).
+  */
+  bzero(idx_avg_frequency, sizeof(*idx_avg_frequency) * key_parts);
+
+  stats_cb->table_stats= table_stats;
+  table_stats->columns= table_share->fields;
+  table_stats->column_stats= column_stats;
+  table_stats->index_stats= index_stats;
+  table_stats->idx_avg_frequency= idx_avg_frequency;
+
+  create_min_max_statistical_fields(thd, table_share, stats_cb);
+
+  for (KEY *key_info= table_share->key_info, *end= key_info + keys;
+       key_info < end;
+       key_info++, index_stats++)
+  {
+    index_stats->init_avg_frequency(idx_avg_frequency);
+    idx_avg_frequency+= key_info->ext_key_parts;
+  }
+  DBUG_ASSERT(idx_avg_frequency <= table_stats->idx_avg_frequency + key_parts);
+  DBUG_RETURN(0);
+}
+
+
+/**
+  @brief
+  Initialize the aggregation fields to collect statistics on a column
+
+  @param
+  thd         Thread handler
+  @param
+  table_field Column to collect statistics for
+*/
+
+inline
+void Column_statistics_collected::init(THD *thd, Field *table_field)
+{
+  size_t max_heap_table_size= (size_t)thd->variables.max_heap_table_size;
+  TABLE *table= 
table_field->table; + uint pk= table->s->primary_key; + + is_single_pk_col= FALSE; + + if (pk != MAX_KEY && table->key_info[pk].user_defined_key_parts == 1 && + table->key_info[pk].key_part[0].fieldnr == table_field->field_index + 1) + is_single_pk_col= TRUE; + + column= table_field; + + set_all_nulls(); + + nulls= 0; + column_total_length= 0; + count_distinct= NULL; + if (!is_single_pk_col && !(table_field->flags & BLOB_FLAG)) + { + count_distinct= + table_field->type() == MYSQL_TYPE_BIT ? + new (thd->mem_root) Count_distinct_field_bit(table_field, + max_heap_table_size) : + new (thd->mem_root) Count_distinct_field(table_field, + max_heap_table_size); + if (count_distinct && !count_distinct->exists()) + { + /* Allocation failed */ + delete count_distinct; + count_distinct= NULL; + } + } +} + + +/** + @brief + Perform aggregation for a row when collecting statistics on a column + + @param + rowno The order number of the row +*/ + +inline +bool Column_statistics_collected::add() +{ + + bool err= 0; + if (column->is_null()) + nulls++; + else + { + column_total_length+= column->value_length(); + if (min_value && column->update_min(min_value, + is_null(COLUMN_STAT_MIN_VALUE))) + set_not_null(COLUMN_STAT_MIN_VALUE); + if (max_value && column->update_max(max_value, + is_null(COLUMN_STAT_MAX_VALUE))) + set_not_null(COLUMN_STAT_MAX_VALUE); + if (count_distinct) + err= count_distinct->add(); + } + return err; +} + + +/** + @brief + Get the results of aggregation when collecting the statistics on a column + + @param + rows The total number of rows in the table +*/ + +inline +bool Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, + double sample_fraction) +{ + double val; + + if (rows) + { + val= (double) nulls / rows; + set_nulls_ratio(val); + set_not_null(COLUMN_STAT_NULLS_RATIO); + } + if (rows - nulls) + { + val= (double) column_total_length / (rows - nulls); + set_avg_length(val); + set_not_null(COLUMN_STAT_AVG_LENGTH); + } + if (count_distinct) + { + 
uint hist_size= current_thd->variables.histogram_size; + Histogram_type hist_type= + (Histogram_type) (current_thd->variables.histogram_type); + bool have_histogram= false; + if (hist_size != 0 && hist_type != INVALID_HISTOGRAM) + { + histogram= create_histogram(mem_root, hist_type); + histogram->init_for_collection(mem_root, hist_type, hist_size); + + if (count_distinct->walk_tree_with_histogram(rows - nulls)) + { + delete histogram; + histogram= NULL; + delete count_distinct; + count_distinct= NULL; + return true; // Error + } + have_histogram= true; + } + else + { + /* Compute cardinality statistics */ + count_distinct->walk_tree(); + } + + ulonglong distincts= count_distinct->get_count_distinct(); + ulonglong distincts_single_occurence= + count_distinct->get_count_distinct_single_occurence(); + + if (distincts) + { + /* + We use the unsmoothed first-order jackknife estimator" to estimate + the number of distinct values. + With a sufficient large percentage of rows sampled (80%), we revert back + to computing the avg_frequency off of the raw data. 
+ */ + if (sample_fraction > 0.8) + val= (double) (rows - nulls) / distincts; + else + { + if (nulls == 1) + distincts_single_occurence+= 1; + if (nulls) + distincts+= 1; + double fraction_single_occurence= + static_cast(distincts_single_occurence) / rows; + double total_number_of_rows= rows / sample_fraction; + double estimate_total_distincts= total_number_of_rows / + (distincts / + (1.0 - (1.0 - sample_fraction) * fraction_single_occurence)); + val = std::fmax(estimate_total_distincts * (rows - nulls) / rows, 1.0); + } + + set_avg_frequency(val); + set_not_null(COLUMN_STAT_AVG_FREQUENCY); + } + else + have_histogram= false; + + set_not_null(COLUMN_STAT_HIST_SIZE); + if (have_histogram && distincts && histogram) + { + set_not_null(COLUMN_STAT_HIST_TYPE); + set_not_null(COLUMN_STAT_HISTOGRAM); + } + delete count_distinct; + count_distinct= NULL; + } + else if (is_single_pk_col) + { + val= 1.0; + set_avg_frequency(val); + set_not_null(COLUMN_STAT_AVG_FREQUENCY); + } + return false; +} + + +/** + @brief + Clean up auxiliary structures used for aggregation +*/ + +inline +void Column_statistics_collected::cleanup() +{ + if (count_distinct) + { + delete count_distinct; + count_distinct= NULL; + } +} + + +/** + @brief + Collect statistical data on an index + + @param + table The table the index belongs to + index The number of this index in the table + + @details + The function collects the value of 'avg_frequency' for the prefixes + on an index from 'table'. The index is specified by its number. + If the scan is successful the calculated statistics is saved in the + elements of the array write_stat.avg_frequency of the KEY_INFO structure + for the index. The statistics for the prefix with k components is saved + in the element number k-1. + + @retval + 0 If the statistics has been successfully collected + @retval + 1 Otherwise + + @note + The function collects statistics for the index prefixes for one index + scan during which no data is fetched from the table records. 
That's why + statistical data for prefixes that contain part of a field is not + collected. + The function employs an object of the helper class Index_prefix_calc to + count for each index prefix the number of index entries without nulls and + the number of distinct entries among them. + +*/ + +static +int collect_statistics_for_index(THD *thd, TABLE *table, uint index) +{ + int rc= 0; + KEY *key_info= &table->key_info[index]; + DBUG_ENTER("collect_statistics_for_index"); + + /* No statistics for FULLTEXT indexes. */ + if (key_info->flags & (HA_FULLTEXT|HA_SPATIAL)) + DBUG_RETURN(rc); + + Index_prefix_calc index_prefix_calc(thd, table, key_info); + + DEBUG_SYNC(table->in_use, "statistics_collection_start1"); + DEBUG_SYNC(table->in_use, "statistics_collection_start2"); + + if (index_prefix_calc.is_single_comp_pk) + { + index_prefix_calc.get_avg_frequency(); + DBUG_RETURN(rc); + } + + /* + Request "only index read" in case of absence of fields which are + partially in the index to avoid problems with partitioning (for example) + which want to get whole field value. + */ + if (!index_prefix_calc.is_partial_fields_present) + table->file->ha_start_keyread(index); + table->file->ha_index_init(index, TRUE); + rc= table->file->ha_index_first(table->record[0]); + while (rc != HA_ERR_END_OF_FILE) + { + if (thd->killed) + break; + + if (rc) + break; + index_prefix_calc.add(); + rc= table->file->ha_index_next(table->record[0]); + } + table->file->ha_end_keyread(); + table->file->ha_index_end(); + + rc= (rc == HA_ERR_END_OF_FILE && !thd->killed) ? 0 : 1; + + if (!rc) + index_prefix_calc.get_avg_frequency(); + + DBUG_RETURN(rc); +} + + +/** + @brief + Collect statistical data for a table + + @param + thd The thread handle + @param + table The table to collect statistics on + + @details + The function collects data for various statistical characteristics on + the table 'table'. These data is saved in the internal fields that could + be reached from 'table'. 
The data is prepared to be saved in the persistent + statistical table by the function update_statistics_for_table. + The collected statistical values are not placed in the same fields that + keep the statistical data used by the optimizer. Therefore, at any time, + there is no collision between the statistics being collected and the one + used by the optimizer to look for optimal query execution plans for other + clients. + + @retval + 0 If the statistics has been successfully collected + @retval + 1 Otherwise + + @note + The function first collects statistical data for statistical characteristics + to be saved in the statistical tables table_stat and column_stats. To do this + it performs a full table scan of 'table'. At this scan the function collects + statistics on each column of the table and count the total number of the + scanned rows. To calculate the value of 'avg_frequency' for a column the + function constructs an object of the helper class Count_distinct_field + (or its derivation). Currently this class cannot count the number of + distinct values for blob columns. So the value of 'avg_frequency' for + blob columns is always null. + After the full table scan the function calls collect_statistics_for_index + for each table index. The latter performs full index scan for each index. + + @note + Currently the statistical data is collected indiscriminately for all + columns/indexes of 'table', for all statistical characteristics. + TODO. Collect only specified statistical characteristics for specified + columns/indexes. + + @note + Currently the process of collecting statistical data is not optimized. + For example, 'avg_frequency' for a column could be copied from the + 'avg_frequency' collected for an index if this column is used as the + first component of the index. Min and min values for this column could + be extracted from the index as well. 
+*/ + +int collect_statistics_for_table(THD *thd, TABLE *table) +{ + int rc; + Field **field_ptr; + Field *table_field; + ha_rows rows= 0; + handler *file=table->file; + double sample_fraction= thd->variables.sample_percentage / 100; + const ha_rows MIN_THRESHOLD_FOR_SAMPLING= 50000; + DBUG_ENTER("collect_statistics_for_table"); + + table->collected_stats->cardinality_is_null= TRUE; + table->collected_stats->cardinality= 0; + + if (thd->variables.sample_percentage == 0) + { + if (file->records() < MIN_THRESHOLD_FOR_SAMPLING) + { + sample_fraction= 1; + } + else + { + sample_fraction= std::fmin( + (MIN_THRESHOLD_FOR_SAMPLING + 4096 * + log(200 * file->records())) / file->records(), 1); + } + } + + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + table_field= *field_ptr; + if (!table_field->collected_stats) + continue; + table_field->collected_stats->init(thd, table_field); + } + + restore_record(table, s->default_values); + + /* Perform a full table scan to collect statistics on 'table's columns */ + if (!(rc= file->ha_rnd_init(TRUE))) + { + DEBUG_SYNC(table->in_use, "statistics_collection_start"); + + while ((rc= file->ha_rnd_next(table->record[0])) != HA_ERR_END_OF_FILE) + { + if (thd->killed) + break; + + if (rc) + break; + + if (thd_rnd(thd) <= sample_fraction) + { + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + table_field= *field_ptr; + if (!table_field->collected_stats) + continue; + if ((rc= table_field->collected_stats->add())) + break; + } + if (rc) + break; + rows++; + } + } + file->ha_rnd_end(); + } + rc= (rc == HA_ERR_END_OF_FILE && !thd->killed) ? 0 : 1; + + /* + Calculate values for all statistical characteristics on columns and + and for each field f of 'table' save them in the write_stat structure + from the Field object for f. 
+ */ + if (!rc) + { + table->collected_stats->cardinality_is_null= FALSE; + table->collected_stats->cardinality= + static_cast(rows / sample_fraction); + } + + bitmap_clear_all(table->write_set); + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + table_field= *field_ptr; + if (!table_field->collected_stats) + continue; + bitmap_set_bit(table->write_set, table_field->field_index); + if (!rc) + rc= table_field->collected_stats->finish(thd->mem_root, rows, + sample_fraction); + else + table_field->collected_stats->cleanup(); + } + bitmap_clear_all(table->write_set); + + if (!rc) + { + uint key; + key_map::Iterator it(table->keys_in_use_for_query); + + MY_BITMAP *save_read_set= table->read_set; + table->read_set= &table->tmp_set; + bitmap_set_all(table->read_set); + + /* Collect statistics for indexes */ + while ((key= it++) != key_map::Iterator::BITMAP_END) + { + if ((rc= collect_statistics_for_index(thd, table, key))) + break; + } + + table->read_set= save_read_set; + } + + DBUG_RETURN(rc); +} + + +/** + @brief + Update statistics for a table in the persistent statistical tables + + @param thd The thread handle + @param table The table to collect statistics on + + @details + For each statistical table st the function looks for the rows from this + table that contain statistical data on 'table'. If rows with given + statistical characteristics exist they are updated with the new statistical + values taken from internal structures for 'table'. Otherwise new rows + with these statistical characteristics are added into st. + It is assumed that values stored in the statistical tables are found and + saved by the function collect_statistics_for_table. + + @retval + 0 If all statistical tables has been successfully updated + @retval + 1 Otherwise + + @note + The function is called when executing the ANALYZE actions for 'table'. 
+ The function first unlocks the opened table the statistics on which has + been collected, but does not closes it, so all collected statistical data + remains in internal structures for 'table'. Then the function opens the + statistical tables and writes the statistical data for 'table'into them. + It is not allowed just to open statistical tables for writing when some + other tables are locked for reading. + After the statistical tables have been opened they are updated one by one + with the new statistics on 'table'. Objects of the helper classes + Table_stat, Column_stat and Index_stat are employed for this. + After having been updated the statistical system tables are closed. +*/ + +int update_statistics_for_table(THD *thd, TABLE *table) +{ + TABLE_LIST tables[STATISTICS_TABLES]; + uint i; + int err; + enum_binlog_format save_binlog_format; + int rc= 0; + TABLE *stat_table; + DBUG_ENTER("update_statistics_for_table"); + + DEBUG_SYNC(thd, "statistics_update_start"); + + start_new_trans new_trans(thd); + + if (open_stat_tables(thd, tables, TRUE)) + { + new_trans.restore_old_transaction(); + DBUG_RETURN(0); + } + + /* + Ensure that no one is reading satistics while we are writing them + This ensures that statistics is always read consistently + */ + mysql_mutex_lock(&table->s->LOCK_statistics); + + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + /* Update the statistical table table_stats */ + stat_table= tables[TABLE_STAT].table; + Table_stat table_stat(stat_table, table); + restore_record(stat_table, s->default_values); + table_stat.set_key_fields(); + err= table_stat.update_stat(); + if (err) + rc= 1; + + /* Update the statistical table colum_stats */ + stat_table= tables[COLUMN_STAT].table; + Column_stat column_stat(stat_table, table); + for (Field **field_ptr= table->field; *field_ptr; field_ptr++) + { + Field *table_field= *field_ptr; + if (!table_field->collected_stats) + continue; + restore_record(stat_table, s->default_values); + 
column_stat.set_key_fields(table_field); + err= column_stat.update_stat(); + if (err && !rc) + rc= 1; + } + + /* Update the statistical table index_stats */ + stat_table= tables[INDEX_STAT].table; + uint key; + key_map::Iterator it(table->keys_in_use_for_query); + Index_stat index_stat(stat_table, table); + + while ((key= it++) != key_map::Iterator::BITMAP_END) + { + KEY *key_info= table->key_info+key; + uint key_parts= table->actual_n_key_parts(key_info); + for (i= 0; i < key_parts; i++) + { + restore_record(stat_table, s->default_values); + index_stat.set_key_fields(key_info, i+1); + err= index_stat.update_stat(); + if (err && !rc) + rc= 1; + } + } + + tables[TABLE_STAT].table->file->extra(HA_EXTRA_FLUSH); + tables[COLUMN_STAT].table->file->extra(HA_EXTRA_FLUSH); + tables[INDEX_STAT].table->file->extra(HA_EXTRA_FLUSH); + + thd->restore_stmt_binlog_format(save_binlog_format); + if (thd->commit_whole_transaction_and_close_tables()) + rc= 1; + + mysql_mutex_unlock(&table->s->LOCK_statistics); + new_trans.restore_old_transaction(); + DBUG_RETURN(rc); +} + + +/** + @brief + Read statistics for a table from the persistent statistical tables + + @param + thd The thread handle + @param + table The table to read statistics on. + @param + stat_tables The array of TABLE_LIST objects for statistical tables + @param + force_reload Flag to require reloading the statistics from the tables + even if it has been already loaded + + @details + For each statistical table the function looks for the rows from this + table that contain statistical data on 'table'. If such rows is found + the data from statistical columns of it is read into the appropriate + fields of internal structures for 'table'. Later at the query processing + this data are supposed to be used by the optimizer. + The parameter stat_tables should point to an array of TABLE_LIST + objects for all statistical tables linked into a list. All statistical + tables are supposed to be opened. 
+ The function is called by read_statistics_for_tables_if_needed(). + + @retval + pointer to object If data has been successfully read for the table + @retval + 0 Otherwise + + @note + Objects of the helper classes Table_stat, Column_stat and Index_stat + are employed to read statistical data from the statistical tables. + now. +*/ + +static +TABLE_STATISTICS_CB* +read_statistics_for_table(THD *thd, TABLE *table, + TABLE_LIST *stat_tables, bool force_reload, + bool want_histograms) +{ + uint i; + TABLE *stat_table; + Field *table_field; + Field **field_ptr; + KEY *key_info, *key_info_end; + TABLE_SHARE *table_share= table->s; + DBUG_ENTER("read_statistics_for_table"); + + if (!force_reload && table_share->stats_cb && + (!want_histograms || !table_share->histograms_exists())) + { + if (table->stats_cb == table_share->stats_cb) + DBUG_RETURN(table->stats_cb); // Use current + table->update_engine_independent_stats(); // Copy table_share->stats_cb + DBUG_RETURN(table->stats_cb); + } + + /* + Read data into a new TABLE_STATISTICS_CB object and replace + TABLE_SHARE::stats_cb with this new one once the reading is finished + */ + TABLE_STATISTICS_CB *new_stats_cb; + if (!(new_stats_cb= new TABLE_STATISTICS_CB)) + DBUG_RETURN(0); /* purecov: inspected */ + + if (alloc_engine_independent_statistics(thd, table_share, new_stats_cb)) + { + /* purecov: begin inspected */ + delete new_stats_cb; + DBUG_RETURN(0); + /* purecov: end */ + } + + /* Don't write warnings for internal field conversions */ + Check_level_instant_set check_level_save(thd, CHECK_FIELD_IGNORE); + + /* Read statistics from the statistical table table_stats */ + Table_statistics *read_stats= new_stats_cb->table_stats; + stat_table= stat_tables[TABLE_STAT].table; + Table_stat table_stat(stat_table, table); + table_stat.set_key_fields(); + if (table_stat.get_stat_values(new_stats_cb->table_stats)) + new_stats_cb->stats_available|= TABLE_STAT_TABLE; + + /* Read statistics from the statistical table column_stats 
*/ + stat_table= stat_tables[COLUMN_STAT].table; + Column_stat column_stat(stat_table, table); + Column_statistics *column_statistics= new_stats_cb->table_stats->column_stats; + for (field_ptr= table_share->field; + *field_ptr; + field_ptr++, column_statistics++) + { + table_field= *field_ptr; + column_stat.set_key_fields(table_field); + if (column_stat.get_stat_values(column_statistics, + &new_stats_cb->mem_root, + want_histograms)) + new_stats_cb->stats_available|= TABLE_STAT_COLUMN; + if (column_statistics->histogram_exists) + { + new_stats_cb->histograms_exists_on_disk= 1; + if (column_statistics->histogram) + new_stats_cb->stats_available|= TABLE_STAT_HISTOGRAM; + } + } + + /* Read statistics from the statistical table index_stats */ + stat_table= stat_tables[INDEX_STAT].table; + Index_stat index_stat(stat_table, table); + Index_statistics *index_statistics= new_stats_cb->table_stats->index_stats; + for (key_info= table_share->key_info, + key_info_end= key_info + table_share->keys; + key_info < key_info_end; key_info++, index_statistics++) + { + uint key_parts= key_info->ext_key_parts; + bool found= 0; + for (i= 0; i < key_parts; i++) + { + index_stat.set_key_fields(key_info, i+1); + found|= index_stat.get_stat_values(index_statistics); + } + if (found) + new_stats_cb->stats_available|= TABLE_STAT_INDEX; + + key_part_map ext_key_part_map= key_info->ext_key_part_map; + if (key_info->user_defined_key_parts != key_info->ext_key_parts && + index_statistics->get_avg_frequency(key_info->user_defined_key_parts) == 0) + { + KEY *pk_key_info= table_share->key_info + table_share->primary_key; + uint k= key_info->user_defined_key_parts; + uint pk_parts= pk_key_info->user_defined_key_parts; + ha_rows n_rows= read_stats->cardinality; + double k_dist= n_rows / index_statistics->get_avg_frequency(k-1); + uint m= 0; + Index_statistics *pk_read_stats= (new_stats_cb->table_stats->index_stats + + table_share->primary_key); + for (uint j= 0; j < pk_parts; j++) + { + if 
(!(ext_key_part_map & 1 << j)) + { + for (uint l= k; l < k + m; l++) + { + double avg_frequency= pk_read_stats->get_avg_frequency(j-1); + set_if_smaller(avg_frequency, 1); + double val= (pk_read_stats->get_avg_frequency(j) / + avg_frequency); + index_statistics->set_avg_frequency (l, val); + } + } + else + { + double avg_frequency= pk_read_stats->get_avg_frequency(j); + index_statistics->set_avg_frequency(k + m, avg_frequency); + m++; + } + } + for (uint l= k; l < k + m; l++) + { + double avg_frequency= index_statistics->get_avg_frequency(l); + if (avg_frequency == 0 || read_stats->cardinality_is_null) + avg_frequency= 1; + else if (avg_frequency > 1) + { + avg_frequency/= k_dist; + set_if_bigger(avg_frequency, 1); + } + index_statistics->set_avg_frequency(l, avg_frequency); + } + } + } + DBUG_RETURN(new_stats_cb); +} + + +/** + @brief + Read statistics for tables from a table list if it is needed + + @param + thd The thread handle + @param + tables The tables list for whose tables to read statistics + + @details + The function first checks whether for any of the tables opened and locked + for a statement statistics from statistical tables is needed to be read. + Then, if so, it opens system statistical tables for read and reads + the statistical data from them for those tables from the list for which it + makes sense. Then the function closes system statistical tables. 
+ + @retval + 0 Statistics for tables was successfully read + @retval + 1 Otherwise +*/ + +int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables) +{ + switch (thd->lex->sql_command) { + case SQLCOM_SELECT: + case SQLCOM_INSERT: + case SQLCOM_INSERT_SELECT: + case SQLCOM_UPDATE: + case SQLCOM_UPDATE_MULTI: + case SQLCOM_DELETE: + case SQLCOM_DELETE_MULTI: + case SQLCOM_REPLACE: + case SQLCOM_REPLACE_SELECT: + case SQLCOM_CREATE_TABLE: + case SQLCOM_SET_OPTION: + case SQLCOM_DO: + return read_statistics_for_tables(thd, tables, 0); + default: + return 0; + } +} + + +/* + Update TABLE field and key objects with pointers to + the current statistical data in table->stats_cb +*/ + + +void TABLE_STATISTICS_CB::update_stats_in_table(TABLE *table) +{ + DBUG_ASSERT(table->stats_cb == this); + + /* + Table_statistics doesn't need to be updated: set_statistics_for_table() + sets TABLE::used_stat_records from table->stats_cb.table_stats.cardinality + */ + + KEY *key_info= table->key_info; + KEY *key_info_end= key_info + table->s->keys; + Index_statistics *index_stats= table_stats->index_stats; + + for ( ; key_info < key_info_end; key_info++, index_stats++) + key_info->read_stats= index_stats; + + Field **field_ptr= table->field; + Column_statistics *column_stats= table_stats->column_stats; + + for ( ; *field_ptr; field_ptr++, column_stats++) + (*field_ptr)->read_stats= column_stats; + /* Mark that stats are now usable */ + table->stats_is_read= (table->stats_cb->stats_available != + TABLE_STAT_NO_STATS); +} + + +int +read_statistics_for_tables(THD *thd, TABLE_LIST *tables, bool force_reload) +{ + int rc= 0; + TABLE_LIST stat_tables[STATISTICS_TABLES]; + bool found_stat_table= false; + bool statistics_for_tables_is_needed= false; + bool want_histograms= thd->variables.optimizer_use_condition_selectivity > 3; + DBUG_ENTER("read_statistics_for_tables"); + + if (thd->bootstrap || thd->variables.use_stat_tables == NEVER || !tables) + DBUG_RETURN(0); + + for 
(TABLE_LIST *tl= tables; tl; tl= tl->next_global) + { + TABLE *table= tl->table; + TABLE_SHARE *table_share; + + /* Skip tables that can't have statistics. */ + if (tl->is_view_or_derived() || !table || !(table_share= table->s)) + continue; + /* Skip temporary tables */ + if (table_share->tmp_table != NO_TMP_TABLE) + continue; + + if (table_share->table_category == TABLE_CATEGORY_USER) + { + /* Force reloading means we always read all stats tables. */ + if (force_reload || !table_share->stats_cb) + { + statistics_for_tables_is_needed= true; + continue; + } + + /* Stats versions don't match, take a reference under a mutex. */ + if (table->stats_cb != table_share->stats_cb) + { + table->update_engine_independent_stats(); + table->stats_cb->update_stats_in_table(table); + } + /* + We need to read histograms if they exist but have not yet been + loaded into memory. + */ + if (want_histograms && + table->stats_cb->histograms_exists() && + !(table->stats_cb->stats_available & TABLE_STAT_HISTOGRAM)) + { + statistics_for_tables_is_needed= true; + } + } + else if (is_stat_table(&tl->db, &tl->alias)) + found_stat_table= true; + } + + DEBUG_SYNC(thd, "statistics_read_start"); + + /* + Do not read statistics for any query that explicity involves + statistical tables, failure to to do so we may end up + in a deadlock. + */ + if (found_stat_table || !statistics_for_tables_is_needed) + DBUG_RETURN(0); + + start_new_trans new_trans(thd); + + if (open_stat_tables(thd, stat_tables, FALSE)) + { + rc= 1; + goto end; + } + + for (TABLE_LIST *tl= tables; tl; tl= tl->next_global) + { + TABLE *table= tl->table; + TABLE_SHARE *table_share; + + /* Skip tables that can't have statistics. 
*/ + if (tl->is_view_or_derived() || !table || !(table_share= table->s) || + table_share->tmp_table != NO_TMP_TABLE || + table_share->table_category != TABLE_CATEGORY_USER) + continue; + + if (force_reload || !table_share->stats_cb || + table->stats_cb != table_share->stats_cb || + (want_histograms && table->stats_cb->histograms_exists() && + !(table->stats_cb->stats_available & TABLE_STAT_HISTOGRAM))) + { + TABLE_STATISTICS_CB *stats_cb; + DEBUG_SYNC(thd, "read_statistics_for_table_start1"); + DEBUG_SYNC(thd, "read_statistics_for_table_start2"); + + /* + The following lock is here to ensure that if a lot of threads are + accessing the table at the same time after a ANALYZE TABLE, + only one thread is loading the data from the the stats tables + and the others threads are reusing the loaded data. + */ + mysql_mutex_lock(&table_share->LOCK_statistics); + if (!(stats_cb= read_statistics_for_table(thd, table, stat_tables, + force_reload, want_histograms))) + { + /* purecov: begin inspected */ + mysql_mutex_unlock(&table_share->LOCK_statistics); + continue; + /* purecov: end */ + } + + if (stats_cb->unused()) + { + /* New object created, update share to use it */ + table_share->update_engine_independent_stats(stats_cb); + table->update_engine_independent_stats(); + } + mysql_mutex_unlock(&table_share->LOCK_statistics); + table->stats_cb->update_stats_in_table(table); + } + } + + thd->commit_whole_transaction_and_close_tables(); + +end: + new_trans.restore_old_transaction(); + DBUG_RETURN(rc); +} + + +/** + @brief + Delete statistics on a table from all statistical tables + + @param + thd The thread handle + @param + db The name of the database the table belongs to + @param + tab The name of the table whose statistics is to be deleted + + @details + The function delete statistics on the table called 'tab' of the database + 'db' from all statistical tables: table_stats, column_stats, index_stats. 
+ + @retval + 0 If all deletions are successful or we couldn't open statistics table + @retval + 1 Otherwise + + @note + The function is called when executing the statement DROP TABLE 'tab'. +*/ + +int delete_statistics_for_table(THD *thd, const LEX_CSTRING *db, + const LEX_CSTRING *tab) +{ + int err; + enum_binlog_format save_binlog_format; + TABLE *stat_table; + TABLE_LIST tables[STATISTICS_TABLES]; + Open_tables_backup open_tables_backup; + int rc= 0; + DBUG_ENTER("delete_statistics_for_table"); + + start_new_trans new_trans(thd); + + if (open_stat_tables(thd, tables, TRUE)) + { + new_trans.restore_old_transaction(); + DBUG_RETURN(0); + } + + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + /* Delete statistics on table from the statistical table index_stats */ + stat_table= tables[INDEX_STAT].table; + Index_stat index_stat(stat_table, db, tab); + index_stat.set_full_table_name(); + while (index_stat.find_next_stat_for_prefix(2)) + { + err= index_stat.delete_stat(); + if (err & !rc) + rc= 1; + } + + /* Delete statistics on table from the statistical table column_stats */ + stat_table= tables[COLUMN_STAT].table; + Column_stat column_stat(stat_table, db, tab); + column_stat.set_full_table_name(); + while (column_stat.find_next_stat_for_prefix(2)) + { + err= column_stat.delete_stat(); + if (err & !rc) + rc= 1; + } + + /* Delete statistics on table from the statistical table table_stats */ + stat_table= tables[TABLE_STAT].table; + Table_stat table_stat(stat_table, db, tab); + table_stat.set_key_fields(); + if (table_stat.find_stat()) + { + err= table_stat.delete_stat(); + if (err & !rc) + rc= 1; + } + + err= del_global_table_stat(thd, db, tab); + if (err & !rc) + rc= 1; + + tables[TABLE_STAT].table->file->extra(HA_EXTRA_FLUSH); + tables[COLUMN_STAT].table->file->extra(HA_EXTRA_FLUSH); + tables[INDEX_STAT].table->file->extra(HA_EXTRA_FLUSH); + + thd->restore_stmt_binlog_format(save_binlog_format); + thd->commit_whole_transaction_and_close_tables(); 
+ + new_trans.restore_old_transaction(); + DBUG_RETURN(rc); +} + + +/** + @brief + Delete statistics on a column of the specified table + + @param thd The thread handle + @param tab The table the column belongs to + @param col The field of the column whose statistics is to be deleted + + @details + The function delete statistics on the column 'col' belonging to the table + 'tab' from the statistical table column_stats. + + @retval 0 If all deletions are successful or we couldn't open statistics table + @retval 1 Otherwise + + @note + The function is called when dropping a table column or when changing + the definition of this column. +*/ + +int delete_statistics_for_column(THD *thd, TABLE *tab, Field *col) +{ + int err; + enum_binlog_format save_binlog_format; + TABLE *stat_table; + TABLE_LIST tables; + int rc= 0; + DBUG_ENTER("delete_statistics_for_column"); + + start_new_trans new_trans(thd); + + if (open_stat_table_for_ddl(thd, &tables, &stat_table_name[1])) + { + new_trans.restore_old_transaction(); + DBUG_RETURN(0); // Not an error + } + + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + stat_table= tables.table; + Column_stat column_stat(stat_table, tab); + column_stat.set_key_fields(col); + if (column_stat.find_stat()) + { + err= column_stat.delete_stat(); + if (err) + rc= 1; + } + + column_stat.flush(); + thd->restore_stmt_binlog_format(save_binlog_format); + if (thd->commit_whole_transaction_and_close_tables()) + rc= 1; + + new_trans.restore_old_transaction(); + DBUG_RETURN(rc); +} + + +/** + Generate tempoary column or index name for renames +*/ + +static LEX_CSTRING *generate_tmp_name(LEX_CSTRING *to, uint counter) +{ + char *res=int10_to_str(counter, strmov((char*) to->str, "#sql_tmp_name#"), + 10); + /* + Include an end zero in the tmp name to avoid any possible conflict + with existing column names. 
+ */ + to->length= (size_t) (res - to->str) + 1; + return to; +} + + +/** + Rename a set of columns in the statistical table column_stats + + @param thd The thread handle + @param tab The table the column belongs to + @param fields List of fields and names to be renamed + + @details + The function replaces the names of the columns in fields that belongs + to the table 'tab' in the statistical table column_stats. + + @retval 0 If update was successful, tmp table or could not open stat table + @retval -1 Commit failed + @retval >0 Error number from engine + + @note + The function is called when executing any statement that renames a column, + but does not change the column definition. +*/ + +int rename_columns_in_stat_table(THD *thd, TABLE *tab, + List + *fields) +{ + int err; + enum_binlog_format save_binlog_format; + TABLE *stat_table; + TABLE_LIST tables; + int rc= 0; + uint duplicate_counter= 0; + uint org_elements= fields->elements+1; + List_iterator it(*fields); + char tmp_name_buffer[32]; + LEX_CSTRING tmp_name= {tmp_name_buffer, 0}; + DBUG_ENTER("rename_column_in_stat_tables"); + + if (tab->s->tmp_table != NO_TMP_TABLE) + DBUG_RETURN(0); + + start_new_trans new_trans(thd); + + if (open_stat_table_for_ddl(thd, &tables, &stat_table_name[1])) + { + new_trans.restore_old_transaction(); + DBUG_RETURN(0); + } + + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + /* Rename column in the statistical table table_stat */ + + stat_table= tables.table; + + /* Loop until fields is empty or previous round did nothing */ + while (!fields->is_empty() && fields->elements != org_elements) + { + Alter_info::RENAME_COLUMN_STAT_PARAMS *field; + org_elements= fields->elements; + it.rewind(); + while ((field= it++)) + { + Column_stat column_stat(stat_table, tab); + LEX_CSTRING *from_name; + from_name= (!field->duplicate_counter ? 
+ &field->field->field_name : + generate_tmp_name(&tmp_name, + field->duplicate_counter)); + column_stat.set_key_fields(from_name); + if (column_stat.find_stat()) + { + err= column_stat.update_column_key_part(field->name); + if (likely(err != HA_ERR_FOUND_DUPP_KEY)) + it.remove(); + else if (!field->duplicate_counter) + { + /* + This is probably an ALTER TABLE of type rename a->b, b->a + Rename the column to a temporary name + */ + LEX_CSTRING *new_name= + generate_tmp_name(&tmp_name, ++duplicate_counter); + field->duplicate_counter= duplicate_counter; + + if ((err= column_stat.update_column(new_name))) + { + if (likely(err != HA_ERR_FOUND_DUPP_KEY)) + { + DBUG_ASSERT(0); + it.remove(); // Unknown error, ignore column + } + else + { + /* + The only way this could happen is if the table has a column + with same name as the temporary column name, probably from a + failed alter table. + Remove the conflicting row and update it again. + */ + if (!column_stat.find_stat()) + DBUG_ASSERT(0); + else if (column_stat.delete_stat()) + DBUG_ASSERT(0); + else + { + column_stat.set_key_fields(from_name); + if (!column_stat.find_stat()) + DBUG_ASSERT(0); + else if (column_stat.update_column_key_part(&tmp_name)) + DBUG_ASSERT(0); + } + } + } + } + } + else /* column_stat.find_stat() */ + { + /* Statistics for the field did not exists */ + it.remove(); + } + } + } + + if (!fields->is_empty()) + { + /* + All unhandled renamed fields has now a temporary name. + Remove all conflicing rows and rename the temporary name to + the final name. 
+ */ + + Alter_info::RENAME_COLUMN_STAT_PARAMS *field; + it.rewind(); + while ((field= it++)) + { + Column_stat column_stat(stat_table, tab); + DBUG_ASSERT(field->duplicate_counter); + + /* Remove the conflicting row */ + column_stat.set_key_fields(field->name); + if (column_stat.find_stat()) + { + int err __attribute__((unused)); + err= column_stat.delete_stat(); + DBUG_ASSERT(err == 0); + } + + /* Restore saved row with old statistics to new name */ + column_stat. + set_key_fields(generate_tmp_name(&tmp_name, + field->duplicate_counter)); + if (column_stat.find_stat()) + { + int err __attribute__((unused)); + err= column_stat.update_column_key_part(field->name); + DBUG_ASSERT(err == 0); + } + else + { + DBUG_ASSERT(0); + } + } + } + + stat_table->file->extra(HA_EXTRA_FLUSH); + thd->restore_stmt_binlog_format(save_binlog_format); + if (thd->commit_whole_transaction_and_close_tables()) + rc= -1; + + new_trans.restore_old_transaction(); + DBUG_RETURN(rc); +} + + +/** + @brief + Delete statistics on an index of the specified table + + @param thd The thread handle + @param tab The table the index belongs to + @param key_info The descriptor of the index whose statistics is to be + deleted + @param ext_prefixes_only Delete statistics only on the index prefixes + extended by the components of the primary key + + @details + The function delete statistics on the index specified by 'key_info' + defined on the table 'tab' from the statistical table index_stats. + + @retval 0 If all deletions are successful or we couldn't open statistics + table + @retval 1 Otherwise + + @note + The function is called when dropping an index, or dropping/changing the + definition of a column used in the definition of the index. 
+*/ + +int delete_statistics_for_index(THD *thd, TABLE *tab, KEY *key_info, + bool ext_prefixes_only) +{ + int err; + enum_binlog_format save_binlog_format; + TABLE *stat_table; + TABLE_LIST tables; + int rc= 0; + DBUG_ENTER("delete_statistics_for_index"); + + start_new_trans new_trans(thd); + + if (open_stat_table_for_ddl(thd, &tables, &stat_table_name[2])) + { + new_trans.restore_old_transaction(); + DBUG_RETURN(0); // Not an error + } + + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + stat_table= tables.table; + Index_stat index_stat(stat_table, tab); + if (!ext_prefixes_only) + { + index_stat.set_index_prefix_key_fields(key_info); + while (index_stat.find_next_stat_for_prefix(3)) + { + err= index_stat.delete_stat(); + if (err && !rc) + rc= 1; + } + } + else + { + for (uint i= key_info->user_defined_key_parts; i < key_info->ext_key_parts; i++) + { + index_stat.set_key_fields(key_info, i+1); + if (index_stat.find_next_stat_for_prefix(4)) + { + err= index_stat.delete_stat(); + if (err && !rc) + rc= 1; + } + } + } + + err= del_global_index_stat(thd, tab, key_info); + if (err && !rc) + rc= 1; + + /* Make change permanent and avoid 'table is marked as crashed' errors */ + index_stat.flush(); + + thd->restore_stmt_binlog_format(save_binlog_format); + if (thd->commit_whole_transaction_and_close_tables()) + rc= 1; + + new_trans.restore_old_transaction(); + DBUG_RETURN(rc); +} + + +/** + Rename a set of indexes in the statistical table index_stats + + @param thd The thread handle + @param tab The table the indexes belongs to + @param fields List of indexes to be renamed + + @details + The function replaces the names of the indexe in fields that belongs + to the table 'tab' in the statistical table index_stats. 
+ + @retval 0 If update was successful, tmp table or could not open stat table + @retval -1 Commit failed + @retval >0 Error number from engine + + @note + The function is called when executing any statement that renames a column, + but does not change the column definition. +*/ + +int rename_indexes_in_stat_table(THD *thd, TABLE *tab, + List + *indexes) +{ + int err; + enum_binlog_format save_binlog_format; + TABLE *stat_table; + TABLE_LIST tables; + int rc= 0; + uint duplicate_counter= 0; + List_iterator it(*indexes); + Alter_info::RENAME_INDEX_STAT_PARAMS *index; + char tmp_name_buffer[32]; + LEX_CSTRING tmp_name= {tmp_name_buffer, 0}; + DBUG_ENTER("rename_indexes_in_stat_tables"); + + if (tab->s->tmp_table != NO_TMP_TABLE) + DBUG_RETURN(0); + + start_new_trans new_trans(thd); + + if (open_stat_table_for_ddl(thd, &tables, &stat_table_name[2])) + { + new_trans.restore_old_transaction(); + DBUG_RETURN(0); + } + + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + /* Rename index in the statistical table index_stat */ + + stat_table= tables.table; + + /* + Loop over all indexes and rename to new name or temp name in case of + conflicts + */ + + while ((index= it++)) + { + Index_stat index_stat(stat_table, tab); + uint found= 0; + + /* We have to make a loop as one index may have many entries */ + for (;;) + { + index_stat.set_index_prefix_key_fields(index->key); + if (!index_stat.find_next_stat_for_prefix(3)) + break; + index_stat.store_record_for_update(); + err= index_stat.update_index_name(index->name); + + if (unlikely(err == HA_ERR_FOUND_DUPP_KEY)) + { + /* + This is probably an ALTER TABLE of type rename a->b, b->a + Rename the column to a temporary name + */ + if (!found++) + ++duplicate_counter; + index->duplicate_counter= duplicate_counter; + index->usage_count++; + if ((err= index_stat.update_index_name(generate_tmp_name(&tmp_name, duplicate_counter)))) + { + if (err != HA_ERR_FOUND_DUPP_KEY) + { + DBUG_ASSERT(0); + } + else + { + /* + 
The only way this could happen is if the table has an index + with same name as the temporary column index, probably from a + failed alter table. + Remove the conflicting row and update it again. + */ + if (!index_stat.find_stat()) + DBUG_ASSERT(0); + else if (index_stat.delete_stat()) + DBUG_ASSERT(0); + else + { + index_stat.set_index_prefix_key_fields(index->key); + if (!index_stat.find_stat()) + DBUG_ASSERT(0); + else + { + index_stat.store_record_for_update(); + if (index_stat.update_index_name(&tmp_name)) + DBUG_ASSERT(0); + } + } + } + } + } + } + if (!found) + it.remove(); // All renames succeded + } + + if (!indexes->is_empty()) + { + /* + All unhandled renamed index has now a temporary name. + Remove all conflicing rows and rename the temporary name to + the final name. + */ + + Alter_info::RENAME_INDEX_STAT_PARAMS *index; + it.rewind(); + Index_stat index_stat(stat_table, tab); + stat_table->file->ha_index_init(index_stat.stat_key_idx, 0); + + while ((index= it++)) + { + int err __attribute__((unused)); + + /* Remove the conflicting rows */ + index_stat.set_index_prefix_key_fields(index->key); + index_stat.set_index_name(index->name); + + if (index_stat.find_next_stat_for_prefix_with_next(3)) + { + do + { + err= index_stat.delete_stat(); + DBUG_ASSERT(err == 0); + } + while (index_stat.read_next() == 0); + } + + /* Restore saved row with old statistics to new name */ + index_stat.set_index_name(generate_tmp_name(&tmp_name, + index->duplicate_counter)); + if (!index_stat.find_stat_with_next()) + DBUG_ASSERT(0); + else + { + uint updated= 0; + do + { + index_stat.store_record_for_update(); + err= index_stat.update_index_name(index->name); + DBUG_ASSERT(err == 0); + } while (++updated < index->usage_count && index_stat.read_next() == 0); + } + } + stat_table->file->ha_index_end(); + } + + stat_table->file->extra(HA_EXTRA_FLUSH); + thd->restore_stmt_binlog_format(save_binlog_format); + if (thd->commit_whole_transaction_and_close_tables()) + rc= -1; + + 
new_trans.restore_old_transaction(); + DBUG_RETURN(rc); +} + + +/** + @brief + Rename a table in all statistical tables + + @param + thd The thread handle + @param + db The name of the database the table belongs to + @param + tab The name of the table to be renamed in statistical tables + @param + new_tab The new name of the table + + @details + The function replaces the name of the table 'tab' from the database 'db' + for 'new_tab' in all all statistical tables: table_stats, column_stats, + index_stats. + + @retval + 0 If all updates of the table name are successful + @retval + 1 Otherwise + + @note + The function is called when executing any statement that renames a table +*/ + +int rename_table_in_stat_tables(THD *thd, const LEX_CSTRING *db, + const LEX_CSTRING *tab, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_tab) +{ + int err; + enum_binlog_format save_binlog_format; + TABLE *stat_table; + TABLE_LIST tables[STATISTICS_TABLES]; + int rc= 0; + DBUG_ENTER("rename_table_in_stat_tables"); + + start_new_trans new_trans(thd); + + if (open_stat_tables(thd, tables, TRUE)) + { + new_trans.restore_old_transaction(); + DBUG_RETURN(0); + } + + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + /* Rename table in the statistical table index_stats */ + stat_table= tables[INDEX_STAT].table; + Index_stat index_stat(stat_table, db, tab); + index_stat.set_full_table_name(); + + Stat_table_write_iter index_iter(&index_stat); + if (index_iter.init(2)) + rc= 1; + while (!index_iter.get_next_row()) + { + err= index_stat.update_table_name_key_parts(new_db, new_tab); + if (err & !rc) + rc= 1; + index_stat.set_full_table_name(); + } + index_iter.cleanup(); + + /* Rename table in the statistical table column_stats */ + stat_table= tables[COLUMN_STAT].table; + Column_stat column_stat(stat_table, db, tab); + column_stat.set_full_table_name(); + Stat_table_write_iter column_iter(&column_stat); + if (column_iter.init(2)) + rc= 1; + while 
(!column_iter.get_next_row()) + { + err= column_stat.update_table_name_key_parts(new_db, new_tab); + if (err & !rc) + rc= 1; + column_stat.set_full_table_name(); + } + column_iter.cleanup(); + + /* Rename table in the statistical table table_stats */ + stat_table= tables[TABLE_STAT].table; + Table_stat table_stat(stat_table, db, tab); + table_stat.set_key_fields(); + if (table_stat.find_stat()) + { + err= table_stat.update_table_name_key_parts(new_db, new_tab); + if (err & !rc) + rc= 1; + } + + tables[TABLE_STAT].table->file->extra(HA_EXTRA_FLUSH); + tables[COLUMN_STAT].table->file->extra(HA_EXTRA_FLUSH); + tables[INDEX_STAT].table->file->extra(HA_EXTRA_FLUSH); + + thd->restore_stmt_binlog_format(save_binlog_format); + if (thd->commit_whole_transaction_and_close_tables()) + rc= 1; + + new_trans.restore_old_transaction(); + DBUG_RETURN(rc); +} + + +/** + @brief + Set statistics for a table that will be used by the optimizer + + @param + thd The thread handle + @param + table The table to set statistics for + + @details + Depending on the value of thd->variables.use_stat_tables + the function performs the settings for the table that will control + from where the statistical data used by the optimizer will be taken. +*/ + +void set_statistics_for_table(THD *thd, TABLE *table) +{ + TABLE_STATISTICS_CB *stats_cb= table->stats_cb; + + Table_statistics *read_stats= stats_cb ? stats_cb->table_stats : 0; + table->used_stat_records= + (!check_eits_preferred(thd) || + !table->stats_is_read || !read_stats || read_stats->cardinality_is_null) ? + table->file->stats.records : read_stats->cardinality; + + /* + For partitioned table, EITS statistics is based on data from all partitions. + + On the other hand, Partition Pruning figures which partitions will be + accessed and then computes the estimate of rows in used_partitions. + + Use the estimate from Partition Pruning as it is typically more precise. 
+ Ideally, EITS should provide per-partition statistics but this is not + implemented currently. + */ +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (table->part_info) + table->used_stat_records= table->file->stats.records; +#endif + + KEY *key_info, *key_info_end; + for (key_info= table->key_info, key_info_end= key_info+table->s->keys; + key_info < key_info_end; key_info++) + { + key_info->is_statistics_from_stat_tables= + (check_eits_preferred(thd) && + table->stats_is_read && + key_info->read_stats->avg_frequency_is_inited() && + key_info->read_stats->get_avg_frequency(0) > 0.5); + } +} + + +/** + @brief + Get the average frequency for a column + + @param + field The column whose average frequency is required + + @retval + The required average frequency +*/ + +double get_column_avg_frequency(Field * field) +{ + double res; + TABLE *table= field->table; + + /* + Statistics is shared by table instances and is accessed through + the table share. If table->s->field is not set for 'table', then + no column statistics is available for the table . + */ + if (!table->s->field) + { + res= (double)table->stat_records(); + return res; + } + + Column_statistics *col_stats= field->read_stats; + + if (!col_stats) + res= (double)table->stat_records(); + else + res= col_stats->get_avg_frequency(); + return res; +} + + +/** + @brief + Estimate the number of rows in a column range using data from stat tables + + @param + field The column whose range cardinality is to be estimated + @param + min_endp The left end of the range whose cardinality is required + @param + max_endp The right end of the range whose cardinality is required + @param + range_flag The range flags + + @details + The function gets an estimate of the number of rows in a column range + using the statistical data from the table column_stats. 
+ + @retval + - The required estimate of the rows in the column range + - If there is some kind of error, this function should return DBL_MAX (and + not HA_POS_ERROR as that is an integer constant). + +*/ + +double get_column_range_cardinality(Field *field, + key_range *min_endp, + key_range *max_endp, + uint range_flag) +{ + double res; + TABLE *table= field->table; + Column_statistics *col_stats= field->read_stats; + double tab_records= (double)table->stat_records(); + + if (!col_stats) + return tab_records; + /* + Use statistics for a table only when we have actually read + the statistics from the stat tables. For example due to + chances of getting a deadlock we disable reading statistics for + a table. + */ + + if (!table->stats_is_read) + return tab_records; + + THD *thd= table->in_use; + double col_nulls= tab_records * col_stats->get_nulls_ratio(); + + double col_non_nulls= tab_records - col_nulls; + + bool nulls_incl= field->null_ptr && min_endp && min_endp->key[0] && + !(range_flag & NEAR_MIN); + + if (col_non_nulls < 1) + { + if (nulls_incl) + res= col_nulls; + else + res= 0; + } + else if (min_endp && max_endp && min_endp->length == max_endp->length && + !memcmp(min_endp->key, max_endp->key, min_endp->length)) + { + if (nulls_incl) + { + /* This is null single point range */ + res= col_nulls; + } + else + { + double avg_frequency= col_stats->get_avg_frequency(); + res= avg_frequency; + if (avg_frequency > 1.0 + 0.000001 && + col_stats->min_max_values_are_provided()) + { + Histogram_base *hist = col_stats->histogram; + if (hist && hist->is_usable(thd)) + { + res= col_non_nulls * + hist->point_selectivity(field, min_endp, + avg_frequency / col_non_nulls); + } + } + else if (avg_frequency == 0.0) + { + /* This actually means there is no statistics data */ + res= tab_records; + } + } + } + else + { + if (col_stats->min_max_values_are_provided()) + { + Histogram_base *hist= col_stats->histogram; + double avg_frequency= col_stats->get_avg_frequency(); + double 
sel; + if (hist && hist->is_usable(thd)) + { + sel= hist->range_selectivity(field, min_endp, max_endp, + avg_frequency / col_non_nulls); + res= col_non_nulls * sel; + } + else + { + double min_mp_pos, max_mp_pos; + if (min_endp && !(field->null_ptr && min_endp->key[0])) + { + store_key_image_to_rec(field, (uchar *) min_endp->key, + field->key_length()); + min_mp_pos= + field->pos_in_interval(col_stats->min_value, col_stats->max_value); + } + else + min_mp_pos= 0.0; + if (max_endp) + { + store_key_image_to_rec(field, (uchar *) max_endp->key, + field->key_length()); + max_mp_pos= + field->pos_in_interval(col_stats->min_value, col_stats->max_value); + } + else + max_mp_pos= 1.0; + + sel = (max_mp_pos - min_mp_pos); + res= col_non_nulls * sel; + set_if_bigger(res, avg_frequency); + } + } + else + res= col_non_nulls; + if (nulls_incl) + res+= col_nulls; + } + return res; +} + +/* + Estimate selectivity of "col=const" using a histogram + + @param field the field to estimate its selectivity. + + @param endpoint The constant + + @param avg_sel Average selectivity of condition "col=const" in this table. + It is calcuated as (#non_null_values / #distinct_values). + + @return + Expected condition selectivity (a number between 0 and 1) + + @notes + [re_zero_length_buckets] If a bucket with zero value-length is in the + middle of the histogram, we will not have min==max. Example: suppose, + pos_value=0x12, and the histogram is: + + #n #n+1 #n+2 + ... 0x10 0x12 0x12 0x14 ... + | + +------------- bucket with zero value-length + + Here, we will get min=#n+1, max=#n+2, and use the multi-bucket formula. + + The problem happens at the histogram ends. if pos_value=0, and the + histogram is: + + 0x00 0x10 ... + + then min=0, max=0. This means pos_value is contained within bucket #0, + but on the other hand, histogram data says that the bucket has only one + value. 
+*/
+
+double Histogram_binary::point_selectivity(Field *field, key_range *endpoint,
+                                           double avg_sel)
+{
+  double sel;
+  Column_statistics *col_stats= field->read_stats;
+  store_key_image_to_rec(field, (uchar *) endpoint->key,
+                         field->key_length());
+  double pos= field->pos_in_interval(col_stats->min_value,
+                                     col_stats->max_value);
+  /* Find the bucket that contains the value 'pos'. */
+  uint min= find_bucket(pos, TRUE);
+  uint pos_value= (uint) (pos * prec_factor());
+
+  /* Find how many buckets this value occupies */
+  uint max= min;
+  while (max + 1 < get_width() && get_value(max + 1) == pos_value)
+    max++;
+
+  /*
+    A special case: we're looking at a single bucket, and that bucket has
+    zero value-length. Use the multi-bucket formula (attempt to use the
+    single-bucket formula would cause division by zero).
+
+    For more details see [re_zero_length_buckets] above.
+  */
+  if (max == min && get_value(max) == ((max==0)? 0 : get_value(max-1)))
+    max++;
+
+  if (max > min)
+  {
+    /*
+      The value occupies multiple buckets. Use start_bucket ... end_bucket as
+      selectivity.
+    */
+    double bucket_sel= 1.0/(get_width() + 1);
+    sel= bucket_sel * (max - min + 1);
+  }
+  else
+  {
+    /*
+      The value 'pos' fits within one single histogram bucket.
+
+      We also have avg_sel which is per-table average selectivity of col=const.
+      If there are popular values, this may be larger than one bucket, so
+      cap the returned number by the selectivity of one bucket.
+    */
+    double avg_bucket_width= 1.0 / (get_width() + 1);
+
+    sel= MY_MIN(avg_bucket_width, avg_sel);
+  }
+  return sel;
+}
+
+
+double Histogram_binary::range_selectivity(Field *field,
+                                           key_range *min_endp,
+                                           key_range *max_endp,
+                                           double avg_sel)
+{
+  double sel, min_mp_pos, max_mp_pos;
+  Column_statistics *col_stats= field->read_stats;
+
+  /* A missing or NULL-valued min endpoint maps to position 0.0 */
+  if (min_endp && !(field->null_ptr && min_endp->key[0]))
+  {
+    store_key_image_to_rec(field, (uchar *) min_endp->key,
+                           field->key_length());
+    min_mp_pos=
+      field->pos_in_interval(col_stats->min_value, col_stats->max_value);
+  }
+  else
+    min_mp_pos= 0.0;
+  if (max_endp)
+  {
+    store_key_image_to_rec(field, (uchar *) max_endp->key,
+                           field->key_length());
+    max_mp_pos=
+      field->pos_in_interval(col_stats->min_value, col_stats->max_value);
+  }
+  else
+    max_mp_pos= 1.0;
+
+  /* Selectivity is the fraction of buckets the two endpoints span */
+  double bucket_sel= 1.0 / (get_width() + 1);
+  uint min= find_bucket(min_mp_pos, TRUE);
+  uint max= find_bucket(max_mp_pos, FALSE);
+  sel= bucket_sel * (max - min + 1);
+
+  set_if_bigger(sel, avg_sel);
+  return sel;
+}
+
+/*
+  Check whether the table is one of the persistent statistical tables.
+*/
+bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table)
+{
+  DBUG_ASSERT(db->str && table->str);
+
+  /* Comparison is case-insensitive, using table_alias_charset */
+  if (!my_strcasecmp(table_alias_charset, db->str, MYSQL_SCHEMA_NAME.str))
+  {
+    for (uint i= 0; i < STATISTICS_TABLES; i ++)
+    {
+      if (!my_strcasecmp(table_alias_charset, table->str, stat_table_name[i].str))
+        return true;
+    }
+  }
+  return false;
+}
+
+/*
+  Check whether we can use EITS statistics for a field or not
+
+  TRUE : Use EITS for the columns
+  FALSE: Otherwise
+*/
+
+bool is_eits_usable(Field *field)
+{
+  Column_statistics* col_stats= field->read_stats;
+
+  // check if column_statistics was allocated for this field
+  if (!col_stats || !field->table->stats_is_read)
+    return false;
+
+  /*
+    (1): checks if we have EITS statistics for a particular column
+    (2): Don't use EITS for GEOMETRY columns
+    (3): Disabling reading EITS statistics for columns involved in the
+         partition list of a table. We assume the selectivity for
+         such columns would be handled during partition pruning.
+  */
+
+  return !col_stats->no_stat_values_provided() &&        //(1)
+    field->type() != MYSQL_TYPE_GEOMETRY &&              //(2)
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+    (!field->table->part_info ||
+     !field->table->part_info->field_in_partition_expr(field)) &&   //(3)
+#endif
+    true;
+}
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
new file mode 100644
index 00000000..12802bc9
--- /dev/null
+++ b/sql/sql_statistics.h
@@ -0,0 +1,625 @@
+/* Copyright 2006-2008 MySQL AB, 2008 Sun Microsystems, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_STATISTICS_H +#define SQL_STATISTICS_H + +#include +#include + +/* + For COMPLEMENTARY_FOR_QUERIES and PREFERABLY_FOR_QUERIES they are + similar to the COMPLEMENTARY and PREFERABLY respectively except that + with these values we would not be collecting EITS for queries like + ANALYZE TABLE t1; + To collect EITS with these values, we have to use PERSISITENT FOR + analyze table t1 persistent for + columns (col1,col2...) index (idx1, idx2...) + or + analyze table t1 persistent for all +*/ + +typedef +enum enum_use_stat_tables_mode +{ + NEVER, + COMPLEMENTARY, + PREFERABLY, + COMPLEMENTARY_FOR_QUERIES, + PREFERABLY_FOR_QUERIES +} Use_stat_tables_mode; + +typedef +enum enum_histogram_type +{ + SINGLE_PREC_HB, + DOUBLE_PREC_HB, + JSON_HB, + INVALID_HISTOGRAM +} Histogram_type; + +enum enum_stat_tables +{ + TABLE_STAT, + COLUMN_STAT, + INDEX_STAT, +}; + + +/* + These enumeration types comprise the dictionary of three + statistical tables table_stat, column_stat and index_stat + as they defined in ../scripts/mysql_system_tables.sql. + + It would be nice if the declarations of these types were + generated automatically by the table definitions. 
+*/ + +enum enum_table_stat_col +{ + TABLE_STAT_DB_NAME, + TABLE_STAT_TABLE_NAME, + TABLE_STAT_CARDINALITY, + TABLE_STAT_N_FIELDS +}; + +enum enum_column_stat_col +{ + COLUMN_STAT_DB_NAME, + COLUMN_STAT_TABLE_NAME, + COLUMN_STAT_COLUMN_NAME, + COLUMN_STAT_MIN_VALUE, + COLUMN_STAT_MAX_VALUE, + COLUMN_STAT_NULLS_RATIO, + COLUMN_STAT_AVG_LENGTH, + COLUMN_STAT_AVG_FREQUENCY, + COLUMN_STAT_HIST_SIZE, + COLUMN_STAT_HIST_TYPE, + COLUMN_STAT_HISTOGRAM, + COLUMN_STAT_N_FIELDS +}; + +enum enum_index_stat_col +{ + INDEX_STAT_DB_NAME, + INDEX_STAT_TABLE_NAME, + INDEX_STAT_INDEX_NAME, + INDEX_STAT_PREFIX_ARITY, + INDEX_STAT_AVG_FREQUENCY, + INDEX_STAT_N_FIELDS +}; + +inline +Use_stat_tables_mode get_use_stat_tables_mode(THD *thd) +{ + return (Use_stat_tables_mode) (thd->variables.use_stat_tables); +} +inline +bool check_eits_collection_allowed(THD *thd) +{ + return (get_use_stat_tables_mode(thd) == COMPLEMENTARY || + get_use_stat_tables_mode(thd) == PREFERABLY); +} + +inline +bool check_eits_preferred(THD *thd) +{ + return (get_use_stat_tables_mode(thd) == PREFERABLY || + get_use_stat_tables_mode(thd) == PREFERABLY_FOR_QUERIES); +} + +int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables); +int read_statistics_for_tables(THD *thd, TABLE_LIST *tables, + bool force_reload); +int collect_statistics_for_table(THD *thd, TABLE *table); +int alloc_statistics_for_table(THD *thd, TABLE *table, MY_BITMAP *stat_fields); +void free_statistics_for_table(TABLE *table); +int update_statistics_for_table(THD *thd, TABLE *table); +int delete_statistics_for_table(THD *thd, const LEX_CSTRING *db, + const LEX_CSTRING *tab); +int delete_statistics_for_column(THD *thd, TABLE *tab, Field *col); +int delete_statistics_for_index(THD *thd, TABLE *tab, KEY *key_info, + bool ext_prefixes_only); +int rename_table_in_stat_tables(THD *thd, const LEX_CSTRING *db, + const LEX_CSTRING *tab, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_tab); +int rename_columns_in_stat_table(THD *thd, 
TABLE *tab, + List *fields); +int rename_indexes_in_stat_table(THD *thd, TABLE *tab, + List *indexes); +void set_statistics_for_table(THD *thd, TABLE *table); + +double get_column_avg_frequency(Field * field); + +double get_column_range_cardinality(Field *field, + key_range *min_endp, + key_range *max_endp, + uint range_flag); +bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table); +bool is_eits_usable(Field* field); + +class Histogram_builder; + +/* + Common base for all histograms +*/ +class Histogram_base :public Sql_alloc +{ +public: + Histogram_base() {} + virtual ~Histogram_base()= default; + + virtual bool parse(MEM_ROOT *mem_root, + const char *db_name, const char *table_name, + Field *field, const char *hist_data, + size_t hist_data_len)= 0; + virtual void serialize(Field *to_field)= 0; + + virtual Histogram_type get_type()=0; + + virtual uint get_width()=0; + + /* + The creation-time workflow is: + * create a histogram + * init_for_collection() + * create_builder() + * feed the data to the builder + * serialize(); + */ + virtual void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, + ulonglong size)=0; + virtual Histogram_builder *create_builder(Field *col, uint col_len, + ha_rows rows)=0; + + /* + This function checks that histograms should be usable only when + 1) the level of optimizer_use_condition_selectivity > 3 + */ + bool is_usable(THD *thd) + { + return thd->variables.optimizer_use_condition_selectivity > 3; + } + + + virtual double point_selectivity(Field *field, key_range *endpoint, + double avg_sel)=0; + virtual double range_selectivity(Field *field, key_range *min_endp, + key_range *max_endp, double avg_sel)=0; + /* + Legacy: return the size of the histogram on disk. + + This will be stored in mysql.column_stats.hist_size column. + The value is not really needed as one can look at + LENGTH(mysql.column_stats.histogram) directly. 
+ */ + virtual uint get_size()=0; +}; + + +/* + A Height-balanced histogram that stores numeric fractions +*/ + +class Histogram_binary final : public Histogram_base +{ +private: + Histogram_type type; + size_t size; /* Size of values array, in bytes */ + uchar *values; + + uint prec_factor() + { + switch (type) { + case SINGLE_PREC_HB: + return ((uint) (1 << 8) - 1); + case DOUBLE_PREC_HB: + return ((uint) (1 << 16) - 1); + default: + DBUG_ASSERT(0); + } + return 1; + } + +public: + Histogram_binary(Histogram_type type_arg) : type(type_arg) + {} + + uint get_width() override + { + switch (type) { + case SINGLE_PREC_HB: + return (uint) size; + case DOUBLE_PREC_HB: + return (uint) (size / 2); + default: + DBUG_ASSERT(0); + } + return 0; + } +private: + uint get_value(uint i) + { + DBUG_ASSERT(i < get_width()); + switch (type) { + case SINGLE_PREC_HB: + return (uint) (((uint8 *) values)[i]); + case DOUBLE_PREC_HB: + return (uint) uint2korr(values + i * 2); + default: + DBUG_ASSERT(0); + } + return 0; + } + + /* Find the bucket which value 'pos' falls into. 
+  */
+  uint find_bucket(double pos, bool first)
+  {
+    /*
+      Binary search over the bucket boundary values; assumes the stored
+      values are non-decreasing (NOTE(review): not asserted here — confirm
+      the collection code guarantees this ordering).
+    */
+    size_t val= (size_t) (pos * prec_factor());
+    int lp= 0;
+    int rp= get_width() - 1;
+    int d= get_width() / 2;
+    uint i= lp + d;
+    for ( ; d; d= (rp - lp) / 2, i= lp + d)
+    {
+      if (val == get_value(i))
+        break;
+      if (val < get_value(i))
+        rp= i;
+      else if (val > get_value(i + 1))
+        lp= i + 1;
+      else
+        break;
+    }
+
+    if (val > get_value(i) && i < (get_width() - 1))
+      i++;
+
+    if (val == get_value(i))
+    {
+      /*
+        Runs of equal boundary values are possible; 'first' selects whether
+        to return the first or the last bucket of such a run.
+      */
+      if (first)
+      {
+        while(i && val == get_value(i - 1))
+          i--;
+      }
+      else
+      {
+        while(i + 1 < get_width() && val == get_value(i + 1))
+          i++;
+      }
+    }
+    return i;
+  }
+
+public:
+  uint get_size() override {return (uint)size;}
+
+  Histogram_type get_type() override { return type; }
+
+  bool parse(MEM_ROOT *mem_root, const char*, const char*, Field*,
+             const char *hist_data, size_t hist_data_len) override;
+  void serialize(Field *to_field) override;
+  void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
+                           ulonglong size) override;
+  Histogram_builder *create_builder(Field *col, uint col_len,
+                                    ha_rows rows) override;
+
+  /* Store a fraction in [0,1] as a scaled integer in bucket i */
+  void set_value(uint i, double val)
+  {
+    switch (type) {
+    case SINGLE_PREC_HB:
+      ((uint8 *) values)[i]= (uint8) (val * prec_factor());
+      return;
+    case DOUBLE_PREC_HB:
+      int2store(values + i * 2, val * prec_factor());
+      return;
+    default:
+      DBUG_ASSERT(0);
+      return;
+    }
+  }
+
+  /* Copy the value of bucket i-1 into bucket i */
+  void set_prev_value(uint i)
+  {
+    switch (type) {
+    case SINGLE_PREC_HB:
+      ((uint8 *) values)[i]= ((uint8 *) values)[i-1];
+      return;
+    case DOUBLE_PREC_HB:
+      int2store(values + i * 2, uint2korr(values + i * 2 - 2));
+      return;
+    default:
+      DBUG_ASSERT(0);
+      return;
+    }
+  }
+
+  double range_selectivity(Field *field, key_range *min_endp,
+                           key_range *max_endp, double avg_sel) override;
+
+  /*
+    Estimate selectivity of "col=const" using a histogram
+  */
+  double point_selectivity(Field *field, key_range *endpoint,
+                           double avg_sel) override;
+};
+
+
+/*
+  This is used to collect the basic statistics from a
Unique object: + - count of values + - count of distinct values + - count of distinct values that have occurred only once +*/ + +class Basic_stats_collector +{ + ulonglong count; /* number of values retrieved */ + ulonglong count_distinct; /* number of distinct values retrieved */ + /* number of distinct values that occurred only once */ + ulonglong count_distinct_single_occurence; + +public: + Basic_stats_collector() + { + count= 0; + count_distinct= 0; + count_distinct_single_occurence= 0; + } + + ulonglong get_count_distinct() const { return count_distinct; } + ulonglong get_count_single_occurence() const + { + return count_distinct_single_occurence; + } + ulonglong get_count() const { return count; } + + void next(void *elem, element_count elem_cnt) + { + count_distinct++; + if (elem_cnt == 1) + count_distinct_single_occurence++; + count+= elem_cnt; + } +}; + + +/* + Histogram_builder is a helper class that is used to build histograms + for columns. + + Do not create directly, call Histogram->get_builder(...); +*/ + +class Histogram_builder: public Sql_alloc +{ +protected: + Field *column; /* table field for which the histogram is built */ + uint col_length; /* size of this field */ + ha_rows records; /* number of records the histogram is built for */ + + Histogram_builder(Field *col, uint col_len, ha_rows rows) : + column(col), col_length(col_len), records(rows) + {} + +public: + // A histogram builder will also collect the counters + Basic_stats_collector counters; + + virtual int next(void *elem, element_count elem_cnt)=0; + virtual void finalize()=0; + virtual ~Histogram_builder(){} +}; + + +class Column_statistics; +class Index_statistics; + +/* Statistical data on a table */ + +class Table_statistics +{ +public: + my_bool cardinality_is_null; /* TRUE if the cardinality is unknown */ + uint columns; /* Number of columns in table */ + ha_rows cardinality; /* Number of rows in the table */ + uchar *min_max_record_buffers; /* Record buffers for min/max values 
*/ + Column_statistics *column_stats; /* Array of statistical data for columns */ + Index_statistics *index_stats; /* Array of statistical data for indexes */ + + /* Array of records per key for index prefixes */ + ulonglong *idx_avg_frequency; + uchar *histograms; /* Sequence of histograms */ +}; + + +/* + Statistical data on a column + + Note: objects of this class may be "empty", where they have almost all fields + as zeros, for example, get_avg_frequency() will return 0. + + objects are allocated in alloc_statistics_for_table[_share]. +*/ + +class Column_statistics :public Sql_alloc +{ + +private: + static const uint Scale_factor_nulls_ratio= 100000; + static const uint Scale_factor_avg_length= 100000; + static const uint Scale_factor_avg_frequency= 100000; + +public: + ~Column_statistics() + { + delete histogram; + } + /* + Bitmap indicating what statistical characteristics + are available for the column + */ + uint32 column_stat_nulls; + + /* For the below two, see comments in get_column_range_cardinality() */ + /* Minimum value for the column */ + Field *min_value; + /* Maximum value for the column */ + Field *max_value; + +private: + + /* + The ratio Z/N multiplied by the scale factor Scale_factor_nulls_ratio, + where + N is the total number of rows, + Z is the number of nulls in the column + */ + ulong nulls_ratio; + + /* + Average number of bytes occupied by the representation of a + value of the column in memory buffers such as join buffer + multiplied by the scale factor Scale_factor_avg_length. + CHAR values are stripped of trailing spaces. + Flexible values are stripped of their length prefixes. 
+ */ + ulonglong avg_length; + + /* + The ratio N/D multiplied by the scale factor Scale_factor_avg_frequency, + where + N is the number of rows with not null value in the column, + D the number of distinct values among them + */ + ulonglong avg_frequency; + +public: + Histogram_base *histogram; + bool histogram_exists; + + uint32 no_values_provided_bitmap() + { + return + ((1 << (COLUMN_STAT_HISTOGRAM-COLUMN_STAT_COLUMN_NAME))-1) << + (COLUMN_STAT_COLUMN_NAME+1); + } + + void set_all_nulls() + { + column_stat_nulls= no_values_provided_bitmap(); + } + + void set_not_null(uint stat_field_no) + { + column_stat_nulls&= ~(1 << stat_field_no); + } + + void set_null(uint stat_field_no) + { + column_stat_nulls|= (1 << stat_field_no); + } + + bool is_null(uint stat_field_no) + { + return MY_TEST(column_stat_nulls & (1 << stat_field_no)); + } + + double get_nulls_ratio() + { + return (double) nulls_ratio / Scale_factor_nulls_ratio; + } + + double get_avg_length() + { + return (double) avg_length / Scale_factor_avg_length; + } + + double get_avg_frequency() + { + return (double) avg_frequency / Scale_factor_avg_frequency; + } + + void set_nulls_ratio (double val) + { + nulls_ratio= (ulong) (val * Scale_factor_nulls_ratio); + } + + void set_avg_length (double val) + { + avg_length= (ulonglong) (val * Scale_factor_avg_length); + } + + void set_avg_frequency (double val) + { + avg_frequency= (ulonglong) (val * Scale_factor_avg_frequency); + } + + bool min_max_values_are_provided() + { + return !is_null(COLUMN_STAT_MIN_VALUE) && + !is_null(COLUMN_STAT_MAX_VALUE); + } + /* + This function checks whether the values for the fields of the statistical + tables that were NULL by DEFAULT for a column have changed or not. 
+ + @retval + TRUE: Statistics are not present for a column + FALSE: Statisitics are present for a column + */ + bool no_stat_values_provided() + { + return (column_stat_nulls == no_values_provided_bitmap()); + } +}; + + +/* Statistical data on an index prefixes */ + +class Index_statistics +{ + +private: + static const uint Scale_factor_avg_frequency= 100000; + /* + The k-th element of this array contains the ratio N/D + multiplied by the scale factor Scale_factor_avg_frequency, + where N is the number of index entries without nulls + in the first k components, and D is the number of distinct + k-component prefixes among them + */ + ulonglong *avg_frequency; + +public: + + void init_avg_frequency(ulonglong *ptr) { avg_frequency= ptr; } + + bool avg_frequency_is_inited() { return avg_frequency != NULL; } + + double get_avg_frequency(uint i) + { + return (double) avg_frequency[i] / Scale_factor_avg_frequency; + } + + void set_avg_frequency(uint i, double val) + { + avg_frequency[i]= (ulonglong) (val * Scale_factor_avg_frequency); + } + +}; + +#endif /* SQL_STATISTICS_H */ diff --git a/sql/sql_string.cc b/sql/sql_string.cc new file mode 100644 index 00000000..25521bb3 --- /dev/null +++ b/sql/sql_string.cc @@ -0,0 +1,1284 @@ +/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. + Copyright (c) 2016, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* This file is originally from the mysql distribution. Coded by monty */ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include +#include +#include + +#include "sql_string.h" + +/***************************************************************************** +** String functions +*****************************************************************************/ + +bool Binary_string::real_alloc(size_t length) +{ + size_t arg_length= ALIGN_SIZE(length + 1); + DBUG_ASSERT(arg_length > length); + if (arg_length <= length) + return TRUE; /* Overflow */ + DBUG_ASSERT(length < UINT_MAX32); // cast to uint32 is safe + str_length=0; + if (Alloced_length < arg_length) + { + free_buffer(); + if (!(Ptr=(char*) my_malloc(STRING_PSI_MEMORY_KEY, + arg_length,MYF(MY_WME | (thread_specific ? + MY_THREAD_SPECIFIC : 0))))) + return TRUE; + Alloced_length=(uint32) arg_length; + alloced=1; + } + Ptr[0]=0; + return FALSE; +} + + +/** + Allocates a new buffer on the heap for this String if current buffer is + smaller. + + - If the String's internal buffer is privately owned and heap allocated, + one of the following is performed. + + - If the requested length is greater than what fits in the buffer, a new + buffer is allocated, data moved and the old buffer freed. + + - If the requested length is less or equal to what fits in the buffer, a + null character is inserted at the appropriate position. + + - If the String does not keep a private buffer on the heap, such a buffer + will be allocated and the string copied accoring to its length, as found + in String::length(). + + For C compatibility, the new string buffer is null terminated if it was + allocated. 
+ + @param alloc_length The requested string size in characters, excluding any + null terminator. + + @retval false Either the copy operation is complete or, if the size of the + new buffer is smaller than the currently allocated buffer (if one exists), + no allocation occurred. + + @retval true An error occurred when attempting to allocate memory. +*/ + +bool Binary_string::realloc_raw(size_t alloc_length) +{ + if (Alloced_length < alloc_length) + { + char *new_ptr; + uint32 len= ALIGN_SIZE(alloc_length+1); + DBUG_ASSERT(len > alloc_length); + if (len <= alloc_length) + return TRUE; /* Overflow */ + if (alloced) + { + if (!(new_ptr= (char*) my_realloc(STRING_PSI_MEMORY_KEY, Ptr,len, + MYF(MY_WME | + (thread_specific ? + MY_THREAD_SPECIFIC : 0))))) + return TRUE; // Signal error + } + else if ((new_ptr= (char*) my_malloc(STRING_PSI_MEMORY_KEY, len, + MYF(MY_WME | + (thread_specific ? + MY_THREAD_SPECIFIC : 0))))) + { + DBUG_ASSERT(str_length < len); + if (str_length) // Avoid bugs in memcpy on AIX + memcpy(new_ptr,Ptr,str_length); + new_ptr[str_length]=0; + alloced=1; + } + else + return TRUE; // Signal error + Ptr= new_ptr; + DBUG_ASSERT(len < UINT_MAX32); + Alloced_length= (uint32)len; + } + return FALSE; +} + + +bool String::set_int(longlong num, bool unsigned_flag, CHARSET_INFO *cs) +{ + /* + This allocates a few bytes extra in the unlikely case that cs->mb_maxlen + > 1, but we can live with that + */ + uint l= LONGLONG_BUFFER_SIZE * cs->mbmaxlen; + int base= unsigned_flag ? 10 : -10; + + if (alloc(l)) + return TRUE; + str_length=(uint32) (cs->longlong10_to_str)(Ptr,l,base,num); + set_charset(cs); + return FALSE; +} + + +// Convert a number into its HEX representation +bool Binary_string::set_hex(ulonglong num) +{ + char *n_end; + if (alloc(65) || !(n_end= longlong2str(num, Ptr, 16))) + return true; + length((uint32) (n_end - Ptr)); + return false; +} + + +/** + Append a hex representation of the byte "value" into "to". 
+  Note:
+    "to" is incremented for the caller by two bytes. It's passed by reference!
+    So it resembles a macro, hence capital letters in the name.
+*/
+static inline void APPEND_HEX(char *&to, uchar value)
+{
+  *to++= _dig_vec_upper[((uchar) value) >> 4];
+  *to++= _dig_vec_upper[((uchar) value) & 0x0F];
+}
+
+
+/* Append the hex dump of 'len' bytes of 'str'; caller must have allocated */
+void Binary_string::qs_append_hex(const char *str, uint32 len)
+{
+  ASSERT_LENGTH(len*2);
+  const char *str_end= str + len;
+  for (char *to= Ptr + str_length ; str < str_end; str++)
+    APPEND_HEX(to, (uchar) *str);
+  str_length+= len * 2;
+}
+
+
+/* Append a uint32 as exactly 8 hex digits, most significant byte first */
+void Binary_string::qs_append_hex_uint32(uint32 num)
+{
+  char *to= Ptr + str_length;
+  APPEND_HEX(to, (uchar) (num >> 24));
+  APPEND_HEX(to, (uchar) (num >> 16));
+  APPEND_HEX(to, (uchar) (num >> 8));
+  APPEND_HEX(to, (uchar) num);
+  str_length+= 8;
+}
+
+
+// Convert a string to its HEX representation
+bool Binary_string::set_hex(const char *str, uint32 len)
+{
+  /*
+    Safety: cut the source string if "len" is too large.
+    Note, alloc() can allocate some more space than requested, due to:
+    - ALIGN_SIZE
+    - one extra byte for a null terminator
+    So cut the source string to 0x7FFFFFF0 rather than 0x7FFFFFFE.
+ */ + set_if_smaller(len, 0x7FFFFFF0); + if (alloc(len * 2)) + return true; + length(0); + qs_append_hex(str, len); + return false; +} + + +bool Binary_string::set_fcvt(double num, uint decimals) +{ + // Assert that `decimals` is small enough to fit into FLOATING_POINT_BUFFER + DBUG_ASSERT(decimals < DECIMAL_NOT_SPECIFIED); + if (alloc(FLOATING_POINT_BUFFER)) + return true; + length(my_fcvt(num, decimals, Ptr, NULL)); + return false; +} + + +bool String::set_real(double num,uint decimals, CHARSET_INFO *cs) +{ + char buff[FLOATING_POINT_BUFFER]; + uint dummy_errors; + size_t len; + + set_charset(cs); + if (decimals >= FLOATING_POINT_DECIMALS) + { + len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL); + return copy(buff, (uint)len, &my_charset_latin1, cs, &dummy_errors); + } + len= my_fcvt(num, decimals, buff, NULL); + return copy(buff, (uint32) len, &my_charset_latin1, cs, + &dummy_errors); +} + + +bool Binary_string::copy() +{ + if (!alloced) + { + Alloced_length=0; // Force realloc + return realloc(str_length); + } + return FALSE; +} + +/** + Copies the internal buffer from str. If this String has a private heap + allocated buffer where new data does not fit, a new buffer is allocated + before copying and the old buffer freed. Character set information is also + copied. + + @param str The string whose internal buffer is to be copied. + + @retval false Success. + @retval true Memory allocation failed. +*/ +bool Binary_string::copy(const Binary_string &str) +{ + if (alloc(str.str_length+1)) + return TRUE; + if ((str_length=str.str_length)) + bmove(Ptr,str.Ptr,str_length); // May be overlapping + Ptr[str_length]=0; + return FALSE; +} + +bool Binary_string::copy(const char *str, size_t arg_length) +{ + DBUG_ASSERT(arg_length < UINT_MAX32); + if (alloc(arg_length+1)) + return TRUE; + if (Ptr == str && arg_length == uint32(str_length)) + { + /* + This can happen in some cases. 
This code is here mainly to avoid + warnings from valgrind, but can also be an indication of error. + */ + DBUG_PRINT("warning", ("Copying string on itself: %p %zu", + str, arg_length)); + } + else if ((str_length=uint32(arg_length))) + memcpy(Ptr,str,arg_length); + Ptr[arg_length]=0; + return FALSE; +} + +/* + Copy string, where strings may overlap. + Same as String::copy, but use memmove instead of memcpy to avoid warnings + from valgrind +*/ + +bool Binary_string::copy_or_move(const char *str, size_t arg_length) +{ + DBUG_ASSERT(arg_length < UINT_MAX32); + if (alloc(arg_length+1)) + return TRUE; + if ((str_length=uint32(arg_length))) + memmove(Ptr,str,arg_length); + Ptr[arg_length]=0; + return FALSE; +} + + +/* + Checks that the source string can be just copied to the destination string + without conversion. + + SYNPOSIS + + needs_conversion() + arg_length Length of string to copy. + from_cs Character set to copy from + to_cs Character set to copy to + uint32 *offset Returns number of unaligned characters. + + RETURN + 0 No conversion needed + 1 Either character set conversion or adding leading zeros + (e.g. for UCS-2) must be done + + NOTE + to_cs may be NULL for "no conversion" if the system variable + character_set_results is NULL. +*/ + +bool String::needs_conversion(size_t arg_length, + CHARSET_INFO *from_cs, + CHARSET_INFO *to_cs, + uint32 *offset) +{ + *offset= 0; + if (!to_cs || + (to_cs == &my_charset_bin) || + (to_cs == from_cs) || + my_charset_same(from_cs, to_cs) || + ((from_cs == &my_charset_bin) && + (!(*offset=(uint32)(arg_length % to_cs->mbminlen))))) + return FALSE; + return TRUE; +} + + +/* + Checks that the source string can just be copied to the destination string + without conversion. + Unlike needs_conversion it will require conversion on incoming binary data + to ensure the data are verified for vailidity first. + + @param arg_length Length of string to copy. 
+ @param from_cs Character set to copy from + @param to_cs Character set to copy to + + @return conversion needed +*/ +bool String::needs_conversion_on_storage(size_t arg_length, + CHARSET_INFO *cs_from, + CHARSET_INFO *cs_to) +{ + uint32 offset; + return (needs_conversion(arg_length, cs_from, cs_to, &offset) || + /* force conversion when storing a binary string */ + (cs_from == &my_charset_bin && + /* into a non-binary destination */ + cs_to != &my_charset_bin && + /* and any of the following is true :*/ + ( + /* it's a variable length encoding */ + cs_to->mbminlen != cs_to->mbmaxlen || + /* longer than 2 bytes : neither 1 byte nor ucs2 */ + cs_to->mbminlen > 2 || + /* and is not a multiple of the char byte size */ + 0 != (arg_length % cs_to->mbmaxlen) + ) + ) + ); +} + + +/* + Copy a multi-byte character sets with adding leading zeros. + + SYNOPSIS + + copy_aligned() + str String to copy + arg_length Length of string. This should NOT be dividable with + cs->mbminlen. + offset arg_length % cs->mb_minlength + cs Character set for 'str' + + NOTES + For real multi-byte, ascii incompatible charactser sets, + like UCS-2, add leading zeros if we have an incomplete character. + Thus, + SELECT _ucs2 0xAA + will automatically be converted into + SELECT _ucs2 0x00AA + + RETURN + 0 ok + 1 error +*/ + +bool String::copy_aligned(const char *str, size_t arg_length, size_t offset, + CHARSET_INFO *cs) +{ + /* How many bytes are in incomplete character */ + offset= cs->mbminlen - offset; /* How many zeros we should prepend */ + DBUG_ASSERT(offset && offset != cs->mbminlen); + + size_t aligned_length= arg_length + offset; + if (alloc(aligned_length+1)) + return TRUE; + + /* + Note, this is only safe for big-endian UCS-2. + If we add little-endian UCS-2 sometimes, this code + will be more complicated. But it's OK for now. 
+ */ + bzero((char*) Ptr, offset); + memcpy(Ptr + offset, str, arg_length); + Ptr[aligned_length]=0; + /* str_length is always >= 0 as arg_length is != 0 */ + str_length= (uint32)aligned_length; + set_charset(cs); + return FALSE; +} + + +bool String::set_or_copy_aligned(const char *str, size_t arg_length, + CHARSET_INFO *cs) +{ + /* How many bytes are in incomplete character */ + size_t offset= (arg_length % cs->mbminlen); + + if (!offset) + { + /* All characters are complete, just use given string */ + set(str, arg_length, cs); + return FALSE; + } + return copy_aligned(str, arg_length, offset, cs); +} + + +/** + Copies the character data into this String, with optional character set + conversion. + + @return + FALSE ok + TRUE Could not allocate result buffer + +*/ + +bool String::copy(const char *str, size_t arg_length, + CHARSET_INFO *from_cs, CHARSET_INFO *to_cs, uint *errors) +{ + uint32 offset; + + DBUG_ASSERT(!str || str != Ptr || !is_alloced()); + + if (!needs_conversion(arg_length, from_cs, to_cs, &offset)) + { + *errors= 0; + return copy(str, arg_length, to_cs); + } + if ((from_cs == &my_charset_bin) && offset) + { + *errors= 0; + return copy_aligned(str, arg_length, offset, to_cs); + } + size_t new_length= to_cs->mbmaxlen*arg_length; + if (alloc(new_length)) + return TRUE; + str_length=copy_and_convert((char*) Ptr, new_length, to_cs, + str, arg_length, from_cs, errors); + set_charset(to_cs); + return FALSE; +} + + +/* + Set a string to the value of a latin1-string, keeping the original charset + + SYNOPSIS + copy_or_set() + str String of a simple charset (latin1) + arg_length Length of string + + IMPLEMENTATION + If string object is of a simple character set, set it to point to the + given string. + If not, make a copy and convert it to the new character set. 
+ + RETURN + 0 ok + 1 Could not allocate result buffer + +*/ + +bool String::set_ascii(const char *str, size_t arg_length) +{ + if (mbminlen() == 1) + { + set(str, arg_length, charset()); + return 0; + } + uint dummy_errors; + return copy(str, (uint32) arg_length, &my_charset_latin1, + charset(), &dummy_errors); +} + + +/* This is used by mysql.cc */ + +bool Binary_string::fill(size_t max_length,char fill_char) +{ + DBUG_ASSERT(max_length < UINT_MAX32); // cast to uint32 is safe + if (str_length > max_length) + Ptr[str_length= (uint32) max_length]=0; + else + { + if (realloc(max_length)) + return TRUE; + bfill(Ptr+str_length,max_length-str_length,fill_char); + str_length= (uint32) max_length; + } + return FALSE; +} + +void String::strip_sp() +{ + while (str_length && my_isspace(charset(), Ptr[str_length-1])) + str_length--; +} + + +/* + Append an ASCII string to the a string of the current character set +*/ + +bool String::append(const char *s,size_t size) +{ + DBUG_ASSERT(size <= UINT_MAX32); // cast to uint32 is safe + uint32 arg_length= (uint32) size; + if (!arg_length) + return FALSE; + + /* + For an ASCII incompatible string, e.g. UCS-2, we need to convert + */ + if (mbminlen() > 1) + { + uint32 add_length= arg_length * mbmaxlen(); + uint dummy_errors; + if (realloc_with_extra_if_needed(str_length+ add_length)) + return TRUE; + str_length+= copy_and_convert(Ptr + str_length, add_length, charset(), + s, arg_length, &my_charset_latin1, + &dummy_errors); + return FALSE; + } + + /* + For an ASCII compatible string we can just append. 
+ */ + return Binary_string::append(s, arg_length); +} + + +bool Binary_string::append_longlong(longlong val) +{ + if (realloc(str_length+MAX_BIGINT_WIDTH+2)) + return TRUE; + char *end= (char*) longlong10_to_str(val, (char*) Ptr + str_length, -10); + str_length= (uint32)(end - Ptr); + return FALSE; +} + + +bool Binary_string::append_ulonglong(ulonglong val) +{ + if (realloc(str_length+MAX_BIGINT_WIDTH+2)) + return TRUE; + char *end= (char*) longlong10_to_str(val, (char*) Ptr + str_length, 10); + str_length= (uint32) (end - Ptr); + return FALSE; +} + +/* + Append a string in the given charset to the string + with character set recoding +*/ + +bool String::append(const char *s, size_t arg_length, CHARSET_INFO *cs) +{ + if (!arg_length) + return false; + + uint32 offset; + + if (needs_conversion((uint32)arg_length, cs, charset(), &offset)) + { + size_t add_length; + if ((cs == &my_charset_bin) && offset) + { + DBUG_ASSERT(mbminlen() > offset); + offset= mbminlen() - offset; // How many characters to pad + add_length= arg_length + offset; + if (realloc(str_length + add_length)) + return TRUE; + bzero((char*) Ptr + str_length, offset); + memcpy(Ptr + str_length + offset, s, arg_length); + str_length+= (uint32)add_length; + return FALSE; + } + + add_length= arg_length / cs->mbminlen * mbmaxlen(); + uint dummy_errors; + if (realloc_with_extra_if_needed(str_length + add_length)) + return TRUE; + str_length+= copy_and_convert(Ptr + str_length, (uint32)add_length, charset(), + s, (uint32)arg_length, cs, &dummy_errors); + return false; + } + return Binary_string::append(s, arg_length); +} + + +bool Binary_string::append(IO_CACHE* file, uint32 arg_length) +{ + if (realloc_with_extra_if_needed(str_length+arg_length)) + return TRUE; + if (my_b_read(file, (uchar*) Ptr + str_length, arg_length)) + { + shrink(str_length ? str_length : 1); + return TRUE; + } + str_length+=arg_length; + return FALSE; +} + + +/** + Append a parenthesized number to String. 
+ Used in various pieces of SHOW related code. + + @param nr Number + @param radix Radix, optional parameter, 10 by default. +*/ +bool String::append_parenthesized(long nr, int radix) +{ + char buff[64], *end; + buff[0]= '('; + end= int10_to_str(nr, buff + 1, radix); + *end++ = ')'; + return append(buff, (uint) (end - buff)); +} + + +int Binary_string::strstr(const char *search, uint32 search_length, uint32 offset) const +{ + if (search_length + offset <= str_length) + { + if (!search_length) + return ((int) offset); // Empty string is always found + + const char *str= Ptr + offset; + const char *end= Ptr + str_length - search_length + 1; + const char *search_end= search + search_length; +skip: + while (str != end) + { + if (*str++ == *search) + { + char *i= (char*) str; + char *j= (char*) search + 1 ; + while (j != search_end) + if (*i++ != *j++) goto skip; + return (int) (str-Ptr) -1; + } + } + } + return -1; +} + +int Binary_string::strstr(const Binary_string &s, uint32 offset) const +{ + return strstr(s.ptr(), s.length(), offset); +} + +/* +** Search string from end. 
Offset is offset to the end of string +*/ + +int Binary_string::strrstr(const Binary_string &s, uint32 offset) const +{ + if (s.length() <= offset && offset <= str_length) + { + if (!s.length()) + return offset; // Empty string is always found + const char *str = Ptr+offset-1; + const char *search=s.ptr()+s.length()-1; + + const char *end=Ptr+s.length()-2; + const char *search_end=s.ptr()-1; +skip: + while (str != end) + { + if (*str-- == *search) + { + char *i,*j; + i=(char*) str; j=(char*) search-1; + while (j != search_end) + if (*i-- != *j--) goto skip; + return (int) (i-Ptr) +1; + } + } + } + return -1; +} + + +bool Binary_string::replace(uint32 offset, uint32 arg_length, + const char *to, uint32 to_length) +{ + long diff = (long) to_length-(long) arg_length; + if (offset+arg_length <= str_length) + { + if (diff < 0) + { + if (to_length) + memcpy(Ptr+offset,to,to_length); + bmove(Ptr+offset+to_length,Ptr+offset+arg_length, + str_length-offset-arg_length); + } + else + { + if (diff) + { + if (realloc_with_extra_if_needed(str_length+(uint32) diff)) + return TRUE; + bmove_upp((uchar*) Ptr+str_length+diff, (uchar*) Ptr+str_length, + str_length-offset-arg_length); + } + if (to_length) + memcpy(Ptr+offset,to,to_length); + } + str_length+=(uint32) diff; + } + return FALSE; +} + + +// added by Holyfoot for "geometry" needs +int Binary_string::reserve(size_t space_needed, size_t grow_by) +{ + if (Alloced_length < str_length + space_needed) + { + if (realloc(Alloced_length + MY_MAX(space_needed, grow_by) - 1)) + return TRUE; + } + return FALSE; +} + +void Binary_string::qs_append(const char *str, size_t len) +{ + ASSERT_LENGTH(len); + memcpy(Ptr + str_length, str, len + 1); + str_length += (uint32)len; +} + +void Binary_string::qs_append(double d) +{ + char *buff = Ptr + str_length; + size_t length= my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, + buff, NULL); + ASSERT_LENGTH(length); + str_length+= (uint32) length; +} + +void Binary_string::qs_append(const 
double *d) +{ + double ld; + float8get(ld, (const char*) d); + qs_append(ld); +} + +void Binary_string::qs_append(int i) +{ + char *buff= Ptr + str_length; + char *end= int10_to_str(i, buff, -10); + ASSERT_LENGTH((size_t) (end-buff)); + str_length+= (uint32) (end-buff); +} + +void Binary_string::qs_append(ulonglong i) +{ + char *buff= Ptr + str_length; + char *end= longlong10_to_str(i, buff, 10); + ASSERT_LENGTH((size_t) (end-buff)); + str_length+= (uint32) (end-buff); +} + + +bool Binary_string::copy_printable_hhhh(CHARSET_INFO *to_cs, + CHARSET_INFO *from_cs, + const char *from, + size_t from_length) +{ + DBUG_ASSERT(from_length < UINT_MAX32); + uint errors; + uint one_escaped_char_length= MY_CS_PRINTABLE_CHAR_LENGTH * to_cs->mbminlen; + uint one_char_length= MY_MAX(one_escaped_char_length, to_cs->mbmaxlen); + ulonglong bytes_needed= from_length * one_char_length; + if (bytes_needed >= UINT_MAX32 || alloc((size_t) bytes_needed)) + return true; + str_length= my_convert_using_func(Ptr, Alloced_length, to_cs, + to_cs->cset->wc_to_printable, + from, from_length, + from_cs, + from_cs->cset->mb_wc, + &errors); + return false; +} + + +/* + Compare strings according to collation, without end space. + + SYNOPSIS + sortcmp() + s First string + t Second string + cs Collation + + NOTE: + Normally this is case sensitive comparison + + RETURN + < 0 s < t + 0 s == t + > 0 s > t +*/ + + +int sortcmp(const Binary_string *s, const Binary_string *t, CHARSET_INFO *cs) +{ + return cs->strnncollsp(s->ptr(), s->length(), t->ptr(), t->length()); +} + + +/* + Compare strings byte by byte. End spaces are also compared. + + SYNOPSIS + stringcmp() + s First string + t Second string + + NOTE: + Strings are compared as a stream of uchars + + RETURN + < 0 s < t + 0 s == t + > 0 s > t +*/ + + +int stringcmp(const Binary_string *s, const Binary_string *t) +{ + uint32 s_len=s->length(),t_len=t->length(),len=MY_MIN(s_len,t_len); + int cmp= len ? 
memcmp(s->ptr(), t->ptr(), len) : 0; + return (cmp) ? cmp : (int) (s_len - t_len); +} + + +/** + Return a string which has the same value with "from" and + which is safe to modify, trying to avoid unnecessary allocation + and copying when possible. + + @param to Buffer. Must not be a constant string. + @param from Some existing value. We'll try to reuse it. + Can be a constant or a variable string. + @param from_length The total size that will be possibly needed. + Note, can be 0. + + Note, in some cases "from" and "to" can point to the same object. + + If "from" is a variable string and its allocated memory is enough + to store "from_length" bytes, then "from" is returned as is. + + If "from" is a variable string and its allocated memory is not enough + to store "from_length" bytes, then "from" is reallocated and returned. + + Otherwise (if "from" is a constant string, or looks like a constant string), + then "to" is reallocated to fit "from_length" bytes, the value is copied + from "from" to "to", then "to" is returned. +*/ +String *copy_if_not_alloced(String *to,String *from,uint32 from_length) +{ + DBUG_ASSERT(to); + /* + If "from" is a constant string, e.g.: + SELECT INSERT('', , , ); + we should not return it. See MDEV-9332. + + The code below detects different string types: + + a. All constant strings have Alloced_length==0 and alloced==false. + They point to a static memory array, or a mem_root memory, + and should stay untouched until the end of their life cycle. + Not safe to reuse. + + b. Some variable string have Alloced_length==0 and alloced==false initially, + they are not bound to any char array and allocate space on the first use + (and become #d). A typical example of such String is Item::str_value. + This type of string could be reused, but there is no a way to distinguish + them from the true constant strings (#a). + Not safe to reuse. + + c. Some variable strings have Alloced_length>0 and alloced==false. 
+ They point to a fixed size writtable char array (typically on stack) + initially but can later allocate more space on the heap when the + fixed size array is too small (these strings become #d after allocation). + Safe to reuse. + + d. Some variable strings have Alloced_length>0 and alloced==true. + They already store data on the heap. + Safe to reuse. + + e. Some strings can have Alloced_length==0 and alloced==true. + This type of strings allocate space on the heap, but then are marked + as constant strings using String::mark_as_const(). + A typical example - the result of a character set conversion + of a constant string. + Not safe to reuse. + */ + if (from->alloced_length() > 0) // "from" is #c or #d (not a constant) + { + if (from->alloced_length() >= from_length) + return from; // #c or #d (large enough to store from_length bytes) + + if (from->is_alloced()) + { + (void) from->realloc(from_length); + return from; // #d (reallocated to fit from_length bytes) + } + /* + "from" is of type #c. It currently points to a writtable char array + (typically on stack), but is too small for "from_length" bytes. + We need to reallocate either "from" or "to". + + "from" typically points to a temporary buffer inside Item_xxx::val_str(), + or to Item::str_value, and thus is "less permanent" than "to". + + Reallocating "to" may give more benifits: + - "to" can point to a "more permanent" storage and can be reused + for multiple rows, e.g. str_buffer in Protocol::send_result_set_row(), + which is passed to val_str() for all string type rows. + - "from" can stay pointing to its original fixed size stack char array, + and thus reduce the total amount of my_alloc/my_free. 
+ */ + } + + if (from == to) + { + /* + Possible string types: + #a not possible (constants should not be passed as "to") + #b possible (a fresh variable with no associated char buffer) + #c possible (a variable with a char buffer, + in case it's smaller than fixed_length) + #d not possible (handled earlier) + #e not possible (constants should not be passed as "to") + + If a string of types #a or #e appears here, that means the caller made + something wrong. Otherwise, it's safe to reallocate and return "to". + + Note, as we can't distinguish between #a and #b for sure, + so we can't assert "not #a", but we can at least assert "not #e". + */ + DBUG_ASSERT(!from->is_alloced() || from->alloced_length() > 0); // Not #e + + (void) from->realloc(from_length); + return from; + } + if (from->uses_buffer_owned_by(to)) + { + DBUG_ASSERT(!from->is_alloced()); + DBUG_ASSERT(to->is_alloced()); + /* + "from" is a constant string pointing to a fragment of alloced string "to": + to= xxxFFFyyy + - FFF is the part of "to" pointed by "from" + - xxx is the part of "to" before "from" + - yyy is the part of "to" after "from" + */ + uint32 xxx_length= (uint32) (from->ptr() - to->ptr()); + uint32 yyy_length= (uint32) (to->end() - from->end()); + DBUG_ASSERT(to->length() >= yyy_length); + to->length(to->length() - yyy_length); // Remove the "yyy" part + DBUG_ASSERT(to->length() >= xxx_length); + to->replace(0, xxx_length, "", 0); // Remove the "xxx" part + to->realloc(from_length); + to->set_charset(from->charset()); + return to; + } + if (to->alloc(from_length)) + return from; // Actually an error + if ((to->str_length=MY_MIN(from->str_length,from_length))) + memcpy(to->Ptr,from->Ptr,to->str_length); + to->set_charset(*from); + return to; // "from" was of types #a, #b, #e, or small #c. 
+} + + +/**************************************************************************** + Help functions +****************************************************************************/ + +/** + Copy string with HEX-encoding of "bad" characters. + + @details This functions copies the string pointed by "src" + to the string pointed by "dst". Not more than "srclen" bytes + are read from "src". Any sequences of bytes representing + a not-well-formed substring (according to cs) are hex-encoded, + and all well-formed substrings (according to cs) are copied as is. + Not more than "dstlen" bytes are written to "dst". The number + of bytes written to "dst" is returned. + + @param cs character set pointer of the destination string + @param[out] dst destination string + @param dstlen size of dst + @param src source string + @param srclen length of src + + @retval result length +*/ + +size_t +my_copy_with_hex_escaping(CHARSET_INFO *cs, + char *dst, size_t dstlen, + const char *src, size_t srclen) +{ + const char *srcend= src + srclen; + char *dst0= dst; + + for ( ; src < srcend ; ) + { + size_t chlen; + if ((chlen= my_ismbchar(cs, src, srcend))) + { + if (dstlen < chlen) + break; /* purecov: inspected */ + memcpy(dst, src, chlen); + src+= chlen; + dst+= chlen; + dstlen-= chlen; + } + else if (*src & 0x80) + { + if (dstlen < 4) + break; /* purecov: inspected */ + *dst++= '\\'; + *dst++= 'x'; + APPEND_HEX(dst, (uchar) *src); + src++; + dstlen-= 4; + } + else + { + if (dstlen < 1) + break; /* purecov: inspected */ + *dst++= *src++; + dstlen--; + } + } + return dst - dst0; +} + + +/* + Copy a string, + with optional character set conversion, + with optional left padding (for binary -> UCS2 conversion) + + Bad input bytes are replaced to '?'. + + The string that is written to "to" is always well-formed. 
+ + @param to The destination string + @param to_length Space available in "to" + @param to_cs Character set of the "to" string + @param from The source string + @param from_length Length of the "from" string + @param from_cs Character set of the "from" string + @param nchars Copy not more than "nchars" characters + + The members as set as follows: + m_well_formed_error_pos To the position when "from" is not well formed + or NULL otherwise. + m_cannot_convert_error_pos To the position where a not convertable + character met, or NULL otherwise. + m_source_end_pos To the position where scanning of the "from" + string stopped. + + @returns number of bytes that were written to 'to' +*/ +uint +String_copier::well_formed_copy(CHARSET_INFO *to_cs, + char *to, size_t to_length, + CHARSET_INFO *from_cs, + const char *from, size_t from_length, + size_t nchars) +{ + if ((to_cs == &my_charset_bin) || + (from_cs == &my_charset_bin) || + (to_cs == from_cs) || + my_charset_same(from_cs, to_cs)) + { + m_cannot_convert_error_pos= NULL; + return (uint) to_cs->copy_fix(to, to_length, from, from_length, + nchars, this); + } + return (uint) my_convert_fix(to_cs, to, to_length, from_cs, from, from_length, + nchars, this, this); +} + + + +/* + Append characters to a single-quoted string '...', escaping special + characters with backslashes as necessary. + Does not add the enclosing quotes, this is left up to caller. +*/ +#define APPEND(...) 
if (append(__VA_ARGS__)) return 1; +bool String::append_for_single_quote(const char *st, size_t len) +{ + const char *end= st+len; + int chlen; + for (; st < end; st++) + { + switch (*st) + { + case '\\': APPEND(STRING_WITH_LEN("\\\\")); break; + case '\0': APPEND(STRING_WITH_LEN("\\0")); break; + case '\'': APPEND(STRING_WITH_LEN("\\'")); break; + case '\n': APPEND(STRING_WITH_LEN("\\n")); break; + case '\r': APPEND(STRING_WITH_LEN("\\r")); break; + case '\032': APPEND(STRING_WITH_LEN("\\Z")); break; + default: if ((chlen=charset()->charlen(st, end)) > 0) + { + APPEND(st, chlen); + st+= chlen-1; + } + else + APPEND(*st); + } + } + return 0; +} + +void String::print(String *str) const +{ + str->append_for_single_quote(Ptr, str_length); +} + + +void String::print_with_conversion(String *print, CHARSET_INFO *cs) const +{ + StringBuffer<256> tmp(cs); + uint errors= 0; + tmp.copy(this, cs, &errors); + tmp.print(print); +} + + +/** + Convert string to printable ASCII string + + @details This function converts input string "from" replacing non-ASCII bytes + with hexadecimal sequences ("\xXX") optionally appending "..." to the end of + the resulting string. + This function used in the ER_TRUNCATED_WRONG_VALUE_FOR_FIELD error messages, + e.g. when a string cannot be converted to a result charset. + + + @param to output buffer + @param to_len size of the output buffer (8 bytes or greater) + @param from input string + @param from_len size of the input string + @param from_cs input charset + @param nbytes maximal number of bytes to convert (from_len if 0) + + @return number of bytes in the output string +*/ + +uint convert_to_printable(char *to, size_t to_len, + const char *from, size_t from_len, + CHARSET_INFO *from_cs, size_t nbytes /*= 0*/) +{ + /* needs at least 8 bytes for '\xXX...' and zero byte */ + DBUG_ASSERT(to_len >= 8); + + char *t= to; + char *t_end= to + to_len - 1; // '- 1' is for the '\0' at the end + const char *f= from; + const char *f_end= from + (nbytes ? 
MY_MIN(from_len, nbytes) : from_len); + char *dots= to; // last safe place to append '...' + + if (!f || t == t_end) + return 0; + + for (; t < t_end && f < f_end; f++) + { + /* + If the source string is ASCII compatible (mbminlen==1) + and the source character is in ASCII printable range (0x20..0x7F), + then display the character as is. + + Otherwise, if the source string is not ASCII compatible (e.g. UCS2), + or the source character is not in the printable range, + then print the character using HEX notation. + */ + if (((unsigned char) *f) >= 0x20 && + ((unsigned char) *f) <= 0x7F && + from_cs->mbminlen == 1) + { + *t++= *f; + } + else + { + if (t_end - t < 4) // \xXX + break; + *t++= '\\'; + *t++= 'x'; + APPEND_HEX(t, *f); + } + if (t_end - t >= 3) // '...' + dots= t; + } + if (f < from + from_len) + memcpy(dots, STRING_WITH_LEN("...\0")); + else + *t= '\0'; + return (uint) (t - to); +} + +size_t convert_to_printable_required_length(uint len) +{ + return static_cast(len) * 4 + 3/*dots*/ + 1/*trailing \0 */; +} + +bool String::append_semi_hex(const char *s, uint len, CHARSET_INFO *cs) +{ + if (!len) + return false; + size_t dst_len= convert_to_printable_required_length(len); + if (reserve(dst_len)) + return true; + uint nbytes= convert_to_printable(Ptr + str_length, dst_len, s, len, cs); + DBUG_ASSERT((ulonglong) str_length + nbytes < UINT_MAX32); + str_length+= nbytes; + return false; +} + + +// Shrink the buffer, but only if it is allocated on the heap. +void Binary_string::shrink(size_t arg_length) +{ + if (is_alloced() && ALIGN_SIZE(arg_length + 1) < Alloced_length) + { + /* my_realloc() can't fail as new buffer is less than the original one */ + Ptr= (char*) my_realloc(STRING_PSI_MEMORY_KEY, Ptr, arg_length, + MYF(thread_specific ? 
+ MY_THREAD_SPECIFIC : 0)); + Alloced_length= (uint32) arg_length; + } +} diff --git a/sql/sql_string.h b/sql/sql_string.h new file mode 100644 index 00000000..20073592 --- /dev/null +++ b/sql/sql_string.h @@ -0,0 +1,1280 @@ +#ifndef SQL_STRING_INCLUDED +#define SQL_STRING_INCLUDED + +/* + Copyright (c) 2000, 2013, Oracle and/or its affiliates. + Copyright (c) 2008, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* This file is originally from the mysql distribution. 
Coded by monty */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include "m_ctype.h" /* my_charset_bin */ +#include /* alloc_root, my_free, my_realloc */ +#include "m_string.h" /* TRASH */ +#include "sql_list.h" + +class String; +#ifdef MYSQL_SERVER +extern PSI_memory_key key_memory_String_value; +#define STRING_PSI_MEMORY_KEY key_memory_String_value +#else +#define STRING_PSI_MEMORY_KEY PSI_NOT_INSTRUMENTED +#endif + +typedef struct st_io_cache IO_CACHE; +typedef struct st_mem_root MEM_ROOT; +#define ASSERT_LENGTH(A) DBUG_ASSERT(str_length + (uint32) (A) <= Alloced_length) + +#include "pack.h" +class Binary_string; +int sortcmp(const Binary_string *s, const Binary_string *t, CHARSET_INFO *cs); +int stringcmp(const Binary_string *s, const Binary_string *t); +String *copy_if_not_alloced(String *a,String *b,uint32 arg_length); +inline uint32 copy_and_convert(char *to, size_t to_length, CHARSET_INFO *to_cs, + const char *from, size_t from_length, + CHARSET_INFO *from_cs, uint *errors) +{ + return my_convert(to, (uint)to_length, to_cs, from, (uint)from_length, + from_cs, errors); +} + + +class String_copy_status: protected MY_STRCOPY_STATUS +{ +public: + const char *source_end_pos() const + { return m_source_end_pos; } + const char *well_formed_error_pos() const + { return m_well_formed_error_pos; } +}; + + +class Well_formed_prefix_status: public String_copy_status +{ +public: + Well_formed_prefix_status(CHARSET_INFO *cs, + const char *str, const char *end, size_t nchars) + { cs->well_formed_char_length(str, end, nchars, this); } +}; + + +class Well_formed_prefix: public Well_formed_prefix_status +{ + const char *m_str; // The beginning of the string +public: + Well_formed_prefix(CHARSET_INFO *cs, const char *str, const char *end, + size_t nchars) + :Well_formed_prefix_status(cs, str, end, nchars), m_str(str) + { } + Well_formed_prefix(CHARSET_INFO *cs, const char *str, size_t length, + size_t nchars) + 
:Well_formed_prefix_status(cs, str, str + length, nchars), m_str(str) + { } + Well_formed_prefix(CHARSET_INFO *cs, const char *str, size_t length) + :Well_formed_prefix_status(cs, str, str + length, length), m_str(str) + { } + Well_formed_prefix(CHARSET_INFO *cs, LEX_CSTRING str, size_t nchars) + :Well_formed_prefix_status(cs, str.str, str.str + str.length, nchars), + m_str(str.str) + { } + size_t length() const { return m_source_end_pos - m_str; } +}; + + +class String_copier: public String_copy_status, + protected MY_STRCONV_STATUS +{ +public: + const char *cannot_convert_error_pos() const + { return m_cannot_convert_error_pos; } + const char *most_important_error_pos() const + { + return well_formed_error_pos() ? well_formed_error_pos() : + cannot_convert_error_pos(); + } + /* + Convert a string between character sets. + "dstcs" and "srccs" cannot be &my_charset_bin. + */ + size_t convert_fix(CHARSET_INFO *dstcs, char *dst, size_t dst_length, + CHARSET_INFO *srccs, const char *src, size_t src_length, + size_t nchars) + { + return my_convert_fix(dstcs, dst, dst_length, + srccs, src, src_length, nchars, this, this); + } + /* + Copy a string. Fix bad bytes/characters to '?'. + */ + uint well_formed_copy(CHARSET_INFO *to_cs, char *to, size_t to_length, + CHARSET_INFO *from_cs, const char *from, + size_t from_length, size_t nchars); + // Same as above, but without the "nchars" limit. 
+ uint well_formed_copy(CHARSET_INFO *to_cs, char *to, size_t to_length, + CHARSET_INFO *from_cs, const char *from, + size_t from_length) + { + return well_formed_copy(to_cs, to, to_length, + from_cs, from, from_length, + from_length /* No limit on "nchars"*/); + } +}; + + +size_t my_copy_with_hex_escaping(CHARSET_INFO *cs, + char *dst, size_t dstlen, + const char *src, size_t srclen); +uint convert_to_printable(char *to, size_t to_len, + const char *from, size_t from_len, + CHARSET_INFO *from_cs, size_t nbytes= 0); +size_t convert_to_printable_required_length(uint len); + + +class Charset +{ + CHARSET_INFO *m_charset; +public: + Charset() :m_charset(&my_charset_bin) { } + Charset(CHARSET_INFO *cs) :m_charset(cs) { } + + CHARSET_INFO *charset() const { return m_charset; } + bool use_mb() const { return m_charset->use_mb(); } + uint mbminlen() const { return m_charset->mbminlen; } + uint mbmaxlen() const { return m_charset->mbmaxlen; } + bool is_good_for_ft() const + { + // Binary and UCS2/UTF16/UTF32 are not supported + return m_charset != &my_charset_bin && m_charset->mbminlen == 1; + } + + size_t numchars(const char *str, const char *end) const + { + return m_charset->numchars(str, end); + } + size_t lengthsp(const char *str, size_t length) const + { + return m_charset->lengthsp(str, length); + } + size_t charpos(const char *str, const char *end, size_t pos) const + { + return m_charset->charpos(str, end, pos); + } + void set_charset(CHARSET_INFO *charset_arg) + { + m_charset= charset_arg; + } + void set_charset(const Charset &other) + { + m_charset= other.m_charset; + } + void swap(Charset &other) + { + swap_variables(CHARSET_INFO*, m_charset, other.m_charset); + } + bool same_encoding(const Charset &other) const + { + return my_charset_same(m_charset, other.m_charset); + } + /* + Collation name without the character set name. + For example, in case of "latin1_swedish_ci", + this method returns "_swedish_ci". 
+ */ + LEX_CSTRING collation_specific_name() const; + bool encoding_allows_reinterpret_as(CHARSET_INFO *cs) const; + bool eq_collation_specific_names(CHARSET_INFO *cs) const; + bool can_have_collate_clause() const + { + return m_charset != &my_charset_bin; + } + + /* + The MariaDB version when the last collation change happened, + e.g. due to a bug fix. See functions below. + */ + static ulong latest_mariadb_version_with_collation_change() + { + return 110002; + } + + /* + Check if the collation with the given ID changed its order + since the given MariaDB version. + */ + static bool collation_changed_order(ulong mysql_version, uint cs_number) + { + if ((mysql_version < 50048 && + (cs_number == 11 || /* ascii_general_ci - bug #29499, bug #27562 */ + cs_number == 41 || /* latin7_general_ci - bug #29461 */ + cs_number == 42 || /* latin7_general_cs - bug #29461 */ + cs_number == 20 || /* latin7_estonian_cs - bug #29461 */ + cs_number == 21 || /* latin2_hungarian_ci - bug #29461 */ + cs_number == 22 || /* koi8u_general_ci - bug #29461 */ + cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */ + cs_number == 26)) || /* cp1250_general_ci - bug #29461 */ + (mysql_version < 50124 && + (cs_number == 33 || /* utf8mb3_general_ci - bug #27877 */ + cs_number == 35))) /* ucs2_general_ci - bug #27877 */ + return true; + + if (cs_number == 159 && /* ucs2_general_mysql500_ci - MDEV-30746 */ + ((mysql_version >= 100400 && mysql_version < 100429) || + (mysql_version >= 100500 && mysql_version < 100520) || + (mysql_version >= 100600 && mysql_version < 100613) || + (mysql_version >= 100700 && mysql_version < 100708) || + (mysql_version >= 100800 && mysql_version < 100808) || + (mysql_version >= 100900 && mysql_version < 100906) || + (mysql_version >= 101000 && mysql_version < 101004) || + (mysql_version >= 101100 && mysql_version < 101103) || + (mysql_version >= 110000 && mysql_version < 110002))) + return true; + return false; + } + + /** + Check if a collation has changed ID 
since the given version. + Return the new ID. + + @param mysql_version + @param cs_number - collation ID + + @retval the new collation ID (or cs_number, if no change) + */ + + static uint upgrade_collation_id(ulong mysql_version, uint cs_number) + { + if (mysql_version >= 50300 && mysql_version <= 50399) + { + switch (cs_number) { + case 149: return MY_PAGE2_COLLATION_ID_UCS2; // ucs2_crotian_ci + case 213: return MY_PAGE2_COLLATION_ID_UTF8; // utf8_crotian_ci + } + } + if ((mysql_version >= 50500 && mysql_version <= 50599) || + (mysql_version >= 100000 && mysql_version <= 100005)) + { + switch (cs_number) { + case 149: return MY_PAGE2_COLLATION_ID_UCS2; // ucs2_crotian_ci + case 213: return MY_PAGE2_COLLATION_ID_UTF8; // utf8_crotian_ci + case 214: return MY_PAGE2_COLLATION_ID_UTF32; // utf32_croatian_ci + case 215: return MY_PAGE2_COLLATION_ID_UTF16; // utf16_croatian_ci + case 245: return MY_PAGE2_COLLATION_ID_UTF8MB4;// utf8mb4_croatian_ci + } + } + return cs_number; + } + +}; + + +/** + Storage for strings with both length and allocated length. + Automatically grows on demand. +*/ + +class Binary_string: public Sql_alloc +{ +protected: + char *Ptr; + uint32 str_length, Alloced_length, extra_alloc; + bool alloced, thread_specific; + void init_private_data() + { + Ptr= 0; + Alloced_length= extra_alloc= str_length= 0; + alloced= thread_specific= false; + } + inline void free_buffer() + { + if (alloced) + { + alloced=0; + my_free(Ptr); + } + } +public: + Binary_string() + { + init_private_data(); + } + explicit Binary_string(size_t length_arg) + { + init_private_data(); + (void) real_alloc(length_arg); + } + /* + NOTE: If one intend to use the c_ptr() method, the following two + contructors need the size of memory for STR to be at least LEN+1 (to make + room for zero termination). 
+ */ + Binary_string(const char *str, size_t len) + { + Ptr= (char*) str; + str_length= (uint32) len; + Alloced_length= 0; /* Memory cannot be written to */ + extra_alloc= 0; + alloced= thread_specific= 0; + } + Binary_string(char *str, size_t len) + { + Ptr= str; + str_length= Alloced_length= (uint32) len; + extra_alloc= 0; + alloced= thread_specific= 0; + } + explicit Binary_string(const Binary_string &str) + { + Ptr= str.Ptr; + str_length= str.str_length; + Alloced_length= str.Alloced_length; + extra_alloc= 0; + alloced= thread_specific= 0; + } + + ~Binary_string() + { + free(); + } + + inline uint32 length() const { return str_length;} + inline char& operator [] (size_t i) const { return Ptr[i]; } + inline void length(size_t len) { str_length=(uint32)len ; } + inline bool is_empty() const { return (str_length == 0); } + inline const char *ptr() const { return Ptr; } + inline const char *end() const { return Ptr + str_length; } + bool has_8bit_bytes() const + { + for (const char *c= ptr(), *c_end= end(); c < c_end; c++) + { + if (!my_isascii(*c)) + return true; + } + return false; + } + + bool bin_eq(const Binary_string *other) const + { + return length() == other->length() && + !memcmp(ptr(), other->ptr(), length()); + } + + /* + PMG 2004.11.12 + This is a method that works the same as perl's "chop". It simply + drops the last character of a string. This is useful in the case + of the federated storage handler where I'm building a unknown + number, list of values and fields to be used in a sql insert + statement to be run on the remote server, and have a comma after each. + When the list is complete, I "chop" off the trailing comma + + ex. 
+ String stringobj; + stringobj.append("VALUES ('foo', 'fi', 'fo',"); + stringobj.chop(); + stringobj.append(")"); + + In this case, the value of string was: + + VALUES ('foo', 'fi', 'fo', + VALUES ('foo', 'fi', 'fo' + VALUES ('foo', 'fi', 'fo') + */ + inline void chop() + { + if (str_length) + { + str_length--; + Ptr[str_length]= '\0'; + DBUG_ASSERT(strlen(Ptr) == str_length); + } + } + + // Returns offset to substring or -1 + int strstr(const Binary_string &search, uint32 offset=0) const; + int strstr(const char *search, uint32 search_length, uint32 offset=0) const; + // Returns offset to substring or -1 + int strrstr(const Binary_string &search, uint32 offset=0) const; + + /* + The following append operations do not extend the strings and in production + mode do NOT check that alloced memory! + q_*** methods writes values of parameters itself + qs_*** methods writes string representation of value + */ + void q_append(const char c) + { + ASSERT_LENGTH(1); + Ptr[str_length++] = c; + } + void q_append2b(const uint32 n) + { + ASSERT_LENGTH(2); + int2store(Ptr + str_length, n); + str_length += 2; + } + void q_append(const uint32 n) + { + ASSERT_LENGTH(4); + int4store(Ptr + str_length, n); + str_length += 4; + } + void q_append(double d) + { + ASSERT_LENGTH(8); + float8store(Ptr + str_length, d); + str_length += 8; + } + void q_append(double *d) + { + ASSERT_LENGTH(8); + float8store(Ptr + str_length, *d); + str_length += 8; + } + /* + Append a wide character. + The caller must have allocated at least cs->mbmaxlen bytes. 
+ */ + int q_append_wc(my_wc_t wc, CHARSET_INFO *cs) + { + int mblen; + if ((mblen= cs->cset->wc_mb(cs, wc, + (uchar *) end(), + (uchar *) end() + cs->mbmaxlen)) > 0) + str_length+= (uint32) mblen; + return mblen; + } + void q_append(const char *data, size_t data_len) + { + ASSERT_LENGTH(data_len); + if (data_len) + memcpy(Ptr + str_length, data, data_len); + DBUG_ASSERT(str_length <= UINT_MAX32 - data_len); + str_length += (uint)data_len; + } + void q_append(const LEX_CSTRING *ls) + { + DBUG_ASSERT(ls->length < UINT_MAX32 && + ((ls->length == 0 && !ls->str) || + ls->length == strlen(ls->str))); + q_append(ls->str, (uint32) ls->length); + } + + void write_at_position(uint32 position, uint32 value) + { + DBUG_ASSERT(str_length >= position + 4); + int4store(Ptr + position,value); + } + + void qs_append(const LEX_CSTRING *ls) + { + DBUG_ASSERT(ls->length < UINT_MAX32 && + ((ls->length == 0 && !ls->str) || + ls->length == strlen(ls->str))); + qs_append(ls->str, (uint32)ls->length); + } + void qs_append(const char *str, size_t len); + void qs_append_hex(const char *str, uint32 len); + void qs_append_hex_uint32(uint32 num); + void qs_append(double d); + void qs_append(const double *d); + inline void qs_append(const char c) + { + ASSERT_LENGTH(1); + Ptr[str_length]= c; + str_length++; + } + void qs_append(int i); + void qs_append(uint i) + { + qs_append((ulonglong)i); + } + void qs_append(ulong i) + { + qs_append((ulonglong)i); + } + void qs_append(ulonglong i); + void qs_append(longlong i, int radix) + { + ASSERT_LENGTH(22); + char *buff= Ptr + str_length; + char *end= ll2str(i, buff, radix, 0); + str_length+= (uint32) (end-buff); + } + + /* Mark variable thread specific it it's not allocated already */ + inline void set_thread_specific() + { + if (!alloced) + thread_specific= 1; + } + bool is_alloced() const { return alloced; } + inline uint32 alloced_length() const { return Alloced_length;} + inline uint32 extra_allocation() const { return extra_alloc;} + inline void 
extra_allocation(size_t len) { extra_alloc= (uint32)len; } + inline void mark_as_const() { Alloced_length= 0;} + + inline bool uses_buffer_owned_by(const Binary_string *s) const + { + return (s->alloced && Ptr >= s->Ptr && Ptr < s->Ptr + s->Alloced_length); + } + + /* Swap two string objects. Efficient way to exchange data without memcpy. */ + void swap(Binary_string &s) + { + swap_variables(char *, Ptr, s.Ptr); + swap_variables(uint32, str_length, s.str_length); + swap_variables(uint32, Alloced_length, s.Alloced_length); + swap_variables(bool, alloced, s.alloced); + } + + /** + Points the internal buffer to the supplied one. The old buffer is freed. + @param str Pointer to the new buffer. + @param arg_length Length of the new buffer in characters, excluding any + null character. + @note The new buffer will not be null terminated. + */ + void set_alloced(char *str, size_t length, size_t alloced_length) + { + free_buffer(); + Ptr= str; + str_length= (uint32) length; + DBUG_ASSERT(alloced_length < UINT_MAX32); + Alloced_length= (uint32) alloced_length; + } + inline void set(char *str, size_t arg_length) + { + set_alloced(str, arg_length, arg_length); + } + inline void set(const char *str, size_t length) + { + free_buffer(); + Ptr= (char*) str; + str_length= (uint32) length; + Alloced_length= 0; + } + + void set(Binary_string &str, size_t offset, size_t length) + { + DBUG_ASSERT(&str != this); + free_buffer(); + Ptr= str.Ptr + offset; + str_length= (uint32) length; + Alloced_length= 0; + if (str.Alloced_length) + Alloced_length= (uint32) (str.Alloced_length - offset); + } + LEX_CSTRING to_lex_cstring() const + { + LEX_CSTRING tmp= {Ptr, str_length}; + return tmp; + } + inline LEX_CSTRING *get_value(LEX_CSTRING *res) + { + res->str= Ptr; + res->length= str_length; + return res; + } + + /* Take over handling of buffer from some other object */ + void reset(char *ptr_arg, size_t length_arg, size_t alloced_length_arg) + { + set_alloced(ptr_arg, length_arg, 
alloced_length_arg); + alloced= ptr_arg != 0; + } + + /* Forget about the buffer, let some other object handle it */ + char *release() + { + char *old= Ptr; + init_private_data(); + return old; + } + + /* + This is used to set a new buffer for String. + However if the String already has an allocated buffer, it will + keep that one. + It's not to be used to set the value or length of the string. + */ + inline void set_buffer_if_not_allocated(char *str, size_t arg_length) + { + if (!alloced) + { + /* + Following should really set str_length= 0, but some code may + depend on that the String length is same as buffer length. + */ + Ptr= str; + str_length= Alloced_length= (uint32) arg_length; + } + /* One should set str_length before using it */ + MEM_UNDEFINED(&str_length, sizeof(str_length)); + } + + inline Binary_string& operator=(const Binary_string &s) + { + if (&s != this) + { + /* + It is forbidden to do assignments like + some_string = substring_of_that_string + */ + DBUG_ASSERT(!s.uses_buffer_owned_by(this)); + set_alloced((char *) s.Ptr, s.str_length, s.Alloced_length); + } + return *this; + } + + bool set_hex(ulonglong num); + bool set_hex(const char *str, uint32 len); + bool set_fcvt(double num, uint decimals); + + bool copy(); // Alloc string if not alloced + bool copy(const Binary_string &s); // Allocate new string + bool copy(const char *s, size_t arg_length); // Allocate new string + bool copy_or_move(const char *s,size_t arg_length); + + /** + Convert a string to a printable format. + All non-convertable and control characters are replaced to 5-character + sequences '\hhhh'. 
+ */ + bool copy_printable_hhhh(CHARSET_INFO *to_cs, + CHARSET_INFO *from_cs, + const char *from, size_t from_length); + + bool append_ulonglong(ulonglong val); + bool append_longlong(longlong val); + + bool append(const char *s, size_t size) + { + if (!size) + return false; + if (realloc_with_extra_if_needed(str_length + size)) + return true; + q_append(s, size); + return false; + } + bool append(const LEX_CSTRING &s) + { + return append(s.str, s.length); + } + bool append(const Binary_string &s) + { + return append(s.ptr(), s.length()); + } + bool append(IO_CACHE* file, uint32 arg_length); + + inline bool append_char(char chr) + { + if (str_length < Alloced_length) + { + Ptr[str_length++]= chr; + } + else + { + if (unlikely(realloc_with_extra(str_length + 1))) + return true; + Ptr[str_length++]= chr; + } + return false; + } + bool append_hex(const char *src, uint32 srclen) + { + for (const char *src_end= src + srclen ; src != src_end ; src++) + { + if (unlikely(append_char(_dig_vec_lower[((uchar) *src) >> 4])) || + unlikely(append_char(_dig_vec_lower[((uchar) *src) & 0x0F]))) + return true; + } + return false; + } + bool append_hex_uint32(uint32 num) + { + if (reserve(8)) + return true; + qs_append_hex_uint32(num); + return false; + } + bool append_with_step(const char *s, uint32 arg_length, uint32 step_alloc) + { + uint32 new_length= arg_length + str_length; + if (new_length > Alloced_length && + unlikely(realloc(new_length + step_alloc))) + return true; + q_append(s, arg_length); + return false; + } + + inline char *c_ptr() + { + if (unlikely(!Ptr)) + return (char*) ""; + /* + Here we assume that any buffer used to initalize String has + an end \0 or have at least an accessable character at end. + This is to handle the case of String("Hello",5) and + String("hello",5) efficiently. + + We have two options here. To test for !Alloced_length or !alloced. 
+ Using "Alloced_length" is slightly safer so that we do not read + from potentially unintialized memory (normally not dangerous but + may give warnings in valgrind), but "alloced" is safer as there + are less change to get memory loss from code that is using + String((char*), length) or String.set((char*), length) and does + not free things properly (and there is several places in the code + where this happens and it is hard to find out if any of these will call + c_ptr(). + */ + if (unlikely(!alloced && !Ptr[str_length])) + return Ptr; + if (str_length < Alloced_length) + { + Ptr[str_length]=0; + return Ptr; + } + (void) realloc(str_length); /* This will add end \0 */ + return Ptr; + } + /* + One should use c_ptr() instead for most cases. This will be deleted soon, + kept for compatiblity. + */ + inline char *c_ptr_quick() + { + return c_ptr_safe(); + } + /* + This is to be used only in the case when one cannot use c_ptr(). + The cases are: + - When one initializes String with an external buffer and length and + buffer[length] could be uninitalized when c_ptr() is called. + - When valgrind gives warnings about uninitialized memory with c_ptr(). + */ + inline char *c_ptr_safe() + { + if (Ptr && str_length < Alloced_length) + Ptr[str_length]=0; + else + (void) realloc(str_length); + return Ptr; + } + + inline void free() + { + free_buffer(); + /* + We have to clear the values as some Strings, like in Field, are + reused after free(). Because of this we cannot use MEM_UNDEFINED() here. + */ + Ptr= 0; + str_length= 0; + Alloced_length= extra_alloc= 0; + } + + inline bool alloc(size_t arg_length) + { + /* + Allocate if we need more space or if we don't have done any + allocation yet (we don't want to have Ptr to be NULL for empty strings). + + Note that if arg_length == Alloced_length then we don't allocate. 
+ This ensures we don't do any extra allocations in protocol and String:int, + but the string will not be automatically null terminated if c_ptr() is not + called. + */ + if (arg_length <= Alloced_length && Alloced_length) + return 0; + return real_alloc(arg_length); + } + bool real_alloc(size_t arg_length); // Empties old string + bool realloc_raw(size_t arg_length); + bool realloc(size_t arg_length) + { + if (realloc_raw(arg_length+1)) + return TRUE; + Ptr[arg_length]= 0; // This make other funcs shorter + return FALSE; + } + bool realloc_with_extra(size_t arg_length) + { + if (extra_alloc < 4096) + extra_alloc= extra_alloc*2+128; + if (realloc_raw(arg_length + extra_alloc)) + return TRUE; + Ptr[arg_length]=0; // This make other funcs shorter + return FALSE; + } + bool realloc_with_extra_if_needed(size_t arg_length) + { + if (arg_length < Alloced_length) + { + Ptr[arg_length]=0; // behave as if realloc was called. + return 0; + } + return realloc_with_extra(arg_length); + } + // Shrink the buffer, but only if it is allocated on the heap. 
+ void shrink(size_t arg_length); + + void move(Binary_string &s) + { + set_alloced(s.Ptr, s.str_length, s.Alloced_length); + extra_alloc= s.extra_alloc; + alloced= s.alloced; + thread_specific= s.thread_specific; + s.alloced= 0; + } + bool fill(size_t max_length,char fill); + /* + Replace substring with string + If wrong parameter or not enough memory, do nothing + */ + bool replace(uint32 offset,uint32 arg_length, const char *to, uint32 length); + bool replace(uint32 offset,uint32 arg_length, const Binary_string &to) + { + return replace(offset,arg_length,to.ptr(),to.length()); + } + + int reserve(size_t space_needed) + { + DBUG_ASSERT((ulonglong) str_length + space_needed < UINT_MAX32); + return realloc(str_length + space_needed); + } + int reserve(size_t space_needed, size_t grow_by); + + inline char *prep_append(uint32 arg_length, uint32 step_alloc) + { + uint32 new_length= arg_length + str_length; + if (new_length > Alloced_length) + { + if (unlikely(realloc(new_length + step_alloc))) + return 0; + } + uint32 old_length= str_length; + str_length+= arg_length; + return Ptr + old_length; // Area to use + } + + + void q_net_store_length(ulonglong length) + { + DBUG_ASSERT(Alloced_length >= (str_length + net_length_size(length))); + char *pos= (char *) net_store_length((uchar *)(Ptr + str_length), length); + str_length= uint32(pos - Ptr); + } + void q_net_store_data(const uchar *from, size_t length) + { + DBUG_ASSERT(length < UINT_MAX32); + DBUG_ASSERT(Alloced_length >= (str_length + length + + net_length_size(length))); + q_net_store_length(length); + q_append((const char *)from, (uint32) length); + } +}; + + +class String: public Charset, public Binary_string +{ +public: + String() = default; + String(size_t length_arg) :Binary_string(length_arg) + { } + /* + NOTE: If one intend to use the c_ptr() method, the following two + contructors need the size of memory for STR to be at least LEN+1 (to make + room for zero termination). 
+ */ + String(const char *str, size_t len, CHARSET_INFO *cs) + :Charset(cs), Binary_string(str, len) + { } + String(char *str, size_t len, CHARSET_INFO *cs) + :Charset(cs), Binary_string(str, len) + { } + String(const String &str) = default; + + void set(String &str,size_t offset,size_t arg_length) + { + Binary_string::set(str, offset, arg_length); + set_charset(str); + } + inline void set(char *str,size_t arg_length, CHARSET_INFO *cs) + { + Binary_string::set(str, arg_length); + set_charset(cs); + } + inline void set(const char *str,size_t arg_length, CHARSET_INFO *cs) + { + Binary_string::set(str, arg_length); + set_charset(cs); + } + bool set_ascii(const char *str, size_t arg_length); + inline void set_buffer_if_not_allocated(char *str,size_t arg_length, + CHARSET_INFO *cs) + { + Binary_string::set_buffer_if_not_allocated(str, arg_length); + set_charset(cs); + } + bool set_int(longlong num, bool unsigned_flag, CHARSET_INFO *cs); + bool set(int num, CHARSET_INFO *cs) { return set_int(num, false, cs); } + bool set(uint num, CHARSET_INFO *cs) { return set_int(num, true, cs); } + bool set(long num, CHARSET_INFO *cs) { return set_int(num, false, cs); } + bool set(ulong num, CHARSET_INFO *cs) { return set_int(num, true, cs); } + bool set(longlong num, CHARSET_INFO *cs) { return set_int(num, false, cs); } + bool set(ulonglong num, CHARSET_INFO *cs) { return set_int((longlong)num, true, cs); } + bool set_real(double num,uint decimals, CHARSET_INFO *cs); + bool set_fcvt(double num, uint decimals) + { + set_charset(&my_charset_latin1); + return Binary_string::set_fcvt(num, decimals); + } + + bool set_hex(ulonglong num) + { + set_charset(&my_charset_latin1); + return Binary_string::set_hex(num); + } + bool set_hex(const char *str, uint32 len) + { + set_charset(&my_charset_latin1); + return Binary_string::set_hex(str, len); + } + + /* Take over handling of buffer from some other object */ + void reset(char *ptr_arg, size_t length_arg, size_t alloced_length_arg, + 
CHARSET_INFO *cs) + { + Binary_string::reset(ptr_arg, length_arg, alloced_length_arg); + set_charset(cs); + } + + inline String& operator = (const String &s) + { + if (&s != this) + { + set_charset(s); + Binary_string::operator=(s); + } + return *this; + } + + bool copy() + { + return Binary_string::copy(); + } + bool copy(const String &s) + { + set_charset(s); + return Binary_string::copy(s); + } + bool copy(const char *s, size_t arg_length, CHARSET_INFO *cs) + { + set_charset(cs); + return Binary_string::copy(s, arg_length); + } + bool copy_or_move(const char *s, size_t arg_length, CHARSET_INFO *cs) + { + set_charset(cs); + return Binary_string::copy_or_move(s, arg_length); + } + static bool needs_conversion(size_t arg_length, + CHARSET_INFO *cs_from, CHARSET_INFO *cs_to, + uint32 *offset); + static bool needs_conversion_on_storage(size_t arg_length, + CHARSET_INFO *cs_from, + CHARSET_INFO *cs_to); + bool copy_aligned(const char *s, size_t arg_length, size_t offset, + CHARSET_INFO *cs); + bool set_or_copy_aligned(const char *s, size_t arg_length, CHARSET_INFO *cs); + bool can_be_safely_converted_to(CHARSET_INFO *tocs) const + { + if (charset() == &my_charset_bin) + return Well_formed_prefix(tocs, ptr(), length()).length() == length(); + String try_val; + uint try_conv_error= 0; + try_val.copy(ptr(), length(), charset(), tocs, &try_conv_error); + return try_conv_error == 0; + } + bool copy(const char*s, size_t arg_length, CHARSET_INFO *csfrom, + CHARSET_INFO *csto, uint *errors); + bool copy(const String *str, CHARSET_INFO *tocs, uint *errors) + { + return copy(str->ptr(), str->length(), str->charset(), tocs, errors); + } + bool copy(CHARSET_INFO *tocs, + CHARSET_INFO *fromcs, const char *src, size_t src_length, + size_t nchars, String_copier *copier) + { + if (unlikely(alloc(tocs->mbmaxlen * src_length))) + return true; + str_length= copier->well_formed_copy(tocs, Ptr, alloced_length(), + fromcs, src, (uint) src_length, + (uint) nchars); + set_charset(tocs); + 
return false; + } + // Append without character set conversion + bool append(const String &s) + { + return Binary_string::append(s); + } + inline bool append(char chr) + { + return Binary_string::append_char(chr); + } + bool append_hex(const char *src, uint32 srclen) + { + return Binary_string::append_hex(src, srclen); + } + bool append_hex(const uchar *src, uint32 srclen) + { + return Binary_string::append_hex((const char*)src, srclen); + } + bool append_introducer_and_hex(const String *str) + { + return + append('_') || + append(str->charset()->cs_name) || + append(STRING_WITH_LEN(" 0x")) || + append_hex(str->ptr(), (uint32) str->length()); + } + bool append(IO_CACHE* file, uint32 arg_length) + { + return Binary_string::append(file, arg_length); + } + inline bool append(const char *s, uint32 arg_length, uint32 step_alloc) + { + return append_with_step(s, arg_length, step_alloc); + } + + // Append with optional character set conversion from ASCII (e.g. to UCS2) + bool append(const LEX_STRING *ls) + { + DBUG_ASSERT(ls->length < UINT_MAX32 && + ((ls->length == 0 && !ls->str) || + ls->length == strlen(ls->str))); + return append(ls->str, (uint32) ls->length); + } + bool append(const LEX_CSTRING *ls) + { + DBUG_ASSERT(ls->length < UINT_MAX32 && + ((ls->length == 0 && !ls->str) || + ls->length == strlen(ls->str))); + return append(ls->str, (uint32) ls->length); + } + bool append(const LEX_CSTRING &ls) + { + return append(&ls); + } + bool append_name_value(const LEX_CSTRING &name, + const LEX_CSTRING &value, + uchar quot= '\0') + { + return + append(name) || + append('=') || + (quot && append(quot)) || + append(value) || + (quot && append(quot)); + } + bool append(const char *s, size_t size); + bool append_parenthesized(long nr, int radix= 10); + + // Append with optional character set conversion from cs to charset() + bool append(const char *s, size_t arg_length, CHARSET_INFO *cs); + bool append(const LEX_CSTRING &s, CHARSET_INFO *cs) + { + return append(s.str, 
s.length, cs); + } + + // Append a wide character + bool append_wc(my_wc_t wc) + { + if (reserve(mbmaxlen())) + return true; + int mblen= q_append_wc(wc, charset()); + if (mblen > 0) + return false; + else if (mblen == MY_CS_ILUNI && wc != '?') + return q_append_wc('?', charset()) <= 0; + return true; + } + + // Append a number with zero prefilling + bool append_zerofill(uint num, uint width) + { + static const char zeros[15]= "00000000000000"; + char intbuff[15]; + uint length= (uint) (int10_to_str(num, intbuff, 10) - intbuff); + if (length < width && + append(zeros, width - length, &my_charset_latin1)) + return true; + return append(intbuff, length, &my_charset_latin1); + } + + /* + Append a bitmask in an uint32 with a translation into a + C-style human readable representation, e.g.: + 0x05 -> "(flag04|flag01)" + + @param flags - the flags to translate + @param names - an array of flag names + @param count - the number of available elements in "names" + */ + bool append_flag32_names(uint32 flags, LEX_CSTRING names[], size_t count) + { + bool added= false; + if (flags && append('(')) + return true; + for (ulong i= 0; i <= 31; i++) + { + ulong bit= 31 - i; + if (flags & (1 << bit)) + { + if (added && append('|')) + return true; + if (bit < count ? 
append(names[bit]) : append('?')) + return true; + added= true; + } + } + if (flags && append(')')) + return true; + return false; + } + + void strip_sp(); + friend String *copy_if_not_alloced(String *a,String *b,uint32 arg_length); + friend class Field; + uint32 numchars() const + { + return (uint32) Charset::numchars(ptr(), end()); + } + int charpos(longlong i, uint32 offset=0) + { + if (i <= 0) + return (int) i; + return (int) Charset::charpos(ptr() + offset, end(), (size_t) i); + } + size_t lengthsp() const + { + return Charset::lengthsp(Ptr, str_length); + } + + void print(String *to) const; + void print_with_conversion(String *to, CHARSET_INFO *cs) const; + void print(String *to, CHARSET_INFO *cs) const + { + if (my_charset_same(charset(), cs)) + print(to); + else + print_with_conversion(to, cs); + } + + bool append_for_single_quote(const char *st, size_t len); + bool append_for_single_quote(const String *s) + { + return append_for_single_quote(s->ptr(), s->length()); + } + + void swap(String &s) + { + Charset::swap(s); + Binary_string::swap(s); + } + + uint well_formed_length() const + { + return (uint) Well_formed_prefix(charset(), ptr(), length()).length(); + } + bool is_ascii() const + { + if (length() == 0) + return TRUE; + if (charset()->mbminlen > 1) + return FALSE; + return !has_8bit_bytes(); + } + bool eq(const String *other, CHARSET_INFO *cs) const + { + return !sortcmp(this, other, cs); + } +private: + bool append_semi_hex(const char *s, uint len, CHARSET_INFO *cs); +}; + + +// The following class is a backport from MySQL 5.6: +/** + String class wrapper with a preallocated buffer of size buff_sz + + This class allows to replace sequences of: + char buff[12345]; + String str(buff, sizeof(buff)); + str.length(0); + with a simple equivalent declaration: + StringBuffer<12345> str; +*/ + +template +class StringBuffer : public String +{ + char buff[buff_sz]; + +public: + StringBuffer() : String(buff, buff_sz, &my_charset_bin) { length(0); } + explicit 
StringBuffer(CHARSET_INFO *cs) : String(buff, buff_sz, cs) + { + length(0); + } + void set_buffer_if_not_allocated(CHARSET_INFO *cs) + { + if (!is_alloced()) + { + Ptr= buff; + Alloced_length= (uint32) buff_sz; + } + str_length= 0; /* Safety, not required */ + /* One should set str_length before using it */ + MEM_UNDEFINED(&str_length, sizeof(str_length)); + set_charset(cs); + } +}; + + +template +class BinaryStringBuffer : public Binary_string +{ + char buff[buff_sz]; +public: + BinaryStringBuffer() : Binary_string(buff, buff_sz) { length(0); } +}; + +static inline bool check_if_only_end_space(CHARSET_INFO *cs, + const char *str, + const char *end) +{ + return str + cs->scan(str, end, MY_SEQ_SPACES) == end; +} + +int append_query_string(CHARSET_INFO *csinfo, String *to, + const char *str, size_t len, bool no_backslash); + +#endif /* SQL_STRING_INCLUDED */ diff --git a/sql/sql_table.cc b/sql/sql_table.cc new file mode 100644 index 00000000..b33d2ff4 --- /dev/null +++ b/sql/sql_table.cc @@ -0,0 +1,12720 @@ +/* + Copyright (c) 2000, 2019, Oracle and/or its affiliates. + Copyright (c) 2010, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/* drop and alter of tables */ + +#include "mariadb.h" +#include "sql_class.h" +#include "sql_priv.h" +#include "unireg.h" +#include "debug_sync.h" +#include "sql_table.h" +#include "sql_parse.h" // test_if_data_home_dir +#include "sql_cache.h" // query_cache_* +#include "sql_base.h" // lock_table_names +#include "lock.h" // mysql_unlock_tables +#include "strfunc.h" // find_type2, find_set +#include "sql_truncate.h" // regenerate_locked_table +#include "ha_partition.h" // PAR_EXT + // mem_alloc_error, + // partition_info + // NOT_A_PARTITION_ID +#include "sql_db.h" // load_db_opt_by_name +#include "records.h" // init_read_record, end_read_record +#include "filesort.h" // filesort_free_buffers +#include "sql_select.h" // setup_order +#include "sql_handler.h" // mysql_ha_rm_tables +#include "discover.h" // readfrm +#include "my_pthread.h" // pthread_mutex_t +#include "log_event.h" // Query_log_event +#include "sql_statistics.h" +#include +#include +#include +#include "create_options.h" +#include "sp_head.h" +#include "sp.h" +#include "sql_trigger.h" +#include "sql_show.h" +#include "transaction.h" +#include "sql_audit.h" +#include "sql_sequence.h" +#include "tztime.h" +#include "sql_insert.h" // binlog_drop_table +#include "ddl_log.h" +#include "debug.h" // debug_crash_here() +#include +#include "rpl_mi.h" +#include "rpl_rli.h" +#include "log.h" + +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" + +/** RAII class for temporarily enabling wsrep_ctas in the connection. */ +class Enable_wsrep_ctas_guard +{ + public: + /** + @param thd - pointer to the context of connection in which + wsrep_ctas mode needs to be enabled. 
+ @param ctas - true if this is CREATE TABLE AS SELECT and + wsrep_on + */ + explicit Enable_wsrep_ctas_guard(THD *thd, const bool ctas) + : m_thd(thd) + { + if (ctas) + thd->wsrep_ctas= true; + } + + ~Enable_wsrep_ctas_guard() + { + m_thd->wsrep_ctas= false; + } + private: + THD* m_thd; +}; + +#endif /* WITH_WSREP */ + +#include "sql_debug.h" + +#ifdef _WIN32 +#include +#endif + +const LEX_CSTRING primary_key_name= { STRING_WITH_LEN("PRIMARY") }; +static const LEX_CSTRING generated_by_server= +{ STRING_WITH_LEN(" /* generated by server */") }; +static const LEX_CSTRING SEQUENCE_clex_str= { STRING_WITH_LEN("SEQUENCE") }; +static const LEX_CSTRING TABLE_clex_str= { STRING_WITH_LEN("TABLE") }; + +static int check_if_keyname_exists(const char *name,KEY *start, KEY *end); +static char *make_unique_key_name(THD *, const char *, KEY *, KEY *); +static bool make_unique_constraint_name(THD *, LEX_CSTRING *, const char *, + List *, uint *); +static const char *make_unique_invisible_field_name(THD *, const char *, + List *); +static int copy_data_between_tables(THD *, TABLE *,TABLE *, bool, uint, + ORDER *, ha_rows *, ha_rows *, + Alter_info *, Alter_table_ctx *); +static int append_system_key_parts(THD *, HA_CREATE_INFO *, Key *); +static int mysql_prepare_create_table(THD *, HA_CREATE_INFO *, Alter_info *, + uint *, handler *, KEY **, uint *, int); +static uint blob_length_by_type(enum_field_types type); +static bool fix_constraints_names(THD *, List *, + const HA_CREATE_INFO *); +static bool wait_for_master(THD *thd); +static int process_master_state(THD *thd, int alter_result, + uint64 &start_alter_id, bool if_exists); +static bool +write_bin_log_start_alter_rollback(THD *thd, uint64 &start_alter_id, + bool &partial_alter, bool if_exists); + +/** + @brief Helper function for explain_filename + @param thd Thread handle + @param to_p Explained name in system_charset_info + @param end_p End of the to_p buffer + @param name Name to be converted + @param name_len Length of 
the name, in bytes +*/ +static char* add_identifier(THD* thd, char *to_p, const char * end_p, + const char* name, size_t name_len) +{ + uint res; + uint errors; + const char *conv_name, *conv_name_end; + char tmp_name[FN_REFLEN]; + char conv_string[FN_REFLEN]; + int quote; + + DBUG_ENTER("add_identifier"); + if (!name[name_len]) + conv_name= name; + else + { + strnmov(tmp_name, name, name_len); + tmp_name[name_len]= 0; + conv_name= tmp_name; + } + res= strconvert(&my_charset_filename, conv_name, name_len, + system_charset_info, + conv_string, FN_REFLEN, &errors); + if (unlikely(!res || errors)) + { + DBUG_PRINT("error", ("strconvert of '%s' failed with %u (errors: %u)", conv_name, res, errors)); + conv_name= name; + conv_name_end= name + name_len; + } + else + { + DBUG_PRINT("info", ("conv '%s' -> '%s'", conv_name, conv_string)); + conv_name= conv_string; + conv_name_end= conv_string + res; + } + + quote= (likely(thd) ? + get_quote_char_for_identifier(thd, conv_name, res - 1) : + '`'); + + if (quote != EOF && (end_p - to_p > 2)) + { + *(to_p++)= (char) quote; + while (*conv_name && (end_p - to_p - 1) > 0) + { + int length= system_charset_info->charlen(conv_name, conv_name_end); + if (length <= 0) + length= 1; + if (length == 1 && *conv_name == (char) quote) + { + if ((end_p - to_p) < 3) + break; + *(to_p++)= (char) quote; + *(to_p++)= *(conv_name++); + } + else if (((long) length) < (end_p - to_p)) + { + to_p= strnmov(to_p, conv_name, length); + conv_name+= length; + } + else + break; /* string already filled */ + } + if (end_p > to_p) { + *(to_p++)= (char) quote; + if (end_p > to_p) + *to_p= 0; /* terminate by NUL, but do not include it in the count */ + } + } + else + to_p= strnmov(to_p, conv_name, end_p - to_p); + DBUG_RETURN(to_p); +} + + +/** + @brief Explain a path name by split it to database, table etc. + + @details Break down the path name to its logic parts + (database, table, partition, subpartition). 
+ filename_to_tablename cannot be used on partitions, due to the #P# part. + There can be up to 6 '#', #P# for partition, #SP# for subpartition + and #TMP# or #REN# for temporary or renamed partitions. + This should be used when something should be presented to a user in a + diagnostic, error etc. when it would be useful to know what a particular + file [and directory] means. Such as SHOW ENGINE STATUS, error messages etc. + + Examples: + + t1#P#p1 table t1 partition p1 + t1#P#p1#SP#sp1 table t1 partition p1 subpartition sp1 + t1#P#p1#SP#sp1#TMP# table t1 partition p1 subpartition sp1 temporary + t1#P#p1#SP#sp1#REN# table t1 partition p1 subpartition sp1 renamed + + @param thd Thread handle + @param from Path name in my_charset_filename + Null terminated in my_charset_filename, normalized + to use '/' as directory separation character. + @param to Explained name in system_charset_info + @param to_length Size of to buffer + @param explain_mode Requested output format. + EXPLAIN_ALL_VERBOSE -> + [Database `db`, ]Table `tbl`[,[ Temporary| Renamed] + Partition `p` [, Subpartition `sp`]] + EXPLAIN_PARTITIONS_VERBOSE -> `db`.`tbl` + [[ Temporary| Renamed] Partition `p` + [, Subpartition `sp`]] + EXPLAIN_PARTITIONS_AS_COMMENT -> `db`.`tbl` |* + [,[ Temporary| Renamed] Partition `p` + [, Subpartition `sp`]] *| + (| is really a /, and it is all in one line) + + @retval Length of returned string +*/ + +uint explain_filename(THD* thd, + const char *from, + char *to, + uint to_length, + enum_explain_filename_mode explain_mode) +{ + char *to_p= to; + char *end_p= to_p + to_length; + const char *db_name= NULL; + size_t db_name_len= 0; + const char *table_name; + size_t table_name_len= 0; + const char *part_name= NULL; + size_t part_name_len= 0; + const char *subpart_name= NULL; + size_t subpart_name_len= 0; + uint part_type= NORMAL_PART_NAME; + + const char *tmp_p; + DBUG_ENTER("explain_filename"); + DBUG_PRINT("enter", ("from '%s'", from)); + tmp_p= from; + table_name= from; + 
/* + If '/' then take last directory part as database. + '/' is the directory separator, not FN_LIB_CHAR + */ + while ((tmp_p= strchr(tmp_p, '/'))) + { + db_name= table_name; + /* calculate the length */ + db_name_len= (int)(tmp_p - db_name); + tmp_p++; + table_name= tmp_p; + } + tmp_p= table_name; + /* Look if there are partition tokens in the table name. */ + while ((tmp_p= strchr(tmp_p, '#'))) + { + tmp_p++; + switch (tmp_p[0]) { + case 'P': + case 'p': + if (tmp_p[1] == '#') + { + part_name= tmp_p + 2; + tmp_p+= 2; + } + break; + case 'S': + case 's': + if ((tmp_p[1] == 'P' || tmp_p[1] == 'p') && tmp_p[2] == '#') + { + part_name_len= (int)(tmp_p - part_name - 1); + subpart_name= tmp_p + 3; + tmp_p+= 3; + } + break; + case 'T': + case 't': + if ((tmp_p[1] == 'M' || tmp_p[1] == 'm') && + (tmp_p[2] == 'P' || tmp_p[2] == 'p') && + tmp_p[3] == '#' && !tmp_p[4]) + { + part_type= TEMP_PART_NAME; + tmp_p+= 4; + } + break; + case 'R': + case 'r': + if ((tmp_p[1] == 'E' || tmp_p[1] == 'e') && + (tmp_p[2] == 'N' || tmp_p[2] == 'n') && + tmp_p[3] == '#' && !tmp_p[4]) + { + part_type= RENAMED_PART_NAME; + tmp_p+= 4; + } + break; + default: + /* Not partition name part. 
*/ + ; + } + } + if (part_name) + { + table_name_len= (int)(part_name - table_name - 3); + if (subpart_name) + subpart_name_len= strlen(subpart_name); + else + part_name_len= strlen(part_name); + if (part_type != NORMAL_PART_NAME) + { + if (subpart_name) + subpart_name_len-= 5; + else + part_name_len-= 5; + } + } + else + table_name_len= strlen(table_name); + if (db_name) + { + if (explain_mode == EXPLAIN_ALL_VERBOSE) + { + to_p= strnmov(to_p, ER_THD_OR_DEFAULT(thd, ER_DATABASE_NAME), + end_p - to_p); + *(to_p++)= ' '; + to_p= add_identifier(thd, to_p, end_p, db_name, db_name_len); + to_p= strnmov(to_p, ", ", end_p - to_p); + } + else + { + to_p= add_identifier(thd, to_p, end_p, db_name, db_name_len); + to_p= strnmov(to_p, ".", end_p - to_p); + } + } + if (explain_mode == EXPLAIN_ALL_VERBOSE) + { + to_p= strnmov(to_p, ER_THD_OR_DEFAULT(thd, ER_TABLE_NAME), end_p - to_p); + *(to_p++)= ' '; + to_p= add_identifier(thd, to_p, end_p, table_name, table_name_len); + } + else + to_p= add_identifier(thd, to_p, end_p, table_name, table_name_len); + if (part_name) + { + if (explain_mode == EXPLAIN_PARTITIONS_AS_COMMENT) + to_p= strnmov(to_p, " /* ", end_p - to_p); + else if (explain_mode == EXPLAIN_PARTITIONS_VERBOSE) + to_p= strnmov(to_p, " ", end_p - to_p); + else + to_p= strnmov(to_p, ", ", end_p - to_p); + if (part_type != NORMAL_PART_NAME) + { + if (part_type == TEMP_PART_NAME) + to_p= strnmov(to_p, ER_THD_OR_DEFAULT(thd, ER_TEMPORARY_NAME), + end_p - to_p); + else + to_p= strnmov(to_p, ER_THD_OR_DEFAULT(thd, ER_RENAMED_NAME), + end_p - to_p); + to_p= strnmov(to_p, " ", end_p - to_p); + } + to_p= strnmov(to_p, ER_THD_OR_DEFAULT(thd, ER_PARTITION_NAME), + end_p - to_p); + *(to_p++)= ' '; + to_p= add_identifier(thd, to_p, end_p, part_name, part_name_len); + if (subpart_name) + { + to_p= strnmov(to_p, ", ", end_p - to_p); + to_p= strnmov(to_p, ER_THD_OR_DEFAULT(thd, ER_SUBPARTITION_NAME), + end_p - to_p); + *(to_p++)= ' '; + to_p= add_identifier(thd, to_p, end_p, 
subpart_name, subpart_name_len); + } + if (explain_mode == EXPLAIN_PARTITIONS_AS_COMMENT) + to_p= strnmov(to_p, " */", end_p - to_p); + } + DBUG_PRINT("exit", ("to '%s'", to)); + DBUG_RETURN((uint)(to_p - to)); +} + + +/* + Translate a file name to a table name (WL #1324). + + SYNOPSIS + filename_to_tablename() + from The file name in my_charset_filename. + to OUT The table name in system_charset_info. + to_length The size of the table name buffer. + + RETURN + Table name length. +*/ + +uint filename_to_tablename(const char *from, char *to, size_t to_length, + bool stay_quiet) +{ + uint errors; + size_t res; + DBUG_ENTER("filename_to_tablename"); + DBUG_PRINT("enter", ("from '%s'", from)); + + res= strconvert(&my_charset_filename, from, FN_REFLEN, + system_charset_info, to, to_length, &errors); + if (unlikely(errors)) // Old 5.0 name + { + res= strxnmov(to, to_length, MYSQL50_TABLE_NAME_PREFIX, from, NullS) - to; + if (!stay_quiet) + sql_print_error("Invalid (old?) table or database name '%s'", from); + } + + DBUG_PRINT("exit", ("to '%s'", to)); + DBUG_RETURN((uint)res); +} + + +/** + Check if given string begins with "#mysql50#" prefix + + @param name string to check cut + + @retval + FALSE no prefix found + @retval + TRUE prefix found +*/ + +bool check_mysql50_prefix(const char *name) +{ + return (name[0] == '#' && + !strncmp(name, MYSQL50_TABLE_NAME_PREFIX, + MYSQL50_TABLE_NAME_PREFIX_LENGTH)); +} + + +/** + Check if given string begins with "#mysql50#" prefix, cut it if so. 
+ + @param from string to check and cut + @param to[out] buffer for result string + @param to_length its size + + @retval + 0 no prefix found + @retval + non-0 result string length +*/ + +uint check_n_cut_mysql50_prefix(const char *from, char *to, size_t to_length) +{ + if (check_mysql50_prefix(from)) + return (uint) (strmake(to, from + MYSQL50_TABLE_NAME_PREFIX_LENGTH, + to_length - 1) - to); + return 0; +} + + +static bool check_if_frm_exists(char *path, const char *db, const char *table) +{ + fn_format(path, table, db, reg_ext, MYF(0)); + return !access(path, F_OK); +} + + +/* + Translate a table name to a file name (WL #1324). + + SYNOPSIS + tablename_to_filename() + from The table name in system_charset_info. + to OUT The file name in my_charset_filename. + to_length The size of the file name buffer. + + RETURN + File name length. +*/ + +uint tablename_to_filename(const char *from, char *to, size_t to_length) +{ + uint errors, length; + DBUG_ENTER("tablename_to_filename"); + DBUG_PRINT("enter", ("from '%s'", from)); + + if ((length= check_n_cut_mysql50_prefix(from, to, to_length))) + { + /* + Check if the name supplied is a valid mysql 5.0 name and + make the name a zero length string if it's not. + Note that just returning zero length is not enough : + a lot of places don't check the return value and expect + a zero terminated string. + */ + if (check_table_name(to, length, TRUE)) + { + to[0]= 0; + length= 0; + } + DBUG_RETURN(length); + } + length= strconvert(system_charset_info, from, FN_REFLEN, + &my_charset_filename, to, to_length, &errors); + if (check_if_legal_tablename(to) && + length + 4 < to_length) + { + memcpy(to + length, "@@@", 4); + length+= 3; + } + DBUG_PRINT("exit", ("to '%s'", to)); + DBUG_RETURN(length); +} + + +/* + Creates path to a file: mysql_data_dir/db/table.ext + + SYNOPSIS + build_table_filename() + buff Where to write result in my_charset_filename. + This may be the same as table_name. 
+ bufflen buff size + db Database name in system_charset_info. + table_name Table name in system_charset_info. + ext File extension. + flags FN_FROM_IS_TMP or FN_TO_IS_TMP or FN_IS_TMP + table_name is temporary, do not change. + + NOTES + + Uses database and table name, and extension to create + a file name in mysql_data_dir. Database and table + names are converted from system_charset_info into "fscs". + Unless flags indicate a temporary table name. + 'db' is always converted. + 'ext' is not converted. + + The conversion suppression is required for ALTER TABLE. This + statement creates intermediate tables. These are regular + (non-temporary) tables with a temporary name. Their path names must + be derivable from the table name. So we cannot use + build_tmptable_filename() for them. + + RETURN + path length +*/ + +uint build_table_filename(char *buff, size_t bufflen, const char *db, + const char *table_name, const char *ext, uint flags) +{ + char dbbuff[FN_REFLEN]; + char tbbuff[FN_REFLEN]; + DBUG_ENTER("build_table_filename"); + DBUG_PRINT("enter", ("db: '%s' table_name: '%s' ext: '%s' flags: %x", + db, table_name, ext, flags)); + + (void) tablename_to_filename(db, dbbuff, sizeof(dbbuff)); + + /* + Check if this is a temporary table name. Allow it if a corresponding .frm + file exists. + */ + if (!(flags & FN_IS_TMP) && + is_prefix(table_name, tmp_file_prefix) && + strlen(table_name) < NAME_CHAR_LEN && + check_if_frm_exists(tbbuff, dbbuff, table_name)) + flags|= FN_IS_TMP; + + if (flags & FN_IS_TMP) // FN_FROM_IS_TMP | FN_TO_IS_TMP + strmake(tbbuff, table_name, sizeof(tbbuff)-1); + else + (void) tablename_to_filename(table_name, tbbuff, sizeof(tbbuff)); + + char *end = buff + bufflen; + char *pos= strnmov(buff, mysql_data_home, bufflen-3); + /* + Add FN_LIBCHAR if mysql_data_home does not include it + In most cases mysql_data_home is just '.' 
+ */ + if (pos[-1] != FN_LIBCHAR) + *pos++= FN_LIBCHAR; + pos= strxnmov(pos, end - 2 - pos, dbbuff,NullS); + *pos++= FN_LIBCHAR; + *pos= 0; +#ifdef USE_SYMDIR + if (!(flags & SKIP_SYMDIR_ACCESS)) + { + unpack_dirname(buff, buff); + pos= strend(buff); + } +#endif + pos= strxnmov(pos, end - pos, tbbuff, ext, NullS); + + DBUG_PRINT("exit", ("buff: '%s'", buff)); + DBUG_RETURN((uint)(pos - buff)); +} + + +/** + Create path to a temporary table mysql_tmpdir/#sql-temptable-1234-12-1 + (i.e. to its .FRM file but without an extension). + + @param thd The thread handle. + @param buff Where to write result in my_charset_filename. + @param bufflen buff size + + @note + Uses current_pid, thread_id, and tmp_table counter to create + a file name in mysql_tmpdir. + + @return Path length. +*/ + +uint build_tmptable_filename(THD* thd, char *buff, size_t bufflen) +{ + DBUG_ENTER("build_tmptable_filename"); + + char *p= strnmov(buff, mysql_tmpdir, bufflen); + my_snprintf(p, bufflen - (p - buff), "/%s-temptable-%lx-%llx-%x", + tmp_file_prefix, current_pid, + thd->thread_id, thd->tmp_table++); + + if (lower_case_table_names) + { + /* Convert all except tmpdir to lower case */ + my_casedn_str(files_charset_info, p); + } + + size_t length= unpack_filename(buff, buff); + DBUG_PRINT("exit", ("buff: '%s'", buff)); + DBUG_RETURN((uint)length); +} + +/* + Create lower case paths for engines that requires them +*/ + +void build_lower_case_table_filename(char *buff, size_t bufflen, + const LEX_CSTRING *db, + const LEX_CSTRING *table, + uint flags) +{ + char table_name[SAFE_NAME_LEN+1], db_name[SAFE_NAME_LEN+1]; + + DBUG_ASSERT(db->length <= SAFE_NAME_LEN && table->length <= SAFE_NAME_LEN); + + memcpy(db_name, db->str, db->length); + db_name[db->length]= 0; + my_casedn_str(files_charset_info, db_name); + + memcpy(table_name, table->str, table->length); + table_name[table->length]= 0; + my_casedn_str(files_charset_info, table_name); + + build_table_filename(buff, bufflen, db_name, table_name, "", 
+ flags & FN_IS_TMP); +} + + +/** + @brief construct a temporary shadow file name. + + @details Make a shadow file name used by ALTER TABLE to construct the + modified table (with keeping the original). The modified table is then + moved back as original table. The name must start with the temp file + prefix so it gets filtered out by table files listing routines. + + @param[out] buff buffer to receive the constructed name + @param bufflen size of buff + @param lpt alter table data structure + + @retval path length +*/ + +uint build_table_shadow_filename(char *buff, size_t bufflen, + ALTER_PARTITION_PARAM_TYPE *lpt, + bool backup) +{ + char tmp_name[FN_REFLEN]; + my_snprintf(tmp_name, sizeof (tmp_name), "%s-%s-%lx-%s", tmp_file_prefix, + backup ? "backup" : "shadow", + (ulong) current_thd->thread_id, lpt->alter_info->table_name.str); + return build_table_filename(buff, bufflen, lpt->alter_info->db.str, tmp_name, + "", FN_IS_TMP); +} + + +/* + SYNOPSIS + mysql_write_frm() + lpt Struct carrying many parameters needed for this + method + flags Flags as defined below + WFRM_INITIAL_WRITE If set we need to prepare table before + creating the frm file + WFRM_INSTALL_SHADOW If set we should install the new frm + WFRM_KEEP_SHARE If set we know that the share is to be + retained and thus we should ensure share + object is correct, if not set we don't + set the new partition syntax string since + we know the share object is destroyed. + WFRM_PACK_FRM If set we should pack the frm file and delete + the frm file + + RETURN VALUES + TRUE Error + FALSE Success + + DESCRIPTION + A support method that creates a new frm file and in this process it + regenerates the partition data. It works fine also for non-partitioned + tables since it only handles partitioned data if it exists. +*/ + + +/* + TODO: Partitioning atomic DDL refactoring: WFRM_WRITE_SHADOW + should be merged with create_table_impl(frm_only == true). 
+*/ +bool mysql_write_frm(ALTER_PARTITION_PARAM_TYPE *lpt, uint flags) +{ + /* + Prepare table to prepare for writing a new frm file where the + partitions in add/drop state have temporarily changed their state + We set tmp_table to avoid get errors on naming of primary key index. + */ + int error= 0; + char path[FN_REFLEN+1]; + char shadow_path[FN_REFLEN+1]; + char shadow_frm_name[FN_REFLEN+1]; + char frm_name[FN_REFLEN+1]; +#ifdef WITH_PARTITION_STORAGE_ENGINE + char bak_path[FN_REFLEN+1]; + char bak_frm_name[FN_REFLEN+1]; + char *part_syntax_buf; + uint syntax_len; + partition_info *part_info= lpt->part_info; +#endif + DBUG_ENTER("mysql_write_frm"); + + /* + Build shadow frm file name + */ + build_table_shadow_filename(shadow_path, sizeof(shadow_path) - 1, lpt); + strxmov(shadow_frm_name, shadow_path, reg_ext, NullS); + if (flags & WFRM_WRITE_SHADOW) + { + if (mysql_prepare_create_table(lpt->thd, lpt->create_info, lpt->alter_info, + &lpt->db_options, lpt->table->file, + &lpt->key_info_buffer, &lpt->key_count, + C_ALTER_TABLE)) + { + DBUG_RETURN(TRUE); + } +#ifdef WITH_PARTITION_STORAGE_ENGINE + { + partition_info *part_info= lpt->table->part_info; + if (part_info) + { + part_syntax_buf= generate_partition_syntax_for_frm(lpt->thd, part_info, + &syntax_len, lpt->create_info, lpt->alter_info); + if (!part_syntax_buf) + DBUG_RETURN(TRUE); + part_info->part_info_string= part_syntax_buf; + part_info->part_info_len= syntax_len; + } + } +#endif + /* Write shadow frm file */ + lpt->create_info->table_options= lpt->db_options; + LEX_CUSTRING frm= build_frm_image(lpt->thd, lpt->alter_info->table_name, + lpt->create_info, + lpt->alter_info->create_list, + lpt->key_count, lpt->key_info_buffer, + lpt->table->file); + if (!frm.str) + { + error= 1; + goto end; + } + + int error= writefile(shadow_frm_name, lpt->alter_info->db.str, + lpt->alter_info->table_name.str, + lpt->create_info->tmp_table(), frm.str, frm.length); + my_free(const_cast(frm.str)); + + if (unlikely(error) || + 
unlikely(lpt->table->file-> + ha_create_partitioning_metadata(shadow_path, + NULL, CHF_CREATE_FLAG))) + { + mysql_file_delete(key_file_frm, shadow_frm_name, MYF(0)); + error= 1; + goto end; + } + } +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (flags & WFRM_WRITE_CONVERTED_TO) + { + THD *thd= lpt->thd; + Alter_table_ctx *alter_ctx= lpt->alter_ctx; + HA_CREATE_INFO *create_info= lpt->create_info; + + LEX_CSTRING new_path= { alter_ctx->get_new_path(), 0 }; + partition_info *work_part_info= thd->work_part_info; + handlerton *db_type= create_info->db_type; + DBUG_ASSERT(lpt->table->part_info); + DBUG_ASSERT(lpt->table->part_info == part_info); + handler *file= ((ha_partition *)(lpt->table->file))->get_child_handlers()[0]; + DBUG_ASSERT(file); + new_path.length= strlen(new_path.str); + strxnmov(frm_name, sizeof(frm_name) - 1, new_path.str, reg_ext, NullS); + create_info->alias= alter_ctx->table_name; + thd->work_part_info= NULL; + create_info->db_type= work_part_info->default_engine_type; + /* NOTE: partitioned temporary tables are not supported. 
*/ + DBUG_ASSERT(!create_info->tmp_table()); + if (ddl_log_create_table(part_info, create_info->db_type, &new_path, + &alter_ctx->new_db, &alter_ctx->new_name, true) || + ERROR_INJECT("create_before_create_frm")) + DBUG_RETURN(TRUE); + + if (mysql_prepare_create_table(thd, create_info, lpt->alter_info, + &lpt->db_options, file, + &lpt->key_info_buffer, &lpt->key_count, + C_ALTER_TABLE)) + DBUG_RETURN(TRUE); + + lpt->create_info->table_options= lpt->db_options; + LEX_CUSTRING frm= build_frm_image(thd, alter_ctx->new_name, create_info, + lpt->alter_info->create_list, + lpt->key_count, lpt->key_info_buffer, + file); + if (unlikely(!frm.str)) + DBUG_RETURN(TRUE); + + thd->work_part_info= work_part_info; + create_info->db_type= db_type; + + ERROR_INJECT("alter_partition_after_create_frm"); + + error= writefile(frm_name, alter_ctx->new_db.str, alter_ctx->new_name.str, + create_info->tmp_table(), frm.str, frm.length); + my_free((void *) frm.str); + if (unlikely(error) || ERROR_INJECT("alter_partition_after_write_frm")) + { + mysql_file_delete(key_file_frm, frm_name, MYF(0)); + DBUG_RETURN(TRUE); + } + + DBUG_RETURN(false); + } + if (flags & WFRM_BACKUP_ORIGINAL) + { + build_table_filename(path, sizeof(path) - 1, lpt->alter_info->db.str, + lpt->alter_info->table_name.str, "", 0); + strxnmov(frm_name, sizeof(frm_name), path, reg_ext, NullS); + + build_table_shadow_filename(bak_path, sizeof(bak_path) - 1, lpt, true); + strxmov(bak_frm_name, bak_path, reg_ext, NullS); + + DDL_LOG_MEMORY_ENTRY *main_entry= part_info->main_entry; + mysql_mutex_lock(&LOCK_gdl); + if (write_log_replace_frm(lpt, part_info->list->entry_pos, + (const char*) bak_path, + (const char*) path) || + ddl_log_write_execute_entry(part_info->list->entry_pos, + &part_info->execute_entry)) + { + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(TRUE); + } + mysql_mutex_unlock(&LOCK_gdl); + part_info->main_entry= main_entry; + if (mysql_file_rename(key_file_frm, frm_name, bak_frm_name, MYF(MY_WME))) + 
DBUG_RETURN(TRUE); + if (lpt->table->file->ha_create_partitioning_metadata(bak_path, path, + CHF_RENAME_FLAG)) + DBUG_RETURN(TRUE); + } +#else /* !WITH_PARTITION_STORAGE_ENGINE */ + DBUG_ASSERT(!(flags & WFRM_BACKUP_ORIGINAL)); +#endif /* !WITH_PARTITION_STORAGE_ENGINE */ + if (flags & WFRM_INSTALL_SHADOW) + { +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info= lpt->part_info; +#endif + /* + Build frm file name + */ + build_table_filename(path, sizeof(path) - 1, lpt->alter_info->db.str, + lpt->alter_info->table_name.str, "", 0); + strxnmov(frm_name, sizeof(frm_name), path, reg_ext, NullS); + /* + When we are changing to use new frm file we need to ensure that we + don't collide with another thread in process to open the frm file. + We start by deleting the .frm file and possible .par file. Then we + write to the DDL log that we have completed the delete phase by + increasing the phase of the log entry. Next step is to rename the + new .frm file and the new .par file to the real name. After + completing this we write a new phase to the log entry that will + deactivate it. 
+ */ + if (!(flags & WFRM_BACKUP_ORIGINAL) && ( + mysql_file_delete(key_file_frm, frm_name, MYF(MY_WME)) +#ifdef WITH_PARTITION_STORAGE_ENGINE + || lpt->table->file->ha_create_partitioning_metadata(path, shadow_path, + CHF_DELETE_FLAG) || + ddl_log_increment_phase(part_info->main_entry->entry_pos) || + (ddl_log_sync(), FALSE) +#endif + )) + { + error= 1; + goto err; + } + if (mysql_file_rename(key_file_frm, shadow_frm_name, frm_name, MYF(MY_WME)) +#ifdef WITH_PARTITION_STORAGE_ENGINE + || lpt->table->file->ha_create_partitioning_metadata(path, shadow_path, + CHF_RENAME_FLAG) +#endif + ) + { + error= 1; + goto err; + } +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (part_info && (flags & WFRM_KEEP_SHARE)) + { + TABLE_SHARE *share= lpt->table->s; + char *tmp_part_syntax_str; + part_syntax_buf= generate_partition_syntax_for_frm(lpt->thd, + part_info, &syntax_len, lpt->create_info, lpt->alter_info); + if (!part_syntax_buf) + { + error= 1; + goto err; + } + if (share->partition_info_buffer_size < syntax_len + 1) + { + share->partition_info_buffer_size= syntax_len+1; + if (!(tmp_part_syntax_str= (char*) strmake_root(&share->mem_root, + part_syntax_buf, + syntax_len))) + { + error= 1; + goto err; + } + share->partition_info_str= tmp_part_syntax_str; + } + else + memcpy((char*) share->partition_info_str, part_syntax_buf, + syntax_len + 1); + share->partition_info_str_len= part_info->part_info_len= syntax_len; + part_info->part_info_string= part_syntax_buf; + } +#endif + +err: +#ifdef WITH_PARTITION_STORAGE_ENGINE + ddl_log_increment_phase(part_info->main_entry->entry_pos); + part_info->main_entry= NULL; + (void) ddl_log_sync(); +#endif + ; + } + +end: + DBUG_RETURN(error); +} + + +/* + SYNOPSIS + write_bin_log() + thd Thread object + clear_error is clear_error to be called + query Query to log + query_length Length of query + is_trans if the event changes either + a trans or non-trans engine. 
+ + RETURN VALUES + NONE + + DESCRIPTION + Write the binlog if open, routine used in multiple places in this + file +*/ + +int write_bin_log(THD *thd, bool clear_error, + char const *query, ulong query_length, bool is_trans) +{ + int error= 0; + if (mysql_bin_log.is_open()) + { + int errcode= 0; + thd_proc_info(thd, "Writing to binlog"); + if (clear_error) + { + if (global_system_variables.log_warnings > 2) + { + uint err_clear= thd->is_error() ? thd->get_stmt_da()->sql_errno() : 0; + if (err_clear) + sql_print_warning("Error code %d of query '%s' is cleared at its " + "binary logging.", err_clear, query); + } + thd->clear_error(); + } + else + errcode= query_error_code(thd, TRUE); + error= thd->binlog_query(THD::STMT_QUERY_TYPE, + query, query_length, is_trans, FALSE, FALSE, + errcode) > 0; + thd_proc_info(thd, 0); + } + return error; +} + + +/** + Write to binary log the query event whose text is taken from thd->query(). + + @param thd Thread handle. + @param clear_error Whether to clear out any error from + execution context and binlog event. + @param is_trans Whether the query changed transactional data. + @param add_if_exists True indicates the binary logging of the query + should be done with "if exists" option. + @param commit_alter Whether the query should be binlogged as + Commit Alter. 
+ + @return false on Success + @return true otherwise +*/ + +int write_bin_log_with_if_exists(THD *thd, bool clear_error, + bool is_trans, bool add_if_exists, + bool commit_alter) +{ + int result; + ulonglong save_option_bits= thd->variables.option_bits; + if (add_if_exists) + thd->variables.option_bits|= OPTION_IF_EXISTS; + if (commit_alter) + thd->set_binlog_flags_for_alter(Gtid_log_event::FL_COMMIT_ALTER_E1); + + result= write_bin_log(thd, clear_error, thd->query(), thd->query_length(), + is_trans); + if (commit_alter) + { + thd->set_binlog_flags_for_alter(0); + thd->set_binlog_start_alter_seq_no(0); + } + thd->variables.option_bits= save_option_bits; + return result; +} + + +/* + delete (drop) tables. + + SYNOPSIS + mysql_rm_table() + thd Thread handle + tables List of tables to delete + if_exists If 1, don't give error if one table doesn't exists + drop_temporary 1 if DROP TEMPORARY + drop_sequence 1 if DROP SEQUENCE + dont_log_query 1 if no write to binary log and no send of ok + + NOTES + Will delete all tables that can be deleted and give a compact error + messages for tables that could not be deleted. + If a table is in use, we will wait for all users to free the table + before dropping it + + Wait if global_read_lock (FLUSH TABLES WITH READ LOCK) is set, but + not if under LOCK TABLES. + + RETURN + FALSE OK. In this case ok packet is sent to user + TRUE Error + +*/ + +bool mysql_rm_table(THD *thd,TABLE_LIST *tables, bool if_exists, + bool drop_temporary, bool drop_sequence, + bool dont_log_query) +{ + bool error; + Drop_table_error_handler err_handler; + TABLE_LIST *table; + DBUG_ENTER("mysql_rm_table"); + + /* Disable drop of enabled log tables, must be done before name locking */ + for (table= tables; table; table= table->next_local) + { + if (check_if_log_table(table, TRUE, "DROP")) + DBUG_RETURN(true); + } + + if (!drop_temporary) + { + if (!thd->locked_tables_mode) + { + if (drop_sequence) + { + /* We are trying to drop a sequence. 
+ Change all temporary tables that are not sequences to + normal tables so that we can try to drop them instead. + If we don't do this, we will get an error 'not a sequence' + when trying to drop a sequence that is hidden by a temporary + table. + */ + for (table= tables; table; table= table->next_global) + { + if (table->open_type == OT_TEMPORARY_OR_BASE && + is_temporary_table(table) && !table->table->s->sequence) + { + thd->mark_tmp_table_as_free_for_reuse(table->table); + table->table= NULL; + } + } + } + if (lock_table_names(thd, tables, NULL, + thd->variables.lock_wait_timeout, 0)) + DBUG_RETURN(true); + } + else + { + for (table= tables; table; table= table->next_local) + { + if (is_temporary_table(table)) + { + /* + A temporary table. + + Don't try to find a corresponding MDL lock or assign it + to table->mdl_request.ticket. There can't be metadata + locks for temporary tables: they are local to the session. + + Later in this function we release the MDL lock only if + table->mdl_requeset.ticket is not NULL. Thus here we + ensure that we won't release the metadata lock on the base + table locked with LOCK TABLES as a side effect of temporary + table drop. + */ + DBUG_ASSERT(table->mdl_request.ticket == NULL); + } + else + { + /* + Not a temporary table. + + Since 'tables' list can't contain duplicates (this is ensured + by parser) it is safe to cache pointer to the TABLE instances + in its elements. 
+ */ + table->table= find_table_for_mdl_upgrade(thd, table->db.str, + table->table_name.str, NULL); + if (!table->table) + DBUG_RETURN(true); + table->mdl_request.ticket= table->table->mdl_ticket; + } + } + } + /* We remove statistics for table last, after we have the DDL lock */ + for (table= tables; table; table= table->next_local) + { + LEX_CSTRING db_name= table->db; + LEX_CSTRING table_name= table->table_name; + if (!is_temporary_table(table)) + (void) delete_statistics_for_table(thd, &db_name, &table_name); + } + } + + /* mark for close and remove all cached entries */ + thd->push_internal_handler(&err_handler); + error= mysql_rm_table_no_locks(thd, tables, &thd->db, (DDL_LOG_STATE*) 0, + if_exists, + drop_temporary, + false, drop_sequence, dont_log_query, + false); + thd->pop_internal_handler(); + + if (unlikely(error)) + DBUG_RETURN(TRUE); + if (!dont_log_query) + my_ok(thd); + DBUG_RETURN(FALSE); +} + + +/** + Find the comment in the query. + That's auxiliary function to be used handling DROP TABLE [comment]. + + @param thd Thread handler + @param comment_pos How many characters to skip before the comment. + Can be either 9 for DROP TABLE or + 17 for DROP TABLE IF EXISTS + @param comment_start returns the beginning of the comment if found. 
+ + @retval 0 no comment found + @retval >0 the lenght of the comment found + +*/ +static uint32 get_comment(THD *thd, uint32 comment_pos, + const char **comment_start) +{ + /* We use uchar * here to make array indexing portable */ + const uchar *query= (uchar*) thd->query(); + const uchar *query_end= (uchar*) query + thd->query_length(); + const uchar *const state_map= thd->charset()->state_map; + + *comment_start= ""; // Ensure it points to something + for (; query < query_end; query++) + { + if (state_map[static_cast(*query)] == MY_LEX_SKIP) + continue; + if (comment_pos-- == 0) + break; + } + if (query > query_end - 3 /* comment can't be shorter than 4 */ || + state_map[static_cast(*query)] != MY_LEX_LONG_COMMENT || query[1] != '*') + return 0; + + *comment_start= (char*) query; + + for (query+= 3; query < query_end; query++) + { + if (query[-1] == '*' && query[0] == '/') + return (uint32)((char*) query - *comment_start + 1); + } + return 0; +} + +/** + Execute the drop of a sequence, view or table (normal or temporary). + + @param thd Thread handler + @param tables Tables to drop + @param current_db Current database, used for ddl logs + @param ddl_log_state DDL log state, for global ddl logging (used by + DROP DATABASE. If not set, an internal ddl log state + will be used. If set then the caller must call + ddl_log_complete(ddl_log_state); + @param if_exists If set, don't give an error if table doesn't exists. + In this case we give an warning of level 'NOTE' + @param drop_temporary Only drop temporary tables + @param drop_view Allow to delete VIEW .frm + @param dont_log_query Don't write query to log files. This will also not + generate warnings if the handler files doesn't exists + @param dont_free_locks Don't do automatic UNLOCK TABLE if no more locked + tables + + @retval 0 ok + @retval 1 Error + @retval -1 Thread was killed + + @note This function assumes that metadata locks have already been taken. 
+ It is also assumed that the tables have been removed from TDC. + + @note This function assumes that temporary tables to be dropped have + been pre-opened using corresponding table list elements. + + @todo When logging to the binary log, we should log + tmp_tables and transactional tables as separate statements if we + are in a transaction; This is needed to get these tables into the + cached binary log that is only written on COMMIT. + The current code only writes DROP statements that only uses temporary + tables to the cache binary log. This should be ok on most cases, but + not all. +*/ + +int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, + const LEX_CSTRING *current_db, + DDL_LOG_STATE *ddl_log_state, + bool if_exists, + bool drop_temporary, bool drop_view, + bool drop_sequence, + bool dont_log_query, + bool dont_free_locks) +{ + TABLE_LIST *table; + char path[FN_REFLEN + 1]; + LEX_CSTRING alias= null_clex_str; + LEX_CUSTRING version; + LEX_CSTRING partition_engine_name= {NULL, 0}; + StringBuffer<160> unknown_tables(system_charset_info); + DDL_LOG_STATE local_ddl_log_state; + const char *comment_start; + uint table_count= 0, non_temp_tables_count= 0; + int error= 0; + uint32 comment_len; + bool trans_tmp_table_deleted= 0, non_trans_tmp_table_deleted= 0; + bool is_drop_tmp_if_exists_added= 0, non_tmp_table_deleted= 0; + bool log_if_exists= if_exists; + const LEX_CSTRING *object_to_drop= ((drop_sequence) ? + &SEQUENCE_clex_str : + &TABLE_clex_str); + String normal_tables; + String built_trans_tmp_query, built_non_trans_tmp_query; + DBUG_ENTER("mysql_rm_table_no_locks"); + + if (!ddl_log_state) + { + ddl_log_state= &local_ddl_log_state; + bzero(ddl_log_state, sizeof(*ddl_log_state)); + } + + unknown_tables.length(0); + comment_len= get_comment(thd, if_exists ? 17:9, &comment_start); + + /* + Prepares the drop statements that will be written into the binary + log as follows: + + 1 - If we are not processing a "DROP TEMPORARY" it prepares a + "DROP". 
+ + 2 - A "DROP" may result in a "DROP TEMPORARY" but the opposite is + not true. + + 3 - If the current format is row, the IF EXISTS token needs to be + appended because one does not know if CREATE TEMPORARY was previously + written to the binary log. + + 4 - Add the IF_EXISTS token if necessary, i.e. if_exists is TRUE. + + 5 - For temporary tables, there is a need to differentiate tables + in transactional and non-transactional storage engines. For that, + reason, two types of drop statements are prepared. + + The need to different the type of tables when dropping a temporary + table stems from the fact that such drop does not commit an ongoing + transaction and changes to non-transactional tables must be written + ahead of the transaction in some circumstances. + + 6- Slave SQL thread ignores all replicate-* filter rules + for temporary tables with 'IF EXISTS' clause. (See sql/sql_parse.cc: + mysql_execute_command() for details). These commands will be binlogged + as they are, even if the default database (from USE `db`) is not present + on the Slave. This can cause point in time recovery failures later + when user uses the slave's binlog to re-apply. Hence at the time of binary + logging, these commands will be written with fully qualified table names + and use `db` will be suppressed. 
+ */ + + normal_tables.set_charset(thd->charset()); + if (!dont_log_query) + { + built_trans_tmp_query.set_charset(system_charset_info); + built_trans_tmp_query.append(STRING_WITH_LEN("DROP TEMPORARY ")); + built_trans_tmp_query.append(object_to_drop); + built_trans_tmp_query.append(' '); + if (thd->is_current_stmt_binlog_format_row() || if_exists) + { + is_drop_tmp_if_exists_added= true; + built_trans_tmp_query.append(STRING_WITH_LEN("IF EXISTS ")); + } + built_non_trans_tmp_query.set_charset(system_charset_info); + built_non_trans_tmp_query.copy(built_trans_tmp_query); + } + + for (table= tables; table; table= table->next_local) + { + bool is_trans= 0, temporary_table_was_dropped= 0; + bool table_creation_was_logged= 0; + bool wrong_drop_sequence= 0; + bool table_dropped= 0, res; + bool is_temporary= 0; + const LEX_CSTRING db= table->db; + const LEX_CSTRING table_name= table->table_name; + LEX_CSTRING cpath= {0,0}; + handlerton *hton= 0; + Table_type table_type; + size_t path_length= 0; + char *path_end= 0; + error= 0; + + DBUG_PRINT("table", ("table_l: '%s'.'%s' table: %p s: %p", + db.str, table_name.str, table->table, + table->table ? table->table->s : NULL)); + + /* + If we are in locked tables mode and are dropping a temporary table, + the ticket should be NULL to ensure that we don't release a lock + on a base table later. + */ + DBUG_ASSERT(!(thd->locked_tables_mode && + table->open_type != OT_BASE_ONLY && + thd->find_temporary_table(table) && + table->mdl_request.ticket != NULL)); + + if (drop_sequence && table->table && + table->table->s->table_type != TABLE_TYPE_SEQUENCE) + { + if (if_exists) + { + char buff[FN_REFLEN]; + String tbl_name(buff, sizeof(buff), system_charset_info); + tbl_name.length(0); + tbl_name.append(&db); + tbl_name.append('.'); + tbl_name.append(&table->table_name); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_NOT_SEQUENCE2, ER_THD(thd, ER_NOT_SEQUENCE2), + tbl_name.c_ptr_safe()); + + /* + Our job is done here. 
This statement was added to avoid executing + unnecessary code farther below which in some strange corner cases + caused the server to crash (see MDEV-17896). + */ + continue; + } + /* "DROP SEQUENCE" but a sequence table was not found */ + unknown_tables.append(&db); + unknown_tables.append('.'); + unknown_tables.append(&table_name); + unknown_tables.append(','); + error= ENOENT; + continue; + } + + /* First try to delete temporary tables and temporary sequences */ + if ((table->open_type != OT_BASE_ONLY && is_temporary_table(table))) + { + table_creation_was_logged= table->table->s->table_creation_was_logged; + if (thd->drop_temporary_table(table->table, &is_trans, true)) + error= 1; + else + { + table->table= 0; + temporary_table_was_dropped= 1; + } + is_temporary= 1; + } + + if ((drop_temporary && if_exists) || temporary_table_was_dropped) + { + /* + This handles the case of temporary tables. We have the following cases: + + - "DROP TEMPORARY" was executed and table was dropped + temporary_table_was_dropped == 1 + - "DROP TEMPORARY IF EXISTS" was specified but no temporary table + existed + temporary_table_was_dropped == 0 + */ + if (!dont_log_query && table_creation_was_logged) + { + if (is_trans) + trans_tmp_table_deleted= TRUE; + else + non_trans_tmp_table_deleted= TRUE; + + String *built_ptr_query= + (is_trans ? &built_trans_tmp_query : &built_non_trans_tmp_query); + /* + Write the database name if it is not the current one or if + thd->db is NULL or 'IF EXISTS' clause is present in 'DROP TEMPORARY' + query. + */ + if (thd->db.str == NULL || cmp(&db, &thd->db) || + is_drop_tmp_if_exists_added ) + { + append_identifier(thd, built_ptr_query, &db); + built_ptr_query->append('.'); + } + append_identifier(thd, built_ptr_query, &table_name); + built_ptr_query->append(','); + } + /* + This means that a temporary table was droped and as such there + is no need to proceed with the code that tries to drop a regular + table. 
+ */ + if (temporary_table_was_dropped) + continue; + } + else if (!drop_temporary) + { + non_temp_tables_count++; + + DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, db.str, + table_name.str, MDL_SHARED)); + + alias= (lower_case_table_names == 2) ? table->alias : table_name; + /* remove .frm file and engine files */ + path_length= build_table_filename(path, sizeof(path) - 1, db.str, + alias.str, reg_ext, 0); + path_end= path + path_length - reg_ext_length; + } + + DEBUG_SYNC(thd, "rm_table_no_locks_before_delete_table"); + if (drop_temporary) + { + /* "DROP TEMPORARY" but a temporary table was not found */ + unknown_tables.append(&db); + unknown_tables.append('.'); + unknown_tables.append(&table_name); + unknown_tables.append(','); + error= ENOENT; + continue; + } + + lex_string_set3(&cpath, path, (size_t) (path_end - path)); + + { + char engine_buf[NAME_CHAR_LEN + 1]; + LEX_CSTRING engine= { engine_buf, 0 }; + + table_type= dd_frm_type(thd, path, &engine, &partition_engine_name, + &version); + if (table_type == TABLE_TYPE_NORMAL || table_type == TABLE_TYPE_SEQUENCE) + { + plugin_ref p= plugin_lock_by_name(thd, &engine, + MYSQL_STORAGE_ENGINE_PLUGIN); + hton= p ? plugin_hton(p) : NULL; + } + // note that for TABLE_TYPE_VIEW and TABLE_TYPE_UNKNOWN hton == NULL + } + + thd->replication_flags= 0; + const bool was_view= table_type == TABLE_TYPE_VIEW; + + if (!table_count++) + { + LEX_CSTRING comment= {comment_start, (size_t) comment_len}; + if (ddl_log_drop_table_init(ddl_log_state, current_db, &comment)) + { + error= 1; + goto err; + } + } + + if ((table_type == TABLE_TYPE_UNKNOWN) || (was_view && !drop_view) || + (table_type != TABLE_TYPE_SEQUENCE && drop_sequence)) + { + /* + One of the following cases happened: + . "DROP" but table was not found + . "DROP TABLE" statement, but it's a view. + . "DROP SEQUENCE", but it's not a sequence + */ + wrong_drop_sequence= drop_sequence && hton; + error= table_type == TABLE_TYPE_UNKNOWN ? 
ENOENT : -1; + tdc_remove_table(thd, db.str, table_name.str); + if (wrong_drop_sequence) + goto report_error; + } + else + { +#ifdef WITH_WSREP + if (WSREP(thd) && hton && !wsrep_should_replicate_ddl(thd, hton)) + { + error= 1; + goto err; + } +#endif + + if (thd->locked_tables_mode == LTM_LOCK_TABLES || + thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES) + { + if (wait_while_table_is_used(thd, table->table, HA_EXTRA_NOT_USED)) + { + error= -1; + goto err; + } + close_all_tables_for_name(thd, table->table->s, + HA_EXTRA_PREPARE_FOR_DROP, NULL); + table->table= 0; + } + else + tdc_remove_table(thd, db.str, table_name.str); + + /* Check that we have an exclusive lock on the table to be dropped. */ + DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, db.str, + table_name.str, MDL_EXCLUSIVE)); + + // Remove extension for delete + *path_end= '\0'; + + if (hton && hton->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE) + log_if_exists= 1; + + bool enoent_warning= !dont_log_query && !(hton && hton->discover_table); + + if (was_view) + res= ddl_log_drop_view(ddl_log_state, &cpath, &db, + &table_name); + else + res= ddl_log_drop_table(ddl_log_state, hton, &cpath, &db, + &table_name); + if (res) + { + error= -1; + goto err; + } + + debug_crash_here("ddl_log_drop_before_delete_table"); + error= ha_delete_table(thd, hton, path, &db, &table_name, + enoent_warning); + debug_crash_here("ddl_log_drop_after_delete_table"); + + if (!error) + table_dropped= 1; + else if (error < 0) + error= 0; // Table didn't exists + else if (error) + { + if (thd->is_killed()) + { + error= -1; + goto err; + } + } + + /* + Delete the .frm file if we managed to delete the table from the + engine or the table didn't exists in the engine + */ + strmov(path_end, reg_ext); + if ((likely(!error) || non_existing_table_error(error)) && + !access(path, F_OK)) + { + int frm_delete_error= 0; + /* Delete the table definition file */ + if (hton && (hton->discover_table || error)) + { + /* + Table type 
is using discovery and may not need a .frm file + or the .frm file existed but no table in engine. + Delete it silently if it exists + */ + if (mysql_file_delete(key_file_frm, path, + MYF(MY_WME | MY_IGNORE_ENOENT))) + frm_delete_error= my_errno; + } + else if (unlikely(mysql_file_delete(key_file_frm, path, + !error ? MYF(MY_WME) : + MYF(MY_WME | MY_IGNORE_ENOENT)))) + { + frm_delete_error= my_errno; + DBUG_ASSERT(frm_delete_error); + } + + if (frm_delete_error) + { + /* Remember unexpected error from dropping the .frm file */ + error= frm_delete_error; + } + else + { + error= 0; // We succeeded to delete the frm + table_dropped= 1; + } + } + } + + /* + If there was no .frm file and the table is not temporary, + scan all engines try to drop the table from there. + This is to ensure we don't have any partial table files left. + */ + if (non_existing_table_error(error)) + { + int ferror= 0; + DBUG_ASSERT(!was_view); + + if (ddl_log_drop_table(ddl_log_state, 0, &cpath, &db, + &table_name)) + { + error= -1; + goto err; + } + + /* Remove extension for delete */ + *path_end= '\0'; + ferror= ha_delete_table_force(thd, path, &db, &table_name); + if (!ferror) + { + table_dropped= 1; + error= 0; + } + if (ferror <= 0) + { + ferror= 0; // Ignore table not found + + /* Delete the frm file again (just in case it was rediscovered) */ + strmov(path_end, reg_ext); + if (mysql_file_delete(key_file_frm, path, MYF(MY_WME|MY_IGNORE_ENOENT))) + ferror= my_errno; + } + if (!error) + error= ferror; + } + + /* + This may be set + - by the storage engine in handler::delete_table() + - when deleting a table without .frm file: delete_table_force() will + check if the storage engine that had the table had + HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE flag + */ + if (thd->replication_flags & OPTION_IF_EXISTS) + log_if_exists= 1; + + if (!was_view) + { + debug_crash_here("ddl_log_drop_before_drop_trigger"); + ddl_log_update_phase(ddl_log_state, DDL_DROP_PHASE_TRIGGER); + 
debug_crash_here("ddl_log_drop_before_drop_trigger2"); + } + + if (likely(!error) || non_existing_table_error(error)) + { + if (Table_triggers_list::drop_all_triggers(thd, &db, &table_name, + MYF(MY_WME | MY_IGNORE_ENOENT))) + error= error ? error : -1; + } + debug_crash_here("ddl_log_drop_after_drop_trigger"); + +report_error: + if (error) + { + StringBuffer tbl_name(system_charset_info); + uint is_note= (if_exists && (was_view || wrong_drop_sequence) ? + ME_NOTE : 0); + + tbl_name.length(0); + tbl_name.append(&db); + tbl_name.append('.'); + tbl_name.append(&table_name); + + if (!non_existing_table_error(error) || is_note) + { + /* + Error from engine already given. Here we only have to take + care about errors for trying to drop view or sequence + */ + if (was_view) + my_error(ER_IT_IS_A_VIEW, MYF(is_note), tbl_name.c_ptr_safe()); + else if (wrong_drop_sequence) + my_error(ER_NOT_SEQUENCE2, MYF(is_note), tbl_name.c_ptr_safe()); + if (is_note) + error= ENOENT; + } + else + { + if (unknown_tables.append(tbl_name) || unknown_tables.append(',')) + { + error= 1; + goto err; + } + } + } + + /* + Don't give an error if we are using IF EXISTS for a table that + didn't exists + */ + if (if_exists && non_existing_table_error(error)) + error= 0; + + if (!error && table_dropped) + { + PSI_CALL_drop_table_share(temporary_table_was_dropped, + db.str, (uint)db.length, + table_name.str, (uint)table_name.length); + mysql_audit_drop_table(thd, table); + if (!is_temporary) + { + backup_log_info ddl_log; + bzero(&ddl_log, sizeof(ddl_log)); + ddl_log.query= { C_STRING_WITH_LEN("DROP") }; + if ((ddl_log.org_partitioned= (partition_engine_name.str != 0))) + ddl_log.org_storage_engine_name= partition_engine_name; + else + lex_string_set(&ddl_log.org_storage_engine_name, + ha_resolve_storage_engine_name(hton)); + ddl_log.org_database= table->db; + ddl_log.org_table= table->table_name; + ddl_log.org_table_id= version; + backup_log_ddl(&ddl_log); + } + } + if (!was_view) + 
ddl_log_update_phase(ddl_log_state, DDL_DROP_PHASE_BINLOG); + + if (!dont_log_query && + (!error || table_dropped || non_existing_table_error(error))) + { + non_tmp_table_deleted|= (if_exists || table_dropped); + /* + Don't write the database name if it is the current one (or if + thd->db is NULL). + */ + if (thd->db.str == NULL || cmp(&db, &thd->db) != 0) + { + append_identifier(thd, &normal_tables, &db); + normal_tables.append('.'); + } + + append_identifier(thd, &normal_tables, &table_name); + normal_tables.append(','); + } + DBUG_PRINT("table", ("table: %p s: %p", table->table, + table->table ? table->table->s : NULL)); + } + DEBUG_SYNC(thd, "rm_table_no_locks_before_binlog"); + thd->used|= THD::THREAD_SPECIFIC_USED; + error= 0; + +err: + if (unknown_tables.length() > 1) + { + uint is_note= if_exists ? ME_NOTE : 0; + unknown_tables.chop(); + my_error((drop_sequence ? ER_UNKNOWN_SEQUENCES : ER_BAD_TABLE_ERROR), + MYF(is_note), unknown_tables.c_ptr_safe()); + } + error= thd->is_error(); + + if (non_temp_tables_count) + query_cache_invalidate3(thd, tables, 0); + + /* + We are always logging drop of temporary tables. + The reason is to handle the following case: + - Use statement based replication + - CREATE TEMPORARY TABLE foo (logged) + - set row based replication + - DROP TEMPORARY TABLE foo (needs to be logged) + This should be fixed so that we remember if creation of the + temporary table was logged and only log it if the creation was + logged. 
+ */ + + if (non_trans_tmp_table_deleted || + trans_tmp_table_deleted || non_tmp_table_deleted) + { + if (non_trans_tmp_table_deleted || trans_tmp_table_deleted) + thd->transaction->stmt.mark_dropped_temp_table(); + + if (!dont_log_query && mysql_bin_log.is_open()) + { + debug_crash_here("ddl_log_drop_before_binlog"); + if (non_trans_tmp_table_deleted) + { + /* Chop of the last comma */ + built_non_trans_tmp_query.chop(); + built_non_trans_tmp_query.append(generated_by_server); + error |= (thd->binlog_query(THD::STMT_QUERY_TYPE, + built_non_trans_tmp_query.ptr(), + built_non_trans_tmp_query.length(), + FALSE, FALSE, + is_drop_tmp_if_exists_added, + 0) > 0); + } + if (trans_tmp_table_deleted) + { + /* Chop of the last comma */ + built_trans_tmp_query.chop(); + built_trans_tmp_query.append(generated_by_server); + error |= (thd->binlog_query(THD::STMT_QUERY_TYPE, + built_trans_tmp_query.ptr(), + built_trans_tmp_query.length(), + TRUE, FALSE, + is_drop_tmp_if_exists_added, + 0) > 0); + } + if (non_tmp_table_deleted) + { + String built_query; + + built_query.set_charset(thd->charset()); + built_query.append(STRING_WITH_LEN("DROP ")); + built_query.append(object_to_drop); + built_query.append(' '); + if (log_if_exists) + built_query.append(STRING_WITH_LEN("IF EXISTS ")); + + /* Preserve comment in original query */ + if (comment_len) + { + built_query.append(comment_start, comment_len); + built_query.append(' '); + } + + /* Chop of the last comma */ + normal_tables.chop(); + built_query.append(normal_tables.ptr(), normal_tables.length()); + built_query.append(generated_by_server); + thd->binlog_xid= thd->query_id; + ddl_log_update_xid(ddl_log_state, thd->binlog_xid); + error |= (thd->binlog_query(THD::STMT_QUERY_TYPE, + built_query.ptr(), + built_query.length(), + TRUE, FALSE, FALSE, 0) > 0); + thd->binlog_xid= 0; + } + debug_crash_here("ddl_log_drop_after_binlog"); + } + } + if (ddl_log_state == &local_ddl_log_state) + ddl_log_complete(ddl_log_state); + + if 
(!drop_temporary) + { + /* + Under LOCK TABLES we should release meta-data locks on the tables + which were dropped. + + Leave LOCK TABLES mode if we managed to drop all tables which were + locked. Additional check for 'non_temp_tables_count' is to avoid + leaving LOCK TABLES mode if we have dropped only temporary tables. + */ + if (thd->locked_tables_mode) + { + if (thd->lock && thd->lock->table_count == 0 && + non_temp_tables_count > 0 && !dont_free_locks) + { + if (thd->locked_tables_list.unlock_locked_tables(thd)) + error= 1; + goto end; + } + for (table= tables; table; table= table->next_local) + { + /* Drop locks for all successfully dropped tables. */ + if (table->table == NULL && table->mdl_request.ticket) + { + /* + Under LOCK TABLES we may have several instances of table open + and locked and therefore have to remove several metadata lock + requests associated with them. + */ + thd->mdl_context.release_all_locks_for_name(table->mdl_request.ticket); + } + } + } + /* + Rely on the caller to implicitly commit the transaction + and release metadata locks. + */ + } + +end: + DBUG_RETURN(error || thd->is_error()); +} + + +/** + Log the drop of a table. + + @param thd Thread handler + @param db_name Database name + @param table_name Table name + @param temporary_table 1 if table was a temporary table + + This code is only used in the case of failed CREATE OR REPLACE TABLE + when the original table was dropped but we could not create the new one. 
*/

bool log_drop_table(THD *thd, const LEX_CSTRING *db_name,
                    const LEX_CSTRING *table_name,
                    const LEX_CSTRING *handler_name,
                    bool partitioned,
                    const LEX_CUSTRING *id,
                    bool temporary_table)
{
  /* Stack buffer is usually enough for "DROP TABLE IF EXISTS db.name ..." */
  char buff[NAME_LEN*2 + 80];
  String query(buff, sizeof(buff), system_charset_info);
  bool error= 0;
  DBUG_ENTER("log_drop_table");

  if (mysql_bin_log.is_open())
  {
    /* Build the compensating statement that is written to the binlog */
    query.length(0);
    query.append(STRING_WITH_LEN("DROP "));
    if (temporary_table)
      query.append(STRING_WITH_LEN("TEMPORARY "));
    query.append(STRING_WITH_LEN("TABLE IF EXISTS "));
    append_identifier(thd, &query, db_name);
    query.append('.');
    append_identifier(thd, &query, table_name);
    query.append(STRING_WITH_LEN("/* Generated to handle "
                                 "failed CREATE OR REPLACE */"));

    /*
      In case of temporary tables we don't have to log the database name
      in the binary log. We log this for non temporary tables, as the slave
      may use a filter to ignore queries for a specific database.
    */
    error= thd->binlog_query(THD::STMT_QUERY_TYPE,
                             query.ptr(), query.length(),
                             FALSE, FALSE, temporary_table, 0) > 0;
  }
  if (!temporary_table)
  {
    /* Record the drop in the backup/DDL log (temporary tables are not
       part of backups, so they are skipped) */
    backup_log_info ddl_log;
    bzero(&ddl_log, sizeof(ddl_log));
    ddl_log.query= { C_STRING_WITH_LEN("DROP_AFTER_CREATE") };
    ddl_log.org_storage_engine_name= *handler_name;
    ddl_log.org_partitioned= partitioned;
    ddl_log.org_database= *db_name;
    ddl_log.org_table= *table_name;
    ddl_log.org_table_id= *id;
    backup_log_ddl(&ddl_log);
  }
  DBUG_RETURN(error);
}


/**
  Quickly remove a table, without any logging

  @param thd         Thread context.
  @param base        The handlerton handle.
  @param db          The database name.
  @param table_name  The table name.
  @param flags       Flags for build_table_filename() as well as describing
                     if handler files / .FRM should be deleted as well.

  @return False in case of success, True otherwise.
+*/ + +bool quick_rm_table(THD *thd, handlerton *base, const LEX_CSTRING *db, + const LEX_CSTRING *table_name, uint flags, + const char *table_path) +{ + char path[FN_REFLEN + 1]; + const size_t pathmax = sizeof(path) - 1 - reg_ext_length; + int error= 0; + DBUG_ENTER("quick_rm_table"); + + size_t path_length= table_path ? + (strxnmov(path, pathmax, table_path, NullS) - path) : + build_table_filename(path, pathmax, db->str, table_name->str, "", flags); + if ((flags & (NO_HA_TABLE | NO_PAR_TABLE)) == NO_HA_TABLE) + { + handler *file= get_new_handler((TABLE_SHARE*) 0, thd->mem_root, base); + if (!file) + DBUG_RETURN(true); + (void) file->ha_create_partitioning_metadata(path, NULL, CHF_DELETE_FLAG); + delete file; + } + if (!(flags & (FRM_ONLY|NO_HA_TABLE))) + error|= ha_delete_table(thd, base, path, db, table_name, 0) > 0; + + if (!(flags & NO_FRM_RENAME)) + { + memcpy(path + path_length, reg_ext, reg_ext_length + 1); + if (mysql_file_delete(key_file_frm, path, MYF(0))) + error= 1; /* purecov: inspected */ + } + + if (likely(error == 0)) + { + PSI_CALL_drop_table_share(flags & FN_IS_TMP, db->str, (uint)db->length, + table_name->str, (uint)table_name->length); + } + + DBUG_RETURN(error); +} + + +/* + Sort keys in the following order: + - PRIMARY KEY + - UNIQUE keys where all column are NOT NULL + - UNIQUE keys that don't contain partial segments + - Other UNIQUE keys + - LONG UNIQUE keys + - Normal keys + - Fulltext keys + + This will make checking for duplicated keys faster and ensure that + PRIMARY keys are prioritized. +*/ + +static int sort_keys(KEY *a, KEY *b) +{ + ulong a_flags= a->flags, b_flags= b->flags; + + /* + Do not reorder LONG_HASH indexes, because they must match the order + of their LONG_UNIQUE_HASH_FIELD's. 
+ */ + if (a->algorithm == HA_KEY_ALG_LONG_HASH && + b->algorithm == HA_KEY_ALG_LONG_HASH) + return a->usable_key_parts - b->usable_key_parts; + + if (a_flags & HA_NOSAME) + { + if (!(b_flags & HA_NOSAME)) + return -1; + /* + Long Unique keys should always be last unique key. + Before this patch they used to change order wrt to partial keys (MDEV-19049) + */ + if (a->algorithm == HA_KEY_ALG_LONG_HASH) + return 1; + if (b->algorithm == HA_KEY_ALG_LONG_HASH) + return -1; + if ((a_flags ^ b_flags) & HA_NULL_PART_KEY) + { + /* Sort NOT NULL keys before other keys */ + return (a_flags & HA_NULL_PART_KEY) ? 1 : -1; + } + if (a->name.str == primary_key_name.str) + return -1; + if (b->name.str == primary_key_name.str) + return 1; + /* Sort keys don't containing partial segments before others */ + if ((a_flags ^ b_flags) & HA_KEY_HAS_PART_KEY_SEG) + return (a_flags & HA_KEY_HAS_PART_KEY_SEG) ? 1 : -1; + } + else if (b_flags & HA_NOSAME) + return 1; // Prefer b + + if ((a_flags ^ b_flags) & HA_FULLTEXT) + { + return (a_flags & HA_FULLTEXT) ? 1 : -1; + } + /* + Prefer original key order. usable_key_parts contains here + the original key position. 
+ */ + return a->usable_key_parts - b->usable_key_parts; +} + +/* + Check TYPELIB (set or enum) for duplicates + + SYNOPSIS + check_duplicates_in_interval() + set_or_name "SET" or "ENUM" string for warning message + name name of the checked column + typelib list of values for the column + dup_val_count returns count of duplicate elements + + DESCRIPTION + This function prints an warning for each value in list + which has some duplicates on its right + + RETURN VALUES + 0 ok + 1 Error +*/ + +bool check_duplicates_in_interval(const char *set_or_name, + const char *name, const TYPELIB *typelib, + CHARSET_INFO *cs, unsigned int *dup_val_count) +{ + TYPELIB tmp= *typelib; + const char **cur_value= typelib->type_names; + unsigned int *cur_length= typelib->type_lengths; + *dup_val_count= 0; + + for ( ; tmp.count > 1; cur_value++, cur_length++) + { + tmp.type_names++; + tmp.type_lengths++; + tmp.count--; + if (find_type2(&tmp, (const char*)*cur_value, *cur_length, cs)) + { + THD *thd= current_thd; + ErrConvString err(*cur_value, *cur_length, cs); + if (current_thd->is_strict_mode()) + { + my_error(ER_DUPLICATED_VALUE_IN_TYPE, MYF(0), + name, err.ptr(), set_or_name); + return 1; + } + push_warning_printf(thd,Sql_condition::WARN_LEVEL_NOTE, + ER_DUPLICATED_VALUE_IN_TYPE, + ER_THD(thd, ER_DUPLICATED_VALUE_IN_TYPE), + name, err.ptr(), set_or_name); + (*dup_val_count)++; + } + } + return 0; +} + + +/* + Resolves the column collation if: + - it was not typed at all, or + - it was contextually typed + according to the table level character set. + Generates an error to the diagnostics area in case of a failure. +*/ +bool Column_definition:: + prepare_charset_for_string(const Column_derived_attributes *dattr) +{ + CHARSET_INFO *tmp= charset_collation_attrs(). + resolved_to_character_set(dattr->charset()); + if (!tmp) + return true; + charset= tmp; + /* + Remove the "is contextually typed collation" indicator on success, + for safety. 
+ */ + flags&= ~CONTEXT_COLLATION_FLAG; + return false; +} + + +bool Column_definition::prepare_stage2_blob(handler *file, + ulonglong table_flags, + uint field_flags) +{ + if (table_flags & HA_NO_BLOBS) + { + my_error(ER_TABLE_CANT_HANDLE_BLOB, MYF(0), file->table_type()); + return true; + } + pack_flag= field_flags | + pack_length_to_packflag(pack_length - portable_sizeof_char_ptr); + if (charset->state & MY_CS_BINSORT) + pack_flag|= FIELDFLAG_BINARY; + length= 8; // Unireg field length + return false; +} + + +bool Column_definition::prepare_stage2_typelib(const char *type_name, + uint field_flags, + uint *dup_val_count) +{ + pack_flag= pack_length_to_packflag(pack_length) | field_flags; + if (charset->state & MY_CS_BINSORT) + pack_flag|= FIELDFLAG_BINARY; + return check_duplicates_in_interval(type_name, field_name.str, interval, + charset, dup_val_count); +} + + +uint Column_definition::pack_flag_numeric() const +{ + return (FIELDFLAG_NUMBER | + (flags & UNSIGNED_FLAG ? 0 : FIELDFLAG_DECIMAL) | + (flags & ZEROFILL_FLAG ? FIELDFLAG_ZEROFILL : 0)); +} + + +bool Column_definition::prepare_stage2_varchar(ulonglong table_flags) +{ + pack_flag= (charset->state & MY_CS_BINSORT) ? FIELDFLAG_BINARY : 0; + return false; +} + + +/* + Prepare a Column_definition instance for packing + Members such as pack_flag are valid after this call. + + @param IN handler - storage engine handler, + or NULL if preparing for an SP variable + @param IN table_flags - table flags + + @retval false - ok + @retval true - error (not supported type, bad definition, etc) +*/ + +bool Column_definition::prepare_stage2(handler *file, + ulonglong table_flags) +{ + DBUG_ENTER("Column_definition::prepare_stage2"); + + /* + This code came from mysql_prepare_create_table. 
+ Indent preserved to make patching easier + */ + DBUG_ASSERT(charset); + + if (type_handler()->Column_definition_prepare_stage2(this, file, table_flags)) + DBUG_RETURN(true); + + if (!(flags & NOT_NULL_FLAG) || + (vcol_info)) /* Make virtual columns allow NULL values */ + pack_flag|= FIELDFLAG_MAYBE_NULL; + if (flags & NO_DEFAULT_VALUE_FLAG) + pack_flag|= FIELDFLAG_NO_DEFAULT; + DBUG_RETURN(false); +} + + +/** + Modifies the first column definition whose SQL type is TIMESTAMP + by adding the features DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP. + + If the first TIMESTAMP column appears to be nullable, or to have an + explicit default, or to be a virtual column, or to be part of table period, + then no promotion is done. + + @param column_definitions The list of column definitions, in the physical + order in which they appear in the table. +*/ + +void promote_first_timestamp_column(List *column_definitions) +{ + bool first= true; + for (Create_field &column_definition : *column_definitions) + { + if (column_definition.is_timestamp_type() || // TIMESTAMP + column_definition.unireg_check == Field::TIMESTAMP_OLD_FIELD) // Legacy + { + if (!column_definition.explicitly_nullable) + column_definition.flags|= NOT_NULL_FLAG; + DBUG_PRINT("info", ("field-ptr:%p", column_definition.field)); + if (first && + (column_definition.flags & NOT_NULL_FLAG) != 0 && // NOT NULL, + column_definition.default_value == NULL && // no constant default, + column_definition.unireg_check == Field::NONE && // no function default + column_definition.vcol_info == NULL && + column_definition.period == NULL && + !(column_definition.flags & VERS_SYSTEM_FIELD)) // column isn't generated + { + DBUG_PRINT("info", ("First TIMESTAMP column '%s' was promoted to " + "DEFAULT CURRENT_TIMESTAMP ON UPDATE " + "CURRENT_TIMESTAMP", + column_definition.field_name.str + )); + column_definition.unireg_check= Field::TIMESTAMP_DNUN_FIELD; + } + first= false; + } + } +} + +static bool key_cmp(const 
Key_part_spec &a, const Key_part_spec &b) +{ + return a.length == b.length && a.asc == b.asc && + !lex_string_cmp(system_charset_info, &a.field_name, &b.field_name); +} + +/** + Check if there is a duplicate key. Report a warning for every duplicate key. + + @param thd Thread context. + @param key Key to be checked. + @param key_info Key meta-data info. + @param key_list List of existing keys. +*/ +static void check_duplicate_key(THD *thd, const Key *key, const KEY *key_info, + const List *key_list) +{ + /* + We only check for duplicate indexes if it is requested and the + key is not auto-generated. + + Check is requested if the key was explicitly created or altered + by the user (unless it's a foreign key). + */ + if (key->old || key->type == Key::FOREIGN_KEY || key->generated) + return; + + for (const Key &k : *key_list) + { + // Looking for a similar key... + + if (&k == key) + break; + + if (k.generated || + (key->type != k.type) || + (key->key_create_info.algorithm != k.key_create_info.algorithm) || + (key->columns.elements != k.columns.elements)) + { + // Keys are different. + continue; + } + + if (std::equal(key->columns.begin(), key->columns.end(), k.columns.begin(), + key_cmp)) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, ER_DUP_INDEX, + ER_THD(thd, ER_DUP_INDEX), key_info->name.str); + return; + } + } +} + + +bool Column_definition::prepare_stage1_typelib(THD *thd, + MEM_ROOT *mem_root, + column_definition_type_t deftype) +{ + /* + Pass the last parameter to prepare_interval_field() as follows: + - If we are preparing for an SP variable, we pass "false", + to force allocation and full copying of TYPELIB values on the given + mem_root, even if no character set conversion is needed. This is needed + because a life cycle of an SP variable is longer than the current query. + + - If we are preparing for a CREATE TABLE, we pass "true". 
+ This will create the typelib in runtime memory - we will free the + occupied memory at the same time when we free this + sql_field -- at the end of execution. + Pass "true" as the last argument to reuse "interval_list" + values in "interval" in cases when no character conversion is needed, + to avoid extra copying. + */ + if (prepare_interval_field(mem_root, + deftype == COLUMN_DEFINITION_TABLE_FIELD)) + return true; // E.g. wrong values with commas: SET('a,b') + create_length_to_internal_length_typelib(); + + if (default_value && default_value->expr->basic_const_item()) + { + if ((charset != default_value->expr->collation.collation && + prepare_stage1_convert_default(thd, mem_root, charset)) || + prepare_stage1_check_typelib_default()) + return true; + } + return false; +} + + +bool Column_definition::prepare_stage1_string(THD *thd, + MEM_ROOT *mem_root) +{ + create_length_to_internal_length_string(); + if (prepare_blob_field(thd)) + return true; + /* + Convert the default value from client character + set into the column character set if necessary. + We can only do this for constants as we have not yet run fix_fields. + But not for blobs, as they will be stored as SQL expressions, not + written down into the record image. 
+ */ + if (!(flags & BLOB_FLAG) && default_value && + default_value->expr->basic_const_item() && + charset != default_value->expr->collation.collation) + { + if (prepare_stage1_convert_default(thd, mem_root, charset)) + return true; + } + return false; +} + + +bool Column_definition::prepare_stage1_bit(THD *thd, + MEM_ROOT *mem_root) +{ + pack_flag= FIELDFLAG_NUMBER; + create_length_to_internal_length_bit(); + return false; +} + + +bool Column_definition::prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + column_definition_type_t deftype, + const Column_derived_attributes + *derived_attr) +{ + // SP variables have no default_value + DBUG_ASSERT(deftype == COLUMN_DEFINITION_TABLE_FIELD || !default_value); + + return type_handler()->Column_definition_prepare_stage1(thd, mem_root, + this, deftype, + derived_attr); +} + + +bool Column_definition::prepare_stage1_convert_default(THD *thd, + MEM_ROOT *mem_root, + CHARSET_INFO *cs) +{ + DBUG_ASSERT(thd->mem_root == mem_root); + Item *item; + if (!(item= default_value->expr->safe_charset_converter(thd, cs))) + { + my_error(ER_INVALID_DEFAULT, MYF(0), field_name.str); + return true; // Could not convert + } + /* Fix for prepare statement */ + thd->change_item_tree(&default_value->expr, item); + return false; +} + + +bool Column_definition::prepare_stage1_check_typelib_default() +{ + StringBuffer str; + String *def= default_value->expr->val_str(&str); + bool not_found; + if (def == NULL) /* SQL "NULL" maps to NULL */ + { + not_found= flags & NOT_NULL_FLAG; + } + else + { + not_found= false; + if (real_field_type() == MYSQL_TYPE_SET) + { + char *not_used; + uint not_used2; + find_set(interval, def->ptr(), def->length(), + charset, ¬_used, ¬_used2, ¬_found); + } + else /* MYSQL_TYPE_ENUM */ + { + def->length(charset->lengthsp(def->ptr(), def->length())); + not_found= !find_type2(interval, def->ptr(), def->length(), charset); + } + } + if (not_found) + { + my_error(ER_INVALID_DEFAULT, MYF(0), field_name.str); + return true; + } + 
return false; +} +/* + This function adds a invisible field to field_list + SYNOPSIS + mysql_add_invisible_field() + thd Thread Object + field_list list of all table fields + field_name name/prefix of invisible field + ( Prefix in the case when it is + *INVISIBLE_FULL* + and given name is duplicate) + type_handler field data type + invisible + default value + RETURN VALUE + Create_field pointer +*/ +int mysql_add_invisible_field(THD *thd, List * field_list, + const char *field_name, Type_handler *type_handler, + field_visibility_t invisible, Item* default_value) +{ + Create_field *fld= new(thd->mem_root)Create_field(); + const char *new_name= NULL; + /* Get unique field name if invisible == INVISIBLE_FULL */ + if (invisible == INVISIBLE_FULL) + { + if ((new_name= make_unique_invisible_field_name(thd, field_name, + field_list))) + { + fld->field_name.str= new_name; + fld->field_name.length= strlen(new_name); + } + else + return 1; //Should not happen + } + else + { + fld->field_name.str= thd->strmake(field_name, strlen(field_name)); + fld->field_name.length= strlen(field_name); + } + fld->set_handler(type_handler); + fld->invisible= invisible; + if (default_value) + { + Virtual_column_info *v= new (thd->mem_root) Virtual_column_info(); + v->expr= default_value; + v->utf8= 0; + fld->default_value= v; + } + field_list->push_front(fld, thd->mem_root); + return 0; +} + +#define LONG_HASH_FIELD_NAME_LENGTH 30 +static inline void make_long_hash_field_name(LEX_CSTRING *buf, uint num) +{ + buf->length= my_snprintf((char *)buf->str, + LONG_HASH_FIELD_NAME_LENGTH, "DB_ROW_HASH_%u", num); +} + +/** + Add fully invisible hash field to table in case of long + unique column + @param thd Thread Context. + @param create_list List of table fields. 
+ @param key_info current long unique key info +*/ +static Create_field * add_hash_field(THD * thd, List *create_list, + KEY *key_info) +{ + List_iterator it(*create_list); + Create_field *dup_field, *cf= new (thd->mem_root) Create_field(); + cf->flags|= UNSIGNED_FLAG | LONG_UNIQUE_HASH_FIELD; + cf->decimals= 0; + cf->length= cf->char_length= cf->pack_length= HA_HASH_FIELD_LENGTH; + cf->invisible= INVISIBLE_FULL; + cf->pack_flag|= FIELDFLAG_MAYBE_NULL; + cf->vcol_info= new (thd->mem_root) Virtual_column_info(); + cf->vcol_info->stored_in_db= false; + uint num= 1; + LEX_CSTRING field_name; + field_name.str= (char *)thd->alloc(LONG_HASH_FIELD_NAME_LENGTH); + make_long_hash_field_name(&field_name, num); + /* + Check for collisions + */ + while ((dup_field= it++)) + { + if (!my_strcasecmp(system_charset_info, field_name.str, dup_field->field_name.str)) + { + num++; + make_long_hash_field_name(&field_name, num); + it.rewind(); + } + } + cf->field_name= field_name; + cf->set_handler(&type_handler_slonglong); + key_info->algorithm= HA_KEY_ALG_LONG_HASH; + create_list->push_back(cf,thd->mem_root); + return cf; +} + +Key * +mysql_add_invisible_index(THD *thd, List *key_list, + LEX_CSTRING* field_name, enum Key::Keytype type) +{ + Key *key= new (thd->mem_root) Key(type, &null_clex_str, HA_KEY_ALG_UNDEF, + false, DDL_options(DDL_options::OPT_NONE)); + key->columns.push_back(new(thd->mem_root) Key_part_spec(field_name, 0, true), + thd->mem_root); + key_list->push_back(key, thd->mem_root); + return key; +} + + +bool Type_handler_string::Key_part_spec_init_ft(Key_part_spec *part, + const Column_definition &def) + const +{ + /* + Set length to 0. It's set to the real column width later for CHAR. + It has to be the correct col width for CHAR, as its data are not + prefixed with length (unlike blobs). 
+ */ + part->length= 0; + return !Charset(def.charset).is_good_for_ft(); +} + + +bool Type_handler_varchar::Key_part_spec_init_ft(Key_part_spec *part, + const Column_definition &def) + const +{ + part->length= 0; + return !Charset(def.charset).is_good_for_ft(); +} + + +bool +Type_handler_blob_common::Key_part_spec_init_ft(Key_part_spec *part, + const Column_definition &def) + const +{ + /* + Set keyseg length to 1 for blobs. + It's ignored in ft code: the data length is taken from the length prefix. + */ + part->length= 1; + return !Charset(def.charset).is_good_for_ft(); +} + + +static bool +key_add_part_check_null(const handler *file, KEY *key_info, + const Column_definition *sql_field, + const Key_part_spec *column) +{ + if (!(sql_field->flags & NOT_NULL_FLAG)) + { + key_info->flags|= HA_NULL_PART_KEY; + if (!(file->ha_table_flags() & HA_NULL_IN_KEY)) + { + my_error(ER_NULL_COLUMN_IN_INDEX, MYF(0), column->field_name.str); + return true; + } + } + return false; +} + + +/* + Prepare for a table creation. + Stage 1: prepare the field list. 
+*/ +static bool mysql_prepare_create_table_stage1(THD *thd, + HA_CREATE_INFO *create_info, + Alter_info *alter_info) +{ + DBUG_ENTER("mysql_prepare_create_table_stage1"); + const Column_derived_attributes dattr(create_info->default_table_charset); + const Column_bulk_alter_attributes + battr(create_info->alter_table_convert_to_charset); + Create_field *sql_field; + List_iterator_fast it(alter_info->create_list); + + DBUG_EXECUTE_IF("test_pseudo_invisible",{ + mysql_add_invisible_field(thd, &alter_info->create_list, + "invisible", &type_handler_slong, INVISIBLE_SYSTEM, + new (thd->mem_root)Item_int(thd, 9)); + }); + DBUG_EXECUTE_IF("test_completely_invisible",{ + mysql_add_invisible_field(thd, &alter_info->create_list, + "invisible", &type_handler_slong, INVISIBLE_FULL, + new (thd->mem_root)Item_int(thd, 9)); + }); + DBUG_EXECUTE_IF("test_invisible_index",{ + LEX_CSTRING temp; + temp.str= "invisible"; + temp.length= strlen("invisible"); + mysql_add_invisible_index(thd, &alter_info->key_list + , &temp, Key::MULTIPLE); + }); + + + for ( ; (sql_field=it++) ; ) + { + /* Virtual fields are always NULL */ + if (sql_field->vcol_info) + sql_field->flags&= ~NOT_NULL_FLAG; + + /* + Initialize length from its original value (number of characters), + which was set in the parser. This is necessary if we're + executing a prepared statement for the second time. + */ + sql_field->length= sql_field->char_length; + + if (sql_field->bulk_alter(&dattr, &battr)) + DBUG_RETURN(true); + + if (sql_field->prepare_stage1(thd, thd->mem_root, + COLUMN_DEFINITION_TABLE_FIELD, + &dattr)) + DBUG_RETURN(true); + + DBUG_ASSERT(sql_field->charset); + + if (check_column_name(sql_field->field_name.str)) + { + my_error(ER_WRONG_COLUMN_NAME, MYF(0), sql_field->field_name.str); + DBUG_RETURN(TRUE); + } + } + DBUG_RETURN(false); +} + + +/* + Preparation for table creation, final stage. + + SYNOPSIS + mysql_prepare_create_table_finalize() + thd Thread object. 
+ create_info Create information (like MAX_ROWS). + alter_info List of columns and indexes to create + db_options INOUT Table options (like HA_OPTION_PACK_RECORD). + file The handler for the new table. + key_info_buffer OUT An array of KEY structs for the indexes. + key_count OUT The number of elements in the array. + create_table_mode C_ORDINARY_CREATE, C_ALTER_TABLE, + C_CREATE_SELECT, C_ASSISTED_DISCOVERY + + DESCRIPTION + Prepares the table and key structures for table creation. + + NOTES + sets create_info->varchar if the table has a varchar + + RETURN VALUES + FALSE OK + TRUE error +*/ + +static int +mysql_prepare_create_table_finalize(THD *thd, HA_CREATE_INFO *create_info, + Alter_info *alter_info, uint *db_options, + handler *file, KEY **key_info_buffer, + uint *key_count, int create_table_mode) +{ + const char *key_name; + Create_field *sql_field,*dup_field; + uint field,null_fields,max_key_length; + ulong record_offset= 0; + KEY_PART_INFO *key_part_info; + int field_no,dup_no; + int select_field_pos,auto_increment=0; + List_iterator_fast it(alter_info->create_list); + List_iterator it2(alter_info->create_list); + uint total_uneven_bit_length= 0; + int select_field_count= C_CREATE_SELECT(create_table_mode); + bool tmp_table= create_table_mode == C_ALTER_TABLE; + const bool create_simple= thd->lex->create_simple(); + bool is_hash_field_needed= false; + const CHARSET_INFO *scs= system_charset_info; + DBUG_ENTER("mysql_prepare_create_table"); + + LEX_CSTRING* connstr = &create_info->connect_string; + if (connstr->length > CONNECT_STRING_MAXLEN && + scs->charpos(connstr->str, connstr->str + connstr->length, + CONNECT_STRING_MAXLEN) < connstr->length) + { + my_error(ER_WRONG_STRING_LENGTH, MYF(0), connstr->str, "CONNECTION", + CONNECT_STRING_MAXLEN); + DBUG_RETURN(TRUE); + } + + select_field_pos= alter_info->create_list.elements - select_field_count; + null_fields= 0; + create_info->varchar= 0; + max_key_length= file->max_key_length(); + + /* Handle creation of 
sequences */ + if (create_info->sequence) + { + if (!(file->ha_table_flags() & HA_CAN_TABLES_WITHOUT_ROLLBACK)) + { + my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0), file->table_type(), + SEQUENCE_clex_str.str); + DBUG_RETURN(TRUE); + } + + /* The user specified fields: check that structure is ok */ + if (check_sequence_fields(thd->lex, &alter_info->create_list, + alter_info->db, alter_info->table_name)) + DBUG_RETURN(TRUE); + } + + + for (field_no=0; (sql_field=it++) ; field_no++) + { + if (!(sql_field->flags & NOT_NULL_FLAG)) + null_fields++; + + if (sql_field->real_field_type() == MYSQL_TYPE_BIT && + file->ha_table_flags() & HA_CAN_BIT_FIELD) + total_uneven_bit_length+= sql_field->length & 7; + + /* Check if we have used the same field name before */ + for (dup_no=0; (dup_field=it2++) != sql_field; dup_no++) + { + if (lex_string_cmp(scs, &sql_field->field_name, &dup_field->field_name) == 0) + { + /* + If this was a CREATE ... SELECT statement, accept a field + redefinition if we are changing a field in the SELECT part + */ + if (field_no < select_field_pos || dup_no >= select_field_pos || + dup_field->invisible >= INVISIBLE_SYSTEM) + { + my_error(ER_DUP_FIELDNAME, MYF(0), sql_field->field_name.str); + DBUG_RETURN(TRUE); + } + else + { + /* Field redefined */ + + /* + If we are replacing a BIT field, revert the increment + of total_uneven_bit_length that was done above. + */ + if (sql_field->real_field_type() == MYSQL_TYPE_BIT && + file->ha_table_flags() & HA_CAN_BIT_FIELD) + total_uneven_bit_length-= sql_field->length & 7; + + /* + We're making one field from two, the result field will have + dup_field->flags as flags. If we've incremented null_fields + because of sql_field->flags, decrement it back. 
+ */ + if (!(sql_field->flags & NOT_NULL_FLAG)) + null_fields--; + + if (sql_field->redefine_stage1(dup_field, file)) + DBUG_RETURN(true); + + it2.remove(); // Remove first (create) definition + select_field_pos--; + break; + } + } + } + /* Don't pack rows in old tables if the user has requested this */ + if ((sql_field->flags & BLOB_FLAG) || + (sql_field->real_field_type() == MYSQL_TYPE_VARCHAR && + create_info->row_type != ROW_TYPE_FIXED)) + (*db_options)|= HA_OPTION_PACK_RECORD; + it2.rewind(); + } + + /* record_offset will be increased with 'length-of-null-bits' later */ + record_offset= 0; + null_fields+= total_uneven_bit_length; + + it.rewind(); + while ((sql_field=it++)) + { + DBUG_ASSERT(sql_field->charset != 0); + if (sql_field->prepare_stage2(file, file->ha_table_flags())) + DBUG_RETURN(TRUE); + if (sql_field->real_field_type() == MYSQL_TYPE_VARCHAR) + create_info->varchar= TRUE; + sql_field->offset= record_offset; + if (MTYP_TYPENR(sql_field->unireg_check) == Field::NEXT_NUMBER) + auto_increment++; + if (parse_option_list(thd, create_info->db_type, &sql_field->option_struct, + &sql_field->option_list, + create_info->db_type->field_options, FALSE, + thd->mem_root)) + DBUG_RETURN(TRUE); + /* + For now skip fields that are not physically stored in the database + (virtual fields) and update their offset later + (see the next loop). 
+ */ + if (sql_field->stored_in_db()) + record_offset+= sql_field->pack_length; + } + /* Update virtual fields' offset and give error if + All fields are invisible */ + bool is_all_invisible= true; + it.rewind(); + while ((sql_field=it++)) + { + if (!sql_field->stored_in_db()) + { + sql_field->offset= record_offset; + record_offset+= sql_field->pack_length; + } + if (sql_field->invisible == VISIBLE) + is_all_invisible= false; + } + if (is_all_invisible) + { + my_error(ER_TABLE_MUST_HAVE_COLUMNS, MYF(0)); + DBUG_RETURN(TRUE); + } + if (auto_increment > 1) + { + my_message(ER_WRONG_AUTO_KEY, ER_THD(thd, ER_WRONG_AUTO_KEY), MYF(0)); + DBUG_RETURN(TRUE); + } + if (auto_increment && + (file->ha_table_flags() & HA_NO_AUTO_INCREMENT)) + { + my_error(ER_TABLE_CANT_HANDLE_AUTO_INCREMENT, MYF(0), file->table_type()); + DBUG_RETURN(TRUE); + } + + /* + CREATE TABLE[with auto_increment column] SELECT is unsafe as the rows + inserted in the created table depends on the order of the rows fetched + from the select tables. This order may differ on master and slave. We + therefore mark it as unsafe. + */ + if (select_field_count > 0 && auto_increment) + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_CREATE_SELECT_AUTOINC); + + /* Create keys */ + + List_iterator key_iterator(alter_info->key_list); + List_iterator key_iterator2(alter_info->key_list); + uint key_parts=0; + bool primary_key=0,unique_key=0; + Key *key, *key2; + uint tmp, key_number; + + /* Calculate number of key segements */ + *key_count= 0; + + while ((key=key_iterator++)) + { + DBUG_PRINT("info", ("key name: '%s' type: %d", key->name.str ? key->name.str : + "(none)" , key->type)); + if (key->type == Key::FOREIGN_KEY) + { + Foreign_key *fk_key= (Foreign_key*) key; + if (fk_key->validate(alter_info->create_list)) + DBUG_RETURN(TRUE); + if (fk_key->ref_columns.elements) + { + if (fk_key->ref_columns.elements != fk_key->columns.elements) + { + my_error(ER_WRONG_FK_DEF, MYF(0), + (fk_key->name.str ? 
fk_key->name.str : + "foreign key without name"), + ER_THD(thd, ER_KEY_REF_DO_NOT_MATCH_TABLE_REF)); + DBUG_RETURN(TRUE); + } + } + else + fk_key->ref_columns.append(&fk_key->columns); + continue; + } + (*key_count)++; + tmp=file->max_key_parts(); + if (key->columns.elements > tmp) + { + my_error(ER_TOO_MANY_KEY_PARTS,MYF(0),tmp); + DBUG_RETURN(TRUE); + } + if (check_ident_length(&key->name)) + DBUG_RETURN(TRUE); + key_iterator2.rewind (); + if (key->type != Key::FOREIGN_KEY) + { + while ((key2 = key_iterator2++) != key) + { + /* + is_foreign_key_prefix(key, key2) returns true if key or key2, or + both, is 'generated', and a generated key is a prefix of the other + key. Then we do not need the generated shorter key. + */ + if (key2->type != Key::FOREIGN_KEY && key2->type != Key::IGNORE_KEY && + is_foreign_key_prefix(key, key2)) + { + /* mark that the generated key should be ignored */ + if (!key2->generated || + (key->generated && key->columns.elements < + key2->columns.elements)) + key->type= Key::IGNORE_KEY; + else + { + key2->type= Key::IGNORE_KEY; + key_parts-= key2->columns.elements; + (*key_count)--; + } + break; + } + } + } + if (key->type != Key::IGNORE_KEY) + key_parts+=key->columns.elements; + else + (*key_count)--; + if (key->name.str && !tmp_table && (key->type != Key::PRIMARY) && + !my_strcasecmp(scs, key->name.str, primary_key_name.str)) + { + my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), key->name.str); + DBUG_RETURN(TRUE); + } + if (key->type == Key::PRIMARY && key->name.str && + my_strcasecmp(scs, key->name.str, primary_key_name.str) != 0) + { + bool sav_abort_on_warning= thd->abort_on_warning; + thd->abort_on_warning= FALSE; /* Don't make an error out of this. 
*/ + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_NAME_FOR_INDEX, + "Name '%-.100s' ignored for PRIMARY key.", + key->name.str); + thd->abort_on_warning= sav_abort_on_warning; + } + } + + KEY *key_info= *key_info_buffer= (KEY*)thd->calloc(sizeof(KEY) * (*key_count)); + if (!*key_info_buffer) + DBUG_RETURN(true); // Out of memory + + key_iterator.rewind(); + while ((key=key_iterator++)) + { + if (key->type == Key::IGNORE_KEY) + { + /* The key was replaced by another key */ + if (!create_info->tmp_table() && + alter_info->add_stat_drop_index(thd, &key->name)) + DBUG_RETURN(true); + continue; + } + if (key->type == Key::FOREIGN_KEY) + continue; + /* Create the key name based on the first column (if not given) */ + if (key->type == Key::PRIMARY) + { + if (primary_key) + { + my_message(ER_MULTIPLE_PRI_KEY, ER_THD(thd, ER_MULTIPLE_PRI_KEY), + MYF(0)); + DBUG_RETURN(true); + } + key_name= primary_key_name.str; + primary_key=1; + } + else if (!(key_name= key->name.str)) + { + auto field_name= key->columns.elem(0)->field_name; + it.rewind(); + while ((sql_field=it++) && + lex_string_cmp(scs, &field_name, &sql_field->field_name)); + if (sql_field) + field_name= sql_field->field_name; + key_name=make_unique_key_name(thd, field_name.str, + *key_info_buffer, key_info); + } + if (check_if_keyname_exists(key_name, *key_info_buffer, key_info)) + { + my_error(ER_DUP_KEYNAME, MYF(0), key_name); + DBUG_RETURN(true); + } + + key_info->name.str= (char*) key_name; + key_info->name.length= strlen(key_name); + key->name= key_info->name; + + int parts_added= append_system_key_parts(thd, create_info, key); + if (parts_added < 0) + DBUG_RETURN(true); + key_parts += parts_added; + key_info++; + } + tmp=file->max_keys(); + if (*key_count > tmp) + { + my_error(ER_TOO_MANY_KEYS,MYF(0),tmp); + DBUG_RETURN(TRUE); + } + + key_part_info=(KEY_PART_INFO*) thd->calloc(sizeof(KEY_PART_INFO)*key_parts); + if (!key_part_info) + DBUG_RETURN(true); // Out of memory + + key_info= 
*key_info_buffer; + key_iterator.rewind(); + key_number=0; + for (; (key=key_iterator++) ; key_number++) + { + uint key_length=0; + Create_field *auto_increment_key= 0; + Key_part_spec *column; + + is_hash_field_needed= false; + if (key->type == Key::IGNORE_KEY) + { + /* ignore redundant keys */ + do + key=key_iterator++; + while (key && key->type == Key::IGNORE_KEY); + if (!key) + break; + } + + switch (key->type) { + case Key::MULTIPLE: + key_info->flags= 0; + break; + case Key::FULLTEXT: + key_info->flags= HA_FULLTEXT; + if ((key_info->parser_name= &key->key_create_info.parser_name)->str) + key_info->flags|= HA_USES_PARSER; + else + key_info->parser_name= 0; + break; + case Key::SPATIAL: +#ifdef HAVE_SPATIAL + key_info->flags= HA_SPATIAL; + break; +#else + my_error(ER_FEATURE_DISABLED, MYF(0), + sym_group_geom.name, sym_group_geom.needed_define); + DBUG_RETURN(TRUE); +#endif + case Key::FOREIGN_KEY: + key_number--; // Skip this key + continue; + case Key::IGNORE_KEY: + DBUG_ASSERT(0); + break; + default: + key_info->flags = HA_NOSAME; + break; + } + if (key->generated) + key_info->flags|= HA_GENERATED_KEY; + + key_info->user_defined_key_parts=(uint8) key->columns.elements; + key_info->key_part=key_part_info; + key_info->usable_key_parts= key_number; + key_info->algorithm= key->key_create_info.algorithm; + key_info->option_list= key->option_list; + if (parse_option_list(thd, create_info->db_type, &key_info->option_struct, + &key_info->option_list, + create_info->db_type->index_options, FALSE, + thd->mem_root)) + DBUG_RETURN(TRUE); + + if (key->type == Key::FULLTEXT) + { + if (!(file->ha_table_flags() & HA_CAN_FULLTEXT)) + { + my_error(ER_TABLE_CANT_HANDLE_FT, MYF(0), file->table_type()); + DBUG_RETURN(TRUE); + } + } + /* + Make SPATIAL to be RTREE by default + SPATIAL only on BLOB or at least BINARY, this + actually should be replaced by special GEOM type + in near future when new frm file is ready + checking for proper key parts number: + */ + + /* TODO: Add 
proper checks if handler supports key_type and algorithm */ + if (key_info->flags & HA_SPATIAL) + { + if (!(file->ha_table_flags() & HA_CAN_RTREEKEYS)) + { + my_error(ER_TABLE_CANT_HANDLE_SPKEYS, MYF(0), file->table_type()); + DBUG_RETURN(TRUE); + } + if (key_info->user_defined_key_parts != 1) + { + my_error(ER_WRONG_ARGUMENTS, MYF(0), "SPATIAL INDEX"); + DBUG_RETURN(TRUE); + } + } + else if (key_info->algorithm == HA_KEY_ALG_RTREE) + { +#ifdef HAVE_RTREE_KEYS + if ((key_info->user_defined_key_parts & 1) == 1) + { + my_error(ER_WRONG_ARGUMENTS, MYF(0), "RTREE INDEX"); + DBUG_RETURN(TRUE); + } + /* TODO: To be deleted */ + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "RTREE INDEX"); + DBUG_RETURN(TRUE); +#else + my_error(ER_FEATURE_DISABLED, MYF(0), + sym_group_rtree.name, sym_group_rtree.needed_define); + DBUG_RETURN(TRUE); +#endif + } + + /* Take block size from key part or table part */ + /* + TODO: Add warning if block size changes. We can't do it here, as + this may depend on the size of the key + */ + key_info->block_size= (key->key_create_info.block_size ? + key->key_create_info.block_size : + create_info->key_block_size); + + /* + Remember block_size for the future if the block size was given + either for key or table and it was given for the key during + create/alter table or we have an active key_block_size for the + table. + The idea is that table specific key_block_size > 0 will only affect + new keys and old keys will remember their original value. 
+ */ + if (key_info->block_size && + ((key->key_create_info.flags & HA_USES_BLOCK_SIZE) || + create_info->key_block_size)) + key_info->flags|= HA_USES_BLOCK_SIZE; + + List_iterator cols(key->columns), cols2(key->columns); + CHARSET_INFO *ft_key_charset=0; // for FULLTEXT + for (uint column_nr=0 ; (column=cols++) ; column_nr++) + { + Key_part_spec *dup_column; + + it.rewind(); + field=0; + while ((sql_field=it++) && + lex_string_cmp(scs, &column->field_name, &sql_field->field_name)) + field++; + /* + Either field is not present or field visibility is > INVISIBLE_USER + */ + if (!sql_field || (sql_field->invisible > INVISIBLE_USER && + !column->generated)) + { + my_error(ER_KEY_COLUMN_DOES_NOT_EXIST, MYF(0), column->field_name.str); + DBUG_RETURN(TRUE); + } + if (sql_field->invisible > INVISIBLE_USER && + !(sql_field->flags & VERS_SYSTEM_FIELD) && + !key->invisible && !DBUG_IF("test_invisible_index")) + { + my_error(ER_KEY_COLUMN_DOES_NOT_EXIST, MYF(0), column->field_name.str); + DBUG_RETURN(TRUE); + } + while ((dup_column= cols2++) != column) + { + if (!lex_string_cmp(scs, &column->field_name, &dup_column->field_name)) + { + my_error(ER_DUP_FIELDNAME, MYF(0), column->field_name.str); + DBUG_RETURN(TRUE); + } + } + + if (sql_field->compression_method()) + { + my_error(ER_COMPRESSED_COLUMN_USED_AS_KEY, MYF(0), + column->field_name.str); + DBUG_RETURN(TRUE); + } + + cols2.rewind(); + switch(key->type) { + + case Key::FULLTEXT: + if (sql_field->type_handler()->Key_part_spec_init_ft(column, + *sql_field) || + (ft_key_charset && sql_field->charset != ft_key_charset)) + { + my_error(ER_BAD_FT_COLUMN, MYF(0), column->field_name.str); + DBUG_RETURN(-1); + } + ft_key_charset= sql_field->charset; + break; + + case Key::SPATIAL: + if (sql_field->type_handler()->Key_part_spec_init_spatial(column, + *sql_field) || + sql_field->check_vcol_for_key(thd)) + DBUG_RETURN(TRUE); + if (!(sql_field->flags & NOT_NULL_FLAG)) + { + my_message(ER_SPATIAL_CANT_HAVE_NULL, + ER_THD(thd, 
ER_SPATIAL_CANT_HAVE_NULL), MYF(0)); + DBUG_RETURN(TRUE); + } + break; + + case Key::PRIMARY: + if (sql_field->vcol_info) + { + my_error(ER_PRIMARY_KEY_BASED_ON_GENERATED_COLUMN, MYF(0)); + DBUG_RETURN(TRUE); + } + if (sql_field->type_handler()->Key_part_spec_init_primary(column, + *sql_field, + file)) + DBUG_RETURN(TRUE); + if (!(sql_field->flags & NOT_NULL_FLAG)) + { + /* Implicitly set primary key fields to NOT NULL for ISO conf. */ + sql_field->flags|= NOT_NULL_FLAG; + sql_field->pack_flag&= ~FIELDFLAG_MAYBE_NULL; + null_fields--; + } + break; + + case Key::MULTIPLE: + if (sql_field->type_handler()->Key_part_spec_init_multiple(column, + *sql_field, + file) || + sql_field->check_vcol_for_key(thd) || + key_add_part_check_null(file, key_info, sql_field, column)) + DBUG_RETURN(TRUE); + break; + + case Key::FOREIGN_KEY: + if (sql_field->type_handler()->Key_part_spec_init_foreign(column, + *sql_field, + file) || + sql_field->check_vcol_for_key(thd) || + key_add_part_check_null(file, key_info, sql_field, column)) + DBUG_RETURN(TRUE); + break; + + case Key::UNIQUE: + if (sql_field->type_handler()->Key_part_spec_init_unique(column, + *sql_field, file, + &is_hash_field_needed) || + sql_field->check_vcol_for_key(thd) || + key_add_part_check_null(file, key_info, sql_field, column)) + DBUG_RETURN(TRUE); + break; + case Key::IGNORE_KEY: + break; + } + + if (MTYP_TYPENR(sql_field->unireg_check) == Field::NEXT_NUMBER) + { + DBUG_ASSERT(key->type != Key::FULLTEXT); + DBUG_ASSERT(key->type != Key::SPATIAL); + if (column_nr == 0 || (file->ha_table_flags() & HA_AUTO_PART_KEY)) + auto_increment--; // Field is used + auto_increment_key= sql_field; + } + + key_part_info->fieldnr= field; + key_part_info->offset= (uint16) sql_field->offset; + key_part_info->key_type=sql_field->pack_flag; + key_part_info->key_part_flag= column->asc ? 
0 : HA_REVERSE_SORT; + uint key_part_length= sql_field->type_handler()-> + calc_key_length(*sql_field); + + if (column->length) + { + if (f_is_blob(sql_field->pack_flag)) + { + key_part_length= MY_MIN(column->length, + blob_length_by_type(sql_field->real_field_type()) + * sql_field->charset->mbmaxlen); + if (key_part_length > max_key_length || + key_part_length > file->max_key_part_length()) + { + if (key->type == Key::MULTIPLE) + { + key_part_length= MY_MIN(max_key_length, file->max_key_part_length()); + /* not a critical problem */ + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_TOO_LONG_KEY, ER_THD(thd, ER_TOO_LONG_KEY), + key_part_length); + /* Align key length to multibyte char boundary */ + key_part_length-= key_part_length % sql_field->charset->mbmaxlen; + } + } + } + // Catch invalid use of partial keys + else if (!f_is_geom(sql_field->pack_flag) && + // is the key partial? + column->length != key_part_length && + // is prefix length bigger than field length? + (column->length > key_part_length || + // can the field have a partial key? + !sql_field->type_handler()->type_can_have_key_part() || + // a packed field can't be used in a partial key + f_is_packed(sql_field->pack_flag) || + // does the storage engine allow prefixed search? + ((file->ha_table_flags() & HA_NO_PREFIX_CHAR_KEYS) && + // and is this a 'unique' key? 
+ (key_info->flags & HA_NOSAME)))) + { + my_message(ER_WRONG_SUB_KEY, ER_THD(thd, ER_WRONG_SUB_KEY), MYF(0)); + DBUG_RETURN(TRUE); + } + else if (!(file->ha_table_flags() & HA_NO_PREFIX_CHAR_KEYS)) + key_part_length= column->length; + } + else if (key_part_length == 0 && (sql_field->flags & NOT_NULL_FLAG) && + !is_hash_field_needed) + { + my_error(ER_WRONG_KEY_COLUMN, MYF(0), file->table_type(), + column->field_name.str); + DBUG_RETURN(TRUE); + } + if (key_part_length > file->max_key_part_length() && + key->type != Key::FULLTEXT) + { + if (key->type == Key::MULTIPLE) + { + key_part_length= file->max_key_part_length(); + /* not a critical problem */ + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_TOO_LONG_KEY, ER_THD(thd, ER_TOO_LONG_KEY), + key_part_length); + /* Align key length to multibyte char boundary */ + key_part_length-= key_part_length % sql_field->charset->mbmaxlen; + } + else + { + if (key->type != Key::UNIQUE) + { + key_part_length= MY_MIN(max_key_length, file->max_key_part_length()); + my_error(ER_TOO_LONG_KEY, MYF(0), key_part_length); + DBUG_RETURN(TRUE); + } + } + } + + if (key->type == Key::UNIQUE + && key_part_length > MY_MIN(max_key_length, + file->max_key_part_length())) + is_hash_field_needed= true; + + /* We can not store key_part_length more then 2^16 - 1 in frm */ + if (is_hash_field_needed && column->length > UINT_MAX16) + { + my_error(ER_TOO_LONG_KEYPART, MYF(0), UINT_MAX16); + DBUG_RETURN(TRUE); + } + else + key_part_info->length= (uint16) key_part_length; + /* Use packed keys for long strings on the first column */ + if (!((*db_options) & HA_OPTION_NO_PACK_KEYS) && + !((create_info->table_options & HA_OPTION_NO_PACK_KEYS)) && + (key_part_length >= KEY_DEFAULT_PACK_LENGTH) && + !is_hash_field_needed) + { + key_info->flags|= sql_field->type_handler()->KEY_pack_flags(column_nr); + } + /* Check if the key segment is partial, set the key flag accordingly */ + if (key_part_length != sql_field->type_handler()-> + 
calc_key_length(*sql_field) && + key_part_length != sql_field->type_handler()->max_octet_length()) + key_info->flags|= HA_KEY_HAS_PART_KEY_SEG; + + key_length+= key_part_length; + key_part_info++; + } + if (!key_info->name.str || check_column_name(key_info->name.str)) + { + my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), key_info->name.str); + DBUG_RETURN(TRUE); + } + if (key->type == Key::UNIQUE && !(key_info->flags & HA_NULL_PART_KEY)) + unique_key=1; + key_info->key_length=(uint16) key_length; + if (key_info->key_length > max_key_length && key->type == Key::UNIQUE) + is_hash_field_needed= true; // for case "a BLOB UNIQUE" + if (key_length > max_key_length && key->type != Key::FULLTEXT && + !is_hash_field_needed) + { + my_error(ER_TOO_LONG_KEY, MYF(0), max_key_length); + DBUG_RETURN(TRUE); + } + + /* Check long unique keys */ + if (is_hash_field_needed) + { + if (auto_increment_key) + { + my_error(ER_NO_AUTOINCREMENT_WITH_UNIQUE, MYF(0), + sql_field->field_name.str, + key_info->name.str); + DBUG_RETURN(TRUE); + } + if (key_info->algorithm != HA_KEY_ALG_UNDEF && + key_info->algorithm != HA_KEY_ALG_HASH ) + { + my_error(ER_TOO_LONG_KEY, MYF(0), max_key_length); + DBUG_RETURN(TRUE); + } + } + if (is_hash_field_needed || + (key_info->algorithm == HA_KEY_ALG_HASH && + key->type != Key::PRIMARY && + key_info->flags & HA_NOSAME && + !(file->ha_table_flags() & HA_CAN_HASH_KEYS ) && + file->ha_table_flags() & HA_CAN_VIRTUAL_COLUMNS)) + { + Create_field *hash_fld= add_hash_field(thd, &alter_info->create_list, + key_info); + if (!hash_fld) + DBUG_RETURN(TRUE); + hash_fld->offset= record_offset; + hash_fld->charset= create_info->default_table_charset; + record_offset+= hash_fld->pack_length; + if (key_info->flags & HA_NULL_PART_KEY) + null_fields++; + else + { + hash_fld->flags|= NOT_NULL_FLAG; + hash_fld->pack_flag&= ~FIELDFLAG_MAYBE_NULL; + } + } + if (validate_comment_length(thd, &key->key_create_info.comment, + INDEX_COMMENT_MAXLEN, + ER_TOO_LONG_INDEX_COMMENT, + 
key_info->name.str)) + DBUG_RETURN(TRUE); + + key_info->comment.length= key->key_create_info.comment.length; + if (key_info->comment.length > 0) + { + key_info->flags|= HA_USES_COMMENT; + key_info->comment.str= key->key_create_info.comment.str; + } + + // Check if a duplicate index is defined. + check_duplicate_key(thd, key, key_info, &alter_info->key_list); + + key_info->without_overlaps= key->without_overlaps; + if (key_info->without_overlaps) + { + if (key_info->algorithm == HA_KEY_ALG_HASH || + key_info->algorithm == HA_KEY_ALG_LONG_HASH) + + { +without_overlaps_err: + my_error(ER_KEY_CANT_HAVE_WITHOUT_OVERLAPS, MYF(0), key_info->name.str); + DBUG_RETURN(true); + } + key_iterator2.rewind(); + while ((key2 = key_iterator2++)) + { + if (key2->type != Key::FOREIGN_KEY) + continue; + DBUG_ASSERT(key != key2); + Foreign_key *fk= (Foreign_key*) key2; + if (fk->update_opt != FK_OPTION_CASCADE) + continue; + for (Key_part_spec& kp: key->columns) + { + for (Key_part_spec& kp2: fk->columns) + { + if (!lex_string_cmp(scs, &kp.field_name, &kp2.field_name)) + { + goto without_overlaps_err; + } + } + } + } + create_info->period_info.unique_keys++; + } + key_info->is_ignored= key->key_create_info.is_ignored; + key_info++; + } + + if (!unique_key && !primary_key && !create_info->sequence && + (file->ha_table_flags() & HA_REQUIRE_PRIMARY_KEY)) + { + my_message(ER_REQUIRES_PRIMARY_KEY, ER_THD(thd, ER_REQUIRES_PRIMARY_KEY), + MYF(0)); + DBUG_RETURN(TRUE); + } + if (auto_increment > 0) + { + my_message(ER_WRONG_AUTO_KEY, ER_THD(thd, ER_WRONG_AUTO_KEY), MYF(0)); + DBUG_RETURN(TRUE); + } + /* + We cannot do qsort of key info if MyISAM/Aria does inplace. These engines + do not synchronise key info on inplace alter and that qsort is + indeterministic (MDEV-25803). + + Yet we do not know whether we do inplace or not. 
That detection is done + after this create_table_impl() and that cannot be changed because of chicken + and egg problem (inplace processing requires key info made by + create_table_impl()). + + MyISAM/Aria cannot add index inplace so we are safe to qsort key info in + that case. And if we don't add index then we do not need qsort at all. + */ + if (!(create_info->options & HA_SKIP_KEY_SORT)) + { + /* + Sort keys in optimized order. + + Note: PK must be always first key, otherwise init_from_binary_frm_image() + can not understand it. + */ + my_qsort((uchar*) *key_info_buffer, *key_count, sizeof(KEY), + (qsort_cmp) sort_keys); + } + create_info->null_bits= null_fields; + + /* Check fields. */ + it.rewind(); + while ((sql_field=it++)) + { + Field::utype type= (Field::utype) MTYP_TYPENR(sql_field->unireg_check); + + /* + Set NO_DEFAULT_VALUE_FLAG if this field doesn't have a default value and + it is NOT NULL, not an AUTO_INCREMENT field, not a TIMESTAMP and not + updated trough a NOW() function. + */ + if (!sql_field->default_value && + !sql_field->has_default_function() && + (sql_field->flags & NOT_NULL_FLAG) && + (!sql_field->is_timestamp_type() || + (thd->variables.option_bits & OPTION_EXPLICIT_DEF_TIMESTAMP))&& + !sql_field->vers_sys_field()) + { + sql_field->flags|= NO_DEFAULT_VALUE_FLAG; + sql_field->pack_flag|= FIELDFLAG_NO_DEFAULT; + } + + if (thd->variables.sql_mode & MODE_NO_ZERO_DATE && + !sql_field->default_value && !sql_field->vcol_info && + !sql_field->vers_sys_field() && + sql_field->is_timestamp_type() && + !(thd->variables.option_bits & OPTION_EXPLICIT_DEF_TIMESTAMP) && + (sql_field->flags & NOT_NULL_FLAG) && + (type == Field::NONE || type == Field::TIMESTAMP_UN_FIELD)) + { + /* + An error should be reported if: + - NO_ZERO_DATE SQL mode is active; + - there is no explicit DEFAULT clause (default column value); + - this is a TIMESTAMP column; + - the column is not NULL; + - this is not the DEFAULT CURRENT_TIMESTAMP column. 
+ + In other words, an error should be reported if + - NO_ZERO_DATE SQL mode is active; + - the column definition is equivalent to + 'column_name TIMESTAMP DEFAULT 0'. + */ + + my_error(ER_INVALID_DEFAULT, MYF(0), sql_field->field_name.str); + DBUG_RETURN(TRUE); + } + if (sql_field->invisible == INVISIBLE_USER && + sql_field->flags & NOT_NULL_FLAG && + sql_field->flags & NO_DEFAULT_VALUE_FLAG && + !sql_field->vers_sys_field()) + { + my_error(ER_INVISIBLE_NOT_NULL_WITHOUT_DEFAULT, MYF(0), + sql_field->field_name.str); + DBUG_RETURN(TRUE); + } + + if (create_simple) + { + if (sql_field->vcol_info && sql_field->vcol_info->expr && + check_expression(sql_field->vcol_info, &sql_field->field_name, + sql_field->vcol_info->stored_in_db + ? VCOL_GENERATED_STORED : VCOL_GENERATED_VIRTUAL, + alter_info)) + DBUG_RETURN(TRUE); + + if (sql_field->default_value && + check_expression(sql_field->default_value, &sql_field->field_name, + VCOL_DEFAULT, alter_info)) + DBUG_RETURN(TRUE); + + if (sql_field->check_constraint && + check_expression(sql_field->check_constraint, &sql_field->field_name, + VCOL_CHECK_FIELD, alter_info)) + DBUG_RETURN(TRUE); + } + } + + /* Check table level constraints */ + create_info->check_constraint_list= &alter_info->check_constraint_list; + { + List_iterator_fast c_it(alter_info->check_constraint_list); + while (Virtual_column_info *check= c_it++) + { + if (check->name.length && !check->automatic_name) + { + /* Check that there's no repeating table CHECK constraint names. */ + List_iterator_fast + dup_it(alter_info->check_constraint_list); + const Virtual_column_info *dup_check; + while ((dup_check= dup_it++) && dup_check != check) + { + if (check->name.streq(dup_check->name)) + { + my_error(ER_DUP_CONSTRAINT_NAME, MYF(0), "CHECK", check->name.str); + DBUG_RETURN(TRUE); + } + } + + /* Check that there's no repeating key constraint names. 
*/ + List_iterator_fast key_it(alter_info->key_list); + while (const Key *key= key_it++) + { + if (key->type != Key::PRIMARY && key->type != Key::UNIQUE && + key->type != Key::FOREIGN_KEY) + continue; + + if (check->name.length == key->name.length && + my_strcasecmp(scs, check->name.str, key->name.str) == 0) + { + my_error(ER_DUP_CONSTRAINT_NAME, MYF(0), "CHECK", check->name.str); + DBUG_RETURN(TRUE); + } + } + + if (check_string_char_length(&check->name, 0, NAME_CHAR_LEN, scs, 1)) + { + my_error(ER_TOO_LONG_IDENT, MYF(0), check->name.str); + DBUG_RETURN(TRUE); + } + } + if (check_expression(check, &check->name, VCOL_CHECK_TABLE, alter_info)) + DBUG_RETURN(TRUE); + } + } + + /* Give warnings for not supported table options */ + if (create_info->used_fields & HA_CREATE_USED_TRANSACTIONAL && + !file->has_transactional_option()) + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_OPTION, + ER_THD(thd, ER_UNKNOWN_OPTION), "transactional"); + + if (parse_option_list(thd, file->partition_ht(), &create_info->option_struct, + &create_info->option_list, + file->partition_ht()->table_options, FALSE, + thd->mem_root)) + DBUG_RETURN(TRUE); + + DBUG_EXECUTE_IF("key", + Debug_key::print_keys(thd, "prep_create_table: ", + *key_info_buffer, *key_count); + ); + + DBUG_RETURN(FALSE); +} + + +/* + Preparation for table creation + + SYNOPSIS + mysql_prepare_create_table() + thd Thread object. + create_info Create information (like MAX_ROWS). + alter_info List of columns and indexes to create + db_options INOUT Table options (like HA_OPTION_PACK_RECORD). + file The handler for the new table. + key_info_buffer OUT An array of KEY structs for the indexes. + key_count OUT The number of elements in the array. + create_table_mode C_ORDINARY_CREATE, C_ALTER_TABLE, + C_CREATE_SELECT, C_ASSISTED_DISCOVERY + + DESCRIPTION + Prepares the table and key structures for table creation. 
+ + NOTES + sets create_info->varchar if the table has a varchar + + RETURN VALUES + FALSE OK + TRUE error +*/ + +static int +mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info, + Alter_info *alter_info, uint *db_options, + handler *file, KEY **key_info_buffer, + uint *key_count, int create_table_mode) +{ + return mysql_prepare_create_table_stage1(thd, create_info, alter_info) || + mysql_prepare_create_table_finalize(thd, create_info, alter_info, + db_options, file, key_info_buffer, + key_count, create_table_mode); +} + + +/** + check comment length of table, column, index and partition + + If comment length is more than the standard length + truncate it and store the comment length upto the standard + comment length size + + @param thd Thread handle + @param[in,out] comment Comment + @param max_len Maximum allowed comment length + @param err_code Error message + @param name Name of commented object + + @return Operation status + @retval true Error found + @retval false On Success +*/ +bool validate_comment_length(THD *thd, LEX_CSTRING *comment, size_t max_len, + uint err_code, const char *name) +{ + DBUG_ENTER("validate_comment_length"); + if (comment->length == 0) + DBUG_RETURN(false); + + size_t tmp_len= + Well_formed_prefix(system_charset_info, *comment, max_len).length(); + if (tmp_len < comment->length) + { + if (comment->length <= max_len) + { + if (thd->is_strict_mode()) + { + my_error(ER_INVALID_CHARACTER_STRING, MYF(0), + system_charset_info->cs_name.str, comment->str); + DBUG_RETURN(true); + } + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_INVALID_CHARACTER_STRING, + ER_THD(thd, ER_INVALID_CHARACTER_STRING), + system_charset_info->cs_name.str, comment->str); + comment->length= tmp_len; + DBUG_RETURN(false); + } + if (thd->is_strict_mode()) + { + my_error(err_code, MYF(0), name, static_cast(max_len)); + DBUG_RETURN(true); + } + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, err_code, + ER_THD(thd, err_code), name, 
+ static_cast(max_len)); + comment->length= tmp_len; + } + DBUG_RETURN(false); +} + + +/* + Extend long VARCHAR fields to blob & prepare field if it's a blob + + SYNOPSIS + prepare_blob_field() + + RETURN + 0 ok + 1 Error (sql_field can't be converted to blob) + In this case the error is given +*/ + +bool Column_definition::prepare_blob_field(THD *thd) +{ + DBUG_ENTER("Column_definition::prepare_blob_field"); + + if (length > MAX_FIELD_VARCHARLENGTH && !(flags & BLOB_FLAG)) + { + /* Convert long VARCHAR columns to TEXT or BLOB */ + char warn_buff[MYSQL_ERRMSG_SIZE]; + + if (thd->is_strict_mode()) + { + my_error(ER_TOO_BIG_FIELDLENGTH, MYF(0), field_name.str, + static_cast(MAX_FIELD_VARCHARLENGTH / charset->mbmaxlen)); + DBUG_RETURN(1); + } + set_handler(&type_handler_blob); + flags|= BLOB_FLAG; + my_snprintf(warn_buff, sizeof(warn_buff), ER_THD(thd, ER_AUTO_CONVERT), + field_name.str, + (charset == &my_charset_bin) ? "VARBINARY" : "VARCHAR", + (charset == &my_charset_bin) ? "BLOB" : "TEXT"); + push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, ER_AUTO_CONVERT, + warn_buff); + } + + if ((flags & BLOB_FLAG) && length) + { + if (real_field_type() == FIELD_TYPE_BLOB || + real_field_type() == FIELD_TYPE_TINY_BLOB || + real_field_type() == FIELD_TYPE_MEDIUM_BLOB) + { + /* The user has given a length to the blob column */ + set_handler(Type_handler::blob_type_handler((uint) length)); + pack_length= type_handler()->calc_pack_length(0); + } + length= 0; + } + DBUG_RETURN(0); +} + + +/* + Preparation of Create_field for SP function return values. + Based on code used in the inner loop of mysql_prepare_create_table() + above. + + SYNOPSIS + sp_prepare_create_field() + thd Thread object + mem_root Memory root to allocate components on (e.g. interval) + + DESCRIPTION + Prepares the field structures for field creation. 
+ +*/ + +bool Column_definition::sp_prepare_create_field(THD *thd, MEM_ROOT *mem_root) +{ + const Column_derived_attributes dattr(thd->variables.collation_database); + return prepare_stage1(thd, mem_root, + COLUMN_DEFINITION_ROUTINE_LOCAL, &dattr) || + prepare_stage2(NULL, HA_CAN_GEOMETRY); +} + + +/** + Appends key parts generated by mariadb server. + Adds row_end in UNIQUE keys for system versioning, + and period fields for WITHOUT OVERLAPS. + @param thd Thread data + @param create_info Table create info + @param key Parsed key + @return a number of key parts added to key. + */ +static int append_system_key_parts(THD *thd, HA_CREATE_INFO *create_info, + Key *key) +{ + const Lex_ident &row_start_field= create_info->vers_info.as_row.start; + const Lex_ident &row_end_field= create_info->vers_info.as_row.end; + DBUG_ASSERT(!create_info->versioned() || (row_start_field && row_end_field)); + + int result = 0; + if (create_info->versioned() && (key->type == Key::PRIMARY + || key->type == Key::UNIQUE)) + { + Key_part_spec *key_part=NULL; + List_iterator part_it(key->columns); + while ((key_part=part_it++)) + { + if (row_start_field.streq(key_part->field_name) || + row_end_field.streq(key_part->field_name)) + break; + } + if (!key_part) + { + key->columns.push_back(new (thd->mem_root) + Key_part_spec(&row_end_field, 0, true)); + result++; + } + + } + + if (key->without_overlaps) + { + DBUG_ASSERT(key->type == Key::PRIMARY || key->type == Key::UNIQUE); + if (!create_info->period_info.is_set() + || !key->period.streq(create_info->period_info.name)) + { + my_error(ER_PERIOD_NOT_FOUND, MYF(0), key->period.str); + return -1; + } + + const auto &period_start= create_info->period_info.period.start; + const auto &period_end= create_info->period_info.period.end; + List_iterator part_it(key->columns); + while (Key_part_spec *key_part= part_it++) + { + if (period_start.streq(key_part->field_name) + || period_end.streq(key_part->field_name)) + { + 
my_error(ER_KEY_CONTAINS_PERIOD_FIELDS, MYF(0), key->name.str, + key_part->field_name.str); + return -1; + } + } + const auto &period= create_info->period_info.period; + key->columns.push_back(new (thd->mem_root) + Key_part_spec(&period.end, 0, true)); + key->columns.push_back(new (thd->mem_root) + Key_part_spec(&period.start, 0, true)); + result += 2; + } + + return result; +} + +handler *mysql_create_frm_image(THD *thd, HA_CREATE_INFO *create_info, + Alter_info *alter_info, int create_table_mode, + KEY **key_info, uint *key_count, + LEX_CUSTRING *frm) +{ + uint db_options; + handler *file; + DBUG_ENTER("mysql_create_frm_image"); + + DBUG_ASSERT(create_info->default_table_charset); + + if (!alter_info->create_list.elements) + { + my_error(ER_TABLE_MUST_HAVE_COLUMNS, MYF(0)); + DBUG_RETURN(NULL); + } + + if (mysql_prepare_create_table_stage1(thd, create_info, alter_info)) + DBUG_RETURN(NULL); + + db_options= create_info->table_options_with_row_type(); + + if (unlikely(!(file= get_new_handler((TABLE_SHARE*) 0, thd->mem_root, + create_info->db_type)))) + DBUG_RETURN(NULL); + +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info= thd->work_part_info; + + if (!part_info && create_info->db_type->partition_flags && + (create_info->db_type->partition_flags() & HA_USE_AUTO_PARTITION)) + { + /* + Table is not defined as a partitioned table but the engine handles + all tables as partitioned. The handler will set up the partition info + object with the default settings. + */ + thd->work_part_info= part_info= new partition_info(); + if (unlikely(!part_info)) + goto err; + + file->set_auto_partitions(part_info); + part_info->default_engine_type= create_info->db_type; + part_info->is_auto_partitioned= TRUE; + } + if (part_info) + { + /* + The table has been specified as a partitioned table. 
+ If this is part of an ALTER TABLE the handler will be the partition + handler but we need to specify the default handler to use for + partitions also in the call to check_partition_info. We transport + this information in the default_db_type variable, it is either + DB_TYPE_DEFAULT or the engine set in the ALTER TABLE command. + */ + handlerton *part_engine_type= create_info->db_type; + char *part_syntax_buf; + uint syntax_len; + handlerton *engine_type; + List_iterator part_it(part_info->partitions); + partition_element *part_elem; + + while ((part_elem= part_it++)) + { + if (part_elem->part_comment) + { + LEX_CSTRING comment= { part_elem->part_comment, + strlen(part_elem->part_comment) + }; + if (validate_comment_length(thd, &comment, + TABLE_PARTITION_COMMENT_MAXLEN, + ER_TOO_LONG_TABLE_PARTITION_COMMENT, + part_elem->partition_name)) + DBUG_RETURN(NULL); + /* cut comment length. Safe to do in all cases */ + ((char*)part_elem->part_comment)[comment.length]= '\0'; + } + if (part_elem->subpartitions.elements) + { + List_iterator sub_it(part_elem->subpartitions); + partition_element *subpart_elem; + while ((subpart_elem= sub_it++)) + { + if (subpart_elem->part_comment) + { + LEX_CSTRING comment= { + subpart_elem->part_comment, strlen(subpart_elem->part_comment) + }; + if (validate_comment_length(thd, &comment, + TABLE_PARTITION_COMMENT_MAXLEN, + ER_TOO_LONG_TABLE_PARTITION_COMMENT, + subpart_elem->partition_name)) + DBUG_RETURN(NULL); + /* cut comment length. Safe to do in all cases */ + ((char*)subpart_elem->part_comment)[comment.length]= '\0'; + } + } + } + } + + if (create_info->tmp_table()) + { + my_error(ER_FEATURE_NOT_SUPPORTED_WITH_PARTITIONING, MYF(0), "CREATE TEMPORARY TABLE"); + goto err; + } + if ((part_engine_type == partition_hton) && + part_info->default_engine_type) + { + /* + This only happens at ALTER TABLE. + default_engine_type was assigned from the engine set in the ALTER + TABLE command. 
+ */ + ; + } + else + { + if (create_info->used_fields & HA_CREATE_USED_ENGINE) + { + part_info->default_engine_type= create_info->db_type; + } + else + { + if (part_info->default_engine_type == NULL) + { + part_info->default_engine_type= ha_default_handlerton(thd); + } + } + } + DBUG_PRINT("info", ("db_type = %s create_info->db_type = %s", + ha_resolve_storage_engine_name(part_info->default_engine_type), + ha_resolve_storage_engine_name(create_info->db_type))); + if (part_info->check_partition_info(thd, &engine_type, file, + create_info, FALSE)) + goto err; + part_info->default_engine_type= engine_type; + + if (part_info->vers_info && !create_info->versioned()) + { + my_error(ER_VERS_NOT_VERSIONED, MYF(0), alter_info->table_name.str); + goto err; + } + + /* + We reverse the partitioning parser and generate a standard format + for syntax stored in frm file. + */ + part_syntax_buf= generate_partition_syntax_for_frm(thd, part_info, + &syntax_len, create_info, alter_info); + if (!part_syntax_buf) + goto err; + part_info->part_info_string= part_syntax_buf; + part_info->part_info_len= syntax_len; + if ((!(engine_type->partition_flags && + ((engine_type->partition_flags() & HA_CAN_PARTITION) || + (part_info->part_type == VERSIONING_PARTITION && + engine_type->partition_flags() & HA_ONLY_VERS_PARTITION)) + )) || + create_info->db_type == partition_hton) + { + /* + The handler assigned to the table cannot handle partitioning. + Assign the partition handler as the handler of the table. + */ + DBUG_PRINT("info", ("db_type: %s", + ha_resolve_storage_engine_name(create_info->db_type))); + delete file; + create_info->db_type= partition_hton; + if (!(file= get_ha_partition(part_info))) + DBUG_RETURN(NULL); + + /* + If we have default number of partitions or subpartitions we + might require to set-up the part_info object such that it + creates a proper .par file. The current part_info object is + only used to create the frm-file and .par-file. 
+ */ + if (part_info->use_default_num_partitions && + part_info->num_parts && + (int)part_info->num_parts != + file->get_default_no_partitions(create_info)) + { + uint i; + List_iterator part_it(part_info->partitions); + part_it++; + DBUG_ASSERT(thd->lex->sql_command != SQLCOM_CREATE_TABLE); + for (i= 1; i < part_info->partitions.elements; i++) + (part_it++)->part_state= PART_TO_BE_DROPPED; + } + else if (part_info->is_sub_partitioned() && + part_info->use_default_num_subpartitions && + part_info->num_subparts && + (int)part_info->num_subparts != + file->get_default_no_partitions(create_info)) + { + DBUG_ASSERT(thd->lex->sql_command != SQLCOM_CREATE_TABLE); + part_info->num_subparts= file->get_default_no_partitions(create_info); + } + } + else if (create_info->db_type != engine_type) + { + /* + We come here when we don't use a partitioned handler. + Since we use a partitioned table it must be "native partitioned". + We have switched engine from defaults, most likely only specified + engines in partition clauses. + */ + delete file; + if (unlikely(!(file= get_new_handler((TABLE_SHARE*) 0, thd->mem_root, + engine_type)))) + DBUG_RETURN(NULL); + } + } + /* + Unless table's storage engine supports partitioning natively + don't allow foreign keys on partitioned tables (they won't + work work even with InnoDB beneath of partitioning engine). + If storage engine handles partitioning natively (like NDB) + foreign keys support is possible, so we let the engine decide. 
+ */ + if (create_info->db_type == partition_hton) + { + List_iterator_fast key_iterator(alter_info->key_list); + Key *key; + while ((key= key_iterator++)) + { + if (key->type == Key::FOREIGN_KEY) + { + my_error(ER_FEATURE_NOT_SUPPORTED_WITH_PARTITIONING, MYF(0), + "FOREIGN KEY"); + goto err; + } + } + } +#endif + + if (mysql_prepare_create_table_finalize(thd, create_info, + alter_info, &db_options, + file, key_info, key_count, + create_table_mode)) + goto err; + create_info->table_options=db_options; + + *frm= build_frm_image(thd, alter_info->table_name, create_info, + alter_info->create_list, *key_count, *key_info, file); + + if (frm->str) + DBUG_RETURN(file); + +err: + delete file; + DBUG_RETURN(NULL); +} + + +/** + Create a table + + @param thd Thread object + @param orig_db Database for error messages + @param orig_table_name Table name for error messages + (it's different from table_name for ALTER TABLE) + @param db Database + @param table_name Table name + @param path Path to table (i.e. to its .FRM file without + the extension). + @param create_info Create information (like MAX_ROWS) + @param alter_info Description of fields and keys for new table + @param create_table_mode C_ORDINARY_CREATE, C_ALTER_TABLE, + C_ASSISTED_DISCOVERY or C_ALTER_TABLE_FRM_ONLY. + or any positive number (for C_CREATE_SELECT). + If set to C_ALTER_TABLE_FRM_ONY then no frm or + table is created, only the frm image in memory. + @param[out] is_trans Identifies the type of engine where the table + was created: either trans or non-trans. + @param[out] key_info Array of KEY objects describing keys in table + which was created. + @param[out] key_count Number of keys in table which was created. + @param[out] frm The frm image. + + If one creates a temporary table, its is automatically opened and its + TABLE_SHARE is added to THD::all_temp_tables list. 
+ + Note that this function assumes that caller already have taken + exclusive metadata lock on table being created or used some other + way to ensure that concurrent operations won't intervene. + mysql_create_table() is a wrapper that can be used for this. + + @retval 0 OK + @retval 1 error + @retval -1 table existed but IF NOT EXISTS was used +*/ + +static +int create_table_impl(THD *thd, + DDL_LOG_STATE *ddl_log_state_create, + DDL_LOG_STATE *ddl_log_state_rm, + const LEX_CSTRING &orig_db, + const LEX_CSTRING &orig_table_name, + const LEX_CSTRING &db, const LEX_CSTRING &table_name, + const LEX_CSTRING &path, const DDL_options_st options, + HA_CREATE_INFO *create_info, Alter_info *alter_info, + int create_table_mode, bool *is_trans, KEY **key_info, + uint *key_count, LEX_CUSTRING *frm) +{ + LEX_CSTRING *alias; + handler *file= 0; + int error= 1; + bool frm_only= create_table_mode == C_ALTER_TABLE_FRM_ONLY; + bool internal_tmp_table= create_table_mode == C_ALTER_TABLE || frm_only; + DBUG_ENTER("create_table_impl"); + DBUG_PRINT("enter", ("db: '%s' table: '%s' tmp: %d path: %s", + db.str, table_name.str, internal_tmp_table, path.str)); + + DBUG_ASSERT(create_info->default_table_charset); + + /* Easy check for ddl logging if we are creating a temporary table */ + if (create_info->tmp_table()) + { + ddl_log_state_create= 0; + ddl_log_state_rm= 0; + } + + if (fix_constraints_names(thd, &alter_info->check_constraint_list, + create_info)) + DBUG_RETURN(1); + + if (thd->variables.sql_mode & MODE_NO_DIR_IN_CREATE) + { + if (create_info->data_file_name) + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_OPTION_IGNORED, + ER_THD(thd, WARN_OPTION_IGNORED), + "DATA DIRECTORY"); + if (create_info->index_file_name) + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_OPTION_IGNORED, + ER_THD(thd, WARN_OPTION_IGNORED), + "INDEX DIRECTORY"); + create_info->data_file_name= create_info->index_file_name= 0; + } + else + { + if 
(unlikely(error_if_data_home_dir(create_info->data_file_name, + "DATA DIRECTORY")) || + unlikely(error_if_data_home_dir(create_info->index_file_name, + "INDEX DIRECTORY")) || + unlikely(check_partition_dirs(thd->lex->part_info))) + goto err; + } + + alias= const_cast(table_case_name(create_info, &table_name)); + + /* Check if table exists */ + if (create_info->tmp_table()) + { + /* + If a table exists, it must have been pre-opened. Try looking for one + in-use in THD::all_temp_tables list of TABLE_SHAREs. + */ + TABLE *tmp_table= thd->find_temporary_table(db.str, table_name.str, + THD::TMP_TABLE_ANY); + + if (tmp_table) + { + bool table_creation_was_logged= tmp_table->s->table_creation_was_logged; + if (options.or_replace()) + { + /* + We are using CREATE OR REPLACE on an existing temporary table + Remove the old table so that we can re-create it. + */ + if (thd->drop_temporary_table(tmp_table, NULL, true)) + goto err; + } + else if (options.if_not_exists()) + goto warn; + else + { + my_error(ER_TABLE_EXISTS_ERROR, MYF(0), alias->str); + goto err; + } + /* + We have to log this query, even if it failed later to ensure the + drop is done. + */ + if (table_creation_was_logged) + { + thd->variables.option_bits|= OPTION_BINLOG_THIS; + create_info->table_was_deleted= 1; + } + } + } + else + { + if (ha_check_if_updates_are_ignored(thd, create_info->db_type, "CREATE")) + { + /* + Don't create table. CREATE will still be logged in binary log + This can happen for shared storage engines that supports + ENGINE= in the create statement (Note that S3 doesn't support this. + */ + error= 0; + goto err; + } + + handlerton *db_type; + if (!internal_tmp_table && + ha_table_exists(thd, &db, &table_name, + &create_info->org_tabledef_version, NULL, &db_type)) + { + if (ha_check_if_updates_are_ignored(thd, db_type, "CREATE")) + { + /* Don't create table. 
CREATE will still be logged in binary log */ + error= 0; + goto err; + } + + if (options.or_replace()) + { + (void) delete_statistics_for_table(thd, &db, &table_name); + + TABLE_LIST table_list; + table_list.init_one_table(&db, &table_name, 0, TL_WRITE_ALLOW_WRITE); + table_list.table= create_info->table; + + if (check_if_log_table(&table_list, TRUE, "CREATE OR REPLACE")) + goto err; + + /* + Rollback the empty transaction started in mysql_create_table() + call to open_and_lock_tables() when we are using LOCK TABLES. + */ + { + uint save_unsafe_rollback_flags= + thd->transaction->stmt.m_unsafe_rollback_flags; + (void) trans_rollback_stmt(thd); + thd->transaction->stmt.m_unsafe_rollback_flags= + save_unsafe_rollback_flags; + } + /* Remove normal table without logging. Keep tables locked */ + if (mysql_rm_table_no_locks(thd, &table_list, &thd->db, + ddl_log_state_rm, + 0, 0, 0, 0, 1, 1)) + goto err; + + debug_crash_here("ddl_log_create_after_drop"); + + /* + We have to log this query, even if it failed later to ensure the + drop is done. + */ + thd->variables.option_bits|= OPTION_BINLOG_THIS; + create_info->table_was_deleted= 1; + lex_string_set(&create_info->org_storage_engine_name, + ha_resolve_storage_engine_name(db_type)); + DBUG_EXECUTE_IF("send_kill_after_delete", + thd->set_killed(KILL_QUERY);); + /* + Restart statement transactions for the case of CREATE ... SELECT. + */ + if (thd->lex->first_select_lex()->item_list.elements && + restart_trans_for_tables(thd, thd->lex->query_tables)) + goto err; + } + else if (options.if_not_exists()) + { + /* + We never come here as part of normal create table as table existence + is checked in open_and_lock_tables(). We may come here as part of + ALTER TABLE when converting a table for a distributed engine to a + a local one. 
+ */ + + /* Log CREATE IF NOT EXISTS on slave for distributed engines */ + if (thd->slave_thread && db_type && + db_type->flags & HTON_IGNORE_UPDATES) + thd->variables.option_bits|= OPTION_BINLOG_THIS; + goto warn; + } + else + { + my_error(ER_TABLE_EXISTS_ERROR, MYF(0), table_name.str); + goto err; + } + } + } + + THD_STAGE_INFO(thd, stage_creating_table); + + if (check_engine(thd, orig_db.str, orig_table_name.str, create_info)) + goto err; + + if (create_table_mode == C_ASSISTED_DISCOVERY) + { + /* check that it's used correctly */ + DBUG_ASSERT(alter_info->create_list.elements == 0); + DBUG_ASSERT(alter_info->key_list.elements == 0); + + TABLE_SHARE share; + handlerton *hton= create_info->db_type; + int ha_err; + Field *no_fields= 0; + + if (!hton->discover_table_structure) + { + my_error(ER_TABLE_MUST_HAVE_COLUMNS, MYF(0)); + goto err; + } + + init_tmp_table_share(thd, &share, db.str, 0, table_name.str, path.str); + + /* prepare everything for discovery */ + share.field= &no_fields; + share.db_plugin= ha_lock_engine(thd, hton); + share.option_list= create_info->option_list; + share.connect_string= create_info->connect_string; + + if (parse_engine_table_options(thd, hton, &share)) + goto err; + + /* + Log that we are going to do discovery. If things fails, any generated + .frm files are deleted + */ + if (ddl_log_state_create) + ddl_log_create_table(ddl_log_state_create, (handlerton*) 0, &path, + &db, &table_name, 1); + + ha_err= hton->discover_table_structure(hton, thd, &share, create_info); + + /* + if discovery failed, the plugin will be auto-unlocked, as it + was locked on the THD, see above. 
+ if discovery succeeded, the plugin was replaced by a globally + locked plugin, that will be unlocked by free_table_share() + */ + if (ha_err) + share.db_plugin= 0; // will be auto-freed, locked above on the THD + + free_table_share(&share); + + if (ha_err) + { + my_error(ER_GET_ERRNO, MYF(0), ha_err, hton_name(hton)->str); + goto err; + } + } + else + { + if (ddl_log_state_create) + ddl_log_create_table(ddl_log_state_create, create_info->db_type, + &path, &db, &table_name, frm_only); + debug_crash_here("ddl_log_create_before_create_frm"); + + alter_info->db= orig_db; + alter_info->table_name= orig_table_name; + file= mysql_create_frm_image(thd, create_info, alter_info, + create_table_mode, key_info, key_count, frm); + /* + TODO: remove this check of thd->is_error() (now it intercept + errors in some val_*() methods and bring some single place to + such error interception). + */ + if (!file || thd->is_error()) + { + if (!file) + deletefrm(path.str); + goto err; + } + + if (thd->variables.keep_files_on_create) + create_info->options|= HA_CREATE_KEEP_FILES; + + if (file->ha_create_partitioning_metadata(path.str, NULL, CHF_CREATE_FLAG)) + goto err; + + if (!frm_only) + { + debug_crash_here("ddl_log_create_before_create_table"); + if (ha_create_table(thd, path.str, db.str, table_name.str, create_info, + frm, 0)) + { + file->ha_create_partitioning_metadata(path.str, NULL, CHF_DELETE_FLAG); + deletefrm(path.str); + goto err; + } + debug_crash_here("ddl_log_create_after_create_table"); + } + } + + create_info->table= 0; + if (!frm_only && create_info->tmp_table()) + { + TABLE *table= thd->create_and_open_tmp_table(frm, path.str, db.str, + table_name.str, + false); + + if (!table) + { + (void) thd->rm_temporary_table(create_info->db_type, path.str); + goto err; + } + + if (is_trans != NULL) + *is_trans= table->file->has_transactions(); + + thd->used|= THD::THREAD_SPECIFIC_USED; + create_info->table= table; // Store pointer to table + } + + error= 0; +err: + if 
(unlikely(error) && ddl_log_state_create) + { + /* Table was never created, so we can ignore the ddl log entry */ + ddl_log_complete(ddl_log_state_create); + } + + THD_STAGE_INFO(thd, stage_after_create); + delete file; + DBUG_PRINT("exit", ("return: %d", error)); + DBUG_RETURN(error); + +warn: + error= -1; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_TABLE_EXISTS_ERROR, + ER_THD(thd, ER_TABLE_EXISTS_ERROR), + alias->str); + goto err; +} + +/** + Simple wrapper around create_table_impl() to be used + in various version of CREATE TABLE statement. + + @result + 1 unspecifed error + 2 error; Don't log create statement + 0 ok + -1 Table was used with IF NOT EXISTS and table existed (warning, not error) +*/ + +int mysql_create_table_no_lock(THD *thd, + DDL_LOG_STATE *ddl_log_state_create, + DDL_LOG_STATE *ddl_log_state_rm, + Table_specification_st *create_info, + Alter_info *alter_info, bool *is_trans, + int create_table_mode, TABLE_LIST *table_list) +{ + KEY *not_used_1; + uint not_used_2; + int res; + uint path_length; + char path[FN_REFLEN + 1]; + LEX_CSTRING cpath; + const LEX_CSTRING *db= &table_list->db; + const LEX_CSTRING *table_name= &table_list->table_name; + LEX_CUSTRING frm= {0,0}; + + DBUG_ASSERT(create_info->default_table_charset); + + if (create_info->tmp_table()) + path_length= build_tmptable_filename(thd, path, sizeof(path)); + else + { + const LEX_CSTRING *alias= table_case_name(create_info, table_name); + path_length= build_table_filename(path, sizeof(path) - 1, db->str, + alias->str, + "", 0); + // Check if we hit FN_REFLEN bytes along with file extension. 
+ if (path_length+reg_ext_length > FN_REFLEN) + { + my_error(ER_IDENT_CAUSES_TOO_LONG_PATH, MYF(0), (int) sizeof(path)-1, + path); + return true; + } + } + lex_string_set3(&cpath, path, path_length); + + res= create_table_impl(thd, ddl_log_state_create, ddl_log_state_rm, + *db, *table_name, *db, *table_name, cpath, + *create_info, create_info, + alter_info, create_table_mode, + is_trans, ¬_used_1, ¬_used_2, &frm); + my_free(const_cast(frm.str)); + + if (!res && create_info->sequence) + { + /* Set create_info.table if temporary table */ + if (create_info->tmp_table()) + table_list->table= create_info->table; + else + table_list->table= 0; + res= sequence_insert(thd, thd->lex, table_list); + if (res) + { + DBUG_ASSERT(thd->is_error()); + /* + Drop the new table, we were not completely done. + + Temporarily modify table_list to avoid dropping source sequence + in CREATE TABLE LIKE . + */ + TABLE_LIST *tail= table_list->next_local; + table_list->next_local= NULL; + /* Drop the table as it wasn't completely done */ + if (!mysql_rm_table_no_locks(thd, table_list, &thd->db, + (DDL_LOG_STATE*) 0, + 1, + create_info->tmp_table(), + false, true /* Sequence*/, + true /* Don't log_query */, + true /* Don't free locks */ )) + { + /* + From the user point of view, the table creation failed + We return 2 to indicate that this statement doesn't have + to be logged. + */ + res= 2; + } + table_list->next_local= tail; + } + } + + return res; +} + +#ifdef WITH_WSREP +/** Additional sequence checks for Galera cluster. 

@param thd thread handle
@param seq sequence definition
@retval false success
@retval true failure
*/
bool wsrep_check_sequence(THD* thd, const sequence_definition *seq)
{
  enum legacy_db_type db_type;

  DBUG_ASSERT(WSREP(thd));

  /* Engine comes either from an explicit ENGINE= clause or the default */
  if (thd->lex->create_info.used_fields & HA_CREATE_USED_ENGINE)
  {
    db_type= thd->lex->create_info.db_type->db_type;
  }
  else
  {
    const handlerton *hton= ha_default_handlerton(thd);
    db_type= hton->db_type;
  }

  // In Galera cluster we support only InnoDB sequences
  if (db_type != DB_TYPE_INNODB)
  {
    my_error(ER_NOT_SUPPORTED_YET, MYF(0),
             "non-InnoDB sequences in Galera cluster");
    return(true);
  }

  // In Galera cluster it is best to use INCREMENT BY 0 with CACHE
  // or NOCACHE
  if (seq &&
      seq->increment &&
      seq->cache)
  {
    my_error(ER_NOT_SUPPORTED_YET, MYF(0),
             "CACHE without INCREMENT BY 0 in Galera cluster");
    return(true);
  }

  return (false);
}
#endif /* WITH_WSREP */

/**
  Implementation of SQLCOM_CREATE_TABLE.

  Take the metadata locks (including a shared lock on the affected
  schema) and create the table. Is written to be called from
  mysql_execute_command(), to which it delegates the common parts
  with other commands (i.e. implicit commit before and after,
  close of thread tables.
+*/ + + +static +bool mysql_create_table(THD *thd, TABLE_LIST *create_table, + Table_specification_st *create_info, + Alter_info *alter_info) +{ + TABLE_LIST *pos_in_locked_tables= 0; + MDL_ticket *mdl_ticket= 0; + DDL_LOG_STATE ddl_log_state_create, ddl_log_state_rm; + int create_table_mode; + uint save_thd_create_info_options; + bool is_trans= FALSE; + bool result; + DBUG_ENTER("mysql_create_table"); + + DBUG_ASSERT(create_info->default_table_charset); + + DBUG_ASSERT(create_table == thd->lex->query_tables); + + bzero(&ddl_log_state_create, sizeof(ddl_log_state_create)); + bzero(&ddl_log_state_rm, sizeof(ddl_log_state_rm)); + + /* Copy temporarily the statement flags to thd for lock_table_names() */ + save_thd_create_info_options= thd->lex->create_info.options; + thd->lex->create_info.options|= create_info->options; + + /* Open or obtain an exclusive metadata lock on table being created */ + create_table->db_type= 0; + result= open_and_lock_tables(thd, *create_info, create_table, FALSE, 0); + + thd->lex->create_info.options= save_thd_create_info_options; + + if (result) + { + if (thd->slave_thread && + !thd->is_error() && create_table->db_type && + (create_table->db_type->flags & HTON_IGNORE_UPDATES)) + { + /* Table existed in distributed engine. Log query to binary log */ + result= 0; + goto err; + } + /* is_error() may be 0 if table existed and we generated a warning */ + DBUG_RETURN(thd->is_error()); + } + /* The following is needed only in case of lock tables */ + if ((create_info->table= create_table->table)) + { + pos_in_locked_tables= create_info->table->pos_in_locked_tables; + mdl_ticket= create_table->table->mdl_ticket; + } + + /* Got lock. 
*/ + DEBUG_SYNC(thd, "locked_table_name"); + + if (alter_info->create_list.elements || alter_info->key_list.elements) + create_table_mode= C_ORDINARY_CREATE; + else + create_table_mode= C_ASSISTED_DISCOVERY; + + if (!(thd->variables.option_bits & OPTION_EXPLICIT_DEF_TIMESTAMP)) + promote_first_timestamp_column(&alter_info->create_list); + +#ifdef WITH_WSREP + if (thd->lex->sql_command == SQLCOM_CREATE_SEQUENCE && + WSREP(thd) && wsrep_thd_is_local_toi(thd)) + { + if (wsrep_check_sequence(thd, create_info->seq_create_info)) + DBUG_RETURN(true); + } +#endif /* WITH_WSREP */ + + /* We can abort create table for any table type */ + thd->abort_on_warning= thd->is_strict_mode(); + + if (mysql_create_table_no_lock(thd, &ddl_log_state_create, &ddl_log_state_rm, + create_info, alter_info, &is_trans, + create_table_mode, create_table) > 0) + { + result= 1; + goto err; + } + + /* + Check if we are doing CREATE OR REPLACE TABLE under LOCK TABLES + on a non temporary table + */ + if (thd->locked_tables_mode && pos_in_locked_tables && + create_info->or_replace()) + { + DBUG_ASSERT(thd->variables.option_bits & OPTION_TABLE_LOCK); + /* + Add back the deleted table and re-created table as a locked table + This should always work as we have a meta lock on the table. + */ + thd->locked_tables_list.add_back_last_deleted_lock(pos_in_locked_tables); + if (thd->locked_tables_list.reopen_tables(thd, false)) + { + thd->locked_tables_list.unlink_all_closed_tables(thd, NULL, 0); + result= 1; + goto err; + } + else + { + TABLE *table= pos_in_locked_tables->table; + table->mdl_ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE); + } + } + +err: + thd->abort_on_warning= 0; + + /* In RBR or readonly server we don't need to log CREATE TEMPORARY TABLE */ + if (!result && create_info->tmp_table() && + (thd->is_current_stmt_binlog_format_row() || (opt_readonly && !thd->slave_thread))) + { + /* Note that table->s->table_creation_was_logged is not set! 
*/ + DBUG_RETURN(result); + } + + if (create_info->tmp_table()) + thd->transaction->stmt.mark_created_temp_table(); + + /* Write log if no error or if we already deleted a table */ + if (!result || thd->log_current_statement()) + { + if (unlikely(result) && create_info->table_was_deleted && + pos_in_locked_tables) + { + /* + Possible locked table was dropped. We should remove meta data locks + associated with it and do UNLOCK_TABLES if no more locked tables. + */ + (void) thd->locked_tables_list.unlock_locked_table(thd, mdl_ticket); + } + else if (likely(!result) && create_info->table) + { + /* + Remember that table creation was logged so that we know if + we should log a delete of it. + If create_info->table was not set, it's a normal table and + table_creation_was_logged will be set when the share is created. + */ + create_info->table->s->table_creation_was_logged= 1; + } + thd->binlog_xid= thd->query_id; + ddl_log_update_xid(&ddl_log_state_create, thd->binlog_xid); + if (ddl_log_state_rm.is_active()) + ddl_log_update_xid(&ddl_log_state_rm, thd->binlog_xid); + debug_crash_here("ddl_log_create_before_binlog"); + if (unlikely(write_bin_log(thd, result ? 
FALSE : TRUE, thd->query(), + thd->query_length(), is_trans))) + result= 1; + debug_crash_here("ddl_log_create_after_binlog"); + thd->binlog_xid= 0; + + if (!create_info->tmp_table()) + { + backup_log_info ddl_log; + bzero(&ddl_log, sizeof(ddl_log)); + ddl_log.query= { C_STRING_WITH_LEN("CREATE") }; + ddl_log.org_partitioned= (create_info->db_type == partition_hton); + ddl_log.org_storage_engine_name= create_info->new_storage_engine_name; + ddl_log.org_database= create_table->db; + ddl_log.org_table= create_table->table_name; + ddl_log.org_table_id= create_info->tabledef_version; + backup_log_ddl(&ddl_log); + } + } + ddl_log_complete(&ddl_log_state_rm); + ddl_log_complete(&ddl_log_state_create); + DBUG_RETURN(result); +} + + +/* +** Give the key name after the first field with an optional '_#' after + @returns + 0 if keyname does not exists + [1..) index + 1 of duplicate key name +**/ + +static int +check_if_keyname_exists(const char *name, KEY *start, KEY *end) +{ + uint i= 1; + for (KEY *key=start; key != end ; key++, i++) + if (!my_strcasecmp(system_charset_info, name, key->name.str)) + return i; + return 0; +} + +/** + Returns 1 if field name exists otherwise 0 +*/ +static bool +check_if_field_name_exists(const char *name, List * fields) +{ + Create_field *fld; + List_iteratorit(*fields); + while ((fld = it++)) + { + if (!my_strcasecmp(system_charset_info, fld->field_name.str, name)) + return 1; + } + return 0; +} + +static char * +make_unique_key_name(THD *thd, const char *field_name,KEY *start,KEY *end) +{ + char buff[MAX_FIELD_NAME],*buff_end; + + if (!check_if_keyname_exists(field_name,start,end) && + my_strcasecmp(system_charset_info,field_name,primary_key_name.str)) + return (char*) field_name; // Use fieldname + buff_end=strmake(buff,field_name, sizeof(buff)-4); + + /* + Only 3 chars + '\0' left, so need to limit to 2 digit + This is ok as we can't have more than 100 keys anyway + */ + for (uint i=2 ; i< 100; i++) + { + *buff_end= '_'; + int10_to_str(i, 
buff_end+1, 10); + if (!check_if_keyname_exists(buff,start,end)) + return thd->strdup(buff); + } + return (char*) "not_specified"; // Should never happen +} + +/** + Make an unique name for constraints without a name +*/ + +static bool make_unique_constraint_name(THD *thd, LEX_CSTRING *name, + const char *own_name_base, + List *vcol, + uint *nr) +{ + char buff[MAX_FIELD_NAME], *end; + List_iterator_fast it(*vcol); + end=strmov(buff, own_name_base ? own_name_base : "CONSTRAINT_"); + for (int round= 0;; round++) + { + Virtual_column_info *check; + char *real_end= end; + if (round == 1 && own_name_base) + *end++= '_'; + // if own_base_name provided, try it first + if (round != 0 || !own_name_base) + real_end= int10_to_str((*nr)++, end, 10); + it.rewind(); + while ((check= it++)) + { + if (check->name.str && + !my_strcasecmp(system_charset_info, buff, check->name.str)) + break; + } + if (!check) // Found unique name + { + name->length= (size_t) (real_end - buff); + name->str= strmake_root(thd->stmt_arena->mem_root, buff, name->length); + return (name->str == NULL); + } + } + return FALSE; +} + +/** + INVISIBLE_FULL are internally created. They are completely invisible + to Alter command (Opposite of SYSTEM_INVISIBLE which throws an + error when same name column is added by Alter). So in the case of when + user added a same column name as of INVISIBLE_FULL , we change + INVISIBLE_FULL column name. 
+*/ +static const +char * make_unique_invisible_field_name(THD *thd, const char *field_name, + List *fields) +{ + if (!check_if_field_name_exists(field_name, fields)) + return field_name; + char buff[MAX_FIELD_NAME], *buff_end; + buff_end= strmake_buf(buff, field_name); + if (buff_end - buff < 5) + return NULL; // Should not happen + + for (uint i=1 ; i < 10000; i++) + { + char *real_end= int10_to_str(i, buff_end, 10); + if (check_if_field_name_exists(buff, fields)) + continue; + return (const char *)thd->strmake(buff, real_end - buff); + } + return NULL; //Should not happen +} + +/**************************************************************************** +** Alter a table definition +****************************************************************************/ + +bool operator!=(const MYSQL_TIME &lhs, const MYSQL_TIME &rhs) +{ + return lhs.year != rhs.year || lhs.month != rhs.month || lhs.day != rhs.day || + lhs.hour != rhs.hour || lhs.minute != rhs.minute || + lhs.second_part != rhs.second_part || lhs.neg != rhs.neg || + lhs.time_type != rhs.time_type; +} + +/** + Rename a table. + + @param base The handlerton handle. + @param old_db The old database name. + @param old_name The old table name. + @param new_db The new database name. + @param new_name The new table name. + @param id Table version id (for ddl log) + @param flags flags + FN_FROM_IS_TMP old_name is temporary. + FN_TO_IS_TMP new_name is temporary. + NO_FRM_RENAME Don't rename the FRM file + but only the table in the storage engine. + NO_HA_TABLE Don't rename table in engine. + NO_FK_CHECKS Don't check FK constraints during rename. 
+ @return false OK + @return true Error +*/ + +bool +mysql_rename_table(handlerton *base, const LEX_CSTRING *old_db, + const LEX_CSTRING *old_name, const LEX_CSTRING *new_db, + const LEX_CSTRING *new_name, LEX_CUSTRING *id, uint flags) +{ + THD *thd= current_thd; + char from[FN_REFLEN], to[FN_REFLEN], lc_from[FN_REFLEN], lc_to[FN_REFLEN]; + char *from_base= from, *to_base= to; + handler *file; + int error=0; + ulonglong save_bits= thd->variables.option_bits; + int length; + bool log_query= 0; + DBUG_ENTER("mysql_rename_table"); + DBUG_ASSERT(base); + DBUG_PRINT("enter", ("old: '%s'.'%s' new: '%s'.'%s'", + old_db->str, old_name->str, new_db->str, + new_name->str)); + + // Temporarily disable foreign key checks + if (flags & NO_FK_CHECKS) + thd->variables.option_bits|= OPTION_NO_FOREIGN_KEY_CHECKS; + + file= get_new_handler((TABLE_SHARE*) 0, thd->mem_root, base); + + build_table_filename(from, sizeof(from) - 1, old_db->str, old_name->str, "", + flags & FN_FROM_IS_TMP); + length= build_table_filename(to, sizeof(to) - 1, new_db->str, + new_name->str, "", flags & FN_TO_IS_TMP); + // Check if we hit FN_REFLEN bytes along with file extension. 
+ if (length+reg_ext_length > FN_REFLEN) + { + my_error(ER_IDENT_CAUSES_TOO_LONG_PATH, MYF(0), (int) sizeof(to)-1, to); + DBUG_RETURN(TRUE); + } + + if (file && file->needs_lower_case_filenames()) + { + build_lower_case_table_filename(lc_from, sizeof(lc_from) -1, + old_db, old_name, flags & FN_FROM_IS_TMP); + build_lower_case_table_filename(lc_to, sizeof(lc_from) -1, + new_db, new_name, flags & FN_TO_IS_TMP); + from_base= lc_from; + to_base= lc_to; + } + + if (flags & NO_HA_TABLE) + { + if (rename_file_ext(from,to,reg_ext)) + error= my_errno; + log_query= true; + if (file && !(flags & NO_PAR_TABLE)) + (void) file->ha_create_partitioning_metadata(to, from, CHF_RENAME_FLAG); + } + else if (!file || likely(!(error=file->ha_rename_table(from_base, to_base)))) + { + if (!(flags & NO_FRM_RENAME) && unlikely(rename_file_ext(from,to,reg_ext))) + { + error=my_errno; + if (file) + { + if (error == ENOENT) + error= 0; // this is ok if file->ha_rename_table() succeeded + else + file->ha_rename_table(to_base, from_base); // Restore old file name + } + } + else + log_query= true; + } + if (!error && log_query && !(flags & (FN_TO_IS_TMP | FN_FROM_IS_TMP))) + { + backup_log_info ddl_log; + bzero(&ddl_log, sizeof(ddl_log)); + ddl_log.query= { C_STRING_WITH_LEN("RENAME") }; + ddl_log.org_partitioned= file->partition_engine(); + ddl_log.new_partitioned= ddl_log.org_partitioned; + lex_string_set(&ddl_log.org_storage_engine_name, file->real_table_type()); + ddl_log.org_database= *old_db; + ddl_log.org_table= *old_name; + ddl_log.org_table_id= *id; + ddl_log.new_storage_engine_name= ddl_log.org_storage_engine_name; + ddl_log.new_database= *new_db; + ddl_log.new_table= *new_name; + ddl_log.new_table_id= *id; + backup_log_ddl(&ddl_log); + } + delete file; + + if (error == HA_ERR_WRONG_COMMAND) + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "ALTER TABLE"); + else if (error == ENOTDIR) + my_error(ER_BAD_DB_ERROR, MYF(0), new_db->str); + else if (error) + my_error(ER_ERROR_ON_RENAME, MYF(0), from, 
to, error); + else if (!(flags & FN_IS_TMP)) + mysql_audit_rename_table(thd, old_db, old_name, new_db, new_name); + + /* + Remove the old table share from the pfs table share array. The new table + share will be created when the renamed table is first accessed. + */ + if (likely(error == 0)) + { + PSI_CALL_drop_table_share(flags & FN_FROM_IS_TMP, + old_db->str, (uint)old_db->length, + old_name->str, (uint)old_name->length); + } + + // Restore options bits to the original value + thd->variables.option_bits= save_bits; + + DBUG_RETURN(error != 0); +} + + +/* + Create a table identical to the specified table + + SYNOPSIS + mysql_create_like_table() + thd Thread object + table Table list element for target table + src_table Table list element for source table + create_info Create info + + RETURN VALUES + FALSE OK + TRUE error +*/ + +static +bool mysql_create_like_table(THD* thd, TABLE_LIST* table, + TABLE_LIST* src_table, + Table_specification_st *create_info) +{ + Table_specification_st local_create_info; + TABLE_LIST *pos_in_locked_tables= 0; + Alter_info local_alter_info; + Alter_table_ctx local_alter_ctx; // Not used + DDL_LOG_STATE ddl_log_state_create, ddl_log_state_rm; + int res= 1; + bool is_trans= FALSE; + bool do_logging= FALSE; + bool force_generated_create= false; + bool src_table_exists= FALSE; + uint not_used; + int create_res; + DBUG_ENTER("mysql_create_like_table"); + + bzero(&ddl_log_state_create, sizeof(ddl_log_state_create)); + bzero(&ddl_log_state_rm, sizeof(ddl_log_state_rm)); + +#ifdef WITH_WSREP + if (WSREP(thd) && !thd->wsrep_applier && + wsrep_create_like_table(thd, table, src_table, create_info)) + { + DBUG_RETURN(res); + } +#endif + + /* + We the open source table to get its description in HA_CREATE_INFO + and Alter_info objects. This also acquires a shared metadata lock + on this table which ensures that no concurrent DDL operation will + mess with it. 
+ Also in case when we create non-temporary table open_tables() + call obtains an exclusive metadata lock on target table ensuring + that we can safely perform table creation. + Thus by holding both these locks we ensure that our statement is + properly isolated from all concurrent operations which matter. + */ + + res= open_tables(thd, *create_info, &thd->lex->query_tables, ¬_used, 0); + + if (res) + { + /* is_error() may be 0 if table existed and we generated a warning */ + res= thd->is_error(); + src_table_exists= !res; + goto err; + } + /* Ensure we don't try to create something from which we select from */ + if (create_info->or_replace() && !create_info->tmp_table()) + { + TABLE_LIST *duplicate; + if ((duplicate= unique_table(thd, table, src_table, 0))) + { + update_non_unique_table_error(src_table, "CREATE", duplicate); + res= 1; + goto err; + } + } + + src_table->table->use_all_columns(); + + DEBUG_SYNC(thd, "create_table_like_after_open"); + + /* + Fill Table_specification_st and Alter_info with the source table description. + Set OR REPLACE and IF NOT EXISTS option as in the CREATE TABLE LIKE + statement. + */ + local_create_info.init(create_info->create_like_options()); + local_create_info.db_type= src_table->table->s->db_type(); + local_create_info.row_type= src_table->table->s->row_type; + local_create_info.alter_info= &local_alter_info; + /* + This statement: + CREATE TABLE t1 LIKE t2 + does not support table charset/collation clauses. + No needs to copy. Assert they are empty. + */ + DBUG_ASSERT(create_info->default_charset_collation.is_empty()); + DBUG_ASSERT(create_info->convert_charset_collation.is_empty()); + if (mysql_prepare_alter_table(thd, src_table->table, &local_create_info, + &local_alter_info, &local_alter_ctx)) + goto err; +#ifdef WITH_PARTITION_STORAGE_ENGINE + /* Partition info is not handled by mysql_prepare_alter_table() call. 
*/ + if (src_table->table->part_info) + { + /* + The CREATE TABLE LIKE should not inherit the DATA DIRECTORY + and INDEX DIRECTORY from the base table. + So that TRUE argument for the get_clone. + */ + thd->work_part_info= src_table->table->part_info->get_clone(thd, TRUE); + } +#endif /*WITH_PARTITION_STORAGE_ENGINE*/ + + /* + Adjust description of source table before using it for creation of + target table. + + Similarly to SHOW CREATE TABLE we ignore MAX_ROWS attribute of + temporary table which represents I_S table. + */ + if (src_table->schema_table) + local_create_info.max_rows= 0; + /* Replace type of source table with one specified in the statement. */ + local_create_info.options&= ~HA_LEX_CREATE_TMP_TABLE; + local_create_info.options|= create_info->options; + /* Reset auto-increment counter for the new table. */ + local_create_info.auto_increment_value= 0; + /* + Do not inherit values of DATA and INDEX DIRECTORY options from + the original table. This is documented behavior. + */ + local_create_info.data_file_name= local_create_info.index_file_name= NULL; + + if (src_table->table->versioned() && + local_create_info.vers_info.fix_create_like(local_alter_info, local_create_info, + *src_table, *table)) + { + goto err; + } + + /* The following is needed only in case of lock tables */ + if ((local_create_info.table= thd->lex->query_tables->table)) + pos_in_locked_tables= local_create_info.table->pos_in_locked_tables; + + res= ((create_res= + mysql_create_table_no_lock(thd, + &ddl_log_state_create, &ddl_log_state_rm, + &local_create_info, &local_alter_info, + &is_trans, C_ORDINARY_CREATE, + table)) > 0); + /* Remember to log if we deleted something */ + do_logging= thd->log_current_statement(); + if (res) + goto err; + + /* + Check if we are doing CREATE OR REPLACE TABLE under LOCK TABLES + on a non temporary table + */ + if (thd->locked_tables_mode && pos_in_locked_tables && + create_info->or_replace()) + { + /* + Add back the deleted table and re-created table 
as a locked table + This should always work as we have a meta lock on the table. + */ + thd->locked_tables_list.add_back_last_deleted_lock(pos_in_locked_tables); + if (thd->locked_tables_list.reopen_tables(thd, false)) + { + thd->locked_tables_list.unlink_all_closed_tables(thd, NULL, 0); + res= 1; // We got an error + } + else + { + /* + Get pointer to the newly opened table. We need this to ensure we + don't reopen the table when doing statment logging below. + */ + table->table= pos_in_locked_tables->table; + table->table->mdl_ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE); + } + } + else + { + /* + Ensure that we have an exclusive lock on target table if we are creating + non-temporary table. We don't have or need the lock if the create failed + because of existing table when using "if exists". + */ + DBUG_ASSERT((create_info->tmp_table()) || create_res < 0 || + thd->mdl_context.is_lock_owner(MDL_key::TABLE, table->db.str, + table->table_name.str, + MDL_EXCLUSIVE) || + (thd->locked_tables_mode && pos_in_locked_tables && + create_info->if_not_exists())); + } + + DEBUG_SYNC(thd, "create_table_like_before_binlog"); + + /* + We have to write the query before we unlock the tables. + */ + if (thd->is_current_stmt_binlog_disabled()) + goto err; + +#ifdef ENABLE_WHEN_S3_CAN_CREATE_TABLES + /* + If we do a create based on a shared table, log the full create of the + resulting table. This is needed as a shared table may look different + when the slave executes the command. + */ + force_generated_create= + (((src_table->table->file->partition_ht()->flags & + HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE) && + src_table->table->s->db_type() != local_create_info.db_type)); +#endif + + if (thd->is_current_stmt_binlog_format_row() || force_generated_create) + { + /* + Since temporary tables are not replicated under row-based + replication, CREATE TABLE ... LIKE ... needs special + treatement. 
We have some cases to consider, according to the + following decision table: + + ==== ========= ========= ============================== + Case Target Source Write to binary log + ==== ========= ========= ============================== + 1 normal normal Original statement + 2 normal temporary Generated statement if the table + was created. + 3 temporary normal Nothing + 4 temporary temporary Nothing + 5 any shared Generated statement if the table + was created if engine changed + ==== ========= ========= ============================== + */ + if (!(create_info->tmp_table()) || force_generated_create) + { + // Case 2 & 5 + if (src_table->table->s->tmp_table || force_generated_create) + { + char buf[2048]; + String query(buf, sizeof(buf), system_charset_info); + query.length(0); // Have to zero it since constructor doesn't + Open_table_context ot_ctx(thd, MYSQL_OPEN_REOPEN | + MYSQL_OPEN_IGNORE_KILLED); + bool new_table= FALSE; // Whether newly created table is open. + + if (create_res != 0) + { + /* + Table or view with same name already existed and we where using + IF EXISTS. Continue without logging anything. + */ + do_logging= 0; + goto err; + } + if (!table->table) + { + TABLE_LIST::enum_open_strategy save_open_strategy; + int open_res; + /* Force the newly created table to be opened */ + save_open_strategy= table->open_strategy; + table->open_strategy= TABLE_LIST::OPEN_NORMAL; + + /* + In order for show_create_table() to work we need to open + destination table if it is not already open (i.e. if it + has not existed before). We don't need acquire metadata + lock in order to do this as we already hold exclusive + lock on this table. The table will be closed by + close_thread_table() at the end of this branch. 
+ */ + open_res= open_table(thd, table, &ot_ctx); + /* Restore */ + table->open_strategy= save_open_strategy; + if (open_res) + { + res= 1; + goto err; + } + new_table= TRUE; + } + /* + We have to re-test if the table was a view as the view may not + have been opened until just above. + */ + if (!table->view) + { + /* + After opening a MERGE table add the children to the query list of + tables, so that children tables info can be used on "CREATE TABLE" + statement generation by the binary log. + Note that placeholders don't have the handler open. + */ + if (table->table->file->extra(HA_EXTRA_ADD_CHILDREN_LIST)) + goto err; + + /* + As the reference table is temporary and may not exist on slave, we + must force the ENGINE to be present into CREATE TABLE. + */ + create_info->used_fields|= HA_CREATE_USED_ENGINE; + + int result __attribute__((unused))= + show_create_table(thd, table, &query, create_info, WITH_DB_NAME); + + DBUG_ASSERT(result == 0); // show_create_table() always return 0 + do_logging= FALSE; + if (write_bin_log(thd, TRUE, query.ptr(), query.length())) + { + res= 1; + goto err; + } + + if (new_table) + { + DBUG_ASSERT(thd->open_tables == table->table); + /* + When opening the table, we ignored the locked tables + (MYSQL_OPEN_GET_NEW_TABLE). Now we can close the table + without risking to close some locked table. + */ + close_thread_table(thd, &thd->open_tables); + } + } + } + else // Case 1 + do_logging= TRUE; + } + /* + Case 3 and 4 does nothing under RBR + */ + } + else + { + DBUG_PRINT("info", + ("res: %d tmp_table: %d create_info->table: %p", + res, create_info->tmp_table(), local_create_info.table)); + if (create_info->tmp_table()) + { + thd->transaction->stmt.mark_created_temp_table(); + if (!res && local_create_info.table) + { + /* + Remember that tmp table creation was logged so that we know if + we should log a delete of it. 
+ */ + local_create_info.table->s->table_creation_was_logged= 1; + } + } + do_logging= TRUE; + } + +err: + if (do_logging) + { + thd->binlog_xid= thd->query_id; + ddl_log_update_xid(&ddl_log_state_create, thd->binlog_xid); + if (ddl_log_state_rm.is_active()) + ddl_log_update_xid(&ddl_log_state_rm, thd->binlog_xid); + debug_crash_here("ddl_log_create_before_binlog"); + if (res && create_info->table_was_deleted) + { + /* + Table was not deleted. Original table was deleted. + We have to log it. + */ + DBUG_ASSERT(ddl_log_state_rm.is_active()); + log_drop_table(thd, &table->db, &table->table_name, + &create_info->org_storage_engine_name, + create_info->db_type == partition_hton, + &create_info->org_tabledef_version, + create_info->tmp_table()); + } + else if (res != 2) // Table was not dropped + { + if (write_bin_log(thd, res ? FALSE : TRUE, thd->query(), + thd->query_length(), is_trans)) + res= 1; + } + debug_crash_here("ddl_log_create_after_binlog"); + thd->binlog_xid= 0; + } + + if (!res && !src_table_exists && !create_info->tmp_table()) + { + backup_log_info ddl_log; + bzero(&ddl_log, sizeof(ddl_log)); + ddl_log.query= { C_STRING_WITH_LEN("CREATE") }; + ddl_log.org_storage_engine_name= local_create_info.new_storage_engine_name; + ddl_log.org_database= table->db; + ddl_log.org_table= table->table_name; + ddl_log.org_table_id= local_create_info.tabledef_version; + backup_log_ddl(&ddl_log); + } + + ddl_log_complete(&ddl_log_state_rm); + ddl_log_complete(&ddl_log_state_create); + DBUG_RETURN(res != 0); +} + + +/* table_list should contain just one table */ +int mysql_discard_or_import_tablespace(THD *thd, + TABLE_LIST *table_list, + bool discard) +{ + Alter_table_prelocking_strategy alter_prelocking_strategy; + int error; + DBUG_ENTER("mysql_discard_or_import_tablespace"); + + mysql_audit_alter_table(thd, table_list); + + /* + Note that DISCARD/IMPORT TABLESPACE always is the only operation in an + ALTER TABLE + */ + + THD_STAGE_INFO(thd, 
stage_discard_or_import_tablespace); + + /* + We set this flag so that ha_innobase::open and ::external_lock() do + not complain when we lock the table + */ + thd->tablespace_op= TRUE; + /* + Adjust values of table-level and metadata which was set in parser + for the case general ALTER TABLE. + */ + table_list->mdl_request.set_type(MDL_EXCLUSIVE); + table_list->lock_type= TL_WRITE; + /* Do not open views. */ + table_list->required_type= TABLE_TYPE_NORMAL; + + if (open_and_lock_tables(thd, table_list, FALSE, 0, + &alter_prelocking_strategy)) + { + thd->tablespace_op=FALSE; + DBUG_RETURN(-1); + } + + error= table_list->table->file->ha_discard_or_import_tablespace(discard); + + THD_STAGE_INFO(thd, stage_end); + + if (unlikely(error)) + goto err; + + if (discard) + table_list->table->s->tdc->flush(thd, true); + + /* + The 0 in the call below means 'not in a transaction', which means + immediate invalidation; that is probably what we wish here + */ + query_cache_invalidate3(thd, table_list, 0); + + /* The ALTER TABLE is always in its own transaction */ + error= trans_commit_stmt(thd); + if (unlikely(trans_commit_implicit(thd))) + error=1; + if (likely(!error)) + error= write_bin_log(thd, FALSE, thd->query(), thd->query_length()); + +err: + thd->tablespace_op=FALSE; + + if (likely(error == 0)) + { + my_ok(thd); + DBUG_RETURN(0); + } + + table_list->table->file->print_error(error, MYF(0)); + + DBUG_RETURN(-1); +} + + +/** + Check if key is a candidate key, i.e. a unique index with no index + fields partial or nullable. 
+*/ + +static bool is_candidate_key(KEY *key) +{ + KEY_PART_INFO *key_part; + KEY_PART_INFO *key_part_end= key->key_part + key->user_defined_key_parts; + + if (!(key->flags & HA_NOSAME) || (key->flags & HA_NULL_PART_KEY) || + (key->flags & HA_KEY_HAS_PART_KEY_SEG)) + return false; + + for (key_part= key->key_part; key_part < key_part_end; key_part++) + { + if (key_part->key_part_flag & HA_PART_KEY_SEG) + return false; + } + return true; +} + + +/* + Preparation for table creation + + SYNOPSIS + handle_if_exists_option() + thd Thread object. + table The altered table. + alter_info List of columns and indexes to create + period_info Application-time period info + + DESCRIPTION + Looks for the IF [NOT] EXISTS options, checks the states and remove items + from the list if existing found. + + RETURN VALUES + TRUE error + FALSE OK +*/ + +static bool +handle_if_exists_options(THD *thd, TABLE *table, Alter_info *alter_info, + Table_period_info *period_info) +{ + Field **f_ptr; + DBUG_ENTER("handle_if_exists_option"); + + /* Handle ADD COLUMN IF NOT EXISTS. */ + { + List_iterator it(alter_info->create_list); + Create_field *sql_field; + + while ((sql_field=it++)) + { + if (!sql_field->create_if_not_exists || sql_field->change.str) + continue; + /* + If there is a field with the same name in the table already, + remove the sql_field from the list. + */ + for (f_ptr=table->field; *f_ptr; f_ptr++) + { + if (lex_string_cmp(system_charset_info, + &sql_field->field_name, + &(*f_ptr)->field_name) == 0) + goto drop_create_field; + } + { + /* + If in the ADD list there is a field with the same name, + remove the sql_field from the list. 
+ */ + List_iterator chk_it(alter_info->create_list); + Create_field *chk_field; + while ((chk_field= chk_it++) && chk_field != sql_field) + { + if (lex_string_cmp(system_charset_info, + &sql_field->field_name, + &chk_field->field_name) == 0) + goto drop_create_field; + } + } + continue; +drop_create_field: + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_DUP_FIELDNAME, ER_THD(thd, ER_DUP_FIELDNAME), + sql_field->field_name.str); + it.remove(); + if (alter_info->create_list.is_empty()) + { + alter_info->flags&= ~ALTER_PARSER_ADD_COLUMN; + if (alter_info->key_list.is_empty()) + alter_info->flags&= ~(ALTER_ADD_INDEX | ALTER_ADD_FOREIGN_KEY); + } + } + } + + /* Handle MODIFY COLUMN IF EXISTS. */ + { + List_iterator it(alter_info->create_list); + Create_field *sql_field; + + while ((sql_field=it++)) + { + if (!sql_field->create_if_not_exists || !sql_field->change.str) + continue; + /* + If there is NO field with the same name in the table already, + remove the sql_field from the list. + */ + for (f_ptr=table->field; *f_ptr; f_ptr++) + { + if (lex_string_cmp(system_charset_info, + &sql_field->change, + &(*f_ptr)->field_name) == 0) + { + break; + } + } + if (unlikely(*f_ptr == NULL)) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_BAD_FIELD_ERROR, + ER_THD(thd, ER_BAD_FIELD_ERROR), + sql_field->change.str, table->s->table_name.str); + it.remove(); + if (alter_info->create_list.is_empty()) + { + alter_info->flags&= ~(ALTER_PARSER_ADD_COLUMN | ALTER_CHANGE_COLUMN); + if (alter_info->key_list.is_empty()) + alter_info->flags&= ~ALTER_ADD_INDEX; + } + } + } + } + + /* Handle ALTER/RENAME COLUMN IF EXISTS. */ + { + List_iterator it(alter_info->alter_list); + Alter_column *acol; + + while ((acol=it++)) + { + if (!acol->alter_if_exists) + continue; + /* + If there is NO field with the same name in the table already, + remove the acol from the list. 
+ */ + for (f_ptr=table->field; *f_ptr; f_ptr++) + { + if (my_strcasecmp(system_charset_info, + acol->name.str, (*f_ptr)->field_name.str) == 0) + break; + } + if (unlikely(*f_ptr == NULL)) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_BAD_FIELD_ERROR, + ER_THD(thd, ER_BAD_FIELD_ERROR), + acol->name.str, table->s->table_name.str); + it.remove(); + if (alter_info->alter_list.is_empty()) + { + alter_info->flags&= ~(ALTER_CHANGE_COLUMN_DEFAULT); + } + } + } + } + + /* Handle DROP COLUMN/KEY IF EXISTS. */ + { + List_iterator drop_it(alter_info->drop_list); + Alter_drop *drop; + bool remove_drop; + ulonglong left_flags= 0; + while ((drop= drop_it++)) + { + ulonglong cur_flag= 0; + switch (drop->type) { + case Alter_drop::COLUMN: + cur_flag= ALTER_PARSER_DROP_COLUMN; + break; + case Alter_drop::FOREIGN_KEY: + cur_flag= ALTER_DROP_FOREIGN_KEY; + break; + case Alter_drop::KEY: + cur_flag= ALTER_DROP_INDEX; + break; + default: + break; + } + if (!drop->drop_if_exists) + { + left_flags|= cur_flag; + continue; + } + remove_drop= TRUE; + if (drop->type == Alter_drop::COLUMN) + { + /* + If there is NO field with that name in the table, + remove the 'drop' from the list. 
+ */ + for (f_ptr=table->field; *f_ptr; f_ptr++) + { + if (my_strcasecmp(system_charset_info, + drop->name, (*f_ptr)->field_name.str) == 0) + { + remove_drop= FALSE; + break; + } + } + } + else if (drop->type == Alter_drop::CHECK_CONSTRAINT) + { + for (uint i=table->s->field_check_constraints; + i < table->s->table_check_constraints; + i++) + { + if (my_strcasecmp(system_charset_info, drop->name, + table->check_constraints[i]->name.str) == 0) + { + remove_drop= FALSE; + break; + } + } + } + else if (drop->type == Alter_drop::PERIOD) + { + if (table->s->period.name.streq(drop->name)) + remove_drop= FALSE; + } + else /* Alter_drop::KEY and Alter_drop::FOREIGN_KEY */ + { + uint n_key; + if (drop->type != Alter_drop::FOREIGN_KEY) + { + for (n_key=0; n_key < table->s->keys; n_key++) + { + if (my_strcasecmp(system_charset_info, + drop->name, + table->key_info[n_key].name.str) == 0) + { + remove_drop= FALSE; + break; + } + } + } + else + { + List fk_child_key_list; + FOREIGN_KEY_INFO *f_key; + table->file->get_foreign_key_list(thd, &fk_child_key_list); + List_iterator fk_key_it(fk_child_key_list); + while ((f_key= fk_key_it++)) + { + if (my_strcasecmp(system_charset_info, f_key->foreign_id->str, + drop->name) == 0) + { + remove_drop= FALSE; + break; + } + } + } + } + + if (!remove_drop) + { + /* + Check if the name appears twice in the DROP list. 
+ */ + List_iterator chk_it(alter_info->drop_list); + Alter_drop *chk_drop; + while ((chk_drop= chk_it++) && chk_drop != drop) + { + if (drop->type == chk_drop->type && + my_strcasecmp(system_charset_info, + drop->name, chk_drop->name) == 0) + { + remove_drop= TRUE; + break; + } + } + } + + if (remove_drop) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_CANT_DROP_FIELD_OR_KEY, + ER_THD(thd, ER_CANT_DROP_FIELD_OR_KEY), + drop->type_name(), drop->name); + drop_it.remove(); + } + else + left_flags|= cur_flag; + } + /* Reset state to what's left in drop list */ + alter_info->flags&= ~(ALTER_PARSER_DROP_COLUMN | + ALTER_DROP_INDEX | + ALTER_DROP_FOREIGN_KEY); + alter_info->flags|= left_flags; + } + + /* Handle RENAME KEY IF EXISTS. */ + { + List_iterator rename_key_it(alter_info->alter_rename_key_list); + Alter_rename_key *rename_key; + while ((rename_key= rename_key_it++)) + { + if (!rename_key->alter_if_exists) + continue; + bool exists= false; + for (uint n_key= 0; n_key < table->s->keys; n_key++) + { + if (my_strcasecmp(system_charset_info, + rename_key->old_name.str, + table->key_info[n_key].name.str) == 0) + { + exists= true; + break; + } + } + if (exists) + continue; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_KEY_DOES_NOT_EXISTS, + ER_THD(thd, ER_KEY_DOES_NOT_EXISTS), + rename_key->old_name.str, table->s->table_name.str); + rename_key_it.remove(); + } + } + /* Handle ALTER KEY IF EXISTS. 
*/ + { + List_iterator ignor_it(alter_info->alter_index_ignorability_list); + Alter_index_ignorability *aii; + while ((aii= ignor_it++)) + { + if (!aii->if_exists()) + continue; + bool exists= false; + for (uint n_key= 0; n_key < table->s->keys; n_key++) + { + if (my_strcasecmp(system_charset_info, aii->name(), + table->key_info[n_key].name.str) == 0) + { + exists= true; + break; + } + } + if (exists) + continue; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_KEY_DOES_NOT_EXISTS, + ER_THD(thd, ER_KEY_DOES_NOT_EXISTS), + aii->name(), table->s->table_name.str); + ignor_it.remove(); + } + } + /* ALTER TABLE ADD KEY IF NOT EXISTS */ + /* ALTER TABLE ADD FOREIGN KEY IF NOT EXISTS */ + { + Key *key; + List_iterator key_it(alter_info->key_list); + uint n_key; + const char *keyname= NULL; + while ((key=key_it++)) + { + if (!key->if_not_exists() && !key->or_replace()) + continue; + + /* Check if the table already has a PRIMARY KEY */ + bool dup_primary_key= + key->type == Key::PRIMARY && + table->s->primary_key != MAX_KEY && + (keyname= table->s->key_info[table->s->primary_key].name.str) && + my_strcasecmp(system_charset_info, keyname, primary_key_name.str) == 0; + if (dup_primary_key) + goto remove_key; + + /* If the name of the key is not specified, */ + /* let us check the name of the first key part. 
*/ + if ((keyname= key->name.str) == NULL) + { + if (key->type == Key::PRIMARY) + keyname= primary_key_name.str; + else + { + List_iterator part_it(key->columns); + Key_part_spec *kp; + if ((kp= part_it++)) + keyname= kp->field_name.str; + if (keyname == NULL) + continue; + } + } + if (key->type != Key::FOREIGN_KEY) + { + for (n_key=0; n_key < table->s->keys; n_key++) + { + if (my_strcasecmp(system_charset_info, + keyname, table->key_info[n_key].name.str) == 0) + { + goto remove_key; + } + } + } + else + { + List fk_child_key_list; + FOREIGN_KEY_INFO *f_key; + table->file->get_foreign_key_list(thd, &fk_child_key_list); + List_iterator fk_key_it(fk_child_key_list); + while ((f_key= fk_key_it++)) + { + if (my_strcasecmp(system_charset_info, f_key->foreign_id->str, + keyname) == 0) + goto remove_key; + } + } + + { + Key *chk_key; + List_iterator chk_it(alter_info->key_list); + const char *chkname; + while ((chk_key=chk_it++) && chk_key != key) + { + if ((chkname= chk_key->name.str) == NULL) + { + List_iterator part_it(chk_key->columns); + Key_part_spec *kp; + if ((kp= part_it++)) + chkname= kp->field_name.str; + if (chkname == NULL) + continue; + } + if (key->type == chk_key->type && + my_strcasecmp(system_charset_info, keyname, chkname) == 0) + goto remove_key; + } + } + continue; + +remove_key: + if (key->if_not_exists()) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_DUP_KEYNAME, ER_THD(thd, dup_primary_key + ? ER_MULTIPLE_PRI_KEY : ER_DUP_KEYNAME), keyname); + key_it.remove(); + if (key->type == Key::FOREIGN_KEY) + { + /* ADD FOREIGN KEY appends two items. */ + key_it.remove(); + } + if (alter_info->key_list.is_empty()) + alter_info->flags&= ~(ALTER_ADD_INDEX | ALTER_ADD_FOREIGN_KEY); + } + else + { + DBUG_ASSERT(key->or_replace()); + Alter_drop::drop_type type= (key->type == Key::FOREIGN_KEY) ? 
+ Alter_drop::FOREIGN_KEY : Alter_drop::KEY; + Alter_drop *ad= new (thd->mem_root) Alter_drop(type, key->name.str, FALSE); + if (ad != NULL) + { + // Adding the index into the drop list for replacing + alter_info->flags |= ALTER_DROP_INDEX; + alter_info->drop_list.push_back(ad, thd->mem_root); + } + } + } + } + +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *tab_part_info= table->part_info; + thd->work_part_info= thd->lex->part_info; + if (tab_part_info) + { + /* ALTER TABLE ADD PARTITION IF NOT EXISTS */ + if ((alter_info->partition_flags & ALTER_PARTITION_ADD) && + thd->lex->create_info.if_not_exists()) + { + partition_info *alt_part_info= thd->lex->part_info; + if (alt_part_info) + { + List_iterator new_part_it(alt_part_info->partitions); + partition_element *pe; + while ((pe= new_part_it++)) + { + if (!tab_part_info->has_unique_name(pe)) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_SAME_NAME_PARTITION, + ER_THD(thd, ER_SAME_NAME_PARTITION), + pe->partition_name); + alter_info->partition_flags&= ~ALTER_PARTITION_ADD; + thd->work_part_info= NULL; + break; + } + } + } + } + /* ALTER TABLE DROP PARTITION IF EXISTS */ + if ((alter_info->partition_flags & ALTER_PARTITION_DROP) && + thd->lex->if_exists()) + { + List_iterator names_it(alter_info->partition_names); + const char *name; + + while ((name= names_it++)) + { + List_iterator part_it(tab_part_info->partitions); + partition_element *part_elem; + while ((part_elem= part_it++)) + { + if (my_strcasecmp(system_charset_info, + part_elem->partition_name, name) == 0) + break; + } + if (!part_elem) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_PARTITION_DOES_NOT_EXIST, + ER_THD(thd, ER_PARTITION_DOES_NOT_EXIST)); + names_it.remove(); + } + } + if (alter_info->partition_names.elements == 0) + alter_info->partition_flags&= ~ALTER_PARTITION_DROP; + } + } +#endif /*WITH_PARTITION_STORAGE_ENGINE*/ + + /* ADD CONSTRAINT IF NOT EXISTS. 
*/ + { + List_iterator it(alter_info->check_constraint_list); + Virtual_column_info *check; + TABLE_SHARE *share= table->s; + uint c; + + while ((check=it++)) + { + if (!check->if_not_exists && check->name.length) + continue; + for (c= share->field_check_constraints; + c < share->table_check_constraints ; c++) + { + Virtual_column_info *dup= table->check_constraints[c]; + if (dup->name.length == check->name.length && + lex_string_cmp(system_charset_info, + &check->name, &dup->name) == 0) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_DUP_CONSTRAINT_NAME, ER_THD(thd, ER_DUP_CONSTRAINT_NAME), + "CHECK", check->name.str); + it.remove(); + if (alter_info->check_constraint_list.elements == 0) + alter_info->flags&= ~ALTER_ADD_CHECK_CONSTRAINT; + + break; + } + } + } + } + + /* ADD PERIOD */ + + if (period_info->create_if_not_exists && table->s->period.name + && table->s->period.name.streq(period_info->name)) + { + DBUG_ASSERT(period_info->is_set()); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_DUP_FIELDNAME, ER_THD(thd, ER_DUP_FIELDNAME), + period_info->name.str, table->s->table_name.str); + + List_iterator vit(alter_info->check_constraint_list); + while (vit++ != period_info->constr) + { + // do nothing + } + vit.remove(); + + *period_info= {}; + } + + DBUG_RETURN(false); +} + + +static bool fix_constraints_names(THD *thd, List + *check_constraint_list, + const HA_CREATE_INFO *create_info) +{ + List_iterator it((*check_constraint_list)); + Virtual_column_info *check; + uint nr= 1; + DBUG_ENTER("fix_constraints_names"); + if (!check_constraint_list) + DBUG_RETURN(FALSE); + // Prevent accessing freed memory during generating unique names + while ((check=it++)) + { + if (check->automatic_name) + { + check->name.str= NULL; + check->name.length= 0; + } + } + it.rewind(); + // Generate unique names if needed + while ((check=it++)) + { + if (!check->name.length) + { + check->automatic_name= TRUE; + + const char *own_name_base= 
create_info->period_info.constr == check + ? create_info->period_info.name.str : NULL; + + if (make_unique_constraint_name(thd, &check->name, + own_name_base, + check_constraint_list, + &nr)) + DBUG_RETURN(TRUE); + } + } + DBUG_RETURN(FALSE); +} + + +static int compare_uint(const uint *s, const uint *t) +{ + return (*s < *t) ? -1 : ((*s > *t) ? 1 : 0); +} + +static Compare_keys merge(Compare_keys current, Compare_keys add) { + if (current == Compare_keys::Equal) + return add; + + if (add == Compare_keys::Equal) + return current; + + if (current == add) + return current; + + if (current == Compare_keys::EqualButComment) { + return Compare_keys::NotEqual; + } + + if (current == Compare_keys::EqualButKeyPartLength) { + if (add == Compare_keys::EqualButComment) + return Compare_keys::NotEqual; + DBUG_ASSERT(add == Compare_keys::NotEqual); + return Compare_keys::NotEqual; + } + + DBUG_ASSERT(current == Compare_keys::NotEqual); + return current; +} + +Compare_keys compare_keys_but_name(const KEY *table_key, const KEY *new_key, + Alter_info *alter_info, const TABLE *table, + const KEY *const new_pk, + const KEY *const old_pk) +{ + if (table_key->algorithm != new_key->algorithm) + return Compare_keys::NotEqual; + + if ((table_key->flags & HA_KEYFLAG_MASK) != + (new_key->flags & HA_KEYFLAG_MASK)) + return Compare_keys::NotEqual; + + if (table_key->user_defined_key_parts != new_key->user_defined_key_parts) + return Compare_keys::NotEqual; + + if (table_key->block_size != new_key->block_size) + return Compare_keys::NotEqual; + + /* + Rebuild the index if following condition get satisfied: + + (i) Old table doesn't have primary key, new table has it and vice-versa + (ii) Primary key changed to another existing index + */ + if ((new_key == new_pk) != (table_key == old_pk)) + return Compare_keys::NotEqual; + + if (engine_options_differ(table_key->option_struct, new_key->option_struct, + table->file->ht->index_options)) + return Compare_keys::NotEqual; + + Compare_keys result= 
Compare_keys::Equal; + + for (const KEY_PART_INFO * + key_part= table_key->key_part, + *new_part= new_key->key_part, + *end= table_key->key_part + table_key->user_defined_key_parts; + key_part < end; key_part++, new_part++) + { + /* + For prefix keys KEY_PART_INFO::field points to cloned Field + object with adjusted length. So below we have to check field + indexes instead of simply comparing pointers to Field objects. + */ + const Create_field &new_field= + *alter_info->create_list.elem(new_part->fieldnr); + + if (!new_field.field || + new_field.field->field_index != key_part->fieldnr - 1) + { + return Compare_keys::NotEqual; + } + + /* + Check the descending flag for index field. + */ + if ((new_part->key_part_flag ^ key_part->key_part_flag) & HA_REVERSE_SORT) + { + return Compare_keys::NotEqual; + } + + auto compare= table->file->compare_key_parts( + *table->field[key_part->fieldnr - 1], new_field, *key_part, *new_part); + result= merge(result, compare); + } + + /* Check that key comment is not changed. */ + if (cmp(table_key->comment, new_key->comment) != 0) + result= merge(result, Compare_keys::EqualButComment); + + return result; +} + + +/** + Look-up KEY object by index name using case-insensitive comparison. + + @param key_name Index name. + @param key_start Start of array of KEYs for table. + @param key_end End of array of KEYs for table. + + @note Case-insensitive comparison is necessary to correctly + handle renaming of keys. + + @retval non-NULL - pointer to KEY object for index found. + @retval NULL - no index with such name found (or it is marked + as renamed). +*/ + +static KEY *find_key_ci(const char *key_name, KEY *key_start, KEY *key_end) +{ + for (KEY *key = key_start; key < key_end; key++) + { + if (!my_strcasecmp(system_charset_info, key_name, key->name.str)) + return key; + } + return NULL; +} + + +/** + Compare original and new versions of a table and fill Alter_inplace_info + describing differences between those versions. 
+ + @param thd Thread + @param table The original table. + @param varchar Indicates that new definition has new + VARCHAR column. + @param[in/out] ha_alter_info Data structure which already contains + basic information about create options, + field and keys for the new version of + table and which should be completed with + more detailed information needed for + in-place ALTER. + + First argument 'table' contains information of the original + table, which includes all corresponding parts that the new + table has in arguments create_list, key_list and create_info. + + Compare the changes between the original and new table definitions. + The result of this comparison is then passed to SE which determines + whether it can carry out these changes in-place. + + Mark any changes detected in the ha_alter_flags. + We generally try to specify handler flags only if there are real + changes. But in cases when it is cumbersome to determine if some + attribute has really changed we might choose to set flag + pessimistically, for example, relying on parser output only. + + If there are no data changes, but index changes, 'index_drop_buffer' + and/or 'index_add_buffer' are populated with offsets into + table->key_info or key_info_buffer respectively for the indexes + that need to be dropped and/or (re-)created. + + Note that this function assumes that it is OK to change Alter_info + and HA_CREATE_INFO which it gets. It is caller who is responsible + for creating copies for this structures if he needs them unchanged. + + @retval true error + @retval false success +*/ + +static bool fill_alter_inplace_info(THD *thd, TABLE *table, bool varchar, + Alter_inplace_info *ha_alter_info) +{ + Field **f_ptr, *field; + List_iterator_fast new_field_it; + Create_field *new_field; + Alter_info *alter_info= ha_alter_info->alter_info; + DBUG_ENTER("fill_alter_inplace_info"); + DBUG_PRINT("info", ("alter_info->flags: %llu", alter_info->flags)); + + /* Allocate result buffers. 
*/ + DBUG_ASSERT(ha_alter_info->rename_keys.mem_root() == thd->mem_root); + if (! (ha_alter_info->index_drop_buffer= + (KEY**) thd->alloc(sizeof(KEY*) * table->s->keys)) || + ! (ha_alter_info->index_add_buffer= + (uint*) thd->alloc(sizeof(uint) * + alter_info->key_list.elements)) || + ha_alter_info->rename_keys.reserve(ha_alter_info->index_add_count) || + ! (ha_alter_info->index_altered_ignorability_buffer= + (KEY_PAIR*)thd->alloc(sizeof(KEY_PAIR) * + alter_info->alter_index_ignorability_list.elements))) + DBUG_RETURN(true); + + /* + Copy parser flags, but remove some flags that handlers doesn't + need to care about (old engines may not ignore these parser flags). + ALTER_RENAME_COLUMN is replaced by ALTER_COLUMN_NAME. + ALTER_CHANGE_COLUMN_DEFAULT is replaced by ALTER_CHANGE_COLUMN + ALTER_PARSE_ADD_COLUMN, ALTER_PARSE_DROP_COLUMN, ALTER_ADD_INDEX and + ALTER_DROP_INDEX are replaced with versions that have higher granuality. + */ + + alter_table_operations flags_to_remove= + ALTER_ADD_INDEX | ALTER_DROP_INDEX | ALTER_PARSER_ADD_COLUMN | + ALTER_PARSER_DROP_COLUMN | ALTER_COLUMN_ORDER | ALTER_RENAME_COLUMN | + ALTER_CHANGE_COLUMN; + + if (!table->file->native_versioned()) + flags_to_remove|= ALTER_COLUMN_UNVERSIONED; + + ha_alter_info->handler_flags|= (alter_info->flags & ~flags_to_remove); + /* + Comparing new and old default values of column is cumbersome. + So instead of using such a comparison for detecting if default + has really changed we rely on flags set by parser to get an + approximate value for storage engine flag. + */ + if (alter_info->flags & ALTER_CHANGE_COLUMN) + ha_alter_info->handler_flags|= ALTER_COLUMN_DEFAULT; + + /* + If we altering table with old VARCHAR fields we will be automatically + upgrading VARCHAR column types. 
+ */ + if (table->s->frm_version < FRM_VER_TRUE_VARCHAR && varchar) + ha_alter_info->handler_flags|= ALTER_STORED_COLUMN_TYPE; + + DBUG_PRINT("info", ("handler_flags: %llu", ha_alter_info->handler_flags)); + + /* + Go through fields in old version of table and detect changes to them. + We don't want to rely solely on Alter_info flags for this since: + a) new definition of column can be fully identical to the old one + despite the fact that this column is mentioned in MODIFY clause. + b) even if new column type differs from its old column from metadata + point of view, it might be identical from storage engine point + of view (e.g. when ENUM('a','b') is changed to ENUM('a','b',c')). + c) flags passed to storage engine contain more detailed information + about nature of changes than those provided from parser. + */ + bool maybe_alter_vcol= false; + uint field_stored_index= 0; + for (f_ptr= table->field; (field= *f_ptr); f_ptr++, + field_stored_index+= field->stored_in_db()) + { + /* Clear marker for renamed or dropped field + which we are going to set later. */ + field->flags&= ~(FIELD_IS_RENAMED | FIELD_IS_DROPPED); + + /* Use transformed info to evaluate flags for storage engine. */ + uint new_field_index= 0, new_field_stored_index= 0; + new_field_it.init(alter_info->create_list); + while ((new_field= new_field_it++)) + { + if (new_field->field == field) + break; + new_field_index++; + new_field_stored_index+= new_field->stored_in_db(); + } + + if (new_field) + { + /* Field is not dropped. Evaluate changes bitmap for it. */ + + /* + Check if type of column has changed. + */ + bool is_equal= field->is_equal(*new_field); + if (!is_equal) + { + if (field->table->file->can_convert_nocopy(*field, *new_field)) + { + /* + New column type differs from the old one, but storage engine can + change it by itself. + (for example, VARCHAR(300) is changed to VARCHAR(400)). 
+ */ + ha_alter_info->handler_flags|= ALTER_COLUMN_TYPE_CHANGE_BY_ENGINE; + } + else + { + /* New column type is incompatible with old one. */ + ha_alter_info->handler_flags|= field->stored_in_db() + ? ALTER_STORED_COLUMN_TYPE + : ALTER_VIRTUAL_COLUMN_TYPE; + + if (table->s->tmp_table == NO_TMP_TABLE) + { + if (alter_info->drop_stat_fields.push_back(field, thd->mem_root)) + DBUG_RETURN(true); + + KEY *key_info= table->key_info; + for (uint i= 0; i < table->s->keys; i++, key_info++) + { + if (!field->part_of_key.is_set(i)) + continue; + + uint key_parts= table->actual_n_key_parts(key_info); + for (uint j= 0; j < key_parts; j++) + { + if (key_info->key_part[j].fieldnr - 1 == field->field_index) + { + if (alter_info->add_stat_drop_index(key_info, + j >= key_info->user_defined_key_parts, + thd->mem_root)) + DBUG_RETURN(true); + break; + } + } + } + } + } + } + + if (field->vcol_info || new_field->vcol_info) + { + /* base <-> virtual or stored <-> virtual */ + if (field->stored_in_db() != new_field->stored_in_db()) + ha_alter_info->handler_flags|= ( ALTER_STORED_COLUMN_TYPE | + ALTER_VIRTUAL_COLUMN_TYPE); + if (field->vcol_info && new_field->vcol_info) + { + bool value_changes= !is_equal; + alter_table_operations alter_expr; + if (field->stored_in_db()) + alter_expr= ALTER_STORED_GCOL_EXPR; + else + alter_expr= ALTER_VIRTUAL_GCOL_EXPR; + if (!field->vcol_info->is_equal(new_field->vcol_info)) + { + ha_alter_info->handler_flags|= alter_expr; + value_changes= true; + } + + if ((ha_alter_info->handler_flags & ALTER_COLUMN_DEFAULT) + && !(ha_alter_info->handler_flags & alter_expr)) + { /* + a DEFAULT value of a some column was changed. see if this vcol + uses DEFAULT() function. The check is kind of expensive, so don't + do it if ALTER_COLUMN_VCOL is already set. 
+ */ + if (field->vcol_info->expr->walk( + &Item::check_func_default_processor, 0, 0)) + { + ha_alter_info->handler_flags|= alter_expr; + value_changes= true; + } + } + + if (field->vcol_info->is_in_partitioning_expr() || + field->flags & PART_KEY_FLAG || field->stored_in_db()) + { + if (value_changes) + ha_alter_info->handler_flags|= ALTER_COLUMN_VCOL; + else + maybe_alter_vcol= true; + } + } + else /* base <-> stored */ + ha_alter_info->handler_flags|= ALTER_STORED_COLUMN_TYPE; + } + + /* + Check if field was renamed (case-sensitive for detecting case change) + */ + if (cmp(&field->field_name, &new_field->field_name)) + { + field->flags|= FIELD_IS_RENAMED; + ha_alter_info->handler_flags|= ALTER_COLUMN_NAME; + if (alter_info->add_stat_rename_field(field, + &new_field->field_name, + thd->mem_root)) + DBUG_RETURN(true); + } + + /* Check that NULL behavior is same for old and new fields */ + if ((new_field->flags & NOT_NULL_FLAG) != + (uint) (field->flags & NOT_NULL_FLAG)) + { + if (new_field->flags & NOT_NULL_FLAG) + ha_alter_info->handler_flags|= ALTER_COLUMN_NOT_NULLABLE; + else + ha_alter_info->handler_flags|= ALTER_COLUMN_NULLABLE; + } + + /* + We do not detect changes to default values in this loop. + See comment above for more details. + */ + + /* + Detect changes in column order. 
+ */ + if (field->stored_in_db()) + { + if (field_stored_index != new_field_stored_index) + ha_alter_info->handler_flags|= ALTER_STORED_COLUMN_ORDER; + } + else + { + if (field->field_index != new_field_index) + ha_alter_info->handler_flags|= ALTER_VIRTUAL_COLUMN_ORDER; + } + + /* Detect changes in storage type of column */ + if (new_field->field_storage_type() != field->field_storage_type()) + ha_alter_info->handler_flags|= ALTER_COLUMN_STORAGE_TYPE; + + /* Detect changes in column format of column */ + if (new_field->column_format() != field->column_format()) + ha_alter_info->handler_flags|= ALTER_COLUMN_COLUMN_FORMAT; + + if (engine_options_differ(field->option_struct, new_field->option_struct, + table->file->ht->field_options)) + { + ha_alter_info->handler_flags|= ALTER_COLUMN_OPTION; + ha_alter_info->create_info->fields_option_struct[f_ptr - table->field]= + new_field->option_struct; + } + } + else + { + // Field is not present in new version of table and therefore was dropped. + field->flags|= FIELD_IS_DROPPED; + if (field->stored_in_db()) + ha_alter_info->handler_flags|= ALTER_DROP_STORED_COLUMN; + else + ha_alter_info->handler_flags|= ALTER_DROP_VIRTUAL_COLUMN; + } + } + + if (maybe_alter_vcol) + { + /* + What if one of the normal columns was altered and it was part of the some + virtual column expression? Currently we don't detect this correctly + (FIXME), so let's just say that a vcol *might* be affected if any other + column was altered. + */ + if (ha_alter_info->handler_flags & (ALTER_STORED_COLUMN_TYPE | + ALTER_VIRTUAL_COLUMN_TYPE | + ALTER_COLUMN_NOT_NULLABLE | + ALTER_COLUMN_OPTION)) + ha_alter_info->handler_flags|= ALTER_COLUMN_VCOL; + } + + new_field_it.init(alter_info->create_list); + while ((new_field= new_field_it++)) + { + if (! new_field->field) + { + // Field is not present in old version of table and therefore was added. 
+ if (new_field->vcol_info) + { + if (new_field->stored_in_db()) + ha_alter_info->handler_flags|= ALTER_ADD_STORED_GENERATED_COLUMN; + else + ha_alter_info->handler_flags|= ALTER_ADD_VIRTUAL_COLUMN; + } + else + ha_alter_info->handler_flags|= ALTER_ADD_STORED_BASE_COLUMN; + } + } + + /* + Go through keys and check if the original ones are compatible + with new table. + */ + KEY *table_key; + KEY *table_key_end= table->key_info + table->s->keys; + KEY *new_key; + KEY *new_key_end= + ha_alter_info->key_info_buffer + ha_alter_info->key_count; + /* + Primary key index for the new table + */ + const KEY* const new_pk= (ha_alter_info->key_count > 0 && + (!my_strcasecmp(system_charset_info, + ha_alter_info->key_info_buffer->name.str, + primary_key_name.str) || + is_candidate_key(ha_alter_info->key_info_buffer))) ? + ha_alter_info->key_info_buffer : NULL; + const KEY *const old_pk= table->s->primary_key == MAX_KEY ? NULL : + table->key_info + table->s->primary_key; + + DBUG_PRINT("info", ("index count old: %d new: %d", + table->s->keys, ha_alter_info->key_count)); + + /* + Step through all keys of the old table and search matching new keys. + */ + ha_alter_info->index_drop_count= 0; + ha_alter_info->index_add_count= 0; + for (table_key= table->key_info; table_key < table_key_end; table_key++) + { + /* Search a new key with the same name. */ + for (new_key= ha_alter_info->key_info_buffer; + new_key < new_key_end; + new_key++) + { + if (!lex_string_cmp(system_charset_info, &table_key->name, + &new_key->name)) + break; + } + if (new_key >= new_key_end) + { + /* Key not found. Add the key to the drop buffer. 
*/ + ha_alter_info->index_drop_buffer + [ha_alter_info->index_drop_count++]= + table_key; + DBUG_PRINT("info", ("index dropped: '%s'", table_key->name.str)); + continue; + } + + switch (compare_keys_but_name(table_key, new_key, alter_info, table, new_pk, + old_pk)) + { + case Compare_keys::Equal: + continue; + case Compare_keys::EqualButKeyPartLength: + ha_alter_info->handler_flags|= ALTER_COLUMN_INDEX_LENGTH; + continue; + case Compare_keys::EqualButComment: + ha_alter_info->handler_flags|= ALTER_CHANGE_INDEX_COMMENT; + continue; + case Compare_keys::NotEqual: + break; + } + + /* Key modified. Add the key / key offset to both buffers. */ + ha_alter_info->index_drop_buffer + [ha_alter_info->index_drop_count++]= + table_key; + ha_alter_info->index_add_buffer + [ha_alter_info->index_add_count++]= + (uint)(new_key - ha_alter_info->key_info_buffer); + /* Mark all old fields which are used in newly created index. */ + DBUG_PRINT("info", ("index changed: '%s'", table_key->name.str)); + } + /*end of for (; table_key < table_key_end;) */ + + /* + Step through all keys of the new table and find matching old keys. + */ + for (new_key= ha_alter_info->key_info_buffer; + new_key < new_key_end; + new_key++) + { + /* Search an old key with the same name. */ + for (table_key= table->key_info; table_key < table_key_end; table_key++) + { + if (!lex_string_cmp(system_charset_info, &table_key->name, + &new_key->name)) + break; + } + if (table_key >= table_key_end) + { + /* Key not found. Add the offset of the key to the add buffer. 
*/ + ha_alter_info->index_add_buffer + [ha_alter_info->index_add_count++]= + (uint)(new_key - ha_alter_info->key_info_buffer); + DBUG_PRINT("info", ("index added: '%s'", new_key->name.str)); + } + else + ha_alter_info->create_info->indexes_option_struct[table_key - table->key_info]= + new_key->option_struct; + } + + for (uint i= 0; i < ha_alter_info->index_add_count; i++) + { + uint *add_buffer= ha_alter_info->index_add_buffer; + const KEY *new_key= ha_alter_info->key_info_buffer + add_buffer[i]; + + for (uint j= 0; j < ha_alter_info->index_drop_count; j++) + { + KEY **drop_buffer= ha_alter_info->index_drop_buffer; + const KEY *old_key= drop_buffer[j]; + + if (compare_keys_but_name(old_key, new_key, alter_info, table, new_pk, + old_pk) != Compare_keys::Equal) + { + continue; + } + + DBUG_ASSERT( + lex_string_cmp(system_charset_info, &old_key->name, &new_key->name)); + + ha_alter_info->handler_flags|= ALTER_RENAME_INDEX; + ha_alter_info->rename_keys.push_back( + Alter_inplace_info::Rename_key_pair(old_key, new_key)); + + --ha_alter_info->index_add_count; + --ha_alter_info->index_drop_count; + memmove(add_buffer + i, add_buffer + i + 1, + sizeof(add_buffer[0]) * (ha_alter_info->index_add_count - i)); + memmove(drop_buffer + j, drop_buffer + j + 1, + sizeof(drop_buffer[0]) * (ha_alter_info->index_drop_count - j)); + --i; // this index once again + break; + } + } + + List_iterator + ignorability_index_it(alter_info->alter_index_ignorability_list); + Alter_index_ignorability *alter_index_ignorability; + while((alter_index_ignorability= ignorability_index_it++)) + { + const char *name= alter_index_ignorability->name(); + + KEY *old_key, *new_key; + old_key= find_key_ci(name, table->key_info, table_key_end); + new_key= find_key_ci(name, ha_alter_info->key_info_buffer, new_key_end); + + DBUG_ASSERT(old_key != NULL); + + if (new_key == NULL) + { + my_error(ER_KEY_DOES_NOT_EXISTS, MYF(0), name, table->s->table_name.str); + DBUG_RETURN(true); + } + new_key->is_ignored= 
alter_index_ignorability->is_ignored(); + ha_alter_info->handler_flags|= ALTER_RENAME_INDEX; + ha_alter_info->add_altered_index_ignorability(old_key, new_key); + } + + /* + Sort index_add_buffer according to how key_info_buffer is sorted. + I.e. with primary keys first - see sort_keys(). + */ + my_qsort(ha_alter_info->index_add_buffer, + ha_alter_info->index_add_count, + sizeof(uint), (qsort_cmp) compare_uint); + + /* Now let us calculate flags for storage engine API. */ + + /* Figure out what kind of indexes we are dropping. */ + KEY **dropped_key; + KEY **dropped_key_end= ha_alter_info->index_drop_buffer + + ha_alter_info->index_drop_count; + + for (dropped_key= ha_alter_info->index_drop_buffer; + dropped_key < dropped_key_end; dropped_key++) + { + table_key= *dropped_key; + + if (table_key->flags & HA_NOSAME) + { + if (table_key == old_pk) + ha_alter_info->handler_flags|= ALTER_DROP_PK_INDEX; + else + ha_alter_info->handler_flags|= ALTER_DROP_UNIQUE_INDEX; + } + else + ha_alter_info->handler_flags|= ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX; + } + + /* Now figure out what kind of indexes we are adding. */ + for (uint add_key_idx= 0; add_key_idx < ha_alter_info->index_add_count; add_key_idx++) + { + new_key= ha_alter_info->key_info_buffer + ha_alter_info->index_add_buffer[add_key_idx]; + + if (new_key->flags & HA_NOSAME) + { + if (new_key == new_pk) + ha_alter_info->handler_flags|= ALTER_ADD_PK_INDEX; + else + ha_alter_info->handler_flags|= ALTER_ADD_UNIQUE_INDEX; + } + else + ha_alter_info->handler_flags|= ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX; + } + + DBUG_PRINT("exit", ("handler_flags: %llu", ha_alter_info->handler_flags)); + DBUG_RETURN(false); +} + + +/** + Mark fields participating in newly added indexes in TABLE object which + corresponds to new version of altered table. + + @param ha_alter_info Alter_inplace_info describing in-place ALTER. + @param altered_table TABLE object for new version of TABLE in which + fields should be marked. 
+*/ + +static void update_altered_table(const Alter_inplace_info &ha_alter_info, + TABLE *altered_table) +{ + uint field_idx, add_key_idx; + KEY *key; + KEY_PART_INFO *end, *key_part; + + /* + Clear marker for all fields, as we are going to set it only + for fields which participate in new indexes. + */ + for (field_idx= 0; field_idx < altered_table->s->fields; ++field_idx) + altered_table->field[field_idx]->flags&= ~FIELD_IN_ADD_INDEX; + + /* + Go through array of newly added indexes and mark fields + participating in them. + */ + for (add_key_idx= 0; add_key_idx < ha_alter_info.index_add_count; + add_key_idx++) + { + key= ha_alter_info.key_info_buffer + + ha_alter_info.index_add_buffer[add_key_idx]; + + end= key->key_part + key->user_defined_key_parts; + for (key_part= key->key_part; key_part < end; key_part++) + altered_table->field[key_part->fieldnr]->flags|= FIELD_IN_ADD_INDEX; + } +} + + +/** + Compare two tables to see if their metadata are compatible. + One table specified by a TABLE instance, the other using Alter_info + and HA_CREATE_INFO. + + @param[in] table The first table. + @param[in] alter_info Alter options, fields and keys for the + second table. + @param[in] create_info Create options for the second table. + @param[out] metadata_equal Result of comparison. + + @retval true error + @retval false success +*/ + +bool mysql_compare_tables(TABLE *table, Alter_info *alter_info, + HA_CREATE_INFO *create_info, bool *metadata_equal) +{ + DBUG_ENTER("mysql_compare_tables"); + + uint changes= IS_EQUAL_NO; + uint key_count; + List_iterator_fast tmp_new_field_it; + THD *thd= table->in_use; + *metadata_equal= false; + + /* + Create a copy of alter_info. + To compare definitions, we need to "prepare" the definition - transform it + from parser output to a format that describes the table layout (all column + defaults are initialized, duplicate columns are removed). This is done by + mysql_prepare_create_table. 
Unfortunately, mysql_prepare_create_table + performs its transformations "in-place", that is, modifies the argument. + Since we would like to keep mysql_compare_tables() idempotent (not altering + any of the arguments) we create a copy of alter_info here and pass it to + mysql_prepare_create_table, then use the result to compare the tables, and + then destroy the copy. + */ + Alter_info tmp_alter_info(*alter_info, thd->mem_root); + uint db_options= 0; /* not used */ + KEY *key_info_buffer= NULL; + + /* Create the prepared information. */ + int create_table_mode= table->s->tmp_table == NO_TMP_TABLE ? + C_ORDINARY_CREATE : C_ALTER_TABLE; + if (mysql_prepare_create_table(thd, create_info, &tmp_alter_info, + &db_options, table->file, &key_info_buffer, + &key_count, create_table_mode)) + DBUG_RETURN(1); + + /* Some very basic checks. */ + if (table->s->fields != alter_info->create_list.elements || + table->s->db_type() != create_info->db_type || + table->s->tmp_table || + (table->s->row_type != create_info->row_type)) + DBUG_RETURN(false); + + /* Go through fields and check if they are compatible. */ + tmp_new_field_it.init(tmp_alter_info.create_list); + for (Field **f_ptr= table->field; *f_ptr; f_ptr++) + { + Field *field= *f_ptr; + Create_field *tmp_new_field= tmp_new_field_it++; + + /* Check that NULL behavior is the same. */ + if ((tmp_new_field->flags & NOT_NULL_FLAG) != + (uint) (field->flags & NOT_NULL_FLAG)) + DBUG_RETURN(false); + + /* + mysql_prepare_alter_table() clears HA_OPTION_PACK_RECORD bit when + preparing description of existing table. In ALTER TABLE it is later + updated to correct value by create_table_impl() call. + So to get correct value of this bit in this function we have to + mimic behavior of create_table_impl(). 
+ */ + if (create_info->row_type == ROW_TYPE_DYNAMIC || + create_info->row_type == ROW_TYPE_PAGE || + (tmp_new_field->flags & BLOB_FLAG) || + (tmp_new_field->real_field_type() == MYSQL_TYPE_VARCHAR && + create_info->row_type != ROW_TYPE_FIXED)) + create_info->table_options|= HA_OPTION_PACK_RECORD; + + /* Check if field was renamed */ + if (lex_string_cmp(system_charset_info, + &field->field_name, + &tmp_new_field->field_name)) + DBUG_RETURN(false); + + /* Evaluate changes bitmap and send to check_if_incompatible_data() */ + uint field_changes= field->is_equal(*tmp_new_field); + if (field_changes != IS_EQUAL_YES) + DBUG_RETURN(false); + + changes|= field_changes; + } + + /* Check if changes are compatible with current handler. */ + if (table->file->check_if_incompatible_data(create_info, changes)) + DBUG_RETURN(false); + + /* Go through keys and check if they are compatible. */ + KEY *table_key; + KEY *table_key_end= table->key_info + table->s->keys; + KEY *new_key; + KEY *new_key_end= key_info_buffer + key_count; + + /* Step through all keys of the first table and search matching keys. */ + for (table_key= table->key_info; table_key < table_key_end; table_key++) + { + /* Search a key with the same name. */ + for (new_key= key_info_buffer; new_key < new_key_end; new_key++) + { + if (!lex_string_cmp(system_charset_info, &table_key->name, + &new_key->name)) + break; + } + if (new_key >= new_key_end) + DBUG_RETURN(false); + + /* Check that the key types are compatible. */ + if ((table_key->algorithm != new_key->algorithm) || + ((table_key->flags & HA_KEYFLAG_MASK) != + (new_key->flags & HA_KEYFLAG_MASK)) || + (table_key->user_defined_key_parts != + new_key->user_defined_key_parts)) + DBUG_RETURN(false); + + /* Check that the key parts remain compatible. 
*/ + KEY_PART_INFO *table_part; + KEY_PART_INFO *table_part_end= table_key->key_part + table_key->user_defined_key_parts; + KEY_PART_INFO *new_part; + for (table_part= table_key->key_part, new_part= new_key->key_part; + table_part < table_part_end; + table_part++, new_part++) + { + /* + Key definition is different if we are using a different field or + if the used key part length is different. We know that the fields + are equal. Comparing field numbers is sufficient. + */ + if ((table_part->length != new_part->length) || + (table_part->fieldnr - 1 != new_part->fieldnr) || + ((table_part->key_part_flag ^ new_part->key_part_flag) & HA_REVERSE_SORT)) + DBUG_RETURN(false); + } + } + + /* Step through all keys of the second table and find matching keys. */ + for (new_key= key_info_buffer; new_key < new_key_end; new_key++) + { + /* Search a key with the same name. */ + for (table_key= table->key_info; table_key < table_key_end; table_key++) + { + if (!lex_string_cmp(system_charset_info, &table_key->name, + &new_key->name)) + break; + } + if (table_key >= table_key_end) + DBUG_RETURN(false); + } + + *metadata_equal= true; // Tables are compatible + DBUG_RETURN(false); +} + + +/* + Manages enabling/disabling of indexes for ALTER TABLE + + SYNOPSIS + alter_table_manage_keys() + table Target table + indexes_were_disabled Whether the indexes of the from table + were disabled + keys_onoff ENABLE | DISABLE | LEAVE_AS_IS + + RETURN VALUES + FALSE OK + TRUE Error +*/ + +static +bool alter_table_manage_keys(TABLE *table, int indexes_were_disabled, + Alter_info::enum_enable_or_disable keys_onoff) +{ + int error= 0; + DBUG_ENTER("alter_table_manage_keys"); + DBUG_PRINT("enter", ("table=%p were_disabled=%d on_off=%d", + table, indexes_were_disabled, keys_onoff)); + + switch (keys_onoff) { + case Alter_info::ENABLE: + DEBUG_SYNC(table->in_use, "alter_table_enable_indexes"); + error= table->file->ha_enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE); + break; + case Alter_info::LEAVE_AS_IS: + 
if (!indexes_were_disabled) + break; + /* fall through */ + case Alter_info::DISABLE: + error= table->file->ha_disable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE); + } + + if (unlikely(error)) + { + if (error == HA_ERR_WRONG_COMMAND) + { + THD *thd= table->in_use; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_ILLEGAL_HA, ER_THD(thd, ER_ILLEGAL_HA), + table->file->table_type(), + table->s->db.str, table->s->table_name.str); + error= 0; + } + else + table->file->print_error(error, MYF(0)); + } + DBUG_RETURN(error); +} + + +/** + Check if the pending ALTER TABLE operations support the in-place + algorithm based on restrictions in the SQL layer or given the + nature of the operations themselves. If in-place isn't supported, + it won't be necessary to check with the storage engine. + + @param table The original TABLE. + @param create_info Information from the parsing phase about new + table properties. + @param alter_info Data related to detected changes. + + @return false In-place is possible, check with storage engine. + @return true Incompatible operations, must use table copy. +*/ + +static bool is_inplace_alter_impossible(TABLE *table, + HA_CREATE_INFO *create_info, + const Alter_info *alter_info) +{ + DBUG_ENTER("is_inplace_alter_impossible"); + + /* At the moment we can't handle altering temporary tables without a copy. */ + if (table->s->tmp_table) + DBUG_RETURN(true); + + /* + For the ALTER TABLE tbl_name ORDER BY ... we always use copy + algorithm. In theory, this operation can be done in-place by some + engine, but since a) no current engine does this and b) our current + API lacks infrastructure for passing information about table ordering + to storage engine we simply always do copy now. + + ENABLE/DISABLE KEYS is a MyISAM/Heap specific operation that is + not supported for in-place in combination with other operations. + Alone, it will be done by simple_rename_or_index_change(). 
+ */ + if (alter_info->flags & (ALTER_ORDER | ALTER_KEYS_ONOFF)) + DBUG_RETURN(true); + + /* + If the table engine is changed explicitly (using ENGINE clause) + or implicitly (e.g. when non-partitioned table becomes + partitioned) a regular alter table (copy) needs to be + performed. + */ + if (create_info->db_type != table->s->db_type()) + DBUG_RETURN(true); + + /* + There was a bug prior to mysql-4.0.25. Number of null fields was + calculated incorrectly. As a result frm and data files gets out of + sync after fast alter table. There is no way to determine by which + mysql version (in 4.0 and 4.1 branches) table was created, thus we + disable fast alter table for all tables created by mysql versions + prior to 5.0 branch. + See BUG#6236. + */ + if (!table->s->mysql_version) + DBUG_RETURN(true); + + /* + If we are using a MySQL 5.7 table with virtual fields, ALTER TABLE must + recreate the table as we need to rewrite generated fields + */ + if (table->s->mysql_version > 50700 && table->s->mysql_version < 100000 && + table->s->virtual_fields) + DBUG_RETURN(TRUE); + + DBUG_RETURN(false); +} + + +/* + Notify engine that table definition has changed as part of inplace alter + table +*/ + +static bool notify_tabledef_changed(TABLE_LIST *table_list) +{ + TABLE *table= table_list->table; + DBUG_ENTER("notify_tabledef_changed"); + + if (table->file->partition_ht()->notify_tabledef_changed) + { + char db_buff[FN_REFLEN], table_buff[FN_REFLEN]; + handlerton *hton= table->file->ht; + LEX_CSTRING tmp_db, tmp_table; + + tmp_db.str= db_buff; + tmp_table.str= table_buff; + tmp_db.length= tablename_to_filename(table_list->db.str, + db_buff, sizeof(db_buff)); + tmp_table.length= tablename_to_filename(table_list->table_name.str, + table_buff, sizeof(table_buff)); + if ((hton->notify_tabledef_changed)(hton, &tmp_db, &tmp_table, + table->s->frm_image, + &table->s->tabledef_version, + table->file)) + { + my_error(HA_ERR_INCOMPATIBLE_DEFINITION, MYF(0)); + DBUG_RETURN(true); + } + } + 
DBUG_RETURN(0); +} + +/** + The function is invoked in error branches of ALTER processing. + Write Rollback alter in case of partial_alter is true else + call process_master_state. + + @param thd Thread handle. + @param[in/out] + start_alter_id Start Alter identifier or zero, + it is reset to zero. + @param[in/out] + partial_alter When is true at the function enter + that indicates Start Alter phase completed; + it then is reset to false. + @param if_exists True indicates the binary logging of the query + should be done with "if exists" option. + + @return false on Success + @return true otherwise +*/ +static bool +write_bin_log_start_alter_rollback(THD *thd, uint64 &start_alter_id, + bool &partial_alter, bool if_exists) +{ +#if defined(HAVE_REPLICATION) + if (start_alter_id) + { + start_alter_info *info= thd->rgi_slave->sa_info; + Master_info *mi= thd->rgi_slave->rli->mi; + + if (info->sa_seq_no == 0) + { + /* + Error occurred before SA got to processing incl its binlogging. + So it's a failure to apply and thus no need to wait for master's + completion result. + */ + return true; + } + mysql_mutex_lock(&mi->start_alter_lock); + if (info->direct_commit_alter) + { + DBUG_ASSERT(info->state == start_alter_state::ROLLBACK_ALTER); + + /* + SA may end up in the rollback state through FTWRL that breaks + SA's waiting for a master decision. + Then it completes "officially", and `direct_commit_alter` true status + will affect the future of CA to re-execute the whole query. 
+ */ + info->state= start_alter_state::COMPLETED; + if (info->direct_commit_alter) + mysql_cond_broadcast(&info->start_alter_cond); + mysql_mutex_unlock(&mi->start_alter_lock); + + return true; // not really an error to be handled by caller specifically + } + mysql_mutex_unlock(&mi->start_alter_lock); + /* + We have to call wait for master here because in main calculation + we can error out before calling wait for master + (for example if copy_data_between_tables fails) + */ + if (info->state == start_alter_state::REGISTERED) + wait_for_master(thd); + if(process_master_state(thd, 1, start_alter_id, if_exists)) + return true; + } + else +#endif + if (partial_alter) // Write only if SA written + { + // Send the rollback message + Write_log_with_flags wlwf(thd, Gtid_log_event::FL_ROLLBACK_ALTER_E1); + if(write_bin_log_with_if_exists(thd, false, false, if_exists, false)) + return true; + partial_alter= false; + } + return false; +} + + +/** + Perform in-place alter table. + + @param thd Thread handle. + @param table_list TABLE_LIST for the table to change. + @param table The original TABLE. + @param altered_table TABLE object for new version of the table. + @param ha_alter_info Structure describing ALTER TABLE to be carried + out and serving as a storage place for data + used during different phases. + @param target_mdl_request Metadata request/lock on the target table name. + @param alter_ctx ALTER TABLE runtime context. + @param partial_alter Is set to true to return the fact of the first + "START ALTER" binlogging phase is done. + @param[in/out] + start_alter_id Gtid seq_no of START ALTER or zero otherwise; + it may get changed to return to the caller. + @param if_exists True indicates the binary logging of the query + should be done with "if exists" option. 
+ + @retval >=1 Error{ 1= ROLLBACK recieved from master , 2= error + in alter so no ROLLBACK in binlog } + @retval 0 Success + + @note + If mysql_alter_table does not need to copy the table, it is + either an alter table where the storage engine does not + need to know about the change, only the frm will change, + or the storage engine supports performing the alter table + operation directly, in-place without mysql having to copy + the table. + + @note This function frees the TABLE object associated with the new version of + the table and removes the .FRM file for it in case of both success and + failure. +*/ + +static bool mysql_inplace_alter_table(THD *thd, + TABLE_LIST *table_list, + TABLE *table, + TABLE *altered_table, + Alter_inplace_info *ha_alter_info, + MDL_request *target_mdl_request, + DDL_LOG_STATE *ddl_log_state, + TRIGGER_RENAME_PARAM *trigger_param, + Alter_table_ctx *alter_ctx, + bool &partial_alter, + uint64 &start_alter_id, bool if_exists) +{ + Open_table_context ot_ctx(thd, MYSQL_OPEN_REOPEN | MYSQL_OPEN_IGNORE_KILLED); + handlerton *db_type= table->s->db_type(); + Alter_info *alter_info= ha_alter_info->alter_info; + bool reopen_tables= false; + bool res, commit_succeded_with_error= 0; + + const enum_alter_inplace_result inplace_supported= + ha_alter_info->inplace_supported; + DBUG_ENTER("mysql_inplace_alter_table"); + + /* Downgrade DDL lock while we are waiting for exclusive lock below */ + backup_set_alter_copy_lock(thd, table); + + /* + Upgrade to EXCLUSIVE lock if: + - This is requested by the storage engine + - Or the storage engine needs exclusive lock for just the prepare + phase + - Or requested by the user + + Note that we handle situation when storage engine needs exclusive + lock for prepare phase under LOCK TABLES in the same way as when + exclusive lock is required for duration of the whole statement. 
+ */ + if (inplace_supported == HA_ALTER_INPLACE_EXCLUSIVE_LOCK || + ((inplace_supported == HA_ALTER_INPLACE_COPY_NO_LOCK || + inplace_supported == HA_ALTER_INPLACE_COPY_LOCK || + inplace_supported == HA_ALTER_INPLACE_NOCOPY_NO_LOCK || + inplace_supported == HA_ALTER_INPLACE_NOCOPY_LOCK || + inplace_supported == HA_ALTER_INPLACE_INSTANT) && + (thd->locked_tables_mode == LTM_LOCK_TABLES || + thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES)) || + alter_info->requested_lock == Alter_info::ALTER_TABLE_LOCK_EXCLUSIVE) + { + if (wait_while_table_is_used(thd, table, HA_EXTRA_FORCE_REOPEN)) + goto cleanup; + /* + Get rid of all TABLE instances belonging to this thread + except one to be used for in-place ALTER TABLE. + + This is mostly needed to satisfy InnoDB assumptions/asserts. + */ + close_all_tables_for_name(thd, table->s, + alter_ctx->is_table_renamed() ? + HA_EXTRA_PREPARE_FOR_RENAME : + HA_EXTRA_NOT_USED, + table); + /* + If we are under LOCK TABLES we will need to reopen tables which we + just have closed in case of error. + */ + reopen_tables= true; + } + else if (inplace_supported == HA_ALTER_INPLACE_COPY_LOCK || + inplace_supported == HA_ALTER_INPLACE_COPY_NO_LOCK || + inplace_supported == HA_ALTER_INPLACE_NOCOPY_LOCK || + inplace_supported == HA_ALTER_INPLACE_NOCOPY_NO_LOCK || + inplace_supported == HA_ALTER_INPLACE_INSTANT) + { + /* + Storage engine has requested exclusive lock only for prepare phase + and we are not under LOCK TABLES. + Don't mark TABLE_SHARE as old in this case, as this won't allow opening + of table by other threads during main phase of in-place ALTER TABLE. 
+ */ + if (thd->mdl_context.upgrade_shared_lock(table->mdl_ticket, MDL_EXCLUSIVE, + thd->variables.lock_wait_timeout)) + goto cleanup; + + table->s->tdc->flush(thd, false); + } + + /* + Upgrade to SHARED_NO_WRITE lock if: + - The storage engine needs writes blocked for the whole duration + - Or this is requested by the user + Note that under LOCK TABLES, we will already have SHARED_NO_READ_WRITE. + */ + if ((inplace_supported == HA_ALTER_INPLACE_SHARED_LOCK || + alter_info->requested_lock == Alter_info::ALTER_TABLE_LOCK_SHARED) && + thd->mdl_context.upgrade_shared_lock(table->mdl_ticket, + MDL_SHARED_NO_WRITE, + thd->variables.lock_wait_timeout)) + goto cleanup; + + DBUG_ASSERT(table->s->tmp_table == NO_TMP_TABLE || start_alter_id == 0); + + if (table->s->tmp_table == NO_TMP_TABLE) + { + if (write_bin_log_start_alter(thd, partial_alter, start_alter_id, + if_exists)) + goto cleanup; + } + else if (start_alter_id) + { + DBUG_ASSERT(thd->rgi_slave); + + my_error(ER_INCONSISTENT_SLAVE_TEMP_TABLE, MYF(0), thd->query(), + table_list->db.str, table_list->table_name.str); + goto cleanup; + } + + DBUG_EXECUTE_IF("start_alter_kill_after_binlog", { + DBUG_SUICIDE(); + }); + + + // It's now safe to take the table level lock. 
+ if (lock_tables(thd, table_list, alter_ctx->tables_opened, 0)) + goto cleanup; + DEBUG_SYNC(thd, "alter_table_inplace_after_lock_upgrade"); + THD_STAGE_INFO(thd, stage_alter_inplace_prepare); + + switch (inplace_supported) { + case HA_ALTER_ERROR: + case HA_ALTER_INPLACE_NOT_SUPPORTED: + DBUG_ASSERT(0); + // fall through + case HA_ALTER_INPLACE_NO_LOCK: + case HA_ALTER_INPLACE_INSTANT: + case HA_ALTER_INPLACE_COPY_NO_LOCK: + case HA_ALTER_INPLACE_NOCOPY_NO_LOCK: + switch (alter_info->requested_lock) { + case Alter_info::ALTER_TABLE_LOCK_DEFAULT: + case Alter_info::ALTER_TABLE_LOCK_NONE: + ha_alter_info->online= true; + break; + case Alter_info::ALTER_TABLE_LOCK_SHARED: + case Alter_info::ALTER_TABLE_LOCK_EXCLUSIVE: + break; + } + break; + case HA_ALTER_INPLACE_EXCLUSIVE_LOCK: + case HA_ALTER_INPLACE_SHARED_LOCK: + case HA_ALTER_INPLACE_COPY_LOCK: + case HA_ALTER_INPLACE_NOCOPY_LOCK: + break; + } + + ddl_log_update_phase(ddl_log_state, DDL_ALTER_TABLE_PHASE_PREPARE_INPLACE); + + if (table->file->ha_prepare_inplace_alter_table(altered_table, + ha_alter_info)) + goto rollback; + + debug_crash_here("ddl_log_alter_after_prepare_inplace"); + + /* + Store the new table_version() as it may have not been available before + in some engines, like InnoDB. + */ + ddl_log_update_unique_id(ddl_log_state, + table->file->table_version()); + /* + Mark that we have started inplace alter table. DDL recover will + have to decide if it should use the old or new version of the table, based + on if the new version did commit or not. + */ + ddl_log_update_phase(ddl_log_state, DDL_ALTER_TABLE_PHASE_INPLACE); + + /* + Downgrade the lock if storage engine has told us that exclusive lock was + necessary only for prepare phase (unless we are not under LOCK TABLES) and + user has not explicitly requested exclusive lock. 
+ */ + if (!ha_alter_info->mdl_exclusive_after_prepare && + (inplace_supported == HA_ALTER_INPLACE_COPY_NO_LOCK || + inplace_supported == HA_ALTER_INPLACE_COPY_LOCK || + inplace_supported == HA_ALTER_INPLACE_NOCOPY_LOCK || + inplace_supported == HA_ALTER_INPLACE_NOCOPY_NO_LOCK) && + !(thd->locked_tables_mode == LTM_LOCK_TABLES || + thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES) && + (alter_info->requested_lock != Alter_info::ALTER_TABLE_LOCK_EXCLUSIVE)) + { + /* If storage engine or user requested shared lock downgrade to SNW. */ + if (inplace_supported == HA_ALTER_INPLACE_COPY_LOCK || + inplace_supported == HA_ALTER_INPLACE_NOCOPY_LOCK || + alter_info->requested_lock == Alter_info::ALTER_TABLE_LOCK_SHARED) + table->mdl_ticket->downgrade_lock(MDL_SHARED_NO_WRITE); + else + { + DBUG_ASSERT(inplace_supported == HA_ALTER_INPLACE_COPY_NO_LOCK || + inplace_supported == HA_ALTER_INPLACE_NOCOPY_NO_LOCK); + table->mdl_ticket->downgrade_lock(MDL_SHARED_UPGRADABLE); + } + } + + DEBUG_SYNC(thd, "alter_table_inplace_after_lock_downgrade"); + THD_STAGE_INFO(thd, stage_alter_inplace); + DBUG_EXECUTE_IF("start_alter_delay_master", { + debug_sync_set_action(thd, + STRING_WITH_LEN("now wait_for alter_cont NO_CLEAR_EVENT")); + }); + + /* We can abort alter table for any table type */ + thd->abort_on_warning= !ha_alter_info->ignore && thd->is_strict_mode(); + res= table->file->ha_inplace_alter_table(altered_table, ha_alter_info); + thd->abort_on_warning= false; + + if (start_alter_id && wait_for_master(thd)) + goto rollback; + + if (res) + goto rollback; + + + DEBUG_SYNC(thd, "alter_table_inplace_before_lock_upgrade"); + // Upgrade to EXCLUSIVE before commit. 
+ if (wait_while_table_is_used(thd, table, HA_EXTRA_PREPARE_FOR_RENAME)) + goto rollback; + + /* Set MDL_BACKUP_DDL */ + if (backup_reset_alter_copy_lock(thd)) + goto rollback; + + /* Crashing here should cause the original table to be used */ + debug_crash_here("ddl_log_alter_after_copy"); + /* + If we are killed after this point, we should ignore and continue. + We have mostly completed the operation at this point, there should + be no long waits left. + */ + + DEBUG_SYNC(thd, "alter_table_inplace_before_commit"); + THD_STAGE_INFO(thd, stage_alter_inplace_commit); + + DBUG_EXECUTE_IF("alter_table_rollback_new_index", { + table->file->ha_commit_inplace_alter_table(altered_table, + ha_alter_info, + false); + my_error(ER_UNKNOWN_ERROR, MYF(0)); + goto cleanup; + }); + + /* + Notify the engine that the table definition has changed so that it can + store the new ID as part of the commit + */ + + if (!(table->file->partition_ht()->flags & + HTON_REQUIRES_NOTIFY_TABLEDEF_CHANGED_AFTER_COMMIT) && + notify_tabledef_changed(table_list)) + goto rollback; + + { + TR_table trt(thd, true); + if (trt != *table_list && table->file->ht->prepare_commit_versioned) + { + ulonglong trx_start_id= 0; + ulonglong trx_end_id= table->file->ht->prepare_commit_versioned(thd, &trx_start_id); + if (trx_end_id) + { + if (!TR_table::use_transaction_registry) + { + my_error(ER_VERS_TRT_IS_DISABLED, MYF(0)); + goto rollback; + } + if (trt.update(trx_start_id, trx_end_id)) + goto rollback; + } + } + + if (table->file->ha_commit_inplace_alter_table(altered_table, + ha_alter_info, + true)) + goto rollback; + DEBUG_SYNC(thd, "alter_table_inplace_after_commit"); + } + + /* + We are new ready to use the new table. Update the state in the + ddl log so that we recovery know that the new table is ready and + in case of crash it should use the new one and log the query + to the binary log. 
+ */ + ha_alter_info->alter_info->apply_statistics_deletes_renames(thd, table); + + ddl_log_update_phase(ddl_log_state, DDL_ALTER_TABLE_PHASE_INPLACE_COPIED); + debug_crash_here("ddl_log_alter_after_log"); + + if ((table->file->partition_ht()->flags & + HTON_REQUIRES_NOTIFY_TABLEDEF_CHANGED_AFTER_COMMIT) && + notify_tabledef_changed(table_list)) + { + /* + The above should never fail. If it failed, the new structure is + commited and we have no way to roll back. + The best we can do is to continue, but send an error to the + user that something when wrong + */ + commit_succeded_with_error= 1; + } + + close_all_tables_for_name(thd, table->s, + alter_ctx->is_table_renamed() ? + HA_EXTRA_PREPARE_FOR_RENAME : + HA_EXTRA_NOT_USED, + NULL); + table_list->table= table= NULL; + + /* + Replace the old .FRM with the new .FRM, but keep the old name for now. + Rename to the new name (if needed) will be handled separately below. + */ + /* + TODO: remove this check of thd->is_error() (now it intercept + errors in some val_*() methods and bring some single place to + such error interception). + */ + if (mysql_rename_table(db_type, &alter_ctx->new_db, &alter_ctx->tmp_name, + &alter_ctx->db, &alter_ctx->alias, + &alter_ctx->tmp_id, + FN_FROM_IS_TMP | NO_HA_TABLE) || + thd->is_error()) + { + // Since changes were done in-place, we can't revert them. + goto err; + } + debug_crash_here("ddl_log_alter_after_rename_frm"); + + // Rename altered table in case of ALTER TABLE ... RENAME + if (alter_ctx->is_table_renamed()) + { + DBUG_ASSERT(!tdc_share_is_cached(thd, alter_ctx->db.str, + alter_ctx->table_name.str)); + if (mysql_rename_table(db_type, &alter_ctx->db, &alter_ctx->table_name, + &alter_ctx->new_db, &alter_ctx->new_alias, + &alter_ctx->tmp_id, 0)) + { + /* + If the rename fails we will still have a working table + with the old name, but with other changes applied. 
+ */ + goto err; + } + debug_crash_here("ddl_log_alter_before_rename_triggers"); + if (Table_triggers_list::change_table_name(thd, trigger_param, + &alter_ctx->db, + &alter_ctx->alias, + &alter_ctx->table_name, + &alter_ctx->new_db, + &alter_ctx->new_alias)) + { + /* + If the rename of trigger files fails, try to rename the table + back so we at least have matching table and trigger files. + */ + (void) mysql_rename_table(db_type, + &alter_ctx->new_db, &alter_ctx->new_alias, + &alter_ctx->db, &alter_ctx->alias, + &alter_ctx->id, + NO_FK_CHECKS); + ddl_log_disable_entry(ddl_log_state); + DBUG_RETURN(true); + } + rename_table_in_stat_tables(thd, &alter_ctx->db, &alter_ctx->alias, + &alter_ctx->new_db, &alter_ctx->new_alias); + debug_crash_here("ddl_log_alter_after_rename_triggers"); + } + + DBUG_RETURN(commit_succeded_with_error); + + rollback: + table->file->ha_commit_inplace_alter_table(altered_table, + ha_alter_info, + false); + cleanup: + if (reopen_tables) + { + /* Close the only table instance which is still around. */ + close_all_tables_for_name(thd, table->s, + alter_ctx->is_table_renamed() ? + HA_EXTRA_PREPARE_FOR_RENAME : + HA_EXTRA_NOT_USED, + NULL); + if (thd->locked_tables_list.reopen_tables(thd, false)) + thd->locked_tables_list.unlink_all_closed_tables(thd, NULL, 0); + } + +err: + DBUG_RETURN(true); +} + + +/** + maximum possible length for certain blob types. + + @param[in] type Blob type (e.g. 
MYSQL_TYPE_TINY_BLOB) + + @return + length +*/ + +static uint +blob_length_by_type(enum_field_types type) +{ + switch (type) + { + case MYSQL_TYPE_TINY_BLOB: + return 255; + case MYSQL_TYPE_BLOB: + return 65535; + case MYSQL_TYPE_MEDIUM_BLOB: + return 16777215; + case MYSQL_TYPE_LONG_BLOB: + return (uint) UINT_MAX32; + default: + DBUG_ASSERT(0); // we should never go here + return 0; + } +} + + +static inline +void append_drop_column(THD *thd, String *str, Field *field) +{ + if (str->length()) + str->append(STRING_WITH_LEN(", ")); + str->append(STRING_WITH_LEN("DROP COLUMN ")); + append_identifier(thd, str, &field->field_name); +} + + +#ifdef WITH_PARTITION_STORAGE_ENGINE +static inline +void rename_field_in_list(Create_field *field, List *field_list) +{ + DBUG_ASSERT(field->change.str); + List_iterator it(*field_list); + while (const char *name= it++) + { + if (my_strcasecmp(system_charset_info, name, field->change.str)) + continue; + it.replace(field->field_name.str); + } +} +#endif + + +/** + Prepare column and key definitions for CREATE TABLE in ALTER TABLE. + + This function transforms parse output of ALTER TABLE - lists of + columns and keys to add, drop or modify into, essentially, + CREATE TABLE definition - a list of columns and keys of the new + table. While doing so, it also performs some (bug not all) + semantic checks. + + This function is invoked when we know that we're going to + perform ALTER TABLE via a temporary table -- i.e. in-place ALTER TABLE + is not possible, perhaps because the ALTER statement contains + instructions that require change in table data, not only in + table definition or indexes. + + @param[in,out] thd thread handle. Used as a memory pool + and source of environment information. + @param[in] table the source table, open and locked + Used as an interface to the storage engine + to acquire additional information about + the original table. 
+ @param[in,out] create_info A blob with CREATE/ALTER TABLE + parameters + @param[in,out] alter_info Another blob with ALTER/CREATE parameters. + Originally create_info was used only in + CREATE TABLE and alter_info only in ALTER TABLE. + But since ALTER might end-up doing CREATE, + this distinction is gone and we just carry + around two structures. + @param[in,out] alter_ctx Runtime context for ALTER TABLE. + + @return + Fills various create_info members based on information retrieved + from the storage engine. + Sets create_info->varchar if the table has a VARCHAR column. + Prepares alter_info->create_list and alter_info->key_list with + columns and keys of the new table. + + @retval TRUE error, out of memory or a semantical error in ALTER + TABLE instructions + @retval FALSE success +*/ + +bool +mysql_prepare_alter_table(THD *thd, TABLE *table, + Table_specification_st *create_info, + Alter_info *alter_info, + Alter_table_ctx *alter_ctx) +{ + /* New column definitions are added here */ + List new_create_list; + /* System-invisible fields must be added last */ + List new_create_tail; + /* New key definitions are added here */ + List new_key_list; + List fk_list; + List rename_key_list(alter_info->alter_rename_key_list); + + /* + Create a deep copy of the list of visibility for indexes, as it will be + altered here. 
+ */ + List + alter_index_ignorability_list(alter_info->alter_index_ignorability_list, + thd->mem_root); + + list_copy_and_replace_each_value(alter_index_ignorability_list, thd->mem_root); + + List_iterator drop_it(alter_info->drop_list); + List_iterator def_it(alter_info->create_list); + List_iterator alter_it(alter_info->alter_list); + List_iterator key_it(alter_info->key_list); + List_iterator find_it(new_create_list); + List_iterator field_it(new_create_list); + List key_parts; + List new_constraint_list; + uint db_create_options= (table->s->db_create_options + & ~(HA_OPTION_PACK_RECORD)); + Item::func_processor_rename column_rename_param; + uint used_fields, dropped_sys_vers_fields= 0; + KEY *key_info=table->key_info; + bool rc= TRUE; + bool vers_system_invisible= false; + Create_field *def; + Field **f_ptr,*field; + MY_BITMAP *dropped_fields= NULL; // if it's NULL - no dropped fields + bool drop_period= false; + LEX_CSTRING period_start_name= {nullptr, 0}; + LEX_CSTRING period_end_name= {nullptr, 0}; + DBUG_ENTER("mysql_prepare_alter_table"); + + if (table->s->period.name) + { + period_start_name= table->s->period_start_field()->field_name; + period_end_name= table->s->period_end_field()->field_name; + } + + /* + Merge incompatible changes flag in case of upgrade of a table from an + old MariaDB or MySQL version. This ensures that we don't try to do an + online alter table if field packing or character set changes are required. 
+ */ + create_info->used_fields|= table->s->incompatible_version; + used_fields= create_info->used_fields; + + create_info->varchar= FALSE; + /* Let new create options override the old ones */ + if (!(used_fields & HA_CREATE_USED_MIN_ROWS)) + create_info->min_rows= table->s->min_rows; + if (!(used_fields & HA_CREATE_USED_MAX_ROWS)) + create_info->max_rows= table->s->max_rows; + if (!(used_fields & HA_CREATE_USED_AVG_ROW_LENGTH)) + create_info->avg_row_length= table->s->avg_row_length; + + if (create_info->resolve_to_charset_collation_context(thd, + thd->charset_collation_context_alter_table(table->s))) + DBUG_RETURN(true); + + if (!(used_fields & HA_CREATE_USED_AUTO) && table->found_next_number_field) + { + /* Table has an autoincrement, copy value to new table */ + table->file->info(HA_STATUS_AUTO); + create_info->auto_increment_value= table->file->stats.auto_increment_value; + } + + if (!(used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE)) + create_info->key_block_size= table->s->key_block_size; + + if (!(used_fields & HA_CREATE_USED_STATS_SAMPLE_PAGES)) + create_info->stats_sample_pages= table->s->stats_sample_pages; + + if (!(used_fields & HA_CREATE_USED_STATS_AUTO_RECALC)) + create_info->stats_auto_recalc= table->s->stats_auto_recalc; + + if (!(used_fields & HA_CREATE_USED_TRANSACTIONAL)) + create_info->transactional= table->s->transactional; + + if (!(used_fields & HA_CREATE_USED_CONNECTION)) + create_info->connect_string= table->s->connect_string; + + if (!(used_fields & HA_CREATE_USED_SEQUENCE)) + create_info->sequence= table->s->table_type == TABLE_TYPE_SEQUENCE; + + column_rename_param.db_name= table->s->db; + column_rename_param.table_name= table->s->table_name; + if (column_rename_param.fields.copy(&alter_info->create_list, thd->mem_root)) + DBUG_RETURN(1); // OOM + + restore_record(table, s->default_values); // Empty record for DEFAULT + + if ((create_info->fields_option_struct= (ha_field_option_struct**) + thd->calloc(sizeof(void*) * table->s->fields)) == 
NULL || + (create_info->indexes_option_struct= (ha_index_option_struct**) + thd->calloc(sizeof(void*) * table->s->keys)) == NULL) + DBUG_RETURN(1); + + if (merge_engine_options(table->s->option_list, create_info->option_list, + &create_info->option_list, thd->mem_root)) + DBUG_RETURN(1); + + table->file->get_foreign_key_list(thd, &fk_list); + + /* + First collect all fields from table which isn't in drop_list + */ + bitmap_clear_all(&table->tmp_set); + for (f_ptr=table->field ; (field= *f_ptr) ; f_ptr++) + { + if (field->invisible == INVISIBLE_FULL) + continue; + Alter_drop *drop; + if (field->type() == MYSQL_TYPE_VARCHAR) + create_info->varchar= TRUE; + /* Check if field should be dropped */ + drop_it.rewind(); + while ((drop=drop_it++)) + { + if (drop->type == Alter_drop::COLUMN && + !my_strcasecmp(system_charset_info,field->field_name.str, drop->name)) + break; + } + /* + DROP COLULMN xxx + 1. it does not see INVISIBLE_SYSTEM columns + 2. otherwise, normally a column is dropped + 3. unless it's a system versioning column (but see below). 
+ */ + if (drop && field->invisible < INVISIBLE_SYSTEM && + !(field->flags & VERS_SYSTEM_FIELD && + !(alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING))) + { + /* Reset auto_increment value if it was dropped */ + if (MTYP_TYPENR(field->unireg_check) == Field::NEXT_NUMBER && + !(used_fields & HA_CREATE_USED_AUTO)) + { + create_info->auto_increment_value=0; + create_info->used_fields|=HA_CREATE_USED_AUTO; + } + if (table->s->tmp_table == NO_TMP_TABLE) + { + if (alter_info->drop_stat_fields.push_back(field, thd->mem_root)) + DBUG_RETURN(true); + } + dropped_sys_vers_fields|= field->flags; + drop_it.remove(); + dropped_fields= &table->tmp_set; + bitmap_set_bit(dropped_fields, field->field_index); + continue; + } + if (field->invisible == INVISIBLE_SYSTEM && + field->flags & VERS_SYSTEM_FIELD) + { + vers_system_invisible= true; + } + /* + invisible versioning column is dropped automatically on + DROP SYSTEM VERSIONING + */ + if (!drop && field->invisible >= INVISIBLE_SYSTEM && + field->flags & VERS_SYSTEM_FIELD && + alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING) + { + if (table->s->tmp_table == NO_TMP_TABLE) + { + if (alter_info->drop_stat_fields.push_back(field, thd->mem_root)) + DBUG_RETURN(true); + } + continue; + } + + /* Check if field is changed */ + def_it.rewind(); + while ((def=def_it++)) + { + if (def->change.str && + !lex_string_cmp(system_charset_info, &field->field_name, + &def->change)) + break; + } + if (def && field->invisible < INVISIBLE_SYSTEM) + { // Field is changed + def->field=field; + /* + Add column being updated to the list of new columns. + Note that columns with AFTER clauses are added to the end + of the list for now. Their positions will be corrected later. 
+ */ + new_create_list.push_back(def, thd->mem_root); + if (field->stored_in_db() != def->stored_in_db()) + { + my_error(ER_UNSUPPORTED_ACTION_ON_GENERATED_COLUMN, MYF(0)); + goto err; + } + if (!def->after.str) + { + /* + If this ALTER TABLE doesn't have an AFTER clause for the modified + column then remove this column from the list of columns to be + processed. So later we can iterate over the columns remaining + in this list and process modified columns with AFTER clause or + add new columns. + */ + def_it.remove(); + } + } + else if (alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING && + field->flags & VERS_SYSTEM_FIELD && + field->invisible < INVISIBLE_SYSTEM) + { + StringBuffer tmp; + append_drop_column(thd, &tmp, field); + my_error(ER_MISSING, MYF(0), table->s->table_name.str, tmp.c_ptr()); + goto err; + } + else if (drop && field->invisible < INVISIBLE_SYSTEM && + field->flags & VERS_SYSTEM_FIELD && + !(alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING)) + { + /* "dropping" a versioning field only hides it from the user */ + def= new (thd->mem_root) Create_field(thd, field, field); + def->invisible= INVISIBLE_SYSTEM; + alter_info->flags|= ALTER_CHANGE_COLUMN; + if (field->flags & VERS_ROW_START) + create_info->vers_info.period.start= + create_info->vers_info.as_row.start= + def->field_name= Vers_parse_info::default_start; + + else + create_info->vers_info.period.end= + create_info->vers_info.as_row.end= + def->field_name= Vers_parse_info::default_end; + new_create_list.push_back(def, thd->mem_root); + dropped_sys_vers_fields|= field->flags; + drop_it.remove(); + } + else if (field->invisible < INVISIBLE_SYSTEM) + { + /* + This field was not dropped and not changed, add it to the list + for the new table. 
+ */ + def= new (thd->mem_root) Create_field(thd, field, field); + new_create_list.push_back(def, thd->mem_root); + alter_it.rewind(); // Change default if ALTER + Alter_column *alter; + while ((alter=alter_it++)) + { + if (!my_strcasecmp(system_charset_info,field->field_name.str, + alter->name.str)) + break; + } + if (alter && field->invisible < INVISIBLE_SYSTEM) + { + if (alter->is_rename()) + { + def->change= alter->name; + def->field_name= alter->new_name; + column_rename_param.fields.push_back(def); + if (field->flags & VERS_ROW_START) + { + create_info->vers_info.as_row.start= alter->new_name; + create_info->vers_info.period.start= alter->new_name; + } + else if (field->flags & VERS_ROW_END) + { + create_info->vers_info.as_row.end= alter->new_name; + create_info->vers_info.period.end= alter->new_name; + } + if (table->s->period.name) + { + if (field == table->period_start_field()) + period_start_name= alter->new_name; + else if (field == table->period_end_field()) + period_end_name= alter->new_name; + } + } + else + { + if ((def->default_value= alter->default_value)) + def->flags&= ~NO_DEFAULT_VALUE_FLAG; + else + def->flags|= NO_DEFAULT_VALUE_FLAG; + } + alter_it.remove(); + } + } + else + { + DBUG_ASSERT(field->invisible == INVISIBLE_SYSTEM); + def= new (thd->mem_root) Create_field(thd, field, field); + new_create_tail.push_back(def, thd->mem_root); + } + } + + /* + If we are doing a rename of a column, update all references in virtual + column expressions, constraints and defaults to use the new column name + */ + if (alter_info->flags & ALTER_RENAME_COLUMN) + { + alter_it.rewind(); + Alter_column *alter; + while ((alter=alter_it++)) + { + if (alter->is_rename()) + { + my_error(ER_BAD_FIELD_ERROR, MYF(0), alter->name.str, + table->s->table_name.str); + goto err; + } + } + for (f_ptr=table->field ; (field= *f_ptr) ; f_ptr++) + { + if (field->vcol_info) + field->vcol_info->expr->walk(&Item::rename_fields_processor, 1, + &column_rename_param); + if 
(field->check_constraint) + field->check_constraint->expr->walk(&Item::rename_fields_processor, 1, + &column_rename_param); + if (field->default_value) + field->default_value->expr->walk(&Item::rename_fields_processor, 1, + &column_rename_param); + } +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (thd->work_part_info) + { + partition_info *part_info= thd->work_part_info; + List_iterator def_it(column_rename_param.fields); + const bool part_field_list= !part_info->part_field_list.is_empty(); + const bool subpart_field_list= !part_info->subpart_field_list.is_empty(); + if (part_info->part_expr) + part_info->part_expr->walk(&Item::rename_fields_processor, 1, + &column_rename_param); + if (part_info->subpart_expr) + part_info->subpart_expr->walk(&Item::rename_fields_processor, 1, + &column_rename_param); + if (part_field_list || subpart_field_list) + { + while (Create_field *def= def_it++) + { + if (def->change.str) + { + if (part_field_list) + rename_field_in_list(def, &part_info->part_field_list); + if (subpart_field_list) + rename_field_in_list(def, &part_info->subpart_field_list); + } /* if (def->change.str) */ + } /* while (def) */ + } /* if (part_field_list || subpart_field_list) */ + // Force reopen because new column name is on thd->mem_root + table->mark_table_for_reopen(); + } /* if (part_info) */ +#endif + // Force reopen because new column name is on thd->mem_root + table->mark_table_for_reopen(); + } + + dropped_sys_vers_fields &= VERS_SYSTEM_FIELD; + if ((dropped_sys_vers_fields || + alter_info->flags & ALTER_DROP_PERIOD) && + dropped_sys_vers_fields != VERS_SYSTEM_FIELD && + !vers_system_invisible) + { + StringBuffer tmp; + if (!(dropped_sys_vers_fields & VERS_ROW_START)) + append_drop_column(thd, &tmp, table->vers_start_field()); + if (!(dropped_sys_vers_fields & VERS_ROW_END)) + append_drop_column(thd, &tmp, table->vers_end_field()); + my_error(ER_MISSING, MYF(0), table->s->table_name.str, tmp.c_ptr()); + goto err; + } + else if (alter_info->flags & 
ALTER_DROP_PERIOD && vers_system_invisible) + { + my_error(ER_CANT_DROP_FIELD_OR_KEY, MYF(0), "PERIOD FOR SYSTEM_TIME on", table->s->table_name.str); + goto err; + } + alter_info->flags &= ~(ALTER_DROP_PERIOD | ALTER_ADD_PERIOD); + def_it.rewind(); + while ((def=def_it++)) // Add new columns + { + Create_field *find; + if (def->change.str && ! def->field) + { + /* + Check if there is modify for newly added field. + */ + find_it.rewind(); + while((find=find_it++)) + { + if (!my_strcasecmp(system_charset_info,find->field_name.str, + def->field_name.str)) + break; + } + + if (likely(find && !find->field)) + find_it.remove(); + else + { + my_error(ER_BAD_FIELD_ERROR, MYF(0), def->change.str, + table->s->table_name.str); + goto err; + } + } + /* + Check that the DATE/DATETIME not null field we are going to add is + either has a default value or the '0000-00-00' is allowed by the + set sql mode. + If the '0000-00-00' value isn't allowed then raise the error_if_not_empty + flag to allow ALTER TABLE only if the table to be altered is empty. + */ + if (!alter_ctx->implicit_default_value_error_field && !def->field && + !(~def->flags & (NO_DEFAULT_VALUE_FLAG | NOT_NULL_FLAG)) && + def->type_handler()->validate_implicit_default_value(thd, *def)) + { + alter_ctx->implicit_default_value_error_field= def; + alter_ctx->error_if_not_empty= TRUE; + } + if (!def->after.str) + new_create_list.push_back(def, thd->mem_root); + else + { + if (def->change.str) + { + find_it.rewind(); + /* + For columns being modified with AFTER clause we should first remove + these columns from the list and then add them back at their correct + positions. + */ + while ((find=find_it++)) + { + /* + Create_fields representing changed columns are added directly + from Alter_info::create_list to new_create_list. We can therefore + safely use pointer equality rather than name matching here. + This prevents removing the wrong column in case of column rename. 
+ */ + if (find == def) + { + find_it.remove(); + break; + } + } + } + if (def->after.str == first_keyword) + new_create_list.push_front(def, thd->mem_root); + else + { + find_it.rewind(); + while ((find=find_it++)) + { + if (!lex_string_cmp(system_charset_info, &def->after, + &find->field_name)) + break; + } + if (unlikely(!find)) + { + my_error(ER_BAD_FIELD_ERROR, MYF(0), def->after.str, + table->s->table_name.str); + goto err; + } + find_it.after(def); // Put column after this + } + } + /* + Check if there is alter for newly added field. + */ + alter_it.rewind(); + Alter_column *alter; + while ((alter=alter_it++)) + { + if (!my_strcasecmp(system_charset_info,def->field_name.str, + alter->name.str)) + break; + } + if (alter) + { + if ((def->default_value= alter->default_value)) // Use new default + def->flags&= ~NO_DEFAULT_VALUE_FLAG; + else + def->flags|= NO_DEFAULT_VALUE_FLAG; + alter_it.remove(); + } + } + + new_create_list.append(&new_create_tail); + + if (unlikely(alter_info->alter_list.elements)) + { + my_error(ER_BAD_FIELD_ERROR, MYF(0), + alter_info->alter_list.head()->name.str, table->s->table_name.str); + goto err; + } + if (unlikely(!new_create_list.elements)) + { + my_message(ER_CANT_REMOVE_ALL_FIELDS, + ER_THD(thd, ER_CANT_REMOVE_ALL_FIELDS), + MYF(0)); + goto err; + } + + /* + Collect all keys which isn't in drop list. Add only those + for which some fields exists. 
+ */ + for (uint i=0 ; i < table->s->keys ; i++,key_info++) + { + bool long_hash_key= false; + if (key_info->flags & HA_INVISIBLE_KEY) + continue; + const char *key_name= key_info->name.str; + const bool primary_key= table->s->primary_key == i; + const bool explicit_pk= primary_key && + !my_strcasecmp(system_charset_info, key_name, + primary_key_name.str); + const bool implicit_pk= primary_key && !explicit_pk; + + Alter_drop *drop; + drop_it.rewind(); + while ((drop=drop_it++)) + { + if (drop->type == Alter_drop::KEY && + !my_strcasecmp(system_charset_info,key_name, drop->name)) + break; + } + if (drop) + { + if (table->s->tmp_table == NO_TMP_TABLE) + { + if (alter_info->add_stat_drop_index(key_info, FALSE, thd->mem_root)) + DBUG_RETURN(true); + if (primary_key) + { + KEY *tab_key_info= table->key_info; + for (uint j=0; j < table->s->keys; j++, tab_key_info++) + { + if (tab_key_info != key_info && + tab_key_info->user_defined_key_parts != + tab_key_info->ext_key_parts) + { + if (alter_info->add_stat_drop_index(tab_key_info, TRUE, + thd->mem_root)) + DBUG_RETURN(true); + } + } + } + } + drop_it.remove(); + continue; + } + + List_iterator + ignorability_index_it(alter_index_ignorability_list); + + Alter_index_ignorability *index_ignorability; + while((index_ignorability= ignorability_index_it++)) + { + const char* name= index_ignorability->name(); + if (!my_strcasecmp(system_charset_info, key_name, name)) + ignorability_index_it.remove(); + } + + + /* If this index is to stay in the table check if it has to be renamed. 
*/ + List_iterator rename_key_it(rename_key_list); + Alter_rename_key *rename_key; + + while ((rename_key= rename_key_it++)) + { + if (!my_strcasecmp(system_charset_info, key_name, rename_key->old_name.str)) + { + if (!my_strcasecmp(system_charset_info, key_name, primary_key_name.str)) + { + my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), rename_key->old_name.str); + goto err; + } + else if (!my_strcasecmp(system_charset_info, rename_key->new_name.str, + primary_key_name.str)) + { + my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), rename_key->new_name.str); + goto err; + } + + key_name= rename_key->new_name.str; // New name of current key_info + if (cmp(&rename_key->old_name, &rename_key->new_name)) + { + /* Key was renamed */ + alter_info->add_stat_rename_index(key_info, &rename_key->new_name, + thd->mem_root); + } + rename_key_it.remove(); + + /* + If the user has explicitly renamed the key, we should no longer + treat it as generated. Otherwise this key might be automatically + dropped by mysql_prepare_create_table() and this will confuse + code in fill_alter_inplace_info(). 
+ */ + key_info->flags&= ~HA_GENERATED_KEY; + break; + } + } + + if (key_info->algorithm == HA_KEY_ALG_LONG_HASH) + { + setup_keyinfo_hash(key_info); + long_hash_key= true; + } + const char *dropped_key_part= NULL; + bool user_keyparts= false; // some user-defined keyparts left + KEY_PART_INFO *key_part= key_info->key_part; + key_parts.empty(); + uint key_parts_nr= key_info->user_defined_key_parts; + if (key_info->without_overlaps) + key_parts_nr-= 2; + + bool delete_index_stat= FALSE; + for (uint j=0 ; j < key_parts_nr ; j++,key_part++) + { + Field *kfield= key_part->field; + if (!kfield) + continue; // Wrong field (from UNIREG) + const char *key_part_name=kfield->field_name.str; + Create_field *cfield; + uint key_part_length; + + field_it.rewind(); + while ((cfield=field_it++)) + { + if (cfield->change.str) + { + if (!my_strcasecmp(system_charset_info, key_part_name, + cfield->change.str)) + break; + } + else if (!my_strcasecmp(system_charset_info, + key_part_name, cfield->field_name.str)) + break; + } + if (!cfield) + { + if (primary_key) + alter_ctx->modified_primary_key= true; + delete_index_stat= TRUE; + if (!(kfield->flags & VERS_SYSTEM_FIELD)) + dropped_key_part= key_part_name; + continue; // Field is removed + } + + DBUG_ASSERT(!primary_key || kfield->flags & NOT_NULL_FLAG); + if (implicit_pk && !alter_ctx->modified_primary_key && + !(cfield->flags & NOT_NULL_FLAG)) + alter_ctx->modified_primary_key= true; + + key_part_length= key_part->length; + if (cfield->field) // Not new field + { + /* + If the field can't have only a part used in a key according to its + new type, or should not be used partially according to its + previous type, or the field length is less than the key part + length, unset the key part length. + + We also unset the key part length if it is the same as the + old field's length, so the whole new field will be used. 
+ + BLOBs may have cfield->length == 0, which is why we test it before + checking whether cfield->length < key_part_length (in chars). + + In case of TEXTs we check the data type maximum length *in bytes* + to key part length measured *in characters* (i.e. key_part_length + devided to mbmaxlen). This is because it's OK to have: + CREATE TABLE t1 (a tinytext, key(a(254)) character set utf8); + In case of this example: + - data type maximum length is 255. + - key_part_length is 1016 (=254*4, where 4 is mbmaxlen) + */ + if (!cfield->field->type_handler()->type_can_have_key_part() || + !cfield->type_handler()->type_can_have_key_part() || + /* spatial keys can't have sub-key length */ + (key_info->flags & HA_SPATIAL) || + (cfield->field->field_length == key_part_length && + !f_is_blob(key_part->key_type)) || + (cfield->length && + (((cfield->real_field_type() >= MYSQL_TYPE_TINY_BLOB && + cfield->real_field_type() <= MYSQL_TYPE_BLOB) ? + blob_length_by_type(cfield->real_field_type()) : + cfield->length) < + key_part_length / kfield->charset()->mbmaxlen))) + key_part_length= 0; // Use whole field + } + key_part_length /= kfield->charset()->mbmaxlen; + Key_part_spec *kps= new (thd->mem_root) Key_part_spec(&cfield->field_name, + key_part_length, true); + kps->asc= !(key_part->key_part_flag & HA_REVERSE_SORT); + key_parts.push_back(kps, thd->mem_root); + if (!(cfield->invisible == INVISIBLE_SYSTEM && cfield->vers_sys_field())) + user_keyparts= true; + } + if (table->s->tmp_table == NO_TMP_TABLE) + { + if (delete_index_stat) + { + if (alter_info->add_stat_drop_index(key_info, FALSE, thd->mem_root)) + DBUG_RETURN(true); + } + else if (alter_ctx->modified_primary_key && + key_info->user_defined_key_parts != key_info->ext_key_parts) + { + if (alter_info->add_stat_drop_index(key_info, FALSE, + thd->mem_root)) + DBUG_RETURN(true); + } + } + + if (!user_keyparts && key_parts.elements) + { + /* + If we dropped all user key-parts we also drop implicit system fields. 
+ */ + key_parts.empty(); + } + + if (key_parts.elements) + { + KEY_CREATE_INFO key_create_info; + Key *key; + enum Key::Keytype key_type; + LEX_CSTRING tmp_name; + bzero((char*) &key_create_info, sizeof(key_create_info)); + if (key_info->algorithm == HA_KEY_ALG_LONG_HASH) + key_info->algorithm= HA_KEY_ALG_UNDEF; + key_create_info.algorithm= key_info->algorithm; + /* + We copy block size directly as some engines, like Area, sets this + automatically + */ + key_create_info.block_size= key_info->block_size; + key_create_info.flags= key_info->flags; // HA_USE_BLOCK_SIZE + if (key_info->flags & HA_USES_PARSER) + key_create_info.parser_name= *plugin_name(key_info->parser); + if (key_info->flags & HA_USES_COMMENT) + key_create_info.comment= key_info->comment; + key_create_info.is_ignored= key_info->is_ignored; + + if (key_info->flags & HA_SPATIAL) + key_type= Key::SPATIAL; + else if (key_info->flags & HA_NOSAME) + { + if (explicit_pk) + key_type= Key::PRIMARY; + else + key_type= Key::UNIQUE; + if (dropped_key_part) + { + my_error(ER_KEY_COLUMN_DOES_NOT_EXIST, MYF(0), dropped_key_part); + if (long_hash_key) + { + key_info->algorithm= HA_KEY_ALG_LONG_HASH; + re_setup_keyinfo_hash(key_info); + } + goto err; + } + } + else if (key_info->flags & HA_FULLTEXT) + key_type= Key::FULLTEXT; + else + key_type= Key::MULTIPLE; + + List_iterator + ignorability_index_it(alter_info->alter_index_ignorability_list); + Alter_index_ignorability *index_ignorability; + while((index_ignorability= ignorability_index_it++)) + { + const char *name= index_ignorability->name(); + if (!my_strcasecmp(system_charset_info, key_name, name)) + { + if (table->s->primary_key <= MAX_KEY && + table->key_info + table->s->primary_key == key_info) + { + my_error(ER_PK_INDEX_CANT_BE_IGNORED, MYF(0)); + goto err; + } + key_create_info.is_ignored= index_ignorability->is_ignored(); + } + } + + tmp_name.str= key_name; + tmp_name.length= strlen(key_name); + /* We dont need LONG_UNIQUE_HASH_FIELD flag because it will 
be autogenerated */ + key= new (thd->mem_root) Key(key_type, &tmp_name, &key_create_info, + key_info->flags & HA_GENERATED_KEY, + &key_parts, key_info->option_list, DDL_options()); + key->without_overlaps= key_info->without_overlaps; + key->period= table->s->period.name; + key->old= true; + new_key_list.push_back(key, thd->mem_root); + } + if (long_hash_key) + { + key_info->algorithm= HA_KEY_ALG_LONG_HASH; + re_setup_keyinfo_hash(key_info); + } + } + { + // add existing foreign keys + for (auto &fk : fk_list) + { + Alter_drop *drop; + for(drop_it.rewind(); (drop=drop_it++); ) + if (drop->type == Alter_drop::FOREIGN_KEY && + !my_strcasecmp(system_charset_info, fk.foreign_id->str, drop->name)) + break; + if (drop) + continue; + List cols, ref_cols; + for (LEX_CSTRING &c : fk.foreign_fields) + cols.push_back(new (thd->mem_root) Key_part_spec(&c, 0)); + for (LEX_CSTRING &c : fk.referenced_fields) + ref_cols.push_back(new (thd->mem_root) Key_part_spec(&c, 0)); + auto key= new (thd->mem_root) + Foreign_key(fk.foreign_id, &cols, fk.foreign_id, fk.referenced_db, + fk.referenced_table, &ref_cols, fk.delete_method, fk.update_method, + Foreign_key::FK_MATCH_UNDEF, DDL_options()); + key->old= true; + new_key_list.push_back(key, thd->mem_root); + } + } + { + Key *key; + while ((key=key_it++)) // Add new keys + { + if (key->type == Key::FOREIGN_KEY && + ((Foreign_key *)key)->validate(new_create_list)) + goto err; + new_key_list.push_back(key, thd->mem_root); + if (key->name.str && + !my_strcasecmp(system_charset_info, key->name.str, + primary_key_name.str)) + { + my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), key->name.str); + goto err; + } + } + } + + if (table->s->period.name) + { + drop_it.rewind(); + Alter_drop *drop; + for (bool found= false; !found && (drop= drop_it++); ) + { + found= drop->type == Alter_drop::PERIOD && + table->s->period.name.streq(drop->name); + } + + if (drop) + { + drop_period= true; + drop_it.remove(); + } + else if (create_info->period_info.is_set() && 
table->s->period.name) + { + my_error(ER_MORE_THAN_ONE_PERIOD, MYF(0)); + goto err; + } + else + { + create_info->period_info.set_period(period_start_name, period_end_name); + create_info->period_info.name= table->s->period.name; + } + } + + /* Add all table level constraints which are not in the drop list */ + if (table->s->table_check_constraints) + { + TABLE_SHARE *share= table->s; + + for (uint i= share->field_check_constraints; + i < share->table_check_constraints ; i++) + { + Virtual_column_info *check= table->check_constraints[i]; + Alter_drop *drop; + bool keep= true; + drop_it.rewind(); + while ((drop=drop_it++)) + { + if (drop->type == Alter_drop::CHECK_CONSTRAINT && + !my_strcasecmp(system_charset_info, check->name.str, drop->name)) + { + drop_it.remove(); + keep= false; + break; + } + } + + if (share->period.constr_name.streq(check->name.str)) + { + if (!drop_period && !keep) + { + my_error(ER_PERIOD_CONSTRAINT_DROP, MYF(0), check->name.str, + share->period.name.str); + goto err; + } + keep= keep && !drop_period; + + DBUG_ASSERT(create_info->period_info.constr == NULL || drop_period); + + if (keep) + { + Item *expr_copy= check->expr->get_copy(thd); + check= new Virtual_column_info(); + check->name= share->period.constr_name; + check->automatic_name= true; + check->expr= expr_copy; + create_info->period_info.constr= check; + } + } + /* see if the constraint depends on *only* on dropped fields */ + if (keep && dropped_fields) + { + table->default_column_bitmaps(); + bitmap_clear_all(table->read_set); + check->expr->walk(&Item::register_field_in_read_map, 1, 0); + if (bitmap_is_subset(table->read_set, dropped_fields)) + keep= false; + else if (bitmap_is_overlapping(dropped_fields, table->read_set)) + { + bitmap_intersect(table->read_set, dropped_fields); + uint field_nr= bitmap_get_first_set(table->read_set); + my_error(ER_BAD_FIELD_ERROR, MYF(0), + table->field[field_nr]->field_name.str, "CHECK"); + goto err; + } + } + if (keep) + { + if 
(alter_info->flags & ALTER_RENAME_COLUMN) + { + check->expr->walk(&Item::rename_fields_processor, 1, + &column_rename_param); + // Force reopen because new column name is on thd->mem_root + table->mark_table_for_reopen(); + } + new_constraint_list.push_back(check, thd->mem_root); + } + } + } + + if (!alter_info->check_constraint_list.is_empty()) + { + /* Check the table FOREIGN KEYs for name duplications. */ + FOREIGN_KEY_INFO *f_key; + List_iterator fk_key_it(fk_list); + while ((f_key= fk_key_it++)) + { + List_iterator_fast + c_it(alter_info->check_constraint_list); + Virtual_column_info *check; + while ((check= c_it++)) + { + if (!check->name.length || check->automatic_name) + continue; + + if (check->name.length == f_key->foreign_id->length && + my_strcasecmp(system_charset_info, f_key->foreign_id->str, + check->name.str) == 0) + { + my_error(ER_DUP_CONSTRAINT_NAME, MYF(0), "CHECK", check->name.str); + goto err; + } + } + } + } + + /* Add new constraints */ + new_constraint_list.append(&alter_info->check_constraint_list); + + if (alter_info->drop_list.elements) + { + Alter_drop *drop; + drop_it.rewind(); + while ((drop=drop_it++)) { + switch (drop->type) { + case Alter_drop::KEY: + case Alter_drop::COLUMN: + case Alter_drop::CHECK_CONSTRAINT: + case Alter_drop::PERIOD: + my_error(ER_CANT_DROP_FIELD_OR_KEY, MYF(0), drop->type_name(), + alter_info->drop_list.head()->name); + goto err; + case Alter_drop::FOREIGN_KEY: + // Leave the DROP FOREIGN KEY names in the alter_info->drop_list. 
+ break; + } + } + } + + if (rename_key_list.elements) + { + my_error(ER_KEY_DOES_NOT_EXISTS, MYF(0), rename_key_list.head()->old_name.str, + table->s->table_name.str); + goto err; + } + + if (alter_index_ignorability_list.elements) + { + my_error(ER_KEY_DOES_NOT_EXISTS, MYF(0), + alter_index_ignorability_list.head()->name(), + table->s->table_name.str); + goto err; + } + + if (!create_info->comment.str) + { + create_info->comment.str= table->s->comment.str; + create_info->comment.length= table->s->comment.length; + } + + table->file->update_create_info(create_info); + if ((create_info->table_options & + (HA_OPTION_PACK_KEYS | HA_OPTION_NO_PACK_KEYS)) || + (used_fields & HA_CREATE_USED_PACK_KEYS)) + db_create_options&= ~(HA_OPTION_PACK_KEYS | HA_OPTION_NO_PACK_KEYS); + if ((create_info->table_options & + (HA_OPTION_STATS_PERSISTENT | HA_OPTION_NO_STATS_PERSISTENT)) || + (used_fields & HA_CREATE_USED_STATS_PERSISTENT)) + db_create_options&= ~(HA_OPTION_STATS_PERSISTENT | HA_OPTION_NO_STATS_PERSISTENT); + + if (create_info->table_options & + (HA_OPTION_CHECKSUM | HA_OPTION_NO_CHECKSUM)) + db_create_options&= ~(HA_OPTION_CHECKSUM | HA_OPTION_NO_CHECKSUM); + if (create_info->table_options & + (HA_OPTION_DELAY_KEY_WRITE | HA_OPTION_NO_DELAY_KEY_WRITE)) + db_create_options&= ~(HA_OPTION_DELAY_KEY_WRITE | + HA_OPTION_NO_DELAY_KEY_WRITE); + create_info->table_options|= db_create_options; + + if (table->s->tmp_table) + create_info->options|=HA_LEX_CREATE_TMP_TABLE; + + rc= FALSE; + alter_info->create_list.swap(new_create_list); + alter_info->key_list.swap(new_key_list); + alter_info->check_constraint_list.swap(new_constraint_list); +err: + DBUG_RETURN(rc); +} + + +/** + Get Create_field object for newly created table by its name + in the old version of table. + + @param alter_info Alter_info describing newly created table. + @param old_name Name of field in old table. + + @returns Pointer to Create_field object, NULL - if field is + not present in new version of table. 
+*/ + +static Create_field *get_field_by_old_name(Alter_info *alter_info, + const char *old_name) +{ + List_iterator_fast new_field_it(alter_info->create_list); + Create_field *new_field; + + while ((new_field= new_field_it++)) + { + if (new_field->field && + (my_strcasecmp(system_charset_info, + new_field->field->field_name.str, + old_name) == 0)) + break; + } + return new_field; +} + + +/** Type of change to foreign key column, */ + +enum fk_column_change_type +{ + FK_COLUMN_NO_CHANGE, FK_COLUMN_DATA_CHANGE, + FK_COLUMN_RENAMED, FK_COLUMN_DROPPED +}; + +/** + Check that ALTER TABLE's changes on columns of a foreign key are allowed. + + @param[in] thd Thread context. + @param[in] alter_info Alter_info describing changes to be done + by ALTER TABLE. + @param[in] fk_columns List of columns of the foreign key to check. + @param[out] bad_column_name Name of field on which ALTER TABLE tries to + do prohibited operation. + + @note This function takes into account value of @@foreign_key_checks + setting. + + @retval FK_COLUMN_NO_CHANGE No significant changes are to be done on + foreign key columns. + @retval FK_COLUMN_DATA_CHANGE ALTER TABLE might result in value + change in foreign key column. + @retval FK_COLUMN_RENAMED Foreign key column is renamed. + @retval FK_COLUMN_DROPPED Foreign key column is dropped. +*/ + +static enum fk_column_change_type +fk_check_column_changes(THD *thd, Alter_info *alter_info, + List &fk_columns, + const char **bad_column_name) +{ + List_iterator_fast column_it(fk_columns); + LEX_CSTRING *column; + + *bad_column_name= NULL; + + while ((column= column_it++)) + { + Create_field *new_field= get_field_by_old_name(alter_info, column->str); + + if (new_field) + { + Field *old_field= new_field->field; + + if (lex_string_cmp(system_charset_info, &old_field->field_name, + &new_field->field_name)) + { + /* + Copy algorithm doesn't support proper renaming of columns in + the foreign key yet. 
At the moment we lack API which will tell + SE that foreign keys should be updated to use new name of column + like it happens in case of in-place algorithm. + */ + *bad_column_name= column->str; + return FK_COLUMN_RENAMED; + } + + /* + Field_{num|decimal}::is_equal evaluates to IS_EQUAL_NO where + the new_field adds an AUTO_INCREMENT flag on a column due to a + limitation in MyISAM/ARIA. For the purposes of FK determination + it doesn't matter if AUTO_INCREMENT is there or not. + */ + const uint flags= new_field->flags; + new_field->flags&= ~AUTO_INCREMENT_FLAG; + const bool equal_result= old_field->is_equal(*new_field); + new_field->flags= flags; + + if ((equal_result == IS_EQUAL_NO) || + ((new_field->flags & NOT_NULL_FLAG) && + !(old_field->flags & NOT_NULL_FLAG))) + { + /* + Column in a FK has changed significantly and it + may break referential intergrity. + */ + *bad_column_name= column->str; + return FK_COLUMN_DATA_CHANGE; + } + } + else + { + /* + Column in FK was dropped. Most likely this will break + integrity constraints of InnoDB data-dictionary (and thus + InnoDB will emit an error), so we prohibit this right away + even if foreign_key_checks are off. + This also includes a rare case when another field replaces + field being dropped since it is easy to break referential + integrity in this case. + */ + *bad_column_name= column->str; + return FK_COLUMN_DROPPED; + } + } + + return FK_COLUMN_NO_CHANGE; +} + + +/** + Check if ALTER TABLE we are about to execute using COPY algorithm + is not supported as it might break referential integrity. + + @note If foreign_key_checks is disabled (=0), we allow to break + referential integrity. But we still disallow some operations + like dropping or renaming columns in foreign key since they + are likely to break consistency of InnoDB data-dictionary + and thus will end-up in error anyway. + + @param[in] thd Thread context. + @param[in] table Table to be altered. 
+ @param[in] alter_info Lists of fields, keys to be changed, added + or dropped. + @param[out] alter_ctx ALTER TABLE runtime context. + Alter_table_ctx::fk_error_if_delete flag + is set if deletion during alter can break + foreign key integrity. + + @retval false Success. + @retval true Error, ALTER - tries to do change which is not compatible + with foreign key definitions on the table. +*/ + +static bool fk_prepare_copy_alter_table(THD *thd, TABLE *table, + Alter_info *alter_info, + Alter_table_ctx *alter_ctx) +{ + List fk_parent_key_list; + List fk_child_key_list; + FOREIGN_KEY_INFO *f_key; + + DBUG_ENTER("fk_prepare_copy_alter_table"); + + table->file->get_parent_foreign_key_list(thd, &fk_parent_key_list); + + /* OOM when building list. */ + if (unlikely(thd->is_error())) + DBUG_RETURN(true); + + /* + Remove from the list all foreign keys in which table participates as + parent which are to be dropped by this ALTER TABLE. This is possible + when a foreign key has the same table as child and parent. + */ + List_iterator fk_parent_key_it(fk_parent_key_list); + + while ((f_key= fk_parent_key_it++)) + { + Alter_drop *drop; + List_iterator_fast drop_it(alter_info->drop_list); + + while ((drop= drop_it++)) + { + /* + InnoDB treats foreign key names in case-insensitive fashion. + So we do it here too. For database and table name type of + comparison used depends on lower-case-table-names setting. + For l_c_t_n = 0 we use case-sensitive comparison, for + l_c_t_n > 0 modes case-insensitive comparison is used. 
+ */ + if ((drop->type == Alter_drop::FOREIGN_KEY) && + (my_strcasecmp(system_charset_info, f_key->foreign_id->str, + drop->name) == 0) && + (lex_string_cmp(table_alias_charset, f_key->foreign_db, + &table->s->db) == 0) && + (lex_string_cmp(table_alias_charset, f_key->foreign_table, + &table->s->table_name) == 0)) + fk_parent_key_it.remove(); + } + } + + /* + If there are FKs in which this table is parent which were not + dropped we need to prevent ALTER deleting rows from the table, + as it might break referential integrity. OTOH it is OK to do + so if foreign_key_checks are disabled. + */ + if (!fk_parent_key_list.is_empty() && + !(thd->variables.option_bits & OPTION_NO_FOREIGN_KEY_CHECKS)) + alter_ctx->set_fk_error_if_delete_row(fk_parent_key_list.head()); + + fk_parent_key_it.rewind(); + while ((f_key= fk_parent_key_it++)) + { + enum fk_column_change_type changes; + const char *bad_column_name; + + changes= fk_check_column_changes(thd, alter_info, + f_key->referenced_fields, + &bad_column_name); + + switch(changes) + { + case FK_COLUMN_NO_CHANGE: + /* No significant changes. We can proceed with ALTER! 
*/ + break; + case FK_COLUMN_DATA_CHANGE: + { + char buff[NAME_LEN*2+2]; + strxnmov(buff, sizeof(buff)-1, f_key->foreign_db->str, ".", + f_key->foreign_table->str, NullS); + my_error(ER_FK_COLUMN_CANNOT_CHANGE_CHILD, MYF(0), bad_column_name, + f_key->foreign_id->str, buff); + DBUG_RETURN(true); + } + case FK_COLUMN_RENAMED: + my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0), + "ALGORITHM=COPY", + ER_THD(thd, ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_RENAME), + "ALGORITHM=INPLACE"); + DBUG_RETURN(true); + case FK_COLUMN_DROPPED: + { + StringBuffer buff(system_charset_info); + LEX_CSTRING *db= f_key->foreign_db, *tbl= f_key->foreign_table; + + append_identifier(thd, &buff, db); + buff.append('.'); + append_identifier(thd, &buff, tbl); + my_error(ER_FK_COLUMN_CANNOT_DROP_CHILD, MYF(0), bad_column_name, + f_key->foreign_id->str, buff.c_ptr()); + DBUG_RETURN(true); + } + default: + DBUG_ASSERT(0); + } + } + + table->file->get_foreign_key_list(thd, &fk_child_key_list); + + /* OOM when building list. */ + if (unlikely(thd->is_error())) + DBUG_RETURN(true); + + /* + Remove from the list all foreign keys which are to be dropped + by this ALTER TABLE. + */ + List_iterator fk_key_it(fk_child_key_list); + + while ((f_key= fk_key_it++)) + { + Alter_drop *drop; + List_iterator_fast drop_it(alter_info->drop_list); + + while ((drop= drop_it++)) + { + /* Names of foreign keys in InnoDB are case-insensitive. */ + if ((drop->type == Alter_drop::FOREIGN_KEY) && + (my_strcasecmp(system_charset_info, f_key->foreign_id->str, + drop->name) == 0)) + fk_key_it.remove(); + } + } + + fk_key_it.rewind(); + while ((f_key= fk_key_it++)) + { + enum fk_column_change_type changes; + const char *bad_column_name; + + changes= fk_check_column_changes(thd, alter_info, + f_key->foreign_fields, + &bad_column_name); + + switch(changes) + { + case FK_COLUMN_NO_CHANGE: + /* No significant changes. We can proceed with ALTER! 
*/ + break; + case FK_COLUMN_DATA_CHANGE: + my_error(ER_FK_COLUMN_CANNOT_CHANGE, MYF(0), bad_column_name, + f_key->foreign_id->str); + DBUG_RETURN(true); + case FK_COLUMN_RENAMED: + my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0), + "ALGORITHM=COPY", + ER_THD(thd, ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_RENAME), + "ALGORITHM=INPLACE"); + DBUG_RETURN(true); + case FK_COLUMN_DROPPED: + my_error(ER_FK_COLUMN_CANNOT_DROP, MYF(0), bad_column_name, + f_key->foreign_id->str); + DBUG_RETURN(true); + default: + DBUG_ASSERT(0); + } + } + + /* + Normally, an attempt to modify an FK parent table will cause + FK children to be prelocked, so the table-being-altered cannot + be modified by a cascade FK action, because ALTER holds a lock + and prelocking will wait. + + But if a new FK is being added by this very ALTER, then the target + table is not locked yet (it's a temporary table). So, we have to + lock FK parents explicitly. + */ + if (alter_info->flags & ALTER_ADD_FOREIGN_KEY) + { + List_iterator fk_list_it(alter_info->key_list); + + while (Key *key= fk_list_it++) + { + if (key->type != Key::FOREIGN_KEY || key->old) + continue; + + Foreign_key *fk= static_cast(key); + char dbuf[NAME_LEN]; + char tbuf[NAME_LEN]; + const char *ref_db= (fk->ref_db.str ? + fk->ref_db.str : + alter_ctx->new_db.str); + const char *ref_table= fk->ref_table.str; + MDL_request mdl_request; + + if (lower_case_table_names) + { + strmake_buf(dbuf, ref_db); + my_casedn_str(system_charset_info, dbuf); + strmake_buf(tbuf, ref_table); + my_casedn_str(system_charset_info, tbuf); + ref_db= dbuf; + ref_table= tbuf; + } + + MDL_REQUEST_INIT(&mdl_request, MDL_key::TABLE, ref_db, ref_table, + MDL_SHARED_NO_WRITE, MDL_TRANSACTION); + if (thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout)) + DBUG_RETURN(true); + } + } + + DBUG_RETURN(false); +} + +/** + Rename temporary table and/or turn indexes on/off without touching .FRM. 
+ Its a variant of simple_rename_or_index_change() to be used exclusively + for temporary tables. + + @param thd Thread handler + @param table_list TABLE_LIST for the table to change + @param keys_onoff ENABLE or DISABLE KEYS? + @param alter_ctx ALTER TABLE runtime context. + + @return Operation status + @retval false Success + @retval true Failure +*/ +static bool +simple_tmp_rename_or_index_change(THD *thd, TABLE_LIST *table_list, + Alter_info::enum_enable_or_disable keys_onoff, + Alter_table_ctx *alter_ctx) +{ + DBUG_ENTER("simple_tmp_rename_or_index_change"); + + TABLE *table= table_list->table; + bool error= false; + + DBUG_ASSERT(table->s->tmp_table); + + if (keys_onoff != Alter_info::LEAVE_AS_IS) + { + THD_STAGE_INFO(thd, stage_manage_keys); + error= alter_table_manage_keys(table, table->file->indexes_are_disabled(), + keys_onoff); + } + + if (likely(!error) && alter_ctx->is_table_renamed()) + { + THD_STAGE_INFO(thd, stage_rename); + + /* + If THD::rename_temporary_table() fails, there is no need to rename it + back to the original name (unlike the case for non-temporary tables), + as it was an allocation error and the table was not renamed. + */ + error= thd->rename_temporary_table(table, &alter_ctx->new_db, + &alter_ctx->new_alias); + } + + if (likely(!error)) + { + /* + We do not replicate alter table statement on temporary tables under + ROW-based replication. + */ + if (!thd->is_current_stmt_binlog_format_row()) + { + error= write_bin_log(thd, true, thd->query(), thd->query_length()) != 0; + } + if (likely(!error)) + my_ok(thd); + } + + DBUG_RETURN(error); +} + + +/** + Rename table and/or turn indexes on/off without touching .FRM + + @param thd Thread handler + @param table_list TABLE_LIST for the table to change + @param keys_onoff ENABLE or DISABLE KEYS? + @param alter_ctx ALTER TABLE runtime context. 
  @return Operation status
  @retval false Success
  @retval true  Failure

  @notes
  Normally with ALTER TABLE we roll forward as soon as data is copied
  or new table is committed. For an ALTER TABLE that only does a RENAME,
  we will roll back unless the RENAME fully completes.
  If we crash while using enable/disable keys, this may have completed
  and will not be rolled back.
*/

static bool
simple_rename_or_index_change(THD *thd, TABLE_LIST *table_list,
                              Alter_info::enum_enable_or_disable keys_onoff,
                              TRIGGER_RENAME_PARAM *trigger_param,
                              Alter_table_ctx *alter_ctx)
{
  TABLE *table= table_list->table;
  MDL_ticket *mdl_ticket= table->mdl_ticket;
  DDL_LOG_STATE ddl_log_state;
  LEX_CSTRING storage_engine;
  LEX_CUSTRING table_version;
  uchar table_version_buff[MY_UUID_SIZE];
  char storage_engine_buff[NAME_LEN];
  int error= 0;
  bool partitioned;
  /*
    Under LOCK TABLES the table must stay open, so no forced reopen;
    otherwise force a reopen so other instances see the new definition.
  */
  enum ha_extra_function extra_func= thd->locked_tables_mode
                                       ? HA_EXTRA_NOT_USED
                                       : HA_EXTRA_FORCE_REOPEN;
  DBUG_ENTER("simple_rename_or_index_change");
  bzero(&ddl_log_state, sizeof(ddl_log_state));

  /*
    Save table version and (real) storage engine name into local buffers;
    they are needed for DDL / backup logging below.
  */
  table_version.str= table_version_buff;
  storage_engine.str= storage_engine_buff;
  if ((table_version.length= table->s->tabledef_version.length))
    memcpy((char*) table_version.str, table->s->tabledef_version.str,
           table_version.length);
  partitioned= table->file->partition_engine();
  storage_engine.length= (strmake((char*) storage_engine.str,
                                  table->file->real_table_type(),
                                  sizeof(storage_engine_buff)-1) -
                          storage_engine.str);


  if (keys_onoff != Alter_info::LEAVE_AS_IS)
  {
    if (wait_while_table_is_used(thd, table, extra_func))
      DBUG_RETURN(true);

    // It's now safe to take the table level lock.
    if (lock_tables(thd, table_list, alter_ctx->tables_opened, 0))
      DBUG_RETURN(true);

    THD_STAGE_INFO(thd, stage_manage_keys);
    error= alter_table_manage_keys(table,
                                   table->file->indexes_are_disabled(),
                                   keys_onoff);
    if (table->s->tmp_table == NO_TMP_TABLE)
    {
      /* Record the index change in the backup (DDL) log. */
      backup_log_info ddl_log;
      bzero(&ddl_log, sizeof(ddl_log));
      ddl_log.query= { C_STRING_WITH_LEN("CHANGE_INDEX") };
      ddl_log.org_storage_engine_name= storage_engine;
      ddl_log.org_partitioned= partitioned;
      ddl_log.org_database= table_list->table->s->db;
      ddl_log.org_table= table_list->table->s->table_name;
      ddl_log.org_table_id= table_version;
      backup_log_ddl(&ddl_log);
    }
  }

  if (likely(!error) && alter_ctx->is_table_renamed())
  {
    THD_STAGE_INFO(thd, stage_rename);
    handlerton *old_db_type= table->s->db_type();

    /*
      Then do a 'simple' rename of the table. First we need to close all
      instances of 'source' table.
      Note that if wait_while_table_is_used() returns error here (i.e. if
      this thread was killed) then it must be that previous step of
      simple rename did nothing and therefore we can safely return
      without additional clean-up.
    */
    if (wait_while_table_is_used(thd, table, extra_func))
      DBUG_RETURN(true);
    close_all_tables_for_name(thd, table->s, HA_EXTRA_PREPARE_FOR_RENAME,
                              NULL);

    /* Log the rename in the DDL log first so a crash can be rolled back. */
    (void) ddl_log_rename_table(&ddl_log_state, old_db_type,
                                &alter_ctx->db, &alter_ctx->table_name,
                                &alter_ctx->new_db, &alter_ctx->new_alias);
    if (mysql_rename_table(old_db_type, &alter_ctx->db, &alter_ctx->table_name,
                           &alter_ctx->new_db, &alter_ctx->new_alias,
                           &table_version, 0))
      error= -1;
    if (!error)
      ddl_log_update_phase(&ddl_log_state, DDL_RENAME_PHASE_TRIGGER);
    debug_crash_here("ddl_log_alter_before_rename_triggers");
    if (!error &&
        Table_triggers_list::change_table_name(thd, trigger_param,
                                               &alter_ctx->db,
                                               &alter_ctx->alias,
                                               &alter_ctx->table_name,
                                               &alter_ctx->new_db,
                                               &alter_ctx->new_alias))
    {
      /*
        Renaming the triggers failed: undo the table rename (skipping
        foreign key checks) and disable the DDL log entry.
      */
      (void) mysql_rename_table(old_db_type,
                                &alter_ctx->new_db, &alter_ctx->new_alias,
                                &alter_ctx->db, &alter_ctx->table_name,
                                &table_version,
                                NO_FK_CHECKS);
      ddl_log_disable_entry(&ddl_log_state);
      error= -1;
    }
    /*
      Update stat tables last. This is to be able to handle rename of
      a stat table.
    */
    if (error == 0)
      (void) rename_table_in_stat_tables(thd, &alter_ctx->db,
                                         &alter_ctx->table_name,
                                         &alter_ctx->new_db,
                                         &alter_ctx->new_alias);
    debug_crash_here("ddl_log_alter_after_rename_triggers");
  }

  if (likely(!error))
  {
    /*
      Bind the binlog write to the DDL log entry through the XID so that
      recovery can decide whether the statement was logged.
    */
    thd->binlog_xid= thd->query_id;
    ddl_log_update_xid(&ddl_log_state, thd->binlog_xid);
    error= write_bin_log(thd, TRUE, thd->query(), thd->query_length());
    thd->binlog_xid= 0;
    if (likely(!error))
      my_ok(thd);
  }
  ddl_log_complete(&ddl_log_state);
  table_list->table= NULL;                      // For query cache
  query_cache_invalidate3(thd, table_list, 0);

  if ((thd->locked_tables_mode == LTM_LOCK_TABLES ||
       thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES))
  {
    /*
      Under LOCK TABLES we should adjust meta-data locks before finishing
      statement. Otherwise we can rely on them being released
      along with the implicit commit.
    */
    if (alter_ctx->is_table_renamed())
      thd->mdl_context.release_all_locks_for_name(mdl_ticket);
    else
      mdl_ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE);
  }
  DBUG_RETURN(error != 0);
}


/*
  Release the in-memory objects of a TABLE used during inplace ALTER:
  close the TABLE and free its TABLE_SHARE.
  The on-disk files are intentionally left untouched (hence "keep_files").
*/

static void cleanup_table_after_inplace_alter_keep_files(TABLE *table)
{
  TABLE_SHARE *share= table->s;
  closefrm(table);
  free_table_share(share);
}


/*
  Remove a table used during inplace ALTER from both disk and memory:
  delete the partitioning metadata and the .frm file, then release the
  TABLE/TABLE_SHARE objects.
*/

static void cleanup_table_after_inplace_alter(TABLE *table)
{
  table->file->ha_create_partitioning_metadata(table->s->normalized_path.str, 0,
                                               CHF_DELETE_FLAG);
  deletefrm(table->s->normalized_path.str);
  cleanup_table_after_inplace_alter_keep_files(table);
}


/*
  Open a TABLE object for the new table definition during inplace ALTER.

  The share is initialized as a temporary share from the binary frm image
  in 'frm'.  On failure the share is freed and the temporary .frm file is
  deleted.

  @return 0  Success
  @return 1  Failure
*/

static int create_table_for_inplace_alter(THD *thd,
                                          const Alter_table_ctx &alter_ctx,
                                          LEX_CUSTRING *frm,
                                          TABLE_SHARE *share,
                                          TABLE *table)
{
  init_tmp_table_share(thd, share, alter_ctx.new_db.str, 0,
                       alter_ctx.new_name.str, alter_ctx.get_tmp_path());
  if (share->init_from_binary_frm_image(thd, true, frm->str, frm->length) ||
      open_table_from_share(thd, share, &alter_ctx.new_name, 0,
                            EXTRA_RECORD, thd->open_options,
                            table, false))
  {
    free_table_share(share);
    deletefrm(alter_ctx.get_tmp_path());
    return 1;
  }
  if (table->internal_tables && open_and_lock_internal_tables(table, false))
  {
    cleanup_table_after_inplace_alter(table);
    return 1;
  }
  return 0;
}


/*
  Log the query to the binary log if this is a slave thread, then send
  my_ok() to the client.

  Helper function for mysql_alter_table().
*/

static bool log_and_ok(THD *thd)
{
  if (thd->slave_thread &&
      write_bin_log(thd, true, thd->query(), thd->query_length()))
    return(true);
  my_ok(thd);
  return(0);
}

/*
  Wait for the master to send the result of an ALTER TABLE.

  Returns
    true   when rollback is decided
    false  otherwise
*/
static bool wait_for_master(THD *thd)
{
#ifdef HAVE_REPLICATION
  start_alter_info* info= thd->rgi_slave->sa_info;
  Master_info *mi= thd->rgi_slave->rli->mi;

  DBUG_ASSERT(info);
  DBUG_ASSERT(info->state != start_alter_state::INVALID);
  DBUG_ASSERT(mi);

  mysql_mutex_lock(&mi->start_alter_lock);

  DBUG_ASSERT(!info->direct_commit_alter ||
              info->state == start_alter_state::ROLLBACK_ALTER);

  /* Wait until the master's commit/rollback decision arrives. */
  while (info->state == start_alter_state::REGISTERED)
  {
    mysql_cond_wait(&info->start_alter_cond, &mi->start_alter_lock);
  }
  if (info->state == start_alter_state::ROLLBACK_ALTER)
  {
    /*
      The SA thread itself will not report the error.  We save the error
      code in info->error so that the RA worker can report it.
      info->error can be modified without taking a mutex because the CA
      worker will be waiting on the ::COMPLETED wait condition.
    */
    if(thd->is_error())
    {
      info->error= thd->get_stmt_da()->sql_errno();
      thd->clear_error();
      thd->reset_killed();
    }
  }
  mysql_mutex_unlock(&mi->start_alter_lock);

  /* NOTE(review): info->state is re-read here after releasing
     start_alter_lock — presumably no writer can change it past this
     point; confirm against the state machine. */
  return info->state == start_alter_state::ROLLBACK_ALTER;
#else
  return 0;
#endif
}

#ifdef HAVE_REPLICATION
/**
  Change info->state to ::COMPLETED.  This signals the CA/RA worker that
  we have binlogged, so our part here is finished.

  @param thd   Thread handle
  @param info  ALTER replication execution context
  @param mi    Master_info of the replication source
*/
static void alter_committed(THD *thd, start_alter_info* info, Master_info *mi)
{
  start_alter_state tmp= info->state;
  mysql_mutex_lock(&mi->start_alter_lock);
  info->state= start_alter_state::COMPLETED;
  mysql_cond_broadcast(&info->start_alter_cond);
  mysql_mutex_unlock(&mi->start_alter_lock);
  if (tmp == start_alter_state::ROLLBACK_ALTER)
  {
    /* The rollback decision was already handled; drop any pending error. */
    thd->clear_error();
    thd->reset_killed();
  }
}
#endif

/**
  process_master_state: process the info->state received from the master.
  We compare the master state with alter_result:
    in the case of ROLLBACK_ALTER, alter_result > 0;
    in the case of COMMIT_ALTER,   alter_result == 0.
  If the condition is not satisfied we report an error and return 1.
  Make sure wait_for_master() has been called before calling this
  function.  This function should be called only at the binlog-write time
  of commit/rollback alter.  We call alter_committed() if everything is
  fine.

  @param thd          Thread handle.
  @param alter_result Result of execution.
  @param[in/out]
        start_alter_id Start Alter identifier or zero,
                       it is reset to zero.
  @param if_exists    True indicates the binary logging of the query
                      should be done with "if exists" option.

  @retval 1 error
  @retval 0 Ok
*/
static int process_master_state(THD *thd, int alter_result,
                                uint64 &start_alter_id, bool if_exists)
{
#ifdef HAVE_REPLICATION
  start_alter_info *info= thd->rgi_slave->sa_info;
  bool partial_alter= false;

  if (info->state == start_alter_state::INVALID)
  {
    /* the caller has not yet called SA logging nor wait for master decision */
    if (!write_bin_log_start_alter(thd, partial_alter, start_alter_id,
                                   if_exists))
      wait_for_master(thd);

    DBUG_ASSERT(!partial_alter);
  }

  /* this function shouldn't be called twice */
  DBUG_ASSERT(start_alter_id);

  start_alter_id= 0;
  /* NOTE(review): the header describes alter_result > 0 for rollback,
     but the code accepts alter_result >= 0 — confirm which is intended. */
  if ((info->state == start_alter_state::ROLLBACK_ALTER && alter_result >= 0)
      || (info->state == start_alter_state::COMMIT_ALTER && !alter_result))
  {
    alter_committed(thd, info, thd->rgi_slave->rli->mi);
    return 0;
  }
  else
  {
    thd->is_slave_error= 1;
    return 1;
  }
#else
  return 0;
#endif
}

/*
  Returns a (globally unique) identifier of START Alter when a slave
  applier executes mysql_alter_table().
  In non-slave context it is zero.
*/
static uint64 get_start_alter_id(THD *thd)
{
  DBUG_ASSERT(!(thd->rgi_slave &&
                (thd->rgi_slave->gtid_ev_flags_extra &
                 Gtid_log_event::FL_START_ALTER_E1)) ||
              !thd->rgi_slave->direct_commit_alter);
  return
    thd->rgi_slave &&
    (thd->rgi_slave->gtid_ev_flags_extra & Gtid_log_event::FL_START_ALTER_E1) ?
    thd->variables.gtid_seq_no : 0;
}


/**
  Alter table

  @param thd              Thread handle
  @param new_db           If there is a RENAME clause
  @param new_name         If there is a RENAME clause
  @param create_info      Information from the parsing phase about new
                          table properties.
  @param table_list       The table to change.
  @param alter_info       Lists of fields, keys to be changed, added
                          or dropped.
  @param order_num        How many ORDER BY fields has been specified.
  @param order            List of fields to ORDER BY.
+ @param ignore Whether we have ALTER IGNORE TABLE + + @retval true Error + @retval false Success + + This is a veery long function and is everything but the kitchen sink :) + It is used to alter a table and not only by ALTER TABLE but also + CREATE|DROP INDEX are mapped on this function. + + When the ALTER TABLE statement just does a RENAME or ENABLE|DISABLE KEYS, + or both, then this function short cuts its operation by renaming + the table and/or enabling/disabling the keys. In this case, the FRM is + not changed, directly by mysql_alter_table. However, if there is a + RENAME + change of a field, or an index, the short cut is not used. + See how `create_list` is used to generate the new FRM regarding the + structure of the fields. The same is done for the indices of the table. + + Altering a table can be done in two ways. The table can be modified + directly using an in-place algorithm, or the changes can be done using + an intermediate temporary table (copy). In-place is the preferred + algorithm as it avoids copying table data. The storage engine + selects which algorithm to use in check_if_supported_inplace_alter() + based on information about the table changes from fill_alter_inplace_info(). 
+*/ + +bool mysql_alter_table(THD *thd, const LEX_CSTRING *new_db, + const LEX_CSTRING *new_name, + Table_specification_st *create_info, + TABLE_LIST *table_list, + Recreate_info *recreate_info, + Alter_info *alter_info, + uint order_num, ORDER *order, bool ignore, + bool if_exists) +{ + bool engine_changed, error, frm_is_created= false, error_handler_pushed= false; + bool no_ha_table= true; /* We have not created table in storage engine yet */ + TABLE *table, *new_table= nullptr; + DDL_LOG_STATE ddl_log_state; + Turn_errors_to_warnings_handler errors_to_warnings; + +#ifdef WITH_PARTITION_STORAGE_ENGINE + bool partition_changed= false; + bool fast_alter_partition= false; +#endif + bool partial_alter= false; + /* + start_alter_id is the gtid seq no of the START Alter - the 1st part + of two-phase loggable ALTER. The variable is meaningful only + on slave execution. + */ + uint64 start_alter_id= get_start_alter_id(thd); + + /* + Create .FRM for new version of table with a temporary name. + We don't log the statement, it will be logged later. + + Keep information about keys in newly created table as it + will be used later to construct Alter_inplace_info object + and by fill_alter_inplace_info() call. + */ + KEY *key_info; + uint key_count; + /* + Remember if the new definition has new VARCHAR column; + create_info->varchar will be reset in create_table_impl()/ + mysql_prepare_create_table(). 
+ */ + bool varchar= create_info->varchar, table_creation_was_logged= 0; + bool binlog_as_create_select= 0, log_if_exists= 0; + uint tables_opened; + handlerton *new_db_type= create_info->db_type, *old_db_type; + ha_rows copied=0, deleted=0; + LEX_CUSTRING frm= {0,0}; + LEX_CSTRING backup_name; + char index_file[FN_REFLEN], data_file[FN_REFLEN], backup_name_buff[60]; + uchar uuid_buffer[MY_UUID_SIZE]; + MDL_request target_mdl_request; + MDL_ticket *mdl_ticket= 0; + Alter_table_prelocking_strategy alter_prelocking_strategy; + TRIGGER_RENAME_PARAM trigger_param; + + /* + Callback function that an engine can request to be called after executing + inplace alter table. + */ + Alter_inplace_info::inplace_alter_table_commit_callback + *inplace_alter_table_committed= 0; + void *inplace_alter_table_committed_argument= 0; + DBUG_ENTER("mysql_alter_table"); + + /* + Check if we attempt to alter mysql.slow_log or + mysql.general_log table and return an error if + it is the case. + TODO: this design is obsolete and will be removed. 
+ */ + int table_kind= check_if_log_table(table_list, FALSE, NullS); + + if (table_kind) + { + /* Disable alter of enabled log tables */ + if (logger.is_log_table_enabled(table_kind)) + { + my_error(ER_BAD_LOG_STATEMENT, MYF(0), "ALTER"); + DBUG_RETURN(true); + } + + /* Disable alter of log tables to unsupported engine */ + if ((create_info->used_fields & HA_CREATE_USED_ENGINE) && + (!create_info->db_type || /* unknown engine */ + !(create_info->db_type->flags & HTON_SUPPORT_LOG_TABLES))) + { + unsupported: + my_error(ER_UNSUPORTED_LOG_ENGINE, MYF(0), + hton_name(create_info->db_type)->str); + DBUG_RETURN(true); + } + + if (create_info->db_type == maria_hton && + create_info->transactional != HA_CHOICE_NO) + goto unsupported; + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (alter_info->partition_flags & ALTER_PARTITION_INFO) + { + my_error(ER_WRONG_USAGE, MYF(0), "PARTITION", "log table"); + DBUG_RETURN(true); + } +#endif + } + + THD_STAGE_INFO(thd, stage_init_update); + bzero(&ddl_log_state, sizeof(ddl_log_state)); + + /* Temporary name for backup of original table */ + backup_name.str= backup_name_buff; + backup_name.length= my_snprintf(backup_name_buff, sizeof(backup_name_buff)-1, + "%s-backup-%lx-%llx", tmp_file_prefix, + current_pid, thd->thread_id); + + /* Check if the new table type is a shared table */ + if (ha_check_if_updates_are_ignored(thd, create_info->db_type, "ALTER")) + { + /* + Remove old local .frm file if it exists. We should use the new + shared one in the future. The drop is not logged, the ALTER table is + logged. + */ + table_list->mdl_request.type= MDL_EXCLUSIVE; + /* This will only drop the .frm file and local tables, not shared ones */ + error= mysql_rm_table(thd, table_list, 1, 0, 0, 1); + DBUG_RETURN(log_and_ok(thd)); + } + + /* + Code below can handle only base tables so ensure that we won't open a view. + Note that RENAME TABLE the only ALTER clause which is supported for views + has been already processed. 
+ */ + table_list->required_type= TABLE_TYPE_NORMAL; + + DEBUG_SYNC(thd, "alter_table_before_open_tables"); + + thd->open_options|= HA_OPEN_FOR_ALTER; + thd->mdl_backup_ticket= 0; + error= open_tables(thd, &table_list, &tables_opened, 0, + &alter_prelocking_strategy); + thd->open_options&= ~HA_OPEN_FOR_ALTER; + + if (unlikely(error)) + { + if (if_exists) + { + int tmp_errno= thd->get_stmt_da()->sql_errno(); + if (tmp_errno == ER_NO_SUCH_TABLE) + { + /* + ALTER TABLE IF EXISTS was used on not existing table + We have to log the query on a slave as the table may be a shared one + from the master and we need to ensure that the next slave can see + the statement as this slave may not have the table shared + */ + thd->clear_error(); + DBUG_RETURN(log_and_ok(thd)); + } + } + DBUG_RETURN(true); + } + + table= table_list->table; + bool is_reg_table= table->s->tmp_table == NO_TMP_TABLE; + +#ifdef WITH_WSREP + /* + If this ALTER TABLE is actually SEQUENCE we need to check + if we can support implementing storage engine. 
+ */ + if (WSREP(thd) && table && table->s->sequence && + wsrep_check_sequence(thd, thd->lex->create_info.seq_create_info)) + DBUG_RETURN(TRUE); + + if (WSREP(thd) && + (thd->lex->sql_command == SQLCOM_ALTER_TABLE || + thd->lex->sql_command == SQLCOM_CREATE_INDEX || + thd->lex->sql_command == SQLCOM_DROP_INDEX) && + !wsrep_should_replicate_ddl(thd, table_list->table->s->db_type())) + DBUG_RETURN(true); +#endif /* WITH_WSREP */ + + DEBUG_SYNC(thd, "alter_table_after_open_tables"); + + if (table->versioned()) + { + if (handlerton *hton1= create_info->db_type) + { + handlerton *hton2= table->file->partition_ht(); + if (hton1 != hton2 && + (ha_check_storage_engine_flag(hton1, HTON_NATIVE_SYS_VERSIONING) || + ha_check_storage_engine_flag(hton2, HTON_NATIVE_SYS_VERSIONING))) + { + my_error(ER_VERS_ALTER_ENGINE_PROHIBITED, MYF(0), table_list->db.str, + table_list->table_name.str); + DBUG_RETURN(true); + } + } + if (alter_info->vers_prohibited(thd)) + { + my_error(ER_VERS_ALTER_NOT_ALLOWED, MYF(0), + table_list->db.str, table_list->table_name.str); + DBUG_RETURN(true); + } + } + + DEBUG_SYNC(thd, "alter_opened_table"); + +#if defined WITH_WSREP && defined ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("sync.alter_opened_table", + { + const char act[]= + "now " + "wait_for signal.alter_opened_table"; + DBUG_ASSERT(!debug_sync_set_action(thd, + STRING_WITH_LEN(act))); + };); +#endif // WITH_WSREP + + Alter_table_ctx alter_ctx(thd, table_list, tables_opened, new_db, new_name); + mdl_ticket= table->mdl_ticket; + + /* + We have to do a check also after table is opened as there could be no + ENGINE= on the command line or the table could a partitioned S3 table. + */ + if (table->file->check_if_updates_are_ignored("ALTER")) + { + /* + Table is a shared table. Remove the .frm file. Discovery will create + a new one if needed. 
+ */ + table->s->tdc->flushed= 1; // Force close of all instances + if (thd->mdl_context.upgrade_shared_lock(mdl_ticket, + MDL_EXCLUSIVE, + thd->variables.lock_wait_timeout)) + DBUG_RETURN(1); + quick_rm_table(thd, table->file->ht, &table_list->db, + &table_list->table_name, + NO_HA_TABLE, 0); + goto end_inplace; + } + if (!if_exists && + (table->file->partition_ht()->flags & + HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE)) + { + /* + Table is a shared table that may not exist on the slave. + We add 'if_exists' to the query if it was not used + */ + log_if_exists= 1; + } + table_creation_was_logged= table->s->table_creation_was_logged; + + table->use_all_columns(); + + /* + Prohibit changing of the UNION list of a non-temporary MERGE table + under LOCK tables. It would be quite difficult to reuse a shrinked + set of tables from the old table or to open a new TABLE object for + an extended list and verify that they belong to locked tables. + */ + if ((thd->locked_tables_mode == LTM_LOCK_TABLES || + thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES) && + (create_info->used_fields & HA_CREATE_USED_UNION) && + (table->s->tmp_table == NO_TMP_TABLE)) + { + my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0)); + DBUG_RETURN(true); + } + + /* Check that we are not trying to rename to an existing table */ + if (alter_ctx.is_table_renamed()) + { + if (table->s->tmp_table != NO_TMP_TABLE) + { + /* + Check whether a temporary table exists with same requested new name. + If such table exists, there must be a corresponding TABLE_SHARE in + THD::all_temp_tables list. 
+ */ + if (thd->find_tmp_table_share(alter_ctx.new_db.str, + alter_ctx.new_name.str)) + { + my_error(ER_TABLE_EXISTS_ERROR, MYF(0), alter_ctx.new_alias.str); + DBUG_RETURN(true); + } + } + else + { + MDL_request_list mdl_requests; + MDL_request target_db_mdl_request; + + MDL_REQUEST_INIT(&target_mdl_request, MDL_key::TABLE, + alter_ctx.new_db.str, alter_ctx.new_name.str, + MDL_EXCLUSIVE, MDL_TRANSACTION); + mdl_requests.push_front(&target_mdl_request); + + /* + If we are moving the table to a different database, we also + need IX lock on the database name so that the target database + is protected by MDL while the table is moved. + */ + if (alter_ctx.is_database_changed()) + { + MDL_REQUEST_INIT(&target_db_mdl_request, MDL_key::SCHEMA, + alter_ctx.new_db.str, "", MDL_INTENTION_EXCLUSIVE, + MDL_TRANSACTION); + mdl_requests.push_front(&target_db_mdl_request); + } + + /* + Protection against global read lock must have been acquired when table + to be altered was being opened. + */ + DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::BACKUP, + "", "", + MDL_BACKUP_DDL)); + + if (thd->mdl_context.acquire_locks(&mdl_requests, + thd->variables.lock_wait_timeout)) + DBUG_RETURN(true); + + DEBUG_SYNC(thd, "locked_table_name"); + /* + Table maybe does not exist, but we got an exclusive lock + on the name, now we can safely try to find out for sure. 
+ */ + if (!(alter_info->partition_flags & ALTER_PARTITION_CONVERT_IN) && + ha_table_exists(thd, &alter_ctx.new_db, &alter_ctx.new_name)) + { + /* Table will be closed in do_command() */ + my_error(ER_TABLE_EXISTS_ERROR, MYF(0), alter_ctx.new_alias.str); + DBUG_RETURN(true); + } + } + } + + if (!create_info->db_type) + { +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (table->part_info && + create_info->used_fields & HA_CREATE_USED_ENGINE) + { + /* + This case happens when the user specified + ENGINE = x where x is a non-existing storage engine + We set create_info->db_type to default_engine_type + to ensure we don't change underlying engine type + due to a erroneously given engine name. + */ + create_info->db_type= table->part_info->default_engine_type; + } + else +#endif + create_info->db_type= table->s->db_type(); + } + + if (check_engine(thd, alter_ctx.new_db.str, alter_ctx.new_name.str, create_info)) + DBUG_RETURN(true); + + create_info->vers_check_native(); + if (create_info->vers_info.fix_alter_info(thd, alter_info, create_info, table)) + { + DBUG_RETURN(true); + } + + if ((create_info->db_type != table->s->db_type() || + (alter_info->partition_flags & ALTER_PARTITION_INFO)) && + !table->file->can_switch_engines()) + { + my_error(ER_ROW_IS_REFERENCED, MYF(0)); + DBUG_RETURN(true); + } + + /* + If foreign key is added then check permission to access parent table. + + In function "check_fk_parent_table_access", create_info->db_type is used + to identify whether engine supports FK constraint or not. Since + create_info->db_type is set here, check to parent table access is delayed + till this point for the alter operation. + */ + if ((alter_info->flags & ALTER_ADD_FOREIGN_KEY) && + check_fk_parent_table_access(thd, create_info, alter_info, new_db->str)) + DBUG_RETURN(true); + + /* + If this is an ALTER TABLE and no explicit row type specified reuse + the table's row type. + Note: this is the same as if the row type was specified explicitly. 
+ */ + if (create_info->row_type == ROW_TYPE_NOT_USED) + { + /* ALTER TABLE without explicit row type */ + create_info->row_type= table->s->row_type; + } + else + { + /* ALTER TABLE with specific row type */ + create_info->used_fields |= HA_CREATE_USED_ROW_FORMAT; + } + + old_db_type= table->s->db_type(); + new_db_type= create_info->db_type; + + DBUG_PRINT("info", ("old type: %s new type: %s", + ha_resolve_storage_engine_name(old_db_type), + ha_resolve_storage_engine_name(new_db_type))); + if (ha_check_storage_engine_flag(old_db_type, HTON_ALTER_NOT_SUPPORTED)) + { + DBUG_PRINT("info", ("doesn't support alter")); + my_error(ER_ILLEGAL_HA, MYF(0), hton_name(old_db_type)->str, + alter_ctx.db.str, alter_ctx.table_name.str); + DBUG_RETURN(true); + } + + if (ha_check_storage_engine_flag(new_db_type, HTON_ALTER_NOT_SUPPORTED)) + { + DBUG_PRINT("info", ("doesn't support alter")); + my_error(ER_ILLEGAL_HA, MYF(0), hton_name(new_db_type)->str, + alter_ctx.new_db.str, alter_ctx.new_name.str); + DBUG_RETURN(true); + } + + if (table->s->tmp_table == NO_TMP_TABLE) + mysql_audit_alter_table(thd, table_list); + else if (table_creation_was_logged && mysql_bin_log.is_open()) + { + /* Protect against MDL error in binary logging */ + MDL_request mdl_request; + DBUG_ASSERT(!mdl_ticket); + MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, + MDL_TRANSACTION); + if (thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout)) + DBUG_RETURN(true); + } + + THD_STAGE_INFO(thd, stage_setup); + + if (alter_info->flags & ALTER_DROP_CHECK_CONSTRAINT) + { + /* + ALTER TABLE DROP CONSTRAINT + should be replaced with ... DROP [FOREIGN] KEY + if the constraint is the FOREIGN KEY or UNIQUE one. 
+ */ + + List_iterator drop_it(alter_info->drop_list); + Alter_drop *drop; + List fk_child_key_list; + table->file->get_foreign_key_list(thd, &fk_child_key_list); + + alter_info->flags&= ~ALTER_DROP_CHECK_CONSTRAINT; + + while ((drop= drop_it++)) + { + if (drop->type == Alter_drop::CHECK_CONSTRAINT) + { + { + /* Test if there is a FOREIGN KEY with this name. */ + FOREIGN_KEY_INFO *f_key; + List_iterator fk_key_it(fk_child_key_list); + + while ((f_key= fk_key_it++)) + { + if (my_strcasecmp(system_charset_info, f_key->foreign_id->str, + drop->name) == 0) + { + drop->type= Alter_drop::FOREIGN_KEY; + alter_info->flags|= ALTER_DROP_FOREIGN_KEY; + goto do_continue; + } + } + } + + { + /* Test if there is an UNIQUE with this name. */ + uint n_key; + + for (n_key=0; n_key < table->s->keys; n_key++) + { + if ((table->key_info[n_key].flags & HA_NOSAME) && + my_strcasecmp(system_charset_info, + drop->name, table->key_info[n_key].name.str) == 0) + { + drop->type= Alter_drop::KEY; + alter_info->flags|= ALTER_DROP_INDEX; + goto do_continue; + } + } + } + } + alter_info->flags|= ALTER_DROP_CHECK_CONSTRAINT; +do_continue:; + } + } + + if (handle_if_exists_options(thd, table, alter_info, + &create_info->period_info) || + fix_constraints_names(thd, &alter_info->check_constraint_list, + create_info)) + DBUG_RETURN(true); + + /* Check if rename of triggers are supported */ + if (alter_ctx.is_table_renamed() && + Table_triggers_list::prepare_for_rename(thd, &trigger_param, + &alter_ctx.db, + &alter_ctx.alias, + &alter_ctx.table_name, + &alter_ctx.new_db, + &alter_ctx.new_alias)) + DBUG_RETURN(true); + + /* + Look if we have to do anything at all. + ALTER can become NOOP after handling + the IF (NOT) EXISTS options. 
+ */ + if (alter_info->flags == 0 && alter_info->partition_flags == 0) + { + my_snprintf(alter_ctx.tmp_buff, sizeof(alter_ctx.tmp_buff), + ER_THD(thd, ER_INSERT_INFO), 0L, 0L, + thd->get_stmt_da()->current_statement_warn_count()); + my_ok(thd, 0L, 0L, alter_ctx.tmp_buff); + + /* We don't replicate alter table statement on temporary tables */ + if (table_creation_was_logged) + { + if (write_bin_log_with_if_exists(thd, true, false, log_if_exists)) + DBUG_RETURN(true); + } + + DBUG_RETURN(false); + } + + /* + Test if we are only doing RENAME or KEYS ON/OFF. This works + as we are testing if flags == 0 above. + */ + if (!(alter_info->flags & ~(ALTER_RENAME | ALTER_KEYS_ONOFF)) && + alter_info->partition_flags == 0 && + alter_info->algorithm(thd) != + Alter_info::ALTER_TABLE_ALGORITHM_COPY) // No need to touch frm. + { + bool res; + + if (!table->s->tmp_table) + { + // This requires X-lock, no other lock levels supported. + if (alter_info->requested_lock != Alter_info::ALTER_TABLE_LOCK_DEFAULT && + alter_info->requested_lock != Alter_info::ALTER_TABLE_LOCK_EXCLUSIVE) + { + my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0), + "LOCK=NONE/SHARED", "LOCK=EXCLUSIVE"); + DBUG_RETURN(true); + } + res= simple_rename_or_index_change(thd, table_list, + alter_info->keys_onoff, + &trigger_param, + &alter_ctx); + } + else + { + res= simple_tmp_rename_or_index_change(thd, table_list, + alter_info->keys_onoff, + &alter_ctx); + } + DBUG_RETURN(res); + } + + /* We have to do full alter table. */ + +#ifdef WITH_PARTITION_STORAGE_ENGINE + { + /* + Partitioning: part_info is prepared and returned via thd->work_part_info + */ + if (prep_alter_part_table(thd, table, alter_info, create_info, + &partition_changed, &fast_alter_partition)) + { + DBUG_RETURN(true); + } + if (parse_engine_part_options(thd, table)) + DBUG_RETURN(true); + } + /* + If the old table had partitions and we are doing ALTER TABLE ... + engine= , the new table must preserve the original + partitioning. 
This means that the new engine is still the + partitioning engine, not the engine specified in the parser. + This is discovered in prep_alter_part_table, which in such case + updates create_info->db_type. + It's therefore important that the assignment below is done + after prep_alter_part_table. + */ + new_db_type= create_info->db_type; +#endif + + if (mysql_prepare_alter_table(thd, table, create_info, alter_info, + &alter_ctx)) + { + DBUG_RETURN(true); + } + + DBUG_ASSERT(create_info->default_table_charset); + + /* + The ALTER related code cannot alter partitions and change column data types + at the same time. So in case of partition change statements like: + ALTER TABLE t1 DROP PARTITION p1; + we skip implicit data type upgrade (such as "MariaDB 5.3 TIME" to + "MySQL 5.6 TIME" or vice versa according to mysql56_temporal_format). + Note, one can run a separate "ALTER TABLE t1 FORCE;" statement + before or after the partition change ALTER statement to upgrade data types. + */ + if (IF_PARTITIONING(!fast_alter_partition, 1)) + Create_field::upgrade_data_types(alter_info->create_list); + + if (create_info->check_fields(thd, alter_info, + table_list->table_name, table_list->db) || + create_info->fix_period_fields(thd, alter_info)) + DBUG_RETURN(true); + + if (!(thd->variables.option_bits & OPTION_EXPLICIT_DEF_TIMESTAMP)) + promote_first_timestamp_column(&alter_info->create_list); + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (fast_alter_partition) + { + /* + ALGORITHM and LOCK clauses are generally not allowed by the + parser for operations related to partitioning. + The exceptions are ALTER_PARTITION_INFO and ALTER_PARTITION_REMOVE. + For consistency, we report ER_ALTER_OPERATION_NOT_SUPPORTED here. 
+ */ + if (alter_info->requested_lock != + Alter_info::ALTER_TABLE_LOCK_DEFAULT) + { + my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0), + "LOCK=NONE/SHARED/EXCLUSIVE", + ER_THD(thd, ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_PARTITION), + "LOCK=DEFAULT"); + DBUG_RETURN(true); + } + else if (alter_info->algorithm(thd) != + Alter_info::ALTER_TABLE_ALGORITHM_DEFAULT) + { + my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0), + "ALGORITHM=COPY/INPLACE", + ER_THD(thd, ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_PARTITION), + "ALGORITHM=DEFAULT"); + DBUG_RETURN(true); + } + + /* + Upgrade from MDL_SHARED_UPGRADABLE to MDL_SHARED_NO_WRITE. + Afterwards it's safe to take the table level lock. + */ + if ((thd->mdl_context.upgrade_shared_lock(mdl_ticket, MDL_SHARED_NO_WRITE, + thd->variables.lock_wait_timeout)) || + lock_tables(thd, table_list, alter_ctx.tables_opened, 0)) + { + DBUG_RETURN(true); + } + + // In-place execution of ALTER TABLE for partitioning. + alter_info->db= alter_ctx.db; + alter_info->table_name= alter_ctx.table_name; + DBUG_RETURN(fast_alter_partition_table(thd, table, alter_info, &alter_ctx, + create_info, table_list)); + } +#endif + +#ifdef WITH_WSREP + if (table->s->sequence && WSREP(thd) && + wsrep_thd_is_local_toi(thd)) + { + if (wsrep_check_sequence(thd, create_info->seq_create_info)) + DBUG_RETURN(TRUE); + } +#endif /* WITH_WSREP */ + + /* + Use copy algorithm if: + - old_alter_table system variable is set without in-place requested using + the ALGORITHM clause. + - Or if in-place is impossible for given operation. + - Changes to partitioning which were not handled by fast_alter_part_table() + needs to be handled using table copying algorithm unless the engine + supports auto-partitioning as such engines can do some changes + using in-place API. 
+ */ + if ((thd->variables.alter_algorithm == Alter_info::ALTER_TABLE_ALGORITHM_COPY && + alter_info->algorithm(thd) != + Alter_info::ALTER_TABLE_ALGORITHM_INPLACE) + || is_inplace_alter_impossible(table, create_info, alter_info) + || IF_PARTITIONING((partition_changed && + !(old_db_type->partition_flags() & HA_USE_AUTO_PARTITION)), 0)) + { + if (alter_info->algorithm(thd) == + Alter_info::ALTER_TABLE_ALGORITHM_INPLACE) + { + my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0), + "ALGORITHM=INPLACE", "ALGORITHM=COPY"); + DBUG_RETURN(true); + } + alter_info->set_requested_algorithm( + Alter_info::ALTER_TABLE_ALGORITHM_COPY); + } + + /* + ALTER TABLE ... ENGINE to the same engine is a common way to + request table rebuild. Set ALTER_RECREATE flag to force table + rebuild. + */ + if (new_db_type == old_db_type && + create_info->used_fields & HA_CREATE_USED_ENGINE) + alter_info->flags|= ALTER_RECREATE; + + /* + Handling of symlinked tables: + If no rename: + Create new data file and index file on the same disk as the + old data and index files. + Copy data. + Rename new data file over old data file and new index file over + old index file. + Symlinks are not changed. + + If rename: + Create new data file and index file on the same disk as the + old data and index files. Create also symlinks to point at + the new tables. + Copy data. + At end, rename intermediate tables, and symlinks to intermediate + table, to final table name. + Remove old table and old symlinks + + If rename is made to another database: + Create new tables in new database. + Copy data. + Remove old table and symlinks. 
+ */ + if (!alter_ctx.is_database_changed()) + { + if (create_info->index_file_name) + { + /* Fix index_file_name to have 'tmp_name' as basename */ + strmov(index_file, alter_ctx.tmp_name.str); + create_info->index_file_name=fn_same(index_file, + create_info->index_file_name, + 1); + } + if (create_info->data_file_name) + { + /* Fix data_file_name to have 'tmp_name' as basename */ + strmov(data_file, alter_ctx.tmp_name.str); + create_info->data_file_name=fn_same(data_file, + create_info->data_file_name, + 1); + } + } + else + { + /* Ignore symlink if db is changed. */ + create_info->data_file_name=create_info->index_file_name=0; + } + + DEBUG_SYNC(thd, "alter_table_before_create_table_no_lock"); + + /* Create a new table version id for the new table */ + my_uuid(uuid_buffer); + create_info->tabledef_version.str= uuid_buffer; + create_info->tabledef_version.length= MY_UUID_SIZE; + + if (!table->s->tmp_table) + { + LEX_CSTRING path_to_frm= alter_ctx.get_tmp_cstring_path(); + LEX_CSTRING tmp_table= backup_name; + if (alter_ctx.is_table_renamed()) + tmp_table= alter_ctx.new_alias; + + if (ddl_log_alter_table(&ddl_log_state, + old_db_type, + &alter_ctx.db, &alter_ctx.table_name, + new_db_type, + table->file->partition_ht(), + &alter_ctx.new_db, &alter_ctx.tmp_name, + &path_to_frm, + &tmp_table, + &create_info->tabledef_version, + table->file->table_version(), + alter_ctx.is_table_renamed()) || + ddl_log_store_query(thd, &ddl_log_state, + thd->query(), thd->query_length())) + { + error= 1; + goto err_cleanup; + } + } + + tmp_disable_binlog(thd); + create_info->options|=HA_CREATE_TMP_ALTER; + if (!(alter_info->flags & ALTER_ADD_INDEX) && !alter_ctx.modified_primary_key) + create_info->options|= HA_SKIP_KEY_SORT; + else + alter_info->flags|= ALTER_INDEX_ORDER; + create_info->alias= alter_ctx.table_name; + thd->abort_on_warning= !ignore && thd->is_strict_mode(); + + /* + This is to be able to call Alter_info::add_stat_drop_index(thd, key_name) + from 
mysql_prepare_create_table() + */ + alter_info->original_table= table; + + /* + Create the .frm file for the new table. Storage engine table will not be + created at this stage. + + No ddl logging needed as ddl_log_alter_query will take care of failed + table creations. + + Partitioning: part_info is passed via thd->work_part_info + */ + error= create_table_impl(thd, (DDL_LOG_STATE*) 0, (DDL_LOG_STATE*) 0, + alter_ctx.db, alter_ctx.table_name, + alter_ctx.new_db, alter_ctx.tmp_name, + alter_ctx.get_tmp_cstring_path(), + thd->lex->create_info, + create_info, alter_info, C_ALTER_TABLE_FRM_ONLY, + NULL, &key_info, &key_count, &frm); + thd->abort_on_warning= false; + reenable_binlog(thd); + + debug_crash_here("ddl_log_alter_after_create_frm"); + + if (unlikely(error)) + goto err_cleanup; + + /* Remember version id for temporary table */ + alter_ctx.tmp_id= create_info->tabledef_version; + + /* Remember that we have not created table in storage engine yet. */ + no_ha_table= true; + + if (alter_info->algorithm(thd) != Alter_info::ALTER_TABLE_ALGORITHM_COPY) + { + Alter_inplace_info ha_alter_info(create_info, alter_info, + key_info, key_count, + IF_PARTITIONING(thd->work_part_info, NULL), + ignore, alter_ctx.error_if_not_empty); + TABLE_SHARE altered_share; + TABLE altered_table; + bool use_inplace= true; + + /* Fill the Alter_inplace_info structure. */ + if (fill_alter_inplace_info(thd, table, varchar, &ha_alter_info)) + goto err_new_table_cleanup; + + alter_ctx.tmp_storage_engine_name_partitioned= + table->file->partition_engine(); + alter_ctx.tmp_storage_engine_name.length= + (strmake((char*) alter_ctx.tmp_storage_engine_name.str, + table->file->real_table_type(), + sizeof(alter_ctx.tmp_storage_engine_buff)-1) - + alter_ctx.tmp_storage_engine_name.str); + + /* + We can ignore ALTER_COLUMN_ORDER and instead check + ALTER_STORED_COLUMN_ORDER & ALTER_VIRTUAL_COLUMN_ORDER. 
This + is ok as ALTER_COLUMN_ORDER may be wrong if we use AFTER last_field + ALTER_COLUMN_NAME is set if field really was renamed. + */ + + if (!(ha_alter_info.handler_flags & + ~(ALTER_COLUMN_ORDER | ALTER_RENAME_COLUMN | ALTER_INDEX_ORDER))) + { + /* + No-op ALTER, no need to call handler API functions. + + If this code path is entered for an ALTER statement that + should not be a real no-op, new handler flags should be added + and fill_alter_inplace_info() adjusted. + + Note that we can end up here if an ALTER statement has clauses + that cancel each other out (e.g. ADD/DROP identically index). + + Also note that we ignore the LOCK clause here. + + TODO don't create partitioning metadata in the first place + + TODO: Now case-change index name is treated as noop which is not quite + correct. + */ + table->file->ha_create_partitioning_metadata(alter_ctx.get_tmp_path(), + NULL, CHF_DELETE_FLAG); + goto end_inplace; + } + + // We assume that the table is non-temporary. + DBUG_ASSERT(!table->s->tmp_table); + + if (create_table_for_inplace_alter(thd, alter_ctx, &frm, &altered_share, + &altered_table)) + goto err_new_table_cleanup; + /* + Avoid creating frm again in ha_create_table() if inplace alter will not + be used. + */ + frm_is_created= 1; + + /* Set markers for fields in TABLE object for altered table. */ + update_altered_table(ha_alter_info, &altered_table); + + /* + Mark all columns in 'altered_table' as used to allow usage + of its record[0] buffer and Field objects during in-place + ALTER TABLE. 
+ */ + altered_table.column_bitmaps_set_no_signal(&altered_table.s->all_set, + &altered_table.s->all_set); + restore_record(&altered_table, s->default_values); // Create empty record + /* Check that we can call default functions with default field values */ + thd->count_cuted_fields= CHECK_FIELD_EXPRESSION; + altered_table.reset_default_fields(); + if (altered_table.default_field && + altered_table.update_default_fields(true)) + { + cleanup_table_after_inplace_alter(&altered_table); + goto err_new_table_cleanup; + } + thd->count_cuted_fields= CHECK_FIELD_IGNORE; + + if (alter_info->requested_lock == Alter_info::ALTER_TABLE_LOCK_NONE) + ha_alter_info.online= true; + // Ask storage engine whether to use copy or in-place + { + Check_level_instant_set check_level_save(thd, CHECK_FIELD_WARN); + ha_alter_info.inplace_supported= + table->file->check_if_supported_inplace_alter(&altered_table, + &ha_alter_info); + } + + if (ha_alter_info.inplace_supported != HA_ALTER_INPLACE_NOT_SUPPORTED) + { + List_iterator it(alter_info->key_list); + while (Key *k= it++) + { + if (k->without_overlaps) + { + ha_alter_info.inplace_supported= HA_ALTER_INPLACE_NOT_SUPPORTED; + break; + } + } + } + + if (alter_info->supports_algorithm(thd, &ha_alter_info) || + alter_info->supports_lock(thd, &ha_alter_info)) + { + cleanup_table_after_inplace_alter(&altered_table); + goto err_new_table_cleanup; + } + + // If SHARED lock and no particular algorithm was requested, use COPY. 
+ if (ha_alter_info.inplace_supported == HA_ALTER_INPLACE_EXCLUSIVE_LOCK && + alter_info->requested_lock == Alter_info::ALTER_TABLE_LOCK_SHARED && + alter_info->algorithm(thd) == + Alter_info::ALTER_TABLE_ALGORITHM_DEFAULT && + thd->variables.alter_algorithm == + Alter_info::ALTER_TABLE_ALGORITHM_DEFAULT) + use_inplace= false; + + if (ha_alter_info.inplace_supported == HA_ALTER_INPLACE_NOT_SUPPORTED) + use_inplace= false; + + if (use_inplace) + { + table->s->frm_image= &frm; + /* + Set the truncated column values of thd as warning + for alter table. + */ + enum_check_fields org_count_cuted_fields= thd->count_cuted_fields; + thd->count_cuted_fields= CHECK_FIELD_WARN; + int res= mysql_inplace_alter_table(thd, table_list, table, &altered_table, + &ha_alter_info, + &target_mdl_request, &ddl_log_state, + &trigger_param, + &alter_ctx, partial_alter, + start_alter_id, if_exists); + thd->count_cuted_fields= org_count_cuted_fields; + inplace_alter_table_committed= ha_alter_info.inplace_alter_table_committed; + inplace_alter_table_committed_argument= + ha_alter_info.inplace_alter_table_committed_argument; + if (res) + { + cleanup_table_after_inplace_alter(&altered_table); + goto err_cleanup; + } + cleanup_table_after_inplace_alter_keep_files(&altered_table); + + goto end_inplace; + } + else + cleanup_table_after_inplace_alter_keep_files(&altered_table); + } + + /* ALTER TABLE using copy algorithm. */ + + /* Check if ALTER TABLE is compatible with foreign key definitions. */ + if (fk_prepare_copy_alter_table(thd, table, alter_info, &alter_ctx)) + goto err_new_table_cleanup; + + if (!table->s->tmp_table) + { + // COPY algorithm doesn't work with concurrent writes. + if (alter_info->requested_lock == Alter_info::ALTER_TABLE_LOCK_NONE) + { + my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0), + "LOCK=NONE", + ER_THD(thd, ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_COPY), + "LOCK=SHARED"); + goto err_new_table_cleanup; + } + + // If EXCLUSIVE lock is requested, upgrade already. 
+ if (alter_info->requested_lock == Alter_info::ALTER_TABLE_LOCK_EXCLUSIVE && + wait_while_table_is_used(thd, table, HA_EXTRA_FORCE_REOPEN)) + goto err_new_table_cleanup; + + /* + Otherwise upgrade to SHARED_NO_WRITE. + Note that under LOCK TABLES, we will already have SHARED_NO_READ_WRITE. + */ + if (alter_info->requested_lock != Alter_info::ALTER_TABLE_LOCK_EXCLUSIVE && + thd->mdl_context.upgrade_shared_lock(mdl_ticket, MDL_SHARED_NO_WRITE, + thd->variables.lock_wait_timeout)) + goto err_new_table_cleanup; + + DEBUG_SYNC(thd, "alter_table_copy_after_lock_upgrade"); + } + else + thd->close_unused_temporary_table_instances(table_list); + + if (table->s->tmp_table == NO_TMP_TABLE) + { + if (write_bin_log_start_alter(thd, partial_alter, start_alter_id, + if_exists)) + goto err_new_table_cleanup; + } + else if (start_alter_id) + { + DBUG_ASSERT(thd->rgi_slave); + + my_error(ER_INCONSISTENT_SLAVE_TEMP_TABLE, MYF(0), thd->query(), + table_list->db.str, table_list->table_name.str); + goto err_new_table_cleanup; + } + + DBUG_EXECUTE_IF("start_alter_delay_master", { + debug_sync_set_action(thd, + STRING_WITH_LEN("now wait_for alter_cont NO_CLEAR_EVENT")); + }); + // It's now safe to take the table level lock. + if (lock_tables(thd, table_list, alter_ctx.tables_opened, + MYSQL_LOCK_USE_MALLOC)) + goto err_new_table_cleanup; + + ddl_log_update_phase(&ddl_log_state, DDL_ALTER_TABLE_PHASE_CREATED); + + if (ha_create_table(thd, alter_ctx.get_tmp_path(), + alter_ctx.new_db.str, alter_ctx.new_name.str, + create_info, &frm, frm_is_created)) + goto err_new_table_cleanup; + + debug_crash_here("ddl_log_alter_after_create_table"); + + /* Mark that we have created table in storage engine. */ + no_ha_table= false; + DEBUG_SYNC(thd, "alter_table_intermediate_table_created"); + + /* Open the table since we need to copy the data. 
*/ + new_table= thd->create_and_open_tmp_table(&frm, + alter_ctx.get_tmp_path(), + alter_ctx.new_db.str, + alter_ctx.new_name.str, + true); + if (!new_table) + goto err_new_table_cleanup; + + if (table->s->tmp_table != NO_TMP_TABLE) + { + /* in case of alter temp table send the tracker in OK packet */ + thd->session_tracker.state_change.mark_as_changed(thd); + } + + /* + Note: In case of MERGE table, we do not attach children. We do not + copy data for MERGE tables. Only the children have data. + */ + + /* Copy the data if necessary. */ + thd->count_cuted_fields= CHECK_FIELD_WARN; // calc cuted fields + thd->cuted_fields=0L; + + /* + Collect fields that was renamed. + We do not do that if fill_alter_inplace_info() has + already collected renamed fields. + */ + if (alter_info->flags & (ALTER_CHANGE_COLUMN | ALTER_RENAME_COLUMN) && + alter_info->rename_stat_fields.is_empty()) + if (alter_info->collect_renamed_fields(thd)) + goto err_new_table_cleanup; + + /* + We do not copy data for MERGE tables. Only the children have data. + MERGE tables have HA_NO_COPY_ON_ALTER set. + */ + if (!(new_table->file->ha_table_flags() & HA_NO_COPY_ON_ALTER)) + { + new_table->next_number_field=new_table->found_next_number_field; + THD_STAGE_INFO(thd, stage_copy_to_tmp_table); + DBUG_EXECUTE_IF("abort_copy_table", { + my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0)); + goto err_new_table_cleanup; + }); + + /* + If old table was a shared table and new table is not same type, + the slaves will not be able to recreate the data. In this case we + write the CREATE TABLE statement for the new table to the log and + log all inserted rows to the table. 
+ */ + if ((table->file->partition_ht()->flags & + HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE) && + (table->file->partition_ht() != new_table->file->partition_ht()) && + thd->binlog_table_should_be_logged(&new_table->s->db)) + { + /* + 'new_table' is marked as internal temp table, but we want to have + the logging based on the original table type + */ + bool res; + tmp_table_type org_tmp_table= new_table->s->tmp_table; + new_table->s->tmp_table= table->s->tmp_table; + + /* Force row logging, even if the table was created as 'temporary' */ + new_table->s->can_do_row_logging= 1; + thd->binlog_start_trans_and_stmt(); + thd->variables.option_bits|= OPTION_BIN_COMMIT_OFF; + res= (binlog_drop_table(thd, table) || + binlog_create_table(thd, new_table, 1)); + new_table->s->tmp_table= org_tmp_table; + if (res) + goto err_new_table_cleanup; + /* + ha_write_row() will log inserted rows in copy_data_between_tables(). + No additional logging of query is needed + */ + binlog_as_create_select= 1; + DBUG_ASSERT(new_table->file->row_logging); + new_table->mark_columns_needed_for_insert(); + thd->binlog_write_table_map(new_table, 1); + } + if (copy_data_between_tables(thd, table, new_table, ignore, order_num, + order, &copied, &deleted, alter_info, + &alter_ctx)) + goto err_new_table_cleanup; + } + else + { + if (!table->s->tmp_table && + wait_while_table_is_used(thd, table, HA_EXTRA_FORCE_REOPEN)) + goto err_new_table_cleanup; + THD_STAGE_INFO(thd, stage_manage_keys); + alter_table_manage_keys(table, table->file->indexes_are_disabled(), + alter_info->keys_onoff); + if (trans_commit_stmt(thd) || trans_commit_implicit(thd)) + goto err_new_table_cleanup; + } + thd->count_cuted_fields= CHECK_FIELD_IGNORE; + + if (start_alter_id) + { + DBUG_ASSERT(thd->slave_thread); + + if (wait_for_master(thd)) + goto err_new_table_cleanup; + } + if (table->s->tmp_table != NO_TMP_TABLE) + { + /* Release lock if this is a transactional temporary table */ + if (thd->lock) + { + if (thd->locked_tables_mode != 
LTM_LOCK_TABLES && + thd->locked_tables_mode != LTM_PRELOCKED_UNDER_LOCK_TABLES) + { + int tmp_error= mysql_unlock_tables(thd, thd->lock); + thd->lock= NULL; + if (tmp_error) + goto err_new_table_cleanup; + } + else + { + /* + If LOCK TABLES list is not empty and contains this table, + unlock the table and remove the table from this list. + */ + if (mysql_lock_remove(thd, thd->lock, table)) + goto err_new_table_cleanup; + } + } + + new_table->s->table_creation_was_logged= + table->s->table_creation_was_logged; + /* Remove link to old table and rename the new one */ + thd->drop_temporary_table(table, NULL, true); + /* Should pass the 'new_name' as we store table name in the cache */ + if (thd->rename_temporary_table(new_table, &alter_ctx.new_db, + &alter_ctx.new_name)) + goto err_new_table_cleanup; + + if (binlog_as_create_select) + { + /* + The original table is now deleted. Copy the + DROP + CREATE + data statement to the binary log + */ + thd->variables.option_bits&= ~OPTION_BIN_COMMIT_OFF; + binlog_commit(thd, true); + } + + DBUG_ASSERT(!start_alter_id); // no 2 phase logging for + DBUG_ASSERT(!partial_alter); // temporary table alter + + /* We don't replicate alter table statement on temporary tables */ + if (!thd->is_current_stmt_binlog_format_row() && + table_creation_was_logged && + !binlog_as_create_select) + { + int tmp_error; + thd->binlog_xid= thd->query_id; + ddl_log_update_xid(&ddl_log_state, thd->binlog_xid); + tmp_error= write_bin_log_with_if_exists(thd, true, false, log_if_exists); + thd->binlog_xid= 0; + if (tmp_error) + goto err_cleanup; + } + goto end_temporary; + } + + /* Remember storage engine name for the new table */ + alter_ctx.tmp_storage_engine_name_partitioned= + new_table->file->partition_engine(); + alter_ctx.tmp_storage_engine_name.length= + (strmake((char*) alter_ctx.tmp_storage_engine_name.str, + new_table->file->real_table_type(), + sizeof(alter_ctx.tmp_storage_engine_buff)-1) - + alter_ctx.tmp_storage_engine_name.str); + + /* + 
Check if file names for the engine are unique. If we change engine + and file names are unique then we don't need to rename the original + table to a temporary name during the rename phase + + File names are unique if engine changed and + - Either new or old engine does not store the table in files + - Neither old or new engine uses files from another engine + The above is mainly true for the sequence and the partition engine. + */ + engine_changed= ((new_table->file->ht != table->file->ht) && + ((!(new_table->file->ha_table_flags() & HA_FILE_BASED) || + !(table->file->ha_table_flags() & HA_FILE_BASED))) && + !(table->file->ha_table_flags() & HA_REUSES_FILE_NAMES) && + !(new_table->file->ha_table_flags() & + HA_REUSES_FILE_NAMES)); + /* + Close the intermediate table that will be the new table, but do + not delete it! Even though MERGE tables do not have their children + attached here it is safe to call THD::drop_temporary_table(). + */ + thd->drop_temporary_table(new_table, NULL, false); + new_table= NULL; + + DEBUG_SYNC(thd, "alter_table_before_rename_result_table"); + + /* + Data is copied. Now we: + 1) Wait until all other threads will stop using old version of table + by upgrading shared metadata lock to exclusive one. + 2) Close instances of table open by this thread and replace them + with placeholders to simplify reopen process. + 3) Rename the old table to a temp name, rename the new one to the + old name. + 4) If we are under LOCK TABLES and don't do ALTER TABLE ... RENAME + we reopen new version of table. + 5) Write statement to the binary log. + 6) If we are under LOCK TABLES and do ALTER TABLE ... RENAME we + remove placeholders and release metadata locks. + 7) If we are not not under LOCK TABLES we rely on the caller + (mysql_execute_command()) to release metadata locks. + */ + + debug_crash_here("ddl_log_alter_after_copy"); // Use old table + /* + We are new ready to use the new table. 
Update the state in the + ddl log so that we recovery know that the new table is ready and + in case of crash it should use the new one and log the query + to the binary log. + */ + if (engine_changed) + ddl_log_add_flag(&ddl_log_state, DDL_LOG_FLAG_ALTER_ENGINE_CHANGED); + ddl_log_update_phase(&ddl_log_state, DDL_ALTER_TABLE_PHASE_COPIED); + debug_crash_here("ddl_log_alter_after_log"); // Use new table + + THD_STAGE_INFO(thd, stage_rename_result_table); + + if (wait_while_table_is_used(thd, table, HA_EXTRA_PREPARE_FOR_RENAME)) + goto err_new_table_cleanup; + + /* Now we are the only user. Update the data in EITS tables */ + alter_info->apply_statistics_deletes_renames(thd, table); + + close_all_tables_for_name(thd, table->s, + alter_ctx.is_table_renamed() ? + HA_EXTRA_PREPARE_FOR_RENAME: + HA_EXTRA_NOT_USED, + NULL); + table_list->table= table= NULL; /* Safety */ + + DBUG_PRINT("info", ("is_table_renamed: %d engine_changed: %d", + alter_ctx.is_table_renamed(), engine_changed)); + + /* + InnoDB cannot use the rename optimization when foreign key + constraint is involved because InnoDB fails to drop the + parent table due to foreign key constraint + */ + if (!alter_ctx.is_table_renamed() || alter_ctx.fk_error_if_delete_row) + { + /* + Rename the old table to temporary name to have a backup in case + anything goes wrong while renaming the new table. + + We only have to do this if name of the table is not changed. + If we are changing to use another table handler, we don't + have to do the rename as the table names will not interfer. + */ + if (mysql_rename_table(old_db_type, &alter_ctx.db, &alter_ctx.table_name, + &alter_ctx.db, &backup_name, &alter_ctx.id, + FN_TO_IS_TMP | + (engine_changed ? NO_HA_TABLE | NO_PAR_TABLE : 0))) + { + // Rename to temporary name failed, delete the new table, abort ALTER. 
+ (void) quick_rm_table(thd, new_db_type, &alter_ctx.new_db, + &alter_ctx.tmp_name, FN_IS_TMP); + goto err_with_mdl; + } + } + else + { + /* The original table is the backup */ + backup_name= alter_ctx.table_name; + PSI_CALL_drop_table_share(0, alter_ctx.db.str, (int) alter_ctx.db.length, + alter_ctx.table_name.str, (int) alter_ctx.table_name.length); + } + debug_crash_here("ddl_log_alter_after_rename_to_backup"); + + if (!alter_ctx.is_table_renamed()) + { + /* + We should not set this stage in case of rename as we in this case + must execute DDL_ALTER_TABLE_PHASE_COPIED to remove the orignal table + */ + ddl_log_update_phase(&ddl_log_state, DDL_ALTER_TABLE_PHASE_OLD_RENAMED); + } + + debug_crash_here("ddl_log_alter_after_rename_to_backup_log"); + + // Rename the new table to the correct name. + if (mysql_rename_table(new_db_type, &alter_ctx.new_db, &alter_ctx.tmp_name, + &alter_ctx.new_db, &alter_ctx.new_alias, + &alter_ctx.tmp_id, + FN_FROM_IS_TMP)) + { + // Rename failed, delete the temporary table. + ddl_log_update_phase(&ddl_log_state, DDL_ALTER_TABLE_PHASE_RENAME_FAILED); + (void) quick_rm_table(thd, new_db_type, &alter_ctx.new_db, + &alter_ctx.tmp_name, FN_IS_TMP); + + if (!alter_ctx.is_table_renamed() || alter_ctx.fk_error_if_delete_row) + { + // Restore the backup of the original table to the old name. + (void) mysql_rename_table(old_db_type, &alter_ctx.db, &backup_name, + &alter_ctx.db, &alter_ctx.alias, &alter_ctx.id, + FN_FROM_IS_TMP | NO_FK_CHECKS | + (engine_changed ? NO_HA_TABLE | NO_PAR_TABLE : + 0)); + } + goto err_with_mdl; + } + debug_crash_here("ddl_log_alter_after_rename_to_original"); + + // Check if we renamed the table and if so update trigger files. 
+ if (alter_ctx.is_table_renamed()) + { + debug_crash_here("ddl_log_alter_before_rename_triggers"); + if (Table_triggers_list::change_table_name(thd, &trigger_param, + &alter_ctx.db, + &alter_ctx.alias, + &alter_ctx.table_name, + &alter_ctx.new_db, + &alter_ctx.new_alias)) + { + // Rename succeeded, delete the new table. + (void) quick_rm_table(thd, new_db_type, + &alter_ctx.new_db, &alter_ctx.new_alias, 0); + // Restore the backup of the original table to the old name. + (void) mysql_rename_table(old_db_type, &alter_ctx.db, &backup_name, + &alter_ctx.db, &alter_ctx.alias, &alter_ctx.id, + FN_FROM_IS_TMP | NO_FK_CHECKS | + (engine_changed ? NO_HA_TABLE | NO_PAR_TABLE : + 0)); + goto err_with_mdl; + } + rename_table_in_stat_tables(thd, &alter_ctx.db, &alter_ctx.alias, + &alter_ctx.new_db, &alter_ctx.new_alias); + debug_crash_here("ddl_log_alter_after_rename_triggers"); + } + + // ALTER TABLE succeeded, delete the backup of the old table. + // a failure to delete isn't an error, as we cannot rollback ALTER anymore + thd->push_internal_handler(&errors_to_warnings); + error_handler_pushed=1; + + quick_rm_table(thd, old_db_type, &alter_ctx.db, &backup_name, + FN_IS_TMP | (engine_changed ? NO_HA_TABLE | NO_PAR_TABLE: 0)); + + debug_crash_here("ddl_log_alter_after_delete_backup"); + if (engine_changed) + { + /* the .frm file was removed but not the original table */ + quick_rm_table(thd, old_db_type, &alter_ctx.db, &alter_ctx.table_name, + NO_FRM_RENAME | (engine_changed ? 0 : FN_IS_TMP)); + } + + debug_crash_here("ddl_log_alter_after_drop_original_table"); + if (binlog_as_create_select) + { + /* + The original table is now deleted. 
Copy the + DROP + CREATE + data statement to the binary log + */ + thd->variables.option_bits&= ~OPTION_BIN_COMMIT_OFF; + thd->binlog_xid= thd->query_id; + ddl_log_update_xid(&ddl_log_state, thd->binlog_xid); + binlog_commit(thd, true); + thd->binlog_xid= 0; + } + +end_inplace: + thd->variables.option_bits&= ~OPTION_BIN_COMMIT_OFF; + + if (!error_handler_pushed) + thd->push_internal_handler(&errors_to_warnings); + + thd->locked_tables_list.reopen_tables(thd, false); + + thd->pop_internal_handler(); + + THD_STAGE_INFO(thd, stage_end); + DEBUG_SYNC(thd, "alter_table_before_main_binlog"); + + DBUG_ASSERT(!(mysql_bin_log.is_open() && + thd->is_current_stmt_binlog_format_row() && + (create_info->tmp_table()))); + + if(start_alter_id) + { + if (!is_reg_table) + { + my_error(ER_INCONSISTENT_SLAVE_TEMP_TABLE, MYF(0), thd->query(), + table_list->db.str, table_list->table_name.str); + DBUG_RETURN(true); + } + + if (process_master_state(thd, 0, start_alter_id, if_exists)) + DBUG_RETURN(true); + } + else if (!binlog_as_create_select) + { + int tmp_error; + thd->binlog_xid= thd->query_id; + ddl_log_update_xid(&ddl_log_state, thd->binlog_xid); + tmp_error= write_bin_log_with_if_exists(thd, true, false, log_if_exists, + partial_alter); + thd->binlog_xid= 0; + if (tmp_error) + goto err_cleanup; + } + + /* + We have to close the ddl log as soon as possible, after binlogging the + query, for inplace alter table. 
+ */ + ddl_log_complete(&ddl_log_state); + if (inplace_alter_table_committed) + { + /* Signal to storage engine that ddl log is committed */ + (*inplace_alter_table_committed)(inplace_alter_table_committed_argument); + inplace_alter_table_committed= 0; + } + + if (!alter_ctx.tmp_table) + { + backup_log_info ddl_log; + bzero(&ddl_log, sizeof(ddl_log)); + ddl_log.query= { C_STRING_WITH_LEN("ALTER") }; + ddl_log.org_storage_engine_name= alter_ctx.storage_engine_name; + ddl_log.org_partitioned= alter_ctx.storage_engine_partitioned; + ddl_log.org_database= alter_ctx.db; + ddl_log.org_table= alter_ctx.table_name; + ddl_log.org_table_id= alter_ctx.id; + ddl_log.new_storage_engine_name= alter_ctx.tmp_storage_engine_name; + ddl_log.new_partitioned= alter_ctx.tmp_storage_engine_name_partitioned; + ddl_log.new_database= alter_ctx.new_db; + ddl_log.new_table= alter_ctx.new_alias; + ddl_log.new_table_id= alter_ctx.tmp_id; + backup_log_ddl(&ddl_log); + } + + table_list->table= NULL; // For query cache + query_cache_invalidate3(thd, table_list, false); + + if (thd->locked_tables_mode == LTM_LOCK_TABLES || + thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES) + { + if (alter_ctx.is_table_renamed()) + thd->mdl_context.release_all_locks_for_name(mdl_ticket); + else + mdl_ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE); + } + +end_temporary: + my_free(const_cast(frm.str)); + + thd->variables.option_bits&= ~OPTION_BIN_COMMIT_OFF; + + *recreate_info= Recreate_info(copied, deleted); + thd->my_ok_with_recreate_info(*recreate_info, + (ulong) thd->get_stmt_da()-> + current_statement_warn_count()); + DEBUG_SYNC(thd, "alter_table_inplace_trans_commit"); + DBUG_RETURN(false); + +err_new_table_cleanup: + DBUG_PRINT("error", ("err_new_table_cleanup")); + thd->variables.option_bits&= ~OPTION_BIN_COMMIT_OFF; + + /* + No default value was provided for a DATE/DATETIME field, the + current sql_mode doesn't allow the '0000-00-00' value and + the table to be altered isn't empty. 
+ Report error here. + */ + if (unlikely(alter_ctx.error_if_not_empty && + thd->get_stmt_da()->current_row_for_warning())) + { + Abort_on_warning_instant_set aws(thd, true); + alter_ctx.report_implicit_default_value_error(thd, new_table + ? new_table->s : table->s); + } + + if (new_table) + { + thd->drop_temporary_table(new_table, NULL, true); + } + else + (void) quick_rm_table(thd, new_db_type, + &alter_ctx.new_db, &alter_ctx.tmp_name, + (FN_IS_TMP | (no_ha_table ? NO_HA_TABLE : 0)), + alter_ctx.get_tmp_path()); + DEBUG_SYNC(thd, "alter_table_after_temp_table_drop"); +err_cleanup: + my_free(const_cast(frm.str)); + ddl_log_complete(&ddl_log_state); + if (inplace_alter_table_committed) + { + /* Signal to storage engine that ddl log is committed */ + (*inplace_alter_table_committed)(inplace_alter_table_committed_argument); + } + DEBUG_SYNC(thd, "alter_table_after_temp_table_drop"); + if (partial_alter || start_alter_id) + write_bin_log_start_alter_rollback(thd, start_alter_id, partial_alter, + if_exists); + DBUG_RETURN(true); + +err_with_mdl: + ddl_log_complete(&ddl_log_state); + /* + An error happened while we were holding exclusive name metadata lock + on table being altered. To be safe under LOCK TABLES we should + remove all references to the altered table from the list of locked + tables and release the exclusive metadata lock. + */ + thd->locked_tables_list.unlink_all_closed_tables(thd, NULL, 0); + if (!table_list->table) + thd->mdl_context.release_all_locks_for_name(mdl_ticket); + goto err_cleanup; +} + + + +/** + Prepare the transaction for the alter table's copy phase. +*/ + +bool mysql_trans_prepare_alter_copy_data(THD *thd) +{ + DBUG_ENTER("mysql_trans_prepare_alter_copy_data"); + /* + Turn off recovery logging since rollback of an alter table is to + delete the new table so there is no need to log the changes to it. + + This needs to be done before external_lock. 
+ */ + DBUG_RETURN(ha_enable_transaction(thd, FALSE) != 0); +} + + +/** + Commit the copy phase of the alter table. +*/ + +bool mysql_trans_commit_alter_copy_data(THD *thd) +{ + bool error= FALSE; + uint save_unsafe_rollback_flags; + DBUG_ENTER("mysql_trans_commit_alter_copy_data"); + + /* Save flags as trans_commit_implicit are deleting them */ + save_unsafe_rollback_flags= thd->transaction->stmt.m_unsafe_rollback_flags; + + DEBUG_SYNC(thd, "alter_table_copy_trans_commit"); + + if (ha_enable_transaction(thd, TRUE)) + DBUG_RETURN(TRUE); + + /* + Ensure that the new table is saved properly to disk before installing + the new .frm. + And that InnoDB's internal latches are released, to avoid deadlock + when waiting on other instances of the table before rename (Bug#54747). + */ + if (trans_commit_stmt(thd)) + error= TRUE; + if (trans_commit_implicit(thd)) + error= TRUE; + + thd->transaction->stmt.m_unsafe_rollback_flags= save_unsafe_rollback_flags; + DBUG_RETURN(error); +} + + +static int +copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, bool ignore, + uint order_num, ORDER *order, ha_rows *copied, + ha_rows *deleted, Alter_info *alter_info, + Alter_table_ctx *alter_ctx) +{ + int error= 1; + Copy_field *copy= NULL, *copy_end; + ha_rows found_count= 0, delete_count= 0; + SORT_INFO *file_sort= 0; + READ_RECORD info; + TABLE_LIST tables; + List fields; + List all_fields; + bool auto_increment_field_copied= 0; + bool cleanup_done= 0; + bool init_read_record_done= 0; + sql_mode_t save_sql_mode= thd->variables.sql_mode; + ulonglong prev_insert_id, time_to_report_progress; + Field **dfield_ptr= to->default_field; + uint save_to_s_default_fields= to->s->default_fields; + bool make_versioned= !from->versioned() && to->versioned(); + bool make_unversioned= from->versioned() && !to->versioned(); + bool keep_versioned= from->versioned() && to->versioned(); + bool bulk_insert_started= 0; + Field *to_row_start= NULL, *to_row_end= NULL, *from_row_end= NULL; + MYSQL_TIME 
query_start; + DBUG_ENTER("copy_data_between_tables"); + + /* Two or 3 stages; Sorting, copying data and update indexes */ + thd_progress_init(thd, 2 + MY_TEST(order)); + + if (!(copy= new (thd->mem_root) Copy_field[to->s->fields])) + DBUG_RETURN(-1); + + if (mysql_trans_prepare_alter_copy_data(thd)) + { + delete [] copy; + DBUG_RETURN(-1); + } + + /* We need external lock before we can disable/enable keys */ + if (to->file->ha_external_lock(thd, F_WRLCK)) + { + /* Undo call to mysql_trans_prepare_alter_copy_data() */ + ha_enable_transaction(thd, TRUE); + delete [] copy; + DBUG_RETURN(-1); + } + + backup_set_alter_copy_lock(thd, from); + + alter_table_manage_keys(to, from->file->indexes_are_disabled(), + alter_info->keys_onoff); + + from->default_column_bitmaps(); + + /* We can abort alter table for any table type */ + thd->abort_on_warning= !ignore && thd->is_strict_mode(); + + from->file->info(HA_STATUS_VARIABLE); + to->file->extra(HA_EXTRA_PREPARE_FOR_ALTER_TABLE); + if (!to->s->long_unique_table) + { + to->file->ha_start_bulk_insert(from->file->stats.records, + ignore ? 0 : HA_CREATE_UNIQUE_INDEX_BY_SORT); + bulk_insert_started= 1; + } + mysql_stage_set_work_estimated(thd->m_stage_progress_psi, from->file->stats.records); + List_iterator it(alter_info->create_list); + Create_field *def; + copy_end=copy; + to->s->default_fields= 0; + for (Field **ptr=to->field ; *ptr ; ptr++) + { + def=it++; + if (def->field) + { + if (*ptr == to->next_number_field) + { + auto_increment_field_copied= TRUE; + /* + If we are going to copy contents of one auto_increment column to + another auto_increment column it is sensible to preserve zeroes. + This condition also covers case when we are don't actually alter + auto_increment column. 
+ */ + if (def->field == from->found_next_number_field) + thd->variables.sql_mode|= MODE_NO_AUTO_VALUE_ON_ZERO; + } + if (!(*ptr)->vcol_info) + { + bitmap_set_bit(from->read_set, def->field->field_index); + if ((*ptr)->check_assignability_from(def->field, ignore)) + goto err; + (copy_end++)->set(*ptr,def->field,0); + } + } + else + { + /* + Update the set of auto-update fields to contain only the new fields + added to the table. Only these fields should be updated automatically. + Old fields keep their current values, and therefore should not be + present in the set of autoupdate fields. + */ + if ((*ptr)->default_value) + { + *(dfield_ptr++)= *ptr; + ++to->s->default_fields; + } + } + } + if (dfield_ptr) + *dfield_ptr= NULL; + + if (order) + { + if (to->s->primary_key != MAX_KEY && + to->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) + { + char warn_buff[MYSQL_ERRMSG_SIZE]; + Abort_on_warning_instant_set aws(thd, false); + my_snprintf(warn_buff, sizeof(warn_buff), + "ORDER BY ignored as there is a user-defined clustered index" + " in the table '%-.192s'", from->s->table_name.str); + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + warn_buff); + } + else + { + bzero((char *) &tables, sizeof(tables)); + tables.table= from; + tables.alias= tables.table_name= from->s->table_name; + tables.db= from->s->db; + + THD_STAGE_INFO(thd, stage_sorting); + Filesort_tracker dummy_tracker(false); + Filesort fsort(order, HA_POS_ERROR, true, NULL); + + if (thd->lex->first_select_lex()->setup_ref_array(thd, order_num) || + setup_order(thd, thd->lex->first_select_lex()->ref_pointer_array, + &tables, fields, all_fields, order)) + goto err; + + if (!(file_sort= filesort(thd, from, &fsort, &dummy_tracker))) + goto err; + } + thd_progress_next_stage(thd); + } + + if (make_versioned) + { + query_start= thd->query_start_TIME(); + to_row_start= to->vers_start_field(); + to_row_end= to->vers_end_field(); + } + else if (make_unversioned) + { + from_row_end= 
from->vers_end_field(); + } + + if (from_row_end) + bitmap_set_bit(from->read_set, from_row_end->field_index); + + from->file->column_bitmaps_signal(); + + to->file->prepare_for_insert(0); + DBUG_ASSERT(to->file->inited == handler::NONE); + + /* Tell handler that we have values for all columns in the to table */ + to->use_all_columns(); + /* Add virtual columns to vcol_set to ensure they are updated */ + if (to->vfield) + to->mark_virtual_columns_for_write(TRUE); + if (init_read_record(&info, thd, from, (SQL_SELECT *) 0, file_sort, 1, 1, + FALSE)) + goto err; + init_read_record_done= 1; + + if (ignore && !alter_ctx->fk_error_if_delete_row) + to->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + thd->get_stmt_da()->reset_current_row_for_warning(1); + restore_record(to, s->default_values); // Create empty record + to->reset_default_fields(); + + thd->progress.max_counter= from->file->records(); + time_to_report_progress= MY_HOW_OFTEN_TO_WRITE/10; + if (!ignore) /* for now, InnoDB needs the undo log for ALTER IGNORE */ + to->file->extra(HA_EXTRA_BEGIN_ALTER_COPY); + + while (likely(!(error= info.read_record()))) + { + if (unlikely(thd->killed)) + { + thd->send_kill_message(); + error= 1; + break; + } + + if (make_unversioned) + { + if (!from_row_end->is_max()) + continue; // Drop history rows. + } + + if (unlikely(++thd->progress.counter >= time_to_report_progress)) + { + time_to_report_progress+= MY_HOW_OFTEN_TO_WRITE/10; + thd_progress_report(thd, thd->progress.counter, + thd->progress.max_counter); + } + + /* Return error if source table isn't empty. 
*/ + if (unlikely(alter_ctx->error_if_not_empty)) + { + error= 1; + break; + } + + for (Copy_field *copy_ptr=copy ; copy_ptr != copy_end ; copy_ptr++) + { + copy_ptr->do_copy(copy_ptr); + } + + if (make_versioned) + { + to_row_start->set_notnull(); + to_row_start->store_time(&query_start); + to_row_end->set_max(); + } + + prev_insert_id= to->file->next_insert_id; + if (to->default_field) + to->update_default_fields(ignore); + if (to->vfield) + to->update_virtual_fields(to->file, VCOL_UPDATE_FOR_WRITE); + + /* This will set thd->is_error() if fatal failure */ + if (to->verify_constraints(ignore) == VIEW_CHECK_SKIP) + continue; + if (unlikely(thd->is_error())) + { + error= 1; + break; + } + if (keep_versioned && to->versioned(VERS_TRX_ID)) + to->vers_write= false; + + if (to->next_number_field) + { + if (auto_increment_field_copied) + to->auto_increment_field_not_null= TRUE; + else + to->next_number_field->reset(); + } + error= to->file->ha_write_row(to->record[0]); + to->auto_increment_field_not_null= FALSE; + if (unlikely(error)) + { + if (to->file->is_fatal_error(error, HA_CHECK_DUP)) + { + /* Not a duplicate key error. */ + to->file->print_error(error, MYF(0)); + error= 1; + break; + } + else + { + /* Duplicate key error. */ + if (unlikely(alter_ctx->fk_error_if_delete_row)) + { + /* + We are trying to omit a row from the table which serves as parent + in a foreign key. This might have broken referential integrity so + emit an error. Note that we can't ignore this error even if we are + executing ALTER IGNORE TABLE. IGNORE allows to skip rows, but + doesn't allow to break unique or foreign key constraints, + */ + my_error(ER_FK_CANNOT_DELETE_PARENT, MYF(0), + alter_ctx->fk_error_id, + alter_ctx->fk_error_table); + break; + } + + if (ignore) + { + /* This ALTER IGNORE TABLE. Simply skip row and continue. */ + to->file->restore_auto_increment(prev_insert_id); + delete_count++; + } + else + { + /* Ordinary ALTER TABLE. Report duplicate key error. 
*/ + uint key_nr= to->file->get_dup_key(error); + if ((int) key_nr >= 0) + { + const char *err_msg= ER_THD(thd, ER_DUP_ENTRY_WITH_KEY_NAME); + if (key_nr == 0 && to->s->keys > 0 && + (to->key_info[0].key_part[0].field->flags & + AUTO_INCREMENT_FLAG)) + err_msg= ER_THD(thd, ER_DUP_ENTRY_AUTOINCREMENT_CASE); + print_keydup_error(to, + key_nr >= to->s->keys ? NULL : + &to->key_info[key_nr], + err_msg, MYF(0)); + } + else + to->file->print_error(error, MYF(0)); + break; + } + } + } + else + { + DEBUG_SYNC(thd, "copy_data_between_tables_before"); + found_count++; + mysql_stage_set_work_completed(thd->m_stage_progress_psi, found_count); + } + thd->get_stmt_da()->inc_current_row_for_warning(); + } + + THD_STAGE_INFO(thd, stage_enabling_keys); + thd_progress_next_stage(thd); + + if (error > 0 && !from->s->tmp_table) + { + /* We are going to drop the temporary table */ + to->file->extra(HA_EXTRA_PREPARE_FOR_DROP); + } + if (bulk_insert_started && to->file->ha_end_bulk_insert() && error <= 0) + { + /* Give error, if not already given */ + if (!thd->is_error()) + to->file->print_error(my_errno,MYF(0)); + error= 1; + } + bulk_insert_started= 0; + if (!ignore) + to->file->extra(HA_EXTRA_END_ALTER_COPY); + + cleanup_done= 1; + to->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); + + DEBUG_SYNC(thd, "copy_data_between_tables_before_reset_backup_lock"); + if (backup_reset_alter_copy_lock(thd)) + error= 1; + + if (unlikely(mysql_trans_commit_alter_copy_data(thd))) + error= 1; + + err: + if (bulk_insert_started) + (void) to->file->ha_end_bulk_insert(); + +/* Free resources */ + if (init_read_record_done) + end_read_record(&info); + delete [] copy; + delete file_sort; + + thd->variables.sql_mode= save_sql_mode; + thd->abort_on_warning= 0; + *copied= found_count; + *deleted=delete_count; + to->file->ha_release_auto_increment(); + to->s->default_fields= save_to_s_default_fields; + + if (!cleanup_done) + { + /* This happens if we get an error during initialization of data */ + 
DBUG_ASSERT(error); + ha_enable_transaction(thd, TRUE); + } + + if (to->file->ha_external_unlock(thd)) + error=1; + if (error < 0 && !from->s->tmp_table && + to->file->extra(HA_EXTRA_PREPARE_FOR_RENAME)) + error= 1; + thd_progress_end(thd); + DBUG_RETURN(error > 0 ? -1 : 0); +} + + +/* + Recreates one table by calling mysql_alter_table(). + + SYNOPSIS + mysql_recreate_table() + thd Thread handler + table_list Table to recreate + table_copy Recreate the table by using + ALTER TABLE COPY algorithm + + RETURN + Like mysql_alter_table(). +*/ + +bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list, + Recreate_info *recreate_info, bool table_copy) +{ + Table_specification_st create_info; + Alter_info alter_info; + TABLE_LIST *next_table= table_list->next_global; + DBUG_ENTER("mysql_recreate_table"); + + /* Set lock type which is appropriate for ALTER TABLE. */ + table_list->lock_type= TL_READ_NO_INSERT; + /* Same applies to MDL request. */ + table_list->mdl_request.set_type(MDL_SHARED_NO_WRITE); + /* hide following tables from open_tables() */ + table_list->next_global= NULL; + + create_info.init(); + create_info.row_type=ROW_TYPE_NOT_USED; + create_info.alter_info= &alter_info; + /* Force alter table to recreate table */ + alter_info.flags= (ALTER_CHANGE_COLUMN | ALTER_RECREATE); + + if (table_copy) + alter_info.set_requested_algorithm( + Alter_info::ALTER_TABLE_ALGORITHM_COPY); + + bool res= mysql_alter_table(thd, &null_clex_str, &null_clex_str, &create_info, + table_list, recreate_info, &alter_info, 0, + (ORDER *) 0, + // Ignore duplicate records on REPAIR + thd->lex->sql_command == SQLCOM_REPAIR, + 0); + table_list->next_global= next_table; + DBUG_RETURN(res); +} + + +/** + Collect field names of result set that will be sent to a client in result of + handling the CHECKSUM TABLE statement. 
+ + @param thd Thread data object + @param[out] fields List of fields whose metadata should be collected for + sending to client + */ + +void fill_checksum_table_metadata_fields(THD *thd, List *fields) +{ + Item *item; + + item= new (thd->mem_root) Item_empty_string(thd, "Table", NAME_LEN*2); + item->set_maybe_null(); + fields->push_back(item, thd->mem_root); + + item= new (thd->mem_root) Item_int(thd, "Checksum", (longlong) 1, + MY_INT64_NUM_DECIMAL_DIGITS); + item->set_maybe_null(); + fields->push_back(item, thd->mem_root); +} + + +bool mysql_checksum_table(THD *thd, TABLE_LIST *tables, + HA_CHECK_OPT *check_opt) +{ + TABLE_LIST *table; + List field_list; + Protocol *protocol= thd->protocol; + DBUG_ENTER("mysql_checksum_table"); + + /* + CHECKSUM TABLE returns results and rollbacks statement transaction, + so it should not be used in stored function or trigger. + */ + DBUG_ASSERT(! thd->in_sub_stmt); + + fill_checksum_table_metadata_fields(thd, &field_list); + + if (protocol->send_result_set_metadata(&field_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) + DBUG_RETURN(TRUE); + + /* + Close all temporary tables which were pre-open to simplify + privilege checking. Clear all references to closed tables. + */ + close_thread_tables(thd); + for (table= tables; table; table= table->next_local) + table->table= NULL; + + /* Open one table after the other to keep lock time as short as possible. */ + for (table= tables; table; table= table->next_local) + { + char table_name_buff[SAFE_NAME_LEN*2+2]; + LEX_CSTRING table_name= { table_name_buff, 0}; + TABLE *t; + TABLE_LIST *save_next_global; + + table_name.length= strxmov(table_name_buff, table->db.str ,".", + table->table_name.str, NullS) - table_name_buff; + + /* Remember old 'next' pointer and break the list. */ + save_next_global= table->next_global; + table->next_global= NULL; + table->lock_type= TL_READ; + /* Allow to open real tables only. 
*/ + table->required_type= TABLE_TYPE_NORMAL; + + if (thd->open_temporary_tables(table) || + open_and_lock_tables(thd, table, FALSE, 0)) + { + t= NULL; + } + else + t= table->table; + + table->next_global= save_next_global; + + protocol->prepare_for_resend(); + protocol->store(&table_name, system_charset_info); + + if (!t) + { + /* Table didn't exist */ + protocol->store_null(); + } + else + { + /* Call ->checksum() if the table checksum matches 'old_mode' settings */ + if (!(check_opt->flags & T_EXTEND) && + (((t->file->ha_table_flags() & HA_HAS_OLD_CHECKSUM) && + (thd->variables.old_behavior & OLD_MODE_COMPAT_5_1_CHECKSUM)) || + ((t->file->ha_table_flags() & HA_HAS_NEW_CHECKSUM) && + !(thd->variables.old_behavior & OLD_MODE_COMPAT_5_1_CHECKSUM)))) + { + if (t->file->info(HA_STATUS_VARIABLE) || t->file->stats.checksum_null) + protocol->store_null(); + else + protocol->store((longlong)t->file->stats.checksum); + } + else if (check_opt->flags & T_QUICK) + protocol->store_null(); + else + { + int error= t->file->calculate_checksum(); + if (thd->killed) + { + /* + we've been killed; let handler clean up, and remove the + partial current row from the recordset (embedded lib) + */ + t->file->ha_rnd_end(); + thd->protocol->remove_last_row(); + goto err; + } + if (error || t->file->stats.checksum_null) + protocol->store_null(); + else + protocol->store((longlong)t->file->stats.checksum); + } + trans_rollback_stmt(thd); + close_thread_tables(thd); + } + + if (thd->transaction_rollback_request) + { + /* + If transaction rollback was requested we honor it. To do this we + abort statement and return error as not only CHECKSUM TABLE is + rolled back but the whole transaction in which it was used. + */ + thd->protocol->remove_last_row(); + goto err; + } + + /* Hide errors from client. Return NULL for problematic tables instead. 
*/ + thd->clear_error(); + + if (protocol->write()) + goto err; + } + + my_eof(thd); + DBUG_RETURN(FALSE); + +err: + DBUG_RETURN(TRUE); +} + +/** + @brief Check if the table can be created in the specified storage engine. + + Checks if the storage engine is enabled and supports the given table + type (e.g. normal, temporary, system). May do engine substitution + if the requested engine is disabled. + + @param thd Thread descriptor. + @param db_name Database name. + @param table_name Name of table to be created. + @param create_info Create info from parser, including engine. + + @retval true Engine not available/supported, error has been reported. + @retval false Engine available/supported. + create_info->db_type & create_info->new_storage_engine_name + are updated. +*/ + +bool check_engine(THD *thd, const char *db_name, + const char *table_name, HA_CREATE_INFO *create_info) +{ + DBUG_ENTER("check_engine"); + handlerton **new_engine= &create_info->db_type; + handlerton *req_engine= *new_engine; + handlerton *enf_engine= NULL; + bool no_substitution= thd->variables.sql_mode & MODE_NO_ENGINE_SUBSTITUTION; + *new_engine= ha_checktype(thd, req_engine, no_substitution); + DBUG_ASSERT(*new_engine); + if (!*new_engine) + DBUG_RETURN(true); + + /* Enforced storage engine should not be used in + ALTER TABLE that does not use explicit ENGINE = x to + avoid unwanted unrelated changes.*/ + if (!(thd->lex->sql_command == SQLCOM_ALTER_TABLE && + !(create_info->used_fields & HA_CREATE_USED_ENGINE))) + enf_engine= thd->variables.enforced_table_plugin ? 
+ plugin_hton(thd->variables.enforced_table_plugin) : NULL; + + if (enf_engine && enf_engine != *new_engine) + { + if (no_substitution) + { + const char *engine_name= ha_resolve_storage_engine_name(req_engine); + my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), engine_name); + DBUG_RETURN(TRUE); + } + *new_engine= enf_engine; + } + + if (req_engine && req_engine != *new_engine) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_WARN_USING_OTHER_HANDLER, + ER_THD(thd, ER_WARN_USING_OTHER_HANDLER), + ha_resolve_storage_engine_name(*new_engine), + table_name); + } + if (create_info->tmp_table() && + ha_check_storage_engine_flag(*new_engine, HTON_TEMPORARY_NOT_SUPPORTED)) + { + my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0), + hton_name(*new_engine)->str, "TEMPORARY"); + *new_engine= 0; + DBUG_RETURN(true); + } + lex_string_set(&create_info->new_storage_engine_name, + ha_resolve_storage_engine_name(*new_engine)); + DBUG_RETURN(false); +} + + +bool Sql_cmd_create_table_like::execute(THD *thd) +{ + DBUG_ENTER("Sql_cmd_create_table::execute"); + LEX *lex= thd->lex; + SELECT_LEX *select_lex= lex->first_select_lex(); + TABLE_LIST *first_table= select_lex->table_list.first; + DBUG_ASSERT(first_table == lex->query_tables); + DBUG_ASSERT(first_table != 0); + bool link_to_local; + TABLE_LIST *create_table= first_table; + TABLE_LIST *select_tables= lex->create_last_non_select_table->next_global; + /* most outer SELECT_LEX_UNIT of query */ + SELECT_LEX_UNIT *unit= &lex->unit; + int res= 0; + + const bool used_engine= lex->create_info.used_fields & HA_CREATE_USED_ENGINE; + ulong binlog_format= thd->wsrep_binlog_format(thd->variables.binlog_format); + DBUG_ASSERT((m_storage_engine_name.str != NULL) == used_engine); + + if (lex->create_info.resolve_to_charset_collation_context(thd, + thd->charset_collation_context_create_table_in_db(first_table->db.str))) + DBUG_RETURN(true); + + if (used_engine) + { + if (resolve_storage_engine_with_error(thd, &lex->create_info.db_type, + 
lex->create_info.tmp_table())) + DBUG_RETURN(true); // Engine not found, substitution is not allowed + + if (!lex->create_info.db_type) // Not found, but substitution is allowed + { + lex->create_info.use_default_db_type(thd); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_USING_OTHER_HANDLER, + ER_THD(thd, ER_WARN_USING_OTHER_HANDLER), + hton_name(lex->create_info.db_type)->str, + create_table->table_name.str); + } + } + + if (lex->tmp_table()) + { + status_var_decrement(thd->status_var.com_stat[SQLCOM_CREATE_TABLE]); + status_var_increment(thd->status_var.com_create_tmp_table); + } + + /* + Code below (especially in mysql_create_table() and select_create + methods) may modify HA_CREATE_INFO structure in LEX, so we have to + use a copy of this structure to make execution prepared statement- + safe. A shallow copy is enough as this code won't modify any memory + referenced from this structure. + */ + Table_specification_st create_info(lex->create_info); + /* + We need to copy alter_info for the same reasons of re-execution + safety, only in case of Alter_info we have to do (almost) a deep + copy. + */ + Alter_info alter_info(lex->alter_info, thd->mem_root); + +#ifdef WITH_WSREP + // If CREATE TABLE AS SELECT and wsrep_on + const bool wsrep_ctas= (select_lex->item_list.elements && WSREP(thd)); + + // This will be used in THD::decide_logging_format if CTAS + Enable_wsrep_ctas_guard wsrep_ctas_guard(thd, wsrep_ctas); +#endif + + if (unlikely(thd->is_fatal_error)) + { + /* If out of memory when creating a copy of alter_info. */ + res= 1; + goto end_with_restore_list; + } + + /* + Since CREATE_INFO is not full without Alter_info, it is better to pass them + as a signle parameter. TODO: remove alter_info argument where create_info is + passed. 
+ */ + create_info.alter_info= &alter_info; + + /* Check privileges */ + if ((res= create_table_precheck(thd, select_tables, create_table))) + goto end_with_restore_list; + + /* Might have been updated in create_table_precheck */ + create_info.alias= create_table->alias; + + /* Fix names if symlinked or relocated tables */ + if (append_file_to_dir(thd, &create_info.data_file_name, + &create_table->table_name) || + append_file_to_dir(thd, &create_info.index_file_name, + &create_table->table_name)) + goto end_with_restore_list; + + /* + If no engine type was given, work out the default now + rather than at parse-time. + */ + if (!(create_info.used_fields & HA_CREATE_USED_ENGINE)) + create_info.use_default_db_type(thd); + + DBUG_ASSERT(!(create_info.used_fields & HA_CREATE_USED_CHARSET)); + DBUG_ASSERT(create_info.convert_charset_collation.is_empty()); + + /* + If we are a slave, we should add OR REPLACE if we don't have + IF EXISTS. This will help a slave to recover from + CREATE TABLE OR EXISTS failures by dropping the table and + retrying the create. 
+ */ + if (thd->slave_thread && + slave_ddl_exec_mode_options == SLAVE_EXEC_MODE_IDEMPOTENT && + !lex->create_info.if_not_exists()) + { + create_info.add(DDL_options_st::OPT_OR_REPLACE); + create_info.add(DDL_options_st::OPT_OR_REPLACE_SLAVE_GENERATED); + } + +#ifdef WITH_PARTITION_STORAGE_ENGINE + thd->work_part_info= 0; + { + partition_info *part_info= thd->lex->part_info; + if (part_info && !(part_info= part_info->get_clone(thd))) + { + res= -1; + goto end_with_restore_list; + } + thd->work_part_info= part_info; + } +#endif + +#ifdef WITH_WSREP + if (wsrep_ctas) + { + if (thd->variables.wsrep_trx_fragment_size > 0) + { + my_message( + ER_NOT_ALLOWED_COMMAND, + "CREATE TABLE AS SELECT is not supported with streaming replication", + MYF(0)); + res= 1; + goto end_with_restore_list; + } + } +#endif /* WITH_WSREP */ + + if (select_lex->item_list.elements || select_lex->tvc) // With select or TVC + { + select_result *result; + + /* + CREATE TABLE...IGNORE/REPLACE SELECT... can be unsafe, unless + ORDER BY PRIMARY KEY clause is used in SELECT statement. We therefore + use row based logging if mixed or row based logging is available. + TODO: Check if the order of the output of the select statement is + deterministic. 
Waiting for BUG#42415 + */ + if(lex->ignore) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_CREATE_IGNORE_SELECT); + + if(lex->duplicates == DUP_REPLACE) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_CREATE_REPLACE_SELECT); + + /* + If: + a) we inside an SP and there was NAME_CONST substitution, + b) binlogging is on (STMT mode), + c) we log the SP as separate statements + raise a warning, as it may cause problems + (see 'NAME_CONST issues' in 'Binary Logging of Stored Programs') + */ + if (thd->query_name_consts && mysql_bin_log.is_open() && + binlog_format == BINLOG_FORMAT_STMT && + !mysql_bin_log.is_query_in_union(thd, thd->query_id)) + { + List_iterator_fast it(select_lex->item_list); + Item *item; + uint splocal_refs= 0; + /* Count SP local vars in the top-level SELECT list */ + while ((item= it++)) + { + if (item->get_item_splocal()) + splocal_refs++; + } + /* + If it differs from number of NAME_CONST substitution applied, + we may have a SOME_FUNC(NAME_CONST()) in the SELECT list, + that may cause a problem with binary log (see BUG#35383), + raise a warning. + */ + if (splocal_refs != thd->query_name_consts) + push_warning(thd, + Sql_condition::WARN_LEVEL_WARN, + ER_UNKNOWN_ERROR, +"Invoked routine ran a statement that may cause problems with " +"binary log, see 'NAME_CONST issues' in 'Binary Logging of Stored Programs' " +"section of the manual."); + } + + select_lex->options|= SELECT_NO_UNLOCK; + unit->set_limit(select_lex); + + /* + Disable non-empty MERGE tables with CREATE...SELECT. Too + complicated. See Bug #26379. Empty MERGE tables are read-only + and don't allow CREATE...SELECT anyway. + */ + if (create_info.used_fields & HA_CREATE_USED_UNION) + { + my_error(ER_WRONG_OBJECT, MYF(0), create_table->db.str, + create_table->table_name.str, "BASE TABLE"); + res= 1; + goto end_with_restore_list; + } + + res= open_and_lock_tables(thd, create_info, lex->query_tables, TRUE, 0); + if (unlikely(res)) + { + /* Got error or warning. 
Set res to 1 if error */ + if (!(res= thd->is_error())) + my_ok(thd); // CREATE ... IF NOT EXISTS + goto end_with_restore_list; + } + + /* Ensure we don't try to create something from which we select from */ + if (create_info.or_replace() && !create_info.tmp_table()) + { + if (TABLE_LIST *duplicate= unique_table(thd, lex->query_tables, + lex->query_tables->next_global, + CHECK_DUP_FOR_CREATE | + CHECK_DUP_SKIP_TEMP_TABLE)) + { + update_non_unique_table_error(lex->query_tables, "CREATE", + duplicate); + res= TRUE; + goto end_with_restore_list; + } + } + { + /* + Remove target table from main select and name resolution + context. This can't be done earlier as it will break view merging in + statements like "CREATE TABLE IF NOT EXISTS existing_view SELECT". + */ + lex->unlink_first_table(&link_to_local); + + /* Store reference to table in case of LOCK TABLES */ + create_info.table= create_table->table; + + DEBUG_SYNC(thd, "wsrep_create_table_as_select"); + + /* + select_create is currently not re-execution friendly and + needs to be created for every execution of a PS/SP. + Note: In wsrep-patch, CTAS is handled like a regular transaction. + */ + if ((result= new (thd->mem_root) select_create(thd, create_table, + &create_info, + &alter_info, + select_lex->item_list, + lex->duplicates, + lex->ignore, + select_tables))) + { + /* + CREATE from SELECT give its SELECT_LEX for SELECT, + and item_list belong to SELECT + */ + if (!(res= handle_select(thd, lex, result, 0))) + { + if (create_info.tmp_table()) + thd->variables.option_bits|= OPTION_BINLOG_THIS_TRX; + } + delete result; + } + lex->link_first_table_back(create_table, link_to_local); + } + } + else + { + /* regular create */ + if (create_info.like()) + { + /* CREATE TABLE ... LIKE ... 
*/ + res= mysql_create_like_table(thd, create_table, select_tables, + &create_info); + } + else + { + if (create_info.fix_create_fields(thd, &alter_info, *create_table) || + create_info.check_fields(thd, &alter_info, + create_table->table_name, create_table->db)) + goto end_with_restore_list; + + /* + In STATEMENT format, we probably have to replicate also temporary + tables, like mysql replication does. Also check if the requested + engine is allowed/supported. + */ + if (WSREP(thd)) + { + handlerton *orig_ht= create_info.db_type; + if (!check_engine(thd, create_table->db.str, + create_table->table_name.str, + &create_info) && + (!thd->is_current_stmt_binlog_format_row() || + !create_info.tmp_table())) + { +#ifdef WITH_WSREP + if (thd->lex->sql_command == SQLCOM_CREATE_SEQUENCE && + wsrep_check_sequence(thd, lex->create_info.seq_create_info)) + DBUG_RETURN(true); + + WSREP_TO_ISOLATION_BEGIN_ALTER(create_table->db.str, + create_table->table_name.str, + first_table, &alter_info, NULL, + &create_info) + { + WSREP_WARN("CREATE TABLE isolation failure"); + res= true; + goto end_with_restore_list; + } +#endif /* WITH_WSREP */ + } + // check_engine will set db_type to NULL if e.g. 
TEMPORARY is + // not supported by the storage engine, this case is checked + // again in mysql_create_table + create_info.db_type= orig_ht; + } + /* Regular CREATE TABLE */ + res= mysql_create_table(thd, create_table, &create_info, &alter_info); + } + if (!res) + { + /* So that CREATE TEMPORARY TABLE gets to binlog at commit/rollback */ + if (create_info.tmp_table()) + thd->variables.option_bits|= OPTION_BINLOG_THIS_TRX; + /* in case of create temp tables if @@session_track_state_change is + ON then send session state notification in OK packet */ + if (create_info.options & HA_LEX_CREATE_TMP_TABLE) + { + thd->session_tracker.state_change.mark_as_changed(thd); + } + my_ok(thd); + } + } + +end_with_restore_list: + DBUG_RETURN(res); +} + + +bool HA_CREATE_INFO:: + resolve_to_charset_collation_context(THD *thd, + const Lex_table_charset_collation_attrs_st &default_cscl_arg, + const Lex_table_charset_collation_attrs_st &convert_cscl, + const Charset_collation_context &ctx) +{ + /* + If CONVERT TO clauses are specified only (without table default clauses), + then we copy CONVERT TO clauses to default clauses, so e.g: + CONVERT TO CHARACTER SET utf8mb4 + means + CONVERT TO CHARACTER SET utf8mb4, DEFAULT CHARACTER SET utf8mb4 + */ + Lex_table_charset_collation_attrs_st default_cscl= + !convert_cscl.is_empty() && default_cscl_arg.is_empty() ? 
+ convert_cscl : default_cscl_arg; + + if (default_cscl.is_empty()) + default_table_charset= ctx.collate_default().charset_info(); + else + { + // Make sure we don't do double resolution in direct SQL execution + DBUG_ASSERT(!default_table_charset || + thd->stmt_arena->is_stmt_execute() || + thd->stmt_arena->state == Query_arena::STMT_INITIALIZED_FOR_SP); + if (!(default_table_charset= + default_cscl.resolved_to_context(ctx))) + return true; + } + + if (convert_cscl.is_empty()) + alter_table_convert_to_charset= NULL; + else + { + // Make sure we don't do double resolution in direct SQL execution + DBUG_ASSERT(!alter_table_convert_to_charset || + thd->stmt_arena->is_stmt_execute() || + thd->stmt_arena->state == Query_arena::STMT_INITIALIZED_FOR_SP); + if (!(alter_table_convert_to_charset= + convert_cscl.resolved_to_context(ctx))) + return true; + } + return false; +} diff --git a/sql/sql_table.h b/sql/sql_table.h new file mode 100644 index 00000000..fded8353 --- /dev/null +++ b/sql/sql_table.h @@ -0,0 +1,223 @@ +/* Copyright (c) 2006, 2014, Oracle and/or its affiliates. + Copyright (c) 2011, 2017, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_TABLE_INCLUDED +#define SQL_TABLE_INCLUDED + +#include // pthread_mutex_t +#include "m_string.h" // LEX_CUSTRING +#include "lex_charset.h" + +#define ERROR_INJECT(code) \ + ((DBUG_IF("crash_" code) && (DBUG_SUICIDE(), 0)) || \ + (DBUG_IF("fail_" code) && (my_error(ER_UNKNOWN_ERROR, MYF(0)), 1))) + +class Alter_info; +class Alter_table_ctx; +class Column_definition; +class Create_field; +struct TABLE_LIST; +class THD; +struct TABLE; +struct handlerton; +class handler; +class String; +typedef struct st_ha_check_opt HA_CHECK_OPT; +struct HA_CREATE_INFO; +struct Table_specification_st; +typedef struct st_key KEY; +typedef struct st_key_cache KEY_CACHE; +typedef struct st_lock_param_type ALTER_PARTITION_PARAM_TYPE; +typedef struct st_order ORDER; +typedef struct st_ddl_log_state DDL_LOG_STATE; + +enum enum_explain_filename_mode +{ + EXPLAIN_ALL_VERBOSE= 0, + EXPLAIN_PARTITIONS_VERBOSE, + EXPLAIN_PARTITIONS_AS_COMMENT +}; + + +/* depends on errmsg.txt Database `db`, Table `t` ... */ +#define EXPLAIN_FILENAME_MAX_EXTRA_LENGTH 63 + +#define WFRM_WRITE_SHADOW 1 +#define WFRM_INSTALL_SHADOW 2 +#define WFRM_KEEP_SHARE 4 +#define WFRM_WRITE_CONVERTED_TO 8 +#define WFRM_BACKUP_ORIGINAL 16 + +/* Flags for conversion functions. */ +static const uint FN_FROM_IS_TMP= 1 << 0; +static const uint FN_TO_IS_TMP= 1 << 1; +static const uint FN_IS_TMP= FN_FROM_IS_TMP | FN_TO_IS_TMP; +static const uint NO_FRM_RENAME= 1 << 2; +static const uint FRM_ONLY= 1 << 3; +/** Don't remove table in engine. Remove only .FRM and maybe .PAR files. */ +static const uint NO_HA_TABLE= 1 << 4; +/** Don't resolve MySQL's fake "foo.sym" symbolic directory names. 
*/ +static const uint SKIP_SYMDIR_ACCESS= 1 << 5; +/** Don't check foreign key constraints while renaming table */ +static const uint NO_FK_CHECKS= 1 << 6; +/* Don't delete .par table in quick_rm_table() */ +static const uint NO_PAR_TABLE= 1 << 7; + +uint filename_to_tablename(const char *from, char *to, size_t to_length, + bool stay_quiet = false); +uint tablename_to_filename(const char *from, char *to, size_t to_length); +uint check_n_cut_mysql50_prefix(const char *from, char *to, size_t to_length); +bool check_mysql50_prefix(const char *name); +uint build_table_filename(char *buff, size_t bufflen, const char *db, + const char *table, const char *ext, uint flags); +uint build_table_shadow_filename(char *buff, size_t bufflen, + ALTER_PARTITION_PARAM_TYPE *lpt, + bool backup= false); +void build_lower_case_table_filename(char *buff, size_t bufflen, + const LEX_CSTRING *db, + const LEX_CSTRING *table, + uint flags); +uint build_tmptable_filename(THD* thd, char *buff, size_t bufflen); +bool add_keyword_to_query(THD *thd, String *result, const LEX_CSTRING *keyword, + const LEX_CSTRING *add); + +/* + mysql_create_table_no_lock can be called in one of the following + mutually exclusive situations: + + - Just a normal ordinary CREATE TABLE statement that explicitly + defines the table structure. + + - CREATE TABLE ... SELECT. It is special, because only in this case, + the list of fields is allowed to have duplicates, as long as one of the + duplicates comes from the select list, and the other doesn't. For + example in + + CREATE TABLE t1 (a int(5) NOT NUL) SELECT b+10 as a FROM t2; + + the list in alter_info->create_list will have two fields `a`. + + - ALTER TABLE, that creates a temporary table #sql-xxx, which will be later + renamed to replace the original table. + + - ALTER TABLE as above, but which only modifies the frm file, it only + creates an frm file for the #sql-xxx, the table in the engine is not + created. 
+ + - Assisted discovery, CREATE TABLE statement without the table structure. + + These situations are distinguished by the following "create table mode" + values, where a CREATE ... SELECT is denoted by any non-negative number + (which should be the number of fields in the SELECT ... part), and other + cases use constants as defined below. +*/ +#define C_CREATE_SELECT(X) ((X) > 0 ? (X) : 0) +#define C_ORDINARY_CREATE 0 +#define C_ALTER_TABLE -1 +#define C_ALTER_TABLE_FRM_ONLY -2 +#define C_ASSISTED_DISCOVERY -3 + +int mysql_create_table_no_lock(THD *thd, + DDL_LOG_STATE *ddl_log_state, + DDL_LOG_STATE *ddl_log_state_rm, + Table_specification_st *create_info, + Alter_info *alter_info, bool *is_trans, + int create_table_mode, TABLE_LIST *table); + +handler *mysql_create_frm_image(THD *thd, HA_CREATE_INFO *create_info, + Alter_info *alter_info, int create_table_mode, + KEY **key_info, uint *key_count, + LEX_CUSTRING *frm); + +int mysql_discard_or_import_tablespace(THD *thd, TABLE_LIST *table_list, + bool discard); + +bool mysql_prepare_alter_table(THD *thd, TABLE *table, + Table_specification_st *create_info, + Alter_info *alter_info, + Alter_table_ctx *alter_ctx); +bool mysql_trans_prepare_alter_copy_data(THD *thd); +bool mysql_trans_commit_alter_copy_data(THD *thd); +bool mysql_alter_table(THD *thd, const LEX_CSTRING *new_db, + const LEX_CSTRING *new_name, + Table_specification_st *create_info, + TABLE_LIST *table_list, + class Recreate_info *recreate_info, + Alter_info *alter_info, + uint order_num, ORDER *order, bool ignore, + bool if_exists); +bool mysql_compare_tables(TABLE *table, + Alter_info *alter_info, + HA_CREATE_INFO *create_info, + bool *metadata_equal); +bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list, + class Recreate_info *recreate_info, bool table_copy); +bool mysql_rename_table(handlerton *base, const LEX_CSTRING *old_db, + const LEX_CSTRING *old_name, const LEX_CSTRING *new_db, + const LEX_CSTRING *new_name, LEX_CUSTRING *id, + uint 
flags); +bool mysql_backup_table(THD* thd, TABLE_LIST* table_list); +bool mysql_restore_table(THD* thd, TABLE_LIST* table_list); + +template class List; +void fill_checksum_table_metadata_fields(THD *thd, List *fields); +bool mysql_checksum_table(THD* thd, TABLE_LIST* table_list, + HA_CHECK_OPT* check_opt); +bool mysql_rm_table(THD *thd,TABLE_LIST *tables, bool if_exists, + bool drop_temporary, bool drop_sequence, + bool dont_log_query); +int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, + const LEX_CSTRING *db, + DDL_LOG_STATE *ddl_log_state, + bool if_exists, + bool drop_temporary, bool drop_view, + bool drop_sequence, + bool dont_log_query, bool dont_free_locks); +bool log_drop_table(THD *thd, const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name, const LEX_CSTRING *handler, + bool partitioned, const LEX_CUSTRING *id, + bool temporary_table); +bool quick_rm_table(THD *thd, handlerton *base, const LEX_CSTRING *db, + const LEX_CSTRING *table_name, uint flags, + const char *table_path=0); +void close_cached_table(THD *thd, TABLE *table); +void sp_prepare_create_field(THD *thd, Column_definition *sql_field); +bool mysql_write_frm(ALTER_PARTITION_PARAM_TYPE *lpt, uint flags); +int write_bin_log(THD *thd, bool clear_error, + char const *query, ulong query_length, + bool is_trans= FALSE); +int write_bin_log_with_if_exists(THD *thd, bool clear_error, + bool is_trans, bool add_if_exists, + bool commit_alter= false); + +void promote_first_timestamp_column(List *column_definitions); + +/* + These prototypes where under INNODB_COMPATIBILITY_HOOKS. 
+*/ +uint explain_filename(THD* thd, const char *from, char *to, uint to_length, + enum_explain_filename_mode explain_mode); + + +extern MYSQL_PLUGIN_IMPORT const LEX_CSTRING primary_key_name; + +bool check_engine(THD *, const char *, const char *, HA_CREATE_INFO *); + +#ifdef WITH_WSREP +bool wsrep_check_sequence(THD* thd, const class sequence_definition *seq); +#endif + +#endif /* SQL_TABLE_INCLUDED */ diff --git a/sql/sql_test.cc b/sql/sql_test.cc new file mode 100644 index 00000000..9163d8fc --- /dev/null +++ b/sql/sql_test.cc @@ -0,0 +1,715 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* Write some debug info */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "sql_test.h" +#include "sql_base.h" +#include "sql_show.h" // calc_sum_of_all_status +#include "sql_select.h" +#include "keycaches.h" +#include "my_json_writer.h" +#include +#include +#include "sql_connect.h" +#include "thread_cache.h" +#if defined(HAVE_MALLINFO) && defined(HAVE_MALLOC_H) +#include +#elif defined(HAVE_MALLINFO) && defined(HAVE_SYS_MALLOC_H) +#include +#elif defined(HAVE_MALLOC_ZONE) +#include +#endif + +#ifdef HAVE_EVENT_SCHEDULER +#include "events.h" +#endif + +#define FT_KEYPART (MAX_FIELDS+10) + +static const char *lock_descriptions[] = +{ + /* TL_UNLOCK */ "No lock", + /* TL_READ_DEFAULT */ NULL, + /* TL_READ */ "Low priority read lock", + /* TL_READ_WITH_SHARED_LOCKS */ "Shared read lock", + /* TL_READ_HIGH_PRIORITY */ "High priority read lock", + /* TL_READ_NO_INSERT */ "Read lock without concurrent inserts", + /* TL_READ_SKIP_LOCKED */ "Read lock without blocking if row is locked", + /* TL_WRITE_ALLOW_WRITE */ "Write lock that allows other writers", + /* TL_WRITE_CONCURRENT_INSERT */ "Concurrent insert lock", + /* TL_WRITE_DELAYED */ "Lock used by delayed insert", + /* TL_WRITE_DEFAULT */ NULL, + /* TL_WRITE_LOW_PRIORITY */ "Low priority write lock", + /* TL_WRITE_SKIP_LOCKED */ "Write lock but skip existing locked rows", + /* TL_WRITE */ "High priority write lock", + /* TL_WRITE_ONLY */ "Highest priority write lock" +}; + + +#ifndef DBUG_OFF + +void +print_where(COND *cond,const char *info, enum_query_type query_type) +{ + char buff[1024]; + String str(buff,(uint32) sizeof(buff), system_charset_info); + str.length(0); + str.extra_allocation(1024); + if (cond) + cond->print(&str, query_type); + + DBUG_LOCK_FILE; + (void) 
fprintf(DBUG_FILE,"\nWHERE:(%s) %p ", info, cond); + (void) fputs(str.c_ptr_safe(),DBUG_FILE); + (void) fputc('\n',DBUG_FILE); + DBUG_UNLOCK_FILE; +} + +#ifdef EXTRA_DEBUG + /* This is for debugging purposes */ +static my_bool print_cached_tables_callback(TDC_element *element, + void *arg __attribute__((unused))) +{ + TABLE *entry; + + mysql_mutex_lock(&element->LOCK_table_share); + All_share_tables_list::Iterator it(element->all_tables); + while ((entry= it++)) + { + THD *in_use= entry->in_use; + printf("%-14.14s %-32s%8ld%6d %s\n", + entry->s->db.str, entry->s->table_name.str, + in_use ? (long) in_use->thread_id : (long) 0, + entry->db_stat ? 1 : 0, + in_use ? lock_descriptions[(int)entry->reginfo.lock_type] : + "Not in use"); + } + mysql_mutex_unlock(&element->LOCK_table_share); + return FALSE; +} + + +static void print_cached_tables(void) +{ + compile_time_assert(TL_WRITE_ONLY+1 == array_elements(lock_descriptions)); + + /* purecov: begin tested */ + puts("DB Table Version Thread Open Lock"); + + tdc_iterate(0, (my_hash_walk_action) print_cached_tables_callback, NULL, true); + + fflush(stdout); + /* purecov: end */ + return; +} +#endif + + +void TEST_filesort(SORT_FIELD *sortorder,uint s_length) +{ + char buff[256],buff2[256]; + String str(buff,sizeof(buff),system_charset_info); + String out(buff2,sizeof(buff2),system_charset_info); + DBUG_ASSERT(s_length > 0); + DBUG_ENTER("TEST_filesort"); + + out.length(0); + for (; s_length-- ; sortorder++) + { + if (sortorder->reverse) + out.append('-'); + if (sortorder->field) + { + if (sortorder->field->table_name) + { + const char *table_name= *sortorder->field->table_name; + out.append(table_name, strlen(table_name)); + out.append('.'); + } + const char *name= sortorder->field->field_name.str; + if (!name) + name= "tmp_table_column"; + out.append(name, strlen(name)); + } + else + { + str.length(0); + sortorder->item->print(&str, QT_ORDINARY); + out.append(str); + } + out.append(' '); + } + out.chop(); // Remove last 
space + DBUG_LOCK_FILE; + (void) fputs("\nInfo about FILESORT\n",DBUG_FILE); + fprintf(DBUG_FILE,"Sortorder: %s\n",out.c_ptr_safe()); + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} + + +void +TEST_join(JOIN *join) +{ + uint ref; + int i; + List_iterator it(join->join_tab_ranges); + JOIN_TAB_RANGE *jt_range; + DBUG_ENTER("TEST_join"); + + DBUG_LOCK_FILE; + (void) fputs("\nInfo about JOIN\n",DBUG_FILE); + while ((jt_range= it++)) + { + /* + Assemble results of all the calls to full_name() first, + in order not to garble the tabular output below. + */ + String ref_key_parts[MAX_TABLES]; + int tables_in_range= (int)(jt_range->end - jt_range->start); + for (i= 0; i < tables_in_range; i++) + { + JOIN_TAB *tab= jt_range->start + i; + for (ref= 0; ref < tab->ref.key_parts; ref++) + { + ref_key_parts[i].append(tab->ref.items[ref]->full_name_cstring()); + ref_key_parts[i].append(STRING_WITH_LEN(" ")); + } + } + + for (i= 0; i < tables_in_range; i++) + { + JOIN_TAB *tab= jt_range->start + i; + TABLE *form=tab->table; + char key_map_buff[128]; + fprintf(DBUG_FILE,"%-16.16s type: %-7s q_keys: %s refs: %d key: %d len: %d\n", + form->alias.c_ptr(), + join_type_str[tab->type], + tab->keys.print(key_map_buff), + tab->ref.key_parts, + tab->ref.key, + tab->ref.key_length); + if (tab->select) + { + char buf[MAX_KEY/8+1]; + if (tab->use_quick == 2) + fprintf(DBUG_FILE, + " quick select checked for each record (keys: %s)\n", + tab->select->quick_keys.print(buf)); + else if (tab->select->quick) + { + fprintf(DBUG_FILE, " quick select used:\n"); + tab->select->quick->dbug_dump(18, FALSE); + } + else + (void)fputs(" select used\n",DBUG_FILE); + } + if (tab->ref.key_parts) + { + fprintf(DBUG_FILE, + " refs: %s\n", ref_key_parts[i].c_ptr_safe()); + } + } + (void)fputs("\n",DBUG_FILE); + } + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} + + +static void print_keyuse(KEYUSE *keyuse) +{ + char buff[256]; + char buf2[64]; + const char *fieldname; + JOIN_TAB *join_tab= keyuse->table->reginfo.join_tab; + 
KEY *key_info= join_tab->get_keyinfo_by_key_no(keyuse->key); + String str(buff,(uint32) sizeof(buff), system_charset_info); + str.length(0); + keyuse->val->print(&str, QT_ORDINARY); + str.append('\0'); + if (keyuse->is_for_hash_join()) + fieldname= keyuse->table->field[keyuse->keypart]->field_name.str; + else if (keyuse->keypart == FT_KEYPART) + fieldname= "FT_KEYPART"; + else + fieldname= key_info->key_part[keyuse->keypart].field->field_name.str; + ll2str(keyuse->used_tables, buf2, 16, 0); + fprintf(DBUG_FILE, "KEYUSE: %s.%s=%s optimize: %u used_tables: %s " + "ref_table_rows: %lu keypart_map: %0lx\n", + keyuse->table->alias.c_ptr(), fieldname, str.ptr(), + (uint) keyuse->optimize, buf2, (ulong) keyuse->ref_table_rows, + (ulong) keyuse->keypart_map); +} + + +/* purecov: begin inspected */ +void print_keyuse_array(DYNAMIC_ARRAY *keyuse_array) +{ + DBUG_LOCK_FILE; + fprintf(DBUG_FILE, "KEYUSE array (%zu elements)\n", keyuse_array->elements); + for(uint i=0; i < keyuse_array->elements; i++) + print_keyuse((KEYUSE*)dynamic_array_ptr(keyuse_array, i)); + DBUG_UNLOCK_FILE; +} + +/* + Print the current state during query optimization. + + SYNOPSIS + print_plan() + join pointer to the structure providing all context info for + the query + read_time the cost of the best partial plan + record_count estimate for the number of records returned by the best + partial plan + idx length of the partial QEP in 'join->positions'; + also an index in the array 'join->best_ref'; + info comment string to appear above the printout + + DESCRIPTION + This function prints to the log file DBUG_FILE the members of 'join' that + are used during query optimization (join->positions, join->best_positions, + and join->best_ref) and few other related variables (read_time, + record_count). + Useful to trace query optimizer functions. 
+ + RETURN + None +*/ + +void +print_plan(JOIN* join, uint idx, double record_count, double read_time, + double current_read_time, const char *info) +{ + uint i; + JOIN_TAB *join_table; + JOIN_TAB **plan_nodes; + TABLE* table; + + if (info == 0) + info= ""; + + DBUG_LOCK_FILE; + if (join->best_read == DBL_MAX) + { + fprintf(DBUG_FILE, + "%s; idx: %u best: DBL_MAX atime: %g itime: %g count: %g\n", + info, idx, current_read_time, read_time, record_count); + } + else + { + fprintf(DBUG_FILE, + "%s; idx :%u best: %g accumulated: %g increment: %g count: %g\n", + info, idx, join->best_read, current_read_time, read_time, + record_count); + } + + /* Print the tables in JOIN->positions */ + fputs(" POSITIONS: ", DBUG_FILE); + for (i= 0; i < idx ; i++) + { + POSITION *pos= join->positions + i; + table= pos->table->table; + if (table) + fputs(table->s->table_name.str, DBUG_FILE); + fputc(' ', DBUG_FILE); + } + fputc('\n', DBUG_FILE); + + /* + Print the tables in JOIN->best_positions only if at least one complete plan + has been found. An indicator for this is the value of 'join->best_read'. 
+ */ + if (join->best_read < DBL_MAX) + { + fputs("BEST_POSITIONS: ", DBUG_FILE); + for (i= 0; i < idx ; i++) + { + POSITION *pos= join->best_positions + i; + table= pos->table->table; + if (table) + fputs(table->s->table_name.str, DBUG_FILE); + fputc(' ', DBUG_FILE); + } + } + fputc('\n', DBUG_FILE); + + /* Print the tables in JOIN->best_ref */ + fputs(" BEST_REF: ", DBUG_FILE); + for (plan_nodes= join->best_ref ; *plan_nodes ; plan_nodes++) + { + join_table= (*plan_nodes); + fputs(join_table->table->s->table_name.str, DBUG_FILE); + fprintf(DBUG_FILE, "(%lu,%lu,%lu)", + (ulong) join_table->found_records, + (ulong) join_table->records, + (ulong) join_table->read_time); + fputc(' ', DBUG_FILE); + } + fputc('\n', DBUG_FILE); + + DBUG_UNLOCK_FILE; +} + + +void print_sjm(SJ_MATERIALIZATION_INFO *sjm) +{ + DBUG_LOCK_FILE; + fprintf(DBUG_FILE, "\nsemi-join nest{\n"); + fprintf(DBUG_FILE, " tables { \n"); + for (uint i= 0;i < sjm->tables; i++) + { + fprintf(DBUG_FILE, " %s%s\n", + sjm->positions[i].table->table->alias.c_ptr(), + (i == sjm->tables -1)? "": ","); + } + fprintf(DBUG_FILE, " }\n"); + fprintf(DBUG_FILE, " materialize_cost= %g\n", + sjm->materialization_cost.total_cost()); + fprintf(DBUG_FILE, " rows= %g\n", sjm->rows); + fprintf(DBUG_FILE, "}\n"); + DBUG_UNLOCK_FILE; +} +/* purecov: end */ + +/* + Debugging help: force List<...>::elem function not be removed as unused. 
+*/ +Item* (List::*dbug_list_item_elem_ptr)(uint)= &List::elem; +Item_equal* (List::*dbug_list_item_equal_elem_ptr)(uint)= + &List::elem; +TABLE_LIST* (List::*dbug_list_table_list_elem_ptr)(uint) = + &List::elem; + +#endif + +typedef struct st_debug_lock +{ + ulong thread_id; + char table_name[FN_REFLEN]; + bool waiting; + const char *lock_text; + enum thr_lock_type type; +} TABLE_LOCK_INFO; + +C_MODE_START +static int dl_compare(const void *p1, const void *p2) +{ + TABLE_LOCK_INFO *a, *b; + + a= (TABLE_LOCK_INFO *) p1; + b= (TABLE_LOCK_INFO *) p2; + + if (a->thread_id > b->thread_id) + return 1; + if (a->thread_id < b->thread_id) + return -1; + if (a->waiting == b->waiting) + return 0; + else if (a->waiting) + return -1; + return 1; +} +C_MODE_END + + +static void push_locks_into_array(DYNAMIC_ARRAY *ar, THR_LOCK_DATA *data, + bool wait, const char *text) +{ + if (data) + { + TABLE *table=(TABLE *)data->debug_print_param; + if (table && table->s->tmp_table == NO_TMP_TABLE) + { + TABLE_LOCK_INFO table_lock_info; + table_lock_info.thread_id= (ulong)table->in_use->thread_id; + memcpy(table_lock_info.table_name, table->s->table_cache_key.str, + table->s->table_cache_key.length); + table_lock_info.table_name[strlen(table_lock_info.table_name)]='.'; + table_lock_info.waiting=wait; + table_lock_info.lock_text=text; + // lock_type is also obtainable from THR_LOCK_DATA + table_lock_info.type=table->reginfo.lock_type; + (void) push_dynamic(ar,(uchar*) &table_lock_info); + } + } +} + + +/* + Regarding MERGE tables: + + For now, the best option is to use the common TABLE *pointer for all + cases; The drawback is that for MERGE tables we will see many locks + for the merge tables even if some of them are for individual tables. + + The way to solve this is to add to 'THR_LOCK' structure a pointer to + the filename and use this when printing the data. 
+ (We can for now ignore this and just print the same name for all merge + table parts; Please add the above as a comment to the display_lock + function so that we can easily add this if we ever need this. +*/ + +static void display_table_locks(void) +{ + LIST *list; + void *saved_base; + DYNAMIC_ARRAY saved_table_locks; + + (void) my_init_dynamic_array(key_memory_locked_thread_list, + &saved_table_locks, sizeof(TABLE_LOCK_INFO), + tc_records() + 20, 50, MYF(0)); + mysql_mutex_lock(&THR_LOCK_lock); + for (list= thr_lock_thread_list; list; list= list_rest(list)) + { + THR_LOCK *lock=(THR_LOCK*) list->data; + + mysql_mutex_lock(&lock->mutex); + push_locks_into_array(&saved_table_locks, lock->write.data, FALSE, + "Locked - write"); + push_locks_into_array(&saved_table_locks, lock->write_wait.data, TRUE, + "Waiting - write"); + push_locks_into_array(&saved_table_locks, lock->read.data, FALSE, + "Locked - read"); + push_locks_into_array(&saved_table_locks, lock->read_wait.data, TRUE, + "Waiting - read"); + mysql_mutex_unlock(&lock->mutex); + } + mysql_mutex_unlock(&THR_LOCK_lock); + + if (!saved_table_locks.elements) + goto end; + + saved_base= dynamic_element(&saved_table_locks, 0, TABLE_LOCK_INFO *); + my_qsort(saved_base, saved_table_locks.elements, sizeof(TABLE_LOCK_INFO), + dl_compare); + freeze_size(&saved_table_locks); + + puts("\nThread database.table_name Locked/Waiting Lock_type\n"); + + unsigned int i; + for (i=0 ; i < saved_table_locks.elements ; i++) + { + TABLE_LOCK_INFO *dl_ptr=dynamic_element(&saved_table_locks,i,TABLE_LOCK_INFO*); + printf("%-8ld%-28.28s%-22s%s\n", + dl_ptr->thread_id,dl_ptr->table_name,dl_ptr->lock_text,lock_descriptions[(int)dl_ptr->type]); + } + puts("\n\n"); +end: + delete_dynamic(&saved_table_locks); +} + +C_MODE_START +static int print_key_cache_status(const char *name, KEY_CACHE *key_cache, + void *unused __attribute__((unused))) +{ + char llbuff1[22]; + char llbuff2[22]; + char llbuff3[22]; + char llbuff4[22]; + + if 
(!key_cache->key_cache_inited) + { + printf("%s: Not in use\n", name); + } + else + { + KEY_CACHE_STATISTICS stats; + get_key_cache_statistics(key_cache, 0, &stats); + + printf("%s\n\ +Buffer_size: %10lu\n\ +Block_size: %10lu\n\ +Division_limit: %10lu\n\ +Age_threshold: %10lu\n\ +Partitions: %10lu\n\ +blocks used: %10lu\n\ +not flushed: %10lu\n\ +w_requests: %10s\n\ +writes: %10s\n\ +r_requests: %10s\n\ +reads: %10s\n\n", + name, + (ulong)key_cache->param_buff_size, + (ulong)key_cache->param_block_size, + (ulong)key_cache->param_division_limit, + (ulong)key_cache->param_age_threshold, + (ulong)key_cache->param_partitions, + (ulong)stats.blocks_used, + (ulong)stats.blocks_changed, + llstr(stats.write_requests,llbuff1), + llstr(stats.writes,llbuff2), + llstr(stats.read_requests,llbuff3), + llstr(stats.reads,llbuff4)); + } + return 0; +} +C_MODE_END + + +void mysql_print_status() +{ + char current_dir[FN_REFLEN]; + STATUS_VAR tmp; + uint count; + + tmp= global_status_var; + count= calc_sum_of_all_status(&tmp); + printf("\nStatus information:\n\n"); + (void) my_getwd(current_dir, sizeof(current_dir),MYF(0)); + printf("Current dir: %s\n", current_dir); + printf("Running threads: %d Cached threads: %lu Stack size: %ld\n", + count, thread_cache.size(), + (long) my_thread_stack_size); +#ifdef EXTRA_DEBUG + thr_print_locks(); // Write some debug info + print_cached_tables(); +#endif + /* Print key cache status */ + puts("\nKey caches:"); + process_key_caches(print_key_cache_status, 0); + printf("\nhandler status:\n\ +read_key: %10lu\n\ +read_next: %10lu\n\ +read_rnd %10lu\n\ +read_first: %10lu\n\ +write: %10lu\n\ +delete %10lu\n\ +update: %10lu\n", + tmp.ha_read_key_count, + tmp.ha_read_next_count, + tmp.ha_read_rnd_count, + tmp.ha_read_first_count, + tmp.ha_write_count, + tmp.ha_delete_count, + tmp.ha_update_count); + printf("\nTable status:\n\ +Opened tables: %10lu\n\ +Open tables: %10u\n\ +Open files: %10u\n\ +Open streams: %10lu\n", + tmp.opened_tables, + tc_records(), 
+ my_file_opened, + my_stream_opened); + +#ifndef DONT_USE_THR_ALARM + ALARM_INFO alarm_info; + thr_alarm_info(&alarm_info); + printf("\nAlarm status:\n\ +Active alarms: %u\n\ +Max used alarms: %u\n\ +Next alarm time: %lu\n", + alarm_info.active_alarms, + alarm_info.max_used_alarms, + (ulong)alarm_info.next_alarm_time); +#endif + display_table_locks(); +#if defined(HAVE_MALLINFO2) + struct mallinfo2 info = mallinfo2(); +#elif defined(HAVE_MALLINFO) + struct mallinfo info= mallinfo(); +#endif +#if __has_feature(memory_sanitizer) + /* Work around missing MSAN instrumentation */ + MEM_MAKE_DEFINED(&info, sizeof info); +#endif +#if defined(HAVE_MALLINFO) || defined(HAVE_MALLINFO2) + char llbuff[10][22]; + printf("\nMemory status:\n\ +Non-mmapped space allocated from system: %s\n\ +Number of free chunks: %lu\n\ +Number of fastbin blocks: %lu\n\ +Number of mmapped regions: %lu\n\ +Space in mmapped regions: %s\n\ +Maximum total allocated space: %s\n\ +Space available in freed fastbin blocks: %s\n\ +Total allocated space: %s\n\ +Total free space: %s\n\ +Top-most, releasable space: %s\n\ +Estimated memory (with thread stack): %s\n\ +Global memory allocated by server: %s\n\ +Memory allocated by threads: %s\n", + llstr(info.arena, llbuff[0]), + (ulong) info.ordblks, + (ulong) info.smblks, + (ulong) info.hblks, + llstr(info.hblkhd, llbuff[1]), + llstr(info.usmblks, llbuff[2]), + llstr(info.fsmblks, llbuff[3]), + llstr(info.uordblks, llbuff[4]), + llstr(info.fordblks, llbuff[5]), + llstr(info.keepcost, llbuff[6]), + llstr((count + thread_cache.size()) * my_thread_stack_size + + info.hblkhd + info.arena, llbuff[7]), + llstr(tmp.global_memory_used, llbuff[8]), + llstr(tmp.local_memory_used, llbuff[9])); + +#elif defined(HAVE_MALLOC_ZONE) + malloc_statistics_t info; + char llbuff[4][22]; + + malloc_zone_statistics(nullptr, &info); + printf("\nMemory status:\n\ +Total allocated space: %s\n\ +Total free space: %s\n\ +Global memory allocated by server: %s\n\ +Memory allocated by 
threads: %s\n", + llstr(info.size_allocated, llbuff[0]), + llstr((info.size_allocated - info.size_in_use), llbuff[1]), + llstr(tmp.global_memory_used, llbuff[2]), + llstr(tmp.local_memory_used, llbuff[3])); +#endif + +#ifdef HAVE_EVENT_SCHEDULER + Events::dump_internal_status(); +#endif + puts(""); + fflush(stdout); +} + +void print_keyuse_array_for_trace(THD *thd, DYNAMIC_ARRAY *keyuse_array) +{ + DBUG_ASSERT(thd->trace_started()); + + Json_writer_object wrapper(thd); + Json_writer_array trace_key_uses(thd, "ref_optimizer_key_uses"); + + for (uint i=0; i < keyuse_array->elements; i++) + { + KEYUSE *keyuse= (KEYUSE*)dynamic_array_ptr(keyuse_array, i); + Json_writer_object keyuse_elem(thd); + keyuse_elem.add_table_name(keyuse->table->reginfo.join_tab); + if (keyuse->keypart != FT_KEYPART && !keyuse->is_for_hash_join()) + { + keyuse_elem.add("index", keyuse->table->key_info[keyuse->key].name); + } + keyuse_elem.add("field", (keyuse->keypart == FT_KEYPART) ? "": + (keyuse->is_for_hash_join() ? + keyuse->table->field[keyuse->keypart] + ->field_name.str : + keyuse->table->key_info[keyuse->key] + .key_part[keyuse->keypart] + .field->field_name.str)); + keyuse_elem.add("equals",keyuse->val); + keyuse_elem.add("null_rejecting",keyuse->null_rejecting); + } +} diff --git a/sql/sql_test.h b/sql/sql_test.h new file mode 100644 index 00000000..2ab305ac --- /dev/null +++ b/sql/sql_test.h @@ -0,0 +1,41 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_TEST_INCLUDED +#define SQL_TEST_INCLUDED + +#include "mysqld.h" +#include "opt_trace_context.h" + +class JOIN; +struct TABLE_LIST; +typedef class Item COND; +typedef class st_select_lex SELECT_LEX; +struct SORT_FIELD; + +#ifndef DBUG_OFF +void print_where(COND *cond,const char *info, enum_query_type query_type); +void TEST_filesort(SORT_FIELD *sortorder,uint s_length); +void TEST_join(JOIN *join); +void print_plan(JOIN* join,uint idx, double record_count, double read_time, + double current_read_time, const char *info); +void print_keyuse_array(DYNAMIC_ARRAY *keyuse_array); +void print_sjm(SJ_MATERIALIZATION_INFO *sjm); +void dump_TABLE_LIST_graph(SELECT_LEX *select_lex, TABLE_LIST* tl); +#endif +void print_keyuse_array_for_trace(THD *thd, DYNAMIC_ARRAY *keyuse_array); +void mysql_print_status(); + +#endif /* SQL_TEST_INCLUDED */ diff --git a/sql/sql_time.cc b/sql/sql_time.cc new file mode 100644 index 00000000..753f78cc --- /dev/null +++ b/sql/sql_time.cc @@ -0,0 +1,1394 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. + Copyright (c) 2009, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* Functions to handle date and time */ + +#include "mariadb.h" +#include "sql_time.h" +#include "tztime.h" // struct Time_zone +#include "sql_class.h" // THD +#include + + +#define MAX_DAY_NUMBER 3652424L + + /* Some functions to calculate dates */ + +/* + Name description of interval names used in statements. + + 'interval_type_to_name' is ordered and sorted on interval size and + interval complexity. + Order of elements in 'interval_type_to_name' should correspond to + the order of elements in 'interval_type' enum + + See also interval_type, interval_names, append_interval +*/ + +LEX_CSTRING interval_type_to_name[INTERVAL_LAST] = { + { STRING_WITH_LEN("YEAR")}, + { STRING_WITH_LEN("QUARTER")}, + { STRING_WITH_LEN("MONTH")}, + { STRING_WITH_LEN("WEEK")}, + { STRING_WITH_LEN("DAY")}, + { STRING_WITH_LEN("HOUR")}, + { STRING_WITH_LEN("MINUTE")}, + { STRING_WITH_LEN("SECOND")}, + { STRING_WITH_LEN("MICROSECOND")}, + { STRING_WITH_LEN("YEAR_MONTH")}, + { STRING_WITH_LEN("DAY_HOUR")}, + { STRING_WITH_LEN("DAY_MINUTE")}, + { STRING_WITH_LEN("DAY_SECOND")}, + { STRING_WITH_LEN("HOUR_MINUTE")}, + { STRING_WITH_LEN("HOUR_SECOND")}, + { STRING_WITH_LEN("MINUTE_SECOND")}, + { STRING_WITH_LEN("DAY_MICROSECOND")}, + { STRING_WITH_LEN("HOUR_MICROSECOND")}, + { STRING_WITH_LEN("MINUTE_MICROSECOND")}, + { STRING_WITH_LEN("SECOND_MICROSECOND")} +}; + +int append_interval(String *str, interval_type int_type, const INTERVAL &interval) +{ + char buf[64]; + size_t len; + switch (int_type) { + case INTERVAL_YEAR: + len= my_snprintf(buf,sizeof(buf),"%lu", interval.year); + break; + case INTERVAL_QUARTER: + case INTERVAL_MONTH: + len= my_snprintf(buf,sizeof(buf),"%lu", interval.month); + int_type=INTERVAL_MONTH; + break; + case INTERVAL_WEEK: + case INTERVAL_DAY: + len= 
my_snprintf(buf,sizeof(buf),"%lu", interval.day); + int_type=INTERVAL_DAY; + break; + case INTERVAL_HOUR: + len= my_snprintf(buf,sizeof(buf),"%lu", interval.hour); + break; + case INTERVAL_MINUTE: + len= my_snprintf(buf,sizeof(buf),"%llu", interval.minute); + break; + case INTERVAL_SECOND: + len= my_snprintf(buf,sizeof(buf),"%llu", interval.second); + break; + case INTERVAL_MICROSECOND: + len= my_snprintf(buf,sizeof(buf),"%llu", interval.second_part); + break; + case INTERVAL_YEAR_MONTH: + len= my_snprintf(buf,sizeof(buf),"'%lu-%02lu'", + interval.year, interval.month); + break; + case INTERVAL_DAY_HOUR: + len= my_snprintf(buf,sizeof(buf),"'%lu %lu'", interval.day, interval.hour); + break; + case INTERVAL_DAY_MINUTE: + len= my_snprintf(buf,sizeof(buf),"'%lu %lu:%02llu'", + interval.day, interval.hour, interval.minute); + break; + case INTERVAL_DAY_SECOND: + len= my_snprintf(buf,sizeof(buf),"'%lu %lu:%02llu:%02llu'", + interval.day, interval.hour, interval.minute, interval.second); + break; + case INTERVAL_HOUR_MINUTE: + len= my_snprintf(buf,sizeof(buf),"'%lu:%02llu'", interval.hour, interval.minute); + break; + case INTERVAL_HOUR_SECOND: + len= my_snprintf(buf,sizeof(buf),"'%lu:%02llu:%02llu'", + interval.hour, interval.minute, interval.second); + break; + case INTERVAL_MINUTE_SECOND: + len= my_snprintf(buf,sizeof(buf),"'%llu:%02llu'", interval.minute, interval.second); + break; + case INTERVAL_DAY_MICROSECOND: + len= my_snprintf(buf,sizeof(buf),"'%lu %lu:%02llu:%02llu.%06llu'", + interval.day, interval.hour, interval.minute, + interval.second, interval.second_part); + break; + case INTERVAL_HOUR_MICROSECOND: + len= my_snprintf(buf,sizeof(buf),"'%lu:%02llu:%02llu.%06llu'", + interval.hour, interval.minute, interval.second, + interval.second_part); + break; + case INTERVAL_MINUTE_MICROSECOND: + len= my_snprintf(buf,sizeof(buf),"'%llu:%02llu.%06llu'", + interval.minute, interval.second, interval.second_part); + break; + case INTERVAL_SECOND_MICROSECOND: + len= 
my_snprintf(buf,sizeof(buf),"%llu.%06llu", interval.second, interval.second_part); + break; + default: + DBUG_ASSERT(0); + len= 0; + } + return str->append(buf, len) || str->append(' ') || + str->append(interval_type_to_name + int_type); +} + + +/* + Calc weekday from daynr + Returns 0 for monday, 1 for tuesday ... +*/ + +int calc_weekday(long daynr,bool sunday_first_day_of_week) +{ + DBUG_ENTER("calc_weekday"); + DBUG_RETURN ((int) ((daynr + 5L + (sunday_first_day_of_week ? 1L : 0L)) % 7)); +} + +/* + The bits in week_format has the following meaning: + WEEK_MONDAY_FIRST (0) If not set Sunday is first day of week + If set Monday is first day of week + WEEK_YEAR (1) If not set Week is in range 0-53 + + Week 0 is returned for the the last week of the previous year (for + a date at start of january) In this case one can get 53 for the + first week of next year. This flag ensures that the week is + relevant for the given year. Note that this flag is only + releveant if WEEK_JANUARY is not set. + + If set Week is in range 1-53. + + In this case one may get week 53 for a date in January (when + the week is that last week of previous year) and week 1 for a + date in December. + + WEEK_FIRST_WEEKDAY (2) If not set Weeks are numbered according + to ISO 8601:1988 + If set The week that contains the first + 'first-day-of-week' is week 1. + + ISO 8601:1988 means that if the week containing January 1 has + four or more days in the new year, then it is week 1; + Otherwise it is the last week of the previous year, and the + next week is week 1. 
+*/ + +uint calc_week(const MYSQL_TIME *l_time, uint week_behaviour, uint *year) +{ + uint days; + ulong daynr=calc_daynr(l_time->year,l_time->month,l_time->day); + ulong first_daynr=calc_daynr(l_time->year,1,1); + bool monday_first= MY_TEST(week_behaviour & WEEK_MONDAY_FIRST); + bool week_year= MY_TEST(week_behaviour & WEEK_YEAR); + bool first_weekday= MY_TEST(week_behaviour & WEEK_FIRST_WEEKDAY); + + uint weekday=calc_weekday(first_daynr, !monday_first); + *year=l_time->year; + + if (l_time->month == 1 && l_time->day <= 7-weekday) + { + if (!week_year && + ((first_weekday && weekday != 0) || + (!first_weekday && weekday >= 4))) + return 0; + week_year= 1; + (*year)--; + first_daynr-= (days=calc_days_in_year(*year)); + weekday= (weekday + 53*7- days) % 7; + } + + if ((first_weekday && weekday != 0) || + (!first_weekday && weekday >= 4)) + days= daynr - (first_daynr+ (7-weekday)); + else + days= daynr - (first_daynr - weekday); + + if (week_year && days >= 52*7) + { + weekday= (weekday + calc_days_in_year(*year)) % 7; + if ((!first_weekday && weekday < 4) || + (first_weekday && weekday == 0)) + { + (*year)++; + return 1; + } + } + return days/7+1; +} + + /* Change a daynr to year, month and day */ + /* Daynr 0 is returned as date 00.00.00 */ + +bool get_date_from_daynr(long daynr,uint *ret_year,uint *ret_month, + uint *ret_day) +{ + uint year,temp,leap_day,day_of_year,days_in_year; + uchar *month_pos; + DBUG_ENTER("get_date_from_daynr"); + + if (daynr < 366 || daynr > MAX_DAY_NUMBER) + DBUG_RETURN(1); + + year= (uint) (daynr*100 / 36525L); + temp=(((year-1)/100+1)*3)/4; + day_of_year=(uint) (daynr - (long) year * 365L) - (year-1)/4 +temp; + while (day_of_year > (days_in_year= calc_days_in_year(year))) + { + day_of_year-=days_in_year; + (year)++; + } + leap_day=0; + if (days_in_year == 366) + { + if (day_of_year > 31+28) + { + day_of_year--; + if (day_of_year == 31+28) + leap_day=1; /* Handle leapyears leapday */ + } + } + *ret_month=1; + for (month_pos= 
days_in_month ; + day_of_year > (uint) *month_pos ; + day_of_year-= *(month_pos++), (*ret_month)++) + ; + *ret_year=year; + *ret_day=day_of_year+leap_day; + DBUG_RETURN(0); +} + + /* Functions to handle periods */ + +ulong convert_period_to_month(ulong period) +{ + ulong a,b; + if (period == 0 || period > 999912) + return 0L; + if ((a=period/100) < YY_PART_YEAR) + a+=2000; + else if (a < 100) + a+=1900; + b=period%100; + return a*12+b-1; +} + + +ulong convert_month_to_period(ulong month) +{ + ulong year; + if (month == 0L) + return 0L; + if ((year=month/12) < 100) + { + year+=(year < YY_PART_YEAR) ? 2000 : 1900; + } + return year*100+month%12+1; +} + + +bool +check_date_with_warn(THD *thd, const MYSQL_TIME *ltime, + date_conv_mode_t fuzzydate, timestamp_type ts_type) +{ + int unused; + if (check_date(ltime, fuzzydate, &unused)) + { + ErrConvTime str(ltime); + make_truncated_value_warning(thd, Sql_condition::WARN_LEVEL_WARN, + &str, ts_type, nullptr, nullptr, nullptr); + return true; + } + return false; +} + + +bool +adjust_time_range_with_warn(THD *thd, MYSQL_TIME *ltime, uint dec) +{ + MYSQL_TIME copy= *ltime; + ErrConvTime str(©); + int warnings= 0; + if (check_time_range(ltime, dec, &warnings)) + return true; + if (warnings) + thd->push_warning_truncated_wrong_value("time", str.ptr()); + return false; +} + +/* + Convert a string to 8-bit representation, + for use in str_to_time/str_to_date/str_to_date. + + In the future to_ascii() can be extended to convert + non-ASCII digits to ASCII digits + (for example, ARABIC-INDIC, DEVANAGARI, BENGALI, and so on) + so DATE/TIME/DATETIME values understand digits in the + respected scripts. 
+*/ +static uint +to_ascii(CHARSET_INFO *cs, + const char *src, size_t src_length, + char *dst, size_t dst_length) + +{ + int cnvres; + my_wc_t wc; + const char *srcend= src + src_length; + char *dst0= dst, *dstend= dst + dst_length - 1; + while (dst < dstend && + (cnvres= cs->mb_wc(&wc, + (const uchar*) src, + (const uchar*) srcend)) > 0 && + wc < 128) + { + src+= cnvres; + *dst++= static_cast(wc); + } + *dst= '\0'; + return (uint)(dst - dst0); +} + + +class TemporalAsciiBuffer: public LEX_CSTRING +{ + char cnv[32]; +public: + TemporalAsciiBuffer(const char *str, size_t length, CHARSET_INFO *cs) + { + if ((cs->state & MY_CS_NONASCII) != 0) + { + LEX_CSTRING::str= cnv; + LEX_CSTRING::length= to_ascii(cs, str, length, cnv, sizeof(cnv)); + } + else + { + LEX_CSTRING::str= str; + LEX_CSTRING::length= length; + } + } +}; + + +/* Character set-aware version of ascii_to_datetime_or_date_or_time() */ +bool Temporal::str_to_datetime_or_date_or_time(THD *thd, MYSQL_TIME_STATUS *st, + const char *str, size_t length, + CHARSET_INFO *cs, + date_mode_t fuzzydate) +{ + TemporalAsciiBuffer tmp(str, length, cs); + return ascii_to_datetime_or_date_or_time(st, tmp.str, tmp.length, fuzzydate)|| + add_nanoseconds(thd, &st->warnings, fuzzydate, st->nanoseconds); +} + + +/* Character set-aware version of str_to_datetime_or_date() */ +bool Temporal::str_to_datetime_or_date(THD *thd, MYSQL_TIME_STATUS *status, + const char *str, size_t length, + CHARSET_INFO *cs, + date_mode_t flags) +{ + TemporalAsciiBuffer tmp(str, length, cs); + return ascii_to_datetime_or_date(status, tmp.str, tmp.length, flags) || + add_nanoseconds(thd, &status->warnings, flags, status->nanoseconds); +} + + +/* Character set-aware version of ascii_to_temporal() */ +bool Temporal::str_to_temporal(THD *thd, MYSQL_TIME_STATUS *status, + const char *str, size_t length, CHARSET_INFO *cs, + date_mode_t flags) +{ + TemporalAsciiBuffer tmp(str, length, cs); + return ascii_to_temporal(status, tmp.str, tmp.length, flags) || + 
add_nanoseconds(thd, &status->warnings, flags, status->nanoseconds); +} + + +/* Character set-aware version of str_to_DDhhmmssff() */ +bool Interval_DDhhmmssff::str_to_DDhhmmssff(MYSQL_TIME_STATUS *status, + const char *str, size_t length, + CHARSET_INFO *cs, ulong max_hour) +{ + TemporalAsciiBuffer tmp(str, length, cs); + bool rc= ::str_to_DDhhmmssff(tmp.str, tmp.length, this, UINT_MAX32, status); + DBUG_ASSERT(status->warnings || !rc); + return rc; +} + + +/* + Convert a timestamp string to a MYSQL_TIME value and produce a warning + if string was truncated during conversion. + + NOTE + See description of str_to_datetime_xxx() for more information. +*/ + +bool +str_to_datetime_with_warn(THD *thd, CHARSET_INFO *cs, + const char *str, size_t length, MYSQL_TIME *to, + date_mode_t mode) +{ + Temporal::Warn_push warn(thd, nullptr, nullptr, nullptr, to, mode); + Temporal_hybrid *t= new(to) Temporal_hybrid(thd, &warn, str, length, cs, mode); + return !t->is_valid_temporal(); +} + + +bool double_to_datetime_with_warn(THD *thd, double value, MYSQL_TIME *ltime, + date_mode_t fuzzydate, + const TABLE_SHARE *s, const char *field_name) +{ + Temporal::Warn_push warn(thd, s ? s->db.str : nullptr, + s ? s->table_name.str : nullptr, + field_name, ltime, fuzzydate); + Temporal_hybrid *t= new (ltime) Temporal_hybrid(thd, &warn, value, fuzzydate); + return !t->is_valid_temporal(); +} + + +bool decimal_to_datetime_with_warn(THD *thd, const my_decimal *value, + MYSQL_TIME *ltime, + date_mode_t fuzzydate, + const TABLE_SHARE *s, const char *field_name) +{ + Temporal::Warn_push warn(thd, s ? s->db.str : nullptr, + s ? 
s->table_name.str : nullptr, + field_name, ltime, fuzzydate); + Temporal_hybrid *t= new (ltime) Temporal_hybrid(thd, &warn, value, fuzzydate); + return !t->is_valid_temporal(); +} + + +bool int_to_datetime_with_warn(THD *thd, const Longlong_hybrid &nr, + MYSQL_TIME *ltime, + date_mode_t fuzzydate, + const TABLE_SHARE *s, const char *field_name) +{ + /* + Note: conversion from an integer to TIME can overflow to '838:59:59.999999', + so the conversion result can have fractional digits. + */ + Temporal::Warn_push warn(thd, s ? s->db.str : nullptr, + s ? s->table_name.str : nullptr, + field_name, ltime, fuzzydate); + Temporal_hybrid *t= new (ltime) Temporal_hybrid(thd, &warn, nr, fuzzydate); + return !t->is_valid_temporal(); +} + + +/* + Convert a datetime from broken-down MYSQL_TIME representation to + corresponding TIMESTAMP value. + + SYNOPSIS + TIME_to_timestamp() + thd - current thread + t - datetime in broken-down representation, + error_code - 0, if the conversion was successful; + ER_WARN_DATA_OUT_OF_RANGE, if t contains datetime value + which is out of TIMESTAMP range; + ER_WARN_INVALID_TIMESTAMP, if t represents value which + doesn't exists (falls into the spring time-gap). + + RETURN + Number seconds in UTC since start of Unix Epoch corresponding to t. 
+ 0 - in case of ER_WARN_DATA_OUT_OF_RANGE +*/ + +my_time_t TIME_to_timestamp(THD *thd, const MYSQL_TIME *t, uint *error_code) +{ + thd->used|= THD::TIME_ZONE_USED; + return thd->variables.time_zone->TIME_to_gmt_sec(t, error_code); +} + + +/* + Convert a system time structure to TIME +*/ + +void localtime_to_TIME(MYSQL_TIME *to, struct tm *from) +{ + to->neg=0; + to->second_part=0; + to->year= (int) ((from->tm_year+1900) % 10000); + to->month= (int) from->tm_mon+1; + to->day= (int) from->tm_mday; + to->hour= (int) from->tm_hour; + to->minute= (int) from->tm_min; + to->second= (int) from->tm_sec; +} + + +void calc_time_from_sec(MYSQL_TIME *to, ulong seconds, ulong microseconds) +{ + long t_seconds; + // to->neg is not cleared, it may already be set to a useful value + to->time_type= MYSQL_TIMESTAMP_TIME; + to->year= 0; + to->month= 0; + to->day= 0; + to->hour= seconds/3600L; + t_seconds= seconds%3600L; + to->minute= t_seconds/60L; + to->second= t_seconds%60L; + to->second_part= microseconds; +} + + +/* + Parse a format string specification + + SYNOPSIS + parse_date_time_format() + format_type Format of string (time, date or datetime) + format_str String to parse + format_length Length of string + date_time_format Format to fill in + + NOTES + Fills in date_time_format->positions for all date time parts. + + positions marks the position for a datetime element in the format string. + The position array elements are in the following order: + YYYY-DD-MM HH-MM-DD.FFFFFF AM + 0 1 2 3 4 5 6 7 + + If positions[0]= 5, it means that year will be the forth element to + read from the parsed date string. 
  RETURN
    0  ok
    1  error
*/

bool parse_date_time_format(timestamp_type format_type,
                            const char *format, uint format_length,
                            DATE_TIME_FORMAT *date_time_format)
{
  uint offset= 0, separators= 0;
  const char *ptr= format, *format_str;
  const char *end= ptr+format_length;
  uchar *dt_pos= date_time_format->positions;
  /* need_p is set if we are using AM/PM format */
  bool need_p= 0, allow_separator= 0;
  /* part_map: bitmap of which format tags were seen;
     separator_map: bitmap of which parts are followed by punctuation */
  ulong part_map= 0, separator_map= 0;
  const char *parts[16];

  date_time_format->time_separator= 0;
  date_time_format->flag= 0;                    // For future

  /*
    Fill position with 'dummy' arguments to find out if a format tag is
    used twice (This limits the format to 255 characters, but this is ok)
  */
  dt_pos[0]= dt_pos[1]= dt_pos[2]= dt_pos[3]=
    dt_pos[4]= dt_pos[5]= dt_pos[6]= dt_pos[7]= 255;

  for (; ptr != end; ptr++)
  {
    if (*ptr == '%' && ptr+1 != end)
    {
      uint UNINIT_VAR(position);
      switch (*++ptr) {
      case 'y':                                 // Year
      case 'Y':
        position= 0;
        break;
      case 'c':                                 // Month
      case 'm':
        position= 1;
        break;
      case 'd':                                 // Day
      case 'e':
        position= 2;
        break;
      case 'h':                                 // 12-hour forms
      case 'I':
      case 'l':
        need_p= 1;                              // Need AM/PM
        /* Fall through */
      case 'k':                                 // 24-hour forms
      case 'H':
        position= 3;
        break;
      case 'i':                                 // Minute
        position= 4;
        break;
      case 's':                                 // Second
      case 'S':
        position= 5;
        break;
      case 'f':                                 // Fractional seconds
        position= 6;
        /* %f must directly follow %s/%S with a '.' between them */
        if (dt_pos[5] != offset-1 || ptr[-2] != '.')
          return 1;                             // Wrong usage of %f
        break;
      case 'p':                                 // AM/PM
        /* NOTE(review): this returns 0 (=ok) when %p is first, while every
           other misuse returns 1 (=error) — confirm against upstream */
        if (offset == 0)                        // Can't be first
          return 0;
        position= 7;
        break;
      default:
        return 1;                               // Unknown control char
      }
      if (dt_pos[position] != 255)              // Don't allow same tag twice
        return 1;
      parts[position]= ptr-1;

      /*
        If switching from time to date, ensure that all time parts
        are used
      */
      if (part_map && position <= 2 && !(part_map & (1 | 2 | 4)))
        offset=5;
      part_map|= (ulong) 1 << position;
      dt_pos[position]= offset++;
      allow_separator= 1;
    }
    else
    {
      /*
        Don't allow any characters in format as this could easily confuse
        the date reader
      */
      if (!allow_separator)
        return 1;                               // No separator here
      allow_separator= 0;                       // Don't allow two separators
      separators++;
      /* Store in separator_map which parts are punct characters */
      if (my_ispunct(&my_charset_latin1, *ptr))
        separator_map|= (ulong) 1 << (offset-1);
      else if (!my_isspace(&my_charset_latin1, *ptr))
        return 1;
    }
  }

  /* If no %f, specify it after seconds.  Move %p up, if necessary */
  if ((part_map & 32) && !(part_map & 64))
  {
    dt_pos[6]= dt_pos[5] +1;
    parts[6]= parts[5];                         // For later test in (need_p)
    if (dt_pos[6] == dt_pos[7])                 // Move %p one step up if used
      dt_pos[7]++;
  }

  /*
    Check that we have not used a non legal format specifier and that all
    format specifiers have been used

    The last test is to ensure that %p is used if and only if
    it's needed.
  */
  if ((format_type == MYSQL_TIMESTAMP_DATETIME &&
       !test_all_bits(part_map, (1 | 2 | 4 | 8 | 16 | 32))) ||
      (format_type == MYSQL_TIMESTAMP_DATE && part_map != (1 | 2 | 4)) ||
      (format_type == MYSQL_TIMESTAMP_TIME &&
       !test_all_bits(part_map, 8 | 16 | 32)) ||
      !allow_separator ||                       // %option should be last
      (need_p && dt_pos[6] +1 != dt_pos[7]) ||
      (need_p ^ (dt_pos[7] != 255)))
    return 1;

  if (dt_pos[6] != 255)                         // If fractional seconds
  {
    /* remove fractional seconds from later tests */
    uint pos= dt_pos[6] -1;
    /* Remove separator before %f from sep map */
    separator_map= ((separator_map & ((ulong) (1 << pos)-1)) |
                    ((separator_map & ~((ulong) (1 << pos)-1)) >> 1));
    if (part_map & 64)
    {
      separators--;                             // There is always a separator
      need_p= 1;                                // force use of separators
    }
  }

  /*
    Remove possible separator before %p from sep_map
    (This can either be at position 3, 4, 6 or 7) h.m.d.%f %p
  */
  if (dt_pos[7] != 255)
  {
    if (need_p && parts[7] != parts[6]+2)
      separators--;
  }
  /*
    Calculate if %p is in first or last part of the datetime field

    At this point we have either %H-%i-%s %p 'year parts' or
    'year parts' %H-%i-%s %p as %f was removed above
  */
  offset= dt_pos[6] <= 3 ? 3 : 6;
  /* Remove separator before %p from sep map */
  separator_map= ((separator_map & ((ulong) (1 << offset)-1)) |
                  ((separator_map & ~((ulong) (1 << offset)-1)) >> 1));

  format_str= 0;
  switch (format_type) {
  case MYSQL_TIMESTAMP_DATE:
    format_str= known_date_time_formats[INTERNAL_FORMAT].date_format;
    /* fall through */
  case MYSQL_TIMESTAMP_TIME:
    if (!format_str)
      format_str=known_date_time_formats[INTERNAL_FORMAT].time_format;

    /*
      If there is no separators, allow the internal format as we can read
      this.  If separators are used, they must be between each part
    */
    if (format_length == 6 && !need_p &&
        !my_charset_bin.strnncoll(format, 6, format_str, 6))
      return 0;
    if (separator_map == (1 | 2))
    {
      if (format_type == MYSQL_TIMESTAMP_TIME)
      {
        if (*(format+2) != *(format+5))
          break;                                // Error
        /* Store the character used for time formats */
        date_time_format->time_separator= *(format+2);
      }
      return 0;
    }
    break;
  case MYSQL_TIMESTAMP_DATETIME:
    /*
      If there is no separators, allow the internal format as we can read
      this. If separators are used, they must be between each part.
      Between DATE and TIME we also allow space as separator
    */
    if ((format_length == 12 && !need_p &&
         !my_charset_bin.strnncoll(
                  format, 12,
                  known_date_time_formats[INTERNAL_FORMAT].datetime_format,
                  12)) ||
        (separators == 5 && separator_map == (1 | 2 | 8 | 16)))
      return 0;
    break;
  default:
    DBUG_ASSERT(0);
    break;
  }
  return 1;                                     // Error
}


/*
  Create a DATE_TIME_FORMAT object from a format string specification

  SYNOPSIS
    date_time_format_make()
      format_type    Format to parse (time, date or datetime)
      format_str     String to parse
      format_length  Length of string

  NOTES
    The returned object should be freed with my_free()

  RETURN
    NULL ponter:  Error
    new object
*/

DATE_TIME_FORMAT
*date_time_format_make(timestamp_type format_type,
                       const char *format_str, uint format_length)
{
  DATE_TIME_FORMAT tmp;

  if (format_length && format_length < 255 &&
      !parse_date_time_format(format_type, format_str,
                              format_length, &tmp))
  {
    tmp.format.str= format_str;
    tmp.format.length= format_length;
    return date_time_format_copy((THD *)0, &tmp);
  }
  return 0;
}


/*
  Create a copy of a DATE_TIME_FORMAT object

  SYNOPSIS
    date_and_time_format_copy()
      thd     Set if variable should be allocated in thread mem
      format  format to copy

  NOTES
    The returned object should be freed with my_free()

  RETURN
    NULL ponter:  Error
    new object
*/

DATE_TIME_FORMAT *date_time_format_copy(THD *thd, DATE_TIME_FORMAT *format)
{
  DATE_TIME_FORMAT *new_format;
  /* Single allocation: struct followed by the NUL-terminated format string */
  size_t length= sizeof(*format) + format->format.length + 1;
  char *format_pos;

  if (thd)
    new_format= (DATE_TIME_FORMAT *) thd->alloc(length);
  else
    new_format= (DATE_TIME_FORMAT *) my_malloc(key_memory_DATE_TIME_FORMAT,
                                               length, MYF(MY_WME));
  if (new_format)
  {
    /* Put format string after current pos */
    new_format->format.str= format_pos= (char*) (new_format+1);
    memcpy((char*) new_format->positions, (char*) format->positions,
           sizeof(format->positions));
    new_format->time_separator= format->time_separator;
    /* We make the string null terminated for easy printf in SHOW VARIABLES */
    memcpy(format_pos, format->format.str, format->format.length);
    format_pos[format->format.length]= 0;
    new_format->format.length= format->format.length;
  }
  return new_format;
}


/* Table of named date/time formats; last entry is a terminating sentinel */
KNOWN_DATE_TIME_FORMAT known_date_time_formats[6]=
{
  {"USA", "%m.%d.%Y", "%Y-%m-%d %H.%i.%s", "%h:%i:%s %p" },
  {"JIS", "%Y-%m-%d", "%Y-%m-%d %H:%i:%s", "%H:%i:%s" },
  {"ISO", "%Y-%m-%d", "%Y-%m-%d %H:%i:%s", "%H:%i:%s" },
  {"EUR", "%d.%m.%Y", "%Y-%m-%d %H.%i.%s", "%H.%i.%s" },
  {"INTERNAL", "%Y%m%d", "%Y%m%d%H%i%s", "%H%i%s" },
  { 0, 0, 0, 0 }
};


/* Select the date, datetime or time format string from a format entry */
const char *get_date_time_format_str(KNOWN_DATE_TIME_FORMAT *format,
                                     timestamp_type type)
{
  switch (type) {
  case MYSQL_TIMESTAMP_DATE:
    return format->date_format;
  case MYSQL_TIMESTAMP_DATETIME:
    return format->datetime_format;
  case MYSQL_TIMESTAMP_TIME:
    return format->time_format;
  default:
    DBUG_ASSERT(0);                             // Impossible
    return 0;
  }
}


/**
  Convert TIME/DATE/DATETIME value to String.
  @param l_time   DATE value
  @param OUT str  String to convert to
  @param dec      Number of fractional digits.
+*/ +bool my_TIME_to_str(const MYSQL_TIME *ltime, String *str, uint dec) +{ + if (str->alloc(MAX_DATE_STRING_REP_LENGTH)) + return true; + str->set_charset(&my_charset_numeric); + str->length(my_TIME_to_str(ltime, const_cast(str->ptr()), dec)); + return false; +} + + +void make_truncated_value_warning(THD *thd, + Sql_condition::enum_warning_level level, + const ErrConv *sval, + timestamp_type time_type, + const char *db_name, const char *table_name, + const char *field_name) +{ + const char *type_str= Temporal::type_name_by_timestamp_type(time_type); + return thd->push_warning_wrong_or_truncated_value + (level, time_type <= MYSQL_TIMESTAMP_ERROR, type_str, sval->ptr(), + db_name, table_name, field_name); +} + + +/* Daynumber from year 0 to 9999-12-31 */ +#define COMBINE(X) \ + (((((X)->day * 24LL + (X)->hour) * 60LL + \ + (X)->minute) * 60LL + (X)->second)*1000000LL + \ + (X)->second_part) +#define GET_PART(X, N) X % N ## LL; X/= N ## LL + +bool date_add_interval(THD *thd, MYSQL_TIME *ltime, interval_type int_type, + const INTERVAL &interval, bool push_warn) +{ + long period, sign; + + sign= (interval.neg == (bool)ltime->neg ? 
1 : -1); + + switch (int_type) { + case INTERVAL_SECOND: + case INTERVAL_SECOND_MICROSECOND: + case INTERVAL_MICROSECOND: + case INTERVAL_MINUTE: + case INTERVAL_HOUR: + case INTERVAL_MINUTE_MICROSECOND: + case INTERVAL_MINUTE_SECOND: + case INTERVAL_HOUR_MICROSECOND: + case INTERVAL_HOUR_SECOND: + case INTERVAL_HOUR_MINUTE: + case INTERVAL_DAY_MICROSECOND: + case INTERVAL_DAY_SECOND: + case INTERVAL_DAY_MINUTE: + case INTERVAL_DAY_HOUR: + case INTERVAL_DAY: + { + longlong usec, daynr; + my_bool neg= 0; + enum enum_mysql_timestamp_type time_type= ltime->time_type; + + if (((ulonglong) interval.day + + (ulonglong) interval.hour / 24 + + (ulonglong) interval.minute / 24 / 60 + + (ulonglong) interval.second / 24 / 60 / 60) > MAX_DAY_NUMBER) + goto invalid_date; + + if (time_type != MYSQL_TIMESTAMP_TIME) + ltime->day+= calc_daynr(ltime->year, ltime->month, 1) - 1; + + usec= COMBINE(ltime) + sign*COMBINE(&interval); + + if (usec < 0) + { + neg= 1; + usec= -usec; + } + + ltime->second_part= GET_PART(usec, 1000000); + ltime->second= GET_PART(usec, 60); + ltime->minute= GET_PART(usec, 60); + ltime->neg^= neg; + + if (time_type == MYSQL_TIMESTAMP_TIME) + { + if (usec > TIME_MAX_HOUR) + goto invalid_date; + ltime->hour= static_cast(usec); + ltime->day= 0; + return 0; + } + else if (ltime->neg) + goto invalid_date; + + if (int_type != INTERVAL_DAY) + ltime->time_type= MYSQL_TIMESTAMP_DATETIME; // Return full date + + ltime->hour= GET_PART(usec, 24); + daynr= usec; + + /* Day number from year 0 to 9999-12-31 */ + if (get_date_from_daynr((long) daynr, <ime->year, <ime->month, + <ime->day)) + goto invalid_date; + break; + } + case INTERVAL_WEEK: + period= (calc_daynr(ltime->year,ltime->month,ltime->day) + + sign * (long) interval.day); + /* Daynumber from year 0 to 9999-12-31 */ + if (get_date_from_daynr((long) period,<ime->year,<ime->month, + <ime->day)) + goto invalid_date; + break; + case INTERVAL_YEAR: + ltime->year+= sign * (long) interval.year; + if ((ulong) ltime->year >= 
10000L) + goto invalid_date; + if (ltime->month == 2 && ltime->day == 29 && + calc_days_in_year(ltime->year) != 366) + ltime->day=28; // Was leap-year + break; + case INTERVAL_YEAR_MONTH: + case INTERVAL_QUARTER: + case INTERVAL_MONTH: + period= (ltime->year*12 + sign * (long) interval.year*12 + + ltime->month-1 + sign * (long) interval.month); + if ((ulong) period >= 120000L) + goto invalid_date; + ltime->year= (uint) (period / 12); + ltime->month= (uint) (period % 12L)+1; + /* Adjust day if the new month doesn't have enough days */ + if (ltime->day > days_in_month[ltime->month-1]) + { + ltime->day = days_in_month[ltime->month-1]; + if (ltime->month == 2 && calc_days_in_year(ltime->year) == 366) + ltime->day++; // Leap-year + } + break; + default: + goto null_date; + } + + if (ltime->time_type != MYSQL_TIMESTAMP_TIME) + return 0; // Ok + +invalid_date: + if (push_warn) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_DATETIME_FUNCTION_OVERFLOW, + ER_THD(thd, ER_DATETIME_FUNCTION_OVERFLOW), + ltime->time_type == MYSQL_TIMESTAMP_TIME ? + "time" : "datetime"); + } +null_date: + return 1; +} + + +/* + Calculate difference between two datetime values as seconds + microseconds. + + SYNOPSIS + calc_time_diff() + l_time1 - TIME/DATE/DATETIME value + l_time2 - TIME/DATE/DATETIME value + l_sign - 1 absolute values are substracted, + -1 absolute values are added. + seconds_out - Out parameter where difference between + l_time1 and l_time2 in seconds is stored. + microseconds_out- Out parameter where microsecond part of difference + between l_time1 and l_time2 is stored. + + NOTE + This function calculates difference between l_time1 and l_time2 absolute + values. So one should set l_sign and correct result if he want to take + signs into account (i.e. for MYSQL_TIME values). + + RETURN VALUES + Returns sign of difference. 
+ 1 means negative result + 0 means positive result + +*/ + +bool +calc_time_diff(const MYSQL_TIME *l_time1, const MYSQL_TIME *l_time2, + int l_sign, ulonglong *seconds_out, ulong *microseconds_out) +{ + long days; + bool neg; + longlong microseconds; + + /* + We suppose that if first argument is MYSQL_TIMESTAMP_TIME + the second argument should be TIMESTAMP_TIME also. + We should check it before calc_time_diff call. + */ + if (l_time1->time_type == MYSQL_TIMESTAMP_TIME) // Time value + days= (long)l_time1->day - l_sign * (long)l_time2->day; + else + { + days= calc_daynr((uint) l_time1->year, + (uint) l_time1->month, + (uint) l_time1->day); + if (l_time2->time_type == MYSQL_TIMESTAMP_TIME) + days-= l_sign * (long)l_time2->day; + else + days-= l_sign*calc_daynr((uint) l_time2->year, + (uint) l_time2->month, + (uint) l_time2->day); + } + + microseconds= ((longlong)days * SECONDS_IN_24H + + (longlong)(l_time1->hour*3600LL + + l_time1->minute*60L + + l_time1->second) - + l_sign*(longlong)(l_time2->hour*3600LL + + l_time2->minute*60L + + l_time2->second)) * 1000000LL + + (longlong)l_time1->second_part - + l_sign*(longlong)l_time2->second_part; + + neg= 0; + if (microseconds < 0) + { + microseconds= -microseconds; + neg= 1; + } + *seconds_out= (ulonglong) microseconds/1000000L; + *microseconds_out= (ulong) (microseconds%1000000L); + return neg; +} + + +bool calc_time_diff(const MYSQL_TIME *l_time1, const MYSQL_TIME *l_time2, + int l_sign, MYSQL_TIME *l_time3, date_mode_t fuzzydate) +{ + ulonglong seconds; + ulong microseconds; + bzero((char *) l_time3, sizeof(*l_time3)); + l_time3->neg= calc_time_diff(l_time1, l_time2, l_sign, + &seconds, µseconds); + /* + For MYSQL_TIMESTAMP_TIME only: + If first argument was negative and diff between arguments + is non-zero we need to swap sign to get proper result. 
+ */ + if (l_time1->neg && (seconds || microseconds)) + l_time3->neg= 1 - l_time3->neg; // Swap sign of result + + /* + seconds is longlong, when casted to long it may become a small number + even if the original seconds value was too large and invalid. + as a workaround we limit seconds by a large invalid long number + ("invalid" means > TIME_MAX_SECOND) + */ + set_if_smaller(seconds, INT_MAX32); + calc_time_from_sec(l_time3, (ulong) seconds, microseconds); + return ((fuzzydate & TIME_NO_ZERO_DATE) && (seconds == 0) && + (microseconds == 0)); +} + + +/* + Compares 2 MYSQL_TIME structures + + SYNOPSIS + my_time_compare() + + a - first time + b - second time + + RETURN VALUE + -1 - a < b + 0 - a == b + 1 - a > b + +*/ + +int my_time_compare(const MYSQL_TIME *a, const MYSQL_TIME *b) +{ + ulonglong a_t= pack_time(a); + ulonglong b_t= pack_time(b); + + if (a_t < b_t) + return -1; + if (a_t > b_t) + return 1; + + return 0; +} + + +/** + Convert TIME to DATETIME. + @param ltime The value to convert. + @return false on success, true of error (negative time). +*/ +bool time_to_datetime(MYSQL_TIME *ltime) +{ + DBUG_ASSERT(ltime->time_type == MYSQL_TIMESTAMP_TIME); + DBUG_ASSERT(ltime->year == 0); + DBUG_ASSERT(ltime->month == 0); + DBUG_ASSERT(ltime->day == 0); + if (ltime->neg) + return true; + uint day= ltime->hour / 24; + ltime->hour%= 24; + ltime->month= day / 31; + ltime->day= day % 31; + return false; +} + + +/*** Conversion from TIME to DATETIME ***/ + +/* + Simple case: TIME is within normal 24 hours internal. + Mix DATE part of ldate and TIME part of ltime together. 
*/
static void
mix_date_and_time_simple(MYSQL_TIME *ldate, const MYSQL_TIME *ltime)
{
  DBUG_ASSERT(ldate->time_type == MYSQL_TIMESTAMP_DATE ||
              ldate->time_type == MYSQL_TIMESTAMP_DATETIME);
  /* Copy the time-of-day fields verbatim; date fields are kept */
  ldate->hour= ltime->hour;
  ldate->minute= ltime->minute;
  ldate->second= ltime->second;
  ldate->second_part= ltime->second_part;
  ldate->time_type= MYSQL_TIMESTAMP_DATETIME;
}


/*
  Complex case: TIME is negative or outside of the 24 hour interval.
*/
static void
mix_date_and_time_complex(MYSQL_TIME *ldate, const MYSQL_TIME *ltime)
{
  DBUG_ASSERT(ldate->time_type == MYSQL_TIMESTAMP_DATE ||
              ldate->time_type == MYSQL_TIMESTAMP_DATETIME);
  ulonglong seconds;
  ulong days, useconds;
  /* Negative TIME means subtracting it from the date, hence inverted sign */
  int sign= ltime->neg ? 1 : -1;
  ldate->neg= calc_time_diff(ldate, ltime, sign, &seconds, &useconds);

  DBUG_ASSERT(!ldate->neg);
  DBUG_ASSERT(ldate->year > 0);

  days= (long) (seconds / SECONDS_IN_24H);
  calc_time_from_sec(ldate, seconds % SECONDS_IN_24H, useconds);
  get_date_from_daynr(days, &ldate->year, &ldate->month, &ldate->day);
  ldate->time_type= MYSQL_TIMESTAMP_DATETIME;
}


/**
  Mix a date value and a time value.

  @param IN/OUT  to    Date value.
  @param         from  Time value.
*/
static void
mix_date_and_time(MYSQL_TIME *to, const MYSQL_TIME *from)
{
  /* Fast path for an ordinary non-negative time within one day */
  if (!from->neg && from->hour < 24)
    mix_date_and_time_simple(to, from);
  else
    mix_date_and_time_complex(to, from);
}


/**
  Get current date in DATE format
*/
void set_current_date(THD *thd, MYSQL_TIME *to)
{
  thd->variables.time_zone->gmt_sec_to_TIME(to, thd->query_start());
  thd->used|= THD::TIME_ZONE_USED;
  datetime_to_date(to);
}


/**
  5.5 compatible conversion from TIME to DATETIME
*/
static bool
time_to_datetime_old(THD *thd, const MYSQL_TIME *from, MYSQL_TIME *to)
{
  DBUG_ASSERT(from->time_type == MYSQL_TIMESTAMP_TIME);

  if (from->neg)
    return true;

  /* Set the date part: excess hours folded into 31-day month/day fields */
  uint day= from->hour / 24;
  to->day= day % 31;
  to->month= day / 31;
  to->year= 0;
  /* Set the time part */
  to->hour= from->hour % 24;
  to->minute= from->minute;
  to->second= from->second;
  to->second_part= from->second_part;
  /* set sign and type */
  to->neg= 0;
  to->time_type= MYSQL_TIMESTAMP_DATETIME;
  return false;
}


/**
  Convert time to datetime.

  The time value is added to the current datetime value.
  @param  IN  ltime    Time value to convert from.
  @param  OUT ltime2   Datetime value to convert to.
*/
bool
time_to_datetime(THD *thd, const MYSQL_TIME *from, MYSQL_TIME *to)
{
  /* Old mode reproduces the pre-5.6 zero-date based conversion */
  if (thd->variables.old_behavior & OLD_MODE_ZERO_DATE_TIME_CAST)
    return time_to_datetime_old(thd, from, to);
  set_current_date(thd, to);
  mix_date_and_time(to, from);
  return false;
}


bool
time_to_datetime_with_warn(THD *thd,
                           const MYSQL_TIME *from, MYSQL_TIME *to,
                           date_conv_mode_t fuzzydate)
{
  int warn= 0;
  DBUG_ASSERT(from->time_type == MYSQL_TIMESTAMP_TIME);
  /*
    After time_to_datetime() we need to do check_date(), as
    the caller may want TIME_NO_ZERO_DATE or TIME_NO_ZERO_IN_DATE.
    Note, the SQL standard time->datetime conversion mode always returns
    a valid date based on CURRENT_DATE. So we need to do check_date()
    only in the old mode.
  */
  if (time_to_datetime(thd, from, to) ||
      ((thd->variables.old_behavior & OLD_MODE_ZERO_DATE_TIME_CAST) &&
       check_date(to, fuzzydate, &warn)))
  {
    ErrConvTime str(from);
    thd->push_warning_truncated_wrong_value("datetime", str.ptr());
    return true;
  }
  return false;
}


/* Encode a MYSQL_TIME into a single order-preserving signed scalar */
longlong pack_time(const MYSQL_TIME *my_time)
{
  return ((((((my_time->year   * 13ULL +
               my_time->month) * 32ULL +
               my_time->day)   * 24ULL +
               my_time->hour)  * 60ULL +
               my_time->minute) * 60ULL +
               my_time->second) * 1000000ULL +
               my_time->second_part) * (my_time->neg ? -1 : 1);
}

/* Peel one mixed-radix digit off the packed value (modifies 'packed') */
#define get_one(WHERE, FACTOR) WHERE= (ulong)(packed % FACTOR); packed/= FACTOR

void unpack_time(longlong packed, MYSQL_TIME *my_time,
                 enum_mysql_timestamp_type ts_type)
{
  if ((my_time->neg= packed < 0))
    packed= -packed;
  get_one(my_time->second_part, 1000000ULL);
  get_one(my_time->second,           60U);
  get_one(my_time->minute,           60U);
  get_one(my_time->hour,             24U);
  get_one(my_time->day,              32U);
  get_one(my_time->month,            13U);
  my_time->year= (uint)packed;
  my_time->time_type= ts_type;
  switch (ts_type) {
  case MYSQL_TIMESTAMP_TIME:
    /* TIME carries days in hours; unfold month/day back into hours */
    my_time->hour+= (my_time->month * 32 + my_time->day) * 24;
    my_time->month= my_time->day= 0;
    break;
  case MYSQL_TIMESTAMP_DATE:
    my_time->hour= my_time->minute= my_time->second= my_time->second_part= 0;
    break;
  case MYSQL_TIMESTAMP_NONE:
  case MYSQL_TIMESTAMP_ERROR:
    DBUG_ASSERT(0);
    /* fall through */
  case MYSQL_TIMESTAMP_DATETIME:
    break;
  }
}
diff --git a/sql/sql_time.h b/sql/sql_time.h
new file mode 100644
index 00000000..c918eb6d
--- /dev/null
+++ b/sql/sql_time.h
@@ -0,0 +1,190 @@
+/* Copyright (c) 2006, 2010, Oracle and/or its affiliates.
+   Copyright (c) 2011, 2020, MariaDB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_TIME_INCLUDED +#define SQL_TIME_INCLUDED + +#include "sql_basic_types.h" +#include "my_time.h" +#include "mysql_time.h" /* timestamp_type */ +#include "sql_error.h" /* Sql_condition */ +#include "structs.h" /* INTERVAL */ + +typedef enum enum_mysql_timestamp_type timestamp_type; +typedef struct st_date_time_format DATE_TIME_FORMAT; +typedef struct st_known_date_time_format KNOWN_DATE_TIME_FORMAT; + +/* Flags for calc_week() function. */ +#define WEEK_MONDAY_FIRST 1 +#define WEEK_YEAR 2 +#define WEEK_FIRST_WEEKDAY 4 + +ulong convert_period_to_month(ulong period); +ulong convert_month_to_period(ulong month); +void set_current_date(THD *thd, MYSQL_TIME *to); +bool time_to_datetime(MYSQL_TIME *ltime); +bool get_date_from_daynr(long daynr,uint *year, uint *month, uint *day); +my_time_t TIME_to_timestamp(THD *thd, const MYSQL_TIME *t, uint *error_code); +bool str_to_datetime_with_warn(THD *thd, + CHARSET_INFO *cs, const char *str, size_t length, + MYSQL_TIME *l_time, + date_mode_t flags); +bool double_to_datetime_with_warn(THD *thd, double value, MYSQL_TIME *ltime, + date_mode_t fuzzydate, + const TABLE_SHARE *s, const char *name); +bool decimal_to_datetime_with_warn(THD *thd, + const my_decimal *value, MYSQL_TIME *ltime, + date_mode_t fuzzydate, + const TABLE_SHARE *s, const char *name); +bool int_to_datetime_with_warn(THD *thd, const Longlong_hybrid &nr, + MYSQL_TIME *ltime, + date_mode_t fuzzydate, + const TABLE_SHARE *s, const char *name); + +bool time_to_datetime(THD *thd, const MYSQL_TIME *tm, MYSQL_TIME 
*dt); +bool time_to_datetime_with_warn(THD *thd, + const MYSQL_TIME *tm, MYSQL_TIME *dt, + date_conv_mode_t fuzzydate); + +inline void datetime_to_date(MYSQL_TIME *ltime) +{ + DBUG_ASSERT(ltime->time_type == MYSQL_TIMESTAMP_DATE || + ltime->time_type == MYSQL_TIMESTAMP_DATETIME); + DBUG_ASSERT(ltime->neg == 0); + ltime->second_part= ltime->hour= ltime->minute= ltime->second= 0; + ltime->time_type= MYSQL_TIMESTAMP_DATE; +} +inline void date_to_datetime(MYSQL_TIME *ltime) +{ + DBUG_ASSERT(ltime->time_type == MYSQL_TIMESTAMP_DATE || + ltime->time_type == MYSQL_TIMESTAMP_DATETIME); + DBUG_ASSERT(ltime->neg == 0); + ltime->time_type= MYSQL_TIMESTAMP_DATETIME; +} +void make_truncated_value_warning(THD *thd, + Sql_condition::enum_warning_level level, + const ErrConv *str_val, + timestamp_type time_type, + const char *db_name, const char *table_name, + const char *field_name); + +extern DATE_TIME_FORMAT *date_time_format_make(timestamp_type format_type, + const char *format_str, + uint format_length); +extern DATE_TIME_FORMAT *date_time_format_copy(THD *thd, + DATE_TIME_FORMAT *format); +const char *get_date_time_format_str(KNOWN_DATE_TIME_FORMAT *format, + timestamp_type type); +bool my_TIME_to_str(const MYSQL_TIME *ltime, String *str, uint dec); + +/* MYSQL_TIME operations */ +bool date_add_interval(THD *thd, MYSQL_TIME *ltime, interval_type int_type, + const INTERVAL &interval, bool push_warn= true); +bool calc_time_diff(const MYSQL_TIME *l_time1, const MYSQL_TIME *l_time2, + int l_sign, ulonglong *seconds_out, ulong *microseconds_out); +int append_interval(String *str, interval_type int_type, + const INTERVAL &interval); +/** + Calculate time difference between two MYSQL_TIME values and + store the result as an out MYSQL_TIME value in MYSQL_TIMESTAMP_TIME format. + + The result can be outside of the supported TIME range. + For example, calc_time_diff('2002-01-01 00:00:00', '2001-01-01 00:00:00') + returns '8760:00:00'. 
So the caller might want to do check_time_range() or + adjust_time_range_with_warn() on the result of a calc_time_diff() call. + + @param l_time1 - the minuend (TIME/DATE/DATETIME value) + @param l_time2 - the subtrahend TIME/DATE/DATETIME value + @param l_sign - +1 if absolute values are to be subtracted, + or -1 if absolute values are to be added. + @param[out] l_time3 - the result + @param fuzzydate - flags + + @return true - if TIME_NO_ZERO_DATE was passed in flags and + the result appeared to be '00:00:00.000000'. + This is important when calc_time_diff() is called + when calculating DATE_ADD(TIMEDIFF(...),...) + @return false - otherwise +*/ +bool calc_time_diff(const MYSQL_TIME *l_time1, const MYSQL_TIME *l_time2, + int lsign, MYSQL_TIME *l_time3, date_mode_t fuzzydate); +int my_time_compare(const MYSQL_TIME *a, const MYSQL_TIME *b); +void localtime_to_TIME(MYSQL_TIME *to, struct tm *from); + +void calc_time_from_sec(MYSQL_TIME *to, ulong seconds, ulong microseconds); +uint calc_week(const MYSQL_TIME *l_time, uint week_behaviour, uint *year); + +int calc_weekday(long daynr,bool sunday_first_day_of_week); +bool parse_date_time_format(timestamp_type format_type, + const char *format, uint format_length, + DATE_TIME_FORMAT *date_time_format); + +/* convenience wrapper */ +inline bool parse_date_time_format(timestamp_type format_type, + DATE_TIME_FORMAT *date_time_format) +{ + return parse_date_time_format(format_type, + date_time_format->format.str, + (uint) date_time_format->format.length, + date_time_format); +} + + +extern DATE_TIME_FORMAT global_date_format; +extern DATE_TIME_FORMAT global_datetime_format; +extern DATE_TIME_FORMAT global_time_format; +extern KNOWN_DATE_TIME_FORMAT known_date_time_formats[]; +extern LEX_CSTRING interval_type_to_name[]; + +static inline bool +non_zero_hhmmssuu(const MYSQL_TIME *ltime) +{ + return ltime->hour || ltime->minute || ltime->second || ltime->second_part; +} +static inline bool +non_zero_YYMMDD(const MYSQL_TIME 
*ltime) +{ + return ltime->year || ltime->month || ltime->day; +} +static inline bool +non_zero_date(const MYSQL_TIME *ltime) +{ + return non_zero_YYMMDD(ltime) || + (ltime->time_type == MYSQL_TIMESTAMP_DATETIME && + non_zero_hhmmssuu(ltime)); +} +static inline bool +check_date(const MYSQL_TIME *ltime, date_conv_mode_t flags, int *was_cut) +{ + return check_date(ltime, non_zero_date(ltime), + ulonglong(flags & TIME_MODE_FOR_XXX_TO_DATE), was_cut); +} +bool check_date_with_warn(THD *thd, const MYSQL_TIME *ltime, + date_conv_mode_t fuzzy_date, timestamp_type ts_type); +static inline bool +check_date_with_warn(THD *thd, const MYSQL_TIME *ltime, + date_mode_t fuzzydate, timestamp_type ts_type) +{ + return check_date_with_warn(thd, ltime, date_conv_mode_t(fuzzydate), ts_type); +} + +bool adjust_time_range_with_warn(THD *thd, MYSQL_TIME *ltime, uint dec); + +longlong pack_time(const MYSQL_TIME *my_time); +void unpack_time(longlong packed, MYSQL_TIME *my_time, + enum_mysql_timestamp_type ts_type); + +#endif /* SQL_TIME_INCLUDED */ diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc new file mode 100644 index 00000000..067b921e --- /dev/null +++ b/sql/sql_trigger.cc @@ -0,0 +1,2810 @@ +/* + Copyright (c) 2004, 2012, Oracle and/or its affiliates. + Copyright (c) 2010, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#define MYSQL_LEX 1 +#include "mariadb.h" /* NO_EMBEDDED_ACCESS_CHECKS */ +#include "sql_priv.h" +#include "unireg.h" +#include "sp_head.h" +#include "sql_trigger.h" +#include "sql_parse.h" // parse_sql +#include "parse_file.h" +#include "sp.h" +#include "sql_base.h" +#include "sql_show.h" // append_definer, append_identifier +#include "sql_table.h" // build_table_filename, + // check_n_cut_mysql50_prefix +#include "sql_db.h" // get_default_db_collation +#include "sql_handler.h" // mysql_ha_rm_tables +#include "sp_cache.h" // sp_invalidate_cache +#include +#include "ddl_log.h" // ddl_log_state +#include "debug_sync.h" // DEBUG_SYNC +#include "debug.h" // debug_crash_here +#include "mysql/psi/mysql_sp.h" +#include "wsrep_mysqld.h" +#include +#include + +/*************************************************************************/ + +static bool add_table_for_trigger_internal(THD *thd, + const sp_name *trg_name, + bool if_exists, + TABLE_LIST **table, + char *trn_path_buff); + +/* + Functions for TRIGGER_RENAME_PARAM +*/ + +void TRIGGER_RENAME_PARAM::reset() +{ + delete table.triggers; + table.triggers= 0; + free_root(&table.mem_root, MYF(0)); +} + + +/** + Trigger_creation_ctx -- creation context of triggers. 
+*/ + +class Trigger_creation_ctx : public Stored_program_creation_ctx, + public Sql_alloc +{ +public: + static Trigger_creation_ctx *create(THD *thd, + const char *db_name, + const char *table_name, + const LEX_CSTRING *client_cs_name, + const LEX_CSTRING *connection_cl_name, + const LEX_CSTRING *db_cl_name); + + Trigger_creation_ctx(CHARSET_INFO *client_cs, + CHARSET_INFO *connection_cl, + CHARSET_INFO *db_cl) + :Stored_program_creation_ctx(client_cs, connection_cl, db_cl) + { } + + virtual Stored_program_creation_ctx *clone(MEM_ROOT *mem_root) + { + return new (mem_root) Trigger_creation_ctx(m_client_cs, + m_connection_cl, + m_db_cl); + } + +protected: + virtual Object_creation_ctx *create_backup_ctx(THD *thd) const + { + return new Trigger_creation_ctx(thd); + } + + Trigger_creation_ctx(THD *thd) + :Stored_program_creation_ctx(thd) + { } +}; + +/************************************************************************** + Trigger_creation_ctx implementation. +**************************************************************************/ + +Trigger_creation_ctx * +Trigger_creation_ctx::create(THD *thd, + const char *db_name, + const char *table_name, + const LEX_CSTRING *client_cs_name, + const LEX_CSTRING *connection_cl_name, + const LEX_CSTRING *db_cl_name) +{ + CHARSET_INFO *client_cs; + CHARSET_INFO *connection_cl; + CHARSET_INFO *db_cl; + + bool invalid_creation_ctx= FALSE; + myf utf8_flag= thd->get_utf8_flag(); + + if (resolve_charset(client_cs_name->str, + thd->variables.character_set_client, + &client_cs, MYF(utf8_flag))) + { + sql_print_warning("Trigger for table '%s'.'%s': " + "invalid character_set_client value (%s).", + (const char *) db_name, + (const char *) table_name, + (const char *) client_cs_name->str); + + invalid_creation_ctx= TRUE; + } + + if (resolve_collation(connection_cl_name->str, + thd->variables.collation_connection, + &connection_cl,MYF(utf8_flag))) + { + sql_print_warning("Trigger for table '%s'.'%s': " + "invalid collation_connection 
value (%s).", + (const char *) db_name, + (const char *) table_name, + (const char *) connection_cl_name->str); + + invalid_creation_ctx= TRUE; + } + + if (resolve_collation(db_cl_name->str, NULL, &db_cl, MYF(utf8_flag))) + { + sql_print_warning("Trigger for table '%s'.'%s': " + "invalid database_collation value (%s).", + (const char *) db_name, + (const char *) table_name, + (const char *) db_cl_name->str); + + invalid_creation_ctx= TRUE; + } + + if (invalid_creation_ctx) + { + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + ER_TRG_INVALID_CREATION_CTX, + ER_THD(thd, ER_TRG_INVALID_CREATION_CTX), + (const char *) db_name, + (const char *) table_name); + } + + /* + If we failed to resolve the database collation, load the default one + from the disk. + */ + + if (!db_cl) + db_cl= get_default_db_collation(thd, db_name); + + return new Trigger_creation_ctx(client_cs, connection_cl, db_cl); +} + +/*************************************************************************/ + +static const LEX_CSTRING triggers_file_type= + { STRING_WITH_LEN("TRIGGERS") }; + +const char * const TRG_EXT= ".TRG"; + +/** + Table of .TRG file field descriptors. + We have here only one field now because in nearest future .TRG + files will be merged into .FRM files (so we don't need something + like md5 or created fields). 
+*/ +static File_option triggers_file_parameters[]= +{ + { + { STRING_WITH_LEN("triggers") }, + my_offsetof(class Table_triggers_list, definitions_list), + FILE_OPTIONS_STRLIST + }, + { + { STRING_WITH_LEN("sql_modes") }, + my_offsetof(class Table_triggers_list, definition_modes_list), + FILE_OPTIONS_ULLLIST + }, + { + { STRING_WITH_LEN("definers") }, + my_offsetof(class Table_triggers_list, definers_list), + FILE_OPTIONS_STRLIST + }, + { + { STRING_WITH_LEN("client_cs_names") }, + my_offsetof(class Table_triggers_list, client_cs_names), + FILE_OPTIONS_STRLIST + }, + { + { STRING_WITH_LEN("connection_cl_names") }, + my_offsetof(class Table_triggers_list, connection_cl_names), + FILE_OPTIONS_STRLIST + }, + { + { STRING_WITH_LEN("db_cl_names") }, + my_offsetof(class Table_triggers_list, db_cl_names), + FILE_OPTIONS_STRLIST + }, + { + { STRING_WITH_LEN("created") }, + my_offsetof(class Table_triggers_list, hr_create_times), + FILE_OPTIONS_ULLLIST + }, + { { 0, 0 }, 0, FILE_OPTIONS_STRING } +}; + +File_option sql_modes_parameters= +{ + { STRING_WITH_LEN("sql_modes") }, + my_offsetof(class Table_triggers_list, definition_modes_list), + FILE_OPTIONS_ULLLIST +}; + +/** + This must be kept up to date whenever a new option is added to the list + above, as it specifies the number of required parameters of the trigger in + .trg file. + This defines the maximum number of parameters that is read. If there are + more paramaters in the file they are ignored. Less number of parameters + is regarded as ok. +*/ + +static const int TRG_NUM_REQUIRED_PARAMETERS= 7; + +/* + Structure representing contents of .TRN file which are used to support + database wide trigger namespace. 
+*/ + +struct st_trigname +{ + LEX_CSTRING trigger_table; +}; + +static const LEX_CSTRING trigname_file_type= + { STRING_WITH_LEN("TRIGGERNAME") }; + +const char * const TRN_EXT= ".TRN"; + +static File_option trigname_file_parameters[]= +{ + { + { STRING_WITH_LEN("trigger_table")}, + offsetof(struct st_trigname, trigger_table), + FILE_OPTIONS_ESTRING + }, + { { 0, 0 }, 0, FILE_OPTIONS_STRING } +}; + + +class Handle_old_incorrect_sql_modes_hook: public Unknown_key_hook +{ +private: + const char *path; +public: + Handle_old_incorrect_sql_modes_hook(const char *file_path) + :path(file_path) + {}; + virtual bool process_unknown_string(const char *&unknown_key, uchar* base, + MEM_ROOT *mem_root, const char *end); +}; + + +class Handle_old_incorrect_trigger_table_hook: public Unknown_key_hook +{ +public: + Handle_old_incorrect_trigger_table_hook(const char *file_path, + LEX_CSTRING *trigger_table_arg) + :path(file_path), trigger_table_value(trigger_table_arg) + {}; + virtual bool process_unknown_string(const char *&unknown_key, uchar* base, + MEM_ROOT *mem_root, const char *end); +private: + const char *path; + LEX_CSTRING *trigger_table_value; +}; + + +/** + An error handler that catches all non-OOM errors which can occur during + parsing of trigger body. Such errors are ignored and corresponding error + message is used to construct a more verbose error message which contains + name of problematic trigger. This error message is later emitted when + one tries to perform DML or some of DDL on this table. + Also, if possible, grabs name of the trigger being parsed so it can be + used to correctly drop problematic trigger. 
+*/ +class Deprecated_trigger_syntax_handler : public Internal_error_handler +{ +private: + + char m_message[MYSQL_ERRMSG_SIZE]; + const LEX_CSTRING *m_trigger_name; + +public: + + Deprecated_trigger_syntax_handler() : m_trigger_name(NULL) {} + + virtual bool handle_condition(THD *thd, + uint sql_errno, + const char* sqlstate, + Sql_condition::enum_warning_level *level, + const char* message, + Sql_condition ** cond_hdl) + { + if (sql_errno != EE_OUTOFMEMORY && + sql_errno != ER_OUT_OF_RESOURCES) + { + // Check if the current LEX contains a non-empty spname + if(thd->lex->spname) + m_trigger_name= &thd->lex->spname->m_name; + else if (thd->lex->sphead) + { + /* + Some SP statements, for example IF, create their own local LEX. + All LEX instances are available in the LEX stack in sphead::m_lex. + Let's find the one that contains a non-zero spname. + Note, although a parse error has happened, the LEX instances + in sphead::m_lex are not freed yet at this point. The first + found non-zero spname contains the valid trigger name. + */ + const sp_name *spname= thd->lex->sphead->find_spname_recursive(); + if (spname) + m_trigger_name= &spname->m_name; + } + if (m_trigger_name) + my_snprintf(m_message, sizeof(m_message), + ER_THD(thd, ER_ERROR_IN_TRIGGER_BODY), + m_trigger_name->str, message); + else + my_snprintf(m_message, sizeof(m_message), + ER_THD(thd, ER_ERROR_IN_UNKNOWN_TRIGGER_BODY), message); + return true; + } + return false; + } + + const LEX_CSTRING *get_trigger_name() { return m_trigger_name; } + char *get_error_message() { return m_message; } +}; + + +Trigger::~Trigger() +{ + sp_head::destroy(body); +} + + +/** + Call a Table_triggers_list function for all triggers + + @return 0 ok + @return # Something went wrong. 
Pointer to the trigger that mailfuncted + returned +*/ + +Trigger* Table_triggers_list::for_all_triggers(Triggers_processor func, + void *arg) +{ + for (uint i= 0; i < (uint)TRG_EVENT_MAX; i++) + { + for (uint j= 0; j < (uint)TRG_ACTION_MAX; j++) + { + for (Trigger *trigger= get_trigger(i,j) ; + trigger ; + trigger= trigger->next) + if ((trigger->*func)(arg)) + return trigger; + } + } + return 0; +} + + +/** + Create or drop trigger for table. + + @param thd current thread context (including trigger definition in LEX) + @param tables table list containing one table for which trigger is created. + @param create whenever we create (TRUE) or drop (FALSE) trigger + + @note + This function is mainly responsible for opening and locking of table and + invalidation of all its instances in table cache after trigger creation. + Real work on trigger creation/dropping is done inside Table_triggers_list + methods. + + @todo + TODO: We should check if user has TRIGGER privilege for table here. + Now we just require SUPER privilege for creating/dropping because + we don't have proper privilege checking for triggers in place yet. + + @retval + FALSE Success + @retval + TRUE error +*/ + +bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) +{ + /* + FIXME: The code below takes too many different paths depending on the + 'create' flag, so that the justification for a single function + 'mysql_create_or_drop_trigger', compared to two separate functions + 'mysql_create_trigger' and 'mysql_drop_trigger' is not apparent. + This is a good candidate for a minor refactoring. 
+ */ + TABLE *table; + bool result= true, refresh_metadata= false; + bool add_if_exists_to_binlog= false, action_executed= false; + String stmt_query; + bool lock_upgrade_done= FALSE; + bool backup_of_table_list_done= 0;; + MDL_ticket *mdl_ticket= NULL; + MDL_request mdl_request_for_trn; + Query_tables_list backup; + DDL_LOG_STATE ddl_log_state, ddl_log_state_tmp_file; + char trn_path_buff[FN_REFLEN]; + char path[FN_REFLEN + 1]; + + DBUG_ENTER("mysql_create_or_drop_trigger"); + + /* Charset of the buffer for statement must be system one. */ + stmt_query.set_charset(system_charset_info); + bzero(&ddl_log_state, sizeof(ddl_log_state)); + bzero(&ddl_log_state_tmp_file, sizeof(ddl_log_state_tmp_file)); + + /* + QQ: This function could be merged in mysql_alter_table() function + But do we want this ? + */ + + /* + Note that once we will have check for TRIGGER privilege in place we won't + need second part of condition below, since check_access() function also + checks that db is specified. + */ + if (!thd->lex->spname->m_db.length || (create && !tables->db.length)) + { + my_error(ER_NO_DB_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + + /* + We don't allow creating triggers on tables in the 'mysql' schema + */ + if (create && lex_string_eq(&tables->db, STRING_WITH_LEN("mysql"))) + { + my_error(ER_NO_TRIGGERS_ON_SYSTEM_SCHEMA, MYF(0)); + DBUG_RETURN(TRUE); + } + + /* + There is no DETERMINISTIC clause for triggers, so can't check it. + But a trigger can in theory be used to do nasty things (if it supported + DROP for example) so we do the check for privileges. For now there is + already a stronger test right above; but when this stronger test will + be removed, the test below will hold. Because triggers have the same + nature as functions regarding binlogging: their body is implicitly + binlogged, so they share the same danger, so trust_function_creators + applies to them too. 
+ */ + if (!trust_function_creators && + (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) && + !(thd->security_ctx->master_access & PRIV_LOG_BIN_TRUSTED_SP_CREATOR)) + { + my_error(ER_BINLOG_CREATE_ROUTINE_NEED_SUPER, MYF(0)); + DBUG_RETURN(TRUE); + } + + /* Protect against concurrent create/drop */ + MDL_REQUEST_INIT(&mdl_request_for_trn, MDL_key::TRIGGER, + create ? tables->db.str : thd->lex->spname->m_db.str, + thd->lex->spname->m_name.str, + MDL_EXCLUSIVE, MDL_EXPLICIT); + if (thd->mdl_context.acquire_lock(&mdl_request_for_trn, + thd->variables.lock_wait_timeout)) + goto end; + + if (!create) + { + bool if_exists= thd->lex->if_exists(); + + /* + Protect the query table list from the temporary and potentially + destructive changes necessary to open the trigger's table. + */ + backup_of_table_list_done= 1; + thd->lex->reset_n_backup_query_tables_list(&backup); + /* + Restore Query_tables_list::sql_command, which was + reset above, as the code that writes the query to the + binary log assumes that this value corresponds to the + statement that is being executed. + */ + thd->lex->sql_command= backup.sql_command; + + if (opt_readonly && + !(thd->security_ctx->master_access & PRIV_IGNORE_READ_ONLY) && + !thd->slave_thread) + { + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only"); + goto end; + } + + if (add_table_for_trigger_internal(thd, thd->lex->spname, if_exists, &tables, + trn_path_buff)) + goto end; + + if (!tables) + { + DBUG_ASSERT(if_exists); + /* + Since the trigger does not exist, there is no associated table, + and therefore : + - no TRIGGER privileges to check, + - no trigger to drop, + - no table to lock/modify, + so the drop statement is successful. + */ + result= FALSE; + /* Still, we need to log the query ... */ + stmt_query.set(thd->query(), thd->query_length(), system_charset_info); + action_executed= 1; + goto end; + } + } + + /* + Check that the user has TRIGGER privilege on the subject table. 
+ */ + { + bool err_status; + TABLE_LIST **save_query_tables_own_last= thd->lex->query_tables_own_last; + thd->lex->query_tables_own_last= 0; + + err_status= check_table_access(thd, TRIGGER_ACL, tables, FALSE, 1, FALSE); + + thd->lex->query_tables_own_last= save_query_tables_own_last; + + if (err_status) + goto end; + } + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, tables); + + /* We should have only one table in table list. */ + DBUG_ASSERT(tables->next_global == 0); + + build_table_filename(path, sizeof(path) - 1, tables->db.str, tables->alias.str, ".frm", 0); + tables->required_type= dd_frm_type(NULL, path, NULL, NULL, NULL); + + /* We do not allow creation of triggers on temporary tables or sequence. */ + if (tables->required_type == TABLE_TYPE_SEQUENCE || + (create && thd->find_tmp_table_share(tables))) + { + my_error(ER_TRG_ON_VIEW_OR_TEMP_TABLE, MYF(0), tables->alias.str); + goto end; + } + + /* We also don't allow creation of triggers on views. */ + tables->required_type= TABLE_TYPE_NORMAL; + /* + Also prevent DROP TRIGGER from opening temporary table which might + shadow the subject table on which trigger to be dropped is defined. + */ + tables->open_type= OT_BASE_ONLY; + + /* Keep consistent with respect to other DDL statements */ + mysql_ha_rm_tables(thd, tables); + + if (thd->locked_tables_mode) + { + /* Under LOCK TABLES we must only accept write locked tables. */ + if (!(tables->table= find_table_for_mdl_upgrade(thd, tables->db.str, + tables->table_name.str, + NULL))) + goto end; + } + else + { + tables->table= open_n_lock_single_table(thd, tables, + TL_READ_NO_INSERT, 0); + if (! tables->table) + { + if (!create && thd->get_stmt_da()->sql_errno() == ER_NO_SUCH_TABLE) + { + /* TRN file exists but table does not. 
Drop the orphan trigger */ + thd->clear_error(); // Remove error from open + goto drop_orphan_trn; + } + goto end; + } + tables->table->use_all_columns(); + } + table= tables->table; + +#ifdef WITH_WSREP + if (WSREP(thd) && !wsrep_should_replicate_ddl(thd, table->s->db_type())) + goto end; +#endif + + /* Later on we will need it to downgrade the lock */ + mdl_ticket= table->mdl_ticket; + + /* + RENAME ensures that table is flushed properly and locked tables will + be removed from the active transaction + */ + if (wait_while_table_is_used(thd, table, HA_EXTRA_PREPARE_FOR_RENAME)) + goto end; + + lock_upgrade_done= TRUE; + + if (!table->triggers) + { + if (!create) + goto drop_orphan_trn; + if (!(table->triggers= new (&table->mem_root) Table_triggers_list(table))) + goto end; + } + +#if defined WITH_WSREP && defined ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("sync.mdev_20225", + { + const char act[]= + "now " + "wait_for signal.mdev_20225_continue"; + DBUG_ASSERT(!debug_sync_set_action(thd, + STRING_WITH_LEN(act))); + };); +#endif /* WITH_WSREP && ENABLED_DEBUG_SYNC */ + + if (create) + result= table->triggers->create_trigger(thd, tables, &stmt_query, + &ddl_log_state, + &ddl_log_state_tmp_file); + else + { + result= table->triggers->drop_trigger(thd, tables, + &thd->lex->spname->m_name, + &stmt_query, + &ddl_log_state); + if (result) + { + thd->clear_error(); // Remove error from drop trigger + goto drop_orphan_trn; + } + } + action_executed= 1; + + refresh_metadata= TRUE; + +end: + if (!result && action_executed) + { + ulonglong save_option_bits= thd->variables.option_bits; + backup_log_info ddl_log; + + debug_crash_here("ddl_log_drop_before_binlog"); + if (add_if_exists_to_binlog) + thd->variables.option_bits|= OPTION_IF_EXISTS; + thd->binlog_xid= thd->query_id; + ddl_log_update_xid(&ddl_log_state, thd->binlog_xid); + result= write_bin_log(thd, TRUE, stmt_query.ptr(), + stmt_query.length()); + thd->binlog_xid= 0; + thd->variables.option_bits= save_option_bits; + 
debug_crash_here("ddl_log_drop_after_binlog"); + + bzero(&ddl_log, sizeof(ddl_log)); + if (create) + ddl_log.query= { C_STRING_WITH_LEN("CREATE") }; + else + ddl_log.query= { C_STRING_WITH_LEN("DROP") }; + ddl_log.org_storage_engine_name= { C_STRING_WITH_LEN("TRIGGER") }; + ddl_log.org_database= thd->lex->spname->m_db; + ddl_log.org_table= thd->lex->spname->m_name; + backup_log_ddl(&ddl_log); + } + ddl_log_complete(&ddl_log_state); + debug_crash_here("ddl_log_drop_before_delete_tmp"); + /* delete any created log files */ + result|= ddl_log_revert(thd, &ddl_log_state_tmp_file); + + if (mdl_request_for_trn.ticket) + thd->mdl_context.release_lock(mdl_request_for_trn.ticket); + + if (refresh_metadata) + { + close_all_tables_for_name(thd, table->s, HA_EXTRA_NOT_USED, NULL); + + /* + Reopen the table if we were under LOCK TABLES. + Ignore the return value for now. It's better to + keep master/slave in consistent state. + */ + if (thd->locked_tables_list.reopen_tables(thd, false)) + thd->clear_error(); + + /* + Invalidate SP-cache. That's needed because triggers may change list of + pre-locking tables. + */ + sp_cache_invalidate(); + } + /* + If we are under LOCK TABLES we should restore original state of + meta-data locks. Otherwise all locks will be released along + with the implicit commit. + */ + if (thd->locked_tables_mode && tables && lock_upgrade_done) + mdl_ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE); + + /* Restore the query table list. Used only for drop trigger. */ + if (backup_of_table_list_done) + thd->lex->restore_backup_query_tables_list(&backup); + + if (!result) + { + my_ok(thd); + /* Drop statistics for this stored program from performance schema. 
*/ + MYSQL_DROP_SP(SP_TYPE_TRIGGER, + thd->lex->spname->m_db.str, static_cast(thd->lex->spname->m_db.length), + thd->lex->spname->m_name.str, static_cast(thd->lex->spname->m_name.length)); + } + + DBUG_RETURN(result); + +#ifdef WITH_WSREP +wsrep_error_label: + DBUG_ASSERT(result == 1); + goto end; +#endif + +drop_orphan_trn: + my_error(ER_REMOVED_ORPHAN_TRIGGER, MYF(ME_WARNING), + thd->lex->spname->m_name.str, tables->table_name.str); + mysql_file_delete(key_file_trg, trn_path_buff, MYF(0)); + result= thd->is_error(); + add_if_exists_to_binlog= 1; + action_executed= 1; // Ensure query is binlogged + stmt_query.set(thd->query(), thd->query_length(), system_charset_info); + goto end; +} + + +/** + Build stmt_query to write it in the bin-log, the statement to write in + the trigger file and the trigger definer. + + @param thd current thread context (including trigger definition in + LEX) + @param tables table list containing one open table for which the + trigger is created. + @param[out] stmt_query after successful return, this string contains + well-formed statement for creation this trigger. + @param[out] trigger_def query to be stored in trigger file. As stmt_query, + but without "OR REPLACE" and no FOLLOWS/PRECEDES. + @param[out] trg_definer The triggger definer. + @param[out] trg_definer_holder Used as a buffer for definer. + + @note + - Assumes that trigger name is fully qualified. + - NULL-string means the following LEX_STRING instance: + { str = 0; length = 0 }. + - In other words, definer_user and definer_host should contain + simultaneously NULL-strings (non-SUID/old trigger) or valid strings + (SUID/new trigger). 
+*/ + +static void build_trig_stmt_query(THD *thd, TABLE_LIST *tables, + String *stmt_query, String *trigger_def, + LEX_CSTRING *trg_definer, + char trg_definer_holder[]) +{ + LEX_CSTRING stmt_definition; + LEX *lex= thd->lex; + size_t prefix_trimmed, suffix_trimmed; + size_t original_length; + + /* + Create a query with the full trigger definition. + The original query is not appropriate, as it can miss the DEFINER=XXX part. + */ + stmt_query->append(STRING_WITH_LEN("CREATE ")); + + trigger_def->copy(*stmt_query); + + if (lex->create_info.or_replace()) + stmt_query->append(STRING_WITH_LEN("OR REPLACE ")); + + if (lex->sphead->suid() != SP_IS_NOT_SUID) + { + /* SUID trigger */ + lex->definer->set_lex_string(trg_definer, trg_definer_holder); + append_definer(thd, stmt_query, &lex->definer->user, &lex->definer->host); + append_definer(thd, trigger_def, &lex->definer->user, &lex->definer->host); + } + else + { + *trg_definer= empty_clex_str; + } + + + /* Create statement for binary logging */ + stmt_definition.str= lex->stmt_definition_begin; + stmt_definition.length= (lex->stmt_definition_end - + lex->stmt_definition_begin); + original_length= stmt_definition.length; + trim_whitespace(thd->charset(), &stmt_definition, &prefix_trimmed); + suffix_trimmed= original_length - stmt_definition.length - prefix_trimmed; + + stmt_query->append(stmt_definition.str, stmt_definition.length); + + /* Create statement for storing trigger (without trigger order) */ + if (lex->trg_chistics.ordering_clause == TRG_ORDER_NONE) + { + /* + Not that here stmt_definition doesn't end with a \0, which is + normally expected from a LEX_CSTRING + */ + trigger_def->append(stmt_definition.str, stmt_definition.length); + } + else + { + /* Copy data before FOLLOWS/PRECEDES trigger_name */ + trigger_def->append(stmt_definition.str, + (lex->trg_chistics.ordering_clause_begin - + lex->stmt_definition_begin) - prefix_trimmed); + /* Copy data after FOLLOWS/PRECEDES trigger_name */ + 
trigger_def->append(stmt_definition.str + + (lex->trg_chistics.ordering_clause_end - + lex->stmt_definition_begin) + - prefix_trimmed, + (lex->stmt_definition_end - + lex->trg_chistics.ordering_clause_end) - + suffix_trimmed); + } +} + + +/** + Create trigger for table. + + @param thd current thread context (including trigger definition in + LEX) + @param tables table list containing one open table for which the + trigger is created. + @param[out] stmt_query after successful return, this string contains + well-formed statement for creation this trigger. + + @note + - Assumes that trigger name is fully qualified. + - NULL-string means the following LEX_STRING instance: + { str = 0; length = 0 }. + - In other words, definer_user and definer_host should contain + simultaneously NULL-strings (non-SUID/old trigger) or valid strings + (SUID/new trigger). + + @retval + False success + @retval + True error +*/ + +bool Table_triggers_list::create_trigger(THD *thd, TABLE_LIST *tables, + String *stmt_query, + DDL_LOG_STATE *ddl_log_state, + DDL_LOG_STATE *ddl_log_state_tmp_file) +{ + LEX *lex= thd->lex; + TABLE *table= tables->table; + char file_buff[FN_REFLEN], trigname_buff[FN_REFLEN]; + char backup_file_buff[FN_REFLEN]; + char trg_definer_holder[USER_HOST_BUFF_SIZE]; + LEX_CSTRING backup_name= { backup_file_buff, 0 }; + LEX_CSTRING file, trigname_file; + Item_trigger_field *trg_field; + struct st_trigname trigname; + String trigger_definition; + Trigger *trigger= 0; + int error; + bool trigger_exists; + DBUG_ENTER("create_trigger"); + + if (check_for_broken_triggers()) + DBUG_RETURN(true); + + /* Trigger must be in the same schema as target table. */ + if (lex_string_cmp(table_alias_charset, &table->s->db, &lex->spname->m_db)) + { + my_error(ER_TRG_IN_WRONG_SCHEMA, MYF(0)); + DBUG_RETURN(true); + } + + if (sp_process_definer(thd)) + DBUG_RETURN(true); + + /* + Let us check if all references to fields in old/new versions of row in + this trigger are ok. 
+ + NOTE: We do it here more from ease of use standpoint. We still have to + do some checks on each execution. E.g. we can catch privilege changes + only during execution. Also in near future, when we will allow access + to other tables from trigger we won't be able to catch changes in other + tables... + + Since we don't plan to access to contents of the fields it does not + matter that we choose for both OLD and NEW values the same versions + of Field objects here. + */ + old_field= new_field= table->field; + + for (trg_field= lex->trg_table_fields.first; + trg_field; trg_field= trg_field->next_trg_field) + { + /* + NOTE: now we do not check privileges at CREATE TRIGGER time. This will + be changed in the future. + */ + trg_field->setup_field(thd, table, NULL); + + if (trg_field->fix_fields_if_needed(thd, (Item **)0)) + DBUG_RETURN(true); + } + + /* Ensure anchor trigger exists */ + if (lex->trg_chistics.ordering_clause != TRG_ORDER_NONE) + { + if (!(trigger= find_trigger(&lex->trg_chistics.anchor_trigger_name, 0)) || + trigger->event != lex->trg_chistics.event || + trigger->action_time != lex->trg_chistics.action_time) + { + my_error(ER_REFERENCED_TRG_DOES_NOT_EXIST, MYF(0), + lex->trg_chistics.anchor_trigger_name.str); + DBUG_RETURN(true); + } + } + + /* + Here we are creating file with triggers and save all triggers in it. + sql_create_definition_file() files handles renaming and backup of older + versions + */ + file.length= build_table_filename(file_buff, FN_REFLEN - 1, + tables->db.str, tables->table_name.str, + TRG_EXT, 0); + file.str= file_buff; + trigname_file.length= build_table_filename(trigname_buff, FN_REFLEN-1, + tables->db.str, + lex->spname->m_name.str, + TRN_EXT, 0); + trigname_file.str= trigname_buff; + + /* Use the filesystem to enforce trigger namespace constraints. 
*/ + trigger_exists= !access(trigname_file.str, F_OK); + + ddl_log_create_trigger(ddl_log_state, &tables->db, &tables->table_name, + &lex->spname->m_name, + trigger_exists || table->triggers->count ? + DDL_CREATE_TRIGGER_PHASE_DELETE_COPY : + DDL_CREATE_TRIGGER_PHASE_NO_OLD_TRIGGER); + + /* Make a backup of the .TRG file that we can restore in case of crash */ + if (table->triggers->count && + (sql_backup_definition_file(&file, &backup_name) || + ddl_log_delete_tmp_file(ddl_log_state_tmp_file, &backup_name, + ddl_log_state))) + DBUG_RETURN(true); + + if (trigger_exists) + { + if (lex->create_info.or_replace()) + { + LEX_CSTRING *sp_name= &thd->lex->spname->m_name; // alias + + /* Make a backup of the .TRN file that we can restore in case of crash */ + if (sql_backup_definition_file(&trigname_file, &backup_name) || + ddl_log_delete_tmp_file(ddl_log_state_tmp_file, &backup_name, + ddl_log_state)) + DBUG_RETURN(true); + ddl_log_update_phase(ddl_log_state, DDL_CREATE_TRIGGER_PHASE_OLD_COPIED); + + /* + The following can fail if the trigger is for another table or + there exists a .TRN file but there was no trigger for it in + the .TRG file + */ + if (unlikely(drop_trigger(thd, tables, sp_name, 0, 0))) + DBUG_RETURN(true); + } + else if (lex->create_info.if_not_exists()) + { + strxnmov(trigname_buff, sizeof(trigname_buff) - 1, tables->db.str, ".", + lex->spname->m_name.str, NullS); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_TRG_ALREADY_EXISTS, + ER_THD(thd, ER_TRG_ALREADY_EXISTS), + trigname_buff); + LEX_CSTRING trg_definer_tmp; + String trigger_def; + + /* + Log query with IF NOT EXISTS to binary log. This is in line with + CREATE TABLE IF NOT EXISTS. 
+ */ + build_trig_stmt_query(thd, tables, stmt_query, &trigger_def, + &trg_definer_tmp, trg_definer_holder); + DBUG_RETURN(false); + } + else + { + strxnmov(trigname_buff, sizeof(trigname_buff) - 1, tables->db.str, ".", + lex->spname->m_name.str, NullS); + my_error(ER_TRG_ALREADY_EXISTS, MYF(0), trigname_buff); + DBUG_RETURN(true); + } + } + else + { + if (table->triggers->count) + ddl_log_update_phase(ddl_log_state, DDL_CREATE_TRIGGER_PHASE_OLD_COPIED); + } + + trigname.trigger_table= tables->table_name; + + /* + We are not using lex->sphead here as an argument to Trigger() as we are + going to access lex->sphead later in build_trig_stmt_query() + */ + if (!(trigger= new (&table->mem_root) Trigger(this, 0))) + goto err; + + /* Time with in microseconds */ + trigger->hr_create_time= make_hr_time(thd->query_start(), + thd->query_start_sec_part()); + + /* Create trigger_name.TRN file to ensure trigger name is unique */ + if (sql_create_definition_file(NULL, &trigname_file, &trigname_file_type, + (uchar*)&trigname, trigname_file_parameters)) + { + delete trigger; + trigger= 0; + goto err; + } + + /* Populate the trigger object */ + + trigger->sql_mode= thd->variables.sql_mode; + build_trig_stmt_query(thd, tables, stmt_query, &trigger_definition, + &trigger->definer, trg_definer_holder); + + trigger->definition.str= trigger_definition.c_ptr(); + trigger->definition.length= trigger_definition.length(); + + /* + Fill character set information: + - client character set contains charset info only; + - connection collation contains pair {character set, collation}; + - database collation contains pair {character set, collation}; + */ + trigger->client_cs_name= thd->charset()->cs_name; + trigger->connection_cl_name= thd->variables.collation_connection->coll_name; + trigger->db_cl_name= get_default_db_collation(thd, tables->db.str)->coll_name; + trigger->name= lex->spname->m_name; + + /* Add trigger in it's correct place */ + add_trigger(lex->trg_chistics.event, + 
lex->trg_chistics.action_time, + lex->trg_chistics.ordering_clause, + &lex->trg_chistics.anchor_trigger_name, + trigger); + + /* Create trigger definition file .TRG */ + if (unlikely(create_lists_needed_for_files(thd->mem_root))) + goto err; + + debug_crash_here("ddl_log_create_before_create_trigger"); + error= sql_create_definition_file(NULL, &file, &triggers_file_type, + (uchar*)this, triggers_file_parameters); + debug_crash_here("ddl_log_create_after_create_trigger"); + + if (!error) + DBUG_RETURN(false); + +err: + DBUG_PRINT("error",("create trigger failed")); + if (trigger) + { + my_debug_put_break_here(); + /* Delete trigger from trigger list if it exists */ + find_trigger(&trigger->name, 1); + /* Free trigger memory */ + delete trigger; + } + + /* Recover the old .TRN and .TRG files & delete backup files */ + ddl_log_revert(thd, ddl_log_state); + /* All backup files are now deleted */ + ddl_log_complete(ddl_log_state_tmp_file); + DBUG_RETURN(true); +} + + +/** + Empty all list used to load and create .TRG file +*/ + +void Table_triggers_list::empty_lists() +{ + definitions_list.empty(); + definition_modes_list.empty(); + definers_list.empty(); + client_cs_names.empty(); + connection_cl_names.empty(); + db_cl_names.empty(); + hr_create_times.empty(); +} + + +/** + Create list of all trigger parameters for sql_create_definition_file() +*/ + +struct create_lists_param +{ + MEM_ROOT *root; +}; + + +bool Table_triggers_list::create_lists_needed_for_files(MEM_ROOT *root) +{ + create_lists_param param; + + empty_lists(); + param.root= root; + + return for_all_triggers(&Trigger::add_to_file_list, ¶m); +} + + +bool Trigger::add_to_file_list(void* param_arg) +{ + create_lists_param *param= (create_lists_param*) param_arg; + MEM_ROOT *mem_root= param->root; + + if (base->definitions_list.push_back(&definition, mem_root) || + base->definition_modes_list.push_back(&sql_mode, mem_root) || + base->definers_list.push_back(&definer, mem_root) || + 
base->client_cs_names.push_back(&client_cs_name, mem_root) || + base->connection_cl_names.push_back(&connection_cl_name, mem_root) || + base->db_cl_names.push_back(&db_cl_name, mem_root) || + base->hr_create_times.push_back(&hr_create_time.val, mem_root)) + return 1; + return 0; +} + + + +/** + Deletes the .TRG file for a table. + + @param path char buffer of size FN_REFLEN to be used + for constructing path to .TRG file. + @param db table's database name + @param table_name table's name + + @retval + False success + @retval + True error +*/ + +static bool rm_trigger_file(char *path, const LEX_CSTRING *db, + const LEX_CSTRING *table_name, myf MyFlags) +{ + build_table_filename(path, FN_REFLEN-1, db->str, table_name->str, TRG_EXT, 0); + return mysql_file_delete(key_file_trg, path, MyFlags); +} + + +/** + Deletes the .TRN file for a trigger. + + @param path char buffer of size FN_REFLEN to be used + for constructing path to .TRN file. + @param db trigger's database name + @param trigger_name trigger's name + + @retval + False success + @retval + True error +*/ + +bool rm_trigname_file(char *path, const LEX_CSTRING *db, + const LEX_CSTRING *trigger_name, myf MyFlags) +{ + build_table_filename(path, FN_REFLEN - 1, db->str, trigger_name->str, + TRN_EXT, 0); + return mysql_file_delete(key_file_trn, path, MyFlags); +} + + +/** + Helper function that saves .TRG file for Table_triggers_list object. 
+ + @param triggers Table_triggers_list object for which file should be saved + @param db Name of database for subject table + @param table_name Name of subject table + + @retval + FALSE Success + @retval + TRUE Error +*/ + +bool Table_triggers_list::save_trigger_file(THD *thd, const LEX_CSTRING *db, + const LEX_CSTRING *table_name) +{ + char file_buff[FN_REFLEN]; + LEX_CSTRING file; + DBUG_ENTER("Table_triggers_list::save_trigger_file"); + + if (create_lists_needed_for_files(thd->mem_root)) + DBUG_RETURN(true); + + file.length= build_table_filename(file_buff, FN_REFLEN - 1, db->str, table_name->str, + TRG_EXT, 0); + file.str= file_buff; + DBUG_RETURN(sql_create_definition_file(NULL, &file, &triggers_file_type, + (uchar*) this, + triggers_file_parameters)); +} + + +/** + Find a trigger with a given name + + @param name Name of trigger + @param remove_from_list If set, remove trigger if found +*/ + +Trigger *Table_triggers_list::find_trigger(const LEX_CSTRING *name, + bool remove_from_list) +{ + for (uint i= 0; i < (uint)TRG_EVENT_MAX; i++) + { + for (uint j= 0; j < (uint)TRG_ACTION_MAX; j++) + { + Trigger **parent, *trigger; + + for (parent= &triggers[i][j]; + (trigger= *parent); + parent= &trigger->next) + { + if (lex_string_cmp(table_alias_charset, + &trigger->name, name) == 0) + { + if (remove_from_list) + { + *parent= trigger->next; + count--; + } + return trigger; + } + } + } + } + return 0; +} + + +/** + Drop trigger for table. + + @param thd current thread context + (including trigger definition in LEX) + @param tables table list containing one open table for which trigger + is dropped. + @param[out] stmt_query after successful return, this string contains + well-formed statement for creation this trigger. + + @todo + Probably instead of removing .TRG file we should move + to archive directory but this should be done as part of + parse_file.cc functionality (because we will need it + elsewhere). 
+ + @retval + False success + @retval + True error +*/ + +bool Table_triggers_list::drop_trigger(THD *thd, TABLE_LIST *tables, + LEX_CSTRING *sp_name, + String *stmt_query, + DDL_LOG_STATE *ddl_log_state) +{ + char path[FN_REFLEN]; + Trigger *trigger; + DBUG_ENTER("Table_triggers_list::drop_trigger"); + + if (stmt_query) + stmt_query->set(thd->query(), thd->query_length(), stmt_query->charset()); + + /* Find and delete trigger from list */ + if (!(trigger= find_trigger(sp_name, true))) + { + my_message(ER_TRG_DOES_NOT_EXIST, ER_THD(thd, ER_TRG_DOES_NOT_EXIST), + MYF(0)); + DBUG_RETURN(1); + } + delete trigger; + + if (ddl_log_state) + { + LEX_CSTRING query= {0,0}; + if (stmt_query) + { + /* This code is executed in case of DROP TRIGGER */ + lex_string_set3(&query, thd->query(), thd->query_length()); + } + if (ddl_log_drop_trigger(ddl_log_state, + &tables->db, &tables->table_name, + sp_name, &query)) + goto err; + } + debug_crash_here("ddl_log_drop_before_drop_trigger"); + + if (!count) // If no more triggers + { + /* + It is safe to remove the trigger file. If something goes wrong during + drop or create ddl_log recovery will ensure that all related + trigger files are deleted or the original ones are restored. 
+ */ + if (rm_trigger_file(path, &tables->db, &tables->table_name, MYF(MY_WME))) + goto err; + } + else + { + if (save_trigger_file(thd, &tables->db, &tables->table_name)) + goto err; + } + + debug_crash_here("ddl_log_drop_before_drop_trn"); + + if (rm_trigname_file(path, &tables->db, sp_name, MYF(MY_WME))) + goto err; + + debug_crash_here("ddl_log_drop_after_drop_trigger"); + + DBUG_RETURN(0); + +err: + DBUG_RETURN(1); +} + + +Table_triggers_list::~Table_triggers_list() +{ + DBUG_ENTER("Table_triggers_list::~Table_triggers_list"); + + for (uint i= 0; i < (uint)TRG_EVENT_MAX; i++) + { + for (uint j= 0; j < (uint)TRG_ACTION_MAX; j++) + { + Trigger *next, *trigger; + for (trigger= get_trigger(i,j) ; trigger ; trigger= next) + { + next= trigger->next; + delete trigger; + } + } + } + + /* Free blobs used in insert */ + if (record0_field) + for (Field **fld_ptr= record0_field; *fld_ptr; fld_ptr++) + (*fld_ptr)->free(); + + if (record1_field) + for (Field **fld_ptr= record1_field; *fld_ptr; fld_ptr++) + delete *fld_ptr; + + DBUG_VOID_RETURN; +} + + +/** + Prepare array of Field objects referencing to TABLE::record[1] instead + of record[0] (they will represent OLD.* row values in ON UPDATE trigger + and in ON DELETE trigger which will be called during REPLACE execution). + + @param table pointer to TABLE object for which we are creating fields. 
+ + @retval + False success + @retval + True error +*/ + +bool Table_triggers_list::prepare_record_accessors(TABLE *table) +{ + Field **fld, **trg_fld; + + if ((has_triggers(TRG_EVENT_INSERT,TRG_ACTION_BEFORE) || + has_triggers(TRG_EVENT_UPDATE,TRG_ACTION_BEFORE)) && + (table->s->stored_fields != table->s->null_fields)) + + { + int null_bytes= (table->s->fields - table->s->null_fields + 7)/8; + if (!(extra_null_bitmap= (uchar*)alloc_root(&table->mem_root, null_bytes))) + return 1; + if (!(record0_field= (Field **)alloc_root(&table->mem_root, + (table->s->fields + 1) * + sizeof(Field*)))) + return 1; + + uchar *null_ptr= extra_null_bitmap; + uchar null_bit= 1; + for (fld= table->field, trg_fld= record0_field; *fld; fld++, trg_fld++) + { + if (!(*fld)->null_ptr && !(*fld)->vcol_info && !(*fld)->vers_sys_field()) + { + Field *f; + if (!(f= *trg_fld= (*fld)->make_new_field(&table->mem_root, table, + table == (*fld)->table))) + return 1; + + f->flags= (*fld)->flags; + f->invisible= (*fld)->invisible; + f->null_ptr= null_ptr; + f->null_bit= null_bit; + if (null_bit == 128) + null_ptr++, null_bit= 1; + else + null_bit*= 2; + } + else + *trg_fld= *fld; + } + *trg_fld= 0; + DBUG_ASSERT(null_ptr <= extra_null_bitmap + null_bytes); + bzero(extra_null_bitmap, null_bytes); + } + else + { + record0_field= table->field; + } + + if (has_triggers(TRG_EVENT_UPDATE,TRG_ACTION_BEFORE) || + has_triggers(TRG_EVENT_UPDATE,TRG_ACTION_AFTER) || + has_triggers(TRG_EVENT_DELETE,TRG_ACTION_BEFORE) || + has_triggers(TRG_EVENT_DELETE,TRG_ACTION_AFTER)) + { + if (!(record1_field= (Field **)alloc_root(&table->mem_root, + (table->s->fields + 1) * + sizeof(Field*)))) + return 1; + + for (fld= table->field, trg_fld= record1_field; *fld; fld++, trg_fld++) + { + if (!(*trg_fld= (*fld)->make_new_field(&table->mem_root, table, + table == (*fld)->table))) + return 1; + (*trg_fld)->move_field_offset((my_ptrdiff_t)(table->record[1] - + table->record[0])); + } + *trg_fld= 0; + } + return 0; +} + + +/** + 
Check whenever .TRG file for table exist and load all triggers it contains. + + @param thd current thread context + @param db table's database name + @param table_name table's name + @param table pointer to table object + @param names_only stop after loading trigger names + + @todo + A lot of things to do here e.g. how about other funcs and being + more paranoical ? + + @todo + This could be avoided if there is no triggers for UPDATE and DELETE. + + @retval + False no triggers or triggers where correctly loaded + @retval + True error (wrong trigger file) +*/ + +bool Table_triggers_list::check_n_load(THD *thd, const LEX_CSTRING *db, + const LEX_CSTRING *table_name, + TABLE *table, + bool names_only) +{ + char path_buff[FN_REFLEN]; + LEX_CSTRING path; + File_parser *parser; + LEX_CSTRING save_db; + DBUG_ENTER("Table_triggers_list::check_n_load"); + + path.length= build_table_filename(path_buff, FN_REFLEN - 1, + db->str, table_name->str, TRG_EXT, 0); + path.str= path_buff; + + // QQ: should we analyze errno somehow ? 
+ if (access(path_buff, F_OK)) + DBUG_RETURN(0); + + /* File exists so we got to load triggers */ + + if ((parser= sql_parse_prepare(&path, &table->mem_root, 1))) + { + if (is_equal(&triggers_file_type, parser->type())) + { + Handle_old_incorrect_sql_modes_hook sql_modes_hook(path.str); + LEX_CSTRING *trg_create_str; + ulonglong *trg_sql_mode, *trg_create_time; + Trigger *trigger; + Table_triggers_list *trigger_list= + new (&table->mem_root) Table_triggers_list(table); + if (unlikely(!trigger_list)) + goto error; + + if (parser->parse((uchar*)trigger_list, &table->mem_root, + triggers_file_parameters, + TRG_NUM_REQUIRED_PARAMETERS, + &sql_modes_hook)) + goto error; + + List_iterator_fast it(trigger_list->definitions_list); + + if (!trigger_list->definitions_list.is_empty() && + (trigger_list->client_cs_names.is_empty() || + trigger_list->connection_cl_names.is_empty() || + trigger_list->db_cl_names.is_empty())) + { + /* We will later use the current character sets */ + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_TRG_NO_CREATION_CTX, + ER_THD(thd, ER_TRG_NO_CREATION_CTX), + db->str, + table_name->str); + } + + table->triggers= trigger_list; + status_var_increment(thd->status_var.feature_trigger); + + List_iterator_fast itm(trigger_list->definition_modes_list); + List_iterator_fast it_definer(trigger_list->definers_list); + List_iterator_fast it_client_cs_name(trigger_list->client_cs_names); + List_iterator_fast it_connection_cl_name(trigger_list->connection_cl_names); + List_iterator_fast it_db_cl_name(trigger_list->db_cl_names); + List_iterator_fast + it_create_times(trigger_list->hr_create_times); + LEX *old_lex= thd->lex; + LEX lex; + sp_rcontext *save_spcont= thd->spcont; + sql_mode_t save_sql_mode= thd->variables.sql_mode; + + thd->lex= &lex; + + save_db= thd->db; + thd->reset_db(db); + while ((trg_create_str= it++)) + { + sp_head *sp; + sql_mode_t sql_mode; + LEX_CSTRING *trg_definer; + Trigger_creation_ctx *creation_ctx; + + /* + It is old 
file format then sql_mode may not be filled in. + We use one mode (current) for all triggers, because we have not + information about mode in old format. + */ + sql_mode= ((trg_sql_mode= itm++) ? *trg_sql_mode : + (ulonglong) global_system_variables.sql_mode); + + trg_create_time= it_create_times++; // May be NULL if old file + trg_definer= it_definer++; // May be NULL if old file + + thd->variables.sql_mode= sql_mode; + + Parser_state parser_state; + if (parser_state.init(thd, (char*) trg_create_str->str, + trg_create_str->length)) + goto err_with_lex_cleanup; + + if (!trigger_list->client_cs_names.is_empty()) + creation_ctx= Trigger_creation_ctx::create(thd, + db->str, + table_name->str, + it_client_cs_name++, + it_connection_cl_name++, + it_db_cl_name++); + else + { + /* Old file with not stored character sets. Use current */ + creation_ctx= new + Trigger_creation_ctx(thd->variables.character_set_client, + thd->variables.collation_connection, + thd->variables.collation_database); + } + + lex_start(thd); + thd->spcont= NULL; + + /* The following is for catching parse errors */ + lex.trg_chistics.event= TRG_EVENT_MAX; + lex.trg_chistics.action_time= TRG_ACTION_MAX; + Deprecated_trigger_syntax_handler error_handler; + thd->push_internal_handler(&error_handler); + + bool parse_error= parse_sql(thd, & parser_state, creation_ctx); + thd->pop_internal_handler(); + DBUG_ASSERT(!parse_error || lex.sphead == 0); + + /* + Not strictly necessary to invoke this method here, since we know + that we've parsed CREATE TRIGGER and not an + UPDATE/DELETE/INSERT/REPLACE/LOAD/CREATE TABLE, but we try to + maintain the invariant that this method is called for each + distinct statement, in case its logic is extended with other + types of analyses in future. 
+ */ + lex.set_trg_event_type_for_tables(); + + if (lex.sphead) + lex.sphead->m_sql_mode= sql_mode; + + if (unlikely(!(trigger= (new (&table->mem_root) + Trigger(trigger_list, lex.sphead))))) + goto err_with_lex_cleanup; + lex.sphead= NULL; /* Prevent double cleanup. */ + + sp= trigger->body; + + trigger->sql_mode= sql_mode; + trigger->definition= *trg_create_str; + trigger->hr_create_time.val= trg_create_time ? *trg_create_time : 0; + /* + Fix time if in 100th of second (comparison with max uint * 100 + (max possible timestamp in the old format)) + */ + if (trigger->hr_create_time.val < 429496729400ULL) + trigger->hr_create_time.val*= 10000; + trigger->name= sp ? sp->m_name : empty_clex_str; + trigger->on_table_name.str= (char*) lex.raw_trg_on_table_name_begin; + trigger->on_table_name.length= (lex.raw_trg_on_table_name_end - + lex.raw_trg_on_table_name_begin); + + /* Copy pointers to character sets to make trigger easier to use */ + trigger->client_cs_name= creation_ctx->get_client_cs()->cs_name; + trigger->connection_cl_name= creation_ctx->get_connection_cl()->coll_name; + trigger->db_cl_name= creation_ctx->get_db_cl()->coll_name; + + /* event can only be TRG_EVENT_MAX in case of fatal parse errors */ + if (lex.trg_chistics.event != TRG_EVENT_MAX) + trigger_list->add_trigger(lex.trg_chistics.event, + lex.trg_chistics.action_time, + TRG_ORDER_NONE, + &lex.trg_chistics.anchor_trigger_name, + trigger); + + if (unlikely(parse_error)) + { + const LEX_CSTRING *name; + + /* + In case of errors, disable all triggers for the table, but keep + the wrong trigger around to allow the user to fix it + */ + if (!trigger_list->m_has_unparseable_trigger) + trigger_list->set_parse_error_message(error_handler.get_error_message()); + /* Currently sphead is always set to NULL in case of a parse error */ + DBUG_ASSERT(lex.sphead == 0); + lex_end(&lex); + + if (likely((name= error_handler.get_trigger_name()))) + { + trigger->name= safe_lexcstrdup_root(&table->mem_root, *name); + if 
(unlikely(!trigger->name.str)) + goto err_with_lex_cleanup; + } + trigger->definer= ((!trg_definer || !trg_definer->length) ? + empty_clex_str : *trg_definer); + continue; + } + + sp->m_sql_mode= sql_mode; + sp->set_creation_ctx(creation_ctx); + + if (!trg_definer || !trg_definer->length) + { + /* + This trigger was created/imported from the previous version of + MySQL, which does not support trigger_list definers. We should emit + warning here. + */ + + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_TRG_NO_DEFINER, + ER_THD(thd, ER_TRG_NO_DEFINER), + db->str, sp->m_name.str); + + /* + Set definer to the '' to correct displaying in the information + schema. + */ + + sp->set_definer("", 0); + trigger->definer= empty_clex_str; + + /* + trigger_list without definer information are executed under the + authorization of the invoker. + */ + + sp->set_suid(SP_IS_NOT_SUID); + } + else + { + sp->set_definer(trg_definer->str, trg_definer->length); + trigger->definer= *trg_definer; + } + + sp->m_sp_share= MYSQL_GET_SP_SHARE(SP_TYPE_TRIGGER, + sp->m_db.str, static_cast(sp->m_db.length), + sp->m_name.str, static_cast(sp->m_name.length)); + +#ifndef DBUG_OFF + /* + Let us check that we correctly update trigger definitions when we + rename tables with trigger_list. + + In special cases like "RENAME TABLE `#mysql50#somename` TO `somename`" + or "ALTER DATABASE `#mysql50#somename` UPGRADE DATA DIRECTORY NAME" + we might be given table or database name with "#mysql50#" prefix (and + trigger's definiton contains un-prefixed version of the same name). + To remove this prefix we use check_n_cut_mysql50_prefix(). 
+ */ + + char fname[SAFE_NAME_LEN + 1]; + DBUG_ASSERT((!my_strcasecmp(table_alias_charset, lex.query_tables->db.str, db->str) || + (check_n_cut_mysql50_prefix(db->str, fname, sizeof(fname)) && + !my_strcasecmp(table_alias_charset, lex.query_tables->db.str, fname)))); + DBUG_ASSERT((!my_strcasecmp(table_alias_charset, lex.query_tables->table_name.str, table_name->str) || + (check_n_cut_mysql50_prefix(table_name->str, fname, sizeof(fname)) && + !my_strcasecmp(table_alias_charset, lex.query_tables->table_name.str, fname)))); +#endif + if (names_only) + { + lex_end(&lex); + continue; + } + + /* + Gather all Item_trigger_field objects representing access to fields + in old/new versions of row in trigger into lists containing all such + objects for the trigger_list with same action and timing. + */ + trigger->trigger_fields= lex.trg_table_fields.first; + /* + Also let us bind these objects to Field objects in table being + opened. + + We ignore errors here, because if even something is wrong we still + will be willing to open table to perform some operations (e.g. + SELECT)... + Anyway some things can be checked only during trigger execution. 
+ */ + for (Item_trigger_field *trg_field= lex.trg_table_fields.first; + trg_field; + trg_field= trg_field->next_trg_field) + { + trg_field->setup_field(thd, table, + &trigger->subject_table_grants); + } + + lex_end(&lex); + } + thd->reset_db(&save_db); + thd->lex= old_lex; + thd->spcont= save_spcont; + thd->variables.sql_mode= save_sql_mode; + + if (!names_only && trigger_list->prepare_record_accessors(table)) + goto error; + + /* Ensure no one is accidently using the temporary load lists */ + trigger_list->empty_lists(); + DBUG_RETURN(0); + +err_with_lex_cleanup: + lex_end(&lex); + thd->lex= old_lex; + thd->spcont= save_spcont; + thd->variables.sql_mode= save_sql_mode; + thd->reset_db(&save_db); + /* Fall trough to error */ + } + } + +error: + if (unlikely(!thd->is_error())) + { + /* + We don't care about this error message much because .TRG files will + be merged into .FRM anyway. + */ + my_error(ER_WRONG_OBJECT, MYF(0), + table_name->str, TRG_EXT + 1, "TRIGGER"); + } + DBUG_RETURN(1); +} + + +/** + Add trigger in the correct position according to ordering clause + Also update action order + + If anchor_trigger doesn't exist, add it last. 
+*/ + +void Table_triggers_list::add_trigger(trg_event_type event, + trg_action_time_type action_time, + trigger_order_type ordering_clause, + LEX_CSTRING *anchor_trigger_name, + Trigger *trigger) +{ + Trigger **parent= &triggers[event][action_time]; + uint position= 0; + + for ( ; *parent ; parent= &(*parent)->next, position++) + { + if (ordering_clause != TRG_ORDER_NONE && + !lex_string_cmp(table_alias_charset, anchor_trigger_name, + &(*parent)->name)) + { + if (ordering_clause == TRG_ORDER_FOLLOWS) + { + parent= &(*parent)->next; // Add after this one + position++; + } + break; + } + } + + /* Add trigger where parent points to */ + trigger->next= *parent; + *parent= trigger; + + /* Update action_orders and position */ + trigger->event= event; + trigger->action_time= action_time; + trigger->action_order= ++position; + while ((trigger= trigger->next)) + trigger->action_order= ++position; + + count++; +} + + +/** + Obtains and returns trigger metadata. + + @param trigger_stmt returns statement of trigger + @param body returns body of trigger + @param definer returns definer/creator of trigger. The caller is + responsible to allocate enough space for storing + definer information. + + @retval + False success + @retval + True error +*/ + +void Trigger::get_trigger_info(LEX_CSTRING *trigger_stmt, + LEX_CSTRING *trigger_body, + LEX_STRING *definer) +{ + DBUG_ENTER("get_trigger_info"); + + *trigger_stmt= definition; + if (!body) + { + /* Parse error */ + *trigger_body= definition; + *definer= empty_lex_str; + DBUG_VOID_RETURN; + } + *trigger_body= body->m_body_utf8; + + if (body->suid() == SP_IS_NOT_SUID) + { + *definer= empty_lex_str; + } + else + { + definer->length= strxmov(definer->str, body->m_definer.user.str, "@", + body->m_definer.host.str, NullS) - definer->str; + } + DBUG_VOID_RETURN; +} + + +/** + Find trigger's table from trigger identifier and add it to + the statement table list. + + @param[in] thd Thread context. + @param[in] trg_name Trigger name. 
+ @param[in] if_exists TRUE if SQL statement contains "IF EXISTS" clause. + That means a warning instead of error should be + thrown if trigger with given name does not exist. + @param[out] table Pointer to TABLE_LIST object for the + table trigger. + + @return Operation status + @retval FALSE On success. + @retval TRUE Otherwise. +*/ + +static bool add_table_for_trigger_internal(THD *thd, + const sp_name *trg_name, + bool if_exists, + TABLE_LIST **table, + char *trn_path_buff) +{ + LEX *lex= thd->lex; + LEX_CSTRING trn_path= { trn_path_buff, 0 }; + LEX_CSTRING tbl_name= null_clex_str; + DBUG_ENTER("add_table_for_trigger_internal"); + + build_trn_path(thd, trg_name, (LEX_STRING*) &trn_path); + + if (check_trn_exists(&trn_path)) + { + if (if_exists) + { + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_NOTE, + ER_TRG_DOES_NOT_EXIST, + ER_THD(thd, ER_TRG_DOES_NOT_EXIST)); + + *table= NULL; + + DBUG_RETURN(FALSE); + } + + my_error(ER_TRG_DOES_NOT_EXIST, MYF(0)); + DBUG_RETURN(TRUE); + } + + if (load_table_name_for_trigger(thd, trg_name, &trn_path, &tbl_name)) + DBUG_RETURN(TRUE); + + *table= sp_add_to_query_tables(thd, lex, &trg_name->m_db, + &tbl_name, TL_IGNORE, + MDL_SHARED_NO_WRITE); + + DBUG_RETURN(*table ? FALSE : TRUE); +} + + +/* + Same as above, but with an allocated buffer. + This is called by mysql_excute_command() in is here to keep stack + space down in the caller. +*/ + +bool add_table_for_trigger(THD *thd, + const sp_name *trg_name, + bool if_exists, + TABLE_LIST **table) +{ + char trn_path_buff[FN_REFLEN]; + return add_table_for_trigger_internal(thd, trg_name, if_exists, + table, trn_path_buff); +} + + +/** + Drop all triggers for table. 
+ + @param thd current thread context + @param db schema for table + @param name name for table + + @retval + False success + @retval + True error +*/ + +bool Table_triggers_list::drop_all_triggers(THD *thd, const LEX_CSTRING *db, + const LEX_CSTRING *name, + myf MyFlags) +{ + TABLE table; + char path[FN_REFLEN]; + bool result= 0; + DBUG_ENTER("Table_triggers_list::drop_all_triggers"); + + table.reset(); + init_sql_alloc(key_memory_Table_trigger_dispatcher, + &table.mem_root, 8192, 0, MYF(MY_WME)); + + if (Table_triggers_list::check_n_load(thd, db, name, &table, 1)) + { + result= 1; + /* We couldn't parse trigger file, best to just remove it */ + rm_trigger_file(path, db, name, MyFlags); + goto end; + } + if (table.triggers) + { + for (uint i= 0; i < (uint)TRG_EVENT_MAX; i++) + { + for (uint j= 0; j < (uint)TRG_ACTION_MAX; j++) + { + Trigger *trigger; + for (trigger= table.triggers->get_trigger(i,j) ; + trigger ; + trigger= trigger->next) + { + /* + Trigger, which body we failed to parse during call + Table_triggers_list::check_n_load(), might be missing name. + Such triggers have zero-length name and are skipped here. + */ + if (trigger->name.length && + rm_trigname_file(path, db, &trigger->name, MyFlags)) + { + /* + Instead of immediately bailing out with error if we were unable + to remove .TRN file we will try to drop other files. + */ + result= 1; + } + /* Drop statistics for this stored program from performance schema. */ + MYSQL_DROP_SP(SP_TYPE_TRIGGER, db->str, static_cast(db->length), + trigger->name.str, static_cast(trigger->name.length)); + } + } + } + if (rm_trigger_file(path, db, name, MyFlags)) + result= 1; + delete table.triggers; + } +end: + free_root(&table.mem_root, MYF(0)); + DBUG_RETURN(result); +} + + +/** + Update .TRG file after renaming triggers' subject table + (change name of table in triggers' definitions). 
+ + @param thd Thread context + @param old_db_name Old database of subject table + @param new_db_name New database of subject table + @param old_table_name Old subject table's name + @param new_table_name New subject table's name + + @retval + FALSE Success + @retval + TRUE Failure +*/ + +struct change_table_name_param +{ + THD *thd; + LEX_CSTRING *old_db_name; + LEX_CSTRING *new_db_name; + LEX_CSTRING *new_table_name; + Trigger *stopper; +}; + + +bool +Table_triggers_list:: +change_table_name_in_triggers(THD *thd, + const LEX_CSTRING *old_db_name, + const LEX_CSTRING *new_db_name, + const LEX_CSTRING *old_table_name, + const LEX_CSTRING *new_table_name) +{ + struct change_table_name_param param; + sql_mode_t save_sql_mode= thd->variables.sql_mode; + char path_buff[FN_REFLEN]; + + param.thd= thd; + param.new_table_name= const_cast(new_table_name); + + for_all_triggers(&Trigger::change_table_name, ¶m); + + thd->variables.sql_mode= save_sql_mode; + + if (unlikely(thd->is_fatal_error)) + return TRUE; /* OOM */ + + if (save_trigger_file(thd, new_db_name, new_table_name)) + return TRUE; + + if (rm_trigger_file(path_buff, old_db_name, old_table_name, MYF(MY_WME))) + { + (void) rm_trigger_file(path_buff, new_db_name, new_table_name, + MYF(MY_WME)); + return TRUE; + } + return FALSE; +} + + +bool Trigger::change_table_name(void* param_arg) +{ + change_table_name_param *param= (change_table_name_param*) param_arg; + THD *thd= param->thd; + LEX_CSTRING *new_table_name= param->new_table_name; + LEX_CSTRING *def= &definition, new_def; + size_t on_q_table_name_len, before_on_len; + String buff; + + thd->variables.sql_mode= sql_mode; + + /* Construct CREATE TRIGGER statement with new table name. 
*/ + buff.length(0); + + /* WARNING: 'on_table_name' is supposed to point inside 'def' */ + DBUG_ASSERT(on_table_name.str > def->str); + DBUG_ASSERT(on_table_name.str < (def->str + def->length)); + before_on_len= on_table_name.str - def->str; + + buff.append(def->str, before_on_len); + buff.append(STRING_WITH_LEN("ON ")); + append_identifier(thd, &buff, new_table_name); + buff.append(STRING_WITH_LEN(" ")); + on_q_table_name_len= buff.length() - before_on_len; + buff.append(on_table_name.str + on_table_name.length, + def->length - (before_on_len + on_table_name.length)); + /* + It is OK to allocate some memory on table's MEM_ROOT since this + table instance will be thrown out at the end of rename anyway. + */ + new_def.str= (char*) memdup_root(&base->trigger_table->mem_root, buff.ptr(), + buff.length()); + new_def.length= buff.length(); + on_table_name.str= new_def.str + before_on_len; + on_table_name.length= on_q_table_name_len; + definition= new_def; + return 0; +} + + +/** + Iterate though Table_triggers_list::names_list list and update + .TRN files after renaming triggers' subject table. + + @param old_db_name Old database of subject table + @param new_db_name New database of subject table + @param new_table_name New subject table's name + @param stopper Pointer to Table_triggers_list::names_list at + which we should stop updating. + + @retval + 0 Success + @retval + non-0 Failure, pointer to Table_triggers_list::names_list element + for which update failed. 
+*/ + +Trigger * +Table_triggers_list:: +change_table_name_in_trignames(const LEX_CSTRING *old_db_name, + const LEX_CSTRING *new_db_name, + const LEX_CSTRING *new_table_name, + Trigger *trigger) +{ + struct change_table_name_param param; + param.old_db_name= const_cast(old_db_name); + param.new_db_name= const_cast(new_db_name); + param.new_table_name= const_cast(new_table_name); + param.stopper= trigger; + + return for_all_triggers(&Trigger::change_on_table_name, ¶m); +} + + +bool Trigger::change_on_table_name(void* param_arg) +{ + change_table_name_param *param= (change_table_name_param*) param_arg; + + char trigname_buff[FN_REFLEN]; + struct st_trigname trigname; + LEX_CSTRING trigname_file; + + if (param->stopper == this) + return 0; // Stop processing + + trigname_file.length= build_table_filename(trigname_buff, FN_REFLEN-1, + param->new_db_name->str, name.str, + TRN_EXT, 0); + trigname_file.str= trigname_buff; + + trigname.trigger_table= *param->new_table_name; + + if (base->create_lists_needed_for_files(current_thd->mem_root)) + return true; + + if (sql_create_definition_file(NULL, &trigname_file, &trigname_file_type, + (uchar*)&trigname, trigname_file_parameters)) + return true; + + /* Remove stale .TRN file in case of database upgrade */ + if (param->old_db_name) + { + if (rm_trigname_file(trigname_buff, param->old_db_name, &name, + MYF(MY_WME))) + { + (void) rm_trigname_file(trigname_buff, param->new_db_name, &name, + MYF(MY_WME)); + return 1; + } + } + return 0; +} + + +/* + Check if we can rename triggers in change_table_name() + The idea is to ensure that it is close to impossible that + change_table_name() should fail. 
+ + @return 0 ok + @return 1 Error: rename of triggers would fail +*/ + +bool +Table_triggers_list::prepare_for_rename(THD *thd, + TRIGGER_RENAME_PARAM *param, + const LEX_CSTRING *db, + const LEX_CSTRING *old_alias, + const LEX_CSTRING *old_table, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_table) +{ + TABLE *table= ¶m->table; + bool result= 0; + DBUG_ENTER("Table_triggers_lists::prepare_change_table_name"); + + init_sql_alloc(key_memory_Table_trigger_dispatcher, + &table->mem_root, 8192, 0, MYF(0)); + + DBUG_ASSERT(my_strcasecmp(table_alias_charset, db->str, new_db->str) || + my_strcasecmp(table_alias_charset, old_alias->str, + new_table->str)); + + if (Table_triggers_list::check_n_load(thd, db, old_table, table, TRUE)) + { + result= 1; + goto end; + } + if (table->triggers) + { + if (table->triggers->check_for_broken_triggers()) + { + result= 1; + goto end; + } + /* + Since triggers should be in the same schema as their subject tables + moving table with them between two schemas raises too many questions. + (E.g. what should happen if in new schema we already have trigger + with same name ?). + + In case of "ALTER DATABASE `#mysql50#db1` UPGRADE DATA DIRECTORY NAME" + we will be given table name with "#mysql50#" prefix + To remove this prefix we use check_n_cut_mysql50_prefix(). + */ + if (my_strcasecmp(table_alias_charset, db->str, new_db->str)) + { + char dbname[SAFE_NAME_LEN + 1]; + if (check_n_cut_mysql50_prefix(db->str, dbname, sizeof(dbname)) && + !my_strcasecmp(table_alias_charset, dbname, new_db->str)) + { + param->upgrading50to51= TRUE; + } + else + { + my_error(ER_TRG_IN_WRONG_SCHEMA, MYF(0)); + result= 1; + goto end; + } + } + } + +end: + param->got_error= result; + DBUG_RETURN(result); +} + + +/** + Update .TRG and .TRN files after renaming triggers' subject table. 
+ + @param[in,out] thd Thread context + @param[in] db Old database of subject table + @param[in] old_alias Old alias of subject table + @param[in] old_table Old name of subject table. The difference between + old_table and old_alias is that in case of lower_case_table_names + old_table == lowercase(old_alias) + @param[in] new_db New database for subject table + @param[in] new_table New name of subject table + + @note + This method tries to leave trigger related files in consistent state, + i.e. it either will complete successfully, or will fail leaving files + in their initial state. + Also this method assumes that subject table is not renamed to itself. + This method needs to be called under an exclusive table metadata lock. + + @retval FALSE Success + @retval TRUE Error +*/ + +bool Table_triggers_list::change_table_name(THD *thd, + TRIGGER_RENAME_PARAM *param, + const LEX_CSTRING *db, + const LEX_CSTRING *old_alias, + const LEX_CSTRING *old_table, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_table) +{ + TABLE *table= ¶m->table; + bool result= 0; + bool upgrading50to51= FALSE; + Trigger *err_trigger; + DBUG_ENTER("Table_triggers_list::change_table_name"); + + DBUG_ASSERT(!param->got_error); + /* + This method interfaces the mysql server code protected by + an exclusive metadata lock. + */ + DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, db->str, + old_table->str, + MDL_EXCLUSIVE)); + + if (table->triggers) + { + if (unlikely(table->triggers->change_table_name_in_triggers(thd, db, new_db, + old_alias, + new_table))) + { + result= 1; + goto end; + } + if ((err_trigger= table->triggers-> + change_table_name_in_trignames( upgrading50to51 ? db : NULL, + new_db, new_table, 0))) + { + /* + If we were unable to update one of .TRN files properly we will + revert all changes that we have done and report about error. + We assume that we will be able to undo our changes without errors + (we can't do much if there will be an error anyway). 
+ */ + (void) table->triggers->change_table_name_in_trignames( + upgrading50to51 ? new_db : NULL, db, + old_alias, err_trigger); + (void) table->triggers->change_table_name_in_triggers( + thd, db, new_db, + new_table, old_alias); + result= 1; + goto end; + } + } + +end: + DBUG_RETURN(result); +} + + +/** + Execute trigger for given (event, time) pair. + + The operation executes trigger for the specified event (insert, update, + delete) and time (after, before) if it is set. + + @param thd + @param event + @param time_type + @param old_row_is_record1 + + @return Error status. + @retval FALSE on success. + @retval TRUE on error. +*/ + +bool Table_triggers_list::process_triggers(THD *thd, + trg_event_type event, + trg_action_time_type time_type, + bool old_row_is_record1) +{ + bool err_status; + Sub_statement_state statement_state; + Trigger *trigger; + SELECT_LEX *save_current_select; + + if (check_for_broken_triggers()) + return TRUE; + + if (!(trigger= get_trigger(event, time_type))) + return FALSE; + + if (old_row_is_record1) + { + old_field= record1_field; + new_field= record0_field; + } + else + { + DBUG_ASSERT(event == TRG_EVENT_DELETE); + new_field= record1_field; + old_field= record0_field; + } + /* + This trigger must have been processed by the pre-locking + algorithm. + */ + DBUG_ASSERT(trigger_table->pos_in_table_list->trg_event_map & trg2bit(event)); + + thd->reset_sub_statement_state(&statement_state, SUB_STMT_TRIGGER); + + /* + Reset current_select before call execute_trigger() and + restore it after return from one. This way error is set + in case of failure during trigger execution. 
+ */ + save_current_select= thd->lex->current_select; + + do { + thd->lex->current_select= NULL; + err_status= + trigger->body->execute_trigger(thd, + &trigger_table->s->db, + &trigger_table->s->table_name, + &trigger->subject_table_grants); + status_var_increment(thd->status_var.executed_triggers); + } while (!err_status && (trigger= trigger->next)); + thd->lex->current_select= save_current_select; + + thd->restore_sub_statement_state(&statement_state); + + return err_status; +} + + +/** + Add triggers for table to the set of routines used by statement. + Add tables used by them to statement table list. Do the same for + routines used by triggers. + + @param thd Thread context. + @param prelocking_ctx Prelocking context of the statement. + @param table_list Table list element for table with trigger. + + @retval FALSE Success. + @retval TRUE Failure. +*/ + +bool +Table_triggers_list:: +add_tables_and_routines_for_triggers(THD *thd, + Query_tables_list *prelocking_ctx, + TABLE_LIST *table_list) +{ + DBUG_ASSERT(static_cast(table_list->lock_type) >= + static_cast(TL_FIRST_WRITE)); + + for (int i= 0; i < (int)TRG_EVENT_MAX; i++) + { + if (table_list->trg_event_map & trg2bit(static_cast(i))) + { + for (int j= 0; j < (int)TRG_ACTION_MAX; j++) + { + Trigger *triggers= table_list->table->triggers->get_trigger(i,j); + + for ( ; triggers ; triggers= triggers->next) + { + sp_head *trigger= triggers->body; + + if (unlikely(!triggers->body)) // Parse error + continue; + + MDL_key key(MDL_key::TRIGGER, trigger->m_db.str, trigger->m_name.str); + + if (sp_add_used_routine(prelocking_ctx, thd->stmt_arena, + &key, &sp_handler_trigger, + table_list->belong_to_view)) + { + trigger->add_used_tables_to_table_list(thd, + &prelocking_ctx->query_tables_last, + table_list->belong_to_view); + sp_update_stmt_used_routines(thd, prelocking_ctx, + &trigger->m_sroutines, + table_list->belong_to_view); + trigger->propagate_attributes(prelocking_ctx); + } + } + } + } + } + return FALSE; +} + + 
+/** + Mark fields of subject table which we read/set in its triggers + as such. + + This method marks fields of subject table which are read/set in its + triggers as such (by properly updating TABLE::read_set/write_set) + and thus informs handler that values for these fields should be + retrieved/stored during execution of statement. + + @param thd Current thread context + @param event Type of event triggers for which we are going to inspect +*/ + +void Table_triggers_list::mark_fields_used(trg_event_type event) +{ + int action_time; + Item_trigger_field *trg_field; + DBUG_ENTER("Table_triggers_list::mark_fields_used"); + + for (action_time= 0; action_time < (int)TRG_ACTION_MAX; action_time++) + { + for (Trigger *trigger= get_trigger(event,action_time); + trigger ; + trigger= trigger->next) + { + for (trg_field= trigger->trigger_fields; + trg_field; + trg_field= trg_field->next_trg_field) + { + /* We cannot mark fields which does not present in table. */ + if (trg_field->field_idx != NO_CACHED_FIELD_INDEX) + { + DBUG_PRINT("info", ("marking field: %u", (uint) trg_field->field_idx)); + if (trg_field->get_settable_routine_parameter()) + bitmap_set_bit(trigger_table->write_set, trg_field->field_idx); + trigger_table->mark_column_with_deps( + trigger_table->field[trg_field->field_idx]); + } + } + } + } + trigger_table->file->column_bitmaps_signal(); + DBUG_VOID_RETURN; +} + + +/** + Signals to the Table_triggers_list that a parse error has occurred when + reading a trigger from file. This makes the Table_triggers_list enter an + error state flagged by m_has_unparseable_trigger == true. The error message + will be used whenever a statement invoking or manipulating triggers is + issued against the Table_triggers_list's table. + + @param error_message The error message thrown by the parser. 
+ */ +void Table_triggers_list::set_parse_error_message(char *error_message) +{ + m_has_unparseable_trigger= true; + strnmov(m_parse_error_message, error_message, + sizeof(m_parse_error_message)-1); +} + + +/** + Trigger BUG#14090 compatibility hook. + + @param[in,out] unknown_key reference on the line with unknown + parameter and the parsing point + @param[in] base base address for parameter writing + (structure like TABLE) + @param[in] mem_root MEM_ROOT for parameters allocation + @param[in] end the end of the configuration + + @note + NOTE: this hook process back compatibility for incorrectly written + sql_modes parameter (see BUG#14090). + + @retval + FALSE OK + @retval + TRUE Error +*/ + +#define INVALID_SQL_MODES_LENGTH 13 + +bool +Handle_old_incorrect_sql_modes_hook:: +process_unknown_string(const char *&unknown_key, uchar* base, + MEM_ROOT *mem_root, const char *end) +{ + DBUG_ENTER("Handle_old_incorrect_sql_modes_hook::process_unknown_string"); + DBUG_PRINT("info", ("unknown key: %60s", unknown_key)); + + if (unknown_key + INVALID_SQL_MODES_LENGTH + 1 < end && + unknown_key[INVALID_SQL_MODES_LENGTH] == '=' && + !memcmp(unknown_key, STRING_WITH_LEN("sql_modes"))) + { + THD *thd= current_thd; + const char *ptr= unknown_key + INVALID_SQL_MODES_LENGTH + 1; + + DBUG_PRINT("info", ("sql_modes affected by BUG#14090 detected")); + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_NOTE, + ER_OLD_FILE_FORMAT, + ER_THD(thd, ER_OLD_FILE_FORMAT), + (char *)path, "TRIGGER"); + if (get_file_options_ulllist(ptr, end, unknown_key, base, + &sql_modes_parameters, mem_root)) + { + DBUG_RETURN(TRUE); + } + /* + Set parsing pointer to the last symbol of string (\n) + 1) to avoid problem with \0 in the junk after sql_modes + 2) to speed up skipping this line by parser. + */ + unknown_key= ptr-1; + } + DBUG_RETURN(FALSE); +} + +#define INVALID_TRIGGER_TABLE_LENGTH 15 + +/** + Trigger BUG#15921 compatibility hook. 
For details see + Handle_old_incorrect_sql_modes_hook::process_unknown_string(). +*/ +bool +Handle_old_incorrect_trigger_table_hook:: +process_unknown_string(const char *&unknown_key, uchar* base, + MEM_ROOT *mem_root, const char *end) +{ + DBUG_ENTER("Handle_old_incorrect_trigger_table_hook::process_unknown_string"); + DBUG_PRINT("info", ("unknown key: %60s", unknown_key)); + + if (unknown_key + INVALID_TRIGGER_TABLE_LENGTH + 1 < end && + unknown_key[INVALID_TRIGGER_TABLE_LENGTH] == '=' && + !memcmp(unknown_key, STRING_WITH_LEN("trigger_table"))) + { + THD *thd= current_thd; + const char *ptr= unknown_key + INVALID_TRIGGER_TABLE_LENGTH + 1; + + DBUG_PRINT("info", ("trigger_table affected by BUG#15921 detected")); + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_NOTE, + ER_OLD_FILE_FORMAT, + ER_THD(thd, ER_OLD_FILE_FORMAT), + (char *)path, "TRIGGER"); + + if (!(ptr= parse_escaped_string(ptr, end, mem_root, trigger_table_value))) + { + my_error(ER_FPARSER_ERROR_IN_PARAMETER, MYF(0), "trigger_table", + unknown_key); + DBUG_RETURN(TRUE); + } + + /* Set parsing pointer to the last symbol of string (\n). */ + unknown_key= ptr-1; + } + DBUG_RETURN(FALSE); +} + + +/** + Contruct path to TRN-file. + + @param thd[in] Thread context. + @param trg_name[in] Trigger name. + @param trn_path[out] Variable to store constructed path +*/ + +void build_trn_path(THD *thd, const sp_name *trg_name, LEX_STRING *trn_path) +{ + /* Construct path to the TRN-file. */ + + trn_path->length= build_table_filename(trn_path->str, + FN_REFLEN - 1, + trg_name->m_db.str, + trg_name->m_name.str, + TRN_EXT, + 0); +} + + +/** + Check if TRN-file exists. + + @return + @retval TRUE if TRN-file does not exist. + @retval FALSE if TRN-file exists. +*/ + +bool check_trn_exists(const LEX_CSTRING *trn_path) +{ + return access(trn_path->str, F_OK) != 0; +} + + +/** + Retrieve table name for given trigger. + + @param thd[in] Thread context. + @param trg_name[in] Trigger name. 
+ @param trn_path[in] Path to the corresponding TRN-file. + @param tbl_name[out] Variable to store retrieved table name. + + @return Error status. + @retval FALSE on success. + @retval TRUE if table name could not be retrieved. +*/ + +bool load_table_name_for_trigger(THD *thd, + const sp_name *trg_name, + const LEX_CSTRING *trn_path, + LEX_CSTRING *tbl_name) +{ + File_parser *parser; + struct st_trigname trn_data; + Handle_old_incorrect_trigger_table_hook trigger_table_hook( + trn_path->str, + &trn_data.trigger_table); + DBUG_ENTER("load_table_name_for_trigger"); + + /* Parse the TRN-file. */ + + if (!(parser= sql_parse_prepare(trn_path, thd->mem_root, TRUE))) + DBUG_RETURN(TRUE); + + if (!is_equal(&trigname_file_type, parser->type())) + { + my_error(ER_WRONG_OBJECT, MYF(0), + trg_name->m_name.str, + TRN_EXT + 1, + "TRIGGERNAME"); + + DBUG_RETURN(TRUE); + } + + if (parser->parse((uchar*) &trn_data, thd->mem_root, + trigname_file_parameters, 1, + &trigger_table_hook)) + DBUG_RETURN(TRUE); + + /* Copy trigger table name. */ + + *tbl_name= trn_data.trigger_table; + + /* That's all. */ + + DBUG_RETURN(FALSE); +} diff --git a/sql/sql_trigger.h b/sql/sql_trigger.h new file mode 100644 index 00000000..774dca7c --- /dev/null +++ b/sql/sql_trigger.h @@ -0,0 +1,370 @@ +#ifndef SQL_TRIGGER_INCLUDED +#define SQL_TRIGGER_INCLUDED + +/* + Copyright (c) 2004, 2011, Oracle and/or its affiliates. + Copyright (c) 2017, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include + +/* Forward declarations */ + +class Item_trigger_field; +class sp_head; +class sp_name; +class Query_tables_list; +struct TABLE_LIST; +class Query_tables_list; +typedef struct st_ddl_log_state DDL_LOG_STATE; + +/** Event on which trigger is invoked. */ +enum trg_event_type +{ + TRG_EVENT_INSERT= 0, + TRG_EVENT_UPDATE= 1, + TRG_EVENT_DELETE= 2, + TRG_EVENT_MAX +}; + +static inline uint8 trg2bit(enum trg_event_type trg) +{ return static_cast(1 << static_cast(trg)); } + +#include "table.h" /* GRANT_INFO */ + +/* + We need this two enums here instead of sql_lex.h because + at least one of them is used by Item_trigger_field interface. + + Time when trigger is invoked (i.e. before or after row actually + inserted/updated/deleted). +*/ +enum trg_action_time_type +{ + TRG_ACTION_BEFORE= 0, TRG_ACTION_AFTER= 1, TRG_ACTION_MAX +}; + +enum trigger_order_type +{ + TRG_ORDER_NONE= 0, + TRG_ORDER_FOLLOWS= 1, + TRG_ORDER_PRECEDES= 2 +}; + + +struct st_trg_execution_order +{ + /** + FOLLOWS or PRECEDES as specified in the CREATE TRIGGER statement. + */ + enum trigger_order_type ordering_clause; + + /** + Trigger name referenced in the FOLLOWS/PRECEDES clause of the + CREATE TRIGGER statement. 
+ */ + LEX_CSTRING anchor_trigger_name; +}; + + +/* + Parameter to change_table_name_in_triggers() +*/ + +class TRIGGER_RENAME_PARAM +{ +public: + TABLE table; + bool upgrading50to51; + bool got_error; + + TRIGGER_RENAME_PARAM() + { + upgrading50to51= got_error= 0; + table.reset(); + } + ~TRIGGER_RENAME_PARAM() + { + reset(); + } + void reset(); +}; + + +class Table_triggers_list; + +/** + The trigger object +*/ + +class Trigger :public Sql_alloc +{ +public: + Trigger(Table_triggers_list *base_arg, sp_head *code): + base(base_arg), body(code), next(0), trigger_fields(0), action_order(0) + { + bzero((char *)&subject_table_grants, sizeof(subject_table_grants)); + } + ~Trigger(); + Table_triggers_list *base; + sp_head *body; + Trigger *next; /* Next trigger of same type */ + + /** + Heads of the lists linking items for all fields used in triggers + grouped by event and action_time. + */ + Item_trigger_field *trigger_fields; + LEX_CSTRING name; + LEX_CSTRING on_table_name; /* Raw table name */ + LEX_CSTRING definition; + LEX_CSTRING definer; + + /* Character sets used */ + LEX_CSTRING client_cs_name; + LEX_CSTRING connection_cl_name; + LEX_CSTRING db_cl_name; + + GRANT_INFO subject_table_grants; + sql_mode_t sql_mode; + /* Store create time. Can't be mysql_time_t as this holds also sub seconds */ + my_hrtime_t hr_create_time; // Create time timestamp in microseconds + trg_event_type event; + trg_action_time_type action_time; + uint action_order; + + bool is_fields_updated_in_trigger(MY_BITMAP *used_fields); + void get_trigger_info(LEX_CSTRING *stmt, LEX_CSTRING *body, + LEX_STRING *definer); + /* Functions executed over each active trigger */ + bool change_on_table_name(void* param_arg); + bool change_table_name(void* param_arg); + bool add_to_file_list(void* param_arg); +}; + +typedef bool (Trigger::*Triggers_processor)(void *arg); + +/** + This class holds all information about triggers of table. 
+*/ + +class Table_triggers_list: public Sql_alloc +{ + friend class Trigger; + + /* Points to first trigger for a certain type */ + Trigger *triggers[TRG_EVENT_MAX][TRG_ACTION_MAX]; + /** + Copy of TABLE::Field array which all fields made nullable + (using extra_null_bitmap, if needed). Used for NEW values in + BEFORE INSERT/UPDATE triggers. + */ + Field **record0_field; + uchar *extra_null_bitmap; + /** + Copy of TABLE::Field array with field pointers set to TABLE::record[1] + buffer instead of TABLE::record[0] (used for OLD values in on UPDATE + trigger and DELETE trigger when it is called for REPLACE). + */ + Field **record1_field; + /** + During execution of trigger new_field and old_field should point to the + array of fields representing new or old version of row correspondingly + (so it can point to TABLE::field or to Tale_triggers_list::record1_field) + */ + Field **new_field; + Field **old_field; + + /* TABLE instance for which this triggers list object was created */ + TABLE *trigger_table; + + /** + This flag indicates that one of the triggers was not parsed successfully, + and as a precaution the object has entered a state where all trigger + access results in errors until all such triggers are dropped. It is not + safe to add triggers since we don't know if the broken trigger has the + same name or event type. Nor is it safe to invoke any trigger for the + aforementioned reasons. The only safe operations are drop_trigger and + drop_all_triggers. + + @see Table_triggers_list::set_parse_error + */ + bool m_has_unparseable_trigger; + + /** + This error will be displayed when the user tries to manipulate or invoke + triggers on a table that has broken triggers. It will get set only once + per statement and thus will contain the first parse error encountered in + the trigger file. + */ + char m_parse_error_message[MYSQL_ERRMSG_SIZE]; + uint count; /* Number of triggers */ + +public: + /** + Field responsible for storing triggers definitions in file. 
+ It have to be public because we are using it directly from parser. + */ + List definitions_list; + /** + List of sql modes for triggers + */ + List definition_modes_list; + /** Create times for triggers */ + List hr_create_times; + + List definers_list; + + /* Character set context, used for parsing and executing triggers. */ + + List client_cs_names; + List connection_cl_names; + List db_cl_names; + + /* End of character ser context. */ + + Table_triggers_list(TABLE *table_arg) + :record0_field(0), extra_null_bitmap(0), record1_field(0), + trigger_table(table_arg), + m_has_unparseable_trigger(false), count(0) + { + bzero((char *) triggers, sizeof(triggers)); + } + ~Table_triggers_list(); + + bool create_trigger(THD *thd, TABLE_LIST *table, String *stmt_query, + DDL_LOG_STATE *ddl_log_state, + DDL_LOG_STATE *ddl_log_state_tmp_file); + bool drop_trigger(THD *thd, TABLE_LIST *table, + LEX_CSTRING *sp_name, + String *stmt_query, DDL_LOG_STATE *ddl_log_state); + bool process_triggers(THD *thd, trg_event_type event, + trg_action_time_type time_type, + bool old_row_is_record1); + void empty_lists(); + bool create_lists_needed_for_files(MEM_ROOT *root); + bool save_trigger_file(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *table_name); + + static bool check_n_load(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *table_name, + TABLE *table, bool names_only); + static bool drop_all_triggers(THD *thd, const LEX_CSTRING *db, + const LEX_CSTRING *table_name, myf MyFlags); + static bool prepare_for_rename(THD *thd, TRIGGER_RENAME_PARAM *param, + const LEX_CSTRING *db, + const LEX_CSTRING *old_alias, + const LEX_CSTRING *old_table, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_table); + static bool change_table_name(THD *thd, TRIGGER_RENAME_PARAM *param, + const LEX_CSTRING *db, + const LEX_CSTRING *old_alias, + const LEX_CSTRING *old_table, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_table); + void add_trigger(trg_event_type event_type, + 
trg_action_time_type action_time, + trigger_order_type ordering_clause, + LEX_CSTRING *anchor_trigger_name, + Trigger *trigger); + Trigger *get_trigger(trg_event_type event_type, + trg_action_time_type action_time) + { + return triggers[event_type][action_time]; + } + /* Simpler version of the above, to avoid casts in the code */ + Trigger *get_trigger(uint event_type, uint action_time) + { + return get_trigger((trg_event_type) event_type, + (trg_action_time_type) action_time); + } + + bool has_triggers(trg_event_type event_type, + trg_action_time_type action_time) + { + return get_trigger(event_type,action_time) != 0; + } + bool has_delete_triggers() + { + return (has_triggers(TRG_EVENT_DELETE,TRG_ACTION_BEFORE) || + has_triggers(TRG_EVENT_DELETE,TRG_ACTION_AFTER)); + } + + void mark_fields_used(trg_event_type event); + + void set_parse_error_message(char *error_message); + + friend class Item_trigger_field; + + bool add_tables_and_routines_for_triggers(THD *thd, + Query_tables_list *prelocking_ctx, + TABLE_LIST *table_list); + + Field **nullable_fields() { return record0_field; } + void reset_extra_null_bitmap() + { + size_t null_bytes= (trigger_table->s->fields - + trigger_table->s->null_fields + 7)/8; + bzero(extra_null_bitmap, null_bytes); + } + + Trigger *find_trigger(const LEX_CSTRING *name, bool remove_from_list); + + Trigger* for_all_triggers(Triggers_processor func, void *arg); + +private: + bool prepare_record_accessors(TABLE *table); + Trigger *change_table_name_in_trignames(const LEX_CSTRING *old_db_name, + const LEX_CSTRING *new_db_name, + const LEX_CSTRING *new_table_name, + Trigger *trigger); + bool change_table_name_in_triggers(THD *thd, + const LEX_CSTRING *old_db_name, + const LEX_CSTRING *new_db_name, + const LEX_CSTRING *old_table_name, + const LEX_CSTRING *new_table_name); + + bool check_for_broken_triggers() + { + if (m_has_unparseable_trigger) + { + my_message(ER_PARSE_ERROR, m_parse_error_message, MYF(0)); + return true; + } + return false; 
+ } +}; + + +bool add_table_for_trigger(THD *thd, + const sp_name *trg_name, + bool continue_if_not_exist, + TABLE_LIST **table); + +void build_trn_path(THD *thd, const sp_name *trg_name, LEX_STRING *trn_path); + +bool check_trn_exists(const LEX_CSTRING *trn_path); + +bool load_table_name_for_trigger(THD *thd, + const sp_name *trg_name, + const LEX_CSTRING *trn_path, + LEX_CSTRING *tbl_name); +bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create); +bool rm_trigname_file(char *path, const LEX_CSTRING *db, + const LEX_CSTRING *trigger_name, myf MyFlags); + +extern const char * const TRG_EXT; +extern const char * const TRN_EXT; + +#endif /* SQL_TRIGGER_INCLUDED */ diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc new file mode 100644 index 00000000..beeee9da --- /dev/null +++ b/sql/sql_truncate.cc @@ -0,0 +1,585 @@ +/* Copyright (c) 2010, 2015, Oracle and/or its affiliates. + Copyright (c) 2012, 2018, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "debug_sync.h" // DEBUG_SYNC +#include "table.h" // TABLE, FOREIGN_KEY_INFO +#include "sql_class.h" // THD +#include "sql_base.h" // open_and_lock_tables +#include "sql_table.h" // write_bin_log +#include "datadict.h" // dd_recreate_table() +#include "lock.h" // MYSQL_OPEN_* flags +#include "sql_acl.h" // DROP_ACL +#include "sql_parse.h" // check_one_table_access() +#include "sql_truncate.h" +#include "wsrep_mysqld.h" +#include "sql_show.h" //append_identifier() +#include "sql_select.h" +#include "sql_delete.h" + +/** + Append a list of field names to a string. + + @param str The string. + @param fields The list of field names. + + @return TRUE on failure, FALSE otherwise. +*/ + +static bool fk_info_append_fields(THD *thd, String *str, + List *fields) +{ + bool res= FALSE; + LEX_CSTRING *field; + List_iterator_fast it(*fields); + + while ((field= it++)) + { + res|= append_identifier(thd, str, field); + res|= str->append(STRING_WITH_LEN(", ")); + } + + str->chop(); + str->chop(); + + return res; +} + + +/** + Generate a foreign key description suitable for a error message. + + @param thd Thread context. + @param fk_info The foreign key information. + + @return A human-readable string describing the foreign key. 
+*/ + +static const char *fk_info_str(THD *thd, FOREIGN_KEY_INFO *fk_info) +{ + bool res= FALSE; + char buffer[STRING_BUFFER_USUAL_SIZE*2]; + String str(buffer, sizeof(buffer), system_charset_info); + + str.length(0); + + /* + `db`.`tbl`, CONSTRAINT `id` FOREIGN KEY (`fk`) REFERENCES `db`.`tbl` (`fk`) + */ + + res|= append_identifier(thd, &str, fk_info->foreign_db); + res|= str.append('.'); + res|= append_identifier(thd, &str, fk_info->foreign_table); + res|= str.append(STRING_WITH_LEN(", CONSTRAINT ")); + res|= append_identifier(thd, &str, fk_info->foreign_id); + res|= str.append(STRING_WITH_LEN(" FOREIGN KEY (")); + res|= fk_info_append_fields(thd, &str, &fk_info->foreign_fields); + res|= str.append(STRING_WITH_LEN(") REFERENCES ")); + res|= append_identifier(thd, &str, fk_info->referenced_db); + res|= str.append('.'); + res|= append_identifier(thd, &str, fk_info->referenced_table); + res|= str.append(STRING_WITH_LEN(" (")); + res|= fk_info_append_fields(thd, &str, &fk_info->referenced_fields); + res|= str.append(')'); + + return res ? NULL : thd->strmake(str.ptr(), str.length()); +} + + +/** + Check and emit a fatal error if the table which is going to be + affected by TRUNCATE TABLE is a parent table in some non-self- + referencing foreign key. + + @remark The intention is to allow truncate only for tables that + are not dependent on other tables. + + @param thd Thread context. + @param table Table handle. + + @retval FALSE This table is not parent in a non-self-referencing foreign + key. Statement can proceed. + @retval TRUE This table is parent in a non-self-referencing foreign key, + error was emitted. +*/ + +static bool +fk_truncate_illegal_if_parent(THD *thd, TABLE *table) +{ + FOREIGN_KEY_INFO *fk_info; + List fk_list; + List_iterator_fast it; + + /* + Bail out early if the table is not referenced by a foreign key. + In this case, the table could only be, if at all, a child table. + */ + if (! 
table->file->referenced_by_foreign_key()) + return FALSE; + + /* + This table _is_ referenced by a foreign key. At this point, only + self-referencing keys are acceptable. For this reason, get the list + of foreign keys referencing this table in order to check the name + of the child (dependent) tables. + */ + table->file->get_parent_foreign_key_list(thd, &fk_list); + + /* Out of memory when building list. */ + if (unlikely(thd->is_error())) + return TRUE; + + it.init(fk_list); + + /* Loop over the set of foreign keys for which this table is a parent. */ + while ((fk_info= it++)) + { + if (lex_string_cmp(system_charset_info, fk_info->referenced_db, + &table->s->db) || + lex_string_cmp(system_charset_info, fk_info->referenced_table, + &table->s->table_name) || + lex_string_cmp(system_charset_info, fk_info->foreign_db, + &table->s->db) || + lex_string_cmp(system_charset_info, fk_info->foreign_table, + &table->s->table_name)) + break; + } + + /* Table is parent in a non-self-referencing foreign key. */ + if (fk_info) + { + my_error(ER_TRUNCATE_ILLEGAL_FK, MYF(0), fk_info_str(thd, fk_info)); + return TRUE; + } + + return FALSE; +} + + +/* + Open and truncate a locked table. + + @param thd Thread context. + @param table_ref Table list element for the table to be truncated. + @param is_tmp_table True if element refers to a temp table. + + @retval TRUNCATE_OK Truncate was successful and statement can be safely + binlogged. + @retval TRUNCATE_FAILED_BUT_BINLOG Truncate failed but still go ahead with + binlogging as in case of non transactional tables + partial truncation is possible. + + @retval TRUNCATE_FAILED_SKIP_BINLOG Truncate was not successful hence donot + binlong the statement. 
*/

enum Sql_cmd_truncate_table::truncate_result
Sql_cmd_truncate_table::handler_truncate(THD *thd, TABLE_LIST *table_ref,
                                         bool is_tmp_table)
{
  int error= 0;
  uint flags= 0;
  TABLE *table;
  DBUG_ENTER("Sql_cmd_truncate_table::handler_truncate");

  /*
    Can't recreate, the engine must mechanically delete all rows
    in the table. Use open_and_lock_tables() to open a write cursor.
  */

  /* If it is a temporary table, no need to take locks. */
  if (!is_tmp_table)
  {
    /* We don't need to load triggers. */
    DBUG_ASSERT(table_ref->trg_event_map == 0);
    /*
      Our metadata lock guarantees that no transaction is reading
      or writing into the table. Yet, to open a write cursor we need
      a thr_lock lock. Allow to open base tables only.
    */
    table_ref->required_type= TABLE_TYPE_NORMAL;
    /*
      Ignore pending FLUSH TABLES since we don't want to release
      the MDL lock taken above and otherwise there is no way to
      wait for FLUSH TABLES in deadlock-free fashion.
    */
    flags= MYSQL_OPEN_IGNORE_FLUSH;
    /*
      Even though we have an MDL lock on the table here, we don't
      pass MYSQL_OPEN_HAS_MDL_LOCK to open_and_lock_tables
      since to truncate a MERGE table, we must open and lock
      merge children, and on those we don't have an MDL lock.
      Thus clear the ticket to satisfy MDL asserts.
    */
    table_ref->mdl_request.ticket= NULL;
  }

  /* Open the table as it will handle some required preparations. */
  if (open_and_lock_tables(thd, table_ref, FALSE, flags))
    DBUG_RETURN(TRUNCATE_FAILED_SKIP_BINLOG);

  /* Whether to truncate regardless of foreign keys. */
  if (! (thd->variables.option_bits & OPTION_NO_FOREIGN_KEY_CHECKS))
    if (fk_truncate_illegal_if_parent(thd, table_ref->table))
      DBUG_RETURN(TRUNCATE_FAILED_SKIP_BINLOG);

  table= table_ref->table;

  if ((table->file->ht->flags & HTON_TRUNCATE_REQUIRES_EXCLUSIVE_USE) &&
      !is_tmp_table)
  {
    if (wait_while_table_is_used(thd, table, HA_EXTRA_FORCE_REOPEN))
      DBUG_RETURN(TRUNCATE_FAILED_SKIP_BINLOG);
    /*
      Get rid of all TABLE instances belonging to this thread
      except one to be used for TRUNCATE
    */
    close_all_tables_for_name(thd, table->s,
                              HA_EXTRA_NOT_USED,
                              table);
  }

  error= table->file->ha_truncate();

  /* Record the successful truncate of a base table in the backup/DDL log. */
  if (!is_tmp_table && !error)
  {
    backup_log_info ddl_log;
    bzero(&ddl_log, sizeof(ddl_log));
    ddl_log.query= { C_STRING_WITH_LEN("TRUNCATE") };
    ddl_log.org_partitioned= table->file->partition_engine();
    lex_string_set(&ddl_log.org_storage_engine_name,
                   table->file->real_table_type());
    ddl_log.org_database= table->s->db;
    ddl_log.org_table= table->s->table_name;
    ddl_log.org_table_id= table->s->tabledef_version;
    backup_log_ddl(&ddl_log);
  }

  if (unlikely(error))
  {
    table->file->print_error(error, MYF(0));
    /*
      If the truncate method is not implemented then we don't binlog the
      statement. If truncation has failed in a transactional engine then
      also we don't binlog the statement. Only for a non-transactional
      engine do we binlog in spite of errors (rows may have been removed).
    */
    if (error == HA_ERR_WRONG_COMMAND ||
        table->file->has_transactions_and_rollback())
      DBUG_RETURN(TRUNCATE_FAILED_SKIP_BINLOG);
    else
      DBUG_RETURN(TRUNCATE_FAILED_BUT_BINLOG);
  }
  DBUG_RETURN(TRUNCATE_OK);
}


/*
  Handle locking a base table for truncate.

  @param[in]  thd               Thread context.
  @param[in]  table_ref         Table list element for the table to
                                be truncated.
  @param[out] hton_can_recreate Set to TRUE if table can be dropped
                                and recreated.

  @retval  FALSE  Success.
  @retval  TRUE   Error.
+*/ + +bool Sql_cmd_truncate_table::lock_table(THD *thd, TABLE_LIST *table_ref, + bool *hton_can_recreate) +{ + handlerton *hton; + bool versioned; + bool sequence= false; + TABLE *table= NULL; + DBUG_ENTER("Sql_cmd_truncate_table::lock_table"); + + /* Lock types are set in the parser. */ + DBUG_ASSERT(table_ref->lock_type == TL_WRITE); + /* The handler truncate protocol dictates a exclusive lock. */ + DBUG_ASSERT(table_ref->mdl_request.type == MDL_EXCLUSIVE); + + /* + Before doing anything else, acquire a metadata lock on the table, + or ensure we have one. We don't use open_and_lock_tables() + right away because we want to be able to truncate (and recreate) + corrupted tables, those that we can't fully open. + + MySQL manual documents that TRUNCATE can be used to repair a + damaged table, i.e. a table that can not be fully "opened". + In particular MySQL manual says: As long as the table format + file tbl_name.frm is valid, the table can be re-created as + an empty table with TRUNCATE TABLE, even if the data or index + files have become corrupted. 
+ */ + if (thd->locked_tables_mode) + { + if (!(table= find_table_for_mdl_upgrade(thd, table_ref->db.str, + table_ref->table_name.str, NULL))) + DBUG_RETURN(TRUE); + + versioned= table->versioned(); + hton= table->file->ht; +#ifdef WITH_WSREP + if (WSREP(thd) && + !wsrep_should_replicate_ddl(thd, hton)) + DBUG_RETURN(TRUE); +#endif + + table_ref->mdl_request.ticket= table->mdl_ticket; + } + else + { + DBUG_ASSERT(table_ref->next_global == NULL); + if (lock_table_names(thd, table_ref, NULL, + thd->variables.lock_wait_timeout, 0)) + DBUG_RETURN(TRUE); + + TABLE_SHARE *share= tdc_acquire_share(thd, table_ref, GTS_TABLE | GTS_VIEW); + if (share == NULL) + DBUG_RETURN(TRUE); + DBUG_ASSERT(share != UNUSABLE_TABLE_SHARE); + + versioned= share->versioned; + sequence= share->table_type == TABLE_TYPE_SEQUENCE; + hton= share->db_type(); +#ifdef WITH_WSREP + if (WSREP(thd) && + hton != view_pseudo_hton && + !wsrep_should_replicate_ddl(thd, hton)) + { + tdc_release_share(share); + DBUG_RETURN(TRUE); + } +#endif + + if (!versioned) + tdc_remove_referenced_share(thd, share); + else + tdc_release_share(share); + + if (hton == view_pseudo_hton) + { + my_error(ER_NO_SUCH_TABLE, MYF(0), table_ref->db.str, + table_ref->table_name.str); + DBUG_RETURN(TRUE); + } + } + + *hton_can_recreate= (!sequence && + ha_check_storage_engine_flag(hton, HTON_CAN_RECREATE)); + + if (versioned) + { + my_error(ER_VERS_NOT_SUPPORTED, MYF(0), "TRUNCATE TABLE"); + DBUG_RETURN(TRUE); + } + + /* + A storage engine can recreate or truncate the table only if there + are no references to it from anywhere, i.e. no cached TABLE in the + table cache. + */ + if (thd->locked_tables_mode) + { + DEBUG_SYNC(thd, "upgrade_lock_for_truncate"); + /* To remove the table from the cache we need an exclusive lock. */ + if (wait_while_table_is_used(thd, table, + *hton_can_recreate ? 
HA_EXTRA_PREPARE_FOR_DROP : HA_EXTRA_NOT_USED)) + DBUG_RETURN(TRUE); + m_ticket_downgrade= table->mdl_ticket; + /* Close if table is going to be recreated. */ + if (*hton_can_recreate) + close_all_tables_for_name(thd, table->s, HA_EXTRA_NOT_USED, NULL); + } + DBUG_RETURN(FALSE); +} + + +/* + Optimized delete of all rows by doing a full generate of the table. + + @remark Will work even if the .MYI and .MYD files are destroyed. + In other words, it works as long as the .FRM is intact and + the engine supports re-create. + + @param thd Thread context. + @param table_ref Table list element for the table to be truncated. + + @retval FALSE Success. + @retval TRUE Error. +*/ + +bool Sql_cmd_truncate_table::truncate_table(THD *thd, TABLE_LIST *table_ref) +{ + int error; + bool binlog_stmt; + DBUG_ENTER("Sql_cmd_truncate_table::truncate_table"); + + DBUG_ASSERT((!table_ref->table) || + (table_ref->table && table_ref->table->s)); + + /* Initialize, or reinitialize in case of reexecution (SP). */ + m_ticket_downgrade= NULL; + + /* If it is a temporary table, no need to take locks. */ + if (is_temporary_table(table_ref)) + { + /* In RBR, the statement is not binlogged if the table is temporary. */ + binlog_stmt= !thd->is_current_stmt_binlog_format_row(); + + thd->close_unused_temporary_table_instances(table_ref); + + error= handler_truncate(thd, table_ref, TRUE); + + /* + No need to invalidate the query cache, queries with temporary + tables are not in the cache. No need to write to the binary + log a failed row-by-row delete even if under RBR as the table + might not exist on the slave. + */ + } + else /* It's not a temporary table. 
*/ + { + bool hton_can_recreate; + +#ifdef WITH_WSREP + if (WSREP(thd) && wsrep_thd_is_local(thd)) + { + wsrep::key_array keys; + /* Do not start TOI if table is not found */ + if (!wsrep_append_fk_parent_table(thd, table_ref, &keys)) + { + if (keys.empty()) + { + if (wsrep_to_isolation_begin(thd, table_ref->db.str, table_ref->table_name.str, NULL)) + DBUG_RETURN(TRUE); + } + else + { + if (wsrep_to_isolation_begin(thd, NULL, NULL, table_ref, NULL, &keys)) + DBUG_RETURN(TRUE); + } + } + } +#endif /* WITH_WSREP */ + + if (lock_table(thd, table_ref, &hton_can_recreate)) + DBUG_RETURN(TRUE); + + /* + This is mainly here for truncate_notembedded.test, but it is still + useful to check killed after we got the lock + */ + + if (thd->killed) + DBUG_RETURN(TRUE); + + if (hton_can_recreate) + { + /* + The storage engine can truncate the table by creating an + empty table with the same structure. + */ + error= dd_recreate_table(thd, table_ref->db.str, table_ref->table_name.str); + + if (thd->locked_tables_mode && thd->locked_tables_list.reopen_tables(thd, false)) + { + thd->locked_tables_list.unlink_all_closed_tables(thd, NULL, 0); + error= 1; + } + /* No need to binlog a failed truncate-by-recreate. */ + binlog_stmt= !error; + } + else + { + /* + The engine does not support truncate-by-recreate. + Attempt to use the handler truncate method. + */ + error= handler_truncate(thd, table_ref, FALSE); + + if (error == TRUNCATE_OK && thd->locked_tables_mode && + (table_ref->table->file->ht->flags & + (HTON_REQUIRES_CLOSE_AFTER_TRUNCATE | + HTON_TRUNCATE_REQUIRES_EXCLUSIVE_USE))) + { + thd->locked_tables_list.mark_table_for_reopen(table_ref->table); + if (unlikely(thd->locked_tables_list.reopen_tables(thd, false))) + thd->locked_tables_list.unlink_all_closed_tables(thd, NULL, 0); + } + + /* + All effects of a TRUNCATE TABLE operation are committed even if + truncation fails in the case of non transactional tables. Thus, the + query must be written to the binary log. 
The only exception is a + unimplemented truncate method. + */ + if (unlikely(error == TRUNCATE_OK || error == TRUNCATE_FAILED_BUT_BINLOG)) + binlog_stmt= true; + else + binlog_stmt= false; + } + + /* + If we tried to open a MERGE table and failed due to problems with the + children tables, the table will have been closed and table_ref->table + will be invalid. Reset the pointer here in any case as + query_cache_invalidate does not need a valid TABLE object. + */ + table_ref->table= NULL; + query_cache_invalidate3(thd, table_ref, FALSE); + } + + /* DDL is logged in statement format, regardless of binlog format. */ + if (binlog_stmt) + error|= write_bin_log(thd, !error, thd->query(), thd->query_length()); + + /* + A locked table ticket was upgraded to a exclusive lock. After the + the query has been written to the binary log, downgrade the lock + to a shared one. + */ + if (m_ticket_downgrade) + m_ticket_downgrade->downgrade_lock(MDL_SHARED_NO_READ_WRITE); + + DBUG_RETURN(error); +} + +/** + Execute a TRUNCATE statement at runtime. + + @param thd The current thread. + + @return FALSE on success. +*/ + +bool Sql_cmd_truncate_table::execute(THD *thd) +{ + bool res= TRUE; + TABLE_LIST *table= thd->lex->first_select_lex()->table_list.first; + DBUG_ENTER("Sql_cmd_truncate_table::execute"); + + if (check_one_table_access(thd, DROP_ACL, table)) + DBUG_RETURN(res); + + if (! (res= truncate_table(thd, table))) + my_ok(thd); + + DBUG_RETURN(res); +} diff --git a/sql/sql_truncate.h b/sql/sql_truncate.h new file mode 100644 index 00000000..5704da1d --- /dev/null +++ b/sql/sql_truncate.h @@ -0,0 +1,71 @@ +#ifndef SQL_TRUNCATE_INCLUDED +#define SQL_TRUNCATE_INCLUDED +/* Copyright (c) 2010, 2014, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +class THD; +struct TABLE_LIST; + +/** + Sql_cmd_truncate_table represents the TRUNCATE statement. +*/ +class Sql_cmd_truncate_table : public Sql_cmd +{ +private: + /* Set if a lock must be downgraded after truncate is done. */ + MDL_ticket *m_ticket_downgrade; + +public: + /** + Constructor, used to represent a TRUNCATE statement. + */ + Sql_cmd_truncate_table() = default; + + virtual ~Sql_cmd_truncate_table() = default; + + /** + Execute a TRUNCATE statement at runtime. + @param thd the current thread. + @return false on success. + */ + bool execute(THD *thd); + + virtual enum_sql_command sql_command_code() const + { + return SQLCOM_TRUNCATE; + } + +protected: + enum truncate_result{ + TRUNCATE_OK=0, + TRUNCATE_FAILED_BUT_BINLOG, + TRUNCATE_FAILED_SKIP_BINLOG + }; + + /** Handle locking a base table for truncate. */ + bool lock_table(THD *, TABLE_LIST *, bool *); + + /** Truncate table via the handler method. */ + enum truncate_result handler_truncate(THD *, TABLE_LIST *, bool); + + /** + Optimized delete of all rows by doing a full regenerate of the table. + Depending on the storage engine, it can be accomplished through a + drop and recreate or via the handler truncate method. 
+ */ + bool truncate_table(THD *, TABLE_LIST *); +}; + +#endif diff --git a/sql/sql_tvc.cc b/sql/sql_tvc.cc new file mode 100644 index 00000000..c5a2b16a --- /dev/null +++ b/sql/sql_tvc.cc @@ -0,0 +1,1209 @@ +/* Copyright (c) 2017, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "mariadb.h" +#include "sql_list.h" +#include "sql_tvc.h" +#include "sql_class.h" +#include "opt_range.h" +#include "sql_select.h" +#include "sql_explain.h" +#include "sql_parse.h" +#include "sql_cte.h" +#include "my_json_writer.h" + + +/** + @brief + Walk through all VALUES items. 
+ @param + @param processor - the processor to call for each Item + @param walk_qubquery - if should dive into subquery items + @param argument - the argument to pass recursively + @retval + true on error + false on success +*/ +bool table_value_constr::walk_values(Item_processor processor, + bool walk_subquery, + void *argument) +{ + List_iterator_fast list_item_it(lists_of_values); + while (List_item *list= list_item_it++) + { + List_iterator_fast item_it(*list); + while (Item *item= item_it++) + { + if (item->walk(&Item::unknown_splocal_processor, false, argument)) + return true; + } + } + return false; +} + + +/** + @brief + Fix fields for TVC values + + @param + @param thd The context of the statement + @param li The iterator on the list of lists + + @details + Call fix_fields procedure for TVC values. + + @retval + true if an error was reported + false otherwise +*/ + +bool fix_fields_for_tvc(THD *thd, List_iterator_fast &li) +{ + DBUG_ENTER("fix_fields_for_tvc"); + List_item *lst; + li.rewind(); + + while ((lst= li++)) + { + List_iterator it(*lst); + Item *item; + + while ((item= it++)) + { + /* + Some items have already been fixed. + For example Item_splocal items get fixed in + Item_splocal::append_for_log(), which is called from subst_spvars() + while replacing their values to NAME_CONST()s. + So fix only those that have not been. + */ + if (item->fix_fields_if_needed_for_scalar(thd, it.ref()) || + item->check_is_evaluable_expression_or_error()) + DBUG_RETURN(true); + } + } + DBUG_RETURN(false); +} + + +/** + @brief + Defines types of matrix columns elements where matrix rows are defined by + some lists of values. + + @param + @param thd The context of the statement + @param li The iterator on the list of lists + @param holders The structure where types of matrix columns are stored + @param first_list_el_count Count of the list values. It should be the same + for each list of lists elements. 
It contains + number of elements of the first list from list of + lists. + + @details + For each list list_a from list of lists the procedure gets its elements + types and aggregates them with the previous ones stored in holders. If + list_a is the first one in the list of lists its elements types are put in + holders. The errors can be reported when count of list_a elements is + different from the first_list_el_count. Also error can be reported whe + n aggregation can't be made. + + @retval + true if an error was reported + false otherwise +*/ + +bool join_type_handlers_for_tvc(THD *thd, List_iterator_fast &li, + Type_holder *holders, uint first_list_el_count) +{ + DBUG_ENTER("join_type_handlers_for_tvc"); + List_item *lst; + li.rewind(); + bool first= true; + + while ((lst= li++)) + { + List_iterator_fast it(*lst); + Item *item; + + if (first_list_el_count != lst->elements) + { + my_message(ER_WRONG_NUMBER_OF_VALUES_IN_TVC, + ER_THD(thd, ER_WRONG_NUMBER_OF_VALUES_IN_TVC), + MYF(0)); + DBUG_RETURN(true); + } + for (uint pos= 0; (item=it++); pos++) + { + const Type_handler *item_type_handler= item->real_type_handler(); + if (first) + holders[pos].set_handler(item_type_handler); + else if (holders[pos].aggregate_for_result(item_type_handler)) + { + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPES2_FOR_OPERATION, MYF(0), + holders[pos].type_handler()->name().ptr(), + item_type_handler->name().ptr(), + "TABLE VALUE CONSTRUCTOR"); + DBUG_RETURN(true); + } + } + first= false; + } + DBUG_RETURN(false); +} + + +/** + @brief + Define attributes of matrix columns elements where matrix rows are defined + by some lists of values. + + @param + @param thd The context of the statement + @param li The iterator on the list of lists + @param holders The structure where names of matrix columns are stored + @param count_of_lists Count of list of lists elements + @param first_list_el_count Count of the list values. It should be the same + for each list of lists elements. 
It contains + number of elements of the first list from list + of lists. + + @details + For each list list_a from list of lists the procedure gets its elements + attributes and aggregates them with the previous ones stored in holders. + The errors can be reported when aggregation can't be made. + + @retval + true if an error was reported + false otherwise +*/ + +bool get_type_attributes_for_tvc(THD *thd, + List_iterator_fast &li, + Type_holder *holders, uint count_of_lists, + uint first_list_el_count) +{ + DBUG_ENTER("get_type_attributes_for_tvc"); + List_item *lst; + li.rewind(); + + for (uint pos= 0; pos < first_list_el_count; pos++) + { + if (holders[pos].alloc_arguments(thd, count_of_lists)) + DBUG_RETURN(true); + } + + while ((lst= li++)) + { + List_iterator_fast it(*lst); + Item *item; + for (uint holder_pos= 0 ; (item= it++); holder_pos++) + { + DBUG_ASSERT(item->fixed()); + holders[holder_pos].add_argument(item); + } + } + + for (uint pos= 0; pos < first_list_el_count; pos++) + { + if (holders[pos].aggregate_attributes(thd)) + DBUG_RETURN(true); + } + DBUG_RETURN(false); +} + + +/** + @brief + Prepare of TVC + + @param + @param thd The context of the statement + @param sl The select where this TVC is defined + @param tmp_result Structure that contains the information + about where to send the result of the query + @param unit_arg The union where sl is defined + + @details + Gets types and attributes of values of this TVC that will be used + for temporary table creation for this TVC. It creates Item_type_holders + for each element of the first list from list of lists (VALUES from tvc), + using its elements name, defined type and attribute. 
+ + @retval + true if an error was reported + false otherwise +*/ + +bool table_value_constr::prepare(THD *thd, SELECT_LEX *sl, + select_result *tmp_result, + st_select_lex_unit *unit_arg) +{ + DBUG_ENTER("table_value_constr::prepare"); + select_lex->in_tvc= true; + List_iterator_fast li(lists_of_values); + + List_item *first_elem= li++; + uint cnt= first_elem->elements; + Type_holder *holders= type_holders; + + if (cnt == 0) + { + my_error(ER_EMPTY_ROW_IN_TVC, MYF(0)); + DBUG_RETURN(true); + } + + if (fix_fields_for_tvc(thd, li)) + DBUG_RETURN(true); + + if (!holders) + { + holders= type_holders= new (thd->stmt_arena->mem_root) Type_holder[cnt]; + if (!holders || + join_type_handlers_for_tvc(thd, li, holders, cnt) || + get_type_attributes_for_tvc(thd, li, holders, + lists_of_values.elements, cnt)) + DBUG_RETURN(true); + List_iterator_fast it(*first_elem); + Item *item; + Query_arena *arena, backup; + arena=thd->activate_stmt_arena_if_needed(&backup); + + sl->item_list.empty(); + for (uint pos= 0; (item= it++); pos++) + { + /* Error's in 'new' will be detected after loop */ + Item_type_holder *new_holder= new (thd->mem_root) + Item_type_holder(thd, item, holders[pos].type_handler(), + &holders[pos]/*Type_all_attributes*/, + holders[pos].get_maybe_null()); + sl->item_list.push_back(new_holder); + } + if (arena) + thd->restore_active_arena(arena, &backup); + + if (unlikely(thd->is_fatal_error)) + DBUG_RETURN(true); // out of memory + } + + result= tmp_result; + + if (result && result->prepare(sl->item_list, unit_arg)) + DBUG_RETURN(true); + + /* + setup_order() for a TVC is not called when the following is true + (thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) + */ + + thd->where="order clause"; + ORDER *order= sl->order_list.first; + for (; order; order=order->next) + { + Item *order_item= *order->item; + if (order_item->is_order_clause_position()) + { + uint count= 0; + if (order->counter_used) + count= order->counter; // counter was once resolved + 
else + count= (uint) order_item->val_int(); + if (!count || count > first_elem->elements) + { + my_error(ER_BAD_FIELD_ERROR, MYF(0), + order_item->full_name(), thd->where); + DBUG_RETURN(true); + } + order->in_field_list= 1; + order->counter= count; + order->counter_used= 1; + } + } + + select_lex->in_tvc= false; + DBUG_RETURN(false); +} + + +/** + Save Query Plan Footprint +*/ + +int table_value_constr::save_explain_data_intern(THD *thd, + Explain_query *output) +{ + const char *message= "No tables used"; + DBUG_ENTER("table_value_constr::save_explain_data_intern"); + DBUG_PRINT("info", ("Select %p, type %s, message %s", + select_lex, select_lex->type, + message)); + DBUG_ASSERT(have_query_plan == QEP_AVAILABLE); + + /* There should be no attempts to save query plans for merged selects */ + DBUG_ASSERT(!select_lex->master_unit()->derived || + select_lex->master_unit()->derived->is_materialized_derived() || + select_lex->master_unit()->derived->is_with_table()); + + explain= new (output->mem_root) Explain_select(output->mem_root, + thd->lex->analyze_stmt); + if (!explain) + DBUG_RETURN(1); + + select_lex->set_explain_type(true); + + explain->select_id= select_lex->select_number; + explain->select_type= select_lex->type; + explain->linkage= select_lex->get_linkage(); + explain->using_temporary= false; + explain->using_filesort= false; + /* Setting explain->message means that all other members are invalid */ + explain->message= message; + + if (select_lex->master_unit()->derived) + explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED; + + for (SELECT_LEX_UNIT *unit= select_lex->first_inner_unit(); + unit; + unit= unit->next_unit()) + { + explain->add_child(unit->first_select()->select_number); + } + + output->add_node(explain); + + if (select_lex->is_top_level_node()) + output->query_plan_ready(); + + DBUG_RETURN(0); +} + + +/** + Optimization of TVC +*/ + +bool table_value_constr::optimize(THD *thd) +{ + create_explain_query_if_not_exists(thd->lex, 
thd->mem_root); + have_query_plan= QEP_AVAILABLE; + + if (select_lex->select_number != FAKE_SELECT_LEX_ID && + have_query_plan != QEP_NOT_PRESENT_YET && + thd->lex->explain && // for "SET" command in SPs. + (!thd->lex->explain->get_select(select_lex->select_number))) + { + if (save_explain_data_intern(thd, thd->lex->explain)) + return true; + } + + if (select_lex->optimize_unflattened_subqueries(true)) + return true; + + return false; +} + + +/** + Execute of TVC +*/ + +bool table_value_constr::exec(SELECT_LEX *sl) +{ + DBUG_ENTER("table_value_constr::exec"); + List_iterator_fast li(lists_of_values); + List_item *elem; + THD *cur_thd= sl->parent_lex->thd; + ha_rows send_records= 0; + int rc=0; + + if (select_options & SELECT_DESCRIBE) + DBUG_RETURN(false); + + if (result->send_result_set_metadata(sl->item_list, + Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF)) + { + DBUG_RETURN(true); + } + + fix_rownum_pointers(sl->parent_lex->thd, sl, &send_records); + + while ((elem= li++)) + { + cur_thd->get_stmt_da()->inc_current_row_for_warning(); + if (send_records >= sl->master_unit()->lim.get_select_limit()) + break; + rc= result->send_data_with_check(*elem, sl->master_unit(), send_records); + if (!rc) + send_records++; + else if (rc > 0) + DBUG_RETURN(true); + } + + if (result->send_eof()) + DBUG_RETURN(true); + + DBUG_RETURN(false); +} + + +/** + @brief + Print list + + @param str The reference on the string representation of the list + @param list The list that needed to be print + @param query_type The mode of printing + + @details + The method saves a string representation of list in the + string str. 
+*/ + +void print_list_item(String *str, List_item *list, + enum_query_type query_type) +{ + bool is_first_elem= true; + List_iterator_fast it(*list); + Item *item; + + str->append('('); + + while ((item= it++)) + { + if (is_first_elem) + is_first_elem= false; + else + str->append(','); + + item->print(str, query_type); + } + + str->append(')'); +} + + +/** + @brief + Print this TVC + + @param thd The context of the statement + @param str The reference on the string representation of this TVC + @param query_type The mode of printing + + @details + The method saves a string representation of this TVC in the + string str. +*/ + +void table_value_constr::print(THD *thd, String *str, + enum_query_type query_type) +{ + DBUG_ASSERT(thd); + + str->append(STRING_WITH_LEN("values ")); + + bool is_first_elem= true; + List_iterator_fast li(lists_of_values); + List_item *list; + + while ((list= li++)) + { + if (is_first_elem) + is_first_elem= false; + else + str->append(','); + + print_list_item(str, list, query_type); + } + if (select_lex->order_list.elements) + { + str->append(STRING_WITH_LEN(" order by ")); + select_lex->print_order(str, select_lex->order_list.first, query_type); + } + select_lex->print_limit(thd, str, query_type); +} + + +/** + @brief + Create list of lists for TVC from the list of this IN predicate + + @param thd The context of the statement + @param values TVC list of values + + @details + The method uses the list of values of this IN predicate to build + an equivalent list of values that can be used in TVC. 
+ + E.g.: + + = 5,2,7 + = (5),(2),(7) + + = (5,2),(7,1) + = (5,2),(7,1) + + @retval + false if the method succeeds + true otherwise +*/ + +bool Item_func_in::create_value_list_for_tvc(THD *thd, + List< List > *values) +{ + bool is_list_of_rows= args[1]->type() == Item::ROW_ITEM; + + for (uint i=1; i < arg_count; i++) + { + char col_name[8]; + List *tvc_value; + if (!(tvc_value= new (thd->mem_root) List())) + return true; + + if (is_list_of_rows) + { + Item_row *row_list= (Item_row *)(args[i]); + + if (!row_list) + return true; + + for (uint j=0; j < row_list->cols(); j++) + { + if (i == 1) + { + sprintf(col_name, "_col_%i", j+1); + row_list->element_index(j)->set_name(thd, col_name, strlen(col_name), + thd->charset()); + } + if (tvc_value->push_back(row_list->element_index(j), + thd->mem_root)) + return true; + } + } + else + { + if (i == 1) + { + sprintf(col_name, "_col_%i", 1); + args[i]->set_name(thd, col_name, strlen(col_name), thd->charset()); + } + if (tvc_value->push_back(args[i])) + return true; + } + + if (values->push_back(tvc_value, thd->mem_root)) + return true; + } + return false; +} + + +/** + @brief + Create name for the derived table defined by TVC + + @param thd The context of the statement + @param parent_select The SELECT where derived table is used + @param alias The returned created name + + @details + Create name for the derived table using current TVC number + for this parent_select stored in parent_select + + @retval + true if creation fails + false otherwise +*/ + +static bool create_tvc_name(THD *thd, st_select_lex *parent_select, + LEX_CSTRING *alias) +{ + char buff[6]; + + alias->length= my_snprintf(buff, sizeof(buff), + "tvc_%u", + parent_select ? 
parent_select->curr_tvc_name : 0); + alias->str= thd->strmake(buff, alias->length); + if (!alias->str) + return true; + + return false; +} + + +/** + @brief + Check whether TVC used in unit is to be wrapped into select + + @details + TVC used in unit that contains more than one members is to be wrapped + into select if it is tailed with ORDER BY ... LIMIT n [OFFSET m] + + @retval + true if TVC is to be wrapped + false otherwise +*/ + +bool table_value_constr::to_be_wrapped_as_with_tail() +{ + return select_lex->master_unit()->first_select()->next_select() && + select_lex->order_list.elements && + select_lex->limit_params.explicit_limit; +} + + +/** + @brief + Wrap table value constructor into a select + + @param thd The context handler + @param tvc_sl The TVC to wrap + @parent_select The parent select if tvc_sl used in a subquery + + @details + The function wraps the TVC tvc_sl into a select: + the function transforms the TVC of the form VALUES (v1), ... (vn) into + the select of the form + SELECT * FROM (VALUES (v1), ... (vn)) tvc_x + + @retval pointer to the result of of the transformation if successful + NULL - otherwise +*/ + +static +st_select_lex *wrap_tvc(THD *thd, st_select_lex *tvc_sl, + st_select_lex *parent_select) +{ + LEX *lex= thd->lex; + select_result *save_result= lex->result; + uint8 save_derived_tables= lex->derived_tables; + thd->lex->result= NULL; + + Query_arena backup; + Query_arena *arena= thd->activate_stmt_arena_if_needed(&backup); + + Item *item; + SELECT_LEX *wrapper_sl; + SELECT_LEX_UNIT *derived_unit; + + /* + Create SELECT_LEX wrapper_sl of the select used in the result + of the transformation + */ + if (!(wrapper_sl= new (thd->mem_root) SELECT_LEX())) + goto err; + wrapper_sl->select_number= ++thd->lex->stmt_lex->current_select_number; + wrapper_sl->parent_lex= lex; /* Used in init_query. 
*/ + wrapper_sl->init_query(); + wrapper_sl->init_select(); + wrapper_sl->is_tvc_wrapper= true; + + wrapper_sl->nest_level= tvc_sl->nest_level; + wrapper_sl->parsing_place= tvc_sl->parsing_place; + wrapper_sl->set_linkage(tvc_sl->get_linkage()); + wrapper_sl->exclude_from_table_unique_test= + tvc_sl->exclude_from_table_unique_test; + + lex->current_select= wrapper_sl; + item= new (thd->mem_root) Item_field(thd, &wrapper_sl->context, + star_clex_str); + if (item == NULL || add_item_to_list(thd, item)) + goto err; + (wrapper_sl->with_wild)++; + + /* Include the newly created select into the global list of selects */ + wrapper_sl->include_global((st_select_lex_node**)&lex->all_selects_list); + + /* Substitute select node used of TVC for the newly created select */ + tvc_sl->substitute_in_tree(wrapper_sl); + + /* + Create a unit for the substituted select used for TVC and attach it + to the the wrapper select wrapper_sl as the only unit. The created + unit is the unit for the derived table tvc_x of the transformation. 
+ */ + if (!(derived_unit= new (thd->mem_root) SELECT_LEX_UNIT())) + goto err; + derived_unit->init_query(); + derived_unit->thd= thd; + derived_unit->include_down(wrapper_sl); + + /* + Attach the select used of TVC as the only slave to the unit for + the derived table tvc_x of the transformation + */ + derived_unit->attach_single(tvc_sl); + tvc_sl->set_linkage(DERIVED_TABLE_TYPE); + + /* + Generate the name of the derived table created for TVC and + add it to the FROM list of the wrapping select + */ + Table_ident *ti; + LEX_CSTRING alias; + TABLE_LIST *derived_tab; + if (!(ti= new (thd->mem_root) Table_ident(derived_unit)) || + create_tvc_name(thd, parent_select, &alias)) + goto err; + if (!(derived_tab= + wrapper_sl->add_table_to_list(thd, + ti, &alias, 0, + TL_READ, MDL_SHARED_READ))) + goto err; + wrapper_sl->add_joined_table(derived_tab); + wrapper_sl->add_where_field(derived_unit->first_select()); + wrapper_sl->context.table_list= wrapper_sl->table_list.first; + wrapper_sl->context.first_name_resolution_table= wrapper_sl->table_list.first; + wrapper_sl->table_list.first->derived_type= DTYPE_TABLE | DTYPE_MATERIALIZE; + lex->derived_tables|= DERIVED_SUBQUERY; + + if (arena) + thd->restore_active_arena(arena, &backup); + lex->result= save_result; + return wrapper_sl; + +err: + if (arena) + thd->restore_active_arena(arena, &backup); + lex->result= save_result; + lex->derived_tables= save_derived_tables; + return 0; +} + + +/** + @brief + Wrap TVC with ORDER BY ... LIMIT tail into a select + + @param thd The context handler + @param tvc_sl The TVC to wrap + + @details + The function wraps the TVC tvc_sl into a select: + the function transforms the TVC with tail of the form + VALUES (v1), ... (vn) ORDER BY ... LIMIT n [OFFSET m] + into the select with the same tail of the form + SELECT * FROM (VALUES (v1), ... (vn)) tvc_x + ORDER BY ... 
LIMIT n [OFFSET m] + + @retval pointer to the result of of the transformation if successful + NULL - otherwise +*/ + +st_select_lex *wrap_tvc_with_tail(THD *thd, st_select_lex *tvc_sl) +{ + st_select_lex *wrapper_sl= wrap_tvc(thd, tvc_sl, NULL); + if (!wrapper_sl) + return NULL; + + wrapper_sl->order_list= tvc_sl->order_list; + wrapper_sl->limit_params= tvc_sl->limit_params; + wrapper_sl->braces= tvc_sl->braces; + tvc_sl->order_list.empty(); + tvc_sl->limit_params.clear(); + tvc_sl->braces= 0; + if (tvc_sl->select_number == 1) + { + tvc_sl->select_number= wrapper_sl->select_number; + wrapper_sl->select_number= 1; + } + if (tvc_sl->master_unit()->union_distinct == tvc_sl) + { + wrapper_sl->master_unit()->union_distinct= wrapper_sl; + } + wrapper_sl->distinct= tvc_sl->distinct; + thd->lex->current_select= wrapper_sl; + return wrapper_sl; +} + + +/** + @brief + Wrap TVC in a subselect into a select + + @param thd The context handler + @param tvc_sl The TVC to wrap + + @details + The function wraps the TVC tvc_sl used in a subselect into a select + the function transforms the TVC of the form VALUES (v1), ... (vn) + into the select the form + SELECT * FROM (VALUES (v1), ... (vn)) tvc_x + and replaces the subselect with the result of the transformation. 
+ + @retval wrapping select if successful + 0 otherwise +*/ + +st_select_lex * +Item_subselect::wrap_tvc_into_select(THD *thd, st_select_lex *tvc_sl) +{ + LEX *lex= thd->lex; + /* SELECT_LEX object where the transformation is performed */ + SELECT_LEX *parent_select= lex->current_select; + SELECT_LEX *wrapper_sl= wrap_tvc(thd, tvc_sl, parent_select); + if (wrapper_sl) + { + if (engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE) + ((subselect_single_select_engine *) engine)->change_select(wrapper_sl); + } + lex->current_select= parent_select; + return wrapper_sl; +} + + +/* + @brief + Check whether the items are of comparable type or not + + @details + This check are done because materialization is not performed + if the left expr and right expr are of the same types. + @see subquery_types_allow_materialization() + + @retval + 0 comparable + 1 not comparable +*/ + +static bool cmp_row_types(Item* item1, Item* item2) +{ + uint n= item1->cols(); + if (item2->check_cols(n)) + return true; + + for (uint i=0; i < n; i++) + { + Item *inner= item1->element_index(i); + Item *outer= item2->element_index(i); + if (!inner->type_handler()->subquery_type_allows_materialization(inner, + outer, + true)) + return true; + } + return false; +} + + +/** + @brief + Transform IN predicate into IN subquery + + @param thd The context of the statement + @param arg Not used + + @details + The method transforms this IN predicate into in equivalent IN subquery: + + IN () + => + IN (SELECT * FROM (VALUES ) AS tvc_#) + + E.g.: + + = 5,2,7 + = (5),(2),(7) + + = (5,2),(7,1) + = (5,2),(7,1) + + If the transformation succeeds the method returns the result IN subquery, + otherwise this IN predicate is returned. 
+ + @retval + pointer to the result of transformation if succeeded + pointer to this IN predicate otherwise +*/ + +Item *Item_func_in::in_predicate_to_in_subs_transformer(THD *thd, + uchar *arg) +{ + if (!transform_into_subq) + return this; + + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_conv(thd, "in_to_subquery_conversion"); + trace_conv.add("item", this); + + List values; + + LEX *lex= thd->lex; + /* SELECT_LEX object where the transformation is performed */ + SELECT_LEX *parent_select= lex->current_select; + uint8 save_derived_tables= lex->derived_tables; + + /* + Make sure that create_tmp_table will not fail due to too long keys. + Here the strategy would mainly use materialization, so we need to make + sure that the materialized table can be created. + + The checks here are the same as in subquery_type_allows_materialization() + */ + uint32 length= max_length_of_left_expr(); + if (!length || length > tmp_table_max_key_length() || + args[0]->cols() > tmp_table_max_key_parts()) + { + trace_conv.add("done", false); + trace_conv.add("reason", "key is too long"); + return this; + } + + for (uint i=1; i < arg_count; i++) + { + if (!args[i]->const_item()) + { + trace_conv.add("done", false); + trace_conv.add("reason", "non-constant element in the IN-list"); + return this; + } + + if (cmp_row_types(args[i], args[0])) + { + trace_conv.add("done", false); + trace_conv.add("reason", "type mismatch"); + return this; + } + } + Json_writer_array trace_nested_obj(thd, "conversion"); + + Query_arena backup; + Query_arena *arena= thd->activate_stmt_arena_if_needed(&backup); + + /* + Create SELECT_LEX of the subquery SQ used in the result of transformation + */ + if (mysql_new_select(lex, 1, NULL)) + goto err; + lex->init_select(); + /* Create item list as '*' for the subquery SQ */ + Item *item; + SELECT_LEX *sq_select; // select for IN subquery; + sq_select= lex->current_select; + sq_select->parsing_place= SELECT_LIST; + item= new (thd->mem_root) 
Item_field(thd, &sq_select->context, + star_clex_str); + if (item == NULL || add_item_to_list(thd, item)) + goto err; + (sq_select->with_wild)++; + /* + Create derived table DT that will wrap TVC in the result of transformation + */ + SELECT_LEX *tvc_select; // select for tvc + SELECT_LEX_UNIT *derived_unit; // unit for tvc_select + if (mysql_new_select(lex, 1, NULL)) + goto err; + lex->init_select(); + tvc_select= lex->current_select; + derived_unit= tvc_select->master_unit(); + tvc_select->set_linkage(DERIVED_TABLE_TYPE); + + /* Create TVC used in the transformation */ + if (create_value_list_for_tvc(thd, &values)) + goto err; + if (!(tvc_select->tvc= + new (thd->mem_root) + table_value_constr(values, + tvc_select, + tvc_select->options))) + goto err; + + lex->current_select= sq_select; + + /* + Create the name of the wrapping derived table and + add it to the FROM list of the subquery SQ + */ + Table_ident *ti; + LEX_CSTRING alias; + TABLE_LIST *derived_tab; + if (!(ti= new (thd->mem_root) Table_ident(derived_unit)) || + create_tvc_name(thd, parent_select, &alias)) + goto err; + if (!(derived_tab= + sq_select->add_table_to_list(thd, + ti, &alias, 0, + TL_READ, MDL_SHARED_READ))) + goto err; + sq_select->add_joined_table(derived_tab); + sq_select->add_where_field(derived_unit->first_select()); + sq_select->context.table_list= sq_select->table_list.first; + sq_select->context.first_name_resolution_table= sq_select->table_list.first; + sq_select->table_list.first->derived_type= DTYPE_TABLE | DTYPE_MATERIALIZE; + lex->derived_tables|= DERIVED_SUBQUERY; + + sq_select->where= 0; + sq_select->set_braces(false); + derived_unit->set_with_clause(0); + + /* Create IN subquery predicate */ + sq_select->parsing_place= parent_select->parsing_place; + Item_in_subselect *in_subs; + Item *sq; + if (!(in_subs= + new (thd->mem_root) Item_in_subselect(thd, args[0], sq_select))) + goto err; + in_subs->converted_from_in_predicate= TRUE; + sq= in_subs; + if (negated) + sq= 
negate_expression(thd, in_subs); + else + in_subs->emb_on_expr_nest= emb_on_expr_nest; + + if (arena) + thd->restore_active_arena(arena, &backup); + thd->lex->current_select= parent_select; + + if (sq->fix_fields(thd, (Item **)&sq)) + goto err; + + parent_select->curr_tvc_name++; + + return sq; + +err: + if (arena) + thd->restore_active_arena(arena, &backup); + lex->derived_tables= save_derived_tables; + thd->lex->current_select= parent_select; + return NULL; +} + + +uint32 Item_func_in::max_length_of_left_expr() +{ + uint n= args[0]->cols(); + uint32 length= 0; + for (uint i=0; i < n; i++) + length+= args[0]->element_index(i)->max_length; + return length; +} + + +/** + @brief + Check if this IN-predicate can be transformed in IN-subquery + with TVC + + @param thd The context of the statement + + @details + Compare the number of elements in the list of + values in this IN-predicate with the + in_subquery_conversion_threshold special variable + + @retval + true if transformation can be made + false otherwise +*/ + +bool Item_func_in::to_be_transformed_into_in_subq(THD *thd) +{ + bool is_row_list= args[1]->type() == Item::ROW_ITEM; + uint values_count= arg_count-1; + + if (is_row_list) + values_count*= ((Item_row *)(args[1]))->cols(); + + if (thd->variables.in_subquery_conversion_threshold == 0 || + thd->variables.in_subquery_conversion_threshold > values_count) + return false; + + if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_PREPARE)) + return true; + + /* Occurence of '?' 
in IN list is checked only for PREPARE commands */ + for (uint i=1; i < arg_count; i++) + { + if (!is_row_list) + { + if (args[i]->type() == Item::PARAM_ITEM) + return false; + } + else + { + Item_row *row_list= (Item_row *)(args[i]); + for (uint j=0; j < row_list->cols(); j++) + { + if (row_list->element_index(j)->type() == Item::PARAM_ITEM) + return false; + } + } + } + + return true; +} + + +/** + @brief + Transform IN predicates into IN subqueries in WHERE and ON expressions + + @param thd The context of the statement + + @details + For each IN predicate from AND parts of the WHERE condition and/or + ON expressions of the SELECT for this join the method performs + the intransformation into an equivalent IN sunquery if it's needed. + + @retval + false always +*/ + +bool JOIN::transform_in_predicates_into_in_subq(THD *thd) +{ + DBUG_ENTER("JOIN::transform_in_predicates_into_in_subq"); + if (!select_lex->in_funcs.elements) + DBUG_RETURN(false); + + SELECT_LEX *save_current_select= thd->lex->current_select; + enum_parsing_place save_parsing_place= select_lex->parsing_place; + thd->lex->current_select= select_lex; + if (conds) + { + select_lex->parsing_place= IN_WHERE; + conds= + conds->top_level_transform(thd, + &Item::in_predicate_to_in_subs_transformer, 0); + if (!conds) + DBUG_RETURN(true); + select_lex->where= conds; + } + + if (join_list) + { + TABLE_LIST *table; + List_iterator li(*join_list); + select_lex->parsing_place= IN_ON; + + while ((table= li++)) + { + if (table->on_expr) + { + table->on_expr= + table->on_expr->top_level_transform(thd, + &Item::in_predicate_to_in_subs_transformer, 0); + if (!table->on_expr) + DBUG_RETURN(true); + } + } + } + + select_lex->in_funcs.empty(); + select_lex->parsing_place= save_parsing_place; + thd->lex->current_select= save_current_select; + DBUG_RETURN(false); +} diff --git a/sql/sql_tvc.h b/sql/sql_tvc.h new file mode 100644 index 00000000..710557f7 --- /dev/null +++ b/sql/sql_tvc.h @@ -0,0 +1,76 @@ +/* Copyright (c) 
2017, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef SQL_TVC_INCLUDED +#define SQL_TVC_INCLUDED +#include "sql_type.h" + +typedef List List_item; +typedef bool (Item::*Item_processor) (void *arg); +class select_result; +class Explain_select; +class Explain_query; +class Item_func_in; +class st_select_lex_unit; +typedef class st_select_lex SELECT_LEX; +class Type_holder; + +/** + @class table_value_constr + @brief Definition of a Table Value Construction(TVC) + + It contains a list of lists of values which this TVC is defined by and + reference on SELECT where this TVC is defined. 
+*/ +class table_value_constr : public Sql_alloc +{ +public: + List lists_of_values; + select_result *result; + SELECT_LEX *select_lex; + Type_holder *type_holders; + + enum { QEP_NOT_PRESENT_YET, QEP_AVAILABLE} have_query_plan; + + Explain_select *explain; + ulonglong select_options; + + table_value_constr(List tvc_values, SELECT_LEX *sl, + ulonglong select_options_arg) : + lists_of_values(tvc_values), result(0), select_lex(sl), type_holders(0), + have_query_plan(QEP_NOT_PRESENT_YET), explain(0), + select_options(select_options_arg) + { }; + + ha_rows get_records() { return lists_of_values.elements; } + + bool prepare(THD *thd_arg, SELECT_LEX *sl, + select_result *tmp_result, + st_select_lex_unit *unit_arg); + + bool to_be_wrapped_as_with_tail(); + + int save_explain_data_intern(THD *thd_arg, + Explain_query *output); + bool optimize(THD *thd_arg); + bool exec(SELECT_LEX *sl); + + void print(THD *thd_arg, String *str, enum_query_type query_type); + bool walk_values(Item_processor processor, bool walk_subquery, void *arg); +}; + +st_select_lex *wrap_tvc_with_tail(THD *thd, st_select_lex *tvc_sl); + +#endif /* SQL_TVC_INCLUDED */ diff --git a/sql/sql_type.cc b/sql/sql_type.cc new file mode 100644 index 00000000..277f495a --- /dev/null +++ b/sql/sql_type.cc @@ -0,0 +1,9457 @@ +/* + Copyright (c) 2015, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_type.h" +#include "sql_type_geom.h" +#include "sql_const.h" +#include "sql_class.h" +#include "sql_time.h" +#include "sql_string.h" +#include "item.h" +#include "log.h" +#include "tztime.h" +#include + + +const DTCollation &DTCollation_numeric::singleton() +{ + static const DTCollation_numeric tmp; + return tmp; +} + +Named_type_handler type_handler_row("row"); + +Named_type_handler type_handler_null("null"); + +Named_type_handler type_handler_bool("boolean"); +Named_type_handler type_handler_stiny("tinyint"); +Named_type_handler type_handler_sshort("smallint"); +Named_type_handler type_handler_slong("int"); +Named_type_handler type_handler_sint24("mediumint"); +Named_type_handler type_handler_slonglong("bigint"); +Named_type_handler type_handler_utiny("tiny unsigned"); +Named_type_handler type_handler_ushort("smallint unsigned"); +Named_type_handler type_handler_ulong("int unsigned"); +Named_type_handler type_handler_uint24("mediumint unsigned"); +Named_type_handler type_handler_ulonglong("bigint unsigned"); +Named_type_handler type_handler_vers_trx_id("bigint unsigned"); +Named_type_handler type_handler_float("float"); +Named_type_handler type_handler_double("double"); +Named_type_handler type_handler_bit("bit"); + +Named_type_handler type_handler_olddecimal("decimal"); +Named_type_handler type_handler_newdecimal("decimal"); + +Named_type_handler type_handler_year("year"); +Named_type_handler type_handler_year2("year"); +Named_type_handler type_handler_time("time"); +Named_type_handler type_handler_date("date"); +Named_type_handler type_handler_timestamp("timestamp"); +Named_type_handler type_handler_timestamp2("timestamp"); +Named_type_handler type_handler_datetime("datetime"); +Named_type_handler 
type_handler_time2("time"); +Named_type_handler type_handler_newdate("date"); +Named_type_handler type_handler_datetime2("datetime"); + +Named_type_handler type_handler_enum("enum"); +Named_type_handler type_handler_set("set"); + +Named_type_handler type_handler_string("char"); +Named_type_handler type_handler_var_string("varchar"); +Named_type_handler type_handler_varchar("varchar"); +Named_type_handler type_handler_hex_hybrid("hex_hybrid"); +Named_type_handler type_handler_varchar_compressed("varchar"); + +Named_type_handler type_handler_tiny_blob("tinyblob"); +Named_type_handler type_handler_medium_blob("mediumblob"); +Named_type_handler type_handler_long_blob("longblob"); +Named_type_handler type_handler_blob("blob"); +Named_type_handler type_handler_blob_compressed("blob"); + +Type_handler_interval_DDhhmmssff type_handler_interval_DDhhmmssff; + +Vers_type_timestamp vers_type_timestamp; +Vers_type_trx vers_type_trx; + +/***************************************************************************/ + + + +class Type_collection_std: public Type_collection +{ +public: + const Type_handler *aggregate_for_result(const Type_handler *a, + const Type_handler *b) + const override + { + return Type_handler::aggregate_for_result_traditional(a, b); + } + const Type_handler *aggregate_for_comparison(const Type_handler *a, + const Type_handler *b) + const override; + const Type_handler *aggregate_for_min_max(const Type_handler *a, + const Type_handler *b) + const override; + const Type_handler *aggregate_for_num_op(const Type_handler *a, + const Type_handler *b) + const override; +}; + + +static Type_collection_std type_collection_std; + +const Type_collection *Type_handler::type_collection() const +{ + return &type_collection_std; +} + + +bool Type_handler::is_traditional_scalar_type() const +{ + return type_collection() == &type_collection_std; +} + + +class Type_collection_row: public Type_collection +{ +public: + bool init(Type_handler_data *data) override + { + return 
false; + } + const Type_handler *aggregate_for_result(const Type_handler *a, + const Type_handler *b) + const override + { + return NULL; + } + const Type_handler *aggregate_for_comparison(const Type_handler *a, + const Type_handler *b) + const override + { + DBUG_ASSERT(a == &type_handler_row); + DBUG_ASSERT(b == &type_handler_row); + return &type_handler_row; + } + const Type_handler *aggregate_for_min_max(const Type_handler *a, + const Type_handler *b) + const override + { + return NULL; + } + const Type_handler *aggregate_for_num_op(const Type_handler *a, + const Type_handler *b) + const override + { + return NULL; + } +}; + + +static Type_collection_row type_collection_row; + +const Type_collection *Type_handler_row::type_collection() const +{ + return &type_collection_row; +} + + +bool Type_handler_data::init() +{ +#ifdef HAVE_SPATIAL + return type_collection_geometry.init(this); +#endif + return false; +} + + +Schema *Type_handler::schema() const +{ + return &mariadb_schema; +} + + +const Type_handler * +Type_handler::handler_by_name(THD *thd, const LEX_CSTRING &name) +{ + plugin_ref plugin; + if ((plugin= my_plugin_lock_by_name(thd, &name, MariaDB_DATA_TYPE_PLUGIN))) + { + /* + Data type plugins do not maintain ref_count yet. + For now we have only mandatory built-in plugins + and dynamic plugins for test purposes. + It should be safe to unlock the plugin immediately. 
+ */ + const Type_handler *ph= reinterpret_cast + (plugin_decl(plugin)->info)->type_handler; + plugin_unlock(thd, plugin); + return ph; + } + +#ifdef HAVE_SPATIAL + const Type_handler *ha= Type_collection_geometry_handler_by_name(name); + if (ha) + return ha; +#endif + return NULL; +} + + +#ifndef DBUG_OFF +static const Type_handler *frm_data_type_info_emulate(const LEX_CSTRING &name) +{ + if (Name(STRING_WITH_LEN("xchar")).eq(name)) + return &type_handler_string; + if (Name(STRING_WITH_LEN("xblob")).eq(name)) + return &type_handler_blob; + return NULL; +} +#endif + + +const Type_handler * +Type_handler::handler_by_name_or_error(THD *thd, const LEX_CSTRING &name) +{ + const Type_handler *h= handler_by_name(thd, name); + DBUG_EXECUTE_IF("emulate_handler_by_name_or_error_failure", h= NULL;); + if (!h) + { + DBUG_EXECUTE_IF("frm_data_type_info_emulate", + if ((h= frm_data_type_info_emulate(name))) + return h; + ); + my_error(ER_UNKNOWN_DATA_TYPE, MYF(0), + ErrConvString(name.str, name.length, system_charset_info).ptr()); + } + return h; +} + + +Type_handler_data *type_handler_data= NULL; + + +bool Float::to_string(String *val_buffer, uint dec) const +{ + uint to_length= 70; + if (val_buffer->alloc(to_length)) + return true; + + char *to=(char*) val_buffer->ptr(); + size_t len; + + if (dec >= FLOATING_POINT_DECIMALS) + len= my_gcvt(m_value, MY_GCVT_ARG_FLOAT, to_length - 1, to, NULL); + else + { + /* + We are safe here because the buffer length is 70, and + fabs(float) < 10^39, dec < FLOATING_POINT_DECIMALS. So the resulting string + will be not longer than 69 chars + terminating '\0'. 
+ */ + len= my_fcvt(m_value, (int) dec, to, NULL); + } + val_buffer->length((uint) len); + val_buffer->set_charset(&my_charset_numeric); + return false; +} + + +String_ptr::String_ptr(Item *item, String *buffer) + :m_string_ptr(item->val_str(buffer)) +{ } + + +Ascii_ptr::Ascii_ptr(Item *item, String *buffer) + :String_ptr(item->val_str_ascii(buffer)) +{ } + + +void VDec::set(Item *item) +{ + m_ptr= item->val_decimal(&m_buffer); + DBUG_ASSERT((m_ptr == NULL) == item->null_value); +} + + +VDec::VDec(Item *item) +{ + m_ptr= item->val_decimal(&m_buffer); + DBUG_ASSERT((m_ptr == NULL) == item->null_value); +} + + +VDec_op::VDec_op(Item_func_hybrid_field_type *item) +{ + m_ptr= item->decimal_op(&m_buffer); + DBUG_ASSERT((m_ptr == NULL) == item->null_value); +} + + +date_conv_mode_t Temporal::sql_mode_for_dates(THD *thd) +{ + return ::sql_mode_for_dates(thd); +} + + +time_round_mode_t Temporal::default_round_mode(THD *thd) +{ + return thd->temporal_round_mode(); +} + + +time_round_mode_t Timestamp::default_round_mode(THD *thd) +{ + return thd->temporal_round_mode(); +} + + +my_decimal *Temporal::to_decimal(my_decimal *to) const +{ + return date2my_decimal(this, to); +} + + +my_decimal *Temporal::bad_to_decimal(my_decimal *to) const +{ + my_decimal_set_zero(to); + return NULL; +} + + +void Temporal::make_from_str(THD *thd, Warn *warn, + const char *str, size_t length, + CHARSET_INFO *cs, date_mode_t fuzzydate) +{ + DBUG_EXECUTE_IF("str_to_datetime_warn", + push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_YES, ErrConvString(str, length,cs).ptr());); + + if (str_to_temporal(thd, warn, str, length, cs, fuzzydate)) + make_fuzzy_date(&warn->warnings, date_conv_mode_t(fuzzydate)); + if (warn->warnings) + warn->set_str(str, length, &my_charset_bin); +} + + +Temporal_hybrid::Temporal_hybrid(THD *thd, Item *item, date_mode_t fuzzydate) +{ + if (item->get_date(thd, this, fuzzydate)) + time_type= MYSQL_TIMESTAMP_NONE; +} + + +uint Timestamp::binary_length_to_precision(uint 
length) +{ + switch (length) { + case 4: return 0; + case 5: return 2; + case 6: return 4; + case 7: return 6; + } + DBUG_ASSERT(0); + return 0; +} + + +Timestamp::Timestamp(const Native &native) +{ + DBUG_ASSERT(native.length() >= 4 && native.length() <= 7); + uint dec= binary_length_to_precision(native.length()); + my_timestamp_from_binary(this, (const uchar *) native.ptr(), dec); +} + + +bool Timestamp::to_native(Native *to, uint decimals) const +{ + uint len= my_timestamp_binary_length(decimals); + if (to->reserve(len)) + return true; + my_timestamp_to_binary(this, (uchar *) to->ptr(), decimals); + to->length(len); + return false; +} + + +bool Timestamp::to_TIME(THD *thd, MYSQL_TIME *to, date_mode_t fuzzydate) const +{ + return thd->timestamp_to_TIME(to, tv_sec, tv_usec, fuzzydate); +} + + +Timestamp::Timestamp(THD *thd, const MYSQL_TIME *ltime, uint *error_code) + :Timeval(TIME_to_timestamp(thd, ltime, error_code), ltime->second_part) +{ } + + +Timestamp_or_zero_datetime::Timestamp_or_zero_datetime(THD *thd, + const MYSQL_TIME *ltime, + uint *error_code) + :Timestamp(thd, ltime, error_code), + m_is_zero_datetime(*error_code == ER_WARN_DATA_OUT_OF_RANGE) +{ + if (m_is_zero_datetime) + { + if (!non_zero_date(ltime)) + *error_code= 0; // ltime was '0000-00-00 00:00:00' + } + else if (*error_code == ER_WARN_INVALID_TIMESTAMP) + *error_code= 0; // ltime fell into spring time gap, adjusted. 
+} + + +bool Timestamp_or_zero_datetime::to_TIME(THD *thd, MYSQL_TIME *to, + date_mode_t fuzzydate) const +{ + if (m_is_zero_datetime) + { + set_zero_time(to, MYSQL_TIMESTAMP_DATETIME); + return false; + } + return Timestamp::to_TIME(thd, to, fuzzydate); +} + + +bool Timestamp_or_zero_datetime::to_native(Native *to, uint decimals) const +{ + if (m_is_zero_datetime) + { + to->length(0); + return false; + } + return Timestamp::to_native(to, decimals); +} + + +int Timestamp_or_zero_datetime_native::save_in_field(Field *field, + uint decimals) const +{ + field->set_notnull(); + if (field->type_handler()->type_handler_for_native_format() == + &type_handler_timestamp2) + return field->store_native(*this); + if (is_zero_datetime()) + { + static Datetime zero(Datetime::zero()); + return field->store_time_dec(zero.get_mysql_time(), decimals); + } + return field->store_timestamp_dec(Timestamp(*this).tv(), decimals); +} + + +void Sec6::make_from_decimal(const my_decimal *d, ulong *nanoseconds) +{ + m_neg= my_decimal2seconds(d, &m_sec, &m_usec, nanoseconds); + m_truncated= (m_sec >= LONGLONG_MAX); +} + + +void Sec6::make_from_double(double nr, ulong *nanoseconds) +{ + if ((m_neg= nr < 0)) + nr= -nr; + if ((m_truncated= nr > (double) LONGLONG_MAX)) + { + m_sec= LONGLONG_MAX; + m_usec= 0; + *nanoseconds= 0; + } + else + { + m_sec= (ulonglong) nr; + m_usec= (ulong) ((nr - floor(nr)) * 1000000000); + *nanoseconds= m_usec % 1000; + m_usec/= 1000; + } +} + + +void Sec6::make_truncated_warning(THD *thd, const char *type_str) const +{ + char buff[1 + MAX_BIGINT_WIDTH + 1 + 6 + 1]; // '-' int '.' frac '\0' + to_string(buff, sizeof(buff)); + thd->push_warning_truncated_wrong_value(type_str, buff); +} + + +bool Sec6::convert_to_mysql_time(THD *thd, int *warn, MYSQL_TIME *ltime, + date_mode_t fuzzydate) const +{ + bool rc= fuzzydate & (TIME_INTERVAL_hhmmssff | TIME_INTERVAL_DAY) ? + to_datetime_or_to_interval_hhmmssff(ltime, warn) : + fuzzydate & TIME_TIME_ONLY ? 
+ to_datetime_or_time(ltime, warn, date_conv_mode_t(fuzzydate)) : + to_datetime_or_date(ltime, warn, date_conv_mode_t(fuzzydate)); + DBUG_ASSERT(*warn || !rc); + if (truncated()) + *warn|= MYSQL_TIME_WARN_TRUNCATED; + return rc; +} + + +void Temporal::push_conversion_warnings(THD *thd, bool totally_useless_value, + int warn, + const char *typestr, + const char *db_name, + const char *table_name, + const char *field_name, + const char *value) +{ + if (MYSQL_TIME_WARN_HAVE_WARNINGS(warn)) + thd->push_warning_wrong_or_truncated_value(Sql_condition::WARN_LEVEL_WARN, + totally_useless_value, + typestr, value, + db_name, table_name, + field_name); + else if (MYSQL_TIME_WARN_HAVE_NOTES(warn)) + thd->push_warning_wrong_or_truncated_value(Sql_condition::WARN_LEVEL_NOTE, + false, typestr, value, + db_name, table_name, + field_name); +} + + +VSec9::VSec9(THD *thd, Item *item, const char *type_str, ulonglong limit) +{ + if (item->decimals == 0) + { // optimize for an important special case + Longlong_hybrid nr(item->val_int(), item->unsigned_flag); + make_from_int(nr); + m_is_null= item->null_value; + if (!m_is_null && m_sec > limit) + { + m_sec= limit; + m_truncated= true; + ErrConvInteger err(nr); + thd->push_warning_truncated_wrong_value(type_str, err.ptr()); + } + } + else if (item->cmp_type() == REAL_RESULT) + { + double nr= item->val_real(); + make_from_double(nr, &m_nsec); + m_is_null= item->null_value; + if (!m_is_null && m_sec > limit) + { + m_sec= limit; + m_truncated= true; + } + if (m_truncated) + { + ErrConvDouble err(nr); + thd->push_warning_truncated_wrong_value(type_str, err.ptr()); + } + } + else + { + VDec tmp(item); + (m_is_null= tmp.is_null()) ? 
reset() : make_from_decimal(tmp.ptr(), &m_nsec); + if (!m_is_null && m_sec > limit) + { + m_sec= limit; + m_truncated= true; + } + if (m_truncated) + { + ErrConvDecimal err(tmp.ptr()); + thd->push_warning_truncated_wrong_value(type_str, err.ptr()); + } + } +} + + +Year::Year(longlong value, bool unsigned_flag, uint length) +{ + if ((m_truncated= (value < 0))) // Negative or huge unsigned + m_year= unsigned_flag ? 9999 : 0; + else if (value > 9999) + { + m_truncated= true; + m_year= 9999; + } + else if (length == 2) + { + m_year= value < 70 ? (uint) value + 2000 : + value <= 1900 ? (uint) value + 1900 : + (uint) value; + } + else + m_year= (uint) value; + DBUG_ASSERT(m_year <= 9999); +} + + +uint Year::year_precision(const Item *item) const +{ + return item->type_handler() == &type_handler_year2 ? 2 : 4; +} + + +VYear::VYear(Item *item) + :Year_null(item->to_longlong_null(), item->unsigned_flag, year_precision(item)) +{ } + + +VYear_op::VYear_op(Item_func_hybrid_field_type *item) + :Year_null(item->to_longlong_null_op(), item->unsigned_flag, + year_precision(item)) +{ } + + +const LEX_CSTRING Interval_DDhhmmssff::m_type_name= + {STRING_WITH_LEN("INTERVAL DAY TO SECOND")}; + + +Interval_DDhhmmssff::Interval_DDhhmmssff(THD *thd, Status *st, + bool push_warnings, + Item *item, ulong max_hour, + time_round_mode_t mode, uint dec) +{ + switch (item->cmp_type()) { + case ROW_RESULT: + DBUG_ASSERT(0); + time_type= MYSQL_TIMESTAMP_NONE; + break; + case TIME_RESULT: + { + // Rounding mode is not important here + if (item->get_date(thd, this, Options(TIME_TIME_ONLY, TIME_FRAC_NONE))) + time_type= MYSQL_TIMESTAMP_NONE; + else if (time_type != MYSQL_TIMESTAMP_TIME) + { + st->warnings|= MYSQL_TIME_WARN_OUT_OF_RANGE; + push_warning_wrong_or_truncated_value(thd, ErrConvTime(this), + st->warnings); + time_type= MYSQL_TIMESTAMP_NONE; + } + break; + } + case INT_RESULT: + case REAL_RESULT: + case DECIMAL_RESULT: + case STRING_RESULT: + { + StringBuffer tmp; + String *str= 
item->val_str(&tmp); + if (!str) + time_type= MYSQL_TIMESTAMP_NONE; + else if (str_to_DDhhmmssff(st, str->ptr(), str->length(), str->charset(), + UINT_MAX32)) + { + if (push_warnings) + thd->push_warning_wrong_value(Sql_condition::WARN_LEVEL_WARN, + m_type_name.str, + ErrConvString(str).ptr()); + time_type= MYSQL_TIMESTAMP_NONE; + } + else + { + if (mode == TIME_FRAC_ROUND) + time_round_or_set_max(dec, &st->warnings, max_hour, st->nanoseconds); + if (hour > max_hour) + { + st->warnings|= MYSQL_TIME_WARN_OUT_OF_RANGE; + time_type= MYSQL_TIMESTAMP_NONE; + } + // Warn if hour or nanosecond truncation happened + if (push_warnings) + push_warning_wrong_or_truncated_value(thd, ErrConvString(str), + st->warnings); + } + } + break; + } + DBUG_ASSERT(is_valid_value_slow()); +} + + +void +Interval_DDhhmmssff::push_warning_wrong_or_truncated_value(THD *thd, + const ErrConv &str, + int warnings) +{ + if (warnings & MYSQL_TIME_WARN_OUT_OF_RANGE) + { + thd->push_warning_wrong_value(Sql_condition::WARN_LEVEL_WARN, + m_type_name.str, str.ptr()); + } + else if (MYSQL_TIME_WARN_HAVE_WARNINGS(warnings)) + { + thd->push_warning_truncated_wrong_value(Sql_condition::WARN_LEVEL_WARN, + m_type_name.str, str.ptr()); + } + else if (MYSQL_TIME_WARN_HAVE_NOTES(warnings)) + { + thd->push_warning_truncated_wrong_value(Sql_condition::WARN_LEVEL_NOTE, + m_type_name.str, str.ptr()); + } +} + + +uint Interval_DDhhmmssff::fsp(THD *thd, Item *item) +{ + switch (item->cmp_type()) { + case INT_RESULT: + case TIME_RESULT: + return item->decimals; + case REAL_RESULT: + case DECIMAL_RESULT: + return MY_MIN(item->decimals, TIME_SECOND_PART_DIGITS); + case ROW_RESULT: + DBUG_ASSERT(0); + return 0; + case STRING_RESULT: + break; + } + if (!item->can_eval_in_optimize()) + return TIME_SECOND_PART_DIGITS; + Status st; + Interval_DDhhmmssff it(thd, &st, false/*no warnings*/, item, UINT_MAX32, + TIME_FRAC_TRUNCATE, TIME_SECOND_PART_DIGITS); + return it.is_valid_interval_DDhhmmssff() ? 
st.precision : + TIME_SECOND_PART_DIGITS; +} + + +bool Time::to_native(Native *to, uint decimals) const +{ + if (!is_valid_time()) + { + to->length(0); + return true; + } + uint len= my_time_binary_length(decimals); + if (to->reserve(len)) + return true; + longlong tmp= TIME_to_longlong_time_packed(get_mysql_time()); + my_time_packed_to_binary(tmp, (uchar*) to->ptr(), decimals); + to->length(len); + return false; +} + + +void Time::make_from_item(THD *thd, int *warn, Item *item, const Options opt) +{ + *warn= 0; + if (item->get_date(thd, this, opt)) + time_type= MYSQL_TIMESTAMP_NONE; + else + valid_MYSQL_TIME_to_valid_value(thd, warn, opt); +} + + +static uint msec_round_add[7]= +{ + 500000000, + 50000000, + 5000000, + 500000, + 50000, + 5000, + 0 +}; + + +Sec9 & Sec9::round(uint dec) +{ + DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS); + if (Sec6::add_nanoseconds(m_nsec + msec_round_add[dec])) + m_sec++; + m_nsec= 0; + Sec6::trunc(dec); + return *this; +} + + +void Timestamp::round_or_set_max(uint dec, int *warn) +{ + DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS); + if (add_nanoseconds_usec(msec_round_add[dec]) && + tv_sec++ >= TIMESTAMP_MAX_VALUE) + { + tv_sec= TIMESTAMP_MAX_VALUE; + tv_usec= TIME_MAX_SECOND_PART; + *warn|= MYSQL_TIME_WARN_OUT_OF_RANGE; + } + my_timeval_trunc(this, dec); +} + + +bool Temporal::add_nanoseconds_with_round(THD *thd, int *warn, + date_conv_mode_t mode, + ulong nsec) +{ + switch (time_type) { + case MYSQL_TIMESTAMP_TIME: + { + ulong max_hour= (mode & (TIME_INTERVAL_DAY | TIME_INTERVAL_hhmmssff)) ? 
+ TIME_MAX_INTERVAL_HOUR : TIME_MAX_HOUR; + time_round_or_set_max(6, warn, max_hour, nsec); + return false; + } + case MYSQL_TIMESTAMP_DATETIME: + return datetime_round_or_invalidate(thd, 6, warn, nsec); + case MYSQL_TIMESTAMP_DATE: + return false; + case MYSQL_TIMESTAMP_NONE: + return false; + case MYSQL_TIMESTAMP_ERROR: + break; + } + DBUG_ASSERT(0); + return false; +} + + +void Temporal::time_round_or_set_max(uint dec, int *warn, + ulong max_hour, ulong nsec) +{ + DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS); + if (add_nanoseconds_mmssff(nsec) && ++hour > max_hour) + { + time_hhmmssff_set_max(max_hour); + *warn|= MYSQL_TIME_WARN_OUT_OF_RANGE; + } + my_time_trunc(this, dec); +} + + +void Time::round_or_set_max(uint dec, int *warn, ulong nsec) +{ + Temporal::time_round_or_set_max(dec, warn, TIME_MAX_HOUR, nsec); + DBUG_ASSERT(is_valid_time_slow()); +} + + +void Time::round_or_set_max(uint dec, int *warn) +{ + round_or_set_max(dec, warn, msec_round_add[dec]); +} + +/** + Create from a DATETIME by subtracting a given number of days, + implementing an optimized version of calc_time_diff(). 
+*/ +void Time::make_from_datetime_with_days_diff(int *warn, const MYSQL_TIME *from, + long days) +{ + *warn= 0; + DBUG_ASSERT(from->time_type == MYSQL_TIMESTAMP_DATETIME || + from->time_type == MYSQL_TIMESTAMP_DATE); + long daynr= calc_daynr(from->year, from->month, from->day); + long daydiff= daynr - days; + if (!daynr) // Zero date + { + set_zero_time(this, MYSQL_TIMESTAMP_TIME); + neg= true; + hour= TIME_MAX_HOUR + 1; // to report "out of range" in "warn" + } + else if (daydiff >=0) + { + neg= false; + year= month= day= 0; + hhmmssff_copy(from); + hour+= daydiff * 24; + time_type= MYSQL_TIMESTAMP_TIME; + } + else + { + longlong timediff= ((((daydiff * 24LL + + from->hour) * 60LL + + from->minute) * 60LL + + from->second) * 1000000LL + + from->second_part); + unpack_time(timediff, this, MYSQL_TIMESTAMP_TIME); + if (year || month) + { + *warn|= MYSQL_TIME_WARN_OUT_OF_RANGE; + year= month= day= 0; + hour= TIME_MAX_HOUR + 1; + } + } + // The above code can generate TIME values outside of the valid TIME range. 
+ adjust_time_range_or_invalidate(warn); +} + + +void Time::make_from_datetime_move_day_to_hour(int *warn, + const MYSQL_TIME *from) +{ + *warn= 0; + DBUG_ASSERT(from->time_type == MYSQL_TIMESTAMP_DATE || + from->time_type == MYSQL_TIMESTAMP_DATETIME); + time_type= MYSQL_TIMESTAMP_TIME; + neg= false; + year= month= day= 0; + hhmmssff_copy(from); + datetime_to_time_YYYYMMDD_000000DD_mix_to_hours(warn, from->year, + from->month, from->day); + adjust_time_range_or_invalidate(warn); +} + + +void Time::make_from_datetime(int *warn, const MYSQL_TIME *from, long curdays) +{ + if (!curdays) + make_from_datetime_move_day_to_hour(warn, from); + else + make_from_datetime_with_days_diff(warn, from, curdays); +} + + +void Time::make_from_time(int *warn, const MYSQL_TIME *from) +{ + DBUG_ASSERT(from->time_type == MYSQL_TIMESTAMP_TIME); + if (from->year || from->month) + make_from_out_of_range(warn); + else + { + *warn= 0; + DBUG_ASSERT(from->day == 0); + *(static_cast(this))= *from; + adjust_time_range_or_invalidate(warn); + } +} + + +uint Time::binary_length_to_precision(uint length) +{ + switch (length) { + case 3: return 0; + case 4: return 2; + case 5: return 4; + case 6: return 6; + } + DBUG_ASSERT(0); + return 0; +} + + +Time::Time(const Native &native) +{ + uint dec= binary_length_to_precision(native.length()); + longlong tmp= my_time_packed_from_binary((const uchar *) native.ptr(), dec); + TIME_from_longlong_time_packed(this, tmp); + DBUG_ASSERT(is_valid_time()); +} + + +Time::Time(int *warn, const MYSQL_TIME *from, long curdays) +{ + switch (from->time_type) { + case MYSQL_TIMESTAMP_NONE: + case MYSQL_TIMESTAMP_ERROR: + make_from_out_of_range(warn); + break; + case MYSQL_TIMESTAMP_DATE: + case MYSQL_TIMESTAMP_DATETIME: + make_from_datetime(warn, from, curdays); + break; + case MYSQL_TIMESTAMP_TIME: + make_from_time(warn, from); + break; + } + DBUG_ASSERT(is_valid_value_slow()); +} + + +Time::Time(int *warn, bool neg, ulonglong hour, uint minute, const Sec6 &second) +{ + 
DBUG_ASSERT(second.sec() <= 59); + *warn= 0; + set_zero_time(this, MYSQL_TIMESTAMP_TIME); + MYSQL_TIME::neg= neg; + MYSQL_TIME::hour= hour > TIME_MAX_HOUR ? (uint) (TIME_MAX_HOUR + 1) : + (uint) hour; + MYSQL_TIME::minute= minute; + MYSQL_TIME::second= (uint) second.sec(); + MYSQL_TIME::second_part= second.usec(); + adjust_time_range_or_invalidate(warn); +} + + +void Temporal_with_date::make_from_item(THD *thd, Item *item, + date_mode_t fuzzydate) +{ + date_conv_mode_t flags= date_conv_mode_t(fuzzydate) & ~TIME_TIME_ONLY; + /* + Some TIME type items return error when trying to do get_date() + without TIME_TIME_ONLY set (e.g. Item_field for Field_time). + In the SQL standard time->datetime conversion mode we add TIME_TIME_ONLY. + In the legacy time->datetime conversion mode we do not add TIME_TIME_ONLY + and leave it to get_date() to check date. + */ + date_conv_mode_t time_flag= (item->field_type() == MYSQL_TYPE_TIME && + !(thd->variables.old_behavior & OLD_MODE_ZERO_DATE_TIME_CAST)) ? + TIME_TIME_ONLY : TIME_CONV_NONE; + Options opt(flags | time_flag, time_round_mode_t(fuzzydate)); + if (item->get_date(thd, this, opt)) + time_type= MYSQL_TIMESTAMP_NONE; + else if (time_type == MYSQL_TIMESTAMP_TIME) + { + MYSQL_TIME tmp; + if (time_to_datetime_with_warn(thd, this, &tmp, flags)) + time_type= MYSQL_TIMESTAMP_NONE; + else + *(static_cast(this))= tmp; + } +} + + +void Temporal_with_date::check_date_or_invalidate(int *warn, + date_conv_mode_t flags) +{ + if (::check_date(this, pack_time(this) != 0, + ulonglong(flags & TIME_MODE_FOR_XXX_TO_DATE), warn)) + time_type= MYSQL_TIMESTAMP_NONE; +} + + +void Datetime::make_from_time(THD *thd, int *warn, const MYSQL_TIME *from, + date_conv_mode_t flags) +{ + DBUG_ASSERT(from->time_type == MYSQL_TIMESTAMP_TIME); + if (time_to_datetime(thd, from, this)) + make_from_out_of_range(warn); + else + { + *warn= 0; + check_date_or_invalidate(warn, flags); + } +} + + +void Datetime::make_from_datetime(THD *thd, int *warn, const MYSQL_TIME 
*from, + date_conv_mode_t flags) +{ + DBUG_ASSERT(from->time_type == MYSQL_TIMESTAMP_DATE || + from->time_type == MYSQL_TIMESTAMP_DATETIME); + if (from->neg || check_datetime_range(from)) + make_from_out_of_range(warn); + else + { + *warn= 0; + *(static_cast(this))= *from; + date_to_datetime(this); + check_date_or_invalidate(warn, flags); + } +} + + +Datetime::Datetime(THD *thd, const timeval &tv) +{ + thd->variables.time_zone->gmt_sec_to_TIME(this, tv.tv_sec); + second_part= tv.tv_usec; + thd->used|= THD::TIME_ZONE_USED; + DBUG_ASSERT(is_valid_value_slow()); +} + + +Datetime::Datetime(THD *thd, int *warn, const MYSQL_TIME *from, + date_conv_mode_t flags) +{ + DBUG_ASSERT(bool(flags & TIME_TIME_ONLY) == false); + switch (from->time_type) { + case MYSQL_TIMESTAMP_ERROR: + case MYSQL_TIMESTAMP_NONE: + make_from_out_of_range(warn); + break; + case MYSQL_TIMESTAMP_TIME: + make_from_time(thd, warn, from, flags); + break; + case MYSQL_TIMESTAMP_DATETIME: + case MYSQL_TIMESTAMP_DATE: + make_from_datetime(thd, warn, from, flags); + break; + } + DBUG_ASSERT(is_valid_value_slow()); +} + +Datetime::Datetime(my_time_t unix_time, ulong second_part_arg, + const Time_zone* time_zone) +{ + time_zone->gmt_sec_to_TIME(this, unix_time); + second_part= second_part_arg; +} + + +bool Temporal::datetime_add_nanoseconds_or_invalidate(THD *thd, int *warn, ulong nsec) +{ + if (!add_nanoseconds_mmssff(nsec)) + return false; + /* + Overflow happened on minutes. Now we need to add 1 hour to the value. + Catch a special case for the maximum possible date and hour==23, to + truncate '9999-12-31 23:59:59.9999999' (with 7 fractional digits) + to '9999-12-31 23:59:59.999999' (with 6 fractional digits), + with a warning, instead of returning an error, so this statement: + INSERT INTO (datetime_column) VALUES ('9999-12-31 23:59:59.9999999'); + inserts a value truncated to 6 fractional digits, instead of zero + date '0000-00-00 00:00:00.000000'. 
+ */ + if (year == 9999 && month == 12 && day == 31 && hour == 23) + { + minute= 59; + second= 59; + second_part= 999999; + *warn= MYSQL_TIME_WARN_OUT_OF_RANGE; + return false; + } + INTERVAL interval; + memset(&interval, 0, sizeof(interval)); + interval.hour= 1; + /* + date_add_interval cannot handle bad dates with zero YYYY or MM. + Note, check_date(NO_ZERO_XX) does not check YYYY against zero, + so let's additionally check it. + */ + if (year == 0 || + check_date(TIME_NO_ZERO_IN_DATE | TIME_NO_ZERO_DATE, warn) || + date_add_interval(thd, this, INTERVAL_HOUR, interval, false/*no warn*/)) + { + char buf[MAX_DATE_STRING_REP_LENGTH]; + my_date_to_str(this, buf); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_TYPE, + ER_THD(thd, ER_WRONG_VALUE_FOR_TYPE), + "date", buf, "round(datetime)"); + make_from_out_of_range(warn); + return true; + } + return false; +} + + +bool Temporal::datetime_round_or_invalidate(THD *thd, uint dec, int *warn, ulong nsec) +{ + DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS); + if (datetime_add_nanoseconds_or_invalidate(thd, warn, nsec)) + return true; + my_datetime_trunc(this, dec); + return false; + +} + + +bool Datetime::round_or_invalidate(THD *thd, uint dec, int *warn) +{ + return round_or_invalidate(thd, dec, warn, msec_round_add[dec]); +} + + +Datetime_from_temporal::Datetime_from_temporal(THD *thd, Item *temporal, + date_conv_mode_t fuzzydate) + :Datetime(thd, temporal, Options(fuzzydate, TIME_FRAC_NONE)) +{ + // Exact rounding mode does not matter + DBUG_ASSERT(temporal->cmp_type() == TIME_RESULT); +} + + +Datetime_truncation_not_needed::Datetime_truncation_not_needed(THD *thd, Item *item, + date_conv_mode_t mode) + :Datetime(thd, item, Options(mode, TIME_FRAC_NONE)) +{ + /* + The called Datetime() constructor only would truncate nanoseconds if they + existed (but we know there were no nanoseconds). Here we assert that there + are also no microsecond digits outside of the scale specified in "dec". 
+ */ + DBUG_ASSERT(!is_valid_datetime() || + fraction_remainder(MY_MIN(item->decimals, + TIME_SECOND_PART_DIGITS)) == 0); +} + +/********************************************************************/ + +decimal_digits_t Type_numeric_attributes::find_max_decimals(Item **item, uint nitems) +{ + decimal_digits_t res= 0; + for (uint i= 0; i < nitems; i++) + set_if_bigger(res, item[i]->decimals); + return res; +} + + +uint Type_numeric_attributes::count_unsigned(Item **item, uint nitems) +{ + uint res= 0; + for (uint i= 0 ; i < nitems ; i++) + { + if (item[i]->unsigned_flag) + res++; + } + return res; +} + + +uint32 Type_numeric_attributes::find_max_char_length(Item **item, uint nitems) +{ + uint32 char_length= 0; + for (uint i= 0; i < nitems ; i++) + set_if_bigger(char_length, item[i]->max_char_length()); + return char_length; +} + + +uint32 Type_numeric_attributes::find_max_octet_length(Item **item, uint nitems) +{ + uint32 octet_length= 0; + for (uint i= 0; i < nitems ; i++) + set_if_bigger(octet_length, item[i]->max_length); + return octet_length; +} + + +decimal_digits_t Type_numeric_attributes:: +find_max_decimal_int_part(Item **item, uint nitems) +{ + decimal_digits_t max_int_part= 0; + for (uint i=0 ; i < nitems ; i++) + set_if_bigger(max_int_part, item[i]->decimal_int_part()); + return max_int_part; +} + + +/** + Set max_length/decimals of function if function is fixed point and + result length/precision depends on argument ones. 
+*/ + +void +Type_numeric_attributes::aggregate_numeric_attributes_decimal(Item **item, + uint nitems, + bool unsigned_arg) +{ + decimal_digits_t max_int_part= find_max_decimal_int_part(item, nitems); + decimals= find_max_decimals(item, nitems); + decimal_digits_t precision= (decimal_digits_t) + MY_MIN(max_int_part + decimals, DECIMAL_MAX_PRECISION); + max_length= my_decimal_precision_to_length_no_truncation(precision, + decimals, + unsigned_flag); +} + + +/** + Set max_length/decimals of function if function is floating point and + result length/precision depends on argument ones. +*/ + +void +Type_numeric_attributes::aggregate_numeric_attributes_real(Item **items, + uint nitems) +{ + uint32 length= 0; + decimals= 0; + max_length= 0; + unsigned_flag= false; + for (uint i=0 ; i < nitems ; i++) + { + if (decimals < FLOATING_POINT_DECIMALS) + { + set_if_bigger(decimals, items[i]->decimals); + /* Will be ignored if items[i]->decimals >= FLOATING_POINT_DECIMALS */ + set_if_bigger(length, (items[i]->max_length - items[i]->decimals)); + } + set_if_bigger(max_length, items[i]->max_length); + } + if (decimals < FLOATING_POINT_DECIMALS) + { + max_length= length; + length+= decimals; + if (length < max_length) // If previous operation gave overflow + max_length= UINT_MAX32; + else + max_length= length; + } + // Corner case: COALESCE(DOUBLE(255,4), DOUBLE(255,3)) -> FLOAT(255, 4) + set_if_smaller(max_length, MAX_FIELD_CHARLENGTH); +} + + +/** + Calculate max_length and decimals for string functions. + + @param field_type Field type. + @param items Argument array. + @param nitems Number of arguments. + + @retval False on success, true on error. 
+*/ +bool Type_std_attributes:: +aggregate_attributes_string(const LEX_CSTRING &func_name, + Item **items, uint nitems) +{ + if (agg_arg_charsets_for_string_result(collation, func_name, + items, nitems, 1)) + return true; + if (collation.collation == &my_charset_bin) + max_length= find_max_octet_length(items, nitems); + else + fix_char_length(find_max_char_length(items, nitems)); + unsigned_flag= false; + decimals= max_length ? NOT_FIXED_DEC : 0; + return false; +} + + +/* + Find a handler by its ODBC literal data type. + + @param type_str - data type name, not necessarily 0-terminated + @retval - a pointer to data type handler if type_str points + to a known ODBC literal data type, or NULL otherwise +*/ +const Type_handler * +Type_handler::odbc_literal_type_handler(const LEX_CSTRING *type_str) +{ + if (type_str->length == 1) + { + if (type_str->str[0] == 'd') // {d'2001-01-01'} + return &type_handler_newdate; + else if (type_str->str[0] == 't') // {t'10:20:30'} + return &type_handler_time2; + } + else if (type_str->length == 2) // {ts'2001-01-01 10:20:30'} + { + if (type_str->str[0] == 't' && type_str->str[1] == 's') + return &type_handler_datetime2; + } + return NULL; // Not a known ODBC literal type +} + + +/** + This method is used by: + - Item_user_var_as_out_param::field_type() + - Item_func_udf_str::field_type() + - Item_empty_string::make_send_field() + + TODO: type_handler_adjusted_to_max_octet_length() and string_type_handler() + provide very similar functionality, to properly choose between + VARCHAR/VARBINARY vs TEXT/BLOB variations taking into account maximum + possible octet length. + + We should probably get rid of either of them and use the same method + all around the code. 
+*/ +const Type_handler * +Type_handler::string_type_handler(uint max_octet_length) +{ + if (max_octet_length >= 16777216) + return &type_handler_long_blob; + else if (max_octet_length >= 65536) + return &type_handler_medium_blob; + else if (max_octet_length >= MAX_FIELD_VARCHARLENGTH) + return &type_handler_blob; + return &type_handler_varchar; +} + + +const Type_handler * +Type_handler::varstring_type_handler(const Item *item) +{ + if (!item->max_length) + return &type_handler_string; + if (item->too_big_for_varchar()) + return blob_type_handler(item->max_length); + return &type_handler_varchar; +} + + +const Type_handler * +Type_handler::blob_type_handler(uint max_octet_length) +{ + if (max_octet_length <= 255) + return &type_handler_tiny_blob; + if (max_octet_length <= 65535) + return &type_handler_blob; + if (max_octet_length <= 16777215) + return &type_handler_medium_blob; + return &type_handler_long_blob; +} + + +const Type_handler * +Type_handler::blob_type_handler(const Item *item) +{ + return blob_type_handler(item->max_length); +} + +/** + This method is used by: + - Item_sum_hybrid, e.g. MAX(item), MIN(item). + - Item_func_set_user_var +*/ +const Type_handler * +Type_handler_string_result::type_handler_adjusted_to_max_octet_length( + uint max_octet_length, + CHARSET_INFO *cs) const +{ + if (max_octet_length / cs->mbmaxlen <= CONVERT_IF_BIGGER_TO_BLOB) + return &type_handler_varchar; // See also Item::too_big_for_varchar() + if (max_octet_length >= 16777216) + return &type_handler_long_blob; + else if (max_octet_length >= 65536) + return &type_handler_medium_blob; + return &type_handler_blob; +} + + +CHARSET_INFO *Type_handler::charset_for_protocol(const Item *item) const +{ + /* + For backward compatibility, to make numeric + data types return "binary" charset in client-side metadata. 
+ */ + return &my_charset_bin; +} + + +bool +Type_handler::Item_func_or_sum_illegal_param(const LEX_CSTRING &funcname) const +{ + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0), + name().ptr(), funcname.str); + return true; +} + + +bool +Type_handler::Item_func_or_sum_illegal_param(const Item_func_or_sum *it) const +{ + return Item_func_or_sum_illegal_param(it->func_name_cstring()); +} + + +CHARSET_INFO * +Type_handler_string_result::charset_for_protocol(const Item *item) const +{ + return item->collation.collation; +} + + +const Type_handler * +Type_handler::get_handler_by_cmp_type(Item_result type) +{ + switch (type) { + case REAL_RESULT: return &type_handler_double; + case INT_RESULT: return &type_handler_slonglong; + case DECIMAL_RESULT: return &type_handler_newdecimal; + case STRING_RESULT: return &type_handler_long_blob; + case TIME_RESULT: return &type_handler_datetime; + case ROW_RESULT: return &type_handler_row; + } + DBUG_ASSERT(0); + return &type_handler_string; +} + + +/* + If we have a mixture of: + - a MariaDB standard (built-in permanent) data type, and + - a non-standard (optionally compiled or pluggable) data type, + then we ask the type collection of the non-standard type to aggregate + the mixture. + The standard type collection type_collection_std knows nothing + about non-standard types, while non-standard type collections + know everything about standard data types. +*/ +const Type_collection * +Type_handler::type_collection_for_aggregation(const Type_handler *h0, + const Type_handler *h1) +{ + const Type_collection *c0= h0->type_collection(); + const Type_collection *c1= h1->type_collection(); + if (c0 == c1) + return c0; + if (c0 == &type_collection_std) + return c1; + if (c1 == &type_collection_std) + return c0; + /* + A mixture of two non-standard collections. + The caller code will continue to aggregate through + the type aggregators in Type_handler_data. 
+ */ + return NULL; +} + + +Type_handler_hybrid_field_type::Type_handler_hybrid_field_type() + :m_type_handler(&type_handler_double) +{ +} + + +/***************************************************************************/ + +/* number of bytes to store second_part part of the TIMESTAMP(N) */ +uint Type_handler_timestamp::m_sec_part_bytes[MAX_DATETIME_PRECISION + 1]= + { 0, 1, 1, 2, 2, 3, 3 }; + +/* number of bytes to store DATETIME(N) */ +uint Type_handler_datetime::m_hires_bytes[MAX_DATETIME_PRECISION + 1]= + { 5, 6, 6, 7, 7, 7, 8 }; + +/* number of bytes to store TIME(N) */ +uint Type_handler_time::m_hires_bytes[MAX_DATETIME_PRECISION + 1]= + { 3, 4, 4, 5, 5, 5, 6 }; + +/***************************************************************************/ + +const Name Type_handler::version() const +{ + static const Name ver(STRING_WITH_LEN("")); + return ver; +} + +const Name & Type_handler::version_mariadb53() +{ + static const Name ver(STRING_WITH_LEN("mariadb-5.3")); + return ver; +} + +const Name & Type_handler::version_mysql56() +{ + static const Name ver(STRING_WITH_LEN("mysql-5.6")); + return ver; +} + + +/***************************************************************************/ + +const Type_limits_int *Type_handler_tiny::type_limits_int() const +{ + static const Type_limits_sint8 limits_sint8; + return &limits_sint8; +} + +const Type_limits_int *Type_handler_utiny::type_limits_int() const +{ + static const Type_limits_uint8 limits_uint8; + return &limits_uint8; +} + +const Type_limits_int *Type_handler_short::type_limits_int() const +{ + static const Type_limits_sint16 limits_sint16; + return &limits_sint16; +} + +const Type_limits_int *Type_handler_ushort::type_limits_int() const +{ + static const Type_limits_uint16 limits_uint16; + return &limits_uint16; +} + +const Type_limits_int *Type_handler_int24::type_limits_int() const +{ + static const Type_limits_sint24 limits_sint24; + return &limits_sint24; +} + +const Type_limits_int 
*Type_handler_uint24::type_limits_int() const +{ + static const Type_limits_uint24 limits_uint24; + return &limits_uint24; +} + +const Type_limits_int *Type_handler_long::type_limits_int() const +{ + static const Type_limits_sint32 limits_sint32; + return &limits_sint32; +} + +const Type_limits_int *Type_handler_ulong::type_limits_int() const +{ + static const Type_limits_uint32 limits_uint32; + return &limits_uint32; +} + +const Type_limits_int *Type_handler_longlong::type_limits_int() const +{ + static const Type_limits_sint64 limits_sint64; + return &limits_sint64; +} + +const Type_limits_int *Type_handler_ulonglong::type_limits_int() const +{ + static const Type_limits_uint64 limits_uint64; + return &limits_uint64; +} + + +/***************************************************************************/ +const Type_handler *Type_handler_bool::type_handler_signed() const +{ + return &type_handler_bool; +} + +const Type_handler *Type_handler_bool::type_handler_unsigned() const +{ + return &type_handler_bool; +} + +const Type_handler *Type_handler_tiny::type_handler_signed() const +{ + return &type_handler_stiny; +} + +const Type_handler *Type_handler_tiny::type_handler_unsigned() const +{ + return &type_handler_utiny; +} + +const Type_handler *Type_handler_short::type_handler_signed() const +{ + return &type_handler_sshort; +} + +const Type_handler *Type_handler_short::type_handler_unsigned() const +{ + return &type_handler_ushort; +} + +const Type_handler *Type_handler_int24::type_handler_signed() const +{ + return &type_handler_sint24; +} + +const Type_handler *Type_handler_int24::type_handler_unsigned() const +{ + return &type_handler_uint24; +} + +const Type_handler *Type_handler_long::type_handler_signed() const +{ + return &type_handler_slong; +} + +const Type_handler *Type_handler_long::type_handler_unsigned() const +{ + return &type_handler_ulong; +} + +const Type_handler *Type_handler_longlong::type_handler_signed() const +{ + return &type_handler_slonglong; 
+} + +const Type_handler *Type_handler_longlong::type_handler_unsigned() const +{ + return &type_handler_ulonglong; +} + +/***************************************************************************/ + +const Type_handler *Type_handler_null::type_handler_for_comparison() const +{ + return &type_handler_null; +} + + +const Type_handler *Type_handler_int_result::type_handler_for_comparison() const +{ + return &type_handler_slonglong; +} + + +const Type_handler *Type_handler_string_result::type_handler_for_comparison() const +{ + return &type_handler_long_blob; +} + + +const Type_handler *Type_handler_decimal_result::type_handler_for_comparison() const +{ + return &type_handler_newdecimal; +} + + +const Type_handler *Type_handler_real_result::type_handler_for_comparison() const +{ + return &type_handler_double; +} + + +const Type_handler *Type_handler_time_common::type_handler_for_comparison() const +{ + return &type_handler_time; +} + +const Type_handler *Type_handler_date_common::type_handler_for_comparison() const +{ + return &type_handler_newdate; +} + + +const Type_handler *Type_handler_datetime_common::type_handler_for_comparison() const +{ + return &type_handler_datetime; +} + + +const Type_handler *Type_handler_timestamp_common::type_handler_for_comparison() const +{ + return &type_handler_timestamp; +} + + +const Type_handler *Type_handler_row::type_handler_for_comparison() const +{ + return &type_handler_row; +} + +/***************************************************************************/ + +const Type_handler * +Type_handler_timestamp_common::type_handler_for_native_format() const +{ + return &type_handler_timestamp2; +} + + +const Type_handler * +Type_handler_time_common::type_handler_for_native_format() const +{ + return &type_handler_time2; +} + + +/***************************************************************************/ + +const Type_handler *Type_handler_typelib::type_handler_for_item_field() const +{ + return &type_handler_varchar; +} + + 
+const Type_handler *Type_handler_typelib::cast_to_int_type_handler() const +{ + return &type_handler_slonglong; +} + + +/***************************************************************************/ + +bool +Type_handler_hybrid_field_type::aggregate_for_result(const Type_handler *other) +{ + Type_handler_pair tp(m_type_handler, other); + do + { + const Type_handler *hres; + const Type_collection *c; + if (((c= Type_handler::type_collection_for_aggregation(tp.a(), tp.b())) && + (hres= c->aggregate_for_result(tp.a(), tp.b()))) || + (hres= type_handler_data-> + m_type_aggregator_for_result.find_handler(tp.a(), tp.b()))) + { + m_type_handler= hres; + return false; + } + } while (tp.to_base()); + return true; +} + + +const Type_handler * +Type_handler::type_handler_long_or_longlong(uint max_char_length, + bool unsigned_flag) +{ + if (unsigned_flag) + { + if (max_char_length <= MY_INT32_NUM_DECIMAL_DIGITS - 2) + return &type_handler_ulong; + return &type_handler_ulonglong; + } + if (max_char_length <= MY_INT32_NUM_DECIMAL_DIGITS - 2) + return &type_handler_slong; + return &type_handler_slonglong; +} + + +/* + This method is called for CASE (and its abbreviations) and LEAST/GREATEST + when data type aggregation returned LONGLONG and there were some BIT + expressions. This helps to adjust the data type from LONGLONG to LONG + if all expressions fit. +*/ +const Type_handler * +Type_handler::bit_and_int_mixture_handler(uint max_char_length) +{ + if (max_char_length <= MY_INT32_NUM_DECIMAL_DIGITS) + return &type_handler_slong; + return &type_handler_slonglong; +} + + +/** + @brief Aggregates field types from the array of items. + + @param[in] items array of items to aggregate the type from + @param[in] nitems number of items in the array + @param[in] treat_bit_as_number - if BIT should be aggregated to a non-BIT + counterpart as a LONGLONG number or as a VARBINARY string. 
+ + Currently behaviour depends on the function: + - LEAST/GREATEST treat BIT as VARBINARY when + aggregating with a non-BIT counterpart. + Note, UNION also works this way. + + - CASE, COALESCE, IF, IFNULL treat BIT as LONGLONG when + aggregating with a non-BIT counterpart; + + This inconsistency may be changed in the future. See MDEV-8867. + + Note, independently from "treat_bit_as_number": + - a single BIT argument gives BIT as a result + - two BIT couterparts give BIT as a result + - (BIT + explicit NULL) or (explicit NULL + BIT) give BIT + + @details This function aggregates field types from the array of items. + Found type is supposed to be used later as the result field type + of a multi-argument function. + Aggregation itself is performed by Type_handler::aggregate_for_result(). + + @note The term "aggregation" is used here in the sense of inferring the + result type of a function from its argument types. + + @retval false - on success + @retval true - on error +*/ + +bool Type_handler_hybrid_field_type:: +aggregate_for_result(const LEX_CSTRING &funcname, Item **items, uint nitems, + bool treat_bit_as_number) +{ + bool bit_and_non_bit_mixture_found= false; + uint32 max_display_length; + if (!nitems || items[0]->result_type() == ROW_RESULT) + { + DBUG_ASSERT(0); + set_handler(&type_handler_null); + return true; + } + set_handler(items[0]->type_handler()); + max_display_length= items[0]->max_display_length(); + for (uint i= 1 ; i < nitems ; i++) + { + const Type_handler *cur= items[i]->type_handler(); + set_if_bigger(max_display_length, items[i]->max_display_length()); + uint bit_count= (type_handler() == &type_handler_bit) + + (cur == &type_handler_bit); + uint null_count= (type_handler() == &type_handler_null) + + (cur == &type_handler_null); + if (treat_bit_as_number && bit_count == 1 && null_count == 0) + { + bit_and_non_bit_mixture_found= true; + if (type_handler() == &type_handler_bit) + set_handler(&type_handler_slonglong); // BIT + non-BIT + else + 
cur= &type_handler_slonglong; // non-BIT + BIT + } + if (aggregate_for_result(cur)) + { + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPES2_FOR_OPERATION, MYF(0), + type_handler()->name().ptr(), cur->name().ptr(), funcname.str); + return true; + } + } + if (bit_and_non_bit_mixture_found && type_handler() == &type_handler_slonglong) + set_handler(Type_handler::bit_and_int_mixture_handler(max_display_length)); + return false; +} + +/** + Collect built-in data type handlers for comparison. + This method is very similar to item_cmp_type() defined in item.cc. + Now they coexist. Later item_cmp_type() will be removed. + In addition to item_cmp_type(), this method correctly aggregates + TIME with DATETIME/TIMESTAMP/DATE, so no additional find_date_time_item() + is needed after this call. +*/ + +bool +Type_handler_hybrid_field_type::aggregate_for_comparison(const Type_handler *h) +{ + DBUG_ASSERT(m_type_handler == m_type_handler->type_handler_for_comparison()); + DBUG_ASSERT(h == h->type_handler_for_comparison()); + const Type_handler *hres; + const Type_collection *c; + if (!(c= Type_handler::type_collection_for_aggregation(m_type_handler, h)) || + !(hres= c->aggregate_for_comparison(m_type_handler, h))) + hres= type_handler_data-> + m_type_aggregator_for_comparison.find_handler(m_type_handler, h); + if (!hres) + return true; + m_type_handler= hres; + DBUG_ASSERT(m_type_handler == m_type_handler->type_handler_for_comparison()); + return false; +} + + +const Type_handler * +Type_collection_std::aggregate_for_comparison(const Type_handler *ha, + const Type_handler *hb) const +{ + Item_result a= ha->cmp_type(); + Item_result b= hb->cmp_type(); + if (a == STRING_RESULT && b == STRING_RESULT) + return &type_handler_long_blob; + if (a == INT_RESULT && b == INT_RESULT) + return &type_handler_slonglong; + if (a == ROW_RESULT || b == ROW_RESULT) + return &type_handler_row; + if (a == TIME_RESULT || b == TIME_RESULT) + { + if ((a == TIME_RESULT) + (b == TIME_RESULT) == 1) + { + /* + We're 
here if there's only one temporal data type: + either m_type_handler or h. + Temporal types bit non-temporal types. + */ + const Type_handler *res= b == TIME_RESULT ? hb : ha; + /* + Compare TIMESTAMP to a non-temporal type as DATETIME. + This is needed to make queries with fuzzy dates work: + SELECT * FROM t1 + WHERE + ts BETWEEN '0000-00-00' AND '2010-00-01 00:00:00'; + */ + if (res->type_handler_for_native_format() == &type_handler_timestamp2) + return &type_handler_datetime; + return res; + } + else + { + /* + We're here if both m_type_handler and h are temporal data types. + - If both data types are TIME, we preserve TIME. + - If both data types are DATE, we preserve DATE. + Preserving DATE is needed for EXPLAIN FORMAT=JSON, + to print DATE constants using proper format: + 'YYYY-MM-DD' rather than 'YYYY-MM-DD 00:00:00'. + */ + if (ha->field_type() != hb->field_type()) + return &type_handler_datetime; + return ha; + } + } + if ((a == INT_RESULT && b == STRING_RESULT) || + (b == INT_RESULT && a == STRING_RESULT)) + return &type_handler_newdecimal; + if ((a == INT_RESULT || a == DECIMAL_RESULT) && + (b == INT_RESULT || b == DECIMAL_RESULT)) + return &type_handler_newdecimal; + return &type_handler_double; +} + + +/** + Aggregate data type handler for LEAST/GRATEST. + aggregate_for_min_max() is close to aggregate_for_comparison(), + but tries to preserve the exact type handler for string, int and temporal + data types (instead of converting to super-types). + FLOAT is not preserved and is converted to its super-type (DOUBLE). + This should probably fixed eventually, for symmetry. 
+*/ + +bool +Type_handler_hybrid_field_type::aggregate_for_min_max(const Type_handler *h) +{ + Type_handler_pair tp(m_type_handler, h); + do + { + const Type_handler *hres; + const Type_collection *c; + if (((c= Type_handler::type_collection_for_aggregation(tp.a(), tp.b())) && + (hres= c->aggregate_for_min_max(tp.a(), tp.b()))) || + (hres= type_handler_data-> + m_type_aggregator_for_result.find_handler(tp.a(), tp.b()))) + { + /* + For now we suppose that these two expressions: + - LEAST(type1, type2) + - COALESCE(type1, type2) + return the same data type (or both expressions return error) + if type1 and/or type2 are non-traditional. + This may change in the future. + */ + m_type_handler= hres; + return false; + } + } while (tp.to_base()); + return true; +} + + +const Type_handler * +Type_collection_std::aggregate_for_min_max(const Type_handler *ha, + const Type_handler *hb) const +{ + Item_result a= ha->cmp_type(); + Item_result b= hb->cmp_type(); + DBUG_ASSERT(a != ROW_RESULT); // Disallowed by check_cols() in fix_fields() + DBUG_ASSERT(b != ROW_RESULT); // Disallowed by check_cols() in fix_fields() + + if (a == STRING_RESULT && b == STRING_RESULT) + return Type_collection_std::aggregate_for_result(ha, hb); + if (a == INT_RESULT && b == INT_RESULT) + { + // BIT aggregates with non-BIT as BIGINT + if (ha != hb) + { + if (ha == &type_handler_bit) + ha= &type_handler_slonglong; + else if (hb == &type_handler_bit) + hb= &type_handler_slonglong; + } + return Type_collection_std::aggregate_for_result(ha, hb); + } + if (a == TIME_RESULT || b == TIME_RESULT) + { + if ((ha->type_handler_for_native_format() == &type_handler_timestamp2) + + (hb->type_handler_for_native_format() == &type_handler_timestamp2) == 1) + { + /* + Handle LEAST(TIMESTAMP, non-TIMESTAMP) as DATETIME, + to make sure fuzzy dates work in this context: + LEAST('2001-00-00', timestamp_field) + */ + return &type_handler_datetime2; + } + if ((a == TIME_RESULT) + (b == TIME_RESULT) == 1) + { + /* + We're here 
if there's only one temporal data type: + either m_type_handler or h. + Temporal types bit non-temporal types. + */ + return (b == TIME_RESULT) ? hb : ha; + } + /* + We're here if both m_type_handler and h are temporal data types. + */ + return Type_collection_std::aggregate_for_result(ha, hb); + } + if ((a == INT_RESULT || a == DECIMAL_RESULT) && + (b == INT_RESULT || b == DECIMAL_RESULT)) + { + return &type_handler_newdecimal; + } + // Preserve FLOAT if two FLOATs, set to DOUBLE otherwise. + if (ha == &type_handler_float && hb == &type_handler_float) + return &type_handler_float; + return &type_handler_double; +} + + +bool +Type_handler_hybrid_field_type::aggregate_for_min_max(const LEX_CSTRING &funcname, + Item **items, uint nitems) +{ + bool bit_and_non_bit_mixture_found= false; + // LEAST/GREATEST require at least two arguments + DBUG_ASSERT(nitems > 1); + set_handler(items[0]->type_handler()); + for (uint i= 1; i < nitems; i++) + { + const Type_handler *cur= items[i]->type_handler(); + // Check if BIT + non-BIT, or non-BIT + BIT + bit_and_non_bit_mixture_found|= (m_type_handler == &type_handler_bit) != + (cur == &type_handler_bit); + if (aggregate_for_min_max(cur)) + { + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPES2_FOR_OPERATION, MYF(0), + type_handler()->name().ptr(), cur->name().ptr(), funcname.str); + return true; + } + } + if (bit_and_non_bit_mixture_found && type_handler() == &type_handler_slonglong) + { + uint32 max_display_length= items[0]->max_display_length(); + for (uint i= 1; i < nitems; i++) + set_if_bigger(max_display_length, items[i]->max_display_length()); + set_handler(Type_handler::bit_and_int_mixture_handler(max_display_length)); + } + return false; +} + + +const Type_handler * +Type_collection_std::aggregate_for_num_op(const Type_handler *h0, + const Type_handler *h1) const +{ + Item_result r0= h0->cmp_type(); + Item_result r1= h1->cmp_type(); + + if (r0 == REAL_RESULT || r1 == REAL_RESULT || + r0 == STRING_RESULT || r1 ==STRING_RESULT) + 
return &type_handler_double; + + if (r0 == TIME_RESULT || r1 == TIME_RESULT) + return &type_handler_datetime; + + if (r0 == DECIMAL_RESULT || r1 == DECIMAL_RESULT) + return &type_handler_newdecimal; + + DBUG_ASSERT(r0 == INT_RESULT && r1 == INT_RESULT); + return &type_handler_slonglong; +} + + +const Type_aggregator::Pair* +Type_aggregator::find_pair(const Type_handler *handler1, + const Type_handler *handler2) const +{ + for (uint i= 0; i < m_array.elements(); i++) + { + const Pair& el= m_array.at(i); + if (el.eq(handler1, handler2) || + (m_is_commutative && el.eq(handler2, handler1))) + return ⪙ + } + return NULL; +} + + +bool +Type_handler_hybrid_field_type::aggregate_for_num_op(const Type_aggregator *agg, + const Type_handler *h0, + const Type_handler *h1) +{ + Type_handler_pair tp(h0, h1); + do + { + const Type_handler *hres; + const Type_collection *c; + if (((c= Type_handler::type_collection_for_aggregation(tp.a(), tp.b())) && + (hres= c->aggregate_for_num_op(tp.a(), tp.b()))) || + (hres= agg->find_handler(tp.a(), tp.b()))) + { + m_type_handler= hres; + return false; + } + } while (tp.to_base()); + return true; +} + + +/***************************************************************************/ + +const Type_handler * +Type_handler::get_handler_by_field_type(enum_field_types type) +{ + switch (type) { + case MYSQL_TYPE_DECIMAL: return &type_handler_olddecimal; + case MYSQL_TYPE_NEWDECIMAL: return &type_handler_newdecimal; + case MYSQL_TYPE_TINY: return &type_handler_stiny; + case MYSQL_TYPE_SHORT: return &type_handler_sshort; + case MYSQL_TYPE_LONG: return &type_handler_slong; + case MYSQL_TYPE_LONGLONG: return &type_handler_slonglong; + case MYSQL_TYPE_INT24: return &type_handler_sint24; + case MYSQL_TYPE_YEAR: return &type_handler_year; + case MYSQL_TYPE_BIT: return &type_handler_bit; + case MYSQL_TYPE_FLOAT: return &type_handler_float; + case MYSQL_TYPE_DOUBLE: return &type_handler_double; + case MYSQL_TYPE_NULL: return &type_handler_null; + case 
MYSQL_TYPE_VARCHAR: return &type_handler_varchar; + case MYSQL_TYPE_TINY_BLOB: return &type_handler_tiny_blob; + case MYSQL_TYPE_MEDIUM_BLOB: return &type_handler_medium_blob; + case MYSQL_TYPE_LONG_BLOB: return &type_handler_long_blob; + case MYSQL_TYPE_BLOB: return &type_handler_blob; + case MYSQL_TYPE_VAR_STRING: return &type_handler_varchar; // Map to VARCHAR + case MYSQL_TYPE_STRING: return &type_handler_string; + case MYSQL_TYPE_ENUM: return &type_handler_varchar; // Map to VARCHAR + case MYSQL_TYPE_SET: return &type_handler_varchar; // Map to VARCHAR + case MYSQL_TYPE_GEOMETRY: +#ifdef HAVE_SPATIAL + return &type_handler_geometry; +#else + return NULL; +#endif + case MYSQL_TYPE_TIMESTAMP: return &type_handler_timestamp2;// Map to timestamp2 + case MYSQL_TYPE_TIMESTAMP2: return &type_handler_timestamp2; + case MYSQL_TYPE_DATE: return &type_handler_newdate; // Map to newdate + case MYSQL_TYPE_TIME: return &type_handler_time2; // Map to time2 + case MYSQL_TYPE_TIME2: return &type_handler_time2; + case MYSQL_TYPE_DATETIME: return &type_handler_datetime2; // Map to datetime2 + case MYSQL_TYPE_DATETIME2: return &type_handler_datetime2; + case MYSQL_TYPE_NEWDATE: + /* + NEWDATE is actually a real_type(), not a field_type(), + but it's used around the code in field_type() context. + We should probably clean up the code not to use MYSQL_TYPE_NEWDATE + in field_type() context and add DBUG_ASSERT(0) here. 
+ */ + return &type_handler_newdate; + case MYSQL_TYPE_VARCHAR_COMPRESSED: + case MYSQL_TYPE_BLOB_COMPRESSED: + break; + }; + DBUG_ASSERT(0); + return &type_handler_string; +} + + +const Type_handler * +Type_handler::get_handler_by_real_type(enum_field_types type) +{ + switch (type) { + case MYSQL_TYPE_DECIMAL: return &type_handler_olddecimal; + case MYSQL_TYPE_NEWDECIMAL: return &type_handler_newdecimal; + case MYSQL_TYPE_TINY: return &type_handler_stiny; + case MYSQL_TYPE_SHORT: return &type_handler_sshort; + case MYSQL_TYPE_LONG: return &type_handler_slong; + case MYSQL_TYPE_LONGLONG: return &type_handler_slonglong; + case MYSQL_TYPE_INT24: return &type_handler_sint24; + case MYSQL_TYPE_YEAR: return &type_handler_year; + case MYSQL_TYPE_BIT: return &type_handler_bit; + case MYSQL_TYPE_FLOAT: return &type_handler_float; + case MYSQL_TYPE_DOUBLE: return &type_handler_double; + case MYSQL_TYPE_NULL: return &type_handler_null; + case MYSQL_TYPE_VARCHAR: return &type_handler_varchar; + case MYSQL_TYPE_VARCHAR_COMPRESSED: return &type_handler_varchar_compressed; + case MYSQL_TYPE_TINY_BLOB: return &type_handler_tiny_blob; + case MYSQL_TYPE_MEDIUM_BLOB: return &type_handler_medium_blob; + case MYSQL_TYPE_LONG_BLOB: return &type_handler_long_blob; + case MYSQL_TYPE_BLOB: return &type_handler_blob; + case MYSQL_TYPE_BLOB_COMPRESSED: return &type_handler_blob_compressed; + case MYSQL_TYPE_VAR_STRING: return &type_handler_var_string; + case MYSQL_TYPE_STRING: return &type_handler_string; + case MYSQL_TYPE_ENUM: return &type_handler_enum; + case MYSQL_TYPE_SET: return &type_handler_set; + case MYSQL_TYPE_GEOMETRY: +#ifdef HAVE_SPATIAL + return &type_handler_geometry; +#else + return NULL; +#endif + case MYSQL_TYPE_TIMESTAMP: return &type_handler_timestamp; + case MYSQL_TYPE_TIMESTAMP2: return &type_handler_timestamp2; + case MYSQL_TYPE_DATE: return &type_handler_date; + case MYSQL_TYPE_TIME: return &type_handler_time; + case MYSQL_TYPE_TIME2: return &type_handler_time2; + 
case MYSQL_TYPE_DATETIME: return &type_handler_datetime; + case MYSQL_TYPE_DATETIME2: return &type_handler_datetime2; + case MYSQL_TYPE_NEWDATE: return &type_handler_newdate; + }; + return NULL; +} + + +/** + Create a DOUBLE field by default. +*/ +Field * +Type_handler::make_num_distinct_aggregator_field(MEM_ROOT *mem_root, + const Item *item) const +{ + return new(mem_root) + Field_double(NULL, item->max_length, + (uchar *) (item->maybe_null() ? "" : 0), + item->maybe_null() ? 1 : 0, Field::NONE, + &item->name, (uint8) item->decimals, + 0, item->unsigned_flag); +} + + +Field * +Type_handler_float::make_num_distinct_aggregator_field(MEM_ROOT *mem_root, + const Item *item) + const +{ + return new(mem_root) + Field_float(NULL, item->max_length, + (uchar *) (item->maybe_null() ? "" : 0), + item->maybe_null() ? 1 : 0, Field::NONE, + &item->name, (uint8) item->decimals, + 0, item->unsigned_flag); +} + + +Field * +Type_handler_decimal_result::make_num_distinct_aggregator_field( + MEM_ROOT *mem_root, + const Item *item) + const +{ + return new (mem_root) + Field_new_decimal(NULL, item->max_length, + (uchar *) (item->maybe_null() ? "" : 0), + item->maybe_null() ? 1 : 0, Field::NONE, + &item->name, (uint8) item->decimals, + 0, item->unsigned_flag); +} + + +Field * +Type_handler_int_result::make_num_distinct_aggregator_field(MEM_ROOT *mem_root, + const Item *item) + const +{ + /** + Make a longlong field for all INT-alike types. It could create + smaller fields for TINYINT, SMALLINT, MEDIUMINT, INT though. + */ + return new(mem_root) + Field_longlong(NULL, item->max_length, + (uchar *) (item->maybe_null() ? "" : 0), + item->maybe_null() ? 
1 : 0, Field::NONE, + &item->name, 0, item->unsigned_flag); +} + + +/***********************************************************************/ + +Field *Type_handler_tiny::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + /* + As we don't know if the integer was signed or not on the master, + assume we have same sign on master and slave. This is true when not + using conversions so it should be true also when using conversions. + */ + bool unsigned_flag= ((Field_num*) target)->unsigned_flag; + return new (root) + Field_tiny(NULL, 4 /*max_length*/, (uchar *) "", 1, Field::NONE, + &empty_clex_str, 0/*zerofill*/, unsigned_flag); +} + + +Field *Type_handler_short::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + bool unsigned_flag= ((Field_num*) target)->unsigned_flag; + return new (root) + Field_short(NULL, 6 /*max_length*/, (uchar *) "", 1, Field::NONE, + &empty_clex_str, 0/*zerofill*/, unsigned_flag); +} + + +Field *Type_handler_int24::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + bool unsigned_flag= ((Field_num*) target)->unsigned_flag; + return new (root) + Field_medium(NULL, 9 /*max_length*/, (uchar *) "", 1, Field::NONE, + &empty_clex_str, 0/*zerofill*/, unsigned_flag); +} + + +Field *Type_handler_long::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + bool unsigned_flag= ((Field_num*) target)->unsigned_flag; + return new (root) + Field_long(NULL, 11 /*max_length*/, (uchar *) "", 1, Field::NONE, + &empty_clex_str, 0/*zerofill*/, unsigned_flag); +} + + +Field *Type_handler_longlong::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + bool unsigned_flag= ((Field_num*) target)->unsigned_flag; + return new (root) + Field_longlong(NULL, 20 
/*max_length*/,(uchar *) "", 1, Field::NONE, + &empty_clex_str, 0/*zerofill*/, unsigned_flag); +} + + + +Field *Type_handler_float::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + return new (root) + Field_float(NULL, 12 /*max_length*/, (uchar *) "", 1, Field::NONE, + &empty_clex_str, 0/*dec*/, 0/*zerofill*/, 0/*unsigned_flag*/); +} + + +Field *Type_handler_double::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + return new (root) + Field_double(NULL, 22 /*max_length*/, (uchar *) "", 1, Field::NONE, + &empty_clex_str, 0/*dec*/, 0/*zerofill*/, 0/*unsigned_flag*/); +} + + +Field *Type_handler_newdecimal::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + int precision= metadata >> 8; + uint8 decimals= metadata & 0x00ff; + uint32 max_length= my_decimal_precision_to_length(precision, decimals, false); + DBUG_ASSERT(decimals <= DECIMAL_MAX_SCALE); + return new (root) + Field_new_decimal(NULL, max_length, (uchar *) "", 1, Field::NONE, + &empty_clex_str, decimals, 0/*zerofill*/, 0/*unsigned*/); +} + + +Field *Type_handler_olddecimal::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + sql_print_error("In RBR mode, Slave received incompatible DECIMAL field " + "(old-style decimal field) from Master while creating " + "conversion table. 
Please consider changing datatype on " + "Master to new style decimal by executing ALTER command for" + " column Name: %s.%s.%s.", + target->table->s->db.str, + target->table->s->table_name.str, + target->field_name.str); + return NULL; +} + + +Field *Type_handler_year::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + return new(root) + Field_year(NULL, 4, (uchar *) "", 1, Field::NONE, &empty_clex_str); +} + + +Field *Type_handler_null::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + return new(root) + Field_null(NULL, 0, Field::NONE, &empty_clex_str, target->charset()); +} + + +Field *Type_handler_timestamp::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + return new_Field_timestamp(root, NULL, (uchar *) "", 1, + Field::NONE, &empty_clex_str, + table->s, target->decimals()); +} + + +Field *Type_handler_timestamp2::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + return new(root) + Field_timestampf(NULL, (uchar *) "", 1, Field::NONE, + &empty_clex_str, table->s, metadata); +} + + +Field *Type_handler_newdate::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + return new(root) + Field_newdate(NULL, (uchar *) "", 1, Field::NONE, &empty_clex_str); +} + + +Field *Type_handler_date::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + return new(root) + Field_date(NULL, (uchar *) "", 1, Field::NONE, &empty_clex_str); +} + + +Field *Type_handler_time::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + return new_Field_time(root, NULL, (uchar *) "", 1, + Field::NONE, &empty_clex_str, target->decimals()); +} + + +Field 
*Type_handler_time2::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + return new(root) + Field_timef(NULL, (uchar *) "", 1, Field::NONE, &empty_clex_str, metadata); +} + + +Field *Type_handler_datetime::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + return new_Field_datetime(root, NULL, (uchar *) "", 1, + Field::NONE, &empty_clex_str, target->decimals()); +} + + +Field *Type_handler_datetime2::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + return new(root) + Field_datetimef(NULL, (uchar *) "", 1, + Field::NONE, &empty_clex_str, metadata); +} + + +Field *Type_handler_bit::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + DBUG_ASSERT((metadata & 0xff) <= 7); + uint32 max_length= 8 * (metadata >> 8U) + (metadata & 0x00ff); + return new(root) + Field_bit_as_char(NULL, max_length, (uchar *) "", 1, + Field::NONE, &empty_clex_str); +} + + +Field *Type_handler_string::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + /* This is taken from Field_string::unpack. 
*/ + uint32 max_length= (((metadata >> 4) & 0x300) ^ 0x300) + (metadata & 0x00ff); + return new(root) + Field_string(NULL, max_length, (uchar *) "", 1, + Field::NONE, &empty_clex_str, target->charset()); +} + + +Field *Type_handler_varchar::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + DBUG_ASSERT(HA_VARCHAR_PACKLENGTH(metadata) <= MAX_FIELD_VARCHARLENGTH); + return new(root) + Field_varstring(NULL, metadata, HA_VARCHAR_PACKLENGTH(metadata), + (uchar *) "", 1, Field::NONE, &empty_clex_str, + table->s, target->charset()); +} + + +Field *Type_handler_varchar_compressed::make_conversion_table_field( + MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + return new(root) + Field_varstring_compressed(NULL, metadata, + HA_VARCHAR_PACKLENGTH(metadata), + (uchar *) "", 1, Field::NONE, + &empty_clex_str, + table->s, target->charset(), + zlib_compression_method); +} + + + +Field *Type_handler_blob_compressed::make_conversion_table_field( + MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + uint pack_length= metadata & 0x00ff; + if (pack_length < 1 || pack_length > 4) + return NULL; // Broken binary log? 
+ return new(root) + Field_blob_compressed(NULL, (uchar *) "", 1, Field::NONE, + &empty_clex_str, + table->s, pack_length, target->charset(), + zlib_compression_method); +} + + +Field *Type_handler_enum::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + DBUG_ASSERT(target->type() == MYSQL_TYPE_STRING); + DBUG_ASSERT(target->real_type() == MYSQL_TYPE_ENUM); + return new(root) + Field_enum(NULL, target->field_length, + (uchar *) "", 1, Field::NONE, &empty_clex_str, + metadata & 0x00ff/*pack_length()*/, + ((const Field_enum*) target)->typelib, target->charset()); +} + + +Field *Type_handler_set::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + DBUG_ASSERT(target->type() == MYSQL_TYPE_STRING); + DBUG_ASSERT(target->real_type() == MYSQL_TYPE_SET); + return new(root) + Field_set(NULL, target->field_length, + (uchar *) "", 1, Field::NONE, &empty_clex_str, + metadata & 0x00ff/*pack_length()*/, + ((const Field_enum*) target)->typelib, target->charset()); +} + + +Field *Type_handler_enum::make_schema_field(MEM_ROOT *root, TABLE *table, + const Record_addr &addr, + const ST_FIELD_INFO &def) const +{ + LEX_CSTRING name= def.name(); + const Typelib *typelib= def.typelib(); + DBUG_ASSERT(typelib); + /* + Assume I_S columns don't have non-ASCII characters in names. + If we eventually want to, Typelib::max_char_length() must be implemented. 
+ */ + return new (root) + Field_enum(addr.ptr(), (uint32) typelib->max_octet_length(), + addr.null_ptr(), addr.null_bit(), + Field::NONE, &name, + get_enum_pack_length(typelib->count), + typelib, system_charset_info); + +} + + +/*************************************************************************/ + +bool Type_handler:: + Column_definition_validate_check_constraint(THD *thd, + Column_definition * c) const +{ + return c->validate_check_constraint(thd); +} + + +/*************************************************************************/ + +bool +Type_handler::Column_definition_set_attributes(THD *thd, + Column_definition *def, + const Lex_field_type_st &attr, + column_definition_type_t type) + const +{ + def->set_charset_collation_attrs(attr.charset_collation_attrs()); + def->set_length_and_dec(attr); + return false; +} + + +/* + In sql_mode=ORACLE, real size of VARCHAR and CHAR with no length + in SP parameters is fixed at runtime with the length of real args. + Let's translate VARCHAR to VARCHAR(4000) for return value. + + Since Oracle 9, maximum size for VARCHAR in PL/SQL is 32767. + + In MariaDB the limit for VARCHAR is 65535 bytes. + We could translate VARCHAR with no length to VARCHAR(65535), but + it would mean that for multi-byte character sets we'd have to translate + VARCHAR to MEDIUMTEXT, to guarantee 65535 characters. + + Also we could translate VARCHAR to VARCHAR(16383), where 16383 is + the maximum possible length in characters in case of mbmaxlen=4 + (e.g. utf32, utf16, utf8mb4). However, we'll have character sets with + mbmaxlen=5 soon (e.g. gb18030). 
+*/ + +bool +Type_handler_string::Column_definition_set_attributes( + THD *thd, + Column_definition *def, + const Lex_field_type_st &attr, + column_definition_type_t type) + const +{ + Type_handler::Column_definition_set_attributes(thd, def, attr, type); + if (attr.has_explicit_length()) + return false; + switch (type) { + case COLUMN_DEFINITION_ROUTINE_PARAM: + case COLUMN_DEFINITION_FUNCTION_RETURN: + if (thd->variables.sql_mode & MODE_ORACLE) + { + // See Type_handler_varchar::Column_definition_set_attributes() + def->length= def->decimals= 2000; + def->set_handler(&type_handler_varchar); + return false; + } + break; + case COLUMN_DEFINITION_ROUTINE_LOCAL: + case COLUMN_DEFINITION_TABLE_FIELD: + break; + } + def->length= 1; + return false; +} + + +bool +Type_handler_varchar::Column_definition_set_attributes( + THD *thd, + Column_definition *def, + const Lex_field_type_st &attr, + column_definition_type_t type) + const +{ + Type_handler::Column_definition_set_attributes(thd, def, attr, type); + if (attr.has_explicit_length()) + return false; + switch (type) { + case COLUMN_DEFINITION_ROUTINE_PARAM: + case COLUMN_DEFINITION_FUNCTION_RETURN: + if (thd->variables.sql_mode & MODE_ORACLE) + { + /* + Type_handler_varchar::adjust_spparam_type() tests "decimals" + to detect if the formal parameter length needs to be adjusted to + the actual parameter length. Non-zero decimals means that the length + was set implicitly to the default value and needs to be adjusted. 
+ */ + def->length= def->decimals= 4000; + return false; + } + break; + case COLUMN_DEFINITION_ROUTINE_LOCAL: + case COLUMN_DEFINITION_TABLE_FIELD: + break; + } + thd->parse_error(); + return true; +} + + +/*************************************************************************/ +bool Type_handler_null:: + Column_definition_fix_attributes(Column_definition *def) const +{ + return false; +} + +bool Type_handler_tiny:: + Column_definition_fix_attributes(Column_definition *def) const +{ + return def->fix_attributes_int(MAX_TINYINT_WIDTH + def->sign_length()); +} + +bool Type_handler_short:: + Column_definition_fix_attributes(Column_definition *def) const +{ + return def->fix_attributes_int(MAX_SMALLINT_WIDTH + def->sign_length()); +} + +bool Type_handler_int24:: + Column_definition_fix_attributes(Column_definition *def) const +{ + return def->fix_attributes_int(MAX_MEDIUMINT_WIDTH + def->sign_length()); +} + +bool Type_handler_long:: + Column_definition_fix_attributes(Column_definition *def) const +{ + return def->fix_attributes_int(MAX_INT_WIDTH + def->sign_length()); +} + +bool Type_handler_longlong:: + Column_definition_fix_attributes(Column_definition *def) const +{ + return def->fix_attributes_int(MAX_BIGINT_WIDTH/*no sign_length() added*/); +} + +bool Type_handler_newdecimal:: + Column_definition_fix_attributes(Column_definition *def) const +{ + return def->fix_attributes_decimal(); +} + +bool Type_handler_olddecimal:: + Column_definition_fix_attributes(Column_definition *def) const +{ + DBUG_ASSERT(0); // Obsolete + return true; +} + +bool Type_handler_var_string:: + Column_definition_fix_attributes(Column_definition *def) const +{ + DBUG_ASSERT(0); // Obsolete + return true; +} + +bool Type_handler_varchar:: + Column_definition_fix_attributes(Column_definition *def) const +{ + /* + Long VARCHAR's are automaticly converted to blobs in mysql_prepare_table + if they don't have a default value + */ + return def->check_length(ER_TOO_BIG_DISPLAYWIDTH, 
MAX_FIELD_BLOBLENGTH); +} + +bool Type_handler_string:: + Column_definition_fix_attributes(Column_definition *def) const +{ + return def->check_length(ER_TOO_BIG_FIELDLENGTH, MAX_FIELD_CHARLENGTH); +} + +bool Type_handler_blob_common:: + Column_definition_fix_attributes(Column_definition *def) const +{ + def->flags|= BLOB_FLAG; + return def->check_length(ER_TOO_BIG_DISPLAYWIDTH, MAX_FIELD_BLOBLENGTH); +} + + +bool Type_handler_year:: + Column_definition_fix_attributes(Column_definition *def) const +{ + if (!def->length || def->length != 2) + def->length= 4; // Default length + def->flags|= ZEROFILL_FLAG | UNSIGNED_FLAG; + return false; +} + +bool Type_handler_float:: + Column_definition_fix_attributes(Column_definition *def) const +{ + return def->fix_attributes_real(MAX_FLOAT_STR_LENGTH); +} + + +bool Type_handler_double:: + Column_definition_fix_attributes(Column_definition *def) const +{ + return def->fix_attributes_real(DBL_DIG + 7); +} + +bool Type_handler_timestamp_common:: + Column_definition_fix_attributes(Column_definition *def) const +{ + def->flags|= UNSIGNED_FLAG; + return def->fix_attributes_temporal_with_time(MAX_DATETIME_WIDTH); +} + +bool Type_handler_date_common:: + Column_definition_fix_attributes(Column_definition *def) const +{ + // We don't support creation of MYSQL_TYPE_DATE anymore + def->set_handler(&type_handler_newdate); + def->length= MAX_DATE_WIDTH; + return false; +} + +bool Type_handler_time_common:: + Column_definition_fix_attributes(Column_definition *def) const +{ + return def->fix_attributes_temporal_with_time(MIN_TIME_WIDTH); +} + +bool Type_handler_datetime_common:: + Column_definition_fix_attributes(Column_definition *def) const +{ + return def->fix_attributes_temporal_with_time(MAX_DATETIME_WIDTH); +} + +bool Type_handler_set:: + Column_definition_fix_attributes(Column_definition *def) const +{ + def->pack_length= get_set_pack_length(def->interval_list.elements); + return false; +} + +bool Type_handler_enum:: + 
Column_definition_fix_attributes(Column_definition *def) const +{ + def->pack_length= get_enum_pack_length(def->interval_list.elements); + return false; +} + +bool Type_handler_bit:: + Column_definition_fix_attributes(Column_definition *def) const +{ + return def->fix_attributes_bit(); +} + +/*************************************************************************/ + +void Type_handler_typelib:: + Column_definition_reuse_fix_attributes(THD *thd, + Column_definition *def, + const Field *field) const +{ + DBUG_ASSERT(def->flags & (ENUM_FLAG | SET_FLAG)); + def->interval= field->get_typelib(); +} + + +void Type_handler_year:: + Column_definition_reuse_fix_attributes(THD *thd, + Column_definition *def, + const Field *field) const +{ + if (def->length != 4) + { + char buff[sizeof("YEAR()") + MY_INT64_NUM_DECIMAL_DIGITS + 1]; + my_snprintf(buff, sizeof(buff), "YEAR(%llu)", def->length); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_WARN_DEPRECATED_SYNTAX, + ER_THD(thd, ER_WARN_DEPRECATED_SYNTAX), + buff, "YEAR(4)"); + } +} + + +void Type_handler_real_result:: + Column_definition_reuse_fix_attributes(THD *thd, + Column_definition *def, + const Field *field) const +{ + /* + Floating points are stored with FLOATING_POINT_DECIMALS but internally + in MariaDB used with NOT_FIXED_DEC, which is >= FLOATING_POINT_DECIMALS. 
+ */ + if (def->decimals >= FLOATING_POINT_DECIMALS) + def->decimals= NOT_FIXED_DEC; +} + + +/*************************************************************************/ + +bool Type_handler:: + Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *def, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const +{ + def->prepare_stage1_simple(&my_charset_bin); + return false; +} + +bool Type_handler_null:: + Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *def, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const +{ + def->prepare_charset_for_string(derived_attr); + def->create_length_to_internal_length_null(); + return false; +} + +bool Type_handler_row:: + Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *def, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const +{ + def->charset= &my_charset_bin; + def->create_length_to_internal_length_null(); + return false; +} + +bool Type_handler_temporal_result:: + Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *def, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const +{ + def->prepare_stage1_simple(&my_charset_numeric); + return false; +} + + +bool Type_handler_numeric:: + Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *def, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const +{ + def->prepare_stage1_simple(&my_charset_numeric); + return false; +} + +bool Type_handler_newdecimal:: + Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *def, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const +{ + def->charset= &my_charset_numeric; + def->create_length_to_internal_length_newdecimal(); + 
return false; +} + +bool Type_handler_bit:: + Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *def, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const +{ + def->charset= &my_charset_numeric; + return def->prepare_stage1_bit(thd, mem_root); +} + +bool Type_handler_typelib:: + Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *def, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const +{ + return def->prepare_charset_for_string(derived_attr) || + def->prepare_stage1_typelib(thd, mem_root, type); +} + + +bool Type_handler_string_result:: + Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *def, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const +{ + return def->prepare_charset_for_string(derived_attr) || + def->prepare_stage1_string(thd, mem_root); +} + + +/*************************************************************************/ + +bool Type_handler_general_purpose_string:: + Column_definition_bulk_alter(Column_definition *def, + const Column_derived_attributes + *derived_attr, + const Column_bulk_alter_attributes + *bulk_alter_attr) + const +{ + if (!bulk_alter_attr->alter_table_convert_to_charset()) + return false; // No "CONVERT TO" clause. + CHARSET_INFO *defcs= def->explicit_or_derived_charset(derived_attr); + DBUG_ASSERT(defcs); + /* + Handle 'ALTER TABLE t1 CONVERT TO CHARACTER SET csname'. + Change character sets for all varchar/char/text columns, + but do not touch varbinary/binary/blob columns. 
+ */ + if (!(def->flags & CONTEXT_COLLATION_FLAG) && defcs != &my_charset_bin) + def->charset= bulk_alter_attr->alter_table_convert_to_charset(); + return false; +}; + + +/*************************************************************************/ + +bool Type_handler:: + Column_definition_redefine_stage1(Column_definition *def, + const Column_definition *dup, + const handler *file) + const +{ + def->redefine_stage1_common(dup, file); + def->create_length_to_internal_length_simple(); + return false; +} + + +bool Type_handler_null:: + Column_definition_redefine_stage1(Column_definition *def, + const Column_definition *dup, + const handler *file) + const +{ + def->redefine_stage1_common(dup, file); + def->create_length_to_internal_length_null(); + return false; +} + + +bool Type_handler_newdecimal:: + Column_definition_redefine_stage1(Column_definition *def, + const Column_definition *dup, + const handler *file) + const +{ + def->redefine_stage1_common(dup, file); + def->create_length_to_internal_length_newdecimal(); + return false; +} + + +bool Type_handler_string_result:: + Column_definition_redefine_stage1(Column_definition *def, + const Column_definition *dup, + const handler *file) + const +{ + def->redefine_stage1_common(dup, file); + def->set_compression_method(dup->compression_method()); + def->create_length_to_internal_length_string(); + return false; +} + + +bool Type_handler_typelib:: + Column_definition_redefine_stage1(Column_definition *def, + const Column_definition *dup, + const handler *file) + const +{ + def->redefine_stage1_common(dup, file); + def->create_length_to_internal_length_typelib(); + return false; +} + + +bool Type_handler_bit:: + Column_definition_redefine_stage1(Column_definition *def, + const Column_definition *dup, + const handler *file) + const +{ + def->redefine_stage1_common(dup, file); + /* + If we are replacing a field with a BIT field, we need + to initialize pack_flag. 
+ */ + def->pack_flag= FIELDFLAG_NUMBER; + if (!(file->ha_table_flags() & HA_CAN_BIT_FIELD)) + def->pack_flag|= FIELDFLAG_TREAT_BIT_AS_CHAR; + def->create_length_to_internal_length_bit(); + return false; +} + + +/*************************************************************************/ + +bool Type_handler:: + Column_definition_prepare_stage2_legacy(Column_definition *def, + enum_field_types type) const +{ + def->pack_flag= f_settype((uint) type); + return false; +} + +bool Type_handler:: + Column_definition_prepare_stage2_legacy_num(Column_definition *def, + enum_field_types type) const +{ + def->pack_flag= def->pack_flag_numeric() | f_settype((uint) type); + return false; +} + +bool Type_handler:: + Column_definition_prepare_stage2_legacy_real(Column_definition *def, + enum_field_types type) const +{ + uint dec= def->decimals; + /* + User specified FLOAT() or DOUBLE() without precision. Change to + FLOATING_POINT_DECIMALS to keep things compatible with earlier MariaDB + versions. + */ + if (dec >= FLOATING_POINT_DECIMALS) + dec= FLOATING_POINT_DECIMALS; + def->decimals= dec; + def->pack_flag= def->pack_flag_numeric() | f_settype((uint) type); + return false; +} + +bool Type_handler_newdecimal:: + Column_definition_prepare_stage2(Column_definition *def, + handler *file, + ulonglong table_flags) const +{ + def->pack_flag= def->pack_flag_numeric(); + return false; +} + +bool Type_handler_blob_common:: + Column_definition_prepare_stage2(Column_definition *def, + handler *file, + ulonglong table_flags) const +{ + return def->prepare_stage2_blob(file, table_flags, FIELDFLAG_BLOB); +} + +bool Type_handler_varchar:: + Column_definition_prepare_stage2(Column_definition *def, + handler *file, + ulonglong table_flags) const +{ + return def->prepare_stage2_varchar(table_flags); +} + +bool Type_handler_string:: + Column_definition_prepare_stage2(Column_definition *def, + handler *file, + ulonglong table_flags) const +{ + def->pack_flag= (def->charset->state & MY_CS_BINSORT) 
? FIELDFLAG_BINARY : 0; + return false; +} + +bool Type_handler_enum:: + Column_definition_prepare_stage2(Column_definition *def, + handler *file, + ulonglong table_flags) const +{ + uint dummy; + return def->prepare_stage2_typelib("ENUM", FIELDFLAG_INTERVAL, &dummy); +} + +bool Type_handler_set:: + Column_definition_prepare_stage2(Column_definition *def, + handler *file, + ulonglong table_flags) const +{ + uint dup_count; + if (def->prepare_stage2_typelib("SET", FIELDFLAG_BITFIELD, &dup_count)) + return true; + /* Check that count of unique members is not more then 64 */ + if (def->interval->count - dup_count > sizeof(longlong)*8) + { + my_error(ER_TOO_BIG_SET, MYF(0), def->field_name.str); + return true; + } + return false; +} + +bool Type_handler_bit:: + Column_definition_prepare_stage2(Column_definition *def, + handler *file, + ulonglong table_flags) const +{ + if (!(table_flags & HA_CAN_BIT_FIELD)) + { + def->pack_flag|= FIELDFLAG_TREAT_BIT_AS_CHAR; + def->create_length_to_internal_length_bit(); + } + return false; +} + + +/*************************************************************************/ +bool Type_handler::Key_part_spec_init_primary(Key_part_spec *part, + const Column_definition &def, + const handler *file) const +{ + part->length*= def.charset->mbmaxlen; + return false; +} + + +bool Type_handler::Key_part_spec_init_unique(Key_part_spec *part, + const Column_definition &def, + const handler *file, + bool *has_field_needed) const +{ + part->length*= def.charset->mbmaxlen; + return false; +} + + +bool Type_handler::Key_part_spec_init_multiple(Key_part_spec *part, + const Column_definition &def, + const handler *file) const +{ + part->length*= def.charset->mbmaxlen; + return false; +} + + +bool Type_handler::Key_part_spec_init_foreign(Key_part_spec *part, + const Column_definition &def, + const handler *file) const +{ + part->length*= def.charset->mbmaxlen; + return false; +} + + +bool Type_handler::Key_part_spec_init_spatial(Key_part_spec *part, + 
const Column_definition &def) + const +{ + my_error(ER_WRONG_ARGUMENTS, MYF(0), "SPATIAL INDEX"); + return true; +} + + +bool Type_handler_blob_common::Key_part_spec_init_primary(Key_part_spec *part, + const Column_definition &def, + const handler *file) const +{ + part->length*= def.charset->mbmaxlen; + return part->check_primary_key_for_blob(file); +} + + +bool Type_handler_blob_common::Key_part_spec_init_unique(Key_part_spec *part, + const Column_definition &def, + const handler *file, + bool *hash_field_needed) const +{ + if (!(part->length*= def.charset->mbmaxlen)) + *hash_field_needed= true; + return part->check_key_for_blob(file); +} + + +bool Type_handler_blob_common::Key_part_spec_init_multiple(Key_part_spec *part, + const Column_definition &def, + const handler *file) const +{ + part->length*= def.charset->mbmaxlen; + return part->init_multiple_key_for_blob(file); +} + + +bool Type_handler_blob_common::Key_part_spec_init_foreign(Key_part_spec *part, + const Column_definition &def, + const handler *file) const +{ + part->length*= def.charset->mbmaxlen; + return part->check_foreign_key_for_blob(file); +} + + + +/*************************************************************************/ + +uint32 Type_handler_time::calc_pack_length(uint32 length) const +{ + return length > MIN_TIME_WIDTH ? + hires_bytes(length - 1 - MIN_TIME_WIDTH) : 3; +} + +uint32 Type_handler_time2::calc_pack_length(uint32 length) const +{ + return length > MIN_TIME_WIDTH ? + my_time_binary_length(length - MIN_TIME_WIDTH - 1) : 3; +} + +uint32 Type_handler_timestamp::calc_pack_length(uint32 length) const +{ + return length > MAX_DATETIME_WIDTH ? + 4 + sec_part_bytes(length - 1 - MAX_DATETIME_WIDTH) : 4; +} + +uint32 Type_handler_timestamp2::calc_pack_length(uint32 length) const +{ + return length > MAX_DATETIME_WIDTH ? 
+ my_timestamp_binary_length(length - MAX_DATETIME_WIDTH - 1) : 4; +} + +uint32 Type_handler_datetime::calc_pack_length(uint32 length) const +{ + return length > MAX_DATETIME_WIDTH ? + hires_bytes(length - 1 - MAX_DATETIME_WIDTH) : 8; +} + +uint32 Type_handler_datetime2::calc_pack_length(uint32 length) const +{ + return length > MAX_DATETIME_WIDTH ? + my_datetime_binary_length(length - MAX_DATETIME_WIDTH - 1) : 5; +} + +uint32 Type_handler_tiny_blob::calc_pack_length(uint32 length) const +{ + return 1 + portable_sizeof_char_ptr; +} + +uint32 Type_handler_blob::calc_pack_length(uint32 length) const +{ + return 2 + portable_sizeof_char_ptr; +} + +uint32 Type_handler_medium_blob::calc_pack_length(uint32 length) const +{ + return 3 + portable_sizeof_char_ptr; +} + +uint32 Type_handler_long_blob::calc_pack_length(uint32 length) const +{ + return 4 + portable_sizeof_char_ptr; +} + +uint32 Type_handler_newdecimal::calc_pack_length(uint32 length) const +{ + abort(); // This shouldn't happen + return 0; +} + +uint32 Type_handler_set::calc_pack_length(uint32 length) const +{ + abort(); // This shouldn't happen + return 0; +} + +uint32 Type_handler_enum::calc_pack_length(uint32 length) const +{ + abort(); // This shouldn't happen + return 0; +} + + +/*************************************************************************/ +uint Type_handler::calc_key_length(const Column_definition &def) const +{ + DBUG_ASSERT(def.pack_length == calc_pack_length((uint32) def.length)); + return def.pack_length; +} + +uint Type_handler_bit::calc_key_length(const Column_definition &def) const +{ + if (f_bit_as_char(def.pack_flag)) + return def.pack_length; + /* We need one extra byte to store the bits we save among the null bits */ + return def.pack_length + MY_TEST(def.length & 7); +} + +uint Type_handler_newdecimal::calc_key_length(const Column_definition &def) const +{ + return def.pack_length; +} + +uint +Type_handler_string_result::calc_key_length(const Column_definition &def) const +{ + 
return (uint) def.length; +} + +uint Type_handler_enum::calc_key_length(const Column_definition &def) const +{ + DBUG_ASSERT(def.interval); + return get_enum_pack_length(def.interval->count); +} + +uint Type_handler_set::calc_key_length(const Column_definition &def) const +{ + DBUG_ASSERT(def.interval); + return get_set_pack_length(def.interval->count); +} + +uint Type_handler_blob_common::calc_key_length(const Column_definition &def) const +{ + return 0; +} + +/*************************************************************************/ +Field *Type_handler::make_and_init_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE *table) const +{ + Field *field= make_table_field(root, name, addr, attr, table->s); + if (field) + field->init(table); + return field; +} + + +Field *Type_handler_int_result::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const +{ + DBUG_ASSERT(is_unsigned() == attr.unsigned_flag); + Column_definition_attributes dattr(attr); + return make_table_field_from_def(share, root, name, addr, + Bit_addr(), &dattr, 0); +} + + +Field *Type_handler_vers_trx_id::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const +{ + DBUG_ASSERT(is_unsigned() == attr.unsigned_flag); + return new (root) + Field_vers_trx_id(addr.ptr(), attr.max_char_length(), + addr.null_ptr(), addr.null_bit(), + Field::NONE, name, + 0/*zerofill*/, attr.unsigned_flag); +} + + +Field * +Type_handler_real_result::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const +{ + Column_definition_attributes dattr(attr); + return make_table_field_from_def(share, root, name, addr, + Bit_addr(), &dattr, 0); +} + + +Field * 
+Type_handler_olddecimal::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const +{ + /* + Currently make_table_field() is used for Item purpose only. + On Item level we have type_handler_newdecimal only. + For now we have DBUG_ASSERT(0). + It will be removed when we reuse Type_handler::make_table_field() + in make_field() in field.cc, to open old tables with old decimal. + */ + DBUG_ASSERT(0); + Column_definition_attributes dattr(attr); + return make_table_field_from_def(share, root, name, addr, + Bit_addr(), &dattr, 0); +} + + +Field * +Type_handler_newdecimal::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const +{ + uint8 dec= (uint8) attr.decimals; + uint8 intg= (uint8) (attr.decimal_precision() - dec); + uint32 len= attr.max_char_length(); + + /* + Trying to put too many digits overall in a DECIMAL(prec,dec) + will always throw a warning. We must limit dec to + DECIMAL_MAX_SCALE however to prevent an assert() later. + */ + + if (dec > 0) + { + signed int overflow; + + dec= MY_MIN(dec, DECIMAL_MAX_SCALE); + + /* + If the value still overflows the field with the corrected dec, + we'll throw out decimals rather than integers. This is still + bad and of course throws a truncation warning. + +1: for decimal point + */ + + const int required_length= + my_decimal_precision_to_length(intg + dec, dec, attr.unsigned_flag); + + overflow= required_length - len; + + if (overflow > 0) + dec= MY_MAX(0, dec - overflow); // too long, discard fract + else + /* Corrected value fits. 
*/ + len= required_length; + } + return new (root) + Field_new_decimal(addr.ptr(), len, addr.null_ptr(), addr.null_bit(), + Field::NONE, name, + dec, 0/*zerofill*/, attr.unsigned_flag); +} + + +Field *Type_handler_null::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + return new (root) + Field_null(addr.ptr(), attr.max_length, + Field::NONE, name, attr.collation.collation); +} + + +Field *Type_handler_timestamp::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + return new_Field_timestamp(root, + addr.ptr(), addr.null_ptr(), addr.null_bit(), + Field::NONE, name, share, attr.decimals); +} + + +Field *Type_handler_timestamp2::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + /* + Will be changed to "new Field_timestampf" when we reuse + make_table_field() for make_field() purposes in field.cc. 
+ */ + return new_Field_timestamp(root, + addr.ptr(), addr.null_ptr(), addr.null_bit(), + Field::NONE, name, share, attr.decimals); +} + + +Field *Type_handler_newdate::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + return new (root) + Field_newdate(addr.ptr(), addr.null_ptr(), addr.null_bit(), + Field::NONE, name); +} + + +Field *Type_handler_date::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + /* + DBUG_ASSERT will be removed when we reuse make_table_field() + for make_field() in field.cc + */ + DBUG_ASSERT(0); + return new (root) + Field_date(addr.ptr(), addr.null_ptr(), addr.null_bit(), + Field::NONE, name); +} + + +Field *Type_handler_time::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + return new_Field_time(root, + addr.ptr(), addr.null_ptr(), addr.null_bit(), + Field::NONE, name, attr.decimals); +} + + +Field *Type_handler_time2::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + + +{ + /* + Will be changed to "new Field_timef" when we reuse + make_table_field() for make_field() purposes in field.cc. 
+ */ + return new_Field_time(root, + addr.ptr(), addr.null_ptr(), addr.null_bit(), + Field::NONE, name, attr.decimals); +} + + +Field *Type_handler_datetime::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + return new_Field_datetime(root, + addr.ptr(), addr.null_ptr(), addr.null_bit(), + Field::NONE, name, attr.decimals); +} + + +Field *Type_handler_datetime2::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const +{ + /* + Will be changed to "new Field_datetimef" when we reuse + make_table_field() for make_field() purposes in field.cc. + */ + return new_Field_datetime(root, + addr.ptr(), addr.null_ptr(), addr.null_bit(), + Field::NONE, name, attr.decimals); +} + + +Field *Type_handler_bit::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + return new (root) + Field_bit_as_char(addr.ptr(), attr.max_length, + addr.null_ptr(), addr.null_bit(), + Field::NONE, name); +} + + +Field *Type_handler_string::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + return new (root) + Field_string(addr.ptr(), attr.max_length, + addr.null_ptr(), addr.null_bit(), + Field::NONE, name, attr.collation); +} + + +Field *Type_handler_varchar::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + DBUG_ASSERT(HA_VARCHAR_PACKLENGTH(attr.max_length) <= + MAX_FIELD_VARCHARLENGTH); + return new (root) + Field_varstring(addr.ptr(), attr.max_length, + HA_VARCHAR_PACKLENGTH(attr.max_length), + addr.null_ptr(), addr.null_bit(), + Field::NONE, name, + share, attr.collation); +} + + +Field 
*Type_handler_tiny_blob::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + return new (root) + Field_blob(addr.ptr(), addr.null_ptr(), addr.null_bit(), + Field::NONE, name, share, + 1, attr.collation); +} + + +Field *Type_handler_blob::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + return new (root) + Field_blob(addr.ptr(), addr.null_ptr(), addr.null_bit(), + Field::NONE, name, share, + 2, attr.collation); +} + + +Field * +Type_handler_medium_blob::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + return new (root) + Field_blob(addr.ptr(), addr.null_ptr(), addr.null_bit(), + Field::NONE, name, share, + 3, attr.collation); +} + + +Field *Type_handler_long_blob::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + return new (root) + Field_blob(addr.ptr(), addr.null_ptr(), addr.null_bit(), + Field::NONE, name, share, + 4, attr.collation); +} + + +Field *Type_handler_enum::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const +{ + const TYPELIB *typelib= attr.get_typelib(); + DBUG_ASSERT(typelib); + return new (root) + Field_enum(addr.ptr(), attr.max_length, + addr.null_ptr(), addr.null_bit(), + Field::NONE, name, + get_enum_pack_length(typelib->count), typelib, + attr.collation); +} + + +Field *Type_handler_set::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + const TYPELIB *typelib= attr.get_typelib(); + DBUG_ASSERT(typelib); + return new (root) 
+ Field_set(addr.ptr(), attr.max_length, + addr.null_ptr(), addr.null_bit(), + Field::NONE, name, + get_enum_pack_length(typelib->count), typelib, + attr.collation); +} + + +/*************************************************************************/ + +Field *Type_handler_float::make_schema_field(MEM_ROOT *root, TABLE *table, + const Record_addr &addr, + const ST_FIELD_INFO &def) const +{ + LEX_CSTRING name= def.name(); + return new (root) + Field_float(addr.ptr(), def.char_length(), + addr.null_ptr(), addr.null_bit(), + Field::NONE, &name, + (uint8) NOT_FIXED_DEC, + 0/*zerofill*/, def.unsigned_flag()); +} + + +Field *Type_handler_double::make_schema_field(MEM_ROOT *root, TABLE *table, + const Record_addr &addr, + const ST_FIELD_INFO &def) const +{ + LEX_CSTRING name= def.name(); + return new (root) + Field_double(addr.ptr(), def.char_length(), + addr.null_ptr(), addr.null_bit(), + Field::NONE, &name, + (uint8) NOT_FIXED_DEC, + 0/*zerofill*/, def.unsigned_flag()); +} + + +Field *Type_handler_decimal_result::make_schema_field(MEM_ROOT *root, + TABLE *table, + const Record_addr &addr, + const ST_FIELD_INFO &def) const +{ + LEX_CSTRING name= def.name(); + uint dec= def.decimal_scale(); + uint prec= def.decimal_precision(); + DBUG_ASSERT(dec <= DECIMAL_MAX_SCALE); + uint32 len= my_decimal_precision_to_length(prec, dec, def.unsigned_flag()); + return new (root) + Field_new_decimal(addr.ptr(), len, addr.null_ptr(), addr.null_bit(), + Field::NONE, &name, + (uint8) dec, 0/*zerofill*/, def.unsigned_flag()); +} + + +Field *Type_handler_blob_common::make_schema_field(MEM_ROOT *root, TABLE *table, + const Record_addr &addr, + const ST_FIELD_INFO &def) const +{ + LEX_CSTRING name= def.name(); + return new (root) + Field_blob(addr.ptr(), addr.null_ptr(), addr.null_bit(), + Field::NONE, &name, table->s, + length_bytes(), + &my_charset_bin); +} + + +Field *Type_handler_varchar::make_schema_field(MEM_ROOT *root, TABLE *table, + const Record_addr &addr, + const ST_FIELD_INFO &def) 
                                               const
{
  DBUG_ASSERT(def.char_length());
  LEX_CSTRING name= def.name();
  // Worst case 3 bytes per character (system_charset_info is utf8mb3)
  uint32 octet_length= (uint32) def.char_length() * 3;
  if (octet_length > MAX_FIELD_VARCHARLENGTH)
  {
    // Too long for VARCHAR: fall back to a LONGBLOB-sized field, but
    // keep the declared length so metadata reports the intended size.
    Field *field= new (root)
      Field_blob(addr.ptr(), addr.null_ptr(), addr.null_bit(), Field::NONE,
                 &name, table->s, 4, system_charset_info);
    if (field)
      field->field_length= octet_length;
    return field;
  }
  else
  {
    return new (root)
      Field_varstring(addr.ptr(), octet_length,
                      HA_VARCHAR_PACKLENGTH(octet_length),
                      addr.null_ptr(), addr.null_bit(),
                      Field::NONE, &name,
                      table->s, system_charset_info);
  }
}


Field *Type_handler_tiny::make_schema_field(MEM_ROOT *root, TABLE *table,
                                            const Record_addr &addr,
                                            const ST_FIELD_INFO &def) const
{
  LEX_CSTRING name= def.name();
  return new (root)
     Field_tiny(addr.ptr(), def.char_length(),
                addr.null_ptr(), addr.null_bit(), Field::NONE, &name,
                0/*zerofill*/, def.unsigned_flag());
}


Field *Type_handler_short::make_schema_field(MEM_ROOT *root, TABLE *table,
                                             const Record_addr &addr,
                                             const ST_FIELD_INFO &def) const
{
  LEX_CSTRING name= def.name();
  return new (root)
     Field_short(addr.ptr(), def.char_length(),
                 addr.null_ptr(), addr.null_bit(), Field::NONE, &name,
                 0/*zerofill*/, def.unsigned_flag());
}


Field *Type_handler_long::make_schema_field(MEM_ROOT *root, TABLE *table,
                                            const Record_addr &addr,
                                            const ST_FIELD_INFO &def) const
{
  LEX_CSTRING name= def.name();
  return new (root)
     Field_long(addr.ptr(), def.char_length(),
                addr.null_ptr(), addr.null_bit(), Field::NONE, &name,
                0/*zerofill*/, def.unsigned_flag());
}


Field *Type_handler_longlong::make_schema_field(MEM_ROOT *root, TABLE *table,
                                                const Record_addr &addr,
                                                const ST_FIELD_INFO &def) const
{
  LEX_CSTRING name= def.name();
  return new (root)
     Field_longlong(addr.ptr(), def.char_length(),
                    addr.null_ptr(), addr.null_bit(), Field::NONE, &name,
                    0/*zerofill*/, def.unsigned_flag());
}


Field
*Type_handler_date_common::make_schema_field(MEM_ROOT *root, TABLE *table,
                                             const Record_addr &addr,
                                             const ST_FIELD_INFO &def) const
{
  LEX_CSTRING name= def.name();
  return new (root)
     Field_newdate(addr.ptr(), addr.null_ptr(), addr.null_bit(),
                   Field::NONE, &name);
}


Field *Type_handler_time_common::make_schema_field(MEM_ROOT *root, TABLE *table,
                                                   const Record_addr &addr,
                                                   const ST_FIELD_INFO &def) const
{
  LEX_CSTRING name= def.name();
  // new_Field_time() selects the right TIME representation for def.fsp()
  return new_Field_time(root,
                        addr.ptr(), addr.null_ptr(), addr.null_bit(),
                        Field::NONE, &name, def.fsp());
}


Field *Type_handler_datetime_common::make_schema_field(MEM_ROOT *root,
                                                       TABLE *table,
                                                       const Record_addr &addr,
                                                       const ST_FIELD_INFO &def) const
{
  LEX_CSTRING name= def.name();
  return new (root) Field_datetimef(addr.ptr(),
                                    addr.null_ptr(), addr.null_bit(),
                                    Field::NONE, &name, def.fsp());
}


/*************************************************************************/

/*
  If length is not specified for a varchar parameter, set length to the
  maximum length of the actual argument. Goals are:
  - avoid to allocate too much unused memory for m_var_table
  - allow length check inside the callee rather than during copy of
    returned values in output variables.
  - allow varchar parameter size greater than 4000
  Default length has been stored in "decimal" member during parse.
*/
bool Type_handler_varchar::adjust_spparam_type(Spvar_definition *def,
                                               Item *from) const
{
  if (def->decimals)
  {
    // Cap by the widest VARCHAR possible for the parameter's charset
    uint def_max_char_length= MAX_FIELD_VARCHARLENGTH / def->charset->mbmaxlen;
    uint arg_max_length= from->max_char_length();
    set_if_smaller(arg_max_length, def_max_char_length);
    def->length= arg_max_length > 0 ?
                 arg_max_length : def->decimals;  // fall back to parsed default
    def->create_length_to_internal_length_string();
  }
  return false;
}

/*************************************************************************/

/*
  max_display_length() overrides: for these handlers the display width
  is exactly the item's max_length attribute.
*/
uint32 Type_handler_decimal_result::max_display_length(const Item *item) const
{
  return item->max_length;
}


uint32 Type_handler_temporal_result::max_display_length(const Item *item) const
{
  return item->max_length;
}

uint32 Type_handler_string_result::max_display_length(const Item *item) const
{
  return item->max_length;
}


uint32 Type_handler_year::max_display_length(const Item *item) const
{
  return item->max_length;
}


uint32 Type_handler_bit::max_display_length(const Item *item) const
{
  return item->max_length;
}

/*************************************************************************/

/*
  Item_decimal_notation_int_digits() overrides: number of digits in the
  integer part when the value is printed in decimal notation.
*/
uint32
Type_handler_decimal_result::Item_decimal_notation_int_digits(const Item *item)
                                                              const
{
  return item->decimal_int_part();
}


uint32
Type_handler_temporal_result::Item_decimal_notation_int_digits(const Item *item)
                                                               const
{
  return item->decimal_int_part();
}


uint32
Type_handler_bit::Item_decimal_notation_int_digits(const Item *item)
                                                   const
{
  return Bit_decimal_notation_int_digits_by_nbits(item->max_length);
}


uint32
Type_handler_general_purpose_int::Item_decimal_notation_int_digits(
                                                  const Item *item) const
{
  return type_limits_int()->precision();
}

/*************************************************************************/

/*
  Binary to Decimal digits ratio converges to log2(10) thus using 3 as
  a divisor.
+*/ +uint32 +Type_handler_bit::Bit_decimal_notation_int_digits_by_nbits(uint nbits) +{ + DBUG_ASSERT(nbits > 0); + DBUG_ASSERT(nbits <= 64); + set_if_smaller(nbits, 64); // Safety + static uint ndigits[65]= + {0, + 1,1,1,2,2,2,3,3, // 1..8 bits + 3,4,4,4,4,5,5,5, // 9..16 bits + 6,6,6,7,7,7,7,8, // 17..24 bits + 8,8,9,9,9,10,10,10, // 25..32 bits + 10,11,11,11,12,12,12,13, // 33..40 bits + 13,13,13,14,14,14,15,15, // 41..48 bits + 15,16,16,16,16,17,17,17, // 49..56 bits + 18,18,18,19,19,19,19,20 // 57..64 bits + }; + return ndigits[nbits]; +} + +/*************************************************************************/ + +void Type_handler_row::Item_update_null_value(Item *item) const +{ + DBUG_ASSERT(0); + item->null_value= true; +} + + +void Type_handler_time_common::Item_update_null_value(Item *item) const +{ + MYSQL_TIME ltime; + THD *thd= current_thd; + (void) item->get_date(thd, <ime, Time::Options(TIME_TIME_ONLY, thd)); +} + + +void Type_handler_temporal_with_date::Item_update_null_value(Item *item) const +{ + MYSQL_TIME ltime; + THD *thd= current_thd; + (void) item->get_date(thd, <ime, Datetime::Options(thd)); +} + + +void Type_handler_string_result::Item_update_null_value(Item *item) const +{ + StringBuffer tmp; + (void) item->val_str(&tmp); +} + + +void Type_handler_real_result::Item_update_null_value(Item *item) const +{ + (void) item->val_real(); +} + + +void Type_handler_decimal_result::Item_update_null_value(Item *item) const +{ + my_decimal tmp; + (void) item->val_decimal(&tmp); +} + + +void Type_handler_int_result::Item_update_null_value(Item *item) const +{ + (void) item->val_int(); +} + + +void Type_handler_bool::Item_update_null_value(Item *item) const +{ + (void) item->val_bool(); +} + + +/*************************************************************************/ + +int Type_handler_time_common::Item_save_in_field(Item *item, Field *field, + bool no_conversions) const +{ + return item->save_time_in_field(field, no_conversions); +} + +int 
Type_handler_temporal_with_date::Item_save_in_field(Item *item,
                                                    Field *field,
                                                    bool no_conversions)
                                                    const
{
  return item->save_date_in_field(field, no_conversions);
}


int Type_handler_timestamp_common::Item_save_in_field(Item *item,
                                                      Field *field,
                                                      bool no_conversions)
                                                      const
{
  // Evaluate into a native TIMESTAMP buffer first; NULL results go through
  // the regular NULL-assignment path (which may convert to a default).
  Timestamp_or_zero_datetime_native_null tmp(field->table->in_use, item, true);
  if (tmp.is_null())
    return set_field_to_null_with_conversions(field, no_conversions);
  return tmp.save_in_field(field, item->decimals);
}


int Type_handler_string_result::Item_save_in_field(Item *item, Field *field,
                                                   bool no_conversions) const
{
  return item->save_str_in_field(field, no_conversions);
}


int Type_handler_real_result::Item_save_in_field(Item *item, Field *field,
                                                 bool no_conversions) const
{
  return item->save_real_in_field(field, no_conversions);
}


int Type_handler_decimal_result::Item_save_in_field(Item *item, Field *field,
                                                    bool no_conversions) const
{
  return item->save_decimal_in_field(field, no_conversions);
}


int Type_handler_int_result::Item_save_in_field(Item *item, Field *field,
                                                bool no_conversions) const
{
  return item->save_int_in_field(field, no_conversions);
}


/***********************************************************************/

/*
  set_comparator_func() overrides: install the Arg_comparator comparison
  routine matching this handler's comparison type.
*/
bool Type_handler_row::
set_comparator_func(THD *thd, Arg_comparator *cmp) const
{
  return cmp->set_cmp_func_row(thd);
}

bool Type_handler_int_result::
set_comparator_func(THD *thd, Arg_comparator *cmp) const
{
  return cmp->set_cmp_func_int(thd);
}

bool Type_handler_real_result::
set_comparator_func(THD *thd, Arg_comparator *cmp) const
{
  return cmp->set_cmp_func_real(thd);
}

bool Type_handler_decimal_result::
set_comparator_func(THD *thd, Arg_comparator *cmp) const
{
  return cmp->set_cmp_func_decimal(thd);
}

bool Type_handler_string_result::
set_comparator_func(THD *thd, Arg_comparator *cmp) const
{
  return cmp->set_cmp_func_string(thd);
}

bool
Type_handler_time_common::
set_comparator_func(THD *thd, Arg_comparator *cmp) const
{
  return cmp->set_cmp_func_time(thd);
}

bool
Type_handler_temporal_with_date::
set_comparator_func(THD *thd, Arg_comparator *cmp) const
{
  return cmp->set_cmp_func_datetime(thd);
}

bool
Type_handler_timestamp_common::
set_comparator_func(THD *thd, Arg_comparator *cmp) const
{
  return cmp->set_cmp_func_native(thd);
}


/*************************************************************************/

/*
  can_change_cond_ref_to_const() overrides: decide whether the optimizer
  may substitute a constant for an expression inside an equality chain.
*/
bool Type_handler_temporal_result::
       can_change_cond_ref_to_const(Item_bool_func2 *target,
                                    Item *target_expr, Item *target_value,
                                    Item_bool_func2 *source,
                                    Item *source_expr, Item *source_const)
                                    const
{
  if (source->compare_type_handler()->cmp_type() != TIME_RESULT)
    return false;

  /*
    Can't rewrite:
      WHERE COALESCE(time_column)='00:00:00'
        AND COALESCE(time_column)=DATE'2015-09-11'
    to
      WHERE DATE'2015-09-11'='00:00:00'
        AND COALESCE(time_column)=DATE'2015-09-11'
    because the left part will erroneously try to parse '00:00:00'
    as DATE, not as TIME.

    TODO: It could still be rewritten to:
      WHERE DATE'2015-09-11'=TIME'00:00:00'
        AND COALESCE(time_column)=DATE'2015-09-11'
    i.e. we need to replace both target_expr and target_value
    at the same time. This is not supported yet.
  */
  return target_value->cmp_type() == TIME_RESULT;
}


bool Type_handler_string_result::
       can_change_cond_ref_to_const(Item_bool_func2 *target,
                                    Item *target_expr, Item *target_value,
                                    Item_bool_func2 *source,
                                    Item *source_expr, Item *source_const)
                                    const
{
  if (source->compare_type_handler()->cmp_type() != STRING_RESULT)
    return false;
  /*
    In this example:
      SET NAMES utf8 COLLATE utf8_german2_ci;
      DROP TABLE IF EXISTS t1;
      CREATE TABLE t1 (a CHAR(10) CHARACTER SET utf8);
      INSERT INTO t1 VALUES ('o-umlaut'),('oe');
      SELECT * FROM t1 WHERE a='oe' COLLATE utf8_german2_ci AND a='oe';

    the query should return only the row with 'oe'.
    It should not return 'o-umlaut', because 'o-umlaut' does not match
    the right part of the condition: a='oe'
    ('o-umlaut' is not equal to 'oe' in utf8mb3_general_ci,
     which is the collation of the field "a").

    If we change the right part from:
       ... AND a='oe'
    to
       ... AND 'oe' COLLATE utf8_german2_ci='oe'
    it will be evaluated to TRUE and removed from the condition,
    so the overall query will be simplified to:

      SELECT * FROM t1 WHERE a='oe' COLLATE utf8_german2_ci;

    which will erroneously start to return both 'oe' and 'o-umlaut'.
    So changing "expr" to "const" is not possible if the effective
    collations of "target" and "source" are not exactly the same.

    Note, the code before the fix for MDEV-7152 only checked that
    collations of "source_const" and "target_value" are the same.
    This was not enough, as the bug report demonstrated.
  */
  return
    target->compare_collation() == source->compare_collation() &&
    target_value->collation.collation == source_const->collation.collation;
}


bool Type_handler_numeric::
       can_change_cond_ref_to_const(Item_bool_func2 *target,
                                    Item *target_expr, Item *target_value,
                                    Item_bool_func2 *source,
                                    Item *source_expr, Item *source_const)
                                    const
{
  /*
    The collations of "target" and "source" do not make sense for numeric
    data types.
  */
  return target->compare_type_handler() == source->compare_type_handler();
}


/*************************************************************************/

/*
  Item_get_cache() overrides: allocate the Item_cache subclass that can
  hold a value of this data type (used e.g. for subquery caching).
*/
Item_cache *
Type_handler_row::Item_get_cache(THD *thd, const Item *item) const
{
  return new (thd->mem_root) Item_cache_row(thd);
}

Item_cache *
Type_handler_int_result::Item_get_cache(THD *thd, const Item *item) const
{
  return new (thd->mem_root) Item_cache_int(thd, item->type_handler());
}

Item_cache *
Type_handler_year::Item_get_cache(THD *thd, const Item *item) const
{
  return new (thd->mem_root) Item_cache_year(thd, item->type_handler());
}

Item_cache *
Type_handler_double::Item_get_cache(THD *thd, const Item *item) const
{
  return new (thd->mem_root) Item_cache_double(thd);
}

Item_cache *
Type_handler_float::Item_get_cache(THD *thd, const Item *item) const
{
  return new (thd->mem_root) Item_cache_float(thd);
}

Item_cache *
Type_handler_decimal_result::Item_get_cache(THD *thd, const Item *item) const
{
  return new (thd->mem_root) Item_cache_decimal(thd);
}

Item_cache *
Type_handler_string_result::Item_get_cache(THD *thd, const Item *item) const
{
  return new (thd->mem_root) Item_cache_str(thd, item);
}

Item_cache *
Type_handler_timestamp_common::Item_get_cache(THD *thd, const Item *item) const
{
  return new (thd->mem_root) Item_cache_timestamp(thd);
}

Item_cache *
Type_handler_datetime_common::Item_get_cache(THD *thd, const Item *item) const
{
  return new (thd->mem_root) Item_cache_datetime(thd);
}

Item_cache *
Type_handler_time_common::Item_get_cache(THD *thd, const Item *item) const
{
  return new (thd->mem_root) Item_cache_time(thd);
}

Item_cache *
Type_handler_date_common::Item_get_cache(THD *thd, const Item *item) const
{
  return new (thd->mem_root) Item_cache_date(thd);
}


/*************************************************************************/

Item_copy *
Type_handler::create_item_copy(THD *thd, Item
                               *item) const
{
  // Generic fallback: cache the value as a string copy
  return new (thd->mem_root) Item_copy_string(thd, item);
}


Item_copy *
Type_handler_timestamp_common::create_item_copy(THD *thd, Item *item) const
{
  return new (thd->mem_root) Item_copy_timestamp(thd, item);
}

/*************************************************************************/

/*
  This method handles YEAR and BIT data types.
  It does not switch the data type to DECIMAL on a
  unsigned_flag mismatch. This is important for combinations
  like YEAR+NULL, BIT+NULL.
*/
bool Type_handler_int_result::
       Item_hybrid_func_fix_attributes(THD *thd,
                                       const LEX_CSTRING &name,
                                       Type_handler_hybrid_field_type *handler,
                                       Type_all_attributes *func,
                                       Item **items, uint nitems) const
{
  func->aggregate_attributes_int(items, nitems);
  return false;
}


/*
  This method handles general purpose integer data types
  TINYINT, SMALLINT, MEDIUMINT, BIGINT.
  It switches to DECIMAL in case if a mismatch in unsigned_flag found.

  Note, we should fix this to ignore all items with
  type_handler()==&type_handler_null.
  It's too late for 10.4. Let's do it eventually in a higher version.
*/
bool Type_handler_general_purpose_int::
       Item_hybrid_func_fix_attributes(THD *thd,
                                       const LEX_CSTRING &name,
                                       Type_handler_hybrid_field_type *handler,
                                       Type_all_attributes *func,
                                       Item **items, uint nitems) const
{
  bool unsigned_flag= items[0]->unsigned_flag;
  for (uint i= 1; i < nitems; i++)
  {
    if (unsigned_flag != items[i]->unsigned_flag)
    {
      // Convert a mixture of signed and unsigned int to decimal
      handler->set_handler(&type_handler_newdecimal);
      func->aggregate_attributes_decimal(items, nitems, false);
      return false;
    }
  }
  func->aggregate_attributes_int(items, nitems);
  handler->set_handler(func->unsigned_flag ?
                       handler->type_handler()->type_handler_unsigned() :
                       handler->type_handler()->type_handler_signed());
  return false;
}


bool Type_handler_real_result::
       Item_hybrid_func_fix_attributes(THD *thd,
                                       const LEX_CSTRING &name,
                                       Type_handler_hybrid_field_type *handler,
                                       Type_all_attributes *func,
                                       Item **items, uint nitems) const
{
  func->aggregate_attributes_real(items, nitems);
  return false;
}


bool Type_handler_decimal_result::
       Item_hybrid_func_fix_attributes(THD *thd,
                                       const LEX_CSTRING &name,
                                       Type_handler_hybrid_field_type *handler,
                                       Type_all_attributes *func,
                                       Item **items, uint nitems) const
{
  // The result is unsigned only if every argument is unsigned
  uint unsigned_count= func->count_unsigned(items, nitems);
  func->aggregate_attributes_decimal(items, nitems, unsigned_count == nitems);
  return false;
}


bool Type_handler_string_result::
       Item_hybrid_func_fix_attributes(THD *thd,
                                       const LEX_CSTRING &func_name,
                                       Type_handler_hybrid_field_type *handler,
                                       Type_all_attributes *func,
                                       Item **items, uint nitems) const
{
  return func->aggregate_attributes_string(func_name, items, nitems);
}



/*
  We can have enum/set type after merging only if we have one enum|set
  field (or MIN|MAX(enum|set field)) and number of NULL fields
*/
bool Type_handler_typelib::
       Item_hybrid_func_fix_attributes(THD *thd,
                                       const LEX_CSTRING &func_name,
                                       Type_handler_hybrid_field_type *handler,
                                       Type_all_attributes *func,
                                       Item **items, uint nitems) const
{
  const TYPELIB *typelib= NULL;
  for (uint i= 0; i < nitems; i++)
  {
    const TYPELIB *typelib2;
    if ((typelib2= items[i]->get_typelib()))
    {
      if (typelib)
      {
        /*
          Two ENUM/SET columns found. We convert such combinations to VARCHAR.
          This may change in the future to preserve ENUM/SET
          if typelib definitions are equal.
        */
        handler->set_handler(&type_handler_varchar);
        return func->aggregate_attributes_string(func_name, items, nitems);
      }
      typelib= typelib2;
    }
  }
  DBUG_ASSERT(typelib); // There must be at least one typelib
  func->set_typelib(typelib);
  return func->aggregate_attributes_string(func_name, items, nitems);
}


bool Type_handler_blob_common::
       Item_hybrid_func_fix_attributes(THD *thd,
                                       const LEX_CSTRING &func_name,
                                       Type_handler_hybrid_field_type *handler,
                                       Type_all_attributes *func,
                                       Item **items, uint nitems) const
{
  if (func->aggregate_attributes_string(func_name, items, nitems))
    return true;
  // Pick TINY/…/LONGBLOB based on the aggregated maximum length
  handler->set_handler(blob_type_handler(func->max_length));
  return false;
}


bool Type_handler_date_common::
       Item_hybrid_func_fix_attributes(THD *thd,
                                       const LEX_CSTRING &name,
                                       Type_handler_hybrid_field_type *handler,
                                       Type_all_attributes *func,
                                       Item **items, uint nitems) const
{
  func->fix_attributes_date();
  return false;
}


bool Type_handler_time_common::
       Item_hybrid_func_fix_attributes(THD *thd,
                                       const LEX_CSTRING &name,
                                       Type_handler_hybrid_field_type *handler,
                                       Type_all_attributes *func,
                                       Item **items, uint nitems) const
{
  func->aggregate_attributes_temporal(MIN_TIME_WIDTH, items, nitems);
  return false;
}


bool Type_handler_datetime_common::
       Item_hybrid_func_fix_attributes(THD *thd,
                                       const LEX_CSTRING &name,
                                       Type_handler_hybrid_field_type *handler,
                                       Type_all_attributes *func,
                                       Item **items, uint nitems) const
{
  func->aggregate_attributes_temporal(MAX_DATETIME_WIDTH, items, nitems);
  return false;
}


bool Type_handler_timestamp_common::
       Item_hybrid_func_fix_attributes(THD *thd,
                                       const LEX_CSTRING &name,
                                       Type_handler_hybrid_field_type *handler,
                                       Type_all_attributes *func,
                                       Item **items, uint nitems) const
{
  func->aggregate_attributes_temporal(MAX_DATETIME_WIDTH, items, nitems);
  return false;
}

/*************************************************************************/

bool
Type_handler::
  Item_func_min_max_fix_attributes(THD *thd, Item_func_min_max *func,
                                   Item **items, uint nitems) const
{
  /*
    Aggregating attributes for LEAST/GREATEST is exactly the same
    with aggregating for CASE-alike functions (e.g. COALESCE)
    for the majority of data type handlers.
  */
  return Item_hybrid_func_fix_attributes(thd, func->func_name_cstring(),
                                         func, func, items, nitems);
}


bool Type_handler_temporal_result::
       Item_func_min_max_fix_attributes(THD *thd, Item_func_min_max *func,
                                        Item **items, uint nitems) const
{
  DBUG_ASSERT(func->field_type() != MYSQL_TYPE_DATE);
  bool rc= Type_handler::Item_func_min_max_fix_attributes(thd, func,
                                                          items, nitems);
  // Result precision is the widest fractional-seconds precision of the args
  bool is_time= func->field_type() == MYSQL_TYPE_TIME;
  func->decimals= 0;
  for (uint i= 0; i < nitems; i++)
  {
    uint deci= is_time ? items[i]->time_precision(thd) :
                         items[i]->datetime_precision(thd);
    set_if_bigger(func->decimals, deci);
  }

  if (rc || func->maybe_null())
    return rc;
  /*
    LEAST/GREATEST(non-temporal, temporal) can return NULL.
    CAST functions Item_{time|datetime|date}_typecast always set maybe_null
    to true. Here we try to detect nullability more thoroughly.
    Perhaps CAST functions should also reuse this idea eventually.
  */
  const Type_handler *hf= func->type_handler();
  for (uint i= 0; i < nitems; i++)
  {
    /*
      If items[i] does not need conversion to the current temporal data
      type, then we trust items[i]->maybe_null, which was already ORred
      to func->maybe_null in the argument loop in fix_fields().
      If items[i] requires conversion to the current temporal data type,
      then conversion can fail and return NULL even for NOT NULL items.
    */
    const Type_handler *ha= items[i]->type_handler();
    if (hf == ha)
      continue; // No conversion.
    if (ha->cmp_type() != TIME_RESULT)
    {
      // Conversion from non-temporal is not safe
      func->set_maybe_null();
      break;
    }
    timestamp_type tf= hf->mysql_timestamp_type();
    timestamp_type ta= ha->mysql_timestamp_type();
    if (tf == ta ||
        (tf == MYSQL_TIMESTAMP_DATETIME && ta == MYSQL_TIMESTAMP_DATE))
    {
      /*
        If handlers have the same mysql_timestamp_type(),
        then conversion is NULL safe. Conversion from DATE to DATETIME
        is also safe. This branch includes data type pairs:
        Function return type Argument type Comment
        -------------------- ------------- -------------
        TIMESTAMP            TIMESTAMP     no conversion
        TIMESTAMP            DATETIME      not possible
        TIMESTAMP            DATE          not possible
        DATETIME             DATETIME      no conversion
        DATETIME             TIMESTAMP     safe conversion
        DATETIME             DATE          safe conversion
        TIME                 TIME          no conversion

        Note, a function cannot return TIMESTAMP if it has non-TIMESTAMP
        arguments (it would return DATETIME in such case).
      */
      DBUG_ASSERT(hf->field_type() != MYSQL_TYPE_TIMESTAMP || tf == ta);
      continue;
    }
    /*
      Here we have the following data type pairs that did not match
      the condition above:

      Function return type Argument type Comment
      -------------------- ------------- -------
      TIMESTAMP            TIME          Not possible
      DATETIME             TIME          depends on OLD_MODE_ZERO_DATE_TIME_CAST
      TIME                 TIMESTAMP     Not possible
      TIME                 DATETIME      Not possible
      TIME                 DATE          Not possible

      Most pairs are not possible, because the function data type
      would be DATETIME (according to LEAST/GREATEST aggregation rules).
      Conversion to DATETIME from TIME is not safe when
      OLD_MODE_ZERO_DATE_TIME_CAST is set:
      - negative TIME values cannot be converted to not-NULL DATETIME values
      - TIME values can produce DATETIME values that do not pass
        NO_ZERO_DATE and NO_ZERO_IN_DATE tests.
    */
    DBUG_ASSERT(hf->field_type() == MYSQL_TYPE_DATETIME);
    if (!(thd->variables.old_behavior & OLD_MODE_ZERO_DATE_TIME_CAST))
      continue;
    func->set_maybe_null();
    break;
  }
  return rc;
}


bool Type_handler_date_common::
       Item_func_min_max_fix_attributes(THD *thd, Item_func_min_max *func,
                                        Item **items, uint nitems) const
{
  func->fix_attributes_date();
  if (func->maybe_null())
    return false;
  /*
    We cannot trust the generic maybe_null value calculated during
    fix_fields().
    If a conversion from non-temporal types to DATE happens,
    then the result can be NULL (even if all arguments are not NULL).
  */
  for (uint i= 0; i < nitems; i++)
  {
    if (items[i]->type_handler()->cmp_type() != TIME_RESULT)
    {
      func->set_maybe_null();
      break;
    }
  }
  return false;
}


bool Type_handler_real_result::
       Item_func_min_max_fix_attributes(THD *thd, Item_func_min_max *func,
                                        Item **items, uint nitems) const
{
  /*
    DOUBLE is an exception and aggregates attributes differently
    for LEAST/GREATEST vs CASE-alike functions. See the comment in
    Item_func_min_max::aggregate_attributes_real().
  */
  func->aggregate_attributes_real(items, nitems);
  return false;
}

/*************************************************************************/

bool Type_handler_int_result::
       Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *func) const
{
  /*
    "this" is equal func->args[0]->type_handler() here, e.g. for MIN()/MAX().
    func->unsigned_flag is not reliably set yet.
    It will be set by the call below (copied from args[0]).
  */
  const Type_handler *h= is_unsigned()
                         ?
                           (Type_handler *)&type_handler_ulonglong
                         : (Type_handler *)&type_handler_slonglong;
  return func->fix_length_and_dec_numeric(h);
}


bool Type_handler_bool::
       Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *func) const
{
  return func->fix_length_and_dec_numeric(&type_handler_bool);
}


bool Type_handler_real_result::
       Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *func) const
{
  (void) func->fix_length_and_dec_numeric(&type_handler_double);
  // Recompute display width from the (possibly adjusted) decimals
  func->max_length= func->float_length(func->decimals);
  return false;
}


bool Type_handler_decimal_result::
       Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *func) const
{
  return func->fix_length_and_dec_numeric(&type_handler_newdecimal);
}


bool Type_handler_string_result::
       Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *func) const
{
  return func->fix_length_and_dec_string();
}


bool Type_handler_temporal_result::
       Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *func) const
{
  return func->fix_length_and_dec_generic();
}


/*************************************************************************/

/*
  SUM() result type: exact types accumulate in DECIMAL,
  approximate/string types accumulate in DOUBLE.
*/
bool Type_handler_int_result::
       Item_sum_sum_fix_length_and_dec(Item_sum_sum *item) const
{
  item->fix_length_and_dec_decimal();
  return false;
}


bool Type_handler_decimal_result::
       Item_sum_sum_fix_length_and_dec(Item_sum_sum *item) const
{
  item->fix_length_and_dec_decimal();
  return false;
}


bool Type_handler_temporal_result::
       Item_sum_sum_fix_length_and_dec(Item_sum_sum *item) const
{
  item->fix_length_and_dec_decimal();
  return false;
}


bool Type_handler_real_result::
       Item_sum_sum_fix_length_and_dec(Item_sum_sum *item) const
{
  item->fix_length_and_dec_double();
  return false;
}


bool Type_handler_string_result::
       Item_sum_sum_fix_length_and_dec(Item_sum_sum *item) const
{
  item->fix_length_and_dec_double();
  return false;
}




/*************************************************************************/

/*
  AVG() and VARIANCE()/STDDEV() follow the same rule as SUM():
  exact types compute in DECIMAL, approximate/string types in DOUBLE.
*/
bool Type_handler_int_result::
       Item_sum_avg_fix_length_and_dec(Item_sum_avg *item) const
{
  item->fix_length_and_dec_decimal();
  return false;
}


bool Type_handler_decimal_result::
       Item_sum_avg_fix_length_and_dec(Item_sum_avg *item) const
{
  item->fix_length_and_dec_decimal();
  return false;
}


bool Type_handler_temporal_result::
       Item_sum_avg_fix_length_and_dec(Item_sum_avg *item) const
{
  item->fix_length_and_dec_decimal();
  return false;
}


bool Type_handler_real_result::
       Item_sum_avg_fix_length_and_dec(Item_sum_avg *item) const
{
  item->fix_length_and_dec_double();
  return false;
}


bool Type_handler_string_result::
       Item_sum_avg_fix_length_and_dec(Item_sum_avg *item) const
{
  item->fix_length_and_dec_double();
  return false;
}




/*************************************************************************/

bool Type_handler_int_result::
       Item_sum_variance_fix_length_and_dec(Item_sum_variance *item) const
{
  item->fix_length_and_dec_decimal();
  return false;
}


bool Type_handler_decimal_result::
       Item_sum_variance_fix_length_and_dec(Item_sum_variance *item) const
{
  item->fix_length_and_dec_decimal();
  return false;
}


bool Type_handler_temporal_result::
       Item_sum_variance_fix_length_and_dec(Item_sum_variance *item) const
{
  item->fix_length_and_dec_decimal();
  return false;
}


bool Type_handler_real_result::
       Item_sum_variance_fix_length_and_dec(Item_sum_variance *item) const
{
  item->fix_length_and_dec_double();
  return false;
}


bool Type_handler_string_result::
       Item_sum_variance_fix_length_and_dec(Item_sum_variance *item) const
{
  item->fix_length_and_dec_double();
  return false;
}


/*************************************************************************/

/*
  Item_val_bool() overrides: truth value of an item in this type's
  native representation (non-zero means true).
*/
bool Type_handler_real_result::Item_val_bool(Item *item) const
{
  return item->val_real() != 0.0;
}

bool Type_handler_int_result::Item_val_bool(Item *item) const
{
  return item->val_int() != 0;
}

bool Type_handler_temporal_result::Item_val_bool(Item *item) const
{
  return item->val_real() != 0.0;
}

bool Type_handler_string_result::Item_val_bool(Item *item) const
{
  // Strings are converted to double first; non-numeric prefixes give 0
  return item->val_real() != 0.0;
}


/*************************************************************************/


bool Type_handler::Item_get_date_with_warn(THD *thd, Item *item,
                                           MYSQL_TIME *ltime,
                                           date_mode_t fuzzydate) const
{
  // Push db/table/field context so conversion warnings can name the column
  const TABLE_SHARE *s= item->field_table_or_null();
  Temporal::Warn_push warn(thd, s ? s->db.str : nullptr,
                           s ? s->table_name.str : nullptr,
                           item->field_name_or_null(), ltime, fuzzydate);
  Item_get_date(thd, item, &warn, ltime, fuzzydate);
  return ltime->time_type < 0;   // MYSQL_TIMESTAMP_{NONE,ERROR} are negative
}


bool Type_handler::Item_func_hybrid_field_type_get_date_with_warn(THD *thd,
                                              Item_func_hybrid_field_type *item,
                                              MYSQL_TIME *ltime,
                                              date_mode_t mode) const
{
  const TABLE_SHARE *s= item->field_table_or_null();
  Temporal::Warn_push warn(thd, s ? s->db.str : nullptr,
                           s ? s->table_name.str : nullptr,
                           item->field_name_or_null(), ltime, mode);
  Item_func_hybrid_field_type_get_date(thd, item, &warn, ltime, mode);
  return ltime->time_type < 0;
}


/************************************************************************/
/*
  Item_get_date() overrides: convert the item's native value to
  MYSQL_TIME, collecting warnings into 'warn'.
*/
void Type_handler_decimal_result::Item_get_date(THD *thd, Item *item,
                                                Temporal::Warn *warn,
                                                MYSQL_TIME *ltime,
                                                date_mode_t fuzzydate) const
{
  new(ltime) Temporal_hybrid(thd, warn, VDec(item).ptr(), fuzzydate);
}


void Type_handler_int_result::Item_get_date(THD *thd, Item *item,
                                            Temporal::Warn *warn,
                                            MYSQL_TIME *to,
                                            date_mode_t mode) const
{
  new(to) Temporal_hybrid(thd, warn, item->to_longlong_hybrid_null(), mode);
}


void Type_handler_year::Item_get_date(THD *thd, Item *item,
                                      Temporal::Warn *warn,
                                      MYSQL_TIME *ltime,
                                      date_mode_t fuzzydate) const
{
  // YEAR is first expanded to a YYYYMMDD integer, then parsed as a date
  VYear year(item);
  DBUG_ASSERT(!year.truncated());
  Longlong_hybrid_null nr(Longlong_null(year.to_YYYYMMDD(), year.is_null()),
                          item->unsigned_flag);

  new(ltime) Temporal_hybrid(thd, warn, nr, fuzzydate);
}


void Type_handler_real_result::Item_get_date(THD *thd, Item *item,
                                             Temporal::Warn *warn,
                                             MYSQL_TIME *ltime,
                                             date_mode_t fuzzydate) const
{
  new(ltime) Temporal_hybrid(thd, warn, item->to_double_null(), fuzzydate);
}


void Type_handler_string_result::Item_get_date(THD *thd, Item *item,
                                               Temporal::Warn *warn,
                                               MYSQL_TIME *ltime,
                                               date_mode_t mode) const
{
  StringBuffer<40> tmp;
  new(ltime) Temporal_hybrid(thd, warn, item->val_str(&tmp), mode);
}


void Type_handler_temporal_result::Item_get_date(THD *thd, Item *item,
                                                 Temporal::Warn *warn,
                                                 MYSQL_TIME *ltime,
                                                 date_mode_t fuzzydate) const
{
  DBUG_ASSERT(0); // Temporal type items must implement native get_date()
  item->null_value= true;
  set_zero_time(ltime, MYSQL_TIMESTAMP_NONE);
}


/*************************************************************************/

/*
  Item_val_int_signed_typecast() overrides: CAST(expr AS SIGNED).
*/
longlong Type_handler_real_result::
           Item_val_int_signed_typecast(Item *item) const
{
  return item->val_int_signed_typecast_from_real();
}

longlong Type_handler_int_result::
           Item_val_int_signed_typecast(Item *item) const
{
  return item->val_int();
}

longlong Type_handler_decimal_result::
           Item_val_int_signed_typecast(Item *item) const
{
  return VDec(item).to_longlong(false);
}

longlong Type_handler_temporal_result::
           Item_val_int_signed_typecast(Item *item) const
{
  return item->val_int();
}

longlong Type_handler_string_result::
           Item_val_int_signed_typecast(Item *item) const
{
  return item->val_int_signed_typecast_from_str();
}

/*************************************************************************/

/*
  Item_val_int_unsigned_typecast() overrides: CAST(expr AS UNSIGNED).
*/
longlong Type_handler_real_result::
           Item_val_int_unsigned_typecast(Item *item) const
{
  return item->val_int_unsigned_typecast_from_real();
}

longlong Type_handler_int_result::
           Item_val_int_unsigned_typecast(Item *item) const
{
  return item->val_int_unsigned_typecast_from_int();
}

longlong
Type_handler_temporal_result:: + Item_val_int_unsigned_typecast(Item *item) const +{ + return item->val_int_unsigned_typecast_from_int(); +} + +longlong Type_handler_time_common:: + Item_val_int_unsigned_typecast(Item *item) const +{ + /* + TODO: this should eventually be fixed to do rounding + when TIME_ROUND_FRACTIONAL is enabled, together with + Field_{tiny|short|long|longlong}::store_time_dec(). + See MDEV-19502. + */ + THD *thd= current_thd; + Time tm(thd, item); + DBUG_ASSERT(!tm.is_valid_time() == item->null_value); + if (!tm.is_valid_time()) + return 0; + longlong res= tm.to_longlong(); + if (res < 0) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_DATA_OVERFLOW, ER_THD(thd, ER_DATA_OVERFLOW), + ErrConvTime(tm.get_mysql_time()).ptr(), + "UNSIGNED BIGINT"); + return 0; + } + return res; +} + +longlong Type_handler_string_result:: + Item_val_int_unsigned_typecast(Item *item) const +{ + return item->val_int_unsigned_typecast_from_str(); +} + +/*************************************************************************/ + +String * +Type_handler_real_result::Item_func_hex_val_str_ascii(Item_func_hex *item, + String *str) const +{ + return item->val_str_ascii_from_val_real(str); +} + + +String * +Type_handler_decimal_result::Item_func_hex_val_str_ascii(Item_func_hex *item, + String *str) const +{ + return item->val_str_ascii_from_val_real(str); +} + + +String * +Type_handler_int_result::Item_func_hex_val_str_ascii(Item_func_hex *item, + String *str) const +{ + return item->val_str_ascii_from_val_int(str); +} + + +String * +Type_handler_temporal_result::Item_func_hex_val_str_ascii(Item_func_hex *item, + String *str) const +{ + return item->val_str_ascii_from_val_str(str); +} + + +String * +Type_handler_string_result::Item_func_hex_val_str_ascii(Item_func_hex *item, + String *str) const +{ + return item->val_str_ascii_from_val_str(str); +} + +/***************************************************************************/ + +String * 
+Type_handler_decimal_result::Item_func_hybrid_field_type_val_str( + Item_func_hybrid_field_type *item, + String *str) const +{ + return VDec_op(item).to_string_round(str, item->decimals); +} + + +double +Type_handler_decimal_result::Item_func_hybrid_field_type_val_real( + Item_func_hybrid_field_type *item) + const +{ + return VDec_op(item).to_double(); +} + + +longlong +Type_handler_decimal_result::Item_func_hybrid_field_type_val_int( + Item_func_hybrid_field_type *item) + const +{ + return VDec_op(item).to_longlong(item->unsigned_flag); +} + + +my_decimal * +Type_handler_decimal_result::Item_func_hybrid_field_type_val_decimal( + Item_func_hybrid_field_type *item, + my_decimal *dec) const +{ + return VDec_op(item).to_decimal(dec); +} + + +void +Type_handler_decimal_result::Item_func_hybrid_field_type_get_date( + THD *thd, + Item_func_hybrid_field_type *item, + Temporal::Warn *warn, + MYSQL_TIME *ltime, + date_mode_t fuzzydate) const +{ + new (ltime) Temporal_hybrid(thd, warn, VDec_op(item).ptr(), fuzzydate); +} + + +void +Type_handler_year::Item_func_hybrid_field_type_get_date( + THD *thd, + Item_func_hybrid_field_type *item, + Temporal::Warn *warn, + MYSQL_TIME *ltime, + date_mode_t fuzzydate) const +{ + VYear_op year(item); + DBUG_ASSERT(!year.truncated()); + Longlong_hybrid_null nr(Longlong_null(year.to_YYYYMMDD(), year.is_null()), + item->unsigned_flag); + new(ltime) Temporal_hybrid(thd, warn, nr, fuzzydate); +} + + +/***************************************************************************/ + + +String * +Type_handler_int_result::Item_func_hybrid_field_type_val_str( + Item_func_hybrid_field_type *item, + String *str) const +{ + return item->val_str_from_int_op(str); +} + + +double +Type_handler_int_result::Item_func_hybrid_field_type_val_real( + Item_func_hybrid_field_type *item) + const +{ + return item->val_real_from_int_op(); +} + + +longlong +Type_handler_int_result::Item_func_hybrid_field_type_val_int( + Item_func_hybrid_field_type *item) + const +{ + 
return item->val_int_from_int_op(); +} + + +my_decimal * +Type_handler_int_result::Item_func_hybrid_field_type_val_decimal( + Item_func_hybrid_field_type *item, + my_decimal *dec) const +{ + return item->val_decimal_from_int_op(dec); +} + + +void +Type_handler_int_result::Item_func_hybrid_field_type_get_date( + THD *thd, + Item_func_hybrid_field_type *item, + Temporal::Warn *warn, + MYSQL_TIME *to, + date_mode_t mode) const +{ + new(to) Temporal_hybrid(thd, warn, item->to_longlong_hybrid_null_op(), mode); +} + + +/***************************************************************************/ + +String * +Type_handler_double::Item_func_hybrid_field_type_val_str( + Item_func_hybrid_field_type *item, + String *str) const +{ + return item->val_str_from_real_op(str); +} + +String * +Type_handler_float::Item_func_hybrid_field_type_val_str( + Item_func_hybrid_field_type *item, + String *str) const +{ + Float nr(item->real_op()); + if (item->null_value) + return 0; + nr.to_string(str, item->decimals); + return str; +} + +double +Type_handler_real_result::Item_func_hybrid_field_type_val_real( + Item_func_hybrid_field_type *item) + const +{ + return item->val_real_from_real_op(); +} + + +longlong +Type_handler_real_result::Item_func_hybrid_field_type_val_int( + Item_func_hybrid_field_type *item) + const +{ + return item->val_int_from_real_op(); +} + + +my_decimal * +Type_handler_real_result::Item_func_hybrid_field_type_val_decimal( + Item_func_hybrid_field_type *item, + my_decimal *dec) const +{ + return item->val_decimal_from_real_op(dec); +} + + +void +Type_handler_real_result::Item_func_hybrid_field_type_get_date( + THD *thd, + Item_func_hybrid_field_type *item, + Temporal::Warn *warn, + MYSQL_TIME *to, + date_mode_t mode) const +{ + new(to) Temporal_hybrid(thd, warn, item->to_double_null_op(), mode); +} + + +/***************************************************************************/ + +String * +Type_handler_temporal_result::Item_func_hybrid_field_type_val_str( + 
Item_func_hybrid_field_type *item, + String *str) const +{ + return item->val_str_from_date_op(str); +} + + +double +Type_handler_temporal_result::Item_func_hybrid_field_type_val_real( + Item_func_hybrid_field_type *item) + const +{ + return item->val_real_from_date_op(); +} + + +longlong +Type_handler_temporal_result::Item_func_hybrid_field_type_val_int( + Item_func_hybrid_field_type *item) + const +{ + return item->val_int_from_date_op(); +} + + +my_decimal * +Type_handler_temporal_result::Item_func_hybrid_field_type_val_decimal( + Item_func_hybrid_field_type *item, + my_decimal *dec) const +{ + return item->val_decimal_from_date_op(dec); +} + + +void +Type_handler_temporal_result::Item_func_hybrid_field_type_get_date( + THD *thd, + Item_func_hybrid_field_type *item, + Temporal::Warn *warn, + MYSQL_TIME *ltime, + date_mode_t fuzzydate) const +{ + if (item->date_op(thd, ltime, fuzzydate)) + set_zero_time(ltime, MYSQL_TIMESTAMP_NONE); +} + + +/***************************************************************************/ + +String * +Type_handler_time_common::Item_func_hybrid_field_type_val_str( + Item_func_hybrid_field_type *item, + String *str) const +{ + return item->val_str_from_time_op(str); +} + + +double +Type_handler_time_common::Item_func_hybrid_field_type_val_real( + Item_func_hybrid_field_type *item) + const +{ + return item->val_real_from_time_op(); +} + + +longlong +Type_handler_time_common::Item_func_hybrid_field_type_val_int( + Item_func_hybrid_field_type *item) + const +{ + return item->val_int_from_time_op(); +} + + +my_decimal * +Type_handler_time_common::Item_func_hybrid_field_type_val_decimal( + Item_func_hybrid_field_type *item, + my_decimal *dec) const +{ + return item->val_decimal_from_time_op(dec); +} + + +void +Type_handler_time_common::Item_func_hybrid_field_type_get_date( + THD *thd, + Item_func_hybrid_field_type *item, + Temporal::Warn *warn, + MYSQL_TIME *ltime, + date_mode_t fuzzydate) const +{ + if (item->time_op(thd, ltime)) + 
set_zero_time(ltime, MYSQL_TIMESTAMP_NONE); +} + + +/***************************************************************************/ + +String * +Type_handler_string_result::Item_func_hybrid_field_type_val_str( + Item_func_hybrid_field_type *item, + String *str) const +{ + return item->val_str_from_str_op(str); +} + + +double +Type_handler_string_result::Item_func_hybrid_field_type_val_real( + Item_func_hybrid_field_type *item) + const +{ + return item->val_real_from_str_op(); +} + + +longlong +Type_handler_string_result::Item_func_hybrid_field_type_val_int( + Item_func_hybrid_field_type *item) + const +{ + return item->val_int_from_str_op(); +} + + +my_decimal * +Type_handler_string_result::Item_func_hybrid_field_type_val_decimal( + Item_func_hybrid_field_type *item, + my_decimal *dec) const +{ + return item->val_decimal_from_str_op(dec); +} + + +void +Type_handler_string_result::Item_func_hybrid_field_type_get_date( + THD *thd, + Item_func_hybrid_field_type *item, + Temporal::Warn *warn, + MYSQL_TIME *ltime, + date_mode_t mode) const +{ + StringBuffer<40> tmp; + String *res= item->str_op(&tmp); + DBUG_ASSERT((res == NULL) == item->null_value); + new(ltime) Temporal_hybrid(thd, warn, res, mode); +} + +/***************************************************************************/ + +bool Type_handler_numeric:: + Item_func_between_fix_length_and_dec(Item_func_between *func) const +{ + return func->fix_length_and_dec_numeric(current_thd); +} + +bool Type_handler_temporal_result:: + Item_func_between_fix_length_and_dec(Item_func_between *func) const +{ + return func->fix_length_and_dec_temporal(current_thd); +} + +bool Type_handler_string_result:: + Item_func_between_fix_length_and_dec(Item_func_between *func) const +{ + return func->fix_length_and_dec_string(current_thd); +} + + +longlong Type_handler_row:: + Item_func_between_val_int(Item_func_between *func) const +{ + DBUG_ASSERT(0); + func->null_value= true; + return 0; +} + +longlong Type_handler_string_result:: + 
Item_func_between_val_int(Item_func_between *func) const +{ + return func->val_int_cmp_string(); +} + +longlong Type_handler_temporal_with_date:: + Item_func_between_val_int(Item_func_between *func) const +{ + return func->val_int_cmp_datetime(); +} + +longlong Type_handler_time_common:: + Item_func_between_val_int(Item_func_between *func) const +{ + return func->val_int_cmp_time(); +} + +longlong Type_handler_timestamp_common:: + Item_func_between_val_int(Item_func_between *func) const +{ + return func->val_int_cmp_native(); +} + +longlong Type_handler_int_result:: + Item_func_between_val_int(Item_func_between *func) const +{ + return func->val_int_cmp_int(); +} + +longlong Type_handler_real_result:: + Item_func_between_val_int(Item_func_between *func) const +{ + return func->val_int_cmp_real(); +} + +longlong Type_handler_decimal_result:: + Item_func_between_val_int(Item_func_between *func) const +{ + return func->val_int_cmp_decimal(); +} + +/***************************************************************************/ + +cmp_item *Type_handler_int_result::make_cmp_item(THD *thd, + CHARSET_INFO *cs) const +{ + return new (thd->mem_root) cmp_item_int; +} + +cmp_item *Type_handler_real_result::make_cmp_item(THD *thd, + CHARSET_INFO *cs) const +{ + return new (thd->mem_root) cmp_item_real; +} + +cmp_item *Type_handler_decimal_result::make_cmp_item(THD *thd, + CHARSET_INFO *cs) const +{ + return new (thd->mem_root) cmp_item_decimal; +} + + +cmp_item *Type_handler_string_result::make_cmp_item(THD *thd, + CHARSET_INFO *cs) const +{ + return new (thd->mem_root) cmp_item_sort_string(cs); +} + +cmp_item *Type_handler_row::make_cmp_item(THD *thd, + CHARSET_INFO *cs) const +{ + return new (thd->mem_root) cmp_item_row; +} + +cmp_item *Type_handler_time_common::make_cmp_item(THD *thd, + CHARSET_INFO *cs) const +{ + return new (thd->mem_root) cmp_item_time; +} + +cmp_item *Type_handler_temporal_with_date::make_cmp_item(THD *thd, + CHARSET_INFO *cs) const +{ + return new 
(thd->mem_root) cmp_item_datetime;
+}
+
+cmp_item *Type_handler_timestamp_common::make_cmp_item(THD *thd,
+ CHARSET_INFO *cs) const
+{
+ return new (thd->mem_root) cmp_item_timestamp;
+}
+
+/***************************************************************************/
+
+static int srtcmp_in(const void *cs_, const void *x_, const void *y_)
+{
+ const CHARSET_INFO *cs= static_cast<const CHARSET_INFO *>(cs_);
+ const String *x= static_cast<const String *>(x_);
+ const String *y= static_cast<const String *>(y_);
+ return cs->strnncollsp(x->ptr(), x->length(), y->ptr(), y->length());
+}
+
+in_vector *Type_handler_string_result::make_in_vector(THD *thd,
+ const Item_func_in *func,
+ uint nargs) const
+{
+ return new (thd->mem_root) in_string(thd, nargs, (qsort2_cmp) srtcmp_in,
+ func->compare_collation());
+
+}
+
+
+in_vector *Type_handler_int_result::make_in_vector(THD *thd,
+ const Item_func_in *func,
+ uint nargs) const
+{
+ return new (thd->mem_root) in_longlong(thd, nargs);
+}
+
+
+in_vector *Type_handler_real_result::make_in_vector(THD *thd,
+ const Item_func_in *func,
+ uint nargs) const
+{
+ return new (thd->mem_root) in_double(thd, nargs);
+}
+
+
+in_vector *Type_handler_decimal_result::make_in_vector(THD *thd,
+ const Item_func_in *func,
+ uint nargs) const
+{
+ return new (thd->mem_root) in_decimal(thd, nargs);
+}
+
+
+in_vector *Type_handler_time_common::make_in_vector(THD *thd,
+ const Item_func_in *func,
+ uint nargs) const
+{
+ return new (thd->mem_root) in_time(thd, nargs);
+}
+
+
+in_vector *
+Type_handler_temporal_with_date::make_in_vector(THD *thd,
+ const Item_func_in *func,
+ uint nargs) const
+{
+ return new (thd->mem_root) in_datetime(thd, nargs);
+}
+
+
+in_vector *
+Type_handler_timestamp_common::make_in_vector(THD *thd,
+ const Item_func_in *func,
+ uint nargs) const
+{
+ return new (thd->mem_root) in_timestamp(thd, nargs);
+}
+
+
+in_vector *Type_handler_row::make_in_vector(THD *thd,
+ const Item_func_in *func,
+ uint nargs) const
+{
+ return new (thd->mem_root) in_row(thd, nargs, 0);
+}
+
+/***************************************************************************/ + +bool Type_handler_string_result:: + Item_func_in_fix_comparator_compatible_types(THD *thd, + Item_func_in *func) const +{ + if (func->agg_all_arg_charsets_for_comparison()) + return true; + if (func->compatible_types_scalar_bisection_possible()) + { + return func->value_list_convert_const_to_int(thd) || + func->fix_for_scalar_comparison_using_bisection(thd); + } + return + func->fix_for_scalar_comparison_using_cmp_items(thd, + 1U << (uint) STRING_RESULT); +} + + +bool Type_handler_int_result:: + Item_func_in_fix_comparator_compatible_types(THD *thd, + Item_func_in *func) const +{ + /* + Does not need to call value_list_convert_const_to_int() + as already handled by int handler. + */ + return func->compatible_types_scalar_bisection_possible() ? + func->fix_for_scalar_comparison_using_bisection(thd) : + func->fix_for_scalar_comparison_using_cmp_items(thd, + 1U << (uint) INT_RESULT); +} + + +bool Type_handler_real_result:: + Item_func_in_fix_comparator_compatible_types(THD *thd, + Item_func_in *func) const +{ + return func->compatible_types_scalar_bisection_possible() ? + (func->value_list_convert_const_to_int(thd) || + func->fix_for_scalar_comparison_using_bisection(thd)) : + func->fix_for_scalar_comparison_using_cmp_items(thd, + 1U << (uint) REAL_RESULT); +} + + +bool Type_handler_decimal_result:: + Item_func_in_fix_comparator_compatible_types(THD *thd, + Item_func_in *func) const +{ + return func->compatible_types_scalar_bisection_possible() ? + (func->value_list_convert_const_to_int(thd) || + func->fix_for_scalar_comparison_using_bisection(thd)) : + func->fix_for_scalar_comparison_using_cmp_items(thd, + 1U << (uint) DECIMAL_RESULT); +} + + +bool Type_handler_temporal_result:: + Item_func_in_fix_comparator_compatible_types(THD *thd, + Item_func_in *func) const +{ + return func->compatible_types_scalar_bisection_possible() ? 
+ (func->value_list_convert_const_to_int(thd) || + func->fix_for_scalar_comparison_using_bisection(thd)) : + func->fix_for_scalar_comparison_using_cmp_items(thd, + 1U << (uint) TIME_RESULT); +} + + +bool Type_handler_row::Item_func_in_fix_comparator_compatible_types(THD *thd, + Item_func_in *func) const +{ + return func->compatible_types_row_bisection_possible() ? + func->fix_for_row_comparison_using_bisection(thd) : + func->fix_for_row_comparison_using_cmp_items(thd); +} + +/***************************************************************************/ + +String *Type_handler_string_result:: + Item_func_min_max_val_str(Item_func_min_max *func, String *str) const +{ + return func->val_str_native(str); +} + + +String *Type_handler_time_common:: + Item_func_min_max_val_str(Item_func_min_max *func, String *str) const +{ + return Time(func).to_string(str, func->decimals); +} + + +String *Type_handler_date_common:: + Item_func_min_max_val_str(Item_func_min_max *func, String *str) const +{ + return Date(func).to_string(str); +} + + +String *Type_handler_datetime_common:: + Item_func_min_max_val_str(Item_func_min_max *func, String *str) const +{ + return Datetime(func).to_string(str, func->decimals); +} + + +String *Type_handler_timestamp_common:: + Item_func_min_max_val_str(Item_func_min_max *func, String *str) const +{ + THD *thd= current_thd; + return Timestamp_or_zero_datetime_native_null(thd, func). 
+ to_datetime(thd).to_string(str, func->decimals); +} + + +String *Type_handler_int_result:: + Item_func_min_max_val_str(Item_func_min_max *func, String *str) const +{ + return func->val_string_from_int(str); +} + + +String *Type_handler_decimal_result:: + Item_func_min_max_val_str(Item_func_min_max *func, String *str) const +{ + return VDec(func).to_string_round(str, func->decimals); +} + + +String *Type_handler_double:: + Item_func_min_max_val_str(Item_func_min_max *func, String *str) const +{ + return func->val_string_from_real(str); +} + + +String *Type_handler_float:: + Item_func_min_max_val_str(Item_func_min_max *func, String *str) const +{ + Float nr(func->val_real()); + if (func->null_value) + return 0; + nr.to_string(str, func->decimals); + return str; +} + + +double Type_handler_string_result:: + Item_func_min_max_val_real(Item_func_min_max *func) const +{ + return func->val_real_native(); +} + + +double Type_handler_time_common:: + Item_func_min_max_val_real(Item_func_min_max *func) const +{ + return Time(current_thd, func).to_double(); +} + + +double Type_handler_date_common:: + Item_func_min_max_val_real(Item_func_min_max *func) const +{ + return Date(current_thd, func).to_double(); +} + + +double Type_handler_datetime_common:: + Item_func_min_max_val_real(Item_func_min_max *func) const +{ + return Datetime(current_thd, func).to_double(); +} + + +double Type_handler_timestamp_common:: + Item_func_min_max_val_real(Item_func_min_max *func) const +{ + THD *thd= current_thd; + return Timestamp_or_zero_datetime_native_null(thd, func). 
+ to_datetime(thd).to_double(); +} + + +double Type_handler_numeric:: + Item_func_min_max_val_real(Item_func_min_max *func) const +{ + return func->val_real_native(); +} + + +longlong Type_handler_string_result:: + Item_func_min_max_val_int(Item_func_min_max *func) const +{ + return func->val_int_native(); +} + + +longlong Type_handler_time_common:: + Item_func_min_max_val_int(Item_func_min_max *func) const +{ + return Time(current_thd, func).to_longlong(); +} + + +longlong Type_handler_date_common:: + Item_func_min_max_val_int(Item_func_min_max *func) const +{ + return Date(current_thd, func).to_longlong(); +} + + +longlong Type_handler_datetime_common:: + Item_func_min_max_val_int(Item_func_min_max *func) const +{ + return Datetime(current_thd, func).to_longlong(); +} + + +longlong Type_handler_timestamp_common:: + Item_func_min_max_val_int(Item_func_min_max *func) const +{ + THD *thd= current_thd; + return Timestamp_or_zero_datetime_native_null(thd, func). + to_datetime(thd).to_longlong(); +} + + +longlong Type_handler_numeric:: + Item_func_min_max_val_int(Item_func_min_max *func) const +{ + return func->val_int_native(); +} + + +my_decimal *Type_handler_string_result:: + Item_func_min_max_val_decimal(Item_func_min_max *func, + my_decimal *dec) const +{ + return func->val_decimal_native(dec); +} + + +my_decimal *Type_handler_numeric:: + Item_func_min_max_val_decimal(Item_func_min_max *func, + my_decimal *dec) const +{ + return func->val_decimal_native(dec); +} + + +my_decimal *Type_handler_time_common:: + Item_func_min_max_val_decimal(Item_func_min_max *func, + my_decimal *dec) const +{ + return Time(current_thd, func).to_decimal(dec); +} + + +my_decimal *Type_handler_date_common:: + Item_func_min_max_val_decimal(Item_func_min_max *func, + my_decimal *dec) const +{ + return Date(current_thd, func).to_decimal(dec); +} + + +my_decimal *Type_handler_datetime_common:: + Item_func_min_max_val_decimal(Item_func_min_max *func, + my_decimal *dec) const +{ + return 
Datetime(current_thd, func).to_decimal(dec); +} + + +my_decimal *Type_handler_timestamp_common:: + Item_func_min_max_val_decimal(Item_func_min_max *func, + my_decimal *dec) const +{ + THD *thd= current_thd; + return Timestamp_or_zero_datetime_native_null(thd, func). + to_datetime(thd).to_decimal(dec); +} + + +bool Type_handler_string_result:: + Item_func_min_max_get_date(THD *thd, Item_func_min_max *func, + MYSQL_TIME *ltime, date_mode_t fuzzydate) const +{ + /* + just like ::val_int() method of a string item can be called, + for example, SELECT CONCAT("10", "12") + 1, + ::get_date() can be called for non-temporal values, + for example, SELECT MONTH(GREATEST("2011-11-21", "2010-10-09")) + */ + return func->get_date_from_string(thd, ltime, fuzzydate); +} + + +bool Type_handler_numeric:: + Item_func_min_max_get_date(THD *thd, Item_func_min_max *func, + MYSQL_TIME *ltime, date_mode_t fuzzydate) const +{ + return Item_get_date_with_warn(thd, func, ltime, fuzzydate); +} + + +bool Type_handler_temporal_result:: + Item_func_min_max_get_date(THD *thd, Item_func_min_max *func, + MYSQL_TIME *ltime, date_mode_t fuzzydate) const +{ + /* + - If the caller specified TIME_TIME_ONLY, then it's going to convert + a DATETIME or DATE to TIME. So we pass the default flags for date. This is + exactly the same with what Item_func_min_max_val_{int|real|decimal|str} or + Item_send_datetime() do. We return the value in accordance with the + current session date flags and let the caller further convert it to TIME. + - If the caller did not specify TIME_TIME_ONLY, then return the value + according to the flags supplied by the caller. + */ + return func->get_date_native(thd, ltime, + fuzzydate & TIME_TIME_ONLY ? 
+ Datetime::Options(thd) :
+ fuzzydate);
+}
+
+bool Type_handler_time_common::
+ Item_func_min_max_get_date(THD *thd, Item_func_min_max *func,
+ MYSQL_TIME *ltime, date_mode_t fuzzydate) const
+{
+ return func->get_time_native(thd, ltime);
+}
+
+
+bool Type_handler_timestamp_common::
+ Item_func_min_max_get_date(THD *thd, Item_func_min_max *func,
+ MYSQL_TIME *ltime, date_mode_t fuzzydate) const
+{
+ return Timestamp_or_zero_datetime_native_null(thd, func).
+ to_datetime(thd).copy_to_mysql_time(ltime);
+}
+
+/***************************************************************************/
+
+/**
+ Get a string representation of the Item value.
+ See sql_type.h for details.
+*/
+String *Type_handler_row::
+ print_item_value(THD *thd, Item *item, String *str) const
+{
+ CHARSET_INFO *cs= thd->variables.character_set_client;
+ StringBuffer<STRING_BUFFER_USUAL_SIZE> val(cs);
+ str->append(STRING_WITH_LEN("ROW("));
+ for (uint i= 0 ; i < item->cols(); i++)
+ {
+ if (i > 0)
+ str->append(',');
+ Item *elem= item->element_index(i);
+ String *tmp= elem->type_handler()->print_item_value(thd, elem, &val);
+ if (tmp)
+ str->append(*tmp);
+ else
+ str->append(NULL_clex_str);
+ }
+ str->append(')');
+ return str;
+}
+
+
+/**
+ Get a string representation of the Item value,
+ using the character string format with its charset and collation, e.g.
+ latin1 'string' COLLATE latin1_german2_ci
+*/
+String *Type_handler::
+ print_item_value_csstr(THD *thd, Item *item, String *str) const
+{
+ String *result= item->val_str(str);
+
+ if (!result)
+ return NULL;
+
+ StringBuffer<STRING_BUFFER_USUAL_SIZE> buf(result->charset());
+ CHARSET_INFO *cs= thd->variables.character_set_client;
+
+ buf.append('_');
+ buf.append(result->charset()->cs_name);
+ if (cs->escape_with_backslash_is_dangerous)
+ buf.append(' ');
+ append_query_string(cs, &buf, result->ptr(), result->length(),
+ thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES);
+ buf.append(STRING_WITH_LEN(" COLLATE '"));
+ buf.append(item->collation.collation->coll_name);
+ buf.append('\'');
+ str->copy(buf);
+
+ return str;
+}
+
+
+String *Type_handler_numeric::
+ print_item_value(THD *thd, Item *item, String *str) const
+{
+ return item->val_str(str);
+}
+
+
+String *Type_handler::
+ print_item_value_temporal(THD *thd, Item *item, String *str,
+ const Name &type_name, String *buf) const
+{
+ String *result= item->val_str(buf);
+ return !result ||
+ str->realloc(type_name.length() + result->length() + 2) ||
+ str->copy(type_name.ptr(), type_name.length(), &my_charset_latin1) ||
+ str->append('\'') ||
+ str->append(result->ptr(), result->length()) ||
+ str->append('\'') ?
+ NULL :
+ str;
+}
+
+
+String *Type_handler_time_common::
+ print_item_value(THD *thd, Item *item, String *str) const
+{
+ StringBuffer<MAX_TIME_FULL_WIDTH+1> buf;
+ return print_item_value_temporal(thd, item, str,
+ Name(STRING_WITH_LEN("TIME")), &buf);
+}
+
+
+String *Type_handler_date_common::
+ print_item_value(THD *thd, Item *item, String *str) const
+{
+ StringBuffer<MAX_DATE_WIDTH+1> buf;
+ return print_item_value_temporal(thd, item, str,
+ Name(STRING_WITH_LEN("DATE")), &buf);
+}
+
+
+String *Type_handler_datetime_common::
+ print_item_value(THD *thd, Item *item, String *str) const
+{
+ StringBuffer<MAX_DATETIME_FULL_WIDTH+1> buf;
+ return print_item_value_temporal(thd, item, str,
+ Name(STRING_WITH_LEN("TIMESTAMP")), &buf);
+}
+
+
+String *Type_handler_timestamp_common::
+ print_item_value(THD *thd, Item *item, String *str) const
+{
+ StringBuffer<MAX_DATETIME_FULL_WIDTH+1> buf;
+ return print_item_value_temporal(thd, item, str,
+ Name(STRING_WITH_LEN("TIMESTAMP")), &buf);
+}
+
+
+/***************************************************************************/
+
+bool Type_handler_row::
+ Item_func_round_fix_length_and_dec(Item_func_round *item) const
+{
+ DBUG_ASSERT(0);
+ return false;
+}
+
+
+bool Type_handler_int_result::
+ Item_func_round_fix_length_and_dec(Item_func_round *item) const
+{
+ item->fix_arg_int(this, item->arguments()[0],
+ field_type() == MYSQL_TYPE_LONGLONG);
+ return false;
+}
+
+
+bool Type_handler_year::
+ Item_func_round_fix_length_and_dec(Item_func_round *item) const
+{
+ item->fix_arg_int(&type_handler_ulong, item->arguments()[0], false);
+ return false;
+}
+
+
+bool Type_handler_hex_hybrid::
+ Item_func_round_fix_length_and_dec(Item_func_round *item) const
+{
+ item->fix_arg_hex_hybrid();
+ return false;
+}
+
+
+bool Type_handler_bit::
+ Item_func_round_fix_length_and_dec(Item_func_round *item) const
+{
+ uint nbits= item->arguments()[0]->max_length;
+ item->fix_length_and_dec_ulong_or_ulonglong_by_nbits(nbits);
+ return false;
+}
+
+
+bool Type_handler_typelib::
+ Item_func_round_fix_length_and_dec(Item_func_round *item) 
const +{ + item->fix_length_and_dec_long_or_longlong(5, true); + return false; +} + + +bool Type_handler_real_result:: + Item_func_round_fix_length_and_dec(Item_func_round *item) const +{ + item->fix_arg_double(); + return false; +} + + +bool Type_handler_decimal_result:: + Item_func_round_fix_length_and_dec(Item_func_round *item) const +{ + item->fix_arg_decimal(); + return false; +} + + +bool Type_handler_date_common:: + Item_func_round_fix_length_and_dec(Item_func_round *item) const +{ + static const Type_std_attributes attr(Type_numeric_attributes(8, 0, true), + DTCollation_numeric()); + item->fix_arg_int(&type_handler_ulong, &attr, false); + return false; +} + + +bool Type_handler_time_common:: + Item_func_round_fix_length_and_dec(Item_func_round *item) const +{ + item->fix_arg_time(); + return false; +} + + +bool Type_handler_datetime_common:: + Item_func_round_fix_length_and_dec(Item_func_round *item) const +{ + item->fix_arg_datetime(); + return false; +} + + +bool Type_handler_timestamp_common:: + Item_func_round_fix_length_and_dec(Item_func_round *item) const +{ + item->fix_arg_datetime(); + return false; +} + + +bool Type_handler_string_result:: + Item_func_round_fix_length_and_dec(Item_func_round *item) const +{ + item->fix_arg_double(); + return false; +} + + +/***************************************************************************/ + +bool Type_handler_row:: + Item_func_int_val_fix_length_and_dec(Item_func_int_val *item) const +{ + DBUG_ASSERT(0); + return false; +} + + +bool Type_handler_int_result:: + Item_func_int_val_fix_length_and_dec(Item_func_int_val *item) const +{ + item->Type_std_attributes::set(item->arguments()[0]); + item->set_handler(this); + return false; +} + + +bool Type_handler_year:: + Item_func_int_val_fix_length_and_dec(Item_func_int_val *item) const +{ + item->Type_std_attributes::set(item->arguments()[0]); + item->set_handler(&type_handler_ulong); + return false; +} + + +bool Type_handler_bit:: + 
Item_func_int_val_fix_length_and_dec(Item_func_int_val *item) const +{ + uint nbits= item->arguments()[0]->max_length; + item->fix_length_and_dec_ulong_or_ulonglong_by_nbits(nbits); + return false; +} + + +bool Type_handler_typelib:: + Item_func_int_val_fix_length_and_dec(Item_func_int_val *item) const +{ + item->fix_length_and_dec_long_or_longlong(5, true); + return false; +} + + +bool Type_handler_hex_hybrid:: + Item_func_int_val_fix_length_and_dec(Item_func_int_val *item) const +{ + uint nchars= item->arguments()[0]->decimal_precision(); + item->fix_length_and_dec_long_or_longlong(nchars, true); + return false; +} + + +bool Type_handler_real_result:: + Item_func_int_val_fix_length_and_dec(Item_func_int_val *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + + +bool Type_handler_decimal_result:: + Item_func_int_val_fix_length_and_dec(Item_func_int_val *item) const +{ + item->fix_length_and_dec_int_or_decimal(); + return false; +} + + +bool Type_handler_date_common:: + Item_func_int_val_fix_length_and_dec(Item_func_int_val *item) const +{ + static const Type_numeric_attributes attr(8, 0/*dec*/, true/*unsigned*/); + item->Type_std_attributes::set(attr, DTCollation_numeric()); + item->set_handler(&type_handler_ulong); + return false; +} + + +bool Type_handler_time_common:: + Item_func_int_val_fix_length_and_dec(Item_func_int_val *item) const +{ + item->fix_length_and_dec_time(); + return false; +} + + +bool Type_handler_datetime_common:: + Item_func_int_val_fix_length_and_dec(Item_func_int_val *item) const +{ + item->fix_length_and_dec_datetime(); + return false; +} + + +bool Type_handler_timestamp_common:: + Item_func_int_val_fix_length_and_dec(Item_func_int_val *item) const +{ + item->fix_length_and_dec_datetime(); + return false; +} + + +bool Type_handler_string_result:: + Item_func_int_val_fix_length_and_dec(Item_func_int_val *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + + 
+/***************************************************************************/ + +bool Type_handler_row:: + Item_func_abs_fix_length_and_dec(Item_func_abs *item) const +{ + DBUG_ASSERT(0); + return false; +} + + +bool Type_handler_int_result:: + Item_func_abs_fix_length_and_dec(Item_func_abs *item) const +{ + item->fix_length_and_dec_int(); + return false; +} + + +bool Type_handler_real_result:: + Item_func_abs_fix_length_and_dec(Item_func_abs *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + + +bool Type_handler_decimal_result:: + Item_func_abs_fix_length_and_dec(Item_func_abs *item) const +{ + item->fix_length_and_dec_decimal(); + return false; +} + + +bool Type_handler_temporal_result:: + Item_func_abs_fix_length_and_dec(Item_func_abs *item) const +{ + item->fix_length_and_dec_decimal(); + return false; +} + + +bool Type_handler_string_result:: + Item_func_abs_fix_length_and_dec(Item_func_abs *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + + +/***************************************************************************/ + +bool Type_handler_row:: + Item_func_neg_fix_length_and_dec(Item_func_neg *item) const +{ + DBUG_ASSERT(0); + return false; +} + + +bool Type_handler_int_result:: + Item_func_neg_fix_length_and_dec(Item_func_neg *item) const +{ + item->fix_length_and_dec_int(); + return false; +} + + +bool Type_handler_real_result:: + Item_func_neg_fix_length_and_dec(Item_func_neg *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + + +bool Type_handler_decimal_result:: + Item_func_neg_fix_length_and_dec(Item_func_neg *item) const +{ + item->fix_length_and_dec_decimal(); + return false; +} + + +bool Type_handler_temporal_result:: + Item_func_neg_fix_length_and_dec(Item_func_neg *item) const +{ + item->fix_length_and_dec_decimal(); + return false; +} + + +bool Type_handler_string_result:: + Item_func_neg_fix_length_and_dec(Item_func_neg *item) const +{ + item->fix_length_and_dec_double(); + 
return false; +} + + +/***************************************************************************/ + +bool Type_handler:: + Item_func_signed_fix_length_and_dec(Item_func_signed *item) const +{ + item->fix_length_and_dec_generic(); + return false; +} + + +bool Type_handler:: + Item_func_unsigned_fix_length_and_dec(Item_func_unsigned *item) const +{ + const Item *arg= item->arguments()[0]; + if (!arg->unsigned_flag && arg->val_int_min() < 0) + { + /* + Negative arguments produce long results: + CAST(1-2 AS UNSIGNED) -> 18446744073709551615 + */ + item->max_length= MAX_BIGINT_WIDTH; + return false; + } + item->fix_length_and_dec_generic(); + return false; +} + + +bool Type_handler_string_result:: + Item_func_signed_fix_length_and_dec(Item_func_signed *item) const +{ + item->fix_length_and_dec_string(); + return false; +} + + +bool Type_handler_string_result:: + Item_func_unsigned_fix_length_and_dec(Item_func_unsigned *item) const +{ + const Item *arg= item->arguments()[0]; + if (!arg->unsigned_flag && // Not HEX hybrid + arg->max_char_length() > 1) // Can be negative + { + // String arguments can give long results: '-1' -> 18446744073709551614 + item->max_length= MAX_BIGINT_WIDTH; + return false; + } + item->fix_length_and_dec_string(); + return false; +} + +bool Type_handler_real_result:: + Item_func_signed_fix_length_and_dec(Item_func_signed *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + + +bool Type_handler_real_result:: + Item_func_unsigned_fix_length_and_dec(Item_func_unsigned *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + + +bool Type_handler:: + Item_double_typecast_fix_length_and_dec(Item_double_typecast *item) const +{ + item->fix_length_and_dec_generic(); + return false; +} + + +bool Type_handler:: + Item_float_typecast_fix_length_and_dec(Item_float_typecast *item) const +{ + item->fix_length_and_dec_generic(); + return false; +} + + +bool Type_handler:: + 
Item_decimal_typecast_fix_length_and_dec(Item_decimal_typecast *item) const +{ + item->fix_length_and_dec_generic(); + return false; +} + + +bool Type_handler:: + Item_char_typecast_fix_length_and_dec(Item_char_typecast *item) const +{ + item->fix_length_and_dec_generic(); + return false; +} + + +bool Type_handler_numeric:: + Item_char_typecast_fix_length_and_dec(Item_char_typecast *item) const +{ + item->fix_length_and_dec_numeric(); + return false; +} + + +bool Type_handler_string_result:: + Item_char_typecast_fix_length_and_dec(Item_char_typecast *item) const +{ + item->fix_length_and_dec_str(); + return false; +} + + +bool Type_handler:: + Item_time_typecast_fix_length_and_dec(Item_time_typecast *item) const +{ + uint dec= item->decimals == NOT_FIXED_DEC ? + item->arguments()[0]->time_precision(current_thd) : + item->decimals; + item->fix_attributes_temporal(MIN_TIME_WIDTH, dec); + item->set_maybe_null(); + return false; +} + + +bool Type_handler:: + Item_date_typecast_fix_length_and_dec(Item_date_typecast *item) const +{ + item->fix_attributes_temporal(MAX_DATE_WIDTH, 0); + item->set_maybe_null(); + return false; +} + + +bool Type_handler:: + Item_datetime_typecast_fix_length_and_dec(Item_datetime_typecast *item) + const +{ + uint dec= item->decimals == NOT_FIXED_DEC ? 
+ item->arguments()[0]->datetime_precision(current_thd) : + item->decimals; + item->fix_attributes_temporal(MAX_DATETIME_WIDTH, dec); + item->set_maybe_null(); + return false; +} + + +/***************************************************************************/ + +bool Type_handler_row:: + Item_func_plus_fix_length_and_dec(Item_func_plus *item) const +{ + DBUG_ASSERT(0); + return true; +} + + +bool Type_handler_int_result:: + Item_func_plus_fix_length_and_dec(Item_func_plus *item) const +{ + item->fix_length_and_dec_int(); + return false; +} + + +bool Type_handler_real_result:: + Item_func_plus_fix_length_and_dec(Item_func_plus *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + + +bool Type_handler_decimal_result:: + Item_func_plus_fix_length_and_dec(Item_func_plus *item) const +{ + item->fix_length_and_dec_decimal(); + return false; +} + + +bool Type_handler_temporal_result:: + Item_func_plus_fix_length_and_dec(Item_func_plus *item) const +{ + item->fix_length_and_dec_temporal(true); + return false; +} + + +bool Type_handler_string_result:: + Item_func_plus_fix_length_and_dec(Item_func_plus *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + +/***************************************************************************/ + +bool Type_handler_row:: + Item_func_minus_fix_length_and_dec(Item_func_minus *item) const +{ + DBUG_ASSERT(0); + return true; +} + + +bool Type_handler_int_result:: + Item_func_minus_fix_length_and_dec(Item_func_minus *item) const +{ + item->fix_length_and_dec_int(); + return false; +} + + +bool Type_handler_real_result:: + Item_func_minus_fix_length_and_dec(Item_func_minus *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + + +bool Type_handler_decimal_result:: + Item_func_minus_fix_length_and_dec(Item_func_minus *item) const +{ + item->fix_length_and_dec_decimal(); + return false; +} + + +bool Type_handler_temporal_result:: + Item_func_minus_fix_length_and_dec(Item_func_minus 
*item) const +{ + item->fix_length_and_dec_temporal(true); + return false; +} + + +bool Type_handler_string_result:: + Item_func_minus_fix_length_and_dec(Item_func_minus *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + +/***************************************************************************/ + +bool Type_handler_row:: + Item_func_mul_fix_length_and_dec(Item_func_mul *item) const +{ + DBUG_ASSERT(0); + return true; +} + + +bool Type_handler_int_result:: + Item_func_mul_fix_length_and_dec(Item_func_mul *item) const +{ + item->fix_length_and_dec_int(); + return false; +} + + +bool Type_handler_real_result:: + Item_func_mul_fix_length_and_dec(Item_func_mul *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + + +bool Type_handler_decimal_result:: + Item_func_mul_fix_length_and_dec(Item_func_mul *item) const +{ + item->fix_length_and_dec_decimal(); + return false; +} + + +bool Type_handler_temporal_result:: + Item_func_mul_fix_length_and_dec(Item_func_mul *item) const +{ + item->fix_length_and_dec_temporal(true); + return false; +} + + +bool Type_handler_string_result:: + Item_func_mul_fix_length_and_dec(Item_func_mul *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + +/***************************************************************************/ + +bool Type_handler_row:: + Item_func_div_fix_length_and_dec(Item_func_div *item) const +{ + DBUG_ASSERT(0); + return true; +} + + +bool Type_handler_int_result:: + Item_func_div_fix_length_and_dec(Item_func_div *item) const +{ + item->fix_length_and_dec_int(); + return false; +} + + +bool Type_handler_real_result:: + Item_func_div_fix_length_and_dec(Item_func_div *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + + +bool Type_handler_decimal_result:: + Item_func_div_fix_length_and_dec(Item_func_div *item) const +{ + item->fix_length_and_dec_decimal(); + return false; +} + + +bool Type_handler_temporal_result:: + 
Item_func_div_fix_length_and_dec(Item_func_div *item) const +{ + item->fix_length_and_dec_temporal(false); + return false; +} + + +bool Type_handler_string_result:: + Item_func_div_fix_length_and_dec(Item_func_div *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + +/***************************************************************************/ + +bool Type_handler_row:: + Item_func_mod_fix_length_and_dec(Item_func_mod *item) const +{ + DBUG_ASSERT(0); + return true; +} + + +bool Type_handler_int_result:: + Item_func_mod_fix_length_and_dec(Item_func_mod *item) const +{ + item->fix_length_and_dec_int(); + return false; +} + + +bool Type_handler_real_result:: + Item_func_mod_fix_length_and_dec(Item_func_mod *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + + +bool Type_handler_decimal_result:: + Item_func_mod_fix_length_and_dec(Item_func_mod *item) const +{ + item->fix_length_and_dec_decimal(); + return false; +} + + +bool Type_handler_temporal_result:: + Item_func_mod_fix_length_and_dec(Item_func_mod *item) const +{ + item->fix_length_and_dec_temporal(true); + return false; +} + + +bool Type_handler_string_result:: + Item_func_mod_fix_length_and_dec(Item_func_mod *item) const +{ + item->fix_length_and_dec_double(); + return false; +} + +/***************************************************************************/ + +const Vers_type_handler* Type_handler_temporal_result::vers() const +{ + return &vers_type_timestamp; +} + +const Vers_type_handler* Type_handler_string_result::vers() const +{ + return &vers_type_timestamp; +} + +const Vers_type_handler* Type_handler_blob_common::vers() const + +{ + return &vers_type_timestamp; +} + +/***************************************************************************/ + +decimal_digits_t Type_handler::Item_time_precision(THD *thd, Item *item) const +{ + return MY_MIN(item->decimals, TIME_SECOND_PART_DIGITS); +} + + +decimal_digits_t Type_handler::Item_datetime_precision(THD *thd, Item 
*item) const +{ + return MY_MIN(item->decimals, TIME_SECOND_PART_DIGITS); +} + + +decimal_digits_t Type_handler_string_result:: +Item_temporal_precision(THD *thd, Item *item, bool is_time) const +{ + StringBuffer<64> buf; + String *tmp; + MYSQL_TIME_STATUS status; + DBUG_ASSERT(item->fixed()); + // Nanosecond rounding is not needed here, for performance purposes + if ((tmp= item->val_str(&buf)) && + (is_time ? + Time(thd, &status, tmp->ptr(), tmp->length(), tmp->charset(), + Time::Options(TIME_TIME_ONLY, TIME_FRAC_TRUNCATE, + Time::DATETIME_TO_TIME_YYYYMMDD_TRUNCATE)). + is_valid_time() : + Datetime(thd, &status, tmp->ptr(), tmp->length(), tmp->charset(), + Datetime::Options(TIME_FUZZY_DATES, TIME_FRAC_TRUNCATE)). + is_valid_datetime())) + return (decimal_digits_t) MY_MIN(status.precision, TIME_SECOND_PART_DIGITS); + return (decimal_digits_t) MY_MIN(item->decimals, TIME_SECOND_PART_DIGITS); +} + +/***************************************************************************/ + +decimal_digits_t Type_handler::Item_decimal_scale(const Item *item) const +{ + return (item->decimals < NOT_FIXED_DEC ? + item->decimals : + (decimal_digits_t) MY_MIN(item->max_length, DECIMAL_MAX_SCALE)); +} + +decimal_digits_t Type_handler_temporal_result:: +Item_decimal_scale_with_seconds(const Item *item) const +{ + return (item->decimals < NOT_FIXED_DEC ? + item->decimals : + TIME_SECOND_PART_DIGITS); +} + +decimal_digits_t Type_handler::Item_divisor_precision_increment(const Item *item) const +{ + return item->decimals; +} + +decimal_digits_t Type_handler_temporal_result:: +Item_divisor_precision_increment_with_seconds(const Item *item) const +{ + return item->decimals < NOT_FIXED_DEC ? 
+ item->decimals : + TIME_SECOND_PART_DIGITS; +} + +/***************************************************************************/ + +decimal_digits_t Type_handler_string_result::Item_decimal_precision(const Item *item) const +{ + uint res= item->max_char_length(); + /* + Return at least one decimal digit, even if Item::max_char_length() + returned 0. This is important to avoid attempts to create fields of types + INT(0) or DECIMAL(0,0) when converting NULL or empty strings to INT/DECIMAL: + CREATE TABLE t1 AS SELECT CONVERT(NULL,SIGNED) AS a; + */ + return res ? (decimal_digits_t) MY_MIN(res, DECIMAL_MAX_PRECISION) : (decimal_digits_t) 1; +} + +decimal_digits_t Type_handler_real_result::Item_decimal_precision(const Item *item) const +{ + uint res= item->max_char_length(); + return res ? (decimal_digits_t) MY_MIN(res, DECIMAL_MAX_PRECISION) : (decimal_digits_t) 1; +} + +decimal_digits_t Type_handler_decimal_result::Item_decimal_precision(const Item *item) const +{ + uint prec= my_decimal_length_to_precision(item->max_char_length(), + item->decimals, + item->unsigned_flag); + return (decimal_digits_t) MY_MIN(prec, DECIMAL_MAX_PRECISION); +} + +decimal_digits_t Type_handler_int_result::Item_decimal_precision(const Item *item) const +{ + uint prec= my_decimal_length_to_precision(item->max_char_length(), + item->decimals, + item->unsigned_flag); + return (decimal_digits_t) MY_MIN(prec, DECIMAL_MAX_PRECISION); +} + +decimal_digits_t Type_handler_time_common::Item_decimal_precision(const Item *item) const +{ + return (decimal_digits_t) (7 + MY_MIN(item->decimals, TIME_SECOND_PART_DIGITS)); +} + +decimal_digits_t Type_handler_date_common::Item_decimal_precision(const Item *item) const +{ + return 8; +} + +decimal_digits_t Type_handler_datetime_common:: +Item_decimal_precision(const Item *item) const +{ + return (decimal_digits_t) (14 + MY_MIN(item->decimals, TIME_SECOND_PART_DIGITS)); +} + +decimal_digits_t Type_handler_timestamp_common:: +Item_decimal_precision(const Item 
*item) const +{ + return (decimal_digits_t) (14 + MY_MIN(item->decimals, TIME_SECOND_PART_DIGITS)); +} + +/***************************************************************************/ + +bool Type_handler_real_result:: + subquery_type_allows_materialization(const Item *inner, + const Item *outer, + bool is_in_predicate) const +{ + DBUG_ASSERT(inner->cmp_type() == REAL_RESULT); + return outer->cmp_type() == REAL_RESULT; +} + + +bool Type_handler_int_result:: + subquery_type_allows_materialization(const Item *inner, + const Item *outer, + bool is_in_predicate) const +{ + DBUG_ASSERT(inner->cmp_type() == INT_RESULT); + return outer->cmp_type() == INT_RESULT; +} + + +bool Type_handler_decimal_result:: + subquery_type_allows_materialization(const Item *inner, + const Item *outer, + bool is_in_predicate) const +{ + DBUG_ASSERT(inner->cmp_type() == DECIMAL_RESULT); + return outer->cmp_type() == DECIMAL_RESULT; +} + + +bool Type_handler_string_result:: + subquery_type_allows_materialization(const Item *inner, + const Item *outer, + bool is_in_predicate) const +{ + DBUG_ASSERT(inner->cmp_type() == STRING_RESULT); + if (outer->cmp_type() == STRING_RESULT && + /* + Materialization also is unable to work when create_tmp_table() will + create a blob column because item->max_length is too big. + The following test is copied from varstring_type_handler(). 
+ */ + !inner->too_big_for_varchar()) + { + if (outer->collation.collation == inner->collation.collation) + return true; + if (is_in_predicate) + { + Charset inner_col(inner->collation.collation); + if (inner_col.encoding_allows_reinterpret_as(outer-> + collation.collation) && + inner_col.eq_collation_specific_names(outer->collation.collation)) + return true; + } + } + return false; +} + + +bool Type_handler_temporal_result:: + subquery_type_allows_materialization(const Item *inner, + const Item *outer, + bool is_in_predicate) const +{ + DBUG_ASSERT(inner->cmp_type() == TIME_RESULT); + return mysql_timestamp_type() == + outer->type_handler()->mysql_timestamp_type(); +} + +/***************************************************************************/ + + +const Type_handler * +Type_handler_null::type_handler_for_tmp_table(const Item *item) const +{ + return &type_handler_string; +} + + +const Type_handler * +Type_handler_null::type_handler_for_union(const Item *item) const +{ + return &type_handler_string; +} + + +const Type_handler * +Type_handler_olddecimal::type_handler_for_tmp_table(const Item *item) const +{ + return &type_handler_newdecimal; +} + +const Type_handler * +Type_handler_olddecimal::type_handler_for_union(const Item *item) const +{ + return &type_handler_newdecimal; +} + + +/***************************************************************************/ + +bool Type_handler::check_null(const Item *item, st_value *value) const +{ + if (item->null_value) + { + value->m_type= DYN_COL_NULL; + return true; + } + return false; +} + + +bool Type_handler_null:: + Item_save_in_value(THD *thd, Item *item, st_value *value) const +{ + value->m_type= DYN_COL_NULL; + return true; +} + + +bool Type_handler_row:: + Item_save_in_value(THD *thd, Item *item, st_value *value) const +{ + DBUG_ASSERT(0); + value->m_type= DYN_COL_NULL; + return true; +} + + +bool Type_handler_int_result:: + Item_save_in_value(THD *thd, Item *item, st_value *value) const +{ + value->m_type= 
item->unsigned_flag ? DYN_COL_UINT : DYN_COL_INT; + value->value.m_longlong= item->val_int(); + return check_null(item, value); +} + + +bool Type_handler_real_result:: + Item_save_in_value(THD *thd, Item *item, st_value *value) const +{ + value->m_type= DYN_COL_DOUBLE; + value->value.m_double= item->val_real(); + return check_null(item, value); +} + + +bool Type_handler_decimal_result:: + Item_save_in_value(THD *thd, Item *item, st_value *value) const +{ + value->m_type= DYN_COL_DECIMAL; + my_decimal *dec= item->val_decimal(&value->m_decimal); + if (dec != &value->m_decimal && !item->null_value) + my_decimal2decimal(dec, &value->m_decimal); + return check_null(item, value); +} + + +bool Type_handler_string_result:: + Item_save_in_value(THD *thd, Item *item, st_value *value) const +{ + value->m_type= DYN_COL_STRING; + String *str= item->val_str(&value->m_string); + if (str != &value->m_string && !item->null_value) + value->m_string.set(str->ptr(), str->length(), str->charset()); + return check_null(item, value); +} + + +bool Type_handler_temporal_with_date:: + Item_save_in_value(THD *thd, Item *item, st_value *value) const +{ + value->m_type= DYN_COL_DATETIME; + item->get_date(thd, &value->value.m_time, + Datetime::Options(thd, TIME_FRAC_NONE)); + return check_null(item, value); +} + + +bool Type_handler_time_common:: + Item_save_in_value(THD *thd, Item *item, st_value *value) const +{ + value->m_type= DYN_COL_DATETIME; + item->get_time(thd, &value->value.m_time); + return check_null(item, value); +} + +/***************************************************************************/ + +bool Type_handler_row:: + Item_param_set_from_value(THD *thd, + Item_param *param, + const Type_all_attributes *attr, + const st_value *val) const +{ + DBUG_ASSERT(0); + param->set_null(); + return true; +} + + +bool Type_handler_real_result:: + Item_param_set_from_value(THD *thd, + Item_param *param, + const Type_all_attributes *attr, + const st_value *val) const +{ + 
param->unsigned_flag= attr->unsigned_flag; + param->set_double(val->value.m_double); + return false; +} + + +bool Type_handler_int_result:: + Item_param_set_from_value(THD *thd, + Item_param *param, + const Type_all_attributes *attr, + const st_value *val) const +{ + param->unsigned_flag= attr->unsigned_flag; + param->set_int(val->value.m_longlong, attr->max_length); + return false; +} + + +bool Type_handler_decimal_result:: + Item_param_set_from_value(THD *thd, + Item_param *param, + const Type_all_attributes *attr, + const st_value *val) const +{ + param->unsigned_flag= attr->unsigned_flag; + param->set_decimal(&val->m_decimal, attr->unsigned_flag); + return false; +} + + +bool Type_handler_string_result:: + Item_param_set_from_value(THD *thd, + Item_param *param, + const Type_all_attributes *attr, + const st_value *val) const +{ + param->unsigned_flag= false; + param->setup_conversion_string(thd, attr->collation.collation); + /* + Exact value of max_length is not known unless data is converted to + charset of connection, so we have to set it later. 
+ */ + return param->set_str(val->m_string.ptr(), val->m_string.length(), + attr->collation.collation, + attr->collation.collation); +} + + +bool Type_handler_temporal_result:: + Item_param_set_from_value(THD *thd, + Item_param *param, + const Type_all_attributes *attr, + const st_value *val) const +{ + param->unsigned_flag= attr->unsigned_flag; + param->set_time(&val->value.m_time, attr->max_length, attr->decimals); + return false; +} + + +/***************************************************************************/ + +bool Type_handler_null:: + Item_send(Item *item, Protocol *protocol, st_value *buf) const +{ + return protocol->store_null(); +} + + +bool Type_handler:: + Item_send_str(Item *item, Protocol *protocol, st_value *buf) const +{ + String *res; + if ((res= item->val_str(&buf->m_string))) + { + DBUG_ASSERT(!item->null_value); + return protocol->store(res->ptr(), res->length(), res->charset()); + } + DBUG_ASSERT(item->null_value); + return protocol->store_null(); +} + + +bool Type_handler:: + Item_send_tiny(Item *item, Protocol *protocol, st_value *buf) const +{ + longlong nr= item->val_int(); + if (!item->null_value) + return protocol->store_tiny(nr); + return protocol->store_null(); +} + + +bool Type_handler:: + Item_send_short(Item *item, Protocol *protocol, st_value *buf) const +{ + longlong nr= item->val_int(); + if (!item->null_value) + return protocol->store_short(nr); + return protocol->store_null(); +} + + +bool Type_handler:: + Item_send_long(Item *item, Protocol *protocol, st_value *buf) const +{ + longlong nr= item->val_int(); + if (!item->null_value) + return protocol->store_long(nr); + return protocol->store_null(); +} + +bool Type_handler:: + Item_send_longlong(Item *item, Protocol *protocol, st_value *buf) const +{ + longlong nr= item->val_int(); + if (!item->null_value) + return protocol->store_longlong(nr, item->unsigned_flag); + return protocol->store_null(); +} + + +bool Type_handler:: + Item_send_float(Item *item, Protocol *protocol, 
st_value *buf) const +{ + float nr= (float) item->val_real(); + if (!item->null_value) + return protocol->store_float(nr, item->decimals); + return protocol->store_null(); +} + + +bool Type_handler:: + Item_send_double(Item *item, Protocol *protocol, st_value *buf) const +{ + double nr= item->val_real(); + if (!item->null_value) + return protocol->store_double(nr, item->decimals); + return protocol->store_null(); +} + + +bool Type_handler::Item_send_timestamp(Item *item, + Protocol *protocol, + st_value *buf) const +{ + Timestamp_or_zero_datetime_native_null native(protocol->thd, item); + if (native.is_null()) + return protocol->store_null(); + native.to_TIME(protocol->thd, &buf->value.m_time); + return protocol->store_datetime(&buf->value.m_time, item->decimals); +} + + +bool Type_handler:: + Item_send_datetime(Item *item, Protocol *protocol, st_value *buf) const +{ + item->get_date(protocol->thd, &buf->value.m_time, + Datetime::Options(protocol->thd)); + if (!item->null_value) + return protocol->store_datetime(&buf->value.m_time, item->decimals); + return protocol->store_null(); +} + + +bool Type_handler:: + Item_send_date(Item *item, Protocol *protocol, st_value *buf) const +{ + item->get_date(protocol->thd, &buf->value.m_time, + Date::Options(protocol->thd)); + if (!item->null_value) + return protocol->store_date(&buf->value.m_time); + return protocol->store_null(); +} + + +bool Type_handler:: + Item_send_time(Item *item, Protocol *protocol, st_value *buf) const +{ + item->get_time(protocol->thd, &buf->value.m_time); + if (!item->null_value) + return protocol->store_time(&buf->value.m_time, item->decimals); + return protocol->store_null(); +} + +/***************************************************************************/ + +Item *Type_handler_int_result:: + make_const_item_for_comparison(THD *thd, Item *item, const Item *cmp) const +{ + longlong result= item->val_int(); + if (item->null_value) + return new (thd->mem_root) Item_null(thd, item->name.str); + 
return new (thd->mem_root) Item_int(thd, item->name.str, result, + item->max_length); +} + + +Item *Type_handler_real_result:: + make_const_item_for_comparison(THD *thd, Item *item, const Item *cmp) const +{ + double result= item->val_real(); + if (item->null_value) + return new (thd->mem_root) Item_null(thd, item->name.str); + return new (thd->mem_root) Item_float(thd, item->name.str, result, + item->decimals, item->max_length); +} + + +Item *Type_handler_decimal_result:: + make_const_item_for_comparison(THD *thd, Item *item, const Item *cmp) const +{ + VDec result(item); + if (result.is_null()) + return new (thd->mem_root) Item_null(thd, item->name.str); + return new (thd->mem_root) Item_decimal(thd, item->name.str, result.ptr(), + item->max_length, item->decimals); +} + + +Item *Type_handler_string_result:: + make_const_item_for_comparison(THD *thd, Item *item, const Item *cmp) const +{ + StringBuffer tmp; + String *result= item->val_str(&tmp); + if (item->null_value) + return new (thd->mem_root) Item_null(thd, item->name.str); + LEX_CSTRING value; + thd->make_lex_string(&value, result->ptr(), result->length()); + return new (thd->mem_root) Item_string(thd, item->name, value, + result->charset()); +} + + +Item *Type_handler_time_common:: + make_const_item_for_comparison(THD *thd, Item *item, const Item *cmp) const +{ + Item_cache_temporal *cache; + longlong value= item->val_time_packed(thd); + if (item->null_value) + return new (thd->mem_root) Item_null(thd, item->name.str); + cache= new (thd->mem_root) Item_cache_time(thd); + if (cache) + cache->store_packed(value, item); + return cache; +} + + +Item *Type_handler_temporal_with_date:: + make_const_item_for_comparison(THD *thd, Item *item, const Item *cmp) const +{ + Item_cache_temporal *cache; + longlong value= item->val_datetime_packed(thd); + if (item->null_value) + return new (thd->mem_root) Item_null(thd, item->name.str); + cache= new (thd->mem_root) Item_cache_datetime(thd); + if (cache) + 
cache->store_packed(value, item); + return cache; +} + + +Item *Type_handler_row:: + make_const_item_for_comparison(THD *thd, Item *item, const Item *cmp) const +{ + if (item->type() == Item::ROW_ITEM && cmp->type() == Item::ROW_ITEM) + { + /* + Substitute constants only in Item_row's. Don't affect other Items + with ROW_RESULT (eg Item_singlerow_subselect). + + For such Items more optimal is to detect if it is constant and replace + it with Item_row. This would optimize queries like this: + SELECT * FROM t1 WHERE (a,b) = (SELECT a,b FROM t2 LIMIT 1); + */ + Item_row *item_row= (Item_row*) item; + Item_row *comp_item_row= (Item_row*) cmp; + uint col; + /* + If item and comp_item are both Item_row's and have same number of cols + then process items in Item_row one by one. + We can't ignore NULL values here as this item may be used with <=>, in + which case NULL's are significant. + */ + DBUG_ASSERT(item->result_type() == cmp->result_type()); + DBUG_ASSERT(item_row->cols() == comp_item_row->cols()); + col= item_row->cols(); + while (col-- > 0) + resolve_const_item(thd, item_row->addr(col), + comp_item_row->element_index(col)); + } + return NULL; +} + +/***************************************************************************/ + +static const char* item_name(Item *a, String *str) +{ + if (a->name.str) + return a->name.str; + str->length(0); + a->print(str, QT_ORDINARY); + return str->c_ptr_safe(); +} + + +static void wrong_precision_error(uint errcode, Item *a, uint maximum) +{ + StringBuffer<1024> buf(system_charset_info); + my_error(errcode, MYF(0), item_name(a, &buf), maximum); +} + + +/** + Get precision and scale for a declaration + + return + 0 ok + 1 error +*/ + +bool get_length_and_scale(ulonglong length, ulonglong decimals, + uint *out_length, decimal_digits_t *out_decimals, + uint max_precision, uint max_scale, + Item *a) +{ + if (length > (ulonglong) max_precision) + { + wrong_precision_error(ER_TOO_BIG_PRECISION, a, max_precision); + return 1; + } + if 
(decimals > (ulonglong) max_scale) + { + wrong_precision_error(ER_TOO_BIG_SCALE, a, max_scale); + return 1; + } + + *out_decimals= (decimal_digits_t) decimals; + my_decimal_trim(&length, out_decimals); + *out_length= (uint) length; + + if (*out_length < *out_decimals) + { + my_error(ER_M_BIGGER_THAN_D, MYF(0), ""); + return 1; + } + return 0; +} + + +Item *Type_handler_longlong:: + create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) const +{ + if (this != &type_handler_ulonglong) + return new (thd->mem_root) Item_func_signed(thd, item); + return new (thd->mem_root) Item_func_unsigned(thd, item); + +} + + +Item *Type_handler_date_common:: + create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) const +{ + return new (thd->mem_root) Item_date_typecast(thd, item); +} + + + +Item *Type_handler_time_common:: + create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) const +{ + if (attr.decimals() > MAX_DATETIME_PRECISION) + { + wrong_precision_error(ER_TOO_BIG_PRECISION, item, MAX_DATETIME_PRECISION); + return 0; + } + return new (thd->mem_root) + Item_time_typecast(thd, item, (uint) attr.decimals()); +} + + +Item *Type_handler_datetime_common:: + create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) const +{ + if (attr.decimals() > MAX_DATETIME_PRECISION) + { + wrong_precision_error(ER_TOO_BIG_PRECISION, item, MAX_DATETIME_PRECISION); + return 0; + } + return new (thd->mem_root) + Item_datetime_typecast(thd, item, (uint) attr.decimals()); + +} + + +Item *Type_handler_decimal_result:: + create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) const +{ + uint len; + decimal_digits_t dec; + if (get_length_and_scale(attr.length(), attr.decimals(), &len, &dec, + DECIMAL_MAX_PRECISION, DECIMAL_MAX_SCALE, item)) + return NULL; + return new (thd->mem_root) Item_decimal_typecast(thd, item, len, dec); +} + + +Item *Type_handler_double:: + 
create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) const +{ + uint len; + decimal_digits_t dec; + if (!attr.length_specified()) + return new (thd->mem_root) Item_double_typecast(thd, item, + DBL_DIG + 7, + NOT_FIXED_DEC); + + if (get_length_and_scale(attr.length(), attr.decimals(), &len, &dec, + DECIMAL_MAX_PRECISION, NOT_FIXED_DEC - 1, item)) + return NULL; + return new (thd->mem_root) Item_double_typecast(thd, item, len, dec); +} + + +Item *Type_handler_float:: + create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) const +{ + DBUG_ASSERT(!attr.length_specified()); + return new (thd->mem_root) Item_float_typecast(thd, item); +} + + +Item *Type_handler_long_blob:: + create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) const +{ + int len= -1; + CHARSET_INFO *real_cs= attr.charset() ? + attr.charset() : + thd->variables.collation_connection; + if (attr.length_specified()) + { + if (attr.length() > MAX_FIELD_BLOBLENGTH) + { + char buff[1024]; + String buf(buff, sizeof(buff), system_charset_info); + my_error(ER_TOO_BIG_DISPLAYWIDTH, MYF(0), item_name(item, &buf), + MAX_FIELD_BLOBLENGTH); + return NULL; + } + len= (int) attr.length(); + } + return new (thd->mem_root) Item_char_typecast(thd, item, len, real_cs); +} + +Item *Type_handler_interval_DDhhmmssff:: + create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) const +{ + if (attr.decimals() > MAX_DATETIME_PRECISION) + { + wrong_precision_error(ER_TOO_BIG_PRECISION, item, MAX_DATETIME_PRECISION); + return 0; + } + return new (thd->mem_root) Item_interval_DDhhmmssff_typecast(thd, item, + (uint) + attr.decimals()); +} + +/***************************************************************************/ + +void Type_handler_string_result::Item_param_setup_conversion(THD *thd, + Item_param *param) + const +{ + param->setup_conversion_string(thd, thd->variables.character_set_client); +} + + +void 
Type_handler_blob_common::Item_param_setup_conversion(THD *thd, + Item_param *param) + const +{ + param->setup_conversion_blob(thd); +} + + +void Type_handler_tiny::Item_param_set_param_func(Item_param *param, + uchar **pos, ulong len) const +{ + param->set_param_tiny(pos, len); +} + + +void Type_handler_short::Item_param_set_param_func(Item_param *param, + uchar **pos, ulong len) const +{ + param->set_param_short(pos, len); +} + + +void Type_handler_long::Item_param_set_param_func(Item_param *param, + uchar **pos, ulong len) const +{ + param->set_param_int32(pos, len); +} + + +void Type_handler_longlong::Item_param_set_param_func(Item_param *param, + uchar **pos, + ulong len) const +{ + param->set_param_int64(pos, len); +} + + +void Type_handler_float::Item_param_set_param_func(Item_param *param, + uchar **pos, + ulong len) const +{ + param->set_param_float(pos, len); +} + + +void Type_handler_double::Item_param_set_param_func(Item_param *param, + uchar **pos, + ulong len) const +{ + param->set_param_double(pos, len); +} + + +void Type_handler_decimal_result::Item_param_set_param_func(Item_param *param, + uchar **pos, + ulong len) const +{ + param->set_param_decimal(pos, len); +} + + +void Type_handler_string_result::Item_param_set_param_func(Item_param *param, + uchar **pos, + ulong len) const +{ + param->set_param_str(pos, len); +} + + +void Type_handler_time_common::Item_param_set_param_func(Item_param *param, + uchar **pos, + ulong len) const +{ + param->set_param_time(pos, len); +} + + +void Type_handler_date_common::Item_param_set_param_func(Item_param *param, + uchar **pos, + ulong len) const +{ + param->set_param_date(pos, len); +} + + +void Type_handler_datetime_common::Item_param_set_param_func(Item_param *param, + uchar **pos, + ulong len) const +{ + param->set_param_datetime(pos, len); +} + +Field *Type_handler_blob_common::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + uint pack_length= 
metadata & 0x00ff; + if (pack_length < 1 || pack_length > 4) + return NULL; // Broken binary log? + return new(root) + Field_blob(NULL, (uchar *) "", 1, Field::NONE, &empty_clex_str, + table->s, pack_length, target->charset()); +} + + +void Type_handler_timestamp_common::Item_param_set_param_func(Item_param *param, + uchar **pos, + ulong len) const +{ + param->set_param_datetime(pos, len); +} + + +void Type_handler::Item_param_set_param_func(Item_param *param, + uchar **pos, + ulong len) const +{ + param->set_null(); // Not possible type code in the client-server protocol +} + + +void Type_handler_typelib::Item_param_set_param_func(Item_param *param, + uchar **pos, + ulong len) const +{ + param->set_null(); // Not possible type code in the client-server protocol +} + + +/***************************************************************************/ + +Field *Type_handler_row:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + DBUG_ASSERT(attr->length == 0); + DBUG_ASSERT(f_maybe_null(attr->pack_flag)); + return new (mem_root) Field_row(rec.ptr(), name); +} + + +Field *Type_handler_olddecimal:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + DBUG_ASSERT(f_decimals(attr->pack_flag) == 0); + return new (mem_root) + Field_decimal(rec.ptr(), (uint32) attr->length, + rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, + (uint8) attr->decimals, + f_is_zerofill(attr->pack_flag) != 0, + f_is_dec(attr->pack_flag) == 0); +} + + +Field *Type_handler_newdecimal:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + 
uint32 flags) const +{ + DBUG_ASSERT(f_decimals(attr->pack_flag) == 0); + return new (mem_root) + Field_new_decimal(rec.ptr(), (uint32) attr->length, + rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, + (uint8) attr->decimals, + f_is_zerofill(attr->pack_flag) != 0, + f_is_dec(attr->pack_flag) == 0); +} + + +Field *Type_handler_float:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + DBUG_ASSERT(f_decimals(attr->pack_flag) == 0); + uint decimals= attr->decimals; + if (decimals == FLOATING_POINT_DECIMALS) + decimals= NOT_FIXED_DEC; + return new (mem_root) + Field_float(rec.ptr(), (uint32) attr->length, + rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, decimals, + f_is_zerofill(attr->pack_flag) != 0, + f_is_dec(attr->pack_flag)== 0); +} + + +Field *Type_handler_double:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + DBUG_ASSERT(f_decimals(attr->pack_flag) == 0); + uint decimals= attr->decimals; + if (decimals == FLOATING_POINT_DECIMALS) + decimals= NOT_FIXED_DEC; + return new (mem_root) + Field_double(rec.ptr(), (uint32) attr->length, + rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, decimals, + f_is_zerofill(attr->pack_flag) != 0, + f_is_dec(attr->pack_flag)== 0); +} + + +Field *Type_handler_tiny:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + return new (mem_root) + Field_tiny(rec.ptr(), (uint32) attr->length, rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, + f_is_zerofill(attr->pack_flag) != 0, + f_is_dec(attr->pack_flag) == 0); +} 
+ + +Field *Type_handler_short:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + return new (mem_root) + Field_short(rec.ptr(), (uint32) attr->length, + rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, + f_is_zerofill(attr->pack_flag) != 0, + f_is_dec(attr->pack_flag) == 0); +} + + +Field *Type_handler_int24:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + return new (mem_root) + Field_medium(rec.ptr(), (uint32) attr->length, + rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, + f_is_zerofill(attr->pack_flag) != 0, + f_is_dec(attr->pack_flag) == 0); +} + + +Field *Type_handler_long:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + return new (mem_root) + Field_long(rec.ptr(), (uint32) attr->length, rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, + f_is_zerofill(attr->pack_flag) != 0, + f_is_dec(attr->pack_flag) == 0); +} + + +Field *Type_handler_longlong:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + if (flags & (VERS_ROW_START|VERS_ROW_END)) + return new (mem_root) + Field_vers_trx_id(rec.ptr(), (uint32) attr->length, + rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, + f_is_zerofill(attr->pack_flag) != 0, + f_is_dec(attr->pack_flag) == 0); + return new (mem_root) + Field_longlong(rec.ptr(), (uint32) attr->length, + rec.null_ptr(), rec.null_bit(), + attr->unireg_check, 
name, + f_is_zerofill(attr->pack_flag) != 0, + f_is_dec(attr->pack_flag) == 0); +} + + +Field *Type_handler_timestamp:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + DBUG_ASSERT(attr->decimals == attr->temporal_dec(MAX_DATETIME_WIDTH)); + return new_Field_timestamp(mem_root, + rec.ptr(), rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, share, + attr->temporal_dec(MAX_DATETIME_WIDTH)); +} + + +Field *Type_handler_timestamp2:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + DBUG_ASSERT(attr->decimals == attr->temporal_dec(MAX_DATETIME_WIDTH)); + return new (mem_root) + Field_timestampf(rec.ptr(), rec.null_ptr(), rec.null_bit(), + attr->unireg_check, + name, share, attr->temporal_dec(MAX_DATETIME_WIDTH)); +} + + +Field *Type_handler_year:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + return new (mem_root) + Field_year(rec.ptr(), (uint32) attr->length, rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name); +} + + +Field *Type_handler_date:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + return new (mem_root) + Field_date(rec.ptr(),rec.null_ptr(),rec.null_bit(), + attr->unireg_check, name); +} + + +Field *Type_handler_newdate:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const 
Column_definition_attributes *attr, + uint32 flags) const +{ + return new (mem_root) + Field_newdate(rec.ptr(), rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name); +} + + +Field *Type_handler_time:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + DBUG_ASSERT(attr->decimals == attr->temporal_dec(MIN_TIME_WIDTH)); + return new_Field_time(mem_root, rec.ptr(), rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, + attr->temporal_dec(MIN_TIME_WIDTH)); +} + + +Field *Type_handler_time2:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + DBUG_ASSERT(attr->decimals == attr->temporal_dec(MIN_TIME_WIDTH)); + return new (mem_root) + Field_timef(rec.ptr(), rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, + attr->temporal_dec(MIN_TIME_WIDTH)); +} + + +Field *Type_handler_datetime:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + DBUG_ASSERT(attr->decimals == attr->temporal_dec(MAX_DATETIME_WIDTH)); + return new_Field_datetime(mem_root, rec.ptr(), rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, + attr->temporal_dec(MAX_DATETIME_WIDTH)); +} + + +Field *Type_handler_datetime2:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + DBUG_ASSERT(attr->decimals == attr->temporal_dec(MAX_DATETIME_WIDTH)); + return new (mem_root) + Field_datetimef(rec.ptr(), rec.null_ptr(), rec.null_bit(), + 
attr->unireg_check, name, + attr->temporal_dec(MAX_DATETIME_WIDTH)); +} + + +Field *Type_handler_null:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + return new (mem_root) + Field_null(rec.ptr(), (uint32) attr->length, attr->unireg_check, + name, attr->charset); +} + + +Field *Type_handler_bit:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + return f_bit_as_char(attr->pack_flag) ? + new (mem_root) Field_bit_as_char(rec.ptr(), (uint32) attr->length, + rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name) : + new (mem_root) Field_bit(rec.ptr(), (uint32) attr->length, + rec.null_ptr(), rec.null_bit(), + bit.ptr(), bit.offs(), attr->unireg_check, name); +} + + + + +Field *Type_handler_string:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + return new (mem_root) + Field_string(rec.ptr(), (uint32) attr->length, + rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, attr->charset); +} + + +Field *Type_handler_varchar:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + if (attr->unireg_check == Field::TMYSQL_COMPRESSED) + return new (mem_root) + Field_varstring_compressed(rec.ptr(), (uint32) attr->length, + HA_VARCHAR_PACKLENGTH((uint32) attr->length), + rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, share, attr->charset, + zlib_compression_method); + return new (mem_root) + 
Field_varstring(rec.ptr(), (uint32) attr->length, + HA_VARCHAR_PACKLENGTH((uint32) attr->length), + rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, share, attr->charset); +} + + +Field *Type_handler_blob_common:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + if (attr->unireg_check == Field::TMYSQL_COMPRESSED) + return new (mem_root) + Field_blob_compressed(rec.ptr(), rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, share, + attr->pack_flag_to_pack_length(), attr->charset, + zlib_compression_method); + return new (mem_root) + Field_blob(rec.ptr(), rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, share, + attr->pack_flag_to_pack_length(), attr->charset); +} + + +Field *Type_handler_enum:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + return new (mem_root) + Field_enum(rec.ptr(), (uint32) attr->length, rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, attr->pack_flag_to_pack_length(), + attr->interval, attr->charset); +} + + +Field *Type_handler_set:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + return new (mem_root) + Field_set(rec.ptr(), (uint32) attr->length, rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, attr->pack_flag_to_pack_length(), + attr->interval, attr->charset); +} + + +/***************************************************************************/ + +void Type_handler:: + Column_definition_attributes_frm_pack(const Column_definition_attributes *def, + uchar *buff) const +{ + def->frm_pack_basic(buff); + 
def->frm_pack_charset(buff); +} + + +void Type_handler_real_result:: + Column_definition_attributes_frm_pack(const Column_definition_attributes *def, + uchar *buff) const +{ + def->frm_pack_numeric_with_dec(buff); +} + + +void Type_handler_decimal_result:: + Column_definition_attributes_frm_pack(const Column_definition_attributes *def, + uchar *buff) const +{ + def->frm_pack_numeric_with_dec(buff); +} + + +void Type_handler_int_result:: + Column_definition_attributes_frm_pack(const Column_definition_attributes *def, + uchar *buff) const +{ + DBUG_ASSERT(f_decimals(def->pack_flag) == 0); + DBUG_ASSERT(def->decimals == 0); + Type_handler::Column_definition_attributes_frm_pack(def, buff); +} + + +void Type_handler_date_common:: + Column_definition_attributes_frm_pack(const Column_definition_attributes *def, + uchar *buff) const +{ + DBUG_ASSERT(f_decimals(def->pack_flag) == 0); + DBUG_ASSERT(def->decimals == 0); + Type_handler::Column_definition_attributes_frm_pack(def, buff); +} + + +void Type_handler_bit:: + Column_definition_attributes_frm_pack(const Column_definition_attributes *def, + uchar *buff) const +{ + DBUG_ASSERT(f_decimals(def->pack_flag & ~FIELDFLAG_TREAT_BIT_AS_CHAR) == 0); + DBUG_ASSERT(def->decimals == 0); + Type_handler::Column_definition_attributes_frm_pack(def, buff); +} + + +void Type_handler_blob_common:: + Column_definition_attributes_frm_pack(const Column_definition_attributes *def, + uchar *buff) const +{ + DBUG_ASSERT(f_decimals(def->pack_flag & ~FIELDFLAG_BLOB) == 0); + DBUG_ASSERT(def->decimals == 0 || + def->decimals == NOT_FIXED_DEC); + Type_handler::Column_definition_attributes_frm_pack(def, buff); +} + + +void Type_handler_null:: + Column_definition_attributes_frm_pack(const Column_definition_attributes *def, + uchar *buff) const +{ + DBUG_ASSERT(f_decimals(def->pack_flag) == 0); + DBUG_ASSERT(def->decimals == NOT_FIXED_DEC); + Type_handler::Column_definition_attributes_frm_pack(def, buff); +} + + +void Type_handler_string_result:: + 
Column_definition_attributes_frm_pack(const Column_definition_attributes *def, + uchar *buff) const +{ + DBUG_ASSERT(f_decimals(def->pack_flag) == 0); + DBUG_ASSERT(def->decimals == 0 || def->decimals == NOT_FIXED_DEC); + Type_handler::Column_definition_attributes_frm_pack(def, buff); +} + + +void Type_handler_enum:: + Column_definition_attributes_frm_pack(const Column_definition_attributes *def, + uchar *buff) const +{ + DBUG_ASSERT(f_decimals(def->pack_flag & ~FIELDFLAG_INTERVAL) == 0); + DBUG_ASSERT(def->decimals == 0); + Type_handler::Column_definition_attributes_frm_pack(def, buff); +} + + +void Type_handler_set:: + Column_definition_attributes_frm_pack(const Column_definition_attributes *def, + uchar *buff) const +{ + DBUG_ASSERT(f_decimals(def->pack_flag & ~FIELDFLAG_BITFIELD) == 0); + DBUG_ASSERT(def->decimals == 0); + Type_handler::Column_definition_attributes_frm_pack(def, buff); +} + + +void Type_handler_temporal_result:: + Column_definition_attributes_frm_pack(const Column_definition_attributes *def, + uchar *buff) const +{ + DBUG_ASSERT(f_decimals(def->pack_flag) == 0); + Type_handler::Column_definition_attributes_frm_pack(def, buff); +} + + +/***************************************************************************/ + +bool Type_handler:: + Column_definition_attributes_frm_unpack(Column_definition_attributes *attr, + TABLE_SHARE *share, + const uchar *buffer, + LEX_CUSTRING *gis_options) + const +{ + attr->frm_unpack_basic(buffer); + return attr->frm_unpack_charset(share, buffer); +} + + +bool Type_handler_real_result:: + Column_definition_attributes_frm_unpack(Column_definition_attributes *attr, + TABLE_SHARE *share, + const uchar *buffer, + LEX_CUSTRING *gis_options) + const +{ + return attr->frm_unpack_numeric_with_dec(share, buffer); +} + + +bool Type_handler_decimal_result:: + Column_definition_attributes_frm_unpack(Column_definition_attributes *attr, + TABLE_SHARE *share, + const uchar *buffer, + LEX_CUSTRING *gis_options) + const +{ + return 
attr->frm_unpack_numeric_with_dec(share, buffer); +} + + +bool Type_handler_time_common:: + Column_definition_attributes_frm_unpack(Column_definition_attributes *attr, + TABLE_SHARE *share, + const uchar *buffer, + LEX_CUSTRING *gis_options) + const +{ + return attr->frm_unpack_temporal_with_dec(share, MIN_TIME_WIDTH, buffer); +} + + +bool Type_handler_datetime_common:: + Column_definition_attributes_frm_unpack(Column_definition_attributes *attr, + TABLE_SHARE *share, + const uchar *buffer, + LEX_CUSTRING *gis_options) + const +{ + return attr->frm_unpack_temporal_with_dec(share, MAX_DATETIME_WIDTH, buffer); +} + + +bool Type_handler_timestamp_common:: + Column_definition_attributes_frm_unpack(Column_definition_attributes *attr, + TABLE_SHARE *share, + const uchar *buffer, + LEX_CUSTRING *gis_options) + const +{ + return attr->frm_unpack_temporal_with_dec(share, MAX_DATETIME_WIDTH, buffer); +} + + +bool Type_handler_null::Item_const_eq(const Item_const *a, + const Item_const *b, + bool binary_cmp) const +{ + return true; +} + + +bool Type_handler_real_result::Item_const_eq(const Item_const *a, + const Item_const *b, + bool binary_cmp) const +{ + const double *va= a->const_ptr_double(); + const double *vb= b->const_ptr_double(); + return va[0] == vb[0]; +} + + +bool Type_handler_int_result::Item_const_eq(const Item_const *a, + const Item_const *b, + bool binary_cmp) const +{ + const longlong *va= a->const_ptr_longlong(); + const longlong *vb= b->const_ptr_longlong(); + bool res= va[0] == vb[0] && + (va[0] >= 0 || + (a->get_type_all_attributes_from_const()->unsigned_flag == + b->get_type_all_attributes_from_const()->unsigned_flag)); + return res; +} + + +bool Type_handler_string_result::Item_const_eq(const Item_const *a, + const Item_const *b, + bool binary_cmp) const +{ + const String *sa= a->const_ptr_string(); + const String *sb= b->const_ptr_string(); + return binary_cmp ? 
sa->bin_eq(sb) : + a->get_type_all_attributes_from_const()->collation.collation == + b->get_type_all_attributes_from_const()->collation.collation && + sa->eq(sb, a->get_type_all_attributes_from_const()->collation.collation); +} + + +bool +Type_handler_decimal_result::Item_const_eq(const Item_const *a, + const Item_const *b, + bool binary_cmp) const +{ + const my_decimal *da= a->const_ptr_my_decimal(); + const my_decimal *db= b->const_ptr_my_decimal(); + return !da->cmp(db) && + (!binary_cmp || + a->get_type_all_attributes_from_const()->decimals == + b->get_type_all_attributes_from_const()->decimals); +} + + +bool +Type_handler_temporal_result::Item_const_eq(const Item_const *a, + const Item_const *b, + bool binary_cmp) const +{ + const MYSQL_TIME *ta= a->const_ptr_mysql_time(); + const MYSQL_TIME *tb= b->const_ptr_mysql_time(); + return !my_time_compare(ta, tb) && + (!binary_cmp || + a->get_type_all_attributes_from_const()->decimals == + b->get_type_all_attributes_from_const()->decimals); +} + +/***************************************************************************/ + +const Type_handler * +Type_handler_hex_hybrid::cast_to_int_type_handler() const +{ + return &type_handler_ulonglong; +} + + +/***************************************************************************/ + +bool Type_handler_row::Item_eq_value(THD *thd, const Type_cmp_attributes *attr, + Item *a, Item *b) const +{ + DBUG_ASSERT(0); + return false; +} + + +bool Type_handler_int_result::Item_eq_value(THD *thd, + const Type_cmp_attributes *attr, + Item *a, Item *b) const +{ + longlong value0= a->val_int(); + longlong value1= b->val_int(); + return !a->null_value && !b->null_value && value0 == value1 && + (value0 >= 0 || a->unsigned_flag == b->unsigned_flag); +} + + +bool Type_handler_real_result::Item_eq_value(THD *thd, + const Type_cmp_attributes *attr, + Item *a, Item *b) const +{ + double value0= a->val_real(); + double value1= b->val_real(); + return !a->null_value && !b->null_value && value0 == 
value1; +} + + +bool Type_handler_time_common::Item_eq_value(THD *thd, + const Type_cmp_attributes *attr, + Item *a, Item *b) const +{ + longlong value0= a->val_time_packed(thd); + longlong value1= b->val_time_packed(thd); + return !a->null_value && !b->null_value && value0 == value1; +} + + +bool Type_handler_temporal_with_date::Item_eq_value(THD *thd, + const Type_cmp_attributes *attr, + Item *a, Item *b) const +{ + longlong value0= a->val_datetime_packed(thd); + longlong value1= b->val_datetime_packed(thd); + return !a->null_value && !b->null_value && value0 == value1; +} + + +bool Type_handler_timestamp_common::Item_eq_value(THD *thd, + const Type_cmp_attributes *attr, + Item *a, Item *b) const +{ + Timestamp_or_zero_datetime_native_null na(thd, a, true); + Timestamp_or_zero_datetime_native_null nb(thd, b, true); + return !na.is_null() && !nb.is_null() && !cmp_native(na, nb); +} + + +bool Type_handler_string_result::Item_eq_value(THD *thd, + const Type_cmp_attributes *attr, + Item *a, Item *b) const +{ + String *va, *vb; + StringBuffer<128> cmp_value1, cmp_value2; + return (va= a->val_str(&cmp_value1)) && + (vb= b->val_str(&cmp_value2)) && + va->eq(vb, attr->compare_collation()); +} + + +/***************************************************************************/ + +bool Type_handler_string_result::union_element_finalize(Item_type_holder* item) const +{ + if (item->collation.derivation == DERIVATION_NONE) + { + my_error(ER_CANT_AGGREGATE_NCOLLATIONS, MYF(0), "UNION"); + return true; + } + return false; +} + + +/***************************************************************************/ + +void Type_handler_var_string:: + Column_definition_implicit_upgrade(Column_definition *c) const +{ + // Change old VARCHAR to new VARCHAR + c->set_handler(&type_handler_varchar); +} + + +void Type_handler_time_common:: + Column_definition_implicit_upgrade(Column_definition *c) const +{ + if (opt_mysql56_temporal_format) + c->set_handler(&type_handler_time2); + else + 
c->set_handler(&type_handler_time); +} + + +void Type_handler_datetime_common:: + Column_definition_implicit_upgrade(Column_definition *c) const +{ + if (opt_mysql56_temporal_format) + c->set_handler(&type_handler_datetime2); + else + c->set_handler(&type_handler_datetime); +} + + +void Type_handler_timestamp_common:: + Column_definition_implicit_upgrade(Column_definition *c) const +{ + if (opt_mysql56_temporal_format) + c->set_handler(&type_handler_timestamp2); + else + c->set_handler(&type_handler_timestamp); +} + + +/***************************************************************************/ + + +int Type_handler_temporal_with_date::stored_field_cmp_to_item(THD *thd, + Field *field, + Item *item) const +{ + MYSQL_TIME field_time, item_time, item_time2, *item_time_cmp= &item_time; + field->get_date(&field_time, Datetime::Options(TIME_INVALID_DATES, thd)); + item->get_date(thd, &item_time, Datetime::Options(TIME_INVALID_DATES, thd)); + if (item_time.time_type == MYSQL_TIMESTAMP_TIME && + time_to_datetime(thd, &item_time, item_time_cmp= &item_time2)) + return 1; + return my_time_compare(&field_time, item_time_cmp); +} + + +int Type_handler_time_common::stored_field_cmp_to_item(THD *thd, + Field *field, + Item *item) const +{ + MYSQL_TIME field_time, item_time; + field->get_date(&field_time, Time::Options(thd)); + item->get_date(thd, &item_time, Time::Options(thd)); + return my_time_compare(&field_time, &item_time); +} + + +int Type_handler_string_result::stored_field_cmp_to_item(THD *thd, + Field *field, + Item *item) const +{ + StringBuffer item_tmp; + StringBuffer field_tmp; + String *item_result= item->val_str(&item_tmp); + /* + Some implementations of Item::val_str(String*) actually modify + the field Item::null_value, hence we can't check it earlier. 
+ */ + if (item->null_value) + return 0; + String *field_result= field->val_str(&field_tmp); + return sortcmp(field_result, item_result, field->charset()); +} + + +int Type_handler_int_result::stored_field_cmp_to_item(THD *thd, + Field *field, + Item *item) const +{ + DBUG_ASSERT(0); // Not used yet + return 0; +} + + +int Type_handler_real_result::stored_field_cmp_to_item(THD *thd, + Field *field, + Item *item) const +{ + /* + The patch for Bug#13463415 started using this function for comparing + BIGINTs. That uncovered a bug in Visual Studio 32bit optimized mode. + Prefixing the auto variables with volatile fixes the problem.... + */ + volatile double result= item->val_real(); + if (item->null_value) + return 0; + volatile double field_result= field->val_real(); + if (field_result < result) + return -1; + else if (field_result > result) + return 1; + return 0; +} + + +/***************************************************************************/ + + +static bool have_important_literal_warnings(const MYSQL_TIME_STATUS *status) +{ + return (status->warnings & ~MYSQL_TIME_NOTE_TRUNCATED) != 0; +} + + +static void literal_warn(THD *thd, const Item *item, + const char *str, size_t length, CHARSET_INFO *cs, + const MYSQL_TIME_STATUS *st, + const char *typestr, bool send_error) +{ + if (likely(item)) + { + if (st->warnings) // e.g. 
a note on nanosecond truncation + { + ErrConvString err(str, length, cs); + thd->push_warning_wrong_or_truncated_value( + Sql_condition::time_warn_level(st->warnings), + false, typestr, err.ptr(), + nullptr, nullptr, nullptr); + } + } + else if (send_error) + { + ErrConvString err(str, length, cs); + my_error(ER_WRONG_VALUE, MYF(0), typestr, err.ptr()); + } +} + + +Item_literal * +Type_handler_date_common::create_literal_item(THD *thd, + const char *str, + size_t length, + CHARSET_INFO *cs, + bool send_error) const +{ + Temporal::Warn st; + Item_literal *item= NULL; + Temporal_hybrid tmp(thd, &st, str, length, cs, Temporal_hybrid::Options(thd)); + if (tmp.is_valid_temporal() && + tmp.get_mysql_time()->time_type == MYSQL_TIMESTAMP_DATE && + !have_important_literal_warnings(&st)) + { + Date d(&tmp); + item= new (thd->mem_root) Item_date_literal(thd, &d); + } + literal_warn(thd, item, str, length, cs, &st, "DATE", send_error); + return item; +} + + +Item_literal * +Type_handler_temporal_with_date::create_literal_item(THD *thd, + const char *str, + size_t length, + CHARSET_INFO *cs, + bool send_error) const +{ + Temporal::Warn st; + Item_literal *item= NULL; + Temporal_hybrid tmp(thd, &st, str, length, cs, Temporal_hybrid::Options(thd)); + if (tmp.is_valid_temporal() && + tmp.get_mysql_time()->time_type == MYSQL_TIMESTAMP_DATETIME && + !have_important_literal_warnings(&st)) + { + Datetime dt(&tmp); + item= new (thd->mem_root) Item_datetime_literal(thd, &dt, st.precision); + } + literal_warn(thd, item, str, length, cs, &st, "DATETIME", send_error); + return item; +} + + +Item_literal * +Type_handler_time_common::create_literal_item(THD *thd, + const char *str, + size_t length, + CHARSET_INFO *cs, + bool send_error) const +{ + MYSQL_TIME_STATUS st; + Item_literal *item= NULL; + Time::Options opt(TIME_TIME_ONLY, thd, Time::DATETIME_TO_TIME_DISALLOW); + Time tmp(thd, &st, str, length, cs, opt); + if (tmp.is_valid_time() && + !have_important_literal_warnings(&st)) + item= 
new (thd->mem_root) Item_time_literal(thd, &tmp, st.precision); + literal_warn(thd, item, str, length, cs, &st, "TIME", send_error); + return item; +} + + +bool +Type_handler_time_common::Item_val_native_with_conversion(THD *thd, + Item *item, + Native *to) const +{ + if (item->type_handler()->type_handler_for_native_format() == + &type_handler_time2) + return item->val_native(thd, to); + return Time(thd, item).to_native(to, item->time_precision(thd)); +} + + +bool +Type_handler_time_common::Item_val_native_with_conversion_result(THD *thd, + Item *item, + Native *to) + const +{ + if (item->type_handler()->type_handler_for_native_format() == + &type_handler_time2) + return item->val_native_result(thd, to); + MYSQL_TIME ltime; + if (item->get_date_result(thd, <ime, Time::Options(thd))) + return true; + int warn; + return Time(&warn, <ime, 0).to_native(to, item->time_precision(thd)); +} + + +int Type_handler_time_common::cmp_native(const Native &a, + const Native &b) const +{ + // Optimize a simple case: equal fractional precision: + if (a.length() == b.length()) + return memcmp(a.ptr(), b.ptr(), a.length()); + longlong lla= Time(a).to_packed(); + longlong llb= Time(b).to_packed(); + if (lla < llb) + return -1; + if (lla> llb) + return 1; + return 0; +} + + +bool Type_handler_timestamp_common::TIME_to_native(THD *thd, + const MYSQL_TIME *ltime, + Native *to, + uint decimals) const +{ + uint error_code; + Timestamp_or_zero_datetime tm(thd, ltime, &error_code); + if (error_code) + return true; + tm.trunc(decimals); + return tm.to_native(to, decimals); +} + + +bool +Type_handler_timestamp_common::Item_val_native_with_conversion(THD *thd, + Item *item, + Native *to) const +{ + if (item->type_handler()->type_handler_for_native_format() == + &type_handler_timestamp2) + return item->val_native(thd, to); + Datetime dt(thd, item, Datetime::Options(TIME_NO_ZERO_IN_DATE, thd)); + return + !dt.is_valid_datetime() || + TIME_to_native(thd, dt.get_mysql_time(), to, 
item->datetime_precision(thd)); +} + +bool Type_handler_null::union_element_finalize(Item_type_holder *item) const +{ + item->set_handler(&type_handler_string); + return false; +} + + +bool +Type_handler_timestamp_common::Item_val_native_with_conversion_result(THD *thd, + Item *item, + Native *to) + const +{ + MYSQL_TIME ltime; + if (item->type_handler()->type_handler_for_native_format() == + &type_handler_timestamp2) + return item->val_native_result(thd, to); + return + item->get_date_result(thd, <ime, + Datetime::Options(TIME_NO_ZERO_IN_DATE, thd)) || + TIME_to_native(thd, <ime, to, item->datetime_precision(thd)); +} + + +int Type_handler_timestamp_common::cmp_native(const Native &a, + const Native &b) const +{ + /* + Optimize a simple case: + Either both timeatamp values have the same fractional precision, + or both values are zero datetime '0000-00-00 00:00:00.000000', + */ + if (a.length() == b.length()) + return memcmp(a.ptr(), b.ptr(), a.length()); + return Timestamp_or_zero_datetime(a).cmp(Timestamp_or_zero_datetime(b)); +} + + +Timestamp_or_zero_datetime_native_null:: + Timestamp_or_zero_datetime_native_null(THD *thd, Item *item, bool conv) + :Null_flag(false) +{ + DBUG_ASSERT(item->type_handler()->type_handler_for_native_format() == + &type_handler_timestamp2 || conv); + if (conv ? + type_handler_timestamp2.Item_val_native_with_conversion(thd, item, this) : + item->val_native(thd, this)) + Null_flag::operator=(true); + // If no conversion, then is_null() should be equal to item->null_value + DBUG_ASSERT(is_null() == item->null_value || conv); + /* + is_null() can be true together with item->null_value==false, which means + a non-NULL item was evaluated, but then the conversion to TIMESTAMP failed. + But is_null() can never be false if item->null_value==true. 
+ */ + DBUG_ASSERT(is_null() >= item->null_value); +} + + +bool +Type_handler::Item_param_val_native(THD *thd, + Item_param *item, + Native *to) const +{ + DBUG_ASSERT(0); // TODO-TYPE: MDEV-14271 + return item->null_value= true; +} + + +bool +Type_handler_timestamp_common::Item_param_val_native(THD *thd, + Item_param *item, + Native *to) const +{ + /* + The below code may not run well in corner cases. + This will be fixed under terms of MDEV-14271. + Item_param should: + - either remember @@time_zone at bind time + - or store TIMESTAMP in my_time_t format, rather than in MYSQL_TIME format. + */ + MYSQL_TIME ltime; + return + item->get_date(thd, <ime, Datetime::Options(TIME_NO_ZERO_IN_DATE, thd)) || + TIME_to_native(thd, <ime, to, item->datetime_precision(thd)); +} + + +bool +Type_handler_time_common::Item_param_val_native(THD *thd, + Item_param *item, + Native *to) const +{ + return Time(thd, item).to_native(to, item->decimals); +} + + +/***************************************************************************/ + +bool Type_handler::validate_implicit_default_value(THD *thd, + const Column_definition &def) const +{ + DBUG_EXECUTE_IF("validate_implicit_default_value_error", return true;); + return false; +} + + +bool Type_handler_date_common::validate_implicit_default_value(THD *thd, + const Column_definition &def) const +{ + return thd->variables.sql_mode & MODE_NO_ZERO_DATE; +} + + +bool Type_handler_datetime_common::validate_implicit_default_value(THD *thd, + const Column_definition &def) const +{ + return thd->variables.sql_mode & MODE_NO_ZERO_DATE; +} + + +/***************************************************************************/ + +const Name & Type_handler_row::default_value() const +{ + DBUG_ASSERT(0); + static Name def(STRING_WITH_LEN("")); + return def; +} + +const Name & Type_handler_numeric::default_value() const +{ + static Name def(STRING_WITH_LEN("0")); + return def; +} + +const Name & Type_handler_string_result::default_value() const +{ + static 
Name def(STRING_WITH_LEN("")); + return def; +} + +const Name & Type_handler_time_common::default_value() const +{ + static Name def(STRING_WITH_LEN("00:00:00")); + return def; +} + +const Name & Type_handler_date_common::default_value() const +{ + static Name def(STRING_WITH_LEN("0000-00-00")); + return def; +} + +const Name & Type_handler_datetime_common::default_value() const +{ + static Name def(STRING_WITH_LEN("0000-00-00 00:00:00")); + return def; +} + +const Name & Type_handler_timestamp_common::default_value() const +{ + static Name def(STRING_WITH_LEN("0000-00-00 00:00:00")); + return def; +} + +/***************************************************************************/ + +bool Type_handler::Column_definition_data_type_info_image(Binary_string *to, + const Column_definition &def) + const +{ + // Have *some* columns write type info (let's use string fields as an example) + DBUG_EXECUTE_IF("frm_data_type_info_emulate", + if (cmp_type() == STRING_RESULT) + return to->append_char('x') || + to->append(name().lex_cstring());); + if (type_collection() != &type_collection_std) + return to->append(name().lex_cstring()); + return false; +} + + +/***************************************************************************/ + +void +Type_handler::partition_field_type_not_allowed(const LEX_CSTRING &field_name) +{ + my_error(ER_FIELD_TYPE_NOT_ALLOWED_AS_PARTITION_FIELD, MYF(0), + field_name.str); +} + + +bool +Type_handler::partition_field_check_result_type(Item *item, + Item_result expected_type) +{ + if (item->result_type() != expected_type) + { + my_error(ER_WRONG_TYPE_COLUMN_VALUE_ERROR, MYF(0)); + return TRUE; + } + return false; +} + + +bool +Type_handler_blob_common::partition_field_check(const LEX_CSTRING &field_name, + Item *item_expr) const +{ + my_error(ER_BLOB_FIELD_IN_PART_FUNC_ERROR, MYF(0)); + return true; +} + + +bool +Type_handler_general_purpose_int::partition_field_append_value( + String *str, + Item *item_expr, + CHARSET_INFO *field_cs, + 
partition_value_print_mode_t mode) + const +{ + DBUG_ASSERT(item_expr->cmp_type() == INT_RESULT); + StringBuffer tmp; + longlong value= item_expr->val_int(); + tmp.set(value, system_charset_info); + return str->append(tmp); +} + + +/* + Append an Item value to a String using a desired mode. + + @param [OUT] str The string to append the value to. + @param item_expr The item to get the value from + @param field_cs The character set of the value owner field. + @param mode The mode. + @retval true on error + @retval false on success + + The value is added using system_charset_info (no matter what mode is). + + (1) If mode is equal to PARTITION_VALUE_PRINT_MODE_FRM, + the value is appended as a pure ASCII string in the format '_latin1 0xdf', + i.e. a character set introducer followed by a hex hybrid. + + Before appending, we value is first converted to field_cs. + a) If the conversion succeeds, the value is printed in its field_cs + represenation. + b) If the conversion fails, the value is printed without conversion, + using the original character set introducer followed by the original + string hex representation. + In this case, open_table_from_share() will later notice that + the value cannot be actually stored to the field, and report + the error. So here we don't need to report errors such as + ER_PARTITION_FUNCTION_IS_NOT_ALLOWED. + + (2) If the mode is equal to PARTITION_VALUE_PRINT_SHOW, + then the value is needed for: + - SHOW CREATE TABLE, or + - the PARTITION_DESCRIPTION column in a + INFORMATION_SCHEMA.PARTITION query. + + The value generated here will be later sent to the client and + therefore will be converted to the client character set in the protocol. + + We try to generate the value as a simple quoted utf8 string without + introducers (e.g. 'utf8-string') when possible, to make it: + - as human readable as possible + - but still safe for mysqldump purposes. 
+ + Simple quoted utf8 string is generated when these two conditions are true + at the same time: + a) The value can be safely converted to utf8, + so we can return it without data loss from this function. + b) The value can be safely converted to the client character set, + so we can convert it later without data loss to the client character + set in the protocol. + + If one of the conditions fail, the value is returned using + PARTITION_VALUE_PRINT_MODE_FRM representation. See (1). +*/ +bool Type_handler::partition_field_append_value( + String *str, + Item *item_expr, + CHARSET_INFO *field_cs, + partition_value_print_mode_t mode) + const +{ + DBUG_ASSERT(cmp_type() != INT_RESULT); + StringBuffer buf; + String *res; + + if (!(res= item_expr->val_str(&buf))) + return str->append(NULL_clex_str, system_charset_info); + + if (!res->length()) + return str->append(STRING_WITH_LEN("''"), system_charset_info); + + if (mode == PARTITION_VALUE_PRINT_MODE_FRM || + !res->can_be_safely_converted_to(current_thd-> + variables.character_set_client) || + !res->can_be_safely_converted_to(system_charset_info)) + { + StringBuffer<64> buf2; + uint cnverr2= 0; + buf2.copy(res->ptr(), res->length(), res->charset(), field_cs, &cnverr2); + if (!cnverr2) + return str->append_introducer_and_hex(&buf2); + return str->append_introducer_and_hex(res); + } + + StringBuffer<64> val(system_charset_info); + uint cnverr= 0; + val.copy(res->ptr(), res->length(), res->charset(), + system_charset_info, &cnverr); + append_unescaped(str, val.ptr(), val.length()); + return false; +} + + +bool Type_handler::can_return_extract_source(interval_type int_type) const +{ + return type_collection() == &type_collection_std; +} + +/***************************************************************************/ + +LEX_CSTRING Charset::collation_specific_name() const +{ + /* + User defined collations can provide arbitrary names + for character sets and collations, so a collation + name not necessarily starts with the 
character set name. + */ + size_t cs_name_length= m_charset->cs_name.length; + if (strncmp(m_charset->coll_name.str, m_charset->cs_name.str, + cs_name_length)) + return {NULL, 0}; + const char *ptr= m_charset->coll_name.str + cs_name_length; + return {ptr, m_charset->coll_name.length - cs_name_length }; +} + + +bool +Charset::encoding_allows_reinterpret_as(const CHARSET_INFO *cs) const +{ + if (my_charset_same(m_charset, cs)) + return true; + + if (!strcmp(m_charset->cs_name.str, MY_UTF8MB3) && + !strcmp(cs->cs_name.str, MY_UTF8MB4)) + return true; + + /* + Originally we allowed here instant ALTER for ASCII-to-LATIN1 + and UCS2-to-UTF16, but this was wrong: + - MariaDB's ascii is not a subset for 8-bit character sets + like latin1, because it allows storing bytes 0x80..0xFF as + "unassigned" characters (see MDEV-19285). + - MariaDB's ucs2 (as in Unicode-1.1) is not a subset for UTF16, + because they treat surrogate codes differently (MDEV-19284). + */ + return false; +} + + +bool +Charset::eq_collation_specific_names(CHARSET_INFO *cs) const +{ + LEX_CSTRING name0= collation_specific_name(); + LEX_CSTRING name1= Charset(cs).collation_specific_name(); + return name0.length && !cmp(&name0, &name1); +} + +int initialize_data_type_plugin(st_plugin_int *plugin) +{ + st_mariadb_data_type *data= (st_mariadb_data_type*) plugin->plugin->info; + data->type_handler->set_name(Name(plugin->name)); + if (plugin->plugin->init && plugin->plugin->init(NULL)) + { + sql_print_error("Plugin '%s' init function returned error.", + plugin->name.str); + return 1; + } + return 0; +} diff --git a/sql/sql_type.h b/sql/sql_type.h new file mode 100644 index 00000000..35c13a38 --- /dev/null +++ b/sql/sql_type.h @@ -0,0 +1,7714 @@ +#ifndef SQL_TYPE_H_INCLUDED +#define SQL_TYPE_H_INCLUDED +/* + Copyright (c) 2015 MariaDB Foundation. + Copyright (c) 2015, 2022, MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + + +#include "mysqld.h" +#include "lex_string.h" +#include "sql_array.h" +#include "sql_const.h" +#include "sql_time.h" +#include "sql_type_string.h" +#include "sql_type_real.h" +#include "compat56.h" +C_MODE_START +#include +C_MODE_END + +class Field; +class Column_definition; +class Column_definition_attributes; +class Key_part_spec; +class Item; +class Item_const; +class Item_literal; +class Item_param; +class Item_cache; +class Item_copy; +class Item_func_or_sum; +class Item_sum; +class Item_sum_hybrid; +class Item_sum_sum; +class Item_sum_avg; +class Item_sum_variance; +class Item_func_hex; +class Item_hybrid_func; +class Item_func_min_max; +class Item_func_hybrid_field_type; +class Item_bool_func2; +class Item_func_between; +class Item_func_in; +class Item_func_round; +class Item_func_int_val; +class Item_func_abs; +class Item_func_neg; +class Item_func_signed; +class Item_func_unsigned; +class Item_double_typecast; +class Item_float_typecast; +class Item_decimal_typecast; +class Item_char_typecast; +class Item_time_typecast; +class Item_date_typecast; +class Item_datetime_typecast; +class Item_func_plus; +class Item_func_minus; +class Item_func_mul; +class Item_func_div; +class Item_func_mod; +class Item_type_holder; +class 
cmp_item; +class in_vector; +class Type_handler_data; +class Type_handler_hybrid_field_type; +class Sort_param; +class Arg_comparator; +class Spvar_definition; +class st_value; +class Protocol; +class handler; +struct TABLE; +struct SORT_FIELD_ATTR; +struct SORT_FIELD; +class Vers_history_point; +class Virtual_column_info; +class Conv_source; +class ST_FIELD_INFO; +class Type_collection; +class Create_func; + +#define my_charset_numeric my_charset_latin1 + +enum protocol_send_type_t +{ + PROTOCOL_SEND_STRING, + PROTOCOL_SEND_FLOAT, + PROTOCOL_SEND_DOUBLE, + PROTOCOL_SEND_TINY, + PROTOCOL_SEND_SHORT, + PROTOCOL_SEND_LONG, + PROTOCOL_SEND_LONGLONG, + PROTOCOL_SEND_DATETIME, + PROTOCOL_SEND_DATE, + PROTOCOL_SEND_TIME +}; + + +enum scalar_comparison_op +{ + SCALAR_CMP_EQ, + SCALAR_CMP_EQUAL, + SCALAR_CMP_LT, + SCALAR_CMP_LE, + SCALAR_CMP_GE, + SCALAR_CMP_GT +}; + + +/* + This enum is intentionally defined as "class" to disallow its implicit + cast as "bool". This is needed to avoid pre-MDEV-32203 constructs like: + if (field->can_optimize_range(...)) + do_optimization(); + to merge automatically as such - that would change the meaning + to the opposite. The pre-MDEV-32203 code must to be changed to: + if (field->can_optimize_range(...) 
== Data_type_compatibility::OK) + do_optimization(); +*/ +enum class Data_type_compatibility +{ + OK, + INCOMPATIBLE_DATA_TYPE, + INCOMPATIBLE_COLLATION +}; + + +static inline const LEX_CSTRING +scalar_comparison_op_to_lex_cstring(scalar_comparison_op op) +{ + switch (op) { + case SCALAR_CMP_EQ: return LEX_CSTRING{STRING_WITH_LEN("=")}; + case SCALAR_CMP_EQUAL: return LEX_CSTRING{STRING_WITH_LEN("<=>")}; + case SCALAR_CMP_LT: return LEX_CSTRING{STRING_WITH_LEN("<")}; + case SCALAR_CMP_LE: return LEX_CSTRING{STRING_WITH_LEN("<=")}; + case SCALAR_CMP_GE: return LEX_CSTRING{STRING_WITH_LEN(">")}; + case SCALAR_CMP_GT: return LEX_CSTRING{STRING_WITH_LEN(">=")}; + } + DBUG_ASSERT(0); + return LEX_CSTRING{STRING_WITH_LEN("")}; +} + + +class Hasher +{ + ulong m_nr1; + ulong m_nr2; +public: + Hasher(): m_nr1(1), m_nr2(4) + { } + void add_null() + { + m_nr1^= (m_nr1 << 1) | 1; + } + void add(CHARSET_INFO *cs, const uchar *str, size_t length) + { + cs->coll->hash_sort(cs, str, length, &m_nr1, &m_nr2); + } + void add(CHARSET_INFO *cs, const char *str, size_t length) + { + add(cs, (const uchar *) str, length); + } + uint32 finalize() const + { + return (uint32) m_nr1; + } +}; + + +enum partition_value_print_mode_t +{ + PARTITION_VALUE_PRINT_MODE_SHOW= 0, + PARTITION_VALUE_PRINT_MODE_FRM= 1 +}; + + +enum column_definition_type_t +{ + COLUMN_DEFINITION_TABLE_FIELD, + COLUMN_DEFINITION_ROUTINE_PARAM, + COLUMN_DEFINITION_ROUTINE_LOCAL, + COLUMN_DEFINITION_FUNCTION_RETURN +}; + + +class Send_field_extended_metadata +{ + LEX_CSTRING m_attr[MARIADB_FIELD_ATTR_LAST+1]; +public: + Send_field_extended_metadata() + { + bzero(this, sizeof(*this)); + } + bool set_data_type_name(const LEX_CSTRING &str) + { + m_attr[MARIADB_FIELD_ATTR_DATA_TYPE_NAME]= str; + return false; + } + bool set_format_name(const LEX_CSTRING &str) + { + m_attr[MARIADB_FIELD_ATTR_FORMAT_NAME]= str; + return false; + } + bool has_extended_metadata() const + { + for (uint i= 0; i <= MARIADB_FIELD_ATTR_LAST; i++) + { + 
if (m_attr[i].str) + return true; + } + return false; + } + const LEX_CSTRING &attr(uint i) const + { + DBUG_ASSERT(i <= MARIADB_FIELD_ATTR_LAST); + return m_attr[i]; + } +}; + + +class Data_type_statistics +{ +public: + uint m_uneven_bit_length; + uint m_fixed_string_total_length; + uint m_fixed_string_count; + uint m_variable_string_total_length; + uint m_variable_string_count; + uint m_blob_count; + Data_type_statistics() + :m_uneven_bit_length(0), + m_fixed_string_total_length(0), + m_fixed_string_count(0), + m_variable_string_total_length(0), + m_variable_string_count(0), + m_blob_count(0) + { } + uint string_count() const + { + return m_fixed_string_count + m_variable_string_count; + } + uint string_total_length() const + { + return m_fixed_string_total_length + m_variable_string_total_length; + } +}; + + +class Typelib: public TYPELIB +{ +public: + Typelib(uint count, const char **type_names, unsigned int *type_lengths) + { + TYPELIB::count= count; + TYPELIB::name= ""; + TYPELIB::type_names= type_names; + TYPELIB::type_lengths= type_lengths; + } + uint max_octet_length() const + { + uint max_length= 0; + for (uint i= 0; i < TYPELIB::count; i++) + { + const uint length= TYPELIB::type_lengths[i]; + set_if_bigger(max_length, length); + } + return max_length; + } +}; + + +template +class TypelibBuffer: public Typelib +{ + const char *m_type_names[sz + 1]; + uint m_type_lengths[sz + 1]; +public: + TypelibBuffer(uint count, const LEX_CSTRING *values) + :Typelib(count, m_type_names, m_type_lengths) + { + DBUG_ASSERT(sz >= count); + for (uint i= 0; i < count; i++) + { + DBUG_ASSERT(values[i].str != NULL); + m_type_names[i]= values[i].str; + m_type_lengths[i]= (uint) values[i].length; + } + m_type_names[sz]= NullS; // End marker + m_type_lengths[sz]= 0; // End marker + } + TypelibBuffer(const LEX_CSTRING *values) + :TypelibBuffer(sz, values) + { } +}; + + +/* + A helper class to store column attributes that are inherited + by columns (from the table level) when not 
specified explicitly. +*/ +class Column_derived_attributes +{ + /* + Table level CHARACTER SET and COLLATE value: + + CREATE TABLE t1 (a VARCHAR(1), b CHAR(2)) CHARACTER SET latin1; + + All character string columns (CHAR, VARCHAR, TEXT) + inherit CHARACTER SET from the table level. + */ + CHARSET_INFO *m_charset; +public: + explicit Column_derived_attributes(CHARSET_INFO *cs) + :m_charset(cs) + { } + CHARSET_INFO *charset() const { return m_charset; } +}; + + +/* + A helper class to store requests for changes + in multiple column data types during ALTER. +*/ +class Column_bulk_alter_attributes +{ + /* + Target CHARACTER SET specification in ALTER .. CONVERT, e.g. + + ALTER TABLE t1 CONVERT TO CHARACTER SET utf8; + + All character string columns (CHAR, VARCHAR, TEXT) + get converted to the "CONVERT TO CHARACTER SET". + */ + CHARSET_INFO *m_alter_table_convert_to_charset; +public: + explicit Column_bulk_alter_attributes(CHARSET_INFO *convert) + :m_alter_table_convert_to_charset(convert) + { } + CHARSET_INFO *alter_table_convert_to_charset() const + { return m_alter_table_convert_to_charset; } +}; + + +class Native: public Binary_string +{ +public: + Native(char *str, size_t len) + :Binary_string(str, len) + { } +}; + + +template +class NativeBuffer: public Native +{ + char buff[buff_sz]; +public: + NativeBuffer() : Native(buff, buff_sz) { length(0); } +}; + + +class String_ptr +{ +protected: + String *m_string_ptr; +public: + String_ptr(String *str) + :m_string_ptr(str) + { } + String_ptr(Item *item, String *buffer); + const String *string() const + { + DBUG_ASSERT(m_string_ptr); + return m_string_ptr; + } + bool is_null() const { return m_string_ptr == NULL; } +}; + + +class Ascii_ptr: public String_ptr +{ +public: + Ascii_ptr(Item *item, String *buffer); +}; + + +template +class String_ptr_and_buffer: public StringBuffer, + public String_ptr +{ +public: + String_ptr_and_buffer(Item *item) + :String_ptr(item, this) + { } +}; + + +template +class 
Ascii_ptr_and_buffer: public StringBuffer, + public Ascii_ptr +{ +public: + Ascii_ptr_and_buffer(Item *item) + :Ascii_ptr(item, this) + { } +}; + + +class Dec_ptr +{ +protected: + my_decimal *m_ptr; + Dec_ptr() = default; +public: + Dec_ptr(my_decimal *ptr) :m_ptr(ptr) { } + bool is_null() const { return m_ptr == NULL; } + const my_decimal *ptr() const { return m_ptr; } + const my_decimal *ptr_or(const my_decimal *def) const + { + return m_ptr ? m_ptr : def; + } + my_decimal *to_decimal(my_decimal *to) const + { + if (!m_ptr) + return NULL; + *to= *m_ptr; + return to; + } + double to_double() const { return m_ptr ? m_ptr->to_double() : 0.0; } + longlong to_longlong(bool unsigned_flag) + { return m_ptr ? m_ptr->to_longlong(unsigned_flag) : 0; } + Longlong_null to_xlonglong_null() + { + return m_ptr ? Longlong_null(m_ptr->to_xlonglong()) : Longlong_null(); + } + bool to_bool() const { return m_ptr ? m_ptr->to_bool() : false; } + String *to_string(String *to) const + { + return m_ptr ? m_ptr->to_string(to) : NULL; + } + String *to_string(String *to, uint prec, uint dec, char filler) + { + return m_ptr ? m_ptr->to_string(to, prec, dec, filler) : NULL; + } + int to_binary(uchar *bin, int prec, decimal_digits_t scale) const + { + return (m_ptr ? m_ptr : &decimal_zero)->to_binary(bin, prec, scale); + } + int cmp(const my_decimal *dec) const + { + DBUG_ASSERT(m_ptr); + DBUG_ASSERT(dec); + return m_ptr->cmp(dec); + } + int cmp(const Dec_ptr &other) const + { + return cmp(other.m_ptr); + } +}; + + +// A helper class to handle results of val_decimal(), date_op(), etc. 
+class Dec_ptr_and_buffer: public Dec_ptr +{ +protected: + my_decimal m_buffer; +public: + /* scale is int as it can be negative here */ + int round_to(my_decimal *to, int scale, decimal_round_mode mode) + { + DBUG_ASSERT(m_ptr); + return m_ptr->round_to(to, scale, mode); + } + int round_self(decimal_digits_t scale, decimal_round_mode mode) + { + return round_to(&m_buffer, scale, mode); + } + int round_self_if_needed(int scale, decimal_round_mode mode) + { + if (scale >= m_ptr->frac) + return E_DEC_OK; + int res= m_ptr->round_to(&m_buffer, scale, mode); + m_ptr= &m_buffer; + return res; + } + String *to_string_round(String *to, decimal_digits_t dec) + { + /* + decimal_round() allows from==to + So it's save even if m_ptr points to m_buffer before this call: + */ + return m_ptr ? m_ptr->to_string_round(to, dec, &m_buffer) : NULL; + } +}; + + +// A helper class to handle val_decimal() results. +class VDec: public Dec_ptr_and_buffer +{ +public: + VDec(): Dec_ptr_and_buffer() { } + VDec(Item *item); + void set(Item *a); +}; + + +// A helper class to handler decimal_op() results. +class VDec_op: public Dec_ptr_and_buffer +{ +public: + VDec_op(Item_func_hybrid_field_type *item); +}; + + +/* + Get and cache val_decimal() values for two items. + If the first value appears to be NULL, the second value is not evaluated. +*/ +class VDec2_lazy +{ +public: + VDec m_a; + VDec m_b; + VDec2_lazy(Item *a, Item *b) :m_a(a) + { + if (!m_a.is_null()) + m_b.set(b); + } + bool has_null() const + { + return m_a.is_null() || m_b.is_null(); + } +}; + + +/** + Class Sec6 represents a fixed point value with 6 fractional digits. + Used e.g. to convert double and my_decimal values to TIME/DATETIME. 
+*/ + +class Sec6 +{ +protected: + ulonglong m_sec; // The integer part, between 0 and LONGLONG_MAX + ulong m_usec; // The fractional part, between 0 and 999999 + bool m_neg; // false if positive, true of negative + bool m_truncated; // Indicates if the constructor truncated the value + void make_from_decimal(const my_decimal *d, ulong *nanoseconds); + void make_from_double(double d, ulong *nanoseconds); + void make_from_int(const Longlong_hybrid &nr) + { + m_neg= nr.neg(); + m_sec= nr.abs(); + m_usec= 0; + m_truncated= false; + } + void reset() + { + m_sec= m_usec= m_neg= m_truncated= 0; + } + Sec6() = default; + bool add_nanoseconds(uint nanoseconds) + { + DBUG_ASSERT(nanoseconds <= 1000000000); + if (nanoseconds < 500) + return false; + m_usec+= (nanoseconds + 500) / 1000; + if (m_usec < 1000000) + return false; + m_usec%= 1000000; + return true; + } +public: + explicit Sec6(double nr) + { + ulong nanoseconds; + make_from_double(nr, &nanoseconds); + } + explicit Sec6(const my_decimal *d) + { + ulong nanoseconds; + make_from_decimal(d, &nanoseconds); + } + explicit Sec6(const Longlong_hybrid &nr) + { + make_from_int(nr); + } + explicit Sec6(longlong nr, bool unsigned_val) + { + make_from_int(Longlong_hybrid(nr, unsigned_val)); + } + bool neg() const { return m_neg; } + bool truncated() const { return m_truncated; } + ulonglong sec() const { return m_sec; } + long usec() const { return m_usec; } + /** + Converts Sec6 to MYSQL_TIME + @param thd current thd + @param [out] warn conversion warnings will be written here + @param [out] ltime converted value will be written here + @param fuzzydate conversion flags (TIME_INVALID_DATE, etc) + @returns false for success, true for a failure + */ + bool convert_to_mysql_time(THD *thd, + int *warn, + MYSQL_TIME *ltime, + date_mode_t fuzzydate) const; + +protected: + + bool to_interval_hhmmssff_only(MYSQL_TIME *to, int *warn) const + { + return number_to_time_only(m_neg, m_sec, m_usec, + TIME_MAX_INTERVAL_HOUR, to, warn); + } + 
bool to_datetime_or_to_interval_hhmmssff(MYSQL_TIME *to, int *warn) const + { + /* + Convert a number to a time interval. + The following formats are understood: + - 0 <= x <= 999999995959 - parse as hhhhmmss + - 999999995959 < x <= 99991231235959 - parse as YYYYMMDDhhmmss + (YYMMDDhhmmss) (YYYYMMDDhhmmss) + + Note, these formats are NOT understood: + - YYMMDD - overlaps with INTERVAL range + - YYYYMMDD - overlaps with INTERVAL range + - YYMMDDhhmmss - overlaps with INTERVAL range, partially + (see TIME_MAX_INTERVAL_HOUR) + + If we ever need wider intervals, this code switching between + full datetime and interval-only should be rewised. + */ + DBUG_ASSERT(TIME_MAX_INTERVAL_HOUR <= 999999995959); + /* (YYMMDDhhmmss) */ + if (m_sec > 999999995959ULL && + m_sec <= 99991231235959ULL && m_neg == 0) + return to_datetime_or_date(to, warn, TIME_INVALID_DATES); + if (m_sec / 10000 > TIME_MAX_INTERVAL_HOUR) + { + *warn= MYSQL_TIME_WARN_OUT_OF_RANGE; + return true; + } + return to_interval_hhmmssff_only(to, warn); + } +public: + // [-][DD]hhhmmss.ff, YYMMDDhhmmss.ff, YYYYMMDDhhmmss.ff + bool to_datetime_or_time(MYSQL_TIME *to, int *warn, + date_conv_mode_t mode) const + { + bool rc= m_sec > 9999999 && m_sec <= 99991231235959ULL && !m_neg ? 
+ ::number_to_datetime_or_date(m_sec, m_usec, to, + ulonglong(mode & TIME_MODE_FOR_XXX_TO_DATE), warn) < 0 : + ::number_to_time_only(m_neg, m_sec, m_usec, TIME_MAX_HOUR, to, warn); + DBUG_ASSERT(*warn || !rc); + return rc; + } + /* + Convert a number in formats YYYYMMDDhhmmss.ff or YYMMDDhhmmss.ff to + TIMESTAMP'YYYY-MM-DD hh:mm:ss.ff' + */ + bool to_datetime_or_date(MYSQL_TIME *to, int *warn, + date_conv_mode_t flags) const + { + if (m_neg) + { + *warn= MYSQL_TIME_WARN_OUT_OF_RANGE; + return true; + } + bool rc= number_to_datetime_or_date(m_sec, m_usec, to, + ulonglong(flags & TIME_MODE_FOR_XXX_TO_DATE), + warn) == -1; + DBUG_ASSERT(*warn || !rc); + return rc; + } + // Convert elapsed seconds to TIME + bool sec_to_time(MYSQL_TIME *ltime, uint dec) const + { + set_zero_time(ltime, MYSQL_TIMESTAMP_TIME); + ltime->neg= m_neg; + if (m_sec > TIME_MAX_VALUE_SECONDS) + { + // use check_time_range() to set ltime to the max value depending on dec + int unused; + ltime->hour= TIME_MAX_HOUR + 1; + check_time_range(ltime, dec, &unused); + return true; + } + DBUG_ASSERT(usec() <= TIME_MAX_SECOND_PART); + ltime->hour= (uint) (m_sec / 3600); + ltime->minute= (uint) (m_sec % 3600) / 60; + ltime->second= (uint) m_sec % 60; + ltime->second_part= m_usec; + return false; + } + Sec6 &trunc(uint dec) + { + m_usec-= my_time_fraction_remainder(m_usec, dec); + return *this; + } + size_t to_string(char *to, size_t nbytes) const + { + return m_usec ? + my_snprintf(to, nbytes, "%s%llu.%06lu", + m_neg ? "-" : "", m_sec, (uint) m_usec) : + my_snprintf(to, nbytes, "%s%llu", m_neg ? 
"-" : "", m_sec); + } + void make_truncated_warning(THD *thd, const char *type_str) const; +}; + + +class Sec9: public Sec6 +{ +protected: + ulong m_nsec; // Nanoseconds 0..999 + void make_from_int(const Longlong_hybrid &nr) + { + Sec6::make_from_int(nr); + m_nsec= 0; + } + Sec9() = default; +public: + Sec9(const my_decimal *d) + { + Sec6::make_from_decimal(d, &m_nsec); + } + Sec9(double d) + { + Sec6::make_from_double(d, &m_nsec); + } + ulong nsec() const { return m_nsec; } + Sec9 &trunc(uint dec) + { + m_nsec= 0; + Sec6::trunc(dec); + return *this; + } + Sec9 &round(uint dec); + Sec9 &round(uint dec, time_round_mode_t mode) + { + return mode == TIME_FRAC_TRUNCATE ? trunc(dec) : round(dec); + } +}; + + +class VSec9: protected Sec9 +{ + bool m_is_null; + Sec9& to_sec9() + { + DBUG_ASSERT(!is_null()); + return *this; + } +public: + VSec9(THD *thd, Item *item, const char *type_str, ulonglong limit); + bool is_null() const { return m_is_null; } + const Sec9& to_const_sec9() const + { + DBUG_ASSERT(!is_null()); + return *this; + } + bool neg() const { return to_const_sec9().neg(); } + bool truncated() const { return to_const_sec9().truncated(); } + ulonglong sec() const { return to_const_sec9().sec(); } + long usec() const { return to_const_sec9().usec(); } + bool sec_to_time(MYSQL_TIME *ltime, uint dec) const + { + return to_const_sec9().sec_to_time(ltime, dec); + } + void make_truncated_warning(THD *thd, const char *type_str) const + { + return to_const_sec9().make_truncated_warning(thd, type_str); + } + Sec9 &round(uint dec) + { + return to_sec9().round(dec); + } + Sec9 &round(uint dec, time_round_mode_t mode) + { + return to_sec9().round(dec, mode); + } +}; + + +/* + A heler class to perform additive operations between + two MYSQL_TIME structures and return the result as a + combination of seconds, microseconds and sign. 
+*/ +class Sec6_add +{ + ulonglong m_sec; // number of seconds + ulong m_usec; // number of microseconds + bool m_neg; // false if positive, true if negative + bool m_error; // false if the value is OK, true otherwise + void to_hh24mmssff(MYSQL_TIME *ltime, timestamp_type tstype) const + { + bzero(ltime, sizeof(*ltime)); + ltime->neg= m_neg; + calc_time_from_sec(ltime, (ulong) (m_sec % SECONDS_IN_24H), m_usec); + ltime->time_type= tstype; + } +public: + /* + @param ltime1 - the first value to add (must be a valid DATE,TIME,DATETIME) + @param ltime2 - the second value to add (must be a valid TIME) + @param sign - the sign of the operation + (+1 for addition, -1 for subtraction) + */ + Sec6_add(const MYSQL_TIME *ltime1, const MYSQL_TIME *ltime2, int sign) + { + DBUG_ASSERT(sign == -1 || sign == 1); + DBUG_ASSERT(!ltime1->neg || ltime1->time_type == MYSQL_TIMESTAMP_TIME); + if (!(m_error= (ltime2->time_type != MYSQL_TIMESTAMP_TIME))) + { + if (ltime1->neg != ltime2->neg) + sign= -sign; + m_neg= calc_time_diff(ltime1, ltime2, -sign, &m_sec, &m_usec); + if (ltime1->neg && (m_sec || m_usec)) + m_neg= !m_neg; // Swap sign + } + } + bool to_time(THD *thd, MYSQL_TIME *ltime, uint decimals) const + { + if (m_error) + return true; + to_hh24mmssff(ltime, MYSQL_TIMESTAMP_TIME); + ltime->hour+= static_cast<uint>(to_days_abs() * 24); + return adjust_time_range_with_warn(thd, ltime, decimals); + } + bool to_datetime(MYSQL_TIME *ltime) const + { + if (m_error || m_neg) + return true; + to_hh24mmssff(ltime, MYSQL_TIMESTAMP_DATETIME); + return get_date_from_daynr(to_days_abs(), + &ltime->year, &ltime->month, &ltime->day) || + !ltime->day; + } + long to_days_abs() const { return (long) (m_sec / SECONDS_IN_24H); } +}; + + +class Year +{ +protected: + uint m_year; + bool m_truncated; + uint year_precision(const Item *item) const; +public: + Year(): m_year(0), m_truncated(false) { } + Year(longlong value, bool unsigned_flag, uint length); + uint year() const { return m_year; } + uint to_YYYYMMDD() 
const { return m_year * 10000; } + bool truncated() const { return m_truncated; } +}; + + +class Year_null: public Year, public Null_flag +{ +public: + Year_null(const Longlong_null &nr, bool unsigned_flag, uint length) + :Year(nr.is_null() ? 0 : nr.value(), unsigned_flag, length), + Null_flag(nr.is_null()) + { } +}; + + +class VYear: public Year_null +{ +public: + VYear(Item *item); +}; + + +class VYear_op: public Year_null +{ +public: + VYear_op(Item_func_hybrid_field_type *item); +}; + + +class Double_null: public Null_flag +{ +protected: + double m_value; +public: + Double_null(double value, bool is_null) + :Null_flag(is_null), m_value(value) + { } + double value() const { return m_value; } +}; + + +class Temporal: protected MYSQL_TIME +{ +public: + class Status: public MYSQL_TIME_STATUS + { + public: + Status() { my_time_status_init(this); } + }; + + class Warn: public ErrBuff, + public Status + { + public: + void push_conversion_warnings(THD *thd, bool totally_useless_value, + date_mode_t mode, timestamp_type tstype, + const char *db_name, const char *table_name, + const char *name) + { + const char *typestr= tstype >= 0 ? type_name_by_timestamp_type(tstype) : + mode & (TIME_INTERVAL_hhmmssff | TIME_INTERVAL_DAY) ? + "interval" : + mode & TIME_TIME_ONLY ? 
"time" : "datetime"; + Temporal::push_conversion_warnings(thd, totally_useless_value, warnings, + typestr, db_name, table_name, name, + ptr()); + } + }; + + class Warn_push: public Warn + { + THD * const m_thd; + const char * const m_db_name; + const char * const m_table_name; + const char * const m_name; + const MYSQL_TIME * const m_ltime; + const date_mode_t m_mode; + public: + Warn_push(THD *thd, const char *db_name, const char *table_name, + const char *name, const MYSQL_TIME *ltime, date_mode_t mode) + : m_thd(thd), m_db_name(db_name), m_table_name(table_name), m_name(name), + m_ltime(ltime), m_mode(mode) + { } + ~Warn_push() + { + if (warnings) + push_conversion_warnings(m_thd, m_ltime->time_type < 0, + m_mode, m_ltime->time_type, + m_db_name, m_table_name, m_name); + } + }; + +public: + static date_conv_mode_t sql_mode_for_dates(THD *thd); + static time_round_mode_t default_round_mode(THD *thd); + class Options: public date_mode_t + { + public: + explicit Options(date_mode_t flags) + :date_mode_t(flags) + { } + Options(date_conv_mode_t flags, time_round_mode_t round_mode) + :date_mode_t(flags | round_mode) + { + DBUG_ASSERT(ulonglong(flags) <= UINT_MAX32); + } + Options(date_conv_mode_t flags, THD *thd) + :Options(flags, default_round_mode(thd)) + { } + }; + + bool is_valid_temporal() const + { + DBUG_ASSERT(time_type != MYSQL_TIMESTAMP_ERROR); + return time_type != MYSQL_TIMESTAMP_NONE; + } + static const char *type_name_by_timestamp_type(timestamp_type time_type) + { + switch (time_type) { + case MYSQL_TIMESTAMP_DATE: return "date"; + case MYSQL_TIMESTAMP_TIME: return "time"; + case MYSQL_TIMESTAMP_DATETIME: // FALLTHROUGH + default: + break; + } + return "datetime"; + } + static void push_conversion_warnings(THD *thd, bool totally_useless_value, int warn, + const char *type_name, + const char *db_name, + const char *table_name, + const char *field_name, + const char *value); + /* + This method is used if the item was not null but convertion to + 
TIME/DATE/DATETIME failed. We return a zero date if allowed, + otherwise - null. + */ + void make_fuzzy_date(int *warn, date_conv_mode_t fuzzydate) + { + /* + In the following scenario: + - The caller expected to get a TIME value + - Item returned a not NULL string or numeric value + - But then conversion from string or number to TIME failed + we need to change the default time_type from MYSQL_TIMESTAMP_DATE + (which was set in bzero) to MYSQL_TIMESTAMP_TIME and therefore + return TIME'00:00:00' rather than DATE'0000-00-00'. + If we don't do this, methods like Item::get_time_with_conversion() + will erroneously subtract CURRENT_DATE from '0000-00-00 00:00:00' + and return TIME'-838:59:59' instead of TIME'00:00:00' as a result. + */ + timestamp_type tstype= !(fuzzydate & TIME_FUZZY_DATES) ? + MYSQL_TIMESTAMP_NONE : + fuzzydate & TIME_TIME_ONLY ? + MYSQL_TIMESTAMP_TIME : + MYSQL_TIMESTAMP_DATETIME; + set_zero_time(this, tstype); + } + +protected: + my_decimal *bad_to_decimal(my_decimal *to) const; + my_decimal *to_decimal(my_decimal *to) const; + static double to_double(bool negate, ulonglong num, ulong frac) + { + double d= static_cast(num) + static_cast(frac) / + TIME_SECOND_PART_FACTOR; + return negate ? 
-d : d; + } + longlong to_packed() const { return ::pack_time(this); } + void make_from_out_of_range(int *warn) + { + *warn= MYSQL_TIME_WARN_OUT_OF_RANGE; + time_type= MYSQL_TIMESTAMP_NONE; + } + void make_from_sec6(THD *thd, MYSQL_TIME_STATUS *st, + const Sec6 &nr, date_mode_t mode) + { + if (nr.convert_to_mysql_time(thd, &st->warnings, this, mode)) + make_fuzzy_date(&st->warnings, date_conv_mode_t(mode)); + } + void make_from_sec9(THD *thd, MYSQL_TIME_STATUS *st, + const Sec9 &nr, date_mode_t mode) + { + if (nr.convert_to_mysql_time(thd, &st->warnings, this, mode) || + add_nanoseconds(thd, &st->warnings, mode, nr.nsec())) + make_fuzzy_date(&st->warnings, date_conv_mode_t(mode)); + } + void make_from_str(THD *thd, Warn *warn, + const char *str, size_t length, CHARSET_INFO *cs, + date_mode_t fuzzydate); + void make_from_double(THD *thd, Warn *warn, double nr, date_mode_t mode) + { + make_from_sec9(thd, warn, Sec9(nr), mode); + if (warn->warnings) + warn->set_double(nr); + } + void make_from_longlong_hybrid(THD *thd, Warn *warn, + const Longlong_hybrid &nr, date_mode_t mode) + { + /* + Note: conversion from an integer to TIME can overflow to + '838:59:59.999999', so the conversion result can have fractional digits. 
+ */ + make_from_sec6(thd, warn, Sec6(nr), mode); + if (warn->warnings) + warn->set_longlong(nr); + } + void make_from_decimal(THD *thd, Warn *warn, + const my_decimal *nr, date_mode_t mode) + { + make_from_sec9(thd, warn, Sec9(nr), mode); + if (warn->warnings) + warn->set_decimal(nr); + } + bool ascii_to_temporal(MYSQL_TIME_STATUS *st, + const char *str, size_t length, + date_mode_t mode) + { + if (mode & (TIME_INTERVAL_hhmmssff | TIME_INTERVAL_DAY)) + return ascii_to_datetime_or_date_or_interval_DDhhmmssff(st, str, length, + mode); + if (mode & TIME_TIME_ONLY) + return ascii_to_datetime_or_date_or_time(st, str, length, mode); + return ascii_to_datetime_or_date(st, str, length, mode); + } + bool ascii_to_datetime_or_date_or_interval_DDhhmmssff(MYSQL_TIME_STATUS *st, + const char *str, + size_t length, + date_mode_t mode) + { + longlong cflags= ulonglong(mode & TIME_MODE_FOR_XXX_TO_DATE); + bool rc= mode & TIME_INTERVAL_DAY ? + ::str_to_datetime_or_date_or_interval_day(str, length, this, cflags, st, + TIME_MAX_INTERVAL_HOUR, + TIME_MAX_INTERVAL_HOUR) : + ::str_to_datetime_or_date_or_interval_hhmmssff(str, length, this, + cflags, st, + TIME_MAX_INTERVAL_HOUR, + TIME_MAX_INTERVAL_HOUR); + DBUG_ASSERT(!rc || st->warnings); + return rc; + } + bool ascii_to_datetime_or_date_or_time(MYSQL_TIME_STATUS *status, + const char *str, size_t length, + date_mode_t fuzzydate) + { + ulonglong cflags= ulonglong(fuzzydate & TIME_MODE_FOR_XXX_TO_DATE); + bool rc= ::str_to_datetime_or_date_or_time(str, length, this, + cflags, status, + TIME_MAX_HOUR, UINT_MAX32); + DBUG_ASSERT(!rc || status->warnings); + return rc; + } + bool ascii_to_datetime_or_date(MYSQL_TIME_STATUS *status, + const char *str, size_t length, + date_mode_t fuzzydate) + { + DBUG_ASSERT(bool(fuzzydate & TIME_TIME_ONLY) == false); + bool rc= ::str_to_datetime_or_date(str, length, this, + ulonglong(fuzzydate & TIME_MODE_FOR_XXX_TO_DATE), + status); + DBUG_ASSERT(!rc || status->warnings); + return rc; + } + // Character 
set aware versions for string conversion routines + bool str_to_temporal(THD *thd, MYSQL_TIME_STATUS *st, + const char *str, size_t length, + CHARSET_INFO *cs, date_mode_t fuzzydate); + bool str_to_datetime_or_date_or_time(THD *thd, MYSQL_TIME_STATUS *st, + const char *str, size_t length, + CHARSET_INFO *cs, date_mode_t mode); + bool str_to_datetime_or_date(THD *thd, MYSQL_TIME_STATUS *st, + const char *str, size_t length, + CHARSET_INFO *cs, date_mode_t mode); + + bool has_valid_mmssff() const + { + return minute <= TIME_MAX_MINUTE && + second <= TIME_MAX_SECOND && + second_part <= TIME_MAX_SECOND_PART; + } + bool has_zero_YYYYMM() const + { + return year == 0 && month == 0; + } + bool has_zero_YYYYMMDD() const + { + return year == 0 && month == 0 && day == 0; + } + bool check_date(date_conv_mode_t flags, int *warn) const + { + return ::check_date(this, flags, warn); + } + void time_hhmmssff_set_max(uint max_hour) + { + hour= max_hour; + minute= TIME_MAX_MINUTE; + second= TIME_MAX_SECOND; + second_part= TIME_MAX_SECOND_PART; + } + /* + Add nanoseconds to ssff + retval true if seconds overflowed (the caller should increment minutes) + false if no overflow happened + */ + bool add_nanoseconds_ssff(uint nanoseconds) + { + DBUG_ASSERT(nanoseconds <= 1000000000); + if (nanoseconds < 500) + return false; + second_part+= (nanoseconds + 500) / 1000; + if (second_part < 1000000) + return false; + second_part%= 1000000; + if (second < 59) + { + second++; + return false; + } + second= 0; + return true; + } + /* + Add nanoseconds to mmssff + retval true if hours overflowed (the caller should increment hours) + false if no overflow happened + */ + bool add_nanoseconds_mmssff(uint nanoseconds) + { + if (!add_nanoseconds_ssff(nanoseconds)) + return false; + if (minute < 59) + { + minute++; + return false; + } + minute= 0; + return true; + } + void time_round_or_set_max(uint dec, int *warn, ulong max_hour, ulong nsec); + bool datetime_add_nanoseconds_or_invalidate(THD *thd, int 
*warn, ulong nsec); + bool datetime_round_or_invalidate(THD *thd, uint dec, int *warn, ulong nsec); + bool add_nanoseconds_with_round(THD *thd, int *warn, + date_conv_mode_t mode, ulong nsec); + bool add_nanoseconds(THD *thd, int *warn, date_mode_t mode, ulong nsec) + { + date_conv_mode_t cmode= date_conv_mode_t(mode); + return time_round_mode_t(mode) == TIME_FRAC_ROUND ? + add_nanoseconds_with_round(thd, warn, cmode, nsec) : false; + } +public: + static void *operator new(size_t size, MYSQL_TIME *ltime) throw() + { + DBUG_ASSERT(size == sizeof(MYSQL_TIME)); + return ltime; + } + static void operator delete(void *ptr, MYSQL_TIME *ltime) { } + + long fraction_remainder(uint dec) const + { + return my_time_fraction_remainder(second_part, dec); + } +}; + + +/* + Use this class when you need to get a MYSQL_TIME from an Item + using Item's native timestamp type, without automatic timestamp + type conversion. +*/ +class Temporal_hybrid: public Temporal +{ +public: + class Options: public Temporal::Options + { + public: + Options(THD *thd) + :Temporal::Options(sql_mode_for_dates(thd), default_round_mode(thd)) + { } + Options(date_conv_mode_t flags, time_round_mode_t round_mode) + :Temporal::Options(flags, round_mode) + { } + explicit Options(const Temporal::Options &opt) + :Temporal::Options(opt) + { } + explicit Options(date_mode_t fuzzydate) + :Temporal::Options(fuzzydate) + { } + }; + +public: + // Contructors for Item + Temporal_hybrid(THD *thd, Item *item, date_mode_t fuzzydate); + Temporal_hybrid(THD *thd, Item *item) + :Temporal_hybrid(thd, item, Options(thd)) + { } + Temporal_hybrid(Item *item) + :Temporal_hybrid(current_thd, item) + { } + + // Constructors for non-NULL values + Temporal_hybrid(THD *thd, Warn *warn, + const char *str, size_t length, CHARSET_INFO *cs, + date_mode_t fuzzydate) + { + make_from_str(thd, warn, str, length, cs, fuzzydate); + } + Temporal_hybrid(THD *thd, Warn *warn, + const Longlong_hybrid &nr, date_mode_t fuzzydate) + { + 
make_from_longlong_hybrid(thd, warn, nr, fuzzydate); + } + Temporal_hybrid(THD *thd, Warn *warn, double nr, date_mode_t fuzzydate) + { + make_from_double(thd, warn, nr, fuzzydate); + } + + // Constructors for nullable values + Temporal_hybrid(THD *thd, Warn *warn, const String *str, date_mode_t mode) + { + if (!str) + time_type= MYSQL_TIMESTAMP_NONE; + else + make_from_str(thd, warn, str->ptr(), str->length(), str->charset(), mode); + } + Temporal_hybrid(THD *thd, Warn *warn, + const Longlong_hybrid_null &nr, date_mode_t fuzzydate) + { + if (nr.is_null()) + time_type= MYSQL_TIMESTAMP_NONE; + else + make_from_longlong_hybrid(thd, warn, nr, fuzzydate); + } + Temporal_hybrid(THD *thd, Warn *warn, const Double_null &nr, date_mode_t mode) + { + if (nr.is_null()) + time_type= MYSQL_TIMESTAMP_NONE; + else + make_from_double(thd, warn, nr.value(), mode); + } + Temporal_hybrid(THD *thd, Warn *warn, const my_decimal *nr, date_mode_t mode) + { + if (!nr) + time_type= MYSQL_TIMESTAMP_NONE; + else + make_from_decimal(thd, warn, nr, mode); + } + // End of constuctors + + bool copy_valid_value_to_mysql_time(MYSQL_TIME *ltime) const + { + DBUG_ASSERT(is_valid_temporal()); + *ltime= *this; + return false; + } + + longlong to_longlong() const + { + if (!is_valid_temporal()) + return 0; + ulonglong v= TIME_to_ulonglong(this); + return neg ? -(longlong) v : (longlong) v; + } + double to_double() const + { + return is_valid_temporal() ? TIME_to_double(this) : 0; + } + my_decimal *to_decimal(my_decimal *to) + { + return is_valid_temporal() ? 
Temporal::to_decimal(to) : bad_to_decimal(to); + } + String *to_string(String *str, uint dec) const + { + if (!is_valid_temporal()) + return NULL; + str->set_charset(&my_charset_numeric); + if (!str->alloc(MAX_DATE_STRING_REP_LENGTH)) + str->length(my_TIME_to_str(this, const_cast(str->ptr()), dec)); + return str; + } + const MYSQL_TIME *get_mysql_time() const + { + DBUG_ASSERT(is_valid_temporal()); + return this; + } +}; + + +/* + This class resembles the SQL standard , + used in extract expressions, e.g: EXTRACT(DAY FROM dt) + ::= + EXTRACT FROM + ::= | +*/ +class Extract_source: public Temporal_hybrid +{ + /* + Convert a TIME value to DAY-TIME interval, e.g. for extraction: + EXTRACT(DAY FROM x), EXTRACT(HOUR FROM x), etc. + Moves full days from ltime->hour to ltime->day. + */ + void time_to_daytime_interval() + { + DBUG_ASSERT(time_type == MYSQL_TIMESTAMP_TIME); + DBUG_ASSERT(has_zero_YYYYMMDD()); + MYSQL_TIME::day= MYSQL_TIME::hour / 24; + MYSQL_TIME::hour%= 24; + } + bool is_valid_extract_source_slow() const + { + return is_valid_temporal() && MYSQL_TIME::hour < 24 && + (has_zero_YYYYMM() || time_type != MYSQL_TIMESTAMP_TIME); + } + bool is_valid_value_slow() const + { + return time_type == MYSQL_TIMESTAMP_NONE || is_valid_extract_source_slow(); + } +public: + Extract_source(THD *thd, Item *item, date_mode_t mode) + :Temporal_hybrid(thd, item, mode) + { + if (MYSQL_TIME::time_type == MYSQL_TIMESTAMP_TIME) + time_to_daytime_interval(); + DBUG_ASSERT(is_valid_value_slow()); + } + inline const MYSQL_TIME *get_mysql_time() const + { + DBUG_ASSERT(is_valid_extract_source_slow()); + return this; + } + bool is_valid_extract_source() const { return is_valid_temporal(); } + int sign() const { return get_mysql_time()->neg ? 
-1 : 1; } + uint year() const { return get_mysql_time()->year; } + uint month() const { return get_mysql_time()->month; } + int day() const { return (int) get_mysql_time()->day * sign(); } + int hour() const { return (int) get_mysql_time()->hour * sign(); } + int minute() const { return (int) get_mysql_time()->minute * sign(); } + int second() const { return (int) get_mysql_time()->second * sign(); } + int microsecond() const { return (int) get_mysql_time()->second_part * sign(); } + + uint year_month() const { return year() * 100 + month(); } + uint quarter() const { return (month() + 2)/3; } + uint week(THD *thd) const; + + longlong second_microsecond() const + { + return (second() * 1000000LL + microsecond()); + } + + // DAY TO XXX + longlong day_hour() const + { + return (longlong) day() * 100LL + hour(); + } + longlong day_minute() const + { + return day_hour() * 100LL + minute(); + } + longlong day_second() const + { + return day_minute() * 100LL + second(); + } + longlong day_microsecond() const + { + return day_second() * 1000000LL + microsecond(); + } + + // HOUR TO XXX + int hour_minute() const + { + return hour() * 100 + minute(); + } + int hour_second() const + { + return hour_minute() * 100 + second(); + } + longlong hour_microsecond() const + { + return hour_second() * 1000000LL + microsecond(); + } + + // MINUTE TO XXX + int minute_second() const + { + return minute() * 100 + second(); + } + longlong minute_microsecond() const + { + return minute_second() * 1000000LL + microsecond(); + } +}; + + +/* + This class is used for the "time_interval" argument of these SQL functions: + TIMESTAMP(tm,time_interval) + ADDTIME(tm,time_interval) + Features: + - DATE and DATETIME formats are treated as errors + - Preserves hours for TIME format as is, without limiting to TIME_MAX_HOUR +*/ +class Interval_DDhhmmssff: public Temporal +{ + static const LEX_CSTRING m_type_name; + bool str_to_DDhhmmssff(MYSQL_TIME_STATUS *status, + const char *str, size_t length, 
CHARSET_INFO *cs, + ulong max_hour); + void push_warning_wrong_or_truncated_value(THD *thd, + const ErrConv &str, + int warnings); + bool is_valid_interval_DDhhmmssff_slow() const + { + return time_type == MYSQL_TIMESTAMP_TIME && + has_zero_YYYYMMDD() && has_valid_mmssff(); + } + bool is_valid_value_slow() const + { + return time_type == MYSQL_TIMESTAMP_NONE || + is_valid_interval_DDhhmmssff_slow(); + } +public: + // Get fractional second precision from an Item + static uint fsp(THD *thd, Item *item); + /* + Maximum useful HOUR value: + TIMESTAMP'0001-01-01 00:00:00' + '87649415:59:59' = '9999-12-31 23:59:59' + This gives maximum possible interval values: + - '87649415:59:59.999999' (in 'hh:mm:ss.ff' format) + - '3652058 23:59:59.999999' (in 'DD hh:mm:ss.ff' format) + */ + static uint max_useful_hour() + { + return TIME_MAX_INTERVAL_HOUR; + } + static uint max_int_part_char_length() + { + // e.g. '+3652058 23:59:59' + return 1/*sign*/ + TIME_MAX_INTERVAL_DAY_CHAR_LENGTH + 1 + 8/*hh:mm:ss*/; + } + static uint max_char_length(uint fsp) + { + DBUG_ASSERT(fsp <= TIME_SECOND_PART_DIGITS); + return max_int_part_char_length() + (fsp ? 
1 : 0) + fsp; + } + +public: + Interval_DDhhmmssff(THD *thd, Status *st, bool push_warnings, + Item *item, ulong max_hour, + time_round_mode_t mode, uint dec); + Interval_DDhhmmssff(THD *thd, Item *item, uint dec) + { + Status st; + new(this) Interval_DDhhmmssff(thd, &st, true, item, max_useful_hour(), + default_round_mode(thd), dec); + } + Interval_DDhhmmssff(THD *thd, Item *item) + :Interval_DDhhmmssff(thd, item, TIME_SECOND_PART_DIGITS) + { } + const MYSQL_TIME *get_mysql_time() const + { + DBUG_ASSERT(is_valid_interval_DDhhmmssff_slow()); + return this; + } + bool is_valid_interval_DDhhmmssff() const + { + return time_type == MYSQL_TIMESTAMP_TIME; + } + bool is_valid_value() const + { + return time_type == MYSQL_TIMESTAMP_NONE || is_valid_interval_DDhhmmssff(); + } + String *to_string(String *str, uint dec) const + { + if (!is_valid_interval_DDhhmmssff()) + return NULL; + str->set_charset(&my_charset_numeric); + if (!str->alloc(MAX_DATE_STRING_REP_LENGTH)) + str->length(my_interval_DDhhmmssff_to_str(this, + const_cast(str->ptr()), + dec)); + return str; + } +}; + +class Schema; + + +/** + Class Time is designed to store valid TIME values. + + 1. Valid value: + a. MYSQL_TIMESTAMP_TIME - a valid TIME within the supported TIME range + b. MYSQL_TIMESTAMP_NONE - an undefined value + + 2. Invalid value (internally only): + a. MYSQL_TIMESTAMP_TIME outside of the supported TIME range + a. MYSQL_TIMESTAMP_{DATE|DATETIME|ERROR} + + Temporarily Time is allowed to have an invalid value, but only internally, + during initialization time. All constructors and modification methods must + leave the Time value as described above (see "Valid values"). + + Time derives from MYSQL_TIME privately to make sure it is accessed + externally only in the valid state. 
+*/ +class Time: public Temporal +{ + static uint binary_length_to_precision(uint length); +public: + enum datetime_to_time_mode_t + { + DATETIME_TO_TIME_DISALLOW, + DATETIME_TO_TIME_YYYYMMDD_000000DD_MIX_TO_HOURS, + DATETIME_TO_TIME_YYYYMMDD_TRUNCATE, + DATETIME_TO_TIME_YYYYMMDD_00000000_ONLY, + DATETIME_TO_TIME_MINUS_CURRENT_DATE + }; + class Options: public Temporal::Options + { + datetime_to_time_mode_t m_datetime_to_time_mode; + public: + Options(THD *thd) + :Temporal::Options(default_flags_for_get_date(), default_round_mode(thd)), + m_datetime_to_time_mode(default_datetime_to_time_mode()) + { } + Options(date_conv_mode_t flags, THD *thd) + :Temporal::Options(flags, default_round_mode(thd)), + m_datetime_to_time_mode(default_datetime_to_time_mode()) + { } + Options(date_conv_mode_t flags, THD *thd, datetime_to_time_mode_t dtmode) + :Temporal::Options(flags, default_round_mode(thd)), + m_datetime_to_time_mode(dtmode) + { } + Options(date_conv_mode_t fuzzydate, time_round_mode_t round_mode, + datetime_to_time_mode_t datetime_to_time_mode) + :Temporal::Options(fuzzydate, round_mode), + m_datetime_to_time_mode(datetime_to_time_mode) + { } + + datetime_to_time_mode_t datetime_to_time_mode() const + { return m_datetime_to_time_mode; } + + static datetime_to_time_mode_t default_datetime_to_time_mode() + { + return DATETIME_TO_TIME_YYYYMMDD_000000DD_MIX_TO_HOURS; + } + }; + /* + CAST(AS TIME) historically does not mix days to hours. + This is different comparing to how implicit conversion + in Field::store_time_dec() works (e.g. on INSERT). 
+ */ + class Options_for_cast: public Options + { + public: + Options_for_cast(THD *thd) + :Options(default_flags_for_get_date(), default_round_mode(thd), + DATETIME_TO_TIME_YYYYMMDD_TRUNCATE) + { } + Options_for_cast(date_mode_t mode, THD *thd) + :Options(default_flags_for_get_date() | (mode & TIME_FUZZY_DATES), + default_round_mode(thd), + DATETIME_TO_TIME_YYYYMMDD_TRUNCATE) + { } + }; + + class Options_for_round: public Options + { + public: + Options_for_round(time_round_mode_t round_mode= TIME_FRAC_TRUNCATE) + :Options(Time::default_flags_for_get_date(), round_mode, + Time::DATETIME_TO_TIME_DISALLOW) + { } + }; + class Options_cmp: public Options + { + public: + Options_cmp(THD *thd) + :Options(comparison_flags_for_get_date(), thd) + { } + Options_cmp(THD *thd, datetime_to_time_mode_t dtmode) + :Options(comparison_flags_for_get_date(), + default_round_mode(thd), dtmode) + { } + }; +private: + bool is_valid_value_slow() const + { + return time_type == MYSQL_TIMESTAMP_NONE || is_valid_time_slow(); + } + bool is_valid_time_slow() const + { + return time_type == MYSQL_TIMESTAMP_TIME && + has_zero_YYYYMMDD() && has_valid_mmssff(); + } + void hhmmssff_copy(const MYSQL_TIME *from) + { + hour= from->hour; + minute= from->minute; + second= from->second; + second_part= from->second_part; + } + void datetime_to_time_YYYYMMDD_000000DD_mix_to_hours(int *warn, + uint from_year, + uint from_month, + uint from_day) + { + if (from_year != 0 || from_month != 0) + *warn|= MYSQL_TIME_NOTE_TRUNCATED; + else + hour+= from_day * 24; + } + /* + The result is calculated effectively similar to: + TIMEDIFF(dt, CAST(CURRENT_DATE AS DATETIME)) + If the difference does not fit to the supported TIME range, it's truncated. 
+ */ + void datetime_to_time_minus_current_date(THD *thd) + { + MYSQL_TIME current_date, tmp; + set_current_date(thd, ¤t_date); + calc_time_diff(this, ¤t_date, 1, &tmp, date_mode_t(0)); + static_cast(this)[0]= tmp; + int warnings= 0; + (void) check_time_range(this, TIME_SECOND_PART_DIGITS, &warnings); + DBUG_ASSERT(is_valid_time()); + } + /* + Convert a valid DATE or DATETIME to TIME. + Before this call, "this" must be a valid DATE or DATETIME value, + e.g. returned from Item::get_date(), str_to_xxx(), number_to_xxx(). + After this call, "this" is a valid TIME value. + */ + void valid_datetime_to_valid_time(THD *thd, int *warn, const Options opt) + { + DBUG_ASSERT(time_type == MYSQL_TIMESTAMP_DATE || + time_type == MYSQL_TIMESTAMP_DATETIME); + /* + We're dealing with a DATE or DATETIME returned from + str_to_xxx(), number_to_xxx() or unpack_time(). + Do some asserts to make sure the result hour value + after mixing days to hours does not go out of the valid TIME range. + The maximum hour value after mixing days will be 31*24+23=767, + which is within the supported TIME range. + Thus no adjust_time_range_or_invalidate() is needed here. + */ + DBUG_ASSERT(day < 32); + DBUG_ASSERT(hour < 24); + if (opt.datetime_to_time_mode() == DATETIME_TO_TIME_MINUS_CURRENT_DATE) + { + datetime_to_time_minus_current_date(thd); + } + else + { + if (opt.datetime_to_time_mode() == + DATETIME_TO_TIME_YYYYMMDD_000000DD_MIX_TO_HOURS) + datetime_to_time_YYYYMMDD_000000DD_mix_to_hours(warn, year, month, day); + year= month= day= 0; + time_type= MYSQL_TIMESTAMP_TIME; + } + DBUG_ASSERT(is_valid_time_slow()); + } + /** + Convert valid DATE/DATETIME to valid TIME if needed. + This method is called after Item::get_date(), + str_to_xxx(), number_to_xxx(). + which can return only valid TIME/DATE/DATETIME values. 
+ Before this call, "this" is: + - either a valid TIME/DATE/DATETIME value + (within the supported range for the corresponding type), + - or MYSQL_TIMESTAMP_NONE + After this call, "this" is: + - either a valid TIME (within the supported TIME range), + - or MYSQL_TIMESTAMP_NONE + */ + void valid_MYSQL_TIME_to_valid_value(THD *thd, int *warn, const Options opt) + { + switch (time_type) { + case MYSQL_TIMESTAMP_DATE: + case MYSQL_TIMESTAMP_DATETIME: + if (opt.datetime_to_time_mode() == + DATETIME_TO_TIME_YYYYMMDD_00000000_ONLY && + (year || month || day)) + make_from_out_of_range(warn); + else if (opt.datetime_to_time_mode() == DATETIME_TO_TIME_DISALLOW) + make_from_out_of_range(warn); + else + valid_datetime_to_valid_time(thd, warn, opt); + break; + case MYSQL_TIMESTAMP_NONE: + break; + case MYSQL_TIMESTAMP_ERROR: + set_zero_time(this, MYSQL_TIMESTAMP_TIME); + break; + case MYSQL_TIMESTAMP_TIME: + DBUG_ASSERT(is_valid_time_slow()); + break; + } + } + + /* + This method is called after number_to_xxx() and str_to_xxx(), + which can return DATE or DATETIME values. Convert to TIME if needed. + We trust that xxx_to_time() returns a valid TIME/DATE/DATETIME value, + so here we need to do only simple validation. + */ + void xxx_to_time_result_to_valid_value(THD *thd, int *warn, const Options opt) + { + // str_to_xxx(), number_to_xxx() never return MYSQL_TIMESTAMP_ERROR + DBUG_ASSERT(time_type != MYSQL_TIMESTAMP_ERROR); + valid_MYSQL_TIME_to_valid_value(thd, warn, opt); + } + void adjust_time_range_or_invalidate(int *warn) + { + if (check_time_range(this, TIME_SECOND_PART_DIGITS, warn)) + time_type= MYSQL_TIMESTAMP_NONE; + DBUG_ASSERT(is_valid_value_slow()); + } +public: + void round_or_set_max(uint dec, int *warn, ulong nsec); +private: + void round_or_set_max(uint dec, int *warn); + + /* + All make_from_xxx() methods initialize *warn. + The old value gets lost. 
+ */ + void make_from_datetime_move_day_to_hour(int *warn, const MYSQL_TIME *from); + void make_from_datetime_with_days_diff(int *warn, const MYSQL_TIME *from, + long curdays); + void make_from_time(int *warn, const MYSQL_TIME *from); + void make_from_datetime(int *warn, const MYSQL_TIME *from, long curdays); + void make_from_item(THD *thd, int *warn, Item *item, const Options opt); +public: + /* + All constructors that accept an "int *warn" parameter initialize *warn. + The old value gets lost. + */ + Time(int *warn, bool neg, ulonglong hour, uint minute, const Sec6 &second); + Time() { time_type= MYSQL_TIMESTAMP_NONE; } + Time(const Native &native); + Time(THD *thd, const MYSQL_TIME *ltime, const Options opt) + { + *(static_cast(this))= *ltime; + DBUG_ASSERT(is_valid_temporal()); + int warn= 0; + valid_MYSQL_TIME_to_valid_value(thd, &warn, opt); + } + Time(Item *item) + :Time(current_thd, item) + { } + Time(THD *thd, Item *item, const Options opt) + { + int warn; + make_from_item(thd, &warn, item, opt); + } + Time(THD *thd, Item *item) + :Time(thd, item, Options(thd)) + { } + Time(int *warn, const MYSQL_TIME *from, long curdays); + Time(THD *thd, MYSQL_TIME_STATUS *status, + const char *str, size_t len, CHARSET_INFO *cs, + const Options opt) + { + if (str_to_datetime_or_date_or_time(thd, status, str, len, cs, opt)) + time_type= MYSQL_TIMESTAMP_NONE; + // The below call will optionally add notes to already collected warnings: + else + xxx_to_time_result_to_valid_value(thd, &status->warnings, opt); + } + +protected: + Time(THD *thd, int *warn, const Sec6 &nr, const Options opt) + { + if (nr.to_datetime_or_time(this, warn, TIME_INVALID_DATES)) + time_type= MYSQL_TIMESTAMP_NONE; + xxx_to_time_result_to_valid_value(thd, warn, opt); + } + Time(THD *thd, int *warn, const Sec9 &nr, const Options &opt) + :Time(thd, warn, static_cast(nr), opt) + { + if (is_valid_time() && time_round_mode_t(opt) == TIME_FRAC_ROUND) + round_or_set_max(6, warn, nr.nsec()); + } + +public: + 
Time(THD *thd, int *warn, const Longlong_hybrid &nr, const Options &opt) + :Time(thd, warn, Sec6(nr), opt) + { } + Time(THD *thd, int *warn, double nr, const Options &opt) + :Time(thd, warn, Sec9(nr), opt) + { } + Time(THD *thd, int *warn, const my_decimal *d, const Options &opt) + :Time(thd, warn, Sec9(d), opt) + { } + + Time(THD *thd, Item *item, const Options opt, uint dec) + :Time(thd, item, opt) + { + round(dec, time_round_mode_t(opt)); + } + Time(int *warn, const MYSQL_TIME *from, long curdays, + const Time::Options &opt, uint dec) + :Time(warn, from, curdays) + { + round(dec, time_round_mode_t(opt), warn); + } + Time(int *warn, bool neg, ulonglong hour, uint minute, const Sec9 &second, + time_round_mode_t mode, uint dec) + :Time(warn, neg, hour, minute, second) + { + DBUG_ASSERT(is_valid_time()); + if ((ulonglong) mode == (ulonglong) TIME_FRAC_ROUND) + round_or_set_max(6, warn, second.nsec()); + round(dec, mode, warn); + } + Time(THD *thd, MYSQL_TIME_STATUS *status, + const char *str, size_t len, CHARSET_INFO *cs, + const Options &opt, uint dec) + :Time(thd, status, str, len, cs, opt) + { + round(dec, time_round_mode_t(opt), &status->warnings); + } + Time(THD *thd, int *warn, const Longlong_hybrid &nr, + const Options &opt, uint dec) + :Time(thd, warn, nr, opt) + { + /* + Decimal digit truncation is needed here in case if nr was out + of the supported TIME range, so "this" was set to '838:59:59.999999'. + We always do truncation (not rounding) here, independently from "opt". 
+ */ + trunc(dec); + } + Time(THD *thd, int *warn, double nr, const Options &opt, uint dec) + :Time(thd, warn, nr, opt) + { + round(dec, time_round_mode_t(opt), warn); + } + Time(THD *thd, int *warn, const my_decimal *d, const Options &opt, uint dec) + :Time(thd, warn, d, opt) + { + round(dec, time_round_mode_t(opt), warn); + } + + static date_conv_mode_t default_flags_for_get_date() + { return TIME_TIME_ONLY | TIME_INVALID_DATES; } + static date_conv_mode_t comparison_flags_for_get_date() + { return TIME_TIME_ONLY | TIME_INVALID_DATES | TIME_FUZZY_DATES; } + bool is_valid_time() const + { + DBUG_ASSERT(is_valid_value_slow()); + return time_type == MYSQL_TIMESTAMP_TIME; + } + const MYSQL_TIME *get_mysql_time() const + { + DBUG_ASSERT(is_valid_time_slow()); + return this; + } + bool copy_to_mysql_time(MYSQL_TIME *ltime) const + { + if (time_type == MYSQL_TIMESTAMP_NONE) + { + ltime->time_type= MYSQL_TIMESTAMP_NONE; + return true; + } + DBUG_ASSERT(is_valid_time_slow()); + *ltime= *this; + return false; + } + int cmp(const Time *other) const + { + DBUG_ASSERT(is_valid_time_slow()); + DBUG_ASSERT(other->is_valid_time_slow()); + longlong p0= to_packed(); + longlong p1= other->to_packed(); + if (p0 < p1) + return -1; + if (p0 > p1) + return 1; + return 0; + } + longlong to_seconds_abs() const + { + DBUG_ASSERT(is_valid_time_slow()); + return hour * 3600L + minute * 60 + second; + } + longlong to_seconds() const + { + return neg ? -to_seconds_abs() : to_seconds_abs(); + } + longlong to_longlong() const + { + if (!is_valid_time()) + return 0; + ulonglong v= TIME_to_ulonglong_time(this); + return neg ? -(longlong) v : (longlong) v; + } + double to_double() const + { + return !is_valid_time() ? 
0 : + Temporal::to_double(neg, TIME_to_ulonglong_time(this), second_part); + } + bool to_native(Native *to, uint decimals) const; + String *to_string(String *str, uint dec) const + { + if (!is_valid_time()) + return NULL; + str->set_charset(&my_charset_numeric); + if (!str->alloc(MAX_DATE_STRING_REP_LENGTH)) + str->length(my_time_to_str(this, const_cast(str->ptr()), dec)); + return str; + } + my_decimal *to_decimal(my_decimal *to) + { + return is_valid_time() ? Temporal::to_decimal(to) : bad_to_decimal(to); + } + longlong to_packed() const + { + return is_valid_time() ? Temporal::to_packed() : 0; + } + longlong valid_time_to_packed() const + { + DBUG_ASSERT(is_valid_time_slow()); + return Temporal::to_packed(); + } + long fraction_remainder(uint dec) const + { + DBUG_ASSERT(is_valid_time()); + return Temporal::fraction_remainder(dec); + } + + Time &trunc(uint dec) + { + if (is_valid_time()) + my_time_trunc(this, dec); + DBUG_ASSERT(is_valid_value_slow()); + return *this; + } + Time &ceiling(int *warn) + { + if (is_valid_time()) + { + if (neg) + my_time_trunc(this, 0); + else if (second_part) + round_or_set_max(0, warn, 999999999); + } + DBUG_ASSERT(is_valid_value_slow()); + return *this; + } + Time &ceiling() + { + int warn= 0; + return ceiling(&warn); + } + Time &floor(int *warn) + { + if (is_valid_time()) + { + if (!neg) + my_time_trunc(this, 0); + else if (second_part) + round_or_set_max(0, warn, 999999999); + } + DBUG_ASSERT(is_valid_value_slow()); + return *this; + } + Time &floor() + { + int warn= 0; + return floor(&warn); + } + Time &round(uint dec, int *warn) + { + if (is_valid_time()) + round_or_set_max(dec, warn); + DBUG_ASSERT(is_valid_value_slow()); + return *this; + } + Time &round(uint dec, time_round_mode_t mode, int *warn) + { + switch (mode.mode()) { + case time_round_mode_t::FRAC_NONE: + DBUG_ASSERT(fraction_remainder(dec) == 0); + return trunc(dec); + case time_round_mode_t::FRAC_TRUNCATE: + return trunc(dec); + case 
time_round_mode_t::FRAC_ROUND: + return round(dec, warn); + } + return *this; + } + Time &round(uint dec, time_round_mode_t mode) + { + int warn= 0; + return round(dec, mode, &warn); + } + +}; + + +/** + Class Temporal_with_date is designed to store valid DATE or DATETIME values. + See also class Time. + + 1. Valid value: + a. MYSQL_TIMESTAMP_{DATE|DATETIME} - a valid DATE or DATETIME value + b. MYSQL_TIMESTAMP_NONE - an undefined value + + 2. Invalid value (internally only): + a. MYSQL_TIMESTAMP_{DATE|DATETIME} - a DATE or DATETIME value, but with + MYSQL_TIME members outside of the + valid/supported range + b. MYSQL_TIMESTAMP_TIME - a TIME value + c. MYSQL_TIMESTAMP_ERROR - error + + Temporarily is allowed to have an invalid value, but only internally, + during initialization time. All constructors and modification methods must + leave the value as described above (see "Valid value"). + + Derives from MYSQL_TIME using "protected" inheritance to make sure + it is accessed externally only in the valid state. 
+*/ + +class Temporal_with_date: public Temporal +{ +public: + class Options: public Temporal::Options + { + public: + Options(date_conv_mode_t fuzzydate, time_round_mode_t mode): + Temporal::Options(fuzzydate, mode) + {} + explicit Options(const Temporal::Options &opt) + :Temporal::Options(opt) + { } + explicit Options(date_mode_t mode) + :Temporal::Options(mode) + { } + }; +protected: + void check_date_or_invalidate(int *warn, date_conv_mode_t flags); + void make_from_item(THD *thd, Item *item, date_mode_t flags); + + ulong daynr() const + { + return (ulong) ::calc_daynr((uint) year, (uint) month, (uint) day); + } + int weekday(bool sunday_first_day_of_week) const + { + return ::calc_weekday(daynr(), sunday_first_day_of_week); + } + ulong dayofyear() const + { + return (ulong) (daynr() - ::calc_daynr(year, 1, 1) + 1); + } + uint quarter() const + { + return (month + 2) / 3; + } + uint week(uint week_behaviour) const + { + uint year; + return calc_week(this, week_behaviour, &year); + } + uint yearweek(uint week_behaviour) const + { + uint year; + uint week= calc_week(this, week_behaviour, &year); + return week + year * 100; + } +public: + Temporal_with_date() + { + time_type= MYSQL_TIMESTAMP_NONE; + } + Temporal_with_date(THD *thd, Item *item, date_mode_t fuzzydate) + { + make_from_item(thd, item, fuzzydate); + } + Temporal_with_date(int *warn, const Sec6 &nr, date_mode_t flags) + { + DBUG_ASSERT(bool(flags & TIME_TIME_ONLY) == false); + if (nr.to_datetime_or_date(this, warn, date_conv_mode_t(flags))) + time_type= MYSQL_TIMESTAMP_NONE; + } + Temporal_with_date(THD *thd, MYSQL_TIME_STATUS *status, + const char *str, size_t len, CHARSET_INFO *cs, + date_mode_t flags) + { + DBUG_ASSERT(bool(flags & TIME_TIME_ONLY) == false); + if (str_to_datetime_or_date(thd, status, str, len, cs, flags)) + time_type= MYSQL_TIMESTAMP_NONE; + } +public: + bool check_date_with_warn(THD *thd, date_conv_mode_t flags) + { + return ::check_date_with_warn(thd, this, flags, 
MYSQL_TIMESTAMP_ERROR); + } + bool check_date_with_warn(THD *thd) + { + return ::check_date_with_warn(thd, this, Temporal::sql_mode_for_dates(thd), + MYSQL_TIMESTAMP_ERROR); + } + static date_conv_mode_t comparison_flags_for_get_date() + { return TIME_INVALID_DATES | TIME_FUZZY_DATES; } +}; + + +/** + Class Date is designed to store valid DATE values. + All constructors and modification methods leave instances + of this class in one of the following valid states: + a. MYSQL_TIMESTAMP_DATE - a DATE with all MYSQL_TIME members properly set + b. MYSQL_TIMESTAMP_NONE - an undefined value. + Other MYSQL_TIMESTAMP_XXX are not possible. + MYSQL_TIMESTAMP_DATE with MYSQL_TIME members improperly set is not possible. +*/ +class Date: public Temporal_with_date +{ + bool is_valid_value_slow() const + { + return time_type == MYSQL_TIMESTAMP_NONE || is_valid_date_slow(); + } + bool is_valid_date_slow() const + { + DBUG_ASSERT(time_type == MYSQL_TIMESTAMP_DATE); + return !check_datetime_range(this); + } +public: + class Options: public Temporal_with_date::Options + { + public: + explicit Options(date_conv_mode_t fuzzydate) + :Temporal_with_date::Options(fuzzydate, TIME_FRAC_TRUNCATE) + { } + Options(THD *thd, time_round_mode_t mode) + :Temporal_with_date::Options(sql_mode_for_dates(thd), mode) + { } + explicit Options(THD *thd) + :Temporal_with_date::Options(sql_mode_for_dates(thd), TIME_FRAC_TRUNCATE) + { } + explicit Options(date_mode_t fuzzydate) + :Temporal_with_date::Options(fuzzydate) + { } + }; +public: + Date(Item *item, date_mode_t fuzzydate) + :Date(current_thd, item, fuzzydate) + { } + Date(THD *thd, Item *item, date_mode_t fuzzydate) + :Temporal_with_date(thd, item, fuzzydate) + { + if (time_type == MYSQL_TIMESTAMP_DATETIME) + datetime_to_date(this); + DBUG_ASSERT(is_valid_value_slow()); + } + Date(THD *thd, Item *item, date_conv_mode_t fuzzydate) + :Date(thd, item, Options(fuzzydate)) + { } + Date(THD *thd, Item *item) + :Temporal_with_date(Date(thd, item, 
Options(thd, TIME_FRAC_TRUNCATE))) + { } + Date(Item *item) + :Temporal_with_date(Date(current_thd, item)) + { } + Date(const Temporal_with_date *d) + :Temporal_with_date(*d) + { + datetime_to_date(this); + DBUG_ASSERT(is_valid_date_slow()); + } + explicit Date(const Temporal_hybrid *from) + { + from->copy_valid_value_to_mysql_time(this); + DBUG_ASSERT(is_valid_date_slow()); + } + bool is_valid_date() const + { + DBUG_ASSERT(is_valid_value_slow()); + return time_type == MYSQL_TIMESTAMP_DATE; + } + bool check_date(date_conv_mode_t flags, int *warnings) const + { + DBUG_ASSERT(is_valid_date_slow()); + return ::check_date(this, (year || month || day), + ulonglong(flags & TIME_MODE_FOR_XXX_TO_DATE), + warnings); + } + bool check_date(THD *thd, int *warnings) const + { + return check_date(Temporal::sql_mode_for_dates(thd), warnings); + } + bool check_date(date_conv_mode_t flags) const + { + int dummy; /* unused */ + return check_date(flags, &dummy); + } + bool check_date(THD *thd) const + { + int dummy; + return check_date(Temporal::sql_mode_for_dates(thd), &dummy); + } + const MYSQL_TIME *get_mysql_time() const + { + DBUG_ASSERT(is_valid_date_slow()); + return this; + } + bool copy_to_mysql_time(MYSQL_TIME *ltime) const + { + if (time_type == MYSQL_TIMESTAMP_NONE) + { + ltime->time_type= MYSQL_TIMESTAMP_NONE; + return true; + } + DBUG_ASSERT(is_valid_date_slow()); + *ltime= *this; + return false; + } + ulong daynr() const + { + DBUG_ASSERT(is_valid_date_slow()); + return Temporal_with_date::daynr(); + } + ulong dayofyear() const + { + DBUG_ASSERT(is_valid_date_slow()); + return Temporal_with_date::dayofyear(); + } + uint quarter() const + { + DBUG_ASSERT(is_valid_date_slow()); + return Temporal_with_date::quarter(); + } + uint week(uint week_behaviour) const + { + DBUG_ASSERT(is_valid_date_slow()); + return Temporal_with_date::week(week_behaviour); + } + uint yearweek(uint week_behaviour) const + { + DBUG_ASSERT(is_valid_date_slow()); + return 
Temporal_with_date::yearweek(week_behaviour); + } + + longlong valid_date_to_packed() const + { + DBUG_ASSERT(is_valid_date_slow()); + return Temporal::to_packed(); + } + longlong to_longlong() const + { + return is_valid_date() ? (longlong) TIME_to_ulonglong_date(this) : 0LL; + } + double to_double() const + { + return (double) to_longlong(); + } + String *to_string(String *str) const + { + if (!is_valid_date()) + return NULL; + str->set_charset(&my_charset_numeric); + if (!str->alloc(MAX_DATE_STRING_REP_LENGTH)) + str->length(my_date_to_str(this, const_cast(str->ptr()))); + return str; + } + my_decimal *to_decimal(my_decimal *to) + { + return is_valid_date() ? Temporal::to_decimal(to) : bad_to_decimal(to); + } +}; + + +/** + Class Datetime is designed to store valid DATETIME values. + All constructors and modification methods leave instances + of this class in one of the following valid states: + a. MYSQL_TIMESTAMP_DATETIME - a DATETIME with all members properly set + b. MYSQL_TIMESTAMP_NONE - an undefined value. + Other MYSQL_TIMESTAMP_XXX are not possible. + MYSQL_TIMESTAMP_DATETIME with MYSQL_TIME members + improperly set is not possible. 
+*/ +class Datetime: public Temporal_with_date +{ + bool is_valid_value_slow() const + { + return time_type == MYSQL_TIMESTAMP_NONE || is_valid_datetime_slow(); + } + bool is_valid_datetime_slow() const + { + DBUG_ASSERT(time_type == MYSQL_TIMESTAMP_DATETIME); + return !check_datetime_range(this); + } + bool add_nanoseconds_or_invalidate(THD *thd, int *warn, ulong nsec) + { + DBUG_ASSERT(is_valid_datetime_slow()); + bool rc= Temporal::datetime_add_nanoseconds_or_invalidate(thd, warn, nsec); + DBUG_ASSERT(is_valid_value_slow()); + return rc; + } + void date_to_datetime_if_needed() + { + if (time_type == MYSQL_TIMESTAMP_DATE) + date_to_datetime(this); + } + void make_from_time(THD *thd, int *warn, const MYSQL_TIME *from, + date_conv_mode_t flags); + void make_from_datetime(THD *thd, int *warn, const MYSQL_TIME *from, + date_conv_mode_t flags); + bool round_or_invalidate(THD *thd, uint dec, int *warn); + bool round_or_invalidate(THD *thd, uint dec, int *warn, ulong nsec) + { + DBUG_ASSERT(is_valid_datetime_slow()); + bool rc= Temporal::datetime_round_or_invalidate(thd, dec, warn, nsec); + DBUG_ASSERT(is_valid_value_slow()); + return rc; + } +public: + + class Options: public Temporal_with_date::Options + { + public: + Options(date_conv_mode_t fuzzydate, time_round_mode_t nanosecond_rounding) + :Temporal_with_date::Options(fuzzydate, nanosecond_rounding) + { } + Options(THD *thd) + :Temporal_with_date::Options(sql_mode_for_dates(thd), default_round_mode(thd)) + { } + Options(THD *thd, time_round_mode_t rounding_mode) + :Temporal_with_date::Options(sql_mode_for_dates(thd), rounding_mode) + { } + Options(date_conv_mode_t fuzzydate, THD *thd) + :Temporal_with_date::Options(fuzzydate, default_round_mode(thd)) + { } + }; + + class Options_cmp: public Options + { + public: + Options_cmp(THD *thd) + :Options(comparison_flags_for_get_date(), thd) + { } + }; + + static Datetime zero() + { + int warn; + static Longlong_hybrid nr(0, false); + return Datetime(&warn, nr, 
date_mode_t(0)); + } +public: + Datetime() // NULL value + :Temporal_with_date() + { } + Datetime(THD *thd, Item *item, date_mode_t fuzzydate) + :Temporal_with_date(thd, item, fuzzydate) + { + date_to_datetime_if_needed(); + DBUG_ASSERT(is_valid_value_slow()); + } + Datetime(THD *thd, Item *item) + :Temporal_with_date(Datetime(thd, item, Options(thd))) + { } + Datetime(Item *item) + :Datetime(current_thd, item) + { } + + Datetime(THD *thd, int *warn, const MYSQL_TIME *from, date_conv_mode_t flags); + Datetime(THD *thd, MYSQL_TIME_STATUS *status, + const char *str, size_t len, CHARSET_INFO *cs, + const date_mode_t fuzzydate) + :Temporal_with_date(thd, status, str, len, cs, fuzzydate) + { + date_to_datetime_if_needed(); + DBUG_ASSERT(is_valid_value_slow()); + } + +protected: + Datetime(int *warn, const Sec6 &nr, date_mode_t flags) + :Temporal_with_date(warn, nr, flags) + { + date_to_datetime_if_needed(); + DBUG_ASSERT(is_valid_value_slow()); + } + Datetime(THD *thd, int *warn, const Sec9 &nr, date_mode_t fuzzydate) + :Datetime(warn, static_cast(nr), fuzzydate) + { + if (is_valid_datetime() && + time_round_mode_t(fuzzydate) == TIME_FRAC_ROUND) + round_or_invalidate(thd, 6, warn, nr.nsec()); + DBUG_ASSERT(is_valid_value_slow()); + } + +public: + Datetime(int *warn, const Longlong_hybrid &nr, date_mode_t mode) + :Datetime(warn, Sec6(nr), mode) + { } + Datetime(THD *thd, int *warn, double nr, date_mode_t fuzzydate) + :Datetime(thd, warn, Sec9(nr), fuzzydate) + { } + Datetime(THD *thd, int *warn, const my_decimal *d, date_mode_t fuzzydate) + :Datetime(thd, warn, Sec9(d), fuzzydate) + { } + Datetime(THD *thd, const timeval &tv); + + Datetime(THD *thd, Item *item, date_mode_t fuzzydate, uint dec) + :Datetime(thd, item, fuzzydate) + { + int warn= 0; + round(thd, dec, time_round_mode_t(fuzzydate), &warn); + } + Datetime(THD *thd, MYSQL_TIME_STATUS *status, + const char *str, size_t len, CHARSET_INFO *cs, + date_mode_t fuzzydate, uint dec) + :Datetime(thd, status, str, len, 
cs, fuzzydate) + { + round(thd, dec, time_round_mode_t(fuzzydate), &status->warnings); + } + Datetime(THD *thd, int *warn, double nr, date_mode_t fuzzydate, uint dec) + :Datetime(thd, warn, nr, fuzzydate) + { + round(thd, dec, time_round_mode_t(fuzzydate), warn); + } + Datetime(THD *thd, int *warn, const my_decimal *d, date_mode_t fuzzydate, uint dec) + :Datetime(thd, warn, d, fuzzydate) + { + round(thd, dec, time_round_mode_t(fuzzydate), warn); + } + Datetime(THD *thd, int *warn, const MYSQL_TIME *from, + date_mode_t fuzzydate, uint dec) + :Datetime(thd, warn, from, date_conv_mode_t(fuzzydate) & ~TIME_TIME_ONLY) + { + round(thd, dec, time_round_mode_t(fuzzydate), warn); + } + explicit Datetime(const Temporal_hybrid *from) + { + from->copy_valid_value_to_mysql_time(this); + DBUG_ASSERT(is_valid_datetime_slow()); + } + explicit Datetime(const MYSQL_TIME *from) + { + *(static_cast(this))= *from; + DBUG_ASSERT(is_valid_datetime_slow()); + } + Datetime(my_time_t unix_time, ulong second_part, + const Time_zone* time_zone); + + bool is_valid_datetime() const + { + /* + Here we quickly check for the type only. + If the type is valid, the rest of value must also be valid. 
+ */ + DBUG_ASSERT(is_valid_value_slow()); + return time_type == MYSQL_TIMESTAMP_DATETIME; + } + bool check_date(date_conv_mode_t flags, int *warnings) const + { + DBUG_ASSERT(is_valid_datetime_slow()); + return ::check_date(this, (year || month || day), + ulonglong(flags & TIME_MODE_FOR_XXX_TO_DATE), + warnings); + } + bool check_date(date_conv_mode_t flags) const + { + int dummy; /* unused */ + return check_date(flags, &dummy); + } + bool check_date(THD *thd) const + { + return check_date(Temporal::sql_mode_for_dates(thd)); + } + bool hhmmssff_is_zero() const + { + DBUG_ASSERT(is_valid_datetime_slow()); + return hour == 0 && minute == 0 && second == 0 && second_part == 0; + } + ulong daynr() const + { + DBUG_ASSERT(is_valid_datetime_slow()); + return Temporal_with_date::daynr(); + } + int weekday(bool sunday_first_day_of_week) const + { + DBUG_ASSERT(is_valid_datetime_slow()); + return Temporal_with_date::weekday(sunday_first_day_of_week); + } + ulong dayofyear() const + { + DBUG_ASSERT(is_valid_datetime_slow()); + return Temporal_with_date::dayofyear(); + } + uint quarter() const + { + DBUG_ASSERT(is_valid_datetime_slow()); + return Temporal_with_date::quarter(); + } + uint week(uint week_behaviour) const + { + DBUG_ASSERT(is_valid_datetime_slow()); + return Temporal_with_date::week(week_behaviour); + } + uint yearweek(uint week_behaviour) const + { + DBUG_ASSERT(is_valid_datetime_slow()); + return Temporal_with_date::yearweek(week_behaviour); + } + + longlong hhmmss_to_seconds_abs() const + { + DBUG_ASSERT(is_valid_datetime_slow()); + return hour * 3600L + minute * 60 + second; + } + longlong hhmmss_to_seconds() const + { + return neg ? 
-hhmmss_to_seconds_abs() : hhmmss_to_seconds_abs(); + } + longlong to_seconds() const + { + return hhmmss_to_seconds() + (longlong) daynr() * 24L * 3600L; + } + + const MYSQL_TIME *get_mysql_time() const + { + DBUG_ASSERT(is_valid_datetime_slow()); + return this; + } + bool copy_to_mysql_time(MYSQL_TIME *ltime) const + { + if (time_type == MYSQL_TIMESTAMP_NONE) + { + ltime->time_type= MYSQL_TIMESTAMP_NONE; + return true; + } + DBUG_ASSERT(is_valid_datetime_slow()); + *ltime= *this; + return false; + } + /** + Copy without data loss, with an optional DATETIME to DATE conversion. + If the value of the "type" argument is MYSQL_TIMESTAMP_DATE, + then "this" must be a datetime with a zero hhmmssff part. + */ + bool copy_to_mysql_time(MYSQL_TIME *ltime, timestamp_type type) + { + DBUG_ASSERT(type == MYSQL_TIMESTAMP_DATE || + type == MYSQL_TIMESTAMP_DATETIME); + if (copy_to_mysql_time(ltime)) + return true; + DBUG_ASSERT(type != MYSQL_TIMESTAMP_DATE || hhmmssff_is_zero()); + ltime->time_type= type; + return false; + } + longlong to_longlong() const + { + return is_valid_datetime() ? + (longlong) TIME_to_ulonglong_datetime(this) : 0LL; + } + double to_double() const + { + return !is_valid_datetime() ? 0 : + Temporal::to_double(neg, TIME_to_ulonglong_datetime(this), second_part); + } + String *to_string(String *str, uint dec) const + { + if (!is_valid_datetime()) + return NULL; + str->set_charset(&my_charset_numeric); + if (!str->alloc(MAX_DATE_STRING_REP_LENGTH)) + str->length(my_datetime_to_str(this, const_cast(str->ptr()), dec)); + return str; + } + my_decimal *to_decimal(my_decimal *to) + { + return is_valid_datetime() ? Temporal::to_decimal(to) : bad_to_decimal(to); + } + longlong to_packed() const + { + return is_valid_datetime() ? 
Temporal::to_packed() : 0; + } + longlong valid_datetime_to_packed() const + { + DBUG_ASSERT(is_valid_datetime_slow()); + return Temporal::to_packed(); + } + long fraction_remainder(uint dec) const + { + DBUG_ASSERT(is_valid_datetime()); + return Temporal::fraction_remainder(dec); + } + + Datetime &trunc(uint dec) + { + if (is_valid_datetime()) + my_datetime_trunc(this, dec); + DBUG_ASSERT(is_valid_value_slow()); + return *this; + } + Datetime &ceiling(THD *thd, int *warn) + { + if (is_valid_datetime() && second_part) + round_or_invalidate(thd, 0, warn, 999999999); + DBUG_ASSERT(is_valid_value_slow()); + return *this; + } + Datetime &ceiling(THD *thd) + { + int warn= 0; + return ceiling(thd, &warn); + } + Datetime &round(THD *thd, uint dec, int *warn) + { + if (is_valid_datetime()) + round_or_invalidate(thd, dec, warn); + DBUG_ASSERT(is_valid_value_slow()); + return *this; + } + Datetime &round(THD *thd, uint dec, time_round_mode_t mode, int *warn) + { + switch (mode.mode()) { + case time_round_mode_t::FRAC_NONE: + DBUG_ASSERT(fraction_remainder(dec) == 0); + return trunc(dec); + case time_round_mode_t::FRAC_TRUNCATE: + return trunc(dec); + case time_round_mode_t::FRAC_ROUND: + return round(thd, dec, warn); + } + return *this; + } + Datetime &round(THD *thd, uint dec, time_round_mode_t mode) + { + int warn= 0; + return round(thd, dec, mode, &warn); + } + +}; + + +/* + Datetime to be created from an Item who is known to be of a temporal + data type. For temporal data types we don't need nanosecond rounding + or truncation, as their precision is limited. +*/ +class Datetime_from_temporal: public Datetime +{ +public: + // The constructor DBUG_ASSERTs on a proper Item data type. + Datetime_from_temporal(THD *thd, Item *temporal, date_conv_mode_t flags); +}; + + +/* + Datetime to be created from an Item who is known not to have digits outside + of the specified scale. So it's not important which rounding method to use. + TRUNCATE should work. 
+ Typically, Item is of a temporal data type, but this is not strictly required. +*/ +class Datetime_truncation_not_needed: public Datetime +{ +public: + Datetime_truncation_not_needed(THD *thd, Item *item, date_conv_mode_t mode); + Datetime_truncation_not_needed(THD *thd, Item *item, date_mode_t mode) + :Datetime_truncation_not_needed(thd, item, date_conv_mode_t(mode)) + { } +}; + + +class Timestamp: protected Timeval +{ + static uint binary_length_to_precision(uint length); +protected: + void round_or_set_max(uint dec, int *warn); + bool add_nanoseconds_usec(uint nanoseconds) + { + DBUG_ASSERT(nanoseconds <= 1000000000); + if (nanoseconds < 500) + return false; + tv_usec+= (nanoseconds + 500) / 1000; + if (tv_usec < 1000000) + return false; + tv_usec%= 1000000; + return true; + } +public: + static date_conv_mode_t sql_mode_for_timestamp(THD *thd); + static time_round_mode_t default_round_mode(THD *thd); + class DatetimeOptions: public date_mode_t + { + public: + DatetimeOptions(date_conv_mode_t fuzzydate, time_round_mode_t round_mode) + :date_mode_t(fuzzydate | round_mode) + { } + DatetimeOptions(THD *thd) + :DatetimeOptions(sql_mode_for_timestamp(thd), default_round_mode(thd)) + { } + }; +public: + Timestamp(my_time_t timestamp, ulong sec_part) + :Timeval(timestamp, sec_part) + { } + explicit Timestamp(const timeval &tv) + :Timeval(tv) + { } + explicit Timestamp(const Native &native); + Timestamp(THD *thd, const MYSQL_TIME *ltime, uint *error_code); + const struct timeval &tv() const { return *this; } + int cmp(const Timestamp &other) const + { + return tv_sec < other.tv_sec ? -1 : + tv_sec > other.tv_sec ? +1 : + tv_usec < other.tv_usec ? -1 : + tv_usec > other.tv_usec ? 
+1 : 0; + } + bool to_TIME(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) const; + bool to_native(Native *to, uint decimals) const; + Datetime to_datetime(THD *thd) const + { + return Datetime(thd, *this); + } + long fraction_remainder(uint dec) const + { + return my_time_fraction_remainder(tv_usec, dec); + } + Timestamp &trunc(uint dec) + { + my_timeval_trunc(this, dec); + return *this; + } + Timestamp &round(uint dec, int *warn) + { + round_or_set_max(dec, warn); + return *this; + } + Timestamp &round(uint dec, time_round_mode_t mode, int *warn) + { + switch (mode.mode()) { + case time_round_mode_t::FRAC_NONE: + DBUG_ASSERT(fraction_remainder(dec) == 0); + return trunc(dec); + case time_round_mode_t::FRAC_TRUNCATE: + return trunc(dec); + case time_round_mode_t::FRAC_ROUND: + return round(dec, warn); + } + return *this; + } + Timestamp &round(uint dec, time_round_mode_t mode) + { + int warn= 0; + return round(dec, mode, &warn); + } +}; + + +/** + A helper class to store MariaDB TIMESTAMP values, which can be: + - real TIMESTAMP (seconds and microseconds since epoch), or + - zero datetime '0000-00-00 00:00:00.000000' +*/ +class Timestamp_or_zero_datetime: protected Timestamp +{ + bool m_is_zero_datetime; +public: + Timestamp_or_zero_datetime() + :Timestamp(0,0), m_is_zero_datetime(true) + { } + Timestamp_or_zero_datetime(const Native &native) + :Timestamp(native.length() ? 
Timestamp(native) : Timestamp(0,0)), + m_is_zero_datetime(native.length() == 0) + { } + Timestamp_or_zero_datetime(const Timestamp &tm, bool is_zero_datetime) + :Timestamp(tm), m_is_zero_datetime(is_zero_datetime) + { } + Timestamp_or_zero_datetime(THD *thd, const MYSQL_TIME *ltime, uint *err_code); + Datetime to_datetime(THD *thd) const + { + if (is_zero_datetime()) + return Datetime::zero(); + return Timestamp::to_datetime(thd); + } + bool is_zero_datetime() const { return m_is_zero_datetime; } + void trunc(uint decimals) + { + if (!is_zero_datetime()) + Timestamp::trunc(decimals); + } + int cmp(const Timestamp_or_zero_datetime &other) const + { + if (is_zero_datetime()) + return other.is_zero_datetime() ? 0 : -1; + if (other.is_zero_datetime()) + return 1; + return Timestamp::cmp(other); + } + bool to_TIME(THD *thd, MYSQL_TIME *to, date_mode_t fuzzydate) const; + /* + Convert to native format: + - Real timestamps are encoded in the same way how Field_timestamp2 stores + values (big endian seconds followed by big endian microseconds) + - Zero datetime '0000-00-00 00:00:00.000000' is encoded as empty string. + Two native values are binary comparable. + */ + bool to_native(Native *to, uint decimals) const; +}; + + +/** + A helper class to store non-null MariaDB TIMESTAMP values in + the native binary encoded representation. +*/ +class Timestamp_or_zero_datetime_native: + public NativeBuffer +{ +public: + Timestamp_or_zero_datetime_native() = default; + Timestamp_or_zero_datetime_native(const Timestamp_or_zero_datetime &ts, + uint decimals) + { + if (ts.to_native(this, decimals)) + length(0); // safety + } + int save_in_field(Field *field, uint decimals) const; + Datetime to_datetime(THD *thd) const + { + return is_zero_datetime() ? 
+ Datetime::zero() : + Datetime(thd, Timestamp(*this).tv()); + } + bool is_zero_datetime() const + { + return length() == 0; + } +}; + + +/** + A helper class to store nullable MariaDB TIMESTAMP values in + the native binary encoded representation. +*/ +class Timestamp_or_zero_datetime_native_null: public Timestamp_or_zero_datetime_native, + public Null_flag +{ +public: + // With optional data type conversion + Timestamp_or_zero_datetime_native_null(THD *thd, Item *item, bool conv); + // Without data type conversion: item is known to be of the TIMESTAMP type + Timestamp_or_zero_datetime_native_null(THD *thd, Item *item) + :Timestamp_or_zero_datetime_native_null(thd, item, false) + { } + Datetime to_datetime(THD *thd) const + { + return is_null() ? Datetime() : + Timestamp_or_zero_datetime_native::to_datetime(thd); + } + void to_TIME(THD *thd, MYSQL_TIME *to) + { + DBUG_ASSERT(!is_null()); + Datetime::Options opt(TIME_CONV_NONE, TIME_FRAC_NONE); + Timestamp_or_zero_datetime(*this).to_TIME(thd, to, opt); + } + bool is_zero_datetime() const + { + DBUG_ASSERT(!is_null()); + return Timestamp_or_zero_datetime_native::is_zero_datetime(); + } +}; + + +/* + Flags for collation aggregation modes, used in TDCollation::agg(): + + MY_COLL_ALLOW_SUPERSET_CONV - allow conversion to a superset + MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value + (i.e. constant). + MY_COLL_ALLOW_CONV - allow any kind of conversion + (combination of the above two) + MY_COLL_ALLOW_NUMERIC_CONV - if all items were numbers, convert to + @@character_set_connection + MY_COLL_DISALLOW_NONE - don't allow return DERIVATION_NONE + (e.g. 
when aggregating for comparison) + MY_COLL_CMP_CONV - combination of MY_COLL_ALLOW_CONV + and MY_COLL_DISALLOW_NONE +*/ + +#define MY_COLL_ALLOW_SUPERSET_CONV 1 +#define MY_COLL_ALLOW_COERCIBLE_CONV 2 +#define MY_COLL_DISALLOW_NONE 4 +#define MY_COLL_ALLOW_NUMERIC_CONV 8 + +#define MY_COLL_ALLOW_CONV (MY_COLL_ALLOW_SUPERSET_CONV | MY_COLL_ALLOW_COERCIBLE_CONV) +#define MY_COLL_CMP_CONV (MY_COLL_ALLOW_CONV | MY_COLL_DISALLOW_NONE) + + +#define MY_REPERTOIRE_NUMERIC MY_REPERTOIRE_ASCII + + +static inline my_repertoire_t operator|(const my_repertoire_t a, + const my_repertoire_t b) +{ + return (my_repertoire_t) ((uint) a | (uint) b); +} + +static inline my_repertoire_t &operator|=(my_repertoire_t &a, + const my_repertoire_t b) +{ + return a= (my_repertoire_t) ((uint) a | (uint) b); +} + + +enum Derivation +{ + DERIVATION_IGNORABLE= 6, + DERIVATION_NUMERIC= 5, + DERIVATION_COERCIBLE= 4, + DERIVATION_SYSCONST= 3, + DERIVATION_IMPLICIT= 2, + DERIVATION_NONE= 1, + DERIVATION_EXPLICIT= 0 +}; + + +/** + "Declared Type Collation" + A combination of collation and its derivation. +*/ + +class DTCollation { +public: + CHARSET_INFO *collation; + enum Derivation derivation; + my_repertoire_t repertoire; + + void set_repertoire_from_charset(CHARSET_INFO *cs) + { + repertoire= cs->state & MY_CS_PUREASCII ? + MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30; + } + DTCollation() + { + collation= &my_charset_bin; + derivation= DERIVATION_NONE; + repertoire= MY_REPERTOIRE_UNICODE30; + } + DTCollation(CHARSET_INFO *collation_arg) + { + /* + This constructor version is used in combination with Field constructors, + to pass "CHARSET_INFO" instead of the full DTCollation. + Therefore, derivation is set to DERIVATION_IMPLICIT, which is the + proper derivation for table fields. + We should eventually remove all code pieces that pass "CHARSET_INFO" + (e.g. in storage engine sources) and fix to pass the full DTCollation + instead. Then, this constructor can be removed. 
+ */ + collation= collation_arg; + derivation= DERIVATION_IMPLICIT; + repertoire= my_charset_repertoire(collation_arg); + } + DTCollation(CHARSET_INFO *collation_arg, Derivation derivation_arg) + { + collation= collation_arg; + derivation= derivation_arg; + set_repertoire_from_charset(collation_arg); + } + DTCollation(CHARSET_INFO *collation_arg, + Derivation derivation_arg, + my_repertoire_t repertoire_arg) + :collation(collation_arg), + derivation(derivation_arg), + repertoire(repertoire_arg) + { } + void set(const DTCollation &dt) + { + *this= dt; + } + void set(CHARSET_INFO *collation_arg, Derivation derivation_arg) + { + collation= collation_arg; + derivation= derivation_arg; + set_repertoire_from_charset(collation_arg); + } + void set(CHARSET_INFO *collation_arg, + Derivation derivation_arg, + my_repertoire_t repertoire_arg) + { + collation= collation_arg; + derivation= derivation_arg; + repertoire= repertoire_arg; + } + void set(CHARSET_INFO *collation_arg) + { + collation= collation_arg; + set_repertoire_from_charset(collation_arg); + } + void set(Derivation derivation_arg) + { derivation= derivation_arg; } + bool aggregate(const DTCollation &dt, uint flags= 0); + bool set(DTCollation &dt1, DTCollation &dt2, uint flags= 0) + { set(dt1); return aggregate(dt2, flags); } + const char *derivation_name() const + { + switch(derivation) + { + case DERIVATION_NUMERIC: return "NUMERIC"; + case DERIVATION_IGNORABLE: return "IGNORABLE"; + case DERIVATION_COERCIBLE: return "COERCIBLE"; + case DERIVATION_IMPLICIT: return "IMPLICIT"; + case DERIVATION_SYSCONST: return "SYSCONST"; + case DERIVATION_EXPLICIT: return "EXPLICIT"; + case DERIVATION_NONE: return "NONE"; + default: return "UNKNOWN"; + } + } + int sortcmp(const Binary_string *s, const Binary_string *t) const + { + return collation->strnncollsp(s->ptr(), s->length(), + t->ptr(), t->length()); + } +}; + + +class DTCollation_numeric: public DTCollation +{ +public: + DTCollation_numeric() + 
:DTCollation(charset_info(), DERIVATION_NUMERIC, MY_REPERTOIRE_NUMERIC) + { } + static const CHARSET_INFO *charset_info() { return &my_charset_numeric; } + static const DTCollation & singleton(); +}; + + +static inline uint32 +char_to_byte_length_safe(size_t char_length_arg, uint32 mbmaxlen_arg) +{ + ulonglong tmp= ((ulonglong) char_length_arg) * mbmaxlen_arg; + return tmp > UINT_MAX32 ? (uint32) UINT_MAX32 : static_cast(tmp); +} + +class Type_numeric_attributes +{ +public: + static uint count_unsigned(Item **item, uint nitems); + static uint32 find_max_char_length(Item **item, uint nitems); + static uint32 find_max_octet_length(Item **item, uint nitems); + static decimal_digits_t find_max_decimal_int_part(Item **item, uint nitems); + static decimal_digits_t find_max_decimals(Item **item, uint nitems); +public: + /* + The maximum value length in characters multiplied by collation->mbmaxlen. + Almost always it's the maximum value length in bytes. + */ + uint32 max_length; + decimal_digits_t decimals; + bool unsigned_flag; +public: + Type_numeric_attributes() + :max_length(0), decimals(0), unsigned_flag(false) + { } + Type_numeric_attributes(uint32 max_length_arg, decimal_digits_t decimals_arg, + bool unsigned_flag_arg) + :max_length(max_length_arg), + decimals(decimals_arg), + unsigned_flag(unsigned_flag_arg) + { } +protected: + void aggregate_numeric_attributes_real(Item **item, uint nitems); + void aggregate_numeric_attributes_decimal(Item **item, uint nitems, + bool unsigned_arg); +}; + + + +class Type_temporal_attributes: public Type_numeric_attributes +{ +public: + Type_temporal_attributes(uint32 int_part_length, decimal_digits_t dec, bool unsigned_arg) + :Type_numeric_attributes(int_part_length + (dec ? 
1 : 0), + MY_MIN(dec, + (decimal_digits_t) TIME_SECOND_PART_DIGITS), + unsigned_arg) + { + max_length+= decimals; + } +}; + + +class Type_temporal_attributes_not_fixed_dec: public Type_numeric_attributes +{ +public: + Type_temporal_attributes_not_fixed_dec(uint32 int_part_length, decimal_digits_t dec, + bool unsigned_flag) + :Type_numeric_attributes(int_part_length, dec, unsigned_flag) + { + if (decimals == NOT_FIXED_DEC) + max_length+= TIME_SECOND_PART_DIGITS + 1; + else if (decimals) + { + set_if_smaller(decimals, TIME_SECOND_PART_DIGITS); + max_length+= decimals + 1; + } + } +}; + + +/** + A class to store type attributes for the standard data types. + Does not include attributes for the extended data types + such as ENUM, SET, GEOMETRY. +*/ +class Type_std_attributes: public Type_numeric_attributes +{ +public: + DTCollation collation; + Type_std_attributes() + :collation(&my_charset_bin, DERIVATION_COERCIBLE) + { } + Type_std_attributes(const Type_numeric_attributes &nattr, + const DTCollation &dtc) + :Type_numeric_attributes(nattr), collation(dtc) + { } + void set(const Type_std_attributes *other) + { + *this= *other; + } + void set(const Type_std_attributes &other) + { + *this= other; + } + void set(const Type_numeric_attributes &nattr, const DTCollation &dtc) + { + *this= Type_std_attributes(nattr, dtc); + } + uint32 max_char_length() const + { return max_length / collation.collation->mbmaxlen; } + void fix_length_and_charset(uint32 max_char_length_arg, CHARSET_INFO *cs) + { + max_length= char_to_byte_length_safe(max_char_length_arg, cs->mbmaxlen); + collation.collation= cs; + } + void fix_char_length(uint32 max_char_length_arg) + { + max_length= char_to_byte_length_safe(max_char_length_arg, + collation.collation->mbmaxlen); + } + void fix_attributes_temporal(uint32 int_part_length, decimal_digits_t dec) + { + *this= Type_std_attributes( + Type_temporal_attributes(int_part_length, dec, false), + DTCollation_numeric()); + } + void fix_attributes_date() + { + 
fix_attributes_temporal(MAX_DATE_WIDTH, 0); + } + void fix_attributes_time(decimal_digits_t dec) + { + fix_attributes_temporal(MIN_TIME_WIDTH, dec); + } + void fix_attributes_datetime(decimal_digits_t dec) + { + fix_attributes_temporal(MAX_DATETIME_WIDTH, dec); + } + + void aggregate_attributes_int(Item **items, uint nitems) + { + collation= DTCollation_numeric(); + fix_char_length(find_max_char_length(items, nitems)); + unsigned_flag= count_unsigned(items, nitems) > 0; + decimals= 0; + } + void aggregate_attributes_real(Item **items, uint nitems) + { + collation= DTCollation_numeric(); + aggregate_numeric_attributes_real(items, nitems); + } + void aggregate_attributes_decimal(Item **items, uint nitems, + bool unsigned_arg) + { + collation= DTCollation_numeric(); + aggregate_numeric_attributes_decimal(items, nitems, + (unsigned_flag= unsigned_arg)); + } + bool aggregate_attributes_string(const LEX_CSTRING &func_name, + Item **item, uint nitems); + void aggregate_attributes_temporal(uint int_part_length, + Item **item, uint nitems) + { + fix_attributes_temporal(int_part_length, find_max_decimals(item, nitems)); + } + + bool agg_item_collations(DTCollation &c, const LEX_CSTRING &name, + Item **items, uint nitems, + uint flags, int item_sep); + struct Single_coll_err + { + const DTCollation& coll; + bool first; + }; + bool agg_item_set_converter(const DTCollation &coll, + const LEX_CSTRING &name, + Item **args, uint nargs, + uint flags, int item_sep, + const Single_coll_err *single_item_err= NULL); + + /* + Collect arguments' character sets together. + We allow to apply automatic character set conversion in some cases. + The conditions when conversion is possible are: + - arguments A and B have different charsets + - A wins according to coercibility rules + (i.e. 
a column is stronger than a string constant, + an explicit COLLATE clause is stronger than a column) + - character set of A is either superset for character set of B, + or B is a string constant which can be converted into the + character set of A without data loss. + + If all of the above is true, then it's possible to convert + B into the character set of A, and then compare according + to the collation of A. + + For functions with more than two arguments: + + collect(A,B,C) ::= collect(collect(A,B),C) + + Since this function calls THD::change_item_tree() on the passed Item ** + pointers, it is necessary to pass the original Item **'s, not copies. + Otherwise their values will not be properly restored (see BUG#20769). + If the items are not consecutive (eg. args[2] and args[5]), use the + item_sep argument, ie. + + agg_item_charsets(coll, fname, &args[2], 2, flags, 3) + */ + bool agg_arg_charsets(DTCollation &c, const LEX_CSTRING &func_name, + Item **items, uint nitems, + uint flags, int item_sep) + { + if (agg_item_collations(c, func_name, items, nitems, flags, item_sep)) + return true; + return agg_item_set_converter(c, func_name, items, nitems, flags, item_sep); + } + /* + Aggregate arguments for string result, e.g: CONCAT(a,b) + - convert to @@character_set_connection if all arguments are numbers + - allow DERIVATION_NONE + */ + bool agg_arg_charsets_for_string_result(DTCollation &c, + const LEX_CSTRING &func_name, + Item **items, uint nitems, + int item_sep) + { + uint flags= MY_COLL_ALLOW_SUPERSET_CONV | + MY_COLL_ALLOW_COERCIBLE_CONV | + MY_COLL_ALLOW_NUMERIC_CONV; + return agg_arg_charsets(c, func_name, items, nitems, flags, item_sep); + } + /* + Aggregate arguments for string result, when some comparison + is involved internally, e.g: REPLACE(a,b,c) + - convert to @@character_set_connection if all arguments are numbers + - disallow DERIVATION_NONE + */ + bool agg_arg_charsets_for_string_result_with_comparison(DTCollation &c, + const LEX_CSTRING 
&func_name, + Item **items, + uint nitems, + int item_sep) + { + uint flags= MY_COLL_ALLOW_SUPERSET_CONV | + MY_COLL_ALLOW_COERCIBLE_CONV | + MY_COLL_ALLOW_NUMERIC_CONV | + MY_COLL_DISALLOW_NONE; + return agg_arg_charsets(c, func_name, items, nitems, flags, item_sep); + } + + /* + Aggregate arguments for comparison, e.g: a=b, a LIKE b, a RLIKE b + - don't convert to @@character_set_connection if all arguments are numbers + - don't allow DERIVATION_NONE + */ + bool agg_arg_charsets_for_comparison(DTCollation &c, + const LEX_CSTRING &func_name, + Item **items, uint nitems, + int item_sep) + { + uint flags= MY_COLL_ALLOW_SUPERSET_CONV | + MY_COLL_ALLOW_COERCIBLE_CONV | + MY_COLL_DISALLOW_NONE; + return agg_arg_charsets(c, func_name, items, nitems, flags, item_sep); + } + +}; + + +class Type_all_attributes: public Type_std_attributes +{ +public: + Type_all_attributes() = default; + Type_all_attributes(const Type_all_attributes &) = default; + virtual ~Type_all_attributes() = default; + virtual void set_type_maybe_null(bool maybe_null_arg)= 0; + // Returns total number of decimal digits + virtual decimal_digits_t decimal_precision() const= 0; + virtual const TYPELIB *get_typelib() const= 0; + virtual void set_typelib(const TYPELIB *typelib)= 0; +}; + + +class Type_cmp_attributes +{ +public: + virtual ~Type_cmp_attributes() = default; + virtual CHARSET_INFO *compare_collation() const= 0; +}; + + +class Type_cast_attributes +{ + CHARSET_INFO *m_charset; + ulonglong m_length; + ulonglong m_decimals; + bool m_length_specified; + bool m_decimals_specified; +public: + Type_cast_attributes(const Lex_length_and_dec_st &length_and_dec, + CHARSET_INFO *cs) + :m_charset(cs), m_length(0), m_decimals(0), + m_length_specified(false), m_decimals_specified(false) + { + m_length= length_and_dec.length_overflowed() ? 
(ulonglong) UINT_MAX32 + 1 : + length_and_dec.length(); + m_decimals= length_and_dec.dec(); + m_length_specified= length_and_dec.has_explicit_length(); + m_decimals_specified= length_and_dec.has_explicit_dec(); + } + Type_cast_attributes(CHARSET_INFO *cs) + :m_charset(cs), m_length(0), m_decimals(0), + m_length_specified(false), m_decimals_specified(false) + { } + CHARSET_INFO *charset() const { return m_charset; } + bool length_specified() const { return m_length_specified; } + bool decimals_specified() const { return m_decimals_specified; } + ulonglong length() const { return m_length; } + ulonglong decimals() const { return m_decimals; } +}; + + +class Name: private LEX_CSTRING +{ +public: + constexpr Name(const char *str_arg, uint length_arg) : + LEX_CSTRING({str_arg, length_arg}) + { } + constexpr Name(const LEX_CSTRING &lcs) : + LEX_CSTRING(lcs) + { } + const char *ptr() const { return LEX_CSTRING::str; } + uint length() const { return (uint) LEX_CSTRING::length; } + const LEX_CSTRING &lex_cstring() const { return *this; } + bool eq(const LEX_CSTRING &other) const + { + return !system_charset_info->strnncoll(LEX_CSTRING::str, LEX_CSTRING::length, + other.str, other.length); + } +}; + + +class Bit_addr +{ + /** + Byte where the bit is stored inside a record. + If the corresponding Field is a NOT NULL field, this member is NULL. + */ + uchar *m_ptr; + /** + Offset of the bit inside m_ptr[0], in the range 0..7. + */ + uchar m_offs; +public: + Bit_addr() + :m_ptr(NULL), + m_offs(0) + { } + Bit_addr(uchar *ptr, uchar offs) + :m_ptr(ptr), m_offs(offs) + { + DBUG_ASSERT(ptr || offs == 0); + DBUG_ASSERT(offs < 8); + } + Bit_addr(bool maybe_null) + :m_ptr(maybe_null ? (uchar *) "" : NULL), + m_offs(0) + { } + uchar *ptr() const { return m_ptr; } + uchar offs() const { return m_offs; } + uchar bit() const { return static_cast<uchar>(m_ptr ? 
1U << m_offs : 0); } + void inc() + { + DBUG_ASSERT(m_ptr); + m_ptr+= (m_offs == 7); + m_offs= (m_offs + 1) & 7; + } +}; + + +class Record_addr +{ + uchar *m_ptr; // Position of the field in the record + Bit_addr m_null; // Position and offset of the null bit +public: + Record_addr(uchar *ptr_arg, + uchar *null_ptr_arg, + uchar null_bit_arg) + :m_ptr(ptr_arg), + m_null(null_ptr_arg, null_bit_arg) + { } + Record_addr(uchar *ptr, const Bit_addr &null) + :m_ptr(ptr), + m_null(null) + { } + Record_addr(bool maybe_null) + :m_ptr(NULL), + m_null(maybe_null) + { } + uchar *ptr() const { return m_ptr; } + const Bit_addr &null() const { return m_null; } + uchar *null_ptr() const { return m_null.ptr(); } + uchar null_bit() const { return m_null.bit(); } +}; + + +class Information_schema_numeric_attributes +{ + enum enum_attr + { + ATTR_NONE= 0, + ATTR_PRECISION= 1, + ATTR_SCALE= 2, + ATTR_PRECISION_AND_SCALE= (ATTR_PRECISION|ATTR_SCALE) + }; + uint m_precision; + decimal_digits_t m_scale; + enum_attr m_available_attributes; +public: + Information_schema_numeric_attributes() + :m_precision(0), m_scale(0), + m_available_attributes(ATTR_NONE) + { } + Information_schema_numeric_attributes(uint precision) + :m_precision(precision), m_scale(0), + m_available_attributes(ATTR_PRECISION) + { } + Information_schema_numeric_attributes(uint precision, decimal_digits_t scale) + :m_precision(precision), m_scale(scale), + m_available_attributes(ATTR_PRECISION_AND_SCALE) + { } + bool has_precision() const { return m_available_attributes & ATTR_PRECISION; } + bool has_scale() const { return m_available_attributes & ATTR_SCALE; } + uint precision() const + { + DBUG_ASSERT(has_precision()); + return (uint) m_precision; + } + decimal_digits_t scale() const + { + DBUG_ASSERT(has_scale()); + return m_scale; + } +}; + + +class Information_schema_character_attributes +{ + uint32 m_octet_length; + uint32 m_char_length; + bool m_is_set; +public: + Information_schema_character_attributes() + 
:m_octet_length(0), m_char_length(0), m_is_set(false) + { } + Information_schema_character_attributes(uint32 octet_length, + uint32 char_length) + :m_octet_length(octet_length), m_char_length(char_length), m_is_set(true) + { } + bool has_octet_length() const { return m_is_set; } + bool has_char_length() const { return m_is_set; } + uint32 octet_length() const + { + DBUG_ASSERT(has_octet_length()); + return m_octet_length; + } + uint char_length() const + { + DBUG_ASSERT(has_char_length()); + return m_char_length; + } +}; + + +enum vers_kind_t +{ + VERS_UNDEFINED= 0, + VERS_TIMESTAMP, + VERS_TRX_ID +}; + + +class Vers_type_handler +{ +protected: + Vers_type_handler() = default; +public: + virtual ~Vers_type_handler() = default; + virtual vers_kind_t kind() const + { + DBUG_ASSERT(0); + return VERS_UNDEFINED; + } + virtual bool check_sys_fields(const LEX_CSTRING &table_name, + const Column_definition *row_start, + const Column_definition *row_end) const= 0; +}; + + +class Vers_type_timestamp: public Vers_type_handler +{ +public: + virtual vers_kind_t kind() const + { + return VERS_TIMESTAMP; + } + bool check_sys_fields(const LEX_CSTRING &table_name, + const Column_definition *row_start, + const Column_definition *row_end) const; +}; +extern Vers_type_timestamp vers_type_timestamp; + + +class Vers_type_trx: public Vers_type_handler +{ +public: + virtual vers_kind_t kind() const + { + return VERS_TRX_ID; + } + bool check_sys_fields(const LEX_CSTRING &table_name, + const Column_definition *row_start, + const Column_definition *row_end) const; +}; +extern MYSQL_PLUGIN_IMPORT Vers_type_trx vers_type_trx; + + +class Type_handler +{ + Name m_name; +protected: + String *print_item_value_csstr(THD *thd, Item *item, String *str) const; + String *print_item_value_temporal(THD *thd, Item *item, String *str, + const Name &type_name, String *buf) const; + void make_sort_key_longlong(uchar *to, + bool maybe_null, bool null_value, + bool unsigned_flag, + longlong value) const; + 
void store_sort_key_longlong(uchar *to, bool unsigned_flag, + longlong value) const; + + uint make_packed_sort_key_longlong(uchar *to, bool maybe_null, + bool null_value, bool unsigned_flag, + longlong value, + const SORT_FIELD_ATTR *sort_field) const; + + bool Item_func_or_sum_illegal_param(const LEX_CSTRING &name) const; + bool Item_func_or_sum_illegal_param(const Item_func_or_sum *) const; + bool check_null(const Item *item, st_value *value) const; + bool Item_send_str(Item *item, Protocol *protocol, st_value *buf) const; + bool Item_send_tiny(Item *item, Protocol *protocol, st_value *buf) const; + bool Item_send_short(Item *item, Protocol *protocol, st_value *buf) const; + bool Item_send_long(Item *item, Protocol *protocol, st_value *buf) const; + bool Item_send_longlong(Item *item, Protocol *protocol, st_value *buf) const; + bool Item_send_float(Item *item, Protocol *protocol, st_value *buf) const; + bool Item_send_double(Item *item, Protocol *protocol, st_value *buf) const; + bool Item_send_time(Item *item, Protocol *protocol, st_value *buf) const; + bool Item_send_date(Item *item, Protocol *protocol, st_value *buf) const; + bool Item_send_timestamp(Item *item, Protocol *protocol, st_value *buf) const; + bool Item_send_datetime(Item *item, Protocol *protocol, st_value *buf) const; + bool Column_definition_prepare_stage2_legacy(Column_definition *c, + enum_field_types type) + const; + bool Column_definition_prepare_stage2_legacy_num(Column_definition *c, + enum_field_types type) + const; + bool Column_definition_prepare_stage2_legacy_real(Column_definition *c, + enum_field_types type) + const; +public: + static const Type_handler *handler_by_name(THD *thd, const LEX_CSTRING &name); + static const Type_handler *handler_by_name_or_error(THD *thd, + const LEX_CSTRING &name); + static const Type_handler *odbc_literal_type_handler(const LEX_CSTRING *str); + static const Type_handler *blob_type_handler(uint max_octet_length); + static const Type_handler 
*string_type_handler(uint max_octet_length); + static const Type_handler *bit_and_int_mixture_handler(uint max_char_len); + static const Type_handler *type_handler_long_or_longlong(uint max_char_len, + bool unsigned_flag); + /** + Return a string type handler for Item + If too_big_for_varchar() returns a BLOB variant, according to length. + If max_length > 0 create a VARCHAR(n) + If max_length == 0 create a CHAR(0) + @param item - the Item to get the handler to. + */ + static const Type_handler *varstring_type_handler(const Item *item); + static const Type_handler *blob_type_handler(const Item *item); + static const Type_handler *get_handler_by_field_type(enum_field_types type); + static const Type_handler *get_handler_by_real_type(enum_field_types type); + static const Type_handler *get_handler_by_cmp_type(Item_result type); + static const Type_collection * + type_collection_for_aggregation(const Type_handler *h1, + const Type_handler *h2); + virtual const Type_collection *type_collection() const; + static const + Type_handler *aggregate_for_result_traditional(const Type_handler *h1, + const Type_handler *h2); + virtual Schema *schema() const; + static void partition_field_type_not_allowed(const LEX_CSTRING &field_name); + static bool partition_field_check_result_type(Item *item, + Item_result expected_type); + static const Name & version_mysql56(); + static const Name & version_mariadb53(); + + void set_name(Name n) { DBUG_ASSERT(!m_name.ptr()); m_name= n; } + const Name name() const { return m_name; } + virtual const Name version() const; + virtual const Name &default_value() const= 0; + virtual uint32 flags() const { return 0; } + virtual ulong KEY_pack_flags(uint column_nr) const { return 0; } + bool is_unsigned() const { return flags() & UNSIGNED_FLAG; } + virtual enum_field_types field_type() const= 0; + virtual enum_field_types real_field_type() const { return field_type(); } + /** + Type code which is used for merging of traditional data types for result + 
(for UNION and for hybrid functions such as COALESCE). + Mapping can be done both ways: old->new, new->old, depending + on the particular data type implementation: + - type_handler_var_string (MySQL-4.1 old VARCHAR) is converted to + new VARCHAR before merging. + field_type_merge_rules[][] returns new VARCHAR. + - type_handler_newdate is converted to old DATE before merging. + field_type_merge_rules[][] returns NEWDATE. + - Temporal type_handler_xxx2 (new MySQL-5.6 types) are converted to + corresponding old type codes before merging (e.g. TIME2->TIME). + field_type_merge_rules[][] returns old type codes (e.g. TIME). + Then old types codes are supposed to convert to new type codes somehow, + but they do not. So UNION and COALESCE create old columns. + This is a bug and should be fixed eventually. + */ + virtual enum_field_types traditional_merge_field_type() const + { + DBUG_ASSERT(is_traditional_scalar_type()); + return field_type(); + } + virtual enum_field_types type_code_for_protocol() const + { + return field_type(); + } + virtual protocol_send_type_t protocol_send_type() const= 0; + virtual bool Item_append_extended_type_info(Send_field_extended_metadata *to, + const Item *item) const + { + return false; + } + virtual Item_result result_type() const= 0; + virtual Item_result cmp_type() const= 0; + virtual enum_dynamic_column_type + dyncol_type(const Type_all_attributes *attr) const= 0; + virtual enum_mysql_timestamp_type mysql_timestamp_type() const + { + return MYSQL_TIMESTAMP_ERROR; + } + /* + Return true if the native format is fully implemented for a data type: + - Field_xxx::val_native() + - Item_xxx::val_native() for all classes supporting this data type + - Type_handler_xxx::cmp_native() + */ + virtual bool is_val_native_ready() const + { + return false; + } + /* + If operations such as: + UPDATE t1 SET binary_string_field=this_type_field; + should store this_type_field->val_native() rather than + this_type_field->val_str(). 
+ */ + virtual bool convert_to_binary_using_val_native() const + { + return false; + } + virtual bool is_timestamp_type() const + { + return false; + } + virtual bool is_order_clause_position_type() const + { + return false; + } + virtual bool is_limit_clause_valid_type() const + { + return false; + } + /* + Returns true if this data type supports a hack that + WHERE notnull_column IS NULL + finds zero values, e.g.: + WHERE date_notnull_column IS NULL -> + WHERE date_notnull_column = '0000-00-00' + */ + virtual bool cond_notnull_field_isnull_to_field_eq_zero() const + { + return false; + } + /** + Check whether a field type can be partially indexed by a key. + @param type field type + @retval true Type can have a prefixed key + @retval false Type can not have a prefixed key + */ + virtual bool type_can_have_key_part() const + { + return false; + } + virtual bool type_can_have_auto_increment_attribute() const + { + return false; + } + virtual uint max_octet_length() const { return 0; } + /** + Prepared statement long data: + Check whether this parameter data type is compatible with long data. + Used to detect whether a long data stream has been supplied to a + incompatible data type. + */ + virtual bool is_param_long_data_type() const { return false; } + /* + The base type handler "this" is derived from. + "This" inherits aggregation rules from the base type handler. + */ + virtual const Type_handler *type_handler_base() const + { + return NULL; + } + const Type_handler *type_handler_base_or_self() const + { + const Type_handler *res= type_handler_base(); + return res ? 
res : this; + } + virtual const Type_handler *type_handler_for_comparison() const= 0; + virtual const Type_handler *type_handler_for_native_format() const + { + return this; + } + virtual const Type_handler *type_handler_for_item_field() const + { + return this; + } + virtual const Type_handler *type_handler_for_tmp_table(const Item *) const + { + return this; + } + virtual const Type_handler *type_handler_for_union(const Item *) const + { + return this; + } + virtual const Type_handler *cast_to_int_type_handler() const + { + return this; + } + virtual const Type_handler *type_handler_unsigned() const + { + return this; + } + virtual const Type_handler *type_handler_signed() const + { + return this; + } + virtual bool partition_field_check(const LEX_CSTRING &field_name, Item *) + const + { + partition_field_type_not_allowed(field_name); + return true; + } + virtual bool partition_field_append_value(String *str, + Item *item_expr, + CHARSET_INFO *field_cs, + partition_value_print_mode_t mode) + const; + virtual int + stored_field_cmp_to_item(THD *thd, Field *field, Item *item) const= 0; + virtual CHARSET_INFO *charset_for_protocol(const Item *item) const; + virtual const Type_handler* + type_handler_adjusted_to_max_octet_length(uint max_octet_length, + CHARSET_INFO *cs) const + { return this; } + virtual bool adjust_spparam_type(Spvar_definition *def, Item *from) const + { + return false; + } + Type_handler() : m_name(0,0) {} + virtual ~Type_handler() = default; + /** + Determines MariaDB traditional scalar data types that always present + in the server. 
+ */ + bool is_traditional_scalar_type() const; + virtual bool is_scalar_type() const { return true; } + virtual bool can_return_int() const { return true; } + virtual bool can_return_decimal() const { return true; } + virtual bool can_return_real() const { return true; } + virtual bool can_return_str() const { return true; } + virtual bool can_return_text() const { return true; } + virtual bool can_return_date() const { return true; } + virtual bool can_return_time() const { return true; } + virtual bool can_return_extract_source(interval_type type) const; + virtual bool is_bool_type() const { return false; } + virtual bool is_general_purpose_string_type() const { return false; } + virtual decimal_digits_t Item_time_precision(THD *thd, Item *item) const; + virtual decimal_digits_t Item_datetime_precision(THD *thd, Item *item) const; + virtual decimal_digits_t Item_decimal_scale(const Item *item) const; + virtual decimal_digits_t Item_decimal_precision(const Item *item) const= 0; + /* + Returns how many digits a divisor adds into a division result. + See Item::divisor_precision_increment() in item.h for more comments. + */ + virtual decimal_digits_t Item_divisor_precision_increment(const Item *) const; + /** + Makes a temporary table Field to handle numeric aggregate functions, + e.g. SUM(DISTINCT expr), AVG(DISTINCT expr), etc. + */ + virtual Field *make_num_distinct_aggregator_field(MEM_ROOT *, + const Item *) const; + /** + Makes a temporary table Field to handle RBR replication type conversion. + @param TABLE - The conversion table the field is going to be added to. + It's used to access to table->in_use->mem_root, + to create the new field on the table memory root, + as well as to increment statistics in table->share + (e.g. table->s->blob_count). + @param metadata - Metadata from the binary log. + @param target - The field in the target table on the slave. 
+ + Note, the data types of "target" and of "this" are not necessarily + always the same, in general case it's possible that: + this->field_type() != target->field_type() + and/or + this->real_type( ) != target->real_type() + + This method decodes metadata according to this->real_type() + and creates a new field also according to this->real_type(). + + In some cases it lurks into "target", to get some extra information, e.g.: + - unsigned_flag for numeric fields + - charset() for string fields + - typelib and field_length for SET and ENUM + - geom_type and srid for GEOMETRY + This information is not available in the binary log, so + we assume that these fields are the same on the master and on the slave. + */ + virtual Field *make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) const= 0; + virtual void show_binlog_type(const Conv_source &src, const Field &dst, + String *str) const; + virtual uint32 max_display_length_for_field(const Conv_source &src) const= 0; + /* + Performs the final data type validation for a UNION element, + after the regular "aggregation for result" was done. + */ + virtual bool union_element_finalize(Item_type_holder* item) const + { + return false; + } + virtual uint Column_definition_gis_options_image(uchar *buff, + const Column_definition &def) + const + { + return 0; + } + virtual bool Column_definition_data_type_info_image(Binary_string *to, + const Column_definition &def) + const; + // Check if the implicit default value is Ok in the current sql_mode + virtual bool validate_implicit_default_value(THD *thd, + const Column_definition &def) + const; + // Automatic upgrade, e.g. 
for ALTER TABLE t1 FORCE + virtual void Column_definition_implicit_upgrade(Column_definition *c) const + { } + // Validate CHECK constraint after the parser + virtual bool Column_definition_validate_check_constraint(THD *thd, + Column_definition *c) + const; + // Set attributes in the parser + virtual bool Column_definition_set_attributes(THD *thd, + Column_definition *def, + const Lex_field_type_st &attr, + column_definition_type_t type) + const; + // Fix attributes after the parser + virtual bool Column_definition_fix_attributes(Column_definition *c) const= 0; + /* + Fix attributes from an existing field. Used for: + - ALTER TABLE (for columns that do not change) + - DECLARE var TYPE OF t1.col1; (anchored SP variables) + */ + virtual void Column_definition_reuse_fix_attributes(THD *thd, + Column_definition *c, + const Field *field) const + { } + virtual bool Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *c, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const; + virtual bool Column_definition_bulk_alter(Column_definition *c, + const Column_derived_attributes + *derived_attr, + const Column_bulk_alter_attributes + *bulk_alter_attr) + const + { return false; } + /* + This method is called on queries like: + CREATE TABLE t2 (a INT) AS SELECT a FROM t1; + I.e. column "a" is queried from another table, + but its data type is redefined. + @param OUT def - The column definition to be redefined + @param IN dup - The column definition to take the data type from + (i.e. "a INT" in the above example). + @param IN file - Table owner handler. If it does not support certain + data types, some conversion can be applied. + I.g. true BIT to BIT-AS-CHAR. + @param IN schema - the owner schema definition, e.g. for the default + character set and collation. 
+ @retval true - on error + @retval false - on success + */ + virtual bool Column_definition_redefine_stage1(Column_definition *def, + const Column_definition *dup, + const handler *file) + const; + virtual bool Column_definition_prepare_stage2(Column_definition *c, + handler *file, + ulonglong table_flags) const= 0; + virtual bool Key_part_spec_init_primary(Key_part_spec *part, + const Column_definition &def, + const handler *file) const; + virtual bool Key_part_spec_init_unique(Key_part_spec *part, + const Column_definition &def, + const handler *file, + bool *has_key_needed) const; + virtual bool Key_part_spec_init_multiple(Key_part_spec *part, + const Column_definition &def, + const handler *file) const; + virtual bool Key_part_spec_init_foreign(Key_part_spec *part, + const Column_definition &def, + const handler *file) const; + virtual bool Key_part_spec_init_spatial(Key_part_spec *part, + const Column_definition &def) const; + virtual bool Key_part_spec_init_ft(Key_part_spec *part, + const Column_definition &def) const + { + return true; // Error + } + virtual Field *make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const= 0; + Field *make_and_init_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE *table) const; + virtual Field *make_schema_field(MEM_ROOT *root, + TABLE *table, + const Record_addr &addr, + const ST_FIELD_INFO &def) const + { + DBUG_ASSERT(0); + return NULL; + } + virtual Field * + make_table_field_from_def(TABLE_SHARE *share, + MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const= 0; + virtual void + Column_definition_attributes_frm_pack(const Column_definition_attributes *at, + uchar *buff) const; + virtual const Type_handler *type_handler_frm_unpack(const uchar 
*buffer) const + { + return this; + } + virtual bool + Column_definition_attributes_frm_unpack(Column_definition_attributes *attr, + TABLE_SHARE *share, + const uchar *buffer, + LEX_CUSTRING *gis_options) const; + + /* + Create a fixed size key part for a sort key + */ + virtual void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp) const= 0; + + /* + create a compact size key part for a sort key + */ + virtual uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp) const=0; + + virtual void sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *attr) const= 0; + virtual bool is_packable() const { return false; } + + virtual uint32 max_display_length(const Item *item) const= 0; + virtual uint32 Item_decimal_notation_int_digits(const Item *item) const { return 0; } + virtual uint32 calc_pack_length(uint32 length) const= 0; + virtual uint calc_key_length(const Column_definition &def) const; + virtual void Item_update_null_value(Item *item) const= 0; + virtual bool Item_save_in_value(THD *thd, Item *item, st_value *value) const= 0; + virtual void Item_param_setup_conversion(THD *thd, Item_param *) const {} + virtual void Item_param_set_param_func(Item_param *param, + uchar **pos, ulong len) const; + virtual bool Item_param_set_from_value(THD *thd, + Item_param *param, + const Type_all_attributes *attr, + const st_value *value) const= 0; + virtual bool Item_param_val_native(THD *thd, + Item_param *item, + Native *to) const; + virtual bool Item_send(Item *item, Protocol *p, st_value *buf) const= 0; + virtual int Item_save_in_field(Item *item, Field *field, + bool no_conversions) const= 0; + + /** + Return a string representation of the Item value. + + @param thd thread handle + @param str string buffer for representation of the value + + @note + If the item has a string result type, the string is escaped + according to its character set. 
+ + @retval + NULL on error + @retval + non-NULL a pointer to a a valid string on success + */ + virtual String *print_item_value(THD *thd, Item *item, String *str) const= 0; + + /** + Check if + WHERE expr=value AND expr=const + can be rewritten as: + WHERE const=value AND expr=const + + "this" is the comparison handler that is used by "target". + + @param target - the predicate expr=value, + whose "expr" argument will be replaced to "const". + @param target_expr - the target's "expr" which will be replaced to "const". + @param target_value - the target's second argument, it will remain unchanged. + @param source - the equality predicate expr=const (or expr<=>const) + that can be used to rewrite the "target" part + (under certain conditions, see the code). + @param source_expr - the source's "expr". It should be exactly equal to + the target's "expr" to make condition rewrite possible. + @param source_const - the source's "const" argument, it will be inserted + into "target" instead of "expr". + */ + virtual bool + can_change_cond_ref_to_const(Item_bool_func2 *target, + Item *target_expr, Item *target_value, + Item_bool_func2 *source, + Item *source_expr, Item *source_const) const= 0; + + /* + @brief + Check if an IN subquery allows materialization or not + @param + inner expression on the inner side of the IN subquery + outer expression on the outer side of the IN subquery + is_in_predicate SET to true if IN subquery was converted from an + IN predicate or we are checking if materialization + strategy can be used for an IN predicate + */ + virtual bool + subquery_type_allows_materialization(const Item *inner, + const Item *outer, + bool is_in_predicate) const= 0; + /** + Make a simple constant replacement item for a constant "src", + so the new item can futher be used for comparison with "cmp", e.g.: + src = cmp -> replacement = cmp + + "this" is the type handler that is used to compare "src" and "cmp". 
+ + @param thd - current thread, for mem_root + @param src - The item that we want to replace. It's a const item, + but it can be complex enough to calculate on every row. + @param cmp - The src's comparand. + @retval - a pointer to the created replacement Item + @retval - NULL, if could not create a replacement (e.g. on EOM). + NULL is also returned for ROWs, because instead of replacing + a Item_row to a new Item_row, Type_handler_row just replaces + its elements. + */ + virtual Item *make_const_item_for_comparison(THD *thd, + Item *src, + const Item *cmp) const= 0; + virtual Item_cache *Item_get_cache(THD *thd, const Item *item) const= 0; + virtual Item *make_constructor_item(THD *thd, List<Item> *args) const + { + return NULL; + } + /** + A builder for literals with data type name prefix, e.g.: + TIME'00:00:00', DATE'2001-01-01', TIMESTAMP'2001-01-01 00:00:00'. + @param thd The current thread + @param str Character literal + @param length Length of str + @param cs Character set of the string + @param send_error Whether to generate an error on failure + + @retval A pointer to a new Item on success + NULL on error (wrong literal value, EOM) + */ + virtual Item_literal *create_literal_item(THD *thd, + const char *str, size_t length, + CHARSET_INFO *cs, + bool send_error) const + { + MY_ASSERT_UNREACHABLE(); + return nullptr; + } + Item_literal *create_literal_item(THD *thd, const String *str, + bool send_error) const + { + return create_literal_item(thd, str->ptr(), str->length(), str->charset(), + send_error); + } + virtual Item *create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) const + { + MY_ASSERT_UNREACHABLE(); + return nullptr; + } + virtual Item_copy *create_item_copy(THD *thd, Item *item) const; + virtual int cmp_native(const Native &a, const Native &b) const + { + MY_ASSERT_UNREACHABLE(); + return 0; + } + virtual bool set_comparator_func(THD *thd, Arg_comparator *cmp) const= 0; + virtual bool Item_const_eq(const Item_const *a, 
const Item_const *b, + bool binary_cmp) const + { + return false; + } + virtual bool Item_eq_value(THD *thd, const Type_cmp_attributes *attr, + Item *a, Item *b) const= 0; + virtual bool Item_hybrid_func_fix_attributes(THD *thd, + const LEX_CSTRING &name, + Type_handler_hybrid_field_type *, + Type_all_attributes *atrr, + Item **items, + uint nitems) const= 0; + virtual bool Item_func_min_max_fix_attributes(THD *thd, + Item_func_min_max *func, + Item **items, + uint nitems) const; + virtual bool Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *) const= 0; + virtual bool Item_sum_sum_fix_length_and_dec(Item_sum_sum *) const= 0; + virtual bool Item_sum_avg_fix_length_and_dec(Item_sum_avg *) const= 0; + virtual + bool Item_sum_variance_fix_length_and_dec(Item_sum_variance *) const= 0; + + virtual bool Item_val_native_with_conversion(THD *thd, Item *item, + Native *to) const + { + return true; + } + virtual bool Item_val_native_with_conversion_result(THD *thd, Item *item, + Native *to) const + { + return true; + } + + virtual bool Item_val_bool(Item *item) const= 0; + virtual void Item_get_date(THD *thd, Item *item, + Temporal::Warn *buff, MYSQL_TIME *ltime, + date_mode_t fuzzydate) const= 0; + bool Item_get_date_with_warn(THD *thd, Item *item, MYSQL_TIME *ltime, + date_mode_t fuzzydate) const; + virtual longlong Item_val_int_signed_typecast(Item *item) const= 0; + virtual longlong Item_val_int_unsigned_typecast(Item *item) const= 0; + + virtual String *Item_func_hex_val_str_ascii(Item_func_hex *item, + String *str) const= 0; + + virtual + String *Item_func_hybrid_field_type_val_str(Item_func_hybrid_field_type *, + String *) const= 0; + virtual + double Item_func_hybrid_field_type_val_real(Item_func_hybrid_field_type *) + const= 0; + virtual + longlong Item_func_hybrid_field_type_val_int(Item_func_hybrid_field_type *) + const= 0; + virtual + my_decimal *Item_func_hybrid_field_type_val_decimal( + Item_func_hybrid_field_type *, + my_decimal *) const= 0; + virtual + 
void Item_func_hybrid_field_type_get_date(THD *, + Item_func_hybrid_field_type *, + Temporal::Warn *, + MYSQL_TIME *, + date_mode_t fuzzydate) const= 0; + bool Item_func_hybrid_field_type_get_date_with_warn(THD *thd, + Item_func_hybrid_field_type *, + MYSQL_TIME *, + date_mode_t) const; + virtual + String *Item_func_min_max_val_str(Item_func_min_max *, String *) const= 0; + virtual + double Item_func_min_max_val_real(Item_func_min_max *) const= 0; + virtual + longlong Item_func_min_max_val_int(Item_func_min_max *) const= 0; + virtual + my_decimal *Item_func_min_max_val_decimal(Item_func_min_max *, + my_decimal *) const= 0; + virtual + bool Item_func_min_max_get_date(THD *thd, Item_func_min_max*, + MYSQL_TIME *, date_mode_t fuzzydate) const= 0; + virtual bool + Item_func_between_fix_length_and_dec(Item_func_between *func) const= 0; + virtual longlong + Item_func_between_val_int(Item_func_between *func) const= 0; + + virtual cmp_item * + make_cmp_item(THD *thd, CHARSET_INFO *cs) const= 0; + + virtual in_vector * + make_in_vector(THD *thd, const Item_func_in *func, uint nargs) const= 0; + + virtual bool + Item_func_in_fix_comparator_compatible_types(THD *thd, Item_func_in *) + const= 0; + + virtual bool + Item_func_round_fix_length_and_dec(Item_func_round *round) const= 0; + + virtual bool + Item_func_int_val_fix_length_and_dec(Item_func_int_val *func) const= 0; + + virtual bool + Item_func_abs_fix_length_and_dec(Item_func_abs *func) const= 0; + + virtual bool + Item_func_neg_fix_length_and_dec(Item_func_neg *func) const= 0; + + virtual bool + Item_func_signed_fix_length_and_dec(Item_func_signed *item) const; + virtual bool + Item_func_unsigned_fix_length_and_dec(Item_func_unsigned *item) const; + virtual bool + Item_double_typecast_fix_length_and_dec(Item_double_typecast *item) const; + virtual bool + Item_float_typecast_fix_length_and_dec(Item_float_typecast *item) const; + virtual bool + Item_decimal_typecast_fix_length_and_dec(Item_decimal_typecast *item) const; + 
virtual bool + Item_char_typecast_fix_length_and_dec(Item_char_typecast *item) const; + virtual bool + Item_time_typecast_fix_length_and_dec(Item_time_typecast *item) const; + virtual bool + Item_date_typecast_fix_length_and_dec(Item_date_typecast *item) const; + virtual bool + Item_datetime_typecast_fix_length_and_dec(Item_datetime_typecast *item) const; + + virtual bool + Item_func_plus_fix_length_and_dec(Item_func_plus *func) const= 0; + virtual bool + Item_func_minus_fix_length_and_dec(Item_func_minus *func) const= 0; + virtual bool + Item_func_mul_fix_length_and_dec(Item_func_mul *func) const= 0; + virtual bool + Item_func_div_fix_length_and_dec(Item_func_div *func) const= 0; + virtual bool + Item_func_mod_fix_length_and_dec(Item_func_mod *func) const= 0; + + virtual const Vers_type_handler *vers() const { return NULL; } +}; + + +/* + Special handler for ROW +*/ +class Type_handler_row: public Type_handler +{ +public: + virtual ~Type_handler_row() = default; + const Name &default_value() const override; + bool validate_implicit_default_value(THD *, const Column_definition &) + const override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + const Type_collection *type_collection() const override; + bool is_scalar_type() const override { return false; } + bool can_return_int() const override { return false; } + bool can_return_decimal() const override { return false; } + bool can_return_real() const override { return false; } + bool can_return_str() const override { return false; } + bool can_return_text() const override { return false; } + bool can_return_date() const override { return false; } + bool can_return_time() const override { return false; } + enum_field_types field_type() const override + { + MY_ASSERT_UNREACHABLE(); + return MYSQL_TYPE_NULL; + }; + protocol_send_type_t protocol_send_type() const override + { + MY_ASSERT_UNREACHABLE(); + return PROTOCOL_SEND_STRING; + } + Item_result result_type() const override + { + return ROW_RESULT; + } + 
Item_result cmp_type() const override + { + return ROW_RESULT; + } + enum_dynamic_column_type dyncol_type(const Type_all_attributes *) + const override + { + MY_ASSERT_UNREACHABLE(); + return DYN_COL_NULL; + } + const Type_handler *type_handler_for_comparison() const override; + int stored_field_cmp_to_item(THD *, Field *, Item *) const override + { + MY_ASSERT_UNREACHABLE(); + return 0; + } + bool subquery_type_allows_materialization(const Item *, const Item *, bool) + const override + { + MY_ASSERT_UNREACHABLE(); + return false; + } + Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *) const + override + { + MY_ASSERT_UNREACHABLE(); + return nullptr; + } + Field *make_conversion_table_field(MEM_ROOT *, TABLE *, uint, const Field *) + const override + { + MY_ASSERT_UNREACHABLE(); + return nullptr; + } + bool Column_definition_fix_attributes(Column_definition *) const override + { + return false; + } + void Column_definition_reuse_fix_attributes(THD *, Column_definition *, + const Field *) const override + { + MY_ASSERT_UNREACHABLE(); + } + bool Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *c, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const override; + bool Column_definition_redefine_stage1(Column_definition *, + const Column_definition *, + const handler *) + const override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + bool Column_definition_prepare_stage2(Column_definition *, handler *, + ulonglong) const override + { + return false; + } + Field *make_table_field(MEM_ROOT *, const LEX_CSTRING *, const Record_addr &, + const Type_all_attributes &, TABLE_SHARE *) + const override + { + MY_ASSERT_UNREACHABLE(); + return nullptr; + } + Field *make_table_field_from_def(TABLE_SHARE *share, + MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const override; + 
void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp) const override + { + MY_ASSERT_UNREACHABLE(); + } + uint make_packed_sort_key_part(uchar *, Item *, const SORT_FIELD_ATTR *, + String *) const override + { + MY_ASSERT_UNREACHABLE(); + return 0; + } + void sort_length(THD *, const Type_std_attributes *, SORT_FIELD_ATTR *) + const override + { + MY_ASSERT_UNREACHABLE(); + } + uint32 max_display_length(const Item *) const override + { + MY_ASSERT_UNREACHABLE(); + return 0; + } + uint32 max_display_length_for_field(const Conv_source &) const override + { + MY_ASSERT_UNREACHABLE(); + return 0; + } + uint32 calc_pack_length(uint32) const override + { + MY_ASSERT_UNREACHABLE(); + return 0; + } + bool Item_eq_value(THD *thd, const Type_cmp_attributes *attr, + Item *a, Item *b) const override; + decimal_digits_t Item_decimal_precision(const Item *) const override + { + MY_ASSERT_UNREACHABLE(); + return DECIMAL_MAX_PRECISION; + } + bool Item_save_in_value(THD *thd, Item *item, st_value *value) const + override; + bool Item_param_set_from_value(THD *thd, + Item_param *param, + const Type_all_attributes *attr, + const st_value *value) const override; + bool Item_send(Item *, Protocol *, st_value *) const override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + void Item_update_null_value(Item *item) const override; + int Item_save_in_field(Item *, Field *, bool) const override + { + MY_ASSERT_UNREACHABLE(); + return 1; + } + String *print_item_value(THD *thd, Item *item, String *str) const override; + bool can_change_cond_ref_to_const(Item_bool_func2 *, Item *, Item *, + Item_bool_func2 *, Item *, Item *) + const override + { + MY_ASSERT_UNREACHABLE(); + return false; + } + Item *make_const_item_for_comparison(THD *, Item *src, const Item *cmp) const + override; + Item_cache *Item_get_cache(THD *thd, const Item *item) const override; + Item_copy *create_item_copy(THD *, Item *) const override + { + MY_ASSERT_UNREACHABLE(); + 
return nullptr; + } + bool set_comparator_func(THD *thd, Arg_comparator *cmp) const override; + bool Item_hybrid_func_fix_attributes(THD *thd, + const LEX_CSTRING &name, + Type_handler_hybrid_field_type *, + Type_all_attributes *atrr, + Item **items, uint nitems) + const override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + bool Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *) const override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + bool Item_sum_sum_fix_length_and_dec(Item_sum_sum *) const override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + bool Item_sum_avg_fix_length_and_dec(Item_sum_avg *) const override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + bool Item_sum_variance_fix_length_and_dec(Item_sum_variance *) const override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + bool Item_val_bool(Item *item) const override + { + MY_ASSERT_UNREACHABLE(); + return false; + } + void Item_get_date(THD *, Item *, Temporal::Warn *, MYSQL_TIME *ltime, + date_mode_t) const override + { + MY_ASSERT_UNREACHABLE(); + set_zero_time(ltime, MYSQL_TIMESTAMP_NONE); + } + longlong Item_val_int_signed_typecast(Item *) const override + { + MY_ASSERT_UNREACHABLE(); + return 0; + } + longlong Item_val_int_unsigned_typecast(Item *) const override + { + MY_ASSERT_UNREACHABLE(); + return 0; + } + String *Item_func_hex_val_str_ascii(Item_func_hex *, String *) const override + { + MY_ASSERT_UNREACHABLE(); + return nullptr; + } + String *Item_func_hybrid_field_type_val_str(Item_func_hybrid_field_type *, + String *) const override + { + MY_ASSERT_UNREACHABLE(); + return nullptr; + } + double Item_func_hybrid_field_type_val_real(Item_func_hybrid_field_type *) + const override + { + MY_ASSERT_UNREACHABLE(); + return 0.0; + } + longlong Item_func_hybrid_field_type_val_int(Item_func_hybrid_field_type *) + const override + { + MY_ASSERT_UNREACHABLE(); + return 0; + } + my_decimal *Item_func_hybrid_field_type_val_decimal( + Item_func_hybrid_field_type *, + 
my_decimal *) const override + { + MY_ASSERT_UNREACHABLE(); + return nullptr; + } + void Item_func_hybrid_field_type_get_date(THD *, + Item_func_hybrid_field_type *, + Temporal::Warn *, + MYSQL_TIME *ltime, + date_mode_t) const override + { + MY_ASSERT_UNREACHABLE(); + set_zero_time(ltime, MYSQL_TIMESTAMP_NONE); + } + + String *Item_func_min_max_val_str(Item_func_min_max *, String *) const + override + { + MY_ASSERT_UNREACHABLE(); + return nullptr; + } + double Item_func_min_max_val_real(Item_func_min_max *) const override + { + MY_ASSERT_UNREACHABLE(); + return 0; + } + longlong Item_func_min_max_val_int(Item_func_min_max *) const override + { + MY_ASSERT_UNREACHABLE(); + return 0; + } + my_decimal *Item_func_min_max_val_decimal(Item_func_min_max *, + my_decimal *) const override + { + MY_ASSERT_UNREACHABLE(); + return nullptr; + } + bool Item_func_min_max_get_date(THD *, Item_func_min_max*, MYSQL_TIME *, + date_mode_t) const override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + bool Item_func_between_fix_length_and_dec(Item_func_between *) const override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + longlong Item_func_between_val_int(Item_func_between *func) const override; + cmp_item *make_cmp_item(THD *thd, CHARSET_INFO *cs) const override; + in_vector *make_in_vector(THD *thd, const Item_func_in *f, uint nargs) const + override; + bool Item_func_in_fix_comparator_compatible_types(THD *thd, + Item_func_in *) const + override; + bool Item_func_round_fix_length_and_dec(Item_func_round *) const override; + bool Item_func_int_val_fix_length_and_dec(Item_func_int_val *) const + override; + bool Item_func_abs_fix_length_and_dec(Item_func_abs *) const override; + bool Item_func_neg_fix_length_and_dec(Item_func_neg *) const override; + + bool Item_func_signed_fix_length_and_dec(Item_func_signed *) const override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + bool Item_func_unsigned_fix_length_and_dec(Item_func_unsigned *) const + override + { + 
MY_ASSERT_UNREACHABLE(); + return true; + } + bool Item_double_typecast_fix_length_and_dec(Item_double_typecast *) const + override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + bool Item_float_typecast_fix_length_and_dec(Item_float_typecast *) const + override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + bool Item_decimal_typecast_fix_length_and_dec(Item_decimal_typecast *) const + override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + bool Item_char_typecast_fix_length_and_dec(Item_char_typecast *) const + override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + bool Item_time_typecast_fix_length_and_dec(Item_time_typecast *) const + override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + bool Item_date_typecast_fix_length_and_dec(Item_date_typecast *) const + override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + bool Item_datetime_typecast_fix_length_and_dec(Item_datetime_typecast *) + const override + { + MY_ASSERT_UNREACHABLE(); + return true; + } + + bool Item_func_plus_fix_length_and_dec(Item_func_plus *) const override; + bool Item_func_minus_fix_length_and_dec(Item_func_minus *) const override; + bool Item_func_mul_fix_length_and_dec(Item_func_mul *) const override; + bool Item_func_div_fix_length_and_dec(Item_func_div *) const override; + bool Item_func_mod_fix_length_and_dec(Item_func_mod *) const override; +}; + + +/* + A common parent class for numeric data type handlers +*/ +class Type_handler_numeric: public Type_handler +{ +public: + const Name &default_value() const override; + String *print_item_value(THD *thd, Item *item, String *str) const override; + bool Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *c, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const override; + double Item_func_min_max_val_real(Item_func_min_max *) const override; + longlong Item_func_min_max_val_int(Item_func_min_max *) const override; + my_decimal 
*Item_func_min_max_val_decimal(Item_func_min_max *, + my_decimal *) const override; + bool Item_func_min_max_get_date(THD *thd, Item_func_min_max*, + MYSQL_TIME *, date_mode_t fuzzydate) const + override; + virtual ~Type_handler_numeric() = default; + bool can_change_cond_ref_to_const(Item_bool_func2 *target, + Item *target_expr, Item *target_value, + Item_bool_func2 *source, + Item *source_expr, Item *source_const) const + override; + bool Item_func_between_fix_length_and_dec(Item_func_between *func) const + override; + bool Item_char_typecast_fix_length_and_dec(Item_char_typecast *) const + override; +}; + + +/*** Abstract classes for every XXX_RESULT */ + +class Type_handler_real_result: public Type_handler_numeric +{ +public: + Item_result result_type() const override{ return REAL_RESULT; } + Item_result cmp_type() const override { return REAL_RESULT; } + enum_dynamic_column_type dyncol_type(const Type_all_attributes *attr) + const override + { + return DYN_COL_DOUBLE; + } + virtual ~Type_handler_real_result() = default; + const Type_handler *type_handler_for_comparison() const override; + Field *make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const override; + void Column_definition_reuse_fix_attributes(THD *thd, + Column_definition *c, + const Field *field) + const override; + void + Column_definition_attributes_frm_pack(const Column_definition_attributes *at, + uchar *buff) const override; + bool + Column_definition_attributes_frm_unpack(Column_definition_attributes *attr, + TABLE_SHARE *share, + const uchar *buffer, + LEX_CUSTRING *gis_options) + const override; + int stored_field_cmp_to_item(THD *thd, Field *field, Item *item) + const override; + bool subquery_type_allows_materialization(const Item *inner, + const Item *outer, + bool is_in_predicate) + const override; + void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String 
*tmp) const override; + uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp) const override; + void sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *attr) const override; + bool Item_const_eq(const Item_const *a, const Item_const *b, + bool binary_cmp) const override; + bool Item_eq_value(THD *thd, const Type_cmp_attributes *attr, + Item *a, Item *b) const override; + decimal_digits_t Item_decimal_precision(const Item *item) const override; + bool Item_save_in_value(THD *thd, Item *item, st_value *value) const override; + bool Item_param_set_from_value(THD *thd, + Item_param *param, + const Type_all_attributes *attr, + const st_value *value) const override; + void Item_update_null_value(Item *item) const override; + int Item_save_in_field(Item *item, Field *field, bool no_conversions) + const override; + Item *make_const_item_for_comparison(THD *, Item *src, const Item *cmp) + const override; + bool set_comparator_func(THD *thd, Arg_comparator *cmp) const override; + bool Item_hybrid_func_fix_attributes(THD *thd, + const LEX_CSTRING &name, + Type_handler_hybrid_field_type *, + Type_all_attributes *atrr, + Item **items, uint nitems) + const override; + bool Item_func_min_max_fix_attributes(THD *thd, Item_func_min_max *func, + Item **items, uint nitems) + const override; + bool Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *func) const override; + bool Item_sum_sum_fix_length_and_dec(Item_sum_sum *) const override; + bool Item_sum_avg_fix_length_and_dec(Item_sum_avg *) const override; + bool Item_sum_variance_fix_length_and_dec(Item_sum_variance *) const override; + bool Item_func_signed_fix_length_and_dec(Item_func_signed *item) + const override; + bool Item_func_unsigned_fix_length_and_dec(Item_func_unsigned *item) + const override; + bool Item_val_bool(Item *item) const override; + void Item_get_date(THD *thd, Item *item, Temporal::Warn *warn, + MYSQL_TIME *ltime, date_mode_t 
fuzzydate) const override; + longlong Item_val_int_signed_typecast(Item *item) const override; + longlong Item_val_int_unsigned_typecast(Item *item) const override; + String *Item_func_hex_val_str_ascii(Item_func_hex *item, String *str) + const override; + double Item_func_hybrid_field_type_val_real(Item_func_hybrid_field_type *) + const override; + longlong Item_func_hybrid_field_type_val_int(Item_func_hybrid_field_type *) + const override; + my_decimal *Item_func_hybrid_field_type_val_decimal( + Item_func_hybrid_field_type *, + my_decimal *) const override; + void Item_func_hybrid_field_type_get_date(THD *, + Item_func_hybrid_field_type *, + Temporal::Warn *, + MYSQL_TIME *, + date_mode_t fuzzydate) + const override; + longlong Item_func_between_val_int(Item_func_between *func) const override; + cmp_item *make_cmp_item(THD *thd, CHARSET_INFO *cs) const override; + in_vector *make_in_vector(THD *, const Item_func_in *, uint nargs) + const override; + bool Item_func_in_fix_comparator_compatible_types(THD *thd, Item_func_in *) + const override; + + bool Item_func_round_fix_length_and_dec(Item_func_round *) const override; + bool Item_func_int_val_fix_length_and_dec(Item_func_int_val *) const override; + bool Item_func_abs_fix_length_and_dec(Item_func_abs *) const override; + bool Item_func_neg_fix_length_and_dec(Item_func_neg *) const override; + bool Item_func_plus_fix_length_and_dec(Item_func_plus *) const override; + bool Item_func_minus_fix_length_and_dec(Item_func_minus *) const override; + bool Item_func_mul_fix_length_and_dec(Item_func_mul *) const override; + bool Item_func_div_fix_length_and_dec(Item_func_div *) const override; + bool Item_func_mod_fix_length_and_dec(Item_func_mod *) const override; +}; + + +class Type_handler_decimal_result: public Type_handler_numeric +{ +public: + protocol_send_type_t protocol_send_type() const override + { + return PROTOCOL_SEND_STRING; + } + Item_result result_type() const override { return DECIMAL_RESULT; } + 
Item_result cmp_type() const override { return DECIMAL_RESULT; } + enum_dynamic_column_type dyncol_type(const Type_all_attributes *) const + override + { + return DYN_COL_DECIMAL; + } + virtual ~Type_handler_decimal_result() = default; + const Type_handler *type_handler_for_comparison() const override; + int stored_field_cmp_to_item(THD *, Field *field, Item *item) const override + { + VDec item_val(item); + return item_val.is_null() ? 0 : my_decimal(field).cmp(item_val.ptr()); + } + bool subquery_type_allows_materialization(const Item *inner, + const Item *outer, + bool is_in_predicate) + const override; + Field *make_schema_field(MEM_ROOT *root, + TABLE *table, + const Record_addr &addr, + const ST_FIELD_INFO &def) const override; + Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *) + const override; + void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp) const override; + uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp) const override; + void + Column_definition_attributes_frm_pack(const Column_definition_attributes *at, + uchar *buff) const override; + bool + Column_definition_attributes_frm_unpack(Column_definition_attributes *attr, + TABLE_SHARE *share, + const uchar *buffer, + LEX_CUSTRING *gis_options) + const override; + void sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *attr) const override; + uint32 max_display_length(const Item *item) const override; + uint32 Item_decimal_notation_int_digits(const Item *item) const override; + Item *create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) const override; + bool Item_const_eq(const Item_const *a, const Item_const *b, + bool binary_cmp) const override; + bool Item_eq_value(THD *thd, const Type_cmp_attributes *attr, + Item *a, Item *b) const override + { + VDec va(a), vb(b); + return va.ptr() && vb.ptr() && !va.cmp(vb); + } + 
decimal_digits_t Item_decimal_precision(const Item *item) const override; + bool Item_save_in_value(THD *thd, Item *item, st_value *value) const override; + void Item_param_set_param_func(Item_param *param, + uchar **pos, ulong len) const override; + bool Item_param_set_from_value(THD *thd, + Item_param *param, + const Type_all_attributes *attr, + const st_value *value) const override; + bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override + { + return Item_send_str(item, protocol, buf); + } + void Item_update_null_value(Item *item) const override; + int Item_save_in_field(Item *item, Field *field, bool no_conversions) const + override; + Item *make_const_item_for_comparison(THD *, Item *src, const Item *cmp) const + override; + Item_cache *Item_get_cache(THD *thd, const Item *item) const override; + bool set_comparator_func(THD *thd, Arg_comparator *cmp) const override; + bool Item_hybrid_func_fix_attributes(THD *thd, + const LEX_CSTRING &name, + Type_handler_hybrid_field_type *, + Type_all_attributes *atrr, + Item **items, uint nitems) + const override; + bool Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *) const override; + bool Item_sum_sum_fix_length_and_dec(Item_sum_sum *) const override; + bool Item_sum_avg_fix_length_and_dec(Item_sum_avg *) const override; + bool Item_sum_variance_fix_length_and_dec(Item_sum_variance*) const override; + bool Item_val_bool(Item *item) const override + { + return VDec(item).to_bool(); + } + void Item_get_date(THD *thd, Item *item, Temporal::Warn *warn, + MYSQL_TIME *ltime, date_mode_t fuzzydate) const override; + longlong Item_val_int_signed_typecast(Item *item) const override; + longlong Item_val_int_unsigned_typecast(Item *item) const override + { + return VDec(item).to_longlong(true); + } + String *Item_func_hex_val_str_ascii(Item_func_hex *item, String *str) + const override; + String *Item_func_hybrid_field_type_val_str(Item_func_hybrid_field_type *, + String *) const override; + double 
Item_func_hybrid_field_type_val_real(Item_func_hybrid_field_type *) + const override; + longlong Item_func_hybrid_field_type_val_int(Item_func_hybrid_field_type *) + const override; + my_decimal *Item_func_hybrid_field_type_val_decimal( + Item_func_hybrid_field_type *, + my_decimal *) const override; + void Item_func_hybrid_field_type_get_date(THD *, + Item_func_hybrid_field_type *, + Temporal::Warn *, + MYSQL_TIME *, + date_mode_t fuzzydate) + const override; + String *Item_func_min_max_val_str(Item_func_min_max *, String *) + const override; + longlong Item_func_between_val_int(Item_func_between *func) const override; + cmp_item *make_cmp_item(THD *thd, CHARSET_INFO *cs) const override; + in_vector *make_in_vector(THD *, const Item_func_in *, uint nargs) + const override; + bool Item_func_in_fix_comparator_compatible_types(THD *thd, Item_func_in *) + const override; + bool Item_func_round_fix_length_and_dec(Item_func_round *) const override; + bool Item_func_int_val_fix_length_and_dec(Item_func_int_val*) const override; + bool Item_func_abs_fix_length_and_dec(Item_func_abs *) const override; + bool Item_func_neg_fix_length_and_dec(Item_func_neg *) const override; + bool Item_func_plus_fix_length_and_dec(Item_func_plus *) const override; + bool Item_func_minus_fix_length_and_dec(Item_func_minus *) const override; + bool Item_func_mul_fix_length_and_dec(Item_func_mul *) const override; + bool Item_func_div_fix_length_and_dec(Item_func_div *) const override; + bool Item_func_mod_fix_length_and_dec(Item_func_mod *) const override; +}; + + +class Type_limits_int +{ +private: + uint32 m_precision; + uint32 m_char_length; +public: + Type_limits_int(uint32 prec, uint32 nchars) + :m_precision(prec), m_char_length(nchars) + { } + uint32 precision() const { return m_precision; } + uint32 char_length() const { return m_char_length; } +}; + + +/* + UNDIGNED TINYINT: 0..255 digits=3 nchars=3 + SIGNED TINYINT : -128..127 digits=3 nchars=4 +*/ +class Type_limits_uint8: public 
Type_limits_int +{ +public: + Type_limits_uint8() + :Type_limits_int(MAX_TINYINT_WIDTH, MAX_TINYINT_WIDTH) + { } +}; + + +class Type_limits_sint8: public Type_limits_int +{ +public: + Type_limits_sint8() + :Type_limits_int(MAX_TINYINT_WIDTH, MAX_TINYINT_WIDTH + 1) + { } +}; + + +/* + UNDIGNED SMALLINT: 0..65535 digits=5 nchars=5 + SIGNED SMALLINT: -32768..32767 digits=5 nchars=6 +*/ +class Type_limits_uint16: public Type_limits_int +{ +public: + Type_limits_uint16() + :Type_limits_int(MAX_SMALLINT_WIDTH, MAX_SMALLINT_WIDTH) + { } +}; + + +class Type_limits_sint16: public Type_limits_int +{ +public: + Type_limits_sint16() + :Type_limits_int(MAX_SMALLINT_WIDTH, MAX_SMALLINT_WIDTH + 1) + { } +}; + + +/* + MEDIUMINT UNSIGNED 0 .. 16777215 digits=8 char_length=8 + MEDIUMINT SIGNED: -8388608 .. 8388607 digits=7 char_length=8 +*/ +class Type_limits_uint24: public Type_limits_int +{ +public: + Type_limits_uint24() + :Type_limits_int(MAX_MEDIUMINT_WIDTH, MAX_MEDIUMINT_WIDTH) + { } +}; + + +class Type_limits_sint24: public Type_limits_int +{ +public: + Type_limits_sint24() + :Type_limits_int(MAX_MEDIUMINT_WIDTH - 1, MAX_MEDIUMINT_WIDTH) + { } +}; + + +/* + UNSIGNED INT: 0..4294967295 digits=10 nchars=10 + SIGNED INT: -2147483648..2147483647 digits=10 nchars=11 +*/ +class Type_limits_uint32: public Type_limits_int +{ +public: + Type_limits_uint32() + :Type_limits_int(MAX_INT_WIDTH, MAX_INT_WIDTH) + { } +}; + + + +class Type_limits_sint32: public Type_limits_int +{ +public: + Type_limits_sint32() + :Type_limits_int(MAX_INT_WIDTH, MAX_INT_WIDTH + 1) + { } +}; + + +/* + UNSIGNED BIGINT: 0..18446744073709551615 digits=20 nchars=20 + SIGNED BIGINT: -9223372036854775808..9223372036854775807 digits=19 nchars=20 +*/ +class Type_limits_uint64: public Type_limits_int +{ +public: + Type_limits_uint64(): Type_limits_int(MAX_BIGINT_WIDTH, MAX_BIGINT_WIDTH) + { } +}; + + +class Type_limits_sint64: public Type_limits_int +{ +public: + Type_limits_sint64() + :Type_limits_int(MAX_BIGINT_WIDTH 
- 1, MAX_BIGINT_WIDTH) + { } +}; + + + +class Type_handler_int_result: public Type_handler_numeric +{ +public: + Item_result result_type() const override { return INT_RESULT; } + Item_result cmp_type() const override { return INT_RESULT; } + enum_dynamic_column_type dyncol_type(const Type_all_attributes *attr) const override + { + return attr->unsigned_flag ? DYN_COL_UINT : DYN_COL_INT; + } + bool is_order_clause_position_type() const override { return true; } + bool is_limit_clause_valid_type() const override { return true; } + virtual ~Type_handler_int_result() = default; + const Type_handler *type_handler_for_comparison() const override; + int stored_field_cmp_to_item(THD *thd, Field *field, Item *item) const override; + bool subquery_type_allows_materialization(const Item *inner, + const Item *outer, + bool is_in_predicate) + const override; + Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *) const override; + Field *make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const override; + void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp) const override; + uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp) const override; + void + Column_definition_attributes_frm_pack(const Column_definition_attributes *at, + uchar *buff) const override; + void sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *attr) const override; + bool Item_const_eq(const Item_const *a, const Item_const *b, + bool binary_cmp) const override; + bool Item_eq_value(THD *thd, const Type_cmp_attributes *attr, + Item *a, Item *b) const override; + decimal_digits_t Item_decimal_precision(const Item *item) const override; + bool Item_save_in_value(THD *thd, Item *item, st_value *value) const override; + bool Item_param_set_from_value(THD *thd, + Item_param *param, + 
const Type_all_attributes *attr, + const st_value *value) const override; + void Item_update_null_value(Item *item) const override; + int Item_save_in_field(Item *item, Field *field, bool no_conversions) const override; + Item *make_const_item_for_comparison(THD *, Item *src, const Item *cmp) const override; + Item_cache *Item_get_cache(THD *thd, const Item *item) const override; + bool set_comparator_func(THD *thd, Arg_comparator *cmp) const override; + bool Item_hybrid_func_fix_attributes(THD *thd, + const LEX_CSTRING &name, + Type_handler_hybrid_field_type *, + Type_all_attributes *atrr, + Item **items, uint nitems) const override; + bool Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *func) const override; + bool Item_sum_sum_fix_length_and_dec(Item_sum_sum *) const override; + bool Item_sum_avg_fix_length_and_dec(Item_sum_avg *) const override; + bool Item_sum_variance_fix_length_and_dec(Item_sum_variance *) const override; + bool Item_val_bool(Item *item) const override; + void Item_get_date(THD *thd, Item *item, Temporal::Warn *warn, + MYSQL_TIME *ltime, date_mode_t fuzzydate) const override; + longlong Item_val_int_signed_typecast(Item *item) const override; + longlong Item_val_int_unsigned_typecast(Item *item) const override; + String *Item_func_hex_val_str_ascii(Item_func_hex *item, String *str) const override; + String *Item_func_hybrid_field_type_val_str(Item_func_hybrid_field_type *, + String *) const override; + double Item_func_hybrid_field_type_val_real(Item_func_hybrid_field_type *) + const override; + longlong Item_func_hybrid_field_type_val_int(Item_func_hybrid_field_type *) + const override; + my_decimal *Item_func_hybrid_field_type_val_decimal( + Item_func_hybrid_field_type *, + my_decimal *) const override; + void Item_func_hybrid_field_type_get_date(THD *, + Item_func_hybrid_field_type *, + Temporal::Warn *, + MYSQL_TIME *, + date_mode_t fuzzydate) const override; + String *Item_func_min_max_val_str(Item_func_min_max *, String *) const 
override; + longlong Item_func_between_val_int(Item_func_between *func) const override; + cmp_item *make_cmp_item(THD *thd, CHARSET_INFO *cs) const override; + in_vector *make_in_vector(THD *, const Item_func_in *, uint nargs) const override; + bool Item_func_in_fix_comparator_compatible_types(THD *thd, + Item_func_in *) const override; + bool Item_func_round_fix_length_and_dec(Item_func_round *) const override; + bool Item_func_int_val_fix_length_and_dec(Item_func_int_val *) const override; + bool Item_func_abs_fix_length_and_dec(Item_func_abs *) const override; + bool Item_func_neg_fix_length_and_dec(Item_func_neg *) const override; + bool Item_func_plus_fix_length_and_dec(Item_func_plus *) const override; + bool Item_func_minus_fix_length_and_dec(Item_func_minus *) const override; + bool Item_func_mul_fix_length_and_dec(Item_func_mul *) const override; + bool Item_func_div_fix_length_and_dec(Item_func_div *) const override; + bool Item_func_mod_fix_length_and_dec(Item_func_mod *) const override; + const Vers_type_handler *vers() const override { return &vers_type_trx; } +}; + + +class Type_handler_general_purpose_int: public Type_handler_int_result +{ +public: + bool type_can_have_auto_increment_attribute() const override { return true; } + virtual const Type_limits_int *type_limits_int() const= 0; + uint32 max_display_length(const Item *item) const override + { + return type_limits_int()->char_length(); + } + uint32 Item_decimal_notation_int_digits(const Item *item) const override; + bool Item_hybrid_func_fix_attributes(THD *thd, + const LEX_CSTRING &name, + Type_handler_hybrid_field_type *, + Type_all_attributes *atrr, + Item **items, + uint nitems) const override; + bool partition_field_check(const LEX_CSTRING &, Item *item_expr) + const override + { + return partition_field_check_result_type(item_expr, INT_RESULT); + } + bool partition_field_append_value(String *str, + Item *item_expr, + CHARSET_INFO *field_cs, + partition_value_print_mode_t) + const 
override; + const Vers_type_handler *vers() const override { return &vers_type_trx; } +}; + + +class Type_handler_temporal_result: public Type_handler +{ +protected: + decimal_digits_t Item_decimal_scale_with_seconds(const Item *item) const; + decimal_digits_t Item_divisor_precision_increment_with_seconds(const Item *) const; +public: + Item_result result_type() const override { return STRING_RESULT; } + Item_result cmp_type() const override { return TIME_RESULT; } + virtual ~Type_handler_temporal_result() = default; + void + Column_definition_attributes_frm_pack(const Column_definition_attributes *at, + uchar *buff) const override; + void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp) const override; + uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp) const override; + void sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *attr) const override; + bool Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *c, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const override; + bool Item_const_eq(const Item_const *a, const Item_const *b, + bool binary_cmp) const override; + bool Item_param_set_from_value(THD *thd, + Item_param *param, + const Type_all_attributes *attr, + const st_value *value) const override; + uint32 max_display_length(const Item *item) const override; + uint32 Item_decimal_notation_int_digits(const Item *item) const override; + bool can_change_cond_ref_to_const(Item_bool_func2 *target, + Item *target_expr, Item *target_value, + Item_bool_func2 *source, + Item *source_expr, Item *source_const) + const override; + bool subquery_type_allows_materialization(const Item *inner, + const Item *outer, + bool is_in_predicate) + const override; + bool Item_func_min_max_fix_attributes(THD *thd, Item_func_min_max *func, + Item **items, uint nitems) + const override; + 
bool Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *) const override; + bool Item_sum_sum_fix_length_and_dec(Item_sum_sum *) const override; + bool Item_sum_avg_fix_length_and_dec(Item_sum_avg *) const override; + bool Item_sum_variance_fix_length_and_dec(Item_sum_variance *)const override; + bool Item_val_bool(Item *item) const override; + void Item_get_date(THD *thd, Item *item, Temporal::Warn *warn, + MYSQL_TIME *ltime, date_mode_t fuzzydate) const override; + longlong Item_val_int_signed_typecast(Item *item) const override; + longlong Item_val_int_unsigned_typecast(Item *item) const override; + String *Item_func_hex_val_str_ascii(Item_func_hex *, String *)const override; + String *Item_func_hybrid_field_type_val_str(Item_func_hybrid_field_type *, + String *) const override; + double Item_func_hybrid_field_type_val_real(Item_func_hybrid_field_type *) + const override; + longlong Item_func_hybrid_field_type_val_int(Item_func_hybrid_field_type *) + const override; + my_decimal *Item_func_hybrid_field_type_val_decimal( + Item_func_hybrid_field_type *, + my_decimal *) const override; + void Item_func_hybrid_field_type_get_date(THD *, + Item_func_hybrid_field_type *, + Temporal::Warn *, + MYSQL_TIME *, + date_mode_t) const override; + bool Item_func_min_max_get_date(THD *thd, Item_func_min_max*, + MYSQL_TIME *, date_mode_t) const override; + bool Item_func_between_fix_length_and_dec(Item_func_between *)const override; + bool Item_func_in_fix_comparator_compatible_types(THD *, Item_func_in *) + const override; + bool Item_func_abs_fix_length_and_dec(Item_func_abs *) const override; + bool Item_func_neg_fix_length_and_dec(Item_func_neg *) const override; + bool Item_func_plus_fix_length_and_dec(Item_func_plus *) const override; + bool Item_func_minus_fix_length_and_dec(Item_func_minus *) const override; + bool Item_func_mul_fix_length_and_dec(Item_func_mul *) const override; + bool Item_func_div_fix_length_and_dec(Item_func_div *) const override; + bool 
Item_func_mod_fix_length_and_dec(Item_func_mod *) const override; + const Vers_type_handler *vers() const override; +}; + + +class Type_handler_string_result: public Type_handler +{ + decimal_digits_t Item_temporal_precision(THD *thd, Item *item, bool is_time) const; +public: + const Name &default_value() const override; + protocol_send_type_t protocol_send_type() const override + { + return PROTOCOL_SEND_STRING; + } + Item_result result_type() const override { return STRING_RESULT; } + Item_result cmp_type() const override { return STRING_RESULT; } + enum_dynamic_column_type dyncol_type(const Type_all_attributes *) const + override + { + return DYN_COL_STRING; + } + CHARSET_INFO *charset_for_protocol(const Item *item) const override; + virtual ~Type_handler_string_result() = default; + const Type_handler *type_handler_for_comparison() const override; + int stored_field_cmp_to_item(THD *thd, Field *field, Item *item) const + override; + const Type_handler * + type_handler_adjusted_to_max_octet_length(uint max_octet_length, + CHARSET_INFO *cs) const override; + void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp) const override; + uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *tmp) const override; + void sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *attr) const override; + bool is_packable() const override { return true; } + bool union_element_finalize(Item_type_holder* item) const override; + uint calc_key_length(const Column_definition &def) const override; + bool Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *c, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const override; + bool Column_definition_redefine_stage1(Column_definition *def, + const Column_definition *dup, + const handler *file) + const override; + void + 
Column_definition_attributes_frm_pack(const Column_definition_attributes *at, + uchar *buff) const override; + uint32 max_display_length(const Item *item) const override; + /* + The next method returns 309 for long stringified doubles in scientific + notation, e.g. FORMAT('1e308', 2). + */ + uint32 Item_decimal_notation_int_digits(const Item *item) const override + { return 309; } + bool Item_const_eq(const Item_const *a, const Item_const *b, + bool binary_cmp) const override; + bool Item_eq_value(THD *thd, const Type_cmp_attributes *attr, + Item *a, Item *b) const override; + decimal_digits_t Item_time_precision(THD *thd, Item *item) const override + { + return Item_temporal_precision(thd, item, true); + } + decimal_digits_t Item_datetime_precision(THD *thd, Item *item) const override + { + return Item_temporal_precision(thd, item, false); + } + decimal_digits_t Item_decimal_precision(const Item *item) const override; + void Item_update_null_value(Item *item) const override; + bool Item_save_in_value(THD *thd, Item *item, st_value *value) const override; + void Item_param_setup_conversion(THD *thd, Item_param *) const override; + void Item_param_set_param_func(Item_param *param, + uchar **pos, ulong len) const override; + bool Item_param_set_from_value(THD *thd, + Item_param *param, + const Type_all_attributes *attr, + const st_value *value) const override; + bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override + { + return Item_send_str(item, protocol, buf); + } + int Item_save_in_field(Item *item, Field *field, bool no_conversions) const + override; + String *print_item_value(THD *thd, Item *item, String *str) const override + { + return print_item_value_csstr(thd, item, str); + } + bool can_change_cond_ref_to_const(Item_bool_func2 *target, + Item *target_expr, Item *target_value, + Item_bool_func2 *source, + Item *source_expr, Item *source_const) const + override; + bool subquery_type_allows_materialization(const Item *inner, + const 
Item *outer, + bool is_in_predicate) + const override; + Item *make_const_item_for_comparison(THD *, Item *src, const Item *cmp) const + override; + Item_cache *Item_get_cache(THD *thd, const Item *item) const override; + bool set_comparator_func(THD *thd, Arg_comparator *cmp) const override; + bool Item_hybrid_func_fix_attributes(THD *thd, + const LEX_CSTRING &name, + Type_handler_hybrid_field_type *, + Type_all_attributes *atrr, + Item **items, uint nitems) const + override; + bool Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *func) const override; + bool Item_sum_sum_fix_length_and_dec(Item_sum_sum *) const override; + bool Item_sum_avg_fix_length_and_dec(Item_sum_avg *) const override; + bool Item_sum_variance_fix_length_and_dec(Item_sum_variance *) const override; + bool Item_func_signed_fix_length_and_dec(Item_func_signed *item) const + override; + bool Item_func_unsigned_fix_length_and_dec(Item_func_unsigned *item) const + override; + bool Item_val_bool(Item *item) const override; + void Item_get_date(THD *thd, Item *item, Temporal::Warn *warn, + MYSQL_TIME *ltime, date_mode_t fuzzydate) const override; + longlong Item_val_int_signed_typecast(Item *item) const override; + longlong Item_val_int_unsigned_typecast(Item *item) const override; + String *Item_func_hex_val_str_ascii(Item_func_hex *item, String *str) const + override; + String *Item_func_hybrid_field_type_val_str(Item_func_hybrid_field_type *, + String *) const override; + double Item_func_hybrid_field_type_val_real(Item_func_hybrid_field_type *) + const override; + longlong Item_func_hybrid_field_type_val_int(Item_func_hybrid_field_type *) + const override; + my_decimal *Item_func_hybrid_field_type_val_decimal( + Item_func_hybrid_field_type *, + my_decimal *) const override; + void Item_func_hybrid_field_type_get_date(THD *, + Item_func_hybrid_field_type *, + Temporal::Warn *, + MYSQL_TIME *, + date_mode_t fuzzydate) + const override; + String *Item_func_min_max_val_str(Item_func_min_max *, 
String *) const + override; + double Item_func_min_max_val_real(Item_func_min_max *) const override; + longlong Item_func_min_max_val_int(Item_func_min_max *) const override; + my_decimal *Item_func_min_max_val_decimal(Item_func_min_max *, + my_decimal *) const override; + bool Item_func_min_max_get_date(THD *thd, Item_func_min_max*, + MYSQL_TIME *, date_mode_t fuzzydate) const + override; + bool Item_func_between_fix_length_and_dec(Item_func_between *func) const + override; + longlong Item_func_between_val_int(Item_func_between *func) const override; + bool Item_char_typecast_fix_length_and_dec(Item_char_typecast *) const + override; + cmp_item *make_cmp_item(THD *thd, CHARSET_INFO *cs) const override; + in_vector *make_in_vector(THD *, const Item_func_in *, uint nargs) const + override; + bool Item_func_in_fix_comparator_compatible_types(THD *thd, Item_func_in *) + const override; + bool Item_func_round_fix_length_and_dec(Item_func_round *) const override; + bool Item_func_int_val_fix_length_and_dec(Item_func_int_val *) const override; + bool Item_func_abs_fix_length_and_dec(Item_func_abs *) const override; + bool Item_func_neg_fix_length_and_dec(Item_func_neg *) const override; + bool Item_func_plus_fix_length_and_dec(Item_func_plus *) const override; + bool Item_func_minus_fix_length_and_dec(Item_func_minus *) const override; + bool Item_func_mul_fix_length_and_dec(Item_func_mul *) const override; + bool Item_func_div_fix_length_and_dec(Item_func_div *) const override; + bool Item_func_mod_fix_length_and_dec(Item_func_mod *) const override; + const Vers_type_handler *vers() const override; +}; + + +class Type_handler_general_purpose_string: public Type_handler_string_result +{ +public: + bool is_general_purpose_string_type() const override { return true; } + bool Column_definition_bulk_alter(Column_definition *c, + const Column_derived_attributes + *derived_attr, + const Column_bulk_alter_attributes + *bulk_alter_attr) + const override; +}; + + +/*** + 
Instantiable classes for every MYSQL_TYPE_XXX + + There are no Type_handler_xxx for the following types: + - MYSQL_TYPE_VAR_STRING (old VARCHAR) - mapped to MYSQL_TYPE_VARSTRING + - MYSQL_TYPE_ENUM - mapped to MYSQL_TYPE_VARSTRING + - MYSQL_TYPE_SET: - mapped to MYSQL_TYPE_VARSTRING + + because the functionality that currently uses Type_handler + (e.g. hybrid type functions) does not need to distinguish between + these types and VARCHAR. + For example: + CREATE TABLE t2 AS SELECT COALESCE(enum_column) FROM t1; + creates a VARCHAR column. + + There most likely be Type_handler_enum and Type_handler_set later, + when the Type_handler infrastructure gets used in more pieces of the code. +*/ + + +class Type_handler_tiny: public Type_handler_general_purpose_int +{ +public: + virtual ~Type_handler_tiny() = default; + enum_field_types field_type() const override { return MYSQL_TYPE_TINY; } + const Type_handler *type_handler_unsigned() const override; + const Type_handler *type_handler_signed() const override; + protocol_send_type_t protocol_send_type() const override + { + return PROTOCOL_SEND_TINY; + } + const Type_limits_int *type_limits_int() const override; + uint32 calc_pack_length(uint32 length) const override { return 1; } + uint32 max_display_length_for_field(const Conv_source &src) const override + { return 4; } + bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override + { + return Item_send_tiny(item, protocol, buf); + } + Field *make_conversion_table_field(MEM_ROOT *root, + TABLE *table, uint metadata, + const Field *target) const override; + bool Column_definition_fix_attributes(Column_definition *c) const override; + bool Column_definition_prepare_stage2(Column_definition *c, + handler *file, + ulonglong table_flags) const override + { return Column_definition_prepare_stage2_legacy_num(c, MYSQL_TYPE_TINY); } + Field *make_schema_field(MEM_ROOT *root, + TABLE *table, + const Record_addr &addr, + const ST_FIELD_INFO &def) const override; + 
Field *make_table_field_from_def(TABLE_SHARE *share, + MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const override; + void Item_param_set_param_func(Item_param *param, + uchar **pos, ulong len) const override; +}; + + +class Type_handler_utiny: public Type_handler_tiny +{ +public: + uint flags() const override { return UNSIGNED_FLAG; } + const Type_limits_int *type_limits_int() const override; +}; + + +class Type_handler_short: public Type_handler_general_purpose_int +{ +public: + virtual ~Type_handler_short() = default; + enum_field_types field_type() const override { return MYSQL_TYPE_SHORT; } + const Type_handler *type_handler_unsigned() const override; + const Type_handler *type_handler_signed() const override; + protocol_send_type_t protocol_send_type() const override + { + return PROTOCOL_SEND_SHORT; + } + bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override + { + return Item_send_short(item, protocol, buf); + } + const Type_limits_int *type_limits_int() const override; + uint32 max_display_length_for_field(const Conv_source &src) const override + { return 6; } + uint32 calc_pack_length(uint32 length) const override{ return 2; } + Field *make_conversion_table_field(MEM_ROOT *root, + TABLE *table, uint metadata, + const Field *target) const override; + bool Column_definition_fix_attributes(Column_definition *c) const override; + bool Column_definition_prepare_stage2(Column_definition *c, + handler *file, + ulonglong table_flags) const override + { return Column_definition_prepare_stage2_legacy_num(c, MYSQL_TYPE_SHORT); } + Field *make_schema_field(MEM_ROOT *root, + TABLE *table, + const Record_addr &addr, + const ST_FIELD_INFO &def) const override; + Field *make_table_field_from_def(TABLE_SHARE *share, + MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Bit_addr &bit, + const 
Column_definition_attributes *attr, + uint32 flags) const override; + void Item_param_set_param_func(Item_param *param, + uchar **pos, ulong len) const override; +}; + + +class Type_handler_ushort: public Type_handler_short +{ +public: + uint flags() const override { return UNSIGNED_FLAG; } + const Type_limits_int *type_limits_int() const override; +}; + + +class Type_handler_long: public Type_handler_general_purpose_int +{ +public: + virtual ~Type_handler_long() = default; + enum_field_types field_type() const override { return MYSQL_TYPE_LONG; } + const Type_handler *type_handler_unsigned() const override; + const Type_handler *type_handler_signed() const override; + protocol_send_type_t protocol_send_type() const override + { + return PROTOCOL_SEND_LONG; + } + const Type_limits_int *type_limits_int() const override; + uint32 max_display_length_for_field(const Conv_source &src) const override + { return 11; } + uint32 calc_pack_length(uint32 length) const override { return 4; } + bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override + { + return Item_send_long(item, protocol, buf); + } + Field *make_conversion_table_field(MEM_ROOT *root, + TABLE *table, uint metadata, + const Field *target) const override; + bool Column_definition_fix_attributes(Column_definition *c) const override; + bool Column_definition_prepare_stage2(Column_definition *c, + handler *file, + ulonglong table_flags) const override + { return Column_definition_prepare_stage2_legacy_num(c, MYSQL_TYPE_LONG); } + Field *make_schema_field(MEM_ROOT *root, + TABLE *table, + const Record_addr &addr, + const ST_FIELD_INFO &def) const override; + Field *make_table_field_from_def(TABLE_SHARE *share, + MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const override; + void Item_param_set_param_func(Item_param *param, + uchar **pos, ulong len) const override; +}; + + +class 
Type_handler_ulong: public Type_handler_long +{ +public: + uint flags() const override { return UNSIGNED_FLAG; } + const Type_limits_int *type_limits_int() const override; +}; + + +class Type_handler_bool: public Type_handler_long +{ +public: + bool is_bool_type() const override { return true; } + const Type_handler *type_handler_unsigned() const override; + const Type_handler *type_handler_signed() const override; + void Item_update_null_value(Item *item) const override; + bool Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *) const override; +}; + + +class Type_handler_longlong: public Type_handler_general_purpose_int +{ +public: + virtual ~Type_handler_longlong() = default; + enum_field_types field_type() const override{ return MYSQL_TYPE_LONGLONG; } + const Type_handler *type_handler_unsigned() const override; + const Type_handler *type_handler_signed() const override; + protocol_send_type_t protocol_send_type() const override + { + return PROTOCOL_SEND_LONGLONG; + } + const Type_limits_int *type_limits_int() const override; + uint32 max_display_length_for_field(const Conv_source &src) const override + { return 20; } + uint32 calc_pack_length(uint32 length) const override { return 8; } + Item *create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) const override; + bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override + { + return Item_send_longlong(item, protocol, buf); + } + Field *make_conversion_table_field(MEM_ROOT *root, + TABLE *table, uint metadata, + const Field *target) const override; + bool Column_definition_fix_attributes(Column_definition *c) const override; + bool Column_definition_prepare_stage2(Column_definition *c, + handler *file, + ulonglong table_flags) const override + { + return Column_definition_prepare_stage2_legacy_num(c, MYSQL_TYPE_LONGLONG); + } + Field *make_schema_field(MEM_ROOT *root, + TABLE *table, + const Record_addr &addr, + const ST_FIELD_INFO &def) const override; + Field 
*make_table_field_from_def(TABLE_SHARE *share, + MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const override; + void Item_param_set_param_func(Item_param *param, + uchar **pos, ulong len) const override; +}; + + +class Type_handler_ulonglong: public Type_handler_longlong +{ +public: + uint flags() const override { return UNSIGNED_FLAG; } + const Type_limits_int *type_limits_int() const override; +}; + + +class Type_handler_vers_trx_id: public Type_handler_ulonglong +{ +public: + virtual ~Type_handler_vers_trx_id() = default; + Field *make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const override; +}; + + +class Type_handler_int24: public Type_handler_general_purpose_int +{ +public: + virtual ~Type_handler_int24() = default; + enum_field_types field_type() const override { return MYSQL_TYPE_INT24; } + const Type_handler *type_handler_unsigned() const override; + const Type_handler *type_handler_signed() const override; + protocol_send_type_t protocol_send_type() const override + { + return PROTOCOL_SEND_LONG; + } + bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override + { + return Item_send_long(item, protocol, buf); + } + const Type_limits_int *type_limits_int() const override; + uint32 max_display_length_for_field(const Conv_source &src) const override + { return 9; } + uint32 calc_pack_length(uint32 length) const override { return 3; } + Field *make_conversion_table_field(MEM_ROOT *mem_root, + TABLE *table, uint metadata, + const Field *target) const override; + bool Column_definition_fix_attributes(Column_definition *c) const override; + bool Column_definition_prepare_stage2(Column_definition *c, + handler *file, + ulonglong table_flags) const override + { return Column_definition_prepare_stage2_legacy_num(c, MYSQL_TYPE_INT24); } + 
Field *make_table_field_from_def(TABLE_SHARE *share, + MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const override; +}; + + +class Type_handler_uint24: public Type_handler_int24 +{ +public: + uint flags() const override { return UNSIGNED_FLAG; } + const Type_limits_int *type_limits_int() const override; +}; + + +class Type_handler_year: public Type_handler_int_result +{ +public: + virtual ~Type_handler_year() = default; + enum_field_types field_type() const override { return MYSQL_TYPE_YEAR; } + uint flags() const override { return UNSIGNED_FLAG; } + protocol_send_type_t protocol_send_type() const override + { + return PROTOCOL_SEND_SHORT; + } + uint32 max_display_length(const Item *item) const override; + uint32 Item_decimal_notation_int_digits(const Item *item) const override + { return 4; }; + uint32 max_display_length_for_field(const Conv_source &src) const override + { return 4; } + uint32 calc_pack_length(uint32 length) const override { return 1; } + bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override + { + return Item_send_short(item, protocol, buf); + } + Field *make_conversion_table_field(MEM_ROOT *root, + TABLE *table, uint metadata, + const Field *target) const override; + bool Column_definition_fix_attributes(Column_definition *c) const override; + void Column_definition_reuse_fix_attributes(THD *thd, + Column_definition *c, + const Field *field) + const override; + bool Column_definition_prepare_stage2(Column_definition *c, + handler *file, + ulonglong table_flags) const override + { return Column_definition_prepare_stage2_legacy_num(c, MYSQL_TYPE_YEAR); } + Field *make_table_field_from_def(TABLE_SHARE *share, + MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const override; + Item_cache *Item_get_cache(THD 
*thd, const Item *item) const override; + bool Item_func_round_fix_length_and_dec(Item_func_round *) const override; + bool Item_func_int_val_fix_length_and_dec(Item_func_int_val *)const override; + void Item_get_date(THD *thd, Item *item, Temporal::Warn *warn, + MYSQL_TIME *ltime, date_mode_t fuzzydate) const override; + void Item_func_hybrid_field_type_get_date(THD *, + Item_func_hybrid_field_type *item, + Temporal::Warn *, + MYSQL_TIME *to, + date_mode_t fuzzydate) + const override; + const Vers_type_handler *vers() const override { return NULL; } +}; + + +class Type_handler_bit: public Type_handler_int_result +{ +public: + virtual ~Type_handler_bit() = default; + enum_field_types field_type() const override { return MYSQL_TYPE_BIT; } + uint flags() const override { return UNSIGNED_FLAG; } + protocol_send_type_t protocol_send_type() const override + { + return PROTOCOL_SEND_STRING; + } + uint32 max_display_length(const Item *item) const override; + uint32 Item_decimal_notation_int_digits(const Item *item) const override; + static uint32 Bit_decimal_notation_int_digits_by_nbits(uint nbits); + uint32 max_display_length_for_field(const Conv_source &src) const override; + uint32 calc_pack_length(uint32 length) const override { return length / 8; } + uint calc_key_length(const Column_definition &def) const override; + void + Column_definition_attributes_frm_pack(const Column_definition_attributes *at, + uchar *buff) const override; + bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override + { + return Item_send_str(item, protocol, buf); + } + String *print_item_value(THD *thd, Item *item, String *str) const override + { + return print_item_value_csstr(thd, item, str); + } + void show_binlog_type(const Conv_source &src, const Field &, String *str) + const override; + bool Item_func_round_fix_length_and_dec(Item_func_round *) const override; + bool Item_func_int_val_fix_length_and_dec(Item_func_int_val*) const override; + Field 
/*
  NOTE(review): the class whose member declarations appear immediately below
  begins above this chunk; its fragment is kept verbatim.  These are
  per-SQL-type "Type_handler" interface classes: each one customizes field
  creation, wire-protocol send type, pack length and Item evaluation for one
  column type.
*/
*make_conversion_table_field(MEM_ROOT *root,
                             TABLE *table, uint metadata,
                             const Field *target) const override;
  bool Column_definition_fix_attributes(Column_definition *c) const override;
  bool Column_definition_prepare_stage1(THD *thd,
                                        MEM_ROOT *mem_root,
                                        Column_definition *c,
                                        column_definition_type_t type,
                                        const Column_derived_attributes
                                        *derived_attr)
                                        const override;
  bool Column_definition_redefine_stage1(Column_definition *def,
                                         const Column_definition *dup,
                                         const handler *file)
                                         const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override;
  Field *make_table_field(MEM_ROOT *root,
                          const LEX_CSTRING *name,
                          const Record_addr &addr,
                          const Type_all_attributes &attr,
                          TABLE_SHARE *share) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
};


/*
  Handler for FLOAT (MYSQL_TYPE_FLOAT): packed as sizeof(float) bytes,
  sent as PROTOCOL_SEND_FLOAT, may carry AUTO_INCREMENT.
*/
class Type_handler_float: public Type_handler_real_result
{
public:
  virtual ~Type_handler_float() = default;
  enum_field_types field_type() const override { return MYSQL_TYPE_FLOAT; }
  protocol_send_type_t protocol_send_type() const override
  {
    return PROTOCOL_SEND_FLOAT;
  }
  bool type_can_have_auto_increment_attribute() const override { return true; }
  uint32 max_display_length(const Item *item) const override { return 25; }
  uint32 max_display_length_for_field(const Conv_source &src) const override
  { return 12; }
  uint32 Item_decimal_notation_int_digits(const Item *item) const override
  { return 39; }
  uint32 calc_pack_length(uint32 length) const override { return sizeof(float); }
  Item *create_typecast_item(THD *thd, Item *item,
                             const Type_cast_attributes &attr) const override;
  bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override
  {
    return Item_send_float(item, protocol, buf);
  }
  Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *)
                                            const override;
  Field *make_conversion_table_field(MEM_ROOT *root,
                                     TABLE *table, uint metadata,
                                     const Field *target) const override;
  bool Column_definition_fix_attributes(Column_definition *c) const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override
  { return Column_definition_prepare_stage2_legacy_real(c, MYSQL_TYPE_FLOAT); }
  Field *make_schema_field(MEM_ROOT *root,
                           TABLE *table,
                           const Record_addr &addr,
                           const ST_FIELD_INFO &def) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
  void Item_param_set_param_func(Item_param *param,
                                 uchar **pos, ulong len) const override;

  Item_cache *Item_get_cache(THD *thd, const Item *item) const override;
  String *Item_func_hybrid_field_type_val_str(Item_func_hybrid_field_type *,
                                              String *) const override;
  String *Item_func_min_max_val_str(Item_func_min_max *, String *)
                                    const override;
};


/*
  Handler for DOUBLE (MYSQL_TYPE_DOUBLE): packed as sizeof(double) bytes,
  sent as PROTOCOL_SEND_DOUBLE, may carry AUTO_INCREMENT.
*/
class Type_handler_double: public Type_handler_real_result
{
public:
  virtual ~Type_handler_double() = default;
  enum_field_types field_type() const override { return MYSQL_TYPE_DOUBLE; }
  protocol_send_type_t protocol_send_type() const override
  {
    return PROTOCOL_SEND_DOUBLE;
  }
  bool type_can_have_auto_increment_attribute() const override { return true; }
  uint32 max_display_length(const Item *item) const override { return 53; }
  uint32 Item_decimal_notation_int_digits(const Item *item) const override
  { return 309; }
  uint32 max_display_length_for_field(const Conv_source &src) const override
  { return 22; }
  uint32 calc_pack_length(uint32 length) const override
  {
    return sizeof(double);
  }
  Item *create_typecast_item(THD *thd, Item *item,
                             const Type_cast_attributes &attr) const override;
  bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override
  {
    return Item_send_double(item, protocol, buf);
  }
  Field *make_conversion_table_field(MEM_ROOT *root,
                                     TABLE *table, uint metadata,
                                     const Field *target) const override;
  bool Column_definition_fix_attributes(Column_definition *c) const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override
  { return Column_definition_prepare_stage2_legacy_real(c, MYSQL_TYPE_DOUBLE); }
  Field *make_schema_field(MEM_ROOT *root,
                           TABLE *table,
                           const Record_addr &addr,
                           const ST_FIELD_INFO &def) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
  void Item_param_set_param_func(Item_param *param,
                                 uchar **pos, ulong len) const override;

  Item_cache *Item_get_cache(THD *thd, const Item *item) const override;
  String *Item_func_hybrid_field_type_val_str(Item_func_hybrid_field_type *,
                                              String *) const override;
  String *Item_func_min_max_val_str(Item_func_min_max *, String *)
                                    const override;
};


/*
  Behavior shared by both on-disk representations of TIME; the
  format-specific parts live in Type_handler_time (MariaDB-5.3 format)
  and Type_handler_time2 (MySQL-5.6 format) below.
*/
class Type_handler_time_common: public Type_handler_temporal_result
{
public:
  virtual ~Type_handler_time_common() = default;
  const Name &default_value() const override;
  enum_field_types field_type() const override { return MYSQL_TYPE_TIME; }
  enum_dynamic_column_type dyncol_type(const Type_all_attributes *attr)
                                       const override
  {
    return DYN_COL_TIME;
  }
  protocol_send_type_t protocol_send_type() const override
  {
    return PROTOCOL_SEND_TIME;
  }
  enum_mysql_timestamp_type mysql_timestamp_type() const override
  {
    return MYSQL_TIMESTAMP_TIME;
  }
  /* TIME supports a native (binary) value format, see cmp_native() etc. */
  bool is_val_native_ready() const override { return true; }
  const Type_handler *type_handler_for_native_format() const override;
  int cmp_native(const Native &a, const Native &b) const override;
  bool Item_val_native_with_conversion(THD *thd, Item *, Native *to)
                                       const override;
  bool Item_val_native_with_conversion_result(THD *thd, Item *, Native *to)
                                              const override;
  bool Item_param_val_native(THD *thd, Item_param *item, Native *to)
                             const override;
  bool partition_field_check(const LEX_CSTRING &, Item *item_expr)
                             const override
  {
    return partition_field_check_result_type(item_expr, STRING_RESULT);
  }
  Field *make_schema_field(MEM_ROOT *root,
                           TABLE *table,
                           const Record_addr &addr,
                           const ST_FIELD_INFO &def) const override;
  Item_literal *create_literal_item(THD *thd, const char *str, size_t length,
                                    CHARSET_INFO *cs, bool send_error)
                                    const override;
  Item *create_typecast_item(THD *thd, Item *item,
                             const Type_cast_attributes &attr)
                             const override;
  bool Item_eq_value(THD *thd, const Type_cmp_attributes *attr,
                     Item *a, Item *b) const override;
  decimal_digits_t Item_decimal_scale(const Item *item) const override
  {
    return Item_decimal_scale_with_seconds(item);
  }
  decimal_digits_t Item_decimal_precision(const Item *item) const override;
  decimal_digits_t Item_divisor_precision_increment(const Item *item) const override
  {
    return Item_divisor_precision_increment_with_seconds(item);
  }
  const Type_handler *type_handler_for_comparison() const override;
  int stored_field_cmp_to_item(THD *thd, Field *field, Item *item)
                               const override;
  void Column_definition_implicit_upgrade(Column_definition *c) const override;
  bool Column_definition_fix_attributes(Column_definition *c) const override;
  bool
  Column_definition_attributes_frm_unpack(Column_definition_attributes *attr,
                                          TABLE_SHARE *share,
                                          const uchar *buffer,
                                          LEX_CUSTRING *gis_options)
                                          const override;
  bool Item_save_in_value(THD *thd, Item *item, st_value *value) const override;
  bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override
  {
    return Item_send_time(item, protocol, buf);
  }
  void Item_update_null_value(Item *item) const override;
  int Item_save_in_field(Item *item, Field *field, bool no_conversions)
                         const override;
  String *print_item_value(THD *thd, Item *item, String *str) const override;
  Item_cache *Item_get_cache(THD *thd, const Item *item) const override;
  longlong Item_val_int_unsigned_typecast(Item *item) const override;
  bool Item_hybrid_func_fix_attributes(THD *thd,
                                       const LEX_CSTRING &name,
                                       Type_handler_hybrid_field_type *,
                                       Type_all_attributes *atrr,
                                       Item **items, uint nitems)
                                       const override;
  String *Item_func_hybrid_field_type_val_str(Item_func_hybrid_field_type *,
                                              String *) const override;
  double Item_func_hybrid_field_type_val_real(Item_func_hybrid_field_type *)
                                              const override;
  longlong Item_func_hybrid_field_type_val_int(Item_func_hybrid_field_type *)
                                               const override;
  my_decimal *Item_func_hybrid_field_type_val_decimal(
                                              Item_func_hybrid_field_type *,
                                              my_decimal *) const override;
  void Item_func_hybrid_field_type_get_date(THD *,
                                            Item_func_hybrid_field_type *,
                                            Temporal::Warn *,
                                            MYSQL_TIME *,
                                            date_mode_t fuzzydate)
                                            const override;
  String *Item_func_min_max_val_str(Item_func_min_max *, String *) const override;
  double Item_func_min_max_val_real(Item_func_min_max *) const override;
  longlong Item_func_min_max_val_int(Item_func_min_max *) const override;
  my_decimal *Item_func_min_max_val_decimal(Item_func_min_max *,
                                            my_decimal *) const override;
  bool Item_func_min_max_get_date(THD *thd, Item_func_min_max*,
                                  MYSQL_TIME *, date_mode_t fuzzydate)
                                  const override;
  longlong Item_func_between_val_int(Item_func_between *func) const override;
  bool Item_func_round_fix_length_and_dec(Item_func_round *) const override;
  bool Item_func_int_val_fix_length_and_dec(Item_func_int_val*) const override;
  Item *make_const_item_for_comparison(THD *, Item *src, const Item *cmp)
                                       const override;
  bool set_comparator_func(THD *thd, Arg_comparator *cmp) const override;
  cmp_item *make_cmp_item(THD *thd, CHARSET_INFO *cs) const override;
  in_vector *make_in_vector(THD *, const Item_func_in *, uint nargs)
                            const override;
  void Item_param_set_param_func(Item_param *param, uchar **pos, ulong len)
                                 const override;
};


/* TIME in the MariaDB-5.3 on-disk format (see version_mariadb53()). */
class Type_handler_time: public Type_handler_time_common
{
  /* number of bytes to store TIME(N) */
  static uint m_hires_bytes[MAX_DATETIME_PRECISION+1];
public:
  static uint hires_bytes(uint dec) { return m_hires_bytes[dec]; }
  virtual ~Type_handler_time() = default;
  const Name version() const override { return version_mariadb53(); }
  uint32 max_display_length_for_field(const Conv_source &src) const override
  { return MIN_TIME_WIDTH; }
  uint32 calc_pack_length(uint32 length) const override;
  Field *make_conversion_table_field(MEM_ROOT *root,
                                     TABLE *table, uint metadata,
                                     const Field *target) const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override
  { return Column_definition_prepare_stage2_legacy(c, MYSQL_TYPE_TIME); }
  Field *make_table_field(MEM_ROOT *root,
                          const LEX_CSTRING *name,
                          const Record_addr &addr,
                          const Type_all_attributes &attr,
                          TABLE_SHARE *share) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
};


/* TIME in the MySQL-5.6 on-disk format (MYSQL_TYPE_TIME2). */
class Type_handler_time2: public Type_handler_time_common
{
public:
  virtual ~Type_handler_time2() = default;
  const Name version() const override { return version_mysql56(); }
  enum_field_types real_field_type() const override { return MYSQL_TYPE_TIME2; }
  uint32 max_display_length_for_field(const Conv_source &src) const override;
  uint32 calc_pack_length(uint32 length) const override;
  Field *make_conversion_table_field(MEM_ROOT *root,
                                     TABLE *table, uint metadata,
                                     const Field *target) const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override
  { return Column_definition_prepare_stage2_legacy(c, MYSQL_TYPE_TIME2); }
  Field *make_table_field(MEM_ROOT *root,
                          const LEX_CSTRING *name,
                          const Record_addr &addr,
                          const Type_all_attributes &attr,
                          TABLE_SHARE *share) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
};


/*
  Common base for temporal types that carry a date part; the DATE,
  DATETIME and TIMESTAMP handler families below derive from it.
*/
class Type_handler_temporal_with_date: public Type_handler_temporal_result
{
public:
  virtual ~Type_handler_temporal_with_date() = default;
  Item_literal *create_literal_item(THD *thd, const char *str, size_t length,
                                    CHARSET_INFO *cs, bool send_error)
                                    const override;
  bool Item_eq_value(THD *thd, const Type_cmp_attributes *attr,
                     Item *a, Item *b) const override;
  int stored_field_cmp_to_item(THD *thd, Field *field, Item *item)
                               const override;
  bool Item_save_in_value(THD *thd, Item *item, st_value *value)
                          const override;
  bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override
  {
    return Item_send_date(item, protocol, buf);
  }
  void Item_update_null_value(Item *item) const override;
  int Item_save_in_field(Item *item, Field *field, bool no_conversions)
                         const override;
  Item *make_const_item_for_comparison(THD *, Item *src, const Item *cmp)
                                       const override;
  bool set_comparator_func(THD *thd, Arg_comparator *cmp) const override;
  cmp_item *make_cmp_item(THD *thd, CHARSET_INFO *cs) const override;
  in_vector *make_in_vector(THD *, const Item_func_in *, uint nargs)
                            const override;
  longlong Item_func_between_val_int(Item_func_between *func) const override;
};


/*
  Behavior shared by both DATE representations; format-specific parts are
  in Type_handler_date and Type_handler_newdate below.
*/
class Type_handler_date_common: public Type_handler_temporal_with_date
{
public:
  virtual ~Type_handler_date_common() = default;
  const Name &default_value() const override;
  const Type_handler *type_handler_for_comparison() const override;
  enum_field_types field_type() const override { return MYSQL_TYPE_DATE; }
  uint32 max_display_length_for_field(const Conv_source &src) const override
  { return 3; }
  enum_dynamic_column_type dyncol_type(const Type_all_attributes *attr)
                                       const override
  {
    return DYN_COL_DATE;
  }
  protocol_send_type_t protocol_send_type() const override
  {
    return PROTOCOL_SEND_DATE;
  }
  enum_mysql_timestamp_type mysql_timestamp_type() const override
  {
    return MYSQL_TIMESTAMP_DATE;
  }
  bool cond_notnull_field_isnull_to_field_eq_zero() const override
  {
    return true;
  }
  bool partition_field_check(const LEX_CSTRING &, Item *item_expr)
                             const override
  {
    return partition_field_check_result_type(item_expr, STRING_RESULT);
  }
  Field *make_schema_field(MEM_ROOT *root,
                           TABLE *table,
                           const Record_addr &addr,
                           const ST_FIELD_INFO &def) const override;
  Item_literal *create_literal_item(THD *thd, const char *str, size_t length,
                                    CHARSET_INFO *cs, bool send_error)
                                    const override;
  Item *create_typecast_item(THD *thd, Item *item,
                             const Type_cast_attributes &attr)
                             const override;
  bool validate_implicit_default_value(THD *thd,
                                       const Column_definition &def)
                                       const override;
  bool Column_definition_fix_attributes(Column_definition *c) const override;
  void
  Column_definition_attributes_frm_pack(const Column_definition_attributes *at,
                                        uchar *buff) const override;
  decimal_digits_t Item_decimal_precision(const Item *item) const override;
  String *print_item_value(THD *thd, Item *item, String *str) const override;
  Item_cache *Item_get_cache(THD *thd, const Item *item) const override;
  String *Item_func_min_max_val_str(Item_func_min_max *, String *) const override;
  double Item_func_min_max_val_real(Item_func_min_max *) const override;
  longlong Item_func_min_max_val_int(Item_func_min_max *) const override;
  my_decimal *Item_func_min_max_val_decimal(Item_func_min_max *,
                                            my_decimal *) const override;
  bool Item_func_round_fix_length_and_dec(Item_func_round *) const override;
  bool Item_func_int_val_fix_length_and_dec(Item_func_int_val*) const override;
  bool Item_hybrid_func_fix_attributes(THD *thd,
                                       const LEX_CSTRING &name,
                                       Type_handler_hybrid_field_type *,
                                       Type_all_attributes *atrr,
                                       Item **items, uint nitems) const
                                       override;
  bool Item_func_min_max_fix_attributes(THD *thd, Item_func_min_max *func,
                                        Item **items, uint nitems) const
                                        override;
  void Item_param_set_param_func(Item_param *param,
                                 uchar **pos, ulong len) const override;
};

/* DATE packed in 4 bytes (the pre-NEWDATE representation). */
class Type_handler_date: public Type_handler_date_common
{
public:
  virtual ~Type_handler_date() = default;
  uint32 calc_pack_length(uint32 length) const override { return 4; }
  Field *make_conversion_table_field(MEM_ROOT *root,
                                     TABLE *table, uint metadata,
                                     const Field *target) const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override
  { return Column_definition_prepare_stage2_legacy(c, MYSQL_TYPE_DATE); }
  Field *make_table_field(MEM_ROOT *root,
                          const LEX_CSTRING *name,
                          const Record_addr &addr,
                          const Type_all_attributes &attr,
                          TABLE_SHARE *share) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
};


/* DATE packed in 3 bytes (MYSQL_TYPE_NEWDATE representation). */
class Type_handler_newdate: public Type_handler_date_common
{
public:
  virtual ~Type_handler_newdate() = default;
  enum_field_types real_field_type() const override
  {
    return MYSQL_TYPE_NEWDATE;
  }
  uint32 calc_pack_length(uint32 length) const override { return 3; }
  Field *make_conversion_table_field(MEM_ROOT *root,
                                     TABLE *table, uint metadata,
                                     const Field *target) const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override
  { return Column_definition_prepare_stage2_legacy(c, MYSQL_TYPE_NEWDATE); }
  Field *make_table_field(MEM_ROOT *root,
                          const LEX_CSTRING *name,
                          const Record_addr &addr,
                          const Type_all_attributes &attr,
                          TABLE_SHARE *share) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
};


/*
  Behavior shared by both DATETIME representations; format-specific parts
  are in Type_handler_datetime and Type_handler_datetime2 below.
*/
class Type_handler_datetime_common: public Type_handler_temporal_with_date
{
public:
  virtual ~Type_handler_datetime_common() = default;
  const Name &default_value() const override;
  const Type_handler *type_handler_for_comparison() const override;
  enum_field_types field_type() const override
  {
    return MYSQL_TYPE_DATETIME;
  }
  enum_dynamic_column_type dyncol_type(const Type_all_attributes *attr)
                                       const override
  {
    return DYN_COL_DATETIME;
  }
  protocol_send_type_t protocol_send_type() const override
  {
    return PROTOCOL_SEND_DATETIME;
  }
  enum_mysql_timestamp_type mysql_timestamp_type() const override
  {
    return MYSQL_TIMESTAMP_DATETIME;
  }
  bool cond_notnull_field_isnull_to_field_eq_zero() const override
  {
    return true;
  }
  bool partition_field_check(const LEX_CSTRING &, Item *item_expr)
                             const override
  {
    return partition_field_check_result_type(item_expr, STRING_RESULT);
  }
  Field *make_schema_field(MEM_ROOT *root,
                           TABLE *table,
                           const Record_addr &addr,
                           const ST_FIELD_INFO &def) const override;
  Item *create_typecast_item(THD *thd, Item *item,
                             const Type_cast_attributes &attr) const override;
  bool validate_implicit_default_value(THD *thd, const Column_definition &def)
                                       const override;
  void Column_definition_implicit_upgrade(Column_definition *c) const override;
  bool Column_definition_fix_attributes(Column_definition *c) const override;
  bool
  Column_definition_attributes_frm_unpack(Column_definition_attributes *attr,
                                          TABLE_SHARE *share,
                                          const uchar *buffer,
                                          LEX_CUSTRING *gis_options)
                                          const override;
  decimal_digits_t Item_decimal_scale(const Item *item) const override
  {
    return Item_decimal_scale_with_seconds(item);
  }
  decimal_digits_t Item_decimal_precision(const Item *item) const override;
  decimal_digits_t Item_divisor_precision_increment(const Item *item) const override
  {
    return Item_divisor_precision_increment_with_seconds(item);
  }
  bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override
  {
    return Item_send_datetime(item, protocol, buf);
  }
  String *print_item_value(THD *thd, Item *item, String *str) const override;
  Item_cache *Item_get_cache(THD *thd, const Item *item) const override;
  String *Item_func_min_max_val_str(Item_func_min_max *, String *) const override;
  double Item_func_min_max_val_real(Item_func_min_max *) const override;
  longlong Item_func_min_max_val_int(Item_func_min_max *) const override;
  bool Item_func_int_val_fix_length_and_dec(Item_func_int_val*) const override;
  my_decimal *Item_func_min_max_val_decimal(Item_func_min_max *, my_decimal *)
                                            const override;
  bool Item_func_round_fix_length_and_dec(Item_func_round *) const override;
  bool Item_hybrid_func_fix_attributes(THD *thd,
                                       const LEX_CSTRING &name,
                                       Type_handler_hybrid_field_type *,
                                       Type_all_attributes *atrr,
                                       Item **items, uint nitems)
                                       const override;
  void Item_param_set_param_func(Item_param *param, uchar **pos, ulong len)
                                 const override;
};


/* DATETIME in the MariaDB-5.3 on-disk format (see version_mariadb53()). */
class Type_handler_datetime: public Type_handler_datetime_common
{
  /* number of bytes to store DATETIME(N) */
  static uint m_hires_bytes[MAX_DATETIME_PRECISION + 1];
public:
  static uint hires_bytes(uint dec) { return m_hires_bytes[dec]; }
  virtual ~Type_handler_datetime() = default;
  const Name version() const override { return version_mariadb53(); }
  uint32 max_display_length_for_field(const Conv_source &src) const override
  { return MAX_DATETIME_WIDTH; }
  uint32 calc_pack_length(uint32 length) const override;
  Field *make_conversion_table_field(MEM_ROOT *root,
                                     TABLE *table, uint metadata,
                                     const Field *target) const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override
  { return Column_definition_prepare_stage2_legacy(c, MYSQL_TYPE_DATETIME); }
  Field *make_table_field(MEM_ROOT *root,
                          const LEX_CSTRING *name,
                          const Record_addr &addr,
                          const Type_all_attributes &attr,
                          TABLE_SHARE *share) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
};


/* DATETIME in the MySQL-5.6 on-disk format (MYSQL_TYPE_DATETIME2). */
class Type_handler_datetime2: public Type_handler_datetime_common
{
public:
  virtual ~Type_handler_datetime2() = default;
  const Name version() const override { return version_mysql56(); }
  enum_field_types real_field_type() const override
  {
    return MYSQL_TYPE_DATETIME2;
  }
  uint32 max_display_length_for_field(const Conv_source &src) const override;
  uint32 calc_pack_length(uint32 length) const override;
  Field *make_conversion_table_field(MEM_ROOT *root,
                                     TABLE *table, uint metadata,
                                     const Field *target) const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override
  { return Column_definition_prepare_stage2_legacy(c, MYSQL_TYPE_DATETIME2); }
  Field *make_table_field(MEM_ROOT *root,
                          const LEX_CSTRING *name,
                          const Record_addr &addr,
                          const Type_all_attributes &attr,
                          TABLE_SHARE *share) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
};


/*
  Behavior shared by both TIMESTAMP representations; format-specific parts
  are in Type_handler_timestamp and Type_handler_timestamp2 below.
*/
class Type_handler_timestamp_common: public Type_handler_temporal_with_date
{
protected:
  bool TIME_to_native(THD *, const MYSQL_TIME *from, Native *to, uint dec) const;
public:
  virtual ~Type_handler_timestamp_common() = default;
  const Name &default_value() const override;
  const Type_handler *type_handler_for_comparison() const override;
  const Type_handler *type_handler_for_native_format() const override;
  enum_field_types field_type() const override { return MYSQL_TYPE_TIMESTAMP; }
  enum_dynamic_column_type dyncol_type(const Type_all_attributes *attr)
                                       const override
  {
    return DYN_COL_DATETIME;
  }
  protocol_send_type_t protocol_send_type() const override
  {
    return PROTOCOL_SEND_DATETIME;
  }
  enum_mysql_timestamp_type mysql_timestamp_type() const override
  {
    return MYSQL_TIMESTAMP_DATETIME;
  }
  bool is_val_native_ready() const override
  {
    return true;
  }
  bool is_timestamp_type() const override
  {
    return true;
  }
  void Column_definition_implicit_upgrade(Column_definition *c) const override;
  bool
  Column_definition_attributes_frm_unpack(Column_definition_attributes *attr,
                                          TABLE_SHARE *share,
                                          const uchar *buffer,
                                          LEX_CUSTRING *gis_options)
                                          const override;
  bool Item_eq_value(THD *thd, const Type_cmp_attributes *attr,
                     Item *a, Item *b) const override;
  bool Item_val_native_with_conversion(THD *thd, Item *, Native *to)
                                       const override;
  bool Item_val_native_with_conversion_result(THD *thd, Item *, Native *to)
                                              const override;
  bool Item_param_val_native(THD *thd, Item_param *item, Native *to)
                             const override;
  int cmp_native(const Native &a, const Native &b) const override;
  longlong Item_func_between_val_int(Item_func_between *func) const override;
  bool Item_func_round_fix_length_and_dec(Item_func_round *) const override;
  bool Item_func_int_val_fix_length_and_dec(Item_func_int_val*) const override;
  cmp_item *make_cmp_item(THD *thd, CHARSET_INFO *cs) const override;
  in_vector *make_in_vector(THD *thd, const Item_func_in *f, uint nargs)
                            const override;
  void make_sort_key_part(uchar *to, Item *item,
                          const SORT_FIELD_ATTR *sort_field,
                          String *tmp) const override;
  uint make_packed_sort_key_part(uchar *to, Item *item,
                                 const SORT_FIELD_ATTR *sort_field,
                                 String *tmp) const override;
  void sort_length(THD *thd,
                   const Type_std_attributes *item,
                   SORT_FIELD_ATTR *attr) const override;
  bool Column_definition_fix_attributes(Column_definition *c) const override;
  decimal_digits_t Item_decimal_scale(const Item *item) const override
  {
    return Item_decimal_scale_with_seconds(item);
  }
  decimal_digits_t Item_decimal_precision(const Item *item) const override;
  decimal_digits_t Item_divisor_precision_increment(const Item *item) const override
  {
    return Item_divisor_precision_increment_with_seconds(item);
  }
  bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override
  {
    return Item_send_timestamp(item, protocol, buf);
  }
  int Item_save_in_field(Item *item, Field *field, bool no_conversions)
                         const override;
  String *print_item_value(THD *thd, Item *item, String *str) const override;
  Item_cache *Item_get_cache(THD *thd, const Item *item) const override;
  Item_copy *create_item_copy(THD *thd, Item *item) const override;
  String *Item_func_min_max_val_str(Item_func_min_max *, String *) const override;
  double Item_func_min_max_val_real(Item_func_min_max *) const override;
  longlong Item_func_min_max_val_int(Item_func_min_max *) const override;
  my_decimal *Item_func_min_max_val_decimal(Item_func_min_max *,
                                            my_decimal *) const override;
  bool set_comparator_func(THD *thd, Arg_comparator *cmp) const override;
  bool Item_hybrid_func_fix_attributes(THD *thd,
                                       const LEX_CSTRING &name,
                                       Type_handler_hybrid_field_type *,
                                       Type_all_attributes *atrr,
                                       Item **items, uint nitems)
                                       const override;
  void Item_param_set_param_func(Item_param *param,
                                 uchar **pos, ulong len) const override;
  bool Item_func_min_max_get_date(THD *thd, Item_func_min_max*,
                                  MYSQL_TIME *, date_mode_t fuzzydate)
                                  const override;
};


/* TIMESTAMP in the MariaDB-5.3 on-disk format (see version_mariadb53()). */
class Type_handler_timestamp: public Type_handler_timestamp_common
{
  /* number of bytes to store second_part part of the TIMESTAMP(N) */
  static uint m_sec_part_bytes[MAX_DATETIME_PRECISION + 1];
public:
  static uint sec_part_bytes(uint dec) { return m_sec_part_bytes[dec]; }
  virtual ~Type_handler_timestamp() = default;
  const Name version() const override { return version_mariadb53(); }
  uint32 max_display_length_for_field(const Conv_source &src) const override
  { return MAX_DATETIME_WIDTH; }
  uint32 calc_pack_length(uint32 length) const override;
  Field *make_conversion_table_field(MEM_ROOT *root,
                                     TABLE *table, uint metadata,
                                     const Field *target) const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override
  { return Column_definition_prepare_stage2_legacy_num(c, MYSQL_TYPE_TIMESTAMP); }
  Field *make_table_field(MEM_ROOT *root,
                          const LEX_CSTRING *name,
                          const Record_addr &addr,
                          const Type_all_attributes &attr,
                          TABLE_SHARE *share) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
};


/* TIMESTAMP in the MySQL-5.6 on-disk format (MYSQL_TYPE_TIMESTAMP2). */
class Type_handler_timestamp2: public Type_handler_timestamp_common
{
public:
  virtual ~Type_handler_timestamp2() = default;
  const Name version() const override { return version_mysql56(); }
  enum_field_types real_field_type() const override
  {
    return MYSQL_TYPE_TIMESTAMP2;
  }
  uint32 max_display_length_for_field(const Conv_source &src) const override;
  uint32 calc_pack_length(uint32 length) const override;
  Field *make_conversion_table_field(MEM_ROOT *root,
                                     TABLE *table, uint metadata,
                                     const Field *target) const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override
  {
    return Column_definition_prepare_stage2_legacy_num(c, MYSQL_TYPE_TIMESTAMP2);
  }
  Field *make_table_field(MEM_ROOT *root,
                          const LEX_CSTRING *name,
                          const Record_addr &addr,
                          const Type_all_attributes &attr,
                          TABLE_SHARE *share) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
};


/* Pre-5.0 DECIMAL (MYSQL_TYPE_DECIMAL), kept for reading old tables. */
class Type_handler_olddecimal: public Type_handler_decimal_result
{
public:
  virtual ~Type_handler_olddecimal() = default;
  enum_field_types field_type() const override { return MYSQL_TYPE_DECIMAL; }
  uint32 max_display_length_for_field(const Conv_source &src) const override;
  uint32 calc_pack_length(uint32 length) const override { return length; }
  const Type_handler *type_handler_for_tmp_table(const Item *item) const override;
  const Type_handler *type_handler_for_union(const Item *item) const override;
  void show_binlog_type(const Conv_source &src, const Field &, String *str)
                        const override;
  Field *make_conversion_table_field(MEM_ROOT *root,
                                     TABLE *table, uint metadata,
                                     const Field *target) const override;
  bool Column_definition_fix_attributes(Column_definition *c) const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override
  { return Column_definition_prepare_stage2_legacy_num(c, MYSQL_TYPE_DECIMAL); }
  Field *make_table_field(MEM_ROOT *root,
                          const LEX_CSTRING *name,
                          const Record_addr &addr,
                          const Type_all_attributes &attr,
                          TABLE_SHARE *share) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
};


/* Current DECIMAL (MYSQL_TYPE_NEWDECIMAL). */
class Type_handler_newdecimal: public Type_handler_decimal_result
{
public:
  virtual ~Type_handler_newdecimal() = default;
  enum_field_types field_type() const override { return MYSQL_TYPE_NEWDECIMAL; }
  uint32 max_display_length_for_field(const Conv_source &src) const override;
  uint32 calc_pack_length(uint32 length) const override;
  uint calc_key_length(const Column_definition &def) const override;
  void show_binlog_type(const Conv_source &src, const Field &, String *str)
                        const override;
  Field *make_conversion_table_field(MEM_ROOT *root,
                                     TABLE *table, uint metadata,
                                     const Field *target) const override;
  bool Column_definition_fix_attributes(Column_definition *c) const override;
  bool Column_definition_prepare_stage1(THD *thd,
                                        MEM_ROOT *mem_root,
                                        Column_definition *c,
                                        column_definition_type_t type,
                                        const Column_derived_attributes
                                        *derived_attr)
                                        const override;
  bool Column_definition_redefine_stage1(Column_definition *def,
                                         const Column_definition *dup,
                                         const handler *file)
                                         const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override;
  Field *make_table_field(MEM_ROOT *root,
                          const LEX_CSTRING *name,
                          const Record_addr &addr,
                          const Type_all_attributes &attr,
                          TABLE_SHARE *share) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
};


/* Handler for the NULL type: zero pack length, zero display length. */
class Type_handler_null: public Type_handler_general_purpose_string
{
public:
  virtual ~Type_handler_null() = default;
  enum_field_types field_type() const override { return MYSQL_TYPE_NULL; }
  enum_dynamic_column_type dyncol_type(const Type_all_attributes *attr)
                                       const override
  {
    return DYN_COL_NULL;
  }
  const Type_handler *type_handler_for_comparison() const override;
  const Type_handler *type_handler_for_tmp_table(const Item *item) const override;
  const Type_handler *type_handler_for_union(const Item *) const override;
  uint32 max_display_length(const Item *item) const override { return 0; }
  uint32 max_display_length_for_field(const Conv_source &src) const override
  {
    return 0;
  }
  uint32 calc_pack_length(uint32 length) const override { return 0; }
  bool Item_const_eq(const Item_const *a, const Item_const *b,
                     bool binary_cmp) const override;
  bool Item_save_in_value(THD *thd, Item *item, st_value *value) const override;
  bool Item_send(Item *item, Protocol *protocol, st_value *buf) const override;
  Field *make_conversion_table_field(MEM_ROOT *root,
                                     TABLE *table, uint metadata,
                                     const Field *target) const override;
  bool union_element_finalize(Item_type_holder* item) const override;
  bool Column_definition_fix_attributes(Column_definition *c) const override;
  bool Column_definition_prepare_stage1(THD *thd,
                                        MEM_ROOT *mem_root,
                                        Column_definition *c,
                                        column_definition_type_t type,
                                        const Column_derived_attributes
                                        *derived_attr)
                                        const override;
  bool Column_definition_redefine_stage1(Column_definition *def,
                                         const Column_definition *dup,
                                         const handler *file)
                                         const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override
  { return Column_definition_prepare_stage2_legacy(c, MYSQL_TYPE_NULL); }
  void
  Column_definition_attributes_frm_pack(const Column_definition_attributes *at,
                                        uchar *buff) const override;
  Field *make_table_field(MEM_ROOT *root,
                          const LEX_CSTRING *name,
                          const Record_addr &addr,
                          const Type_all_attributes &attr,
                          TABLE_SHARE *share) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
};


/* Base for string types whose columns can be used as a key part. */
class Type_handler_longstr: public Type_handler_general_purpose_string
{
public:
  bool type_can_have_key_part() const override
  {
    return true;
  }
};


/* Fixed-length CHAR (MYSQL_TYPE_STRING). */
class Type_handler_string: public Type_handler_longstr
{
public:
  virtual ~Type_handler_string() = default;
  enum_field_types field_type() const override { return MYSQL_TYPE_STRING; }
  ulong KEY_pack_flags(uint column_nr) const override
  {
    return HA_PACK_KEY;
  }
  bool is_param_long_data_type() const override { return true; }
  uint32 max_display_length_for_field(const Conv_source &src) const override;
  uint32 calc_pack_length(uint32 length) const override { return length; }
  const Type_handler *type_handler_for_tmp_table(const Item *item) const
                                                 override
  {
    return varstring_type_handler(item);
  }
  bool partition_field_check(const LEX_CSTRING &, Item *item_expr)
                             const override
  {
    return partition_field_check_result_type(item_expr, STRING_RESULT);
  }
  void show_binlog_type(const Conv_source &src, const Field &dst, String *str)
                        const override;
  Field *make_conversion_table_field(MEM_ROOT *root,
                                     TABLE *table, uint metadata,
                                     const Field *target) const override;
  bool Column_definition_set_attributes(THD *thd,
                                        Column_definition *def,
                                        const Lex_field_type_st &attr,
                                        column_definition_type_t type)
                                        const override;
  bool Column_definition_fix_attributes(Column_definition *c) const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override;
  bool Key_part_spec_init_ft(Key_part_spec *part,
                             const Column_definition &def) const override;
  Field *make_table_field(MEM_ROOT *root,
                          const LEX_CSTRING *name,
                          const Record_addr &addr,
                          const Type_all_attributes &attr,
                          TABLE_SHARE *share) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
};


/* Old varchar */
class Type_handler_var_string: public Type_handler_string
{
public:
  virtual ~Type_handler_var_string() = default;
  enum_field_types field_type() const override { return MYSQL_TYPE_VAR_STRING; }
  enum_field_types real_field_type() const override { return MYSQL_TYPE_STRING; }
  enum_field_types traditional_merge_field_type() const override
  {
    return MYSQL_TYPE_VARCHAR;
  }
  const Type_handler *type_handler_for_tmp_table(const Item *item) const override
  {
    return varstring_type_handler(item);
  }
  uint32 max_display_length_for_field(const Conv_source &src) const override;
  void show_binlog_type(const Conv_source &src, const Field &dst, String *str)
                        const override;
  void Column_definition_implicit_upgrade(Column_definition *c) const override;
  bool Column_definition_fix_attributes(Column_definition *c) const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override
  { return Column_definition_prepare_stage2_legacy_num(c, MYSQL_TYPE_STRING); }
  const Type_handler *type_handler_for_union(const Item *item) const override
  {
    return varstring_type_handler(item);
  }
};


/* VARCHAR (MYSQL_TYPE_VARCHAR): 1- or 2-byte length prefix + data. */
class Type_handler_varchar: public Type_handler_longstr
{
public:
  virtual ~Type_handler_varchar() = default;
  enum_field_types field_type() const override { return MYSQL_TYPE_VARCHAR; }
  ulong KEY_pack_flags(uint column_nr) const override
  {
    if (column_nr == 0)
      return HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY;
    return HA_PACK_KEY;
  }
  enum_field_types type_code_for_protocol() const override
  {
    return MYSQL_TYPE_VAR_STRING; // Keep things compatible for old clients
  }
  uint32 max_display_length_for_field(const Conv_source &src) const override;
  uint32 calc_pack_length(uint32 length) const override
  {
    return (length + (length < 256 ? 1: 2));
  }
  const Type_handler *type_handler_for_tmp_table(const Item *item) const
                                                 override
  {
    return varstring_type_handler(item);
  }
  const Type_handler *type_handler_for_union(const Item *item) const override
  {
    return varstring_type_handler(item);
  }
  bool is_param_long_data_type() const override { return true; }
  bool partition_field_check(const LEX_CSTRING &, Item *item_expr)
                             const override
  {
    return partition_field_check_result_type(item_expr, STRING_RESULT);
  }
  void show_binlog_type(const Conv_source &src, const Field &dst, String *str)
                        const override;
  Field *make_conversion_table_field(MEM_ROOT *root,
                                     TABLE *table, uint metadata,
                                     const Field *target) const override;
  bool Column_definition_set_attributes(THD *thd,
                                        Column_definition *def,
                                        const Lex_field_type_st &attr,
                                        column_definition_type_t type)
                                        const override;
  bool Column_definition_fix_attributes(Column_definition *c) const override;
  bool Column_definition_prepare_stage2(Column_definition *c,
                                        handler *file,
                                        ulonglong table_flags) const override;
  bool Key_part_spec_init_ft(Key_part_spec *part,
                             const Column_definition &def) const override;
  Field *make_table_field(MEM_ROOT *root,
                          const LEX_CSTRING *name,
                          const Record_addr &addr,
                          const Type_all_attributes &attr,
                          TABLE_SHARE *share) const override;
  Field *make_schema_field(MEM_ROOT *root,
                           TABLE *table,
                           const Record_addr &addr,
                           const ST_FIELD_INFO &def) const override;
  Field *make_table_field_from_def(TABLE_SHARE *share,
                                   MEM_ROOT *mem_root,
                                   const LEX_CSTRING *name,
                                   const Record_addr &addr,
                                   const Bit_addr &bit,
                                   const Column_definition_attributes *attr,
                                   uint32 flags) const override;
  bool adjust_spparam_type(Spvar_definition *def, Item *from) const override;
};


/*
  NOTE(review): this class continues past the end of this chunk; the
  fragment is kept verbatim.
*/
class Type_handler_hex_hybrid: public Type_handler_varchar
{
public:
  virtual ~Type_handler_hex_hybrid() = default;
  const Type_handler *cast_to_int_type_handler() const override;
  bool
Item_func_round_fix_length_and_dec(Item_func_round *) const override; + bool Item_func_int_val_fix_length_and_dec(Item_func_int_val*) const override; +}; + + +class Type_handler_varchar_compressed: public Type_handler_varchar +{ +public: + enum_field_types real_field_type() const override + { + return MYSQL_TYPE_VARCHAR_COMPRESSED; + } + ulong KEY_pack_flags(uint column_nr) const override + { + MY_ASSERT_UNREACHABLE(); + return 0; + } + uint32 max_display_length_for_field(const Conv_source &src) const override; + bool partition_field_check(const LEX_CSTRING &field_name, Item *) + const override + { + partition_field_type_not_allowed(field_name); + return true; + } + void show_binlog_type(const Conv_source &src, const Field &dst, String *str) + const override; + Field *make_conversion_table_field(MEM_ROOT *root, + TABLE *table, uint metadata, + const Field *target) const override; + enum_dynamic_column_type dyncol_type(const Type_all_attributes *attr) + const override + { + DBUG_ASSERT(0); + return DYN_COL_STRING; + } +}; + + +class Type_handler_blob_common: public Type_handler_longstr +{ +public: + virtual ~Type_handler_blob_common() = default; + virtual uint length_bytes() const= 0; + ulong KEY_pack_flags(uint column_nr) const override + { + if (column_nr == 0) + return HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY; + return HA_PACK_KEY; + } + Field *make_conversion_table_field(MEM_ROOT *root, + TABLE *table, uint metadata, + const Field *target) const override; + const Type_handler *type_handler_for_tmp_table(const Item *item) const + override + { + return blob_type_handler(item); + } + const Type_handler *type_handler_for_union(const Item *item) const override + { + return blob_type_handler(item); + } + bool subquery_type_allows_materialization(const Item *, const Item *, bool) + const override + { + return false; // Materialization does not work with BLOB columns + } + bool is_param_long_data_type() const override { return true; } + uint calc_key_length(const 
Column_definition &def) const override; + bool Column_definition_fix_attributes(Column_definition *c) const override; + bool Column_definition_prepare_stage2(Column_definition *c, + handler *file, + ulonglong table_flags) const override; + void + Column_definition_attributes_frm_pack(const Column_definition_attributes *at, + uchar *buff) const override; + bool Key_part_spec_init_ft(Key_part_spec *part, + const Column_definition &def) const override; + bool Key_part_spec_init_primary(Key_part_spec *part, + const Column_definition &def, + const handler *file) const override; + bool Key_part_spec_init_unique(Key_part_spec *part, + const Column_definition &def, + const handler *file, + bool *has_key_needed) const override; + bool Key_part_spec_init_multiple(Key_part_spec *part, + const Column_definition &def, + const handler *file) const override; + bool Key_part_spec_init_foreign(Key_part_spec *part, + const Column_definition &def, + const handler *file) const override; + bool Item_hybrid_func_fix_attributes(THD *thd, + const LEX_CSTRING &name, + Type_handler_hybrid_field_type *, + Type_all_attributes *atrr, + Item **items, uint nitems) const + override; + void Item_param_setup_conversion(THD *thd, Item_param *) const override; + + bool partition_field_check(const LEX_CSTRING &field_name, + Item *item_expr) const override; + Field *make_schema_field(MEM_ROOT *root, + TABLE *table, + const Record_addr &addr, + const ST_FIELD_INFO &def) const override; + Field *make_table_field_from_def(TABLE_SHARE *share, + MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const override; + const Vers_type_handler *vers() const override; +}; + + +class Type_handler_tiny_blob: public Type_handler_blob_common +{ +public: + virtual ~Type_handler_tiny_blob() = default; + uint length_bytes() const override { return 1; } + enum_field_types field_type() const override { return 
MYSQL_TYPE_TINY_BLOB; } + uint32 max_display_length_for_field(const Conv_source &src) const override; + uint32 calc_pack_length(uint32 length) const override; + Field *make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const override; + uint max_octet_length() const override { return UINT_MAX8; } +}; + + +class Type_handler_medium_blob: public Type_handler_blob_common +{ +public: + virtual ~Type_handler_medium_blob() = default; + uint length_bytes() const override { return 3; } + enum_field_types field_type() const override + { return MYSQL_TYPE_MEDIUM_BLOB; } + uint32 max_display_length_for_field(const Conv_source &src) const override; + uint32 calc_pack_length(uint32 length) const override; + Field *make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const override; + uint max_octet_length() const override { return UINT_MAX24; } +}; + + +class Type_handler_long_blob: public Type_handler_blob_common +{ +public: + virtual ~Type_handler_long_blob() = default; + uint length_bytes() const override { return 4; } + enum_field_types field_type() const override { return MYSQL_TYPE_LONG_BLOB; } + uint32 max_display_length_for_field(const Conv_source &src) const override; + uint32 calc_pack_length(uint32 length) const override; + Item *create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) const override; + Field *make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const override; + uint max_octet_length() const override { return UINT_MAX32; } +}; + + +class Type_handler_blob: public Type_handler_blob_common +{ +public: + virtual ~Type_handler_blob() = default; + uint length_bytes() const override { return 2; } + enum_field_types field_type() const override { return 
MYSQL_TYPE_BLOB; } + uint32 max_display_length_for_field(const Conv_source &src) const override; + uint32 calc_pack_length(uint32 length) const override; + Field *make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const override; + uint max_octet_length() const override { return UINT_MAX16; } +}; + + +class Type_handler_blob_compressed: public Type_handler_blob +{ +public: + enum_field_types real_field_type() const override + { + return MYSQL_TYPE_BLOB_COMPRESSED; + } + ulong KEY_pack_flags(uint) const override + { + MY_ASSERT_UNREACHABLE(); + return 0; + } + uint32 max_display_length_for_field(const Conv_source &src) const override; + void show_binlog_type(const Conv_source &src, const Field &, String *str) + const override; + Field *make_conversion_table_field(MEM_ROOT *root, + TABLE *table, uint metadata, + const Field *target) const override; + enum_dynamic_column_type dyncol_type(const Type_all_attributes *) + const override + { + DBUG_ASSERT(0); + return DYN_COL_STRING; + } +}; + + +class Type_handler_typelib: public Type_handler_general_purpose_string +{ +public: + virtual ~Type_handler_typelib() = default; + enum_field_types field_type() const override { return MYSQL_TYPE_STRING; } + const Type_handler *type_handler_for_item_field() const override; + const Type_handler *cast_to_int_type_handler() const override; + bool Item_func_round_fix_length_and_dec(Item_func_round *) const override; + bool Item_func_int_val_fix_length_and_dec(Item_func_int_val*) const override; + uint32 max_display_length_for_field(const Conv_source &src) const override; + bool Item_hybrid_func_fix_attributes(THD *thd, + const LEX_CSTRING &name, + Type_handler_hybrid_field_type *, + Type_all_attributes *atrr, + Item **items, uint nitems) + const override; + void Column_definition_reuse_fix_attributes(THD *thd, + Column_definition *c, + const Field *field) + const override; + bool 
Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *c, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const override; + bool Column_definition_redefine_stage1(Column_definition *def, + const Column_definition *dup, + const handler *file) + const override; + void Item_param_set_param_func(Item_param *param, + uchar **pos, ulong len) const override; + const Vers_type_handler *vers() const override { return NULL; } +}; + + +class Type_handler_enum: public Type_handler_typelib +{ +public: + virtual ~Type_handler_enum() = default; + enum_field_types real_field_type() const override { return MYSQL_TYPE_ENUM; } + enum_field_types traditional_merge_field_type() const override + { + return MYSQL_TYPE_ENUM; + } + uint32 calc_pack_length(uint32 length) const override; + uint calc_key_length(const Column_definition &def) const override; + void + Column_definition_attributes_frm_pack(const Column_definition_attributes *at, + uchar *buff) const override; + Field *make_conversion_table_field(MEM_ROOT *root, + TABLE *table, uint metadata, + const Field *target) + const override; + bool Column_definition_fix_attributes(Column_definition *c) const override; + bool Column_definition_prepare_stage2(Column_definition *c, + handler *file, + ulonglong table_flags) const override; + Field *make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const override; + Field *make_table_field_from_def(TABLE_SHARE *share, + MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const override; + Field *make_schema_field(MEM_ROOT *root, + TABLE *table, + const Record_addr &addr, + const ST_FIELD_INFO &def) const override; +}; + + +class Type_handler_set: public Type_handler_typelib +{ +public: + virtual ~Type_handler_set() = default; + 
enum_field_types real_field_type() const override { return MYSQL_TYPE_SET; } + enum_field_types traditional_merge_field_type() const override + { + return MYSQL_TYPE_SET; + } + uint32 calc_pack_length(uint32 length) const override; + uint calc_key_length(const Column_definition &def) const override; + void + Column_definition_attributes_frm_pack(const Column_definition_attributes *at, + uchar *buff) const override; + Field *make_conversion_table_field(MEM_ROOT *root, + TABLE *table, uint metadata, + const Field *target) + const override; + bool Column_definition_fix_attributes(Column_definition *c) const override; + bool Column_definition_prepare_stage2(Column_definition *c, + handler *file, + ulonglong table_flags) const override; + Field *make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const override; + Field *make_table_field_from_def(TABLE_SHARE *share, + MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const override; +}; + + +// A pseudo type handler, mostly for test purposes for now +class Type_handler_interval_DDhhmmssff: public Type_handler_long_blob +{ +public: + Item *create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) const override; +}; + + +class Function_collection +{ +public: + virtual ~Function_collection() = default; + virtual bool init()= 0; + virtual void cleanup()= 0; + virtual Create_func *find_native_function_builder(THD *thd, + const LEX_CSTRING &name) + const= 0; +}; + + +class Type_collection +{ +public: + virtual ~Type_collection() = default; + virtual bool init(Type_handler_data *) { return false; } + virtual const Type_handler *aggregate_for_result(const Type_handler *h1, + const Type_handler *h2) + const= 0; + virtual const Type_handler *aggregate_for_comparison(const Type_handler *h1, + const Type_handler 
*h2) + const= 0; + virtual const Type_handler *aggregate_for_min_max(const Type_handler *h1, + const Type_handler *h2) + const= 0; + virtual const Type_handler *aggregate_for_num_op(const Type_handler *h1, + const Type_handler *h2) + const= 0; +}; + + +/** + A handler for hybrid type functions, e.g. + COALESCE(), IF(), IFNULL(), NULLIF(), CASE, + numeric operators, + UNIX_TIMESTAMP(), TIME_TO_SEC(). + + Makes sure that field_type(), cmp_type() and result_type() + are always in sync to each other for hybrid functions. +*/ +class Type_handler_hybrid_field_type +{ + const Type_handler *m_type_handler; + bool aggregate_for_min_max(const Type_handler *other); + +public: + Type_handler_hybrid_field_type(); + Type_handler_hybrid_field_type(const Type_handler *handler) + :m_type_handler(handler) + { } + Type_handler_hybrid_field_type(const Type_handler_hybrid_field_type *other) + :m_type_handler(other->m_type_handler) + { } + void swap(Type_handler_hybrid_field_type &other) + { + swap_variables(const Type_handler *, m_type_handler, other.m_type_handler); + } + const Type_handler *type_handler() const { return m_type_handler; } + enum_field_types real_field_type() const + { + return m_type_handler->real_field_type(); + } + Item_result cmp_type() const { return m_type_handler->cmp_type(); } + enum_mysql_timestamp_type mysql_timestamp_type() const + { + return m_type_handler->mysql_timestamp_type(); + } + bool is_timestamp_type() const + { + return m_type_handler->is_timestamp_type(); + } + void set_handler(const Type_handler *other) + { + m_type_handler= other; + } + const Type_handler *set_handler_by_field_type(enum_field_types type) + { + return (m_type_handler= Type_handler::get_handler_by_field_type(type)); + } + const Type_handler *set_handler_by_real_type(enum_field_types type) + { + return (m_type_handler= Type_handler::get_handler_by_real_type(type)); + } + bool aggregate_for_comparison(const Type_handler *other); + bool aggregate_for_comparison(const LEX_CSTRING 
&funcname, + Item **items, uint nitems, + bool treat_int_to_uint_as_decimal); + bool aggregate_for_result(const Type_handler *other); + bool aggregate_for_result(const LEX_CSTRING &funcname, + Item **item, uint nitems, bool treat_bit_as_number); + bool aggregate_for_min_max(const LEX_CSTRING &funcname, Item **item, + uint nitems); + + bool aggregate_for_num_op(const class Type_aggregator *aggregator, + const Type_handler *h0, const Type_handler *h1); +}; + + +class Type_handler_pair +{ + const Type_handler *m_a; + const Type_handler *m_b; +public: + Type_handler_pair(const Type_handler *a, + const Type_handler *b) + :m_a(a), m_b(b) + { } + const Type_handler *a() const { return m_a; } + const Type_handler *b() const { return m_b; } + /* + Change both handlers to their parent data type handlers, if available. + For example, VARCHAR/JSON -> VARCHAR. + @returns The number of handlers changed (0,1 or 2). + */ + bool to_base() + { + bool rc= false; + const Type_handler *na= m_a->type_handler_base(); + const Type_handler *nb= m_b->type_handler_base(); + if (na) + { + m_a= na; rc= true; + } + if (nb) + { + m_b= nb; rc= true; + } + return rc; + } +}; + + +/* + Helper template to simplify creating builtin types with names. + Plugin types inherit from Type_handler_xxx types that do not set the name in + the constructor, as sql_plugin.cc sets the type name from the plugin name. 
+*/ +template +class Named_type_handler : public TypeHandler +{ + public: + Named_type_handler(const char *n) : TypeHandler() + { Type_handler::set_name(Name(n, static_cast(strlen(n)))); } +}; + +extern Named_type_handler type_handler_row; +extern Named_type_handler type_handler_null; + +extern Named_type_handler type_handler_float; +extern MYSQL_PLUGIN_IMPORT Named_type_handler type_handler_double; + +extern Named_type_handler type_handler_bit; + +extern Named_type_handler type_handler_enum; +extern Named_type_handler type_handler_set; + +extern Named_type_handler type_handler_string; +extern Named_type_handler type_handler_var_string; +extern MYSQL_PLUGIN_IMPORT Named_type_handler type_handler_varchar; +extern Named_type_handler type_handler_varchar_compressed; +extern Named_type_handler type_handler_hex_hybrid; + +extern Named_type_handler type_handler_tiny_blob; +extern Named_type_handler type_handler_medium_blob; +extern MYSQL_PLUGIN_IMPORT Named_type_handler type_handler_long_blob; +extern Named_type_handler type_handler_blob; +extern Named_type_handler type_handler_blob_compressed; + +extern MYSQL_PLUGIN_IMPORT Named_type_handler type_handler_bool; +extern MYSQL_PLUGIN_IMPORT Named_type_handler type_handler_stiny; +extern MYSQL_PLUGIN_IMPORT Named_type_handler type_handler_sshort; +extern MYSQL_PLUGIN_IMPORT Named_type_handler type_handler_sint24; +extern MYSQL_PLUGIN_IMPORT Named_type_handler type_handler_slong; +extern MYSQL_PLUGIN_IMPORT Named_type_handler type_handler_slonglong; + +extern Named_type_handler type_handler_utiny; +extern Named_type_handler type_handler_ushort; +extern Named_type_handler type_handler_uint24; +extern MYSQL_PLUGIN_IMPORT Named_type_handler type_handler_ulong; +extern MYSQL_PLUGIN_IMPORT Named_type_handler type_handler_ulonglong; +extern Named_type_handler type_handler_vers_trx_id; + +extern MYSQL_PLUGIN_IMPORT Named_type_handler type_handler_newdecimal; +extern Named_type_handler type_handler_olddecimal; + +extern 
Named_type_handler type_handler_year; +extern Named_type_handler type_handler_year2; +extern Named_type_handler type_handler_newdate; +extern Named_type_handler type_handler_date; +extern Named_type_handler type_handler_time; +extern Named_type_handler type_handler_time2; +extern Named_type_handler type_handler_datetime; +extern Named_type_handler type_handler_datetime2; + +extern MYSQL_PLUGIN_IMPORT Named_type_handler type_handler_timestamp; +extern MYSQL_PLUGIN_IMPORT Named_type_handler type_handler_timestamp2; + +extern Type_handler_interval_DDhhmmssff type_handler_interval_DDhhmmssff; + +class Type_aggregator +{ + bool m_is_commutative; +public: + class Pair + { + public: + const Type_handler *m_handler1; + const Type_handler *m_handler2; + const Type_handler *m_result; + Pair() = default; + Pair(const Type_handler *handler1, + const Type_handler *handler2, + const Type_handler *result) + :m_handler1(handler1), m_handler2(handler2), m_result(result) + { } + bool eq(const Type_handler *handler1, const Type_handler *handler2) const + { + return m_handler1 == handler1 && m_handler2 == handler2; + } + }; + static const Type_handler * + find_handler_in_array(const Type_aggregator::Pair *pairs, + const Type_handler *h1, + const Type_handler *h2, + bool commutative) + { + for (const Type_aggregator::Pair *p= pairs; p->m_result; p++) + { + if (p->eq(h1, h2)) + return p->m_result; + if (commutative && p->eq(h2, h1)) + return p->m_result; + } + return NULL; + } + +private: + Dynamic_array m_array; + const Pair* find_pair(const Type_handler *handler1, + const Type_handler *handler2) const; +public: + Type_aggregator(bool is_commutative= false) + :m_is_commutative(is_commutative), m_array(PSI_INSTRUMENT_MEM) + { } + bool add(const Type_handler *handler1, + const Type_handler *handler2, + const Type_handler *result) + { + return m_array.append(Pair(handler1, handler2, result)); + } + const Type_handler *find_handler(const Type_handler *handler1, + const Type_handler 
*handler2) const + { + const Pair* el= find_pair(handler1, handler2); + return el ? el->m_result : NULL; + } + bool is_commutative() const { return m_is_commutative; } +}; + + +class Type_aggregator_commutative: public Type_aggregator +{ +public: + Type_aggregator_commutative() + :Type_aggregator(true) + { } +}; + + +class Type_handler_data +{ +public: + Type_aggregator_commutative m_type_aggregator_for_result; + Type_aggregator_commutative m_type_aggregator_for_comparison; + + Type_aggregator_commutative m_type_aggregator_for_plus; + Type_aggregator_commutative m_type_aggregator_for_mul; + + Type_aggregator m_type_aggregator_for_minus; + Type_aggregator m_type_aggregator_for_div; + Type_aggregator m_type_aggregator_for_mod; +#ifndef DBUG_OFF + // This is used for mtr purposes in debug builds + Type_aggregator m_type_aggregator_non_commutative_test; +#endif + bool init(); +}; + +extern Type_handler_data *type_handler_data; + +#endif /* SQL_TYPE_H_INCLUDED */ diff --git a/sql/sql_type_fixedbin.h b/sql/sql_type_fixedbin.h new file mode 100644 index 00000000..88c24ba0 --- /dev/null +++ b/sql/sql_type_fixedbin.h @@ -0,0 +1,1921 @@ +#ifndef SQL_TYPE_FIXEDBIN_H +#define SQL_TYPE_FIXEDBIN_H +/* Copyright (c) 2019,2021 MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + This is a common code for plugin (?) 
types that are generally + handled like strings, but have their own fixed size on-disk binary storage + format and their own (variable size) canonical string representation. + + Examples are INET6 and UUID types. +*/ + +#define MYSQL_SERVER +#include "sql_class.h" // THD, SORT_FIELD_ATTR +#include "opt_range.h" // SEL_ARG, null_element +#include "sql_type_fixedbin_storage.h" + +/***********************************************************************/ + + +template class Type_collection_fbt; + +template > +class Type_handler_fbt: public Type_handler +{ + /* =[ internal helper classes ]=============================== */ + +public: + class Fbt: public FbtImpl + { + protected: + using FbtImpl::m_buffer; + bool make_from_item(Item *item, bool warn) + { + if (item->type_handler() == singleton()) + { + Native tmp(m_buffer, sizeof(m_buffer)); + bool rc= item->val_native(current_thd, &tmp); + if (rc) + return true; + DBUG_ASSERT(tmp.length() == sizeof(m_buffer)); + if (tmp.ptr() != m_buffer) + memcpy(m_buffer, tmp.ptr(), sizeof(m_buffer)); + return false; + } + StringBuffer tmp; + String *str= item->val_str(&tmp); + return str ? 
make_from_character_or_binary_string(str, warn) : true; + } + + bool character_string_to_fbt(const char *str, size_t str_length, + CHARSET_INFO *cs) + { + if (cs->state & MY_CS_NONASCII) + { + char tmp[FbtImpl::max_char_length()+1]; + String_copier copier; + uint length= copier.well_formed_copy(&my_charset_latin1, tmp, sizeof(tmp), + cs, str, str_length); + return FbtImpl::ascii_to_fbt(tmp, length); + } + return FbtImpl::ascii_to_fbt(str, str_length); + } + bool make_from_character_or_binary_string(const String *str, bool warn) + { + if (str->charset() != &my_charset_bin) + { + bool rc= character_string_to_fbt(str->ptr(), str->length(), + str->charset()); + if (rc && warn) + current_thd->push_warning_wrong_value(Sql_condition::WARN_LEVEL_WARN, + singleton()->name().ptr(), ErrConvString(str).ptr()); + return rc; + } + if (str->length() != sizeof(m_buffer)) + { + if (warn) + current_thd->push_warning_wrong_value(Sql_condition::WARN_LEVEL_WARN, + singleton()->name().ptr(), ErrConvString(str).ptr()); + return true; + } + DBUG_ASSERT(str->ptr() != m_buffer); + memcpy(m_buffer, str->ptr(), sizeof(m_buffer)); + return false; + } + bool binary_to_fbt(const char *str, size_t length) + { + if (length != sizeof(m_buffer)) + return true; + memcpy(m_buffer, str, length); + return false; + } + + Fbt() { } + + public: + + static Fbt zero() + { + Fbt fbt; + fbt.set_zero(); + return fbt; + } + + static Fbt record_to_memory(const char *ptr) + { + Fbt fbt; + FbtImpl::record_to_memory(fbt.m_buffer, ptr); + return fbt; + } + /* + Check at Item's fix_fields() time if "item" can return a nullable value + on conversion to Fbt, or conversion produces a NOT NULL Fbt value. 
+ */ + static bool fix_fields_maybe_null_on_conversion_to_fbt(Item *item) + { + if (item->maybe_null()) + return true; + if (item->type_handler() == singleton()) + return false; + if (!item->const_item() || item->is_expensive()) + return true; + return Fbt_null(item, false).is_null(); + } + + public: + + Fbt(Item *item, bool *error, bool warn= true) + { + *error= make_from_item(item, warn); + } + void to_record(char *str, size_t str_size) const + { + DBUG_ASSERT(str_size >= sizeof(m_buffer)); + FbtImpl::memory_to_record(str, m_buffer); + } + bool to_binary(String *to) const + { + return to->copy(m_buffer, sizeof(m_buffer), &my_charset_bin); + } + bool to_native(Native *to) const + { + return to->copy(m_buffer, sizeof(m_buffer)); + } + bool to_string(String *to) const + { + to->set_charset(&my_charset_latin1); + if (to->alloc(FbtImpl::max_char_length()+1)) + return true; + to->length((uint32) FbtImpl::to_string(const_cast(to->ptr()), + FbtImpl::max_char_length()+1)); + return false; + } + int cmp(const Binary_string &other) const + { + return FbtImpl::cmp(FbtImpl::to_lex_cstring(), other.to_lex_cstring()); + } + int cmp(const Fbt &other) const + { + return FbtImpl::cmp(FbtImpl::to_lex_cstring(), other.to_lex_cstring()); + } + }; + + class Fbt_null: public Fbt, public Null_flag + { + public: + // Initialize from a text representation + Fbt_null(const char *str, size_t length, CHARSET_INFO *cs) + :Null_flag(Fbt::character_string_to_fbt(str, length, cs)) { } + Fbt_null(const String &str) + :Fbt_null(str.ptr(), str.length(), str.charset()) { } + // Initialize from a binary representation + Fbt_null(const char *str, size_t length) + :Null_flag(Fbt::binary_to_fbt(str, length)) { } + Fbt_null(const Binary_string &str) + :Fbt_null(str.ptr(), str.length()) { } + // Initialize from an Item + Fbt_null(Item *item, bool warn= true) + :Null_flag(Fbt::make_from_item(item, warn)) { } + public: + const Fbt& to_fbt() const + { + DBUG_ASSERT(!is_null()); + return *this; + } + void 
to_record(char *str, size_t str_size) const + { + to_fbt().to_record(str, str_size); + } + bool to_binary(String *to) const + { + return to_fbt().to_binary(to); + } + bool to_string(String *to) const + { + return to_fbt().to_string(to); + } + }; + + /* =[ API classes ]=========================================== */ + + class Type_std_attributes_fbt: public Type_std_attributes + { + public: + Type_std_attributes_fbt() + :Type_std_attributes( + Type_numeric_attributes(FbtImpl::max_char_length(), 0, true), + DTCollation_numeric()) + { } + }; + + class Item_literal_fbt: public Item_literal + { + Fbt m_value; + public: + Item_literal_fbt(THD *thd) + :Item_literal(thd), + m_value(Fbt::zero()) + { } + Item_literal_fbt(THD *thd, const Fbt &value) + :Item_literal(thd), + m_value(value) + { } + const Type_handler *type_handler() const override + { + return singleton(); + } + longlong val_int() override + { + return 0; + } + double val_real() override + { + return 0; + } + String *val_str(String *to) override + { + return m_value.to_string(to) ? 
NULL : to; + } + my_decimal *val_decimal(my_decimal *to) override + { + my_decimal_set_zero(to); + return to; + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + set_zero_time(ltime, MYSQL_TIMESTAMP_TIME); + return false; + } + bool val_native(THD *thd, Native *to) override + { + return m_value.to_native(to); + } + void print(String *str, enum_query_type query_type) override + { + StringBuffer tmp; + tmp.append(singleton()->name().lex_cstring()); + my_caseup_str(&my_charset_latin1, tmp.c_ptr()); + str->append(tmp); + str->append('\''); + m_value.to_string(&tmp); + str->append(tmp); + str->append('\''); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + + // Non-overriding methods + void set_value(const Fbt &value) + { + m_value= value; + } + }; + + class Field_fbt: public Field + { + static void set_min_value(char *ptr) + { + memset(ptr, 0, FbtImpl::binary_length()); + } + static void set_max_value(char *ptr) + { + memset(ptr, 0xFF, FbtImpl::binary_length()); + } + void store_warning(const ErrConv &str, + Sql_condition::enum_warning_level level) + { + if (get_thd()->count_cuted_fields <= CHECK_FIELD_EXPRESSION) + return; + const TABLE_SHARE *s= table->s; + static const Name type_name= singleton()->name(); + get_thd()->push_warning_truncated_value_for_field(level, type_name.ptr(), + str.ptr(), s ? s->db.str : nullptr, s ? 
s->table_name.str : nullptr, + field_name.str); + } + int set_null_with_warn(const ErrConv &str) + { + store_warning(str, Sql_condition::WARN_LEVEL_WARN); + set_null(); + return 1; + } + int set_min_value_with_warn(const ErrConv &str) + { + store_warning(str, Sql_condition::WARN_LEVEL_WARN); + set_min_value((char*) ptr); + return 1; + } + int set_max_value_with_warn(const ErrConv &str) + { + store_warning(str, Sql_condition::WARN_LEVEL_WARN); + set_max_value((char*) ptr); + return 1; + } + int store_fbt_null_with_warn(const Fbt_null &fbt, + const ErrConvString &err) + { + DBUG_ASSERT(marked_for_write_or_computed()); + if (fbt.is_null()) + return maybe_null() ? set_null_with_warn(err) + : set_min_value_with_warn(err); + fbt.to_record((char *) ptr, FbtImpl::binary_length()); + return 0; + } + + public: + Field_fbt(const LEX_CSTRING *field_name_arg, const Record_addr &rec) + :Field(rec.ptr(), FbtImpl::max_char_length(), + rec.null_ptr(), rec.null_bit(), Field::NONE, field_name_arg) + { + flags|= BINARY_FLAG | UNSIGNED_FLAG; + } + const Type_handler *type_handler() const override + { + return singleton(); + } + uint32 max_display_length() const override { return field_length; } + bool str_needs_quotes() const override { return true; } + const DTCollation &dtcollation() const override + { + static DTCollation_numeric c; + return c; + } + CHARSET_INFO *charset(void) const override { return &my_charset_numeric; } + const CHARSET_INFO *sort_charset(void) const override { return &my_charset_bin; } + /** + This makes client-server protocol convert the value according + to @@character_set_client. 
+ */ + bool binary() const override { return false; } + enum ha_base_keytype key_type() const override { return HA_KEYTYPE_BINARY; } + + bool is_equal(const Column_definition &new_field) const override + { + return new_field.type_handler() == type_handler(); + } + bool eq_def(const Field *field) const override + { + return Field::eq_def(field); + } + double pos_in_interval(Field *min, Field *max) override + { + return pos_in_interval_val_str(min, max, 0); + } + int cmp(const uchar *a, const uchar *b) const override + { return memcmp(a, b, pack_length()); } + + void sort_string(uchar *to, uint length) override + { + DBUG_ASSERT(length == pack_length()); + memcpy(to, ptr, length); + } + uint32 pack_length() const override + { + return FbtImpl::binary_length(); + } + uint pack_length_from_metadata(uint field_metadata) const override + { + return FbtImpl::binary_length(); + } + + void sql_type(String &str) const override + { + static Name name= singleton()->name(); + str.set_ascii(name.ptr(), name.length()); + } + + void make_send_field(Send_field *to) override + { + Field::make_send_field(to); + to->set_data_type_name(singleton()->name().lex_cstring()); + } + + bool validate_value_in_record(THD *thd, const uchar *record) const override + { + return false; + } + + bool val_native(Native *to) override + { + DBUG_ASSERT(marked_for_read()); + if (to->alloc(FbtImpl::binary_length())) + return true; + to->length(FbtImpl::binary_length()); + FbtImpl::record_to_memory((char*) to->ptr(), (const char*) ptr); + return false; + } + + Fbt to_fbt() const + { + DBUG_ASSERT(marked_for_read()); + return Fbt::record_to_memory((const char*) ptr); + } + + String *val_str(String *val_buffer, String *) override + { + return to_fbt().to_string(val_buffer) ? 
NULL : val_buffer; + } + + my_decimal *val_decimal(my_decimal *to) override + { + DBUG_ASSERT(marked_for_read()); + my_decimal_set_zero(to); + return to; + } + + longlong val_int() override + { + DBUG_ASSERT(marked_for_read()); + return 0; + } + + double val_real() override + { + DBUG_ASSERT(marked_for_read()); + return 0; + } + + bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + DBUG_ASSERT(marked_for_read()); + set_zero_time(ltime, MYSQL_TIMESTAMP_TIME); + return false; + } + + bool val_bool(void) override + { + DBUG_ASSERT(marked_for_read()); + return !Fbt::only_zero_bytes((const char *) ptr, FbtImpl::binary_length()); + } + + int store_native(const Native &value) override + { + DBUG_ASSERT(marked_for_write_or_computed()); + DBUG_ASSERT(value.length() == FbtImpl::binary_length()); + FbtImpl::memory_to_record((char*) ptr, value.ptr()); + return 0; + } + + int store(const char *str, size_t length, CHARSET_INFO *cs) override + { + return cs == &my_charset_bin ? store_binary(str, length) + : store_text(str, length, cs); + } + + int store_text(const char *str, size_t length, CHARSET_INFO *cs) override + { + return store_fbt_null_with_warn(Fbt_null(str, length, cs), + ErrConvString(str, length, cs)); + } + + int store_binary(const char *str, size_t length) override + { + return store_fbt_null_with_warn(Fbt_null(str, length), + ErrConvString(str, length, + &my_charset_bin)); + } + + int store_hex_hybrid(const char *str, size_t length) override + { + return Field_fbt::store_binary(str, length); + } + + int store_decimal(const my_decimal *num) override + { + DBUG_ASSERT(marked_for_write_or_computed()); + return set_min_value_with_warn(ErrConvDecimal(num)); + } + + int store(longlong nr, bool unsigned_flag) override + { + DBUG_ASSERT(marked_for_write_or_computed()); + return set_min_value_with_warn( + ErrConvInteger(Longlong_hybrid(nr, unsigned_flag))); + } + + int store(double nr) override + { + DBUG_ASSERT(marked_for_write_or_computed()); + return 
set_min_value_with_warn(ErrConvDouble(nr)); + } + + int store_time_dec(const MYSQL_TIME *ltime, uint dec) override + { + DBUG_ASSERT(marked_for_write_or_computed()); + return set_min_value_with_warn(ErrConvTime(ltime)); + } + + /*** Field conversion routines ***/ + int store_field(Field *from) override + { + // INSERT INTO t1 (fbt_field) SELECT different_field_type FROM t2; + return from->save_in_field(this); + } + int save_in_field(Field *to) override + { + // INSERT INTO t2 (different_field_type) SELECT fbt_field FROM t1; + if (to->charset() == &my_charset_bin && + dynamic_cast + (to->type_handler())) + { + NativeBuffer res; + val_native(&res); + return to->store(res.ptr(), res.length(), &my_charset_bin); + } + return save_in_field_str(to); + } + Copy_func *get_copy_func(const Field *from) const override + { + // ALTER to FBT from another field + return do_field_string; + } + + Copy_func *get_copy_func_to(const Field *to) const override + { + if (type_handler() == to->type_handler()) + { + // ALTER from FBT to FBT + DBUG_ASSERT(pack_length() == to->pack_length()); + DBUG_ASSERT(charset() == to->charset()); + DBUG_ASSERT(sort_charset() == to->sort_charset()); + return Field::do_field_eq; + } + // ALTER from FBT to another fbt type + if (to->charset() == &my_charset_bin && + dynamic_cast + (to->type_handler())) + { + /* + ALTER from FBT to a binary string type, e.g.: + BINARY, TINYBLOB, BLOB, MEDIUMBLOB, LONGBLOB + */ + return do_field_fbt_native_to_binary; + } + return do_field_string; + } + + static void do_field_fbt_native_to_binary(Copy_field *copy) + { + NativeBuffer res; + copy->from_field->val_native(&res); + copy->to_field->store(res.ptr(), res.length(), &my_charset_bin); + } + + bool memcpy_field_possible(const Field *from) const override + { + // INSERT INTO t1 (fbt_field) SELECT field2 FROM t2; + return type_handler() == from->type_handler(); + } + enum_conv_type rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const 
Conv_param ¶m) const override + { + if (type_handler() == source.type_handler() || + (source.type_handler() == &type_handler_string && + source.type_handler()->max_display_length_for_field(source) == + FbtImpl::binary_length())) + return rpl_conv_type_from_same_data_type(source.metadata(), rli, param); + return CONV_TYPE_IMPOSSIBLE; + } + + /*** Optimizer routines ***/ + bool test_if_equality_guarantees_uniqueness(const Item *const_item) const override + { + /* + This condition: + WHERE fbt_field=const + should return a single distinct value only, + as comparison is done according to FBT. + */ + return true; + } + bool can_be_substituted_to_equal_item(const Context &ctx, + const Item_equal *item_equal) + override + { + switch (ctx.subst_constraint()) { + case ANY_SUBST: + return ctx.compare_type_handler() == item_equal->compare_type_handler(); + case IDENTITY_SUBST: + return true; + } + return false; + } + Item *get_equal_const_item(THD *thd, const Context &ctx, + Item *const_item) override + { + Fbt_null tmp(const_item); + if (tmp.is_null()) + return NULL; + return new (thd->mem_root) Item_literal_fbt(thd, tmp); + } + Data_type_compatibility can_optimize_keypart_ref(const Item_bool_func *cond, + const Item *item) + const override + { + /* + Mixing of two different non-traditional types is currently prevented. + This may change in the future. + */ + DBUG_ASSERT(item->type_handler()->type_handler_base_or_self()-> + is_traditional_scalar_type() || + item->type_handler()->type_collection() == + type_handler()->type_collection()); + return Data_type_compatibility::OK; + } + /** + Test if Field can use range optimizer for a standard comparison operation: + <=, <, =, <=>, >, >= + Note, this method does not cover spatial operations. 
+ */ + Data_type_compatibility can_optimize_range(const Item_bool_func *cond, + const Item *item, + bool is_eq_func) const override + { + // See the DBUG_ASSERT comment in can_optimize_keypart_ref() + DBUG_ASSERT(item->type_handler()->type_handler_base_or_self()-> + is_traditional_scalar_type() || + item->type_handler()->type_collection() == + type_handler()->type_collection()); + return Data_type_compatibility::OK; + } + void hash_not_null(Hasher *hasher) override + { + FbtImpl::hash_record(ptr, hasher); + } + SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *prm, KEY_PART *key_part, + const Item_bool_func *cond, + scalar_comparison_op op, Item *value) override + { + DBUG_ENTER("Field_fbt::get_mm_leaf"); + if (can_optimize_scalar_range(prm, key_part, cond, op, value) != + Data_type_compatibility::OK) + DBUG_RETURN(0); + int err= value->save_in_field_no_warnings(this, 1); + if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0) + DBUG_RETURN(&null_element); + if (err > 0) + { + if (op == SCALAR_CMP_EQ || op == SCALAR_CMP_EQUAL) + DBUG_RETURN(new (prm->mem_root) SEL_ARG_IMPOSSIBLE(this)); + DBUG_RETURN(NULL); /* Cannot infer anything */ + } + DBUG_RETURN(stored_field_make_mm_leaf(prm, key_part, op, value)); + } + Data_type_compatibility can_optimize_hash_join(const Item_bool_func *cond, + const Item *item) + const override + { + return can_optimize_keypart_ref(cond, item); + } + Data_type_compatibility can_optimize_group_min_max( + const Item_bool_func *cond, + const Item *const_item) const override + { + return Data_type_compatibility::OK; + } + + uint row_pack_length() const override { return pack_length(); } + + Binlog_type_info binlog_type_info() const override + { + DBUG_ASSERT(type() == binlog_type()); + return Binlog_type_info_fixed_string(Field_fbt::binlog_type(), + FbtImpl::binary_length(), &my_charset_bin); + } + + uchar *pack(uchar *to, const uchar *from, uint max_length) override + { + DBUG_PRINT("debug", ("Packing field '%s'", field_name.str)); + return 
FbtImpl::pack(to, from, max_length); + } + + const uchar *unpack(uchar *to, const uchar *from, const uchar *from_end, + uint param_data) override + { + return FbtImpl::unpack(to, from, from_end, param_data); + } + + uint max_packed_col_length(uint max_length) override + { + return StringPack::max_packed_col_length(max_length); + } + + uint packed_col_length(const uchar *fbt_ptr, uint length) override + { + return StringPack::packed_col_length(fbt_ptr, length); + } + + uint size_of() const override { return sizeof(*this); } + }; + + + class cmp_item_fbt: public cmp_item_scalar + { + Fbt m_native; + public: + cmp_item_fbt() + :cmp_item_scalar(), + m_native(Fbt::zero()) + { } + void store_value(Item *item) override + { + m_native= Fbt(item, &m_null_value); + } + int cmp_not_null(const Value *val) override + { + DBUG_ASSERT(!val->is_null()); + DBUG_ASSERT(val->is_string()); + Fbt_null tmp(val->m_string); + DBUG_ASSERT(!tmp.is_null()); + return m_native.cmp(tmp); + } + int cmp(Item *arg) override + { + Fbt_null tmp(arg); + return m_null_value || tmp.is_null() ? 
UNKNOWN : m_native.cmp(tmp) != 0; + } + int compare(cmp_item *ci) override + { + cmp_item_fbt *tmp= static_cast(ci); + DBUG_ASSERT(!m_null_value); + DBUG_ASSERT(!tmp->m_null_value); + return m_native.cmp(tmp->m_native); + } + cmp_item *make_same(THD *thd) override + { + return new (thd->mem_root) cmp_item_fbt(); + } + }; + + class in_fbt :public in_vector + { + Fbt m_value; + static int cmp_fbt(void *cmp_arg, Fbt *a, Fbt *b) + { + return a->cmp(*b); + } + public: + in_fbt(THD *thd, uint elements) + :in_vector(thd, elements, sizeof(Fbt), (qsort2_cmp) cmp_fbt, 0), + m_value(Fbt::zero()) + { } + const Type_handler *type_handler() const override + { + return singleton(); + } + bool set(uint pos, Item *item) override + { + Fbt *buff= &((Fbt *) base)[pos]; + Fbt_null value(item); + if (value.is_null()) + { + *buff= Fbt::zero(); + return true; + } + *buff= value; + return false; + } + uchar *get_value(Item *item) override + { + Fbt_null value(item); + if (value.is_null()) + return 0; + m_value= value; + return (uchar *) &m_value; + } + Item* create_item(THD *thd) override + { + return new (thd->mem_root) Item_literal_fbt(thd); + } + void value_to_item(uint pos, Item *item) override + { + const Fbt &buff= (((Fbt*) base)[pos]); + static_cast(item)->set_value(buff); + } + }; + + class Item_copy_fbt: public Item_copy + { + NativeBuffer m_value; + public: + Item_copy_fbt(THD *thd, Item *item_arg): Item_copy(thd, item_arg) {} + + bool val_native(THD *thd, Native *to) override + { + if (null_value) + return true; + return to->copy(m_value.ptr(), m_value.length()); + } + String *val_str(String *to) override + { + if (null_value) + return NULL; + Fbt_null tmp(m_value.ptr(), m_value.length()); + return tmp.is_null() || tmp.to_string(to) ? 
NULL : to; + } + my_decimal *val_decimal(my_decimal *to) override + { + my_decimal_set_zero(to); + return to; + } + double val_real() override + { + return 0; + } + longlong val_int() override + { + return 0; + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + set_zero_time(ltime, MYSQL_TIMESTAMP_TIME); + return null_value; + } + void copy() override + { + null_value= item->val_native(current_thd, &m_value); + DBUG_ASSERT(null_value == item->null_value); + } + int save_in_field(Field *field, bool no_conversions) override + { + return Item::save_in_field(field, no_conversions); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + }; + + class Item_char_typecast_func_handler_fbt_to_binary: + public Item_handled_func::Handler_str + { + public: + const Type_handler *return_type_handler(const Item_handled_func *item) + const override + { + if (item->max_length > MAX_FIELD_VARCHARLENGTH) + return Type_handler::blob_type_handler(item->max_length); + if (item->max_length > 255) + return &type_handler_varchar; + return &type_handler_string; + } + bool fix_length_and_dec(Item_handled_func *xitem) const override + { + return false; + } + String *val_str(Item_handled_func *item, String *to) const override + { + DBUG_ASSERT(dynamic_cast(item)); + return static_cast(item)-> + val_str_binary_from_native(to); + } + }; + + class Item_typecast_fbt: public Item_func + { + public: + Item_typecast_fbt(THD *thd, Item *a) :Item_func(thd, a) {} + + const Type_handler *type_handler() const override + { return singleton(); } + + enum Functype functype() const override { return CHAR_TYPECAST_FUNC; } + bool eq(const Item *item, bool binary_cmp) const override + { + if (this == item) + return true; + if (item->type() != FUNC_ITEM || + functype() != ((Item_func*)item)->functype()) + return false; + if (type_handler() != item->type_handler()) + return false; + Item_typecast_fbt *cast= (Item_typecast_fbt*) item; + return 
args[0]->eq(cast->args[0], binary_cmp); + } + LEX_CSTRING func_name_cstring() const override + { + static Name name= singleton()->name(); + size_t len= 9+name.length()+1; + char *buf= (char*)current_thd->alloc(len); + strmov(strmov(buf, "cast_as_"), name.ptr()); + return { buf, len }; + } + void print(String *str, enum_query_type query_type) override + { + str->append(STRING_WITH_LEN("cast(")); + args[0]->print(str, query_type); + str->append(STRING_WITH_LEN(" as ")); + str->append(singleton()->name().lex_cstring()); + str->append(')'); + } + bool fix_length_and_dec(THD *thd) override + { + Type_std_attributes::operator=(Type_std_attributes_fbt()); + if (Fbt::fix_fields_maybe_null_on_conversion_to_fbt(args[0])) + set_maybe_null(); + return false; + } + String *val_str(String *to) override + { + Fbt_null tmp(args[0]); + return (null_value= tmp.is_null() || tmp.to_string(to)) ? NULL : to; + } + longlong val_int() override + { + return 0; + } + double val_real() override + { + return 0; + } + my_decimal *val_decimal(my_decimal *to) override + { + my_decimal_set_zero(to); + return to; + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override + { + set_zero_time(ltime, MYSQL_TIMESTAMP_TIME); + return false; + } + bool val_native(THD *thd, Native *to) override + { + Fbt_null tmp(args[0]); + return null_value= tmp.is_null() || tmp.to_native(to); + } + Item *get_copy(THD *thd) override + { return get_item_copy(thd, this); } + }; + + class Item_cache_fbt: public Item_cache + { + NativeBuffer m_value; + public: + Item_cache_fbt(THD *thd) + :Item_cache(thd, singleton()) { } + Item *get_copy(THD *thd) + { return get_item_copy(thd, this); } + bool cache_value() + { + if (!example) + return false; + value_cached= true; + null_value_inside= null_value= + example->val_native_with_conversion_result(current_thd, + &m_value, type_handler()); + return true; + } + String* val_str(String *to) + { + if (!has_value()) + return NULL; + Fbt_null tmp(m_value.ptr(), 
m_value.length()); + return tmp.is_null() || tmp.to_string(to) ? NULL : to; + } + my_decimal *val_decimal(my_decimal *to) + { + if (!has_value()) + return NULL; + my_decimal_set_zero(to); + return to; + } + longlong val_int() + { + if (!has_value()) + return 0; + return 0; + } + double val_real() + { + if (!has_value()) + return 0; + return 0; + } + longlong val_datetime_packed(THD *thd) + { + DBUG_ASSERT(0); + if (!has_value()) + return 0; + return 0; + } + longlong val_time_packed(THD *thd) + { + DBUG_ASSERT(0); + if (!has_value()) + return 0; + return 0; + } + bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) + { + if (!has_value()) + return true; + set_zero_time(ltime, MYSQL_TIMESTAMP_TIME); + return false; + } + bool val_native(THD *thd, Native *to) + { + if (!has_value()) + return true; + return to->copy(m_value.ptr(), m_value.length()); + } + }; + + /* =[ methods ]=============================================== */ +private: + + bool character_or_binary_string_to_native(THD *thd, const String *str, + Native *to) const + { + if (str->charset() == &my_charset_bin) + { + // Convert from a binary string + if (str->length() != FbtImpl::binary_length() || + to->copy(str->ptr(), str->length())) + { + thd->push_warning_wrong_value(Sql_condition::WARN_LEVEL_WARN, + name().ptr(), ErrConvString(str).ptr()); + return true; + } + return false; + } + // Convert from a character string + Fbt_null tmp(*str); + if (tmp.is_null()) + thd->push_warning_wrong_value(Sql_condition::WARN_LEVEL_WARN, + name().ptr(), ErrConvString(str).ptr()); + return tmp.is_null() || tmp.to_native(to); + } + +public: + ~Type_handler_fbt() override {} + + const Type_collection *type_collection() const override + { + return TypeCollectionImpl::singleton(); + } + + const Name &default_value() const override + { + return FbtImpl::default_value(); + } + ulong KEY_pack_flags(uint column_nr) const override + { + return FbtImpl::KEY_pack_flags(column_nr); + } + protocol_send_type_t 
protocol_send_type() const override + { + return PROTOCOL_SEND_STRING; + } + bool Item_append_extended_type_info(Send_field_extended_metadata *to, + const Item *item) const override + { + return to->set_data_type_name(name().lex_cstring()); + } + + enum_field_types field_type() const override + { + return MYSQL_TYPE_STRING; + } + + Item_result result_type() const override + { + return STRING_RESULT; + } + + Item_result cmp_type() const override + { + return STRING_RESULT; + } + + enum_dynamic_column_type dyncol_type(const Type_all_attributes *attr) + const override + { + return DYN_COL_STRING; + } + + uint32 max_display_length_for_field(const Conv_source &src) const override + { + return FbtImpl::max_char_length(); + } + + const Type_handler *type_handler_for_comparison() const override + { + return this; + } + + int stored_field_cmp_to_item(THD *thd, Field *field, Item *item) const override + { + DBUG_ASSERT(field->type_handler() == this); + Fbt_null ni(item); // Convert Item to Fbt + if (ni.is_null()) + return 0; + NativeBuffer tmp; + if (field->val_native(&tmp)) + { + DBUG_ASSERT(0); + return 0; + } + return -ni.cmp(tmp); + } + CHARSET_INFO *charset_for_protocol(const Item *item) const override + { + return item->collation.collation; + } + + bool is_scalar_type() const override { return true; } + bool is_val_native_ready() const override { return true; } + bool can_return_int() const override { return false; } + bool can_return_decimal() const override { return false; } + bool can_return_real() const override { return false; } + bool can_return_str() const override { return true; } + bool can_return_text() const override { return true; } + bool can_return_date() const override { return false; } + bool can_return_time() const override { return false; } + bool convert_to_binary_using_val_native() const override { return true; } + + decimal_digits_t Item_time_precision(THD *thd, Item *item) const override + { + return 0; + } + decimal_digits_t 
Item_datetime_precision(THD *thd, Item *item) const override + { + return 0; + } + decimal_digits_t Item_decimal_scale(const Item *item) const override + { + return 0; + } + decimal_digits_t Item_decimal_precision(const Item *item) const override + { + /* This will be needed if we ever allow cast from Fbt to DECIMAL. */ + return (FbtImpl::binary_length()*8+7)/10*3; // = bytes to decimal digits + } + + /* + Returns how many digits a divisor adds into a division result. + See Item::divisor_precision_increment() in item.h for more comments. + */ + decimal_digits_t Item_divisor_precision_increment(const Item *) const override + { + return 0; + } + /** + Makes a temporary table Field to handle numeric aggregate functions, + e.g. SUM(DISTINCT expr), AVG(DISTINCT expr), etc. + */ + Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *) const override + { + DBUG_ASSERT(0); + return 0; + } + Field *make_conversion_table_field(MEM_ROOT *root, TABLE *table, uint metadata, + const Field *target) const override + { + const Record_addr tmp(NULL, Bit_addr(true)); + return new (table->in_use->mem_root) Field_fbt(&empty_clex_str, tmp); + } + // Fix attributes after the parser + bool Column_definition_fix_attributes(Column_definition *c) const override + { + c->length= FbtImpl::max_char_length(); + return false; + } + + bool Column_definition_prepare_stage1(THD *, MEM_ROOT *, + Column_definition *def, + column_definition_type_t, + const Column_derived_attributes *) + const override + { + def->prepare_stage1_simple(&my_charset_numeric); + return false; + } + + bool Column_definition_redefine_stage1(Column_definition *def, + const Column_definition *dup, + const handler *file) const override + { + def->redefine_stage1_common(dup, file); + def->set_compression_method(dup->compression_method()); + def->create_length_to_internal_length_string(); + return false; + } + + bool Column_definition_prepare_stage2(Column_definition *def, handler *file, + ulonglong table_flags) const 
override + { + def->pack_flag= FIELDFLAG_BINARY; + return false; + } + + bool partition_field_check(const LEX_CSTRING &field_name, + Item *item_expr) const override + { + if (item_expr->cmp_type() != STRING_RESULT) + { + my_error(ER_WRONG_TYPE_COLUMN_VALUE_ERROR, MYF(0)); + return true; + } + return false; + } + + bool partition_field_append_value(String *to, Item *item_expr, + CHARSET_INFO *field_cs, + partition_value_print_mode_t mode) + const override + { + StringBuffer fbtstr; + Fbt_null fbt(item_expr); + if (fbt.is_null()) + { + my_error(ER_PARTITION_FUNCTION_IS_NOT_ALLOWED, MYF(0)); + return true; + } + return fbt.to_string(&fbtstr) || + to->append('\'') || + to->append(fbtstr) || + to->append('\''); + } + + Field *make_table_field(MEM_ROOT *root, const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *table) const override + { + return new (root) Field_fbt(name, addr); + } + + Field * make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *mem_root, + const LEX_CSTRING *name, const Record_addr &addr, + const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const override + { + return new (mem_root) Field_fbt(name, addr); + } + void Column_definition_attributes_frm_pack(const Column_definition_attributes *def, + uchar *buff) const override + { + def->frm_pack_basic(buff); + def->frm_pack_charset(buff); + } + bool Column_definition_attributes_frm_unpack(Column_definition_attributes *def, + TABLE_SHARE *share, const uchar *buffer, + LEX_CUSTRING *gis_options) + const override + { + def->frm_unpack_basic(buffer); + return def->frm_unpack_charset(share, buffer); + } + void make_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, + String *) const override + { + DBUG_ASSERT(item->type_handler() == this); + NativeBuffer tmp; + item->val_native_result(current_thd, &tmp); + if (item->maybe_null()) + { + if (item->null_value) + { + memset(to, 0, FbtImpl::binary_length() + 1); + 
return; + } + *to++= 1; + } + DBUG_ASSERT(!item->null_value); + DBUG_ASSERT(FbtImpl::binary_length() == tmp.length()); + DBUG_ASSERT(FbtImpl::binary_length() == sort_field->length); + FbtImpl::memory_to_record((char*) to, tmp.ptr()); + } + uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + String *) const override + { + DBUG_ASSERT(item->type_handler() == this); + NativeBuffer tmp; + item->val_native_result(current_thd, &tmp); + if (item->maybe_null()) + { + if (item->null_value) + { + *to++=0; + return 0; + } + *to++= 1; + } + DBUG_ASSERT(!item->null_value); + DBUG_ASSERT(FbtImpl::binary_length() == tmp.length()); + DBUG_ASSERT(FbtImpl::binary_length() == sort_field->length); + FbtImpl::memory_to_record((char*) to, tmp.ptr()); + return tmp.length(); + } + void sort_length(THD *thd, const Type_std_attributes *item, + SORT_FIELD_ATTR *attr) const override + { + attr->original_length= attr->length= FbtImpl::binary_length(); + attr->suffix_length= 0; + } + uint32 max_display_length(const Item *item) const override + { + return FbtImpl::max_char_length(); + } + uint32 calc_pack_length(uint32 length) const override + { + return FbtImpl::binary_length(); + } + void Item_update_null_value(Item *item) const override + { + NativeBuffer tmp; + item->val_native(current_thd, &tmp); + } + bool Item_save_in_value(THD *thd, Item *item, st_value *value) const override + { + value->m_type= DYN_COL_STRING; + String *str= item->val_str(&value->m_string); + if (str != &value->m_string && !item->null_value) + { + // "item" returned a non-NULL value + if (Fbt_null(*str).is_null()) + { + /* + The value was not-null, but conversion to FBT failed: + SELECT a, DECODE_ORACLE(fbtcol, 'garbage', '', '::01', '01') + FROM t1; + */ + thd->push_warning_wrong_value(Sql_condition::WARN_LEVEL_WARN, + name().ptr(), ErrConvString(str).ptr()); + value->m_type= DYN_COL_NULL; + return true; + } + // "item" returned a non-NULL value, and it was a valid FBT + 
value->m_string.set(str->ptr(), str->length(), str->charset()); + } + return check_null(item, value); + } + void Item_param_setup_conversion(THD *thd, Item_param *param) const override + { + param->setup_conversion_string(thd, thd->variables.character_set_client); + } + void Item_param_set_param_func(Item_param *param, + uchar **pos, ulong len) const override + { + param->set_param_str(pos, len); + } + bool Item_param_set_from_value(THD *thd, Item_param *param, + const Type_all_attributes *attr, + const st_value *val) const override + { + param->unsigned_flag= false; + param->setup_conversion_string(thd, attr->collation.collation); + /* + Exact value of max_length is not known unless fbt is converted to + charset of connection, so we have to set it later. + */ + return param->set_str(val->m_string.ptr(), val->m_string.length(), + attr->collation.collation, + attr->collation.collation); + } + bool Item_param_val_native(THD *thd, Item_param *item, Native *to) + const override + { + StringBuffer buffer; + String *str= item->val_str(&buffer); + if (!str) + return true; + Fbt_null tmp(*str); + return tmp.is_null() || tmp.to_native(to); + } + bool Item_send(Item *item, Protocol *p, st_value *buf) const override + { + return Item_send_str(item, p, buf); + } + int Item_save_in_field(Item *item, Field *field, bool no_conversions) + const override + { + if (field->type_handler() == this) + { + NativeBuffer tmp; + bool rc= item->val_native(current_thd, &tmp); + if (rc || item->null_value) + return set_field_to_null_with_conversions(field, no_conversions); + field->set_notnull(); + return field->store_native(tmp); + } + return item->save_str_in_field(field, no_conversions); + } + + String *print_item_value(THD *thd, Item *item, String *str) const override + { + StringBuffer buf; + String *result= item->val_str(&buf); + /* + TODO: This should eventually use one of these notations: + 1. CAST('xxx' AS Fbt) + Problem: CAST is not supported as a NAME_CONST() argument. + 2. 
Fbt'xxx' + Problem: This syntax is not supported by the parser yet. + */ + return !result || str->realloc(result->length() + 2) || + str->append(STRING_WITH_LEN("'")) || + str->append(result->ptr(), result->length()) || + str->append(STRING_WITH_LEN("'")) ? nullptr : str; + } + + /** + Check if + WHERE expr=value AND expr=const + can be rewritten as: + WHERE const=value AND expr=const + + "this" is the comparison handler that is used by "target". + + @param target - the predicate expr=value, + whose "expr" argument will be replaced to "const". + @param target_expr - the target's "expr" which will be replaced to "const". + @param target_value - the target's second argument, it will remain unchanged. + @param source - the equality predicate expr=const (or expr<=>const) + that can be used to rewrite the "target" part + (under certain conditions, see the code). + @param source_expr - the source's "expr". It should be exactly equal to + the target's "expr" to make condition rewrite possible. + @param source_const - the source's "const" argument, it will be inserted + into "target" instead of "expr". + */ + bool can_change_cond_ref_to_const(Item_bool_func2 *target, Item *target_expr, + Item *target_value, Item_bool_func2 *source, + Item *source_expr, Item *source_const) + const override + { + /* + WHERE COALESCE(col)='xxx' AND COALESCE(col)=CONCAT(a); --> + WHERE COALESCE(col)='xxx' AND 'xxx'=CONCAT(a); + */ + return target->compare_type_handler() == source->compare_type_handler(); + } + bool subquery_type_allows_materialization(const Item *inner, + const Item *outer, bool) const override + { + /* + Example: + SELECT * FROM t1 WHERE a IN (SELECT col FROM t1 GROUP BY col); + Allow materialization only if the outer column is also FBT. + This can be changed for more relaxed rules in the future. 
+ */ + DBUG_ASSERT(inner->type_handler() == this); + return outer->type_handler() == this; + } + /** + Make a simple constant replacement item for a constant "src", + so the new item can futher be used for comparison with "cmp", e.g.: + src = cmp -> replacement = cmp + + "this" is the type handler that is used to compare "src" and "cmp". + + @param thd - current thread, for mem_root + @param src - The item that we want to replace. It's a const item, + but it can be complex enough to calculate on every row. + @param cmp - The src's comparand. + @retval - a pointer to the created replacement Item + @retval - NULL, if could not create a replacement (e.g. on EOM). + NULL is also returned for ROWs, because instead of replacing + a Item_row to a new Item_row, Type_handler_row just replaces + its elements. + */ + Item *make_const_item_for_comparison(THD *thd, Item *src, + const Item *cmp) const override + { + Fbt_null tmp(src); + if (tmp.is_null()) + return new (thd->mem_root) Item_null(thd, src->name.str); + return new (thd->mem_root) Item_literal_fbt(thd, tmp); + } + Item_cache *Item_get_cache(THD *thd, const Item *item) const override + { + return new (thd->mem_root) Item_cache_fbt(thd); + } + + Item *create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) const override + { + return new (thd->mem_root) Item_typecast_fbt(thd, item); + } + Item_copy *create_item_copy(THD *thd, Item *item) const override + { + return new (thd->mem_root) Item_copy_fbt(thd, item); + } + int cmp_native(const Native &a, const Native &b) const override + { + return FbtImpl::cmp(a.to_lex_cstring(), b.to_lex_cstring()); + } + bool set_comparator_func(THD *thd, Arg_comparator *cmp) const override + { + return cmp->set_cmp_func_native(thd); + } + bool Item_const_eq(const Item_const *a, const Item_const *b, + bool binary_cmp) const override + { + return false; + } + bool Item_eq_value(THD *thd, const Type_cmp_attributes *attr, + Item *a, Item *b) const override + { + 
Fbt_null na(a), nb(b); + return !na.is_null() && !nb.is_null() && !na.cmp(nb); + } + bool Item_hybrid_func_fix_attributes(THD *thd, const LEX_CSTRING &name, + Type_handler_hybrid_field_type *h, + Type_all_attributes *attr, + Item **items, uint nitems) const override + { + attr->Type_std_attributes::operator=(Type_std_attributes_fbt()); + h->set_handler(this); + /* + If some of the arguments cannot be safely converted to "FBT NOT NULL", + then mark the entire function nullability as NULL-able. + Otherwise, keep the generic nullability calculated by earlier stages: + - either by the most generic way in Item_func::fix_fields() + - or by Item_func_xxx::fix_length_and_dec() before the call of + Item_hybrid_func_fix_attributes() + IFNULL() is special. It does not need to test args[0]. + */ + uint first= dynamic_cast(attr) ? 1 : 0; + for (uint i= first; i < nitems; i++) + { + if (Fbt::fix_fields_maybe_null_on_conversion_to_fbt(items[i])) + { + attr->set_type_maybe_null(true); + break; + } + } + return false; + } + bool Item_func_min_max_fix_attributes(THD *thd, Item_func_min_max *func, + Item **items, uint nitems) const override + { + return Item_hybrid_func_fix_attributes(thd, func->func_name_cstring(), + func, func, items, nitems); + + } + bool Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *func) const override + { + func->Type_std_attributes::operator=(Type_std_attributes_fbt()); + func->set_handler(this); + return false; + } + bool Item_sum_sum_fix_length_and_dec(Item_sum_sum *func) const override + { + return Item_func_or_sum_illegal_param(func); + } + bool Item_sum_avg_fix_length_and_dec(Item_sum_avg *func) const override + { + return Item_func_or_sum_illegal_param(func); + } + bool Item_sum_variance_fix_length_and_dec(Item_sum_variance *func) const override + { + return Item_func_or_sum_illegal_param(func); + } + + bool Item_val_native_with_conversion(THD *thd, Item *item, + Native *to) const override + { + if (item->type_handler() == this) + return 
item->val_native(thd, to); // No conversion needed + StringBuffer buffer; + String *str= item->val_str(&buffer); + return str ? character_or_binary_string_to_native(thd, str, to) : true; + } + bool Item_val_native_with_conversion_result(THD *thd, Item *item, + Native *to) const override + { + if (item->type_handler() == this) + return item->val_native_result(thd, to); // No conversion needed + StringBuffer buffer; + String *str= item->str_result(&buffer); + return str ? character_or_binary_string_to_native(thd, str, to) : true; + } + + bool Item_val_bool(Item *item) const override + { + NativeBuffer tmp; + if (item->val_native(current_thd, &tmp)) + return false; + return !Fbt::only_zero_bytes(tmp.ptr(), tmp.length()); + } + void Item_get_date(THD *thd, Item *item, Temporal::Warn *buff, + MYSQL_TIME *ltime, date_mode_t fuzzydate) const override + { + set_zero_time(ltime, MYSQL_TIMESTAMP_TIME); + } + + longlong Item_val_int_signed_typecast(Item *item) const override + { + DBUG_ASSERT(0); + return 0; + } + + longlong Item_val_int_unsigned_typecast(Item *item) const override + { + DBUG_ASSERT(0); + return 0; + } + + String *Item_func_hex_val_str_ascii(Item_func_hex *item, String *str) + const override + { + NativeBuffer tmp; + if ((item->null_value= item->arguments()[0]->val_native(current_thd, &tmp))) + return nullptr; + DBUG_ASSERT(tmp.length() == FbtImpl::binary_length()); + if (str->set_hex(tmp.ptr(), tmp.length())) + { + str->length(0); + str->set_charset(item->collation.collation); + } + return str; + } + + String *Item_func_hybrid_field_type_val_str(Item_func_hybrid_field_type *item, + String *str) const override + { + NativeBuffer native; + if (item->val_native(current_thd, &native)) + { + DBUG_ASSERT(item->null_value); + return nullptr; + } + DBUG_ASSERT(native.length() == FbtImpl::binary_length()); + Fbt_null tmp(native.ptr(), native.length()); + return tmp.is_null() || tmp.to_string(str) ? 
nullptr : str; + } + double Item_func_hybrid_field_type_val_real(Item_func_hybrid_field_type *) + const override + { + return 0; + } + longlong Item_func_hybrid_field_type_val_int(Item_func_hybrid_field_type *) + const override + { + return 0; + } + my_decimal * + Item_func_hybrid_field_type_val_decimal(Item_func_hybrid_field_type *, + my_decimal *to) const override + { + my_decimal_set_zero(to); + return to; + } + void Item_func_hybrid_field_type_get_date(THD *, + Item_func_hybrid_field_type *, + Temporal::Warn *, + MYSQL_TIME *to, + date_mode_t fuzzydate) + const override + { + set_zero_time(to, MYSQL_TIMESTAMP_TIME); + } + // WHERE is Item_func_min_max_val_native??? + String *Item_func_min_max_val_str(Item_func_min_max *func, String *str) + const override + { + Fbt_null tmp(func); + return tmp.is_null() || tmp.to_string(str) ? nullptr : str; + } + double Item_func_min_max_val_real(Item_func_min_max *) const override + { + return 0; + } + longlong Item_func_min_max_val_int(Item_func_min_max *) const override + { + return 0; + } + my_decimal *Item_func_min_max_val_decimal(Item_func_min_max *, + my_decimal *to) const override + { + my_decimal_set_zero(to); + return to; + } + bool Item_func_min_max_get_date(THD *thd, Item_func_min_max*, MYSQL_TIME *to, + date_mode_t fuzzydate) const override + { + set_zero_time(to, MYSQL_TIMESTAMP_TIME); + return false; + } + + bool Item_func_between_fix_length_and_dec(Item_func_between *func) const override + { + return false; + } + longlong Item_func_between_val_int(Item_func_between *func) const override + { + return func->val_int_cmp_native(); + } + + cmp_item *make_cmp_item(THD *thd, CHARSET_INFO *cs) const override + { + return new (thd->mem_root) cmp_item_fbt; + } + + in_vector *make_in_vector(THD *thd, const Item_func_in *func, + uint nargs) const override + { + return new (thd->mem_root) in_fbt(thd, nargs); + } + + bool Item_func_in_fix_comparator_compatible_types(THD *thd, + Item_func_in *func) + const override + { + if 
(func->compatible_types_scalar_bisection_possible()) + { + return func->value_list_convert_const_to_int(thd) || + func->fix_for_scalar_comparison_using_bisection(thd); + } + return + func->fix_for_scalar_comparison_using_cmp_items(thd, + 1U << (uint) STRING_RESULT); + } + bool Item_func_round_fix_length_and_dec(Item_func_round *func) const override + { + return Item_func_or_sum_illegal_param(func); + } + bool Item_func_int_val_fix_length_and_dec(Item_func_int_val *func) const override + { + return Item_func_or_sum_illegal_param(func); + } + + bool Item_func_abs_fix_length_and_dec(Item_func_abs *func) const override + { + return Item_func_or_sum_illegal_param(func); + } + + bool Item_func_neg_fix_length_and_dec(Item_func_neg *func) const override + { + return Item_func_or_sum_illegal_param(func); + } + + bool Item_func_signed_fix_length_and_dec(Item_func_signed *item) const override + { + return Item_func_or_sum_illegal_param(item); + } + bool Item_func_unsigned_fix_length_and_dec(Item_func_unsigned *item) const override + { + return Item_func_or_sum_illegal_param(item); + } + bool Item_double_typecast_fix_length_and_dec(Item_double_typecast *item) + const override + { + return Item_func_or_sum_illegal_param(item); + } + bool Item_float_typecast_fix_length_and_dec(Item_float_typecast *item) + const override + { + return Item_func_or_sum_illegal_param(item); + } + bool Item_decimal_typecast_fix_length_and_dec(Item_decimal_typecast *item) + const override + { + return Item_func_or_sum_illegal_param(item); + } + bool Item_char_typecast_fix_length_and_dec(Item_char_typecast *item) + const override + { + if (item->cast_charset() == &my_charset_bin) + { + static Item_char_typecast_func_handler_fbt_to_binary + item_char_typecast_func_handler_fbt_to_binary; + item->fix_length_and_dec_native_to_binary(FbtImpl::binary_length()); + item->set_func_handler(&item_char_typecast_func_handler_fbt_to_binary); + return false; + } + item->fix_length_and_dec_str(); + return false; + } + 
+ bool Item_time_typecast_fix_length_and_dec(Item_time_typecast *item) const override + { + return Item_func_or_sum_illegal_param(item); + } + bool Item_date_typecast_fix_length_and_dec(Item_date_typecast *item) const override + { + return Item_func_or_sum_illegal_param(item); + } + bool Item_datetime_typecast_fix_length_and_dec(Item_datetime_typecast *item) + const override + { + return Item_func_or_sum_illegal_param(item); + } + bool Item_func_plus_fix_length_and_dec(Item_func_plus *item) const override + { + return Item_func_or_sum_illegal_param(item); + } + bool Item_func_minus_fix_length_and_dec(Item_func_minus *item) const override + { + return Item_func_or_sum_illegal_param(item); + } + bool Item_func_mul_fix_length_and_dec(Item_func_mul *item) const override + { + return Item_func_or_sum_illegal_param(item); + } + bool Item_func_div_fix_length_and_dec(Item_func_div *item) const override + { + return Item_func_or_sum_illegal_param(item); + } + bool Item_func_mod_fix_length_and_dec(Item_func_mod *item) const override + { + return Item_func_or_sum_illegal_param(item); + } + + static Type_handler_fbt *singleton() + { + static Type_handler_fbt th; + return &th; + } +}; + +template +class Type_collection_fbt: public Type_collection +{ + const Type_handler *aggregate_common(const Type_handler *a, + const Type_handler *b) const + { + if (a == b) + return a; + return NULL; + } + const Type_handler *aggregate_if_string(const Type_handler *a, + const Type_handler *b) const + { + static const Type_aggregator::Pair agg[]= + { + {Type_handler_fbt::singleton(), &type_handler_null, Type_handler_fbt::singleton()}, + {Type_handler_fbt::singleton(), &type_handler_varchar, Type_handler_fbt::singleton()}, + {Type_handler_fbt::singleton(), &type_handler_string, Type_handler_fbt::singleton()}, + {Type_handler_fbt::singleton(), &type_handler_tiny_blob, Type_handler_fbt::singleton()}, + {Type_handler_fbt::singleton(), &type_handler_blob, Type_handler_fbt::singleton()}, + 
{Type_handler_fbt::singleton(), &type_handler_medium_blob, Type_handler_fbt::singleton()}, + {Type_handler_fbt::singleton(), &type_handler_long_blob, Type_handler_fbt::singleton()}, + {Type_handler_fbt::singleton(), &type_handler_hex_hybrid, Type_handler_fbt::singleton()}, + {NULL,NULL,NULL} + }; + return Type_aggregator::find_handler_in_array(agg, a, b, true); + } +public: + const Type_handler *aggregate_for_result(const Type_handler *a, + const Type_handler *b) + const override + { + const Type_handler *h; + if ((h= aggregate_common(a, b)) || (h= aggregate_if_string(a, b))) + return h; + return NULL; + } + + const Type_handler *aggregate_for_min_max(const Type_handler *a, + const Type_handler *b) + const override + { + return aggregate_for_result(a, b); + } + + const Type_handler *aggregate_for_comparison(const Type_handler *a, + const Type_handler *b) + const override + { + if (const Type_handler *h= aggregate_common(a, b)) + return h; + static const Type_aggregator::Pair agg[]= + { + {Type_handler_fbt::singleton(), &type_handler_null, Type_handler_fbt::singleton()}, + {Type_handler_fbt::singleton(), &type_handler_long_blob, Type_handler_fbt::singleton()}, + {NULL,NULL,NULL} + }; + return Type_aggregator::find_handler_in_array(agg, a, b, true); + } + + const Type_handler *aggregate_for_num_op(const Type_handler *a, + const Type_handler *b) + const override + { + return NULL; + } + + static Type_collection_fbt *singleton() + { + static Type_collection_fbt tc; + return &tc; + } +}; + +#endif /* SQL_TYPE_FIXEDBIN_H */ diff --git a/sql/sql_type_fixedbin_storage.h b/sql/sql_type_fixedbin_storage.h new file mode 100644 index 00000000..3a4e74a8 --- /dev/null +++ b/sql/sql_type_fixedbin_storage.h @@ -0,0 +1,171 @@ +#ifndef SQL_TYPE_FIXEDBIN_STORAGE +#define SQL_TYPE_FIXEDBIN_STORAGE +/* Copyright (c) 2019,2021 MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published 
by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + This is a common code for plugin (?) types that are generally + handled like strings, but have their own fixed size on-disk binary storage + format and their own (variable size) canonical string representation. + + Examples are INET6 and UUID types. + + The MariaDB server uses three binary representations of a data type: + + 1. In-memory binary representation (user visible) + This representation: + - can be used in INSERT..VALUES (X'AABBCC') + - can be used in WHERE conditions: WHERE c1=X'AABBCC' + - is returned by CAST(x AS BINARY(N)) + - is returned by Field::val_native() and Item::val_native() + + 2. In-record binary representation (user invisible) + This representation: + - is used in records (is pointed by Field::ptr) + - must be comparable by memcmp() + + 3. Binlog binary (row) representation + Usually, for string data types the binlog representation + is based on the in-record representation with trailing byte compression: + - trailing space compression for text string data types + - trailing zero compression for binary string data types + + We have to have separate in-memory and in-record representations + because we use HA_KEYTYPE_BINARY for indexing. The engine API + does not have a way to pass a comparison function as a parameter. + + The default implementation below assumes that: + - the in-memory and in-record representations are equal + - the binlog representation is compatible with BINARY(N) + This is OK for simple data types, like INET6. 
+ + Data type implementations that need different representations + can override the default implementation (like e.g. UUID does). +*/ + +/***********************************************************************/ + +template +class FixedBinTypeStorage +{ +protected: + // The buffer that stores the in-memory binary representation + char m_buffer[NATIVE_LEN]; + + FixedBinTypeStorage() = default; + + FixedBinTypeStorage & set_zero() + { + bzero(&m_buffer, sizeof(m_buffer)); + return *this; + } +public: + + // Initialize from the in-memory binary representation + FixedBinTypeStorage(const char *str, size_t length) + { + if (length != binary_length()) + set_zero(); + else + memcpy(&m_buffer, str, sizeof(m_buffer)); + } + + // Return the buffer with the in-memory representation + Lex_cstring to_lex_cstring() const + { + return Lex_cstring(m_buffer, sizeof(m_buffer)); + } + + static constexpr uint binary_length() { return NATIVE_LEN; } + static constexpr uint max_char_length() { return MAX_CHAR_LEN; } + + // Compare the in-memory binary representations of two values + static int cmp(const LEX_CSTRING &a, const LEX_CSTRING &b) + { + DBUG_ASSERT(a.length == binary_length()); + DBUG_ASSERT(b.length == binary_length()); + return memcmp(a.str, b.str, b.length); + } + + /* + Convert from the in-memory to the in-record representation. + Used in Field::store_native(). + */ + static void memory_to_record(char *to, const char *from) + { + memcpy(to, from, NATIVE_LEN); + } + /* + Convert from the in-record to the in-memory representation + Used in Field::val_native(). + */ + static void record_to_memory(char *to, const char *from) + { + memcpy(to, from, NATIVE_LEN); + } + + /* + Hash the in-record representation + Used in Field::hash(). 
+ */ + static void hash_record(uchar *ptr, Hasher *hasher) + { + hasher->add(&my_charset_bin, ptr, binary_length()); + } + + static bool only_zero_bytes(const char *ptr, size_t length) + { + for (uint i= 0 ; i < length; i++) + { + if (ptr[i] != 0) + return false; + } + return true; + } + + static ulong KEY_pack_flags(uint column_nr) + { + /* + Return zero by default. A particular data type can override + this method return some flags, e.g. HA_PACK_KEY to enable + key prefix compression. + */ + return 0; + } + + /* + Convert from the in-record to the binlog representation. + Used in Field::pack(), and in filesort to store the addon fields. + By default, do what BINARY(N) does. + */ + static uchar *pack(uchar *to, const uchar *from, uint max_length) + { + return StringPack(&my_charset_bin, binary_length()).pack(to, from, max_length); + } + + /* + Convert from the in-binary-log to the in-record representation. + Used in Field::unpack(). + By default, do what BINARY(N) does. + */ + static const uchar *unpack(uchar *to, const uchar *from, const uchar *from_end, + uint param_data) + { + return StringPack(&my_charset_bin, binary_length()).unpack(to, from, from_end, + param_data); + } + +}; +#endif /* SQL_TYPE_FIXEDBIN_STORAGE */ diff --git a/sql/sql_type_geom.cc b/sql/sql_type_geom.cc new file mode 100644 index 00000000..b026b5e9 --- /dev/null +++ b/sql/sql_type_geom.cc @@ -0,0 +1,973 @@ +/* + Copyright (c) 2015, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ + +#include "mariadb.h" + +#ifdef HAVE_SPATIAL + +#include "sql_class.h" +#include "sql_type_geom.h" +#include "item_geofunc.h" + +Named_type_handler type_handler_geometry("geometry"); +Named_type_handler type_handler_point("point"); +Named_type_handler type_handler_linestring("linestring"); +Named_type_handler type_handler_polygon("polygon"); +Named_type_handler type_handler_multipoint("multipoint"); +Named_type_handler type_handler_multilinestring("multilinestring"); +Named_type_handler type_handler_multipolygon("multipolygon"); +Named_type_handler type_handler_geometrycollection("geometrycollection"); + +Type_collection_geometry type_collection_geometry; + + +LEX_CSTRING Type_handler_geometry::extended_metadata_data_type_name() const +{ + return geometry_type() == GEOM_GEOMETRY ? 
null_clex_str : + name().lex_cstring(); +} + + +const Type_handler_geometry * +Type_handler_geometry::type_handler_geom_by_type(uint type) +{ + switch (type) { + case Type_handler_geometry::GEOM_POINT: + return &type_handler_point; + case Type_handler_geometry::GEOM_LINESTRING: + return &type_handler_linestring; + case Type_handler_geometry::GEOM_POLYGON: + return &type_handler_polygon; + case Type_handler_geometry::GEOM_MULTIPOINT: + return &type_handler_multipoint; + case Type_handler_geometry::GEOM_MULTILINESTRING: + return &type_handler_multilinestring; + case Type_handler_geometry::GEOM_MULTIPOLYGON: + return &type_handler_multipolygon; + case Type_handler_geometry::GEOM_GEOMETRYCOLLECTION: + return &type_handler_geometrycollection; + case Type_handler_geometry::GEOM_GEOMETRY: + break; + } + return &type_handler_geometry; +} + + +const Type_handler * +Type_collection_geometry_handler_by_name(const LEX_CSTRING &name) +{ + if (type_handler_point.name().eq(name)) + return &type_handler_point; + if (type_handler_linestring.name().eq(name)) + return &type_handler_linestring; + if (type_handler_polygon.name().eq(name)) + return &type_handler_polygon; + if (type_handler_multipoint.name().eq(name)) + return &type_handler_multipoint; + if (type_handler_multilinestring.name().eq(name)) + return &type_handler_multilinestring; + if (type_handler_multipolygon.name().eq(name)) + return &type_handler_multipolygon; + if (type_handler_geometry.name().eq(name)) + return &type_handler_geometry; + if (type_handler_geometrycollection.name().eq(name)) + return &type_handler_geometrycollection; + return NULL; +} + + +const Type_collection *Type_handler_geometry::type_collection() const +{ + return &type_collection_geometry; +} + + +const Type_handler * +Type_handler_geometry::type_handler_frm_unpack(const uchar *buffer) const +{ + // charset and geometry_type share the same byte in frm + return type_handler_geom_by_type((uint) buffer[14]); +} + + +const Type_handler * 
+Type_collection_geometry::aggregate_for_comparison(const Type_handler *a, + const Type_handler *b) + const +{ + const Type_handler *h; + if ((h= aggregate_common(a, b)) || + (h= aggregate_if_null(a, b)) || + (h= aggregate_if_long_blob(a, b))) + return h; + return NULL; +} + + +const Type_handler * +Type_collection_geometry::aggregate_for_result(const Type_handler *a, + const Type_handler *b) + const +{ + const Type_handler *h; + if ((h= aggregate_common(a, b)) || + (h= aggregate_if_null(a, b)) || + (h= aggregate_if_long_blob(a, b)) || + (h= aggregate_if_string(a, b))) + return h; + return NULL; +} + + +const Type_handler * +Type_collection_geometry::aggregate_for_min_max(const Type_handler *a, + const Type_handler *b) + const +{ + const Type_handler *h; + if ((h= aggregate_common(a, b)) || + (h= aggregate_if_null(a, b)) || + (h= aggregate_if_long_blob(a, b)) || + (h= aggregate_if_string(a, b))) + return h; + return NULL; +} + + +const Type_handler * +Type_collection_geometry::aggregate_if_string(const Type_handler *a, + const Type_handler *b) const +{ + if (a->type_collection() == this) + { + DBUG_ASSERT(b->type_collection() != this); + swap_variables(const Type_handler *, a, b); + } + if (a == &type_handler_hex_hybrid || + a == &type_handler_tiny_blob || + a == &type_handler_blob || + a == &type_handler_medium_blob || + a == &type_handler_varchar || + a == &type_handler_string) + return &type_handler_long_blob; + return NULL; +} + + +#ifndef DBUG_OFF +bool Type_collection_geometry::init_aggregators(Type_handler_data *data, + const Type_handler *geom) const +{ + Type_aggregator *r= &data->m_type_aggregator_for_result; + return + r->add(geom, &type_handler_hex_hybrid, &type_handler_long_blob) || + r->add(geom, &type_handler_tiny_blob, &type_handler_long_blob) || + r->add(geom, &type_handler_blob, &type_handler_long_blob) || + r->add(geom, &type_handler_medium_blob, &type_handler_long_blob) || + r->add(geom, &type_handler_varchar, &type_handler_long_blob) || + 
r->add(geom, &type_handler_string, &type_handler_long_blob); +} +#endif + + +bool Type_collection_geometry::init(Type_handler_data *data) +{ +#ifndef DBUG_OFF + Type_aggregator *nct= &data->m_type_aggregator_non_commutative_test; + if (nct->add(&type_handler_point, + &type_handler_varchar, + &type_handler_long_blob)) + return true; + return + init_aggregators(data, &type_handler_geometry) || + init_aggregators(data, &type_handler_geometrycollection) || + init_aggregators(data, &type_handler_point) || + init_aggregators(data, &type_handler_linestring) || + init_aggregators(data, &type_handler_polygon) || + init_aggregators(data, &type_handler_multipoint) || + init_aggregators(data, &type_handler_multilinestring) || + init_aggregators(data, &type_handler_multipolygon); +#endif // DBUG_OFF + return false; +} + + +bool Type_handler_geometry:: +check_type_geom_or_binary(const LEX_CSTRING &opname, + const Item *item) +{ + const Type_handler *handler= item->type_handler(); + if (handler->type_handler_for_comparison() == &type_handler_geometry || + (handler->is_general_purpose_string_type() && + item->collation.collation == &my_charset_bin)) + return false; + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0), + handler->name().ptr(), opname.str); + return true; +} + + +bool Type_handler_geometry:: +check_types_geom_or_binary(const LEX_CSTRING &opname, + Item* const *args, + uint start, uint end) +{ + for (uint i= start; i < end ; i++) + { + if (check_type_geom_or_binary(opname, args[i])) + return true; + } + return false; +} + + +const Type_handler *Type_handler_geometry::type_handler_for_comparison() const +{ + return &type_handler_geometry; +} + + +Field *Type_handler_geometry::make_conversion_table_field(MEM_ROOT *root, + TABLE *table, + uint metadata, + const Field *target) + const +{ + DBUG_ASSERT(target->type() == MYSQL_TYPE_GEOMETRY); + /* + We do not update feature_gis statistics here: + 
status_var_increment(target->table->in_use->status_var.feature_gis); + as this is only a temporary field. + The statistics was already incremented when "target" was created. + */ + const Field_geom *fg= static_cast(target); + return new (root) + Field_geom(NULL, (uchar *) "", 1, Field::NONE, &empty_clex_str, + table->s, 4, fg->type_handler_geom(), fg->srid); +} + + +bool Type_handler_geometry:: + Column_definition_fix_attributes(Column_definition *def) const +{ + def->flags|= BLOB_FLAG; + return false; +} + +void Type_handler_geometry:: + Column_definition_reuse_fix_attributes(THD *thd, + Column_definition *def, + const Field *field) const +{ + def->srid= ((Field_geom*) field)->srid; +} + + +bool Type_handler_geometry:: + Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *def, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) const +{ + def->charset= &my_charset_bin; + def->create_length_to_internal_length_string(); + return def->prepare_blob_field(thd); +} + + +bool Type_handler_geometry:: + Column_definition_prepare_stage2(Column_definition *def, + handler *file, + ulonglong table_flags) const +{ + if (!(table_flags & HA_CAN_GEOMETRY)) + { + my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "GEOMETRY"); + return true; + } + return def->prepare_stage2_blob(file, table_flags, FIELDFLAG_GEOM); +} + +bool Type_handler_geometry::Key_part_spec_init_primary(Key_part_spec *part, + const Column_definition &def, + const handler *file) const +{ + return part->check_primary_key_for_blob(file); +} + + +bool Type_handler_geometry::Key_part_spec_init_unique(Key_part_spec *part, + const Column_definition &def, + const handler *file, + bool *hash_field_needed) const +{ + if (!part->length) + *hash_field_needed= true; + return part->check_key_for_blob(file); +} + + +bool Type_handler_geometry::Key_part_spec_init_multiple(Key_part_spec *part, + const Column_definition &def, + const handler *file) const +{ + return 
part->init_multiple_key_for_blob(file); +} + + +bool Type_handler_geometry::Key_part_spec_init_foreign(Key_part_spec *part, + const Column_definition &def, + const handler *file) const +{ + return part->check_foreign_key_for_blob(file); +} + + +bool Type_handler_geometry::Key_part_spec_init_spatial(Key_part_spec *part, + const Column_definition &def) + const +{ + if (part->length) + { + my_error(ER_WRONG_SUB_KEY, MYF(0)); + return true; + } + /* + 4 is: (Xmin,Xmax,Ymin,Ymax), this is for 2D case + Later we'll extend this code to support more dimensions + */ + part->length= 4 * sizeof(double); + return false; +} + + +Item * +Type_handler_geometry::create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) + const +{ + DBUG_EXECUTE_IF("emulate_geometry_create_typecast_item", + return new (thd->mem_root) Item_func_geometry_from_text(thd, item); + ); + + return NULL; +} + +bool Type_handler_point::Key_part_spec_init_primary(Key_part_spec *part, + const Column_definition &def, + const handler *file) const +{ + /* + QQ: + The below assignment (here and in all other Key_part_spec_init_xxx methods) + overrides the explicitly given key part length, so in this query: + CREATE OR REPLACE TABLE t1 (a POINT, KEY(a(10))); + the key becomes KEY(a(25)). + This might be a bug. 
+ */ + part->length= octet_length(); + return part->check_key_for_blob(file); +} + + +bool Type_handler_point::Key_part_spec_init_unique(Key_part_spec *part, + const Column_definition &def, + const handler *file, + bool *hash_field_needed) const +{ + part->length= octet_length(); + return part->check_key_for_blob(file); +} + + +bool Type_handler_point::Key_part_spec_init_multiple(Key_part_spec *part, + const Column_definition &def, + const handler *file) const +{ + part->length= octet_length(); + return part->check_key_for_blob(file); +} + + +bool Type_handler_point::Key_part_spec_init_foreign(Key_part_spec *part, + const Column_definition &def, + const handler *file) const +{ + part->length= octet_length(); + return part->check_key_for_blob(file); +} + + +Item * +Type_handler_point::make_constructor_item(THD *thd, List *args) const +{ + if (!args || args->elements != 2) + return NULL; + Item_args tmp(thd, *args); + return new (thd->mem_root) Item_func_point(thd, + tmp.arguments()[0], + tmp.arguments()[1]); +} + + +Item * +Type_handler_linestring::make_constructor_item(THD *thd, List *args) const +{ + return args ? new (thd->mem_root) Item_func_linestring(thd, *args) : NULL; +} + + +Item * +Type_handler_polygon::make_constructor_item(THD *thd, List *args) const +{ + return args ? new (thd->mem_root) Item_func_polygon(thd, *args) : NULL; +} + + +Item * +Type_handler_multipoint::make_constructor_item(THD *thd, List *args) const +{ + return args ? new (thd->mem_root) Item_func_multipoint(thd, *args) : NULL; +} + + +Item * +Type_handler_multilinestring::make_constructor_item(THD *thd, + List *args) const +{ + return args ? new (thd->mem_root) Item_func_multilinestring(thd, *args) : + NULL; +} + + +Item * +Type_handler_multipolygon::make_constructor_item(THD *thd, + List *args) const +{ + return args ? 
new (thd->mem_root) Item_func_multipolygon(thd, *args) : NULL; +} + + +Item * +Type_handler_geometrycollection::make_constructor_item(THD *thd, + List *args) const +{ + return args ? new (thd->mem_root) Item_func_geometrycollection(thd, *args) : + NULL; +} + + +uint32 Type_handler_geometry::calc_pack_length(uint32 length) const +{ + return 4 + portable_sizeof_char_ptr; +} + + +Field *Type_handler_geometry::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const +{ + return new (root) + Field_geom(addr.ptr(), addr.null_ptr(), addr.null_bit(), + Field::NONE, name, share, 4, this, 0); +} + + +bool Type_handler_geometry:: + Item_hybrid_func_fix_attributes(THD *thd, + const LEX_CSTRING &func_name, + Type_handler_hybrid_field_type *handler, + Type_all_attributes *func, + Item **items, uint nitems) const +{ + DBUG_ASSERT(nitems > 0); + func->collation.set(&my_charset_bin); + func->unsigned_flag= false; + func->decimals= 0; + func->max_length= (uint32) UINT_MAX32; + func->set_type_maybe_null(true); + return false; +} + + +bool Type_handler_geometry:: + Item_sum_sum_fix_length_and_dec(Item_sum_sum *item) const +{ + LEX_CSTRING name= {STRING_WITH_LEN("sum") }; + return Item_func_or_sum_illegal_param(name); +} + + +bool Type_handler_geometry:: + Item_sum_avg_fix_length_and_dec(Item_sum_avg *item) const +{ + LEX_CSTRING name= {STRING_WITH_LEN("avg") }; + return Item_func_or_sum_illegal_param(name); +} + + +bool Type_handler_geometry:: + Item_sum_variance_fix_length_and_dec(Item_sum_variance *item) const +{ + return Item_func_or_sum_illegal_param(item); +} + + +bool Type_handler_geometry:: + Item_func_round_fix_length_and_dec(Item_func_round *item) const +{ + return Item_func_or_sum_illegal_param(item); +} + + +bool Type_handler_geometry:: + Item_func_int_val_fix_length_and_dec(Item_func_int_val *item) const +{ + return Item_func_or_sum_illegal_param(item); +} + + +bool 
Type_handler_geometry:: + Item_func_abs_fix_length_and_dec(Item_func_abs *item) const +{ + return Item_func_or_sum_illegal_param(item); +} + + +bool Type_handler_geometry:: + Item_func_neg_fix_length_and_dec(Item_func_neg *item) const +{ + return Item_func_or_sum_illegal_param(item); +} + + + +bool Type_handler_geometry:: + Item_func_signed_fix_length_and_dec(Item_func_signed *item) const +{ + return Item_func_or_sum_illegal_param(item); +} + + +bool Type_handler_geometry:: + Item_func_unsigned_fix_length_and_dec(Item_func_unsigned *item) const +{ + return Item_func_or_sum_illegal_param(item); +} + + +bool Type_handler_geometry:: + Item_double_typecast_fix_length_and_dec(Item_double_typecast *item) const +{ + return Item_func_or_sum_illegal_param(item); +} + + +bool Type_handler_geometry:: + Item_float_typecast_fix_length_and_dec(Item_float_typecast *item) const +{ + return Item_func_or_sum_illegal_param(item); +} + + +bool Type_handler_geometry:: + Item_decimal_typecast_fix_length_and_dec(Item_decimal_typecast *item) const +{ + return Item_func_or_sum_illegal_param(item); +} + + +bool Type_handler_geometry:: + Item_char_typecast_fix_length_and_dec(Item_char_typecast *item) const +{ + if (item->cast_charset() != &my_charset_bin) + return Item_func_or_sum_illegal_param(item); // CAST(geom AS CHAR) + item->fix_length_and_dec_str(); + return false; // CAST(geom AS BINARY) +} + + +bool Type_handler_geometry:: + Item_time_typecast_fix_length_and_dec(Item_time_typecast *item) const +{ + return Item_func_or_sum_illegal_param(item); +} + + + +bool Type_handler_geometry:: + Item_date_typecast_fix_length_and_dec(Item_date_typecast *item) const +{ + return Item_func_or_sum_illegal_param(item); +} + + +bool Type_handler_geometry:: + Item_datetime_typecast_fix_length_and_dec(Item_datetime_typecast *item) + const +{ + return Item_func_or_sum_illegal_param(item); + +} + + +bool Type_handler_geometry:: + Item_param_set_from_value(THD *thd, + Item_param *param, + const 
Type_all_attributes *attr, + const st_value *val) const +{ + param->unsigned_flag= false; + param->setup_conversion_blob(thd); + return param->set_str(val->m_string.ptr(), val->m_string.length(), + &my_charset_bin, &my_charset_bin); +} + + +void Type_handler_geometry::Item_param_set_param_func(Item_param *param, + uchar **pos, + ulong len) const +{ + param->set_null(); // Not possible type code in the client-server protocol +} + + +Field *Type_handler_geometry:: + make_table_field_from_def(TABLE_SHARE *share, MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &rec, const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const +{ + status_var_increment(current_thd->status_var.feature_gis); + return new (root) + Field_geom(rec.ptr(), rec.null_ptr(), rec.null_bit(), + attr->unireg_check, name, share, + attr->pack_flag_to_pack_length(), this, attr->srid); +} + + +void Type_handler_geometry:: + Column_definition_attributes_frm_pack(const Column_definition_attributes *def, + uchar *buff) const +{ + DBUG_ASSERT(f_decimals(def->pack_flag & ~FIELDFLAG_GEOM) == 0); + def->frm_pack_basic(buff); + buff[11]= 0; + buff[14]= (uchar) geometry_type(); +} + + + +/* Values 1-40 reserved for 1-byte options, + 41-80 for 2-byte options, + 81-120 for 4-byte options, + 121-160 for 8-byte options, + other - varied length in next 1-3 bytes. 
+*/ +enum extra2_gis_field_options { + FIELDGEOM_END=0, + FIELDGEOM_STORAGE_MODEL=1, + FIELDGEOM_PRECISION=2, + FIELDGEOM_SCALE=3, + FIELDGEOM_SRID=81, +}; + + +uint +Type_handler_geometry:: + Column_definition_gis_options_image(uchar *cbuf, + const Column_definition &def) const +{ + if (cbuf) + { + cbuf[0]= FIELDGEOM_STORAGE_MODEL; + cbuf[1]= (uchar) Field_geom::GEOM_STORAGE_WKB; + + cbuf[2]= FIELDGEOM_PRECISION; + cbuf[3]= (uchar) def.length; + + cbuf[4]= FIELDGEOM_SCALE; + cbuf[5]= (uchar) def.decimals; + + cbuf[6]= FIELDGEOM_SRID; + int4store(cbuf + 7, ((uint32) def.srid)); + + cbuf[11]= FIELDGEOM_END; + } + return 12; +} + + +static uint gis_field_options_read(const uchar *buf, size_t buf_len, + Field_geom::storage_type *st_type, + uint *precision, uint *scale, uint *srid) +{ + const uchar *buf_end= buf + buf_len; + const uchar *cbuf= buf; + int option_id; + + *precision= *scale= *srid= 0; + *st_type= Field_geom::GEOM_STORAGE_WKB; + + if (!buf) /* can only happen with the old FRM file */ + goto end_of_record; + + while (cbuf < buf_end) + { + switch ((option_id= *(cbuf++))) + { + case FIELDGEOM_STORAGE_MODEL: + *st_type= (Field_geom::storage_type) cbuf[0]; + break; + case FIELDGEOM_PRECISION: + *precision= cbuf[0]; + break; + case FIELDGEOM_SCALE: + *scale= cbuf[0]; + break; + case FIELDGEOM_SRID: + *srid= uint4korr(cbuf); + break; + case FIELDGEOM_END: + goto end_of_record; + } + if (option_id > 0 && option_id <= 40) + cbuf+= 1; + else if (option_id > 40 && option_id <= 80) + cbuf+= 2; + else if (option_id > 80 && option_id <= 120) + cbuf+= 4; + else if (option_id > 120 && option_id <= 160) + cbuf+= 8; + else /* > 160 and <=255 */ + cbuf+= cbuf[0] ? 
1 + cbuf[0] : 3 + uint2korr(cbuf+1); + } + +end_of_record: + return (uint)(cbuf - buf); +} + + +bool Type_handler_geometry:: + Column_definition_attributes_frm_unpack(Column_definition_attributes *attr, + TABLE_SHARE *share, + const uchar *buffer, + LEX_CUSTRING *gis_options) + const +{ + uint gis_opt_read, gis_length, gis_decimals; + Field_geom::storage_type st_type; + attr->frm_unpack_basic(buffer); + gis_opt_read= gis_field_options_read(gis_options->str, + gis_options->length, + &st_type, &gis_length, + &gis_decimals, &attr->srid); + gis_options->str+= gis_opt_read; + gis_options->length-= gis_opt_read; + return false; +} + + +uint32 +Type_handler_geometry::max_display_length_for_field(const Conv_source &src) + const +{ + return (uint32) my_set_bits(4 * 8); +} + + +enum_conv_type +Field_geom::rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const +{ + return binlog_type() == source.real_field_type() ? + rpl_conv_type_from_same_data_type(source.metadata(), rli, param) : + CONV_TYPE_IMPOSSIBLE; +} + + +/*****************************************************************/ +void Field_geom::sql_type(String &res) const +{ + CHARSET_INFO *cs= &my_charset_latin1; + const Name tmp= m_type_handler->name(); + res.set(tmp.ptr(), tmp.length(), cs); +} + + +int Field_geom::store(double nr) +{ + my_message(ER_CANT_CREATE_GEOMETRY_OBJECT, + ER_THD(get_thd(), ER_CANT_CREATE_GEOMETRY_OBJECT), MYF(0)); + return -1; +} + + +int Field_geom::store(longlong nr, bool unsigned_val) +{ + my_message(ER_CANT_CREATE_GEOMETRY_OBJECT, + ER_THD(get_thd(), ER_CANT_CREATE_GEOMETRY_OBJECT), MYF(0)); + return -1; +} + + +int Field_geom::store_decimal(const my_decimal *) +{ + my_message(ER_CANT_CREATE_GEOMETRY_OBJECT, + ER_THD(get_thd(), ER_CANT_CREATE_GEOMETRY_OBJECT), MYF(0)); + return -1; +} + + +int Field_geom::store(const char *from, size_t length, CHARSET_INFO *cs) +{ + if (!length) + bzero(ptr, Field_blob::pack_length()); + else + { + // Check 
given WKB + uint32 wkb_type; + if (length < SRID_SIZE + WKB_HEADER_SIZE + 4) + goto err; + wkb_type= uint4korr(from + SRID_SIZE + 1); + if (wkb_type < (uint32) Geometry::wkb_point || + wkb_type > (uint32) Geometry::wkb_last) + goto err; + + if (m_type_handler->geometry_type() != Type_handler_geometry::GEOM_GEOMETRY && + m_type_handler->geometry_type() != Type_handler_geometry::GEOM_GEOMETRYCOLLECTION && + (uint32) m_type_handler->geometry_type() != wkb_type) + { + const char *db= table->s->db.str; + const char *tab_name= table->s->table_name.str; + + if (!db) + db= ""; + if (!tab_name) + tab_name= ""; + + Geometry_buffer buffer; + Geometry *geom= NULL; + String wkt; + const char *dummy; + wkt.set_charset(&my_charset_latin1); + if (!(geom= Geometry::construct(&buffer, from, uint32(length))) || + geom->as_wkt(&wkt, &dummy)) + goto err; + + my_error(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD, MYF(0), + Geometry::ci_collection[m_type_handler->geometry_type()]->m_name.str, + wkt.c_ptr_safe(), + db, tab_name, field_name.str, + (ulong) table->in_use->get_stmt_da()-> + current_row_for_warning()); + goto err_exit; + } + + Field_blob::store_length(length); + if ((table->copy_blobs || length <= MAX_FIELD_WIDTH) && + from != value.ptr()) + { // Must make a copy + value.copy(from, length, cs); + from= value.ptr(); + } + bmove(ptr + packlength, &from, sizeof(char*)); + } + return 0; + +err: + my_message(ER_CANT_CREATE_GEOMETRY_OBJECT, + ER_THD(get_thd(), ER_CANT_CREATE_GEOMETRY_OBJECT), MYF(0)); +err_exit: + bzero(ptr, Field_blob::pack_length()); + return -1; +} + + +bool Field_geom::is_equal(const Column_definition &new_field) const +{ + /* + - Allow ALTER..INPLACE to supertype (GEOMETRY), + e.g. POINT to GEOMETRY or POLYGON to GEOMETRY. 
+ - Allow ALTER..INPLACE to the same geometry type: POINT -> POINT + */ + if (new_field.type_handler() == m_type_handler) + return true; + const Type_handler_geometry *gth= + dynamic_cast(new_field.type_handler()); + return gth && gth->is_binary_compatible_geom_super_type_for(m_type_handler); +} + + +Data_type_compatibility +Field_geom::can_optimize_range(const Item_bool_func *cond, + const Item *item, + bool is_eq_func) const +{ + return item->cmp_type() == STRING_RESULT ? + Data_type_compatibility::OK : + Data_type_compatibility::INCOMPATIBLE_DATA_TYPE; +} + + +bool Field_geom::load_data_set_no_data(THD *thd, bool fixed_format) +{ + return Field_geom::load_data_set_null(thd); +} + + +bool Field_geom::load_data_set_null(THD *thd) +{ + Field_blob::reset(); + if (!maybe_null()) + { + my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0), field_name.str, + thd->get_stmt_da()->current_row_for_warning()); + return true; + } + set_null(); + set_has_explicit_value(); // Do not auto-update this field + return false; +} + + +uint Field_geom::get_key_image(uchar *buff,uint length, const uchar *ptr_arg, + imagetype type_arg) const +{ + if (type_arg == itMBR) + { + LEX_CSTRING tmp; + tmp.str= (const char *) get_ptr(ptr_arg); + tmp.length= get_length(ptr_arg); + return Geometry::get_key_image_itMBR(tmp, buff, length); + } + return Field_blob::get_key_image_itRAW(ptr_arg, buff, length); +} + +Binlog_type_info Field_geom::binlog_type_info() const +{ + DBUG_ASSERT(Field_geom::type() == binlog_type()); + return Binlog_type_info(Field_geom::type(), pack_length_no_ptr(), 1, + field_charset(), type_handler_geom()->geometry_type()); +} + +#endif // HAVE_SPATIAL diff --git a/sql/sql_type_geom.h b/sql/sql_type_geom.h new file mode 100644 index 00000000..d86d1181 --- /dev/null +++ b/sql/sql_type_geom.h @@ -0,0 +1,434 @@ +#ifndef SQL_TYPE_GEOM_H_INCLUDED +#define SQL_TYPE_GEOM_H_INCLUDED +/* + Copyright (c) 2015 MariaDB Foundation + Copyright (c) 2019, 2022, MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_type.h" + +#ifdef HAVE_SPATIAL +class Type_handler_geometry: public Type_handler_string_result +{ +public: + enum geometry_types + { + GEOM_GEOMETRY = 0, GEOM_POINT = 1, GEOM_LINESTRING = 2, GEOM_POLYGON = 3, + GEOM_MULTIPOINT = 4, GEOM_MULTILINESTRING = 5, GEOM_MULTIPOLYGON = 6, + GEOM_GEOMETRYCOLLECTION = 7 + }; + static bool check_type_geom_or_binary(const LEX_CSTRING &opname, + const Item *item); + static bool check_types_geom_or_binary(const LEX_CSTRING &opname, + Item * const *args, + uint start, uint end); + static const Type_handler_geometry *type_handler_geom_by_type(uint type); + LEX_CSTRING extended_metadata_data_type_name() const; +public: + virtual ~Type_handler_geometry() {} + enum_field_types field_type() const override { return MYSQL_TYPE_GEOMETRY; } + bool Item_append_extended_type_info(Send_field_extended_metadata *to, + const Item *item) const override + { + LEX_CSTRING tmp= extended_metadata_data_type_name(); + return tmp.length ? 
to->set_data_type_name(tmp) : false; + } + bool is_param_long_data_type() const override { return true; } + uint32 max_display_length_for_field(const Conv_source &src) const override; + uint32 calc_pack_length(uint32 length) const override; + const Type_collection *type_collection() const override; + const Type_handler *type_handler_for_comparison() const override; + virtual geometry_types geometry_type() const { return GEOM_GEOMETRY; } + virtual Item *create_typecast_item(THD *thd, Item *item, + const Type_cast_attributes &attr) + const override; + const Type_handler *type_handler_frm_unpack(const uchar *buffer) + const override; + bool is_binary_compatible_geom_super_type_for(const Type_handler_geometry *th) + const + { + return geometry_type() == GEOM_GEOMETRY || + geometry_type() == th->geometry_type(); + } + bool type_can_have_key_part() const override { return true; } + bool subquery_type_allows_materialization(const Item *, const Item *, bool) + const override + { + return false; // Materialization does not work with GEOMETRY columns + } + void Item_param_set_param_func(Item_param *param, + uchar **pos, ulong len) const override; + bool Item_param_set_from_value(THD *thd, + Item_param *param, + const Type_all_attributes *attr, + const st_value *value) const override; + Field *make_conversion_table_field(MEM_ROOT *root, + TABLE *table, uint metadata, + const Field *target) const override; + uint Column_definition_gis_options_image(uchar *buff, + const Column_definition &def) + const override; + bool Column_definition_data_type_info_image(Binary_string *to, + const Column_definition &def) + const override + { + return false; + } + void + Column_definition_attributes_frm_pack(const Column_definition_attributes *at, + uchar *buff) const override; + bool + Column_definition_attributes_frm_unpack(Column_definition_attributes *attr, + TABLE_SHARE *share, + const uchar *buffer, + LEX_CUSTRING *gis_options) const + override; + bool 
Column_definition_fix_attributes(Column_definition *c) const override; + void Column_definition_reuse_fix_attributes(THD *thd, + Column_definition *c, + const Field *field) const + override; + bool Column_definition_prepare_stage1(THD *thd, + MEM_ROOT *mem_root, + Column_definition *c, + column_definition_type_t type, + const Column_derived_attributes + *derived_attr) + const override; + bool Column_definition_prepare_stage2(Column_definition *c, + handler *file, + ulonglong table_flags) const override; + bool Key_part_spec_init_primary(Key_part_spec *part, + const Column_definition &def, + const handler *file) const override; + bool Key_part_spec_init_unique(Key_part_spec *part, + const Column_definition &def, + const handler *file, + bool *has_key_needed) const override; + bool Key_part_spec_init_multiple(Key_part_spec *part, + const Column_definition &def, + const handler *file) const override; + bool Key_part_spec_init_foreign(Key_part_spec *part, + const Column_definition &def, + const handler *file) const override; + bool Key_part_spec_init_spatial(Key_part_spec *part, + const Column_definition &def) const override; + Field *make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const override; + + Field *make_table_field_from_def(TABLE_SHARE *share, + MEM_ROOT *mem_root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Bit_addr &bit, + const Column_definition_attributes *attr, + uint32 flags) const override; + + bool can_return_int() const override { return false; } + bool can_return_decimal() const override { return false; } + bool can_return_real() const override { return false; } + bool can_return_text() const override { return false; } + bool can_return_date() const override { return false; } + bool can_return_time() const override { return false; } + bool Item_func_round_fix_length_and_dec(Item_func_round *) const override; + bool 
Item_func_int_val_fix_length_and_dec(Item_func_int_val *) const override; + bool Item_func_abs_fix_length_and_dec(Item_func_abs *) const override; + bool Item_func_neg_fix_length_and_dec(Item_func_neg *) const override; + bool Item_hybrid_func_fix_attributes(THD *thd, + const LEX_CSTRING &name, + Type_handler_hybrid_field_type *h, + Type_all_attributes *attr, + Item **items, uint nitems) const + override; + bool Item_sum_sum_fix_length_and_dec(Item_sum_sum *) const override; + bool Item_sum_avg_fix_length_and_dec(Item_sum_avg *) const override; + bool Item_sum_variance_fix_length_and_dec(Item_sum_variance *) const override; + + bool Item_func_signed_fix_length_and_dec(Item_func_signed *) const override; + bool Item_func_unsigned_fix_length_and_dec(Item_func_unsigned *) const + override; + bool Item_double_typecast_fix_length_and_dec(Item_double_typecast *) const + override; + bool Item_float_typecast_fix_length_and_dec(Item_float_typecast *) const + override; + bool Item_decimal_typecast_fix_length_and_dec(Item_decimal_typecast *) const + override; + bool Item_char_typecast_fix_length_and_dec(Item_char_typecast *) const + override; + bool Item_time_typecast_fix_length_and_dec(Item_time_typecast *) const + override; + bool Item_date_typecast_fix_length_and_dec(Item_date_typecast *) const + override; + bool Item_datetime_typecast_fix_length_and_dec(Item_datetime_typecast *) const + override; +}; + + +class Type_handler_point: public Type_handler_geometry +{ + // Binary length of a POINT value: 4 byte SRID + 21 byte WKB POINT + static uint octet_length() { return 25; } +public: + geometry_types geometry_type() const override { return GEOM_POINT; } + Item *make_constructor_item(THD *thd, List *args) const override; + bool Key_part_spec_init_primary(Key_part_spec *part, + const Column_definition &def, + const handler *file) const override; + bool Key_part_spec_init_unique(Key_part_spec *part, + const Column_definition &def, + const handler *file, + bool *has_key_needed) 
const override; + bool Key_part_spec_init_multiple(Key_part_spec *part, + const Column_definition &def, + const handler *file) const override; + bool Key_part_spec_init_foreign(Key_part_spec *part, + const Column_definition &def, + const handler *file) const override; +}; + + +class Type_handler_linestring: public Type_handler_geometry +{ +public: + geometry_types geometry_type() const override { return GEOM_LINESTRING; } + Item *make_constructor_item(THD *thd, List *args) const override; +}; + + +class Type_handler_polygon: public Type_handler_geometry +{ +public: + geometry_types geometry_type() const override { return GEOM_POLYGON; } + Item *make_constructor_item(THD *thd, List *args) const override; +}; + + +class Type_handler_multipoint: public Type_handler_geometry +{ +public: + geometry_types geometry_type() const override { return GEOM_MULTIPOINT; } + Item *make_constructor_item(THD *thd, List *args) const override; +}; + + +class Type_handler_multilinestring: public Type_handler_geometry +{ +public: + geometry_types geometry_type() const override { return GEOM_MULTILINESTRING; } + Item *make_constructor_item(THD *thd, List *args) const override; +}; + + +class Type_handler_multipolygon: public Type_handler_geometry +{ +public: + geometry_types geometry_type() const override { return GEOM_MULTIPOLYGON; } + Item *make_constructor_item(THD *thd, List *args) const override; +}; + + +class Type_handler_geometrycollection: public Type_handler_geometry +{ +public: + geometry_types geometry_type() const override { return GEOM_GEOMETRYCOLLECTION; } + Item *make_constructor_item(THD *thd, List *args) const override; +}; + +extern Named_type_handler type_handler_geometry; +extern Named_type_handler type_handler_point; +extern Named_type_handler type_handler_linestring; +extern Named_type_handler type_handler_polygon; +extern Named_type_handler type_handler_multipoint; +extern Named_type_handler type_handler_multilinestring; +extern Named_type_handler 
type_handler_multipolygon; +extern Named_type_handler type_handler_geometrycollection; + +class Type_collection_geometry: public Type_collection +{ + const Type_handler *aggregate_common(const Type_handler *a, + const Type_handler *b) const + { + if (a == b) + return a; + if (dynamic_cast(a) && + dynamic_cast(b)) + return &type_handler_geometry; + return NULL; + } + const Type_handler *aggregate_if_null(const Type_handler *a, + const Type_handler *b) const + { + return a == &type_handler_null ? b : + b == &type_handler_null ? a : + NULL; + } + const Type_handler *aggregate_if_long_blob(const Type_handler *a, + const Type_handler *b) const + { + return a == &type_handler_long_blob ? &type_handler_long_blob : + b == &type_handler_long_blob ? &type_handler_long_blob : + NULL; + } + const Type_handler *aggregate_if_string(const Type_handler *a, + const Type_handler *b) const; +#ifndef DBUG_OFF + bool init_aggregators(Type_handler_data *data, const Type_handler *geom) const; +#endif +public: + bool init(Type_handler_data *data) override; + const Type_handler *aggregate_for_result(const Type_handler *a, + const Type_handler *b) + const override; + const Type_handler *aggregate_for_comparison(const Type_handler *a, + const Type_handler *b) + const override; + const Type_handler *aggregate_for_min_max(const Type_handler *a, + const Type_handler *b) + const override; + const Type_handler *aggregate_for_num_op(const Type_handler *a, + const Type_handler *b) + const override + { + return NULL; + } +}; + +extern Type_collection_geometry type_collection_geometry; +const Type_handler * +Type_collection_geometry_handler_by_name(const LEX_CSTRING &name); + +#include "field.h" + +class Field_geom :public Field_blob +{ + const Type_handler_geometry *m_type_handler; +public: + uint srid; + uint precision; + enum storage_type { GEOM_STORAGE_WKB= 0, GEOM_STORAGE_BINARY= 1}; + enum storage_type storage; + + Field_geom(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, + enum utype 
unireg_check_arg, const LEX_CSTRING *field_name_arg, + TABLE_SHARE *share, uint blob_pack_length, + const Type_handler_geometry *gth, + uint field_srid) + :Field_blob(ptr_arg, null_ptr_arg, null_bit_arg, unireg_check_arg, + field_name_arg, share, blob_pack_length, &my_charset_bin), + m_type_handler(gth) + { srid= field_srid; } + enum_conv_type rpl_conv_type_from(const Conv_source &source, + const Relay_log_info *rli, + const Conv_param ¶m) const override; + enum ha_base_keytype key_type() const override + { + return HA_KEYTYPE_VARBINARY2; + } + const Type_handler *type_handler() const override + { + return m_type_handler; + } + const Type_handler_geometry *type_handler_geom() const + { + return m_type_handler; + } + void set_type_handler(const Type_handler_geometry *th) + { + m_type_handler= th; + } + enum_field_types type() const override + { + return MYSQL_TYPE_GEOMETRY; + } + enum_field_types real_type() const override + { + return MYSQL_TYPE_GEOMETRY; + } + Information_schema_character_attributes + information_schema_character_attributes() const override + { + return Information_schema_character_attributes(); + } + void make_send_field(Send_field *to) override + { + Field_longstr::make_send_field(to); + LEX_CSTRING tmp= m_type_handler->extended_metadata_data_type_name(); + if (tmp.length) + to->set_data_type_name(tmp); + } + Data_type_compatibility can_optimize_range(const Item_bool_func *cond, + const Item *item, + bool is_eq_func) const override; + void sql_type(String &str) const override; + Copy_func *get_copy_func(const Field *from) const override + { + const Type_handler_geometry *fth= + dynamic_cast(from->type_handler()); + if (fth && m_type_handler->is_binary_compatible_geom_super_type_for(fth)) + return get_identical_copy_func(); + return do_conv_blob; + } + bool memcpy_field_possible(const Field *from) const override + { + const Type_handler_geometry *fth= + dynamic_cast(from->type_handler()); + return fth && + 
m_type_handler->is_binary_compatible_geom_super_type_for(fth) && + !table->copy_blobs; + } + bool is_equal(const Column_definition &new_field) const override; + int store(const char *to, size_t length, CHARSET_INFO *charset) override; + int store(double nr) override; + int store(longlong nr, bool unsigned_val) override; + int store_decimal(const my_decimal *) override; + uint size_of() const override{ return sizeof(*this); } + /** + Key length is provided only to support hash joins. (compared byte for byte) + Ex: SELECT .. FROM t1,t2 WHERE t1.field_geom1=t2.field_geom2. + + The comparison is not very relevant, as identical geometry might be + represented differently, but we need to support it either way. + */ + uint32 key_length() const override{ return packlength; } + uint get_key_image(uchar *buff,uint length, + const uchar *ptr_arg, imagetype type_arg) const override; + + /** + Non-nullable GEOMETRY types cannot have defaults, + but the underlying blob must still be reset. + */ + int reset(void) override{ return Field_blob::reset() || !maybe_null(); } + bool load_data_set_null(THD *thd) override; + bool load_data_set_no_data(THD *thd, bool fixed_format) override; + + uint get_srid() const { return srid; } + void print_key_value(String *out, uint32 length) override + { + out->append(STRING_WITH_LEN("unprintable_geometry_value")); + } + Binlog_type_info binlog_type_info() const override; +}; + +#endif // HAVE_SPATIAL + +#endif // SQL_TYPE_GEOM_H_INCLUDED diff --git a/sql/sql_type_int.h b/sql/sql_type_int.h new file mode 100644 index 00000000..e015e989 --- /dev/null +++ b/sql/sql_type_int.h @@ -0,0 +1,365 @@ +/* Copyright (c) 2018, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SQL_TYPE_INT_INCLUDED +#define SQL_TYPE_INT_INCLUDED + +#include "my_bit.h" // my_count_bits() + + +class Null_flag +{ +protected: + bool m_is_null; +public: + bool is_null() const { return m_is_null; } + Null_flag(bool is_null) :m_is_null(is_null) { } +}; + + +class Longlong +{ +protected: + longlong m_value; +public: + longlong value() const { return m_value; } + Longlong(longlong nr) :m_value(nr) { } + ulonglong abs() + { + if (m_value == LONGLONG_MIN) // avoid undefined behavior + return ((ulonglong) LONGLONG_MAX) + 1; + return m_value < 0 ? 
-m_value : m_value; + } +}; + + +class Longlong_null: public Longlong, public Null_flag +{ +public: + Longlong_null(longlong nr, bool is_null) + :Longlong(nr), Null_flag(is_null) + { } + explicit Longlong_null() + :Longlong(0), Null_flag(true) + { } + explicit Longlong_null(longlong nr) + :Longlong(nr), Null_flag(false) + { } + Longlong_null operator|(const Longlong_null &other) const + { + if (is_null() || other.is_null()) + return Longlong_null(); + return Longlong_null(value() | other.value()); + } + Longlong_null operator&(const Longlong_null &other) const + { + if (is_null() || other.is_null()) + return Longlong_null(); + return Longlong_null(value() & other.value()); + } + Longlong_null operator^(const Longlong_null &other) const + { + if (is_null() || other.is_null()) + return Longlong_null(); + return Longlong_null((longlong) (value() ^ other.value())); + } + Longlong_null operator~() const + { + if (is_null()) + return *this; + return Longlong_null((longlong) ~ (ulonglong) value()); + } + Longlong_null operator<<(const Longlong_null &llshift) const + { + ulonglong res; + uint shift; + if (is_null() || llshift.is_null()) + return Longlong_null(); + shift= (uint) llshift.value(); + res= 0; + if (shift < sizeof(longlong) * 8) + res= ((ulonglong) value()) << shift; + return Longlong_null((longlong) res); + } + Longlong_null operator>>(const Longlong_null &llshift) const + { + ulonglong res; + uint shift; + if (is_null() || llshift.is_null()) + return Longlong_null(); + shift= (uint) llshift.value(); + res= 0; + if (shift < sizeof(longlong) * 8) + res= ((ulonglong) value()) >> shift; + return Longlong_null(res); + } + Longlong_null bit_count() const + { + if (is_null()) + return *this; + return Longlong_null((longlong) my_count_bits((ulonglong) value())); + } +}; + + +class ULonglong +{ +protected: + ulonglong m_value; +public: + ulonglong value() const { return m_value; } + explicit ULonglong(ulonglong nr) :m_value(nr) { } + + static bool 
test_if_sum_overflows_ull(ulonglong arg1, ulonglong arg2) + { + return ULONGLONG_MAX - arg1 < arg2; + } + + Longlong_null operator-() const + { + if (m_value > (ulonglong) LONGLONG_MAX) // Avoid undefined behaviour + { + return m_value == (ulonglong) LONGLONG_MAX + 1 ? + Longlong_null(LONGLONG_MIN, false) : + Longlong_null(0, true); + } + return Longlong_null(-(longlong) m_value, false); + } + + // Convert to Longlong_null with the range check + Longlong_null to_longlong_null() const + { + if (m_value > (ulonglong) LONGLONG_MAX) + return Longlong_null(0, true); + return Longlong_null((longlong) m_value, false); + } + +}; + + +class ULonglong_null: public ULonglong, public Null_flag +{ +public: + ULonglong_null(ulonglong nr, bool is_null) + :ULonglong(nr), Null_flag(is_null) + { } + + /* + Multiply two ulonglong values. + + Let a = a1 * 2^32 + a0 and b = b1 * 2^32 + b0. Then + a * b = (a1 * 2^32 + a0) * (b1 * 2^32 + b0) = a1 * b1 * 2^64 + + + (a1 * b0 + a0 * b1) * 2^32 + a0 * b0; + We can determine if the above sum overflows the ulonglong range by + sequentially checking the following conditions: + 1. If both a1 and b1 are non-zero. + 2. Otherwise, if (a1 * b0 + a0 * b1) is greater than ULONG_MAX. + 3. Otherwise, if (a1 * b0 + a0 * b1) * 2^32 + a0 * b0 is greater than + ULONGLONG_MAX. + */ + static ULonglong_null ullmul(ulonglong a, ulonglong b) + { + ulong a1= (ulong)(a >> 32); + ulong b1= (ulong)(b >> 32); + + if (a1 && b1) + return ULonglong_null(0, true); + + ulong a0= (ulong)(0xFFFFFFFFUL & a); + ulong b0= (ulong)(0xFFFFFFFFUL & b); + + ulonglong res1= (ulonglong) a1 * b0 + (ulonglong) a0 * b1; + if (res1 > 0xFFFFFFFFUL) + return ULonglong_null(0, true); + + res1= res1 << 32; + ulonglong res0= (ulonglong) a0 * b0; + + if (test_if_sum_overflows_ull(res1, res0)) + return ULonglong_null(0, true); + return ULonglong_null(res1 + res0, false); + } +}; + + +// A longlong/ulonglong hybrid. Good to store results of val_int(). 
+class Longlong_hybrid: public Longlong +{ +protected: + bool m_unsigned; + int cmp_signed(const Longlong_hybrid& other) const + { + return m_value < other.m_value ? -1 : m_value == other.m_value ? 0 : 1; + } + int cmp_unsigned(const Longlong_hybrid& other) const + { + return (ulonglong) m_value < (ulonglong) other.m_value ? -1 : + m_value == other.m_value ? 0 : 1; + } +public: + Longlong_hybrid(longlong nr, bool unsigned_flag) + :Longlong(nr), m_unsigned(unsigned_flag) + { } + bool is_unsigned() const { return m_unsigned; } + bool is_unsigned_outside_of_signed_range() const + { + return m_unsigned && ((ulonglong) m_value) > (ulonglong) LONGLONG_MAX; + } + bool neg() const { return m_value < 0 && !m_unsigned; } + ulonglong abs() const + { + if (m_unsigned) + return (ulonglong) m_value; + return Longlong(m_value).abs(); + } + /* + Convert to an unsigned number: + - Negative numbers are converted to 0. + - Positive numbers bigger than upper_bound are converted to upper_bound. + - Other numbers are returned as is. + */ + ulonglong to_ulonglong(ulonglong upper_bound) const + { + return neg() ? 0 : + (ulonglong) m_value > upper_bound ? upper_bound : + (ulonglong) m_value; + } + uint to_uint(uint upper_bound) const + { + return (uint) to_ulonglong(upper_bound); + } + + + Longlong_null val_int_signed() const + { + if (m_unsigned) + return ULonglong((ulonglong) m_value).to_longlong_null(); + return Longlong_null(m_value, false); + } + + Longlong_null val_int_unsigned() const + { + if (!m_unsigned && m_value < 0) + return Longlong_null(0, true); + return Longlong_null(m_value, false); + } + + /* + Return in Item compatible val_int() format: + - signed numbers as a straight longlong value + - unsigned numbers as a ulonglong value reinterpreted to longlong + */ + Longlong_null val_int(bool want_unsigned_value) const + { + return want_unsigned_value ? 
val_int_unsigned() : + val_int_signed(); + } + + int cmp(const Longlong_hybrid& other) const + { + if (m_unsigned == other.m_unsigned) + return m_unsigned ? cmp_unsigned(other) : cmp_signed(other); + if (is_unsigned_outside_of_signed_range()) + return 1; + if (other.is_unsigned_outside_of_signed_range()) + return -1; + /* + The unsigned argument is in the range 0..LONGLONG_MAX. + The signed argument is in the range LONGLONG_MIN..LONGLONG_MAX. + Safe to compare as signed. + */ + return cmp_signed(other); + } + bool operator==(const Longlong_hybrid &nr) const + { + return cmp(nr) == 0; + } + bool operator==(ulonglong nr) const + { + return cmp(Longlong_hybrid((longlong) nr, true)) == 0; + } + bool operator==(uint nr) const + { + return cmp(Longlong_hybrid((longlong) nr, true)) == 0; + } + bool operator==(longlong nr) const + { + return cmp(Longlong_hybrid(nr, false)) == 0; + } + bool operator==(int nr) const + { + return cmp(Longlong_hybrid(nr, false)) == 0; + } +}; + + +class Longlong_hybrid_null: public Longlong_hybrid, + public Null_flag +{ +public: + Longlong_hybrid_null(const Longlong_null &nr, bool unsigned_flag) + :Longlong_hybrid(nr.value(), unsigned_flag), + Null_flag(nr.is_null()) + { } +}; + + +/* + Stores the absolute value of a number, and the sign. + Value range: -ULONGLONG_MAX .. +ULONGLONG_MAX. + + Provides a wider range for negative numbers than Longlong_hybrid does. + Usefull to store intermediate results of an expression whose value + is further needed to be negated. For example, these methods: + - Item_func_mul::int_op() + - Item_func_int_div::val_int() + - Item_func_mod::int_op() + calculate the result of absolute values of the arguments, + then optionally negate the result. 
+*/ +class ULonglong_hybrid: public ULonglong +{ + bool m_neg; +public: + ULonglong_hybrid(ulonglong value, bool neg) + :ULonglong(value), m_neg(neg) + { + if (m_neg && !m_value) + m_neg= false; // convert -0 to +0 + } + Longlong_null val_int_unsigned() const + { + return m_neg ? Longlong_null(0, true) : + Longlong_null((longlong) m_value, false); + } + Longlong_null val_int_signed() const + { + return m_neg ? -ULonglong(m_value) : ULonglong::to_longlong_null(); + } + + /* + Return in Item compatible val_int() format: + - signed numbers as a straight longlong value + - unsigned numbers as a ulonglong value reinterpreted to longlong + */ + Longlong_null val_int(bool want_unsigned_value) const + { + return want_unsigned_value ? val_int_unsigned() : + val_int_signed(); + } +}; + + +#endif // SQL_TYPE_INT_INCLUDED diff --git a/sql/sql_type_json.cc b/sql/sql_type_json.cc new file mode 100644 index 00000000..27072de2 --- /dev/null +++ b/sql/sql_type_json.cc @@ -0,0 +1,243 @@ +/* + Copyright (c) 2019, 2021 MariaDB + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "sql_type_json.h" +#include "sql_class.h" + + +Named_type_handler + type_handler_string_json("char/json"); + +Named_type_handler + type_handler_varchar_json("varchar/json"); + +Named_type_handler + type_handler_tiny_blob_json("tinyblob/json"); + +Named_type_handler + type_handler_blob_json("blob/json"); + +Named_type_handler + type_handler_medium_blob_json("mediumblob/json"); + +Named_type_handler + type_handler_long_blob_json("longblob/json"); + + +// Convert general purpose string type handlers to their JSON counterparts +const Type_handler * +Type_handler_json_common::json_type_handler_from_generic(const Type_handler *th) +{ + // Test in the order of likelyhood. + if (th == &type_handler_long_blob) + return &type_handler_long_blob_json; + if (th == &type_handler_varchar) + return &type_handler_varchar_json; + if (th == &type_handler_blob) + return &type_handler_blob_json; + if (th == &type_handler_tiny_blob) + return &type_handler_tiny_blob_json; + if (th == &type_handler_medium_blob) + return &type_handler_medium_blob_json; + if (th == &type_handler_string) + return &type_handler_string_json; + DBUG_ASSERT(is_json_type_handler(th)); + return th; +} + + +/* + This method resembles what Type_handler::string_type_handler() + does for general purpose string type handlers. 
+*/ +const Type_handler * +Type_handler_json_common::json_type_handler(uint max_octet_length) +{ + if (max_octet_length >= 16777216) + return &type_handler_long_blob_json; + else if (max_octet_length >= 65536) + return &type_handler_medium_blob_json; + else if (max_octet_length >= MAX_FIELD_VARCHARLENGTH) + return &type_handler_blob_json; + return &type_handler_varchar_json; +} + + +/* + This method resembles what Field_blob::type_handler() + does for general purpose BLOB type handlers. +*/ +const Type_handler * +Type_handler_json_common::json_blob_type_handler_by_length_bytes(uint len) +{ + switch (len) { + case 1: return &type_handler_tiny_blob_json; + case 2: return &type_handler_blob_json; + case 3: return &type_handler_medium_blob_json; + } + return &type_handler_long_blob_json; +} + + +/* + This method resembles what Item_sum_group_concat::type_handler() + does for general purpose string type handlers. +*/ +const Type_handler * +Type_handler_json_common::json_type_handler_sum(const Item_sum *item) +{ + if (item->too_big_for_varchar()) + return &type_handler_blob_json; + return &type_handler_varchar_json; +} + + +bool Type_handler_json_common::has_json_valid_constraint(const Field *field) +{ + return field->check_constraint && + field->check_constraint->expr && + field->check_constraint->expr->type() == Item::FUNC_ITEM && + static_cast(field->check_constraint->expr)-> + functype() == Item_func::JSON_VALID_FUNC; +} + + +/** + Create JSON_VALID(field_name) expression +*/ + + +Virtual_column_info * +Type_handler_json_common::make_json_valid_expr(THD *thd, + const LEX_CSTRING *field_name) +{ + Lex_ident_sys_st str; + Item *field, *expr; + str.set_valid_utf8(field_name); + if (unlikely(!(field= thd->lex->create_item_ident_field(thd, + Lex_ident_sys(), + Lex_ident_sys(), + str)))) + return 0; + if (unlikely(!(expr= new (thd->mem_root) Item_func_json_valid(thd, field)))) + return 0; + return add_virtual_expression(thd, expr); +} + + +bool 
Type_handler_json_common::make_json_valid_expr_if_needed(THD *thd, + Column_definition *c) +{ + return !c->check_constraint && + !(c->check_constraint= make_json_valid_expr(thd, &c->field_name)); +} + + +class Type_collection_json: public Type_collection +{ + const Type_handler *aggregate_common(const Type_handler *a, + const Type_handler *b) const + { + if (a == b) + return a; + if (a == &type_handler_null) + return b; + if (b == &type_handler_null) + return a; + return NULL; + } + + /* + Aggregate two JSON type handlers for result. + If one of the handlers is not JSON, NULL is returned. + */ + const Type_handler *aggregate_json_for_result(const Type_handler *a, + const Type_handler *b) const + { + if (!Type_handler_json_common::is_json_type_handler(a) || + !Type_handler_json_common::is_json_type_handler(b)) + return NULL; + // Here we have two JSON data types. Let's aggregate their base types. + const Type_handler *a0= a->type_handler_base(); + const Type_handler *b0= b->type_handler_base(); + // Base types are expected to belong to type_collection_std: + DBUG_ASSERT(a0->type_collection() == type_handler_null.type_collection()); + DBUG_ASSERT(b0->type_collection() == type_handler_null.type_collection()); + const Type_handler *c= a0->type_collection()->aggregate_for_result(a0, b0); + return Type_handler_json_common::json_type_handler_from_generic(c); + } +public: + const Type_handler *aggregate_for_result(const Type_handler *a, + const Type_handler *b) + const override + { + const Type_handler *h; + if ((h= aggregate_common(a, b)) || + (h= aggregate_json_for_result(a, b))) + return h; + /* + One of the types is not JSON. + Let the caller aggregate according to the derived rules: + COALESCE(VARCHAR/JSON, TEXT) -> COALESCE(VARCHAR, TEXT) + */ + return NULL; + } + + const Type_handler *aggregate_for_min_max(const Type_handler *a, + const Type_handler *b) + const override + { + /* + No JSON specific rules. 
+ Let the caller aggregate according to the derived rules: + LEAST(VARCHAR/JSON, TEXT/JSON) -> LEAST(VARCHAR, TEXT) + */ + return NULL; + } + + const Type_handler *aggregate_for_comparison(const Type_handler *a, + const Type_handler *b) + const override + { + /* + All JSON types return &type_handler_long_blob + in type_handler_for_comparison(). We should not get here. + */ + DBUG_ASSERT(0); + return NULL; + } + + const Type_handler *aggregate_for_num_op(const Type_handler *a, + const Type_handler *b) + const override + { + /* + No JSON specific rules. + Let the caller aggregate according to the derived rules: + (VARCHAR/JSON + TEXT/JSON) -> (VARCHAR + TEXT) + */ + return NULL; + } +}; + + +const Type_collection *Type_handler_json_common::type_collection() +{ + static Type_collection_json type_collection_json; + return &type_collection_json; +} diff --git a/sql/sql_type_json.h b/sql/sql_type_json.h new file mode 100644 index 00000000..b7fe5c8a --- /dev/null +++ b/sql/sql_type_json.h @@ -0,0 +1,165 @@ +#ifndef SQL_TYPE_JSON_INCLUDED +#define SQL_TYPE_JSON_INCLUDED +/* + Copyright (c) 2019, 2021 MariaDB + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "mariadb.h" +#include "sql_type.h" + + +class Type_handler_json_common +{ +public: + static Virtual_column_info *make_json_valid_expr(THD *thd, + const LEX_CSTRING *field_name); + static bool make_json_valid_expr_if_needed(THD *thd, Column_definition *c); + static bool set_format_name(Send_field_extended_metadata *to) + { + static const Lex_cstring fmt(STRING_WITH_LEN("json")); + return to->set_format_name(fmt); + } + static const Type_handler *json_type_handler(uint max_octet_length); + static const Type_handler *json_blob_type_handler_by_length_bytes(uint len); + static const Type_handler *json_type_handler_sum(const Item_sum *sum); + static const Type_handler *json_type_handler_from_generic(const Type_handler *th); + static bool has_json_valid_constraint(const Field *field); + static const Type_collection *type_collection(); + static bool is_json_type_handler(const Type_handler *handler) + { + return handler->type_collection() == type_collection(); + } +}; + + +template &thbase> +class Type_handler_general_purpose_string_to_json: + public BASE, + public Type_handler_json_common +{ +public: + const Type_handler *type_handler_base() const override + { + return &thbase; + } + const Type_collection *type_collection() const override + { + return Type_handler_json_common::type_collection(); + } + bool Column_definition_validate_check_constraint(THD *thd, + Column_definition *c) + const override + { + return make_json_valid_expr_if_needed(thd, c) || + BASE::Column_definition_validate_check_constraint(thd, c); + } + bool Column_definition_data_type_info_image(Binary_string *to, + const Column_definition &def) + const override + { + /* + Override the inherited method to avoid JSON type handlers writing any + extended metadata to FRM. 
JSON type handlers are currently detected + only by CHECK(JSON_VALID()) constraint. This may change in the future + to do write extended metadata to FRM, for more reliable detection. + */ + return false; + } + + bool Item_append_extended_type_info(Send_field_extended_metadata *to, + const Item *item) const override + { + return set_format_name(to); // Send "format=json" in the protocol + } + + bool Item_hybrid_func_fix_attributes(THD *thd, + const LEX_CSTRING &name, + Type_handler_hybrid_field_type *hybrid, + Type_all_attributes *attr, + Item **items, uint nitems) + const override + { + if (BASE::Item_hybrid_func_fix_attributes(thd, name, hybrid, attr, + items, nitems)) + return true; + /* + The above call can change the type handler on "hybrid", e.g. + choose a proper BLOB type handler according to the calculated max_length. + Convert general purpose string type handler to its JSON counterpart. + This makes hybrid functions preserve JSON data types, e.g.: + COALESCE(json_expr1, json_expr2) -> JSON + */ + hybrid->set_handler(json_type_handler_from_generic(hybrid->type_handler())); + return false; + } +}; + + +class Type_handler_string_json: + public Type_handler_general_purpose_string_to_json +{ }; + + +class Type_handler_varchar_json: + public Type_handler_general_purpose_string_to_json +{ }; + +class Type_handler_tiny_blob_json: + public Type_handler_general_purpose_string_to_json +{ }; + +class Type_handler_blob_json: + public Type_handler_general_purpose_string_to_json +{ }; + + +class Type_handler_medium_blob_json: + public Type_handler_general_purpose_string_to_json +{ }; + +class Type_handler_long_blob_json: + public Type_handler_general_purpose_string_to_json +{ }; + + + +extern MYSQL_PLUGIN_IMPORT + Named_type_handler type_handler_string_json; + +extern MYSQL_PLUGIN_IMPORT + Named_type_handler type_handler_varchar_json; + +extern MYSQL_PLUGIN_IMPORT + Named_type_handler type_handler_tiny_blob_json; + +extern MYSQL_PLUGIN_IMPORT + Named_type_handler 
type_handler_blob_json; + +extern MYSQL_PLUGIN_IMPORT + Named_type_handler type_handler_medium_blob_json; + +extern MYSQL_PLUGIN_IMPORT + Named_type_handler type_handler_long_blob_json; + + +#endif // SQL_TYPE_JSON_INCLUDED diff --git a/sql/sql_type_real.h b/sql/sql_type_real.h new file mode 100644 index 00000000..5a484fbe --- /dev/null +++ b/sql/sql_type_real.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2019 MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef SQL_TYPE_REAL_INCLUDED +#define SQL_TYPE_REAL_INCLUDED + +#include + +class Float +{ + float m_value; +public: + Float(float nr) + :m_value(nr) + { + DBUG_ASSERT(!std::isnan(nr)); + DBUG_ASSERT(!std::isinf(nr)); + } + Float(double nr) + :m_value((float) nr) + { + DBUG_ASSERT(!std::isnan(nr)); + DBUG_ASSERT(!std::isinf(nr)); + DBUG_ASSERT(nr <= FLT_MAX); + DBUG_ASSERT(nr >= -FLT_MAX); + } + Float(const uchar *ptr) + { + float4get(m_value, ptr); + } + bool to_string(String *to, uint dec) const; +}; + + +#endif // SQL_TYPE_REAL_INCLUDED diff --git a/sql/sql_type_string.cc b/sql/sql_type_string.cc new file mode 100644 index 00000000..df46ef74 --- /dev/null +++ b/sql/sql_type_string.cc @@ -0,0 +1,104 @@ +/* + Copyright (c) 2019, 2020 MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software 
Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ + +#include "mariadb.h" + +#include "sql_class.h" +#include "sql_type_string.h" + + +uchar * +StringPack::pack(uchar *to, const uchar *from, uint max_length) const +{ + size_t length= MY_MIN(m_octet_length, max_length); + size_t local_char_length= char_length(); + DBUG_PRINT("debug", ("length: %zu ", length)); + + if (length > local_char_length) + local_char_length= charset()->charpos(from, from + length, + local_char_length); + set_if_smaller(length, local_char_length); + + /* + TODO: change charset interface to add a new function that does + the following or add a flag to lengthsp to do it itself + (this is for not packing padding adding bytes in BINARY + fields). + */ + if (mbmaxlen() == 1) + { + while (length && from[length-1] == charset()->pad_char) + length --; + } + else + length= charset()->lengthsp((const char*) from, length); + + // Length always stored little-endian + *to++= (uchar) length; + if (m_octet_length > 255) + *to++= (uchar) (length >> 8); + + // Store the actual bytes of the string + memcpy(to, from, length); + return to+length; +} + + +const uchar * +StringPack::unpack(uchar *to, const uchar *from, const uchar *from_end, + uint param_data) const +{ + uint from_length, length; + + /* + Compute the declared length of the field on the master. This is + used to decide if one or two bytes should be read as length. 
+ */ + if (param_data) + from_length= (((param_data >> 4) & 0x300) ^ 0x300) + (param_data & 0x00ff); + else + from_length= m_octet_length; + + DBUG_PRINT("debug", + ("param_data: 0x%x, field_length: %u, from_length: %u", + param_data, m_octet_length, from_length)); + /* + Compute the actual length of the data by reading one or two bits + (depending on the declared field length on the master). + */ + if (from_length > 255) + { + if (from + 2 > from_end) + return 0; + length= uint2korr(from); + from+= 2; + } + else + { + if (from + 1 > from_end) + return 0; + length= (uint) *from++; + } + if (from + length > from_end || length > m_octet_length) + return 0; + + memcpy(to, from, length); + // Pad the string with the pad character of the fields charset + charset()->fill((char*) to + length, + m_octet_length - length, + charset()->pad_char); + return from+length; +} diff --git a/sql/sql_type_string.h b/sql/sql_type_string.h new file mode 100644 index 00000000..fca46e91 --- /dev/null +++ b/sql/sql_type_string.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2019 MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef SQL_TYPE_STRING_INCLUDED +#define SQL_TYPE_STRING_INCLUDED + +class StringPack +{ + CHARSET_INFO *m_cs; + uint32 m_octet_length; + CHARSET_INFO *charset() const { return m_cs; } + uint mbmaxlen() const { return m_cs->mbmaxlen; }; + uint32 char_length() const { return m_octet_length / mbmaxlen(); } +public: + StringPack(CHARSET_INFO *cs, uint32 octet_length) + :m_cs(cs), + m_octet_length(octet_length) + { } + uchar *pack(uchar *to, const uchar *from, uint max_length) const; + const uchar *unpack(uchar *to, const uchar *from, const uchar *from_end, + uint param_data) const; +public: + static uint max_packed_col_length(uint max_length) + { + return (max_length > 255 ? 2 : 1) + max_length; + } + static uint packed_col_length(const uchar *data_ptr, uint length) + { + if (length > 255) + return uint2korr(data_ptr)+2; + return (uint) *data_ptr + 1; + } +}; + + +#endif // SQL_TYPE_STRING_INCLUDED diff --git a/sql/sql_udf.cc b/sql/sql_udf.cc new file mode 100644 index 00000000..02f068e9 --- /dev/null +++ b/sql/sql_udf.cc @@ -0,0 +1,750 @@ +/* Copyright (c) 2000, 2012, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */

/* This implements 'user defined functions' */

/*
  Known bugs:

  Memory for functions is never freed!
  Shared libraries are not closed before mysqld exits;
    - This is because we can't be sure if some threads are using
      a function.

  The bugs only affect applications that create and free a lot of
  dynamic functions, so this shouldn't be a real problem.
*/

#ifdef USE_PRAGMA_IMPLEMENTATION
#pragma implementation				// gcc: Class implementation
#endif

#include "mariadb.h"
#include "sql_priv.h"
#include "unireg.h"
#include "sql_base.h"                           // close_mysql_tables
#include "sql_parse.h"                          // check_identifier_name
#include "sql_table.h"                          // write_bin_log
#include "records.h"                            // init_read_record, end_read_record
/* NOTE(review): include target lost in extraction; restored -- verify against upstream */
#include <my_pthread.h>
#include "lock.h"                               // MYSQL_LOCK_IGNORE_TIMEOUT

#ifdef HAVE_DLOPEN
extern "C"
{
/* NOTE(review): include targets lost in extraction; restored -- verify against upstream */
#include <stdarg.h>
#include <hash.h>
}

/* File-scope state, all protected by THR_LOCK_udf except during udf_init() */
static bool initialized = 0;
static MEM_ROOT mem;                    // Owns udf_func structs and name/dl strings
static HASH udf_hash;                   // Keyed by function name (see get_hash_key)
static mysql_rwlock_t THR_LOCK_udf;
static LEX_CSTRING MYSQL_FUNC_NAME= {STRING_WITH_LEN("func") };

static udf_func *add_udf(LEX_CSTRING *name, Item_result ret,
                         const char *dl, Item_udftype typ);
static void del_udf(udf_func *udf);
static void *find_udf_dl(const char *dl);
static bool find_udf_everywhere(THD* thd, const LEX_CSTRING &name,
                                TABLE *table);

/**
  Resolve all entry points of a UDF from its already-dlopen()ed library.

  Looks up the main symbol (the function name itself), then the
  conventional auxiliary symbols built by appending suffixes into 'nm'
  (a caller-provided scratch buffer of at least SAFE_NAME_LEN+16 bytes):
  _clear/_add/_remove for aggregates, _init/_deinit for all types.

  @return 0 on success, otherwise the name of the symbol that was
          required but missing (points into tmp->name or into nm).
*/
static const char *init_syms(udf_func *tmp, char *nm)
{
  char *end;

  if (!((tmp->func= (Udf_func_any) dlsym(tmp->dlhandle, tmp->name.str))))
    return tmp->name.str;

  end=strmov(nm,tmp->name.str);

  if (tmp->type == UDFTYPE_AGGREGATE)
  {
    (void)strmov(end, "_clear");
    if (!((tmp->func_clear= (Udf_func_clear) dlsym(tmp->dlhandle, nm))))
      return nm;
    (void)strmov(end, "_add");
    if (!((tmp->func_add= (Udf_func_add) dlsym(tmp->dlhandle, nm))))
      return nm;
    /* _remove is optional (no error if absent) */
    (void)strmov(end, "_remove");
    tmp->func_remove= (Udf_func_add) dlsym(tmp->dlhandle, nm);
  }

  /* _deinit and _init are optional for non-aggregates, subject to the
     suspicious-UDF check below */
  (void) strmov(end,"_deinit");
  tmp->func_deinit= (Udf_func_deinit) dlsym(tmp->dlhandle, nm);

  (void) strmov(end,"_init");
  tmp->func_init= (Udf_func_init) dlsym(tmp->dlhandle, nm);

  /*
    to prevent loading "udf" from, e.g. libc.so
    let's ensure that at least one auxiliary symbol is defined
  */
  if (!tmp->func_init && !tmp->func_deinit && tmp->type != UDFTYPE_AGGREGATE)
  {
    THD *thd= current_thd;
    if (!opt_allow_suspicious_udfs)
      return nm;
    if (thd->variables.log_warnings)
      sql_print_warning(ER_THD(thd, ER_CANT_FIND_DL_ENTRY), nm);
  }
  return 0;
}


/* HASH callback: the key of udf_hash is the function name. */
extern "C" uchar* get_hash_key(const uchar *buff, size_t *length,
                               my_bool not_used __attribute__((unused)))
{
  udf_func *udf=(udf_func*) buff;
  *length=(uint) udf->name.length;
  return (uchar*) udf->name.str;
}

static PSI_memory_key key_memory_udf_mem;

#ifdef HAVE_PSI_INTERFACE
static PSI_rwlock_key key_rwlock_THR_LOCK_udf;

static PSI_rwlock_info all_udf_rwlocks[]=
{
  { &key_rwlock_THR_LOCK_udf, "THR_LOCK_udf", PSI_FLAG_GLOBAL}
};

static PSI_memory_info all_udf_memory[]=
{
  { &key_memory_udf_mem, "udf_mem", PSI_FLAG_GLOBAL}
};

/* Register the UDF rwlock and memory keys with performance schema. */
static void init_udf_psi_keys(void)
{
  const char* category= "sql";
  int count;

  if (PSI_server == NULL)
    return;

  count= array_elements(all_udf_rwlocks);
  PSI_server->register_rwlock(category, all_udf_rwlocks, count);

  count= array_elements(all_udf_memory);
  mysql_memory_register(category, all_udf_memory, count);
}
#endif

/*
  Read all predeclared functions from mysql.func and accept all that
  can be used.
*/

/**
  Load all UDFs listed in mysql.func at server start.

  Skipped under --skip-grant-tables. Rows with an invalid name or a
  dl value containing a path are rejected; rows whose library cannot
  be opened stay in the hash (with dlhandle == 0) so DROP FUNCTION can
  still remove them. Runs in a temporary THD that is destroyed on exit.
*/
void udf_init()
{
  udf_func *tmp;
  TABLE_LIST tables;
  READ_RECORD read_record_info;
  TABLE *table;
  int error;
  DBUG_ENTER("ufd_init");   /* NOTE(review): tag misspells udf_init; kept for trace compat */

  if (initialized || opt_noacl)
    DBUG_VOID_RETURN;

#ifdef HAVE_PSI_INTERFACE
  init_udf_psi_keys();
#endif

  mysql_rwlock_init(key_rwlock_THR_LOCK_udf, &THR_LOCK_udf);

  init_sql_alloc(key_memory_udf_mem, &mem, UDF_ALLOC_BLOCK_SIZE, 0, MYF(0));
  THD *new_thd = new THD(0);
  if (!new_thd ||
      my_hash_init(key_memory_udf_mem,
                   &udf_hash,system_charset_info,32,0,0,get_hash_key, NULL, 0))
  {
    sql_print_error("Can't allocate memory for udf structures");
    my_hash_free(&udf_hash);
    free_root(&mem,MYF(0));
    delete new_thd;
    DBUG_VOID_RETURN;
  }
  initialized = 1;
  new_thd->thread_stack= (char*) &new_thd;
  new_thd->store_globals();
  new_thd->set_db(&MYSQL_SCHEMA_NAME);

  tables.init_one_table(&new_thd->db, &MYSQL_FUNC_NAME, 0, TL_READ);

  if (open_and_lock_tables(new_thd, &tables, FALSE, MYSQL_LOCK_IGNORE_TIMEOUT))
  {
    DBUG_PRINT("error",("Can't open udf table"));
    sql_print_error("Can't open the mysql.func table. Please "
                    "run mysql_upgrade to create it.");
    goto end;
  }

  table= tables.table;
  if (init_read_record(&read_record_info, new_thd, table, NULL, NULL, 1, 0,
                       FALSE))
  {
    sql_print_error("Could not initialize init_read_record; udf's not "
                    "loaded");
    goto end;
  }

  table->use_all_columns();
  while (!(error= read_record_info.read_record()))
  {
    DBUG_PRINT("info",("init udf record"));
    LEX_CSTRING name;
    name.str=get_field(&mem, table->field[0]);
    name.length = (uint) safe_strlen(name.str);
    char *dl_name= get_field(&mem, table->field[2]);
    bool new_dl=0;
    Item_udftype udftype=UDFTYPE_FUNCTION;
    if (table->s->fields >= 4)			// New func table
      udftype=(Item_udftype) table->field[3]->val_int();

    /*
      Ensure that the .dll doesn't have a path
      This is done to ensure that only approved dll from the system
      directories are used (to make this even remotely secure).

      On windows we must check both FN_LIBCHAR and '/'.
    */
    if (!name.str || !dl_name || check_valid_path(dl_name, strlen(dl_name)) ||
        check_string_char_length(&name, 0, NAME_CHAR_LEN,
                                 system_charset_info, 1))
    {
      sql_print_error("Invalid row in mysql.func table for function '%.64s'",
                      safe_str(name.str));
      continue;
    }

    if (!(tmp= add_udf(&name,(Item_result) table->field[1]->val_int(),
                       dl_name, udftype)))
    {
      sql_print_error("Can't alloc memory for udf function: '%.64s'", name.str);
      continue;
    }

    /* Reuse an already-open handle if another UDF uses the same library */
    void *dl = find_udf_dl(tmp->dl);
    if (dl == NULL)
    {
      char dlpath[FN_REFLEN];
      strxnmov(dlpath, sizeof(dlpath) - 1, opt_plugin_dir, "/", tmp->dl, NullS);
      (void) unpack_filename(dlpath, dlpath);
      if (!(dl= dlopen(dlpath, RTLD_NOW)))
      {
        /* Print warning to log */
        sql_print_error(ER_THD(new_thd, ER_CANT_OPEN_LIBRARY),
                        tmp->dl, errno, my_dlerror(dlpath));
        /* Keep the udf in the hash so that we can remove it later */
        continue;
      }
      new_dl=1;
    }
    tmp->dlhandle = dl;
    {
      char buf[SAFE_NAME_LEN+16];
      const char *missing;
      if ((missing= init_syms(tmp, buf)))
      {
        sql_print_error(ER_THD(new_thd, ER_CANT_FIND_DL_ENTRY), missing);
        del_udf(tmp);
        if (new_dl)
          dlclose(dl);
      }
    }
  }
  if (unlikely(error > 0))
    sql_print_error("Got unknown error: %d", my_errno);
  end_read_record(&read_record_info);

  // Force close to free memory
  table->mark_table_for_reopen();

end:
  close_mysql_tables(new_thd);
  delete new_thd;
  DBUG_VOID_RETURN;
}


/**
  Close all UDF shared libraries and free all UDF metadata at shutdown.
  Each distinct dlhandle is closed exactly once.
*/
void udf_free()
{
  /* close all shared libraries */
  DBUG_ENTER("udf_free");
  if (opt_noacl)
    DBUG_VOID_RETURN;
  for (uint idx=0 ; idx < udf_hash.records ; idx++)
  {
    udf_func *udf=(udf_func*) my_hash_element(&udf_hash,idx);
    if (udf->dlhandle)				// Not closed before
    {
      /* Mark all versions using the same handler as closed */
      for (uint j=idx+1 ;  j < udf_hash.records ; j++)
      {
	udf_func *tmp=(udf_func*) my_hash_element(&udf_hash,j);
	if (udf->dlhandle == tmp->dlhandle)
	  tmp->dlhandle=0;			// Already closed
      }
      dlclose(udf->dlhandle);
    }
  }
  my_hash_free(&udf_hash);
  free_root(&mem,MYF(0));
  if (initialized)
  {
    initialized= 0;
    mysql_rwlock_destroy(&THR_LOCK_udf);
  }
  DBUG_VOID_RETURN;
}


/**
  Drop one reference to a UDF; caller must hold THR_LOCK_udf (write).
  When the count reaches zero the entry is removed from the hash;
  otherwise it is renamed to "*" so new lookups cannot find it while
  running statements keep using it.
*/
static void del_udf(udf_func *udf)
{
  DBUG_ENTER("del_udf");
  if (!--udf->usage_count)
  {
    my_hash_delete(&udf_hash,(uchar*) udf);
    using_udf_functions=udf_hash.records != 0;
  }
  else
  {
    /*
      The function is in use ; Rename the function instead of removing it.
      The function will be automatically removed when the last thread
      doesn't use it anymore
    */
    const char *name= udf->name.str;
    size_t name_length=udf->name.length;
    udf->name.str= "*";
    udf->name.length=1;
    my_hash_update(&udf_hash,(uchar*) udf,(uchar*) name,name_length);
  }
  DBUG_VOID_RETURN;
}


/**
  Release a reference taken by find_udf(..., mark_used=true).
  Frees the entry (and closes its library if no other UDF uses it)
  when this was the last user of a dropped function.
*/
void free_udf(udf_func *udf)
{
  DBUG_ENTER("free_udf");

  if (!initialized)
    DBUG_VOID_RETURN;

  mysql_rwlock_wrlock(&THR_LOCK_udf);
  if (!--udf->usage_count)
  {
    /*
      We come here when someone has deleted the udf function
      while another thread still was using the udf
    */
    my_hash_delete(&udf_hash,(uchar*) udf);
    using_udf_functions=udf_hash.records != 0;
    if (!find_udf_dl(udf->dl))
      dlclose(udf->dlhandle);
  }
  mysql_rwlock_unlock(&THR_LOCK_udf);
  DBUG_VOID_RETURN;
}


/* This is only called if using_udf_functions != 0 */

/**
  Look up a UDF by name. With mark_used the usage count is bumped and
  the caller must later call free_udf(). Returns NULL for entries whose
  library failed to open.
*/
udf_func *find_udf(const char *name,size_t length,bool mark_used)
{
  udf_func *udf=0;
  DBUG_ENTER("find_udf");
  DBUG_ASSERT(strlen(name) == length);

  if (!initialized)
    DBUG_RETURN(NULL);

  DEBUG_SYNC(current_thd, "find_udf_before_lock");
  /* TODO: This should be changed to reader locks someday! */
  if (mark_used)
    mysql_rwlock_wrlock(&THR_LOCK_udf);  /* Called during fix_fields */
  else
    mysql_rwlock_rdlock(&THR_LOCK_udf);  /* Called during parsing */

  if ((udf=(udf_func*) my_hash_search(&udf_hash,(uchar*) name, length)))
  {
    if (!udf->dlhandle)
      udf=0;                                    // Could not be opened
    else if (mark_used)
      udf->usage_count++;
  }
  mysql_rwlock_unlock(&THR_LOCK_udf);
  DBUG_RETURN(udf);
}


/**
  Return the open dlhandle of any UDF using library 'dl', or 0.
  Caller must hold THR_LOCK_udf.
*/
static void *find_udf_dl(const char *dl)
{
  DBUG_ENTER("find_udf_dl");

  /*
    Because only the function name is hashed, we have to search trough
    all rows to find the dl.
  */
  for (uint idx=0 ; idx < udf_hash.records ; idx++)
  {
    udf_func *udf=(udf_func*) my_hash_element(&udf_hash,idx);
    if (!strcmp(dl, udf->dl) && udf->dlhandle != NULL)
      DBUG_RETURN(udf->dlhandle);
  }
  DBUG_RETURN(0);
}


/* Assume that name && dl is already allocated */

/**
  Allocate a udf_func on 'mem' and insert it into udf_hash with
  usage_count 1. Returns 0 on bad arguments, OOM or hash insert failure.
  Note: 'name' and 'dl' must already be allocated with the right lifetime.
*/
static udf_func *add_udf(LEX_CSTRING *name, Item_result ret, const char *dl,
                         Item_udftype type)
{
  if (!name || !dl || !(uint) type || (uint) type > (uint) UDFTYPE_AGGREGATE)
    return 0;
  udf_func *tmp= (udf_func*) alloc_root(&mem, sizeof(udf_func));
  if (!tmp)
    return 0;
  bzero((char*) tmp,sizeof(*tmp));
  tmp->name = *name; //dup !!
  tmp->dl = dl;
  tmp->returns = ret;
  tmp->type = type;
  tmp->usage_count=1;
  if (my_hash_insert(&udf_hash,(uchar*) tmp))
    return 0;
  using_udf_functions=1;
  return tmp;
}

/**
  Find record with the udf in the udf func table

  @param exact_name  udf name
  @param table       table of mysql.func

  @retval TRUE  found
  @retval FALSE not found
*/

static bool find_udf_in_table(const LEX_CSTRING &exact_name, TABLE *table)
{
  table->use_all_columns();
  table->field[0]->store(exact_name.str, exact_name.length, &my_charset_bin);
  return (!table->file->ha_index_read_idx_map(table->record[0], 0,
                                              (uchar*) table->field[0]->ptr,
                                              HA_WHOLE_KEY,
                                              HA_READ_KEY_EXACT));
}

/**
  Delete the mysql.func row for 'exact_name' if present.
  @retval FALSE ok (including "row not found"), TRUE on delete error.
*/
static bool remove_udf_in_table(const LEX_CSTRING &exact_name, TABLE *table)
{
  if (find_udf_in_table(exact_name, table))
  {
    int error;
    if ((error= table->file->ha_delete_row(table->record[0])))
    {
      table->file->print_error(error, MYF(0));
      return TRUE;
    }
  }
  return FALSE;
}


/*
  Drop user defined function.

  @param thd    Thread handler.
  @param udf    Existing udf_func pointer which is to be deleted.
  @param table  mysql.func table reference (opened and locked)

  Assumption

  - udf is not null.
  - table is already opened and locked
*/
static int mysql_drop_function_internal(THD *thd, udf_func *udf, TABLE *table)
{
  DBUG_ENTER("mysql_drop_function_internal");

  /* Copy the name before del_udf() may rename the entry to "*" */
  const LEX_CSTRING exact_name= udf->name;

  del_udf(udf);
  /*
    Close the handle if this was function that was found during boot or
    CREATE FUNCTION and it's not in use by any other udf function
  */
  if (udf->dlhandle && !find_udf_dl(udf->dl))
    dlclose(udf->dlhandle);

  if (!table)
    DBUG_RETURN(1);

  bool ret= remove_udf_in_table(exact_name, table);
  DBUG_RETURN(ret);
}


/* Open and write-lock mysql.func; returns NULL on failure. */
static TABLE *open_udf_func_table(THD *thd)
{
  TABLE_LIST tables;
  tables.init_one_table(&MYSQL_SCHEMA_NAME, &MYSQL_FUNC_NAME,
                        &MYSQL_FUNC_NAME, TL_WRITE);
  return open_ltable(thd, &tables, TL_WRITE, MYSQL_LOCK_IGNORE_TIMEOUT);
}


/**
  Create a user defined function.

  @note Like implementations of other DDL/DML in MySQL, this function
  relies on the caller to close the thread tables. This is done in the
  end of dispatch_command().
*/

int mysql_create_function(THD *thd,udf_func *udf)
{
  int error;
  void *dl=0;
  bool new_dl=0;
  TABLE *table;
  TABLE_LIST tables;
  udf_func *u_d;
  DBUG_ENTER("mysql_create_function");

  if (!initialized)
  {
    if (opt_noacl)
      my_error(ER_CANT_INITIALIZE_UDF, MYF(0),
               udf->name.str,
               "UDFs are unavailable with the --skip-grant-tables option");
    else
      my_message(ER_OUT_OF_RESOURCES, ER_THD(thd, ER_OUT_OF_RESOURCES),
                 MYF(0));
    DBUG_RETURN(1);
  }

  /*
    Ensure that the .dll doesn't have a path
    This is done to ensure that only approved dll from the system
    directories are used (to make this even remotely secure).
  */
  if (check_valid_path(udf->dl, strlen(udf->dl)))
  {
    my_message(ER_UDF_NO_PATHS, ER_THD(thd, ER_UDF_NO_PATHS), MYF(0));
    DBUG_RETURN(1);
  }
  if (check_ident_length(&udf->name))
    DBUG_RETURN(1);

  /* May be NULL; creation is still allowed then (see below) */
  table= open_udf_func_table(thd);

  mysql_rwlock_wrlock(&THR_LOCK_udf);
  DEBUG_SYNC(current_thd, "mysql_create_function_after_lock");
  if ((u_d= (udf_func*) my_hash_search(&udf_hash, (uchar*) udf->name.str,
                                       udf->name.length)))
  {
    if (thd->lex->create_info.or_replace())
    {
      if (unlikely((error= mysql_drop_function_internal(thd, u_d, table))))
        goto err;
    }
    else if (thd->lex->create_info.if_not_exists())
    {
      push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, ER_UDF_EXISTS,
                          ER_THD(thd, ER_UDF_EXISTS), udf->name.str);

      goto done;
    }
    else
    {
      my_error(ER_UDF_EXISTS, MYF(0), udf->name.str);
      goto err;
    }
  }
  if (!(dl = find_udf_dl(udf->dl)))
  {
    char dlpath[FN_REFLEN];
    strxnmov(dlpath, sizeof(dlpath) - 1, opt_plugin_dir, "/", udf->dl, NullS);
    (void) unpack_filename(dlpath, dlpath);

    if (!(dl = dlopen(dlpath, RTLD_NOW)))
    {
      my_error(ER_CANT_OPEN_LIBRARY, MYF(0),
               udf->dl, errno, my_dlerror(dlpath));
      DBUG_PRINT("error",("dlopen of %s failed, error: %d (%s)",
                          udf->dl, errno, dlerror()));
      goto err;
    }
    new_dl=1;
  }
  udf->dlhandle=dl;
  {
    char buf[SAFE_NAME_LEN+16];
    const char *missing;
    if ((missing= init_syms(udf, buf)))
    {
      my_error(ER_CANT_FIND_DL_ENTRY, MYF(0), missing);
      goto err;
    }
  }
  /* Copy strings onto 'mem' so their lifetime matches the hash entry */
  udf->name.str= strdup_root(&mem,udf->name.str);
  udf->dl= strdup_root(&mem,udf->dl);
  if (!(u_d=add_udf(&udf->name,udf->returns,udf->dl,udf->type)))
    goto err;
  u_d->dlhandle= dl;
  u_d->func= udf->func;
  u_d->func_init= udf->func_init;
  u_d->func_deinit= udf->func_deinit;
  u_d->func_clear= udf->func_clear;
  u_d->func_add= udf->func_add;
  u_d->func_remove= udf->func_remove;

  /* create entry in mysql.func table */

  /* Allow creation of functions even if we can't open func table */
  if (unlikely(!table))
    goto err_open_func_table;
  table->use_all_columns();
  restore_record(table, s->default_values);	// Default values for fields
  table->field[0]->store(u_d->name.str, u_d->name.length, system_charset_info);
  table->field[1]->store((longlong) u_d->returns, TRUE);
  table->field[2]->store(u_d->dl,(uint) strlen(u_d->dl), system_charset_info);
  if (table->s->fields >= 4)			// If not old func format
    table->field[3]->store((longlong) u_d->type, TRUE);
  error= table->file->ha_write_row(table->record[0]);

  if (unlikely(error))
  {
    my_error(ER_ERROR_ON_WRITE, MYF(0), "mysql.func", error);
    del_udf(u_d);
    goto err_open_func_table;
  }

done:
  mysql_rwlock_unlock(&THR_LOCK_udf);

  /* Binlog the create function. */
  if (unlikely(write_bin_log(thd, TRUE, thd->query(), thd->query_length())))
    DBUG_RETURN(1);

  DBUG_RETURN(0);

err:
  if (new_dl)
    dlclose(dl);
err_open_func_table:
  mysql_rwlock_unlock(&THR_LOCK_udf);
  DBUG_RETURN(1);
}


/**
  Drop a user defined function: remove it from udf_hash and from
  mysql.func, then binlog the statement. A fast read-locked pre-check
  avoids taking the write lock when the function doesn't exist at all.
*/
enum drop_udf_result mysql_drop_function(THD *thd, const LEX_CSTRING *udf_name)
{
  TABLE *table;
  udf_func *udf;
  DBUG_ENTER("mysql_drop_function");

  if (thd->locked_tables_mode)
  {
    my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0));
    DBUG_RETURN(UDF_DEL_RESULT_ERROR);
  }

  if (!(table= open_udf_func_table(thd)))
    DBUG_RETURN(UDF_DEL_RESULT_ERROR);

  // Fast pre-check
  if (!mysql_rwlock_tryrdlock(&THR_LOCK_udf))
  {
    bool found= find_udf_everywhere(thd, *udf_name, table);
    mysql_rwlock_unlock(&THR_LOCK_udf);
    if (!found)
    {
      close_mysql_tables(thd);
      DBUG_RETURN(UDF_DEL_RESULT_ABSENT);
    }
  }

  if (!initialized)
  {
    close_mysql_tables(thd);
    if (opt_noacl)
      DBUG_RETURN(UDF_DEL_RESULT_ABSENT);  // SP should be checked

    my_message(ER_OUT_OF_RESOURCES, ER_THD(thd, ER_OUT_OF_RESOURCES), MYF(0));
    DBUG_RETURN(UDF_DEL_RESULT_ERROR);
  }

  mysql_rwlock_wrlock(&THR_LOCK_udf);

  // re-check under protection
  if (!find_udf_everywhere(thd, *udf_name, table))
  {
    close_mysql_tables(thd);
    mysql_rwlock_unlock(&THR_LOCK_udf);
    DBUG_RETURN(UDF_DEL_RESULT_ABSENT);
  }

  if (check_access(thd, DELETE_ACL, "mysql", NULL, NULL, 1, 0))
    goto err;


  DEBUG_SYNC(current_thd, "mysql_drop_function_after_lock");

  if (!(udf= (udf_func*) my_hash_search(&udf_hash, (uchar*) udf_name->str,
                                        (uint) udf_name->length)) )
  {
    /* Not in the hash but present in the table: remove the orphan row */
    if (remove_udf_in_table(*udf_name, table))
      goto err;
    goto done;
  }

  if (mysql_drop_function_internal(thd, udf, table))
    goto err;

done:
  mysql_rwlock_unlock(&THR_LOCK_udf);

  /*
    Binlog the drop function. Keep the table open and locked
    while binlogging, to avoid binlog inconsistency.
  */
  if (write_bin_log(thd, TRUE, thd->query(), thd->query_length()))
    DBUG_RETURN(UDF_DEL_RESULT_ERROR);

  close_mysql_tables(thd);
  DBUG_RETURN(UDF_DEL_RESULT_DELETED);

err:
  close_mysql_tables(thd);
  mysql_rwlock_unlock(&THR_LOCK_udf);
  DBUG_RETURN(UDF_DEL_RESULT_ERROR);
}

/* True if the UDF exists in the in-memory hash or in mysql.func. */
static bool find_udf_everywhere(THD* thd, const LEX_CSTRING &name,
                                TABLE *table)
{
  if (initialized && my_hash_search(&udf_hash, (uchar*) name.str, name.length))
    return true;

  return find_udf_in_table(name, table);
}

#endif /* HAVE_DLOPEN */
diff --git a/sql/sql_udf.h b/sql/sql_udf.h
new file mode 100644
index 00000000..cb195435
--- /dev/null
+++ b/sql/sql_udf.h
#ifndef SQL_UDF_INCLUDED
#define SQL_UDF_INCLUDED

/* Copyright (c) 2000, 2003-2007 MySQL AB, 2009 Sun Microsystems, Inc.
   Use is subject to license terms.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */


/* This file defines structures needed by udf functions */

#ifdef USE_PRAGMA_INTERFACE
#pragma interface
#endif

enum Item_udftype {UDFTYPE_FUNCTION=1,UDFTYPE_AGGREGATE};

/* Signatures of the symbols resolved from the UDF shared library */
typedef void (*Udf_func_clear)(UDF_INIT *, uchar *, uchar *);
typedef void (*Udf_func_add)(UDF_INIT *, UDF_ARGS *, uchar *, uchar *);
typedef void (*Udf_func_deinit)(UDF_INIT*);
typedef my_bool (*Udf_func_init)(UDF_INIT *, UDF_ARGS *, char *);
typedef void (*Udf_func_any)();
typedef double (*Udf_func_double)(UDF_INIT *, UDF_ARGS *, uchar *, uchar *);
typedef longlong (*Udf_func_longlong)(UDF_INIT *, UDF_ARGS *,  uchar *,
                                      uchar *);

/*
  In-memory description of one loaded UDF: its name, library,
  resolved entry points and reference count (see sql_udf.cc).
*/
typedef struct st_udf_func
{
  LEX_CSTRING name;             // Function name (hash key in udf_hash)
  Item_result returns;          // Declared result type
  Item_udftype type;            // Scalar or aggregate
  const char *dl;               // Library file name (no path allowed)
  void *dlhandle;               // dlopen() handle; 0 if load failed/closed
  Udf_func_any func;            // Main entry point
  Udf_func_init func_init;      // Optional xxx_init
  Udf_func_deinit func_deinit;  // Optional xxx_deinit
  Udf_func_clear func_clear;    // Aggregate: xxx_clear
  Udf_func_add func_add;        // Aggregate: xxx_add
  Udf_func_add func_remove;     // Aggregate: optional xxx_remove
  ulong usage_count;            // Reference count (see find_udf/free_udf)
} udf_func;

class Item_result_field;

/*
  Runtime wrapper used by Item_func_udf_* to call a UDF:
  owns the UDF_ARGS/UDF_INIT blocks and argument conversion buffers,
  and dispatches to the library's entry points.
*/
class udf_handler :public Sql_alloc
{
 protected:
  udf_func *u_d;                // The UDF being called
  String *buffers;              // Per-argument string conversion buffers
  UDF_ARGS f_args;              // Argument block passed to the UDF
  UDF_INIT initid;              // Init block passed to the UDF
  char *num_buffer;             // Storage for numeric argument values
  uchar error, is_null;         // Out-flags set by the UDF
  bool initialized;             // xxx_init() succeeded
  Item **args;

 public:
  bool not_original;
  udf_handler(udf_func *udf_arg) :u_d(udf_arg), buffers(0), error(0),
    is_null(0), initialized(0), not_original(0)
  {}
  ~udf_handler();
  const char *name() const { return u_d ? u_d->name.str : "?"; }
  Item_result result_type () const
  { return u_d  ? u_d->returns : STRING_RESULT;}
  bool get_arguments();
  bool fix_fields(THD *thd, Item_func_or_sum *item,
                  uint arg_count, Item **args);
  void cleanup();
  /* Call the UDF expecting a double result; sets *null_value on NULL/error */
  double val(my_bool *null_value)
  {
    is_null= 0;
    if (get_arguments())
    {
      *null_value=1;
      return 0.0;
    }
    Udf_func_double func= (Udf_func_double) u_d->func;
    double tmp=func(&initid, &f_args, &is_null, &error);
    if (is_null || error)
    {
      *null_value=1;
      return 0.0;
    }
    *null_value=0;
    return tmp;
  }
  /* Call the UDF expecting an integer result; sets *null_value on NULL/error */
  longlong val_int(my_bool *null_value)
  {
    is_null= 0;
    if (get_arguments())
    {
      *null_value=1;
      return 0;
    }
    Udf_func_longlong func= (Udf_func_longlong) u_d->func;
    longlong tmp=func(&initid, &f_args, &is_null, &error);
    if (is_null || error)
    {
      *null_value=1;
      return 0;
    }
    *null_value=0;
    return tmp;
  }
  my_decimal *val_decimal(my_bool *null_value, my_decimal *dec_buf);
  /* Aggregate interface: reset accumulated state */
  void clear()
  {
    is_null= 0;
    Udf_func_clear func= u_d->func_clear;
    func(&initid, &is_null, &error);
  }
  /* Aggregate interface: fold current row into the accumulator */
  void add(my_bool *null_value)
  {
    if (get_arguments())
    {
      *null_value=1;
      return;
    }
    Udf_func_add func= u_d->func_add;
    func(&initid, &f_args, &is_null, &error);
    *null_value= (my_bool) (is_null || error);
  }
  /* True if the library provides xxx_remove (window-function removal) */
  bool supports_removal() const
  { return MY_TEST(u_d->func_remove); }
  /* Aggregate interface: remove current row from the accumulator */
  void remove(my_bool *null_value)
  {
    DBUG_ASSERT(u_d->func_remove);
    if (get_arguments())
    {
      *null_value=1;
      return;
    }
    Udf_func_add func= u_d->func_remove;
    func(&initid, &f_args, &is_null, &error);
    *null_value= (my_bool) (is_null || error);
  }
  String *val_str(String *str,String *save_str);
};


#ifdef HAVE_DLOPEN
void udf_init(void),udf_free(void);
udf_func *find_udf(const char *name, size_t size, bool mark_used=0);
void free_udf(udf_func *udf);
int mysql_create_function(THD *thd,udf_func *udf);
enum drop_udf_result
{
  UDF_DEL_RESULT_ABSENT,
  UDF_DEL_RESULT_DELETED,
  UDF_DEL_RESULT_ERROR
};
enum drop_udf_result mysql_drop_function(THD *thd, const LEX_CSTRING *name);
#else
static inline void udf_init(void) { }
static inline void udf_free(void) { }
#endif
#endif /* SQL_UDF_INCLUDED */
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
new file mode 100644
index 00000000..6e839409
--- /dev/null
+++ b/sql/sql_union.cc
/* Copyright (c) 2000, 2017, Oracle and/or its affiliates.
   Copyright (c) 2010, 2020, MariaDB Corporation.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */


/*
  UNION  of select's
  UNION's  were introduced by Monty and Sinisa
*/

#include "mariadb.h"
#include "sql_priv.h"
#include "unireg.h"
#include "sql_union.h"
#include "sql_select.h"
#include "sql_cursor.h"
#include "sql_base.h"                           // fill_record
#include "filesort.h"                           // filesort_free_buffers
#include "sql_view.h"
#include "sql_cte.h"
#include "item_windowfunc.h"

/**
  Execute a query expression with UNION: prepare the unit, run it,
  then clean up. Cleanup errors are OR-ed into the result.
*/
bool mysql_union(THD *thd, LEX *lex, select_result *result,
                 SELECT_LEX_UNIT *unit, ulonglong setup_tables_done_option)
{
  DBUG_ENTER("mysql_union");
  bool res;
  if (!(res= unit->prepare(unit->derived, result, SELECT_NO_UNLOCK |
                           setup_tables_done_option)))
    res= unit->exec();
  res|= unit->cleanup();
  DBUG_RETURN(res);
}


/***************************************************************************
** store records in temporary table for UNION
***************************************************************************/

/* NOTE(review): template arguments were stripped by extraction;
   restored as List<Item> throughout this file -- verify against upstream */
int select_unit::prepare(List<Item> &list, SELECT_LEX_UNIT *u)
{
  unit= u;
  return 0;
}

/**
  This called by SELECT_LEX_UNIT::exec when select changed
*/

void select_unit::change_select()
{
  uint current_select_number= thd->lex->current_select->select_number;
  DBUG_ENTER("select_unit::change_select");
  DBUG_PRINT("enter", ("select in unit change: %u -> %u",
                       curr_sel, current_select_number));
  DBUG_ASSERT(curr_sel != current_select_number);
  curr_sel= current_select_number;
  /* New SELECT processing starts */
  DBUG_ASSERT(table->file->inited == 0);
  step= thd->lex->current_select->get_linkage();
  switch (step)
  {
  case INTERSECT_TYPE:
    /* Remember the counter of the previous operation for matching rows */
    prev_step= curr_step;
    curr_step= current_select_number;
    break;
  case EXCEPT_TYPE:
    break;
  default:
    /* Anything else (including the first SELECT) is processed as UNION */
    step= UNION_TYPE;
    break;
  }
  DBUG_VOID_RETURN;
}

/**
  Fill temporary tables for UNION/EXCEPT/INTERSECT

  @Note
UNION:
  just add records to the table (with 'counter' field first if INTERSECT
  present in the sequence).
EXCEPT:
  looks for the record in the table (with 'counter' field first if
  INTERSECT present in the sequence) and delete it if found
INTERSECT:
  looks for the same record with 'counter' field of previous operation,
  put as a 'counter' number of the current SELECT.
  We scan the table and remove all records which marked with not last
  'counter' after processing all records in send_eof and only if it last
  SELECT of sequence of INTERSECTS.

  @param values          List of record items to process.

  @retval  0 - OK
  @retval -1 - duplicate
  @retval  1 - error
*/
int select_unit::send_data(List<Item> &values)
{
  int rc= 0;
  int not_reported_error= 0;

  if (table->no_rows_with_nulls)
    table->null_catch_flags= CHECK_ROW_FOR_NULLS_TO_REJECT;

  fill_record(thd, table, table->field + addon_cnt, values, true, false);
  /* set up initial values for records to be written */
  if (addon_cnt && step == UNION_TYPE)
  {
    /* addon_cnt == 1 means the hidden INTERSECT counter field exists */
    DBUG_ASSERT(addon_cnt == 1);
    table->field[0]->store((longlong) curr_step, 1);
  }

  if (unlikely(thd->is_error()))
  {
    rc= 1;
    if (unlikely(not_reported_error))
    {
      DBUG_ASSERT(rc);
      table->file->print_error(not_reported_error, MYF(0));
    }
    return rc;
  }
  if (table->no_rows_with_nulls)
  {
    table->null_catch_flags&= ~CHECK_ROW_FOR_NULLS_TO_REJECT;
    if (table->null_catch_flags)
    {
      /* Row contains a NULL that makes it impossible to match: skip it */
      rc= 0;
      if (unlikely(not_reported_error))
      {
        DBUG_ASSERT(rc);
        table->file->print_error(not_reported_error, MYF(0));
      }
      return rc;
    }
  }

  /* select_unit::change_select() change step & Co correctly for each SELECT */
  int find_res;
  switch (step)
  {
  case UNION_TYPE:
    rc= write_record();
    /* no reaction with conversion */
    if (rc == -2)
      rc= 0;
    break;

  case EXCEPT_TYPE:
    /*
      The temporary table uses very first index or constrain for
      checking unique constrain.
    */
    if (!(find_res= table->file->find_unique_row(table->record[0], 0)))
      rc= delete_record();
    else
      rc= not_reported_error= (find_res != 1);
    break;
  case INTERSECT_TYPE:
    /*
      The temporary table uses very first index or constrain for
      checking unique constrain.
    */
    if (!(find_res= table->file->find_unique_row(table->record[0], 0)))
    {
      DBUG_ASSERT(!table->triggers);
      /* Only bump the counter for rows seen by the previous operand */
      if (table->field[0]->val_int() == prev_step)
      {
        not_reported_error= update_counter(table->field[0], curr_step);
        rc= MY_TEST(not_reported_error);
        DBUG_ASSERT(rc != HA_ERR_RECORD_IS_THE_SAME);
      }
    }
    else
      rc= not_reported_error= (find_res != 1);
    break;
  default:
    DBUG_ASSERT(0);
  }

  if (unlikely(not_reported_error))
  {
    DBUG_ASSERT(rc);
    table->file->print_error(not_reported_error, MYF(0));
  }
  return rc;
}

/**
  End of one operand: for the last INTERSECT in a chain, delete all
  rows whose counter was not updated by the current SELECT.
*/
bool select_unit::send_eof()
{
  if (step != INTERSECT_TYPE ||
      (thd->lex->current_select->next_select() &&
       thd->lex->current_select->next_select()->get_linkage() == INTERSECT_TYPE))
  {
    /*
      it is not INTERSECT or next SELECT in the sequence is INTERSECT so no
      need filtering (the last INTERSECT in this sequence of intersects will
      filter).
    */
    return 0;
  }

  /*
    It is last select in the sequence of INTERSECTs so we should filter out
    all records except marked with actual counter.

   TODO: as optimization for simple case this could be moved to
   'fake_select' WHERE condition
  */
  int error;

  if (table->file->ha_rnd_init_with_error(1))
    return 1;
  do
  {
    error= table->file->ha_rnd_next(table->record[0]);
    if (unlikely(error))
    {
      if (error == HA_ERR_END_OF_FILE)
      {
        error= 0;
        break;
      }
      break;
    }
    if (table->field[0]->val_int() != curr_step)
      error= delete_record();
  } while (!error);
  table->file->ha_rnd_end();

  if (unlikely(error))
    table->file->print_error(error, MYF(0));

  return(MY_TEST(error));
}


/**
  Recursive CTE: route the row through select_unit::send_data and also
  append it to the incremental table used for the next iteration.
  Warning settings are adjusted so field conversion problems are reported
  per row (strict mode aware).
*/
int select_union_recursive::send_data(List<Item> &values)
{

  int rc;
  bool save_abort_on_warning= thd->abort_on_warning;
  enum_check_fields save_count_cuted_fields= thd->count_cuted_fields;
  long save_counter;

  /*
    For recursive CTE's give warnings for wrong field info
    However, we don't do that for CREATE TABLE ... SELECT or INSERT ... SELECT
    as the upper level code for these handles setting of abort_on_warning
    depending on if 'IGNORE' is used.
  */
  if (thd->lex->sql_command != SQLCOM_CREATE_TABLE &&
      thd->lex->sql_command != SQLCOM_INSERT_SELECT)
    thd->abort_on_warning= thd->is_strict_mode();
  thd->count_cuted_fields= CHECK_FIELD_WARN;
  save_counter= thd->get_stmt_da()->set_current_row_for_warning(++row_counter);
  rc= select_unit::send_data(values);
  thd->get_stmt_da()->set_current_row_for_warning(save_counter);
  thd->count_cuted_fields= save_count_cuted_fields;
  thd->abort_on_warning= save_abort_on_warning;

  if (rc == 0 &&
      write_err != HA_ERR_FOUND_DUPP_KEY &&
      write_err != HA_ERR_FOUND_DUPP_UNIQUE)
  {
    int err;
    DBUG_ASSERT(incr_table->s->reclength == table->s->reclength ||
                incr_table->s->reclength == table->s->reclength - MARIA_UNIQUE_HASH_LENGTH);
    if ((err= incr_table->file->ha_write_tmp_row(table->record[0])))
    {
      /* HEAP table full: convert the incremental table to an on-disk one */
      bool is_duplicate;
      rc= create_internal_tmp_table_from_heap(thd, incr_table,
                                              tmp_table_param.start_recinfo,
                                              &tmp_table_param.recinfo,
                                              err, 1, &is_duplicate);
    }
  }

  return rc;
}


/* Disable the write cache of the temporary table (flush pending rows). */
bool select_unit::flush()
{
  int error;
  if (unlikely((error=table->file->extra(HA_EXTRA_NO_CACHE))))
  {
    table->file->print_error(error, MYF(0));
    return 1;
  }
  return 0;
}


/*
  Create a temporary table to store the result of select_union.
+ + SYNOPSIS + select_unit::create_result_table() + thd thread handle + column_types a list of items used to define columns of the + temporary table + is_union_distinct if set, the temporary table will eliminate + duplicates on insert + options create options + table_alias name of the temporary table + bit_fields_as_long convert bit fields to ulonglong + create_table whether to physically create result table + keep_row_order keep rows in order as they were inserted + hidden number of hidden fields (for INTERSECT) + plus one for `ALL` + + DESCRIPTION + Create a temporary table that is used to store the result of a UNION, + derived table, or a materialized cursor. + + RETURN VALUE + 0 The table has been created successfully. + 1 create_tmp_table failed. +*/ + +bool +select_unit::create_result_table(THD *thd_arg, List *column_types, + bool is_union_distinct, ulonglong options, + const LEX_CSTRING *alias, + bool bit_fields_as_long, bool create_table, + bool keep_row_order, + uint hidden) +{ + DBUG_ASSERT(table == 0); + tmp_table_param.init(); + tmp_table_param.field_count= column_types->elements; + tmp_table_param.func_count= tmp_table_param.field_count; + tmp_table_param.bit_fields_as_long= bit_fields_as_long; + tmp_table_param.hidden_field_count= hidden; + + if (! 
(table= create_tmp_table(thd_arg, &tmp_table_param, *column_types, + (ORDER*) 0, is_union_distinct, 1, + options, HA_POS_ERROR, alias, + !create_table, keep_row_order))) + return TRUE; + + table->keys_in_use_for_query.clear_all(); + for (uint i=0; i < table->s->fields; i++) + table->field[i]->flags &= ~(PART_KEY_FLAG | PART_INDIRECT_KEY_FLAG); + + if (create_table) + { + table->file->extra(HA_EXTRA_WRITE_CACHE); + table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + } + return FALSE; +} + +bool +select_union_recursive::create_result_table(THD *thd_arg, + List *column_types, + bool is_union_distinct, + ulonglong options, + const LEX_CSTRING *alias, + bool bit_fields_as_long, + bool create_table, + bool keep_row_order, + uint hidden) +{ + if (select_unit::create_result_table(thd_arg, column_types, + is_union_distinct, options, + &empty_clex_str, bit_fields_as_long, + create_table, keep_row_order, + hidden)) + return true; + + incr_table_param.init(); + incr_table_param.field_count= incr_table_param.func_count= + column_types->elements; + incr_table_param.bit_fields_as_long= bit_fields_as_long; + if (! 
(incr_table= create_tmp_table(thd_arg, &incr_table_param, *column_types, + (ORDER*) 0, false, 1, + options, HA_POS_ERROR, &empty_clex_str, + true, keep_row_order))) + return true; + + incr_table->keys_in_use_for_query.clear_all(); + for (uint i=0; i < table->s->fields; i++) + incr_table->field[i]->flags &= ~(PART_KEY_FLAG | PART_INDIRECT_KEY_FLAG); + + return false; +} + + +/* + @brief + Write a record + + @retval + -2 conversion happened + -1 found a duplicate key + 0 no error + 1 if an error is reported +*/ + +int select_unit::write_record() +{ + if (unlikely((write_err= table->file->ha_write_tmp_row(table->record[0])))) + { + if (write_err == HA_ERR_FOUND_DUPP_KEY) + { + /* + Inform upper level that we found a duplicate key, that should not + be counted as part of limit + */ + return -1; + } + bool is_duplicate= false; + /* create_internal_tmp_table_from_heap will generate error if needed */ + if (table->file->is_fatal_error(write_err, HA_CHECK_DUP)) + { + if (!create_internal_tmp_table_from_heap(thd, table, + tmp_table_param.start_recinfo, + &tmp_table_param.recinfo, + write_err, 1, &is_duplicate)) + { + return -2; + } + else + { + return 1; + } + } + if (is_duplicate) + { + return -1; + } + } + return 0; +} + + +/* + @brief + Update counter for a record + + @retval + 0 no error + -1 error occurred +*/ + +int select_unit::update_counter(Field* counter, longlong value) +{ + store_record(table, record[1]); + counter->store(value, 0); + int error= table->file->ha_update_tmp_row(table->record[1], + table->record[0]); + return error; +} + + +/* + @brief + Try to disable index + + @retval + true index is disabled this time + false this time did not disable the index +*/ + +bool select_unit_ext::disable_index_if_needed(SELECT_LEX *curr_sl) +{ + if (is_index_enabled && + (curr_sl == curr_sl->master_unit()->union_distinct || + !curr_sl->next_select()) ) + { + is_index_enabled= false; + if (table->file->ha_disable_indexes(HA_KEY_SWITCH_ALL)) + return false; + 
table->no_keyread=1; + return true; + } + return false; +} + +/* + @brief + Unfold a record + + @retval + 0 no error + -1 conversion happened +*/ + +int select_unit_ext::unfold_record(ha_rows cnt) +{ + + DBUG_ASSERT(cnt > 0); + int error= 0; + bool is_convertion_happened= false; + while (--cnt) + { + error= write_record(); + if (error == -2) + { + is_convertion_happened= true; + error= -1; + } + } + if (is_convertion_happened) + return -1; + return error; +} + +/* + @brief + Delete a record + + @retval + 0 no error + 1 if an error is reported +*/ + +int select_unit::delete_record() +{ + DBUG_ASSERT(!table->triggers); + table->status|= STATUS_DELETED; + int not_reported_error= table->file->ha_delete_tmp_row(table->record[0]); + return MY_TEST(not_reported_error); +} + +/** + Reset and empty the temporary table that stores the materialized query + result. + + @note The cleanup performed here is exactly the same as for the two temp + tables of JOIN - exec_tmp_table_[1 | 2]. +*/ + +void select_unit::cleanup() +{ + table->file->extra(HA_EXTRA_RESET_STATE); + table->file->ha_delete_all_rows(); +} + + +/* + @brief + Set up value needed by send_data() and send_eof() + + @detail + - For EXCEPT we will decrease the counter by one + and INTERSECT / UNION we increase the counter. 
+ + - For INTERSECT we will modify the second extra field (intersect counter) + and for EXCEPT / UNION we modify the first (duplicate counter) +*/ + +void select_unit_ext::change_select() +{ + select_unit::change_select(); + switch(step){ + case UNION_TYPE: + increment= 1; + curr_op_type= UNION_DISTINCT; + break; + case EXCEPT_TYPE: + increment= -1; + curr_op_type= EXCEPT_DISTINCT; + break; + case INTERSECT_TYPE: + increment= 1; + curr_op_type= INTERSECT_DISTINCT; + break; + default: DBUG_ASSERT(0); + } + if (!thd->lex->current_select->distinct) + /* change type from DISTINCT to ALL */ + curr_op_type= (set_op_type)(curr_op_type + 1); + + duplicate_cnt= table->field[addon_cnt - 1]; + if (addon_cnt == 2) + additional_cnt= table->field[addon_cnt - 2]; + else + additional_cnt= NULL; +} + + +/* + @brief + Fill temporary tables for operations need extra fields + + @detail + - If this operation is not distinct, we try to find it and increase the + counter by "increment" setted in select_unit_ext::change_select(). + + - If it is distinct, for UNION we write this record; for INTERSECT we + try to find it and increase the intersect counter if found; for EXCEPT + we try to find it and delete that record if found. 
+ +*/ + +int select_unit_ext::send_data(List &values) +{ + int rc= 0; + int not_reported_error= 0; + int find_res; + + if (table->no_rows_with_nulls) + table->null_catch_flags= CHECK_ROW_FOR_NULLS_TO_REJECT; + + fill_record(thd, table, table->field + addon_cnt, values, true, false); + /* set up initial values for records to be written */ + if ( step == UNION_TYPE ) + { + /* set duplicate counter to 1 */ + duplicate_cnt->store((longlong) 1, 1); + /* set the other counter to 0 */ + if (curr_op_type == INTERSECT_ALL) + additional_cnt->store((longlong) 0, 1); + } + + if (unlikely(thd->is_error())) + { + rc= 1; + if (unlikely(not_reported_error)) + { + DBUG_ASSERT(rc); + table->file->print_error(not_reported_error, MYF(0)); + } + return rc; + } + if (table->no_rows_with_nulls) + { + table->null_catch_flags&= ~CHECK_ROW_FOR_NULLS_TO_REJECT; + if (table->null_catch_flags) + { + if (unlikely(not_reported_error)) + { + DBUG_ASSERT(rc); + table->file->print_error(not_reported_error, MYF(0)); + } + return rc; + } + } + + switch(curr_op_type) + { + case UNION_ALL: + if (!is_index_enabled || + (find_res= table->file->find_unique_row(table->record[0], 0))) + { + rc= write_record(); + /* no reaction with conversion */ + if (rc == -2) + rc= 0; + } + else + { + longlong cnt= duplicate_cnt->val_int() + increment; + not_reported_error= update_counter(duplicate_cnt, cnt); + DBUG_ASSERT(!table->triggers); + rc= MY_TEST(not_reported_error); + } + break; + + case EXCEPT_ALL: + if (!(find_res= table->file->find_unique_row(table->record[0], 0))) + { + longlong cnt= duplicate_cnt->val_int() + increment; + if (cnt == 0) + rc= delete_record(); + else + { + not_reported_error= update_counter(duplicate_cnt, cnt); + DBUG_ASSERT(!table->triggers); + rc= MY_TEST(not_reported_error); + } + } + break; + + case INTERSECT_ALL: + if (!(find_res= table->file->find_unique_row(table->record[0], 0))) + { + longlong cnt= duplicate_cnt->val_int() + increment; + if (cnt <= additional_cnt->val_int()) + { + 
not_reported_error= update_counter(duplicate_cnt, cnt); + DBUG_ASSERT(!table->triggers); + rc= MY_TEST(not_reported_error); + } + } + break; + + case UNION_DISTINCT: + rc= write_record(); + /* no reaction with conversion */ + if (rc == -2) + rc= 0; + break; + + case EXCEPT_DISTINCT: + if (!(find_res= table->file->find_unique_row(table->record[0], 0))) + rc= delete_record(); + else + rc= not_reported_error= (find_res != 1); + break; + + case INTERSECT_DISTINCT: + if (!(find_res= table->file->find_unique_row(table->record[0], 0))) + { + if (additional_cnt->val_int() == prev_step) + { + not_reported_error= update_counter(additional_cnt, curr_step); + rc= MY_TEST(not_reported_error); + DBUG_ASSERT(rc != HA_ERR_RECORD_IS_THE_SAME); + } + else if (additional_cnt->val_int() != curr_step) + rc= delete_record(); + } + else + rc= not_reported_error= (find_res != 1); + break; + + default: + DBUG_ASSERT(0); + } + + if (unlikely(not_reported_error)) + { + DBUG_ASSERT(rc); + table->file->print_error(not_reported_error, MYF(0)); + } + return rc; +} + + +/* + @brief + Do post-operation after a operator + + @detail + We need to scan in these cases: + - If this operation is DISTINCT and next is ALL, + duplicate counter needs to be set to 1. + - If this operation is INTERSECT ALL and counter needs to be updated. + - If next operation is INTERSECT ALL, + set up the second extra field (called "intersect_counter") to 0. + this extra field counts records in the second operand. + + If this operation is equal to "union_distinct" or is the last operation, + we'll disable index. Then if this operation is ALL we'll unfold records. 
+*/ + +bool select_unit_ext::send_eof() +{ + int error= 0; + SELECT_LEX *curr_sl= thd->lex->current_select; + SELECT_LEX *next_sl= curr_sl->next_select(); + bool is_next_distinct= next_sl && next_sl->distinct; + bool is_next_intersect_all= + next_sl && + next_sl->get_linkage() == INTERSECT_TYPE && + !next_sl->distinct; + bool need_unfold= (disable_index_if_needed(curr_sl) && + !curr_sl->distinct); + + if (((curr_sl->distinct && !is_next_distinct) || + curr_op_type == INTERSECT_ALL || + is_next_intersect_all) && + !need_unfold) + { + if (!next_sl) + DBUG_ASSERT(curr_op_type != INTERSECT_ALL); + bool need_update_row; + if (unlikely(table->file->ha_rnd_init_with_error(1))) + return 1; + do + { + need_update_row= false; + if (unlikely(error= table->file->ha_rnd_next(table->record[0]))) + { + if (error == HA_ERR_END_OF_FILE) + { + error= 0; + break; + } + break; + } + store_record(table, record[1]); + + if (curr_sl->distinct && !is_next_distinct) + { + /* set duplicate counter to 1 if next operation is ALL */ + duplicate_cnt->store(1, 0); + need_update_row= true; + } + + if (is_next_intersect_all) + { + longlong d_cnt_val= duplicate_cnt->val_int(); + if (d_cnt_val == 0) + error= delete_record(); + else + { + if (curr_op_type == INTERSECT_ALL) + { + longlong a_cnt_val= additional_cnt->val_int(); + if (a_cnt_val < d_cnt_val) + d_cnt_val= a_cnt_val; + } + additional_cnt->store(d_cnt_val, 0); + duplicate_cnt->store((longlong)0, 0); + need_update_row= true; + } + } + + if (need_update_row) + error= table->file->ha_update_tmp_row(table->record[1], + table->record[0]); + } while (likely(!error)); + table->file->ha_rnd_end(); + } + + /* unfold */ + else if (need_unfold) + { + /* unfold if is ALL operation */ + ha_rows dup_cnt; + if (unlikely(table->file->ha_rnd_init_with_error(1))) + return 1; + do + { + if (unlikely(error= table->file->ha_rnd_next(table->record[0]))) + { + if (error == HA_ERR_END_OF_FILE) + { + error= 0; + break; + } + break; + } + dup_cnt= 
(ha_rows)duplicate_cnt->val_int(); + /* delete record if not exist in the second operand */ + if (dup_cnt == 0) + { + error= delete_record(); + continue; + } + if (curr_op_type == INTERSECT_ALL) + { + ha_rows add_cnt= (ha_rows)additional_cnt->val_int(); + if (dup_cnt > add_cnt && add_cnt > 0) + dup_cnt= (ha_rows)add_cnt; + } + + if (dup_cnt == 1) + continue; + + duplicate_cnt->store((longlong)1, 0); + if (additional_cnt) + additional_cnt->store((longlong)0, 0); + error= table->file->ha_update_tmp_row(table->record[1], + table->record[0]); + if (unlikely(error)) + break; + + if (unfold_record(dup_cnt) == -1) + { + /* restart the scan */ + if (unlikely(table->file->ha_rnd_init_with_error(1))) + return 1; + + duplicate_cnt= table->field[addon_cnt - 1]; + if (addon_cnt == 2) + additional_cnt= table->field[addon_cnt - 2]; + else + additional_cnt= NULL; + continue; + } + } while (likely(!error)); + table->file->ha_rnd_end(); + } + + /* Clean up table buffers for the next set operation from pipeline */ + if (next_sl) + restore_record(table,s->default_values); + + if (unlikely(error)) + table->file->print_error(error, MYF(0)); + + return (MY_TEST(error)); +} + +void select_union_recursive::cleanup() +{ + if (table) + { + select_unit::cleanup(); + free_tmp_table(thd, table); + } + + if (incr_table) + { + if (incr_table->is_created()) + { + incr_table->file->extra(HA_EXTRA_RESET_STATE); + incr_table->file->ha_delete_all_rows(); + } + free_tmp_table(thd, incr_table); + } + + List_iterator it(rec_table_refs); + TABLE_LIST *tbl; + while ((tbl= it++)) + { + TABLE *tab= tbl->table; + if (tab->is_created()) + { + tab->file->extra(HA_EXTRA_RESET_STATE); + tab->file->ha_delete_all_rows(); + } + /* + The table will be closed later in close_thread_tables(), + because it might be used in the statements like + ANALYZE WITH r AS (...) SELECT * from r + where r is defined through recursion. 
+ */ + tab->next= thd->rec_tables; + thd->rec_tables= tab; + tbl->derived_result= 0; + } + row_counter= 0; +} + + +/** + Replace the current result with new_result and prepare it. + + @param new_result New result pointer + + @retval FALSE Success + @retval TRUE Error +*/ + +bool select_union_direct::change_result(select_result *new_result) +{ + result= new_result; + return (result->prepare(unit->types, unit) || result->prepare2(NULL)); +} + + +bool select_union_direct::postponed_prepare(List &types) +{ + if (result != NULL) + return (result->prepare(types, unit) || result->prepare2(NULL)); + else + return false; +} + + +bool select_union_direct::send_result_set_metadata(List &list, uint flags) +{ + if (done_send_result_set_metadata) + return false; + done_send_result_set_metadata= true; + + /* + Set global offset and limit to be used in send_data(). These can + be variables in prepared statements or stored programs, so they + must be reevaluated for each execution. + */ + offset= unit->global_parameters()->get_offset(); + limit= unit->global_parameters()->get_limit(); + if (limit + offset >= limit) + limit+= offset; + else + limit= HA_POS_ERROR; /* purecov: inspected */ + + return result->send_result_set_metadata(unit->types, flags); +} + + +int select_union_direct::send_data(List &items) +{ + if (!limit) + return false; + limit--; + if (offset) + { + offset--; + return false; + } + + send_records++; + fill_record(thd, table, table->field, items, true, false); + if (unlikely(thd->is_error())) + return true; /* purecov: inspected */ + + return result->send_data(unit->item_list); +} + + +bool select_union_direct::initialize_tables (JOIN *join) +{ + if (done_initialize_tables) + return false; + done_initialize_tables= true; + + return result->initialize_tables(join); +} + + +bool select_union_direct::send_eof() +{ + // Reset for each SELECT_LEX, so accumulate here + limit_found_rows+= thd->limit_found_rows; + + if (unit->thd->lex->current_select == last_select_lex) + 
{ + thd->limit_found_rows= limit_found_rows; + + // Reset and make ready for re-execution + done_send_result_set_metadata= false; + done_initialize_tables= false; + + return result->send_eof(); + } + else + return false; +} + + +/* + initialization procedures before fake_select_lex preparation() + + SYNOPSIS + st_select_lex_unit::init_prepare_fake_select_lex() + thd - thread handler + first_execution - TRUE at the first execution of the union + + RETURN + options of SELECT +*/ + +void +st_select_lex_unit::init_prepare_fake_select_lex(THD *thd_arg, + bool first_execution) +{ + thd_arg->lex->current_select= fake_select_lex; + fake_select_lex->table_list.link_in_list(&result_table_list, + &result_table_list.next_local); + fake_select_lex->context.table_list= + fake_select_lex->context.first_name_resolution_table= + fake_select_lex->get_table_list(); + /* + The flag fake_select_lex->first_execution indicates whether this is + called at the first execution of the statement, while first_execution + shows whether this is called at the first execution of the union that + may form just a subselect. 
+ */ + if ((fake_select_lex->changed_elements & TOUCHED_SEL_COND) && + first_execution) + { + for (ORDER *order= global_parameters()->order_list.first; + order; + order= order->next) + order->item= &order->item_ptr; + } +} + + +bool st_select_lex_unit::prepare_join(THD *thd_arg, SELECT_LEX *sl, + select_result *tmp_result, + ulonglong additional_options, + bool is_union_select) +{ + DBUG_ENTER("st_select_lex_unit::prepare_join"); + TABLE_LIST *derived= sl->master_unit()->derived; + bool can_skip_order_by; + sl->options|= SELECT_NO_UNLOCK; + JOIN *join= new JOIN(thd_arg, sl->item_list, + (sl->options | thd_arg->variables.option_bits | + additional_options), + tmp_result); + if (!join) + DBUG_RETURN(true); + + thd_arg->lex->current_select= sl; + + can_skip_order_by= (is_union_select && !(sl->braces && + sl->limit_params.explicit_limit) && + !thd->lex->with_rownum); + + saved_error= join->prepare(sl->table_list.first, + (derived && derived->merged ? NULL : sl->where), + (can_skip_order_by ? 0 : + sl->order_list.elements) + + sl->group_list.elements, + can_skip_order_by ? + NULL : sl->order_list.first, + can_skip_order_by, + sl->group_list.first, + sl->having, + (is_union_select ? NULL : + thd_arg->lex->proc_list.first), + sl, this); + + last_procedure= join->procedure; + + if (unlikely(saved_error || (saved_error= thd_arg->is_fatal_error))) + DBUG_RETURN(true); + /* + Remove all references from the select_lex_units to the subqueries that + are inside the ORDER BY clause. + */ + if (can_skip_order_by) + { + for (ORDER *ord= (ORDER *)sl->order_list.first; ord; ord= ord->next) + { + (*ord->item)->walk(&Item::eliminate_subselect_processor, FALSE, NULL); + } + } + DBUG_RETURN(false); +} + + +/** + Aggregate data type handlers for the "count" leftmost UNION parts. 
+*/ +bool st_select_lex_unit::join_union_type_handlers(THD *thd_arg, + Type_holder *holders, + uint count) +{ + DBUG_ENTER("st_select_lex_unit::join_union_type_handlers"); + SELECT_LEX *first_sl= first_select(), *sl= first_sl; + for (uint i= 0; i < count ; sl= sl->next_select(), i++) + { + Item *item; + List_iterator_fast it(sl->item_list); + for (uint pos= 0; (item= it++); pos++) + { + const Type_handler *item_type_handler= item->real_type_handler(); + if (sl == first_sl) + holders[pos].set_handler(item_type_handler); + else + { + DBUG_ASSERT(first_sl->item_list.elements == sl->item_list.elements); + if (holders[pos].aggregate_for_result(item_type_handler)) + { + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPES2_FOR_OPERATION, MYF(0), + holders[pos].type_handler()->name().ptr(), + item_type_handler->name().ptr(), + "UNION"); + DBUG_RETURN(true); + } + } + } + } + DBUG_RETURN(false); +} + + +/** + Aggregate data type attributes for the "count" leftmost UNION parts. +*/ +bool st_select_lex_unit::join_union_type_attributes(THD *thd_arg, + Type_holder *holders, + uint count) +{ + DBUG_ENTER("st_select_lex_unit::join_union_type_attributes"); + SELECT_LEX *sl, *first_sl= first_select(); + uint item_pos; + for (uint pos= 0; pos < first_sl->item_list.elements; pos++) + { + if (holders[pos].alloc_arguments(thd_arg, count)) + DBUG_RETURN(true); + } + for (item_pos= 0, sl= first_sl ; + item_pos < count; + sl= sl->next_select(), item_pos++) + { + Item *item_tmp; + List_iterator_fast itx(sl->item_list); + for (uint holder_pos= 0 ; (item_tmp= itx++); holder_pos++) + { + /* + If the outer query has a GROUP BY clause, an outer reference to this + query block may have been wrapped in a Item_outer_ref, which has not + been fixed yet. An Item_type_holder must be created based on a fixed + Item, so use the inner Item instead. 
+ */ + DBUG_ASSERT(item_tmp->fixed() || + (item_tmp->type() == Item::REF_ITEM && + ((Item_ref *)(item_tmp))->ref_type() == + Item_ref::OUTER_REF)); + if (!item_tmp->fixed()) + item_tmp= item_tmp->real_item(); + holders[holder_pos].add_argument(item_tmp); + } + } + for (uint pos= 0; pos < first_sl->item_list.elements; pos++) + { + if (holders[pos].aggregate_attributes(thd_arg)) + DBUG_RETURN(true); + } + DBUG_RETURN(false); +} + + +/** + Join data types for the leftmost "count" UNION parts + and store corresponding Item_type_holder's into "types". +*/ +bool st_select_lex_unit::join_union_item_types(THD *thd_arg, + List &types, + uint count) +{ + DBUG_ENTER("st_select_lex_unit::join_union_select_list_types"); + SELECT_LEX *first_sl= first_select(); + Type_holder *holders; + + if (!(holders= new (thd_arg->mem_root) + Type_holder[first_sl->item_list.elements]) || + join_union_type_handlers(thd_arg, holders, count) || + join_union_type_attributes(thd_arg, holders, count)) + DBUG_RETURN(true); + + bool is_recursive= with_element && with_element->is_recursive; + types.empty(); + List_iterator_fast it(first_sl->item_list); + Item *item_tmp; + for (uint pos= 0; (item_tmp= it++); pos++) + { + /* + SQL standard requires forced nullability only for + recursive columns. However type aggregation in our + implementation so far does not differentiate between + recursive and non-recursive columns of a recursive CTE. + TODO: this should be fixed. + */ + bool pos_maybe_null= is_recursive ? 
true : holders[pos].get_maybe_null(); + + /* Error's in 'new' will be detected after loop */ + types.push_back(new (thd_arg->mem_root) + Item_type_holder(thd_arg, + item_tmp, + holders[pos].type_handler(), + &holders[pos]/*Type_all_attributes*/, + pos_maybe_null)); + } + if (unlikely(thd_arg->is_fatal_error)) + DBUG_RETURN(true); // out of memory + DBUG_RETURN(false); +} + + +bool init_item_int(THD* thd, Item_int* &item) +{ + if (!item) + { + Query_arena *arena, backup_arena; + arena= thd->activate_stmt_arena_if_needed(&backup_arena); + + item= new (thd->mem_root) Item_int(thd, 0); + + if (arena) + thd->restore_active_arena(arena, &backup_arena); + + if (!item) + return false; + } + else + { + item->value= 0; + } + return true; +} + + +bool st_select_lex_unit::prepare(TABLE_LIST *derived_arg, + select_result *sel_result, + ulonglong additional_options) +{ + SELECT_LEX *lex_select_save= thd->lex->current_select; + SELECT_LEX *sl, *first_sl= first_select(); + bool is_recursive= with_element && with_element->is_recursive; + bool is_rec_result_table_created= false; + uint union_part_count= 0; + select_result *tmp_result; + bool is_union_select; + bool have_except= false, have_intersect= false, + have_except_all_or_intersect_all= false; + bool instantiate_tmp_table= false; + bool single_tvc= !first_sl->next_select() && first_sl->tvc; + bool single_tvc_wo_order= single_tvc && !first_sl->order_list.elements; + DBUG_ENTER("st_select_lex_unit::prepare"); + DBUG_ASSERT(thd == current_thd); + + if (is_recursive && (sl= first_sl->next_select())) + { + SELECT_LEX *next_sl; + for ( ; ; sl= next_sl) + { + next_sl= sl->next_select(); + if (!next_sl) + break; + if (next_sl->with_all_modifier != sl->with_all_modifier) + { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "mix of ALL and DISTINCT UNION operations in recursive CTE spec"); + DBUG_RETURN(TRUE); + } + } + } + + describe= additional_options & SELECT_DESCRIBE; + + /* + Save fake_select_lex in case we don't need it for anything 
but + global parameters. + */ + if (saved_fake_select_lex == NULL) // Don't overwrite on PS second prepare + saved_fake_select_lex= fake_select_lex; + + /* + result object should be reassigned even if preparing already done for + max/min subquery (ALL/ANY optimization) + */ + result= sel_result; + + if (prepared) + { + if (describe) + { + /* fast reinit for EXPLAIN */ + for (sl= first_sl; sl; sl= sl->next_select()) + { + if (sl->tvc) + { + sl->tvc->result= result; + if (result->prepare(sl->item_list, this)) + DBUG_RETURN(TRUE); + sl->tvc->select_options|= SELECT_DESCRIBE; + } + else + { + sl->join->result= result; + lim.clear(); + if (!sl->join->procedure && + result->prepare(sl->join->fields_list, this)) + { + DBUG_RETURN(TRUE); + } + sl->join->select_options|= SELECT_DESCRIBE; + sl->join->reinit(); + } + } + } + DBUG_RETURN(FALSE); + } + prepared= 1; + saved_error= FALSE; + + thd->lex->current_select= sl= first_sl; + found_rows_for_union= first_sl->options & OPTION_FOUND_ROWS; + is_union_select= is_unit_op() || fake_select_lex || single_tvc; + + /* + If we are reading UNION output and the UNION is in the + IN/ANY/ALL/EXISTS subquery, then ORDER BY is redundant and hence should + be removed. + Example: + select ... col IN (select col2 FROM t1 union select col3 from t2 ORDER BY 1) + + (as for ORDER BY ... 
LIMIT, it currently not supported inside + IN/ALL/ANY subqueries) + (For non-UNION this removal of ORDER BY clause is done in + check_and_do_in_subquery_rewrites()) + */ + if (item && is_unit_op() && + (item->is_in_predicate() || item->is_exists_predicate())) + { + global_parameters()->order_list.first= NULL; + global_parameters()->order_list.elements= 0; + } + + /* will only optimize once */ + if (!bag_set_op_optimized && !is_recursive) + { + optimize_bag_operation(false); + } + + for (SELECT_LEX *s= first_sl; s; s= s->next_select()) + { + switch (s->linkage) + { + case INTERSECT_TYPE: + have_intersect= TRUE; + if (!s->distinct){ + have_except_all_or_intersect_all= true; + } + break; + case EXCEPT_TYPE: + have_except= TRUE; + if (!s->distinct){ + have_except_all_or_intersect_all= TRUE; + } + break; + default: + break; + } + } + + /* Global option */ + + if (is_union_select || is_recursive) + { + if ((single_tvc_wo_order && !fake_select_lex) || + (is_unit_op() && !union_needs_tmp_table() && + !have_except && !have_intersect && !single_tvc)) + { + SELECT_LEX *last= first_select(); + while (last->next_select()) + last= last->next_select(); + if (!(tmp_result= union_result= + new (thd->mem_root) select_union_direct(thd, sel_result, + last))) + goto err; /* purecov: inspected */ + fake_select_lex= NULL; + instantiate_tmp_table= false; + } + else + { + if (!is_recursive) + { + /* + class "select_unit_ext" handles query contains EXCEPT ALL and / or + INTERSECT ALL. Others are handled by class "select_unit" + If have EXCEPT ALL or INTERSECT ALL in the query. 
First operand + should be UNION ALL + */ + if (have_except_all_or_intersect_all) + { + union_result= new (thd->mem_root) select_unit_ext(thd); + first_sl->distinct= false; + } + else + union_result= new (thd->mem_root) select_unit(thd); + } + else + { + with_element->rec_result= + new (thd->mem_root) select_union_recursive(thd); + union_result= with_element->rec_result; + if (fake_select_lex) + { + if (fake_select_lex->order_list.first || + fake_select_lex->limit_params.explicit_limit) + { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "global ORDER_BY/LIMIT in recursive CTE spec"); + goto err; + } + fake_select_lex->cleanup(); + fake_select_lex= NULL; + } + } + if (!(tmp_result= union_result)) + goto err; /* purecov: inspected */ + instantiate_tmp_table= true; + } + } + else + tmp_result= sel_result; + + sl->context.resolve_in_select_list= TRUE; + + if (!is_union_select && !is_recursive) + { + if (sl->tvc) + { + if (sl->tvc->prepare(thd, sl, tmp_result, this)) + goto err; + } + else + { + if (prepare_join(thd, first_sl, tmp_result, additional_options, + is_union_select)) + goto err; + + if (derived_arg && derived_arg->table && + derived_arg->derived_type == VIEW_ALGORITHM_MERGE && + derived_arg->table->versioned()) + { + /* Got versioning conditions (see vers_setup_conds()), need to update + derived_arg. 
*/ + derived_arg->where= first_sl->where; + } + } + types= first_sl->item_list; + goto cont; + } + + if (sl->tvc && sl->order_list.elements && + !sl->tvc->to_be_wrapped_as_with_tail()) + { + SELECT_LEX_UNIT *unit= sl->master_unit(); + if (thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) + { + unit->fake_select_lex= 0; + unit->saved_fake_select_lex= 0; + } + else + { + if (!unit->first_select()->next_select()) + { + if (!unit->fake_select_lex) + { + Query_arena *arena, backup_arena; + arena= thd->activate_stmt_arena_if_needed(&backup_arena); + bool rc= unit->add_fake_select_lex(thd); + if (arena) + thd->restore_active_arena(arena, &backup_arena); + if (rc) + goto err; + } + SELECT_LEX *fake= unit->fake_select_lex; + fake->order_list= sl->order_list; + fake->limit_params= sl->limit_params; + sl->order_list.empty(); + sl->limit_params.clear(); + if (describe) + fake->options|= SELECT_DESCRIBE; + } + else if (!sl->limit_params.explicit_limit) + sl->order_list.empty(); + } + } + + for (;sl; sl= sl->next_select(), union_part_count++) + { + if (sl->tvc) + { + if (sl->tvc->to_be_wrapped_as_with_tail() && + !(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW)) + + { + st_select_lex *wrapper_sl= wrap_tvc_with_tail(thd, sl); + if (!wrapper_sl) + goto err; + + if (sl == first_sl) + first_sl= wrapper_sl; + sl= wrapper_sl; + + if (prepare_join(thd, sl, tmp_result, additional_options, + is_union_select)) + goto err; + } + else if (sl->tvc->prepare(thd, sl, tmp_result, this)) + goto err; + } + else if (prepare_join(thd, sl, tmp_result, additional_options, + is_union_select)) + goto err; + + /* + setup_tables_done_option should be set only for very first SELECT, + because it protect from secont setup_tables call for select-like non + select commands (DELETE/INSERT/...) and they use only very first + SELECT (for union it can be only INSERT ... SELECT). 
+ */ + additional_options&= ~OPTION_SETUP_TABLES_DONE; + + /* + Use items list of underlaid select for derived tables to preserve + information about fields lengths and exact types + */ + if (sl == first_sl) + { + if (with_element) + { + if (with_element->process_columns_of_derived_unit(thd, this)) + goto err; + if (check_duplicate_names(thd, sl->item_list, 0)) + goto err; + } + } + else + { + if (first_sl->item_list.elements != sl->item_list.elements) + { + my_message(ER_WRONG_NUMBER_OF_COLUMNS_IN_SELECT, + ER_THD(thd, ER_WRONG_NUMBER_OF_COLUMNS_IN_SELECT), + MYF(0)); + goto err; + } + } + if (is_recursive) + { + if (!with_element->is_anchor(sl)) + sl->uncacheable|= UNCACHEABLE_UNITED; + if (!is_rec_result_table_created && + (!sl->next_select() || + sl->next_select() == with_element->first_recursive)) + { + ulonglong create_options; + create_options= (first_sl->options | thd->variables.option_bits | + TMP_TABLE_ALL_COLUMNS); + // Join data types for all non-recursive parts of a recursive UNION + if (join_union_item_types(thd, types, union_part_count + 1)) + goto err; + if (union_result->create_result_table(thd, &types, + MY_TEST(union_distinct), + create_options, + &derived_arg->alias, false, + instantiate_tmp_table, false, + 0)) + goto err; + if (have_except_all_or_intersect_all) + { + union_result->init(); + } + if (!derived_arg->table) + { + bool res= false; + + if ((!derived_arg->is_with_table_recursive_reference() || + !derived_arg->derived_result) && + !(derived_arg->derived_result= + new (thd->mem_root) select_unit(thd))) + goto err; // out of memory + thd->create_tmp_table_for_derived= TRUE; + + res= derived_arg->derived_result->create_result_table(thd, + &types, + FALSE, + create_options, + &derived_arg->alias, + FALSE, FALSE, + FALSE, 0); + thd->create_tmp_table_for_derived= FALSE; + if (res) + goto err; + derived_arg->derived_result->set_unit(this); + derived_arg->table= derived_arg->derived_result->table; + if 
(derived_arg->is_with_table_recursive_reference()) + { + /* Here 'derived_arg' is the primary recursive table reference */ + derived_arg->with->rec_result-> + rec_table_refs.push_back(derived_arg); + } + } + with_element->mark_as_with_prepared_anchor(); + is_rec_result_table_created= true; + } + } + } + + // In case of a non-recursive UNION, join data types for all UNION parts. + if (!is_recursive && join_union_item_types(thd, types, union_part_count)) + goto err; + +cont: + /* + If the query is using select_union_direct, we have postponed + preparation of the underlying select_result until column types + are known. + */ + if (union_result != NULL && union_result->postponed_prepare(types)) + DBUG_RETURN(true); + + if (is_union_select) + { + /* + Check that it was possible to aggregate + all collations together for UNION. + */ + List_iterator_fast tp(types); + Item *type; + ulonglong create_options; + uint save_tablenr= 0; + table_map save_map= 0; + uint save_maybe_null= 0; + + while ((type= tp++)) + { + /* + Test if the aggregated data type is OK for a UNION element. + E.g. in case of string data, DERIVATION_NONE is not allowed. + */ + if (type->type() == Item::TYPE_HOLDER && type->type_handler()-> + union_element_finalize(static_cast(type))) + goto err; + } + + /* + Disable the usage of fulltext searches in the last union branch. + This is a temporary 5.x limitation because of the way the fulltext + search functions are handled by the optimizer. + This is manifestation of the more general problems of "taking away" + parts of a SELECT statement post-fix_fields(). This is generally not + doable since various flags are collected in various places (e.g. + SELECT_LEX) that carry information about the presence of certain + expressions or constructs in the parts of the query. + When part of the query is taken away it's not clear how to "divide" + the meaning of these accumulated flags and what to carry over to the + recipient query (SELECT_LEX). 
+ */ + if (global_parameters()->ftfunc_list->elements && + global_parameters()->order_list.elements && + global_parameters() != fake_select_lex) + { + ORDER *ord; + Item_func::Functype ft= Item_func::FT_FUNC; + for (ord= global_parameters()->order_list.first; ord; ord= ord->next) + if ((*ord->item)->walk (&Item::find_function_processor, FALSE, &ft)) + { + my_error (ER_CANT_USE_OPTION_HERE, MYF(0), "MATCH()"); + goto err; + } + } + + + create_options= (first_sl->options | thd->variables.option_bits | + TMP_TABLE_ALL_COLUMNS); + /* + Force the temporary table to be a MyISAM table if we're going to use + fullext functions (MATCH ... AGAINST .. IN BOOLEAN MODE) when reading + from it (this should be removed in 5.2 when fulltext search is moved + out of MyISAM). + */ + if (global_parameters()->ftfunc_list->elements) + create_options= create_options | TMP_TABLE_FORCE_MYISAM; + + /* extra field counter */ + uint hidden= 0; + Item_int *addon_fields[2]= {0}; + if (!is_recursive) + { + if (have_except_all_or_intersect_all) + { + /* add duplicate_count */ + ++hidden; + } + /* add intersect_count */ + if (have_intersect) + ++hidden; + + for(uint i= 0; i< hidden; i++) + { + init_item_int(thd, addon_fields[i]); + types.push_front(addon_fields[i]); + addon_fields[i]->name.str= i ? 
"__CNT_1" : "__CNT_2"; + addon_fields[i]->name.length= 7; + } + bool error= + union_result->create_result_table(thd, &types, + MY_TEST(union_distinct) || + have_except_all_or_intersect_all || + have_intersect, + create_options, &empty_clex_str, false, + instantiate_tmp_table, false, + hidden); + union_result->addon_cnt= hidden; + for (uint i= 0; i < hidden; i++) + types.pop(); + if (unlikely(error)) + goto err; + } + + if (fake_select_lex && !fake_select_lex->first_cond_optimization) + { + save_tablenr= result_table_list.tablenr_exec; + save_map= result_table_list.map_exec; + save_maybe_null= result_table_list.maybe_null_exec; + } + bzero((char*) &result_table_list, sizeof(result_table_list)); + result_table_list.db.str= (char*) ""; + result_table_list.db.length= 0; + result_table_list.table_name.str= result_table_list.alias.str= "union"; + result_table_list.table_name.length= result_table_list.alias.length= sizeof("union")-1; + result_table_list.table= table= union_result->table; + if (fake_select_lex && !fake_select_lex->first_cond_optimization) + { + result_table_list.tablenr_exec= save_tablenr; + result_table_list.map_exec= save_map; + result_table_list.maybe_null_exec= save_maybe_null; + } + + thd->lex->current_select= lex_select_save; + if (!item_list.elements) + { + Query_arena *arena, backup_arena; + + arena= thd->activate_stmt_arena_if_needed(&backup_arena); + + saved_error= table->fill_item_list(&item_list); + for (uint i= 0; i < hidden; i++) + item_list.pop(); + + if (arena) + thd->restore_active_arena(arena, &backup_arena); + + if (unlikely(saved_error)) + goto err; + + if (fake_select_lex != NULL && + (thd->stmt_arena->is_stmt_prepare() || + (thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW))) + { + /* Validate the global parameters of this union */ + + init_prepare_fake_select_lex(thd, TRUE); + /* Should be done only once (the only item_list per statement) */ + DBUG_ASSERT(fake_select_lex->join == 0); + if (!(fake_select_lex->join= new 
JOIN(thd, item_list, thd->variables.option_bits, + result))) + { + fake_select_lex->table_list.empty(); + DBUG_RETURN(TRUE); + } + + /* + Fake st_select_lex should have item list for correct ref_array + allocation. + */ + fake_select_lex->item_list= item_list; + + thd->lex->current_select= fake_select_lex; + + /* + We need to add up n_sum_items in order to make the correct + allocation in setup_ref_array(). + */ + fake_select_lex->n_child_sum_items+= global_parameters()->n_sum_items; + } + } + else + { + /* + We're in execution of a prepared statement or stored procedure: + reset field items to point at fields from the created temporary table. + */ + table->reset_item_list(&item_list, hidden); + } + if (fake_select_lex != NULL && + (thd->stmt_arena->is_stmt_prepare() || + (thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW))) + { + if (!fake_select_lex->join && + !(fake_select_lex->join= + new JOIN(thd, item_list, thd->variables.option_bits, result))) + { + fake_select_lex->table_list.empty(); + DBUG_RETURN(TRUE); + } + saved_error= fake_select_lex->join-> + prepare(fake_select_lex->table_list.first, 0, + global_parameters()->order_list.elements, // og_num + global_parameters()->order_list.first, // order + false, NULL, NULL, NULL, fake_select_lex, this); + fake_select_lex->table_list.empty(); + } + } + + thd->lex->current_select= lex_select_save; + + DBUG_RETURN(saved_error || thd->is_fatal_error); + +err: + thd->lex->current_select= lex_select_save; + (void) cleanup(); + DBUG_RETURN(TRUE); +} + + +/** + @brief + Optimize a sequence of set operations + + @param first_sl first select of the level now under processing + + @details + The method optimizes with the following rules: + - (1)If a subsequence of INTERSECT contains at least one INTERSECT DISTINCT + or this subsequence is followed by UNION/EXCEPT DISTINCT then all + elements in the subsequence can changed for INTERSECT DISTINCT + - (2)If previous set operation is DISTINCT then EXCEPT ALL can be 
replaced
+    with EXCEPT DISTINCT
+  - (3)If UNION DISTINCT / EXCEPT DISTINCT follows a subsequence of UNION ALL
+    then all set operations of this subsequence can be replaced with
+    UNION DISTINCT
+
+  For derived table it will look up outer select, and do optimize based on
+  outer select.
+
+  Variable "union_distinct" will be updated in the end.
+  Not compatible with Oracle Mode.
+*/
+
+void st_select_lex_unit::optimize_bag_operation(bool is_outer_distinct)
+{
+  /*
+    skip run optimize for:
+      ORACLE MODE
+      CREATE VIEW
+      PREPARE ... FROM
+      recursive
+  */
+  if ((thd->variables.sql_mode & MODE_ORACLE) ||
+      (thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) ||
+      (fake_select_lex != NULL && thd->stmt_arena->is_stmt_prepare()) ||
+      (with_element && with_element->is_recursive ))
+    return;
+  DBUG_ASSERT(!bag_set_op_optimized);
+
+  SELECT_LEX *sl;
+  /* INTERSECT subsequence can occur only at the very beginning */
+  /* The first select with linkage == INTERSECT_TYPE */
+  SELECT_LEX *intersect_start= NULL;
+  /* The first select after the INTERSECT subsequence */
+  SELECT_LEX *intersect_end= NULL;
+  /*
+    Will point to the last node before UNION ALL subsequence.
+    Index can be disabled there.
+  */
+  SELECT_LEX *disable_index= NULL;
+  /*
+    True if there is a select with:
+    linkage == INTERSECT_TYPE && distinct==true
+  */
+  bool any_intersect_distinct= false;
+  SELECT_LEX *prev_sl= first_select();
+
+  /* process INTERSECT subsequence in the beginning */
+  for (sl= prev_sl->next_select(); sl; prev_sl= sl, sl= sl->next_select())
+  {
+    if (sl->linkage != INTERSECT_TYPE)
+    {
+      intersect_end= sl;
+      break;
+    }
+    else
+    {
+      if (!intersect_start)
+        intersect_start= sl;
+      if (sl->distinct)
+      {
+        any_intersect_distinct= true;
+        disable_index= sl;
+      }
+    }
+  }
+
+  /* if subquery only contains INTERSECT and outer is UNION DISTINCT*/
+  if (!sl && is_outer_distinct)
+    any_intersect_distinct= true;
+
+  /* The first select of the current UNION ALL subsequence */
+  SELECT_LEX *union_all_start= NULL;
+  for ( ; sl; prev_sl= sl, sl= sl->next_select())
+  {
+    DBUG_ASSERT (sl->linkage != INTERSECT_TYPE);
+    if (!sl->distinct)
+    {
+      if (sl->linkage == UNION_TYPE)
+      {
+        if (!union_all_start)
+        {
+          union_all_start= sl;
+        }
+      }
+      else
+      {
+        /* rule (2): EXCEPT ALL after a DISTINCT set operation */
+        DBUG_ASSERT (sl->linkage == EXCEPT_TYPE);
+        union_all_start= NULL;
+        if (prev_sl->distinct && prev_sl->is_set_op())
+        {
+          sl->distinct= true;
+          disable_index= sl;
+        }
+      }
+    }
+    else
+    { /* sl->distinct == true */
+      /* rule (3): promote the preceding UNION ALL run to UNION DISTINCT */
+      for (SELECT_LEX *si= union_all_start; si && si != sl; si= si->next_select())
+      {
+        si->distinct= true;
+      }
+      union_all_start= NULL;
+      disable_index= sl;
+    }
+  }
+
+  if (is_outer_distinct)
+  {
+    for (SELECT_LEX *si= union_all_start; si && si != sl; si= si->next_select())
+    {
+      si->distinct= true;
+    }
+    union_all_start= NULL;
+  }
+
+  /* rule (1): promote the leading INTERSECT subsequence to DISTINCT */
+  if (any_intersect_distinct ||
+      (intersect_end != NULL && intersect_end->distinct))
+  {
+    for (sl= intersect_start; sl && sl != intersect_end; sl= sl->next_select())
+    {
+      sl->distinct= true;
+      if (disable_index && disable_index->linkage == INTERSECT_TYPE)
+        disable_index= sl;
+    }
+  }
+  /*
+    if disable_index points to an INTERSECT, based on rule 1 we can set it
+    to the last INTERSECT node.
+  */
+  if (disable_index && disable_index->linkage == INTERSECT_TYPE &&
+      intersect_end && intersect_end->distinct)
+    disable_index= intersect_end;
+  /* union_distinct controls when to disable index */
+  union_distinct= disable_index;
+
+  /* recursive call this function for whole lex tree */
+  for(sl= first_select(); sl; sl= sl->next_select())
+  {
+    if (sl->is_unit_nest() &&
+        sl->first_inner_unit() &&
+        !sl->first_inner_unit()->bag_set_op_optimized)
+      sl->first_inner_unit()->optimize_bag_operation(sl->distinct);
+  }
+
+  /* mark as optimized */
+  bag_set_op_optimized= true;
+}
+
+
+/**
+  Run optimization phase.
+
+  @return false unit successfully passed optimization phase.
+  @return TRUE  an error occurred.
+*/
+bool st_select_lex_unit::optimize()
+{
+  SELECT_LEX *lex_select_save= thd->lex->current_select;
+  SELECT_LEX *select_cursor=first_select();
+  DBUG_ENTER("st_select_lex_unit::optimize");
+
+  if (optimized && !uncacheable && !describe)
+    DBUG_RETURN(false);
+
+  if (with_element && with_element->is_recursive && optimize_started)
+    DBUG_RETURN(false);
+  optimize_started= true;
+
+  if (uncacheable || !item || !item->assigned() || describe)
+  {
+    if (item)
+      item->reset_value_registration();
+    if (optimized && item)
+    {
+      if (item->assigned())
+      {
+        item->assigned(0); // We will reinit & re-execute unit
+        item->reset();
+      }
+      if (table->is_created())
+      {
+        table->file->ha_delete_all_rows();
+        table->file->info(HA_STATUS_VARIABLE);
+      }
+      /* re-enabling indexes for next subselect iteration */
+      if ((union_result->force_enable_index_if_needed() || union_distinct))
+      {
+        if(table->file->ha_enable_indexes(HA_KEY_SWITCH_ALL))
+          DBUG_ASSERT(0);
+        else
+          table->no_keyread= 0;
+      }
+    }
+    for (SELECT_LEX *sl= select_cursor; sl; sl= sl->next_select())
+    {
+      if (sl->tvc)
+      {
+        sl->tvc->select_options=
+          (lim.is_unlimited() || sl->braces) ?
+          sl->options & ~OPTION_FOUND_ROWS : sl->options | found_rows_for_union;
+        if (sl->tvc->optimize(thd))
+        {
+          thd->lex->current_select= lex_select_save;
+          DBUG_RETURN(TRUE);
+        }
+        if (derived)
+          sl->increase_derived_records(sl->tvc->get_records());
+        continue;
+      }
+      thd->lex->current_select= sl;
+
+      if (optimized)
+        saved_error= sl->join->reinit();
+      else
+      {
+        set_limit(sl);
+        if (sl == global_parameters() || describe)
+        {
+          lim.remove_offset();
+          /*
+            We can't use LIMIT at this stage if we are using ORDER BY for the
+            whole query
+          */
+          if (sl->order_list.first || describe)
+            lim.set_unlimited();
+        }
+
+        /*
+          When using braces, SQL_CALC_FOUND_ROWS affects the whole query:
+          we don't calculate found_rows() per union part.
+          Otherwise, SQL_CALC_FOUND_ROWS should be done on all sub parts.
+        */
+        sl->join->select_options=
+          (lim.is_unlimited() || sl->braces) ?
+          sl->options & ~OPTION_FOUND_ROWS : sl->options | found_rows_for_union;
+
+        saved_error= sl->join->optimize();
+      }
+
+      if (unlikely(saved_error))
+      {
+        thd->lex->current_select= lex_select_save;
+        DBUG_RETURN(saved_error);
+      }
+    }
+  }
+  optimized= 1;
+
+  thd->lex->current_select= lex_select_save;
+  DBUG_RETURN(saved_error);
+}
+
+
+/**
+  Execute the whole unit: run every select of the union, flushing its
+  rows into the union result, then (when a fake_select_lex exists) run
+  the fake select that applies the global ORDER BY / LIMIT on top of
+  the result temporary table.
+
+  @return false on success, true on error
+*/
+bool st_select_lex_unit::exec()
+{
+  SELECT_LEX *lex_select_save= thd->lex->current_select;
+  SELECT_LEX *select_cursor=first_select();
+  ulonglong add_rows=0;
+  ha_rows examined_rows= 0;
+  bool first_execution= !executed;
+  DBUG_ENTER("st_select_lex_unit::exec");
+  bool was_executed= executed;
+
+  if (executed && !uncacheable && !describe)
+    DBUG_RETURN(FALSE);
+  executed= 1;
+  if (!(uncacheable & ~UNCACHEABLE_EXPLAIN) && item &&
+      !item->with_recursive_reference)
+    item->make_const();
+
+  saved_error= optimize();
+
+  create_explain_query_if_not_exists(thd->lex, thd->mem_root);
+
+  if (!saved_error && !was_executed)
+    save_union_explain(thd->lex->explain);
+
+  if (unlikely(saved_error))
+    DBUG_RETURN(saved_error);
+
+  if (union_result)
+  {
+    union_result->init();
+    if (uncacheable &
UNCACHEABLE_DEPENDENT &&
+        union_result->table && union_result->table->is_created())
+    {
+      union_result->table->file->ha_delete_all_rows();
+      union_result->table->file->ha_enable_indexes(HA_KEY_SWITCH_ALL);
+    }
+  }
+
+  if (uncacheable || !item || !item->assigned() || describe)
+  {
+    if (!fake_select_lex && !(with_element && with_element->is_recursive))
+      union_result->cleanup();
+    for (SELECT_LEX *sl= select_cursor; sl; sl= sl->next_select())
+    {
+      ha_rows records_at_start= 0;
+      thd->lex->current_select= sl;
+      if (union_result)
+        union_result->change_select();
+      if (fake_select_lex)
+      {
+        if (sl != thd->lex->first_select_lex())
+          fake_select_lex->uncacheable|= sl->uncacheable;
+        else
+          fake_select_lex->uncacheable= 0;
+      }
+
+      {
+        set_limit(sl);
+        if (sl == global_parameters() || describe)
+        {
+          lim.remove_offset();
+          /*
+            We can't use LIMIT at this stage if we are using ORDER BY for the
+            whole query
+          */
+          if (sl->order_list.first || describe)
+            lim.set_unlimited();
+        }
+
+        /*
+          When using braces, SQL_CALC_FOUND_ROWS affects the whole query:
+          we don't calculate found_rows() per union part.
+          Otherwise, SQL_CALC_FOUND_ROWS should be done on all sub parts.
+        */
+        if (sl->tvc)
+        {
+          sl->tvc->select_options=
+            (lim.is_unlimited() || sl->braces) ?
+            sl->options & ~OPTION_FOUND_ROWS : sl->options | found_rows_for_union;
+          saved_error= sl->tvc->optimize(thd);
+        }
+        else
+        {
+          sl->join->select_options=
+            (lim.is_unlimited() || sl->braces) ?
+            sl->options & ~OPTION_FOUND_ROWS : sl->options | found_rows_for_union;
+          saved_error= sl->join->optimize();
+        }
+      }
+      if (likely(!saved_error))
+      {
+        records_at_start= table->file->stats.records;
+        if (sl->tvc)
+          sl->tvc->exec(sl);
+        else
+          sl->join->exec();
+        if (sl == union_distinct && !have_except_all_or_intersect_all &&
+            !(with_element && with_element->is_recursive))
+        {
+          // This is UNION DISTINCT, so there should be a fake_select_lex
+          DBUG_ASSERT(fake_select_lex != NULL);
+          if (unlikely(table->file->ha_disable_indexes(HA_KEY_SWITCH_ALL)))
+            DBUG_RETURN(TRUE);
+          table->no_keyread=1;
+        }
+        if (!sl->tvc)
+          saved_error= sl->join->error;
+        if (likely(!saved_error))
+        {
+          examined_rows+= thd->get_examined_row_count();
+          thd->set_examined_row_count(0);
+          if (union_result->flush())
+          {
+            thd->lex->current_select= lex_select_save;
+            DBUG_RETURN(1);
+          }
+        }
+      }
+      if (unlikely(saved_error))
+      {
+        thd->lex->current_select= lex_select_save;
+        DBUG_RETURN(saved_error);
+      }
+      if (fake_select_lex != NULL)
+      {
+        /* Needed for the following test and for records_at_start in next loop */
+        int error= table->file->info(HA_STATUS_VARIABLE);
+        if (unlikely(error))
+        {
+          table->file->print_error(error, MYF(0));
+          DBUG_RETURN(1);
+        }
+      }
+      if (found_rows_for_union && !sl->braces &&
+          !lim.is_unlimited())
+      {
+        /*
+          This is a union without braces. Remember the number of rows that
+          could also have been part of the result set.
+          We get this from the difference between total number of possible
+          rows and actual rows added to the temporary table.
+        */
+        add_rows+= (ulonglong) (thd->limit_found_rows - (ulonglong)
+                      ((table->file->stats.records - records_at_start)));
+      }
+      if (thd->killed == ABORT_QUERY)
+      {
+        /*
+          Stop execution of the remaining queries in the UNIONS, and produce
+          the current result.
+        */
+        push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+                            ER_QUERY_RESULT_INCOMPLETE,
+                            ER_THD(thd, ER_QUERY_RESULT_INCOMPLETE),
+                            "LIMIT ROWS EXAMINED",
+                            thd->lex->limit_rows_examined->val_uint());
+        thd->reset_killed();
+        break;
+      }
+    }
+  }
+
+  DBUG_EXECUTE_IF("show_explain_probe_union_read",
+                  dbug_serve_apcs(thd, 1););
+  {
+    List empty_list;
+    empty_list.empty();
+    /*
+      Disable LIMIT ROWS EXAMINED in order to produce the possibly incomplete
+      result of the UNION without interruption due to exceeding the limit.
+    */
+    thd->lex->limit_rows_examined_cnt= ULONGLONG_MAX;
+
+    // Check if EOM
+    if (fake_select_lex != NULL && likely(!thd->is_fatal_error))
+    {
+      /* Send result to 'result' */
+      saved_error= true;
+
+      set_limit(global_parameters());
+      init_prepare_fake_select_lex(thd, first_execution);
+      JOIN *join= fake_select_lex->join;
+      saved_error= false;
+      if (!join)
+      {
+        /*
+          allocate JOIN for fake select only once (prevent
+          mysql_select automatic allocation)
+          TODO: The above is nonsense. mysql_select() will not allocate the
+          join if one already exists. There must be some other reason why we
+          don't let it allocate the join. Perhaps this is because we need
+          some special parameter values passed to join constructor?
+        */
+        if (unlikely(!(fake_select_lex->join=
+                       new JOIN(thd, item_list, fake_select_lex->options,
+                                result))))
+        {
+          fake_select_lex->table_list.empty();
+          goto err;
+        }
+        fake_select_lex->join->no_const_tables= TRUE;
+
+        /*
+          Fake st_select_lex should have item list for correct ref_array
+          allocation.
+        */
+        fake_select_lex->item_list= item_list;
+
+        /*
+          We need to add up n_sum_items in order to make the correct
+          allocation in setup_ref_array().
+          Don't add more sum_items if we have already done JOIN::prepare
+          for this (with a different join object)
+        */
+        if (fake_select_lex->ref_pointer_array.is_null())
+          fake_select_lex->n_child_sum_items+= global_parameters()->n_sum_items;
+
+        if (!was_executed)
+          save_union_explain_part2(thd->lex->explain);
+
+        saved_error= mysql_select(thd, &result_table_list,
+                                  item_list, NULL,
+                                  global_parameters()->order_list.elements,
+                                  global_parameters()->order_list.first,
+                                  NULL, NULL, NULL,
+                                  fake_select_lex->options | SELECT_NO_UNLOCK,
+                                  result, this, fake_select_lex);
+      }
+      else
+      {
+        if (describe)
+        {
+          /*
+            In EXPLAIN command, constant subqueries that do not use any
+            tables are executed two times:
+             - 1st time is a real evaluation to get the subquery value
+             - 2nd time is to produce EXPLAIN output rows.
+            1st execution sets certain members (e.g. select_result) to perform
+            subquery execution rather than EXPLAIN line production. In order
+            to reset them back, we re-do all of the actions (yes it is ugly):
+          */ // psergey-todo: is the above really necessary anymore??
+          join->init(thd, item_list, fake_select_lex->options, result);
+          saved_error= mysql_select(thd, &result_table_list, item_list, NULL,
+                                    global_parameters()->order_list.elements,
+                                    global_parameters()->order_list.first,
+                                    NULL, NULL, NULL,
+                                    fake_select_lex->options | SELECT_NO_UNLOCK,
+                                    result, this, fake_select_lex);
+        }
+        else
+        {
+          join->join_examined_rows= 0;
+          saved_error= join->reinit();
+          join->exec();
+        }
+      }
+
+      fake_select_lex->table_list.empty();
+      if (likely(!saved_error))
+      {
+        thd->limit_found_rows = (ulonglong)table->file->stats.records + add_rows;
+        thd->inc_examined_row_count(examined_rows);
+      }
+      /*
+        Mark for slow query log if any of the union parts didn't use
+        indexes efficiently
+      */
+    }
+  }
+  thd->lex->current_select= lex_select_save;
+err:
+  thd->lex->set_limit_rows_examined();
+  DBUG_RETURN(saved_error);
+}
+
+
+/**
+  @brief
+  Execute the union of the specification of a recursive with table
+
+  @details
+  The method is performed only for the units that are specifications
+  of recursive with table T. If the specification contains an anchor
+  part then the first call of this method executes only this part
+  while the following calls execute the recursive part. If there are
+  no anchors each call executes the whole unit.
+  Before the execution the method cleans up the temporary table
+  to where the new rows of the recursive table are sent.
+  After the execution of the unit these rows are copied to the
+  temporary tables created for recursive references of T.
+  If the specification of T is restricted (standards compliant)
+  then these temporary tables are cleaned up before new rows
+  are copied into them.
+
+  @retval
+    false on success
+    true on failure
+*/
+
+bool st_select_lex_unit::exec_recursive()
+{
+  st_select_lex *lex_select_save= thd->lex->current_select;
+  st_select_lex *start= with_element->first_recursive;
+  TABLE *incr_table= with_element->rec_result->incr_table;
+  st_select_lex *end= NULL;
+  bool is_unrestricted= with_element->is_unrestricted();
+  List_iterator_fast li(with_element->rec_result->rec_table_refs);
+  TMP_TABLE_PARAM *tmp_table_param= &with_element->rec_result->tmp_table_param;
+  ha_rows examined_rows= 0;
+  bool was_executed= executed;
+  TABLE_LIST *rec_tbl;
+
+  DBUG_ENTER("st_select_lex_unit::exec_recursive");
+
+  executed= 1;
+  create_explain_query_if_not_exists(thd->lex, thd->mem_root);
+  if (!was_executed)
+    save_union_explain(thd->lex->explain);
+
+  /* level == 0: first iteration — create the increment table lazily */
+  if (with_element->level == 0)
+  {
+    if (!incr_table->is_created() &&
+        instantiate_tmp_table(incr_table,
+                              tmp_table_param->keyinfo,
+                              tmp_table_param->start_recinfo,
+                              &tmp_table_param->recinfo,
+                              0))
+      DBUG_RETURN(1);
+    incr_table->file->extra(HA_EXTRA_WRITE_CACHE);
+    incr_table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+    start= first_select();
+    if (with_element->with_anchor)
+      end= with_element->first_recursive;
+  }
+  else if (unlikely((saved_error= incr_table->file->ha_delete_all_rows())))
+    goto err;
+
+  for (st_select_lex *sl= start ; sl != end; sl= sl->next_select())
+  {
+    if (with_element->level)
+    {
+      for (TABLE_LIST *derived= with_element->derived_with_rec_ref.first;
+           derived;
+           derived= derived->next_with_rec_ref)
+      {
+        if (derived->is_materialized_derived())
+        {
+          if (derived->table->is_created())
+            derived->table->file->ha_delete_all_rows();
+          derived->table->reginfo.join_tab->preread_init_done= false;
+        }
+      }
+    }
+    thd->lex->current_select= sl;
+    set_limit(sl);
+    if (sl->tvc)
+      sl->tvc->exec(sl);
+    else
+    {
+      sl->join->exec();
+      saved_error= sl->join->error;
+    }
+    if (likely(!saved_error))
+    {
+      examined_rows+= thd->get_examined_row_count();
+      thd->set_examined_row_count(0);
+      if (unlikely(union_result->flush()))
+      {
+        thd->lex->current_select= lex_select_save;
+        DBUG_RETURN(1);
+      }
+    }
+    if (unlikely(saved_error))
+    {
+      thd->lex->current_select= lex_select_save;
+      goto err;
+
+    }
+  }
+
+  thd->inc_examined_row_count(examined_rows);
+
+  /* no new rows produced on this iteration => the recursion stabilized */
+  incr_table->file->info(HA_STATUS_VARIABLE);
+  if (with_element->level && incr_table->file->stats.records == 0)
+    with_element->set_as_stabilized();
+  else
+    with_element->level++;
+
+  while ((rec_tbl= li++))
+  {
+    TABLE *rec_table= rec_tbl->table;
+    saved_error=
+      incr_table->insert_all_rows_into_tmp_table(thd, rec_table,
+                                                 tmp_table_param,
+                                                 !is_unrestricted);
+    if (!with_element->rec_result->first_rec_table_to_update)
+      with_element->rec_result->first_rec_table_to_update= rec_table;
+    if (with_element->level == 1 && rec_table->reginfo.join_tab)
+      rec_table->reginfo.join_tab->preread_init_done= true;
+  }
+  for (Item_subselect *sq= with_element->sq_with_rec_ref.first;
+       sq;
+       sq= sq->next_with_rec_ref)
+  {
+    sq->reset();
+    sq->engine->force_reexecution();
+  }
+
+  thd->lex->current_select= lex_select_save;
+err:
+  thd->lex->set_limit_rows_examined();
+  DBUG_RETURN(saved_error);
+}
+
+
+/**
+  Release resources held by this unit: per-select cleanup, the fake
+  select, ORDER BY items of the global parameters, and the union
+  result object together with its temporary table.
+  For mutually recursive CTEs the cleanup is deferred until all outer
+  references have been cleaned up (tracked via cleanup_count).
+
+  @return true if any nested cleanup reported an error
+*/
+bool st_select_lex_unit::cleanup()
+{
+  bool error= 0;
+  DBUG_ENTER("st_select_lex_unit::cleanup");
+
+  if (cleaned)
+  {
+    DBUG_RETURN(FALSE);
+  }
+  if (with_element && with_element->is_recursive && union_result &&
+      with_element->rec_outer_references)
+  {
+    select_union_recursive *result= with_element->rec_result;
+    if (++result->cleanup_count == with_element->rec_outer_references)
+    {
+      /*
+        Perform cleanup for with_element and for all with elements
+        mutually recursive with it.
+      */
+      cleaned= 1;
+      with_element->get_next_mutually_recursive()->spec->cleanup();
+    }
+    else
+    {
+      /*
+        Just increment by 1 cleanup_count for with_element and
+        for all with elements mutually recursive with it.
+      */
+      With_element *with_elem= with_element;
+      while ((with_elem= with_elem->get_next_mutually_recursive()) !=
+             with_element)
+        with_elem->rec_result->cleanup_count++;
+      DBUG_RETURN(FALSE);
+    }
+  }
+  columns_are_renamed= false;
+  cleaned= 1;
+
+  for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select())
+    error|= sl->cleanup();
+
+  if (fake_select_lex)
+  {
+    error|= fake_select_lex->cleanup();
+    /*
+      There are two cases when we should clean order items:
+      1. UNION with SELECTs which all enclosed into braces
+        in this case global_parameters == fake_select_lex
+      2. UNION where last SELECT is not enclosed into braces
+        in this case global_parameters == 'last select'
+      So we should use global_parameters->order_list for
+      proper order list clean up.
+      Note: global_parameters and fake_select_lex are always
+            initialized for UNION
+    */
+    DBUG_ASSERT(global_parameters());
+    if (global_parameters()->order_list.elements)
+    {
+      ORDER *ord;
+      for (ord= global_parameters()->order_list.first; ord; ord= ord->next)
+        (*ord->item)->walk (&Item::cleanup_processor, 0, 0);
+    }
+  }
+
+  if (with_element && with_element->is_recursive)
+  {
+    if (union_result)
+    {
+      ((select_union_recursive *) union_result)->cleanup();
+      delete union_result;
+      union_result= 0;
+    }
+    with_element->mark_as_cleaned();
+  }
+  else
+  {
+    if (union_result)
+    {
+      delete union_result;
+      union_result=0; // Safety
+      if (table)
+        free_tmp_table(thd, table);
+      table= 0; // Safety
+    }
+  }
+
+  DBUG_RETURN(error);
+}
+
+
+/* Reset the unit's execution state so it can be prepared/run again. */
+void st_select_lex_unit::reinit_exec_mechanism()
+{
+  prepared= optimized= optimized_2= executed= 0;
+  optimize_started= 0;
+  if (with_element && with_element->is_recursive)
+    with_element->reset_recursive_for_exec();
+}
+
+
+/**
+  Change the select_result object used to return the final result of
+  the unit, replacing occurrences of old_result with new_result.
+
+  @param new_result New select_result object
+  @param old_result Old select_result object
+
+  @retval false Success
+  @retval true  Error
+*/
+
+bool st_select_lex_unit::change_result(select_result_interceptor *new_result,
+                                       select_result_interceptor *old_result)
+{
+  for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select())
+  {
+    if (sl->join)
+      if (sl->join->change_result(new_result, old_result))
+        return true; /* purecov: inspected */
+  }
+  /*
+    If there were a fake_select_lex->join, we would have to change the
+    result of that also, but change_result() is called before such an
+    object is created.
+  */
+  DBUG_ASSERT(fake_select_lex == NULL || fake_select_lex->join == NULL);
+  return false;
+}
+
+/*
+  Get column type information for this unit.
+
+  SYNOPSIS
+    st_select_lex_unit::get_column_types()
+      @param for_cursor if true return the list the fields
+                        retrieved by the cursor
+
+  DESCRIPTION
+    For a single-select the column types are taken
+    from the list of selected items. For a union this function
+    assumes that st_select_lex_unit::prepare has been called
+    and returns the type holders that were created for unioned
+    column types of all selects.
+
+  NOTES
+    The implementation of this function should be in sync with
+    st_select_lex_unit::prepare()
+*/
+
+List *st_select_lex_unit::get_column_types(bool for_cursor)
+{
+  SELECT_LEX *sl= first_select();
+  bool is_procedure= !sl->tvc && sl->join->procedure ;
+
+  if (is_procedure)
+  {
+    /* Types for "SELECT * FROM t1 procedure analyse()"
+       are generated during execute */
+    return &sl->join->procedure_fields_list;
+  }
+
+
+  if (is_unit_op())
+  {
+    DBUG_ASSERT(prepared);
+    /* Types are generated during prepare */
+    return &types;
+  }
+
+  return for_cursor ? sl->join->fields : &sl->item_list;
+}
+
+
+/* Clear the counter_used flags of an ORDER BY/GROUP BY list. */
+static void cleanup_order(ORDER *order)
+{
+  for (; order; order= order->next)
+    order->counter_used= 0;
+}
+
+
+/*
+  Restore the saved partition and order lists of every window
+  specification used by the given window functions (they may have been
+  replaced during optimization of the previous execution).
+*/
+static void cleanup_window_funcs(List &win_funcs)
+{
+  List_iterator_fast it(win_funcs);
+  Item_window_func *win_func;
+  while ((win_func= it++))
+  {
+    Window_spec *win_spec= win_func->window_spec;
+    if (!win_spec)
+      continue;
+    if (win_spec->save_partition_list)
+    {
+      win_spec->partition_list= win_spec->save_partition_list;
+      win_spec->save_partition_list= NULL;
+    }
+    if (win_spec->save_order_list)
+    {
+      win_spec->order_list= win_spec->save_order_list;
+      win_spec->save_order_list= NULL;
+    }
+  }
+}
+
+
+bool st_select_lex::cleanup()
+{
+  bool error= FALSE;
+  DBUG_ENTER("st_select_lex::cleanup()");
+
+  DBUG_PRINT("info", ("select: %p (%u)  JOIN %p",
+                      this, select_number, join));
+  cleanup_order(order_list.first);
+  cleanup_order(group_list.first);
+  cleanup_ftfuncs(this);
+
+  cleanup_window_funcs(window_funcs);
+
+  if (join)
+  {
+    List_iterator ti(leaf_tables);
+    TABLE_LIST *tbl;
+    while ((tbl= ti++))
+    {
+      if (tbl->is_recursive_with_table() &&
+          !tbl->is_with_table_recursive_reference())
+      {
+        /*
+          If query is killed before open_and_process_table() for tbl
+          is called then 'with' is already set, but 'derived' is not.
+        */
+        st_select_lex_unit *unit= tbl->with->spec;
+        error|= (bool) error | (uint) unit->cleanup();
+      }
+    }
+    DBUG_ASSERT((st_select_lex*)join->select_lex == this);
+    error= join->destroy();
+    delete join;
+    join= 0;
+  }
+  leaf_tables.empty();
+  for (SELECT_LEX_UNIT *lex_unit= first_inner_unit(); lex_unit ;
+       lex_unit= lex_unit->next_unit())
+  {
+    if (lex_unit->with_element && lex_unit->with_element->is_recursive &&
+        lex_unit->with_element->rec_outer_references)
+      continue;
+    error= (bool) ((uint) error | (uint) lex_unit->cleanup());
+  }
+  inner_refs_list.empty();
+  exclude_from_table_unique_test= FALSE;
+  hidden_bit_fields= 0;
+  DBUG_RETURN(error);
+}
+
+
+void st_select_lex::cleanup_all_joins(bool full)
+{
+  SELECT_LEX_UNIT *unit;
+  SELECT_LEX *sl;
+  DBUG_ENTER("st_select_lex::cleanup_all_joins");
+
+  if (join)
+    join->cleanup(full);
+
+  for (unit= first_inner_unit(); unit; unit= unit->next_unit())
+  {
+    if (unit->with_element && unit->with_element->is_recursive)
+      continue;
+    for (sl= unit->first_select(); sl; sl= sl->next_select())
+      sl->cleanup_all_joins(full);
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Set exclude_from_table_unique_test for selects of this unit and all
+  underlying selects.
+
+  @note used to exclude materialized derived tables (views) from unique
+  table check.
+*/
+
+void st_select_lex_unit::set_unique_exclude()
+{
+  for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select())
+  {
+    sl->exclude_from_table_unique_test= TRUE;
+    for (SELECT_LEX_UNIT *unit= sl->first_inner_unit();
+         unit;
+         unit= unit->next_unit())
+    {
+      unit->set_unique_exclude();
+    }
+  }
+}
+
+/**
+  @brief
+  Check if the derived table is guaranteed to have distinct rows because of
+  UNION operations used to populate it.
+
+  @detail
+  UNION operation removes duplicate rows from its output. That is, a query like
+
+    select * from t1 UNION select * from t2
+
+  will not produce duplicate rows in its output, even if table t1 (and/or t2)
+  contain duplicate rows. EXCEPT and INTERSECT operations also have this
+  property.
+
+  On the other hand, UNION ALL operation doesn't remove duplicates. (The SQL
+  standard also defines EXCEPT ALL and INTERSECT ALL, but we don't support
+  them).
+
+  st_select_lex_unit computes its value left to right. That is, if there is
+  a st_select_lex_unit object describing
+
+    (select #1) OP1 (select #2) OP2 (select #3)
+
+  then ((select #1) OP1 (select #2)) is computed first, and OP2 is computed
+  second.
+
+  How can one tell if st_select_lex_unit is guaranteed to have distinct
+  output rows? This depends on whether the last operation was duplicate-
+  removing or not:
+  - UNION ALL is not duplicate-removing
+  - all other operations are duplicate-removing
+*/
+
+bool st_select_lex_unit::check_distinct_in_union()
+{
+  /* union_distinct points to the last duplicate-removing select, if any */
+  if (union_distinct && !union_distinct->next_select())
+    return true;
+  return false;
+}
diff --git a/sql/sql_union.h b/sql/sql_union.h
new file mode 100644
index 00000000..2cda0cbb
--- /dev/null
+++ b/sql/sql_union.h
@@ -0,0 +1,29 @@
+/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */

#ifndef SQL_UNION_INCLUDED
#define SQL_UNION_INCLUDED

class THD;
class select_result;
struct LEX;

typedef class st_select_lex_unit SELECT_LEX_UNIT;

/* Execute the UNION/EXCEPT/INTERSECT statement described by 'unit'. */
bool mysql_union(THD *thd, LEX *lex, select_result *result,
                 SELECT_LEX_UNIT *unit, ulonglong setup_tables_done_option);


#endif /* SQL_UNION_INCLUDED */
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
new file mode 100644
index 00000000..1be0bbcd
--- /dev/null
+++ b/sql/sql_update.cc
@@ -0,0 +1,3142 @@
/* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
   Copyright (c) 2011, 2022, MariaDB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */


/*
  Single table and multi table updates of tables.
  Multi-table updates were introduced by Sinisa & Monty
*/

#include "mariadb.h"                   /* NO_EMBEDDED_ACCESS_CHECKS */
#include "sql_priv.h"
#include "sql_update.h"
#include "sql_cache.h"                 // query_cache_*
#include "sql_base.h"                  // close_tables_for_reopen
#include "sql_parse.h"                 // cleanup_items
#include "sql_partition.h"             // partition_key_modified
#include "sql_select.h"
#include "sql_view.h"                  // check_key_in_view
#include "sp_head.h"
#include "sql_trigger.h"
#include "sql_statistics.h"
#include "probes_mysql.h"
#include "debug_sync.h"
#include "key.h"                       // is_key_used
#include "records.h"                   // init_read_record,
                                       // end_read_record
#include "filesort.h"                  // filesort
#include "sql_derived.h"               // mysql_derived_prepare,
                                       // mysql_handle_derived,
                                       // mysql_derived_filling


#include "sql_insert.h"                // For vers_insert_history_row() that may be
                                       // needed for System Versioning.

/**
   True if the table's input and output record buffers are comparable using
   compare_record(TABLE*).
 */
bool records_are_comparable(const TABLE *table) {
  return !table->versioned() &&
         (((table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ) == 0) ||
          bitmap_is_subset(table->write_set, table->read_set));
}


/**
   Compares the input and output record buffers of the table to see if a row
   has changed.

   @return true if row has changed.
   @return false otherwise.
*/

bool compare_record(const TABLE *table)
{
  DBUG_ASSERT(records_are_comparable(table));

  if (table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ ||
      table->s->has_update_default_function)
  {
    /*
      Storage engine may not have read all columns of the record. Fields
      (including NULL bits) not in the write_set may not have been read and
      can therefore not be compared.
      Or ON UPDATE DEFAULT NOW() could've changed field values, including
      NULL bits.
    */
    for (Field **ptr= table->field ; *ptr != NULL; ptr++)
    {
      Field *field= *ptr;
      if (field->has_explicit_value() && !field->vcol_info)
      {
        if (field->real_maybe_null())
        {
          /* Compare only this field's NULL bit in records [0] and [1]. */
          uchar null_byte_index= (uchar)(field->null_ptr - table->record[0]);

          if (((table->record[0][null_byte_index]) & field->null_bit) !=
              ((table->record[1][null_byte_index]) & field->null_bit))
            return TRUE;
        }
        if (field->cmp_binary_offset(table->s->rec_buff_length))
          return TRUE;
      }
    }
    return FALSE;
  }

  /*
    The storage engine has read all columns, so it's safe to compare all bits
    including those not in the write_set. This is cheaper than the
    field-by-field comparison done above.
  */
  if (table->s->can_cmp_whole_record)
    return cmp_record(table,record[1]);
  /* Compare null bits */
  if (memcmp(table->null_flags,
             table->null_flags+table->s->rec_buff_length,
             table->s->null_bytes_for_compare))
    return TRUE;                                // Diff in NULL value
  /* Compare updated fields */
  for (Field **ptr= table->field ; *ptr ; ptr++)
  {
    Field *field= *ptr;
    if (field->has_explicit_value() && !field->vcol_info &&
        field->cmp_binary_offset(table->s->rec_buff_length))
      return TRUE;
  }
  return FALSE;
}


/*
  check that all fields are real fields

  SYNOPSIS
    check_fields()
    thd             thread handler
    items           Items for check

  RETURN
    TRUE  Items can't be used in UPDATE
    FALSE Items are OK
*/

static bool check_fields(THD *thd, TABLE_LIST *table, List<Item> &items,
                         bool update_view)
{
  Item *item;
  if (update_view)
  {
    List_iterator<Item> it(items);
    Item_field *field;
    while ((item= it++))
    {
      if (!(field= item->field_for_view_update()))
      {
        /* item has name, because it comes from VIEW SELECT list */
        my_error(ER_NONUPDATEABLE_COLUMN, MYF(0), item->name.str);
        return TRUE;
      }
      /*
        we make temporary copy of Item_field, to avoid influence of changing
        result_field on Item_ref which refer on this field
      */
      thd->change_item_tree(it.ref(),
                            new (thd->mem_root) Item_field(thd, field));
    }
  }

  if (thd->variables.sql_mode & MODE_SIMULTANEOUS_ASSIGNMENT)
  {
    // Make sure that a column is updated only once
    List_iterator_fast<Item> it(items);
    while ((item= it++))
    {
      item->field_for_view_update()->field->clear_has_explicit_value();
    }
    it.rewind();
    while ((item= it++))
    {
      Field *f= item->field_for_view_update()->field;
      if (f->has_explicit_value())
      {
        my_error(ER_UPDATED_COLUMN_ONLY_ONCE, MYF(0),
                 *(f->table_name), f->field_name.str);
        return TRUE;
      }
      f->set_has_explicit_value();
    }
  }

  if (table->has_period())
  {
    if (table->is_view_or_derived())
    {
      my_error(ER_IT_IS_A_VIEW, MYF(0), table->table_name.str);
      return TRUE;
    }
    if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI)
    {
      my_error(ER_NOT_SUPPORTED_YET, MYF(0),
               "updating and querying the same temporal periods table");

      return true;
    }
    DBUG_ASSERT(thd->lex->sql_command == SQLCOM_UPDATE);
    /* The period start/end columns themselves may not be assigned to. */
    for (List_iterator_fast<Item> it(items); (item=it++);)
    {
      Field *f= item->field_for_view_update()->field;
      vers_select_conds_t &period= table->period_conditions;
      if (period.field_start->field == f || period.field_end->field == f)
      {
        my_error(ER_PERIOD_COLUMNS_UPDATED, MYF(0),
                 item->name.str, period.name.str);
        return true;
      }
    }
  }
  return FALSE;
}

/*
  Check whether this UPDATE touches only unversioned columns of a versioned
  table; if so, disable history-row generation for the statement.
*/
bool TABLE::vers_check_update(List<Item> &items)
{
  List_iterator<Item> it(items);
  if (!versioned_write())
    return false;

  while (Item *item= it++)
  {
    if (Item_field *item_field= item->field_for_view_update())
    {
      Field *field= item_field->field;
      if (field->table == this && !field->vers_update_unversioned())
      {
        no_cache= true;
        return true;
      }
    }
  }
  /*
    Tell TRX_ID-versioning that it does not insert history row
    (see calc_row_difference()).
  */
  vers_write= false;
  return false;
}

/**
  Re-read record if more columns are needed for error message.

  If we got a duplicate key error, we want to write an error
  message containing the value of the duplicate key.
  If we do not have
  all fields of the key value in record[0], we need to re-read the
  record with a proper read_set.

  @param[in] error   error number
  @param[in] table   table
*/

static void prepare_record_for_error_message(int error, TABLE *table)
{
  Field **field_p;
  Field *field;
  uint keynr;
  MY_BITMAP unique_map; /* Fields in offended unique. */
  my_bitmap_map unique_map_buf[bitmap_buffer_size(MAX_FIELDS)];
  DBUG_ENTER("prepare_record_for_error_message");

  /*
    Only duplicate key errors print the key value.
    If storage engine does always read all columns, we have the value already.
  */
  if ((error != HA_ERR_FOUND_DUPP_KEY) ||
      !(table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ))
    DBUG_VOID_RETURN;

  /*
    Get the number of the offended index.
    We will see MAX_KEY if the engine cannot determine the affected index.
  */
  if (unlikely((keynr= table->file->get_dup_key(error)) >= MAX_KEY))
    DBUG_VOID_RETURN;

  /* Create unique_map with all fields used by that index. */
  my_bitmap_init(&unique_map, unique_map_buf, table->s->fields);
  table->mark_index_columns(keynr, &unique_map);

  /* Subtract read_set and write_set. */
  bitmap_subtract(&unique_map, table->read_set);
  bitmap_subtract(&unique_map, table->write_set);

  /*
    If the unique index uses columns that are neither in read_set
    nor in write_set, we must re-read the record.
    Otherwise no need to do anything.
  */
  if (bitmap_is_clear_all(&unique_map))
    DBUG_VOID_RETURN;

  /* Get identifier of last read record into table->file->ref. */
  table->file->position(table->record[0]);
  /* Add all fields used by unique index to read_set. */
  bitmap_union(table->read_set, &unique_map);
  /* Tell the engine about the new set. */
  table->file->column_bitmaps_signal();

  if ((error= table->file->ha_index_or_rnd_end()) ||
      (error= table->file->ha_rnd_init(0)))
  {
    table->file->print_error(error, MYF(0));
    DBUG_VOID_RETURN;
  }

  /* Read record that is identified by table->file->ref. */
  (void) table->file->ha_rnd_pos(table->record[1], table->file->ref);
  /* Copy the newly read columns into the new record. */
  for (field_p= table->field; (field= *field_p); field_p++)
    if (bitmap_is_set(&unique_map, field->field_index))
      field->copy_from_tmp(table->s->rec_buff_length);

  DBUG_VOID_RETURN;
}


/*
  Clamp the period start/end columns of the current row to the FOR PORTION OF
  bounds when the row's period extends beyond them; marks adjusted columns as
  explicitly assigned. Returns the save_in_field() error code (0 on success).
*/
static
int cut_fields_for_portion_of_time(THD *thd, TABLE *table,
                                   const vers_select_conds_t &period_conds)
{
  bool lcond= period_conds.field_start->val_datetime_packed(thd)
              < period_conds.start.item->val_datetime_packed(thd);
  bool rcond= period_conds.field_end->val_datetime_packed(thd)
              > period_conds.end.item->val_datetime_packed(thd);

  Field *start_field= table->field[table->s->period.start_fieldno];
  Field *end_field= table->field[table->s->period.end_fieldno];

  int res= 0;
  if (lcond)
  {
    res= period_conds.start.item->save_in_field(start_field, true);
    start_field->set_has_explicit_value();
  }

  if (likely(!res) && rcond)
  {
    res= period_conds.end.item->save_in_field(end_field, true);
    end_field->set_has_explicit_value();
  }

  return res;
}

/*
  Process usual UPDATE

  SYNOPSIS
    mysql_update()
    thd			thread handler
    fields		fields for update
    values		values of fields for update
    conds		WHERE clause expression
    order_num		number of elements in ORDER BY clause
    order		ORDER BY clause list
    limit		limit clause

  RETURN
    0  - OK
    2  - privilege check and opening table passed, but we need to convert to
         multi-update because of view substitution
    1  - error
*/

int mysql_update(THD *thd,
                 TABLE_LIST *table_list,
                 List<Item> &fields,
                 List<Item> &values,
                 COND *conds,
                 uint order_num, ORDER *order,
                 ha_rows limit,
                 bool ignore,
                 ha_rows *found_return,
ha_rows *updated_return) +{ + bool using_limit= limit != HA_POS_ERROR; + bool safe_update= (thd->variables.option_bits & OPTION_SAFE_UPDATES) + && !thd->lex->describe; + bool used_key_is_modified= FALSE, transactional_table; + bool will_batch= FALSE; + bool can_compare_record; + int res; + int error, loc_error; + ha_rows dup_key_found; + bool need_sort= TRUE; + bool reverse= FALSE; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + privilege_t want_privilege(NO_ACL); +#endif + uint table_count= 0; + ha_rows updated, updated_or_same, found; + key_map old_covering_keys; + TABLE *table; + SQL_SELECT *select= NULL; + SORT_INFO *file_sort= 0; + READ_RECORD info; + SELECT_LEX *select_lex= thd->lex->first_select_lex(); + ulonglong id; + List all_fields; + killed_state killed_status= NOT_KILLED; + bool has_triggers, binlog_is_row, do_direct_update= FALSE; + Update_plan query_plan(thd->mem_root); + Explain_update *explain; + TABLE_LIST *update_source_table; + query_plan.index= MAX_KEY; + query_plan.using_filesort= FALSE; + + // For System Versioning (may need to insert new fields to a table). 
+ ha_rows rows_inserted= 0; + + DBUG_ENTER("mysql_update"); + + create_explain_query(thd->lex, thd->mem_root); + if (open_tables(thd, &table_list, &table_count, 0)) + DBUG_RETURN(1); + + /* Prepare views so they are handled correctly */ + if (mysql_handle_derived(thd->lex, DT_INIT)) + DBUG_RETURN(1); + + if (table_list->has_period() && table_list->is_view_or_derived()) + { + my_error(ER_IT_IS_A_VIEW, MYF(0), table_list->table_name.str); + DBUG_RETURN(TRUE); + } + + if (((update_source_table=unique_table(thd, table_list, + table_list->next_global, 0)) || + table_list->is_multitable())) + { + DBUG_ASSERT(update_source_table || table_list->view != 0); + DBUG_PRINT("info", ("Switch to multi-update")); + /* pass counter value */ + thd->lex->table_count_update= table_count; + if (thd->lex->period_conditions.is_set()) + { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "updating and querying the same temporal periods table"); + + DBUG_RETURN(1); + } + + /* convert to multiupdate */ + DBUG_RETURN(2); + } + if (lock_tables(thd, table_list, table_count, 0)) + DBUG_RETURN(1); + + (void) read_statistics_for_tables_if_needed(thd, table_list); + + THD_STAGE_INFO(thd, stage_init_update); + if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT)) + DBUG_RETURN(1); + if (table_list->handle_derived(thd->lex, DT_PREPARE)) + DBUG_RETURN(1); + + table= table_list->table; + + if (!table_list->single_table_updatable()) + { + my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias.str, "UPDATE"); + DBUG_RETURN(1); + } + + /* Calculate "table->covering_keys" based on the WHERE */ + table->covering_keys= table->s->keys_in_use; + table->opt_range_keys.clear_all(); + + query_plan.select_lex= thd->lex->first_select_lex(); + query_plan.table= table; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + /* Force privilege re-checking for views after they have been opened. */ + want_privilege= (table_list->view ? 
UPDATE_ACL : + table_list->grant.want_privilege); +#endif + thd->lex->promote_select_describe_flag_if_needed(); + + if (mysql_prepare_update(thd, table_list, &conds, order_num, order)) + DBUG_RETURN(1); + + if (table_list->has_period()) + { + if (!table_list->period_conditions.start.item->const_item() + || !table_list->period_conditions.end.item->const_item()) + { + my_error(ER_NOT_CONSTANT_EXPRESSION, MYF(0), "FOR PORTION OF"); + DBUG_RETURN(true); + } + table->no_cache= true; + } + + old_covering_keys= table->covering_keys; // Keys used in WHERE + /* Check the fields we are going to modify */ +#ifndef NO_EMBEDDED_ACCESS_CHECKS + table_list->grant.want_privilege= table->grant.want_privilege= want_privilege; + table_list->register_want_access(want_privilege); +#endif + /* 'Unfix' fields to allow correct marking by the setup_fields function. */ + if (table_list->is_view()) + unfix_fields(fields); + + if (setup_fields_with_no_wrap(thd, Ref_ptr_array(), + fields, MARK_COLUMNS_WRITE, 0, 0)) + DBUG_RETURN(1); /* purecov: inspected */ + if (check_fields(thd, table_list, fields, table_list->view)) + { + DBUG_RETURN(1); + } + bool has_vers_fields= table->vers_check_update(fields); + if (check_key_in_view(thd, table_list)) + { + my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias.str, "UPDATE"); + DBUG_RETURN(1); + } + + if (table->default_field) + table->mark_default_fields_for_write(false); + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + /* Check values */ + table_list->grant.want_privilege= table->grant.want_privilege= + (SELECT_ACL & ~table->grant.privilege); +#endif + if (setup_fields(thd, Ref_ptr_array(), values, MARK_COLUMNS_READ, 0, NULL, 0)) + { + free_underlaid_joins(thd, select_lex); + DBUG_RETURN(1); /* purecov: inspected */ + } + + if (table_list->table->check_assignability_explicit_fields(fields, values, + ignore)) + DBUG_RETURN(true); + + if (check_unique_table(thd, table_list)) + DBUG_RETURN(TRUE); + + switch_to_nullable_trigger_fields(fields, table); + 
switch_to_nullable_trigger_fields(values, table); + + /* Apply the IN=>EXISTS transformation to all subqueries and optimize them */ + if (select_lex->optimize_unflattened_subqueries(false)) + DBUG_RETURN(TRUE); + + if (select_lex->inner_refs_list.elements && + fix_inner_refs(thd, all_fields, select_lex, select_lex->ref_pointer_array)) + DBUG_RETURN(1); + + if (conds) + { + Item::cond_result cond_value; + conds= conds->remove_eq_conds(thd, &cond_value, true); + if (cond_value == Item::COND_FALSE) + { + limit= 0; // Impossible WHERE + query_plan.set_impossible_where(); + if (thd->lex->describe || thd->lex->analyze_stmt) + goto produce_explain_and_leave; + } + } + + // Don't count on usage of 'only index' when calculating which key to use + table->covering_keys.clear_all(); + transactional_table= table->file->has_transactions_and_rollback(); + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (prune_partitions(thd, table, conds)) + { + free_underlaid_joins(thd, select_lex); + + query_plan.set_no_partitions(); + if (thd->lex->describe || thd->lex->analyze_stmt) + goto produce_explain_and_leave; + if (thd->is_error()) + DBUG_RETURN(1); + + if (thd->binlog_for_noop_dml(transactional_table)) + DBUG_RETURN(1); + + my_ok(thd); // No matching records + DBUG_RETURN(0); + } +#endif + /* Update the table->file->stats.records number */ + table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); + set_statistics_for_table(thd, table); + + select= make_select(table, 0, 0, conds, (SORT_INFO*) 0, 0, &error); + if (unlikely(error || !limit || thd->is_error() || + (select && select->check_quick(thd, safe_update, limit)))) + { + query_plan.set_impossible_where(); + if (thd->lex->describe || thd->lex->analyze_stmt) + goto produce_explain_and_leave; + + delete select; + free_underlaid_joins(thd, select_lex); + /* + There was an error or the error was already sent by + the quick select evaluation. + TODO: Add error code output parameter to Item::val_xxx() methods. 
+ Currently they rely on the user checking DA for + errors when unwinding the stack after calling Item::val_xxx(). + */ + if (error || thd->is_error()) + { + DBUG_RETURN(1); // Error in where + } + + if (thd->binlog_for_noop_dml(transactional_table)) + DBUG_RETURN(1); + + my_ok(thd); // No matching records + DBUG_RETURN(0); + } + + /* If running in safe sql mode, don't allow updates without keys */ + if (!select || !select->quick) + { + thd->set_status_no_index_used(); + if (safe_update && !using_limit) + { + my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE, + ER_THD(thd, ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE), MYF(0)); + goto err; + } + } + if (unlikely(init_ftfuncs(thd, select_lex, 1))) + goto err; + + if (table_list->has_period()) + { + table->use_all_columns(); + table->rpl_write_set= table->write_set; + } + else + { + table->mark_columns_needed_for_update(); + } + + table->update_const_key_parts(conds); + order= simple_remove_const(order, conds); + query_plan.scanned_rows= select? select->records: table->file->stats.records; + + if (select && select->quick && select->quick->unique_key_range()) + { + /* Single row select (always "ordered"): Ok to use with key field UPDATE */ + need_sort= FALSE; + query_plan.index= MAX_KEY; + used_key_is_modified= FALSE; + } + else + { + ha_rows scanned_limit= query_plan.scanned_rows; + table->no_keyread= 1; + query_plan.index= get_index_for_order(order, table, select, limit, + &scanned_limit, &need_sort, + &reverse); + table->no_keyread= 0; + if (!need_sort) + query_plan.scanned_rows= scanned_limit; + + if (select && select->quick) + { + DBUG_ASSERT(need_sort || query_plan.index == select->quick->index); + used_key_is_modified= (!select->quick->unique_key_range() && + select->quick->is_keys_used(table->write_set)); + } + else + { + if (need_sort) + { + /* Assign table scan index to check below for modified key fields: */ + query_plan.index= table->file->key_used_on_scan; + } + if (query_plan.index != MAX_KEY) + { + /* Check if we are 
modifying a key that we are used to search with: */ + used_key_is_modified= is_key_used(table, query_plan.index, + table->write_set); + } + } + } + + /* + Query optimization is finished at this point. + - Save the decisions in the query plan + - if we're running EXPLAIN UPDATE, get out + */ + query_plan.select= select; + query_plan.possible_keys= select? select->possible_keys: key_map(0); + + if (used_key_is_modified || order || + partition_key_modified(table, table->write_set)) + { + if (order && need_sort) + query_plan.using_filesort= true; + else + query_plan.using_io_buffer= true; + } + + /* + Ok, we have generated a query plan for the UPDATE. + - if we're running EXPLAIN UPDATE, goto produce explain output + - otherwise, execute the query plan + */ + if (thd->lex->describe) + goto produce_explain_and_leave; + if (!(explain= query_plan.save_explain_update_data(thd, query_plan.mem_root))) + goto err; + + ANALYZE_START_TRACKING(thd, &explain->command_tracker); + + DBUG_EXECUTE_IF("show_explain_probe_update_exec_start", + dbug_serve_apcs(thd, 1);); + + has_triggers= (table->triggers && + (table->triggers->has_triggers(TRG_EVENT_UPDATE, + TRG_ACTION_BEFORE) || + table->triggers->has_triggers(TRG_EVENT_UPDATE, + TRG_ACTION_AFTER))); + + if (table_list->has_period()) + has_triggers= table->triggers && + (table->triggers->has_triggers(TRG_EVENT_INSERT, + TRG_ACTION_BEFORE) + || table->triggers->has_triggers(TRG_EVENT_INSERT, + TRG_ACTION_AFTER) + || has_triggers); + DBUG_PRINT("info", ("has_triggers: %s", has_triggers ? "TRUE" : "FALSE")); + binlog_is_row= thd->is_current_stmt_binlog_format_row(); + DBUG_PRINT("info", ("binlog_is_row: %s", binlog_is_row ? 
"TRUE" : "FALSE")); + + if (!(select && select->quick)) + status_var_increment(thd->status_var.update_scan_count); + + /* + We can use direct update (update that is done silently in the handler) + if none of the following conditions are true: + - There are triggers + - There is binary logging + - using_io_buffer + - This means that the partition changed or the key we want + to use for scanning the table is changed + - ignore is set + - Direct updates don't return the number of ignored rows + - There is a virtual not stored column in the WHERE clause + - Changing a field used by a stored virtual column, which + would require the column to be recalculated. + - ORDER BY or LIMIT + - As this requires the rows to be updated in a specific order + - Note that Spider can handle ORDER BY and LIMIT in a cluster with + one data node. These conditions are therefore checked in + direct_update_rows_init(). + - Update fields include a unique timestamp field + - The storage engine may not be able to avoid false duplicate key + errors. This condition is checked in direct_update_rows_init(). + + Direct update does not require a WHERE clause + + Later we also ensure that we are only using one table (no sub queries) + */ + DBUG_PRINT("info", ("HA_CAN_DIRECT_UPDATE_AND_DELETE: %s", (table->file->ha_table_flags() & HA_CAN_DIRECT_UPDATE_AND_DELETE) ? "TRUE" : "FALSE")); + DBUG_PRINT("info", ("using_io_buffer: %s", query_plan.using_io_buffer ? "TRUE" : "FALSE")); + DBUG_PRINT("info", ("ignore: %s", ignore ? "TRUE" : "FALSE")); + DBUG_PRINT("info", ("virtual_columns_marked_for_read: %s", table->check_virtual_columns_marked_for_read() ? "TRUE" : "FALSE")); + DBUG_PRINT("info", ("virtual_columns_marked_for_write: %s", table->check_virtual_columns_marked_for_write() ? 
"TRUE" : "FALSE")); + if ((table->file->ha_table_flags() & HA_CAN_DIRECT_UPDATE_AND_DELETE) && + !has_triggers && !binlog_is_row && + !query_plan.using_io_buffer && !ignore && + !table->check_virtual_columns_marked_for_read() && + !table->check_virtual_columns_marked_for_write()) + { + DBUG_PRINT("info", ("Trying direct update")); + bool use_direct_update= !select || !select->cond; + if (!use_direct_update && + (select->cond->used_tables() & ~RAND_TABLE_BIT) == table->map) + { + DBUG_ASSERT(!table->file->pushed_cond); + if (!table->file->cond_push(select->cond)) + { + use_direct_update= TRUE; + table->file->pushed_cond= select->cond; + } + } + + if (use_direct_update && + !table->file->info_push(INFO_KIND_UPDATE_FIELDS, &fields) && + !table->file->info_push(INFO_KIND_UPDATE_VALUES, &values) && + !table->file->direct_update_rows_init(&fields)) + { + do_direct_update= TRUE; + + /* Direct update is not using_filesort and is not using_io_buffer */ + goto update_begin; + } + } + + if (query_plan.using_filesort || query_plan.using_io_buffer) + { + /* + We can't update table directly; We must first search after all + matching rows before updating the table! 
+ + note: We avoid sorting if we sort on the used index + */ + if (query_plan.using_filesort) + { + /* + Doing an ORDER BY; Let filesort find and sort the rows we are going + to update + NOTE: filesort will call table->prepare_for_position() + */ + Filesort fsort(order, limit, true, select); + + Filesort_tracker *fs_tracker= + thd->lex->explain->get_upd_del_plan()->filesort_tracker; + + if (!(file_sort= filesort(thd, table, &fsort, fs_tracker))) + goto err; + thd->inc_examined_row_count(file_sort->examined_rows); + + /* + Filesort has already found and selected the rows we want to update, + so we don't need the where clause + */ + delete select; + select= 0; + } + else + { + MY_BITMAP *save_read_set= table->read_set; + MY_BITMAP *save_write_set= table->write_set; + + if (query_plan.index < MAX_KEY && old_covering_keys.is_set(query_plan.index)) + table->prepare_for_keyread(query_plan.index); + else + table->use_all_columns(); + + /* + We are doing a search on a key that is updated. In this case + we go trough the matching rows, save a pointer to them and + update these in a separate loop based on the pointer. + */ + explain->buf_tracker.on_scan_init(); + IO_CACHE tempfile; + if (open_cached_file(&tempfile, mysql_tmpdir,TEMP_PREFIX, + DISK_BUFFER_SIZE, MYF(MY_WME))) + goto err; + + /* If quick select is used, initialize it before retrieving rows. */ + if (select && select->quick && select->quick->reset()) + { + close_cached_file(&tempfile); + goto err; + } + + table->file->try_semi_consistent_read(1); + + /* + When we get here, we have one of the following options: + A. query_plan.index == MAX_KEY + This means we should use full table scan, and start it with + init_read_record call + B. 
query_plan.index != MAX_KEY + B.1 quick select is used, start the scan with init_read_record + B.2 quick select is not used, this is full index scan (with LIMIT) + Full index scan must be started with init_read_record_idx + */ + + if (query_plan.index == MAX_KEY || (select && select->quick)) + error= init_read_record(&info, thd, table, select, NULL, 0, 1, FALSE); + else + error= init_read_record_idx(&info, thd, table, 1, query_plan.index, + reverse); + + if (unlikely(error)) + { + close_cached_file(&tempfile); + goto err; + } + + THD_STAGE_INFO(thd, stage_searching_rows_for_update); + ha_rows tmp_limit= limit; + + while (likely(!(error=info.read_record())) && likely(!thd->killed)) + { + explain->buf_tracker.on_record_read(); + thd->inc_examined_row_count(1); + if (!select || (error= select->skip_record(thd)) > 0) + { + if (table->file->ha_was_semi_consistent_read()) + continue; /* repeat the read of the same row if it still exists */ + + explain->buf_tracker.on_record_after_where(); + table->file->position(table->record[0]); + if (unlikely(my_b_write(&tempfile,table->file->ref, + table->file->ref_length))) + { + error=1; /* purecov: inspected */ + break; /* purecov: inspected */ + } + if (!--limit && using_limit) + { + error= -1; + break; + } + } + else + { + /* + Don't try unlocking the row if skip_record reported an + error since in this case the transaction might have been + rolled back already. 
+ */ + if (unlikely(error < 0)) + { + /* Fatal error from select->skip_record() */ + error= 1; + break; + } + else + table->file->unlock_row(); + } + } + if (unlikely(thd->killed) && !error) + error= 1; // Aborted + limit= tmp_limit; + table->file->try_semi_consistent_read(0); + end_read_record(&info); + + /* Change select to use tempfile */ + if (select) + { + delete select->quick; + if (select->free_cond) + delete select->cond; + select->quick=0; + select->cond=0; + } + else + { + if (!(select= new SQL_SELECT)) + goto err; + select->head=table; + } + + if (unlikely(reinit_io_cache(&tempfile,READ_CACHE,0L,0,0))) + error= 1; /* purecov: inspected */ + select->file= tempfile; // Read row ptrs from this file + if (unlikely(error >= 0)) + goto err; + + table->file->ha_end_keyread(); + table->column_bitmaps_set(save_read_set, save_write_set); + } + } + +update_begin: + if (ignore) + table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + + if (select && select->quick && select->quick->reset()) + goto err; + table->file->try_semi_consistent_read(1); + if (init_read_record(&info, thd, table, select, file_sort, 0, 1, FALSE)) + goto err; + + updated= updated_or_same= found= 0; + /* + Generate an error (in TRADITIONAL mode) or warning + when trying to set a NOT NULL field to NULL. 
+ */ + thd->count_cuted_fields= CHECK_FIELD_WARN; + thd->cuted_fields=0L; + + thd->abort_on_warning= !ignore && thd->is_strict_mode(); + + if (do_direct_update) + { + /* Direct updating is supported */ + ha_rows update_rows= 0, found_rows= 0; + DBUG_PRINT("info", ("Using direct update")); + table->reset_default_fields(); + if (unlikely(!(error= table->file->ha_direct_update_rows(&update_rows, + &found_rows)))) + error= -1; + updated= update_rows; + found= found_rows; + if (found < updated) + found= updated; + goto update_end; + } + + if ((table->file->ha_table_flags() & HA_CAN_FORCE_BULK_UPDATE) && + !table->prepare_triggers_for_update_stmt_or_event() && + !thd->lex->with_rownum) + will_batch= !table->file->start_bulk_update(); + + /* + Assure that we can use position() + if we need to create an error message. + */ + if (table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ) + table->prepare_for_position(); + + table->reset_default_fields(); + + /* + We can use compare_record() to optimize away updates if + the table handler is returning all columns OR if + if all updated columns are read + */ + can_compare_record= records_are_comparable(table); + explain->tracker.on_scan_init(); + + table->file->prepare_for_insert(1); + DBUG_ASSERT(table->file->inited != handler::NONE); + + THD_STAGE_INFO(thd, stage_updating); + fix_rownum_pointers(thd, thd->lex->current_select, &updated_or_same); + thd->get_stmt_da()->reset_current_row_for_warning(1); + while (!(error=info.read_record()) && !thd->killed) + { + explain->tracker.on_record_read(); + thd->inc_examined_row_count(1); + if (!select || select->skip_record(thd) > 0) + { + if (table->file->ha_was_semi_consistent_read()) + continue; /* repeat the read of the same row if it still exists */ + + explain->tracker.on_record_after_where(); + store_record(table,record[1]); + + if (table_list->has_period()) + cut_fields_for_portion_of_time(thd, table, + table_list->period_conditions); + + if 
(fill_record_n_invoke_before_triggers(thd, table, fields, values, 0, + TRG_EVENT_UPDATE)) + break; /* purecov: inspected */ + + found++; + + bool record_was_same= false; + bool need_update= !can_compare_record || compare_record(table); + + if (need_update) + { + if (table->versioned(VERS_TIMESTAMP) && + thd->lex->sql_command == SQLCOM_DELETE) + table->vers_update_end(); + + if ((res= table_list->view_check_option(thd, ignore)) != + VIEW_CHECK_OK) + { + found--; + if (res == VIEW_CHECK_SKIP) + continue; + else if (res == VIEW_CHECK_ERROR) + { + error= 1; + break; + } + } + if (will_batch) + { + /* + Typically a batched handler can execute the batched jobs when: + 1) When specifically told to do so + 2) When it is not a good idea to batch anymore + 3) When it is necessary to send batch for other reasons + (One such reason is when READ's must be performed) + + 1) is covered by exec_bulk_update calls. + 2) and 3) is handled by the bulk_update_row method. + + bulk_update_row can execute the updates including the one + defined in the bulk_update_row or not including the row + in the call. This is up to the handler implementation and can + vary from call to call. + + The dup_key_found reports the number of duplicate keys found + in those updates actually executed. It only reports those if + the extra call with HA_EXTRA_IGNORE_DUP_KEY have been issued. + If this hasn't been issued it returns an error code and can + ignore this number. Thus any handler that implements batching + for UPDATE IGNORE must also handle this extra call properly. + + If a duplicate key is found on the record included in this + call then it should be included in the count of dup_key_found + and error should be set to 0 (only if these errors are ignored). 
+ */ + DBUG_PRINT("info", ("Batched update")); + error= table->file->ha_bulk_update_row(table->record[1], + table->record[0], + &dup_key_found); + limit+= dup_key_found; + updated-= dup_key_found; + } + else + { + /* Non-batched update */ + error= table->file->ha_update_row(table->record[1], + table->record[0]); + } + + record_was_same= error == HA_ERR_RECORD_IS_THE_SAME; + if (unlikely(record_was_same)) + { + error= 0; + updated_or_same++; + } + else if (likely(!error)) + { + if (has_vers_fields && table->versioned(VERS_TRX_ID)) + rows_inserted++; + updated++; + updated_or_same++; + } + + if (likely(!error) && !record_was_same && table_list->has_period()) + { + store_record(table, record[2]); + restore_record(table, record[1]); + error= table->insert_portion_of_time(thd, + table_list->period_conditions, + &rows_inserted); + restore_record(table, record[2]); + } + + if (unlikely(error) && + (!ignore || table->file->is_fatal_error(error, HA_CHECK_ALL))) + { + goto error; + } + } + else + updated_or_same++; + + if (likely(!error) && has_vers_fields && table->versioned(VERS_TIMESTAMP)) + { + store_record(table, record[2]); + table->mark_columns_per_binlog_row_image(); + error= vers_insert_history_row(table); + restore_record(table, record[2]); + if (unlikely(error)) + { +error: + /* + If (ignore && error is ignorable) we don't have to + do anything; otherwise... 
+ */ + myf flags= 0; + + if (table->file->is_fatal_error(error, HA_CHECK_ALL)) + flags|= ME_FATAL; /* Other handler errors are fatal */ + + prepare_record_for_error_message(error, table); + table->file->print_error(error,MYF(flags)); + error= 1; + break; + } + rows_inserted++; + } + + if (table->triggers && + unlikely(table->triggers->process_triggers(thd, TRG_EVENT_UPDATE, + TRG_ACTION_AFTER, TRUE))) + { + error= 1; + break; + } + + if (!--limit && using_limit) + { + /* + We have reached end-of-file in most common situations where no + batching has occurred and if batching was supposed to occur but + no updates were made and finally when the batch execution was + performed without error and without finding any duplicate keys. + If the batched updates were performed with errors we need to + check and if no error but duplicate key's found we need to + continue since those are not counted for in limit. + */ + if (will_batch && + ((error= table->file->exec_bulk_update(&dup_key_found)) || + dup_key_found)) + { + if (error) + { + /* purecov: begin inspected */ + /* + The handler should not report error of duplicate keys if they + are ignored. This is a requirement on batching handlers. + */ + prepare_record_for_error_message(error, table); + table->file->print_error(error,MYF(0)); + error= 1; + break; + /* purecov: end */ + } + /* + Either an error was found and we are ignoring errors or there + were duplicate keys found. In both cases we need to correct + the counters and continue the loop. + */ + limit= dup_key_found; //limit is 0 when we get here so need to + + updated-= dup_key_found; + } + else + { + error= -1; // Simulate end of file + break; + } + } + } + /* + Don't try unlocking the row if skip_record reported an error since in + this case the transaction might have been rolled back already. 
+ */ + else if (likely(!thd->is_error())) + table->file->unlock_row(); + else + { + error= 1; + break; + } + thd->get_stmt_da()->inc_current_row_for_warning(); + if (unlikely(thd->is_error())) + { + error= 1; + break; + } + } + ANALYZE_STOP_TRACKING(thd, &explain->command_tracker); + table->auto_increment_field_not_null= FALSE; + dup_key_found= 0; + /* + Caching the killed status to pass as the arg to query event constuctor; + The cached value can not change whereas the killed status can + (externally) since this point and change of the latter won't affect + binlogging. + It's assumed that if an error was set in combination with an effective + killed status then the error is due to killing. + */ + killed_status= thd->killed; // get the status of the volatile + // simulated killing after the loop must be ineffective for binlogging + DBUG_EXECUTE_IF("simulate_kill_bug27571", + { + thd->set_killed(KILL_QUERY); + };); + error= (killed_status == NOT_KILLED)? error : 1; + + if (likely(error) && + will_batch && + (loc_error= table->file->exec_bulk_update(&dup_key_found))) + /* + An error has occurred when a batched update was performed and returned + an error indication. It cannot be an allowed duplicate key error since + we require the batching handler to treat this as a normal behavior. + + Otherwise we simply remove the number of duplicate keys records found + in the batched update. 
+ */ + { + /* purecov: begin inspected */ + prepare_record_for_error_message(loc_error, table); + table->file->print_error(loc_error,MYF(ME_FATAL)); + error= 1; + /* purecov: end */ + } + else + updated-= dup_key_found; + if (will_batch) + table->file->end_bulk_update(); + +update_end: + table->file->try_semi_consistent_read(0); + + if (!transactional_table && updated > 0) + thd->transaction->stmt.modified_non_trans_table= TRUE; + + end_read_record(&info); + delete select; + select= NULL; + THD_STAGE_INFO(thd, stage_end); + if (table_list->has_period()) + table->file->ha_release_auto_increment(); + (void) table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); + + /* + Invalidate the table in the query cache if something changed. + This must be before binlog writing and ha_autocommit_... + */ + if (updated) + { + query_cache_invalidate3(thd, table_list, 1); + } + + if (thd->transaction->stmt.modified_non_trans_table) + thd->transaction->all.modified_non_trans_table= TRUE; + thd->transaction->all.m_unsafe_rollback_flags|= + (thd->transaction->stmt.m_unsafe_rollback_flags & THD_TRANS::DID_WAIT); + + /* + error < 0 means really no error at all: we processed all rows until the + last one without error. error > 0 means an error (e.g. unique key + violation and no IGNORE or REPLACE). error == 0 is also an error (if + preparing the record or invoking before triggers fails). See + ha_autocommit_or_rollback(error>=0) and DBUG_RETURN(error>=0) below. + Sometimes we want to binlog even if we updated no rows, in case user used + it to be sure master and slave are in same state. 
+ */ + if (likely(error < 0) || thd->transaction->stmt.modified_non_trans_table || + thd->log_current_statement()) + { + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) + { + int errcode= 0; + if (likely(error < 0)) + thd->clear_error(); + else + errcode= query_error_code(thd, killed_status == NOT_KILLED); + + StatementBinlog stmt_binlog(thd, table->versioned(VERS_TRX_ID) || + thd->binlog_need_stmt_format(transactional_table)); + if (thd->binlog_query(THD::ROW_QUERY_TYPE, + thd->query(), thd->query_length(), + transactional_table, FALSE, FALSE, errcode) > 0) + { + error=1; // Rollback update + } + } + } + DBUG_ASSERT(transactional_table || !updated || thd->transaction->stmt.modified_non_trans_table); + free_underlaid_joins(thd, select_lex); + delete file_sort; + if (table->file->pushed_cond) + { + table->file->pushed_cond= 0; + table->file->cond_pop(); + } + + /* If LAST_INSERT_ID(X) was used, report X */ + id= thd->arg_of_last_insert_id_function ? + thd->first_successful_insert_id_in_prev_stmt : 0; + + if (likely(error < 0) && likely(!thd->lex->analyze_stmt)) + { + char buff[MYSQL_ERRMSG_SIZE]; + if (!table->versioned(VERS_TIMESTAMP) && !table_list->has_period()) + my_snprintf(buff, sizeof(buff), ER_THD(thd, ER_UPDATE_INFO), (ulong) found, + (ulong) updated, + (ulong) thd->get_stmt_da()->current_statement_warn_count()); + else + my_snprintf(buff, sizeof(buff), + ER_THD(thd, ER_UPDATE_INFO_WITH_SYSTEM_VERSIONING), + (ulong) found, (ulong) updated, (ulong) rows_inserted, + (ulong) thd->get_stmt_da()->current_statement_warn_count()); + my_ok(thd, (thd->client_capabilities & CLIENT_FOUND_ROWS) ? 
found : updated, + id, buff); + DBUG_PRINT("info",("%ld records updated", (long) updated)); + } + thd->count_cuted_fields= CHECK_FIELD_IGNORE; /* calc cuted fields */ + thd->abort_on_warning= 0; + if (thd->lex->current_select->first_cond_optimization) + { + thd->lex->current_select->save_leaf_tables(thd); + thd->lex->current_select->first_cond_optimization= 0; + } + *found_return= found; + *updated_return= updated; + + if (unlikely(thd->lex->analyze_stmt)) + goto emit_explain_and_leave; + + DBUG_RETURN((error >= 0 || thd->is_error()) ? 1 : 0); + +err: + delete select; + delete file_sort; + free_underlaid_joins(thd, select_lex); + table->file->ha_end_keyread(); + if (table->file->pushed_cond) + table->file->cond_pop(); + thd->abort_on_warning= 0; + DBUG_RETURN(1); + +produce_explain_and_leave: + /* + We come here for various "degenerate" query plans: impossible WHERE, + no-partitions-used, impossible-range, etc. + */ + if (unlikely(!query_plan.save_explain_update_data(thd, query_plan.mem_root))) + goto err; + +emit_explain_and_leave: + bool extended= thd->lex->describe & DESCRIBE_EXTENDED; + int err2= thd->lex->explain->send_explain(thd, extended); + + delete select; + free_underlaid_joins(thd, select_lex); + DBUG_RETURN((err2 || thd->is_error()) ? 
1 : 0); +} + +/* + Prepare items in UPDATE statement + + SYNOPSIS + mysql_prepare_update() + thd - thread handler + table_list - global/local table list + conds - conditions + order_num - number of ORDER BY list entries + order - ORDER BY clause list + + RETURN VALUE + FALSE OK + TRUE error +*/ +bool mysql_prepare_update(THD *thd, TABLE_LIST *table_list, + Item **conds, uint order_num, ORDER *order) +{ + Item *fake_conds= 0; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + TABLE *table= table_list->table; +#endif + List all_fields; + SELECT_LEX *select_lex= thd->lex->first_select_lex(); + DBUG_ENTER("mysql_prepare_update"); + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + table_list->grant.want_privilege= table->grant.want_privilege= + (SELECT_ACL & ~table->grant.privilege); + table_list->register_want_access(SELECT_ACL); +#endif + + thd->lex->allow_sum_func.clear_all(); + + if (table_list->has_period() && + select_lex->period_setup_conds(thd, table_list)) + DBUG_RETURN(true); + + DBUG_ASSERT(table_list->table); + // conds could be cached from previous SP call + DBUG_ASSERT(!table_list->vers_conditions.need_setup() || + !*conds || thd->stmt_arena->is_stmt_execute()); + if (select_lex->vers_setup_conds(thd, table_list)) + DBUG_RETURN(TRUE); + + *conds= select_lex->where; + + /* + We do not call DT_MERGE_FOR_INSERT because it has no sense for simple + (not multi-) update + */ + if (mysql_handle_derived(thd->lex, DT_PREPARE)) + DBUG_RETURN(TRUE); + + if (setup_tables_and_check_access(thd, &select_lex->context, + &select_lex->top_join_list, table_list, + select_lex->leaf_tables, + FALSE, UPDATE_ACL, SELECT_ACL, TRUE) || + setup_conds(thd, table_list, select_lex->leaf_tables, conds) || + select_lex->setup_ref_array(thd, order_num) || + setup_order(thd, select_lex->ref_pointer_array, + table_list, all_fields, all_fields, order) || + setup_ftfuncs(select_lex)) + DBUG_RETURN(TRUE); + + + select_lex->fix_prepare_information(thd, conds, &fake_conds); + if (!thd->lex->upd_del_where) + 
thd->lex->upd_del_where= *conds; + DBUG_RETURN(FALSE); +} + +/** + Check that we are not using table that we are updating in a sub select + + @param thd Thread handle + @param table_list List of table with first to check + + @retval TRUE Error + @retval FALSE OK +*/ +bool check_unique_table(THD *thd, TABLE_LIST *table_list) +{ + TABLE_LIST *duplicate; + DBUG_ENTER("check_unique_table"); + if ((duplicate= unique_table(thd, table_list, table_list->next_global, 0))) + { + update_non_unique_table_error(table_list, "UPDATE", duplicate); + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + +/*************************************************************************** + Update multiple tables from join +***************************************************************************/ + +/* + Get table map for list of Item_field +*/ + +static table_map get_table_map(List *items) +{ + List_iterator_fast item_it(*items); + Item_field *item; + table_map map= 0; + + while ((item= (Item_field *) item_it++)) + map|= item->all_used_tables(); + DBUG_PRINT("info", ("table_map: 0x%08lx", (long) map)); + return map; +} + +/** + If one row is updated through two different aliases and the first + update physically moves the row, the second update will error + because the row is no longer located where expected. This function + checks if the multiple-table update is about to do that and if so + returns with an error. + + The following update operations physically moves rows: + 1) Update of a column in a clustered primary key + 2) Update of a column used to calculate which partition the row belongs to + + This function returns with an error if both of the following are + true: + + a) A table in the multiple-table update statement is updated + through multiple aliases (including views) + b) At least one of the updates on the table from a) may physically + moves the row. Note: Updating a column used to calculate which + partition a row belongs to does not necessarily mean that the + row is moved. 
The new value may or may not belong to the same + partition. + + @param leaves First leaf table + @param tables_for_update Map of tables that are updated + + @return + true if the update is unsafe, in which case an error message is also set, + false otherwise. +*/ +static +bool unsafe_key_update(List leaves, table_map tables_for_update) +{ + List_iterator_fast it(leaves), it2(leaves); + TABLE_LIST *tl, *tl2; + + while ((tl= it++)) + { + if (!tl->is_jtbm() && (tl->table->map & tables_for_update)) + { + TABLE *table1= tl->table; + bool primkey_clustered= (table1->file-> + pk_is_clustering_key(table1->s->primary_key)); + + bool table_partitioned= false; +#ifdef WITH_PARTITION_STORAGE_ENGINE + table_partitioned= (table1->part_info != NULL); +#endif + + if (!table_partitioned && !primkey_clustered) + continue; + + it2.rewind(); + while ((tl2= it2++)) + { + if (tl2->is_jtbm()) + continue; + /* + Look at "next" tables only since all previous tables have + already been checked + */ + TABLE *table2= tl2->table; + if (tl2 != tl && + table2->map & tables_for_update && table1->s == table2->s) + { + // A table is updated through two aliases + if (table_partitioned && + (partition_key_modified(table1, table1->write_set) || + partition_key_modified(table2, table2->write_set))) + { + // Partitioned key is updated + my_error(ER_MULTI_UPDATE_KEY_CONFLICT, MYF(0), + tl->top_table()->alias.str, + tl2->top_table()->alias.str); + return true; + } + + if (primkey_clustered) + { + // The primary key can cover multiple columns + KEY key_info= table1->key_info[table1->s->primary_key]; + KEY_PART_INFO *key_part= key_info.key_part; + KEY_PART_INFO *key_part_end= key_part + key_info.user_defined_key_parts; + + for (;key_part != key_part_end; ++key_part) + { + if (bitmap_is_set(table1->write_set, key_part->fieldnr-1) || + bitmap_is_set(table2->write_set, key_part->fieldnr-1)) + { + // Clustered primary key is updated + my_error(ER_MULTI_UPDATE_KEY_CONFLICT, MYF(0), + tl->top_table()->alias.str, 
+ tl2->top_table()->alias.str); + return true; + } + } + } + } + } + } + } + return false; +} + +/** + Check if there is enough privilege on specific table used by the + main select list of multi-update directly or indirectly (through + a view). + + @param[in] thd Thread context. + @param[in] table Table list element for the table. + @param[in] tables_for_update Bitmap with tables being updated. + @param[in/out] updated_arg Set to true if table in question is + updated, also set to true if it is + a view and one of its underlying + tables is updated. Should be + initialized to false by the caller + before a sequence of calls to this + function. + + @note To determine which tables/views are updated we have to go from + leaves to root since tables_for_update contains map of leaf + tables being updated and doesn't include non-leaf tables + (fields are already resolved to leaf tables). + + @retval false - Success, all necessary privileges on all tables are + present or might be present on column-level. + @retval true - Failure, some necessary privilege on some table is + missing. +*/ + +static bool multi_update_check_table_access(THD *thd, TABLE_LIST *table, + table_map tables_for_update, + bool *updated_arg) +{ + if (table->view) + { + bool updated= false; + /* + If it is a mergeable view then we need to check privileges on its + underlying tables being merged (including views). We also need to + check if any of them is updated in order to find if this view is + updated. + If it is a non-mergeable view then it can't be updated. + */ + DBUG_ASSERT(table->merge_underlying_list || + (!table->updatable && + !(table->table->map & tables_for_update))); + + for (TABLE_LIST *tbl= table->merge_underlying_list; tbl; + tbl= tbl->next_local) + { + if (multi_update_check_table_access(thd, tbl, tables_for_update, + &updated)) + { + tbl->hide_view_error(thd); + return true; + } + } + if (check_table_access(thd, updated ? 
UPDATE_ACL: SELECT_ACL, table, + FALSE, 1, FALSE)) + return true; + *updated_arg|= updated; + /* We only need SELECT privilege for columns in the values list. */ + table->grant.want_privilege= SELECT_ACL & ~table->grant.privilege; + } + else + { + /* Must be a base or derived table. */ + const bool updated= table->table->map & tables_for_update; + if (check_table_access(thd, updated ? UPDATE_ACL : SELECT_ACL, table, + FALSE, 1, FALSE)) + return true; + *updated_arg|= updated; + /* We only need SELECT privilege for columns in the values list. */ + if (!table->derived) + { + table->grant.want_privilege= SELECT_ACL & ~table->grant.privilege; + table->table->grant.want_privilege= (SELECT_ACL & + ~table->table->grant.privilege); + } + } + return false; +} + + +class Multiupdate_prelocking_strategy : public DML_prelocking_strategy +{ + bool done; + bool has_prelocking_list; +public: + void reset(THD *thd); + bool handle_end(THD *thd); +}; + +void Multiupdate_prelocking_strategy::reset(THD *thd) +{ + done= false; + has_prelocking_list= thd->lex->requires_prelocking(); +} + +/** + Determine what tables could be updated in the multi-update + + For these tables we'll need to open triggers and continue prelocking + until all is open. +*/ +bool Multiupdate_prelocking_strategy::handle_end(THD *thd) +{ + DBUG_ENTER("Multiupdate_prelocking_strategy::handle_end"); + if (done) + DBUG_RETURN(0); + + LEX *lex= thd->lex; + SELECT_LEX *select_lex= lex->first_select_lex(); + TABLE_LIST *table_list= lex->query_tables, *tl; + + done= true; + + if (mysql_handle_derived(lex, DT_INIT) || + mysql_handle_derived(lex, DT_MERGE_FOR_INSERT) || + mysql_handle_derived(lex, DT_PREPARE)) + DBUG_RETURN(1); + + /* + setup_tables() need for VIEWs. JOIN::prepare() will call setup_tables() + second time, but this call will do nothing (there are check for second + call in setup_tables()). 
+ */ + + if (setup_tables_and_check_access(thd, &select_lex->context, + &select_lex->top_join_list, table_list, select_lex->leaf_tables, + FALSE, UPDATE_ACL, SELECT_ACL, TRUE)) + DBUG_RETURN(1); + + List *fields= &lex->first_select_lex()->item_list; + if (setup_fields_with_no_wrap(thd, Ref_ptr_array(), + *fields, MARK_COLUMNS_WRITE, 0, 0)) + DBUG_RETURN(1); + + // Check if we have a view in the list ... + for (tl= table_list; tl ; tl= tl->next_local) + if (tl->view) + break; + // ... and pass this knowlage in check_fields call + if (check_fields(thd, table_list, *fields, tl != NULL )) + DBUG_RETURN(1); + + table_map tables_for_update= thd->table_map_for_update= get_table_map(fields); + + if (unsafe_key_update(select_lex->leaf_tables, tables_for_update)) + DBUG_RETURN(1); + + /* + Setup timestamp handling and locking mode + */ + List_iterator ti(lex->first_select_lex()->leaf_tables); + const bool using_lock_tables= thd->locked_tables_mode != LTM_NONE; + while ((tl= ti++)) + { + TABLE *table= tl->table; + + if (tl->is_jtbm()) + continue; + + /* if table will be updated then check that it is unique */ + if (table->map & tables_for_update) + { + if (!tl->single_table_updatable() || check_key_in_view(thd, tl)) + { + my_error(ER_NON_UPDATABLE_TABLE, MYF(0), + tl->top_table()->alias.str, "UPDATE"); + DBUG_RETURN(1); + } + + DBUG_PRINT("info",("setting table `%s` for update", + tl->top_table()->alias.str)); + /* + If table will be updated we should not downgrade lock for it and + leave it as is. + */ + tl->updating= 1; + if (tl->belong_to_view) + tl->belong_to_view->updating= 1; + if (extend_table_list(thd, tl, this, has_prelocking_list)) + DBUG_RETURN(1); + } + else + { + DBUG_PRINT("info",("setting table `%s` for read-only", tl->alias.str)); + /* + If we are using the binary log, we need TL_READ_NO_INSERT to get + correct order of statements. Otherwise, we use a TL_READ lock to + improve performance. 
+ We don't downgrade metadata lock from SW to SR in this case as + there is no guarantee that the same ticket is not used by + another table instance used by this statement which is going to + be write-locked (for example, trigger to be invoked might try + to update this table). + Last argument routine_modifies_data for read_lock_type_for_table() + is ignored, as prelocking placeholder will never be set here. + */ + DBUG_ASSERT(tl->prelocking_placeholder == false); + thr_lock_type lock_type= read_lock_type_for_table(thd, lex, tl, true); + if (using_lock_tables) + tl->lock_type= lock_type; + else + tl->set_lock_type(thd, lock_type); + } + } + + /* + Check access privileges for tables being updated or read. + Note that unlike in the above loop we need to iterate here not only + through all leaf tables but also through all view hierarchy. + */ + + for (tl= table_list; tl; tl= tl->next_local) + { + bool not_used= false; + if (tl->is_jtbm()) + continue; + if (multi_update_check_table_access(thd, tl, tables_for_update, ¬_used)) + DBUG_RETURN(TRUE); + } + + /* check single table update for view compound from several tables */ + for (tl= table_list; tl; tl= tl->next_local) + { + TABLE_LIST *for_update= 0; + if (tl->is_jtbm()) + continue; + if (tl->is_merged_derived() && + tl->check_single_table(&for_update, tables_for_update, tl)) + { + my_error(ER_VIEW_MULTIUPDATE, MYF(0), tl->view_db.str, tl->view_name.str); + DBUG_RETURN(1); + } + } + + DBUG_RETURN(0); +} + +/* + make update specific preparation and checks after opening tables + + SYNOPSIS + mysql_multi_update_prepare() + thd thread handler + + RETURN + FALSE OK + TRUE Error +*/ + +int mysql_multi_update_prepare(THD *thd) +{ + LEX *lex= thd->lex; + TABLE_LIST *table_list= lex->query_tables; + TABLE_LIST *tl; + Multiupdate_prelocking_strategy prelocking_strategy; + uint table_count= lex->table_count_update; + DBUG_ENTER("mysql_multi_update_prepare"); + + /* + Open tables and create derived ones, but do not lock and fill 
them yet. + + During prepare phase acquire only S metadata locks instead of SW locks to + keep prepare of multi-UPDATE compatible with concurrent LOCK TABLES WRITE + and global read lock. + + Don't evaluate any subqueries even if constant, because + tables aren't locked yet. + */ + lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_DERIVED; + if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI) + { + if (open_tables(thd, &table_list, &table_count, + thd->stmt_arena->is_stmt_prepare() ? MYSQL_OPEN_FORCE_SHARED_MDL : 0, + &prelocking_strategy)) + DBUG_RETURN(TRUE); + } + else + { + /* following need for prepared statements, to run next time multi-update */ + thd->lex->sql_command= SQLCOM_UPDATE_MULTI; + prelocking_strategy.reset(thd); + if (prelocking_strategy.handle_end(thd)) + DBUG_RETURN(TRUE); + } + + /* now lock and fill tables */ + if (!thd->stmt_arena->is_stmt_prepare() && + lock_tables(thd, table_list, table_count, 0)) + DBUG_RETURN(TRUE); + + lex->context_analysis_only&= ~CONTEXT_ANALYSIS_ONLY_DERIVED; + + (void) read_statistics_for_tables_if_needed(thd, table_list); + /* @todo: downgrade the metadata locks here. */ + + /* + Check that we are not using table that we are updating, but we should + skip all tables of UPDATE SELECT itself + */ + lex->first_select_lex()->exclude_from_table_unique_test= TRUE; + /* We only need SELECT privilege for columns in the values list */ + List_iterator ti(lex->first_select_lex()->leaf_tables); + while ((tl= ti++)) + { + if (tl->is_jtbm()) + continue; + TABLE *table= tl->table; + TABLE_LIST *tlist; + if (!(tlist= tl->top_table())->derived) + { + tlist->grant.want_privilege= + (SELECT_ACL & ~tlist->grant.privilege); + table->grant.want_privilege= (SELECT_ACL & ~table->grant.privilege); + } + DBUG_PRINT("info", ("table: %s want_privilege: %llx", tl->alias.str, + (longlong) table->grant.want_privilege)); + } + /* + Set exclude_from_table_unique_test value back to FALSE. 
  It is needed for
  further check in multi_update::prepare whether to use record cache.
  */
  lex->first_select_lex()->exclude_from_table_unique_test= FALSE;

  if (lex->save_prep_leaf_tables())
    DBUG_RETURN(TRUE);

  DBUG_RETURN (FALSE);
}


/*
  Setup multi-update handling and call SELECT to do the join

  NOTE(review): List/List_iterator template arguments (e.g. List<Item>)
  appear to have been stripped by text extraction throughout this chunk;
  verify each declaration against upstream sql_update.cc before reuse.
*/

bool mysql_multi_update(THD *thd, TABLE_LIST *table_list, List *fields,
                        List *values, COND *conds, ulonglong options,
                        enum enum_duplicates handle_duplicates,
                        bool ignore, SELECT_LEX_UNIT *unit,
                        SELECT_LEX *select_lex, multi_update **result)
{
  bool res;
  DBUG_ENTER("mysql_multi_update");

  /* The multi_update result sink is returned to the caller via *result. */
  if (!(*result= new (thd->mem_root) multi_update(thd, table_list,
                 &thd->lex->first_select_lex()->leaf_tables,
                 fields, values, handle_duplicates, ignore)))
  {
    DBUG_RETURN(TRUE);
  }

  if ((*result)->init(thd))
    DBUG_RETURN(1);

  /* In strict mode (and without IGNORE) data-conversion warnings abort. */
  thd->abort_on_warning= !ignore && thd->is_strict_mode();
  List total_list;

  if (setup_tables(thd, &select_lex->context, &select_lex->top_join_list,
                   table_list, select_lex->leaf_tables, FALSE, FALSE))
    DBUG_RETURN(1);

  if (select_lex->vers_setup_conds(thd, table_list))
    DBUG_RETURN(1);

  /*
    The actual update work is done by the join executor calling back into
    multi_update (*result) via the select_result interface.
  */
  res= mysql_select(thd,
                    table_list, total_list, conds,
                    select_lex->order_list.elements,
                    select_lex->order_list.first, NULL, NULL, NULL,
                    options | SELECT_NO_JOIN_CACHE | SELECT_NO_UNLOCK |
                    OPTION_SETUP_TABLES_DONE,
                    *result, unit, select_lex);

  DBUG_PRINT("info",("res: %d report_error: %d", res, (int) thd->is_error()));
  res|= thd->is_error();
  if (unlikely(res))
    (*result)->abort_result_set();
  else
  {
    if (thd->lex->describe || thd->lex->analyze_stmt)
    {
      bool extended= thd->lex->describe & DESCRIBE_EXTENDED;
      res= thd->lex->explain->send_explain(thd, extended);
    }
  }
  thd->abort_on_warning= 0;
  DBUG_RETURN(res);
}


/*
  Constructor only stores its arguments and zeroes the bookkeeping state;
  real initialization happens in init() and prepare().
*/
multi_update::multi_update(THD *thd_arg, TABLE_LIST *table_list,
                           List *leaves_list,
                           List *field_list, List *value_list,
                           enum enum_duplicates handle_duplicates_arg,
                           bool ignore_arg):
  select_result_interceptor(thd_arg),
  all_tables(table_list), leaves(leaves_list), update_tables(0),
  tmp_tables(0), updated(0), found(0), fields(field_list),
  values(value_list), table_count(0), copy_field(0),
  handle_duplicates(handle_duplicates_arg), do_update(1), trans_safe(1),
  transactional_tables(0), ignore(ignore_arg), error_handled(0), prepared(0),
  updated_sys_ver(0)
{
}


/*
  Collect into updated_leaves the leaf tables that are actually assigned to
  by the SET clause (skipping JTBM semi-join materialization pseudo-tables).
*/
bool multi_update::init(THD *thd)
{
  table_map tables_to_update= get_table_map(fields);
  List_iterator_fast li(*leaves);
  TABLE_LIST *tbl;
  while ((tbl =li++))
  {
    if (tbl->is_jtbm())
      continue;
    if (!(tbl->table->map & tables_to_update))
      continue;
    if (updated_leaves.push_back(tbl, thd->mem_root))
      return true;
  }
  return false;
}


/*
  Connect fields with tables and create list of tables that are updated
*/

int multi_update::prepare(List &not_used_values,
                          SELECT_LEX_UNIT *lex_unit)

{
  TABLE_LIST *table_ref;
  SQL_I_List update;
  table_map tables_to_update;
  Item_field *item;
  List_iterator_fast field_it(*fields);
  List_iterator_fast value_it(*values);
  uint i, max_fields;
  uint leaf_table_count= 0;
  List_iterator ti(updated_leaves);
  DBUG_ENTER("multi_update::prepare");

  /* prepare() may be entered twice (e.g. PS re-execution); run once. */
  if (prepared)
    DBUG_RETURN(0);
  prepared= true;

  thd->count_cuted_fields= CHECK_FIELD_WARN;
  thd->cuted_fields=0L;
  THD_STAGE_INFO(thd, stage_updating_main_table);

  tables_to_update= get_table_map(fields);

  if (!tables_to_update)
  {
    my_message(ER_NO_TABLES_USED, ER_THD(thd, ER_NO_TABLES_USED), MYF(0));
    DBUG_RETURN(1);
  }

  /*
    We gather the set of columns read during evaluation of SET expression in
    TABLE::tmp_set by pointing TABLE::read_set to it and then restore it after
    setup_fields().
  */
  while ((table_ref= ti++))
  {
    if (table_ref->is_jtbm())
      continue;

    TABLE *table= table_ref->table;
    if (tables_to_update & table->map)
    {
      DBUG_ASSERT(table->read_set == &table->def_read_set);
      table->read_set= &table->tmp_set;
      bitmap_clear_all(table->read_set);
    }
  }

  /*
    We have to check values after setup_tables to get covering_keys right in
    reference tables
  */

  int error= setup_fields(thd, Ref_ptr_array(),
                          *values, MARK_COLUMNS_READ, 0, NULL, 0) ||
             TABLE::check_assignability_explicit_fields(*fields, *values,
                                                        ignore);

  /* Restore read_set and merge in the columns the SET expressions read. */
  ti.rewind();
  while ((table_ref= ti++))
  {
    if (table_ref->is_jtbm())
      continue;

    TABLE *table= table_ref->table;
    if (tables_to_update & table->map)
    {
      table->read_set= &table->def_read_set;
      bitmap_union(table->read_set, &table->tmp_set);
      table->file->prepare_for_insert(1);
    }
  }
  if (unlikely(error))
    DBUG_RETURN(1);

  /*
    Save tables being updated in update_tables
    update_table->shared is position for table
    Don't use key read on tables that are updated
  */

  update.empty();
  ti.rewind();
  while ((table_ref= ti++))
  {
    /* TODO: add support of view of join support */
    if (table_ref->is_jtbm())
      continue;
    TABLE *table=table_ref->table;
    leaf_table_count++;
    if (tables_to_update & table->map)
    {
      TABLE_LIST *tl= (TABLE_LIST*) thd->memdup(table_ref,
                                                sizeof(*tl));
      if (!tl)
        DBUG_RETURN(1);
      update.link_in_list(tl, &tl->next_local);
      /* 'shared' is used below as the index into the per-table arrays. */
      table_ref->shared= tl->shared= table_count++;
      table->no_keyread=1;
      table->covering_keys.clear_all();
      table->prepare_triggers_for_update_stmt_or_event();
      table->reset_default_fields();
    }
  }

  table_count= update.elements;
  update_tables= update.first;

  /* Per-updated-table work arrays, all indexed by TABLE_LIST::shared. */
  tmp_tables = (TABLE**) thd->calloc(sizeof(TABLE *) * table_count);
  tmp_table_param = (TMP_TABLE_PARAM*) thd->calloc(sizeof(TMP_TABLE_PARAM) *
                                                   table_count);
  fields_for_table= (List_item **) thd->alloc(sizeof(List_item *) *
                                              table_count);
  values_for_table= (List_item **) thd->alloc(sizeof(List_item *) *
                                              table_count);
  if (unlikely(thd->is_fatal_error))
    DBUG_RETURN(1);
  for (i=0 ; i < table_count ; i++)
  {
    fields_for_table[i]= new List_item;
    values_for_table[i]= new List_item;
  }
  if (unlikely(thd->is_fatal_error))
    DBUG_RETURN(1);

  /* Split fields into fields_for_table[] and values_by_table[] */

  while ((item= (Item_field *) field_it++))
  {
    Item *value= value_it++;
    uint offset= item->field->table->pos_in_table_list->shared;
    fields_for_table[offset]->push_back(item, thd->mem_root);
    values_for_table[offset]->push_back(value, thd->mem_root);
  }
  if (unlikely(thd->is_fatal_error))
    DBUG_RETURN(1);

  /* Allocate copy fields */
  max_fields=0;
  for (i=0 ; i < table_count ; i++)
  {
    set_if_bigger(max_fields, fields_for_table[i]->elements + leaf_table_count);
    if (fields_for_table[i]->elements)
    {
      TABLE *table= ((Item_field*)(fields_for_table[i]->head()))->field->table;
      switch_to_nullable_trigger_fields(*fields_for_table[i], table);
      switch_to_nullable_trigger_fields(*values_for_table[i], table);
    }
  }
  copy_field= new (thd->mem_root) Copy_field[max_fields];
  DBUG_RETURN(thd->is_fatal_error != 0);
}


/* Re-resolve table dependencies of all SET values (e.g. after optimize). */
void multi_update::update_used_tables()
{
  Item *item;
  List_iterator_fast it(*values);
  while ((item= it++))
  {
    item->update_used_tables();
  }
}

void multi_update::prepare_to_read_rows()
{
  /*
    update column maps now.
    it cannot be done in ::prepare() before the
    optimizer, because the optimize might reset them (in
    SELECT_LEX::update_used_tables()), it cannot be done in
    ::initialize_tables() after the optimizer, because the optimizer
    might read rows from const tables
  */

  for (TABLE_LIST *tl= update_tables; tl; tl= tl->next_local)
    tl->table->mark_columns_needed_for_update();
}


/*
  Check if table is safe to update on fly

  SYNOPSIS
    safe_update_on_fly()
    thd             Thread handler
    join_tab        How table is used in join
    all_tables      List of tables

  NOTES
    We can update the first table in join on the fly if we know that
    a row in this table will never be read twice. This is true under
    the following conditions:

    - No column is both written to and read in SET expressions.

    - We are doing a table scan and the data is in a separate file (MyISAM) or
      if we don't update a clustered key.

    - We are doing a range scan and we don't update the scan key or
      the primary key for a clustered table handler.

    - Table is not joined to itself.

    This function gets information about fields to be updated from
    the TABLE::write_set bitmap.

  WARNING
    This code is a bit dependent of how make_join_readinfo() works.

    The field table->tmp_set is used for keeping track of which fields are
    read during evaluation of the SET expression. See multi_update::prepare.

  RETURN
    0   Not safe to update
    1   Safe to update
*/

static bool safe_update_on_fly(THD *thd, JOIN_TAB *join_tab,
                               TABLE_LIST *table_ref, TABLE_LIST *all_tables)
{
  TABLE *table= join_tab->table;
  /* Self-join on the updated table: a row could be visited twice. */
  if (unique_table(thd, table_ref, all_tables, 0))
    return 0;
  if (join_tab->join->order)			// FIXME this is probably too strong
    return 0;
  switch (join_tab->type) {
  case JT_SYSTEM:
  case JT_CONST:
  case JT_EQ_REF:
    return TRUE;				// At most one matching row
  case JT_REF:
  case JT_REF_OR_NULL:
    /* Unsafe if the lookup key contains a column being updated. */
    return !is_key_used(table, join_tab->ref.key, table->write_set);
  case JT_ALL:
    /* SET reads a column it also writes: scanning could see new values. */
    if (bitmap_is_overlapping(&table->tmp_set, table->write_set))
      return FALSE;
    /* If range search on index */
    if (join_tab->quick)
      return !join_tab->quick->is_keys_used(table->write_set);
    /* If scanning in clustered key */
    if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
        table->s->primary_key < MAX_KEY)
      return !is_key_used(table, table->s->primary_key, table->write_set);
    return TRUE;
  default:
    break;					// Avoid compiler warning
  }
  return FALSE;

}


/*
  Initialize table for multi table

  IMPLEMENTATION
    - Update first table in join on the fly, if possible
    - Create temporary tables to store changed values for all other tables
      that are updated (and main_table if the above doesn't hold).
*/

bool
multi_update::initialize_tables(JOIN *join)
{
  TABLE_LIST *table_ref;
  DBUG_ENTER("initialize_tables");

  if (unlikely((thd->variables.option_bits & OPTION_SAFE_UPDATES) &&
               error_if_full_join(join)))
    DBUG_RETURN(1);
  if (join->implicit_grouping)
  {
    my_error(ER_INVALID_GROUP_FUNC_USE, MYF(0));
    DBUG_RETURN(1);
  }
  main_table=join->join_tab->table;
  table_to_update= 0;

  /* Any update has at least one pair (field, value) */
  DBUG_ASSERT(fields->elements);
  /*
    Only one table may be modified by UPDATE of an updatable view.
    For an updatable view first_table_for_update indicates this
    table.
    For a regular multi-update it refers to some updated table.
  */
  TABLE *first_table_for_update= ((Item_field *) fields->head())->field->table;

  /* Create a temporary table for keys to all tables, except main table */
  for (table_ref= update_tables; table_ref; table_ref= table_ref->next_local)
  {
    TABLE *table=table_ref->table;
    uint cnt= table_ref->shared;
    List temp_fields;
    ORDER group;
    TMP_TABLE_PARAM *tmp_param;

    if (ignore)
      table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
    if (table == main_table)			// First table in join
    {
      if (safe_update_on_fly(thd, join->join_tab, table_ref, all_tables))
      {
        table_to_update= table;			// Update table on the fly
        has_vers_fields= table->vers_check_update(*fields);
        continue;
      }
    }
    /* Not updatable on the fly: rowids will be stored in a tmp table. */
    table->prepare_for_position();
    join->map2table[table->tablenr]->keep_current_rowid= true;

    /*
      enable uncacheable flag if we update a view with check option
      and check option has a subselect, otherwise, the check option
      can be evaluated after the subselect was freed as independent
      (See full_local in JOIN::join_free()).
    */
    if (table_ref->check_option && !join->select_lex->uncacheable)
    {
      SELECT_LEX_UNIT *tmp_unit;
      SELECT_LEX *sl;
      for (tmp_unit= join->select_lex->first_inner_unit();
           tmp_unit;
           tmp_unit= tmp_unit->next_unit())
      {
        for (sl= tmp_unit->first_select(); sl; sl= sl->next_select())
        {
          if (sl->master_unit()->item)
          {
            join->select_lex->uncacheable|= UNCACHEABLE_CHECKOPTION;
            goto loop_end;
          }
        }
      }
    }
loop_end:

    if (table == first_table_for_update && table_ref->check_option)
    {
      /*
        Tables referenced only by the WITH CHECK OPTION condition (not
        updated themselves) also need their rowids stored, so the condition
        can be re-evaluated in do_updates().
      */
      table_map unupdated_tables= table_ref->check_option->used_tables() &
                                  ~first_table_for_update->map;
      List_iterator ti(*leaves);
      TABLE_LIST *tbl_ref;
      while ((tbl_ref= ti++) && unupdated_tables)
      {
        if (unupdated_tables & tbl_ref->table->map)
          unupdated_tables&= ~tbl_ref->table->map;
        else
          continue;
        if (unupdated_check_opt_tables.push_back(tbl_ref->table))
          DBUG_RETURN(1);
      }
    }

    tmp_param= tmp_table_param+cnt;

    /*
      Create a temporary table to store all fields that are changed for this
      table. The first field in the temporary table is a pointer to the
      original row so that we can find and update it. For the updatable
      VIEW a few following fields are rowids of tables used in the CHECK
      OPTION condition.
    */

    List_iterator_fast tbl_it(unupdated_check_opt_tables);
    TABLE *tbl= table;
    do
    {
      LEX_CSTRING field_name;
      field_name.str= tbl->alias.c_ptr();
      field_name.length= strlen(field_name.str);
      /*
        Signal each table (including tables referenced by WITH CHECK OPTION
        clause) for which we will store row position in the temporary table
        that we need a position to be read first.
      */
      tbl->prepare_for_position();
      join->map2table[tbl->tablenr]->keep_current_rowid= true;

      Item_temptable_rowid *item=
        new (thd->mem_root) Item_temptable_rowid(tbl);
      if (!item)
         DBUG_RETURN(1);
      item->fix_fields(thd, 0);
      if (temp_fields.push_back(item, thd->mem_root))
        DBUG_RETURN(1);
    } while ((tbl= tbl_it++));

    temp_fields.append(fields_for_table[cnt]);

    /* Make an unique key over the first field to avoid duplicated updates */
    bzero((char*) &group, sizeof(group));
    group.direction= ORDER::ORDER_ASC;
    group.item= (Item**) temp_fields.head_ref();

    tmp_param->quick_group= 1;
    tmp_param->field_count= temp_fields.elements;
    tmp_param->func_count= temp_fields.elements - 1;
    calc_group_buffer(tmp_param, &group);
    /* small table, ignore @@big_tables */
    my_bool save_big_tables= thd->variables.big_tables;
    thd->variables.big_tables= FALSE;
    tmp_tables[cnt]=create_tmp_table(thd, tmp_param, temp_fields,
                                     (ORDER*) &group, 0, 0,
                                     TMP_TABLE_ALL_COLUMNS, HA_POS_ERROR, &empty_clex_str);
    thd->variables.big_tables= save_big_tables;
    if (!tmp_tables[cnt])
      DBUG_RETURN(1);
    tmp_tables[cnt]->file->extra(HA_EXTRA_WRITE_CACHE);
  }
  join->tmp_table_keep_current_rowid= TRUE;
  DBUG_RETURN(0);
}


/*
  If 'item' is an Item_temptable_rowid, return the TABLE whose rowid it
  carries; otherwise NULL. Helper for prepare2() below.
*/
static TABLE *item_rowid_table(Item *item)
{
  if (item->type() != Item::FUNC_ITEM)
    return NULL;
  Item_func *func= (Item_func *)item;
  if (func->functype() != Item_func::TEMPTABLE_ROWID)
    return NULL;
  Item_temptable_rowid *itr= (Item_temptable_rowid *)func;
  return itr->table;
}


/*
  multi_update stores a rowid and new field values for every updated row in a
  temporary
  table (one temporary table per updated table). These rowids are
  obtained via Item_temptable_rowid's by calling handler::position(). But if
  the join is resolved via a temp table, rowids cannot be obtained from
  handler::position() in the multi_update::send_data(). So, they're stored in
  the join's temp table (JOIN::add_fields_for_current_rowid()) and here we
  replace Item_temptable_rowid's (that would've done handler::position()) with
  Item_field's (that will simply take the corresponding field value from the
  temp table).
*/
int multi_update::prepare2(JOIN *join)
{
  if (!join->need_tmp || !join->tmp_table_keep_current_rowid)
    return 0;

  // there cannot be many tmp tables in multi-update
  JOIN_TAB *tmptab= join->join_tab + join->exec_join_tab_cnt();

  for (Item **it= tmptab->tmp_table_param->items_to_copy; *it ; it++)
  {
    TABLE *tbl= item_rowid_table(*it);
    if (!tbl)
      continue;
    for (uint i= 0; i < table_count; i++)
    {
      for (Item **it2= tmp_table_param[i].items_to_copy; *it2; it2++)
      {
        if (item_rowid_table(*it2) != tbl)
          continue;
        /* Redirect the rowid source to the join tmp table's field. */
        Item_field *fld= new (thd->mem_root)
          Item_field(thd, (*it)->get_tmp_table_field());
        if (!fld)
          return 1;
        fld->result_field= (*it2)->get_tmp_table_field();
        *it2= fld;
      }
    }
  }
  return 0;
}


/*
  Destructor: undo per-table flags set in prepare(), free the per-table
  temporary tables and copy buffers, and restore THD field-check mode.
*/
multi_update::~multi_update()
{
  TABLE_LIST *table;
  for (table= update_tables ; table; table= table->next_local)
  {
    table->table->no_keyread= 0;
    if (ignore)
      table->table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
  }

  if (tmp_tables)
  {
    for (uint cnt = 0; cnt < table_count; cnt++)
    {
      if (tmp_tables[cnt])
      {
        free_tmp_table(thd, tmp_tables[cnt]);
        tmp_table_param[cnt].cleanup();
      }
    }
  }
  if (copy_field)
    delete [] copy_field;
  thd->count_cuted_fields= CHECK_FIELD_IGNORE;	// Restore this setting
  DBUG_ASSERT(trans_safe || !updated ||
              thd->transaction->all.modified_non_trans_table);
}


int multi_update::send_data(List &not_used_values)
{
  TABLE_LIST *cur_table;
  DBUG_ENTER("multi_update::send_data");

  /*
    Called once per joined row: either update main_table in place, or buffer
    rowid + new values in the per-table tmp table for do_updates().
  */
  for (cur_table= update_tables; cur_table; cur_table= cur_table->next_local)
  {
    int error= 0;
    TABLE *table= cur_table->table;
    uint offset= cur_table->shared;
    /*
      Check if we are using outer join and we didn't find the row
      or if we have already updated this row in the previous call to this
      function.

      The same row may be presented here several times in a join of type
      UPDATE t1 FROM t1,t2 SET t1.a=t2.a

      In this case we will do the update for the first found row combination.
      The join algorithm guarantees that we will not find the a row in
      t1 several times.
    */
    if (table->status & (STATUS_NULL_ROW | STATUS_UPDATED))
      continue;

    if (table == table_to_update)
    {
      /*
        We can use compare_record() to optimize away updates if
        the table handler is returning all columns OR if
        if all updated columns are read
      */
      bool can_compare_record;
      can_compare_record= records_are_comparable(table);

      table->status|= STATUS_UPDATED;
      store_record(table,record[1]);

      if (fill_record_n_invoke_before_triggers(thd, table,
                                               *fields_for_table[offset],
                                               *values_for_table[offset], 0,
                                               TRG_EVENT_UPDATE))
        DBUG_RETURN(1);
      /*
        Reset the table->auto_increment_field_not_null as it is valid for
        only one row.
      */
      table->auto_increment_field_not_null= FALSE;
      found++;
      if (!can_compare_record || compare_record(table))
      {

        if ((error= cur_table->view_check_option(thd, ignore)) !=
            VIEW_CHECK_OK)
        {
          found--;
          if (error == VIEW_CHECK_SKIP)
            continue;
          else if (unlikely(error == VIEW_CHECK_ERROR))
            DBUG_RETURN(1);
        }
        if (unlikely(!updated++))
        {
          /*
            Inform the main table that we are going to update the table even
            while we may be scanning it.  This will flush the read cache
            if it's used.
          */
          main_table->file->extra(HA_EXTRA_PREPARE_FOR_UPDATE);
        }
        if (unlikely((error=table->file->ha_update_row(table->record[1],
                                                       table->record[0]))) &&
            error != HA_ERR_RECORD_IS_THE_SAME)
        {
          updated--;
          if (!ignore ||
              table->file->is_fatal_error(error, HA_CHECK_ALL))
            goto error;
        }
        else
        {
          if (unlikely(error == HA_ERR_RECORD_IS_THE_SAME))
          {
            error= 0;
            updated--;
          }
          else if (has_vers_fields && table->versioned(VERS_TRX_ID))
          {
            updated_sys_ver++;
          }
          /* non-transactional or transactional table got modified   */
          /* either multi_update class' flag is raised in its branch */
          if (table->file->has_transactions_and_rollback())
            transactional_tables= TRUE;
          else
          {
            trans_safe= FALSE;
            thd->transaction->stmt.modified_non_trans_table= TRUE;
          }
        }
      }
      /* System-versioned table: also insert the history row. */
      if (has_vers_fields && table->versioned(VERS_TIMESTAMP))
      {
        store_record(table, record[2]);
        if (unlikely(error= vers_insert_history_row(table)))
        {
          restore_record(table, record[2]);
          goto error;
        }
        restore_record(table, record[2]);
        updated_sys_ver++;
      }
      if (table->triggers &&
          unlikely(table->triggers->process_triggers(thd, TRG_EVENT_UPDATE,
                                                     TRG_ACTION_AFTER, TRUE)))
        DBUG_RETURN(1);
    }
    else
    {
      /* Deferred path: buffer the row in the per-table tmp table. */
      TABLE *tmp_table= tmp_tables[offset];
      if (copy_funcs(tmp_table_param[offset].items_to_copy, thd))
        DBUG_RETURN(1);
      /* rowid field is NULL if join tmp table has null row from outer join */
      if (tmp_table->field[0]->is_null())
        continue;
      /* Store regular updated fields in the row. */
      DBUG_ASSERT(1 + unupdated_check_opt_tables.elements ==
                  tmp_table_param[offset].func_count);
      fill_record(thd, tmp_table,
                  tmp_table->field + 1 + unupdated_check_opt_tables.elements,
                  *values_for_table[offset], TRUE, FALSE);

      /* Write row, ignoring duplicated updates to a row */
      error= tmp_table->file->ha_write_tmp_row(tmp_table->record[0]);
      found++;
      if (unlikely(error))
      {
        found--;
        if (error != HA_ERR_FOUND_DUPP_KEY &&
            error != HA_ERR_FOUND_DUPP_UNIQUE)
        {
          if (create_internal_tmp_table_from_heap(thd, tmp_table,
                                                  tmp_table_param[offset].start_recinfo,
                                                  &tmp_table_param[offset].recinfo,
                                                  error, 1, NULL))
          {
            do_update= 0;
            DBUG_RETURN(1);			// Not a table_is_full error
          }
          found++;
        }
      }
    }
    continue;
error:
    DBUG_ASSERT(error > 0);
    /*
      If (ignore && error == is ignorable) we don't have to
      do anything; otherwise...
    */
    myf flags= 0;

    if (table->file->is_fatal_error(error, HA_CHECK_ALL))
      flags|= ME_FATAL; /* Other handler errors are fatal */

    prepare_record_for_error_message(error, table);
    table->file->print_error(error,MYF(flags));
    DBUG_RETURN(1);
  } // for (cur_table)
  DBUG_RETURN(0);
}


void multi_update::abort_result_set()
{
  /* the error was handled or nothing deleted and no side effects return */
  if (unlikely(error_handled ||
               (!thd->transaction->stmt.modified_non_trans_table && !updated)))
    return;

  /* Something already updated so we have to invalidate cache */
  if (updated)
    query_cache_invalidate3(thd, update_tables, 1);
  /*
    If all tables that has been updated are trans safe then just do rollback.
    If not attempt to do remaining updates.
  */

  if (!
      trans_safe)
  {
    DBUG_ASSERT(thd->transaction->stmt.modified_non_trans_table);
    if (do_update && table_count > 1)
    {
      /* Add warning here */
      (void) do_updates();
    }
  }
  if (thd->transaction->stmt.modified_non_trans_table ||
      thd->log_current_statement())
  {
    /*
      The query has to binlog because there's a modified non-transactional table
      either from the query's list or via a stored routine: bug#13270,23333
    */
    if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
    {
      StatementBinlog stmt_binlog(thd, thd->binlog_need_stmt_format(transactional_tables));
      /*
        THD::killed status might not have been set ON at time of an error
        got caught and if happens later the killed error is written
        into repl event.
      */
      int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
      /* the error of binary logging is ignored */
      (void)thd->binlog_query(THD::ROW_QUERY_TYPE,
                              thd->query(), thd->query_length(),
                              transactional_tables, FALSE, FALSE, errcode);
    }
    thd->transaction->all.modified_non_trans_table= TRUE;
  }
  thd->transaction->all.m_unsafe_rollback_flags|=
    (thd->transaction->stmt.m_unsafe_rollback_flags & THD_TRANS::DID_WAIT);
  DBUG_ASSERT(trans_safe || !updated || thd->transaction->stmt.modified_non_trans_table);
}


/*
  Apply the updates buffered in the per-table temporary tables to all
  updated tables except table_to_update (which was updated on the fly).
*/
int multi_update::do_updates()
{
  TABLE_LIST *cur_table;
  int local_error= 0;
  ha_rows org_updated;
  TABLE *table, *tmp_table, *err_table;
  List_iterator_fast check_opt_it(unupdated_check_opt_tables);
  DBUG_ENTER("multi_update::do_updates");

  do_update= 0;					// Don't retry this function
  if (!found)
    DBUG_RETURN(0);

  /*
    Update read_set to include all fields that virtual columns may depend on.
    Usually they're already in the read_set, but if the previous access
    method was keyread, only the virtual column itself will be in read_set,
    not its dependencies
  */
  while(TABLE *tbl= check_opt_it++)
    if (Field **vf= tbl->vfield)
      for (; *vf; vf++)
        if (bitmap_is_set(tbl->read_set, (*vf)->field_index))
          (*vf)->vcol_info->expr->walk(&Item::register_field_in_read_map, 1, 0);

  for (cur_table= update_tables; cur_table; cur_table= cur_table->next_local)
  {
    bool can_compare_record;
    uint offset= cur_table->shared;

    table = cur_table->table;
    if (table == table_to_update)
      continue;					// Already updated
    org_updated= updated;
    tmp_table= tmp_tables[cur_table->shared];
    tmp_table->file->extra(HA_EXTRA_CACHE);	// Change to read cache
    if (unlikely((local_error= table->file->ha_rnd_init(0))))
    {
      err_table= table;
      goto err;
    }
    table->file->extra(HA_EXTRA_NO_CACHE);
    /*
      We have to clear the base record, if we have virtual indexed
      blob fields, as some storage engines will access the blob fields
      to calculate the keys to see if they have changed. Without
      clearing the blob pointers will contain random values which can
      cause a crash.
      This is a workaround for engines that access columns not present in
      either read or write set.
    */
    if (table->vfield)
      empty_record(table);

    has_vers_fields= table->vers_check_update(*fields);

    check_opt_it.rewind();
    while(TABLE *tbl= check_opt_it++)
    {
      if (unlikely((local_error= tbl->file->ha_rnd_init(0))))
      {
        err_table= tbl;
        goto err;
      }
      tbl->file->extra(HA_EXTRA_CACHE);
    }

    /*
      Setup copy functions to copy fields from temporary table
    */
    List_iterator_fast field_it(*fields_for_table[offset]);
    Field **field;
    Copy_field *copy_field_ptr= copy_field, *copy_field_end;

    /* Skip row pointers */
    field= tmp_table->field + 1 + unupdated_check_opt_tables.elements;
    for ( ; *field ; field++)
    {
      Item_field *item= (Item_field* ) field_it++;
      (copy_field_ptr++)->set(item->field, *field, 0);
    }
    copy_field_end=copy_field_ptr;

    if (unlikely((local_error= tmp_table->file->ha_rnd_init(1))))
    {
      err_table= tmp_table;
      goto err;
    }

    can_compare_record= records_are_comparable(table);

    /* Scan the buffered rows and replay each one onto the base table. */
    for (;;)
    {
      if (thd->killed && trans_safe)
      {
        thd->fatal_error();
        goto err2;
      }
      if (unlikely((local_error=
                    tmp_table->file->ha_rnd_next(tmp_table->record[0]))))
      {
        if (local_error == HA_ERR_END_OF_FILE)
          break;
        err_table= tmp_table;
        goto err;
      }

      /* call rnd_pos() using rowids from temporary table */
      check_opt_it.rewind();
      TABLE *tbl= table;
      uint field_num= 0;
      do
      {
        DBUG_ASSERT(!tmp_table->field[field_num]->is_null());
        String rowid;
        tmp_table->field[field_num]->val_str(&rowid);
        if (unlikely((local_error= tbl->file->ha_rnd_pos(tbl->record[0],
                                                         (uchar*)rowid.ptr()))))
        {
          err_table= tbl;
          goto err;
        }
        field_num++;
      } while ((tbl= check_opt_it++));

      if (table->vfield &&
          unlikely(table->update_virtual_fields(table->file,
                                                VCOL_UPDATE_INDEXED_FOR_UPDATE)))
        goto err2;

      table->status|= STATUS_UPDATED;
      store_record(table,record[1]);

      /* Copy data from temporary table to current table */
      for (copy_field_ptr=copy_field;
           copy_field_ptr != copy_field_end;
           copy_field_ptr++)
      {
        (*copy_field_ptr->do_copy)(copy_field_ptr);
        copy_field_ptr->to_field->set_has_explicit_value();
      }

      table->evaluate_update_default_function();
      if (table->vfield &&
          table->update_virtual_fields(table->file, VCOL_UPDATE_FOR_WRITE))
        goto err2;
      if (table->triggers &&
          table->triggers->process_triggers(thd, TRG_EVENT_UPDATE,
                                            TRG_ACTION_BEFORE, TRUE))
        goto err2;

      if (!can_compare_record || compare_record(table))
      {
        int error;
        if ((error= cur_table->view_check_option(thd, ignore)) !=
            VIEW_CHECK_OK)
        {
          if (error == VIEW_CHECK_SKIP)
            continue;
          else if (unlikely(error == VIEW_CHECK_ERROR))
          {
            thd->fatal_error();
            goto err2;
          }
        }
        if (has_vers_fields && table->versioned())
          table->vers_update_fields();

        if (unlikely((local_error=
                      table->file->ha_update_row(table->record[1],
                                                 table->record[0]))) &&
            local_error != HA_ERR_RECORD_IS_THE_SAME)
        {
          if (!ignore ||
              table->file->is_fatal_error(local_error, HA_CHECK_ALL))
          {
            err_table= table;
            goto err;
          }
        }
        if (local_error != HA_ERR_RECORD_IS_THE_SAME)
        {
          updated++;

          if (has_vers_fields && table->versioned())
          {
            if (table->versioned(VERS_TIMESTAMP))
            {
              store_record(table, record[2]);
              if ((local_error= vers_insert_history_row(table)))
              {
                restore_record(table, record[2]);
                err_table = table;
                goto err;
              }
              restore_record(table, record[2]);
            }
            updated_sys_ver++;
          }
        }
        else
        {
          local_error= 0;
        }
      }

      if (table->triggers &&
          unlikely(table->triggers->process_triggers(thd, TRG_EVENT_UPDATE,
                                                     TRG_ACTION_AFTER, TRUE)))
        goto err2;
    }

    if (updated != org_updated)
    {
      if (table->file->has_transactions_and_rollback())
        transactional_tables= TRUE;
      else
      {
        trans_safe= FALSE;			// Can't do safe rollback
        thd->transaction->stmt.modified_non_trans_table= TRUE;
      }
    }
    (void) table->file->ha_rnd_end();
    (void) tmp_table->file->ha_rnd_end();
    check_opt_it.rewind();
    while (TABLE *tbl= check_opt_it++)
      tbl->file->ha_rnd_end();

  }
  DBUG_RETURN(0);

err:
  {
    prepare_record_for_error_message(local_error, err_table);
    err_table->file->print_error(local_error,MYF(ME_FATAL));
  }

err2:
  /* Close any scans that were opened before the failure. */
  if (table->file->inited)
    (void) table->file->ha_rnd_end();
  if (tmp_table->file->inited)
    (void) tmp_table->file->ha_rnd_end();
  check_opt_it.rewind();
  while (TABLE *tbl= check_opt_it++)
  {
    if (tbl->file->inited)
      (void) tbl->file->ha_rnd_end();
  }

  if (updated != org_updated)
  {
    if (table->file->has_transactions_and_rollback())
      transactional_tables= TRUE;
    else
    {
      trans_safe= FALSE;
      thd->transaction->stmt.modified_non_trans_table= TRUE;
    }
  }
  DBUG_RETURN(1);
}


/* out: 1 if error, 0 if success */

bool multi_update::send_eof()
{
  char buff[STRING_BUFFER_USUAL_SIZE];
  ulonglong id;
  killed_state killed_status= NOT_KILLED;
  DBUG_ENTER("multi_update::send_eof");
  THD_STAGE_INFO(thd, stage_updating_reference_tables);

  /*
     Does updates for the last n - 1 tables, returns 0 if ok;
     error takes into account killed status gained in do_updates()
  */
  int local_error= thd->is_error();
  if (likely(!local_error))
    local_error = (table_count) ? do_updates() : 0;
  /*
    if local_error is not set ON until after do_updates() then
    later carried out killing should not affect binlogging.
  */
  killed_status= (local_error == 0) ? NOT_KILLED : thd->killed;
  THD_STAGE_INFO(thd, stage_end);

  /* We must invalidate the query cache before binlog writing and
  ha_autocommit_... */

  if (updated)
  {
    query_cache_invalidate3(thd, update_tables, 1);
  }
  /*
    Write the SQL statement to the binlog if we updated
    rows and we succeeded or if we updated some non
    transactional tables.

    The query has to binlog because there's a modified non-transactional table
    either from the query's list or via a stored routine: bug#13270,23333
  */

  if (thd->transaction->stmt.modified_non_trans_table)
    thd->transaction->all.modified_non_trans_table= TRUE;
  thd->transaction->all.m_unsafe_rollback_flags|=
    (thd->transaction->stmt.m_unsafe_rollback_flags & THD_TRANS::DID_WAIT);

  if (likely(local_error == 0 ||
             thd->transaction->stmt.modified_non_trans_table) ||
      thd->log_current_statement())
  {
    if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
    {
      int errcode= 0;
      if (likely(local_error == 0))
        thd->clear_error();
      else
        errcode= query_error_code(thd, killed_status == NOT_KILLED);

      /*
        Tables with transaction-id-based system versioning force statement
        format; otherwise use the THD's preferred format.
      */
      bool force_stmt= thd->binlog_need_stmt_format(transactional_tables);
      if (!force_stmt)
        for (TABLE *table= all_tables->table; table; table= table->next)
        {
          if (table->versioned(VERS_TRX_ID))
          {
            force_stmt= true;
            break;
          }
        }
      StatementBinlog stmt_binlog(thd, force_stmt);
      if (thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query(),
                            thd->query_length(), transactional_tables, FALSE,
                            FALSE, errcode) > 0)
        local_error= 1;				// Rollback update
    }
  }
  DBUG_ASSERT(trans_safe || !updated ||
              thd->transaction->stmt.modified_non_trans_table);

  if (unlikely(local_error))
  {
    error_handled= TRUE; // to force early leave from ::abort_result_set()
    if (thd->killed == NOT_KILLED && !thd->get_stmt_da()->is_set())
    {
      /*
        No error message was sent and query was not killed (in which case
        mysql_execute_command() will send the error mesage).
      */
      my_message(ER_UNKNOWN_ERROR, "An error occurred in multi-table update",
                 MYF(0));
    }
    DBUG_RETURN(TRUE);
  }

  if (!thd->lex->analyze_stmt)
  {
    id= thd->arg_of_last_insert_id_function ?
    thd->first_successful_insert_id_in_prev_stmt : 0;
    my_snprintf(buff, sizeof(buff), ER_THD(thd, ER_UPDATE_INFO),
                (ulong) found, (ulong) updated, (ulong) thd->cuted_fields);
    ::my_ok(thd, (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated,
            id, buff);
  }
  DBUG_RETURN(FALSE);
}
diff --git a/sql/sql_update.h b/sql/sql_update.h
new file mode 100644
index 00000000..65e44d11
--- /dev/null
+++ b/sql/sql_update.h
@@ -0,0 +1,44 @@
/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */

/* Public interface of the single- and multi-table UPDATE implementation. */

#ifndef SQL_UPDATE_INCLUDED
#define SQL_UPDATE_INCLUDED

#include "sql_class.h"                          /* enum_duplicates */

class Item;
struct TABLE_LIST;
class THD;

typedef class st_select_lex SELECT_LEX;
typedef class st_select_lex_unit SELECT_LEX_UNIT;

bool mysql_prepare_update(THD *thd, TABLE_LIST *table_list,
                          Item **conds, uint order_num, ORDER *order);
bool check_unique_table(THD *thd, TABLE_LIST *table_list);
int mysql_update(THD *thd,TABLE_LIST *tables,List &fields,
                 List &values,COND *conds,
                 uint order_num, ORDER *order, ha_rows limit,
                 bool ignore, ha_rows *found_return, ha_rows *updated_return);
bool mysql_multi_update(THD *thd, TABLE_LIST *table_list,
                        List *fields, List *values,
                        COND *conds, ulonglong options,
                        enum enum_duplicates handle_duplicates, bool ignore,
                        SELECT_LEX_UNIT
                        *unit, SELECT_LEX *select_lex,
                        multi_update **result);
bool records_are_comparable(const TABLE *table);
bool compare_record(const TABLE *table);

#endif /* SQL_UPDATE_INCLUDED */
diff --git a/sql/sql_view.cc b/sql/sql_view.cc
new file mode 100644
index 00000000..08208048
--- /dev/null
+++ b/sql/sql_view.cc
@@ -0,0 +1,2383 @@
/* Copyright (c) 2004, 2013, Oracle and/or its affiliates.
   Copyright (c) 2011, 2021, MariaDB Corporation.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*/

/* Implementation of CREATE/ALTER/DROP VIEW and view resolution helpers. */

#define MYSQL_LEX 1
#include "mariadb.h"                            /* NO_EMBEDDED_ACCESS_CHECKS */
#include "sql_priv.h"
#include "unireg.h"
#include "sql_view.h"
#include "sql_base.h"    // find_table_in_global_list, lock_table_names
#include "sql_parse.h"                          // sql_parse
#include "sql_cache.h"                          // query_cache_*
#include "lock.h"        // MYSQL_OPEN_SKIP_TEMPORARY
#include "sql_show.h"    // append_identifier
#include "sql_table.h"                         // build_table_filename
#include "sql_db.h"            // mysql_opt_change_db, mysql_change_db
#include "sql_select.h"
#include "parse_file.h"
#include "sp_head.h"
#include "sp.h"
#include "sp_cache.h"
#include "datadict.h"   // dd_frm_is_view()
#include "sql_derived.h"
#include "opt_trace.h"
#include "ddl_log.h"
#include "debug.h"      // debug_crash_here
#include "wsrep_mysqld.h"

/* Buffer size for a hex MD5 digest plus NUL terminator. */
#define MD5_BUFF_LENGTH 33

const LEX_CSTRING view_type= { STRING_WITH_LEN("VIEW") };

static int
mysql_register_view(THD *thd, DDL_LOG_STATE *ddl_log_state, + TABLE_LIST *view, enum_view_create_mode mode, + char *backup_file_name); + +/* + Make a unique name for an anonymous view column + SYNOPSIS + target reference to the item for which a new name has to be made + item_list list of items within which we should check uniqueness of + the created name + last_element the last element of the list above + + NOTE + Unique names are generated by adding 'My_exp_' to the old name of the + column. In case the name that was created this way already exists, we + add a numeric postfix to its end (i.e. "1") and increase the number + until the name becomes unique. If the generated name is longer than + NAME_LEN, it is truncated. +*/ + +static void make_unique_view_field_name(THD *thd, Item *target, + List &item_list, + Item *last_element) +{ + const char *name= (target->orig_name ? + target->orig_name : + target->name.str); + size_t name_len; + uint attempt; + char buff[NAME_LEN+1]; + List_iterator_fast itc(item_list); + + for (attempt= 0;; attempt++) + { + Item *check; + bool ok= TRUE; + + if (attempt) + name_len= my_snprintf(buff, NAME_LEN, "My_exp_%d_%s", attempt, name); + else + name_len= my_snprintf(buff, NAME_LEN, "My_exp_%s", name); + + do + { + check= itc++; + if (check != target && + my_strcasecmp(system_charset_info, buff, check->name.str) == 0) + { + ok= FALSE; + break; + } + } while (check != last_element); + if (ok) + break; + itc.rewind(); + } + + if (!target->orig_name) + target->orig_name= target->name.str; + target->set_name(thd, buff, name_len, system_charset_info); +} + + +/* + Check if items with same names are present in list and possibly + generate unique names for them. + + SYNOPSIS + item_list list of Items which should be checked for duplicates + gen_unique_view_name flag: generate unique name or return with error when + duplicate names are found. + + DESCRIPTION + This function is used on view creation and preparation of derived tables. 
+ It checks item_list for items with duplicate names. If it founds two + items with same name and conversion to unique names isn't allowed, or + names for both items are set by user - function fails. + Otherwise it generates unique name for one item with autogenerated name + using make_unique_view_field_name() + + RETURN VALUE + FALSE no duplicate names found, or they are converted to unique ones + TRUE duplicate names are found and they can't be converted or conversion + isn't allowed +*/ + +bool check_duplicate_names(THD *thd, List &item_list, bool gen_unique_view_name) +{ + Item *item; + List_iterator_fast it(item_list); + List_iterator_fast itc(item_list); + DBUG_ENTER("check_duplicate_names"); + + while ((item= it++)) + { + Item *check; + /* treat underlying fields like set by user names */ + if (item->real_item()->type() == Item::FIELD_ITEM) + item->base_flags|= item_base_t::IS_EXPLICIT_NAME; + itc.rewind(); + while ((check= itc++) && check != item) + { + if (lex_string_cmp(system_charset_info, &item->name, &check->name) == 0) + { + if (!gen_unique_view_name) + goto err; + if (!item->is_explicit_name()) + make_unique_view_field_name(thd, item, item_list, item); + else if (!check->is_explicit_name()) + make_unique_view_field_name(thd, check, item_list, item); + else + goto err; + } + } + } + DBUG_RETURN(FALSE); + +err: + my_error(ER_DUP_FIELDNAME, MYF(0), item->name.str); + DBUG_RETURN(TRUE); +} + + +/** + Check if auto generated column names are conforming and + possibly generate a conforming name for them if not. 
+ + @param item_list List of Items which should be checked +*/ + +void make_valid_column_names(THD *thd, List &item_list) +{ + Item *item; + size_t name_len; + List_iterator_fast it(item_list); + char buff[NAME_LEN]; + DBUG_ENTER("make_valid_column_names"); + + for (uint column_no= 1; (item= it++); column_no++) + { + if (item->is_explicit_name() || !check_column_name(item->name.str)) + continue; + name_len= my_snprintf(buff, NAME_LEN, "Name_exp_%u", column_no); + item->orig_name= item->name.str; + item->set_name(thd, buff, name_len, system_charset_info); + } + + DBUG_VOID_RETURN; +} + + +/* + Fill defined view parts + + SYNOPSIS + fill_defined_view_parts() + thd current thread. + view view to operate on + + DESCRIPTION + This function will initialize the parts of the view + definition that are not specified in ALTER VIEW + to their values from CREATE VIEW. + The view must be opened to get its definition. + We use a copy of the view when opening because we want + to preserve the original view instance. + + RETURN VALUE + TRUE can't open table + FALSE success +*/ +static bool +fill_defined_view_parts (THD *thd, TABLE_LIST *view) +{ + LEX *lex= thd->lex; + TABLE_LIST decoy; + + decoy= *view; + decoy.mdl_request.key.mdl_key_init(&view->mdl_request.key); + if (tdc_open_view(thd, &decoy, OPEN_VIEW_NO_PARSE)) + return TRUE; + + if (!lex->definer) + { + view->definer.host= decoy.definer.host; + view->definer.user= decoy.definer.user; + lex->definer= &view->definer; + } + if (lex->create_view->algorithm == VIEW_ALGORITHM_INHERIT) + lex->create_view->algorithm= (uint8) decoy.algorithm; + if (lex->create_view->suid == VIEW_SUID_DEFAULT) + lex->create_view->suid= decoy.view_suid ? + VIEW_SUID_DEFINER : VIEW_SUID_INVOKER; + + return FALSE; +} + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + +/** + @brief CREATE VIEW privileges pre-check. 
+ + @param thd thread handler + @param tables tables used in the view + @param views views to create + @param mode VIEW_CREATE_NEW, VIEW_ALTER, VIEW_CREATE_OR_REPLACE + + @retval FALSE Operation was a success. + @retval TRUE An error occurred. +*/ + +bool create_view_precheck(THD *thd, TABLE_LIST *tables, TABLE_LIST *view, + enum_view_create_mode mode) +{ + LEX *lex= thd->lex; + /* first table in list is target VIEW name => cut off it */ + TABLE_LIST *tbl; + SELECT_LEX *select_lex= lex->first_select_lex(); + SELECT_LEX *sl; + bool res= TRUE; + DBUG_ENTER("create_view_precheck"); + + /* + Privilege check for view creation: + - user has CREATE VIEW privilege on view table + - user has DROP privilege in case of ALTER VIEW or CREATE OR REPLACE + VIEW + - user has some (SELECT/UPDATE/INSERT/DELETE) privileges on columns of + underlying tables used on top of SELECT list (because it can be + (theoretically) updated, so it is enough to have UPDATE privilege on + them, for example) + - user has SELECT privilege on columns used in expressions of VIEW select + - for columns of underly tables used on top of SELECT list also will be + checked that we have not more privileges on correspondent column of view + table (i.e. 
user will not get some privileges by view creation) + */ + if ((check_access(thd, CREATE_VIEW_ACL, view->db.str, + &view->grant.privilege, + &view->grant.m_internal, + 0, 0) || + check_grant(thd, CREATE_VIEW_ACL, view, FALSE, 1, FALSE)) || + (mode != VIEW_CREATE_NEW && + (check_access(thd, DROP_ACL, view->db.str, + &view->grant.privilege, + &view->grant.m_internal, + 0, 0) || + check_grant(thd, DROP_ACL, view, FALSE, 1, FALSE)))) + goto err; + + for (sl= select_lex; sl; sl= sl->next_select()) + { + for (tbl= sl->get_table_list(); tbl; tbl= tbl->next_local) + { + if (!tbl->with && tbl->select_lex) + tbl->with= tbl->select_lex->find_table_def_in_with_clauses(tbl); + /* + Ensure that we have some privileges on this table, more strict check + will be done on column level after preparation, + */ + if (check_some_access(thd, VIEW_ANY_ACL, tbl)) + { + my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0), + "ANY", thd->security_ctx->priv_user, + thd->security_ctx->priv_host, + tbl->db.str, tbl->table_name.str); + goto err; + } + /* + Mark this table as a table which will be checked after the prepare + phase + */ + tbl->table_in_first_from_clause= 1; + + /* + We need to check only SELECT_ACL for all normal fields, fields for + which we need "any" (SELECT/UPDATE/INSERT/DELETE) privilege will be + checked later + */ + tbl->grant.want_privilege= SELECT_ACL; + /* + Make sure that all rights are loaded to the TABLE::grant field. + + tbl->table_name will be correct name of table because VIEWs are + not opened yet. 
+ */ + fill_effective_table_privileges(thd, &tbl->grant, tbl->db.str, tbl->table_name.str); + } + } + + { + /* check tables of subqueries */ + for (tbl= tables; tbl; tbl= tbl->next_global) + { + if (!tbl->table_in_first_from_clause) + { + if (check_single_table_access(thd, SELECT_ACL, tbl, FALSE)) + { + tbl->hide_view_error(thd); + goto err; + } + } + } + } + /* + Mark fields for special privilege check ("any" privilege) + */ + for (sl= select_lex; sl; sl= sl->next_select()) + { + List_iterator_fast it(sl->item_list); + Item *item; + while ((item= it++)) + { + Item_field *field; + if ((field= item->field_for_view_update())) + { + /* + any_privileges may be reset later by the Item_field::set_field + method in case of a system temporary table. + */ + field->any_privileges= 1; + } + } + } + + res= FALSE; + +err: + DBUG_RETURN(res || thd->is_error()); +} + +#else + +bool create_view_precheck(THD *thd, TABLE_LIST *tables, TABLE_LIST *view, + enum_view_create_mode mode) +{ + return FALSE; +} + +#endif + + +/** + @brief Creating/altering VIEW procedure + + @param thd thread handler + @param views views to create + @param mode VIEW_CREATE_NEW, VIEW_ALTER, VIEW_CREATE_OR_REPLACE + + @note This function handles both create and alter view commands. + + @retval FALSE Operation was a success. + @retval TRUE An error occurred. +*/ + +bool mysql_create_view(THD *thd, TABLE_LIST *views, + enum_view_create_mode mode) +{ + LEX *lex= thd->lex; + bool link_to_local; + /* first table in list is target VIEW name => cut off it */ + TABLE_LIST *view= lex->unlink_first_table(&link_to_local); + TABLE_LIST *tables= lex->query_tables; + TABLE_LIST *tbl; + SELECT_LEX *select_lex= lex->first_select_lex(); + SELECT_LEX *sl; + SELECT_LEX_UNIT *unit= &lex->unit; + DDL_LOG_STATE ddl_log_state, ddl_log_state_tmp_file; + char backup_file_name[FN_REFLEN+2]; + bool res= FALSE; + DBUG_ENTER("mysql_create_view"); + + /* + This is ensured in the parser. 
+ NOTE: Originally, the assert below contained the extra condition + && !lex->result + but in this form the assert is failed in case CREATE VIEW run under + cursor (the case when the byte 'flags' in the COM_STMT_EXECUTE packet has + the flag CURSOR_TYPE_READ_ONLY set). For the cursor use case + thd->lex->result is assigned a pointer to the class Select_materialize + inside the function mysql_open_cursor() just before handling of a statement + will be started and the function mysql_create_view() called. + */ + DBUG_ASSERT(!lex->proc_list.first && + !lex->param_list.elements); + + bzero(&ddl_log_state, sizeof(ddl_log_state)); + bzero(&ddl_log_state_tmp_file, sizeof(ddl_log_state_tmp_file)); + backup_file_name[0]= 0; + /* + We can't allow taking exclusive meta-data locks of unlocked view under + LOCK TABLES since this might lead to deadlock. Since at the moment we + can't really lock view with LOCK TABLES we simply prohibit creation/ + alteration of views under LOCK TABLES. + */ + + if (thd->locked_tables_mode) + { + my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0)); + res= TRUE; + goto err; + } + + if ((res= create_view_precheck(thd, tables, view, mode))) + goto err; + + lex->link_first_table_back(view, link_to_local); + view->open_type= OT_BASE_ONLY; + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + + /* + ignore lock specs for CREATE statement + */ + if (lex->current_select->lock_type != TL_READ_DEFAULT) + { + lex->current_select->set_lock_for_tables(TL_READ_DEFAULT, false, select_lex->skip_locked); + view->mdl_request.set_type(MDL_EXCLUSIVE); + } + + if (thd->open_temporary_tables(lex->query_tables) || + open_and_lock_tables(thd, lex->query_tables, TRUE, 0)) + { + res= TRUE; + goto err_no_relink; + } + +#ifdef WITH_WSREP + if(!wsrep_should_replicate_ddl_iterate(thd, static_cast(tables))) + { + res= TRUE; + goto err_no_relink; + } +#endif + + view= lex->unlink_first_table(&link_to_local); + + if (check_db_dir_existence(view->db.str)) + { + 
my_error(ER_BAD_DB_ERROR, MYF(0), view->db.str); + res= TRUE; + goto err; + } + + if (mode == VIEW_ALTER && fill_defined_view_parts(thd, view)) + { + res= TRUE; + goto err; + } + + if (lex->limit_rows_examined) + { + /* + LIMIT ROWS EXAMINED is not supported inside views to avoid complicated + side-effects and semantics of the clause. + */ + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "LIMIT ROWS EXAMINED inside views"); + res= TRUE; + goto err; + } + + sp_cache_invalidate(); + if (sp_process_definer(thd)) + goto err; + + /* + check that tables are not temporary and this VIEW do not used in query + (it is possible with ALTERing VIEW). + open_and_lock_tables can change the value of tables, + e.g. it may happen if before the function call tables was equal to 0. + */ + for (tbl= lex->query_tables; tbl; tbl= tbl->next_global) + { + /* is this table view and the same view which we creates now? */ + if (tbl->view && + cmp(&tbl->view_db, &view->db) == 0 && + cmp(&tbl->view_name, &view->table_name) == 0) + { + my_error(ER_NO_SUCH_TABLE, MYF(0), tbl->view_db.str, tbl->view_name.str); + res= TRUE; + goto err; + } + + /* + tbl->table can be NULL when tbl is a placeholder for a view + that is indirectly referenced via a stored function from the + view being created. We don't check these indirectly + referenced views in CREATE VIEW so they don't have table + object. + */ + if (tbl->table) + { + /* is this table temporary and is not view? 
*/ + if (tbl->table->s->tmp_table != NO_TMP_TABLE && !tbl->view && + !tbl->schema_table && !tbl->table_function) + { + my_error(ER_VIEW_SELECT_TMPTABLE, MYF(0), tbl->alias.str); + res= TRUE; + goto err; + } + /* + Copy the privileges of the underlying VIEWs which were filled by + fill_effective_table_privileges + (they were not copied at derived tables processing) + */ + tbl->table->grant.privilege= tbl->grant.privilege; + } + } + + /* prepare select to resolve all fields */ + lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_VIEW; + if (unit->prepare(unit->derived, 0, 0)) + { + /* + some errors from prepare are reported to user, if is not then + it will be checked after err: label + */ + res= TRUE; + goto err; + } + + /* view list (list of view fields names) */ + if (lex->view_list.elements) + { + List_iterator_fast it(select_lex->item_list); + List_iterator_fast nm(lex->view_list); + Item *item; + LEX_CSTRING *name; + + if (lex->view_list.elements != select_lex->item_list.elements) + { + my_message(ER_VIEW_WRONG_LIST, ER_THD(thd, ER_VIEW_WRONG_LIST), MYF(0)); + res= TRUE; + goto err; + } + while ((item= it++, name= nm++)) + { + item->set_name(thd, *name); + item->base_flags|= item_base_t::IS_EXPLICIT_NAME; + } + } + + /* Check if the auto generated column names are conforming. */ + for (sl= select_lex; sl; sl= sl->next_select()) + make_valid_column_names(thd, sl->item_list); + + if (check_duplicate_names(thd, select_lex->item_list, 1)) + { + res= TRUE; + goto err; + } + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + /* + Compare/check grants on view with grants of underlying tables + */ + + fill_effective_table_privileges(thd, &view->grant, view->db.str, + view->table_name.str); + + /* + Make sure that the current user does not have more column-level privileges + on the newly created view than he/she does on the underlying + tables. E.g. 
it must not be so that the user has UPDATE privileges on a + view column of he/she doesn't have it on the underlying table's + corresponding column. In that case, return an error for CREATE VIEW. + */ + { + Item *report_item= NULL; + /* + This will hold the intersection of the priviliges on all columns in the + view. + */ + privilege_t final_priv(VIEW_ANY_ACL); + + for (sl= select_lex; sl; sl= sl->next_select()) + { + DBUG_ASSERT(view->db.str); /* Must be set in the parser */ + List_iterator_fast it(sl->item_list); + Item *item; + while ((item= it++)) + { + Item_field *fld= item->field_for_view_update(); + privilege_t priv(get_column_grant(thd, &view->grant, view->db.str, + view->table_name.str, + item->name.str) & + VIEW_ANY_ACL); + + if (!fld) + continue; + TABLE_SHARE *s= fld->field->table->s; + const Lex_ident field_name= fld->field->field_name; + if (s->tmp_table || + (s->versioned && + (field_name.streq(s->vers_start_field()->field_name) || + field_name.streq(s->vers_end_field()->field_name)))) + { + continue; + } + + final_priv&= fld->have_privileges; + + if (~fld->have_privileges & priv) + report_item= item; + } + } + + if (!final_priv && report_item) + { + my_error(ER_COLUMNACCESS_DENIED_ERROR, MYF(0), + "create view", thd->security_ctx->priv_user, + thd->security_ctx->priv_host, report_item->name.str, + view->table_name.str); + res= TRUE; + goto err; + } + } +#endif + + res= mysql_register_view(thd, &ddl_log_state, view, mode, backup_file_name); + + /* + View TABLE_SHARE must be removed from the table definition cache in order to + make ALTER VIEW work properly. Otherwise, we would not be able to detect + meta-data changes after ALTER VIEW. 
+ */ + + if (!res) + tdc_remove_table(thd, view->db.str, view->table_name.str); + + if (!res && mysql_bin_log.is_open()) + { + StringBuffer<128> buff(thd->variables.character_set_client); + DBUG_ASSERT(buff.charset()->mbminlen == 1); + const LEX_CSTRING command[3]= + {{ STRING_WITH_LEN("CREATE ") }, + { STRING_WITH_LEN("ALTER ") }, + { STRING_WITH_LEN("CREATE OR REPLACE ") }}; + + buff.append(&command[thd->lex->create_view->mode]); + view_store_options(thd, views, &buff); + buff.append(STRING_WITH_LEN("VIEW ")); + + /* Appending IF NOT EXISTS if present in the query */ + if (lex->create_info.if_not_exists()) + buff.append(STRING_WITH_LEN("IF NOT EXISTS ")); + + /* Test if user supplied a db (ie: we did not use thd->db) */ + if (views->db.str && views->db.str[0] && + (thd->db.str == NULL || cmp(&views->db, &thd->db))) + { + append_identifier(thd, &buff, &views->db); + buff.append('.'); + } + append_identifier(thd, &buff, &views->table_name); + if (lex->view_list.elements) + { + List_iterator_fast names(lex->view_list); + + buff.append('('); + while (LEX_CSTRING *name= names++) + { + append_identifier(thd, &buff, name); + buff.append(", ", 2); + } + buff.length(buff.length()-2); + buff.append(')'); + } + buff.append(STRING_WITH_LEN(" AS ")); + /* views->source doesn't end with \0 */ + buff.append(views->source.str, views->source.length); + + int errcode= query_error_code(thd, TRUE); + /* + Don't log any unsafe warnings for CREATE VIEW as it's safely replicated + with statement based replication + */ + thd->reset_unsafe_warnings(); + thd->binlog_xid= thd->query_id; + ddl_log_update_xid(&ddl_log_state, thd->binlog_xid); + if (backup_file_name[0]) + { + LEX_CSTRING cpath= {backup_file_name, strlen(backup_file_name) }; + ddl_log_delete_tmp_file(&ddl_log_state_tmp_file, &cpath, + &ddl_log_state); + } + debug_crash_here("ddl_log_create_before_binlog"); + if (thd->binlog_query(THD::STMT_QUERY_TYPE, + buff.ptr(), buff.length(), FALSE, FALSE, FALSE, + errcode) > 0) + res= 
TRUE; + thd->binlog_xid= 0; + debug_crash_here("ddl_log_create_after_binlog"); + } + if (!res) + { + backup_log_info ddl_log; + bzero(&ddl_log, sizeof(ddl_log)); + ddl_log.query= { C_STRING_WITH_LEN("CREATE") }; + ddl_log.org_storage_engine_name= { C_STRING_WITH_LEN("VIEW") }; + ddl_log.org_database= view->db; + ddl_log.org_table= view->table_name; + backup_log_ddl(&ddl_log); + } + + if (mode != VIEW_CREATE_NEW) + query_cache_invalidate3(thd, view, 0); + if (res) + goto err; + + if (backup_file_name[0] && + mysql_file_delete(key_file_fileparser, backup_file_name, MYF(MY_WME))) + goto err; // Should be impossible + + my_ok(thd); + lex->link_first_table_back(view, link_to_local); + ddl_log_complete(&ddl_log_state); + ddl_log_complete(&ddl_log_state_tmp_file); + DBUG_RETURN(0); + +#ifdef WITH_WSREP +wsrep_error_label: + res= true; + goto err_no_relink; +#endif + +err: + lex->link_first_table_back(view, link_to_local); +err_no_relink: + unit->cleanup(); + if (backup_file_name[0]) + mysql_file_delete(key_file_fileparser, backup_file_name, MYF(MY_WME)); + ddl_log_complete(&ddl_log_state); + ddl_log_complete(&ddl_log_state_tmp_file); + DBUG_RETURN(res || thd->is_error()); +} + + +static void make_view_filename(LEX_CSTRING *dir, char *dir_buff, + size_t dir_buff_len, + LEX_CSTRING *path, char *path_buff, + size_t path_buff_len, + LEX_CSTRING *file, + TABLE_LIST *view) +{ + /* print file name */ + dir->length= build_table_filename(dir_buff, dir_buff_len - 1, + view->db.str, "", "", 0); + dir->str= dir_buff; + + path->length= build_table_filename(path_buff, path_buff_len - 1, + view->db.str, view->table_name.str, reg_ext, 0); + path->str= path_buff; + + file->str= path->str + dir->length; + file->length= path->length - dir->length; +} + +/* number of required parameters for making view */ +static const int required_view_parameters= 15; + +/* + table of VIEW .frm field descriptors + + Note that one should NOT change the order for this, as it's used by + parse() +*/ +static 
File_option view_parameters[]= +{{{ STRING_WITH_LEN("query")}, + my_offsetof(TABLE_LIST, select_stmt), + FILE_OPTIONS_ESTRING}, + {{ STRING_WITH_LEN("md5")}, + my_offsetof(TABLE_LIST, md5), + FILE_OPTIONS_STRING}, + {{ STRING_WITH_LEN("updatable")}, + my_offsetof(TABLE_LIST, updatable_view), + FILE_OPTIONS_ULONGLONG}, + {{ STRING_WITH_LEN("algorithm")}, + my_offsetof(TABLE_LIST, algorithm), + FILE_OPTIONS_VIEW_ALGO}, + {{ STRING_WITH_LEN("definer_user")}, + my_offsetof(TABLE_LIST, definer.user), + FILE_OPTIONS_STRING}, + {{ STRING_WITH_LEN("definer_host")}, + my_offsetof(TABLE_LIST, definer.host), + FILE_OPTIONS_STRING}, + {{ STRING_WITH_LEN("suid")}, + my_offsetof(TABLE_LIST, view_suid), + FILE_OPTIONS_ULONGLONG}, + {{ STRING_WITH_LEN("with_check_option")}, + my_offsetof(TABLE_LIST, with_check), + FILE_OPTIONS_ULONGLONG}, + {{ STRING_WITH_LEN("timestamp")}, + my_offsetof(TABLE_LIST, hr_timestamp), + FILE_OPTIONS_TIMESTAMP}, + {{ STRING_WITH_LEN("create-version")}, + my_offsetof(TABLE_LIST, file_version), + FILE_OPTIONS_ULONGLONG}, + {{ STRING_WITH_LEN("source")}, + my_offsetof(TABLE_LIST, source), + FILE_OPTIONS_ESTRING}, + {{(char*) STRING_WITH_LEN("client_cs_name")}, + my_offsetof(TABLE_LIST, view_client_cs_name), + FILE_OPTIONS_STRING}, + {{(char*) STRING_WITH_LEN("connection_cl_name")}, + my_offsetof(TABLE_LIST, view_connection_cl_name), + FILE_OPTIONS_STRING}, + {{(char*) STRING_WITH_LEN("view_body_utf8")}, + my_offsetof(TABLE_LIST, view_body_utf8), + FILE_OPTIONS_ESTRING}, + {{ STRING_WITH_LEN("mariadb-version")}, + my_offsetof(TABLE_LIST, mariadb_version), + FILE_OPTIONS_ULONGLONG}, + {{NullS, 0}, 0, + FILE_OPTIONS_STRING} +}; + + +static File_option view_timestamp_parameters[]= +{ + + {{ C_STRING_WITH_LEN("timestamp")}, 0, FILE_OPTIONS_TIMESTAMP}, + {{NullS, 0}, 0, FILE_OPTIONS_STRING} +}; + + +static LEX_CSTRING view_file_type[]= {{STRING_WITH_LEN("VIEW") }}; + + +int mariadb_fix_view(THD *thd, TABLE_LIST *view, bool wrong_checksum, + bool swap_alg) +{ + 
char dir_buff[FN_REFLEN + 1], path_buff[FN_REFLEN + 1]; + LEX_CSTRING dir, file, path; + DBUG_ENTER("mariadb_fix_view"); + + if (!wrong_checksum && view->mariadb_version) + DBUG_RETURN(HA_ADMIN_OK); + + make_view_filename(&dir, dir_buff, sizeof(dir_buff), + &path, path_buff, sizeof(path_buff), + &file, view); + /* init timestamp */ + if (!view->hr_timestamp.str) + view->hr_timestamp.str= view->timestamp_buffer; + + if (swap_alg && view->algorithm != VIEW_ALGORITHM_UNDEFINED) + { + DBUG_ASSERT(view->algorithm == VIEW_ALGORITHM_MERGE || + view->algorithm == VIEW_ALGORITHM_TMPTABLE); + if (view->algorithm == VIEW_ALGORITHM_MERGE) + view->algorithm= VIEW_ALGORITHM_TMPTABLE; + else + view->algorithm= VIEW_ALGORITHM_MERGE; + } + else + swap_alg= 0; + if (wrong_checksum) + { + if (view->md5.length != VIEW_MD5_LEN) + { + if ((view->md5.str= (char *)thd->alloc(VIEW_MD5_LEN + 1)) == NULL) + DBUG_RETURN(HA_ADMIN_FAILED); + } + view->calc_md5(const_cast<char*>(view->md5.str)); + view->md5.length= VIEW_MD5_LEN; + } + view->mariadb_version= MYSQL_VERSION_ID; + + if (sql_create_definition_file(&dir, &file, view_file_type, + (uchar*)view, view_parameters)) + { + sql_print_error("View '%-.192s'.'%-.192s': algorithm swap error.", + view->db.str, view->table_name.str); + DBUG_RETURN(HA_ADMIN_INTERNAL_ERROR); + } + sql_print_information("View %`s.%`s: the version is set to %llu%s%s", + view->db.str, view->table_name.str, + view->mariadb_version, + (wrong_checksum ? ", checksum corrected" : ""), + (swap_alg ? + ((view->algorithm == VIEW_ALGORITHM_MERGE) ? 
+ ", algorithm restored to be MERGE" + : ", algorithm restored to be TEMPTABLE") + : "")); + + + DBUG_RETURN(HA_ADMIN_OK); +} + + +/* + Register VIEW (write .frm & process .frm's history backups) + + SYNOPSIS + mysql_register_view() + thd - thread handler + view - view description + mode - VIEW_CREATE_NEW, VIEW_ALTER, VIEW_CREATE_OR_REPLACE + backup_file_name - Store name for backup of old view definition here + + RETURN + 0 OK + -1 Error + 1 Error and error message given +*/ + +static int mysql_register_view(THD *thd, DDL_LOG_STATE *ddl_log_state, + TABLE_LIST *view, enum_view_create_mode mode, + char *backup_file_name) +{ + LEX *lex= thd->lex; + + /* + Ensure character set number != 17 (character set = filename) and mbminlen=1 + because these character sets are not parser friendly, which can give weird + sequence in .frm file of view and later give parsing error. + */ + DBUG_ASSERT(thd->charset()->mbminlen == 1 && thd->charset()->number != 17); + + /* + View definition query -- a SELECT statement that fully defines view. It + is generated from the Item-tree built from the original (specified by + the user) query. The idea is that generated query should eliminates all + ambiguities and fix view structure at CREATE-time (once for all). + Item::print() virtual operation is used to generate view definition + query. + + INFORMATION_SCHEMA query (IS query) -- a SQL statement describing a + view that is shown in INFORMATION_SCHEMA. Basically, it is 'view + definition query' with text literals converted to UTF8 and without + character set introducers. + + For example: + Let's suppose we have: + CREATE TABLE t1(a INT, b INT); + User specified query: + CREATE VIEW v1(x, y) AS SELECT * FROM t1; + Generated query: + SELECT a AS x, b AS y FROM t1; + IS query: + SELECT a AS x, b AS y FROM t1; + + View definition query is stored in the client character set. 
+ */ + StringBuffer<4096> view_query(thd->charset()); + StringBuffer<4096> is_query(system_charset_info); + + char md5[MD5_BUFF_LENGTH]; + bool can_be_merged; + char dir_buff[FN_REFLEN + 1], path_buff[FN_REFLEN + 1]; + LEX_CSTRING dir, file, path; + int error= 0; + bool old_view_exists= 0; + DBUG_ENTER("mysql_register_view"); + + /* Generate view definition and IS queries. */ + view_query.length(0); + is_query.length(0); + backup_file_name[0]= 0; + { + Sql_mode_save_for_frm_handling sql_mode_save(thd); + + lex->unit.print(&view_query, enum_query_type(QT_VIEW_INTERNAL | + QT_ITEM_ORIGINAL_FUNC_NULLIF | + QT_NO_WRAPPERS_FOR_TVC_IN_VIEW)); + lex->unit.print(&is_query, enum_query_type(QT_TO_SYSTEM_CHARSET | + QT_WITHOUT_INTRODUCERS | + QT_ITEM_ORIGINAL_FUNC_NULLIF | + QT_NO_WRAPPERS_FOR_TVC_IN_VIEW)); + } + DBUG_PRINT("info", ("View: %.*s", view_query.length(), view_query.ptr())); + + /* fill structure */ + view->source= thd->lex->create_view->select; + + if (!thd->make_lex_string(&view->select_stmt, view_query.ptr(), + view_query.length())) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + error= -1; + goto err; + } + + /* + version 1 - before 10.0.5 + version 2 - empty definer_host means a role + */ + view->file_version= 2; + view->mariadb_version= MYSQL_VERSION_ID; + view->calc_md5(md5); + if (!(view->md5.str= (char*) thd->memdup(md5, VIEW_MD5_LEN))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + error= -1; + goto err; + } + view->md5.length= VIEW_MD5_LEN; + can_be_merged= lex->can_be_merged(); + if (lex->create_view->algorithm == VIEW_ALGORITHM_MERGE && + !lex->can_be_merged()) + { + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_WARN_VIEW_MERGE, + ER_THD(thd, ER_WARN_VIEW_MERGE)); + lex->create_view->algorithm= DTYPE_ALGORITHM_UNDEFINED; + } + view->algorithm= lex->create_view->algorithm; + view->definer.user= lex->definer->user; + view->definer.host= lex->definer->host; + view->view_suid= lex->create_view->suid; + view->with_check= lex->create_view->check; + + 
DBUG_EXECUTE_IF("simulate_register_view_failure", + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + error= -1; + goto err; + }); + + if ((view->updatable_view= (can_be_merged && + view->algorithm != VIEW_ALGORITHM_TMPTABLE))) + { + /* TODO: change here when we will support UNIONs */ + for (TABLE_LIST *tbl= lex->first_select_lex()->table_list.first; + tbl; + tbl= tbl->next_local) + { + if ((tbl->view && !tbl->updatable_view) || tbl->schema_table) + { + view->updatable_view= 0; + break; + } + for (TABLE_LIST *up= tbl; up; up= up->embedding) + { + if (up->outer_join) + { + view->updatable_view= 0; + goto loop_out; + } + } + } + } +loop_out: + /* print file name */ + make_view_filename(&dir, dir_buff, sizeof(dir_buff), + &path, path_buff, sizeof(path_buff), + &file, view); + /* init timestamp */ + if (!view->hr_timestamp.str) + view->hr_timestamp.str= view->timestamp_buffer; + + /* check old .frm */ + { + char path_buff[FN_REFLEN]; + LEX_CSTRING path; + File_parser *parser; + + path.str= path_buff; + fn_format(path_buff, file.str, dir.str, "", MY_UNPACK_FILENAME); + path.length= strlen(path_buff); + + if (ha_table_exists(thd, &view->db, &view->table_name)) + { + old_view_exists= 1; + if (lex->create_info.if_not_exists()) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_TABLE_EXISTS_ERROR, + ER_THD(thd, ER_TABLE_EXISTS_ERROR), + view->table_name.str); + DBUG_RETURN(0); + } + else if (mode == VIEW_CREATE_NEW) + { + my_error(ER_TABLE_EXISTS_ERROR, MYF(0), view->alias.str); + error= -1; + goto err; + } + + if (!(parser= sql_parse_prepare(&path, thd->mem_root, 0))) + { + error= 1; + goto err; + } + + if (!parser->ok() || !is_equal(&view_type, parser->type())) + { + my_error(ER_WRONG_OBJECT, MYF(0), view->db.str, view->table_name.str, + "VIEW"); + error= -1; + goto err; + } + + /* + TODO: read dependence list, too, to process cascade/restrict + TODO: special cascade/restrict procedure for alter? 
+ */ + } + else + { + if (mode == VIEW_ALTER) + { + my_error(ER_NO_SUCH_TABLE, MYF(0), view->db.str, view->alias.str); + error= -1; + goto err; + } + } + } + + /* Initialize view creation context from the environment. */ + + view->view_creation_ctx= View_creation_ctx::create(thd); + + /* + Set LEX_STRING attributes in view-structure for parser to create + frm-file. + */ + + view->view_client_cs_name= view->view_creation_ctx->get_client_cs()->cs_name; + + view->view_connection_cl_name= + view->view_creation_ctx->get_connection_cl()->coll_name; + + if (!thd->make_lex_string(&view->view_body_utf8, is_query.ptr(), + is_query.length())) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + error= -1; + goto err; + } + + /* + Check that table of main select do not used in subqueries. + + This test can catch only very simple cases of such non-updateable views, + all other will be detected before updating commands execution. + (it is more optimisation then real check) + + NOTE: this skip cases of using table via VIEWs, joined VIEWs, VIEWs with + UNION + */ + if (view->updatable_view && + !lex->first_select_lex()->master_unit()->is_unit_op() && + !(lex->first_select_lex()->table_list.first)->next_local && + find_table_in_global_list(lex->query_tables->next_global, + &lex->query_tables->db, + &lex->query_tables->table_name)) + { + view->updatable_view= 0; + } + + if (view->with_check != VIEW_CHECK_NONE && + !view->updatable_view) + { + my_error(ER_VIEW_NONUPD_CHECK, MYF(0), view->db.str, view->table_name.str); + error= -1; + goto err; + } + + ddl_log_create_view(ddl_log_state, &path, old_view_exists ? 
+ DDL_CREATE_VIEW_PHASE_DELETE_VIEW_COPY : + DDL_CREATE_VIEW_PHASE_NO_OLD_VIEW); + + debug_crash_here("ddl_log_create_before_copy_view"); + + if (old_view_exists) + { + LEX_CSTRING backup_name= { backup_file_name, 0 }; + if (sql_backup_definition_file(&path, &backup_name)) + { + error= 1; + goto err; + } + ddl_log_update_phase(ddl_log_state, DDL_CREATE_VIEW_PHASE_OLD_VIEW_COPIED); + } + + debug_crash_here("ddl_log_create_before_create_view"); + if (sql_create_definition_file(&dir, &file, view_file_type, + (uchar*)view, view_parameters)) + { + error= thd->is_error() ? -1 : 1; + goto err; + } + debug_crash_here("ddl_log_create_after_create_view"); + + DBUG_RETURN(0); +err: + view->select_stmt.str= NULL; + view->select_stmt.length= 0; + view->md5.str= NULL; + view->md5.length= 0; + DBUG_RETURN(error); +} + +/** + Reads view definition "version" + + @param[in] share Share object of view + + @return true on error, otherwise false +*/ + +bool mariadb_view_version_get(TABLE_SHARE *share) +{ + DBUG_ASSERT(share->is_view); + DBUG_ASSERT(share->tabledef_version.length == 0); + + if (!(share->tabledef_version.str= + (uchar*) alloc_root(&share->mem_root, + MICROSECOND_TIMESTAMP_BUFFER_SIZE))) + return TRUE; + + DBUG_ASSERT(share->view_def != NULL); + if (share->view_def->parse((uchar *) &share->tabledef_version, NULL, + view_timestamp_parameters, 1, + &file_parser_dummy_hook)) + { + // safety if the definition file is brocken + share->tabledef_version.length= 0; + my_error(ER_TABLE_CORRUPT, MYF(0), + share->db.str, share->table_name.str); + return TRUE; + } + DBUG_ASSERT(share->tabledef_version.length == MICROSECOND_TIMESTAMP_BUFFER_SIZE-1); + + return FALSE; +} + +/** + read VIEW .frm and create structures + + @param[in] thd Thread handler + @param[in] share Share object of view + @param[in] table TABLE_LIST structure for filling + @param[in] open_view_no_parse Flag to indicate open view but + do not parse. + + @return false-in case of success, true-in case of error. 
+*/ +bool mysql_make_view(THD *thd, TABLE_SHARE *share, TABLE_LIST *table, + bool open_view_no_parse) +{ + SELECT_LEX_NODE *end; + SELECT_LEX *UNINIT_VAR(view_select); + LEX *old_lex, *lex; + Query_arena *arena, backup; + TABLE_LIST *top_view= table->top_table(); + bool UNINIT_VAR(parse_status); + bool result, view_is_mergeable; + TABLE_LIST *UNINIT_VAR(view_main_select_tables); + DBUG_ENTER("mysql_make_view"); + DBUG_PRINT("info", ("table: %p (%s)", table, table->table_name.str)); + + if (table->required_type == TABLE_TYPE_NORMAL) + { + my_error(ER_WRONG_OBJECT, MYF(0), share->db.str, share->table_name.str, + "BASE TABLE"); + DBUG_RETURN(true); + } + + if (table->view) + { + /* + It's an execution of a PS/SP and the view has already been unfolded + into a list of used tables. Now we only need to update the information + about granted privileges in the view tables with the actual data + stored in MySQL privilege system. We don't need to restore the + required privileges (by calling register_want_access) because they has + not changed since PREPARE or the previous execution: the only case + when this information is changed is execution of UPDATE on a view, but + the original want_access is restored in its end. + */ + if (!table->prelocking_placeholder && table->prepare_security(thd)) + { + DBUG_RETURN(1); + } + DBUG_PRINT("info", + ("VIEW %s.%s is already processed on previous PS/SP execution", + table->view_db.str, table->view_name.str)); + + /* + Clear old variables in the TABLE_LIST that could be left from an old view + This is only needed if there was an error at last usage of view, + in which case the reinit call wasn't done. + See MDEV-6668 for details. 
+ */ + mysql_handle_single_derived(thd->lex, table, DT_REINIT); + + DEBUG_SYNC(thd, "after_cached_view_opened"); + DBUG_ASSERT(share->tabledef_version.length); + DBUG_RETURN(0); + } + + if (table->index_hints && table->index_hints->elements) + { + my_error(ER_KEY_DOES_NOT_EXISTS, MYF(0), + table->index_hints->head()->key_name.str, table->table_name.str); + DBUG_RETURN(TRUE); + } + + /* check loop via view definition */ + for (TABLE_LIST *precedent= table->referencing_view; + precedent; + precedent= precedent->referencing_view) + { + if (precedent->view_name.length == table->table_name.length && + precedent->view_db.length == table->db.length && + my_strcasecmp(system_charset_info, + precedent->view_name.str, table->table_name.str) == 0 && + my_strcasecmp(system_charset_info, + precedent->view_db.str, table->db.str) == 0) + { + my_error(ER_VIEW_RECURSIVE, MYF(0), + top_view->view_db.str, top_view->view_name.str); + DBUG_RETURN(TRUE); + } + } + + /* + For now we assume that tables will not be changed during PS life (it + will be TRUE as far as we make new table cache). 
+ */ + old_lex= thd->lex; + arena= thd->activate_stmt_arena_if_needed(&backup); + + /* init timestamp */ + if (!table->hr_timestamp.str) + table->hr_timestamp.str= table->timestamp_buffer; + /* prepare default values for old format */ + table->view_suid= TRUE; + table->definer.user.str= table->definer.host.str= 0; + table->definer.user.length= table->definer.host.length= 0; + + /* + TODO: when VIEWs will be stored in cache (not only parser), + table mem_root should be used here + */ + DBUG_ASSERT(share->view_def != NULL); + if ((result= share->view_def->parse((uchar*)table, thd->mem_root, + view_parameters, + required_view_parameters, + &file_parser_dummy_hook))) + goto end; + DBUG_ASSERT(share->tabledef_version.length); + if (!table->tabledef_version.length) + { + table->set_view_def_version(&table->hr_timestamp); + } + + /* + check old format view .frm + */ + if (!table->definer.user.str) + { + DBUG_ASSERT(!table->definer.host.str && + !table->definer.user.length && + !table->definer.host.length); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_VIEW_FRM_NO_USER, ER_THD(thd, ER_VIEW_FRM_NO_USER), + table->db.str, table->table_name.str); + get_default_definer(thd, &table->definer, false); + } + + /* + since 10.0.5 definer.host can never be "" for a User, but it's + always "" for a Role. Before 10.0.5 it could be "" for a User, + but roles didn't exist. file_version helps. + */ + if (!table->definer.host.str[0] && table->file_version < 2) + table->definer.host= host_not_specified; // User, not Role + + /* + Initialize view definition context by character set names loaded from + the view definition file. Use UTF8 character set if view definition + file is of old version and does not contain the character set names. 
+ */ + table->view_creation_ctx= View_creation_ctx::create(thd, table); + + if (open_view_no_parse) + { + if (arena) + thd->restore_active_arena(arena, &backup); + DBUG_RETURN(FALSE); + } + + /* + Save VIEW parameters, which will be wiped out by derived table + processing + */ + table->view_db= table->db; + table->view_name= table->table_name; + /* + We don't invalidate a prepared statement when a view changes, + or when someone creates a temporary table. + Instead, the view is inlined into the body of the statement + upon the first execution. Below, make sure that on + re-execution of a prepared statement we don't prefer + a temporary table to the view, if the view name was shadowed + with a temporary table with the same name. + This assignment ensures that on re-execution open_table() will + not try to call find_temporary_table() for this TABLE_LIST, + but will invoke open_table_from_share(), which will + eventually call this function. + */ + table->open_type= OT_BASE_ONLY; + + /* + Clear old variables in the TABLE_LIST that could be left from an old view + */ + table->merged_for_insert= FALSE; + + /*TODO: md5 test here and warning if it is differ */ + + + /* + TODO: TABLE mem root should be used here when VIEW will be stored in + TABLE cache + + now Lex placed in statement memory + */ + + table->view= lex= thd->lex= (LEX*) new(thd->mem_root) st_lex_local; + if (!table->view) + { + result= true; + goto end; + } + + { + char old_db_buf[SAFE_NAME_LEN+1]; + LEX_CSTRING old_db= { old_db_buf, sizeof(old_db_buf) }; + bool dbchanged; + Parser_state parser_state; + if (parser_state.init(thd, table->select_stmt.str, + (uint)table->select_stmt.length)) + goto err; + + /* + Use view db name as thread default database, in order to ensure + that the view is parsed and prepared correctly. 
+ */ + if ((result= mysql_opt_change_db(thd, &table->view_db, + (LEX_STRING*) &old_db, 1, + &dbchanged))) + goto end; + + lex_start(thd); + lex->stmt_lex= old_lex; + + Sql_mode_save_for_frm_handling sql_mode_save(thd); + /* Parse the query. */ + + parse_status= parse_sql(thd, & parser_state, table->view_creation_ctx); + + view_select= lex->first_select_lex(); + + /* Restore environment. */ + + if ((old_lex->sql_command == SQLCOM_SHOW_FIELDS) || + (old_lex->sql_command == SQLCOM_SHOW_CREATE)) + lex->sql_command= old_lex->sql_command; + + if (dbchanged && mysql_change_db(thd, &old_db, TRUE)) + goto err; + } + if (!parse_status) + { + TABLE_LIST *view_tables= lex->query_tables; + TABLE_LIST *view_tables_tail= 0; + TABLE_LIST *tbl; + Security_context *security_ctx= 0; + + /* + Check rights to run commands which show underlying tables. + In the optimizer trace we would not like to show trace for + cases when the current user does not have rights for the + underlying tables. + */ + if (!table->prelocking_placeholder) + opt_trace_disable_if_no_view_access(thd, table, view_tables); + + /* + Check rights to run commands (ANALYZE SELECT, EXPLAIN SELECT & + SHOW CREATE) which show underlying tables. + Skip this step if we are opening view for prelocking only. + */ + if (!table->prelocking_placeholder && (old_lex->describe || + old_lex->analyze_stmt)) + { + /* + The user we run EXPLAIN as (either the connected user who issued + the EXPLAIN statement, or the definer of a SUID stored routine + which contains the EXPLAIN) should have both SHOW_VIEW_ACL and + SELECT_ACL on the view being opened as well as on all underlying + views since EXPLAIN will disclose their structure. This user also + should have SELECT_ACL on all underlying tables of the view since + this EXPLAIN will disclose information about the number of rows in it. 
+ + To perform this privilege check we create auxiliary TABLE_LIST object + for the view in order a) to avoid trashing "table->grant" member for + original table list element, which contents can be important at later + stage for column-level privilege checking b) get TABLE_LIST object + with "security_ctx" member set to 0, i.e. forcing check_table_access() + to use active user's security context. + + There is no need for creating similar copies of TABLE_LIST elements + for underlying tables since they just have been constructed and thus + have TABLE_LIST::security_ctx == 0 and fresh TABLE_LIST::grant member. + + Finally at this point making sure we have SHOW_VIEW_ACL on the views + will suffice as we implicitly require SELECT_ACL anyway. + */ + + TABLE_LIST view_no_suid; + bzero(static_cast<void *>(&view_no_suid), sizeof(TABLE_LIST)); + view_no_suid.db= table->db; + view_no_suid.table_name= table->table_name; + + DBUG_ASSERT(view_tables == NULL || view_tables->security_ctx == NULL); + + if (check_table_access(thd, SELECT_ACL, view_tables, + FALSE, UINT_MAX, TRUE) || + check_table_access(thd, SHOW_VIEW_ACL, &view_no_suid, + FALSE, UINT_MAX, TRUE)) + { + my_message(ER_VIEW_NO_EXPLAIN, ER_THD(thd, ER_VIEW_NO_EXPLAIN), + MYF(0)); + goto err; + } + } + else if (!table->prelocking_placeholder && + (old_lex->sql_command == SQLCOM_SHOW_CREATE) && + !table->belong_to_view) + { + if (check_table_access(thd, SHOW_VIEW_ACL, table, FALSE, UINT_MAX, FALSE)) + goto err; + } + + if (!(table->view_tables= + (List<TABLE_LIST>*) new(thd->mem_root) List<TABLE_LIST>)) + goto err; + /* + mark to avoid temporary table using and put view reference and find + last view table + */ + for (tbl= view_tables; + tbl; + tbl= (view_tables_tail= tbl)->next_global) + { + tbl->open_type= OT_BASE_ONLY; + tbl->belong_to_view= top_view; + tbl->referencing_view= table; + tbl->prelocking_placeholder= table->prelocking_placeholder; + /* + First we fill want_privilege with SELECT_ACL (this is needed for the + tables which belongs to view 
subqueries and temporary table views, + then for the merged view underlying tables we will set wanted + privileges of top_view + */ + tbl->grant.want_privilege= SELECT_ACL; + + /* + After unfolding the view we lose the list of tables referenced in it + (we will have only a list of underlying tables in case of MERGE + algorithm, which does not include the tables referenced from + subqueries used in view definition). + Let's build a list of all tables referenced in the view. + */ + table->view_tables->push_back(tbl); + } + + /* + Put tables of VIEW after VIEW TABLE_LIST + + NOTE: It is important for UPDATE/INSERT/DELETE checks to have this + tables just after VIEW instead of tail of list, to be able check that + table is unique. Also we store old next table for the same purpose. + */ + if (view_tables) + { + if (table->next_global) + { + view_tables_tail->next_global= table->next_global; + table->next_global->prev_global= &view_tables_tail->next_global; + } + else + { + old_lex->query_tables_last= &view_tables_tail->next_global; + } + view_tables->prev_global= &table->next_global; + table->next_global= view_tables; + } + + /* + If the view's body needs row-based binlogging (e.g. the VIEW is created + from SELECT UUID()), the top statement also needs it. + */ + old_lex->set_stmt_unsafe_flags(lex->get_stmt_unsafe_flags()); + + view_is_mergeable= (table->algorithm != VIEW_ALGORITHM_TMPTABLE && + lex->can_be_merged()); + + if (view_is_mergeable) + { + /* + Currently 'view_main_select_tables' differs from 'view_tables' + only then view has CONVERT_TZ() function in its select list. + This may change in future, for example if we enable merging of + views with subqueries in select list. 
+ */ + view_main_select_tables= lex->first_select_lex()->table_list.first; + /* + Mergeable view can be used for inserting, so we move the flag down + */ + if (table->for_insert_data) + { + for (TABLE_LIST *t= view_main_select_tables; + t; + t= t->next_local) + { + t->for_insert_data= TRUE; + } + } + + /* + Let us set proper lock type for tables of the view's main + select since we may want to perform update or insert on + view. This won't work for view containing union. But this is + ok since we don't allow insert and update on such views + anyway. + */ + for (tbl= view_main_select_tables; tbl; tbl= tbl->next_local) + { + /* We have to keep the lock type for sequence tables */ + if (!tbl->sequence) + tbl->lock_type= table->lock_type; + tbl->mdl_request.set_type(table->mdl_request.type); + tbl->updating= table->updating; + } + /* + If the view is mergeable, we might want to + INSERT/UPDATE/DELETE into tables of this view. Preserve the + original sql command and 'duplicates' of the outer lex. + This is used later in set_trg_event_type_for_command. + */ + lex->sql_command= old_lex->sql_command; + lex->duplicates= old_lex->duplicates; + + /* Fields in this view can be used in upper select in case of merge. */ + if (table->select_lex) + table->select_lex->add_where_field(lex->first_select_lex()); + } + /* + This method has a dependency on the proper lock type being set, + so in case of views should be called here. + */ + lex->set_trg_event_type_for_tables(); + + /* + If we are opening this view as part of implicit LOCK TABLES, then + this view serves as simple placeholder and we should not continue + further processing. 
+ */ + if (table->prelocking_placeholder) + goto ok2; + + old_lex->derived_tables|= (DERIVED_VIEW | lex->derived_tables); + + /* move SQL_NO_CACHE & Co to whole query */ + old_lex->safe_to_cache_query= (old_lex->safe_to_cache_query && + lex->safe_to_cache_query); + /* move SQL_CACHE to whole query */ + if (lex->first_select_lex()->options & OPTION_TO_QUERY_CACHE) + old_lex->first_select_lex()->options|= OPTION_TO_QUERY_CACHE; + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (table->view_suid) + { + /* + For suid views prepare a security context for checking underlying + objects of the view. + */ + if (!(table->view_sctx= (Security_context *) + thd->stmt_arena->calloc(sizeof(Security_context)))) + goto err; + security_ctx= table->view_sctx; + } + else + { + /* + For non-suid views inherit security context from view's table list. + This allows properly handle situation when non-suid view is used + from within suid view. + */ + security_ctx= table->security_ctx; + } +#endif + + /* Assign the context to the tables referenced in the view */ + if (view_tables) + { + DBUG_ASSERT(view_tables_tail); + for (tbl= view_tables; tbl != view_tables_tail->next_global; + tbl= tbl->next_global) + tbl->security_ctx= security_ctx; + } + + /* assign security context to SELECT name resolution contexts of view */ + for(SELECT_LEX *sl= lex->all_selects_list; + sl; + sl= sl->next_select_in_list()) + sl->context.security_ctx= security_ctx; + + /* + Setup an error processor to hide error messages issued by stored + routines referenced in the view + */ + for (SELECT_LEX *sl= lex->all_selects_list; + sl; + sl= sl->next_select_in_list()) + { + sl->context.error_processor= &view_error_processor; + sl->context.error_processor_data= (void *)table; + } + + view_select->master_unit()->is_view= true; + + /* + check MERGE algorithm ability + - algorithm is not explicit TEMPORARY TABLE + - VIEW SELECT allow merging + - VIEW used in subquery or command support MERGE algorithm + */ + if (view_is_mergeable && + 
(table->select_lex->master_unit() != &old_lex->unit || + old_lex->can_use_merged()) && + !old_lex->can_not_use_merged()) + { + /* lex should contain at least one table */ + DBUG_ASSERT(view_main_select_tables != 0); + + List_iterator_fast<TABLE_LIST> ti(view_select->top_join_list); + + table->derived_type= VIEW_ALGORITHM_MERGE; + DBUG_PRINT("info", ("algorithm: MERGE")); + table->updatable= (table->updatable_view != 0); + table->effective_with_check= + old_lex->get_effective_with_check(table); + table->merge_underlying_list= view_main_select_tables; + + /* Fill correct wanted privileges. */ + for (tbl= view_main_select_tables; tbl; tbl= tbl->next_local) + tbl->grant.want_privilege= top_view->grant.orig_want_privilege; + + /* prepare view context */ + lex->first_select_lex()-> + context.resolve_in_table_list_only(view_main_select_tables); + lex->first_select_lex()->context.outer_context= 0; + lex->first_select_lex()->select_n_having_items+= + table->select_lex->select_n_having_items; + + table->where= view_select->where; + + /* + We can safely ignore the VIEW's ORDER BY if we merge into union + branch, as order is not important there. + */ + if (!table->select_lex->master_unit()->is_unit_op() && + table->select_lex->order_list.elements == 0) + { + table->select_lex->order_list. 
+ push_back(&lex->first_select_lex()->order_list); + lex->first_select_lex()->order_list.empty(); + } + else + { + if (old_lex->sql_command == SQLCOM_SELECT && + (old_lex->describe & DESCRIBE_EXTENDED) && + lex->first_select_lex()->order_list.elements && + !table->select_lex->master_unit()->is_unit_op()) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_VIEW_ORDERBY_IGNORED, + ER_THD(thd, ER_VIEW_ORDERBY_IGNORED), + table->db.str, table->table_name.str); + } + } + /* + This SELECT_LEX will be linked in global SELECT_LEX list + to make it processed by mysql_handle_derived(), + but it will not be included to SELECT_LEX tree, because it + will not be executed + */ + goto ok; + } + + table->derived_type= VIEW_ALGORITHM_TMPTABLE; + DBUG_PRINT("info", ("algorithm: TEMPORARY TABLE")); + view_select->linkage= DERIVED_TABLE_TYPE; + table->updatable= 0; + table->effective_with_check= VIEW_CHECK_NONE; + + table->derived= &lex->unit; + } + else + goto err; + +ok: + /* SELECT tree link */ + lex->unit.include_down(table->select_lex); + lex->unit.slave= view_select; // fix include_down initialisation + /* global SELECT list linking */ + /* + The primary SELECT_LEX is always last (because parsed first) if WITH not + used, otherwise it is good start point for last element finding + */ + for (end= view_select; end->link_next; end= end->link_next); + end->link_next= old_lex->all_selects_list; + old_lex->all_selects_list->link_prev= &end->link_next; + old_lex->all_selects_list= lex->all_selects_list; + lex->all_selects_list->link_prev= + (st_select_lex_node**)&old_lex->all_selects_list; + +ok2: + DBUG_ASSERT(lex == thd->lex); + thd->lex= old_lex; // Needed for prepare_security + result= !table->prelocking_placeholder && table->prepare_security(thd); + + lex_end(lex); +end: + if (arena) + thd->restore_active_arena(arena, &backup); + thd->lex= old_lex; + status_var_increment(thd->status_var.opened_views); + DBUG_RETURN(result); + +err: + DBUG_ASSERT(thd->lex == 
table->view); + lex_end(thd->lex); + delete table->view; + table->view= 0; // now it is not VIEW placeholder + result= 1; + goto end; +} + + +/* + drop view + + SYNOPSIS + mysql_drop_view() + thd - thread handle + views - views to delete + drop_mode - cascade/check + + RETURN VALUE + FALSE OK + TRUE Error +*/ + +bool mysql_drop_view(THD *thd, TABLE_LIST *views, enum_drop_mode drop_mode) +{ + char path[FN_REFLEN + 1]; + TABLE_LIST *view; + String non_existant_views; + bool delete_error= FALSE, wrong_object_name= FALSE; + bool some_views_deleted= FALSE; + bool something_wrong= FALSE; + uint not_exists_count= 0, view_count= 0; + DDL_LOG_STATE ddl_log_state; + DBUG_ENTER("mysql_drop_view"); + + bzero(&ddl_log_state, sizeof(ddl_log_state)); + + /* + We can't allow dropping of unlocked view under LOCK TABLES since this + might lead to deadlock. But since we can't really lock view with LOCK + TABLES we have to simply prohibit dropping of views. + */ + + if (unlikely(thd->locked_tables_mode)) + { + my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0)); + DBUG_RETURN(TRUE); + } + + if (unlikely(lock_table_names(thd, views, 0, + thd->variables.lock_wait_timeout, 0))) + DBUG_RETURN(TRUE); + + for (view= views; view; view= view->next_local) + { + LEX_CSTRING cpath; + bool not_exist; + size_t length; + length= build_table_filename(path, sizeof(path) - 1, + view->db.str, view->table_name.str, reg_ext, 0); + lex_string_set3(&cpath, path, length); + + if ((not_exist= my_access(path, F_OK)) || !dd_frm_is_view(thd, path)) + { + char name[FN_REFLEN]; + size_t length= my_snprintf(name, sizeof(name), "%s.%s", view->db.str, + view->table_name.str); + if (non_existant_views.length()) + non_existant_views.append(','); + non_existant_views.append(name, length); + + if (!not_exist) + { + wrong_object_name= 1; + my_error(ER_WRONG_OBJECT, MYF(ME_WARNING), view->db.str, + view->table_name.str, "VIEW"); + } + else + not_exists_count++; + continue; + } + if (!view_count++) + { + if 
(ddl_log_drop_view_init(&ddl_log_state, &thd->db)) + DBUG_RETURN(TRUE); + } + if (ddl_log_drop_view(&ddl_log_state, &cpath, &view->db, + &view->table_name)) + DBUG_RETURN(TRUE); + debug_crash_here("ddl_log_drop_before_delete_view"); + if (unlikely(mysql_file_delete(key_file_frm, path, MYF(MY_WME)))) + delete_error= TRUE; + debug_crash_here("ddl_log_drop_after_delete_view"); + + some_views_deleted= TRUE; + + /* + For a view, there is a TABLE_SHARE object. + Remove it from the table definition cache, in case the view was cached. + */ + tdc_remove_table(thd, view->db.str, view->table_name.str); + query_cache_invalidate3(thd, view, 0); + sp_cache_invalidate(); + + backup_log_info ddl_log; + bzero(&ddl_log, sizeof(ddl_log)); + ddl_log.query= { C_STRING_WITH_LEN("DROP") }; + ddl_log.org_storage_engine_name= { C_STRING_WITH_LEN("VIEW") }; + ddl_log.org_database= view->db; + ddl_log.org_table= view->table_name; + backup_log_ddl(&ddl_log); + } + + something_wrong= (delete_error || + (!thd->lex->if_exists() && (not_exists_count || + wrong_object_name))); + + if (unlikely(non_existant_views.length())) + { + my_error(ER_UNKNOWN_VIEW, MYF(something_wrong ? 0 : ME_NOTE), + non_existant_views.c_ptr_safe()); + } + + if (some_views_deleted || !something_wrong) + { + /* if something goes wrong, bin-log with possible error code, + otherwise bin-log with error code cleared. 
+ */ + debug_crash_here("ddl_log_drop_before_binlog"); + thd->binlog_xid= thd->query_id; + ddl_log_update_xid(&ddl_log_state, thd->binlog_xid); + if (unlikely(write_bin_log(thd, !something_wrong, thd->query(), + thd->query_length()))) + something_wrong= 1; + thd->binlog_xid= 0; + debug_crash_here("ddl_log_drop_after_binlog"); + } + ddl_log_complete(&ddl_log_state); + + if (unlikely(something_wrong)) + { + DBUG_RETURN(TRUE); + } + my_ok(thd); + DBUG_RETURN(FALSE); +} + + +/* + check of key (primary or unique) presence in updatable view + + SYNOPSIS + check_key_in_view() + thd thread handle + view view for check with opened table + + DESCRIPTION + If it is VIEW and query have LIMIT clause then check that underlying + table of view contain one of following: + 1) primary key of underlying table + 2) unique key underlying table with fields for which NULL value is + impossible + 3) all fields of underlying table + + RETURN + FALSE OK + TRUE view do not contain key or all fields +*/ + +bool check_key_in_view(THD *thd, TABLE_LIST *view) +{ + TABLE *table; + Field_translator *trans, *end_of_trans; + KEY *key_info, *key_info_end; + DBUG_ENTER("check_key_in_view"); + + /* + we do not support updatable UNIONs in VIEW, so we can check just limit of + LEX::select_lex + */ + if ((!view->view && !view->belong_to_view) || + thd->lex->sql_command == SQLCOM_INSERT || + thd->lex->first_select_lex()->limit_params.select_limit == 0) + DBUG_RETURN(FALSE); /* it is normal table or query without LIMIT */ + table= view->table; + view= view->top_table(); + trans= view->field_translation; + key_info_end= (key_info= table->key_info)+ table->s->keys; + + end_of_trans= view->field_translation_end; + DBUG_ASSERT(table != 0 && view->field_translation != 0); + + { + /* + We should be sure that all fields are ready to get keys from them, but + this operation should not have influence on Field::query_id, to avoid + marking as used fields which are not used + */ + enum_column_usage saved_column_usage= 
thd->column_usage; + thd->column_usage= COLUMNS_WRITE; + DBUG_PRINT("info", ("thd->column_usage: %d", thd->column_usage)); + for (Field_translator *fld= trans; fld < end_of_trans; fld++) + { + if (fld->item->fix_fields_if_needed(thd, &fld->item)) + { + thd->column_usage= saved_column_usage; + DBUG_RETURN(TRUE); + } + } + thd->column_usage= saved_column_usage; + DBUG_PRINT("info", ("thd->column_usage: %d", thd->column_usage)); + } + /* Loop over all keys to see if a unique-not-null key is used */ + for (;key_info != key_info_end ; key_info++) + { + if ((key_info->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME) + { + KEY_PART_INFO *key_part= key_info->key_part; + KEY_PART_INFO *key_part_end= key_part + key_info->user_defined_key_parts; + + /* check that all key parts are used */ + for (;;) + { + Field_translator *k; + for (k= trans; k < end_of_trans; k++) + { + Item_field *field; + if ((field= k->item->field_for_view_update()) && + field->field == key_part->field) + break; + } + if (k == end_of_trans) + break; // Key is not possible + if (++key_part == key_part_end) + DBUG_RETURN(FALSE); // Found usable key + } + } + } + + DBUG_PRINT("info", ("checking if all fields of table are used")); + /* check all fields presence */ + { + Field **field_ptr; + Field_translator *fld; + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + for (fld= trans; fld < end_of_trans; fld++) + { + Item_field *field; + if ((field= fld->item->field_for_view_update()) && + field->field == *field_ptr) + break; + } + if (fld == end_of_trans) // If field didn't exists + { + /* + Keys or all fields of underlying tables are not found => we have + to check variable updatable_views_with_limit to decide should we + issue an error or just a warning + */ + if (thd->variables.updatable_views_with_limit) + { + /* update allowed, but issue warning */ + push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_WARN_VIEW_WITHOUT_KEY, + ER_THD(thd, ER_WARN_VIEW_WITHOUT_KEY)); + DBUG_RETURN(FALSE); + 
} + /* prohibit update */ + DBUG_RETURN(TRUE); + } + } + } + DBUG_RETURN(FALSE); +} + + +/* + insert fields from VIEW (MERGE algorithm) into given list + + SYNOPSIS + insert_view_fields() + thd thread handler + list list for insertion + view view for processing + + RETURN + FALSE OK + TRUE error (is not sent to client) +*/ + +bool insert_view_fields(THD *thd, List *list, TABLE_LIST *view) +{ + Field_translator *trans_end; + Field_translator *trans; + DBUG_ENTER("insert_view_fields"); + + if (!(trans= view->field_translation)) + DBUG_RETURN(FALSE); + trans_end= view->field_translation_end; + + for (Field_translator *entry= trans; entry < trans_end; entry++) + { + Item_field *fld; + if ((fld= entry->item->field_for_view_update())) + { + TABLE_SHARE *s= fld->context->table_list->table->s; + Lex_ident field_name= fld->field_name; + if (s->versioned && + (field_name.streq(s->vers_start_field()->field_name) || + field_name.streq(s->vers_end_field()->field_name))) + continue; + list->push_back(fld, thd->mem_root); + } + else + { + my_error(ER_NON_INSERTABLE_TABLE, MYF(0), view->alias.str, "INSERT"); + DBUG_RETURN(TRUE); + } + } + DBUG_RETURN(FALSE); +} + +/* + checking view md5 check sum + + SINOPSYS + view_checksum() + thd threar handler + view view for check + + RETUIRN + HA_ADMIN_OK OK + HA_ADMIN_NOT_IMPLEMENTED it is not VIEW + HA_ADMIN_WRONG_CHECKSUM check sum is wrong +*/ + +int view_checksum(THD *thd, TABLE_LIST *view) +{ + char md5[MD5_BUFF_LENGTH]; + if (!view->view || view->md5.length != VIEW_MD5_LEN) + return HA_ADMIN_NOT_IMPLEMENTED; + view->calc_md5(md5); + return (strncmp(md5, view->md5.str, VIEW_MD5_LEN) ? 
+ HA_ADMIN_WRONG_CHECKSUM : + HA_ADMIN_OK); +} + +/** + Check view + + @param thd thread handle + @param view view for check + @param check_opt check options + + @retval HA_ADMIN_OK OK + @retval HA_ADMIN_NOT_IMPLEMENTED it is not VIEW + @retval HA_ADMIN_WRONG_CHECKSUM check sum is wrong +*/ +int view_check(THD *thd, TABLE_LIST *view, HA_CHECK_OPT *check_opt) +{ + DBUG_ENTER("view_check"); + + int res= view_checksum(thd, view); + if (res != HA_ADMIN_OK) + DBUG_RETURN(res); + + if (((check_opt->sql_flags & TT_FOR_UPGRADE) && !view->mariadb_version)) + DBUG_RETURN(HA_ADMIN_NEEDS_UPGRADE); + + DBUG_RETURN(HA_ADMIN_OK); +} + + +/** + Repair view + + @param thd thread handle + @param view view for check + @param check_opt check options + + @retval HA_ADMIN_OK OK + @retval HA_ADMIN_NOT_IMPLEMENTED it is not VIEW + @retval HA_ADMIN_WRONG_CHECKSUM check sum is wrong +*/ + +int view_repair(THD *thd, TABLE_LIST *view, HA_CHECK_OPT *check_opt) +{ + DBUG_ENTER("view_repair"); + bool swap_alg= (check_opt->sql_flags & TT_FROM_MYSQL); + bool wrong_checksum= view_checksum(thd, view) != HA_ADMIN_OK; + int ret; + if (wrong_checksum || !view->mariadb_version) + { + ret= mariadb_fix_view(thd, view, wrong_checksum, swap_alg); + DBUG_RETURN(ret); + } + DBUG_RETURN(HA_ADMIN_OK); +} + +/* + rename view + + Synopsis: + renames a view + + Parameters: + thd thread handler + new_db new name of database + new_name new name of view + view view + + Return values: + FALSE Ok + TRUE Error +*/ +bool +mysql_rename_view(THD *thd, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_name, + const LEX_CSTRING *old_db, + const LEX_CSTRING *old_name) +{ + LEX_CSTRING pathstr; + File_parser *parser; + char path_buff[FN_REFLEN + 1]; + bool error= TRUE; + DBUG_ENTER("mysql_rename_view"); + + pathstr.str= (char *) path_buff; + pathstr.length= build_table_filename(path_buff, sizeof(path_buff) - 1, + old_db->str, old_name->str, + reg_ext, 0); + + if ((parser= sql_parse_prepare(&pathstr, thd->mem_root, 1)) && + 
is_equal(&view_type, parser->type())) + { + TABLE_LIST view_def; + char dir_buff[FN_REFLEN + 1]; + LEX_CSTRING dir, file; + + /* + To be PS-friendly we should either to restore state of + TABLE_LIST object pointed by 'view' after using it for + view definition parsing or use temporary 'view_def' + object for it. + */ + view_def.reset(); + view_def.hr_timestamp.str= view_def.timestamp_buffer; + view_def.view_suid= TRUE; + + /* get view definition and source */ + if (parser->parse((uchar*)&view_def, thd->mem_root, view_parameters, + array_elements(view_parameters)-1, + &file_parser_dummy_hook)) + goto err; + + /* rename view and it's backups */ + if (rename_in_schema_file(thd, old_db->str, old_name->str, + new_db->str, new_name->str)) + goto err; + debug_crash_here("rename_view_after_rename_schema_file"); + + dir.str= dir_buff; + dir.length= build_table_filename(dir_buff, sizeof(dir_buff) - 1, + new_db->str, "", "", 0); + + pathstr.str= path_buff; + pathstr.length= build_table_filename(path_buff, sizeof(path_buff) - 1, + new_db->str, new_name->str, reg_ext, 0); + + file.str= pathstr.str + dir.length; + file.length= pathstr.length - dir.length; + + if (sql_create_definition_file(&dir, &file, view_file_type, + (uchar*)&view_def, view_parameters)) + { + /* restore renamed view in case of error */ + rename_in_schema_file(thd, new_db->str, new_name->str, old_db->str, + old_name->str); + goto err; + } + } + else + DBUG_RETURN(1); + + /* remove cache entries */ + { + char key[NAME_LEN*2+1], *ptr; + memcpy(key, old_db->str, old_db->length); + ptr= key+ old_db->length; + *ptr++= 0; + memcpy(key, old_name->str, old_name->length); + ptr= key+ old_db->length; + *ptr++= 0; + query_cache.invalidate(thd, key, (size_t) (ptr-key), 0); + } + error= FALSE; + +err: + DBUG_RETURN(error); +} diff --git a/sql/sql_view.h b/sql/sql_view.h new file mode 100644 index 00000000..1b880e43 --- /dev/null +++ b/sql/sql_view.h @@ -0,0 +1,71 @@ +#ifndef SQL_VIEW_INCLUDED +#define SQL_VIEW_INCLUDED + 
+/* -*- C++ -*- */ +/* Copyright (c) 2004, 2010, Oracle and/or its affiliates. + Copyright (c) 2015, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include "sql_class.h" /* Required by sql_lex.h */ +#include "sql_lex.h" /* enum_view_create_mode, enum_drop_mode */ + +/* Forward declarations */ + +class File_parser; + + +/* Function declarations */ + +bool create_view_precheck(THD *thd, TABLE_LIST *tables, TABLE_LIST *view, + enum_view_create_mode mode); + +bool mysql_create_view(THD *thd, TABLE_LIST *view, + enum_view_create_mode mode); + +bool mysql_make_view(THD *thd, TABLE_SHARE *share, TABLE_LIST *table, + bool open_view_no_parse); + + +bool mysql_drop_view(THD *thd, TABLE_LIST *view, enum_drop_mode drop_mode); + +bool check_key_in_view(THD *thd, TABLE_LIST * view); + +bool insert_view_fields(THD *thd, List *list, TABLE_LIST *view); + +int view_checksum(THD *thd, TABLE_LIST *view); +int view_check(THD *thd, TABLE_LIST *view, HA_CHECK_OPT *check_opt); +int view_repair(THD *thd, TABLE_LIST *view, HA_CHECK_OPT *check_opt); + +extern TYPELIB updatable_views_with_limit_typelib; + +bool check_duplicate_names(THD *thd, List& item_list, + bool gen_unique_view_names); +bool mysql_rename_view(THD *thd, const LEX_CSTRING *new_db, + const LEX_CSTRING *new_name, + const LEX_CSTRING *old_db, + const LEX_CSTRING *old_name); + +void make_valid_column_names(THD *thd, 
List &item_list); + +#define VIEW_ANY_ACL (SELECT_ACL | UPDATE_ACL | INSERT_ACL | DELETE_ACL) + +extern const LEX_CSTRING view_type; + +void make_valid_column_names(List &item_list); + +bool mariadb_view_version_get(TABLE_SHARE *share); + +#endif /* SQL_VIEW_INCLUDED */ diff --git a/sql/sql_window.cc b/sql/sql_window.cc new file mode 100644 index 00000000..8fe2890d --- /dev/null +++ b/sql/sql_window.cc @@ -0,0 +1,3362 @@ +/* + Copyright (c) 2016, 2022 MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "mariadb.h" +#include "sql_parse.h" +#include "sql_select.h" +#include "sql_list.h" +#include "item_windowfunc.h" +#include "filesort.h" +#include "sql_base.h" +#include "sql_window.h" + + +bool +Window_spec::check_window_names(List_iterator_fast &it) +{ + if (window_names_are_checked) + return false; + const char *name= this->name(); + const char *ref_name= window_reference(); + it.rewind(); + Window_spec *win_spec; + while((win_spec= it++) && win_spec != this) + { + const char *win_spec_name= win_spec->name(); + if (!win_spec_name) + break; + if (name && my_strcasecmp(system_charset_info, name, win_spec_name) == 0) + { + my_error(ER_DUP_WINDOW_NAME, MYF(0), name); + return true; + } + if (ref_name && + my_strcasecmp(system_charset_info, ref_name, win_spec_name) == 0) + { + if (partition_list->elements) + { + 
my_error(ER_PARTITION_LIST_IN_REFERENCING_WINDOW_SPEC, MYF(0), + ref_name); + return true; + } + if (win_spec->order_list->elements && order_list->elements) + { + my_error(ER_ORDER_LIST_IN_REFERENCING_WINDOW_SPEC, MYF(0), ref_name); + return true; + } + if (win_spec->window_frame) + { + my_error(ER_WINDOW_FRAME_IN_REFERENCED_WINDOW_SPEC, MYF(0), ref_name); + return true; + } + referenced_win_spec= win_spec; + if (partition_list->elements == 0) + partition_list= win_spec->partition_list; + if (order_list->elements == 0) + order_list= win_spec->order_list; + } + } + if (ref_name && !referenced_win_spec) + { + my_error(ER_WRONG_WINDOW_SPEC_NAME, MYF(0), ref_name); + return true; + } + window_names_are_checked= true; + return false; +} + +void +Window_spec::print(String *str, enum_query_type query_type) +{ + str->append('('); + print_partition(str, query_type); + print_order(str, query_type); + + if (window_frame) + window_frame->print(str, query_type); + str->append(')'); +} + +void +Window_spec::print_partition(String *str, enum_query_type query_type) +{ + if (partition_list->first) + { + str->append(STRING_WITH_LEN(" partition by ")); + st_select_lex::print_order(str, partition_list->first, query_type); + } +} + +void +Window_spec::print_order(String *str, enum_query_type query_type) +{ + if (order_list->first) + { + str->append(STRING_WITH_LEN(" order by ")); + st_select_lex::print_order(str, order_list->first, query_type); + } +} + +bool +Window_frame::check_frame_bounds() +{ + if ((top_bound->is_unbounded() && + top_bound->precedence_type == Window_frame_bound::FOLLOWING) || + (bottom_bound->is_unbounded() && + bottom_bound->precedence_type == Window_frame_bound::PRECEDING) || + (top_bound->precedence_type == Window_frame_bound::CURRENT && + bottom_bound->precedence_type == Window_frame_bound::PRECEDING) || + (bottom_bound->precedence_type == Window_frame_bound::CURRENT && + top_bound->precedence_type == Window_frame_bound::FOLLOWING)) + { + 
my_error(ER_BAD_COMBINATION_OF_WINDOW_FRAME_BOUND_SPECS, MYF(0)); + return true; + } + + return false; +} + + +void +Window_frame::print(String *str, enum_query_type query_type) +{ + switch (units) { + case UNITS_ROWS: + str->append(STRING_WITH_LEN(" rows ")); + break; + case UNITS_RANGE: + str->append(STRING_WITH_LEN(" range ")); + break; + default: + DBUG_ASSERT(0); + } + + str->append(STRING_WITH_LEN("between ")); + top_bound->print(str, query_type); + str->append(STRING_WITH_LEN(" and ")); + bottom_bound->print(str, query_type); + + if (exclusion != EXCL_NONE) + { + str->append(STRING_WITH_LEN(" exclude ")); + switch (exclusion) { + case EXCL_CURRENT_ROW: + str->append(STRING_WITH_LEN(" current row ")); + break; + case EXCL_GROUP: + str->append(STRING_WITH_LEN(" group ")); + break; + case EXCL_TIES: + str->append(STRING_WITH_LEN(" ties ")); + break; + default: + DBUG_ASSERT(0); + ; + } + } +} + + +void +Window_frame_bound::print(String *str, enum_query_type query_type) +{ + if (precedence_type == CURRENT) + { + str->append(STRING_WITH_LEN(" current row ")); + return; + } + if (is_unbounded()) + str->append(STRING_WITH_LEN(" unbounded ")); + else + offset->print(str ,query_type); + switch (precedence_type) { + case PRECEDING: + str->append(STRING_WITH_LEN(" preceding ")); + break; + case FOLLOWING: + str->append(STRING_WITH_LEN(" following ")); + break; + default: + DBUG_ASSERT(0); + } +} + +/* + Setup window functions in a select +*/ + +int +setup_windows(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, + List &fields, List &all_fields, + List &win_specs, List &win_funcs) +{ + Window_spec *win_spec; + DBUG_ENTER("setup_windows"); + List_iterator it(win_specs); + + if (!thd->lex->current_select->is_win_spec_list_built) + { + + /* + Move all unnamed specifications after the named ones. + We could have avoided it if we had built two separate lists for + named and unnamed specifications. 
+ */ + Query_arena *arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); + uint i = 0; + uint elems= win_specs.elements; + while ((win_spec= it++) && i++ < elems) + { + if (win_spec->name() == NULL) + { + it.remove(); + win_specs.push_back(win_spec); + } + } + if (arena) + thd->restore_active_arena(arena, &backup); + + it.rewind(); + + thd->lex->current_select->is_win_spec_list_built= true; + } + + List_iterator_fast itp(win_specs); + + while ((win_spec= it++)) + { + bool hidden_group_fields; + if (win_spec->check_window_names(itp) || + setup_group(thd, ref_pointer_array, tables, fields, all_fields, + win_spec->partition_list->first, &hidden_group_fields, + true) || + setup_order(thd, ref_pointer_array, tables, fields, all_fields, + win_spec->order_list->first, true) || + (win_spec->window_frame && + win_spec->window_frame->check_frame_bounds())) + { + DBUG_RETURN(1); + } + + if (win_spec->window_frame && + win_spec->window_frame->exclusion != Window_frame::EXCL_NONE) + { + my_error(ER_FRAME_EXCLUSION_NOT_SUPPORTED, MYF(0)); + DBUG_RETURN(1); + } + /* + For "win_func() OVER (ORDER BY order_list RANGE BETWEEN ...)", + - ORDER BY order_list must not be ommitted + - the list must have a single element. + */ + if (win_spec->window_frame && + win_spec->window_frame->units == Window_frame::UNITS_RANGE) + { + if (win_spec->order_list->elements != 1) + { + my_error(ER_RANGE_FRAME_NEEDS_SIMPLE_ORDERBY, MYF(0)); + DBUG_RETURN(1); + } + + /* + "The declared type of SK shall be numeric, datetime, or interval" + we don't support datetime or interval, yet. 
+ */ + Item_result rtype= win_spec->order_list->first->item[0]->result_type(); + if (rtype != REAL_RESULT && rtype != INT_RESULT && + rtype != DECIMAL_RESULT) + { + my_error(ER_WRONG_TYPE_FOR_RANGE_FRAME, MYF(0)); + DBUG_RETURN(1); + } + + /* + "The declared type of UVS shall be numeric if the declared type of SK + is numeric; otherwise, it shall be an interval type that may be added + to or subtracted from the declared type of SK" + */ + Window_frame_bound *bounds[]= {win_spec->window_frame->top_bound, + win_spec->window_frame->bottom_bound, + NULL}; + for (Window_frame_bound **pbound= &bounds[0]; *pbound; pbound++) + { + if (!(*pbound)->is_unbounded() && + ((*pbound)->precedence_type == Window_frame_bound::FOLLOWING || + (*pbound)->precedence_type == Window_frame_bound::PRECEDING)) + { + Item_result rtype= (*pbound)->offset->result_type(); + if (rtype != REAL_RESULT && rtype != INT_RESULT && + rtype != DECIMAL_RESULT) + { + my_error(ER_WRONG_TYPE_FOR_RANGE_FRAME, MYF(0)); + DBUG_RETURN(1); + } + } + } + } + + /* "ROWS PRECEDING|FOLLOWING $n" must have a numeric $n */ + if (win_spec->window_frame && + win_spec->window_frame->units == Window_frame::UNITS_ROWS) + { + Window_frame_bound *bounds[]= {win_spec->window_frame->top_bound, + win_spec->window_frame->bottom_bound, + NULL}; + for (Window_frame_bound **pbound= &bounds[0]; *pbound; pbound++) + { + if (!(*pbound)->is_unbounded() && + ((*pbound)->precedence_type == Window_frame_bound::FOLLOWING || + (*pbound)->precedence_type == Window_frame_bound::PRECEDING)) + { + Item *offset= (*pbound)->offset; + if (offset->result_type() != INT_RESULT) + { + my_error(ER_WRONG_TYPE_FOR_ROWS_FRAME, MYF(0)); + DBUG_RETURN(1); + } + } + } + } + } + + List_iterator_fast li(win_funcs); + while (Item_window_func * win_func_item= li++) + { + if (win_func_item->check_result_type_of_order_item()) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +/** + @brief + Find fields common for all partition lists used in window functions + + @param 
thd The thread handle + + @details + This function looks for the field references in the partition lists + of all window functions used in this select that are common for + all the partition lists. The function returns an ORDER list contained + all such references.The list either is specially built by the function + or is taken directly from the first window specification. + + @retval + pointer to the first element of the ORDER list contained field + references common for all partition lists + 0 if no such reference is found. +*/ + +ORDER *st_select_lex::find_common_window_func_partition_fields(THD *thd) +{ + ORDER *ord; + Item *item; + DBUG_ASSERT(window_funcs.elements); + List_iterator_fast it(window_funcs); + Item_window_func *first_wf= it++; + if (!first_wf->window_spec->partition_list) + return 0; + List common_fields; + uint first_partition_elements= 0; + for (ord= first_wf->window_spec->partition_list->first; ord; ord= ord->next) + { + if ((*ord->item)->real_item()->type() == Item::FIELD_ITEM) + common_fields.push_back(*ord->item, thd->mem_root); + first_partition_elements++; + } + if (window_specs.elements == 1 && + common_fields.elements == first_partition_elements) + return first_wf->window_spec->partition_list->first; + List_iterator li(common_fields); + Item_window_func *wf; + while (common_fields.elements && (wf= it++)) + { + if (!wf->window_spec->partition_list) + return 0; + while ((item= li++)) + { + for (ord= wf->window_spec->partition_list->first; ord; ord= ord->next) + { + if (item->eq(*ord->item, false)) + break; + } + if (!ord) + li.remove(); + } + li.rewind(); + } + if (!common_fields.elements) + return 0; + if (common_fields.elements == first_partition_elements) + return first_wf->window_spec->partition_list->first; + SQL_I_List res_list; + for (ord= first_wf->window_spec->partition_list->first, item= li++; + ord; ord= ord->next) + { + if (item != *ord->item) + continue; + if (add_to_list(thd, res_list, item, ord->direction)) + return 0; + 
item= li++; + } + return res_list.first; +} + + +///////////////////////////////////////////////////////////////////////////// +// Sorting window functions to minimize the number of table scans +// performed during the computation of these functions +///////////////////////////////////////////////////////////////////////////// + +#define CMP_LT -2 // Less than +#define CMP_LT_C -1 // Less than and compatible +#define CMP_EQ 0 // Equal to +#define CMP_GT_C 1 // Greater than and compatible +#define CMP_GT 2 // Greater then + +static +int compare_order_elements(ORDER *ord1, int weight1, + ORDER *ord2, int weight2) +{ + if (*ord1->item == *ord2->item && ord1->direction == ord2->direction) + return CMP_EQ; + Item *item1= (*ord1->item)->real_item(); + Item *item2= (*ord2->item)->real_item(); + + bool item1_field= (item1->type() == Item::FIELD_ITEM); + bool item2_field= (item2->type() == Item::FIELD_ITEM); + + ptrdiff_t cmp; + if (item1_field && item2_field) + { + DBUG_ASSERT(((Item_field *) item1)->field->table == + ((Item_field *) item2)->field->table); + cmp= ((Item_field *) item1)->field->field_index - + ((Item_field *) item2)->field->field_index; + } + else if (item1_field && !item2_field) + return CMP_LT; + else if (!item1_field && item2_field) + return CMP_LT; + else + { + /* + Ok, item1_field==NULL and item2_field==NULL. + We're not able to compare Item expressions. Order them according to + their passed "weight" (which comes from Window_spec::win_spec_number): + */ + if (weight1 != weight2) + cmp= weight1 - weight2; + else + { + /* + The weight is the same. That is, the elements come from the same + window specification... This shouldn't happen. + */ + DBUG_ASSERT(0); + cmp= item1 - item2; + } + } + + if (cmp == 0) + { + if (ord1->direction == ord2->direction) + return CMP_EQ; + return ord1->direction > ord2->direction ? CMP_GT : CMP_LT; + } + else + return cmp > 0 ? 
CMP_GT : CMP_LT; +} + +static +int compare_order_lists(SQL_I_List *part_list1, + int spec_number1, + SQL_I_List *part_list2, + int spec_number2) +{ + if (part_list1 == part_list2) + return CMP_EQ; + ORDER *elem1= part_list1->first; + ORDER *elem2= part_list2->first; + for ( ; elem1 && elem2; elem1= elem1->next, elem2= elem2->next) + { + int cmp; + // remove all constants as we don't need them for comparision + while(elem1 && ((*elem1->item)->real_item())->const_item()) + { + elem1= elem1->next; + continue; + } + + while(elem2 && ((*elem2->item)->real_item())->const_item()) + { + elem2= elem2->next; + continue; + } + + if (!elem1 || !elem2) + break; + + if ((cmp= compare_order_elements(elem1, spec_number1, + elem2, spec_number2))) + return cmp; + } + if (elem1) + return CMP_GT_C; + if (elem2) + return CMP_LT_C; + return CMP_EQ; +} + + +static +int compare_window_frame_bounds(Window_frame_bound *win_frame_bound1, + Window_frame_bound *win_frame_bound2, + bool is_bottom_bound) +{ + int res; + if (win_frame_bound1->precedence_type != win_frame_bound2->precedence_type) + { + res= win_frame_bound1->precedence_type > win_frame_bound2->precedence_type ? + CMP_GT : CMP_LT; + if (is_bottom_bound) + res= -res; + return res; + } + + if (win_frame_bound1->is_unbounded() && win_frame_bound2->is_unbounded()) + return CMP_EQ; + + if (!win_frame_bound1->is_unbounded() && !win_frame_bound2->is_unbounded()) + { + if (win_frame_bound1->offset->eq(win_frame_bound2->offset, true)) + return CMP_EQ; + else + { + res= strcmp(win_frame_bound1->offset->name.str, + win_frame_bound2->offset->name.str); + res= res > 0 ? CMP_GT : CMP_LT; + if (is_bottom_bound) + res= -res; + return res; + } + } + + /* + Here we have: + win_frame_bound1->is_unbounded() != win_frame_bound1->is_unbounded() + */ + return is_bottom_bound != win_frame_bound1->is_unbounded() ? 
CMP_LT : CMP_GT; +} + + +static +int compare_window_frames(Window_frame *win_frame1, + Window_frame *win_frame2) +{ + int cmp; + + if (win_frame1 == win_frame2) + return CMP_EQ; + + if (!win_frame1) + return CMP_LT; + + if (!win_frame2) + return CMP_GT; + + if (win_frame1->units != win_frame2->units) + return win_frame1->units > win_frame2->units ? CMP_GT : CMP_LT; + + cmp= compare_window_frame_bounds(win_frame1->top_bound, + win_frame2->top_bound, + false); + if (cmp) + return cmp; + + cmp= compare_window_frame_bounds(win_frame1->bottom_bound, + win_frame2->bottom_bound, + true); + if (cmp) + return cmp; + + if (win_frame1->exclusion != win_frame2->exclusion) + return win_frame1->exclusion > win_frame2->exclusion ? CMP_GT_C : CMP_LT_C; + + return CMP_EQ; +} + +static +int compare_window_spec_joined_lists(Window_spec *win_spec1, + Window_spec *win_spec2) +{ + win_spec1->join_partition_and_order_lists(); + win_spec2->join_partition_and_order_lists(); + int cmp= compare_order_lists(win_spec1->partition_list, + win_spec1->win_spec_number, + win_spec2->partition_list, + win_spec2->win_spec_number); + win_spec1->disjoin_partition_and_order_lists(); + win_spec2->disjoin_partition_and_order_lists(); + return cmp; +} + + +static +int compare_window_funcs_by_window_specs(Item_window_func *win_func1, + Item_window_func *win_func2, + void *arg) +{ + int cmp; + Window_spec *win_spec1= win_func1->window_spec; + Window_spec *win_spec2= win_func2->window_spec; + if (win_spec1 == win_spec2) + return CMP_EQ; + cmp= compare_order_lists(win_spec1->partition_list, + win_spec1->win_spec_number, + win_spec2->partition_list, + win_spec2->win_spec_number); + if (cmp == CMP_EQ) + { + /* + Partition lists contain the same elements. + Let's use only one of the lists. 
+ */ + if (!win_spec1->name() && win_spec2->name()) + { + win_spec1->save_partition_list= win_spec1->partition_list; + win_spec1->partition_list= win_spec2->partition_list; + } + else + { + win_spec2->save_partition_list= win_spec2->partition_list; + win_spec2->partition_list= win_spec1->partition_list; + } + + cmp= compare_order_lists(win_spec1->order_list, + win_spec1->win_spec_number, + win_spec2->order_list, + win_spec2->win_spec_number); + + if (cmp != CMP_EQ) + return cmp; + + /* + Order lists contain the same elements. + Let's use only one of the lists. + */ + if (!win_spec1->name() && win_spec2->name()) + { + win_spec1->save_order_list= win_spec2->order_list; + win_spec1->order_list= win_spec2->order_list; + } + else + { + win_spec1->save_order_list= win_spec2->order_list; + win_spec2->order_list= win_spec1->order_list; + } + + cmp= compare_window_frames(win_spec1->window_frame, + win_spec2->window_frame); + + if (cmp != CMP_EQ) + return cmp; + + /* Window frames are equal. Let's use only one of them. */ + if (!win_spec1->name() && win_spec2->name()) + win_spec1->window_frame= win_spec2->window_frame; + else + win_spec2->window_frame= win_spec1->window_frame; + + return CMP_EQ; + } + + if (cmp == CMP_GT || cmp == CMP_LT) + return cmp; + + /* one of the partitions lists is the proper beginning of the another */ + cmp= compare_window_spec_joined_lists(win_spec1, win_spec2); + + if (CMP_LT_C <= cmp && cmp <= CMP_GT_C) + cmp= win_spec1->partition_list->elements < + win_spec2->partition_list->elements ? CMP_GT_C : CMP_LT_C; + + return cmp; +} + + +typedef int (*Item_window_func_cmp)(Item_window_func *f1, + Item_window_func *f2, + void *arg); +/* + @brief + Sort window functions so that those that can be computed together are + adjacent. + + @detail + Sort window functions by their + - required sorting order, + - partition list, + - window frame compatibility. + + The changes between the groups are marked by setting item_window_func->marker. 
+*/ + +static +void order_window_funcs_by_window_specs(List *win_func_list) +{ + if (win_func_list->elements == 0) + return; + + bubble_sort(win_func_list, + compare_window_funcs_by_window_specs, + NULL); + + List_iterator_fast it(*win_func_list); + Item_window_func *prev= it++; + prev->marker= (MARKER_SORTORDER_CHANGE | + MARKER_PARTITION_CHANGE | + MARKER_FRAME_CHANGE); + Item_window_func *curr; + while ((curr= it++)) + { + Window_spec *win_spec_prev= prev->window_spec; + Window_spec *win_spec_curr= curr->window_spec; + curr->marker= MARKER_UNUSED; + if (!(win_spec_prev->partition_list == win_spec_curr->partition_list && + win_spec_prev->order_list == win_spec_curr->order_list)) + { + int cmp; + if (win_spec_prev->partition_list == win_spec_curr->partition_list) + cmp= compare_order_lists(win_spec_prev->order_list, + win_spec_prev->win_spec_number, + win_spec_curr->order_list, + win_spec_curr->win_spec_number); + else + cmp= compare_window_spec_joined_lists(win_spec_prev, win_spec_curr); + if (!(CMP_LT_C <= cmp && cmp <= CMP_GT_C)) + { + curr->marker= (MARKER_SORTORDER_CHANGE | + MARKER_PARTITION_CHANGE | + MARKER_FRAME_CHANGE); + } + else if (win_spec_prev->partition_list != win_spec_curr->partition_list) + { + curr->marker|= MARKER_PARTITION_CHANGE | MARKER_FRAME_CHANGE; + } + } + else if (win_spec_prev->window_frame != win_spec_curr->window_frame) + curr->marker|= MARKER_FRAME_CHANGE; + + prev= curr; + } +} + + +///////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////// +// Window Frames support +///////////////////////////////////////////////////////////////////////////// + +// note: make rr_from_pointers static again when not need it here anymore +int rr_from_pointers(READ_RECORD *info); + + +///////////////////////////////////////////////////////////////////////////// + + +/* + A cursor over a sequence of rowids. 
One can + - Move to next rowid + - jump to given number in the sequence + - Know the number of the current rowid (i.e. how many rowids have been read) +*/ + +class Rowid_seq_cursor +{ +public: + Rowid_seq_cursor() : io_cache(NULL), ref_buffer(0) {} + virtual ~Rowid_seq_cursor() + { + if (ref_buffer) + my_free(ref_buffer); + if (io_cache) + { + end_slave_io_cache(io_cache); + my_free(io_cache); + io_cache= NULL; + } + } + +private: + /* Length of one rowid element */ + size_t ref_length; + + /* If io_cache=!NULL, use it */ + IO_CACHE *io_cache; + uchar *ref_buffer; /* Buffer for the last returned rowid */ + ha_rows rownum; /* Number of the rowid that is about to be returned */ + ha_rows current_ref_buffer_rownum; + bool ref_buffer_valid; + + /* The following are used when we are reading from an array of pointers */ + uchar *cache_start; + uchar *cache_pos; + uchar *cache_end; +public: + + void init(READ_RECORD *info) + { + ref_length= info->ref_length; + if (info->read_record_func == rr_from_pointers) + { + io_cache= NULL; + cache_start= info->cache_pos; + cache_pos= info->cache_pos; + cache_end= info->cache_end; + } + else + { + //DBUG_ASSERT(info->read_record == rr_from_tempfile); + rownum= 0; + io_cache= (IO_CACHE*)my_malloc(PSI_INSTRUMENT_ME, sizeof(IO_CACHE), MYF(0)); + init_slave_io_cache(info->io_cache, io_cache); + + ref_buffer= (uchar*)my_malloc(PSI_INSTRUMENT_ME, ref_length, MYF(0)); + ref_buffer_valid= false; + } + } + + virtual int next() + { + /* Allow multiple next() calls in EOF state. */ + if (at_eof()) + return -1; + + if (io_cache) + { + rownum++; + } + else + { + cache_pos+= ref_length; + DBUG_ASSERT(cache_pos <= cache_end); + } + return 0; + } + + virtual int prev() + { + if (io_cache) + { + if (rownum == 0) + return -1; + + rownum--; + return 0; + } + else + { + /* Allow multiple prev() calls when positioned at the start. 
*/ + if (cache_pos == cache_start) + return -1; + cache_pos-= ref_length; + DBUG_ASSERT(cache_pos >= cache_start); + return 0; + } + } + + ha_rows get_rownum() const + { + if (io_cache) + return rownum; + else + return (cache_pos - cache_start) / ref_length; + } + + void move_to(ha_rows row_number) + { + if (io_cache) + { + rownum= row_number; + } + else + { + cache_pos= MY_MIN(cache_end, cache_start + row_number * ref_length); + DBUG_ASSERT(cache_pos <= cache_end); + } + } + +protected: + bool at_eof() + { + if (io_cache) + { + return rownum * ref_length >= io_cache->end_of_file; + } + else + return (cache_pos == cache_end); + } + + bool get_curr_rowid(uchar **row_id) + { + if (io_cache) + { + DBUG_ASSERT(!at_eof()); + if (!ref_buffer_valid || current_ref_buffer_rownum != rownum) + { + seek_io_cache(io_cache, rownum * ref_length); + if (my_b_read(io_cache,ref_buffer,ref_length)) + { + /* Error reading from file. */ + return true; + } + ref_buffer_valid= true; + current_ref_buffer_rownum = rownum; + } + *row_id = ref_buffer; + return false; + } + else + { + *row_id= cache_pos; + return false; + } + } +}; + + +/* + Cursor which reads from rowid sequence and also retrieves table rows. +*/ + +class Table_read_cursor : public Rowid_seq_cursor +{ +public: + virtual ~Table_read_cursor() = default; + + void init(READ_RECORD *info) + { + Rowid_seq_cursor::init(info); + table= info->table; + record= info->record(); + } + + virtual int fetch() + { + if (at_eof()) + return -1; + + uchar* curr_rowid; + if (get_curr_rowid(&curr_rowid)) + return -1; + return table->file->ha_rnd_pos(record, curr_rowid); + } + +private: + /* The table that is acccesed by this cursor. */ + TABLE *table; + /* Buffer where to store the table's record data. */ + uchar *record; + + // TODO(spetrunia): should move_to() also read row here? +}; + + +/* + A cursor which only moves within a partition. The scan stops at the partition + end, and it needs an explicit command to move to the next partition. 
+ + This cursor can not move backwards. +*/ + +class Partition_read_cursor : public Table_read_cursor +{ +public: + Partition_read_cursor(THD *thd, SQL_I_List *partition_list) : + bound_tracker(thd, partition_list) {} + + void init(READ_RECORD *info) + { + Table_read_cursor::init(info); + bound_tracker.init(); + end_of_partition= false; + } + + /* + Informs the cursor that we need to move into the next partition. + The next partition is provided in two ways: + - in table->record[0].. + - rownum parameter has the row number. + */ + void on_next_partition(ha_rows rownum) + { + /* Remember the sort key value from the new partition */ + move_to(rownum); + bound_tracker.check_if_next_group(); + end_of_partition= false; + + } + + /* + This returns -1 when end of partition was reached. + */ + int next() + { + int res; + if (end_of_partition) + return -1; + + if ((res= Table_read_cursor::next()) || + (res= fetch())) + { + /* TODO(cvicentiu) This does not consider table read failures. + Perhaps assuming end of table like this is fine in that case. */ + + /* This row is the final row in the table. To maintain semantics + that cursors always point to the last valid row, move back one step, + but mark end_of_partition as true. */ + Table_read_cursor::prev(); + end_of_partition= true; + return res; + } + + if (bound_tracker.compare_with_cache()) + { + /* This row is part of a new partition, don't move + forward any more untill we get informed of a new partition. */ + Table_read_cursor::prev(); + end_of_partition= true; + return -1; + } + return 0; + } + +private: + Group_bound_tracker bound_tracker; + bool end_of_partition; +}; + + + +///////////////////////////////////////////////////////////////////////////// + +/* + Window frame bound cursor. Abstract interface. + + @detail + The cursor moves within the partition that the current row is in. + It may be ahead or behind the current row. 
+ + The cursor also assumes that the current row moves forward through the + partition and will move to the next adjacent partition after this one. + + List of all cursor classes: + Frame_cursor + Frame_range_n_top + Frame_range_n_bottom + + Frame_range_current_row_top + Frame_range_current_row_bottom + + Frame_n_rows_preceding + Frame_n_rows_following + + Frame_rows_current_row_top = Frame_n_rows_preceding(0) + Frame_rows_current_row_bottom + + // These handle both RANGE and ROWS-type bounds + Frame_unbounded_preceding + Frame_unbounded_following + + // This is not used as a frame bound, it counts rows in the partition: + Frame_unbounded_following_set_count : public Frame_unbounded_following + + @todo + - if we want to allocate this on the MEM_ROOT we should make sure + it is not re-allocated for every subquery execution. +*/ + +class Frame_cursor : public Sql_alloc +{ +public: + Frame_cursor() : sum_functions(), perform_no_action(false) {} + + virtual void init(READ_RECORD *info) {}; + + bool add_sum_func(Item_sum* item) + { + return sum_functions.push_back(item); + } + /* + Current row has moved to the next partition and is positioned on the first + row there. Position the frame bound accordingly. + + @param first - TRUE means this is the first partition + @param item - Put or remove rows from there. + + @detail + - if first==false, the caller guarantees that tbl->record[0] points at the + first row in the new partition. + - if first==true, we are just starting in the first partition and no such + guarantee is provided. + + - The callee may move tbl->file and tbl->record[0] to point to some other + row. + */ + virtual void pre_next_partition(ha_rows rownum) {}; + virtual void next_partition(ha_rows rownum)=0; + + /* + The current row has moved one row forward. + Move this frame bound accordingly, and update the value of aggregate + function as necessary. 
+ */ + virtual void pre_next_row() {}; + virtual void next_row()=0; + + virtual bool is_outside_computation_bounds() const { return false; }; + + virtual ~Frame_cursor() = default; + + /* + Regular frame cursors add or remove values from the sum functions they + manage. By calling this method, they will only perform the required + movement within the table, but no adding/removing will happen. + */ + void set_no_action() + { + perform_no_action= true; + } + + /* Retrieves the row number that this cursor currently points at. */ + virtual ha_rows get_curr_rownum() const= 0; + +protected: + inline void add_value_to_items() + { + if (perform_no_action) + return; + + List_iterator_fast it(sum_functions); + Item_sum *item_sum; + while ((item_sum= it++)) + { + item_sum->add(); + } + } + + inline void remove_value_from_items() + { + if (perform_no_action) + return; + + List_iterator_fast it(sum_functions); + Item_sum *item_sum; + while ((item_sum= it++)) + { + item_sum->remove(); + } + } + + /* Clear all sum functions handled by this cursor. */ + void clear_sum_functions() + { + List_iterator_fast iter_sum_func(sum_functions); + Item_sum *sum_func; + while ((sum_func= iter_sum_func++)) + { + sum_func->clear(); + } + } + + /* Sum functions that this cursor handles. */ + List sum_functions; + +private: + bool perform_no_action; +}; + +/* + A class that owns cursor objects associated with a specific window function. 
+*/ +class Cursor_manager +{ +public: + bool add_cursor(Frame_cursor *cursor) + { + return cursors.push_back(cursor); + } + + void initialize_cursors(READ_RECORD *info) + { + List_iterator_fast iter(cursors); + Frame_cursor *fc; + while ((fc= iter++)) + fc->init(info); + } + + void notify_cursors_partition_changed(ha_rows rownum) + { + List_iterator_fast iter(cursors); + Frame_cursor *cursor; + while ((cursor= iter++)) + cursor->pre_next_partition(rownum); + + iter.rewind(); + while ((cursor= iter++)) + cursor->next_partition(rownum); + } + + void notify_cursors_next_row() + { + List_iterator_fast iter(cursors); + Frame_cursor *cursor; + while ((cursor= iter++)) + cursor->pre_next_row(); + + iter.rewind(); + while ((cursor= iter++)) + cursor->next_row(); + } + + ~Cursor_manager() { cursors.delete_elements(); } + +private: + /* List of the cursors that this manager owns. */ + List cursors; +}; + + + +////////////////////////////////////////////////////////////////////////////// +// RANGE-type frames +////////////////////////////////////////////////////////////////////////////// + +/* + Frame_range_n_top handles the top end of RANGE-type frame. + + That is, it handles: + RANGE BETWEEN n PRECEDING AND ... + RANGE BETWEEN n FOLLOWING AND ... + + Top of the frame doesn't need to check for partition end, since bottom will + reach it before. 
+*/ + +class Frame_range_n_top : public Frame_cursor +{ + Partition_read_cursor cursor; + + Cached_item_item *range_expr; + + Item *n_val; + Item *item_add; + + const bool is_preceding; + + bool end_of_partition; + + /* + 1 when order_list uses ASC ordering + -1 when order_list uses DESC ordering + */ + int order_direction; +public: + Frame_range_n_top(THD *thd, + SQL_I_List *partition_list, + SQL_I_List *order_list, + bool is_preceding_arg, Item *n_val_arg) : + cursor(thd, partition_list), n_val(n_val_arg), item_add(NULL), + is_preceding(is_preceding_arg) + { + DBUG_ASSERT(order_list->elements == 1); + Item *src_expr= order_list->first->item[0]; + if (order_list->first->direction == ORDER::ORDER_ASC) + order_direction= 1; + else + order_direction= -1; + + range_expr= (Cached_item_item*) new_Cached_item(thd, src_expr, FALSE); + + bool use_minus= is_preceding; + if (order_direction == -1) + use_minus= !use_minus; + + if (use_minus) + item_add= new (thd->mem_root) Item_func_minus(thd, src_expr, n_val); + else + item_add= new (thd->mem_root) Item_func_plus(thd, src_expr, n_val); + + item_add->fix_fields(thd, &item_add); + } + + void init(READ_RECORD *info) + { + cursor.init(info); + } + + void pre_next_partition(ha_rows rownum) + { + // Save the value of FUNC(current_row) + range_expr->fetch_value_from(item_add); + + cursor.on_next_partition(rownum); + end_of_partition= false; + } + + void next_partition(ha_rows rownum) + { + walk_till_non_peer(); + } + + void pre_next_row() + { + if (end_of_partition) + return; + range_expr->fetch_value_from(item_add); + } + + void next_row() + { + if (end_of_partition) + return; + /* + Ok, our cursor is at the first row R where + (prev_row + n) >= R + We need to check about the current row. 
+ */ + walk_till_non_peer(); + } + + ha_rows get_curr_rownum() const + { + return cursor.get_rownum(); + } + + bool is_outside_computation_bounds() const + { + if (end_of_partition) + return true; + return false; + } + +private: + void walk_till_non_peer() + { + if (cursor.fetch()) // ERROR + return; + // Current row is not a peer. + if (order_direction * range_expr->cmp_read_only() <= 0) + return; + remove_value_from_items(); + + int res; + while (!(res= cursor.next())) + { + /* Note, no need to fetch the value explicitly here. The partition + read cursor will fetch it to check if the partition has changed. + TODO(cvicentiu) make this piece of information not necessary by + reimplementing Partition_read_cursor. + */ + if (order_direction * range_expr->cmp_read_only() <= 0) + break; + remove_value_from_items(); + } + if (res) + end_of_partition= true; + } + +}; + + +/* + Frame_range_n_bottom handles bottom end of RANGE-type frame. + + That is, it handles frame bounds in form: + RANGE BETWEEN ... AND n PRECEDING + RANGE BETWEEN ... 
AND n FOLLOWING + + Bottom end moves first so it needs to check for partition end + (todo: unless it's PRECEDING and in that case it doesnt) + (todo: factor out common parts with Frame_range_n_top into + a common ancestor) +*/ + +class Frame_range_n_bottom: public Frame_cursor +{ + Partition_read_cursor cursor; + + Cached_item_item *range_expr; + + Item *n_val; + Item *item_add; + + const bool is_preceding; + + bool end_of_partition; + + /* + 1 when order_list uses ASC ordering + -1 when order_list uses DESC ordering + */ + int order_direction; +public: + Frame_range_n_bottom(THD *thd, + SQL_I_List *partition_list, + SQL_I_List *order_list, + bool is_preceding_arg, Item *n_val_arg) : + cursor(thd, partition_list), n_val(n_val_arg), item_add(NULL), + is_preceding(is_preceding_arg), added_values(false) + { + DBUG_ASSERT(order_list->elements == 1); + Item *src_expr= order_list->first->item[0]; + + if (order_list->first->direction == ORDER::ORDER_ASC) + order_direction= 1; + else + order_direction= -1; + + range_expr= (Cached_item_item*) new_Cached_item(thd, src_expr, FALSE); + + bool use_minus= is_preceding; + if (order_direction == -1) + use_minus= !use_minus; + + if (use_minus) + item_add= new (thd->mem_root) Item_func_minus(thd, src_expr, n_val); + else + item_add= new (thd->mem_root) Item_func_plus(thd, src_expr, n_val); + + item_add->fix_fields(thd, &item_add); + } + + void init(READ_RECORD *info) + { + cursor.init(info); + } + + void pre_next_partition(ha_rows rownum) + { + // Save the value of FUNC(current_row) + range_expr->fetch_value_from(item_add); + + cursor.on_next_partition(rownum); + end_of_partition= false; + added_values= false; + } + + void next_partition(ha_rows rownum) + { + cursor.move_to(rownum); + walk_till_non_peer(); + } + + void pre_next_row() + { + if (end_of_partition) + return; + range_expr->fetch_value_from(item_add); + } + + void next_row() + { + if (end_of_partition) + return; + /* + Ok, our cursor is at the first row R where + 
(prev_row + n) >= R + We need to check about the current row. + */ + walk_till_non_peer(); + } + + bool is_outside_computation_bounds() const + { + if (!added_values) + return true; + return false; + } + + ha_rows get_curr_rownum() const + { + if (end_of_partition) + return cursor.get_rownum(); // Cursor does not pass over partition bound. + else + return cursor.get_rownum() - 1; // Cursor is placed on first non peer. + } + +private: + bool added_values; + + void walk_till_non_peer() + { + cursor.fetch(); + // Current row is not a peer. + if (order_direction * range_expr->cmp_read_only() < 0) + return; + + add_value_to_items(); // Add current row. + added_values= true; + int res; + while (!(res= cursor.next())) + { + if (order_direction * range_expr->cmp_read_only() < 0) + break; + add_value_to_items(); + } + if (res) + end_of_partition= true; + } +}; + + +/* + RANGE BETWEEN ... AND CURRENT ROW, bottom frame bound for CURRENT ROW + ... + | peer1 + | peer2 <----- current_row + | peer3 + +-peer4 <----- the cursor points here. peer4 itself is included. + nonpeer1 + nonpeer2 + + This bound moves in front of the current_row. It should be a the first row + that is still a peer of the current row. 
+*/ + +class Frame_range_current_row_bottom: public Frame_cursor +{ + Partition_read_cursor cursor; + + Group_bound_tracker peer_tracker; + + bool dont_move; +public: + Frame_range_current_row_bottom(THD *thd, + SQL_I_List *partition_list, + SQL_I_List *order_list) : + cursor(thd, partition_list), peer_tracker(thd, order_list) + { + } + + void init(READ_RECORD *info) + { + cursor.init(info); + peer_tracker.init(); + } + + void pre_next_partition(ha_rows rownum) + { + // Save the value of the current_row + peer_tracker.check_if_next_group(); + cursor.on_next_partition(rownum); + // Add the current row now because our cursor has already seen it + add_value_to_items(); + } + + void next_partition(ha_rows rownum) + { + walk_till_non_peer(); + } + + void pre_next_row() + { + dont_move= !peer_tracker.check_if_next_group(); + } + + void next_row() + { + // Check if our cursor is pointing at a peer of the current row. + // If not, move forward until that becomes true + if (dont_move) + { + /* + Our current is not a peer of the current row. + No need to move the bound. + */ + return; + } + walk_till_non_peer(); + } + + ha_rows get_curr_rownum() const + { + return cursor.get_rownum(); + } + +private: + void walk_till_non_peer() + { + /* + Walk forward until we've met first row that's not a peer of the current + row + */ + while (!cursor.next()) + { + if (peer_tracker.compare_with_cache()) + { + cursor.prev(); // Move to our peer. + break; + } + + add_value_to_items(); + } + } +}; + + +/* + RANGE BETWEEN CURRENT ROW AND .... Top CURRENT ROW, RANGE-type frame bound + + nonpeer1 + nonpeer2 + +-peer1 <----- the cursor points here. peer1 itself is included. + | peer2 + | peer3 <----- current_row + | peer4 + ... + + It moves behind the current_row. It is located right after the first peer of + the current_row. 
+*/
+
+class Frame_range_current_row_top : public Frame_cursor
+{
+  Group_bound_tracker bound_tracker;
+
+  Table_read_cursor cursor;
+  Group_bound_tracker peer_tracker;
+
+  bool move;
+public:
+  Frame_range_current_row_top(THD *thd,
+                              SQL_I_List<ORDER> *partition_list,
+                              SQL_I_List<ORDER> *order_list) :
+    bound_tracker(thd, partition_list), cursor(), peer_tracker(thd, order_list),
+    move(false)
+  {}
+
+  void init(READ_RECORD *info)
+  {
+    bound_tracker.init();
+
+    cursor.init(info);
+    peer_tracker.init();
+  }
+
+  void pre_next_partition(ha_rows rownum)
+  {
+    // Fetch the value from the first row
+    peer_tracker.check_if_next_group();
+    cursor.move_to(rownum);
+  }
+
+  void next_partition(ha_rows rownum) {}
+
+  void pre_next_row()
+  {
+    // Check if the new current_row is a peer of the row that our cursor is
+    // pointing to.
+    move= peer_tracker.check_if_next_group();
+  }
+
+  void next_row()
+  {
+    if (move)
+    {
+      /*
+        Our cursor is pointing at the first row that was a peer of the
+        previous current row. Or, it was the first row in the partition.
+      */
+      if (cursor.fetch())
+        return;
+
+      // todo: need the following check ?
+      if (!peer_tracker.compare_with_cache())
+        return;
+      remove_value_from_items();
+
+      do
+      {
+        if (cursor.next() || cursor.fetch())
+          return;
+        if (!peer_tracker.compare_with_cache())
+          return;
+        remove_value_from_items();
+      }
+      while (1);
+    }
+  }
+
+  ha_rows get_curr_rownum() const
+  {
+    return cursor.get_rownum();
+  }
+};
+
+
+/////////////////////////////////////////////////////////////////////////////
+// UNBOUNDED frame bounds (shared between RANGE and ROWS)
+/////////////////////////////////////////////////////////////////////////////
+
+/*
+  UNBOUNDED PRECEDING frame bound
+*/
+class Frame_unbounded_preceding : public Frame_cursor
+{
+public:
+  Frame_unbounded_preceding(THD *thd,
+                            SQL_I_List<ORDER> *partition_list,
+                            SQL_I_List<ORDER> *order_list)
+  {}
+
+  void init(READ_RECORD *info) {}
+
+  void next_partition(ha_rows rownum)
+  {
+    /*
+      UNBOUNDED PRECEDING frame end just stays on the first row of the
+      partition. We are top of the frame, so we don't need to update the sum
+      function.
+    */
+    curr_rownum= rownum;
+  }
+
+  void next_row()
+  {
+    /* Do nothing, UNBOUNDED PRECEDING frame end doesn't move. */
+  }
+
+  ha_rows get_curr_rownum() const
+  {
+    return curr_rownum;
+  }
+
+private:
+  ha_rows curr_rownum;
+};
+
+
+/*
+  UNBOUNDED FOLLOWING frame bound
+*/
+
+class Frame_unbounded_following : public Frame_cursor
+{
+protected:
+  Partition_read_cursor cursor;
+
+public:
+  Frame_unbounded_following(THD *thd,
+                            SQL_I_List<ORDER> *partition_list,
+                            SQL_I_List<ORDER> *order_list) :
+    cursor(thd, partition_list) {}
+
+  void init(READ_RECORD *info)
+  {
+    cursor.init(info);
+  }
+
+  void pre_next_partition(ha_rows rownum)
+  {
+    cursor.on_next_partition(rownum);
+  }
+
+  void next_partition(ha_rows rownum)
+  {
+    /* Activate the first row */
+    cursor.fetch();
+    add_value_to_items();
+
+    /* Walk to the end of the partition, updating the SUM function */
+    while (!cursor.next())
+    {
+      add_value_to_items();
+    }
+  }
+
+  void next_row()
+  {
+    /* Do nothing, UNBOUNDED FOLLOWING frame end doesn't move */
+  }
+
+  ha_rows get_curr_rownum() const
+  {
+    return cursor.get_rownum();
+  }
+};
+
+
+/*
+  Not a real frame bound: counts the rows in the partition and passes the
+  count to each attached window function via set_partition_row_count().
+*/
+class Frame_unbounded_following_set_count : public Frame_unbounded_following
+{
+public:
+  Frame_unbounded_following_set_count(
+      THD *thd,
+      SQL_I_List<ORDER> *partition_list, SQL_I_List<ORDER> *order_list) :
+    Frame_unbounded_following(thd, partition_list, order_list) {}
+
+  void next_partition(ha_rows rownum)
+  {
+    ha_rows num_rows_in_partition= 0;
+    if (cursor.fetch())
+      return;
+    num_rows_in_partition++;
+
+    /* Walk to the end of the partition, find how many rows there are. */
+    while (!cursor.next())
+      num_rows_in_partition++;
+    set_win_funcs_row_count(num_rows_in_partition);
+  }
+
+  ha_rows get_curr_rownum() const
+  {
+    return cursor.get_rownum();
+  }
+
+protected:
+  void set_win_funcs_row_count(ha_rows num_rows_in_partition)
+  {
+    List_iterator_fast<Item_sum> it(sum_functions);
+    Item_sum* item;
+    while ((item= it++))
+      item->set_partition_row_count(num_rows_in_partition);
+  }
+};
+
+/*
+  Like Frame_unbounded_following_set_count, but counts only the rows where
+  the (single) order expression is not NULL.
+*/
+class Frame_unbounded_following_set_count_no_nulls:
+  public Frame_unbounded_following_set_count
+{
+
+public:
+  Frame_unbounded_following_set_count_no_nulls(THD *thd,
+                                               SQL_I_List<ORDER> *partition_list,
+                                               SQL_I_List<ORDER> *order_list) :
+    Frame_unbounded_following_set_count(thd,partition_list, order_list)
+  {
+    order_item= order_list->first->item[0];
+  }
+
+  void next_partition(ha_rows rownum)
+  {
+    ha_rows num_rows_in_partition= 0;
+    if (cursor.fetch())
+      return;
+
+    /* Walk to the end of the partition, find how many rows there are. */
+    do
+    {
+      if (!order_item->is_null())
+        num_rows_in_partition++;
+    } while (!cursor.next());
+
+    set_win_funcs_row_count(num_rows_in_partition);
+  }
+
+  ha_rows get_curr_rownum() const
+  {
+    return cursor.get_rownum();
+  }
+
+private:
+  Item* order_item;
+};
+
+/////////////////////////////////////////////////////////////////////////////
+// ROWS-type frame bounds
+/////////////////////////////////////////////////////////////////////////////
+/*
+  ROWS $n PRECEDING frame bound
+
+*/
+class Frame_n_rows_preceding : public Frame_cursor
+{
+  /* Whether this is top of the frame or bottom */
+  const bool is_top_bound;
+  const ha_rows n_rows;
+
+  /* Number of rows that we need to skip before our cursor starts moving */
+  ha_rows n_rows_behind;
+
+  Table_read_cursor cursor;
+public:
+  Frame_n_rows_preceding(bool is_top_bound_arg, ha_rows n_rows_arg) :
+    is_top_bound(is_top_bound_arg), n_rows(n_rows_arg), n_rows_behind(0)
+  {}
+
+  void init(READ_RECORD *info)
+  {
+    cursor.init(info);
+  }
+
+  void next_partition(ha_rows rownum)
+  {
+    /*
+      Position our cursor to point at the first row in the new partition
+      (for rownum=0, it is already there, otherwise, it lags behind)
+    */
+    cursor.move_to(rownum);
+    /* Cursor is in the same spot as current row. */
+    n_rows_behind= 0;
+
+    /*
+      Suppose the bound is ROWS 2 PRECEDING, and current row is row#n:
+        ...
+        n-3
+        n-2 --- bound row
+        n-1
+        n   --- current_row
+        ...
+      The bound should point at row #(n-2). Bounds are inclusive, so
+        - bottom bound should add row #(n-2) into the window function
+        - top bound should remove row (#n-3) from the window function.
+    */
+    move_cursor_if_possible();
+  }
+
+  void next_row()
+  {
+    n_rows_behind++;
+    move_cursor_if_possible();
+  }
+
+  bool is_outside_computation_bounds() const
+  {
+    /* As a bottom boundary, rows have not yet been added. */
+    if (!is_top_bound && n_rows - n_rows_behind)
+      return true;
+    return false;
+  }
+
+  ha_rows get_curr_rownum() const
+  {
+    return cursor.get_rownum();
+  }
+
+private:
+  void move_cursor_if_possible()
+  {
+    longlong rows_difference= n_rows - n_rows_behind;
+    if (rows_difference > 0) /* We still have to wait. */
+      return;
+
+    /* The cursor points to the first row in the frame. */
+    if (rows_difference == 0)
+    {
+      if (!is_top_bound)
+      {
+        cursor.fetch();
+        add_value_to_items();
+      }
+      /* For top bound we don't have to remove anything as nothing was
+         added. */
+      return;
+    }
+
+    /* We need to catch up by one row. */
+    DBUG_ASSERT(rows_difference == -1);
+
+    if (is_top_bound)
+    {
+      cursor.fetch();
+      remove_value_from_items();
+      cursor.next();
+    }
+    else
+    {
+      cursor.next();
+      cursor.fetch();
+      add_value_to_items();
+    }
+    /* We've advanced one row. We are no longer behind. */
+    n_rows_behind--;
+  }
+};
+
+
+/*
+  ROWS ... CURRENT ROW, Bottom bound.
+
+  This case is moved to separate class because here we don't need to maintain
+  our own cursor, or check for partition bound.
+*/ + +class Frame_rows_current_row_bottom : public Frame_cursor +{ +public: + + Frame_rows_current_row_bottom() : curr_rownum(0) {} + + void pre_next_partition(ha_rows rownum) + { + add_value_to_items(); + curr_rownum= rownum; + } + + void next_partition(ha_rows rownum) {} + + void pre_next_row() + { + /* Temp table's current row is current_row. Add it to the window func */ + add_value_to_items(); + } + + void next_row() + { + curr_rownum++; + }; + + ha_rows get_curr_rownum() const + { + return curr_rownum; + } + +private: + ha_rows curr_rownum; +}; + + +/* + ROWS-type CURRENT ROW, top bound. + + This serves for processing "ROWS BETWEEN CURRENT ROW AND ..." frames. + + n-1 + n --+ --- current_row, and top frame bound + n+1 | + ... | + + when the current_row moves to row #n, this frame bound should remove the + row #(n-1) from the window function. + + In other words, we need what "ROWS PRECEDING 0" provides. +*/ +class Frame_rows_current_row_top: public Frame_n_rows_preceding + +{ +public: + Frame_rows_current_row_top() : + Frame_n_rows_preceding(true /*top*/, 0 /* n_rows */) + {} +}; + + +/* + ROWS $n FOLLOWING frame bound. +*/ + +class Frame_n_rows_following : public Frame_cursor +{ + /* Whether this is top of the frame or bottom */ + const bool is_top_bound; + const ha_rows n_rows; + + Partition_read_cursor cursor; + bool at_partition_end; +public: + Frame_n_rows_following(THD *thd, + SQL_I_List *partition_list, + SQL_I_List *order_list, + bool is_top_bound_arg, ha_rows n_rows_arg) : + is_top_bound(is_top_bound_arg), n_rows(n_rows_arg), + cursor(thd, partition_list) + { + } + + void init(READ_RECORD *info) + { + cursor.init(info); + at_partition_end= false; + } + + void pre_next_partition(ha_rows rownum) + { + at_partition_end= false; + + cursor.on_next_partition(rownum); + } + + /* Move our cursor to be n_rows ahead. 
*/ + void next_partition(ha_rows rownum) + { + if (is_top_bound) + next_part_top(rownum); + else + next_part_bottom(rownum); + } + + void next_row() + { + if (is_top_bound) + next_row_top(); + else + next_row_bottom(); + } + + bool is_outside_computation_bounds() const + { + /* + The top bound can go over the current partition. In this case, + the sum function has 0 values added to it. + */ + if (at_partition_end && is_top_bound) + return true; + return false; + } + + ha_rows get_curr_rownum() const + { + return cursor.get_rownum(); + } + +private: + void next_part_top(ha_rows rownum) + { + for (ha_rows i= 0; i < n_rows; i++) + { + if (cursor.fetch()) + break; + remove_value_from_items(); + if (cursor.next()) + at_partition_end= true; + } + } + + void next_part_bottom(ha_rows rownum) + { + if (cursor.fetch()) + return; + add_value_to_items(); + + for (ha_rows i= 0; i < n_rows; i++) + { + if (cursor.next()) + { + at_partition_end= true; + break; + } + add_value_to_items(); + } + return; + } + + void next_row_top() + { + if (cursor.fetch()) // PART END OR FAILURE + { + at_partition_end= true; + return; + } + remove_value_from_items(); + if (cursor.next()) + { + at_partition_end= true; + return; + } + } + + void next_row_bottom() + { + if (at_partition_end) + return; + + if (cursor.next()) + { + at_partition_end= true; + return; + } + + add_value_to_items(); + + } +}; + +/* + A cursor that performs a table scan between two indices. The indices + are provided by the two cursors representing the top and bottom bound + of the window function's frame definition. + + Each scan clears the sum function. + + NOTE: + The cursor does not alter the top and bottom cursors. + This type of cursor is expensive computational wise. This is only to be + used when the sum functions do not support removal. 
+*/ +class Frame_scan_cursor : public Frame_cursor +{ +public: + Frame_scan_cursor(const Frame_cursor &top_bound, + const Frame_cursor &bottom_bound) : + top_bound(top_bound), bottom_bound(bottom_bound) {} + + void init(READ_RECORD *info) + { + cursor.init(info); + } + + void pre_next_partition(ha_rows rownum) + { + /* TODO(cvicentiu) Sum functions get cleared on next partition anyway during + the window function computation algorithm. Either perform this only in + cursors, or remove it from pre_next_partition. + */ + curr_rownum= rownum; + clear_sum_functions(); + } + + void next_partition(ha_rows rownum) + { + compute_values_for_current_row(); + } + + void pre_next_row() + { + clear_sum_functions(); + } + + void next_row() + { + curr_rownum++; + compute_values_for_current_row(); + } + + ha_rows get_curr_rownum() const + { + return curr_rownum; + } + +private: + const Frame_cursor &top_bound; + const Frame_cursor &bottom_bound; + Table_read_cursor cursor; + ha_rows curr_rownum; + + /* Scan the rows between the top bound and bottom bound. Add all the values + between them, top bound row and bottom bound row inclusive. */ + void compute_values_for_current_row() + { + if (top_bound.is_outside_computation_bounds() || + bottom_bound.is_outside_computation_bounds()) + return; + + ha_rows start_rownum= top_bound.get_curr_rownum(); + ha_rows bottom_rownum= bottom_bound.get_curr_rownum(); + DBUG_PRINT("info", ("COMPUTING (%llu %llu)", start_rownum, bottom_rownum)); + + cursor.move_to(start_rownum); + + for (ha_rows idx= start_rownum; idx <= bottom_rownum; idx++) + { + if (cursor.fetch()) //EOF + break; + add_value_to_items(); + if (cursor.next()) // EOF + break; + } + } +}; + +/* A cursor that follows a target cursor. Each time a new row is added, + the window functions are cleared and only have the row at which the target + is point at added to them. + + The window functions are cleared if the bounds or the position cursors are + outside computational bounds. 
+*/
+class Frame_positional_cursor : public Frame_cursor
+{
+ public:
+  Frame_positional_cursor(const Frame_cursor &position_cursor) :
+    position_cursor(position_cursor), top_bound(NULL),
+    bottom_bound(NULL), offset(NULL), overflowed(false),
+    negative_offset(false) {}
+
+  Frame_positional_cursor(const Frame_cursor &position_cursor,
+                          const Frame_cursor &top_bound,
+                          const Frame_cursor &bottom_bound,
+                          Item &offset,
+                          bool negative_offset) :
+    position_cursor(position_cursor), top_bound(&top_bound),
+    bottom_bound(&bottom_bound), offset(&offset),
+    negative_offset(negative_offset) {}
+
+  void init(READ_RECORD *info)
+  {
+    cursor.init(info);
+  }
+
+  void pre_next_partition(ha_rows rownum)
+  {
+    /* The offset is dependent on the current row values. We can only get
+     * it here accurately. When fetching other rows, it changes. */
+    save_offset_value();
+  }
+
+  void next_partition(ha_rows rownum)
+  {
+    save_positional_value();
+  }
+
+  void pre_next_row()
+  {
+    /* The offset is dependent on the current row values. We can only get
+     * it here accurately. When fetching other rows, it changes. */
+    save_offset_value();
+  }
+
+  void next_row()
+  {
+    save_positional_value();
+  }
+
+  ha_rows get_curr_rownum() const
+  {
+    return position_cursor.get_curr_rownum();
+  }
+
+private:
+  /* Check if our position is within bounds.
+   * The position is passed as a parameter to avoid recalculating it. */
+  bool position_is_within_bounds()
+  {
+    if (!offset)
+      return !position_cursor.is_outside_computation_bounds();
+
+    if (overflowed)
+      return false;
+
+    /* No valid bound to compare to. */
+    if (position_cursor.is_outside_computation_bounds() ||
+        top_bound->is_outside_computation_bounds() ||
+        bottom_bound->is_outside_computation_bounds())
+      return false;
+
+    /* We are over the bound. */
+    if (position < top_bound->get_curr_rownum())
+      return false;
+    if (position > bottom_bound->get_curr_rownum())
+      return false;
+
+    return true;
+  }
+
+  /* Get the current position, accounting for the offset value, if present.
+     NOTE: This function does not check over/underflow.
+  */
+  void get_current_position()
+  {
+    position = position_cursor.get_curr_rownum();
+    overflowed= false;
+    if (offset)
+    {
+      if (offset_value < 0 &&
+          position + offset_value > position)
+      {
+        overflowed= true;
+      }
+      if (offset_value > 0 &&
+          position + offset_value < position)
+      {
+        overflowed= true;
+      }
+      position += offset_value;
+    }
+  }
+
+  void save_offset_value()
+  {
+    if (offset)
+      offset_value= offset->val_int() * (negative_offset ? -1 : 1);
+    else
+      offset_value= 0;
+  }
+
+  void save_positional_value()
+  {
+    get_current_position();
+    if (!position_is_within_bounds())
+      clear_sum_functions();
+    else
+    {
+      cursor.move_to(position);
+      cursor.fetch();
+      add_value_to_items();
+    }
+  }
+
+  const Frame_cursor &position_cursor;
+  const Frame_cursor *top_bound;
+  const Frame_cursor *bottom_bound;
+  Item *offset;
+  Table_read_cursor cursor;
+  ha_rows position;
+  longlong offset_value;
+  bool overflowed;
+
+  bool negative_offset;
+};
+
+
+/*
+  Get a Frame_cursor for a frame bound. This is a "factory function".
+*/
+Frame_cursor *get_frame_cursor(THD *thd, Window_spec *spec, bool is_top_bound)
+{
+  Window_frame *frame= spec->window_frame;
+  if (!frame)
+  {
+    /*
+      The docs say this about the lack of frame clause:
+
+        Let WD be a window structure descriptor.
+        ...
+        If WD has no window framing clause, then
+        Case:
+        i) If the window ordering clause of WD is not present, then WF is the
+           window partition of R.
+        ii) Otherwise, WF consists of all rows of the partition of R that
+           precede R or are peers of R in the window ordering of the window
+           partition defined by the window ordering clause.
+ + For case #ii, the frame bounds essentially are "RANGE BETWEEN UNBOUNDED + PRECEDING AND CURRENT ROW". + For the case #i, without ordering clause all rows are considered peers, + so again the same frame bounds can be used. + */ + if (is_top_bound) + return new Frame_unbounded_preceding(thd, + spec->partition_list, + spec->order_list); + else + return new Frame_range_current_row_bottom(thd, + spec->partition_list, + spec->order_list); + } + + Window_frame_bound *bound= is_top_bound? frame->top_bound : + frame->bottom_bound; + + if (bound->precedence_type == Window_frame_bound::PRECEDING || + bound->precedence_type == Window_frame_bound::FOLLOWING) + { + bool is_preceding= (bound->precedence_type == + Window_frame_bound::PRECEDING); + + if (bound->offset == NULL) /* this is UNBOUNDED */ + { + /* The following serve both RANGE and ROWS: */ + if (is_preceding) + return new Frame_unbounded_preceding(thd, + spec->partition_list, + spec->order_list); + + return new Frame_unbounded_following(thd, + spec->partition_list, + spec->order_list); + } + + if (frame->units == Window_frame::UNITS_ROWS) + { + ha_rows n_rows= bound->offset->val_int(); + /* These should be handled in the parser */ + DBUG_ASSERT(!bound->offset->null_value); + DBUG_ASSERT((longlong) n_rows >= 0); + if (is_preceding) + return new Frame_n_rows_preceding(is_top_bound, n_rows); + + return new Frame_n_rows_following( + thd, spec->partition_list, spec->order_list, + is_top_bound, n_rows); + } + else + { + if (is_top_bound) + return new Frame_range_n_top( + thd, spec->partition_list, spec->order_list, + is_preceding, bound->offset); + + return new Frame_range_n_bottom(thd, + spec->partition_list, spec->order_list, + is_preceding, bound->offset); + } + } + + if (bound->precedence_type == Window_frame_bound::CURRENT) + { + if (frame->units == Window_frame::UNITS_ROWS) + { + if (is_top_bound) + return new Frame_rows_current_row_top; + + return new Frame_rows_current_row_bottom; + } + else + { + if 
(is_top_bound) + return new Frame_range_current_row_top( + thd, spec->partition_list, spec->order_list); + + return new Frame_range_current_row_bottom( + thd, spec->partition_list, spec->order_list); + } + } + return NULL; +} + +static +void add_special_frame_cursors(THD *thd, Cursor_manager *cursor_manager, + Item_window_func *window_func) +{ + Window_spec *spec= window_func->window_spec; + Item_sum *item_sum= window_func->window_func(); + DBUG_PRINT("info", ("Get arg count: %d", item_sum->get_arg_count())); + Frame_cursor *fc; + switch (item_sum->sum_func()) + { + case Item_sum::CUME_DIST_FUNC: + fc= new Frame_unbounded_preceding(thd, + spec->partition_list, + spec->order_list); + fc->add_sum_func(item_sum); + cursor_manager->add_cursor(fc); + fc= new Frame_range_current_row_bottom(thd, + spec->partition_list, + spec->order_list); + fc->add_sum_func(item_sum); + cursor_manager->add_cursor(fc); + break; + case Item_sum::LEAD_FUNC: + case Item_sum::LAG_FUNC: + { + Frame_cursor *bottom_bound= new Frame_unbounded_following(thd, + spec->partition_list, + spec->order_list); + Frame_cursor *top_bound= new Frame_unbounded_preceding(thd, + spec->partition_list, + spec->order_list); + Frame_cursor *current_row_pos= new Frame_rows_current_row_bottom; + cursor_manager->add_cursor(bottom_bound); + cursor_manager->add_cursor(top_bound); + cursor_manager->add_cursor(current_row_pos); + DBUG_ASSERT(item_sum->fixed()); + bool negative_offset= item_sum->sum_func() == Item_sum::LAG_FUNC; + fc= new Frame_positional_cursor(*current_row_pos, + *top_bound, *bottom_bound, + *item_sum->get_arg(1), + negative_offset); + fc->add_sum_func(item_sum); + cursor_manager->add_cursor(fc); + break; + } + case Item_sum::FIRST_VALUE_FUNC: + { + Frame_cursor *bottom_bound= get_frame_cursor(thd, spec, false); + Frame_cursor *top_bound= get_frame_cursor(thd, spec, true); + cursor_manager->add_cursor(bottom_bound); + cursor_manager->add_cursor(top_bound); + DBUG_ASSERT(item_sum->fixed()); + Item 
*offset_item= new (thd->mem_root) Item_int(thd, 0); + offset_item->fix_fields(thd, &offset_item); + fc= new Frame_positional_cursor(*top_bound, + *top_bound, *bottom_bound, + *offset_item, false); + fc->add_sum_func(item_sum); + cursor_manager->add_cursor(fc); + break; + } + case Item_sum::LAST_VALUE_FUNC: + { + Frame_cursor *bottom_bound= get_frame_cursor(thd, spec, false); + Frame_cursor *top_bound= get_frame_cursor(thd, spec, true); + cursor_manager->add_cursor(bottom_bound); + cursor_manager->add_cursor(top_bound); + DBUG_ASSERT(item_sum->fixed()); + Item *offset_item= new (thd->mem_root) Item_int(thd, 0); + offset_item->fix_fields(thd, &offset_item); + fc= new Frame_positional_cursor(*bottom_bound, + *top_bound, *bottom_bound, + *offset_item, false); + fc->add_sum_func(item_sum); + cursor_manager->add_cursor(fc); + break; + } + case Item_sum::NTH_VALUE_FUNC: + { + Frame_cursor *bottom_bound= get_frame_cursor(thd, spec, false); + Frame_cursor *top_bound= get_frame_cursor(thd, spec, true); + cursor_manager->add_cursor(bottom_bound); + cursor_manager->add_cursor(top_bound); + DBUG_ASSERT(item_sum->fixed()); + Item *int_item= new (thd->mem_root) Item_int(thd, 1); + Item *offset_func= new (thd->mem_root) + Item_func_minus(thd, item_sum->get_arg(1), + int_item); + offset_func->fix_fields(thd, &offset_func); + fc= new Frame_positional_cursor(*top_bound, + *top_bound, *bottom_bound, + *offset_func, false); + fc->add_sum_func(item_sum); + cursor_manager->add_cursor(fc); + break; + } + case Item_sum::PERCENTILE_CONT_FUNC: + case Item_sum::PERCENTILE_DISC_FUNC: + { + fc= new Frame_unbounded_preceding(thd, + spec->partition_list, + spec->order_list); + fc->add_sum_func(item_sum); + cursor_manager->add_cursor(fc); + fc= new Frame_unbounded_following(thd, + spec->partition_list, + spec->order_list); + fc->add_sum_func(item_sum); + cursor_manager->add_cursor(fc); + break; + } + default: + fc= new Frame_unbounded_preceding( + thd, spec->partition_list, spec->order_list); + 
fc->add_sum_func(item_sum); + cursor_manager->add_cursor(fc); + + fc= new Frame_rows_current_row_bottom; + fc->add_sum_func(item_sum); + cursor_manager->add_cursor(fc); + } +} + + +static bool is_computed_with_remove(Item_sum::Sumfunctype sum_func) +{ + switch (sum_func) + { + case Item_sum::CUME_DIST_FUNC: + case Item_sum::ROW_NUMBER_FUNC: + case Item_sum::RANK_FUNC: + case Item_sum::DENSE_RANK_FUNC: + case Item_sum::NTILE_FUNC: + case Item_sum::FIRST_VALUE_FUNC: + case Item_sum::LAST_VALUE_FUNC: + case Item_sum::PERCENTILE_CONT_FUNC: + case Item_sum::PERCENTILE_DISC_FUNC: + return false; + default: + return true; + } +} +/* + Create required frame cursors for the list of window functions. + Register all functions to their appropriate cursors. + If the window functions share the same frame specification, + those window functions will be registered to the same cursor. +*/ +void get_window_functions_required_cursors( + THD *thd, + List& window_functions, + List *cursor_managers) +{ + List_iterator_fast it(window_functions); + Item_window_func* item_win_func; + Item_sum *sum_func; + while ((item_win_func= it++)) + { + Cursor_manager *cursor_manager = new Cursor_manager(); + sum_func = item_win_func->window_func(); + Frame_cursor *fc; + /* + Some window functions require the partition size for computing values. + Add a cursor that retrieves it as the first one in the list if necessary. 
+ */ + if (item_win_func->requires_partition_size()) + { + if (item_win_func->only_single_element_order_list()) + { + fc= new Frame_unbounded_following_set_count_no_nulls(thd, + item_win_func->window_spec->partition_list, + item_win_func->window_spec->order_list); + } + else + { + fc= new Frame_unbounded_following_set_count(thd, + item_win_func->window_spec->partition_list, + item_win_func->window_spec->order_list); + } + fc->add_sum_func(sum_func); + cursor_manager->add_cursor(fc); + } + + /* + If it is not a regular window function that follows frame specifications, + and/or specific cursors are required. ROW_NUM, RANK, NTILE and others + follow such rules. Check is_frame_prohibited check for the full list. + + TODO(cvicentiu) This approach is messy. Every time a function allows + computation in a certain way, we have to add an extra method to this + factory function. It is better to have window functions output + their own cursors, as needed. This way, the logic is bound + only to the implementation of said window function. Regular aggregate + functions can keep the default frame generating code, overwrite it or + add to it. + */ + if (item_win_func->is_frame_prohibited() || + item_win_func->requires_special_cursors()) + { + add_special_frame_cursors(thd, cursor_manager, item_win_func); + cursor_managers->push_back(cursor_manager); + continue; + } + + Frame_cursor *frame_bottom= get_frame_cursor(thd, + item_win_func->window_spec, false); + Frame_cursor *frame_top= get_frame_cursor(thd, + item_win_func->window_spec, true); + + frame_bottom->add_sum_func(sum_func); + frame_top->add_sum_func(sum_func); + + /* + The order of these cursors is important. A sum function + must first add values (via frame_bottom) then remove them via + frame_top. Removing items first doesn't make sense in the case of all + window functions. 
+ */ + cursor_manager->add_cursor(frame_bottom); + cursor_manager->add_cursor(frame_top); + if (is_computed_with_remove(sum_func->sum_func()) && + !sum_func->supports_removal()) + { + frame_bottom->set_no_action(); + frame_top->set_no_action(); + Frame_cursor *scan_cursor= new Frame_scan_cursor(*frame_top, + *frame_bottom); + scan_cursor->add_sum_func(sum_func); + cursor_manager->add_cursor(scan_cursor); + + } + cursor_managers->push_back(cursor_manager); + } +} + +/** + Helper function that takes a list of window functions and writes + their values in the current table record. +*/ +static +bool save_window_function_values(List& window_functions, + TABLE *tbl, uchar *rowid_buf) +{ + List_iterator_fast iter(window_functions); + JOIN_TAB *join_tab= tbl->reginfo.join_tab; + tbl->file->ha_rnd_pos(tbl->record[0], rowid_buf); + store_record(tbl, record[1]); + while (Item_window_func *item_win= iter++) + item_win->save_in_field(item_win->result_field, true); + + /* + In case we have window functions present, an extra step is required + to compute all the fields from the temporary table. + In case we have a compound expression such as: expr + expr, + where one of the terms has a window function inside it, only + after computing window function values we actually know the true + final result of the compounded expression. + + Go through all the func items and save their values once again in the + corresponding temp table fields. Do this for each row in the table. + + This needs to be done earlier because ORDER BY clause can also have + a window function, so we need to make sure all the fields of the temp.table + are updated before we do the filesort. So is best to update the other fields + that contain the window functions along with the computation of window + functions. 
+ */ + + Item **func_ptr= join_tab->tmp_table_param->items_to_copy; + Item *func; + for (; (func = *func_ptr) ; func_ptr++) + { + if (func->with_window_func() && func->type() != Item::WINDOW_FUNC_ITEM) + func->save_in_result_field(true); + } + + int err= tbl->file->ha_update_row(tbl->record[1], tbl->record[0]); + if (err && err != HA_ERR_RECORD_IS_THE_SAME) + return true; + + return false; +} + +/* + TODO(cvicentiu) update this comment to reflect the new execution. + + Streamed window function computation with window frames. + + We make a single pass over the ordered temp.table, but we're using three + cursors: + - current row - the row that we're computing window func value for) + - start_bound - the start of the frame + - bottom_bound - the end of the frame + + All three cursors move together. + + @todo + Provided bounds have their 'cursors'... is it better to re-clone their + cursors or re-position them onto the current row? + + @detail + ROWS BETWEEN 3 PRECEDING -- frame start + AND 3 FOLLOWING -- frame end + + /------ frame end (aka BOTTOM) + Dataset start | + --------====*=======[*]========*========-------->> dataset end + | \ + | +-------- current row + | + \-------- frame start ("TOP") + + - frame_end moves forward and adds rows into the aggregate function. + - frame_start follows behind and removes rows from the aggregate function. + - current_row is the row where the value of aggregate function is stored. + + @TODO: Only the first cursor needs to check for run-out-of-partition + condition (Others can catch up by counting rows?) 
+ +*/ +bool compute_window_func(THD *thd, + List& window_functions, + List& cursor_managers, + TABLE *tbl, + SORT_INFO *filesort_result) +{ + List_iterator_fast iter_win_funcs(window_functions); + List_iterator_fast iter_cursor_managers(cursor_managers); + uint err; + + READ_RECORD info; + + if (init_read_record(&info, current_thd, tbl, NULL/*select*/, filesort_result, + 0, 1, FALSE)) + return true; + + Cursor_manager *cursor_manager; + while ((cursor_manager= iter_cursor_managers++)) + cursor_manager->initialize_cursors(&info); + + /* One partition tracker for each window function. */ + List partition_trackers; + Item_window_func *win_func; + while ((win_func= iter_win_funcs++)) + { + Group_bound_tracker *tracker= new Group_bound_tracker(thd, + win_func->window_spec->partition_list); + // TODO(cvicentiu) This should be removed and placed in constructor. + tracker->init(); + partition_trackers.push_back(tracker); + } + + List_iterator_fast iter_part_trackers(partition_trackers); + ha_rows rownum= 0; + uchar *rowid_buf= (uchar*) my_malloc(PSI_INSTRUMENT_ME, tbl->file->ref_length, MYF(0)); + + while (true) + { + if ((err= info.read_record())) + break; // End of file. + + /* Remember current row so that we can restore it before computing + each window function. */ + tbl->file->position(tbl->record[0]); + memcpy(rowid_buf, tbl->file->ref, tbl->file->ref_length); + + iter_win_funcs.rewind(); + iter_part_trackers.rewind(); + iter_cursor_managers.rewind(); + + Group_bound_tracker *tracker; + while ((win_func= iter_win_funcs++) && + (tracker= iter_part_trackers++) && + (cursor_manager= iter_cursor_managers++)) + { + if (tracker->check_if_next_group() || (rownum == 0)) + { + /* TODO(cvicentiu) + Clearing window functions should happen through cursors. 
*/ + win_func->window_func()->clear(); + cursor_manager->notify_cursors_partition_changed(rownum); + } + else + { + cursor_manager->notify_cursors_next_row(); + } + + /* Check if we found any error in the window function while adding values + through cursors. */ + if (unlikely(thd->is_error() || thd->is_killed())) + break; + + + /* Return to current row after notifying cursors for each window + function. */ + tbl->file->ha_rnd_pos(tbl->record[0], rowid_buf); + } + + /* We now have computed values for each window function. They can now + be saved in the current row. */ + save_window_function_values(window_functions, tbl, rowid_buf); + + rownum++; + } + + my_free(rowid_buf); + partition_trackers.delete_elements(); + end_read_record(&info); + + return false; +} + +/* Make a list that is a concation of two lists of ORDER elements */ + +static ORDER* concat_order_lists(MEM_ROOT *mem_root, ORDER *list1, ORDER *list2) +{ + if (!list1) + { + list1= list2; + list2= NULL; + } + + ORDER *res= NULL; // first element in the new list + ORDER *prev= NULL; // last element in the new list + ORDER *cur_list= list1; // this goes through list1, list2 + while (cur_list) + { + for (ORDER *cur= cur_list; cur; cur= cur->next) + { + ORDER *copy= (ORDER*)alloc_root(mem_root, sizeof(ORDER)); + memcpy(copy, cur, sizeof(ORDER)); + if (prev) + prev->next= copy; + prev= copy; + if (!res) + res= copy; + } + + cur_list= (cur_list == list1)? list2: NULL; + } + + if (prev) + prev->next= NULL; + + return res; +} + +bool Window_func_runner::add_function_to_run(Item_window_func *win_func) +{ + + Item_sum *sum_func= win_func->window_func(); + sum_func->setup_window_func(current_thd, win_func->window_spec); + + Item_sum::Sumfunctype type= win_func->window_func()->sum_func(); + + switch (type) + { + /* Distinct is not yet supported. 
*/ + case Item_sum::GROUP_CONCAT_FUNC: + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "GROUP_CONCAT() aggregate as window function"); + return true; + case Item_sum::SUM_DISTINCT_FUNC: + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "SUM(DISTINCT) aggregate as window function"); + return true; + case Item_sum::AVG_DISTINCT_FUNC: + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "AVG(DISTINCT) aggregate as window function"); + return true; + case Item_sum::COUNT_DISTINCT_FUNC: + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "COUNT(DISTINCT) aggregate as window function"); + return true; + case Item_sum::JSON_ARRAYAGG_FUNC: + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "JSON_ARRAYAGG() aggregate as window function"); + return true; + case Item_sum::JSON_OBJECTAGG_FUNC: + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "JSON_OBJECTAGG() aggregate as window function"); + return true; + default: + break; + } + + return window_functions.push_back(win_func); +} + + +/* + Compute the value of window function for all rows. +*/ +bool Window_func_runner::exec(THD *thd, TABLE *tbl, SORT_INFO *filesort_result) +{ + List_iterator_fast it(window_functions); + Item_window_func *win_func; + while ((win_func= it++)) + { + win_func->set_phase_to_computation(); + // TODO(cvicentiu) Setting the aggregator should probably be done during + // setup of Window_funcs_sort. 
+ win_func->window_func()->set_aggregator(thd, + Aggregator::SIMPLE_AGGREGATOR); + } + it.rewind(); + + List cursor_managers; + get_window_functions_required_cursors(thd, window_functions, + &cursor_managers); + + /* Go through the sorted array and compute the window function */ + bool is_error= compute_window_func(thd, + window_functions, + cursor_managers, + tbl, filesort_result); + while ((win_func= it++)) + { + win_func->set_phase_to_retrieval(); + } + + cursor_managers.delete_elements(); + + return is_error; +} + + +bool Window_funcs_sort::exec(JOIN *join, bool keep_filesort_result) +{ + THD *thd= join->thd; + JOIN_TAB *join_tab= join->join_tab + join->total_join_tab_cnt(); + + /* Sort the table based on the most specific sorting criteria of + the window functions. */ + if (create_sort_index(thd, join, join_tab, filesort)) + return true; + + TABLE *tbl= join_tab->table; + SORT_INFO *filesort_result= join_tab->filesort_result; + + bool is_error= runner.exec(thd, tbl, filesort_result); + + if (!keep_filesort_result) + { + delete join_tab->filesort_result; + join_tab->filesort_result= NULL; + } + return is_error; +} + + +bool Window_funcs_sort::setup(THD *thd, SQL_SELECT *sel, + List_iterator &it, + JOIN_TAB *join_tab) +{ + Window_spec *spec; + Item_window_func *win_func= it.peek(); + Item_window_func *win_func_with_longest_order= NULL; + int longest_order_elements= -1; + + /* The iterator should point to a valid function at the start of execution. 
*/ + DBUG_ASSERT(win_func); + do + { + spec= win_func->window_spec; + int win_func_order_elements= spec->partition_list->elements + + spec->order_list->elements; + if (win_func_order_elements >= longest_order_elements) + { + win_func_with_longest_order= win_func; + longest_order_elements= win_func_order_elements; + } + if (runner.add_function_to_run(win_func)) + return true; + it++; + win_func= it.peek(); + } while (win_func && !(win_func->marker & MARKER_SORTORDER_CHANGE)); + + /* + The sort criteria must be taken from the last win_func in the group of + adjacent win_funcs that do not have MARKER_SORTORDER_CHANGE. This is + because the sort order must be the most specific sorting criteria defined + within the window function group. This ensures that we sort the table + in a way that the result is valid for all window functions belonging to + this Window_funcs_sort. + */ + spec= win_func_with_longest_order->window_spec; + + ORDER* sort_order= concat_order_lists(thd->mem_root, + spec->partition_list->first, + spec->order_list->first); + if (sort_order == NULL) // No partition or order by clause. + { + /* TODO(cvicentiu) This is used as a way to allow an empty OVER () + clause for window functions. However, a better approach is + to not call Filesort at all in this case and just read whatever order + the temporary table has. + Due to cursors not working for out_of_memory cases (yet!), we have to run + filesort to generate a sort buffer of the results. + In this case we sort by the first field of the temporary table. + We should have this field available, even if it is a window_function + field. We don't care of the particular sorting result in this case. 
+ */ + ORDER *order= (ORDER *)alloc_root(thd->mem_root, sizeof(ORDER)); + memset(order, 0, sizeof(*order)); + Item_field *item= + new (thd->mem_root) Item_field(thd, join_tab->table->field[0]); + if (item) + item->set_refers_to_temp_table(); + order->item= (Item **)alloc_root(thd->mem_root, 2 * sizeof(Item *)); + order->item[1]= NULL; + order->item[0]= item; + order->field= join_tab->table->field[0]; + sort_order= order; + } + filesort= new (thd->mem_root) Filesort(sort_order, HA_POS_ERROR, true, NULL); + + /* Apply the same condition that the subsequent sort has. */ + filesort->select= sel; + + return false; +} + + +bool Window_funcs_computation::setup(THD *thd, + List *window_funcs, + JOIN_TAB *tab) +{ + order_window_funcs_by_window_specs(window_funcs); + + SQL_SELECT *sel= NULL; + /* + If the tmp table is filtered during sorting + (ex: SELECT with HAVING && ORDER BY), we must make sure to keep the + filtering conditions when we perform sorting for window function + computation. + */ + if (tab->filesort && tab->filesort->select) + { + sel= tab->filesort->select; + DBUG_ASSERT(!sel->quick); + } + + Window_funcs_sort *srt; + List_iterator iter(*window_funcs); + while (iter.peek()) + { + if (!(srt= new Window_funcs_sort()) || + srt->setup(thd, sel, iter, tab)) + { + return true; + } + win_func_sorts.push_back(srt, thd->mem_root); + } + return false; +} + + +bool Window_funcs_computation::exec(JOIN *join, bool keep_last_filesort_result) +{ + List_iterator it(win_func_sorts); + Window_funcs_sort *srt; + uint counter= 0; /* Count how many sorts we've executed. 
*/ + /* Execute each sort */ + while ((srt = it++)) + { + counter++; + bool keep_filesort_result= keep_last_filesort_result && + counter == win_func_sorts.elements; + if (srt->exec(join, keep_filesort_result)) + return true; + } + return false; +} + + +void Window_funcs_computation::cleanup() +{ + List_iterator it(win_func_sorts); + Window_funcs_sort *srt; + while ((srt = it++)) + { + srt->cleanup(); + delete srt; + } +} + + +Explain_aggr_window_funcs* +Window_funcs_computation::save_explain_plan(MEM_ROOT *mem_root, + bool is_analyze) +{ + Explain_aggr_window_funcs *xpl= new Explain_aggr_window_funcs; + List_iterator it(win_func_sorts); + Window_funcs_sort *srt; + if (!xpl) + return 0; + while ((srt = it++)) + { + Explain_aggr_filesort *eaf= + new Explain_aggr_filesort(mem_root, is_analyze, srt->filesort); + if (!eaf) + return 0; + xpl->sorts.push_back(eaf, mem_root); + } + return xpl; +} + + +bool st_select_lex::add_window_func(Item_window_func *win_func) +{ + if (parsing_place != SELECT_LIST) + fields_in_window_functions+= win_func->window_func()->argument_count(); + return window_funcs.push_back(win_func); +} + +///////////////////////////////////////////////////////////////////////////// +// Unneeded comments (will be removed when we develop a replacement for +// the feature that was attempted here +///////////////////////////////////////////////////////////////////////////// + /* + TODO Get this code to set can_compute_window_function during preparation, + not during execution. + + The reason for this is the following: + Our single scan optimization for window functions without tmp table, + is valid, if and only if, we only need to perform one sorting operation, + via filesort. The cases where we need to perform one sorting operation only: + + * A select with only one window function. + * A select with multiple window functions, but they must have their + partition and order by clauses compatible. 
This means that one ordering + is acceptable for both window functions. + + For example: + partition by a, b, c; order by d, e results in sorting by a b c d e. + partition by a; order by d results in sorting by a d. + + This kind of sorting is compatible. The less specific partition does + not care for the order of b and c columns so it is valid if we sort + by those in case of equality over a. + + partition by a, b; order by d, e results in sorting by a b d e + partition by a; order by e results in sorting by a e + + This sorting is incompatible due to the order by clause. The partition by + clause is compatible, (partition by a) is a prefix for (partition by a, b) + However, order by e is not a prefix for order by d, e, thus it is not + compatible. + + The rule for having compatible sorting is thus: + Each partition order must contain the other window functions partitions + prefixes, or be a prefix itself. This must hold true for all partitions. + Analog for the order by clause. + */ +#if 0 + List window_functions; + SQL_I_List largest_partition; + SQL_I_List largest_order_by; + bool can_compute_window_live = !need_tmp; + // Construct the window_functions item list and check if they can be + // computed using only one sorting. + // + // TODO: Perhaps group functions into compatible sorting bins + // to minimize the number of sorting passes required to compute all of them. + while ((item= it++)) + { + if (item->type() == Item::WINDOW_FUNC_ITEM) + { + Item_window_func *item_win = (Item_window_func *) item; + window_functions.push_back(item_win); + if (!can_compute_window_live) + continue; // No point checking since we have to perform multiple sorts. + Window_spec *spec = item_win->window_spec; + // Having an empty partition list on one window function and a + // not empty list on a separate window function causes the sorting + // to be incompatible. + // + // Example: + // over (partition by a, order by x) && over (order by x). 
+ // + // The first function requires an ordering by a first and then by x, + // while the seond function requires an ordering by x first. + // The same restriction is not required for the order by clause. + if (largest_partition.elements && !spec->partition_list.elements) + { + can_compute_window_live= FALSE; + continue; + } + can_compute_window_live= test_if_order_compatible(largest_partition, + spec->partition_list); + if (!can_compute_window_live) + continue; + + can_compute_window_live= test_if_order_compatible(largest_order_by, + spec->order_list); + if (!can_compute_window_live) + continue; + + if (largest_partition.elements < spec->partition_list.elements) + largest_partition = spec->partition_list; + if (largest_order_by.elements < spec->order_list.elements) + largest_order_by = spec->order_list; + } + } + if (can_compute_window_live && window_functions.elements && table_count == 1) + { + ha_rows examined_rows = 0; + ha_rows found_rows = 0; + ha_rows filesort_retval; + SORT_FIELD *s_order= (SORT_FIELD *) my_malloc(sizeof(SORT_FIELD) * + (largest_partition.elements + largest_order_by.elements) + 1, + MYF(MY_WME | MY_ZEROFILL | MY_THREAD_SPECIFIC)); + + size_t pos= 0; + for (ORDER* curr = largest_partition.first; curr; curr=curr->next, pos++) + s_order[pos].item = *curr->item; + + for (ORDER* curr = largest_order_by.first; curr; curr=curr->next, pos++) + s_order[pos].item = *curr->item; + + table[0]->sort.io_cache=(IO_CACHE*) my_malloc(sizeof(IO_CACHE), + MYF(MY_WME | MY_ZEROFILL| + MY_THREAD_SPECIFIC)); + + + filesort_retval= filesort(thd, table[0], s_order, + (largest_partition.elements + largest_order_by.elements), + this->select, HA_POS_ERROR, FALSE, + &examined_rows, &found_rows, + this->explain->ops_tracker.report_sorting(thd)); + table[0]->sort.found_records= filesort_retval; + + join_tab->read_first_record = join_init_read_record; + join_tab->records= found_rows; + + my_free(s_order); + } + else +#endif diff --git a/sql/sql_window.h 
b/sql/sql_window.h new file mode 100644 index 00000000..1c02740e --- /dev/null +++ b/sql/sql_window.h @@ -0,0 +1,260 @@ +/* + Copyright (c) 2016, 2022 MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef SQL_WINDOW_INCLUDED +#define SQL_WINDOW_INCLUDED + +#include "filesort.h" + +class Item_window_func; + +/* + Window functions module. + + Each instance of window function has its own element in SELECT_LEX::window_specs. +*/ + + +class Window_frame_bound : public Sql_alloc +{ + +public: + + enum Bound_precedence_type + { + PRECEDING, + CURRENT, // Used for CURRENT ROW window frame bounds + FOLLOWING + }; + + Bound_precedence_type precedence_type; + + + /* + For UNBOUNDED PRECEDING / UNBOUNDED FOLLOWING window frame bounds + precedence type is seto to PRECEDING / FOLLOWING and + offset is set to NULL. 
+ The offset is not meaningful with precedence type CURRENT + */ + Item *offset; + + Window_frame_bound(Bound_precedence_type prec_type, + Item *offset_val) + : precedence_type(prec_type), offset(offset_val) {} + + bool is_unbounded() { return offset == NULL; } + + void print(String *str, enum_query_type query_type); + +}; + + +class Window_frame : public Sql_alloc +{ + +public: + + enum Frame_units + { + UNITS_ROWS, + UNITS_RANGE + }; + + enum Frame_exclusion + { + EXCL_NONE, + EXCL_CURRENT_ROW, + EXCL_GROUP, + EXCL_TIES + }; + + Frame_units units; + + Window_frame_bound *top_bound; + + Window_frame_bound *bottom_bound; + + Frame_exclusion exclusion; + + Window_frame(Frame_units win_frame_units, + Window_frame_bound *win_frame_top_bound, + Window_frame_bound *win_frame_bottom_bound, + Frame_exclusion win_frame_exclusion) + : units(win_frame_units), top_bound(win_frame_top_bound), + bottom_bound(win_frame_bottom_bound), exclusion(win_frame_exclusion) {} + + bool check_frame_bounds(); + + void print(String *str, enum_query_type query_type); + +}; + +class Window_spec : public Sql_alloc +{ + bool window_names_are_checked; + public: + virtual ~Window_spec() = default; + + LEX_CSTRING *window_ref; + + SQL_I_List *partition_list; + SQL_I_List *save_partition_list; + + SQL_I_List *order_list; + SQL_I_List *save_order_list; + + Window_frame *window_frame; + + Window_spec *referenced_win_spec; + + /* + Window_spec objects are numbered by the number of their appearance in the + query. This is used by compare_order_elements() to provide a predictable + ordering of PARTITION/ORDER BY clauses. 
+ */ + int win_spec_number; + + Window_spec(LEX_CSTRING *win_ref, SQL_I_List *part_list, + SQL_I_List *ord_list, Window_frame *win_frame) + : window_names_are_checked(false), window_ref(win_ref), + partition_list(part_list), save_partition_list(NULL), + order_list(ord_list), save_order_list(NULL), + window_frame(win_frame), referenced_win_spec(NULL) {} + + virtual const char *name() { return NULL; } + + bool check_window_names(List_iterator_fast &it); + + const char *window_reference() + { + return window_ref ? window_ref->str : NULL; + } + + void join_partition_and_order_lists() + { + *(partition_list->next)= order_list->first; + } + + void disjoin_partition_and_order_lists() + { + *(partition_list->next)= NULL; + } + + void print(String *str, enum_query_type query_type); + void print_order(String *str, enum_query_type query_type); + void print_partition(String *str, enum_query_type query_type); + +}; + +class Window_def : public Window_spec +{ + public: + + LEX_CSTRING *window_name; + + Window_def(LEX_CSTRING *win_name, + LEX_CSTRING *win_ref, + SQL_I_List *part_list, + SQL_I_List *ord_list, + Window_frame *win_frame) + : Window_spec(win_ref, part_list, ord_list, win_frame), + window_name(win_name) {} + + const char *name() { return window_name->str; } + +}; + +int setup_windows(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, + List &fields, List &all_fields, + List &win_specs, List &win_funcs); + + +////////////////////////////////////////////////////////////////////////////// +// Classes that make window functions computation a part of SELECT's query plan +////////////////////////////////////////////////////////////////////////////// + +class Frame_cursor; +/* + This handles computation of one window function. + + Currently, we make a spearate filesort() call for each window function. 
+*/ + +class Window_func_runner : public Sql_alloc +{ +public: + /* Add the function to be computed during the execution pass */ + bool add_function_to_run(Item_window_func *win_func); + + /* Compute and fill the fields in the table. */ + bool exec(THD *thd, TABLE *tbl, SORT_INFO *filesort_result); + +private: + /* A list of window functions for which this Window_func_runner will compute + values during the execution phase. */ + List window_functions; +}; + + +/* + Represents a group of window functions that require the same sorting of + rows and so share the filesort() call. + +*/ + +class Window_funcs_sort : public Sql_alloc +{ +public: + bool setup(THD *thd, SQL_SELECT *sel, List_iterator &it, + st_join_table *join_tab); + bool exec(JOIN *join, bool keep_filesort_result); + void cleanup() { delete filesort; } + + friend class Window_funcs_computation; + +private: + Window_func_runner runner; + + /* Window functions can be computed over this sorting */ + Filesort *filesort; +}; + + +struct st_join_table; +class Explain_aggr_window_funcs; + +/* + This is a "window function computation phase": a single object of this class + takes care of computing all window functions in a SELECT. + + - JOIN optimizer is exected to call setup() during query optimization. + - JOIN::exec() should call exec() once it has collected join output in a + temporary table. +*/ + +class Window_funcs_computation : public Sql_alloc +{ + List win_func_sorts; +public: + bool setup(THD *thd, List *window_funcs, st_join_table *tab); + bool exec(JOIN *join, bool keep_last_filesort_result); + + Explain_aggr_window_funcs *save_explain_plan(MEM_ROOT *mem_root, bool is_analyze); + void cleanup(); +}; + + +#endif /* SQL_WINDOW_INCLUDED */ diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy new file mode 100644 index 00000000..ffa70dea --- /dev/null +++ b/sql/sql_yacc.yy @@ -0,0 +1,19510 @@ +/* + Copyright (c) 2000, 2015, Oracle and/or its affiliates. 
+ Copyright (c) 2010, 2023, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* sql_yacc.yy */ + +/** + @defgroup Parser Parser + @{ +*/ + +%{ +#define YYLIP (& thd->m_parser_state->m_lip) +#define YYPS (& thd->m_parser_state->m_yacc) +#define YYCSCL (thd->variables.character_set_client) + +#define MYSQL_YACC +#define YYINITDEPTH 100 +#define YYMAXDEPTH 3200 /* Because of 64K stack */ +#define Lex (thd->lex) + +#define Select Lex->current_select +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_parse.h" /* comp_*_creator */ +#include "sql_table.h" /* primary_key_name */ +#include "sql_partition.h" /* partition_info, HASH_PARTITION */ +#include "sql_class.h" /* Key_part_spec, enum_filetype, Diag_condition_item_name */ +#include "slave.h" +#include "lex_symbol.h" +#include "item_create.h" +#include "sp_head.h" +#include "sp_rcontext.h" +#include "sp.h" +#include "sql_show.h" +#include "sql_alter.h" // Sql_cmd_alter_table* +#include "sql_truncate.h" // Sql_cmd_truncate_table +#include "sql_admin.h" // Sql_cmd_analyze/Check..._table +#include "sql_partition_admin.h" // Sql_cmd_alter_table_*_part. 
+#include "sql_handler.h" // Sql_cmd_handler_* +#include "sql_signal.h" +#include "sql_get_diagnostics.h" // Sql_cmd_get_diagnostics +#include "sql_cte.h" +#include "sql_window.h" +#include "item_windowfunc.h" +#include "event_parse_data.h" +#include "create_options.h" +#include +#include +#include "keycaches.h" +#include "set_var.h" +#include "rpl_mi.h" +#include "lex_token.h" +#include "sql_lex.h" +#include "sql_sequence.h" +#include "my_base.h" +#include "sql_type_json.h" +#include "json_table.h" + +/* this is to get the bison compilation windows warnings out */ +#ifdef _MSC_VER +/* warning C4065: switch statement contains 'default' but no 'case' labels */ +/* warning C4102: 'yyexhaustedlab': unreferenced label */ +#pragma warning (disable : 4065 4102) +#endif +#if defined (__GNUC__) || defined (__clang__) +#pragma GCC diagnostic ignored "-Wunused-label" /* yyexhaustedlab: */ +#endif + +int yylex(void *yylval, void *yythd); + +#define yyoverflow(A,B,C,D,E,F) \ + { \ + size_t val= *(F); \ + if (unlikely(my_yyoverflow((B), (D), &val))) \ + { \ + yyerror(thd, (char*) (A)); \ + return 2; \ + } \ + else \ + { \ + *(F)= (YYSIZE_T)val; \ + } \ + } + +#define MYSQL_YYABORT \ + do \ + { \ + YYABORT; \ + } while (0) + +#define MYSQL_YYABORT_UNLESS(A) \ + if (unlikely(!(A))) \ + { \ + thd->parse_error(); \ + MYSQL_YYABORT; \ + } + +#define my_yyabort_error(A) \ + do { my_error A; MYSQL_YYABORT; } while(0) + +#ifndef DBUG_OFF +#define YYDEBUG 1 +#else +#define YYDEBUG 0 +#endif + + +static Item* escape(THD *thd) +{ + thd->lex->escape_used= false; + const char *esc= thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES ? "" : "\\"; + return new (thd->mem_root) Item_string_ascii(thd, esc, MY_TEST(esc[0])); +} + + +/** + @brief Bison callback to report a syntax/OOM error + + This function is invoked by the bison-generated parser + when a syntax error, a parse error or an out-of-memory + condition occurs. 
This function is not invoked when the + parser is requested to abort by semantic action code + by means of YYABORT or YYACCEPT macros. This is why these + macros should not be used (use MYSQL_YYABORT/MYSQL_YYACCEPT + instead). + + The parser will abort immediately after invoking this callback. + + This function is not for use in semantic actions and is internal to + the parser, as it performs some pre-return cleanup. + In semantic actions, please use thd->parse_error() or my_error to + push an error into the error stack and MYSQL_YYABORT + to abort from the parser. +*/ + +static void yyerror(THD *thd, const char *s) +{ + /* "parse error" changed into "syntax error" between bison 1.75 and 1.875 */ + if (strcmp(s,"parse error") == 0 || strcmp(s,"syntax error") == 0) + s= ER_THD(thd, ER_SYNTAX_ERROR); + thd->parse_error(s, 0); +} + + +#ifndef DBUG_OFF +#define __CONCAT_UNDERSCORED(x,y) x ## _ ## y +#define _CONCAT_UNDERSCORED(x,y) __CONCAT_UNDERSCORED(x,y) +void _CONCAT_UNDERSCORED(turn_parser_debug_on,yyparse)() +{ + /* + MYSQLdebug is in sql/yy_*.cc, in bison generated code. + Turning this option on is **VERY** verbose, and should be + used when investigating a syntax error problem only. + + The syntax to run with bison traces is as follows : + - Starting a server manually : + mysqld --debug-dbug="d,parser_debug" ... + - Running a test : + mysql-test-run.pl --mysqld="--debug-dbug=d,parser_debug" ... 
+ + The result will be in the process stderr (var/log/master.err) + */ + +#ifndef _AIX + extern int yydebug; +#else + static int yydebug; +#endif + yydebug= 1; +} +#endif + + +%} +%union { + int num; + ulong ulong_num; + ulonglong ulonglong_number; + longlong longlong_number; + uint sp_instr_addr; + + /* structs */ + LEX_CSTRING lex_str; + Lex_ident_cli_st kwd; + Lex_ident_cli_st ident_cli; + Lex_ident_sys_st ident_sys; + Lex_column_list_privilege_st column_list_privilege; + Lex_string_with_metadata_st lex_string_with_metadata; + Lex_spblock_st spblock; + Lex_spblock_handlers_st spblock_handlers; + Lex_length_and_dec_st Lex_length_and_dec; + Lex_cast_type_st Lex_cast_type; + Lex_field_type_st Lex_field_type; + Lex_exact_charset_extended_collation_attrs_st + Lex_exact_charset_extended_collation_attrs; + Lex_extended_collation_st Lex_extended_collation; + Lex_dyncol_type_st Lex_dyncol_type; + Lex_for_loop_st for_loop; + Lex_for_loop_bounds_st for_loop_bounds; + Lex_trim_st trim; + Json_table_column::On_response json_on_response; + Lex_substring_spec_st substring_spec; + vers_history_point_t vers_history_point; + struct + { + enum sub_select_type unit_type; + bool distinct; + } unit_operation; + struct + { + SELECT_LEX *first; + SELECT_LEX *prev_last; + } select_list; + SQL_I_List *select_order; + Lex_select_lock select_lock; + Lex_select_limit select_limit; + Lex_order_limit_lock *order_limit_lock; + + /* pointers */ + Lex_ident_sys *ident_sys_ptr; + Create_field *create_field; + Spvar_definition *spvar_definition; + Row_definition_list *spvar_definition_list; + const Type_handler *type_handler; + const class Sp_handler *sp_handler; + CHARSET_INFO *charset; + Condition_information_item *cond_info_item; + DYNCALL_CREATE_DEF *dyncol_def; + Diagnostics_information *diag_info; + Item *item; + Item_num *item_num; + Item_param *item_param; + Item_basic_constant *item_basic_constant; + Key_part_spec *key_part; + LEX *lex; + sp_expr_lex *expr_lex; + sp_assignment_lex 
*assignment_lex; + class sp_lex_cursor *sp_cursor_stmt; + LEX_CSTRING *lex_str_ptr; + LEX_USER *lex_user; + USER_AUTH *user_auth; + List *cond_info_list; + List *dyncol_def_list; + List *item_list; + List *sp_assignment_lex_list; + List *stmt_info_list; + List *string_list; + List *ident_sys_list; + Statement_information_item *stmt_info_item; + String *string; + TABLE_LIST *table_list; + Table_ident *table; + Qualified_column_ident *qualified_column_ident; + char *simple_string; + const char *const_simple_string; + chooser_compare_func_creator boolfunc2creator; + class Lex_grant_privilege *lex_grant; + class Lex_grant_object_name *lex_grant_ident; + class my_var *myvar; + class sp_condition_value *spcondvalue; + class sp_head *sphead; + class sp_name *spname; + class sp_variable *spvar; + class With_element_head *with_element_head; + class With_clause *with_clause; + class Virtual_column_info *virtual_column; + engine_option_value *engine_option_value_ptr; + + handlerton *db_type; + st_select_lex *select_lex; + st_select_lex_unit *select_lex_unit; + struct p_elem_val *p_elem_value; + class Window_frame *window_frame; + class Window_frame_bound *window_frame_bound; + udf_func *udf; + st_trg_execution_order trg_execution_order; + + /* enums */ + enum enum_sp_suid_behaviour sp_suid; + enum enum_sp_aggregate_type sp_aggregate_type; + enum enum_view_suid view_suid; + enum Condition_information_item::Name cond_info_item_name; + enum enum_diag_condition_item_name diag_condition_item_name; + enum Diagnostics_information::Which_area diag_area; + enum enum_fk_option m_fk_option; + enum Item_udftype udf_type; + enum Key::Keytype key_type; + enum Statement_information_item::Name stmt_info_item_name; + enum enum_filetype filetype; + enum enum_tx_isolation tx_isolation; + enum enum_var_type var_type; + enum enum_yes_no_unknown m_yes_no_unk; + enum ha_choice choice; + enum ha_key_alg key_alg; + enum ha_rkey_function ha_rkey_mode; + enum index_hint_type index_hint; + enum 
interval_type interval, interval_time_st; + enum row_type row_type; + enum sp_variable::enum_mode spvar_mode; + enum thr_lock_type lock_type; + enum enum_mysql_timestamp_type date_time_type; + enum Window_frame_bound::Bound_precedence_type bound_precedence_type; + enum Window_frame::Frame_units frame_units; + enum Window_frame::Frame_exclusion frame_exclusion; + enum trigger_order_type trigger_action_order_type; + DDL_options_st object_ddl_options; + enum vers_kind_t vers_range_unit; + enum Column_definition::enum_column_versioning vers_column_versioning; + enum plsql_cursor_attr_t plsql_cursor_attr; + privilege_t privilege; +} + +%{ +/* avoid unintentional %union size increases, it's what a parser stack made of */ +static_assert(sizeof(YYSTYPE) == sizeof(void*)*2+8, "%union size check"); +bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize); +%} + +%define api.pure /* We have threads */ +%parse-param { THD *thd } +%lex-param { THD *thd } +/* + We should not introduce any further shift/reduce conflicts. +*/ + +%ifdef MARIADB +%expect 64 +%else +%expect 65 +%endif + +/* + Comments for TOKENS. + For each token, please include in the same line a comment that contains + the following tags: + SQL-2011-R : Reserved keyword as per SQL-2011 + SQL-2011-N : Non Reserved keyword as per SQL-2011 + SQL-2003-R : Reserved keyword as per SQL-2003 + SQL-2003-N : Non Reserved keyword as per SQL-2003 + SQL-1999-R : Reserved keyword as per SQL-1999 + SQL-1999-N : Non Reserved keyword as per SQL-1999 + MYSQL : MySQL extention (unspecified) + MYSQL-FUNC : MySQL extention, function + INTERNAL : Not a real token, lex optimization + OPERATOR : SQL operator + FUTURE-USE : Reserved for future use + + This makes the code grep-able, and helps maintenance. 
+*/ + + +%token '@' + +/* + Special purpose tokens +*/ +%token ABORT_SYM /* INTERNAL (used in lex) */ +%token IMPOSSIBLE_ACTION /* To avoid warning for yyerrlab1 */ +%token FORCE_LOOKAHEAD /* INTERNAL never returned by the lexer */ +%token END_OF_INPUT /* INTERNAL */ +%token COLON_ORACLE_SYM /* INTERNAL */ +%token PARAM_MARKER /* INTERNAL */ +%token FOR_SYSTEM_TIME_SYM /* INTERNAL */ +%token LEFT_PAREN_ALT /* INTERNAL */ +%token LEFT_PAREN_WITH /* INTERNAL */ +%token LEFT_PAREN_LIKE /* INTERNAL */ +%token ORACLE_CONCAT_SYM /* INTERNAL */ +%token PERCENT_ORACLE_SYM /* INTERNAL */ +%token WITH_CUBE_SYM /* INTERNAL */ +%token WITH_ROLLUP_SYM /* INTERNAL */ +%token WITH_SYSTEM_SYM /* INTERNAL */ + +/* + Identifiers +*/ +%token IDENT +%token IDENT_QUOTED +%token LEX_HOSTNAME +%token UNDERSCORE_CHARSET /* _latin1 */ + + +/* + Literals +*/ +%token BIN_NUM /* LITERAL */ +%token DECIMAL_NUM /* LITERAL */ +%token FLOAT_NUM /* LITERAL */ +%token HEX_NUM /* LITERAL */ +%token HEX_STRING /* LITERAL */ +%token LONG_NUM /* LITERAL */ +%token NCHAR_STRING /* LITERAL */ +%token NUM /* LITERAL */ +%token TEXT_STRING /* LITERAL */ +%token ULONGLONG_NUM /* LITERAL */ + + +/* + Operators +*/ +%token AND_AND_SYM /* OPERATOR */ +%token DOT_DOT_SYM /* OPERATOR */ +%token EQUAL_SYM /* OPERATOR */ +%token GE /* OPERATOR */ +%token LE /* OPERATOR */ +%token MYSQL_CONCAT_SYM /* OPERATOR */ +%token NE /* OPERATOR */ +%token NOT2_SYM /* OPERATOR */ +%token OR2_SYM /* OPERATOR */ +%token SET_VAR /* OPERATOR */ +%token SHIFT_LEFT /* OPERATOR */ +%token SHIFT_RIGHT /* OPERATOR */ + + +/* + Reserved keywords +*/ +%token ACCESSIBLE_SYM +%token ADD /* SQL-2003-R */ +%token ALL /* SQL-2003-R */ +%token ALTER /* SQL-2003-R */ +%token ANALYZE_SYM +%token AND_SYM /* SQL-2003-R */ +%token ASC /* SQL-2003-N */ +%token ASENSITIVE_SYM /* FUTURE-USE */ +%token AS /* SQL-2003-R */ +%token BEFORE_SYM /* SQL-2003-N */ +%token BETWEEN_SYM /* SQL-2003-R */ +%token BIGINT /* SQL-2003-R */ +%token BINARY /* 
SQL-2003-R */ +%token BIT_AND /* MYSQL-FUNC */ +%token BIT_OR /* MYSQL-FUNC */ +%token BIT_XOR /* MYSQL-FUNC */ +%token BLOB_MARIADB_SYM /* SQL-2003-R */ +%token BLOB_ORACLE_SYM /* Oracle-R */ +%token BODY_ORACLE_SYM /* Oracle-R */ +%token BOTH /* SQL-2003-R */ +%token BY /* SQL-2003-R */ +%token CALL_SYM /* SQL-2003-R */ +%token CASCADE /* SQL-2003-N */ +%token CASE_SYM /* SQL-2003-R */ +%token CAST_SYM /* SQL-2003-R */ +%token CHANGE +%token CHAR_SYM /* SQL-2003-R */ +%token CHECK_SYM /* SQL-2003-R */ +%token COLLATE_SYM /* SQL-2003-R */ +%token CONDITION_SYM /* SQL-2003-R, SQL-2008-R */ +%token CONSTRAINT /* SQL-2003-R */ +%token CONTINUE_MARIADB_SYM /* SQL-2003-R, Oracle-R */ +%token CONTINUE_ORACLE_SYM /* SQL-2003-R, Oracle-R */ +%token CONVERT_SYM /* SQL-2003-N */ +%token COUNT_SYM /* SQL-2003-N */ +%token CREATE /* SQL-2003-R */ +%token CROSS /* SQL-2003-R */ +%token CUME_DIST_SYM +%token CURDATE /* MYSQL-FUNC */ +%token CURRENT_ROLE /* SQL-2003-R */ +%token CURRENT_USER /* SQL-2003-R */ +%token CURSOR_SYM /* SQL-2003-R */ +%token CURTIME /* MYSQL-FUNC */ +%token DATABASE +%token DATABASES +%token DATE_ADD_INTERVAL /* MYSQL-FUNC */ +%token DATE_SUB_INTERVAL /* MYSQL-FUNC */ +%token DAY_HOUR_SYM +%token DAY_MICROSECOND_SYM +%token DAY_MINUTE_SYM +%token DAY_SECOND_SYM +%token DECIMAL_SYM /* SQL-2003-R */ +%token DECLARE_MARIADB_SYM /* SQL-2003-R */ +%token DECLARE_ORACLE_SYM /* Oracle-R */ +%token DEFAULT /* SQL-2003-R */ +%token DELETE_DOMAIN_ID_SYM +%token DELETE_SYM /* SQL-2003-R */ +%token DENSE_RANK_SYM +%token DESCRIBE /* SQL-2003-R */ +%token DESC /* SQL-2003-N */ +%token DETERMINISTIC_SYM /* SQL-2003-R */ +%token DISTINCT /* SQL-2003-R */ +%token DIV_SYM +%token DO_DOMAIN_IDS_SYM +%token DOUBLE_SYM /* SQL-2003-R */ +%token DROP /* SQL-2003-R */ +%token DUAL_SYM +%token EACH_SYM /* SQL-2003-R */ +%token ELSEIF_MARIADB_SYM +%token ELSE /* SQL-2003-R */ +%token ELSIF_ORACLE_SYM /* PLSQL-R */ +%token EMPTY_SYM /* SQL-2016-R */ +%token ENCLOSED +%token 
ESCAPED +%token EXCEPT_SYM /* SQL-2003-R */ +%token EXISTS /* SQL-2003-R */ +%token EXTRACT_SYM /* SQL-2003-N */ +%token FALSE_SYM /* SQL-2003-R */ +%token FETCH_SYM /* SQL-2003-R */ +%token FIRST_VALUE_SYM /* SQL-2011 */ +%token FLOAT_SYM /* SQL-2003-R */ +%token FOREIGN /* SQL-2003-R */ +%token FOR_SYM /* SQL-2003-R */ +%token FROM +%token FULLTEXT_SYM +%token GOTO_ORACLE_SYM /* Oracle-R */ +%token GRANT /* SQL-2003-R */ +%token GROUP_CONCAT_SYM +%token JSON_ARRAYAGG_SYM +%token JSON_OBJECTAGG_SYM +%token JSON_TABLE_SYM +%token GROUP_SYM /* SQL-2003-R */ +%token HAVING /* SQL-2003-R */ +%token HOUR_MICROSECOND_SYM +%token HOUR_MINUTE_SYM +%token HOUR_SECOND_SYM +%token IF_SYM +%token IGNORE_DOMAIN_IDS_SYM +%token IGNORE_SYM +%token IGNORED_SYM +%token INDEX_SYM +%token INFILE +%token INNER_SYM /* SQL-2003-R */ +%token INOUT_SYM /* SQL-2003-R */ +%token INSENSITIVE_SYM /* SQL-2003-R */ +%token INSERT /* SQL-2003-R */ +%token IN_SYM /* SQL-2003-R */ +%token INTERSECT_SYM /* SQL-2003-R */ +%token INTERVAL_SYM /* SQL-2003-R */ +%token INTO /* SQL-2003-R */ +%token INT_SYM /* SQL-2003-R */ +%token IS /* SQL-2003-R */ +%token ITERATE_SYM +%token JOIN_SYM /* SQL-2003-R */ +%token KEYS +%token KEY_SYM /* SQL-2003-N */ +%token KILL_SYM +%token LAG_SYM /* SQL-2011 */ +%token LEADING /* SQL-2003-R */ +%token LEAD_SYM /* SQL-2011 */ +%token LEAVE_SYM +%token LEFT /* SQL-2003-R */ +%token LIKE /* SQL-2003-R */ +%token LIMIT +%token LINEAR_SYM +%token LINES +%token LOAD +%token LOCATOR_SYM /* SQL-2003-N */ +%token LOCK_SYM +%token LONGBLOB +%token LONG_SYM +%token LONGTEXT +%token LOOP_SYM +%token LOW_PRIORITY +%token MASTER_SSL_VERIFY_SERVER_CERT_SYM +%token MATCH /* SQL-2003-R */ +%token MAX_SYM /* SQL-2003-N */ +%token MAXVALUE_SYM /* SQL-2003-N */ +%token MEDIAN_SYM +%token MEDIUMBLOB +%token MEDIUMINT +%token MEDIUMTEXT +%token MIN_SYM /* SQL-2003-N */ +%token MINUS_ORACLE_SYM /* Oracle-R */ +%token MINUTE_MICROSECOND_SYM +%token MINUTE_SECOND_SYM +%token MODIFIES_SYM /* 
SQL-2003-R */ +%token MOD_SYM /* SQL-2003-N */ +%token NATURAL /* SQL-2003-R */ +%token NEG +%token NESTED_SYM /* SQL-2003-N */ +%token NOT_SYM /* SQL-2003-R */ +%token NO_WRITE_TO_BINLOG +%token NOW_SYM +%token NTH_VALUE_SYM /* SQL-2011 */ +%token NTILE_SYM +%token NULL_SYM /* SQL-2003-R */ +%token NUMERIC_SYM /* SQL-2003-R */ +%token ON /* SQL-2003-R */ +%token OPTIMIZE +%token OPTIONALLY +%token ORDER_SYM /* SQL-2003-R */ +%token ORDINALITY_SYM /* SQL-2003-N */ +%token OR_SYM /* SQL-2003-R */ +%token OTHERS_ORACLE_SYM /* SQL-2011-N, PLSQL-R */ +%token OUTER +%token OUTFILE +%token OUT_SYM /* SQL-2003-R */ +%token OVER_SYM +%token PACKAGE_ORACLE_SYM /* Oracle-R */ +%token PAGE_CHECKSUM_SYM +%token PARSE_VCOL_EXPR_SYM +%token PARTITION_SYM /* SQL-2003-R */ +%token PATH_SYM /* SQL-2003-N */ +%token PERCENTILE_CONT_SYM +%token PERCENTILE_DISC_SYM +%token PERCENT_RANK_SYM +%token PORTION_SYM /* SQL-2016-R */ +%token POSITION_SYM /* SQL-2003-N */ +%token PRECISION /* SQL-2003-R */ +%token PRIMARY_SYM /* SQL-2003-R */ +%token PROCEDURE_SYM /* SQL-2003-R */ +%token PURGE +%token RAISE_ORACLE_SYM /* PLSQL-R */ +%token RANGE_SYM /* SQL-2003-R */ +%token RANK_SYM +%token READS_SYM /* SQL-2003-R */ +%token READ_SYM /* SQL-2003-N */ +%token READ_WRITE_SYM +%token REAL /* SQL-2003-R */ +%token RECURSIVE_SYM +%token REFERENCES /* SQL-2003-R */ +%token REF_SYSTEM_ID_SYM +%token REGEXP +%token RELEASE_SYM /* SQL-2003-R */ +%token RENAME +%token REPEAT_SYM /* MYSQL-FUNC */ +%token REQUIRE_SYM +%token RESIGNAL_SYM /* SQL-2003-R */ +%token RESTRICT +%token RETURNING_SYM +%token RETURN_MARIADB_SYM /* SQL-2003-R, PLSQL-R */ +%token RETURN_ORACLE_SYM /* SQL-2003-R, PLSQL-R */ +%token REVOKE /* SQL-2003-R */ +%token RIGHT /* SQL-2003-R */ +%token ROW_NUMBER_SYM +%token ROWS_SYM /* SQL-2003-R */ +%token ROWTYPE_ORACLE_SYM /* PLSQL-R */ +%token SECOND_MICROSECOND_SYM +%token SELECT_SYM /* SQL-2003-R */ +%token SENSITIVE_SYM /* FUTURE-USE */ +%token SEPARATOR_SYM +%token SERVER_OPTIONS 
+%token SET /* SQL-2003-R */ +%token SHOW +%token SIGNAL_SYM /* SQL-2003-R */ +%token SMALLINT /* SQL-2003-R */ +%token SPATIAL_SYM +%token SPECIFIC_SYM /* SQL-2003-R */ +%token SQL_BIG_RESULT +%token SQLEXCEPTION_SYM /* SQL-2003-R */ +%token SQL_SMALL_RESULT +%token SQLSTATE_SYM /* SQL-2003-R */ +%token SQL_SYM /* SQL-2003-R */ +%token SQLWARNING_SYM /* SQL-2003-R */ +%token SSL_SYM +%token STARTING +%token STATS_AUTO_RECALC_SYM +%token STATS_PERSISTENT_SYM +%token STATS_SAMPLE_PAGES_SYM +%token STDDEV_SAMP_SYM /* SQL-2003-N */ +%token STD_SYM +%token STRAIGHT_JOIN +%token SUM_SYM /* SQL-2003-N */ +%token SYSDATE +%token TABLE_REF_PRIORITY +%token TABLE_SYM /* SQL-2003-R */ +%token TERMINATED +%token THEN_SYM /* SQL-2003-R */ +%token TINYBLOB +%token TINYINT +%token TINYTEXT +%token TO_SYM /* SQL-2003-R */ +%token TRAILING /* SQL-2003-R */ +%token TRIGGER_SYM /* SQL-2003-R */ +%token TRUE_SYM /* SQL-2003-R */ +%token UNDO_SYM /* FUTURE-USE */ +%token UNION_SYM /* SQL-2003-R */ +%token UNIQUE_SYM +%token UNLOCK_SYM +%token UNSIGNED +%token UPDATE_SYM /* SQL-2003-R */ +%token USAGE /* SQL-2003-N */ +%token USE_SYM +%token USING /* SQL-2003-R */ +%token UTC_DATE_SYM +%token UTC_TIMESTAMP_SYM +%token UTC_TIME_SYM +%token VALUES_IN_SYM +%token VALUES_LESS_SYM +%token VALUES /* SQL-2003-R */ +%token VARBINARY +%token VARCHAR /* SQL-2003-R */ +%token VARIANCE_SYM +%token VAR_SAMP_SYM +%token VARYING /* SQL-2003-R */ +%token WHEN_SYM /* SQL-2003-R */ +%token WHERE /* SQL-2003-R */ +%token WHILE_SYM +%token WITH /* SQL-2003-R */ +%token XOR +%token YEAR_MONTH_SYM +%token ZEROFILL + + +/* + Keywords that have different reserved status in std/oracle modes. 
+*/ +%token BODY_MARIADB_SYM // Oracle-R +%token ELSEIF_ORACLE_SYM +%token ELSIF_MARIADB_SYM // PLSQL-R +%token EXCEPTION_ORACLE_SYM // SQL-2003-N, PLSQL-R +%token GOTO_MARIADB_SYM // Oracle-R +%token OTHERS_MARIADB_SYM // SQL-2011-N, PLSQL-R +%token PACKAGE_MARIADB_SYM // Oracle-R +%token RAISE_MARIADB_SYM // PLSQL-R +%token ROWTYPE_MARIADB_SYM // PLSQL-R +%token ROWNUM_SYM /* Oracle-R */ + +/* + SQL functions with a special syntax +*/ +%token REPLACE /* MYSQL-FUNC */ +%token SUBSTRING /* SQL-2003-N */ +%token TRIM /* SQL-2003-N */ + + +/* + Non-reserved keywords +*/ + +%token ACCOUNT_SYM /* MYSQL */ +%token ACTION /* SQL-2003-N */ +%token ADMIN_SYM /* SQL-2003-N */ +%token ADDDATE_SYM /* MYSQL-FUNC */ +%token ADD_MONTHS_SYM /* Oracle FUNC*/ +%token AFTER_SYM /* SQL-2003-N */ +%token AGAINST +%token AGGREGATE_SYM +%token ALGORITHM_SYM +%token ALWAYS_SYM +%token ANY_SYM /* SQL-2003-R */ +%token ASCII_SYM /* MYSQL-FUNC */ +%token AT_SYM /* SQL-2003-R */ +%token ATOMIC_SYM /* SQL-2003-R */ +%token AUTHORS_SYM +%token AUTOEXTEND_SIZE_SYM +%token AUTO_INC +%token AUTO_SYM +%token AVG_ROW_LENGTH +%token AVG_SYM /* SQL-2003-N */ +%token BACKUP_SYM +%token BEGIN_MARIADB_SYM /* SQL-2003-R, PLSQL-R */ +%token BEGIN_ORACLE_SYM /* SQL-2003-R, PLSQL-R */ +%token BINLOG_SYM +%token BIT_SYM /* MYSQL-FUNC */ +%token BLOCK_SYM +%token BOOL_SYM +%token BOOLEAN_SYM /* SQL-2003-R, PLSQL-R */ +%token BTREE_SYM +%token BYTE_SYM +%token CACHE_SYM +%token CASCADED /* SQL-2003-R */ +%token CATALOG_NAME_SYM /* SQL-2003-N */ +%token CHAIN_SYM /* SQL-2003-N */ +%token CHANGED +%token CHANNEL_SYM +%token CHARSET +%token CHECKPOINT_SYM +%token CHECKSUM_SYM +%token CIPHER_SYM +%token CLASS_ORIGIN_SYM /* SQL-2003-N */ +%token CLIENT_SYM +%token CLOB_MARIADB_SYM /* SQL-2003-R */ +%token CLOB_ORACLE_SYM /* Oracle-R */ +%token CLOSE_SYM /* SQL-2003-R */ +%token COALESCE /* SQL-2003-N */ +%token CODE_SYM +%token COLLATION_SYM /* SQL-2003-N */ +%token COLUMNS +%token COLUMN_ADD_SYM +%token 
COLUMN_CHECK_SYM +%token COLUMN_CREATE_SYM +%token COLUMN_DELETE_SYM +%token COLUMN_GET_SYM +%token COLUMN_SYM /* SQL-2003-R */ +%token COLUMN_NAME_SYM /* SQL-2003-N */ +%token COMMENT_SYM /* Oracle-R */ +%token COMMITTED_SYM /* SQL-2003-N */ +%token COMMIT_SYM /* SQL-2003-R */ +%token COMPACT_SYM +%token COMPLETION_SYM +%token COMPRESSED_SYM +%token CONCURRENT +%token CONNECTION_SYM +%token CONSISTENT_SYM +%token CONSTRAINT_CATALOG_SYM /* SQL-2003-N */ +%token CONSTRAINT_NAME_SYM /* SQL-2003-N */ +%token CONSTRAINT_SCHEMA_SYM /* SQL-2003-N */ +%token CONTAINS_SYM /* SQL-2003-N */ +%token CONTEXT_SYM +%token CONTRIBUTORS_SYM +%token CPU_SYM +%token CUBE_SYM /* SQL-2003-R */ +%token CURRENT_SYM /* SQL-2003-R */ +%token CURRENT_POS_SYM +%token CURSOR_NAME_SYM /* SQL-2003-N */ +%token CYCLE_SYM +%token DATAFILE_SYM +%token DATA_SYM /* SQL-2003-N */ +%token DATETIME +%token DATE_FORMAT_SYM /* MYSQL-FUNC */ +%token DATE_SYM /* SQL-2003-R, Oracle-R, PLSQL-R */ +%token DAY_SYM /* SQL-2003-R */ +%token DEALLOCATE_SYM /* SQL-2003-R */ +%token DEFINER_SYM +%token DELAYED_SYM +%token DELAY_KEY_WRITE_SYM +%token DES_KEY_FILE +%token DIAGNOSTICS_SYM /* SQL-2003-N */ +%token DIRECTORY_SYM +%token DISABLE_SYM +%token DISCARD +%token DISK_SYM +%token DO_SYM +%token DUMPFILE +%token DUPLICATE_SYM +%token DYNAMIC_SYM /* SQL-2003-R */ +%token ENABLE_SYM +%token END /* SQL-2003-R, PLSQL-R */ +%token ENDS_SYM +%token ENGINES_SYM +%token ENGINE_SYM +%token ENUM +%token ERROR_SYM +%token ERRORS +%token ESCAPE_SYM /* SQL-2003-R */ +%token EVENTS_SYM +%token EVENT_SYM +%token EVERY_SYM /* SQL-2003-N */ +%token EXCHANGE_SYM +%token EXAMINED_SYM +%token EXCLUDE_SYM /* SQL-2011-N */ +%token EXECUTE_SYM /* SQL-2003-R */ +%token EXCEPTION_MARIADB_SYM /* SQL-2003-N, PLSQL-R */ +%token EXIT_MARIADB_SYM /* PLSQL-R */ +%token EXIT_ORACLE_SYM /* PLSQL-R */ +%token EXPANSION_SYM +%token EXPIRE_SYM /* MySQL */ +%token EXPORT_SYM +%token EXTENDED_SYM +%token EXTENT_SIZE_SYM +%token FAST_SYM +%token 
FAULTS_SYM +%token FEDERATED_SYM /* MariaDB privilege */ +%token FILE_SYM +%token FIRST_SYM /* SQL-2003-N */ +%token FIXED_SYM +%token FLUSH_SYM +%token FOLLOWS_SYM /* MYSQL trigger*/ +%token FOLLOWING_SYM /* SQL-2011-N */ +%token FORCE_SYM +%token FORMAT_SYM +%token FOUND_SYM /* SQL-2003-R */ +%token FULL /* SQL-2003-R */ +%token FUNCTION_SYM /* SQL-2003-R, Oracle-R */ +%token GENERAL +%token GENERATED_SYM +%token GET_FORMAT /* MYSQL-FUNC */ +%token GET_SYM /* SQL-2003-R */ +%token GLOBAL_SYM /* SQL-2003-R */ +%token GRANTS +%token HANDLER_SYM +%token HARD_SYM +%token HASH_SYM +%token HELP_SYM +%token HIGH_PRIORITY +%token HISTORY_SYM /* MYSQL */ +%token HOST_SYM +%token HOSTS_SYM +%token HOUR_SYM /* SQL-2003-R */ +%token ID_SYM /* MYSQL */ +%token IDENTIFIED_SYM +%token IGNORE_SERVER_IDS_SYM +%token IMMEDIATE_SYM /* SQL-2003-R */ +%token IMPORT +%token INCREMENT_SYM +%token INDEXES +%token INITIAL_SIZE_SYM +%token INSERT_METHOD +%token INSTALL_SYM +%token INVOKER_SYM +%token IO_SYM +%token IPC_SYM +%token ISOLATION /* SQL-2003-R */ +%token ISOPEN_SYM /* Oracle-N */ +%token ISSUER_SYM +%token INVISIBLE_SYM +%token JSON_SYM +%token KEY_BLOCK_SIZE +%token LANGUAGE_SYM /* SQL-2003-R */ +%token LAST_SYM /* SQL-2003-N */ +%token LAST_VALUE +%token LASTVAL_SYM /* PostgreSQL sequence function */ +%token LEAVES +%token LESS_SYM +%token LEVEL_SYM +%token LIST_SYM +%token LOCAL_SYM /* SQL-2003-R */ +%token LOCKED_SYM +%token LOCKS_SYM +%token LOGFILE_SYM +%token LOGS_SYM +%token MASTER_CONNECT_RETRY_SYM +%token MASTER_DELAY_SYM +%token MASTER_GTID_POS_SYM +%token MASTER_HOST_SYM +%token MASTER_LOG_FILE_SYM +%token MASTER_LOG_POS_SYM +%token MASTER_PASSWORD_SYM +%token MASTER_PORT_SYM +%token MASTER_SERVER_ID_SYM +%token MASTER_SSL_CAPATH_SYM +%token MASTER_SSL_CA_SYM +%token MASTER_SSL_CERT_SYM +%token MASTER_SSL_CIPHER_SYM +%token MASTER_SSL_CRL_SYM +%token MASTER_SSL_CRLPATH_SYM +%token MASTER_SSL_KEY_SYM +%token MASTER_SSL_SYM +%token MASTER_SYM +%token MASTER_USER_SYM 
+%token MASTER_USE_GTID_SYM +%token MASTER_HEARTBEAT_PERIOD_SYM +%token MASTER_DEMOTE_TO_SLAVE_SYM +%token MAX_CONNECTIONS_PER_HOUR +%token MAX_QUERIES_PER_HOUR +%token MAX_ROWS +%token MAX_SIZE_SYM +%token MAX_UPDATES_PER_HOUR +%token MAX_STATEMENT_TIME_SYM +%token MAX_USER_CONNECTIONS_SYM +%token MEDIUM_SYM +%token MEMORY_SYM +%token MERGE_SYM /* SQL-2003-R */ +%token MESSAGE_TEXT_SYM /* SQL-2003-N */ +%token MICROSECOND_SYM /* MYSQL-FUNC */ +%token MIGRATE_SYM +%token MINUTE_SYM /* SQL-2003-R */ +%token MINVALUE_SYM +%token MIN_ROWS +%token MODE_SYM +%token MODIFY_SYM +%token MONITOR_SYM /* MariaDB privilege */ +%token MONTH_SYM /* SQL-2003-R */ +%token MUTEX_SYM +%token MYSQL_SYM +%token MYSQL_ERRNO_SYM +%token NAMES_SYM /* SQL-2003-N */ +%token NAME_SYM /* SQL-2003-N */ +%token NATIONAL_SYM /* SQL-2003-R */ +%token NCHAR_SYM /* SQL-2003-R */ +%token NEVER_SYM /* MySQL */ +%token NEW_SYM /* SQL-2003-R */ +%token NEXT_SYM /* SQL-2003-N */ +%token NEXTVAL_SYM /* PostgreSQL sequence function */ +%token NOCACHE_SYM +%token NOCYCLE_SYM +%token NODEGROUP_SYM +%token NONE_SYM /* SQL-2003-R */ +%token NOTFOUND_SYM /* Oracle-R */ +%token NO_SYM /* SQL-2003-R */ +%token NOMAXVALUE_SYM +%token NOMINVALUE_SYM +%token NO_WAIT_SYM +%token NOWAIT_SYM +%token NUMBER_MARIADB_SYM /* SQL-2003-N */ +%token NUMBER_ORACLE_SYM /* Oracle-R, PLSQL-R */ +%token NVARCHAR_SYM +%token OF_SYM /* SQL-1992-R, Oracle-R */ +%token OFFSET_SYM +%token OLD_PASSWORD_SYM +%token ONE_SYM +%token ONLY_SYM /* SQL-2003-R */ +%token ONLINE_SYM +%token OPEN_SYM /* SQL-2003-R */ +%token OPTIONS_SYM +%token OPTION /* SQL-2003-N */ +%token OVERLAPS_SYM +%token OWNER_SYM +%token PACK_KEYS_SYM +%token PAGE_SYM +%token PARSER_SYM +%token PARTIAL /* SQL-2003-N */ +%token PARTITIONS_SYM +%token PARTITIONING_SYM +%token PASSWORD_SYM +%token PERIOD_SYM /* SQL-2011-R */ +%token PERSISTENT_SYM +%token PHASE_SYM +%token PLUGINS_SYM +%token PLUGIN_SYM +%token PORT_SYM +%token PRECEDES_SYM /* MYSQL */ +%token 
PRECEDING_SYM /* SQL-2011-N */ +%token PREPARE_SYM /* SQL-2003-R */ +%token PRESERVE_SYM +%token PREV_SYM +%token PREVIOUS_SYM +%token PRIVILEGES /* SQL-2003-N */ +%token PROCESS +%token PROCESSLIST_SYM +%token PROFILE_SYM +%token PROFILES_SYM +%token PROXY_SYM +%token QUARTER_SYM +%token QUERY_SYM +%token QUICK +%token RAW_MARIADB_SYM +%token RAW_ORACLE_SYM /* Oracle-R */ +%token READ_ONLY_SYM +%token REBUILD_SYM +%token RECOVER_SYM +%token REDOFILE_SYM +%token REDO_BUFFER_SIZE_SYM +%token REDUNDANT_SYM +%token RELAY +%token RELAYLOG_SYM +%token RELAY_LOG_FILE_SYM +%token RELAY_LOG_POS_SYM +%token RELAY_THREAD +%token RELOAD +%token REMOVE_SYM +%token REORGANIZE_SYM +%token REPAIR +%token REPEATABLE_SYM /* SQL-2003-N */ +%token REPLAY_SYM /* MariaDB privilege */ +%token REPLICATION +%token RESET_SYM +%token RESTART_SYM +%token RESOURCES +%token RESTORE_SYM +%token RESUME_SYM +%token RETURNED_SQLSTATE_SYM /* SQL-2003-N */ +%token RETURNS_SYM /* SQL-2003-R */ +%token REUSE_SYM /* Oracle-R */ +%token REVERSE_SYM +%token ROLE_SYM +%token ROLLBACK_SYM /* SQL-2003-R */ +%token ROLLUP_SYM /* SQL-2003-R */ +%token ROUTINE_SYM /* SQL-2003-N */ +%token ROWCOUNT_SYM /* Oracle-N */ +%token ROW_SYM /* SQL-2003-R */ +%token ROW_COUNT_SYM /* SQL-2003-N */ +%token ROW_FORMAT_SYM +%token RTREE_SYM +%token SAVEPOINT_SYM /* SQL-2003-R */ +%token SCHEDULE_SYM +%token SCHEMA_NAME_SYM /* SQL-2003-N */ +%token SECOND_SYM /* SQL-2003-R */ +%token SECURITY_SYM /* SQL-2003-N */ +%token SEQUENCE_SYM +%token SERIALIZABLE_SYM /* SQL-2003-N */ +%token SERIAL_SYM +%token SESSION_SYM /* SQL-2003-N */ +%token SERVER_SYM +%token SETVAL_SYM /* PostgreSQL sequence function */ +%token SHARE_SYM +%token SHUTDOWN +%token SIGNED_SYM +%token SIMPLE_SYM /* SQL-2003-N */ +%token SKIP_SYM +%token SLAVE +%token SLAVES +%token SLAVE_POS_SYM +%token SLOW +%token SNAPSHOT_SYM +%token SOCKET_SYM +%token SOFT_SYM +%token SONAME_SYM +%token SOUNDS_SYM +%token SOURCE_SYM +%token SQL_BUFFER_RESULT +%token 
SQL_CACHE_SYM +%token SQL_CALC_FOUND_ROWS +%token SQL_NO_CACHE_SYM +%token SQL_THREAD +%token STAGE_SYM +%token STARTS_SYM +%token START_SYM /* SQL-2003-R */ +%token STATEMENT_SYM +%token STATUS_SYM +%token STOP_SYM +%token STORAGE_SYM +%token STORED_SYM +%token STRING_SYM +%token SUBCLASS_ORIGIN_SYM /* SQL-2003-N */ +%token SUBDATE_SYM +%token SUBJECT_SYM +%token SUBPARTITIONS_SYM +%token SUBPARTITION_SYM +%token SUPER_SYM +%token SUSPEND_SYM +%token SWAPS_SYM +%token SWITCHES_SYM +%token SYSTEM /* SQL-2011-R */ +%token SYSTEM_TIME_SYM /* SQL-2011-R */ +%token TABLES +%token TABLESPACE +%token TABLE_CHECKSUM_SYM +%token TABLE_NAME_SYM /* SQL-2003-N */ +%token TEMPORARY /* SQL-2003-N */ +%token TEMPTABLE_SYM +%token TEXT_SYM +%token THAN_SYM +%token TIES_SYM /* SQL-2011-N */ +%token TIMESTAMP /* SQL-2003-R */ +%token TIMESTAMP_ADD +%token TIMESTAMP_DIFF +%token TIME_SYM /* SQL-2003-R, Oracle-R */ +%token TRANSACTION_SYM +%token TRANSACTIONAL_SYM +%token THREADS_SYM +%token TRIGGERS_SYM +%token TRIM_ORACLE +%token TRUNCATE_SYM +%token TYPES_SYM +%token TYPE_SYM /* SQL-2003-N */ +%token UDF_RETURNS_SYM +%token UNBOUNDED_SYM /* SQL-2011-N */ +%token UNCOMMITTED_SYM /* SQL-2003-N */ +%token UNDEFINED_SYM +%token UNDOFILE_SYM +%token UNDO_BUFFER_SIZE_SYM +%token UNICODE_SYM +%token UNINSTALL_SYM +%token UNKNOWN_SYM /* SQL-2003-R */ +%token UNTIL_SYM +%token UPGRADE_SYM +%token USER_SYM /* SQL-2003-R */ +%token USE_FRM +%token VALUE_SYM /* SQL-2003-R */ +%token VARCHAR2_MARIADB_SYM +%token VARCHAR2_ORACLE_SYM /* Oracle-R, PLSQL-R */ +%token VARIABLES +%token VERSIONING_SYM /* SQL-2011-R */ +%token VIA_SYM +%token VIEW_SYM /* SQL-2003-N */ +%token VISIBLE_SYM /* MySQL 8.0 */ +%token VIRTUAL_SYM +%token WAIT_SYM +%token WARNINGS +%token WEEK_SYM +%token WEIGHT_STRING_SYM +%token WINDOW_SYM /* SQL-2003-R */ +%token WITHIN +%token WITHOUT /* SQL-2003-R */ +%token WORK_SYM /* SQL-2003-N */ +%token WRAPPER_SYM +%token WRITE_SYM /* SQL-2003-N */ +%token X509_SYM +%token XA_SYM 
+%token XML_SYM +%token YEAR_SYM /* SQL-2003-R */ + +/* A dummy token to force the priority of table_ref production in a join. */ +%left CONDITIONLESS_JOIN +%left JOIN_SYM INNER_SYM STRAIGHT_JOIN CROSS LEFT RIGHT ON_SYM USING + +%left SET_VAR +%left OR_SYM OR2_SYM +%left XOR +%left AND_SYM AND_AND_SYM + +%left PREC_BELOW_NOT + +/* The precedence of boolean NOT is in fact here. See the comment below. */ + +%left '=' EQUAL_SYM GE '>' LE '<' NE +%nonassoc IS +%right BETWEEN_SYM +%left LIKE SOUNDS_SYM REGEXP IN_SYM +%left '|' +%left '&' +%left SHIFT_LEFT SHIFT_RIGHT +%left '-' '+' ORACLE_CONCAT_SYM +%left '*' '/' '%' DIV_SYM MOD_SYM +%left '^' +%left MYSQL_CONCAT_SYM +/* + Boolean negation has a special branch in "expr" starting with NOT_SYM. + The precedence of logical negation is determined by the grammar itself + (without using Bison terminal symbol precedence) in this order + - Boolean factor (i.e. logical AND) + - Boolean NOT + - Boolean test (such as '=', IS NULL, IS TRUE) + + But we also need a precedence for NOT_SYM in other contexts, + to shift (without reduce) in these cases: + predicate NOT IN ... + predicate NOT BETWEEN ... + predicate NOT LIKE ... + predicate NOT REGEXP ... + If the precedence of NOT_SYM was low, it would reduce immediately + after scanning "predicate" and then produce a syntax error on "NOT". +*/ +%nonassoc NOT_SYM +%nonassoc NEG '~' NOT2_SYM BINARY +%nonassoc COLLATE_SYM +%nonassoc SUBQUERY_AS_EXPR + +/* + Tokens that can change their meaning from identifier to something else + in certain context. 
+ + - TRANSACTION: identifier, history unit: + SELECT transaction FROM t1; + SELECT * FROM t1 FOR SYSTEM_TIME AS OF TRANSACTION @var; + + - TIMESTAMP: identifier, literal, history unit: + SELECT timestamp FROM t1; + SELECT TIMESTAMP '2001-01-01 10:20:30'; + SELECT * FROM t1 FOR SYSTEM_TIME AS OF TIMESTAMP CONCAT(@date,' ',@time); + + - PERIOD: identifier, period for system time: + SELECT period FROM t1; + ALTER TABLE DROP PERIOD FOR SYSTEM TIME; + + - SYSTEM: identifier, system versioning: + SELECT system FROM t1; + ALTER TABLE DROP SYSTEM VERSIONING; + + - USER: identifier, user: + SELECT user FROM t1; + KILL USER foo; + + Note, we need here only tokens that cause shift/reduce conflicts + with keyword identifiers. For example: + opt_clause1: %empty | KEYWORD ... ; + clause2: opt_clause1 ident; + KEYWORD can appear both in opt_clause1 and in "ident" through the "keyword" + rule. So the parser reports a conflict on how to interpret KEYWORD: + - as a start of non-empty branch in opt_clause1, or + - as an identifier which follows the empty branch in opt_clause1. + + Example#1: + alter_list_item: + DROP opt_column opt_if_exists_table_element field_ident + | DROP SYSTEM VERSIONING_SYM + SYSTEM can be a keyword in field_ident, or can be a start of + SYSTEM VERSIONING. + + Example#2: + system_time_expr: AS OF_SYM history_point + history_point: opt_history_unit bit_expr + opt_history_unit: | TRANSACTION_SYM + TRANSACTION can be a non-empty history unit, or can be an identifier + in bit_expr. + + In the grammar below we use %prec to explicitly tell Bison to go + through the empty branch in the optional rule only when the lookahead + token does not belong to a small set of selected tokens. 
+ + Tokens NEXT_SYM and PREVIOUS_SYM also change their meaning from + identifiers to sequence operations when followed by VALUE_SYM: + SELECT NEXT VALUE FOR s1, PREVIOUS VALUE FOR s1; + but we don't need to list them here as they do not seem to cause + conflicts (according to bison -v), as both meanings + (as identifier, and as a sequence operation) are parts of the same target + column_default_non_parenthesized_expr, and there are no any optional + clauses between the start of column_default_non_parenthesized_expr + and until NEXT_SYM / PREVIOUS_SYM. +*/ +%left PREC_BELOW_IDENTIFIER_OPT_SPECIAL_CASE +%left TRANSACTION_SYM TIMESTAMP PERIOD_SYM SYSTEM USER COMMENT_SYM + + +/* + Tokens that can appear in a token contraction on the second place + and change the meaning of the previous token. + + - TEXT_STRING: changes the meaning of TIMESTAMP/TIME/DATE + from identifier to literal: + SELECT timestamp FROM t1; + SELECT TIMESTAMP'2001-01-01 00:00:00' FROM t1; + + - Parenthesis: changes the meaning of TIMESTAMP/TIME/DATE + from identifiers to CAST-alike functions: + SELECT timestamp FROM t1; + SELECT timestamp(1) FROM t1; + + - VALUE: changes NEXT and PREVIOUS from identifier to sequence operation: + SELECT next, previous FROM t1; + SELECT NEXT VALUE FOR s1, PREVIOUS VALUE FOR s1; + + - VERSIONING: changes SYSTEM from identifier to SYSTEM VERSIONING + SELECT system FROM t1; + ALTER TABLE t1 ADD SYSTEM VERSIONING; +*/ +%left PREC_BELOW_CONTRACTION_TOKEN2 +%left TEXT_STRING '(' ')' VALUE_SYM VERSIONING_SYM +%left EMPTY_FROM_CLAUSE +%right INTO + +%type + DECIMAL_NUM FLOAT_NUM NUM LONG_NUM + HEX_NUM HEX_STRING + LEX_HOSTNAME ULONGLONG_NUM field_ident select_alias ident_or_text + TEXT_STRING_sys TEXT_STRING_literal + key_cache_name + sp_opt_label BIN_NUM TEXT_STRING_filesystem + opt_constraint constraint opt_ident + sp_block_label sp_control_label opt_place opt_db + udt_name + +%type + IDENT_sys + ident + label_ident + sp_decl_ident + ident_or_empty + ident_table_alias + 
ident_sysvar_name + ident_for_loop_index + +%type + TEXT_STRING + NCHAR_STRING + json_text_literal + json_text_literal_or_num + +%type + opt_table_alias_clause + table_alias_clause + +%type + IDENT + IDENT_QUOTED + IDENT_cli + ident_cli + ident_cli_set_usual_case + +%type + ident_sys_alloc + +%type + keyword_data_type + keyword_cast_type + keyword_ident + keyword_label + keyword_set_special_case + keyword_set_usual_case + keyword_sp_block_section + keyword_sp_decl + keyword_sp_head + keyword_sp_var_and_label + keyword_sp_var_not_label + keyword_sysvar_name + keyword_sysvar_type + keyword_table_alias + keyword_verb_clause + charset + reserved_keyword_udt + reserved_keyword_udt_not_param_type + non_reserved_keyword_udt + +%type
+ table_ident table_ident_nodb references xid + table_ident_opt_wild create_like + +%type + optionally_qualified_column_ident + +%type + remember_name remember_end + remember_tok_start + wild_and_where + +%type + field_length_str + opt_compression_method + +%type + text_string hex_or_bin_String opt_gconcat_separator + +%type int_type real_type + +%type sp_handler + +%type json_on_response + +%type field_type field_type_all + qualified_field_type + field_type_numeric + field_type_string + field_type_lob + field_type_temporal + field_type_misc + json_table_field_type + +%type + binary + opt_binary + opt_binary_and_compression + attribute + attribute_list + field_def + +%type + collation_name + collation_name_or_default + + +%type opt_dyncol_type dyncol_type + numeric_dyncol_type temporal_dyncol_type string_dyncol_type + +%type + column_list_privilege + +%type field_spec column_def + +%type + order_dir lock_option + udf_type opt_local opt_no_write_to_binlog + opt_temporary all_or_any opt_distinct opt_glimit_clause + opt_ignore_leaves fulltext_options union_option + opt_not + transaction_access_mode_types + opt_natural_language_mode opt_query_expansion + opt_ev_status opt_ev_on_completion ev_on_completion opt_ev_comment + ev_alter_on_schedule_completion opt_ev_rename_to opt_ev_sql_stmt + optional_flush_tables_arguments + opt_time_precision kill_type kill_option int_num + opt_default_time_precision + case_stmt_body opt_bin_mod opt_for_system_time_clause + opt_if_exists_table_element opt_if_not_exists_table_element + opt_recursive opt_format_xid opt_for_portion_of_time_clause + ignorability + +%type + create_or_replace + opt_if_not_exists + opt_if_exists + +/* + Bit field of MYSQL_START_TRANS_OPT_* flags. 
+*/ +%type opt_start_transaction_option_list +%type start_transaction_option_list +%type start_transaction_option + +%type + opt_chain opt_release + +%type + delete_option + +%type + column_privilege + object_privilege + opt_grant_options + opt_grant_option + grant_option_list + grant_option + +%type + object_privilege_list + grant_privileges + +%type + grant_ident + +%type + ulong_num real_ulong_num merge_insert_types + ws_nweights + ws_level_flag_desc ws_level_flag_reverse ws_level_flags + opt_ws_levels ws_level_list ws_level_list_item ws_level_number + ws_level_range ws_level_list_or_range bool + field_options last_field_options + +%type + ulonglong_num real_ulonglong_num + +%type + longlong_num + +%type choice + +%type + replace_lock_option opt_low_priority insert_lock_option load_data_lock + insert_replace_option + +%type + literal insert_ident order_ident temporal_literal + simple_ident expr sum_expr in_sum_expr + variable variable_aux + boolean_test + predicate bit_expr parenthesized_expr + table_wild simple_expr column_default_non_parenthesized_expr udf_expr + primary_expr string_factor_expr mysql_concatenation_expr + select_sublist_qualified_asterisk + expr_or_ignore expr_or_ignore_or_default set_expr_or_default + signed_literal expr_or_literal + sp_opt_default + simple_ident_nospvar + field_or_var limit_option + part_func_expr + window_func_expr + window_func + simple_window_func + inverse_distribution_function + percentile_function + inverse_distribution_function_def + explicit_cursor_attr + function_call_keyword + function_call_keyword_timestamp + function_call_nonkeyword + function_call_generic + function_call_conflict kill_expr + signal_allowed_expr + simple_target_specification + condition_number + opt_versioning_interval_start + +%type opt_vers_auto_part + +%type param_marker + +%type + NUM_literal + +%type text_literal + +%type + expr_list opt_udf_expr_list udf_expr_list when_list when_list_opt_else + ident_list ident_list_arg opt_expr_list + 
execute_using + execute_params + +%type + sp_cursor_stmt_lex + sp_cursor_stmt + +%type + expr_lex + +%destructor +{ + /* + In case of a syntax/oom error let's free the sp_expr_lex + instance, but only if it has not been linked to any structures + such as sp_instr_jump_if_not::m_lex_keeper yet, e.g.: + IF f1() THEN1 + i.e. THEN1 came instead of the expected THEN causing a syntax error. + */ + if (!$$->sp_lex_in_use) + delete $$; +} + + +/* + +// COMMENT_FOR_DESCTRUCTOR: +// +// %destructor is only invoked if the rule parsing fails in the middle. +// If we call YYABORT from the last code block %destructor is not called, +// because Bison's stack already contains the reduced upper level rule. +// If we need to invoke the %destructor after the YYABORT in the last code +// block, we have to add another dummy empty end-of-rule action {} at the end. + +// So to have a %destructor work properly with YYABORT, +// make sure to turn a grammar like this: + +rule: + KEYWORD expr_lex + { + if (condition) // End-of-rule action + YYABORT; + } + ; + +// into: + +rule: + KEYWORD expr_lex + { + if (condition) // This is now a mid-rule action + YYABORT; + } + { + // A dummy empty end-of-rule action. 
+ } + ; +*/ + + +%type + assignment_source_lex + assignment_source_expr + for_loop_bound_expr + +%type + cursor_actual_parameters + opt_parenthesized_cursor_actual_parameters + +%destructor +{ + if ($$) + { + sp_assignment_lex *elem; + List_iterator li(*$$); + while ((elem= li++)) + { + if (!elem->sp_lex_in_use) + delete elem; + } + } +} + + +%type + option_type opt_var_type opt_var_ident_type + +%type + constraint_key_type fulltext spatial + +%type + btree_or_rtree opt_key_algorithm_clause opt_USING_key_algorithm + +%type + using_list opt_use_partition use_partition + +%type + key_part + +%type + join_table_list join_table + table_factor table_ref esc_table_ref + table_primary_ident table_primary_ident_opt_parens + table_primary_derived table_primary_derived_opt_parens + derived_table_list table_reference_list_parens + nested_table_reference_list join_table_parens + update_table_list table_function +%type date_time_type; +%type interval + +%type interval_time_stamp + +%type storage_engines known_storage_engines + +%type row_types + +%type isolation_types + +%type handler_rkey_mode + +%type cast_type cast_type_numeric cast_type_temporal + +%type precision opt_precision float_options + field_length opt_field_length + field_scale opt_field_scale + +%type user grant_user grant_role user_or_role current_role + admin_option_for_role user_maybe_role role_name + +%type opt_auth_str auth_expression auth_token + text_or_password + +%type + charset_name + charset_or_alias + charset_name_or_default + old_or_new_charset_name + old_or_new_charset_name_or_default + opt_load_data_charset + UNDERSCORE_CHARSET + +%type subselect + query_specification + table_value_constructor + simple_table + query_simple + query_primary + subquery + select_into_query_specification + +%type + query_expression + query_expression_no_with_clause + query_expression_body_ext + query_expression_body_ext_parens + query_expression_body + query_specification_start + +%type comp_op + +%type 
dyncall_create_element + +%type dyncall_create_list + +%type select_outvar + +%type opt_check_constraint check_constraint virtual_column_func + column_default_expr + +%type unit_type_decl + +%type + opt_procedure_or_into + opt_select_lock_type + select_lock_type + opt_lock_wait_timeout_new + +%type opt_limit_clause limit_clause limit_options + fetch_first_clause + +%type + query_expression_tail + opt_query_expression_tail + order_or_limit + order_limit_lock + opt_order_limit_lock + +%type opt_order_clause order_clause order_list + +%type + directly_executable_statement + analyze_stmt_command backup backup_statements + query verb_clause create create_routine change select select_into + do drop drop_routine insert replace insert_start stmt_end + insert_values update delete truncate rename compound_statement + show describe load alter optimize keycache preload flush + reset purge begin_stmt_mariadb commit rollback savepoint release + slave master_def master_defs master_file_def slave_until_opts + repair analyze opt_with_admin opt_with_admin_option + analyze_table_list analyze_table_elem_spec + opt_persistent_stat_clause persistent_stat_spec + persistent_column_stat_spec persistent_index_stat_spec + table_column_list table_index_list table_index_name + check start checksum opt_returning + field_list field_list_item kill key_def constraint_def + keycache_list keycache_list_or_parts assign_to_keycache + assign_to_keycache_parts + preload_list preload_list_or_parts preload_keys preload_keys_parts + select_item_list select_item values_list no_braces + delete_limit_clause fields opt_values values + no_braces_with_names opt_values_with_names values_with_names + procedure_list procedure_list2 procedure_item + handler opt_generated_always + opt_ignore opt_column opt_restrict + grant revoke set lock unlock string_list + table_lock_list table_lock + ref_list opt_match_clause opt_on_update_delete use + opt_delete_options opt_delete_option varchar nchar nvarchar + opt_outer 
table_list table_name table_alias_ref_list table_alias_ref + compressed_deprecated_data_type_attribute + compressed_deprecated_column_attribute + grant_list + user_list user_and_role_list + rename_list table_or_tables + clear_privileges flush_options flush_option + opt_flush_lock flush_lock flush_options_list + equal optional_braces + opt_mi_check_type opt_to mi_check_types + table_to_table_list table_to_table opt_table_list opt_as + handler_rkey_function handler_read_or_scan + single_multi table_wild_list table_wild_one opt_wild + opt_and + select_var_list select_var_list_init help + opt_extended_describe shutdown + opt_format_json + prepare execute deallocate + statement + sp_c_chistics sp_a_chistics sp_chistic sp_c_chistic xa + opt_field_or_var_spec fields_or_vars opt_load_data_set_spec + view_list_opt view_list view_select + trigger_tail event_tail + install uninstall partition_entry binlog_base64_event + normal_key_options normal_key_opts all_key_opt + spatial_key_options fulltext_key_options normal_key_opt + fulltext_key_opt spatial_key_opt fulltext_key_opts spatial_key_opts + explain_for_connection + keep_gcc_happy + key_using_alg + part_column_list + period_for_system_time + period_for_application_time + server_def server_options_list server_option + definer_opt no_definer definer get_diagnostics + parse_vcol_expr vcol_opt_specifier vcol_opt_attribute + vcol_opt_attribute_list vcol_attribute + opt_serial_attribute opt_serial_attribute_list serial_attribute + explainable_command + opt_lock_wait_timeout + opt_delete_gtid_domain + asrow_attribute + opt_constraint_no_id + json_table_columns_clause json_table_columns_list json_table_column + json_table_column_type json_opt_on_empty_or_error + json_on_error_response json_on_empty_response + +%type call sp_proc_stmts sp_proc_stmts1 sp_proc_stmt +%type sp_if_then_statements sp_case_then_statements +%type sp_proc_stmt_statement sp_proc_stmt_return +%type sp_proc_stmt_compound_ok +%type sp_proc_stmt_if +%type 
sp_labeled_control sp_unlabeled_control +%type sp_labeled_block sp_unlabeled_block +%type sp_proc_stmt_continue_oracle +%type sp_proc_stmt_exit_oracle +%type sp_proc_stmt_leave +%type sp_proc_stmt_iterate +%type sp_proc_stmt_goto_oracle +%type sp_proc_stmt_with_cursor +%type sp_proc_stmt_open sp_proc_stmt_fetch sp_proc_stmt_close +%type case_stmt_specification +%type loop_body while_body repeat_body +%type for_loop_statements + +%type view_algorithm view_check_option +%type view_suid opt_view_suid +%type only_or_with_ties + +%type plsql_cursor_attr +%type sp_suid +%type opt_aggregate + +%type sp_decl_idents sp_decl_idents_init_vars +%type sp_handler_type sp_hcond_list +%type sp_cond sp_hcond sqlstate signal_value opt_signal_value +%type sp_name +%type sp_param_name sp_param_name_and_mode sp_param +%type sp_param_anchored +%type sp_for_loop_index_and_bounds +%type sp_for_loop_bounds +%type trim_operands +%type substring_operands +%type opt_sp_for_loop_direction +%type sp_parameter_type +%type index_hint_type +%type index_hint_clause normal_join inner_join +%type data_or_xml + +%type signal_stmt resignal_stmt raise_stmt_oracle +%type signal_condition_information_item_name + +%type trigger_follows_precedes_clause; +%type trigger_action_order; + +%type which_area; +%type diagnostics_information; +%type statement_information_item; +%type statement_information_item_name; +%type statement_information; +%type condition_information_item; +%type condition_information_item_name; +%type condition_information; + +%type row_field_name row_field_definition +%type row_field_definition_list row_type_body + +%type opt_window_clause window_def_list window_def window_spec +%type window_name +%type opt_window_ref opt_window_frame_clause +%type window_frame_units; +%type window_frame_extent; +%type opt_window_frame_exclusion; +%type window_frame_start window_frame_bound; + +%type + '-' '+' '*' '/' '%' '(' ')' + ',' '!' 
'{' '}' '&' '|' + +%type with_clause + +%type with_element_head + +%type + comma_separated_ident_list + opt_with_column_list + with_column_list + opt_cycle + +%type opt_history_unit +%type history_point +%type with_or_without_system +%type engine_defined_option; + +%ifdef MARIADB +%type sp_tail_standalone +%type sp_unlabeled_block_not_atomic +%type sp_proc_stmt_in_returns_clause +%type sp_label +%type sp_decl_handler +%type sp_decls +%type sp_decl +%type sp_decl_body +%type sp_decl_variable_list +%type sp_decl_variable_list_anchored +%type reserved_keyword_udt_param_type +%else +%type set_assign +%type sp_opt_inout +%type sp_tail_standalone +%type sp_labelable_stmt +%type remember_end_opt +%type opt_package_routine_end_name +%type label_declaration_oracle +%type labels_declaration_oracle +%type keyword_directly_assignable +%type ident_directly_assignable +%type ident_cli_directly_assignable +%type opt_sp_name +%type sp_decl_body_list +%type opt_sp_decl_body_list +%type sp_decl_variable_list +%type sp_decl_variable_list_anchored +%type sp_decl_non_handler +%type sp_decl_non_handler_list +%type sp_decl_handler +%type sp_decl_handler_list +%type opt_sp_decl_handler_list +%type package_implementation_routine_definition +%type package_implementation_item_declaration +%type package_implementation_declare_section +%type package_implementation_declare_section_list1 +%type package_implementation_declare_section_list2 +%type sp_block_statements_and_exceptions +%type package_implementation_executable_section +%type sp_instr_addr +%type opt_exception_clause exception_handlers +%type remember_lex +%type package_routine_lex +%type package_specification_function +%type package_specification_procedure +%endif ORACLE + +%% + + +/* + Indentation of grammar rules: + +rule: <-- starts at col 1 + rule1a rule1b rule1c <-- starts at col 11 + { <-- starts at col 11 + code <-- starts at col 13, indentation is 2 spaces + } + | rule2a rule2b + { + code + } + ; <-- on a line by itself, starts 
at col 9 + + Also, please do not use any , but spaces. + Having a uniform indentation in this file helps + code reviews, patches, merges, and make maintenance easier. + Tip: grep [[:cntrl:]] sql_yacc.yy + Thanks. +*/ + +query: + END_OF_INPUT + { + if (!thd->bootstrap && + (!(thd->lex->lex_options & OPTION_LEX_FOUND_COMMENT))) + my_yyabort_error((ER_EMPTY_QUERY, MYF(0))); + + thd->lex->sql_command= SQLCOM_EMPTY_QUERY; + YYLIP->found_semicolon= NULL; + } + | directly_executable_statement + { + Lex_input_stream *lip = YYLIP; + + if ((thd->client_capabilities & CLIENT_MULTI_QUERIES) && + lip->multi_statements && + ! lip->eof()) + { + /* + We found a well formed query, and multi queries are allowed: + - force the parser to stop after the ';' + - mark the start of the next query for the next invocation + of the parser. + */ + lip->next_state= MY_LEX_END; + lip->found_semicolon= lip->get_ptr(); + } + else + { + /* Single query, terminated. */ + lip->found_semicolon= NULL; + } + } + ';' + opt_end_of_input + | directly_executable_statement END_OF_INPUT + { + /* Single query, not terminated. 
*/ + YYLIP->found_semicolon= NULL; + } + ; + +opt_end_of_input: + /* empty */ + | END_OF_INPUT + ; + +directly_executable_statement: + statement + | begin_stmt_mariadb + | compound_statement + ; + +/* Verb clauses, except begin and compound_statement */ +verb_clause: + alter + | analyze + | analyze_stmt_command + | backup + | binlog_base64_event + | call + | change + | check + | checksum + | commit + | create + | deallocate + | delete + | describe + | do + | drop + | execute + | explain_for_connection + | flush + | get_diagnostics + | grant + | handler + | help + | insert + | install + | keep_gcc_happy + | keycache + | kill + | load + | lock + | optimize + | parse_vcol_expr + | partition_entry + | preload + | prepare + | purge + | raise_stmt_oracle + | release + | rename + | repair + | replace + | reset + | resignal_stmt + | revoke + | rollback + | savepoint + | select + | select_into + | set + | signal_stmt + | show + | shutdown + | slave + | start + | truncate + | uninstall + | unlock + | update + | use + | xa + ; + +deallocate: + deallocate_or_drop PREPARE_SYM ident + { + Lex->stmt_deallocate_prepare($3); + } + ; + +deallocate_or_drop: + DEALLOCATE_SYM + | DROP + ; + +prepare: + PREPARE_SYM ident FROM + { Lex->clause_that_disallows_subselect= "PREPARE..FROM"; } + expr + { + Lex->clause_that_disallows_subselect= NULL; + if (Lex->stmt_prepare($2, $5)) + MYSQL_YYABORT; + } + ; + +execute: + EXECUTE_SYM ident execute_using + { + if (Lex->stmt_execute($2, $3)) + MYSQL_YYABORT; + } + | EXECUTE_SYM IMMEDIATE_SYM + { Lex->clause_that_disallows_subselect= "EXECUTE IMMEDIATE"; } + expr + { Lex->clause_that_disallows_subselect= NULL; } + execute_using + { + if (Lex->stmt_execute_immediate($4, $6)) + MYSQL_YYABORT; + } + ; + +execute_using: + /* nothing */ { $$= NULL; } + | USING + { Lex->clause_that_disallows_subselect= "EXECUTE..USING"; } + execute_params + { + $$= $3; + Lex->clause_that_disallows_subselect= NULL; + } + ; + +execute_params: + expr_or_ignore_or_default + { 
+ if (unlikely(!($$= List::make(thd->mem_root, $1)))) + MYSQL_YYABORT; + } + | execute_params ',' expr_or_ignore_or_default + { + if (($$= $1)->push_back($3, thd->mem_root)) + MYSQL_YYABORT; + } + ; + + +/* help */ + +help: + HELP_SYM + { + if (unlikely(Lex->sphead)) + my_yyabort_error((ER_SP_BADSTATEMENT, MYF(0), "HELP")); + } + ident_or_text + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_HELP; + lex->help_arg= $3.str; + } + ; + +/* change master */ + +change: + CHANGE MASTER_SYM optional_connection_name TO_SYM + { + Lex->sql_command = SQLCOM_CHANGE_MASTER; + } + master_defs + optional_for_channel + {} + ; + +master_defs: + master_def + | master_defs ',' master_def + ; + +master_def: + MASTER_HOST_SYM '=' TEXT_STRING_sys + { + Lex->mi.host = $3.str; + } + | MASTER_USER_SYM '=' TEXT_STRING_sys + { + Lex->mi.user = $3.str; + } + | MASTER_PASSWORD_SYM '=' TEXT_STRING_sys + { + Lex->mi.password = $3.str; + } + | MASTER_PORT_SYM '=' ulong_num + { + Lex->mi.port = $3; + } + | MASTER_CONNECT_RETRY_SYM '=' ulong_num + { + Lex->mi.connect_retry = $3; + } + | MASTER_DELAY_SYM '=' ulong_num + { + if ($3 > MASTER_DELAY_MAX) + { + my_error(ER_MASTER_DELAY_VALUE_OUT_OF_RANGE, MYF(0), + (ulong) $3, (ulong) MASTER_DELAY_MAX); + } + else + Lex->mi.sql_delay = $3; + } + | MASTER_SSL_SYM '=' ulong_num + { + Lex->mi.ssl= $3 ? + LEX_MASTER_INFO::LEX_MI_ENABLE : LEX_MASTER_INFO::LEX_MI_DISABLE; + } + | MASTER_SSL_CA_SYM '=' TEXT_STRING_sys + { + Lex->mi.ssl_ca= $3.str; + } + | MASTER_SSL_CAPATH_SYM '=' TEXT_STRING_sys + { + Lex->mi.ssl_capath= $3.str; + } + | MASTER_SSL_CERT_SYM '=' TEXT_STRING_sys + { + Lex->mi.ssl_cert= $3.str; + } + | MASTER_SSL_CIPHER_SYM '=' TEXT_STRING_sys + { + Lex->mi.ssl_cipher= $3.str; + } + | MASTER_SSL_KEY_SYM '=' TEXT_STRING_sys + { + Lex->mi.ssl_key= $3.str; + } + | MASTER_SSL_VERIFY_SERVER_CERT_SYM '=' ulong_num + { + Lex->mi.ssl_verify_server_cert= $3 ? 
+ LEX_MASTER_INFO::LEX_MI_ENABLE : LEX_MASTER_INFO::LEX_MI_DISABLE; + } + | MASTER_SSL_CRL_SYM '=' TEXT_STRING_sys + { + Lex->mi.ssl_crl= $3.str; + } + | MASTER_SSL_CRLPATH_SYM '=' TEXT_STRING_sys + { + Lex->mi.ssl_crlpath= $3.str; + } + + | MASTER_HEARTBEAT_PERIOD_SYM '=' NUM_literal + { + Lex->mi.heartbeat_period= (float) $3->val_real(); + if (unlikely(Lex->mi.heartbeat_period > + SLAVE_MAX_HEARTBEAT_PERIOD) || + unlikely(Lex->mi.heartbeat_period < 0.0)) + my_yyabort_error((ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE, MYF(0), + SLAVE_MAX_HEARTBEAT_PERIOD)); + + if (unlikely(Lex->mi.heartbeat_period > slave_net_timeout)) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE_MAX, + ER_THD(thd, ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE_MAX)); + } + if (unlikely(Lex->mi.heartbeat_period < 0.001)) + { + if (unlikely(Lex->mi.heartbeat_period != 0.0)) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE_MIN, + ER_THD(thd, ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE_MIN)); + Lex->mi.heartbeat_period= 0.0; + } + Lex->mi.heartbeat_opt= LEX_MASTER_INFO::LEX_MI_DISABLE; + } + Lex->mi.heartbeat_opt= LEX_MASTER_INFO::LEX_MI_ENABLE; + } + | IGNORE_SERVER_IDS_SYM '=' '(' ignore_server_id_list ')' + { + Lex->mi.repl_ignore_server_ids_opt= LEX_MASTER_INFO::LEX_MI_ENABLE; + } + | DO_DOMAIN_IDS_SYM '=' '(' do_domain_id_list ')' + { + Lex->mi.repl_do_domain_ids_opt= LEX_MASTER_INFO::LEX_MI_ENABLE; + } + | IGNORE_DOMAIN_IDS_SYM '=' '(' ignore_domain_id_list ')' + { + Lex->mi.repl_ignore_domain_ids_opt= LEX_MASTER_INFO::LEX_MI_ENABLE; + } + | + master_file_def + ; + +ignore_server_id_list: + /* Empty */ + | ignore_server_id + | ignore_server_id_list ',' ignore_server_id + ; + +ignore_server_id: + ulong_num + { + insert_dynamic(&Lex->mi.repl_ignore_server_ids, (uchar*) &($1)); + } + ; + +do_domain_id_list: + /* Empty */ + | do_domain_id + | do_domain_id_list ',' do_domain_id + ; + +do_domain_id: + 
ulong_num + { + insert_dynamic(&Lex->mi.repl_do_domain_ids, (uchar*) &($1)); + } + ; + +ignore_domain_id_list: + /* Empty */ + | ignore_domain_id + | ignore_domain_id_list ',' ignore_domain_id + ; + +ignore_domain_id: + ulong_num + { + insert_dynamic(&Lex->mi.repl_ignore_domain_ids, (uchar*) &($1)); + } + ; + +master_file_def: + MASTER_LOG_FILE_SYM '=' TEXT_STRING_sys + { + Lex->mi.log_file_name = $3.str; + } + | MASTER_LOG_POS_SYM '=' ulonglong_num + { + /* + If the user specified a value < BIN_LOG_HEADER_SIZE, adjust it + instead of causing subsequent errors. + We need to do it in this file, because only there we know that + MASTER_LOG_POS has been explicitly specified. On the contrary + in change_master() (sql_repl.cc) we cannot distinguish between 0 + (MASTER_LOG_POS explicitly specified as 0) and 0 (unspecified), + whereas we want to distinguish (specified 0 means "read the binlog + from 0" (4 in fact), unspecified means "don't change the position + (keep the preceding value)"). + */ + Lex->mi.pos= MY_MAX(BIN_LOG_HEADER_SIZE, $3); + } + | RELAY_LOG_FILE_SYM '=' TEXT_STRING_sys + { + Lex->mi.relay_log_name = $3.str; + } + | RELAY_LOG_POS_SYM '=' ulong_num + { + Lex->mi.relay_log_pos = $3; + /* Adjust if < BIN_LOG_HEADER_SIZE (same comment as Lex->mi.pos) */ + Lex->mi.relay_log_pos= MY_MAX(BIN_LOG_HEADER_SIZE, Lex->mi.relay_log_pos); + } + | MASTER_USE_GTID_SYM '=' CURRENT_POS_SYM + { + if (unlikely(Lex->mi.use_gtid_opt != LEX_MASTER_INFO::LEX_GTID_UNCHANGED)) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "MASTER_use_gtid")); + Lex->mi.use_gtid_opt= LEX_MASTER_INFO::LEX_GTID_CURRENT_POS; + } + | MASTER_USE_GTID_SYM '=' SLAVE_POS_SYM + { + if (unlikely(Lex->mi.use_gtid_opt != LEX_MASTER_INFO::LEX_GTID_UNCHANGED)) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "MASTER_use_gtid")); + Lex->mi.use_gtid_opt= LEX_MASTER_INFO::LEX_GTID_SLAVE_POS; + } + | MASTER_USE_GTID_SYM '=' NO_SYM + { + if (unlikely(Lex->mi.use_gtid_opt != LEX_MASTER_INFO::LEX_GTID_UNCHANGED)) + 
my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "MASTER_use_gtid")); + Lex->mi.use_gtid_opt= LEX_MASTER_INFO::LEX_GTID_NO; + } + | MASTER_DEMOTE_TO_SLAVE_SYM '=' bool + { + Lex->mi.is_demotion_opt= (bool) $3; + } + ; + +optional_connection_name: + /* empty */ + { + LEX *lex= thd->lex; + lex->mi.connection_name= null_clex_str; + } + | connection_name + ; + +connection_name: + TEXT_STRING_sys + { + Lex->mi.connection_name= $1; +#ifdef HAVE_REPLICATION + if (unlikely(check_master_connection_name(&$1))) + my_yyabort_error((ER_WRONG_ARGUMENTS, MYF(0), "MASTER_CONNECTION_NAME")); +#endif + } + ; + +optional_for_channel: + /* empty */ + { + /*do nothing */ + } + | for_channel + + ; + +for_channel: + FOR_SYM CHANNEL_SYM TEXT_STRING_sys + { + if (Lex->mi.connection_name.str != NULL) + { + my_yyabort_error((ER_WRONG_ARGUMENTS, MYF(0), "CONNECTION_NAME AND FOR CHANNEL CAN NOT BE SPECIFIED AT THE SAME TIME)")); + } + else + { + Lex->mi.connection_name= $3; +#ifdef HAVE_REPLICATION + if (unlikely(check_master_connection_name(&$3))) + my_yyabort_error((ER_WRONG_ARGUMENTS, MYF(0), "MASTER_CONNECTION_NAME")); +#endif + } + + } + ; + +/* create a table */ + +create: + create_or_replace opt_temporary TABLE_SYM opt_if_not_exists + { + LEX *lex= thd->lex; + if (!(lex->m_sql_cmd= new (thd->mem_root) Sql_cmd_create_table())) + MYSQL_YYABORT; + lex->create_info.init(); + if (lex->main_select_push()) + MYSQL_YYABORT; + lex->current_select->parsing_place= BEFORE_OPT_LIST; + if (lex->set_command_with_check(SQLCOM_CREATE_TABLE, $2, $1 | $4)) + MYSQL_YYABORT; + } + table_ident + { + LEX *lex= thd->lex; + if (!lex->first_select_lex()-> + add_table_to_list(thd, $6, NULL, TL_OPTION_UPDATING, + TL_WRITE, MDL_SHARED_UPGRADABLE)) + MYSQL_YYABORT; + lex->alter_info.reset(); + /* + For CREATE TABLE we should not open the table even if it exists. 
+ If the table exists, we should either not create it or replace it + */ + lex->query_tables->open_strategy= TABLE_LIST::OPEN_STUB; + lex->name= null_clex_str; + lex->create_last_non_select_table= lex->last_table(); + lex->inc_select_stack_outer_barrier(); + } + create_body + { + LEX *lex= thd->lex; + create_table_set_open_action_and_adjust_tables(lex); + Lex->pop_select(); //main select + } + | create_or_replace opt_temporary SEQUENCE_SYM opt_if_not_exists table_ident + { + LEX *lex= thd->lex; + if (lex->main_select_push()) + MYSQL_YYABORT; + if (!(lex->m_sql_cmd= new (thd->mem_root) Sql_cmd_create_sequence())) + MYSQL_YYABORT; + lex->create_info.init(); + if (unlikely(lex->set_command_with_check(SQLCOM_CREATE_SEQUENCE, $2, + $1 | $4))) + MYSQL_YYABORT; + + if (!lex->first_select_lex()-> + add_table_to_list(thd, $5, NULL, TL_OPTION_UPDATING, + TL_WRITE, MDL_EXCLUSIVE)) + MYSQL_YYABORT; + + /* + For CREATE TABLE, an non-existing table is not an error. + Instruct open_tables() to just take an MDL lock if the + table does not exist. 
+ */ + lex->alter_info.reset(); + lex->query_tables->open_strategy= TABLE_LIST::OPEN_STUB; + lex->name= null_clex_str; + lex->create_last_non_select_table= lex->last_table(); + if (unlikely(!(lex->create_info.seq_create_info= + new (thd->mem_root) sequence_definition()))) + MYSQL_YYABORT; + } + opt_sequence opt_create_table_options + { + LEX *lex= thd->lex; + + if (unlikely(lex->create_info.seq_create_info->check_and_adjust(1))) + { + my_error(ER_SEQUENCE_INVALID_DATA, MYF(0), + lex->first_select_lex()->table_list.first->db.str, + lex->first_select_lex()->table_list.first-> + table_name.str); + MYSQL_YYABORT; + } + + /* No fields specified, generate them */ + if (unlikely(prepare_sequence_fields(thd, + &lex->alter_info.create_list))) + MYSQL_YYABORT; + + /* CREATE SEQUENCE always creates a sequence */ + Lex->create_info.used_fields|= HA_CREATE_USED_SEQUENCE; + Lex->create_info.sequence= 1; + + create_table_set_open_action_and_adjust_tables(lex); + Lex->pop_select(); //main select + } + | create_or_replace INDEX_SYM opt_if_not_exists + { + if (Lex->main_select_push()) + MYSQL_YYABORT; + } + ident + opt_key_algorithm_clause + ON table_ident + { + if (Lex->add_create_index_prepare($8)) + MYSQL_YYABORT; + if (Lex->add_create_index(Key::MULTIPLE, &$5, $6, $1 | $3)) + MYSQL_YYABORT; + } + '(' key_list ')' opt_lock_wait_timeout normal_key_options + opt_index_lock_algorithm + { + Lex->pop_select(); //main select + } + | create_or_replace UNIQUE_SYM INDEX_SYM opt_if_not_exists + { + if (Lex->main_select_push()) + MYSQL_YYABORT; + } + ident + opt_key_algorithm_clause + ON table_ident + { + if (Lex->add_create_index_prepare($9)) + MYSQL_YYABORT; + if (Lex->add_create_index(Key::UNIQUE, &$6, $7, $1 | $4)) + MYSQL_YYABORT; + } + '(' key_list opt_without_overlaps ')' + opt_lock_wait_timeout normal_key_options + opt_index_lock_algorithm + { + Lex->pop_select(); //main select + } + | create_or_replace fulltext INDEX_SYM + { + if (Lex->main_select_push()) + MYSQL_YYABORT; + } + 
opt_if_not_exists ident + ON table_ident + { + if (Lex->add_create_index_prepare($8)) + MYSQL_YYABORT; + if (Lex->add_create_index($2, &$6, HA_KEY_ALG_UNDEF, $1 | $5)) + MYSQL_YYABORT; + } + '(' key_list ')' opt_lock_wait_timeout fulltext_key_options + opt_index_lock_algorithm + { + Lex->pop_select(); //main select + } + | create_or_replace spatial INDEX_SYM + { + if (Lex->main_select_push()) + MYSQL_YYABORT; + } + opt_if_not_exists ident + ON table_ident + { + if (Lex->add_create_index_prepare($8)) + MYSQL_YYABORT; + if (Lex->add_create_index($2, &$6, HA_KEY_ALG_UNDEF, $1 | $5)) + MYSQL_YYABORT; + } + '(' key_list ')' opt_lock_wait_timeout spatial_key_options + opt_index_lock_algorithm + { + Lex->pop_select(); //main select + } + | create_or_replace DATABASE opt_if_not_exists ident + { + Lex->create_info.init(); + } + opt_create_database_options + { + LEX *lex=Lex; + if (unlikely(lex->set_command_with_check(SQLCOM_CREATE_DB, 0, + $1 | $3))) + MYSQL_YYABORT; + lex->name= $4; + } + | create_or_replace definer_opt opt_view_suid VIEW_SYM + opt_if_not_exists table_ident + { + if (Lex->main_select_push()) + MYSQL_YYABORT; + Lex->inc_select_stack_outer_barrier(); + if (Lex->add_create_view(thd, $1 | $5, + DTYPE_ALGORITHM_UNDEFINED, $3, $6)) + MYSQL_YYABORT; + } + view_list_opt AS view_select + { + Lex->pop_select(); //main select + } + | create_or_replace view_algorithm definer_opt opt_view_suid VIEW_SYM + opt_if_not_exists table_ident + { + if (unlikely(Lex->add_create_view(thd, $1 | $6, $2, $4, $7))) + MYSQL_YYABORT; + if (Lex->main_select_push()) + MYSQL_YYABORT; + Lex->inc_select_stack_outer_barrier(); + } + view_list_opt AS view_select + { + Lex->pop_select(); //main select + } + | create_or_replace definer_opt TRIGGER_SYM + { + if (Lex->main_select_push()) + MYSQL_YYABORT; + Lex->create_info.set($1); + } + trigger_tail + { + Lex->pop_select(); //main select + } + | create_or_replace definer_opt EVENT_SYM + { + if (Lex->main_select_push()) + MYSQL_YYABORT; + 
Lex->create_info.set($1); + } + event_tail + { + Lex->pop_select(); //main select + } + | create_or_replace USER_SYM opt_if_not_exists clear_privileges + grant_list opt_require_clause opt_resource_options opt_account_locking_and_opt_password_expiration + { + if (unlikely(Lex->set_command_with_check(SQLCOM_CREATE_USER, + $1 | $3))) + MYSQL_YYABORT; + } + | create_or_replace ROLE_SYM opt_if_not_exists + clear_privileges role_list opt_with_admin + { + if (unlikely(Lex->set_command_with_check(SQLCOM_CREATE_ROLE, + $1 | $3))) + MYSQL_YYABORT; + } + | create_or_replace { Lex->set_command(SQLCOM_CREATE_SERVER, $1); } + server_def + { } + | create_routine + ; + +opt_sequence: + /* empty */ { } + | sequence_defs + ; + +sequence_defs: + sequence_def + | sequence_defs sequence_def + ; + +sequence_def: + MINVALUE_SYM opt_equal longlong_num + { + Lex->create_info.seq_create_info->min_value= $3; + Lex->create_info.seq_create_info->used_fields|= seq_field_used_min_value; + } + | NO_SYM MINVALUE_SYM + { + if (unlikely(Lex->create_info.seq_create_info->used_fields & seq_field_used_min_value)) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "MINVALUE")); + Lex->create_info.seq_create_info->used_fields|= seq_field_used_min_value; + } + | NOMINVALUE_SYM + { + if (unlikely(Lex->create_info.seq_create_info->used_fields & seq_field_used_min_value)) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "MINVALUE")); + Lex->create_info.seq_create_info->used_fields|= seq_field_used_min_value; + } + | MAXVALUE_SYM opt_equal longlong_num + { + if (unlikely(Lex->create_info.seq_create_info->used_fields & + seq_field_used_max_value)) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "MAXVALUE")); + Lex->create_info.seq_create_info->max_value= $3; + Lex->create_info.seq_create_info->used_fields|= seq_field_used_max_value; + } + | NO_SYM MAXVALUE_SYM + { + if (unlikely(Lex->create_info.seq_create_info->used_fields & seq_field_used_max_value)) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "MAXVALUE")); + 
Lex->create_info.seq_create_info->used_fields|= seq_field_used_max_value; + } + | NOMAXVALUE_SYM + { + if (unlikely(Lex->create_info.seq_create_info->used_fields & seq_field_used_max_value)) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "MAXVALUE")); + Lex->create_info.seq_create_info->used_fields|= seq_field_used_max_value; + } + | START_SYM opt_with longlong_num + { + if (unlikely(Lex->create_info.seq_create_info->used_fields & + seq_field_used_start)) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "START")); + Lex->create_info.seq_create_info->start= $3; + Lex->create_info.seq_create_info->used_fields|= seq_field_used_start; + } + | INCREMENT_SYM opt_by longlong_num + { + if (unlikely(Lex->create_info.seq_create_info->used_fields & + seq_field_used_increment)) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "INCREMENT")); + Lex->create_info.seq_create_info->increment= $3; + Lex->create_info.seq_create_info->used_fields|= seq_field_used_increment; + } + | CACHE_SYM opt_equal longlong_num + { + if (unlikely(Lex->create_info.seq_create_info->used_fields & + seq_field_used_cache)) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "CACHE")); + Lex->create_info.seq_create_info->cache= $3; + Lex->create_info.seq_create_info->used_fields|= seq_field_used_cache; + } + | NOCACHE_SYM + { + if (unlikely(Lex->create_info.seq_create_info->used_fields & + seq_field_used_cache)) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "CACHE")); + Lex->create_info.seq_create_info->cache= 0; + Lex->create_info.seq_create_info->used_fields|= seq_field_used_cache; + } + | CYCLE_SYM + { + if (unlikely(Lex->create_info.seq_create_info->used_fields & + seq_field_used_cycle)) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "CYCLE")); + Lex->create_info.seq_create_info->cycle= 1; + Lex->create_info.seq_create_info->used_fields|= seq_field_used_cycle; + } + | NOCYCLE_SYM + { + if (unlikely(Lex->create_info.seq_create_info->used_fields & + seq_field_used_cycle)) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), 
"CYCLE")); + Lex->create_info.seq_create_info->cycle= 0; + Lex->create_info.seq_create_info->used_fields|= seq_field_used_cycle; + } + | RESTART_SYM + { + if (unlikely(Lex->sql_command != SQLCOM_ALTER_SEQUENCE)) + { + thd->parse_error(ER_SYNTAX_ERROR, "RESTART"); + MYSQL_YYABORT; + } + if (unlikely(Lex->create_info.seq_create_info->used_fields & + seq_field_used_restart)) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "RESTART")); + Lex->create_info.seq_create_info->used_fields|= seq_field_used_restart; + } + | RESTART_SYM opt_with longlong_num + { + if (unlikely(Lex->sql_command != SQLCOM_ALTER_SEQUENCE)) + { + thd->parse_error(ER_SYNTAX_ERROR, "RESTART"); + MYSQL_YYABORT; + } + if (unlikely(Lex->create_info.seq_create_info->used_fields & + seq_field_used_restart)) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "RESTART")); + Lex->create_info.seq_create_info->restart= $3; + Lex->create_info.seq_create_info->used_fields|= seq_field_used_restart | seq_field_used_restart_value; + } + ; + +/* this rule is used to force look-ahead in the parser */ +force_lookahead: {} | FORCE_LOOKAHEAD {} ; + +server_def: + SERVER_SYM opt_if_not_exists ident_or_text + { + if (unlikely(Lex->add_create_options_with_check($2))) + MYSQL_YYABORT; + Lex->server_options.reset($3); + } + FOREIGN DATA_SYM WRAPPER_SYM ident_or_text + OPTIONS_SYM '(' server_options_list ')' + { Lex->server_options.scheme= $8; } + ; + +server_options_list: + server_option + | server_options_list ',' server_option + ; + +server_option: + USER_SYM TEXT_STRING_sys + { + MYSQL_YYABORT_UNLESS(Lex->server_options.username.str == 0); + Lex->server_options.username= $2; + } + | HOST_SYM TEXT_STRING_sys + { + MYSQL_YYABORT_UNLESS(Lex->server_options.host.str == 0); + Lex->server_options.host= $2; + } + | DATABASE TEXT_STRING_sys + { + MYSQL_YYABORT_UNLESS(Lex->server_options.db.str == 0); + Lex->server_options.db= $2; + } + | OWNER_SYM TEXT_STRING_sys + { + MYSQL_YYABORT_UNLESS(Lex->server_options.owner.str == 0); + 
Lex->server_options.owner= $2; + } + | PASSWORD_SYM TEXT_STRING_sys + { + MYSQL_YYABORT_UNLESS(Lex->server_options.password.str == 0); + Lex->server_options.password= $2; + } + | SOCKET_SYM TEXT_STRING_sys + { + MYSQL_YYABORT_UNLESS(Lex->server_options.socket.str == 0); + Lex->server_options.socket= $2; + } + | PORT_SYM ulong_num + { + Lex->server_options.port= $2; + } + ; + +event_tail: + remember_name opt_if_not_exists sp_name + { + LEX *lex=Lex; + + lex->stmt_definition_begin= $1; + if (unlikely(lex->add_create_options_with_check($2))) + MYSQL_YYABORT; + if (unlikely(!(lex->event_parse_data= + Event_parse_data::new_instance(thd)))) + MYSQL_YYABORT; + lex->event_parse_data->identifier= $3; + lex->event_parse_data->on_completion= + Event_parse_data::ON_COMPLETION_DROP; + + lex->sql_command= SQLCOM_CREATE_EVENT; + /* We need that for disallowing subqueries */ + } + ON SCHEDULE_SYM ev_schedule_time + opt_ev_on_completion + opt_ev_status + opt_ev_comment + DO_SYM ev_sql_stmt + { + /* + sql_command is set here because some rules in ev_sql_stmt + can overwrite it + */ + Lex->sql_command= SQLCOM_CREATE_EVENT; + } + ; + +ev_schedule_time: + EVERY_SYM expr interval + { + Lex->event_parse_data->item_expression= $2; + Lex->event_parse_data->interval= $3; + } + ev_starts + ev_ends + | AT_SYM expr + { + Lex->event_parse_data->item_execute_at= $2; + } + ; + +opt_ev_status: + /* empty */ { $$= 0; } + | ENABLE_SYM + { + Lex->event_parse_data->status= Event_parse_data::ENABLED; + Lex->event_parse_data->status_changed= true; + $$= 1; + } + | DISABLE_SYM ON SLAVE + { + Lex->event_parse_data->status= Event_parse_data::SLAVESIDE_DISABLED; + Lex->event_parse_data->status_changed= true; + $$= 1; + } + | DISABLE_SYM + { + Lex->event_parse_data->status= Event_parse_data::DISABLED; + Lex->event_parse_data->status_changed= true; + $$= 1; + } + ; + +ev_starts: + /* empty */ + { + Item *item= new (thd->mem_root) Item_func_now_local(thd, 0); + if (unlikely(item == NULL)) + MYSQL_YYABORT; + 
Lex->event_parse_data->item_starts= item; + } + | STARTS_SYM expr + { + Lex->event_parse_data->item_starts= $2; + } + ; + +ev_ends: + /* empty */ + | ENDS_SYM expr + { + Lex->event_parse_data->item_ends= $2; + } + ; + +opt_ev_on_completion: + /* empty */ { $$= 0; } + | ev_on_completion + ; + +ev_on_completion: + ON COMPLETION_SYM opt_not PRESERVE_SYM + { + Lex->event_parse_data->on_completion= $3 + ? Event_parse_data::ON_COMPLETION_DROP + : Event_parse_data::ON_COMPLETION_PRESERVE; + $$= 1; + } + ; + +opt_ev_comment: + /* empty */ { $$= 0; } + | COMMENT_SYM TEXT_STRING_sys + { + Lex->comment= Lex->event_parse_data->comment= $2; + $$= 1; + } + ; + +ev_sql_stmt: + { + LEX *lex= thd->lex; + Lex_input_stream *lip= YYLIP; + + /* + This stops the following : + - CREATE EVENT ... DO CREATE EVENT ...; + - ALTER EVENT ... DO CREATE EVENT ...; + - CREATE EVENT ... DO ALTER EVENT DO ....; + - CREATE PROCEDURE ... BEGIN CREATE EVENT ... END| + This allows: + - CREATE EVENT ... DO DROP EVENT yyy; + - CREATE EVENT ... DO ALTER EVENT yyy; + (the nested ALTER EVENT can have anything but DO clause) + - ALTER EVENT ... DO ALTER EVENT yyy; + (the nested ALTER EVENT can have anything but DO clause) + - ALTER EVENT ... DO DROP EVENT yyy; + - CREATE PROCEDURE ... BEGIN ALTER EVENT ... END| + (the nested ALTER EVENT can have anything but DO clause) + - CREATE PROCEDURE ... BEGIN DROP EVENT ... 
END| + */ + if (unlikely(lex->sphead)) + my_yyabort_error((ER_EVENT_RECURSION_FORBIDDEN, MYF(0))); + + if (unlikely(!lex->make_sp_head(thd, + lex->event_parse_data->identifier, + &sp_handler_procedure, + DEFAULT_AGGREGATE))) + MYSQL_YYABORT; + + lex->sphead->set_body_start(thd, lip->get_cpp_ptr()); + } + sp_proc_stmt force_lookahead + { + /* return back to the original memory root ASAP */ + if (Lex->sp_body_finalize_event(thd)) + MYSQL_YYABORT; + } + ; + +clear_privileges: + /* Nothing */ + { + LEX *lex=Lex; + lex->users_list.empty(); + lex->first_select_lex()->db= null_clex_str; + lex->account_options.reset(); + } + ; + +opt_aggregate: + /* Empty */ { $$= NOT_AGGREGATE; } + | AGGREGATE_SYM { $$= GROUP_AGGREGATE; } + ; + + +sp_handler: + FUNCTION_SYM { $$= &sp_handler_function; } + | PROCEDURE_SYM { $$= &sp_handler_procedure; } + | PACKAGE_ORACLE_SYM { $$= &sp_handler_package_spec; } + | PACKAGE_ORACLE_SYM BODY_ORACLE_SYM { $$= &sp_handler_package_body; } + ; + + +sp_name: + ident '.' ident + { + if (unlikely(!($$= Lex->make_sp_name(thd, &$1, &$3)))) + MYSQL_YYABORT; + } + | ident + { + if (unlikely(!($$= Lex->make_sp_name(thd, &$1)))) + MYSQL_YYABORT; + } + ; + +sp_a_chistics: + /* Empty */ {} + | sp_a_chistics sp_chistic {} + ; + +sp_c_chistics: + /* Empty */ {} + | sp_c_chistics sp_c_chistic {} + ; + +/* Characteristics for both create and alter */ +sp_chistic: + COMMENT_SYM TEXT_STRING_sys + { Lex->sp_chistics.comment= $2; } + | LANGUAGE_SYM SQL_SYM + { /* Just parse it, we only have one language for now. 
*/ } + | NO_SYM SQL_SYM + { Lex->sp_chistics.daccess= SP_NO_SQL; } + | CONTAINS_SYM SQL_SYM + { Lex->sp_chistics.daccess= SP_CONTAINS_SQL; } + | READS_SYM SQL_SYM DATA_SYM + { Lex->sp_chistics.daccess= SP_READS_SQL_DATA; } + | MODIFIES_SYM SQL_SYM DATA_SYM + { Lex->sp_chistics.daccess= SP_MODIFIES_SQL_DATA; } + | sp_suid + { Lex->sp_chistics.suid= $1; } + ; + +/* Create characteristics */ +sp_c_chistic: + sp_chistic { } + | opt_not DETERMINISTIC_SYM { Lex->sp_chistics.detistic= ! $1; } + ; + +sp_suid: + SQL_SYM SECURITY_SYM DEFINER_SYM { $$= SP_IS_SUID; } + | SQL_SYM SECURITY_SYM INVOKER_SYM { $$= SP_IS_NOT_SUID; } + ; + +call: + CALL_SYM ident + { + if (unlikely(Lex->call_statement_start(thd, &$2))) + MYSQL_YYABORT; + } + opt_sp_cparam_list + { + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } + | CALL_SYM ident '.' ident + { + if (unlikely(Lex->call_statement_start(thd, &$2, &$4))) + MYSQL_YYABORT; + } + opt_sp_cparam_list + { + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } + | CALL_SYM ident '.' ident '.' 
ident + { + if (unlikely(Lex->call_statement_start(thd, &$2, &$4, &$6))) + MYSQL_YYABORT; + } + opt_sp_cparam_list + { + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } + ; + +/* CALL parameters */ +opt_sp_cparam_list: + /* Empty */ + | '(' opt_sp_cparams ')' + ; + +opt_sp_cparams: + /* Empty */ + | sp_cparams + ; + +sp_cparams: + sp_cparams ',' expr + { + Lex->value_list.push_back($3, thd->mem_root); + } + | expr + { + Lex->value_list.push_back($1, thd->mem_root); + } + ; + +/* Stored FUNCTION parameter declaration list */ +sp_fdparam_list: + /* Empty */ + { + Lex->sphead->m_param_begin= YYLIP->get_cpp_tok_start(); + Lex->sphead->m_param_end= Lex->sphead->m_param_begin; + } + | + { + Lex->sphead->m_param_begin= YYLIP->get_cpp_tok_start(); + } + sp_fdparams + { + Lex->sphead->m_param_end= YYLIP->get_cpp_tok_start(); + } + ; + +sp_fdparams: + sp_fdparams ',' sp_param + | sp_param + ; + +sp_param_name: + ident + { + if (unlikely(!($$= Lex->sp_param_init(&$1)))) + MYSQL_YYABORT; + } + ; + +/* Stored PROCEDURE parameter declaration list */ +sp_pdparam_list: + /* Empty */ + | sp_pdparams + ; + +sp_pdparams: + sp_pdparams ',' sp_param + | sp_param + ; + +sp_parameter_type: + IN_SYM { $$= sp_variable::MODE_IN; } + | OUT_SYM { $$= sp_variable::MODE_OUT; } + | INOUT_SYM { $$= sp_variable::MODE_INOUT; } + ; + +sp_parenthesized_pdparam_list: + '(' + { + Lex->sphead->m_param_begin= YYLIP->get_cpp_tok_start() + 1; + } + sp_pdparam_list + ')' + { + Lex->sphead->m_param_end= YYLIP->get_cpp_tok_start(); + } + ; + +sp_parenthesized_fdparam_list: + '(' sp_fdparam_list ')' + ; + +sp_proc_stmts: + /* Empty */ {} + | sp_proc_stmts sp_proc_stmt ';' + ; + +sp_proc_stmts1: + sp_proc_stmt ';' {} + | sp_proc_stmts1 sp_proc_stmt ';' + ; + + +optionally_qualified_column_ident: + sp_decl_ident + { + if (unlikely(!($$= new (thd->mem_root) + Qualified_column_ident(&$1)))) + MYSQL_YYABORT; + } + | sp_decl_ident '.' 
ident + { + if (unlikely(!($$= new (thd->mem_root) + Qualified_column_ident(&$1, &$3)))) + MYSQL_YYABORT; + } + | sp_decl_ident '.' ident '.' ident + { + if (unlikely(!($$= new (thd->mem_root) + Qualified_column_ident(thd, &$1, &$3, &$5)))) + MYSQL_YYABORT; + } + ; + + +row_field_definition: + row_field_name field_type + { + Lex->last_field->set_attributes(thd, $2, + COLUMN_DEFINITION_ROUTINE_LOCAL); + } + ; + +row_field_definition_list: + row_field_definition + { + if (!($$= Row_definition_list::make(thd->mem_root, $1))) + MYSQL_YYABORT; + } + | row_field_definition_list ',' row_field_definition + { + if (($$= $1)->append_uniq(thd->mem_root, $3)) + MYSQL_YYABORT; + } + ; + +row_type_body: + '(' row_field_definition_list ')' { $$= $2; } + ; + +sp_decl_idents_init_vars: + sp_decl_idents + { + Lex->sp_variable_declarations_init(thd, $1); + } + ; + +sp_decl_variable_list: + sp_decl_idents_init_vars + field_type + { + Lex->last_field->set_attributes(thd, $2, + COLUMN_DEFINITION_ROUTINE_LOCAL); + } + sp_opt_default + { + if (unlikely(Lex->sp_variable_declarations_finalize(thd, $1, + &Lex->last_field[0], + $4))) + MYSQL_YYABORT; + $$.init_using_vars($1); + } + | sp_decl_idents_init_vars + ROW_SYM row_type_body + sp_opt_default + { + if (unlikely(Lex->sp_variable_declarations_row_finalize(thd, $1, $3, $4))) + MYSQL_YYABORT; + $$.init_using_vars($1); + } + | sp_decl_variable_list_anchored + ; + +sp_decl_handler: + sp_handler_type HANDLER_SYM FOR_SYM + { + if (unlikely(Lex->sp_handler_declaration_init(thd, $1))) + MYSQL_YYABORT; + } + sp_hcond_list sp_proc_stmt + { + if (unlikely(Lex->sp_handler_declaration_finalize(thd, $1))) + MYSQL_YYABORT; + $$.vars= $$.conds= $$.curs= 0; + $$.hndlrs= 1; + } + ; + +opt_parenthesized_cursor_formal_parameters: + /* Empty */ + | '(' sp_fdparams ')' + ; + + +sp_cursor_stmt_lex: + { + DBUG_ASSERT(thd->lex->sphead); + if (unlikely(!($$= new (thd->mem_root) + sp_lex_cursor(thd, thd->lex)))) + MYSQL_YYABORT; + } + ; + +sp_cursor_stmt: + 
sp_cursor_stmt_lex + { + DBUG_ASSERT(thd->free_list == NULL); + Lex->sphead->reset_lex(thd, $1); + if (Lex->main_select_push(true)) + MYSQL_YYABORT; + } + select + { + DBUG_ASSERT(Lex == $1); + Lex->pop_select(); //main select + if (unlikely($1->stmt_finalize(thd)) || + unlikely($1->sphead->restore_lex(thd))) + MYSQL_YYABORT; + $$= $1; + } + ; + +sp_handler_type: + EXIT_MARIADB_SYM { $$= sp_handler::EXIT; } + | CONTINUE_MARIADB_SYM { $$= sp_handler::CONTINUE; } + | EXIT_ORACLE_SYM { $$= sp_handler::EXIT; } + | CONTINUE_ORACLE_SYM { $$= sp_handler::CONTINUE; } + /*| UNDO_SYM { QQ No yet } */ + ; + +sp_hcond_list: + sp_hcond_element + { $$= 1; } + | sp_hcond_list ',' sp_hcond_element + { $$+= 1; } + ; + +sp_hcond_element: + sp_hcond + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + sp_pcontext *ctx= lex->spcont->parent_context(); + + if (unlikely(ctx->check_duplicate_handler($1))) + my_yyabort_error((ER_SP_DUP_HANDLER, MYF(0))); + + sp_instr_hpush_jump *i= (sp_instr_hpush_jump *)sp->last_instruction(); + i->add_condition($1); + } + ; + +sp_cond: + ulong_num + { /* mysql errno */ + if (unlikely($1 == 0)) + my_yyabort_error((ER_WRONG_VALUE, MYF(0), "CONDITION", "0")); + $$= new (thd->mem_root) sp_condition_value($1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | sqlstate + ; + +sqlstate: + SQLSTATE_SYM opt_value TEXT_STRING_literal + { /* SQLSTATE */ + + /* + An error is triggered: + - if the specified string is not a valid SQLSTATE, + - or if it represents the completion condition -- it is not + allowed to SIGNAL, or declare a handler for the completion + condition. 
+ */ + if (unlikely(!is_sqlstate_valid(&$3) || + is_sqlstate_completion($3.str))) + my_yyabort_error((ER_SP_BAD_SQLSTATE, MYF(0), $3.str)); + $$= new (thd->mem_root) sp_condition_value($3.str); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +opt_value: + /* Empty */ {} + | VALUE_SYM {} + ; + +sp_hcond: + sp_cond + { + $$= $1; + } + | ident /* CONDITION name */ + { + $$= Lex->spcont->find_declared_or_predefined_condition(thd, &$1); + if (unlikely($$ == NULL)) + my_yyabort_error((ER_SP_COND_MISMATCH, MYF(0), $1.str)); + } + | SQLWARNING_SYM /* SQLSTATEs 01??? */ + { + $$= new (thd->mem_root) sp_condition_value(sp_condition_value::WARNING); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | not FOUND_SYM /* SQLSTATEs 02??? */ + { + $$= new (thd->mem_root) sp_condition_value(sp_condition_value::NOT_FOUND); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | SQLEXCEPTION_SYM /* All other SQLSTATEs */ + { + $$= new (thd->mem_root) sp_condition_value(sp_condition_value::EXCEPTION); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | OTHERS_ORACLE_SYM /* All other SQLSTATEs */ + { + $$= new (thd->mem_root) sp_condition_value(sp_condition_value::EXCEPTION); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + + +raise_stmt_oracle: + RAISE_ORACLE_SYM opt_set_signal_information + { + if (unlikely(Lex->add_resignal_statement(thd, NULL))) + MYSQL_YYABORT; + } + | RAISE_ORACLE_SYM signal_value opt_set_signal_information + { + if (unlikely(Lex->add_signal_statement(thd, $2))) + MYSQL_YYABORT; + } + ; + +signal_stmt: + SIGNAL_SYM signal_value opt_set_signal_information + { + if (Lex->add_signal_statement(thd, $2)) + MYSQL_YYABORT; + } + ; + +signal_value: + ident + { + if (!($$= Lex->stmt_signal_value($1))) + MYSQL_YYABORT; + } + | sqlstate + { $$= $1; } + ; + +opt_signal_value: + /* empty */ + { $$= NULL; } + | signal_value + { $$= $1; } + ; + +opt_set_signal_information: + /* empty */ + { + thd->m_parser_state->m_yacc.m_set_signal_info.clear(); + } + | SET 
signal_information_item_list + ; + +signal_information_item_list: + signal_condition_information_item_name '=' signal_allowed_expr + { + Set_signal_information *info; + info= &thd->m_parser_state->m_yacc.m_set_signal_info; + int index= (int) $1; + info->clear(); + info->m_item[index]= $3; + } + | signal_information_item_list ',' + signal_condition_information_item_name '=' signal_allowed_expr + { + Set_signal_information *info; + info= &thd->m_parser_state->m_yacc.m_set_signal_info; + int index= (int) $3; + if (unlikely(info->m_item[index] != NULL)) + my_yyabort_error((ER_DUP_SIGNAL_SET, MYF(0), + Diag_condition_item_names[index].str)); + info->m_item[index]= $5; + } + ; + +/* + Only a limited subset of are allowed in SIGNAL/RESIGNAL. +*/ +signal_allowed_expr: + literal + { $$= $1; } + | variable + { + if ($1->type() == Item::FUNC_ITEM) + { + Item_func *item= (Item_func*) $1; + if (unlikely(item->functype() == Item_func::SUSERVAR_FUNC)) + { + /* + Don't allow the following syntax: + SIGNAL/RESIGNAL ... 
+ SET = @foo := expr + */ + thd->parse_error(); + MYSQL_YYABORT; + } + } + $$= $1; + } + | simple_ident + { $$= $1; } + ; + +/* conditions that can be set in signal / resignal */ +signal_condition_information_item_name: + CLASS_ORIGIN_SYM + { $$= DIAG_CLASS_ORIGIN; } + | SUBCLASS_ORIGIN_SYM + { $$= DIAG_SUBCLASS_ORIGIN; } + | CONSTRAINT_CATALOG_SYM + { $$= DIAG_CONSTRAINT_CATALOG; } + | CONSTRAINT_SCHEMA_SYM + { $$= DIAG_CONSTRAINT_SCHEMA; } + | CONSTRAINT_NAME_SYM + { $$= DIAG_CONSTRAINT_NAME; } + | CATALOG_NAME_SYM + { $$= DIAG_CATALOG_NAME; } + | SCHEMA_NAME_SYM + { $$= DIAG_SCHEMA_NAME; } + | TABLE_NAME_SYM + { $$= DIAG_TABLE_NAME; } + | COLUMN_NAME_SYM + { $$= DIAG_COLUMN_NAME; } + | CURSOR_NAME_SYM + { $$= DIAG_CURSOR_NAME; } + | MESSAGE_TEXT_SYM + { $$= DIAG_MESSAGE_TEXT; } + | MYSQL_ERRNO_SYM + { $$= DIAG_MYSQL_ERRNO; } + | ROW_NUMBER_SYM + { $$= DIAG_ROW_NUMBER; } + ; + +resignal_stmt: + RESIGNAL_SYM opt_signal_value opt_set_signal_information + { + if (unlikely(Lex->add_resignal_statement(thd, $2))) + MYSQL_YYABORT; + } + ; + +get_diagnostics: + GET_SYM which_area DIAGNOSTICS_SYM diagnostics_information + { + Diagnostics_information *info= $4; + + info->set_which_da($2); + + Lex->sql_command= SQLCOM_GET_DIAGNOSTICS; + Lex->m_sql_cmd= new (thd->mem_root) Sql_cmd_get_diagnostics(info); + + if (unlikely(Lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } + ; + +which_area: + /* If is not specified, then CURRENT is implicit. 
*/ + { $$= Diagnostics_information::CURRENT_AREA; } + | CURRENT_SYM + { $$= Diagnostics_information::CURRENT_AREA; } + ; + +diagnostics_information: + statement_information + { + $$= new (thd->mem_root) Statement_information($1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | CONDITION_SYM condition_number condition_information + { + $$= new (thd->mem_root) Condition_information($2, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +statement_information: + statement_information_item + { + $$= new (thd->mem_root) List; + if (unlikely($$ == NULL) || + unlikely($$->push_back($1, thd->mem_root))) + MYSQL_YYABORT; + } + | statement_information ',' statement_information_item + { + if (unlikely($1->push_back($3, thd->mem_root))) + MYSQL_YYABORT; + $$= $1; + } + ; + +statement_information_item: + simple_target_specification '=' statement_information_item_name + { + $$= new (thd->mem_root) Statement_information_item($3, $1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +simple_target_specification: + ident_cli + { + if (unlikely(!($$= thd->lex->create_item_for_sp_var(&$1, NULL)))) + MYSQL_YYABORT; + } + | '@' ident_or_text + { + if (!$2.length) + { + thd->parse_error(); + MYSQL_YYABORT; + } + $$= new (thd->mem_root) Item_func_get_user_var(thd, &$2); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +statement_information_item_name: + NUMBER_MARIADB_SYM + { $$= Statement_information_item::NUMBER; } + | NUMBER_ORACLE_SYM + { $$= Statement_information_item::NUMBER; } + | ROW_COUNT_SYM + { $$= Statement_information_item::ROW_COUNT; } + ; + +/* + Only a limited subset of are allowed in GET DIAGNOSTICS + , same subset as for SIGNAL/RESIGNAL. 
+*/ +condition_number: + signal_allowed_expr + { $$= $1; } + ; + +condition_information: + condition_information_item + { + $$= new (thd->mem_root) List; + if (unlikely($$ == NULL) || + unlikely($$->push_back($1, thd->mem_root))) + MYSQL_YYABORT; + } + | condition_information ',' condition_information_item + { + if (unlikely($1->push_back($3, thd->mem_root))) + MYSQL_YYABORT; + $$= $1; + } + ; + +condition_information_item: + simple_target_specification '=' condition_information_item_name + { + $$= new (thd->mem_root) Condition_information_item($3, $1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +condition_information_item_name: + CLASS_ORIGIN_SYM + { $$= Condition_information_item::CLASS_ORIGIN; } + | SUBCLASS_ORIGIN_SYM + { $$= Condition_information_item::SUBCLASS_ORIGIN; } + | CONSTRAINT_CATALOG_SYM + { $$= Condition_information_item::CONSTRAINT_CATALOG; } + | CONSTRAINT_SCHEMA_SYM + { $$= Condition_information_item::CONSTRAINT_SCHEMA; } + | CONSTRAINT_NAME_SYM + { $$= Condition_information_item::CONSTRAINT_NAME; } + | CATALOG_NAME_SYM + { $$= Condition_information_item::CATALOG_NAME; } + | SCHEMA_NAME_SYM + { $$= Condition_information_item::SCHEMA_NAME; } + | TABLE_NAME_SYM + { $$= Condition_information_item::TABLE_NAME; } + | COLUMN_NAME_SYM + { $$= Condition_information_item::COLUMN_NAME; } + | CURSOR_NAME_SYM + { $$= Condition_information_item::CURSOR_NAME; } + | MESSAGE_TEXT_SYM + { $$= Condition_information_item::MESSAGE_TEXT; } + | MYSQL_ERRNO_SYM + { $$= Condition_information_item::MYSQL_ERRNO; } + | RETURNED_SQLSTATE_SYM + { $$= Condition_information_item::RETURNED_SQLSTATE; } + | ROW_NUMBER_SYM + { $$= Condition_information_item::ROW_NUMBER; } + ; + +sp_decl_ident: + IDENT_sys + | keyword_sp_decl + { + if (unlikely($$.copy_ident_cli(thd, &$1))) + MYSQL_YYABORT; + } + ; + +sp_decl_idents: + sp_decl_ident + { + /* NOTE: field definition is filled in sp_decl section. 
*/ + + LEX *lex= Lex; + sp_pcontext *spc= lex->spcont; + + if (unlikely(spc->find_variable(&$1, TRUE))) + my_yyabort_error((ER_SP_DUP_VAR, MYF(0), $1.str)); + spc->add_variable(thd, &$1); + $$= 1; + } + | sp_decl_idents ',' ident + { + /* NOTE: field definition is filled in sp_decl section. */ + + LEX *lex= Lex; + sp_pcontext *spc= lex->spcont; + + if (unlikely(spc->find_variable(&$3, TRUE))) + my_yyabort_error((ER_SP_DUP_VAR, MYF(0), $3.str)); + spc->add_variable(thd, &$3); + $$= $1 + 1; + } + ; + +sp_proc_stmt_if: + IF_SYM + { + if (unlikely(Lex->maybe_start_compound_statement(thd))) + MYSQL_YYABORT; + Lex->sphead->new_cont_backpatch(NULL); + } + sp_if END IF_SYM + { Lex->sphead->do_cont_backpatch(); } + ; + +sp_proc_stmt_statement: + { + LEX *lex= thd->lex; + Lex_input_stream *lip= YYLIP; + + lex->sphead->reset_lex(thd); + /* + We should not push main select here, it will be done or not + done by the statement, we just provide only a new LEX for the + statement here as if it is start of parsing a new statement. 
+ */ + lex->sphead->m_tmp_query= lip->get_tok_start(); + } + sp_statement + { + if (Lex->sp_proc_stmt_statement_finalize(thd, yychar == YYEMPTY) || + Lex->sphead->restore_lex(thd)) + MYSQL_YYABORT; + } + ; + + +RETURN_ALLMODES_SYM: + RETURN_MARIADB_SYM + | RETURN_ORACLE_SYM + ; + +sp_proc_stmt_return: + RETURN_ALLMODES_SYM expr_lex + { + sp_head *sp= $2->sphead; + if (unlikely(sp->m_handler->add_instr_freturn(thd, sp, $2->spcont, + $2->get_item(), $2))) + MYSQL_YYABORT; + } + { /* See the comment 'COMMENT_FOR_DESCTRUCTOR' near %destructor */ } + | RETURN_ORACLE_SYM + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + if (unlikely(sp->m_handler->add_instr_preturn(thd, sp, + lex->spcont))) + MYSQL_YYABORT; + } + ; + +sp_proc_stmt_exit_oracle: + EXIT_ORACLE_SYM + { + if (unlikely(Lex->sp_exit_statement(thd, NULL))) + MYSQL_YYABORT; + } + | EXIT_ORACLE_SYM label_ident + { + if (unlikely(Lex->sp_exit_statement(thd, &$2, NULL))) + MYSQL_YYABORT; + } + | EXIT_ORACLE_SYM WHEN_SYM expr_lex + { + if (unlikely($3->sp_exit_statement(thd, $3->get_item()))) + MYSQL_YYABORT; + } + { /* See the comment 'COMMENT_FOR_DESCTRUCTOR' near %destructor */ } + | EXIT_ORACLE_SYM label_ident WHEN_SYM expr_lex + { + if (unlikely($4->sp_exit_statement(thd, &$2, $4->get_item()))) + MYSQL_YYABORT; + } + { /* See the comment 'COMMENT_FOR_DESCTRUCTOR' near %destructor */ } + ; + +sp_proc_stmt_continue_oracle: + CONTINUE_ORACLE_SYM + { + if (unlikely(Lex->sp_continue_statement(thd))) + MYSQL_YYABORT; + } + | CONTINUE_ORACLE_SYM label_ident + { + if (unlikely(Lex->sp_continue_statement(thd, &$2))) + MYSQL_YYABORT; + } + | CONTINUE_ORACLE_SYM WHEN_SYM expr_lex + { + if (unlikely($3->sp_continue_when_statement(thd))) + MYSQL_YYABORT; + } + { /* See the comment 'COMMENT_FOR_DESCTRUCTOR' near %destructor */ } + | CONTINUE_ORACLE_SYM label_ident WHEN_SYM expr_lex + { + if (unlikely($4->sp_continue_when_statement(thd, &$2))) + MYSQL_YYABORT; + } + { /* See the comment 'COMMENT_FOR_DESCTRUCTOR' near 
%destructor */ } + ; + + +sp_proc_stmt_leave: + LEAVE_SYM label_ident + { + if (unlikely(Lex->sp_leave_statement(thd, &$2))) + MYSQL_YYABORT; + } + ; + +sp_proc_stmt_iterate: + ITERATE_SYM label_ident + { + if (unlikely(Lex->sp_iterate_statement(thd, &$2))) + MYSQL_YYABORT; + } + ; + +sp_proc_stmt_goto_oracle: + GOTO_ORACLE_SYM label_ident + { + if (unlikely(Lex->sp_goto_statement(thd, &$2))) + MYSQL_YYABORT; + } + ; + + +expr_lex: + { + DBUG_ASSERT(Lex->sphead); + if (unlikely(!($$= new (thd->mem_root) + sp_expr_lex(thd, thd->lex)))) + MYSQL_YYABORT; + Lex->sphead->reset_lex(thd, $$); + if (Lex->main_select_push(true)) + MYSQL_YYABORT; + } + expr + { + $$= $1; + $$->set_item($2); + Lex->pop_select(); //min select + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + if ($$->sphead->restore_lex(thd)) + MYSQL_YYABORT; + } + ; + + +assignment_source_lex: + { + DBUG_ASSERT(Lex->sphead); + if (unlikely(!($$= new (thd->mem_root) + sp_assignment_lex(thd, thd->lex)))) + MYSQL_YYABORT; + } + ; + +assignment_source_expr: + assignment_source_lex + { + DBUG_ASSERT(thd->free_list == NULL); + Lex->sphead->reset_lex(thd, $1); + if (Lex->main_select_push(true)) + MYSQL_YYABORT; + } + expr + { + DBUG_ASSERT($1 == thd->lex); + $$= $1; + $$->set_item_and_free_list($3, thd->free_list); + thd->free_list= NULL; + Lex->pop_select(); //min select + if ($$->sphead->restore_lex(thd)) + MYSQL_YYABORT; + } + ; + +for_loop_bound_expr: + assignment_source_lex + { + Lex->sphead->reset_lex(thd, $1); + if (Lex->main_select_push(true)) + MYSQL_YYABORT; + Lex->current_select->parsing_place= FOR_LOOP_BOUND; + } + expr + { + DBUG_ASSERT($1 == thd->lex); + $$= $1; + $$->set_item_and_free_list($3, NULL); + Lex->pop_select(); //main select + if (unlikely($$->sphead->restore_lex(thd))) + MYSQL_YYABORT; + Lex->current_select->parsing_place= NO_MATTER; + } + ; + +cursor_actual_parameters: + assignment_source_expr + { + if (unlikely(!($$= new (thd->mem_root) List))) + MYSQL_YYABORT; 
+ $$->push_back($1, thd->mem_root); + } + | cursor_actual_parameters ',' assignment_source_expr + { + $$= $1; + $$->push_back($3, thd->mem_root); + } + ; + +opt_parenthesized_cursor_actual_parameters: + /* Empty */ { $$= NULL; } + | '(' cursor_actual_parameters ')' { $$= $2; } + ; + +sp_proc_stmt_with_cursor: + sp_proc_stmt_open + | sp_proc_stmt_fetch + | sp_proc_stmt_close + ; + +sp_proc_stmt_open: + OPEN_SYM ident opt_parenthesized_cursor_actual_parameters + { + if (unlikely(Lex->sp_open_cursor(thd, &$2, $3))) + MYSQL_YYABORT; + } + ; + +sp_proc_stmt_fetch_head: + FETCH_SYM ident INTO + { + if (unlikely(Lex->sp_add_cfetch(thd, &$2))) + MYSQL_YYABORT; + } + | FETCH_SYM FROM ident INTO + { + if (unlikely(Lex->sp_add_cfetch(thd, &$3))) + MYSQL_YYABORT; + } + | FETCH_SYM NEXT_SYM FROM ident INTO + { + if (unlikely(Lex->sp_add_cfetch(thd, &$4))) + MYSQL_YYABORT; + } + ; + +sp_proc_stmt_fetch: + sp_proc_stmt_fetch_head sp_fetch_list { } + | FETCH_SYM GROUP_SYM NEXT_SYM ROW_SYM + { + if (unlikely(Lex->sp_add_agg_cfetch())) + MYSQL_YYABORT; + } + ; + +sp_proc_stmt_close: + CLOSE_SYM ident + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + uint offset; + sp_instr_cclose *i; + + if (unlikely(!lex->spcont->find_cursor(&$2, &offset, false))) + my_yyabort_error((ER_SP_CURSOR_MISMATCH, MYF(0), $2.str)); + i= new (thd->mem_root) + sp_instr_cclose(sp->instructions(), lex->spcont, offset); + if (unlikely(i == NULL) || + unlikely(sp->add_instr(i))) + MYSQL_YYABORT; + } + ; + +sp_fetch_list: + ident + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + sp_pcontext *spc= lex->spcont; + sp_variable *spv= likely(spc != NULL) + ? 
spc->find_variable(&$1, false) + : NULL; + + if (unlikely(!spv)) + my_yyabort_error((ER_SP_UNDECLARED_VAR, MYF(0), $1.str)); + + /* An SP local variable */ + sp_instr_cfetch *i= (sp_instr_cfetch *)sp->last_instruction(); + i->add_to_varlist(spv); + } + | sp_fetch_list ',' ident + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + sp_pcontext *spc= lex->spcont; + sp_variable *spv= likely(spc != NULL) + ? spc->find_variable(&$3, false) + : NULL; + + if (unlikely(!spv)) + my_yyabort_error((ER_SP_UNDECLARED_VAR, MYF(0), $3.str)); + + /* An SP local variable */ + sp_instr_cfetch *i= (sp_instr_cfetch *)sp->last_instruction(); + i->add_to_varlist(spv); + } + ; + +sp_if: + expr_lex THEN_SYM + { + if (unlikely($1->sp_if_expr(thd))) + MYSQL_YYABORT; + } + sp_if_then_statements + { + if (unlikely($1->sp_if_after_statements(thd))) + MYSQL_YYABORT; + } + sp_elseifs + { + LEX *lex= Lex; + + lex->sphead->backpatch(lex->spcont->pop_label()); + } + ; + +sp_elseifs: + /* Empty */ + | ELSEIF_MARIADB_SYM sp_if + | ELSIF_ORACLE_SYM sp_if + | ELSE sp_if_then_statements + ; + +case_stmt_specification: + CASE_SYM + { + if (unlikely(Lex->maybe_start_compound_statement(thd))) + MYSQL_YYABORT; + + /** + An example of the CASE statement in use is +
+            CREATE PROCEDURE proc_19194_simple(i int)
+            BEGIN
+              DECLARE str CHAR(10);
+
+              CASE i
+                WHEN 1 THEN SET str="1";
+                WHEN 2 THEN SET str="2";
+                WHEN 3 THEN SET str="3";
+                ELSE SET str="unknown";
+              END CASE;
+
+              SELECT str;
+            END
+            
+ The actions are used to generate the following code: +
+            SHOW PROCEDURE CODE proc_19194_simple;
+            Pos     Instruction
+            0       set str@1 NULL
+            1       set_case_expr (12) 0 i@0
+            2       jump_if_not 5(12) (case_expr@0 = 1)
+            3       set str@1 _latin1'1'
+            4       jump 12
+            5       jump_if_not 8(12) (case_expr@0 = 2)
+            6       set str@1 _latin1'2'
+            7       jump 12
+            8       jump_if_not 11(12) (case_expr@0 = 3)
+            9       set str@1 _latin1'3'
+            10      jump 12
+            11      set str@1 _latin1'unknown'
+            12      stmt 0 "SELECT str"
+            
+ */ + + Lex->sphead->new_cont_backpatch(NULL); + + /* + BACKPATCH: Creating target label for the jump to after END CASE + (instruction 12 in the example) + */ + Lex->spcont->push_label(thd, &empty_clex_str, Lex->sphead->instructions()); + } + case_stmt_body + else_clause_opt + END + CASE_SYM + { + /* + BACKPATCH: Resolving forward jump from + "case_stmt_action_then" to after END CASE + (jump from instruction 4 to 12, 7 to 12 ... in the example) + */ + Lex->sphead->backpatch(Lex->spcont->pop_label()); + + if ($3) + Lex->spcont->pop_case_expr_id(); + + Lex->sphead->do_cont_backpatch(); + } + ; + +case_stmt_body: + expr_lex + { + if (unlikely($1->case_stmt_action_expr())) + MYSQL_YYABORT; + } + simple_when_clause_list + { $$= 1; } + | searched_when_clause_list + { $$= 0; } + ; + +simple_when_clause_list: + simple_when_clause + | simple_when_clause_list simple_when_clause + ; + +searched_when_clause_list: + searched_when_clause + | searched_when_clause_list searched_when_clause + ; + +simple_when_clause: + WHEN_SYM expr_lex + { + /* Simple case: = */ + if (unlikely($2->case_stmt_action_when(true))) + MYSQL_YYABORT; + } + THEN_SYM + sp_case_then_statements + { + if (unlikely(Lex->case_stmt_action_then())) + MYSQL_YYABORT; + } + ; + +searched_when_clause: + WHEN_SYM expr_lex + { + if (unlikely($2->case_stmt_action_when(false))) + MYSQL_YYABORT; + } + THEN_SYM + sp_case_then_statements + { + if (unlikely(Lex->case_stmt_action_then())) + MYSQL_YYABORT; + } + ; + +else_clause_opt: + /* empty */ + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + uint ip= sp->instructions(); + sp_instr_error *i= new (thd->mem_root) + sp_instr_error(ip, lex->spcont, ER_SP_CASE_NOT_FOUND); + if (unlikely(i == NULL) || + unlikely(sp->add_instr(i))) + MYSQL_YYABORT; + } + | ELSE sp_case_then_statements + ; + +sp_opt_label: + /* Empty */ { $$= null_clex_str; } + | label_ident { $$= $1; } + ; + +/* This adds one shift/reduce conflict */ +opt_sp_for_loop_direction: + /* Empty */ { $$= 1; } + | 
REVERSE_SYM { $$= -1; } + ; + +sp_for_loop_index_and_bounds: + ident_for_loop_index sp_for_loop_bounds + { + if (unlikely(Lex->sp_for_loop_declarations(thd, &$$, &$1, $2))) + MYSQL_YYABORT; + } + ; + +sp_for_loop_bounds: + IN_SYM opt_sp_for_loop_direction for_loop_bound_expr + DOT_DOT_SYM for_loop_bound_expr + { + $$= Lex_for_loop_bounds_intrange($2, $3, $5); + } + | IN_SYM opt_sp_for_loop_direction for_loop_bound_expr + { + $$.m_direction= $2; + $$.m_index= $3; + $$.m_target_bound= NULL; + $$.m_implicit_cursor= false; + } + | IN_SYM opt_sp_for_loop_direction '(' sp_cursor_stmt ')' + { + if (unlikely(Lex->sp_for_loop_implicit_cursor_statement(thd, &$$, + $4))) + MYSQL_YYABORT; + } + ; + +loop_body: + sp_proc_stmts1 END LOOP_SYM + { + LEX *lex= Lex; + uint ip= lex->sphead->instructions(); + sp_label *lab= lex->spcont->last_label(); /* Jumping back */ + sp_instr_jump *i= new (thd->mem_root) + sp_instr_jump(ip, lex->spcont, lab->ip); + if (unlikely(i == NULL) || + unlikely(lex->sphead->add_instr(i))) + MYSQL_YYABORT; + } + ; + +repeat_body: + sp_proc_stmts1 UNTIL_SYM expr_lex END REPEAT_SYM + { + if ($3->sp_repeat_loop_finalize(thd)) + MYSQL_YYABORT; + } + ; + +pop_sp_loop_label: + sp_opt_label + { + if (unlikely(Lex->sp_pop_loop_label(thd, &$1))) + MYSQL_YYABORT; + } + ; + +sp_labeled_control: + sp_control_label LOOP_SYM + { + if (unlikely(Lex->sp_push_loop_label(thd, &$1))) + MYSQL_YYABORT; + } + loop_body pop_sp_loop_label + { } + | sp_control_label WHILE_SYM + { + if (unlikely(Lex->sp_push_loop_label(thd, &$1))) + MYSQL_YYABORT; + } + while_body pop_sp_loop_label + { } + | sp_control_label FOR_SYM + { + // See "The FOR LOOP statement" comments in sql_lex.cc + Lex->sp_block_init(thd); // The outer DECLARE..BEGIN..END block + } + sp_for_loop_index_and_bounds + { + if (unlikely(Lex->sp_push_loop_label(thd, &$1))) // The inner WHILE block + MYSQL_YYABORT; + if (unlikely(Lex->sp_for_loop_condition_test(thd, $4))) + MYSQL_YYABORT; + } + for_loop_statements + { + if 
(unlikely(Lex->sp_for_loop_finalize(thd, $4))) + MYSQL_YYABORT; + } + pop_sp_loop_label // The inner WHILE block + { + if (unlikely(Lex->sp_for_loop_outer_block_finalize(thd, $4))) + MYSQL_YYABORT; + } + | sp_control_label REPEAT_SYM + { + if (unlikely(Lex->sp_push_loop_label(thd, &$1))) + MYSQL_YYABORT; + } + repeat_body pop_sp_loop_label + { } + ; + +sp_unlabeled_control: + LOOP_SYM + { + if (unlikely(Lex->sp_push_loop_empty_label(thd))) + MYSQL_YYABORT; + } + loop_body + { + Lex->sp_pop_loop_empty_label(thd); + } + | WHILE_SYM + { + if (unlikely(Lex->sp_push_loop_empty_label(thd))) + MYSQL_YYABORT; + } + while_body + { + Lex->sp_pop_loop_empty_label(thd); + } + | FOR_SYM + { + // See "The FOR LOOP statement" comments in sql_lex.cc + if (unlikely(Lex->maybe_start_compound_statement(thd))) + MYSQL_YYABORT; + Lex->sp_block_init(thd); // The outer DECLARE..BEGIN..END block + } + sp_for_loop_index_and_bounds + { + if (unlikely(Lex->sp_push_loop_empty_label(thd))) // The inner WHILE block + MYSQL_YYABORT; + if (unlikely(Lex->sp_for_loop_condition_test(thd, $3))) + MYSQL_YYABORT; + } + for_loop_statements + { + if (unlikely(Lex->sp_for_loop_finalize(thd, $3))) + MYSQL_YYABORT; + Lex->sp_pop_loop_empty_label(thd); // The inner WHILE block + if (unlikely(Lex->sp_for_loop_outer_block_finalize(thd, $3))) + MYSQL_YYABORT; + } + | REPEAT_SYM + { + if (unlikely(Lex->sp_push_loop_empty_label(thd))) + MYSQL_YYABORT; + } + repeat_body + { + Lex->sp_pop_loop_empty_label(thd); + } + ; + +trg_action_time: + BEFORE_SYM + { Lex->trg_chistics.action_time= TRG_ACTION_BEFORE; } + | AFTER_SYM + { Lex->trg_chistics.action_time= TRG_ACTION_AFTER; } + ; + +trg_event: + INSERT + { Lex->trg_chistics.event= TRG_EVENT_INSERT; } + | UPDATE_SYM + { Lex->trg_chistics.event= TRG_EVENT_UPDATE; } + | DELETE_SYM + { Lex->trg_chistics.event= TRG_EVENT_DELETE; } + ; + +create_body: + create_field_list_parens + { Lex->create_info.option_list= NULL; } + opt_create_table_options opt_create_partitioning 
opt_create_select {} + | opt_create_table_options opt_create_partitioning opt_create_select {} + | create_like + { + + Lex->create_info.add(DDL_options_st::OPT_LIKE); + TABLE_LIST *src_table= Lex->first_select_lex()-> + add_table_to_list(thd, $1, NULL, 0, TL_READ, MDL_SHARED_READ); + if (unlikely(! src_table)) + MYSQL_YYABORT; + /* CREATE TABLE ... LIKE is not allowed for views. */ + src_table->required_type= TABLE_TYPE_NORMAL; + } + ; + +create_like: + LIKE table_ident { $$= $2; } + | LEFT_PAREN_LIKE LIKE table_ident ')' { $$= $3; } + ; + +opt_create_select: + /* empty */ {} + | opt_duplicate opt_as create_select_query_expression + opt_versioning_option + { + Lex->create_info.add(DDL_options_st::OPT_CREATE_SELECT); + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } + ; + +create_select_query_expression: + query_expression + { + if (Lex->parsed_insert_select($1->first_select())) + MYSQL_YYABORT; + } + | LEFT_PAREN_WITH with_clause query_expression_no_with_clause ')' + { + SELECT_LEX *first_select= $3->first_select(); + $3->set_with_clause($2); + $2->attach_to(first_select); + if (Lex->parsed_insert_select(first_select)) + MYSQL_YYABORT; + } + ; + +opt_create_partitioning: + opt_partitioning + { + /* + Remove all tables used in PARTITION clause from the global table + list. Partitioning with subqueries is not allowed anyway. + */ + TABLE_LIST *last_non_sel_table= Lex->create_last_non_select_table; + last_non_sel_table->next_global= 0; + Lex->query_tables_last= &last_non_sel_table->next_global; + } + ; + +/* + This part of the parser is about handling of the partition information. + + Its first version was written by Mikael Ronström with lots of answers to + questions provided by Antony Curtis. + + The partition grammar can be called from three places. + 1) CREATE TABLE ... PARTITION .. + 2) ALTER TABLE table_name PARTITION ... + 3) PARTITION ... + + The first place is called when a new table is created from a MySQL client. 
+ The second place is called when a table is altered with the ALTER TABLE + command from a MySQL client. + The third place is called when opening an frm file and finding partition + info in the .frm file. It is necessary to avoid allowing PARTITION to be + an allowed entry point for SQL client queries. This is arranged by setting + some state variables before arriving here. + + To be able to handle errors we will only set error code in this code + and handle the error condition in the function calling the parser. This + is necessary to ensure we can also handle errors when calling the parser + from the openfrm function. +*/ +opt_partitioning: + /* empty */ {} + | partitioning + ; + +partitioning: + PARTITION_SYM have_partitioning + { + LEX *lex= Lex; + lex->part_info= new (thd->mem_root) partition_info(); + if (unlikely(!lex->part_info)) + MYSQL_YYABORT; + if (lex->sql_command == SQLCOM_ALTER_TABLE) + { + lex->alter_info.partition_flags|= ALTER_PARTITION_INFO; + } + } + partition + ; + +have_partitioning: + /* empty */ + { +#ifdef WITH_PARTITION_STORAGE_ENGINE + LEX_CSTRING partition_name={STRING_WITH_LEN("partition")}; + if (unlikely(!plugin_is_ready(&partition_name, MYSQL_STORAGE_ENGINE_PLUGIN))) + my_yyabort_error((ER_OPTION_PREVENTS_STATEMENT, MYF(0), + "--skip-partition")); +#else + my_yyabort_error((ER_FEATURE_DISABLED, MYF(0), "partitioning", + "--with-plugin-partition")); +#endif + } + ; + +partition_entry: + PARTITION_SYM + { + if (unlikely(!Lex->part_info)) + { + thd->parse_error(ER_PARTITION_ENTRY_ERROR); + MYSQL_YYABORT; + } + if (Lex->main_select_push()) + MYSQL_YYABORT; + /* + We enter here when opening the frm file to translate + partition info string into part_info data structure. 
+ */ + } + partition + { + Lex->pop_select(); //main select + } + ; + +partition: + BY + { Lex->safe_to_cache_query= 1; } + part_type_def opt_num_parts opt_sub_part part_defs + ; + +part_type_def: + opt_linear KEY_SYM opt_key_algo '(' part_field_list ')' + { + partition_info *part_info= Lex->part_info; + part_info->list_of_part_fields= TRUE; + part_info->column_list= FALSE; + part_info->part_type= HASH_PARTITION; + } + | opt_linear HASH_SYM + { Lex->part_info->part_type= HASH_PARTITION; } + part_func {} + | RANGE_SYM part_func + { Lex->part_info->part_type= RANGE_PARTITION; } + | RANGE_SYM part_column_list + { Lex->part_info->part_type= RANGE_PARTITION; } + | LIST_SYM + { + Select->parsing_place= IN_PART_FUNC; + } + part_func + { + Lex->part_info->part_type= LIST_PARTITION; + Select->parsing_place= NO_MATTER; + } + | LIST_SYM part_column_list + { Lex->part_info->part_type= LIST_PARTITION; } + | SYSTEM_TIME_SYM + { + if (unlikely(Lex->part_info->vers_init_info(thd))) + MYSQL_YYABORT; + } + opt_versioning_rotation + ; + +opt_linear: + /* empty */ {} + | LINEAR_SYM + { Lex->part_info->linear_hash_ind= TRUE;} + ; + +opt_key_algo: + /* empty */ + { Lex->part_info->key_algorithm= partition_info::KEY_ALGORITHM_NONE;} + | ALGORITHM_SYM '=' real_ulong_num + { + switch ($3) { + case 1: + Lex->part_info->key_algorithm= partition_info::KEY_ALGORITHM_51; + break; + case 2: + Lex->part_info->key_algorithm= partition_info::KEY_ALGORITHM_55; + break; + default: + thd->parse_error(); + MYSQL_YYABORT; + } + } + ; + +part_field_list: + /* empty */ {} + | part_field_item_list {} + ; + +part_field_item_list: + part_field_item {} + | part_field_item_list ',' part_field_item {} + ; + +part_field_item: + ident + { + partition_info *part_info= Lex->part_info; + part_info->num_columns++; + if (unlikely(part_info->part_field_list.push_back($1.str, + thd->mem_root))) + MYSQL_YYABORT; + if (unlikely(part_info->num_columns > MAX_REF_PARTS)) + 
my_yyabort_error((ER_TOO_MANY_PARTITION_FUNC_FIELDS_ERROR, MYF(0), + "list of partition fields")); + } + ; + +part_column_list: + COLUMNS '(' part_field_list ')' + { + partition_info *part_info= Lex->part_info; + part_info->column_list= TRUE; + part_info->list_of_part_fields= TRUE; + } + ; + + +part_func: + '(' part_func_expr ')' + { + partition_info *part_info= Lex->part_info; + if (unlikely(part_info->set_part_expr(thd, $2, FALSE))) + MYSQL_YYABORT; + part_info->num_columns= 1; + part_info->column_list= FALSE; + } + ; + +sub_part_func: + '(' part_func_expr ')' + { + if (unlikely(Lex->part_info->set_part_expr(thd, $2, TRUE))) + MYSQL_YYABORT; + } + ; + + +opt_num_parts: + /* empty */ {} + | PARTITIONS_SYM real_ulong_num + { + uint num_parts= $2; + partition_info *part_info= Lex->part_info; + if (unlikely(num_parts == 0)) + my_yyabort_error((ER_NO_PARTS_ERROR, MYF(0), "partitions")); + + part_info->num_parts= num_parts; + part_info->use_default_num_partitions= FALSE; + } + ; + +opt_sub_part: + /* empty */ {} + | SUBPARTITION_SYM BY opt_linear HASH_SYM sub_part_func + { Lex->part_info->subpart_type= HASH_PARTITION; } + opt_num_subparts {} + | SUBPARTITION_SYM BY opt_linear KEY_SYM opt_key_algo + '(' sub_part_field_list ')' + { + partition_info *part_info= Lex->part_info; + part_info->subpart_type= HASH_PARTITION; + part_info->list_of_subpart_fields= TRUE; + } + opt_num_subparts {} + ; + +sub_part_field_list: + sub_part_field_item {} + | sub_part_field_list ',' sub_part_field_item {} + ; + +sub_part_field_item: + ident + { + partition_info *part_info= Lex->part_info; + if (unlikely(part_info->subpart_field_list.push_back($1.str, + thd->mem_root))) + MYSQL_YYABORT; + + if (unlikely(part_info->subpart_field_list.elements > MAX_REF_PARTS)) + my_yyabort_error((ER_TOO_MANY_PARTITION_FUNC_FIELDS_ERROR, MYF(0), + "list of subpartition fields")); + } + ; + +part_func_expr: + bit_expr + { + if (unlikely(!Lex->safe_to_cache_query)) + { + 
thd->parse_error(ER_WRONG_EXPR_IN_PARTITION_FUNC_ERROR); + MYSQL_YYABORT; + } + $$=$1; + } + ; + +opt_num_subparts: + /* empty */ {} + | SUBPARTITIONS_SYM real_ulong_num + { + uint num_parts= $2; + LEX *lex= Lex; + if (unlikely(num_parts == 0)) + my_yyabort_error((ER_NO_PARTS_ERROR, MYF(0), "subpartitions")); + lex->part_info->num_subparts= num_parts; + lex->part_info->use_default_num_subpartitions= FALSE; + } + ; + +part_defs: + /* empty */ + { + partition_info *part_info= Lex->part_info; + if (unlikely(part_info->part_type == RANGE_PARTITION)) + my_yyabort_error((ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), + "RANGE")); + if (unlikely(part_info->part_type == LIST_PARTITION)) + my_yyabort_error((ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), + "LIST")); + } + | '(' part_def_list ')' + { + partition_info *part_info= Lex->part_info; + uint count_curr_parts= part_info->partitions.elements; + if (part_info->num_parts != 0) + { + if (unlikely(part_info->num_parts != + count_curr_parts)) + { + thd->parse_error(ER_PARTITION_WRONG_NO_PART_ERROR); + MYSQL_YYABORT; + } + } + else if (count_curr_parts > 0) + { + part_info->num_parts= count_curr_parts; + } + part_info->count_curr_subparts= 0; + } + ; + +part_def_list: + part_definition {} + | part_def_list ',' part_definition {} + ; + +opt_partition: + /* empty */ + | PARTITION_SYM + ; + +part_definition: + opt_partition + { + partition_info *part_info= Lex->part_info; + partition_element *p_elem= new (thd->mem_root) partition_element(); + + if (unlikely(!p_elem) || + unlikely(part_info->partitions.push_back(p_elem, thd->mem_root))) + MYSQL_YYABORT; + + p_elem->part_state= PART_NORMAL; + p_elem->id= part_info->partitions.elements - 1; + part_info->curr_part_elem= p_elem; + part_info->current_partition= p_elem; + part_info->use_default_partitions= FALSE; + part_info->use_default_num_partitions= FALSE; + } + part_name + opt_part_values + opt_part_options + opt_sub_partition + {} + ; + +part_name: + ident + { + partition_info 
*part_info= Lex->part_info; + partition_element *p_elem= part_info->curr_part_elem; + if (unlikely(check_ident_length(&$1))) + MYSQL_YYABORT; + p_elem->partition_name= $1.str; + } + ; + +opt_part_values: + /* empty */ + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + if (! lex->is_partition_management()) + { + if (unlikely(part_info->error_if_requires_values())) + MYSQL_YYABORT; + if (unlikely(part_info->part_type == VERSIONING_PARTITION)) + my_yyabort_error((ER_VERS_WRONG_PARTS, MYF(0), + lex->create_last_non_select_table-> + table_name.str)); + } + else + part_info->part_type= HASH_PARTITION; + } + | VALUES_LESS_SYM THAN_SYM + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + if (! lex->is_partition_management()) + { + if (unlikely(part_info->part_type != RANGE_PARTITION)) + my_yyabort_error((ER_PARTITION_WRONG_VALUES_ERROR, MYF(0), + "RANGE", "LESS THAN")); + } + else + part_info->part_type= RANGE_PARTITION; + } + part_func_max {} + | VALUES_IN_SYM + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + if (! lex->is_partition_management()) + { + if (unlikely(part_info->part_type != LIST_PARTITION)) + my_yyabort_error((ER_PARTITION_WRONG_VALUES_ERROR, MYF(0), + "LIST", "IN")); + } + else + part_info->part_type= LIST_PARTITION; + } + part_values_in {} + | CURRENT_SYM + { +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (Lex->part_values_current(thd)) + MYSQL_YYABORT; +#endif + } + | HISTORY_SYM + { +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (Lex->part_values_history(thd)) + MYSQL_YYABORT; +#endif + } + | DEFAULT + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + if (! 
lex->is_partition_management()) + { + if (unlikely(part_info->part_type != LIST_PARTITION)) + my_yyabort_error((ER_PARTITION_WRONG_VALUES_ERROR, MYF(0), + "LIST", "DEFAULT")); + } + else + part_info->part_type= LIST_PARTITION; + if (unlikely(part_info->init_column_part(thd))) + MYSQL_YYABORT; + if (unlikely(part_info->add_max_value(thd))) + MYSQL_YYABORT; + } + ; + +part_func_max: + MAXVALUE_SYM + { + partition_info *part_info= Lex->part_info; + + if (unlikely(part_info->num_columns && + part_info->num_columns != 1U)) + { + part_info->print_debug("Kilroy II", NULL); + thd->parse_error(ER_PARTITION_COLUMN_LIST_ERROR); + MYSQL_YYABORT; + } + else + part_info->num_columns= 1U; + if (unlikely(part_info->init_column_part(thd))) + MYSQL_YYABORT; + if (unlikely(part_info->add_max_value(thd))) + MYSQL_YYABORT; + } + | part_value_item {} + ; + +part_values_in: + part_value_item + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + part_info->print_debug("part_values_in: part_value_item", NULL); + + if (part_info->num_columns != 1U) + { + if (unlikely(!lex->is_partition_management() || + part_info->num_columns == 0 || + part_info->num_columns > MAX_REF_PARTS)) + { + part_info->print_debug("Kilroy III", NULL); + thd->parse_error(ER_PARTITION_COLUMN_LIST_ERROR); + MYSQL_YYABORT; + } + /* + Reorganize the current large array into a list of small + arrays with one entry in each array. This can happen + in the first partition of an ALTER TABLE statement where + we ADD or REORGANIZE partitions. Also can only happen + for LIST partitions. 
+ */ + if (unlikely(part_info->reorganize_into_single_field_col_val(thd))) + MYSQL_YYABORT; + } + } + | '(' part_value_list ')' + { + partition_info *part_info= Lex->part_info; + if (unlikely(part_info->num_columns < 2U)) + { + thd->parse_error(ER_ROW_SINGLE_PARTITION_FIELD_ERROR); + MYSQL_YYABORT; + } + } + ; + +part_value_list: + part_value_item {} + | part_value_list ',' part_value_item {} + ; + +part_value_item: + '(' + { + partition_info *part_info= Lex->part_info; + part_info->print_debug("( part_value_item", NULL); + /* Initialisation code needed for each list of value expressions */ + if (unlikely(!(part_info->part_type == LIST_PARTITION && + part_info->num_columns == 1U) && + part_info->init_column_part(thd))) + MYSQL_YYABORT; + } + part_value_item_list {} + ')' + { + partition_info *part_info= Lex->part_info; + part_info->print_debug(") part_value_item", NULL); + if (part_info->num_columns == 0) + part_info->num_columns= part_info->curr_list_object; + if (unlikely(part_info->num_columns != part_info->curr_list_object)) + { + /* + All value items lists must be of equal length, in some cases + which is covered by the above if-statement we don't know yet + how many columns is in the partition so the assignment above + ensures that we only report errors when we know we have an + error. 
+ */ + part_info->print_debug("Kilroy I", NULL); + thd->parse_error(ER_PARTITION_COLUMN_LIST_ERROR); + MYSQL_YYABORT; + } + part_info->curr_list_object= 0; + } + ; + +part_value_item_list: + part_value_expr_item {} + | part_value_item_list ',' part_value_expr_item {} + ; + +part_value_expr_item: + MAXVALUE_SYM + { + partition_info *part_info= Lex->part_info; + if (unlikely(part_info->part_type == LIST_PARTITION)) + { + thd->parse_error(ER_MAXVALUE_IN_VALUES_IN); + MYSQL_YYABORT; + } + if (unlikely(part_info->add_max_value(thd))) + MYSQL_YYABORT; + } + | bit_expr + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + Item *part_expr= $1; + + if (unlikely(!lex->safe_to_cache_query)) + { + thd->parse_error(ER_WRONG_EXPR_IN_PARTITION_FUNC_ERROR); + MYSQL_YYABORT; + } + if (unlikely(part_info->add_column_list_value(thd, part_expr))) + MYSQL_YYABORT; + } + ; + + +opt_sub_partition: + /* empty */ + { + partition_info *part_info= Lex->part_info; + if (unlikely(part_info->num_subparts != 0 && + !part_info->use_default_subpartitions)) + { + /* + We come here when we have defined subpartitions on the first + partition but not on all the subsequent partitions. 
+ */ + thd->parse_error(ER_PARTITION_WRONG_NO_SUBPART_ERROR); + MYSQL_YYABORT; + } + } + | '(' sub_part_list ')' + { + partition_info *part_info= Lex->part_info; + if (part_info->num_subparts != 0) + { + if (unlikely(part_info->num_subparts != + part_info->count_curr_subparts)) + { + thd->parse_error(ER_PARTITION_WRONG_NO_SUBPART_ERROR); + MYSQL_YYABORT; + } + } + else if (part_info->count_curr_subparts > 0) + { + if (unlikely(part_info->partitions.elements > 1)) + { + thd->parse_error(ER_PARTITION_WRONG_NO_SUBPART_ERROR); + MYSQL_YYABORT; + } + part_info->num_subparts= part_info->count_curr_subparts; + } + part_info->count_curr_subparts= 0; + } + ; + +sub_part_list: + sub_part_definition {} + | sub_part_list ',' sub_part_definition {} + ; + +sub_part_definition: + SUBPARTITION_SYM + { + partition_info *part_info= Lex->part_info; + partition_element *curr_part= part_info->current_partition; + partition_element *sub_p_elem= new (thd->mem_root) + partition_element(curr_part); + if (unlikely(part_info->use_default_subpartitions && + part_info->partitions.elements >= 2)) + { + /* + create table t1 (a int) + partition by list (a) subpartition by hash (a) + (partition p0 values in (1), + partition p1 values in (2) subpartition sp11); + causes use to arrive since we are on the second + partition, but still use_default_subpartitions + is set. When we come here we're processing at least + the second partition (the current partition processed + have already been put into the partitions list. 
+ */ + thd->parse_error(ER_PARTITION_WRONG_NO_SUBPART_ERROR); + MYSQL_YYABORT; + } + if (unlikely(!sub_p_elem) || + unlikely(curr_part->subpartitions.push_back(sub_p_elem, thd->mem_root))) + MYSQL_YYABORT; + + sub_p_elem->id= curr_part->subpartitions.elements - 1; + part_info->curr_part_elem= sub_p_elem; + part_info->use_default_subpartitions= FALSE; + part_info->use_default_num_subpartitions= FALSE; + part_info->count_curr_subparts++; + } + sub_name opt_subpart_options {} + ; + +sub_name: + ident_or_text + { + if (unlikely(check_ident_length(&$1))) + MYSQL_YYABORT; + Lex->part_info->curr_part_elem->partition_name= $1.str; + } + ; + +opt_part_options: + /* empty */ {} + | part_option_list {} + ; + +part_option_list: + part_option_list part_option {} + | part_option {} + ; + +part_option: + server_part_option {} + | engine_defined_option + { + $1->link(&Lex->part_info->curr_part_elem->option_list, + &Lex->option_list_last); + } + ; + +opt_subpart_options: + /* empty */ {} + | subpart_option_list {} + ; + +subpart_option_list: + subpart_option_list server_part_option {} + | server_part_option {} + ; + +server_part_option: + TABLESPACE opt_equal ident_or_text + { /* Compatibility with MySQL */ } + | opt_storage ENGINE_SYM opt_equal storage_engines + { + partition_info *part_info= Lex->part_info; + part_info->curr_part_elem->engine_type= $4; + part_info->default_engine_type= $4; + } + | CONNECTION_SYM opt_equal TEXT_STRING_sys + { + LEX *lex= Lex; + lex->part_info->curr_part_elem->connect_string.str= $3.str; + lex->part_info->curr_part_elem->connect_string.length= $3.length; + } + | NODEGROUP_SYM opt_equal real_ulong_num + { Lex->part_info->curr_part_elem->nodegroup_id= (uint16) $3; } + | MAX_ROWS opt_equal real_ulonglong_num + { Lex->part_info->curr_part_elem->part_max_rows= (ha_rows) $3; } + | MIN_ROWS opt_equal real_ulonglong_num + { Lex->part_info->curr_part_elem->part_min_rows= (ha_rows) $3; } + | DATA_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys + { 
Lex->part_info->curr_part_elem->data_file_name= $4.str; } + | INDEX_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys + { Lex->part_info->curr_part_elem->index_file_name= $4.str; } + | COMMENT_SYM opt_equal TEXT_STRING_sys + { Lex->part_info->curr_part_elem->part_comment= $3.str; } + ; + +opt_versioning_rotation: + /* empty */ {} + | { Lex->clause_that_disallows_subselect= "INTERVAL"; } + INTERVAL_SYM expr interval opt_versioning_interval_start opt_vers_auto_part + { + partition_info *part_info= Lex->part_info; + const char *table_name= Lex->create_last_non_select_table->table_name.str; + if (unlikely(part_info->vers_set_interval(thd, $3, $4, $5, $6, + table_name))) + MYSQL_YYABORT; + } + | LIMIT ulonglong_num opt_vers_auto_part + { + partition_info *part_info= Lex->part_info; + const char *table_name= Lex->create_last_non_select_table->table_name.str; + if (unlikely(part_info->vers_set_limit($2, $3, table_name))) + MYSQL_YYABORT; + } + ; + + +opt_versioning_interval_start: + /* empty */ + { + $$= NULL; + } + | STARTS_SYM literal + { + $$= $2; + } + ; + +opt_vers_auto_part: + /* empty */ + { + $$= 0; + } + | AUTO_SYM + { + $$= 1; + } + ; +/* + End of partition parser part +*/ + +opt_as: + /* empty */ {} + | AS {} + ; + +opt_create_database_options: + /* empty */ {} + | create_database_options {} + ; + +create_database_options: + create_database_option {} + | create_database_options create_database_option {} + ; + +create_database_option: + default_collation {} + | default_charset {} + | COMMENT_SYM opt_equal TEXT_STRING_sys + { + Lex->create_info.schema_comment= thd->make_clex_string($3); + Lex->create_info.used_fields|= HA_CREATE_USED_COMMENT; + } + ; + +opt_if_not_exists_table_element: + /* empty */ + { + Lex->check_exists= FALSE; + } + | IF_SYM not EXISTS + { + Lex->check_exists= TRUE; + } + ; + +opt_if_not_exists: + /* empty */ + { + $$.init(); + } + | IF_SYM not EXISTS + { + $$.set(DDL_options_st::OPT_IF_NOT_EXISTS); + } + ; + +create_or_replace: + CREATE /* empty 
*/ + { + $$.init(); + } + | CREATE OR_SYM REPLACE + { + $$.set(DDL_options_st::OPT_OR_REPLACE); + } + ; + +opt_create_table_options: + /* empty */ + | create_table_options + ; + +create_table_options_space_separated: + create_table_option + | create_table_option create_table_options_space_separated + ; + +create_table_options: + create_table_option + | create_table_option create_table_options + | create_table_option ',' create_table_options + ; + +create_table_option: + ENGINE_SYM opt_equal ident_or_text + { + LEX *lex= Lex; + if (!lex->m_sql_cmd) + { + DBUG_ASSERT(lex->sql_command == SQLCOM_ALTER_TABLE); + if (!(lex->m_sql_cmd= new (thd->mem_root) Sql_cmd_alter_table())) + MYSQL_YYABORT; + } + Storage_engine_name *opt= + lex->m_sql_cmd->option_storage_engine_name(); + DBUG_ASSERT(opt); // Expect a proper Sql_cmd + *opt= Storage_engine_name($3); + lex->create_info.used_fields|= HA_CREATE_USED_ENGINE; + } + | MAX_ROWS opt_equal ulonglong_num + { + Lex->create_info.max_rows= $3; + Lex->create_info.used_fields|= HA_CREATE_USED_MAX_ROWS; + } + | MIN_ROWS opt_equal ulonglong_num + { + Lex->create_info.min_rows= $3; + Lex->create_info.used_fields|= HA_CREATE_USED_MIN_ROWS; + } + | AVG_ROW_LENGTH opt_equal ulong_num + { + Lex->create_info.avg_row_length=$3; + Lex->create_info.used_fields|= HA_CREATE_USED_AVG_ROW_LENGTH; + } + | PASSWORD_SYM opt_equal TEXT_STRING_sys + { + Lex->create_info.password=$3.str; + Lex->create_info.used_fields|= HA_CREATE_USED_PASSWORD; + } + | COMMENT_SYM opt_equal TEXT_STRING_sys + { + Lex->create_info.comment=$3; + Lex->create_info.used_fields|= HA_CREATE_USED_COMMENT; + } + | AUTO_INC opt_equal ulonglong_num + { + Lex->create_info.auto_increment_value=$3; + Lex->create_info.used_fields|= HA_CREATE_USED_AUTO; + } + | PACK_KEYS_SYM opt_equal ulong_num + { + switch($3) { + case 0: + Lex->create_info.table_options|= HA_OPTION_NO_PACK_KEYS; + break; + case 1: + Lex->create_info.table_options|= HA_OPTION_PACK_KEYS; + break; + default: + 
thd->parse_error(); + MYSQL_YYABORT; + } + Lex->create_info.used_fields|= HA_CREATE_USED_PACK_KEYS; + } + | PACK_KEYS_SYM opt_equal DEFAULT + { + Lex->create_info.table_options&= + ~(HA_OPTION_PACK_KEYS | HA_OPTION_NO_PACK_KEYS); + Lex->create_info.used_fields|= HA_CREATE_USED_PACK_KEYS; + } + | STATS_AUTO_RECALC_SYM opt_equal ulong_num + { + switch($3) { + case 0: + Lex->create_info.stats_auto_recalc= HA_STATS_AUTO_RECALC_OFF; + break; + case 1: + Lex->create_info.stats_auto_recalc= HA_STATS_AUTO_RECALC_ON; + break; + default: + thd->parse_error(); + MYSQL_YYABORT; + } + Lex->create_info.used_fields|= HA_CREATE_USED_STATS_AUTO_RECALC; + } + | STATS_AUTO_RECALC_SYM opt_equal DEFAULT + { + Lex->create_info.stats_auto_recalc= HA_STATS_AUTO_RECALC_DEFAULT; + Lex->create_info.used_fields|= HA_CREATE_USED_STATS_AUTO_RECALC; + } + | STATS_PERSISTENT_SYM opt_equal ulong_num + { + switch($3) { + case 0: + Lex->create_info.table_options|= HA_OPTION_NO_STATS_PERSISTENT; + break; + case 1: + Lex->create_info.table_options|= HA_OPTION_STATS_PERSISTENT; + break; + default: + thd->parse_error(); + MYSQL_YYABORT; + } + Lex->create_info.used_fields|= HA_CREATE_USED_STATS_PERSISTENT; + } + | STATS_PERSISTENT_SYM opt_equal DEFAULT + { + Lex->create_info.table_options&= + ~(HA_OPTION_STATS_PERSISTENT | HA_OPTION_NO_STATS_PERSISTENT); + Lex->create_info.used_fields|= HA_CREATE_USED_STATS_PERSISTENT; + } + | STATS_SAMPLE_PAGES_SYM opt_equal ulong_num + { + /* From user point of view STATS_SAMPLE_PAGES can be specified as + STATS_SAMPLE_PAGES=N (where 0 0xffff)) + { + thd->parse_error(); + MYSQL_YYABORT; + } + Lex->create_info.stats_sample_pages=$3; + Lex->create_info.used_fields|= HA_CREATE_USED_STATS_SAMPLE_PAGES; + } + | STATS_SAMPLE_PAGES_SYM opt_equal DEFAULT + { + Lex->create_info.stats_sample_pages=0; + Lex->create_info.used_fields|= HA_CREATE_USED_STATS_SAMPLE_PAGES; + } + | CHECKSUM_SYM opt_equal ulong_num + { + Lex->create_info.table_options|= $3 ? 
HA_OPTION_CHECKSUM : HA_OPTION_NO_CHECKSUM; + Lex->create_info.used_fields|= HA_CREATE_USED_CHECKSUM; + } + | TABLE_CHECKSUM_SYM opt_equal ulong_num + { + Lex->create_info.table_options|= $3 ? HA_OPTION_CHECKSUM : HA_OPTION_NO_CHECKSUM; + Lex->create_info.used_fields|= HA_CREATE_USED_CHECKSUM; + } + | PAGE_CHECKSUM_SYM opt_equal choice + { + Lex->create_info.used_fields|= HA_CREATE_USED_PAGE_CHECKSUM; + Lex->create_info.page_checksum= $3; + } + | DELAY_KEY_WRITE_SYM opt_equal ulong_num + { + Lex->create_info.table_options|= $3 ? HA_OPTION_DELAY_KEY_WRITE : HA_OPTION_NO_DELAY_KEY_WRITE; + Lex->create_info.used_fields|= HA_CREATE_USED_DELAY_KEY_WRITE; + } + | ROW_FORMAT_SYM opt_equal row_types + { + Lex->create_info.row_type= $3; + Lex->create_info.used_fields|= HA_CREATE_USED_ROW_FORMAT; + } + | UNION_SYM opt_equal + { + Lex->first_select_lex()->table_list.save_and_clear(&Lex->save_list); + } + '(' opt_table_list ')' + { + /* + Move the union list to the merge_list and exclude its tables + from the global list. + */ + LEX *lex=Lex; + lex->create_info.merge_list= lex->first_select_lex()->table_list.first; + lex->first_select_lex()->table_list= lex->save_list; + /* + When excluding union list from the global list we assume that + elements of the former immediately follow elements which represent + table being created/altered and parent tables. 
+ */ + TABLE_LIST *last_non_sel_table= lex->create_last_non_select_table; + DBUG_ASSERT(last_non_sel_table->next_global == + lex->create_info.merge_list); + last_non_sel_table->next_global= 0; + Lex->query_tables_last= &last_non_sel_table->next_global; + + lex->create_info.used_fields|= HA_CREATE_USED_UNION; + } + | default_charset + | default_collation + | INSERT_METHOD opt_equal merge_insert_types + { + Lex->create_info.merge_insert_method= $3; + Lex->create_info.used_fields|= HA_CREATE_USED_INSERT_METHOD; + } + | DATA_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys + { + Lex->create_info.data_file_name= $4.str; + Lex->create_info.used_fields|= HA_CREATE_USED_DATADIR; + } + | INDEX_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys + { + Lex->create_info.index_file_name= $4.str; + Lex->create_info.used_fields|= HA_CREATE_USED_INDEXDIR; + } + | TABLESPACE ident + { /* Compatiblity with MySQL */ } + | STORAGE_SYM DISK_SYM + {Lex->create_info.storage_media= HA_SM_DISK;} + | STORAGE_SYM MEMORY_SYM + {Lex->create_info.storage_media= HA_SM_MEMORY;} + | CONNECTION_SYM opt_equal TEXT_STRING_sys + { + Lex->create_info.connect_string.str= $3.str; + Lex->create_info.connect_string.length= $3.length; + Lex->create_info.used_fields|= HA_CREATE_USED_CONNECTION; + } + | KEY_BLOCK_SIZE opt_equal ulong_num + { + Lex->create_info.used_fields|= HA_CREATE_USED_KEY_BLOCK_SIZE; + Lex->create_info.key_block_size= $3; + } + | TRANSACTIONAL_SYM opt_equal choice + { + Lex->create_info.used_fields|= HA_CREATE_USED_TRANSACTIONAL; + Lex->create_info.transactional= $3; + } + | engine_defined_option + { + $1->link(&Lex->create_info.option_list, &Lex->option_list_last); + } + | SEQUENCE_SYM opt_equal choice + { + Lex->create_info.used_fields|= HA_CREATE_USED_SEQUENCE; + Lex->create_info.sequence= ($3 == HA_CHOICE_YES); + } + | versioning_option + ; + +engine_defined_option: + IDENT_sys equal TEXT_STRING_sys + { + if (unlikely($3.length > ENGINE_OPTION_MAX_LENGTH)) + my_yyabort_error((ER_VALUE_TOO_LONG, 
MYF(0), $1.str)); + $$= new (thd->mem_root) engine_option_value($1, $3, true); + MYSQL_YYABORT_UNLESS($$); + } + | IDENT_sys equal ident + { + if (unlikely($3.length > ENGINE_OPTION_MAX_LENGTH)) + my_yyabort_error((ER_VALUE_TOO_LONG, MYF(0), $1.str)); + $$= new (thd->mem_root) engine_option_value($1, $3, false); + MYSQL_YYABORT_UNLESS($$); + } + | IDENT_sys equal real_ulonglong_num + { + $$= new (thd->mem_root) engine_option_value($1, $3, thd->mem_root); + MYSQL_YYABORT_UNLESS($$); + } + | IDENT_sys equal DEFAULT + { + $$= new (thd->mem_root) engine_option_value($1); + MYSQL_YYABORT_UNLESS($$); + } + ; + +opt_versioning_option: + /* empty */ + | versioning_option + ; + +versioning_option: + WITH_SYSTEM_SYM VERSIONING_SYM + { + if (unlikely(Lex->create_info.options & HA_LEX_CREATE_TMP_TABLE)) + { + if (!DBUG_IF("sysvers_force")) + { + my_error(ER_VERS_NOT_SUPPORTED, MYF(0), "CREATE TEMPORARY TABLE"); + MYSQL_YYABORT; + } + } + else + { + Lex->alter_info.flags|= ALTER_ADD_SYSTEM_VERSIONING; + Lex->create_info.options|= HA_VERSIONED_TABLE; + } + } + ; + +default_charset: + opt_default charset opt_equal charset_name_or_default + { + if (unlikely(Lex->create_info.add_table_option_default_charset($4))) + MYSQL_YYABORT; + } + ; + +default_collation: + opt_default COLLATE_SYM opt_equal collation_name_or_default + { + Table_specification_st *cinfo= &Lex->create_info; + if (unlikely(cinfo->add_table_option_default_collation($4))) + MYSQL_YYABORT; + } + ; + +storage_engines: + ident_or_text + { + if (Storage_engine_name($1). 
+ resolve_storage_engine_with_error(thd, &$$, + thd->lex->create_info.tmp_table())) + MYSQL_YYABORT; + } + ; + +known_storage_engines: + ident_or_text + { + plugin_ref plugin; + if (likely((plugin= ha_resolve_by_name(thd, &$1, false)))) + $$= plugin_hton(plugin); + else + my_yyabort_error((ER_UNKNOWN_STORAGE_ENGINE, MYF(0), $1.str)); + } + ; + +row_types: + DEFAULT { $$= ROW_TYPE_DEFAULT; } + | FIXED_SYM { $$= ROW_TYPE_FIXED; } + | DYNAMIC_SYM { $$= ROW_TYPE_DYNAMIC; } + | COMPRESSED_SYM { $$= ROW_TYPE_COMPRESSED; } + | REDUNDANT_SYM { $$= ROW_TYPE_REDUNDANT; } + | COMPACT_SYM { $$= ROW_TYPE_COMPACT; } + | PAGE_SYM { $$= ROW_TYPE_PAGE; } + ; + +merge_insert_types: + NO_SYM { $$= MERGE_INSERT_DISABLED; } + | FIRST_SYM { $$= MERGE_INSERT_TO_FIRST; } + | LAST_SYM { $$= MERGE_INSERT_TO_LAST; } + ; + +udf_type: + STRING_SYM {$$ = (int) STRING_RESULT; } + | REAL {$$ = (int) REAL_RESULT; } + | DECIMAL_SYM {$$ = (int) DECIMAL_RESULT; } + | INT_SYM {$$ = (int) INT_RESULT; } + ; + + +create_field_list: + field_list + { + Lex->create_last_non_select_table= Lex->last_table(); + } + ; + +create_field_list_parens: + LEFT_PAREN_ALT field_list ')' + { + Lex->create_last_non_select_table= Lex->last_table(); + } + ; + +field_list: + field_list_item + | field_list ',' field_list_item + ; + +field_list_item: + column_def { } + | key_def + | constraint_def + | period_for_system_time + | PERIOD_SYM period_for_application_time { } + ; + +column_def: + field_spec + { $$= $1; } + | field_spec opt_constraint references + { + if (unlikely(Lex->add_column_foreign_key(&($1->field_name), &$2, + $3, DDL_options()))) + MYSQL_YYABORT; + $$= $1; + } + ; + +key_def: + key_or_index opt_if_not_exists opt_ident opt_USING_key_algorithm + { + Lex->option_list= NULL; + if (unlikely(Lex->add_key(Key::MULTIPLE, &$3, $4, $2))) + MYSQL_YYABORT; + } + '(' key_list ')' normal_key_options { } + | key_or_index opt_if_not_exists ident TYPE_SYM btree_or_rtree + { + Lex->option_list= NULL; + if 
(unlikely(Lex->add_key(Key::MULTIPLE, &$3, $5, $2))) + MYSQL_YYABORT; + } + '(' key_list ')' normal_key_options { } + | fulltext opt_key_or_index opt_if_not_exists opt_ident + { + Lex->option_list= NULL; + if (unlikely(Lex->add_key($1, &$4, HA_KEY_ALG_UNDEF, $3))) + MYSQL_YYABORT; + } + '(' key_list ')' fulltext_key_options { } + | spatial opt_key_or_index opt_if_not_exists opt_ident + { + Lex->option_list= NULL; + if (unlikely(Lex->add_key($1, &$4, HA_KEY_ALG_UNDEF, $3))) + MYSQL_YYABORT; + } + '(' key_list ')' spatial_key_options { } + | opt_constraint constraint_key_type + opt_if_not_exists opt_ident + opt_USING_key_algorithm + { + Lex->option_list= NULL; + if (unlikely(Lex->add_key($2, $4.str ? &$4 : &$1, $5, $3))) + MYSQL_YYABORT; + } + '(' key_list opt_without_overlaps ')' normal_key_options { } + | opt_constraint constraint_key_type opt_if_not_exists ident + TYPE_SYM btree_or_rtree + { + Lex->option_list= NULL; + if (unlikely(Lex->add_key($2, $4.str ? &$4 : &$1, $6, $3))) + MYSQL_YYABORT; + } + '(' key_list opt_without_overlaps ')' normal_key_options { } + | opt_constraint FOREIGN KEY_SYM opt_if_not_exists opt_ident + { + if (unlikely(Lex->check_add_key($4)) || + unlikely(!(Lex->last_key= (new (thd->mem_root) + Key(Key::MULTIPLE, + $1.str ? &$1 : &$5, + HA_KEY_ALG_UNDEF, true, $4))))) + MYSQL_YYABORT; + Lex->option_list= NULL; + } + '(' key_list ')' references + { + if (unlikely(Lex->add_table_foreign_key($5.str ? &$5 : &$1, + $1.str ? &$1 : &$5, $10, $4))) + MYSQL_YYABORT; + } + ; + +constraint_def: + opt_constraint check_constraint + { + Lex->add_constraint($1, $2, FALSE); + } + ; + +period_for_system_time: + // If FOR_SYM is followed by SYSTEM_TIME_SYM then they are merged to: FOR_SYSTEM_TIME_SYM . 
+ PERIOD_SYM FOR_SYSTEM_TIME_SYM '(' ident ',' ident ')' + { + Vers_parse_info &info= Lex->vers_get_info(); + info.set_period($4, $6); + } + ; + +period_for_application_time: + FOR_SYM ident '(' ident ',' ident ')' + { + if (Lex->add_period($2, $4, $6)) + MYSQL_YYABORT; + } + ; + +opt_check_constraint: + /* empty */ { $$= (Virtual_column_info*) 0; } + | check_constraint { $$= $1;} + ; + +check_constraint: + CHECK_SYM '(' expr ')' + { + Virtual_column_info *v= add_virtual_expression(thd, $3); + if (unlikely(!v)) + MYSQL_YYABORT; + $$= v; + } + ; + +opt_constraint_no_id: + /* Empty */ {} + | CONSTRAINT {} + ; + +opt_constraint: + /* empty */ { $$= null_clex_str; } + | constraint { $$= $1; } + ; + +constraint: + CONSTRAINT opt_ident { $$=$2; } + ; + +field_spec: + field_ident + { + LEX *lex=Lex; + Create_field *f= new (thd->mem_root) Create_field(); + + if (unlikely(check_string_char_length(&$1, 0, NAME_CHAR_LEN, + system_charset_info, 1))) + my_yyabort_error((ER_TOO_LONG_IDENT, MYF(0), $1.str)); + + if (unlikely(!f)) + MYSQL_YYABORT; + + lex->init_last_field(f, &$1); + $$= f; + lex->parsing_options.lookup_keywords_after_qualifier= true; + } + field_type_or_serial opt_check_constraint + { + LEX *lex=Lex; + lex->parsing_options.lookup_keywords_after_qualifier= false; + $$= $2; + + $$->check_constraint= $4; + + if (unlikely($$->check(thd))) + MYSQL_YYABORT; + + lex->alter_info.create_list.push_back($$, thd->mem_root); + + $$->create_if_not_exists= Lex->check_exists; + if ($$->flags & PRI_KEY_FLAG) + lex->add_key_to_list(&$1, Key::PRIMARY, lex->check_exists); + else if ($$->flags & UNIQUE_KEY_FLAG) + lex->add_key_to_list(&$1, Key::UNIQUE, lex->check_exists); + } + ; + +field_type_or_serial: + qualified_field_type + { + Lex->last_field->set_attributes(thd, $1, + COLUMN_DEFINITION_TABLE_FIELD); + } + field_def + { + auto tmp= $1.charset_collation_attrs(); + if (tmp.merge_column_charset_clause_and_collate_clause($3)) + MYSQL_YYABORT; + 
Lex->last_field->set_charset_collation_attrs(tmp); + } + | SERIAL_SYM + { + Lex->last_field->set_handler(&type_handler_ulonglong); + Lex->last_field->flags|= AUTO_INCREMENT_FLAG | NOT_NULL_FLAG + | UNSIGNED_FLAG | UNIQUE_KEY_FLAG; + Lex->alter_info.flags|= ALTER_ADD_INDEX; + } + opt_serial_attribute + ; + +opt_serial_attribute: + /* empty */ {} + | opt_serial_attribute_list {} + ; + +opt_serial_attribute_list: + opt_serial_attribute_list serial_attribute {} + | serial_attribute + ; + +opt_asrow_attribute: + /* empty */ {} + | opt_asrow_attribute_list {} + ; + +opt_asrow_attribute_list: + opt_asrow_attribute_list asrow_attribute {} + | asrow_attribute + ; + +field_def: + /* empty */ { $$.init(); } + | attribute_list + | attribute_list compressed_deprecated_column_attribute { $$= $1; } + | attribute_list compressed_deprecated_column_attribute attribute_list + { + if (($$= $1).merge_column_collate_clause_and_collate_clause($3)) + MYSQL_YYABORT; + } + | opt_generated_always AS virtual_column_func + { + Lex->last_field->vcol_info= $3; + } + vcol_opt_specifier vcol_opt_attribute + { + $$.init(); + } + | opt_generated_always AS ROW_SYM START_SYM opt_asrow_attribute + { + if (Lex->last_field_generated_always_as_row_start()) + MYSQL_YYABORT; + $$.init(); + } + | opt_generated_always AS ROW_SYM END opt_asrow_attribute + { + if (Lex->last_field_generated_always_as_row_end()) + MYSQL_YYABORT; + $$.init(); + } + ; + +opt_generated_always: + /* empty */ {} + | GENERATED_SYM ALWAYS_SYM {} + ; + +vcol_opt_specifier: + /* empty */ + { + Lex->last_field->vcol_info->set_stored_in_db_flag(FALSE); + } + | VIRTUAL_SYM + { + Lex->last_field->vcol_info->set_stored_in_db_flag(FALSE); + } + | PERSISTENT_SYM + { + Lex->last_field->vcol_info->set_stored_in_db_flag(TRUE); + } + | STORED_SYM + { + Lex->last_field->vcol_info->set_stored_in_db_flag(TRUE); + } + ; + +vcol_opt_attribute: + /* empty */ {} + | vcol_opt_attribute_list {} + ; + +vcol_opt_attribute_list: + vcol_opt_attribute_list 
vcol_attribute {} + | vcol_attribute + ; + +vcol_attribute: + UNIQUE_SYM + { + LEX *lex=Lex; + lex->last_field->flags|= UNIQUE_KEY_FLAG; + lex->alter_info.flags|= ALTER_ADD_INDEX; + } + | UNIQUE_SYM KEY_SYM + { + LEX *lex=Lex; + lex->last_field->flags|= UNIQUE_KEY_FLAG; + lex->alter_info.flags|= ALTER_ADD_INDEX; + } + | COMMENT_SYM TEXT_STRING_sys { Lex->last_field->comment= $2; } + | INVISIBLE_SYM + { + Lex->last_field->invisible= INVISIBLE_USER; + } + ; + +parse_vcol_expr: + PARSE_VCOL_EXPR_SYM + { + /* + "PARSE_VCOL_EXPR" can only be used by the SQL server + when reading a '*.frm' file. + Prevent the end user from invoking this command. + */ + MYSQL_YYABORT_UNLESS(Lex->parse_vcol_expr); + if (Lex->main_select_push()) + MYSQL_YYABORT; + } + expr + { + Virtual_column_info *v= add_virtual_expression(thd, $3); + if (unlikely(!v)) + MYSQL_YYABORT; + Lex->last_field->vcol_info= v; + Lex->pop_select(); //main select + } + ; + +parenthesized_expr: + expr + | expr ',' expr_list + { + $3->push_front($1, thd->mem_root); + $$= new (thd->mem_root) Item_row(thd, *$3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +virtual_column_func: + '(' parenthesized_expr ')' + { + Virtual_column_info *v= + add_virtual_expression(thd, $2); + if (unlikely(!v)) + MYSQL_YYABORT; + $$= v; + } + | subquery + { + Item *item; + if (!(item= new (thd->mem_root) Item_singlerow_subselect(thd, $1))) + MYSQL_YYABORT; + Virtual_column_info *v= add_virtual_expression(thd, item); + if (unlikely(!v)) + MYSQL_YYABORT; + $$= v; + } + ; + +expr_or_literal: column_default_non_parenthesized_expr | signed_literal ; + +column_default_expr: + virtual_column_func + | expr_or_literal + { + if (unlikely(!($$= add_virtual_expression(thd, $1)))) + MYSQL_YYABORT; + } + ; + +field_type: field_type_all + { + Lex->map_data_type(Lex_ident_sys(), &($$= $1)); + } + ; + +qualified_field_type: + field_type_all + { + Lex->map_data_type(Lex_ident_sys(), &($$= $1)); + } + | sp_decl_ident '.' 
field_type_all + { + if (Lex->map_data_type($1, &($$= $3))) + MYSQL_YYABORT; + } + ; + +udt_name: + IDENT_sys { $$= $1; } + | reserved_keyword_udt { $$= $1; } + | non_reserved_keyword_udt { $$= $1; } + ; + +field_type_all: + field_type_numeric + | field_type_temporal + | field_type_string + | field_type_lob + | field_type_misc + | udt_name float_options srid_option + { + if (Lex->set_field_type_udt(&$$, $1, $2)) + MYSQL_YYABORT; + } + ; + +field_type_numeric: + int_type opt_field_length last_field_options + { + $$.set_handler_length_flags($1, $2, (uint32) $3); + } + | real_type opt_precision last_field_options { $$.set($1, $2); } + | FLOAT_SYM float_options last_field_options + { + $$.set(&type_handler_float, $2); + if ($2.has_explicit_length() && !$2.has_explicit_dec()) + { + if (unlikely($2.length() > PRECISION_FOR_DOUBLE)) + my_yyabort_error((ER_WRONG_FIELD_SPEC, MYF(0), + Lex->last_field->field_name.str)); + if ($2.length() > PRECISION_FOR_FLOAT) + $$.set(&type_handler_double); + else + $$.set(&type_handler_float); + } + } + | BIT_SYM opt_field_length + { + $$.set(&type_handler_bit, $2); + } + | BOOL_SYM + { + $$.set_handler_length(&type_handler_stiny, 1); + } + | BOOLEAN_SYM + { + $$.set_handler_length(&type_handler_stiny, 1); + } + | DECIMAL_SYM float_options last_field_options + { $$.set(&type_handler_newdecimal, $2);} + | NUMBER_ORACLE_SYM float_options last_field_options + { + if ($2.has_explicit_length()) + $$.set(&type_handler_newdecimal, $2); + else + $$.set(&type_handler_double); + } + | NUMERIC_SYM float_options last_field_options + { $$.set(&type_handler_newdecimal, $2);} + | FIXED_SYM float_options last_field_options + { $$.set(&type_handler_newdecimal, $2);} + ; + + +opt_binary_and_compression: + /* empty */ { $$.init(); } + | binary { $$= $1; } + | binary compressed_deprecated_data_type_attribute { $$= $1; } + | compressed opt_binary { $$= $2; } + ; + +field_type_string: + char opt_field_length opt_binary + { + $$.set(&type_handler_string, $2, 
$3); + } + | nchar opt_field_length opt_bin_mod + { + $$.set(&type_handler_string, $2, + Lex_exact_charset_extended_collation_attrs::national($3)); + } + | BINARY opt_field_length + { + $$.set(&type_handler_string, $2, &my_charset_bin); + } + | varchar opt_field_length opt_binary_and_compression + { + $$.set(&type_handler_varchar, $2, $3); + } + | VARCHAR2_ORACLE_SYM opt_field_length opt_binary_and_compression + { + $$.set(&type_handler_varchar, $2, $3); + } + | nvarchar opt_field_length opt_compressed opt_bin_mod + { + $$.set(&type_handler_varchar, $2, + Lex_exact_charset_extended_collation_attrs::national($4)); + } + | VARBINARY opt_field_length opt_compressed + { + $$.set(&type_handler_varchar, $2, &my_charset_bin); + } + | RAW_ORACLE_SYM opt_field_length opt_compressed + { + $$.set(&type_handler_varchar, $2, &my_charset_bin); + } + ; + +field_type_temporal: + YEAR_SYM opt_field_length last_field_options + { + if ($2.has_explicit_length()) + { + if ($2.length() != 4) + { + char buff[sizeof("YEAR()") + MY_INT64_NUM_DECIMAL_DIGITS + 1]; + my_snprintf(buff, sizeof(buff), "YEAR(%u)", (uint) $2.length()); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_WARN_DEPRECATED_SYNTAX, + ER_THD(thd, ER_WARN_DEPRECATED_SYNTAX), + buff, "YEAR(4)"); + } + } + $$.set(&type_handler_year, $2); + } + | DATE_SYM { $$.set(&type_handler_newdate); } + | TIME_SYM opt_field_length + { + $$.set(opt_mysql56_temporal_format ? + static_cast(&type_handler_time2) : + static_cast(&type_handler_time), + $2); + } + | TIMESTAMP opt_field_length + { + $$.set(opt_mysql56_temporal_format ? 
+ static_cast(&type_handler_timestamp2): + static_cast(&type_handler_timestamp), + $2); + } + | DATETIME opt_field_length + { + $$.set(thd->type_handler_for_datetime(), $2); + } + ; + + +field_type_lob: + TINYBLOB opt_compressed + { + $$.set(&type_handler_tiny_blob, &my_charset_bin); + } + | BLOB_MARIADB_SYM opt_field_length opt_compressed + { + $$.set(&type_handler_blob, $2, &my_charset_bin); + } + | BLOB_ORACLE_SYM field_length opt_compressed + { + $$.set(&type_handler_blob, $2, &my_charset_bin); + } + | BLOB_ORACLE_SYM opt_compressed + { + $$.set(&type_handler_long_blob, &my_charset_bin); + } + | MEDIUMBLOB opt_compressed + { + $$.set(&type_handler_medium_blob, &my_charset_bin); + } + | LONGBLOB opt_compressed + { + $$.set(&type_handler_long_blob, &my_charset_bin); + } + | LONG_SYM VARBINARY opt_compressed + { + $$.set(&type_handler_medium_blob, &my_charset_bin); + } + | LONG_SYM varchar opt_binary_and_compression + { $$.set(&type_handler_medium_blob, $3); } + | TINYTEXT opt_binary_and_compression + { $$.set(&type_handler_tiny_blob, $2); } + | TEXT_SYM opt_field_length opt_binary_and_compression + { $$.set(&type_handler_blob, $2, $3); } + | MEDIUMTEXT opt_binary_and_compression + { $$.set(&type_handler_medium_blob, $2); } + | LONGTEXT opt_binary_and_compression + { $$.set(&type_handler_long_blob, $2); } + | CLOB_ORACLE_SYM opt_binary_and_compression + { $$.set(&type_handler_long_blob, $2); } + | LONG_SYM opt_binary_and_compression + { $$.set(&type_handler_medium_blob, $2); } + | JSON_SYM opt_compressed + { + $$.set(&type_handler_long_blob_json, &my_charset_utf8mb4_bin); + } + ; + +field_type_misc: + ENUM '(' string_list ')' opt_binary + { $$.set(&type_handler_enum, $5); } + | SET '(' string_list ')' opt_binary + { $$.set(&type_handler_set, $5); } + ; + +char: + CHAR_SYM {} + ; + +nchar: + NCHAR_SYM {} + | NATIONAL_SYM CHAR_SYM {} + ; + +varchar: + char VARYING {} + | VARCHAR {} + ; + +nvarchar: + NATIONAL_SYM VARCHAR {} + | NVARCHAR_SYM {} + | NCHAR_SYM VARCHAR 
{} + | NATIONAL_SYM CHAR_SYM VARYING {} + | NCHAR_SYM VARYING {} + ; + +int_type: + INT_SYM { $$= &type_handler_slong; } + | TINYINT { $$= &type_handler_stiny; } + | SMALLINT { $$= &type_handler_sshort; } + | MEDIUMINT { $$= &type_handler_sint24; } + | BIGINT { $$= &type_handler_slonglong; } + ; + +real_type: + REAL + { + $$= thd->variables.sql_mode & MODE_REAL_AS_FLOAT ? + static_cast(&type_handler_float) : + static_cast(&type_handler_double); + } + | DOUBLE_SYM { $$= &type_handler_double; } + | DOUBLE_SYM PRECISION { $$= &type_handler_double; } + ; + +srid_option: + /* empty */ + { Lex->last_field->srid= 0; } + | + REF_SYSTEM_ID_SYM '=' NUM + { + Lex->last_field->srid=atoi($3.str); + } + ; + +float_options: + /* empty */ { $$.reset(); } + | field_length + | precision + ; + +precision: + '(' NUM ',' NUM ')' { $$.set($2.str, $4.str); } + ; + +field_options: + /* empty */ { $$= 0; } + | SIGNED_SYM { $$= 0; } + | UNSIGNED { $$= UNSIGNED_FLAG; } + | ZEROFILL { $$= UNSIGNED_FLAG | ZEROFILL_FLAG; } + | UNSIGNED ZEROFILL { $$= UNSIGNED_FLAG | ZEROFILL_FLAG; } + | ZEROFILL UNSIGNED { $$= UNSIGNED_FLAG | ZEROFILL_FLAG; } + ; + +last_field_options: + field_options { Lex->last_field->flags|= ($$= $1); } + ; + +field_length_str: + '(' LONG_NUM ')' { $$= $2.str; } + | '(' ULONGLONG_NUM ')' { $$= $2.str; } + | '(' DECIMAL_NUM ')' { $$= $2.str; } + | '(' NUM ')' { $$= $2.str; } + ; + +field_length: field_length_str { $$.set($1, NULL); } + ; + + +field_scale: field_length_str { $$.set(NULL, $1); } + ; + + +opt_field_length: + /* empty */ { $$.reset(); /* use default length */ } + | field_length + ; + +opt_field_scale: + /* empty */ { $$.reset(); } + | field_scale + ; + +opt_precision: + /* empty */ { $$.reset(); } + | precision { $$= $1; } + ; + + +attribute_list: + attribute_list attribute + { + if (($$= $1).merge_column_collate_clause_and_collate_clause($2)) + MYSQL_YYABORT; + } + | attribute + ; + +attribute: + NULL_SYM + { + Lex->last_field->flags&= ~NOT_NULL_FLAG; + 
Lex->last_field->explicitly_nullable= true; + $$.init(); + } + | DEFAULT column_default_expr + { + Lex->last_field->default_value= $2; + $$.init(); + } + | ON UPDATE_SYM NOW_SYM opt_default_time_precision + { + Item *item= new (thd->mem_root) Item_func_now_local(thd, $4); + if (unlikely(item == NULL)) + MYSQL_YYABORT; + Lex->last_field->on_update= item; + $$.init(); + } + | AUTO_INC { Lex->last_field->flags|= AUTO_INCREMENT_FLAG | NOT_NULL_FLAG; $$.init(); } + | SERIAL_SYM DEFAULT VALUE_SYM + { + LEX *lex=Lex; + lex->last_field->flags|= AUTO_INCREMENT_FLAG | NOT_NULL_FLAG | UNIQUE_KEY_FLAG; + lex->alter_info.flags|= ALTER_ADD_INDEX; + $$.init(); + } + | COLLATE_SYM collation_name + { + $$= Lex_exact_charset_extended_collation_attrs($2); + } + | serial_attribute { $$.init(); } + ; + +opt_compression_method: + /* empty */ { $$= NULL; } + | equal ident { $$= $2.str; } + ; + +opt_compressed: + /* empty */ {} + | compressed { } + ; + +opt_enable: + /* empty */ {} + | ENABLE_SYM { } + ; + +compressed: + COMPRESSED_SYM opt_compression_method + { + if (unlikely(Lex->last_field->set_compressed($2))) + MYSQL_YYABORT; + } + ; + +compressed_deprecated_data_type_attribute: + COMPRESSED_SYM opt_compression_method + { + if (unlikely(Lex->last_field->set_compressed_deprecated(thd, $2))) + MYSQL_YYABORT; + } + ; + +compressed_deprecated_column_attribute: + COMPRESSED_SYM opt_compression_method + { + if (unlikely(Lex->last_field-> + set_compressed_deprecated_column_attribute(thd, $1.pos(), $2))) + MYSQL_YYABORT; + } + ; + +asrow_attribute: + not NULL_SYM opt_enable + { + Lex->last_field->flags|= NOT_NULL_FLAG; + } + | opt_primary KEY_SYM + { + LEX *lex=Lex; + lex->last_field->flags|= PRI_KEY_FLAG | NOT_NULL_FLAG; + lex->alter_info.flags|= ALTER_ADD_INDEX; + } + | vcol_attribute + ; + +serial_attribute: + asrow_attribute + | engine_defined_option + { + $1->link(&Lex->last_field->option_list, &Lex->option_list_last); + } + | with_or_without_system VERSIONING_SYM + { + 
Lex->last_field->versioning= $1; + Lex->create_info.options|= HA_VERSIONED_TABLE; + if (Lex->alter_info.flags & ALTER_DROP_SYSTEM_VERSIONING) + { + my_yyabort_error((ER_VERS_NOT_VERSIONED, MYF(0), + Lex->create_last_non_select_table->table_name.str)); + } + } + ; + +with_or_without_system: + WITH_SYSTEM_SYM + { + Lex->alter_info.flags|= ALTER_COLUMN_UNVERSIONED; + Lex->create_info.vers_info.versioned_fields= true; + $$= Column_definition::WITH_VERSIONING; + } + | WITHOUT SYSTEM + { + Lex->alter_info.flags|= ALTER_COLUMN_UNVERSIONED; + Lex->create_info.vers_info.unversioned_fields= true; + $$= Column_definition::WITHOUT_VERSIONING; + } + ; + + +charset: + CHAR_SYM SET { $$= $1; } + | CHARSET { $$= $1; } + ; + +charset_name: + ident_or_text + { + myf utf8_flag= thd->get_utf8_flag(); + if (unlikely(!($$=get_charset_by_csname($1.str, MY_CS_PRIMARY, + MYF(utf8_flag))))) + my_yyabort_error((ER_UNKNOWN_CHARACTER_SET, MYF(0), $1.str)); + } + | BINARY { $$= &my_charset_bin; } + ; + +charset_name_or_default: + charset_name { $$=$1; } + | DEFAULT { $$=NULL; } + ; + +opt_load_data_charset: + /* Empty */ { $$= NULL; } + | charset charset_name_or_default { $$= $2; } + ; + +old_or_new_charset_name: + ident_or_text + { + myf utf8_flag= thd->get_utf8_flag(); + if (unlikely(!($$=get_charset_by_csname($1.str, + MY_CS_PRIMARY, + MYF(utf8_flag))) && + !($$=get_old_charset_by_name($1.str)))) + my_yyabort_error((ER_UNKNOWN_CHARACTER_SET, MYF(0), $1.str)); + } + | BINARY { $$= &my_charset_bin; } + ; + +old_or_new_charset_name_or_default: + old_or_new_charset_name { $$=$1; } + | DEFAULT { $$=NULL; } + ; + +collation_name: + ident_or_text + { + if ($$.set_by_name($1.str, thd->get_utf8_flag())) + MYSQL_YYABORT; + } + ; + +collation_name_or_default: + collation_name { $$=$1; } + | DEFAULT { $$.set_collate_default(); } + ; + +opt_default: + /* empty */ {} + | DEFAULT {} + ; + +charset_or_alias: + charset charset_name { $$= $2; } + | ASCII_SYM { $$= &my_charset_latin1; } + | UNICODE_SYM + { + 
if (unlikely(!($$= get_charset_by_csname("ucs2", MY_CS_PRIMARY,MYF(0))))) + my_yyabort_error((ER_UNKNOWN_CHARACTER_SET, MYF(0), "ucs2")); + } + ; + +opt_binary: + /* empty */ { $$.init(); } + | binary + ; + +binary: + BYTE_SYM + { + $$.set_charset(Lex_exact_charset(&my_charset_bin)); + } + | charset_or_alias + { + $$.set_charset(Lex_exact_charset($1)); + } + | charset_or_alias BINARY + { + if ($$.set_charset_collate_binary(Lex_exact_charset($1))) + MYSQL_YYABORT; + } + | BINARY { $$.set_contextually_typed_binary_style(); } + | BINARY charset_or_alias + { + if ($$.set_charset_collate_binary(Lex_exact_charset($2))) + MYSQL_YYABORT; + } + | charset_or_alias COLLATE_SYM DEFAULT + { + $$.set_charset_collate_default(Lex_exact_charset($1)); + } + | charset_or_alias COLLATE_SYM collation_name + { + if ($3.merge_exact_charset(Lex_exact_charset($1))) + MYSQL_YYABORT; + $$= Lex_exact_charset_extended_collation_attrs($3); + } + | COLLATE_SYM collation_name + { + $$= Lex_exact_charset_extended_collation_attrs($2); + } + | COLLATE_SYM DEFAULT + { + $$.set_collate_default(); + } + ; + +opt_bin_mod: + /* empty */ { $$= false; } + | BINARY { $$= true; } + ; + +ws_nweights: + '(' real_ulong_num + { + if (unlikely($2 == 0)) + { + thd->parse_error(); + MYSQL_YYABORT; + } + } + ')' + { $$= $2; } + ; + +ws_level_flag_desc: + ASC { $$= 0; } + | DESC { $$= 1 << MY_STRXFRM_DESC_SHIFT; } + ; + +ws_level_flag_reverse: + REVERSE_SYM { $$= 1 << MY_STRXFRM_REVERSE_SHIFT; } ; + +ws_level_flags: + /* empty */ { $$= 0; } + | ws_level_flag_desc { $$= $1; } + | ws_level_flag_desc ws_level_flag_reverse { $$= $1 | $2; } + | ws_level_flag_reverse { $$= $1 ; } + ; + +ws_level_number: + real_ulong_num + { + $$= $1 < 1 ? 1 : ($1 > MY_STRXFRM_NLEVELS ? 
MY_STRXFRM_NLEVELS : $1); + $$--; + } + ; + +ws_level_list_item: + ws_level_number ws_level_flags + { + $$= (1 | $2) << $1; + } + ; + +ws_level_list: + ws_level_list_item { $$= $1; } + | ws_level_list ',' ws_level_list_item { $$|= $3; } + ; + +ws_level_range: + ws_level_number '-' ws_level_number + { + uint start= $1; + uint end= $3; + for ($$= 0; start <= end; start++) + $$|= (1 << start); + } + ; + +ws_level_list_or_range: + ws_level_list { $$= $1; } + | ws_level_range { $$= $1; } + ; + +opt_ws_levels: + /* empty*/ { $$= 0; } + | LEVEL_SYM ws_level_list_or_range { $$= $2; } + ; + +opt_primary: + /* empty */ + | PRIMARY_SYM + ; + +references: + REFERENCES + table_ident + opt_ref_list + opt_match_clause + opt_on_update_delete + { + $$=$2; + } + ; + +opt_ref_list: + /* empty */ + { Lex->ref_list.empty(); } + | '(' ref_list ')' + ; + +ref_list: + ref_list ',' ident + { + Key_part_spec *key= new (thd->mem_root) Key_part_spec(&$3, 0); + if (unlikely(key == NULL)) + MYSQL_YYABORT; + Lex->ref_list.push_back(key, thd->mem_root); + } + | ident + { + Key_part_spec *key= new (thd->mem_root) Key_part_spec(&$1, 0); + if (unlikely(key == NULL)) + MYSQL_YYABORT; + LEX *lex= Lex; + lex->ref_list.empty(); + lex->ref_list.push_back(key, thd->mem_root); + } + ; + +opt_match_clause: + /* empty */ + { Lex->fk_match_option= Foreign_key::FK_MATCH_UNDEF; } + | MATCH FULL + { Lex->fk_match_option= Foreign_key::FK_MATCH_FULL; } + | MATCH PARTIAL + { Lex->fk_match_option= Foreign_key::FK_MATCH_PARTIAL; } + | MATCH SIMPLE_SYM + { Lex->fk_match_option= Foreign_key::FK_MATCH_SIMPLE; } + ; + +opt_on_update_delete: + /* empty */ + { + LEX *lex= Lex; + lex->fk_update_opt= FK_OPTION_UNDEF; + lex->fk_delete_opt= FK_OPTION_UNDEF; + } + | ON UPDATE_SYM delete_option + { + LEX *lex= Lex; + lex->fk_update_opt= $3; + lex->fk_delete_opt= FK_OPTION_UNDEF; + } + | ON DELETE_SYM delete_option + { + LEX *lex= Lex; + lex->fk_update_opt= FK_OPTION_UNDEF; + lex->fk_delete_opt= $3; + } + | ON UPDATE_SYM 
delete_option + ON DELETE_SYM delete_option + { + LEX *lex= Lex; + lex->fk_update_opt= $3; + lex->fk_delete_opt= $6; + } + | ON DELETE_SYM delete_option + ON UPDATE_SYM delete_option + { + LEX *lex= Lex; + lex->fk_update_opt= $6; + lex->fk_delete_opt= $3; + } + ; + +delete_option: + RESTRICT { $$= FK_OPTION_RESTRICT; } + | CASCADE { $$= FK_OPTION_CASCADE; } + | SET NULL_SYM { $$= FK_OPTION_SET_NULL; } + | NO_SYM ACTION { $$= FK_OPTION_NO_ACTION; } + | SET DEFAULT { $$= FK_OPTION_SET_DEFAULT; } + ; + +constraint_key_type: + PRIMARY_SYM KEY_SYM { $$= Key::PRIMARY; } + | UNIQUE_SYM opt_key_or_index { $$= Key::UNIQUE; } + ; + +key_or_index: + KEY_SYM {} + | INDEX_SYM {} + ; + +opt_key_or_index: + /* empty */ {} + | key_or_index + ; + +keys_or_index: + KEYS {} + | INDEX_SYM {} + | INDEXES {} + ; + +fulltext: + FULLTEXT_SYM { $$= Key::FULLTEXT;} + ; + +spatial: + SPATIAL_SYM + { +#ifdef HAVE_SPATIAL + $$= Key::SPATIAL; +#else + my_yyabort_error((ER_FEATURE_DISABLED, MYF(0), sym_group_geom.name, + sym_group_geom.needed_define)); +#endif + } + ; + +normal_key_options: + /* empty */ {} + | normal_key_opts { Lex->last_key->option_list= Lex->option_list; } + ; + +fulltext_key_options: + /* empty */ {} + | fulltext_key_opts { Lex->last_key->option_list= Lex->option_list; } + ; + +spatial_key_options: + /* empty */ {} + | spatial_key_opts { Lex->last_key->option_list= Lex->option_list; } + ; + +normal_key_opts: + normal_key_opt + | normal_key_opts normal_key_opt + ; + +spatial_key_opts: + spatial_key_opt + | spatial_key_opts spatial_key_opt + ; + +fulltext_key_opts: + fulltext_key_opt + | fulltext_key_opts fulltext_key_opt + ; + +opt_USING_key_algorithm: + /* Empty*/ { $$= HA_KEY_ALG_UNDEF; } + | USING btree_or_rtree { $$= $2; } + ; + +/* TYPE is a valid identifier, so it's handled differently than USING */ +opt_key_algorithm_clause: + /* Empty*/ { $$= HA_KEY_ALG_UNDEF; } + | USING btree_or_rtree { $$= $2; } + | TYPE_SYM btree_or_rtree { $$= $2; } + ; + +key_using_alg: + USING 
btree_or_rtree + { Lex->last_key->key_create_info.algorithm= $2; } + | TYPE_SYM btree_or_rtree + { Lex->last_key->key_create_info.algorithm= $2; } + ; + +all_key_opt: + KEY_BLOCK_SIZE opt_equal ulong_num + { + Lex->last_key->key_create_info.block_size= $3; + Lex->last_key->key_create_info.flags|= HA_USES_BLOCK_SIZE; + } + | COMMENT_SYM TEXT_STRING_sys + { Lex->last_key->key_create_info.comment= $2; } + | VISIBLE_SYM + { + /* This is mainly for MySQL 8.0 compatibility */ + } + | ignorability + { + Lex->last_key->key_create_info.is_ignored= $1; + } + | engine_defined_option + { + $1->link(&Lex->option_list, &Lex->option_list_last); + } + ; + +normal_key_opt: + all_key_opt + | key_using_alg + ; + +spatial_key_opt: + all_key_opt + ; + +fulltext_key_opt: + all_key_opt + | WITH PARSER_SYM IDENT_sys + { + if (likely(plugin_is_ready(&$3, MYSQL_FTPARSER_PLUGIN))) + Lex->last_key->key_create_info.parser_name= $3; + else + my_yyabort_error((ER_FUNCTION_NOT_DEFINED, MYF(0), $3.str)); + } + ; + +btree_or_rtree: + BTREE_SYM { $$= HA_KEY_ALG_BTREE; } + | RTREE_SYM { $$= HA_KEY_ALG_RTREE; } + | HASH_SYM { $$= HA_KEY_ALG_HASH; } + ; + +ignorability: + IGNORED_SYM { $$= true; } + | NOT_SYM IGNORED_SYM { $$= false; } + ; + +key_list: + key_list ',' key_part order_dir + { + $3->asc= $4; + Lex->last_key->columns.push_back($3, thd->mem_root); + } + | key_part order_dir + { + $1->asc= $2; + Lex->last_key->columns.push_back($1, thd->mem_root); + } + ; + +opt_without_overlaps: + /* nothing */ {} + | ',' ident WITHOUT OVERLAPS_SYM + { + Lex->last_key->without_overlaps= true; + Lex->last_key->period= $2; + } + ; + +key_part: + ident + { + $$= new (thd->mem_root) Key_part_spec(&$1, 0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | ident '(' NUM ')' + { + int key_part_len= atoi($3.str); + if (unlikely(!key_part_len)) + my_yyabort_error((ER_KEY_PART_0, MYF(0), $1.str)); + $$= new (thd->mem_root) Key_part_spec(&$1, (uint) key_part_len); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; 
+ +opt_ident: + /* empty */ { $$= null_clex_str; } + | field_ident { $$= $1; } + ; + +string_list: + text_string + { Lex->last_field->interval_list.push_back($1, thd->mem_root); } + | string_list ',' text_string + { Lex->last_field->interval_list.push_back($3, thd->mem_root); } + ; + +/* +** Alter table +*/ + +alter: + ALTER + { + Lex->name= null_clex_str; + Lex->table_type= TABLE_TYPE_UNKNOWN; + Lex->sql_command= SQLCOM_ALTER_TABLE; + Lex->duplicates= DUP_ERROR; + Lex->first_select_lex()->order_list.empty(); + Lex->create_info.init(); + Lex->create_info.row_type= ROW_TYPE_NOT_USED; + Lex->alter_info.reset(); + Lex->no_write_to_binlog= 0; + Lex->create_info.storage_media= HA_SM_DEFAULT; + if (Lex->main_select_push()) + MYSQL_YYABORT; + DBUG_ASSERT(!Lex->m_sql_cmd); + } + alter_options TABLE_SYM opt_if_exists table_ident opt_lock_wait_timeout + { + Lex->create_info.set($5); + if (!Lex->first_select_lex()-> + add_table_to_list(thd, $6, NULL, TL_OPTION_UPDATING, + TL_READ_NO_INSERT, MDL_SHARED_UPGRADABLE)) + MYSQL_YYABORT; + Lex->first_select_lex()->db= + (Lex->first_select_lex()->table_list.first)->db; + Lex->create_last_non_select_table= Lex->last_table(); + Lex->mark_first_table_as_inserting(); + } + alter_commands + { + if (likely(!Lex->m_sql_cmd)) + { + /* Create a generic ALTER TABLE statment. 
*/ + Lex->m_sql_cmd= new (thd->mem_root) Sql_cmd_alter_table(); + if (unlikely(Lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } + Lex->pop_select(); //main select + } + | ALTER DATABASE ident_or_empty + { + Lex->create_info.init(); + if (Lex->main_select_push(true)) + MYSQL_YYABORT; + } + create_database_options + { + LEX *lex=Lex; + lex->sql_command=SQLCOM_ALTER_DB; + lex->name= $3; + if (lex->name.str == NULL && + unlikely(lex->copy_db_to(&lex->name))) + MYSQL_YYABORT; + Lex->pop_select(); //main select + } + | ALTER DATABASE COMMENT_SYM opt_equal TEXT_STRING_sys + { + Lex->create_info.init(); + Lex->create_info.schema_comment= thd->make_clex_string($5); + Lex->create_info.used_fields|= HA_CREATE_USED_COMMENT; + } + opt_create_database_options + { + LEX *lex=Lex; + lex->sql_command=SQLCOM_ALTER_DB; + lex->name= Lex_ident_sys(); + if (lex->name.str == NULL && + unlikely(lex->copy_db_to(&lex->name))) + MYSQL_YYABORT; + } + | ALTER DATABASE ident UPGRADE_SYM DATA_SYM DIRECTORY_SYM NAME_SYM + { + LEX *lex= Lex; + if (unlikely(lex->sphead)) + my_yyabort_error((ER_SP_NO_DROP_SP, MYF(0), "DATABASE")); + lex->sql_command= SQLCOM_ALTER_DB_UPGRADE; + lex->name= $3; + } + | ALTER PROCEDURE_SYM sp_name + { + if (Lex->stmt_alter_procedure_start($3)) + MYSQL_YYABORT; + } + sp_a_chistics + stmt_end {} + | ALTER FUNCTION_SYM sp_name + { + if (Lex->stmt_alter_function_start($3)) + MYSQL_YYABORT; + } + sp_a_chistics + stmt_end {} + | ALTER view_algorithm definer_opt opt_view_suid VIEW_SYM table_ident + { + if (Lex->main_select_push()) + MYSQL_YYABORT; + if (Lex->add_alter_view(thd, $2, $4, $6)) + MYSQL_YYABORT; + } + view_list_opt AS view_select stmt_end {} + | ALTER definer_opt opt_view_suid VIEW_SYM table_ident + /* + We have two separate rules for ALTER VIEW rather that + optional view_algorithm above, to resolve the ambiguity + with the ALTER EVENT below. 
+ */ + { + if (Lex->main_select_push()) + MYSQL_YYABORT; + if (Lex->add_alter_view(thd, VIEW_ALGORITHM_INHERIT, $3, $5)) + MYSQL_YYABORT; + } + view_list_opt AS view_select stmt_end {} + | ALTER definer_opt remember_name EVENT_SYM sp_name + { + if (Lex->main_select_push()) + MYSQL_YYABORT; + /* + It is safe to use Lex->spname because + ALTER EVENT xxx RENATE TO yyy DO ALTER EVENT RENAME TO + is not allowed. Lex->spname is used in the case of RENAME TO + If it had to be supported spname had to be added to + Event_parse_data. + */ + + if (unlikely(!(Lex->event_parse_data= Event_parse_data::new_instance(thd)))) + MYSQL_YYABORT; + Lex->event_parse_data->identifier= $5; + + Lex->sql_command= SQLCOM_ALTER_EVENT; + Lex->stmt_definition_begin= $3; + } + ev_alter_on_schedule_completion + opt_ev_rename_to + opt_ev_status + opt_ev_comment + opt_ev_sql_stmt + { + if (unlikely(!($7 || $8 || $9 || $10 || $11))) + { + thd->parse_error(); + MYSQL_YYABORT; + } + /* + sql_command is set here because some rules in ev_sql_stmt + can overwrite it + */ + Lex->sql_command= SQLCOM_ALTER_EVENT; + Lex->stmt_definition_end= (char*)YYLIP->get_cpp_ptr(); + + Lex->pop_select(); //main select + } + | ALTER SERVER_SYM ident_or_text + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_ALTER_SERVER; + lex->server_options.reset($3); + } OPTIONS_SYM '(' server_options_list ')' { } + /* ALTER USER foo is allowed for MySQL compatibility. 
*/ + | ALTER USER_SYM opt_if_exists clear_privileges grant_list + opt_require_clause opt_resource_options opt_account_locking_and_opt_password_expiration + { + Lex->create_info.set($3); + Lex->sql_command= SQLCOM_ALTER_USER; + } + | ALTER SEQUENCE_SYM opt_if_exists + { + LEX *lex= Lex; + lex->name= null_clex_str; + lex->table_type= TABLE_TYPE_UNKNOWN; + lex->sql_command= SQLCOM_ALTER_SEQUENCE; + lex->create_info.init(); + lex->no_write_to_binlog= 0; + DBUG_ASSERT(!lex->m_sql_cmd); + if (Lex->main_select_push()) + MYSQL_YYABORT; + } + table_ident + { + LEX *lex= Lex; + if (!(lex->create_info.seq_create_info= new (thd->mem_root) + sequence_definition()) || + !lex->first_select_lex()-> + add_table_to_list(thd, $5, NULL, TL_OPTION_SEQUENCE, + TL_WRITE, MDL_EXCLUSIVE)) + MYSQL_YYABORT; + } + sequence_defs + { + /* Create a generic ALTER SEQUENCE statment. */ + Lex->m_sql_cmd= new (thd->mem_root) Sql_cmd_alter_sequence($3); + if (unlikely(Lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } stmt_end {} + ; + +account_locking_option: + LOCK_SYM + { + Lex->account_options.account_locked= ACCOUNTLOCK_LOCKED; + } + | UNLOCK_SYM + { + Lex->account_options.account_locked= ACCOUNTLOCK_UNLOCKED; + } + ; + +opt_password_expire_option: + /* empty */ + { + Lex->account_options.password_expire= PASSWORD_EXPIRE_NOW; + } + | NEVER_SYM + { + Lex->account_options.password_expire= PASSWORD_EXPIRE_NEVER; + } + | DEFAULT + { + Lex->account_options.password_expire= PASSWORD_EXPIRE_DEFAULT; + } + | INTERVAL_SYM NUM DAY_SYM + { + Lex->account_options.password_expire= PASSWORD_EXPIRE_INTERVAL; + if (!(Lex->account_options.num_expiration_days= atoi($2.str))) + my_yyabort_error((ER_WRONG_VALUE, MYF(0), "DAY", $2.str)); + } + ; + +opt_account_locking_and_opt_password_expiration: + /* empty */ + | ACCOUNT_SYM account_locking_option + | PASSWORD_SYM EXPIRE_SYM opt_password_expire_option + | ACCOUNT_SYM account_locking_option PASSWORD_SYM EXPIRE_SYM opt_password_expire_option + | PASSWORD_SYM EXPIRE_SYM 
opt_password_expire_option ACCOUNT_SYM account_locking_option + ; + +ev_alter_on_schedule_completion: + /* empty */ { $$= 0;} + | ON SCHEDULE_SYM ev_schedule_time { $$= 1; } + | ev_on_completion { $$= 1; } + | ON SCHEDULE_SYM ev_schedule_time ev_on_completion { $$= 1; } + ; + +opt_ev_rename_to: + /* empty */ { $$= 0;} + | RENAME TO_SYM sp_name + { + /* + Use lex's spname to hold the new name. + The original name is in the Event_parse_data object + */ + Lex->spname= $3; + $$= 1; + } + ; + +opt_ev_sql_stmt: + /* empty*/ { $$= 0;} + | DO_SYM ev_sql_stmt { $$= 1; } + ; + +ident_or_empty: + /* empty */ + %prec PREC_BELOW_IDENTIFIER_OPT_SPECIAL_CASE { $$= Lex_ident_sys(); } + | ident + ; + +alter_commands: + /* empty */ + | DISCARD TABLESPACE + { + Lex->m_sql_cmd= new (thd->mem_root) + Sql_cmd_discard_import_tablespace( + Sql_cmd_discard_import_tablespace::DISCARD_TABLESPACE); + if (unlikely(Lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } + | IMPORT TABLESPACE + { + Lex->m_sql_cmd= new (thd->mem_root) + Sql_cmd_discard_import_tablespace( + Sql_cmd_discard_import_tablespace::IMPORT_TABLESPACE); + if (unlikely(Lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } + | alter_list + opt_partitioning + | alter_list + remove_partitioning + | remove_partitioning + | partitioning +/* + This part was added for release 5.1 by Mikael Ronstrm. + From here we insert a number of commands to manage the partitions of a + partitioned table such as adding partitions, dropping partitions, + reorganising partitions in various manners. In future releases the list + will be longer. 
+*/ + | add_partition_rule + | DROP PARTITION_SYM opt_if_exists alt_part_name_list + { + Lex->alter_info.partition_flags|= ALTER_PARTITION_DROP; + DBUG_ASSERT(!Lex->if_exists()); + Lex->create_info.add($3); + } + | REBUILD_SYM PARTITION_SYM opt_no_write_to_binlog + all_or_alt_part_name_list + { + LEX *lex= Lex; + lex->alter_info.partition_flags|= ALTER_PARTITION_REBUILD; + lex->no_write_to_binlog= $3; + } + | OPTIMIZE PARTITION_SYM opt_no_write_to_binlog + all_or_alt_part_name_list + { + LEX *lex= thd->lex; + lex->no_write_to_binlog= $3; + lex->check_opt.init(); + DBUG_ASSERT(!lex->m_sql_cmd); + lex->m_sql_cmd= new (thd->mem_root) + Sql_cmd_alter_table_optimize_partition(); + if (unlikely(lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } + opt_no_write_to_binlog + | ANALYZE_SYM PARTITION_SYM opt_no_write_to_binlog + all_or_alt_part_name_list + { + LEX *lex= thd->lex; + lex->no_write_to_binlog= $3; + lex->check_opt.init(); + DBUG_ASSERT(!lex->m_sql_cmd); + lex->m_sql_cmd= new (thd->mem_root) + Sql_cmd_alter_table_analyze_partition(); + if (unlikely(lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } + | CHECK_SYM PARTITION_SYM all_or_alt_part_name_list + { + LEX *lex= thd->lex; + lex->check_opt.init(); + DBUG_ASSERT(!lex->m_sql_cmd); + lex->m_sql_cmd= new (thd->mem_root) + Sql_cmd_alter_table_check_partition(); + if (unlikely(lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } + opt_mi_check_type + | REPAIR PARTITION_SYM opt_no_write_to_binlog + all_or_alt_part_name_list + { + LEX *lex= thd->lex; + lex->no_write_to_binlog= $3; + lex->check_opt.init(); + DBUG_ASSERT(!lex->m_sql_cmd); + lex->m_sql_cmd= new (thd->mem_root) + Sql_cmd_alter_table_repair_partition(); + if (unlikely(lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } + opt_mi_repair_type + | COALESCE PARTITION_SYM opt_no_write_to_binlog real_ulong_num + { + LEX *lex= Lex; + lex->alter_info.partition_flags|= ALTER_PARTITION_COALESCE; + lex->no_write_to_binlog= $3; + lex->alter_info.num_parts= $4; + } + | TRUNCATE_SYM 
PARTITION_SYM all_or_alt_part_name_list + { + LEX *lex= thd->lex; + lex->check_opt.init(); + DBUG_ASSERT(!lex->m_sql_cmd); + lex->m_sql_cmd= new (thd->mem_root) + Sql_cmd_alter_table_truncate_partition(); + if (unlikely(lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } + | reorg_partition_rule + | EXCHANGE_SYM PARTITION_SYM alt_part_name_item + WITH TABLE_SYM table_ident have_partitioning + { + if (Lex->stmt_alter_table_exchange_partition($6)) + MYSQL_YYABORT; + } + | CONVERT_SYM PARTITION_SYM alt_part_name_item + TO_SYM TABLE_SYM table_ident have_partitioning + { + LEX *lex= Lex; + if (Lex->stmt_alter_table($6)) + MYSQL_YYABORT; + lex->m_sql_cmd= new (thd->mem_root) Sql_cmd_alter_table(); + if (unlikely(lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + lex->alter_info.partition_flags|= ALTER_PARTITION_CONVERT_OUT; + } + | CONVERT_SYM TABLE_SYM table_ident + { + LEX *lex= Lex; + if (!lex->first_select_lex()->add_table_to_list(thd, $3, nullptr, 0, + TL_READ_NO_INSERT, + MDL_SHARED_NO_WRITE)) + MYSQL_YYABORT; + + /* + This will appear as (new_db, new_name) in alter_ctx. + new_db will be IX-locked and new_name X-locked. + */ + lex->first_select_lex()->db= $3->db; + lex->name= $3->table; + if (lex->first_select_lex()->db.str == NULL && + lex->copy_db_to(&lex->first_select_lex()->db)) + MYSQL_YYABORT; + + lex->part_info= new (thd->mem_root) partition_info(); + if (unlikely(!lex->part_info)) + MYSQL_YYABORT; + + lex->part_info->num_parts= 1; + /* + OR-ed with ALTER_PARTITION_ADD because too many checks of + ALTER_PARTITION_ADD required. 
+ */ + lex->alter_info.partition_flags|= ALTER_PARTITION_ADD | + ALTER_PARTITION_CONVERT_IN; + } + TO_SYM PARTITION_SYM part_definition + { + LEX *lex= Lex; + lex->m_sql_cmd= new (thd->mem_root) Sql_cmd_alter_table(); + if (unlikely(lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } + ; + +remove_partitioning: + REMOVE_SYM PARTITIONING_SYM + { + Lex->alter_info.partition_flags|= ALTER_PARTITION_REMOVE; + } + ; + +all_or_alt_part_name_list: + ALL + { + Lex->alter_info.partition_flags|= ALTER_PARTITION_ALL; + } + | alt_part_name_list + ; + +add_partition_rule: + ADD PARTITION_SYM opt_if_not_exists + opt_no_write_to_binlog + { + LEX *lex= Lex; + lex->part_info= new (thd->mem_root) partition_info(); + if (unlikely(!lex->part_info)) + MYSQL_YYABORT; + + lex->alter_info.partition_flags|= ALTER_PARTITION_ADD; + DBUG_ASSERT(!Lex->create_info.if_not_exists()); + lex->create_info.set($3); + lex->no_write_to_binlog= $4; + } + add_part_extra + {} + ; + +add_part_extra: + /* empty */ + | '(' part_def_list ')' + { + LEX *lex= Lex; + lex->part_info->num_parts= lex->part_info->partitions.elements; + } + | PARTITIONS_SYM real_ulong_num + { + Lex->part_info->num_parts= $2; + } + ; + +reorg_partition_rule: + REORGANIZE_SYM PARTITION_SYM opt_no_write_to_binlog + { + LEX *lex= Lex; + lex->part_info= new (thd->mem_root) partition_info(); + if (unlikely(!lex->part_info)) + MYSQL_YYABORT; + + lex->no_write_to_binlog= $3; + } + reorg_parts_rule + ; + +reorg_parts_rule: + /* empty */ + { + Lex->alter_info.partition_flags|= ALTER_PARTITION_TABLE_REORG; + } + | alt_part_name_list + { + Lex->alter_info.partition_flags|= ALTER_PARTITION_REORGANIZE; + } + INTO '(' part_def_list ')' + { + partition_info *part_info= Lex->part_info; + part_info->num_parts= part_info->partitions.elements; + } + ; + +alt_part_name_list: + alt_part_name_item {} + | alt_part_name_list ',' alt_part_name_item {} + ; + +alt_part_name_item: + ident + { + if (unlikely(Lex->alter_info.partition_names.push_back($1.str, + 
thd->mem_root))) + MYSQL_YYABORT; + } + ; + +/* + End of management of partition commands +*/ + +alter_list: + alter_list_item + | alter_list ',' alter_list_item + ; + +add_column: + ADD opt_column opt_if_not_exists_table_element + ; + +alter_list_item: + add_column column_def opt_place + { + LEX *lex=Lex; + lex->create_last_non_select_table= lex->last_table(); + lex->alter_info.flags|= ALTER_PARSER_ADD_COLUMN; + $2->after= $3; + } + | ADD key_def + { + Lex->create_last_non_select_table= Lex->last_table(); + Lex->alter_info.flags|= ALTER_ADD_INDEX; + } + | ADD period_for_system_time + { + Lex->alter_info.flags|= ALTER_ADD_PERIOD; + } + | ADD + PERIOD_SYM opt_if_not_exists_table_element period_for_application_time + { + Table_period_info &period= Lex->create_info.period_info; + period.create_if_not_exists= Lex->check_exists; + Lex->alter_info.flags|= ALTER_ADD_CHECK_CONSTRAINT; + } + | add_column '(' create_field_list ')' + { + LEX *lex=Lex; + lex->alter_info.flags|= ALTER_PARSER_ADD_COLUMN; + if (!lex->alter_info.key_list.is_empty()) + lex->alter_info.flags|= ALTER_ADD_INDEX; + } + | ADD constraint_def + { + Lex->alter_info.flags|= ALTER_ADD_CHECK_CONSTRAINT; + } + | ADD CONSTRAINT IF_SYM not EXISTS field_ident check_constraint + { + Lex->alter_info.flags|= ALTER_ADD_CHECK_CONSTRAINT; + Lex->add_constraint($6, $7, TRUE); + } + | CHANGE opt_column opt_if_exists_table_element field_ident + field_spec opt_place + { + Lex->alter_info.flags|= ALTER_CHANGE_COLUMN | ALTER_RENAME_COLUMN; + Lex->create_last_non_select_table= Lex->last_table(); + $5->change= $4; + $5->after= $6; + } + | MODIFY_SYM opt_column opt_if_exists_table_element + field_spec opt_place + { + Lex->alter_info.flags|= ALTER_CHANGE_COLUMN; + Lex->create_last_non_select_table= Lex->last_table(); + $4->change= $4->field_name; + $4->after= $5; + } + | DROP opt_column opt_if_exists_table_element field_ident opt_restrict + { + LEX *lex=Lex; + Alter_drop *ad= (new (thd->mem_root) + Alter_drop(Alter_drop::COLUMN, 
$4.str, $3)); + if (unlikely(ad == NULL)) + MYSQL_YYABORT; + lex->alter_info.drop_list.push_back(ad, thd->mem_root); + lex->alter_info.flags|= ALTER_PARSER_DROP_COLUMN; + } + | DROP CONSTRAINT opt_if_exists_table_element field_ident + { + LEX *lex=Lex; + Alter_drop *ad= (new (thd->mem_root) + Alter_drop(Alter_drop::CHECK_CONSTRAINT, + $4.str, $3)); + if (unlikely(ad == NULL)) + MYSQL_YYABORT; + lex->alter_info.drop_list.push_back(ad, thd->mem_root); + lex->alter_info.flags|= ALTER_DROP_CHECK_CONSTRAINT; + } + | DROP FOREIGN KEY_SYM opt_if_exists_table_element field_ident + { + LEX *lex=Lex; + Alter_drop *ad= (new (thd->mem_root) + Alter_drop(Alter_drop::FOREIGN_KEY, $5.str, $4)); + if (unlikely(ad == NULL)) + MYSQL_YYABORT; + lex->alter_info.drop_list.push_back(ad, thd->mem_root); + lex->alter_info.flags|= ALTER_DROP_FOREIGN_KEY; + } + | DROP opt_constraint_no_id PRIMARY_SYM KEY_SYM + { + LEX *lex=Lex; + Alter_drop *ad= (new (thd->mem_root) + Alter_drop(Alter_drop::KEY, primary_key_name.str, + FALSE)); + if (unlikely(ad == NULL)) + MYSQL_YYABORT; + lex->alter_info.drop_list.push_back(ad, thd->mem_root); + lex->alter_info.flags|= ALTER_DROP_INDEX; + } + | DROP key_or_index opt_if_exists_table_element field_ident + { + LEX *lex=Lex; + Alter_drop *ad= (new (thd->mem_root) + Alter_drop(Alter_drop::KEY, $4.str, $3)); + if (unlikely(ad == NULL)) + MYSQL_YYABORT; + lex->alter_info.drop_list.push_back(ad, thd->mem_root); + lex->alter_info.flags|= ALTER_DROP_INDEX; + } + | DISABLE_SYM KEYS + { + LEX *lex=Lex; + lex->alter_info.keys_onoff= Alter_info::DISABLE; + lex->alter_info.flags|= ALTER_KEYS_ONOFF; + } + | ENABLE_SYM KEYS + { + LEX *lex=Lex; + lex->alter_info.keys_onoff= Alter_info::ENABLE; + lex->alter_info.flags|= ALTER_KEYS_ONOFF; + } + | ALTER opt_column opt_if_exists_table_element field_ident SET DEFAULT column_default_expr + { + if (check_expression($7, &$4, VCOL_DEFAULT)) + MYSQL_YYABORT; + if (unlikely(Lex->add_alter_list($4, $7, $3))) + MYSQL_YYABORT; + } + | 
ALTER key_or_index opt_if_exists_table_element ident ignorability + { + LEX *lex= Lex; + Alter_index_ignorability *ac= new (thd->mem_root) + Alter_index_ignorability($4.str, $5, $3); + if (ac == NULL) + MYSQL_YYABORT; + lex->alter_info.alter_index_ignorability_list.push_back(ac); + lex->alter_info.flags|= ALTER_INDEX_IGNORABILITY; + } + | ALTER opt_column opt_if_exists_table_element field_ident DROP DEFAULT + { + if (unlikely(Lex->add_alter_list($4, (Virtual_column_info*) 0, $3))) + MYSQL_YYABORT; + } + | RENAME opt_to table_ident + { + if (Lex->stmt_alter_table($3)) + MYSQL_YYABORT; + Lex->alter_info.flags|= ALTER_RENAME; + } + | RENAME COLUMN_SYM opt_if_exists_table_element ident TO_SYM ident + { + if (unlikely(Lex->add_alter_list($4, $6, $3))) + MYSQL_YYABORT; + } + | RENAME key_or_index opt_if_exists_table_element field_ident TO_SYM field_ident + { + LEX *lex=Lex; + Alter_rename_key *ak= new (thd->mem_root) + Alter_rename_key($4, $6, $3); + if (ak == NULL) + MYSQL_YYABORT; + lex->alter_info.alter_rename_key_list.push_back(ak); + lex->alter_info.flags|= ALTER_RENAME_INDEX; + } + | CONVERT_SYM TO_SYM charset charset_name_or_default + { + if (Lex->add_alter_list_item_convert_to_charset($4)) + MYSQL_YYABORT; + } + | CONVERT_SYM TO_SYM charset charset_name_or_default + COLLATE_SYM collation_name_or_default + { + if (Lex->add_alter_list_item_convert_to_charset($4, $6)) + MYSQL_YYABORT; + } + | create_table_options_space_separated + { + LEX *lex=Lex; + lex->alter_info.flags|= ALTER_OPTIONS; + } + | FORCE_SYM + { + Lex->alter_info.flags|= ALTER_RECREATE; + } + | alter_order_clause + { + LEX *lex=Lex; + lex->alter_info.flags|= ALTER_ORDER; + } + | alter_algorithm_option + | alter_lock_option + | ADD SYSTEM VERSIONING_SYM + { + Lex->alter_info.flags|= ALTER_ADD_SYSTEM_VERSIONING; + Lex->create_info.options|= HA_VERSIONED_TABLE; + } + | DROP SYSTEM VERSIONING_SYM + { + Lex->alter_info.flags|= ALTER_DROP_SYSTEM_VERSIONING; + Lex->create_info.options&= ~HA_VERSIONED_TABLE; 
+ } + | DROP PERIOD_SYM FOR_SYSTEM_TIME_SYM + { + Lex->alter_info.flags|= ALTER_DROP_PERIOD; + } + | DROP PERIOD_SYM opt_if_exists_table_element FOR_SYM ident + { + Alter_drop *ad= new Alter_drop(Alter_drop::PERIOD, $5.str, $3); + if (unlikely(ad == NULL)) + MYSQL_YYABORT; + Lex->alter_info.drop_list.push_back(ad, thd->mem_root); + Lex->alter_info.flags|= ALTER_DROP_CHECK_CONSTRAINT; + } + ; + +opt_index_lock_algorithm: + /* empty */ + | alter_lock_option + | alter_algorithm_option + | alter_lock_option alter_algorithm_option + | alter_algorithm_option alter_lock_option + ; + +alter_algorithm_option: + ALGORITHM_SYM opt_equal DEFAULT + { + Lex->alter_info.set_requested_algorithm( + Alter_info::ALTER_TABLE_ALGORITHM_DEFAULT); + } + | ALGORITHM_SYM opt_equal ident + { + if (unlikely(Lex->alter_info.set_requested_algorithm(&$3))) + my_yyabort_error((ER_UNKNOWN_ALTER_ALGORITHM, MYF(0), $3.str)); + } + ; + +alter_lock_option: + LOCK_SYM opt_equal DEFAULT + { + Lex->alter_info.requested_lock= + Alter_info::ALTER_TABLE_LOCK_DEFAULT; + } + | LOCK_SYM opt_equal ident + { + if (unlikely(Lex->alter_info.set_requested_lock(&$3))) + my_yyabort_error((ER_UNKNOWN_ALTER_LOCK, MYF(0), $3.str)); + } + ; + +opt_column: + /* empty */ {} %prec PREC_BELOW_IDENTIFIER_OPT_SPECIAL_CASE + | COLUMN_SYM {} + ; + +opt_ignore: + /* empty */ { Lex->ignore= 0;} + | IGNORE_SYM { Lex->ignore= 1;} + ; + +alter_options: + { Lex->ignore= 0;} alter_options_part2 + ; + +alter_options_part2: + /* empty */ + | alter_option_list + ; + +alter_option_list: + alter_option_list alter_option + | alter_option + ; + +alter_option: + IGNORE_SYM { Lex->ignore= 1;} + | ONLINE_SYM + { + Lex->alter_info.requested_lock= + Alter_info::ALTER_TABLE_LOCK_NONE; + } + ; + +opt_restrict: + /* empty */ { Lex->drop_mode= DROP_DEFAULT; } + | RESTRICT { Lex->drop_mode= DROP_RESTRICT; } + | CASCADE { Lex->drop_mode= DROP_CASCADE; } + ; + +opt_place: + /* empty */ { $$= null_clex_str; } + | AFTER_SYM ident + { + $$= $2; + 
Lex->alter_info.flags |= ALTER_COLUMN_ORDER; + } + | FIRST_SYM + { + $$.str= first_keyword; + $$.length= 5; /* Length of "first" */ + Lex->alter_info.flags |= ALTER_COLUMN_ORDER; + } + ; + +opt_to: + /* empty */ {} + | TO_SYM {} + | '=' {} + | AS {} + ; + +slave: + START_SYM SLAVE optional_connection_name slave_thread_opts optional_for_channel + { + LEX *lex=Lex; + lex->sql_command = SQLCOM_SLAVE_START; + lex->type = 0; + /* If you change this code don't forget to update SLAVE START too */ + } + slave_until + {} + | START_SYM ALL SLAVES slave_thread_opts + { + LEX *lex=Lex; + lex->sql_command = SQLCOM_SLAVE_ALL_START; + lex->type = 0; + /* If you change this code don't forget to update STOP SLAVE too */ + } + {} + | STOP_SYM SLAVE optional_connection_name slave_thread_opts optional_for_channel + { + LEX *lex=Lex; + lex->sql_command = SQLCOM_SLAVE_STOP; + lex->type = 0; + /* If you change this code don't forget to update SLAVE STOP too */ + } + | STOP_SYM ALL SLAVES slave_thread_opts + { + LEX *lex=Lex; + lex->sql_command = SQLCOM_SLAVE_ALL_STOP; + lex->type = 0; + /* If you change this code don't forget to update SLAVE STOP too */ + } + ; + +start: + START_SYM TRANSACTION_SYM opt_start_transaction_option_list + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_BEGIN; + /* READ ONLY and READ WRITE are mutually exclusive. 
*/ + if (unlikely(($3 & MYSQL_START_TRANS_OPT_READ_WRITE) && + ($3 & MYSQL_START_TRANS_OPT_READ_ONLY))) + { + thd->parse_error(); + MYSQL_YYABORT; + } + lex->start_transaction_opt= $3; + } + ; + +opt_start_transaction_option_list: + /* empty */ + { + $$= 0; + } + | start_transaction_option_list + { + $$= $1; + } + ; + +start_transaction_option_list: + start_transaction_option + { + $$= $1; + } + | start_transaction_option_list ',' start_transaction_option + { + $$= $1 | $3; + } + ; + +start_transaction_option: + WITH CONSISTENT_SYM SNAPSHOT_SYM + { + $$= MYSQL_START_TRANS_OPT_WITH_CONS_SNAPSHOT; + } + | READ_SYM ONLY_SYM + { + $$= MYSQL_START_TRANS_OPT_READ_ONLY; + } + | READ_SYM WRITE_SYM + { + $$= MYSQL_START_TRANS_OPT_READ_WRITE; + } + ; + +slave_thread_opts: + { Lex->slave_thd_opt= 0; } + slave_thread_opt_list + {} + ; + +slave_thread_opt_list: + slave_thread_opt + | slave_thread_opt_list ',' slave_thread_opt + ; + +slave_thread_opt: + /*empty*/ {} + | SQL_THREAD { Lex->slave_thd_opt|=SLAVE_SQL; } + | RELAY_THREAD { Lex->slave_thd_opt|=SLAVE_IO; } + ; + +slave_until: + /*empty*/ {} + | UNTIL_SYM slave_until_opts + { + LEX *lex=Lex; + if (unlikely(((lex->mi.log_file_name || lex->mi.pos) && + (lex->mi.relay_log_name || lex->mi.relay_log_pos)) || + !((lex->mi.log_file_name && lex->mi.pos) || + (lex->mi.relay_log_name && lex->mi.relay_log_pos)))) + my_yyabort_error((ER_BAD_SLAVE_UNTIL_COND, MYF(0))); + } + | UNTIL_SYM MASTER_GTID_POS_SYM '=' TEXT_STRING_sys + { + Lex->mi.gtid_pos_str = $4; + } + ; + +slave_until_opts: + master_file_def + | slave_until_opts ',' master_file_def + ; + +checksum: + CHECKSUM_SYM table_or_tables + { + LEX *lex=Lex; + lex->sql_command = SQLCOM_CHECKSUM; + /* Will be overridden during execution. 
*/ + YYPS->m_lock_type= TL_UNLOCK; + } + table_list opt_checksum_type + {} + ; + +opt_checksum_type: + /* nothing */ { Lex->check_opt.flags= 0; } + | QUICK { Lex->check_opt.flags= T_QUICK; } + | EXTENDED_SYM { Lex->check_opt.flags= T_EXTEND; } + ; + +repair_table_or_view: + table_or_tables table_list opt_mi_repair_type + | VIEW_SYM + { Lex->table_type= TABLE_TYPE_VIEW; } + table_list opt_view_repair_type + ; + +repair: + REPAIR opt_no_write_to_binlog + { + LEX *lex=Lex; + lex->sql_command = SQLCOM_REPAIR; + lex->no_write_to_binlog= $2; + lex->check_opt.init(); + lex->alter_info.reset(); + /* Will be overridden during execution. */ + YYPS->m_lock_type= TL_UNLOCK; + } + repair_table_or_view + { + LEX* lex= thd->lex; + DBUG_ASSERT(!lex->m_sql_cmd); + lex->m_sql_cmd= new (thd->mem_root) Sql_cmd_repair_table(); + if (unlikely(lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } + ; + +opt_mi_repair_type: + /* empty */ { Lex->check_opt.flags = T_MEDIUM; } + | mi_repair_types {} + ; + +mi_repair_types: + mi_repair_type {} + | mi_repair_type mi_repair_types {} + ; + +mi_repair_type: + QUICK { Lex->check_opt.flags|= T_QUICK; } + | EXTENDED_SYM { Lex->check_opt.flags|= T_EXTEND; } + | USE_FRM { Lex->check_opt.sql_flags|= TT_USEFRM; } + ; + +opt_view_repair_type: + /* empty */ { } + | FOR_SYM UPGRADE_SYM { Lex->check_opt.sql_flags|= TT_FOR_UPGRADE; } + | FROM MYSQL_SYM { Lex->check_opt.sql_flags|= TT_FROM_MYSQL; } + ; + +analyze: + ANALYZE_SYM opt_no_write_to_binlog table_or_tables + { + LEX *lex=Lex; + lex->sql_command = SQLCOM_ANALYZE; + lex->no_write_to_binlog= $2; + lex->check_opt.init(); + lex->alter_info.reset(); + /* Will be overridden during execution. 
*/ + YYPS->m_lock_type= TL_UNLOCK; + } + analyze_table_list + { + LEX* lex= thd->lex; + DBUG_ASSERT(!lex->m_sql_cmd); + lex->m_sql_cmd= new (thd->mem_root) Sql_cmd_analyze_table(); + if (unlikely(lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } + ; + +analyze_table_list: + analyze_table_elem_spec + | analyze_table_list ',' analyze_table_elem_spec + ; + +analyze_table_elem_spec: + table_name opt_persistent_stat_clause + ; + +opt_persistent_stat_clause: + /* empty */ + {} + | PERSISTENT_SYM FOR_SYM persistent_stat_spec + { + thd->lex->with_persistent_for_clause= TRUE; + } + ; + +persistent_stat_spec: + ALL + {} + | COLUMNS persistent_column_stat_spec INDEXES persistent_index_stat_spec + {} + ; + +persistent_column_stat_spec: + ALL {} + | '(' + { + LEX* lex= thd->lex; + lex->column_list= new (thd->mem_root) List; + if (unlikely(lex->column_list == NULL)) + MYSQL_YYABORT; + } + table_column_list + ')' + { } + ; + +persistent_index_stat_spec: + ALL {} + | '(' + { + LEX* lex= thd->lex; + lex->index_list= new (thd->mem_root) List; + if (unlikely(lex->index_list == NULL)) + MYSQL_YYABORT; + } + table_index_list + ')' + { } + ; + +table_column_list: + /* empty */ + {} + | ident + { + Lex->column_list->push_back((LEX_STRING*) + thd->memdup(&$1, sizeof(LEX_STRING)), thd->mem_root); + } + | table_column_list ',' ident + { + Lex->column_list->push_back((LEX_STRING*) + thd->memdup(&$3, sizeof(LEX_STRING)), thd->mem_root); + } + ; + +table_index_list: + /* empty */ + {} + | table_index_name + | table_index_list ',' table_index_name + ; + +table_index_name: + ident + { + Lex->index_list->push_back((LEX_STRING*) + thd->memdup(&$1, sizeof(LEX_STRING)), + thd->mem_root); + } + | + PRIMARY_SYM + { + LEX_STRING str= {(char*) "PRIMARY", 7}; + Lex->index_list->push_back((LEX_STRING*) + thd->memdup(&str, sizeof(LEX_STRING)), + thd->mem_root); + } + ; + +binlog_base64_event: + BINLOG_SYM TEXT_STRING_sys + { + Lex->sql_command = SQLCOM_BINLOG_BASE64_EVENT; + Lex->comment= $2; + Lex->ident.str= 
NULL; + Lex->ident.length= 0; + } + | + BINLOG_SYM '@' ident_or_text ',' '@' ident_or_text + { + Lex->sql_command = SQLCOM_BINLOG_BASE64_EVENT; + Lex->comment= $3; + Lex->ident= $6; + } + ; + +check_view_or_table: + table_or_tables table_list opt_mi_check_type + | VIEW_SYM + { Lex->table_type= TABLE_TYPE_VIEW; } + table_list opt_view_check_type + ; + +check: CHECK_SYM + { + LEX *lex=Lex; + + lex->sql_command = SQLCOM_CHECK; + lex->check_opt.init(); + lex->alter_info.reset(); + /* Will be overridden during execution. */ + YYPS->m_lock_type= TL_UNLOCK; + } + check_view_or_table + { + LEX* lex= thd->lex; + if (unlikely(lex->sphead)) + my_yyabort_error((ER_SP_BADSTATEMENT, MYF(0), "CHECK")); + DBUG_ASSERT(!lex->m_sql_cmd); + lex->m_sql_cmd= new (thd->mem_root) Sql_cmd_check_table(); + if (unlikely(lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } + ; + +opt_mi_check_type: + /* empty */ { Lex->check_opt.flags = T_MEDIUM; } + | mi_check_types {} + ; + +mi_check_types: + mi_check_type {} + | mi_check_type mi_check_types {} + ; + +mi_check_type: + QUICK { Lex->check_opt.flags|= T_QUICK; } + | FAST_SYM { Lex->check_opt.flags|= T_FAST; } + | MEDIUM_SYM { Lex->check_opt.flags|= T_MEDIUM; } + | EXTENDED_SYM { Lex->check_opt.flags|= T_EXTEND; } + | CHANGED { Lex->check_opt.flags|= T_CHECK_ONLY_CHANGED; } + | FOR_SYM UPGRADE_SYM { Lex->check_opt.sql_flags|= TT_FOR_UPGRADE; } + ; + +opt_view_check_type: + /* empty */ { } + | FOR_SYM UPGRADE_SYM { Lex->check_opt.sql_flags|= TT_FOR_UPGRADE; } + ; + +optimize: + OPTIMIZE opt_no_write_to_binlog table_or_tables + { + LEX *lex=Lex; + lex->sql_command = SQLCOM_OPTIMIZE; + lex->no_write_to_binlog= $2; + lex->check_opt.init(); + lex->alter_info.reset(); + /* Will be overridden during execution. 
*/ + YYPS->m_lock_type= TL_UNLOCK; + } + table_list opt_lock_wait_timeout + { + LEX* lex= thd->lex; + DBUG_ASSERT(!lex->m_sql_cmd); + lex->m_sql_cmd= new (thd->mem_root) Sql_cmd_optimize_table(); + if (unlikely(lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } + ; + +opt_no_write_to_binlog: + /* empty */ { $$= 0; } + | NO_WRITE_TO_BINLOG { $$= 1; } + | LOCAL_SYM { $$= 1; } + ; + +rename: + RENAME table_or_tables opt_if_exists + { + Lex->sql_command= SQLCOM_RENAME_TABLE; + Lex->create_info.set($3); + if (Lex->main_select_push()) + MYSQL_YYABORT; + } + table_to_table_list + { + Lex->pop_select(); //main select + } + | RENAME USER_SYM clear_privileges rename_list + { + Lex->sql_command = SQLCOM_RENAME_USER; + } + ; + +rename_list: + user TO_SYM user + { + if (unlikely(Lex->users_list.push_back($1, thd->mem_root) || + Lex->users_list.push_back($3, thd->mem_root))) + MYSQL_YYABORT; + } + | rename_list ',' user TO_SYM user + { + if (unlikely(Lex->users_list.push_back($3, thd->mem_root) || + Lex->users_list.push_back($5, thd->mem_root))) + MYSQL_YYABORT; + } + ; + +table_to_table_list: + table_to_table + | table_to_table_list ',' table_to_table + ; + +table_to_table: + table_ident opt_lock_wait_timeout TO_SYM table_ident + { + LEX *lex=Lex; + SELECT_LEX *sl= lex->current_select; + if (unlikely(!sl->add_table_to_list(thd, $1,NULL, + TL_OPTION_UPDATING, + TL_IGNORE, MDL_EXCLUSIVE)) || + unlikely(!sl->add_table_to_list(thd, $4, NULL, + TL_OPTION_UPDATING, + TL_IGNORE, MDL_EXCLUSIVE))) + MYSQL_YYABORT; + } + ; + +keycache: + CACHE_SYM INDEX_SYM + { + Lex->alter_info.reset(); + } + keycache_list_or_parts IN_SYM key_cache_name + { + LEX *lex=Lex; + lex->sql_command= SQLCOM_ASSIGN_TO_KEYCACHE; + lex->ident= $6; + } + ; + +keycache_list_or_parts: + keycache_list + | assign_to_keycache_parts + ; + +keycache_list: + assign_to_keycache + | keycache_list ',' assign_to_keycache + ; + +assign_to_keycache: + table_ident cache_keys_spec + { + if (unlikely(!Select->add_table_to_list(thd, $1, 
NULL, 0, TL_READ, + MDL_SHARED_READ, + Select-> + pop_index_hints()))) + MYSQL_YYABORT; + } + ; + +assign_to_keycache_parts: + table_ident adm_partition cache_keys_spec + { + if (unlikely(!Select->add_table_to_list(thd, $1, NULL, 0, TL_READ, + MDL_SHARED_READ, + Select-> + pop_index_hints()))) + MYSQL_YYABORT; + } + ; + +key_cache_name: + ident { $$= $1; } + | DEFAULT { $$ = default_key_cache_base; } + ; + +preload: + LOAD INDEX_SYM INTO CACHE_SYM + { + LEX *lex=Lex; + lex->sql_command=SQLCOM_PRELOAD_KEYS; + lex->alter_info.reset(); + if (lex->main_select_push()) + MYSQL_YYABORT; + } + preload_list_or_parts + { + Lex->pop_select(); //main select + } + ; + +preload_list_or_parts: + preload_keys_parts + | preload_list + ; + +preload_list: + preload_keys + | preload_list ',' preload_keys + ; + +preload_keys: + table_ident cache_keys_spec opt_ignore_leaves + { + if (unlikely(!Select->add_table_to_list(thd, $1, NULL, $3, TL_READ, + MDL_SHARED_READ, + Select-> + pop_index_hints()))) + MYSQL_YYABORT; + } + ; + +preload_keys_parts: + table_ident adm_partition cache_keys_spec opt_ignore_leaves + { + if (unlikely(!Select->add_table_to_list(thd, $1, NULL, $4, TL_READ, + MDL_SHARED_READ, + Select-> + pop_index_hints()))) + MYSQL_YYABORT; + } + ; + +adm_partition: + PARTITION_SYM have_partitioning + { + Lex->alter_info.partition_flags|= ALTER_PARTITION_ADMIN; + } + '(' all_or_alt_part_name_list ')' + ; + +cache_keys_spec: + { + Lex->first_select_lex()->alloc_index_hints(thd); + Select->set_index_hint_type(INDEX_HINT_USE, + INDEX_HINT_MASK_ALL); + } + cache_key_list_or_empty + ; + +cache_key_list_or_empty: + /* empty */ { } + | key_or_index '(' opt_key_usage_list ')' + ; + +opt_ignore_leaves: + /* empty */ + { $$= 0; } + | IGNORE_SYM LEAVES { $$= TL_OPTION_IGNORE_LEAVES; } + ; + +/* + Select : retrieve data from table +*/ + + +select: + query_expression_no_with_clause + { + if (Lex->push_select($1->fake_select_lex ? 
+ $1->fake_select_lex : + $1->first_select())) + MYSQL_YYABORT; + } + opt_procedure_or_into + { + Lex->pop_select(); + $1->set_with_clause(NULL); + if (Lex->select_finalize($1, $3)) + MYSQL_YYABORT; + } + | with_clause query_expression_no_with_clause + { + if (Lex->push_select($2->fake_select_lex ? + $2->fake_select_lex : + $2->first_select())) + MYSQL_YYABORT; + } + opt_procedure_or_into + { + Lex->pop_select(); + $2->set_with_clause($1); + $1->attach_to($2->first_select()); + if (Lex->select_finalize($2, $4)) + MYSQL_YYABORT; + } + ; + +select_into: + select_into_query_specification + { + if (Lex->push_select($1)) + MYSQL_YYABORT; + } + opt_order_limit_lock + { + SELECT_LEX_UNIT *unit; + if (!(unit = Lex->create_unit($1))) + MYSQL_YYABORT; + if ($3) + unit= Lex->add_tail_to_query_expression_body(unit, $3); + if (Lex->select_finalize(unit)) + MYSQL_YYABORT; + } + | with_clause + select_into_query_specification + { + if (Lex->push_select($2)) + MYSQL_YYABORT; + } + opt_order_limit_lock + { + SELECT_LEX_UNIT *unit; + if (!(unit = Lex->create_unit($2))) + MYSQL_YYABORT; + if ($4) + unit= Lex->add_tail_to_query_expression_body(unit, $4); + unit->set_with_clause($1); + $1->attach_to($2); + if (Lex->select_finalize(unit)) + MYSQL_YYABORT; + } + ; + +simple_table: + query_specification { $$= $1; } + | table_value_constructor { $$= $1; } + ; + +table_value_constructor: + VALUES + { + if (Lex->parsed_TVC_start()) + MYSQL_YYABORT; + } + values_list + { + if (!($$= Lex->parsed_TVC_end())) + MYSQL_YYABORT; + } + ; + +query_specification_start: + SELECT_SYM + { + SELECT_LEX *sel; + LEX *lex= Lex; + if (!(sel= lex->alloc_select(TRUE)) || lex->push_select(sel)) + MYSQL_YYABORT; + sel->init_select(); + sel->braces= FALSE; + } + select_options + { + Select->parsing_place= SELECT_LIST; + } + select_item_list + { + Select->parsing_place= NO_MATTER; + } + ; + +query_specification: + query_specification_start + opt_from_clause + opt_where_clause + opt_group_clause + opt_having_clause 
+ opt_window_clause + { + $$= Lex->pop_select(); + } + ; + +select_into_query_specification: + query_specification_start + into + opt_from_clause + opt_where_clause + opt_group_clause + opt_having_clause + opt_window_clause + { + $$= Lex->pop_select(); + } + ; + +/** + + The following grammar for query expressions conformant to + the latest SQL Standard is supported: + + ::= + [ ] + [ ] [ ] [ ] + + ::= + WITH [ RECURSIVE ] ::= + [ { }... ] + + ::= + [ '(' ')' ] + AS
+ + ::= + + + :: + + | UNION [ ALL | DISTINCT ] + | EXCEPT [ DISTINCT ] + + ::= + + | INTERSECT [ DISTINCT ] + + ::= + + | '(' + [ ] [ ] [ ] + ')' + + + + |
+ + + '(' ')' + +*/ + +/* + query_expression produces the same expressions as + +*/ + +query_expression: + query_expression_no_with_clause + { + $1->set_with_clause(NULL); + $$= $1; + } + | with_clause + query_expression_no_with_clause + { + $2->set_with_clause($1); + $1->attach_to($2->first_select()); + $$= $2; + } + ; + +/* + query_expression_no_with_clause produces the same expressions as + without [ ] +*/ + +query_expression_no_with_clause: + query_expression_body_ext { $$= $1; } + | query_expression_body_ext_parens { $$= $1; } + ; + +/* + query_expression_body_ext produces the same expressions as + + [ ] [ ] [ ] + | '('... + [ ] [ ] [ ] + ')'... + Note: number of ')' must be equal to the number of '(' in the rule above +*/ + +query_expression_body_ext: + query_expression_body + { + if ($1->first_select()->next_select()) + { + if (Lex->parsed_multi_operand_query_expression_body($1)) + MYSQL_YYABORT; + } + } + opt_query_expression_tail + { + if (!$3) + $$= $1; + else + $$= Lex->add_tail_to_query_expression_body($1, $3); + } + | query_expression_body_ext_parens + { + Lex->push_select(!$1->first_select()->next_select() ? + $1->first_select() : $1->fake_select_lex); + } + query_expression_tail + { + if (!($$= Lex->add_tail_to_query_expression_body_ext_parens($1, $3))) + MYSQL_YYABORT; + } + ; + +query_expression_body_ext_parens: + '(' query_expression_body_ext_parens ')' + { $$= $2; } + | '(' query_expression_body_ext ')' + { + SELECT_LEX *sel= $2->first_select()->next_select() ? 
+ $2->fake_select_lex : $2->first_select(); + sel->braces= true; + $$= $2; + } + ; + +/* + query_expression_body produces the same expressions as + +*/ + +query_expression_body: + query_simple + { + Lex->push_select($1); + if (!($$= Lex->create_unit($1))) + MYSQL_YYABORT; + } + | query_expression_body + unit_type_decl + { + if (!$1->first_select()->next_select()) + { + Lex->pop_select(); + } + } + query_primary + { + if (!($$= Lex->add_primary_to_query_expression_body($1, $4, + $2.unit_type, + $2.distinct))) + MYSQL_YYABORT; + } + | query_expression_body_ext_parens + unit_type_decl + query_primary + { + if (!($$= Lex->add_primary_to_query_expression_body_ext_parens( + $1, $3, + $2.unit_type, + $2.distinct))) + MYSQL_YYABORT; + } + ; + +/* + query_primary produces the same expressions as + +*/ + +query_primary: + query_simple + { $$= $1; } + | query_expression_body_ext_parens + { $$= $1->first_select(); } + ; + +/* + query_simple produces the same expressions as + +*/ + +query_simple: + simple_table { $$= $1;} + ; + +subselect: + query_expression + { + if (!($$= Lex->parsed_subselect($1))) + MYSQL_YYABORT; + } + ; + +/* + subquery produces the same expressions as + + + Consider the production rule of the SQL Standard + subquery: + '(' query_expression ')' + + This rule is equivalent to the rule + subquery: + '(' query_expression_no_with_clause ')' + | '(' with_clause query_expression_no_with_clause ')' + that in its turn is equivalent to + subquery: + '(' query_expression_body_ext ')' + | query_expression_body_ext_parens + | '(' with_clause query_expression_no_with_clause ')' + + The latter can be re-written into + subquery: + query_expression_body_ext_parens + | '(' with_clause query_expression_no_with_clause ')' + + The last rule allows us to resolve properly the shift/reduce conflict + when subquery is used in expressions such as in the following queries + select (select * from t1 limit 1) + t2.a from t2 + select * from t1 where t1.a [not] in (select t2.a from 
t2) + + In the rule below %prec SUBQUERY_AS_EXPR forces the parser to perform a shift + operation rather then a reduce operation when ')' is encountered and can be + considered as the last symbol a query expression. +*/ + +subquery: + query_expression_body_ext_parens %prec SUBQUERY_AS_EXPR + { + if (!$1->fake_select_lex) + $1->first_select()->braces= false; + else + $1->fake_select_lex->braces= false; + if (!($$= Lex->parsed_subselect($1))) + MYSQL_YYABORT; + } + | '(' with_clause query_expression_no_with_clause ')' + { + $3->set_with_clause($2); + $2->attach_to($3->first_select()); + if (!($$= Lex->parsed_subselect($3))) + MYSQL_YYABORT; + } + ; + +opt_from_clause: + /* empty */ %prec EMPTY_FROM_CLAUSE + | from_clause + ; + +from_clause: + FROM table_reference_list + ; + +table_reference_list: + join_table_list + { + Select->context.table_list= + Select->context.first_name_resolution_table= + Select->table_list.first; + } + | DUAL_SYM + /* oracle compatibility: oracle always requires FROM clause, + and DUAL is system table without fields. + Is "SELECT 1 FROM DUAL" any better than "SELECT 1" ? 
+ Hmmm :) */ + ; + +select_options: + /* empty*/ + | select_option_list + { + if (unlikely((Select->options & SELECT_DISTINCT) && + (Select->options & SELECT_ALL))) + my_yyabort_error((ER_WRONG_USAGE, MYF(0), "ALL", "DISTINCT")); + } + ; + +opt_history_unit: + /* empty*/ %prec PREC_BELOW_IDENTIFIER_OPT_SPECIAL_CASE + { + $$= VERS_TIMESTAMP; + } + | TRANSACTION_SYM + { + $$= VERS_TRX_ID; + } + | TIMESTAMP + { + $$= VERS_TIMESTAMP; + } + ; + +history_point: + TIMESTAMP TEXT_STRING + { + Item *item; + if (!(item= type_handler_datetime.create_literal_item(thd, + $2.str, $2.length, + YYCSCL, true))) + MYSQL_YYABORT; + $$= Vers_history_point(VERS_TIMESTAMP, item); + } + | function_call_keyword_timestamp + { + $$= Vers_history_point(VERS_TIMESTAMP, $1); + } + | opt_history_unit bit_expr + { + $$= Vers_history_point($1, $2); + } + ; + +for_portion_of_time_clause: + FOR_SYM PORTION_SYM OF_SYM remember_tok_start ident FROM + bit_expr TO_SYM bit_expr + { + if (unlikely(0 == strcasecmp($5.str, "SYSTEM_TIME"))) + { + thd->parse_error(ER_SYNTAX_ERROR, $4); + MYSQL_YYABORT; + } + Lex->period_conditions.init(SYSTEM_TIME_FROM_TO, + Vers_history_point(VERS_TIMESTAMP, $7), + Vers_history_point(VERS_TIMESTAMP, $9), + $5); + } + ; + +opt_for_portion_of_time_clause: + /* empty */ + { + $$= false; + } + | for_portion_of_time_clause + { + $$= true; + } + ; + +opt_for_system_time_clause: + /* empty */ + { + $$= false; + } + | FOR_SYSTEM_TIME_SYM system_time_expr + { + $$= true; + } + ; + +system_time_expr: + AS OF_SYM history_point + { + Lex->vers_conditions.init(SYSTEM_TIME_AS_OF, $3); + } + | ALL + { + Lex->vers_conditions.init(SYSTEM_TIME_ALL); + } + | FROM history_point TO_SYM history_point + { + Lex->vers_conditions.init(SYSTEM_TIME_FROM_TO, $2, $4); + } + | BETWEEN_SYM history_point AND_SYM history_point + { + Lex->vers_conditions.init(SYSTEM_TIME_BETWEEN, $2, $4); + } + ; + +select_option_list: + select_option_list select_option + | select_option + ; + +select_option: + 
query_expression_option + | SQL_NO_CACHE_SYM + { + /* + Allow this flag once per query. + */ + if (Select->options & OPTION_NO_QUERY_CACHE) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "SQL_NO_CACHE")); + Select->options|= OPTION_NO_QUERY_CACHE; + } + | SQL_CACHE_SYM + { + /* + Allow this flag once per query. + */ + if (Select->options & OPTION_TO_QUERY_CACHE) + my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "SQL_CACHE")); + Select->options|= OPTION_TO_QUERY_CACHE; + } + ; + + +select_lock_type: + FOR_SYM UPDATE_SYM opt_lock_wait_timeout_new + { + $$= $3; + $$.defined_lock= TRUE; + $$.update_lock= TRUE; + } + | LOCK_SYM IN_SYM SHARE_SYM MODE_SYM opt_lock_wait_timeout_new + { + $$= $5; + $$.defined_lock= TRUE; + $$.update_lock= FALSE; + } + ; + + +opt_select_lock_type: + /* empty */ + { + $$.empty(); + } + | select_lock_type + { + $$= $1; + } + ; + +opt_lock_wait_timeout_new: + /* empty */ + { + $$.empty(); + } + | WAIT_SYM ulong_num + { + $$.empty(); + $$.defined_timeout= TRUE; + $$.timeout= $2; + } + | NOWAIT_SYM + { + $$.empty(); + $$.defined_timeout= TRUE; + $$.timeout= 0; + } + | SKIP_SYM LOCKED_SYM + { + $$.empty(); + $$.skip_locked= 1; + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SKIP_LOCKED); + } + ; + +select_item_list: + select_item_list ',' select_item + | select_item + | '*' + { + bool is_parsing_returning= + thd->lex->current_select->parsing_place == + IN_RETURNING; + SELECT_LEX *correct_select= is_parsing_returning ? 
+ thd->lex->returning() : + thd->lex->current_select; + Item *item= new (thd->mem_root) + Item_field(thd, &correct_select->context, + star_clex_str); + if (unlikely(item == NULL)) + MYSQL_YYABORT; + if (unlikely(add_item_to_list(thd, item))) + MYSQL_YYABORT; + correct_select->with_wild++; + } + ; + +select_item: + remember_name select_sublist_qualified_asterisk remember_end + { + if (unlikely(add_item_to_list(thd, $2))) + MYSQL_YYABORT; + } + | remember_name expr remember_end select_alias + { + DBUG_ASSERT($1 < $3); + + if (unlikely(add_item_to_list(thd, $2))) + MYSQL_YYABORT; + if ($4.str) + { + if (unlikely(Lex->sql_command == SQLCOM_CREATE_VIEW && + check_column_name($4.str))) + my_yyabort_error((ER_WRONG_COLUMN_NAME, MYF(0), $4.str)); + $2->base_flags|= item_base_t::IS_EXPLICIT_NAME; + $2->set_name(thd, $4); + } + else if (!$2->name.str || $2->name.str == item_empty_name) + { + $2->set_name(thd, $1, (uint) ($3 - $1), thd->charset()); + } + } + ; + +remember_tok_start: + { + $$= (char*) YYLIP->get_tok_start(); + } + ; + +remember_name: + { + $$= (char*) YYLIP->get_cpp_tok_start(); + } + ; + +remember_end: + { + $$= (char*) YYLIP->get_cpp_tok_end_rtrim(); + } + ; + +select_alias: + /* empty */ { $$=null_clex_str;} + | AS ident { $$=$2; } + | AS TEXT_STRING_sys { $$=$2; } + | ident { $$=$1; } + | TEXT_STRING_sys { $$=$1; } + ; + +opt_default_time_precision: + /* empty */ { $$= NOT_FIXED_DEC; } + | '(' ')' { $$= NOT_FIXED_DEC; } + | '(' real_ulong_num ')' { $$= $2; } + ; + +opt_time_precision: + /* empty */ { $$= 0; } + | '(' ')' { $$= 0; } + | '(' real_ulong_num ')' { $$= $2; } + ; + +optional_braces: + /* empty */ {} + | '(' ')' {} + ; + +/* all possible expressions */ +expr: + expr or expr %prec OR_SYM + { + /* + Design notes: + Do not use a manually maintained stack like thd->lex->xxx_list, + but use the internal bison stack ($$, $1 and $3) instead. 
+ Using the bison stack is: + - more robust to changes in the grammar, + - guaranteed to be in sync with the parser state, + - better for performances (no memory allocation). + */ + Item_cond_or *item1; + Item_cond_or *item3; + if (is_cond_or($1)) + { + item1= (Item_cond_or*) $1; + if (is_cond_or($3)) + { + item3= (Item_cond_or*) $3; + /* + (X1 OR X2) OR (Y1 OR Y2) ==> OR (X1, X2, Y1, Y2) + */ + item3->add_at_head(item1->argument_list()); + $$ = $3; + } + else + { + /* + (X1 OR X2) OR Y ==> OR (X1, X2, Y) + */ + item1->add($3, thd->mem_root); + $$ = $1; + } + } + else if (is_cond_or($3)) + { + item3= (Item_cond_or*) $3; + /* + X OR (Y1 OR Y2) ==> OR (X, Y1, Y2) + */ + item3->add_at_head($1, thd->mem_root); + $$ = $3; + } + else + { + /* X OR Y */ + $$= new (thd->mem_root) Item_cond_or(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + } + | expr XOR expr %prec XOR + { + /* XOR is a proprietary extension */ + $$= new (thd->mem_root) Item_func_xor(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | expr and expr %prec AND_SYM + { + /* See comments in rule expr: expr or expr */ + Item_cond_and *item1; + Item_cond_and *item3; + if (is_cond_and($1)) + { + item1= (Item_cond_and*) $1; + if (is_cond_and($3)) + { + item3= (Item_cond_and*) $3; + /* + (X1 AND X2) AND (Y1 AND Y2) ==> AND (X1, X2, Y1, Y2) + */ + item3->add_at_head(item1->argument_list()); + $$ = $3; + } + else + { + /* + (X1 AND X2) AND Y ==> AND (X1, X2, Y) + */ + item1->add($3, thd->mem_root); + $$ = $1; + } + } + else if (is_cond_and($3)) + { + item3= (Item_cond_and*) $3; + /* + X AND (Y1 AND Y2) ==> AND (X, Y1, Y2) + */ + item3->add_at_head($1, thd->mem_root); + $$ = $3; + } + else + { + /* X AND Y */ + $$= new (thd->mem_root) Item_cond_and(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + } + | NOT_SYM expr + { + $$= negate_expression(thd, $2); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | boolean_test %prec PREC_BELOW_NOT + ; + +boolean_test: + 
boolean_test IS TRUE_SYM %prec IS + { + $$= new (thd->mem_root) Item_func_istrue(thd, $1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | boolean_test IS not TRUE_SYM %prec IS + { + $$= new (thd->mem_root) Item_func_isnottrue(thd, $1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | boolean_test IS FALSE_SYM %prec IS + { + $$= new (thd->mem_root) Item_func_isfalse(thd, $1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | boolean_test IS not FALSE_SYM %prec IS + { + $$= new (thd->mem_root) Item_func_isnotfalse(thd, $1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | boolean_test IS UNKNOWN_SYM %prec IS + { + $$= new (thd->mem_root) Item_func_isnull(thd, $1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | boolean_test IS not UNKNOWN_SYM %prec IS + { + $$= new (thd->mem_root) Item_func_isnotnull(thd, $1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | boolean_test IS NULL_SYM %prec IS + { + $$= new (thd->mem_root) Item_func_isnull(thd, $1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | boolean_test IS not NULL_SYM %prec IS + { + $$= new (thd->mem_root) Item_func_isnotnull(thd, $1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | boolean_test EQUAL_SYM predicate %prec EQUAL_SYM + { + $$= new (thd->mem_root) Item_func_equal(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | boolean_test comp_op predicate %prec '=' + { + $$= (*$2)(0)->create(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | boolean_test comp_op all_or_any '(' subselect ')' %prec '=' + { + $$= all_any_subquery_creator(thd, $1, $2, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | predicate %prec BETWEEN_SYM + ; + +predicate: + predicate IN_SYM subquery + { + $$= new (thd->mem_root) Item_in_subselect(thd, $1, $3); + if (unlikely(!$$)) + MYSQL_YYABORT; + } + | predicate not IN_SYM subquery + { + Item *item= new (thd->mem_root) Item_in_subselect(thd, $1, $4); + if (unlikely(!item)) + MYSQL_YYABORT; + $$= 
negate_expression(thd, item); + if (unlikely(!$$)) + MYSQL_YYABORT; + } + | predicate IN_SYM '(' expr ')' + { + $$= handle_sql2003_note184_exception(thd, $1, true, $4); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | predicate IN_SYM '(' expr ',' expr_list ')' + { + $6->push_front($4, thd->mem_root); + $6->push_front($1, thd->mem_root); + $$= new (thd->mem_root) Item_func_in(thd, *$6); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | predicate not IN_SYM '(' expr ')' + { + $$= handle_sql2003_note184_exception(thd, $1, false, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | predicate not IN_SYM '(' expr ',' expr_list ')' + { + $7->push_front($5, thd->mem_root); + $7->push_front($1, thd->mem_root); + Item_func_in *item= new (thd->mem_root) Item_func_in(thd, *$7); + if (unlikely(item == NULL)) + MYSQL_YYABORT; + $$= item->neg_transformer(thd); + } + | predicate BETWEEN_SYM predicate AND_SYM predicate %prec BETWEEN_SYM + { + $$= new (thd->mem_root) Item_func_between(thd, $1, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | predicate not BETWEEN_SYM predicate AND_SYM predicate %prec BETWEEN_SYM + { + Item_func_between *item; + item= new (thd->mem_root) Item_func_between(thd, $1, $4, $6); + if (unlikely(item == NULL)) + MYSQL_YYABORT; + $$= item->neg_transformer(thd); + } + | predicate SOUNDS_SYM LIKE predicate + { + Item *item1= new (thd->mem_root) Item_func_soundex(thd, $1); + Item *item4= new (thd->mem_root) Item_func_soundex(thd, $4); + if (unlikely(item1 == NULL) || unlikely(item4 == NULL)) + MYSQL_YYABORT; + $$= new (thd->mem_root) Item_func_eq(thd, item1, item4); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | predicate LIKE predicate + { + $$= new (thd->mem_root) Item_func_like(thd, $1, $3, escape(thd), false); + if (unlikely(!$$)) + MYSQL_YYABORT; + } + | predicate LIKE predicate ESCAPE_SYM predicate %prec LIKE + { + Lex->escape_used= true; + $$= new (thd->mem_root) Item_func_like(thd, $1, $3, $5, true); + if (unlikely(!$$)) + 
MYSQL_YYABORT; + } + | predicate not LIKE predicate + { + Item *item= new (thd->mem_root) Item_func_like(thd, $1, $4, escape(thd), false); + if (unlikely(!item)) + MYSQL_YYABORT; + $$= item->neg_transformer(thd); + } + | predicate not LIKE predicate ESCAPE_SYM predicate %prec LIKE + { + Lex->escape_used= true; + Item *item= new (thd->mem_root) Item_func_like(thd, $1, $4, $6, true); + if (unlikely(!item)) + MYSQL_YYABORT; + $$= item->neg_transformer(thd); + } + | predicate REGEXP predicate + { + $$= new (thd->mem_root) Item_func_regex(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | predicate not REGEXP predicate + { + Item *item= new (thd->mem_root) Item_func_regex(thd, $1, $4); + if (unlikely(item == NULL)) + MYSQL_YYABORT; + $$= negate_expression(thd, item); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | bit_expr %prec PREC_BELOW_NOT + ; + +bit_expr: + bit_expr '|' bit_expr %prec '|' + { + $$= new (thd->mem_root) Item_func_bit_or(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | bit_expr '&' bit_expr %prec '&' + { + $$= new (thd->mem_root) Item_func_bit_and(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | bit_expr SHIFT_LEFT bit_expr %prec SHIFT_LEFT + { + $$= new (thd->mem_root) Item_func_shift_left(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | bit_expr SHIFT_RIGHT bit_expr %prec SHIFT_RIGHT + { + $$= new (thd->mem_root) Item_func_shift_right(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | bit_expr ORACLE_CONCAT_SYM bit_expr + { + $$= new (thd->mem_root) Item_func_concat_operator_oracle(thd, + $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | bit_expr '+' bit_expr %prec '+' + { + $$= new (thd->mem_root) Item_func_plus(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | bit_expr '-' bit_expr %prec '-' + { + $$= new (thd->mem_root) Item_func_minus(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | bit_expr '+' INTERVAL_SYM 
expr interval %prec '+' + { + $$= new (thd->mem_root) Item_date_add_interval(thd, $1, $4, $5, 0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | bit_expr '-' INTERVAL_SYM expr interval %prec '-' + { + $$= new (thd->mem_root) Item_date_add_interval(thd, $1, $4, $5, 1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | INTERVAL_SYM expr interval '+' expr + /* we cannot put interval before - */ + { + $$= new (thd->mem_root) Item_date_add_interval(thd, $5, $2, $3, 0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | '+' INTERVAL_SYM expr interval '+' expr %prec NEG + { + $$= new (thd->mem_root) Item_date_add_interval(thd, $6, $3, $4, 0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | '-' INTERVAL_SYM expr interval '+' expr %prec NEG + { + $$= new (thd->mem_root) Item_date_add_interval(thd, $6, $3, $4, 1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | bit_expr '*' bit_expr %prec '*' + { + $$= new (thd->mem_root) Item_func_mul(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | bit_expr '/' bit_expr %prec '/' + { + $$= new (thd->mem_root) Item_func_div(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | bit_expr '%' bit_expr %prec '%' + { + $$= new (thd->mem_root) Item_func_mod(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | bit_expr DIV_SYM bit_expr %prec DIV_SYM + { + $$= new (thd->mem_root) Item_func_int_div(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | bit_expr MOD_SYM bit_expr %prec MOD_SYM + { + $$= new (thd->mem_root) Item_func_mod(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | bit_expr '^' bit_expr + { + $$= new (thd->mem_root) Item_func_bit_xor(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | mysql_concatenation_expr %prec '^' + ; + +or: + OR_SYM + | OR2_SYM + ; + +and: + AND_SYM + | AND_AND_SYM + ; + +not: + NOT_SYM + | NOT2_SYM + ; + +not2: + '!' 
+ | NOT2_SYM + ; + +comp_op: + '=' { $$ = &comp_eq_creator; } + | GE { $$ = &comp_ge_creator; } + | '>' { $$ = &comp_gt_creator; } + | LE { $$ = &comp_le_creator; } + | '<' { $$ = &comp_lt_creator; } + | NE { $$ = &comp_ne_creator; } + ; + +all_or_any: + ALL { $$ = 1; } + | ANY_SYM { $$ = 0; } + ; + +opt_dyncol_type: + /* empty */ + { + $$.set(DYN_COL_NULL); /* automatic type */ + } + | AS dyncol_type { $$= $2; } + ; + +dyncol_type: + numeric_dyncol_type + | temporal_dyncol_type + | string_dyncol_type + ; + +numeric_dyncol_type: + INT_SYM { $$.set(DYN_COL_INT); } + | UNSIGNED INT_SYM { $$.set(DYN_COL_UINT); } + | DOUBLE_SYM { $$.set(DYN_COL_DOUBLE); } + | REAL { $$.set(DYN_COL_DOUBLE); } + | FLOAT_SYM { $$.set(DYN_COL_DOUBLE); } + | DECIMAL_SYM float_options { $$.set(DYN_COL_DECIMAL, $2); } + ; + +temporal_dyncol_type: + DATE_SYM { $$.set(DYN_COL_DATE); } + | TIME_SYM opt_field_scale { $$.set(DYN_COL_TIME, $2); } + | DATETIME opt_field_scale { $$.set(DYN_COL_DATETIME, $2); } + ; + +string_dyncol_type: + char opt_binary + { + if ($$.set(DYN_COL_STRING, $2, thd->variables.collation_connection)) + MYSQL_YYABORT; + } + | nchar + { + $$.set(DYN_COL_STRING, national_charset_info); + } + ; + +dyncall_create_element: + expr ',' expr opt_dyncol_type + { + $$= (DYNCALL_CREATE_DEF *) + alloc_root(thd->mem_root, sizeof(DYNCALL_CREATE_DEF)); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + $$->key= $1; + $$->value= $3; + $$->type= (DYNAMIC_COLUMN_TYPE)$4.dyncol_type(); + $$->cs= $4.charset_collation(); + if ($4.has_explicit_length()) + $$->len= $4.length(); + else + $$->len= 0; + if ($4.has_explicit_dec()) + $$->frac= $4.dec(); + else + $$->len= 0; + } + ; + +dyncall_create_list: + dyncall_create_element + { + $$= new (thd->mem_root) List; + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + $$->push_back($1, thd->mem_root); + } + | dyncall_create_list ',' dyncall_create_element + { + $1->push_back($3, thd->mem_root); + $$= $1; + } + ; + + +plsql_cursor_attr: + ISOPEN_SYM { $$= 
PLSQL_CURSOR_ATTR_ISOPEN; } + | FOUND_SYM { $$= PLSQL_CURSOR_ATTR_FOUND; } + | NOTFOUND_SYM { $$= PLSQL_CURSOR_ATTR_NOTFOUND; } + | ROWCOUNT_SYM { $$= PLSQL_CURSOR_ATTR_ROWCOUNT; } + ; + +explicit_cursor_attr: + ident PERCENT_ORACLE_SYM plsql_cursor_attr + { + if (unlikely(!($$= Lex->make_item_plsql_cursor_attr(thd, &$1, $3)))) + MYSQL_YYABORT; + } + ; + + +trim_operands: + expr { $$.set(TRIM_BOTH, $1); } + | LEADING expr FROM expr { $$.set(TRIM_LEADING, $2, $4); } + | TRAILING expr FROM expr { $$.set(TRIM_TRAILING, $2, $4); } + | BOTH expr FROM expr { $$.set(TRIM_BOTH, $2, $4); } + | LEADING FROM expr { $$.set(TRIM_LEADING, $3); } + | TRAILING FROM expr { $$.set(TRIM_TRAILING, $3); } + | BOTH FROM expr { $$.set(TRIM_BOTH, $3); } + | expr FROM expr { $$.set(TRIM_BOTH, $1, $3); } + ; + +/* + Expressions that the parser allows in a column DEFAULT clause + without parentheses. These expressions cannot end with a COLLATE clause. + + If we allowed any "expr" in DEFAULT clause, there would be a confusion + in queries like this: + CREATE TABLE t1 (a TEXT DEFAULT 'a' COLLATE latin1_bin); + It would be not clear what COLLATE stands for: + - the collation of the column `a`, or + - the collation of the string literal 'a' + + This restriction allows to parse the above query unambiguiusly: + COLLATE belongs to the column rather than the literal. + If one needs COLLATE to belong to the literal, parentheses must be used: + CREATE TABLE t1 (a TEXT DEFAULT ('a' COLLATE latin1_bin)); + Note: the COLLATE clause is rather meaningless here, but the query + is syntactically correct. + + Note, some of the expressions are not actually allowed in DEFAULT, + e.g. sum_expr, window_func_expr, ROW(...), VALUES(). + We could move them to simple_expr, but that would make + these two queries return a different error messages: + CREATE TABLE t1 (a INT DEFAULT AVG(1)); + CREATE TABLE t1 (a INT DEFAULT (AVG(1))); + The first query would return "syntax error". 
+ Currenly both return: + Function or expression 'avg(' is not allowed for 'DEFAULT' ... +*/ +column_default_non_parenthesized_expr: + simple_ident + | function_call_keyword + | function_call_nonkeyword + | function_call_generic + | function_call_conflict + | literal + | param_marker { $$= $1; } + | variable + | sum_expr + { + if (!Lex->select_stack_top || Lex->json_table) + { + my_error(ER_INVALID_GROUP_FUNC_USE, MYF(0)); + MYSQL_YYABORT; + } + } + | window_func_expr + { + if (!Lex->select_stack_top) + { + my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0)); + MYSQL_YYABORT; + } + } + | inverse_distribution_function + | ROW_SYM '(' expr ',' expr_list ')' + { + $5->push_front($3, thd->mem_root); + $$= new (thd->mem_root) Item_row(thd, *$5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | EXISTS '(' subselect ')' + { + $$= new (thd->mem_root) Item_exists_subselect(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | '{' ident expr '}' + { + if (unlikely(!($$= $3->make_odbc_literal(thd, &$2)))) + MYSQL_YYABORT; + } + | MATCH ident_list_arg AGAINST '(' bit_expr fulltext_options ')' + { + $2->push_front($5, thd->mem_root); + Item_func_match *i1= new (thd->mem_root) Item_func_match(thd, *$2, + $6); + if (unlikely(i1 == NULL)) + MYSQL_YYABORT; + Select->add_ftfunc_to_list(thd, i1); + $$= i1; + } + | CAST_SYM '(' expr AS cast_type ')' + { + if (unlikely(!($$= $5.create_typecast_item_or_error(thd, $3)))) + MYSQL_YYABORT; + } + | CASE_SYM when_list_opt_else END + { + if (unlikely(!($$= new(thd->mem_root) Item_func_case_searched(thd, *$2)))) + MYSQL_YYABORT; + } + | CASE_SYM expr when_list_opt_else END + { + $3->push_front($2, thd->mem_root); + if (unlikely(!($$= new (thd->mem_root) Item_func_case_simple(thd, *$3)))) + MYSQL_YYABORT; + } + | CONVERT_SYM '(' expr ',' cast_type ')' + { + if (unlikely(!($$= $5.create_typecast_item_or_error(thd, $3)))) + MYSQL_YYABORT; + } + | CONVERT_SYM '(' expr USING charset_name ')' + { + $$= new (thd->mem_root) 
Item_func_conv_charset(thd, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | DEFAULT '(' simple_ident ')' + { + Item_splocal *il= $3->get_item_splocal(); + if (unlikely(il)) + my_yyabort_error((ER_WRONG_COLUMN_NAME, MYF(0), il->my_name()->str)); + $$= new (thd->mem_root) Item_default_value(thd, Lex->current_context(), + $3, 0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + Lex->default_used= TRUE; + } + | VALUE_SYM '(' simple_ident_nospvar ')' + { + $$= new (thd->mem_root) Item_insert_value(thd, Lex->current_context(), + $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | NEXT_SYM VALUE_SYM FOR_SYM table_ident + { + if (unlikely(!($$= Lex->create_item_func_nextval(thd, $4)))) + MYSQL_YYABORT; + } + | NEXTVAL_SYM '(' table_ident ')' + { + if (unlikely(!($$= Lex->create_item_func_nextval(thd, $3)))) + MYSQL_YYABORT; + } + | PREVIOUS_SYM VALUE_SYM FOR_SYM table_ident + { + if (unlikely(!($$= Lex->create_item_func_lastval(thd, $4)))) + MYSQL_YYABORT; + } + | LASTVAL_SYM '(' table_ident ')' + { + if (unlikely(!($$= Lex->create_item_func_lastval(thd, $3)))) + MYSQL_YYABORT; + } + | SETVAL_SYM '(' table_ident ',' longlong_num ')' + { + if (unlikely(!($$= Lex->create_item_func_setval(thd, $3, $5, 0, 1)))) + MYSQL_YYABORT; + } + | SETVAL_SYM '(' table_ident ',' longlong_num ',' bool ')' + { + if (unlikely(!($$= Lex->create_item_func_setval(thd, $3, $5, 0, $7)))) + MYSQL_YYABORT; + } + | SETVAL_SYM '(' table_ident ',' longlong_num ',' bool ',' ulonglong_num ')' + { + if (unlikely(!($$= Lex->create_item_func_setval(thd, $3, $5, $9, $7)))) + MYSQL_YYABORT; + } + ; + +primary_expr: + column_default_non_parenthesized_expr + | explicit_cursor_attr + | '(' parenthesized_expr ')' { $$= $2; } + | subquery + { + if (!($$= Lex->create_item_query_expression(thd, $1->master_unit()))) + MYSQL_YYABORT; + } + ; + +string_factor_expr: + primary_expr + | string_factor_expr COLLATE_SYM collation_name + { + if (unlikely(!($$= new (thd->mem_root) + 
Item_func_set_collation(thd, $1, $3)))) + MYSQL_YYABORT; + } + ; + +simple_expr: + string_factor_expr %prec NEG + | BINARY simple_expr + { + Type_cast_attributes at(&my_charset_bin); + if (unlikely(!($$= type_handler_long_blob.create_typecast_item(thd, $2, at)))) + MYSQL_YYABORT; + } + | '+' simple_expr %prec NEG + { + $$= $2; + } + | '-' simple_expr %prec NEG + { + $$= $2->neg(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | '~' simple_expr %prec NEG + { + $$= new (thd->mem_root) Item_func_bit_neg(thd, $2); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | not2 simple_expr %prec NEG + { + $$= negate_expression(thd, $2); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +mysql_concatenation_expr: + simple_expr + | mysql_concatenation_expr MYSQL_CONCAT_SYM simple_expr + { + $$= new (thd->mem_root) Item_func_concat(thd, $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +function_call_keyword_timestamp: + TIMESTAMP '(' expr ')' + { + $$= new (thd->mem_root) Item_datetime_typecast(thd, $3, + AUTO_SEC_PART_DIGITS); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | TIMESTAMP '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_func_timestamp(thd, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; +/* + Function call syntax using official SQL 2003 keywords. + Because the function name is an official token, + a dedicated grammar rule is needed in the parser. 
+ There is no potential for conflicts +*/ +function_call_keyword: + CHAR_SYM '(' expr_list ')' + { + $$= new (thd->mem_root) Item_func_char(thd, *$3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | CHAR_SYM '(' expr_list USING charset_name ')' + { + $$= new (thd->mem_root) Item_func_char(thd, *$3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | CURRENT_USER optional_braces + { + $$= new (thd->mem_root) Item_func_current_user(thd, + Lex->current_context()); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + Lex->safe_to_cache_query= 0; + } + | CURRENT_ROLE optional_braces + { + $$= new (thd->mem_root) Item_func_current_role(thd, + Lex->current_context()); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + Lex->safe_to_cache_query= 0; + } + | DATE_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_date_typecast(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | DAY_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_func_dayofmonth(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | HOUR_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_func_hour(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | INSERT '(' expr ',' expr ',' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_func_insert(thd, $3, $5, $7, $9); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | INTERVAL_SYM '(' expr ',' expr ')' + { + List *list= new (thd->mem_root) List; + if (unlikely(list == NULL)) + MYSQL_YYABORT; + if (unlikely(list->push_front($5, thd->mem_root)) || + unlikely(list->push_front($3, thd->mem_root))) + MYSQL_YYABORT; + Item_row *item= new (thd->mem_root) Item_row(thd, *list); + if (unlikely(item == NULL)) + MYSQL_YYABORT; + $$= new (thd->mem_root) Item_func_interval(thd, item); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | INTERVAL_SYM '(' expr ',' expr ',' expr_list ')' + { + $7->push_front($5, 
thd->mem_root); + $7->push_front($3, thd->mem_root); + Item_row *item= new (thd->mem_root) Item_row(thd, *$7); + if (unlikely(item == NULL)) + MYSQL_YYABORT; + $$= new (thd->mem_root) Item_func_interval(thd, item); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | LEFT '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_func_left(thd, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | MINUTE_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_func_minute(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | MONTH_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_func_month(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | RIGHT '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_func_right(thd, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | SECOND_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_func_second(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | SQL_SYM PERCENT_ORACLE_SYM ROWCOUNT_SYM + { + $$= new (thd->mem_root) Item_func_oracle_sql_rowcount(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + Lex->safe_to_cache_query= 0; + } + | TIME_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_time_typecast(thd, $3, + AUTO_SEC_PART_DIGITS); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | function_call_keyword_timestamp + { + $$= $1; + } + | TRIM '(' trim_operands ')' + { + if (unlikely(!($$= Schema::find_implied(thd)-> + make_item_func_trim(thd, $3)))) + MYSQL_YYABORT; + } + | USER_SYM '(' ')' + { + $$= new (thd->mem_root) Item_func_user(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + Lex->safe_to_cache_query=0; + } + | YEAR_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_func_year(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +substring_operands: + expr ',' expr ',' expr + { + $$= Lex_substring_spec_st::init($1, $3, $5); 
+ } + | expr ',' expr + { + $$= Lex_substring_spec_st::init($1, $3); + } + | expr FROM expr FOR_SYM expr + { + $$= Lex_substring_spec_st::init($1, $3, $5); + } + | expr FROM expr + { + $$= Lex_substring_spec_st::init($1, $3); + } + ; + + +/* + Function calls using non reserved keywords, with special syntaxic forms. + Dedicated grammar rules are needed because of the syntax, + but also have the potential to cause incompatibilities with other + parts of the language. + MAINTAINER: + The only reasons a function should be added here are: + - for compatibility reasons with another SQL syntax (CURDATE), + - for typing reasons (GET_FORMAT) + Any other 'Syntaxic sugar' enhancements should be *STRONGLY* + discouraged. +*/ +function_call_nonkeyword: + ADD_MONTHS_SYM '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_date_add_interval(thd, $3, $5, + INTERVAL_MONTH, 0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | ADDDATE_SYM '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_date_add_interval(thd, $3, $5, + INTERVAL_DAY, 0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | ADDDATE_SYM '(' expr ',' INTERVAL_SYM expr interval ')' + { + $$= new (thd->mem_root) Item_date_add_interval(thd, $3, $6, $7, 0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | CURDATE optional_braces + { + $$= new (thd->mem_root) Item_func_curdate_local(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + Lex->safe_to_cache_query=0; + } + | CURTIME opt_time_precision + { + $$= new (thd->mem_root) Item_func_curtime_local(thd, $2); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + Lex->safe_to_cache_query=0; + } + | DATE_ADD_INTERVAL '(' expr ',' INTERVAL_SYM expr interval ')' + { + $$= new (thd->mem_root) Item_date_add_interval(thd, $3, $6, $7, 0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | DATE_SUB_INTERVAL '(' expr ',' INTERVAL_SYM expr interval ')' + { + $$= new (thd->mem_root) Item_date_add_interval(thd, $3, $6, $7, 1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + 
} + | DATE_FORMAT_SYM '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_func_date_format(thd, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | DATE_FORMAT_SYM '(' expr ',' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_func_date_format(thd, $3, $5, $7); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | EXTRACT_SYM '(' interval FROM expr ')' + { + $$=new (thd->mem_root) Item_extract(thd, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | GET_FORMAT '(' date_time_type ',' expr ')' + { + $$= new (thd->mem_root) Item_func_get_format(thd, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | NOW_SYM opt_time_precision + { + $$= new (thd->mem_root) Item_func_now_local(thd, $2); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + Lex->safe_to_cache_query=0; + } + | POSITION_SYM '(' bit_expr IN_SYM expr ')' + { + $$= new (thd->mem_root) Item_func_locate(thd, $5, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | ROWNUM_SYM +%ifdef MARIADB + '(' ')' +%else + optional_braces +%endif ORACLE + { + $$= new (thd->mem_root) Item_func_rownum(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | SUBDATE_SYM '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_date_add_interval(thd, $3, $5, + INTERVAL_DAY, 1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | SUBDATE_SYM '(' expr ',' INTERVAL_SYM expr interval ')' + { + $$= new (thd->mem_root) Item_date_add_interval(thd, $3, $6, $7, 1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | SUBSTRING '(' substring_operands ')' + { + if (unlikely(!($$= Schema::find_implied(thd)-> + make_item_func_substr(thd, $3)))) + MYSQL_YYABORT; + } +%ifdef ORACLE + | SYSDATE + { + if (unlikely(!($$= Lex->make_item_func_sysdate(thd, 0)))) + MYSQL_YYABORT; + } +%endif + | SYSDATE '(' ')' + { + if (unlikely(!($$= Lex->make_item_func_sysdate(thd, 0)))) + MYSQL_YYABORT; + } + | SYSDATE '(' real_ulong_num ')' + { + if (unlikely(!($$= Lex->make_item_func_sysdate(thd, (uint) $3)))) + 
MYSQL_YYABORT; + } + | TIMESTAMP_ADD '(' interval_time_stamp ',' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_date_add_interval(thd, $7, $5, $3, 0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | TIMESTAMP_DIFF '(' interval_time_stamp ',' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_func_timestamp_diff(thd, $5, $7, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | TRIM_ORACLE '(' trim_operands ')' + { + if (unlikely(!($$= $3.make_item_func_trim_oracle(thd)))) + MYSQL_YYABORT; + } + | UTC_DATE_SYM optional_braces + { + $$= new (thd->mem_root) Item_func_curdate_utc(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + Lex->safe_to_cache_query=0; + } + | UTC_TIME_SYM opt_time_precision + { + $$= new (thd->mem_root) Item_func_curtime_utc(thd, $2); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + Lex->safe_to_cache_query=0; + } + | UTC_TIMESTAMP_SYM opt_time_precision + { + $$= new (thd->mem_root) Item_func_now_utc(thd, $2); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + Lex->safe_to_cache_query=0; + } + | + COLUMN_ADD_SYM '(' expr ',' dyncall_create_list ')' + { + $$= create_func_dyncol_add(thd, $3, *$5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + COLUMN_DELETE_SYM '(' expr ',' expr_list ')' + { + $$= create_func_dyncol_delete(thd, $3, *$5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + COLUMN_CHECK_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_func_dyncol_check(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + COLUMN_CREATE_SYM '(' dyncall_create_list ')' + { + $$= create_func_dyncol_create(thd, *$3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + COLUMN_GET_SYM '(' expr ',' expr AS cast_type ')' + { + $$= create_func_dyncol_get(thd, $3, $5, $7.type_handler(), + $7, $7.charset()); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +/* + Functions calls using a non reserved keyword, and using a regular syntax. 
+ Because the non reserved keyword is used in another part of the grammar, + a dedicated rule is needed here. +*/ +function_call_conflict: + ASCII_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_func_ascii(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | CHARSET '(' expr ')' + { + $$= new (thd->mem_root) Item_func_charset(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | COALESCE '(' expr_list ')' + { + $$= new (thd->mem_root) Item_func_coalesce(thd, *$3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | COLLATION_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_func_collation(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | DATABASE '(' ')' + { + $$= new (thd->mem_root) Item_func_database(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + Lex->safe_to_cache_query=0; + } + | IF_SYM '(' expr ',' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_func_if(thd, $3, $5, $7); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | FORMAT_SYM '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_func_format(thd, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | FORMAT_SYM '(' expr ',' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_func_format(thd, $3, $5, $7); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + /* LAST_VALUE here conflicts with the definition for window functions. + We have these 2 separate rules to remove the shift/reduce conflict. 
+ */ + | LAST_VALUE '(' expr ')' + { + List *list= new (thd->mem_root) List; + if (unlikely(list == NULL)) + MYSQL_YYABORT; + list->push_back($3, thd->mem_root); + + $$= new (thd->mem_root) Item_func_last_value(thd, *list); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | LAST_VALUE '(' expr_list ',' expr ')' + { + $3->push_back($5, thd->mem_root); + $$= new (thd->mem_root) Item_func_last_value(thd, *$3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | MICROSECOND_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_func_microsecond(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | MOD_SYM '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_func_mod(thd, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | OLD_PASSWORD_SYM '(' expr ')' + { + $$= new (thd->mem_root) + Item_func_password(thd, $3, Item_func_password::OLD); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | PASSWORD_SYM '(' expr ')' + { + Item* i1; + i1= new (thd->mem_root) Item_func_password(thd, $3); + if (unlikely(i1 == NULL)) + MYSQL_YYABORT; + $$= i1; + } + | QUARTER_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_func_quarter(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | REPEAT_SYM '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_func_repeat(thd, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | REPLACE '(' expr ',' expr ',' expr ')' + { + if (unlikely(!($$= Schema::find_implied(thd)-> + make_item_func_replace(thd, $3, $5, $7)))) + MYSQL_YYABORT; + } + | REVERSE_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_func_reverse(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | ROW_COUNT_SYM '(' ')' + { + $$= new (thd->mem_root) Item_func_row_count(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + Lex->safe_to_cache_query= 0; + } + | TRUNCATE_SYM '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_func_round(thd, $3, $5, 1); + if 
(unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | WEEK_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_func_week(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | WEEK_SYM '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_func_week(thd, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | WEIGHT_STRING_SYM '(' expr opt_ws_levels ')' + { + $$= new (thd->mem_root) Item_func_weight_string(thd, $3, 0, 0, $4); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | WEIGHT_STRING_SYM '(' expr AS CHAR_SYM ws_nweights opt_ws_levels ')' + { + $$= new (thd->mem_root) + Item_func_weight_string(thd, $3, 0, $6, + $7 | MY_STRXFRM_PAD_WITH_SPACE); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | WEIGHT_STRING_SYM '(' expr AS BINARY ws_nweights ')' + { + Item *item= new (thd->mem_root) Item_char_typecast(thd, $3, $6, + &my_charset_bin); + if (unlikely(item == NULL)) + MYSQL_YYABORT; + $$= new (thd->mem_root) + Item_func_weight_string(thd, item, 0, $6, + MY_STRXFRM_PAD_WITH_SPACE); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | WEIGHT_STRING_SYM '(' expr ',' ulong_num ',' ulong_num ',' ulong_num ')' + { + $$= new (thd->mem_root) Item_func_weight_string(thd, $3, $5, $7, + $9); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +/* + Regular function calls. + The function name is *not* a token, and therefore is guaranteed to not + introduce side effects to the language in general. + MAINTAINER: + All the new functions implemented for new features should fit into + this category. 
The place to implement the function itself is + in sql/item_create.cc +*/ +function_call_generic: + IDENT_sys '(' + { +#ifdef HAVE_DLOPEN + udf_func *udf= 0; + LEX *lex= Lex; + if (using_udf_functions && + (udf= find_udf($1.str, $1.length)) && + udf->type == UDFTYPE_AGGREGATE) + { + if (unlikely(lex->current_select->inc_in_sum_expr())) + { + thd->parse_error(); + MYSQL_YYABORT; + } + } + /* Temporary placing the result of find_udf in $3 */ + $$= udf; +#endif + } + opt_udf_expr_list ')' + { + const Type_handler *h; + Create_func *builder; + Item *item= NULL; + + if (unlikely(check_routine_name(&$1))) + MYSQL_YYABORT; + + /* + Implementation note: + names are resolved with the following order: + - MySQL native functions, + - User Defined Functions, + - Constructors, like POINT(1,1) + - Stored Functions (assuming the current database) + + This will be revised with WL#2128 (SQL PATH) + */ + if ((builder= native_functions_hash.find(thd, $1))) + { + item= builder->create_func(thd, &$1, $4); + } + else if ((h= Type_handler::handler_by_name(thd, $1)) && + (item= h->make_constructor_item(thd, $4))) + { + // Found a constructor with a proper argument count + } + else + { +#ifdef HAVE_DLOPEN + /* Retrieving the result of find_udf */ + udf_func *udf= $3; + + if (udf) + { + if (udf->type == UDFTYPE_AGGREGATE) + { + Select->in_sum_expr--; + } + + item= Create_udf_func::s_singleton.create(thd, udf, $4); + } + else +#endif + { + builder= find_qualified_function_builder(thd); + DBUG_ASSERT(builder); + item= builder->create_func(thd, &$1, $4); + } + } + + if (unlikely(! 
($$= item))) + MYSQL_YYABORT; + } + | CONTAINS_SYM '(' opt_expr_list ')' + { + if (!($$= Lex->make_item_func_call_native_or_parse_error(thd, + $1, $3))) + MYSQL_YYABORT; + } + | OVERLAPS_SYM '(' opt_expr_list ')' + { + if (!($$= Lex->make_item_func_call_native_or_parse_error(thd, + $1, $3))) + MYSQL_YYABORT; + } + | WITHIN '(' opt_expr_list ')' + { + if (!($$= Lex->make_item_func_call_native_or_parse_error(thd, + $1, $3))) + MYSQL_YYABORT; + } + | ident_cli '.' ident_cli '(' opt_expr_list ')' + { + if (unlikely(!($$= Lex->make_item_func_call_generic(thd, &$1, &$3, $5)))) + MYSQL_YYABORT; + } + | ident_cli '.' ident_cli '.' ident_cli '(' opt_expr_list ')' + { + if (unlikely(!($$= Lex->make_item_func_call_generic(thd, &$1, &$3, &$5, $7)))) + MYSQL_YYABORT; + } + ; + +fulltext_options: + opt_natural_language_mode opt_query_expansion + { $$= $1 | $2; } + | IN_SYM BOOLEAN_SYM MODE_SYM + { $$= FT_BOOL; } + ; + +opt_natural_language_mode: + /* nothing */ { $$= FT_NL; } + | IN_SYM NATURAL LANGUAGE_SYM MODE_SYM { $$= FT_NL; } + ; + +opt_query_expansion: + /* nothing */ { $$= 0; } + | WITH QUERY_SYM EXPANSION_SYM { $$= FT_EXPAND; } + ; + +opt_udf_expr_list: + /* empty */ { $$= NULL; } + | udf_expr_list { $$= $1; } + ; + +udf_expr_list: + udf_expr + { + $$= new (thd->mem_root) List; + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + $$->push_back($1, thd->mem_root); + } + | udf_expr_list ',' udf_expr + { + $1->push_back($3, thd->mem_root); + $$= $1; + } + ; + +udf_expr: + remember_name expr remember_end select_alias + { + /* + Use Item::name as a storage for the attribute value of user + defined function argument. It is safe to use Item::name + because the syntax will not allow having an explicit name here. + See WL#1017 re. udf attributes. + */ + if ($4.str) + { + $2->base_flags|= item_base_t::IS_EXPLICIT_NAME; + $2->set_name(thd, $4); + } + /* + A field has to have its proper name in order for name + resolution to work, something we are only guaranteed if we + parse it out. 
If we hijack the input stream with + remember_name we may get quoted or escaped names. + */ + else if ($2->type() != Item::FIELD_ITEM && + $2->type() != Item::REF_ITEM /* For HAVING */ ) + $2->set_name(thd, $1, (uint) ($3 - $1), thd->charset()); + $$= $2; + } + ; + +sum_expr: + AVG_SYM '(' in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_avg(thd, $3, FALSE); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | AVG_SYM '(' DISTINCT in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_avg(thd, $4, TRUE); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | BIT_AND '(' in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_and(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | BIT_OR '(' in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_or(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | BIT_XOR '(' in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_xor(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | COUNT_SYM '(' opt_all '*' ')' + { + Item *item= new (thd->mem_root) Item_int(thd, (int32) 0L, 1); + if (unlikely(item == NULL)) + MYSQL_YYABORT; + $$= new (thd->mem_root) Item_sum_count(thd, item); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | COUNT_SYM '(' in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_count(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | COUNT_SYM '(' DISTINCT + { Select->in_sum_expr++; } + expr_list + { Select->in_sum_expr--; } + ')' + { + $$= new (thd->mem_root) Item_sum_count(thd, *$5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | MIN_SYM '(' in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_min(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + /* + According to ANSI SQL, DISTINCT is allowed and has + no sense inside MIN and MAX grouping functions; so MIN|MAX(DISTINCT ...) 
+ is processed like an ordinary MIN | MAX() + */ + | MIN_SYM '(' DISTINCT in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_min(thd, $4); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | MAX_SYM '(' in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_max(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | MAX_SYM '(' DISTINCT in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_max(thd, $4); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | STD_SYM '(' in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_std(thd, $3, 0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | VARIANCE_SYM '(' in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_variance(thd, $3, 0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | STDDEV_SAMP_SYM '(' in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_std(thd, $3, 1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | VAR_SAMP_SYM '(' in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_variance(thd, $3, 1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | SUM_SYM '(' in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_sum(thd, $3, FALSE); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | SUM_SYM '(' DISTINCT in_sum_expr ')' + { + $$= new (thd->mem_root) Item_sum_sum(thd, $4, TRUE); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | GROUP_CONCAT_SYM '(' opt_distinct + { Select->in_sum_expr++; } + expr_list opt_gorder_clause + opt_gconcat_separator opt_glimit_clause + ')' + { + SELECT_LEX *sel= Select; + sel->in_sum_expr--; + $$= new (thd->mem_root) + Item_func_group_concat(thd, Lex->current_context(), + $3, $5, + sel->gorder_list, $7, $8, + sel->limit_params.select_limit, + sel->limit_params.offset_limit); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + sel->limit_params.clear(); + $5->empty(); + sel->gorder_list.empty(); + } + | JSON_ARRAYAGG_SYM '(' opt_distinct + { Select->in_sum_expr++; } + expr_list opt_gorder_clause opt_glimit_clause + ')' + { + SELECT_LEX *sel= Select; + 
List *args= $5; + sel->in_sum_expr--; + if (args && args->elements > 1) + { + /* JSON_ARRAYAGG supports only one parameter */ + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), "JSON_ARRAYAGG"); + MYSQL_YYABORT; + } + String* s= new (thd->mem_root) String(",", 1, &my_charset_latin1); + if (unlikely(s == NULL)) + MYSQL_YYABORT; + + $$= new (thd->mem_root) + Item_func_json_arrayagg(thd, Lex->current_context(), + $3, args, + sel->gorder_list, s, $7, + sel->limit_params.select_limit, + sel->limit_params.offset_limit); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + sel->limit_params.clear(); + $5->empty(); + sel->gorder_list.empty(); + } + | JSON_OBJECTAGG_SYM '(' + { Select->in_sum_expr++; } + expr ',' expr ')' + { + SELECT_LEX *sel= Select; + sel->in_sum_expr--; + + $$= new (thd->mem_root) Item_func_json_objectagg(thd, $4, $6); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +window_func_expr: + window_func OVER_SYM window_name + { + $$= new (thd->mem_root) Item_window_func(thd, (Item_sum *) $1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + if (unlikely(Select->add_window_func((Item_window_func *) $$))) + MYSQL_YYABORT; + } + | + window_func OVER_SYM window_spec + { + LEX *lex= Lex; + if (unlikely(Select->add_window_spec(thd, lex->win_ref, + Select->group_list, + Select->order_list, + lex->win_frame))) + MYSQL_YYABORT; + $$= new (thd->mem_root) Item_window_func(thd, (Item_sum *) $1, + thd->lex->win_spec); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + if (unlikely(Select->add_window_func((Item_window_func *) $$))) + MYSQL_YYABORT; + } + ; + +window_func: + simple_window_func + | + sum_expr + | + function_call_generic + { + Item* item = (Item*)$1; + /* Only UDF aggregate here possible */ + if ((item == NULL) || + (item->type() != Item::SUM_FUNC_ITEM) + || (((Item_sum *)item)->sum_func() != Item_sum::UDF_SUM_FUNC)) + { + thd->parse_error(); + MYSQL_YYABORT; + } + } + ; + +simple_window_func: + ROW_NUMBER_SYM '(' ')' + { + $$= new (thd->mem_root) 
Item_sum_row_number(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + RANK_SYM '(' ')' + { + $$= new (thd->mem_root) Item_sum_rank(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + DENSE_RANK_SYM '(' ')' + { + $$= new (thd->mem_root) Item_sum_dense_rank(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + PERCENT_RANK_SYM '(' ')' + { + $$= new (thd->mem_root) Item_sum_percent_rank(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + CUME_DIST_SYM '(' ')' + { + $$= new (thd->mem_root) Item_sum_cume_dist(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + NTILE_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_sum_ntile(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + FIRST_VALUE_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_sum_first_value(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + LAST_VALUE '(' expr ')' + { + $$= new (thd->mem_root) Item_sum_last_value(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + NTH_VALUE_SYM '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_sum_nth_value(thd, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + LEAD_SYM '(' expr ')' + { + /* No second argument defaults to 1. */ + Item* item_offset= new (thd->mem_root) Item_uint(thd, 1); + if (unlikely(item_offset == NULL)) + MYSQL_YYABORT; + $$= new (thd->mem_root) Item_sum_lead(thd, $3, item_offset); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + LEAD_SYM '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_sum_lead(thd, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + LAG_SYM '(' expr ')' + { + /* No second argument defaults to 1. 
*/ + Item* item_offset= new (thd->mem_root) Item_uint(thd, 1); + if (unlikely(item_offset == NULL)) + MYSQL_YYABORT; + $$= new (thd->mem_root) Item_sum_lag(thd, $3, item_offset); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | + LAG_SYM '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_sum_lag(thd, $3, $5); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + + + +inverse_distribution_function: + percentile_function OVER_SYM + '(' opt_window_partition_clause ')' + { + LEX *lex= Lex; + if (unlikely(Select->add_window_spec(thd, lex->win_ref, + Select->group_list, + Select->order_list, + NULL))) + MYSQL_YYABORT; + $$= new (thd->mem_root) Item_window_func(thd, (Item_sum *) $1, + thd->lex->win_spec); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + if (unlikely(Select->add_window_func((Item_window_func *) $$))) + MYSQL_YYABORT; + } + ; + +percentile_function: + inverse_distribution_function_def WITHIN GROUP_SYM '(' + { Select->prepare_add_window_spec(thd); } + order_by_single_element_list ')' + { + $$= $1; + } + | MEDIAN_SYM '(' expr ')' + { + Item *args= new (thd->mem_root) Item_decimal(thd, "0.5", 3, + thd->charset()); + if (unlikely(args == NULL) || unlikely(thd->is_error())) + MYSQL_YYABORT; + Select->prepare_add_window_spec(thd); + if (unlikely(add_order_to_list(thd, $3,FALSE))) + MYSQL_YYABORT; + + $$= new (thd->mem_root) Item_sum_percentile_cont(thd, args); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +inverse_distribution_function_def: + PERCENTILE_CONT_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_sum_percentile_cont(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | PERCENTILE_DISC_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_sum_percentile_disc(thd, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +order_by_single_element_list: + ORDER_SYM BY order_ident order_dir + { + if (unlikely(add_order_to_list(thd, $3,(bool) $4))) + MYSQL_YYABORT; + } + ; + + +window_name: + ident + { + $$= (LEX_CSTRING *) 
thd->memdup(&$1, sizeof(LEX_CSTRING)); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +variable: + '@' + { + if (unlikely(! Lex->parsing_options.allows_variable)) + my_yyabort_error((ER_VIEW_SELECT_VARIABLE, MYF(0))); + } + variable_aux + { + $$= $3; + } + ; + +variable_aux: + ident_or_text SET_VAR expr + { + Item_func_set_user_var *item; + if (!$1.length) + { + thd->parse_error(); + MYSQL_YYABORT; + } + $$= item= new (thd->mem_root) Item_func_set_user_var(thd, &$1, $3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + LEX *lex= Lex; + lex->uncacheable(UNCACHEABLE_SIDEEFFECT); + lex->set_var_list.push_back(item, thd->mem_root); + } + | ident_or_text + { + if (!$1.length) + { + thd->parse_error(); + MYSQL_YYABORT; + } + $$= new (thd->mem_root) Item_func_get_user_var(thd, &$1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + LEX *lex= Lex; + lex->uncacheable(UNCACHEABLE_SIDEEFFECT); + } + | '@' opt_var_ident_type ident_sysvar_name + { + if (unlikely(!($$= Lex->make_item_sysvar(thd, $2, &$3)))) + MYSQL_YYABORT; + } + | '@' opt_var_ident_type ident_sysvar_name '.' 
ident + { + if (unlikely(!($$= Lex->make_item_sysvar(thd, $2, &$3, &$5)))) + MYSQL_YYABORT; + } + ; + +opt_distinct: + /* empty */ { $$ = 0; } + | DISTINCT { $$ = 1; } + ; + +opt_gconcat_separator: + /* empty */ + { + $$= new (thd->mem_root) String(",", 1, &my_charset_latin1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | SEPARATOR_SYM text_string { $$ = $2; } + ; + +opt_gorder_clause: + /* empty */ + | ORDER_SYM BY gorder_list + ; + +gorder_list: + gorder_list ',' order_ident order_dir + { + if (unlikely(add_gorder_to_list(thd, $3,(bool) $4))) + MYSQL_YYABORT; + } + | order_ident order_dir + { + if (unlikely(add_gorder_to_list(thd, $1,(bool) $2))) + MYSQL_YYABORT; + } + ; + +opt_glimit_clause: + /* empty */ { $$ = 0; } + | glimit_clause { $$ = 1; } + ; + + +glimit_clause: + LIMIT glimit_options + { + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_LIMIT); + } + ; + +glimit_options: + limit_options + { + Select->limit_params= $1; + } + ; + + + +in_sum_expr: + opt_all + { + LEX *lex= Lex; + if (unlikely(lex->current_select->inc_in_sum_expr())) + { + thd->parse_error(); + MYSQL_YYABORT; + } + } + expr + { + Select->in_sum_expr--; + $$= $3; + } + ; + +cast_type: + BINARY opt_field_length + { $$.set(&type_handler_long_blob, $2, &my_charset_bin); } + | CHAR_SYM opt_field_length opt_binary + { + if ($$.set(&type_handler_long_blob, $2, $3, + thd->variables.collation_connection)) + MYSQL_YYABORT; + } + | VARCHAR field_length opt_binary + { + if ($$.set(&type_handler_long_blob, $2, $3, + thd->variables.collation_connection)) + MYSQL_YYABORT; + } + | VARCHAR2_ORACLE_SYM field_length opt_binary + { + if ($$.set(&type_handler_long_blob, $2, $3, + thd->variables.collation_connection)) + MYSQL_YYABORT; + } + | NCHAR_SYM opt_field_length + { + $$.set(&type_handler_long_blob, $2, national_charset_info); + } + | cast_type_numeric { $$= $1; } + | cast_type_temporal { $$= $1; } + | udt_name + { + if (Lex->set_cast_type_udt(&$$, $1)) + MYSQL_YYABORT; + } + ; + +cast_type_numeric: 
+ INT_SYM { $$.set(&type_handler_slonglong); } + | SIGNED_SYM { $$.set(&type_handler_slonglong); } + | SIGNED_SYM INT_SYM { $$.set(&type_handler_slonglong); } + | UNSIGNED { $$.set(&type_handler_ulonglong); } + | UNSIGNED INT_SYM { $$.set(&type_handler_ulonglong); } + | DECIMAL_SYM float_options { $$.set(&type_handler_newdecimal, $2); } + | FLOAT_SYM { $$.set(&type_handler_float); } + | DOUBLE_SYM opt_precision { $$.set(&type_handler_double, $2); } + ; + +cast_type_temporal: + DATE_SYM { $$.set(&type_handler_newdate); } + | TIME_SYM opt_field_scale { $$.set(&type_handler_time2, $2); } + | DATETIME opt_field_scale { $$.set(&type_handler_datetime2, $2); } + | INTERVAL_SYM DAY_SECOND_SYM field_scale + { + $$.set(&type_handler_interval_DDhhmmssff, $3); + } + ; + +opt_expr_list: + /* empty */ { $$= NULL; } + | expr_list { $$= $1;} + ; + +expr_list: + expr + { + if (unlikely(!($$= List::make(thd->mem_root, $1)))) + MYSQL_YYABORT; + } + | expr_list ',' expr + { + $1->push_back($3, thd->mem_root); + $$= $1; + } + ; + +ident_list_arg: + ident_list { $$= $1; } + | '(' ident_list ')' { $$= $2; } + ; + +ident_list: + simple_ident + { + $$= new (thd->mem_root) List; + if (unlikely($$ == NULL) || + unlikely($$->push_back($1, thd->mem_root))) + MYSQL_YYABORT; + } + | ident_list ',' simple_ident + { + $1->push_back($3, thd->mem_root); + $$= $1; + } + ; + +when_list: + WHEN_SYM expr THEN_SYM expr + { + $$= new (thd->mem_root) List; + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + if (unlikely($$->push_back($2, thd->mem_root) || + $$->push_back($4, thd->mem_root))) + MYSQL_YYABORT; + } + | when_list WHEN_SYM expr THEN_SYM expr + { + if (unlikely($1->push_back($3, thd->mem_root) || + $1->push_back($5, thd->mem_root))) + MYSQL_YYABORT; + $$= $1; + } + ; + +when_list_opt_else: + when_list + | when_list ELSE expr + { + if (unlikely($1->push_back($3, thd->mem_root))) + MYSQL_YYABORT; + $$= $1; + } + ; + +/* Equivalent to
in the SQL:2003 standard. */ +/* Warning - may return NULL in case of incomplete SELECT */ +table_ref: + table_factor { $$= $1; } + | join_table + { + LEX *lex= Lex; + if (unlikely(!($$= lex->current_select->nest_last_join(thd)))) + { + thd->parse_error(); + MYSQL_YYABORT; + } + } + ; + +json_text_literal: + TEXT_STRING + { + Lex->json_table->m_text_literal_cs= NULL; + } + | NCHAR_STRING + { + Lex->json_table->m_text_literal_cs= national_charset_info; + } + | UNDERSCORE_CHARSET TEXT_STRING + { + Lex->json_table->m_text_literal_cs= $1; + $$= $2; + } + ; + +json_text_literal_or_num: + json_text_literal + | NUM + { + Lex->json_table->m_text_literal_cs= NULL; + } + | LONG_NUM + { + Lex->json_table->m_text_literal_cs= NULL; + } + | DECIMAL_NUM + { + Lex->json_table->m_text_literal_cs= NULL; + } + | FLOAT_NUM + { + Lex->json_table->m_text_literal_cs= NULL; + } + ; + +join_table_list: + derived_table_list { MYSQL_YYABORT_UNLESS($$=$1); } + ; + +json_table_columns_clause: + COLUMNS '(' json_table_columns_list ')' + {} + ; + +json_table_columns_list: + json_table_column + | json_table_columns_list ',' json_table_column + {} + ; + +json_table_column: + ident + { + LEX *lex=Lex; + Create_field *f= new (thd->mem_root) Create_field(); + + if (unlikely(check_string_char_length(&$1, 0, NAME_CHAR_LEN, + system_charset_info, 1))) + my_yyabort_error((ER_TOO_LONG_IDENT, MYF(0), $1.str)); + + lex->json_table->m_cur_json_table_column= + new (thd->mem_root) Json_table_column(f, + lex->json_table->get_cur_nested_path()); + + if (unlikely(!f || + !lex->json_table->m_cur_json_table_column)) + MYSQL_YYABORT; + + lex->init_last_field(f, &$1); + } + json_table_column_type + { + LEX *lex=Lex; + if (unlikely(lex->json_table-> + m_cur_json_table_column->m_field->check(thd))) + MYSQL_YYABORT; + lex->json_table->m_columns.push_back( + lex->json_table->m_cur_json_table_column, thd->mem_root); + } + | NESTED_SYM PATH_SYM json_text_literal + { + LEX *lex=Lex; + Json_table_nested_path *np= new 
(thd->mem_root) + Json_table_nested_path(); + np->set_path(thd, $3); + lex->json_table->start_nested_path(np); + } + json_table_columns_clause + { + LEX *lex=Lex; + lex->json_table->end_nested_path(); + } + ; + +json_table_column_type: + FOR_SYM ORDINALITY_SYM + { + Lex_field_type_st type; + type.set(&type_handler_slong); + Lex->last_field->set_attributes(thd, type, + COLUMN_DEFINITION_TABLE_FIELD); + Lex->json_table->m_cur_json_table_column-> + set(Json_table_column::FOR_ORDINALITY); + } + | json_table_field_type PATH_SYM json_text_literal + json_opt_on_empty_or_error + { + Lex->last_field->set_attributes(thd, $1, + COLUMN_DEFINITION_TABLE_FIELD); + if (Lex->json_table->m_cur_json_table_column-> + set(thd, Json_table_column::PATH, $3, + $1.charset_collation_attrs())) + { + MYSQL_YYABORT; + } + } + | json_table_field_type EXISTS PATH_SYM json_text_literal + { + Lex->last_field->set_attributes(thd, $1, + COLUMN_DEFINITION_TABLE_FIELD); + if (Lex->json_table->m_cur_json_table_column-> + set(thd, Json_table_column::EXISTS_PATH, $4, + $1.charset_collation_attrs())) + MYSQL_YYABORT; + } + ; + +json_table_field_type: + field_type_numeric + | field_type_temporal + | field_type_string + | field_type_lob + ; + +json_opt_on_empty_or_error: + /* none */ + {} + | json_on_error_response + | json_on_error_response json_on_empty_response + | json_on_empty_response + | json_on_empty_response json_on_error_response + ; + +json_on_response: + ERROR_SYM + { + $$.m_response= Json_table_column::RESPONSE_ERROR; + } + | NULL_SYM + { + $$.m_response= Json_table_column::RESPONSE_NULL; + } + | DEFAULT json_text_literal_or_num + { + $$.m_response= Json_table_column::RESPONSE_DEFAULT; + $$.m_default= $2; + Lex->json_table->m_cur_json_table_column->m_defaults_cs= + thd->variables.collation_connection; + } + ; + +json_on_error_response: + json_on_response ON ERROR_SYM + { + Lex->json_table->m_cur_json_table_column->m_on_error= $1; + } + ; + +json_on_empty_response: + json_on_response ON 
EMPTY_SYM + { + Lex->json_table->m_cur_json_table_column->m_on_empty= $1; + } + ; + +table_function: + JSON_TABLE_SYM '(' + { + push_table_function_arg_context(Lex, thd->mem_root); + //TODO: introduce IN_TABLE_FUNC_ARGUMENT? + Select->parsing_place= IN_ON; + } + expr ',' + { + Table_function_json_table *jt= + new (thd->mem_root) Table_function_json_table($4); + if (unlikely(!jt)) + MYSQL_YYABORT; + /* See comment for class Table_function_json_table: */ + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); + Lex->json_table= jt; + + Select->parsing_place= NO_MATTER; + jt->set_name_resolution_context(Lex->pop_context()); + } + json_text_literal json_table_columns_clause ')' opt_table_alias_clause + { + SELECT_LEX *sel= Select; + if (unlikely($10 == NULL)) + { + /* Alias is not optional. */ + my_error(ER_JSON_TABLE_ALIAS_REQUIRED, MYF(0)); + MYSQL_YYABORT; + } + if (unlikely(Lex->json_table->m_nested_path.set_path(thd, $7))) + MYSQL_YYABORT; + if (!($$= sel->add_table_to_list(thd, + new (thd->mem_root) Table_ident(thd, &any_db, + $10, TRUE), + NULL, + TL_OPTION_TABLE_FUNCTION, + YYPS->m_lock_type, + YYPS->m_mdl_type, + 0,0,0))) + MYSQL_YYABORT; + $$->table_function= Lex->json_table; + Lex->json_table= 0; + status_var_increment(thd->status_var.feature_json); + } + ; + +/* + The ODBC escape syntax for Outer Join is: '{' OJ join_table '}' + The parser does not define OJ as a token, any ident is accepted + instead in $2 (ident). Also, all productions from table_ref can + be escaped, not only join_table. Both syntax extensions are safe + and are ignored. +*/ +esc_table_ref: + table_ref { $$=$1; } + | '{' ident table_ref '}' { $$=$3; } + ; + +/* Equivalent to
in the SQL:2003 standard. */ +/* Warning - may return NULL in case of incomplete SELECT */ +derived_table_list: + esc_table_ref + { + $$=$1; + Select->add_joined_table($1); + } + | derived_table_list ',' esc_table_ref + { + MYSQL_YYABORT_UNLESS($1 && ($$=$3)); + Select->add_joined_table($3); + } + ; + +/* + Notice that JOIN can be a left-associative operator in one context and + a right-associative operator in another context (see the comment for + st_select_lex::add_cross_joined_table). +*/ +join_table: + /* INNER JOIN variants */ + table_ref normal_join table_ref %prec CONDITIONLESS_JOIN + { + MYSQL_YYABORT_UNLESS($1 && ($$=$3)); + if (unlikely(Select->add_cross_joined_table($1, $3, $2))) + MYSQL_YYABORT; + } + | table_ref normal_join table_ref + ON + { + MYSQL_YYABORT_UNLESS($1 && $3); + Select->add_joined_table($1); + Select->add_joined_table($3); + /* Change the current name resolution context to a local context. */ + if (unlikely(push_new_name_resolution_context(thd, $1, $3))) + MYSQL_YYABORT; + Select->parsing_place= IN_ON; + } + expr + { + $3->straight=$2; + add_join_on(thd, $3, $6); + $3->on_context= Lex->pop_context(); + Select->parsing_place= NO_MATTER; + } + | table_ref normal_join table_ref + USING + { + MYSQL_YYABORT_UNLESS($1 && $3); + Select->add_joined_table($1); + Select->add_joined_table($3); + } + '(' using_list ')' + { + $3->straight=$2; + add_join_natural($1,$3,$7,Select); + $$=$3; + } + | table_ref NATURAL inner_join table_factor + { + MYSQL_YYABORT_UNLESS($1 && ($$=$4)); + Select->add_joined_table($1); + Select->add_joined_table($4); + $4->straight=$3; + add_join_natural($1,$4,NULL,Select); + } + + /* LEFT JOIN variants */ + | table_ref LEFT opt_outer JOIN_SYM table_ref + ON + { + MYSQL_YYABORT_UNLESS($1 && $5); + Select->add_joined_table($1); + Select->add_joined_table($5); + /* Change the current name resolution context to a local context. 
*/ + if (unlikely(push_new_name_resolution_context(thd, $1, $5))) + MYSQL_YYABORT; + Select->parsing_place= IN_ON; + } + expr + { + add_join_on(thd, $5, $8); + $5->on_context= Lex->pop_context(); + $5->outer_join|=JOIN_TYPE_LEFT; + $$=$5; + Select->parsing_place= NO_MATTER; + } + | table_ref LEFT opt_outer JOIN_SYM table_factor + { + MYSQL_YYABORT_UNLESS($1 && $5); + Select->add_joined_table($1); + Select->add_joined_table($5); + } + USING '(' using_list ')' + { + add_join_natural($1,$5,$9,Select); + $5->outer_join|=JOIN_TYPE_LEFT; + $$=$5; + } + | table_ref NATURAL LEFT opt_outer JOIN_SYM table_factor + { + MYSQL_YYABORT_UNLESS($1 && $6); + Select->add_joined_table($1); + Select->add_joined_table($6); + add_join_natural($1,$6,NULL,Select); + $6->outer_join|=JOIN_TYPE_LEFT; + $$=$6; + } + + /* RIGHT JOIN variants */ + | table_ref RIGHT opt_outer JOIN_SYM table_ref + ON + { + MYSQL_YYABORT_UNLESS($1 && $5); + Select->add_joined_table($1); + Select->add_joined_table($5); + /* Change the current name resolution context to a local context. 
*/ + if (unlikely(push_new_name_resolution_context(thd, $1, $5))) + MYSQL_YYABORT; + Select->parsing_place= IN_ON; + } + expr + { + LEX *lex= Lex; + if (unlikely(!($$= lex->current_select->convert_right_join()))) + MYSQL_YYABORT; + add_join_on(thd, $$, $8); + $1->on_context= Lex->pop_context(); + Select->parsing_place= NO_MATTER; + } + | table_ref RIGHT opt_outer JOIN_SYM table_factor + { + MYSQL_YYABORT_UNLESS($1 && $5); + Select->add_joined_table($1); + Select->add_joined_table($5); + } + USING '(' using_list ')' + { + LEX *lex= Lex; + if (unlikely(!($$= lex->current_select->convert_right_join()))) + MYSQL_YYABORT; + add_join_natural($$,$5,$9,Select); + } + | table_ref NATURAL RIGHT opt_outer JOIN_SYM table_factor + { + MYSQL_YYABORT_UNLESS($1 && $6); + Select->add_joined_table($1); + Select->add_joined_table($6); + add_join_natural($6,$1,NULL,Select); + LEX *lex= Lex; + if (unlikely(!($$= lex->current_select->convert_right_join()))) + MYSQL_YYABORT; + } + ; + + +inner_join: /* $$ set if using STRAIGHT_JOIN, false otherwise */ + JOIN_SYM { $$ = 0; } + | INNER_SYM JOIN_SYM { $$ = 0; } + | STRAIGHT_JOIN { $$ = 1; } + ; + +normal_join: + inner_join { $$ = $1; } + | CROSS JOIN_SYM { $$ = 0; } + ; + +/* + table PARTITION (list of partitions), reusing using_list instead of creating + a new rule for partition_list. 
+*/ +opt_use_partition: + /* empty */ { $$= 0;} + | use_partition + ; + +use_partition: + PARTITION_SYM '(' using_list ')' have_partitioning + { + $$= $3; + Select->parsing_place= Select->save_parsing_place; + Select->save_parsing_place= NO_MATTER; + } + ; + +table_factor: + table_primary_ident_opt_parens { $$= $1; } + | table_primary_derived_opt_parens { $$= $1; } + | join_table_parens + { + $1->nested_join->nest_type= 0; + $$= $1; + } + | table_reference_list_parens { $$= $1; } + | table_function { $$= $1; } + ; + +table_primary_ident_opt_parens: + table_primary_ident { $$= $1; } + | '(' table_primary_ident_opt_parens ')' { $$= $2; } + ; + +table_primary_derived_opt_parens: + table_primary_derived { $$= $1; } + | '(' table_primary_derived_opt_parens ')' { $$= $2; } + ; + +table_reference_list_parens: + '(' table_reference_list_parens ')' { $$= $2; } + | '(' nested_table_reference_list ')' + { + if (!($$= Select->end_nested_join(thd))) + MYSQL_YYABORT; + } + ; + +nested_table_reference_list: + table_ref ',' table_ref + { + if (Select->init_nested_join(thd)) + MYSQL_YYABORT; + Select->add_joined_table($1); + Select->add_joined_table($3); + $$= $1->embedding; + } + | nested_table_reference_list ',' table_ref + { + Select->add_joined_table($3); + $$= $1; + } + ; + +join_table_parens: + '(' join_table_parens ')' { $$= $2; } + | '(' join_table ')' + { + LEX *lex= Lex; + if (!($$= lex->current_select->nest_last_join(thd))) + { + thd->parse_error(); + MYSQL_YYABORT; + } + } + ; + + +table_primary_ident: + table_ident opt_use_partition opt_for_system_time_clause + opt_table_alias_clause opt_key_definition + { + if (!($$= Select->add_table_to_list(thd, $1, $4, + 0, + YYPS->m_lock_type, + YYPS->m_mdl_type, + Select->pop_index_hints(), + $2))) + MYSQL_YYABORT; + if ($3) + $$->vers_conditions= Lex->vers_conditions; + } + ; + +table_primary_derived: + subquery + opt_for_system_time_clause table_alias_clause + { + if (!($$= Lex->parsed_derived_table($1->master_unit(), $2, $3))) 
+ MYSQL_YYABORT; + } +%ifdef ORACLE + | subquery + opt_for_system_time_clause + { + LEX_CSTRING alias; + if ($1->make_unique_derived_name(thd, &alias) || + !($$= Lex->parsed_derived_table($1->master_unit(), $2, &alias))) + MYSQL_YYABORT; + } +%endif + ; + +opt_outer: + /* empty */ {} + | OUTER {} + ; + +index_hint_clause: + /* empty */ + { + $$= (thd->variables.old_behavior & OLD_MODE_IGNORE_INDEX_ONLY_FOR_JOIN) ? + INDEX_HINT_MASK_JOIN : INDEX_HINT_MASK_ALL; + } + | FOR_SYM JOIN_SYM { $$= INDEX_HINT_MASK_JOIN; } + | FOR_SYM ORDER_SYM BY { $$= INDEX_HINT_MASK_ORDER; } + | FOR_SYM GROUP_SYM BY { $$= INDEX_HINT_MASK_GROUP; } + ; + +index_hint_type: + FORCE_SYM { $$= INDEX_HINT_FORCE; } + | IGNORE_SYM { $$= INDEX_HINT_IGNORE; } + ; + +index_hint_definition: + index_hint_type key_or_index index_hint_clause + { + Select->set_index_hint_type($1, $3); + } + '(' key_usage_list ')' + | USE_SYM key_or_index index_hint_clause + { + Select->set_index_hint_type(INDEX_HINT_USE, $3); + } + '(' opt_key_usage_list ')' + ; + +index_hints_list: + index_hint_definition + | index_hints_list index_hint_definition + ; + +opt_index_hints_list: + /* empty */ + | { Select->alloc_index_hints(thd); } index_hints_list + ; + +opt_key_definition: + { Select->clear_index_hints(); } + opt_index_hints_list + ; + +opt_key_usage_list: + /* empty */ { Select->add_index_hint(thd, NULL, 0); } + | key_usage_list {} + ; + +key_usage_element: + ident + { Select->add_index_hint(thd, $1.str, $1.length); } + | PRIMARY_SYM + { Select->add_index_hint(thd, "PRIMARY", 7); } + ; + +key_usage_list: + key_usage_element + | key_usage_list ',' key_usage_element + ; + +using_list: + ident + { + if (unlikely(!($$= new (thd->mem_root) List))) + MYSQL_YYABORT; + String *s= new (thd->mem_root) String((const char*) $1.str, + $1.length, + system_charset_info); + if (unlikely(unlikely(s == NULL))) + MYSQL_YYABORT; + $$->push_back(s, thd->mem_root); + } + | using_list ',' ident + { + String *s= new (thd->mem_root) 
String((const char*) $3.str, + $3.length, + system_charset_info); + if (unlikely(unlikely(s == NULL))) + MYSQL_YYABORT; + if (unlikely($1->push_back(s, thd->mem_root))) + MYSQL_YYABORT; + $$= $1; + } + ; + +interval: + interval_time_stamp {} + | DAY_HOUR_SYM { $$=INTERVAL_DAY_HOUR; } + | DAY_MICROSECOND_SYM { $$=INTERVAL_DAY_MICROSECOND; } + | DAY_MINUTE_SYM { $$=INTERVAL_DAY_MINUTE; } + | DAY_SECOND_SYM { $$=INTERVAL_DAY_SECOND; } + | HOUR_MICROSECOND_SYM { $$=INTERVAL_HOUR_MICROSECOND; } + | HOUR_MINUTE_SYM { $$=INTERVAL_HOUR_MINUTE; } + | HOUR_SECOND_SYM { $$=INTERVAL_HOUR_SECOND; } + | MINUTE_MICROSECOND_SYM { $$=INTERVAL_MINUTE_MICROSECOND; } + | MINUTE_SECOND_SYM { $$=INTERVAL_MINUTE_SECOND; } + | SECOND_MICROSECOND_SYM { $$=INTERVAL_SECOND_MICROSECOND; } + | YEAR_MONTH_SYM { $$=INTERVAL_YEAR_MONTH; } + ; + +interval_time_stamp: + DAY_SYM { $$=INTERVAL_DAY; } + | WEEK_SYM { $$=INTERVAL_WEEK; } + | HOUR_SYM { $$=INTERVAL_HOUR; } + | MINUTE_SYM { $$=INTERVAL_MINUTE; } + | MONTH_SYM { $$=INTERVAL_MONTH; } + | QUARTER_SYM { $$=INTERVAL_QUARTER; } + | SECOND_SYM { $$=INTERVAL_SECOND; } + | MICROSECOND_SYM { $$=INTERVAL_MICROSECOND; } + | YEAR_SYM { $$=INTERVAL_YEAR; } + ; + +date_time_type: + DATE_SYM {$$=MYSQL_TIMESTAMP_DATE;} + | TIME_SYM {$$=MYSQL_TIMESTAMP_TIME;} + | DATETIME {$$=MYSQL_TIMESTAMP_DATETIME;} + | TIMESTAMP {$$=MYSQL_TIMESTAMP_DATETIME;} + ; + +table_alias: + /* empty */ + | AS + | '=' + ; + +opt_table_alias_clause: + /* empty */ { $$=0; } + | table_alias_clause { $$= $1; } + ; + +table_alias_clause: + table_alias ident_table_alias + { + $$= (LEX_CSTRING*) thd->memdup(&$2,sizeof(LEX_STRING)); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +opt_all: + /* empty */ + | ALL + ; + +opt_where_clause: + /* empty */ { Select->where= 0; } + | WHERE + { + Select->parsing_place= IN_WHERE; + } + expr + { + SELECT_LEX *select= Select; + select->where= normalize_cond(thd, $3); + select->parsing_place= NO_MATTER; + if ($3) + $3->top_level_item(); + } + ; 
+ +opt_having_clause: + /* empty */ + | HAVING + { + Select->parsing_place= IN_HAVING; + } + expr + { + SELECT_LEX *sel= Select; + sel->having= normalize_cond(thd, $3); + sel->parsing_place= NO_MATTER; + if ($3) + $3->top_level_item(); + } + ; + +/* + group by statement in select +*/ + +opt_group_clause: + /* empty */ + | GROUP_SYM BY group_list olap_opt + ; + +group_list: + group_list ',' order_ident order_dir + { + if (unlikely(add_group_to_list(thd, $3,(bool) $4))) + MYSQL_YYABORT; + } + | order_ident order_dir + { + if (unlikely(add_group_to_list(thd, $1,(bool) $2))) + MYSQL_YYABORT; + } + ; + +olap_opt: + /* empty */ {} + | WITH_CUBE_SYM + { + /* + 'WITH CUBE' is reserved in the MySQL syntax, but not implemented, + and cause LALR(2) conflicts. + This syntax is not standard. + MySQL syntax: GROUP BY col1, col2, col3 WITH CUBE + SQL-2003: GROUP BY ... CUBE(col1, col2, col3) + */ + LEX *lex=Lex; + if (unlikely(lex->current_select->get_linkage() == GLOBAL_OPTIONS_TYPE)) + my_yyabort_error((ER_WRONG_USAGE, MYF(0), "WITH CUBE", + "global union parameters")); + lex->current_select->olap= CUBE_TYPE; + + my_yyabort_error((ER_NOT_SUPPORTED_YET, MYF(0), "CUBE")); + } + | WITH_ROLLUP_SYM + { + /* + 'WITH ROLLUP' is needed for backward compatibility, + and cause LALR(2) conflicts. + This syntax is not standard. + MySQL syntax: GROUP BY col1, col2, col3 WITH ROLLUP + SQL-2003: GROUP BY ... 
ROLLUP(col1, col2, col3) + */ + LEX *lex= Lex; + if (unlikely(lex->current_select->get_linkage() == GLOBAL_OPTIONS_TYPE)) + my_yyabort_error((ER_WRONG_USAGE, MYF(0), "WITH ROLLUP", + "global union parameters")); + lex->current_select->olap= ROLLUP_TYPE; + } + ; + +/* + optional window clause in select +*/ + +opt_window_clause: + /* empty */ + {} + | WINDOW_SYM + window_def_list + {} + ; + +window_def_list: + window_def_list ',' window_def + | window_def + ; + +window_def: + window_name AS window_spec + { + LEX *lex= Lex; + if (unlikely(Select->add_window_def(thd, $1, lex->win_ref, + Select->group_list, + Select->order_list, + lex->win_frame))) + MYSQL_YYABORT; + } + ; + +window_spec: + '(' + { Select->prepare_add_window_spec(thd); } + opt_window_ref opt_window_partition_clause + opt_window_order_clause opt_window_frame_clause + ')' + { } + ; + +opt_window_ref: + /* empty */ {} + | ident + { + thd->lex->win_ref= (LEX_CSTRING *) thd->memdup(&$1, sizeof(LEX_CSTRING)); + if (unlikely(thd->lex->win_ref == NULL)) + MYSQL_YYABORT; + } + ; + +opt_window_partition_clause: + /* empty */ { } + | PARTITION_SYM BY group_list + ; + +opt_window_order_clause: + /* empty */ { } + | ORDER_SYM BY order_list { Select->order_list= *($3); } + ; + +opt_window_frame_clause: + /* empty */ {} + | window_frame_units window_frame_extent opt_window_frame_exclusion + { + LEX *lex= Lex; + lex->win_frame= + new (thd->mem_root) Window_frame($1, + lex->frame_top_bound, + lex->frame_bottom_bound, + $3); + if (unlikely(lex->win_frame == NULL)) + MYSQL_YYABORT; + } + ; + +window_frame_units: + ROWS_SYM { $$= Window_frame::UNITS_ROWS; } + | RANGE_SYM { $$= Window_frame::UNITS_RANGE; } + ; + +window_frame_extent: + window_frame_start + { + LEX *lex= Lex; + lex->frame_top_bound= $1; + lex->frame_bottom_bound= + new (thd->mem_root) + Window_frame_bound(Window_frame_bound::CURRENT, NULL); + if (unlikely(lex->frame_bottom_bound == NULL)) + MYSQL_YYABORT; + } + | BETWEEN_SYM window_frame_bound AND_SYM 
window_frame_bound + { + LEX *lex= Lex; + lex->frame_top_bound= $2; + lex->frame_bottom_bound= $4; + } + ; + +window_frame_start: + UNBOUNDED_SYM PRECEDING_SYM + { + $$= new (thd->mem_root) + Window_frame_bound(Window_frame_bound::PRECEDING, NULL); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | CURRENT_SYM ROW_SYM + { + $$= new (thd->mem_root) + Window_frame_bound(Window_frame_bound::CURRENT, NULL); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | literal PRECEDING_SYM + { + $$= new (thd->mem_root) + Window_frame_bound(Window_frame_bound::PRECEDING, $1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +window_frame_bound: + window_frame_start { $$= $1; } + | UNBOUNDED_SYM FOLLOWING_SYM + { + $$= new (thd->mem_root) + Window_frame_bound(Window_frame_bound::FOLLOWING, NULL); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | literal FOLLOWING_SYM + { + $$= new (thd->mem_root) + Window_frame_bound(Window_frame_bound::FOLLOWING, $1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +opt_window_frame_exclusion: + /* empty */ { $$= Window_frame::EXCL_NONE; } + | EXCLUDE_SYM CURRENT_SYM ROW_SYM + { $$= Window_frame::EXCL_CURRENT_ROW; } + | EXCLUDE_SYM GROUP_SYM + { $$= Window_frame::EXCL_GROUP; } + | EXCLUDE_SYM TIES_SYM + { $$= Window_frame::EXCL_TIES; } + | EXCLUDE_SYM NO_SYM OTHERS_MARIADB_SYM + { $$= Window_frame::EXCL_NONE; } + | EXCLUDE_SYM NO_SYM OTHERS_ORACLE_SYM + { $$= Window_frame::EXCL_NONE; } + ; + +/* + Order by statement in ALTER TABLE +*/ + +alter_order_clause: + ORDER_SYM BY alter_order_list + ; + +alter_order_list: + alter_order_list ',' alter_order_item + | alter_order_item + ; + +alter_order_item: + simple_ident_nospvar order_dir + { + bool ascending= ($2 == 1) ? 
true : false; + if (unlikely(add_order_to_list(thd, $1, ascending))) + MYSQL_YYABORT; + } + ; + +/* + Order by statement in select +*/ + +opt_order_clause: + /* empty */ + { $$= NULL; } + | order_clause + { $$= $1; } + ; + +order_clause: + ORDER_SYM BY + { + thd->where= "ORDER clause"; + } + order_list + { + $$= $4; + } + ; + +order_list: + order_list ',' order_ident order_dir + { + $$= $1; + if (add_to_list(thd, *$$, $3,(bool) $4)) + MYSQL_YYABORT; + } + | order_ident order_dir + { + $$= new (thd->mem_root) SQL_I_List(); + if (add_to_list(thd, *$$, $1, (bool) $2)) + MYSQL_YYABORT; + } + ; + +order_dir: + /* empty */ { $$= 1; } + | ASC { $$= 1; } + | DESC { $$= 0; } + ; + + +opt_limit_clause: + /* empty */ + { $$.clear(); } + | limit_clause + { $$= $1; } + ; + +limit_clause: + LIMIT limit_options + { + $$= $2; + if (!$$.select_limit->basic_const_item() || + $$.select_limit->val_int() > 0) + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_LIMIT); + } + | LIMIT limit_options + ROWS_SYM EXAMINED_SYM limit_rows_option + { + $$= $2; + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_LIMIT); + } + | LIMIT ROWS_SYM EXAMINED_SYM limit_rows_option + { + $$.clear(); + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_LIMIT); + } + | fetch_first_clause + { + $$= $1; + if (!$$.select_limit || + !$$.select_limit->basic_const_item() || + $$.select_limit->val_int() > 0) + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_LIMIT); + } + ; + +fetch_first_clause: + FETCH_SYM first_or_next row_or_rows only_or_with_ties + { + Item *one= new (thd->mem_root) Item_int(thd, (int32) 1); + if (unlikely(one == NULL)) + MYSQL_YYABORT; + $$.select_limit= one; + $$.offset_limit= 0; + $$.explicit_limit= true; + $$.with_ties= $4; + } + | OFFSET_SYM limit_option row_or_rows + FETCH_SYM first_or_next row_or_rows only_or_with_ties + { + Item *one= new (thd->mem_root) Item_int(thd, (int32) 1); + if (unlikely(one == NULL)) + MYSQL_YYABORT; + $$.select_limit= one; + $$.offset_limit= $2; + $$.explicit_limit= true; + 
$$.with_ties= $7; + } + | FETCH_SYM first_or_next limit_option row_or_rows only_or_with_ties + { + $$.select_limit= $3; + $$.offset_limit= 0; + $$.explicit_limit= true; + $$.with_ties= $5; + } + | OFFSET_SYM limit_option row_or_rows + FETCH_SYM first_or_next limit_option row_or_rows only_or_with_ties + { + $$.select_limit= $6; + $$.offset_limit= $2; + $$.explicit_limit= true; + $$.with_ties= $8; + } + | OFFSET_SYM limit_option row_or_rows + { + $$.select_limit= 0; + $$.offset_limit= $2; + $$.explicit_limit= true; + $$.with_ties= false; + } + ; + +first_or_next: + FIRST_SYM + | NEXT_SYM + ; + +row_or_rows: + ROW_SYM + | ROWS_SYM + ; + +only_or_with_ties: + ONLY_SYM { $$= 0; } + | WITH TIES_SYM { $$= 1; } + ; + + +opt_global_limit_clause: + opt_limit_clause + { + Select->limit_params= $1; + } + ; + +limit_options: + limit_option + { + $$.select_limit= $1; + $$.offset_limit= NULL; + $$.explicit_limit= true; + $$.with_ties= false; + } + | limit_option ',' limit_option + { + $$.select_limit= $3; + $$.offset_limit= $1; + $$.explicit_limit= true; + $$.with_ties= false; + } + | limit_option OFFSET_SYM limit_option + { + $$.select_limit= $1; + $$.offset_limit= $3; + $$.explicit_limit= true; + $$.with_ties= false; + } + ; + +limit_option: + ident_cli + { + if (unlikely(!($$= Lex->create_item_limit(thd, &$1)))) + MYSQL_YYABORT; + } + | ident_cli '.' 
ident_cli + { + if (unlikely(!($$= Lex->create_item_limit(thd, &$1, &$3)))) + MYSQL_YYABORT; + } + | param_marker + { + $1->limit_clause_param= TRUE; + } + | ULONGLONG_NUM + { + $$= new (thd->mem_root) Item_uint(thd, $1.str, $1.length); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | LONG_NUM + { + $$= new (thd->mem_root) Item_uint(thd, $1.str, $1.length); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | NUM + { + $$= new (thd->mem_root) Item_uint(thd, $1.str, $1.length); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +limit_rows_option: + limit_option + { + Lex->limit_rows_examined= $1; + } + ; + +delete_limit_clause: + /* empty */ + { + LEX *lex=Lex; + lex->current_select->limit_params.select_limit= 0; + } + | LIMIT limit_option + { + SELECT_LEX *sel= Select; + sel->limit_params.select_limit= $2; + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_LIMIT); + sel->limit_params.explicit_limit= 1; + } + | LIMIT ROWS_SYM EXAMINED_SYM { thd->parse_error(); MYSQL_YYABORT; } + | LIMIT limit_option ROWS_SYM EXAMINED_SYM { thd->parse_error(); MYSQL_YYABORT; } + ; + +order_limit_lock: + order_or_limit + { + $$= $1; + $$->lock.empty(); + } + | order_or_limit select_lock_type + { + $$= $1; + $$->lock= $2; + } + | select_lock_type + { + $$= new(thd->mem_root) Lex_order_limit_lock; + if (!$$) + YYABORT; + $$->order_list= NULL; + $$->limit.clear(); + $$->lock= $1; + } + ; + +opt_order_limit_lock: + /* empty */ + { + Lex->pop_select(); + $$= NULL; + } + | order_limit_lock { $$= $1; } + ; + +query_expression_tail: + order_limit_lock + ; + +opt_query_expression_tail: + opt_order_limit_lock + ; + +opt_procedure_or_into: + /* empty */ + { + $$.empty(); + } + | procedure_clause opt_select_lock_type + { + $$= $2; + } + | into opt_select_lock_type + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_DEPRECATED_SYNTAX, + ER_THD(thd, ER_WARN_DEPRECATED_SYNTAX), + " INTO " + " FROM...'"); + $$= $2; + } + ; + + +order_or_limit: + order_clause opt_limit_clause 
+ { + $$= new(thd->mem_root) Lex_order_limit_lock; + if (!$$) + YYABORT; + $$->order_list= $1; + $$->limit= $2; + } + | limit_clause + { + $$= new(thd->mem_root) Lex_order_limit_lock; + if (!$$) + YYABORT; + $$->order_list= NULL; + $$->limit= $1; + } + ; + + +opt_plus: + /* empty */ + | '+' + ; + +int_num: + opt_plus NUM { int error; $$= (int) my_strtoll10($2.str, (char**) 0, &error); } + | '-' NUM { int error; $$= -(int) my_strtoll10($2.str, (char**) 0, &error); } + ; + +ulong_num: + opt_plus NUM { int error; $$= (ulong) my_strtoll10($2.str, (char**) 0, &error); } + | HEX_NUM { $$= strtoul($1.str, (char**) 0, 16); } + | opt_plus LONG_NUM { int error; $$= (ulong) my_strtoll10($2.str, (char**) 0, &error); } + | opt_plus ULONGLONG_NUM { int error; $$= (ulong) my_strtoll10($2.str, (char**) 0, &error); } + | opt_plus DECIMAL_NUM { int error; $$= (ulong) my_strtoll10($2.str, (char**) 0, &error); } + | opt_plus FLOAT_NUM { int error; $$= (ulong) my_strtoll10($2.str, (char**) 0, &error); } + ; + +real_ulong_num: + NUM { int error; $$= (ulong) my_strtoll10($1.str, (char**) 0, &error); } + | HEX_NUM { $$= (ulong) strtol($1.str, (char**) 0, 16); } + | LONG_NUM { int error; $$= (ulong) my_strtoll10($1.str, (char**) 0, &error); } + | ULONGLONG_NUM { int error; $$= (ulong) my_strtoll10($1.str, (char**) 0, &error); } + | dec_num_error { MYSQL_YYABORT; } + ; + +longlong_num: + opt_plus NUM { int error; $$= (longlong) my_strtoll10($2.str, (char**) 0, &error); } + | LONG_NUM { int error; $$= (longlong) my_strtoll10($1.str, (char**) 0, &error); } + | '-' NUM { int error; $$= -(longlong) my_strtoll10($2.str, (char**) 0, &error); } + | '-' LONG_NUM { int error; $$= -(longlong) my_strtoll10($2.str, (char**) 0, &error); } + ; + +ulonglong_num: + opt_plus NUM { int error; $$= (ulonglong) my_strtoll10($2.str, (char**) 0, &error); } + | opt_plus ULONGLONG_NUM { int error; $$= (ulonglong) my_strtoll10($2.str, (char**) 0, &error); } + | opt_plus LONG_NUM { int error; $$= (ulonglong) 
my_strtoll10($2.str, (char**) 0, &error); } + | opt_plus DECIMAL_NUM { int error; $$= (ulonglong) my_strtoll10($2.str, (char**) 0, &error); } + | opt_plus FLOAT_NUM { int error; $$= (ulonglong) my_strtoll10($2.str, (char**) 0, &error); } + ; + +real_ulonglong_num: + NUM { int error; $$= (ulonglong) my_strtoll10($1.str, (char**) 0, &error); } + | ULONGLONG_NUM { int error; $$= (ulonglong) my_strtoll10($1.str, (char**) 0, &error); } + | HEX_NUM { $$= strtoull($1.str, (char**) 0, 16); } + | LONG_NUM { int error; $$= (ulonglong) my_strtoll10($1.str, (char**) 0, &error); } + | dec_num_error { MYSQL_YYABORT; } + ; + +dec_num_error: + dec_num + { thd->parse_error(ER_ONLY_INTEGERS_ALLOWED); } + ; + +dec_num: + DECIMAL_NUM + | FLOAT_NUM + ; + +choice: + ulong_num { $$= $1 != 0 ? HA_CHOICE_YES : HA_CHOICE_NO; } + | DEFAULT { $$= HA_CHOICE_UNDEF; } + ; + +bool: + ulong_num { $$= $1 != 0; } + | TRUE_SYM { $$= 1; } + | FALSE_SYM { $$= 0; } + ; + +procedure_clause: + PROCEDURE_SYM ident /* Procedure name */ + { + LEX *lex=Lex; + + lex->proc_list.elements=0; + lex->proc_list.first=0; + lex->proc_list.next= &lex->proc_list.first; + Item_field *item= new (thd->mem_root) + Item_field(thd, &lex->current_select->context, + $2); + if (unlikely(item == NULL)) + MYSQL_YYABORT; + if (unlikely(add_proc_to_list(thd, item))) + MYSQL_YYABORT; + Lex->uncacheable(UNCACHEABLE_SIDEEFFECT); + + /* + PROCEDURE CLAUSE cannot handle subquery as one of its parameter, + so disallow any subqueries further. + Alow subqueries back once the parameters are reduced. 
+ */ + Lex->clause_that_disallows_subselect= "PROCEDURE"; + Select->options|= OPTION_PROCEDURE_CLAUSE; + } + '(' procedure_list ')' + { + /* Subqueries are allowed from now.*/ + Lex->clause_that_disallows_subselect= NULL; + } + ; + +procedure_list: + /* empty */ {} + | procedure_list2 {} + ; + +procedure_list2: + procedure_list2 ',' procedure_item + | procedure_item + ; + +procedure_item: + remember_name expr remember_end + { + if (unlikely(add_proc_to_list(thd, $2))) + MYSQL_YYABORT; + if (!$2->name.str || $2->name.str == item_empty_name) + $2->set_name(thd, $1, (uint) ($3 - $1), thd->charset()); + } + ; + +select_var_list_init: + { + LEX *lex=Lex; + if (!lex->describe && + unlikely((!(lex->result= new (thd->mem_root) + select_dumpvar(thd))))) + MYSQL_YYABORT; + } + select_var_list + {} + ; + +select_var_list: + select_var_list ',' select_var_ident + | select_var_ident {} + ; + +select_var_ident: select_outvar + { + if (Lex->result) + { + if (unlikely($1 == NULL)) + MYSQL_YYABORT; + ((select_dumpvar *)Lex->result)->var_list.push_back($1, thd->mem_root); + } + else + { + /* + The parser won't create select_result instance only + if it's an EXPLAIN. + */ + DBUG_ASSERT(Lex->describe); + } + } + ; + +select_outvar: + '@' ident_or_text + { + if (!$2.length) + { + thd->parse_error(); + MYSQL_YYABORT; + } + + $$ = Lex->result ? new (thd->mem_root) my_var_user(&$2) : NULL; + } + | ident_or_text + { + if (unlikely(!($$= Lex->create_outvar(thd, &$1)) && Lex->result)) + MYSQL_YYABORT; + } + | ident '.' 
ident + { + if (unlikely(!($$= Lex->create_outvar(thd, &$1, &$3)) && Lex->result)) + MYSQL_YYABORT; + } + ; + +into: + INTO into_destination + {} + ; + +into_destination: + OUTFILE TEXT_STRING_filesystem + { + LEX *lex= Lex; + lex->uncacheable(UNCACHEABLE_SIDEEFFECT); + if (unlikely(!(lex->exchange= + new (thd->mem_root) sql_exchange($2.str, 0))) || + unlikely(!(lex->result= + new (thd->mem_root) + select_export(thd, lex->exchange)))) + MYSQL_YYABORT; + } + opt_load_data_charset + { Lex->exchange->cs= $4; } + opt_field_term opt_line_term + | DUMPFILE TEXT_STRING_filesystem + { + LEX *lex=Lex; + if (!lex->describe) + { + lex->uncacheable(UNCACHEABLE_SIDEEFFECT); + if (unlikely(!(lex->exchange= + new (thd->mem_root) sql_exchange($2.str,1)))) + MYSQL_YYABORT; + if (unlikely(!(lex->result= + new (thd->mem_root) + select_dump(thd, lex->exchange)))) + MYSQL_YYABORT; + } + } + | select_var_list_init + { + Lex->uncacheable(UNCACHEABLE_SIDEEFFECT); + } + ; + +/* + DO statement +*/ + +do: + DO_SYM + { + LEX *lex=Lex; + lex->sql_command = SQLCOM_DO; + if (lex->main_select_push(true)) + MYSQL_YYABORT; + lex->init_select(); + } + expr_list + { + Lex->insert_list= $3; + Lex->pop_select(); //main select + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } + ; + +/* + Drop : delete tables or index or user +*/ + +drop: + DROP opt_temporary table_or_tables opt_if_exists + { + LEX *lex=Lex; + lex->set_command(SQLCOM_DROP_TABLE, $2, $4); + YYPS->m_lock_type= TL_UNLOCK; + YYPS->m_mdl_type= MDL_EXCLUSIVE; + } + table_list opt_lock_wait_timeout opt_restrict + {} + | DROP INDEX_SYM + { + if (Lex->main_select_push()) + MYSQL_YYABORT; + } + opt_if_exists_table_element ident ON table_ident opt_lock_wait_timeout + { + LEX *lex=Lex; + Alter_drop *ad= (new (thd->mem_root) + Alter_drop(Alter_drop::KEY, $5.str, $4)); + if (unlikely(ad == NULL)) + MYSQL_YYABORT; + lex->sql_command= SQLCOM_DROP_INDEX; + lex->alter_info.reset(); + lex->alter_info.flags= ALTER_DROP_INDEX; 
+ lex->alter_info.drop_list.push_back(ad, thd->mem_root); + if (unlikely(!lex->current_select-> + add_table_to_list(thd, $7, NULL, TL_OPTION_UPDATING, + TL_READ_NO_INSERT, + MDL_SHARED_UPGRADABLE))) + MYSQL_YYABORT; + Lex->pop_select(); //main select + } + | DROP DATABASE opt_if_exists ident + { + LEX *lex=Lex; + lex->set_command(SQLCOM_DROP_DB, $3); + lex->name= $4; + } + | DROP USER_SYM opt_if_exists clear_privileges user_list + { + Lex->set_command(SQLCOM_DROP_USER, $3); + } + | DROP ROLE_SYM opt_if_exists clear_privileges role_list + { + Lex->set_command(SQLCOM_DROP_ROLE, $3); + } + | DROP VIEW_SYM opt_if_exists + { + LEX *lex= Lex; + lex->set_command(SQLCOM_DROP_VIEW, $3); + YYPS->m_lock_type= TL_UNLOCK; + YYPS->m_mdl_type= MDL_EXCLUSIVE; + } + table_list opt_restrict + {} + | DROP EVENT_SYM opt_if_exists sp_name + { + Lex->spname= $4; + Lex->set_command(SQLCOM_DROP_EVENT, $3); + } + | DROP TRIGGER_SYM opt_if_exists sp_name + { + LEX *lex= Lex; + lex->set_command(SQLCOM_DROP_TRIGGER, $3); + lex->spname= $4; + } + | DROP SERVER_SYM opt_if_exists ident_or_text + { + Lex->set_command(SQLCOM_DROP_SERVER, $3); + Lex->server_options.reset($4); + } + | DROP opt_temporary SEQUENCE_SYM opt_if_exists + + { + LEX *lex= Lex; + lex->set_command(SQLCOM_DROP_SEQUENCE, $2, $4); + lex->table_type= TABLE_TYPE_SEQUENCE; + YYPS->m_lock_type= TL_UNLOCK; + YYPS->m_mdl_type= MDL_EXCLUSIVE; + } + table_list + {} + | drop_routine + ; + +table_list: + table_name + | table_list ',' table_name + ; + +table_name: + table_ident + { + if (!thd->lex->current_select_or_default()-> + add_table_to_list(thd, $1, NULL, + TL_OPTION_UPDATING, + YYPS->m_lock_type, + YYPS->m_mdl_type)) + MYSQL_YYABORT; + } + ; + +table_name_with_opt_use_partition: + table_ident opt_use_partition + { + if (unlikely(!Select->add_table_to_list(thd, $1, NULL, + TL_OPTION_UPDATING, + YYPS->m_lock_type, + YYPS->m_mdl_type, + NULL, + $2))) + MYSQL_YYABORT; + } + ; + +table_alias_ref_list: + table_alias_ref + | 
table_alias_ref_list ',' table_alias_ref + ; + +table_alias_ref: + table_ident_opt_wild + { + if (unlikely(!Select-> + add_table_to_list(thd, $1, NULL, + (TL_OPTION_UPDATING | + TL_OPTION_ALIAS), + YYPS->m_lock_type, + YYPS->m_mdl_type))) + MYSQL_YYABORT; + } + ; + +opt_if_exists_table_element: + /* empty */ + { + Lex->check_exists= FALSE; + $$= 0; + } + | IF_SYM EXISTS + { + Lex->check_exists= TRUE; + $$= 1; + } + ; + +opt_if_exists: + /* empty */ + { + $$.set(DDL_options_st::OPT_NONE); + } + | IF_SYM EXISTS + { + $$.set(DDL_options_st::OPT_IF_EXISTS); + } + ; + +opt_temporary: + /* empty */ { $$= 0; } + | TEMPORARY { $$= HA_LEX_CREATE_TMP_TABLE; } + ; +/* +** Insert : add new data to table +*/ + +insert: + INSERT + { + Lex->sql_command= SQLCOM_INSERT; + Lex->duplicates= DUP_ERROR; + thd->get_stmt_da()->opt_clear_warning_info(thd->query_id); + thd->get_stmt_da()->reset_current_row_for_warning(1); + } + insert_start insert_lock_option opt_ignore opt_into insert_table + { + Select->set_lock_for_tables($4, true, false); + } + insert_field_spec opt_insert_update opt_returning + stmt_end + { + Lex->mark_first_table_as_inserting(); + thd->get_stmt_da()->reset_current_row_for_warning(0); + } + ; + +replace: + REPLACE + { + Lex->sql_command = SQLCOM_REPLACE; + Lex->duplicates= DUP_REPLACE; + thd->get_stmt_da()->opt_clear_warning_info(thd->query_id); + thd->get_stmt_da()->reset_current_row_for_warning(1); + } + insert_start replace_lock_option opt_into insert_table + { + Select->set_lock_for_tables($4, true, false); + } + insert_field_spec opt_returning + stmt_end + { + Lex->mark_first_table_as_inserting(); + thd->get_stmt_da()->reset_current_row_for_warning(0); + } + ; + +insert_start: { + if (Lex->main_select_push()) + MYSQL_YYABORT; + Lex->init_select(); + Lex->inc_select_stack_outer_barrier(); + Lex->current_select->parsing_place= BEFORE_OPT_LIST; + } + ; + +stmt_end: { + Lex->pop_select(); //main select + if (Lex->check_main_unit_semantics()) + MYSQL_YYABORT; + } + ; 
+ +insert_lock_option: + /* empty */ + { + /* + If it is SP we do not allow insert optimisation when result of + insert visible only after the table unlocking but everyone can + read table. + */ + $$= (Lex->sphead ? TL_WRITE_DEFAULT : TL_WRITE_CONCURRENT_INSERT); + } + | insert_replace_option + | HIGH_PRIORITY { $$= TL_WRITE; } + ; + +replace_lock_option: + /* empty */ { $$= TL_WRITE_DEFAULT; } + | insert_replace_option + ; + +insert_replace_option: + LOW_PRIORITY { $$= TL_WRITE_LOW_PRIORITY; } + | DELAYED_SYM + { + Lex->keyword_delayed_begin_offset= (uint)($1.pos() - thd->query()); + Lex->keyword_delayed_end_offset= (uint)($1.end() - thd->query()); + $$= TL_WRITE_DELAYED; + } + ; + +opt_into: /* nothing */ | INTO ; + +insert_table: + { + Select->save_parsing_place= Select->parsing_place; + } + table_name_with_opt_use_partition + { + LEX *lex=Lex; + //lex->field_list.empty(); + lex->many_values.empty(); + lex->insert_list=0; + } + ; + +insert_field_spec: + insert_values {} + | insert_field_list insert_values {} + | SET + { + LEX *lex=Lex; + if (unlikely(!(lex->insert_list= new (thd->mem_root) List_item)) || + unlikely(lex->many_values.push_back(lex->insert_list, + thd->mem_root))) + MYSQL_YYABORT; + lex->current_select->parsing_place= NO_MATTER; + } + ident_eq_list + ; + +insert_field_list: + LEFT_PAREN_ALT opt_fields ')' + { + Lex->current_select->parsing_place= AFTER_LIST; + } + ; + +opt_fields: + /* empty */ + | fields + ; + +fields: + fields ',' insert_ident + { Lex->field_list.push_back($3, thd->mem_root); } + | insert_ident { Lex->field_list.push_back($1, thd->mem_root); } + ; + + + +insert_values: + create_select_query_expression {} + ; + +values_list: + values_list ',' no_braces + | no_braces_with_names + ; + +ident_eq_list: + ident_eq_list ',' ident_eq_value + | ident_eq_value + ; + +ident_eq_value: + simple_ident_nospvar equal expr_or_ignore_or_default + { + LEX *lex=Lex; + if (unlikely(lex->field_list.push_back($1, thd->mem_root)) || + 
unlikely(lex->insert_list->push_back($3, thd->mem_root))) + MYSQL_YYABORT; + } + ; + +equal: + '=' {} + | SET_VAR {} + ; + +opt_equal: + /* empty */ {} + | equal {} + ; + +opt_with: + opt_equal {} + | WITH {} + ; + +opt_by: + opt_equal {} + | BY {} + ; + +no_braces: + '(' + { + if (unlikely(!(Lex->insert_list= new (thd->mem_root) List_item))) + MYSQL_YYABORT; + } + opt_values ')' + { + LEX *lex=Lex; + thd->get_stmt_da()->inc_current_row_for_warning(); + if (unlikely(lex->many_values.push_back(lex->insert_list, + thd->mem_root))) + MYSQL_YYABORT; + } + ; + +no_braces_with_names: + '(' + { + if (unlikely(!(Lex->insert_list= new (thd->mem_root) List_item))) + MYSQL_YYABORT; + } + opt_values_with_names ')' + { + LEX *lex=Lex; + thd->get_stmt_da()->inc_current_row_for_warning(); + if (unlikely(lex->many_values.push_back(lex->insert_list, + thd->mem_root))) + MYSQL_YYABORT; + } + ; + +opt_values: + /* empty */ {} + | values + ; + +opt_values_with_names: + /* empty */ {} + | values_with_names + ; + +values: + values ',' expr_or_ignore_or_default + { + if (unlikely(Lex->insert_list->push_back($3, thd->mem_root))) + MYSQL_YYABORT; + } + | expr_or_ignore_or_default + { + if (unlikely(Lex->insert_list->push_back($1, thd->mem_root))) + MYSQL_YYABORT; + } + ; + +values_with_names: + values_with_names ',' remember_name expr_or_ignore_or_default remember_end + { + if (unlikely(Lex->insert_list->push_back($4, thd->mem_root))) + MYSQL_YYABORT; + // give some name in case of using in table value constuctor (TVC) + if (!$4->name.str || $4->name.str == item_empty_name) + $4->set_name(thd, $3, (uint) ($5 - $3), thd->charset()); + } + | remember_name expr_or_ignore_or_default remember_end + { + if (unlikely(Lex->insert_list->push_back($2, thd->mem_root))) + MYSQL_YYABORT; + // give some name in case of using in table value constuctor (TVC) + if (!$2->name.str || $2->name.str == item_empty_name) + $2->set_name(thd, $1, (uint) ($3 - $1), thd->charset()); + } + ; + +expr_or_ignore: + expr 
{ $$= $1;} + | IGNORE_SYM + { + $$= new (thd->mem_root) Item_ignore_specification(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +expr_or_ignore_or_default: + expr_or_ignore { $$= $1;} + | DEFAULT + { + $$= new (thd->mem_root) Item_default_specification(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +opt_insert_update: + /* empty */ + | ON DUPLICATE_SYM { Lex->duplicates= DUP_UPDATE; } + KEY_SYM UPDATE_SYM + { + Select->parsing_place= IN_UPDATE_ON_DUP_KEY; + } + insert_update_list + { + Select->parsing_place= NO_MATTER; + } + ; + +update_table_list: + table_ident opt_use_partition for_portion_of_time_clause + opt_table_alias_clause opt_key_definition + { + if (!($$= Select->add_table_to_list(thd, $1, $4, + 0, + YYPS->m_lock_type, + YYPS->m_mdl_type, + Select->pop_index_hints(), + $2))) + MYSQL_YYABORT; + $$->period_conditions= Lex->period_conditions; + } + | join_table_list { $$= $1; } + ; + +/* Update rows in a table */ + +update: + UPDATE_SYM + { + LEX *lex= Lex; + if (Lex->main_select_push()) + MYSQL_YYABORT; + lex->init_select(); + lex->sql_command= SQLCOM_UPDATE; + lex->duplicates= DUP_ERROR; + } + opt_low_priority opt_ignore update_table_list + SET update_list + { + SELECT_LEX *slex= Lex->first_select_lex(); + if (slex->table_list.elements > 1) + Lex->sql_command= SQLCOM_UPDATE_MULTI; + else if (slex->get_table_list()->derived) + { + /* it is single table update and it is update of derived table */ + my_error(ER_NON_UPDATABLE_TABLE, MYF(0), + slex->get_table_list()->alias.str, "UPDATE"); + MYSQL_YYABORT; + } + /* + In case of multi-update setting write lock for all tables may + be too pessimistic. We will decrease lock level if possible in + mysql_multi_update(). 
+ */ + slex->set_lock_for_tables($3, slex->table_list.elements == 1, false); + } + opt_where_clause opt_order_clause delete_limit_clause + { + if ($10) + Select->order_list= *($10); + } stmt_end {} + ; + +update_list: + update_list ',' update_elem + | update_elem + ; + +update_elem: + simple_ident_nospvar equal DEFAULT + { + Item *def= new (thd->mem_root) Item_default_value(thd, + Lex->current_context(), $1, 1); + if (!def || add_item_to_list(thd, $1) || add_value_to_list(thd, def)) + MYSQL_YYABORT; + } + | simple_ident_nospvar equal expr_or_ignore + { + if (add_item_to_list(thd, $1) || add_value_to_list(thd, $3)) + MYSQL_YYABORT; + } + ; + +insert_update_list: + insert_update_list ',' insert_update_elem + | insert_update_elem + ; + +insert_update_elem: + simple_ident_nospvar equal expr_or_ignore_or_default + { + LEX *lex= Lex; + if (unlikely(lex->update_list.push_back($1, thd->mem_root)) || + unlikely(lex->value_list.push_back($3, thd->mem_root))) + MYSQL_YYABORT; + } + ; + +opt_low_priority: + /* empty */ { $$= TL_WRITE_DEFAULT; } + | LOW_PRIORITY { $$= TL_WRITE_LOW_PRIORITY; } + ; + +/* Delete rows from a table */ + +delete: + DELETE_SYM + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_DELETE; + YYPS->m_lock_type= TL_WRITE_DEFAULT; + YYPS->m_mdl_type= MDL_SHARED_WRITE; + if (Lex->main_select_push()) + MYSQL_YYABORT; + lex->init_select(); + lex->ignore= 0; + lex->first_select_lex()->order_list.empty(); + } + delete_part2 + { + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } + ; + +opt_delete_system_time: + /* empty */ + { + Lex->vers_conditions.init(SYSTEM_TIME_HISTORY); + } + | BEFORE_SYM SYSTEM_TIME_SYM history_point + { + Lex->vers_conditions.init(SYSTEM_TIME_BEFORE, $3); + } + ; + +delete_part2: + opt_delete_options single_multi {} + | HISTORY_SYM delete_single_table opt_delete_system_time + { + Lex->last_table()->vers_conditions= Lex->vers_conditions; + Lex->pop_select(); //main select + if (Lex->check_main_unit_semantics()) + 
MYSQL_YYABORT; + } + ; + +delete_single_table: + FROM table_ident opt_use_partition + { + if (unlikely(!Select-> + add_table_to_list(thd, $2, NULL, TL_OPTION_UPDATING, + YYPS->m_lock_type, + YYPS->m_mdl_type, + NULL, + $3))) + MYSQL_YYABORT; + YYPS->m_lock_type= TL_READ_DEFAULT; + YYPS->m_mdl_type= MDL_SHARED_READ; + } + ; + +delete_single_table_for_period: + delete_single_table opt_for_portion_of_time_clause + { + if ($2) + Lex->last_table()->period_conditions= Lex->period_conditions; + } + ; + +single_multi: + delete_single_table_for_period + opt_where_clause + opt_order_clause + delete_limit_clause + opt_returning + { + if ($3) + Select->order_list= *($3); + Lex->pop_select(); //main select + if (Lex->check_main_unit_semantics()) + MYSQL_YYABORT; + } + | table_wild_list + { + mysql_init_multi_delete(Lex); + YYPS->m_lock_type= TL_READ_DEFAULT; + YYPS->m_mdl_type= MDL_SHARED_READ; + } + FROM join_table_list opt_where_clause + { + if (unlikely(multi_delete_set_locks_and_link_aux_tables(Lex))) + MYSQL_YYABORT; + } stmt_end {} + | FROM table_alias_ref_list + { + mysql_init_multi_delete(Lex); + YYPS->m_lock_type= TL_READ_DEFAULT; + YYPS->m_mdl_type= MDL_SHARED_READ; + } + USING join_table_list opt_where_clause + { + if (unlikely(multi_delete_set_locks_and_link_aux_tables(Lex))) + MYSQL_YYABORT; + } stmt_end {} + ; + +opt_returning: + /* empty */ + { + DBUG_ASSERT(!Lex->has_returning()); + } + | RETURNING_SYM + { + DBUG_ASSERT(!Lex->has_returning()); + /* + When parsing_place is IN_RETURNING, we push select items to + item_list of builtin_select instead of current_select. + But set parsing_place of current_select to true. + + Because parsing_place for builtin_select will be IN_RETURNING, + regardless there is SELECT in RETURNING. Example, if + there is RETURNING (SELECT...), then when we parse + SELECT inside RETURNING, builtin_select->parsing_place + will still be true. 
So the select items of SELECT inside + RETURNING will be added to item_list of builtin_select which + is incorrect. We want to prevent this from happening. + Since for every new select, a new SELECT_LEX + object is created and pushed to select stack, current_select + will point to SELECT inside RETURNING, and also has + parsing_place not set to IN_RETURNING by default. + So items are correctly added to item_list of SELECT inside + RETURNING instead of builtin_select. + */ + + thd->lex->current_select->parsing_place= IN_RETURNING; + thd->lex->push_context(&thd->lex->returning()->context); + } + select_item_list + { + thd->lex->pop_context(); + thd->lex->current_select->parsing_place= NO_MATTER; + } + ; + +table_wild_list: + table_wild_one + | table_wild_list ',' table_wild_one + ; + +table_wild_one: + ident opt_wild + { + Table_ident *ti= new (thd->mem_root) Table_ident(&$1); + if (unlikely(ti == NULL)) + MYSQL_YYABORT; + if (unlikely(!Select-> + add_table_to_list(thd, + ti, + NULL, + (TL_OPTION_UPDATING | + TL_OPTION_ALIAS), + YYPS->m_lock_type, + YYPS->m_mdl_type))) + MYSQL_YYABORT; + } + | ident '.' ident opt_wild + { + Table_ident *ti= new (thd->mem_root) Table_ident(thd, &$1, &$3, 0); + if (unlikely(ti == NULL)) + MYSQL_YYABORT; + if (unlikely(!Select-> + add_table_to_list(thd, + ti, + NULL, + (TL_OPTION_UPDATING | + TL_OPTION_ALIAS), + YYPS->m_lock_type, + YYPS->m_mdl_type))) + MYSQL_YYABORT; + } + ; + +opt_wild: + /* empty */ {} + | '.' 
'*' {} + ; + +opt_delete_options: + /* empty */ {} + | opt_delete_option opt_delete_options {} + ; + +opt_delete_option: + QUICK { Select->options|= OPTION_QUICK; } + | LOW_PRIORITY { YYPS->m_lock_type= TL_WRITE_LOW_PRIORITY; } + | IGNORE_SYM { Lex->ignore= 1; } + ; + +truncate: + TRUNCATE_SYM + { + LEX* lex= Lex; + lex->sql_command= SQLCOM_TRUNCATE; + lex->alter_info.reset(); + lex->first_select_lex()->options= 0; + lex->sql_cache= LEX::SQL_CACHE_UNSPECIFIED; + lex->first_select_lex()->order_list.empty(); + YYPS->m_lock_type= TL_WRITE; + YYPS->m_mdl_type= MDL_EXCLUSIVE; + } + opt_table_sym table_name opt_lock_wait_timeout + { + LEX* lex= thd->lex; + DBUG_ASSERT(!lex->m_sql_cmd); + lex->m_sql_cmd= new (thd->mem_root) Sql_cmd_truncate_table(); + if (unlikely(lex->m_sql_cmd == NULL)) + MYSQL_YYABORT; + } + opt_truncate_table_storage_clause { } + ; + +opt_table_sym: + /* empty */ + | TABLE_SYM + ; + +opt_profile_defs: + /* empty */ + | profile_defs; + +profile_defs: + profile_def + | profile_defs ',' profile_def; + +profile_def: + CPU_SYM + { + Lex->profile_options|= PROFILE_CPU; + } + | MEMORY_SYM + { + Lex->profile_options|= PROFILE_MEMORY; + } + | BLOCK_SYM IO_SYM + { + Lex->profile_options|= PROFILE_BLOCK_IO; + } + | CONTEXT_SYM SWITCHES_SYM + { + Lex->profile_options|= PROFILE_CONTEXT; + } + | PAGE_SYM FAULTS_SYM + { + Lex->profile_options|= PROFILE_PAGE_FAULTS; + } + | IPC_SYM + { + Lex->profile_options|= PROFILE_IPC; + } + | SWAPS_SYM + { + Lex->profile_options|= PROFILE_SWAPS; + } + | SOURCE_SYM + { + Lex->profile_options|= PROFILE_SOURCE; + } + | ALL + { + Lex->profile_options|= PROFILE_ALL; + } + ; + +opt_profile_args: + /* empty */ + { + Lex->profile_query_id= 0; + } + | FOR_SYM QUERY_SYM NUM + { + Lex->profile_query_id= atoi($3.str); + } + ; + +/* Show things */ + +show: + SHOW + { + LEX *lex=Lex; + lex->wild=0; + lex->ident= null_clex_str; + if (Lex->main_select_push()) + MYSQL_YYABORT; + lex->init_select(); + lex->current_select->parsing_place= 
SELECT_LIST; + lex->create_info.init(); + } + show_param + { + Select->parsing_place= NO_MATTER; + Lex->pop_select(); //main select + } + ; + +show_param: + DATABASES wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_DATABASES; + if (unlikely(prepare_schema_table(thd, lex, 0, SCH_SCHEMATA))) + MYSQL_YYABORT; + } + | opt_full TABLES opt_db wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_TABLES; + lex->first_select_lex()->db= $3; + if (prepare_schema_table(thd, lex, 0, SCH_TABLE_NAMES)) + MYSQL_YYABORT; + } + | opt_full TRIGGERS_SYM opt_db wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_TRIGGERS; + lex->first_select_lex()->db= $3; + if (prepare_schema_table(thd, lex, 0, SCH_TRIGGERS)) + MYSQL_YYABORT; + } + | EVENTS_SYM opt_db wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_EVENTS; + lex->first_select_lex()->db= $2; + if (prepare_schema_table(thd, lex, 0, SCH_EVENTS)) + MYSQL_YYABORT; + } + | TABLE_SYM STATUS_SYM opt_db wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_TABLE_STATUS; + lex->first_select_lex()->db= $3; + if (prepare_schema_table(thd, lex, 0, SCH_TABLES)) + MYSQL_YYABORT; + } + | OPEN_SYM TABLES opt_db wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_OPEN_TABLES; + lex->first_select_lex()->db= $3; + if (prepare_schema_table(thd, lex, 0, SCH_OPEN_TABLES)) + MYSQL_YYABORT; + } + | PLUGINS_SYM + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_PLUGINS; + if (unlikely(prepare_schema_table(thd, lex, 0, SCH_PLUGINS))) + MYSQL_YYABORT; + } + | PLUGINS_SYM SONAME_SYM TEXT_STRING_sys + { + Lex->ident= $3; + Lex->sql_command= SQLCOM_SHOW_PLUGINS; + if (unlikely(prepare_schema_table(thd, Lex, 0, SCH_ALL_PLUGINS))) + MYSQL_YYABORT; + } + | PLUGINS_SYM SONAME_SYM wild_and_where + { + Lex->sql_command= SQLCOM_SHOW_PLUGINS; + if (unlikely(prepare_schema_table(thd, Lex, 0, SCH_ALL_PLUGINS))) + MYSQL_YYABORT; + } + | ENGINE_SYM known_storage_engines 
show_engine_param + { Lex->create_info.db_type= $2; } + | ENGINE_SYM ALL show_engine_param + { Lex->create_info.db_type= NULL; } + | opt_full COLUMNS from_or_in table_ident opt_db wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_FIELDS; + if ($5.str) + $4->change_db(&$5); + if (unlikely(prepare_schema_table(thd, lex, $4, SCH_COLUMNS))) + MYSQL_YYABORT; + } + | master_or_binary LOGS_SYM + { + Lex->sql_command = SQLCOM_SHOW_BINLOGS; + } + | SLAVE HOSTS_SYM + { + Lex->sql_command = SQLCOM_SHOW_SLAVE_HOSTS; + } + | BINLOG_SYM EVENTS_SYM binlog_in binlog_from + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_BINLOG_EVENTS; + } + opt_global_limit_clause + | RELAYLOG_SYM optional_connection_name EVENTS_SYM binlog_in binlog_from + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_RELAYLOG_EVENTS; + } + opt_global_limit_clause optional_for_channel + { } + | keys_or_index from_or_in table_ident opt_db opt_where_clause + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_KEYS; + if ($4.str) + $3->change_db(&$4); + if (unlikely(prepare_schema_table(thd, lex, $3, SCH_STATISTICS))) + MYSQL_YYABORT; + } + | opt_storage ENGINES_SYM + { + LEX *lex=Lex; + lex->sql_command= SQLCOM_SHOW_STORAGE_ENGINES; + if (unlikely(prepare_schema_table(thd, lex, 0, SCH_ENGINES))) + MYSQL_YYABORT; + } + | AUTHORS_SYM + { + LEX *lex=Lex; + lex->sql_command= SQLCOM_SHOW_AUTHORS; + } + | CONTRIBUTORS_SYM + { + LEX *lex=Lex; + lex->sql_command= SQLCOM_SHOW_CONTRIBUTORS; + } + | PRIVILEGES + { + LEX *lex=Lex; + lex->sql_command= SQLCOM_SHOW_PRIVILEGES; + } + | COUNT_SYM '(' '*' ')' WARNINGS + { + LEX_CSTRING var= {STRING_WITH_LEN("warning_count")}; + (void) create_select_for_variable(thd, &var); + } + | COUNT_SYM '(' '*' ')' ERRORS + { + LEX_CSTRING var= {STRING_WITH_LEN("error_count")}; + (void) create_select_for_variable(thd, &var); + } + | WARNINGS opt_global_limit_clause + { Lex->sql_command = SQLCOM_SHOW_WARNS;} + | ERRORS opt_global_limit_clause + { Lex->sql_command = 
SQLCOM_SHOW_ERRORS;} + | PROFILES_SYM + { Lex->sql_command = SQLCOM_SHOW_PROFILES; } + | PROFILE_SYM opt_profile_defs opt_profile_args opt_global_limit_clause + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_PROFILE; + if (unlikely(prepare_schema_table(thd, lex, NULL, SCH_PROFILES))) + MYSQL_YYABORT; + } + | opt_var_type STATUS_SYM wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_STATUS; + lex->option_type= $1; + if (unlikely(prepare_schema_table(thd, lex, 0, SCH_SESSION_STATUS))) + MYSQL_YYABORT; + } + | opt_full PROCESSLIST_SYM + { Lex->sql_command= SQLCOM_SHOW_PROCESSLIST;} + | opt_var_type VARIABLES wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_VARIABLES; + lex->option_type= $1; + if (unlikely(prepare_schema_table(thd, lex, 0, SCH_SESSION_VARIABLES))) + MYSQL_YYABORT; + } + | charset wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_CHARSETS; + if (unlikely(prepare_schema_table(thd, lex, 0, SCH_CHARSETS))) + MYSQL_YYABORT; + } + | COLLATION_SYM wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_COLLATIONS; + if (unlikely(prepare_schema_table(thd, lex, 0, SCH_COLLATIONS))) + MYSQL_YYABORT; + } + | GRANTS + { + Lex->sql_command= SQLCOM_SHOW_GRANTS; + if (unlikely(!(Lex->grant_user= + (LEX_USER*)thd->calloc(sizeof(LEX_USER))))) + MYSQL_YYABORT; + Lex->grant_user->user= current_user_and_current_role; + } + | GRANTS FOR_SYM user_or_role clear_privileges + { + LEX *lex=Lex; + lex->sql_command= SQLCOM_SHOW_GRANTS; + lex->grant_user=$3; + } + | CREATE DATABASE opt_if_not_exists ident + { + Lex->set_command(SQLCOM_SHOW_CREATE_DB, $3); + Lex->name= $4; + } + | CREATE TABLE_SYM table_ident + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_SHOW_CREATE; + if (!lex->first_select_lex()->add_table_to_list(thd, $3, NULL,0)) + MYSQL_YYABORT; + lex->create_info.storage_media= HA_SM_DEFAULT; + } + | CREATE VIEW_SYM table_ident + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_SHOW_CREATE; + if 
(!lex->first_select_lex()->add_table_to_list(thd, $3, NULL, 0)) + MYSQL_YYABORT; + lex->table_type= TABLE_TYPE_VIEW; + } + | CREATE SEQUENCE_SYM table_ident + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_SHOW_CREATE; + if (!lex->first_select_lex()->add_table_to_list(thd, $3, NULL, 0)) + MYSQL_YYABORT; + lex->table_type= TABLE_TYPE_SEQUENCE; + } + | BINLOG_SYM STATUS_SYM + { + Lex->sql_command = SQLCOM_SHOW_BINLOG_STAT; + } + | MASTER_SYM STATUS_SYM + { + Lex->sql_command = SQLCOM_SHOW_BINLOG_STAT; + } + | ALL SLAVES STATUS_SYM + { + if (!(Lex->m_sql_cmd= new (thd->mem_root) + Sql_cmd_show_slave_status(true))) + MYSQL_YYABORT; + Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT; + } + | SLAVE optional_connection_name STATUS_SYM optional_for_channel + { + if (!(Lex->m_sql_cmd= new (thd->mem_root) + Sql_cmd_show_slave_status())) + MYSQL_YYABORT; + Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT; + } + | CREATE PROCEDURE_SYM sp_name + { + LEX *lex= Lex; + + lex->sql_command = SQLCOM_SHOW_CREATE_PROC; + lex->spname= $3; + } + | CREATE FUNCTION_SYM sp_name + { + LEX *lex= Lex; + + lex->sql_command = SQLCOM_SHOW_CREATE_FUNC; + lex->spname= $3; + } + | CREATE PACKAGE_MARIADB_SYM sp_name + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_SHOW_CREATE_PACKAGE; + lex->spname= $3; + } + | CREATE PACKAGE_ORACLE_SYM sp_name + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_SHOW_CREATE_PACKAGE; + lex->spname= $3; + } + | CREATE PACKAGE_MARIADB_SYM BODY_MARIADB_SYM sp_name + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_SHOW_CREATE_PACKAGE_BODY; + lex->spname= $4; + } + | CREATE PACKAGE_ORACLE_SYM BODY_ORACLE_SYM sp_name + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_SHOW_CREATE_PACKAGE_BODY; + lex->spname= $4; + } + | CREATE TRIGGER_SYM sp_name + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_CREATE_TRIGGER; + lex->spname= $3; + } + | CREATE USER_SYM + { + Lex->sql_command= SQLCOM_SHOW_CREATE_USER; + if (unlikely(!(Lex->grant_user= + (LEX_USER*)thd->calloc(sizeof(LEX_USER))))) + 
MYSQL_YYABORT; + Lex->grant_user->user= current_user; + } + | CREATE USER_SYM user + { + Lex->sql_command= SQLCOM_SHOW_CREATE_USER; + Lex->grant_user= $3; + } + | PROCEDURE_SYM STATUS_SYM wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_STATUS_PROC; + if (unlikely(prepare_schema_table(thd, lex, 0, SCH_PROCEDURES))) + MYSQL_YYABORT; + } + | FUNCTION_SYM STATUS_SYM wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_STATUS_FUNC; + if (unlikely(prepare_schema_table(thd, lex, 0, SCH_PROCEDURES))) + MYSQL_YYABORT; + } + | PACKAGE_MARIADB_SYM STATUS_SYM wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_STATUS_PACKAGE; + if (unlikely(prepare_schema_table(thd, lex, 0, SCH_PROCEDURES))) + MYSQL_YYABORT; + } + | PACKAGE_ORACLE_SYM STATUS_SYM wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_STATUS_PACKAGE; + if (unlikely(prepare_schema_table(thd, lex, 0, SCH_PROCEDURES))) + MYSQL_YYABORT; + } + | PACKAGE_MARIADB_SYM BODY_MARIADB_SYM STATUS_SYM wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_STATUS_PACKAGE_BODY; + if (unlikely(prepare_schema_table(thd, lex, 0, SCH_PROCEDURES))) + MYSQL_YYABORT; + } + | PACKAGE_ORACLE_SYM BODY_ORACLE_SYM STATUS_SYM wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_STATUS_PACKAGE_BODY; + if (unlikely(prepare_schema_table(thd, lex, 0, SCH_PROCEDURES))) + MYSQL_YYABORT; + } + | PROCEDURE_SYM CODE_SYM sp_name + { + Lex->sql_command= SQLCOM_SHOW_PROC_CODE; + Lex->spname= $3; + } + | FUNCTION_SYM CODE_SYM sp_name + { + Lex->sql_command= SQLCOM_SHOW_FUNC_CODE; + Lex->spname= $3; + } + | PACKAGE_MARIADB_SYM BODY_MARIADB_SYM CODE_SYM sp_name + { + Lex->sql_command= SQLCOM_SHOW_PACKAGE_BODY_CODE; + Lex->spname= $4; + } + | PACKAGE_ORACLE_SYM BODY_ORACLE_SYM CODE_SYM sp_name + { + Lex->sql_command= SQLCOM_SHOW_PACKAGE_BODY_CODE; + Lex->spname= $4; + } + | CREATE EVENT_SYM sp_name + { + Lex->spname= $3; + Lex->sql_command = 
SQLCOM_SHOW_CREATE_EVENT; + } + | describe_command opt_format_json FOR_SYM expr + /* + The alternaltive syntax for this command is MySQL-compatible + EXPLAIN FOR CONNECTION + */ + { + Lex->sql_command= SQLCOM_SHOW_EXPLAIN; + if (unlikely(prepare_schema_table(thd, Lex, 0, + Lex->explain_json ? SCH_EXPLAIN_JSON : SCH_EXPLAIN_TABULAR))) + MYSQL_YYABORT; + add_value_to_list(thd, $4); + } + | ANALYZE_SYM opt_format_json FOR_SYM expr + { + Lex->sql_command= SQLCOM_SHOW_ANALYZE; + if (unlikely(prepare_schema_table(thd, Lex, 0, + Lex->explain_json ? SCH_ANALYZE_JSON : SCH_ANALYZE_TABULAR))) + MYSQL_YYABORT; + add_value_to_list(thd, $4); + } + | IDENT_sys remember_tok_start wild_and_where + { + LEX *lex= Lex; + bool in_plugin; + lex->sql_command= SQLCOM_SHOW_GENERIC; + ST_SCHEMA_TABLE *table= find_schema_table(thd, &$1, &in_plugin); + if (unlikely(!table || !table->old_format || !in_plugin)) + { + thd->parse_error(ER_SYNTAX_ERROR, $2); + MYSQL_YYABORT; + } + if (unlikely(lex->wild && table->idx_field1 < 0)) + { + thd->parse_error(ER_SYNTAX_ERROR, $3); + MYSQL_YYABORT; + } + if (unlikely(make_schema_select(thd, Lex->current_select, table))) + MYSQL_YYABORT; + } + ; + +show_engine_param: + STATUS_SYM + { Lex->sql_command= SQLCOM_SHOW_ENGINE_STATUS; } + | MUTEX_SYM + { Lex->sql_command= SQLCOM_SHOW_ENGINE_MUTEX; } + | LOGS_SYM + { Lex->sql_command= SQLCOM_SHOW_ENGINE_LOGS; } + ; + +master_or_binary: + MASTER_SYM + | BINARY + ; + +opt_storage: + /* empty */ + | STORAGE_SYM + ; + +opt_db: + /* empty */ { $$= null_clex_str; } + | from_or_in ident { $$= $2; } + ; + +opt_full: + /* empty */ { Lex->verbose=0; } + | FULL { Lex->verbose=1; } + ; + +from_or_in: + FROM + | IN_SYM + ; + +binlog_in: + /* empty */ { Lex->mi.log_file_name = 0; } + | IN_SYM TEXT_STRING_sys { Lex->mi.log_file_name = $2.str; } + ; + +binlog_from: + /* empty */ { Lex->mi.pos = 4; /* skip magic number */ } + | FROM ulonglong_num { Lex->mi.pos = $2; } + ; + +wild_and_where: + /* empty */ { $$= 0; } + | LIKE 
remember_tok_start TEXT_STRING_sys + { + Lex->wild= new (thd->mem_root) String((const char*) $3.str, + $3.length, + system_charset_info); + if (unlikely(Lex->wild == NULL)) + MYSQL_YYABORT; + $$= $2; + } + | WHERE remember_tok_start expr + { + Select->where= normalize_cond(thd, $3); + if ($3) + $3->top_level_item(); + $$= $2; + } + ; + +/* A Oracle compatible synonym for show */ +describe: + describe_command table_ident + { + LEX *lex= Lex; + if (lex->main_select_push()) + MYSQL_YYABORT; + lex->init_select(); + lex->current_select->parsing_place= SELECT_LIST; + lex->sql_command= SQLCOM_SHOW_FIELDS; + lex->first_select_lex()->db= null_clex_str; + lex->verbose= 0; + if (unlikely(prepare_schema_table(thd, lex, $2, SCH_COLUMNS))) + MYSQL_YYABORT; + } + opt_describe_column + { + Select->parsing_place= NO_MATTER; + Lex->pop_select(); //main select + } + | describe_command opt_extended_describe + { Lex->describe|= DESCRIBE_NORMAL; } + explainable_command + { + LEX *lex=Lex; + lex->first_select_lex()->options|= SELECT_DESCRIBE; + } + ; + +explainable_command: + select + | select_into + | insert + | replace + | update + | delete + ; + +describe_command: + DESC + | DESCRIBE + ; + +analyze_stmt_command: + ANALYZE_SYM opt_format_json explainable_command + { + Lex->analyze_stmt= true; + } + ; + +opt_extended_describe: + EXTENDED_SYM { Lex->describe|= DESCRIBE_EXTENDED; } + | EXTENDED_SYM ALL + { Lex->describe|= DESCRIBE_EXTENDED | DESCRIBE_EXTENDED2; } + | PARTITIONS_SYM { Lex->describe|= DESCRIBE_PARTITIONS; } + | opt_format_json {} + ; + +opt_format_json: + /* empty */ {} + | FORMAT_SYM '=' ident_or_text + { + if (lex_string_eq(&$3, STRING_WITH_LEN("JSON"))) + Lex->explain_json= true; + else if (lex_string_eq(&$3, STRING_WITH_LEN("TRADITIONAL"))) + DBUG_ASSERT(Lex->explain_json==false); + else + my_yyabort_error((ER_UNKNOWN_EXPLAIN_FORMAT, MYF(0), + "EXPLAIN/ANALYZE", $3.str)); + } + ; + +opt_describe_column: + /* empty */ {} + | text_string { Lex->wild= $1; } + | ident + { + 
Lex->wild= new (thd->mem_root) String((const char*) $1.str, + $1.length, + system_charset_info); + if (unlikely(Lex->wild == NULL)) + MYSQL_YYABORT; + } + ; + +explain_for_connection: + /* + EXPLAIN FOR CONNECTION is an alternative syntax for + SHOW EXPLAIN FOR command. It was introduced for compatibility + with MySQL which implements EXPLAIN FOR CONNECTION command + */ + describe_command opt_format_json FOR_SYM CONNECTION_SYM expr + { + LEX *lex=Lex; + lex->wild=0; + lex->ident= null_clex_str; + if (Lex->main_select_push()) + MYSQL_YYABORT; + lex->init_select(); + lex->current_select->parsing_place= SELECT_LIST; + lex->create_info.init(); + Select->parsing_place= NO_MATTER; + Lex->pop_select(); //main select + Lex->sql_command= SQLCOM_SHOW_EXPLAIN; + if (unlikely(prepare_schema_table(thd, Lex, 0, + Lex->explain_json ? SCH_EXPLAIN_JSON : SCH_EXPLAIN_TABULAR))) + MYSQL_YYABORT; + add_value_to_list(thd, $5); + } + ; + +/* flush things */ + +flush: + FLUSH_SYM opt_no_write_to_binlog + { + LEX *lex=Lex; + lex->sql_command= SQLCOM_FLUSH; + lex->type= 0; + lex->no_write_to_binlog= $2; + } + flush_options {} + ; + +flush_options: + table_or_tables + { + Lex->type|= REFRESH_TABLES; + /* + Set type of metadata and table locks for + FLUSH TABLES table_list [WITH READ LOCK]. + */ + YYPS->m_lock_type= TL_READ_NO_INSERT; + YYPS->m_mdl_type= MDL_SHARED_HIGH_PRIO; + } + opt_table_list opt_flush_lock + {} + | flush_options_list + {} + ; + +opt_flush_lock: + /* empty */ {} + | flush_lock + { + TABLE_LIST *tables= Lex->query_tables; + for (; tables; tables= tables->next_global) + { + tables->mdl_request.set_type(MDL_SHARED_NO_WRITE); + /* Ignore temporary tables. 
*/ + tables->open_type= OT_BASE_ONLY; + } + } + ; + +flush_lock: + WITH READ_SYM LOCK_SYM optional_flush_tables_arguments + { Lex->type|= REFRESH_READ_LOCK | $4; } + | FOR_SYM + { + if (unlikely(Lex->query_tables == NULL)) + { + // Table list can't be empty + thd->parse_error(ER_NO_TABLES_USED); + MYSQL_YYABORT; + } + Lex->type|= REFRESH_FOR_EXPORT; + } EXPORT_SYM {} + ; + +flush_options_list: + flush_options_list ',' flush_option + | flush_option + {} + ; + +flush_option: + ERROR_SYM LOGS_SYM + { Lex->type|= REFRESH_ERROR_LOG; } + | ENGINE_SYM LOGS_SYM + { Lex->type|= REFRESH_ENGINE_LOG; } + | GENERAL LOGS_SYM + { Lex->type|= REFRESH_GENERAL_LOG; } + | SLOW LOGS_SYM + { Lex->type|= REFRESH_SLOW_LOG; } + | BINARY LOGS_SYM opt_delete_gtid_domain + { Lex->type|= REFRESH_BINARY_LOG; } + | RELAY LOGS_SYM optional_connection_name optional_for_channel + { + LEX *lex= Lex; + if (unlikely(lex->type & REFRESH_RELAY_LOG)) + my_yyabort_error((ER_WRONG_USAGE, MYF(0), "FLUSH", "RELAY LOGS")); + lex->type|= REFRESH_RELAY_LOG; + lex->relay_log_connection_name= lex->mi.connection_name; + } + | QUERY_SYM CACHE_SYM + { Lex->type|= REFRESH_QUERY_CACHE_FREE; } + | HOSTS_SYM + { Lex->type|= REFRESH_HOSTS; } + | PRIVILEGES + { Lex->type|= REFRESH_GRANT; } + | LOGS_SYM + { + Lex->type|= REFRESH_LOG; + Lex->relay_log_connection_name= empty_clex_str; + } + | STATUS_SYM + { Lex->type|= REFRESH_STATUS; } + | SLAVE optional_connection_name + { + LEX *lex= Lex; + if (unlikely(lex->type & REFRESH_SLAVE)) + my_yyabort_error((ER_WRONG_USAGE, MYF(0), "FLUSH","SLAVE")); + lex->type|= REFRESH_SLAVE; + lex->reset_slave_info.all= false; + } + | MASTER_SYM + { Lex->type|= REFRESH_MASTER; } + | DES_KEY_FILE + { Lex->type|= REFRESH_DES_KEY_FILE; } + | RESOURCES + { Lex->type|= REFRESH_USER_RESOURCES; } + | SSL_SYM + { Lex->type|= REFRESH_SSL;} + | THREADS_SYM + { Lex->type|= REFRESH_THREADS;} + | IDENT_sys remember_tok_start + { + Lex->type|= REFRESH_GENERIC; + ST_SCHEMA_TABLE *table= 
find_schema_table(thd, &$1); + if (unlikely(!table || !table->reset_table)) + { + thd->parse_error(ER_SYNTAX_ERROR, $2); + MYSQL_YYABORT; + } + if (unlikely(Lex->view_list.push_back((LEX_CSTRING*) + thd->memdup(&$1, sizeof(LEX_CSTRING)), + thd->mem_root))) + MYSQL_YYABORT; + } + ; + +opt_table_list: + /* empty */ {} + | table_list {} + ; + +backup: + BACKUP_SYM backup_statements {} + ; + +backup_statements: + STAGE_SYM ident + { + int type; + if (unlikely(Lex->sphead)) + my_yyabort_error((ER_SP_BADSTATEMENT, MYF(0), "BACKUP STAGE")); + if ((type= find_type($2.str, &backup_stage_names, + FIND_TYPE_NO_PREFIX)) <= 0) + my_yyabort_error((ER_BACKUP_UNKNOWN_STAGE, MYF(0), $2.str)); + Lex->sql_command= SQLCOM_BACKUP; + Lex->backup_stage= (backup_stages) (type-1); + break; + } + | LOCK_SYM + { + if (unlikely(Lex->sphead)) + my_yyabort_error((ER_SP_BADSTATEMENT, MYF(0), "BACKUP LOCK")); + if (Lex->main_select_push()) + MYSQL_YYABORT; + } + table_ident + { + if (unlikely(!Select->add_table_to_list(thd, $3, NULL, 0, + TL_READ, MDL_SHARED_HIGH_PRIO))) + MYSQL_YYABORT; + Lex->sql_command= SQLCOM_BACKUP_LOCK; + Lex->pop_select(); //main select + } + | UNLOCK_SYM + { + if (unlikely(Lex->sphead)) + my_yyabort_error((ER_SP_BADSTATEMENT, MYF(0), "BACKUP UNLOCK")); + /* Table list is empty for unlock */ + Lex->sql_command= SQLCOM_BACKUP_LOCK; + } + ; + +opt_delete_gtid_domain: + /* empty */ {} + | DELETE_DOMAIN_ID_SYM '=' '(' delete_domain_id_list ')' + {} + ; +delete_domain_id_list: + /* Empty */ + | delete_domain_id + | delete_domain_id_list ',' delete_domain_id + ; + +delete_domain_id: + ulonglong_num + { + uint32 value= (uint32) $1; + if ($1 > UINT_MAX32) + { + my_printf_error(ER_BINLOG_CANT_DELETE_GTID_DOMAIN, + "The value of gtid domain being deleted ('%llu') " + "exceeds its maximum size " + "of 32 bit unsigned integer", MYF(0), $1); + MYSQL_YYABORT; + } + insert_dynamic(&Lex->delete_gtid_domain, (uchar*) &value); + } + ; + +optional_flush_tables_arguments: + /* empty */ {$$= 
0;} + | AND_SYM DISABLE_SYM CHECKPOINT_SYM {$$= REFRESH_CHECKPOINT; } + ; + +reset: + RESET_SYM + { + LEX *lex=Lex; + lex->sql_command= SQLCOM_RESET; lex->type=0; + } + reset_options + {} + ; + +reset_options: + reset_options ',' reset_option + | reset_option + ; + +reset_option: + SLAVE { Lex->type|= REFRESH_SLAVE; } + optional_connection_name + slave_reset_options optional_for_channel + { } + | MASTER_SYM + { + Lex->type|= REFRESH_MASTER; + Lex->next_binlog_file_number= 0; + } + master_reset_options + | QUERY_SYM CACHE_SYM { Lex->type|= REFRESH_QUERY_CACHE;} + ; + +slave_reset_options: + /* empty */ { Lex->reset_slave_info.all= false; } + | ALL { Lex->reset_slave_info.all= true; } + ; + +master_reset_options: + /* empty */ {} + | TO_SYM ulong_num + { + Lex->next_binlog_file_number = $2; + } + ; + +purge: + PURGE master_or_binary LOGS_SYM TO_SYM TEXT_STRING_sys + { + Lex->stmt_purge_to($5); + } + | PURGE master_or_binary LOGS_SYM BEFORE_SYM + { Lex->clause_that_disallows_subselect= "PURGE..BEFORE"; } + expr + { + Lex->clause_that_disallows_subselect= NULL; + if (Lex->stmt_purge_before($6)) + MYSQL_YYABORT; + } + ; + + +/* kill threads */ + +kill: + KILL_SYM + { + LEX *lex=Lex; + lex->value_list.empty(); + lex->users_list.empty(); + lex->sql_command= SQLCOM_KILL; + lex->kill_type= KILL_TYPE_ID; + } + kill_type kill_option + { + Lex->kill_signal= (killed_state) ($3 | $4); + } + ; + +kill_type: + /* Empty */ { $$= (int) KILL_HARD_BIT; } + | HARD_SYM { $$= (int) KILL_HARD_BIT; } + | SOFT_SYM { $$= 0; } + ; + +kill_option: + opt_connection kill_expr { $$= (int) KILL_CONNECTION; } + | QUERY_SYM kill_expr { $$= (int) KILL_QUERY; } + | QUERY_SYM ID_SYM expr + { + $$= (int) KILL_QUERY; + Lex->kill_type= KILL_TYPE_QUERY; + Lex->value_list.push_front($3, thd->mem_root); + } + ; + +opt_connection: + /* empty */ { } + | CONNECTION_SYM { } + ; + +kill_expr: + expr + { + Lex->value_list.push_front($$, thd->mem_root); + } + | USER_SYM user + { + Lex->users_list.push_back($2, 
thd->mem_root); + Lex->kill_type= KILL_TYPE_USER; + } + ; + +shutdown: + SHUTDOWN { Lex->sql_command= SQLCOM_SHUTDOWN; } + shutdown_option {} + ; + +shutdown_option: + /* Empty */ { Lex->is_shutdown_wait_for_slaves= false; } + | WAIT_SYM FOR_SYM ALL SLAVES + { + Lex->is_shutdown_wait_for_slaves= true; + } + ; + +/* change database */ + +use: + USE_SYM ident + { + LEX *lex=Lex; + lex->sql_command=SQLCOM_CHANGE_DB; + lex->first_select_lex()->db= $2; + } + ; + +/* import, export of files */ + +load: + LOAD data_or_xml + { + LEX *lex= thd->lex; + + if (unlikely(lex->sphead)) + { + my_error(ER_SP_BADSTATEMENT, MYF(0), + $2 == FILETYPE_CSV ? "LOAD DATA" : "LOAD XML"); + MYSQL_YYABORT; + } + if (lex->main_select_push()) + MYSQL_YYABORT; + lex->init_select(); + } + load_data_lock opt_local INFILE TEXT_STRING_filesystem + { + LEX *lex=Lex; + lex->sql_command= SQLCOM_LOAD; + lex->local_file= $5; + lex->duplicates= DUP_ERROR; + lex->ignore= 0; + if (unlikely(!(lex->exchange= new (thd->mem_root) + sql_exchange($7.str, 0, $2)))) + MYSQL_YYABORT; + } + opt_duplicate INTO TABLE_SYM table_ident opt_use_partition + { + LEX *lex=Lex; + if (unlikely(!Select->add_table_to_list(thd, $12, NULL, + TL_OPTION_UPDATING, + $4, MDL_SHARED_WRITE, + NULL, $13))) + MYSQL_YYABORT; + lex->field_list.empty(); + lex->update_list.empty(); + lex->value_list.empty(); + lex->many_values.empty(); + } + opt_load_data_charset + { Lex->exchange->cs= $15; } + opt_xml_rows_identified_by + opt_field_term opt_line_term opt_ignore_lines opt_field_or_var_spec + opt_load_data_set_spec + stmt_end + { + Lex->mark_first_table_as_inserting(); + } + ; + +data_or_xml: + DATA_SYM { $$= FILETYPE_CSV; } + | XML_SYM { $$= FILETYPE_XML; } + ; + +opt_local: + /* empty */ { $$=0;} + | LOCAL_SYM { $$=1;} + ; + +load_data_lock: + /* empty */ { $$= TL_WRITE_DEFAULT; } + | CONCURRENT + { + /* + Ignore this option in SP to avoid problem with query cache and + triggers with non default priority locks + */ + $$= (Lex->sphead ? 
TL_WRITE_DEFAULT : TL_WRITE_CONCURRENT_INSERT); + } + | LOW_PRIORITY { $$= TL_WRITE_LOW_PRIORITY; } + ; + +opt_duplicate: + /* empty */ { Lex->duplicates=DUP_ERROR; } + | REPLACE { Lex->duplicates=DUP_REPLACE; } + | IGNORE_SYM { Lex->ignore= 1; } + ; + +opt_field_term: + /* empty */ + | COLUMNS field_term_list + ; + +field_term_list: + field_term_list field_term + | field_term + ; + +field_term: + TERMINATED BY text_string + { + DBUG_ASSERT(Lex->exchange != 0); + Lex->exchange->field_term= $3; + } + | OPTIONALLY ENCLOSED BY text_string + { + LEX *lex= Lex; + DBUG_ASSERT(lex->exchange != 0); + lex->exchange->enclosed= $4; + lex->exchange->opt_enclosed= 1; + } + | ENCLOSED BY text_string + { + DBUG_ASSERT(Lex->exchange != 0); + Lex->exchange->enclosed= $3; + } + | ESCAPED BY text_string + { + DBUG_ASSERT(Lex->exchange != 0); + Lex->exchange->escaped= $3; + } + ; + +opt_line_term: + /* empty */ + | LINES line_term_list + ; + +line_term_list: + line_term_list line_term + | line_term + ; + +line_term: + TERMINATED BY text_string + { + DBUG_ASSERT(Lex->exchange != 0); + Lex->exchange->line_term= $3; + } + | STARTING BY text_string + { + DBUG_ASSERT(Lex->exchange != 0); + Lex->exchange->line_start= $3; + } + ; + +opt_xml_rows_identified_by: + /* empty */ { } + | ROWS_SYM IDENTIFIED_SYM BY text_string + { Lex->exchange->line_term = $4; } + ; + +opt_ignore_lines: + /* empty */ + | IGNORE_SYM NUM lines_or_rows + { + DBUG_ASSERT(Lex->exchange != 0); + Lex->exchange->skip_lines= atol($2.str); + } + ; + +lines_or_rows: + LINES { } + | ROWS_SYM { } + ; + +opt_field_or_var_spec: + /* empty */ {} + | '(' fields_or_vars ')' {} + | '(' ')' {} + ; + +fields_or_vars: + fields_or_vars ',' field_or_var + { Lex->field_list.push_back($3, thd->mem_root); } + | field_or_var + { Lex->field_list.push_back($1, thd->mem_root); } + ; + +field_or_var: + simple_ident_nospvar {$$= $1;} + | '@' ident_or_text + { + if (!$2.length) + { + thd->parse_error(); + MYSQL_YYABORT; + } + + $$= new 
(thd->mem_root) Item_user_var_as_out_param(thd, &$2); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +opt_load_data_set_spec: + /* empty */ {} + | SET load_data_set_list {} + ; + +load_data_set_list: + load_data_set_list ',' load_data_set_elem + | load_data_set_elem + ; + +load_data_set_elem: + simple_ident_nospvar equal remember_name expr_or_ignore_or_default remember_end + { + LEX *lex= Lex; + if (unlikely(lex->update_list.push_back($1, thd->mem_root)) || + unlikely(lex->value_list.push_back($4, thd->mem_root))) + MYSQL_YYABORT; + $4->set_name_no_truncate(thd, $3, (uint) ($5 - $3), thd->charset()); + } + ; + +/* Common definitions */ + +text_literal: + TEXT_STRING + { + if (unlikely(!($$= thd->make_string_literal($1)))) + MYSQL_YYABORT; + } + | NCHAR_STRING + { + if (unlikely(!($$= thd->make_string_literal_nchar($1)))) + MYSQL_YYABORT; + } + | UNDERSCORE_CHARSET TEXT_STRING + { + if (unlikely(!($$= thd->make_string_literal_charset($2, $1)))) + MYSQL_YYABORT; + } + | text_literal TEXT_STRING_literal + { + if (unlikely(!($$= $1->make_string_literal_concat(thd, &$2)))) + MYSQL_YYABORT; + } + ; + +text_string: + TEXT_STRING_literal + { + $$= new (thd->mem_root) String((const char*) $1.str, + $1.length, + thd->variables.collation_connection); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | hex_or_bin_String { $$= $1; } + ; + + +hex_or_bin_String: + HEX_NUM + { + Item *tmp= new (thd->mem_root) Item_hex_hybrid(thd, $1.str, + $1.length); + if (unlikely(tmp == NULL)) + MYSQL_YYABORT; + $$= tmp->val_str((String*) 0); + } + | HEX_STRING + { + Item *tmp= new (thd->mem_root) Item_hex_string(thd, $1.str, + $1.length); + if (unlikely(tmp == NULL)) + MYSQL_YYABORT; + $$= tmp->val_str((String*) 0); + } + | BIN_NUM + { + Item *tmp= new (thd->mem_root) Item_bin_string(thd, $1.str, + $1.length); + if (unlikely(tmp == NULL)) + MYSQL_YYABORT; + /* + it is OK only emulate fix_fields, because we need only + value of constant + */ + $$= tmp->val_str((String*) 0); + } + ; + 
+param_marker: + PARAM_MARKER + { + if (unlikely(!($$= Lex->add_placeholder(thd, ¶m_clex_str, + YYLIP->get_tok_start(), + YYLIP->get_tok_start() + 1)))) + MYSQL_YYABORT; + } + | COLON_ORACLE_SYM ident_cli + { + if (unlikely(!($$= Lex->add_placeholder(thd, &null_clex_str, + $1.pos(), $2.end())))) + MYSQL_YYABORT; + } + | COLON_ORACLE_SYM NUM + { + if (unlikely(!($$= Lex->add_placeholder(thd, &null_clex_str, + $1.pos(), + YYLIP->get_ptr())))) + MYSQL_YYABORT; + } + ; + +signed_literal: + '+' NUM_literal { $$ = $2; } + | '-' NUM_literal + { + $2->max_length++; + $$= $2->neg(thd); + } + ; + +literal: + text_literal { $$ = $1; } + | NUM_literal { $$ = $1; } + | temporal_literal { $$= $1; } + | NULL_SYM + { + /* + For the digest computation, in this context only, + NULL is considered a literal, hence reduced to '?' + REDUCE: + TOK_GENERIC_VALUE := NULL_SYM + */ + YYLIP->reduce_digest_token(TOK_GENERIC_VALUE, NULL_SYM); + $$= new (thd->mem_root) Item_null(thd); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + YYLIP->next_state= MY_LEX_OPERATOR_OR_IDENT; + } + | FALSE_SYM + { + $$= new (thd->mem_root) Item_bool(thd, (char*) "FALSE",0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | TRUE_SYM + { + $$= new (thd->mem_root) Item_bool(thd, (char*) "TRUE",1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | HEX_NUM + { + $$= new (thd->mem_root) Item_hex_hybrid(thd, $1.str, $1.length); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | HEX_STRING + { + $$= new (thd->mem_root) Item_hex_string(thd, $1.str, $1.length); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | BIN_NUM + { + $$= new (thd->mem_root) Item_bin_string(thd, $1.str, $1.length); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | UNDERSCORE_CHARSET hex_or_bin_String + { + Item_string_with_introducer *item_str; + LEX_CSTRING tmp; + $2->get_value(&tmp); + /* + Pass NULL as name. Name will be set in the "select_item" rule and + will include the introducer and the original hex/bin notation. 
+ */ + item_str= new (thd->mem_root) + Item_string_with_introducer(thd, null_clex_str, + tmp, $1); + if (unlikely(!item_str || + !item_str->check_well_formed_result(true))) + MYSQL_YYABORT; + + $$= item_str; + } + ; + +NUM_literal: + NUM + { + int error; + $$= new (thd->mem_root) + Item_int(thd, $1.str, + (longlong) my_strtoll10($1.str, NULL, &error), + $1.length); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | LONG_NUM + { + int error; + $$= new (thd->mem_root) + Item_int(thd, $1.str, + (longlong) my_strtoll10($1.str, NULL, &error), + $1.length); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | ULONGLONG_NUM + { + $$= new (thd->mem_root) Item_uint(thd, $1.str, $1.length); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | DECIMAL_NUM + { + $$= new (thd->mem_root) Item_decimal(thd, $1.str, $1.length, + thd->charset()); + if (unlikely($$ == NULL) || unlikely(thd->is_error())) + MYSQL_YYABORT; + } + | FLOAT_NUM + { + $$= new (thd->mem_root) Item_float(thd, $1.str, $1.length); + if (unlikely($$ == NULL) || unlikely(thd->is_error())) + MYSQL_YYABORT; + } + ; + + +temporal_literal: + DATE_SYM TEXT_STRING + { + if (unlikely(!($$= type_handler_newdate.create_literal_item(thd, + $2.str, $2.length, + YYCSCL, true)))) + MYSQL_YYABORT; + } + | TIME_SYM TEXT_STRING + { + if (unlikely(!($$= type_handler_time2.create_literal_item(thd, + $2.str, $2.length, + YYCSCL, true)))) + MYSQL_YYABORT; + } + | TIMESTAMP TEXT_STRING + { + if (unlikely(!($$= type_handler_datetime.create_literal_item(thd, + $2.str, $2.length, + YYCSCL, true)))) + MYSQL_YYABORT; + } + ; + +with_clause: + WITH opt_recursive + { + LEX *lex= Lex; + With_clause *with_clause= + new With_clause($2, Lex->curr_with_clause); + if (unlikely(with_clause == NULL)) + MYSQL_YYABORT; + lex->derived_tables|= DERIVED_WITH; + lex->with_cte_resolution= true; + lex->curr_with_clause= with_clause; + with_clause->add_to_list(&lex->with_clauses_list, + lex->with_clauses_list_last_next); + if (lex->current_select && + 
lex->current_select->parsing_place == BEFORE_OPT_LIST) + lex->current_select->parsing_place= NO_MATTER; + } + with_list + { + $$= Lex->curr_with_clause; + Lex->curr_with_clause= Lex->curr_with_clause->pop(); + } + ; + + +opt_recursive: + /*empty*/ { $$= 0; } + | RECURSIVE_SYM { $$= 1; } + ; + + +with_list: + with_list_element + | with_list ',' with_list_element + ; + + +with_list_element: + with_element_head + opt_with_column_list + AS '(' query_expression ')' opt_cycle + { + LEX *lex= thd->lex; + const char *query_start= lex->sphead ? lex->sphead->m_tmp_query + : thd->query(); + const char *spec_start= $4.pos() + 1; + With_element *elem= new With_element($1, *$2, $5); + if (elem == NULL || Lex->curr_with_clause->add_with_element(elem)) + MYSQL_YYABORT; + if (elem->set_unparsed_spec(thd, spec_start, $6.pos(), + spec_start - query_start)) + MYSQL_YYABORT; + if ($7) + { + elem->set_cycle_list($7); + } + elem->set_tables_end_pos(lex->query_tables_last); + } + ; + +opt_cycle: + /* empty */ + { $$= NULL; } + | + CYCLE_SYM + { + if (!Lex->curr_with_clause->with_recursive) + { + thd->parse_error(ER_SYNTAX_ERROR, $1.pos()); + } + } + comma_separated_ident_list RESTRICT + { + $$= $3; + } + ; + + +opt_with_column_list: + /* empty */ + { + if (($$= new (thd->mem_root) List) == NULL) + MYSQL_YYABORT; + } + | '(' with_column_list ')' + { $$= $2; } + ; + +with_column_list: + comma_separated_ident_list + ; + +ident_sys_alloc: + ident_cli + { + void *buf= thd->alloc(sizeof(Lex_ident_sys)); + if (!buf) + MYSQL_YYABORT; + $$= new (buf) Lex_ident_sys(thd, &$1); + } + ; + +comma_separated_ident_list: + ident_sys_alloc + { + $$= new (thd->mem_root) List; + if (unlikely($$ == NULL || $$->push_back($1))) + MYSQL_YYABORT; + } + | comma_separated_ident_list ',' ident_sys_alloc + { + if (($$= $1)->push_back($3)) + MYSQL_YYABORT; + } + ; + + +with_element_head: + ident + { + LEX_CSTRING *name= + (LEX_CSTRING *) thd->memdup(&$1, sizeof(LEX_CSTRING)); + $$= new (thd->mem_root) 
With_element_head(name); + if (unlikely(name == NULL || $$ == NULL)) + MYSQL_YYABORT; + $$->tables_pos.set_start_pos(Lex->query_tables_last); + } + ; + + + +/********************************************************************** +** Creating different items. +**********************************************************************/ + +insert_ident: + simple_ident_nospvar { $$=$1; } + | table_wild { $$=$1; } + ; + +table_wild: + ident '.' '*' + { + if (unlikely(!($$= Lex->create_item_qualified_asterisk(thd, &$1)))) + MYSQL_YYABORT; + } + | ident '.' ident '.' '*' + { + if (unlikely(!($$= Lex->create_item_qualified_asterisk(thd, &$1, &$3)))) + MYSQL_YYABORT; + } + ; + +select_sublist_qualified_asterisk: + ident_cli '.' '*' + { + if (unlikely(!($$= Lex->create_item_qualified_asterisk(thd, &$1)))) + MYSQL_YYABORT; + } + | ident_cli '.' ident_cli '.' '*' + { + if (unlikely(!($$= Lex->create_item_qualified_asterisk(thd, &$1, &$3)))) + MYSQL_YYABORT; + } + ; + +order_ident: + expr { $$=$1; } + ; + + +simple_ident: + ident_cli + { + if (unlikely(!($$= Lex->create_item_ident(thd, &$1)))) + MYSQL_YYABORT; + } + | ident_cli '.' ident_cli + { + if (unlikely(!($$= Lex->create_item_ident(thd, &$1, &$3)))) + MYSQL_YYABORT; + } + | '.' ident_cli '.' ident_cli + { + Lex_ident_cli empty($2.pos(), 0); + if (unlikely(!($$= Lex->create_item_ident(thd, &empty, &$2, &$4)))) + MYSQL_YYABORT; + } + | ident_cli '.' ident_cli '.' ident_cli + { + if (unlikely(!($$= Lex->create_item_ident(thd, &$1, &$3, &$5)))) + MYSQL_YYABORT; + } + | COLON_ORACLE_SYM ident_cli '.' ident_cli + { + if (unlikely(!($$= Lex->make_item_colon_ident_ident(thd, &$2, &$4)))) + MYSQL_YYABORT; + } + ; + +simple_ident_nospvar: + ident + { + if (unlikely(!($$= Lex->create_item_ident_nosp(thd, &$1)))) + MYSQL_YYABORT; + } + | ident '.' ident + { + if (unlikely(!($$= Lex->create_item_ident_nospvar(thd, &$1, &$3)))) + MYSQL_YYABORT; + } + | COLON_ORACLE_SYM ident_cli '.' 
ident_cli + { + if (unlikely(!($$= Lex->make_item_colon_ident_ident(thd, &$2, &$4)))) + MYSQL_YYABORT; + } + | '.' ident '.' ident + { + Lex_ident_sys none; + if (unlikely(!($$= Lex->create_item_ident(thd, &none, &$2, &$4)))) + MYSQL_YYABORT; + } + | ident '.' ident '.' ident + { + if (unlikely(!($$= Lex->create_item_ident(thd, &$1, &$3, &$5)))) + MYSQL_YYABORT; + } + ; + +field_ident: + ident { $$=$1;} + | ident '.' ident '.' ident + { + TABLE_LIST *table= Select->table_list.first; + if (unlikely(my_strcasecmp(table_alias_charset, $1.str, + table->db.str))) + my_yyabort_error((ER_WRONG_DB_NAME, MYF(0), $1.str)); + if (unlikely(my_strcasecmp(table_alias_charset, $3.str, + table->table_name.str))) + my_yyabort_error((ER_WRONG_TABLE_NAME, MYF(0), $3.str)); + $$=$5; + } + | ident '.' ident + { + TABLE_LIST *table= Select->table_list.first; + if (unlikely(my_strcasecmp(table_alias_charset, $1.str, + table->alias.str))) + my_yyabort_error((ER_WRONG_TABLE_NAME, MYF(0), $1.str)); + $$=$3; + } + | '.' ident { $$=$2;} /* For Delphi */ + ; + +table_ident: + ident + { + $$= new (thd->mem_root) Table_ident(&$1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | ident '.' ident + { + $$= new (thd->mem_root) Table_ident(thd, &$1, &$3, 0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | '.' ident + { + /* For Delphi */ + $$= new (thd->mem_root) Table_ident(&$2); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +table_ident_opt_wild: + ident opt_wild + { + $$= new (thd->mem_root) Table_ident(&$1); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | ident '.' 
ident opt_wild + { + $$= new (thd->mem_root) Table_ident(thd, &$1, &$3, 0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +table_ident_nodb: + ident + { + LEX_CSTRING db= any_db; + $$= new (thd->mem_root) Table_ident(thd, &db, &$1, 0); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +IDENT_cli: + IDENT + | IDENT_QUOTED + ; + +ident_cli: + IDENT + | IDENT_QUOTED + | keyword_ident { $$= $1; } + ; + +IDENT_sys: + IDENT_cli + { + if (unlikely(thd->to_ident_sys_alloc(&$$, &$1))) + MYSQL_YYABORT; + } + ; + +TEXT_STRING_sys: + TEXT_STRING + { + if (thd->make_text_string_sys(&$$, &$1)) + MYSQL_YYABORT; + } + ; + +TEXT_STRING_literal: + TEXT_STRING + { + if (thd->make_text_string_connection(&$$, &$1)) + MYSQL_YYABORT; + } + ; + +TEXT_STRING_filesystem: + TEXT_STRING + { + if (thd->make_text_string_filesystem(&$$, &$1)) + MYSQL_YYABORT; + } + ; + +ident_table_alias: + IDENT_sys + | keyword_table_alias + { + if (unlikely($$.copy_keyword(thd, &$1))) + MYSQL_YYABORT; + } + ; + +ident_cli_set_usual_case: + IDENT_cli { $$= $1; } + | keyword_set_usual_case { $$= $1; } + ; + +ident_sysvar_name: + IDENT_sys + | keyword_sysvar_name + { + if (unlikely($$.copy_keyword(thd, &$1))) + MYSQL_YYABORT; + } + | TEXT_STRING_sys + { + if (unlikely($$.copy_sys(thd, &$1))) + MYSQL_YYABORT; + } + ; + + +ident: + IDENT_sys + | keyword_ident + { + if (unlikely($$.copy_keyword(thd, &$1))) + MYSQL_YYABORT; + } + ; + +label_ident: + IDENT_sys + | keyword_label + { + if (unlikely($$.copy_keyword(thd, &$1))) + MYSQL_YYABORT; + } + ; + +ident_or_text: + ident { $$=$1;} + | TEXT_STRING_sys { $$=$1;} + | LEX_HOSTNAME { $$=$1;} + ; + +user_maybe_role: + ident_or_text + { + if (unlikely(!($$=(LEX_USER*) thd->calloc(sizeof(LEX_USER))))) + MYSQL_YYABORT; + $$->user = $1; + + if (unlikely(check_string_char_length(&$$->user, ER_USERNAME, + username_char_length, + system_charset_info, 0))) + MYSQL_YYABORT; + } + | ident_or_text '@' ident_or_text + { + if (unlikely(!($$=(LEX_USER*) 
thd->calloc(sizeof(LEX_USER))))) + MYSQL_YYABORT; + $$->user = $1; $$->host=$3; + + if (unlikely(check_string_char_length(&$$->user, ER_USERNAME, + username_char_length, + system_charset_info, 0)) || + unlikely(check_host_name(&$$->host))) + MYSQL_YYABORT; + if ($$->host.str[0]) + { + /* + Convert hostname part of username to lowercase. + It's OK to use in-place lowercase as long as + the character set is utf8. + */ + my_casedn_str(system_charset_info, (char*) $$->host.str); + } + else + { + /* + fix historical undocumented convention that empty host is the + same as '%' + */ + $$->host= host_not_specified; + } + } + | CURRENT_USER optional_braces + { + if (unlikely(!($$=(LEX_USER*)thd->calloc(sizeof(LEX_USER))))) + MYSQL_YYABORT; + $$->user= current_user; + $$->auth= new (thd->mem_root) USER_AUTH(); + } + ; + +user_or_role: user_maybe_role | current_role; + +user: user_maybe_role + { + if ($1->user.str != current_user.str && $1->host.str == 0) + $1->host= host_not_specified; + $$= $1; + } + ; + +/* Keywords which we allow as table aliases. 
*/ +keyword_table_alias: + keyword_data_type + | keyword_cast_type + | keyword_set_special_case + | keyword_sp_block_section + | keyword_sp_head + | keyword_sp_var_and_label + | keyword_sp_var_not_label + | keyword_sysvar_type + | keyword_verb_clause + | FUNCTION_SYM + | EXCEPTION_ORACLE_SYM + | IGNORED_SYM + ; + +/* Keyword that we allow for identifiers (except SP labels) */ +keyword_ident: + keyword_data_type + | keyword_cast_type + | keyword_set_special_case + | keyword_sp_block_section + | keyword_sp_head + | keyword_sp_var_and_label + | keyword_sp_var_not_label + | keyword_sysvar_type + | keyword_verb_clause + | FUNCTION_SYM + | WINDOW_SYM + | EXCEPTION_ORACLE_SYM + | IGNORED_SYM + ; + +keyword_sysvar_name: + keyword_data_type + | keyword_cast_type + | keyword_set_special_case + | keyword_sp_block_section + | keyword_sp_head + | keyword_sp_var_and_label + | keyword_sp_var_not_label + | keyword_verb_clause + | FUNCTION_SYM + | WINDOW_SYM + | EXCEPTION_ORACLE_SYM + | IGNORED_SYM + | OFFSET_SYM + ; + +keyword_set_usual_case: + keyword_data_type + | keyword_cast_type + | keyword_sp_block_section + | keyword_sp_head + | keyword_sp_var_and_label + | keyword_sp_var_not_label + | keyword_sysvar_type + | keyword_verb_clause + | FUNCTION_SYM + | WINDOW_SYM + | EXCEPTION_ORACLE_SYM + | IGNORED_SYM + | OFFSET_SYM + ; + +non_reserved_keyword_udt: + keyword_sp_var_not_label + | keyword_sp_head + | keyword_verb_clause + | keyword_set_special_case + | keyword_sp_block_section + | keyword_sysvar_type + | keyword_sp_var_and_label + | OFFSET_SYM + ; + +/* + Keywords that we allow in Oracle-style direct assignments: + xxx := 10; + but do not allow in labels in the default sql_mode: + label: + stmt1; + stmt2; + TODO: check if some of them can migrate to keyword_sp_var_and_label. 
+*/ +keyword_sp_var_not_label: + ASCII_SYM + | BACKUP_SYM + | BINLOG_SYM + | BYTE_SYM + | CACHE_SYM + | CHECKSUM_SYM + | CHECKPOINT_SYM + | COLUMN_ADD_SYM + | COLUMN_CHECK_SYM + | COLUMN_CREATE_SYM + | COLUMN_DELETE_SYM + | COLUMN_GET_SYM + | COMMENT_SYM + | COMPRESSED_SYM + | DEALLOCATE_SYM + | EXAMINED_SYM + | EXCLUDE_SYM + | EXECUTE_SYM + | FLUSH_SYM + | FOLLOWING_SYM + | FORMAT_SYM + | GET_SYM + | HELP_SYM + | HOST_SYM + | INSTALL_SYM + | OPTION + | OPTIONS_SYM + | OTHERS_MARIADB_SYM + | OWNER_SYM + | PARSER_SYM + | PERIOD_SYM + | PORT_SYM + | PRECEDING_SYM + | PREPARE_SYM + | REMOVE_SYM + | RESET_SYM + | RESTORE_SYM + | SECURITY_SYM + | SERVER_SYM + | SOCKET_SYM + | SLAVE + | SLAVES + | SONAME_SYM + | START_SYM + | STOP_SYM + | STORED_SYM + | TIES_SYM + | UNICODE_SYM + | UNINSTALL_SYM + | UNBOUNDED_SYM + | WITHIN + | WRAPPER_SYM + | XA_SYM + | UPGRADE_SYM + ; + +/* + Keywords that can start optional clauses in SP or trigger declarations + Allowed as identifiers (e.g. table, column names), + but: + - not allowed as SP label names + - not allowed as variable names in Oracle-style assignments: + xxx := 10; + + If we allowed these variables in assignments, there would be conflicts + with SP characteristics, or verb clauses, or compound statements, e.g.: + CREATE PROCEDURE p1 LANGUAGE ... + would be either: + CREATE PROCEDURE p1 LANGUAGE SQL BEGIN END; + or + CREATE PROCEDURE p1 LANGUAGE:=10; + + Note, these variables can still be assigned using quoted identifiers: + `do`:= 10; + "do":= 10; (when ANSI_QUOTES) + or using a SET statement: + SET do= 10; + + Note, some of these keywords are reserved keywords in Oracle. + In case if heavy grammar conflicts are found in the future, + we'll possibly need to make them reserved for sql_mode=ORACLE. + + TODO: Allow these variables as SP lables when sql_mode=ORACLE. + TODO: Allow assigning of "SP characteristics" marked variables + inside compound blocks. 
+ TODO: Allow "follows" and "precedes" as variables in compound blocks: + BEGIN + follows := 10; + END; + as they conflict only with non-block FOR EACH ROW statement: + CREATE TRIGGER .. FOR EACH ROW follows:= 10; + CREATE TRIGGER .. FOR EACH ROW FOLLOWS tr1 a:= 10; +*/ +keyword_sp_head: + CONTAINS_SYM /* SP characteristic */ + | LANGUAGE_SYM /* SP characteristic */ + | NO_SYM /* SP characteristic */ + | CHARSET /* SET CHARSET utf8; */ + | FOLLOWS_SYM /* Conflicts with assignment in FOR EACH */ + | PRECEDES_SYM /* Conflicts with assignment in FOR EACH */ + ; + +/* + Keywords that start a statement. + Generally allowed as identifiers (e.g. table, column names) + - not allowed as SP label names + - not allowed as variable names in Oracle-style assignments: + xxx:=10 +*/ +keyword_verb_clause: + CLOSE_SYM /* Verb clause. Reserved in Oracle */ + | COMMIT_SYM /* Verb clause. Reserved in Oracle */ + | DO_SYM /* Verb clause */ + | HANDLER_SYM /* Verb clause */ + | OPEN_SYM /* Verb clause. Reserved in Oracle */ + | REPAIR /* Verb clause */ + | ROLLBACK_SYM /* Verb clause. Reserved in Oracle */ + | SAVEPOINT_SYM /* Verb clause. Reserved in Oracle */ + | SHUTDOWN /* Verb clause */ + | TRUNCATE_SYM /* Verb clause. Reserved in Oracle */ + ; + +keyword_set_special_case: + NAMES_SYM + | ROLE_SYM + | PASSWORD_SYM + ; + +keyword_sysvar_type: + GLOBAL_SYM + | LOCAL_SYM + | SESSION_SYM + ; + + +/* + These keywords are generally allowed as identifiers, + but not allowed as non-delimited SP variable names in sql_mode=ORACLE. 
+*/ +keyword_data_type: + BIT_SYM + | BOOLEAN_SYM + | BOOL_SYM + | CLOB_MARIADB_SYM + | CLOB_ORACLE_SYM + | DATE_SYM %prec PREC_BELOW_CONTRACTION_TOKEN2 + | DATETIME + | ENUM + | FIXED_SYM + | JSON_SYM + | MEDIUM_SYM + | NATIONAL_SYM + | NCHAR_SYM + | NUMBER_MARIADB_SYM + | NUMBER_ORACLE_SYM + | NVARCHAR_SYM + | RAW_MARIADB_SYM + | RAW_ORACLE_SYM + | ROW_SYM + | SERIAL_SYM + | TEXT_SYM + | TIMESTAMP %prec PREC_BELOW_CONTRACTION_TOKEN2 + | TIME_SYM %prec PREC_BELOW_CONTRACTION_TOKEN2 + | VARCHAR2_MARIADB_SYM + | VARCHAR2_ORACLE_SYM + | YEAR_SYM + ; + + +keyword_cast_type: + SIGNED_SYM + ; + + +/* + These keywords are fine for both SP variable names and SP labels. +*/ +keyword_sp_var_and_label: + ACTION + | ACCOUNT_SYM + | ADDDATE_SYM + | ADD_MONTHS_SYM + | ADMIN_SYM + | AFTER_SYM + | AGAINST + | AGGREGATE_SYM + | ALGORITHM_SYM + | ALWAYS_SYM + | ANY_SYM + | AT_SYM + | ATOMIC_SYM + | AUTHORS_SYM + | AUTO_INC + | AUTOEXTEND_SIZE_SYM + | AUTO_SYM + | AVG_ROW_LENGTH + | AVG_SYM + | BLOCK_SYM + | BODY_MARIADB_SYM + | BTREE_SYM + | CASCADED + | CATALOG_NAME_SYM + | CHAIN_SYM + | CHANNEL_SYM + | CHANGED + | CIPHER_SYM + | CLIENT_SYM + | CLASS_ORIGIN_SYM + | COALESCE + | CODE_SYM + | COLLATION_SYM + | COLUMN_NAME_SYM + | COLUMNS + | COMMITTED_SYM + | COMPACT_SYM + | COMPLETION_SYM + | CONCURRENT + | CONNECTION_SYM + | CONSISTENT_SYM + | CONSTRAINT_CATALOG_SYM + | CONSTRAINT_SCHEMA_SYM + | CONSTRAINT_NAME_SYM + | CONTEXT_SYM + | CONTRIBUTORS_SYM + | CURRENT_POS_SYM + | CPU_SYM + | CUBE_SYM + /* + Although a reserved keyword in SQL:2003 (and :2008), + not reserved in MySQL per WL#2111 specification. 
+ */ + | CURRENT_SYM + | CURSOR_NAME_SYM + | CYCLE_SYM + | DATA_SYM + | DATAFILE_SYM + | DATE_FORMAT_SYM + | DAY_SYM + | DEFINER_SYM + | DELAY_KEY_WRITE_SYM + | DES_KEY_FILE + | DIAGNOSTICS_SYM + | DIRECTORY_SYM + | DISABLE_SYM + | DISCARD + | DISK_SYM + | DUMPFILE + | DUPLICATE_SYM + | DYNAMIC_SYM + | ELSEIF_ORACLE_SYM + | ELSIF_MARIADB_SYM + | EMPTY_SYM + | ENDS_SYM + | ENGINE_SYM + | ENGINES_SYM + | ERROR_SYM + | ERRORS + | ESCAPE_SYM + | EVENT_SYM + | EVENTS_SYM + | EVERY_SYM + | EXCEPTION_MARIADB_SYM + | EXCHANGE_SYM + | EXPANSION_SYM + | EXPIRE_SYM + | EXPORT_SYM + | EXTENDED_SYM + | EXTENT_SIZE_SYM + | FAULTS_SYM + | FAST_SYM + | FOUND_SYM + | ENABLE_SYM + | FEDERATED_SYM + | FULL + | FILE_SYM + | FIRST_SYM + | GENERAL + | GENERATED_SYM + | GET_FORMAT + | GRANTS + | GOTO_MARIADB_SYM + | HASH_SYM + | HARD_SYM + | HISTORY_SYM + | HOSTS_SYM + | HOUR_SYM + | ID_SYM + | IDENTIFIED_SYM + | IGNORE_SERVER_IDS_SYM + | INCREMENT_SYM + | IMMEDIATE_SYM + | INVOKER_SYM + | IMPORT + | INDEXES + | INITIAL_SIZE_SYM + | IO_SYM + | IPC_SYM + | ISOLATION + | ISOPEN_SYM + | ISSUER_SYM + | INSERT_METHOD + | INVISIBLE_SYM + | JSON_TABLE_SYM + | KEY_BLOCK_SIZE + | LAST_VALUE + | LAST_SYM + | LASTVAL_SYM + | LEAVES + | LESS_SYM + | LEVEL_SYM + | LIST_SYM + | LOCKED_SYM + | LOCKS_SYM + | LOGFILE_SYM + | LOGS_SYM + | MAX_ROWS + | MASTER_SYM + | MASTER_HEARTBEAT_PERIOD_SYM + | MASTER_GTID_POS_SYM + | MASTER_HOST_SYM + | MASTER_PORT_SYM + | MASTER_LOG_FILE_SYM + | MASTER_LOG_POS_SYM + | MASTER_USER_SYM + | MASTER_USE_GTID_SYM + | MASTER_PASSWORD_SYM + | MASTER_SERVER_ID_SYM + | MASTER_CONNECT_RETRY_SYM + | MASTER_DELAY_SYM + | MASTER_SSL_SYM + | MASTER_SSL_CA_SYM + | MASTER_SSL_CAPATH_SYM + | MASTER_SSL_CERT_SYM + | MASTER_SSL_CIPHER_SYM + | MASTER_SSL_CRL_SYM + | MASTER_SSL_CRLPATH_SYM + | MASTER_SSL_KEY_SYM + | MAX_CONNECTIONS_PER_HOUR + | MAX_QUERIES_PER_HOUR + | MAX_SIZE_SYM + | MAX_STATEMENT_TIME_SYM + | MAX_UPDATES_PER_HOUR + | MAX_USER_CONNECTIONS_SYM + | MEMORY_SYM + | 
MERGE_SYM + | MESSAGE_TEXT_SYM + | MICROSECOND_SYM + | MIGRATE_SYM + | MINUTE_SYM +%ifdef MARIADB + | MINUS_ORACLE_SYM +%endif + | MINVALUE_SYM + | MIN_ROWS + | MODIFY_SYM + | MODE_SYM + | MONITOR_SYM + | MONTH_SYM + | MUTEX_SYM + | MYSQL_SYM + | MYSQL_ERRNO_SYM + | NAME_SYM + | NESTED_SYM + | NEVER_SYM + | NEXT_SYM %prec PREC_BELOW_CONTRACTION_TOKEN2 + | NEXTVAL_SYM + | NEW_SYM + | NOCACHE_SYM + | NOCYCLE_SYM + | NOMINVALUE_SYM + | NOMAXVALUE_SYM + | NO_WAIT_SYM + | NOWAIT_SYM + | NODEGROUP_SYM + | NONE_SYM + | NOTFOUND_SYM + | OF_SYM + | OLD_PASSWORD_SYM + | ONE_SYM + | ONLINE_SYM + | ONLY_SYM + | ORDINALITY_SYM + | OVERLAPS_SYM + | PACKAGE_MARIADB_SYM + | PACK_KEYS_SYM + | PAGE_SYM + | PARTIAL + | PARTITIONING_SYM + | PARTITIONS_SYM + | PATH_SYM + | PERSISTENT_SYM + | PHASE_SYM + | PLUGIN_SYM + | PLUGINS_SYM + | PRESERVE_SYM + | PREV_SYM + | PREVIOUS_SYM %prec PREC_BELOW_CONTRACTION_TOKEN2 + | PRIVILEGES + | PROCESS + | PROCESSLIST_SYM + | PROFILE_SYM + | PROFILES_SYM + | PROXY_SYM + | QUARTER_SYM + | QUERY_SYM + | QUICK + | RAISE_MARIADB_SYM + | READ_ONLY_SYM + | REBUILD_SYM + | RECOVER_SYM + | REDO_BUFFER_SIZE_SYM + | REDOFILE_SYM + | REDUNDANT_SYM + | RELAY + | RELAYLOG_SYM + | RELAY_LOG_FILE_SYM + | RELAY_LOG_POS_SYM + | RELAY_THREAD + | RELOAD + | REORGANIZE_SYM + | REPEATABLE_SYM + | REPLAY_SYM + | REPLICATION + | RESOURCES + | RESTART_SYM + | RESUME_SYM + | RETURNED_SQLSTATE_SYM + | RETURNS_SYM + | REUSE_SYM + | REVERSE_SYM + | ROLLUP_SYM + | ROUTINE_SYM + | ROWCOUNT_SYM + | ROWTYPE_MARIADB_SYM + | ROW_COUNT_SYM + | ROW_FORMAT_SYM +%ifdef MARIADB + | ROWNUM_SYM +%endif + | RTREE_SYM + | SCHEDULE_SYM + | SCHEMA_NAME_SYM + | SECOND_SYM + | SEQUENCE_SYM + | SERIALIZABLE_SYM + | SETVAL_SYM + | SIMPLE_SYM + | SHARE_SYM + | SKIP_SYM + | SLAVE_POS_SYM + | SLOW + | SNAPSHOT_SYM + | SOFT_SYM + | SOUNDS_SYM + | SOURCE_SYM + | SQL_CACHE_SYM + | SQL_BUFFER_RESULT + | SQL_NO_CACHE_SYM + | SQL_THREAD + | STAGE_SYM + | STARTS_SYM + | STATEMENT_SYM + | STATUS_SYM + | 
STORAGE_SYM + | STRING_SYM + | SUBCLASS_ORIGIN_SYM + | SUBDATE_SYM + | SUBJECT_SYM + | SUBPARTITION_SYM + | SUBPARTITIONS_SYM + | SUPER_SYM + | SUSPEND_SYM + | SWAPS_SYM + | SWITCHES_SYM +%ifdef MARIADB + | SYSDATE +%endif + | SYSTEM + | SYSTEM_TIME_SYM + | TABLE_NAME_SYM + | TABLES + | TABLE_CHECKSUM_SYM + | TABLESPACE + | TEMPORARY + | TEMPTABLE_SYM + | THAN_SYM + | TRANSACTION_SYM %prec PREC_BELOW_CONTRACTION_TOKEN2 + | TRANSACTIONAL_SYM + | THREADS_SYM + | TRIGGERS_SYM + | TRIM_ORACLE + | TIMESTAMP_ADD + | TIMESTAMP_DIFF + | TYPES_SYM + | TYPE_SYM + | UDF_RETURNS_SYM + | UNCOMMITTED_SYM + | UNDEFINED_SYM + | UNDO_BUFFER_SIZE_SYM + | UNDOFILE_SYM + | UNKNOWN_SYM + | UNTIL_SYM + | USER_SYM %prec PREC_BELOW_CONTRACTION_TOKEN2 + | USE_FRM + | VARIABLES + | VERSIONING_SYM + | VIEW_SYM + | VIRTUAL_SYM + | VISIBLE_SYM + | VALUE_SYM + | WARNINGS + | WAIT_SYM + | WEEK_SYM + | WEIGHT_STRING_SYM + | WITHOUT + | WORK_SYM + | X509_SYM + | XML_SYM + | VIA_SYM + ; + + +reserved_keyword_udt_not_param_type: + ACCESSIBLE_SYM + | ADD + | ALL + | ALTER + | ANALYZE_SYM + | AND_SYM + | AS + | ASC + | ASENSITIVE_SYM + | BEFORE_SYM + | BETWEEN_SYM + | BIT_AND + | BIT_OR + | BIT_XOR + | BODY_ORACLE_SYM + | BOTH + | BY + | CALL_SYM + | CASCADE + | CASE_SYM + | CAST_SYM + | CHANGE + | CHECK_SYM + | COLLATE_SYM + | CONSTRAINT + | CONTINUE_MARIADB_SYM + | CONTINUE_ORACLE_SYM + | CONVERT_SYM + | COUNT_SYM + | CREATE + | CROSS + | CUME_DIST_SYM + | CURDATE + | CURRENT_USER + | CURRENT_ROLE + | CURTIME + | DATABASE + | DATABASES + | DATE_ADD_INTERVAL + | DATE_SUB_INTERVAL + | DAY_HOUR_SYM + | DAY_MICROSECOND_SYM + | DAY_MINUTE_SYM + | DAY_SECOND_SYM + | DECLARE_MARIADB_SYM + | DECLARE_ORACLE_SYM + | DEFAULT + | DELETE_DOMAIN_ID_SYM + | DELETE_SYM + | DENSE_RANK_SYM + | DESC + | DESCRIBE + | DETERMINISTIC_SYM + | DISTINCT + | DIV_SYM + | DO_DOMAIN_IDS_SYM + | DROP + | DUAL_SYM + | EACH_SYM + | ELSE + | ELSEIF_MARIADB_SYM + | ELSIF_ORACLE_SYM + | ENCLOSED + | ESCAPED + | EXCEPT_SYM + | EXISTS + 
| EXTRACT_SYM + | FALSE_SYM + | FETCH_SYM + | FIRST_VALUE_SYM + | FOREIGN + | FROM + | FULLTEXT_SYM + | GOTO_ORACLE_SYM + | GRANT + | GROUP_SYM + | GROUP_CONCAT_SYM + | LAG_SYM + | LEAD_SYM + | HAVING + | HOUR_MICROSECOND_SYM + | HOUR_MINUTE_SYM + | HOUR_SECOND_SYM + | IF_SYM + | IGNORE_DOMAIN_IDS_SYM + | IGNORE_SYM + | IGNORED_SYM + | INDEX_SYM + | INFILE + | INNER_SYM + | INSENSITIVE_SYM + | INSERT + | INTERSECT_SYM + | INTERVAL_SYM + | INTO + | IS + | ITERATE_SYM + | JOIN_SYM + | KEYS + | KEY_SYM + | KILL_SYM + | LEADING + | LEAVE_SYM + | LEFT + | LIKE + | LIMIT + | LINEAR_SYM + | LINES + | LOAD + | LOCATOR_SYM + | LOCK_SYM + | LOOP_SYM + | LOW_PRIORITY + | MASTER_SSL_VERIFY_SERVER_CERT_SYM + | MATCH + | MAX_SYM + | MAXVALUE_SYM + | MEDIAN_SYM + | MINUTE_MICROSECOND_SYM + | MINUTE_SECOND_SYM + | MIN_SYM +%ifdef ORACLE + | MINUS_ORACLE_SYM +%endif + | MODIFIES_SYM + | MOD_SYM + | NATURAL + | NEG + | NOT_SYM + | NOW_SYM + | NO_WRITE_TO_BINLOG + | NTILE_SYM + | NULL_SYM + | NTH_VALUE_SYM + | ON + | OPTIMIZE + | OPTIONALLY + | ORDER_SYM + | OR_SYM + | OTHERS_ORACLE_SYM + | OUTER + | OUTFILE + | OVER_SYM + | PACKAGE_ORACLE_SYM + | PAGE_CHECKSUM_SYM + | PARSE_VCOL_EXPR_SYM + | PARTITION_SYM + | PERCENT_RANK_SYM + | PERCENTILE_CONT_SYM + | PERCENTILE_DISC_SYM + | PORTION_SYM + | POSITION_SYM + | PRECISION + | PRIMARY_SYM + | PROCEDURE_SYM + | PURGE + | RAISE_ORACLE_SYM + | RANGE_SYM + | RANK_SYM + | READS_SYM + | READ_SYM + | READ_WRITE_SYM + | RECURSIVE_SYM + | REF_SYSTEM_ID_SYM + | REFERENCES + | REGEXP + | RELEASE_SYM + | RENAME + | REPEAT_SYM + | REPLACE + | REQUIRE_SYM + | RESIGNAL_SYM + | RESTRICT + | RETURNING_SYM + | RETURN_MARIADB_SYM + | RETURN_ORACLE_SYM + | REVOKE + | RIGHT + | ROWS_SYM + | ROWTYPE_ORACLE_SYM + | ROW_NUMBER_SYM + | SECOND_MICROSECOND_SYM + | SELECT_SYM + | SENSITIVE_SYM + | SEPARATOR_SYM + | SERVER_OPTIONS + | SHOW + | SIGNAL_SYM + | SPATIAL_SYM + | SPECIFIC_SYM + | SQLEXCEPTION_SYM + | SQLSTATE_SYM + | SQLWARNING_SYM + | SQL_BIG_RESULT + | 
SQL_SMALL_RESULT + | SQL_SYM + | SSL_SYM + | STARTING + | STATS_AUTO_RECALC_SYM + | STATS_PERSISTENT_SYM + | STATS_SAMPLE_PAGES_SYM + | STDDEV_SAMP_SYM + | STD_SYM + | STRAIGHT_JOIN + | SUBSTRING + | SUM_SYM + | TABLE_REF_PRIORITY + | TABLE_SYM + | TERMINATED + | THEN_SYM + | TO_SYM + | TRAILING + | TRIGGER_SYM + | TRIM + | TRUE_SYM + | UNDO_SYM + | UNION_SYM + | UNIQUE_SYM + | UNLOCK_SYM + | UPDATE_SYM + | USAGE + | USE_SYM + | USING + | UTC_DATE_SYM + | UTC_TIMESTAMP_SYM + | UTC_TIME_SYM + | VALUES + | VALUES_IN_SYM + | VALUES_LESS_SYM + | VARIANCE_SYM + | VARYING + | VAR_SAMP_SYM + | WHEN_SYM + | WHERE + | WHILE_SYM + | WITH + | XOR + | YEAR_MONTH_SYM + | ZEROFILL + ; + +/* + SQLCOM_SET_OPTION statement. + + Note that to avoid shift/reduce conflicts, we have separate rules for the + first option listed in the statement. +*/ + +set: + SET + { + LEX *lex=Lex; + lex->set_stmt_init(); + } + set_param + { + if (Lex->check_main_unit_semantics()) + MYSQL_YYABORT; + } + ; + +set_param: + option_value_no_option_type + | option_value_no_option_type ',' option_value_list + | TRANSACTION_SYM + { + Lex->option_type= OPT_DEFAULT; + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + } + transaction_characteristics + { + if (unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | option_type + { + Lex->option_type= $1; + } + start_option_value_list_following_option_type + | STATEMENT_SYM + set_stmt_option_list + { + LEX *lex= Lex; + if (unlikely(lex->table_or_sp_used())) + my_yyabort_error((ER_SUBQUERIES_NOT_SUPPORTED, MYF(0), "SET STATEMENT")); + lex->stmt_var_list= lex->var_list; + lex->var_list.empty(); + if (Lex->check_main_unit_semantics()) + MYSQL_YYABORT; + } + FOR_SYM directly_executable_statement + ; + +set_stmt_option_list: + /* + Only system variables can be used here. If this condition is changed + please check careful code under lex->option_type == OPT_STATEMENT + condition on wrong type casts. 
+ */ + set_stmt_option + | set_stmt_option_list ',' set_stmt_option + ; + +/* Start of option value list, option_type was given */ +start_option_value_list_following_option_type: + option_value_following_option_type + | option_value_following_option_type ',' option_value_list + | TRANSACTION_SYM + { + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + } + transaction_characteristics + { + if (unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + ; + +/* Repeating list of option values after first option value. */ +option_value_list: + option_value + | option_value_list ',' option_value + ; + +/* Wrapper around option values following the first option value in the stmt. */ +option_value: + option_type + { + Lex->option_type= $1; + } + option_value_following_option_type + | option_value_no_option_type + ; + +option_type: + GLOBAL_SYM { $$=OPT_GLOBAL; } + | LOCAL_SYM { $$=OPT_SESSION; } + | SESSION_SYM { $$=OPT_SESSION; } + ; + +opt_var_type: + /* empty */ { $$=OPT_SESSION; } + | GLOBAL_SYM { $$=OPT_GLOBAL; } + | LOCAL_SYM { $$=OPT_SESSION; } + | SESSION_SYM { $$=OPT_SESSION; } + ; + +opt_var_ident_type: + /* empty */ { $$=OPT_DEFAULT; } + | GLOBAL_SYM '.' { $$=OPT_GLOBAL; } + | LOCAL_SYM '.' { $$=OPT_SESSION; } + | SESSION_SYM '.' { $$=OPT_SESSION; } + ; + +/* + SET STATEMENT options do not need their own LEX or Query_arena. + Let's put them to the main ones. +*/ +set_stmt_option: + ident_cli equal + { + if (Lex->main_select_push(false)) + MYSQL_YYABORT; + } + set_expr_or_default + { + Lex_ident_sys tmp(thd, &$1); + if (unlikely(!tmp.str) || + unlikely(Lex->set_system_variable(Lex->option_type, &tmp, $4))) + MYSQL_YYABORT; + Lex->pop_select(); //min select + } + | ident_cli '.' 
ident equal + { + if (Lex->main_select_push(false)) + MYSQL_YYABORT; + } + set_expr_or_default + { + Lex_ident_sys tmp(thd, &$1); + if (unlikely(!tmp.str) || + unlikely(Lex->set_system_variable(thd, Lex->option_type, + &tmp, &$3, $6))) + MYSQL_YYABORT; + Lex->pop_select(); //min select + } + | DEFAULT '.' ident equal + { + if (Lex->main_select_push(false)) + MYSQL_YYABORT; + } + set_expr_or_default + { + if (unlikely(Lex->set_default_system_variable(Lex->option_type, + &$3, $6))) + MYSQL_YYABORT; + Lex->pop_select(); //min select + } + ; + + +/* Option values with preceding option_type. */ +option_value_following_option_type: + ident_cli equal + { + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + } + set_expr_or_default + { + Lex_ident_sys tmp(thd, &$1); + if (unlikely(!tmp.str) || + unlikely(Lex->set_system_variable(Lex->option_type, &tmp, $4)) || + unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | ident_cli '.' ident equal + { + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + } + set_expr_or_default + { + Lex_ident_sys tmp(thd, &$1); + if (unlikely(!tmp.str) || + unlikely(Lex->set_system_variable(thd, Lex->option_type, &tmp, &$3, $6)) || + unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | DEFAULT '.' ident equal + { + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + } + set_expr_or_default + { + if (unlikely(Lex->set_default_system_variable(Lex->option_type, &$3, $6)) || + unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + ; + +/* Option values without preceding option_type. 
*/ +option_value_no_option_type: + ident_cli_set_usual_case equal + { + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + } + set_expr_or_default + { + Lex_ident_sys tmp(thd, &$1); + if (unlikely(!tmp.str) || + unlikely(Lex->set_variable(&tmp, $4)) || + unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | ident_cli_set_usual_case '.' ident equal + { + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + } + set_expr_or_default + { + Lex_ident_sys tmp(thd, &$1); + if (unlikely(!tmp.str) || + unlikely(Lex->set_variable(&tmp, &$3, $6)) || + unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | DEFAULT '.' ident equal + { + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + } + set_expr_or_default + { + if (unlikely(Lex->set_default_system_variable(Lex->option_type, &$3, $6))) + MYSQL_YYABORT; + if (unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | '@' ident_or_text equal + { + if (!$2.length) + { + thd->parse_error(); + MYSQL_YYABORT; + } + + if (sp_create_assignment_lex(thd, $1.str)) + MYSQL_YYABORT; + } + expr + { + if (unlikely(Lex->set_user_variable(thd, &$2, $5)) || + unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | '@' '@' opt_var_ident_type ident_sysvar_name equal + { + if (sp_create_assignment_lex(thd, $1.str)) + MYSQL_YYABORT; + } + set_expr_or_default + { + if (unlikely(Lex->set_system_variable($3, &$4, $7)) || + unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | '@' '@' opt_var_ident_type ident_sysvar_name '.' ident equal + { + if (sp_create_assignment_lex(thd, $1.str)) + MYSQL_YYABORT; + } + set_expr_or_default + { + if (unlikely(Lex->set_system_variable(thd, $3, &$4, &$6, $9)) || + unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | '@' '@' opt_var_ident_type DEFAULT '.' 
ident equal + { + if (sp_create_assignment_lex(thd, $1.str)) + MYSQL_YYABORT; + } + set_expr_or_default + { + if (unlikely(Lex->set_default_system_variable($3, &$6, $9)) || + unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | charset old_or_new_charset_name_or_default + { + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + LEX *lex= thd->lex; + CHARSET_INFO *cs2; + cs2= $2 ? $2: global_system_variables.character_set_client; + set_var_collation_client *var; + var= (new (thd->mem_root) + set_var_collation_client(cs2, + thd->variables.collation_database, + cs2)); + if (unlikely(var == NULL)) + MYSQL_YYABORT; + lex->var_list.push_back(var, thd->mem_root); + if (unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | NAMES_SYM equal expr + { + LEX *lex= Lex; + sp_pcontext *spc= lex->spcont; + LEX_CSTRING names= { STRING_WITH_LEN("names") }; + if (unlikely(spc && spc->find_variable(&names, false))) + my_error(ER_SP_BAD_VAR_SHADOW, MYF(0), names.str); + else + thd->parse_error(); + MYSQL_YYABORT; + } + | NAMES_SYM charset_name_or_default + { + CHARSET_INFO *def= global_system_variables.character_set_client; + Lex_exact_charset_opt_extended_collate tmp($2 ? $2 : def, false); + if (Lex->set_names($1.pos(), tmp, yychar == YYEMPTY)) + MYSQL_YYABORT; + } + | NAMES_SYM charset_name_or_default + COLLATE_SYM collation_name_or_default + { + CHARSET_INFO *def= global_system_variables.character_set_client; + Lex_exact_charset_opt_extended_collate tmp($2 ? 
$2 : def, false); + if (tmp.merge_collation($4) || + Lex->set_names($1.pos(), tmp, yychar == YYEMPTY)) + MYSQL_YYABORT; + } + | DEFAULT ROLE_SYM grant_role + { + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + LEX *lex = Lex; + LEX_USER *user; + if (unlikely(!(user=(LEX_USER *) thd->calloc(sizeof(LEX_USER))))) + MYSQL_YYABORT; + user->user= current_user; + set_var_default_role *var= (new (thd->mem_root) + set_var_default_role(user, + $3->user)); + if (unlikely(var == NULL) || + unlikely(lex->var_list.push_back(var, thd->mem_root))) + MYSQL_YYABORT; + + thd->lex->autocommit= TRUE; + if (lex->sphead) + lex->sphead->m_flags|= sp_head::HAS_SET_AUTOCOMMIT_STMT; + if (unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | DEFAULT ROLE_SYM grant_role FOR_SYM user + { + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + LEX *lex = Lex; + set_var_default_role *var= (new (thd->mem_root) + set_var_default_role($5, $3->user)); + if (unlikely(var == NULL) || + unlikely(lex->var_list.push_back(var, thd->mem_root))) + MYSQL_YYABORT; + thd->lex->autocommit= TRUE; + if (lex->sphead) + lex->sphead->m_flags|= sp_head::HAS_SET_AUTOCOMMIT_STMT; + if (unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | ROLE_SYM role_name + { + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + LEX *lex = Lex; + set_var_role *var= new (thd->mem_root) set_var_role($2->user); + if (unlikely(var == NULL) || + unlikely(lex->var_list.push_back(var, thd->mem_root)) || + unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | ROLE_SYM equal + { + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + } + set_expr_or_default + { + Lex_ident_sys tmp(thd, &$1); + if (unlikely(!tmp.str) || + unlikely(Lex->set_variable(&tmp, $4)) || + unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | PASSWORD_SYM equal + { + if 
(sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + } + text_or_password + { + if (unlikely(Lex->sp_create_set_password_instr(thd, $4, + yychar == YYEMPTY))) + MYSQL_YYABORT; + } + | PASSWORD_SYM FOR_SYM + { + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + } + user equal text_or_password + { + if (unlikely(Lex->sp_create_set_password_instr(thd, $4, $6, + yychar == YYEMPTY))) + MYSQL_YYABORT; + } + ; + +transaction_characteristics: + transaction_access_mode + | isolation_level + | transaction_access_mode ',' isolation_level + | isolation_level ',' transaction_access_mode + ; + +transaction_access_mode: + transaction_access_mode_types + { + LEX *lex=Lex; + Item *item= new (thd->mem_root) Item_int(thd, (int32) $1); + if (unlikely(item == NULL)) + MYSQL_YYABORT; + set_var *var= (new (thd->mem_root) + set_var(thd, lex->option_type, + find_sys_var(thd, "tx_read_only"), + &null_clex_str, + item)); + if (unlikely(var == NULL)) + MYSQL_YYABORT; + if (unlikely(lex->var_list.push_back(var, thd->mem_root))) + MYSQL_YYABORT; + } + ; + +isolation_level: + ISOLATION LEVEL_SYM isolation_types + { + LEX *lex=Lex; + Item *item= new (thd->mem_root) Item_int(thd, (int32) $3); + if (unlikely(item == NULL)) + MYSQL_YYABORT; + set_var *var= (new (thd->mem_root) + set_var(thd, lex->option_type, + find_sys_var(thd, "tx_isolation"), + &null_clex_str, + item)); + if (unlikely(var == NULL) || + unlikely(lex->var_list.push_back(var, thd->mem_root))) + MYSQL_YYABORT; + } + ; + +transaction_access_mode_types: + READ_SYM ONLY_SYM { $$= true; } + | READ_SYM WRITE_SYM { $$= false; } + ; + +isolation_types: + READ_SYM UNCOMMITTED_SYM { $$= ISO_READ_UNCOMMITTED; } + | READ_SYM COMMITTED_SYM { $$= ISO_READ_COMMITTED; } + | REPEATABLE_SYM READ_SYM { $$= ISO_REPEATABLE_READ; } + | SERIALIZABLE_SYM { $$= ISO_SERIALIZABLE; } + ; + + +text_or_password: + TEXT_STRING + { + $$= new (thd->mem_root) USER_AUTH(); + $$->auth_str= $1; + } + | PASSWORD_SYM '(' TEXT_STRING ')' + { + $$= 
new (thd->mem_root) USER_AUTH(); + $$->pwtext= $3; + } + | OLD_PASSWORD_SYM '(' TEXT_STRING ')' + { + $$= new (thd->mem_root) USER_AUTH(); + $$->pwtext= $3; + $$->auth_str.str= Item_func_password::alloc(thd, + $3.str, $3.length, Item_func_password::OLD); + $$->auth_str.length= SCRAMBLED_PASSWORD_CHAR_LENGTH_323; + } + ; + +set_expr_or_default: + expr { $$=$1; } + | DEFAULT { $$=0; } + | ON + { + $$=new (thd->mem_root) Item_string_sys(thd, "ON", 2); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | ALL + { + $$=new (thd->mem_root) Item_string_sys(thd, "ALL", 3); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + | BINARY + { + $$=new (thd->mem_root) Item_string_sys(thd, "binary", 6); + if (unlikely($$ == NULL)) + MYSQL_YYABORT; + } + ; + +/* Lock function */ + +lock: + LOCK_SYM table_or_tables + { + LEX *lex= Lex; + + if (unlikely(lex->sphead)) + my_yyabort_error((ER_SP_BADSTATEMENT, MYF(0), "LOCK")); + lex->sql_command= SQLCOM_LOCK_TABLES; + } + table_lock_list opt_lock_wait_timeout + {} + ; + +opt_lock_wait_timeout: + /* empty */ + {} + | WAIT_SYM ulong_num + { + if (unlikely(set_statement_var_if_exists(thd, STRING_WITH_LEN("lock_wait_timeout"), $2)) || + unlikely(set_statement_var_if_exists(thd, STRING_WITH_LEN("innodb_lock_wait_timeout"), $2))) + MYSQL_YYABORT; + } + | NOWAIT_SYM + { + if (unlikely(set_statement_var_if_exists(thd, STRING_WITH_LEN("lock_wait_timeout"), 0)) || + unlikely(set_statement_var_if_exists(thd, STRING_WITH_LEN("innodb_lock_wait_timeout"), 0))) + MYSQL_YYABORT; + } + ; + +table_or_tables: + TABLE_SYM { } + | TABLES { } + ; + +table_lock_list: + table_lock + | table_lock_list ',' table_lock + ; + +table_lock: + table_ident opt_table_alias_clause lock_option + { + thr_lock_type lock_type= (thr_lock_type) $3; + bool lock_for_write= (lock_type >= TL_FIRST_WRITE); + ulong table_options= lock_for_write ? TL_OPTION_UPDATING : 0; + enum_mdl_type mdl_type= !lock_for_write + ? MDL_SHARED_READ + : lock_type == TL_WRITE_CONCURRENT_INSERT + ? 
MDL_SHARED_WRITE + : MDL_SHARED_NO_READ_WRITE; + + if (unlikely(!Lex->current_select_or_default()-> + add_table_to_list(thd, $1, $2, table_options, + lock_type, mdl_type))) + MYSQL_YYABORT; + } + ; + +lock_option: + READ_SYM { $$= TL_READ_NO_INSERT; } + | WRITE_SYM { $$= TL_WRITE_DEFAULT; } + | WRITE_SYM CONCURRENT + { + $$= (Lex->sphead ? TL_WRITE_DEFAULT : TL_WRITE_CONCURRENT_INSERT); + } + + | LOW_PRIORITY WRITE_SYM { $$= TL_WRITE_LOW_PRIORITY; } + | READ_SYM LOCAL_SYM { $$= TL_READ; } + ; + +unlock: + UNLOCK_SYM + { + LEX *lex= Lex; + + if (unlikely(lex->sphead)) + my_yyabort_error((ER_SP_BADSTATEMENT, MYF(0), "UNLOCK")); + lex->sql_command= SQLCOM_UNLOCK_TABLES; + } + table_or_tables + {} + ; + +/* +** Handler: direct access to ISAM functions +*/ + +handler: + HANDLER_SYM + { + if (Lex->main_select_push()) + MYSQL_YYABORT; + } + handler_tail + { + Lex->pop_select(); //main select + } + ; + +handler_tail: + table_ident OPEN_SYM opt_table_alias_clause + { + LEX *lex= Lex; + if (unlikely(lex->sphead)) + my_yyabort_error((ER_SP_BADSTATEMENT, MYF(0), "HANDLER")); + lex->sql_command = SQLCOM_HA_OPEN; + if (!lex->current_select->add_table_to_list(thd, $1, $3, 0)) + MYSQL_YYABORT; + } + | table_ident_nodb CLOSE_SYM + { + LEX *lex= Lex; + if (unlikely(lex->sphead)) + my_yyabort_error((ER_SP_BADSTATEMENT, MYF(0), "HANDLER")); + lex->sql_command = SQLCOM_HA_CLOSE; + if (!lex->current_select->add_table_to_list(thd, $1, 0, 0)) + MYSQL_YYABORT; + } + | table_ident_nodb READ_SYM + { + LEX *lex=Lex; + SELECT_LEX *select= Select; + if (unlikely(lex->sphead)) + my_yyabort_error((ER_SP_BADSTATEMENT, MYF(0), "HANDLER")); + lex->clause_that_disallows_subselect= "HANDLER..READ"; + lex->sql_command = SQLCOM_HA_READ; + lex->ha_rkey_mode= HA_READ_KEY_EXACT; /* Avoid purify warnings */ + Item *one= new (thd->mem_root) Item_int(thd, (int32) 1); + if (unlikely(one == NULL)) + MYSQL_YYABORT; + select->limit_params.select_limit= one; + select->limit_params.offset_limit= 0; + 
lex->limit_rows_examined= 0; + if (!lex->current_select->add_table_to_list(thd, $1, 0, 0)) + MYSQL_YYABORT; + } + handler_read_or_scan opt_where_clause opt_global_limit_clause + { + LEX *lex=Lex; + SELECT_LEX *select= Select; + lex->clause_that_disallows_subselect= NULL; + if (!lex->current_select->limit_params.explicit_limit) + { + Item *one= new (thd->mem_root) Item_int(thd, (int32) 1); + if (one == NULL) + MYSQL_YYABORT; + select->limit_params.select_limit= one; + select->limit_params.offset_limit= 0; + lex->limit_rows_examined= 0; + } + /* Stored functions are not supported for HANDLER READ. */ + if (lex->uses_stored_routines()) + { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "stored functions in HANDLER ... READ"); + MYSQL_YYABORT; + } + } + ; + +handler_read_or_scan: + handler_scan_function { Lex->ident= null_clex_str; } + | ident handler_rkey_function { Lex->ident= $1; } + ; + +handler_scan_function: + FIRST_SYM { Lex->ha_read_mode = RFIRST; } + | NEXT_SYM { Lex->ha_read_mode = RNEXT; } + ; + +handler_rkey_function: + FIRST_SYM { Lex->ha_read_mode = RFIRST; } + | NEXT_SYM { Lex->ha_read_mode = RNEXT; } + | PREV_SYM { Lex->ha_read_mode = RPREV; } + | LAST_SYM { Lex->ha_read_mode = RLAST; } + | handler_rkey_mode + { + LEX *lex=Lex; + lex->ha_read_mode = RKEY; + lex->ha_rkey_mode=$1; + if (unlikely(!(lex->insert_list= new (thd->mem_root) List_item))) + MYSQL_YYABORT; + } + '(' values ')' + {} + ; + +handler_rkey_mode: + '=' { $$=HA_READ_KEY_EXACT; } + | GE { $$=HA_READ_KEY_OR_NEXT; } + | LE { $$=HA_READ_KEY_OR_PREV; } + | '>' { $$=HA_READ_AFTER_KEY; } + | '<' { $$=HA_READ_BEFORE_KEY; } + ; + +/* GRANT / REVOKE */ + +revoke: + REVOKE clear_privileges revoke_command + {} + ; + +revoke_command: + grant_privileges ON opt_table grant_ident FROM user_and_role_list + { + if (Lex->stmt_revoke_table(thd, $1, *$4)) + MYSQL_YYABORT; + } + | grant_privileges ON sp_handler grant_ident FROM user_and_role_list + { + if (Lex->stmt_revoke_sp(thd, $1, *$4, *$3)) + MYSQL_YYABORT; 
+ } + | ALL opt_privileges ',' GRANT OPTION FROM user_and_role_list + { + Lex->sql_command = SQLCOM_REVOKE_ALL; + } + | PROXY_SYM ON user FROM user_list + { + if (Lex->stmt_revoke_proxy(thd, $3)) + MYSQL_YYABORT; + } + | admin_option_for_role FROM user_and_role_list + { + Lex->sql_command= SQLCOM_REVOKE_ROLE; + if (unlikely(Lex->users_list.push_front($1, thd->mem_root))) + MYSQL_YYABORT; + } + ; + +admin_option_for_role: + ADMIN_SYM OPTION FOR_SYM grant_role + { Lex->with_admin_option= true; $$= $4; } + | grant_role + { Lex->with_admin_option= false; $$= $1; } + ; + +grant: + GRANT clear_privileges grant_command + {} + ; + +grant_command: + grant_privileges ON opt_table grant_ident TO_SYM grant_list + opt_require_clause opt_grant_options + { + if (Lex->stmt_grant_table(thd, $1, *$4, $8)) + MYSQL_YYABORT; + } + | grant_privileges ON sp_handler grant_ident TO_SYM grant_list + opt_require_clause opt_grant_options + { + if (Lex->stmt_grant_sp(thd, $1, *$4, *$3, $8)) + MYSQL_YYABORT; + } + | PROXY_SYM ON user TO_SYM grant_list opt_grant_option + { + if (Lex->stmt_grant_proxy(thd, $3, $6)) + MYSQL_YYABORT; + } + | grant_role TO_SYM grant_list opt_with_admin_option + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_GRANT_ROLE; + /* The first role is the one that is granted */ + if (unlikely(Lex->users_list.push_front($1, thd->mem_root))) + MYSQL_YYABORT; + } + + ; + +opt_with_admin: + /* nothing */ { Lex->definer = 0; } + | WITH ADMIN_SYM user_or_role { Lex->definer = $3; } + ; + +opt_with_admin_option: + /* nothing */ { Lex->with_admin_option= false; } + | WITH ADMIN_SYM OPTION { Lex->with_admin_option= true; } + ; + +role_list: + grant_role + { + if (unlikely(Lex->users_list.push_back($1, thd->mem_root))) + MYSQL_YYABORT; + } + | role_list ',' grant_role + { + if (unlikely(Lex->users_list.push_back($3, thd->mem_root))) + MYSQL_YYABORT; + } + ; + +current_role: + CURRENT_ROLE optional_braces + { + if (unlikely(!($$=(LEX_USER*) thd->calloc(sizeof(LEX_USER))))) + 
            MYSQL_YYABORT;
            $$->user= current_role;
          }
        ;


/*
  A role name given as an identifier or string.  Trailing spaces are
  stripped (they would be lost in mysql.user anyway) and the special
  names NONE and PUBLIC are mapped to their canonical LEX_CSTRINGs.
  Roles have no host part, so host is set to the empty string.
*/
role_name: ident_or_text
          {
            CHARSET_INFO *cs= system_charset_info;
            /* trim end spaces (as they'll be lost in mysql.user anyway) */
            $1.length= cs->lengthsp($1.str, $1.length);
            ((char*) $1.str)[$1.length] = '\0';
            if (unlikely($1.length == 0))
              my_yyabort_error((ER_INVALID_ROLE, MYF(0), ""));
            if (unlikely(!($$=(LEX_USER*) thd->calloc(sizeof(LEX_USER)))))
              MYSQL_YYABORT;
            if (lex_string_eq(&$1, &none))
              $$->user= none;
            else if (lex_string_eq(&$1, &public_name))
              $$->user= public_name;
            else if (check_string_char_length(&($$->user= $1), ER_USERNAME,
                                             username_char_length, cs, 0))
              MYSQL_YYABORT;
            /* Roles are stored without a host part */
            $$->host= empty_clex_str;
          }
        ;

grant_role: role_name | current_role ;

/* Optional TABLE keyword: GRANT ... ON [TABLE] db.tbl */
opt_table:
          /* Empty */
        | TABLE_SYM
        ;

/* The privilege part of GRANT/REVOKE: a list, or ALL [PRIVILEGES] */
grant_privileges:
          object_privilege_list
        | ALL opt_privileges
          {
            if (!($$= new (thd->mem_root) Lex_grant_privilege(GLOBAL_ACLS, true)))
              MYSQL_YYABORT;
          }
        ;

opt_privileges:
          /* empty */
        | PRIVILEGES
        ;

/*
  Comma-separated list of privileges; column-level privileges
  (e.g. SELECT (a,b)) are folded in via add_column_list_privilege().
*/
object_privilege_list:
          object_privilege
          {
            if (!($$= new (thd->mem_root) Lex_grant_privilege($1)))
              MYSQL_YYABORT;
          }
        | column_list_privilege
          {
            if (!($$= new (thd->mem_root) Lex_grant_privilege()) ||
                $$->add_column_list_privilege(thd, $1.m_columns[0],
                                              $1.m_privilege))
              MYSQL_YYABORT;
          }
        | object_privilege_list ',' object_privilege
          {
            ($$= $1)->add_object_privilege($3);
          }
        | object_privilege_list ',' column_list_privilege
          {
            if (($$= $1)->add_column_list_privilege(thd, $3.m_columns[0],
                                                    $3.m_privilege))
              MYSQL_YYABORT;
          }
        ;

/* A privilege with an explicit column list, e.g. SELECT (col1, col2) */
column_list_privilege:
          column_privilege '(' comma_separated_ident_list ')'
          {
            $$= Lex_column_list_privilege($3, $1);
          }
        ;

/* Only these four privileges may take a column list */
column_privilege:
          SELECT_SYM { $$= SELECT_ACL; }
        | INSERT     { $$= INSERT_ACL; }
        | UPDATE_SYM { $$= UPDATE_ACL; }
        | REFERENCES { $$= REFERENCES_ACL; }
        ;

/* Single privilege keyword(s) -> ACL bit(s) */
object_privilege:
          SELECT_SYM        { $$= SELECT_ACL; }
        | INSERT            { $$= INSERT_ACL; }
        | UPDATE_SYM        { $$= UPDATE_ACL; }
        | REFERENCES        { $$= REFERENCES_ACL; }
        | DELETE_SYM        { $$= DELETE_ACL;}
        | USAGE             { $$= NO_ACL; }
        | INDEX_SYM         { $$= INDEX_ACL;}
        | ALTER             { $$= ALTER_ACL;}
        | CREATE            { $$= CREATE_ACL;}
        | DROP              { $$= DROP_ACL;}
        | EXECUTE_SYM       { $$= EXECUTE_ACL;}
        | RELOAD            { $$= RELOAD_ACL;}
        | SHUTDOWN          { $$= SHUTDOWN_ACL;}
        | PROCESS           { $$= PROCESS_ACL;}
        | FILE_SYM          { $$= FILE_ACL;}
        | GRANT OPTION      { $$= GRANT_ACL;}
        | SHOW DATABASES    { $$= SHOW_DB_ACL;}
        | SUPER_SYM         { $$= SUPER_ACL;}
        | CREATE TEMPORARY TABLES { $$= CREATE_TMP_ACL;}
        | LOCK_SYM TABLES   { $$= LOCK_TABLES_ACL; }
        | REPLICATION SLAVE  { $$= REPL_SLAVE_ACL; }
        | REPLICATION CLIENT_SYM { $$= BINLOG_MONITOR_ACL; /*Compatibility*/ }
        | CREATE VIEW_SYM   { $$= CREATE_VIEW_ACL; }
        | SHOW VIEW_SYM     { $$= SHOW_VIEW_ACL; }
        | CREATE ROUTINE_SYM { $$= CREATE_PROC_ACL; }
        | ALTER ROUTINE_SYM { $$= ALTER_PROC_ACL; }
        | CREATE USER_SYM   { $$= CREATE_USER_ACL; }
        | EVENT_SYM         { $$= EVENT_ACL;}
        | TRIGGER_SYM       { $$= TRIGGER_ACL; }
        | CREATE TABLESPACE { $$= CREATE_TABLESPACE_ACL; }
        | DELETE_SYM HISTORY_SYM { $$= DELETE_HISTORY_ACL; }
        | SET USER_SYM      { $$= SET_USER_ACL; }
        | FEDERATED_SYM ADMIN_SYM { $$= FEDERATED_ADMIN_ACL; }
        | CONNECTION_SYM ADMIN_SYM { $$= CONNECTION_ADMIN_ACL; }
        | READ_SYM ONLY_SYM ADMIN_SYM { $$= READ_ONLY_ADMIN_ACL; }
        | READ_ONLY_SYM ADMIN_SYM { $$= READ_ONLY_ADMIN_ACL; }
        | BINLOG_SYM MONITOR_SYM { $$= BINLOG_MONITOR_ACL; }
        | BINLOG_SYM ADMIN_SYM { $$= BINLOG_ADMIN_ACL; }
        | BINLOG_SYM REPLAY_SYM { $$= BINLOG_REPLAY_ACL; }
        | REPLICATION MASTER_SYM ADMIN_SYM { $$= REPL_MASTER_ADMIN_ACL; }
        | REPLICATION SLAVE ADMIN_SYM { $$= REPL_SLAVE_ADMIN_ACL; }
        | SLAVE MONITOR_SYM { $$= SLAVE_MONITOR_ACL; }
        ;

/* AND between REQUIRE elements is optional */
opt_and:
          /* empty */ {}
        | AND_SYM {}
        ;

require_list:
          require_list_element opt_and require_list
        | require_list_element
        ;

/*
  One REQUIRE element (SUBJECT/ISSUER/CIPHER); duplicates of the
  same element in one statement are rejected with ER_DUP_ARGUMENT.
*/
require_list_element:
          SUBJECT_SYM TEXT_STRING
          {
            LEX *lex=Lex;
            if (lex->account_options.x509_subject.str)
              my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "SUBJECT"));
            lex->account_options.x509_subject= $2;
          }
        | ISSUER_SYM TEXT_STRING
          {
            LEX *lex=Lex;
            if (lex->account_options.x509_issuer.str)
              my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "ISSUER"));
            lex->account_options.x509_issuer= $2;
          }
        | CIPHER_SYM TEXT_STRING
          {
            LEX *lex=Lex;
            if (lex->account_options.ssl_cipher.str)
              my_yyabort_error((ER_DUP_ARGUMENT, MYF(0), "CIPHER"));
            lex->account_options.ssl_cipher= $2;
          }
        ;

/*
  The object of a GRANT/REVOKE:
    '*'        - all tables in the current database
    ident.'*'  - all tables in a named database
    '*'.'*'    - global
    table_ident- a single table (or routine name for sp_handler)
*/
grant_ident:
          '*'
          {
            LEX_CSTRING db;
            if (unlikely(Lex->copy_db_to(&db)))
              MYSQL_YYABORT;
            if (!($$= new (thd->mem_root) Lex_grant_object_name(db,
                                 Lex_grant_object_name::STAR)))
              MYSQL_YYABORT;
          }
        | ident '.' '*'
          {
            if (!($$= new (thd->mem_root) Lex_grant_object_name($1,
                                 Lex_grant_object_name::IDENT_STAR)))
              MYSQL_YYABORT;
          }
        | '*' '.' '*'
          {
            if (!($$= new (thd->mem_root) Lex_grant_object_name(
                                 null_clex_str,
                                 Lex_grant_object_name::STAR_STAR)))
              MYSQL_YYABORT;
          }
        | table_ident
          {
            if (!($$= new (thd->mem_root) Lex_grant_object_name($1)))
              MYSQL_YYABORT;
          }
        ;

/* Comma-separated users (no roles, no IDENTIFIED BY clauses) */
user_list:
          user
          {
            if (unlikely(Lex->users_list.push_back($1, thd->mem_root)))
              MYSQL_YYABORT;
          }
        | user_list ',' user
          {
            if (unlikely(Lex->users_list.push_back($3, thd->mem_root)))
              MYSQL_YYABORT;
          }
        ;

/* Comma-separated grantees, each optionally with authentication info */
grant_list:
          grant_user
          {
            if (unlikely(Lex->users_list.push_back($1, thd->mem_root)))
              MYSQL_YYABORT;
          }
        | grant_list ',' grant_user
          {
            if (unlikely(Lex->users_list.push_back($3, thd->mem_root)))
              MYSQL_YYABORT;
          }
        ;

/* Comma-separated list that may mix users and roles */
user_and_role_list:
          user_or_role
          {
            if (unlikely(Lex->users_list.push_back($1, thd->mem_root)))
              MYSQL_YYABORT;
          }
        | user_and_role_list ',' user_or_role
          {
            if (unlikely(Lex->users_list.push_back($3, thd->mem_root)))
              MYSQL_YYABORT;
          }
        ;

via_or_with: VIA_SYM | WITH ;
using_or_as: USING | AS ;

/*
  A grantee with optional authentication:
    IDENTIFIED BY 'text'            - plain-text password (to be hashed)
    IDENTIFIED BY PASSWORD 'hash'   - pre-hashed password
    IDENTIFIED VIA/WITH plugin ...  - pluggable authentication chain
*/
grant_user:
          user IDENTIFIED_SYM BY TEXT_STRING
          {
            $$= $1;
            $1->auth= new (thd->mem_root) USER_AUTH();
            $1->auth->pwtext= $4;
          }
        | user IDENTIFIED_SYM BY PASSWORD_SYM TEXT_STRING
          {
            $$= $1;
            $1->auth= new (thd->mem_root) USER_AUTH();
            $1->auth->auth_str= $5;
          }
        | user IDENTIFIED_SYM via_or_with auth_expression
          {
            $$= $1;
            $1->auth= $4;
          }
        | user_or_role
          {
            $$= $1;
          }
        ;

/* Chain of auth plugins joined by OR, kept as a singly-linked list */
auth_expression:
          auth_token OR_SYM auth_expression
          {
            $$= $1;
            DBUG_ASSERT($$->next == NULL);
            $$->next= $3;
          }
        | auth_token
          {
            $$= $1;
          }
        ;

/* One plugin name with its optional USING/AS argument */
auth_token:
          ident_or_text opt_auth_str
          {
            $$= $2;
            $$->plugin= $1;
          }
        ;

/*
  Optional plugin argument; PASSWORD('text') stores the plain text
  for later hashing, a bare string is used as-is.
*/
opt_auth_str:
        /* empty */
          {
            if (!($$=(USER_AUTH*) thd->calloc(sizeof(USER_AUTH))))
              MYSQL_YYABORT;
          }
        | using_or_as TEXT_STRING_sys
          {
            if (!($$=(USER_AUTH*) thd->calloc(sizeof(USER_AUTH))))
              MYSQL_YYABORT;
            $$->auth_str= $2;
          }
        | using_or_as PASSWORD_SYM '(' TEXT_STRING ')'
          {
            if (!($$=(USER_AUTH*) thd->calloc(sizeof(USER_AUTH))))
              MYSQL_YYABORT;
            $$->pwtext= $4;
          }
        ;

/* Optional REQUIRE clause of GRANT/CREATE USER; sets account SSL type */
opt_require_clause:
          /* empty */
        | REQUIRE_SYM require_list
          {
            Lex->account_options.ssl_type= SSL_TYPE_SPECIFIED;
          }
        | REQUIRE_SYM SSL_SYM
          {
            Lex->account_options.ssl_type= SSL_TYPE_ANY;
          }
        | REQUIRE_SYM X509_SYM
          {
            Lex->account_options.ssl_type= SSL_TYPE_X509;
          }
        | REQUIRE_SYM NONE_SYM
          {
            Lex->account_options.ssl_type= SSL_TYPE_NONE;
          }
        ;

/*
  Per-account resource limits (WITH MAX_... in GRANT/CREATE USER).
  Each alternative records the value and marks it as explicitly
  specified in specified_limits.
*/
resource_option:
          MAX_QUERIES_PER_HOUR ulong_num
          {
            Lex->account_options.questions=$2;
            Lex->account_options.specified_limits|= USER_RESOURCES::QUERIES_PER_HOUR;
          }
        | MAX_UPDATES_PER_HOUR ulong_num
          {
            Lex->account_options.updates=$2;
            Lex->account_options.specified_limits|= USER_RESOURCES::UPDATES_PER_HOUR;
          }
        | MAX_CONNECTIONS_PER_HOUR ulong_num
          {
            Lex->account_options.conn_per_hour= $2;
            Lex->account_options.specified_limits|= USER_RESOURCES::CONNECTIONS_PER_HOUR;
          }
        | MAX_USER_CONNECTIONS_SYM int_num
          {
            Lex->account_options.user_conn= $2;
            Lex->account_options.specified_limits|= USER_RESOURCES::USER_CONNECTIONS;
          }
        | MAX_STATEMENT_TIME_SYM NUM_literal
          {
            Lex->account_options.max_statement_time= $2->val_real();
            Lex->account_options.specified_limits|= USER_RESOURCES::MAX_STATEMENT_TIME;
          }
        ;

resource_option_list:
          resource_option_list resource_option {}
        | resource_option {}
        ;

opt_resource_options:
          /* empty */ {}
        | WITH resource_option_list
        ;


/* Options after GRANT ... WITH: may yield GRANT_ACL and/or resource limits */
opt_grant_options:
          /* empty */ { $$= NO_ACL; }
        | WITH grant_option_list { $$= $2; }
        ;

opt_grant_option:
          /* empty */ { $$= NO_ACL; }
        | WITH GRANT OPTION { $$= GRANT_ACL; }
        ;

grant_option_list:
          grant_option_list grant_option { $$= $1 | $2; }
        | grant_option
        ;

/* resource_option contributes no ACL bit, only account limits */
grant_option:
          GRANT OPTION { $$= GRANT_ACL;}
        | resource_option { $$= NO_ACL; }
        ;

/* BEGIN [WORK] in MariaDB (non-Oracle) mode: starts a transaction */
begin_stmt_mariadb:
          BEGIN_MARIADB_SYM
          {
            LEX *lex=Lex;
            lex->sql_command = SQLCOM_BEGIN;
            lex->start_transaction_opt= 0;
          }
          opt_work {}
          ;

/* Anonymous compound statement (BEGIN NOT ATOMIC ... / IF ... etc.) */
compound_statement:
          sp_proc_stmt_compound_ok
          {
            Lex->sql_command= SQLCOM_COMPOUND;
            if (Lex->sp_body_finalize_procedure(thd))
              MYSQL_YYABORT;
          }
        ;

opt_not:
          /* nothing */ { $$= 0; }
        | not { $$= 1; }
        ;

opt_work:
          /* empty */ {}
        | WORK_SYM {}
        ;

/* AND [NO] CHAIN after COMMIT/ROLLBACK; tri-valued (unknown/no/yes) */
opt_chain:
          /* empty */
          { $$= TVL_UNKNOWN; }
        | AND_SYM NO_SYM CHAIN_SYM { $$= TVL_NO; }
        | AND_SYM CHAIN_SYM { $$= TVL_YES; }
        ;

/* [NO] RELEASE after COMMIT/ROLLBACK; tri-valued (unknown/no/yes) */
opt_release:
          /* empty */
          { $$= TVL_UNKNOWN; }
        | RELEASE_SYM { $$= TVL_YES; }
        | NO_SYM RELEASE_SYM { $$= TVL_NO; }
        ;

commit:
          COMMIT_SYM opt_work opt_chain opt_release
          {
            LEX *lex=Lex;
            lex->sql_command= SQLCOM_COMMIT;
            /* Don't allow AND CHAIN RELEASE. */
            MYSQL_YYABORT_UNLESS($3 != TVL_YES || $4 != TVL_YES);
            lex->tx_chain= $3;
            lex->tx_release= $4;
          }
        ;

rollback:
          ROLLBACK_SYM opt_work opt_chain opt_release
          {
            LEX *lex=Lex;
            lex->sql_command= SQLCOM_ROLLBACK;
            /* Don't allow AND CHAIN RELEASE. */
            MYSQL_YYABORT_UNLESS($3 != TVL_YES || $4 != TVL_YES);
            lex->tx_chain= $3;
            lex->tx_release= $4;
          }
        | ROLLBACK_SYM opt_work TO_SYM SAVEPOINT_SYM ident
          {
            LEX *lex=Lex;
            lex->sql_command= SQLCOM_ROLLBACK_TO_SAVEPOINT;
            lex->ident= $5;
          }
        | ROLLBACK_SYM opt_work TO_SYM ident
          {
            LEX *lex=Lex;
            lex->sql_command= SQLCOM_ROLLBACK_TO_SAVEPOINT;
            lex->ident= $4;
          }
        ;

savepoint:
          SAVEPOINT_SYM ident
          {
            LEX *lex=Lex;
            lex->sql_command= SQLCOM_SAVEPOINT;
            lex->ident= $2;
          }
        ;

release:
          RELEASE_SYM SAVEPOINT_SYM ident
          {
            LEX *lex=Lex;
            lex->sql_command= SQLCOM_RELEASE_SAVEPOINT;
            lex->ident= $3;
          }
        ;

/*
  UNIONS : glue selects together
*/

/* UNION/INTERSECT/EXCEPT with optional ALL/DISTINCT modifier */
unit_type_decl:
          UNION_SYM union_option
          { $$.unit_type= UNION_TYPE; $$.distinct= $2; }
        | INTERSECT_SYM union_option
          { $$.unit_type= INTERSECT_TYPE; $$.distinct= $2; }
        | EXCEPT_SYM union_option
          { $$.unit_type= EXCEPT_TYPE; $$.distinct= $2; }
        ;

/*
  Start a UNION, for non-top level query expressions.
  1 = DISTINCT (the default), 0 = ALL.
*/
union_option:
          /* empty */ { $$=1; }
        | DISTINCT  { $$=1; }
        | ALL       { $$=0; }
        ;

/* SELECT modifiers; each just sets option bits on the current select */
query_expression_option:
          STRAIGHT_JOIN { Select->options|= SELECT_STRAIGHT_JOIN; }
        | HIGH_PRIORITY
          {
            YYPS->m_lock_type= TL_READ_HIGH_PRIORITY;
            YYPS->m_mdl_type= MDL_SHARED_READ;
            Select->options|= SELECT_HIGH_PRIORITY;
          }
        | DISTINCT         { Select->options|= SELECT_DISTINCT; }
        | UNIQUE_SYM       { Select->options|= SELECT_DISTINCT; }
        | SQL_SMALL_RESULT { Select->options|= SELECT_SMALL_RESULT; }
        | SQL_BIG_RESULT   { Select->options|= SELECT_BIG_RESULT; }
        | SQL_BUFFER_RESULT { Select->options|= OPTION_BUFFER_RESULT; }
        | SQL_CALC_FOUND_ROWS { Select->options|= OPTION_FOUND_ROWS; }
        | ALL { Select->options|= SELECT_ALL; }
        ;

/**************************************************************************

  DEFINER clause support.

**************************************************************************/

definer_opt:
          no_definer
        | definer
        ;

no_definer:
          /* empty */
          {
            /*
              We have to distinguish missing DEFINER-clause from case when
              CURRENT_USER specified as definer explicitly in order to properly
              handle CREATE TRIGGER statements which come to replication thread
              from older master servers (i.e. to create non-suid trigger in this
              case).
            */
            thd->lex->definer= 0;
          }
        ;

definer:
          DEFINER_SYM '=' user_or_role
          {
            Lex->definer= $3;
            Lex->account_options.reset();
          }
        ;

/**************************************************************************

  CREATE VIEW statement parts.

**************************************************************************/

view_algorithm:
          ALGORITHM_SYM '=' UNDEFINED_SYM { $$= DTYPE_ALGORITHM_UNDEFINED; }
        | ALGORITHM_SYM '=' MERGE_SYM     { $$= VIEW_ALGORITHM_MERGE; }
        | ALGORITHM_SYM '=' TEMPTABLE_SYM { $$= VIEW_ALGORITHM_TMPTABLE; }
        ;

opt_view_suid:
          /* empty */ { $$= VIEW_SUID_DEFAULT; }
        | view_suid   { $$= $1; }
        ;

view_suid:
          SQL_SYM SECURITY_SYM DEFINER_SYM  { $$= VIEW_SUID_DEFINER; }
        | SQL_SYM SECURITY_SYM INVOKER_SYM  { $$= VIEW_SUID_INVOKER; }
        ;

/* Optional explicit column-name list of CREATE VIEW v (a, b, ...) */
view_list_opt:
          /* empty */
          {}
        | '(' view_list ')' { }
        ;

view_list:
          ident
          {
            Lex->view_list.push_back((LEX_CSTRING*)
                                     thd->memdup(&$1, sizeof(LEX_CSTRING)),
                                     thd->mem_root);
          }
        | view_list ',' ident
          {
            Lex->view_list.push_back((LEX_CSTRING*)
                                     thd->memdup(&$3, sizeof(LEX_CSTRING)),
                                     thd->mem_root);
          }
        ;

/*
  The SELECT part of CREATE VIEW; the raw query text is remembered
  (select.str) so it can be stored in the .frm file.
*/
view_select:
          {
            LEX *lex= Lex;
            lex->parsing_options.allows_variable= FALSE;
            lex->create_view->select.str= (char *) YYLIP->get_cpp_ptr();
          }
          query_expression
          view_check_option
          {
            if (Lex->parsed_create_view($2, $3))
              MYSQL_YYABORT;
          }
        ;

/* WITH [CASCADED|LOCAL] CHECK OPTION; plain CHECK OPTION means CASCADED */
view_check_option:
          /* empty */                     { $$= VIEW_CHECK_NONE; }
        | WITH CHECK_SYM OPTION           { $$= VIEW_CHECK_CASCADED; }
        | WITH CASCADED CHECK_SYM OPTION  { $$= VIEW_CHECK_CASCADED; }
        | WITH LOCAL_SYM CHECK_SYM OPTION { $$= VIEW_CHECK_LOCAL; }
        ;

/**************************************************************************

  CREATE TRIGGER statement parts.

**************************************************************************/

trigger_action_order:
            FOLLOWS_SYM
            { $$= TRG_ORDER_FOLLOWS; }
          | PRECEDES_SYM
            { $$= TRG_ORDER_PRECEDES; }
          ;

/* Optional FOLLOWS/PRECEDES other_trigger ordering clause */
trigger_follows_precedes_clause:
            /* empty */
            {
              $$.ordering_clause= TRG_ORDER_NONE;
              $$.anchor_trigger_name.str= NULL;
              $$.anchor_trigger_name.length= 0;
            }
          |
            trigger_action_order ident_or_text
            {
              $$.ordering_clause= $1;
              $$.anchor_trigger_name= $2;
            }
          ;

/*
  Tail of CREATE TRIGGER.  remember_name mid-rule actions capture raw
  text positions so the original statement text (and the ON <table>
  span) can be stored for binary logging / SHOW CREATE TRIGGER.
*/
trigger_tail:
          remember_name
          opt_if_not_exists
          {
            if (unlikely(Lex->add_create_options_with_check($2)))
              MYSQL_YYABORT;
          }
          sp_name
          trg_action_time
          trg_event
          ON
          remember_name /* $8 */
          { /* $9 */
            Lex->raw_trg_on_table_name_begin= YYLIP->get_tok_start();
          }
          table_ident /* $10 */
          FOR_SYM
          remember_name /* $12 */
          { /* $13 */
            Lex->raw_trg_on_table_name_end= YYLIP->get_tok_start();
          }
          EACH_SYM
          ROW_SYM
          {
            Lex->trg_chistics.ordering_clause_begin= YYLIP->get_cpp_ptr();
          }
          trigger_follows_precedes_clause /* $17 */
          { /* $18 */
            LEX *lex= thd->lex;
            Lex_input_stream *lip= YYLIP;

            /* Nested CREATE TRIGGER inside a stored routine is forbidden */
            if (unlikely(lex->sphead))
              my_yyabort_error((ER_SP_NO_RECURSIVE_CREATE, MYF(0), "TRIGGER"));

            lex->stmt_definition_begin= $1;
            lex->ident.str= $8;
            lex->ident.length= $12 - $8;
            lex->spname= $4;
            /* NOTE(review): static_cast template arguments appear to have
               been lost in extraction here -- verify against upstream */
            (*static_cast(&lex->trg_chistics))= ($17);
            lex->trg_chistics.ordering_clause_end= lip->get_cpp_ptr();

            if (unlikely(!lex->make_sp_head(thd, $4, &sp_handler_trigger,
                                            DEFAULT_AGGREGATE)))
              MYSQL_YYABORT;

            lex->sphead->set_body_start(thd, lip->get_cpp_tok_start());
          }
          sp_proc_stmt /* $19 */ force_lookahead /* $20 */
          { /* $21 */
            LEX *lex= Lex;

            lex->sql_command= SQLCOM_CREATE_TRIGGER;
            if (lex->sp_body_finalize_trigger(thd))
              MYSQL_YYABORT;

            /*
              We have to do
              it after parsing trigger body, because some of
              sp_proc_stmt alternatives are not saving/restoring LEX, so
              lex->query_tables can be wiped out.
            */
            if (!lex->first_select_lex()->
                 add_table_to_list(thd, $10, (LEX_CSTRING*) 0,
                                   TL_OPTION_UPDATING, TL_READ_NO_INSERT,
                                   MDL_SHARED_NO_WRITE))
              MYSQL_YYABORT;
          }
        ;

/**************************************************************************

  CREATE FUNCTION | PROCEDURE statements parts.

**************************************************************************/


/* RETURNS <type> of a stored function; fills m_return_field_def */
sf_return_type:
          {
            LEX *lex= Lex;
            lex->init_last_field(&lex->sphead->m_return_field_def,
                                 &empty_clex_str);
          }
          field_type
          {
            if (unlikely(Lex->sf_return_fill_definition($2)))
              MYSQL_YYABORT;
          }
        ;

/*************************************************************************/

/* XA transaction statements: START/BEGIN, END, PREPARE, COMMIT,
   ROLLBACK and RECOVER */
xa:
          XA_SYM begin_or_start xid opt_join_or_resume
          {
            Lex->sql_command = SQLCOM_XA_START;
          }
        | XA_SYM END xid opt_suspend
          {
            Lex->sql_command = SQLCOM_XA_END;
          }
        | XA_SYM PREPARE_SYM xid
          {
            Lex->sql_command = SQLCOM_XA_PREPARE;
          }
        | XA_SYM COMMIT_SYM xid opt_one_phase
          {
            Lex->sql_command = SQLCOM_XA_COMMIT;
          }
        | XA_SYM ROLLBACK_SYM xid
          {
            Lex->sql_command = SQLCOM_XA_ROLLBACK;
          }
        | XA_SYM RECOVER_SYM opt_format_xid
          {
            Lex->sql_command = SQLCOM_XA_RECOVER;
            Lex->verbose= $3;
          }
        ;

/* XA RECOVER FORMAT='SQL'|'RAW'; true selects the SQL-quoted output */
opt_format_xid:
         /* empty */ { $$= false; }
        | FORMAT_SYM '=' ident_or_text
          {
            if (lex_string_eq(&$3, STRING_WITH_LEN("SQL")))
              $$= true;
            else if (lex_string_eq(&$3, STRING_WITH_LEN("RAW")))
              $$= false;
            else
            {
              my_yyabort_error((ER_UNKNOWN_EXPLAIN_FORMAT, MYF(0),
                                "XA RECOVER", $3.str));
              $$= false;
            }
          }
        ;

/*
  An XA transaction id: gtrid [, bqual [, formatID]].
  Lengths are limited to MAXGTRIDSIZE/MAXBQUALSIZE; the default
  formatID is 1.
*/
xid:
          text_string
          {
            MYSQL_YYABORT_UNLESS($1->length() <= MAXGTRIDSIZE);
            if (unlikely(!(Lex->xid=(XID *)thd->alloc(sizeof(XID)))))
              MYSQL_YYABORT;
            Lex->xid->set(1L, $1->ptr(), $1->length(), 0, 0);
          }
        | text_string ',' text_string
          {
            MYSQL_YYABORT_UNLESS($1->length() <= MAXGTRIDSIZE &&
                                 $3->length() <= MAXBQUALSIZE);
            if (unlikely(!(Lex->xid=(XID *)thd->alloc(sizeof(XID)))))
              MYSQL_YYABORT;
            Lex->xid->set(1L, $1->ptr(), $1->length(), $3->ptr(), $3->length());
          }
        | text_string ',' text_string ',' ulong_num
          {
            /* NOTE(review): static_cast/numeric_limits template arguments
               appear to have been lost in extraction -- verify upstream */
            MYSQL_YYABORT_UNLESS($1->length() <= MAXGTRIDSIZE &&
                                 $3->length() <= MAXBQUALSIZE &&
                                 $5 <= static_cast(
                                         std::numeric_limits::max()));
            if (unlikely(!(Lex->xid=(XID *)thd->alloc(sizeof(XID)))))
              MYSQL_YYABORT;
            Lex->xid->set($5, $1->ptr(), $1->length(), $3->ptr(), $3->length());
          }
        ;

begin_or_start:
          BEGIN_MARIADB_SYM {}
        | BEGIN_ORACLE_SYM {}
        | START_SYM {}
        ;

opt_join_or_resume:
          /* nothing */ { Lex->xa_opt=XA_NONE;        }
        | JOIN_SYM      { Lex->xa_opt=XA_JOIN;        }
        | RESUME_SYM    { Lex->xa_opt=XA_RESUME;      }
        ;

opt_one_phase:
          /* nothing */     { Lex->xa_opt=XA_NONE;        }
        | ONE_SYM PHASE_SYM { Lex->xa_opt=XA_ONE_PHASE;   }
        ;

opt_suspend:
          /* nothing */
          { Lex->xa_opt=XA_NONE;        }
        | SUSPEND_SYM
          { Lex->xa_opt=XA_SUSPEND;     }
          opt_migrate
        ;

opt_migrate:
          /* nothing */       {}
        | FOR_SYM MIGRATE_SYM { Lex->xa_opt=XA_FOR_MIGRATE; }
        ;

/* INSTALL PLUGIN name SONAME 'lib' | INSTALL SONAME 'lib' */
install:
          INSTALL_SYM PLUGIN_SYM opt_if_not_exists ident SONAME_SYM TEXT_STRING_sys
          {
            if (Lex->stmt_install_plugin($3, $4, $6))
              MYSQL_YYABORT;
          }
        | INSTALL_SYM SONAME_SYM TEXT_STRING_sys
          {
            Lex->stmt_install_plugin($3);
          }
        ;

uninstall:
          UNINSTALL_SYM PLUGIN_SYM opt_if_exists ident
          {
            if (Lex->stmt_uninstall_plugin_by_name($3, $4))
              MYSQL_YYABORT;
          }
        | UNINSTALL_SYM SONAME_SYM opt_if_exists TEXT_STRING_sys
          {
            if (Lex->stmt_uninstall_plugin_by_soname($3, $4))
              MYSQL_YYABORT;
          }
        ;

/* Avoid compiler warning from yy_*.cc where yyerrlab1 is not used */
keep_gcc_happy:
          IMPOSSIBLE_ACTION
          {
            YYERROR;
          }
        ;

_empty:
          /* Empty */
        ;

%ifdef MARIADB


statement:
          verb_clause
        ;

sp_statement:
          statement
        ;

sp_if_then_statements:
          sp_proc_stmts1
        ;

sp_case_then_statements:
          sp_proc_stmts1
        ;

reserved_keyword_udt_param_type:
          INOUT_SYM
        | IN_SYM
        | OUT_SYM
        ;

reserved_keyword_udt:
          reserved_keyword_udt_not_param_type
        | reserved_keyword_udt_param_type
        ;

// Keywords that start an SP block section
keyword_sp_block_section:
          BEGIN_MARIADB_SYM
        | END
        ;

// Keywords that we allow for labels in SPs.
// Should not include keywords that start a statement or SP characteristics.
keyword_label:
          keyword_data_type
        | keyword_set_special_case
        | keyword_sp_var_and_label
        | keyword_sysvar_type
        | FUNCTION_SYM
        | EXCEPTION_ORACLE_SYM
        | IGNORED_SYM
        ;

// Keywords allowed as identifiers in SP DECLARE statements
keyword_sp_decl:
          keyword_data_type
        | keyword_cast_type
        | keyword_set_special_case
        | keyword_sp_block_section
        | keyword_sp_head
        | keyword_sp_var_and_label
        | keyword_sp_var_not_label
        | keyword_sysvar_type
        | keyword_verb_clause
        | FUNCTION_SYM
        | WINDOW_SYM
        | IGNORED_SYM
        ;

// In MariaDB mode TRUNCATE has no storage clause (Oracle mode adds one)
opt_truncate_table_storage_clause:
          _empty
        ;


ident_for_loop_index:
          ident
        ;

row_field_name:
          ident
          {
            if (!($$= Lex->row_field_name(thd, $1)))
              MYSQL_YYABORT;
          }
        ;

// WHILE <expr> DO ... END WHILE
while_body:
          expr_lex DO_SYM
          {
            if (unlikely($1->sp_while_loop_expression(thd)))
              MYSQL_YYABORT;
          }
          sp_proc_stmts1 END WHILE_SYM
          {
            if (unlikely(Lex->sp_while_loop_finalize(thd)))
              MYSQL_YYABORT;
          }
        ;

for_loop_statements:
          DO_SYM sp_proc_stmts1 END FOR_SYM
          { }
        ;

sp_label:
          label_ident ':' { $$= $1; }
        ;

sp_control_label:
          sp_label
        ;

sp_block_label:
          sp_label
          {
            if (unlikely(Lex->spcont->block_label_declare(&$1)))
              MYSQL_YYABORT;
            $$= $1;
          }
        ;

sp_opt_default:
          _empty       { $$ = NULL; }
        | DEFAULT expr { $$ = $2; }
        ;

// DECLARE v TYPE OF t.c / ROW TYPE OF t (anchored data types)
sp_decl_variable_list_anchored:
          sp_decl_idents_init_vars
          TYPE_SYM OF_SYM optionally_qualified_column_ident
          sp_opt_default
          {
            if (unlikely(Lex->sp_variable_declarations_with_ref_finalize(thd, $1, $4, $5)))
              MYSQL_YYABORT;
            $$.init_using_vars($1);
          }
        | sp_decl_idents_init_vars
          ROW_SYM TYPE_SYM OF_SYM optionally_qualified_column_ident
          sp_opt_default
          {
            if (unlikely(Lex->sp_variable_declarations_rowtype_finalize(thd, $1, $5, $6)))
              MYSQL_YYABORT;
            $$.init_using_vars($1);
          }
        ;

// Parameter name with optional IN/OUT/INOUT mode
sp_param_name_and_mode:
          sp_parameter_type sp_param_name
          {
            $2->mode= $1;
            $$= $2;
          }
        | sp_param_name
        ;

sp_param:
          sp_param_name_and_mode field_type
          {
            if (unlikely(Lex->sp_param_fill_definition($$= $1, $2)))
              MYSQL_YYABORT;
          }
        | sp_param_name_and_mode ROW_SYM row_type_body
          {
            if (unlikely(Lex->sphead->spvar_fill_row(thd, $$= $1, $3)))
              MYSQL_YYABORT;
          }
        | sp_param_anchored
        ;

// Parameters anchored to a column or table row type
sp_param_anchored:
          sp_param_name_and_mode TYPE_SYM OF_SYM ident '.' ident
          {
            if (unlikely(Lex->sphead->spvar_fill_type_reference(thd,
                                                                $$= $1, $4,
                                                                $6)))
              MYSQL_YYABORT;
          }
        | sp_param_name_and_mode TYPE_SYM OF_SYM ident '.' ident '.' ident
          {
            if (unlikely(Lex->sphead->spvar_fill_type_reference(thd, $$= $1,
                                                                $4, $6, $8)))
              MYSQL_YYABORT;
          }
        | sp_param_name_and_mode ROW_SYM TYPE_SYM OF_SYM ident
          {
            if (unlikely(Lex->sphead->spvar_fill_table_rowtype_reference(thd, $$= $1, $5)))
              MYSQL_YYABORT;
          }
        | sp_param_name_and_mode ROW_SYM TYPE_SYM OF_SYM ident '.' ident
          {
            if (unlikely(Lex->sphead->spvar_fill_table_rowtype_reference(thd, $$= $1, $5, $7)))
              MYSQL_YYABORT;
          }
        ;


// Characteristics and body of a standalone stored FUNCTION
sf_c_chistics_and_body_standalone:
          sp_c_chistics
          {
            LEX *lex= thd->lex;
            lex->sphead->set_c_chistics(lex->sp_chistics);
            lex->sphead->set_body_start(thd, YYLIP->get_cpp_tok_start());
          }
          sp_proc_stmt_in_returns_clause force_lookahead
          {
            if (unlikely(Lex->sp_body_finalize_function(thd)))
              MYSQL_YYABORT;
          }
        ;

// Name, parameters, characteristics and body of a standalone PROCEDURE
sp_tail_standalone:
          sp_name
          {
            if (unlikely(!Lex->make_sp_head_no_recursive(thd, $1,
                                                         &sp_handler_procedure,
                                                         DEFAULT_AGGREGATE)))
              MYSQL_YYABORT;
          }
          sp_parenthesized_pdparam_list
          sp_c_chistics
          {
            Lex->sphead->set_c_chistics(Lex->sp_chistics);
            Lex->sphead->set_body_start(thd, YYLIP->get_cpp_tok_start());
          }
          sp_proc_stmt force_lookahead
          {
            if (unlikely(Lex->sp_body_finalize_procedure(thd)))
              MYSQL_YYABORT;
          }
        ;

drop_routine:
          DROP FUNCTION_SYM opt_if_exists ident '.' ident
          {
            if (Lex->stmt_drop_function($3, $4, $6))
              MYSQL_YYABORT;
          }
        | DROP FUNCTION_SYM opt_if_exists ident
          {
            if (Lex->stmt_drop_function($3, $4))
              MYSQL_YYABORT;
          }
        | DROP PROCEDURE_SYM opt_if_exists sp_name
          {
            if (Lex->stmt_drop_procedure($3, $4))
              MYSQL_YYABORT;
          }
        ;


/*
  CREATE PROCEDURE, CREATE [AGGREGATE] FUNCTION (stored or UDF).
  Note: a UDF (… RETURNS udf_type SONAME …) is only reachable through
  the no_definer alternative, as UDFs have no DEFINER clause.
*/
create_routine:
          create_or_replace definer_opt PROCEDURE_SYM opt_if_not_exists
          {
            if (Lex->stmt_create_procedure_start($1 | $4))
              MYSQL_YYABORT;
          }
          sp_tail_standalone
          {
            Lex->stmt_create_routine_finalize();
          }
        | create_or_replace definer opt_aggregate FUNCTION_SYM opt_if_not_exists
          sp_name
          {
            if (Lex->stmt_create_stored_function_start($1 | $5, $3, $6))
              MYSQL_YYABORT;
          }
          sp_parenthesized_fdparam_list
          RETURNS_SYM sf_return_type
          sf_c_chistics_and_body_standalone
          {
            Lex->stmt_create_routine_finalize();
          }
        | create_or_replace no_definer opt_aggregate FUNCTION_SYM opt_if_not_exists
          sp_name
          {
            if (Lex->stmt_create_stored_function_start($1 | $5, $3, $6))
              MYSQL_YYABORT;
          }
          sp_parenthesized_fdparam_list
          RETURNS_SYM sf_return_type
          sf_c_chistics_and_body_standalone
          {
            Lex->stmt_create_routine_finalize();
          }
        | create_or_replace no_definer opt_aggregate FUNCTION_SYM opt_if_not_exists
          ident RETURNS_SYM udf_type SONAME_SYM TEXT_STRING_sys
          {
            if (Lex->stmt_create_udf_function($1 | $5, $3, $6,
                                              (Item_result) $8, $10))
              MYSQL_YYABORT;
          }
        ;


/* Accumulated DECLARE section of an SP block */
sp_decls:
          _empty
          {
            $$.init();
          }
        | sp_decls sp_decl ';'
          {
            // We check for declarations out of (standard) order this way
            // because letting the grammar rules reflect it caused tricky
            // shift/reduce conflicts with the wrong result. (And we get
            // better error handling this way.)
            if (unlikely(Lex->sp_declarations_join(&$$, $1, $2)))
              MYSQL_YYABORT;
          }
        ;

sp_decl:
          DECLARE_MARIADB_SYM sp_decl_body { $$= $2; }
        ;


/* One DECLARE: variables, a condition, a handler, or a cursor */
sp_decl_body:
          sp_decl_variable_list
        | sp_decl_ident CONDITION_SYM FOR_SYM sp_cond
          {
            if (unlikely(Lex->spcont->declare_condition(thd, &$1, $4)))
              MYSQL_YYABORT;
            $$.vars= $$.hndlrs= $$.curs= 0;
            $$.conds= 1;
          }
        | sp_decl_handler
        | sp_decl_ident CURSOR_SYM
          {
            Lex->sp_block_init(thd);
          }
          opt_parenthesized_cursor_formal_parameters
          FOR_SYM sp_cursor_stmt
          {
            sp_pcontext *param_ctx= Lex->spcont;
            if (unlikely(Lex->sp_block_finalize(thd)))
              MYSQL_YYABORT;
            if (unlikely(Lex->sp_declare_cursor(thd, &$1, $6, param_ctx, true)))
              MYSQL_YYABORT;
            $$.vars= $$.conds= $$.hndlrs= 0;
            $$.curs= 1;
          }
        ;



// ps_proc_stmt_in_returns_clause is a statement that is allowed
// in the RETURNS clause of a stored function definition directly,
// without the BEGIN..END block.
// It should not include any syntax structures starting with '(', to avoid
// shift/reduce conflicts with the rule "field_type" and its sub-rules
// that scan an optional length, like CHAR(1) or YEAR(4).
// See MDEV-9166.
+ +sp_proc_stmt_in_returns_clause: + sp_proc_stmt_return + | sp_labeled_block + | sp_unlabeled_block + | sp_labeled_control + | sp_proc_stmt_compound_ok + ; + +sp_proc_stmt: + sp_proc_stmt_in_returns_clause + | sp_proc_stmt_statement + | sp_proc_stmt_continue_oracle + | sp_proc_stmt_exit_oracle + | sp_proc_stmt_leave + | sp_proc_stmt_iterate + | sp_proc_stmt_goto_oracle + | sp_proc_stmt_with_cursor + ; + +sp_proc_stmt_compound_ok: + sp_proc_stmt_if + | case_stmt_specification + | sp_unlabeled_block_not_atomic + | sp_unlabeled_control + ; + + +sp_labeled_block: + sp_block_label + BEGIN_MARIADB_SYM + { + Lex->sp_block_init(thd, &$1); + } + sp_decls + sp_proc_stmts + END + sp_opt_label + { + if (unlikely(Lex->sp_block_finalize(thd, $4, &$7))) + MYSQL_YYABORT; + } + ; + +sp_unlabeled_block: + BEGIN_MARIADB_SYM + { + Lex->sp_block_init(thd); + } + sp_decls + sp_proc_stmts + END + { + if (unlikely(Lex->sp_block_finalize(thd, $3))) + MYSQL_YYABORT; + } + ; + +sp_unlabeled_block_not_atomic: + BEGIN_MARIADB_SYM not ATOMIC_SYM // TODO: BEGIN ATOMIC (not -> opt_not) + { + if (unlikely(Lex->maybe_start_compound_statement(thd))) + MYSQL_YYABORT; + Lex->sp_block_init(thd); + } + sp_decls + sp_proc_stmts + END + { + if (unlikely(Lex->sp_block_finalize(thd, $5))) + MYSQL_YYABORT; + } + ; + + +%endif MARIADB + + +%ifdef ORACLE + +statement: + verb_clause + | set_assign + ; + +sp_statement: + statement + | ident_cli_directly_assignable + { + // Direct procedure call (without the CALL keyword) + Lex_ident_sys tmp(thd, &$1); + if (unlikely(!tmp.str) || + unlikely(Lex->call_statement_start(thd, &tmp))) + MYSQL_YYABORT; + } + opt_sp_cparam_list + { + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } + | ident_cli_directly_assignable '.' 
ident + { + Lex_ident_sys tmp(thd, &$1); + if (unlikely(!tmp.str) || + unlikely(Lex->call_statement_start(thd, &tmp, &$3))) + MYSQL_YYABORT; + } + opt_sp_cparam_list + { + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } + | ident_cli_directly_assignable '.' ident '.' ident + { + Lex_ident_sys tmp(thd, &$1); + if (unlikely(Lex->call_statement_start(thd, &tmp, &$3, &$5))) + MYSQL_YYABORT; + } + opt_sp_cparam_list + { + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } + ; + +sp_if_then_statements: + sp_proc_stmts1_implicit_block { } + ; + +sp_case_then_statements: + sp_proc_stmts1_implicit_block { } + ; + +reserved_keyword_udt: + reserved_keyword_udt_not_param_type + ; + +// Keywords that start an SP block section. +keyword_sp_block_section: + BEGIN_ORACLE_SYM + | END + ; + +// Keywords that we allow for labels in SPs. +// Should not include keywords that start a statement or SP characteristics. +keyword_label: + keyword_data_type + | keyword_set_special_case + | keyword_sp_var_and_label + | keyword_sysvar_type + | FUNCTION_SYM + | COMPRESSED_SYM + | EXCEPTION_ORACLE_SYM + | IGNORED_SYM + ; + +keyword_sp_decl: + keyword_sp_head + | keyword_set_special_case + | keyword_sp_var_and_label + | keyword_sp_var_not_label + | keyword_sysvar_type + | keyword_verb_clause + | WINDOW_SYM + | IGNORED_SYM + ; + +opt_truncate_table_storage_clause: + _empty + | DROP STORAGE_SYM + | REUSE_SYM STORAGE_SYM + ; + + +ident_for_loop_index: + ident_directly_assignable + ; + +row_field_name: + ident_directly_assignable + { + if (!($$= Lex->row_field_name(thd, $1))) + MYSQL_YYABORT; + } + ; + +while_body: + expr_lex LOOP_SYM + { + if (unlikely($1->sp_while_loop_expression(thd))) + MYSQL_YYABORT; + } + sp_proc_stmts1 END LOOP_SYM + { + if (unlikely(Lex->sp_while_loop_finalize(thd))) + MYSQL_YYABORT; + } + ; + +for_loop_statements: + LOOP_SYM sp_proc_stmts1 END LOOP_SYM + { } + ; + + +sp_control_label: + labels_declaration_oracle + ; + 
+sp_block_label: + labels_declaration_oracle + { + if (unlikely(Lex->spcont->block_label_declare(&$1))) + MYSQL_YYABORT; + $$= $1; + } + ; + + +remember_end_opt: + { + if (yychar == YYEMPTY) + $$= (char*) YYLIP->get_cpp_ptr_rtrim(); + else + $$= (char*) YYLIP->get_cpp_tok_end_rtrim(); + } + ; + +sp_opt_default: + _empty { $$ = NULL; } + | DEFAULT expr { $$ = $2; } + | SET_VAR expr { $$ = $2; } + ; + +sp_opt_inout: + _empty { $$= sp_variable::MODE_IN; } + | sp_parameter_type + | IN_SYM OUT_SYM { $$= sp_variable::MODE_INOUT; } + ; + +sp_proc_stmts1_implicit_block: + { + Lex->sp_block_init(thd); + } + sp_proc_stmts1 + { + if (unlikely(Lex->sp_block_finalize(thd))) + MYSQL_YYABORT; + } + ; + + +remember_lex: + { + $$= thd->lex; + } + ; + +keyword_directly_assignable: + keyword_data_type + | keyword_cast_type + | keyword_set_special_case + | keyword_sp_var_and_label + | keyword_sp_var_not_label + | keyword_sysvar_type + | FUNCTION_SYM + | WINDOW_SYM + ; + +ident_directly_assignable: + IDENT_sys + | keyword_directly_assignable + { + if (unlikely($$.copy_keyword(thd, &$1))) + MYSQL_YYABORT; + } + ; + +ident_cli_directly_assignable: + IDENT_cli + | keyword_directly_assignable { $$= $1; } + ; + + +set_assign: + ident_cli_directly_assignable SET_VAR + { + LEX *lex=Lex; + lex->set_stmt_init(); + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + } + set_expr_or_default + { + Lex_ident_sys tmp(thd, &$1); + if (unlikely(!tmp.str) || + unlikely(Lex->set_variable(&tmp, $4)) || + unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY, + false))) + MYSQL_YYABORT; + } + | ident_cli_directly_assignable '.' 
ident SET_VAR + { + LEX *lex=Lex; + lex->set_stmt_init(); + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + } + set_expr_or_default + { + LEX *lex= Lex; + DBUG_ASSERT(lex->var_list.is_empty()); + Lex_ident_sys tmp(thd, &$1); + if (unlikely(!tmp.str) || + unlikely(lex->set_variable(&tmp, &$3, $6)) || + unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY, + false))) + MYSQL_YYABORT; + } + | COLON_ORACLE_SYM ident '.' ident SET_VAR + { + LEX *lex= Lex; + if (unlikely(!lex->is_trigger_new_or_old_reference(&$2))) + { + thd->parse_error(ER_SYNTAX_ERROR, $1.pos()); + MYSQL_YYABORT; + } + lex->set_stmt_init(); + if (sp_create_assignment_lex(thd, $1.pos())) + MYSQL_YYABORT; + } + set_expr_or_default + { + LEX_CSTRING tmp= { $2.str, $2.length }; + if (unlikely(Lex->set_trigger_field(&tmp, &$4, $7)) || + unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY, + false))) + MYSQL_YYABORT; + } + ; + + +labels_declaration_oracle: + label_declaration_oracle { $$= $1; } + | labels_declaration_oracle label_declaration_oracle { $$= $2; } + ; + +label_declaration_oracle: + SHIFT_LEFT label_ident SHIFT_RIGHT + { + if (unlikely(Lex->sp_push_goto_label(thd, &$2))) + MYSQL_YYABORT; + $$= $2; + } + ; + +opt_exception_clause: + _empty { $$= 0; } + | EXCEPTION_ORACLE_SYM exception_handlers { $$= $2; } + ; + +exception_handlers: + exception_handler { $$= 1; } + | exception_handlers exception_handler { $$= $1 + 1; } + ; + +exception_handler: + WHEN_SYM + { + if (unlikely(Lex->sp_handler_declaration_init(thd, sp_handler::EXIT))) + MYSQL_YYABORT; + } + sp_hcond_list + THEN_SYM + sp_proc_stmts1_implicit_block + { + if (unlikely(Lex->sp_handler_declaration_finalize(thd, sp_handler::EXIT))) + MYSQL_YYABORT; + } + ; + +sp_no_param: + _empty + { + Lex->sphead->m_param_begin= Lex->sphead->m_param_end= + YYLIP->get_cpp_tok_start() + 1; + } + ; + +opt_sp_parenthesized_fdparam_list: + sp_no_param + | sp_parenthesized_fdparam_list + ; + +opt_sp_parenthesized_pdparam_list: + 
sp_no_param + | sp_parenthesized_pdparam_list + ; + + +opt_sp_name: + _empty { $$= NULL; } + | sp_name { $$= $1; } + ; + + +opt_package_routine_end_name: + _empty { $$= null_clex_str; } + | ident { $$= $1; } + ; + +sp_tail_is: + IS + | AS + ; + +sp_instr_addr: + { $$= Lex->sphead->instructions(); } + ; + +sp_body: + { + Lex->sp_block_init(thd); + } + opt_sp_decl_body_list + { + if (unlikely(Lex->sp_block_with_exceptions_finalize_declarations(thd))) + MYSQL_YYABORT; + } + BEGIN_ORACLE_SYM + sp_block_statements_and_exceptions + { + $2.hndlrs+= $5.hndlrs; + if (unlikely(Lex->sp_block_finalize(thd, $2))) + MYSQL_YYABORT; + } + END + ; + +create_package_chistic: + COMMENT_SYM TEXT_STRING_sys + { Lex->sp_chistics.comment= $2; } + | sp_suid + { Lex->sp_chistics.suid= $1; } + ; + +create_package_chistics: + create_package_chistic {} + | create_package_chistics create_package_chistic { } + ; + +opt_create_package_chistics: + _empty + | create_package_chistics { } + ; + +opt_create_package_chistics_init: + { Lex->sp_chistics.init(); } + opt_create_package_chistics + ; + + +package_implementation_executable_section: + END + { + if (unlikely(Lex->sp_block_with_exceptions_add_empty(thd))) + MYSQL_YYABORT; + $$.init(0); + } + | BEGIN_ORACLE_SYM sp_block_statements_and_exceptions END { $$= $2; } + ; + + +// Inside CREATE PACKAGE BODY, package-wide items (e.g. variables) +// must be declared before routine definitions. 
+ +package_implementation_declare_section: + package_implementation_declare_section_list1 + | package_implementation_declare_section_list2 + | package_implementation_declare_section_list1 + package_implementation_declare_section_list2 + { $$.join($1, $2); } + ; + +package_implementation_declare_section_list1: + package_implementation_item_declaration + | package_implementation_declare_section_list1 + package_implementation_item_declaration + { $$.join($1, $2); } + ; + +package_implementation_declare_section_list2: + package_implementation_routine_definition + | package_implementation_declare_section_list2 + package_implementation_routine_definition + { $$.join($1, $2); } + ; + +package_routine_lex: + { + if (unlikely(!($$= new (thd->mem_root) + sp_lex_local(thd, thd->lex)))) + MYSQL_YYABORT; + thd->m_parser_state->m_yacc.reset_before_substatement(); + } + ; + + +package_specification_function: + remember_lex package_routine_lex ident + { + DBUG_ASSERT($1->sphead->get_package()); + $2->sql_command= SQLCOM_CREATE_FUNCTION; + sp_name *spname= $1->make_sp_name_package_routine(thd, &$3); + if (unlikely(!spname)) + MYSQL_YYABORT; + thd->lex= $2; + if (unlikely(!$2->make_sp_head_no_recursive(thd, spname, + &sp_handler_package_function, + NOT_AGGREGATE))) + MYSQL_YYABORT; + $1->sphead->get_package()->m_current_routine= $2; + (void) is_native_function_with_warn(thd, &$3); + } + opt_sp_parenthesized_fdparam_list + RETURN_ORACLE_SYM sf_return_type + sp_c_chistics + { + sp_head *sp= thd->lex->sphead; + sp->restore_thd_mem_root(thd); + thd->lex= $1; + $$= $2; + } + ; + +package_specification_procedure: + remember_lex package_routine_lex ident + { + DBUG_ASSERT($1->sphead->get_package()); + $2->sql_command= SQLCOM_CREATE_PROCEDURE; + sp_name *spname= $1->make_sp_name_package_routine(thd, &$3); + if (unlikely(!spname)) + MYSQL_YYABORT; + thd->lex= $2; + if (unlikely(!$2->make_sp_head_no_recursive(thd, spname, + &sp_handler_package_procedure, + DEFAULT_AGGREGATE))) + 
MYSQL_YYABORT; + $1->sphead->get_package()->m_current_routine= $2; + } + opt_sp_parenthesized_pdparam_list + sp_c_chistics + { + sp_head *sp= thd->lex->sphead; + sp->restore_thd_mem_root(thd); + thd->lex= $1; + $$= $2; + } + ; + + +package_implementation_routine_definition: + FUNCTION_SYM package_specification_function + package_implementation_function_body ';' + { + sp_package *pkg= Lex->get_sp_package(); + if (unlikely(pkg->add_routine_implementation($2))) + MYSQL_YYABORT; + pkg->m_current_routine= NULL; + $$.init(); + } + | PROCEDURE_SYM package_specification_procedure + package_implementation_procedure_body ';' + { + sp_package *pkg= Lex->get_sp_package(); + if (unlikely(pkg->add_routine_implementation($2))) + MYSQL_YYABORT; + pkg->m_current_routine= NULL; + $$.init(); + } + | package_specification_element { $$.init(); } + ; + + +package_implementation_function_body: + sp_tail_is remember_lex + { + sp_package *pkg= Lex->get_sp_package(); + sp_head *sp= pkg->m_current_routine->sphead; + thd->lex= pkg->m_current_routine; + sp->reset_thd_mem_root(thd); + sp->set_c_chistics(thd->lex->sp_chistics); + sp->set_body_start(thd, YYLIP->get_cpp_tok_start()); + } + sp_body opt_package_routine_end_name + { + if (unlikely(thd->lex->sp_body_finalize_function(thd) || + thd->lex->sphead->check_package_routine_end_name($5))) + MYSQL_YYABORT; + thd->lex= $2; + } + ; + +package_implementation_procedure_body: + sp_tail_is remember_lex + { + sp_package *pkg= Lex->get_sp_package(); + sp_head *sp= pkg->m_current_routine->sphead; + thd->lex= pkg->m_current_routine; + sp->reset_thd_mem_root(thd); + sp->set_c_chistics(thd->lex->sp_chistics); + sp->set_body_start(thd, YYLIP->get_cpp_tok_start()); + } + sp_body opt_package_routine_end_name + { + if (unlikely(thd->lex->sp_body_finalize_procedure(thd) || + thd->lex->sphead->check_package_routine_end_name($5))) + MYSQL_YYABORT; + thd->lex= $2; + } + ; + + +package_implementation_item_declaration: + sp_decl_variable_list ';' + ; + 
+opt_package_specification_element_list: + _empty + | package_specification_element_list + ; + +package_specification_element_list: + package_specification_element + | package_specification_element_list package_specification_element + ; + +package_specification_element: + FUNCTION_SYM package_specification_function ';' + { + sp_package *pkg= Lex->get_sp_package(); + if (unlikely(pkg->add_routine_declaration($2))) + MYSQL_YYABORT; + pkg->m_current_routine= NULL; + } + | PROCEDURE_SYM package_specification_procedure ';' + { + sp_package *pkg= Lex->get_sp_package(); + if (unlikely(pkg->add_routine_declaration($2))) + MYSQL_YYABORT; + pkg->m_current_routine= NULL; + } + ; + +sp_decl_variable_list_anchored: + sp_decl_idents_init_vars + optionally_qualified_column_ident PERCENT_ORACLE_SYM TYPE_SYM + sp_opt_default + { + if (unlikely(Lex->sp_variable_declarations_with_ref_finalize(thd, $1, $2, $5))) + MYSQL_YYABORT; + $$.init_using_vars($1); + } + | sp_decl_idents_init_vars + optionally_qualified_column_ident PERCENT_ORACLE_SYM ROWTYPE_ORACLE_SYM + sp_opt_default + { + if (unlikely(Lex->sp_variable_declarations_rowtype_finalize(thd, $1, $2, $5))) + MYSQL_YYABORT; + $$.init_using_vars($1); + } + ; + +sp_param_name_and_mode: + sp_param_name sp_opt_inout + { + $1->mode= $2; + $$= $1; + } + ; + +sp_param: + sp_param_name_and_mode field_type + { + if (unlikely(Lex->sp_param_fill_definition($$= $1, $2))) + MYSQL_YYABORT; + } + | sp_param_name_and_mode ROW_SYM row_type_body + { + if (unlikely(Lex->sphead->spvar_fill_row(thd, $$= $1, $3))) + MYSQL_YYABORT; + } + | sp_param_anchored + ; + +sp_param_anchored: + sp_param_name_and_mode sp_decl_ident '.' ident PERCENT_ORACLE_SYM TYPE_SYM + { + if (unlikely(Lex->sphead->spvar_fill_type_reference(thd, $$= $1, $2, $4))) + MYSQL_YYABORT; + } + | sp_param_name_and_mode sp_decl_ident '.' ident '.' 
ident PERCENT_ORACLE_SYM TYPE_SYM + { + if (unlikely(Lex->sphead->spvar_fill_type_reference(thd, $$= $1, $2, $4, $6))) + MYSQL_YYABORT; + } + | sp_param_name_and_mode sp_decl_ident PERCENT_ORACLE_SYM ROWTYPE_ORACLE_SYM + { + if (unlikely(Lex->sphead->spvar_fill_table_rowtype_reference(thd, $$= $1, $2))) + MYSQL_YYABORT; + } + | sp_param_name_and_mode sp_decl_ident '.' ident PERCENT_ORACLE_SYM ROWTYPE_ORACLE_SYM + { + if (unlikely(Lex->sphead->spvar_fill_table_rowtype_reference(thd, $$= $1, $2, $4))) + MYSQL_YYABORT; + } + ; + + +sf_c_chistics_and_body_standalone: + sp_c_chistics + { + LEX *lex= thd->lex; + lex->sphead->set_c_chistics(lex->sp_chistics); + lex->sphead->set_body_start(thd, YYLIP->get_cpp_tok_start()); + } + sp_tail_is sp_body force_lookahead + { + if (unlikely(Lex->sp_body_finalize_function(thd))) + MYSQL_YYABORT; + } + ; + +sp_tail_standalone: + sp_name + { + if (unlikely(!Lex->make_sp_head_no_recursive(thd, $1, + &sp_handler_procedure, + DEFAULT_AGGREGATE))) + MYSQL_YYABORT; + } + opt_sp_parenthesized_pdparam_list + sp_c_chistics + { + Lex->sphead->set_c_chistics(Lex->sp_chistics); + Lex->sphead->set_body_start(thd, YYLIP->get_cpp_tok_start()); + } + sp_tail_is + sp_body + opt_sp_name + { + if (unlikely(Lex->sp_body_finalize_procedure_standalone(thd, $8))) + MYSQL_YYABORT; + } + ; + +drop_routine: + DROP FUNCTION_SYM opt_if_exists ident '.' 
ident + { + if (Lex->stmt_drop_function($3, $4, $6)) + MYSQL_YYABORT; + } + | DROP FUNCTION_SYM opt_if_exists ident + { + if (Lex->stmt_drop_function($3, $4)) + MYSQL_YYABORT; + } + | DROP PROCEDURE_SYM opt_if_exists sp_name + { + if (Lex->stmt_drop_procedure($3, $4)) + MYSQL_YYABORT; + } + | DROP PACKAGE_ORACLE_SYM opt_if_exists sp_name + { + LEX *lex= Lex; + lex->set_command(SQLCOM_DROP_PACKAGE, $3); + if (unlikely(lex->sphead)) + my_yyabort_error((ER_SP_NO_DROP_SP, MYF(0), "PACKAGE")); + lex->spname= $4; + } + | DROP PACKAGE_ORACLE_SYM BODY_ORACLE_SYM opt_if_exists sp_name + { + LEX *lex= Lex; + lex->set_command(SQLCOM_DROP_PACKAGE_BODY, $4); + if (unlikely(lex->sphead)) + my_yyabort_error((ER_SP_NO_DROP_SP, MYF(0), "PACKAGE BODY")); + lex->spname= $5; + } + ; + + +create_routine: + create_or_replace definer_opt PROCEDURE_SYM opt_if_not_exists + { + if (Lex->stmt_create_procedure_start($1 | $4)) + MYSQL_YYABORT; + } + sp_tail_standalone + { + Lex->stmt_create_routine_finalize(); + } + | create_or_replace definer opt_aggregate FUNCTION_SYM opt_if_not_exists + sp_name + { + if (Lex->stmt_create_stored_function_start($1 | $5, $3, $6)) + MYSQL_YYABORT; + } + opt_sp_parenthesized_fdparam_list + RETURN_ORACLE_SYM sf_return_type + sf_c_chistics_and_body_standalone + opt_sp_name + { + if (Lex->stmt_create_stored_function_finalize_standalone($12)) + MYSQL_YYABORT; + } + | create_or_replace no_definer opt_aggregate FUNCTION_SYM opt_if_not_exists + sp_name + { + if (Lex->stmt_create_stored_function_start($1 | $5, $3, $6)) + MYSQL_YYABORT; + } + opt_sp_parenthesized_fdparam_list + RETURN_ORACLE_SYM sf_return_type + sf_c_chistics_and_body_standalone + opt_sp_name + { + if (Lex->stmt_create_stored_function_finalize_standalone($12)) + MYSQL_YYABORT; + } + | create_or_replace no_definer opt_aggregate FUNCTION_SYM opt_if_not_exists + ident RETURNS_SYM udf_type SONAME_SYM TEXT_STRING_sys + { + if (Lex->stmt_create_udf_function($1 | $5, $3, $6, + (Item_result) $8, $10)) + 
MYSQL_YYABORT; + } + | create_or_replace definer_opt PACKAGE_ORACLE_SYM + opt_if_not_exists sp_name opt_create_package_chistics_init + { + sp_package *pkg; + if (unlikely(!(pkg= Lex-> + create_package_start(thd, + SQLCOM_CREATE_PACKAGE, + &sp_handler_package_spec, + $5, $1 | $4)))) + MYSQL_YYABORT; + pkg->set_c_chistics(Lex->sp_chistics); + Lex->sphead->set_body_start(thd, YYLIP->get_cpp_tok_start()); + } + sp_tail_is + opt_package_specification_element_list END + remember_end_opt opt_sp_name + { + if (unlikely(Lex->create_package_finalize(thd, $5, $12, $11))) + MYSQL_YYABORT; + } + | create_or_replace definer_opt PACKAGE_ORACLE_SYM BODY_ORACLE_SYM + opt_if_not_exists sp_name opt_create_package_chistics_init + { + sp_package *pkg; + if (unlikely(!(pkg= Lex-> + create_package_start(thd, + SQLCOM_CREATE_PACKAGE_BODY, + &sp_handler_package_body, + $6, $1 | $5)))) + MYSQL_YYABORT; + pkg->set_c_chistics(Lex->sp_chistics); + Lex->sphead->set_body_start(thd, YYLIP->get_cpp_tok_start()); + Lex->sp_block_init(thd); + } + sp_tail_is + package_implementation_declare_section + { + if (unlikely(Lex->sp_block_with_exceptions_finalize_declarations(thd))) + MYSQL_YYABORT; + } + package_implementation_executable_section + { + $10.hndlrs+= $12.hndlrs; + if (unlikely(Lex->sp_block_finalize(thd, $10))) + MYSQL_YYABORT; + } + remember_end_opt opt_sp_name + { + if (unlikely(Lex->create_package_finalize(thd, $6, $15, $14))) + MYSQL_YYABORT; + } + ; + +opt_sp_decl_body_list: + _empty + { + $$.init(); + } + | sp_decl_body_list { $$= $1; } + ; + +sp_decl_body_list: + sp_decl_non_handler_list + { + if (unlikely(Lex->sphead->sp_add_instr_cpush_for_cursors(thd, Lex->spcont))) + MYSQL_YYABORT; + } + opt_sp_decl_handler_list + { + $$.join($1, $3); + } + | sp_decl_handler_list + ; + +sp_decl_non_handler_list: + sp_decl_non_handler ';' { $$= $1; } + | sp_decl_non_handler_list sp_decl_non_handler ';' + { + $$.join($1, $2); + } + ; + +sp_decl_handler_list: + sp_decl_handler ';' { $$= $1; } + | 
sp_decl_handler_list sp_decl_handler ';' + { + $$.join($1, $2); + } + ; + +opt_sp_decl_handler_list: + _empty { $$.init(); } + | sp_decl_handler_list + ; + +sp_decl_non_handler: + sp_decl_variable_list + | ident_directly_assignable CONDITION_SYM FOR_SYM sp_cond + { + if (unlikely(Lex->spcont->declare_condition(thd, &$1, $4))) + MYSQL_YYABORT; + $$.vars= $$.hndlrs= $$.curs= 0; + $$.conds= 1; + } + | ident_directly_assignable EXCEPTION_ORACLE_SYM + { + sp_condition_value *spcond= new (thd->mem_root) + sp_condition_value_user_defined(); + if (unlikely(!spcond) || + unlikely(Lex->spcont->declare_condition(thd, &$1, spcond))) + MYSQL_YYABORT; + $$.vars= $$.hndlrs= $$.curs= 0; + $$.conds= 1; + } + | CURSOR_SYM ident_directly_assignable + { + Lex->sp_block_init(thd); + } + opt_parenthesized_cursor_formal_parameters + IS sp_cursor_stmt + { + sp_pcontext *param_ctx= Lex->spcont; + if (unlikely(Lex->sp_block_finalize(thd))) + MYSQL_YYABORT; + if (unlikely(Lex->sp_declare_cursor(thd, &$2, $6, param_ctx, false))) + MYSQL_YYABORT; + $$.vars= $$.conds= $$.hndlrs= 0; + $$.curs= 1; + } + ; + + +sp_proc_stmt: + sp_labeled_block + | sp_unlabeled_block + | sp_labeled_control + | sp_unlabeled_control + | sp_labelable_stmt + | labels_declaration_oracle sp_labelable_stmt {} + ; + +sp_labelable_stmt: + sp_proc_stmt_statement + | sp_proc_stmt_continue_oracle + | sp_proc_stmt_exit_oracle + | sp_proc_stmt_leave + | sp_proc_stmt_iterate + | sp_proc_stmt_goto_oracle + | sp_proc_stmt_with_cursor + | sp_proc_stmt_return + | sp_proc_stmt_if + | case_stmt_specification + | NULL_SYM { } + ; + +sp_proc_stmt_compound_ok: + sp_proc_stmt_if + | case_stmt_specification + | sp_unlabeled_block + | sp_unlabeled_control + ; + + +sp_labeled_block: + sp_block_label + BEGIN_ORACLE_SYM + { + Lex->sp_block_init(thd, &$1); + if (unlikely(Lex->sp_block_with_exceptions_finalize_declarations(thd))) + MYSQL_YYABORT; + } + sp_block_statements_and_exceptions + END + sp_opt_label + { + if 
(unlikely(Lex->sp_block_finalize(thd, Lex_spblock($4), &$6))) + MYSQL_YYABORT; + } + | sp_block_label + DECLARE_ORACLE_SYM + { + Lex->sp_block_init(thd, &$1); + } + opt_sp_decl_body_list + { + if (unlikely(Lex->sp_block_with_exceptions_finalize_declarations(thd))) + MYSQL_YYABORT; + } + BEGIN_ORACLE_SYM + sp_block_statements_and_exceptions + END + sp_opt_label + { + $4.hndlrs+= $7.hndlrs; + if (unlikely(Lex->sp_block_finalize(thd, $4, &$9))) + MYSQL_YYABORT; + } + ; + +opt_not_atomic: + _empty + | not ATOMIC_SYM // TODO: BEGIN ATOMIC (not -> opt_not) + ; + +sp_unlabeled_block: + BEGIN_ORACLE_SYM opt_not_atomic + { + if (unlikely(Lex->maybe_start_compound_statement(thd))) + MYSQL_YYABORT; + Lex->sp_block_init(thd); + if (unlikely(Lex->sp_block_with_exceptions_finalize_declarations(thd))) + MYSQL_YYABORT; + } + sp_block_statements_and_exceptions + END + { + if (unlikely(Lex->sp_block_finalize(thd, Lex_spblock($4)))) + MYSQL_YYABORT; + } + | DECLARE_ORACLE_SYM + { + if (unlikely(Lex->maybe_start_compound_statement(thd))) + MYSQL_YYABORT; + Lex->sp_block_init(thd); + } + opt_sp_decl_body_list + { + if (unlikely(Lex->sp_block_with_exceptions_finalize_declarations(thd))) + MYSQL_YYABORT; + } + BEGIN_ORACLE_SYM + sp_block_statements_and_exceptions + END + { + $3.hndlrs+= $6.hndlrs; + if (unlikely(Lex->sp_block_finalize(thd, $3))) + MYSQL_YYABORT; + } + ; + +sp_block_statements_and_exceptions: + sp_instr_addr + sp_proc_stmts + { + if (unlikely(Lex->sp_block_with_exceptions_finalize_executable_section(thd, $1))) + MYSQL_YYABORT; + } + opt_exception_clause + { + if (unlikely(Lex->sp_block_with_exceptions_finalize_exceptions(thd, $1, $4))) + MYSQL_YYABORT; + $$.init($4); + } + ; + +%endif ORACLE + +/** + @} (end of group Parser) +*/ diff --git a/sql/strfunc.cc b/sql/strfunc.cc new file mode 100644 index 00000000..416aea58 --- /dev/null +++ b/sql/strfunc.cc @@ -0,0 +1,410 @@ +/* Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. 
+ Copyright (c) 2009, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* Some useful string utility functions used by the MySQL server */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "strfunc.h" +#include "sql_class.h" +#include "typelib.h" // TYPELIB +#include "m_ctype.h" // my_charset_latin1 +#include "mysqld.h" // system_charset_info + +/* + Return bitmap for strings used in a set + + SYNOPSIS + find_set() + lib Strings in set + str Strings of set-strings separated by ',' + err_pos If error, set to point to start of wrong set string + err_len If error, set to the length of wrong set string + set_warning Set to 1 if some string in set couldn't be used + + NOTE + We delete all end space from str before comparison + + RETURN + bitmap of all sets found in x. + set_warning is set to 1 if there was any sets that couldn't be set +*/ + +static const char field_separator=','; + +ulonglong find_set(const TYPELIB *lib, + const char *str, size_t length, CHARSET_INFO *cs, + char **err_pos, uint *err_len, bool *set_warning) +{ + CHARSET_INFO *strip= cs ? 
cs : &my_charset_latin1; + const char *end= str + strip->lengthsp(str, length); + ulonglong found= 0; + *err_pos= 0; // No error yet + *err_len= 0; + if (str != end) + { + const char *start= str; + for (;;) + { + const char *pos= start; + uint var_len; + int mblen= 1; + + if (cs && cs->mbminlen > 1) + { + for ( ; pos < end; pos+= mblen) + { + my_wc_t wc; + if ((mblen= cs->mb_wc(&wc, (const uchar *) pos, + (const uchar *) end)) < 1) + mblen= 1; // Not to hang on a wrong multibyte sequence + else if (wc == (my_wc_t) field_separator) + break; + } + } + else + for (; pos != end && *pos != field_separator; pos++) ; + var_len= (uint) (pos - start); + uint find= cs ? find_type2(lib, start, var_len, cs) : + find_type(lib, start, var_len, (bool) 0); + if (unlikely(!find)) + { + if (*err_len == 0) + { + // report the first error with length > 0 + *err_pos= (char*) start; + *err_len= var_len; + *set_warning= 1; + } + } + else if (find <= sizeof(longlong) * 8) + found|= 1ULL << (find - 1); + if (pos >= end) + break; + start= pos + mblen; + } + } + return found; +} + +/* + Function to find a string in a TYPELIB + (similar to find_type() of mysys/typelib.c) + + SYNOPSIS + find_type() + lib TYPELIB (struct of pointer to values + count) + find String to find + length Length of string to find + part_match Allow part matching of value + + RETURN + 0 error + > 0 position in TYPELIB->type_names +1 +*/ + +uint find_type(const TYPELIB *lib, const char *find, size_t length, + bool part_match) +{ + uint found_count=0, found_pos=0; + const char *end= find+length; + const char *i; + const char *j; + for (uint pos=0 ; (j=lib->type_names[pos++]) ; ) + { + for (i=find ; i != end && + my_toupper(system_charset_info,*i) == + my_toupper(system_charset_info,*j) ; i++, j++) ; + if (i == end) + { + if (! *j) + return(pos); + found_count++; + found_pos= pos; + } + } + return(found_count == 1 && part_match ? 
found_pos : 0); +} + + +/* + Find a string in a list of strings according to collation + + SYNOPSIS + find_type2() + lib TYPELIB (struct of pointer to values + count) + x String to find + length String length + cs Character set + collation to use for comparison + + NOTES + + RETURN + 0 No matching value + >0 Offset+1 in typelib for matched string +*/ + +uint find_type2(const TYPELIB *typelib, const char *x, size_t length, + CHARSET_INFO *cs) +{ + int pos; + const char *j; + DBUG_ENTER("find_type2"); + DBUG_PRINT("enter",("x: '%.*s' lib: %p", (int)length, x, typelib)); + + if (!typelib->count) + { + DBUG_PRINT("exit",("no count")); + DBUG_RETURN(0); + } + + for (pos=0 ; (j=typelib->type_names[pos]) ; pos++) + { + if (!cs->strnncoll(x, length, + j, typelib->type_lengths[pos])) + DBUG_RETURN(pos+1); + } + DBUG_PRINT("exit",("Couldn't find type")); + DBUG_RETURN(0); +} /* find_type */ + + +/* + Un-hex all elements in a typelib + + SYNOPSIS + unhex_type2() + interval TYPELIB (struct of pointer to values + lengths + count) + + NOTES + + RETURN + N/A +*/ + +void unhex_type2(TYPELIB *interval) +{ + for (uint pos= 0; pos < interval->count; pos++) + { + char *from, *to; + for (from= to= (char*) interval->type_names[pos]; *from; ) + { + /* + Note, hexchar_to_int(*from++) doesn't work + one some compilers, e.g. IRIX. Looks like a compiler + bug in inline functions in combination with arguments + that have a side effect. So, let's use from[0] and from[1] + and increment 'from' by two later. 
+ */ + + *to++= (char) (hexchar_to_int(from[0]) << 4) + + hexchar_to_int(from[1]); + from+= 2; + } + interval->type_lengths[pos] /= 2; + } +} + + +/* + Check if the first word in a string is one of the ones in TYPELIB + + SYNOPSIS + check_word() + lib TYPELIB + val String to check + end End of input + end_of_word Store value of last used byte here if we found word + + RETURN + 0 No matching value + > 1 lib->type_names[#-1] matched + end_of_word will point to separator character/end in 'val' +*/ + +uint check_word(TYPELIB *lib, const char *val, const char *end, + const char **end_of_word) +{ + int res; + const char *ptr; + + /* Fiend end of word */ + for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++) + ; + if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0) + *end_of_word= ptr; + return res; +} + + +/* + Converts a string between character sets + + SYNOPSIS + strconvert() + from_cs source character set + from source, a null terminated string + to destination buffer + to_length destination buffer length + + NOTES + 'to' is always terminated with a '\0' character. + If there is no enough space to convert whole string, + only prefix is converted, and terminated with '\0'. + + RETURN VALUES + result string length +*/ + + +uint strconvert(CHARSET_INFO *from_cs, const char *from, size_t from_length, + CHARSET_INFO *to_cs, char *to, size_t to_length, uint *errors) +{ + int cnvres; + my_wc_t wc; + char *to_start= to; + uchar *to_end= (uchar*) to + to_length - 1; + const uchar *from_end= (const uchar*) from + from_length; + my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc; + my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb; + uint error_count= 0; + + while (1) + { + if ((cnvres= (*mb_wc)(from_cs, &wc, + (uchar*) from, from_end)) > 0) + { + if (!wc) + break; + from+= cnvres; + } + else if (cnvres == MY_CS_ILSEQ) + { + error_count++; + from++; + wc= '?'; + } + else + break; // Impossible char. 
+ +outp: + + if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0) + to+= cnvres; + else if (cnvres == MY_CS_ILUNI && wc != '?') + { + error_count++; + wc= '?'; + goto outp; + } + else + break; + } + *to= '\0'; + *errors= error_count; + return (uint32) (to - to_start); + +} + + +/* + Searches for a LEX_STRING in an LEX_STRING array. + + SYNOPSIS + find_string_in_array() + heap The array + needle The string to search for + + NOTE + The last LEX_STRING in the array should have str member set to NULL + + RETURN VALUES + -1 Not found + >=0 Ordinal position +*/ + +int find_string_in_array(LEX_CSTRING * const haystack, LEX_CSTRING * const needle, + CHARSET_INFO * const cs) +{ + const LEX_CSTRING *pos; + for (pos= haystack; pos->str; pos++) + if (!cs->strnncollsp(pos->str, pos->length, + needle->str, needle->length)) + { + return (int)(pos - haystack); + } + return -1; +} + + +const char *set_to_string(THD *thd, LEX_CSTRING *result, ulonglong set, + const char *lib[]) +{ + char buff[STRING_BUFFER_USUAL_SIZE*8]; + String tmp(buff, sizeof(buff), &my_charset_latin1); + LEX_CSTRING unused; + + if (!result) + result= &unused; + + tmp.length(0); + + for (uint i= 0; set; i++, set >>= 1) + if (set & 1) { + tmp.append(lib[i], strlen(lib[i])); + tmp.append(','); + } + + if (tmp.length()) + { + result->str= thd->strmake(tmp.ptr(), tmp.length()-1); + result->length= tmp.length()-1; + } + else + { + result->str= const_cast(""); + result->length= 0; + } + return result->str; +} + +const char *flagset_to_string(THD *thd, LEX_CSTRING *result, ulonglong set, + const char *lib[]) +{ + char buff[STRING_BUFFER_USUAL_SIZE*8]; + String tmp(buff, sizeof(buff), &my_charset_latin1); + LEX_CSTRING unused; + + if (!result) result= &unused; + + tmp.length(0); + + // note that the last element is always "default", and it's ignored below + for (uint i= 0; lib[i+1]; i++, set >>= 1) + { + tmp.append(lib[i], strlen(lib[i])); + if (set & 1) + tmp.append(STRING_WITH_LEN("=on,")); + else + 
tmp.append(STRING_WITH_LEN("=off,")); + } + + result->str= thd->strmake(tmp.ptr(), tmp.length()-1); + result->length= tmp.length()-1; + + return result->str; +} diff --git a/sql/strfunc.h b/sql/strfunc.h new file mode 100644 index 00000000..b2b293e1 --- /dev/null +++ b/sql/strfunc.h @@ -0,0 +1,49 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef STRFUNC_INCLUDED +#define STRFUNC_INCLUDED + +typedef struct st_typelib TYPELIB; + +ulonglong find_set(const TYPELIB *lib, + const char *x, size_t length, CHARSET_INFO *cs, + char **err_pos, uint *err_len, bool *set_warning); +ulonglong find_set_from_flags(TYPELIB *lib, uint default_name, + ulonglong cur_set, ulonglong default_set, + const char *str, uint length, CHARSET_INFO *cs, + char **err_pos, uint *err_len, bool *set_warning); +uint find_type(const TYPELIB *lib, const char *find, size_t length, + bool part_match); +uint find_type2(const TYPELIB *lib, const char *find, size_t length, + CHARSET_INFO *cs); +void unhex_type2(TYPELIB *lib); +uint check_word(TYPELIB *lib, const char *val, const char *end, + const char **end_of_word); +int find_string_in_array(LEX_CSTRING * const haystack, + LEX_CSTRING * const needle, + CHARSET_INFO * const cs); +const char *flagset_to_string(THD *thd, LEX_CSTRING *result, ulonglong set, + const char 
*lib[]); +const char *set_to_string(THD *thd, LEX_CSTRING *result, ulonglong set, + const char *lib[]); + +/* + These functions were protected by INNODB_COMPATIBILITY_HOOKS + */ +uint strconvert(CHARSET_INFO *from_cs, const char *from, size_t from_length, + CHARSET_INFO *to_cs, char *to, size_t to_length, uint *errors); + +#endif /* STRFUNC_INCLUDED */ diff --git a/sql/structs.h b/sql/structs.h new file mode 100644 index 00000000..a77bb8cb --- /dev/null +++ b/sql/structs.h @@ -0,0 +1,1040 @@ +#ifndef STRUCTS_INCLUDED +#define STRUCTS_INCLUDED + +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. + Copyright (c) 2009, 2019, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
struct TABLE;
class Type_handler;
class Field;
class Index_statistics;

class THD;

/* Array index type for table.field[] */
typedef uint16 field_index_t;

/* Positions/separators of a parsed date-time format string. */
typedef struct st_date_time_format {
  uchar positions[8];
  char  time_separator;                 /* Separator between hour and minute */
  uint flag;                            /* For future */
  LEX_CSTRING format;
} DATE_TIME_FORMAT;


typedef struct st_keyfile_info {        /* used with ha_info() */
  uchar ref[MAX_REFLENGTH];             /* Pointer to current row */
  uchar dupp_ref[MAX_REFLENGTH];        /* Pointer to dupp row */
  uint ref_length;                      /* Length of ref (1-8) */
  uint block_size;                      /* index block size */
  File filenr;                          /* (uniq) filenr for table */
  ha_rows records;                      /* Records in the data file */
  ha_rows deleted;                      /* Deleted records */
  ulonglong data_file_length;           /* Length of data file */
  ulonglong max_data_file_length;       /* Max length of data file */
  ulonglong index_file_length;
  ulonglong max_index_file_length;
  ulonglong delete_length;              /* Free bytes */
  ulonglong auto_increment_value;
  int errkey,sortkey;                   /* Last errorkey and sorted by */
  time_t create_time;                   /* When table was created */
  time_t check_time;
  time_t update_time;
  ulong mean_rec_length;                /* physical reclength */
} KEYFILE_INFO;


typedef struct st_key_part_info {       /* Info about a key part */
  Field *field;                         /* the Field object for the indexed
                                           prefix of the original table Field.
                                           NOT necessarily the original Field */
  uint  offset;                         /* Offset in record (from 0) */
  uint  null_offset;                    /* Offset to null_bit in record */
  /* Length of key part in bytes, excluding NULL flag and length bytes */
  uint  length;
  /*
    Number of bytes required to store the keypart value. This may be
    different from the "length" field as it also counts
     - possible NULL-flag byte (see HA_KEY_NULL_LENGTH)
     - possible HA_KEY_BLOB_LENGTH bytes needed to store actual value length.
  */
  uint  store_length;
  uint16 key_type;
  field_index_t fieldnr;                /* Fieldnr begins counting from 1 */
  uint16 key_part_flag;                 /* 0 or HA_REVERSE_SORT */
  uint8 type;
  uint8 null_bit;                       /* Position to null_bit */
} KEY_PART_INFO ;

class engine_option_value;
struct ha_index_option_struct;

/* Descriptor of one index (KEY) of a table, shared via TABLE_SHARE. */
typedef struct st_key {
  uint  key_length;             /* total length of user defined key parts  */
  ulong flags;                  /* dupp key and pack flags */
  uint  user_defined_key_parts; /* How many key_parts */
  uint  usable_key_parts;       /* Should normally be = user_defined_key_parts */
  uint  ext_key_parts;          /* Number of key parts in extended key */
  ulong ext_key_flags;          /* Flags for extended key */
  /*
    Parts of primary key that are in the extension of this index.

    Example: if this structure describes idx1, which is defined as
      INDEX idx1 (pk2, col2)
    and pk is defined as:
      PRIMARY KEY (pk1, pk2)
    then
      pk1 is in the extension idx1, ext_key_part_map.is_set(0) == true
      pk2 is explicitly present in idx1, it is not in the extension, so
      ext_key_part_map.is_set(1) == false
  */
  key_part_map ext_key_part_map;
  /*
    Bitmap of indexes having common parts with this index
    (only key parts from key definitions are taken into account)
  */
  key_map overlapped;
  /* Set of keys constraint correlated with this key */
  key_map constraint_correlated;
  LEX_CSTRING name;
  uint  block_size;
  enum  ha_key_alg algorithm;
  /*
    The flag is on if statistical data for the index prefixes
    has to be taken from the system statistical tables.
  */
  bool is_statistics_from_stat_tables;
  /*
    Note that parser is used when the table is opened for use, and
    parser_name is used when the table is being created.
  */
  union
  {
    plugin_ref parser;                  /* Fulltext [pre]parser */
    LEX_CSTRING *parser_name;           /* Fulltext [pre]parser name */
  };
  KEY_PART_INFO *key_part;
  /* Unique name for cache;  db + \0 + table_name + \0 + key_name + \0 */
  uchar *cache_name;
  /*
    Array of AVG(#records with the same field value) for 1st ... Nth key part.
    0 means 'not known'.
    For temporary heap tables this member is NULL.
  */
  ulong *rec_per_key;

  /*
    This structure is used for statistical data on the index
    that has been read from the statistical table index_stat
  */
  Index_statistics *read_stats;
  /*
    This structure is used for statistical data on the index that
    is collected by the function collect_statistics_for_table
  */
  Index_statistics *collected_stats;

  TABLE *table;
  LEX_CSTRING comment;
  /** reference to the list of options or NULL */
  engine_option_value *option_list;
  ha_index_option_struct *option_struct;  /* structure with parsed options */

  double actual_rec_per_key(uint i);

  bool without_overlaps;
  /*
    TRUE if index needs to be ignored
  */
  bool is_ignored;
} KEY;


struct st_join_table;

typedef struct st_reginfo {             /* Extra info about reg */
  struct st_join_table *join_tab;       /* Used by SELECT() */
  enum thr_lock_type lock_type;         /* How database is used */
  bool skip_locked;
  bool not_exists_optimize;
  /*
    TRUE <=> range optimizer found that there is no rows satisfying
    table conditions.
  */
  bool impossible_range;
} REGINFO;


/*
  Originally MySQL used MYSQL_TIME structure inside server only, but since
  4.1 it's exported to user in the new client API. Define aliases for
  new names to keep existing code simple.
*/

typedef enum enum_mysql_timestamp_type timestamp_type;
/* Decomposed INTERVAL expression value (e.g. INTERVAL 1 DAY). */
typedef struct {
  ulong year,month,day,hour;
  ulonglong minute,second,second_part;
  bool neg;
} INTERVAL;


/* Named date/time format preset (ISO, EUR, USA, ...). */
typedef struct st_known_date_time_format {
  const char *format_name;
  const char *date_format;
  const char *datetime_format;
  const char *time_format;
} KNOWN_DATE_TIME_FORMAT;

extern const char *show_comp_option_name[];

typedef int *(*update_var)(THD *, struct st_mysql_show_var *);

/* Authentication clause of a user definition: plugin + auth string chain. */
struct USER_AUTH : public Sql_alloc
{
  LEX_CSTRING plugin, auth_str, pwtext;
  USER_AUTH *next;
  USER_AUTH() : next(NULL)
  {
    plugin.str= auth_str.str= "";
    pwtext.str= NULL;
    plugin.length= auth_str.length= pwtext.length= 0;
  }
};

/* 'user'@'host' pair; a role is a user with an empty host part. */
struct AUTHID
{
  LEX_CSTRING user, host;
  void init() { memset(this, 0, sizeof(*this)); }
  void copy(MEM_ROOT *root, const LEX_CSTRING *usr, const LEX_CSTRING *host);
  /* Roles have a non-empty user and an empty host.
     NOTE(review): dereferences user.str/host.str — callers must have
     initialized both (via init()/copy()/parse()). */
  bool is_role() const { return user.str[0] && !host.str[0]; }
  void set_lex_string(LEX_CSTRING *l, char *buf)
  {
    if (is_role())
      *l= user;
    else
    {
      l->str= buf;
      l->length= strxmov(buf, user.str, "@", host.str, NullS) - buf;
    }
  }
  void parse(const char *str, size_t length);
  bool read_from_mysql_proc_row(THD *thd, TABLE *table);
};


struct LEX_USER: public AUTHID
{
  USER_AUTH *auth;
  bool has_auth()
  {
    return auth && (auth->plugin.length || auth->auth_str.length ||
                    auth->pwtext.length);
  }
};

/*
  This structure specifies the maximum amount of resources which
  can be consumed by each account. Zero value of a member means
  there is no limit.
*/
typedef struct user_resources {
  /* Maximum number of queries/statements per hour. */
  uint questions;
  /*
     Maximum number of updating statements per hour (which statements are
     updating is defined by sql_command_flags array).
  */
  uint updates;
  /* Maximum number of connections established per hour. */
  uint conn_per_hour;
  /*
    Maximum number of concurrent connections. If -1 then no new
    connections allowed
  */
  int user_conn;
  /* Max query timeout */
  double max_statement_time;

  /*
     Values of this enum and specified_limits member are used by the
     parser to store which user limits were specified in GRANT statement.
  */
  enum {QUERIES_PER_HOUR= 1, UPDATES_PER_HOUR= 2, CONNECTIONS_PER_HOUR= 4,
        USER_CONNECTIONS= 8, MAX_STATEMENT_TIME= 16};
  uint specified_limits;
} USER_RESOURCES;


/*
  This structure is used for counting resources consumed and for checking
  them against specified user limits.
*/
typedef struct user_conn {
  /*
     Pointer to user+host key (pair separated by '\0') defining the entity
     for which resources are counted (By default it is user account thus
     priv_user/priv_host pair is used. If --old-style-user-limits option
     is enabled, resources are counted for each user+host separately).
  */
  char *user;
  /* Pointer to host part of the key. */
  char *host;
  /**
     The moment of time when per hour counters were reset last time
     (i.e. start of "hour" for conn_per_hour, updates, questions counters).
  */
  ulonglong reset_utime;
  /* Total length of the key. */
  uint len;
  /* Current amount of concurrent connections for this account. */
  int connections;
  /*
     Current number of connections per hour, number of updating statements
     per hour and total number of statements per hour for this account.
  */
  uint conn_per_hour, updates, questions;
  /* Maximum amount of resources which account is allowed to consume. */
  USER_RESOURCES user_resources;
} USER_CONN;

/* Per-user counters shown in INFORMATION_SCHEMA.USER_STATISTICS. */
typedef struct st_user_stats
{
  char user[MY_MAX(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];
  // Account name the user is mapped to when this is a user from mapped_user.
  // Otherwise, the same value as user.
  char priv_user[MY_MAX(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];
  uint user_name_length;
  uint total_connections;
  uint total_ssl_connections;
  uint concurrent_connections;
  time_t connected_time;  // in seconds
  ha_rows rows_read, rows_sent;
  ha_rows rows_updated, rows_deleted, rows_inserted;
  ulonglong bytes_received;
  ulonglong bytes_sent;
  ulonglong binlog_bytes_written;
  ulonglong select_commands, update_commands, other_commands;
  ulonglong commit_trans, rollback_trans;
  ulonglong denied_connections, lost_connections, max_statement_time_exceeded;
  ulonglong access_denied_errors;
  ulonglong empty_queries;
  double busy_time;       // in seconds
  double cpu_time;        // in seconds
} USER_STATS;

/* Per-table counters shown in INFORMATION_SCHEMA.TABLE_STATISTICS. */
typedef struct st_table_stats
{
  char table[NAME_LEN * 2 + 2];  // [db] + '\0' + [table] + '\0'
  size_t table_name_length;
  ulonglong rows_read, rows_changed;
  ulonglong rows_changed_x_indexes;
  /* Stores enum db_type, but forward declarations cannot be done */
  int engine_type;
} TABLE_STATS;

/* Per-index counters shown in INFORMATION_SCHEMA.INDEX_STATISTICS. */
typedef struct st_index_stats
{
  // [db] + '\0' + [table] + '\0' + [index] + '\0'
  char index[NAME_LEN * 3 + 3];
  size_t index_name_length;                /* Length of 'index' */
  ulonglong rows_read;
} INDEX_STATS;


        /* Bits in form->update */
#define REG_MAKE_DUPP           1U      /* Make a copy of record when read */
#define REG_NEW_RECORD          2U      /* Write a new record if not found */
#define REG_UPDATE              4U      /* Update record */
#define REG_DELETE              8U      /* Delete found record */
#define REG_PROG                16U     /* User is updating database */
#define REG_CLEAR_AFTER_WRITE   32U
/* NOTE(review): the next two deliberately share value 64 in upstream. */
#define REG_MAY_BE_UPDATED      64U
#define REG_AUTO_UPDATE         64U     /* Used in D-forms for scroll-tables */
#define REG_OVERWRITE           128U
#define REG_SKIP_DUP            256U

        /* Bits in form->status */
#define STATUS_NO_RECORD        (1U+2U) /* Record isn't usable */
#define STATUS_GARBAGE          1U
#define STATUS_NOT_FOUND        2U      /* No record in database when needed */
#define STATUS_NO_PARENT        4U      /* Parent record wasn't found */
#define STATUS_NOT_READ         8U      /* Record isn't read */
#define STATUS_UPDATED          16U     /* Record is updated by formula */
#define STATUS_NULL_ROW         32U     /* table->null_row is set */
#define STATUS_DELETED          64U
STATUS_NOT_READ 8U /* Record isn't read */ +#define STATUS_UPDATED 16U /* Record is updated by formula */ +#define STATUS_NULL_ROW 32U /* table->null_row is set */ +#define STATUS_DELETED 64U + +/* + Such interval is "discrete": it is the set of + { auto_inc_interval_min + k * increment, + 0 <= k <= (auto_inc_interval_values-1) } + Where "increment" is maintained separately by the user of this class (and is + currently only thd->variables.auto_increment_increment). + It mustn't derive from Sql_alloc, because SET INSERT_ID needs to + allocate memory which must stay allocated for use by the next statement. +*/ +class Discrete_interval { +private: + ulonglong interval_min; + ulonglong interval_values; + ulonglong interval_max; // excluded bound. Redundant. +public: + Discrete_interval *next; // used when linked into Discrete_intervals_list + void replace(ulonglong start, ulonglong val, ulonglong incr) + { + interval_min= start; + interval_values= val; + interval_max= (val == ULONGLONG_MAX) ? val : start + val * incr; + } + Discrete_interval(ulonglong start, ulonglong val, ulonglong incr) : + next(NULL) { replace(start, val, incr); }; + Discrete_interval() : next(NULL) { replace(0, 0, 0); }; + ulonglong minimum() const { return interval_min; }; + ulonglong values() const { return interval_values; }; + ulonglong maximum() const { return interval_max; }; + /* + If appending [3,5] to [1,2], we merge both in [1,5] (they should have the + same increment for that, user of the class has to ensure that). That is + just a space optimization. Returns 0 if merge succeeded. 
+ */ + bool merge_if_contiguous(ulonglong start, ulonglong val, ulonglong incr) + { + if (interval_max == start) + { + if (val == ULONGLONG_MAX) + { + interval_values= interval_max= val; + } + else + { + interval_values+= val; + interval_max= start + val * incr; + } + return 0; + } + return 1; + }; +}; + +/* List of Discrete_interval objects */ +class Discrete_intervals_list { +private: + Discrete_interval *head; + Discrete_interval *tail; + /* + When many intervals are provided at the beginning of the execution of a + statement (in a replication slave or SET INSERT_ID), "current" points to + the interval being consumed by the thread now (so "current" goes from + "head" to "tail" then to NULL). + */ + Discrete_interval *current; + uint elements; // number of elements + void set_members(Discrete_interval *h, Discrete_interval *t, + Discrete_interval *c, uint el) + { + head= h; + tail= t; + current= c; + elements= el; + } + void operator=(Discrete_intervals_list &); /* prevent use of these */ + Discrete_intervals_list(const Discrete_intervals_list &); + +public: + Discrete_intervals_list() : head(NULL), current(NULL), elements(0) {}; + void empty_no_free() + { + set_members(NULL, NULL, NULL, 0); + } + void empty() + { + for (Discrete_interval *i= head; i;) + { + Discrete_interval *next= i->next; + delete i; + i= next; + } + empty_no_free(); + } + void copy_shallow(const Discrete_intervals_list * dli) + { + head= dli->get_head(); + tail= dli->get_tail(); + current= dli->get_current(); + elements= dli->nb_elements(); + } + void swap (Discrete_intervals_list * dli) + { + Discrete_interval *h, *t, *c; + uint el; + h= dli->get_head(); + t= dli->get_tail(); + c= dli->get_current(); + el= dli->nb_elements(); + dli->copy_shallow(this); + set_members(h, t, c, el); + } + const Discrete_interval* get_next() + { + Discrete_interval *tmp= current; + if (current != NULL) + current= current->next; + return tmp; + } + ~Discrete_intervals_list() { empty(); }; + bool append(ulonglong 
start, ulonglong val, ulonglong incr); + bool append(Discrete_interval *interval); + ulonglong minimum() const { return (head ? head->minimum() : 0); }; + ulonglong maximum() const { return (head ? tail->maximum() : 0); }; + uint nb_elements() const { return elements; } + Discrete_interval* get_head() const { return head; }; + Discrete_interval* get_tail() const { return tail; }; + Discrete_interval* get_current() const { return current; }; +}; + + +/* + DDL options: + - CREATE IF NOT EXISTS + - DROP IF EXISTS + - CREATE LIKE + - REPLACE +*/ +struct DDL_options_st +{ +public: + enum Options + { + OPT_NONE= 0, + OPT_IF_NOT_EXISTS= 2, // CREATE TABLE IF NOT EXISTS + OPT_LIKE= 4, // CREATE TABLE LIKE + OPT_OR_REPLACE= 16, // CREATE OR REPLACE TABLE + OPT_OR_REPLACE_SLAVE_GENERATED= 32,// REPLACE was added on slave, it was + // not in the original query on master. + OPT_IF_EXISTS= 64, + OPT_CREATE_SELECT= 128 // CREATE ... SELECT + }; + +private: + Options m_options; + +public: + Options create_like_options() const + { + return (DDL_options_st::Options) + (((uint) m_options) & (OPT_IF_NOT_EXISTS | OPT_OR_REPLACE)); + } + void init() { m_options= OPT_NONE; } + void init(Options options) { m_options= options; } + void set(Options other) + { + m_options= other; + } + void set(const DDL_options_st other) + { + m_options= other.m_options; + } + bool if_not_exists() const { return m_options & OPT_IF_NOT_EXISTS; } + bool or_replace() const { return m_options & OPT_OR_REPLACE; } + bool or_replace_slave_generated() const + { return m_options & OPT_OR_REPLACE_SLAVE_GENERATED; } + bool like() const { return m_options & OPT_LIKE; } + bool if_exists() const { return m_options & OPT_IF_EXISTS; } + bool is_create_select() const { return m_options & OPT_CREATE_SELECT; } + + void add(const DDL_options_st::Options other) + { + m_options= (Options) ((uint) m_options | (uint) other); + } + void add(const DDL_options_st &other) + { + add(other.m_options); + } + DDL_options_st 
class DDL_options: public DDL_options_st
{
public:
  DDL_options() { init(); }
  DDL_options(Options options) { init(options); }
  DDL_options(const DDL_options_st &options)
  { DDL_options_st::operator=(options); }
};


/*
  Parsed length/decimals attributes of a column type, together with
  "was it explicitly given?" and overflow flags, packed into bit-fields.
*/
struct Lex_length_and_dec_st
{
protected:
  uint32 m_length;
  uint8 m_dec;
  uint8 m_collation_type:LEX_CHARSET_COLLATION_TYPE_BITS;
  bool m_has_explicit_length:1;
  bool m_has_explicit_dec:1;
  bool m_length_overflowed:1;
  bool m_dec_overflowed:1;

  static_assert(LEX_CHARSET_COLLATION_TYPE_BITS <= 8,
                "Lex_length_and_dec_st::m_collation_type bits check");

public:
  void reset()
  {
    m_length= 0;
    m_dec= 0;
    m_collation_type= 0;
    m_has_explicit_length= false;
    m_has_explicit_dec= false;
    m_length_overflowed= false;
    m_dec_overflowed= false;
  }
  void set_length_only(uint32 length)
  {
    m_length= length;
    m_dec= 0;
    m_collation_type= 0;
    m_has_explicit_length= true;
    m_has_explicit_dec= false;
    m_length_overflowed= false;
    m_dec_overflowed= false;
  }
  void set_dec_only(uint8 dec)
  {
    m_length= 0;
    m_dec= dec;
    m_collation_type= 0;
    m_has_explicit_length= false;
    m_has_explicit_dec= true;
    m_length_overflowed= false;
    m_dec_overflowed= false;
  }
  void set_length_and_dec(uint32 length, uint8 dec)
  {
    m_length= length;
    m_dec= dec;
    m_collation_type= 0;
    m_has_explicit_length= true;
    m_has_explicit_dec= true;
    m_length_overflowed= false;
    m_dec_overflowed= false;
  }
  /* Parse from string forms (defined out of line). */
  void set(const char *length, const char *dec);
  uint32 length() const
  {
    return m_length;
  }
  uint8 dec() const
  {
    return m_dec;
  }
  bool has_explicit_length() const
  {
    return m_has_explicit_length;
  }
  bool has_explicit_dec() const
  {
    return m_has_explicit_dec;
  }
  bool length_overflowed() const
  {
    return m_length_overflowed;
  }
  bool dec_overflowed() const
  {
    return m_dec_overflowed;
  }
};


/* A column data type as parsed: handler + length/dec + charset/collation. */
struct Lex_field_type_st: public Lex_length_and_dec_st
{
private:
  const Type_handler *m_handler;
  CHARSET_INFO *m_ci;
public:
  void set(const Type_handler *handler,
           Lex_length_and_dec_st length_and_dec,
           CHARSET_INFO *cs= NULL)
  {
    m_handler= handler;
    m_ci= cs;
    Lex_length_and_dec_st::operator=(length_and_dec);
  }
  void set(const Type_handler *handler,
           const Lex_length_and_dec_st &length_and_dec,
           const Lex_column_charset_collation_attrs_st &coll)
  {
    m_handler= handler;
    m_ci= coll.charset_info();
    Lex_length_and_dec_st::operator=(length_and_dec);
    // Using bit-and to avoid the warning:
    // conversion from 'uint8' to 'unsigned char:3' may change value
    m_collation_type= ((uint8) coll.type()) & LEX_CHARSET_COLLATION_TYPE_MASK;
  }
  void set(const Type_handler *handler,
           const Lex_column_charset_collation_attrs_st &coll)
  {
    m_handler= handler;
    m_ci= coll.charset_info();
    Lex_length_and_dec_st::reset();
    // Using bit-and to avoid the warning:
    // conversion from 'uint8' to 'unsigned char:3' may change value
    m_collation_type= ((uint8) coll.type()) & LEX_CHARSET_COLLATION_TYPE_MASK;
  }
  void set(const Type_handler *handler, CHARSET_INFO *cs= NULL)
  {
    m_handler= handler;
    m_ci= cs;
    Lex_length_and_dec_st::reset();
  }
  void set_handler_length_flags(const Type_handler *handler,
                                const Lex_length_and_dec_st &length,
                                uint32 flags);
  void set_handler_length(const Type_handler *handler, uint32 length)
  {
    m_handler= handler;
    m_ci= NULL;
    Lex_length_and_dec_st::set_length_only(length);
  }
  void set_handler(const Type_handler *handler)
  {
    m_handler= handler;
  }
  const Type_handler *type_handler() const { return m_handler; }
  CHARSET_INFO *charset_collation() const { return m_ci; }
  Lex_column_charset_collation_attrs charset_collation_attrs() const
  {
    return Lex_column_charset_collation_attrs(m_ci,
             (Lex_column_charset_collation_attrs_st::Type)
             m_collation_type);
  }
};


/* A dynamic-column type: like Lex_field_type_st but keyed by an int code. */
struct Lex_dyncol_type_st: public Lex_length_and_dec_st
{
private:
  int m_type;  // enum_dynamic_column_type is not visible here, so use int
  CHARSET_INFO *m_ci;
public:
  void set(int type, Lex_length_and_dec_st length_and_dec,
           CHARSET_INFO *cs= NULL)
  {
    m_type= type;
    m_ci= cs;
    Lex_length_and_dec_st::operator=(length_and_dec);
  }
  void set(int type)
  {
    m_type= type;
    m_ci= NULL;
    Lex_length_and_dec_st::reset();
  }
  void set(int type, CHARSET_INFO *cs)
  {
    m_type= type;
    m_ci= cs;
    Lex_length_and_dec_st::reset();
  }
  /* Returns true (error) if the collation cannot resolve against charset. */
  bool set(int type, const Lex_column_charset_collation_attrs_st &collation,
           CHARSET_INFO *charset)
  {
    CHARSET_INFO *tmp= collation.resolved_to_character_set(charset);
    if (!tmp)
      return true;
    set(type, tmp);
    return false;
  }
  int dyncol_type() const { return m_type; }
  CHARSET_INFO *charset_collation() const { return m_ci; }
};


/* Counter of HANDLER declarations in a stored-program block. */
struct Lex_spblock_handlers_st
{
public:
  int hndlrs;
  void init(int count) { hndlrs= count; }
};


/* Declaration counters (variables/conditions/cursors/handlers) of a block. */
struct Lex_spblock_st: public Lex_spblock_handlers_st
{
public:
  int vars;
  int conds;
  int curs;
  void init()
  {
    vars= conds= hndlrs= curs= 0;
  }
  void init_using_vars(uint nvars)
  {
    vars= nvars;
    conds= hndlrs= curs= 0;
  }
  void join(const Lex_spblock_st &b1, const Lex_spblock_st &b2)
  {
    vars= b1.vars + b2.vars;
    conds= b1.conds + b2.conds;
    hndlrs= b1.hndlrs + b2.hndlrs;
    curs= b1.curs + b2.curs;
  }
};


class Lex_spblock: public Lex_spblock_st
{
public:
  Lex_spblock() { init(); }
  Lex_spblock(const Lex_spblock_handlers_st &other)
  {
    vars= conds= curs= 0;
    hndlrs= other.hndlrs;
  }
};


/* Bounds of a stored-program FOR loop (numeric range or cursor). */
struct Lex_for_loop_bounds_st
{
public:
  class sp_assignment_lex *m_index;        // The first iteration value (or cursor)
  class sp_assignment_lex *m_target_bound; // The last iteration value
  int8 m_direction;
  bool m_implicit_cursor;
  bool is_for_loop_cursor() const { return m_target_bound == NULL; }
};
class Lex_for_loop_bounds_intrange: public Lex_for_loop_bounds_st
{
public:
  /* For a descending loop the bounds are swapped so that m_index is
     always the starting value. */
  Lex_for_loop_bounds_intrange(int8 direction,
                               class sp_assignment_lex *left_expr,
                               class sp_assignment_lex *right_expr)
  {
    m_direction= direction;
    m_index=        direction > 0 ? left_expr  : right_expr;
    m_target_bound= direction > 0 ? right_expr : left_expr;
    m_implicit_cursor= false;
  }
};


/* Resolved FOR loop state used during stored-program execution. */
struct Lex_for_loop_st
{
public:
  class sp_variable *m_index;        // The first iteration value (or cursor)
  class sp_variable *m_target_bound; // The last iteration value
  int m_cursor_offset;
  int8 m_direction;
  bool m_implicit_cursor;
  void init()
  {
    m_index= 0;
    m_target_bound= 0;
    m_cursor_offset= 0;
    m_direction= 0;
    m_implicit_cursor= false;
  }
  bool is_for_loop_cursor() const { return m_target_bound == NULL; }
  bool is_for_loop_explicit_cursor() const
  {
    return is_for_loop_cursor() && !m_implicit_cursor;
  }
};


enum trim_spec { TRIM_LEADING, TRIM_TRAILING, TRIM_BOTH };

/* Operands of TRIM(remove FROM source) and its LTRIM/RTRIM variants. */
struct Lex_trim_st
{
  Item *m_remove;
  Item *m_source;
  trim_spec m_spec;
public:
  void set(trim_spec spec, Item *remove, Item *source)
  {
    m_spec= spec;
    m_remove= remove;
    m_source= source;
  }
  void set(trim_spec spec, Item *source)
  {
    set(spec, NULL, source);
  }
  Item *make_item_func_trim_std(THD *thd) const;
  Item *make_item_func_trim_oracle(THD *thd) const;
  /*
    This method is still used to handle LTRIM and RTRIM,
    while the special syntax TRIM(... BOTH|LEADING|TRAILING)
    is now handled by Schema::make_item_func_trim().
  */
  Item *make_item_func_trim(THD *thd) const;
};


class Lex_trim: public Lex_trim_st
{
public:
  Lex_trim(trim_spec spec, Item *source) { set(spec, source); }
};


/* Operands of SUBSTRING(subject FROM from [FOR for]). */
class Lex_substring_spec_st
{
public:
  Item *m_subject;
  Item *m_from;
  Item *m_for;
  static Lex_substring_spec_st init(Item *subject,
                                    Item *from,
                                    Item *xfor= NULL)
  {
    Lex_substring_spec_st res;
    res.m_subject= subject;
    res.m_from= from;
    res.m_for= xfor;
    return res;
  }
};


class st_select_lex;

/* Parsed FOR UPDATE / LOCK IN SHARE MODE clause flags. */
class Lex_select_lock
{
public:
  struct
  {
    uint defined_lock : 1;
    uint update_lock : 1;
    uint defined_timeout : 1;
    uint skip_locked : 1;
  };
  ulong timeout;


  void empty()
  {
    defined_lock= update_lock= defined_timeout= skip_locked= FALSE;
    timeout= 0;
  }
  void set_to(st_select_lex *sel);
};

/* Parsed LIMIT/OFFSET (and WITH TIES) clause. */
class Lex_select_limit
{
public:
  /* explicit LIMIT clause was used */
  bool explicit_limit;
  bool with_ties;
  Item *select_limit, *offset_limit;

  void clear()
  {
    explicit_limit= FALSE;    // No explicit limit given by user
    with_ties= FALSE;         // No use of WITH TIES operator
    select_limit= NULL;       // denotes the default limit = HA_POS_ERROR
    offset_limit= NULL;       // denotes the default offset = 0
  }
};

struct st_order;

/* Accumulated properties of a LOAD DATA target field list. */
class Load_data_param
{
protected:
  CHARSET_INFO *m_charset;   // Character set of the file
  ulonglong m_fixed_length;  // Sum of target field lengths for fixed format
  bool m_is_fixed_length;
  bool m_use_blobs;
public:
  Load_data_param(CHARSET_INFO *cs, bool is_fixed_length):
    m_charset(cs),
    m_fixed_length(0),
    m_is_fixed_length(is_fixed_length),
    m_use_blobs(false)
  { }
  bool add_outvar_field(THD *thd, const Field *field);
  bool add_outvar_user_var(THD *thd);
  CHARSET_INFO *charset() const { return m_charset; }
  bool is_fixed_length() const { return m_is_fixed_length; }
  bool use_blobs() const { return m_use_blobs; }
};


/* Abstract target of LOAD DATA (a table Field or a user variable). */
class Load_data_outvar
{
public:
  virtual ~Load_data_outvar() = default;
  virtual bool load_data_set_null(THD *thd, const Load_data_param *param)= 0;
  virtual bool load_data_set_value(THD *thd, const char *pos, uint length,
                                   const Load_data_param *param)= 0;
  virtual bool load_data_set_no_data(THD *thd, const Load_data_param *param)= 0;
  virtual void load_data_print_for_log_event(THD *thd, class String *to) const= 0;
  virtual bool load_data_add_outvar(THD *thd, Load_data_param *param) const= 0;
  virtual uint load_data_fixed_length() const= 0;
};


/* Thin wrapper over struct timeval with a checked (sec, usec) constructor. */
class Timeval: public timeval
{
protected:
  Timeval() = default;
public:
  Timeval(my_time_t sec, ulong usec)
  {
    tv_sec= sec;
    /*
      Since tv_usec is not always of type ulong, cast usec parameter
      explicitly to uint to avoid compiler warnings about losing
      integer precision.
    */
    DBUG_ASSERT(usec < 1000000);
    tv_usec= (uint)usec;
  }
  explicit Timeval(const timeval &tv)
   :timeval(tv)
  { }
};


#endif /* STRUCTS_INCLUDED */
copy one of the existing variables, and edit the declaration. + 2. if you need special behavior on assignment or additional checks + use ON_CHECK and ON_UPDATE callbacks. + 3. *Don't* add new Sys_var classes or uncle Occam will come + with his razor to haunt you at nights + + Note - all storage engine variables (for example myisam_whatever) + should go into the corresponding storage engine sources + (for example in storage/myisam/ha_myisam.cc) ! +*/ + +#include "sql_plugin.h" +#include "sql_priv.h" +#include "sql_class.h" // set_var.h: THD +#include "sys_vars.inl" +#include "my_sys.h" + +#include "events.h" +#include +#include "slave.h" +#include "rpl_mi.h" +#include "transaction.h" +#include "mysqld.h" +#include "lock.h" +#include "sql_time.h" // known_date_time_formats +#include "sql_acl.h" // mysql_user_table_is_in_short_password_format +#include "derror.h" // read_texts +#include "sql_base.h" // close_cached_tables +#include "hostname.h" // host_cache_size +#include +#include "debug_sync.h" // DEBUG_SYNC +#include "sql_show.h" +#include "opt_trace_context.h" + +#include "log_event.h" +#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE +#include "../storage/perfschema/pfs_server.h" +#endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */ +#include "threadpool.h" +#include "sql_repl.h" +#include "opt_range.h" +#include "rpl_parallel.h" +#include "semisync_master.h" +#include "semisync_slave.h" +#include +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif + +#define PCRE2_STATIC 1 /* Important on Windows */ +#include "pcre2.h" /* pcre2 header file */ + +/* + The rule for this file: everything should be 'static'. When a sys_var + variable or a function from this file is - in very rare cases - needed + elsewhere it should be explicitly declared 'export' here to show that it's + not a mistakenly forgotten 'static' keyword. 
+*/ +#define export /* not static */ + +#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE + +static Sys_var_mybool Sys_pfs_enabled( + "performance_schema", "Enable the performance schema.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_enabled), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_long Sys_pfs_events_waits_history_long_size( + "performance_schema_events_waits_history_long_size", + "Number of rows in EVENTS_WAITS_HISTORY_LONG." + " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_events_waits_history_long_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_events_waits_history_size( + "performance_schema_events_waits_history_size", + "Number of rows per thread in EVENTS_WAITS_HISTORY." + " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_events_waits_history_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_pfs_max_cond_classes( + "performance_schema_max_cond_classes", + "Maximum number of condition instruments.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_cond_class_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 256), + DEFAULT(PFS_MAX_COND_CLASS), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_max_cond_instances( + "performance_schema_max_cond_instances", + "Maximum number of instrumented condition objects." + " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_cond_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_max_program_instances( + "performance_schema_max_program_instances", + "Maximum number of instrumented programs." 
+ " Use 0 to disable, -1 for automated scaling.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_program_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_max_prepared_stmt_instances( + "performance_schema_max_prepared_statements_instances", + "Maximum number of instrumented prepared statements." + " Use 0 to disable, -1 for automated scaling.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_prepared_stmt_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_pfs_max_file_classes( + "performance_schema_max_file_classes", + "Maximum number of file instruments.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_file_class_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 256), + DEFAULT(PFS_MAX_FILE_CLASS), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_pfs_max_file_handles( + "performance_schema_max_file_handles", + "Maximum number of opened instrumented files.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_file_handle_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1024*1024), + DEFAULT(PFS_MAX_FILE_HANDLE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_max_file_instances( + "performance_schema_max_file_instances", + "Maximum number of instrumented files." + " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_file_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_max_sockets( + "performance_schema_max_socket_instances", + "Maximum number of opened instrumented sockets." 
+ " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_socket_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_pfs_max_socket_classes( + "performance_schema_max_socket_classes", + "Maximum number of socket instruments.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_socket_class_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 256), + DEFAULT(PFS_MAX_SOCKET_CLASS), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_pfs_max_mutex_classes( + "performance_schema_max_mutex_classes", + "Maximum number of mutex instruments.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_mutex_class_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 256), + DEFAULT(PFS_MAX_MUTEX_CLASS), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_max_mutex_instances( + "performance_schema_max_mutex_instances", + "Maximum number of instrumented MUTEX objects." + " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_mutex_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 100*1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_pfs_max_rwlock_classes( + "performance_schema_max_rwlock_classes", + "Maximum number of rwlock instruments.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_rwlock_class_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 256), + DEFAULT(PFS_MAX_RWLOCK_CLASS), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_max_rwlock_instances( + "performance_schema_max_rwlock_instances", + "Maximum number of instrumented RWLOCK objects." + " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_rwlock_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 100*1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_max_table_handles( + "performance_schema_max_table_handles", + "Maximum number of opened instrumented tables." 
+ " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_table_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_max_table_instances( + "performance_schema_max_table_instances", + "Maximum number of instrumented tables." + " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_table_share_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_max_table_lock_stat( + "performance_schema_max_table_lock_stat", + "Maximum number of lock statistics for instrumented tables." + " Use 0 to disable, -1 for automated scaling.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_table_lock_stat_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_max_index_stat( + "performance_schema_max_index_stat", + "Maximum number of index statistics for instrumented tables." + " Use 0 to disable, -1 for automated scaling.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_index_stat_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_pfs_max_thread_classes( + "performance_schema_max_thread_classes", + "Maximum number of thread instruments.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_thread_class_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 256), + DEFAULT(PFS_MAX_THREAD_CLASS), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_max_thread_instances( + "performance_schema_max_thread_instances", + "Maximum number of instrumented threads." 
+ " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_thread_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_setup_actors_size( + "performance_schema_setup_actors_size", + "Maximum number of rows in SETUP_ACTORS.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_setup_actor_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_setup_objects_size( + "performance_schema_setup_objects_size", + "Maximum number of rows in SETUP_OBJECTS.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_setup_object_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_accounts_size( + "performance_schema_accounts_size", + "Maximum number of instrumented user@host accounts." + " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_account_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_hosts_size( + "performance_schema_hosts_size", + "Maximum number of instrumented hosts." + " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_host_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_users_size( + "performance_schema_users_size", + "Maximum number of instrumented users." 
+ " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_user_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_pfs_max_stage_classes( + "performance_schema_max_stage_classes", + "Maximum number of stage instruments.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_stage_class_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 256), + DEFAULT(PFS_MAX_STAGE_CLASS), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_events_stages_history_long_size( + "performance_schema_events_stages_history_long_size", + "Number of rows in EVENTS_STAGES_HISTORY_LONG." + " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_events_stages_history_long_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_events_stages_history_size( + "performance_schema_events_stages_history_size", + "Number of rows per thread in EVENTS_STAGES_HISTORY." + " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_events_stages_history_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +/** + Variable performance_schema_max_statement_classes. + The default number of statement classes is the sum of: + - SQLCOM_END for all regular "statement/sql/...", + - SP_PSI_STATEMENT_INFO_COUNT for "statement/sp/...". + - (COM_END - mariadb gap) for all regular "statement/com/...", + - 1 for "statement/com/new_packet", for unknown enum_server_command + - 1 for "statement/com/Error", for invalid enum_server_command + - 1 for "statement/sql/error", for invalid enum_sql_command + - 1 for "statement/rpl/relay_log", for replicated statements. + - 1 for "statement/scheduler/event", for scheduled events. 
+*/ +static Sys_var_ulong Sys_pfs_max_statement_classes( + "performance_schema_max_statement_classes", + "Maximum number of statement instruments.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_statement_class_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 256), + DEFAULT((ulong) SQLCOM_END + SP_PSI_STATEMENT_INFO_COUNT + + (ulong) (COM_END -(COM_MDB_GAP_END - COM_MDB_GAP_BEG + 1)) + 5), + BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_events_statements_history_long_size( + "performance_schema_events_statements_history_long_size", + "Number of rows in EVENTS_STATEMENTS_HISTORY_LONG." + " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_events_statements_history_long_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_events_statements_history_size( + "performance_schema_events_statements_history_size", + "Number of rows per thread in EVENTS_STATEMENTS_HISTORY." 
+ " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_events_statements_history_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_pfs_statement_stack_size( + "performance_schema_max_statement_stack", + "Number of rows per thread in EVENTS_STATEMENTS_CURRENT.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_statement_stack_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(1, 256), + DEFAULT(PFS_STATEMENTS_STACK_SIZE), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_pfs_max_memory_classes( + "performance_schema_max_memory_classes", + "Maximum number of memory pool instruments.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_memory_class_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1024), + DEFAULT(PFS_MAX_MEMORY_CLASS), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_digest_size( + "performance_schema_digests_size", + "Size of the statement digest." + " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_digest_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_events_transactions_history_long_size( + "performance_schema_events_transactions_history_long_size", + "Number of rows in EVENTS_TRANSACTIONS_HISTORY_LONG." + " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_events_transactions_history_long_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024*1024), + DEFAULT(PFS_AUTOSIZE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_events_transactions_history_size( + "performance_schema_events_transactions_history_size", + "Number of rows per thread in EVENTS_TRANSACTIONS_HISTORY." 
+ " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_events_transactions_history_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024), + DEFAULT(PFS_AUTOSIZE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_max_digest_length( + "performance_schema_max_digest_length", + "Maximum length considered for digest text, when stored in performance_schema tables.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_max_digest_length), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1024 * 1024), + DEFAULT(1024), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_connect_attrs_size( + "performance_schema_session_connect_attrs_size", + "Size of session attribute string buffer per thread." + " Use 0 to disable, -1 for automated sizing.", + PARSED_EARLY READ_ONLY + GLOBAL_VAR(pfs_param.m_session_connect_attrs_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 1024 * 1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_max_metadata_locks( + "performance_schema_max_metadata_locks", + "Maximum number of metadata locks." 
+ " Use 0 to disable, -1 for automated scaling.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_metadata_lock_sizing), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(-1, 100*1024*1024), + DEFAULT(PFS_AUTOSCALE_VALUE), BLOCK_SIZE(1)); + +static Sys_var_long Sys_pfs_max_sql_text_length( + "performance_schema_max_sql_text_length", + "Maximum length of displayed sql text.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(pfs_param.m_max_sql_text_length), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1024 * 1024), + DEFAULT(1024), BLOCK_SIZE(1)); + +#endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */ + +#ifdef WITH_WSREP + +/* + We need to keep the original values set by the user, as they will + be lost if wsrep_auto_increment_control set to 'ON': +*/ +static bool update_auto_increment_increment (sys_var *self, THD *thd, enum_var_type type) +{ + if (type == OPT_GLOBAL) + global_system_variables.saved_auto_increment_increment= + global_system_variables.auto_increment_increment; + else + thd->variables.saved_auto_increment_increment= + thd->variables.auto_increment_increment; + return false; +} + +#endif /* WITH_WSREP */ + +static Sys_var_double Sys_analyze_sample_percentage( + "analyze_sample_percentage", + "Percentage of rows from the table ANALYZE TABLE will sample " + "to collect table statistics. 
Set to 0 to let MariaDB decide " + "what percentage of rows to sample.", + SESSION_VAR(sample_percentage), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 100), + DEFAULT(100)); + +static Sys_var_ulong Sys_auto_increment_increment( + "auto_increment_increment", + "Auto-increment columns are incremented by this", + SESSION_VAR(auto_increment_increment), + CMD_LINE(OPT_ARG), + VALID_RANGE(1, 65535), DEFAULT(1), BLOCK_SIZE(1), +#ifdef WITH_WSREP + NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(0), + ON_UPDATE(update_auto_increment_increment)); +#else + NO_MUTEX_GUARD, IN_BINLOG); +#endif /* WITH_WSREP */ + +#ifdef WITH_WSREP + +/* + We need to keep the original values set by the user, as they will + be lost if wsrep_auto_increment_control set to 'ON': +*/ +static bool update_auto_increment_offset (sys_var *self, THD *thd, enum_var_type type) +{ + if (type == OPT_GLOBAL) + global_system_variables.saved_auto_increment_offset= + global_system_variables.auto_increment_offset; + else + thd->variables.saved_auto_increment_offset= + thd->variables.auto_increment_offset; + return false; +} + +#endif /* WITH_WSREP */ + +static Sys_var_ulong Sys_auto_increment_offset( + "auto_increment_offset", + "Offset added to Auto-increment columns. Used when " + "auto-increment-increment != 1", + SESSION_VAR(auto_increment_offset), + CMD_LINE(OPT_ARG), + VALID_RANGE(1, 65535), DEFAULT(1), BLOCK_SIZE(1), +#ifdef WITH_WSREP + NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(0), + ON_UPDATE(update_auto_increment_offset)); +#else + NO_MUTEX_GUARD, IN_BINLOG); +#endif /* WITH_WSREP */ + +static Sys_var_mybool Sys_automatic_sp_privileges( + "automatic_sp_privileges", + "Creating and dropping stored procedures alters ACLs", + GLOBAL_VAR(sp_automatic_privileges), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_ulong Sys_back_log( + "back_log", "The number of outstanding connection requests " + "MariaDB can have. 
This comes into play when the main MariaDB thread " + "gets very many connection requests in a very short time", + AUTO_SET READ_ONLY GLOBAL_VAR(back_log), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 65535), DEFAULT(150), BLOCK_SIZE(1)); + +static Sys_var_charptr_fscs Sys_basedir( + "basedir", "Path to installation directory. All paths are " + "usually resolved relative to this", + READ_ONLY GLOBAL_VAR(mysql_home_ptr), CMD_LINE(REQUIRED_ARG, 'b'), + DEFAULT(0)); + +static Sys_var_charptr_fscs Sys_my_bind_addr( + "bind_address", "IP address to bind to. Several addresses may be " + "specified, separated by a comma (,).", + READ_ONLY GLOBAL_VAR(my_bind_addr_str), CMD_LINE(REQUIRED_ARG), + DEFAULT(0)); + +static Sys_var_vers_asof Sys_vers_asof_timestamp( + "system_versioning_asof", "Default value for the FOR SYSTEM_TIME AS OF clause", + SESSION_VAR(vers_asof_timestamp.type), NO_CMD_LINE, + DEFAULT(SYSTEM_TIME_UNSPECIFIED)); + +static const char *vers_alter_history_keywords[]= {"ERROR", "KEEP", NullS}; +static Sys_var_enum Sys_vers_alter_history( + "system_versioning_alter_history", "Versioning ALTER TABLE mode. " + "ERROR: Fail ALTER with error; " /* TODO: fail only when history non-empty */ + "KEEP: Keep historical system rows and subject them to ALTER", + SESSION_VAR(vers_alter_history), CMD_LINE(REQUIRED_ARG), + vers_alter_history_keywords, DEFAULT(VERS_ALTER_HISTORY_ERROR)); + +static Sys_var_on_access_global +Sys_binlog_cache_size( + "binlog_cache_size", "The size of the transactional cache for " + "updates to transactional engines for the binary log. 
" + "If you often use transactions containing many statements, " + "you can increase this to get more performance", + GLOBAL_VAR(binlog_cache_size), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(IO_SIZE, SIZE_T_MAX), DEFAULT(32768), BLOCK_SIZE(IO_SIZE)); + +static Sys_var_on_access_global +Sys_binlog_file_cache_size( + "binlog_file_cache_size", + "The size of file cache for the binary log", + GLOBAL_VAR(binlog_file_cache_size), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(IO_SIZE*2, SIZE_T_MAX), DEFAULT(IO_SIZE*4), BLOCK_SIZE(IO_SIZE)); + +static Sys_var_on_access_global +Sys_binlog_stmt_cache_size( + "binlog_stmt_cache_size", "The size of the statement cache for " + "updates to non-transactional engines for the binary log. " + "If you often use statements updating a great number of rows, " + "you can increase this to get more performance.", + GLOBAL_VAR(binlog_stmt_cache_size), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(IO_SIZE, SIZE_T_MAX), DEFAULT(32768), BLOCK_SIZE(IO_SIZE)); + +/* + Some variables like @sql_log_bin and @binlog_format change how/if binlogging + is done. We must not change them inside a running transaction or statement, + otherwise the event group eventually written to the binlog may become + incomplete or otherwise garbled. + + This function does the appropriate check. + + It returns true if an error is caused by incorrect usage, false if ok. 
+*/ +static bool +error_if_in_trans_or_substatement(THD *thd, int in_substatement_error, + int in_transaction_error) +{ + if (unlikely(thd->in_sub_stmt)) + { + my_error(in_substatement_error, MYF(0)); + return true; + } + + if (unlikely(thd->in_active_multi_stmt_transaction())) + { + my_error(in_transaction_error, MYF(0)); + return true; + } + + return false; +} + +bool check_has_super(sys_var *self, THD *thd, set_var *var) +{ + DBUG_ASSERT(self->scope() != sys_var::GLOBAL);// don't abuse check_has_super() +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (!(thd->security_ctx->master_access & + PRIV_SET_RESTRICTED_SESSION_SYSTEM_VARIABLE)) + { + my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), "SUPER"); + return true; + } +#endif + return false; +} + +static Sys_var_bit Sys_core_file("core_file", "Write core on crashes", + READ_ONLY GLOBAL_VAR(test_flags), CMD_LINE(OPT_ARG), + TEST_CORE_ON_SIGNAL, DEFAULT(IF_WIN(TRUE,FALSE))); + +static bool binlog_format_check(sys_var *self, THD *thd, set_var *var) +{ + /* + MariaDB Galera does not support STATEMENT or MIXED binlog format currently. + */ + if ((WSREP(thd) || opt_support_flashback) && + var->save_result.ulonglong_value != BINLOG_FORMAT_ROW) + { + // Push a warning to the error log. + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "MariaDB Galera and flashback do not support binlog format: %s", + binlog_format_names[var->save_result.ulonglong_value]); + /* + We allow setting up binlog_format other then ROW for session scope when + wsrep/flasback is enabled.This is done because of 2 reasons + 1. User might want to run pt-table-checksum. + 2. 
SuperUser knows what is doing :-) + + For refrence:- MDEV-7322 + */ + if (var->type == OPT_GLOBAL) + { + if (WSREP(thd)) + WSREP_ERROR("MariaDB Galera does not support binlog format: %s", + binlog_format_names[var->save_result.ulonglong_value]); + else + my_error(ER_FLASHBACK_NOT_SUPPORTED,MYF(0),"binlog_format", + binlog_format_names[var->save_result.ulonglong_value]); + return true; + } + } + + if (var->type == OPT_GLOBAL) + return false; + + /* + If RBR and open temporary tables, their CREATE TABLE may not be in the + binlog, so we can't toggle to SBR in this connection. + + If binlog_format=MIXED, there are open temporary tables, and an unsafe + statement is executed, then subsequent statements are logged in row + format and hence changes to temporary tables may be lost. So we forbid + switching @@SESSION.binlog_format from MIXED to STATEMENT when there are + open temp tables and we are logging in row format. + */ + if (thd->has_thd_temporary_tables() && + var->type == OPT_SESSION && + var->save_result.ulonglong_value == BINLOG_FORMAT_STMT && + ((thd->variables.binlog_format == BINLOG_FORMAT_MIXED && + thd->is_current_stmt_binlog_format_row()) || + thd->variables.binlog_format == BINLOG_FORMAT_ROW)) + { + my_error(ER_TEMP_TABLE_PREVENTS_SWITCH_OUT_OF_RBR, MYF(0)); + return true; + } + + if (unlikely(error_if_in_trans_or_substatement(thd, + ER_STORED_FUNCTION_PREVENTS_SWITCH_BINLOG_FORMAT, + ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_BINLOG_FORMAT))) + return true; + + return false; +} + +static bool fix_binlog_format_after_update(sys_var *self, THD *thd, + enum_var_type type) +{ + if (type == OPT_SESSION) + thd->reset_current_stmt_binlog_format_row(); + return false; +} + +static Sys_var_on_access +Sys_binlog_format( + "binlog_format", "What form of binary logging the master will " + "use: either ROW for row-based binary logging, STATEMENT " + "for statement-based binary logging, or MIXED. 
MIXED is statement-" + "based binary logging except for those statements where only row-" + "based is correct: those which involve user-defined functions (i.e. " + "UDFs) or the UUID() function; for those, row-based binary logging is " + "automatically used.", + SESSION_VAR(binlog_format), CMD_LINE(REQUIRED_ARG, OPT_BINLOG_FORMAT), + binlog_format_names, DEFAULT(BINLOG_FORMAT_MIXED), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(binlog_format_check), + ON_UPDATE(fix_binlog_format_after_update)); + +static bool binlog_direct_check(sys_var *self, THD *thd, set_var *var) +{ + if (var->type == OPT_GLOBAL) + return false; + + if (unlikely(error_if_in_trans_or_substatement(thd, + ER_STORED_FUNCTION_PREVENTS_SWITCH_BINLOG_DIRECT, + ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_BINLOG_DIRECT))) + return true; + + return false; +} + +static Sys_var_on_access +Sys_binlog_direct( + "binlog_direct_non_transactional_updates", + "Causes updates to non-transactional engines using statement format to " + "be written directly to binary log. Before using this option make sure " + "that there are no dependencies between transactional and " + "non-transactional tables such as in the statement INSERT INTO t_myisam " + "SELECT * FROM t_innodb; otherwise, slaves may diverge from the master.", + SESSION_VAR(binlog_direct_non_trans_update), + CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(binlog_direct_check)); + +static Sys_var_bit Sys_explicit_defaults_for_timestamp( + "explicit_defaults_for_timestamp", + "This option causes CREATE TABLE to create all TIMESTAMP columns " + "as NULL with DEFAULT NULL attribute, Without this option, " + "TIMESTAMP columns are NOT NULL and have implicit DEFAULT clauses.", + SESSION_VAR(option_bits), CMD_LINE(OPT_ARG), + OPTION_EXPLICIT_DEF_TIMESTAMP, DEFAULT(TRUE), NO_MUTEX_GUARD, IN_BINLOG); + +static Sys_var_ulonglong Sys_bulk_insert_buff_size( + "bulk_insert_buffer_size", "Size of tree cache used in bulk " + "insert optimisation. 
Note that this is a limit per thread!", + SESSION_VAR(bulk_insert_buff_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, SIZE_T_MAX), DEFAULT(8192*1024), BLOCK_SIZE(1)); + +static Sys_var_charptr_fscs Sys_character_sets_dir( + "character_sets_dir", "Directory where character sets are", + READ_ONLY GLOBAL_VAR(charsets_dir), CMD_LINE(REQUIRED_ARG), + DEFAULT(0)); + +static bool check_engine_supports_temporary(sys_var *self, THD *thd, set_var *var) +{ + plugin_ref plugin= var->save_result.plugin; + if (!plugin) + return false; + DBUG_ASSERT(plugin); + handlerton *hton= plugin_hton(plugin); + DBUG_ASSERT(hton); + if (ha_check_storage_engine_flag(hton, HTON_TEMPORARY_NOT_SUPPORTED)) + { + my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0), hton_name(hton)->str, + "TEMPORARY"); + return true; + } + return false; +} + +static bool check_not_null(sys_var *self, THD *thd, set_var *var) +{ + return var->value && var->value->is_null(); +} +static bool check_charset(sys_var *self, THD *thd, set_var *var) +{ + if (!var->value) + return false; + + char buff[STRING_BUFFER_USUAL_SIZE]; + if (var->value->result_type() == STRING_RESULT) + { + String str(buff, sizeof(buff), system_charset_info), *res; + if (!(res= var->value->val_str(&str))) + var->save_result.ptr= NULL; + else + { + ErrConvString err(res); /* Get utf8 '\0' terminated string */ + myf utf8_flag= thd->get_utf8_flag(); + if (!(var->save_result.ptr= get_charset_by_csname(err.ptr(), + MY_CS_PRIMARY, + MYF(utf8_flag))) && + !(var->save_result.ptr= get_old_charset_by_name(err.ptr()))) + { + my_error(ER_UNKNOWN_CHARACTER_SET, MYF(0), err.ptr()); + return true; + } + } + } + else // INT_RESULT + { + int csno= (int)var->value->val_int(); + CHARSET_INFO *cs; + if ((var->save_result.ptr= cs= get_charset(csno, MYF(0)))) + { + /* + Backward compatibility: pre MDEV-30824 servers + can write non-default collation IDs to binary log: + SET character_set_client=83; -- utf8mb3_bin + Convert a non-default collation to the compiled default 
collation, + e.g. utf8mb3_bin to utf8mb3_general_ci, but only if + - THD is a slave thread or + - is processing a mysqlbinlog output. + */ + if ((cs->state & MY_CS_PRIMARY) || + ((thd->variables.pseudo_slave_mode || thd->slave_thread) && + (var->save_result.ptr= + Lex_exact_charset_opt_extended_collate(cs, true). + find_default_collation()))) + return false; + } + my_error(ER_UNKNOWN_CHARACTER_SET, MYF(0), llstr(csno, buff)); + return true; + } + return false; +} +static bool check_charset_not_null(sys_var *self, THD *thd, set_var *var) +{ + return check_charset(self, thd, var) || check_not_null(self, thd, var); +} +static Sys_var_struct Sys_character_set_system( + "character_set_system", "The character set used by the server " + "for storing identifiers", + READ_ONLY GLOBAL_VAR(system_charset_info), NO_CMD_LINE, + offsetof(CHARSET_INFO, cs_name.str), DEFAULT(0)); + +static Sys_var_struct Sys_character_set_server( + "character_set_server", "The default character set", + SESSION_VAR(collation_server), NO_CMD_LINE, + offsetof(CHARSET_INFO, cs_name.str), DEFAULT(&default_charset_info), + NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(check_charset_not_null)); + +static bool check_charset_db(sys_var *self, THD *thd, set_var *var) +{ + if (check_charset_not_null(self, thd, var)) + return true; + if (!var->value) // = DEFAULT + var->save_result.ptr= thd->db_charset; + return false; +} +static Sys_var_struct Sys_character_set_database( + "character_set_database", + "The character set used by the default database", + SESSION_VAR(collation_database), NO_CMD_LINE, + offsetof(CHARSET_INFO, cs_name.str), DEFAULT(&default_charset_info), + NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(check_charset_db)); + +static bool check_cs_client(sys_var *self, THD *thd, set_var *var) +{ + if (check_charset_not_null(self, thd, var)) + return true; + + // Currently, UCS-2 cannot be used as a client character set + if (!is_supported_parser_charset((CHARSET_INFO *)(var->save_result.ptr))) + return true; + + return 
false; +} +static bool fix_thd_charset(sys_var *self, THD *thd, enum_var_type type) +{ + if (type == OPT_SESSION) + thd->update_charset(); + return false; +} +static Sys_var_struct Sys_character_set_client( + "character_set_client", "The character set for statements " + "that arrive from the client", + NO_SET_STMT SESSION_VAR(character_set_client), NO_CMD_LINE, + offsetof(CHARSET_INFO, cs_name.str), DEFAULT(&default_charset_info), + NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(check_cs_client), + ON_UPDATE(fix_thd_charset)); +// for check changing +export sys_var *Sys_character_set_client_ptr= &Sys_character_set_client; + +static Sys_var_struct Sys_character_set_connection( + "character_set_connection", "The character set used for " + "literals that do not have a character set introducer and for " + "number-to-string conversion", + NO_SET_STMT SESSION_VAR(collation_connection), NO_CMD_LINE, + offsetof(CHARSET_INFO, cs_name.str), DEFAULT(&default_charset_info), + NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(check_charset_not_null), + ON_UPDATE(fix_thd_charset)); +// for check changing +export sys_var *Sys_character_set_connection_ptr= &Sys_character_set_connection; + +static Sys_var_struct Sys_character_set_results( + "character_set_results", "The character set used for returning " + "query results to the client", + SESSION_VAR(character_set_results), NO_CMD_LINE, + offsetof(CHARSET_INFO, cs_name.str), DEFAULT(&default_charset_info), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_charset)); +// for check changing +export sys_var *Sys_character_set_results_ptr= &Sys_character_set_results; + +static Sys_var_struct Sys_character_set_filesystem( + "character_set_filesystem", "The filesystem character set", + NO_SET_STMT SESSION_VAR(character_set_filesystem), NO_CMD_LINE, + offsetof(CHARSET_INFO, cs_name.str), DEFAULT(&character_set_filesystem), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_charset_not_null), + ON_UPDATE(fix_thd_charset)); + +static const char *completion_type_names[]= 
{"NO_CHAIN", "CHAIN", "RELEASE", 0}; +static Sys_var_enum Sys_completion_type( + "completion_type", "The transaction completion type", + SESSION_VAR(completion_type), CMD_LINE(REQUIRED_ARG), + completion_type_names, DEFAULT(0)); + +static bool check_collation_not_null(sys_var *self, THD *thd, set_var *var) +{ + if (!var->value) + return false; + myf utf8_flag= thd->get_utf8_flag(); + char buff[STRING_BUFFER_USUAL_SIZE]; + if (var->value->result_type() == STRING_RESULT) + { + String str(buff, sizeof(buff), system_charset_info), *res; + if (!(res= var->value->val_str(&str))) + var->save_result.ptr= NULL; + else + { + ErrConvString err(res); /* Get utf8 '\0'-terminated string */ + if (!(var->save_result.ptr= get_charset_by_name(err.ptr(), MYF(utf8_flag)))) + { + my_error(ER_UNKNOWN_COLLATION, MYF(0), err.ptr()); + return true; + } + } + } + else // INT_RESULT + { + int csno= (int)var->value->val_int(); + if (!(var->save_result.ptr= get_charset(csno, MYF(0)))) + { + my_error(ER_UNKNOWN_COLLATION, MYF(0), llstr(csno, buff)); + return true; + } + } + return check_not_null(self, thd, var); +} +static Sys_var_struct Sys_collation_connection( + "collation_connection", "The collation of the connection " + "character set", + NO_SET_STMT SESSION_VAR(collation_connection), NO_CMD_LINE, + offsetof(CHARSET_INFO, coll_name.str), DEFAULT(&default_charset_info), + NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(check_collation_not_null), + ON_UPDATE(fix_thd_charset)); + +static bool check_collation_db(sys_var *self, THD *thd, set_var *var) +{ + if (check_collation_not_null(self, thd, var)) + return true; + if (!var->value) // = DEFAULT + var->save_result.ptr= thd->db_charset; + return false; +} +static Sys_var_struct Sys_collation_database( + "collation_database", "The collation of the database " + "character set", + SESSION_VAR(collation_database), NO_CMD_LINE, + offsetof(CHARSET_INFO, coll_name.str), DEFAULT(&default_charset_info), + NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(check_collation_db)); + 
+static Sys_var_struct Sys_collation_server( + "collation_server", "The server default collation", + SESSION_VAR(collation_server), NO_CMD_LINE, + offsetof(CHARSET_INFO, coll_name.str), DEFAULT(&default_charset_info), + NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(check_collation_not_null)); + +static Sys_var_uint Sys_column_compression_threshold( + "column_compression_threshold", + "Minimum column data length eligible for compression", + SESSION_VAR(column_compression_threshold), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(100), BLOCK_SIZE(1)); + +static Sys_var_uint Sys_column_compression_zlib_level( + "column_compression_zlib_level", + "zlib compression level (1 gives best speed, 9 gives best compression)", + SESSION_VAR(column_compression_zlib_level), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 9), DEFAULT(6), BLOCK_SIZE(1)); + +/* + Note that names must correspond to zlib strategy definition. So that we can + pass column_compression_zlib_strategy directly to deflateInit2(). +*/ +static const char *column_compression_zlib_strategy_names[]= +{ "DEFAULT_STRATEGY", "FILTERED", "HUFFMAN_ONLY", "RLE", "FIXED", 0 }; + +static Sys_var_enum Sys_column_compression_zlib_strategy( + "column_compression_zlib_strategy", + "The strategy parameter is used to tune the compression algorithm. Use " + "the value DEFAULT_STRATEGY for normal data, FILTERED for data produced " + "by a filter (or predictor), HUFFMAN_ONLY to force Huffman encoding " + "only (no string match), or RLE to limit match distances to one " + "(run-length encoding). Filtered data consists mostly of small values " + "with a somewhat random distribution. In this case, the compression " + "algorithm is tuned to compress them better. The effect of FILTERED is " + "to force more Huffman coding and less string matching; it is somewhat " + "intermediate between DEFAULT_STRATEGY and HUFFMAN_ONLY. RLE is " + "designed to be almost as fast as HUFFMAN_ONLY, but give better " + "compression for PNG image data. 
The strategy parameter only affects " + "the compression ratio but not the correctness of the compressed output " + "even if it is not set appropriately. FIXED prevents the use of dynamic " + "Huffman codes, allowing for a simpler decoder for special " + "applications.", + SESSION_VAR(column_compression_zlib_strategy), CMD_LINE(REQUIRED_ARG), + column_compression_zlib_strategy_names, DEFAULT(0)); + +static Sys_var_mybool Sys_column_compression_zlib_wrap( + "column_compression_zlib_wrap", + "Generate zlib header and trailer and compute adler32 check value. " + "It can be used with storage engines that don't provide data integrity " + "verification to detect data corruption.", + SESSION_VAR(column_compression_zlib_wrap), CMD_LINE(OPT_ARG), + DEFAULT(FALSE)); + +static const char *concurrent_insert_names[]= {"NEVER", "AUTO", "ALWAYS", 0}; +static Sys_var_enum Sys_concurrent_insert( + "concurrent_insert", "Use concurrent insert with MyISAM", + GLOBAL_VAR(myisam_concurrent_insert), CMD_LINE(OPT_ARG), + concurrent_insert_names, DEFAULT(1)); + +static Sys_var_on_access_global +Sys_connect_timeout( + "connect_timeout", + "The number of seconds the mysqld server is waiting for a connect " + "packet before responding with 'Bad handshake'", + GLOBAL_VAR(connect_timeout), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(2, LONG_TIMEOUT), DEFAULT(CONNECT_TIMEOUT), BLOCK_SIZE(1)); + +static Sys_var_charptr_fscs Sys_datadir( + "datadir", "Path to the database root directory", + READ_ONLY GLOBAL_VAR(mysql_real_data_home_ptr), + CMD_LINE(REQUIRED_ARG, 'h'), DEFAULT(mysql_real_data_home)); + +#ifndef DBUG_OFF +static Sys_var_dbug Sys_dbug( + "debug", "Built-in DBUG debugger", sys_var::SESSION, + CMD_LINE(OPT_ARG, '#'), DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_has_super), ON_UPDATE(0), + DEPRECATED("'@@debug_dbug'")); // since 5.5.37 + +static Sys_var_dbug Sys_debug_dbug( + "debug_dbug", "Built-in DBUG debugger", sys_var::SESSION, + CMD_LINE(OPT_ARG, '#'), DEFAULT(""), 
NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_has_super)); +#endif + +/** + @todo + When updating myisam_delay_key_write, we should do a 'flush tables' + of all MyISAM tables to ensure that they are reopen with the + new attribute. +*/ +export bool fix_delay_key_write(sys_var *self, THD *thd, enum_var_type type) +{ + switch (delay_key_write_options) { + case DELAY_KEY_WRITE_NONE: + myisam_delay_key_write=0; + ha_open_options&= ~HA_OPEN_DELAY_KEY_WRITE; + break; + case DELAY_KEY_WRITE_ON: + myisam_delay_key_write=1; + ha_open_options&= ~HA_OPEN_DELAY_KEY_WRITE; + break; + case DELAY_KEY_WRITE_ALL: + myisam_delay_key_write=1; + ha_open_options|= HA_OPEN_DELAY_KEY_WRITE; + break; + } +#ifdef WITH_ARIA_STORAGE_ENGINE + maria_delay_key_write= myisam_delay_key_write; +#endif + return false; +} +static const char *delay_key_write_names[]= { "OFF", "ON", "ALL", NullS }; +static Sys_var_enum Sys_delay_key_write( + "delay_key_write", "Specifies how MyISAM tables handles CREATE " + "TABLE DELAY_KEY_WRITE. If set to ON, the default, any DELAY KEY " + "WRITEs are honored. The key buffer is then flushed only when the " + "table closes, speeding up writes. MyISAM tables should be " + "automatically checked upon startup in this case, and " + "--external locking should not be used, as it can lead to index " + "corruption. If set to OFF, DELAY KEY WRITEs are ignored, while if " + "set to ALL, all new opened tables are treated as if created with " + "DELAY KEY WRITEs enabled.", + GLOBAL_VAR(delay_key_write_options), CMD_LINE(OPT_ARG), + delay_key_write_names, DEFAULT(DELAY_KEY_WRITE_ON), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_delay_key_write)); + +static Sys_var_ulong Sys_delayed_insert_limit( + "delayed_insert_limit", + "After inserting delayed_insert_limit rows, the INSERT DELAYED " + "handler will check if there are any SELECT statements pending. 
" + "If so, it allows these to execute before continuing.", + GLOBAL_VAR(delayed_insert_limit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, UINT_MAX), DEFAULT(DELAYED_LIMIT), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_delayed_insert_timeout( + "delayed_insert_timeout", + "How long a INSERT DELAYED thread should wait for INSERT statements " + "before terminating", + GLOBAL_VAR(delayed_insert_timeout), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, LONG_TIMEOUT), DEFAULT(DELAYED_WAIT_TIMEOUT), + BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_delayed_queue_size( + "delayed_queue_size", + "What size queue (in rows) should be allocated for handling INSERT " + "DELAYED. If the queue becomes full, any client that does INSERT " + "DELAYED will wait until there is room in the queue again", + GLOBAL_VAR(delayed_queue_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, UINT_MAX), DEFAULT(DELAYED_QUEUE_SIZE), BLOCK_SIZE(1)); + +#ifdef HAVE_EVENT_SCHEDULER +static const char *event_scheduler_names[]= { "OFF", "ON", "DISABLED", + "ORIGINAL", NullS }; +static bool event_scheduler_check(sys_var *self, THD *thd, set_var *var) +{ + if (Events::opt_event_scheduler == Events::EVENTS_DISABLED) + { + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), + "--event-scheduler=DISABLED or --skip-grant-tables"); + return true; + } + /* DISABLED is only accepted on the command line */ + if (var->save_result.ulonglong_value == Events::EVENTS_DISABLED) + return true; + return false; +} + +static bool event_scheduler_update(sys_var *self, THD *thd, enum_var_type type) +{ + int err_no= 0; + bool ret; + uint opt_event_scheduler_value= Events::opt_event_scheduler; + mysql_mutex_unlock(&LOCK_global_system_variables); + /* + Events::start() is heavyweight. In particular it creates a new THD, + which takes LOCK_global_system_variables internally. + Thus we have to release it here. + We need to re-take it before returning, though. 
+ + Note that since we release LOCK_global_system_variables before calling + start/stop, there is a possibility that the server variable + can become out of sync with the real event scheduler state. + + This can happen with two concurrent statments if the first gets + interrupted after start/stop but before retaking + LOCK_global_system_variables. However, this problem should be quite + rare and it's difficult to avoid it without opening up possibilities + for deadlocks. See bug#51160. + */ + + /* EVENTS_ORIGINAL means we should revert back to the startup state */ + if (opt_event_scheduler_value == Events::EVENTS_ORIGINAL) + { + opt_event_scheduler_value= Events::opt_event_scheduler= + Events::startup_state; + } + + /* + If the scheduler was not properly inited (because of wrong system tables), + try to init it again. This is needed for mysql_upgrade to work properly if + the event tables where upgraded. + */ + if (!Events::inited && (Events::init(thd, 0) || !Events::inited)) + ret= 1; + else + ret= opt_event_scheduler_value == Events::EVENTS_ON ? + Events::start(&err_no) : + Events::stop(); + mysql_mutex_lock(&LOCK_global_system_variables); + if (ret) + { + Events::opt_event_scheduler= Events::EVENTS_OFF; + my_error(ER_EVENT_SET_VAR_ERROR, MYF(0), err_no); + } + return ret; +} + +static Sys_var_enum Sys_event_scheduler( + "event_scheduler", "Enable the event scheduler. 
Possible values are " + "ON, OFF, and DISABLED (keep the event scheduler completely " + "deactivated, it cannot be activated run-time)", + GLOBAL_VAR(Events::opt_event_scheduler), CMD_LINE(OPT_ARG), + event_scheduler_names, DEFAULT(Events::EVENTS_OFF), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(event_scheduler_check), ON_UPDATE(event_scheduler_update)); +#endif + +static bool copy_to_expire_logs_days(sys_var *, THD *, + enum_var_type type) +{ + expire_logs_days= binlog_expire_logs_seconds / (double)(24 * 60 * 60); + return false; +} + +static bool copy_to_binlog_expire_logs_seconds(sys_var *, THD *, + enum_var_type type) +{ + binlog_expire_logs_seconds= (ulong)(expire_logs_days * 24 * 60 * 60); + return false; +} + +static Sys_var_on_access_global +Sys_expire_logs_days( + "expire_logs_days", + "If non-zero, binary logs will be purged after expire_logs_days " + "days; It and binlog_expire_logs_seconds are linked, such that " + "changes in one are converted into the other, presentable as a " + "decimal value with 1/1000000 of the day precision; possible " + "purges happen at startup and at binary log rotation", + GLOBAL_VAR(expire_logs_days), + CMD_LINE(REQUIRED_ARG, OPT_EXPIRE_LOGS_DAYS), VALID_RANGE(0, 99), + DEFAULT(0), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(copy_to_binlog_expire_logs_seconds)); + +static Sys_var_on_access_global +Sys_binlog_expire_logs_seconds( + "binlog_expire_logs_seconds", + "If non-zero, binary logs will be purged after " + "binlog_expire_logs_seconds seconds; It and expire_logs_days are " + "linked, such that changes in one are converted into the other. 
" + "Possible purges happen at startup and at binary log rotation.", + GLOBAL_VAR(binlog_expire_logs_seconds), + CMD_LINE(REQUIRED_ARG, OPT_BINLOG_EXPIRE_LOGS_SECONDS), + VALID_RANGE(0, 8553600), DEFAULT(0), BLOCK_SIZE(1), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(copy_to_expire_logs_days)); + +static Sys_var_mybool Sys_flush( + "flush", "Flush MyISAM tables to disk between SQL commands", + GLOBAL_VAR(myisam_flush), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_ulong Sys_flush_time( + "flush_time", + "A dedicated thread is created to flush all tables at the " + "given interval", + GLOBAL_VAR(flush_time), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, LONG_TIMEOUT), + DEFAULT(0), BLOCK_SIZE(1)); + +static bool check_ftb_syntax(sys_var *self, THD *thd, set_var *var) +{ + return ft_boolean_check_syntax_string((uchar*) + (var->save_result.string_value.str), + var->save_result.string_value.length, + self->charset(thd)); +} +static bool query_cache_flush(sys_var *self, THD *thd, enum_var_type type) +{ +#ifdef HAVE_QUERY_CACHE + query_cache.flush(); +#endif /* HAVE_QUERY_CACHE */ + return false; +} +/// @todo make SESSION_VAR (usability enhancement and a fix for a race condition) +static Sys_var_charptr Sys_ft_boolean_syntax( + "ft_boolean_syntax", "List of operators for " + "MATCH ... AGAINST ( ... IN BOOLEAN MODE)", + GLOBAL_VAR(ft_boolean_syntax), + CMD_LINE(REQUIRED_ARG), + DEFAULT(DEFAULT_FTB_SYNTAX), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(check_ftb_syntax), ON_UPDATE(query_cache_flush)); + +static Sys_var_ulong Sys_ft_max_word_len( + "ft_max_word_len", + "The maximum length of the word to be included in a FULLTEXT index. 
" + "Note: FULLTEXT indexes must be rebuilt after changing this variable", + READ_ONLY GLOBAL_VAR(ft_max_word_len), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(10, HA_FT_MAXCHARLEN), DEFAULT(HA_FT_MAXCHARLEN), + BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_ft_min_word_len( + "ft_min_word_len", + "The minimum length of the word to be included in a FULLTEXT index. " + "Note: FULLTEXT indexes must be rebuilt after changing this variable", + READ_ONLY GLOBAL_VAR(ft_min_word_len), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, HA_FT_MAXCHARLEN), DEFAULT(4), BLOCK_SIZE(1)); + +/// @todo make it an updatable SESSION_VAR +static Sys_var_ulong Sys_ft_query_expansion_limit( + "ft_query_expansion_limit", + "Number of best matches to use for query expansion", + READ_ONLY GLOBAL_VAR(ft_query_expansion_limit), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 1000), DEFAULT(20), BLOCK_SIZE(1)); + +static Sys_var_charptr_fscs Sys_ft_stopword_file( + "ft_stopword_file", + "Use stopwords from this file instead of built-in list", + READ_ONLY GLOBAL_VAR(ft_stopword_file), CMD_LINE(REQUIRED_ARG), + DEFAULT(0)); + +static Sys_var_mybool Sys_ignore_builtin_innodb( + "ignore_builtin_innodb", + "Disable initialization of builtin InnoDB plugin", + READ_ONLY GLOBAL_VAR(opt_ignore_builtin_innodb), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static bool check_init_string(sys_var *self, THD *thd, set_var *var) +{ + if (var->save_result.string_value.str == 0) + { + var->save_result.string_value.str= const_cast(""); + var->save_result.string_value.length= 0; + } + return false; +} +static PolyLock_rwlock PLock_sys_init_connect(&LOCK_sys_init_connect); + +static Sys_var_on_access_global +Sys_init_connect( + "init_connect", "Command(s) that are executed for each " + "new connection (unless the user has SUPER privilege)", + GLOBAL_VAR(opt_init_connect), CMD_LINE(REQUIRED_ARG), + DEFAULT(""), &PLock_sys_init_connect, NOT_IN_BINLOG, + ON_CHECK(check_init_string)); + +#ifdef HAVE_REPLICATION +static bool 
check_master_connection(sys_var *self, THD *thd, set_var *var) +{ + LEX_CSTRING tmp; + tmp.str= var->save_result.string_value.str; + tmp.length= var->save_result.string_value.length; + if (!tmp.str || check_master_connection_name(&tmp)) + return true; + + return false; +} + +static Sys_var_session_lexstring Sys_default_master_connection( + "default_master_connection", + "Master connection to use for all slave variables and slave commands", + SESSION_ONLY(default_master_connection), + NO_CMD_LINE, + DEFAULT(""), MAX_CONNECTION_NAME, ON_CHECK(check_master_connection)); +#endif + +static Sys_var_charptr_fscs Sys_init_file( + "init_file", "Read SQL commands from this file at startup", + READ_ONLY GLOBAL_VAR(opt_init_file), +#ifdef DISABLE_GRANT_OPTIONS + NO_CMD_LINE, +#else + CMD_LINE(REQUIRED_ARG), +#endif + DEFAULT(0)); + +static PolyLock_rwlock PLock_sys_init_slave(&LOCK_sys_init_slave); +static Sys_var_on_access_global +Sys_init_slave( + "init_slave", "Command(s) that are executed by a slave server " + "each time the SQL thread starts", GLOBAL_VAR(opt_init_slave), + CMD_LINE(REQUIRED_ARG), + DEFAULT(""), &PLock_sys_init_slave, + NOT_IN_BINLOG, ON_CHECK(check_init_string)); + +static Sys_var_ulong Sys_interactive_timeout( + "interactive_timeout", + "The number of seconds the server waits for activity on an interactive " + "connection before closing it", + NO_SET_STMT SESSION_VAR(net_interactive_timeout), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, LONG_TIMEOUT), DEFAULT(NET_WAIT_TIMEOUT), BLOCK_SIZE(1)); + +static Sys_var_ulonglong Sys_join_buffer_size( + "join_buffer_size", + "The size of the buffer that is used for joins", + SESSION_VAR(join_buff_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(128, SIZE_T_MAX), DEFAULT(256*1024), BLOCK_SIZE(128)); + +static Sys_var_keycache Sys_key_buffer_size( + "key_buffer_size", "The size of the buffer used for " + "index blocks for MyISAM tables. 
Increase this to get better index " + "handling (for all reads and multiple writes) to as much as you can " + "afford", + KEYCACHE_VAR(param_buff_size), + CMD_LINE(REQUIRED_ARG, OPT_KEY_BUFFER_SIZE), + VALID_RANGE(0, SIZE_T_MAX), DEFAULT(KEY_CACHE_SIZE), + BLOCK_SIZE(IO_SIZE), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(update_buffer_size)); + +static Sys_var_keycache Sys_key_cache_block_size( + "key_cache_block_size", "The default size of key cache blocks", + KEYCACHE_VAR(param_block_size), + CMD_LINE(REQUIRED_ARG, OPT_KEY_CACHE_BLOCK_SIZE), + VALID_RANGE(512, 1024*16), DEFAULT(KEY_CACHE_BLOCK_SIZE), + BLOCK_SIZE(512), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(resize_keycache)); + +static Sys_var_keycache Sys_key_cache_division_limit( + "key_cache_division_limit", + "The minimum percentage of warm blocks in key cache", + KEYCACHE_VAR(param_division_limit), + CMD_LINE(REQUIRED_ARG, OPT_KEY_CACHE_DIVISION_LIMIT), + VALID_RANGE(1, 100), DEFAULT(100), + BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(change_keycache_param)); + +static Sys_var_keycache Sys_key_cache_age_threshold( + "key_cache_age_threshold", "This characterizes the number of " + "hits a hot block has to be untouched until it is considered aged " + "enough to be downgraded to a warm block. This specifies the " + "percentage ratio of that number of hits to the total number of " + "blocks in key cache", + KEYCACHE_VAR(param_age_threshold), + CMD_LINE(REQUIRED_ARG, OPT_KEY_CACHE_AGE_THRESHOLD), + VALID_RANGE(100, UINT_MAX), DEFAULT(300), + BLOCK_SIZE(100), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(change_keycache_param)); + +static Sys_var_keycache Sys_key_cache_file_hash_size( + "key_cache_file_hash_size", + "Number of hash buckets for open and changed files. If you have a lot of MyISAM " + "files open you should increase this for faster flush of changes. 
A good " + "value is probably 1/10 of number of possible open MyISAM files.", + KEYCACHE_VAR(changed_blocks_hash_size), + CMD_LINE(REQUIRED_ARG, OPT_KEY_CACHE_CHANGED_BLOCKS_HASH_SIZE), + VALID_RANGE(128, 16384), DEFAULT(512), + BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(resize_keycache)); + +static Sys_var_mybool Sys_large_files_support( + "large_files_support", + "Whether mysqld was compiled with options for large file support", + READ_ONLY GLOBAL_VAR(opt_large_files), + CMD_LINE_HELP_ONLY, DEFAULT(sizeof(my_off_t) > 4)); + +static Sys_var_uint Sys_large_page_size( + "large_page_size", + "Previously showed the size of large memory pages, unused since " + "multiple page size support was added", + READ_ONLY GLOBAL_VAR(opt_large_page_size), NO_CMD_LINE, + VALID_RANGE(0, UINT_MAX), DEFAULT(0), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(0), + DEPRECATED("")); + +static Sys_var_mybool Sys_large_pages( + "large_pages", "Enable support for large pages", + READ_ONLY GLOBAL_VAR(opt_large_pages), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_charptr_fscs Sys_language( + "lc_messages_dir", "Directory where error messages are", + READ_ONLY GLOBAL_VAR(lc_messages_dir_ptr), CMD_LINE(REQUIRED_ARG, 'L'), + DEFAULT(0)); + +static Sys_var_mybool Sys_local_infile( + "local_infile", "Enable LOAD DATA LOCAL INFILE", + GLOBAL_VAR(opt_local_infile), CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_ulong Sys_lock_wait_timeout( + "lock_wait_timeout", + "Timeout in seconds to wait for a lock before returning an error.", + SESSION_VAR(lock_wait_timeout), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, LONG_TIMEOUT), DEFAULT(24 * 60 * 60), BLOCK_SIZE(1)); + +#ifdef HAVE_MLOCKALL +static Sys_var_mybool Sys_locked_in_memory( + "locked_in_memory", + "Whether mysqld was locked in memory with --memlock", + READ_ONLY GLOBAL_VAR(locked_in_memory), NO_CMD_LINE, DEFAULT(FALSE)); +#endif + +/* this says NO_CMD_LINE, as command-line option 
takes a string, not a bool */ +static Sys_var_mybool Sys_log_bin( + "log_bin", "Whether the binary log is enabled", + READ_ONLY GLOBAL_VAR(opt_bin_log), NO_CMD_LINE, DEFAULT(FALSE)); + +static Sys_var_on_access_global +Sys_log_bin_compress( + "log_bin_compress", "Whether the binary log can be compressed", + GLOBAL_VAR(opt_bin_log_compress), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +/* the min length is 10, means that Begin/Commit/Rollback would never be compressed! */ +static Sys_var_on_access_global +Sys_log_bin_compress_min_len( + "log_bin_compress_min_len", + "Minimum length of sql statement(in statement mode) or record(in row mode)" + "that can be compressed.", + GLOBAL_VAR(opt_bin_log_compress_min_len), + CMD_LINE(OPT_ARG), VALID_RANGE(10, 1024), DEFAULT(256), BLOCK_SIZE(1)); + +static Sys_var_on_access_global +Sys_trust_function_creators( + "log_bin_trust_function_creators", + "If set to FALSE (the default), then when --log-bin is used, creation " + "of a stored function (or trigger) is allowed only to users having the " + "SUPER privilege and only if this stored function (trigger) may not " + "break binary logging. Note that if ALL connections to this server " + "ALWAYS use row-based binary logging, the security issues do not " + "exist and the binary logging cannot break, so you can safely set " + "this to TRUE", + GLOBAL_VAR(trust_function_creators), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_charptr_fscs Sys_log_error( + "log_error", + "Log errors to file (instead of stdout). If file name is not specified " + "then 'datadir'/'log-basename'.err or the 'pid-file' path with extension " + ".err is used", + READ_ONLY GLOBAL_VAR(log_error_file_ptr), + CMD_LINE(OPT_ARG, OPT_LOG_ERROR), + DEFAULT(disabled_my_option)); + +static Sys_var_bit Sys_log_queries_not_using_indexes( + "log_queries_not_using_indexes", + "Log queries that are executed without benefit of any index to the " + "slow log if it is open. 
Same as log_slow_filter='not_using_index'", + SESSION_VAR(log_slow_filter), CMD_LINE(OPT_ARG), QPLAN_NOT_USING_INDEX, + DEFAULT(FALSE)); + +static Sys_var_bit Sys_log_slow_admin_statements( + "log_slow_admin_statements", + "Log slow OPTIMIZE, ANALYZE, ALTER and other administrative statements " + "to the slow log if it is open. Resets or sets the option 'admin' in " + "log_slow_disabled_statements", + SESSION_VAR(log_slow_disabled_statements), + CMD_LINE(OPT_ARG), REVERSE(LOG_SLOW_DISABLE_ADMIN), DEFAULT(TRUE)); + +static Sys_var_bit Sys_log_slow_slave_statements( + "log_slow_slave_statements", + "Log slow statements executed by slave thread to the slow log if it is " + "open. Resets or sets the option 'slave' in " + "log_slow_disabled_statements", + SESSION_VAR(log_slow_disabled_statements), + CMD_LINE(OPT_ARG), REVERSE(LOG_SLOW_DISABLE_SLAVE), DEFAULT(TRUE)); + +static Sys_var_ulong Sys_log_warnings( + "log_warnings", + "Log some not critical warnings to the general log file." + "Value can be between 0 and 11. Higher values mean more verbosity", + SESSION_VAR(log_warnings), + CMD_LINE(OPT_ARG, 'W'), + VALID_RANGE(0, UINT_MAX), DEFAULT(2), BLOCK_SIZE(1)); + +static bool update_cached_long_query_time(sys_var *self, THD *thd, + enum_var_type type) +{ + if (type == OPT_SESSION) + thd->variables.long_query_time= + double2ulonglong(thd->variables.long_query_time_double * 1e6); + else + global_system_variables.long_query_time= + double2ulonglong(global_system_variables.long_query_time_double * 1e6); + return false; +} + +static Sys_var_double Sys_long_query_time( + "long_query_time", + "Alias for log_slow_query_time. " + "Log all queries that have taken more than long_query_time seconds " + "to execute to the slow query log file. 
The argument will be treated " + "as a decimal value with microsecond precision", + SESSION_VAR(long_query_time_double), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, LONG_TIMEOUT), DEFAULT(10), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(update_cached_long_query_time)); + +static Sys_var_double Sys_log_slow_query_time( + "log_slow_query_time", + "Log all queries that have taken more than log_slow_query_time seconds " + "to execute to the slow query log file. The argument will be treated " + "as a decimal value with microsecond precision", + SESSION_VAR(long_query_time_double), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, LONG_TIMEOUT), DEFAULT(10), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(update_cached_long_query_time)); + +static bool update_cached_max_statement_time(sys_var *self, THD *thd, + enum_var_type type) +{ + if (type == OPT_SESSION) + thd->variables.max_statement_time= + double2ulonglong(thd->variables.max_statement_time_double * 1e6); + else + global_system_variables.max_statement_time= + double2ulonglong(global_system_variables.max_statement_time_double * 1e6); + return false; +} + +static Sys_var_double Sys_max_statement_time( + "max_statement_time", + "A query that has taken more than max_statement_time seconds " + "will be aborted. The argument will be treated as a decimal value " + "with microsecond precision. A value of 0 (default) means no timeout", + SESSION_VAR(max_statement_time_double), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, LONG_TIMEOUT), DEFAULT(0), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(update_cached_max_statement_time)); + +static bool fix_low_prio_updates(sys_var *self, THD *thd, enum_var_type type) +{ + if (type == OPT_SESSION) + thd->update_lock_default= (thd->variables.low_priority_updates ? + TL_WRITE_LOW_PRIORITY : TL_WRITE); + else + thr_upgraded_concurrent_insert_lock= + (global_system_variables.low_priority_updates ? 
+ TL_WRITE_LOW_PRIORITY : TL_WRITE); + return false; +} +static Sys_var_mybool Sys_low_priority_updates( + "low_priority_updates", + "INSERT/DELETE/UPDATE has lower priority than selects", + SESSION_VAR(low_priority_updates), + CMD_LINE(OPT_ARG), + DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_low_prio_updates)); + +static Sys_var_mybool Sys_lower_case_file_system( + "lower_case_file_system", + "Case sensitivity of file names on the file system where the " + "data directory is located", + READ_ONLY GLOBAL_VAR(lower_case_file_system), + CMD_LINE_HELP_ONLY, + DEFAULT(FALSE)); + +static Sys_var_uint Sys_lower_case_table_names( + "lower_case_table_names", + "If set to 1 table names are stored in lowercase on disk and table " + "names will be case-insensitive. Should be set to 2 if you are using " + "a case insensitive file system", + READ_ONLY GLOBAL_VAR(lower_case_table_names), + CMD_LINE(OPT_ARG, OPT_LOWER_CASE_TABLE_NAMES), + VALID_RANGE(0, 2), +#ifdef FN_NO_CASE_SENSE + DEFAULT(1), +#else + DEFAULT(0), +#endif + BLOCK_SIZE(1)); + +static bool session_readonly(sys_var *self, THD *thd, set_var *var) +{ + if (var->type == OPT_GLOBAL) + return false; + my_error(ER_VARIABLE_IS_READONLY, MYF(0), "SESSION", + self->name.str, "GLOBAL"); + return true; +} + +static bool check_max_allowed_packet(sys_var *self, THD *thd, set_var *var) +{ + longlong val; + if (session_readonly(self, thd, var)) + return true; + + val= var->save_result.ulonglong_value; + if (val < (longlong) global_system_variables.net_buffer_length) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_OPTION_BELOW_LIMIT, + ER_THD(thd, WARN_OPTION_BELOW_LIMIT), + "max_allowed_packet", "net_buffer_length"); + } + return false; +} + + +static Sys_var_ulong Sys_max_allowed_packet( + "max_allowed_packet", + "Max packet length to send to or receive from the server", + SESSION_VAR(max_allowed_packet), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1024, 1024*1024*1024), 
DEFAULT(16*1024*1024), + BLOCK_SIZE(1024), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_max_allowed_packet)); + +static Sys_var_on_access_global +Sys_slave_max_allowed_packet( + "slave_max_allowed_packet", + "The maximum packet length to sent successfully from the master to slave.", + GLOBAL_VAR(slave_max_allowed_packet), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1024, MAX_MAX_ALLOWED_PACKET), + DEFAULT(MAX_MAX_ALLOWED_PACKET), BLOCK_SIZE(1024)); + +static Sys_var_on_access_global +Sys_max_binlog_cache_size( + "max_binlog_cache_size", + "Sets the total size of the transactional cache", + GLOBAL_VAR(max_binlog_cache_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(IO_SIZE, SIZE_T_MAX), + DEFAULT((SIZE_T_MAX/IO_SIZE)*IO_SIZE), BLOCK_SIZE(IO_SIZE)); + +static Sys_var_on_access_global +Sys_max_binlog_stmt_cache_size( + "max_binlog_stmt_cache_size", + "Sets the total size of the statement cache", + GLOBAL_VAR(max_binlog_stmt_cache_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(IO_SIZE, SIZE_T_MAX), + DEFAULT((SIZE_T_MAX/IO_SIZE)*IO_SIZE), BLOCK_SIZE(IO_SIZE)); + +static bool fix_max_binlog_size(sys_var *self, THD *thd, enum_var_type type) +{ + mysql_bin_log.set_max_size(max_binlog_size); + return false; +} +static Sys_var_on_access_global +Sys_max_binlog_size( + "max_binlog_size", + "Binary log will be rotated automatically when the size exceeds this " + "value.", + GLOBAL_VAR(max_binlog_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(IO_SIZE, 1024*1024L*1024L), DEFAULT(1024*1024L*1024L), + BLOCK_SIZE(IO_SIZE), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_max_binlog_size)); + +static bool fix_max_connections(sys_var *self, THD *thd, enum_var_type type) +{ +#ifndef EMBEDDED_LIBRARY + resize_thr_alarm(max_connections + extra_max_connections + + global_system_variables.max_insert_delayed_threads + 10); +#endif + return false; +} + +// Default max_connections of 151 is larger than Apache's default max +// children, to avoid "too many connections" error in a common 
setup +static Sys_var_on_access_global +Sys_max_connections( + "max_connections", "The number of simultaneous clients allowed", + PARSED_EARLY GLOBAL_VAR(max_connections), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(10, 100000), + DEFAULT(MAX_CONNECTIONS_DEFAULT), BLOCK_SIZE(1), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(fix_max_connections)); + +static Sys_var_uint Sys_default_password_lifetime( + "default_password_lifetime", + "This defines the global password expiration policy. 0 means " + "automatic password expiration is disabled. If the value is a " + "positive integer N, the passwords must be changed every N days. This " + "behavior can be overridden using the password expiration options in " + "ALTER USER.", + GLOBAL_VAR(default_password_lifetime), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_on_access_global +Sys_disconnect_on_expired_password( + "disconnect_on_expired_password", + "This variable controls how the server handles clients that are not " + "aware of the sandbox mode. 
If enabled, the server disconnects the " + "client, otherwise the server puts the client in a sandbox mode.", + GLOBAL_VAR(disconnect_on_expired_password), CMD_LINE(OPT_ARG), + DEFAULT(FALSE)); + +static Sys_var_on_access_global +Sys_max_connect_errors( + "max_connect_errors", + "If there is more than this number of interrupted connections from " + "a host this host will be blocked from further connections", + GLOBAL_VAR(max_connect_errors), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, UINT_MAX), DEFAULT(MAX_CONNECT_ERRORS), + BLOCK_SIZE(1)); + +static Sys_var_on_access_global +Sys_max_password_errors( + "max_password_errors", + "If there is more than this number of failed connect attempts " + "due to invalid password, user will be blocked from further connections until FLUSH_PRIVILEGES.", + GLOBAL_VAR(max_password_errors), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, UINT_MAX), DEFAULT(UINT_MAX), + BLOCK_SIZE(1)); + +static Sys_var_uint Sys_max_digest_length( + "max_digest_length", "Maximum length considered for digest text.", + READ_ONLY GLOBAL_VAR(max_digest_length), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 1024 * 1024), DEFAULT(1024), BLOCK_SIZE(1)); + +static bool check_max_delayed_threads(sys_var *self, THD *thd, set_var *var) +{ + return var->type != OPT_GLOBAL && + var->save_result.ulonglong_value != 0 && + var->save_result.ulonglong_value != + global_system_variables.max_insert_delayed_threads; +} + +static Sys_var_ulong Sys_max_insert_delayed_threads( + "max_insert_delayed_threads", + "Alias for max_delayed_threads. " + "Don't start more than this number of threads to handle INSERT " + "DELAYED statements. 
If set to zero INSERT DELAYED will be not used", + SESSION_VAR(max_insert_delayed_threads), + NO_CMD_LINE, VALID_RANGE(0, 16384), DEFAULT(20), + BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_max_delayed_threads), ON_UPDATE(fix_max_connections)); + +static Sys_var_ulong Sys_max_delayed_threads( + "max_delayed_threads", + "Don't start more than this number of threads to handle INSERT " + "DELAYED statements. If set to zero INSERT DELAYED will be not used", + SESSION_VAR(max_insert_delayed_threads), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 16384), DEFAULT(20), + BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_max_delayed_threads), ON_UPDATE(fix_max_connections)); + +static Sys_var_ulong Sys_max_error_count( + "max_error_count", + "Max number of errors/warnings to store for a statement", + SESSION_VAR(max_error_count), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 65535), DEFAULT(DEFAULT_ERROR_COUNT), BLOCK_SIZE(1)); + +static Sys_var_ulonglong Sys_max_heap_table_size( + "max_heap_table_size", + "Don't allow creation of heap tables bigger than this", + SESSION_VAR(max_heap_table_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(16384, SIZE_T_MAX), DEFAULT(16*1024*1024), + BLOCK_SIZE(1024)); + +static ulong mdl_locks_cache_size; +static Sys_var_ulong Sys_metadata_locks_cache_size( + "metadata_locks_cache_size", "Unused", + READ_ONLY GLOBAL_VAR(mdl_locks_cache_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 1024*1024), DEFAULT(1024), + BLOCK_SIZE(1)); + +static ulong mdl_locks_hash_partitions; +static Sys_var_ulong Sys_metadata_locks_hash_instances( + "metadata_locks_hash_instances", "Unused", + READ_ONLY GLOBAL_VAR(mdl_locks_hash_partitions), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 1024), DEFAULT(8), + BLOCK_SIZE(1)); + +static Sys_var_on_access_session +Sys_pseudo_thread_id( + "pseudo_thread_id", + "This variable is for internal server use", + SESSION_ONLY(pseudo_thread_id), + NO_CMD_LINE, VALID_RANGE(0, ULONGLONG_MAX), DEFAULT(0), + 
BLOCK_SIZE(1), NO_MUTEX_GUARD, IN_BINLOG); + +static bool +check_gtid_domain_id(sys_var *self, THD *thd, set_var *var) +{ + if (var->type != OPT_GLOBAL && + error_if_in_trans_or_substatement(thd, + ER_STORED_FUNCTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO, + ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO)) + return true; + + return false; +} + + +static Sys_var_on_access +Sys_gtid_domain_id( + "gtid_domain_id", + "Used with global transaction ID to identify logically independent " + "replication streams. When events can propagate through multiple " + "parallel paths (for example multiple masters), each independent " + "source server must use a distinct domain_id. For simple tree-shaped " + "replication topologies, it can be left at its default, 0.", + SESSION_VAR(gtid_domain_id), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, UINT_MAX32), DEFAULT(0), + BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_gtid_domain_id)); + + +/* + Check that setting gtid_seq_no isn't done inside a transaction, and (in + gtid_strict_mode) doesn't create an out-of-order GTID sequence. + + Setting gtid_seq_no to DEFAULT or 0 means we 'reset' it so that the value + doesn't affect the GTID of the next event group written to the binlog. 
+*/ +static bool check_gtid_seq_no(sys_var *self, THD *thd, set_var *var) +{ + uint32 domain_id, server_id; + uint64 seq_no; + + if (unlikely(error_if_in_trans_or_substatement(thd, + ER_STORED_FUNCTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO, + ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO))) + return true; + + DBUG_EXECUTE_IF("ignore_set_gtid_seq_no_check", return false;); + if (var->value && opt_gtid_strict_mode && opt_bin_log) + { + domain_id= thd->variables.gtid_domain_id; + server_id= thd->variables.server_id; + seq_no= (uint64)var->value->val_uint(); + if (seq_no != 0 && + mysql_bin_log.check_strict_gtid_sequence(domain_id, server_id, seq_no)) + return true; + } + + return false; +} + + +static Sys_var_on_access_session +Sys_gtid_seq_no( + "gtid_seq_no", + "Internal server usage, for replication with global transaction id. " + "When set, next event group logged to the binary log will use this " + "sequence number, not generate a new one, thus allowing to preserve " + "master's GTID in slave's binlog.", + SESSION_ONLY(gtid_seq_no), + NO_CMD_LINE, VALID_RANGE(0, ULONGLONG_MAX), DEFAULT(0), + BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_gtid_seq_no)); + + +#ifdef HAVE_REPLICATION +static unsigned char opt_gtid_binlog_pos_dummy; +static Sys_var_gtid_binlog_pos Sys_gtid_binlog_pos( + "gtid_binlog_pos", "Last GTID logged to the binary log, per replication" + "domain", + READ_ONLY GLOBAL_VAR(opt_gtid_binlog_pos_dummy), NO_CMD_LINE); + + +const uchar * +Sys_var_gtid_binlog_pos::global_value_ptr(THD *thd, + const LEX_CSTRING *base) const +{ + char buf[128]; + String str(buf, sizeof(buf), system_charset_info); + char *p; + + str.length(0); + if ((opt_bin_log && mysql_bin_log.append_state_pos(&str)) || + !(p= thd->strmake(str.ptr(), str.length()))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return NULL; + } + + return (uchar *)p; +} + + +static unsigned char opt_gtid_current_pos_dummy; +static Sys_var_gtid_current_pos 
Sys_gtid_current_pos( + "gtid_current_pos", "Current GTID position of the server. Per " + "replication domain, this is either the last GTID replicated by a " + "slave thread, or the GTID logged to the binary log, whichever is " + "most recent.", + READ_ONLY GLOBAL_VAR(opt_gtid_current_pos_dummy), NO_CMD_LINE); + + +const uchar * +Sys_var_gtid_current_pos::global_value_ptr(THD *thd, + const LEX_CSTRING *base) const +{ + String str; + char *p; + + str.length(0); + if (rpl_append_gtid_state(&str, true) || + !(p= thd->strmake(str.ptr(), str.length()))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return NULL; + } + + return (uchar *)p; +} + + +bool +Sys_var_gtid_slave_pos::do_check(THD *thd, set_var *var) +{ + String str, *res; + + DBUG_ASSERT(var->type == OPT_GLOBAL); + + if (rpl_load_gtid_slave_state(thd)) + { + my_error(ER_CANNOT_LOAD_SLAVE_GTID_STATE, MYF(0), "mysql", + rpl_gtid_slave_state_table_name.str); + return true; + } + + if (give_error_if_slave_running(0)) + return true; + if (!(res= var->value->val_str(&str))) + return true; + if (thd->in_active_multi_stmt_transaction()) + { + my_error(ER_CANT_DO_THIS_DURING_AN_TRANSACTION, MYF(0)); + return true; + } + if (rpl_gtid_pos_check(thd, &((*res)[0]), res->length())) + return true; + + if (!(var->save_result.string_value.str= + thd->strmake(res->ptr(), res->length()))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return true; + } + var->save_result.string_value.length= res->length(); + return false; +} + + +bool +Sys_var_gtid_slave_pos::global_update(THD *thd, set_var *var) +{ + bool err; + + DBUG_ASSERT(var->type == OPT_GLOBAL); + + if (!var->value) + { + my_error(ER_NO_DEFAULT, MYF(0), var->var->name.str); + return true; + } + + mysql_mutex_unlock(&LOCK_global_system_variables); + mysql_mutex_lock(&LOCK_active_mi); + if (give_error_if_slave_running(1)) + err= true; + else + err= rpl_gtid_pos_update(thd, var->save_result.string_value.str, + var->save_result.string_value.length); + 
mysql_mutex_unlock(&LOCK_active_mi); + mysql_mutex_lock(&LOCK_global_system_variables); + return err; +} + + +const uchar * +Sys_var_gtid_slave_pos::global_value_ptr(THD *thd, + const LEX_CSTRING *base) const +{ + String str; + char *p; + + str.length(0); + /* + If the mysql.rpl_slave_pos table could not be loaded, then we cannot + easily automatically try to reload it here - we may be inside a statement + that already has tables locked and so opening more tables is problematic. + + But if the table is not loaded (eg. missing mysql_upgrade_db or some such), + then the slave state must be empty anyway. + */ + if ((rpl_global_gtid_slave_state->loaded && + rpl_append_gtid_state(&str, false)) || + !(p= thd->strmake(str.ptr(), str.length()))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return NULL; + } + + return (uchar *)p; +} + + +static unsigned char opt_gtid_slave_pos_dummy; +static Sys_var_gtid_slave_pos Sys_gtid_slave_pos( + "gtid_slave_pos", + "The list of global transaction IDs that were last replicated on the " + "server, one for each replication domain.", + GLOBAL_VAR(opt_gtid_slave_pos_dummy), NO_CMD_LINE); + + +static Sys_var_on_access_global +Sys_gtid_strict_mode( + "gtid_strict_mode", + "Enforce strict seq_no ordering of events in the binary log. Slave " + "stops with an error if it encounters an event that would cause it to " + "generate an out-of-order binlog if executed. 
" + "When ON the same server-id semisync-replicated transactions that " + "duplicate existing ones in binlog are ignored without error " + "and slave interruption.", + GLOBAL_VAR(opt_gtid_strict_mode), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + + +struct gtid_binlog_state_data { rpl_gtid *list; uint32 list_len; }; + +bool +Sys_var_gtid_binlog_state::do_check(THD *thd, set_var *var) +{ + String str, *res; + struct gtid_binlog_state_data *data; + rpl_gtid *list; + uint32 list_len; + + DBUG_ASSERT(var->type == OPT_GLOBAL); + + if (!(res= var->value->val_str(&str))) + return true; + if (thd->in_active_multi_stmt_transaction()) + { + my_error(ER_CANT_DO_THIS_DURING_AN_TRANSACTION, MYF(0)); + return true; + } + if (!mysql_bin_log.is_open()) + { + my_error(ER_FLUSH_MASTER_BINLOG_CLOSED, MYF(0)); + return true; + } + if (!mysql_bin_log.is_empty_state()) + { + my_error(ER_BINLOG_MUST_BE_EMPTY, MYF(0)); + return true; + } + if (res->length() == 0) + { + list= NULL; + list_len= 0; + } + else if (!(list= gtid_parse_string_to_list(res->ptr(), res->length(), + &list_len))) + { + my_error(ER_INCORRECT_GTID_STATE, MYF(0)); + return true; + } + if (!(data= (gtid_binlog_state_data *)my_malloc(PSI_INSTRUMENT_ME, + sizeof(*data), MYF(0)))) + { + my_free(list); + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return true; + } + data->list= list; + data->list_len= list_len; + var->save_result.ptr= data; + return false; +} + + +bool +Sys_var_gtid_binlog_state::global_update(THD *thd, set_var *var) +{ + bool res; + + DBUG_ASSERT(var->type == OPT_GLOBAL); + + if (!var->value) + { + my_error(ER_NO_DEFAULT, MYF(0), var->var->name.str); + return true; + } + + struct gtid_binlog_state_data *data= + (struct gtid_binlog_state_data *)var->save_result.ptr; + mysql_mutex_unlock(&LOCK_global_system_variables); + res= (reset_master(thd, data->list, data->list_len, 0) != 0); + mysql_mutex_lock(&LOCK_global_system_variables); + my_free(data->list); + my_free(data); + return res; +} + + +const uchar * 
+Sys_var_gtid_binlog_state::global_value_ptr(THD *thd, + const LEX_CSTRING *base) const +{ + char buf[512]; + String str(buf, sizeof(buf), system_charset_info); + char *p; + + str.length(0); + if ((opt_bin_log && mysql_bin_log.append_state(&str)) || + !(p= thd->strmake(str.ptr(), str.length()))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return NULL; + } + + return (uchar *)p; +} + + +static unsigned char opt_gtid_binlog_state_dummy; +static Sys_var_gtid_binlog_state Sys_gtid_binlog_state( + "gtid_binlog_state", + "The internal GTID state of the binlog, used to keep track of all " + "GTIDs ever logged to the binlog.", + GLOBAL_VAR(opt_gtid_binlog_state_dummy), NO_CMD_LINE); + + +static Sys_var_last_gtid Sys_last_gtid( + "last_gtid", "The GTID of the last commit (if binlogging was enabled), " + "or the empty string if none.", + READ_ONLY sys_var::ONLY_SESSION, NO_CMD_LINE); + +export sys_var *Sys_last_gtid_ptr= &Sys_last_gtid; // for check changing + + +const uchar * +Sys_var_last_gtid::session_value_ptr(THD *thd, const LEX_CSTRING *base) const +{ + char buf[10+1+10+1+20+1]; + String str(buf, sizeof(buf), system_charset_info); + char *p; + bool first= true; + + str.length(0); + rpl_gtid gtid= thd->get_last_commit_gtid(); + if ((gtid.seq_no > 0 && + rpl_slave_state_tostring_helper(&str, >id, &first)) || + !(p= thd->strmake(str.ptr(), str.length()))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return NULL; + } + + return (uchar *)p; +} + + +static Sys_var_on_access_global +Sys_gtid_cleanup_batch_size( + "gtid_cleanup_batch_size", + "Normally does not need tuning. How many old rows must accumulate in " + "the mysql.gtid_slave_pos table before a background job will be run to " + "delete them. Can be increased to reduce number of commits if " + "using many different engines with --gtid_pos_auto_engines, or to " + "reduce CPU overhead if using a huge number of different " + "gtid_domain_ids. 
Can be decreased to reduce number of old rows in the " + "table.", + GLOBAL_VAR(opt_gtid_cleanup_batch_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0,2147483647), DEFAULT(64), BLOCK_SIZE(1)); + + +static bool +check_slave_parallel_threads(sys_var *self, THD *thd, set_var *var) +{ + return give_error_if_slave_running(0); +} + +static bool +fix_slave_parallel_threads(sys_var *self, THD *thd, enum_var_type type) +{ + bool err; + + mysql_mutex_unlock(&LOCK_global_system_variables); + err= give_error_if_slave_running(0); + mysql_mutex_lock(&LOCK_global_system_variables); + + return err; +} + + +static Sys_var_on_access_global +Sys_slave_parallel_threads( + "slave_parallel_threads", + "If non-zero, number of threads to spawn to apply in parallel events " + "on the slave that were group-committed on the master or were logged " + "with GTID in different replication domains. Note that these threads " + "are in addition to the IO and SQL threads, which are always created " + "by a replication slave", + GLOBAL_VAR(opt_slave_parallel_threads), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0,16383), DEFAULT(0), BLOCK_SIZE(1), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(check_slave_parallel_threads), + ON_UPDATE(fix_slave_parallel_threads)); + +/* Alias for @@slave_parallel_threads to match what MySQL 5.7 uses. 
*/ +static Sys_var_on_access_global +Sys_slave_parallel_workers( + "slave_parallel_workers", + "Alias for slave_parallel_threads", + GLOBAL_VAR(opt_slave_parallel_threads), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0,16383), DEFAULT(0), BLOCK_SIZE(1), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(check_slave_parallel_threads), + ON_UPDATE(fix_slave_parallel_threads)); + + +static bool +check_slave_domain_parallel_threads(sys_var *self, THD *thd, set_var *var) +{ + return give_error_if_slave_running(0); +} + +static bool +fix_slave_domain_parallel_threads(sys_var *self, THD *thd, enum_var_type type) +{ + bool running; + + mysql_mutex_unlock(&LOCK_global_system_variables); + running= give_error_if_slave_running(0); + mysql_mutex_lock(&LOCK_global_system_variables); + + return running; +} + + +static Sys_var_on_access_global +Sys_slave_domain_parallel_threads( + "slave_domain_parallel_threads", + "Maximum number of parallel threads to use on slave for events in a " + "single replication domain. When using multiple domains, this can be " + "used to limit a single domain from grabbing all threads and thus " + "stalling other domains. The default of 0 means to allow a domain to " + "grab as many threads as it wants, up to the value of " + "slave_parallel_threads.", + GLOBAL_VAR(opt_slave_domain_parallel_threads), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0,16383), DEFAULT(0), BLOCK_SIZE(1), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(check_slave_domain_parallel_threads), + ON_UPDATE(fix_slave_domain_parallel_threads)); + + +static Sys_var_on_access_global +Sys_slave_parallel_max_queued( + "slave_parallel_max_queued", + "Limit on how much memory SQL threads should use per parallel " + "replication thread when reading ahead in the relay log looking for " + "opportunities for parallel replication. 
Only used when " + "--slave-parallel-threads > 0.", + GLOBAL_VAR(opt_slave_parallel_max_queued), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0,2147483647), DEFAULT(131072), BLOCK_SIZE(1)); + + +bool +Sys_var_slave_parallel_mode::global_update(THD *thd, set_var *var) +{ + enum_slave_parallel_mode new_value= + (enum_slave_parallel_mode)var->save_result.ulonglong_value; + LEX_CSTRING *base_name= &var->base; + Master_info *mi; + bool res= false; + + if (!base_name->length) + base_name= &thd->variables.default_master_connection; + + mysql_mutex_unlock(&LOCK_global_system_variables); + mysql_mutex_lock(&LOCK_active_mi); + + mi= master_info_index-> + get_master_info(base_name, !base_name->length ? + Sql_condition::WARN_LEVEL_ERROR : + Sql_condition::WARN_LEVEL_WARN); + + if (mi) + { + if (mi->rli.slave_running) + { + my_error(ER_SLAVE_MUST_STOP, MYF(0), + (int) mi->connection_name.length, mi->connection_name.str); + res= true; + } + else + { + mi->parallel_mode= new_value; + if (!base_name->length) + { + /* Use as default value for new connections */ + opt_slave_parallel_mode= new_value; + } + } + } + + mysql_mutex_unlock(&LOCK_active_mi); + mysql_mutex_lock(&LOCK_global_system_variables); + + return res; +} + + +const uchar * +Sys_var_slave_parallel_mode::global_value_ptr(THD *thd, + const + LEX_CSTRING *base_name) const +{ + Master_info *mi; + enum_slave_parallel_mode val= + (enum_slave_parallel_mode)opt_slave_parallel_mode; + + if (!base_name->length) + base_name= &thd->variables.default_master_connection; + + mysql_mutex_unlock(&LOCK_global_system_variables); + mysql_mutex_lock(&LOCK_active_mi); + + mi= master_info_index-> + get_master_info(base_name, !base_name->length ? 
+ Sql_condition::WARN_LEVEL_ERROR : + Sql_condition::WARN_LEVEL_WARN); + if (mi) + val= mi->parallel_mode; + + mysql_mutex_unlock(&LOCK_active_mi); + mysql_mutex_lock(&LOCK_global_system_variables); + if (!mi) + return 0; + + return valptr(thd, val); +} + + +/* The order here must match enum_slave_parallel_mode in mysqld.h. */ +static const char *slave_parallel_mode_names[] = { + "none", "minimal", "conservative", "optimistic", "aggressive", NULL +}; +export TYPELIB slave_parallel_mode_typelib = { + array_elements(slave_parallel_mode_names)-1, + "", + slave_parallel_mode_names, + NULL +}; + +static Sys_var_on_access_global +Sys_slave_parallel_mode( + "slave_parallel_mode", + "Controls what transactions are applied in parallel when using " + "--slave-parallel-threads. Possible values: \"optimistic\" tries to " + "apply most transactional DML in parallel, and handles any conflicts " + "with rollback and retry. \"conservative\" limits parallelism in an " + "effort to avoid any conflicts. \"aggressive\" tries to maximise the " + "parallelism, possibly at the cost of increased conflict rate. " + "\"minimal\" only parallelizes the commit steps of transactions. " + "\"none\" disables parallel apply completely.", + GLOBAL_VAR(opt_slave_parallel_mode), NO_CMD_LINE, + slave_parallel_mode_names, DEFAULT(SLAVE_PARALLEL_OPTIMISTIC)); + + +static Sys_var_bit Sys_skip_parallel_replication( + "skip_parallel_replication", + "If set when a transaction is written to the binlog, parallel apply of " + "that transaction will be avoided on a slave where slave_parallel_mode " + "is not \"aggressive\". 
Can be used to avoid unnecessary rollback and " + "retry for transactions that are likely to cause a conflict if " + "replicated in parallel.", + SESSION_ONLY(option_bits), NO_CMD_LINE, OPTION_RPL_SKIP_PARALLEL, + DEFAULT(FALSE)); + +static Sys_var_mybool Sys_binlog_alter_two_phase( + "binlog_alter_two_phase", + "When set, split ALTER at binary logging into 2 statements: " + "START ALTER and COMMIT/ROLLBACK ALTER", + SESSION_VAR(binlog_alter_two_phase), CMD_LINE(OPT_ARG), + DEFAULT(FALSE)); + +static bool +check_gtid_ignore_duplicates(sys_var *self, THD *thd, set_var *var) +{ + return give_error_if_slave_running(0); +} + +static bool +fix_gtid_ignore_duplicates(sys_var *self, THD *thd, enum_var_type type) +{ + bool running; + + mysql_mutex_unlock(&LOCK_global_system_variables); + running= give_error_if_slave_running(0); + mysql_mutex_lock(&LOCK_global_system_variables); + + return running; +} + + +static Sys_var_on_access_global +Sys_gtid_ignore_duplicates( + "gtid_ignore_duplicates", + "When set, different master connections in multi-source replication are " + "allowed to receive and process event groups with the same GTID (when " + "using GTID mode). Only one will be applied, any others will be " + "ignored. 
Within a given replication domain, just the sequence number " + "will be used to decide whether a given GTID has been already applied; " + "this means it is the responsibility of the user to ensure that GTID " + "sequence numbers are strictly increasing.", + GLOBAL_VAR(opt_gtid_ignore_duplicates), CMD_LINE(OPT_ARG), + DEFAULT(FALSE), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(check_gtid_ignore_duplicates), + ON_UPDATE(fix_gtid_ignore_duplicates)); + +static bool +update_slave_max_statement_time(sys_var *self, THD *thd, enum_var_type type) +{ + slave_max_statement_time= + double2ulonglong(slave_max_statement_time_double * 1e6); + + return false; +} + +static Sys_var_on_access_global< + Sys_var_double, PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_MAX_STATEMENT_TIME> + Sys_slave_max_statement_time( + "slave_max_statement_time", + "A query that has taken more than slave_max_statement_time seconds to " + "run on the slave will be aborted. The argument will be treated as a " + "decimal value with microsecond precision. A value of 0 (default) " + "means no timeout", + GLOBAL_VAR(slave_max_statement_time_double), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, LONG_TIMEOUT), DEFAULT(0), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(update_slave_max_statement_time)); +#endif + + +static Sys_var_on_access_global +Sys_binlog_commit_wait_count( + "binlog_commit_wait_count", + "If non-zero, binlog write will wait at most binlog_commit_wait_usec " + "microseconds for at least this many commits to queue up for group " + "commit to the binlog. 
This can reduce I/O on the binlog and provide " + "increased opportunity for parallel apply on the slave, but too high " + "a value will decrease commit throughput.", + GLOBAL_VAR(opt_binlog_commit_wait_count), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, ULONG_MAX), DEFAULT(0), BLOCK_SIZE(1)); + + +static Sys_var_on_access_global +Sys_binlog_commit_wait_usec( + "binlog_commit_wait_usec", + "Maximum time, in microseconds, to wait for more commits to queue up " + "for binlog group commit. Only takes effect if the value of " + "binlog_commit_wait_count is non-zero.", + GLOBAL_VAR(opt_binlog_commit_wait_usec), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, ULONG_MAX), DEFAULT(100000), BLOCK_SIZE(1)); + + +static bool fix_max_join_size(sys_var *self, THD *thd, enum_var_type type) +{ + SV *sv= type == OPT_GLOBAL ? &global_system_variables : &thd->variables; + if (sv->max_join_size == HA_POS_ERROR) + sv->option_bits|= OPTION_BIG_SELECTS; + else + sv->option_bits&= ~OPTION_BIG_SELECTS; + return false; +} +static Sys_var_harows Sys_max_join_size( + "max_join_size", + "Joins that are probably going to read more than max_join_size " + "records return an error", + SESSION_VAR(max_join_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, HA_POS_ERROR), DEFAULT(HA_POS_ERROR), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_max_join_size)); + +static Sys_var_ulong Sys_max_seeks_for_key( + "max_seeks_for_key", + "Limit assumed max number of seeks when looking up rows based on a key", + SESSION_VAR(max_seeks_for_key), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, UINT_MAX), DEFAULT(UINT_MAX), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_max_length_for_sort_data( + "max_length_for_sort_data", + "Max number of bytes in sorted records", + SESSION_VAR(max_length_for_sort_data), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(4, 8192*1024L), DEFAULT(1024), BLOCK_SIZE(1)); + +static PolyLock_mutex PLock_prepared_stmt_count(&LOCK_prepared_stmt_count); +static Sys_var_uint 
Sys_max_prepared_stmt_count( + "max_prepared_stmt_count", + "Maximum number of prepared statements in the server", + GLOBAL_VAR(max_prepared_stmt_count), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX32), DEFAULT(16382), BLOCK_SIZE(1), + &PLock_prepared_stmt_count); + +static Sys_var_ulong Sys_max_recursive_iterations( + "max_recursive_iterations", + "Maximum number of iterations when executing recursive queries", + SESSION_VAR(max_recursive_iterations), CMD_LINE(OPT_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(1000), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_max_sort_length( + "max_sort_length", + "The number of bytes to use when sorting BLOB or TEXT values (only " + "the first max_sort_length bytes of each value are used; the rest " + "are ignored)", + SESSION_VAR(max_sort_length), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(64, 8192*1024L), DEFAULT(1024), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_max_sp_recursion_depth( + "max_sp_recursion_depth", + "Maximum stored procedure recursion depth", + SESSION_VAR(max_sp_recursion_depth), CMD_LINE(OPT_ARG), + VALID_RANGE(0, 255), DEFAULT(0), BLOCK_SIZE(1)); + + +static bool if_checking_enabled(sys_var *self, THD *thd, set_var *var) +{ + if (session_readonly(self, thd, var)) + return true; + + if (!max_user_connections_checking) + { + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--max-user-connections=0"); + return true; + } + + return false; +} +// non-standard session_value_ptr() here +static Sys_var_max_user_conn Sys_max_user_connections( + "max_user_connections", + "The maximum number of active connections for a single user " + "(0 = no limit)", + SESSION_VAR(max_user_connections), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(-1, INT_MAX), DEFAULT(0), BLOCK_SIZE(1), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(if_checking_enabled)); + +static Sys_var_ulong Sys_max_tmp_tables( + "max_tmp_tables", "Unused, will be removed.", + SESSION_VAR(max_tmp_tables), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, UINT_MAX), DEFAULT(32), 
BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(0), + DEPRECATED("")); // since 10.1.2 + +static Sys_var_ulong Sys_max_write_lock_count( + "max_write_lock_count", + "After this many write locks, allow some read locks to run in between", + GLOBAL_VAR(max_write_lock_count), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, UINT_MAX), DEFAULT(UINT_MAX), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_min_examined_row_limit( + "min_examined_row_limit", + "Alias for log_slow_min_examined_row_limit. " + "Don't write queries to slow log that examine fewer rows " + "than that", + SESSION_VAR(min_examined_row_limit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_log_slow_min_examined_row_limit( + "log_slow_min_examined_row_limit", + "Don't write queries to slow log that examine fewer rows " + "than that", + SESSION_VAR(min_examined_row_limit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(0), BLOCK_SIZE(1)); + +#ifdef _WIN32 +static Sys_var_mybool Sys_named_pipe( + "named_pipe", "Enable the named pipe (NT)", + READ_ONLY GLOBAL_VAR(opt_enable_named_pipe), CMD_LINE(OPT_ARG), + DEFAULT(FALSE)); +#endif + + +static bool check_net_buffer_length(sys_var *self, THD *thd, set_var *var) +{ + longlong val; + if (session_readonly(self, thd, var)) + return true; + + val= var->save_result.ulonglong_value; + if (val > (longlong) global_system_variables.max_allowed_packet) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_OPTION_BELOW_LIMIT, + ER_THD(thd, WARN_OPTION_BELOW_LIMIT), + "max_allowed_packet", "net_buffer_length"); + } + return false; +} +static Sys_var_ulong Sys_net_buffer_length( + "net_buffer_length", + "Buffer length for TCP/IP and socket communication", + SESSION_VAR(net_buffer_length), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1024, 1024*1024), DEFAULT(16384), BLOCK_SIZE(1024), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_net_buffer_length)); + +static bool 
fix_net_read_timeout(sys_var *self, THD *thd, enum_var_type type) +{ + if (type != OPT_GLOBAL) + my_net_set_read_timeout(&thd->net, thd->variables.net_read_timeout); + return false; +} +static Sys_var_ulong Sys_net_read_timeout( + "net_read_timeout", + "Number of seconds to wait for more data from a connection before " + "aborting the read", + SESSION_VAR(net_read_timeout), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, LONG_TIMEOUT), DEFAULT(NET_READ_TIMEOUT), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_net_read_timeout)); + +static bool fix_net_write_timeout(sys_var *self, THD *thd, enum_var_type type) +{ + if (type != OPT_GLOBAL) + my_net_set_write_timeout(&thd->net, thd->variables.net_write_timeout); + return false; +} +static Sys_var_ulong Sys_net_write_timeout( + "net_write_timeout", + "Number of seconds to wait for a block to be written to a connection " + "before aborting the write", + SESSION_VAR(net_write_timeout), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, LONG_TIMEOUT), DEFAULT(NET_WRITE_TIMEOUT), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_net_write_timeout)); + +static bool fix_net_retry_count(sys_var *self, THD *thd, enum_var_type type) +{ + if (type != OPT_GLOBAL) + thd->net.retry_count=thd->variables.net_retry_count; + return false; +} +static Sys_var_ulong Sys_net_retry_count( + "net_retry_count", + "If a read on a communication port is interrupted, retry this " + "many times before giving up", + SESSION_VAR(net_retry_count), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, UINT_MAX), DEFAULT(MYSQLD_NET_RETRY_COUNT), + BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_net_retry_count)); + +static bool set_old_mode (sys_var *self, THD *thd, enum_var_type type) +{ + if (thd->variables.old_mode) + { + thd->variables.old_behavior|= (OLD_MODE_NO_PROGRESS_INFO | + OLD_MODE_IGNORE_INDEX_ONLY_FOR_JOIN | + OLD_MODE_COMPAT_5_1_CHECKSUM); + } + else + { + thd->variables.old_behavior&= 
~(OLD_MODE_NO_PROGRESS_INFO| + OLD_MODE_IGNORE_INDEX_ONLY_FOR_JOIN | + OLD_MODE_COMPAT_5_1_CHECKSUM); + } + + return false; +} + +static Sys_var_mybool Sys_old_mode( + "old", "Use compatible behavior from previous MariaDB version. See also --old-mode", + SESSION_VAR(old_mode), CMD_LINE(OPT_ARG), DEFAULT(FALSE), 0, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(set_old_mode), DEPRECATED("'@@old_mode'")); + +static Sys_var_mybool Sys_opt_allow_suspicious_udfs( + "allow_suspicious_udfs", + "Allows use of user-defined functions (UDFs) consisting of only one symbol xxx() without corresponding xxx_init() or xxx_deinit(). That also means that one can load any function from any library, for example exit() from libc.so", + READ_ONLY GLOBAL_VAR(opt_allow_suspicious_udfs), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +#ifndef DISABLE_GRANT_OPTIONS +static Sys_var_mybool Sys_skip_grant_tables( + "skip_grant_tables", + "Start without grant tables. This gives all users FULL ACCESS to all tables.", + READ_ONLY GLOBAL_VAR(opt_noacl), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); +#endif + +static const char *alter_algorithm_modes[]= {"DEFAULT", "COPY", "INPLACE", +"NOCOPY", "INSTANT", NULL}; + +static Sys_var_enum Sys_alter_algorithm( + "alter_algorithm", "Specify the alter table algorithm", + SESSION_VAR(alter_algorithm), CMD_LINE(OPT_ARG), + alter_algorithm_modes, DEFAULT(0)); + +static Sys_var_enum Sys_old_alter_table( + "old_alter_table", "Alias for alter_algorithm. " + "Deprecated. 
Use --alter-algorithm instead.", + SESSION_VAR(alter_algorithm), CMD_LINE(OPT_ARG), + alter_algorithm_modes, DEFAULT(0), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(0), ON_UPDATE(0), + DEPRECATED("'@@alter_algorithm'")); // Since 10.5.1 + +static bool check_old_passwords(sys_var *self, THD *thd, set_var *var) +{ + return mysql_user_table_is_in_short_password_format; +} +static Sys_var_mybool Sys_old_passwords( + "old_passwords", + "Use old password encryption method (needed for 4.0 and older clients)", + SESSION_VAR(old_passwords), CMD_LINE(OPT_ARG), + DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_old_passwords)); +export sys_var *Sys_old_passwords_ptr= &Sys_old_passwords; // for sql_acl.cc + +static Sys_var_ulong Sys_open_files_limit( + "open_files_limit", + "If this is not 0, then mysqld will use this value to reserve file " + "descriptors to use with setrlimit(). If this value is 0 or autoset " + "then mysqld will reserve max_connections*5 or max_connections + " + "table_cache*2 (whichever is larger) number of file descriptors", + AUTO_SET READ_ONLY GLOBAL_VAR(open_files_limit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, OS_FILE_LIMIT), DEFAULT(0), BLOCK_SIZE(1)); + +/// @todo change to enum +static Sys_var_ulong Sys_optimizer_prune_level( + "optimizer_prune_level", + "Controls the heuristic(s) applied during query optimization to prune " + "less-promising partial plans from the optimizer search space. 
" + "Meaning: 0 - do not apply any heuristic, thus perform exhaustive " + "search: 1 - prune plans based on cost and number of retrieved rows " + "eq_ref: 2 - prune also if we find an eq_ref chain", + SESSION_VAR(optimizer_prune_level), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 2), DEFAULT(2), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_optimizer_selectivity_sampling_limit( + "optimizer_selectivity_sampling_limit", + "Controls number of record samples to check condition selectivity", + SESSION_VAR(optimizer_selectivity_sampling_limit), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(SELECTIVITY_SAMPLING_THRESHOLD, UINT_MAX), + DEFAULT(SELECTIVITY_SAMPLING_LIMIT), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_optimizer_use_condition_selectivity( + "optimizer_use_condition_selectivity", + "Controls selectivity of which conditions the optimizer takes into " + "account to calculate cardinality of a partial join when it searches " + "for the best execution plan " + "Meaning: " + "1 - use selectivity of index backed range conditions to calculate " + "the cardinality of a partial join if the last joined table is " + "accessed by full table scan or an index scan, " + "2 - use selectivity of index backed range conditions to calculate " + "the cardinality of a partial join in any case, " + "3 - additionally always use selectivity of range conditions that are " + "not backed by any index to calculate the cardinality of a partial join, " + "4 - use histograms to calculate selectivity of range conditions that " + "are not backed by any index to calculate the cardinality of " + "a partial join." + "5 - additionally use selectivity of certain non-range predicates " + "calculated on record samples", + SESSION_VAR(optimizer_use_condition_selectivity), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 5), DEFAULT(4), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_optimizer_search_depth( + "optimizer_search_depth", + "Maximum depth of search performed by the query optimizer. 
Values " + "larger than the number of relations in a query result in better " + "query plans, but take longer to compile a query. Values smaller " + "than the number of tables in a relation result in faster " + "optimization, but may produce very bad query plans. If set to 0, " + "the system will automatically pick a reasonable value.", + SESSION_VAR(optimizer_search_depth), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, MAX_TABLES+1), DEFAULT(MAX_TABLES+1), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_optimizer_extra_pruning_depth( + "optimizer_extra_pruning_depth", + "If the optimizer needs to enumerate join prefix of this size or " + "larger, then it will try aggressively prune away the search space.", + SESSION_VAR(optimizer_extra_pruning_depth), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, MAX_TABLES+1), DEFAULT(8), BLOCK_SIZE(1)); + +/* this is used in the sigsegv handler */ +export const char *optimizer_switch_names[]= +{ + "index_merge","index_merge_union","index_merge_sort_union", + "index_merge_intersection","index_merge_sort_intersection", + "engine_condition_pushdown", + "index_condition_pushdown", + "derived_merge", "derived_with_keys", + "firstmatch","loosescan","materialization","in_to_exists","semijoin", + "partial_match_rowid_merge", + "partial_match_table_scan", + "subquery_cache", + "mrr", + "mrr_cost_based", + "mrr_sort_keys", + "outer_join_with_cache", + "semijoin_with_cache", + "join_cache_incremental", + "join_cache_hashed", + "join_cache_bka", + "optimize_join_buffer_size", + "table_elimination", + "extended_keys", + "exists_to_in", + "orderby_uses_equalities", + "condition_pushdown_for_derived", + "split_materialized", + "condition_pushdown_for_subquery", + "rowid_filter", + "condition_pushdown_from_having", + "not_null_range_scan", + "hash_join_cardinality", + "cset_narrowing", + "default", + NullS +}; +static bool fix_optimizer_switch(sys_var *self, THD *thd, + enum_var_type type) +{ + SV *sv= (type == OPT_GLOBAL) ? 
&global_system_variables : &thd->variables; + if (sv->optimizer_switch & deprecated_ENGINE_CONDITION_PUSHDOWN) + push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT, + ER_THD(thd, ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT), + "engine_condition_pushdown=on"); // since 10.1.1 + return false; +} +static bool check_legal_optimizer_switch(sys_var *self, THD *thd, + set_var *var) +{ + if (var->save_result.ulonglong_value & (OPTIMIZER_SWITCH_MATERIALIZATION | + OPTIMIZER_SWITCH_IN_TO_EXISTS)) + { + return false; + } + my_error(ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES, MYF(0)); + return true; +} +static Sys_var_flagset Sys_optimizer_switch( + "optimizer_switch", + "Fine-tune the optimizer behavior", + SESSION_VAR(optimizer_switch), CMD_LINE(REQUIRED_ARG), + optimizer_switch_names, DEFAULT(OPTIMIZER_SWITCH_DEFAULT), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_legal_optimizer_switch), + ON_UPDATE(fix_optimizer_switch)); + +static Sys_var_flagset Sys_optimizer_trace( + "optimizer_trace", + "Controls tracing of the Optimizer:" + " optimizer_trace=option=val[,option=val...], where option is one of" + " {enabled}" + " and val is one of {on, off, default}", + SESSION_VAR(optimizer_trace), CMD_LINE(REQUIRED_ARG), + Opt_trace_context::flag_names, DEFAULT(Opt_trace_context::FLAG_DEFAULT)); + // @see set_var::is_var_optimizer_trace() +export sys_var *Sys_optimizer_trace_ptr = &Sys_optimizer_trace; + +static Sys_var_ulong Sys_optimizer_trace_max_mem_size( + "optimizer_trace_max_mem_size", + "Maximum allowed size of an optimizer trace", + SESSION_VAR(optimizer_trace_max_mem_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, ULONG_MAX), DEFAULT(1024 * 1024), BLOCK_SIZE(1)); + +static Sys_var_charptr_fscs Sys_pid_file( + "pid_file", "Pid file used by safe_mysqld", + READ_ONLY GLOBAL_VAR(pidfile_name_ptr), CMD_LINE(REQUIRED_ARG), + DEFAULT(0)); + +static Sys_var_charptr_fscs Sys_plugin_dir( + "plugin_dir", "Directory for plugins", + 
READ_ONLY GLOBAL_VAR(opt_plugin_dir_ptr), CMD_LINE(REQUIRED_ARG), + DEFAULT(0)); + +static Sys_var_uint Sys_port( + "port", + "Port number to use for connection or 0 to default to, " + "my.cnf, $MYSQL_TCP_PORT, " +#if MYSQL_PORT_DEFAULT == 0 + "/etc/services, " +#endif + "built-in default (" STRINGIFY_ARG(MYSQL_PORT) "), whatever comes first", + READ_ONLY GLOBAL_VAR(mysqld_port), CMD_LINE(REQUIRED_ARG, 'P'), + VALID_RANGE(0, UINT_MAX32), DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_preload_buff_size( + "preload_buffer_size", + "The size of the buffer that is allocated when preloading indexes", + SESSION_VAR(preload_buff_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1024, 1024*1024*1024), DEFAULT(32768), BLOCK_SIZE(1)); + +static Sys_var_uint Sys_protocol_version( + "protocol_version", + "The version of the client/server protocol used by the MariaDB server", + READ_ONLY GLOBAL_VAR(protocol_version), CMD_LINE_HELP_ONLY, + VALID_RANGE(0, ~0U), DEFAULT(PROTOCOL_VERSION), BLOCK_SIZE(1)); + +static Sys_var_proxy_user Sys_proxy_user( + "proxy_user", "The proxy user account name used when logging in"); + +static Sys_var_external_user Sys_exterenal_user( + "external_user", "The external user account used when logging in"); + +static Sys_var_ulong Sys_read_buff_size( + "read_buffer_size", + "Each thread that does a sequential scan allocates a buffer of " + "this size for each table it scans. 
If you do many sequential scans, " + "you may want to increase this value", + SESSION_VAR(read_buff_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(IO_SIZE*2, INT_MAX32), DEFAULT(128*1024), + BLOCK_SIZE(IO_SIZE)); + +static bool check_read_only(sys_var *self, THD *thd, set_var *var) +{ + /* Prevent self dead-lock */ + if (thd->locked_tables_mode || thd->in_active_multi_stmt_transaction() || + thd->current_backup_stage != BACKUP_FINISHED) + { + my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0)); + return true; + } + return false; +} + +static bool fix_read_only(sys_var *self, THD *thd, enum_var_type type) +{ + bool result= true; + my_bool new_read_only= read_only; // make a copy before releasing a mutex + DBUG_ENTER("sys_var_opt_readonly::update"); + + if (read_only == FALSE || read_only == opt_readonly) + { + opt_readonly= read_only; + DBUG_RETURN(false); + } + + if (check_read_only(self, thd, 0)) // just in case + goto end; + + if (thd->global_read_lock.is_acquired()) + { + /* + This connection already holds the global read lock. + This can be the case with: + - FLUSH TABLES WITH READ LOCK + - SET GLOBAL READ_ONLY = 1 + */ + opt_readonly= read_only; + DBUG_RETURN(false); + } + + /* + READ_ONLY=1 prevents write locks from being taken on tables and + blocks transactions from committing. We therefore should make sure + that no such events occur while setting the read_only variable. + This is a 2 step process: + [1] lock_global_read_lock() + Prevents connections from obtaining new write locks on + tables. Note that we can still have active rw transactions. + [2] make_global_read_lock_block_commit() + Prevents transactions from committing. 
+ */ + + read_only= opt_readonly; + mysql_mutex_unlock(&LOCK_global_system_variables); + + if (thd->global_read_lock.lock_global_read_lock(thd)) + goto end_with_mutex_unlock; + + if ((result= thd->global_read_lock.make_global_read_lock_block_commit(thd))) + goto end_with_read_lock; + + /* Change the opt_readonly system variable, safe because the lock is held */ + opt_readonly= new_read_only; + result= false; + + end_with_read_lock: + /* Release the lock */ + thd->global_read_lock.unlock_global_read_lock(thd); + end_with_mutex_unlock: + mysql_mutex_lock(&LOCK_global_system_variables); + end: + read_only= opt_readonly; + DBUG_RETURN(result); +} + + +/** + The read_only boolean is always equal to the opt_readonly boolean except + during fix_read_only(); when that function is entered, opt_readonly is + the pre-update value and read_only is the post-update value. + fix_read_only() compares them and runs needed operations for the + transition (especially when transitioning from false to true) and + synchronizes both booleans in the end. 
+*/ +static Sys_var_on_access_global +Sys_readonly( + "read_only", + "Make all non-temporary tables read-only, with the exception for " + "replication (slave) threads and users with the 'READ ONLY ADMIN' " + "privilege", + GLOBAL_VAR(read_only), CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_read_only), ON_UPDATE(fix_read_only)); + +// Small lower limit to be able to test MRR +static Sys_var_ulong Sys_read_rnd_buff_size( + "read_rnd_buffer_size", + "When reading rows in sorted order after a sort, the rows are read " + "through this buffer to avoid a disk seeks", + SESSION_VAR(read_rnd_buff_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, INT_MAX32), DEFAULT(256*1024), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_div_precincrement( + "div_precision_increment", "Precision of the result of '/' " + "operator will be increased on that value", + SESSION_VAR(div_precincrement), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, DECIMAL_MAX_SCALE), DEFAULT(4), BLOCK_SIZE(1)); + +static Sys_var_uint Sys_eq_range_index_dive_limit( + "eq_range_index_dive_limit", + "The optimizer will use existing index statistics instead of " + "doing index dives for equality ranges if the number of equality " + "ranges for the index is larger than or equal to this number. 
" + "If set to 0, index dives are always used.", + SESSION_VAR(eq_range_index_dive_limit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX32), DEFAULT(200), + BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_range_alloc_block_size( + "range_alloc_block_size", + "Allocation block size for storing ranges during optimization", + SESSION_VAR(range_alloc_block_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(RANGE_ALLOC_BLOCK_SIZE, UINT_MAX), + DEFAULT(RANGE_ALLOC_BLOCK_SIZE), BLOCK_SIZE(1024)); + +static bool fix_thd_mem_root(sys_var *self, THD *thd, enum_var_type type) +{ + if (type != OPT_GLOBAL) + reset_root_defaults(thd->mem_root, + thd->variables.query_alloc_block_size, + thd->variables.query_prealloc_size); + return false; +} +static Sys_var_ulong Sys_query_alloc_block_size( + "query_alloc_block_size", + "Allocation block size for query parsing and execution", + SESSION_VAR(query_alloc_block_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1024, UINT_MAX), DEFAULT(QUERY_ALLOC_BLOCK_SIZE), + BLOCK_SIZE(1024), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_thd_mem_root)); + +static Sys_var_ulong Sys_query_prealloc_size( + "query_prealloc_size", + "Persistent buffer for query parsing and execution", + SESSION_VAR(query_prealloc_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1024, UINT_MAX), + DEFAULT(QUERY_ALLOC_PREALLOC_SIZE), + BLOCK_SIZE(1024), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_thd_mem_root)); + + +// this has to be NO_CMD_LINE as the command-line option has a different name +static Sys_var_mybool Sys_skip_external_locking( + "skip_external_locking", "Don't use system (external) locking", + READ_ONLY GLOBAL_VAR(my_disable_locking), NO_CMD_LINE, DEFAULT(TRUE)); + +static Sys_var_mybool Sys_skip_networking( + "skip_networking", "Don't allow connection with TCP/IP", + READ_ONLY GLOBAL_VAR(opt_disable_networking), CMD_LINE(OPT_ARG), + DEFAULT(FALSE)); + +static Sys_var_mybool Sys_skip_name_resolve( + "skip_name_resolve", + "Don't resolve 
hostnames. All hostnames are IP's or 'localhost'.", + READ_ONLY GLOBAL_VAR(opt_skip_name_resolve), + CMD_LINE(OPT_ARG), + DEFAULT(FALSE)); + +static Sys_var_mybool Sys_skip_show_database( + "skip_show_database", "Don't allow 'SHOW DATABASE' commands", + READ_ONLY GLOBAL_VAR(opt_skip_show_db), CMD_LINE(OPT_ARG), + DEFAULT(FALSE)); + +static Sys_var_charptr_fscs Sys_socket( + "socket", "Socket file to use for connection", + READ_ONLY GLOBAL_VAR(mysqld_unix_port), CMD_LINE(REQUIRED_ARG), + DEFAULT(0)); + +static Sys_var_ulonglong Sys_thread_stack( + "thread_stack", "The stack size for each thread", + READ_ONLY GLOBAL_VAR(my_thread_stack_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(128*1024, ULONGLONG_MAX), DEFAULT(DEFAULT_THREAD_STACK), + BLOCK_SIZE(1024)); + +static Sys_var_charptr_fscs Sys_tmpdir( + "tmpdir", "Path for temporary files. Several paths may " + "be specified, separated by a " +#if defined(_WIN32) + "semicolon (;)" +#else + "colon (:)" +#endif + ", in this case they are used in a round-robin fashion", + READ_ONLY GLOBAL_VAR(opt_mysql_tmpdir), CMD_LINE(REQUIRED_ARG, 't'), + DEFAULT(0)); + +static bool fix_trans_mem_root(sys_var *self, THD *thd, enum_var_type type) +{ + if (type != OPT_GLOBAL) + reset_root_defaults(&thd->transaction->mem_root, + thd->variables.trans_alloc_block_size, + thd->variables.trans_prealloc_size); + return false; +} +static Sys_var_ulong Sys_trans_alloc_block_size( + "transaction_alloc_block_size", + "Allocation block size for transactions to be stored in binary log", + SESSION_VAR(trans_alloc_block_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1024, 128 * 1024 * 1024), DEFAULT(TRANS_ALLOC_BLOCK_SIZE), + BLOCK_SIZE(1024), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_trans_mem_root)); + +static Sys_var_ulong Sys_trans_prealloc_size( + "transaction_prealloc_size", + "Persistent buffer for transactions to be stored in binary log", + SESSION_VAR(trans_prealloc_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1024, 128 * 1024 
* 1024), DEFAULT(TRANS_ALLOC_PREALLOC_SIZE), + BLOCK_SIZE(1024), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_trans_mem_root)); + +static const char *thread_handling_names[]= +{ + "one-thread-per-connection", "no-threads", +#ifdef HAVE_POOL_OF_THREADS + "pool-of-threads", +#endif + 0 +}; + +#if defined (_WIN32) && defined (HAVE_POOL_OF_THREADS) +/* Windows is using OS threadpool, so we're pretty sure it works well */ +#define DEFAULT_THREAD_HANDLING 2 +#else +#define DEFAULT_THREAD_HANDLING 0 +#endif + +static Sys_var_enum Sys_thread_handling( + "thread_handling", + "Define threads usage for handling queries", + READ_ONLY GLOBAL_VAR(thread_handling), CMD_LINE(REQUIRED_ARG), + thread_handling_names, + DEFAULT(DEFAULT_THREAD_HANDLING) + ); + +#ifdef HAVE_QUERY_CACHE +static bool fix_query_cache_size(sys_var *self, THD *thd, enum_var_type type) +{ + size_t new_cache_size= query_cache.resize((size_t)query_cache_size); + /* + Note: query_cache_size is a global variable reflecting the + requested cache size. 
See also query_cache_size_arg + */ + if (query_cache_size != new_cache_size) + push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_QC_RESIZE, ER_THD(thd, ER_WARN_QC_RESIZE), + query_cache_size, (ulong)new_cache_size); + + query_cache_size= new_cache_size; + + return false; +} + +static bool fix_query_cache_limit(sys_var *self, THD *thd, enum_var_type type) +{ + query_cache.result_size_limit(query_cache_limit); + return false; +} +static Sys_var_ulonglong Sys_query_cache_size( + "query_cache_size", + "The memory allocated to store results from old queries", + GLOBAL_VAR(query_cache_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, ULONG_MAX), DEFAULT(1024*1024), BLOCK_SIZE(1024), + NO_MUTEX_GUARD, NOT_IN_BINLOG, NULL, + ON_UPDATE(fix_query_cache_size)); + +static Sys_var_ulong Sys_query_cache_limit( + "query_cache_limit", + "Don't cache results that are bigger than this", + GLOBAL_VAR(query_cache_limit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(1024*1024), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_query_cache_limit)); + +static bool fix_qcache_min_res_unit(sys_var *self, THD *thd, enum_var_type type) +{ + query_cache_min_res_unit= + (ulong)query_cache.set_min_res_unit(query_cache_min_res_unit); + return false; +} +static Sys_var_ulong Sys_query_cache_min_res_unit( + "query_cache_min_res_unit", + "The minimum size for blocks allocated by the query cache", + GLOBAL_VAR(query_cache_min_res_unit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(QUERY_CACHE_MIN_RESULT_DATA_SIZE), + BLOCK_SIZE(8), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_qcache_min_res_unit)); + +static const char *query_cache_type_names[]= { "OFF", "ON", "DEMAND", 0 }; + +static bool check_query_cache_type(sys_var *self, THD *thd, set_var *var) +{ + if (query_cache.is_disable_in_progress()) + { + my_error(ER_QUERY_CACHE_IS_DISABLED, MYF(0)); + return true; + } + + if (var->type != OPT_GLOBAL && 
global_system_variables.query_cache_type == 0) + { + if (var->value) + { + if (var->save_result.ulonglong_value != 0) + { + my_error(ER_QUERY_CACHE_IS_GLOBALY_DISABLED, MYF(0)); + return true; + } + } + } + return false; +} + + +static bool fix_query_cache_type(sys_var *self, THD *thd, enum_var_type type) +{ + if (type != OPT_GLOBAL) + return false; + + if (global_system_variables.query_cache_type != 0 && + query_cache.is_disabled()) + { + /* if disabling in progress variable will not be set */ + DBUG_ASSERT(!query_cache.is_disable_in_progress()); + /* Enable query cache because it was disabled */ + fix_query_cache_size(0, thd, type); + } + else if (global_system_variables.query_cache_type == 0) + query_cache.disable_query_cache(thd); + return false; +} +static Sys_var_enum Sys_query_cache_type( + "query_cache_type", + "OFF = Don't cache or retrieve results. ON = Cache all results " + "except SELECT SQL_NO_CACHE ... queries. DEMAND = Cache only " + "SELECT SQL_CACHE ... queries", + NO_SET_STMT SESSION_VAR(query_cache_type), CMD_LINE(REQUIRED_ARG), + query_cache_type_names, DEFAULT(0), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_query_cache_type), + ON_UPDATE(fix_query_cache_type)); + +static Sys_var_mybool Sys_query_cache_wlock_invalidate( + "query_cache_wlock_invalidate", + "Invalidate queries in query cache on LOCK for write", + SESSION_VAR(query_cache_wlock_invalidate), CMD_LINE(OPT_ARG), + DEFAULT(FALSE)); +#endif /* HAVE_QUERY_CACHE */ + +static Sys_var_on_access_global +Sys_secure_auth( + "secure_auth", + "Disallow authentication for accounts that have old (pre-4.1) " + "passwords", + GLOBAL_VAR(opt_secure_auth), CMD_LINE(OPT_ARG), + DEFAULT(TRUE)); + +static bool check_require_secure_transport(sys_var *self, THD *thd, set_var *var) +{ +#ifndef _WIN32 + /* + Always allow require_secure_transport to be enabled on + Linux, because it always has Unix domain sockets that are secure: + */ + return false; +#else + /* + Check SSL is enabled before turning 
require_secure_transport ON, + otherwise no connections will be allowed on Windows: + */ + if (!var->save_result.ulonglong_value) + return false; + if (opt_use_ssl || opt_enable_named_pipe) + return false; + /* reject if SSL is disabled: */ + my_error(ER_NO_SECURE_TRANSPORTS_CONFIGURED, MYF(0)); + return true; +#endif +} + +static Sys_var_mybool Sys_require_secure_transport( + "require_secure_transport", + "When this option is enabled, connections attempted using insecure " + "transport will be rejected. Secure transports are SSL/TLS, " + "Unix sockets or named pipes.", + GLOBAL_VAR(opt_require_secure_transport), + CMD_LINE(OPT_ARG), + DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_require_secure_transport), ON_UPDATE(0)); + +static Sys_var_charptr_fscs Sys_secure_file_priv( + "secure_file_priv", + "Limit LOAD DATA, SELECT ... OUTFILE, and LOAD_FILE() to files " + "within specified directory", + PREALLOCATED READ_ONLY GLOBAL_VAR(opt_secure_file_priv), + CMD_LINE(REQUIRED_ARG, OPT_SEQURE_FILE_PRIV), DEFAULT(0)); + +static bool check_server_id(sys_var *self, THD *thd, set_var *var) +{ +#ifdef WITH_WSREP + if (WSREP_ON && WSREP_PROVIDER_EXISTS && !wsrep_new_cluster && wsrep_gtid_mode) + { + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "Can't change server_id because wsrep and wsrep_gtid_mode is set." + " You can set server_id only with wsrep_new_cluster. "); + return true; + } +#endif /* WITH_WSREP */ + return false; +} + +static bool fix_server_id(sys_var *self, THD *thd, enum_var_type type) +{ + if (type == OPT_GLOBAL) + { + thd->variables.server_id= global_system_variables.server_id; + /* + Historically, server_id was a global variable that is exported to + plugins. Now it is a session variable, and lives in the + global_system_variables struct, but we still need to export the + value for reading to plugins for backwards compatibility reasons. 
+ */ + ::server_id= global_system_variables.server_id; + } + return false; +} +static Sys_var_on_access +Sys_server_id( + "server_id", + "Uniquely identifies the server instance in the community of " + "replication partners", + SESSION_VAR(server_id), CMD_LINE(REQUIRED_ARG, OPT_SERVER_ID), + VALID_RANGE(1, UINT_MAX32), DEFAULT(1), BLOCK_SIZE(1), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(check_server_id), ON_UPDATE(fix_server_id)); + +static Sys_var_on_access_global +Sys_slave_compressed_protocol( + "slave_compressed_protocol", + "Use compression on master/slave protocol", + GLOBAL_VAR(opt_slave_compressed_protocol), CMD_LINE(OPT_ARG), + DEFAULT(FALSE)); + +#ifdef HAVE_REPLICATION +static const char *slave_exec_mode_names[]= {"STRICT", "IDEMPOTENT", 0}; +static Sys_var_on_access_global +Slave_exec_mode( + "slave_exec_mode", + "How replication events should be executed. Legal values " + "are STRICT (default) and IDEMPOTENT. In IDEMPOTENT mode, " + "replication will not stop for operations that are idempotent. " + "For example, in row based replication attempts to delete rows that " + "doesn't exist will be ignored. " + "In STRICT mode, replication will stop on any unexpected difference " + "between the master and the slave.", + GLOBAL_VAR(slave_exec_mode_options), CMD_LINE(REQUIRED_ARG), + slave_exec_mode_names, DEFAULT(SLAVE_EXEC_MODE_STRICT)); + +static Sys_var_on_access_global +Slave_ddl_exec_mode( + "slave_ddl_exec_mode", + "How replication events should be executed. Legal values " + "are STRICT and IDEMPOTENT (default). In IDEMPOTENT mode, " + "replication will not stop for DDL operations that are idempotent. 
" + "This means that CREATE TABLE is treated as CREATE TABLE OR REPLACE and " + "DROP TABLE is treated as DROP TABLE IF EXISTS.", + GLOBAL_VAR(slave_ddl_exec_mode_options), CMD_LINE(REQUIRED_ARG), + slave_exec_mode_names, DEFAULT(SLAVE_EXEC_MODE_IDEMPOTENT)); + +static const char *slave_run_triggers_for_rbr_names[]= + {"NO", "YES", "LOGGING", "ENFORCE", 0}; +static Sys_var_on_access_global +Slave_run_triggers_for_rbr( + "slave_run_triggers_for_rbr", + "Modes for how triggers in row-base replication on slave side will be " + "executed. Legal values are NO (default), YES, LOGGING and ENFORCE. NO means " + "that trigger for RBR will not be running on slave. YES and LOGGING " + "means that triggers will be running on slave, if there was not " + "triggers running on the master for the statement. LOGGING also means " + "results of that the executed triggers work will be written to " + "the binlog. ENFORCE means that triggers will always be run on the slave, " + "even if there are triggers on the master. ENFORCE implies LOGGING.", + GLOBAL_VAR(slave_run_triggers_for_rbr), CMD_LINE(REQUIRED_ARG), + slave_run_triggers_for_rbr_names, + DEFAULT(SLAVE_RUN_TRIGGERS_FOR_RBR_NO)); + +static const char *slave_type_conversions_name[]= {"ALL_LOSSY", "ALL_NON_LOSSY", 0}; +static Sys_var_on_access_global +Slave_type_conversions( + "slave_type_conversions", + "Set of slave type conversions that are enabled." + " If the variable is empty, no conversions are" + " allowed and it is expected that the types match exactly", + GLOBAL_VAR(slave_type_conversions_options), CMD_LINE(REQUIRED_ARG), + slave_type_conversions_name, + DEFAULT(0)); + +static Sys_var_on_access_global +Sys_slave_sql_verify_checksum( + "slave_sql_verify_checksum", + "Force checksum verification of replication events after reading them " + "from relay log. 
Note: Events are always checksum-verified by slave on " + "receiving them from the network before writing them to the relay log", + GLOBAL_VAR(opt_slave_sql_verify_checksum), CMD_LINE(OPT_ARG), + DEFAULT(TRUE)); + +static Sys_var_on_access_global +Sys_master_verify_checksum( + "master_verify_checksum", + "Force checksum verification of logged events in the binary log before " + "sending them to slaves or printing them in the output of " + "SHOW BINLOG EVENTS", + GLOBAL_VAR(opt_master_verify_checksum), CMD_LINE(OPT_ARG), + DEFAULT(FALSE)); + +/* These names must match RPL_SKIP_XXX #defines in slave.h. */ +static const char *replicate_events_marked_for_skip_names[]= { + "REPLICATE", "FILTER_ON_SLAVE", "FILTER_ON_MASTER", 0 +}; + +bool +Sys_var_replicate_events_marked_for_skip::global_update(THD *thd, set_var *var) +{ + bool result= true; // Assume error + DBUG_ENTER("Sys_var_replicate_events_marked_for_skip::global_update"); + + mysql_mutex_unlock(&LOCK_global_system_variables); + if (!give_error_if_slave_running(0)) + result= Sys_var_enum::global_update(thd, var); + mysql_mutex_lock(&LOCK_global_system_variables); + DBUG_RETURN(result); +} + +static Sys_var_on_access_global +Replicate_events_marked_for_skip + ("replicate_events_marked_for_skip", + "Whether the slave should replicate events that were created with " + "@@skip_replication=1 on the master. Default REPLICATE (no events are " + "skipped). 
Other values are FILTER_ON_SLAVE (events will be sent by the " + "master but ignored by the slave) and FILTER_ON_MASTER (events marked with " + "@@skip_replication=1 will be filtered on the master and never be sent to " + "the slave).", + GLOBAL_VAR(opt_replicate_events_marked_for_skip), CMD_LINE(REQUIRED_ARG), + replicate_events_marked_for_skip_names, DEFAULT(RPL_SKIP_REPLICATE)); + +/* new options for semisync */ + +static bool fix_rpl_semi_sync_master_enabled(sys_var *self, THD *thd, + enum_var_type type) +{ + mysql_mutex_unlock(&LOCK_global_system_variables); + mysql_mutex_lock(&repl_semisync_master.LOCK_rpl_semi_sync_master_enabled); + if (rpl_semi_sync_master_enabled) + { + if (repl_semisync_master.enable_master() != 0) + rpl_semi_sync_master_enabled= false; + else if (ack_receiver.start()) + { + repl_semisync_master.disable_master(); + rpl_semi_sync_master_enabled= false; + } + } + else + { + repl_semisync_master.disable_master(); + ack_receiver.stop(); + } + mysql_mutex_unlock(&repl_semisync_master.LOCK_rpl_semi_sync_master_enabled); + mysql_mutex_lock(&LOCK_global_system_variables); + return false; +} + +static bool fix_rpl_semi_sync_master_timeout(sys_var *self, THD *thd, + enum_var_type type) +{ + repl_semisync_master.set_wait_timeout(rpl_semi_sync_master_timeout); + return false; +} + +static bool fix_rpl_semi_sync_master_trace_level(sys_var *self, THD *thd, + enum_var_type type) +{ + repl_semisync_master.set_trace_level(rpl_semi_sync_master_trace_level); + ack_receiver.set_trace_level(rpl_semi_sync_master_trace_level); + return false; +} + +static bool fix_rpl_semi_sync_master_wait_point(sys_var *self, THD *thd, + enum_var_type type) +{ + repl_semisync_master.set_wait_point(rpl_semi_sync_master_wait_point); + return false; +} + +static bool fix_rpl_semi_sync_master_wait_no_slave(sys_var *self, THD *thd, + enum_var_type type) +{ + repl_semisync_master.check_and_switch(); + return false; +} + +static Sys_var_on_access_global +Sys_semisync_master_enabled( 
+ "rpl_semi_sync_master_enabled", + "Enable semi-synchronous replication master (disabled by default).", + GLOBAL_VAR(rpl_semi_sync_master_enabled), + CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_rpl_semi_sync_master_enabled)); + +static Sys_var_on_access_global +Sys_semisync_master_timeout( + "rpl_semi_sync_master_timeout", + "The timeout value (in ms) for semi-synchronous replication in the " + "master", + GLOBAL_VAR(rpl_semi_sync_master_timeout), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0,~0L),DEFAULT(10000),BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_rpl_semi_sync_master_timeout)); + +static Sys_var_on_access_global +Sys_semisync_master_wait_no_slave( + "rpl_semi_sync_master_wait_no_slave", + "Wait until timeout when no semi-synchronous replication slave " + "available (enabled by default).", + GLOBAL_VAR(rpl_semi_sync_master_wait_no_slave), + CMD_LINE(OPT_ARG), DEFAULT(TRUE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_rpl_semi_sync_master_wait_no_slave)); + +static Sys_var_on_access_global +Sys_semisync_master_trace_level( + "rpl_semi_sync_master_trace_level", + "The tracing level for semi-sync replication.", + GLOBAL_VAR(rpl_semi_sync_master_trace_level), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0,~0L),DEFAULT(32),BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_rpl_semi_sync_master_trace_level)); + +static const char *repl_semisync_wait_point[]= +{"AFTER_SYNC", "AFTER_COMMIT", NullS}; + +static Sys_var_on_access_global +Sys_semisync_master_wait_point( + "rpl_semi_sync_master_wait_point", + "Should transaction wait for semi-sync ack after having synced binlog, " + "or after having committed in storage engine.", + GLOBAL_VAR(rpl_semi_sync_master_wait_point), CMD_LINE(REQUIRED_ARG), + repl_semisync_wait_point, DEFAULT(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG,ON_CHECK(0), + ON_UPDATE(fix_rpl_semi_sync_master_wait_point)); + +static bool 
fix_rpl_semi_sync_slave_enabled(sys_var *self, THD *thd, + enum_var_type type) +{ + repl_semisync_slave.set_slave_enabled(rpl_semi_sync_slave_enabled != 0); + return false; +} + +static bool fix_rpl_semi_sync_slave_trace_level(sys_var *self, THD *thd, + enum_var_type type) +{ + repl_semisync_slave.set_trace_level(rpl_semi_sync_slave_trace_level); + return false; +} + +static bool fix_rpl_semi_sync_slave_delay_master(sys_var *self, THD *thd, + enum_var_type type) +{ + repl_semisync_slave.set_delay_master(rpl_semi_sync_slave_delay_master); + return false; +} + +static bool fix_rpl_semi_sync_slave_kill_conn_timeout(sys_var *self, THD *thd, + enum_var_type type) +{ + repl_semisync_slave. + set_kill_conn_timeout(rpl_semi_sync_slave_kill_conn_timeout); + return false; +} + +static Sys_var_on_access_global +Sys_semisync_slave_enabled( + "rpl_semi_sync_slave_enabled", + "Enable semi-synchronous replication slave (disabled by default).", + GLOBAL_VAR(rpl_semi_sync_slave_enabled), + CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_rpl_semi_sync_slave_enabled)); + +static Sys_var_on_access_global +Sys_semisync_slave_trace_level( + "rpl_semi_sync_slave_trace_level", + "The tracing level for semi-sync replication.", + GLOBAL_VAR(rpl_semi_sync_slave_trace_level), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0,~0L),DEFAULT(32),BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_rpl_semi_sync_slave_trace_level)); + +static Sys_var_on_access_global +Sys_semisync_slave_delay_master( + "rpl_semi_sync_slave_delay_master", + "Only write master info file when ack is needed.", + GLOBAL_VAR(rpl_semi_sync_slave_delay_master), + CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_rpl_semi_sync_slave_delay_master)); + +static Sys_var_on_access_global +Sys_semisync_slave_kill_conn_timeout( + "rpl_semi_sync_slave_kill_conn_timeout", + "Timeout for the mysql connection used to kill 
the slave io_thread's " + "connection on master. This timeout comes into play when stop slave " + "is executed.", + GLOBAL_VAR(rpl_semi_sync_slave_kill_conn_timeout), + CMD_LINE(OPT_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(5), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_rpl_semi_sync_slave_kill_conn_timeout)); +#endif /* HAVE_REPLICATION */ + +static Sys_var_on_access_global +Sys_slow_launch_time( + "slow_launch_time", + "If creating the thread takes longer than this value (in seconds), " + "the Slow_launch_threads counter will be incremented", + GLOBAL_VAR(slow_launch_time), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, LONG_TIMEOUT), DEFAULT(2), BLOCK_SIZE(1)); + +static Sys_var_ulonglong Sys_sort_buffer( + "sort_buffer_size", + "Each thread that needs to do a sort allocates a buffer of this size", + SESSION_VAR(sortbuff_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(MIN_SORT_MEMORY, SIZE_T_MAX), DEFAULT(MAX_SORT_MEMORY), + BLOCK_SIZE(1)); + +export sql_mode_t expand_sql_mode(sql_mode_t sql_mode) +{ + if (sql_mode & MODE_ANSI) + { + /* + Note that we don't set + MODE_NO_KEY_OPTIONS | MODE_NO_TABLE_OPTIONS | MODE_NO_FIELD_OPTIONS + to allow one to get full use of MySQL in this mode. + + MODE_ONLY_FULL_GROUP_BY was removed from ANSI mode because it is + currently overly restrictive (see BUG#8510). 
+ */ + sql_mode|= (MODE_REAL_AS_FLOAT | MODE_PIPES_AS_CONCAT | MODE_ANSI_QUOTES | + MODE_IGNORE_SPACE); + } + if (sql_mode & MODE_ORACLE) + sql_mode|= (MODE_PIPES_AS_CONCAT | MODE_ANSI_QUOTES | + MODE_IGNORE_SPACE | + MODE_NO_KEY_OPTIONS | MODE_NO_TABLE_OPTIONS | + MODE_NO_FIELD_OPTIONS | MODE_NO_AUTO_CREATE_USER | + MODE_SIMULTANEOUS_ASSIGNMENT); + if (sql_mode & MODE_MSSQL) + sql_mode|= (MODE_PIPES_AS_CONCAT | MODE_ANSI_QUOTES | + MODE_IGNORE_SPACE | + MODE_NO_KEY_OPTIONS | MODE_NO_TABLE_OPTIONS | + MODE_NO_FIELD_OPTIONS); + if (sql_mode & MODE_POSTGRESQL) + sql_mode|= (MODE_PIPES_AS_CONCAT | MODE_ANSI_QUOTES | + MODE_IGNORE_SPACE | + MODE_NO_KEY_OPTIONS | MODE_NO_TABLE_OPTIONS | + MODE_NO_FIELD_OPTIONS); + if (sql_mode & MODE_DB2) + sql_mode|= (MODE_PIPES_AS_CONCAT | MODE_ANSI_QUOTES | + MODE_IGNORE_SPACE | + MODE_NO_KEY_OPTIONS | MODE_NO_TABLE_OPTIONS | + MODE_NO_FIELD_OPTIONS); + if (sql_mode & MODE_MAXDB) + sql_mode|= (MODE_PIPES_AS_CONCAT | MODE_ANSI_QUOTES | + MODE_IGNORE_SPACE | + MODE_NO_KEY_OPTIONS | MODE_NO_TABLE_OPTIONS | + MODE_NO_FIELD_OPTIONS | MODE_NO_AUTO_CREATE_USER); + if (sql_mode & MODE_MYSQL40) + sql_mode|= MODE_HIGH_NOT_PRECEDENCE; + if (sql_mode & MODE_MYSQL323) + sql_mode|= MODE_HIGH_NOT_PRECEDENCE; + if (sql_mode & MODE_TRADITIONAL) + sql_mode|= (MODE_STRICT_TRANS_TABLES | MODE_STRICT_ALL_TABLES | + MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE | + MODE_ERROR_FOR_DIVISION_BY_ZERO | MODE_NO_AUTO_CREATE_USER | + MODE_NO_ENGINE_SUBSTITUTION); + return sql_mode; +} +static bool check_sql_mode(sys_var *self, THD *thd, set_var *var) +{ + var->save_result.ulonglong_value= + (ulonglong) expand_sql_mode(var->save_result.ulonglong_value); + return false; +} +static bool fix_sql_mode(sys_var *self, THD *thd, enum_var_type type) +{ + if (type != OPT_GLOBAL) + { + /* Update thd->server_status */ + if (thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES) + thd->server_status|= SERVER_STATUS_NO_BACKSLASH_ESCAPES; + else + thd->server_status&= 
~SERVER_STATUS_NO_BACKSLASH_ESCAPES; + if (thd->variables.sql_mode & MODE_ANSI_QUOTES) + thd->server_status|= SERVER_STATUS_ANSI_QUOTES; + else + thd->server_status&= ~SERVER_STATUS_ANSI_QUOTES; + } + return false; +} +/* + WARNING: When adding new SQL modes don't forget to update the + tables definitions that stores it's value (ie: mysql.event, mysql.proc) +*/ +static const char *sql_mode_names[]= +{ + "REAL_AS_FLOAT", "PIPES_AS_CONCAT", "ANSI_QUOTES", "IGNORE_SPACE", + "IGNORE_BAD_TABLE_OPTIONS", + "ONLY_FULL_GROUP_BY", "NO_UNSIGNED_SUBTRACTION", "NO_DIR_IN_CREATE", + "POSTGRESQL", "ORACLE", "MSSQL", "DB2", "MAXDB", "NO_KEY_OPTIONS", + "NO_TABLE_OPTIONS", "NO_FIELD_OPTIONS", "MYSQL323", "MYSQL40", "ANSI", + "NO_AUTO_VALUE_ON_ZERO", "NO_BACKSLASH_ESCAPES", "STRICT_TRANS_TABLES", + "STRICT_ALL_TABLES", "NO_ZERO_IN_DATE", "NO_ZERO_DATE", + "ALLOW_INVALID_DATES", "ERROR_FOR_DIVISION_BY_ZERO", "TRADITIONAL", + "NO_AUTO_CREATE_USER", "HIGH_NOT_PRECEDENCE", "NO_ENGINE_SUBSTITUTION", + "PAD_CHAR_TO_FULL_LENGTH", "EMPTY_STRING_IS_NULL", "SIMULTANEOUS_ASSIGNMENT", + "TIME_ROUND_FRACTIONAL", + 0 +}; + + +const char *sql_mode_string_representation(uint bit_number) +{ + DBUG_ASSERT(bit_number < array_elements(sql_mode_names)); + return sql_mode_names[bit_number]; +} + + +export bool sql_mode_string_representation(THD *thd, sql_mode_t sql_mode, + LEX_CSTRING *ls) +{ + set_to_string(thd, ls, sql_mode, sql_mode_names); + return ls->str == 0; +} +/* + sql_mode should *not* be IN_BINLOG: even though it is written to the binlog, + the slave ignores the MODE_NO_DIR_IN_CREATE variable, so slave's value + differs from master's (see log_event.cc: Query_log_event::do_apply_event()). 
+*/ +static Sys_var_set Sys_sql_mode( + "sql_mode", + "Sets the sql mode", + SESSION_VAR(sql_mode), CMD_LINE(REQUIRED_ARG), + sql_mode_names, + DEFAULT(MODE_STRICT_TRANS_TABLES | + MODE_ERROR_FOR_DIVISION_BY_ZERO | + MODE_NO_ENGINE_SUBSTITUTION | + MODE_NO_AUTO_CREATE_USER), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_sql_mode), ON_UPDATE(fix_sql_mode)); + +static const char *old_mode_names[]= +{ + "NO_DUP_KEY_WARNINGS_WITH_IGNORE", + "NO_PROGRESS_INFO", + "ZERO_DATE_TIME_CAST", + "UTF8_IS_UTF8MB3", + "IGNORE_INDEX_ONLY_FOR_JOIN", + "COMPAT_5_1_CHECKSUM", + 0 +}; + +/* + sql_mode should *not* be IN_BINLOG as the slave can't remember this + anyway on restart. +*/ +static Sys_var_set Sys_old_behavior( + "old_mode", + "Used to emulate old behavior from earlier MariaDB or MySQL versions", + SESSION_VAR(old_behavior), CMD_LINE(REQUIRED_ARG), + old_mode_names, DEFAULT(OLD_MODE_UTF8_IS_UTF8MB3)); + +#if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY) +#define SSL_OPT(X) CMD_LINE(REQUIRED_ARG,X) +#else +#define SSL_OPT(X) NO_CMD_LINE +#endif + +static Sys_var_charptr_fscs Sys_ssl_ca( + "ssl_ca", + "CA file in PEM format (check OpenSSL docs, implies --ssl)", + READ_ONLY GLOBAL_VAR(opt_ssl_ca), SSL_OPT(OPT_SSL_CA), + DEFAULT(0)); + +static Sys_var_charptr_fscs Sys_ssl_capath( + "ssl_capath", + "CA directory (check OpenSSL docs, implies --ssl)", + READ_ONLY GLOBAL_VAR(opt_ssl_capath), SSL_OPT(OPT_SSL_CAPATH), + DEFAULT(0)); + +static Sys_var_charptr_fscs Sys_ssl_cert( + "ssl_cert", "X509 cert in PEM format (implies --ssl)", + READ_ONLY GLOBAL_VAR(opt_ssl_cert), SSL_OPT(OPT_SSL_CERT), + DEFAULT(0)); + +static Sys_var_charptr_fscs Sys_ssl_cipher( + "ssl_cipher", "SSL cipher to use (implies --ssl)", + READ_ONLY GLOBAL_VAR(opt_ssl_cipher), SSL_OPT(OPT_SSL_CIPHER), + DEFAULT(0)); + +static Sys_var_charptr_fscs Sys_ssl_key( + "ssl_key", "X509 key in PEM format (implies --ssl)", + READ_ONLY GLOBAL_VAR(opt_ssl_key), SSL_OPT(OPT_SSL_KEY), + DEFAULT(0)); + +static 
Sys_var_charptr_fscs Sys_ssl_crl( + "ssl_crl", + "CRL file in PEM format (check OpenSSL docs, implies --ssl)", + READ_ONLY GLOBAL_VAR(opt_ssl_crl), SSL_OPT(OPT_SSL_CRL), + DEFAULT(0)); + +static Sys_var_charptr_fscs Sys_ssl_crlpath( + "ssl_crlpath", + "CRL directory (check OpenSSL docs, implies --ssl)", + READ_ONLY GLOBAL_VAR(opt_ssl_crlpath), SSL_OPT(OPT_SSL_CRLPATH), + DEFAULT(0)); + +static const char *tls_version_names[]= +{ + "TLSv1.0", + "TLSv1.1", + "TLSv1.2", + "TLSv1.3", + 0 +}; + +export bool tls_version_string_representation(THD *thd, sql_mode_t sql_mode, + LEX_CSTRING *ls) +{ + set_to_string(thd, ls, tls_version, tls_version_names); + return ls->str == 0; +} + +static Sys_var_set Sys_tls_version( + "tls_version", + "TLS protocol version for secure connections.", + READ_ONLY GLOBAL_VAR(tls_version), CMD_LINE(REQUIRED_ARG), + tls_version_names, + DEFAULT(VIO_TLSv1_2 | VIO_TLSv1_3)); + +static Sys_var_mybool Sys_standard_compliant_cte( + "standard_compliant_cte", + "Allow only CTEs compliant to SQL standard", + SESSION_VAR(only_standard_compliant_cte), CMD_LINE(OPT_ARG), + DEFAULT(TRUE)); + + +// why ENUM and not BOOL ? +static const char *updatable_views_with_limit_names[]= {"NO", "YES", 0}; +static Sys_var_enum Sys_updatable_views_with_limit( + "updatable_views_with_limit", + "YES = Don't issue an error message (warning only) if a VIEW without " + "presence of a key of the underlying table is used in queries with a " + "LIMIT clause for updating. 
NO = Prohibit update of a VIEW, which " + "does not contain a key of the underlying table and the query uses " + "a LIMIT clause (usually get from GUI tools)", + SESSION_VAR(updatable_views_with_limit), CMD_LINE(REQUIRED_ARG), + updatable_views_with_limit_names, DEFAULT(TRUE)); + +static Sys_var_mybool Sys_sync_frm( + "sync_frm", "Sync .frm files to disk on creation", + GLOBAL_VAR(opt_sync_frm), CMD_LINE(OPT_ARG), + DEFAULT(TRUE)); + +static char *system_time_zone_ptr; +static Sys_var_charptr Sys_system_time_zone( + "system_time_zone", "The server system time zone", + READ_ONLY GLOBAL_VAR(system_time_zone_ptr), + CMD_LINE_HELP_ONLY, + DEFAULT(system_time_zone)); + +/* + If One use views with prepared statements this should be bigger than + table_open_cache (now we allow 2 times bigger value) +*/ +static Sys_var_ulong Sys_table_def_size( + "table_definition_cache", + "The number of cached table definitions", + GLOBAL_VAR(tdc_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(TABLE_DEF_CACHE_MIN, 2*1024*1024), + DEFAULT(TABLE_DEF_CACHE_DEFAULT), BLOCK_SIZE(1)); + + +static bool fix_table_open_cache(sys_var *, THD *, enum_var_type) +{ + mysql_mutex_unlock(&LOCK_global_system_variables); + tc_purge(); + mysql_mutex_lock(&LOCK_global_system_variables); + return false; +} + +/* Check the table_definition_cache comment if makes changes */ +static Sys_var_ulong Sys_table_cache_size( + "table_open_cache", "The number of cached open tables", + GLOBAL_VAR(tc_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(10, 1024*1024), DEFAULT(TABLE_OPEN_CACHE_DEFAULT), + BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_table_open_cache)); + +static Sys_var_uint Sys_table_cache_instances( + "table_open_cache_instances", "Maximum number of table cache instances", + READ_ONLY GLOBAL_VAR(tc_instances), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 64), DEFAULT(8), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_thread_cache_size( + "thread_cache_size", + "How many threads we should 
keep in a cache for reuse. These are freed after 5 minutes of idle time", + GLOBAL_VAR(thread_cache_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 16384), DEFAULT(256), BLOCK_SIZE(1)); + +#ifdef HAVE_POOL_OF_THREADS +static bool fix_tp_max_threads(sys_var *, THD *, enum_var_type) +{ + tp_set_max_threads(threadpool_max_threads); + return false; +} + + +#ifdef _WIN32 +static bool fix_tp_min_threads(sys_var *, THD *, enum_var_type) +{ + tp_set_min_threads(threadpool_min_threads); + return false; +} +#endif + +static bool check_threadpool_size(sys_var *self, THD *thd, set_var *var) +{ + +#ifdef _WIN32 + if (threadpool_mode != TP_MODE_GENERIC) + return false; +#endif + + ulonglong v= var->save_result.ulonglong_value; + if (v > threadpool_max_size) + { + var->save_result.ulonglong_value= threadpool_max_size; + return throw_bounds_warning(thd, self->name.str, true, true, v); + } + return false; +} + + +static bool fix_threadpool_size(sys_var*, THD*, enum_var_type) +{ + tp_set_threadpool_size(threadpool_size); + return false; +} + + +static bool fix_threadpool_stall_limit(sys_var*, THD*, enum_var_type) +{ + tp_set_threadpool_stall_limit(threadpool_stall_limit); + return false; +} + +#ifdef _WIN32 +static Sys_var_on_access_global +Sys_threadpool_min_threads( + "thread_pool_min_threads", + "Minimum number of threads in the thread pool.", + GLOBAL_VAR(threadpool_min_threads), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 256), DEFAULT(1), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_tp_min_threads) + ); + +static const char *threadpool_mode_names[]={ "windows", "generic", 0 }; +static Sys_var_on_access_global +Sys_threadpool_mode( + "thread_pool_mode", + "Chose implementation of the threadpool", + READ_ONLY GLOBAL_VAR(threadpool_mode), CMD_LINE(REQUIRED_ARG), + threadpool_mode_names, DEFAULT(TP_MODE_WINDOWS) + ); +#endif + +static const char *threadpool_priority_names[]={ "high", "low", "auto", 0 }; +static Sys_var_on_access_global 
+Sys_thread_pool_priority( + "thread_pool_priority", + "Threadpool priority. High priority connections usually start executing earlier than low priority." + "If priority set to 'auto', the the actual priority(low or high) is determined based on whether or not connection is inside transaction.", + SESSION_VAR(threadpool_priority), CMD_LINE(REQUIRED_ARG), + threadpool_priority_names, DEFAULT(TP_PRIORITY_AUTO)); + +static Sys_var_on_access_global +Sys_threadpool_idle_thread_timeout( + "thread_pool_idle_timeout", + "Timeout in seconds for an idle thread in the thread pool." + "Worker thread will be shut down after timeout", + GLOBAL_VAR(threadpool_idle_timeout), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, UINT_MAX), DEFAULT(60), BLOCK_SIZE(1) +); +static Sys_var_on_access_global +Sys_threadpool_oversubscribe( + "thread_pool_oversubscribe", + "How many additional active worker threads in a group are allowed.", + GLOBAL_VAR(threadpool_oversubscribe), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 1000), DEFAULT(3), BLOCK_SIZE(1) +); +static Sys_var_on_access_global +Sys_threadpool_size( + "thread_pool_size", + "Number of thread groups in the pool. " + "This parameter is roughly equivalent to maximum number of concurrently " + "executing threads (threads in a waiting state do not count as executing).", + GLOBAL_VAR(threadpool_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, MAX_THREAD_GROUPS), DEFAULT(8), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_threadpool_size), + ON_UPDATE(fix_threadpool_size) +); +static Sys_var_on_access_global +Sys_threadpool_stall_limit( + "thread_pool_stall_limit", + "Maximum query execution time in milliseconds," + "before an executing non-yielding thread is considered stalled." 
+ "If a worker thread is stalled, additional worker thread " + "may be created to handle remaining clients.", + GLOBAL_VAR(threadpool_stall_limit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, UINT_MAX), DEFAULT(DEFAULT_THREADPOOL_STALL_LIMIT), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_threadpool_stall_limit) +); + +static Sys_var_on_access_global +Sys_threadpool_max_threads( + "thread_pool_max_threads", + "Maximum allowed number of worker threads in the thread pool", + GLOBAL_VAR(threadpool_max_threads), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 65536), DEFAULT(65536), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_tp_max_threads) +); + +static Sys_var_on_access_global +Sys_threadpool_threadpool_prio_kickup_timer( + "thread_pool_prio_kickup_timer", + "The number of milliseconds before a dequeued low-priority statement is moved to the high-priority queue", + GLOBAL_VAR(threadpool_prio_kickup_timer), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(1000), BLOCK_SIZE(1) +); + +static Sys_var_on_access_global +Sys_threadpool_exact_stats( + "thread_pool_exact_stats", + "If set to 1, provides better statistics in information_schema threadpool tables", + GLOBAL_VAR(threadpool_exact_stats), CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG +); + +static Sys_var_on_access_global +Sys_threadpool_dedicated_listener( + "thread_pool_dedicated_listener", + "If set to 1,listener thread will not pick up queries", + GLOBAL_VAR(threadpool_dedicated_listener), CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG +); +#endif /* HAVE_POOL_OF_THREADS */ + +/** + Can't change the 'next' tx_isolation if we are already in a + transaction. 
+*/ + +static bool check_tx_isolation(sys_var *self, THD *thd, set_var *var) +{ + if (var->type == OPT_DEFAULT && thd->in_active_multi_stmt_transaction()) + { + DBUG_ASSERT(thd->in_multi_stmt_transaction_mode()); + my_error(ER_CANT_CHANGE_TX_CHARACTERISTICS, MYF(0)); + return TRUE; + } + return FALSE; +} + +// NO_CMD_LINE - different name of the option +static Sys_var_tx_isolation Sys_tx_isolation( + "tx_isolation", "Default transaction isolation level", + NO_SET_STMT SESSION_VAR(tx_isolation), NO_CMD_LINE, + tx_isolation_names, DEFAULT(ISO_REPEATABLE_READ), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_tx_isolation)); + + +/** + Can't change the tx_read_only state if we are already in a + transaction. +*/ + +static bool check_tx_read_only(sys_var *self, THD *thd, set_var *var) +{ + if (var->type == OPT_DEFAULT && thd->in_active_multi_stmt_transaction()) + { + DBUG_ASSERT(thd->in_multi_stmt_transaction_mode()); + my_error(ER_CANT_CHANGE_TX_CHARACTERISTICS, MYF(0)); + return true; + } + return false; +} + + +bool Sys_var_tx_read_only::session_update(THD *thd, set_var *var) +{ + if (var->type == OPT_SESSION && Sys_var_mybool::session_update(thd, var)) + return true; + if (var->type == OPT_DEFAULT || !thd->in_active_multi_stmt_transaction()) + { + // @see Sys_var_tx_isolation::session_update() above for the rules. + thd->tx_read_only= var->save_result.ulonglong_value; + +#ifndef EMBEDDED_LIBRARY + if (thd->variables.session_track_transaction_info > TX_TRACK_NONE) + { + if (var->type == OPT_DEFAULT) + thd->session_tracker.transaction_info.set_read_flags(thd, + thd->tx_read_only ? TX_READ_ONLY : TX_READ_WRITE); + else + thd->session_tracker.transaction_info.set_read_flags(thd, + TX_READ_INHERIT); + } +#endif //EMBEDDED_LIBRARY + } + return false; +} + + +static Sys_var_tx_read_only Sys_tx_read_only( + "tx_read_only", "Default transaction access mode. If set to OFF, " + "the default, access is read/write. If set to ON, access is read-only. 
" + "The SET TRANSACTION statement can also change the value of this variable. " + "See SET TRANSACTION and START TRANSACTION.", + SESSION_VAR(tx_read_only), NO_CMD_LINE, DEFAULT(0), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_tx_read_only)); + +static Sys_var_ulonglong Sys_tmp_table_size( + "tmp_table_size", + "Alias for tmp_memory_table_size. " + "If an internal in-memory temporary table exceeds this size, MariaDB " + "will automatically convert it to an on-disk MyISAM or Aria table.", + SESSION_VAR(tmp_memory_table_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, (ulonglong)~(intptr)0), DEFAULT(16*1024*1024), + BLOCK_SIZE(1)); + +static Sys_var_ulonglong Sys_tmp_memory_table_size( + "tmp_memory_table_size", + "If an internal in-memory temporary table exceeds this size, MariaDB " + "will automatically convert it to an on-disk MyISAM or Aria table. " + "Same as tmp_table_size.", + SESSION_VAR(tmp_memory_table_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, (ulonglong)~(intptr)0), DEFAULT(16*1024*1024), + BLOCK_SIZE(1)); + +static Sys_var_ulonglong Sys_tmp_disk_table_size( + "tmp_disk_table_size", + "Max size for data for an internal temporary on-disk MyISAM or Aria table.", + SESSION_VAR(tmp_disk_table_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1024, (ulonglong)~(intptr)0), + DEFAULT((ulonglong)~(intptr)0), BLOCK_SIZE(1)); + +static Sys_var_charptr Sys_version( + "version", "Server version number. It may also include a suffix " + "with configuration or build information. 
-debug indicates " + "debugging support was enabled on the server, and -log indicates " + "at least one of the binary log, general log or slow query log are " + "enabled, for example 10.1.1-MariaDB-mariadb1precise-log.", + READ_ONLY GLOBAL_VAR(server_version_ptr), + CMD_LINE_HELP_ONLY, + DEFAULT(server_version)); + +static char *server_version_comment_ptr; +static Sys_var_charptr Sys_version_comment( + "version_comment", "Value of the COMPILATION_COMMENT option " + "specified by CMake when building MariaDB, for example " + "mariadb.org binary distribution.", + READ_ONLY GLOBAL_VAR(server_version_comment_ptr), + CMD_LINE_HELP_ONLY, + DEFAULT(MYSQL_COMPILATION_COMMENT)); + +static char *server_version_compile_machine_ptr; +static Sys_var_charptr Sys_version_compile_machine( + "version_compile_machine", "The machine type or architecture " + "MariaDB was built on, for example i686.", + READ_ONLY GLOBAL_VAR(server_version_compile_machine_ptr), + CMD_LINE_HELP_ONLY, DEFAULT(DEFAULT_MACHINE)); + +static char *server_version_compile_os_ptr; +static Sys_var_charptr Sys_version_compile_os( + "version_compile_os", "Operating system that MariaDB was built " + "on, for example debian-linux-gnu.", + READ_ONLY GLOBAL_VAR(server_version_compile_os_ptr), + CMD_LINE_HELP_ONLY, + DEFAULT(SYSTEM_TYPE)); + +#include +static char *server_version_source_revision; +static Sys_var_charptr Sys_version_source_revision( + "version_source_revision", "Source control revision id for MariaDB source code", + READ_ONLY GLOBAL_VAR(server_version_source_revision), + CMD_LINE_HELP_ONLY, + DEFAULT(SOURCE_REVISION)); + +static char *malloc_library; +static Sys_var_charptr Sys_malloc_library( + "version_malloc_library", "Version of the used malloc library", + READ_ONLY GLOBAL_VAR(malloc_library), CMD_LINE_HELP_ONLY, + DEFAULT(guess_malloc_library())); + +static char *ssl_library; +static Sys_var_charptr Sys_ssl_library( + "version_ssl_library", "Version of the used SSL library", + READ_ONLY 
GLOBAL_VAR(ssl_library), CMD_LINE_HELP_ONLY, + DEFAULT(SSL_LIBRARY)); + +static Sys_var_ulong Sys_net_wait_timeout( + "wait_timeout", + "The number of seconds the server waits for activity on a " + "connection before closing it", + NO_SET_STMT SESSION_VAR(net_wait_timeout), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, IF_WIN(INT_MAX32/1000, LONG_TIMEOUT)), + DEFAULT(NET_WAIT_TIMEOUT), BLOCK_SIZE(1)); + +static Sys_var_uint Sys_idle_transaction_timeout( + "idle_transaction_timeout", + "The number of seconds the server waits for idle transaction", + SESSION_VAR(idle_transaction_timeout), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, IF_WIN(INT_MAX32/1000, LONG_TIMEOUT)), + DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_uint Sys_idle_readonly_transaction_timeout( + "idle_readonly_transaction_timeout", + "The number of seconds the server waits for read-only idle transaction", + SESSION_VAR(idle_readonly_transaction_timeout), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, IF_WIN(INT_MAX32/1000, LONG_TIMEOUT)), + DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_uint Sys_idle_write_transaction_timeout( + "idle_write_transaction_timeout", + "The number of seconds the server waits for write idle transaction", + SESSION_VAR(idle_write_transaction_timeout), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, IF_WIN(INT_MAX32/1000, LONG_TIMEOUT)), + DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_plugin Sys_default_storage_engine( + "default_storage_engine", "The default storage engine for new tables", + SESSION_VAR(table_plugin), NO_CMD_LINE, + MYSQL_STORAGE_ENGINE_PLUGIN, DEFAULT(&default_storage_engine), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_not_null)); + +static Sys_var_plugin Sys_storage_engine( + "storage_engine", "Alias for @@default_storage_engine. 
Deprecated", + SESSION_VAR(table_plugin), NO_CMD_LINE, + MYSQL_STORAGE_ENGINE_PLUGIN, DEFAULT(&default_storage_engine), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_not_null), ON_UPDATE(0), + DEPRECATED("'@@default_storage_engine'")); // since 10.5.1 + +static Sys_var_plugin Sys_default_tmp_storage_engine( + "default_tmp_storage_engine", "The default storage engine for user-created temporary tables", + SESSION_VAR(tmp_table_plugin), NO_CMD_LINE, + MYSQL_STORAGE_ENGINE_PLUGIN, DEFAULT(&default_tmp_storage_engine), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_engine_supports_temporary)); + +static Sys_var_plugin Sys_enforce_storage_engine( + "enforce_storage_engine", "Force the use of a storage engine for new tables", + SESSION_VAR(enforced_table_plugin), + NO_CMD_LINE, MYSQL_STORAGE_ENGINE_PLUGIN, + DEFAULT(&enforced_storage_engine), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_has_super)); + + +#ifdef HAVE_REPLICATION +/* + Check + 1. Value for gtid_pos_auto_engines is not NULL. + 2. No slave SQL thread is running. +*/ +static bool +check_gtid_pos_auto_engines(sys_var *self, THD *thd, set_var *var) +{ + bool running; + bool err= false; + + DBUG_ASSERT(var->type == OPT_GLOBAL); + if (var->value && var->value->is_null()) + err= true; + else + { + running= give_error_if_slave_running(false); + if (running) + err= true; + } + return err; +} + + +static Sys_var_on_access_global +Sys_gtid_pos_auto_engines( + "gtid_pos_auto_engines", + "List of engines for which to automatically create a " + "mysql.gtid_slave_pos_ENGINE table, if a transaction using that engine " + "is replicated. 
This can be used to avoid introducing cross-engine " + "transactions, if engines are used different from that used by table " + "mysql.gtid_slave_pos", + GLOBAL_VAR(opt_gtid_pos_auto_plugins), NO_CMD_LINE, + DEFAULT(>id_pos_auto_engines), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_gtid_pos_auto_engines)); +#endif + + +#if defined(ENABLED_DEBUG_SYNC) +/* + Variable can be set for the session only. + + This could be changed later. Then we need to have a global array of + actions in addition to the thread local ones. SET GLOBAL would + manage the global array, SET [SESSION] the local array. A sync point + would need to look for a local and a global action. Setting and + executing of global actions need to be protected by a mutex. + + The purpose of global actions could be to allow synchronizing with + connectionless threads that cannot execute SET statements. +*/ +static Sys_var_debug_sync Sys_debug_sync( + "debug_sync", "Debug Sync Facility", + NO_SET_STMT sys_var::ONLY_SESSION, NO_CMD_LINE, + DEFAULT(0), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_has_super)); +#endif /* defined(ENABLED_DEBUG_SYNC) */ + +/** + "time_format" "date_format" "datetime_format" + + the following three variables are unused, and the source of confusion + (bug reports like "I've changed date_format, but date format hasn't changed. + I've made them read-only, to alleviate the situation somewhat. + + @todo make them NO_CMD_LINE ? 
+*/ +static Sys_var_charptr Sys_date_format( + "date_format", "The DATE format (ignored)", + READ_ONLY GLOBAL_VAR(global_date_format.format.str), + CMD_LINE(REQUIRED_ARG), + DEFAULT(known_date_time_formats[ISO_FORMAT].date_format), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(0), + DEPRECATED("")); // since 10.1.2 + +static Sys_var_charptr Sys_datetime_format( + "datetime_format", "The DATETIME format (ignored)", + READ_ONLY GLOBAL_VAR(global_datetime_format.format.str), + CMD_LINE(REQUIRED_ARG), + DEFAULT(known_date_time_formats[ISO_FORMAT].datetime_format), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(0), + DEPRECATED("")); // since 10.1.2 + +static Sys_var_charptr Sys_time_format( + "time_format", "The TIME format (ignored)", + READ_ONLY GLOBAL_VAR(global_time_format.format.str), + CMD_LINE(REQUIRED_ARG), + DEFAULT(known_date_time_formats[ISO_FORMAT].time_format), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(0), + DEPRECATED("")); // since 10.1.2 + +static bool fix_autocommit(sys_var *self, THD *thd, enum_var_type type) +{ + if (type == OPT_GLOBAL) + { + if (global_system_variables.option_bits & OPTION_AUTOCOMMIT) + global_system_variables.option_bits&= ~OPTION_NOT_AUTOCOMMIT; + else + global_system_variables.option_bits|= OPTION_NOT_AUTOCOMMIT; + return false; + } + + if (test_all_bits(thd->variables.option_bits, + (OPTION_AUTOCOMMIT | OPTION_NOT_AUTOCOMMIT))) + { + // activating autocommit + if (trans_commit_stmt(thd) || trans_commit(thd)) + { + thd->variables.option_bits&= ~OPTION_AUTOCOMMIT; + thd->release_transactional_locks(); + WSREP_DEBUG("autocommit, MDL TRX lock released: %lld", + (longlong) thd->thread_id); + return true; + } + /* + Don't close thread tables or release metadata locks: if we do so, we + risk releasing locks/closing tables of expressions used to assign + other variables, as in: + set @var=my_stored_function1(), @@autocommit=1, @var2=(select MY_MAX(a) + from my_table), ... 
+ The locks will be released at statement end anyway, as SET + statement that assigns autocommit is marked to commit + transaction implicitly at the end (@sa stmt_causes_implicitcommit()). + */ + thd->variables.option_bits&= + ~(OPTION_BEGIN | OPTION_BINLOG_THIS_TRX | OPTION_NOT_AUTOCOMMIT | + OPTION_GTID_BEGIN); + thd->transaction->all.modified_non_trans_table= false; + thd->transaction->all.m_unsafe_rollback_flags&= ~THD_TRANS::DID_WAIT; + thd->server_status|= SERVER_STATUS_AUTOCOMMIT; + return false; + } + + if ((thd->variables.option_bits & + (OPTION_AUTOCOMMIT |OPTION_NOT_AUTOCOMMIT)) == 0) + { + // disabling autocommit + thd->transaction->all.modified_non_trans_table= false; + thd->transaction->all.m_unsafe_rollback_flags&= ~THD_TRANS::DID_WAIT; + thd->server_status&= ~SERVER_STATUS_AUTOCOMMIT; + thd->variables.option_bits|= OPTION_NOT_AUTOCOMMIT; + return false; + } + + return false; // autocommit value wasn't changed +} + +static Sys_var_bit Sys_autocommit( + "autocommit", "If set to 1, the default, all queries are committed " + "immediately. If set to 0, they are only committed upon a COMMIT statement" + ", or rolled back with a ROLLBACK statement. If autocommit is set to 0, " + "and then changed to 1, all open transactions are immediately committed.", + NO_SET_STMT SESSION_VAR(option_bits), NO_CMD_LINE, + OPTION_AUTOCOMMIT, DEFAULT(TRUE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(fix_autocommit)); +export sys_var *Sys_autocommit_ptr= &Sys_autocommit; // for sql_yacc.yy + +static Sys_var_mybool Sys_big_tables( + "big_tables", "Old variable, which if set to 1, allows large result sets " + "by saving all temporary sets to disk, avoiding 'table full' errors. 
No " + "longer needed, as the server now handles this automatically.", + SESSION_VAR(big_tables), CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(0), + DEPRECATED("")); // since 10.5.0 + +static Sys_var_bit Sys_big_selects( + "sql_big_selects", "If set to 0, MariaDB will not perform large SELECTs." + " See max_join_size for details. If max_join_size is set to anything but " + "DEFAULT, sql_big_selects is automatically set to 0. If sql_big_selects " + "is again set, max_join_size will be ignored.", + SESSION_VAR(option_bits), NO_CMD_LINE, OPTION_BIG_SELECTS, + DEFAULT(FALSE)); + +static Sys_var_bit Sys_log_off( + "sql_log_off", "If set to 1 (0 is the default), no logging to the general " + "query log is done for the client. Only clients with the SUPER privilege " + "can update this variable.", + NO_SET_STMT SESSION_VAR(option_bits), NO_CMD_LINE, OPTION_LOG_OFF, + DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_has_super)); + +/** + This function sets the session variable thd->variables.sql_log_bin + to reflect changes to @@session.sql_log_bin. + + @param[IN] self A pointer to the sys_var, i.e. Sys_log_binlog. + @param[IN] type The type either session or global. + + @return @c FALSE. +*/ +static bool fix_sql_log_bin_after_update(sys_var *self, THD *thd, + enum_var_type type) +{ + DBUG_ASSERT(type == OPT_SESSION); + + thd->set_binlog_bit(); + + return FALSE; +} + +static bool check_session_only_variable(sys_var *self, THD *,set_var *var) +{ + if (unlikely(var->type == OPT_GLOBAL)) + { + my_error(ER_INCORRECT_GLOBAL_LOCAL_VAR, MYF(0), self->name.str, "SESSION"); + return true; + } + return false; +} + +/** + This function checks if the sql_log_bin can be changed, + what is possible if: + - the user is a super user; + - the set is not called from within a function/trigger; + - there is no on-going transaction. + + @param[IN] self A pointer to the sys_var, i.e. Sys_log_binlog. 
+ @param[IN] var A pointer to the set_var created by the parser. + + @return @c FALSE if the change is allowed, otherwise @c TRUE. +*/ +static bool check_sql_log_bin(sys_var *self, THD *thd, set_var *var) +{ + if (check_session_only_variable(self, thd, var)) + return true; + + if (unlikely(error_if_in_trans_or_substatement(thd, + ER_STORED_FUNCTION_PREVENTS_SWITCH_SQL_LOG_BIN, + ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_SQL_LOG_BIN))) + return true; + + return false; +} + +static Sys_var_on_access +Sys_sql_log_bin( + "sql_log_bin", "If set to 0 (1 is the default), no logging to the binary " + "log is done for the client. Only clients with the SUPER privilege can " + "update this variable. Can have unintended consequences if set globally, " + "see SET SQL_LOG_BIN. Starting MariaDB 10.1.7, this variable does not " + "affect the replication of events in a Galera cluster.", + SESSION_VAR(sql_log_bin), NO_CMD_LINE, DEFAULT(TRUE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_sql_log_bin), + ON_UPDATE(fix_sql_log_bin_after_update)); + +static Sys_var_bit Sys_sql_warnings( + "sql_warnings", "If set to 1, single-row INSERTs will produce a string " + "containing warning information if a warning occurs.", + SESSION_VAR(option_bits), NO_CMD_LINE, OPTION_WARNINGS, + DEFAULT(FALSE)); + +static Sys_var_bit Sys_sql_notes( + "sql_notes", + "If set to 1, the default, warning_count is incremented " + "each time a Note warning is encountered. If set to 0, Note warnings " + "are not recorded. mysqldump has outputs to set this variable to 0 so " + "that no unnecessary increments occur when data is reloaded. 
" + "See also note_verbosity, which allows one to define with notes are " + "sent.", + SESSION_VAR(option_bits), NO_CMD_LINE, OPTION_SQL_NOTES, + DEFAULT(TRUE)); + +static Sys_var_bit Sys_auto_is_null( + "sql_auto_is_null", "If set to 1, the query SELECT * FROM table_name WHERE " + "auto_increment_column IS NULL will return an auto-increment that has just " + "been successfully inserted, the same as the LAST_INSERT_ID() function. Some" + " ODBC programs make use of this IS NULL comparison.", + SESSION_VAR(option_bits), NO_CMD_LINE, OPTION_AUTO_IS_NULL, + DEFAULT(FALSE), NO_MUTEX_GUARD, IN_BINLOG); + +static Sys_var_bit Sys_if_exists( + "sql_if_exists", "If set to 1 adds an implicate IF EXISTS to ALTER, RENAME and DROP of TABLES, VIEWS, FUNCTIONS and PACKAGES", + SESSION_VAR(option_bits), NO_CMD_LINE, OPTION_IF_EXISTS, + DEFAULT(FALSE), NO_MUTEX_GUARD, IN_BINLOG); + +static Sys_var_bit Sys_safe_updates( + "sql_safe_updates", "If set to 1, UPDATEs and DELETEs need either a key in " + "the WHERE clause, or a LIMIT clause, or else they will aborted. Prevents " + "the common mistake of accidentally deleting or updating every row in a table.", + SESSION_VAR(option_bits), CMD_LINE(OPT_ARG), OPTION_SAFE_UPDATES, + DEFAULT(FALSE)); + +static Sys_var_bit Sys_buffer_results( + "sql_buffer_result", "If set to 1 (0 is default), results from SELECT " + "statements are always placed into temporary tables. This can help the " + "server when it takes a long time to send the results to the client by " + "allowing the table locks to be freed early.", + SESSION_VAR(option_bits), NO_CMD_LINE, OPTION_BUFFER_RESULT, + DEFAULT(FALSE)); + +static Sys_var_bit Sys_quote_show_create( + "sql_quote_show_create", "If set to 1, the default, the server will " + "quote identifiers for SHOW CREATE DATABASE, SHOW CREATE TABLE and " + "SHOW CREATE VIEW statements. Quoting is disabled if set to 0. 
Enable " + "to ensure replications works when identifiers require quoting.", + SESSION_VAR(option_bits), NO_CMD_LINE, OPTION_QUOTE_SHOW_CREATE, + DEFAULT(TRUE)); + +static Sys_var_bit Sys_foreign_key_checks( + "foreign_key_checks", "If set to 1 (the default) foreign key constraints" + " (including ON UPDATE and ON DELETE behavior) InnoDB tables are checked," + " while if set to 0, they are not checked. 0 is not recommended for normal " + "use, though it can be useful in situations where you know the data is " + "consistent, but want to reload data in a different order from that that " + "specified by parent/child relationships. Setting this variable to 1 does " + "not retrospectively check for inconsistencies introduced while set to 0.", + SESSION_VAR(option_bits), NO_CMD_LINE, + REVERSE(OPTION_NO_FOREIGN_KEY_CHECKS), + DEFAULT(TRUE), NO_MUTEX_GUARD, IN_BINLOG); + +static Sys_var_bit Sys_unique_checks( + "unique_checks", "If set to 1, the default, secondary indexes in InnoDB " + "tables are performed. If set to 0, storage engines can (but are not " + "required to) assume that duplicate keys are not present in input data. " + "Set to 0 to speed up imports of large tables to InnoDB. The storage " + "engine will still issue a duplicate key error if it detects one, even " + "if set to 0.", + SESSION_VAR(option_bits), NO_CMD_LINE, + REVERSE(OPTION_RELAXED_UNIQUE_CHECKS), + DEFAULT(TRUE), NO_MUTEX_GUARD, IN_BINLOG); + +static Sys_var_bit Sys_no_check_constraint( + "check_constraint_checks", "check_constraint_checks", + SESSION_VAR(option_bits), NO_CMD_LINE, + REVERSE(OPTION_NO_CHECK_CONSTRAINT_CHECKS), + DEFAULT(TRUE), NO_MUTEX_GUARD, IN_BINLOG); + +#ifdef ENABLED_PROFILING +static bool update_profiling(sys_var *self, THD *thd, enum_var_type type) +{ + if (type == OPT_SESSION) + thd->profiling.reset(); + return false; +} + +static Sys_var_bit Sys_profiling( + "profiling", "If set to 1 (0 is default), statement profiling will be " + "enabled. 
See SHOW PROFILES and SHOW PROFILE.", + NO_SET_STMT SESSION_VAR(option_bits), NO_CMD_LINE, OPTION_PROFILING, + DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(update_profiling)); + +static Sys_var_ulong Sys_profiling_history_size( + "profiling_history_size", "Number of statements about which profiling " + "information is maintained. If set to 0, no profiles are stored. " + "See SHOW PROFILES.", + NO_SET_STMT SESSION_VAR(profiling_history_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 100), DEFAULT(15), BLOCK_SIZE(1)); +#endif + +/* + When this is set by a connection, binlogged events will be marked with a + corresponding flag. The slave can be configured to not replicate events + so marked. + In the binlog dump thread on the master, this variable is re-used for a + related purpose: The slave sets this flag when connecting to the master to + request that the master filter out (ie. not send) any events with the flag + set, thus saving network traffic on events that would be ignored by the + slave anyway. +*/ +static bool check_skip_replication(sys_var *self, THD *thd, set_var *var) +{ + /* + We must not change @@skip_replication in the middle of a transaction or + statement, as that could result in only part of the transaction / statement + being replicated. + (This would be particularly serious if we were to replicate eg. + Rows_log_event without Table_map_log_event or transactional updates without + the COMMIT). + */ + if (unlikely(error_if_in_trans_or_substatement(thd, + ER_STORED_FUNCTION_PREVENTS_SWITCH_SKIP_REPLICATION, + ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_SKIP_REPLICATION))) + return 1; + + return 0; +} + +static Sys_var_bit Sys_skip_replication( + "skip_replication", "Changes are logged into the binary log with the " + "@@skip_replication flag set. Such events will not be replicated by " + "slaves that run with --replicate-events-marked-for-skip set different " + "from its default of REPLICATE. 
See Selectively skipping replication " + "of binlog events for more information.", + NO_SET_STMT SESSION_ONLY(option_bits), + NO_CMD_LINE, OPTION_SKIP_REPLICATION, + DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_skip_replication)); + +static Sys_var_harows Sys_select_limit( + "sql_select_limit", + "The maximum number of rows to return from SELECT statements", + SESSION_VAR(select_limit), NO_CMD_LINE, + VALID_RANGE(0, HA_POS_ERROR), DEFAULT(HA_POS_ERROR), BLOCK_SIZE(1)); + +static const char *secure_timestamp_levels[]= {"NO", "SUPER", "REPLICATION", "YES", 0}; + +bool is_set_timestamp_forbidden(THD *thd) +{ + switch (opt_secure_timestamp) { + case SECTIME_NO: + return false; + case SECTIME_SUPER: + return check_global_access(thd, SUPER_ACL | BINLOG_REPLAY_ACL); + case SECTIME_REPL: + return check_global_access(thd, BINLOG_REPLAY_ACL); + case SECTIME_YES: + break; + } + char buf[1024]; + strxnmov(buf, sizeof(buf), "--secure-timestamp=", + secure_timestamp_levels[opt_secure_timestamp], NULL); + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), buf); + return true; +} + +bool Sys_var_timestamp::on_check_access_session(THD *thd) const +{ + return is_set_timestamp_forbidden(thd); +} +static Sys_var_timestamp Sys_timestamp( + "timestamp", "Set the time for this client", + sys_var::ONLY_SESSION, NO_CMD_LINE, + VALID_RANGE(0, TIMESTAMP_MAX_VALUE), + NO_MUTEX_GUARD, IN_BINLOG); + +static bool update_last_insert_id(THD *thd, set_var *var) +{ + if (!var->value) + { + my_error(ER_NO_DEFAULT, MYF(0), var->var->name.str); + return true; + } + thd->first_successful_insert_id_in_prev_stmt= + var->save_result.ulonglong_value; + return false; +} +static ulonglong read_last_insert_id(THD *thd) +{ + return (ulonglong) thd->read_first_successful_insert_id_in_prev_stmt(); +} +static Sys_var_session_special Sys_last_insert_id( + "last_insert_id", "The value to be returned from LAST_INSERT_ID()", + sys_var::ONLY_SESSION, NO_CMD_LINE, + VALID_RANGE(0, ULONGLONG_MAX), 
BLOCK_SIZE(1), + NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(0), + ON_UPDATE(update_last_insert_id), ON_READ(read_last_insert_id)); + +// alias for last_insert_id(), Sybase-style +static Sys_var_session_special Sys_identity( + "identity", "Synonym for the last_insert_id variable", + sys_var::ONLY_SESSION, NO_CMD_LINE, + VALID_RANGE(0, ULONGLONG_MAX), BLOCK_SIZE(1), + NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(0), + ON_UPDATE(update_last_insert_id), ON_READ(read_last_insert_id)); + +/* + insert_id should *not* be marked as written to the binlog (i.e., it + should *not* be IN_BINLOG), because we want any statement that + refers to insert_id explicitly to be unsafe. (By "explicitly", we + mean using @@session.insert_id, whereas insert_id is used + "implicitly" when NULL value is inserted into an auto_increment + column). + + We want statements referring explicitly to @@session.insert_id to be + unsafe, because insert_id is modified internally by the slave sql + thread when NULL values are inserted in an AUTO_INCREMENT column. + This modification interfers with the value of the + @@session.insert_id variable if @@session.insert_id is referred + explicitly by an insert statement (as is seen by executing "SET + @@session.insert_id=0; CREATE TABLE t (a INT, b INT KEY + AUTO_INCREMENT); INSERT INTO t(a) VALUES (@@session.insert_id);" in + statement-based logging mode: t will be different on master and + slave). +*/ +static bool update_insert_id(THD *thd, set_var *var) +{ + /* + If we set the insert_id to the DEFAULT or 0 + it means we 'reset' it so it's value doesn't + affect the INSERT. 
+ */ + if (!var->value || + var->save_result.ulonglong_value == 0) + thd->auto_inc_intervals_forced.empty(); + else + thd->force_one_auto_inc_interval(var->save_result.ulonglong_value); + return false; +} + +static ulonglong read_insert_id(THD *thd) +{ + return thd->auto_inc_intervals_forced.minimum(); +} + + +static Sys_var_session_special Sys_insert_id( + "insert_id", "The value to be used by the following INSERT " + "or ALTER TABLE statement when inserting an AUTO_INCREMENT value", + sys_var::ONLY_SESSION, NO_CMD_LINE, + VALID_RANGE(0, ULONGLONG_MAX), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(update_insert_id), ON_READ(read_insert_id)); + +static bool update_rand_seed1(THD *thd, set_var *var) +{ + if (!var->value) + { + my_error(ER_NO_DEFAULT, MYF(0), var->var->name.str); + return true; + } + thd->rand.seed1= (ulong) var->save_result.ulonglong_value; + return false; +} +static ulonglong read_rand_seed1(THD *thd) +{ + return thd->rand.seed1; +} +static Sys_var_session_special Sys_rand_seed1( + "rand_seed1", "Sets the internal state of the RAND() " + "generator for replication purposes", + sys_var::ONLY_SESSION, NO_CMD_LINE, + VALID_RANGE(0, ULONG_MAX), BLOCK_SIZE(1), + NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(0), + ON_UPDATE(update_rand_seed1), ON_READ(read_rand_seed1)); + +static bool update_rand_seed2(THD *thd, set_var *var) +{ + if (!var->value) + { + my_error(ER_NO_DEFAULT, MYF(0), var->var->name.str); + return true; + } + thd->rand.seed2= (ulong) var->save_result.ulonglong_value; + return false; +} +static ulonglong read_rand_seed2(THD *thd) +{ + return thd->rand.seed2; +} +static Sys_var_session_special Sys_rand_seed2( + "rand_seed2", "Sets the internal state of the RAND() " + "generator for replication purposes", + sys_var::ONLY_SESSION, NO_CMD_LINE, + VALID_RANGE(0, ULONG_MAX), BLOCK_SIZE(1), + NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(0), + ON_UPDATE(update_rand_seed2), ON_READ(read_rand_seed2)); + +static ulonglong 
read_error_count(THD *thd) +{ + return thd->get_stmt_da()->error_count(); +} +// this really belongs to the SHOW STATUS +static Sys_var_session_special Sys_error_count( + "error_count", "The number of errors that resulted from the " + "last statement that generated messages", + READ_ONLY sys_var::ONLY_SESSION, NO_CMD_LINE, + VALID_RANGE(0, ULONGLONG_MAX), BLOCK_SIZE(1), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(0), ON_READ(read_error_count)); + +static ulonglong read_warning_count(THD *thd) +{ + return thd->get_stmt_da()->warn_count(); +} +// this really belongs to the SHOW STATUS +static Sys_var_session_special Sys_warning_count( + "warning_count", "The number of errors, warnings, and notes " + "that resulted from the last statement that generated messages", + READ_ONLY sys_var::ONLY_SESSION, NO_CMD_LINE, + VALID_RANGE(0, ULONGLONG_MAX), BLOCK_SIZE(1), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(0), ON_READ(read_warning_count)); + +static Sys_var_ulong Sys_default_week_format( + "default_week_format", + "The default week format used by WEEK() functions", + SESSION_VAR(default_week_format), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 7), DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_uint Sys_group_concat_max_len( + "group_concat_max_len", + "The maximum length of the result of function GROUP_CONCAT()", + SESSION_VAR(group_concat_max_len), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(4, UINT_MAX32), DEFAULT(1024*1024), BLOCK_SIZE(1)); + +static char *glob_hostname_ptr; +static Sys_var_charptr Sys_hostname( + "hostname", "Server host name", + READ_ONLY GLOBAL_VAR(glob_hostname_ptr), NO_CMD_LINE, + DEFAULT(glob_hostname)); + +#ifndef EMBEDDED_LIBRARY +static Sys_var_charptr Sys_repl_report_host( + "report_host", + "Hostname or IP of the slave to be reported to the master during " + "slave registration. Will appear in the output of SHOW SLAVE HOSTS. " + "Leave unset if you do not want the slave to register itself with the " + "master. 
Note that it is not sufficient for the master to simply read " + "the IP of the slave off the socket once the slave connects. Due to " + "NAT and other routing issues, that IP may not be valid for connecting " + "to the slave from the master or other hosts", + READ_ONLY GLOBAL_VAR(report_host), CMD_LINE(REQUIRED_ARG), + DEFAULT(0)); + +static Sys_var_charptr Sys_repl_report_user( + "report_user", + "The account user name of the slave to be reported to the master " + "during slave registration", + READ_ONLY GLOBAL_VAR(report_user), CMD_LINE(REQUIRED_ARG), + DEFAULT(0)); + +static Sys_var_charptr Sys_repl_report_password( + "report_password", + "The account password of the slave to be reported to the master " + "during slave registration", + READ_ONLY GLOBAL_VAR(report_password), CMD_LINE(REQUIRED_ARG), + DEFAULT(0)); + +static Sys_var_uint Sys_repl_report_port( + "report_port", + "Port for connecting to slave reported to the master during slave " + "registration. Set it only if the slave is listening on a non-default " + "port or if you have a special tunnel from the master or other clients " + "to the slave. 
If not sure, leave this option unset", + READ_ONLY GLOBAL_VAR(report_port), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(0), BLOCK_SIZE(1)); +#endif + +static Sys_var_mybool Sys_keep_files_on_create( + "keep_files_on_create", + "Don't overwrite stale .MYD and .MYI even if no directory is specified", + SESSION_VAR(keep_files_on_create), CMD_LINE(OPT_ARG), + DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(0), + DEPRECATED("")); // since 10.8.0 + +static char *license; +static Sys_var_charptr Sys_license( + "license", "The type of license the server has", + READ_ONLY GLOBAL_VAR(license), NO_CMD_LINE, + DEFAULT(STRINGIFY_ARG(LICENSE))); + +#include +char *my_proxy_protocol_networks; +static bool check_proxy_protocol_networks(sys_var *, THD *, set_var *var) +{ + if (!var->value) + return false; + return !proxy_protocol_networks_valid(var->save_result.string_value.str); +} + + +static bool fix_proxy_protocol_networks(sys_var *, THD *, enum_var_type) +{ + return (bool)set_proxy_protocol_networks(my_proxy_protocol_networks); +} + + +static Sys_var_on_access_global +Sys_proxy_protocol_networks( + "proxy_protocol_networks", "Enable proxy protocol for these source " + "networks. The syntax is a comma separated list of IPv4 and IPv6 " + "networks. If the network doesn't contain mask, it is considered to be " + "a single host. \"*\" represents all networks and must the only " + "directive on the line. 
String \"localhost\" represents non-TCP " + "local connections (Unix domain socket, Windows named pipe or shared memory).", + GLOBAL_VAR(my_proxy_protocol_networks), CMD_LINE(REQUIRED_ARG), + DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_proxy_protocol_networks), ON_UPDATE(fix_proxy_protocol_networks)); + + +static bool check_log_path(sys_var *self, THD *thd, set_var *var) +{ + if (!var->value) + return false; // DEFAULT is ok + + if (!var->save_result.string_value.str) + return true; + + LEX_STRING *val= &var->save_result.string_value; + + if (val->length > FN_REFLEN) + { // path is too long + my_error(ER_PATH_LENGTH, MYF(0), self->name.str); + return true; + } + + char path[FN_REFLEN]; + size_t path_length= unpack_filename(path, val->str); + + if (!path_length) + return true; + + if (!is_filename_allowed(var->save_result.string_value.str, + var->save_result.string_value.length, TRUE)) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), + self->name.str, var->save_result.string_value.str); + return true; + } + + static const LEX_CSTRING my_cnf= { STRING_WITH_LEN("my.cnf") }; + static const LEX_CSTRING my_ini= { STRING_WITH_LEN("my.ini") }; + if (path_length >= my_cnf.length) + { + if (strcasecmp(path + path_length - my_cnf.length, my_cnf.str) == 0) + return true; // log file name ends with "my.cnf" + DBUG_ASSERT(my_cnf.length == my_ini.length); + if (strcasecmp(path + path_length - my_ini.length, my_ini.str) == 0) + return true; // log file name ends with "my.ini" + } + + MY_STAT f_stat; + + if (my_stat(path, &f_stat, MYF(0))) + { + if (!MY_S_ISREG(f_stat.st_mode) || !(f_stat.st_mode & MY_S_IWRITE)) + return true; // not a regular writable file + return false; + } + + (void) dirname_part(path, val->str, &path_length); + + if (val->length - path_length >= FN_LEN) + { // filename is too long + my_error(ER_PATH_LENGTH, MYF(0), self->name.str); + return true; + } + + if (!path_length) // no path is good path (remember, relative to datadir) + return false; + 
+ if (my_access(path, (F_OK|W_OK))) + return true; // directory is not writable + + return false; +} +static bool fix_log(char** logname, const char* default_logname, + const char*ext, bool enabled, void (*reopen)(char*)) +{ + if (!*logname) // SET ... = DEFAULT + { + make_default_log_name(logname, ext, false); + if (!*logname) + return true; + } + logger.lock_exclusive(); + mysql_mutex_unlock(&LOCK_global_system_variables); + if (enabled) + reopen(*logname); + logger.unlock(); + mysql_mutex_lock(&LOCK_global_system_variables); + return false; +} +static void reopen_general_log(char* name) +{ + logger.get_log_file_handler()->close(0); + logger.get_log_file_handler()->open_query_log(name); +} +static bool fix_general_log_file(sys_var *self, THD *thd, enum_var_type type) +{ + return fix_log(&opt_logname, opt_log_basename, ".log", opt_log, + reopen_general_log); +} +static Sys_var_charptr_fscs Sys_general_log_path( + "general_log_file", "Log connections and queries to given file", + PREALLOCATED GLOBAL_VAR(opt_logname), CMD_LINE(REQUIRED_ARG), + DEFAULT(0), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_log_path), ON_UPDATE(fix_general_log_file)); + +static void reopen_slow_log(char* name) +{ + logger.get_slow_log_file_handler()->close(0); + logger.get_slow_log_file_handler()->open_slow_log(name); +} +static bool fix_slow_log_file(sys_var *self, THD *thd, enum_var_type type) +{ + return fix_log(&opt_slow_logname, opt_log_basename, "-slow.log", + global_system_variables.sql_log_slow, reopen_slow_log); +} + +static Sys_var_charptr_fscs Sys_slow_log_path( + "slow_query_log_file", + "Alias for log_slow_query_file. " + "Log slow queries to given log file. " + "Defaults logging to 'hostname'-slow.log. 
Must be enabled to activate " + "other slow log options", + PREALLOCATED GLOBAL_VAR(opt_slow_logname), CMD_LINE(REQUIRED_ARG), + DEFAULT(0), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_log_path), ON_UPDATE(fix_slow_log_file)); + +static Sys_var_charptr_fscs Sys_log_slow_query_file_name( + "log_slow_query_file", "Log slow queries to given log file. " + "Defaults logging to 'hostname'-slow.log. Must be enabled to activate " + "other slow log options", + PREALLOCATED GLOBAL_VAR(opt_slow_logname), CMD_LINE(REQUIRED_ARG), + DEFAULT(0), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_log_path), ON_UPDATE(fix_slow_log_file)); + +static Sys_var_have Sys_have_compress( + "have_compress", "If the zlib compression library is accessible to the " + "server, this will be set to YES, otherwise it will be NO. The COMPRESS() " + "and UNCOMPRESS() functions will only be available if set to YES.", + READ_ONLY GLOBAL_VAR(have_compress), NO_CMD_LINE); + +static Sys_var_have Sys_have_crypt( + "have_crypt", "If the crypt() system call is available this variable will " + "be set to YES, otherwise it will be set to NO. If set to NO, the " + "ENCRYPT() function cannot be used.", + READ_ONLY GLOBAL_VAR(have_crypt), NO_CMD_LINE); + +static Sys_var_have Sys_have_dlopen( + "have_dynamic_loading", "If the server supports dynamic loading of plugins, " + "will be set to YES, otherwise will be set to NO.", + READ_ONLY GLOBAL_VAR(have_dlopen), NO_CMD_LINE); + +static Sys_var_have Sys_have_geometry( + "have_geometry", "If the server supports spatial data types, will be set to " + "YES, otherwise will be set to NO.", + READ_ONLY GLOBAL_VAR(have_geometry), NO_CMD_LINE); + +static Sys_var_have Sys_have_openssl( + "have_openssl", "Comparing have_openssl with have_ssl will indicate whether " + "YaSSL or openssl was used. 
If YaSSL, have_ssl will be YES, but have_openssl " + "will be NO.", + READ_ONLY GLOBAL_VAR(have_openssl), NO_CMD_LINE); + +static Sys_var_have Sys_have_profiling( + "have_profiling", "If statement profiling is available, will be set to YES, " + "otherwise will be set to NO. See SHOW PROFILES and SHOW PROFILE.", + READ_ONLY GLOBAL_VAR(have_profiling), NO_CMD_LINE); + +static Sys_var_have Sys_have_query_cache( + "have_query_cache", "If the server supports the query cache, will be set to " + "YES, otherwise will be set to NO.", + READ_ONLY GLOBAL_VAR(have_query_cache), NO_CMD_LINE); + +static Sys_var_have Sys_have_rtree_keys( + "have_rtree_keys", "If RTREE indexes (used for spatial indexes) " + "are available, will be set to YES, otherwise will be set to NO.", + READ_ONLY GLOBAL_VAR(have_rtree_keys), NO_CMD_LINE); + +static Sys_var_have Sys_have_ssl( + "have_ssl", "If the server supports secure connections, will be set to YES, " + "otherwise will be set to NO. If set to DISABLED, the server was compiled with " + "TLS support, but was not started with TLS support (see the mysqld options). " + "See also have_openssl.", + READ_ONLY GLOBAL_VAR(have_ssl), NO_CMD_LINE); + +static Sys_var_have Sys_have_symlink( + "have_symlink", "If symbolic link support is enabled, will be set to YES, " + "otherwise will be set to NO. Required for the INDEX DIRECTORY and DATA " + "DIRECTORY table options (see CREATE TABLE) and Windows symlink support. 
" + "Will be set to DISABLED if the server is started with the " + "--skip-symbolic-links option.", + READ_ONLY GLOBAL_VAR(have_symlink), NO_CMD_LINE); + +#if defined __SANITIZE_ADDRESS__ || defined WITH_UBSAN || __has_feature(memory_sanitizer) + +# ifdef __SANITIZE_ADDRESS__ +# ifdef WITH_UBSAN +# define SANITIZER_MODE "ASAN,UBSAN" +# else +# define SANITIZER_MODE "ASAN" +# endif +# elif defined WITH_UBSAN +# define SANITIZER_MODE "UBSAN" +# else +# define SANITIZER_MODE "MSAN" +# endif + +static char *have_sanitizer; +static Sys_var_charptr Sys_have_santitizer( + "have_sanitizer", + "If the server is compiled with sanitize (compiler option), this " + "variable is set to the sanitizer mode used. Possible values are " + "ASAN (Address sanitizer) or UBSAN (The Undefined Behavior Sanitizer).", + READ_ONLY GLOBAL_VAR(have_sanitizer), NO_CMD_LINE, + DEFAULT(SANITIZER_MODE)); +#endif /* defined(__SANITIZE_ADDRESS__) || defined(WITH_UBSAN) */ + +static bool fix_log_state(sys_var *self, THD *thd, enum_var_type type); + +static Sys_var_mybool Sys_general_log( + "general_log", "Log connections and queries to a table or log file. " + "Defaults logging to a file 'hostname'.log or a table mysql.general_log" + "if --log-output=TABLE is used.", + GLOBAL_VAR(opt_log), CMD_LINE(OPT_ARG), + DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_log_state)); + +static Sys_var_mybool Sys_slow_query_log( + "slow_query_log", + "Alias for log_slow_query. " + "Log slow queries to a table or log file. Defaults logging to a file " + "'hostname'-slow.log or a table mysql.slow_log if --log-output=TABLE is " + "used. Must be enabled to activate other slow log options.", + SESSION_VAR(sql_log_slow), CMD_LINE(OPT_ARG), + DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(0), ON_UPDATE(fix_log_state)); + +static Sys_var_mybool Sys_log_slow_query( + "log_slow_query", + "Log slow queries to a table or log file. 
Defaults logging to a file " + "'hostname'-slow.log or a table mysql.slow_log if --log-output=TABLE is " + "used. Must be enabled to activate other slow log options.", + SESSION_VAR(sql_log_slow), CMD_LINE(OPT_ARG), + DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(0), ON_UPDATE(fix_log_state)); + +static bool fix_log_state(sys_var *self, THD *thd, enum_var_type type) +{ + bool res; + my_bool *UNINIT_VAR(newvalptr), newval, UNINIT_VAR(oldval); + uint UNINIT_VAR(log_type); + + if (type != OPT_GLOBAL) + return 0; + + if (self == &Sys_general_log) + { + newvalptr= &opt_log; + oldval= logger.get_log_file_handler()->is_open(); + log_type= QUERY_LOG_GENERAL; + } + else + { + DBUG_ASSERT(self == &Sys_slow_query_log || self == &Sys_log_slow_query); + newvalptr= &global_system_variables.sql_log_slow; + oldval= logger.get_slow_log_file_handler()->is_open(); + log_type= QUERY_LOG_SLOW; + } + + newval= *newvalptr; + if (oldval == newval) + return false; + + *newvalptr= oldval; // [de]activate_log_handler works that way (sigh) + + mysql_mutex_unlock(&LOCK_global_system_variables); + if (!newval) + { + logger.deactivate_log_handler(thd, log_type); + res= false; + } + else + res= logger.activate_log_handler(thd, log_type); + mysql_mutex_lock(&LOCK_global_system_variables); + return res; +} + + +static bool check_not_empty_set(sys_var *self, THD *thd, set_var *var) +{ + return var->save_result.ulonglong_value == 0; +} +static bool fix_log_output(sys_var *self, THD *thd, enum_var_type type) +{ + logger.lock_exclusive(); + logger.init_slow_log(log_output_options); + logger.init_general_log(log_output_options); + logger.unlock(); + return false; +} + +static const char *log_output_names[] = { "NONE", "FILE", "TABLE", NULL}; + +static Sys_var_set Sys_log_output( + "log_output", "How logs should be written", + GLOBAL_VAR(log_output_options), CMD_LINE(REQUIRED_ARG), + log_output_names, DEFAULT(LOG_FILE), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_not_empty_set), 
ON_UPDATE(fix_log_output)); + +#ifdef HAVE_REPLICATION +static Sys_var_mybool Sys_log_slave_updates( + "log_slave_updates", "Tells the slave to log the updates from " + "the slave thread to the binary log. You will need to turn it on if " + "you plan to daisy-chain the slaves.", + READ_ONLY GLOBAL_VAR(opt_log_slave_updates), CMD_LINE(OPT_ARG), + DEFAULT(0)); + +static Sys_var_charptr_fscs Sys_relay_log( + "relay_log", "The location and name to use for relay logs.", + READ_ONLY GLOBAL_VAR(opt_relay_logname), CMD_LINE(REQUIRED_ARG), + DEFAULT(0)); + +/* + Uses NO_CMD_LINE since the --relay-log-index option set + opt_relaylog_index_name variable and computes a value for the + relay_log_index variable. +*/ +static Sys_var_charptr_fscs Sys_relay_log_index( + "relay_log_index", "The location and name to use for the file " + "that keeps a list of the last relay logs.", + READ_ONLY GLOBAL_VAR(relay_log_index), NO_CMD_LINE, + DEFAULT(0)); + +/* + Uses NO_CMD_LINE since the --log-bin-index option set + opt_binlog_index_name variable and computes a value for the + log_bin_index variable. 
+*/ +static Sys_var_charptr_fscs Sys_binlog_index( + "log_bin_index", "File that holds the names for last binary log files.", + READ_ONLY GLOBAL_VAR(log_bin_index), NO_CMD_LINE, + DEFAULT(0)); + +static Sys_var_charptr_fscs Sys_relay_log_basename( + "relay_log_basename", + "The full path of the relay log file names, excluding the extension.", + READ_ONLY GLOBAL_VAR(relay_log_basename), NO_CMD_LINE, + DEFAULT(0)); + +static Sys_var_charptr_fscs Sys_log_bin_basename( + "log_bin_basename", + "The full path of the binary log file names, excluding the extension.", + READ_ONLY GLOBAL_VAR(log_bin_basename), NO_CMD_LINE, + DEFAULT(0)); + +static Sys_var_charptr_fscs Sys_relay_log_info_file( + "relay_log_info_file", "The location and name of the file that " + "remembers where the SQL replication thread is in the relay logs.", + READ_ONLY GLOBAL_VAR(relay_log_info_file), CMD_LINE(REQUIRED_ARG), + DEFAULT(0)); + +static Sys_var_on_access_global +Sys_relay_log_purge( + "relay_log_purge", "if disabled - do not purge relay logs. " + "if enabled - purge them as soon as they are no more needed.", + GLOBAL_VAR(relay_log_purge), CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_on_access_global +Sys_relay_log_recovery( + "relay_log_recovery", "Enables automatic relay log recovery " + "right after the database startup, which means that the IO Thread " + "starts re-fetching from the master right after the last transaction " + "processed.", + GLOBAL_VAR(relay_log_recovery), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + + +bool Sys_var_rpl_filter::global_update(THD *thd, set_var *var) +{ + bool result= true; // Assume error + LEX_CSTRING *base_name= &var->base; + + if (!base_name->length) + base_name= &thd->variables.default_master_connection; + + mysql_mutex_unlock(&LOCK_global_system_variables); + + if (Master_info *mi= get_master_info(base_name, !var->base.length ? 
+ Sql_condition::WARN_LEVEL_ERROR : + Sql_condition::WARN_LEVEL_WARN)) + { + if (mi->rli.slave_running) + { + my_error(ER_SLAVE_MUST_STOP, MYF(0), + (int) mi->connection_name.length, + mi->connection_name.str); + result= true; + } + else + { + result= set_filter_value(var->save_result.string_value.str, mi); + } + mi->release(); + } + + mysql_mutex_lock(&LOCK_global_system_variables); + return result; +} + +bool Sys_var_rpl_filter::set_filter_value(const char *value, Master_info *mi) +{ + bool status= true; + Rpl_filter* rpl_filter= mi->rpl_filter; + + /* Proctect against other threads */ + mysql_mutex_lock(&LOCK_active_mi); + switch (opt_id) { + case OPT_REPLICATE_REWRITE_DB: + status= rpl_filter->set_rewrite_db(value); + break; + case OPT_REPLICATE_DO_DB: + status= rpl_filter->set_do_db(value); + break; + case OPT_REPLICATE_DO_TABLE: + status= rpl_filter->set_do_table(value); + break; + case OPT_REPLICATE_IGNORE_DB: + status= rpl_filter->set_ignore_db(value); + break; + case OPT_REPLICATE_IGNORE_TABLE: + status= rpl_filter->set_ignore_table(value); + break; + case OPT_REPLICATE_WILD_DO_TABLE: + status= rpl_filter->set_wild_do_table(value); + break; + case OPT_REPLICATE_WILD_IGNORE_TABLE: + status= rpl_filter->set_wild_ignore_table(value); + break; + } + mysql_mutex_unlock(&LOCK_active_mi); + return status; +} + +const uchar * +Sys_var_rpl_filter::global_value_ptr(THD *thd, + const LEX_CSTRING *base_name) const +{ + char buf[256]; + String tmp(buf, sizeof(buf), &my_charset_bin); + uchar *ret; + Master_info *mi; + Rpl_filter *rpl_filter; + + mysql_mutex_unlock(&LOCK_global_system_variables); + mi= get_master_info(base_name, !base_name->length ? 
+ Sql_condition::WARN_LEVEL_ERROR : + Sql_condition::WARN_LEVEL_WARN); + + if (!mi) + { + mysql_mutex_lock(&LOCK_global_system_variables); + return 0; + } + + rpl_filter= mi->rpl_filter; + + mysql_mutex_lock(&LOCK_active_mi); + switch (opt_id) { + case OPT_REPLICATE_REWRITE_DB: + rpl_filter->get_rewrite_db(&tmp); + break; + case OPT_REPLICATE_DO_DB: + rpl_filter->get_do_db(&tmp); + break; + case OPT_REPLICATE_DO_TABLE: + rpl_filter->get_do_table(&tmp); + break; + case OPT_REPLICATE_IGNORE_DB: + rpl_filter->get_ignore_db(&tmp); + break; + case OPT_REPLICATE_IGNORE_TABLE: + rpl_filter->get_ignore_table(&tmp); + break; + case OPT_REPLICATE_WILD_DO_TABLE: + rpl_filter->get_wild_do_table(&tmp); + break; + case OPT_REPLICATE_WILD_IGNORE_TABLE: + rpl_filter->get_wild_ignore_table(&tmp); + break; + } + mysql_mutex_unlock(&LOCK_active_mi); + mysql_mutex_lock(&LOCK_global_system_variables); + + mi->release(); + + ret= (uchar *) thd->strmake(tmp.ptr(), tmp.length()); + + return ret; +} + +static Sys_var_rpl_filter Sys_replicate_do_db( + "replicate_do_db", OPT_REPLICATE_DO_DB, + "Tell the slave to restrict replication to updates of tables " + "whose names appear in the comma-separated list. For " + "statement-based replication, only the default database (that " + "is, the one selected by USE) is considered, not any explicitly " + "mentioned tables in the query. For row-based replication, the " + "actual names of table(s) being updated are checked.", + PRIV_SET_SYSTEM_GLOBAL_VAR_REPLICATE_DO_DB); + +static Sys_var_rpl_filter Sys_replicate_rewrite_db( + "replicate_rewrite_db", OPT_REPLICATE_REWRITE_DB, + "Tells the slave to replicate binlog events " + "into a different database than their original target on the master." 
+ "Example: replicate-rewrite-db=master_db_name->slave_db_name.", + PRIV_SET_SYSTEM_GLOBAL_VAR_REPLICATE_REWRITE_DB); + +static Sys_var_rpl_filter Sys_replicate_do_table( + "replicate_do_table", OPT_REPLICATE_DO_TABLE, + "Tells the slave to restrict replication to tables in the " + "comma-separated list.", + PRIV_SET_SYSTEM_GLOBAL_VAR_REPLICATE_DO_TABLE); + +static Sys_var_rpl_filter Sys_replicate_ignore_db( + "replicate_ignore_db", OPT_REPLICATE_IGNORE_DB, + "Tell the slave to restrict replication to updates of tables " + "whose names do not appear in the comma-separated list. For " + "statement-based replication, only the default database (that " + "is, the one selected by USE) is considered, not any explicitly " + "mentioned tables in the query. For row-based replication, the " + "actual names of table(s) being updated are checked.", + PRIV_SET_SYSTEM_GLOBAL_VAR_REPLICATE_IGNORE_DB); + +static Sys_var_rpl_filter Sys_replicate_ignore_table( + "replicate_ignore_table", OPT_REPLICATE_IGNORE_TABLE, + "Tells the slave thread not to replicate any statement that " + "updates the specified table, even if any other tables might be " + "updated by the same statement.", + PRIV_SET_SYSTEM_GLOBAL_VAR_REPLICATE_IGNORE_TABLE); + +static Sys_var_rpl_filter Sys_replicate_wild_do_table( + "replicate_wild_do_table", OPT_REPLICATE_WILD_DO_TABLE, + "Tells the slave thread to restrict replication to statements " + "where any of the updated tables match the specified database " + "and table name patterns.", + PRIV_SET_SYSTEM_GLOBAL_VAR_REPLICATE_WILD_DO_TABLE); + +static Sys_var_rpl_filter Sys_replicate_wild_ignore_table( + "replicate_wild_ignore_table", OPT_REPLICATE_WILD_IGNORE_TABLE, + "Tells the slave thread to not replicate to the tables that " + "match the given wildcard pattern.", + PRIV_SET_SYSTEM_GLOBAL_VAR_REPLICATE_WILD_IGNORE_TABLE); + +static Sys_var_charptr_fscs Sys_slave_load_tmpdir( + "slave_load_tmpdir", "The location where the slave should put " + "its temporary 
files when replicating a LOAD DATA INFILE command", + READ_ONLY GLOBAL_VAR(slave_load_tmpdir), CMD_LINE(REQUIRED_ARG), + DEFAULT(0)); + +static Sys_var_on_access_global +Sys_slave_net_timeout( + "slave_net_timeout", "Number of seconds to wait for more data " + "from any master/slave connection before aborting the read", + GLOBAL_VAR(slave_net_timeout), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, LONG_TIMEOUT), DEFAULT(SLAVE_NET_TIMEOUT), BLOCK_SIZE(1)); + + +/* + Access a multi_source variable + Return 0 + warning if it doesn't exist +*/ + +ulonglong Sys_var_multi_source_ulonglong:: +get_master_info_ulonglong_value(THD *thd, ptrdiff_t offset) const +{ + Master_info *mi; + ulonglong res= 0; // Default value + mysql_mutex_unlock(&LOCK_global_system_variables); + if ((mi= get_master_info(&thd->variables.default_master_connection, + Sql_condition::WARN_LEVEL_WARN))) + { + res= *((ulonglong*) (((uchar*) mi) + master_info_offset)); + mi->release(); + } + mysql_mutex_lock(&LOCK_global_system_variables); + return res; +} + + +bool update_multi_source_variable(sys_var *self_var, THD *thd, + enum_var_type type) +{ + Sys_var_multi_source_ulonglong *self= (Sys_var_multi_source_ulonglong*) self_var; + bool result= true; + Master_info *mi; + + if (type == OPT_GLOBAL) + mysql_mutex_unlock(&LOCK_global_system_variables); + if ((mi= (get_master_info(&thd->variables.default_master_connection, + Sql_condition::WARN_LEVEL_ERROR)))) + { + mysql_mutex_lock(&mi->rli.run_lock); + mysql_mutex_lock(&mi->rli.data_lock); + result= self->update_variable(thd, mi); + mysql_mutex_unlock(&mi->rli.data_lock); + mysql_mutex_unlock(&mi->rli.run_lock); + mi->release(); + } + if (type == OPT_GLOBAL) + mysql_mutex_lock(&LOCK_global_system_variables); + return result; +} + +static bool update_slave_skip_counter(sys_var *self, THD *thd, Master_info *mi) +{ + if (mi->rli.slave_running) + { + my_error(ER_SLAVE_MUST_STOP, MYF(0), (int) mi->connection_name.length, + mi->connection_name.str); + return true; + } + 
if (mi->using_gtid != Master_info::USE_GTID_NO && mi->using_parallel()) + { + ulong domain_count; + mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state); + domain_count= rpl_global_gtid_slave_state->count(); + mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); + if (domain_count > 1) + { + /* + With domain-based parallel replication, the slave position is + multi-dimensional, so the relay log position is not very meaningful. + It might not even correspond to the next GTID to execute in _any_ + domain (the case after error stop). So slave_skip_counter will most + likely not do what the user intends. Instead give an error, with a + suggestion to instead set @@gtid_slave_pos past the point of error; + this works reliably also in the case of multiple domains. + */ + my_error(ER_SLAVE_SKIP_NOT_IN_GTID, MYF(0)); + return true; + } + } + + /* The value was stored temporarily in thd */ + mi->rli.slave_skip_counter= thd->variables.slave_skip_counter; + return false; +} + +static Sys_var_multi_source_ulonglong Sys_slave_skip_counter( + "sql_slave_skip_counter", "Skip the next N events from the master log", + SESSION_VAR(slave_skip_counter), NO_CMD_LINE, + MASTER_INFO_VAR(rli.slave_skip_counter), + VALID_RANGE(0, UINT_MAX), DEFAULT(0), BLOCK_SIZE(1), + ON_UPDATE(update_slave_skip_counter)); + +static bool update_max_relay_log_size(sys_var *self, THD *thd, Master_info *mi) +{ + mi->rli.max_relay_log_size= thd->variables.max_relay_log_size; + mi->rli.relay_log.set_max_size((ulong)mi->rli.max_relay_log_size); + return false; +} + +static Sys_var_multi_source_ulonglong Sys_max_relay_log_size( + "max_relay_log_size", + "relay log will be rotated automatically when the size exceeds this " + "value. 
If 0 at startup, it's set to max_binlog_size", + SESSION_VAR(max_relay_log_size), CMD_LINE(REQUIRED_ARG), + MASTER_INFO_VAR(rli.max_relay_log_size), + VALID_RANGE(0, 1024L*1024*1024), DEFAULT(0), BLOCK_SIZE(IO_SIZE), + ON_UPDATE(update_max_relay_log_size)); + +static Sys_var_charptr Sys_slave_skip_errors( + "slave_skip_errors", "Tells the slave thread to continue " + "replication when a query event returns an error from the " + "provided list", + READ_ONLY GLOBAL_VAR(opt_slave_skip_errors), CMD_LINE(REQUIRED_ARG), + DEFAULT(0)); + +static Sys_var_on_access_global +Sys_read_binlog_speed_limit( + "read_binlog_speed_limit", "Maximum speed(KB/s) to read binlog from" + " master (0 = no limit)", + GLOBAL_VAR(opt_read_binlog_speed_limit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, ULONG_MAX), DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_charptr Sys_slave_transaction_retry_errors( + "slave_transaction_retry_errors", "Tells the slave thread to retry " + "transaction for replication when a query event returns an error from " + "the provided list. Deadlock error, elapsed lock wait timeout, " + "net read error, net read timeout, net write error, net write timeout, " + "connect error and 2 types of lost connection error are automatically " + "added to this list", + READ_ONLY GLOBAL_VAR(opt_slave_transaction_retry_errors), CMD_LINE(REQUIRED_ARG), + DEFAULT(0)); + +static Sys_var_ulonglong Sys_relay_log_space_limit( + "relay_log_space_limit", "Maximum space to use for all relay logs", + READ_ONLY GLOBAL_VAR(relay_log_space_limit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, ULONGLONG_MAX), DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_on_access_global +Sys_sync_relaylog_period( + "sync_relay_log", "Synchronously flush relay log to disk after " + "every #th event. 
Use 0 to disable synchronous flushing", + GLOBAL_VAR(sync_relaylog_period), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(10000), BLOCK_SIZE(1)); + +static Sys_var_on_access_global +Sys_sync_relayloginfo_period( + "sync_relay_log_info", "Synchronously flush relay log info " + "to disk after every #th transaction. Use 0 to disable " + "synchronous flushing", + GLOBAL_VAR(sync_relayloginfo_period), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(10000), BLOCK_SIZE(1)); +#endif + +static Sys_var_on_access_global +Sys_sync_binlog_period( + "sync_binlog", "Synchronously flush binary log to disk after " + "every #th event. Use 0 (default) to disable synchronous flushing", + GLOBAL_VAR(sync_binlog_period), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_on_access_global +Sys_sync_masterinfo_period( + "sync_master_info", "Synchronously flush master info to disk " + "after every #th event. Use 0 to disable synchronous flushing", + GLOBAL_VAR(sync_masterinfo_period), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(10000), BLOCK_SIZE(1)); + +#ifdef HAVE_REPLICATION +static Sys_var_ulong Sys_slave_trans_retries( + "slave_transaction_retries", "Number of times the slave SQL " + "thread will retry a transaction in case it failed with a deadlock, " + "elapsed lock wait timeout or listed in " + "slave_transaction_retry_errors, before giving up and stopping", + GLOBAL_VAR(slave_trans_retries), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(10), BLOCK_SIZE(1)); + +static Sys_var_on_access_global +Sys_slave_trans_retry_interval( + "slave_transaction_retry_interval", "Interval of the slave SQL " + "thread will retry a transaction in case it failed with a deadlock " + "or elapsed lock wait timeout or listed in " + "slave_transaction_retry_errors", + GLOBAL_VAR(slave_trans_retry_interval), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 3600), DEFAULT(0), BLOCK_SIZE(1)); +#endif + +static bool 
check_locale(sys_var *self, THD *thd, set_var *var) +{ + if (!var->value) + return false; + + MY_LOCALE *locale; + char buff[STRING_BUFFER_USUAL_SIZE]; + if (var->value->result_type() == INT_RESULT) + { + int lcno= (int)var->value->val_int(); + if (!(locale= my_locale_by_number(lcno))) + { + my_error(ER_UNKNOWN_LOCALE, MYF(0), llstr(lcno, buff)); + return true; + } + if (check_not_null(self, thd, var)) + return true; + } + else // STRING_RESULT + { + String str(buff, sizeof(buff), system_charset_info), *res; + if (!(res=var->value->val_str(&str))) + return true; + else if (!(locale= my_locale_by_name(res->c_ptr_safe()))) + { + ErrConvString err(res); + my_error(ER_UNKNOWN_LOCALE, MYF(0), err.ptr()); + return true; + } + } + + var->save_result.ptr= locale; + + if (!locale->errmsgs->errmsgs) + { + bool res; + mysql_mutex_lock(&LOCK_error_messages); + res= (!locale->errmsgs->errmsgs && + read_texts(ERRMSG_FILE, locale->errmsgs->language, + &locale->errmsgs->errmsgs)); + mysql_mutex_unlock(&LOCK_error_messages); + if (res) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "Can't process error message file for locale '%s'", + locale->name); + return true; + } + } + status_var_increment(thd->status_var.feature_locale); + return false; +} + +static bool update_locale(sys_var *self, THD* thd, enum_var_type type) +{ + /* Cache pointer to error messages */ + if (type == OPT_SESSION) + thd->variables.errmsgs= thd->variables.lc_messages->errmsgs->errmsgs; + else + global_system_variables.errmsgs= + global_system_variables.lc_messages->errmsgs->errmsgs; + return false; +} + +static Sys_var_struct Sys_lc_messages( + "lc_messages", "Set the language used for the error messages", + SESSION_VAR(lc_messages), NO_CMD_LINE, + offsetof(MY_LOCALE, name), DEFAULT(&my_default_lc_messages), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_locale), ON_UPDATE(update_locale)); + +static Sys_var_struct Sys_lc_time_names( + "lc_time_names", "Set the language used 
for the month " + "names and the days of the week", + SESSION_VAR(lc_time_names), NO_CMD_LINE, + offsetof(MY_LOCALE, name), DEFAULT(&my_default_lc_time_names), + NO_MUTEX_GUARD, IN_BINLOG, ON_CHECK(check_locale)); + +static Sys_var_tz Sys_time_zone( + "time_zone", "The current time zone, used to initialize the time " + "zone for a client when it connects. Set to SYSTEM by default, in " + "which the client uses the system time zone value.", + SESSION_VAR(time_zone), NO_CMD_LINE, + DEFAULT(&default_tz), NO_MUTEX_GUARD, IN_BINLOG); + +#ifdef WITH_WSREP +#include "wsrep_var.h" +#include "wsrep_sst.h" +#include "wsrep_binlog.h" + +static Sys_var_charptr_fscs Sys_wsrep_provider( + "wsrep_provider", "Path to replication provider library", + PREALLOCATED READ_ONLY GLOBAL_VAR(wsrep_provider), CMD_LINE(REQUIRED_ARG), + DEFAULT(WSREP_NONE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_provider_check), ON_UPDATE(wsrep_provider_update)); + +static Sys_var_charptr Sys_wsrep_provider_options( + "wsrep_provider_options", "Semicolon (;) separated list of wsrep " + "options (see wsrep_provider_options documentation).", + PREALLOCATED GLOBAL_VAR(wsrep_provider_options), + CMD_LINE(REQUIRED_ARG), + DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_provider_options_check), + ON_UPDATE(wsrep_provider_options_update)); + +static Sys_var_charptr_fscs Sys_wsrep_data_home_dir( + "wsrep_data_home_dir", "home directory for wsrep provider", + READ_ONLY GLOBAL_VAR(wsrep_data_home_dir), CMD_LINE(REQUIRED_ARG), + DEFAULT(mysql_real_data_home)); + +static Sys_var_charptr Sys_wsrep_cluster_name( + "wsrep_cluster_name", "Name for the cluster", + PREALLOCATED GLOBAL_VAR(wsrep_cluster_name), CMD_LINE(REQUIRED_ARG), + DEFAULT(WSREP_CLUSTER_NAME), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_cluster_name_check), + ON_UPDATE(wsrep_cluster_name_update)); + +static Sys_var_charptr Sys_wsrep_cluster_address ( + "wsrep_cluster_address", "Address to initially connect to cluster", + 
PREALLOCATED GLOBAL_VAR(wsrep_cluster_address), + CMD_LINE(REQUIRED_ARG), + DEFAULT(""), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_cluster_address_check), + ON_UPDATE(wsrep_cluster_address_update)); + +static Sys_var_charptr Sys_wsrep_node_name ( + "wsrep_node_name", "Name of this node. This name can be used in " + "wsrep_sst_donor as a preferred donor. Note that multiple nodes " + "in a cluster can have the same name.", + PREALLOCATED GLOBAL_VAR(wsrep_node_name), CMD_LINE(REQUIRED_ARG), + DEFAULT(glob_hostname), NO_MUTEX_GUARD, NOT_IN_BINLOG, + wsrep_node_name_check, wsrep_node_name_update); + +static Sys_var_charptr Sys_wsrep_node_address ( + "wsrep_node_address", "Specifies the node's network address, in " + "the format ip address[:port]. Used in situations where autoguessing " + "is not reliable. As of MariaDB 10.1.8, supports IPv6.", + PREALLOCATED GLOBAL_VAR(wsrep_node_address), CMD_LINE(REQUIRED_ARG), + DEFAULT(""), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_node_address_check), + ON_UPDATE(wsrep_node_address_update)); + +static Sys_var_charptr Sys_wsrep_node_incoming_address( + "wsrep_node_incoming_address", "Client connection address", + PREALLOCATED GLOBAL_VAR(wsrep_node_incoming_address),CMD_LINE(REQUIRED_ARG), + DEFAULT(WSREP_NODE_INCOMING_AUTO)); + +static Sys_var_ulong Sys_wsrep_slave_threads( + "wsrep_slave_threads", "Number of slave appliers to launch", + GLOBAL_VAR(wsrep_slave_threads), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 512), DEFAULT(1), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(0), + ON_UPDATE(wsrep_slave_threads_update)); + +static Sys_var_charptr Sys_wsrep_dbug_option( + "wsrep_dbug_option", "DBUG options to provider library", + GLOBAL_VAR(wsrep_dbug_option),CMD_LINE(REQUIRED_ARG), + DEFAULT("")); + +static const char *wsrep_debug_names[]= +{ "NONE", "SERVER", "TRANSACTION", "STREAMING", "CLIENT", NullS }; +static Sys_var_enum Sys_wsrep_debug( + "wsrep_debug", "WSREP debug level logging", + 
+ GLOBAL_VAR(wsrep_debug), CMD_LINE(REQUIRED_ARG), + wsrep_debug_names, DEFAULT(0), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(0), ON_UPDATE(wsrep_debug_update)); + +static Sys_var_mybool Sys_wsrep_convert_LOCK_to_trx( + "wsrep_convert_LOCK_to_trx", "To convert locking sessions " + "into transactions", + GLOBAL_VAR(wsrep_convert_LOCK_to_trx), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_ulong Sys_wsrep_retry_autocommit( + "wsrep_retry_autocommit", "Max number of times to retry " + "a failed autocommit statement", + SESSION_VAR(wsrep_retry_autocommit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 10000), DEFAULT(1), BLOCK_SIZE(1)); + +static bool update_wsrep_auto_increment_control (sys_var *self, THD *thd, enum_var_type type) +{ + if (wsrep_auto_increment_control) + { + /* + The variables that control auto increment shall be calculated + automatically based on the size of the cluster. This is usually done + within the wsrep_view_handler_cb callback. However, if the user + manually sets the value of wsrep_auto_increment_control to 'ON', + then we should re-calculate these variables again (because + these values may be required before wsrep_view_handler_cb will + be re-invoked, which is rarely invoked if the cluster stays in + the stable state): + */ + global_system_variables.auto_increment_increment= + wsrep_cluster_size ? wsrep_cluster_size : 1; + global_system_variables.auto_increment_offset= + wsrep_local_index >= 0 ? 
wsrep_local_index + 1 : 1; + thd->variables.auto_increment_increment= + global_system_variables.auto_increment_increment; + thd->variables.auto_increment_offset= + global_system_variables.auto_increment_offset; + } + else + { + /* + We must restore the last values of the variables that + are explicitly specified by the user: + */ + global_system_variables.auto_increment_increment= + global_system_variables.saved_auto_increment_increment; + global_system_variables.auto_increment_offset= + global_system_variables.saved_auto_increment_offset; + thd->variables.auto_increment_increment= + thd->variables.saved_auto_increment_increment; + thd->variables.auto_increment_offset= + thd->variables.saved_auto_increment_offset; + } + return false; +} + +static Sys_var_mybool Sys_wsrep_auto_increment_control( + "wsrep_auto_increment_control", "To automatically control the " + "assignment of autoincrement variables", + GLOBAL_VAR(wsrep_auto_increment_control), + CMD_LINE(OPT_ARG), DEFAULT(TRUE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(update_wsrep_auto_increment_control)); + +static Sys_var_mybool Sys_wsrep_drupal_282555_workaround( + "wsrep_drupal_282555_workaround", "Enable a workaround to handle the " + "cases where inserting a DEFAULT value into an auto-increment column " + "could fail with duplicate key error", + GLOBAL_VAR(wsrep_drupal_282555_workaround), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_charptr sys_wsrep_sst_method( + "wsrep_sst_method", "State snapshot transfer method", + GLOBAL_VAR(wsrep_sst_method),CMD_LINE(REQUIRED_ARG), + DEFAULT(WSREP_SST_DEFAULT), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_method_check)); + +static Sys_var_charptr Sys_wsrep_sst_receive_address( + "wsrep_sst_receive_address", "Address where node is waiting for " + "SST contact", + GLOBAL_VAR(wsrep_sst_receive_address),CMD_LINE(REQUIRED_ARG), + DEFAULT(WSREP_SST_ADDRESS_AUTO), NO_MUTEX_GUARD, + NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_receive_address_check), 
+ ON_UPDATE(wsrep_sst_receive_address_update)); + +static Sys_var_charptr Sys_wsrep_sst_auth( + "wsrep_sst_auth", "Authentication for SST connection", + PREALLOCATED GLOBAL_VAR(wsrep_sst_auth), CMD_LINE(REQUIRED_ARG), + DEFAULT(NULL), NO_MUTEX_GUARD, + NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_auth_check), + ON_UPDATE(wsrep_sst_auth_update)); + +static Sys_var_charptr Sys_wsrep_sst_donor( + "wsrep_sst_donor", "preferred donor node for the SST", + GLOBAL_VAR(wsrep_sst_donor),CMD_LINE(REQUIRED_ARG), + DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_donor_check), + ON_UPDATE(wsrep_sst_donor_update)); + +static Sys_var_mybool Sys_wsrep_sst_donor_rejects_queries( + "wsrep_sst_donor_rejects_queries", "Reject client queries " + "when donating state snapshot transfer", + GLOBAL_VAR(wsrep_sst_donor_rejects_queries), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_on ( + "wsrep_on", "To enable wsrep replication ", + SESSION_VAR(wsrep_on), + CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_on_check), + ON_UPDATE(wsrep_on_update)); + +static Sys_var_charptr Sys_wsrep_start_position ( + "wsrep_start_position", "global transaction position to start from ", + PREALLOCATED GLOBAL_VAR(wsrep_start_position), + CMD_LINE(REQUIRED_ARG), + DEFAULT(WSREP_START_POSITION_ZERO), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_start_position_check), + ON_UPDATE(wsrep_start_position_update)); + +static Sys_var_ulong Sys_wsrep_max_ws_size ( + "wsrep_max_ws_size", "Max write set size (bytes)", + GLOBAL_VAR(wsrep_max_ws_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1024, WSREP_MAX_WS_SIZE), DEFAULT(WSREP_MAX_WS_SIZE), + BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_max_ws_size_check), ON_UPDATE(wsrep_max_ws_size_update)); + +static Sys_var_ulong Sys_wsrep_max_ws_rows ( + "wsrep_max_ws_rows", "Max number of rows in write set", + GLOBAL_VAR(wsrep_max_ws_rows), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 
1048576), DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_charptr Sys_wsrep_notify_cmd( + "wsrep_notify_cmd", "", + READ_ONLY GLOBAL_VAR(wsrep_notify_cmd), CMD_LINE(REQUIRED_ARG), + DEFAULT("")); + +static Sys_var_charptr_fscs Sys_wsrep_status_file( + "wsrep_status_file", "wsrep status output filename", + READ_ONLY GLOBAL_VAR(wsrep_status_file), CMD_LINE(REQUIRED_ARG), + DEFAULT("")); + +static Sys_var_mybool Sys_wsrep_certify_nonPK( + "wsrep_certify_nonPK", "Certify tables with no primary key", + GLOBAL_VAR(wsrep_certify_nonPK), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static const char *wsrep_certification_rules_names[]= { "strict", "optimized", NullS }; +static Sys_var_enum Sys_wsrep_certification_rules( + "wsrep_certification_rules", + "Certification rules to use in the cluster. Possible values are: " + "\"strict\": stricter rules that could result in more certification " + "failures. " + "\"optimized\": relaxed rules that allow more concurrency and " + "cause less certification failures.", + GLOBAL_VAR(wsrep_certification_rules), CMD_LINE(REQUIRED_ARG), + wsrep_certification_rules_names, DEFAULT(WSREP_CERTIFICATION_RULES_STRICT), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(0)); + +static Sys_var_mybool Sys_wsrep_causal_reads( + "wsrep_causal_reads", "Setting this variable is equivalent " + "to setting wsrep_sync_wait READ flag", + SESSION_VAR(wsrep_causal_reads), + CMD_LINE(OPT_ARG, OPT_WSREP_CAUSAL_READS), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(wsrep_causal_reads_update), + DEPRECATED("'@@wsrep_sync_wait=1'")); // since 10.1.3 + +static Sys_var_uint Sys_wsrep_sync_wait( + "wsrep_sync_wait", "Ensure \"synchronous\" read view before executing " + "an operation of the type specified by bitmask: 1 - READ(includes " + "SELECT, SHOW and BEGIN/START TRANSACTION); 2 - UPDATE and DELETE; 4 - " + "INSERT and REPLACE", + SESSION_VAR(wsrep_sync_wait), CMD_LINE(OPT_ARG, OPT_WSREP_SYNC_WAIT), + VALID_RANGE(WSREP_SYNC_WAIT_NONE, 
WSREP_SYNC_WAIT_MAX), + DEFAULT(WSREP_SYNC_WAIT_NONE), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(wsrep_sync_wait_update)); + +static const char *wsrep_mode_names[]= +{ + "STRICT_REPLICATION", + "BINLOG_ROW_FORMAT_ONLY", + "REQUIRED_PRIMARY_KEY", + "REPLICATE_MYISAM", + "REPLICATE_ARIA", + "DISALLOW_LOCAL_GTID", + "BF_ABORT_MARIABACKUP", + NullS +}; +static Sys_var_set Sys_wsrep_mode( + "wsrep_mode", + "Set of WSREP features that are enabled.", + GLOBAL_VAR(wsrep_mode), CMD_LINE(REQUIRED_ARG), + wsrep_mode_names, + DEFAULT(0), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_mode_check)); + +static const char *wsrep_OSU_method_names[]= { "TOI", "RSU", NullS }; +static Sys_var_enum Sys_wsrep_OSU_method( + "wsrep_OSU_method", "Method for Online Schema Upgrade", + SESSION_VAR(wsrep_OSU_method), CMD_LINE(OPT_ARG), + wsrep_OSU_method_names, DEFAULT(WSREP_OSU_TOI)); + +static PolyLock_mutex PLock_wsrep_desync(&LOCK_wsrep_desync); +static Sys_var_mybool Sys_wsrep_desync ( + "wsrep_desync", "To desynchronize the node from the cluster", + GLOBAL_VAR(wsrep_desync), + CMD_LINE(OPT_ARG), DEFAULT(FALSE), + &PLock_wsrep_desync, NOT_IN_BINLOG, + ON_CHECK(wsrep_desync_check), + ON_UPDATE(wsrep_desync_update)); + +static const char *wsrep_reject_queries_names[]= { "NONE", "ALL", "ALL_KILL", NullS }; +static Sys_var_enum Sys_wsrep_reject_queries( + "wsrep_reject_queries", "Variable to set to reject queries", + GLOBAL_VAR(wsrep_reject_queries), CMD_LINE(OPT_ARG), + wsrep_reject_queries_names, DEFAULT(WSREP_REJECT_NONE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(wsrep_reject_queries_update)); + +static const char *wsrep_binlog_format_names[]= + {"MIXED", "STATEMENT", "ROW", "NONE", NullS}; +static Sys_var_enum Sys_wsrep_forced_binlog_format( + "wsrep_forced_binlog_format", "binlog format to take effect over user's choice", + GLOBAL_VAR(wsrep_forced_binlog_format), CMD_LINE(REQUIRED_ARG), + wsrep_binlog_format_names, 
DEFAULT(BINLOG_FORMAT_UNSPEC)); + +static Sys_var_mybool Sys_wsrep_recover_datadir( + "wsrep_recover", "Recover database state after crash and exit", + READ_ONLY GLOBAL_VAR(wsrep_recovery), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_log_conflicts( + "wsrep_log_conflicts", "To log multi-master conflicts", + GLOBAL_VAR(wsrep_log_conflicts), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_ulong Sys_wsrep_mysql_replication_bundle( + "wsrep_mysql_replication_bundle", "mysql replication group commit ", + GLOBAL_VAR(wsrep_mysql_replication_bundle), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 1000), DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_mybool Sys_wsrep_load_data_splitting( + "wsrep_load_data_splitting", "To commit LOAD DATA " + "transaction after every 10K rows inserted (deprecated)", + GLOBAL_VAR(wsrep_load_data_splitting), + CMD_LINE(OPT_ARG), DEFAULT(0), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(0), ON_UPDATE(0), DEPRECATED("")); // since 10.4.3 + +static Sys_var_mybool Sys_wsrep_slave_FK_checks( + "wsrep_slave_FK_checks", "Should slave thread do " + "foreign key constraint checks", + GLOBAL_VAR(wsrep_slave_FK_checks), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_wsrep_slave_UK_checks( + "wsrep_slave_UK_checks", "Should slave thread do " + "secondary index uniqueness checks", + GLOBAL_VAR(wsrep_slave_UK_checks), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_restart_slave( + "wsrep_restart_slave", "Should MariaDB slave be restarted automatically, when node joins back to cluster", + GLOBAL_VAR(wsrep_restart_slave), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_ulonglong Sys_wsrep_trx_fragment_size( + "wsrep_trx_fragment_size", + "Size of transaction fragments for streaming replication (measured in " + "units of 'wsrep_trx_fragment_unit')", + SESSION_VAR(wsrep_trx_fragment_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, WSREP_MAX_WS_SIZE), DEFAULT(0), BLOCK_SIZE(1), + 
NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_trx_fragment_size_check), + ON_UPDATE(wsrep_trx_fragment_size_update)); + +extern const char *wsrep_fragment_units[]; + +static Sys_var_enum Sys_wsrep_trx_fragment_unit( + "wsrep_trx_fragment_unit", + "Unit for streaming replication transaction fragments' size: bytes, " + "rows, statements", + SESSION_VAR(wsrep_trx_fragment_unit), CMD_LINE(REQUIRED_ARG), + wsrep_fragment_units, + DEFAULT(WSREP_FRAG_BYTES), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(0), + ON_UPDATE(wsrep_trx_fragment_unit_update)); + +extern const char *wsrep_SR_store_types[]; +static Sys_var_enum Sys_wsrep_SR_store( + "wsrep_SR_store", "Storage for streaming replication fragments", + READ_ONLY GLOBAL_VAR(wsrep_SR_store_type), CMD_LINE(REQUIRED_ARG), + wsrep_SR_store_types, DEFAULT(WSREP_SR_STORE_TABLE)); + +static Sys_var_mybool Sys_wsrep_dirty_reads( + "wsrep_dirty_reads", + "Allow reads even when the node is not in the primary component.", + SESSION_VAR(wsrep_dirty_reads), CMD_LINE(OPT_ARG), + DEFAULT(FALSE)); + +static Sys_var_uint Sys_wsrep_ignore_apply_errors ( + "wsrep_ignore_apply_errors", "Ignore replication errors", + GLOBAL_VAR(wsrep_ignore_apply_errors), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(WSREP_IGNORE_ERRORS_NONE, WSREP_IGNORE_ERRORS_MAX), + DEFAULT(7), BLOCK_SIZE(1)); + +static Sys_var_uint Sys_wsrep_gtid_domain_id( + "wsrep_gtid_domain_id", "When wsrep_gtid_mode is set, this value is " + "used as gtid_domain_id for galera transactions and also copied to the " + "joiner nodes during state transfer. 
It is ignored, otherwise.", + GLOBAL_VAR(wsrep_gtid_domain_id), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX32), DEFAULT(0), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(wsrep_gtid_domain_id_update)); + +static Sys_var_ulonglong Sys_wsrep_gtid_seq_no( + "wsrep_gtid_seq_no", + "Internal server usage, manually set WSREP GTID seqno.", + SESSION_ONLY(wsrep_gtid_seq_no), + NO_CMD_LINE, VALID_RANGE(0, ULONGLONG_MAX), DEFAULT(0), + BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_gtid_seq_no_check)); + +static Sys_var_mybool Sys_wsrep_gtid_mode( + "wsrep_gtid_mode", "Automatically update the (joiner) node's " + "wsrep_gtid_domain_id value with that of donor's (received during " + "state transfer) and use it in place of gtid_domain_id for all galera " + "transactions. When OFF (default), wsrep_gtid_domain_id is simply " + "ignored (backward compatibility).", + GLOBAL_VAR(wsrep_gtid_mode), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static char *wsrep_patch_version_ptr; +static Sys_var_charptr Sys_wsrep_patch_version( + "wsrep_patch_version", "Wsrep patch version, for example wsrep_25.10.", + READ_ONLY GLOBAL_VAR(wsrep_patch_version_ptr), CMD_LINE_HELP_ONLY, + DEFAULT(WSREP_PATCH_VERSION)); + + +static Sys_var_charptr Sys_wsrep_allowlist( + "wsrep_allowlist", "Allowed IP addresses split by comma delimiter", + READ_ONLY GLOBAL_VAR(wsrep_allowlist), CMD_LINE(REQUIRED_ARG), + DEFAULT("")); + +#endif /* WITH_WSREP */ + +static bool fix_host_cache_size(sys_var *, THD *, enum_var_type) +{ + hostname_cache_resize((uint) host_cache_size); + return false; +} + +static Sys_var_ulong Sys_host_cache_size( + "host_cache_size", + "How many host names should be cached to avoid resolving.", + AUTO_SET GLOBAL_VAR(host_cache_size), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 65536), + DEFAULT(HOST_CACHE_SIZE), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_host_cache_size)); + +vio_keepalive_opts opt_vio_keepalive; + 
+static Sys_var_int Sys_keepalive_time( + "tcp_keepalive_time", + "Timeout, in seconds, with no activity until the first TCP keep-alive packet is sent." + "If set to 0, system dependent default is used.", + AUTO_SET GLOBAL_VAR(opt_vio_keepalive.idle), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, INT_MAX32/1000), DEFAULT(0), + BLOCK_SIZE(1)); + +static Sys_var_int Sys_keepalive_interval( + "tcp_keepalive_interval", + "The interval, in seconds, between when successive keep-alive packets are sent if no acknowledgement is received." + "If set to 0, system dependent default is used.", + AUTO_SET GLOBAL_VAR(opt_vio_keepalive.interval), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, INT_MAX32/1000), DEFAULT(0), + BLOCK_SIZE(1)); + +static Sys_var_int Sys_keepalive_probes( + "tcp_keepalive_probes", + "The number of unacknowledged probes to send before considering the connection dead and notifying the application layer." + "If set to 0, system dependent default is used.", + AUTO_SET GLOBAL_VAR(opt_vio_keepalive.probes), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, INT_MAX32/1000), DEFAULT(0), + BLOCK_SIZE(1)); + + +static bool update_tcp_nodelay(sys_var *self, THD *thd, + enum_var_type type) +{ + DBUG_ASSERT(thd); + + Vio *vio = thd->net.vio; + if (vio) + return (MY_TEST(vio_nodelay(vio, thd->variables.tcp_nodelay))); + + return false; +} + +static Sys_var_mybool Sys_tcp_nodelay( + "tcp_nodelay", + "Set option TCP_NODELAY (disable Nagle's algorithm) on socket", + SESSION_VAR(tcp_nodelay), CMD_LINE(OPT_ARG), + DEFAULT(TRUE),NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(check_session_only_variable), + ON_UPDATE(update_tcp_nodelay)); + +static Sys_var_charptr_fscs Sys_ignore_db_dirs( + "ignore_db_dirs", + "Specifies a directory to add to the ignore list when collecting " + "database names from the datadir. 
Put a blank argument to reset " + "the list accumulated so far.", + READ_ONLY GLOBAL_VAR(opt_ignore_db_dirs), + CMD_LINE(REQUIRED_ARG, OPT_IGNORE_DB_DIRECTORY), + DEFAULT(0)); + +static Sys_var_ulong Sys_sp_cache_size( + "stored_program_cache", + "The soft upper limit for number of cached stored routines for " + "one connection.", + GLOBAL_VAR(stored_program_cache_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 512 * 1024), DEFAULT(256), BLOCK_SIZE(1)); + +export const char *plugin_maturity_names[]= +{ "unknown", "experimental", "alpha", "beta", "gamma", "stable", 0 }; +static Sys_var_enum Sys_plugin_maturity( + "plugin_maturity", + "The lowest desirable plugin maturity. " + "Plugins less mature than that will not be installed or loaded", + READ_ONLY GLOBAL_VAR(plugin_maturity), CMD_LINE(REQUIRED_ARG), + plugin_maturity_names, + DEFAULT(SERVER_MATURITY_LEVEL > 0 ? + SERVER_MATURITY_LEVEL - 1 : SERVER_MATURITY_LEVEL)); + +static Sys_var_ulong Sys_deadlock_search_depth_short( + "deadlock_search_depth_short", + "Short search depth for the two-step deadlock detection", + SESSION_VAR(wt_deadlock_search_depth_short), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 32), DEFAULT(4), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_deadlock_search_depth_long( + "deadlock_search_depth_long", + "Long search depth for the two-step deadlock detection", + SESSION_VAR(wt_deadlock_search_depth_long), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 33), DEFAULT(15), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_deadlock_timeout_depth_short( + "deadlock_timeout_short", + "Short timeout for the two-step deadlock detection (in microseconds)", + SESSION_VAR(wt_timeout_short), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(10000), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_deadlock_timeout_depth_long( + "deadlock_timeout_long", + "Long timeout for the two-step deadlock detection (in microseconds)", + SESSION_VAR(wt_timeout_long), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), 
DEFAULT(50000000), BLOCK_SIZE(1)); + +static Sys_var_uint Sys_extra_port( + "extra_port", + "Extra port number to use for tcp connections in a " + "one-thread-per-connection manner. 0 means don't use another port", + READ_ONLY GLOBAL_VAR(mysqld_extra_port), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX32), DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_on_access_global +Sys_extra_max_connections( + "extra_max_connections", "The number of connections on extra-port", + GLOBAL_VAR(extra_max_connections), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 100000), DEFAULT(1), BLOCK_SIZE(1), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(fix_max_connections)); + +#ifdef SAFE_MUTEX +static Sys_var_mybool Sys_mutex_deadlock_detector( + "debug_mutex_deadlock_detector", "Enable checking of wrong mutex usage", + READ_ONLY GLOBAL_VAR(safe_mutex_deadlock_detector), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); +#endif + +static Sys_var_keycache Sys_key_cache_segments( + "key_cache_segments", "The number of segments in a key cache", + KEYCACHE_VAR(param_partitions), + CMD_LINE(REQUIRED_ARG, OPT_KEY_CACHE_PARTITIONS), + VALID_RANGE(0, MAX_KEY_CACHE_PARTITIONS), + DEFAULT(DEFAULT_KEY_CACHE_PARTITIONS), + BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(repartition_keycache)); + +static const char *log_slow_filter_names[]= +{ + "admin", "filesort", "filesort_on_disk", "filesort_priority_queue", + "full_join", "full_scan", "not_using_index", "query_cache", + "query_cache_miss", "tmp_table", "tmp_table_on_disk", 0 +}; + + +static Sys_var_set Sys_log_slow_filter( + "log_slow_filter", + "Log only certain types of queries to the slow log. If variable empty all kind of queries are logged. 
All types are bound by slow_query_time, except 'not_using_index' which is always logged if enabled", + SESSION_VAR(log_slow_filter), CMD_LINE(REQUIRED_ARG), + log_slow_filter_names, + /* by default we log all queries except 'not_using_index' */ + DEFAULT(my_set_bits(array_elements(log_slow_filter_names)-1) & + ~QPLAN_NOT_USING_INDEX)); + +static const char *log_slow_disabled_statements_names[]= +{ "admin", "call", "slave", "sp", 0 }; + +static const char *log_disabled_statements_names[]= +{ "slave", "sp", 0 }; + +static Sys_var_set Sys_log_slow_disabled_statements( + "log_slow_disabled_statements", + "Don't log certain types of statements to slow log", + SESSION_VAR(log_slow_disabled_statements), CMD_LINE(REQUIRED_ARG), + log_slow_disabled_statements_names, + DEFAULT(LOG_SLOW_DISABLE_SP)); + +static Sys_var_set Sys_log_disabled_statements( + "log_disabled_statements", + "Don't log certain types of statements to general log", + SESSION_VAR(log_disabled_statements), CMD_LINE(REQUIRED_ARG), + log_disabled_statements_names, + DEFAULT(LOG_DISABLE_SP), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_has_super)); + +#define NOT_SUPPORTED_YET -2 +#ifndef PCRE2_EXTENDED_MORE +#define PCRE2_EXTENDED_MORE NOT_SUPPORTED_YET +#endif + +static const char *default_regex_flags_names[]= +{ + "DOTALL", // (?s) . 
matches anything including NL + "DUPNAMES", // (?J) Allow duplicate names for subpatterns + "EXTENDED", // (?x) Ignore white space and # comments + "EXTENDED_MORE",//(?xx) Ignore white space and # comments inside cheracter + "EXTRA", // means nothing since PCRE2 + "MULTILINE", // (?m) ^ and $ match newlines within data + "UNGREEDY", // (?U) Invert greediness of quantifiers + 0 +}; +static const int default_regex_flags_to_pcre[]= +{ + PCRE2_DOTALL, + PCRE2_DUPNAMES, + PCRE2_EXTENDED, + PCRE2_EXTENDED_MORE, + -1, /* EXTRA flag not available since PCRE2 */ + PCRE2_MULTILINE, + PCRE2_UNGREEDY, + 0 +}; +int default_regex_flags_pcre(THD *thd) +{ + ulonglong src= thd->variables.default_regex_flags; + int i, res; + for (i= res= 0; default_regex_flags_to_pcre[i]; i++) + { + if (src & (1ULL << i)) + { + if (default_regex_flags_to_pcre[i] < 0) + { + const char *msg= default_regex_flags_to_pcre[i] == NOT_SUPPORTED_YET + ? "Your version of PCRE2 does not support the %s flag. Ignored." + : "PCRE2 doesn't support the %s flag. Ignored."; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_UNKNOWN_ERROR, msg, default_regex_flags_names[i]); + continue; + } + res|= default_regex_flags_to_pcre[i]; + } + } + return res; +} +static Sys_var_set Sys_default_regex_flags( + "default_regex_flags", + "Default flags for the regex library", + SESSION_VAR(default_regex_flags), CMD_LINE(REQUIRED_ARG), + default_regex_flags_names, + DEFAULT(0)); + +static Sys_var_ulong Sys_log_slow_rate_limit( + "log_slow_rate_limit", + "Write to slow log every #th slow query. Set to 1 to log everything. " + "Increase it to reduce the size of the slow or the performance impact " + "of slow logging", + SESSION_VAR(log_slow_rate_limit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, UINT_MAX), DEFAULT(1), BLOCK_SIZE(1)); + +/* + Full is not needed below anymore as one can set all bits with '= ALL', but + we need it for compatiblity with earlier versions. 
+*/ +static const char *log_slow_verbosity_names[]= +{ "innodb", "query_plan", "explain", "engine", "warnings", "full", 0}; + +static Sys_var_set Sys_log_slow_verbosity( + "log_slow_verbosity", + "Verbosity level for the slow log", + SESSION_VAR(log_slow_verbosity), CMD_LINE(REQUIRED_ARG), + log_slow_verbosity_names, DEFAULT(LOG_SLOW_VERBOSITY_INIT)); + +static Sys_var_ulong Sys_log_slow_max_warnings( + "log_slow_max_warnings", + "Max numbers of warnings printed to slow query log per statement", + SESSION_VAR(log_slow_max_warnings), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 1000), DEFAULT(10), BLOCK_SIZE(1)); + +static const char *note_verbosity_names[]= +{ "basic", "unusable_keys", "explain", 0}; + +static Sys_var_set Sys_note_verbosity( + "note_verbosity", + "Verbosity level for note-warnings given to the user. " + "See also @@sql_notes.", + SESSION_VAR(note_verbosity), CMD_LINE(REQUIRED_ARG), + note_verbosity_names, DEFAULT(NOTE_VERBOSITY_NORMAL | + NOTE_VERBOSITY_EXPLAIN)); + +static Sys_var_ulong Sys_join_cache_level( + "join_cache_level", + "Controls what join operations can be executed with join buffers. 
Odd " + "numbers are used for plain join buffers while even numbers are used " + "for linked buffers", + SESSION_VAR(join_cache_level), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 8), DEFAULT(2), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_mrr_buffer_size( + "mrr_buffer_size", + "Size of buffer to use when using MRR with range access", + SESSION_VAR(mrr_buff_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(IO_SIZE*2, INT_MAX32), DEFAULT(256*1024), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_rowid_merge_buff_size( + "rowid_merge_buff_size", + "The size of the buffers used [NOT] IN evaluation via partial matching", + SESSION_VAR(rowid_merge_buff_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, LONG_MAX), DEFAULT(8*1024*1024), + BLOCK_SIZE(1)); + +static Sys_var_mybool Sys_userstat( + "userstat", + "Enables statistics gathering for USER_STATISTICS, CLIENT_STATISTICS, " + "INDEX_STATISTICS and TABLE_STATISTICS tables in the INFORMATION_SCHEMA", + GLOBAL_VAR(opt_userstat_running), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_on_access +Sys_binlog_annotate_row_events( + "binlog_annotate_row_events", + "Tells the master to annotate RBR events with the statement that " + "caused these events", + SESSION_VAR(binlog_annotate_row_events), CMD_LINE(OPT_ARG), + DEFAULT(TRUE)); + +#ifdef HAVE_REPLICATION +static Sys_var_mybool Sys_replicate_annotate_row_events( + "replicate_annotate_row_events", + "Tells the slave to write annotate rows events received from the master " + "to its own binary log. 
Ignored if log_slave_updates is not set", + READ_ONLY GLOBAL_VAR(opt_replicate_annotate_row_events), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); +#endif + +static Sys_var_ulonglong Sys_join_buffer_space_limit( + "join_buffer_space_limit", + "The limit of the space for all join buffers used by a query", + SESSION_VAR(join_buff_space_limit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(2048, ULONGLONG_MAX), DEFAULT(16*128*1024), + BLOCK_SIZE(2048)); + +static Sys_var_ulong Sys_progress_report_time( + "progress_report_time", + "Seconds between sending progress reports to the client for " + "time-consuming statements. Set to 0 to disable progress reporting.", + SESSION_VAR(progress_report_time), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(5), BLOCK_SIZE(1)); + +const char *use_stat_tables_modes[] = + {"NEVER", "COMPLEMENTARY", "PREFERABLY", + "COMPLEMENTARY_FOR_QUERIES", "PREFERABLY_FOR_QUERIES", 0}; +static Sys_var_enum Sys_optimizer_use_stat_tables( + "use_stat_tables", + "Specifies how to use system statistics tables", + SESSION_VAR(use_stat_tables), CMD_LINE(REQUIRED_ARG), + use_stat_tables_modes, DEFAULT(4)); + +static Sys_var_ulong Sys_histogram_size( + "histogram_size", + "Number of bytes used for a histogram. " + "If set to 0, no histograms are created by ANALYZE.", + SESSION_VAR(histogram_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 255), DEFAULT(254), BLOCK_SIZE(1)); + +extern const char *histogram_types[]; +static Sys_var_enum Sys_histogram_type( + "histogram_type", + "Specifies type of the histograms created by ANALYZE. " + "Possible values are: " + "SINGLE_PREC_HB - single precision height-balanced, " + "DOUBLE_PREC_HB - double precision height-balanced, " + "JSON_HB - height-balanced, stored as JSON.", + SESSION_VAR(histogram_type), CMD_LINE(REQUIRED_ARG), + histogram_types, DEFAULT(1)); + +static Sys_var_mybool Sys_no_thread_alarm( + "debug_no_thread_alarm", + "Disable system thread alarm calls. 
Disabling it may be useful " + "in debugging or testing, never do it in production", + READ_ONLY GLOBAL_VAR(my_disable_thr_alarm), CMD_LINE(OPT_ARG), + DEFAULT(FALSE)); + +static Sys_var_mybool Sys_query_cache_strip_comments( + "query_cache_strip_comments", + "Strip all comments from a query before storing it " + "in the query cache", + SESSION_VAR(query_cache_strip_comments), CMD_LINE(OPT_ARG), + DEFAULT(FALSE)); + +static ulonglong in_transaction(THD *thd) +{ + return MY_TEST(thd->in_active_multi_stmt_transaction()); +} +static Sys_var_session_special Sys_in_transaction( + "in_transaction", "Whether there is an active transaction", + READ_ONLY sys_var::ONLY_SESSION, NO_CMD_LINE, + VALID_RANGE(0, 1), BLOCK_SIZE(1), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(0), ON_READ(in_transaction)); + +#ifndef DBUG_OFF +static Sys_var_ulong Sys_debug_binlog_fsync_sleep( + "debug_binlog_fsync_sleep", + "Extra sleep (in microseconds) to add to binlog fsync(), for debugging", + GLOBAL_VAR(opt_binlog_dbug_fsync_sleep), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(0), BLOCK_SIZE(1)); +#endif + +static Sys_var_harows Sys_expensive_subquery_limit( + "expensive_subquery_limit", + "The maximum number of rows a subquery may examine in order to be " + "executed during optimization and used for constant optimization", + SESSION_VAR(expensive_subquery_limit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, HA_POS_ERROR), DEFAULT(100), BLOCK_SIZE(1)); + +static Sys_var_mybool Sys_encrypt_tmp_disk_tables( + "encrypt_tmp_disk_tables", + "Encrypt temporary on-disk tables (created as part of query execution)", + GLOBAL_VAR(encrypt_tmp_disk_tables), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_encrypt_tmp_files( + "encrypt_tmp_files", + "Encrypt temporary files (created for filesort, binary log cache, etc)", + READ_ONLY GLOBAL_VAR(encrypt_tmp_files), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_binlog_encryption( + 
"encrypt_binlog", "Encrypt binary logs (including relay logs)", + READ_ONLY GLOBAL_VAR(encrypt_binlog), CMD_LINE(OPT_ARG), + DEFAULT(FALSE)); + +static const char *binlog_row_image_names[]= {"MINIMAL", "NOBLOB", "FULL", NullS}; +static Sys_var_on_access +Sys_binlog_row_image( + "binlog_row_image", + "Controls whether rows should be logged in 'FULL', 'NOBLOB' or " + "'MINIMAL' formats. 'FULL', means that all columns in the before " + "and after image are logged. 'NOBLOB', means that mysqld avoids logging " + "blob columns whenever possible (eg, blob column was not changed or " + "is not part of primary key). 'MINIMAL', means that a PK equivalent (PK " + "columns or full row if there is no PK in the table) is logged in the " + "before image, and only changed columns are logged in the after image. " + "(Default: FULL).", + SESSION_VAR(binlog_row_image), CMD_LINE(REQUIRED_ARG), + binlog_row_image_names, DEFAULT(BINLOG_ROW_IMAGE_FULL)); + +static const char *binlog_row_metadata_names[]= {"NO_LOG", "MINIMAL", "FULL", NullS}; +static Sys_var_on_access_global +Sys_binlog_row_metadata( + "binlog_row_metadata", + "Controls whether metadata is logged using FULL , MINIMAL format and NO_LOG." + "FULL causes all metadata to be logged; MINIMAL means that only " + "metadata actually required by slave is logged; NO_LOG NO metadata will be logged." + "Default: NO_LOG.", + GLOBAL_VAR(binlog_row_metadata), CMD_LINE(REQUIRED_ARG), + binlog_row_metadata_names, DEFAULT(Table_map_log_event::BINLOG_ROW_METADATA_NO_LOG), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(NULL), + ON_UPDATE(NULL)); + + +static bool check_pseudo_slave_mode(sys_var *self, THD *thd, set_var *var) +{ + longlong previous_val= thd->variables.pseudo_slave_mode; + longlong val= (longlong) var->save_result.ulonglong_value; + bool rli_fake= false; + +#ifndef EMBEDDED_LIBRARY + rli_fake= thd->rli_fake ? 
true : false; +#endif + + if (rli_fake) + { + if (!val) + { +#ifndef EMBEDDED_LIBRARY + delete thd->rli_fake; + thd->rli_fake= NULL; + delete thd->rgi_fake; + thd->rgi_fake= NULL; +#endif + } + else if (previous_val && val) + goto ineffective; + else if (!previous_val && val) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "'pseudo_slave_mode' is already ON."); + } + else + { + if (!previous_val && !val) + goto ineffective; + else if (previous_val && !val) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "Slave applier execution mode not active, " + "statement ineffective."); + } + goto end; + +ineffective: + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "'pseudo_slave_mode' change was ineffective."); + +end: + return FALSE; +} +static Sys_var_mybool Sys_pseudo_slave_mode( + "pseudo_slave_mode", + "SET pseudo_slave_mode= 0,1 are commands that mysqlbinlog " + "adds to beginning and end of binary log dumps. 
While zero " + "value indeed disables, the actual enabling of the slave " + "applier execution mode is done implicitly when a " + "Format_description_event is sent through the session.", + SESSION_ONLY(pseudo_slave_mode), NO_CMD_LINE, DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_pseudo_slave_mode)); + +static Sys_var_mybool Sys_mysql56_temporal_format( + "mysql56_temporal_format", + "Use MySQL-5.6 (instead of MariaDB-5.3) format for TIME, DATETIME, TIMESTAMP columns.", + GLOBAL_VAR(opt_mysql56_temporal_format), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_strict_password_validation( + "strict_password_validation", + "When password validation plugins are enabled, reject passwords " + "that cannot be validated (passwords specified as a hash)", + GLOBAL_VAR(strict_password_validation), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +#ifdef HAVE_MMAP +static Sys_var_ulong Sys_log_tc_size( + "log_tc_size", + "Size of transaction coordinator log.", + READ_ONLY GLOBAL_VAR(opt_tc_log_size), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(my_getpagesize() * 3, ULONG_MAX), + DEFAULT(my_getpagesize() * 6), BLOCK_SIZE(my_getpagesize())); +#endif + +static Sys_var_ulonglong Sys_max_session_mem_used( + "max_session_mem_used", "Amount of memory a single user session " + "is allowed to allocate. This limits the value of the " + "session variable MEM_USED", SESSION_VAR(max_mem_used), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(8192, ULONGLONG_MAX), + DEFAULT(LONGLONG_MAX), BLOCK_SIZE(1)); + +#ifndef EMBEDDED_LIBRARY + +static Sys_var_sesvartrack Sys_track_session_sys_vars( + "session_track_system_variables", + "Track changes in registered system variables. 
", + CMD_LINE(REQUIRED_ARG), + DEFAULT("autocommit,character_set_client,character_set_connection," + "character_set_results,time_zone")); + +static bool update_session_track_schema(sys_var *self, THD *thd, + enum_var_type type) +{ + DBUG_ENTER("update_session_track_schema"); + DBUG_RETURN(thd->session_tracker.current_schema.update(thd, NULL)); +} + +static Sys_var_mybool Sys_session_track_schema( + "session_track_schema", + "Track changes to the default schema.", + SESSION_VAR(session_track_schema), + CMD_LINE(OPT_ARG), DEFAULT(TRUE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(0), + ON_UPDATE(update_session_track_schema)); + + +static bool update_session_track_tx_info(sys_var *self, THD *thd, + enum_var_type type) +{ + DBUG_ENTER("update_session_track_tx_info"); + DBUG_RETURN(thd->session_tracker.transaction_info.update(thd, NULL)); +} + +static const char *session_track_transaction_info_names[]= + { "OFF", "STATE", "CHARACTERISTICS", NullS }; + +static Sys_var_enum Sys_session_track_transaction_info( + "session_track_transaction_info", + "Track changes to the transaction attributes. OFF to disable; " + "STATE to track just transaction state (Is there an active transaction? " + "Does it have any data? 
etc.); CHARACTERISTICS to track transaction " + "state and report all statements needed to start a transaction with " + "the same characteristics (isolation level, read only/read write," + "snapshot - but not any work done / data modified within the " + "transaction).", + SESSION_VAR(session_track_transaction_info), + CMD_LINE(REQUIRED_ARG), session_track_transaction_info_names, + DEFAULT(0), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(update_session_track_tx_info)); + + +static bool update_session_track_state_change(sys_var *self, THD *thd, + enum_var_type type) +{ + DBUG_ENTER("update_session_track_state_change"); + DBUG_RETURN(thd->session_tracker.state_change.update(thd, NULL)); +} + +static Sys_var_mybool Sys_session_track_state_change( + "session_track_state_change", + "Track changes to the session state.", + SESSION_VAR(session_track_state_change), + CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(0), + ON_UPDATE(update_session_track_state_change)); + + +#ifdef USER_VAR_TRACKING +static bool update_session_track_user_variables(sys_var *self, THD *thd, + enum_var_type type) +{ + return thd->session_tracker.user_variables.update(thd, 0); +} + +static Sys_var_mybool Sys_session_track_user_variables( + "session_track_user_variables", + "Track changes to user variables.", + SESSION_VAR(session_track_user_variables), + CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(0), + ON_UPDATE(update_session_track_user_variables)); +#endif // USER_VAR_TRACKING + +#endif //EMBEDDED_LIBRARY + +static Sys_var_uint Sys_in_subquery_conversion_threshold( + "in_predicate_conversion_threshold", + "The minimum number of scalar elements in the value list of " + "IN predicate that triggers its conversion to IN subquery. 
Set to " + "0 to disable the conversion.", + SESSION_VAR(in_subquery_conversion_threshold), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(IN_SUBQUERY_CONVERSION_THRESHOLD), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_optimizer_max_sel_arg_weight( + "optimizer_max_sel_arg_weight", + "The maximum weight of the SEL_ARG graph. Set to 0 for no limit", + SESSION_VAR(optimizer_max_sel_arg_weight), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(SEL_ARG::MAX_WEIGHT), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_optimizer_max_sel_args( + "optimizer_max_sel_args", + "The maximum number of SEL_ARG objects created when optimizing a range. " + "If more objects would be needed, the range will not be used by the " + "optimizer.", + SESSION_VAR(optimizer_max_sel_args), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(SEL_ARG::DEFAULT_MAX_SEL_ARGS), BLOCK_SIZE(1)); + +static Sys_var_enum Sys_secure_timestamp( + "secure_timestamp", "Restricts direct setting of a session " + "timestamp. 
Possible levels are: YES - timestamp cannot deviate from " + "the system clock, REPLICATION - replication thread can adjust " + "timestamp to match the master's, SUPER - a user with this " + "privilege and a replication thread can adjust timestamp, NO - " + "historical behavior, anyone can modify session timestamp", + READ_ONLY GLOBAL_VAR(opt_secure_timestamp), CMD_LINE(REQUIRED_ARG), + secure_timestamp_levels, DEFAULT(SECTIME_NO)); + +static Sys_var_ulonglong Sys_max_rowid_filter_size( + "max_rowid_filter_size", + "The maximum size of the container of a rowid filter", + SESSION_VAR(max_rowid_filter_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1024, (ulonglong)~(intptr)0), DEFAULT(128*1024), + BLOCK_SIZE(1)); + +static Sys_var_bit Sys_system_versioning_insert_history( + "system_versioning_insert_history", + "Allows direct inserts into ROW_START and ROW_END columns if " + "secure_timestamp allows changing @@timestamp", + SESSION_VAR(option_bits), CMD_LINE(OPT_ARG), + OPTION_INSERT_HISTORY, DEFAULT(FALSE), + NO_MUTEX_GUARD, IN_BINLOG); diff --git a/sql/sys_vars.inl b/sql/sys_vars.inl new file mode 100644 index 00000000..385ad897 --- /dev/null +++ b/sql/sys_vars.inl @@ -0,0 +1,2820 @@ +/* Copyright (c) 2002, 2011, Oracle and/or its affiliates. + Copyright (c) 2010, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + "private" interface to sys_var - server configuration variables. + + This header is included only by the file that contains declarations + of sys_var variables (sys_vars.cc). +*/ + +#include "sys_vars_shared.h" +#include +#include +#include +#include "keycaches.h" +#include "strfunc.h" +#include "tztime.h" // my_tz_find, my_tz_SYSTEM, struct Time_zone +#include "rpl_mi.h" // For Multi-Source Replication +#include "debug_sync.h" +#include "sql_acl.h" // check_global_access() + +/* + a set of mostly trivial (as in f(X)=X) defines below to make system variable + declarations more readable +*/ +#define VALID_RANGE(X,Y) X,Y +#define DEFAULT(X) X +#define BLOCK_SIZE(X) X +#define GLOBAL_VAR(X) sys_var::GLOBAL, (((char*)&(X))-(char*)&global_system_variables), sizeof(X) +#define SESSION_VAR(X) sys_var::SESSION, offsetof(SV, X), sizeof(((SV *)0)->X) +#define SESSION_ONLY(X) sys_var::ONLY_SESSION, offsetof(SV, X), sizeof(((SV *)0)->X) +#define NO_CMD_LINE CMD_LINE(NO_ARG, sys_var::NO_GETOPT) +#define CMD_LINE_HELP_ONLY CMD_LINE(NO_ARG, sys_var::GETOPT_ONLY_HELP) +/* + the define below means that there's no *second* mutex guard, + LOCK_global_system_variables always guards all system variables +*/ +#define NO_MUTEX_GUARD ((PolyLock*)0) +#define IN_BINLOG sys_var::SESSION_VARIABLE_IN_BINLOG +#define NOT_IN_BINLOG sys_var::VARIABLE_NOT_IN_BINLOG +#define ON_READ(X) X +#define ON_CHECK(X) X +#define ON_UPDATE(X) X +#define READ_ONLY sys_var::READONLY+ +#define AUTO_SET sys_var::AUTO_SET+ +// this means that Sys_var_charptr initial value was malloc()ed +#define PREALLOCATED sys_var::ALLOCATED+ +#define PARSED_EARLY sys_var::PARSE_EARLY+ +#define NO_SET_STMT sys_var::NO_SET_STATEMENT+ + +/* + Sys_var_bit meaning is reversed, like in + @@foreign_key_checks 
<-> OPTION_NO_FOREIGN_KEY_CHECKS +*/ +#define REVERSE(X) ~(X) +#define DEPRECATED(X) X + +#define session_var(THD, TYPE) (*(TYPE*)session_var_ptr(THD)) +#define global_var(TYPE) (*(TYPE*)global_var_ptr()) + +#if SIZEOF_OFF_T > 4 && defined(BIG_TABLES) +#define GET_HA_ROWS GET_ULL +#else +#define GET_HA_ROWS GET_ULONG +#endif + +// Disable warning caused by SESSION_VAR() macro +#ifdef __clang__ +#pragma clang diagnostic ignored "-Winvalid-offsetof" +#endif + +/* + special assert for sysvars. Tells the name of the variable, + and fails even in non-debug builds. + + It is supposed to be used *only* in Sys_var* constructors, + and has name_arg hard-coded to prevent incorrect usage. +*/ +#define SYSVAR_ASSERT(X) \ + while(!(X)) \ + { \ + fprintf(stderr, "Sysvar '%s' failed '%s'\n", name_arg, #X); \ + DBUG_ASSERT(0); \ + exit(255); \ + } + + +static const char *bool_values[3]= {"OFF", "ON", 0}; +TYPELIB bool_typelib={ array_elements(bool_values)-1, "", bool_values, 0 }; + + +template +class Sys_var_on_access: public BASE +{ + using BASE::BASE; + bool on_check_access_global(THD *thd) const override + { + return check_global_access(thd, GLOBAL_PRIV); + } + bool on_check_access_session(THD *thd) const override + { + return check_global_access(thd, SESSION_PRIV); + } +}; + + +template +class Sys_var_on_access_global: public BASE +{ + using BASE::BASE; + bool on_check_access_global(THD *thd) const override + { + return check_global_access(thd, GLOBAL_PRIV); + } +}; + + +template +class Sys_var_on_access_session: public BASE +{ + using BASE::BASE; + bool on_check_access_session(THD *thd) const override + { + return check_global_access(thd, SESSION_PRIV); + } +}; + + +/** + A small wrapper class to pass getopt arguments as a pair + to the Sys_var_* constructors. It improves type safety and helps + to catch errors in the argument order. 
+*/ +struct CMD_LINE +{ + int id; + enum get_opt_arg_type arg_type; + CMD_LINE(enum get_opt_arg_type getopt_arg_type, int getopt_id=0) + : id(getopt_id), arg_type(getopt_arg_type) {} +}; + +/** + Sys_var_integer template is used to generate Sys_var_* classes + for variables that represent the value as an integer number. + They are Sys_var_uint, Sys_var_ulong, Sys_var_harows, Sys_var_ulonglong, + Sys_var_int. + + An integer variable has a minimal and maximal values, and a "block_size" + (any valid value of the variable must be divisible by the block_size). + + Class specific constructor arguments: min, max, block_size + Backing store: int, uint, ulong, ha_rows, ulonglong, depending on the class +*/ +template +class Sys_var_integer: public sys_var +{ +public: + Sys_var_integer(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + T min_val, T max_val, T def_val, uint block_size, PolyLock *lock=0, + enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG, + on_check_function on_check_func=0, + on_update_function on_update_func=0, + const char *substitute=0) + : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id, + getopt.arg_type, SHOWT, def_val, lock, binlog_status_arg, + on_check_func, on_update_func, substitute) + { + option.var_type|= ARGT; + option.min_value= min_val; + option.max_value= max_val; + option.block_size= block_size; + if ((option.u_max_value= (uchar**) max_var_ptr())) + { + *((T*) option.u_max_value)= max_val; + } + + global_var(T)= def_val; + SYSVAR_ASSERT(size == sizeof(T)); + SYSVAR_ASSERT(min_val < max_val); + SYSVAR_ASSERT(min_val <= def_val); + SYSVAR_ASSERT(max_val >= def_val); + SYSVAR_ASSERT(block_size > 0); + SYSVAR_ASSERT(def_val % block_size == 0); + } + bool do_check(THD *thd, set_var *var) + { + my_bool fixed= FALSE, unused; + longlong v= var->value->val_int(); + + if ((ARGT == GET_HA_ROWS) || (ARGT == GET_UINT) || + (ARGT == GET_ULONG) || (ARGT == 
GET_ULL)) + { + ulonglong uv; + + /* + if the value is signed and negative, + and a variable is unsigned, it is set to zero + */ + if ((fixed= (!var->value->unsigned_flag && v < 0))) + uv= 0; + else + uv= v; + + var->save_result.ulonglong_value= + getopt_ull_limit_value(uv, &option, &unused); + + if (max_var_ptr() && (T)var->save_result.ulonglong_value > get_max_var()) + var->save_result.ulonglong_value= get_max_var(); + + fixed= fixed || var->save_result.ulonglong_value != uv; + } + else + { + /* + if the value is unsigned and has the highest bit set + and a variable is signed, it is set to max signed value + */ + if ((fixed= (var->value->unsigned_flag && v < 0))) + v= LONGLONG_MAX; + + var->save_result.longlong_value= + getopt_ll_limit_value(v, &option, &unused); + + if (max_var_ptr() && (T)var->save_result.longlong_value > get_max_var()) + var->save_result.longlong_value= get_max_var(); + + fixed= fixed || var->save_result.longlong_value != v; + } + return throw_bounds_warning(thd, name.str, fixed, + var->value->unsigned_flag, v); + } + bool session_update(THD *thd, set_var *var) + { + session_var(thd, T)= static_cast(var->save_result.ulonglong_value); + return false; + } + bool global_update(THD *thd, set_var *var) + { + global_var(T)= static_cast(var->save_result.ulonglong_value); + return false; + } + void session_save_default(THD *thd, set_var *var) + { var->save_result.ulonglong_value= (ulonglong)*(T*)global_value_ptr(thd, 0); } + void global_save_default(THD *thd, set_var *var) + { var->save_result.ulonglong_value= option.def_value; } + private: + T get_max_var() { return *((T*) max_var_ptr()); } + const uchar *default_value_ptr(THD *thd) const { return (uchar*) &option.def_value; } +}; + +typedef Sys_var_integer Sys_var_int; +typedef Sys_var_integer Sys_var_uint; +typedef Sys_var_integer Sys_var_ulong; +typedef Sys_var_integer Sys_var_harows; +typedef Sys_var_integer Sys_var_ulonglong; +typedef Sys_var_integer Sys_var_long; + + +template<> const uchar 
*Sys_var_int::default_value_ptr(THD *thd) const +{ + thd->sys_var_tmp.int_value= (int)option.def_value; + return (uchar*) &thd->sys_var_tmp.int_value; +} + +template<> const uchar *Sys_var_uint::default_value_ptr(THD *thd) const +{ + thd->sys_var_tmp.uint_value= (uint)option.def_value; + return (uchar*) &thd->sys_var_tmp.uint_value; +} + +template<> const uchar *Sys_var_long::default_value_ptr(THD *thd) const +{ + thd->sys_var_tmp.long_value= (long)option.def_value; + return (uchar*) &thd->sys_var_tmp.long_value; +} + +template<> const uchar *Sys_var_ulong::default_value_ptr(THD *thd) const +{ + thd->sys_var_tmp.ulong_value= (ulong)option.def_value; + return (uchar*) &thd->sys_var_tmp.ulong_value; +} + + +/** + Helper class for variables that take values from a TYPELIB +*/ +class Sys_var_typelib: public sys_var +{ +protected: + TYPELIB typelib; + virtual bool check_maximum(THD *thd, set_var *var, + const char *c_val, longlong i_val) + { return FALSE; } +public: + Sys_var_typelib(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, + CMD_LINE getopt, + SHOW_TYPE show_val_type_arg, const char *values[], + ulonglong def_val, PolyLock *lock, + enum binlog_status_enum binlog_status_arg, + on_check_function on_check_func, on_update_function on_update_func, + const char *substitute) + : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id, + getopt.arg_type, show_val_type_arg, def_val, lock, + binlog_status_arg, on_check_func, + on_update_func, substitute) + { + for (typelib.count= 0; values[typelib.count]; typelib.count++) /*no-op */; + typelib.name=""; + typelib.type_names= values; + typelib.type_lengths= 0; // only used by Fields_enum and Field_set + option.typelib= &typelib; + } + bool do_check(THD *thd, set_var *var) // works for enums and my_bool + { + char buff[STRING_BUFFER_USUAL_SIZE]; + String str(buff, sizeof(buff), system_charset_info), *res; + + if (var->value->result_type() == STRING_RESULT) + { + if 
(!(res=var->value->val_str(&str))) + return true; + else + if (!(var->save_result.ulonglong_value= + find_type(&typelib, res->ptr(), res->length(), false))) + return true; + else + var->save_result.ulonglong_value--; + return check_maximum(thd, var, res->ptr(), 0); + } + + longlong tmp=var->value->val_int(); + if (tmp < 0 || tmp >= typelib.count) + return true; + var->save_result.ulonglong_value= tmp; + return check_maximum(thd, var, 0, tmp); + } +}; + +/** + The class for ENUM variables - variables that take one value from a fixed + list of values. + + Class specific constructor arguments: + char* values[] - 0-terminated list of strings of valid values + + Backing store: ulong + + @note + Do *not* use "enum FOO" variables as a backing store, there is no + guarantee that sizeof(enum FOO) == sizeof(uint), there is no guarantee + even that sizeof(enum FOO) == sizeof(enum BAR) +*/ +class Sys_var_enum: public Sys_var_typelib +{ +public: + Sys_var_enum(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + const char *values[], uint def_val, PolyLock *lock=0, + enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG, + on_check_function on_check_func=0, + on_update_function on_update_func=0, + const char *substitute=0) + : Sys_var_typelib(name_arg, comment, flag_args, off, getopt, + SHOW_CHAR, values, def_val, lock, + binlog_status_arg, on_check_func, on_update_func, + substitute) + { + option.var_type|= GET_ENUM; + option.min_value= 0; + option.max_value= ULONG_MAX; + global_var(ulong)= def_val; + if ((option.u_max_value= (uchar**)max_var_ptr())) + { + *((ulong *) option.u_max_value)= ULONG_MAX; + } + SYSVAR_ASSERT(def_val < typelib.count); + SYSVAR_ASSERT(size == sizeof(ulong)); + } + bool check_maximum(THD *thd, set_var *var, + const char *c_val, longlong i_val) + { + if (!max_var_ptr() || + var->save_result.ulonglong_value <= get_max_var()) + return FALSE; + var->save_result.ulonglong_value= 
get_max_var(); + + return c_val ? throw_bounds_warning(thd, name.str, c_val) : + throw_bounds_warning(thd, name.str, TRUE, + var->value->unsigned_flag, i_val); + } + bool session_update(THD *thd, set_var *var) + { + session_var(thd, ulong)= static_cast(var->save_result.ulonglong_value); + return false; + } + bool global_update(THD *thd, set_var *var) + { + global_var(ulong)= static_cast(var->save_result.ulonglong_value); + return false; + } + void session_save_default(THD *thd, set_var *var) + { var->save_result.ulonglong_value= global_var(ulong); } + void global_save_default(THD *thd, set_var *var) + { var->save_result.ulonglong_value= option.def_value; } + const uchar *valptr(THD *thd, ulong val) const + { return reinterpret_cast(typelib.type_names[val]); } + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return valptr(thd, session_var(thd, ulong)); } + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return valptr(thd, global_var(ulong)); } + const uchar *default_value_ptr(THD *thd) const + { return valptr(thd, (ulong)option.def_value); } + + ulong get_max_var() { return *((ulong *) max_var_ptr()); } +}; + +/** + The class for boolean variables - a variant of ENUM variables + with the fixed list of values of { OFF , ON } + + Backing store: my_bool +*/ +class Sys_var_mybool: public Sys_var_typelib +{ +public: + Sys_var_mybool(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + my_bool def_val, PolyLock *lock=0, + enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG, + on_check_function on_check_func=0, + on_update_function on_update_func=0, + const char *substitute=0) + : Sys_var_typelib(name_arg, comment, flag_args, off, getopt, + SHOW_MY_BOOL, bool_values, def_val, lock, + binlog_status_arg, on_check_func, on_update_func, + substitute) + { + option.var_type|= GET_BOOL; + global_var(my_bool)= def_val; + SYSVAR_ASSERT(def_val < 2); + 
SYSVAR_ASSERT(getopt.arg_type == OPT_ARG || getopt.id < 0);
    SYSVAR_ASSERT(size == sizeof(my_bool));
  }
  bool session_update(THD *thd, set_var *var)
  {
    session_var(thd, my_bool)= var->save_result.ulonglong_value != 0;
    return false;
  }
  bool global_update(THD *thd, set_var *var)
  {
    global_var(my_bool)= var->save_result.ulonglong_value != 0;
    return false;
  }
  void session_save_default(THD *thd, set_var *var)
  { var->save_result.ulonglong_value= (ulonglong)*(my_bool *)global_value_ptr(thd, 0); }
  void global_save_default(THD *thd, set_var *var)
  { var->save_result.ulonglong_value= option.def_value; }
  const uchar *default_value_ptr(THD *thd) const
  {
    /* def_value is a ulonglong; expose a correctly-typed my_bool copy */
    thd->sys_var_tmp.my_bool_value=(my_bool) option.def_value;
    return (uchar*) &thd->sys_var_tmp.my_bool_value;
  }
};

/**
  The class for string variables. The string can be in character_set_filesystem
  or in character_set_system. The string can be allocated with my_malloc()
  or not. The state of the initial value is specified in the constructor,
  after that it's managed automatically. The value of NULL is supported.

  Backing store: char*

  @note
  This class supports only GLOBAL variables, because THD on destruction
  does not destroy individual members of SV, there's no way to free
  allocated string variables for every thread.
*/
class Sys_var_charptr_base: public sys_var
{
public:
  Sys_var_charptr_base(const char *name_arg,
          const char *comment, int flag_args, ptrdiff_t off, size_t size,
          CMD_LINE getopt,
          const char *def_val, PolyLock *lock=0,
          enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
          on_check_function on_check_func=0,
          on_update_function on_update_func=0,
          const char *substitute=0)
    : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id,
              getopt.arg_type, SHOW_CHAR_PTR, (intptr)def_val,
              lock, binlog_status_arg, on_check_func, on_update_func,
              substitute)
  {
    /*
      use GET_STR_ALLOC - if ALLOCATED it must be *always* allocated,
      otherwise (GET_STR) you'll never know whether to free it or not.
      (think of an exit because of an error right after my_getopt)
    */
    option.var_type|= (flags & ALLOCATED) ? GET_STR_ALLOC : GET_STR;
    global_var(const char*)= def_val;
  }
  void cleanup()
  {
    if (flags & ALLOCATED)
    {
      my_free(global_var(char*));
      global_var(char *)= NULL;
    }
    flags&= ~ALLOCATED;
  }
  /*
    Validate and stash the new value: NULL stays NULL; otherwise the
    string is converted to the target charset if needed and copied into
    THD memory (freed automatically with the statement).
  */
  static bool do_string_check(THD *thd, set_var *var, CHARSET_INFO *charset)
  {
    char buff[STRING_BUFFER_USUAL_SIZE], buff2[STRING_BUFFER_USUAL_SIZE];
    String str(buff, sizeof(buff), charset);
    String str2(buff2, sizeof(buff2), charset), *res;

    if (!(res=var->value->val_str(&str)))
    {
      var->save_result.string_value.str= 0;
      var->save_result.string_value.length= 0; // safety
    }
    else
    {
      uint32 unused;
      if (String::needs_conversion(res->length(), res->charset(),
                                   charset, &unused))
      {
        uint errors;
        str2.copy(res->ptr(), res->length(), res->charset(), charset,
                  &errors);
        res=&str2;

      }
      var->save_result.string_value.str= thd->strmake(res->ptr(), res->length());
      var->save_result.string_value.length= res->length();
    }

    return false;
  }
  bool do_check(THD *thd, set_var *var)
  { return do_string_check(thd, var, charset(thd)); }
  bool session_update(THD *thd, set_var *var)= 0;
  /* Returns a heap copy of the new value, or 0 for NULL / OOM */
  char *global_update_prepare(THD *thd, set_var *var)
  {
    char *new_val, *ptr= var->save_result.string_value.str;
    size_t len=var->save_result.string_value.length;
    if (ptr)
    {
      new_val= (char*)my_memdup(key_memory_Sys_var_charptr_value,
                                ptr, len+1, MYF(MY_WME));
      if (!new_val) return 0;
      new_val[len]=0;
    }
    else
      new_val= 0;
    return new_val;
  }
  void global_update_finish(char *new_val)
  {
    if (flags & ALLOCATED)
      my_free(global_var(char*));
    flags|= ALLOCATED;
    global_var(char*)= new_val;
  }
  bool global_update(THD *thd, set_var *var)
  {
    char *new_val= global_update_prepare(thd, var);
    global_update_finish(new_val);
    /* true (error) only when my_memdup failed for a non-NULL value */
    return (new_val == 0 && var->save_result.string_value.str != 0);
  }
  void session_save_default(THD *thd, set_var *var)= 0;
  void global_save_default(THD *thd, set_var *var)
  {
    char *ptr= (char*)(intptr)option.def_value;
    var->save_result.string_value.str= ptr;
    var->save_result.string_value.length= ptr ? strlen(ptr) : 0;
  }
};

class Sys_var_charptr: public Sys_var_charptr_base
{
public:
  Sys_var_charptr(const char *name_arg,
          const char *comment, int flag_args, ptrdiff_t off, size_t size,
          CMD_LINE getopt,
          const char *def_val, PolyLock *lock=0,
          enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
          on_check_function on_check_func=0,
          on_update_function on_update_func=0,
          const char *substitute=0) :
    Sys_var_charptr_base(name_arg, comment, flag_args, off, size, getopt,
                         def_val, lock, binlog_status_arg,
                         on_check_func, on_update_func, substitute)
  {
    SYSVAR_ASSERT(scope() == GLOBAL);
    SYSVAR_ASSERT(size == sizeof(char *));
  }

  bool session_update(THD *thd, set_var *var)
  {
    DBUG_ASSERT(FALSE);
    return true;
  }
  void session_save_default(THD *thd, set_var *var)
  { DBUG_ASSERT(FALSE); }
};


/* A Sys_var_charptr interpreted in character_set_filesystem */
class Sys_var_charptr_fscs: public Sys_var_charptr
{
  using Sys_var_charptr::Sys_var_charptr;
public:
  CHARSET_INFO *charset(THD *thd) const override
  {
    return
thd->variables.character_set_filesystem; + } +}; + + +#ifndef EMBEDDED_LIBRARY +class Sys_var_sesvartrack: public Sys_var_charptr_base +{ +public: + Sys_var_sesvartrack(const char *name_arg, + const char *comment, + CMD_LINE getopt, + const char *def_val, PolyLock *lock= 0) : + Sys_var_charptr_base(name_arg, comment, + SESSION_VAR(session_track_system_variables), + getopt, def_val, lock, + VARIABLE_NOT_IN_BINLOG, 0, 0, 0) + {} + bool do_check(THD *thd, set_var *var) + { + if (Sys_var_charptr_base::do_check(thd, var) || + sysvartrack_validate_value(thd, var->save_result.string_value.str, + var->save_result.string_value.length)) + return TRUE; + return FALSE; + } + bool global_update(THD *thd, set_var *var) + { + char *new_val= global_update_prepare(thd, var); + if (new_val) + { + if (sysvartrack_global_update(thd, new_val, + var->save_result.string_value.length)) + { + if (new_val) + my_free(new_val); + new_val= 0; + } + } + global_update_finish(new_val); + return (new_val == 0 && var->save_result.string_value.str != 0); + } + bool session_update(THD *thd, set_var *var) + { return thd->session_tracker.sysvars.update(thd, var); } + void session_save_default(THD *thd, set_var *var) + { + var->save_result.string_value.str= global_var(char*); + var->save_result.string_value.length= + strlen(var->save_result.string_value.str); + /* parse and feel list with default values */ + if (thd) + { +#ifdef DBUG_ASSERT_EXISTS + bool res= +#endif + sysvartrack_validate_value(thd, + var->save_result.string_value.str, + var->save_result.string_value.length); + DBUG_ASSERT(res == 0); + } + } +}; +#endif //EMBEDDED_LIBRARY + + +class Sys_var_proxy_user: public sys_var +{ +public: + Sys_var_proxy_user(const char *name_arg, const char *comment) + : sys_var(&all_sys_vars, name_arg, comment, + sys_var::READONLY+sys_var::ONLY_SESSION, 0, NO_GETOPT, + NO_ARG, SHOW_CHAR, 0, NULL, VARIABLE_NOT_IN_BINLOG, + NULL, NULL, NULL) + { + option.var_type|= GET_STR; + } + bool do_check(THD *thd, set_var 
*var) + { + DBUG_ASSERT(FALSE); + return true; + } + bool session_update(THD *thd, set_var *var) + { + DBUG_ASSERT(FALSE); + return true; + } + bool global_update(THD *thd, set_var *var) + { + DBUG_ASSERT(FALSE); + return false; + } + void session_save_default(THD *thd, set_var *var) + { DBUG_ASSERT(FALSE); } + void global_save_default(THD *thd, set_var *var) + { DBUG_ASSERT(FALSE); } +protected: + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + return thd->security_ctx->proxy_user[0] ? + (uchar *) &(thd->security_ctx->proxy_user[0]) : NULL; + } +}; + +class Sys_var_external_user : public Sys_var_proxy_user +{ +public: + Sys_var_external_user(const char *name_arg, const char *comment_arg) + : Sys_var_proxy_user (name_arg, comment_arg) + {} + +protected: + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + return (uchar*)thd->security_ctx->external_user; + } +}; + +class Master_info; +class Sys_var_rpl_filter: public sys_var +{ +private: + int opt_id; + privilege_t m_access_global; + +public: + Sys_var_rpl_filter(const char *name, int getopt_id, const char *comment, + privilege_t access_global) + : sys_var(&all_sys_vars, name, comment, sys_var::GLOBAL, 0, NO_GETOPT, + NO_ARG, SHOW_CHAR, 0, NULL, VARIABLE_NOT_IN_BINLOG, + NULL, NULL, NULL), opt_id(getopt_id), + m_access_global(access_global) + { + option.var_type|= GET_STR | GET_ASK_ADDR; + } + + bool do_check(THD *thd, set_var *var) override + { + return Sys_var_charptr::do_string_check(thd, var, charset(thd)); + } + void session_save_default(THD *, set_var *) override + { DBUG_ASSERT(FALSE); } + + void global_save_default(THD *thd, set_var *var) override + { + char *ptr= (char*)(intptr)option.def_value; + var->save_result.string_value.str= ptr; + var->save_result.string_value.length= ptr ? 
strlen(ptr) : 0; + } + + bool session_update(THD *, set_var *) override + { + DBUG_ASSERT(FALSE); + return true; + } + + bool global_update(THD *thd, set_var *var) override; + + bool on_check_access_global(THD *thd) const override + { + return check_global_access(thd, m_access_global); + } + +protected: + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) + const override; + bool set_filter_value(const char *value, Master_info *mi); +}; + +/** + The class for string variables. Useful for strings that aren't necessarily + \0-terminated. Otherwise the same as Sys_var_charptr. + + Backing store: LEX_CSTRING + + @note + Behaves exactly as Sys_var_charptr, only the backing store is different. +*/ +class Sys_var_lexstring: public Sys_var_charptr +{ +public: + Sys_var_lexstring(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + const char *def_val, PolyLock *lock=0, + enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG, + on_check_function on_check_func=0, + on_update_function on_update_func=0, + const char *substitute=0) + : Sys_var_charptr(name_arg, comment, flag_args, off, sizeof(char*), + getopt, def_val, lock, binlog_status_arg, + on_check_func, on_update_func, substitute) + { + global_var(LEX_CSTRING).length= strlen(def_val); + SYSVAR_ASSERT(size == sizeof(LEX_CSTRING)); + *const_cast(&show_val_type)= SHOW_LEX_STRING; + } + bool global_update(THD *thd, set_var *var) + { + if (Sys_var_charptr::global_update(thd, var)) + return true; + global_var(LEX_CSTRING).length= var->save_result.string_value.length; + return false; + } +}; + + +/* + A LEX_CSTRING stored only in thd->variables + Only to be used for small buffers +*/ + +class Sys_var_session_lexstring: public sys_var +{ + size_t max_length; +public: + Sys_var_session_lexstring(const char *name_arg, + const char *comment, int flag_args, + ptrdiff_t off, size_t size, CMD_LINE getopt, + const char *def_val, size_t max_length_arg, + 
on_check_function on_check_func=0, + on_update_function on_update_func=0) + : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id, + getopt.arg_type, SHOW_CHAR, (intptr)def_val, + 0, VARIABLE_NOT_IN_BINLOG, on_check_func, on_update_func, + 0),max_length(max_length_arg) + { + option.var_type|= GET_STR; + SYSVAR_ASSERT(scope() == ONLY_SESSION) + *const_cast(&show_val_type)= SHOW_LEX_STRING; + } + bool do_check(THD *thd, set_var *var) + { + char buff[STRING_BUFFER_USUAL_SIZE]; + String str(buff, sizeof(buff), system_charset_info), *res; + + if (!(res=var->value->val_str(&str))) + { + var->save_result.string_value.str= 0; /* NULL */ + var->save_result.string_value.length= 0; + } + else + { + if (res->length() > max_length) + { + my_error(ER_WRONG_STRING_LENGTH, MYF(0), + res->ptr(), name.str, (int) max_length); + return true; + } + var->save_result.string_value.str= thd->strmake(res->ptr(), + res->length()); + var->save_result.string_value.length= res->length(); + } + return false; + } + bool session_update(THD *thd, set_var *var) + { + LEX_CSTRING *tmp= &session_var(thd, LEX_CSTRING); + tmp->length= var->save_result.string_value.length; + /* Store as \0 terminated string (just to be safe) */ + strmake((char*) tmp->str, var->save_result.string_value.str, tmp->length); + return false; + } + bool global_update(THD *thd, set_var *var) + { + DBUG_ASSERT(FALSE); + return false; + } + void session_save_default(THD *thd, set_var *var) + { + char *ptr= (char*)(intptr)option.def_value; + var->save_result.string_value.str= ptr; + var->save_result.string_value.length= strlen(ptr); + } + void global_save_default(THD *thd, set_var *var) + { + DBUG_ASSERT(FALSE); + } + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + DBUG_ASSERT(FALSE); + return NULL; + } +}; + + +#ifndef DBUG_OFF +/** + @@session.debug_dbug and @@global.debug_dbug variables. 
+ + @@dbug variable differs from other variables in one aspect: + if its value is not assigned in the session, it "points" to the global + value, and so when the global value is changed, the change + immediately takes effect in the session. + + This semantics is intentional, to be able to debug one session from + another. +*/ +class Sys_var_dbug: public sys_var +{ +public: + Sys_var_dbug(const char *name_arg, + const char *comment, int flag_args, + CMD_LINE getopt, + const char *def_val, PolyLock *lock=0, + enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG, + on_check_function on_check_func=0, + on_update_function on_update_func=0, + const char *substitute=0) + : sys_var(&all_sys_vars, name_arg, comment, flag_args, + (char*)¤t_dbug_option-(char*)&global_system_variables, getopt.id, + getopt.arg_type, SHOW_CHAR, (intptr)def_val, + lock, binlog_status_arg, on_check_func, on_update_func, + substitute) + { option.var_type|= GET_STR; } + bool do_check(THD *thd, set_var *var) + { + char buff[STRING_BUFFER_USUAL_SIZE]; + String str(buff, sizeof(buff), system_charset_info), *res; + + if (!(res=var->value->val_str(&str))) + { + var->save_result.string_value.str= const_cast(""); + var->save_result.string_value.length= 0; + } + else + { + size_t len= res->length(); + var->save_result.string_value.str= thd->strmake(res->ptr(), len); + var->save_result.string_value.length= len; + } + return false; + } + bool session_update(THD *thd, set_var *var) + { + const char *val= var->save_result.string_value.str; + if (!var->value) + DBUG_POP(); + else + DBUG_SET(val); + return false; + } + bool global_update(THD *thd, set_var *var) + { + const char *val= var->save_result.string_value.str; + DBUG_SET_INITIAL(val); + return false; + } + void session_save_default(THD *thd, set_var *var) + { } + void global_save_default(THD *thd, set_var *var) + { + char *ptr= (char*)(intptr)option.def_value; + var->save_result.string_value.str= ptr; + var->save_result.string_value.length= 
safe_strlen(ptr); + } + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + char buf[256]; + DBUG_EXPLAIN(buf, sizeof(buf)); + return (uchar*) thd->strdup(buf); + } + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + char buf[256]; + DBUG_EXPLAIN_INITIAL(buf, sizeof(buf)); + return (uchar*) thd->strdup(buf); + } + const uchar *default_value_ptr(THD *thd) const + { return (uchar*)""; } +}; +#endif + +#define KEYCACHE_VAR(X) GLOBAL_VAR(dflt_key_cache_var.X) +#define keycache_var_ptr(KC, OFF) (((uchar*)(KC))+(OFF)) +#define keycache_var(KC, OFF) (*(ulonglong*)keycache_var_ptr(KC, OFF)) +typedef bool (*keycache_update_function)(THD *, KEY_CACHE *, ptrdiff_t, ulonglong); + +/** + The class for keycache_* variables. Supports structured names, + keycache_name.variable_name. + + Class specific constructor arguments: + everything derived from Sys_var_ulonglong + + Backing store: ulonglong + + @note these variables can be only GLOBAL +*/ +class Sys_var_keycache: public Sys_var_ulonglong +{ + keycache_update_function keycache_update; +public: + Sys_var_keycache(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + ulonglong min_val, ulonglong max_val, ulonglong def_val, + uint block_size, PolyLock *lock, + enum binlog_status_enum binlog_status_arg, + on_check_function on_check_func, + keycache_update_function on_update_func, + const char *substitute=0) + : Sys_var_ulonglong(name_arg, comment, flag_args, off, size, + getopt, min_val, max_val, def_val, + block_size, lock, binlog_status_arg, on_check_func, 0, + substitute), + keycache_update(on_update_func) + { + option.var_type|= GET_ASK_ADDR; + option.value= (uchar**)1; // crash me, please + // fix an offset from global_system_variables to be an offset in KEY_CACHE + offset= global_var_ptr() - (uchar*)dflt_key_cache; + SYSVAR_ASSERT(scope() == GLOBAL); + } + bool global_update(THD *thd, set_var *var) + { + ulonglong 
new_value= var->save_result.ulonglong_value;
    LEX_CSTRING *base_name= &var->base;
    KEY_CACHE *key_cache;

    /* If no basename, assume it's for the key cache named 'default' */
    if (!base_name->length)
      base_name= &default_key_cache_base;

    key_cache= get_key_cache(base_name);

    if (!key_cache)
    {                                           // Key cache didn't exist
      if (!new_value)                           // Tried to delete cache
        return false;                           // Ok, nothing to do
      if (!(key_cache= create_key_cache(base_name->str, base_name->length)))
        return true;
    }

    /**
      Abort if some other thread is changing the key cache
      @todo This should be changed so that we wait until the previous
      assignment is done and then do the new assign
    */
    if (key_cache->in_init)
      return true;

    return keycache_update(thd, key_cache, offset, new_value);
  }
  const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const
  {
    /* Unknown cache names read as zero via the shared zero_key_cache */
    KEY_CACHE *key_cache= get_key_cache(base);
    if (!key_cache)
      key_cache= &zero_key_cache;
    return keycache_var_ptr(key_cache, offset);
  }
};

/*
  Resize (or, with new_value == 0, drop) a key cache's buffer.
  LOCK_global_system_variables is released around the potentially long
  resize so other SET GLOBALs are not blocked; in_init guards reentry.
*/
static bool update_buffer_size(THD *thd, KEY_CACHE *key_cache,
                               ptrdiff_t offset, ulonglong new_value)
{
  bool error= false;
  DBUG_ASSERT(offset == offsetof(KEY_CACHE, param_buff_size));

  if (new_value == 0)
  {
    if (key_cache == dflt_key_cache)
    {
      my_error(ER_WARN_CANT_DROP_DEFAULT_KEYCACHE, MYF(0));
      return true;
    }

    if (key_cache->key_cache_inited)            // If initialized
    {
      /*
        Move tables using this key cache to the default key cache
        and clear the old key cache.
      */
      key_cache->in_init= 1;
      mysql_mutex_unlock(&LOCK_global_system_variables);
      key_cache->param_buff_size= 0;
      ha_resize_key_cache(key_cache);
      ha_change_key_cache(key_cache, dflt_key_cache);
      /*
        We don't delete the key cache as some running threads my still be in
        the key cache code with a pointer to the deleted (empty) key cache
      */
      mysql_mutex_lock(&LOCK_global_system_variables);
      key_cache->in_init= 0;
    }
    return error;
  }

  key_cache->param_buff_size= new_value;

  /* If key cache didn't exist initialize it, else resize it */
  key_cache->in_init= 1;
  mysql_mutex_unlock(&LOCK_global_system_variables);

  if (!key_cache->key_cache_inited)
    error= ha_init_key_cache(0, key_cache, 0);
  else
    error= ha_resize_key_cache(key_cache);

  mysql_mutex_lock(&LOCK_global_system_variables);
  key_cache->in_init= 0;

  return error;
}

/*
  Common driver for non-buffer-size keycache parameters: store the new
  value, then run `func` on the cache outside the global-variables mutex.
*/
static bool update_keycache(THD *thd, KEY_CACHE *key_cache,
                            ptrdiff_t offset, ulonglong new_value,
                            int (*func)(KEY_CACHE *))
{
  bool error= false;
  DBUG_ASSERT(offset != offsetof(KEY_CACHE, param_buff_size));

  keycache_var(key_cache, offset)= new_value;

  key_cache->in_init= 1;
  mysql_mutex_unlock(&LOCK_global_system_variables);
  error= func(key_cache);
  mysql_mutex_lock(&LOCK_global_system_variables);
  key_cache->in_init= 0;

  return error;
}

static bool resize_keycache(THD *thd, KEY_CACHE *key_cache,
                            ptrdiff_t offset, ulonglong new_value)
{
  return update_keycache(thd, key_cache, offset, new_value,
                         ha_resize_key_cache);
}

static bool change_keycache_param(THD *thd, KEY_CACHE *key_cache,
                                  ptrdiff_t offset, ulonglong new_value)
{
  return update_keycache(thd, key_cache, offset, new_value,
                         ha_change_key_cache_param);
}

static bool repartition_keycache(THD *thd, KEY_CACHE *key_cache,
                                 ptrdiff_t offset, ulonglong new_value)
{
  return update_keycache(thd, key_cache, offset, new_value,
                         ha_repartition_key_cache);
}


/**
  The class for floating point variables

Class specific constructor arguments: min, max

  Backing store: double
*/
class Sys_var_double: public sys_var
{
public:
  Sys_var_double(const char *name_arg,
          const char *comment, int flag_args, ptrdiff_t off, size_t size,
          CMD_LINE getopt,
          double min_val, double max_val, double def_val, PolyLock *lock=0,
          enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
          on_check_function on_check_func=0,
          on_update_function on_update_func=0,
          const char *substitute=0)
    : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id,
              getopt.arg_type, SHOW_DOUBLE,
              (longlong) getopt_double2ulonglong(def_val),
              lock, binlog_status_arg, on_check_func, on_update_func,
              substitute)
  {
    option.var_type|= GET_DOUBLE;
    /* doubles are smuggled through the ulonglong def/min/max fields */
    option.min_value= (longlong) getopt_double2ulonglong(min_val);
    option.max_value= (longlong) getopt_double2ulonglong(max_val);
    global_var(double)= (double)option.def_value;
    SYSVAR_ASSERT(min_val < max_val);
    SYSVAR_ASSERT(min_val <= def_val);
    SYSVAR_ASSERT(max_val >= def_val);
    SYSVAR_ASSERT(size == sizeof(double));
  }
  bool do_check(THD *thd, set_var *var)
  {
    my_bool fixed;
    double v= var->value->val_real();
    /* Clamp to [min,max]; warn when the value had to be adjusted */
    var->save_result.double_value= getopt_double_limit_value(v, &option, &fixed);

    return throw_bounds_warning(thd, name.str, fixed, v);
  }
  bool session_update(THD *thd, set_var *var)
  {
    session_var(thd, double)= var->save_result.double_value;
    return false;
  }
  bool global_update(THD *thd, set_var *var)
  {
    global_var(double)= var->save_result.double_value;
    return false;
  }
  void session_save_default(THD *thd, set_var *var)
  { var->save_result.double_value= global_var(double); }
  void global_save_default(THD *thd, set_var *var)
  { var->save_result.double_value= getopt_ulonglong2double(option.def_value); }
};

/**
  The class for the @max_user_connections.
  It's derived from Sys_var_uint, but non-standard session value
  requires a new class.

  Class specific constructor arguments:
    everything derived from Sys_var_uint

  Backing store: uint
*/
class Sys_var_max_user_conn: public Sys_var_int
{
public:
  Sys_var_max_user_conn(const char *name_arg,
          const char *comment, int flag_args, ptrdiff_t off, size_t size,
          CMD_LINE getopt,
          int min_val, int max_val, int def_val,
          uint block_size, PolyLock *lock=0,
          enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
          on_check_function on_check_func=0,
          on_update_function on_update_func=0,
          const char *substitute=0)
    : Sys_var_int(name_arg, comment, SESSION, off, size, getopt,
                  min_val, max_val, def_val, block_size,
                  lock, binlog_status_arg, on_check_func, on_update_func,
                  substitute)
  { }
  const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const
  {
    /* per-account resource limit, when set, overrides the global value */
    if (thd->user_connect && thd->user_connect->user_resources.user_conn)
      return (uchar*) &(thd->user_connect->user_resources.user_conn);
    return global_value_ptr(thd, base);
  }
};

/**
  The class for flagset variables - a variant of SET that allows in-place
  editing (turning on/off individual bits). String representations looks like
  a "flag=val,flag=val,...". Example: @@optimizer_switch

  Class specific constructor arguments:
    char* values[] - 0-terminated list of strings of valid values

  Backing store: ulonglong

  @note
  the last value in the values[] array should
  *always* be the string "default".
*/
class Sys_var_flagset: public Sys_var_typelib
{
public:
  Sys_var_flagset(const char *name_arg,
          const char *comment, int flag_args, ptrdiff_t off, size_t size,
          CMD_LINE getopt,
          const char *values[], ulonglong def_val, PolyLock *lock=0,
          enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
          on_check_function on_check_func=0,
          on_update_function on_update_func=0,
          const char *substitute=0)
    : Sys_var_typelib(name_arg, comment, flag_args, off, getopt,
                      SHOW_CHAR, values, def_val, lock,
                      binlog_status_arg, on_check_func, on_update_func,
                      substitute)
  {
    option.var_type|= GET_FLAGSET;
    global_var(ulonglong)= def_val;
    SYSVAR_ASSERT(typelib.count > 1);
    SYSVAR_ASSERT(typelib.count <= 65);
    SYSVAR_ASSERT(def_val <= my_set_bits(typelib.count-1));
    SYSVAR_ASSERT(strcmp(values[typelib.count-1], "default") == 0);
    SYSVAR_ASSERT(size == sizeof(ulonglong));
  }
  bool do_check(THD *thd, set_var *var)
  {
    char buff[STRING_BUFFER_USUAL_SIZE];
    String str(buff, sizeof(buff), system_charset_info), *res;
    ulonglong default_value, current_value;
    /*
      "default" in a flagset expression means: the global value for a
      session assignment, the compiled-in default for a global one.
    */
    if (var->type == OPT_GLOBAL)
    {
      default_value= option.def_value;
      current_value= global_var(ulonglong);
    }
    else
    {
      default_value= global_var(ulonglong);
      current_value= session_var(thd, ulonglong);
    }

    if (var->value->result_type() == STRING_RESULT)
    {
      if (!(res=var->value->val_str(&str)))
        return true;
      else
      {
        char *error;
        uint error_len;

        var->save_result.ulonglong_value=
          find_set_from_flags(&typelib,
                              typelib.count,
                              current_value,
                              default_value,
                              res->ptr(), res->length(),
                              &error, &error_len);
        if (unlikely(error))
        {
          ErrConvString err(error, error_len, res->charset());
          my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), name.str, err.ptr());
          return true;
        }
      }
    }
    else
    {
      longlong tmp=var->value->val_int();
      if ((tmp < 0 && ! var->value->unsigned_flag)
          || (ulonglong)tmp > my_set_bits(typelib.count))
        return true;
      else
        var->save_result.ulonglong_value= tmp;
    }

    return false;
  }
  bool session_update(THD *thd, set_var *var)
  {
    session_var(thd, ulonglong)= var->save_result.ulonglong_value;
    return false;
  }
  bool global_update(THD *thd, set_var *var)
  {
    global_var(ulonglong)= var->save_result.ulonglong_value;
    return false;
  }
  void session_save_default(THD *thd, set_var *var)
  { var->save_result.ulonglong_value= global_var(ulonglong); }
  void global_save_default(THD *thd, set_var *var)
  { var->save_result.ulonglong_value= option.def_value; }
  const uchar *valptr(THD *thd, ulonglong val) const
  { return (uchar*)flagset_to_string(thd, 0, val, typelib.type_names); }
  const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const
  { return valptr(thd, session_var(thd, ulonglong)); }
  const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const
  { return valptr(thd, global_var(ulonglong)); }
  const uchar *default_value_ptr(THD *thd) const
  { return valptr(thd, option.def_value); }
};

/**
  The class for SET variables - variables taking zero or more values
  from the given list.
Example: @@sql_mode + + Class specific constructor arguments: + char* values[] - 0-terminated list of strings of valid values + + Backing store: ulonglong +*/ + +static const LEX_CSTRING all_clex_str= {STRING_WITH_LEN("all")}; + + +class Sys_var_set: public Sys_var_typelib +{ +public: + Sys_var_set(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + const char *values[], ulonglong def_val, PolyLock *lock=0, + enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG, + on_check_function on_check_func=0, + on_update_function on_update_func=0, + const char *substitute=0) + : Sys_var_typelib(name_arg, comment, flag_args, off, getopt, + SHOW_CHAR, values, def_val, lock, + binlog_status_arg, on_check_func, on_update_func, + substitute) + { + option.var_type|= GET_SET; + option.min_value= 0; + option.max_value= ~0ULL; + global_var(ulonglong)= def_val; + if ((option.u_max_value= (uchar**)max_var_ptr())) + { + *((ulonglong*) option.u_max_value)= ~0ULL; + } + SYSVAR_ASSERT(typelib.count > 0); + SYSVAR_ASSERT(typelib.count <= 64); + SYSVAR_ASSERT(def_val <= my_set_bits(typelib.count)); + SYSVAR_ASSERT(size == sizeof(ulonglong)); + } + bool check_maximum(THD *thd, set_var *var, + const char *c_val, longlong i_val) + { + if (!max_var_ptr() || + (var->save_result.ulonglong_value & ~(get_max_var())) == 0) + return FALSE; + var->save_result.ulonglong_value&= get_max_var(); + + return c_val ? 
throw_bounds_warning(thd, name.str, c_val) : + throw_bounds_warning(thd, name.str, TRUE, + var->value->unsigned_flag, i_val); + } + bool do_check(THD *thd, set_var *var) + { + char buff[STRING_BUFFER_USUAL_SIZE]; + String str(buff, sizeof(buff), system_charset_info), *res; + + if (var->value->result_type() == STRING_RESULT) + { + char *error; + uint error_len; + bool not_used; + + if (!(res= var->value->val_str_ascii_revert_empty_string_is_null(thd, + &str))) + return true; + + var->save_result.ulonglong_value= + find_set(&typelib, res->ptr(), res->length(), NULL, + &error, &error_len, ¬_used); + if (error_len && + !my_charset_latin1.strnncollsp(res->to_lex_cstring(), all_clex_str)) + { + var->save_result.ulonglong_value= ((1ULL << (typelib.count)) -1); + error_len= 0; + } + /* + note, we only issue an error if error_len > 0. + That is even while empty (zero-length) values are considered + errors by find_set(), these errors are ignored here + */ + if (error_len) + { + ErrConvString err(error, error_len, res->charset()); + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), name.str, err.ptr()); + return true; + } + return check_maximum(thd, var, res->ptr(), 0); + } + + longlong tmp=var->value->val_int(); + if ((tmp < 0 && ! 
var->value->unsigned_flag) + || (ulonglong)tmp > my_set_bits(typelib.count)) + return true; + + var->save_result.ulonglong_value= tmp; + return check_maximum(thd, var, 0, tmp); + } + bool session_update(THD *thd, set_var *var) + { + session_var(thd, ulonglong)= var->save_result.ulonglong_value; + return false; + } + bool global_update(THD *thd, set_var *var) + { + global_var(ulonglong)= var->save_result.ulonglong_value; + return false; + } + void session_save_default(THD *thd, set_var *var) + { var->save_result.ulonglong_value= global_var(ulonglong); } + void global_save_default(THD *thd, set_var *var) + { var->save_result.ulonglong_value= option.def_value; } + const uchar *valptr(THD *thd, ulonglong val) const + { return reinterpret_cast(set_to_string(thd, 0, val, typelib.type_names)); } + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return valptr(thd, session_var(thd, ulonglong)); } + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return valptr(thd, global_var(ulonglong)); } + const uchar *default_value_ptr(THD *thd) const + { return valptr(thd, option.def_value); } + + ulonglong get_max_var() { return *((ulonglong*) max_var_ptr()); } +}; + +/** + The class for variables which value is a plugin. 
+ Example: @@default_storage_engine + + Class specific constructor arguments: + int plugin_type_arg (for example MYSQL_STORAGE_ENGINE_PLUGIN) + + Backing store: plugin_ref + + @note + these variables don't support command-line equivalents, any such + command-line options should be added manually to my_long_options in mysqld.cc +*/ +class Sys_var_plugin: public sys_var +{ + int plugin_type; +public: + Sys_var_plugin(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + int plugin_type_arg, char **def_val, PolyLock *lock=0, + enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG, + on_check_function on_check_func=0, + on_update_function on_update_func=0, + const char *substitute=0) + : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id, + getopt.arg_type, SHOW_CHAR, (intptr)def_val, + lock, binlog_status_arg, on_check_func, on_update_func, + substitute), + plugin_type(plugin_type_arg) + { + option.var_type|= GET_STR; + SYSVAR_ASSERT(size == sizeof(plugin_ref)); + SYSVAR_ASSERT(getopt.id < 0); // force NO_CMD_LINE + } + bool do_check(THD *thd, set_var *var) + { + char buff[STRING_BUFFER_USUAL_SIZE]; + String str(buff,sizeof(buff), system_charset_info), *res; + if (!(res=var->value->val_str(&str))) + var->save_result.plugin= NULL; + else + { + const LEX_CSTRING pname= { const_cast(res->ptr()), res->length() }; + plugin_ref plugin; + + // special code for storage engines (e.g. 
to handle historical aliases) + if (plugin_type == MYSQL_STORAGE_ENGINE_PLUGIN) + plugin= ha_resolve_by_name(thd, &pname, false); + else + plugin= my_plugin_lock_by_name(thd, &pname, plugin_type); + if (unlikely(!plugin)) + { + // historically different error code + if (plugin_type == MYSQL_STORAGE_ENGINE_PLUGIN) + { + ErrConvString err(res); + my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), err.ptr()); + } + return true; + } + var->save_result.plugin= plugin; + } + return false; + } + void do_update(plugin_ref *valptr, plugin_ref newval) + { + plugin_ref oldval= *valptr; + if (oldval != newval) + { + *valptr= newval ? my_plugin_lock(NULL, newval) : 0; + plugin_unlock(NULL, oldval); + } + } + bool session_update(THD *thd, set_var *var) + { + do_update((plugin_ref*)session_var_ptr(thd), + var->save_result.plugin); + return false; + } + bool global_update(THD *thd, set_var *var) + { + do_update((plugin_ref*)global_var_ptr(), + var->save_result.plugin); + return false; + } + void session_save_default(THD *thd, set_var *var) + { + plugin_ref plugin= global_var(plugin_ref); + var->save_result.plugin= plugin ? my_plugin_lock(thd, plugin) : 0; + } + plugin_ref get_default(THD *thd) const + { + char *default_value= *reinterpret_cast(option.def_value); + if (!default_value) + return 0; + + LEX_CSTRING pname= { default_value, strlen(default_value) }; + plugin_ref plugin; + + if (plugin_type == MYSQL_STORAGE_ENGINE_PLUGIN) + plugin= ha_resolve_by_name(thd, &pname, false); + else + plugin= my_plugin_lock_by_name(thd, &pname, plugin_type); + DBUG_ASSERT(plugin); + return my_plugin_lock(thd, plugin); + } + + void global_save_default(THD *thd, set_var *var) + { + var->save_result.plugin= get_default(thd); + } + + uchar *valptr(THD *thd, plugin_ref plugin) const + { + return (uchar*)(plugin ? 
thd->strmake(plugin_name(plugin)->str, + plugin_name(plugin)->length) : 0); + } + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return valptr(thd, session_var(thd, plugin_ref)); } + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return valptr(thd, global_var(plugin_ref)); } + const uchar *default_value_ptr(THD *thd) const + { return valptr(thd, get_default(thd)); } +}; + +/** + Class for variables that containg a list of plugins. + Currently this is used only for @@gtid_pos_auto_create_engines + + Backing store: plugin_ref + + @note + Currently this is only used for storage engine type plugins, and thus only + storage engine type plugin is implemented. It could be extended to other + plugin types later if needed, similar to Sys_var_plugin. + + These variables don't support command-line equivalents, any such + command-line options should be added manually to my_long_options in mysqld.cc + + Note on lifetimes of resources allocated: We allocate a zero-terminated array + of plugin_ref*, and lock the contained plugins. The list in the global + variable must be freed (with free_engine_list()). However, the way Sys_var + works, there is no place to explicitly free other lists, like the one + returned from get_default(). + + Therefore, the code needs to work with temporary lists, which are + registered in the THD to be automatically freed (and plugins similarly + automatically unlocked). This is why do_check() allocates a temporary + list, from which do_update() then makes a permanent copy. 
+*/ +class Sys_var_pluginlist: public sys_var +{ +public: + Sys_var_pluginlist(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + char **def_val, PolyLock *lock=0, + enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG, + on_check_function on_check_func=0, + on_update_function on_update_func=0, + const char *substitute=0) + : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id, + getopt.arg_type, SHOW_CHAR, (intptr)def_val, + lock, binlog_status_arg, on_check_func, on_update_func, + substitute) + { + option.var_type|= GET_STR; + SYSVAR_ASSERT(size == sizeof(plugin_ref)); + SYSVAR_ASSERT(getopt.id < 0); // force NO_CMD_LINE + } + bool do_check(THD *thd, set_var *var) + { + char buff[STRING_BUFFER_USUAL_SIZE]; + String str(buff,sizeof(buff), system_charset_info), *res; + plugin_ref *plugins; + + if (!(res=var->value->val_str(&str))) + plugins= resolve_engine_list(thd, "", 0, true, true); + else + plugins= resolve_engine_list(thd, res->ptr(), res->length(), true, true); + if (!plugins) + return true; + var->save_result.plugins= plugins; + return false; + } + void do_update(plugin_ref **valptr, plugin_ref* newval) + { + plugin_ref *oldval= *valptr; + *valptr= copy_engine_list(newval); + free_engine_list(oldval); + } + bool session_update(THD *thd, set_var *var) + { + do_update((plugin_ref**)session_var_ptr(thd), + var->save_result.plugins); + return false; + } + bool global_update(THD *thd, set_var *var) + { + do_update((plugin_ref**)global_var_ptr(), + var->save_result.plugins); + return false; + } + void session_save_default(THD *thd, set_var *var) + { + plugin_ref* plugins= global_var(plugin_ref *); + var->save_result.plugins= plugins ? 
temp_copy_engine_list(thd, plugins) : 0; + } + plugin_ref *get_default(THD *thd) const + { + char *default_value= *reinterpret_cast(option.def_value); + if (!default_value) + return 0; + return resolve_engine_list(thd, default_value, strlen(default_value), + false, true); + } + + void global_save_default(THD *thd, set_var *var) + { + var->save_result.plugins= get_default(thd); + } + + uchar *valptr(THD *thd, plugin_ref *plugins) const + { + return reinterpret_cast(pretty_print_engine_list(thd, plugins)); + } + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return valptr(thd, session_var(thd, plugin_ref*)); } + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return valptr(thd, global_var(plugin_ref*)); } + const uchar *default_value_ptr(THD *thd) const + { return valptr(thd, get_default(thd)); } +}; + +#if defined(ENABLED_DEBUG_SYNC) + +#include "debug_sync.h" + +/** + The class for @@debug_sync session-only variable +*/ +class Sys_var_debug_sync :public sys_var +{ +public: + Sys_var_debug_sync(const char *name_arg, + const char *comment, int flag_args, + CMD_LINE getopt, + const char *def_val, PolyLock *lock=0, + enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG, + on_check_function on_check_func=0, + on_update_function on_update_func=0, + const char *substitute=0) + : sys_var(&all_sys_vars, name_arg, comment, flag_args, 0, getopt.id, + getopt.arg_type, SHOW_CHAR, (intptr)def_val, + lock, binlog_status_arg, on_check_func, on_update_func, + substitute) + { + SYSVAR_ASSERT(scope() == ONLY_SESSION); + option.var_type|= GET_STR; + } + bool do_check(THD *thd, set_var *var) + { + char buff[STRING_BUFFER_USUAL_SIZE]; + String str(buff, sizeof(buff), system_charset_info), *res; + + if (!(res=var->value->val_str(&str))) + var->save_result.string_value= empty_lex_str; + else + { + if (!thd->make_lex_string(&var->save_result.string_value, + res->ptr(), res->length())) + return true; + } + return false; + } 
+ bool session_update(THD *thd, set_var *var) + { + return debug_sync_update(thd, var->save_result.string_value.str, + var->save_result.string_value.length); + } + bool global_update(THD *thd, set_var *var) + { + DBUG_ASSERT(FALSE); + return true; + } + void session_save_default(THD *thd, set_var *var) + { + var->save_result.string_value.str= const_cast(""); + var->save_result.string_value.length= 0; + } + void global_save_default(THD *thd, set_var *var) + { + DBUG_ASSERT(FALSE); + } + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + return debug_sync_value_ptr(thd); + } + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + DBUG_ASSERT(FALSE); + return 0; + } + const uchar *default_value_ptr(THD *thd) const + { return (uchar*)""; } +}; +#endif /* defined(ENABLED_DEBUG_SYNC) */ + + +/** + The class for bit variables - a variant of boolean that stores the value + in a bit. + + Class specific constructor arguments: + ulonglong bitmask_arg - the mask for the bit to set in the ulonglong + backing store + + Backing store: ulonglong + + @note + This class supports the "reverse" semantics, when the value of the bit + being 0 corresponds to the value of variable being set. To activate it + use REVERSE(bitmask) instead of simply bitmask in the constructor. + + @note + variables of this class cannot be set from the command line as + my_getopt does not support bits. 
+*/ +class Sys_var_bit: public Sys_var_typelib +{ + ulonglong bitmask; + bool reverse_semantics; + void set(uchar *ptr, ulonglong value) + { + if ((value != 0) ^ reverse_semantics) + (*(ulonglong *)ptr)|= bitmask; + else + (*(ulonglong *)ptr)&= ~bitmask; + } +public: + Sys_var_bit(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + ulonglong bitmask_arg, my_bool def_val, PolyLock *lock=0, + enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG, + on_check_function on_check_func=0, + on_update_function on_update_func=0, + const char *substitute=0) + : Sys_var_typelib(name_arg, comment, flag_args, off, getopt, + SHOW_MY_BOOL, bool_values, def_val, lock, + binlog_status_arg, on_check_func, on_update_func, + substitute) + { + option.var_type|= GET_BIT; + reverse_semantics= my_count_bits(bitmask_arg) > 1; + bitmask= reverse_semantics ? ~bitmask_arg : bitmask_arg; + option.block_size= reverse_semantics ? -(long) bitmask : (long)bitmask; + set(global_var_ptr(), def_val); + SYSVAR_ASSERT(def_val < 2); + SYSVAR_ASSERT(size == sizeof(ulonglong)); + } + bool session_update(THD *thd, set_var *var) + { + set(session_var_ptr(thd), var->save_result.ulonglong_value); + return false; + } + bool global_update(THD *thd, set_var *var) + { + set(global_var_ptr(), var->save_result.ulonglong_value); + return false; + } + void session_save_default(THD *thd, set_var *var) + { + var->save_result.ulonglong_value= + (reverse_semantics == !(global_var(ulonglong) & bitmask)); + } + void global_save_default(THD *thd, set_var *var) + { var->save_result.ulonglong_value= option.def_value; } + + uchar *valptr(THD *thd, ulonglong val) const + { + thd->sys_var_tmp.my_bool_value= (reverse_semantics == !(val & bitmask)); + return (uchar*) &thd->sys_var_tmp.my_bool_value; + } + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return valptr(thd, session_var(thd, ulonglong)); } + const uchar *global_value_ptr(THD 
*thd, const LEX_CSTRING *base) const + { return valptr(thd, global_var(ulonglong)); } + const uchar *default_value_ptr(THD *thd) const + { + thd->sys_var_tmp.my_bool_value= option.def_value != 0; + return (uchar*) &thd->sys_var_tmp.my_bool_value; + } +}; + +/** + The class for variables that have a special meaning for a session, + such as @@timestamp or @@rnd_seed1, their values typically cannot be read + from SV structure, and a special "read" callback is provided. + + Class specific constructor arguments: + everything derived from Sys_var_ulonglong + session_special_read_function read_func_arg + + Backing store: ulonglong + + @note + These variables are session-only, global or command-line equivalents + are not supported as they're generally meaningless. +*/ +class Sys_var_session_special: public Sys_var_ulonglong +{ + typedef bool (*session_special_update_function)(THD *thd, set_var *var); + typedef ulonglong (*session_special_read_function)(THD *thd); + + session_special_read_function read_func; + session_special_update_function update_func; +public: + Sys_var_session_special(const char *name_arg, + const char *comment, int flag_args, + CMD_LINE getopt, + ulonglong min_val, ulonglong max_val, uint block_size, + PolyLock *lock, enum binlog_status_enum binlog_status_arg, + on_check_function on_check_func, + session_special_update_function update_func_arg, + session_special_read_function read_func_arg, + const char *substitute=0) + : Sys_var_ulonglong(name_arg, comment, flag_args, 0, + sizeof(ulonglong), getopt, min_val, + max_val, 0, block_size, lock, binlog_status_arg, on_check_func, 0, + substitute), + read_func(read_func_arg), update_func(update_func_arg) + { + SYSVAR_ASSERT(scope() == ONLY_SESSION); + SYSVAR_ASSERT(getopt.id < 0); // NO_CMD_LINE, because the offset is fake + } + bool session_update(THD *thd, set_var *var) + { return update_func(thd, var); } + bool global_update(THD *thd, set_var *var) + { + DBUG_ASSERT(FALSE); + return true; + } + void 
session_save_default(THD *thd, set_var *var) + { var->value= 0; } + void global_save_default(THD *thd, set_var *var) + { DBUG_ASSERT(FALSE); } + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + thd->sys_var_tmp.ulonglong_value= read_func(thd); + return (uchar*) &thd->sys_var_tmp.ulonglong_value; + } + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + DBUG_ASSERT(FALSE); + return 0; + } + const uchar *default_value_ptr(THD *thd) const + { + thd->sys_var_tmp.ulonglong_value= 0; + return (uchar*) &thd->sys_var_tmp.ulonglong_value; + } +}; + + +/* + Dedicated class because of a weird behavior of a default value. + Assigning timestamp to itself + + SET @@timestamp = @@timestamp + + make it non-default and stops the time flow. +*/ +class Sys_var_timestamp: public Sys_var_double +{ +public: + Sys_var_timestamp(const char *name_arg, + const char *comment, int flag_args, + CMD_LINE getopt, + double min_val, double max_val, + PolyLock *lock, enum binlog_status_enum binlog_status_arg, + on_check_function on_check_func=0) + : Sys_var_double(name_arg, comment, flag_args, 0, + sizeof(double), getopt, min_val, + max_val, 0, lock, binlog_status_arg, on_check_func) + { + SYSVAR_ASSERT(scope() == ONLY_SESSION); + SYSVAR_ASSERT(getopt.id < 0); // NO_CMD_LINE, because the offset is fake + } + bool session_update(THD *thd, set_var *var) + { + if (var->value) + { + my_hrtime_t hrtime = { hrtime_from_time(var->save_result.double_value) }; + thd->set_time(hrtime); + } + else // SET timestamp=DEFAULT + thd->user_time.val= 0; + return false; + } + bool global_update(THD *thd, set_var *var) + { + DBUG_ASSERT(FALSE); + return true; + } + bool session_is_default(THD *thd) + { + return thd->user_time.val == 0; + } + void session_save_default(THD *thd, set_var *var) + { var->value= 0; } + void global_save_default(THD *thd, set_var *var) + { DBUG_ASSERT(FALSE); } + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + 
thd->sys_var_tmp.double_value= thd->start_time + + thd->start_time_sec_part/(double)TIME_SECOND_PART_FACTOR; + return (uchar*) &thd->sys_var_tmp.double_value; + } + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + DBUG_ASSERT(FALSE); + return 0; + } + const uchar *default_value_ptr(THD *thd) const + { + thd->sys_var_tmp.double_value= 0; + return (uchar*) &thd->sys_var_tmp.double_value; + } + bool on_check_access_session(THD *thd) const; +}; + + +/** + The class for read-only variables that show whether a particular + feature is supported by the server. Example: have_compression + + Backing store: enum SHOW_COMP_OPTION + + @note + These variables are necessarily read-only, only global, and have no + command-line equivalent. +*/ +class Sys_var_have: public sys_var +{ +public: + Sys_var_have(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + PolyLock *lock=0, + enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG, + on_check_function on_check_func=0, + on_update_function on_update_func=0, + const char *substitute=0) + : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id, + getopt.arg_type, SHOW_CHAR, 0, + lock, binlog_status_arg, on_check_func, on_update_func, + substitute) + { + SYSVAR_ASSERT(scope() == GLOBAL); + SYSVAR_ASSERT(getopt.id < 0); + SYSVAR_ASSERT(lock == 0); + SYSVAR_ASSERT(binlog_status_arg == VARIABLE_NOT_IN_BINLOG); + SYSVAR_ASSERT(is_readonly()); + SYSVAR_ASSERT(on_update == 0); + SYSVAR_ASSERT(size == sizeof(enum SHOW_COMP_OPTION)); + option.var_type|= GET_STR; + } + bool do_check(THD *thd, set_var *var) { + DBUG_ASSERT(FALSE); + return true; + } + bool session_update(THD *thd, set_var *var) + { + DBUG_ASSERT(FALSE); + return true; + } + bool global_update(THD *thd, set_var *var) + { + DBUG_ASSERT(FALSE); + return true; + } + void session_save_default(THD *thd, set_var *var) { } + void global_save_default(THD *thd, set_var *var) { } + 
const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + DBUG_ASSERT(FALSE); + return 0; + } + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + return (uchar*)show_comp_option_name[global_var(enum SHOW_COMP_OPTION)]; + } +}; + +/** + Generic class for variables for storing entities that are internally + represented as structures, have names, and possibly can be referred to by + numbers. Examples: character sets, collations, locales, + + Class specific constructor arguments: + ptrdiff_t name_offset - offset of the 'name' field in the structure + + Backing store: void* + + @note + As every such a structure requires special treatment from my_getopt, + these variables don't support command-line equivalents, any such + command-line options should be added manually to my_long_options in mysqld.cc +*/ +class Sys_var_struct: public sys_var +{ + ptrdiff_t name_offset; // offset to the 'name' property in the structure +public: + Sys_var_struct(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + ptrdiff_t name_off, void *def_val, PolyLock *lock=0, + enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG, + on_check_function on_check_func=0, + on_update_function on_update_func=0, + const char *substitute=0) + : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id, + getopt.arg_type, SHOW_CHAR, (intptr)def_val, + lock, binlog_status_arg, on_check_func, on_update_func, + substitute), + name_offset(name_off) + { + option.var_type|= GET_ENUM; // because we accept INT and STRING here + /* + struct variables are special on the command line - often (e.g. for + charsets) the name cannot be immediately resolved, but only after all + options (in particular, basedir) are parsed. 
+ + thus all struct command-line options should be added manually + to my_long_options in mysqld.cc + */ + SYSVAR_ASSERT(getopt.id < 0); + SYSVAR_ASSERT(size == sizeof(void *)); + } + bool do_check(THD *thd, set_var *var) + { return false; } + bool session_update(THD *thd, set_var *var) + { + session_var(thd, const void*)= var->save_result.ptr; + return false; + } + bool global_update(THD *thd, set_var *var) + { + global_var(const void*)= var->save_result.ptr; + return false; + } + void session_save_default(THD *thd, set_var *var) + { var->save_result.ptr= global_var(void*); } + void global_save_default(THD *thd, set_var *var) + { + void **default_value= reinterpret_cast(option.def_value); + var->save_result.ptr= *default_value; + } + uchar *valptr(THD *thd, uchar *val) const + { return val ? *(uchar**)(val+name_offset) : 0; } + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return valptr(thd, session_var(thd, uchar*)); } + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return valptr(thd, global_var(uchar*)); } + const uchar *default_value_ptr(THD *thd) const + { return valptr(thd, *(uchar**)option.def_value); } +}; + +/** + The class for variables that store time zones + + Backing store: Time_zone* + + @note + Time zones cannot be supported directly by my_getopt, thus + these variables don't support command-line equivalents, any such + command-line options should be added manually to my_long_options in mysqld.cc +*/ +class Sys_var_tz: public sys_var +{ +public: + Sys_var_tz(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + Time_zone **def_val, PolyLock *lock=0, + enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG, + on_check_function on_check_func=0, + on_update_function on_update_func=0, + const char *substitute=0) + : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id, + getopt.arg_type, SHOW_CHAR, (intptr)def_val, + 
lock, binlog_status_arg, on_check_func, on_update_func, + substitute) + { + SYSVAR_ASSERT(getopt.id < 0); + SYSVAR_ASSERT(size == sizeof(Time_zone *)); + option.var_type|= GET_STR; + } + bool do_check(THD *thd, set_var *var) + { + char buff[MAX_TIME_ZONE_NAME_LENGTH]; + String str(buff, sizeof(buff), &my_charset_latin1); + String *res= var->value->val_str(&str); + + if (!res) + return true; + + if (!(var->save_result.time_zone= my_tz_find(thd, res))) + { + ErrConvString err(res); + my_error(ER_UNKNOWN_TIME_ZONE, MYF(0), err.ptr()); + return true; + } + return false; + } + bool session_update(THD *thd, set_var *var) + { + session_var(thd, Time_zone*)= var->save_result.time_zone; + return false; + } + bool global_update(THD *thd, set_var *var) + { + global_var(Time_zone*)= var->save_result.time_zone; + return false; + } + void session_save_default(THD *thd, set_var *var) + { + var->save_result.time_zone= global_var(Time_zone*); + } + void global_save_default(THD *thd, set_var *var) + { + var->save_result.time_zone= + *(Time_zone**)(intptr)option.def_value; + } + const uchar *valptr(THD *thd, Time_zone *val) const + { return reinterpret_cast(val->get_name()->ptr()); } + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + /* + This is an ugly fix for replication: we don't replicate properly queries + invoking system variables' values to update tables; but + CONVERT_TZ(,,@@session.time_zone) is so popular that we make it + replicable (i.e. we tell the binlog code to store the session + timezone). If it's the global value which was used we can't replicate + (binlog code stores session value only). 
+ */ + thd->used|= THD::TIME_ZONE_USED; + return valptr(thd, session_var(thd, Time_zone *)); + } + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return valptr(thd, global_var(Time_zone*)); } + const uchar *default_value_ptr(THD *thd) const + { return valptr(thd, *(Time_zone**)option.def_value); } +}; + +/** + Special implementation for transaction isolation, that + distingushes between + + SET GLOBAL TRANSACTION ISOLATION (stored in global_system_variables) + SET SESSION TRANSACTION ISOLATION (stored in thd->variables) + SET TRANSACTION ISOLATION (stored in thd->tx_isolation) + + where the last statement sets isolation level for the next transaction only +*/ +class Sys_var_tx_isolation: public Sys_var_enum +{ +public: + Sys_var_tx_isolation(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + const char *values[], uint def_val, PolyLock *lock, + enum binlog_status_enum binlog_status_arg, + on_check_function on_check_func) + :Sys_var_enum(name_arg, comment, flag_args, off, size, getopt, + values, def_val, lock, binlog_status_arg, on_check_func) + {} + bool session_update(THD *thd, set_var *var) + { + if (var->type == OPT_SESSION && Sys_var_enum::session_update(thd, var)) + return TRUE; + if (var->type == OPT_DEFAULT || !thd->in_active_multi_stmt_transaction()) + { + thd->tx_isolation= (enum_tx_isolation) var->save_result.ulonglong_value; + +#ifndef EMBEDDED_LIBRARY + if (var->type == OPT_DEFAULT) + { + enum enum_tx_isol_level l; + switch (thd->tx_isolation) { + case ISO_READ_UNCOMMITTED: + l= TX_ISOL_UNCOMMITTED; + break; + case ISO_READ_COMMITTED: + l= TX_ISOL_COMMITTED; + break; + case ISO_REPEATABLE_READ: + l= TX_ISOL_REPEATABLE; + break; + case ISO_SERIALIZABLE: + l= TX_ISOL_SERIALIZABLE; + break; + default: + DBUG_ASSERT(0); + return TRUE; + } + if (thd->variables.session_track_transaction_info > TX_TRACK_NONE) + thd->session_tracker.transaction_info.set_isol_level(thd, l); + } 
+ else if (thd->variables.session_track_transaction_info > TX_TRACK_NONE) + thd->session_tracker.transaction_info.set_isol_level(thd, TX_ISOL_INHERIT); +#endif //EMBEDDED_LIBRARY + } + return FALSE; + } +}; + + +/** + Class representing the tx_read_only system variable for setting + default transaction access mode. + + Note that there is a special syntax - SET TRANSACTION READ ONLY + (or READ WRITE) that sets the access mode for the next transaction + only. +*/ + +class Sys_var_tx_read_only: public Sys_var_mybool +{ +public: + Sys_var_tx_read_only(const char *name_arg, const char *comment, int flag_args, + ptrdiff_t off, size_t size, CMD_LINE getopt, + my_bool def_val, PolyLock *lock, + enum binlog_status_enum binlog_status_arg, + on_check_function on_check_func) + :Sys_var_mybool(name_arg, comment, flag_args, off, size, getopt, + def_val, lock, binlog_status_arg, on_check_func) + {} + virtual bool session_update(THD *thd, set_var *var); +}; + +/* + Class for replicate_events_marked_for_skip. + We need a custom update function that ensures the slave is stopped when + the update is happening. +*/ +class Sys_var_replicate_events_marked_for_skip: public Sys_var_enum +{ +public: + Sys_var_replicate_events_marked_for_skip(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + const char *values[], uint def_val, PolyLock *lock= 0, + enum binlog_status_enum binlog_status_arg= VARIABLE_NOT_IN_BINLOG) + :Sys_var_enum(name_arg, comment, flag_args, off, size, getopt, + values, def_val, lock, binlog_status_arg) + {} + bool global_update(THD *thd, set_var *var); +}; + +/* + Class for handing multi-source replication variables + Variable values are store in Master_info, but to make it possible to + access variable without locks we also store it thd->variables. + These can be used as GLOBAL or SESSION, but both points to the same + variable. 
This is to make things compatible with MySQL 5.5 where variables + like sql_slave_skip_counter are GLOBAL. +*/ + +#define MASTER_INFO_VAR(X) my_offsetof(Master_info, X), sizeof(((Master_info *)0x10)->X) +class Sys_var_multi_source_ulonglong; +class Master_info; + +typedef bool (*on_multi_source_update_function)(sys_var *self, THD *thd, + Master_info *mi); +bool update_multi_source_variable(sys_var *self, + THD *thd, enum_var_type type); + + +class Sys_var_multi_source_ulonglong :public Sys_var_ulonglong +{ + ptrdiff_t master_info_offset; + on_multi_source_update_function update_multi_source_variable_func; +public: + Sys_var_multi_source_ulonglong(const char *name_arg, + const char *comment, int flag_args, + ptrdiff_t off, size_t size, + CMD_LINE getopt, + ptrdiff_t master_info_offset_arg, + size_t master_info_arg_size, + ulonglong min_val, ulonglong max_val, + ulonglong def_val, uint block_size, + on_multi_source_update_function on_update_func) + :Sys_var_ulonglong(name_arg, comment, flag_args, off, size, + getopt, min_val, max_val, def_val, block_size, + 0, VARIABLE_NOT_IN_BINLOG, 0, update_multi_source_variable), + master_info_offset(master_info_offset_arg), + update_multi_source_variable_func(on_update_func) + { + SYSVAR_ASSERT(master_info_arg_size == size); + } + bool global_update(THD *thd, set_var *var) + { + return session_update(thd, var); + } + void session_save_default(THD *thd, set_var *var) + { + /* Use value given in variable declaration */ + global_save_default(thd, var); + } + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + ulonglong *tmp, res; + tmp= (ulonglong*) (((uchar*)&(thd->variables)) + offset); + res= get_master_info_ulonglong_value(thd, master_info_offset); + *tmp= res; + return (uchar*) tmp; + } + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + return session_value_ptr(thd, base); + } + ulonglong get_master_info_ulonglong_value(THD *thd, ptrdiff_t offset) const; + bool 
update_variable(THD *thd, Master_info *mi) + { + return update_multi_source_variable_func(this, thd, mi); + } +}; + + +/** + Class for @@global.gtid_current_pos. +*/ +class Sys_var_gtid_current_pos: public sys_var +{ +public: + Sys_var_gtid_current_pos(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt) + : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id, + getopt.arg_type, SHOW_CHAR, 0, NULL, VARIABLE_NOT_IN_BINLOG, + NULL, NULL, NULL) + { + SYSVAR_ASSERT(getopt.id < 0); + SYSVAR_ASSERT(is_readonly()); + option.var_type|= GET_STR; + } + bool do_check(THD *thd, set_var *var) + { + DBUG_ASSERT(false); + return true; + } + bool session_update(THD *thd, set_var *var) + { + DBUG_ASSERT(false); + return true; + } + bool global_update(THD *thd, set_var *var) + { + DBUG_ASSERT(false); + return true; + } + void session_save_default(THD *thd, set_var *var) + { + DBUG_ASSERT(false); + } + void global_save_default(THD *thd, set_var *var) + { + DBUG_ASSERT(false); + } + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + DBUG_ASSERT(false); + return NULL; + } + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const; +}; + + +/** + Class for @@global.gtid_binlog_pos. 
*/
class Sys_var_gtid_binlog_pos: public sys_var
{
public:
  Sys_var_gtid_binlog_pos(const char *name_arg,
          const char *comment, int flag_args, ptrdiff_t off, size_t size,
          CMD_LINE getopt)
    : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id,
              getopt.arg_type, SHOW_CHAR, 0, NULL, VARIABLE_NOT_IN_BINLOG,
              NULL, NULL, NULL)
  {
    /* Must not be a command-line option and must be read-only. */
    SYSVAR_ASSERT(getopt.id < 0);
    SYSVAR_ASSERT(is_readonly());
    option.var_type|= GET_STR;
  }
  /* Read-only variable: every set/update entry point is unreachable. */
  bool do_check(THD *thd, set_var *var)
  {
    DBUG_ASSERT(false);
    return true;
  }
  bool session_update(THD *thd, set_var *var)
  {
    DBUG_ASSERT(false);
    return true;
  }
  bool global_update(THD *thd, set_var *var)
  {
    DBUG_ASSERT(false);
    return true;
  }
  void session_save_default(THD *thd, set_var *var)
  {
    DBUG_ASSERT(false);
  }
  void global_save_default(THD *thd, set_var *var)
  {
    DBUG_ASSERT(false);
  }
  const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const
  {
    DBUG_ASSERT(false);
    return NULL;
  }
  /* The only live accessor; defined out of line. */
  const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const;
};


/**
  Class for @@global.gtid_slave_pos.
*/
class Sys_var_gtid_slave_pos: public sys_var
{
public:
  Sys_var_gtid_slave_pos(const char *name_arg,
          const char *comment, int flag_args, ptrdiff_t off, size_t size,
          CMD_LINE getopt)
    : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id,
              getopt.arg_type, SHOW_CHAR, 0, NULL, VARIABLE_NOT_IN_BINLOG,
              NULL, NULL, NULL)
  {
    option.var_type|= GET_STR;
  }
  /* Settable as GLOBAL only; validation and update are defined out of line. */
  bool do_check(THD *thd, set_var *var);
  bool session_update(THD *thd, set_var *var)
  {
    DBUG_ASSERT(false);
    return true;
  }
  bool global_update(THD *thd, set_var *var);
  void session_save_default(THD *thd, set_var *var)
  {
    DBUG_ASSERT(false);
  }
  void global_save_default(THD *thd, set_var *var)
  {
    /* Record the attempt to use default so we can error. */
    var->value= 0;
  }
  const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const
  {
    DBUG_ASSERT(false);
    return NULL;
  }
  const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const;
  const uchar *default_value_ptr(THD *thd) const
  { return 0; }
  bool on_check_access_global(THD *thd) const
  {
    /* SET GLOBAL requires this dedicated privilege. */
    return check_global_access(thd, PRIV_SET_SYSTEM_GLOBAL_VAR_GTID_SLAVE_POS);
  }
};


/**
  Class for @@global.gtid_binlog_state.
*/
class Sys_var_gtid_binlog_state: public sys_var
{
public:
  Sys_var_gtid_binlog_state(const char *name_arg,
          const char *comment, int flag_args, ptrdiff_t off, size_t size,
          CMD_LINE getopt)
    : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id,
              getopt.arg_type, SHOW_CHAR, 0, NULL, VARIABLE_NOT_IN_BINLOG,
              NULL, NULL, NULL)
  {
    option.var_type|= GET_STR;
  }
  /* Settable as GLOBAL only; validation and update are defined out of line. */
  bool do_check(THD *thd, set_var *var);
  bool session_update(THD *thd, set_var *var)
  {
    DBUG_ASSERT(false);
    return true;
  }
  bool global_update(THD *thd, set_var *var);
  void session_save_default(THD *thd, set_var *var)
  {
    DBUG_ASSERT(false);
  }
  void global_save_default(THD *thd, set_var *var)
  {
    /* Record the attempt to use default so we can error. */
    var->value= 0;
  }
  const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const
  {
    DBUG_ASSERT(false);
    return NULL;
  }
  const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const;
  const uchar *default_value_ptr(THD *thd) const
  { return 0; }
  bool on_check_access_global(THD *thd) const
  {
    /* SET GLOBAL requires this dedicated privilege. */
    return
      check_global_access(thd, PRIV_SET_SYSTEM_GLOBAL_VAR_GTID_BINLOG_STATE);
  }
};


/**
  Class for @@session.last_gtid
+*/ +class Sys_var_last_gtid: public sys_var +{ +public: + Sys_var_last_gtid(const char *name_arg, + const char *comment, int flag_args, CMD_LINE getopt) + : sys_var(&all_sys_vars, name_arg, comment, flag_args, 0, getopt.id, + getopt.arg_type, SHOW_CHAR, 0, NULL, VARIABLE_NOT_IN_BINLOG, + NULL, NULL, NULL) + { + SYSVAR_ASSERT(getopt.id < 0); + SYSVAR_ASSERT(is_readonly()); + option.var_type|= GET_STR; + } + bool do_check(THD *thd, set_var *var) + { + DBUG_ASSERT(false); + return true; + } + bool session_update(THD *thd, set_var *var) + { + DBUG_ASSERT(false); + return true; + } + bool global_update(THD *thd, set_var *var) + { + DBUG_ASSERT(false); + return true; + } + void session_save_default(THD *thd, set_var *var) + { + DBUG_ASSERT(false); + } + void global_save_default(THD *thd, set_var *var) + { + DBUG_ASSERT(false); + } + const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const; + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + DBUG_ASSERT(false); + return NULL; + } +}; + + +/** + Class for connection_name.slave_parallel_mode. 
+*/ +class Sys_var_slave_parallel_mode: public Sys_var_enum +{ +public: + Sys_var_slave_parallel_mode(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, const char *values[], + enum_slave_parallel_mode def_val) + : Sys_var_enum(name_arg, comment, flag_args, off, size, + getopt, values, def_val) + { + option.var_type|= GET_ASK_ADDR; + option.value= (uchar**)1; // crash me, please + SYSVAR_ASSERT(scope() == GLOBAL); + } + bool global_update(THD *thd, set_var *var); + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const; +}; + + +class Sys_var_vers_asof: public sys_var +{ +public: + Sys_var_vers_asof(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, uint def_val, + PolyLock *lock= NO_MUTEX_GUARD, + binlog_status_enum binlog_status_arg= VARIABLE_NOT_IN_BINLOG, + on_check_function on_check_func= NULL, + on_update_function on_update_func= NULL, + const char *substitute= NULL) + : sys_var(&all_sys_vars, name_arg, comment, flag_args, off, + getopt.id, getopt.arg_type, SHOW_CHAR, def_val, lock, + binlog_status_arg, on_check_func, on_update_func, substitute) + { + option.var_type= GET_STR; + } + virtual bool do_check(THD *thd, set_var *var) + { + if (!var->value) + return false; + + MYSQL_TIME ltime; + Datetime::Options opt(TIME_CONV_NONE | + TIME_NO_ZERO_IN_DATE | + TIME_NO_ZERO_DATE, thd); + bool res= var->value->get_date(thd, <ime, opt); + if (!res) + { + uint error; + var->save_result.timestamp.unix_time= + thd->variables.time_zone->TIME_to_gmt_sec(<ime, &error); + var->save_result.timestamp.second_part= ltime.second_part; + res= error != 0; + } + return res; + } + +private: + static bool update(THD *thd, set_var *var, vers_asof_timestamp_t *out) + { + if (var->value) + { + out->type = SYSTEM_TIME_AS_OF; + out->unix_time = var->save_result.timestamp.unix_time; + out->second_part= var->save_result.timestamp.second_part; + } + return 0; + } + + 
static void save_default(set_var *var, vers_asof_timestamp_t *out) + { + out->type= SYSTEM_TIME_UNSPECIFIED; + } + +public: + virtual bool global_update(THD *thd, set_var *var) + { + return update(thd, var, &global_var(vers_asof_timestamp_t)); + } + virtual bool session_update(THD *thd, set_var *var) + { + return update(thd, var, &session_var(thd, vers_asof_timestamp_t)); + } + + virtual bool session_is_default(THD *thd) + { + const vers_asof_timestamp_t &var= session_var(thd, vers_asof_timestamp_t); + return var.type == SYSTEM_TIME_UNSPECIFIED; + } + + virtual void session_save_default(THD *thd, set_var *var) + { + save_default(var, &session_var(thd, vers_asof_timestamp_t)); + } + virtual void global_save_default(THD *thd, set_var *var) + { + save_default(var, &global_var(vers_asof_timestamp_t)); + } + +private: + const uchar *value_ptr(THD *thd, vers_asof_timestamp_t &val) const + { + const char *value; + switch (val.type) + { + case SYSTEM_TIME_UNSPECIFIED: + return (uchar*)"DEFAULT"; + break; + case SYSTEM_TIME_AS_OF: + { + char *buf= (char*) thd->alloc(MAX_DATE_STRING_REP_LENGTH); + MYSQL_TIME ltime; + + thd->variables.time_zone->gmt_sec_to_TIME(<ime, val.unix_time); + ltime.second_part= val.second_part; + + value= buf; + if (buf && !my_datetime_to_str(<ime, buf, 6)) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), name.str, "NULL (wrong datetime)"); + value= thd->strdup("Error: wrong datetime"); + } + break; + } + default: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), name.str, "NULL (wrong range type)"); + value= thd->strdup("Error: wrong range type"); + } + return reinterpret_cast(value); + } + +public: + virtual const uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return value_ptr(thd, session_var(thd, vers_asof_timestamp_t)); } + virtual const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { return value_ptr(thd, global_var(vers_asof_timestamp_t)); } +}; diff --git a/sql/sys_vars_shared.h b/sql/sys_vars_shared.h new 
file mode 100644 index 00000000..508a0a70 --- /dev/null +++ b/sql/sys_vars_shared.h @@ -0,0 +1,87 @@ +#ifndef SYS_VARS_SHARED_INCLUDED +#define SYS_VARS_SHARED_INCLUDED + +/* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + "protected" interface to sys_var - server configuration variables. + + This header is included by files implementing support and utility + functions of sys_var's (set_var.cc) and files implementing + classes in the sys_var hierarchy (sql_plugin.cc) +*/ + +#include +#include "set_var.h" + +extern bool throw_bounds_warning(THD *thd, const char *name,const char *v); +extern bool throw_bounds_warning(THD *thd, const char *name, + bool fixed, bool is_unsigned, longlong v); +extern bool throw_bounds_warning(THD *thd, const char *name, bool fixed, + double v); +extern sys_var *intern_find_sys_var(const char *str, size_t length); + +extern sys_var_chain all_sys_vars; + +/** wrapper to hide a mutex and an rwlock under a common interface */ +class PolyLock +{ +public: + virtual void rdlock()= 0; + virtual void wrlock()= 0; + virtual void unlock()= 0; + virtual ~PolyLock() = default; +}; + +class PolyLock_mutex: public PolyLock +{ + mysql_mutex_t *mutex; +public: + PolyLock_mutex(mysql_mutex_t *arg): mutex(arg) {} + void rdlock() { mysql_mutex_lock(mutex); } + void 
wrlock() { mysql_mutex_lock(mutex); } + void unlock() { mysql_mutex_unlock(mutex); } +}; + +class PolyLock_rwlock: public PolyLock +{ + mysql_rwlock_t *rwlock; +public: + PolyLock_rwlock(mysql_rwlock_t *arg): rwlock(arg) {} + void rdlock() { mysql_rwlock_rdlock(rwlock); } + void wrlock() { mysql_rwlock_wrlock(rwlock); } + void unlock() { mysql_rwlock_unlock(rwlock); } +}; + +class AutoWLock +{ + PolyLock *lock; +public: + AutoWLock(PolyLock *l) : lock(l) { if (lock) lock->wrlock(); } + ~AutoWLock() { if (lock) lock->unlock(); } +}; + +class AutoRLock +{ + PolyLock *lock; +public: + AutoRLock(PolyLock *l) : lock(l) { if (lock) lock->rdlock(); } + ~AutoRLock() { if (lock) lock->unlock(); } +}; + + +#endif /* SYS_VARS_SHARED_INCLUDED */ diff --git a/sql/table.cc b/sql/table.cc new file mode 100644 index 00000000..b85d23b2 --- /dev/null +++ b/sql/table.cc @@ -0,0 +1,10579 @@ +/* Copyright (c) 2000, 2017, Oracle and/or its affiliates. + Copyright (c) 2008, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* Some general useful functions */ + +#include "mariadb.h" /* NO_EMBEDDED_ACCESS_CHECKS */ +#include "sql_priv.h" +#include "table.h" +#include "key.h" // find_ref_key +#include "sql_table.h" // build_table_filename, + // primary_key_name +#include "sql_parse.h" // free_items +#include "strfunc.h" // unhex_type2 +#include "ha_partition.h" // PART_EXT + // mysql_unpack_partition, + // fix_partition_func, partition_info +#include "sql_base.h" +#include "create_options.h" +#include "sql_trigger.h" +#include +#include "my_md5.h" +#include "my_bit.h" +#include "sql_select.h" +#include "sql_derived.h" +#include "sql_statistics.h" +#include "discover.h" +#include "mdl.h" // MDL_wait_for_graph_visitor +#include "sql_view.h" +#include "rpl_filter.h" +#include "sql_cte.h" +#include "ha_sequence.h" +#include "sql_show.h" +#include "opt_trace.h" +#include "sql_db.h" // get_default_db_collation +#ifdef WITH_WSREP +#include "wsrep_schema.h" +#endif + +/* For MySQL 5.7 virtual fields */ +#define MYSQL57_GENERATED_FIELD 128 +#define MYSQL57_GCOL_HEADER_SIZE 4 + +bool TABLE::init_expr_arena(MEM_ROOT *mem_root) +{ + /* + We need to use CONVENTIONAL_EXECUTION here to ensure that + any new items created by fix_fields() are not reverted. 
+ */ + expr_arena= new (alloc_root(mem_root, sizeof(Query_arena))) + Query_arena(mem_root, Query_arena::STMT_CONVENTIONAL_EXECUTION); + return expr_arena == NULL; +} + +struct extra2_fields +{ + LEX_CUSTRING version; + LEX_CUSTRING options; + Lex_ident engine; + LEX_CUSTRING gis; + LEX_CUSTRING field_flags; + LEX_CUSTRING system_period; + LEX_CUSTRING application_period; + LEX_CUSTRING field_data_type_info; + LEX_CUSTRING without_overlaps; + LEX_CUSTRING index_flags; + void reset() + { bzero((void*)this, sizeof(*this)); } +}; + +static Virtual_column_info * unpack_vcol_info_from_frm(THD *, + TABLE *, String *, Virtual_column_info **, bool *); + +/* INFORMATION_SCHEMA name */ +LEX_CSTRING INFORMATION_SCHEMA_NAME= {STRING_WITH_LEN("information_schema")}; + +/* PERFORMANCE_SCHEMA name */ +LEX_CSTRING PERFORMANCE_SCHEMA_DB_NAME= {STRING_WITH_LEN("performance_schema")}; + +/* MYSQL_SCHEMA name */ +LEX_CSTRING MYSQL_SCHEMA_NAME= {STRING_WITH_LEN("mysql")}; + +/* GENERAL_LOG name */ +LEX_CSTRING GENERAL_LOG_NAME= {STRING_WITH_LEN("general_log")}; + +/* SLOW_LOG name */ +LEX_CSTRING SLOW_LOG_NAME= {STRING_WITH_LEN("slow_log")}; + +LEX_CSTRING TRANSACTION_REG_NAME= {STRING_WITH_LEN("transaction_registry")}; +LEX_CSTRING MYSQL_PROC_NAME= {STRING_WITH_LEN("proc")}; + +/* + Keyword added as a prefix when parsing the defining expression for a + virtual column read from the column definition saved in the frm file +*/ +static LEX_CSTRING parse_vcol_keyword= { STRING_WITH_LEN("PARSE_VCOL_EXPR ") }; + +static std::atomic last_table_id; + + /* Functions defined in this file */ + +static bool fix_type_pointers(const char ***typelib_value_names, + uint **typelib_value_lengths, + TYPELIB *point_to_type, uint types, + char *names, size_t names_length); + +static field_index_t find_field(Field **fields, uchar *record, uint start, + uint length); + +inline bool is_system_table_name(const char *name, size_t length); + 
+/************************************************************************** + Object_creation_ctx implementation. +**************************************************************************/ + +Object_creation_ctx *Object_creation_ctx::set_n_backup(THD *thd) +{ + Object_creation_ctx *backup_ctx; + DBUG_ENTER("Object_creation_ctx::set_n_backup"); + + backup_ctx= create_backup_ctx(thd); + change_env(thd); + + DBUG_RETURN(backup_ctx); +} + +void Object_creation_ctx::restore_env(THD *thd, Object_creation_ctx *backup_ctx) +{ + if (!backup_ctx) + return; + + backup_ctx->change_env(thd); + + delete backup_ctx; +} + +/************************************************************************** + Default_object_creation_ctx implementation. +**************************************************************************/ + +Default_object_creation_ctx::Default_object_creation_ctx(THD *thd) + : m_client_cs(thd->variables.character_set_client), + m_connection_cl(thd->variables.collation_connection) +{ } + +Default_object_creation_ctx::Default_object_creation_ctx( + CHARSET_INFO *client_cs, CHARSET_INFO *connection_cl) + : m_client_cs(client_cs), + m_connection_cl(connection_cl) +{ } + +Object_creation_ctx * +Default_object_creation_ctx::create_backup_ctx(THD *thd) const +{ + return new Default_object_creation_ctx(thd); +} + +void Default_object_creation_ctx::change_env(THD *thd) const +{ + thd->update_charset(m_client_cs, m_connection_cl); +} + +/************************************************************************** + View_creation_ctx implementation. 
+**************************************************************************/ + +View_creation_ctx *View_creation_ctx::create(THD *thd) +{ + View_creation_ctx *ctx= new (thd->mem_root) View_creation_ctx(thd); + + return ctx; +} + +/*************************************************************************/ + +View_creation_ctx * View_creation_ctx::create(THD *thd, + TABLE_LIST *view) +{ + View_creation_ctx *ctx= new (thd->mem_root) View_creation_ctx(thd); + + /* Throw a warning if there is NULL cs name. */ + + if (!view->view_client_cs_name.str || + !view->view_connection_cl_name.str) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_VIEW_NO_CREATION_CTX, + ER_THD(thd, ER_VIEW_NO_CREATION_CTX), + view->db.str, + view->table_name.str); + + ctx->m_client_cs= system_charset_info; + ctx->m_connection_cl= system_charset_info; + + return ctx; + } + + /* Resolve cs names. Throw a warning if there is unknown cs name. */ + + bool invalid_creation_ctx; + myf utf8_flag= thd->get_utf8_flag(); + invalid_creation_ctx= resolve_charset(view->view_client_cs_name.str, + system_charset_info, + &ctx->m_client_cs, MYF(utf8_flag)); + + invalid_creation_ctx= resolve_collation(view->view_connection_cl_name.str, + system_charset_info, + &ctx->m_connection_cl, MYF(utf8_flag)) || + invalid_creation_ctx; + + if (invalid_creation_ctx) + { + sql_print_warning("View '%s'.'%s': there is unknown charset/collation " + "names (client: '%s'; connection: '%s').", + view->db.str, + view->table_name.str, + (const char *) view->view_client_cs_name.str, + (const char *) view->view_connection_cl_name.str); + + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_VIEW_INVALID_CREATION_CTX, + ER_THD(thd, ER_VIEW_INVALID_CREATION_CTX), + view->db.str, + view->table_name.str); + } + + return ctx; +} + +/*************************************************************************/ + +/* Get column name from column hash */ + +static uchar *get_field_name(Field **buff, size_t *length, + 
my_bool not_used __attribute__((unused))) +{ + *length= (uint) (*buff)->field_name.length; + return (uchar*) (*buff)->field_name.str; +} + + +/* + Returns pointer to '.frm' extension of the file name. + + SYNOPSIS + fn_frm_ext() + name file name + + DESCRIPTION + Checks file name part starting with the rightmost '.' character, + and returns it if it is equal to '.frm'. + + RETURN VALUES + Pointer to the '.frm' extension or NULL if not a .frm file +*/ + +const char *fn_frm_ext(const char *name) +{ + const char *res= strrchr(name, '.'); + if (res && !strcmp(res, reg_ext)) + return res; + return 0; +} + + +TABLE_CATEGORY get_table_category(const LEX_CSTRING *db, + const LEX_CSTRING *name) +{ + DBUG_ASSERT(db != NULL); + DBUG_ASSERT(name != NULL); + +#ifdef WITH_WSREP + if (db->str && + my_strcasecmp(system_charset_info, db->str, WSREP_SCHEMA) == 0) + { + if ((my_strcasecmp(system_charset_info, name->str, WSREP_STREAMING_TABLE) == 0 || + my_strcasecmp(system_charset_info, name->str, WSREP_CLUSTER_TABLE) == 0 || + my_strcasecmp(system_charset_info, name->str, WSREP_MEMBERS_TABLE) == 0)) + { + return TABLE_CATEGORY_INFORMATION; + } + } +#endif /* WITH_WSREP */ + if (is_infoschema_db(db)) + return TABLE_CATEGORY_INFORMATION; + + if (is_perfschema_db(db)) + return TABLE_CATEGORY_PERFORMANCE; + + if (lex_string_eq(&MYSQL_SCHEMA_NAME, db)) + { + if (is_system_table_name(name->str, name->length)) + return TABLE_CATEGORY_SYSTEM; + + if (lex_string_eq(&GENERAL_LOG_NAME, name)) + return TABLE_CATEGORY_LOG; + + if (lex_string_eq(&SLOW_LOG_NAME, name)) + return TABLE_CATEGORY_LOG; + + if (lex_string_eq(&TRANSACTION_REG_NAME, name)) + return TABLE_CATEGORY_LOG; + } + + return TABLE_CATEGORY_USER; +} + + +/* + Allocate and setup a TABLE_SHARE structure + + SYNOPSIS + alloc_table_share() + db Database name + table_name Table name + key Table cache key (db \0 table_name \0...) 
+ key_length Length of key + + RETURN + 0 Error (out of memory) + # Share +*/ + +TABLE_SHARE *alloc_table_share(const char *db, const char *table_name, + const char *key, uint key_length) +{ + MEM_ROOT mem_root; + TABLE_SHARE *share; + char *key_buff, *path_buff; + char path[FN_REFLEN]; + uint path_length; + DBUG_ENTER("alloc_table_share"); + DBUG_PRINT("enter", ("table: '%s'.'%s'", db, table_name)); + + path_length= build_table_filename(path, sizeof(path) - 1, + db, table_name, "", 0); + init_sql_alloc(key_memory_table_share, &mem_root, TABLE_ALLOC_BLOCK_SIZE, 0, + MYF(0)); + if (multi_alloc_root(&mem_root, + &share, sizeof(*share), + &key_buff, key_length, + &path_buff, path_length + 1, + NULL)) + { + bzero((char*) share, sizeof(*share)); + + share->set_table_cache_key(key_buff, key, key_length); + + share->path.str= path_buff; + share->path.length= path_length; + strmov(path_buff, path); + share->normalized_path.str= share->path.str; + share->normalized_path.length= path_length; + share->table_category= get_table_category(& share->db, & share->table_name); + share->open_errno= ENOENT; + /* The following will be updated in open_table_from_share */ + share->can_do_row_logging= 1; + if (share->table_category == TABLE_CATEGORY_LOG) + share->no_replicate= 1; + if (key_length > 6 && + table_alias_charset->strnncoll(key, 6, "mysql", 6) == 0) + share->not_usable_by_query_cache= 1; + + memcpy((char*) &share->mem_root, (char*) &mem_root, sizeof(mem_root)); + mysql_mutex_init(key_TABLE_SHARE_LOCK_share, + &share->LOCK_share, MY_MUTEX_INIT_SLOW); + mysql_mutex_init(key_TABLE_SHARE_LOCK_ha_data, + &share->LOCK_ha_data, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_TABLE_SHARE_LOCK_statistics, + &share->LOCK_statistics, MY_MUTEX_INIT_SLOW); + + DBUG_EXECUTE_IF("simulate_big_table_id", + if (last_table_id < UINT_MAX32) + last_table_id= UINT_MAX32 - 1;); + /* + There is one reserved number that cannot be used. 
Remember to + change this when 6-byte global table id's are introduced. + */ + do + { + share->table_map_id= + last_table_id.fetch_add(1, std::memory_order_relaxed); + } while (unlikely(share->table_map_id == ~0UL || + share->table_map_id == 0)); + } + DBUG_RETURN(share); +} + + +/* + Initialize share for temporary tables + + SYNOPSIS + init_tmp_table_share() + thd thread handle + share Share to fill + key Table_cache_key, as generated from tdc_create_key. + must start with db name. + key_length Length of key + table_name Table name + path Path to file (possible in lower case) without .frm + + NOTES + This is different from alloc_table_share() because temporary tables + don't have to be shared between threads or put into the table def + cache, so we can do some things notable simpler and faster + + If table is not put in thd->temporary_tables (happens only when + one uses OPEN TEMPORARY) then one can specify 'db' as key and + use key_length= 0 as neither table_cache_key or key_length will be used). +*/ + +void init_tmp_table_share(THD *thd, TABLE_SHARE *share, const char *key, + uint key_length, const char *table_name, + const char *path) +{ + DBUG_ENTER("init_tmp_table_share"); + DBUG_PRINT("enter", ("table: '%s'.'%s'", key, table_name)); + + bzero((char*) share, sizeof(*share)); + /* + This can't be MY_THREAD_SPECIFIC for slaves as they are freed + during cleanup() from Relay_log_info::close_temporary_tables() + */ + init_sql_alloc(key_memory_table_share, &share->mem_root, + TABLE_ALLOC_BLOCK_SIZE, 0, + MYF(thd->slave_thread ? 
0 : MY_THREAD_SPECIFIC)); + share->table_category= TABLE_CATEGORY_TEMPORARY; + share->tmp_table= INTERNAL_TMP_TABLE; + share->db.str= (char*) key; + share->db.length= strlen(key); + share->table_cache_key.str= (char*) key; + share->table_cache_key.length= key_length; + share->table_name.str= (char*) table_name; + share->table_name.length= strlen(table_name); + share->path.str= (char*) path; + share->normalized_path.str= (char*) path; + share->path.length= share->normalized_path.length= strlen(path); + share->frm_version= FRM_VER_CURRENT; + share->not_usable_by_query_cache= 1; + share->can_do_row_logging= 0; // No row logging + + /* + table_map_id is also used for MERGE tables to suppress repeated + compatibility checks. + */ + share->table_map_id= (ulong) thd->query_id; + DBUG_VOID_RETURN; +} + + +/** + Release resources (plugins) used by the share and free its memory. + TABLE_SHARE is self-contained -- it's stored in its own MEM_ROOT. + Free this MEM_ROOT. +*/ + +void TABLE_SHARE::destroy() +{ + uint idx; + KEY *info_it; + DBUG_ENTER("TABLE_SHARE::destroy"); + DBUG_PRINT("info", ("db: %s table: %s", db.str, table_name.str)); + + if (ha_share) + { + delete ha_share; + ha_share= NULL; // Safety + } + + if (stats_cb) + { + stats_cb->usage_count--; + delete stats_cb; + } + delete sequence; + + /* The mutexes are initialized only for shares that are part of the TDC */ + if (tmp_table == NO_TMP_TABLE) + { + mysql_mutex_destroy(&LOCK_share); + mysql_mutex_destroy(&LOCK_ha_data); + mysql_mutex_destroy(&LOCK_statistics); + } + my_hash_free(&name_hash); + + plugin_unlock(NULL, db_plugin); + db_plugin= NULL; + + /* Release fulltext parsers */ + info_it= key_info; + for (idx= keys; idx; idx--, info_it++) + { + if (info_it->flags & HA_USES_PARSER) + { + plugin_unlock(NULL, info_it->parser); + info_it->flags= 0; + } + } + +#ifdef WITH_PARTITION_STORAGE_ENGINE + plugin_unlock(NULL, default_part_plugin); +#endif /* WITH_PARTITION_STORAGE_ENGINE */ + + 
PSI_CALL_release_table_share(m_psi); + + /* + Make a copy since the share is allocated in its own root, + and free_root() updates its argument after freeing the memory. + */ + MEM_ROOT own_root= mem_root; + free_root(&own_root, MYF(0)); + DBUG_VOID_RETURN; +} + +/* + Free table share and memory used by it + + SYNOPSIS + free_table_share() + share Table share +*/ + +void free_table_share(TABLE_SHARE *share) +{ + DBUG_ENTER("free_table_share"); + DBUG_PRINT("enter", ("table: %s.%s", share->db.str, share->table_name.str)); + share->destroy(); + DBUG_VOID_RETURN; +} + + +/** + Return TRUE if a table name matches one of the system table names. + Currently these are: + + help_category, help_keyword, help_relation, help_topic, + proc, event + time_zone, time_zone_leap_second, time_zone_name, time_zone_transition, + time_zone_transition_type + + This function trades accuracy for speed, so may return false + positives. Presumably mysql.* database is for internal purposes only + and should not contain user tables. 
+*/ + +inline bool is_system_table_name(const char *name, size_t length) +{ + CHARSET_INFO *ci= system_charset_info; + + return ( + /* mysql.proc table */ + (length == 4 && + my_tolower(ci, name[0]) == 'p' && + my_tolower(ci, name[1]) == 'r' && + my_tolower(ci, name[2]) == 'o' && + my_tolower(ci, name[3]) == 'c') || + + (length > 4 && + ( + /* one of mysql.help* tables */ + (my_tolower(ci, name[0]) == 'h' && + my_tolower(ci, name[1]) == 'e' && + my_tolower(ci, name[2]) == 'l' && + my_tolower(ci, name[3]) == 'p') || + + /* one of mysql.time_zone* tables */ + (my_tolower(ci, name[0]) == 't' && + my_tolower(ci, name[1]) == 'i' && + my_tolower(ci, name[2]) == 'm' && + my_tolower(ci, name[3]) == 'e') || + + /* one of mysql.*_stat tables, but not mysql.innodb* tables*/ + ((my_tolower(ci, name[length-5]) == 's' && + my_tolower(ci, name[length-4]) == 't' && + my_tolower(ci, name[length-3]) == 'a' && + my_tolower(ci, name[length-2]) == 't' && + my_tolower(ci, name[length-1]) == 's') && + !(my_tolower(ci, name[0]) == 'i' && + my_tolower(ci, name[1]) == 'n' && + my_tolower(ci, name[2]) == 'n' && + my_tolower(ci, name[3]) == 'o')) || + + /* mysql.event table */ + (my_tolower(ci, name[0]) == 'e' && + my_tolower(ci, name[1]) == 'v' && + my_tolower(ci, name[2]) == 'e' && + my_tolower(ci, name[3]) == 'n' && + my_tolower(ci, name[4]) == 't') + ) + ) + ); +} + + +/* + Read table definition from a binary / text based .frm file + + SYNOPSIS + open_table_def() + thd Thread handler + share Fill this with table definition + flags Bit mask of the following flags: OPEN_VIEW + + NOTES + This function is called when the table definition is not cached in + table definition cache + The data is returned in 'share', which is allocated by + alloc_table_share().. The code assumes that share is initialized. 
+*/ + +enum open_frm_error open_table_def(THD *thd, TABLE_SHARE *share, uint flags) +{ + bool error_given= false; + File file; + uchar *buf; + uchar head[FRM_HEADER_SIZE]; + char path[FN_REFLEN]; + size_t frmlen, read_length; + uint length; + DBUG_ENTER("open_table_def"); + DBUG_PRINT("enter", ("table: '%s'.'%s' path: '%s'", share->db.str, + share->table_name.str, share->normalized_path.str)); + + share->error= OPEN_FRM_OPEN_ERROR; + + length=(uint) (strxmov(path, share->normalized_path.str, reg_ext, NullS) - + path); + if (flags & GTS_FORCE_DISCOVERY) + { + const char *path2= share->normalized_path.str; + DBUG_ASSERT(flags & GTS_TABLE); + DBUG_ASSERT(flags & GTS_USE_DISCOVERY); + /* Delete .frm and .par files */ + mysql_file_delete_with_symlink(key_file_frm, path2, reg_ext, MYF(0)); + mysql_file_delete_with_symlink(key_file_partition_ddl_log, path2, PAR_EXT, + MYF(0)); + file= -1; + } + else + file= mysql_file_open(key_file_frm, path, O_RDONLY | O_SHARE, MYF(0)); + + if (file < 0) + { + if ((flags & GTS_TABLE) && (flags & GTS_USE_DISCOVERY)) + { + ha_discover_table(thd, share); + error_given= true; + } + goto err_not_open; + } + + if (mysql_file_read(file, head, sizeof(head), MYF(MY_NABP))) + { + share->error = my_errno == HA_ERR_FILE_TOO_SHORT + ? OPEN_FRM_CORRUPTED : OPEN_FRM_READ_ERROR; + goto err; + } + + if (memcmp(head, STRING_WITH_LEN("TYPE=VIEW\n")) == 0) + { + share->is_view= 1; + if (flags & GTS_VIEW) + { + LEX_CSTRING pathstr= { path, length }; + /* + Create view file parser and hold it in TABLE_SHARE member + view_def. 
+ */ + share->view_def= sql_parse_prepare(&pathstr, &share->mem_root, true); + if (!share->view_def) + share->error= OPEN_FRM_ERROR_ALREADY_ISSUED; + else + { + share->error= OPEN_FRM_OK; + if (mariadb_view_version_get(share)) + share->error= OPEN_FRM_ERROR_ALREADY_ISSUED; + } + } + else + share->error= OPEN_FRM_NOT_A_TABLE; + goto err; + } + if (!is_binary_frm_header(head)) + { + /* No handling of text based files yet */ + share->error = OPEN_FRM_CORRUPTED; + goto err; + } + if (!(flags & GTS_TABLE)) + { + share->error = OPEN_FRM_NOT_A_VIEW; + goto err; + } + + frmlen= uint4korr(head+10); + set_if_smaller(frmlen, FRM_MAX_SIZE); // safety + + if (!(buf= (uchar*)my_malloc(PSI_INSTRUMENT_ME, frmlen, + MYF(MY_THREAD_SPECIFIC|MY_WME)))) + goto err; + + memcpy(buf, head, sizeof(head)); + + read_length= mysql_file_read(file, buf + sizeof(head), + frmlen - sizeof(head), MYF(MY_WME)); + if (read_length == 0 || read_length == (size_t)-1) + { + share->error = OPEN_FRM_READ_ERROR; + my_free(buf); + goto err; + } + mysql_file_close(file, MYF(MY_WME)); + + frmlen= read_length + sizeof(head); + + share->init_from_binary_frm_image(thd, false, buf, frmlen); + /* + Don't give any additional errors. If there would be a problem, + init_from_binary_frm_image would call my_error() itself. 
+ */ + error_given= true; + my_free(buf); + + goto err_not_open; + +err: + mysql_file_close(file, MYF(MY_WME)); + +err_not_open: + /* Mark that table was created earlier and thus should have been logged */ + share->table_creation_was_logged= 1; + + if (unlikely(share->error && !error_given)) + { + share->open_errno= my_errno; + open_table_error(share, share->error, share->open_errno); + } + + DBUG_RETURN(share->error); +} + +static bool create_key_infos(const uchar *strpos, const uchar *frm_image_end, + uint keys, KEY *keyinfo, + uint new_frm_ver, uint *ext_key_parts, + TABLE_SHARE *share, uint len, + KEY *first_keyinfo, + LEX_STRING *keynames) +{ + uint i, j, n_length; + KEY_PART_INFO *key_part= NULL; + ulong *rec_per_key= NULL; + KEY_PART_INFO *first_key_part= NULL; + uint first_key_parts= 0; + + if (!keys) + { + if (!(keyinfo = (KEY*) alloc_root(&share->mem_root, len))) + return 1; + bzero((char*) keyinfo, len); + key_part= reinterpret_cast (keyinfo); + } + + /* + If share->use_ext_keys is set to TRUE we assume that any key + can be extended by the components of the primary key whose + definition is read first from the frm file. + For each key only those fields of the assumed primary key are + added that are not included in the proper key definition. + If after all it turns out that there is no primary key the + added components are removed from each key. + + When in the future we support others schemes of extending of + secondary keys with components of the primary key we'll have + to change the type of this flag for an enumeration type. 
+ */ + + for (i=0 ; i < keys ; i++, keyinfo++) + { + if (new_frm_ver >= 3) + { + if (strpos + 8 >= frm_image_end) + return 1; + keyinfo->flags= (uint) uint2korr(strpos) ^ HA_NOSAME; + keyinfo->key_length= (uint) uint2korr(strpos+2); + keyinfo->user_defined_key_parts= (uint) strpos[4]; + keyinfo->algorithm= (enum ha_key_alg) strpos[5]; + keyinfo->block_size= uint2korr(strpos+6); + strpos+=8; + } + else + { + if (strpos + 4 >= frm_image_end) + return 1; + keyinfo->flags= ((uint) strpos[0]) ^ HA_NOSAME; + keyinfo->key_length= (uint) uint2korr(strpos+1); + keyinfo->user_defined_key_parts= (uint) strpos[3]; + keyinfo->algorithm= HA_KEY_ALG_UNDEF; + strpos+=4; + } + + if (i == 0) + { + (*ext_key_parts)+= (share->use_ext_keys ? first_keyinfo->user_defined_key_parts*(keys-1) : 0); + n_length=keys * sizeof(KEY) + *ext_key_parts * sizeof(KEY_PART_INFO); + if (!(keyinfo= (KEY*) alloc_root(&share->mem_root, + n_length + len))) + return 1; + bzero((char*) keyinfo,n_length); + share->key_info= keyinfo; + key_part= reinterpret_cast (keyinfo + keys); + + if (!(rec_per_key= (ulong*) alloc_root(&share->mem_root, + sizeof(ulong) * *ext_key_parts))) + return 1; + first_key_part= key_part; + first_key_parts= first_keyinfo->user_defined_key_parts; + keyinfo->flags= first_keyinfo->flags; + keyinfo->key_length= first_keyinfo->key_length; + keyinfo->user_defined_key_parts= first_keyinfo->user_defined_key_parts; + keyinfo->algorithm= first_keyinfo->algorithm; + if (new_frm_ver >= 3) + keyinfo->block_size= first_keyinfo->block_size; + } + + keyinfo->key_part= key_part; + keyinfo->rec_per_key= rec_per_key; + for (j=keyinfo->user_defined_key_parts ; j-- ; key_part++) + { + if (strpos + (new_frm_ver >= 1 ? 
9 : 7) >= frm_image_end) + return 1; + if (!(keyinfo->algorithm == HA_KEY_ALG_LONG_HASH)) + *rec_per_key++=0; + key_part->fieldnr= (uint16) (uint2korr(strpos) & FIELD_NR_MASK); + key_part->offset= (uint) uint2korr(strpos+2)-1; + key_part->key_type= (uint) uint2korr(strpos+5); + // key_part->field= (Field*) 0; // Will be fixed later + if (new_frm_ver >= 1) + { + key_part->key_part_flag= *(strpos+4); + key_part->length= (uint) uint2korr(strpos+7); + strpos+=9; + } + else + { + key_part->length= *(strpos+4); + key_part->key_part_flag=0; + if (key_part->length > 128) + { + key_part->length&=127; /* purecov: inspected */ + key_part->key_part_flag=HA_REVERSE_SORT; /* purecov: inspected */ + } + strpos+=7; + } + key_part->store_length=key_part->length; + } + if (keyinfo->algorithm == HA_KEY_ALG_LONG_HASH) + { + keyinfo->key_length= HA_HASH_KEY_LENGTH_WITHOUT_NULL; + key_part++; // reserved for the hash value + *rec_per_key++=0; + } + + /* + Add primary key to end of extended keys for non unique keys for + storage engines that supports it. 
+ */ + keyinfo->ext_key_parts= keyinfo->user_defined_key_parts; + keyinfo->ext_key_flags= keyinfo->flags; + keyinfo->ext_key_part_map= 0; + if (share->use_ext_keys && i && !(keyinfo->flags & HA_NOSAME)) + { + for (j= 0; + j < first_key_parts && keyinfo->ext_key_parts < MAX_REF_PARTS; + j++) + { + uint key_parts= keyinfo->user_defined_key_parts; + KEY_PART_INFO* curr_key_part= keyinfo->key_part; + KEY_PART_INFO* curr_key_part_end= curr_key_part+key_parts; + for ( ; curr_key_part < curr_key_part_end; curr_key_part++) + { + if (curr_key_part->fieldnr == first_key_part[j].fieldnr) + break; + } + if (curr_key_part == curr_key_part_end) + { + *key_part++= first_key_part[j]; + *rec_per_key++= 0; + keyinfo->ext_key_parts++; + keyinfo->ext_key_part_map|= 1 << j; + } + } + if (j == first_key_parts) + keyinfo->ext_key_flags= keyinfo->flags | HA_EXT_NOSAME; + } + if (keyinfo->algorithm == HA_KEY_ALG_LONG_HASH) + share->ext_key_parts++; + share->ext_key_parts+= keyinfo->ext_key_parts; + } + keynames->str= (char*) key_part; + keynames->length= strnmov(keynames->str, (char *) strpos, + frm_image_end - strpos) - keynames->str; + strpos+= keynames->length; + if (*strpos++) // key names are \0-terminated + return 1; + keynames->length++; // Include '\0', to make fix_type_pointers() happy. 
+ + //reading index comments + for (keyinfo= share->key_info, i=0; i < keys; i++, keyinfo++) + { + if (keyinfo->flags & HA_USES_COMMENT) + { + if (strpos + 2 >= frm_image_end) + return 1; + keyinfo->comment.length= uint2korr(strpos); + strpos+= 2; + + if (strpos + keyinfo->comment.length >= frm_image_end) + return 1; + keyinfo->comment.str= strmake_root(&share->mem_root, (char*) strpos, + keyinfo->comment.length); + strpos+= keyinfo->comment.length; + } + DBUG_ASSERT(MY_TEST(keyinfo->flags & HA_USES_COMMENT) == + (keyinfo->comment.length > 0)); + } + + share->keys= keys; // do it *after* all key_info's are initialized + + return 0; +} + + +/** ensures that the enum value (read from frm) is within limits + + if not - issues a warning and resets the value to 0 + (that is, 0 is assumed to be a default value) +*/ + +static uint enum_value_with_check(THD *thd, TABLE_SHARE *share, + const char *name, uint value, uint limit) +{ + if (value < limit) + return value; + + sql_print_warning("%s.frm: invalid value %d for the field %s", + share->normalized_path.str, value, name); + return 0; +} + + +void Column_definition_attributes::frm_pack_basic(uchar *buff) const +{ + int2store(buff + 3, length); + int2store(buff + 8, pack_flag); + buff[10]= (uchar) unireg_check; +} + + +void Column_definition_attributes::frm_unpack_basic(const uchar *buff) +{ + length= uint2korr(buff + 3); + pack_flag= uint2korr(buff + 8); + unireg_check= (Field::utype) MTYP_TYPENR((uint) buff[10]); +} + + +void Column_definition_attributes::frm_pack_numeric_with_dec(uchar *buff) const +{ + DBUG_ASSERT(f_decimals(pack_flag) == 0); + uint tmp_pack_flag= pack_flag | (decimals << FIELDFLAG_DEC_SHIFT); + int2store(buff + 3, length); + int2store(buff + 8, tmp_pack_flag); + buff[10]= (uchar) unireg_check; +} + + +bool +Column_definition_attributes::frm_unpack_numeric_with_dec(TABLE_SHARE *share, + const uchar *buff) +{ + frm_unpack_basic(buff); + decimals= f_decimals(pack_flag); + pack_flag&= ~FIELDFLAG_DEC_MASK; 
+ return frm_unpack_charset(share, buff); +} + + +bool +Column_definition_attributes::frm_unpack_temporal_with_dec(TABLE_SHARE *share, + uint intlen, + const uchar *buff) +{ + frm_unpack_basic(buff); + decimals= temporal_dec(intlen); + return frm_unpack_charset(share, buff); +} + + +void Column_definition_attributes::frm_pack_charset(uchar *buff) const +{ + buff[11]= (uchar) (charset->number >> 8); + buff[14]= (uchar) charset->number; +} + + +bool Column_definition_attributes::frm_unpack_charset(TABLE_SHARE *share, + const uchar *buff) +{ + uint cs_org= buff[14] + (((uint) buff[11]) << 8); + uint cs_new= Charset::upgrade_collation_id(share->mysql_version, cs_org); + if (cs_org != cs_new) + share->incompatible_version|= HA_CREATE_USED_CHARSET; + if (cs_new && !(charset= get_charset(cs_new, MYF(0)))) + { + const char *csname= get_charset_name((uint) cs_new); + char tmp[10]; + if (!csname || csname[0] =='?') + { + my_snprintf(tmp, sizeof(tmp), "#%u", cs_new); + csname= tmp; + } + my_printf_error(ER_UNKNOWN_COLLATION, + "Unknown collation '%s' in table '%-.64s' definition", + MYF(0), csname, share->table_name.str); + return true; + } + return false; +} + + +/* + In MySQL 5.7 the null bits for not stored virtual fields are last. + Calculate the position for these bits +*/ + +static void mysql57_calculate_null_position(TABLE_SHARE *share, + uchar **null_pos, + uint *null_bit_pos, + const uchar *strpos, + const uchar *vcol_screen_pos) +{ + uint field_pack_length= 17; + + for (uint i=0 ; i < share->fields; i++, strpos+= field_pack_length) + { + uint field_length, pack_flag; + enum_field_types field_type; + + if ((strpos[10] & MYSQL57_GENERATED_FIELD)) + { + /* Skip virtual (not stored) generated field */ + bool stored_in_db= vcol_screen_pos[3]; + vcol_screen_pos+= (uint2korr(vcol_screen_pos + 1) + + MYSQL57_GCOL_HEADER_SIZE); + if (! 
stored_in_db) + continue; + } + field_length= uint2korr(strpos+3); + pack_flag= uint2korr(strpos+8); + field_type= (enum_field_types) (uint) strpos[13]; + if (field_type == MYSQL_TYPE_BIT && !f_bit_as_char(pack_flag)) + { + if (((*null_bit_pos)+= field_length & 7) > 7) + { + (*null_pos)++; + (*null_bit_pos)-= 8; + } + } + if (f_maybe_null(pack_flag)) + { + if (!((*null_bit_pos)= ((*null_bit_pos) + 1) & 7)) + (*null_pos)++; + } + } +} + + +Item_func_hash *TABLE_SHARE::make_long_hash_func(THD *thd, + MEM_ROOT *mem_root, + List *field_list) + const +{ + if (old_long_hash_function()) + return new (mem_root) Item_func_hash_mariadb_100403(thd, *field_list); + return new (mem_root) Item_func_hash(thd, *field_list); +} + + +/** Parse TABLE_SHARE::vcol_defs + + unpack_vcol_info_from_frm + 5.7 + byte 1 = 1 + byte 2,3 = expr length + byte 4 = stored_in_db + expression + 10.1- + byte 1 = 1 | 2 + byte 2 = sql_type ; but TABLE::init_from_binary_frm_image() + byte 3 = stored_in_db ; has put expr_length here + [byte 4] = optional interval_id for sql_type (if byte 1 == 2) + expression + 10.2+ + byte 1 = type + byte 2,3 = field_number + byte 4,5 = length of expression + byte 6 = length of name + name + expression +*/ +bool parse_vcol_defs(THD *thd, MEM_ROOT *mem_root, TABLE *table, + bool *error_reported, vcol_init_mode mode) +{ + struct check_vcol_forward_refs + { + static bool check(Field *field, Virtual_column_info *vcol) + { + return vcol && + vcol->expr->walk(&Item::check_field_expression_processor, 0, field); + } + static bool check(Field *field) + { + if (check(field, field->vcol_info) || + check(field, field->default_value)) + return true; + return false; + } + }; + CHARSET_INFO *save_character_set_client= thd->variables.character_set_client; + CHARSET_INFO *save_collation= thd->variables.collation_connection; + Query_arena *backup_stmt_arena_ptr= thd->stmt_arena; + const uchar *pos= table->s->vcol_defs.str; + const uchar *end= pos + table->s->vcol_defs.length; + Field 
**field_ptr= table->field - 1; + Field **vfield_ptr= table->vfield; + Field **dfield_ptr= table->default_field; + Virtual_column_info **check_constraint_ptr= table->check_constraints; + Sql_mode_save_for_frm_handling sql_mode_save(thd); + Query_arena backup_arena; + Virtual_column_info *vcol= 0; + StringBuffer expr_str; + bool res= 1; + DBUG_ENTER("parse_vcol_defs"); + + if (check_constraint_ptr) + memcpy(table->check_constraints + table->s->field_check_constraints, + table->s->check_constraints, + table->s->table_check_constraints * sizeof(Virtual_column_info*)); + + DBUG_ASSERT(table->expr_arena == NULL); + + if (table->init_expr_arena(mem_root)) + DBUG_RETURN(1); + + thd->set_n_backup_active_arena(table->expr_arena, &backup_arena); + thd->stmt_arena= table->expr_arena; + thd->update_charset(&my_charset_utf8mb4_general_ci, table->s->table_charset); + expr_str.append(&parse_vcol_keyword); + + while (pos < end) + { + uint type, expr_length; + if (table->s->frm_version >= FRM_VER_EXPRESSSIONS) + { + uint field_nr, name_length; + /* see pack_expression() for how data is stored */ + type= pos[0]; + field_nr= uint2korr(pos+1); + expr_length= uint2korr(pos+3); + name_length= pos[5]; + pos+= FRM_VCOL_NEW_HEADER_SIZE + name_length; + field_ptr= table->field + field_nr; + } + else + { + /* + see below in ::init_from_binary_frm_image for how data is stored + in versions below 10.2 (that includes 5.7 too) + */ + while (*++field_ptr && !(*field_ptr)->vcol_info) /* no-op */; + if (!*field_ptr) + { + open_table_error(table->s, OPEN_FRM_CORRUPTED, 1); + goto end; + } + type= (*field_ptr)->vcol_info->stored_in_db + ? VCOL_GENERATED_STORED : VCOL_GENERATED_VIRTUAL; + expr_length= uint2korr(pos+1); + if (table->s->mysql_version > 50700 && table->s->mysql_version < 100000) + { + table->s->keep_original_mysql_version= 1; + pos+= 4; // MySQL from 5.7 + } + else + pos+= pos[0] == 2 ? 
4 : 3; // MariaDB from 5.2 to 10.1 + } + + expr_str.length(parse_vcol_keyword.length); + expr_str.append((char*)pos, expr_length); + thd->where= vcol_type_name(static_cast(type)); + + switch (type) { + case VCOL_GENERATED_VIRTUAL: + case VCOL_GENERATED_STORED: + vcol= unpack_vcol_info_from_frm(thd, table, &expr_str, + &((*field_ptr)->vcol_info), error_reported); + *(vfield_ptr++)= *field_ptr; + DBUG_ASSERT(table->map == 0); + /* + We need Item_field::const_item() to return false, so + datetime_precision() and time_precision() do not try to calculate + field values, e.g. val_str(). + Set table->map to non-zero temporarily. + */ + table->map= 1; + if (vcol && field_ptr[0]->check_vcol_sql_mode_dependency(thd, mode)) + { + DBUG_ASSERT(thd->is_error()); + *error_reported= true; + goto end; + } + table->map= 0; + break; + case VCOL_DEFAULT: + vcol= unpack_vcol_info_from_frm(thd, table, &expr_str, + &((*field_ptr)->default_value), + error_reported); + *(dfield_ptr++)= *field_ptr; + if (vcol && (vcol->flags & (VCOL_NON_DETERMINISTIC | VCOL_SESSION_FUNC))) + table->s->non_determinstic_insert= true; + break; + case VCOL_CHECK_FIELD: + vcol= unpack_vcol_info_from_frm(thd, table, &expr_str, + &((*field_ptr)->check_constraint), + error_reported); + *check_constraint_ptr++= (*field_ptr)->check_constraint; + break; + case VCOL_CHECK_TABLE: + vcol= unpack_vcol_info_from_frm(thd, table, &expr_str, + check_constraint_ptr, error_reported); + check_constraint_ptr++; + break; + } + if (!vcol) + goto end; + pos+= expr_length; + } + + /* Now, initialize CURRENT_TIMESTAMP and UNIQUE_INDEX_HASH_FIELD fields */ + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + Field *field= *field_ptr; + if (field->flags & LONG_UNIQUE_HASH_FIELD) + { + List *field_list= new (mem_root) List(); + Item *list_item; + KEY *key= 0; + uint key_index, parts= 0; + for (key_index= 0; key_index < table->s->keys; key_index++) + { + key=table->key_info + key_index; + parts= key->user_defined_key_parts; + if 
(key->key_part[parts].fieldnr == field->field_index + 1) + break; + } + if (!key || key->algorithm != HA_KEY_ALG_LONG_HASH) + goto end; + KEY_PART_INFO *keypart; + for (uint i=0; i < parts; i++) + { + keypart= key->key_part + i; + if (keypart->key_part_flag & HA_PART_KEY_SEG) + { + int length= keypart->length/keypart->field->charset()->mbmaxlen; + list_item= new (mem_root) Item_func_left(thd, + new (mem_root) Item_field(thd, keypart->field), + new (mem_root) Item_int(thd, length)); + list_item->fix_fields(thd, NULL); + keypart->field->vcol_info= + table->field[keypart->field->field_index]->vcol_info; + } + else + list_item= new (mem_root) Item_field(thd, keypart->field); + field_list->push_back(list_item, mem_root); + } + + Item_func_hash *hash_item= table->s->make_long_hash_func(thd, mem_root, + field_list); + + Virtual_column_info *v= new (mem_root) Virtual_column_info(); + field->vcol_info= v; + field->vcol_info->expr= hash_item; + field->vcol_info->set_vcol_type(VCOL_USING_HASH); + if (v->fix_and_check_expr(thd, table)) + goto end; + key->user_defined_key_parts= key->ext_key_parts= key->usable_key_parts= 1; + key->key_part+= parts; + + if (key->flags & HA_NULL_PART_KEY) + key->key_length= HA_HASH_KEY_LENGTH_WITH_NULL; + else + key->key_length= HA_HASH_KEY_LENGTH_WITHOUT_NULL; + + *(vfield_ptr++)= *field_ptr; + } + if (field->has_default_now_unireg_check()) + { + expr_str.length(parse_vcol_keyword.length); + expr_str.append(STRING_WITH_LEN("current_timestamp(")); + expr_str.append_ulonglong(field->decimals()); + expr_str.append(')'); + vcol= unpack_vcol_info_from_frm(thd, table, &expr_str, + &((*field_ptr)->default_value), + error_reported); + *(dfield_ptr++)= *field_ptr; + if (!field->default_value->expr) + goto end; + } + else if (field->has_update_default_function() && !field->default_value) + *(dfield_ptr++)= *field_ptr; + } + + if (vfield_ptr) + *vfield_ptr= 0; + + if (dfield_ptr) + *dfield_ptr= 0; + + if (check_constraint_ptr) + *check_constraint_ptr= 0; + 
+ /* Check that expressions aren't referring to not yet initialized fields */ + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + if (check_vcol_forward_refs::check(*field_ptr)) + { + *error_reported= true; + goto end; + } + if ((*field_ptr)->check_constraint) + (*field_ptr)->check_constraint->expr-> + walk(&Item::update_func_default_processor, 0, *field_ptr); + } + + table->find_constraint_correlated_indexes(); + + res=0; +end: + thd->restore_active_arena(table->expr_arena, &backup_arena); + thd->stmt_arena= backup_stmt_arena_ptr; + if (save_character_set_client) + thd->update_charset(save_character_set_client, save_collation); + DBUG_RETURN(res); +} + + +static const Type_handler *old_frm_type_handler(uint pack_flag, + uint interval_nr) +{ + enum_field_types field_type= (enum_field_types) f_packtype(pack_flag); + DBUG_ASSERT(field_type < 16); + + if (!f_is_alpha(pack_flag)) + return Type_handler::get_handler_by_real_type(field_type); + + if (!f_is_packed(pack_flag)) + { + if (field_type == MYSQL_TYPE_DECIMAL) // 3.23 or 4.0 string + return &type_handler_string; + if (field_type == MYSQL_TYPE_VARCHAR) // Since mysql-5.0 + return &type_handler_varchar; + return NULL; // Error (bad frm?) + } + + if (f_is_blob(pack_flag)) + return &type_handler_blob; // QQ: exact type?? 
+ + if (interval_nr) + { + if (f_is_enum(pack_flag)) + return &type_handler_enum; + return &type_handler_set; + } + return Type_handler::get_handler_by_real_type(field_type); +} + +/* Set overlapped bitmaps for each index */ + +void TABLE_SHARE::set_overlapped_keys() +{ + KEY *key1= key_info; + for (uint i= 0; i < keys; i++, key1++) + { + key1->overlapped.clear_all(); + key1->overlapped.set_bit(i); + } + key1= key_info; + for (uint i= 0; i < keys; i++, key1++) + { + KEY *key2= key1 + 1; + for (uint j= i+1; j < keys; j++, key2++) + { + KEY_PART_INFO *key_part1= key1->key_part; + uint n1= key1->user_defined_key_parts; + uint n2= key2->user_defined_key_parts; + for (uint k= 0; k < n1; k++, key_part1++) + { + KEY_PART_INFO *key_part2= key2->key_part; + for (uint l= 0; l < n2; l++, key_part2++) + { + if (key_part1->fieldnr == key_part2->fieldnr) + { + key1->overlapped.set_bit(j); + key2->overlapped.set_bit(i); + goto end_checking_overlap; + } + } + } + end_checking_overlap: + ; + } + } +} + + +/* + @brief + Set of indexes that are marked as IGNORE. +*/ + +void TABLE_SHARE::set_ignored_indexes() +{ + KEY *keyinfo= key_info; + for (uint i= 0; i < keys; i++, keyinfo++) + { + if (keyinfo->is_ignored) + ignored_indexes.set_bit(i); + } +} + + +/* + @brief + Set of indexes that the optimizer may use when creating an execution plan. 
+*/ + +key_map TABLE_SHARE::usable_indexes(THD *thd) +{ + key_map usable_indexes(keys_in_use); + usable_indexes.subtract(ignored_indexes); + return usable_indexes; +} + + +bool Item_field::check_index_dependence(void *arg) +{ + TABLE *table= (TABLE *)arg; + + KEY *key= table->key_info; + for (uint j= 0; j < table->s->keys; j++, key++) + { + if (table->constraint_dependent_keys.is_set(j)) + continue; + + KEY_PART_INFO *key_part= key->key_part; + uint n= key->user_defined_key_parts; + + for (uint k= 0; k < n; k++, key_part++) + { + if (this->field == key_part->field) + { + table->constraint_dependent_keys.set_bit(j); + break; + } + } + } + return false; +} + + +/** + @brief + Find keys that occur in the same constraint on this table + + @details + Constraints on this table are checked only. + + The method goes through constraints list trying to find at + least two keys which parts participate in some constraint. + These keys are called constraint correlated. + + Each key has its own key map with the information about with + which keys it is constraint correlated. Bit in this map is set + only if keys are constraint correlated. + This method fills each keys constraint correlated key map. 
+*/ + +void TABLE::find_constraint_correlated_indexes() +{ + if (s->keys == 0) + return; + + KEY *key= key_info; + for (uint i= 0; i < s->keys; i++, key++) + { + key->constraint_correlated.clear_all(); + key->constraint_correlated.set_bit(i); + } + + if (!check_constraints) + return; + + for (Virtual_column_info **chk= check_constraints ; *chk ; chk++) + { + constraint_dependent_keys.clear_all(); + (*chk)->expr->walk(&Item::check_index_dependence, 0, this); + + if (constraint_dependent_keys.bits_set() <= 1) + continue; + + uint key_no= 0; + key_map::Iterator ki(constraint_dependent_keys); + while ((key_no= ki++) != key_map::Iterator::BITMAP_END) + key_info[key_no].constraint_correlated.merge(constraint_dependent_keys); + } +} + + +bool TABLE_SHARE::init_period_from_extra2(period_info_t *period, + const uchar *data, const uchar *end) +{ + if (data + 2*frm_fieldno_size > end) + return 1; + period->start_fieldno= read_frm_fieldno(data); + period->end_fieldno= read_frm_fieldno(data + frm_fieldno_size); + return period->start_fieldno >= fields || period->end_fieldno >= fields; +} + + +static +bool read_extra2_section_once(const uchar *extra2, size_t len, LEX_CUSTRING *section) +{ + if (section->str) + return true; + *section= {extra2, len}; + return false; +} + +static +bool read_extra2(const uchar *frm_image, size_t len, extra2_fields *fields) +{ + const uchar *extra2= frm_image + 64; + + DBUG_ENTER("read_extra2"); + + fields->reset(); + + if (*extra2 != '/') // old frm had '/' there + { + const uchar *e2end= extra2 + len; + while (extra2 + 3 <= e2end) + { + extra2_frm_value_type type= (extra2_frm_value_type)*extra2++; + size_t length= extra2_read_len(&extra2, e2end); + if (!length) + DBUG_RETURN(true); + + bool fail= false; + switch (type) { + case EXTRA2_TABLEDEF_VERSION: + if (fields->version.str) // see init_from_sql_statement_string() + { + if (length != fields->version.length) + DBUG_RETURN(true); + } + else + { + fields->version.str= extra2; + 
fields->version.length= length; + } + break; + case EXTRA2_ENGINE_TABLEOPTS: + fail= read_extra2_section_once(extra2, length, &fields->options); + break; + case EXTRA2_DEFAULT_PART_ENGINE: + fields->engine.set((const char*)extra2, length); + break; + case EXTRA2_GIS: + fail= read_extra2_section_once(extra2, length, &fields->gis); + break; + case EXTRA2_PERIOD_FOR_SYSTEM_TIME: + fail= read_extra2_section_once(extra2, length, &fields->system_period) + || length != 2 * frm_fieldno_size; + break; + case EXTRA2_FIELD_FLAGS: + fail= read_extra2_section_once(extra2, length, &fields->field_flags); + break; + case EXTRA2_APPLICATION_TIME_PERIOD: + fail= read_extra2_section_once(extra2, length, &fields->application_period); + break; + case EXTRA2_PERIOD_WITHOUT_OVERLAPS: + fail= read_extra2_section_once(extra2, length, &fields->without_overlaps); + break; + case EXTRA2_FIELD_DATA_TYPE_INFO: + fail= read_extra2_section_once(extra2, length, &fields->field_data_type_info); + break; + case EXTRA2_INDEX_FLAGS: + fail= read_extra2_section_once(extra2, length, &fields->index_flags); + break; + default: + /* abort frm parsing if it's an unknown but important extra2 value */ + if (type >= EXTRA2_ENGINE_IMPORTANT) + DBUG_RETURN(true); + } + if (fail) + DBUG_RETURN(true); + + extra2+= length; + } + if (extra2 != e2end) + DBUG_RETURN(true); + } + DBUG_RETURN(false); +} + + +class Field_data_type_info_array +{ +public: + class Elem + { + LEX_CSTRING m_type_info; + public: + void set(const LEX_CSTRING &type_info) + { + m_type_info= type_info; + } + const LEX_CSTRING &type_info() const + { + return m_type_info; + } + }; +private: + Elem *m_array; + uint m_count; + bool alloc(MEM_ROOT *root, uint count) + { + DBUG_ASSERT(!m_array); + DBUG_ASSERT(!m_count); + size_t nbytes= sizeof(Elem) * count; + if (!(m_array= (Elem*) alloc_root(root, nbytes))) + return true; + m_count= count; + bzero((void*) m_array, nbytes); + return false; + } + static uint32 read_length(uchar **pos, const uchar *end) + 
{ + ulonglong num= safe_net_field_length_ll(pos, end - *pos); + if (num > UINT_MAX32) + return 0; + return (uint32) num; + } + static bool read_string(LEX_CSTRING *to, uchar **pos, const uchar *end) + { + to->length= read_length(pos, end); + if (*pos + to->length > end) + return true; // Not enough data + to->str= (const char *) *pos; + *pos+= to->length; + return false; + } +public: + Field_data_type_info_array() + :m_array(NULL), m_count(0) + { } + uint count() const + { + return m_count; + } + const Elem& element(uint i) const + { + DBUG_ASSERT(i < m_count); + return m_array[i]; + } + bool parse(MEM_ROOT *root, uint count, LEX_CUSTRING &image) + { + const uchar *pos= image.str; + const uchar *end= pos + image.length; + if (alloc(root, count)) + return true; + for (uint i= 0; i < count && pos < end; i++) + { + LEX_CSTRING type_info; + uint fieldnr= read_length((uchar**) &pos, end); + if ((fieldnr == 0 && i > 0) || fieldnr >= count) + return true; // Bad data + if (read_string(&type_info, (uchar**) &pos, end) || type_info.length == 0) + return true; // Bad data + m_array[fieldnr].set(type_info); + } + return pos < end; // Error if some data is still left + } +}; + + +/* + Change to use the partition storage engine +*/ + +#ifdef WITH_PARTITION_STORAGE_ENGINE +static bool change_to_partiton_engine(LEX_CSTRING *name, + plugin_ref *se_plugin) +{ + /* + Use partition handler + tmp_plugin is locked with a local lock. 
+ we unlock the old value of se_plugin before + replacing it with a globally locked version of tmp_plugin + */ + /* Check if the partitioning engine is ready */ + if (!plugin_is_ready(name, MYSQL_STORAGE_ENGINE_PLUGIN)) + { + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), + "--skip-partition"); + return 1; + } + plugin_unlock(NULL, *se_plugin); + *se_plugin= ha_lock_engine(NULL, partition_hton); + return 0; +} +#endif /* WITH_PARTITION_STORAGE_ENGINE */ + +/** + Read data from a binary .frm file image into a TABLE_SHARE + + @param write Write the .frm and .par file. These are not created if + the function returns an error. + + @note + frm bytes at the following offsets are unused in MariaDB 10.0: + + 8..9 (used to be the number of "form names") + 28..29 (used to be key_info_length) + + They're still set, for compatibility reasons, but never read. + + 42..46 are unused since 5.0 (were for RAID support) + Also, there're few unused bytes in forminfo. +*/ + +int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, + const uchar *frm_image, + size_t frm_length, + const uchar *par_image, + size_t par_length) +{ + TABLE_SHARE *share= this; + uint new_frm_ver, field_pack_length, new_field_pack_flag; + uint interval_count, interval_parts, read_length, int_length; + uint total_typelib_value_count; + uint db_create_options, keys, key_parts, n_length; + uint com_length, null_bit_pos, UNINIT_VAR(mysql57_vcol_null_bit_pos), bitmap_count; + uint i, hash_fields= 0; + bool use_hash, mysql57_null_bits= 0; + LEX_STRING keynames= {NULL, 0}; + char *names, *comment_pos; + const uchar *forminfo; + const uchar *frm_image_end = frm_image + frm_length; + uchar *record, *null_flags, *null_pos, *UNINIT_VAR(mysql57_vcol_null_pos); + const uchar *disk_buff, *strpos; + ulong pos, record_offset; + ulong rec_buff_length; + handler *handler_file= 0; + KEY *keyinfo; + KEY_PART_INFO *key_part= NULL; + Field **field_ptr, *reg_field; + const char **interval_array; + uint 
*typelib_value_lengths= NULL; + enum legacy_db_type legacy_db_type; + my_bitmap_map *bitmaps; + bool null_bits_are_used; + uint vcol_screen_length; + uchar *vcol_screen_pos; + LEX_CUSTRING options; + LEX_CSTRING se_name= empty_clex_str; + KEY first_keyinfo; + uint len; + uint ext_key_parts= 0; + plugin_ref se_plugin= 0; + bool vers_can_native= false, frm_created= 0; + Field_data_type_info_array field_data_type_info_array; + MEM_ROOT *old_root= thd->mem_root; + Virtual_column_info **table_check_constraints; + bool *interval_unescaped= NULL; + extra2_fields extra2; + bool extra_index_flags_present= FALSE; + DBUG_ENTER("TABLE_SHARE::init_from_binary_frm_image"); + + keyinfo= &first_keyinfo; + thd->mem_root= &share->mem_root; + + if (frm_length < FRM_HEADER_SIZE + FRM_FORMINFO_SIZE) + goto err; + + if (write) + { + frm_created= 1; + if (write_frm_image(frm_image, frm_length)) + goto err; +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (par_image) + if (write_par_image(par_image, par_length)) + goto err; +#endif + } + + share->frm_version= frm_image[2]; + /* + Check if .frm file created by MySQL 5.0. In this case we want to + display CHAR fields as CHAR and not as VARCHAR. + We do it this way as we want to keep the old frm version to enable + MySQL 4.1 to read these files. + */ + if (share->frm_version == FRM_VER_TRUE_VARCHAR -1 && frm_image[33] == 5) + share->frm_version= FRM_VER_TRUE_VARCHAR; + + new_field_pack_flag= frm_image[27]; + new_frm_ver= (frm_image[2] - FRM_VER); + field_pack_length= new_frm_ver < 2 ? 11 : 17; + + /* Length of the MariaDB extra2 segment in the form file. 
*/ + len = uint2korr(frm_image+4); + + if (read_extra2(frm_image, len, &extra2)) + goto err; + + tabledef_version.length= extra2.version.length; + tabledef_version.str= (uchar*)memdup_root(&mem_root, extra2.version.str, + extra2.version.length); + if (!tabledef_version.str) + goto err; + + /* remember but delay parsing until we have read fields and keys */ + options= extra2.options; + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (extra2.engine) + { + share->default_part_plugin= ha_resolve_by_name(NULL, &extra2.engine, false); + if (!share->default_part_plugin) + goto err; + } +#endif + + if (frm_length < FRM_HEADER_SIZE + len || + !(pos= uint4korr(frm_image + FRM_HEADER_SIZE + len))) + goto err; + + forminfo= frm_image + pos; + if (forminfo + FRM_FORMINFO_SIZE >= frm_image_end) + goto err; + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (frm_image[61] && !share->default_part_plugin) + { + enum legacy_db_type db_type= (enum legacy_db_type) (uint) frm_image[61]; + share->default_part_plugin= ha_lock_engine(NULL, ha_checktype(thd, db_type, 1)); + if (!share->default_part_plugin) + goto err; + } +#endif + legacy_db_type= (enum legacy_db_type) (uint) frm_image[3]; + /* + if the storage engine is dynamic, no point in resolving it by its + dynamically allocated legacy_db_type. We will resolve it later by name. 
+ */ + if (legacy_db_type > DB_TYPE_UNKNOWN && + legacy_db_type < DB_TYPE_FIRST_DYNAMIC) + se_plugin= ha_lock_engine(NULL, ha_checktype(thd, legacy_db_type)); + share->db_create_options= db_create_options= uint2korr(frm_image+30); + share->db_options_in_use= share->db_create_options; + share->mysql_version= uint4korr(frm_image+51); + share->table_type= TABLE_TYPE_NORMAL; + share->null_field_first= 0; + if (!frm_image[32]) // New frm file in 3.23 + { + uint cs_org= (((uint) frm_image[41]) << 8) + (uint) frm_image[38]; + uint cs_new= Charset::upgrade_collation_id(share->mysql_version, cs_org); + if (cs_org != cs_new) + share->incompatible_version|= HA_CREATE_USED_CHARSET; + + share->avg_row_length= uint4korr(frm_image+34); + share->transactional= (ha_choice) + enum_value_with_check(thd, share, "transactional", frm_image[39] & 3, HA_CHOICE_MAX); + share->page_checksum= (ha_choice) + enum_value_with_check(thd, share, "page_checksum", (frm_image[39] >> 2) & 3, HA_CHOICE_MAX); + if (((ha_choice) enum_value_with_check(thd, share, "sequence", + (frm_image[39] >> 4) & 3, + HA_CHOICE_MAX)) == HA_CHOICE_YES) + { + share->table_type= TABLE_TYPE_SEQUENCE; + share->sequence= new (&share->mem_root) SEQUENCE(); + share->non_determinstic_insert= true; + } + share->row_type= (enum row_type) + enum_value_with_check(thd, share, "row_format", frm_image[40], ROW_TYPE_MAX); + + if (cs_new && !(share->table_charset= get_charset(cs_new, MYF(MY_WME)))) + goto err; + share->null_field_first= 1; + share->stats_sample_pages= uint2korr(frm_image+42); + share->stats_auto_recalc= (enum_stats_auto_recalc)(frm_image[44]); + share->table_check_constraints= uint2korr(frm_image+45); + } + if (!share->table_charset) + { + const CHARSET_INFO *cs= thd->variables.collation_database; + /* unknown charset in frm_image[38] or pre-3.23 frm */ + if (cs->use_mb()) + { + /* Warn that we may be changing the size of character columns */ + sql_print_warning("'%s' had no or invalid character set, " + "and default 
character set is multi-byte, " + "so character column sizes may have changed", + share->path.str); + } + share->table_charset= cs; + } + + share->db_record_offset= 1; + share->max_rows= uint4korr(frm_image+18); + share->min_rows= uint4korr(frm_image+22); + + /* Read keyinformation */ + disk_buff= frm_image + uint2korr(frm_image+6); + + if (disk_buff + 6 >= frm_image_end) + goto err; + + if (disk_buff[0] & 0x80) + { + keys= (disk_buff[1] << 7) | (disk_buff[0] & 0x7f); + share->key_parts= key_parts= uint2korr(disk_buff+2); + } + else + { + keys= disk_buff[0]; + share->key_parts= key_parts= disk_buff[1]; + } + share->keys_for_keyread.init(0); + share->ignored_indexes.init(0); + share->keys_in_use.init(keys); + ext_key_parts= key_parts; + + if (extra2.index_flags.str && extra2.index_flags.length != keys) + goto err; + + len= (uint) uint2korr(disk_buff+4); + + share->reclength = uint2korr(frm_image+16); + share->stored_rec_length= share->reclength; + if (frm_image[26] == 1) + share->system= 1; /* one-record-database */ + + record_offset= (ulong) (uint2korr(frm_image+6)+ + ((uint2korr(frm_image+14) == 0xffff ? + uint4korr(frm_image+47) : uint2korr(frm_image+14)))); + + if (record_offset + share->reclength >= frm_length) + goto err; + + if ((n_length= uint4korr(frm_image+55))) + { + /* Read extra data segment */ + const uchar *next_chunk, *buff_end; + DBUG_PRINT("info", ("extra segment size is %u bytes", n_length)); + next_chunk= frm_image + record_offset + share->reclength; + buff_end= next_chunk + n_length; + + if (buff_end >= frm_image_end) + goto err; + + share->connect_string.length= uint2korr(next_chunk); + if (!(share->connect_string.str= strmake_root(&share->mem_root, + (char*) next_chunk + 2, + share->connect_string. 
+ length))) + { + goto err; + } + next_chunk+= share->connect_string.length + 2; + if (next_chunk + 2 < buff_end) + { + uint str_db_type_length= uint2korr(next_chunk); + se_name.str= (char*) next_chunk + 2; + se_name.length= str_db_type_length; + + plugin_ref tmp_plugin= ha_resolve_by_name(thd, &se_name, false); + if (tmp_plugin != NULL && !plugin_equals(tmp_plugin, se_plugin) && + legacy_db_type != DB_TYPE_S3) + { + if (se_plugin) + { + /* bad file, legacy_db_type did not match the name */ + sql_print_warning("%s.frm is inconsistent: engine typecode %d, engine name %s (%d)", + share->normalized_path.str, legacy_db_type, + plugin_name(tmp_plugin)->str, + ha_legacy_type(plugin_data(tmp_plugin, handlerton *))); + } + /* + tmp_plugin is locked with a local lock. + we unlock the old value of se_plugin before + replacing it with a globally locked version of tmp_plugin + */ + plugin_unlock(NULL, se_plugin); + se_plugin= plugin_lock(NULL, tmp_plugin); + } +#ifdef WITH_PARTITION_STORAGE_ENGINE + else if (str_db_type_length == 9 && + !strncmp((char *) next_chunk + 2, "partition", 9)) + { + if (change_to_partiton_engine(&se_name, &se_plugin)) + goto err; + } +#endif + else if (!tmp_plugin) + { + /* purecov: begin inspected */ + ((char*) se_name.str)[se_name.length]=0; + my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), se_name.str); + goto err; + /* purecov: end */ + } + next_chunk+= str_db_type_length + 2; + } + + share->set_use_ext_keys_flag(plugin_hton(se_plugin)->flags & HTON_SUPPORTS_EXTENDED_KEYS); + + if (create_key_infos(disk_buff + 6, frm_image_end, keys, keyinfo, + new_frm_ver, &ext_key_parts, + share, len, &first_keyinfo, &keynames)) + goto err; + + if (next_chunk + 5 < buff_end) + { + uint32 partition_info_str_len = uint4korr(next_chunk); +#ifdef WITH_PARTITION_STORAGE_ENGINE + if ((share->partition_info_buffer_size= + share->partition_info_str_len= partition_info_str_len)) + { + if (!(share->partition_info_str= (char*) + memdup_root(&share->mem_root, next_chunk + 4, + 
partition_info_str_len + 1))) + { + goto err; + } + if (plugin_data(se_plugin, handlerton*) != partition_hton && + share->mysql_version >= 50600 && share->mysql_version <= 50799) + { + share->keep_original_mysql_version= 1; + if (change_to_partiton_engine(&se_name, &se_plugin)) + goto err; + } + } +#else + if (partition_info_str_len) + { + DBUG_PRINT("info", ("WITH_PARTITION_STORAGE_ENGINE is not defined")); + goto err; + } +#endif + next_chunk+= 5 + partition_info_str_len; + } + if (share->mysql_version >= 50110 && next_chunk < buff_end) + { + /* New auto_partitioned indicator introduced in 5.1.11 */ +#ifdef WITH_PARTITION_STORAGE_ENGINE + share->auto_partitioned= *next_chunk; +#endif + next_chunk++; + } + keyinfo= share->key_info; + for (i= 0; i < keys; i++, keyinfo++) + { + if (keyinfo->flags & HA_USES_PARSER) + { + LEX_CSTRING parser_name; + if (next_chunk >= buff_end) + { + DBUG_PRINT("error", + ("fulltext key uses parser that is not defined in .frm")); + goto err; + } + parser_name.str= (char*) next_chunk; + parser_name.length= strlen((char*) next_chunk); + next_chunk+= parser_name.length + 1; + keyinfo->parser= my_plugin_lock_by_name(NULL, &parser_name, + MYSQL_FTPARSER_PLUGIN); + if (! keyinfo->parser) + { + my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), parser_name.str); + goto err; + } + } + } + + if (forminfo[46] == (uchar)255) + { + //reading long table comment + if (next_chunk + 2 > buff_end) + { + DBUG_PRINT("error", + ("long table comment is not defined in .frm")); + goto err; + } + share->comment.length = uint2korr(next_chunk); + if (! 
(share->comment.str= strmake_root(&share->mem_root, + (char*)next_chunk + 2, share->comment.length))) + { + goto err; + } + next_chunk+= 2 + share->comment.length; + } + + DBUG_ASSERT(next_chunk <= buff_end); + + if (share->db_create_options & HA_OPTION_TEXT_CREATE_OPTIONS_legacy) + { + if (options.str) + goto err; + options.length= uint4korr(next_chunk); + options.str= next_chunk + 4; + next_chunk+= options.length + 4; + } + DBUG_ASSERT(next_chunk <= buff_end); + } + else + { + if (create_key_infos(disk_buff + 6, frm_image_end, keys, keyinfo, + new_frm_ver, &ext_key_parts, + share, len, &first_keyinfo, &keynames)) + goto err; + } + share->key_block_size= uint2korr(frm_image+62); + keyinfo= share->key_info; + + + if (extra2.index_flags.str) + extra_index_flags_present= TRUE; + + for (uint i= 0; i < share->keys; i++, keyinfo++) + { + if (extra_index_flags_present) + { + uchar flags= *extra2.index_flags.str++; + keyinfo->is_ignored= (flags & EXTRA2_IGNORED_KEY); + } + else + keyinfo->is_ignored= FALSE; + + if (keyinfo->algorithm == HA_KEY_ALG_LONG_HASH) + hash_fields++; + } + + share->set_ignored_indexes(); + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (par_image && plugin_data(se_plugin, handlerton*) == partition_hton) + { + /* + Discovery returned a partition plugin. Change to use it. The partition + engine will then use discovery to find the rest of the plugin tables, + which may be in the original engine used for discovery + */ + share->db_plugin= se_plugin; + } +#endif + if (share->db_plugin && !plugin_equals(share->db_plugin, se_plugin)) + goto err; // wrong engine (someone changed the frm under our feet?) 
+ + rec_buff_length= ALIGN_SIZE(share->reclength + 1); + share->rec_buff_length= rec_buff_length; + if (!(record= (uchar *) alloc_root(&share->mem_root, rec_buff_length))) + goto err; /* purecov: inspected */ + /* Mark bytes after record as not accessable to catch overrun bugs */ + MEM_NOACCESS(record + share->reclength, rec_buff_length - share->reclength); + share->default_values= record; + memcpy(record, frm_image + record_offset, share->reclength); + + disk_buff= frm_image + pos + FRM_FORMINFO_SIZE; + share->fields= uint2korr(forminfo+258); + if (extra2.field_flags.str && extra2.field_flags.length != share->fields) + goto err; + pos= uint2korr(forminfo+260); /* Length of all screens */ + n_length= uint2korr(forminfo+268); + interval_count= uint2korr(forminfo+270); + interval_parts= uint2korr(forminfo+272); + int_length= uint2korr(forminfo+274); + share->null_fields= uint2korr(forminfo+282); + com_length= uint2korr(forminfo+284); + vcol_screen_length= uint2korr(forminfo+286); + share->virtual_fields= share->default_expressions= + share->field_check_constraints= share->default_fields= 0; + share->visible_fields= 0; + share->stored_fields= share->fields; + if (forminfo[46] != (uchar)255) + { + share->comment.length= (int) (forminfo[46]); + share->comment.str= strmake_root(&share->mem_root, (char*) forminfo+47, + share->comment.length); + } + + DBUG_PRINT("info",("i_count: %d i_parts: %d index: %d n_length: %d int_length: %d com_length: %d vcol_screen_length: %d", interval_count,interval_parts, keys,n_length,int_length, com_length, vcol_screen_length)); + + /* + We load the following things into TYPELIBs: + - One TYPELIB for field names + - interval_count TYPELIBs for ENUM/SET values + - One TYPELIB for key names + Every TYPELIB requires one extra value with a NULL pointer and zero length, + which is the end-of-values marker. + TODO-10.5+: + Note, we should eventually reuse this total_typelib_value_count + to allocate interval_array. 
The below code reserves less space + than total_typelib_value_count pointers. So it seems `interval_array` + and `names` overlap in the memory. Too dangerous to fix in 10.1. + */ + total_typelib_value_count= + (share->fields + 1/*end-of-values marker*/) + + (interval_parts + interval_count/*end-of-values markers*/) + + (keys + 1/*end-of-values marker*/); + + if (!multi_alloc_root(&share->mem_root, + &share->field, (uint)(share->fields+1)*sizeof(Field*), + &share->intervals, (uint)interval_count*sizeof(TYPELIB), + &share->check_constraints, (uint) share->table_check_constraints * sizeof(Virtual_column_info*), + /* + This looks wrong: shouldn't it be (+2+interval_count) + instread of (+3) ? + */ + &interval_array, (uint) (share->fields+interval_parts+ keys+3)*sizeof(char *), + &typelib_value_lengths, total_typelib_value_count * sizeof(uint *), + &names, (uint) (n_length+int_length), + &comment_pos, (uint) com_length, + &vcol_screen_pos, vcol_screen_length, + NullS)) + + goto err; + + if (interval_count) + { + if (!(interval_unescaped= (bool*) my_alloca(interval_count * sizeof(bool)))) + goto err; + bzero(interval_unescaped, interval_count * sizeof(bool)); + } + + field_ptr= share->field; + table_check_constraints= share->check_constraints; + read_length=(uint) (share->fields * field_pack_length + + pos+ (uint) (n_length+int_length+com_length+ + vcol_screen_length)); + strpos= disk_buff+pos; + + if (!interval_count) + share->intervals= 0; // For better debugging + + share->vcol_defs.str= vcol_screen_pos; + share->vcol_defs.length= vcol_screen_length; + + memcpy(names, strpos+(share->fields*field_pack_length), n_length+int_length); + memcpy(comment_pos, disk_buff+read_length-com_length-vcol_screen_length, + com_length); + memcpy(vcol_screen_pos, disk_buff+read_length-vcol_screen_length, + vcol_screen_length); + + if (fix_type_pointers(&interval_array, &typelib_value_lengths, + &share->fieldnames, 1, names, n_length) || + share->fieldnames.count != share->fields) + goto 
err; + + if (fix_type_pointers(&interval_array, &typelib_value_lengths, + share->intervals, interval_count, + names + n_length, int_length)) + goto err; + + if (keynames.length && + (fix_type_pointers(&interval_array, &typelib_value_lengths, + &share->keynames, 1, keynames.str, keynames.length) || + share->keynames.count != keys)) + goto err; + + /* Allocate handler */ + if (!(handler_file= get_new_handler(share, thd->mem_root, + plugin_hton(se_plugin)))) + goto err; + + if (handler_file->set_ha_share_ref(&share->ha_share)) + goto err; + + record= share->default_values-1; /* Fieldstart = 1 */ + null_bits_are_used= share->null_fields != 0; + if (share->null_field_first) + { + null_flags= null_pos= record+1; + null_bit_pos= (db_create_options & HA_OPTION_PACK_RECORD) ? 0 : 1; + /* + null_bytes below is only correct under the condition that + there are no bit fields. Correct values is set below after the + table struct is initialized + */ + share->null_bytes= (share->null_fields + null_bit_pos + 7) / 8; + } +#ifndef WE_WANT_TO_SUPPORT_VERY_OLD_FRM_FILES + else + { + share->null_bytes= (share->null_fields+7)/8; + null_flags= null_pos= record + 1 + share->reclength - share->null_bytes; + null_bit_pos= 0; + } +#endif + + use_hash= share->fields >= MAX_FIELDS_BEFORE_HASH; + if (use_hash) + use_hash= !my_hash_init(PSI_INSTRUMENT_ME, &share->name_hash, + system_charset_info, share->fields, 0, 0, + (my_hash_get_key) get_field_name, 0, 0); + + if (share->mysql_version >= 50700 && share->mysql_version < 100000 && + vcol_screen_length) + { + share->keep_original_mysql_version= 1; + /* + MySQL 5.7 stores the null bits for not stored fields last. + Calculate the position for them. + */ + mysql57_null_bits= 1; + mysql57_vcol_null_pos= null_pos; + mysql57_vcol_null_bit_pos= null_bit_pos; + mysql57_calculate_null_position(share, &mysql57_vcol_null_pos, + &mysql57_vcol_null_bit_pos, + strpos, vcol_screen_pos); + } + + /* Set system versioning information. 
*/ + vers.name= Lex_ident(STRING_WITH_LEN("SYSTEM_TIME")); + if (extra2.system_period.str == NULL) + { + versioned= VERS_UNDEFINED; + vers.start_fieldno= 0; + vers.end_fieldno= 0; + } + else + { + DBUG_PRINT("info", ("Setting system versioning information")); + if (init_period_from_extra2(&vers, extra2.system_period.str, + extra2.system_period.str + extra2.system_period.length)) + goto err; + DBUG_PRINT("info", ("Columns with system versioning: [%d, %d]", + vers.start_fieldno, vers.end_fieldno)); + versioned= VERS_TIMESTAMP; + vers_can_native= handler_file->vers_can_native(thd); + status_var_increment(thd->status_var.feature_system_versioning); + } // if (system_period == NULL) + + if (extra2.application_period.str) + { + const uchar *pos= extra2.application_period.str; + const uchar *end= pos + extra2.application_period.length; + period.name.length= extra2_read_len(&pos, end); + period.name.str= strmake_root(&mem_root, (char*)pos, period.name.length); + pos+= period.name.length; + + period.constr_name.length= extra2_read_len(&pos, end); + period.constr_name.str= strmake_root(&mem_root, (char*)pos, + period.constr_name.length); + pos+= period.constr_name.length; + + if (init_period_from_extra2(&period, pos, end)) + goto err; + if (extra2_str_size(period.name.length) + + extra2_str_size(period.constr_name.length) + + 2 * frm_fieldno_size + != extra2.application_period.length) + goto err; + status_var_increment(thd->status_var.feature_application_time_periods); + } + + if (extra2.without_overlaps.str) + { + if (extra2.application_period.str == NULL) + goto err; + const uchar *key_pos= extra2.without_overlaps.str; + period.unique_keys= read_frm_keyno(key_pos); + for (uint k= 0; k < period.unique_keys; k++) + { + key_pos+= frm_keyno_size; + uint key_nr= read_frm_keyno(key_pos); + key_info[key_nr].without_overlaps= true; + } + + if ((period.unique_keys + 1) * frm_keyno_size + != extra2.without_overlaps.length) + goto err; + } + + if (extra2.field_data_type_info.length 
&& + field_data_type_info_array.parse(old_root, share->fields, + extra2.field_data_type_info)) + goto err; + + for (i=0 ; i < share->fields; i++, strpos+=field_pack_length, field_ptr++) + { + uint interval_nr= 0, recpos; + LEX_CSTRING comment; + LEX_CSTRING name; + Virtual_column_info *vcol_info= 0; + const Type_handler *handler; + uint32 flags= 0; + Column_definition_attributes attr; + + if (new_frm_ver >= 3) + { + /* new frm file in 4.1 */ + recpos= uint3korr(strpos+5); + uint comment_length=uint2korr(strpos+15); + + if (!comment_length) + { + comment.str= (char*) ""; + comment.length=0; + } + else + { + comment.str= (char*) comment_pos; + comment.length= comment_length; + comment_pos+= comment_length; + } + + if (strpos[13] == MYSQL_TYPE_VIRTUAL && + (share->mysql_version < 50600 || share->mysql_version >= 100000)) + { + /* + MariaDB 5.5 or 10.0 version. + The interval_id byte in the .frm file stores the length of the + expression statement for a virtual column. + */ + uint vcol_info_length= (uint) strpos[12]; + + if (!vcol_info_length) // Expect non-null expression + goto err; + + attr.frm_unpack_basic(strpos); + if (attr.frm_unpack_charset(share, strpos)) + goto err; + /* + Old virtual field information before 10.2 + + Get virtual column data stored in the .frm file as follows: + byte 1 = 1 | 2 + byte 2 = sql_type + byte 3 = flags. 1 for stored_in_db + [byte 4] = optional interval_id for sql_type (if byte 1 == 2) + next byte ... = virtual column expression (text data) + */ + + vcol_info= new (&share->mem_root) Virtual_column_info(); + bool opt_interval_id= (uint)vcol_screen_pos[0] == 2; + enum_field_types ftype= (enum_field_types) (uchar) vcol_screen_pos[1]; + if (!(handler= Type_handler::get_handler_by_real_type(ftype))) + goto err; + if (opt_interval_id) + interval_nr= (uint)vcol_screen_pos[3]; + else if ((uint)vcol_screen_pos[0] != 1) + goto err; + bool stored= vcol_screen_pos[2] & 1; + vcol_info->stored_in_db= stored; + vcol_info->set_vcol_type(stored ? 
VCOL_GENERATED_STORED : VCOL_GENERATED_VIRTUAL); + uint vcol_expr_length= vcol_info_length - + (uint)(FRM_VCOL_OLD_HEADER_SIZE(opt_interval_id)); + vcol_info->utf8= 0; // before 10.2.1 the charset was unknown + int2store(vcol_screen_pos+1, vcol_expr_length); // for parse_vcol_defs() + vcol_screen_pos+= vcol_info_length; + share->virtual_fields++; + } + else + { + interval_nr= (uint) strpos[12]; + enum_field_types field_type= (enum_field_types) strpos[13]; + if (!(handler= Type_handler::get_handler_by_real_type(field_type))) + { + if (field_type == 245 && + share->mysql_version >= 50700) // a.k.a MySQL 5.7 JSON + { + share->incompatible_version|= HA_CREATE_USED_ENGINE; + const LEX_CSTRING mysql_json{STRING_WITH_LEN("MYSQL_JSON")}; + handler= Type_handler::handler_by_name_or_error(thd, mysql_json); + } + + if (!handler) + goto err; // Not supported field type + } + handler= handler->type_handler_frm_unpack(strpos); + if (handler->Column_definition_attributes_frm_unpack(&attr, share, + strpos, + &extra2.gis)) + goto err; + + if (field_data_type_info_array.count()) + { + const LEX_CSTRING &info= field_data_type_info_array. + element(i).type_info(); + DBUG_EXECUTE_IF("frm_data_type_info", + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_UNKNOWN_ERROR, "DBUG: [%u] name='%s' type_info='%.*s'", + i, share->fieldnames.type_names[i], + (uint) info.length, info.str);); + + if (info.length) + { + const Type_handler *h= Type_handler::handler_by_name_or_error(thd, + info); + /* + This code will eventually be extended here: + - If the handler was not found by name, we could + still open the table using the fallback type handler "handler", + at least for a limited set of commands. + - If the handler was found by name, we could check + that "h" and "handler" have the same type code + (and maybe some other properties) to make sure + that the FRM data is consistent. 
+ */ + if (!h) + goto err; + handler= h; + } + } + } + + if (((uint) strpos[10]) & MYSQL57_GENERATED_FIELD) + { + attr.unireg_check= Field::NONE; + + /* + MySQL 5.7 generated fields + + byte 1 = 1 + byte 2,3 = expr length + byte 4 = stored_in_db + byte 5.. = expr + */ + if ((uint)(vcol_screen_pos)[0] != 1) + goto err; + vcol_info= new (&share->mem_root) Virtual_column_info(); + uint vcol_info_length= uint2korr(vcol_screen_pos + 1); + if (!vcol_info_length) // Expect non-empty expression + goto err; + vcol_info->stored_in_db= vcol_screen_pos[3]; + vcol_info->utf8= 0; + vcol_screen_pos+= vcol_info_length + MYSQL57_GCOL_HEADER_SIZE;; + share->virtual_fields++; + } + } + else + { + attr.length= (uint) strpos[3]; + recpos= uint2korr(strpos+4), + attr.pack_flag= uint2korr(strpos+6); + if (f_is_num(attr.pack_flag)) + { + attr.decimals= f_decimals(attr.pack_flag); + attr.pack_flag&= ~FIELDFLAG_DEC_MASK; + } + attr.pack_flag&= ~FIELDFLAG_NO_DEFAULT; // Safety for old files + attr.unireg_check= (Field::utype) MTYP_TYPENR((uint) strpos[8]); + interval_nr= (uint) strpos[10]; + + /* old frm file */ + enum_field_types ftype= (enum_field_types) f_packtype(attr.pack_flag); + if (!(handler= Type_handler::get_handler_by_real_type(ftype))) + goto err; // Not supported field type + + if (f_is_binary(attr.pack_flag)) + { + /* + Try to choose the best 4.1 type: + - for 4.0 "CHAR(N) BINARY" or "VARCHAR(N) BINARY" + try to find a binary collation for character set. + - for other types (e.g. BLOB) just use my_charset_bin. 
+ */ + if (!f_is_blob(attr.pack_flag)) + { + // 3.23 or 4.0 string + myf utf8_flag= thd->get_utf8_flag(); + if (!(attr.charset= get_charset_by_csname(share->table_charset-> + cs_name.str, + MY_CS_BINSORT, + MYF(utf8_flag)))) + attr.charset= &my_charset_bin; + } + } + else + attr.charset= share->table_charset; + bzero((char*) &comment, sizeof(comment)); + if ((!(handler= old_frm_type_handler(attr.pack_flag, interval_nr)))) + goto err; // Not supported field type + } + + /* Remove >32 decimals from old files */ + if (share->mysql_version < 100200 && + (attr.pack_flag & FIELDFLAG_LONG_DECIMAL)) + { + share->keep_original_mysql_version= 1; + attr.pack_flag&= ~FIELDFLAG_LONG_DECIMAL; + } + + if (interval_nr && attr.charset->mbminlen > 1 && + !interval_unescaped[interval_nr - 1]) + { + /* + Unescape UCS2/UTF16/UTF32 intervals from HEX notation. + Note, ENUM/SET columns with equal value list share a single + copy of TYPELIB. Unescape every TYPELIB only once. + */ + TYPELIB *interval= share->intervals + interval_nr - 1; + unhex_type2(interval); + interval_unescaped[interval_nr - 1]= true; + } + +#ifndef TO_BE_DELETED_ON_PRODUCTION + if (handler->real_field_type() == MYSQL_TYPE_NEWDECIMAL && + !share->mysql_version) + { + /* + Fix pack length of old decimal values from 5.0.3 -> 5.0.4 + The difference is that in the old version we stored precision + in the .frm table while we now store the display_length + */ + uint decimals= f_decimals(attr.pack_flag); + attr.length= + my_decimal_precision_to_length((uint) attr.length, decimals, + f_is_dec(attr.pack_flag) == 0); + sql_print_error("Found incompatible DECIMAL field '%s' in %s; " + "Please do \"ALTER TABLE '%s' FORCE\" to fix it!", + share->fieldnames.type_names[i], share->table_name.str, + share->table_name.str); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_CRASHED_ON_USAGE, + "Found incompatible DECIMAL field '%s' in %s; " + "Please do \"ALTER TABLE '%s' FORCE\" to fix it!", + 
share->fieldnames.type_names[i], + share->table_name.str, + share->table_name.str); + share->crashed= 1; // Marker for CHECK TABLE + } +#endif + + if (mysql57_null_bits && vcol_info && !vcol_info->stored_in_db) + { + swap_variables(uchar*, null_pos, mysql57_vcol_null_pos); + swap_variables(uint, null_bit_pos, mysql57_vcol_null_bit_pos); + } + + if (versioned) + { + if (i == vers.start_fieldno) + flags|= VERS_ROW_START; + else if (i == vers.end_fieldno) + flags|= VERS_ROW_END; + + if (flags & VERS_SYSTEM_FIELD) + { + auto field_type= handler->real_field_type(); + + DBUG_EXECUTE_IF("error_vers_wrong_type", field_type= MYSQL_TYPE_BLOB;); + + switch (field_type) { + case MYSQL_TYPE_TIMESTAMP2: + break; + case MYSQL_TYPE_LONGLONG: + if (vers_can_native) + { + versioned= VERS_TRX_ID; + break; + } + /* Fallthrough */ + default: + my_error(ER_VERS_FIELD_WRONG_TYPE, + (field_type == MYSQL_TYPE_LONGLONG ? + MYF(0) : MYF(ME_WARNING)), + fieldnames.type_names[i], + (versioned == VERS_TIMESTAMP ? + "TIMESTAMP(6)" : "BIGINT(20) UNSIGNED"), + table_name.str); + goto err; + } + } + } + + /* Convert pre-10.2.2 timestamps to use Field::default_value */ + name.str= fieldnames.type_names[i]; + name.length= strlen(name.str); + attr.interval= interval_nr ? 
share->intervals + interval_nr - 1 : NULL; + Record_addr addr(record + recpos, null_pos, null_bit_pos); + *field_ptr= reg_field= + attr.make_field(share, &share->mem_root, &addr, handler, &name, flags); + if (!reg_field) // Not supported field type + goto err; + + if (attr.unireg_check == Field::TIMESTAMP_DNUN_FIELD || + attr.unireg_check == Field::TIMESTAMP_DN_FIELD) + { + reg_field->default_value= new (&share->mem_root) Virtual_column_info(); + reg_field->default_value->set_vcol_type(VCOL_DEFAULT); + reg_field->default_value->stored_in_db= 1; + share->default_expressions++; + } + + reg_field->field_index= i; + reg_field->comment=comment; + reg_field->vcol_info= vcol_info; + reg_field->flags|= flags; + if (extra2.field_flags.str) + { + uchar flags= *extra2.field_flags.str++; + if (flags & VERS_OPTIMIZED_UPDATE) + reg_field->flags|= VERS_UPDATE_UNVERSIONED_FLAG; + + reg_field->invisible= f_visibility(flags); + } + if (reg_field->invisible == INVISIBLE_USER) + status_var_increment(thd->status_var.feature_invisible_columns); + if (!reg_field->invisible) + share->visible_fields++; + if (handler->real_field_type() == MYSQL_TYPE_BIT && + !f_bit_as_char(attr.pack_flag)) + { + null_bits_are_used= 1; + if ((null_bit_pos+= (uint) (attr.length & 7)) > 7) + { + null_pos++; + null_bit_pos-= 8; + } + } + if (!(reg_field->flags & NOT_NULL_FLAG)) + { + if (!(null_bit_pos= (null_bit_pos + 1) & 7)) + null_pos++; + } + + if (vcol_info) + { + vcol_info->name= reg_field->field_name; + if (mysql57_null_bits && !vcol_info->stored_in_db) + { + /* MySQL 5.7 has null bits last */ + swap_variables(uchar*, null_pos, mysql57_vcol_null_pos); + swap_variables(uint, null_bit_pos, mysql57_vcol_null_bit_pos); + } + } + + if (f_no_default(attr.pack_flag)) + reg_field->flags|= NO_DEFAULT_VALUE_FLAG; + + if (reg_field->unireg_check == Field::NEXT_NUMBER) + share->found_next_number_field= field_ptr; + + if (use_hash && my_hash_insert(&share->name_hash, (uchar*) field_ptr)) + goto err; + if 
(!reg_field->stored_in_db()) + { + share->stored_fields--; + if (share->stored_rec_length>=recpos) + share->stored_rec_length= recpos-1; + } + if (reg_field->has_update_default_function()) + { + has_update_default_function= 1; + if (!reg_field->default_value) + share->default_fields++; + } + } + *field_ptr=0; // End marker + /* Sanity checks: */ + DBUG_ASSERT(share->fields>=share->stored_fields); + DBUG_ASSERT(share->reclength>=share->stored_rec_length); + + if (mysql57_null_bits) + { + /* We want to store the value for the last bits */ + swap_variables(uchar*, null_pos, mysql57_vcol_null_pos); + swap_variables(uint, null_bit_pos, mysql57_vcol_null_bit_pos); + DBUG_ASSERT((null_pos + (null_bit_pos + 7) / 8) <= share->field[0]->ptr); + } + + /* Fix key->name and key_part->field */ + if (key_parts) + { + keyinfo= share->key_info; + uint hash_field_used_no= share->fields - hash_fields; + KEY_PART_INFO *hash_keypart; + Field *hash_field; + uint offset= share->reclength - HA_HASH_FIELD_LENGTH * hash_fields; + for (uint i= 0; i < share->keys; i++, keyinfo++) + { + /* We need set value in hash key_part */ + if (keyinfo->algorithm == HA_KEY_ALG_LONG_HASH) + { + share->long_unique_table= 1; + hash_keypart= keyinfo->key_part + keyinfo->user_defined_key_parts; + hash_keypart->length= HA_HASH_KEY_LENGTH_WITHOUT_NULL; + hash_keypart->store_length= hash_keypart->length; + hash_keypart->type= HA_KEYTYPE_ULONGLONG; + hash_keypart->key_part_flag= 0; + hash_keypart->key_type= 32834; + /* Last n fields are unique_index_hash fields*/ + hash_keypart->offset= offset; + hash_keypart->fieldnr= hash_field_used_no + 1; + hash_field= share->field[hash_field_used_no]; + hash_field->flags|= LONG_UNIQUE_HASH_FIELD;//Used in parse_vcol_defs + keyinfo->flags|= HA_NOSAME; + share->virtual_fields++; + share->stored_fields--; + if (record + share->stored_rec_length >= hash_field->ptr) + share->stored_rec_length= (ulong)(hash_field->ptr - record - 1); + hash_field_used_no++; + offset+= 
HA_HASH_FIELD_LENGTH; + } + } + uint add_first_key_parts= 0; + longlong ha_option= handler_file->ha_table_flags(); + keyinfo= share->key_info; + uint primary_key= my_strcasecmp(system_charset_info, + share->keynames.type_names[0], + primary_key_name.str) ? MAX_KEY : 0; + KEY* key_first_info= NULL; + + if (primary_key >= MAX_KEY && keyinfo->flags & HA_NOSAME && + keyinfo->algorithm != HA_KEY_ALG_LONG_HASH) + { + /* + If the UNIQUE key doesn't have NULL columns and is not a part key + declare this as a primary key. + */ + primary_key= 0; + key_part= keyinfo->key_part; + for (i=0 ; i < keyinfo->user_defined_key_parts ;i++) + { + DBUG_ASSERT(key_part[i].fieldnr > 0); + // Table field corresponding to the i'th key part. + Field *table_field= share->field[key_part[i].fieldnr - 1]; + + /* + If the key column is of NOT NULL BLOB type, then it + will definitly have key prefix. And if key part prefix size + is equal to the BLOB column max size, then we can promote + it to primary key. + */ + if (!table_field->real_maybe_null() && + table_field->type() == MYSQL_TYPE_BLOB && + table_field->field_length == key_part[i].length) + continue; + + if (table_field->real_maybe_null() || + table_field->key_length() != key_part[i].length) + { + primary_key= MAX_KEY; // Can't be used + break; + } + } + } + + /* + Make sure that the primary key is not marked as IGNORE + This can happen in the case + 1) when IGNORE is mentioned in the Key specification + 2) When a unique NON-NULLABLE key is promted to a primary key. + The unqiue key could have been marked as IGNORE when there + was a primary key in the table. + + Eg: + CREATE TABLE t1(a INT NOT NULL, primary key(a), UNIQUE key1(a)) + so for this table when we try to IGNORE key1 + then we run: + ALTER TABLE t1 ALTER INDEX key1 IGNORE + this runs successsfully and key1 is marked as IGNORE. 
+ + But lets say then we drop the primary key + ALTER TABLE t1 DROP PRIMARY + then the UNIQUE key will be promoted to become the primary key + but then the UNIQUE key cannot be marked as IGNORE, so an + error is thrown + */ + if (primary_key != MAX_KEY && keyinfo && keyinfo->is_ignored) + { + my_error(ER_PK_INDEX_CANT_BE_IGNORED, MYF(0)); + goto err; + } + + if (share->use_ext_keys) + { + if (primary_key >= MAX_KEY) + { + add_first_key_parts= 0; + share->set_use_ext_keys_flag(FALSE); + } + else + { + add_first_key_parts= first_keyinfo.user_defined_key_parts; + /* + Do not add components of the primary key starting from + the major component defined over the beginning of a field. + */ + for (i= 0; i < first_keyinfo.user_defined_key_parts; i++) + { + uint fieldnr= keyinfo[0].key_part[i].fieldnr; + if (share->field[fieldnr-1]->key_length() != + keyinfo[0].key_part[i].length) + { + add_first_key_parts= i; + break; + } + } + } + } + + key_first_info= keyinfo; + for (uint key=0 ; key < keys ; key++,keyinfo++) + { + uint usable_parts= 0; + keyinfo->name.str= share->keynames.type_names[key]; + keyinfo->name.length= strlen(keyinfo->name.str); + keyinfo->cache_name= + (uchar*) alloc_root(&share->mem_root, + share->table_cache_key.length+ + keyinfo->name.length + 1); + if (keyinfo->cache_name) // If not out of memory + { + uchar *pos= keyinfo->cache_name; + memcpy(pos, share->table_cache_key.str, share->table_cache_key.length); + memcpy(pos + share->table_cache_key.length, keyinfo->name.str, + keyinfo->name.length+1); + } + + if (ext_key_parts > share->key_parts && key) + { + KEY_PART_INFO *new_key_part= (keyinfo-1)->key_part + + (keyinfo-1)->ext_key_parts; + uint add_keyparts_for_this_key= add_first_key_parts; + uint len_null_byte= 0, ext_key_length= 0; + Field *field; + + if ((keyinfo-1)->algorithm == HA_KEY_ALG_LONG_HASH) + new_key_part++; // reserved for the hash value + + /* + Do not extend the key that contains a component + defined over the beginning of a field. 
+ */ + for (i= 0; i < keyinfo->user_defined_key_parts; i++) + { + uint length_bytes= 0; + uint fieldnr= keyinfo->key_part[i].fieldnr; + field= share->field[fieldnr-1]; + + if (field->null_ptr) + len_null_byte= HA_KEY_NULL_LENGTH; + + if (keyinfo->algorithm != HA_KEY_ALG_LONG_HASH) + length_bytes= field->key_part_length_bytes(); + + ext_key_length+= keyinfo->key_part[i].length + len_null_byte + + length_bytes; + if (field->key_length() != keyinfo->key_part[i].length) + { + add_keyparts_for_this_key= 0; + break; + } + } + + if (add_keyparts_for_this_key) + { + for (i= 0; i < add_keyparts_for_this_key; i++) + { + uint pk_part_length= key_first_info->key_part[i].store_length; + if (keyinfo->ext_key_part_map & 1< MAX_DATA_LENGTH_FOR_KEY) + { + add_keyparts_for_this_key= i; + break; + } + ext_key_length+= pk_part_length; + } + } + } + + if (add_keyparts_for_this_key < keyinfo->ext_key_parts - + keyinfo->user_defined_key_parts) + { + share->ext_key_parts-= keyinfo->ext_key_parts; + key_part_map ext_key_part_map= keyinfo->ext_key_part_map; + keyinfo->ext_key_parts= keyinfo->user_defined_key_parts; + keyinfo->ext_key_flags= keyinfo->flags; + keyinfo->ext_key_part_map= 0; + for (i= 0; i < add_keyparts_for_this_key; i++) + { + if (ext_key_part_map & 1<ext_key_part_map|= 1<ext_key_parts++; + } + } + share->ext_key_parts+= keyinfo->ext_key_parts; + } + if (new_key_part != keyinfo->key_part) + { + memmove(new_key_part, keyinfo->key_part, + sizeof(KEY_PART_INFO) * keyinfo->ext_key_parts); + keyinfo->key_part= new_key_part; + } + } + + /* Fix fulltext keys for old .frm files */ + if (share->key_info[key].flags & HA_FULLTEXT) + share->key_info[key].algorithm= HA_KEY_ALG_FULLTEXT; + + key_part= keyinfo->key_part; + uint key_parts= share->use_ext_keys ? 
keyinfo->ext_key_parts : + keyinfo->user_defined_key_parts; + if (keyinfo->algorithm == HA_KEY_ALG_LONG_HASH) + key_parts++; + for (i=0; i < key_parts; key_part++, i++) + { + Field *field; + if (new_field_pack_flag <= 1) + key_part->fieldnr= find_field(share->field, + share->default_values, + (uint) key_part->offset, + (uint) key_part->length); + if (!key_part->fieldnr) + goto err; + + field= key_part->field= share->field[key_part->fieldnr-1]; + if (Charset::collation_changed_order(share->mysql_version, + field->charset()->number)) + share->incompatible_version|= HA_CREATE_USED_CHARSET; + key_part->type= field->key_type(); + + if (field->invisible > INVISIBLE_USER && !field->vers_sys_field()) + if (keyinfo->algorithm != HA_KEY_ALG_LONG_HASH) + keyinfo->flags |= HA_INVISIBLE_KEY; + if (field->null_ptr) + { + key_part->null_offset=(uint) ((uchar*) field->null_ptr - + share->default_values); + key_part->null_bit= field->null_bit; + key_part->store_length+=HA_KEY_NULL_LENGTH; + keyinfo->flags|=HA_NULL_PART_KEY; + keyinfo->key_length+= HA_KEY_NULL_LENGTH; + } + + key_part->key_part_flag|= field->key_part_flag(); + uint16 key_part_length_bytes= field->key_part_length_bytes(); + key_part->store_length+= key_part_length_bytes; + if (i < keyinfo->user_defined_key_parts) + keyinfo->key_length+= key_part_length_bytes; + + if (i == 0 && key != primary_key) + field->flags |= (((keyinfo->flags & HA_NOSAME || + keyinfo->algorithm == HA_KEY_ALG_LONG_HASH) && + (keyinfo->user_defined_key_parts == 1)) ? 
+ UNIQUE_KEY_FLAG : MULTIPLE_KEY_FLAG); + if (i == 0) + field->key_start.set_bit(key); + if (field->key_length() == key_part->length && + !(field->flags & BLOB_FLAG) && + keyinfo->algorithm != HA_KEY_ALG_LONG_HASH) + { + if (handler_file->index_flags(key, i, 0) & HA_KEYREAD_ONLY) + { + share->keys_for_keyread.set_bit(key); + field->part_of_key.set_bit(key); + if (i < keyinfo->user_defined_key_parts) + field->part_of_key_not_clustered.set_bit(key); + } + if (handler_file->index_flags(key, i, 1) & HA_READ_ORDER) + field->part_of_sortkey.set_bit(key); + } + if (!(key_part->key_part_flag & HA_REVERSE_SORT) && + usable_parts == i) + usable_parts++; // For FILESORT + field->flags|= PART_KEY_FLAG; + if (key == primary_key) + { + field->flags|= PRI_KEY_FLAG; + /* + If this field is part of the primary key and all keys contains + the primary key, then we can use any key to find this column + */ + if (ha_option & HA_PRIMARY_KEY_IN_READ_INDEX) + { + if (field->key_length() == key_part->length && + !(field->flags & BLOB_FLAG)) + field->part_of_key= share->keys_in_use; + if (field->part_of_sortkey.is_set(key)) + field->part_of_sortkey= share->keys_in_use; + } + } + if (field->key_length() != key_part->length) + { +#ifndef TO_BE_DELETED_ON_PRODUCTION + if (field->type() == MYSQL_TYPE_NEWDECIMAL && + keyinfo->algorithm != HA_KEY_ALG_LONG_HASH) + { + /* + Fix a fatal error in decimal key handling that causes crashes + on Innodb. We fix it by reducing the key length so that + InnoDB never gets a too big key when searching. + This allows the end user to do an ALTER TABLE to fix the + error. 
+ */ + keyinfo->key_length-= (key_part->length - field->key_length()); + key_part->store_length-= (uint16)(key_part->length - + field->key_length()); + key_part->length= (uint16)field->key_length(); + sql_print_error("Found wrong key definition in %s; " + "Please do \"ALTER TABLE '%s' FORCE \" to fix it!", + share->table_name.str, + share->table_name.str); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_CRASHED_ON_USAGE, + "Found wrong key definition in %s; " + "Please do \"ALTER TABLE '%s' FORCE\" to fix " + "it!", + share->table_name.str, + share->table_name.str); + share->crashed= 1; // Marker for CHECK TABLE + continue; + } +#endif + key_part->key_part_flag|= HA_PART_KEY_SEG; + } + if (field->real_maybe_null()) + key_part->key_part_flag|= HA_NULL_PART; + /* + Sometimes we can compare key parts for equality with memcmp. + But not always. + */ + if (!(key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART | + HA_BIT_PART)) && + key_part->type != HA_KEYTYPE_FLOAT && + key_part->type == HA_KEYTYPE_DOUBLE && + keyinfo->algorithm != HA_KEY_ALG_LONG_HASH) + key_part->key_part_flag|= HA_CAN_MEMCMP; + } + keyinfo->usable_key_parts= usable_parts; // Filesort + + set_if_bigger(share->max_key_length,keyinfo->key_length+ + keyinfo->user_defined_key_parts); + /* + MERGE tables do not have unique indexes. But every key could be + an unique index on the underlying MyISAM table. 
(Bug #10400) + */ + if ((keyinfo->flags & HA_NOSAME) || + (ha_option & HA_ANY_INDEX_MAY_BE_UNIQUE)) + set_if_bigger(share->max_unique_length,keyinfo->key_length); + } + if (primary_key < MAX_KEY && + (share->keys_in_use.is_set(primary_key))) + { + share->primary_key= primary_key; + /* + If we are using an integer as the primary key then allow the user to + refer to it as '_rowid' + */ + if (share->key_info[primary_key].user_defined_key_parts == 1) + { + Field *field= share->key_info[primary_key].key_part[0].field; + if (field && field->result_type() == INT_RESULT) + { + /* note that fieldnr here (and rowid_field_offset) starts from 1 */ + share->rowid_field_offset= (share->key_info[primary_key].key_part[0]. + fieldnr); + } + } + } + else + share->primary_key = MAX_KEY; // we do not have a primary key + } + else + share->primary_key= MAX_KEY; + if (new_field_pack_flag <= 1) + { + /* Old file format with default as not null */ + uint null_length= (share->null_fields+7)/8; + bfill(share->default_values + (null_flags - (uchar*) record), + null_length, 255); + } + + set_overlapped_keys(); + + /* Handle virtual expressions */ + if (vcol_screen_length && share->frm_version >= FRM_VER_EXPRESSSIONS) + { + uchar *vcol_screen_end= vcol_screen_pos + vcol_screen_length; + + /* Skip header */ + vcol_screen_pos+= FRM_VCOL_NEW_BASE_SIZE; + share->vcol_defs.str+= FRM_VCOL_NEW_BASE_SIZE; + share->vcol_defs.length-= FRM_VCOL_NEW_BASE_SIZE; + + /* + Read virtual columns, default values and check constraints + See pack_expression() for how data is stored + */ + while (vcol_screen_pos < vcol_screen_end) + { + Virtual_column_info *vcol_info; + uint type= (uint) vcol_screen_pos[0]; + uint field_nr= uint2korr(vcol_screen_pos+1); + uint expr_length= uint2korr(vcol_screen_pos+3); + uint name_length= (uint) vcol_screen_pos[5]; + + if (!(vcol_info= new (&share->mem_root) Virtual_column_info())) + goto err; + + /* The following can only be true for check_constraints */ + + if (field_nr != 
UINT_MAX16) + { + DBUG_ASSERT(field_nr < share->fields); + reg_field= share->field[field_nr]; + } + else + { + reg_field= 0; + DBUG_ASSERT(name_length); + } + + vcol_screen_pos+= FRM_VCOL_NEW_HEADER_SIZE; + vcol_info->set_vcol_type((enum_vcol_info_type) type); + if (name_length) + { + vcol_info->name.str= strmake_root(&share->mem_root, + (char*)vcol_screen_pos, name_length); + vcol_info->name.length= name_length; + } + else + vcol_info->name= reg_field->field_name; + vcol_screen_pos+= name_length + expr_length; + + switch (type) { + case VCOL_GENERATED_VIRTUAL: + { + uint recpos; + reg_field->vcol_info= vcol_info; + share->virtual_fields++; + share->stored_fields--; + if (reg_field->flags & BLOB_FLAG) + share->virtual_not_stored_blob_fields++; + if (reg_field->flags & PART_KEY_FLAG) + vcol_info->set_vcol_type(VCOL_GENERATED_VIRTUAL_INDEXED); + /* Correct stored_rec_length as non stored fields are last */ + recpos= (uint) (reg_field->ptr - record); + if (share->stored_rec_length >= recpos) + share->stored_rec_length= recpos-1; + break; + } + case VCOL_GENERATED_STORED: + vcol_info->stored_in_db= 1; + DBUG_ASSERT(!reg_field->vcol_info); + reg_field->vcol_info= vcol_info; + share->virtual_fields++; + break; + case VCOL_DEFAULT: + vcol_info->stored_in_db= 1; + DBUG_ASSERT(!reg_field->default_value); + reg_field->default_value= vcol_info; + share->default_expressions++; + break; + case VCOL_CHECK_FIELD: + DBUG_ASSERT(!reg_field->check_constraint); + reg_field->check_constraint= vcol_info; + share->field_check_constraints++; + break; + case VCOL_CHECK_TABLE: + *(table_check_constraints++)= vcol_info; + break; + } + } + } + DBUG_ASSERT((uint) (table_check_constraints - share->check_constraints) == + (uint) (share->table_check_constraints - + share->field_check_constraints)); + + if (options.str) + { + DBUG_ASSERT(options.length); + if (engine_table_options_frm_read(options.str, options.length, share)) + goto err; + } + if (parse_engine_table_options(thd, 
handler_file->partition_ht(), share)) + goto err; + + if (share->found_next_number_field) + { + reg_field= *share->found_next_number_field; + if ((int) (share->next_number_index= (uint) + find_ref_key(share->key_info, keys, + share->default_values, reg_field, + &share->next_number_key_offset, + &share->next_number_keypart)) < 0) + goto err; // Wrong field definition + reg_field->flags |= AUTO_INCREMENT_FLAG; + } + + if (share->blob_fields) + { + Field **ptr; + uint k, *save; + + /* Store offsets to blob fields to find them fast */ + if (!(share->blob_field= save= + (uint*) alloc_root(&share->mem_root, + (uint) (share->blob_fields* sizeof(uint))))) + goto err; + for (k=0, ptr= share->field ; *ptr ; ptr++, k++) + { + if ((*ptr)->flags & BLOB_FLAG) + (*save++)= k; + } + } + + /* + the correct null_bytes can now be set, since bitfields have been taken + into account + */ + share->null_bytes= (uint)(null_pos - (uchar*) null_flags + + (null_bit_pos + 7) / 8); + share->last_null_bit_pos= null_bit_pos; + share->null_bytes_for_compare= null_bits_are_used ? share->null_bytes : 0; + share->can_cmp_whole_record= (share->blob_fields == 0 && + share->varchar_fields == 0); + + share->column_bitmap_size= bitmap_buffer_size(share->fields); + + bitmap_count= 1; + if (share->table_check_constraints) + { + feature_check_constraint++; + if (!(share->check_set= (MY_BITMAP*) + alloc_root(&share->mem_root, sizeof(*share->check_set)))) + goto err; + bitmap_count++; + } + if (!(bitmaps= (my_bitmap_map*) alloc_root(&share->mem_root, + share->column_bitmap_size * + bitmap_count))) + goto err; + my_bitmap_init(&share->all_set, bitmaps, share->fields); + bitmap_set_all(&share->all_set); + if (share->check_set) + { + /* + Bitmap for fields used by CHECK constraint. Will be filled up + at first usage of table. 
+ */ + my_bitmap_init(share->check_set, + (my_bitmap_map*) ((uchar*) bitmaps + + share->column_bitmap_size), + share->fields); + bitmap_clear_all(share->check_set); + } + +#ifndef DBUG_OFF + if (use_hash) + (void) my_hash_check(&share->name_hash); +#endif + + share->db_plugin= se_plugin; + delete handler_file; + + share->error= OPEN_FRM_OK; + thd->status_var.opened_shares++; + thd->mem_root= old_root; + my_afree(interval_unescaped); + DBUG_RETURN(0); + +err: + if (frm_created) + { + char path[FN_REFLEN+1]; + strxnmov(path, FN_REFLEN, normalized_path.str, reg_ext, NullS); + my_delete(path, MYF(0)); +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (par_image) + { + strxnmov(path, FN_REFLEN, normalized_path.str, PAR_EXT, NullS); + my_delete(path, MYF(0)); + } +#endif + } + share->db_plugin= NULL; + share->error= OPEN_FRM_CORRUPTED; + share->open_errno= my_errno; + delete handler_file; + plugin_unlock(0, se_plugin); + my_hash_free(&share->name_hash); + + if (!thd->is_error()) + open_table_error(share, OPEN_FRM_CORRUPTED, share->open_errno); + + thd->mem_root= old_root; + my_afree(interval_unescaped); + DBUG_RETURN(HA_ERR_NOT_A_TABLE); +} + + +static bool sql_unusable_for_discovery(THD *thd, handlerton *engine, + const char *sql) +{ + LEX *lex= thd->lex; + HA_CREATE_INFO *create_info= &lex->create_info; + + // ... not CREATE TABLE + if (lex->sql_command != SQLCOM_CREATE_TABLE && + lex->sql_command != SQLCOM_CREATE_SEQUENCE) + return 1; + // ... create like + if (lex->create_info.like()) + return 1; + // ... create select + if (lex->first_select_lex()->item_list.elements) + return 1; + // ... temporary + if (create_info->tmp_table()) + return 1; + // ... if exists + if (lex->create_info.if_not_exists()) + return 1; + + // XXX error out or rather ignore the following: + // ... partitioning + if (lex->part_info) + return 1; + // ... union + if (create_info->used_fields & HA_CREATE_USED_UNION) + return 1; + // ... 
index/data directory + if (create_info->data_file_name || create_info->index_file_name) + return 1; + // ... engine + DBUG_ASSERT(lex->m_sql_cmd); + if (lex->create_info.used_fields & HA_CREATE_USED_ENGINE) + { + /* + TODO: we could just compare engine names here, without resolving. + But this optimization is too late for 10.1. + */ + Storage_engine_name *opt= lex->m_sql_cmd->option_storage_engine_name(); + DBUG_ASSERT(opt); // lex->m_sql_cmd must be an Sql_cmd_create_table instance + if (opt->resolve_storage_engine_with_error(thd, &create_info->db_type, + false) || + (create_info->db_type && create_info->db_type != engine)) + return 1; + } + // ... WITH SYSTEM VERSIONING + if (create_info->versioned()) + return 1; + + return 0; +} + +int TABLE_SHARE::init_from_sql_statement_string(THD *thd, bool write, + const char *sql, size_t sql_length) +{ + CHARSET_INFO *old_cs= thd->variables.character_set_client; + Parser_state parser_state; + bool error; + char *sql_copy; + handler *file; + LEX *old_lex; + LEX tmp_lex; + KEY *unused1; + uint unused2; + handlerton *hton= plugin_hton(db_plugin); + LEX_CUSTRING frm= {0,0}; + LEX_CSTRING db_backup= thd->db; + DBUG_ENTER("TABLE_SHARE::init_from_sql_statement_string"); + + /* + Ouch. Parser may *change* the string it's working on. + Currently (2013-02-26) it is used to permanently disable + conditional comments. + Anyway, let's copy the caller's string... 
+ */ + if (!(sql_copy= thd->strmake(sql, sql_length))) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + if (parser_state.init(thd, sql_copy, sql_length)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + Sql_mode_instant_set sms(thd, MODE_NO_ENGINE_SUBSTITUTION | MODE_NO_DIR_IN_CREATE); + thd->variables.character_set_client= system_charset_info; + tmp_disable_binlog(thd); + old_lex= thd->lex; + thd->lex= &tmp_lex; + + + /* + THD::reset_db() does not set THD::db_charset, + so it keeps pointing to the character set and collation + of the current database, rather than the database of the + new initialized table. After reset_db() the result of + get_default_db_collation() can be wrong. The latter is + used inside charset_collation_context_create_table_in_db(). + Let's initialize ctx before calling reset_db(). + This makes sure the db.opt file to be loaded properly when needed. + */ + Charset_collation_context + ctx(thd->charset_collation_context_create_table_in_db(db.str)); + + thd->reset_db(&db); + lex_start(thd); + + if (unlikely((error= parse_sql(thd, & parser_state, NULL) || + sql_unusable_for_discovery(thd, hton, sql_copy)))) + goto ret; + + if (thd->lex->create_info.resolve_to_charset_collation_context(thd, ctx)) + DBUG_RETURN(true); + + tmp_lex.create_info.db_type= hton; +#ifdef WITH_PARTITION_STORAGE_ENGINE + thd->work_part_info= 0; // For partitioning +#endif + + if (tabledef_version.str) + tmp_lex.create_info.tabledef_version= tabledef_version; + + tmp_lex.alter_info.db= db; + tmp_lex.alter_info.table_name= table_name; + promote_first_timestamp_column(&tmp_lex.alter_info.create_list); + file= mysql_create_frm_image(thd, &tmp_lex.create_info, &tmp_lex.alter_info, + C_ORDINARY_CREATE, &unused1, &unused2, &frm); + error|= file == 0; + delete file; + + if (frm.str) + { + option_list= 0; // cleanup existing options ... + option_struct= 0; // ... 
if it's an assisted discovery + error= init_from_binary_frm_image(thd, write, frm.str, frm.length); + } + +ret: + my_free(const_cast(frm.str)); + lex_end(&tmp_lex); + thd->reset_db(&db_backup); + thd->lex= old_lex; + reenable_binlog(thd); + thd->variables.character_set_client= old_cs; + if (unlikely(thd->is_error() || error)) + { + thd->clear_error(); + my_error(ER_SQL_DISCOVER_ERROR, MYF(0), hton_name(hton)->str, + db.str, table_name.str, sql_copy); + DBUG_RETURN(HA_ERR_GENERIC); + } + /* Treat the table as normal table from binary logging point of view */ + table_creation_was_logged= 1; + DBUG_RETURN(0); +} + +bool TABLE_SHARE::write_frm_image(const uchar *frm, size_t len) +{ + char file_name[FN_REFLEN+1]; + strxnmov(file_name, sizeof(file_name)-1, normalized_path.str, reg_ext, + NullS); + return writefile(file_name, db.str, table_name.str, false, + frm, len); +} + +bool TABLE_SHARE::write_par_image(const uchar *par, size_t len) +{ + char file_name[FN_REFLEN+1]; + strxnmov(file_name, sizeof(file_name)-1, normalized_path.str, PAR_EXT, + NullS); + return writefile(file_name, db.str, table_name.str, false, par, len); +} + + +bool TABLE_SHARE::read_frm_image(const uchar **frm, size_t *len) +{ + if (IF_PARTITIONING(partition_info_str, 0)) // cannot discover a partition + { + DBUG_ASSERT(db_type()->discover_table == 0); + return 1; + } + + if (frm_image) + { + *frm= frm_image->str; + *len= frm_image->length; + frm_image->str= 0; // pass the ownership to the caller + frm_image= 0; + return 0; + } + return readfrm(normalized_path.str, frm, len); +} + + +void TABLE_SHARE::free_frm_image(const uchar *frm) +{ + if (frm) + my_free(const_cast(frm)); +} + + +bool Virtual_column_info::fix_expr(THD *thd) +{ + DBUG_ENTER("fix_vcol_expr"); + + const enum enum_column_usage saved_column_usage= thd->column_usage; + thd->column_usage= COLUMNS_WRITE; + + int error= expr->fix_fields(thd, &expr); + + thd->column_usage= saved_column_usage; + + if (unlikely(error)) + { + StringBuffer str; 
+ print(&str); + my_error(ER_ERROR_EVALUATING_EXPRESSION, MYF(0), str.c_ptr_safe()); + DBUG_RETURN(1); + } + + DBUG_RETURN(0); +} + +/** rerun fix_fields for vcols that returns time- or session- dependent values + + @note this is done for all vcols for INSERT/UPDATE/DELETE, + and only as needed for SELECTs. +*/ +bool Virtual_column_info::fix_session_expr(THD *thd) +{ + if (!need_refix()) + return false; + + DBUG_ASSERT(!expr->fixed()); + return fix_expr(thd); +} + + +bool Virtual_column_info::cleanup_session_expr() +{ + DBUG_ASSERT(need_refix()); + return expr->walk(&Item::cleanup_excluding_fields_processor, 0, 0); +} + + + +class Vcol_expr_context +{ + bool inited; + THD *thd; + TABLE *table; + Query_arena backup_arena, *stmt_arena; + table_map old_map; + Security_context *save_security_ctx; + sql_mode_t save_sql_mode; + +public: + Vcol_expr_context(THD *_thd, TABLE *_table) : + inited(false), + thd(_thd), + table(_table), + stmt_arena(thd->stmt_arena), + old_map(table->map), + save_security_ctx(thd->security_ctx), + save_sql_mode(thd->variables.sql_mode) {} + bool init(); + + ~Vcol_expr_context(); +}; + + +bool Vcol_expr_context::init() +{ + thd->variables.sql_mode= 0; + + TABLE_LIST const *tl= table->pos_in_table_list; + DBUG_ASSERT(table->pos_in_table_list); + + if (table->pos_in_table_list->security_ctx) + thd->security_ctx= tl->security_ctx; + + thd->set_n_backup_active_arena(table->expr_arena, &backup_arena); + thd->stmt_arena= thd; + + inited= true; + return false; +} + +Vcol_expr_context::~Vcol_expr_context() +{ + if (!inited) + return; + table->map= old_map; + thd->security_ctx= save_security_ctx; + thd->restore_active_arena(table->expr_arena, &backup_arena); + thd->variables.sql_mode= save_sql_mode; + thd->stmt_arena= stmt_arena; +} + + +bool TABLE::vcol_fix_expr(THD *thd) +{ + if (pos_in_table_list->placeholder() || vcol_refix_list.is_empty()) + return false; + + if (!thd->stmt_arena->is_conventional() && + vcol_refix_list.head()->expr->fixed()) + { + 
/* NOTE: Under trigger we already have fixed expressions */ + return false; + } + + Vcol_expr_context expr_ctx(thd, this); + if (expr_ctx.init()) + return true; + + List_iterator_fast it(vcol_refix_list); + while (Virtual_column_info *vcol= it++) + if (vcol->fix_session_expr(thd)) + goto error; + + return false; + +error: + DBUG_ASSERT(thd->get_stmt_da()->is_error()); + return true; +} + + +bool TABLE::vcol_cleanup_expr(THD *thd) +{ + if (vcol_refix_list.is_empty()) + return false; + + List_iterator it(vcol_refix_list); + bool result= false; + + while (Virtual_column_info *vcol= it++) + result|= vcol->cleanup_session_expr(); + + DBUG_ASSERT(!result || thd->get_stmt_da()->is_error()); + return result; +} + + +/* + @brief + Perform semantic analysis of the defining expression for a virtual column + + @param thd The thread object + @param table The table containing the virtual column + @param field Field if this is a DEFAULT or AS, otherwise NULL + @param vcol The Virtual_column object + + + @details + The function performs semantic analysis of the defining expression for + the virtual column vcol_field. The expression is used to compute the + values of this column. + + @retval + TRUE An error occurred, something was wrong with the function + @retval + FALSE Otherwise +*/ + +bool Virtual_column_info::fix_and_check_expr(THD *thd, TABLE *table) +{ + DBUG_ENTER("Virtual_column_info::fix_and_check_expr"); + DBUG_PRINT("info", ("vcol: %p", this)); + DBUG_ASSERT(expr); + + /* NOTE: constants are fixed when constructed */ + if (expr->fixed()) + DBUG_RETURN(0); // nothing to do + + if (fix_expr(thd)) + DBUG_RETURN(1); + + if (flags) + DBUG_RETURN(0); // already checked, no need to do it again + + + /* this was checked in check_expression(), but the frm could be mangled... 
*/ + if (unlikely(expr->result_type() == ROW_RESULT)) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), 1); + DBUG_RETURN(1); + } + + /* + Walk through the Item tree checking if all items are valid + to be part of the virtual column + */ + Item::vcol_func_processor_result res; + + int error= expr->walk(&Item::check_vcol_func_processor, 0, &res); + if (unlikely(error || (res.errors & VCOL_IMPOSSIBLE))) + { + // this can only happen if the frm was corrupted + my_error(ER_VIRTUAL_COLUMN_FUNCTION_IS_NOT_ALLOWED, MYF(0), res.name, + get_vcol_type_name(), name.str); + DBUG_RETURN(1); + } + else if (res.errors & VCOL_AUTO_INC && vcol_type != VCOL_GENERATED_VIRTUAL) + { + /* + An auto_increment field may not be used in an expression for + a check constraint, a default value or a generated column + + Note that this error condition is not detected during parsing + of the statement because the field item does not have a field + pointer at that time + */ + myf warn= table->s->frm_version < FRM_VER_EXPRESSSIONS ? 
ME_WARNING : 0; + my_error(ER_GENERATED_COLUMN_FUNCTION_IS_NOT_ALLOWED, MYF(warn), + "AUTO_INCREMENT", get_vcol_type_name(), res.name); + if (!warn) + DBUG_RETURN(1); + } + else if (vcol_type != VCOL_GENERATED_VIRTUAL && vcol_type != VCOL_DEFAULT && + res.errors & VCOL_NOT_STRICTLY_DETERMINISTIC) + { + my_error(ER_GENERATED_COLUMN_FUNCTION_IS_NOT_ALLOWED, MYF(0), + res.name, get_vcol_type_name(), name.str); + DBUG_RETURN(1); + } + flags= res.errors; + + if (!table->s->tmp_table && need_refix()) + table->vcol_refix_list.push_back(this, &table->mem_root); + + DBUG_RETURN(0); +} + + +/* + @brief + Unpack the definition of a virtual column from its linear representation + + @param thd The thread object + @param mem_root Where to allocate memory + @param table The table containing the virtual column + @param field Field if this is a DEFAULT or AS, otherwise NULL + @param vcol The Virtual_column object + @param[out] error_reported Flag to inform the caller that no + other error messages are to be generated + + @details + + The function takes string expression from the 'vcol' object of the + table 'table' and parses it, building an item object for it. The + pointer to this item is placed into in a Virtual_column_info object + that is created. After this the function performs + semantic analysis of the item by calling the the function + fix_and_check_vcol_expr(). Since the defining expression is part of the table + definition the item for it is created in table->memroot within the + special arena TABLE::expr_arena or in the thd memroot for INSERT DELAYED + + @note + Before passing 'vcol_expr' to the parser the function wraps it in + parentheses and prepends a special keyword. 
+ + @retval Virtual_column_info* Success + @retval NULL Error +*/ + +static Virtual_column_info * +unpack_vcol_info_from_frm(THD *thd, TABLE *table, + String *expr_str, Virtual_column_info **vcol_ptr, + bool *error_reported) +{ + Create_field vcol_storage; // placeholder for vcol_info + Parser_state parser_state; + Virtual_column_info *vcol= *vcol_ptr, *vcol_info= 0; + LEX *old_lex= thd->lex; + LEX lex; + bool error; + DBUG_ENTER("unpack_vcol_info_from_frm"); + + DBUG_ASSERT(vcol->expr == NULL); + + if (parser_state.init(thd, expr_str->c_ptr_safe(), expr_str->length())) + goto end; + + if (init_lex_with_single_table(thd, table, &lex)) + goto end; + + lex.parse_vcol_expr= true; + lex.last_field= &vcol_storage; + + error= parse_sql(thd, &parser_state, NULL); + if (unlikely(error)) + goto end; + + if (lex.current_select->table_list.first[0].next_global) + { + /* We are using NEXT VALUE FOR sequence. Remember table name for open */ + TABLE_LIST *sequence= lex.current_select->table_list.first[0].next_global; + sequence->next_global= table->internal_tables; + table->internal_tables= sequence; + } + + vcol_storage.vcol_info->set_vcol_type(vcol->get_vcol_type()); + vcol_storage.vcol_info->stored_in_db= vcol->stored_in_db; + vcol_storage.vcol_info->name= vcol->name; + vcol_storage.vcol_info->utf8= vcol->utf8; + if (!vcol_storage.vcol_info->fix_and_check_expr(thd, table)) + { + *vcol_ptr= vcol_info= vcol_storage.vcol_info; // Expression ok + DBUG_ASSERT(vcol_info->expr); + goto end; + } + *error_reported= TRUE; + +end: + end_lex_with_single_table(thd, table, old_lex); + + DBUG_RETURN(vcol_info); +} + +#ifndef DBUG_OFF +static void print_long_unique_table(TABLE *table) +{ + char buff[256]; + String str; + KEY *key_info_table, *key_info_share; + KEY_PART_INFO *key_part; + Field *field; + my_snprintf(buff, sizeof(buff), "Printing Table state, It will print table fields," + " fields->offset,field->null_bit, field->null_pos and key_info ... 
\n" + "\nPrinting Table keyinfo\n"); + str.append(buff, strlen(buff)); + my_snprintf(buff, sizeof(buff), "\ntable->s->reclength %d\n" + "table->s->fields %d\n", + table->s->reclength, table->s->fields); + str.append(buff, strlen(buff)); + for (uint i= 0; i < table->s->keys; i++) + { + key_info_table= table->key_info + i; + key_info_share= table->s->key_info + i; + my_snprintf(buff, sizeof(buff), "\ntable->key_info[%d] user_defined_key_parts = %d\n" + "table->key_info[%d] algorithm == HA_KEY_ALG_LONG_HASH = %d\n" + "table->key_info[%d] flags & HA_NOSAME = %d\n", + i, key_info_table->user_defined_key_parts, + i, key_info_table->algorithm == HA_KEY_ALG_LONG_HASH, + i, key_info_table->flags & HA_NOSAME); + str.append(buff, strlen(buff)); + my_snprintf(buff, sizeof(buff), "\ntable->s->key_info[%d] user_defined_key_parts = %d\n" + "table->s->key_info[%d] algorithm == HA_KEY_ALG_LONG_HASH = %d\n" + "table->s->key_info[%d] flags & HA_NOSAME = %d\n", + i, key_info_share->user_defined_key_parts, + i, key_info_share->algorithm == HA_KEY_ALG_LONG_HASH, + i, key_info_share->flags & HA_NOSAME); + str.append(buff, strlen(buff)); + key_part = key_info_table->key_part; + my_snprintf(buff, sizeof(buff), "\nPrinting table->key_info[%d].key_part[0] info\n" + "key_part->offset = %d\n" + "key_part->field_name = %s\n" + "key_part->length = %d\n" + "key_part->null_bit = %d\n" + "key_part->null_offset = %d\n", + i, key_part->offset, key_part->field->field_name.str, key_part->length, + key_part->null_bit, key_part->null_offset); + str.append(buff, strlen(buff)); + + for (uint j= 0; j < key_info_share->user_defined_key_parts; j++) + { + key_part= key_info_share->key_part + j; + my_snprintf(buff, sizeof(buff), "\nPrinting share->key_info[%d].key_part[%d] info\n" + "key_part->offset = %d\n" + "key_part->field_name = %s\n" + "key_part->length = %d\n" + "key_part->null_bit = %d\n" + "key_part->null_offset = %d\n", + i,j,key_part->offset, key_part->field->field_name.str, key_part->length, + 
key_part->null_bit, key_part->null_offset); + str.append(buff, strlen(buff)); + } + } + my_snprintf(buff, sizeof(buff), "\nPrinting table->fields\n"); + str.append(buff, strlen(buff)); + for(uint i= 0; i < table->s->fields; i++) + { + field= table->field[i]; + my_snprintf(buff, sizeof(buff), "\ntable->field[%d]->field_name %s\n" + "table->field[%d]->offset = %d\n" + "table->field[%d]->field_length = %d\n" + "table->field[%d]->null_pos wrt to record 0 = %d\n" + "table->field[%d]->null_bit_pos = %d\n", + i, field->field_name.str, + i, field->ptr- table->record[0], + i, field->pack_length(), + i, field->null_bit ? field->null_ptr - table->record[0] : -1, + i, field->null_bit); + str.append(buff, strlen(buff)); + } + (*error_handler_hook)(1, str.ptr(), ME_NOTE); +} +#endif + +bool copy_keys_from_share(TABLE *outparam, MEM_ROOT *root) +{ + TABLE_SHARE *share= outparam->s; + if (share->key_parts) + { + KEY *key_info, *key_info_end; + KEY_PART_INFO *key_part; + + if (!multi_alloc_root(root, &key_info, share->keys*sizeof(KEY), + &key_part, share->ext_key_parts*sizeof(KEY_PART_INFO), + NullS)) + return 1; + + outparam->key_info= key_info; + + memcpy(key_info, share->key_info, sizeof(*key_info)*share->keys); + memcpy(key_part, key_info->key_part, sizeof(*key_part)*share->ext_key_parts); + + my_ptrdiff_t adjust_ptrs= PTR_BYTE_DIFF(key_part, key_info->key_part); + for (key_info_end= key_info + share->keys ; + key_info < key_info_end ; + key_info++) + { + key_info->table= outparam; + key_info->key_part= reinterpret_cast + (reinterpret_cast(key_info->key_part) + adjust_ptrs); + if (key_info->algorithm == HA_KEY_ALG_LONG_HASH) + key_info->flags&= ~HA_NOSAME; + } + for (KEY_PART_INFO *key_part_end= key_part+share->ext_key_parts; + key_part < key_part_end; + key_part++) + { + Field *field= key_part->field= outparam->field[key_part->fieldnr - 1]; + if (field->key_length() != key_part->length && + !(field->flags & BLOB_FLAG)) + { + /* + We are using only a prefix of the column as a 
key: + Create a new field for the key part that matches the index + */ + field= key_part->field=field->make_new_field(root, outparam, 0); + field->field_length= key_part->length; + } + } + } + return 0; +} + +/* + Open a table based on a TABLE_SHARE + + SYNOPSIS + open_table_from_share() + thd Thread handler + share Table definition + alias Alias for table + db_stat open flags (for example HA_OPEN_KEYFILE| + HA_OPEN_RNDFILE..) can be 0 (example in + ha_example_table) + prgflag READ_ALL etc.. + ha_open_flags HA_OPEN_ABORT_IF_LOCKED etc.. + outparam result table + partitions_to_open open only these partitions. + + RETURN VALUES + 0 ok + 1 Error (see open_table_error) + 2 Error (see open_table_error) + 3 Wrong data in .frm file + 4 Error (see open_table_error) + 5 Error (see open_table_error: charset unavailable) + 7 Table definition has changed in engine +*/ + +enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share, + const LEX_CSTRING *alias, uint db_stat, uint prgflag, + uint ha_open_flags, TABLE *outparam, + bool is_create_table, List *partitions_to_open) +{ + enum open_frm_error error; + uint records, i, bitmap_size, bitmap_count; + const char *tmp_alias; + bool error_reported= FALSE; + uchar *record, *bitmaps; + Field **field_ptr; + uint8 save_context_analysis_only= thd->lex->context_analysis_only; + TABLE_SHARE::enum_v_keys check_set_initialized= share->check_set_initialized; + DBUG_ENTER("open_table_from_share"); + DBUG_PRINT("enter",("name: '%s.%s' form: %p", share->db.str, + share->table_name.str, outparam)); + + thd->lex->context_analysis_only&= ~CONTEXT_ANALYSIS_ONLY_VIEW; // not a view + + error= OPEN_FRM_ERROR_ALREADY_ISSUED; // for OOM errors below + bzero((char*) outparam, sizeof(*outparam)); + outparam->in_use= thd; + outparam->s= share; + outparam->db_stat= db_stat; + outparam->status= STATUS_NO_RECORD; + + if (share->incompatible_version && + !(ha_open_flags & (HA_OPEN_FOR_ALTER | HA_OPEN_FOR_REPAIR | + HA_OPEN_FOR_FLUSH))) + { + /* 
one needs to run mysql_upgrade on the table */ + error= OPEN_FRM_NEEDS_REBUILD; + goto err; + } + init_sql_alloc(key_memory_TABLE, &outparam->mem_root, TABLE_ALLOC_BLOCK_SIZE, + 0, MYF(0)); + + /* + We have to store the original alias in mem_root as constraints and virtual + functions may store pointers to it + */ + if (!(tmp_alias= strmake_root(&outparam->mem_root, alias->str, alias->length))) + goto err; + + outparam->alias.set(tmp_alias, alias->length, table_alias_charset); + outparam->vcol_refix_list.empty(); + + /* Allocate handler */ + outparam->file= 0; + if (!(prgflag & OPEN_FRM_FILE_ONLY)) + { + if (!(outparam->file= get_new_handler(share, &outparam->mem_root, + share->db_type()))) + goto err; + + if (outparam->file->set_ha_share_ref(&share->ha_share)) + goto err; + } + else + { + DBUG_ASSERT(!db_stat); + } + + if (share->sequence && outparam->file) + { + ha_sequence *file; + /* SEQUENCE table. Create a sequence handler over the original handler */ + if (!(file= (ha_sequence*) sql_sequence_hton->create(sql_sequence_hton, share, + &outparam->mem_root))) + goto err; + file->register_original_handler(outparam->file); + outparam->file= file; + } + + outparam->reginfo.lock_type= TL_UNLOCK; + outparam->reginfo.skip_locked= false; + outparam->current_lock= F_UNLCK; + records=0; + if ((db_stat & HA_OPEN_KEYFILE) || (prgflag & DELAYED_OPEN)) + records=1; + if (prgflag & (READ_ALL + EXTRA_RECORD)) + { + records++; + if (share->versioned || share->period.name) + records++; + } + + if (records == 0) + { + /* We are probably in hard repair, and the buffers should not be used */ + record= share->default_values; + } + else + { + if (!(record= (uchar*) alloc_root(&outparam->mem_root, + share->rec_buff_length * records))) + goto err; /* purecov: inspected */ + } + + for (i= 0; i < 3;) + { + outparam->record[i]= record; + if (++i < records) + record+= share->rec_buff_length; + } + /* Mark bytes between records as not accessable to catch overrun bugs */ + for (i= 0; i < 
records; i++) + MEM_NOACCESS(outparam->record[i] + share->reclength, + share->rec_buff_length - share->reclength); + + if (!(field_ptr = (Field **) alloc_root(&outparam->mem_root, + (uint) ((share->fields+1)* + sizeof(Field*))))) + goto err; /* purecov: inspected */ + + /* Allocate storage for range optimizer */ + if (!multi_alloc_root(&outparam->mem_root, + &outparam->opt_range, + share->keys * sizeof(TABLE::OPT_RANGE), + &outparam->const_key_parts, + share->keys * sizeof(key_part_map), + NullS)) + goto err; + + outparam->field= field_ptr; + + record= (uchar*) outparam->record[0]-1; /* Fieldstart = 1 */ + if (share->null_field_first) + outparam->null_flags= (uchar*) record+1; + else + outparam->null_flags= (uchar*) (record+ 1+ share->reclength - + share->null_bytes); + + /* Setup copy of fields from share, but use the right alias and record */ + for (i=0 ; i < share->fields; i++, field_ptr++) + { + if (!((*field_ptr)= share->field[i]->clone(&outparam->mem_root, outparam))) + goto err; + } + (*field_ptr)= 0; // End marker + + DEBUG_SYNC(thd, "TABLE_after_field_clone"); + + outparam->vers_write= share->versioned; + + if (share->found_next_number_field) + outparam->found_next_number_field= + outparam->field[(uint) (share->found_next_number_field - share->field)]; + + if (copy_keys_from_share(outparam, &outparam->mem_root)) + goto err; + + /* + Process virtual and default columns, if any. 
+ */ + if (share->virtual_fields || share->default_fields || + share->default_expressions || share->table_check_constraints) + { + Field **vfield_ptr, **dfield_ptr; + Virtual_column_info **check_constraint_ptr; + + if (!multi_alloc_root(&outparam->mem_root, + &vfield_ptr, (uint) ((share->virtual_fields + 1)* + sizeof(Field*)), + &dfield_ptr, (uint) ((share->default_fields + + share->default_expressions +1)* + sizeof(Field*)), + &check_constraint_ptr, + (uint) ((share->table_check_constraints + + share->field_check_constraints + 1)* + sizeof(Virtual_column_info*)), + NullS)) + goto err; + if (share->virtual_fields) + outparam->vfield= vfield_ptr; + if (share->default_fields + share->default_expressions) + outparam->default_field= dfield_ptr; + if (share->table_check_constraints || share->field_check_constraints) + outparam->check_constraints= check_constraint_ptr; + + vcol_init_mode mode= VCOL_INIT_DEPENDENCY_FAILURE_IS_WARNING; + switch (thd->lex->sql_command) + { + case SQLCOM_CREATE_TABLE: + mode= VCOL_INIT_DEPENDENCY_FAILURE_IS_ERROR; + break; + case SQLCOM_ALTER_TABLE: + case SQLCOM_CREATE_INDEX: + case SQLCOM_DROP_INDEX: + if ((ha_open_flags & HA_OPEN_FOR_ALTER) == 0) + mode= VCOL_INIT_DEPENDENCY_FAILURE_IS_ERROR; + break; + default: + break; + } + + if (parse_vcol_defs(thd, &outparam->mem_root, outparam, + &error_reported, mode)) + { + error= OPEN_FRM_CORRUPTED; + goto err; + } + + /* Update to use trigger fields */ + switch_defaults_to_nullable_trigger_fields(outparam); + + for (uint k= 0; k < share->keys; k++) + { + KEY &key_info= outparam->key_info[k]; + uint parts = (share->use_ext_keys ? 
key_info.ext_key_parts : + key_info.user_defined_key_parts); + for (uint p= 0; p < parts; p++) + { + KEY_PART_INFO &kp= key_info.key_part[p]; + if (kp.field != outparam->field[kp.fieldnr - 1]) + { + kp.field->vcol_info = outparam->field[kp.fieldnr - 1]->vcol_info; + } + } + } + } + +#ifdef WITH_PARTITION_STORAGE_ENGINE + bool work_part_info_used; + if (share->partition_info_str_len && outparam->file) + { + /* + In this execution we must avoid calling thd->change_item_tree since + we might release memory before statement is completed. We do this + by changing to a new statement arena. As part of this arena we also + set the memory root to be the memory root of the table since we + call the parser and fix_fields which both can allocate memory for + item objects. We keep the arena to ensure that we can release the + free_list when closing the table object. + SEE Bug #21658 + */ + + Query_arena *backup_stmt_arena_ptr= thd->stmt_arena; + Query_arena backup_arena; + Query_arena part_func_arena(&outparam->mem_root, + Query_arena::STMT_INITIALIZED); + thd->set_n_backup_active_arena(&part_func_arena, &backup_arena); + thd->stmt_arena= &part_func_arena; + bool tmp; + + tmp= mysql_unpack_partition(thd, share->partition_info_str, + share->partition_info_str_len, + outparam, is_create_table, + plugin_hton(share->default_part_plugin), + &work_part_info_used); + if (tmp) + { + thd->stmt_arena= backup_stmt_arena_ptr; + thd->restore_active_arena(&part_func_arena, &backup_arena); + goto partititon_err; + } + if (parse_engine_part_options(thd, outparam)) + goto err; + outparam->part_info->is_auto_partitioned= share->auto_partitioned; + DBUG_PRINT("info", ("autopartitioned: %u", share->auto_partitioned)); + /* + We should perform the fix_partition_func in either local or + caller's arena depending on work_part_info_used value. 
+ */ + if (!work_part_info_used) + tmp= fix_partition_func(thd, outparam, is_create_table); + thd->stmt_arena= backup_stmt_arena_ptr; + thd->restore_active_arena(&part_func_arena, &backup_arena); + if (!tmp) + { + if (work_part_info_used) + tmp= fix_partition_func(thd, outparam, is_create_table); + } + outparam->part_info->item_free_list= part_func_arena.free_list; +partititon_err: + if (tmp) + { + if (is_create_table) + { + /* + During CREATE/ALTER TABLE it is ok to receive errors here. + It is not ok if it happens during the opening of an frm + file as part of a normal query. + */ + error_reported= TRUE; + } + goto err; + } + } +#endif + + /* Check virtual columns against table's storage engine. */ + if (share->virtual_fields && + (outparam->file && + !(outparam->file->ha_table_flags() & HA_CAN_VIRTUAL_COLUMNS))) + { + my_error(ER_UNSUPPORTED_ENGINE_FOR_VIRTUAL_COLUMNS, MYF(0), + plugin_name(share->db_plugin)->str); + error_reported= TRUE; + goto err; + } + + /* Allocate bitmaps */ + + bitmap_size= share->column_bitmap_size; + bitmap_count= 7; + if (share->virtual_fields) + bitmap_count++; + + if (!(bitmaps= (uchar*) alloc_root(&outparam->mem_root, + bitmap_size * bitmap_count))) + goto err; + + my_bitmap_init(&outparam->def_read_set, + (my_bitmap_map*) bitmaps, share->fields); + bitmaps+= bitmap_size; + my_bitmap_init(&outparam->def_write_set, + (my_bitmap_map*) bitmaps, share->fields); + bitmaps+= bitmap_size; + + my_bitmap_init(&outparam->has_value_set, + (my_bitmap_map*) bitmaps, share->fields); + bitmaps+= bitmap_size; + my_bitmap_init(&outparam->tmp_set, + (my_bitmap_map*) bitmaps, share->fields); + bitmaps+= bitmap_size; + my_bitmap_init(&outparam->eq_join_set, + (my_bitmap_map*) bitmaps, share->fields); + bitmaps+= bitmap_size; + my_bitmap_init(&outparam->cond_set, + (my_bitmap_map*) bitmaps, share->fields); + bitmaps+= bitmap_size; + my_bitmap_init(&outparam->def_rpl_write_set, + (my_bitmap_map*) bitmaps, share->fields); + 
outparam->default_column_bitmaps(); + + outparam->cond_selectivity= 1.0; + + /* The table struct is now initialized; Open the table */ + if (db_stat) + { + if (specialflag & SPECIAL_WAIT_IF_LOCKED) + ha_open_flags|= HA_OPEN_WAIT_IF_LOCKED; + else + ha_open_flags|= HA_OPEN_IGNORE_IF_LOCKED; + + int ha_err= outparam->file->ha_open(outparam, share->normalized_path.str, + (db_stat & HA_READ_ONLY ? O_RDONLY : O_RDWR), + ha_open_flags, 0, partitions_to_open); + if (ha_err) + { + share->open_errno= ha_err; + /* Set a flag if the table is crashed and it can be auto. repaired */ + share->crashed= (outparam->file->auto_repair(ha_err) && + !(ha_open_flags & HA_OPEN_FOR_REPAIR)); + if (!thd->is_error()) + outparam->file->print_error(ha_err, MYF(0)); + error_reported= TRUE; + + if (ha_err == HA_ERR_TABLE_DEF_CHANGED) + error= OPEN_FRM_DISCOVER; + + /* + We're here, because .frm file was successfully opened. + + But if the table doesn't exist in the engine and the engine + supports discovery, we force rediscover to discover + the fact that table doesn't in fact exist and remove + the stray .frm file. + */ + if (outparam->file->partition_ht()->discover_table && + (ha_err == ENOENT || ha_err == HA_ERR_NO_SUCH_TABLE)) + error= OPEN_FRM_DISCOVER; + + goto err; + } + } + + outparam->mark_columns_used_by_virtual_fields(); + if (!check_set_initialized && + share->check_set_initialized == TABLE_SHARE::V_KEYS) + { + // copy PART_INDIRECT_KEY_FLAG that was set meanwhile by *some* thread + for (uint i= 0 ; i < share->fields ; i++) + { + if (share->field[i]->flags & PART_INDIRECT_KEY_FLAG) + outparam->field[i]->flags|= PART_INDIRECT_KEY_FLAG; + } + } + + if (db_stat) + { + /* Set some flags in share on first open of the table */ + handler::Table_flags flags= outparam->file->ha_table_flags(); + if (! 
MY_TEST(flags & (HA_BINLOG_STMT_CAPABLE | + HA_BINLOG_ROW_CAPABLE)) || + MY_TEST(flags & HA_HAS_OWN_BINLOGGING)) + share->no_replicate= TRUE; + if (outparam->file->table_cache_type() & HA_CACHE_TBL_NOCACHE) + share->not_usable_by_query_cache= TRUE; + if (outparam->file->ha_table_flags() & HA_CAN_ONLINE_BACKUPS) + share->online_backup= 1; + } + + if (share->no_replicate || !binlog_filter->db_ok(share->db.str)) + share->can_do_row_logging= 0; // No row based replication + + /* Increment the opened_tables counter, only when open flags set. */ + if (db_stat) + thd->status_var.opened_tables++; + + thd->lex->context_analysis_only= save_context_analysis_only; + DBUG_EXECUTE_IF("print_long_unique_internal_state", + print_long_unique_table(outparam);); + DBUG_RETURN (OPEN_FRM_OK); + + err: + if (! error_reported) + open_table_error(share, error, my_errno); + delete outparam->file; +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (outparam->part_info) + free_items(outparam->part_info->item_free_list); +#endif + outparam->file= 0; // For easier error checking + outparam->db_stat=0; + thd->lex->context_analysis_only= save_context_analysis_only; + if (outparam->expr_arena) + outparam->expr_arena->free_items(); + free_root(&outparam->mem_root, MYF(0)); // Safe to call on bzero'd root + outparam->alias.free(); + DBUG_RETURN (error); +} + + +/** + Free engine stats + + This is only called from closefrm() when the TABLE object is destroyed +**/ + +void TABLE::free_engine_stats() +{ + bool free_stats= 0; + TABLE_STATISTICS_CB *stats= stats_cb; + mysql_mutex_lock(&s->LOCK_share); + free_stats= --stats->usage_count == 0; + mysql_mutex_unlock(&s->LOCK_share); + if (free_stats) + delete stats; +} + + +/* + Use engine stats from table_share if table_share has been updated +*/ + +void TABLE::update_engine_independent_stats() +{ + bool free_stats= 0; + TABLE_STATISTICS_CB *org_stats= stats_cb; + DBUG_ASSERT(stats_cb != s->stats_cb); + + if (stats_cb != s->stats_cb) + { + 
mysql_mutex_lock(&s->LOCK_share); + if (org_stats) + free_stats= --org_stats->usage_count == 0; + if ((stats_cb= s->stats_cb)) + stats_cb->usage_count++; + mysql_mutex_unlock(&s->LOCK_share); + if (free_stats) + delete org_stats; + } +} + + +/* + Update engine stats in table share to use new stats +*/ + +void +TABLE_SHARE::update_engine_independent_stats(TABLE_STATISTICS_CB *new_stats) +{ + TABLE_STATISTICS_CB *free_stats= 0; + DBUG_ASSERT(new_stats->usage_count == 0); + + mysql_mutex_lock(&LOCK_share); + if (stats_cb) + { + if (!--stats_cb->usage_count) + free_stats= stats_cb; + } + stats_cb= new_stats; + new_stats->usage_count++; + mysql_mutex_unlock(&LOCK_share); + if (free_stats) + delete free_stats; +} + + +/* Check if we have statistics for histograms */ + +bool TABLE_SHARE::histograms_exists() +{ + bool res= 0; + if (stats_cb) + { + mysql_mutex_lock(&LOCK_share); + if (stats_cb) + res= stats_cb->histograms_exists(); + mysql_mutex_unlock(&LOCK_share); + } + return res; +} + + +/* + Free information allocated by openfrm + + SYNOPSIS + closefrm() + table TABLE object to free +*/ + +int closefrm(TABLE *table) +{ + int error=0; + DBUG_ENTER("closefrm"); + DBUG_PRINT("enter", ("table: %p", table)); + + if (table->db_stat) + error=table->file->ha_close(); + table->alias.free(); + if (table->expr_arena) + table->expr_arena->free_items(); + if (table->field) + { + for (Field **ptr=table->field ; *ptr ; ptr++) + { + delete *ptr; + } + table->field= 0; + } + delete table->file; + table->file= 0; /* For easier errorchecking */ +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (table->part_info) + { + /* Allocated through table->mem_root, freed below */ + free_items(table->part_info->item_free_list); + table->part_info->item_free_list= 0; + table->part_info= 0; + } +#endif + if (table->stats_cb) + { + DBUG_ASSERT(table->s->tmp_table == NO_TMP_TABLE); + table->free_engine_stats(); + } + + free_root(&table->mem_root, MYF(0)); + DBUG_RETURN(error); +} + + +/* Deallocate temporary 
blob storage */ + +void free_blobs(TABLE *table) +{ + uint *ptr, *end; + for (ptr= table->s->blob_field, end=ptr + table->s->blob_fields ; + ptr != end ; + ptr++) + { + /* + Reduced TABLE objects which are used by row-based replication for + type conversion might have some fields missing. Skip freeing BLOB + buffers for such missing fields. + */ + if (table->field[*ptr]) + ((Field_blob*) table->field[*ptr])->free(); + } +} + + +/** + Reclaim temporary blob storage which is bigger than + a threshold. + + @param table A handle to the TABLE object containing blob fields + @param size The threshold value. + +*/ + +void free_field_buffers_larger_than(TABLE *table, uint32 size) +{ + uint *ptr, *end; + for (ptr= table->s->blob_field, end=ptr + table->s->blob_fields ; + ptr != end ; + ptr++) + { + Field_blob *blob= (Field_blob*) table->field[*ptr]; + if (blob->get_field_buffer_size() > size) + blob->free(); + } +} + +/* error message when opening a form file */ + +void open_table_error(TABLE_SHARE *share, enum open_frm_error error, + int db_errno) +{ + char buff[FN_REFLEN]; + const myf errortype= ME_ERROR_LOG; // Write fatals error to log + DBUG_ENTER("open_table_error"); + DBUG_PRINT("info", ("error: %d db_errno: %d", error, db_errno)); + + switch (error) { + case OPEN_FRM_OPEN_ERROR: + /* + Test if file didn't exists. We have to also test for EINVAL as this + may happen on windows when opening a file with a not legal file name + */ + if (db_errno == ENOENT || db_errno == EINVAL) + my_error(ER_NO_SUCH_TABLE, MYF(0), share->db.str, share->table_name.str); + else + { + strxmov(buff, share->normalized_path.str, reg_ext, NullS); + my_error((db_errno == EMFILE) ? 
ER_CANT_OPEN_FILE : ER_FILE_NOT_FOUND, + errortype, buff, db_errno); + } + break; + case OPEN_FRM_OK: + DBUG_ASSERT(0); // open_table_error() is never called for this one + break; + case OPEN_FRM_ERROR_ALREADY_ISSUED: + break; + case OPEN_FRM_NOT_A_VIEW: + my_error(ER_WRONG_OBJECT, MYF(0), share->db.str, + share->table_name.str, "VIEW"); + break; + case OPEN_FRM_NOT_A_TABLE: + my_error(ER_WRONG_OBJECT, MYF(0), share->db.str, + share->table_name.str, "TABLE"); + break; + case OPEN_FRM_DISCOVER: + DBUG_ASSERT(0); // open_table_error() is never called for this one + break; + case OPEN_FRM_CORRUPTED: + strxmov(buff, share->normalized_path.str, reg_ext, NullS); + my_error(ER_NOT_FORM_FILE, errortype, buff); + break; + case OPEN_FRM_READ_ERROR: + strxmov(buff, share->normalized_path.str, reg_ext, NullS); + my_error(ER_ERROR_ON_READ, errortype, buff, db_errno); + break; + case OPEN_FRM_NEEDS_REBUILD: + strxnmov(buff, sizeof(buff)-1, + share->db.str, ".", share->table_name.str, NullS); + my_error(ER_TABLE_NEEDS_REBUILD, errortype, buff); + break; + } + DBUG_VOID_RETURN; +} /* open_table_error */ + + + /* + ** fix a str_type to a array type + ** typeparts separated with some char. differents types are separated + ** with a '\0' + */ + +static bool +fix_type_pointers(const char ***typelib_value_names, + uint **typelib_value_lengths, + TYPELIB *point_to_type, uint types, + char *ptr, size_t length) +{ + const char *end= ptr + length; + + while (types--) + { + char sep; + point_to_type->name=0; + point_to_type->type_names= *typelib_value_names; + point_to_type->type_lengths= *typelib_value_lengths; + + /* + Typelib can be encoded as: + 1) 0x00 - empty typelib + 2) 0xFF 0x00 - empty typelib (index names) + 3) sep (value sep)... 0x00 - non-empty typelib (where sep is a separator) + */ + if (length == 2 && ptr[0] == (char) 0xFF && ptr[1] == '\0') + { + /* + This is a special case #2. 
+ If there are no indexes at all, index names can be encoded + as a two byte sequence: 0xFF 0x00 + TODO: Check if it's a bug in the FRM packing routine. + It should probably write just 0x00 instead of 0xFF00. + */ + ptr+= 2; + } + else if ((sep= *ptr++)) // A non-empty typelib + { + for ( ; ptr < end; ) + { + // Now scan the next value+sep pair + char *vend= (char*) memchr(ptr, sep, end - ptr); + if (!vend) + return true; // Bad format + *((*typelib_value_names)++)= ptr; + *((*typelib_value_lengths)++)= (uint) (vend - ptr); + *vend= '\0'; // Change sep to '\0' + ptr= vend + 1; // Shift from sep to the next byte + /* + Now we can have either: + - the end-of-typelib marker (0x00) + - more value+sep pairs + */ + if (!*ptr) + { + /* + We have an ambiguity here. 0x00 can be an end-of-typelib marker, + but it can also be a part of the next value: + CREATE TABLE t1 (a ENUM(0x61, 0x0062) CHARACTER SET BINARY); + If this is the last ENUM/SET in the table and there is still more + packed data left after 0x00, then we know for sure that 0x00 + is a part of the next value. + TODO-10.5+: we should eventually introduce a new unambiguous + typelib encoding for FRM. + */ + if (!types && ptr + 1 < end) + continue; // A binary value starting with 0x00 + ptr++; // Consume the end-of-typelib marker + break; // End of the current typelib + } + } + } + point_to_type->count= (uint) (*typelib_value_names - + point_to_type->type_names); + point_to_type++; + *((*typelib_value_names)++)= NullS; /* End of type */ + *((*typelib_value_lengths)++)= 0; /* End of type */ + } + return ptr != end; +} /* fix_type_pointers */ + + +/* + Search after a field with given start & length + If an exact field isn't found, return longest field with starts + at right position. 
+ + NOTES + This is needed because in some .frm fields 'fieldnr' was saved wrong + + RETURN + 0 error + # field number +1 +*/ + +static field_index_t find_field(Field **fields, uchar *record, uint start, + uint length) +{ + Field **field; + field_index_t i, pos; + + pos= 0; + for (field= fields, i=1 ; *field ; i++,field++) + { + if ((*field)->offset(record) == start) + { + if ((*field)->key_length() == length) + return (i); + if (!pos || fields[pos-1]->pack_length() < + (*field)->pack_length()) + pos= i; + } + } + return (pos); +} + + +/* + Store an SQL quoted string. + + SYNOPSIS + append_unescaped() + res result String + pos string to be quoted + length it's length + + NOTE + This function works correctly with utf8 or single-byte charset strings. + May fail with some multibyte charsets though. +*/ + +void append_unescaped(String *res, const char *pos, size_t length) +{ + const char *end= pos+length; + res->append('\''); + + for (; pos != end ; pos++) + { + switch (*pos) { + case 0: /* Must be escaped for 'mysql' */ + res->append('\\'); + res->append('0'); + break; + case '\n': /* Must be escaped for logs */ + res->append('\\'); + res->append('n'); + break; + case '\r': + res->append('\\'); /* This gives better readability */ + res->append('r'); + break; + case '\\': + res->append('\\'); /* Because of the sql syntax */ + res->append('\\'); + break; + case '\'': + res->append('\''); /* Because of the sql syntax */ + res->append('\''); + break; + default: + res->append(*pos); + break; + } + } + res->append('\''); +} + + +void prepare_frm_header(THD *thd, uint reclength, uchar *fileinfo, + HA_CREATE_INFO *create_info, uint keys, KEY *key_info) +{ + size_t key_comment_total_bytes= 0; + uint i; + uchar frm_format= create_info->expression_length ? 
FRM_VER_EXPRESSSIONS + : FRM_VER_TRUE_VARCHAR; + DBUG_ENTER("prepare_frm_header"); + + /* Fix this when we have new .frm files; Current limit is 4G rows (TODO) */ + if (create_info->max_rows > UINT_MAX32) + create_info->max_rows= UINT_MAX32; + if (create_info->min_rows > UINT_MAX32) + create_info->min_rows= UINT_MAX32; + + /* + Keep in sync with pack_keys() in unireg.cc + For each key: + 8 bytes for the key header + 9 bytes for each key-part (MAX_REF_PARTS) + NAME_LEN bytes for the name + 1 byte for the NAMES_SEP_CHAR (before the name) + For all keys: + 6 bytes for the header + 1 byte for the NAMES_SEP_CHAR (after the last name) + 9 extra bytes (padding for safety? alignment?) + */ + for (i= 0; i < keys; i++) + { + DBUG_ASSERT(MY_TEST(key_info[i].flags & HA_USES_COMMENT) == + (key_info[i].comment.length > 0)); + if (key_info[i].flags & HA_USES_COMMENT) + key_comment_total_bytes += 2 + key_info[i].comment.length; + if (key_info[i].algorithm == HA_KEY_ALG_LONG_HASH) + frm_format= FRM_VER_EXPRESSSIONS; + } + + size_t key_length, tmp_key_length, tmp, csid; + bzero((char*) fileinfo, FRM_HEADER_SIZE); + /* header */ + fileinfo[0]=(uchar) 254; + fileinfo[1]= 1; + fileinfo[2]= frm_format; + + DBUG_ASSERT(ha_storage_engine_is_enabled(create_info->db_type)); + fileinfo[3]= (uchar) ha_legacy_type(create_info->db_type); + + key_length= keys * (8 + MAX_REF_PARTS * 9 + NAME_LEN + 1) + 16 + + key_comment_total_bytes; + + int2store(fileinfo+8,1); + tmp_key_length= (key_length < 0xffff) ? 
key_length : 0xffff; + int2store(fileinfo+14,tmp_key_length); + int2store(fileinfo+16,reclength); + int4store(fileinfo+18,create_info->max_rows); + int4store(fileinfo+22,create_info->min_rows); + /* fileinfo[26] is set in mysql_create_frm() */ + fileinfo[27]=2; // Use long pack-fields + /* fileinfo[28 & 29] is set to key_info_length in mysql_create_frm() */ + create_info->table_options|=HA_OPTION_LONG_BLOB_PTR; // Use portable blob pointers + int2store(fileinfo+30,create_info->table_options); + fileinfo[32]=0; // No filename anymore + fileinfo[33]=5; // Mark for 5.0 frm file + int4store(fileinfo+34,create_info->avg_row_length); + csid= (create_info->default_table_charset ? + create_info->default_table_charset->number : 0); + fileinfo[38]= (uchar) csid; + fileinfo[39]= (uchar) ((uint) create_info->transactional | + ((uint) create_info->page_checksum << 2) | + ((create_info->sequence ? HA_CHOICE_YES : 0) << 4)); + fileinfo[40]= (uchar) create_info->row_type; + /* Bytes 41-46 were for RAID support; now reused for other purposes */ + fileinfo[41]= (uchar) (csid >> 8); + int2store(fileinfo+42, create_info->stats_sample_pages & 0xffff); + fileinfo[44]= (uchar) create_info->stats_auto_recalc; + int2store(fileinfo+45, (create_info->check_constraint_list->elements+ + create_info->field_check_constraints)); + int4store(fileinfo+47, key_length); + tmp= MYSQL_VERSION_ID; // Store to avoid warning from int4store + int4store(fileinfo+51, tmp); + int4store(fileinfo+55, create_info->extra_size); + /* + 59-60 is unused since 10.2.4 + 61 for default_part_db_type + */ + int2store(fileinfo+62, create_info->key_block_size); + DBUG_VOID_RETURN; +} /* prepare_fileinfo */ + + +void update_create_info_from_table(HA_CREATE_INFO *create_info, TABLE *table) +{ + TABLE_SHARE *share= table->s; + DBUG_ENTER("update_create_info_from_table"); + + create_info->max_rows= share->max_rows; + create_info->min_rows= share->min_rows; + create_info->table_options= share->db_create_options; + 
create_info->avg_row_length= share->avg_row_length; + create_info->row_type= share->row_type; + create_info->key_block_size= share->key_block_size; + create_info->default_table_charset= share->table_charset; + create_info->alter_table_convert_to_charset= 0; + create_info->comment= share->comment; + create_info->transactional= share->transactional; + create_info->page_checksum= share->page_checksum; + create_info->option_list= share->option_list; + create_info->sequence= MY_TEST(share->sequence); + + DBUG_VOID_RETURN; +} + +int +rename_file_ext(const char * from,const char * to,const char * ext) +{ + /* Reserve space for ./databasename/tablename.frm + NUL byte */ + char from_b[2 + FN_REFLEN + 4 + 1], to_b[2 + FN_REFLEN + 4 + 1]; + (void) strxmov(from_b,from,ext,NullS); + (void) strxmov(to_b,to,ext,NullS); + return mysql_file_rename(key_file_frm, from_b, to_b, MYF(0)); +} + + +/* + Allocate string field in MEM_ROOT and return it as String + + SYNOPSIS + get_field() + mem MEM_ROOT for allocating + field Field for retrieving of string + res result String + + RETURN VALUES + 1 string is empty + 0 all ok +*/ + +bool get_field(MEM_ROOT *mem, Field *field, String *res) +{ + const char *to; + StringBuffer str; + bool rc; + THD *thd= field->get_thd(); + Sql_mode_instant_remove sms(thd, MODE_PAD_CHAR_TO_FULL_LENGTH); + + field->val_str(&str); + if ((rc= !str.length() || + !(to= strmake_root(mem, str.ptr(), str.length())))) + { + res->length(0); + return rc; + } + res->set(to, str.length(), field->charset()); + return false; +} + + +/* + Allocate string field in MEM_ROOT and return it as NULL-terminated string + + SYNOPSIS + get_field() + mem MEM_ROOT for allocating + field Field for retrieving of string + + RETURN VALUES + NullS string is empty + # pointer to NULL-terminated string value of field +*/ + +char *get_field(MEM_ROOT *mem, Field *field) +{ + String str; + bool rc= get_field(mem, field, &str); + DBUG_ASSERT(rc || str.ptr()[str.length()] == '\0'); + return rc ? 
NullS : (char *) str.ptr(); +} + +/* + DESCRIPTION + given a buffer with a key value, and a map of keyparts + that are present in this value, returns the length of the value +*/ +uint calculate_key_len(TABLE *table, uint key, const uchar *buf, + key_part_map keypart_map) +{ + /* works only with key prefixes */ + DBUG_ASSERT(((keypart_map + 1) & keypart_map) == 0); + + KEY *key_info= table->key_info+key; + KEY_PART_INFO *key_part= key_info->key_part; + KEY_PART_INFO *end_key_part= key_part + table->actual_n_key_parts(key_info); + uint length= 0; + + while (key_part < end_key_part && keypart_map) + { + length+= key_part->store_length; + keypart_map >>= 1; + key_part++; + } + return length; +} + +#ifndef DBUG_OFF +/** + Verifies that database/table name is in lowercase, when it should be + + This is supposed to be used only inside DBUG_ASSERT() +*/ +bool ok_for_lower_case_names(const char *name) +{ + if (!lower_case_table_names || !name) + return true; + + char buf[SAFE_NAME_LEN]; + strmake_buf(buf, name); + my_casedn_str(files_charset_info, buf); + return strcmp(name, buf) == 0; +} +#endif + +/* + Check if database name is valid + + SYNPOSIS + check_db_name() + org_name Name of database + + NOTES + If lower_case_table_names is set to 1 then database name is converted + to lower case + + RETURN + 0 ok + 1 error +*/ + +bool check_db_name(LEX_STRING *org_name) +{ + char *name= org_name->str; + size_t name_length= org_name->length; + bool check_for_path_chars; + + if ((check_for_path_chars= check_mysql50_prefix(name))) + { + name+= MYSQL50_TABLE_NAME_PREFIX_LENGTH; + name_length-= MYSQL50_TABLE_NAME_PREFIX_LENGTH; + } + + if (!name_length || name_length > NAME_LEN) + return 1; + + if (lower_case_table_names == 1 && name != any_db.str) + { + org_name->length= name_length= my_casedn_str(files_charset_info, name); + if (check_for_path_chars) + org_name->length+= MYSQL50_TABLE_NAME_PREFIX_LENGTH; + } + if (db_name_is_in_ignore_db_dirs_list(name)) + return 1; + + return 
check_table_name(name, name_length, check_for_path_chars); +} + + +/* + Allow anything as a table name, as long as it doesn't contain an + ' ' at the end + returns 1 on error +*/ + +bool check_table_name(const char *name, size_t length, bool check_for_path_chars) +{ + // name length in symbols + size_t name_length= 0; + const char *end= name+length; + + if (!check_for_path_chars && + (check_for_path_chars= check_mysql50_prefix(name))) + { + name+= MYSQL50_TABLE_NAME_PREFIX_LENGTH; + length-= MYSQL50_TABLE_NAME_PREFIX_LENGTH; + } + + if (!length || length > NAME_LEN) + return 1; +#if defined(USE_MB) && defined(USE_MB_IDENT) + bool last_char_is_space= FALSE; +#else + if (name[length-1]==' ') + return 1; +#endif + + while (name != end) + { +#if defined(USE_MB) && defined(USE_MB_IDENT) + last_char_is_space= my_isspace(system_charset_info, *name); + if (system_charset_info->use_mb()) + { + int len=my_ismbchar(system_charset_info, name, end); + if (len) + { + name+= len; + name_length++; + continue; + } + } +#endif + if (check_for_path_chars && + (*name == '/' || *name == '\\' || *name == '~' || *name == FN_EXTCHAR)) + return 1; + /* + We don't allow zero byte in table/schema names: + - Some code still uses NULL-terminated strings. + Zero bytes will confuse this code. + - There is a little practical use of zero bytes in names anyway. + Note, if the string passed as "name" comes here + from the parser as an identifier, it does not contain zero bytes, + as the parser rejects zero bytes in identifiers. + But "name" can also come here from queries like this: + SELECT * FROM I_S.TABLES WHERE TABLE_NAME='str'; + In this case "name" is a general string expression + and it can have any arbitrary bytes, including zero bytes. 
+ */ + if (*name == 0x00) + return 1; + name++; + name_length++; + } +#if defined(USE_MB) && defined(USE_MB_IDENT) + return last_char_is_space || (name_length > NAME_CHAR_LEN); +#else + return FALSE; +#endif +} + + +bool check_column_name(const char *name) +{ + // name length in symbols + size_t name_length= 0; + bool last_char_is_space= TRUE; + + while (*name) + { +#if defined(USE_MB) && defined(USE_MB_IDENT) + last_char_is_space= my_isspace(system_charset_info, *name); + if (system_charset_info->use_mb()) + { + int len=my_ismbchar(system_charset_info, name, + name+system_charset_info->mbmaxlen); + if (len) + { + name += len; + name_length++; + continue; + } + } +#else + last_char_is_space= *name==' '; + if (*name == '\377') + return 1; +#endif + name++; + name_length++; + } + /* Error if empty or too long column name */ + return last_char_is_space || (name_length > NAME_CHAR_LEN); +} + + +bool check_period_name(const char *name) +{ + return check_column_name(name); +} + + +/** + Checks whether a table is intact. Should be done *just* after the table has + been opened. + + @param[in] table The table to check + @param[in] table_def Expected structure of the table (column name + and type) + + @retval FALSE OK + @retval TRUE There was an error. An error message is output + to the error log. We do not push an error + message into the error stack because this + function is currently only called at start up, + and such errors never reach the user. +*/ + +bool +Table_check_intact::check(TABLE *table, const TABLE_FIELD_DEF *table_def) +{ + uint i; + my_bool error= FALSE; + const TABLE_FIELD_TYPE *field_def= table_def->field; + DBUG_ENTER("table_check_intact"); + DBUG_PRINT("info",("table: %s expected_count: %d", + table->alias.c_ptr(), table_def->count)); + + /* Whether the table definition has already been validated. 
*/ + if (table->s->table_field_def_cache == table_def) + goto end; + + if (table->s->fields != table_def->count) + { + THD *thd= current_thd; + DBUG_PRINT("info", ("Column count has changed, checking the definition")); + + /* previous MySQL version */ + if (MYSQL_VERSION_ID > table->s->mysql_version) + { + report_error(ER_COL_COUNT_DOESNT_MATCH_PLEASE_UPDATE, + ER_THD(thd, ER_COL_COUNT_DOESNT_MATCH_PLEASE_UPDATE), + table->alias.c_ptr(), table_def->count, table->s->fields, + static_cast(table->s->mysql_version), + MYSQL_VERSION_ID); + DBUG_RETURN(TRUE); + } + else if (MYSQL_VERSION_ID == table->s->mysql_version) + { + report_error(ER_COL_COUNT_DOESNT_MATCH_CORRUPTED_V2, + ER_THD(thd, ER_COL_COUNT_DOESNT_MATCH_CORRUPTED_V2), + table->s->db.str, table->s->table_name.str, + table_def->count, table->s->fields); + DBUG_RETURN(TRUE); + } + /* + Something has definitely changed, but we're running an older + version of MySQL with new system tables. + Let's check column definitions. If a column was added at + the end of the table, then we don't care much since such change + is backward compatible. + */ + } + else + { + StringBuffer<1024> sql_type(system_charset_info); + sql_type.extra_allocation(256); // Allocate min 256 characters at once + for (i=0 ; i < table_def->count; i++, field_def++) + { + sql_type.length(0); + if (i < table->s->fields) + { + Field *field= table->field[i]; + + if (strncmp(field->field_name.str, field_def->name.str, + field_def->name.length)) + { + /* + Name changes are not fatal, we use ordinal numbers to access columns. + Still this can be a sign of a tampered table, output an error + to the error log. + */ + report_error(0, "Incorrect definition of table %s.%s: " + "expected column '%s' at position %d, found '%s'.", + table->s->db.str, table->alias.c_ptr(), + field_def->name.str, i, + field->field_name.str); + } + field->sql_type(sql_type); + /* + Generally, if column types don't match, then something is + wrong. 
+ + However, we only compare column definitions up to the + length of the original definition, since we consider the + following definitions compatible: + + 1. DATETIME and DATETIM + 2. INT(11) and INT(11 + 3. SET('one', 'two') and SET('one', 'two', 'more') + + For SETs or ENUMs, if the same prefix is there it's OK to + add more elements - they will get higher ordinal numbers and + the new table definition is backward compatible with the + original one. + */ + if (strncmp(sql_type.c_ptr_safe(), field_def->type.str, + field_def->type.length - 1)) + { + report_error(0, "Incorrect definition of table %s.%s: " + "expected column '%s' at position %d to have type " + "%s, found type %s.", table->s->db.str, + table->alias.c_ptr(), + field_def->name.str, i, field_def->type.str, + sql_type.c_ptr_safe()); + error= TRUE; + } + else if (field_def->cset.str && !field->has_charset()) + { + report_error(0, "Incorrect definition of table %s.%s: " + "expected the type of column '%s' at position %d " + "to have character set '%s' but the type has no " + "character set.", table->s->db.str, + table->alias.c_ptr(), + field_def->name.str, i, field_def->cset.str); + error= TRUE; + } + else if (field_def->cset.str && + strncmp(field->charset()->cs_name.str, field_def->cset.str, + field_def->cset.length)) + { + report_error(0, "Incorrect definition of table %s.%s: " + "expected the type of column '%s' at position %d " + "to have character set '%s' but found " + "character set '%s'.", table->s->db.str, + table->alias.c_ptr(), + field_def->name.str, i, field_def->cset.str, + field->charset()->cs_name.str); + error= TRUE; + } + } + else + { + report_error(0, "Incorrect definition of table %s.%s: " + "expected column '%s' at position %d to have type %s " + " but the column is not found.", + table->s->db.str, table->alias.c_ptr(), + field_def->name.str, i, field_def->type.str); + error= TRUE; + } + } + } + + if (table_def->primary_key_parts) + { + if (table->s->primary_key == MAX_KEY) + { + 
report_error(0, "Incorrect definition of table %s.%s: " + "missing primary key.", table->s->db.str, + table->alias.c_ptr()); + error= TRUE; + } + else + { + KEY *pk= &table->s->key_info[table->s->primary_key]; + if (pk->user_defined_key_parts != table_def->primary_key_parts) + { + report_error(0, "Incorrect definition of table %s.%s: " + "Expected primary key to have %u columns, but instead " + "found %u columns.", table->s->db.str, + table->alias.c_ptr(), table_def->primary_key_parts, + pk->user_defined_key_parts); + error= TRUE; + } + else + { + for (i= 0; i < pk->user_defined_key_parts; ++i) + { + if (table_def->primary_key_columns[i] + 1 != pk->key_part[i].fieldnr) + { + report_error(0, "Incorrect definition of table %s.%s: Expected " + "primary key part %u to refer to column %u, but " + "instead found column %u.", table->s->db.str, + table->alias.c_ptr(), i + 1, + table_def->primary_key_columns[i] + 1, + pk->key_part[i].fieldnr); + error= TRUE; + } + } + } + } + } + + if (likely(! error)) + table->s->table_field_def_cache= table_def; + +end: + + if (has_keys && !error && !table->key_info) + { + report_error(0, "Incorrect definition of table %s.%s: " + "indexes are missing", + table->s->db.str, table->alias.c_ptr()); + error= TRUE; + } + + DBUG_RETURN(error); +} + + +void Table_check_intact_log_error::report_error(uint, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + error_log_print(ERROR_LEVEL, fmt, args); + va_end(args); +} + + +/** + Traverse portion of wait-for graph which is reachable through edge + represented by this flush ticket in search for deadlocks. + + @retval TRUE A deadlock is found. A victim is remembered + by the visitor. + @retval FALSE Success, no deadlocks. 
+*/ + +bool Wait_for_flush::accept_visitor(MDL_wait_for_graph_visitor *gvisitor) +{ + return m_share->visit_subgraph(this, gvisitor); +} + + +uint Wait_for_flush::get_deadlock_weight() const +{ + return m_deadlock_weight; +} + + +/** + Traverse portion of wait-for graph which is reachable through this + table share in search for deadlocks. + + @param waiting_ticket Ticket representing wait for this share. + @param dvisitor Deadlock detection visitor. + + @retval TRUE A deadlock is found. A victim is remembered + by the visitor. + @retval FALSE No deadlocks, it's OK to begin wait. +*/ + +bool TABLE_SHARE::visit_subgraph(Wait_for_flush *wait_for_flush, + MDL_wait_for_graph_visitor *gvisitor) +{ + TABLE *table; + MDL_context *src_ctx= wait_for_flush->get_ctx(); + bool result= TRUE; + + /* + To protect all_tables list from being concurrently modified + while we are iterating through it we increment tdc.all_tables_refs. + This does not introduce deadlocks in the deadlock detector + because we won't try to acquire tdc.LOCK_table_share while + holding a write-lock on MDL_lock::m_rwlock. + */ + mysql_mutex_lock(&tdc->LOCK_table_share); + tdc->all_tables_refs++; + mysql_mutex_unlock(&tdc->LOCK_table_share); + + All_share_tables_list::Iterator tables_it(tdc->all_tables); + + /* + In case of multiple searches running in parallel, avoid going + over the same loop twice and shortcut the search. + Do it after taking the lock to weed out unnecessary races. 
+ */ + if (src_ctx->m_wait.get_status() != MDL_wait::EMPTY) + { + result= FALSE; + goto end; + } + + if (gvisitor->enter_node(src_ctx)) + goto end; + + while ((table= tables_it++)) + { + DBUG_ASSERT(table->in_use && tdc->flushed); + if (gvisitor->inspect_edge(&table->in_use->mdl_context)) + { + goto end_leave_node; + } + } + + tables_it.rewind(); + while ((table= tables_it++)) + { + DBUG_ASSERT(table->in_use && tdc->flushed); + if (table->in_use->mdl_context.visit_subgraph(gvisitor)) + { + goto end_leave_node; + } + } + + result= FALSE; + +end_leave_node: + gvisitor->leave_node(src_ctx); + +end: + mysql_mutex_lock(&tdc->LOCK_table_share); + if (!--tdc->all_tables_refs) + mysql_cond_broadcast(&tdc->COND_release); + mysql_mutex_unlock(&tdc->LOCK_table_share); + + return result; +} + + +/** + Wait until the subject share is removed from the table + definition cache and make sure it's destroyed. + + @param mdl_context MDL context for thread which is going to wait. + @param abstime Timeout for waiting as absolute time value. + @param deadlock_weight Weight of this wait for deadlock detector. + + @pre LOCK_table_share is locked, the share is marked for flush and + this connection does not reference the share. + LOCK_table_share will be unlocked temporarily during execution. + + It may happen that another FLUSH TABLES thread marked this share + for flush, but didn't yet purge it from table definition cache. + In this case we may start waiting for a table share that has no + references (ref_count == 0). We do this with assumption that this + another FLUSH TABLES thread is about to purge this share. + + @retval FALSE - Success. + @retval TRUE - Error (OOM, deadlock, timeout, etc...). 
+*/ + +bool TABLE_SHARE::wait_for_old_version(THD *thd, struct timespec *abstime, + uint deadlock_weight) +{ + MDL_context *mdl_context= &thd->mdl_context; + Wait_for_flush ticket(mdl_context, this, deadlock_weight); + MDL_wait::enum_wait_status wait_status; + + mysql_mutex_assert_owner(&tdc->LOCK_table_share); + DBUG_ASSERT(tdc->flushed); + + tdc->m_flush_tickets.push_front(&ticket); + + mdl_context->m_wait.reset_status(); + + mysql_mutex_unlock(&tdc->LOCK_table_share); + + mdl_context->will_wait_for(&ticket); + + mdl_context->find_deadlock(); + + wait_status= mdl_context->m_wait.timed_wait(thd, abstime, TRUE, + &stage_waiting_for_table_flush); + + mdl_context->done_waiting_for(); + + mysql_mutex_lock(&tdc->LOCK_table_share); + tdc->m_flush_tickets.remove(&ticket); + mysql_cond_broadcast(&tdc->COND_release); + mysql_mutex_unlock(&tdc->LOCK_table_share); + + + /* + In cases when our wait was aborted by KILL statement, + a deadlock or a timeout, the share might still be referenced, + so we don't delete it. Note, that we can't determine this + condition by checking wait_status alone, since, for example, + a timeout can happen after all references to the table share + were released, but before the share is removed from the + cache and we receive the notification. This is why + we first destroy the share, and then look at + wait_status. + */ + switch (wait_status) + { + case MDL_wait::GRANTED: + return FALSE; + case MDL_wait::VICTIM: + my_error(ER_LOCK_DEADLOCK, MYF(0)); + return TRUE; + case MDL_wait::TIMEOUT: + my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0)); + return TRUE; + case MDL_wait::KILLED: + return TRUE; + default: + DBUG_ASSERT(0); + return TRUE; + } +} + + +/** + Initialize TABLE instance (newly created, or coming either from table + cache or THD::temporary_tables list) and prepare it for further use + during statement execution. Set the 'alias' attribute from the specified + TABLE_LIST element. 
Remember the TABLE_LIST element in the + TABLE::pos_in_table_list member. + + @param thd Thread context. + @param tl TABLE_LIST element. +*/ + +void TABLE::init(THD *thd, TABLE_LIST *tl) +{ + DBUG_ASSERT(s->tmp_table != NO_TMP_TABLE || s->tdc->ref_count > 0); + + if (thd->lex->need_correct_ident()) + alias_name_used= my_strcasecmp(table_alias_charset, + s->table_name.str, + tl->alias.str); + /* Fix alias if table name changes. */ + if (!alias.alloced_length() || strcmp(alias.c_ptr(), tl->alias.str)) + alias.copy(tl->alias.str, tl->alias.length, alias.charset()); + + tablenr= thd->current_tablenr++; + used_fields= 0; + const_table= 0; + null_row= 0; + maybe_null= 0; + force_index= 0; + force_index_order= 0; + force_index_group= 0; + status= STATUS_NO_RECORD; + insert_values= 0; + fulltext_searched= 0; + file->ft_handler= 0; + reginfo.impossible_range= 0; + reginfo.join_tab= NULL; + reginfo.not_exists_optimize= FALSE; + reginfo.skip_locked= false; + created= TRUE; + cond_selectivity= 1.0; + cond_selectivity_sampling_explain= NULL; + range_rowid_filter_cost_info_elems= 0; + range_rowid_filter_cost_info_ptr= NULL; + range_rowid_filter_cost_info= NULL; + vers_write= s->versioned; + opt_range_condition_rows=0; + no_cache= false; + initialize_opt_range_structures(); +#ifdef HAVE_REPLICATION + /* used in RBR Triggers */ + master_had_triggers= 0; +#endif + /* Catch wrong handling of the auto_increment_field_not_null. 
*/ + DBUG_ASSERT(!auto_increment_field_not_null); + auto_increment_field_not_null= FALSE; + + pos_in_table_list= tl; + + clear_column_bitmaps(); + for (Field **f_ptr= field ; *f_ptr ; f_ptr++) + { + (*f_ptr)->next_equal_field= NULL; + (*f_ptr)->cond_selectivity= 1.0; + } + + /* enable and clear or disable engine query statistics */ + if (thd->should_collect_handler_stats()) + file->ha_handler_stats_reset(); + else + file->ha_handler_stats_disable(); + + notnull_cond= 0; + DBUG_ASSERT(!file->keyread_enabled()); + + restore_record(this, s->default_values); + + /* Tables may be reused in a sub statement. */ + DBUG_ASSERT(!file->extra(HA_EXTRA_IS_ATTACHED_CHILDREN)); +} + + +/* + Create Item_field for each column in the table. + + SYNPOSIS + TABLE::fill_item_list() + item_list a pointer to an empty list used to store items + + DESCRIPTION + Create Item_field object for each column in the table and + initialize it with the corresponding Field. New items are + created in the current THD memory root. + + RETURN VALUE + 0 success + 1 out of memory +*/ + +bool TABLE::fill_item_list(List *item_list) const +{ + /* + All Item_field's created using a direct pointer to a field + are fixed in Item_field constructor. + */ + for (Field **ptr= field; *ptr; ptr++) + { + Item_field *item= new (in_use->mem_root) Item_field(in_use, *ptr); + if (!item || item_list->push_back(item)) + return TRUE; + } + return FALSE; +} + +/* + Reset an existing list of Item_field items to point to the + Fields of this table. + + SYNPOSIS + TABLE::fill_item_list() + item_list a non-empty list with Item_fields + + DESCRIPTION + This is a counterpart of fill_item_list used to redirect + Item_fields to the fields of a newly created table. + The caller must ensure that number of items in the item_list + is the same as the number of columns in the table. 
+*/ + +void TABLE::reset_item_list(List *item_list, uint skip) const +{ + List_iterator_fast it(*item_list); + Field **ptr= field; + for ( ; skip && *ptr; skip--) + ptr++; + for (; *ptr; ptr++) + { + Item_field *item_field= (Item_field*) it++; + DBUG_ASSERT(item_field != 0); + item_field->reset_field(*ptr); + } +} + +/* + calculate md5 of query + + SYNOPSIS + TABLE_LIST::calc_md5() + buffer buffer for md5 writing +*/ + +void TABLE_LIST::calc_md5(char *buffer) +{ + uchar digest[16]; + compute_md5_hash(digest, select_stmt.str, + select_stmt.length); + sprintf(buffer, + "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", + digest[0], digest[1], digest[2], digest[3], + digest[4], digest[5], digest[6], digest[7], + digest[8], digest[9], digest[10], digest[11], + digest[12], digest[13], digest[14], digest[15]); +} + + +/** + @brief + Create field translation for mergeable derived table/view. + + @param thd Thread handle + + @details + Create field translation for mergeable derived table/view. + + @return FALSE ok. + @return TRUE an error occur. +*/ + +bool TABLE_LIST::create_field_translation(THD *thd) +{ + Item *item; + Field_translator *transl; + SELECT_LEX *select= get_single_select(); + List_iterator_fast it(select->item_list); + uint field_count= 0; + Query_arena *arena, backup; + bool res= FALSE; + DBUG_ENTER("TABLE_LIST::create_field_translation"); + DBUG_PRINT("enter", ("Alias: '%s' Unit: %p", + (alias.str ? alias.str : ""), + get_unit())); + + if (thd->stmt_arena->is_conventional() || + thd->stmt_arena->is_stmt_prepare_or_first_sp_execute()) + { + /* initialize lists */ + used_items.empty(); + persistent_used_items.empty(); + } + else + { + /* + Copy the list created by natural join procedure because the procedure + will not be repeated. + */ + used_items= persistent_used_items; + } + + if (field_translation) + { + /* + Update items in the field translation after view have been prepared. 
+ It's needed because some items in the select list, like IN subselects, + might be substituted for optimized ones. + */ + if (is_view() && get_unit()->prepared && !field_translation_updated) + { + field_translation_updated= TRUE; + if (static_cast(field_translation_end - field_translation) < + select->item_list.elements) + goto allocate; + while ((item= it++)) + { + field_translation[field_count++].item= item; + } + } + + DBUG_RETURN(FALSE); + } + +allocate: + arena= thd->activate_stmt_arena_if_needed(&backup); + + /* Create view fields translation table */ + + if (!(transl= + (Field_translator*)(thd->stmt_arena-> + alloc(select->item_list.elements * + sizeof(Field_translator))))) + { + res= TRUE; + goto exit; + } + + while ((item= it++)) + { + DBUG_ASSERT(item->name.str && item->name.str[0]); + transl[field_count].name.str= thd->strmake(item->name.str, item->name.length); + transl[field_count].name.length= item->name.length; + transl[field_count++].item= item; + } + field_translation= transl; + field_translation_end= transl + field_count; + /* It's safe to cache this table for prepared statements */ + cacheable_table= 1; + +exit: + if (arena) + thd->restore_active_arena(arena, &backup); + + DBUG_RETURN(res); +} + + +/** + @brief + Create field translation for mergeable derived table/view. + + @param thd Thread handle + + @details + Create field translation for mergeable derived table/view. + + @return FALSE ok. + @return TRUE an error occur. 
+*/ + +bool TABLE_LIST::setup_underlying(THD *thd) +{ + DBUG_ENTER("TABLE_LIST::setup_underlying"); + + if (!view || (!field_translation && merge_underlying_list)) + { + SELECT_LEX *select= get_single_select(); + + if (create_field_translation(thd)) + DBUG_RETURN(TRUE); + + /* full text function moving to current select */ + if (select->ftfunc_list->elements) + { + Item_func_match *ifm; + SELECT_LEX *current_select= thd->lex->current_select; + List_iterator_fast + li(*(select_lex->ftfunc_list)); + while ((ifm= li++)) + current_select->ftfunc_list->push_front(ifm); + } + } + DBUG_RETURN(FALSE); +} + + +/* + Prepare where expression of derived table/view + + SYNOPSIS + TABLE_LIST::prep_where() + thd - thread handler + conds - condition of this JOIN + no_where_clause - do not build WHERE or ON outer qwery do not need it + (it is INSERT), we do not need conds if this flag is set + + NOTE: have to be called befor CHECK OPTION preparation, because it makes + fix_fields for view WHERE clause + + RETURN + FALSE - OK + TRUE - error +*/ + +bool TABLE_LIST::prep_where(THD *thd, Item **conds, + bool no_where_clause) +{ + DBUG_ENTER("TABLE_LIST::prep_where"); + bool res= FALSE; + + for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local) + { + if (tbl->is_view_or_derived() && + tbl->prep_where(thd, conds, no_where_clause)) + { + DBUG_RETURN(TRUE); + } + } + + if (where) + { + if (where->fixed()) + where->update_used_tables(); + else if (where->fix_fields(thd, &where)) + DBUG_RETURN(TRUE); + + /* + check that it is not VIEW in which we insert with INSERT SELECT + (in this case we can't add view WHERE condition to main SELECT_LEX) + */ + if (!no_where_clause && !where_processed) + { + TABLE_LIST *tbl= this; + Query_arena *arena= thd->stmt_arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); // For easier test + + /* Go up to join tree and try to find left join */ + for (; tbl; tbl= tbl->embedding) + { + if (tbl->outer_join) + { + /* + Store WHERE 
condition to ON expression for outer join, because + we can't use WHERE to correctly execute left joins on VIEWs and + this expression will not be moved to WHERE condition (i.e. will + be clean correctly for PS/SP) + */ + tbl->on_expr= and_conds(thd, tbl->on_expr, + where->copy_andor_structure(thd)); + break; + } + } + if (tbl == 0) + { + if (*conds) + res= (*conds)->fix_fields_if_needed_for_bool(thd, conds); + if (!res) + *conds= and_conds(thd, *conds, where->copy_andor_structure(thd)); + if (*conds && !res) + res= (*conds)->fix_fields_if_needed_for_bool(thd, conds); + } + if (arena) + thd->restore_active_arena(arena, &backup); + where_processed= TRUE; + } + } + + DBUG_RETURN(res); +} + +/** + Check that table/view is updatable and if it has single + underlying tables/views it is also updatable + + @return Result of the check. +*/ + +bool TABLE_LIST::single_table_updatable() +{ + if (!updatable) + return false; + if (view && view->first_select_lex()->table_list.elements == 1) + { + /* + We need to check deeply only single table views. Multi-table views + will be turned to multi-table updates and then checked by leaf tables + */ + return (((TABLE_LIST *)view->first_select_lex()->table_list.first)-> + single_table_updatable()); + } + return true; +} + + +/* + Merge ON expressions for a view + + SYNOPSIS + merge_on_conds() + thd thread handle + table table for the VIEW + is_cascaded TRUE <=> merge ON expressions from underlying views + + DESCRIPTION + This function returns the result of ANDing the ON expressions + of the given view and all underlying views. The ON expressions + of the underlying views are added only if is_cascaded is TRUE. + + RETURN + Pointer to the built expression if there is any. + Otherwise and in the case of a failure NULL is returned. 
+*/ + +static Item * +merge_on_conds(THD *thd, TABLE_LIST *table, bool is_cascaded) +{ + DBUG_ENTER("merge_on_conds"); + + Item *cond= NULL; + DBUG_PRINT("info", ("alias: %s", table->alias.str)); + if (table->on_expr) + cond= table->on_expr->copy_andor_structure(thd); + if (!table->view) + DBUG_RETURN(cond); + for (TABLE_LIST *tbl= + (TABLE_LIST*)table->view->first_select_lex()->table_list.first; + tbl; + tbl= tbl->next_local) + { + if (tbl->view && !is_cascaded) + continue; + cond= and_conds(thd, cond, merge_on_conds(thd, tbl, is_cascaded)); + } + DBUG_RETURN(cond); +} + + +/* + Prepare check option expression of table + + SYNOPSIS + TABLE_LIST::prep_check_option() + thd - thread handler + check_opt_type - WITH CHECK OPTION type (VIEW_CHECK_NONE, + VIEW_CHECK_LOCAL, VIEW_CHECK_CASCADED) + we use this parameter instead of direct check of + effective_with_check to change type of underlying + views to VIEW_CHECK_CASCADED if outer view have + such option and prevent processing of underlying + view check options if outer view have just + VIEW_CHECK_LOCAL option. + + NOTE + This method builds check option condition to use it later on + every call (usual execution or every SP/PS call). + This method have to be called after WHERE preparation + (TABLE_LIST::prep_where) + + RETURN + FALSE - OK + TRUE - error +*/ + +bool TABLE_LIST::prep_check_option(THD *thd, uint8 check_opt_type) +{ + DBUG_ENTER("TABLE_LIST::prep_check_option"); + bool is_cascaded= check_opt_type == VIEW_CHECK_CASCADED; + TABLE_LIST *merge_underlying_list= view->first_select_lex()->get_table_list(); + for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local) + { + /* see comment of check_opt_type parameter */ + if (tbl->view && tbl->prep_check_option(thd, (is_cascaded ? 
+ VIEW_CHECK_CASCADED : + VIEW_CHECK_NONE))) + DBUG_RETURN(TRUE); + } + + if (check_opt_type && !check_option_processed) + { + Query_arena *arena= thd->stmt_arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); // For easier test + + if (where) + { + check_option= where->copy_andor_structure(thd); + } + if (is_cascaded) + { + for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local) + { + if (tbl->check_option) + check_option= and_conds(thd, check_option, tbl->check_option); + } + } + check_option= and_conds(thd, check_option, + merge_on_conds(thd, this, is_cascaded)); + + if (arena) + thd->restore_active_arena(arena, &backup); + check_option_processed= TRUE; + + } + + if (check_option) + { + const char *save_where= thd->where; + thd->where= "check option"; + if (check_option->fix_fields_if_needed_for_bool(thd, &check_option)) + DBUG_RETURN(TRUE); + thd->where= save_where; + } + DBUG_RETURN(FALSE); +} + + +/** + Hide errors which show view underlying table information. + There are currently two mechanisms at work that handle errors for views, + this one and a more general mechanism based on an Internal_error_handler, + see Show_create_error_handler. The latter handles errors encountered during + execution of SHOW CREATE VIEW, while the mechanism using this method is + handles SELECT from views. The two methods should not clash. + + @param[in,out] thd thread handler + + @pre This method can be called only if there is an error. 
+*/ + +void TABLE_LIST::hide_view_error(THD *thd) +{ + if ((thd->killed && !thd->is_error())|| thd->get_internal_handler()) + return; + /* Hide "Unknown column" or "Unknown function" error */ + DBUG_ASSERT(thd->is_error()); + switch (thd->get_stmt_da()->sql_errno()) { + case ER_BAD_FIELD_ERROR: + case ER_SP_DOES_NOT_EXIST: + case ER_FUNC_INEXISTENT_NAME_COLLISION: + case ER_PROCACCESS_DENIED_ERROR: + case ER_COLUMNACCESS_DENIED_ERROR: + case ER_TABLEACCESS_DENIED_ERROR: + case ER_TABLE_NOT_LOCKED: + case ER_NO_SUCH_TABLE: + { + TABLE_LIST *top= top_table(); + thd->clear_error(); + my_error(ER_VIEW_INVALID, MYF(0), + top->view_db.str, top->view_name.str); + break; + } + + case ER_NO_DEFAULT_FOR_FIELD: + { + TABLE_LIST *top= top_table(); + thd->clear_error(); + // TODO: make correct error message + my_error(ER_NO_DEFAULT_FOR_VIEW_FIELD, MYF(0), + top->view_db.str, top->view_name.str); + break; + } + } +} + + +/* + Find underlying base tables (TABLE_LIST) which represent given + table_to_find (TABLE) + + SYNOPSIS + TABLE_LIST::find_underlying_table() + table_to_find table to find + + RETURN + 0 table is not found + found table reference +*/ + +TABLE_LIST *TABLE_LIST::find_underlying_table(TABLE *table_to_find) +{ + /* is this real table and table which we are looking for? 
*/ + if (table == table_to_find && view == 0) + return this; + if (!view) + return 0; + + for (TABLE_LIST *tbl= view->first_select_lex()->get_table_list(); + tbl; + tbl= tbl->next_local) + { + TABLE_LIST *result; + if ((result= tbl->find_underlying_table(table_to_find))) + return result; + } + return 0; +} + +/* + cleanup items belonged to view fields translation table + + SYNOPSIS + TABLE_LIST::cleanup_items() +*/ + +void TABLE_LIST::cleanup_items() +{ + if (!field_translation) + return; + + for (Field_translator *transl= field_translation; + transl < field_translation_end; + transl++) + transl->item->walk(&Item::cleanup_processor, 0, 0); +} + + +/* + check CHECK OPTION condition both for view and underlying table + + SYNOPSIS + TABLE_LIST::view_check_option() + ignore_failure ignore check option fail + + RETURN + VIEW_CHECK_OK OK + VIEW_CHECK_ERROR FAILED + VIEW_CHECK_SKIP FAILED, but continue +*/ + + +int TABLE_LIST::view_check_option(THD *thd, bool ignore_failure) +{ + if (check_option) + { + /* VIEW's CHECK OPTION CLAUSE */ + Counting_error_handler ceh; + thd->push_internal_handler(&ceh); + bool res= check_option->val_int() == 0; + thd->pop_internal_handler(); + if (ceh.errors) + return(VIEW_CHECK_ERROR); + if (res) + { + TABLE_LIST *main_view= top_table(); + const char *name_db= (main_view->view ? main_view->view_db.str : + main_view->db.str); + const char *name_table= (main_view->view ? main_view->view_name.str : + main_view->table_name.str); + my_error(ER_VIEW_CHECK_FAILED, MYF(ignore_failure ? ME_WARNING : 0), + name_db, name_table); + return ignore_failure ? VIEW_CHECK_SKIP : VIEW_CHECK_ERROR; + } + } + return table->verify_constraints(ignore_failure); +} + + +int TABLE::verify_constraints(bool ignore_failure) +{ + /* + We have to check is_error() first as we are checking it for each + constraint to catch fatal warnings. 
+ */ + if (in_use->is_error()) + return (VIEW_CHECK_ERROR); + + /* go trough check option clauses for fields and table */ + if (check_constraints && + !(in_use->variables.option_bits & OPTION_NO_CHECK_CONSTRAINT_CHECKS)) + { + if (versioned() && !vers_end_field()->is_max()) + return VIEW_CHECK_OK; + + StringBuffer field_error(system_charset_info); + for (Virtual_column_info **chk= check_constraints ; *chk ; chk++) + { + /* + yes! NULL is ok. + see 4.23.3.4 Table check constraints, part 2, SQL:2016 + */ + if (((*chk)->expr->val_int() == 0 && !(*chk)->expr->null_value) || + in_use->is_error()) + { + enum_vcol_info_type vcol_type= (*chk)->get_vcol_type(); + DBUG_ASSERT(vcol_type == VCOL_CHECK_TABLE || + vcol_type == VCOL_CHECK_FIELD); + + field_error.set_buffer_if_not_allocated(system_charset_info); + field_error.length(0); + + if (vcol_type == VCOL_CHECK_FIELD) + { + field_error.append(s->table_name); + field_error.append('.'); + } + field_error.append((*chk)->name); + my_error(ER_CONSTRAINT_FAILED, + MYF(ignore_failure ? ME_WARNING : 0), field_error.c_ptr(), + s->db.str, s->table_name.str); + return ignore_failure ? VIEW_CHECK_SKIP : VIEW_CHECK_ERROR; + } + } + } + /* + We have to check in_use() as checking constraints may have generated + warnings that should be treated as errors + */ + return(!in_use->is_error() ? 
VIEW_CHECK_OK : VIEW_CHECK_ERROR); +} + +/* + Find table in underlying tables by mask and check that only this + table belong to given mask + + SYNOPSIS + TABLE_LIST::check_single_table() + table_arg reference on variable where to store found table + (should be 0 on call, to find table, or point to table for + unique test) + map bit mask of tables + view_arg view for which we are looking table + + RETURN + FALSE table not found or found only one + TRUE found several tables +*/ + +bool TABLE_LIST::check_single_table(TABLE_LIST **table_arg, + table_map map, + TABLE_LIST *view_arg) +{ + if (!select_lex) + return FALSE; + DBUG_ASSERT(is_merged_derived()); + for (TABLE_LIST *tbl= get_single_select()->get_table_list(); + tbl; + tbl= tbl->next_local) + { + /* + Merged view has also temporary table attached (in 5.2 if it has table + then it was real table), so we have filter such temporary tables out + by checking that it is not merged view + */ + if (tbl->table && + !(tbl->is_view() && + tbl->is_merged_derived())) + { + if (tbl->table->map & map) + { + if (*table_arg) + return TRUE; + *table_arg= tbl; + tbl->check_option= view_arg->check_option; + } + } + else if (tbl->check_single_table(table_arg, map, view_arg)) + return TRUE; + } + return FALSE; +} + + +/* + Set insert_values buffer + + SYNOPSIS + set_insert_values() + mem_root memory pool for allocating + + RETURN + FALSE - OK + TRUE - out of memory +*/ + +bool TABLE_LIST::set_insert_values(MEM_ROOT *mem_root) +{ + DBUG_ENTER("set_insert_values"); + if (table) + { + DBUG_PRINT("info", ("setting insert_value for table")); + if (!table->insert_values && + !(table->insert_values= (uchar *)alloc_root(mem_root, + table->s->rec_buff_length))) + DBUG_RETURN(TRUE); + } + else + { + DBUG_PRINT("info", ("setting insert_value for view")); + DBUG_ASSERT(is_view_or_derived() && is_merged_derived()); + for (TABLE_LIST *tbl= + (TABLE_LIST*)view->first_select_lex()->table_list.first; + tbl; + tbl= tbl->next_local) + if 
(tbl->set_insert_values(mem_root)) + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +/* + Test if this is a leaf with respect to name resolution. + + SYNOPSIS + TABLE_LIST::is_leaf_for_name_resolution() + + DESCRIPTION + A table reference is a leaf with respect to name resolution if + it is either a leaf node in a nested join tree (table, view, + schema table, subquery), or an inner node that represents a + NATURAL/USING join, or a nested join with materialized join + columns. + + RETURN + TRUE if a leaf, FALSE otherwise. +*/ +bool TABLE_LIST::is_leaf_for_name_resolution() +{ + return (is_merged_derived() || is_natural_join || is_join_columns_complete || + !nested_join); +} + + +/* + Retrieve the first (left-most) leaf in a nested join tree with + respect to name resolution. + + SYNOPSIS + TABLE_LIST::first_leaf_for_name_resolution() + + DESCRIPTION + Given that 'this' is a nested table reference, recursively walk + down the left-most children of 'this' until we reach a leaf + table reference with respect to name resolution. + + IMPLEMENTATION + The left-most child of a nested table reference is the last element + in the list of children because the children are inserted in + reverse order. + + RETURN + If 'this' is a nested table reference - the left-most child of + the tree rooted in 'this', + else return 'this' +*/ + +TABLE_LIST *TABLE_LIST::first_leaf_for_name_resolution() +{ + TABLE_LIST *UNINIT_VAR(cur_table_ref); + NESTED_JOIN *cur_nested_join; + + if (is_leaf_for_name_resolution()) + return this; + DBUG_ASSERT(nested_join); + + for (cur_nested_join= nested_join; + cur_nested_join; + cur_nested_join= cur_table_ref->nested_join) + { + List_iterator_fast it(cur_nested_join->join_list); + cur_table_ref= it++; + /* + If the current nested join is a RIGHT JOIN, the operands in + 'join_list' are in reverse order, thus the first operand is + already at the front of the list. Otherwise the first operand + is in the end of the list of join operands. 
+ */ + if (!(cur_table_ref->outer_join & JOIN_TYPE_RIGHT)) + { + TABLE_LIST *next; + while ((next= it++)) + cur_table_ref= next; + } + if (cur_table_ref->is_leaf_for_name_resolution()) + break; + } + return cur_table_ref; +} + + +/* + Retrieve the last (right-most) leaf in a nested join tree with + respect to name resolution. + + SYNOPSIS + TABLE_LIST::last_leaf_for_name_resolution() + + DESCRIPTION + Given that 'this' is a nested table reference, recursively walk + down the right-most children of 'this' until we reach a leaf + table reference with respect to name resolution. + + IMPLEMENTATION + The right-most child of a nested table reference is the first + element in the list of children because the children are inserted + in reverse order. + + RETURN + - If 'this' is a nested table reference - the right-most child of + the tree rooted in 'this', + - else - 'this' +*/ + +TABLE_LIST *TABLE_LIST::last_leaf_for_name_resolution() +{ + TABLE_LIST *cur_table_ref= this; + NESTED_JOIN *cur_nested_join; + + if (is_leaf_for_name_resolution()) + return this; + DBUG_ASSERT(nested_join); + + for (cur_nested_join= nested_join; + cur_nested_join; + cur_nested_join= cur_table_ref->nested_join) + { + cur_table_ref= cur_nested_join->join_list.head(); + /* + If the current nested is a RIGHT JOIN, the operands in + 'join_list' are in reverse order, thus the last operand is in the + end of the list. 
+ */ + if ((cur_table_ref->outer_join & JOIN_TYPE_RIGHT)) + { + List_iterator_fast it(cur_nested_join->join_list); + TABLE_LIST *next; + cur_table_ref= it++; + while ((next= it++)) + cur_table_ref= next; + } + if (cur_table_ref->is_leaf_for_name_resolution()) + break; + } + return cur_table_ref; +} + + +/* + Register access mode which we need for underlying tables + + SYNOPSIS + register_want_access() + want_access Acess which we require +*/ + +void TABLE_LIST::register_want_access(privilege_t want_access) +{ + /* Remove SHOW_VIEW_ACL, because it will be checked during making view */ + want_access&= ~SHOW_VIEW_ACL; + if (belong_to_view) + { + grant.want_privilege= want_access; + if (table) + table->grant.want_privilege= want_access; + } + if (!view) + return; + for (TABLE_LIST *tbl= view->first_select_lex()->get_table_list(); + tbl; + tbl= tbl->next_local) + tbl->register_want_access(want_access); +} + + +/* + Load security context information for this view + + SYNOPSIS + TABLE_LIST::prepare_view_security_context() + thd [in] thread handler + + RETURN + FALSE OK + TRUE Error +*/ + +#ifndef NO_EMBEDDED_ACCESS_CHECKS +bool TABLE_LIST::prepare_view_security_context(THD *thd, bool upgrade_check) +{ + DBUG_ENTER("TABLE_LIST::prepare_view_security_context"); + DBUG_PRINT("enter", ("table: %s", alias.str)); + + DBUG_ASSERT(!prelocking_placeholder && view); + if (view_suid) + { + DBUG_PRINT("info", ("This table is suid view => load contest")); + DBUG_ASSERT(view && view_sctx); + if (acl_getroot(view_sctx, definer.user.str, definer.host.str, + definer.host.str, thd->db.str)) + { + if ((thd->lex->sql_command == SQLCOM_SHOW_CREATE) || + (thd->lex->sql_command == SQLCOM_SHOW_FIELDS)) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_NO_SUCH_USER, + ER_THD(thd, ER_NO_SUCH_USER), + definer.user.str, definer.host.str); + } + else + { + if (thd->security_ctx->master_access & PRIV_REVEAL_MISSING_DEFINER) + { + my_error(ER_NO_SUCH_USER, MYF(upgrade_check ? 
ME_WARNING: 0), + definer.user.str, definer.host.str); + } + else + { + if (thd->password == 2) + my_error(ER_ACCESS_DENIED_NO_PASSWORD_ERROR, MYF(0), + thd->security_ctx->priv_user, + thd->security_ctx->priv_host); + else + my_error(ER_ACCESS_DENIED_ERROR, MYF(0), + thd->security_ctx->priv_user, + thd->security_ctx->priv_host, + (thd->password ? ER_THD(thd, ER_YES) : + ER_THD(thd, ER_NO))); + } + DBUG_RETURN(TRUE); + } + } + } + DBUG_RETURN(FALSE); + +} +#endif + + +/* + Find security context of current view + + SYNOPSIS + TABLE_LIST::find_view_security_context() + thd [in] thread handler + +*/ + +#ifndef NO_EMBEDDED_ACCESS_CHECKS +Security_context *TABLE_LIST::find_view_security_context(THD *thd) +{ + Security_context *sctx; + TABLE_LIST *upper_view= this; + DBUG_ENTER("TABLE_LIST::find_view_security_context"); + + DBUG_ASSERT(view); + while (upper_view && !upper_view->view_suid) + { + DBUG_ASSERT(!upper_view->prelocking_placeholder); + upper_view= upper_view->referencing_view; + } + if (upper_view) + { + DBUG_PRINT("info", ("Securety context of view %s will be used", + upper_view->alias.str)); + sctx= upper_view->view_sctx; + DBUG_ASSERT(sctx); + } + else + { + DBUG_PRINT("info", ("Current global context will be used")); + sctx= thd->security_ctx; + } + DBUG_RETURN(sctx); +} +#endif + + +/* + Prepare security context and load underlying tables priveleges for view + + SYNOPSIS + TABLE_LIST::prepare_security() + thd [in] thread handler + + RETURN + FALSE OK + TRUE Error +*/ + +bool TABLE_LIST::prepare_security(THD *thd) +{ + List_iterator_fast tb(*view_tables); + TABLE_LIST *tbl; + DBUG_ENTER("TABLE_LIST::prepare_security"); +#ifndef NO_EMBEDDED_ACCESS_CHECKS + /* + Check if we are running REPAIR VIEW FOR UPGRADE + In this case we are probably comming from mysql_upgrade and + should not get an error for mysql.user table we just created. 
+ */ + bool upgrade_check= (thd->lex->sql_command == SQLCOM_REPAIR && + (thd->lex->check_opt.sql_flags & + (TT_FOR_UPGRADE | TT_FROM_MYSQL)) && + (thd->security_ctx->master_access & + PRIV_REVEAL_MISSING_DEFINER)); + Security_context *save_security_ctx= thd->security_ctx; + + DBUG_ASSERT(!prelocking_placeholder); + if (prepare_view_security_context(thd, upgrade_check)) + { + if (upgrade_check) + { + /* REPAIR needs SELECT_ACL */ + while ((tbl= tb++)) + { + tbl->grant.privilege= SELECT_ACL; + tbl->security_ctx= save_security_ctx; + } + DBUG_RETURN(FALSE); + } + DBUG_RETURN(TRUE); // Fatal + } + thd->security_ctx= find_view_security_context(thd); + opt_trace_disable_if_no_security_context_access(thd); + while ((tbl= tb++)) + { + DBUG_ASSERT(tbl->referencing_view); + const char *local_db, *local_table_name; + if (tbl->view) + { + local_db= tbl->view_db.str; + local_table_name= tbl->view_name.str; + } + else + { + local_db= tbl->db.str; + local_table_name= tbl->table_name.str; + } + fill_effective_table_privileges(thd, &tbl->grant, local_db, + local_table_name); + if (tbl->table) + tbl->table->grant= grant; + } + thd->security_ctx= save_security_ctx; +#else + while ((tbl= tb++)) + tbl->grant.privilege= ALL_KNOWN_ACL; +#endif /* NO_EMBEDDED_ACCESS_CHECKS */ + DBUG_RETURN(FALSE); +} + +#ifndef DBUG_OFF +void TABLE_LIST::set_check_merged() +{ + if (is_view()) + return; + + DBUG_ASSERT(derived); + /* + It is not simple to check all, but at least this should be checked: + this select is not excluded or the exclusion came from above. 
+ */ + DBUG_ASSERT(derived->is_excluded() || + !derived->first_select()->exclude_from_table_unique_test || + derived->outer_select()-> + exclude_from_table_unique_test); +} +#endif + +void TABLE_LIST::set_check_materialized() +{ + DBUG_ENTER("TABLE_LIST::set_check_materialized"); + SELECT_LEX_UNIT *derived= this->derived; + if (view) + derived= this->derived= &view->unit; + DBUG_ASSERT(derived); + DBUG_ASSERT(!derived->is_excluded()); + if (!derived->first_select()->exclude_from_table_unique_test) + derived->set_unique_exclude(); + else + { + /* + The subtree should be already excluded + */ + DBUG_ASSERT(!derived->first_select()->first_inner_unit() || + derived->first_select()->first_inner_unit()->with_element || + derived->first_select()->first_inner_unit()->first_select()-> + exclude_from_table_unique_test); + } + DBUG_VOID_RETURN; +} + +TABLE *TABLE_LIST::get_real_join_table() +{ + TABLE_LIST *tbl= this; + while (tbl->table == NULL || tbl->table->reginfo.join_tab == NULL) + { + if ((tbl->view == NULL && tbl->derived == NULL) || + tbl->is_materialized_derived()) + break; + /* we do not support merging of union yet */ + DBUG_ASSERT(tbl->view == NULL || + tbl->view->first_select_lex()->next_select() == NULL); + DBUG_ASSERT(tbl->derived == NULL || + tbl->derived->first_select()->next_select() == NULL); + + { + List_iterator_fast + ti(tbl->view != NULL ? + tbl->view->first_select_lex()->top_join_list : + tbl->derived->first_select()->top_join_list); + for (;;) + { + tbl= NULL; + /* + Find left table in outer join on this level + (the list is reverted). 
+ */ + for (TABLE_LIST *t= ti++; t; t= ti++) + tbl= t; + if (!tbl) + return NULL; // view/derived with no tables + if (!tbl->nested_join) + break; + /* go deeper if we've found nested join */ + ti= tbl->nested_join->join_list; + } + } + } + + return tbl->table; +} + + +Natural_join_column::Natural_join_column(Field_translator *field_param, + TABLE_LIST *tab) +{ + DBUG_ASSERT(tab->field_translation); + view_field= field_param; + table_field= NULL; + table_ref= tab; + is_common= FALSE; +} + + +Natural_join_column::Natural_join_column(Item_field *field_param, + TABLE_LIST *tab) +{ + DBUG_ASSERT(tab->table == field_param->field->table); + table_field= field_param; + view_field= NULL; + table_ref= tab; + is_common= FALSE; +} + + +LEX_CSTRING *Natural_join_column::name() +{ + if (view_field) + { + DBUG_ASSERT(table_field == NULL); + return &view_field->name; + } + + return &table_field->field_name; +} + + +Item *Natural_join_column::create_item(THD *thd) +{ + if (view_field) + { + DBUG_ASSERT(table_field == NULL); + return create_view_field(thd, table_ref, &view_field->item, + &view_field->name); + } + return table_field; +} + + +Field *Natural_join_column::field() +{ + if (view_field) + { + DBUG_ASSERT(table_field == NULL); + return NULL; + } + return table_field->field; +} + + +const char *Natural_join_column::safe_table_name() +{ + DBUG_ASSERT(table_ref); + return table_ref->alias.str ? table_ref->alias.str : ""; +} + + +const char *Natural_join_column::safe_db_name() +{ + if (view_field) + return table_ref->view_db.str ? table_ref->view_db.str : ""; + + /* + Test that TABLE_LIST::db is the same as TABLE_SHARE::db to + ensure consistency. An exception are I_S schema tables, which + are inconsistent in this respect. + */ + DBUG_ASSERT(!cmp(&table_ref->db, &table_ref->table->s->db) || + table_ref->table_function || + (table_ref->schema_table && + is_infoschema_db(&table_ref->table->s->db)) || + table_ref->is_materialized_derived()); + return table_ref->db.str ? 
table_ref->db.str : ""; +} + + +GRANT_INFO *Natural_join_column::grant() +{ +/* if (view_field) + return &(table_ref->grant); + return &(table_ref->table->grant);*/ + /* + Have to check algorithm because merged derived also has + field_translation. + */ +//if (table_ref->effective_algorithm == DTYPE_ALGORITHM_MERGE) + if (table_ref->is_merged_derived()) + return &(table_ref->grant); + return &(table_ref->table->grant); +} + + +void Field_iterator_view::set(TABLE_LIST *table) +{ + DBUG_ASSERT(table->field_translation); + view= table; + ptr= table->field_translation; + array_end= table->field_translation_end; +} + + +LEX_CSTRING *Field_iterator_table::name() +{ + return &(*ptr)->field_name; +} + + +Item *Field_iterator_table::create_item(THD *thd) +{ + SELECT_LEX *select= thd->lex->current_select; + + Item_field *item= new (thd->mem_root) Item_field(thd, &select->context, *ptr); + DBUG_ASSERT(strlen(item->name.str) == item->name.length); + if (item && thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY && + !thd->lex->in_sum_func && select->cur_pos_in_select_list != UNDEF_POS && + select->join) + { + select->join->non_agg_fields.push_back(item); + item->marker= select->cur_pos_in_select_list; + select->set_non_agg_field_used(true); + } + return item; +} + + +LEX_CSTRING *Field_iterator_view::name() +{ + return &ptr->name; +} + + +Item *Field_iterator_view::create_item(THD *thd) +{ + return create_view_field(thd, view, &ptr->item, &ptr->name); +} + +Item *create_view_field(THD *thd, TABLE_LIST *view, Item **field_ref, + LEX_CSTRING *name) +{ + bool save_wrapper= thd->lex->current_select->no_wrap_view_item; + Item *field= *field_ref; + DBUG_ENTER("create_view_field"); + + if (view->schema_table_reformed) + { + /* + Translation table items are always Item_fields and already fixed + ('mysql_schema_table' function). So we can return directly the + field. This case happens only for 'show & where' commands. 
+ */ + DBUG_ASSERT(field && field->fixed()); + DBUG_RETURN(field); + } + + DBUG_ASSERT(field); + thd->lex->current_select->no_wrap_view_item= TRUE; + if (!field->fixed()) + { + if (field->fix_fields(thd, field_ref)) + { + thd->lex->current_select->no_wrap_view_item= save_wrapper; + DBUG_RETURN(0); + } + field= *field_ref; + } + thd->lex->current_select->no_wrap_view_item= save_wrapper; + if (save_wrapper) + { + DBUG_RETURN(field); + } + Name_resolution_context *context= (view->view ? + &view->view->first_select_lex()->context: + &thd->lex->first_select_lex()->context); + Item *item= (new (thd->mem_root) + Item_direct_view_ref(thd, context, field_ref, view->alias, + *name, view)); + if (!item) + return NULL; + /* + Force creation of nullable item for the result tmp table for outer joined + views/derived tables. + */ + if (view->table && view->table->maybe_null) + item->set_maybe_null(); + /* Save item in case we will need to fall back to materialization. */ + view->used_items.push_front(item, thd->mem_root); + /* + If we create this reference on persistent memory then it should be + present in persistent list + */ + if (thd->mem_root == thd->stmt_arena->mem_root) + view->persistent_used_items.push_front(item, thd->mem_root); + DBUG_RETURN(item); +} + + +void Field_iterator_natural_join::set(TABLE_LIST *table_ref) +{ + DBUG_ASSERT(table_ref->join_columns); + column_ref_it.init(*(table_ref->join_columns)); + cur_column_ref= column_ref_it++; +} + + +void Field_iterator_natural_join::next() +{ + cur_column_ref= column_ref_it++; + DBUG_ASSERT(!cur_column_ref || ! 
cur_column_ref->table_field || + cur_column_ref->table_ref->table == + cur_column_ref->table_field->field->table); +} + + +void Field_iterator_table_ref::set_field_iterator() +{ + DBUG_ENTER("Field_iterator_table_ref::set_field_iterator"); + /* + If the table reference we are iterating over is a natural join, or it is + an operand of a natural join, and TABLE_LIST::join_columns contains all + the columns of the join operand, then we pick the columns from + TABLE_LIST::join_columns, instead of the orginial container of the + columns of the join operator. + */ + if (table_ref->is_join_columns_complete) + { + /* Necesary, but insufficient conditions. */ + DBUG_ASSERT(table_ref->is_natural_join || + table_ref->nested_join || + (table_ref->join_columns && + /* This is a merge view. */ + ((table_ref->field_translation && + table_ref->join_columns->elements == + (ulong)(table_ref->field_translation_end - + table_ref->field_translation)) || + /* This is stored table or a tmptable view. */ + (!table_ref->field_translation && + table_ref->join_columns->elements == + table_ref->table->s->fields)))); + field_it= &natural_join_it; + DBUG_PRINT("info",("field_it for '%s' is Field_iterator_natural_join", + table_ref->alias.str)); + } + /* This is a merge view, so use field_translation. */ + else if (table_ref->is_merged_derived() && table_ref->field_translation) + { + field_it= &view_field_it; + DBUG_PRINT("info", ("field_it for '%s' is Field_iterator_view", + table_ref->alias.str)); + } + /* This is a base table or stored view. 
*/ + else + { + DBUG_ASSERT(table_ref->table || table_ref->view); + field_it= &table_field_it; + DBUG_PRINT("info", ("field_it for '%s' is Field_iterator_table", + table_ref->alias.str)); + } + field_it->set(table_ref); + DBUG_VOID_RETURN; +} + + +void Field_iterator_table_ref::set(TABLE_LIST *table) +{ + DBUG_ASSERT(table); + first_leaf= table->first_leaf_for_name_resolution(); + last_leaf= table->last_leaf_for_name_resolution(); + DBUG_ASSERT(first_leaf && last_leaf); + table_ref= first_leaf; + set_field_iterator(); +} + + +void Field_iterator_table_ref::next() +{ + /* Move to the next field in the current table reference. */ + field_it->next(); + /* + If all fields of the current table reference are exhausted, move to + the next leaf table reference. + */ + if (field_it->end_of_fields() && table_ref != last_leaf) + { + table_ref= table_ref->next_name_resolution_table; + DBUG_ASSERT(table_ref); + set_field_iterator(); + } +} + + +const char *Field_iterator_table_ref::get_table_name() +{ + if (table_ref->view) + return table_ref->view_name.str; + if (table_ref->is_derived()) + return table_ref->table->s->table_name.str; + else if (table_ref->is_natural_join) + return natural_join_it.column_ref()->safe_table_name(); + + DBUG_ASSERT(!strcmp(table_ref->table_name.str, + table_ref->table->s->table_name.str) || + table_ref->schema_table || table_ref->table_function); + return table_ref->table_name.str; +} + + +const char *Field_iterator_table_ref::get_db_name() +{ + if (table_ref->view) + return table_ref->view_db.str; + else if (table_ref->is_natural_join) + return natural_join_it.column_ref()->safe_db_name(); + + /* + Test that TABLE_LIST::db is the same as TABLE_SHARE::db to + ensure consistency. An exception are I_S schema tables, which + are inconsistent in this respect. 
+ */ + DBUG_ASSERT(!cmp(&table_ref->db, &table_ref->table->s->db) || + (table_ref->schema_table && + is_infoschema_db(&table_ref->table->s->db)) || + table_ref->table_function); + + return table_ref->db.str; +} + + +GRANT_INFO *Field_iterator_table_ref::grant() +{ + if (table_ref->view) + return &(table_ref->grant); + else if (table_ref->is_natural_join) + return natural_join_it.column_ref()->grant(); + return &(table_ref->table->grant); +} + + +/* + Create new or return existing column reference to a column of a + natural/using join. + + SYNOPSIS + Field_iterator_table_ref::get_or_create_column_ref() + parent_table_ref the parent table reference over which the + iterator is iterating + + DESCRIPTION + Create a new natural join column for the current field of the + iterator if no such column was created, or return an already + created natural join column. The former happens for base tables or + views, and the latter for natural/using joins. If a new field is + created, then the field is added to 'parent_table_ref' if it is + given, or to the original table referene of the field if + parent_table_ref == NULL. + + NOTES + This method is designed so that when a Field_iterator_table_ref + walks through the fields of a table reference, all its fields + are created and stored as follows: + - If the table reference being iterated is a stored table, view or + natural/using join, store all natural join columns in a list + attached to that table reference. + - If the table reference being iterated is a nested join that is + not natural/using join, then do not materialize its result + fields. This is OK because for such table references + Field_iterator_table_ref iterates over the fields of the nested + table references (recursively). In this way we avoid the storage + of unnecessay copies of result columns of nested joins. 
+ + RETURN + # Pointer to a column of a natural join (or its operand) + NULL No memory to allocate the column +*/ + +Natural_join_column * +Field_iterator_table_ref::get_or_create_column_ref(THD *thd, TABLE_LIST *parent_table_ref) +{ + Natural_join_column *nj_col; + bool is_created= TRUE; + uint UNINIT_VAR(field_count); + TABLE_LIST *add_table_ref= parent_table_ref ? + parent_table_ref : table_ref; + + if (field_it == &table_field_it) + { + /* The field belongs to a stored table. */ + Field *tmp_field= table_field_it.field(); + Item_field *tmp_item= + new (thd->mem_root) Item_field(thd, &thd->lex->current_select->context, tmp_field); + if (!tmp_item) + return NULL; + nj_col= new Natural_join_column(tmp_item, table_ref); + field_count= table_ref->table->s->fields; + } + else if (field_it == &view_field_it) + { + /* The field belongs to a merge view or information schema table. */ + Field_translator *translated_field= view_field_it.field_translator(); + nj_col= new Natural_join_column(translated_field, table_ref); + field_count= (uint)(table_ref->field_translation_end - + table_ref->field_translation); + } + else + { + /* + The field belongs to a NATURAL join, therefore the column reference was + already created via one of the two constructor calls above. In this case + we just return the already created column reference. + */ + DBUG_ASSERT(table_ref->is_join_columns_complete); + is_created= FALSE; + nj_col= natural_join_it.column_ref(); + DBUG_ASSERT(nj_col); + } + DBUG_ASSERT(!nj_col->table_field || !nj_col->table_field->field || + nj_col->table_ref->table == nj_col->table_field->field->table); + + /* + If the natural join column was just created add it to the list of + natural join columns of either 'parent_table_ref' or to the table + reference that directly contains the original field. + */ + if (is_created) + { + /* Make sure not all columns were materialized. 
*/ + DBUG_ASSERT(!add_table_ref->is_join_columns_complete); + if (!add_table_ref->join_columns) + { + /* Create a list of natural join columns on demand. */ + if (!(add_table_ref->join_columns= new List)) + return NULL; + add_table_ref->is_join_columns_complete= FALSE; + } + add_table_ref->join_columns->push_back(nj_col); + /* + If new fields are added to their original table reference, mark if + all fields were added. We do it here as the caller has no easy way + of knowing when to do it. + If the fields are being added to parent_table_ref, then the caller + must take care to mark when all fields are created/added. + */ + if (!parent_table_ref && + add_table_ref->join_columns->elements == field_count) + add_table_ref->is_join_columns_complete= TRUE; + } + + return nj_col; +} + + +/* + Return an existing reference to a column of a natural/using join. + + SYNOPSIS + Field_iterator_table_ref::get_natural_column_ref() + + DESCRIPTION + The method should be called in contexts where it is expected that + all natural join columns are already created, and that the column + being retrieved is a Natural_join_column. + + RETURN + # Pointer to a column of a natural join (or its operand) + NULL We didn't originally have memory to allocate the column +*/ + +Natural_join_column * +Field_iterator_table_ref::get_natural_column_ref() +{ + Natural_join_column *nj_col; + + DBUG_ASSERT(field_it == &natural_join_it); + /* + The field belongs to a NATURAL join, therefore the column reference was + already created via one of the two constructor calls above. In this case + we just return the already created column reference. + */ + nj_col= natural_join_it.column_ref(); + DBUG_ASSERT(nj_col && + (!nj_col->table_field || !nj_col->table_field->field || + nj_col->table_ref->table == nj_col->table_field->field->table)); + return nj_col; +} + +/***************************************************************************** + Functions to handle column usage bitmaps (read_set, write_set etc...) 
+*****************************************************************************/ + +/* Reset all columns bitmaps */ + +void TABLE::clear_column_bitmaps() +{ + /* + Reset column read/write usage. It's identical to: + bitmap_clear_all(&table->def_read_set); + bitmap_clear_all(&table->def_write_set); + The code assumes that the bitmaps are allocated after each other, as + guaranteed by open_table_from_share() + */ + bzero((char*) def_read_set.bitmap, + s->column_bitmap_size * (s->virtual_fields ? 3 : 2)); + column_bitmaps_set(&def_read_set, &def_write_set); + rpl_write_set= 0; // Safety +} + + +/* + Tell handler we are going to call position() and rnd_pos() later. + + NOTES: + This is needed for handlers that uses the primary key to find the + row. In this case we have to extend the read bitmap with the primary + key fields. +*/ + +void TABLE::prepare_for_position() +{ + DBUG_ENTER("TABLE::prepare_for_position"); + + if ((file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) && + s->primary_key < MAX_KEY) + { + mark_index_columns_for_read(s->primary_key); + /* signal change */ + file->column_bitmaps_signal(); + } + DBUG_VOID_RETURN; +} + + +MY_BITMAP *TABLE::prepare_for_keyread(uint index, MY_BITMAP *map) +{ + MY_BITMAP *backup= read_set; + DBUG_ENTER("TABLE::prepare_for_keyread"); + if (!no_keyread) + file->ha_start_keyread(index); + if (map != read_set || !(file->index_flags(index, 0, 1) & HA_CLUSTERED_INDEX)) + { + mark_index_columns(index, map); + column_bitmaps_set(map); + } + DBUG_RETURN(backup); +} + + +/* + Mark that only fields from one key is used. Useful before keyread. 
+*/ + +void TABLE::mark_index_columns(uint index, MY_BITMAP *bitmap) +{ + DBUG_ENTER("TABLE::mark_index_columns"); + + bitmap_clear_all(bitmap); + mark_index_columns_no_reset(index, bitmap); + DBUG_VOID_RETURN; +} + +/* + Restore to use normal column maps after key read + + NOTES + This reverse the change done by mark_index_columns + + WARNING + For this to work, one must have the normal table maps in place + when calling mark_index_columns +*/ + +void TABLE::restore_column_maps_after_keyread(MY_BITMAP *backup) +{ + DBUG_ENTER("TABLE::restore_column_maps_after_mark_index"); + file->ha_end_keyread(); + read_set= backup; + file->column_bitmaps_signal(); + DBUG_VOID_RETURN; +} + +static void do_mark_index_columns(TABLE *table, uint index, + MY_BITMAP *bitmap, bool read) +{ + KEY_PART_INFO *key_part= table->key_info[index].key_part; + uint key_parts= table->key_info[index].user_defined_key_parts; + for (uint k= 0; k < key_parts; k++) + if (read) + key_part[k].field->register_field_in_read_map(); + else + bitmap_set_bit(bitmap, key_part[k].fieldnr-1); + if (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX && + table->s->primary_key != MAX_KEY && table->s->primary_key != index) + do_mark_index_columns(table, table->s->primary_key, bitmap, read); + +} +/* + mark columns used by key, but don't reset other fields +*/ + +inline void TABLE::mark_index_columns_no_reset(uint index, MY_BITMAP *bitmap) +{ + do_mark_index_columns(this, index, bitmap, false); +} + + +inline void TABLE::mark_index_columns_for_read(uint index) +{ + do_mark_index_columns(this, index, read_set, true); +} + +/* + Mark auto-increment fields as used fields in both read and write maps + + NOTES + This is needed in insert & update as the auto-increment field is + always set and sometimes read. 
+*/ + +void TABLE::mark_auto_increment_column() +{ + DBUG_ASSERT(found_next_number_field); + /* + We must set bit in read set as update_auto_increment() is using the + store() to check overflow of auto_increment values + */ + bitmap_set_bit(read_set, found_next_number_field->field_index); + bitmap_set_bit(write_set, found_next_number_field->field_index); + if (s->next_number_keypart) + mark_index_columns_for_read(s->next_number_index); + file->column_bitmaps_signal(); +} + + +/* + Mark columns needed for doing an delete of a row + + DESCRIPTON + Some table engines don't have a cursor on the retrieve rows + so they need either to use the primary key or all columns to + be able to delete a row. + + If the engine needs this, the function works as follows: + - If primary key exits, mark the primary key columns to be read. + - If not, mark all columns to be read + + If the engine has HA_REQUIRES_KEY_COLUMNS_FOR_DELETE, we will + mark all key columns as 'to-be-read'. This allows the engine to + loop over the given record to find all keys and doesn't have to + retrieve the row again. 
+*/ + +void TABLE::mark_columns_needed_for_delete() +{ + bool need_signal= false; + mark_columns_per_binlog_row_image(); + + if (triggers) + triggers->mark_fields_used(TRG_EVENT_DELETE); + if (file->ha_table_flags() & HA_REQUIRES_KEY_COLUMNS_FOR_DELETE) + { + Field **reg_field; + for (reg_field= field ; *reg_field ; reg_field++) + { + if ((*reg_field)->flags & (PART_KEY_FLAG | PART_INDIRECT_KEY_FLAG)) + mark_column_with_deps(*reg_field); + } + need_signal= true; + } + if (file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) + { + /* + If the handler has no cursor capabilites, we have to read either + the primary key, the hidden primary key or all columns to be + able to do an delete + */ + if (s->primary_key == MAX_KEY) + file->use_hidden_primary_key(); + else + { + mark_index_columns_for_read(s->primary_key); + need_signal= true; + } + } + + if (s->versioned) + { + bitmap_set_bit(read_set, s->vers.start_fieldno); + bitmap_set_bit(read_set, s->vers.end_fieldno); + bitmap_set_bit(write_set, s->vers.end_fieldno); + need_signal= true; + } + + if (need_signal) + file->column_bitmaps_signal(); +} + + +/* + Mark columns needed for doing an update of a row + + DESCRIPTON + Some engines needs to have all columns in an update (to be able to + build a complete row). If this is the case, we mark all not + updated columns to be read. + + If this is no the case, we do like in the delete case and mark + if needed, either the primary key column or all columns to be read. + (see mark_columns_needed_for_delete() for details) + + If the engine has HA_REQUIRES_KEY_COLUMNS_FOR_DELETE, we will + mark all USED key columns as 'to-be-read'. This allows the engine to + loop over the given record to find all changed keys and doesn't have to + retrieve the row again. 
+*/ + +void TABLE::mark_columns_needed_for_update() +{ + DBUG_ENTER("TABLE::mark_columns_needed_for_update"); + bool need_signal= false; + + + if (triggers) + triggers->mark_fields_used(TRG_EVENT_UPDATE); + if (default_field) + mark_default_fields_for_write(FALSE); + if (vfield) + need_signal|= mark_virtual_columns_for_write(FALSE); + if (file->ha_table_flags() & HA_REQUIRES_KEY_COLUMNS_FOR_DELETE) + { + KEY *end= key_info + s->keys; + for (KEY *k= key_info; k < end; k++) + { + KEY_PART_INFO *kpend= k->key_part + k->ext_key_parts; + int any_written= 0, all_read= 1; + for (KEY_PART_INFO *kp= k->key_part; kp < kpend; kp++) + { + int idx= kp->fieldnr - 1; + any_written|= bitmap_is_set(write_set, idx); + all_read&= bitmap_is_set(read_set, idx); + } + if (any_written && !all_read) + { + for (KEY_PART_INFO *kp= k->key_part; kp < kpend; kp++) + mark_column_with_deps(field[kp->fieldnr - 1]); + } + } + need_signal= true; + } + else + { + if (found_next_number_field) + mark_auto_increment_column(); + } + + if (file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) + { + /* + If the handler has no cursor capabilites, we have to read either + the primary key, the hidden primary key or all columns to be + able to do an update + */ + if (s->primary_key == MAX_KEY) + file->use_hidden_primary_key(); + else + { + mark_index_columns_for_read(s->primary_key); + need_signal= true; + } + } + if (s->versioned) + { + bitmap_set_bit(write_set, s->vers.start_fieldno); + bitmap_set_bit(write_set, s->vers.end_fieldno); + /* + For System Versioning we have to read all columns since we store + a copy of previous row with modified row_end back to a table. 
+ */ + bitmap_union(read_set, &s->all_set); + need_signal= true; + } + if (check_constraints) + { + mark_check_constraint_columns_for_read(); + need_signal= true; + } + + /* + If a timestamp field settable on UPDATE is present then to avoid wrong + update force the table handler to retrieve write-only fields to be able + to compare records and detect data change. + */ + if ((file->ha_table_flags() & HA_PARTIAL_COLUMN_READ) && + default_field && s->has_update_default_function) + { + bitmap_union(read_set, write_set); + need_signal= true; + } + mark_columns_per_binlog_row_image(); + if (need_signal) + file->column_bitmaps_signal(); + DBUG_VOID_RETURN; +} + + +/* + Mark columns the handler needs for doing an insert + + For now, this is used to mark fields used by the trigger + as changed. +*/ + +void TABLE::mark_columns_needed_for_insert() +{ + DBUG_ENTER("mark_columns_needed_for_insert"); + + if (triggers) + { + /* + We don't need to mark columns which are used by ON DELETE and + ON UPDATE triggers, which may be invoked in case of REPLACE or + INSERT ... ON DUPLICATE KEY UPDATE, since before doing actual + row replacement or update write_record() will mark all table + fields as used. + */ + triggers->mark_fields_used(TRG_EVENT_INSERT); + } + if (found_next_number_field) + mark_auto_increment_column(); + if (default_field) + mark_default_fields_for_write(TRUE); + if (s->versioned) + { + bitmap_set_bit(write_set, s->vers.start_fieldno); + bitmap_set_bit(write_set, s->vers.end_fieldno); + bitmap_set_bit(read_set, s->vers.end_fieldno); + } + /* Mark virtual columns for insert */ + if (vfield) + mark_virtual_columns_for_write(TRUE); + mark_columns_per_binlog_row_image(); + if (check_constraints) + mark_check_constraint_columns_for_read(); + DBUG_VOID_RETURN; +} + +/* + Mark columns according the binlog row image option. 
+ + Columns to be written are stored in 'rpl_write_set' + + When logging in RBR, the user can select whether to + log partial or full rows, depending on the table + definition, and the value of binlog_row_image. + + Semantics of the binlog_row_image are the following + (PKE - primary key equivalent, ie, PK fields if PK + exists, all fields otherwise): + + binlog_row_image= MINIMAL + - This marks the PKE fields in the read_set + - This marks all fields where a value was specified + in the rpl_write_set + + binlog_row_image= NOBLOB + - This marks PKE + all non-blob fields in the read_set + - This marks all fields where a value was specified + and all non-blob fields in the rpl_write_set + + binlog_row_image= FULL + - all columns in the read_set + - all columns in the rpl_write_set + + This marking is done without resetting the original + bitmaps. This means that we will strip extra fields in + the read_set at binlogging time (for those cases that + we only want to log a PK and we needed other fields for + execution). +*/ + +void TABLE::mark_columns_per_binlog_row_image() +{ + THD *thd= in_use; + DBUG_ENTER("mark_columns_per_binlog_row_image"); + DBUG_ASSERT(read_set->bitmap); + DBUG_ASSERT(write_set->bitmap); + + /* If not using row format */ + rpl_write_set= write_set; + + /** + If in RBR we may need to mark some extra columns, + depending on the binlog-row-image command line argument. + */ + if (file->row_logging && + !ha_check_storage_engine_flag(s->db_type(), HTON_NO_BINLOG_ROW_OPT)) + { + /* if there is no PK, then mark all columns for the BI. 
*/ + if (s->primary_key >= MAX_KEY) + { + bitmap_set_all(read_set); + rpl_write_set= read_set; + } + else + { + switch (thd->variables.binlog_row_image) { + case BINLOG_ROW_IMAGE_FULL: + bitmap_set_all(read_set); + /* Set of columns that should be written (all) */ + rpl_write_set= read_set; + break; + case BINLOG_ROW_IMAGE_NOBLOB: + /* Only write changed columns + not blobs */ + rpl_write_set= &def_rpl_write_set; + bitmap_copy(rpl_write_set, write_set); + + /* + for every field that is not set, mark it unless it is a blob or + part of a primary key + */ + for (Field **ptr=field ; *ptr ; ptr++) + { + Field *my_field= *ptr; + /* + bypass blob fields. These can be set or not set, we don't care. + Later, at binlogging time, if we don't need them in the before + image, we will discard them. + + If set in the AI, then the blob is really needed, there is + nothing we can do about it. + */ + if ((my_field->flags & PRI_KEY_FLAG) || + (my_field->type() != MYSQL_TYPE_BLOB)) + { + my_field->register_field_in_read_map(); + bitmap_set_bit(rpl_write_set, my_field->field_index); + } + } + break; + case BINLOG_ROW_IMAGE_MINIMAL: + /* + mark the primary key in the read set so that we can find the row + that is updated / deleted. + We don't need to mark the primary key in the rpl_write_set as the + binary log will include all columns read anyway. + */ + mark_index_columns_for_read(s->primary_key); + if (versioned()) + { + // TODO: After MDEV-18432 we don't pass history rows, so remove this: + rpl_write_set= &s->all_set; + } + else + { + /* Only write columns that have changed */ + rpl_write_set= write_set; + } + break; + + default: + DBUG_ASSERT(FALSE); + } + } + file->column_bitmaps_signal(); + } + + DBUG_VOID_RETURN; +} + + +/* + @brief Mark virtual columns for update/insert commands + + @param insert_fl true if virtual columns are marked for insert command + For the moment this is not used, may be used in future. 
+ + @details + The function marks virtual columns used in a update/insert commands + in the vcol_set bitmap. + For an insert command a virtual column is always marked in write_set if + it is a stored column. + If a virtual column is from write_set it is always marked in vcol_set. + If a stored virtual column is not from write_set but it is computed + through columns from write_set it is also marked in vcol_set, and, + besides, it is added to write_set. + + @return whether a bitmap was updated + + @note + Let table t1 have columns a,b,c and let column c be a stored virtual + column computed through columns a and b. Then for the query + UPDATE t1 SET a=1 + column c will be placed into vcol_set and into write_set while + column b will be placed into read_set. + If column c was a virtual column, but not a stored virtual column + then it would not be added to any of the sets. Column b would not + be added to read_set either. +*/ + +bool TABLE::mark_virtual_columns_for_write(bool insert_fl + __attribute__((unused))) +{ + Field **vfield_ptr, *tmp_vfield; + bool bitmap_updated= false; + DBUG_ENTER("mark_virtual_columns_for_write"); + + for (vfield_ptr= vfield; *vfield_ptr; vfield_ptr++) + { + tmp_vfield= *vfield_ptr; + if (bitmap_is_set(write_set, tmp_vfield->field_index)) + bitmap_updated|= mark_virtual_column_with_deps(tmp_vfield); + else if (tmp_vfield->vcol_info->stored_in_db || + (tmp_vfield->flags & (PART_KEY_FLAG | FIELD_IN_PART_FUNC_FLAG | + PART_INDIRECT_KEY_FLAG))) + { + bitmap_set_bit(write_set, tmp_vfield->field_index); + mark_virtual_column_with_deps(tmp_vfield); + bitmap_updated= true; + } + } + if (bitmap_updated) + file->column_bitmaps_signal(); + DBUG_RETURN(bitmap_updated); +} + + +/** + Check if a virtual not stored column field is in read set + + @retval FALSE No virtual not stored column is used + @retval TRUE At least one virtual not stored column is used +*/ + +bool TABLE::check_virtual_columns_marked_for_read() +{ + if (vfield) + { + Field 
**vfield_ptr; + for (vfield_ptr= vfield; *vfield_ptr; vfield_ptr++) + { + Field *tmp_vfield= *vfield_ptr; + if (bitmap_is_set(read_set, tmp_vfield->field_index) && + !tmp_vfield->vcol_info->stored_in_db) + return TRUE; + } + } + return FALSE; +} + + +/** + Check if a stored virtual column field is marked for write + + This can be used to check if any column that is part of a virtual + stored column is changed + + @retval FALSE No stored virtual column is used + @retval TRUE At least one stored virtual column is used +*/ + +bool TABLE::check_virtual_columns_marked_for_write() +{ + if (vfield) + { + Field **vfield_ptr; + for (vfield_ptr= vfield; *vfield_ptr; vfield_ptr++) + { + Field *tmp_vfield= *vfield_ptr; + if (bitmap_is_set(write_set, tmp_vfield->field_index) && + tmp_vfield->vcol_info->stored_in_db) + return TRUE; + } + } + return FALSE; +} + + +/* + Mark fields used by check constraints into s->check_set. + Mark all fields used in an expression that is part of an index + with PART_INDIRECT_KEY_FLAG + + This is done once for the TABLE_SHARE the first time the table is opened. + The marking must be done non-destructively to handle the case when + this could be run in parallely by two threads +*/ + +void TABLE::mark_columns_used_by_virtual_fields(void) +{ + MY_BITMAP *save_read_set; + Field **vfield_ptr; + TABLE_SHARE::enum_v_keys v_keys= TABLE_SHARE::NO_V_KEYS; + + /* If there is virtual fields are already initialized */ + if (s->check_set_initialized) + return; + + if (s->tmp_table == NO_TMP_TABLE) + mysql_mutex_lock(&s->LOCK_share); + if (s->check_set) + { + /* Mark fields used by check constraint */ + save_read_set= read_set; + read_set= s->check_set; + + for (Virtual_column_info **chk= check_constraints ; *chk ; chk++) + (*chk)->expr->walk(&Item::register_field_in_read_map, 1, 0); + read_set= save_read_set; + } + + /* + mark all fields that part of a virtual indexed field with + PART_INDIRECT_KEY_FLAG. 
This is used to ensure that all fields + that are part of an index exits before write/delete/update. + + As this code is only executed once per open share, it's reusing + existing functionality instead of adding an extra argument to + add_field_to_set_processor or adding another processor. + */ + if (vfield) + { + for (vfield_ptr= vfield; *vfield_ptr; vfield_ptr++) + { + if ((*vfield_ptr)->flags & PART_KEY_FLAG) + (*vfield_ptr)->vcol_info->expr->walk(&Item::add_field_to_set_processor, + 1, this); + } + for (uint i= 0 ; i < s->fields ; i++) + { + if (bitmap_is_set(&tmp_set, i)) + { + s->field[i]->flags|= PART_INDIRECT_KEY_FLAG; + v_keys= TABLE_SHARE::V_KEYS; + } + } + bitmap_clear_all(&tmp_set); + } + s->check_set_initialized= v_keys; + if (s->tmp_table == NO_TMP_TABLE) + mysql_mutex_unlock(&s->LOCK_share); +} + +/* Add fields used by CHECK CONSTRAINT to read map */ + +void TABLE::mark_check_constraint_columns_for_read(void) +{ + bitmap_union(read_set, s->check_set); +} + + +/** + Add all fields that have a default function to the table write set. 
+*/
+
+void TABLE::mark_default_fields_for_write(bool is_insert)
+{
+  DBUG_ENTER("mark_default_fields_for_write");
+  Field **field_ptr, *field;
+  for (field_ptr= default_field; *field_ptr; field_ptr++)
+  {
+    field= (*field_ptr);
+    if (is_insert && field->default_value)
+    {
+      /*
+        INSERT: the DEFAULT expression will be evaluated, so the target
+        column goes to write_set and all columns the expression reads
+        go to read_set.
+      */
+      bitmap_set_bit(write_set, field->field_index);
+      field->default_value->expr->
+        walk(&Item::register_field_in_read_map, 1, 0);
+    }
+    else if (!is_insert && field->has_update_default_function())
+      /* UPDATE: e.g. TIMESTAMP ... ON UPDATE CURRENT_TIMESTAMP */
+      bitmap_set_bit(write_set, field->field_index);
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Shift the data pointers of all fields in 'ptr' by (to - from) bytes,
+  re-pointing them from one record buffer to another.
+*/
+
+void TABLE::move_fields(Field **ptr, const uchar *to, const uchar *from)
+{
+  my_ptrdiff_t diff= to - from;
+  if (diff)
+  {
+    do
+    {
+      (*ptr)->move_field_offset(diff);
+    } while (*(++ptr));
+  }
+}
+
+
+/*
+  Store all allocated virtual fields blob values
+  Used by InnoDB when calculating virtual fields for its own internal
+  records
+*/
+
+void TABLE::remember_blob_values(String *blob_storage)
+{
+  Field **vfield_ptr;
+  for (vfield_ptr= vfield; *vfield_ptr; vfield_ptr++)
+  {
+    if ((*vfield_ptr)->type() == MYSQL_TYPE_BLOB &&
+        !(*vfield_ptr)->vcol_info->stored_in_db)
+    {
+      Field_blob *blob= ((Field_blob*) *vfield_ptr);
+      /*
+        Shallow-copy the String handle into blob_storage, then release()
+        the field's copy. NOTE(review): release() presumably detaches the
+        buffer so the saved copy stays the sole owner — confirm against
+        String::release().
+      */
+      memcpy((void*) blob_storage, (void*) &blob->value, sizeof(blob->value));
+      blob_storage++;
+      blob->value.release();
+    }
+  }
+}
+
+
+/*
+  Restore all allocated virtual fields blob values
+  Used by InnoDB when calculating virtual fields for its own internal
+  records
+*/
+
+void TABLE::restore_blob_values(String *blob_storage)
+{
+  Field **vfield_ptr;
+  for (vfield_ptr= vfield; *vfield_ptr; vfield_ptr++)
+  {
+    if ((*vfield_ptr)->type() == MYSQL_TYPE_BLOB &&
+        !(*vfield_ptr)->vcol_info->stored_in_db)
+    {
+      Field_blob *blob= ((Field_blob*) *vfield_ptr);
+      /* Free the current value before shallow-copying the saved one back */
+      blob->value.free();
+      memcpy((void*) &blob->value, (void*) blob_storage, sizeof(blob->value));
+      blob_storage++;
+    }
+  }
+}
+
+
+/**
+  @brief
+  Allocate space for keys
+
+  @param key_count  number of keys to allocate additionally
+
+  @details
+  The function
allocates memory to fit additionally 'key_count' keys + for this table. + + @return FALSE space was successfully allocated + @return TRUE an error occur +*/ + +bool TABLE::alloc_keys(uint key_count) +{ + KEY *new_key_info; + key_part_map *new_const_key_parts; + DBUG_ASSERT(s->tmp_table == INTERNAL_TMP_TABLE); + + if (!multi_alloc_root(&mem_root, + &new_key_info, sizeof(*key_info)*(s->keys+key_count), + &new_const_key_parts, + sizeof(*new_const_key_parts)*(s->keys+key_count), + NullS)) + return TRUE; + if (s->keys) + { + memmove(new_key_info, s->key_info, sizeof(*key_info) * s->keys); + memmove(new_const_key_parts, const_key_parts, + s->keys * sizeof(const_key_parts)); + } + s->key_info= key_info= new_key_info; + const_key_parts= new_const_key_parts; + bzero((char*) (const_key_parts + s->keys), + sizeof(*const_key_parts) * key_count); + max_keys= s->keys+key_count; + return FALSE; +} + + +/** + @brief + Populate a KEY_PART_INFO structure with the data related to a field entry. + + @param key_part_info The structure to fill. + @param field The field entry that represents the key part. + @param fleldnr The number of the field, count starting from 1. + + TODO: This method does not make use of any table specific fields. It + could be refactored to act as a constructor for KEY_PART_INFO instead. +*/ + +void TABLE::create_key_part_by_field(KEY_PART_INFO *key_part_info, + Field *field, uint fieldnr) +{ + DBUG_ASSERT(field->field_index + 1 == (int)fieldnr); + key_part_info->null_bit= field->null_bit; + key_part_info->null_offset= (uint) (field->null_ptr - + (uchar*) record[0]); + key_part_info->field= field; + key_part_info->fieldnr= fieldnr; + key_part_info->offset= field->offset(record[0]); + /* + field->key_length() accounts for the raw length of the field, excluding + any metadata such as length of field or the NULL flag. 
+ */ + key_part_info->length= (uint16) field->key_length(); + key_part_info->key_part_flag= 0; + /* TODO: + The below method of computing the key format length of the + key part is a copy/paste from opt_range.cc, and table.cc. + This should be factored out, e.g. as a method of Field. + In addition it is not clear if any of the Field::*_length + methods is supposed to compute the same length. If so, it + might be reused. + */ + key_part_info->store_length= key_part_info->length; + /* + For BIT fields null_bit is not set to 0 even if the field is defined + as NOT NULL, look at Field_bit::Field_bit + */ + if (!field->real_maybe_null()) + { + key_part_info->null_bit= 0; + } + + /* + The total store length of the key part is the raw length of the field + + any metadata information, such as its length for strings and/or the null + flag. + */ + if (field->real_maybe_null()) + { + key_part_info->store_length+= HA_KEY_NULL_LENGTH; + } + + key_part_info->key_part_flag|= field->key_part_flag(); + key_part_info->store_length+= field->key_part_length_bytes(); + + key_part_info->type= (uint8) field->key_type(); + key_part_info->key_type = + ((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT || + (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT1 || + (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT2) ? + 0 : FIELDFLAG_BINARY; +} + + +/** + @brief + Check validity of a possible key for the derived table + + @param key the number of the key + @param key_parts number of components of the key + @param next_field_no the call-back function that returns the number of + the field used as the next component of the key + @param arg the argument for the above function + + @details + The function checks whether a possible key satisfies the constraints + imposed on the keys of any temporary table. + + We need to filter out BLOB columns here, because ref access optimizer creates + KEYUSE objects for equalities for non-key columns for two puproses: + 1. 
To discover possible keys for derived_with_keys optimization + 2. To do hash joins + For the purpose of #1, KEYUSE objects are not created for "blob_column=..." . + However, they might be created for #2. In order to catch that case, we filter + them out here. + + @return TRUE if the key is valid + @return FALSE otherwise +*/ + +bool TABLE::check_tmp_key(uint key, uint key_parts, + uint (*next_field_no) (uchar *), uchar *arg) +{ + Field **reg_field; + uint i; + uint key_len= 0; + + for (i= 0; i < key_parts; i++) + { + uint fld_idx= next_field_no(arg); + reg_field= field + fld_idx; + if ((*reg_field)->type() == MYSQL_TYPE_BLOB) + return FALSE; + uint fld_store_len= (uint16) (*reg_field)->key_length(); + if ((*reg_field)->real_maybe_null()) + fld_store_len+= HA_KEY_NULL_LENGTH; + if ((*reg_field)->real_type() == MYSQL_TYPE_VARCHAR || + (*reg_field)->type() == MYSQL_TYPE_GEOMETRY) + fld_store_len+= HA_KEY_BLOB_LENGTH; + key_len+= fld_store_len; + } + /* + We use MI_MAX_KEY_LENGTH (myisam's default) below because it is + smaller than MAX_KEY_LENGTH (heap's default) and it's unknown whether + myisam or heap will be used for the temporary table. + */ + return key_len <= MI_MAX_KEY_LENGTH; +} + +/** + @brief + Add one key to a temporary table + + @param key the number of the key + @param key_parts number of components of the key + @param next_field_no the call-back function that returns the number of + the field used as the next component of the key + @param arg the argument for the above function + @param unique TRUE <=> it is a unique index + + @details + The function adds a new key to the table that is assumed to be a temporary + table. At each its invocation the call-back function must return + the number of the field that is used as the next component of this key. 
+ + @return FALSE is a success + @return TRUE if a failure + +*/ + +bool TABLE::add_tmp_key(uint key, uint key_parts, + uint (*next_field_no) (uchar *), uchar *arg, + bool unique) +{ + DBUG_ASSERT(key < max_keys); + + char buf[NAME_CHAR_LEN]; + KEY* keyinfo; + Field **reg_field; + uint i; + + bool key_start= TRUE; + KEY_PART_INFO* key_part_info= + (KEY_PART_INFO*) alloc_root(&mem_root, sizeof(KEY_PART_INFO)*key_parts); + if (!key_part_info) + return TRUE; + keyinfo= key_info + key; + keyinfo->key_part= key_part_info; + keyinfo->usable_key_parts= keyinfo->user_defined_key_parts = key_parts; + keyinfo->ext_key_parts= keyinfo->user_defined_key_parts; + keyinfo->key_length=0; + keyinfo->algorithm= HA_KEY_ALG_UNDEF; + keyinfo->flags= HA_GENERATED_KEY; + keyinfo->ext_key_flags= keyinfo->flags; + keyinfo->is_statistics_from_stat_tables= FALSE; + if (unique) + keyinfo->flags|= HA_NOSAME; + sprintf(buf, "key%i", key); + keyinfo->name.length= strlen(buf); + if (!(keyinfo->name.str= strmake_root(&mem_root, buf, keyinfo->name.length))) + return TRUE; + keyinfo->rec_per_key= (ulong*) alloc_root(&mem_root, + sizeof(ulong)*key_parts); + if (!keyinfo->rec_per_key) + return TRUE; + bzero(keyinfo->rec_per_key, sizeof(ulong)*key_parts); + keyinfo->read_stats= NULL; + keyinfo->collected_stats= NULL; + + for (i= 0; i < key_parts; i++) + { + uint fld_idx= next_field_no(arg); + reg_field= field + fld_idx; + if (key_start) + (*reg_field)->key_start.set_bit(key); + (*reg_field)->part_of_key.set_bit(key); + create_key_part_by_field(key_part_info, *reg_field, fld_idx+1); + keyinfo->key_length += key_part_info->store_length; + (*reg_field)->flags|= PART_KEY_FLAG; + key_start= FALSE; + key_part_info++; + } + + /* + For the case when there is a derived table that would give distinct rows, + the index statistics are passed to the join optimizer to tell that a ref + access to all the fields of the derived table will produce only one row. + */ + + st_select_lex_unit* derived= pos_in_table_list ? 
+ pos_in_table_list->derived: NULL; + if (derived) + { + st_select_lex* first= derived->first_select(); + uint select_list_items= first->get_item_list()->elements; + if (key_parts == select_list_items) + { + if ((!first->is_part_of_union() && (first->options & SELECT_DISTINCT)) || + derived->check_distinct_in_union()) + keyinfo->rec_per_key[key_parts - 1]= 1; + } + } + + set_if_bigger(s->max_key_length, keyinfo->key_length); + s->keys++; + return FALSE; +} + +/* + @brief + Drop all indexes except specified one. + + @param key_to_save the key to save + + @details + Drop all indexes on this table except 'key_to_save'. The saved key becomes + key #0. Memory occupied by key parts of dropped keys are freed. + If the 'key_to_save' is negative then all keys are freed. +*/ + +void TABLE::use_index(int key_to_save) +{ + uint i= 1; + DBUG_ASSERT(!created && key_to_save < (int)s->keys); + if (key_to_save >= 0) + /* Save the given key. */ + memmove(key_info, key_info + key_to_save, sizeof(KEY)); + else + /* Drop all keys; */ + i= 0; + + s->keys= i; +} + +/* + Return TRUE if the table is filled at execution phase + + (and so, the optimizer must not do anything that depends on the contents of + the table, like range analysis or constant table detection) +*/ + +bool TABLE::is_filled_at_execution() +{ + /* + pos_in_table_list == NULL for internal temporary tables because they + do not have a corresponding table reference. Such tables are filled + during execution. + */ + return MY_TEST(!pos_in_table_list || + pos_in_table_list->jtbm_subselect || + pos_in_table_list->is_active_sjm() || + pos_in_table_list->table_function); +} + + +/** + @brief + Get actual number of key components + + @param keyinfo + + @details + The function calculates actual number of key components, possibly including + components of extended keys, taken into consideration by the optimizer for the + key described by the parameter keyinfo. 
+
+  @return number of considered key components
+*/
+
+uint TABLE::actual_n_key_parts(KEY *keyinfo)
+{
+  return optimizer_flag(in_use, OPTIMIZER_SWITCH_EXTENDED_KEYS) ?
+           keyinfo->ext_key_parts : keyinfo->user_defined_key_parts;
+}
+
+
+/**
+  @brief
+  Get actual key flags for a table key
+
+  @param keyinfo  the key to get the flags for
+
+  @details
+  The function finds out actual key flags taken into consideration by the
+  optimizer for the key described by the parameter keyinfo. With the
+  extended_keys optimizer switch on, the extended flags are used.
+
+  @return actual key flags
+*/
+
+ulong TABLE::actual_key_flags(KEY *keyinfo)
+{
+  return optimizer_flag(in_use, OPTIMIZER_SWITCH_EXTENDED_KEYS) ?
+           keyinfo->ext_key_flags : keyinfo->flags;
+}
+
+
+/*
+  Cleanup this table for re-execution.
+
+  SYNOPSIS
+    TABLE_LIST::reinit_before_use()
+*/
+
+void TABLE_LIST::reinit_before_use(THD *thd)
+{
+  /*
+    Reset old pointers to TABLEs: they are not valid since the tables
+    were closed in the end of previous prepare or execute call.
+  */
+  table= 0;
+  /* Reset is_schema_table_processed value (needed for I_S tables) */
+  schema_table_state= NOT_PROCESSED;
+
+  /*
+    Walk up the chain of enclosing nested joins, re-cloning the prepared
+    ON expression (prep_on_expr) into on_expr at each level. The walk
+    continues only while this element is the head of the enclosing
+    nested join's join list.
+  */
+  TABLE_LIST *embedded; /* The table at the current level of nesting. */
+  TABLE_LIST *parent_embedding= this; /* The parent nested table reference. */
+  do
+  {
+    embedded= parent_embedding;
+    if (embedded->prep_on_expr)
+      embedded->on_expr= embedded->prep_on_expr->copy_andor_structure(thd);
+    parent_embedding= embedded->embedding;
+  }
+  while (parent_embedding &&
+         parent_embedding->nested_join->join_list.head() == embedded);
+
+  mdl_request.ticket= NULL;
+}
+
+
+/*
+  Return subselect that contains the FROM list this table is taken from
+
+  SYNOPSIS
+    TABLE_LIST::containing_subselect()
+
+  RETURN
+    Subselect item for the subquery that contains the FROM list
+    this table is taken from if there is any
+    0 - otherwise
+
+*/
+
+Item_subselect *TABLE_LIST::containing_subselect()
+{
+  return (select_lex ? select_lex->master_unit()->item : 0);
+}
+
+/*
+  Compiles the tagged hints list and fills up the bitmasks.
+ + SYNOPSIS + process_index_hints() + table the TABLE to operate on. + + DESCRIPTION + The parser collects the index hints for each table in a "tagged list" + (TABLE_LIST::index_hints). Using the information in this tagged list + this function sets the members TABLE::keys_in_use_for_query, + TABLE::keys_in_use_for_group_by, TABLE::keys_in_use_for_order_by, + TABLE::force_index, TABLE::force_index_order, + TABLE::force_index_group and TABLE::covering_keys. + + Current implementation of the runtime does not allow mixing FORCE INDEX + and USE INDEX, so this is checked here. Then the FORCE INDEX list + (if non-empty) is appended to the USE INDEX list and a flag is set. + + Multiple hints of the same kind are processed so that each clause + is applied to what is computed in the previous clause. + For example: + USE INDEX (i1) USE INDEX (i2) + is equivalent to + USE INDEX (i1,i2) + and means "consider only i1 and i2". + + Similarly + USE INDEX () USE INDEX (i1) + is equivalent to + USE INDEX (i1) + and means "consider only the index i1" + + It is OK to have the same index several times, e.g. "USE INDEX (i1,i1)" is + not an error. + + Different kind of hints (USE/FORCE/IGNORE) are processed in the following + order: + 1. All indexes in USE (or FORCE) INDEX are added to the mask. + 2. All IGNORE INDEX + + e.g. "USE INDEX i1, IGNORE INDEX i1, USE INDEX i1" will not use i1 at all + as if we had "USE INDEX i1, USE INDEX i1, IGNORE INDEX i1". + + As an optimization if there is a covering index, and we have + IGNORE INDEX FOR GROUP/ORDER, and this index is used for the JOIN part, + then we have to ignore the IGNORE INDEX FROM GROUP/ORDER. + + RETURN VALUE + FALSE no errors found + TRUE found and reported an error. 
+*/ +bool TABLE_LIST::process_index_hints(TABLE *tbl) +{ + /* initialize the result variables */ + tbl->keys_in_use_for_query= tbl->keys_in_use_for_group_by= + tbl->keys_in_use_for_order_by= tbl->s->usable_indexes(tbl->in_use); + + /* index hint list processing */ + if (index_hints) + { + key_map index_join[INDEX_HINT_FORCE + 1]; + key_map index_order[INDEX_HINT_FORCE + 1]; + key_map index_group[INDEX_HINT_FORCE + 1]; + Index_hint *hint; + int type; + bool have_empty_use_join= FALSE, have_empty_use_order= FALSE, + have_empty_use_group= FALSE; + List_iterator iter(*index_hints); + + /* initialize temporary variables used to collect hints of each kind */ + for (type= INDEX_HINT_IGNORE; type <= INDEX_HINT_FORCE; type++) + { + index_join[type].clear_all(); + index_order[type].clear_all(); + index_group[type].clear_all(); + } + + /* iterate over the hints list */ + while ((hint= iter++)) + { + uint pos; + + /* process empty USE INDEX () */ + if (hint->type == INDEX_HINT_USE && !hint->key_name.str) + { + if (hint->clause & INDEX_HINT_MASK_JOIN) + { + index_join[hint->type].clear_all(); + have_empty_use_join= TRUE; + } + if (hint->clause & INDEX_HINT_MASK_ORDER) + { + index_order[hint->type].clear_all(); + have_empty_use_order= TRUE; + } + if (hint->clause & INDEX_HINT_MASK_GROUP) + { + index_group[hint->type].clear_all(); + have_empty_use_group= TRUE; + } + continue; + } + + /* + Check if an index with the given name exists and get his offset in + the keys bitmask for the table + */ + if (tbl->s->keynames.type_names == 0 || + (pos= find_type(&tbl->s->keynames, hint->key_name.str, + hint->key_name.length, 1)) <= 0 || + (tbl->s->key_info[pos - 1].is_ignored)) + { + my_error(ER_KEY_DOES_NOT_EXISTS, MYF(0), hint->key_name.str, alias.str); + return 1; + } + + pos--; + + /* add to the appropriate clause mask */ + if (hint->clause & INDEX_HINT_MASK_JOIN) + index_join[hint->type].set_bit (pos); + if (hint->clause & INDEX_HINT_MASK_ORDER) + index_order[hint->type].set_bit (pos); 
+ if (hint->clause & INDEX_HINT_MASK_GROUP) + index_group[hint->type].set_bit (pos); + } + + /* cannot mix USE INDEX and FORCE INDEX */ + if ((!index_join[INDEX_HINT_FORCE].is_clear_all() || + !index_order[INDEX_HINT_FORCE].is_clear_all() || + !index_group[INDEX_HINT_FORCE].is_clear_all()) && + (!index_join[INDEX_HINT_USE].is_clear_all() || have_empty_use_join || + !index_order[INDEX_HINT_USE].is_clear_all() || have_empty_use_order || + !index_group[INDEX_HINT_USE].is_clear_all() || have_empty_use_group)) + { + my_error(ER_WRONG_USAGE, MYF(0), index_hint_type_name[INDEX_HINT_USE], + index_hint_type_name[INDEX_HINT_FORCE]); + return 1; + } + + /* process FORCE INDEX as USE INDEX with a flag */ + if (!index_order[INDEX_HINT_FORCE].is_clear_all()) + { + tbl->force_index_order= TRUE; + index_order[INDEX_HINT_USE].merge(index_order[INDEX_HINT_FORCE]); + } + + if (!index_group[INDEX_HINT_FORCE].is_clear_all()) + { + tbl->force_index_group= TRUE; + index_group[INDEX_HINT_USE].merge(index_group[INDEX_HINT_FORCE]); + } + + /* + TODO: get rid of tbl->force_index (on if any FORCE INDEX is specified) + and create tbl->force_index_join instead. + Then use the correct force_index_XX instead of the global one. 
+  */
+  /* process FORCE INDEX (for JOIN) as USE INDEX with the force flag set */
+  if (!index_join[INDEX_HINT_FORCE].is_clear_all() ||
+      tbl->force_index_group || tbl->force_index_order)
+  {
+    tbl->force_index= TRUE;
+    index_join[INDEX_HINT_USE].merge(index_join[INDEX_HINT_FORCE]);
+  }
+
+  /* apply USE INDEX */
+  if (!index_join[INDEX_HINT_USE].is_clear_all() || have_empty_use_join)
+    tbl->keys_in_use_for_query.intersect(index_join[INDEX_HINT_USE]);
+  if (!index_order[INDEX_HINT_USE].is_clear_all() || have_empty_use_order)
+    tbl->keys_in_use_for_order_by.intersect (index_order[INDEX_HINT_USE]);
+  if (!index_group[INDEX_HINT_USE].is_clear_all() || have_empty_use_group)
+    tbl->keys_in_use_for_group_by.intersect (index_group[INDEX_HINT_USE]);
+
+  /* apply IGNORE INDEX: IGNORE always wins over USE/FORCE */
+  tbl->keys_in_use_for_query.subtract (index_join[INDEX_HINT_IGNORE]);
+  tbl->keys_in_use_for_order_by.subtract (index_order[INDEX_HINT_IGNORE]);
+  tbl->keys_in_use_for_group_by.subtract (index_group[INDEX_HINT_IGNORE]);
+  }
+
+  /* make sure covering_keys don't include indexes disabled with a hint */
+  tbl->covering_keys.intersect(tbl->keys_in_use_for_query);
+  return 0;
+}
+
+
+/*
+  Return an upper bound on the packed row length for 'data':
+  the share's reclength plus 2 bytes per field, plus, for every non-NULL
+  blob column set in 'cols', the blob length plus 8 bytes of metadata.
+*/
+
+size_t max_row_length(TABLE *table, MY_BITMAP const *cols, const uchar *data)
+{
+  TABLE_SHARE *table_s= table->s;
+  size_t length= table_s->reclength + 2 * table_s->fields;
+  uint *const beg= table_s->blob_field;
+  uint *const end= beg + table_s->blob_fields;
+  /* 'data' may point at record[1]; offset field accessors accordingly */
+  my_ptrdiff_t const rec_offset= (my_ptrdiff_t) (data - table->record[0]);
+  DBUG_ENTER("max_row_length");
+
+  for (uint *ptr= beg ; ptr != end ; ++ptr)
+  {
+    Field * const field= table->field[*ptr];
+    if (bitmap_is_set(cols, field->field_index) &&
+        !field->is_null(rec_offset))
+    {
+      Field_blob * const blob= (Field_blob*) field;
+      length+= blob->get_length(rec_offset) + 8; /* max blob store length */
+    }
+  }
+  DBUG_PRINT("exit", ("length: %lld", (longlong) length));
+  DBUG_RETURN(length);
+}
+
+
+/**
+  Helper function that allocates metadata lock request
+  objects for all elements of table list.
+*/ + +void init_mdl_requests(TABLE_LIST *table_list) +{ + for ( ; table_list ; table_list= table_list->next_global) + MDL_REQUEST_INIT(&table_list->mdl_request, MDL_key::TABLE, + table_list->db.str, table_list->table_name.str, + table_list->lock_type >= TL_FIRST_WRITE + ? MDL_SHARED_WRITE : MDL_SHARED_READ, MDL_TRANSACTION); +} + + +/** + Update TABLE::const_key_parts for single table UPDATE/DELETE query + + @param conds WHERE clause expression + + @retval TRUE error (OOM) + @retval FALSE success + + @note + Set const_key_parts bits if key fields are equal to constants in + the WHERE expression. +*/ + +bool TABLE::update_const_key_parts(COND *conds) +{ + bzero((char*) const_key_parts, sizeof(key_part_map) * s->keys); + + if (conds == NULL) + return FALSE; + + for (uint index= 0; index < s->keys; index++) + { + KEY_PART_INFO *keyinfo= key_info[index].key_part; + KEY_PART_INFO *keyinfo_end= keyinfo + key_info[index].user_defined_key_parts; + + for (key_part_map part_map= (key_part_map)1; + keyinfo < keyinfo_end; + keyinfo++, part_map<<= 1) + { + if (const_expression_in_where(conds, NULL, keyinfo->field)) + const_key_parts[index]|= part_map; + } + } + return FALSE; +} + +/** + Test if the order list consists of simple field expressions + + @param order Linked list of ORDER BY arguments + + @return TRUE if @a order is empty or consist of simple field expressions +*/ + +bool is_simple_order(ORDER *order) +{ + for (ORDER *ord= order; ord; ord= ord->next) + { + if (ord->item[0]->real_item()->type() != Item::FIELD_ITEM) + return FALSE; + } + return TRUE; +} + +/* + to satisfy marked_for_write_or_computed() Field's assert we temporarily + mark field for write before storing the generated value in it +*/ +#ifdef DBUG_ASSERT_EXISTS +#define DBUG_FIX_WRITE_SET(f) bool _write_set_fixed= !bitmap_fast_test_and_set(write_set, (f)->field_index) +#define DBUG_RESTORE_WRITE_SET(f) if (_write_set_fixed) bitmap_clear_bit(write_set, (f)->field_index) +#else +#define 
DBUG_FIX_WRITE_SET(f) +#define DBUG_RESTORE_WRITE_SET(f) +#endif + + +/* + @brief Compute values for virtual columns used in query + + @param update_mode Specifies what virtual column are computed + + @details + The function computes the values of the virtual columns of the table and + stores them in the table record buffer. + This will be done even if is_error() is set either when function was called + or by calculating the virtual function, as most calls to this + function doesn't check the result. We also want to ensure that as many + fields as possible has the right value so that we can optionally + return the partly-faulty-row from a storage engine with a virtual + field that gives an error on storage for an existing row. + + @todo + Ensure that all caller checks the value of this function and + either properly ignores it (and resets the error) or sends the + error forward to the caller. + + @retval + 0 Success + @retval + >0 Error occurred when storing a virtual field value or potentially + is_error() was set when function was called. +*/ + +int TABLE::update_virtual_fields(handler *h, enum_vcol_update_mode update_mode) +{ + DBUG_ENTER("TABLE::update_virtual_fields"); + DBUG_PRINT("enter", ("update_mode: %d is_error: %d", update_mode, + in_use->is_error())); + Field **vfield_ptr, *vf; + Query_arena backup_arena; + Turn_errors_to_warnings_handler Suppress_errors; + bool handler_pushed= 0, update_all_columns= 1; + DBUG_ASSERT(vfield); + + if (h->keyread_enabled()) + DBUG_RETURN(0); + /* + TODO: this imposes memory leak until table flush when save_in_field() + does expr_arena allocation. F.ex. 
case in + gcol.gcol_supported_sql_funcs_innodb (see CONVERT_TZ): + + create table t1 ( + a datetime, b datetime generated always as + (convert_tz(a, 'MET', 'UTC')) virtual); + insert into t1 values ('2008-08-31', default); + */ + in_use->set_n_backup_active_arena(expr_arena, &backup_arena); + + /* When reading or deleting row, ignore errors from virtual columns */ + if (update_mode == VCOL_UPDATE_FOR_READ || + update_mode == VCOL_UPDATE_FOR_DELETE || + update_mode == VCOL_UPDATE_INDEXED) + { + in_use->push_internal_handler(&Suppress_errors); + handler_pushed= 1; + } + else if (update_mode == VCOL_UPDATE_FOR_REPLACE && + in_use->is_current_stmt_binlog_format_row() && + in_use->variables.binlog_row_image != BINLOG_ROW_IMAGE_MINIMAL) + { + /* + If we are doing a replace with not minimal binary logging, we have to + calculate all virtual columns. + */ + update_all_columns= 1; + } + + /* Iterate over virtual fields in the table */ + for (vfield_ptr= vfield; *vfield_ptr ; vfield_ptr++) + { + vf= (*vfield_ptr); + Virtual_column_info *vcol_info= vf->vcol_info; + DBUG_ASSERT(vcol_info); + DBUG_ASSERT(vcol_info->expr); + + bool update= 0, swap_values= 0; + switch (update_mode) { + case VCOL_UPDATE_FOR_READ: + update= (!vcol_info->stored_in_db && + bitmap_is_set(read_set, vf->field_index)); + swap_values= 1; + break; + case VCOL_UPDATE_FOR_DELETE: + case VCOL_UPDATE_FOR_WRITE: + update= bitmap_is_set(read_set, vf->field_index); + break; + case VCOL_UPDATE_FOR_REPLACE: + update= ((!vcol_info->stored_in_db && + (vf->flags & (PART_KEY_FLAG | PART_INDIRECT_KEY_FLAG)) && + bitmap_is_set(read_set, vf->field_index)) || + update_all_columns); + if (update && (vf->flags & BLOB_FLAG)) + { + /* + The row has been read into record[1] and Field_blob::value + contains the value for record[0]. 
Swap value and read_value + to ensure that the virtual column data for the read row will + be in read_value at the end of this function + */ + ((Field_blob*) vf)->swap_value_and_read_value(); + /* Ensure we call swap_value_and_read_value() after update */ + swap_values= 1; + } + break; + case VCOL_UPDATE_INDEXED: + case VCOL_UPDATE_INDEXED_FOR_UPDATE: + /* Read indexed fields that was not updated in VCOL_UPDATE_FOR_READ */ + update= (!vcol_info->stored_in_db && + (vf->flags & (PART_KEY_FLAG | PART_INDIRECT_KEY_FLAG)) && + !bitmap_is_set(read_set, vf->field_index)); + swap_values= 1; + break; + } + + if (update) + { + /* Compute the actual value of the virtual fields */ + DBUG_FIX_WRITE_SET(vf); +# ifdef DBUG_TRACE + int field_error= +# endif + vcol_info->expr->save_in_field(vf, 0); + DBUG_RESTORE_WRITE_SET(vf); + DBUG_PRINT("info", ("field '%s' - updated error: %d", + vf->field_name.str, field_error)); + if (swap_values && (vf->flags & BLOB_FLAG)) + { + /* + Remember the read value to allow other update_virtual_field() calls + for the same blob field for the row to be updated. + Field_blob->read_value always contains the virtual column data for + any read row. + */ + ((Field_blob*) vf)->swap_value_and_read_value(); + } + } + else + { + DBUG_PRINT("info", ("field '%s' - skipped", vf->field_name.str)); + } + } + if (handler_pushed) + in_use->pop_internal_handler(); + in_use->restore_active_arena(expr_arena, &backup_arena); + + /* Return 1 only of we got a fatal error, not a warning */ + DBUG_RETURN(in_use->is_error()); +} + +/* + Calculate the virtual field value for a specified field. + @param vf A field to calculate + @param ignore_warnings Ignore the warnings and also make the + calculations permissive. This usually means + that a calculation is internal and is not + expected to fail. 
+*/ +int TABLE::update_virtual_field(Field *vf, bool ignore_warnings) +{ + DBUG_ENTER("TABLE::update_virtual_field"); + Query_arena backup_arena; + Counting_error_handler count_errors; + Suppress_warnings_error_handler warning_handler; + in_use->push_internal_handler(&count_errors); + bool abort_on_warning= ignore_warnings; + if (ignore_warnings) + { + abort_on_warning= in_use->abort_on_warning; + in_use->abort_on_warning= false; + in_use->push_internal_handler(&warning_handler); + } + /* + TODO: this may impose memory leak until table flush. + See comment in + TABLE::update_virtual_fields(handler *, enum_vcol_update_mode). + */ + in_use->set_n_backup_active_arena(expr_arena, &backup_arena); + bitmap_clear_all(&tmp_set); + vf->vcol_info->expr->walk(&Item::update_vcol_processor, 0, &tmp_set); + DBUG_FIX_WRITE_SET(vf); + vf->vcol_info->expr->save_in_field(vf, 0); + DBUG_RESTORE_WRITE_SET(vf); + in_use->restore_active_arena(expr_arena, &backup_arena); + in_use->pop_internal_handler(); + if (ignore_warnings) + { + in_use->abort_on_warning= abort_on_warning; + in_use->pop_internal_handler(); + // This is an internal calculation, we expect it to always succeed + DBUG_ASSERT(count_errors.errors == 0); + } + DBUG_RETURN(count_errors.errors); +} + + +/** + Update all DEFAULT and/or ON INSERT fields. + + @details + Compute and set the default value of all fields with a default function. + There are two kinds of default functions - one is used for INSERT-like + operations, the other for UPDATE-like operations. Depending on the field + definition and the current operation one or the other kind of update + function is evaluated. + + @param update_command True if command was an update else insert + @param ignore_errors True if we should ignore errors + + @retval + 0 Success + @retval + >0 Error occurred when storing a virtual field value and + ignore_errors == 0. If set then an error was generated. 
*/

int TABLE::update_default_fields(bool ignore_errors)
{
  Query_arena backup_arena;
  Field **field_ptr;
  int res= 0;
  DBUG_ENTER("TABLE::update_default_fields");
  DBUG_ASSERT(default_field);

  /*
    TODO: this may impose memory leak until table flush.
          See comment in
          TABLE::update_virtual_fields(handler *, enum_vcol_update_mode).
  */
  in_use->set_n_backup_active_arena(expr_arena, &backup_arena);

  /* Iterate over fields with default functions in the table */
  for (field_ptr= default_field; *field_ptr ; field_ptr++)
  {
    Field *field= (*field_ptr);
    /*
      If an explicit default value for a field overrides the default,
      do not update the field with its automatic default value.
    */
    if (!field->has_explicit_value())
    {
      if (field->default_value &&
          (field->default_value->flags || field->flags & BLOB_FLAG))
        res|= (field->default_value->expr->save_in_field(field, 0) < 0);
      if (!ignore_errors && res)
      {
        my_error(ER_CALCULATING_DEFAULT_VALUE, MYF(0), field->field_name.str);
        break;
      }
      /* With ignore_errors the per-field failure is dropped and we continue */
      res= 0;
    }
  }
  in_use->restore_active_arena(expr_arena, &backup_arena);
  DBUG_RETURN(res);
}


/*
  Fill in auto-increment, virtual, versioning fields and check constraints
  for the current row (record[0]) before it is written.

  @return 0 on success, non-zero on the first failing step
*/
int TABLE::update_generated_fields()
{
  int res= 0;
  if (found_next_number_field)
  {
    next_number_field= found_next_number_field;
    res= found_next_number_field->set_default();
    if (likely(!res))
      res= file->update_auto_increment();
    next_number_field= NULL;
  }

  if (likely(!res) && vfield)
    res= update_virtual_fields(file, VCOL_UPDATE_FOR_WRITE);
  if (likely(!res) && versioned())
    vers_update_fields();
  if (likely(!res))
    res= verify_constraints(false) == VIEW_CHECK_ERROR;
  return res;
}

/*
  Store 'src' into the period bound field 'dst' and insert the resulting
  row (with INSERT triggers fired).  record[1] is used to preserve and
  restore the current row; the auto-increment counter is rolled back on
  failure.

  @return 0 on success, non-zero error code otherwise
*/
int TABLE::period_make_insert(Item *src, Field *dst)
{
  THD *thd= in_use;

  ulonglong prev_insert_id= file->next_insert_id;
  store_record(this, record[1]);
  int res= src->save_in_field(dst, true);

  if (likely(!res))
    res= update_generated_fields();

  if (likely(!res) && triggers)
    res= triggers->process_triggers(thd, TRG_EVENT_INSERT,
                                    TRG_ACTION_BEFORE, true);

  if (likely(!res))
    res = file->ha_write_row(record[0]);

  if (likely(!res) && triggers)
    res= triggers->process_triggers(thd, TRG_EVENT_INSERT,
                                    TRG_ACTION_AFTER, true);

  restore_record(this, record[1]);
  if (res)
    file->restore_auto_increment(prev_insert_id);
  return res;
}

/*
  Insert the remaining (non-affected) portions of a row that is being
  modified FOR PORTION OF a period: one row for the part before the
  requested range and/or one for the part after it.

  @param thd           current session
  @param period_conds  the period boundary conditions of the statement
  @param rows_inserted [out] incremented for each row actually inserted

  @return 0 on success, non-zero error code otherwise
*/
int TABLE::insert_portion_of_time(THD *thd,
                                  const vers_select_conds_t &period_conds,
                                  ha_rows *rows_inserted)
{
  /* Row's period starts before the requested range */
  bool lcond= period_conds.field_start->val_datetime_packed(thd)
              < period_conds.start.item->val_datetime_packed(thd);
  /* Row's period ends after the requested range */
  bool rcond= period_conds.field_end->val_datetime_packed(thd)
              > period_conds.end.item->val_datetime_packed(thd);

  int res= 0;
  if (lcond)
  {
    res= period_make_insert(period_conds.start.item,
                            field[s->period.end_fieldno]);
    if (likely(!res))
      ++*rows_inserted;
  }
  if (likely(!res) && rcond)
  {
    res= period_make_insert(period_conds.end.item,
                            field[s->period.start_fieldno]);
    if (likely(!res))
      ++*rows_inserted;
  }

  return res;
}

/*
  Set the current time into every field with an ON UPDATE default function
  that was not given an explicit value by the statement.
*/
void TABLE::evaluate_update_default_function()
{
  DBUG_ENTER("TABLE::evaluate_update_default_function");

  if (s->has_update_default_function)
    for (Field **field_ptr= default_field; *field_ptr ; field_ptr++)
    {
      Field *field= (*field_ptr);
      if (!field->has_explicit_value() && field->has_update_default_function())
        field->set_time();
    }
  DBUG_VOID_RETURN;
}

/**
  Compare two records by a specific key (that has WITHOUT OVERLAPS clause)

  @return true,  key values are equal and periods overlap
          false, either key values differ or periods don't overlap
 */
bool TABLE::check_period_overlaps(const KEY &key,
                                  const uchar *lhs, const uchar *rhs)
{
  DBUG_ASSERT(key.without_overlaps);
  /* The last two key parts are the period bounds; compare the rest first */
  uint base_part_nr= key.user_defined_key_parts - 2;
  for (uint part_nr= 0; part_nr < base_part_nr; part_nr++)
  {
    Field *f= key.key_part[part_nr].field;
    if (key.key_part[part_nr].null_bit)
      if (f->is_null_in_record(lhs) || f->is_null_in_record(rhs))
        return false;
    uint kp_len= key.key_part[part_nr].length;
    if (f->cmp_prefix(f->ptr_in_record(lhs), f->ptr_in_record(rhs),
                      kp_len / f->charset()->mbmaxlen) != 0)
      return false;
  }

  uint period_start= key.user_defined_key_parts - 1;
  uint period_end= key.user_defined_key_parts - 2;
  const Field *fs= key.key_part[period_start].field;
  const Field *fe= key.key_part[period_end].field;

  /* Periods [s1,e1) and [s2,e2) overlap iff e1 > s2 and s1 < e2 */
  if (fs->cmp(fe->ptr_in_record(lhs), fs->ptr_in_record(rhs)) <= 0)
    return false;
  if (fs->cmp(fs->ptr_in_record(lhs), fe->ptr_in_record(rhs)) >= 0)
    return false;
  return true;
}

/* returns true if vers_end_field was updated */
bool TABLE::vers_update_fields()
{
  bool res= false;
  if (versioned(VERS_TIMESTAMP) && !vers_start_field()->has_explicit_value())
  {
    if (vers_start_field()->set_time())
      DBUG_ASSERT(0);
  }

  if (!versioned(VERS_TIMESTAMP) || !vers_end_field()->has_explicit_value())
  {
    vers_end_field()->set_max();
    res= true;
  }

  if (vfield)
    update_virtual_fields(file, VCOL_UPDATE_FOR_READ);
  return res;
}

/*
  Close the history period of the current row: set row_end to the
  statement's start timestamp.
*/
void TABLE::vers_update_end()
{
  if (vers_end_field()->store_timestamp(in_use->query_start(),
                                        in_use->query_start_sec_part()))
    DBUG_ASSERT(0);
}

/**
  Reset markers that fields are being updated
*/

void TABLE::reset_default_fields()
{
  DBUG_ENTER("reset_default_fields");
  bitmap_clear_all(&has_value_set);
  DBUG_VOID_RETURN;
}

/*
  Prepare triggers  for INSERT-like statement.

  SYNOPSIS
    prepare_triggers_for_insert_stmt_or_event()

  NOTE
    Prepare triggers for INSERT-like statement by marking fields
    used by triggers and inform handlers that batching of UPDATE/DELETE
    cannot be done if there are BEFORE UPDATE/DELETE triggers.
*/

void TABLE::prepare_triggers_for_insert_stmt_or_event()
{
  if (triggers)
  {
    if (triggers->has_triggers(TRG_EVENT_DELETE,
                               TRG_ACTION_AFTER))
    {
      /*
        The table has AFTER DELETE triggers that might access to
        subject table and therefore might need delete to be done
        immediately. So we turn-off the batching.
      */
      (void) file->extra(HA_EXTRA_DELETE_CANNOT_BATCH);
    }
    if (triggers->has_triggers(TRG_EVENT_UPDATE,
                               TRG_ACTION_AFTER))
    {
      /*
        The table has AFTER UPDATE triggers that might access to subject
        table and therefore might need update to be done immediately.
        So we turn-off the batching.
      */
      (void) file->extra(HA_EXTRA_UPDATE_CANNOT_BATCH);
    }
  }
}


bool TABLE::prepare_triggers_for_delete_stmt_or_event()
{
  if (triggers &&
      triggers->has_triggers(TRG_EVENT_DELETE,
                             TRG_ACTION_AFTER))
  {
    /*
      The table has AFTER DELETE triggers that might access to subject table
      and therefore might need delete to be done immediately. So we turn-off
      the batching.
    */
    (void) file->extra(HA_EXTRA_DELETE_CANNOT_BATCH);
    return TRUE;
  }
  return FALSE;
}


bool TABLE::prepare_triggers_for_update_stmt_or_event()
{
  if (triggers &&
      triggers->has_triggers(TRG_EVENT_UPDATE,
                             TRG_ACTION_AFTER))
  {
    /*
      The table has AFTER UPDATE triggers that might access to subject
      table and therefore might need update to be done immediately.
      So we turn-off the batching.
    */
    (void) file->extra(HA_EXTRA_UPDATE_CANNOT_BATCH);
    return TRUE;
  }
  return FALSE;
}


/**
  Validates default value of fields which are not specified in
  the column list of INSERT/LOAD statement.

  @Note s->default_values should be properly populated
        before calling this function.

  @param thd     thread context
  @param record  the record to check values in

  @return
    @retval false Success.
    @retval true  Failure.
+*/ + +bool TABLE::validate_default_values_of_unset_fields(THD *thd) const +{ + DBUG_ENTER("TABLE::validate_default_values_of_unset_fields"); + for (Field **fld= field; *fld; fld++) + { + if (!bitmap_is_set(write_set, (*fld)->field_index) && + !((*fld)->flags & (NO_DEFAULT_VALUE_FLAG | VERS_SYSTEM_FIELD))) + { + if (!(*fld)->is_null_in_record(s->default_values) && + (*fld)->validate_value_in_record_with_warn(thd, s->default_values) && + thd->is_error()) + { + /* + We're here if: + - validate_value_in_record_with_warn() failed and + strict mo validate_default_values_of_unset_fieldsde converted WARN to ERROR + - or the connection was killed, or closed unexpectedly + */ + DBUG_RETURN(true); + } + } + } + DBUG_RETURN(false); +} + + +/* + Check assignment compatibility of a value list against an explicitly + specified field list, e.g. + INSERT INTO t1 (a,b) VALUES (1,2); +*/ +bool TABLE::check_assignability_explicit_fields(List fields, + List values, + bool ignore) +{ + DBUG_ENTER("TABLE::check_assignability_explicit_fields"); + DBUG_ASSERT(fields.elements == values.elements); + + List_iterator fi(fields); + List_iterator vi(values); + Item *f, *value; + while ((f= fi++) && (value= vi++)) + { + Item_field *item_field= f->field_for_view_update(); + if (!item_field) + { + /* + A non-updatable field of a view found. + This scenario is caught later and an error is raised. + We could eventually move error reporting here. For now just continue. + */ + continue; + } + if (value->check_assignability_to(item_field->field, ignore)) + DBUG_RETURN(true); + } + DBUG_RETURN(false); +} + + +/* + Check assignment compatibility for a value list against + all visible fields of the table, e.g. 
+ INSERT INTO t1 VALUES (1,2); +*/ +bool TABLE::check_assignability_all_visible_fields(List &values, + bool ignore) const +{ + DBUG_ENTER("TABLE::check_assignability_all_visible_fields"); + DBUG_ASSERT(s->visible_fields == values.elements); + + List_iterator vi(values); + for (uint i= 0; i < s->fields; i++) + { + if (!field[i]->invisible && + (vi++)->check_assignability_to(field[i], ignore)) + DBUG_RETURN(true); + } + DBUG_RETURN(false); +} + + +bool TABLE::insert_all_rows_into_tmp_table(THD *thd, + TABLE *tmp_table, + TMP_TABLE_PARAM *tmp_table_param, + bool with_cleanup) +{ + int write_err= 0; + + DBUG_ENTER("TABLE::insert_all_rows_into_tmp_table"); + + if (with_cleanup) + { + if ((write_err= tmp_table->file->ha_delete_all_rows())) + goto err; + } + + if (file->indexes_are_disabled()) + tmp_table->file->ha_disable_indexes(HA_KEY_SWITCH_ALL); + file->ha_index_or_rnd_end(); + + if (unlikely(file->ha_rnd_init_with_error(1))) + DBUG_RETURN(1); + + if (tmp_table->no_rows) + tmp_table->file->extra(HA_EXTRA_NO_ROWS); + else + { + /* update table->file->stats.records */ + file->info(HA_STATUS_VARIABLE); + tmp_table->file->ha_start_bulk_insert(file->stats.records); + } + + while (likely(!file->ha_rnd_next(tmp_table->record[0]))) + { + write_err= tmp_table->file->ha_write_tmp_row(tmp_table->record[0]); + if (unlikely(write_err)) + { + bool is_duplicate; + if (tmp_table->file->is_fatal_error(write_err, HA_CHECK_DUP) && + create_internal_tmp_table_from_heap(thd, tmp_table, + tmp_table_param->start_recinfo, + &tmp_table_param->recinfo, + write_err, 1, &is_duplicate)) + DBUG_RETURN(1); + + } + if (unlikely(thd->check_killed())) + goto err_killed; + } + if (!tmp_table->no_rows && tmp_table->file->ha_end_bulk_insert()) + goto err; + DBUG_RETURN(0); + +err: + DBUG_PRINT("error",("Got error: %d",write_err)); + file->print_error(write_err, MYF(0)); +err_killed: + (void) file->ha_rnd_end(); + DBUG_RETURN(1); +} + + + +/* + @brief Reset const_table flag + + @detail + Reset 
const_table flag for this table. If this table is a merged derived + table/view the flag is recursively reseted for all tables of the underlying + select. +*/ + +void TABLE_LIST::reset_const_table() +{ + table->const_table= 0; + if (is_merged_derived()) + { + SELECT_LEX *select_lex= get_unit()->first_select(); + TABLE_LIST *tl; + List_iterator ti(select_lex->leaf_tables); + while ((tl= ti++)) + tl->reset_const_table(); + } +} + + +/* + @brief Run derived tables/view handling phases on underlying select_lex. + + @param lex LEX for this thread + @param phases derived tables/views handling phases to run + (set of DT_XXX constants) + @details + This function runs this derived table through specified 'phases'. + Underlying tables of this select are handled prior to this derived. + 'lex' is passed as an argument to called functions. + + @return TRUE on error + @return FALSE ok +*/ + +bool TABLE_LIST::handle_derived(LEX *lex, uint phases) +{ + SELECT_LEX_UNIT *unit= get_unit(); + DBUG_ENTER("handle_derived"); + DBUG_PRINT("enter", ("phases: 0x%x", phases)); + + if (unit) + { + if (!is_with_table_recursive_reference()) + { + for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select()) + if (sl->handle_derived(lex, phases)) + DBUG_RETURN(TRUE); + } + if (mysql_handle_single_derived(lex, this, phases)) + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + +/** + @brief + Return unit of this derived table/view + + @return reference to a unit if it's a derived table/view. + @return 0 when it's not a derived table/view. +*/ + +st_select_lex_unit *TABLE_LIST::get_unit() +{ + return (view ? &view->unit : derived); +} + + +/** + @brief + Return select_lex of this derived table/view + + @return select_lex of this derived table/view. + @return 0 when it's not a derived table. +*/ + +st_select_lex *TABLE_LIST::get_single_select() +{ + SELECT_LEX_UNIT *unit= get_unit(); + return (unit ? 
unit->first_select() : 0); +} + + +/** + @brief + Attach a join table list as a nested join to this TABLE_LIST. + + @param join_list join table list to attach + + @details + This function wraps 'join_list' into a nested_join of this table, thus + turning it to a nested join leaf. +*/ + +void TABLE_LIST::wrap_into_nested_join(List &join_list) +{ + TABLE_LIST *tl; + /* + Walk through derived table top list and set 'embedding' to point to + the nesting table. + */ + nested_join->join_list.empty(); + List_iterator_fast li(join_list); + nested_join->join_list= join_list; + while ((tl= li++)) + { + tl->embedding= this; + tl->join_list= &nested_join->join_list; + } +} + + +/** + Check whether optimization has been performed and a derived table either + been merged to upper select level or materialized. + + @param table a TABLE_LIST object containing a derived table + + @return true in case the derived table has been merged to surrounding select, + false otherwise +*/ + +static inline bool derived_table_optimization_done(TABLE_LIST *table) +{ + SELECT_LEX_UNIT *derived= (table->derived ? + table->derived : + (table->view ? + &table->view->unit: + NULL)); + return derived && + (derived->is_excluded() || + table->is_materialized_derived()); +} + + +/** + @brief + Initialize this derived table/view + + @param thd Thread handle + + @details + This function makes initial preparations of this derived table/view for + further processing: + if it's a derived table this function marks it either as mergeable or + materializable + creates temporary table for name resolution purposes + creates field translation for mergeable derived table/view + + @return TRUE an error occur + @return FALSE ok +*/ + +bool TABLE_LIST::init_derived(THD *thd, bool init_view) +{ + SELECT_LEX *first_select= get_single_select(); + SELECT_LEX_UNIT *unit= get_unit(); + + if (!unit) + return FALSE; + /* + Check whether we can merge this derived table into main select. 
+ Depending on the result field translation will or will not + be created. + */ + TABLE_LIST *first_table= (TABLE_LIST *) first_select->table_list.first; + if (first_select->table_list.elements > 1 || + (first_table && first_table->is_multitable())) + set_multitable(); + + if (!unit->derived) + unit->derived= this; + else if (!is_with_table_recursive_reference() && unit->derived != this) + { + if (unit->derived->is_with_table_recursive_reference()) + unit->derived= this; + else if (vers_conditions.eq(unit->derived->vers_conditions)) + vers_conditions.empty(); + else + { + my_error(ER_CONFLICTING_FOR_SYSTEM_TIME, MYF(0)); + return TRUE; + } + } + + if (init_view && !view && + !derived_table_optimization_done(this)) + { + /* This is all what we can do for a derived table for now. */ + set_derived(); + } + + if (!derived_table_optimization_done(this)) + { + /* A subquery might be forced to be materialized due to a side-effect. */ + if (!is_materialized_derived() && unit->can_be_merged() && + /* + Following is special case of + SELECT * FROM () WHERE ROWNUM() <= nnn + */ + (unit->outer_select() && + !(unit->outer_select()->with_rownum && + unit->outer_select()->table_list.elements == 1 && + (thd->lex->sql_command == SQLCOM_SELECT || + !unit->outer_select()->is_query_topmost(thd)) && + !is_view())) && + + (!thd->lex->with_rownum || + (!first_select->group_list.elements && + !first_select->order_list.elements)) && + (is_view() || + optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_MERGE)) && + !thd->lex->can_not_use_merged() && + !((thd->lex->sql_command == SQLCOM_UPDATE_MULTI || + thd->lex->sql_command == SQLCOM_DELETE_MULTI) && !is_view()) && + !is_recursive_with_table()) + set_merged_derived(); + else + set_materialized_derived(); + } + /* + Derived tables/view are materialized prior to UPDATE, thus we can skip + them from table uniqueness check + */ + if (is_materialized_derived()) + { + set_check_materialized(); + } + + /* + Create field translation for mergeable derived 
tables/views. + For derived tables field translation can be created only after + unit is prepared so all '*' are get unrolled. + */ + if (is_merged_derived()) + { + if (is_view() || + (unit->prepared && + !(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW))) + create_field_translation(thd); + } + + return FALSE; +} + + +/** + @brief + Retrieve number of rows in the table + + @details + Retrieve number of rows in the table referred by this TABLE_LIST and + store it in the table's stats.records variable. If this TABLE_LIST refers + to a materialized derived table/view then the estimated number of rows of + the derived table/view is used instead. + + @return 0 ok + @return non zero error +*/ + +int TABLE_LIST::fetch_number_of_rows() +{ + int error= 0; + if (jtbm_subselect) + { + if (jtbm_subselect->is_jtbm_merged) + { + table->file->stats.records= (ha_rows)jtbm_subselect->jtbm_record_count; + set_if_bigger(table->file->stats.records, 2); + table->used_stat_records= table->file->stats.records; + } + return 0; + } + if (is_materialized_derived() && !fill_me) + { + table->file->stats.records= get_unit()->result->est_records; + set_if_bigger(table->file->stats.records, 2); + table->used_stat_records= table->file->stats.records; + } + else + error= table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); + return error; +} + +/* + Procedure of keys generation for result tables of materialized derived + tables/views. + + A key is generated for each equi-join pair derived table-another table. + Each generated key consists of fields of derived table used in equi-join. + Example: + + SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN + t1 ON tt.f1=t1.f3 and tt.f2.=t1.f4; + In this case for the derived table tt one key will be generated. It will + consist of two parts f1 and f2. + Example: + + SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN + t1 ON tt.f1=t1.f3 JOIN + t2 ON tt.f2=t2.f4; + In this case for the derived table tt two keys will be generated. 
+ One key over f1 field, and another key over f2 field. + Currently optimizer may choose to use only one such key, thus the second + one will be dropped after range optimizer is finished. + See also JOIN::drop_unused_derived_keys function. + Example: + + SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN + t1 ON tt.f1=a_function(t1.f3); + In this case for the derived table tt one key will be generated. It will + consist of one field - f1. +*/ + + + +/* + @brief + Change references to underlying items of a merged derived table/view + for fields in derived table's result table. + + @return FALSE ok + @return TRUE Out of memory +*/ +bool TABLE_LIST::change_refs_to_fields() +{ + List_iterator li(used_items); + Item_direct_ref *ref; + Field_iterator_view field_it; + Name_resolution_context *ctx; + THD *thd= table->in_use; + Item **materialized_items; + DBUG_ASSERT(is_merged_derived()); + + if (!used_items.elements) + return FALSE; + + materialized_items= (Item **)thd->calloc(sizeof(void *) * table->s->fields); + ctx= new (thd->mem_root) Name_resolution_context(this); + if (!materialized_items || !ctx) + return TRUE; + + while ((ref= (Item_direct_ref*)li++)) + { + uint idx; + Item *orig_item= *ref->ref; + field_it.set(this); + for (idx= 0; !field_it.end_of_fields(); field_it.next(), idx++) + { + if (field_it.item() == orig_item) + break; + } + DBUG_ASSERT(!field_it.end_of_fields()); + if (!materialized_items[idx]) + { + materialized_items[idx]= + new (thd->mem_root) Item_field(thd, ctx, table->field[idx]); + if (!materialized_items[idx]) + return TRUE; + } + /* + We need to restore the pointers after the execution of the + prepared statement. 
+ */ + thd->change_item_tree((Item **)&ref->ref, + (Item*)(materialized_items + idx)); + /* Inform Item_direct_ref that what it points to has changed */ + ref->ref_changed(); + } + + return FALSE; +} + + +void TABLE_LIST::set_lock_type(THD *thd, enum thr_lock_type lock) +{ + if (check_stack_overrun(thd, STACK_MIN_SIZE, (uchar *)&lock)) + return; + /* we call it only when table is opened and it is "leaf" table*/ + DBUG_ASSERT(table); + lock_type= lock; + /* If not derived tables */ + if (table->file && table->file->is_open()) + table->file->set_lock_type(lock); + if (is_merged_derived()) + { + for (TABLE_LIST *table= get_single_select()->get_table_list(); + table; + table= table->next_local) + { + table->set_lock_type(thd, lock); + } + } +} + +bool TABLE_LIST::is_with_table() +{ + return derived && derived->with_element; +} + + +/** + Check if the definition are the same. + + If versions do not match it check definitions (with checking and setting + trigger definition versions (times) + + @param[in] view TABLE_LIST of the view + @param[in] share Share object of view + + @return false on error or different definitions. + + @sa check_and_update_table_version() +*/ + +bool TABLE_LIST::is_the_same_definition(THD* thd, TABLE_SHARE *s) +{ + enum enum_table_ref_type tp= s->get_table_ref_type(); + if (m_table_ref_type == tp) + { + /* + Cache have not changed which means that definition was not changed + including triggers + */ + if (m_table_ref_version == s->get_table_ref_version()) + return TRUE; + + /* + If cache changed then check content version + */ + if ((tabledef_version.length && + tabledef_version.length == s->tabledef_version.length && + memcmp(tabledef_version.str, s->tabledef_version.str, + tabledef_version.length) == 0)) + { + // Definition have not changed, let's check if triggers changed. 
+ if (table && table->triggers) + { + + my_hrtime_t hr_stmt_prepare= thd->hr_prepare_time; + if (hr_stmt_prepare.val) + for(uint i= 0; i < TRG_EVENT_MAX; i++) + for (uint j= 0; j < TRG_ACTION_MAX; j++) + { + Trigger *tr= + table->triggers->get_trigger((trg_event_type)i, + (trg_action_time_type)j); + if (tr) + if (hr_stmt_prepare.val <= tr->hr_create_time.val) + { + set_tabledef_version(s); + return FALSE; + } + } + } + set_table_id(s); + return TRUE; + } + else + tabledef_version.length= 0; + } + else + set_tabledef_version(s); + return FALSE; +} + + +uint TABLE_SHARE::actual_n_key_parts(THD *thd) +{ + return use_ext_keys && + optimizer_flag(thd, OPTIMIZER_SWITCH_EXTENDED_KEYS) ? + ext_key_parts : key_parts; +} + + +double KEY::actual_rec_per_key(uint i) +{ + if (rec_per_key == 0) + return 0; + return (is_statistics_from_stat_tables ? + read_stats->get_avg_frequency(i) : (double) rec_per_key[i]); +} + +/* + find total number of field in hash expr +*/ +int fields_in_hash_keyinfo(KEY *keyinfo) +{ + Item_func_hash * temp= (Item_func_hash *) + keyinfo->key_part->field->vcol_info->expr; + return temp->argument_count(); +} +/* + setup_keyinfo_hash changes the key_info->key_part + to be same as defined by user + */ +void setup_keyinfo_hash(KEY *key_info) +{ + DBUG_ASSERT(key_info->algorithm == HA_KEY_ALG_LONG_HASH); + DBUG_ASSERT(key_info->key_part->field->flags & LONG_UNIQUE_HASH_FIELD); + uint no_of_keyparts= fields_in_hash_keyinfo(key_info); + key_info->key_part-= no_of_keyparts; + key_info->user_defined_key_parts= key_info->usable_key_parts= + key_info->ext_key_parts= no_of_keyparts; + key_info->flags|= HA_NOSAME; +} +/* + re_setup_keyinfo_hash reverts th setup_keyinfo_hash and this type of + arrangement is expected by storage engine + */ + +void re_setup_keyinfo_hash(KEY *key_info) +{ + DBUG_ASSERT(key_info->algorithm == HA_KEY_ALG_LONG_HASH); + DBUG_ASSERT(!(key_info->key_part->field->flags & LONG_UNIQUE_HASH_FIELD)); + while(!(key_info->key_part->field->flags & 
LONG_UNIQUE_HASH_FIELD)) + key_info->key_part++; + key_info->user_defined_key_parts= key_info->usable_key_parts= + key_info->ext_key_parts= 1; + key_info->flags&= ~HA_NOSAME; +} + +LEX_CSTRING *fk_option_name(enum_fk_option opt) +{ + static LEX_CSTRING names[]= + { + { STRING_WITH_LEN("???") }, + { STRING_WITH_LEN("RESTRICT") }, + { STRING_WITH_LEN("NO ACTION") }, + { STRING_WITH_LEN("CASCADE") }, + { STRING_WITH_LEN("SET NULL") }, + { STRING_WITH_LEN("SET DEFAULT") } + }; + return names + opt; +} + +enum TR_table::enabled TR_table::use_transaction_registry= TR_table::MAYBE; + +TR_table::TR_table(THD* _thd, bool rw) : + thd(_thd), open_tables_backup(NULL) +{ + init_one_table(&MYSQL_SCHEMA_NAME, &TRANSACTION_REG_NAME, + NULL, rw ? TL_WRITE : TL_READ); +} + +bool TR_table::open() +{ + DBUG_ASSERT(!table); + open_tables_backup= new Open_tables_backup; + if (!open_tables_backup) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return true; + } + + All_tmp_tables_list *temporary_tables= thd->temporary_tables; + bool error= !open_log_table(thd, this, open_tables_backup); + thd->temporary_tables= temporary_tables; + + if (use_transaction_registry == MAYBE) + error= check(error); + + use_transaction_registry= error ? 
NO : YES; + + return error; +} + +TR_table::~TR_table() +{ + if (table) + { + thd->temporary_tables= NULL; + close_log_table(thd, open_tables_backup); + } + delete open_tables_backup; +} + +void TR_table::store(uint field_id, ulonglong val) +{ + table->field[field_id]->store(val, true); + table->field[field_id]->set_notnull(); +} + +void TR_table::store(uint field_id, timeval ts) +{ + table->field[field_id]->store_timestamp(ts.tv_sec, ts.tv_usec); + table->field[field_id]->set_notnull(); +} + +enum_tx_isolation TR_table::iso_level() const +{ + enum_tx_isolation res= (enum_tx_isolation) ((*this)[FLD_ISO_LEVEL]->val_int() - 1); + DBUG_ASSERT(res <= ISO_SERIALIZABLE); + return res; +} + +bool TR_table::update(ulonglong start_id, ulonglong end_id) +{ + if (!table && open()) + return true; + + store(FLD_BEGIN_TS, thd->transaction_time()); + thd->set_time(); + timeval end_time= {thd->query_start(), int(thd->query_start_sec_part())}; + store(FLD_TRX_ID, start_id); + store(FLD_COMMIT_ID, end_id); + store(FLD_COMMIT_TS, end_time); + store_iso_level(thd->tx_isolation); + + int error= table->file->ha_write_row(table->record[0]); + if (unlikely(error)) + table->file->print_error(error, MYF(0)); + /* extra() is used to apply the bulk insert operation + on mysql/transaction_registry table */ + return error; +} + +#define newx new (thd->mem_root) +bool TR_table::query(ulonglong trx_id) +{ + if (!table && open()) + return false; + SQL_SELECT_auto select; + READ_RECORD info; + int error; + List dummy; + SELECT_LEX &slex= *(thd->lex->first_select_lex()); + Name_resolution_context_backup backup(slex.context, *this); + Item *field= newx Item_field(thd, &slex.context, (*this)[FLD_TRX_ID]); + Item *value= newx Item_int(thd, trx_id); + COND *conds= newx Item_func_eq(thd, field, value); + if (unlikely((error= setup_conds(thd, this, dummy, &conds)))) + return false; + select= make_select(table, 0, 0, conds, NULL, 0, &error); + if (unlikely(error || !select)) + { + 
my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return false; + } + // FIXME: (performance) force index 'transaction_id' + error= init_read_record(&info, thd, table, select, NULL, + 1 /* use_record_cache */, true /* print_error */, + false /* disable_rr_cache */); + while (!(error= info.read_record()) && !thd->killed && !thd->is_error()) + { + if (select->skip_record(thd) > 0) + return true; + } + my_error(ER_VERS_NO_TRX_ID, MYF(0), (longlong) trx_id); + return false; +} + +bool TR_table::query(MYSQL_TIME &commit_time, bool backwards) +{ + if (!table && open()) + return false; + SQL_SELECT_auto select; + READ_RECORD info; + int error; + List dummy; + SELECT_LEX &slex= *(thd->lex->first_select_lex()); + Name_resolution_context_backup backup(slex.context, *this); + Item *field= newx Item_field(thd, &slex.context, (*this)[FLD_COMMIT_TS]); + Datetime dt(&commit_time); + Item *value= newx Item_datetime_literal(thd, &dt, 6); + COND *conds; + if (backwards) + conds= newx Item_func_ge(thd, field, value); + else + conds= newx Item_func_le(thd, field, value); + if (unlikely((error= setup_conds(thd, this, dummy, &conds)))) + return false; + // FIXME: (performance) force index 'commit_timestamp' + select= make_select(table, 0, 0, conds, NULL, 0, &error); + if (unlikely(error || !select)) + return false; + error= init_read_record(&info, thd, table, select, NULL, + 1 /* use_record_cache */, true /* print_error */, + false /* disable_rr_cache */); + + // With PK by transaction_id the records are ordered by PK, so we have to + // scan TRT fully and collect min (backwards == true) + // or max (backwards == false) stats. 
+ bool found= false; + MYSQL_TIME found_ts; + while (!(error= info.read_record()) && !thd->killed && !thd->is_error()) + { + int res= select->skip_record(thd); + if (res > 0) + { + MYSQL_TIME commit_ts; + if ((*this)[FLD_COMMIT_TS]->get_date(&commit_ts, date_mode_t(0))) + { + found= false; + break; + } + int c; + if (!found || ((c= my_time_compare(&commit_ts, &found_ts)) && + (backwards ? c < 0 : c > 0))) + { + found_ts= commit_ts; + found= true; + // TODO: (performance) make ORDER DESC and break after first found. + // Otherwise it is O(n) scan (+copy)! + store_record(table, record[1]); + } + } + else if (res < 0) + { + found= false; + break; + } + } + if (found) + restore_record(table, record[1]); + return found; +} +#undef newx + +bool TR_table::query_sees(bool &result, ulonglong trx_id1, ulonglong trx_id0, + ulonglong commit_id1, enum_tx_isolation iso_level1, + ulonglong commit_id0) +{ + if (trx_id1 == trx_id0) + { + return false; + } + + if (trx_id1 == ULONGLONG_MAX || trx_id0 == 0) + { + result= true; + return false; + } + + if (trx_id0 == ULONGLONG_MAX || trx_id1 == 0) + { + result= false; + return false; + } + + if (!commit_id1) + { + if (!query(trx_id1)) + return true; + + commit_id1= (*this)[FLD_COMMIT_ID]->val_int(); + iso_level1= iso_level(); + } + + if (!commit_id0) + { + if (!query(trx_id0)) + return true; + + commit_id0= (*this)[FLD_COMMIT_ID]->val_int(); + } + + // Trivial case: TX1 started after TX0 committed + if (trx_id1 > commit_id0 + // Concurrent transactions: TX1 committed after TX0 and TX1 is read (un)committed + || (commit_id1 > commit_id0 && iso_level1 < ISO_REPEATABLE_READ)) + { + result= true; + } + else // All other cases: TX1 does not see TX0 + { + result= false; + } + + return false; +} + +void TR_table::warn_schema_incorrect(const char *reason) +{ + if (MYSQL_VERSION_ID == table->s->mysql_version) + { + sql_print_error("%`s.%`s schema is incorrect: %s.", + db.str, table_name.str, reason); + } + else + { + sql_print_error("%`s.%`s 
schema is incorrect: %s. Created with MariaDB %d, "
                    "now running %d.",
                    db.str, table_name.str, reason, MYSQL_VERSION_ID,
                    static_cast(table->s->mysql_version));
    /* NOTE(review): the static_cast above lost its template argument during
       text extraction — restore from upstream (likely a cast to int/ulong). */
  }
}
/*
  NOTE(review): the lines above are the tail of
  TR_table::warn_schema_incorrect(); its head lies before this chunk.
*/

/**
  Check that the underlying table has the structure TR_table expects.

  TR_table appears to wrap the system-versioning transaction registry
  (the field ids FLD_TRX_ID, FLD_COMMIT_ID, FLD_BEGIN_TS, FLD_COMMIT_TS,
  FLD_ISO_LEVEL suggest mysql.transaction_registry) — confirm in table.h.

  @param error  true when opening the table already failed; in that case
                only a "does not exist" warning is logged.

  @retval true   table is missing or its schema is unusable (a warning
                 describing the mismatch has been logged)
  @retval false  table structure looks valid

  Implementation notes:
  - WARN_SCHEMA declares a local 'reason' buffer at its expansion site;
    this is safe only because every use below sits in its own braced block.
  - The labels wrong_enum: / wrong_pk: live inside if-bodies and are entered
    via goto from later checks, so all ENUM/PK failures share one message.
*/
bool TR_table::check(bool error)
{
  if (error)
  {
    sql_print_warning("%`s.%`s does not exist (open failed).", db.str,
                      table_name.str);
    return true;
  }

  /* The registry must live in InnoDB: only it supplies transaction ids. */
  if (table->file->ht->db_type != DB_TYPE_INNODB)
  {
    warn_schema_incorrect("Wrong table engine (expected InnoDB)");
    return true;
  }

#define WARN_SCHEMA(...) \
  char reason[128]; \
  snprintf(reason, 128, __VA_ARGS__); \
  warn_schema_incorrect(reason);

  if (table->s->fields != FIELD_COUNT)
  {
    WARN_SCHEMA("Wrong field count (expected %d)", FIELD_COUNT);
    return true;
  }

  if (table->field[FLD_TRX_ID]->type() != MYSQL_TYPE_LONGLONG)
  {
    WARN_SCHEMA("Wrong field %d type (expected BIGINT UNSIGNED)", FLD_TRX_ID);
    return true;
  }

  if (table->field[FLD_COMMIT_ID]->type() != MYSQL_TYPE_LONGLONG)
  {
    WARN_SCHEMA("Wrong field %d type (expected BIGINT UNSIGNED)", FLD_COMMIT_ID);
    return true;
  }

  if (table->field[FLD_BEGIN_TS]->type() != MYSQL_TYPE_TIMESTAMP)
  {
    WARN_SCHEMA("Wrong field %d type (expected TIMESTAMP(6))", FLD_BEGIN_TS);
    return true;
  }

  if (table->field[FLD_COMMIT_TS]->type() != MYSQL_TYPE_TIMESTAMP)
  {
    WARN_SCHEMA("Wrong field %d type (expected TIMESTAMP(6))", FLD_COMMIT_TS);
    return true;
  }

  if (table->field[FLD_ISO_LEVEL]->type() != MYSQL_TYPE_STRING ||
      !(table->field[FLD_ISO_LEVEL]->flags & ENUM_FLAG))
  {
  /* Entered by goto from the typelib checks below as well. */
  wrong_enum:
    WARN_SCHEMA("Wrong field %d type (expected ENUM('READ-UNCOMMITTED', "
                "'READ-COMMITTED', 'REPEATABLE-READ', 'SERIALIZABLE'))",
                FLD_ISO_LEVEL);
    return true;
  }

  /* NOTE(review): static_cast below lost its template argument in text
     extraction (likely static_cast<Field_enum*>) — restore from upstream. */
  Field_enum *iso_level= static_cast(table->field[FLD_ISO_LEVEL]);
  const st_typelib *typelib= iso_level->typelib;

  /* The ENUM must list exactly the four isolation levels, in this order. */
  if (typelib->count != 4)
    goto wrong_enum;

  if (strcmp(typelib->type_names[0], "READ-UNCOMMITTED") ||
      strcmp(typelib->type_names[1], "READ-COMMITTED") ||
      strcmp(typelib->type_names[2], "REPEATABLE-READ") ||
      strcmp(typelib->type_names[3], "SERIALIZABLE"))
  {
    goto wrong_enum;
  }

  if (!table->key_info || !table->key_info->key_part)
    goto wrong_pk;

  if (strcmp(table->key_info->key_part->field->field_name.str, "transaction_id"))
  {
  wrong_pk:
    WARN_SCHEMA("Wrong PRIMARY KEY (expected `transaction_id`)");
    return true;
  }

  return false;
}

/**
  Validate the FOR SYSTEM_TIME range endpoints of this condition.
  Both endpoints are checked; a missing end item is tolerated by
  Vers_history_point::check_unit itself.
  @return true on error (already reported), false on success
*/
bool vers_select_conds_t::check_units(THD *thd)
{
  DBUG_ASSERT(type != SYSTEM_TIME_UNSPECIFIED);
  DBUG_ASSERT(start.item);
  return start.check_unit(thd) ||
         end.check_unit(thd);
}

/**
  Structural equality of two FOR SYSTEM_TIME conditions.
  @note SYSTEM_TIME_BEFORE and SYSTEM_TIME_HISTORY fall through to the
        DBUG_ASSERT(0) below — comparing conditions of those types is not
        expected; in release builds this returns false.
*/
bool vers_select_conds_t::eq(const vers_select_conds_t &conds) const
{
  if (type != conds.type)
    return false;
  switch (type) {
  case SYSTEM_TIME_UNSPECIFIED:
  case SYSTEM_TIME_ALL:
    return true;
  case SYSTEM_TIME_BEFORE:
    break;
  case SYSTEM_TIME_HISTORY:
    break;
  case SYSTEM_TIME_AS_OF:
    return start.eq(conds.start);
  case SYSTEM_TIME_FROM_TO:
  case SYSTEM_TIME_BETWEEN:
    return start.eq(conds.start) && end.eq(conds.end);
  }
  DBUG_ASSERT(0);
  return false;
}


/**
  Fix the history-point item and verify its type can be used with
  FOR SYSTEM_TIME (the item's type handler must report vers() support).
  A NULL item (no endpoint given) is accepted.
  @return true on error (ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
          already raised), false on success
*/
bool Vers_history_point::check_unit(THD *thd)
{
  if (!item)
    return false;
  if (item->fix_fields_if_needed(thd, &item))
    return true;
  const Type_handler *t= item->this_item()->real_type_handler();
  DBUG_ASSERT(t);
  if (!t->vers())
  {
    my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0),
             t->name().ptr(), "FOR SYSTEM_TIME");
    return true;
  }
  return false;
}


/**
  If the history point is a bare NOW() (0 decimals), raise its precision
  to 6 — presumably so AS OF CURRENT_TIMESTAMP keeps microsecond
  resolution when compared against row_start/row_end; confirm intent
  against callers.
*/
void Vers_history_point::fix_item()
{
  if (item && item->decimals == 0 && item->type() == Item::FUNC_ITEM &&
      ((Item_func*)item)->functype() == Item_func::NOW_FUNC)
    item->decimals= 6;
}


/** Equality: same unit and structurally equal item expressions. */
bool Vers_history_point::eq(const vers_history_point_t &point) const
{
  return unit == point.unit && item->eq(point.item, false);
}

/**
  Print this history point as "<prefix>[TIMESTAMP |TRANSACTION ]<expr>".
  unit_type is indexed by 'unit' — assumes unit values 0/1/2 map to
  unspecified/timestamp/transaction in that order (confirm against the
  vers unit enum declaration).
*/
void Vers_history_point::print(String *str, enum_query_type query_type,
                               const char *prefix, size_t plen) const
{
  const static LEX_CSTRING unit_type[]=
  {
    { STRING_WITH_LEN("") },
    { STRING_WITH_LEN("TIMESTAMP ") },
    { STRING_WITH_LEN("TRANSACTION ") }
  };
  str->append(prefix, plen);
  str->append(unit_type + unit);  /* &unit_type[unit] */
  item->print(str, query_type);
}

/**
  Look up a field of this table by name.

  Uses the share's precomputed name hash when present (non-temporary
  shares), otherwise falls back to a linear scan; the comparison is
  case-insensitive via lex_string_cmp/system_charset_info.

  @return pointer to this TABLE's Field instance, or NULL if not found.
          Note the hash stores TABLE_SHARE fields; the result is mapped
          back into this TABLE's field array via the index offset.
*/
Field *TABLE::find_field_by_name(LEX_CSTRING *str) const
{
  Field **tmp;
  size_t length= str->length;
  if (s->name_hash.records)
  {
    tmp= (Field**) my_hash_search(&s->name_hash, (uchar*) str->str, length);
    return tmp ? field[tmp - s->field] : NULL;
  }
  else
  {
    for (tmp= field; *tmp; tmp++)
    {
      if ((*tmp)->field_name.length == length &&
          !lex_string_cmp(system_charset_info, &(*tmp)->field_name, str))
        return *tmp;
    }
  }
  return NULL;
}


/**
  Export this table's column structure as a list of SP variable
  definitions (presumably for anchored types such as %ROWTYPE — confirm
  against callers).

  For every field a Spvar_definition is created on thd->mem_root and
  appended to 'defs'. NOT_NULL_FLAG is cleared so the resulting SP
  variables are nullable regardless of the column definition.

  @return true on error (duplicate field name raises ER_DUP_FIELDNAME;
          allocation/preparation failures are propagated), false on success
*/
bool TABLE::export_structure(THD *thd, Row_definition_list *defs)
{
  for (Field **src= field; *src; src++)
  {
    uint offs;
    if (defs->find_row_field_by_name(&src[0]->field_name, &offs))
    {
      my_error(ER_DUP_FIELDNAME, MYF(0), src[0]->field_name.str);
      return true;
    }
    Spvar_definition *def= new (thd->mem_root) Spvar_definition(thd, *src);
    if (!def)
      return true;
    def->flags&= (uint) ~NOT_NULL_FLAG;
    if ((def->sp_prepare_create_field(thd, thd->mem_root)) ||
        (defs->push_back(def, thd->mem_root)))
      return true;
  }
  return false;
}

/**
  @brief
  Initialize all the opt_range structures that are used to store the
  estimates when the range optimizer is run.
  As these are initialized by the range optimizer for all indexes
  marked in opt_range_keys, we only mark the memory as undefined
  to be able to find wrong usage of data with valgrind or MSAN.
*/

inline void TABLE::initialize_opt_range_structures()
{
  TRASH_ALLOC((void*)&opt_range_keys, sizeof(opt_range_keys));
  TRASH_ALLOC(opt_range, s->keys * sizeof(*opt_range));
  TRASH_ALLOC(const_key_parts, s->keys * sizeof(*const_key_parts));
}

/*
  Mark table to be reopened after query.
  Requires TABLE::in_use to be set; delegates to the owning THD's
  locked-tables list.
*/

void TABLE::mark_table_for_reopen()
{
  THD *thd= in_use;
  DBUG_ASSERT(thd);
  thd->locked_tables_list.mark_table_for_reopen(this);
}
diff --git a/sql/table.h b/sql/table.h
new file mode 100644
index 00000000..12ad29b1
--- /dev/null
+++ b/sql/table.h
@@ -0,0 +1,3519 @@
#ifndef TABLE_INCLUDED
#define TABLE_INCLUDED
/* Copyright (c) 2000, 2017, Oracle and/or its affiliates.
   Copyright (c) 2009, 2022, MariaDB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "sql_plist.h" +#include "sql_list.h" /* Sql_alloc */ +#include "mdl.h" +#include "datadict.h" +#include "sql_string.h" /* String */ +#include "lex_string.h" + +#ifndef MYSQL_CLIENT + +#include "my_cpu.h" /* LF_BACKOFF() */ +#include "hash.h" /* HASH */ +#include "handler.h" /* row_type, ha_choice, handler */ +#include "mysql_com.h" /* enum_field_types */ +#include "thr_lock.h" /* thr_lock_type */ +#include "filesort_utils.h" +#include "parse_file.h" +#include "sql_i_s.h" +#include "sql_type.h" /* vers_kind_t */ +#include "privilege.h" /* privilege_t */ + +/* + Buffer for unix timestamp in microseconds: + 9,223,372,036,854,775,807 (signed int64 maximal value) + 1 234 567 890 123 456 789 + + Note: we can use unsigned for calculation, but practically they + are the same by probability to overflow them (signed int64 in + microseconds is enough for almost 3e5 years) and signed allow to + avoid increasing the buffer (the old buffer for human readable + date was 19+1). 
+*/ +#define MICROSECOND_TIMESTAMP_BUFFER_SIZE (19 + 1) + +/* Structs that defines the TABLE */ + +class Item; /* Needed by ORDER */ +typedef Item (*Item_ptr); +class Item_subselect; +class Item_field; +class Item_func_hash; +class GRANT_TABLE; +class st_select_lex_unit; +class st_select_lex; +class partition_info; +class COND_EQUAL; +class Security_context; +struct TABLE_LIST; +class ACL_internal_schema_access; +class ACL_internal_table_access; +class Field; +class Table_statistics; +class With_element; +struct TDC_element; +class Virtual_column_info; +class Table_triggers_list; +class TMP_TABLE_PARAM; +class SEQUENCE; +class Range_rowid_filter_cost_info; +class derived_handler; +class Pushdown_derived; +struct Name_resolution_context; +class Table_function_json_table; +class Open_table_context; + +/* + Used to identify NESTED_JOIN structures within a join (applicable only to + structures that have not been simplified away and embed more the one + element) +*/ +typedef ulonglong nested_join_map; + +#define VIEW_MD5_LEN 32 + + +#define tmp_file_prefix "#sql" /**< Prefix for tmp tables */ +#define tmp_file_prefix_length 4 +#define TMP_TABLE_KEY_EXTRA 8 + +/** + Enumerate possible types of a table from re-execution + standpoint. + TABLE_LIST class has a member of this type. + At prepared statement prepare, this member is assigned a value + as of the current state of the database. Before (re-)execution + of a prepared statement, we check that the value recorded at + prepare matches the type of the object we obtained from the + table definition cache. 

  @sa check_and_update_table_version()
  @sa Execute_observer
  @sa Prepared_statement::reprepare()
*/

enum enum_table_ref_type
{
  /** Initial value set by the parser */
  TABLE_REF_NULL= 0,
  TABLE_REF_VIEW,
  TABLE_REF_BASE_TABLE,
  TABLE_REF_I_S_TABLE,
  TABLE_REF_TMP_TABLE
};


/*************************************************************************/

/**
  Object_creation_ctx -- interface for creation context of database objects
  (views, stored routines, events, triggers). Creation context -- is a set
  of attributes, that should be fixed at the creation time and then be used
  each time the object is parsed or executed.
*/

class Object_creation_ctx
{
public:
  Object_creation_ctx *set_n_backup(THD *thd);

  void restore_env(THD *thd, Object_creation_ctx *backup_ctx);

protected:
  Object_creation_ctx() = default;
  virtual Object_creation_ctx *create_backup_ctx(THD *thd) const = 0;

  virtual void change_env(THD *thd) const = 0;

public:
  virtual ~Object_creation_ctx() = default;
};

/*************************************************************************/

/**
  Default_object_creation_ctx -- default implementation of
  Object_creation_ctx.
*/

class Default_object_creation_ctx : public Object_creation_ctx
{
public:
  CHARSET_INFO *get_client_cs()
  {
    return m_client_cs;
  }

  CHARSET_INFO *get_connection_cl()
  {
    return m_connection_cl;
  }

protected:
  Default_object_creation_ctx(THD *thd);

  Default_object_creation_ctx(CHARSET_INFO *client_cs,
                              CHARSET_INFO *connection_cl);

protected:
  virtual Object_creation_ctx *create_backup_ctx(THD *thd) const;

  virtual void change_env(THD *thd) const;

protected:
  /**
    client_cs stores the value of character_set_client session variable.
    The only character set attribute is used.

    Client character set is included into query context, because we save
    query in the original character set, which is client character set. So,
    in order to parse the query properly we have to switch client character
    set on parsing.
  */
  CHARSET_INFO *m_client_cs;

  /**
    connection_cl stores the value of collation_connection session
    variable. Both character set and collation attributes are used.

    Connection collation is included into query context, because it defines
    the character set and collation of text literals in internal
    representation of query (item-objects).
  */
  CHARSET_INFO *m_connection_cl;
};

class Query_arena;  /* forward declaration */

/*************************************************************************/

/**
  View_creation_ctx -- creation context of view objects.
*/

class View_creation_ctx : public Default_object_creation_ctx,
                          public Sql_alloc
{
public:
  static View_creation_ctx *create(THD *thd);

  static View_creation_ctx *create(THD *thd,
                                   TABLE_LIST *view);

private:
  View_creation_ctx(THD *thd)
    : Default_object_creation_ctx(thd)
  { }
};

/*************************************************************************/

/* Order clause list element */

/* Callback type for an optimized field-to-field copy
   (see ORDER::fast_field_copier_func below). */
typedef int (*fast_field_copier)(Field *to, Field *from);


typedef struct st_order {
  struct st_order *next;
  Item **item;                     /* Point at item in select fields */
  Item *item_ptr;                  /* Storage for initial item */
  /*
    Reference to the function we are trying to optimize copy to
    a temporary table
  */
  fast_field_copier fast_field_copier_func;
  /* Field for which above optimizer function setup */
  Field *fast_field_copier_setup;
  int counter;                     /* position in SELECT list, correct
                                      only if counter_used is true*/
  /* Sort direction requested in the ORDER BY / GROUP BY clause. */
  enum enum_order {
    ORDER_NOT_RELEVANT,
    ORDER_ASC,
    ORDER_DESC
  };

  enum_order direction;            /* Requested direction of ordering */
  bool in_field_list;              /* true if in select field list */
  bool counter_used;               /* parameter was counter of columns */
  Field *field;                    /* If tmp-table group */
  char *buff;                      /* If tmp-table group */
  table_map used;                  /* NOTE: the below is only set to 0 but is
                                      still used by eq_ref_table */
  table_map depend_map;
} ORDER;

/**
  State information for internal tables grants.
  This structure is part of the TABLE_LIST, and is updated
  during the ACL check process.
  @sa GRANT_INFO
*/
struct st_grant_internal_info
{
  /** True if the internal lookup by schema name was done. */
  bool m_schema_lookup_done;
  /** Cached internal schema access. */
  const ACL_internal_schema_access *m_schema_access;
  /** True if the internal lookup by table name was done. */
  bool m_table_lookup_done;
  /** Cached internal table access. */
  const ACL_internal_table_access *m_table_access;
};
typedef struct st_grant_internal_info GRANT_INTERNAL_INFO;

/**
  @brief The current state of the privilege checking process for the current
  user, SQL statement and SQL object.

  @details The privilege checking process is divided into phases depending on
  the level of the privilege to be checked and the type of object to be
  accessed. Due to the mentioned scattering of privilege checking
  functionality, it is necessary to keep track of the state of the
  process. This information is stored in privilege, want_privilege, and
  orig_want_privilege.

  A GRANT_INFO also serves as a cache of the privilege hash tables. Relevant
  members are grant_table and version.
 */
typedef struct st_grant_info
{
  /**
    @brief A copy of the privilege information regarding the current host,
    database, object and user.

    @details The version of this copy is found in GRANT_INFO::version.
  */
  GRANT_TABLE *grant_table_user;
  GRANT_TABLE *grant_table_role;
  GRANT_TABLE *grant_public;
  /**
    @brief Used for cache invalidation when caching privilege information.

    @details The privilege information is stored on disk, with dedicated
    caches residing in memory: table-level and column-level privileges,
    respectively, have their own dedicated caches.
+ + The GRANT_INFO works as a level 1 cache with this member updated to the + current value of the global variable @c grant_version (@c static variable + in sql_acl.cc). It is updated Whenever the GRANT_INFO is refreshed from + the level 2 cache. The level 2 cache is the @c column_priv_hash structure + (@c static variable in sql_acl.cc) + + @see grant_version + */ + uint version; + /** + @brief The set of privileges that the current user has fulfilled for a + certain host, database, and object. + + @details This field is continually updated throughout the access checking + process. In each step the "wanted privilege" is checked against the + fulfilled privileges. When/if the intersection of these sets is empty, + access is granted. + + The set is implemented as a bitmap, with the bits defined in sql_acl.h. + */ + privilege_t privilege; + /** + @brief the set of privileges that the current user needs to fulfil in + order to carry out the requested operation. + */ + privilege_t want_privilege; + /** + Stores the requested access acl of top level tables list. Is used to + check access rights to the underlying tables of a view. + */ + privilege_t orig_want_privilege; + /** The grant state for internal tables. 
*/ + GRANT_INTERNAL_INFO m_internal; + + st_grant_info() + :privilege(NO_ACL), + want_privilege(NO_ACL), + orig_want_privilege(NO_ACL) + { } + + void read(const Security_context *sctx, const char *db, + const char *table); + + inline void refresh(const Security_context *sctx, const char *db, + const char *table); + inline privilege_t aggregate_privs(); + inline privilege_t aggregate_cols(); + + /* OR table and all column privileges */ + privilege_t all_privilege(); +} GRANT_INFO; + +enum tmp_table_type +{ + NO_TMP_TABLE= 0, NON_TRANSACTIONAL_TMP_TABLE, TRANSACTIONAL_TMP_TABLE, + INTERNAL_TMP_TABLE, SYSTEM_TMP_TABLE +}; +enum release_type { RELEASE_NORMAL, RELEASE_WAIT_FOR_DROP }; + + +enum vcol_init_mode +{ + VCOL_INIT_DEPENDENCY_FAILURE_IS_WARNING= 1, + VCOL_INIT_DEPENDENCY_FAILURE_IS_ERROR= 2 + /* + There may be new flags here. + e.g. to automatically remove sql_mode dependency: + GENERATED ALWAYS AS (char_col) -> + GENERATED ALWAYS AS (RTRIM(char_col)) + */ +}; + + +enum enum_vcol_update_mode +{ + VCOL_UPDATE_FOR_READ= 0, + VCOL_UPDATE_FOR_WRITE, + VCOL_UPDATE_FOR_DELETE, + VCOL_UPDATE_INDEXED, + VCOL_UPDATE_INDEXED_FOR_UPDATE, + VCOL_UPDATE_FOR_REPLACE +}; + +/* Field visibility enums */ + +enum __attribute__((packed)) field_visibility_t { + VISIBLE= 0, + INVISIBLE_USER, + /* automatically added by the server. Can be queried explicitly + in SELECT, otherwise invisible from anything" */ + INVISIBLE_SYSTEM, + INVISIBLE_FULL +}; + +#define INVISIBLE_MAX_BITS 3 +#define HA_HASH_FIELD_LENGTH 8 +#define HA_HASH_KEY_LENGTH_WITHOUT_NULL 8 +#define HA_HASH_KEY_LENGTH_WITH_NULL 9 + + +int fields_in_hash_keyinfo(KEY *keyinfo); + +void setup_keyinfo_hash(KEY *key_info); + +void re_setup_keyinfo_hash(KEY *key_info); + +/** + Category of table found in the table share. +*/ +enum enum_table_category +{ + /** + Unknown value. + */ + TABLE_UNKNOWN_CATEGORY=0, + + /** + Temporary table. + The table is visible only in the session. 
+ Therefore, + - FLUSH TABLES WITH READ LOCK + - SET GLOBAL READ_ONLY = ON + do not apply to this table. + Note that LOCK TABLE t FOR READ/WRITE + can be used on temporary tables. + Temporary tables are not part of the table cache. + */ + TABLE_CATEGORY_TEMPORARY=1, + + /** + User table. + These tables do honor: + - LOCK TABLE t FOR READ/WRITE + - FLUSH TABLES WITH READ LOCK + - SET GLOBAL READ_ONLY = ON + User tables are cached in the table cache. + */ + TABLE_CATEGORY_USER=2, + + /** + System table, maintained by the server. + These tables do honor: + - LOCK TABLE t FOR READ/WRITE + - FLUSH TABLES WITH READ LOCK + - SET GLOBAL READ_ONLY = ON + Typically, writes to system tables are performed by + the server implementation, not explicitly be a user. + System tables are cached in the table cache. + */ + TABLE_CATEGORY_SYSTEM=3, + + /** + Log tables. + These tables are an interface provided by the system + to inspect the system logs. + These tables do *not* honor: + - LOCK TABLE t FOR READ/WRITE + - FLUSH TABLES WITH READ LOCK + - SET GLOBAL READ_ONLY = ON + as there is no point in locking explicitly + a LOG table. + An example of LOG tables are: + - mysql.slow_log + - mysql.general_log, + which *are* updated even when there is either + a GLOBAL READ LOCK or a GLOBAL READ_ONLY in effect. + User queries do not write directly to these tables + (there are exceptions for log tables). + The server implementation perform writes. + Log tables are cached in the table cache. + */ + TABLE_CATEGORY_LOG=4, + + /* + Types below are read only tables, not affected by FLUSH TABLES or + MDL locks. + */ + /** + Information schema tables. + These tables are an interface provided by the system + to inspect the system metadata. + These tables do *not* honor: + - LOCK TABLE t FOR READ/WRITE + - FLUSH TABLES WITH READ LOCK + - SET GLOBAL READ_ONLY = ON + as there is no point in locking explicitly + an INFORMATION_SCHEMA table. + Nothing is directly written to information schema tables. 
+ Note that this value is not used currently, + since information schema tables are not shared, + but implemented as session specific temporary tables. + */ + /* + TODO: Fixing the performance issues of I_S will lead + to I_S tables in the table cache, which should use + this table type. + */ + TABLE_CATEGORY_INFORMATION=5, + + /** + Performance schema tables. + These tables are an interface provided by the system + to inspect the system performance data. + These tables do *not* honor: + - LOCK TABLE t FOR READ/WRITE + - FLUSH TABLES WITH READ LOCK + - SET GLOBAL READ_ONLY = ON + as there is no point in locking explicitly + a PERFORMANCE_SCHEMA table. + An example of PERFORMANCE_SCHEMA tables are: + - performance_schema.* + which *are* updated (but not using the handler interface) + even when there is either + a GLOBAL READ LOCK or a GLOBAL READ_ONLY in effect. + User queries do not write directly to these tables + (there are exceptions for SETUP_* tables). + The server implementation perform writes. + Performance tables are cached in the table cache. + */ + TABLE_CATEGORY_PERFORMANCE=6 +}; + +typedef enum enum_table_category TABLE_CATEGORY; + +TABLE_CATEGORY get_table_category(const LEX_CSTRING *db, + const LEX_CSTRING *name); + + +typedef struct st_table_field_type +{ + LEX_CSTRING name; + LEX_CSTRING type; + LEX_CSTRING cset; +} TABLE_FIELD_TYPE; + + +typedef struct st_table_field_def +{ + uint count; + const TABLE_FIELD_TYPE *field; + uint primary_key_parts; + const uint *primary_key_columns; +} TABLE_FIELD_DEF; + + +class Table_check_intact +{ +protected: + bool has_keys; + virtual void report_error(uint code, const char *fmt, ...)= 0; + +public: + Table_check_intact(bool keys= false) : has_keys(keys) {} + virtual ~Table_check_intact() = default; + + /** Checks whether a table is intact. */ + bool check(TABLE *table, const TABLE_FIELD_DEF *table_def); +}; + + +/* + If the table isn't valid, report the error to the server log only. 
+*/ +class Table_check_intact_log_error : public Table_check_intact +{ +protected: + void report_error(uint, const char *fmt, ...); +public: + Table_check_intact_log_error() : Table_check_intact(true) {} +}; + + +/** + Class representing the fact that some thread waits for table + share to be flushed. Is used to represent information about + such waits in MDL deadlock detector. +*/ + +class Wait_for_flush : public MDL_wait_for_subgraph +{ + MDL_context *m_ctx; + TABLE_SHARE *m_share; + uint m_deadlock_weight; +public: + Wait_for_flush(MDL_context *ctx_arg, TABLE_SHARE *share_arg, + uint deadlock_weight_arg) + : m_ctx(ctx_arg), m_share(share_arg), + m_deadlock_weight(deadlock_weight_arg) + {} + + MDL_context *get_ctx() const { return m_ctx; } + + virtual bool accept_visitor(MDL_wait_for_graph_visitor *dvisitor); + + virtual uint get_deadlock_weight() const; + + /** + Pointers for participating in the list of waiters for table share. + */ + Wait_for_flush *next_in_share; + Wait_for_flush **prev_in_share; +}; + + +typedef I_P_List > + Wait_for_flush_list; + + +enum open_frm_error { + OPEN_FRM_OK = 0, + OPEN_FRM_OPEN_ERROR, + OPEN_FRM_READ_ERROR, + OPEN_FRM_CORRUPTED, + OPEN_FRM_DISCOVER, + OPEN_FRM_ERROR_ALREADY_ISSUED, + OPEN_FRM_NOT_A_VIEW, + OPEN_FRM_NOT_A_TABLE, + OPEN_FRM_NEEDS_REBUILD +}; + +/** + Control block to access table statistics loaded + from persistent statistical tables +*/ + + +#define TABLE_STAT_NO_STATS 0 +#define TABLE_STAT_TABLE 1 +#define TABLE_STAT_COLUMN 2 +#define TABLE_STAT_INDEX 4 +#define TABLE_STAT_HISTOGRAM 8 + +/* + EITS statistics information for a table. + + This data is loaded from mysql.{table|index|column}_stats tables and + then most of the time is owned by table's TABLE_SHARE object. + + Individual TABLE objects also have pointer to this object, and we do + reference counting to know when to free it. See + TABLE::update_engine_stats(), TABLE::free_engine_stats(), + TABLE_SHARE::update_engine_stats(), TABLE_SHARE::destroy(). 
+ These implement a "shared pointer"-like functionality. + + When new statistics is loaded, we create new TABLE_STATISTICS_CB and make + the TABLE_SHARE point to it. Some TABLE object may still be using older + TABLE_STATISTICS_CB objects. Reference counting allows to free + TABLE_STATISTICS_CB when it is no longer used. +*/ + +class TABLE_STATISTICS_CB +{ + uint usage_count; // Instances of this stat + +public: + TABLE_STATISTICS_CB(); + ~TABLE_STATISTICS_CB(); + MEM_ROOT mem_root; /* MEM_ROOT to allocate statistical data for the table */ + Table_statistics *table_stats; /* Structure to access the statistical data */ + uint stats_available; + bool histograms_exists_on_disk; + + bool histograms_exists() const + { + return histograms_exists_on_disk; + } + bool unused() + { + return usage_count == 0; + } + /* Copy (latest) state from TABLE_SHARE to TABLE */ + void update_stats_in_table(TABLE *table); + friend struct TABLE; + friend struct TABLE_SHARE; +}; + +/** + This structure is shared between different table objects. There is one + instance of table share per one table in the database. +*/ + +struct TABLE_SHARE +{ + TABLE_SHARE() = default; /* Remove gcc warning */ + + /** Category of this table. 
*/ + TABLE_CATEGORY table_category; + + /* hash of field names (contains pointers to elements of field array) */ + HASH name_hash; /* hash of field names */ + MEM_ROOT mem_root; + TYPELIB keynames; /* Pointers to keynames */ + TYPELIB fieldnames; /* Pointer to fieldnames */ + TYPELIB *intervals; /* pointer to interval info */ + mysql_mutex_t LOCK_ha_data; /* To protect access to ha_data */ + mysql_mutex_t LOCK_share; /* To protect TABLE_SHARE */ + mysql_mutex_t LOCK_statistics; /* To protect against concurrent load */ + + TDC_element *tdc; + + LEX_CUSTRING tabledef_version; + + engine_option_value *option_list; /* text options for table */ + ha_table_option_struct *option_struct; /* structure with parsed options */ + + /* The following is copied to each TABLE on OPEN */ + Field **field; + Field **found_next_number_field; + KEY *key_info; /* data of keys in database */ + Virtual_column_info **check_constraints; + uint *blob_field; /* Index to blobs in Field arrray*/ + LEX_CUSTRING vcol_defs; /* definitions of generated columns */ + + /* + EITS statistics data from the last time the table was opened or ANALYZE + table was run. + This is typically same as any related TABLE::stats_cb until ANALYZE + table is run. + This pointer is only to be de-referenced under LOCK_share as the + pointer can change by another thread running ANALYZE TABLE. + Without using a LOCK_share one can check if the statistics has been + updated by checking if TABLE::stats_cb != TABLE_SHARE::stats_cb. + */ + TABLE_STATISTICS_CB *stats_cb; + + uchar *default_values; /* row with default values */ + LEX_CSTRING comment; /* Comment about table */ + CHARSET_INFO *table_charset; /* Default charset of string fields */ + + MY_BITMAP *check_set; /* Fields used by check constrant */ + MY_BITMAP all_set; + /* + Key which is used for looking-up table in table cache and in the list + of thread's temporary tables. Has the form of: + "database_name\0table_name\0" + optional part for temporary tables. 
+ + Note that all three 'table_cache_key', 'db' and 'table_name' members + must be set (and be non-zero) for tables in table cache. They also + should correspond to each other. + To ensure this one can use set_table_cache() methods. + */ + LEX_CSTRING table_cache_key; + LEX_CSTRING db; /* Pointer to db */ + LEX_CSTRING table_name; /* Table name (for open) */ + LEX_CSTRING path; /* Path to .frm file (from datadir) */ + LEX_CSTRING normalized_path; /* unpack_filename(path) */ + LEX_CSTRING connect_string; + + /* + Set of keys in use, implemented as a Bitmap. + Excludes keys disabled by ALTER TABLE ... DISABLE KEYS. + */ + key_map keys_in_use; + + /* The set of ignored indexes for a table. */ + key_map ignored_indexes; + + key_map keys_for_keyread; + ha_rows min_rows, max_rows; /* create information */ + ulong avg_row_length; /* create information */ + ulong mysql_version; /* 0 if .frm is created before 5.0 */ + ulong reclength; /* Recordlength */ + /* Stored record length. No generated-only virtual fields are included */ + ulong stored_rec_length; + + plugin_ref db_plugin; /* storage engine plugin */ + inline handlerton *db_type() const /* table_type for handler */ + { + return is_view ? view_pseudo_hton : + db_plugin ? plugin_hton(db_plugin) : NULL; + } + enum row_type row_type; /* How rows are stored */ + enum Table_type table_type; + enum tmp_table_type tmp_table; + + /** Transactional or not. */ + enum ha_choice transactional; + /** Per-page checksums or not. */ + enum ha_choice page_checksum; + + uint key_block_size; /* create key_block_size, if used */ + uint stats_sample_pages; /* number of pages to sample during + stats estimation, if used, otherwise 0. */ + enum_stats_auto_recalc stats_auto_recalc; /* Automatic recalc of stats. */ + uint null_bytes, last_null_bit_pos; + /* + Same as null_bytes, except that if there is only a 'delete-marker' in + the record then this value is 0. 
+ */ + uint null_bytes_for_compare; + uint fields; /* number of fields */ + /* number of stored fields, purely virtual not included */ + uint stored_fields; + uint virtual_fields; /* number of purely virtual fields */ + /* number of purely virtual not stored blobs */ + uint virtual_not_stored_blob_fields; + uint null_fields; /* number of null fields */ + uint blob_fields; /* number of blob fields */ + uint varchar_fields; /* number of varchar fields */ + uint default_fields; /* number of default fields */ + uint visible_fields; /* number of visible fields */ + + uint default_expressions; + uint table_check_constraints, field_check_constraints; + + uint rec_buff_length; /* Size of table->record[] buffer */ + uint keys, key_parts; + uint ext_key_parts; /* Total number of key parts in extended keys */ + uint max_key_length, max_unique_length; + uint uniques; /* Number of UNIQUE index */ + uint db_create_options; /* Create options from database */ + uint db_options_in_use; /* Options in use */ + uint db_record_offset; /* if HA_REC_IN_SEQ */ + uint rowid_field_offset; /* Field_nr +1 to rowid field */ + /* Primary key index number, used in TABLE::key_info[] */ + uint primary_key; + uint next_number_index; /* autoincrement key number */ + uint next_number_key_offset; /* autoinc keypart offset in a key */ + uint next_number_keypart; /* autoinc keypart number in a key */ + enum open_frm_error error; /* error from open_table_def() */ + uint open_errno; /* error from open_table_def() */ + uint column_bitmap_size; + uchar frm_version; + + enum enum_v_keys { NOT_INITIALIZED=0, NO_V_KEYS, V_KEYS }; + enum_v_keys check_set_initialized; + + bool use_ext_keys; /* Extended keys can be used */ + bool null_field_first; + bool system; /* Set if system table (one record) */ + bool not_usable_by_query_cache; + bool online_backup; /* Set if on-line backup supported */ + /* + This is used by log tables, for tables that have their own internal + binary logging or for tables that doesn't 
support statement or row logging + */ + bool no_replicate; + bool crashed; + bool is_view; + bool can_cmp_whole_record; + /* This is set for temporary tables where CREATE was binary logged */ + bool table_creation_was_logged; + bool non_determinstic_insert; + bool has_update_default_function; + bool can_do_row_logging; /* 1 if table supports RBR */ + bool long_unique_table; + /* 1 if frm version cannot be updated as part of upgrade */ + bool keep_original_mysql_version; + + ulong table_map_id; /* for row-based replication */ + + /* + Things that are incompatible between the stored version and the + current version. This is a set of HA_CREATE... bits that can be used + to modify create_info->used_fields for ALTER TABLE. + */ + ulong incompatible_version; + + /** + For shares representing views File_parser object with view + definition read from .FRM file. + */ + const File_parser *view_def; + + /* For sequence tables, the current sequence state */ + SEQUENCE *sequence; + +#ifdef WITH_PARTITION_STORAGE_ENGINE + /* filled in when reading from frm */ + bool auto_partitioned; + char *partition_info_str; + uint partition_info_str_len; + uint partition_info_buffer_size; + plugin_ref default_part_plugin; +#endif + + /** + System versioning and application-time periods support. + */ + struct period_info_t + { + field_index_t start_fieldno; + field_index_t end_fieldno; + Lex_ident name; + Lex_ident constr_name; + uint unique_keys; + Field *start_field(TABLE_SHARE *s) const + { + return s->field[start_fieldno]; + } + Field *end_field(TABLE_SHARE *s) const + { + return s->field[end_fieldno]; + } + }; + + vers_kind_t versioned; + period_info_t vers; + period_info_t period; + /* + Protect multiple threads from repeating partition auto-create over + single share. + + TODO: remove it when partitioning metadata will be in TABLE_SHARE. 
+ */ + bool vers_skip_auto_create; + + bool init_period_from_extra2(period_info_t *period, const uchar *data, + const uchar *end); + + Field *vers_start_field() + { + DBUG_ASSERT(versioned); + return field[vers.start_fieldno]; + } + + Field *vers_end_field() + { + DBUG_ASSERT(versioned); + return field[vers.end_fieldno]; + } + + Field *period_start_field() const + { + DBUG_ASSERT(period.name); + return field[period.start_fieldno]; + } + + Field *period_end_field() const + { + DBUG_ASSERT(period.name); + return field[period.end_fieldno]; + } + + /** + Cache the checked structure of this table. + + The pointer data is used to describe the structure that + a instance of the table must have. Each element of the + array specifies a field that must exist on the table. + + The pointer is cached in order to perform the check only + once -- when the table is loaded from the disk. + */ + const TABLE_FIELD_DEF *table_field_def_cache; + + /** Main handler's share */ + Handler_share *ha_share; + + /** Instrumentation for this table share. */ + PSI_table_share *m_psi; + + inline void reset() { bzero((void*)this, sizeof(*this)); } + + /* + Set share's table cache key and update its db and table name appropriately. + + SYNOPSIS + set_table_cache_key() + key_buff Buffer with already built table cache key to be + referenced from share. + key_length Key length. + + NOTES + Since 'key_buff' buffer will be referenced from share it should has same + life-time as share itself. + This method automatically ensures that TABLE_SHARE::table_name/db have + appropriate values by using table cache key as their source. + */ + + void set_table_cache_key(char *key_buff, uint key_length) + { + table_cache_key.str= key_buff; + table_cache_key.length= key_length; + /* + Let us use the fact that the key is "db/0/table_name/0" + optional + part for temporary tables. 
+ */ + db.str= table_cache_key.str; + db.length= strlen(db.str); + table_name.str= db.str + db.length + 1; + table_name.length= strlen(table_name.str); + } + + + /* + Set share's table cache key and update its db and table name appropriately. + + SYNOPSIS + set_table_cache_key() + key_buff Buffer to be used as storage for table cache key + (should be at least key_length bytes). + key Value for table cache key. + key_length Key length. + + NOTE + Since 'key_buff' buffer will be used as storage for table cache key + it should has same life-time as share itself. + */ + + void set_table_cache_key(char *key_buff, const char *key, uint key_length) + { + memcpy(key_buff, key, key_length); + set_table_cache_key(key_buff, key_length); + } + + inline bool require_write_privileges() + { + return (table_category == TABLE_CATEGORY_LOG); + } + + inline ulong get_table_def_version() + { + return table_map_id; + } + + /** + Convert unrelated members of TABLE_SHARE to one enum + representing its type. + + @todo perhaps we need to have a member instead of a function. + */ + enum enum_table_ref_type get_table_ref_type() const + { + if (is_view) + return TABLE_REF_VIEW; + switch (tmp_table) { + case NO_TMP_TABLE: + return TABLE_REF_BASE_TABLE; + case SYSTEM_TMP_TABLE: + return TABLE_REF_I_S_TABLE; + default: + return TABLE_REF_TMP_TABLE; + } + } + /** + Return a table metadata version. + * for base tables and views, we return table_map_id. + It is assigned from a global counter incremented for each + new table loaded into the table definition cache (TDC). + * for temporary tables it's table_map_id again. But for + temporary tables table_map_id is assigned from + thd->query_id. The latter is assigned from a thread local + counter incremented for every new SQL statement. Since + temporary tables are thread-local, each temporary table + gets a unique id. + * for everything else (e.g. information schema tables), + the version id is zero. 
+ + This choice of version id is a large compromise + to have a working prepared statement validation in 5.1. In + future version ids will be persistent, as described in WL#4180. + + Let's try to explain why and how this limited solution allows + to validate prepared statements. + + Firstly, sets (in mathematical sense) of version numbers + never intersect for different table types. Therefore, + version id of a temporary table is never compared with + a version id of a view, and vice versa. + + Secondly, for base tables and views, we know that each DDL flushes + the respective share from the TDC. This ensures that whenever + a table is altered or dropped and recreated, it gets a new + version id. + Unfortunately, since elements of the TDC are also flushed on + LRU basis, this choice of version ids leads to false positives. + E.g. when the TDC size is too small, we may have a SELECT + * FROM INFORMATION_SCHEMA.TABLES flush all its elements, which + in turn will lead to a validation error and a subsequent + reprepare of all prepared statements. This is + considered acceptable, since as long as prepared statements are + automatically reprepared, spurious invalidation is only + a performance hit. Besides, no better simple solution exists. + + For temporary tables, using thd->query_id ensures that if + a temporary table was altered or recreated, a new version id is + assigned. This suits validation needs very well and will perhaps + never change. + + Metadata of information schema tables never changes. + Thus we can safely assume 0 for a good enough version id. + + Finally, by taking into account table type, we always + track that a change has taken place when a view is replaced + with a base table, a base table is replaced with a temporary + table and so on. + + @sa TABLE_LIST::is_the_same_definition() + */ + ulong get_table_ref_version() const + { + return (tmp_table == SYSTEM_TMP_TABLE) ? 
0 : table_map_id; + } + + bool visit_subgraph(Wait_for_flush *waiting_ticket, + MDL_wait_for_graph_visitor *gvisitor); + + bool wait_for_old_version(THD *thd, struct timespec *abstime, + uint deadlock_weight); + /** Release resources and free memory occupied by the table share. */ + void destroy(); + + void set_use_ext_keys_flag(bool fl) + { + use_ext_keys= fl; + } + + uint actual_n_key_parts(THD *thd); + + LEX_CUSTRING *frm_image; ///< only during CREATE TABLE (@sa ha_create_table) + + /* + populates TABLE_SHARE from the table description in the binary frm image. + if 'write' is true, this frm image is also written into a corresponding + frm file, that serves as a persistent metadata cache to avoid + discovering the table over and over again + */ + int init_from_binary_frm_image(THD *thd, bool write, + const uchar *frm_image, size_t frm_length, + const uchar *par_image=0, + size_t par_length=0); + + /* + populates TABLE_SHARE from the table description, specified as the + complete CREATE TABLE sql statement. + if 'write' is true, this frm image is also written into a corresponding + frm file, that serves as a persistent metadata cache to avoid + discovering the table over and over again + */ + int init_from_sql_statement_string(THD *thd, bool write, + const char *sql, size_t sql_length); + /* + writes the frm image to an frm file, corresponding to this table + */ + bool write_frm_image(const uchar *frm_image, size_t frm_length); + bool write_par_image(const uchar *par_image, size_t par_length); + + /* Only used by S3 */ + bool write_frm_image(void) + { return frm_image ? write_frm_image(frm_image->str, frm_image->length) : 0; } + + /* + returns an frm image for this table. 
+ the memory is allocated and must be freed later + */ + bool read_frm_image(const uchar **frm_image, size_t *frm_length); + + /* frees the memory allocated in read_frm_image */ + void free_frm_image(const uchar *frm); + + void set_overlapped_keys(); + void set_ignored_indexes(); + key_map usable_indexes(THD *thd); + bool old_long_hash_function() const + { + return mysql_version < 100428 || + (mysql_version >= 100500 && mysql_version < 100519) || + (mysql_version >= 100600 && mysql_version < 100612) || + (mysql_version >= 100700 && mysql_version < 100708) || + (mysql_version >= 100800 && mysql_version < 100807) || + (mysql_version >= 100900 && mysql_version < 100905) || + (mysql_version >= 101000 && mysql_version < 101003) || + (mysql_version >= 101100 && mysql_version < 101102); + } + Item_func_hash *make_long_hash_func(THD *thd, + MEM_ROOT *mem_root, + List *field_list) const; + void update_engine_independent_stats(TABLE_STATISTICS_CB *stat); + bool histograms_exists(); +}; + +/* not NULL, but cannot be dereferenced */ +#define UNUSABLE_TABLE_SHARE ((TABLE_SHARE*)1) + +/** + Class is used as a BLOB field value storage for + intermediate GROUP_CONCAT results. Used only for + GROUP_CONCAT with DISTINCT or ORDER BY options. + */ + +class Blob_mem_storage: public Sql_alloc +{ +private: + MEM_ROOT storage; + /** + Sign that some values were cut + during saving into the storage. + */ + bool truncated_value; +public: + Blob_mem_storage() :truncated_value(false) + { + init_alloc_root(key_memory_blob_mem_storage, + &storage, MAX_FIELD_VARCHARLENGTH, 0, MYF(0)); + } + ~ Blob_mem_storage() + { + free_root(&storage, MYF(0)); + } + void reset() + { + free_root(&storage, MYF(MY_MARK_BLOCKS_FREE)); + truncated_value= false; + } + /** + Fuction creates duplicate of 'from' + string in 'storage' MEM_ROOT. + + @param from string to copy + @param length string length + + @retval Pointer to the copied string. + @retval 0 if an error occurred. 
+ */ + char *store(const char *from, size_t length) + { + return (char*) memdup_root(&storage, from, length); + } + void set_truncated_value(bool is_truncated_value) + { + truncated_value= is_truncated_value; + } + bool is_truncated_value() { return truncated_value; } +}; + + +/* Information for one open table */ +enum index_hint_type +{ + INDEX_HINT_IGNORE, + INDEX_HINT_USE, + INDEX_HINT_FORCE +}; + +struct st_cond_statistic; + +#define CHECK_ROW_FOR_NULLS_TO_REJECT (1 << 0) +#define REJECT_ROW_DUE_TO_NULL_FIELDS (1 << 1) + +class SplM_opt_info; + +struct vers_select_conds_t; + +struct TABLE +{ + TABLE() = default; /* Remove gcc warning */ + + TABLE_SHARE *s; + handler *file; + TABLE *next, *prev; + +private: + /** + Links for the list of all TABLE objects for this share. + Declared as private to avoid direct manipulation with those objects. + One should use methods of I_P_List template instead. + */ + TABLE *share_all_next, **share_all_prev; + TABLE *global_free_next, **global_free_prev; + friend struct All_share_tables; + friend struct Table_cache_instance; + +public: + + uint32 instance; /** Table cache instance this TABLE is belonging to */ + THD *in_use; /* Which thread uses this */ + + uchar *record[3]; /* Pointer to records */ + uchar *write_row_record; /* Used as optimisation in + THD::write_row */ + uchar *insert_values; /* used by INSERT ... UPDATE */ + /* + Map of keys that can be used to retrieve all data from this table + needed by the query without reading the row. + */ + key_map covering_keys, intersect_keys; + /* + A set of keys that can be used in the query that references this + table. + + All indexes disabled on the table's TABLE_SHARE (see TABLE::s) will be + subtracted from this set upon instantiation. Thus for any TABLE t it holds + that t.keys_in_use_for_query is a subset of t.s.keys_in_use. Generally we + must not introduce any new keys here (see setup_tables). + + The set is implemented as a bitmap. 
+ */ + key_map keys_in_use_for_query; + /* Map of keys that can be used to calculate GROUP BY without sorting */ + key_map keys_in_use_for_group_by; + /* Map of keys that can be used to calculate ORDER BY without sorting */ + key_map keys_in_use_for_order_by; + /* Map of keys dependent on some constraint */ + key_map constraint_dependent_keys; + KEY *key_info; /* data of keys in database */ + + Field **field; /* Pointer to fields */ + Field **vfield; /* Pointer to virtual fields*/ + Field **default_field; /* Fields with non-constant DEFAULT */ + Field *next_number_field; /* Set if next_number is activated */ + Field *found_next_number_field; /* Set on open */ + Virtual_column_info **check_constraints; + + /* Table's triggers, 0 if there are no of them */ + Table_triggers_list *triggers; + TABLE_LIST *pos_in_table_list;/* Element referring to this table */ + /* Position in thd->locked_table_list under LOCK TABLES */ + TABLE_LIST *pos_in_locked_tables; + /* Tables used in DEFAULT and CHECK CONSTRAINT (normally sequence tables) */ + TABLE_LIST *internal_tables; + + /* + Not-null for temporary tables only. Non-null values means this table is + used to compute GROUP BY, it has a unique of GROUP BY columns. + (set by create_tmp_table) + */ + ORDER *group; + String alias; /* alias or table name */ + uchar *null_flags; + MY_BITMAP def_read_set, def_write_set, tmp_set; + MY_BITMAP def_rpl_write_set; + MY_BITMAP eq_join_set; /* used to mark equi-joined fields */ + MY_BITMAP cond_set; /* used to mark fields from sargable conditions*/ + /* Active column sets */ + MY_BITMAP *read_set, *write_set, *rpl_write_set; + /* On INSERT: fields that the user specified a value for */ + MY_BITMAP has_value_set; + + /* + The ID of the query that opened and is using this table. Has different + meanings depending on the table type. + + Temporary tables: + + table->query_id is set to thd->query_id for the duration of a statement + and is reset to 0 once it is closed by the same statement. 
A non-zero + table->query_id means that a statement is using the table even if it's + not the current statement (table is in use by some outer statement). + + Non-temporary tables: + + Under pre-locked or LOCK TABLES mode: query_id is set to thd->query_id + for the duration of a statement and is reset to 0 once it is closed by + the same statement. A non-zero query_id is used to control which tables + in the list of pre-opened and locked tables are actually being used. + */ + query_id_t query_id; + + /* + This structure is used for statistical data on the table that + is collected by the function collect_statistics_for_table + */ + Table_statistics *collected_stats; + + /* The estimate of the number of records in the table used by optimizer */ + ha_rows used_stat_records; + + key_map opt_range_keys; + /* + The following structure is filled for each key that has + opt_range_keys.is_set(key) == TRUE + */ + struct OPT_RANGE + { + uint key_parts; + uint ranges; + ha_rows rows; + double cost; + /* + If there is a range access by i-th index then the cost of + index only access for it is stored in index_only_costs[i] + */ + double index_only_cost; + } *opt_range; + /* + Bitmaps of key parts that =const for the duration of join execution. If + we're in a subquery, then the constant may be different across subquery + re-executions. + */ + key_part_map *const_key_parts; + + /* + Estimate of number of records that satisfy SARGable part of the table + condition, or table->file->records if no SARGable condition could be + constructed. + This value is used by join optimizer as an estimate of number of records + that will pass the table condition (condition that depends on fields of + this table and constants) + */ + ha_rows opt_range_condition_rows; + + double cond_selectivity; + List *cond_selectivity_sampling_explain; + + table_map map; /* ID bit of table (1,2,4,8,16...) */ + + uint lock_position; /* Position in MYSQL_LOCK.table */ + uint lock_data_start; /* Start pos. 
in MYSQL_LOCK.locks */ + uint lock_count; /* Number of locks */ + uint tablenr,used_fields; + uint temp_pool_slot; /* Used by intern temp tables */ + uint status; /* What's in record[0] */ + uint db_stat; /* mode of file as in handler.h */ + /* number of select if it is derived table */ + uint derived_select_number; + /* + Possible values: + - 0 by default + - JOIN_TYPE_{LEFT|RIGHT} if the table is inner w.r.t an outer join + operation + - 1 if the SELECT has mixed_implicit_grouping=1. example: + select max(col1), col2 from t1. In this case, the query produces + one row with all columns having NULL values. + + Interpetation: If maybe_null!=0, all fields of the table are considered + NULLable (and have NULL values when null_row=true) + */ + uint maybe_null; + int current_lock; /* Type of lock on table */ + bool copy_blobs; /* copy_blobs when storing */ + /* + Set if next_number_field is in the UPDATE fields of INSERT ... ON DUPLICATE + KEY UPDATE. + */ + bool next_number_field_updated; + + /* + If true, the current table row is considered to have all columns set to + NULL, including columns declared as "not null" (see maybe_null). + */ + bool null_row; + /* + No rows that contain null values can be placed into this table. + Currently this flag can be set to true only for a temporary table + that used to store the result of materialization of a subquery. + */ + bool no_rows_with_nulls; + /* + This field can contain two bit flags: + CHECK_ROW_FOR_NULLS_TO_REJECT + REJECT_ROW_DUE_TO_NULL_FIELDS + The first flag is set for the dynamic contexts where it is prohibited + to write any null into the table. + The second flag is set only if the first flag is set on. + The informs the outer scope that there was an attept to write null + into a field of the table in the context where it is prohibited. + This flag should be set off as soon as the first flag is set on. + Currently these flags are used only the tables tno_rows_with_nulls set + to true. 
+ */ + uint8 null_catch_flags; + + /* + TODO: Each of the following flags take up 8 bits. They can just as easily + be put into one single unsigned long and instead of taking up 18 + bytes, it would take up 4. + */ + bool force_index; + + /** + Flag set when the statement contains FORCE INDEX FOR ORDER BY + See TABLE_LIST::process_index_hints(). + */ + bool force_index_order; + + /** + Flag set when the statement contains FORCE INDEX FOR GROUP BY + See TABLE_LIST::process_index_hints(). + */ + bool force_index_group; + /* + TRUE<=> this table was created with create_tmp_table(... distinct=TRUE..) + call + */ + bool distinct; + bool const_table,no_rows, used_for_duplicate_elimination; + /** + Forces DYNAMIC Aria row format for internal temporary tables. + */ + bool keep_row_order; + + bool no_keyread; + /** + If set, indicate that the table is not replicated by the server. + */ + bool locked_by_logger; + bool locked_by_name; + bool fulltext_searched; + bool no_cache; + /* To signal that the table is associated with a HANDLER statement */ + bool open_by_handler; + /* + To indicate that a non-null value of the auto_increment field + was provided by the user or retrieved from the current record. + Used only in the MODE_NO_AUTO_VALUE_ON_ZERO mode. + */ + bool auto_increment_field_not_null; + bool insert_or_update; /* Can be used by the handler */ + /* + NOTE: alias_name_used is only a hint! It works only in need_correct_ident() + condition. On other cases it is FALSE even if table_name is alias. + + E.g. in update t1 as x set a = 1 + */ + bool alias_name_used; /* true if table_name is alias */ + bool get_fields_in_item_tree; /* Signal to fix_field */ + List vcol_refix_list; +private: + bool m_needs_reopen; + bool created; /* For tmp tables. 
TRUE <=> tmp table was actually created.*/ +public: +#ifdef HAVE_REPLICATION + /* used in RBR Triggers */ + bool master_had_triggers; +#endif + + REGINFO reginfo; /* field connections */ + MEM_ROOT mem_root; + /** + Initialized in Item_func_group_concat::setup for appropriate + temporary table if GROUP_CONCAT is used with ORDER BY | DISTINCT + and BLOB field count > 0. + */ + Blob_mem_storage *blob_storage; + GRANT_INFO grant; + /* + The arena which the items for expressions from the table definition + are associated with. + Currently only the items of the expressions for virtual columns are + associated with this arena. + TODO: To attach the partitioning expressions to this arena. + */ + Query_arena *expr_arena; +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info; /* Partition related information */ + /* If true, all partitions have been pruned away */ + bool all_partitions_pruned_away; +#endif + uint max_keys; /* Size of allocated key_info array. */ + bool stats_is_read; /* Persistent statistics is read for the table */ + bool histograms_are_read; + MDL_ticket *mdl_ticket; + + /* + This is used only for potentially splittable materialized tables and it + points to the info used by the optimizer to apply splitting optimization + */ + SplM_opt_info *spl_opt_info; + key_map keys_usable_for_splitting; + + /* + Conjunction of the predicates of the form IS NOT NULL(f) where f refers to + a column of this TABLE such that they can be inferred from the condition + of the WHERE clause or from some ON expression of the processed select + and can be useful for range optimizer. 
+ */ + Item *notnull_cond; + TABLE_STATISTICS_CB *stats_cb; + + inline void reset() { bzero((void*)this, sizeof(*this)); } + void init(THD *thd, TABLE_LIST *tl); + bool fill_item_list(List *item_list) const; + void reset_item_list(List *item_list, uint skip) const; + void clear_column_bitmaps(void); + void prepare_for_position(void); + MY_BITMAP *prepare_for_keyread(uint index, MY_BITMAP *map); + MY_BITMAP *prepare_for_keyread(uint index) + { return prepare_for_keyread(index, &tmp_set); } + void mark_index_columns(uint index, MY_BITMAP *bitmap); + void mark_index_columns_no_reset(uint index, MY_BITMAP *bitmap); + void mark_index_columns_for_read(uint index); + void restore_column_maps_after_keyread(MY_BITMAP *backup); + void mark_auto_increment_column(void); + void mark_columns_needed_for_update(void); + void mark_columns_needed_for_delete(void); + void mark_columns_needed_for_insert(void); + void mark_columns_per_binlog_row_image(void); + inline bool mark_column_with_deps(Field *field); + inline bool mark_virtual_column_with_deps(Field *field); + inline void mark_virtual_column_deps(Field *field); + bool mark_virtual_columns_for_write(bool insert_fl); + bool check_virtual_columns_marked_for_read(); + bool check_virtual_columns_marked_for_write(); + void mark_default_fields_for_write(bool insert_fl); + void mark_columns_used_by_virtual_fields(void); + void mark_check_constraint_columns_for_read(void); + int verify_constraints(bool ignore_failure); + void free_engine_stats(); + void update_engine_independent_stats(); + inline void column_bitmaps_set(MY_BITMAP *read_set_arg) + { + read_set= read_set_arg; + if (file) + file->column_bitmaps_signal(); + } + inline void column_bitmaps_set(MY_BITMAP *read_set_arg, + MY_BITMAP *write_set_arg) + { + read_set= read_set_arg; + write_set= write_set_arg; + if (file) + file->column_bitmaps_signal(); + } + inline void column_bitmaps_set_no_signal(MY_BITMAP *read_set_arg, + MY_BITMAP *write_set_arg) + { + read_set= read_set_arg; + 
write_set= write_set_arg; + } + inline void use_all_columns() + { + column_bitmaps_set(&s->all_set, &s->all_set); + } + inline void use_all_stored_columns(); + inline void default_column_bitmaps() + { + read_set= &def_read_set; + write_set= &def_write_set; + rpl_write_set= 0; + } + /** Should this instance of the table be reopened? */ + inline bool needs_reopen() + { return !db_stat || m_needs_reopen; } + /* + Mark that all current connection instances of the table should be + reopen at end of statement + */ + void mark_table_for_reopen(); + /* Should only be called from Locked_tables_list::mark_table_for_reopen() */ + void internal_set_needs_reopen(bool value) + { + m_needs_reopen= value; + } + + bool init_expr_arena(MEM_ROOT *mem_root); + + bool alloc_keys(uint key_count); + bool check_tmp_key(uint key, uint key_parts, + uint (*next_field_no) (uchar *), uchar *arg); + bool add_tmp_key(uint key, uint key_parts, + uint (*next_field_no) (uchar *), uchar *arg, + bool unique); + void create_key_part_by_field(KEY_PART_INFO *key_part_info, + Field *field, uint fieldnr); + void use_index(int key_to_save); + void set_table_map(table_map map_arg, uint tablenr_arg) + { + map= map_arg; + tablenr= tablenr_arg; + } + + /// Return true if table is instantiated, and false otherwise. + bool is_created() const + { + DBUG_ASSERT(!created || file != 0); + return created; + } + + /** + Set the table as "created", and enable flags in storage engine + that could not be enabled without an instantiated table. 
+ */ + void set_created() + { + if (created) + return; + if (file->keyread_enabled()) + file->extra(HA_EXTRA_KEYREAD); + created= true; + } + + void reset_created() + { + created= 0; + } + + /* + Returns TRUE if the table is filled at execution phase (and so, the + optimizer must not do anything that depends on the contents of the table, + like range analysis or constant table detection) + */ + bool is_filled_at_execution(); + + bool update_const_key_parts(COND *conds); + + inline void initialize_opt_range_structures(); + + my_ptrdiff_t default_values_offset() const + { return (my_ptrdiff_t) (s->default_values - record[0]); } + + void move_fields(Field **ptr, const uchar *to, const uchar *from); + void remember_blob_values(String *blob_storage); + void restore_blob_values(String *blob_storage); + + uint actual_n_key_parts(KEY *keyinfo); + ulong actual_key_flags(KEY *keyinfo); + int update_virtual_field(Field *vf, bool ignore_warnings); + int update_virtual_fields(handler *h, enum_vcol_update_mode update_mode); + int update_default_fields(bool ignore_errors); + void evaluate_update_default_function(); + void reset_default_fields(); + inline ha_rows stat_records() { return used_stat_records; } + + void prepare_triggers_for_insert_stmt_or_event(); + bool prepare_triggers_for_delete_stmt_or_event(); + bool prepare_triggers_for_update_stmt_or_event(); + + Field **field_to_fill(); + bool validate_default_values_of_unset_fields(THD *thd) const; + + // Check if the value list is assignable to the explicit field list + static bool check_assignability_explicit_fields(List fields, + List values, + bool ignore); + // Check if the value list is assignable to all visible fields + bool check_assignability_all_visible_fields(List &values, + bool ignore) const; + /* + Check if the value list is assignable to: + - The explicit field list if fields.elements > 0, e.g. + INSERT INTO t1 (a,b) VALUES (1,2); + - All visible fields, if fields.elements==0, e.g. 
+ INSERT INTO t1 VALUES (1,2); + */ + bool check_assignability_opt_fields(List fields, + List values, + bool ignore) const + { + DBUG_ASSERT(values.elements); + return fields.elements ? + check_assignability_explicit_fields(fields, values, ignore) : + check_assignability_all_visible_fields(values, ignore); + } + + bool insert_all_rows_into_tmp_table(THD *thd, + TABLE *tmp_table, + TMP_TABLE_PARAM *tmp_table_param, + bool with_cleanup); + bool vcol_fix_expr(THD *thd); + bool vcol_cleanup_expr(THD *thd); + Field *find_field_by_name(LEX_CSTRING *str) const; + bool export_structure(THD *thd, class Row_definition_list *defs); + bool is_splittable() { return spl_opt_info != NULL; } + void set_spl_opt_info(SplM_opt_info *spl_info); + void deny_splitting(); + double get_materialization_cost(); // Now used only if is_splittable()==true + void add_splitting_info_for_key_field(struct KEY_FIELD *key_field); + + key_map with_impossible_ranges; + + /* Number of cost info elements for possible range filters */ + uint range_rowid_filter_cost_info_elems; + /* Pointer to the array of cost info elements for range filters */ + Range_rowid_filter_cost_info *range_rowid_filter_cost_info; + /* The array of pointers to cost info elements for range filters */ + Range_rowid_filter_cost_info **range_rowid_filter_cost_info_ptr; + + void init_cost_info_for_usable_range_rowid_filters(THD *thd); + void prune_range_rowid_filters(); + void trace_range_rowid_filters(THD *thd) const; + Range_rowid_filter_cost_info * + best_range_rowid_filter_for_partial_join(uint access_key_no, + double records, + double access_cost_factor); + + /** + System Versioning support + */ + bool vers_write; + + bool versioned() const + { + return s->versioned; + } + + bool versioned(vers_kind_t type) const + { + DBUG_ASSERT(type); + return s->versioned == type; + } + + bool versioned_write() const + { + DBUG_ASSERT(versioned() || !vers_write); + return versioned() ? 
vers_write : false; + } + + bool versioned_write(vers_kind_t type) const + { + DBUG_ASSERT(type); + DBUG_ASSERT(versioned() || !vers_write); + return versioned(type) ? vers_write : false; + } + + Field *vers_start_field() const + { + DBUG_ASSERT(s->versioned); + return field[s->vers.start_fieldno]; + } + + Field *vers_end_field() const + { + DBUG_ASSERT(s->versioned); + return field[s->vers.end_fieldno]; + } + + Field *period_start_field() const + { + DBUG_ASSERT(s->period.name); + return field[s->period.start_fieldno]; + } + + Field *period_end_field() const + { + DBUG_ASSERT(s->period.name); + return field[s->period.end_fieldno]; + } + + + ulonglong vers_start_id() const; + ulonglong vers_end_id() const; +#ifdef WITH_PARTITION_STORAGE_ENGINE + bool vers_switch_partition(THD *thd, TABLE_LIST *table_list, + Open_table_context *ot_ctx); +#endif + + int update_generated_fields(); + int period_make_insert(Item *src, Field *dst); + int insert_portion_of_time(THD *thd, const vers_select_conds_t &period_conds, + ha_rows *rows_inserted); + bool vers_check_update(List &items); + static bool check_period_overlaps(const KEY &key, const uchar *lhs, const uchar *rhs); + int delete_row(); + /* Used in majority of DML (called from fill_record()) */ + bool vers_update_fields(); + /* Used in DELETE, DUP REPLACE and insert history row */ + void vers_update_end(); + void find_constraint_correlated_indexes(); + +/** Number of additional fields used in versioned tables */ +#define VERSIONING_FIELDS 2 +}; + + +/** + Helper class which specifies which members of TABLE are used for + participation in the list of used/unused TABLE objects for the share. 
*/

struct TABLE_share
{
  static inline TABLE **next_ptr(TABLE *l)
  {
    return &l->next;
  }
  static inline TABLE ***prev_ptr(TABLE *l)
  {
    return (TABLE ***) &l->prev;
  }
};

/*
  Policy struct analogous to TABLE_share, but selecting the
  share_all_next/share_all_prev links (list of ALL TABLE objects of a
  share, declared private in TABLE and befriended to this struct).
*/
struct All_share_tables
{
  static inline TABLE **next_ptr(TABLE *l)
  {
    return &l->share_all_next;
  }
  static inline TABLE ***prev_ptr(TABLE *l)
  {
    return &l->share_all_prev;
  }
};

/* NOTE(review): the template argument list appears stripped by extraction
   here — upstream declares I_P_List<TABLE, All_share_tables>; confirm
   against the original header. */
typedef I_P_List All_share_tables_list;

/* How far an INFORMATION_SCHEMA table has been processed during execution. */
enum enum_schema_table_state
{
  NOT_PROCESSED= 0,
  PROCESSED_BY_CREATE_SORT_INDEX,
  PROCESSED_BY_JOIN_EXEC
};

/* Referential actions of a foreign key (ON UPDATE / ON DELETE clauses). */
enum enum_fk_option { FK_OPTION_UNDEF, FK_OPTION_RESTRICT, FK_OPTION_NO_ACTION,
                      FK_OPTION_CASCADE, FK_OPTION_SET_NULL, FK_OPTION_SET_DEFAULT };

/* Metadata describing one foreign key, as reported by storage engines. */
typedef struct st_foreign_key_info
{
  LEX_CSTRING *foreign_id;
  LEX_CSTRING *foreign_db;
  LEX_CSTRING *foreign_table;
  LEX_CSTRING *referenced_db;
  LEX_CSTRING *referenced_table;
  enum_fk_option update_method;       /* action for ON UPDATE */
  enum_fk_option delete_method;       /* action for ON DELETE */
  LEX_CSTRING *referenced_key_name;
  /* NOTE(review): List element types appear stripped by extraction —
     confirm upstream (List<LEX_CSTRING>). */
  List foreign_fields;
  List referenced_fields;
} FOREIGN_KEY_INFO;

LEX_CSTRING *fk_option_name(enum_fk_option opt);

/*
  True for actions that modify rows of the child table. Relies on
  CASCADE/SET_NULL/SET_DEFAULT being the last enumerators of
  enum_fk_option, in that order.
*/
static inline bool fk_modifies_child(enum_fk_option opt)
{
  return opt >= FK_OPTION_CASCADE;
}


class IS_table_read_plan;

/*
  Types of derived tables. The ending part is a bitmap of phases that are
  applicable to a derived table of the type.
*/
#define DTYPE_ALGORITHM_UNDEFINED    0U
#define DTYPE_VIEW                   1U
#define DTYPE_TABLE                  2U
#define DTYPE_MERGE                  4U
#define DTYPE_MATERIALIZE            8U
#define DTYPE_MULTITABLE             16U
#define DTYPE_MASK                   (DTYPE_VIEW|DTYPE_TABLE|DTYPE_MULTITABLE)

/*
  Phases of derived tables/views handling, see sql_derived.cc
  Values are used as parts of a bitmap attached to derived table types.
*/
#define DT_INIT             1U
#define DT_PREPARE          2U
#define DT_OPTIMIZE         4U
#define DT_MERGE            8U
#define DT_MERGE_FOR_INSERT 16U
#define DT_CREATE           32U
#define DT_FILL             64U
#define DT_REINIT           128U
/* Number of distinct phase bits defined above. */
#define DT_PHASES           8U
/* Phases that are applicable to all derived tables. */
#define DT_COMMON       (DT_INIT + DT_PREPARE + DT_REINIT + DT_OPTIMIZE)
/* Phases that are applicable only to materialized derived tables. */
#define DT_MATERIALIZE  (DT_CREATE + DT_FILL)

#define DT_PHASES_MERGE (DT_COMMON | DT_MERGE | DT_MERGE_FOR_INSERT)
#define DT_PHASES_MATERIALIZE (DT_COMMON | DT_MATERIALIZE)

#define VIEW_ALGORITHM_UNDEFINED 0
/* Special value for ALTER VIEW: inherit original algorithm. */
#define VIEW_ALGORITHM_INHERIT   DTYPE_VIEW
#define VIEW_ALGORITHM_MERGE     (DTYPE_VIEW | DTYPE_MERGE)
#define VIEW_ALGORITHM_TMPTABLE  (DTYPE_VIEW | DTYPE_MATERIALIZE)

/*
  View algorithm values as stored in the FRM. Values differ from in-memory
  representation for backward compatibility.
*/
#define VIEW_ALGORITHM_UNDEFINED_FRM  0U
#define VIEW_ALGORITHM_MERGE_FRM      1U
#define VIEW_ALGORITHM_TMPTABLE_FRM   2U

#define JOIN_TYPE_LEFT  1U
#define JOIN_TYPE_RIGHT 2U
#define JOIN_TYPE_OUTER 4U /* Marker that this is an outer join */

/* view WITH CHECK OPTION parameter options */
#define VIEW_CHECK_NONE     0
#define VIEW_CHECK_LOCAL    1
#define VIEW_CHECK_CASCADED 2

/* result of view WITH CHECK OPTION parameter check */
#define VIEW_CHECK_OK       0
#define VIEW_CHECK_ERROR    1
#define VIEW_CHECK_SKIP     2

/** The threshold size a blob field buffer before it is freed */
#define MAX_TDC_BLOB_SIZE 65536

/** number of bytes used by field positional indexes in frm */
constexpr uint frm_fieldno_size= 2;
/** number of bytes used by key position number in frm */
constexpr uint frm_keyno_size= 2;
/* Read/store the 2-byte field and key numbers kept inside frm images. */
static inline field_index_t read_frm_fieldno(const uchar *data)
{ return uint2korr(data); }
static inline void store_frm_fieldno(uchar *data, field_index_t fieldno)
{ int2store(data, fieldno); }
static inline uint16 read_frm_keyno(const uchar *data)
{ return uint2korr(data); }
static inline void store_frm_keyno(uchar *data, uint16 keyno)
{ int2store(data, keyno); }
/*
  Total on-disk size of an extra2 length-prefixed string: a 1-byte length
  header for strings up to 255 bytes, otherwise a 3-byte header.
*/
static inline size_t extra2_str_size(size_t len)
{ return (len > 255 ?
3 : 1) + len; }

class select_unit;
class TMP_TABLE_PARAM;

Item *create_view_field(THD *thd, TABLE_LIST *view, Item **field_ref,
                        LEX_CSTRING *name);

/* One entry of a view's column translation table: expression + column name. */
struct Field_translator
{
  Item *item;
  LEX_CSTRING name;
};


/*
  Column reference of a NATURAL/USING join. Since column references in
  joins can be both from views and stored tables, may point to either a
  Field (for tables), or a Field_translator (for views).
*/

class Natural_join_column: public Sql_alloc
{
public:
  Field_translator *view_field;   /* Column reference of merge view. */
  Item_field *table_field;        /* Column reference of table or temp view. */
  TABLE_LIST *table_ref;          /* Original base table/view reference. */
  /*
    True if a common join column of two NATURAL/USING join operands. Notice
    that when we have a hierarchy of nested NATURAL/USING joins, a column can
    be common at some level of nesting but it may not be common at higher
    levels of nesting. Thus this flag may change depending on at which level
    we are looking at some column.
  */
  bool is_common;
public:
  Natural_join_column(Field_translator *field_param, TABLE_LIST *tab);
  Natural_join_column(Item_field *field_param, TABLE_LIST *tab);
  LEX_CSTRING *name();
  Item *create_item(THD *thd);
  Field *field();
  const char *safe_table_name();
  const char *safe_db_name();
  GRANT_INFO *grant();
};


/**
  Type of table which can be open for an element of table list.
*/

enum enum_open_type
{
  OT_TEMPORARY_OR_BASE= 0, OT_TEMPORARY_ONLY, OT_BASE_ONLY
};


class SJ_MATERIALIZATION_INFO;
class Index_hint;
class Item_in_subselect;

/* trivial class, for %union in sql_yacc.yy */
struct vers_history_point_t
{
  vers_kind_t unit;
  Item *item;
};

/*
  A FOR SYSTEM_TIME history point: a unit plus an expression. Extends the
  POD above (kept trivial for the parser's %union) with construction and
  checking helpers.
*/
class Vers_history_point : public vers_history_point_t
{
  void fix_item();

public:
  Vers_history_point() { empty(); }
  Vers_history_point(vers_kind_t unit_arg, Item *item_arg)
  {
    unit= unit_arg;
    item= item_arg;
    fix_item();
  }
  Vers_history_point(vers_history_point_t p)
  {
    unit= p.unit;
    item= p.item;
    fix_item();
  }
  /* Reset to the default state: TIMESTAMP unit, no expression. */
  void empty() { unit= VERS_TIMESTAMP; item= NULL; }
  void print(String *str, enum_query_type, const char *prefix, size_t plen) const;
  bool check_unit(THD *thd);
  void bad_expression_data_type_error(const char *type) const;
  bool eq(const vers_history_point_t &point) const;
};

/* SYSTEM_TIME / application-period conditions attached to a table reference. */
struct vers_select_conds_t
{
  vers_system_time_t type;
  /* Value of 'type' as set by init(); tested by was_set(). */
  vers_system_time_t orig_type;
  bool used:1;
  /* True for SYSTEM_TIME HISTORY / BEFORE (see init()). */
  bool delete_history:1;
  Vers_history_point start;
  Vers_history_point end;
  Lex_ident name;

  Item_field *field_start;
  Item_field *field_end;

  const TABLE_SHARE::period_info_t *period;

  void empty()
  {
    type= SYSTEM_TIME_UNSPECIFIED;
    orig_type= SYSTEM_TIME_UNSPECIFIED;
    used= false;
    delete_history= false;
    start.empty();
    end.empty();
  }

  void init(vers_system_time_t _type,
            Vers_history_point _start= Vers_history_point(),
            Vers_history_point _end= Vers_history_point(),
            Lex_ident _name= "SYSTEM_TIME")
  {
    type= _type;
    orig_type= _type;
    used= false;
    delete_history= (type == SYSTEM_TIME_HISTORY ||
                     type == SYSTEM_TIME_BEFORE);
    start= _start;
    end= _end;
    name= _name;
  }

  void set_all()
  {
    type= SYSTEM_TIME_ALL;
    name= "SYSTEM_TIME";
  }

  void print(String *str, enum_query_type query_type) const;

  bool init_from_sysvar(THD *thd);

  bool is_set() const
  {
    return type != SYSTEM_TIME_UNSPECIFIED;
  }
  bool check_units(THD *thd);
  bool was_set() const
  {
    return orig_type != SYSTEM_TIME_UNSPECIFIED;
  }
  /* Setup is needed unless the condition is absent or SYSTEM_TIME ALL. */
  bool need_setup() const
  {
    return type != SYSTEM_TIME_UNSPECIFIED && type != SYSTEM_TIME_ALL;
  }
  bool eq(const vers_select_conds_t &conds) const;
};

/*
  Table reference in the FROM clause.

  These table references can be of several types that correspond to
  different SQL elements. Below we list all types of TABLE_LISTs with
  the necessary conditions to determine when a TABLE_LIST instance
  belongs to a certain type.

  1) table (TABLE_LIST::view == NULL)
     - base table
       (TABLE_LIST::derived == NULL)
     - FROM-clause subquery - TABLE_LIST::table is a temp table
       (TABLE_LIST::derived != NULL)
     - information schema table
       (TABLE_LIST::schema_table != NULL)
       NOTICE: for schema tables TABLE_LIST::field_translation may be != NULL
  2) view (TABLE_LIST::view != NULL)
     - merge    (TABLE_LIST::effective_algorithm == VIEW_ALGORITHM_MERGE)
           also (TABLE_LIST::field_translation != NULL)
     - tmptable (TABLE_LIST::effective_algorithm == VIEW_ALGORITHM_TMPTABLE)
           also (TABLE_LIST::field_translation == NULL)
  2.5) TODO: Add derived tables description here
  3) nested table reference (TABLE_LIST::nested_join != NULL)
     - table sequence - e.g. (t1, t2, t3)
       TODO: how to distinguish from a JOIN?
     - general JOIN
       TODO: how to distinguish from a table sequence?
     - NATURAL JOIN
       (TABLE_LIST::natural_join != NULL)
     - JOIN ... USING
       (TABLE_LIST::join_using_fields != NULL)
     - semi-join nest (sj_on_expr!= NULL && sj_subq_pred!=NULL)
  4) jtbm semi-join (jtbm_subselect != NULL)
*/

/** last_leaf_for_name_resolutioning support.
*/ + +struct LEX; +class Index_hint; + +/* + @struct TABLE_CHAIN + @brief Subchain of global chain of table references + + The structure contains a pointer to the address of the next_global + pointer to the first TABLE_LIST objectof the subchain and the address + of the next_global pointer to the element right after the last + TABLE_LIST object of the subchain. For an empty subchain both pointers + have the same value. +*/ + +struct TABLE_CHAIN +{ + TABLE_CHAIN() = default; + + TABLE_LIST **start_pos; + TABLE_LIST ** end_pos; + + void set_start_pos(TABLE_LIST **pos) { start_pos= pos; } + void set_end_pos(TABLE_LIST **pos) { end_pos= pos; } +}; + +struct TABLE_LIST +{ + TABLE_LIST() = default; /* Remove gcc warning */ + + enum prelocking_types + { + PRELOCK_NONE, PRELOCK_ROUTINE, PRELOCK_FK + }; + + /** + Prepare TABLE_LIST that consists of one table instance to use in + open_and_lock_tables + */ + inline void reset() { bzero((void*)this, sizeof(*this)); } + inline void init_one_table(const LEX_CSTRING *db_arg, + const LEX_CSTRING *table_name_arg, + const LEX_CSTRING *alias_arg, + enum thr_lock_type lock_type_arg) + { + enum enum_mdl_type mdl_type; + if (lock_type_arg >= TL_FIRST_WRITE) + mdl_type= MDL_SHARED_WRITE; + else if (lock_type_arg == TL_READ_NO_INSERT) + mdl_type= MDL_SHARED_NO_WRITE; + else + mdl_type= MDL_SHARED_READ; + + reset(); + DBUG_ASSERT(!db_arg->str || strlen(db_arg->str) == db_arg->length); + DBUG_ASSERT(!table_name_arg->str || strlen(table_name_arg->str) == table_name_arg->length); + DBUG_ASSERT(!alias_arg || strlen(alias_arg->str) == alias_arg->length); + db= *db_arg; + table_name= *table_name_arg; + alias= (alias_arg ? 
*alias_arg : *table_name_arg); + lock_type= lock_type_arg; + updating= lock_type >= TL_FIRST_WRITE; + MDL_REQUEST_INIT(&mdl_request, MDL_key::TABLE, db.str, table_name.str, + mdl_type, MDL_TRANSACTION); + } + + TABLE_LIST(TABLE *table_arg, thr_lock_type lock_type) + { + DBUG_ASSERT(table_arg->s); + init_one_table(&table_arg->s->db, &table_arg->s->table_name, + NULL, lock_type); + table= table_arg; + vers_conditions.name= table->s->vers.name; + } + + inline void init_one_table_for_prelocking(const LEX_CSTRING *db_arg, + const LEX_CSTRING *table_name_arg, + const LEX_CSTRING *alias_arg, + enum thr_lock_type lock_type_arg, + prelocking_types prelocking_type, + TABLE_LIST *belong_to_view_arg, + uint8 trg_event_map_arg, + TABLE_LIST ***last_ptr, + my_bool insert_data) + + { + init_one_table(db_arg, table_name_arg, alias_arg, lock_type_arg); + cacheable_table= 1; + prelocking_placeholder= prelocking_type; + open_type= (prelocking_type == PRELOCK_ROUTINE ? + OT_TEMPORARY_OR_BASE : + OT_BASE_ONLY); + belong_to_view= belong_to_view_arg; + trg_event_map= trg_event_map_arg; + /* MDL is enough for read-only FK checks, we don't need the table */ + if (prelocking_type == PRELOCK_FK && lock_type < TL_FIRST_WRITE) + open_strategy= OPEN_STUB; + + **last_ptr= this; + prev_global= *last_ptr; + *last_ptr= &next_global; + for_insert_data= insert_data; + } + + + /* + List of tables local to a subquery (used by SQL_I_List). Considers + views as leaves (unlike 'next_leaf' below). Created at parse time + in st_select_lex::add_table_to_list() -> table_list.link_in_list(). 
+ */ + TABLE_LIST *next_local; + /* link in a global list of all queries tables */ + TABLE_LIST *next_global, **prev_global; + LEX_CSTRING db; + LEX_CSTRING table_name; + LEX_CSTRING schema_table_name; + LEX_CSTRING alias; + const char *option; /* Used by cache index */ + Item *on_expr; /* Used with outer join */ + Name_resolution_context *on_context; /* For ON expressions */ + Table_function_json_table *table_function; /* If it's the table function. */ + + Item *sj_on_expr; + /* + (Valid only for semi-join nests) Bitmap of tables that are within the + semi-join (this is different from bitmap of all nest's children because + tables that were pulled out of the semi-join nest remain listed as + nest's children). + */ + table_map sj_inner_tables; + /* Number of IN-compared expressions */ + uint sj_in_exprs; + + /* If this is a non-jtbm semi-join nest: corresponding subselect predicate */ + Item_in_subselect *sj_subq_pred; + + table_map original_subq_pred_used_tables; + + /* If this is a jtbm semi-join object: corresponding subselect predicate */ + Item_in_subselect *jtbm_subselect; + /* TODO: check if this can be joined with tablenr_exec */ + uint jtbm_table_no; + + SJ_MATERIALIZATION_INFO *sj_mat_info; + + /* + The structure of ON expression presented in the member above + can be changed during certain optimizations. This member + contains a snapshot of AND-OR structure of the ON expression + made after permanent transformations of the parse tree, and is + used to restore ON clause before every reexecution of a prepared + statement or stored procedure. + */ + Item *prep_on_expr; + COND_EQUAL *cond_equal; /* Used with outer join */ + /* + During parsing - left operand of NATURAL/USING join where 'this' is + the right operand. After parsing (this->natural_join == this) iff + 'this' represents a NATURAL or USING join operation. Thus after + parsing 'this' is a NATURAL/USING join iff (natural_join != NULL). 
+ */ + TABLE_LIST *natural_join; + /* + True if 'this' represents a nested join that is a NATURAL JOIN. + For one of the operands of 'this', the member 'natural_join' points + to the other operand of 'this'. + */ + bool is_natural_join; + /* Field names in a USING clause for JOIN ... USING. */ + List *join_using_fields; + /* + Explicitly store the result columns of either a NATURAL/USING join or + an operand of such a join. + */ + List *join_columns; + /* TRUE if join_columns contains all columns of this table reference. */ + bool is_join_columns_complete; + + /* + List of nodes in a nested join tree, that should be considered as + leaves with respect to name resolution. The leaves are: views, + top-most nodes representing NATURAL/USING joins, subqueries, and + base tables. All of these TABLE_LIST instances contain a + materialized list of columns. The list is local to a subquery. + */ + TABLE_LIST *next_name_resolution_table; + /* Index names in a "... JOIN ... USE/IGNORE INDEX ..." clause. */ + List *index_hints; + TABLE *table; /* opened table */ + ulonglong table_id; /* table id (from binlog) for opened table */ + /* + select_result for derived table to pass it from table creation to table + filling procedure + */ + select_unit *derived_result; + /* Stub used for materialized derived tables. */ + table_map map; /* ID bit of table (1,2,4,8,16...) */ + table_map get_map() + { + return jtbm_subselect? table_map(1) << jtbm_table_no : table->map; + } + uint get_tablenr() + { + return jtbm_subselect? 
jtbm_table_no : table->tablenr; + } + void set_tablenr(uint new_tablenr) + { + if (jtbm_subselect) + { + jtbm_table_no= new_tablenr; + } + if (table) + { + table->tablenr= new_tablenr; + table->map= table_map(1) << new_tablenr; + } + } + /* + Reference from aux_tables to local list entry of main select of + multi-delete statement: + delete t1 from t2,t1 where t1.a<'B' and t2.b=t1.b; + here it will be reference of first occurrence of t1 to second (as you + can see this lists can't be merged) + */ + TABLE_LIST *correspondent_table; + /** + @brief Normally, this field is non-null for anonymous derived tables only. + + @details This field is set to non-null for + + - Anonymous derived tables, In this case it points to the SELECT_LEX_UNIT + representing the derived table. E.g. for a query + + @verbatim SELECT * FROM (SELECT a FROM t1) b @endverbatim + + For the @c TABLE_LIST representing the derived table @c b, @c derived + points to the SELECT_LEX_UNIT representing the result of the query within + parenteses. + + - Views. This is set for views with @verbatim ALGORITHM = TEMPTABLE + @endverbatim by mysql_make_view(). + + @note Inside views, a subquery in the @c FROM clause is not allowed. + @note Do not use this field to separate views/base tables/anonymous + derived tables. Use TABLE_LIST::is_anonymous_derived_table(). 
+ */ + st_select_lex_unit *derived; /* SELECT_LEX_UNIT of derived table */ + With_element *with; /* With element defining this table (if any) */ + /* Bitmap of the defining with element */ + table_map with_internal_reference_map; + TABLE_LIST * next_with_rec_ref; + bool is_derived_with_recursive_reference; + bool block_handle_derived; + /* The interface employed to materialize the table by a foreign engine */ + derived_handler *dt_handler; + /* The text of the query specifying the derived table */ + LEX_CSTRING derived_spec; + /* + The object used to organize execution of the query that specifies + the derived table by a foreign engine + */ + Pushdown_derived *pushdown_derived; + ST_SCHEMA_TABLE *schema_table; /* Information_schema table */ + st_select_lex *schema_select_lex; + /* + True when the view field translation table is used to convert + schema table fields for backwards compatibility with SHOW command. + */ + bool schema_table_reformed; + TMP_TABLE_PARAM *schema_table_param; + /* link to select_lex where this table was used */ + st_select_lex *select_lex; + LEX *view; /* link on VIEW lex for merging */ + Field_translator *field_translation; /* array of VIEW fields */ + /* pointer to element after last one in translation table above */ + Field_translator *field_translation_end; + bool field_translation_updated; + /* + List (based on next_local) of underlying tables of this view. I.e. it + does not include the tables of subqueries used in the view. Is set only + for merged views. + */ + TABLE_LIST *merge_underlying_list; + /* + - 0 for base tables + - in case of the view it is the list of all (not only underlying + tables but also used in subquery ones) tables of the view. + */ + List *view_tables; + /* most upper view this table belongs to */ + TABLE_LIST *belong_to_view; + /* A derived table this table belongs to */ + TABLE_LIST *belong_to_derived; + /* + The view directly referencing this table + (non-zero only for merged underlying tables of a view). 
+ */ + TABLE_LIST *referencing_view; + + table_map view_used_tables; + table_map map_exec; + /* TODO: check if this can be joined with jtbm_table_no */ + uint tablenr_exec; + uint maybe_null_exec; + + /* Ptr to parent MERGE table list item. See top comment in ha_myisammrg.cc */ + TABLE_LIST *parent_l; + /* + Security context (non-zero only for tables which belong + to view with SQL SECURITY DEFINER) + */ + Security_context *security_ctx; + uchar tabledef_version_buf[MY_UUID_SIZE > + MICROSECOND_TIMESTAMP_BUFFER_SIZE-1 ? + MY_UUID_SIZE + 1 : + MICROSECOND_TIMESTAMP_BUFFER_SIZE]; + LEX_CUSTRING tabledef_version; + + /* + This view security context (non-zero only for views with + SQL SECURITY DEFINER) + */ + Security_context *view_sctx; + bool allowed_show; + Item *where; /* VIEW WHERE clause condition */ + Item *check_option; /* WITH CHECK OPTION condition */ + LEX_STRING select_stmt; /* text of (CREATE/SELECT) statement */ + LEX_CSTRING md5; /* md5 of query text */ + LEX_CSTRING source; /* source of CREATE VIEW */ + LEX_CSTRING view_db; /* saved view database */ + LEX_CSTRING view_name; /* saved view name */ + LEX_STRING hr_timestamp; /* time stamp of last operation */ + LEX_USER definer; /* definer of view */ + ulonglong file_version; /* version of file's field set */ + ulonglong mariadb_version; /* version of server on creation */ + ulonglong updatable_view; /* VIEW can be updated */ + /** + @brief The declared algorithm, if this is a view. + @details One of + - VIEW_ALGORITHM_UNDEFINED + - VIEW_ALGORITHM_TMPTABLE + - VIEW_ALGORITHM_MERGE + @to do Replace with an enum + */ + ulonglong algorithm; + ulonglong view_suid; /* view is suid (TRUE dy default) */ + ulonglong with_check; /* WITH CHECK OPTION */ + /* + effective value of WITH CHECK OPTION (differ for temporary table + algorithm) + */ + uint8 effective_with_check; + /** + @brief The view algorithm that is actually used, if this is a view. 
+ @details One of + - VIEW_ALGORITHM_UNDEFINED + - VIEW_ALGORITHM_TMPTABLE + - VIEW_ALGORITHM_MERGE + @to do Replace with an enum + */ + uint8 derived_type; + GRANT_INFO grant; + /* data need by some engines in query cache*/ + ulonglong engine_data; + /* call back function for asking handler about caching in query cache */ + qc_engine_callback callback_func; + thr_lock_type lock_type; + + /* + Two fields below are set during parsing this table reference in the cases + when the table reference can be potentially a reference to a CTE table. + In this cases the fact that the reference is a reference to a CTE or not + will be ascertained at the very end of parsing of the query when referencies + to CTE are resolved. For references to CTE and to derived tables no mdl + requests are needed while for other table references they are. If a request + is possibly postponed the info that allows to issue this request must be + saved in 'mdl_type' and 'table_options'. + */ + enum_mdl_type mdl_type; + ulong table_options; + + uint outer_join; /* Which join type */ + uint shared; /* Used in multi-upd */ + bool updatable; /* VIEW/TABLE can be updated now */ + bool straight; /* optimize with prev table */ + bool updating; /* for replicate-do/ignore table */ + bool force_index; /* prefer index over table scan */ + bool ignore_leaves; /* preload only non-leaf nodes */ + bool crashed; /* Table was found crashed */ + bool skip_locked; /* Skip locked in view defination */ + table_map dep_tables; /* tables the table depends on */ + table_map on_expr_dep_tables; /* tables on expression depends on */ + struct st_nested_join *nested_join; /* if the element is a nested join */ + TABLE_LIST *embedding; /* nested join containing the table */ + List *join_list;/* join list the table belongs to */ + bool lifted; /* set to true when the table is moved to + the upper level at the parsing stage */ + bool cacheable_table; /* stop PS caching */ + /* used in multi-upd/views privilege check */ + bool 
table_in_first_from_clause; + /** + Specifies which kind of table should be open for this element + of table list. + */ + enum enum_open_type open_type; + /* TRUE if this merged view contain auto_increment field */ + bool contain_auto_increment; + bool compact_view_format; /* Use compact format for SHOW CREATE VIEW */ + /* view where processed */ + bool where_processed; + /* TRUE <=> VIEW CHECK OPTION expression has been processed */ + bool check_option_processed; + /* TABLE_TYPE_UNKNOWN if any type is acceptable */ + Table_type required_type; + handlerton *db_type; /* table_type for handler */ + char timestamp_buffer[MICROSECOND_TIMESTAMP_BUFFER_SIZE]; + /* + This TABLE_LIST object is just placeholder for prelocking, it will be + used for implicit LOCK TABLES only and won't be used in real statement. + */ + prelocking_types prelocking_placeholder; + /** + Indicates that if TABLE_LIST object corresponds to the table/view + which requires special handling. + */ + enum enum_open_strategy + { + /* Normal open. */ + OPEN_NORMAL= 0, + /* Associate a table share only if the the table exists. */ + OPEN_IF_EXISTS, + /* Don't associate a table share. */ + OPEN_STUB + } open_strategy; + /** TRUE if an alias for this table was specified in the SQL. */ + bool is_alias; + /** TRUE if the table is referred to in the statement using a fully + qualified name (.). + */ + bool is_fqtn; + + /* TRUE <=> derived table should be filled right after optimization. */ + bool fill_me; + /* TRUE <=> view/DT is merged. */ + /* TODO: replace with derived_type */ + bool merged; + bool merged_for_insert; + bool sequence; /* Part of NEXTVAL/CURVAL/LASTVAL */ + /* + Protect single thread from repeating partition auto-create over + multiple share instances (as the share is closed on backoff action). + + Skips auto-create only for one given query id. 
+ */ + query_id_t vers_skip_create; + + /* + Items created by create_view_field and collected to change them in case + of materialization of the view/derived table + */ + List used_items; + /* Sublist (tail) of persistent used_items */ + List persistent_used_items; + + /* View creation context. */ + + View_creation_ctx *view_creation_ctx; + + /* + Attributes to save/load view creation context in/from frm-file. + + Ther are required only to be able to use existing parser to load + view-definition file. As soon as the parser parsed the file, view + creation context is initialized and the attributes become redundant. + + These attributes MUST NOT be used for any purposes but the parsing. + */ + + LEX_CSTRING view_client_cs_name; + LEX_CSTRING view_connection_cl_name; + + /* + View definition (SELECT-statement) in the UTF-form. + */ + + LEX_CSTRING view_body_utf8; + + /* End of view definition context. */ + + /** + Indicates what triggers we need to pre-load for this TABLE_LIST + when opening an associated TABLE. This is filled after + the parsed tree is created. + + slave_fk_event_map is filled on the slave side with bitmaps value + representing row-based event operation to help find and prelock + possible FK constrain-related child tables. + */ + uint8 trg_event_map, slave_fk_event_map; + /* TRUE <=> this table is a const one and was optimized away. */ + bool optimized_away; + + /** + TRUE <=> already materialized. Valid only for materialized derived + tables/views. + */ + bool materialized; + /* I_S: Flags to open_table (e.g. OPEN_TABLE_ONLY or OPEN_VIEW_ONLY) */ + uint i_s_requested_object; + + bool prohibit_cond_pushdown; + + /* + I_S: how to read the tables (SKIP_OPEN_TABLE/OPEN_FRM_ONLY/OPEN_FULL_TABLE) + */ + uint table_open_method; + /* + I_S: where the schema table was filled + (this is a hack. The code should be able to figure out whether reading + from I_S should be done by create_sort_index() or by JOIN::exec.) 
+ */ + enum enum_schema_table_state schema_table_state; + + /* Something like a "query plan" for reading INFORMATION_SCHEMA table */ + IS_table_read_plan *is_table_read_plan; + + MDL_request mdl_request; + +#ifdef WITH_PARTITION_STORAGE_ENGINE + /* List to carry partition names from PARTITION (...) clause in statement */ + List *partition_names; +#endif /* WITH_PARTITION_STORAGE_ENGINE */ + + void calc_md5(char *buffer); + int view_check_option(THD *thd, bool ignore_failure); + bool create_field_translation(THD *thd); + bool setup_underlying(THD *thd); + void cleanup_items(); + bool placeholder() + { + return derived || view || schema_table || !table || table_function; + } + void print(THD *thd, table_map eliminated_tables, String *str, + enum_query_type query_type); + void print_leaf_tables(THD *thd, String *str, + enum_query_type query_type); + bool check_single_table(TABLE_LIST **table, table_map map, + TABLE_LIST *view); + bool set_insert_values(MEM_ROOT *mem_root); + void hide_view_error(THD *thd); + TABLE_LIST *find_underlying_table(TABLE *table); + TABLE_LIST *first_leaf_for_name_resolution(); + TABLE_LIST *last_leaf_for_name_resolution(); + + /* System Versioning */ + vers_select_conds_t vers_conditions; + vers_select_conds_t period_conditions; + + bool has_period() const + { + return period_conditions.is_set(); + } + + my_bool for_insert_data; + + /** + @brief + Find the bottom in the chain of embedded table VIEWs. + + @detail + This is used for single-table UPDATE/DELETE when they are modifying a + single-table VIEW. + */ + TABLE_LIST *find_table_for_update() + { + TABLE_LIST *tbl= this; + while(!tbl->is_multitable() && tbl->single_table_updatable() && + tbl->merge_underlying_list) + { + tbl= tbl->merge_underlying_list; + } + return tbl; + } + TABLE *get_real_join_table(); + bool is_leaf_for_name_resolution(); + inline TABLE_LIST *top_table() + { return belong_to_view ? 
belong_to_view : this; } + inline bool prepare_check_option(THD *thd) + { + bool res= FALSE; + if (effective_with_check) + res= prep_check_option(thd, effective_with_check); + return res; + } + inline bool prepare_where(THD *thd, Item **conds, + bool no_where_clause) + { + if (!view || is_merged_derived()) + return prep_where(thd, conds, no_where_clause); + return FALSE; + } + + void register_want_access(privilege_t want_access); + bool prepare_security(THD *thd); +#ifndef NO_EMBEDDED_ACCESS_CHECKS + Security_context *find_view_security_context(THD *thd); + bool prepare_view_security_context(THD *thd, bool upgrade_check); +#endif + /* + Cleanup for re-execution in a prepared statement or a stored + procedure. + */ + void reinit_before_use(THD *thd); + Item_subselect *containing_subselect(); + + /* + Compiles the tagged hints list and fills up TABLE::keys_in_use_for_query, + TABLE::keys_in_use_for_group_by, TABLE::keys_in_use_for_order_by, + TABLE::force_index and TABLE::covering_keys. + */ + bool process_index_hints(TABLE *table); + + bool is_the_same_definition(THD *thd, TABLE_SHARE *s); + /** + Record the value of metadata version of the corresponding + table definition cache element in this parse tree node. 
+ + @sa check_and_update_table_version() + */ + inline void set_table_ref_id(TABLE_SHARE *s) + { set_table_ref_id(s->get_table_ref_type(), s->get_table_ref_version()); } + + inline void set_table_ref_id(enum_table_ref_type table_ref_type_arg, + ulong table_ref_version_arg) + { + m_table_ref_type= table_ref_type_arg; + m_table_ref_version= table_ref_version_arg; + } + + void set_table_id(TABLE_SHARE *s) + { + set_table_ref_id(s); + set_tabledef_version(s); + } + + void set_tabledef_version(TABLE_SHARE *s) + { + if (!tabledef_version.length && s->tabledef_version.length) + { + DBUG_ASSERT(s->tabledef_version.length < + sizeof(tabledef_version_buf)); + tabledef_version.str= tabledef_version_buf; + memcpy(tabledef_version_buf, s->tabledef_version.str, + (tabledef_version.length= s->tabledef_version.length)); + // safety + tabledef_version_buf[tabledef_version.length]= 0; + } + } + + /* Set of functions returning/setting state of a derived table/view. */ + bool is_non_derived() const { return (!derived_type); } + bool is_view_or_derived() const { return derived_type; } + bool is_view() const { return (derived_type & DTYPE_VIEW); } + bool is_derived() const { return (derived_type & DTYPE_TABLE); } + bool is_with_table(); + bool is_recursive_with_table(); + bool is_with_table_recursive_reference(); + void register_as_derived_with_rec_ref(With_element *rec_elem); + bool is_nonrecursive_derived_with_rec_ref(); + bool fill_recursive(THD *thd); + + inline void set_view() + { + derived_type= DTYPE_VIEW; + } + inline void set_derived() + { + derived_type= DTYPE_TABLE; + } + bool is_merged_derived() const { return (derived_type & DTYPE_MERGE); } + inline void set_merged_derived() + { + DBUG_ENTER("set_merged_derived"); + DBUG_PRINT("enter", ("Alias: '%s' Unit: %p", + (alias.str ? 
alias.str : ""), + get_unit())); + derived_type= static_cast((derived_type & DTYPE_MASK) | DTYPE_MERGE); + set_check_merged(); + DBUG_VOID_RETURN; + } + bool is_materialized_derived() const + { + return (derived_type & DTYPE_MATERIALIZE); + } + void set_materialized_derived() + { + DBUG_ENTER("set_materialized_derived"); + DBUG_PRINT("enter", ("Alias: '%s' Unit: %p", + (alias.str ? alias.str : ""), + get_unit())); + derived_type= static_cast((derived_type & + (derived ? DTYPE_MASK : DTYPE_VIEW)) | + DTYPE_MATERIALIZE); + set_check_materialized(); + DBUG_VOID_RETURN; + } + bool is_multitable() const { return (derived_type & DTYPE_MULTITABLE); } + inline void set_multitable() + { + derived_type|= DTYPE_MULTITABLE; + } + bool set_as_with_table(THD *thd, With_element *with_elem); + void reset_const_table(); + bool handle_derived(LEX *lex, uint phases); + + /** + @brief True if this TABLE_LIST represents an anonymous derived table, + i.e. the result of a subquery. + */ + bool is_anonymous_derived_table() const { return derived && !view; } + + /** + @brief Returns the name of the database that the referenced table belongs + to. + */ + const char *get_db_name() const { return view != NULL ? view_db.str : db.str; } + + /** + @brief Returns the name of the table that this TABLE_LIST represents. + + @details The unqualified table name or view name for a table or view, + respectively. + */ + const char *get_table_name() const { return view != NULL ? 
view_name.str : table_name.str; } + bool is_active_sjm(); + bool is_sjm_scan_table(); + bool is_jtbm() { return MY_TEST(jtbm_subselect != NULL); } + st_select_lex_unit *get_unit(); + st_select_lex *get_single_select(); + void wrap_into_nested_join(List &join_list); + bool init_derived(THD *thd, bool init_view); + int fetch_number_of_rows(); + bool change_refs_to_fields(); + + bool single_table_updatable(); + + bool is_inner_table_of_outer_join() + { + for (TABLE_LIST *tbl= this; tbl; tbl= tbl->embedding) + { + if (tbl->outer_join) + return true; + } + return false; + } + void set_lock_type(THD* thd, enum thr_lock_type lock); + + derived_handler *find_derived_handler(THD *thd); + TABLE_LIST *get_first_table(); + + void remove_join_columns() + { + if (join_columns) + { + join_columns->empty(); + join_columns= NULL; + is_join_columns_complete= FALSE; + } + } + + inline void set_view_def_version(LEX_STRING *version) + { + m_table_ref_type= TABLE_REF_VIEW; + tabledef_version.str= (const uchar *) version->str; + tabledef_version.length= version->length; + } +private: + bool prep_check_option(THD *thd, uint8 check_opt_type); + bool prep_where(THD *thd, Item **conds, bool no_where_clause); + void set_check_materialized(); +#ifndef DBUG_OFF + void set_check_merged(); +#else + inline void set_check_merged() {} +#endif + /** See comments for set_table_ref_id() */ + enum enum_table_ref_type m_table_ref_type; + /** See comments for set_table_ref_id() */ + ulong m_table_ref_version; +}; + +class Item; + +/* + Iterator over the fields of a generic table reference. 
+*/ + +class Field_iterator: public Sql_alloc +{ +public: + Field_iterator() = default; /* Remove gcc warning */ + virtual ~Field_iterator() = default; + virtual void set(TABLE_LIST *)= 0; + virtual void next()= 0; + virtual bool end_of_fields()= 0; /* Return 1 at end of list */ + virtual LEX_CSTRING *name()= 0; + virtual Item *create_item(THD *)= 0; + virtual Field *field()= 0; +}; + + +/* + Iterator over the fields of a base table, view with temporary + table, or subquery. +*/ + +class Field_iterator_table: public Field_iterator +{ + Field **ptr; +public: + Field_iterator_table() :ptr(0) {} + void set(TABLE_LIST *table) { ptr= table->table->field; } + void set_table(TABLE *table) { ptr= table->field; } + void next() { ptr++; } + bool end_of_fields() { return *ptr == 0; } + LEX_CSTRING *name(); + Item *create_item(THD *thd); + Field *field() { return *ptr; } +}; + + +/* Iterator over the fields of a merge view. */ + +class Field_iterator_view: public Field_iterator +{ + Field_translator *ptr, *array_end; + TABLE_LIST *view; +public: + Field_iterator_view() :ptr(0), array_end(0) {} + void set(TABLE_LIST *table); + void next() { ptr++; } + bool end_of_fields() { return ptr == array_end; } + LEX_CSTRING *name(); + Item *create_item(THD *thd); + Item **item_ptr() {return &ptr->item; } + Field *field() { return 0; } + inline Item *item() { return ptr->item; } + Field_translator *field_translator() { return ptr; } +}; + + +/* + Field_iterator interface to the list of materialized fields of a + NATURAL/USING join. 
+*/ + +class Field_iterator_natural_join: public Field_iterator +{ + List_iterator_fast column_ref_it; + Natural_join_column *cur_column_ref; +public: + Field_iterator_natural_join() :cur_column_ref(NULL) {} + ~Field_iterator_natural_join() = default; + void set(TABLE_LIST *table); + void next(); + bool end_of_fields() { return !cur_column_ref; } + LEX_CSTRING *name() { return cur_column_ref->name(); } + Item *create_item(THD *thd) { return cur_column_ref->create_item(thd); } + Field *field() { return cur_column_ref->field(); } + Natural_join_column *column_ref() { return cur_column_ref; } +}; + + +/* + Generic iterator over the fields of an arbitrary table reference. + + DESCRIPTION + This class unifies the various ways of iterating over the columns + of a table reference depending on the type of SQL entity it + represents. If such an entity represents a nested table reference, + this iterator encapsulates the iteration over the columns of the + members of the table reference. + + IMPLEMENTATION + The implementation assumes that all underlying NATURAL/USING table + references already contain their result columns and are linked into + the list TABLE_LIST::next_name_resolution_table. 
+*/ + +class Field_iterator_table_ref: public Field_iterator +{ + TABLE_LIST *table_ref, *first_leaf, *last_leaf; + Field_iterator_table table_field_it; + Field_iterator_view view_field_it; + Field_iterator_natural_join natural_join_it; + Field_iterator *field_it; + void set_field_iterator(); +public: + Field_iterator_table_ref() :field_it(NULL) {} + void set(TABLE_LIST *table); + void next(); + bool end_of_fields() + { return (table_ref == last_leaf && field_it->end_of_fields()); } + LEX_CSTRING *name() { return field_it->name(); } + const char *get_table_name(); + const char *get_db_name(); + GRANT_INFO *grant(); + Item *create_item(THD *thd) { return field_it->create_item(thd); } + Field *field() { return field_it->field(); } + Natural_join_column *get_or_create_column_ref(THD *thd, TABLE_LIST *parent_table_ref); + Natural_join_column *get_natural_column_ref(); +}; + + +#define JOIN_OP_NEST 1 +#define REBALANCED_NEST 2 + +typedef struct st_nested_join +{ + List join_list; /* list of elements in the nested join */ + /* + Currently the valid values for nest type are: + JOIN_OP_NEST - for nest created for JOIN operation used as an operand in + a join expression, contains 2 elements; + JOIN_OP_NEST | REBALANCED_NEST - nest created after tree re-balancing + in st_select_lex::add_cross_joined_table(), contains 1 element; + 0 - for all other nests. + Examples: + 1. SELECT * FROM t1 JOIN t2 LEFT JOIN t3 ON t2.a=t3.a; + Here the nest created for LEFT JOIN at first has nest_type==JOIN_OP_NEST. + After re-balancing in st_select_lex::add_cross_joined_table() this nest + has nest_type==JOIN_OP_NEST | REBALANCED_NEST. The nest for JOIN created + in st_select_lex::add_cross_joined_table() has nest_type== JOIN_OP_NEST. + 2. SELECT * FROM t1 JOIN (t2 LEFT JOIN t3 ON t2.a=t3.a) + Here the nest created for LEFT JOIN has nest_type==0, because it's not + an operand in a join expression. The nest created for JOIN has nest_type + set to JOIN_OP_NEST. 
+ */ + uint nest_type; + /* + Bitmap of tables within this nested join (including those embedded within + its children), including tables removed by table elimination. + */ + table_map used_tables; + table_map not_null_tables; /* tables that rejects nulls */ + /** + Used for pointing out the first table in the plan being covered by this + join nest. It is used exclusively within make_outerjoin_info(). + */ + struct st_join_table *first_nested; + /* + Used to count tables in the nested join in 2 isolated places: + 1. In make_outerjoin_info(). + 2. check_interleaving_with_nj/restore_prev_nj_state (these are called + by the join optimizer. + Before each use the counters are zeroed by reset_nj_counters. + */ + uint counter; + + /* + Number of elements in join_list that participate in the join plan choice: + - Base tables that were not removed by table elimination + - Join nests that were not removed by mark_join_nest_as_const + */ + uint n_tables; + nested_join_map nj_map; /* Bit used to identify this nested join*/ + /* + (Valid only for semi-join nests) Bitmap of tables outside the semi-join + that are used within the semi-join's ON condition. + */ + table_map sj_depends_on; + /* Outer non-trivially correlated tables */ + table_map sj_corr_tables; + table_map direct_children_map; + List sj_outer_expr_list; + /** + True if this join nest node is completely covered by the query execution + plan. This means two things. + + 1. All tables on its @c join_list are covered by the plan. + + 2. All child join nest nodes are fully covered. 
+ */ + bool is_fully_covered() const { return n_tables == counter; } +} NESTED_JOIN; + + +typedef struct st_changed_table_list +{ + struct st_changed_table_list *next; + char *key; + size_t key_length; +} CHANGED_TABLE_LIST; + + +typedef struct st_open_table_list{ + struct st_open_table_list *next; + char *db,*table; + uint32 in_use,locked; +} OPEN_TABLE_LIST; + + +static inline MY_BITMAP *tmp_use_all_columns(TABLE *table, + MY_BITMAP **bitmap) +{ + MY_BITMAP *old= *bitmap; + *bitmap= &table->s->all_set; + return old; +} + + +static inline void tmp_restore_column_map(MY_BITMAP **bitmap, + MY_BITMAP *old) +{ + *bitmap= old; +} + +/* The following is only needed for debugging */ + +static inline MY_BITMAP *dbug_tmp_use_all_columns(TABLE *table, + MY_BITMAP **bitmap) +{ +#ifdef DBUG_ASSERT_EXISTS + return tmp_use_all_columns(table, bitmap); +#else + return 0; +#endif +} + +static inline void dbug_tmp_restore_column_map(MY_BITMAP **bitmap, + MY_BITMAP *old) +{ +#ifdef DBUG_ASSERT_EXISTS + tmp_restore_column_map(bitmap, old); +#endif +} + + +/* + Variant of the above : handle both read and write sets. 
+ Provide for the possiblity of the read set being the same as the write set +*/ +static inline void dbug_tmp_use_all_columns(TABLE *table, + MY_BITMAP **save, + MY_BITMAP **read_set, + MY_BITMAP **write_set) +{ +#ifdef DBUG_ASSERT_EXISTS + save[0]= *read_set; + save[1]= *write_set; + (void) tmp_use_all_columns(table, read_set); + (void) tmp_use_all_columns(table, write_set); +#endif +} + + +static inline void dbug_tmp_restore_column_maps(MY_BITMAP **read_set, + MY_BITMAP **write_set, + MY_BITMAP **old) +{ +#ifdef DBUG_ASSERT_EXISTS + tmp_restore_column_map(read_set, old[0]); + tmp_restore_column_map(write_set, old[1]); +#endif +} + +bool ok_for_lower_case_names(const char *names); + +enum get_table_share_flags { + GTS_TABLE = 1, + GTS_VIEW = 2, + GTS_NOLOCK = 4, + GTS_USE_DISCOVERY = 8, + GTS_FORCE_DISCOVERY = 16 +}; + +size_t max_row_length(TABLE *table, MY_BITMAP const *cols, const uchar *data); + +void init_mdl_requests(TABLE_LIST *table_list); + +enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share, + const LEX_CSTRING *alias, uint db_stat, uint prgflag, + uint ha_open_flags, TABLE *outparam, + bool is_create_table, + List *partitions_to_open= NULL); +bool copy_keys_from_share(TABLE *outparam, MEM_ROOT *root); +bool parse_vcol_defs(THD *thd, MEM_ROOT *mem_root, TABLE *table, + bool *error_reported, vcol_init_mode expr); +TABLE_SHARE *alloc_table_share(const char *db, const char *table_name, + const char *key, uint key_length); +void init_tmp_table_share(THD *thd, TABLE_SHARE *share, const char *key, + uint key_length, + const char *table_name, const char *path); +void free_table_share(TABLE_SHARE *share); +enum open_frm_error open_table_def(THD *thd, TABLE_SHARE *share, + uint flags = GTS_TABLE); + +void open_table_error(TABLE_SHARE *share, enum open_frm_error error, + int db_errno); +void update_create_info_from_table(HA_CREATE_INFO *info, TABLE *form); +bool check_db_name(LEX_STRING *db); +bool check_column_name(const char *name); +bool 
check_period_name(const char *name); +bool check_table_name(const char *name, size_t length, bool check_for_path_chars); +int rename_file_ext(const char * from,const char * to,const char * ext); +char *get_field(MEM_ROOT *mem, Field *field); +bool get_field(MEM_ROOT *mem, Field *field, class String *res); + +bool validate_comment_length(THD *thd, LEX_CSTRING *comment, size_t max_len, + uint err_code, const char *name); + +int closefrm(TABLE *table); +void free_blobs(TABLE *table); +void free_field_buffers_larger_than(TABLE *table, uint32 size); +ulong get_form_pos(File file, uchar *head, TYPELIB *save_names); +void append_unescaped(String *res, const char *pos, size_t length); +void prepare_frm_header(THD *thd, uint reclength, uchar *fileinfo, + HA_CREATE_INFO *create_info, uint keys, KEY *key_info); +const char *fn_frm_ext(const char *name); + +/* Check that the integer is in the internal */ +static inline int set_zone(int nr,int min_zone,int max_zone) +{ + if (nr <= min_zone) + return min_zone; + if (nr >= max_zone) + return max_zone; + return nr; +} + +/* performance schema */ +extern LEX_CSTRING PERFORMANCE_SCHEMA_DB_NAME; + +extern LEX_CSTRING GENERAL_LOG_NAME; +extern LEX_CSTRING SLOW_LOG_NAME; +extern LEX_CSTRING TRANSACTION_REG_NAME; + +/* information schema */ +extern LEX_CSTRING INFORMATION_SCHEMA_NAME; +extern LEX_CSTRING MYSQL_SCHEMA_NAME; + +/* table names */ +extern LEX_CSTRING MYSQL_PROC_NAME; + +inline bool is_infoschema_db(const LEX_CSTRING *name) +{ + return lex_string_eq(&INFORMATION_SCHEMA_NAME, name); +} + +inline bool is_perfschema_db(const LEX_CSTRING *name) +{ + return lex_string_eq(&PERFORMANCE_SCHEMA_DB_NAME, name); +} + +inline void mark_as_null_row(TABLE *table) +{ + table->null_row=1; + table->status|=STATUS_NULL_ROW; + if (table->s->null_bytes) + bfill(table->null_flags,table->s->null_bytes,255); +} + +/* + Restore table to state before mark_as_null_row() call. 
+ This assumes that the caller has restored table->null_flags, + as is done in unclear_tables(). +*/ + +inline void unmark_as_null_row(TABLE *table) +{ + table->null_row= 0; + table->status&= ~STATUS_NULL_ROW; +} + +bool is_simple_order(ORDER *order); + +class Open_tables_backup; + +/** Transaction Registry Table (TRT) + + This table holds transaction IDs, their corresponding times and other + transaction-related data which is used for transaction order resolution. + When versioned table marks its records lifetime with transaction IDs, + TRT is used to get their actual timestamps. */ + +class TR_table: public TABLE_LIST +{ + THD *thd; + Open_tables_backup *open_tables_backup; + +public: + enum field_id_t { + FLD_TRX_ID= 0, + FLD_COMMIT_ID, + FLD_BEGIN_TS, + FLD_COMMIT_TS, + FLD_ISO_LEVEL, + FIELD_COUNT + }; + + enum enabled {NO, MAYBE, YES}; + static enum enabled use_transaction_registry; + + /** + @param[in,out] Thread handle + @param[in] Current transaction is read-write. + */ + TR_table(THD *_thd, bool rw= false); + /** + Opens a transaction_registry table. + + @retval true on error, false otherwise. + */ + bool open(); + ~TR_table(); + /** + @retval current thd + */ + THD *get_thd() const { return thd; } + /** + Stores value to internal transaction_registry TABLE object. + + @param[in] field number in a TABLE + @param[in] value to store + */ + void store(uint field_id, ulonglong val); + /** + Stores value to internal transaction_registry TABLE object. + + @param[in] field number in a TABLE + @param[in] value to store + */ + void store(uint field_id, timeval ts); + /** + Update the transaction_registry right before commit. 
+ @param start_id transaction identifier at start + @param end_id transaction identifier at commit + + @retval false on success + @retval true on error (the transaction must be rolled back) + */ + bool update(ulonglong start_id, ulonglong end_id); + // return true if found; false if not found or error + bool query(ulonglong trx_id); + /** + Gets a row from transaction_registry with the closest commit_timestamp to + first argument. We can search for a value which a lesser or greater than + first argument. Also loads a row into an internal TABLE object. + + @param[in] timestamp + @param[in] true if we search for a lesser timestamp, false if greater + @retval true if exists, false it not exists or an error occurred + */ + bool query(MYSQL_TIME &commit_time, bool backwards); + /** + Checks whether transaction1 sees transaction0. + + @param[out] true if transaction1 sees transaction0, undefined on error and + when transaction1=transaction0 and false otherwise + @param[in] transaction_id of transaction1 + @param[in] transaction_id of transaction0 + @param[in] commit time of transaction1 or 0 if we want it to be queried + @param[in] isolation level (from handler.h) of transaction1 + @param[in] commit time of transaction0 or 0 if we want it to be queried + @retval true on error, false otherwise + */ + bool query_sees(bool &result, ulonglong trx_id1, ulonglong trx_id0, + ulonglong commit_id1= 0, + enum_tx_isolation iso_level1= ISO_READ_UNCOMMITTED, + ulonglong commit_id0= 0); + + /** + @retval transaction isolation level of a row from internal TABLE object. + */ + enum_tx_isolation iso_level() const; + /** + Stores transactioin isolation level to internal TABLE object. + */ + void store_iso_level(enum_tx_isolation iso_level) + { + DBUG_ASSERT(iso_level <= ISO_SERIALIZABLE); + store(FLD_ISO_LEVEL, iso_level + 1); + } + + /** + Writes a message to MariaDB log about incorrect transaction_registry schema. 
+ + @param[in] a message explained what's incorrect in schema + */ + void warn_schema_incorrect(const char *reason); + /** + Checks whether transaction_registry table has a correct schema. + + @retval true if schema is incorrect and false otherwise + */ + bool check(bool error); + + TABLE * operator-> () const + { + return table; + } + Field * operator[] (uint field_id) const + { + DBUG_ASSERT(field_id < FIELD_COUNT); + return table->field[field_id]; + } + operator bool () const + { + return table; + } + bool operator== (const TABLE_LIST &subj) const + { + return (!cmp(&db, &subj.db) && !cmp(&table_name, &subj.table_name)); + } + bool operator!= (const TABLE_LIST &subj) const + { + return !(*this == subj); + } +}; + +#endif /* MYSQL_CLIENT */ + +#endif /* TABLE_INCLUDED */ diff --git a/sql/table_cache.cc b/sql/table_cache.cc new file mode 100644 index 00000000..0039c96a --- /dev/null +++ b/sql/table_cache.cc @@ -0,0 +1,1315 @@ +/* Copyright (c) 2000, 2012, Oracle and/or its affiliates. + Copyright (c) 2010, 2022, MariaDB Corporation. + Copyright (C) 2013 Sergey Vojtovich and MariaDB Foundation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + Table definition cache and table cache implementation. 
+ + Table definition cache actions: + - add new TABLE_SHARE object to cache (tdc_acquire_share()) + - acquire TABLE_SHARE object from cache (tdc_acquire_share()) + - release TABLE_SHARE object to cache (tdc_release_share()) + - purge unused TABLE_SHARE objects from cache (tdc_purge()) + - remove TABLE_SHARE object from cache (tdc_remove_table()) + - get number of TABLE_SHARE objects in cache (tdc_records()) + + Table cache actions: + - add new TABLE object to cache (tc_add_table()) + - acquire TABLE object from cache (tc_acquire_table()) + - release TABLE object to cache (tc_release_table()) + - purge unused TABLE objects from cache (tc_purge()) + - purge unused TABLE objects of a table from cache (tdc_remove_table()) + - get number of TABLE objects in cache (tc_records()) + + Dependencies: + - close_cached_tables(): flush tables on shutdown + - alloc_table_share() + - free_table_share() + + Table cache invariants: + - TABLE_SHARE::free_tables shall not contain objects with TABLE::in_use != 0 + - TABLE_SHARE::free_tables shall not receive new objects if + TABLE_SHARE::tdc.flushed is true +*/ + +#include "mariadb.h" +#include "lf.h" +#include "table.h" +#include "sql_base.h" +#include "aligned.h" + + +/** Configuration. */ +ulong tdc_size; /**< Table definition cache threshold for LRU eviction. */ +ulong tc_size; /**< Table cache threshold for LRU eviction. */ +uint32 tc_instances; +static size_t tc_allocated_size; +static std::atomic tc_active_instances(1); +static std::atomic tc_contention_warning_reported; + +/** Data collections. */ +static LF_HASH tdc_hash; /**< Collection of TABLE_SHARE objects. */ +/** Collection of unused TABLE_SHARE objects. */ +static +I_P_List , + I_P_List_null_counter, + I_P_List_fast_push_back > unused_shares; + +static bool tdc_inited; + + +/** + Protects unused shares list. 
+ + TDC_element::prev + TDC_element::next + unused_shares +*/ + +static mysql_mutex_t LOCK_unused_shares; + +#ifdef HAVE_PSI_INTERFACE +static PSI_mutex_key key_LOCK_unused_shares, key_TABLE_SHARE_LOCK_table_share, + key_LOCK_table_cache; +static PSI_mutex_info all_tc_mutexes[]= +{ + { &key_LOCK_unused_shares, "LOCK_unused_shares", PSI_FLAG_GLOBAL }, + { &key_TABLE_SHARE_LOCK_table_share, "TABLE_SHARE::tdc.LOCK_table_share", 0 }, + { &key_LOCK_table_cache, "LOCK_table_cache", 0 } +}; + +static PSI_cond_key key_TABLE_SHARE_COND_release; +static PSI_cond_info all_tc_conds[]= +{ + { &key_TABLE_SHARE_COND_release, "TABLE_SHARE::tdc.COND_release", 0 } +}; +#endif + + +static int fix_thd_pins(THD *thd) +{ + return thd->tdc_hash_pins ? 0 : + (thd->tdc_hash_pins= lf_hash_get_pins(&tdc_hash)) == 0; +} + + +/* + Auxiliary routines for manipulating with per-share all/unused lists + and tc_count counter. + Responsible for preserving invariants between those lists, counter + and TABLE::in_use member. + In fact those routines implement sort of implicit table cache as + part of table definition cache. +*/ + +struct Table_cache_instance +{ + /** + Protects free_tables (TABLE::global_free_next and TABLE::global_free_prev), + records, Share_free_tables::List (TABLE::prev and TABLE::next), + TABLE::in_use. + */ + alignas(CPU_LEVEL1_DCACHE_LINESIZE) + mysql_mutex_t LOCK_table_cache; + I_P_List , + I_P_List_null_counter, I_P_List_fast_push_back
> + free_tables; + ulong records; + uint mutex_waits; + uint mutex_nowaits; + + Table_cache_instance(): records(0), mutex_waits(0), mutex_nowaits(0) + { + static_assert(!(sizeof(*this) % CPU_LEVEL1_DCACHE_LINESIZE), "alignment"); + mysql_mutex_init(key_LOCK_table_cache, &LOCK_table_cache, + MY_MUTEX_INIT_FAST); + } + + ~Table_cache_instance() + { + mysql_mutex_destroy(&LOCK_table_cache); + DBUG_ASSERT(free_tables.is_empty()); + DBUG_ASSERT(records == 0); + } + + static void *operator new[](size_t size) + { return aligned_malloc(size, CPU_LEVEL1_DCACHE_LINESIZE); } + static void operator delete[](void *ptr) { aligned_free(ptr); } + static void mark_memory_freed() + { + update_malloc_size(-(longlong) tc_allocated_size, 0); + } + + /** + Lock table cache mutex and check contention. + + Instance is considered contested if more than 20% of mutex acquisiotions + can't be served immediately. Up to 100 000 probes may be performed to avoid + instance activation on short sporadic peaks. 100 000 is estimated maximum + number of queries one instance can serve in one second. + + These numbers work well on a 2 socket / 20 core / 40 threads Intel Broadwell + system, that is expected number of instances is activated within reasonable + warmup time. It may have to be adjusted for other systems. + + Only TABLE object acquistion is instrumented. We intentionally avoid this + overhead on TABLE object release. All other table cache mutex acquistions + are considered out of hot path and are not instrumented either. + */ + void lock_and_check_contention(uint32_t n_instances, uint32_t instance) + { + if (mysql_mutex_trylock(&LOCK_table_cache)) + { + mysql_mutex_lock(&LOCK_table_cache); + if (++mutex_waits == 20000) + { + if (n_instances < tc_instances) + { + if (tc_active_instances. 
+ compare_exchange_weak(n_instances, n_instances + 1, + std::memory_order_relaxed, + std::memory_order_relaxed)) + { + sql_print_information("Detected table cache mutex contention at instance %d: " + "%d%% waits. Additional table cache instance " + "activated. Number of instances after " + "activation: %d.", + instance + 1, + mutex_waits * 100 / (mutex_nowaits + mutex_waits), + n_instances + 1); + } + } + else if (!tc_contention_warning_reported.exchange(true, + std::memory_order_relaxed)) + { + sql_print_warning("Detected table cache mutex contention at instance %d: " + "%d%% waits. Additional table cache instance " + "cannot be activated: consider raising " + "table_open_cache_instances. Number of active " + "instances: %d.", + instance + 1, + mutex_waits * 100 / (mutex_nowaits + mutex_waits), + n_instances); + } + mutex_waits= 0; + mutex_nowaits= 0; + } + } + else if (++mutex_nowaits == 80000) + { + mutex_waits= 0; + mutex_nowaits= 0; + } + } +}; + + +static Table_cache_instance *tc; + + +static void intern_close_table(TABLE *table) +{ + delete table->triggers; + DBUG_ASSERT(table->file); + closefrm(table); + tdc_release_share(table->s); + my_free(table); +} + + +/** + Get number of TABLE objects (used and unused) in table cache. +*/ + +uint tc_records(void) +{ + ulong total= 0; + for (uint32 i= 0; i < tc_instances; i++) + { + mysql_mutex_lock(&tc[i].LOCK_table_cache); + total+= tc[i].records; + mysql_mutex_unlock(&tc[i].LOCK_table_cache); + } + return total; +} + + +/** + Remove TABLE object from table cache. +*/ + +static void tc_remove_table(TABLE *table) +{ + TDC_element *element= table->s->tdc; + + mysql_mutex_lock(&element->LOCK_table_share); + /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. 
*/ + while (element->all_tables_refs) + mysql_cond_wait(&element->COND_release, &element->LOCK_table_share); + element->all_tables.remove(table); + mysql_mutex_unlock(&element->LOCK_table_share); + + intern_close_table(table); +} + + +static void tc_remove_all_unused_tables(TDC_element *element, + Share_free_tables::List *purge_tables) +{ + for (uint32 i= 0; i < tc_instances; i++) + { + mysql_mutex_lock(&tc[i].LOCK_table_cache); + while (auto table= element->free_tables[i].list.pop_front()) + { + tc[i].records--; + tc[i].free_tables.remove(table); + DBUG_ASSERT(element->all_tables_refs == 0); + element->all_tables.remove(table); + purge_tables->push_front(table); + } + mysql_mutex_unlock(&tc[i].LOCK_table_cache); + } +} + + +/** + Free all unused TABLE objects. + + While locked: + - remove unused objects from TABLE_SHARE::tdc.free_tables and + TABLE_SHARE::tdc.all_tables + - decrement tc_count + + While unlocked: + - free resources related to unused objects + + @note This is called by 'handle_manager' when one wants to + periodicly flush all not used tables. +*/ + +static my_bool tc_purge_callback(TDC_element *element, + Share_free_tables::List *purge_tables) +{ + mysql_mutex_lock(&element->LOCK_table_share); + tc_remove_all_unused_tables(element, purge_tables); + mysql_mutex_unlock(&element->LOCK_table_share); + return FALSE; +} + + +void tc_purge() +{ + Share_free_tables::List purge_tables; + + tdc_iterate(0, (my_hash_walk_action) tc_purge_callback, &purge_tables); + while (auto table= purge_tables.pop_front()) + intern_close_table(table); +} + + +/** + Add new TABLE object to table cache. + + @pre TABLE object is used by caller. + + Added object cannot be evicted or acquired. 
+ + While locked: + - add object to TABLE_SHARE::tdc.all_tables + - increment tc_count + - evict LRU object from table cache if we reached threshold + + While unlocked: + - free evicted object +*/ + +void tc_add_table(THD *thd, TABLE *table) +{ + uint32_t i= + thd->thread_id % tc_active_instances.load(std::memory_order_relaxed); + TABLE *LRU_table= 0; + TDC_element *element= table->s->tdc; + + DBUG_ASSERT(table->in_use == thd); + table->instance= i; + mysql_mutex_lock(&element->LOCK_table_share); + /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */ + while (element->all_tables_refs) + mysql_cond_wait(&element->COND_release, &element->LOCK_table_share); + element->all_tables.push_front(table); + mysql_mutex_unlock(&element->LOCK_table_share); + + mysql_mutex_lock(&tc[i].LOCK_table_cache); + if (tc[i].records == tc_size) + { + if ((LRU_table= tc[i].free_tables.pop_front())) + { + LRU_table->s->tdc->free_tables[i].list.remove(LRU_table); + /* Needed if MDL deadlock detector chimes in before tc_remove_table() */ + LRU_table->in_use= thd; + mysql_mutex_unlock(&tc[i].LOCK_table_cache); + /* Keep out of locked LOCK_table_cache */ + tc_remove_table(LRU_table); + } + else + { + tc[i].records++; + mysql_mutex_unlock(&tc[i].LOCK_table_cache); + } + /* Keep out of locked LOCK_table_cache */ + status_var_increment(thd->status_var.table_open_cache_overflows); + } + else + { + tc[i].records++; + mysql_mutex_unlock(&tc[i].LOCK_table_cache); + } +} + + +/** + Acquire TABLE object from table cache. + + @pre share must be protected against removal. + + Acquired object cannot be evicted or acquired again. + + @return TABLE object, or NULL if no unused objects. 
+*/ + +TABLE *tc_acquire_table(THD *thd, TDC_element *element) +{ + uint32_t n_instances= tc_active_instances.load(std::memory_order_relaxed); + uint32_t i= thd->thread_id % n_instances; + TABLE *table; + + tc[i].lock_and_check_contention(n_instances, i); + table= element->free_tables[i].list.pop_front(); + if (table) + { + DBUG_ASSERT(!table->in_use); + table->in_use= thd; + /* The ex-unused table must be fully functional. */ + DBUG_ASSERT(table->db_stat && table->file); + /* The children must be detached from the table. */ + DBUG_ASSERT(!table->file->extra(HA_EXTRA_IS_ATTACHED_CHILDREN)); + tc[i].free_tables.remove(table); + } + mysql_mutex_unlock(&tc[i].LOCK_table_cache); + return table; +} + + +/** + Release TABLE object to table cache. + + @pre object is used by caller. + + Released object may be evicted or acquired again. + + While locked: + - if object is marked for purge, decrement tc_count + - add object to TABLE_SHARE::tdc.free_tables + - evict LRU object from table cache if we reached threshold + + While unlocked: + - mark object not in use by any thread + - free evicted/purged object + + @note Another thread may mark share for purge any moment (even + after version check). It means to-be-purged object may go to + unused lists. This other thread is expected to call tc_purge(), + which is synchronized with us on TABLE_SHARE::tdc.LOCK_table_share. 
+ + @return + @retval true object purged + @retval false object released +*/ + +void tc_release_table(TABLE *table) +{ + uint32 i= table->instance; + DBUG_ENTER("tc_release_table"); + DBUG_ASSERT(table->in_use); + DBUG_ASSERT(table->file); + DBUG_ASSERT(!table->pos_in_locked_tables); + + mysql_mutex_lock(&tc[i].LOCK_table_cache); + if (table->needs_reopen() || table->s->tdc->flushed || + tc[i].records > tc_size) + { + tc[i].records--; + mysql_mutex_unlock(&tc[i].LOCK_table_cache); + tc_remove_table(table); + } + else + { + table->in_use= 0; + table->s->tdc->free_tables[i].list.push_front(table); + tc[i].free_tables.push_back(table); + mysql_mutex_unlock(&tc[i].LOCK_table_cache); + } + DBUG_VOID_RETURN; +} + + +static void tdc_assert_clean_share(TDC_element *element) +{ + DBUG_ASSERT(element->share == 0); + DBUG_ASSERT(element->ref_count == 0); + DBUG_ASSERT(element->m_flush_tickets.is_empty()); + DBUG_ASSERT(element->all_tables.is_empty()); +#ifndef DBUG_OFF + for (uint32 i= 0; i < tc_instances; i++) + DBUG_ASSERT(element->free_tables[i].list.is_empty()); +#endif + DBUG_ASSERT(element->all_tables_refs == 0); + DBUG_ASSERT(element->next == 0); + DBUG_ASSERT(element->prev == 0); +} + + +/** + Delete share from hash and free share object. 
+*/ + +static void tdc_delete_share_from_hash(TDC_element *element) +{ + THD *thd= current_thd; + LF_PINS *pins; + TABLE_SHARE *share; + DBUG_ENTER("tdc_delete_share_from_hash"); + + mysql_mutex_assert_owner(&element->LOCK_table_share); + share= element->share; + DBUG_ASSERT(share); + element->share= 0; + PSI_CALL_release_table_share(share->m_psi); + share->m_psi= 0; + + if (!element->m_flush_tickets.is_empty()) + { + Wait_for_flush_list::Iterator it(element->m_flush_tickets); + Wait_for_flush *ticket; + while ((ticket= it++)) + (void) ticket->get_ctx()->m_wait.set_status(MDL_wait::GRANTED); + + do + { + mysql_cond_wait(&element->COND_release, &element->LOCK_table_share); + } while (!element->m_flush_tickets.is_empty()); + } + + mysql_mutex_unlock(&element->LOCK_table_share); + + if (thd) + { + fix_thd_pins(thd); + pins= thd->tdc_hash_pins; + } + else + pins= lf_hash_get_pins(&tdc_hash); + + DBUG_ASSERT(pins); // What can we do about it? + tdc_assert_clean_share(element); + lf_hash_delete(&tdc_hash, pins, element->m_key, element->m_key_length); + if (!thd) + lf_hash_put_pins(pins); + free_table_share(share); + DBUG_VOID_RETURN; +} + + +/** + Prepeare table share for use with table definition cache. +*/ + +static void lf_alloc_constructor(uchar *arg) +{ + TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD); + DBUG_ENTER("lf_alloc_constructor"); + mysql_mutex_init(key_TABLE_SHARE_LOCK_table_share, + &element->LOCK_table_share, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_TABLE_SHARE_COND_release, &element->COND_release, 0); + element->m_flush_tickets.empty(); + element->all_tables.empty(); + for (uint32 i= 0; i < tc_instances; i++) + element->free_tables[i].list.empty(); + element->all_tables_refs= 0; + element->share= 0; + element->ref_count= 0; + element->next= 0; + element->prev= 0; + DBUG_VOID_RETURN; +} + + +/** + Release table definition cache specific resources of table share. 
+*/ + +static void lf_alloc_destructor(uchar *arg) +{ + TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD); + DBUG_ENTER("lf_alloc_destructor"); + tdc_assert_clean_share(element); + mysql_cond_destroy(&element->COND_release); + mysql_mutex_destroy(&element->LOCK_table_share); + DBUG_VOID_RETURN; +} + + +static void tdc_hash_initializer(LF_HASH *, + TDC_element *element, LEX_STRING *key) +{ + memcpy(element->m_key, key->str, key->length); + element->m_key_length= (uint)key->length; + tdc_assert_clean_share(element); +} + + +static uchar *tdc_hash_key(const TDC_element *element, size_t *length, + my_bool) +{ + *length= element->m_key_length; + return (uchar*) element->m_key; +} + + +/** + Initialize table definition cache. +*/ + +bool tdc_init(void) +{ + DBUG_ENTER("tdc_init"); +#ifdef HAVE_PSI_INTERFACE + mysql_mutex_register("sql", all_tc_mutexes, array_elements(all_tc_mutexes)); + mysql_cond_register("sql", all_tc_conds, array_elements(all_tc_conds)); +#endif + /* Extra instance is allocated to avoid false sharing */ + if (!(tc= new Table_cache_instance[tc_instances + 1])) + DBUG_RETURN(true); + tc_allocated_size= (tc_instances + 1) * sizeof *tc; + update_malloc_size(tc_allocated_size, 0); + tdc_inited= true; + mysql_mutex_init(key_LOCK_unused_shares, &LOCK_unused_shares, + MY_MUTEX_INIT_FAST); + lf_hash_init(&tdc_hash, sizeof(TDC_element) + + sizeof(Share_free_tables) * (tc_instances - 1), + LF_HASH_UNIQUE, 0, 0, + (my_hash_get_key) tdc_hash_key, + &my_charset_bin); + tdc_hash.alloc.constructor= lf_alloc_constructor; + tdc_hash.alloc.destructor= lf_alloc_destructor; + tdc_hash.initializer= (lf_hash_initializer) tdc_hash_initializer; + DBUG_RETURN(false); +} + + +/** + Notify table definition cache that process of shutting down server + has started so it has to keep number of TABLE and TABLE_SHARE objects + minimal in order to reduce number of references to pluggable engines. 
+*/ + +void tdc_start_shutdown(void) +{ + DBUG_ENTER("tdc_start_shutdown"); + if (tdc_inited) + { + /* + Ensure that TABLE and TABLE_SHARE objects which are created for + tables that are open during process of plugins' shutdown are + immediately released. This keeps number of references to engine + plugins minimal and allows shutdown to proceed smoothly. + */ + tdc_size= 0; + tc_size= 0; + /* Free all cached but unused TABLEs and TABLE_SHAREs. */ + purge_tables(); + } + DBUG_VOID_RETURN; +} + + +/** + Deinitialize table definition cache. +*/ + +void tdc_deinit(void) +{ + DBUG_ENTER("tdc_deinit"); + if (tdc_inited) + { + tdc_inited= false; + lf_hash_destroy(&tdc_hash); + mysql_mutex_destroy(&LOCK_unused_shares); + if (tc) + { + tc->mark_memory_freed(); + delete [] tc; + tc= 0; + } + } + DBUG_VOID_RETURN; +} + + +/** + Get number of cached table definitions. + + @return Number of cached table definitions +*/ + +ulong tdc_records(void) +{ + return lf_hash_size(&tdc_hash); +} + + +void tdc_purge(bool all) +{ + DBUG_ENTER("tdc_purge"); + while (all || tdc_records() > tdc_size) + { + TDC_element *element; + + mysql_mutex_lock(&LOCK_unused_shares); + if (!(element= unused_shares.pop_front())) + { + mysql_mutex_unlock(&LOCK_unused_shares); + break; + } + + /* Concurrent thread may start using share again, reset prev and next. */ + element->prev= 0; + element->next= 0; + mysql_mutex_lock(&element->LOCK_table_share); + if (element->ref_count) + { + mysql_mutex_unlock(&element->LOCK_table_share); + mysql_mutex_unlock(&LOCK_unused_shares); + continue; + } + mysql_mutex_unlock(&LOCK_unused_shares); + + tdc_delete_share_from_hash(element); + } + DBUG_VOID_RETURN; +} + + +/** + Lock table share. + + Find table share with given db.table_name in table definition cache. Return + locked table share if found. 
+ + Locked table share means: + - table share is protected against removal from table definition cache + - no other thread can acquire/release table share + + Caller is expected to unlock table share with tdc_unlock_share(). + + @retval 0 Share not found + @retval MY_ERRPTR OOM + @retval ptr Pointer to locked table share +*/ + +TDC_element *tdc_lock_share(THD *thd, const char *db, const char *table_name) +{ + TDC_element *element; + char key[MAX_DBKEY_LENGTH]; + + DBUG_ENTER("tdc_lock_share"); + if (unlikely(fix_thd_pins(thd))) + DBUG_RETURN((TDC_element*) MY_ERRPTR); + + element= (TDC_element *) lf_hash_search(&tdc_hash, thd->tdc_hash_pins, + (uchar*) key, + tdc_create_key(key, db, table_name)); + if (element) + { + mysql_mutex_lock(&element->LOCK_table_share); + if (unlikely(!element->share || element->share->error)) + { + mysql_mutex_unlock(&element->LOCK_table_share); + element= 0; + } + lf_hash_search_unpin(thd->tdc_hash_pins); + } + + DBUG_RETURN(element); +} + + +/** + Unlock share locked by tdc_lock_share(). +*/ + +void tdc_unlock_share(TDC_element *element) +{ + DBUG_ENTER("tdc_unlock_share"); + mysql_mutex_unlock(&element->LOCK_table_share); + DBUG_VOID_RETURN; +} + + +int tdc_share_is_cached(THD *thd, const char *db, const char *table_name) +{ + char key[MAX_DBKEY_LENGTH]; + + if (unlikely(fix_thd_pins(thd))) + return -1; + + if (lf_hash_search(&tdc_hash, thd->tdc_hash_pins, (uchar*) key, + tdc_create_key(key, db, table_name))) + { + lf_hash_search_unpin(thd->tdc_hash_pins); + return 1; + } + return 0; +} + + +/* + Get TABLE_SHARE for a table. + + tdc_acquire_share() + thd Thread handle + tl Table that should be opened + flags operation: what to open table or view + out_table TABLE for the requested table + + IMPLEMENTATION + Get a table definition from the table definition cache. + If it doesn't exist, create a new from the table definition file. 
+ + RETURN + 0 Error + # Share for table +*/ + +TABLE_SHARE *tdc_acquire_share(THD *thd, TABLE_LIST *tl, uint flags, + TABLE **out_table) +{ + TABLE_SHARE *share; + TDC_element *element; + const char *key; + uint key_length= get_table_def_key(tl, &key); + my_hash_value_type hash_value= tl->mdl_request.key.tc_hash_value(); + bool was_unused; + DBUG_ENTER("tdc_acquire_share"); + + if (fix_thd_pins(thd)) + DBUG_RETURN(0); + +retry: + while (!(element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash, + thd->tdc_hash_pins, hash_value, (uchar*) key, key_length))) + { + LEX_STRING tmp= { const_cast<char*>(key), key_length }; + int res= lf_hash_insert(&tdc_hash, thd->tdc_hash_pins, (uchar*) &tmp); + + if (res == -1) + DBUG_RETURN(0); + else if (res == 1) + continue; + + element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash, + thd->tdc_hash_pins, hash_value, (uchar*) key, key_length); + /* It's safe to unpin the pins here, because an empty element was inserted + above, "empty" means at least element->share = 0. Some other thread can't + delete it while element->share == 0. And element->share is also protected + with element->LOCK_table_share mutex. 
*/ + lf_hash_search_unpin(thd->tdc_hash_pins); + DBUG_ASSERT(element); + + if (!(share= alloc_table_share(tl->db.str, tl->table_name.str, key, key_length))) + { + lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length); + DBUG_RETURN(0); + } + + /* note that tdc_acquire_share() *always* uses discovery */ + open_table_def(thd, share, flags | GTS_USE_DISCOVERY); + + if (checked_unlikely(share->error)) + { + free_table_share(share); + lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length); + DBUG_RETURN(0); + } + + mysql_mutex_lock(&element->LOCK_table_share); + element->share= share; + share->tdc= element; + element->ref_count++; + element->flushed= false; + mysql_mutex_unlock(&element->LOCK_table_share); + + tdc_purge(false); + if (out_table) + { + status_var_increment(thd->status_var.table_open_cache_misses); + *out_table= 0; + } + share->m_psi= PSI_CALL_get_table_share(false, share); + goto end; + } + + /* cannot force discovery of a cached share */ + DBUG_ASSERT(!(flags & GTS_FORCE_DISCOVERY)); + + if (out_table && (flags & GTS_TABLE)) + { + if ((*out_table= tc_acquire_table(thd, element))) + { + lf_hash_search_unpin(thd->tdc_hash_pins); + DBUG_ASSERT(!(flags & GTS_NOLOCK)); + DBUG_ASSERT(element->share); + DBUG_ASSERT(!element->share->error); + DBUG_ASSERT(!element->share->is_view); + status_var_increment(thd->status_var.table_open_cache_hits); + DBUG_RETURN(element->share); + } + status_var_increment(thd->status_var.table_open_cache_misses); + } + + mysql_mutex_lock(&element->LOCK_table_share); + if (!(share= element->share)) + { + mysql_mutex_unlock(&element->LOCK_table_share); + lf_hash_search_unpin(thd->tdc_hash_pins); + goto retry; + } + lf_hash_search_unpin(thd->tdc_hash_pins); + + /* + We found an existing table definition. Return it if we didn't get + an error when reading the table definition from file. 
+ */ + if (unlikely(share->error)) + { + open_table_error(share, share->error, share->open_errno); + goto err; + } + + if (share->is_view && !(flags & GTS_VIEW)) + { + open_table_error(share, OPEN_FRM_NOT_A_TABLE, ENOENT); + goto err; + } + if (!share->is_view && !(flags & GTS_TABLE)) + { + open_table_error(share, OPEN_FRM_NOT_A_VIEW, ENOENT); + goto err; + } + + was_unused= !element->ref_count; + element->ref_count++; + mysql_mutex_unlock(&element->LOCK_table_share); + if (was_unused) + { + mysql_mutex_lock(&LOCK_unused_shares); + if (element->prev) + { + /* + Share was not used before and it was in the old_unused_share list + Unlink share from this list + */ + DBUG_PRINT("info", ("Unlinking from not used list")); + unused_shares.remove(element); + element->next= 0; + element->prev= 0; + } + mysql_mutex_unlock(&LOCK_unused_shares); + } + +end: + DBUG_PRINT("exit", ("share: %p ref_count: %u", + share, share->tdc->ref_count)); + if (flags & GTS_NOLOCK) + { + tdc_release_share(share); + /* + if GTS_NOLOCK is requested, the returned share pointer cannot be used, + the share it points to may go away any moment. + But perhaps the caller is only interested to know whether a share or + table existed? + Let's return an invalid pointer here to catch dereferencing attempts. + */ + share= UNUSABLE_TABLE_SHARE; + } + DBUG_RETURN(share); + +err: + mysql_mutex_unlock(&element->LOCK_table_share); + DBUG_RETURN(0); +} + + +/** + Release table share acquired by tdc_acquire_share(). 
+*/ + +void tdc_release_share(TABLE_SHARE *share) +{ + DBUG_ENTER("tdc_release_share"); + + mysql_mutex_lock(&share->tdc->LOCK_table_share); + DBUG_PRINT("enter", + ("share: %p table: %s.%s ref_count: %u", + share, share->db.str, share->table_name.str, + share->tdc->ref_count)); + DBUG_ASSERT(share->tdc->ref_count); + + if (share->tdc->ref_count > 1) + { + share->tdc->ref_count--; + if (!share->is_view) + mysql_cond_broadcast(&share->tdc->COND_release); + mysql_mutex_unlock(&share->tdc->LOCK_table_share); + DBUG_VOID_RETURN; + } + mysql_mutex_unlock(&share->tdc->LOCK_table_share); + + mysql_mutex_lock(&LOCK_unused_shares); + mysql_mutex_lock(&share->tdc->LOCK_table_share); + if (--share->tdc->ref_count) + { + if (!share->is_view) + mysql_cond_broadcast(&share->tdc->COND_release); + mysql_mutex_unlock(&share->tdc->LOCK_table_share); + mysql_mutex_unlock(&LOCK_unused_shares); + DBUG_VOID_RETURN; + } + if (share->tdc->flushed || tdc_records() > tdc_size) + { + mysql_mutex_unlock(&LOCK_unused_shares); + tdc_delete_share_from_hash(share->tdc); + DBUG_VOID_RETURN; + } + /* Link share last in used_table_share list */ + DBUG_PRINT("info", ("moving share to unused list")); + DBUG_ASSERT(share->tdc->next == 0); + unused_shares.push_back(share->tdc); + mysql_mutex_unlock(&share->tdc->LOCK_table_share); + mysql_mutex_unlock(&LOCK_unused_shares); + DBUG_VOID_RETURN; +} + + +void tdc_remove_referenced_share(THD *thd, TABLE_SHARE *share) +{ + DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, share->db.str, + share->table_name.str, + MDL_EXCLUSIVE)); + share->tdc->flush_unused(true); + mysql_mutex_lock(&share->tdc->LOCK_table_share); + DEBUG_SYNC(thd, "before_wait_for_refs"); + share->tdc->wait_for_refs(1); + DBUG_ASSERT(share->tdc->all_tables.is_empty()); + share->tdc->ref_count--; + tdc_delete_share_from_hash(share->tdc); +} + + +/** + Removes all TABLE instances and corresponding TABLE_SHARE + + @param thd Thread context + @param db Name of database + @param table_name 
Name of table + + @note It assumes that table instances are already not used by any + (other) thread (this should be achieved by using meta-data locks). +*/ + +void tdc_remove_table(THD *thd, const char *db, const char *table_name) +{ + TDC_element *element; + DBUG_ENTER("tdc_remove_table"); + DBUG_PRINT("enter", ("name: %s", table_name)); + + DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, db, table_name, + MDL_EXCLUSIVE)); + + mysql_mutex_lock(&LOCK_unused_shares); + if (!(element= tdc_lock_share(thd, db, table_name))) + { + mysql_mutex_unlock(&LOCK_unused_shares); + DBUG_VOID_RETURN; + } + + DBUG_ASSERT(element != MY_ERRPTR); // What can we do about it? + + if (!element->ref_count) + { + if (element->prev) + { + unused_shares.remove(element); + element->prev= 0; + element->next= 0; + } + mysql_mutex_unlock(&LOCK_unused_shares); + + tdc_delete_share_from_hash(element); + DBUG_VOID_RETURN; + } + mysql_mutex_unlock(&LOCK_unused_shares); + + element->ref_count++; + mysql_mutex_unlock(&element->LOCK_table_share); + + /* We have to relock the mutex to avoid code duplication. Sigh. */ + tdc_remove_referenced_share(thd, element->share); + DBUG_VOID_RETURN; +} + + +/** + Check if table's share is being removed from the table definition + cache and, if yes, wait until the flush is complete. + + @param thd Thread context. + @param table_list Table which share should be checked. + @param timeout Timeout for waiting. + @param deadlock_weight Weight of this wait for deadlock detector. + + @retval 0 Success. Share is up to date or has been flushed. + @retval 1 Error (OOM, was killed, the wait resulted + in a deadlock or timeout). Reported. 
+*/ + +int tdc_wait_for_old_version(THD *thd, const char *db, const char *table_name, + ulong wait_timeout, uint deadlock_weight) +{ + TDC_element *element; + + if (!(element= tdc_lock_share(thd, db, table_name))) + return FALSE; + else if (element == MY_ERRPTR) + return TRUE; + else if (element->flushed) + { + struct timespec abstime; + set_timespec(abstime, wait_timeout); + return element->share->wait_for_old_version(thd, &abstime, deadlock_weight); + } + tdc_unlock_share(element); + return FALSE; +} + + +/** + Iterate table definition cache. + + Object is protected against removal from table definition cache. + + @note Returned TABLE_SHARE is not guaranteed to be fully initialized: + tdc_acquire_share() added new share, but didn't open it yet. If caller + needs fully initializer share, it must lock table share mutex. +*/ + +struct eliminate_duplicates_arg +{ + HASH hash; + MEM_ROOT root; + my_hash_walk_action action; + void *argument; +}; + + +static uchar *eliminate_duplicates_get_key(const uchar *element, size_t *length, + my_bool not_used __attribute__((unused))) +{ + LEX_STRING *key= (LEX_STRING *) element; + *length= key->length; + return (uchar *) key->str; +} + + +static my_bool eliminate_duplicates(TDC_element *element, + eliminate_duplicates_arg *arg) +{ + LEX_STRING *key= (LEX_STRING *) alloc_root(&arg->root, sizeof(LEX_STRING)); + + if (!key || !(key->str= (char*) memdup_root(&arg->root, element->m_key, + element->m_key_length))) + return TRUE; + + key->length= element->m_key_length; + + if (my_hash_insert(&arg->hash, (uchar *) key)) + return FALSE; + + return arg->action(element, arg->argument); +} + + +int tdc_iterate(THD *thd, my_hash_walk_action action, void *argument, + bool no_dups) +{ + eliminate_duplicates_arg no_dups_argument; + LF_PINS *pins; + myf alloc_flags= 0; + uint hash_flags= HASH_UNIQUE; + int res; + + if (thd) + { + fix_thd_pins(thd); + pins= thd->tdc_hash_pins; + alloc_flags= MY_THREAD_SPECIFIC; + hash_flags|= HASH_THREAD_SPECIFIC; 
+ } + else + pins= lf_hash_get_pins(&tdc_hash); + + if (!pins) + return ER_OUTOFMEMORY; + + if (no_dups) + { + init_alloc_root(PSI_INSTRUMENT_ME, &no_dups_argument.root, 4096, 4096, MYF(alloc_flags)); + my_hash_init(PSI_INSTRUMENT_ME, &no_dups_argument.hash, &my_charset_bin, + tdc_records(), 0, 0, eliminate_duplicates_get_key, 0, + hash_flags); + no_dups_argument.action= action; + no_dups_argument.argument= argument; + action= (my_hash_walk_action) eliminate_duplicates; + argument= &no_dups_argument; + } + + res= lf_hash_iterate(&tdc_hash, pins, action, argument); + + if (!thd) + lf_hash_put_pins(pins); + + if (no_dups) + { + my_hash_free(&no_dups_argument.hash); + free_root(&no_dups_argument.root, MYF(0)); + } + return res; +} + + +int show_tc_active_instances(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + var->type= SHOW_UINT; + var->value= buff; + *(reinterpret_cast<uint32_t*>(buff))= + tc_active_instances.load(std::memory_order_relaxed); + return 0; +} + + +/** + Waits until ref_count goes down to given number + + @param my_refs Number of references owned by the caller + + Caller must own at least one TABLE_SHARE reference. + + Even though current thread holds exclusive metadata lock on this share, + concurrent FLUSH TABLES threads may be in process of closing unused table + instances belonging to this share. E.g.: + thr1 (FLUSH TABLES): table= share->tdc.free_tables.pop_front(); + thr1 (FLUSH TABLES): share->tdc.all_tables.remove(table); + thr2 (ALTER TABLE): tdc_remove_table(); + thr1 (FLUSH TABLES): intern_close_table(table); + + Current remove type assumes that all table instances (except for those + that are owned by current thread) must be closed before + thd_remove_table() returns. Wait for such tables now. + + intern_close_table() decrements ref_count and signals COND_release. When + ref_count drops down to number of references owned by current thread + waiting is completed. 
+ + Unfortunately TABLE_SHARE::wait_for_old_version() cannot be used here + because it waits for all table instances, whereas we have to wait only + for those that are not owned by current thread. +*/ + +void TDC_element::wait_for_refs(uint my_refs) +{ + while (ref_count > my_refs) + mysql_cond_wait(&COND_release, &LOCK_table_share); +} + + +/** + Flushes unused TABLE instances + + @param thd Thread context + @param mark_flushed Whether to destroy TABLE_SHARE when released + + Caller is allowed to own used TABLE instances. + There must be no TABLE objects used by other threads and caller must own + exclusive metadata lock on the table. +*/ + +void TDC_element::flush(THD *thd, bool mark_flushed) +{ + DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, share->db.str, + share->table_name.str, + MDL_EXCLUSIVE)); + + flush_unused(mark_flushed); + + mysql_mutex_lock(&LOCK_table_share); + All_share_tables_list::Iterator it(all_tables); + uint my_refs= 0; + while (auto table= it++) + { + if (table->in_use == thd) + my_refs++; + } + wait_for_refs(my_refs); +#ifndef DBUG_OFF + it.rewind(); + while (auto table= it++) + DBUG_ASSERT(table->in_use == thd); +#endif + mysql_mutex_unlock(&LOCK_table_share); +} + + +/** + Flushes unused TABLE instances +*/ + +void TDC_element::flush_unused(bool mark_flushed) +{ + Share_free_tables::List purge_tables; + + mysql_mutex_lock(&LOCK_table_share); + if (mark_flushed) + flushed= true; + tc_remove_all_unused_tables(this, &purge_tables); + mysql_mutex_unlock(&LOCK_table_share); + + while (auto table= purge_tables.pop_front()) + intern_close_table(table); +} diff --git a/sql/table_cache.h b/sql/table_cache.h new file mode 100644 index 00000000..433df5e0 --- /dev/null +++ b/sql/table_cache.h @@ -0,0 +1,117 @@ +#ifndef TABLE_CACHE_H_INCLUDED +#define TABLE_CACHE_H_INCLUDED +/* Copyright (c) 2000, 2012, Oracle and/or its affiliates. 
+ Copyright (c) 2010, 2011 Monty Program Ab + Copyright (C) 2013 Sergey Vojtovich and MariaDB Foundation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +struct Share_free_tables +{ + typedef I_P_List<TABLE, I_P_List_adapter<TABLE, &TABLE::global_free_next, &TABLE::global_free_prev> > List; + List list; + /** Avoid false sharing between instances */ + char pad[CPU_LEVEL1_DCACHE_LINESIZE]; +}; + + +struct TDC_element +{ + uchar m_key[NAME_LEN + 1 + NAME_LEN + 1]; + uint m_key_length; + bool flushed; + TABLE_SHARE *share; + + /** + Protects ref_count, m_flush_tickets, all_tables, flushed, all_tables_refs. + */ + mysql_mutex_t LOCK_table_share; + mysql_cond_t COND_release; + TDC_element *next, **prev; /* Link to unused shares */ + uint ref_count; /* How many TABLE objects uses this */ + uint all_tables_refs; /* Number of refs to all_tables */ + /** + List of tickets representing threads waiting for the share to be flushed. + */ + Wait_for_flush_list m_flush_tickets; + /* + Doubly-linked (back-linked) lists of used and unused TABLE objects + for this share. 
+ */ + All_share_tables_list all_tables; + /** Avoid false sharing between TDC_element and free_tables */ + char pad[CPU_LEVEL1_DCACHE_LINESIZE]; + Share_free_tables free_tables[1]; + + inline void wait_for_refs(uint my_refs); + void flush(THD *thd, bool mark_flushed); + void flush_unused(bool mark_flushed); +}; + + +extern ulong tdc_size; +extern ulong tc_size; +extern uint32 tc_instances; + +extern bool tdc_init(void); +extern void tdc_start_shutdown(void); +extern void tdc_deinit(void); +extern ulong tdc_records(void); +extern void tdc_purge(bool all); +extern TDC_element *tdc_lock_share(THD *thd, const char *db, + const char *table_name); +extern void tdc_unlock_share(TDC_element *element); +int tdc_share_is_cached(THD *thd, const char *db, const char *table_name); +extern TABLE_SHARE *tdc_acquire_share(THD *thd, TABLE_LIST *tl, uint flags, + TABLE **out_table= 0); +extern void tdc_release_share(TABLE_SHARE *share); +void tdc_remove_referenced_share(THD *thd, TABLE_SHARE *share); +void tdc_remove_table(THD *thd, const char *db, const char *table_name); + +extern int tdc_wait_for_old_version(THD *thd, const char *db, + const char *table_name, + ulong wait_timeout, uint deadlock_weight); +extern int tdc_iterate(THD *thd, my_hash_walk_action action, void *argument, + bool no_dups= false); + +extern uint tc_records(void); +int show_tc_active_instances(THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope); +extern void tc_purge(); +extern void tc_add_table(THD *thd, TABLE *table); +extern void tc_release_table(TABLE *table); +extern TABLE *tc_acquire_table(THD *thd, TDC_element *element); + +/** + Create a table cache key for non-temporary table. + + @param key Buffer for key (must be at least NAME_LEN*2+2 bytes). + @param db Database name. + @param table_name Table name. + + @return Length of key. 
+*/ + +inline uint tdc_create_key(char *key, const char *db, const char *table_name) +{ + /* + In theory caller should ensure that both db and table_name are + not longer than NAME_LEN bytes. In practice we play safe to avoid + buffer overruns. + */ + return (uint) (strmake(strmake(key, db, NAME_LEN) + 1, table_name, + NAME_LEN) - key + 1); +} +#endif /* TABLE_CACHE_H_INCLUDED */ diff --git a/sql/temporary_tables.cc b/sql/temporary_tables.cc new file mode 100644 index 00000000..ecbfdde1 --- /dev/null +++ b/sql/temporary_tables.cc @@ -0,0 +1,1595 @@ +/* + Copyright (c) 2016, 2021, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +/** + All methods pertaining to temporary tables. +*/ + +#include "mariadb.h" +#include "sql_acl.h" /* TMP_TABLE_ACLS */ +#include "sql_base.h" /* tdc_create_key */ +#include "lock.h" /* mysql_lock_remove */ +#include "log_event.h" /* Query_log_event */ +#include "sql_show.h" /* append_identifier */ +#include "sql_handler.h" /* mysql_ha_rm_temporary_tables */ +#include "rpl_rli.h" /* rpl_group_info */ + +#define IS_USER_TABLE(A) ((A->tmp_table == TRANSACTIONAL_TMP_TABLE) || \ + (A->tmp_table == NON_TRANSACTIONAL_TMP_TABLE)) + +/** + Check whether temporary tables exist. The decision is made based on the + existence of TMP_TABLE_SHAREs in Open_tables_state::temporary_tables list. 
+ + @return true Temporary tables exist + false No temporary table exist +*/ +bool THD::has_thd_temporary_tables() +{ + DBUG_ENTER("THD::has_thd_temporary_tables"); + bool result= (temporary_tables && !temporary_tables->is_empty()); + DBUG_RETURN(result); +} + + +/** + Create a temporary table, open it and return the TABLE handle. + + @param frm [IN] Binary frm image + @param path [IN] File path (without extension) + @param db [IN] Schema name + @param table_name [IN] Table name + + @return Success A pointer to table object + Failure NULL +*/ +TABLE *THD::create_and_open_tmp_table(LEX_CUSTRING *frm, + const char *path, + const char *db, + const char *table_name, + bool open_internal_tables) +{ + DBUG_ENTER("THD::create_and_open_tmp_table"); + + TMP_TABLE_SHARE *share; + TABLE *table= NULL; + + if ((share= create_temporary_table(frm, path, db, table_name))) + { + open_options|= HA_OPEN_FOR_CREATE; + table= open_temporary_table(share, table_name); + open_options&= ~HA_OPEN_FOR_CREATE; + + /* + Failed to open a temporary table instance. As we are not passing + the created TMP_TABLE_SHARE to the caller, we must remove it from + the list and free it here. + */ + if (!table) + { + /* Remove the TABLE_SHARE from the list of temporary tables. */ + temporary_tables->remove(share); + + /* Free the TMP_TABLE_SHARE. */ + free_tmp_table_share(share, false); + DBUG_RETURN(0); + } + + /* Open any related tables */ + if (open_internal_tables && table->internal_tables && + open_and_lock_internal_tables(table, true)) + { + drop_temporary_table(table, NULL, false); + DBUG_RETURN(0); + } + } + + DBUG_RETURN(table); +} + + +/** + Check whether an open table with db/table name is in use. + + @param db [IN] Database name + @param table_name [IN] Table name + @param state [IN] State of temp table to open + + @return Success Pointer to first used table instance. 
+ Failure NULL +*/ +TABLE *THD::find_temporary_table(const char *db, + const char *table_name, + Temporary_table_state state) +{ + DBUG_ENTER("THD::find_temporary_table"); + + TABLE *table; + char key[MAX_DBKEY_LENGTH]; + uint key_length; + bool locked; + + if (!has_temporary_tables()) + { + DBUG_RETURN(NULL); + } + + key_length= create_tmp_table_def_key(key, db, table_name); + + locked= lock_temporary_tables(); + table= find_temporary_table(key, key_length, state); + if (locked) + { + DBUG_ASSERT(m_tmp_tables_locked); + unlock_temporary_tables(); + } + + DBUG_RETURN(table); +} + + +/** + Check whether an open table specified in TABLE_LIST is in use. + + @return tl [IN] TABLE_LIST + + @return Success Pointer to first used table instance. + Failure NULL +*/ +TABLE *THD::find_temporary_table(const TABLE_LIST *tl, + Temporary_table_state state) +{ + DBUG_ENTER("THD::find_temporary_table"); + TABLE *table= find_temporary_table(tl->get_db_name(), tl->get_table_name(), + state); + DBUG_RETURN(table); +} + + +/** + Check whether a temporary table exists with the specified key. + The key, in this case, is not the usual key used for temporary tables. + It does not contain server_id & pseudo_thread_id. This function is + essentially used use to check whether there is any temporary table + which _shadows_ a base table. 
+ (see: Query_cache::send_result_to_client()) + + @return Success A pointer to table share object + Failure NULL +*/ +TMP_TABLE_SHARE *THD::find_tmp_table_share_w_base_key(const char *key, + uint key_length) +{ + DBUG_ENTER("THD::find_tmp_table_share_w_base_key"); + + TMP_TABLE_SHARE *share; + TMP_TABLE_SHARE *result= NULL; + bool locked; + + if (!has_temporary_tables()) + { + DBUG_RETURN(NULL); + } + + locked= lock_temporary_tables(); + + All_tmp_tables_list::Iterator it(*temporary_tables); + while ((share= it++)) + { + if ((share->table_cache_key.length - TMP_TABLE_KEY_EXTRA) == key_length + && !memcmp(share->table_cache_key.str, key, key_length)) + { + result= share; + } + } + + if (locked) + { + DBUG_ASSERT(m_tmp_tables_locked); + unlock_temporary_tables(); + } + + DBUG_RETURN(result); +} + + +/** + Lookup the TMP_TABLE_SHARE using the given db/table_name.The server_id and + pseudo_thread_id used to generate table definition key is taken from THD + (see create_tmp_table_def_key()). Return NULL is none found. + + @return Success A pointer to table share object + Failure NULL +*/ +TMP_TABLE_SHARE *THD::find_tmp_table_share(const char *db, + const char *table_name) +{ + DBUG_ENTER("THD::find_tmp_table_share"); + + TMP_TABLE_SHARE *share; + char key[MAX_DBKEY_LENGTH]; + uint key_length; + + key_length= create_tmp_table_def_key(key, db, table_name); + share= find_tmp_table_share(key, key_length); + + DBUG_RETURN(share); +} + + +/** + Lookup TMP_TABLE_SHARE using the specified TABLE_LIST element. + Return NULL is none found. + + @param tl [IN] Table + + @return Success A pointer to table share object + Failure NULL +*/ +TMP_TABLE_SHARE *THD::find_tmp_table_share(const TABLE_LIST *tl) +{ + DBUG_ENTER("THD::find_tmp_table_share"); + TMP_TABLE_SHARE *share= find_tmp_table_share(tl->get_db_name(), + tl->get_table_name()); + DBUG_RETURN(share); +} + + +/** + Lookup TMP_TABLE_SHARE using the specified table definition key. + Return NULL is none found. 
+ + @return Success A pointer to table share object + Failure NULL +*/ +TMP_TABLE_SHARE *THD::find_tmp_table_share(const char *key, size_t key_length) +{ + DBUG_ENTER("THD::find_tmp_table_share"); + + TMP_TABLE_SHARE *share; + TMP_TABLE_SHARE *result= NULL; + bool locked; + + if (!has_temporary_tables()) + { + DBUG_RETURN(NULL); + } + + locked= lock_temporary_tables(); + + All_tmp_tables_list::Iterator it(*temporary_tables); + while ((share= it++)) + { + if (share->table_cache_key.length == key_length && + !(memcmp(share->table_cache_key.str, key, key_length))) + { + result= share; + break; + } + } + + if (locked) + { + DBUG_ASSERT(m_tmp_tables_locked); + unlock_temporary_tables(); + } + + DBUG_RETURN(result); +} + + +/** + Find a temporary table specified by TABLE_LIST instance in the open table + list and prepare its TABLE instance for use. If + + This function tries to resolve this table in the list of temporary tables + of this thread. Temporary tables are thread-local and "shadow" base + tables with the same name. + + @note In most cases one should use THD::open_tables() instead + of this call. + + @note One should finalize process of opening temporary table for table + list element by calling open_and_process_table(). This function + is responsible for table version checking and handling of merge + tables. + + @note We used to check global_read_lock before opening temporary tables. + However, that limitation was artificial and is removed now. + + @param tl [IN] TABLE_LIST + + @return Error status. + @retval false On success. If a temporary table exists + for the given key, tl->table is set. + @retval true On error. my_error() has been called. 
+*/ +bool THD::open_temporary_table(TABLE_LIST *tl) +{ + DBUG_ENTER("THD::open_temporary_table"); + DBUG_PRINT("enter", ("table: '%s'.'%s'", tl->db.str, tl->table_name.str)); + + TMP_TABLE_SHARE *share; + TABLE *table= NULL; + + /* + Code in open_table() assumes that TABLE_LIST::table can be non-zero only + for pre-opened temporary tables. + */ + DBUG_ASSERT(tl->table == NULL); + + /* + This function should not be called for cases when derived or I_S + tables can be met since table list elements for such tables can + have invalid db or table name. + Instead THD::open_tables() should be used. + */ + DBUG_ASSERT(!tl->derived); + DBUG_ASSERT(!tl->schema_table); + DBUG_ASSERT(has_temporary_tables() || + (rgi_slave && rgi_slave->is_parallel_exec)); + + if (tl->open_type == OT_BASE_ONLY) + { + DBUG_PRINT("info", ("skip_temporary is set or no temporary tables")); + DBUG_RETURN(false); + } + + if (!tl->db.str) + { + DBUG_PRINT("info", + ("Table reference to a temporary table must have database set")); + DBUG_RETURN(false); + } + + /* + Temporary tables are not safe for parallel replication. They were + designed to be visible to one thread only, so have no table locking. + Thus there is no protection against two conflicting transactions + committing in parallel and things like that. + + So for now, anything that uses temporary tables will be serialised + with anything before it, when using parallel replication. + */ + + if (rgi_slave && + rgi_slave->is_parallel_exec && + find_temporary_table(tl) && + wait_for_prior_commit()) + DBUG_RETURN(true); + + /* + First check if there is a reusable open table available in the + open table list. + */ + if (find_and_use_tmp_table(tl, &table)) + { + DBUG_RETURN(true); /* Error */ + } + + /* + No reusable table was found. We will have to open a new instance. + */ + if (!table && (share= find_tmp_table_share(tl))) + { + table= open_temporary_table(share, tl->get_table_name()); + /* + Temporary tables are not safe for parallel replication. 
They were + designed to be visible to one thread only, so have no table locking. + Thus there is no protection against two conflicting transactions + committing in parallel and things like that. + + So for now, anything that uses temporary tables will be serialised + with anything before it, when using parallel replication. + */ + if (table && rgi_slave && + rgi_slave->is_parallel_exec && + wait_for_prior_commit()) + DBUG_RETURN(true); + + if (!table && is_error()) + DBUG_RETURN(true); // Error when opening table + } + + if (!table) + { + if (tl->open_type == OT_TEMPORARY_ONLY && + tl->open_strategy == TABLE_LIST::OPEN_NORMAL) + { + my_error(ER_NO_SUCH_TABLE, MYF(0), tl->db.str, tl->table_name.str); + DBUG_RETURN(true); + } + DBUG_RETURN(false); + } + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (tl->partition_names) + { + /* Partitioned temporary tables is not supported. */ + DBUG_ASSERT(!table->part_info); + my_error(ER_PARTITION_CLAUSE_ON_NONPARTITIONED, MYF(0)); + DBUG_RETURN(true); + } +#endif + + table->query_id= query_id; + used|= THREAD_SPECIFIC_USED; + + /* It is neither a derived table nor non-updatable view. */ + tl->updatable= true; + tl->table= table; + + table->init(this, tl); + + DBUG_PRINT("info", ("Using temporary table")); + DBUG_RETURN(false); +} + + +/** + Pre-open temporary tables corresponding to table list elements. + + @note One should finalize process of opening temporary tables + by calling open_tables(). This function is responsible + for table version checking and handling of merge tables. + + @param tl [IN] TABLE_LIST + + @return false On success. If a temporary table exists + for the given element, tl->table is set. + true On error. my_error() has been called. 
+*/ +bool THD::open_temporary_tables(TABLE_LIST *tl) +{ + TABLE_LIST *first_not_own; + DBUG_ENTER("THD::open_temporary_tables"); + + if (!has_temporary_tables()) + DBUG_RETURN(0); + + first_not_own= lex->first_not_own_table(); + for (TABLE_LIST *table= tl; table && table != first_not_own; + table= table->next_global) + { + if (table->derived || table->schema_table) + { + /* + Derived and I_S tables will be handled by a later call to open_tables(). + */ + continue; + } + + if (open_temporary_table(table)) + { + DBUG_RETURN(true); + } + } + + DBUG_RETURN(false); +} + + +/** + Close all temporary tables created by 'CREATE TEMPORARY TABLE' for thread + creates one DROP TEMPORARY TABLE binlog event for each pseudo-thread. + + Temporary tables created in a sql slave is closed by + Relay_log_info::close_temporary_tables(). + + @return false Success + true Failure +*/ +bool THD::close_temporary_tables() +{ + DBUG_ENTER("THD::close_temporary_tables"); + + TMP_TABLE_SHARE *share; + TABLE *table; + + bool error= false; + + if (!has_thd_temporary_tables()) + { + if (temporary_tables) + { + my_free(temporary_tables); + temporary_tables= NULL; + } + DBUG_RETURN(false); + } + + DBUG_ASSERT(!rgi_slave); + + /* + Ensure we don't have open HANDLERs for tables we are about to close. + This is necessary when THD::close_temporary_tables() is called as + part of execution of BINLOG statement (e.g. for format description event). + */ + mysql_ha_rm_temporary_tables(this); + + /* Close all open temporary tables. */ + All_tmp_tables_list::Iterator it(*temporary_tables); + while ((share= it++)) + { + /* Traverse the table list. */ + while ((table= share->all_tmp_tables.pop_front())) + { + table->file->extra(HA_EXTRA_PREPARE_FOR_DROP); + free_temporary_table(table); + } + } + + // Write DROP TEMPORARY TABLE query log events to binary log. 
+ if (mysql_bin_log.is_open()) + { + error= log_events_and_free_tmp_shares(); + } + else + { + while ((share= temporary_tables->pop_front())) + { + free_tmp_table_share(share, true); + } + } + + /* By now, there mustn't be any elements left in the list. */ + DBUG_ASSERT(temporary_tables->is_empty()); + + my_free(temporary_tables); + temporary_tables= NULL; + + DBUG_RETURN(error); +} + + +/** + Rename a temporary table. + + @param table [IN] Table handle + @param db [IN] New schema name + @param table_name [IN] New table name + + @return false Success + true Error +*/ +bool THD::rename_temporary_table(TABLE *table, + const LEX_CSTRING *db, + const LEX_CSTRING *table_name) +{ + char *key; + uint key_length; + TABLE_SHARE *share= table->s; + DBUG_ENTER("THD::rename_temporary_table"); + + if (!(key= (char *) alloc_root(&share->mem_root, MAX_DBKEY_LENGTH))) + DBUG_RETURN(true); + + /* + Temporary tables are renamed by simply changing their table definition key. + */ + key_length= create_tmp_table_def_key(key, db->str, table_name->str); + share->set_table_cache_key(key, key_length); + + DBUG_RETURN(false); +} + + +/** + Drop a temporary table. + + Try to locate the table in the list of open temporary tables. + If the table is found: + - If the table is locked with LOCK TABLES or by prelocking, + unlock it and remove it from the list of locked tables + (THD::lock). Currently only transactional temporary tables + are locked. + - Close the temporary table, remove its .FRM. + - Remove the table share from the list of temporary table shares. + + This function is used to drop user temporary tables, as well as + internal tables created in CREATE TEMPORARY TABLE ... SELECT + or ALTER TABLE. + + @param table [IN] Temporary table to be deleted + @param is_trans [OUT] Is set to the type of the table: + transactional (e.g. innodb) as true or + non-transactional (e.g. myisam) as false. + @paral delete_table [IN] Whether to delete the table files? 
  @return false Table was dropped
          true  Error
*/
bool THD::drop_temporary_table(TABLE *table, bool *is_trans, bool delete_table)
{
  DBUG_ENTER("THD::drop_temporary_table");

  TMP_TABLE_SHARE *share;
  TABLE *tab;
  bool result= false;
  bool locked;

  DBUG_ASSERT(table);
  DBUG_PRINT("tmptable", ("Dropping table: '%s'.'%s'",
                          table->s->db.str, table->s->table_name.str));

  // close all handlers in case it is statement abort and some can be left
  if (is_error())
    table->file->ha_reset();

  /*
    On a replication slave this takes Relay_log_info::data_lock; `locked`
    remembers whether *we* acquired it (it may already be held).
  */
  locked= lock_temporary_tables();

  share= tmp_table_share(table);

  /* Table might be in use by some outer statement. */
  All_share_tables_list::Iterator it(share->all_tmp_tables);
  while ((tab= it++))
  {
    /*
      query_id != 0 marks an instance as in use; only the instance being
      dropped itself is exempt from this check.
    */
    if (tab != table && tab->query_id != 0)
    {
      /* Found a table instance in use. This table cannot be dropped. */
      my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias.c_ptr());
      result= true;
      goto end;
    }
  }

  if (is_trans)
    *is_trans= table->file->has_transactions();

  /*
    Iterate over the list of open tables and close them.
  */
  while ((tab= share->all_tmp_tables.pop_front()))
  {
    /*
      We need to set the THD as it may be different in case of
      parallel replication
    */
    tab->in_use= this;
    if (delete_table)
      tab->file->extra(HA_EXTRA_PREPARE_FOR_DROP);
    free_temporary_table(tab);
  }

  DBUG_ASSERT(temporary_tables);

  /* Remove the TABLE_SHARE from the list of temporary tables. */
  temporary_tables->remove(share);

  /* Free the TABLE_SHARE and/or delete the files. */
  result= free_tmp_table_share(share, delete_table);

end:
  if (locked)
  {
    DBUG_ASSERT(m_tmp_tables_locked);
    unlock_temporary_tables();
  }

  DBUG_RETURN(result);
}


/**
  Delete the temporary table files.

  @param base [IN] Handlerton for table to be deleted.
  @param path [IN] Path to the table to be deleted (i.e. path
                   to its .frm without an extension).
+ + @return false Success + true Error +*/ +bool THD::rm_temporary_table(handlerton *base, const char *path) +{ + DBUG_ENTER("THD::rm_temporary_table"); + + bool error= false; + char frm_path[FN_REFLEN + 1]; + + strxnmov(frm_path, sizeof(frm_path) - 1, path, reg_ext, NullS); + + if (base->drop_table(base, path) > 0) + { + error= true; + sql_print_warning("Could not remove temporary table: '%s', error: %d", + path, my_errno); + } + + if (mysql_file_delete(key_file_frm, frm_path, + MYF(MY_WME | MY_IGNORE_ENOENT))) + error= true; + + DBUG_RETURN(error); +} + + +/** + Mark all temporary tables which were used by the current statement or + sub-statement as free for reuse, but only if the query_id can be cleared. + + @remark For temp tables associated with a open SQL HANDLER the query_id + is not reset until the HANDLER is closed. +*/ +void THD::mark_tmp_tables_as_free_for_reuse() +{ + DBUG_ENTER("THD::mark_tmp_tables_as_free_for_reuse"); + + TMP_TABLE_SHARE *share; + TABLE *table; + bool locked; + + if (query_id == 0) + { + /* + Thread has not executed any statement and has not used any + temporary tables. + */ + DBUG_VOID_RETURN; + } + + if (!has_temporary_tables()) + { + DBUG_VOID_RETURN; + } + + locked= lock_temporary_tables(); + + All_tmp_tables_list::Iterator it(*temporary_tables); + while ((share= it++)) + { + All_share_tables_list::Iterator tables_it(share->all_tmp_tables); + while ((table= tables_it++)) + { + if ((table->query_id == query_id) && !table->open_by_handler) + mark_tmp_table_as_free_for_reuse(table); + } + } + + if (locked) + { + DBUG_ASSERT(m_tmp_tables_locked); + unlock_temporary_tables(); + } + + if (rgi_slave) + { + /* + Temporary tables are shared with other by sql execution threads. + As a safety measure, clear the pointer to the common area. + */ + temporary_tables= NULL; + } + + DBUG_VOID_RETURN; +} + + +/** + Reset a single temporary table. Effectively this "closes" one temporary + table in a session. 
+ + @param table Temporary table + + @return void +*/ +void THD::mark_tmp_table_as_free_for_reuse(TABLE *table) +{ + DBUG_ENTER("THD::mark_tmp_table_as_free_for_reuse"); + + DBUG_ASSERT(table->s->tmp_table); + + table->query_id= 0; + table->file->ha_reset(); + + /* Detach temporary MERGE children from temporary parent. */ + DBUG_ASSERT(table->file); + table->file->extra(HA_EXTRA_DETACH_CHILDREN); + + /* + Reset temporary table lock type to it's default value (TL_WRITE). + + Statements such as INSERT INTO .. SELECT FROM tmp, CREATE TABLE + .. SELECT FROM tmp and UPDATE may under some circumstances modify + the lock type of the tables participating in the statement. This + isn't a problem for non-temporary tables since their lock type is + reset at every open, but the same does not occur for temporary + tables for historical reasons. + + Furthermore, the lock type of temporary tables is not really that + important because they can only be used by one query at a time. + Nonetheless, it's safer from a maintenance point of view to reset + the lock type of this singleton TABLE object as to not cause problems + when the table is reused. + + Even under LOCK TABLES mode its okay to reset the lock type as + LOCK TABLES is allowed (but ignored) for a temporary table. + */ + table->reginfo.lock_type= TL_WRITE; + DBUG_VOID_RETURN; +} + + +/** + Remove and return the specified table's TABLE_SHARE from the temporary + tables list. + + @param table [IN] Table + + @return TMP_TABLE_SHARE of the specified table. +*/ +TMP_TABLE_SHARE *THD::save_tmp_table_share(TABLE *table) +{ + DBUG_ENTER("THD::save_tmp_table_share"); + + TMP_TABLE_SHARE *share; + + lock_temporary_tables(); + DBUG_ASSERT(temporary_tables); + share= tmp_table_share(table); + temporary_tables->remove(share); + unlock_temporary_tables(); + + DBUG_RETURN(share); +} + + +/** + Add the specified TMP_TABLE_SHARE to the temporary tables list. 
  @param share [IN] Table share

  @return void
*/
void THD::restore_tmp_table_share(TMP_TABLE_SHARE *share)
{
  DBUG_ENTER("THD::restore_tmp_table_share");

  lock_temporary_tables();
  DBUG_ASSERT(temporary_tables);
  temporary_tables->push_front(share);
  unlock_temporary_tables();

  DBUG_VOID_RETURN;
}


/**
  If it's a replication slave, report whether slave temporary tables
  exist (Relay_log_info::save_temporary_tables) or report about THD
  temporary table (Open_tables_state::temporary_tables) otherwise.

  @return true  Temporary tables exist
          false No temporary tables exist
*/
bool THD::has_temporary_tables()
{
  DBUG_ENTER("THD::has_temporary_tables");
  bool result;
#ifdef HAVE_REPLICATION
  if (rgi_slave)
  {
    /* Slave temp tables are shared between threads; protect the check. */
    mysql_mutex_lock(&rgi_slave->rli->data_lock);
    result= rgi_slave->rli->save_temporary_tables &&
            !rgi_slave->rli->save_temporary_tables->is_empty();
    mysql_mutex_unlock(&rgi_slave->rli->data_lock);
  }
  else
#endif
  {
    result= has_thd_temporary_tables();
  }
  DBUG_RETURN(result);
}


/**
  Create a table definition key.

  @param key [OUT] Buffer for the key to be created (must
                   be of size MAX_DBKEY_LENGTH)
  @param db [IN] Database name
  @param table_name [IN] Table name

  @return Key length.

  @note
    The table key is created from:
      db + \0
      table_name + \0

    Additionally, we add the following to make each temporary table unique on
    the slave.

    4 bytes of master thread id
    4 bytes of pseudo thread id
*/
uint THD::create_tmp_table_def_key(char *key, const char *db,
                                   const char *table_name)
{
  uint key_length;
  DBUG_ENTER("THD::create_tmp_table_def_key");

  key_length= tdc_create_key(key, db, table_name);
  /* Append server id and pseudo thread id to disambiguate slave tables. */
  int4store(key + key_length, variables.server_id);
  int4store(key + key_length + 4, variables.pseudo_thread_id);
  key_length += TMP_TABLE_KEY_EXTRA;

  DBUG_RETURN(key_length);
}


/**
  Create a temporary table.
+ + @param frm [IN] Binary frm image + @param path [IN] File path (without extension) + @param db [IN] Schema name + @param table_name [IN] Table name + + @return Success A pointer to table share object + Failure NULL +*/ +TMP_TABLE_SHARE *THD::create_temporary_table(LEX_CUSTRING *frm, + const char *path, + const char *db, + const char *table_name) +{ + DBUG_ENTER("THD::create_temporary_table"); + + TMP_TABLE_SHARE *share; + char key_cache[MAX_DBKEY_LENGTH]; + char *saved_key_cache; + char *tmp_path; + uint key_length; + bool locked; + int res; + + /* Temporary tables are not safe for parallel replication. */ + if (rgi_slave && + rgi_slave->is_parallel_exec && + wait_for_prior_commit()) + DBUG_RETURN(NULL); + + /* Create the table definition key for the temporary table. */ + key_length= create_tmp_table_def_key(key_cache, db, table_name); + + if (!(share= (TMP_TABLE_SHARE *) my_malloc(key_memory_table_share, + sizeof(TMP_TABLE_SHARE) + + strlen(path) + 1 + key_length, + MYF(MY_WME)))) + { + DBUG_RETURN(NULL); /* Out of memory */ + } + + tmp_path= (char *)(share + 1); + saved_key_cache= strmov(tmp_path, path) + 1; + memcpy(saved_key_cache, key_cache, key_length); + + init_tmp_table_share(this, share, saved_key_cache, key_length, + strend(saved_key_cache) + 1, tmp_path); + + /* + Prefer using frm image over file. The image might not be available in + ALTER TABLE, when the discovering engine took over the ownership (see + TABLE::read_frm_image). + */ + res= (frm->str) + ? share->init_from_binary_frm_image(this, false, frm->str, frm->length) + : open_table_def(this, share, GTS_TABLE | GTS_USE_DISCOVERY); + + if (res) + { + /* + No need to lock share->mutex as this is not needed for temporary tables. + */ + free_table_share(share); + my_free(share); + DBUG_RETURN(NULL); + } + + share->m_psi= PSI_CALL_get_table_share(true, share); + + locked= lock_temporary_tables(); + + /* Initialize the all_tmp_tables list. 
*/ + share->all_tmp_tables.empty(); + + /* + We need to alloc & initialize temporary_tables if this happens + to be the very first temporary table. + */ + if (!temporary_tables) + { + if ((temporary_tables= + (All_tmp_tables_list *) my_malloc(key_memory_table_share, + sizeof(All_tmp_tables_list), + MYF(MY_WME)))) + { + temporary_tables->empty(); + } + else + { + DBUG_RETURN(NULL); /* Out of memory */ + } + } + + /* Add share to the head of the temporary table share list. */ + temporary_tables->push_front(share); + + if (locked) + { + DBUG_ASSERT(m_tmp_tables_locked); + unlock_temporary_tables(); + } + + DBUG_RETURN(share); +} + + +/** + Find a table with the specified key. + + @param key [IN] Key + @param key_length [IN] Key length + @param state [IN] Open table state to look for + + @return Success Pointer to the table instance. + Failure NULL +*/ +TABLE *THD::find_temporary_table(const char *key, uint key_length, + Temporary_table_state state) +{ + DBUG_ENTER("THD::find_temporary_table"); + + TMP_TABLE_SHARE *share; + TABLE *table; + TABLE *result= NULL; + bool locked; + + locked= lock_temporary_tables(); + + All_tmp_tables_list::Iterator it(*temporary_tables); + while ((share= it++)) + { + if (share->table_cache_key.length == key_length && + !(memcmp(share->table_cache_key.str, key, key_length))) + { + /* A matching TMP_TABLE_SHARE is found. 
*/ + All_share_tables_list::Iterator tables_it(share->all_tmp_tables); + + bool found= false; + while (!found && (table= tables_it++)) + { + switch (state) + { + case TMP_TABLE_IN_USE: found= table->query_id > 0; break; + case TMP_TABLE_NOT_IN_USE: found= table->query_id == 0; break; + case TMP_TABLE_ANY: found= true; break; + } + } + if (table && unlikely(table->needs_reopen())) + { + share->all_tmp_tables.remove(table); + free_temporary_table(table); + it.rewind(); + continue; + } + result= table; + break; + } + } + + if (locked) + { + DBUG_ASSERT(m_tmp_tables_locked); + unlock_temporary_tables(); + } + + DBUG_RETURN(result); +} + + + +/** + Open a table from the specified TABLE_SHARE with the given alias. + + @param share [IN] Table share + @param alias [IN] Table alias + + @return Success A pointer to table object + Failure NULL +*/ +TABLE *THD::open_temporary_table(TMP_TABLE_SHARE *share, + const char *alias_arg) +{ + TABLE *table; + LEX_CSTRING alias= {alias_arg, strlen(alias_arg) }; + DBUG_ENTER("THD::open_temporary_table"); + + + if (!(table= (TABLE *) my_malloc(key_memory_TABLE, sizeof(TABLE), + MYF(MY_WME)))) + { + DBUG_RETURN(NULL); /* Out of memory */ + } + + if (open_table_from_share(this, share, &alias, + (uint) HA_OPEN_KEYFILE, + EXTRA_RECORD, + (ha_open_options | + (open_options & HA_OPEN_FOR_CREATE)), + table, false)) + { + my_free(table); + DBUG_RETURN(NULL); + } + + table->reginfo.lock_type= TL_WRITE; /* Simulate locked */ + table->grant.privilege= TMP_TABLE_ACLS; + table->query_id= query_id; + share->tmp_table= (table->file->has_transaction_manager() ? + TRANSACTIONAL_TMP_TABLE : NON_TRANSACTIONAL_TMP_TABLE); + share->not_usable_by_query_cache= 1; + + /* Add table to the head of table list. */ + share->all_tmp_tables.push_front(table); + + /* Increment Slave_open_temp_table_definitions status variable count. 
*/ + if (rgi_slave) + slave_open_temp_tables++; + + DBUG_PRINT("tmptable", ("Opened table: '%s'.'%s table: %p", + table->s->db.str, + table->s->table_name.str, table)); + DBUG_RETURN(table); +} + + +/** + Find a reusable table in the open table list using the specified TABLE_LIST. + + @param tl [IN] Table list + @param out_table [OUT] Pointer to the requested TABLE object + + @return Success false + Failure true +*/ +bool THD::find_and_use_tmp_table(const TABLE_LIST *tl, TABLE **out_table) +{ + char key[MAX_DBKEY_LENGTH]; + uint key_length; + bool result; + DBUG_ENTER("THD::find_and_use_tmp_table"); + + key_length= create_tmp_table_def_key(key, tl->get_db_name(), + tl->get_table_name()); + result= use_temporary_table(find_temporary_table(key, key_length, + TMP_TABLE_NOT_IN_USE), + out_table); + DBUG_RETURN(result); +} + +/** + Mark table as in-use. + + @param table [IN] Table to be marked in-use + @param out_table [OUT] Pointer to the specified table + + @return false Success + true Error +*/ +bool THD::use_temporary_table(TABLE *table, TABLE **out_table) +{ + DBUG_ENTER("THD::use_temporary_table"); + + *out_table= table; + + /* The following can happen if find_temporary_table() returns NULL */ + if (!table) + DBUG_RETURN(false); + + /* + Temporary tables are not safe for parallel replication. They were + designed to be visible to one thread only, so have no table locking. + Thus there is no protection against two conflicting transactions + committing in parallel and things like that. + + So for now, anything that uses temporary tables will be serialised + with anything before it, when using parallel replication. + + TODO: We might be able to introduce a reference count or something + on temp tables, and have slave worker threads wait for it to reach + zero before being allowed to use the temp table. Might not be worth + it though, as statement-based replication using temporary tables is + in any case rather fragile. 
+ */ + if (rgi_slave && + rgi_slave->is_parallel_exec && + wait_for_prior_commit()) + DBUG_RETURN(true); + + /* + We need to set the THD as it may be different in case of + parallel replication + */ + table->in_use= this; + + DBUG_RETURN(false); +} + + +/** + Close a temporary table. + + @param table [IN] Table handle + + @return void +*/ +void THD::close_temporary_table(TABLE *table) +{ + DBUG_ENTER("THD::close_temporary_table"); + + DBUG_PRINT("tmptable", ("closing table: '%s'.'%s'%p alias: '%s'", + table->s->db.str, table->s->table_name.str, + table, table->alias.c_ptr())); + + closefrm(table); + my_free(table); + + if (rgi_slave) + { + /* Natural invariant of temporary_tables */ + DBUG_ASSERT(slave_open_temp_tables || !temporary_tables); + /* Decrement Slave_open_temp_table_definitions status variable count. */ + slave_open_temp_tables--; + } + + DBUG_VOID_RETURN; +} + + +/** + Write query log events with "DROP TEMPORARY TABLES .." for each pseudo + thread to the binary log. + + @return false Success + true Error +*/ +bool THD::log_events_and_free_tmp_shares() +{ + DBUG_ENTER("THD::log_events_and_free_tmp_shares"); + + DBUG_ASSERT(!rgi_slave); + + TMP_TABLE_SHARE *share; + TMP_TABLE_SHARE *sorted; + TMP_TABLE_SHARE *prev_sorted; + // Assume thd->variables.option_bits has OPTION_QUOTE_SHOW_CREATE. + bool was_quote_show= true; + bool error= false; + bool found_user_tables= false; + // Better add "IF EXISTS" in case a RESET MASTER has been done. + const char stub[]= "DROP /*!40005 TEMPORARY */ TABLE IF EXISTS "; + char buf[FN_REFLEN]; + + String s_query(buf, sizeof(buf), system_charset_info); + s_query.copy(stub, sizeof(stub) - 1, system_charset_info); + + /* + Insertion sort of temporary tables by pseudo_thread_id to build ordered + list of sublists of equal pseudo_thread_id. 
+ */ + All_tmp_tables_list::Iterator it_sorted(*temporary_tables); + All_tmp_tables_list::Iterator it_unsorted(*temporary_tables); + uint sorted_count= 0; + while((share= it_unsorted++)) + { + if (IS_USER_TABLE(share)) + { + prev_sorted= NULL; + + if (!found_user_tables) found_user_tables= true; + + for (uint i= 0; i < sorted_count; i ++) + { + sorted= it_sorted ++; + + if (!IS_USER_TABLE(sorted) || + (tmpkeyval(sorted) > tmpkeyval(share))) + { + /* + Insert this share before the current element in + the sorted part of the list. + */ + temporary_tables->remove(share); + + if (prev_sorted) + { + temporary_tables->insert_after(prev_sorted, share); + } + else + { + temporary_tables->push_front(share); + } + break; + } + prev_sorted= sorted; + } + it_sorted.rewind(); + } + sorted_count ++; + } + + /* + We always quote db & table names. + */ + if (found_user_tables && + !(was_quote_show= MY_TEST(variables.option_bits & + OPTION_QUOTE_SHOW_CREATE))) + { + variables.option_bits |= OPTION_QUOTE_SHOW_CREATE; + } + + /* + Scan sorted temporary tables to generate sequence of DROP. + */ + share= temporary_tables->pop_front(); + while (share) + { + if (IS_USER_TABLE(share)) + { + used_t save_thread_specific_used= used & THREAD_SPECIFIC_USED; + my_thread_id save_pseudo_thread_id= variables.pseudo_thread_id; + char db_buf[FN_REFLEN]; + String db(db_buf, sizeof(db_buf), system_charset_info); + bool at_least_one_create_logged; + + /* + Set pseudo_thread_id to be that of the processed table. + */ + variables.pseudo_thread_id= tmpkeyval(share); + + db.copy(share->db.str, share->db.length, system_charset_info); + /* + Reset s_query() if changed by previous loop. + */ + s_query.length(sizeof(stub) - 1); + + /* + Loop forward through all tables that belong to a common database + within the sublist of common pseudo_thread_id to create single + DROP query. 
+ */ + for (at_least_one_create_logged= false; + share && IS_USER_TABLE(share) && + tmpkeyval(share) == variables.pseudo_thread_id && + share->db.length == db.length() && + memcmp(share->db.str, db.ptr(), db.length()) == 0; + /* Get the next TABLE_SHARE in the list. */ + share= temporary_tables->pop_front()) + { + if (share->table_creation_was_logged) + { + at_least_one_create_logged= true; + /* + We are going to add ` around the table names and possible more + due to special characters. + */ + append_identifier(this, &s_query, &share->table_name); + s_query.append(','); + } + rm_temporary_table(share->db_type(), share->path.str); + free_table_share(share); + my_free(share); + } + + if (at_least_one_create_logged) + { + clear_error(); + CHARSET_INFO *cs_save= variables.character_set_client; + variables.character_set_client= system_charset_info; + used|= THREAD_SPECIFIC_USED; + + Query_log_event qinfo(this, s_query.ptr(), + s_query.length() - 1 /* to remove trailing ',' */, + false, true, false, 0); + qinfo.db= db.ptr(); + qinfo.db_len= db.length(); + variables.character_set_client= cs_save; + + get_stmt_da()->set_overwrite_status(true); + transaction->stmt.mark_dropped_temp_table(); + bool error2= mysql_bin_log.write(&qinfo); + if (unlikely(error|= error2)) + { + /* + If we're here following THD::cleanup, thence the connection + has been closed already. So lets print a message to the + error log instead of pushing yet another error into the + stmt_da. + + Also, we keep the error flag so that we propagate the error + up in the stack. This way, if we're the SQL thread we notice + that THD::close_tables failed. (Actually, the SQL + thread only calls THD::close_tables while applying + old Start_log_event_v3 events.) 
+ */ + sql_print_error("Failed to write the DROP statement for " + "temporary tables to binary log"); + } + + get_stmt_da()->set_overwrite_status(false); + } + variables.pseudo_thread_id= save_pseudo_thread_id; + used = (used & ~THREAD_SPECIFIC_USED) | save_thread_specific_used; + } + else + { + free_tmp_table_share(share, true); + /* Get the next TABLE_SHARE in the list. */ + share= temporary_tables->pop_front(); + } + } + + if (!was_quote_show) + { + /* + Restore option. + */ + variables.option_bits&= ~OPTION_QUOTE_SHOW_CREATE; + } + + DBUG_RETURN(error); +} + + +/** + Delete the files and free the specified table share. + + @param share [IN] TABLE_SHARE to free + @param delete_table [IN] Whether to delete the table files? + + @return false Success + true Error +*/ +bool THD::free_tmp_table_share(TMP_TABLE_SHARE *share, bool delete_table) +{ + bool error= false; + DBUG_ENTER("THD::free_tmp_table_share"); + + if (delete_table) + { + error= rm_temporary_table(share->db_type(), share->path.str); + } + free_table_share(share); + my_free(share); + DBUG_RETURN(error); +} + + +/** + Free the specified table object. + + @param table [IN] Table object to free. + + @return void +*/ +void THD::free_temporary_table(TABLE *table) +{ + DBUG_ENTER("THD::free_temporary_table"); + + /* + If LOCK TABLES list is not empty and contains this table, unlock the table + and remove the table from this list. + */ + mysql_lock_remove(this, lock, table); + + close_temporary_table(table); + + DBUG_VOID_RETURN; +} + + +/** + On replication slave, acquire the Relay_log_info's data_lock and use slave + temporary tables. + + @return true Lock acquired + false Lock wasn't acquired +*/ +bool THD::lock_temporary_tables() +{ + DBUG_ENTER("THD::lock_temporary_tables"); + + /* Do not proceed if a lock has already been taken. 
*/ + if (m_tmp_tables_locked) + { + DBUG_RETURN(false); + } + +#ifdef HAVE_REPLICATION + if (rgi_slave) + { + mysql_mutex_lock(&rgi_slave->rli->data_lock); + temporary_tables= rgi_slave->rli->save_temporary_tables; + m_tmp_tables_locked= true; + } +#endif + + DBUG_RETURN(m_tmp_tables_locked); +} + + +/** + On replication slave, release the Relay_log_info::data_lock previously + acquired to use slave temporary tables. + + @return void +*/ +void THD::unlock_temporary_tables() +{ + DBUG_ENTER("THD::unlock_temporary_tables"); + + if (!m_tmp_tables_locked) + { + DBUG_VOID_RETURN; + } + +#ifdef HAVE_REPLICATION + if (rgi_slave) + { + rgi_slave->rli->save_temporary_tables= temporary_tables; + temporary_tables= NULL; /* Safety */ + mysql_mutex_unlock(&rgi_slave->rli->data_lock); + m_tmp_tables_locked= false; + } +#endif + + DBUG_VOID_RETURN; +} + + +/** + Close unused TABLE instances for given temporary table. + + @param tl [IN] TABLE_LIST + + Initial use case was TRUNCATE, which expects only one instance (which is used + by TRUNCATE itself) to be open. Most probably some ALTER TABLE variants and + REPAIR may have similar expectations. +*/ + +void THD::close_unused_temporary_table_instances(const TABLE_LIST *tl) +{ + TMP_TABLE_SHARE *share= find_tmp_table_share(tl); + + if (share) + { + All_share_tables_list::Iterator tables_it(share->all_tmp_tables); + + while (TABLE *table= tables_it++) + { + if (table->query_id == 0) + { + /* Note: removing current list element doesn't invalidate iterator. */ + share->all_tmp_tables.remove(table); + free_temporary_table(table); + } + } + } +} diff --git a/sql/thr_malloc.cc b/sql/thr_malloc.cc new file mode 100644 index 00000000..89ad359b --- /dev/null +++ b/sql/thr_malloc.cc @@ -0,0 +1,98 @@ +/* + Copyright (c) 2000, 2010, Oracle and/or its affiliates. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +/* Mallocs for used in threads */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "thr_malloc.h" +#include "sql_class.h" + +extern "C" { + void sql_alloc_error_handler(void) + { + THD *thd= current_thd; + if (likely(thd)) + { + if (! thd->is_error()) + { + /* + This thread is Out Of Memory. + An OOM condition is a fatal error. + It should not be caught by error handlers in stored procedures. + Also, recording that SQL condition in the condition area could + cause more memory allocations, which in turn could raise more + OOM conditions, causing recursion in the error handling code itself. + As a result, my_error() should not be invoked, and the + thread diagnostics area is set to an error status directly. + Note that Diagnostics_area::set_error_status() is safe, + since it does not call any memory allocation routines. + The visible result for a client application will be: + - a query fails with an ER_OUT_OF_RESOURCES error, + returned in the error packet. + - SHOW ERROR/SHOW WARNINGS may be empty. 
+ */ + thd->get_stmt_da()->set_error_status(ER_OUT_OF_RESOURCES); + } + } + + /* Skip writing to the error log to avoid mtr complaints */ + DBUG_EXECUTE_IF("simulate_out_of_memory", return;); + + sql_print_error("%s", ER_THD_OR_DEFAULT(thd, ER_OUT_OF_RESOURCES)); + } +} + +void init_sql_alloc(PSI_memory_key key, MEM_ROOT *mem_root, uint block_size, + uint pre_alloc, myf my_flags) +{ + init_alloc_root(key, mem_root, block_size, pre_alloc, my_flags); + mem_root->error_handler=sql_alloc_error_handler; +} + + +char *sql_strmake_with_convert(THD *thd, const char *str, size_t arg_length, + CHARSET_INFO *from_cs, + size_t max_res_length, + CHARSET_INFO *to_cs, size_t *result_length) +{ + char *pos; + size_t new_length= to_cs->mbmaxlen*arg_length; + max_res_length--; // Reserve place for end null + + set_if_smaller(new_length, max_res_length); + if (!(pos= (char*) thd->alloc(new_length + 1))) + return pos; // Error + + if ((from_cs == &my_charset_bin) || (to_cs == &my_charset_bin)) + { + // Safety if to_cs->mbmaxlen > 0 + new_length= MY_MIN(arg_length, max_res_length); + memcpy(pos, str, new_length); + } + else + { + uint dummy_errors; + new_length= copy_and_convert((char*) pos, new_length, to_cs, str, + arg_length, from_cs, &dummy_errors); + } + pos[new_length]= 0; + *result_length= new_length; + return pos; +} + diff --git a/sql/thr_malloc.h b/sql/thr_malloc.h new file mode 100644 index 00000000..cc56666b --- /dev/null +++ b/sql/thr_malloc.h @@ -0,0 +1,28 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef THR_MALLOC_INCLUDED +#define THR_MALLOC_INCLUDED + +typedef struct st_mem_root MEM_ROOT; + +void init_sql_alloc(PSI_memory_key key, MEM_ROOT *root, uint block_size, uint + pre_alloc_size, myf my_flags); +char *sql_strmake_with_convert(THD *thd, const char *str, size_t arg_length, + CHARSET_INFO *from_cs, + size_t max_res_length, + CHARSET_INFO *to_cs, size_t *result_length); + +#endif /* THR_MALLOC_INCLUDED */ diff --git a/sql/thread_cache.h b/sql/thread_cache.h new file mode 100644 index 00000000..5cb6c0fe --- /dev/null +++ b/sql/thread_cache.h @@ -0,0 +1,210 @@ +/* + Copyright (C) 2020 MariaDB Foundation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + + +/** + MariaDB thread cache for "one thread per connection" scheduler. + + Thread cache allows to re-use threads (as well as THD objects) for + subsequent connections. +*/ +class Thread_cache +{ + mutable mysql_cond_t COND_thread_cache; + mutable mysql_cond_t COND_flush_thread_cache; + mutable mysql_mutex_t LOCK_thread_cache; + /** Queue of new connection requests. */ + I_List list; + /** Number of threads parked in the cache. 
*/ + ulong cached_thread_count; + /** Number of active flush requests. */ + uint32_t kill_cached_threads; + /** + PFS stuff, only used during initialization. + Unfortunately needs to survive till destruction. + */ + PSI_cond_key key_COND_thread_cache, key_COND_flush_thread_cache; + PSI_mutex_key key_LOCK_thread_cache; + +public: + void init() + { +#ifdef HAVE_PSI_INTERFACE + PSI_cond_info conds[]= + { + { &key_COND_thread_cache, "COND_thread_cache", PSI_FLAG_GLOBAL }, + { &key_COND_flush_thread_cache, "COND_flush_thread_cache", + PSI_FLAG_GLOBAL } + }; + PSI_mutex_info mutexes[]= + { + { &key_LOCK_thread_cache, "LOCK_thread_cache", PSI_FLAG_GLOBAL } + }; + mysql_mutex_register("sql", mutexes, array_elements(mutexes)); + mysql_cond_register("sql", conds, array_elements(conds)); +#endif + mysql_mutex_init(key_LOCK_thread_cache, &LOCK_thread_cache, + MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_thread_cache, &COND_thread_cache, 0); + mysql_cond_init(key_COND_flush_thread_cache, &COND_flush_thread_cache, 0); + list.empty(); + kill_cached_threads= 0; + cached_thread_count= 0; + } + + + void destroy() + { + DBUG_ASSERT(cached_thread_count == 0); + DBUG_ASSERT(list.is_empty()); + mysql_cond_destroy(&COND_flush_thread_cache); + mysql_cond_destroy(&COND_thread_cache); + mysql_mutex_destroy(&LOCK_thread_cache); + } + + + /** + Flushes thread cache. + + Awakes parked threads and requests them to shutdown. + Waits until last parked thread leaves the cache. + */ + void flush() + { + mysql_mutex_lock(&LOCK_thread_cache); + kill_cached_threads++; + while (cached_thread_count) + { + mysql_cond_broadcast(&COND_thread_cache); + mysql_cond_wait(&COND_flush_thread_cache, &LOCK_thread_cache); + } + kill_cached_threads--; + mysql_mutex_unlock(&LOCK_thread_cache); + } + + + /** + Flushes thread cache and forbids threads parking in the cache. + + This is a pre-shutdown hook. 
+ */ + void final_flush() + { + kill_cached_threads++; + flush(); + } + + + /** + Requests parked thread to serve new connection. + + @return + @retval true connection is enqueued and parked thread is about to serve it + @retval false thread cache is empty + */ + bool enqueue(CONNECT *connect) + { + mysql_mutex_lock(&LOCK_thread_cache); + if (cached_thread_count) + { + list.push_back(connect); + cached_thread_count--; + mysql_mutex_unlock(&LOCK_thread_cache); + mysql_cond_signal(&COND_thread_cache); + return true; + } + mysql_mutex_unlock(&LOCK_thread_cache); + return false; + } + + + /** + Parks thread in the cache. + + Thread execution is suspended until either of the following occurs: + - thread is requested to serve new connection; + - thread cache is flushed; + - THREAD_CACHE_TIMEOUT elapsed. + + @return + @retval pointer to CONNECT if requested to serve new connection + @retval 0 if thread cache is flushed or on timeout + */ + CONNECT *park() + { + struct timespec abstime; + CONNECT *connect; + bool flushed= false; + DBUG_ENTER("Thread_cache::park"); + set_timespec(abstime, THREAD_CACHE_TIMEOUT); + + /* + Delete the instrumentation for the job that just completed, + before parking this pthread in the cache (blocked on COND_thread_cache). + */ + PSI_CALL_delete_current_thread(); + +#ifndef DBUG_OFF + while (_db_is_pushed_()) + _db_pop_(); +#endif + + mysql_mutex_lock(&LOCK_thread_cache); + if ((connect= list.get())) + cached_thread_count++; + else if (cached_thread_count < thread_cache_size && !kill_cached_threads) + { + /* Don't kill the thread, just put it in cache for reuse */ + DBUG_PRINT("info", ("Adding thread to cache")); + cached_thread_count++; + for (;;) + { + int error= mysql_cond_timedwait(&COND_thread_cache, &LOCK_thread_cache, + &abstime); + flushed= kill_cached_threads; + if ((connect= list.get())) + break; + else if (flushed || error == ETIMEDOUT || error == ETIME) + { + /* + If timeout, end thread. 
+ If a new thread is requested, we will handle the call, even if we + got a timeout (as we are already awake and free) + */ + cached_thread_count--; + break; + } + } + } + mysql_mutex_unlock(&LOCK_thread_cache); + if (flushed) + mysql_cond_signal(&COND_flush_thread_cache); + DBUG_RETURN(connect); + } + + + /** Returns the number of parked threads. */ + ulong size() const + { + mysql_mutex_lock(&LOCK_thread_cache); + ulong r= cached_thread_count; + mysql_mutex_unlock(&LOCK_thread_cache); + return r; + } +}; + +extern Thread_cache thread_cache; diff --git a/sql/thread_pool_info.cc b/sql/thread_pool_info.cc new file mode 100644 index 00000000..e3ffd160 --- /dev/null +++ b/sql/thread_pool_info.cc @@ -0,0 +1,364 @@ +/* Copyright(C) 2019 MariaDB + +This program is free software; you can redistribute itand /or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/ + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace Show { + +static ST_FIELD_INFO groups_fields_info[] = +{ + Column("GROUP_ID", SLong(6), NOT_NULL), + Column("CONNECTIONS", SLong(6), NOT_NULL), + Column("THREADS", SLong(6), NOT_NULL), + Column("ACTIVE_THREADS", SLong(6), NOT_NULL), + Column("STANDBY_THREADS", SLong(6), NOT_NULL), + Column("QUEUE_LENGTH", SLong(6), NOT_NULL), + Column("HAS_LISTENER", STiny(1), NOT_NULL), + Column("IS_STALLED", STiny(1), NOT_NULL), + CEnd() +}; + +} // namespace Show + + +static int groups_fill_table(THD* thd, TABLE_LIST* tables, COND*) +{ + if (!all_groups) + return 0; + + TABLE* table = tables->table; + for (uint i = 0; i < threadpool_max_size && all_groups[i].pollfd != INVALID_HANDLE_VALUE; i++) + { + thread_group_t* group = &all_groups[i]; + /* ID */ + table->field[0]->store(i, true); + /* CONNECTION_COUNT */ + table->field[1]->store(group->connection_count, true); + /* THREAD_COUNT */ + table->field[2]->store(group->thread_count, true); + /* ACTIVE_THREAD_COUNT */ + table->field[3]->store(group->active_thread_count, true); + /* STANDBY_THREAD_COUNT */ + table->field[4]->store(group->waiting_threads.elements(), true); + /* QUEUE LENGTH */ + uint queue_len = group->queues[TP_PRIORITY_LOW].elements() + + group->queues[TP_PRIORITY_HIGH].elements(); + table->field[5]->store(queue_len, true); + /* HAS_LISTENER */ + table->field[6]->store((longlong)(group->listener != 0), true); + /* IS_STALLED */ + table->field[7]->store(group->stalled, true); + + if (schema_table_store_record(thd, table)) + return 1; + } + return 0; +} + + +static int groups_init(void* p) +{ + ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*)p; + schema->fields_info = Show::groups_fields_info; + schema->fill_table = 
groups_fill_table; + return 0; +} + + +namespace Show { + +static ST_FIELD_INFO queues_field_info[] = +{ + Column("GROUP_ID", SLong(6), NOT_NULL), + Column("POSITION", SLong(6), NOT_NULL), + Column("PRIORITY", SLong(1), NOT_NULL), + Column("CONNECTION_ID", ULonglong(19), NULLABLE), + Column("QUEUEING_TIME_MICROSECONDS", SLonglong(19), NOT_NULL), + CEnd() +}; + +} // namespace Show + +typedef connection_queue_t::Iterator connection_queue_iterator; + +static int queues_fill_table(THD* thd, TABLE_LIST* tables, COND*) +{ + if (!all_groups) + return 0; + + TABLE* table = tables->table; + for (uint group_id = 0; + group_id < threadpool_max_size && all_groups[group_id].pollfd != INVALID_HANDLE_VALUE; + group_id++) + { + thread_group_t* group = &all_groups[group_id]; + + mysql_mutex_lock(&group->mutex); + bool err = false; + int pos = 0; + ulonglong now = microsecond_interval_timer(); + for (uint prio = 0; prio < NQUEUES && !err; prio++) + { + connection_queue_iterator it(group->queues[prio]); + TP_connection_generic* c; + while ((c = it++) != 0) + { + /* GROUP_ID */ + table->field[0]->store(group_id, true); + /* POSITION */ + table->field[1]->store(pos++, true); + /* PRIORITY */ + table->field[2]->store(prio, true); + /* CONNECTION_ID */ + if (c->thd) + { + table->field[3]->set_notnull(); + table->field[3]->store(c->thd->thread_id, true); + } + /* QUEUEING_TIME */ + table->field[4]->store(now - c->enqueue_time, true); + + err = schema_table_store_record(thd, table); + if (err) + break; + } + } + mysql_mutex_unlock(&group->mutex); + if (err) + return 1; + } + return 0; +} + +static int queues_init(void* p) +{ + ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*)p; + schema->fields_info = Show::queues_field_info; + schema->fill_table = queues_fill_table; + return 0; +} + +namespace Show { + +static ST_FIELD_INFO stats_fields_info[] = +{ + Column("GROUP_ID", SLong(6), NOT_NULL), + Column("THREAD_CREATIONS", SLonglong(19), NOT_NULL), + Column("THREAD_CREATIONS_DUE_TO_STALL", 
SLonglong(19), NOT_NULL), + Column("WAKES", SLonglong(19), NOT_NULL), + Column("WAKES_DUE_TO_STALL", SLonglong(19), NOT_NULL), + Column("THROTTLES", SLonglong(19), NOT_NULL), + Column("STALLS", SLonglong(19), NOT_NULL), + Column("POLLS_BY_LISTENER", SLonglong(19), NOT_NULL), + Column("POLLS_BY_WORKER", SLonglong(19), NOT_NULL), + Column("DEQUEUES_BY_LISTENER", SLonglong(19), NOT_NULL), + Column("DEQUEUES_BY_WORKER", SLonglong(19), NOT_NULL), + CEnd() +}; + +} // namespace Show + + +static int stats_fill_table(THD* thd, TABLE_LIST* tables, COND*) +{ + if (!all_groups) + return 0; + + TABLE* table = tables->table; + for (uint i = 0; i < threadpool_max_size && all_groups[i].pollfd != INVALID_HANDLE_VALUE; i++) + { + table->field[0]->store(i, true); + thread_group_t* group = &all_groups[i]; + + mysql_mutex_lock(&group->mutex); + thread_group_counters_t* counters = &group->counters; + table->field[1]->store(counters->thread_creations, true); + table->field[2]->store(counters->thread_creations_due_to_stall, true); + table->field[3]->store(counters->wakes, true); + table->field[4]->store(counters->wakes_due_to_stall, true); + table->field[5]->store(counters->throttles, true); + table->field[6]->store(counters->stalls, true); + table->field[7]->store(counters->polls[(int)operation_origin::LISTENER], true); + table->field[8]->store(counters->polls[(int)operation_origin::WORKER], true); + table->field[9]->store(counters->dequeues[(int)operation_origin::LISTENER], true); + table->field[10]->store(counters->dequeues[(int)operation_origin::WORKER], true); + mysql_mutex_unlock(&group->mutex); + if (schema_table_store_record(thd, table)) + return 1; + } + return 0; +} + +static int stats_reset_table() +{ + if (!all_groups) + return 0; + + for (uint i = 0; i < threadpool_max_size && all_groups[i].pollfd != INVALID_HANDLE_VALUE; i++) + { + thread_group_t* group = &all_groups[i]; + mysql_mutex_lock(&group->mutex); + memset(&group->counters, 0, sizeof(group->counters)); + 
mysql_mutex_unlock(&group->mutex); + } + return 0; +} + +static int stats_init(void* p) +{ + ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*)p; + schema->fields_info = Show::stats_fields_info; + schema->fill_table = stats_fill_table; + schema->reset_table = stats_reset_table; + return 0; +} + + +namespace Show { + +static ST_FIELD_INFO waits_fields_info[] = +{ + Column("REASON", Varchar(16), NOT_NULL), + Column("COUNT", SLonglong(19), NOT_NULL), + CEnd() +}; + +} // namespace Show + +/* See thd_wait_type enum for explanation*/ +static const LEX_CSTRING wait_reasons[THD_WAIT_LAST] = +{ + {STRING_WITH_LEN("UNKNOWN")}, + {STRING_WITH_LEN("SLEEP")}, + {STRING_WITH_LEN("DISKIO")}, + {STRING_WITH_LEN("ROW_LOCK")}, + {STRING_WITH_LEN("GLOBAL_LOCK")}, + {STRING_WITH_LEN("META_DATA_LOCK")}, + {STRING_WITH_LEN("TABLE_LOCK")}, + {STRING_WITH_LEN("USER_LOCK")}, + {STRING_WITH_LEN("BINLOG")}, + {STRING_WITH_LEN("GROUP_COMMIT")}, + {STRING_WITH_LEN("SYNC")}, + {STRING_WITH_LEN("NET")} +}; + +extern Atomic_counter tp_waits[THD_WAIT_LAST]; + +static int waits_fill_table(THD* thd, TABLE_LIST* tables, COND*) +{ + if (!all_groups) + return 0; + + TABLE* table = tables->table; + for (auto i = 0; i < THD_WAIT_LAST; i++) + { + table->field[0]->store(wait_reasons[i].str, wait_reasons[i].length, system_charset_info); + table->field[1]->store(tp_waits[i], true); + if (schema_table_store_record(thd, table)) + return 1; + } + return 0; +} + +static int waits_reset_table() +{ + for (auto i = 0; i < THD_WAIT_LAST; i++) + tp_waits[i] = 0; + + return 0; +} + +static int waits_init(void* p) +{ + ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*)p; + schema->fields_info = Show::waits_fields_info; + schema->fill_table = waits_fill_table; + schema->reset_table = waits_reset_table; + return 0; +} + +static struct st_mysql_information_schema plugin_descriptor = +{ MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION }; + +maria_declare_plugin(thread_pool_info) +{ + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &plugin_descriptor, + 
"THREAD_POOL_GROUPS", + "Vladislav Vaintroub", + "Provides information about threadpool groups.", + PLUGIN_LICENSE_GPL, + groups_init, + 0, + 0x0100, + NULL, + NULL, + "1.0", + MariaDB_PLUGIN_MATURITY_STABLE +}, +{ + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &plugin_descriptor, + "THREAD_POOL_QUEUES", + "Vladislav Vaintroub", + "Provides information about threadpool queues.", + PLUGIN_LICENSE_GPL, + queues_init, + 0, + 0x0100, + NULL, + NULL, + "1.0", + MariaDB_PLUGIN_MATURITY_STABLE +}, +{ + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &plugin_descriptor, + "THREAD_POOL_STATS", + "Vladislav Vaintroub", + "Provides performance counter information for threadpool.", + PLUGIN_LICENSE_GPL, + stats_init, + 0, + 0x0100, + NULL, + NULL, + "1.0", + MariaDB_PLUGIN_MATURITY_STABLE +}, +{ + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &plugin_descriptor, + "THREAD_POOL_WAITS", + "Vladislav Vaintroub", + "Provides wait counters for threadpool.", + PLUGIN_LICENSE_GPL, + waits_init, + 0, + 0x0100, + NULL, + NULL, + "1.0", + MariaDB_PLUGIN_MATURITY_STABLE +} +maria_declare_plugin_end; diff --git a/sql/threadpool.h b/sql/threadpool.h new file mode 100644 index 00000000..d815d538 --- /dev/null +++ b/sql/threadpool.h @@ -0,0 +1,166 @@ +/* Copyright (C) 2012, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#pragma once +#ifdef HAVE_POOL_OF_THREADS +#define MAX_THREAD_GROUPS 100000 + +/* Threadpool parameters */ +extern uint threadpool_min_threads; /* Minimum threads in pool */ +extern uint threadpool_idle_timeout; /* Shutdown idle worker threads after this timeout */ +extern uint threadpool_size; /* Number of parallel executing threads */ +extern uint threadpool_max_size; +extern uint threadpool_stall_limit; /* time interval in milliseconds for stall checks*/ +extern uint threadpool_max_threads; /* Maximum threads in pool */ +extern uint threadpool_oversubscribe; /* Maximum active threads in group */ +extern uint threadpool_prio_kickup_timer; /* Time before low prio item gets prio boost */ +extern my_bool threadpool_exact_stats; /* Better queueing time stats for information_schema, at small performance cost */ +extern my_bool threadpool_dedicated_listener; /* Listener thread does not pick up work items. */ +#ifdef _WIN32 +extern uint threadpool_mode; /* Thread pool implementation , windows or generic */ +#define TP_MODE_WINDOWS 0 +#define TP_MODE_GENERIC 1 +#endif + +#define DEFAULT_THREADPOOL_STALL_LIMIT 500U + +struct TP_connection; +struct st_vio; + +extern void tp_callback(TP_connection *c); +extern void tp_timeout_handler(TP_connection *c); + + + +/* + Threadpool statistics +*/ +struct TP_STATISTICS +{ + /* Current number of worker thread. 
*/ + Atomic_counter num_worker_threads; +}; + +extern TP_STATISTICS tp_stats; + + +/* Functions to set threadpool parameters */ +extern void tp_set_min_threads(uint val); +extern void tp_set_max_threads(uint val); +extern void tp_set_threadpool_size(uint val); +extern void tp_set_threadpool_stall_limit(uint val); +extern int tp_get_idle_thread_count(); +extern int tp_get_thread_count(); + + +enum TP_PRIORITY { + TP_PRIORITY_HIGH, + TP_PRIORITY_LOW, + TP_PRIORITY_AUTO +}; + + +enum TP_STATE +{ + TP_STATE_IDLE, + TP_STATE_RUNNING, + TP_STATE_PENDING +}; + +/* + Connection structure, encapsulates THD + structures for asynchronous + IO and pool. + + Platform specific parts are specified in subclasses called connection_t, + inside threadpool_win.cc and threadpool_unix.cc +*/ + +class CONNECT; + +struct TP_connection +{ + THD* thd; + CONNECT* connect; + TP_STATE state; + TP_PRIORITY priority; + TP_connection(CONNECT *c) : + thd(0), + connect(c), + state(TP_STATE_IDLE), + priority(TP_PRIORITY_HIGH) + {} + + virtual ~TP_connection() = default; + + /* Initialize io structures windows threadpool, epoll etc */ + virtual int init() = 0; + + virtual void set_io_timeout(int sec) = 0; + + /* Read for the next client command (async) with specified timeout */ + virtual int start_io() = 0; + + virtual void wait_begin(int type)= 0; + virtual void wait_end() = 0; + IF_WIN(virtual,) void init_vio(st_vio *){}; +}; + + +struct TP_pool +{ + virtual ~TP_pool() = default; + virtual int init()= 0; + virtual TP_connection *new_connection(CONNECT *)= 0; + virtual void add(TP_connection *c)= 0; + virtual int set_max_threads(uint){ return 0; } + virtual int set_min_threads(uint){ return 0; } + virtual int set_pool_size(uint){ return 0; } + virtual int set_idle_timeout(uint){ return 0; } + virtual int set_oversubscribe(uint){ return 0; } + virtual int set_stall_limit(uint){ return 0; } + virtual int get_thread_count() { return tp_stats.num_worker_threads; } + virtual int get_idle_thread_count(){ 
return 0; } + virtual void resume(TP_connection* c)=0; +}; + +#ifdef _WIN32 + +struct TP_pool_win:TP_pool +{ + TP_pool_win(); + virtual int init(); + virtual ~TP_pool_win(); + virtual TP_connection *new_connection(CONNECT *c); + virtual void add(TP_connection *); + virtual int set_max_threads(uint); + virtual int set_min_threads(uint); + void resume(TP_connection *c); +}; +#endif + +struct TP_pool_generic :TP_pool +{ + TP_pool_generic(); + ~TP_pool_generic(); + virtual int init(); + virtual TP_connection *new_connection(CONNECT *c); + virtual void add(TP_connection *); + virtual int set_pool_size(uint); + virtual int set_stall_limit(uint); + virtual int get_idle_thread_count(); + void resume(TP_connection* c); +}; + +#endif /* HAVE_POOL_OF_THREADS */ diff --git a/sql/threadpool_common.cc b/sql/threadpool_common.cc new file mode 100644 index 00000000..22730550 --- /dev/null +++ b/sql/threadpool_common.cc @@ -0,0 +1,636 @@ +/* Copyright (C) 2012, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef WITH_WSREP +#include "wsrep_trans_observer.h" +#endif /* WITH_WSREP */ + +#ifdef _WIN32 +#include "threadpool_winsockets.h" +#endif + +/* Threadpool parameters */ + +uint threadpool_min_threads; +uint threadpool_idle_timeout; +uint threadpool_size; +uint threadpool_max_size; +uint threadpool_stall_limit; +uint threadpool_max_threads; +uint threadpool_oversubscribe; +uint threadpool_mode; +uint threadpool_prio_kickup_timer; +my_bool threadpool_exact_stats; +my_bool threadpool_dedicated_listener; + +/* Stats */ +TP_STATISTICS tp_stats; + + +static void threadpool_remove_connection(THD *thd); +static dispatch_command_return threadpool_process_request(THD *thd); +static THD* threadpool_add_connection(CONNECT *connect, TP_connection *c); + +extern bool do_command(THD*); + +static inline TP_connection *get_TP_connection(THD *thd) +{ + return (TP_connection *)thd->event_scheduler.data; +} + +/* + Worker threads contexts, and THD contexts. + ========================================= + + Both worker threads and connections have their sets of thread local variables + At the moment it is mysys_var (this has specific data for dbug, my_error and + similar goodies), and PSI per-client structure. + + Whenever query is executed following needs to be done: + + 1. Save worker thread context. + 2. Change TLS variables to connection specific ones using thread_attach(THD*). + This function does some additional work , e.g setting up + thread_stack/thread_ends_here pointers. + 3. Process query + 4. Restore worker thread context. + + Connection login and termination follows similar schema w.r.t saving and + restoring contexts. 
+ + For both worker thread, and for the connection, mysys variables are created + using my_thread_init() and freed with my_thread_end(). + +*/ +struct Worker_thread_context +{ + PSI_thread *psi_thread; + st_my_thread_var* mysys_var; + + Worker_thread_context() + { + psi_thread= PSI_CALL_get_thread(); + mysys_var= my_thread_var; + } + + ~Worker_thread_context() + { + PSI_CALL_set_thread(psi_thread); + set_mysys_var(mysys_var); + set_current_thd(nullptr); + } +}; + + +#ifdef HAVE_PSI_INTERFACE + +/* + The following fixes PSI "idle" psi instrumentation. + The server assumes that connection becomes idle + just before net_read_packet() and switches to active after it. + In out setup, server becomes idle when async socket io is made. +*/ + +extern void net_before_header_psi(struct st_net *net, void *user_data, size_t); + +static void dummy_before_header(struct st_net *, void *, size_t) +{ +} + +static void re_init_net_server_extension(THD *thd) +{ + thd->m_net_server_extension.m_before_header = dummy_before_header; +} + +#else + +#define re_init_net_server_extension(thd) + +#endif /* HAVE_PSI_INTERFACE */ + +static inline bool has_unread_compressed_data(const NET *net) +{ + return net->compress && net->remain_in_buf; +} + +static inline void set_thd_idle(THD *thd) +{ + thd->net.reading_or_writing= 1; +#ifdef HAVE_PSI_INTERFACE + if (!has_unread_compressed_data(&thd->net)) + net_before_header_psi(&thd->net, thd, 0); +#endif +} + +/* + Per OS thread info (ID and pthread_self) + stored as TLS, because of syscall overhead + (on Linux) +*/ +struct OS_thread_info +{ + pthread_t self; + ssize_t stack_size; + uint32_t thread_id; + + inline bool initialized() { return stack_size != 0; } + + void init(ssize_t ssize) + { +#if _WIN32 + self= thread_id= GetCurrentThreadId(); +#else +#ifdef __NR_gettid + thread_id= (uint32) syscall(__NR_gettid); +#else + thread_id= 0; +#endif + self= pthread_self(); +#endif + stack_size= ssize; + } +}; +static thread_local OS_thread_info 
os_thread_info; + +static const OS_thread_info *get_os_thread_info() +{ + auto *res= &os_thread_info; + if (!res->initialized()) + res->init((ssize_t) (my_thread_stack_size * STACK_DIRECTION)); + return res; +} + +/* + Attach/associate the connection with the OS thread, +*/ +static void thread_attach(THD* thd) +{ +#ifdef WITH_WSREP + /* Wait until possible background rollback has finished before + attaching the thd. */ + wsrep_wait_rollback_complete_and_acquire_ownership(thd); +#endif /* WITH_WSREP */ + set_mysys_var(thd->mysys_var); + thd->thread_stack=(char*)&thd; + set_current_thd(thd); + auto tinfo= get_os_thread_info(); + thd->real_id= tinfo->self; + thd->os_thread_id= tinfo->thread_id; + DBUG_ASSERT(thd->mysys_var == my_thread_var); + thd->mysys_var->stack_ends_here= thd->thread_stack + tinfo->stack_size; + PSI_CALL_set_thread(thd->get_psi()); +} + +/* + Determine connection priority , using current + transaction state and 'threadpool_priority' variable value. +*/ +static TP_PRIORITY get_priority(TP_connection *c) +{ + DBUG_ASSERT(c->thd == current_thd); + TP_PRIORITY prio= (TP_PRIORITY)c->thd->variables.threadpool_priority; + if (prio == TP_PRIORITY_AUTO) + prio= c->thd->transaction->is_active() ? TP_PRIORITY_HIGH : TP_PRIORITY_LOW; + + return prio; +} + + +void tp_callback(TP_connection *c) +{ + DBUG_ASSERT(c); + + Worker_thread_context worker_context; + + THD *thd= c->thd; + + c->state = TP_STATE_RUNNING; + + if (unlikely(!thd)) + { + /* No THD, need to login first. */ + DBUG_ASSERT(c->connect); + thd= c->thd= threadpool_add_connection(c->connect, c); + if (!thd) + { + /* Bail out on connect error.*/ + goto error; + } + c->connect= 0; + } + else + { +retry: + switch(threadpool_process_request(thd)) + { + case DISPATCH_COMMAND_WOULDBLOCK: + if (!thd->async_state.try_suspend()) + { + /* + All async operations finished meanwhile, thus nobody is will wake up + this THD. Therefore, we'll resume "manually" here. 
+ */ + thd->async_state.m_state = thd_async_state::enum_async_state::RESUMED; + goto retry; + } + return; + case DISPATCH_COMMAND_CLOSE_CONNECTION: + /* QUIT or an error occurred. */ + goto error; + case DISPATCH_COMMAND_SUCCESS: + break; + } + thd->async_state.m_state= thd_async_state::enum_async_state::NONE; + } + + /* Set priority */ + c->priority= get_priority(c); + + /* Read next command from client. */ + c->set_io_timeout(thd->get_net_wait_timeout()); + c->state= TP_STATE_IDLE; + if (c->start_io()) + goto error; + return; + +error: + c->thd= 0; + if (thd) + { + threadpool_remove_connection(thd); + } + delete c; +} + + +static THD *threadpool_add_connection(CONNECT *connect, TP_connection *c) +{ + THD *thd= NULL; + + /* + Create a new connection context: mysys_thread_var and PSI thread + Store them in THD. + */ + + set_mysys_var(NULL); + my_thread_init(); + st_my_thread_var* mysys_var= my_thread_var; + PSI_CALL_set_thread(PSI_CALL_new_thread(key_thread_one_connection, connect, 0)); + if (!mysys_var ||!(thd= connect->create_thd(NULL))) + { + /* Out of memory? */ + connect->close_and_delete(); + if (mysys_var) + my_thread_end(); + return NULL; + } + + thd->event_scheduler.data= c; + server_threads.insert(thd); // Make THD visible in show processlist + delete connect; // must be after server_threads.insert, see close_connections() + thd->set_mysys_var(mysys_var); + + /* Login. */ + thread_attach(thd); + mysql_socket_set_thread_owner(thd->net.vio->mysql_socket); + re_init_net_server_extension(thd); + ulonglong now= microsecond_interval_timer(); + thd->prior_thr_create_utime= now; + thd->start_utime= now; + thd->thr_create_utime= now; + + setup_connection_thread_globals(thd); + + if (thd_prepare_connection(thd)) + goto end; + + c->init_vio(thd->net.vio); + + /* + Check if THD is ok, as prepare_new_connection_state() + can fail, for example if init command failed. 
+ */ + if (!thd_is_connection_alive(thd)) + goto end; + + thd->skip_wait_timeout= true; + set_thd_idle(thd); + return thd; + +end: + threadpool_remove_connection(thd); + return NULL; +} + + +static void threadpool_remove_connection(THD *thd) +{ + thread_attach(thd); + thd->net.reading_or_writing = 0; + end_connection(thd); + close_connection(thd, 0); + unlink_thd(thd); + PSI_CALL_delete_current_thread(); // before THD is destroyed + delete thd; + + /* + Free resources associated with this connection: + mysys thread_var and PSI thread. + */ + my_thread_end(); +} + + +/* + Ensure that proper error message is sent to client, + and "aborted" message appears in the log in case of + wait timeout. + + See also timeout handling in net_serv.cc +*/ +static void handle_wait_timeout(THD *thd) +{ + thd->get_stmt_da()->reset_diagnostics_area(); + thd->reset_killed(); + my_error(ER_NET_READ_INTERRUPTED, MYF(0)); + thd->net.last_errno= ER_NET_READ_INTERRUPTED; + thd->net.error= 2; +} + +/** Check if some client data is cached in thd->net or thd->net.vio */ +static bool has_unread_data(THD* thd) +{ + NET *net= &thd->net; + Vio *vio= net->vio; + return vio->has_data(vio) || has_unread_compressed_data(net); +} + + +/** + Process a single client request or a single batch. +*/ +static dispatch_command_return threadpool_process_request(THD *thd) +{ + dispatch_command_return retval= DISPATCH_COMMAND_SUCCESS; + + thread_attach(thd); + if(thd->async_state.m_state == thd_async_state::enum_async_state::RESUMED) + goto resume; + + if (thd->killed >= KILL_CONNECTION) + { + /* + killed flag was set by timeout handler + or KILL command. Return error. 
+ */ + retval= DISPATCH_COMMAND_CLOSE_CONNECTION; + if(thd->killed == KILL_WAIT_TIMEOUT) + handle_wait_timeout(thd); + goto end; + } + + + /* + In the loop below, the flow is essentially the copy of + thead-per-connections + logic, see do_handle_one_connection() in sql_connect.c + + The goal is to execute a single query, thus the loop is normally executed + only once. However for SSL connections, it can be executed multiple times + (SSL can preread and cache incoming data, and vio->has_data() checks if it + was the case). + */ + for(;;) + { + thd->net.reading_or_writing= 0; + if (mysql_audit_release_required(thd)) + mysql_audit_release(thd); + +resume: + retval= do_command(thd, false); + switch(retval) + { + case DISPATCH_COMMAND_WOULDBLOCK: + case DISPATCH_COMMAND_CLOSE_CONNECTION: + goto end; + case DISPATCH_COMMAND_SUCCESS: + break; + } + + if (!thd_is_connection_alive(thd)) + { + retval=DISPATCH_COMMAND_CLOSE_CONNECTION; + goto end; + } + + set_thd_idle(thd); + + if (!has_unread_data(thd)) + { + /* More info on this debug sync is in sql_parse.cc*/ + DEBUG_SYNC(thd, "before_do_command_net_read"); + goto end; + } + } + +end: + return retval; +} + + +static TP_pool *pool; + +static bool tp_init() +{ + +#ifdef _WIN32 + if (threadpool_mode == TP_MODE_WINDOWS) + pool= new (std::nothrow) TP_pool_win; + else + pool= new (std::nothrow) TP_pool_generic; +#else + pool= new (std::nothrow) TP_pool_generic; +#endif + if (!pool) + return true; + if (pool->init()) + { + delete pool; + pool= 0; + return true; + } +#ifdef _WIN32 + init_win_aio_buffers(max_connections); +#endif + return false; +} + +static void tp_add_connection(CONNECT *connect) +{ + TP_connection *c= pool->new_connection(connect); + DBUG_EXECUTE_IF("simulate_failed_connection_1", delete c ; c= 0;); + if (c) + pool->add(c); + else + connect->close_and_delete(); +} + +int tp_get_idle_thread_count() +{ + return pool? pool->get_idle_thread_count(): 0; +} + +int tp_get_thread_count() +{ + return pool ? 
pool->get_thread_count() : 0; +} + +void tp_set_min_threads(uint val) +{ + if (pool) + pool->set_min_threads(val); +} + + +void tp_set_max_threads(uint val) +{ + if (pool) + pool->set_max_threads(val); +} + +void tp_set_threadpool_size(uint val) +{ + if (pool) + pool->set_pool_size(val); +} + + +void tp_set_threadpool_stall_limit(uint val) +{ + if (pool) + pool->set_stall_limit(val); +} + + +void tp_timeout_handler(TP_connection *c) +{ + if (c->state != TP_STATE_IDLE) + return; + THD *thd= c->thd; + mysql_mutex_lock(&thd->LOCK_thd_kill); + Vio *vio= thd->net.vio; + if (vio && (vio_pending(vio) > 0 || vio->has_data(vio)) && + c->state == TP_STATE_IDLE) + { + /* + There is some data on that connection, i.e + i.e there was no inactivity timeout. + Don't kill. + */ + c->state= TP_STATE_PENDING; + } + else if (c->state == TP_STATE_IDLE) + { + thd->set_killed_no_mutex(KILL_WAIT_TIMEOUT); + c->priority= TP_PRIORITY_HIGH; + post_kill_notification(thd); + } + mysql_mutex_unlock(&thd->LOCK_thd_kill); +} + +MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) Atomic_counter tp_waits[THD_WAIT_LAST]; + +static void tp_wait_begin(THD *thd, int type) +{ + TP_connection *c = get_TP_connection(thd); + if (c) + { + DBUG_ASSERT(type > 0 && type < THD_WAIT_LAST); + tp_waits[type]++; + c->wait_begin(type); + } +} + + +static void tp_wait_end(THD *thd) +{ + TP_connection *c = get_TP_connection(thd); + if (c) + c->wait_end(); +} + + +static void tp_end() +{ + delete pool; +#ifdef _WIN32 + destroy_win_aio_buffers(); +#endif +} + +static void tp_post_kill_notification(THD *thd) +{ + TP_connection *c= get_TP_connection(thd); + if (c) + c->priority= TP_PRIORITY_HIGH; + post_kill_notification(thd); +} + +/* Resume previously suspended THD */ +static void tp_resume(THD* thd) +{ + DBUG_ASSERT(thd->async_state.m_state == thd_async_state::enum_async_state::SUSPENDED); + thd->async_state.m_state = thd_async_state::enum_async_state::RESUMED; + TP_connection* c = get_TP_connection(thd); + pool->resume(c); +} + 
+static scheduler_functions tp_scheduler_functions= +{ + 0, // max_threads + NULL, + NULL, + tp_init, // init + tp_add_connection, // add_connection + tp_wait_begin, // thd_wait_begin + tp_wait_end, // thd_wait_end + tp_post_kill_notification, // post kill notification + tp_end, // end + tp_resume +}; + +void pool_of_threads_scheduler(struct scheduler_functions *func, + ulong *arg_max_connections, + Atomic_counter *arg_connection_count) +{ + *func = tp_scheduler_functions; + func->max_threads= threadpool_max_threads; + func->max_connections= arg_max_connections; + func->connection_count= arg_connection_count; + scheduler_init(); +} diff --git a/sql/threadpool_generic.cc b/sql/threadpool_generic.cc new file mode 100644 index 00000000..7261eabf --- /dev/null +++ b/sql/threadpool_generic.cc @@ -0,0 +1,1759 @@ +/* Copyright (C) 2012, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#if (defined HAVE_POOL_OF_THREADS) && !defined(EMBEDDED_LIBRARY) + +#include "threadpool_generic.h" +#include "mariadb.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include "threadpool_winsockets.h" +#define OPTIONAL_IO_POLL_READ_PARAM this +#else +#define OPTIONAL_IO_POLL_READ_PARAM 0 +#endif + +static void io_poll_close(TP_file_handle fd) +{ +#ifdef _WIN32 + CloseHandle(fd); +#else + close(fd); +#endif +} + +/** Maximum number of native events a listener can read in one go */ +#define MAX_EVENTS 1024 + +/** Indicates that threadpool was initialized*/ +static bool threadpool_started= false; + +/* + Define PSI Keys for performance schema. + We have a mutex per group, worker threads, condition per worker thread, + and timer thread with its own mutex and condition. 
+*/ + + +#ifdef HAVE_PSI_INTERFACE +static PSI_mutex_key key_group_mutex; +static PSI_mutex_key key_timer_mutex; +static PSI_mutex_info mutex_list[]= +{ + { &key_group_mutex, "group_mutex", 0}, + { &key_timer_mutex, "timer_mutex", PSI_FLAG_GLOBAL} +}; + +static PSI_cond_key key_worker_cond; +static PSI_cond_key key_timer_cond; +static PSI_cond_info cond_list[]= +{ + { &key_worker_cond, "worker_cond", 0}, + { &key_timer_cond, "timer_cond", PSI_FLAG_GLOBAL} +}; + +static PSI_thread_key key_worker_thread; +static PSI_thread_key key_timer_thread; +static PSI_thread_info thread_list[] = +{ + {&key_worker_thread, "worker_thread", 0}, + {&key_timer_thread, "timer_thread", PSI_FLAG_GLOBAL} +}; + +/* Macro to simplify performance schema registration */ +#define PSI_register(X) \ + if(PSI_server) PSI_server->register_ ## X("threadpool", X ## _list, array_elements(X ## _list)) +#else +#define PSI_register(X) /* no-op */ +#endif + +thread_group_t *all_groups; +static uint group_count; +static Atomic_counter shutdown_group_count; + +/** + Used for printing "pool blocked" message, see + print_pool_blocked_message(); +*/ +static ulonglong pool_block_start; + +/* Global timer for all groups */ +struct pool_timer_t +{ + mysql_mutex_t mutex; + mysql_cond_t cond; + volatile uint64 current_microtime; + std::atomic next_timeout_check; + int tick_interval; + bool shutdown; + pthread_t timer_thread_id; +}; + +static pool_timer_t pool_timer; + +static void queue_put(thread_group_t *thread_group, TP_connection_generic *connection); +static void queue_put(thread_group_t *thread_group, native_event *ev, int cnt); +static int wake_thread(thread_group_t *thread_group,bool due_to_stall); +static int wake_or_create_thread(thread_group_t *thread_group, bool due_to_stall=false); +static int create_worker(thread_group_t *thread_group, bool due_to_stall); +static void *worker_main(void *param); +static void check_stall(thread_group_t *thread_group); +static void set_next_timeout_check(ulonglong 
abstime); +static void print_pool_blocked_message(bool); + +/** + Asynchronous network IO. + + We use native edge-triggered network IO multiplexing facility. + This maps to different APIs on different Unixes. + + Supported are currently Linux with epoll, Solaris with event ports, + OSX and BSD with kevent, Windows with IOCP. All those API's are used with one-shot flags + (the event is signalled once client has written something into the socket, + then socket is removed from the "poll-set" until the command is finished, + and we need to re-arm/re-register socket) + + No implementation for poll/select is currently provided. + + The API closely resembles all of the above mentioned platform APIs + and consists of following functions. + + - io_poll_create() + Creates an io_poll descriptor + On Linux: epoll_create() + + - io_poll_associate_fd(int poll_fd, TP_file_handle fd, void *data, void *opt) + Associate file descriptor with io poll descriptor + On Linux : epoll_ctl(..EPOLL_CTL_ADD)) + + - io_poll_disassociate_fd(TP_file_handle pollfd, TP_file_handle fd) + Associate file descriptor with io poll descriptor + On Linux: epoll_ctl(..EPOLL_CTL_DEL) + + + - io_poll_start_read(int poll_fd,int fd, void *data, void *opt) + The same as io_poll_associate_fd(), but cannot be used before + io_poll_associate_fd() was called. + On Linux : epoll_ctl(..EPOLL_CTL_MOD) + + - io_poll_wait (TP_file_handle pollfd, native_event *native_events, int maxevents, + int timeout_ms) + + wait until one or more descriptors added with io_poll_associate_fd() + or io_poll_start_read() becomes readable. Data associated with + descriptors can be retrieved from native_events array, using + native_event_get_userdata() function. 
+ + On Linux: epoll_wait() +*/ + +#if defined (__linux__) +#ifndef EPOLLRDHUP +/* Early 2.6 kernel did not have EPOLLRDHUP */ +#define EPOLLRDHUP 0 +#endif +static TP_file_handle io_poll_create() +{ + return epoll_create(1); +} + + +int io_poll_associate_fd(TP_file_handle pollfd, TP_file_handle fd, void *data, void*) +{ + struct epoll_event ev; + ev.data.u64= 0; /* Keep valgrind happy */ + ev.data.ptr= data; + ev.events= EPOLLIN|EPOLLET|EPOLLERR|EPOLLRDHUP|EPOLLONESHOT; + return epoll_ctl(pollfd, EPOLL_CTL_ADD, fd, &ev); +} + + + +int io_poll_start_read(TP_file_handle pollfd, TP_file_handle fd, void *data, void *) +{ + struct epoll_event ev; + ev.data.u64= 0; /* Keep valgrind happy */ + ev.data.ptr= data; + ev.events= EPOLLIN|EPOLLET|EPOLLERR|EPOLLRDHUP|EPOLLONESHOT; + return epoll_ctl(pollfd, EPOLL_CTL_MOD, fd, &ev); +} + +int io_poll_disassociate_fd(TP_file_handle pollfd, TP_file_handle fd) +{ + struct epoll_event ev; + return epoll_ctl(pollfd, EPOLL_CTL_DEL, fd, &ev); +} + + +/* + Wrapper around epoll_wait. + NOTE - in case of EINTR, it restarts with original timeout. Since we use + either infinite or 0 timeouts, this is not critical +*/ +int io_poll_wait(TP_file_handle pollfd, native_event *native_events, int maxevents, + int timeout_ms) +{ + int ret; + do + { + ret = epoll_wait(pollfd, native_events, maxevents, timeout_ms); + } + while(ret == -1 && errno == EINTR); + return ret; +} + + +static void *native_event_get_userdata(native_event *event) +{ + return event->data.ptr; +} + +#elif defined(HAVE_KQUEUE) + +/* + NetBSD prior to 9.99.17 is incompatible with other BSDs, last parameter + in EV_SET macro (udata, user data) needs to be intptr_t, whereas it needs + to be void* everywhere else. 
+*/ + +#ifdef __NetBSD__ +#include +# if !__NetBSD_Prereq__(9,99,17) +#define MY_EV_SET(a, b, c, d, e, f, g) EV_SET(a, b, c, d, e, f, (intptr_t)g) +# endif +#endif + +#ifndef MY_EV_SET +#define MY_EV_SET(a, b, c, d, e, f, g) EV_SET(a, b, c, d, e, f, g) +#endif + + +TP_file_handle io_poll_create() +{ + return kqueue(); +} + +int io_poll_start_read(TP_file_handle pollfd, TP_file_handle fd, void *data,void *) +{ + struct kevent ke; + MY_EV_SET(&ke, fd, EVFILT_READ, EV_ADD|EV_ONESHOT, + 0, 0, data); + return kevent(pollfd, &ke, 1, 0, 0, 0); +} + + +int io_poll_associate_fd(TP_file_handle pollfd, TP_file_handle fd, void *data,void *) +{ + struct kevent ke; + MY_EV_SET(&ke, fd, EVFILT_READ, EV_ADD|EV_ONESHOT, + 0, 0, data); + return io_poll_start_read(pollfd,fd, data, 0); +} + + +int io_poll_disassociate_fd(TP_file_handle pollfd, TP_file_handle fd) +{ + struct kevent ke; + MY_EV_SET(&ke,fd, EVFILT_READ, EV_DELETE, 0, 0, 0); + return kevent(pollfd, &ke, 1, 0, 0, 0); +} + + +int io_poll_wait(TP_file_handle pollfd, struct kevent *events, int maxevents, int timeout_ms) +{ + struct timespec ts; + int ret; + if (timeout_ms >= 0) + { + ts.tv_sec= timeout_ms/1000; + ts.tv_nsec= (timeout_ms%1000)*1000000; + } + do + { + ret= kevent(pollfd, 0, 0, events, maxevents, + (timeout_ms >= 0)?&ts:NULL); + } + while (ret == -1 && errno == EINTR); + return ret; +} + +static void* native_event_get_userdata(native_event *event) +{ + return (void *)event->udata; +} + +#elif defined (__sun) + +static TP_file_handle io_poll_create() +{ + return port_create(); +} + +int io_poll_start_read(TP_file_handle pollfd, TP_file_handle fd, void *data, void *) +{ + return port_associate(pollfd, PORT_SOURCE_FD, fd, POLLIN, data); +} + +static int io_poll_associate_fd(TP_file_handle pollfd, TP_file_handle fd, void *data, void *) +{ + return io_poll_start_read(pollfd, fd, data, 0); +} + +int io_poll_disassociate_fd(TP_file_handle pollfd, TP_file_handle fd) +{ + return port_dissociate(pollfd, PORT_SOURCE_FD, 
fd); +} + +int io_poll_wait(TP_file_handle pollfd, native_event *events, int maxevents, int timeout_ms) +{ + struct timespec ts; + int ret; + uint_t nget= 1; + if (timeout_ms >= 0) + { + ts.tv_sec= timeout_ms/1000; + ts.tv_nsec= (timeout_ms%1000)*1000000; + } + do + { + ret= port_getn(pollfd, events, maxevents, &nget, + (timeout_ms >= 0)?&ts:NULL); + } + while (ret == -1 && errno == EINTR); + DBUG_ASSERT(nget < INT_MAX); + return (int)nget; +} + +static void* native_event_get_userdata(native_event *event) +{ + return event->portev_user; +} + +#elif defined(_WIN32) + + +static TP_file_handle io_poll_create() +{ + return CreateIoCompletionPort(INVALID_HANDLE_VALUE, 0, 0, 0); +} + + +int io_poll_start_read(TP_file_handle pollfd, TP_file_handle fd, void *, void *opt) +{ + auto c= (TP_connection_generic *) opt; + return (int) c->win_sock.begin_read(); +} + + +static int io_poll_associate_fd(TP_file_handle pollfd, TP_file_handle fd, void *data, void *opt) +{ + HANDLE h= CreateIoCompletionPort(fd, pollfd, (ULONG_PTR)data, 0); + if (!h) + return -1; + return io_poll_start_read(pollfd,fd, 0, opt); +} + + +typedef LONG NTSTATUS; + +typedef struct _IO_STATUS_BLOCK { + union { + NTSTATUS Status; + PVOID Pointer; + }; + ULONG_PTR Information; +} IO_STATUS_BLOCK, * PIO_STATUS_BLOCK; + +struct FILE_COMPLETION_INFORMATION { + HANDLE Port; + PVOID Key; +}; + +enum FILE_INFORMATION_CLASS { + FileReplaceCompletionInformation = 0x3D +}; + + +typedef NTSTATUS(WINAPI* pNtSetInformationFile)(HANDLE, PIO_STATUS_BLOCK, PVOID, ULONG, FILE_INFORMATION_CLASS); + +int io_poll_disassociate_fd(TP_file_handle pollfd, TP_file_handle fd) +{ + static pNtSetInformationFile my_NtSetInformationFile = (pNtSetInformationFile) + GetProcAddress(GetModuleHandle("ntdll"), "NtSetInformationFile"); + if (!my_NtSetInformationFile) + return -1; /* unexpected, we only support Windows 8.1+*/ + IO_STATUS_BLOCK iosb{}; + FILE_COMPLETION_INFORMATION fci{}; + if 
(my_NtSetInformationFile(fd,&iosb,&fci,sizeof(fci),FileReplaceCompletionInformation)) + return -1; + return 0; +} + + +static void *native_event_get_userdata(native_event *event) +{ + return (void *) event->lpCompletionKey; +} + +int io_poll_wait(TP_file_handle pollfd, native_event *events, int maxevents, + int timeout_ms) +{ + ULONG n; + if (!GetQueuedCompletionStatusEx(pollfd, events, maxevents, &n, timeout_ms, FALSE)) + return -1; + + /* Update win_sock with number of bytes read.*/ + for (ULONG i= 0; i < n; i++) + { + auto ev= &events[i]; + auto c= (TP_connection_generic *) native_event_get_userdata(ev); + /* null userdata zero means shutdown (see PostQueuedCompletionStatus() usage*/ + if (c) + { + c->win_sock.end_read(ev->dwNumberOfBytesTransferred, 0); + } + } + + return (int) n; +} + +#endif + + +/* Dequeue element from a workqueue */ + +static TP_connection_generic *queue_get(thread_group_t *thread_group) +{ + DBUG_ENTER("queue_get"); + thread_group->queue_event_count++; + TP_connection_generic *c; + for (int i=0; i < NQUEUES;i++) + { + c= thread_group->queues[i].pop_front(); + if (c) + DBUG_RETURN(c); + } + DBUG_RETURN(0); +} + +static TP_connection_generic* queue_get(thread_group_t* group, operation_origin origin) +{ + auto ret = queue_get(group); + if (ret) + { + TP_INCREMENT_GROUP_COUNTER(group, dequeues[(int)origin]); + } + return ret; +} + +static bool is_queue_empty(thread_group_t *thread_group) +{ + for (int i=0; i < NQUEUES; i++) + { + if (!thread_group->queues[i].is_empty()) + return false; + } + return true; +} + + +static void queue_init(thread_group_t *thread_group) +{ + for (int i=0; i < NQUEUES; i++) + { + thread_group->queues[i].empty(); + } +} + +static void queue_put(thread_group_t *thread_group, native_event *ev, int cnt) +{ + ulonglong now= threadpool_exact_stats?microsecond_interval_timer():pool_timer.current_microtime; + for(int i=0; i < cnt; i++) + { + TP_connection_generic *c = (TP_connection_generic 
*)native_event_get_userdata(&ev[i]); + c->enqueue_time= now; + thread_group->queues[c->priority].push_back(c); + } +} + +/* + Handle wait timeout : + Find connections that have been idle for too long and kill them. + Also, recalculate time when next timeout check should run. +*/ + +static my_bool timeout_check(THD *thd, pool_timer_t *timer) +{ + DBUG_ENTER("timeout_check"); + if (thd->net.reading_or_writing == 1) + { + TP_connection_generic *connection= (TP_connection_generic *)thd->event_scheduler.data; + if (!connection || connection->state != TP_STATE_IDLE) + { + /* + Connection does not have scheduler data. This happens for example + if THD belongs to a different scheduler, that is listening to extra_port. + */ + DBUG_RETURN(0); + } + + if(connection->abs_wait_timeout < timer->current_microtime) + { + tp_timeout_handler(connection); + } + else + { + if (connection->abs_wait_timeout < timer->current_microtime) + tp_timeout_handler(connection); + else + set_next_timeout_check(connection->abs_wait_timeout); + } + } + DBUG_RETURN(0); +} + + +/* + Timer thread. + + Periodically, check if one of the thread groups is stalled. Stalls happen if + events are not being dequeued from the queue, or from the network, Primary + reason for stall can be a lengthy executing non-blocking request. It could + also happen that thread is waiting but wait_begin/wait_end is forgotten by + storage engine. Timer thread will create a new thread in group in case of + a stall. + + Besides checking for stalls, timer thread is also responsible for terminating + clients that have been idle for longer than wait_timeout seconds. + + TODO: Let the timer sleep for long time if there is no work to be done. + Currently it wakes up rather often on and idle server. 
+*/ + +static void* timer_thread(void *param) +{ + uint i; + pool_timer_t* timer=(pool_timer_t *)param; + + my_thread_init(); + DBUG_ENTER("timer_thread"); + timer->next_timeout_check.store(std::numeric_limits::max(), + std::memory_order_relaxed); + timer->current_microtime= microsecond_interval_timer(); + + for(;;) + { + struct timespec ts; + int err; + + set_timespec_nsec(ts,timer->tick_interval*1000000); + mysql_mutex_lock(&timer->mutex); + err= mysql_cond_timedwait(&timer->cond, &timer->mutex, &ts); + if (timer->shutdown) + { + mysql_mutex_unlock(&timer->mutex); + break; + } + if (err == ETIMEDOUT) + { + timer->current_microtime= microsecond_interval_timer(); + + /* Check stalls in thread groups */ + for (i= 0; i < threadpool_max_size; i++) + { + if(all_groups[i].connection_count) + check_stall(&all_groups[i]); + } + + /* Check if any client exceeded wait_timeout */ + if (timer->next_timeout_check.load(std::memory_order_relaxed) <= + timer->current_microtime) + { + /* Reset next timeout check, it will be recalculated below */ + timer->next_timeout_check.store(std::numeric_limits::max(), + std::memory_order_relaxed); + server_threads.iterate(timeout_check, timer); + } + } + mysql_mutex_unlock(&timer->mutex); + } + + mysql_mutex_destroy(&timer->mutex); + my_thread_end(); + return NULL; +} + + + +void check_stall(thread_group_t *thread_group) +{ + mysql_mutex_lock(&thread_group->mutex); + + /* + Bump priority for the low priority connections that spent too much + time in low prio queue. + */ + TP_connection_generic *c; + for (;;) + { + c= thread_group->queues[TP_PRIORITY_LOW].front(); + if (c && pool_timer.current_microtime - c->enqueue_time > 1000ULL * threadpool_prio_kickup_timer) + { + thread_group->queues[TP_PRIORITY_LOW].remove(c); + thread_group->queues[TP_PRIORITY_HIGH].push_back(c); + } + else + break; + } + + /* + Check if listener is present. If not, check whether any IO + events were dequeued since last time. 
If not, this means + listener is either in tight loop or thd_wait_begin() + was forgotten. Create a new worker(it will make itself listener). + */ + if (!thread_group->listener && !thread_group->io_event_count) + { + wake_or_create_thread(thread_group, true); + mysql_mutex_unlock(&thread_group->mutex); + return; + } + + /* Reset io event count */ + thread_group->io_event_count= 0; + + /* + Check whether requests from the workqueue are being dequeued. + + The stall detection and resolution works as follows: + + 1. There is a counter thread_group->queue_event_count for the number of + events removed from the queue. Timer resets the counter to 0 on each run. + 2. Timer determines stall if this counter remains 0 since last check + and the queue is not empty. + 3. Once timer determined a stall it sets thread_group->stalled flag and + wakes and idle worker (or creates a new one, subject to throttling). + 4. The stalled flag is reset, when an event is dequeued. + + Q : Will this handling lead to an unbound growth of threads, if queue + stalls permanently? + A : No. If queue stalls permanently, it is an indication for many very long + simultaneous queries. The maximum number of simultanoues queries is + max_connections, further we have threadpool_max_threads limit, upon which no + worker threads are created. So in case there is a flood of very long + queries, threadpool would slowly approach thread-per-connection behavior. + NOTE: + If long queries never wait, creation of the new threads is done by timer, + so it is slower than in real thread-per-connection. However if long queries + do wait and indicate that via thd_wait_begin/end callbacks, thread creation + will be faster. 
+ */ + if (!is_queue_empty(thread_group) && !thread_group->queue_event_count) + { + thread_group->stalled= true; + TP_INCREMENT_GROUP_COUNTER(thread_group,stalls); + wake_or_create_thread(thread_group,true); + } + + /* Reset queue event count */ + thread_group->queue_event_count= 0; + + mysql_mutex_unlock(&thread_group->mutex); +} + + +static void start_timer(pool_timer_t* timer) +{ + DBUG_ENTER("start_timer"); + mysql_mutex_init(key_timer_mutex,&timer->mutex, NULL); + mysql_cond_init(key_timer_cond, &timer->cond, NULL); + timer->shutdown = false; + mysql_thread_create(key_timer_thread, &timer->timer_thread_id, NULL, + timer_thread, timer); + DBUG_VOID_RETURN; +} + + +static void stop_timer(pool_timer_t *timer) +{ + DBUG_ENTER("stop_timer"); + mysql_mutex_lock(&timer->mutex); + timer->shutdown = true; + mysql_cond_signal(&timer->cond); + mysql_mutex_unlock(&timer->mutex); + pthread_join(timer->timer_thread_id, NULL); + DBUG_VOID_RETURN; +} + + +/** + Poll for socket events and distribute them to worker threads + In many case current thread will handle single event itself. + + @return a ready connection, or NULL on shutdown +*/ +static TP_connection_generic * listener(worker_thread_t *current_thread, + thread_group_t *thread_group) +{ + DBUG_ENTER("listener"); + TP_connection_generic *retval= NULL; + + for(;;) + { + native_event ev[MAX_EVENTS]; + int cnt; + + if (thread_group->shutdown) + break; + + cnt = io_poll_wait(thread_group->pollfd, ev, MAX_EVENTS, -1); + TP_INCREMENT_GROUP_COUNTER(thread_group, polls[(int)operation_origin::LISTENER]); + if (cnt <=0) + { + DBUG_ASSERT(thread_group->shutdown); + break; + } + + mysql_mutex_lock(&thread_group->mutex); + + if (thread_group->shutdown) + { + mysql_mutex_unlock(&thread_group->mutex); + break; + } + + thread_group->io_event_count += cnt; + + /* + We got some network events and need to make decisions : whether + listener hould handle events and whether or not any wake worker + threads so they can handle events. 
+ + Q1 : Should listener handle an event itself, or put all events into + queue and let workers handle the events? + + Solution : + Generally, listener that handles events itself is preferable. We do not + want listener thread to change its state from waiting to running too + often, Since listener has just woken from poll, it better uses its time + slice and does some work. Besides, not handling events means they go to + the queue, and often to wake another worker must wake up to handle the + event. This is not good, as we want to avoid wakeups. + + The downside of listener that also handles queries is that we can + potentially leave thread group for long time not picking the new + network events. It is not a major problem, because this stall will be + detected sooner or later by the timer thread. Still, relying on timer + is not always good, because it may "tick" too slow (large timer_interval) + + We use following strategy to solve this problem - if queue was not empty + we suspect flood of network events and listener stays, Otherwise, it + handles a query. + + Q2: If queue is not empty, how many workers to wake? + + Solution: + We generally try to keep one thread per group active (threads handling + queries are considered active, unless they stuck in inside some "wait") + Thus, we will wake only one worker, and only if there is not active + threads currently,and listener is not going to handle a query. When we + don't wake, we hope that currently active threads will finish fast and + handle the queue. If this does not happen, timer thread will detect stall + and wake a worker. + + NOTE: Currently nothing is done to detect or prevent long queuing times. + A solution for the future would be to give up "one active thread per + group" principle, if events stay in the queue for too long, and just wake + more workers. 
+ */ + + bool listener_picks_event=is_queue_empty(thread_group) && !threadpool_dedicated_listener; + queue_put(thread_group, ev, cnt); + if (listener_picks_event) + { + /* Handle the first event. */ + retval= queue_get(thread_group, operation_origin::LISTENER); + mysql_mutex_unlock(&thread_group->mutex); + break; + } + + if(thread_group->active_thread_count==0) + { + /* We added some work items to queue, now wake a worker. */ + if(wake_thread(thread_group, false)) + { + /* + Wake failed, hence groups has no idle threads. Now check if there are + any threads in the group except listener. + */ + if(thread_group->thread_count == 1) + { + /* + Currently there is no worker thread in the group, as indicated by + thread_count == 1 (this means listener is the only one thread in + the group). + The queue is not empty, and listener is not going to handle + events. In order to drain the queue, we create a worker here. + Alternatively, we could just rely on timer to detect stall, and + create thread, but waiting for timer would be an inefficient and + pointless delay. + */ + create_worker(thread_group, false); + } + } + } + mysql_mutex_unlock(&thread_group->mutex); + } + + DBUG_RETURN(retval); +} + +/** + Adjust thread counters in group or global + whenever thread is created or is about to exit + + @param thread_group + @param count - 1, when new thread is created + -1, when thread is about to exit +*/ + +static void add_thread_count(thread_group_t *thread_group, int32 count) +{ + thread_group->thread_count += count; + /* worker starts out and end in "active" state */ + thread_group->active_thread_count += count; + tp_stats.num_worker_threads+= count; +} + + +/** + Creates a new worker thread. 
+ thread_mutex must be held when calling this function + + NOTE: in rare cases, the number of threads can exceed + threadpool_max_threads, because we need at least 2 threads + per group to prevent deadlocks (one listener + one worker) +*/ + +static int create_worker(thread_group_t *thread_group, bool due_to_stall) +{ + pthread_t thread_id; + bool max_threads_reached= false; + int err; + + DBUG_ENTER("create_worker"); + if (tp_stats.num_worker_threads >= threadpool_max_threads + && thread_group->thread_count >= 2) + { + err= 1; + max_threads_reached= true; + goto end; + } + + err= mysql_thread_create(key_worker_thread, &thread_id, + thread_group->pthread_attr, worker_main, thread_group); + if (!err) + { + thread_group->last_thread_creation_time=microsecond_interval_timer(); + statistic_increment(thread_created,&LOCK_status); + add_thread_count(thread_group, 1); + TP_INCREMENT_GROUP_COUNTER(thread_group,thread_creations); + if(due_to_stall) + { + TP_INCREMENT_GROUP_COUNTER(thread_group, thread_creations_due_to_stall); + } + } + else + { + my_errno= errno; + } + +end: + if (err) + print_pool_blocked_message(max_threads_reached); + else + pool_block_start= 0; /* Reset pool blocked timer, if it was set */ + + DBUG_RETURN(err); +} + + +/** + Calculate microseconds throttling delay for thread creation. + + The value depends on how many threads are already in the group: + small number of threads means no delay, the more threads the larger + the delay. + + The actual values were not calculated using any scientific methods. + They just look right, and behave well in practice. 
+*/ + +#define THROTTLING_FACTOR (threadpool_stall_limit/std::max(DEFAULT_THREADPOOL_STALL_LIMIT,threadpool_stall_limit)) + +static ulonglong microsecond_throttling_interval(thread_group_t *thread_group) +{ + int count= thread_group->thread_count; + + if (count < 1+ (int)threadpool_oversubscribe) + return 0; + + if (count < 8) + return 50*1000*THROTTLING_FACTOR; + + if(count < 16) + return 100*1000*THROTTLING_FACTOR; + + return 200*100*THROTTLING_FACTOR; +} + + +/** + Wakes a worker thread, or creates a new one. + + Worker creation is throttled, so we avoid too many threads + to be created during the short time. +*/ +static int wake_or_create_thread(thread_group_t *thread_group, bool due_to_stall) +{ + DBUG_ENTER("wake_or_create_thread"); + + if (thread_group->shutdown) + DBUG_RETURN(0); + + if (wake_thread(thread_group, due_to_stall) == 0) + { + DBUG_RETURN(0); + } + + if (thread_group->thread_count > thread_group->connection_count) + DBUG_RETURN(-1); + + + if (thread_group->active_thread_count == 0) + { + /* + We're better off creating a new thread here with no delay, either there + are no workers at all, or they all are all blocking and there was no + idle thread to wakeup. Smells like a potential deadlock or very slowly + executing requests, e.g sleeps or user locks. + */ + DBUG_RETURN(create_worker(thread_group, due_to_stall)); + } + + ulonglong now = microsecond_interval_timer(); + ulonglong time_since_last_thread_created = + (now - thread_group->last_thread_creation_time); + + /* Throttle thread creation. 
*/ + if (time_since_last_thread_created > + microsecond_throttling_interval(thread_group)) + { + DBUG_RETURN(create_worker(thread_group, due_to_stall)); + } + + TP_INCREMENT_GROUP_COUNTER(thread_group,throttles); + DBUG_RETURN(-1); +} + + + +int thread_group_init(thread_group_t *thread_group, pthread_attr_t* thread_attr) +{ + DBUG_ENTER("thread_group_init"); + thread_group->pthread_attr = thread_attr; + mysql_mutex_init(key_group_mutex, &thread_group->mutex, NULL); + thread_group->pollfd= INVALID_HANDLE_VALUE; + thread_group->shutdown_pipe[0]= -1; + thread_group->shutdown_pipe[1]= -1; + queue_init(thread_group); + DBUG_RETURN(0); +} + + +void thread_group_destroy(thread_group_t *thread_group) +{ + mysql_mutex_destroy(&thread_group->mutex); + if (thread_group->pollfd != INVALID_HANDLE_VALUE) + { + io_poll_close(thread_group->pollfd); + thread_group->pollfd= INVALID_HANDLE_VALUE; + } +#ifndef _WIN32 + for(int i=0; i < 2; i++) + { + if(thread_group->shutdown_pipe[i] != -1) + { + close(thread_group->shutdown_pipe[i]); + thread_group->shutdown_pipe[i]= -1; + } + } +#endif + + if (!--shutdown_group_count) + { + my_free(all_groups); + all_groups= 0; + } +} + +/** + Wake sleeping thread from waiting list +*/ + +static int wake_thread(thread_group_t *thread_group,bool due_to_stall) +{ + DBUG_ENTER("wake_thread"); + worker_thread_t *thread = thread_group->waiting_threads.front(); + if(thread) + { + thread->woken= true; + thread_group->waiting_threads.remove(thread); + mysql_cond_signal(&thread->cond); + TP_INCREMENT_GROUP_COUNTER(thread_group, wakes); + if (due_to_stall) + { + TP_INCREMENT_GROUP_COUNTER(thread_group, wakes_due_to_stall); + } + DBUG_RETURN(0); + } + DBUG_RETURN(1); /* no thread in waiter list => missed wakeup */ +} + +/* + Wake listener thread (during shutdown) + Self-pipe trick is used in most cases,except IOCP. 
+*/ +static int wake_listener(thread_group_t *thread_group) +{ +#ifndef _WIN32 + if (pipe(thread_group->shutdown_pipe)) + { + return -1; + } + + /* Wake listener */ + if (io_poll_associate_fd(thread_group->pollfd, + thread_group->shutdown_pipe[0], NULL, NULL)) + { + return -1; + } + char c= 0; + if (write(thread_group->shutdown_pipe[1], &c, 1) < 0) + return -1; +#else + PostQueuedCompletionStatus(thread_group->pollfd, 0, 0, 0); +#endif + return 0; +} +/** + Initiate shutdown for thread group. + + The shutdown is asynchronous, we only care to wake all threads in here, so + they can finish. We do not wait here until threads terminate. Final cleanup + of the group (thread_group_destroy) will be done by the last exiting threads. +*/ + +static void thread_group_close(thread_group_t *thread_group) +{ + DBUG_ENTER("thread_group_close"); + + mysql_mutex_lock(&thread_group->mutex); + if (thread_group->thread_count == 0) + { + mysql_mutex_unlock(&thread_group->mutex); + thread_group_destroy(thread_group); + DBUG_VOID_RETURN; + } + + thread_group->shutdown= true; + thread_group->listener= NULL; + + wake_listener(thread_group); + + /* Wake all workers. */ + while(wake_thread(thread_group, false) == 0) + { + } + + mysql_mutex_unlock(&thread_group->mutex); + + DBUG_VOID_RETURN; +} + + +/* + Add work to the queue. Maybe wake a worker if they all sleep. + + Currently, this function is only used when new connections need to + perform login (this is done in worker threads). + +*/ + +static void queue_put(thread_group_t *thread_group, TP_connection_generic *connection) +{ + DBUG_ENTER("queue_put"); + + connection->enqueue_time= threadpool_exact_stats?microsecond_interval_timer():pool_timer.current_microtime; + thread_group->queues[connection->priority].push_back(connection); + + if (thread_group->active_thread_count == 0) + wake_or_create_thread(thread_group); + + DBUG_VOID_RETURN; +} + + +/* + Prevent too many threads executing at the same time,if the workload is + not CPU bound. 
+*/ + +static bool too_many_threads(thread_group_t *thread_group) +{ + return (thread_group->active_thread_count >= 1+(int)threadpool_oversubscribe + && !thread_group->stalled); +} + + +/** + Retrieve a connection with pending event. + + Pending event in our case means that there is either a pending login request + (if connection is not yet logged in), or there are unread bytes on the socket. + + If there are no pending events currently, thread will wait. + If timeout specified in abstime parameter passes, the function returns NULL. + + @param current_thread - current worker thread + @param thread_group - current thread group + @param abstime - absolute wait timeout + + @return + connection with pending event. + NULL is returned if timeout has expired,or on shutdown. +*/ + +TP_connection_generic *get_event(worker_thread_t *current_thread, + thread_group_t *thread_group, struct timespec *abstime) +{ + DBUG_ENTER("get_event"); + TP_connection_generic *connection = NULL; + + + mysql_mutex_lock(&thread_group->mutex); + DBUG_ASSERT(thread_group->active_thread_count >= 0); + + for(;;) + { + int err=0; + bool oversubscribed = too_many_threads(thread_group); + if (thread_group->shutdown) + break; + + /* Check if queue is not empty */ + if (!oversubscribed) + { + connection = queue_get(thread_group, operation_origin::WORKER); + if(connection) + { + break; + } + } + + /* If there is currently no listener in the group, become one. */ + if(!thread_group->listener) + { + thread_group->listener= current_thread; + thread_group->active_thread_count--; + mysql_mutex_unlock(&thread_group->mutex); + + connection = listener(current_thread, thread_group); + + mysql_mutex_lock(&thread_group->mutex); + thread_group->active_thread_count++; + /* There is no listener anymore, it just returned. */ + thread_group->listener= NULL; + break; + } + + + /* + Last thing we try before going to sleep is to + non-blocking event poll, i.e with timeout = 0. 
+ If this returns events, pick one + */ + if (!oversubscribed && !threadpool_dedicated_listener) + { + native_event ev[MAX_EVENTS]; + int cnt = io_poll_wait(thread_group->pollfd, ev, MAX_EVENTS, 0); + TP_INCREMENT_GROUP_COUNTER(thread_group, polls[(int)operation_origin::WORKER]); + if (cnt > 0) + { + queue_put(thread_group, ev, cnt); + connection= queue_get(thread_group,operation_origin::WORKER); + break; + } + } + + + /* And now, finally sleep */ + current_thread->woken = false; /* wake() sets this to true */ + + /* + Add current thread to the head of the waiting list and wait. + It is important to add thread to the head rather than tail + as it ensures LIFO wakeup order (hot caches, working inactivity timeout) + */ + thread_group->waiting_threads.push_front(current_thread); + + thread_group->active_thread_count--; + if (abstime) + { + err = mysql_cond_timedwait(¤t_thread->cond, &thread_group->mutex, + abstime); + } + else + { + err = mysql_cond_wait(¤t_thread->cond, &thread_group->mutex); + } + thread_group->active_thread_count++; + + if (!current_thread->woken) + { + /* + Thread was not signalled by wake(), it might be a spurious wakeup or + a timeout. Anyhow, we need to remove ourselves from the list now. + If thread was explicitly woken, than caller removed us from the list. + */ + thread_group->waiting_threads.remove(current_thread); + } + + if (err) + break; + } + + thread_group->stalled= false; + + mysql_mutex_unlock(&thread_group->mutex); + + DBUG_RETURN(connection); +} + + + +/** + Tells the pool that worker starts waiting on IO, lock, condition, + sleep() or similar. 
*/

void wait_begin(thread_group_t *thread_group)
{
  DBUG_ENTER("wait_begin");
  mysql_mutex_lock(&thread_group->mutex);
  /* This thread no longer counts as running a query. */
  thread_group->active_thread_count--;

  DBUG_ASSERT(thread_group->active_thread_count >=0);
  DBUG_ASSERT(thread_group->connection_count > 0);

  /*
    If this was the last active thread and there is still pending work
    (non-empty queue) or nobody polling the network (no listener), the group
    could stall for the duration of the wait.
  */
  if ((thread_group->active_thread_count == 0) &&
     (!is_queue_empty(thread_group) || !thread_group->listener))
  {
    /*
      Group might stall while this thread waits, thus wake
      or create a worker to prevent stall.
    */
    wake_or_create_thread(thread_group);
  }

  mysql_mutex_unlock(&thread_group->mutex);
  DBUG_VOID_RETURN;
}

/**
  Tells the pool has finished waiting.
*/

void wait_end(thread_group_t *thread_group)
{
  DBUG_ENTER("wait_end");
  mysql_mutex_lock(&thread_group->mutex);
  /* Thread resumes execution, count it as active again. */
  thread_group->active_thread_count++;
  mysql_mutex_unlock(&thread_group->mutex);
  DBUG_VOID_RETURN;
}


/* Factory method: allocate a generic (poll-based) connection, NULL on OOM. */
TP_connection * TP_pool_generic::new_connection(CONNECT *c)
{
  return new (std::nothrow) TP_connection_generic(c);
}

/**
  Add a new connection to thread pool
*/

void TP_pool_generic::add(TP_connection *c)
{
  DBUG_ENTER("tp_add_connection");

  TP_connection_generic *connection=(TP_connection_generic *)c;
  thread_group_t *thread_group= connection->thread_group;
  /*
    Add connection to the work queue.Actual logon
    will be done by a worker thread.
+ */ + mysql_mutex_lock(&thread_group->mutex); + queue_put(thread_group, connection); + mysql_mutex_unlock(&thread_group->mutex); + DBUG_VOID_RETURN; +} + +void TP_pool_generic::resume(TP_connection* c) +{ + add(c); +} + +/** + MySQL scheduler callback: wait begin +*/ + +void TP_connection_generic::wait_begin(int type) +{ + DBUG_ENTER("wait_begin"); + + DBUG_ASSERT(!waiting); + waiting++; + if (waiting == 1) + ::wait_begin(thread_group); + DBUG_VOID_RETURN; +} + + +/** + MySQL scheduler callback: wait end +*/ + +void TP_connection_generic::wait_end() +{ + DBUG_ENTER("wait_end"); + DBUG_ASSERT(waiting); + waiting--; + if (waiting == 0) + ::wait_end(thread_group); + DBUG_VOID_RETURN; +} + + +static void set_next_timeout_check(ulonglong abstime) +{ + auto old= pool_timer.next_timeout_check.load(std::memory_order_relaxed); + DBUG_ENTER("set_next_timeout_check"); + while (abstime < old) + { + if (pool_timer.next_timeout_check. + compare_exchange_weak(old, abstime, + std::memory_order_relaxed, + std::memory_order_relaxed)) + break; + } + DBUG_VOID_RETURN; +} + +static size_t get_group_id(my_thread_id tid) +{ + return size_t(tid % group_count); +} + + +TP_connection_generic::TP_connection_generic(CONNECT *c): + TP_connection(c), + thread_group(0), + next_in_queue(0), + prev_in_queue(0), + abs_wait_timeout(ULONGLONG_MAX), + bound_to_poll_descriptor(false), + waiting(false), + fix_group(false) +{ + DBUG_ASSERT(c->vio_type != VIO_CLOSED); + +#ifdef _WIN32 + fd= (c->vio_type == VIO_TYPE_NAMEDPIPE) ? + c->pipe: (TP_file_handle) mysql_socket_getfd(c->sock); +#else + fd= mysql_socket_getfd(c->sock); +#endif + + /* Assign connection to a group. 
*/ + thread_group_t *group= + &all_groups[get_group_id(c->thread_id)]; + thread_group=group; + + mysql_mutex_lock(&group->mutex); + group->connection_count++; + mysql_mutex_unlock(&group->mutex); +} + +TP_connection_generic::~TP_connection_generic() +{ + mysql_mutex_lock(&thread_group->mutex); + thread_group->connection_count--; + mysql_mutex_unlock(&thread_group->mutex); +} + +/** + Set wait timeout for connection. +*/ + +void TP_connection_generic::set_io_timeout(int timeout_sec) +{ + DBUG_ENTER("set_wait_timeout"); + /* + Calculate wait deadline for this connection. + Instead of using microsecond_interval_timer() which has a syscall + overhead, use pool_timer.current_microtime and take + into account that its value could be off by at most + one tick interval. + */ + + abs_wait_timeout= pool_timer.current_microtime + + 1000LL*pool_timer.tick_interval + + 1000000LL*timeout_sec; + + set_next_timeout_check(abs_wait_timeout); + DBUG_VOID_RETURN; +} + + +/** + Handle a (rare) special case,where connection needs to + migrate to a different group because group_count has changed + after thread_pool_size setting. +*/ + +static int change_group(TP_connection_generic *c, + thread_group_t *old_group, + thread_group_t *new_group) +{ + int ret= 0; + + DBUG_ASSERT(c->thread_group == old_group); + + /* Remove connection from the old group. */ + mysql_mutex_lock(&old_group->mutex); + if (c->bound_to_poll_descriptor) + { + io_poll_disassociate_fd(old_group->pollfd,c->fd); + c->bound_to_poll_descriptor= false; + } + c->thread_group->connection_count--; + mysql_mutex_unlock(&old_group->mutex); + + /* Add connection to the new group. */ + mysql_mutex_lock(&new_group->mutex); + c->thread_group= new_group; + new_group->connection_count++; + /* Ensure that there is a listener in the new group. 
*/ + if (!new_group->thread_count) + ret= create_worker(new_group, false); + mysql_mutex_unlock(&new_group->mutex); + return ret; +} + + +int TP_connection_generic::start_io() +{ + /* + Usually, connection will stay in the same group for the entire + connection's life. However, we do allow group_count to + change at runtime, which means in rare cases when it changes is + connection should need to migrate to another group, this ensures + to ensure equal load between groups. + + So we recalculate in which group the connection should be, based + on thread_id and current group count, and migrate if necessary. + */ + if (fix_group) + { + fix_group = false; + thread_group_t *new_group= &all_groups[get_group_id(thd->thread_id)]; + + if (new_group != thread_group) + { + if (change_group(this, thread_group, new_group)) + return -1; + } + } + + /* + Bind to poll descriptor if not yet done. + */ + if (!bound_to_poll_descriptor) + { + bound_to_poll_descriptor= true; + return io_poll_associate_fd(thread_group->pollfd, fd, this, OPTIONAL_IO_POLL_READ_PARAM); + } + + return io_poll_start_read(thread_group->pollfd, fd, this, OPTIONAL_IO_POLL_READ_PARAM); +} + + + +/** + Worker thread's main +*/ + +static void *worker_main(void *param) +{ + + worker_thread_t this_thread; + pthread_detach_this_thread(); + my_thread_init(); + + DBUG_ENTER("worker_main"); + + thread_group_t *thread_group = (thread_group_t *)param; + + /* Init per-thread structure */ + mysql_cond_init(key_worker_cond, &this_thread.cond, NULL); + this_thread.thread_group= thread_group; + this_thread.event_count=0; + + /* Run event loop */ + for(;;) + { + TP_connection_generic *connection; + struct timespec ts; + set_timespec(ts,threadpool_idle_timeout); + connection = get_event(&this_thread, thread_group, &ts); + if (!connection) + break; + this_thread.event_count++; + tp_callback(connection); + } + + /* Thread shutdown: cleanup per-worker-thread structure. 
*/ + mysql_cond_destroy(&this_thread.cond); + + bool last_thread; /* last thread in group exits */ + mysql_mutex_lock(&thread_group->mutex); + add_thread_count(thread_group, -1); + last_thread= ((thread_group->thread_count == 0) && thread_group->shutdown); + mysql_mutex_unlock(&thread_group->mutex); + + /* Last thread in group exits and pool is terminating, destroy group.*/ + if (last_thread) + thread_group_destroy(thread_group); + + my_thread_end(); + return NULL; +} + + +TP_pool_generic::TP_pool_generic() = default; + +int TP_pool_generic::init() +{ + DBUG_ENTER("TP_pool_generic::TP_pool_generic"); + threadpool_max_size= MY_MAX(threadpool_size, 128); + all_groups= (thread_group_t *) + my_malloc(PSI_INSTRUMENT_ME, + sizeof(thread_group_t) * threadpool_max_size, MYF(MY_WME|MY_ZEROFILL)); + if (!all_groups) + { + threadpool_max_size= 0; + sql_print_error("Allocation failed"); + DBUG_RETURN(-1); + } + PSI_register(mutex); + PSI_register(cond); + PSI_register(thread); + scheduler_init(); + threadpool_started= true; + for (uint i= 0; i < threadpool_max_size; i++) + { + thread_group_init(&all_groups[i], get_connection_attrib()); + } + set_pool_size(threadpool_size); + if(group_count == 0) + { + /* Something went wrong */ + sql_print_error("Can't set threadpool size to %d",threadpool_size); + DBUG_RETURN(-1); + } + pool_timer.tick_interval= threadpool_stall_limit; + start_timer(&pool_timer); + DBUG_RETURN(0); +} + +TP_pool_generic::~TP_pool_generic() +{ + DBUG_ENTER("tp_end"); + + if (!threadpool_started) + DBUG_VOID_RETURN; + + stop_timer(&pool_timer); + shutdown_group_count= threadpool_max_size; + for (uint i= 0; i < threadpool_max_size; i++) + { + thread_group_close(&all_groups[i]); + } + + /* + Wait until memory occupied by all_groups is freed. 
+ */ + int timeout_ms=5000; + while(all_groups && timeout_ms--) + my_sleep(1000); + + threadpool_started= false; + DBUG_VOID_RETURN; +} + + +static my_bool thd_reset_group(THD* thd, void*) +{ + auto c= (TP_connection_generic*)thd->event_scheduler.data; + if(c) + c->fix_group= true; + return FALSE; +} + +/** Ensure that poll descriptors are created when threadpool_size changes */ +int TP_pool_generic::set_pool_size(uint size) +{ + bool success= true; + + for(uint i=0; i< size; i++) + { + thread_group_t *group= &all_groups[i]; + mysql_mutex_lock(&group->mutex); + if (group->pollfd == INVALID_HANDLE_VALUE) + { + group->pollfd= io_poll_create(); + success= (group->pollfd != INVALID_HANDLE_VALUE); + if(!success) + { + sql_print_error("io_poll_create() failed, errno=%d", errno); + } + } + mysql_mutex_unlock(&group->mutex); + if (!success) + { + group_count= i; + return -1; + } + } + group_count= size; + server_threads.iterate(thd_reset_group); + return 0; +} + +int TP_pool_generic::set_stall_limit(uint limit) +{ + mysql_mutex_lock(&(pool_timer.mutex)); + pool_timer.tick_interval= limit; + mysql_mutex_unlock(&(pool_timer.mutex)); + mysql_cond_signal(&(pool_timer.cond)); + return 0; +} + + +/** + Calculate number of idle/waiting threads in the pool. + + Sum idle threads over all groups. + Don't do any locking, it is not required for stats. +*/ + +int TP_pool_generic::get_idle_thread_count() +{ + int sum=0; + for (uint i= 0; i < threadpool_max_size && all_groups[i].pollfd != INVALID_HANDLE_VALUE; i++) + { + sum+= (all_groups[i].thread_count - all_groups[i].active_thread_count); + } + return sum; +} + + +/* Report threadpool problems */ + +/** + Delay in microseconds, after which "pool blocked" message is printed. + (30 sec == 30 Mio usec) +*/ +#define BLOCK_MSG_DELAY (30*1000000) + +#define MAX_THREADS_REACHED_MSG \ +"Threadpool could not create additional thread to handle queries, because the \ +number of allowed threads was reached. 
Increasing 'thread_pool_max_threads' \ +parameter can help in this situation.\n \ +If 'extra_port' parameter is set, you can still connect to the database with \ +superuser account (it must be TCP connection using extra_port as TCP port) \ +and troubleshoot the situation. \ +A likely cause of pool blocks are clients that lock resources for long time. \ +'show processlist' or 'show engine innodb status' can give additional hints." + +#define CREATE_THREAD_ERROR_MSG "Can't create threads in threadpool (errno=%d)." + +/** + Write a message when blocking situation in threadpool occurs. + The message is written only when pool blocks for BLOCK_MSG_DELAY (30) seconds. + It will be just a single message for each blocking situation (to prevent + log flood). +*/ + +static void print_pool_blocked_message(bool max_threads_reached) +{ + ulonglong now; + static bool msg_written; + + now= microsecond_interval_timer(); + if (pool_block_start == 0) + { + pool_block_start= now; + msg_written = false; + return; + } + + if (now > pool_block_start + BLOCK_MSG_DELAY && !msg_written) + { + if (max_threads_reached) + sql_print_warning(MAX_THREADS_REACHED_MSG); + else + sql_print_warning(CREATE_THREAD_ERROR_MSG, my_errno); + + sql_print_information("Threadpool has been blocked for %u seconds\n", + (uint)((now- pool_block_start)/1000000)); + /* avoid reperated messages for the same blocking situation */ + msg_written= true; + } +} + +#endif /* HAVE_POOL_OF_THREADS */ diff --git a/sql/threadpool_generic.h b/sql/threadpool_generic.h new file mode 100644 index 00000000..b7a35b7c --- /dev/null +++ b/sql/threadpool_generic.h @@ -0,0 +1,157 @@ +/* Copyright(C) 2019, 2020, MariaDB + * + * This program is free software; you can redistribute itand /or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/ + +#if defined (HAVE_POOL_OF_THREADS) +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#include "threadpool_winsockets.h" +/* AIX may define this, too ?*/ +#define HAVE_IOCP +#endif + + +#ifdef _WIN32 +typedef HANDLE TP_file_handle; +#else +typedef int TP_file_handle; +#define INVALID_HANDLE_VALUE -1 +#endif + +#ifdef __linux__ +#include +typedef struct epoll_event native_event; +#elif defined(HAVE_KQUEUE) +#include +typedef struct kevent native_event; +#elif defined (__sun) +#include +typedef port_event_t native_event; +#elif defined (HAVE_IOCP) +typedef OVERLAPPED_ENTRY native_event; +#else +#error threadpool is not available on this platform +#endif + +struct thread_group_t; + +/* Per-thread structure for workers */ +struct worker_thread_t +{ + ulonglong event_count; /* number of request handled by this thread */ + thread_group_t* thread_group; + worker_thread_t* next_in_list; + worker_thread_t** prev_in_list; + mysql_cond_t cond; + bool woken; +}; + +typedef I_P_List, + I_P_List_counter +> +worker_list_t; + +struct TP_connection_generic :public TP_connection +{ + TP_connection_generic(CONNECT* c); + ~TP_connection_generic(); + + int init() override { return 0; } + void set_io_timeout(int sec) override; + int start_io() override; + void wait_begin(int type) override; + void wait_end() override; + + thread_group_t* thread_group; + TP_connection_generic* next_in_queue; + TP_connection_generic** prev_in_queue; + ulonglong abs_wait_timeout; + ulonglong 
enqueue_time; + TP_file_handle fd; + bool bound_to_poll_descriptor; + int waiting; + bool fix_group; +#ifdef _WIN32 + win_aiosocket win_sock{}; + void init_vio(st_vio *vio) override + { win_sock.init(vio);} +#endif + +}; + + +typedef I_P_List, + I_P_List_counter, + I_P_List_fast_push_back > + connection_queue_t; + +const int NQUEUES = 2; /* We have high and low priority queues*/ + +enum class operation_origin +{ + WORKER, + LISTENER +}; + +struct thread_group_counters_t +{ + ulonglong thread_creations; + ulonglong thread_creations_due_to_stall; + ulonglong wakes; + ulonglong wakes_due_to_stall; + ulonglong throttles; + ulonglong stalls; + ulonglong dequeues[2]; + ulonglong polls[2]; +}; + +struct thread_group_t +{ + mysql_mutex_t mutex; + connection_queue_t queues[NQUEUES]; + worker_list_t waiting_threads; + worker_thread_t* listener; + pthread_attr_t* pthread_attr; + TP_file_handle pollfd; + int thread_count; + int active_thread_count; + int connection_count; + /* Stats for the deadlock detection timer routine.*/ + int io_event_count; + int queue_event_count; + ulonglong last_thread_creation_time; + int shutdown_pipe[2]; + bool shutdown; + bool stalled; + thread_group_counters_t counters; + char pad[CPU_LEVEL1_DCACHE_LINESIZE]; +}; + +#define TP_INCREMENT_GROUP_COUNTER(group,var) do {group->counters.var++;}while(0) + +extern thread_group_t* all_groups; +#endif + diff --git a/sql/threadpool_win.cc b/sql/threadpool_win.cc new file mode 100644 index 00000000..ed68e31c --- /dev/null +++ b/sql/threadpool_win.cc @@ -0,0 +1,447 @@ +/* Copyright (C) 2012 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifdef _WIN32_WINNT +#undef _WIN32_WINNT +#endif + +#define _WIN32_WINNT 0x0601 + +#include "mariadb.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "threadpool_winsockets.h" + +/* Log a warning */ +static void tp_log_warning(const char *msg, const char *fct) +{ + sql_print_warning("Threadpool: %s. %s failed (last error %d)",msg, fct, + GetLastError()); +} + + +static PTP_POOL pool; +static TP_CALLBACK_ENVIRON callback_environ; +static DWORD fls; + +PTP_CALLBACK_ENVIRON get_threadpool_win_callback_environ() +{ + return pool? &callback_environ: 0; +} + +/* + Threadpool callbacks. + + io_completion_callback - handle client request + timer_callback - handle wait timeout (kill connection) + login_callback - user login (submitted as threadpool work) + +*/ + +static void CALLBACK timer_callback(PTP_CALLBACK_INSTANCE instance, + PVOID context, PTP_TIMER timer); + +static void CALLBACK io_completion_callback(PTP_CALLBACK_INSTANCE instance, + PVOID context, PVOID overlapped, ULONG io_result, ULONG_PTR nbytes, PTP_IO io); + + +static void CALLBACK work_callback(PTP_CALLBACK_INSTANCE instance, PVOID context, PTP_WORK work); + +static void pre_callback(PVOID context, PTP_CALLBACK_INSTANCE instance); + +/* Get current time as Windows time */ +static ulonglong now() +{ + ulonglong current_time; + GetSystemTimeAsFileTime((PFILETIME)¤t_time); + return current_time; +} + +struct TP_connection_win:public TP_connection +{ +public: + TP_connection_win(CONNECT*); + ~TP_connection_win(); + int init() override; + void init_vio(st_vio *vio) override; + int start_io() override; + void set_io_timeout(int sec) override; + void wait_begin(int type) 
override; + void wait_end() override; + + ulonglong timeout=ULLONG_MAX; + OVERLAPPED overlapped{}; + PTP_CALLBACK_INSTANCE callback_instance{}; + PTP_IO io{}; + PTP_TIMER timer{}; + PTP_WORK work{}; + bool long_callback{}; + win_aiosocket sock; +}; + +struct TP_connection *new_TP_connection(CONNECT *connect) +{ + TP_connection *c = new (std::nothrow) TP_connection_win(connect); + if (!c || c->init()) + { + delete c; + return 0; + } + return c; +} + +void TP_pool_win::add(TP_connection *c) +{ + if(FlsGetValue(fls)) + { + /* Inside threadpool(), execute callback directly. */ + tp_callback(c); + } + else + { + SubmitThreadpoolWork(((TP_connection_win *)c)->work); + } +} + +void TP_pool_win::resume(TP_connection* c) +{ + DBUG_ASSERT(c->state == TP_STATE_RUNNING); + SubmitThreadpoolWork(((TP_connection_win*)c)->work); +} + +#define CHECK_ALLOC_ERROR(op) \ + do \ + { \ + if (!(op)) \ + { \ + tp_log_warning("Allocation failed", #op); \ + } \ + } while (0) + +TP_connection_win::TP_connection_win(CONNECT *c) : + TP_connection(c) +{ + /* Assign io completion callback */ + HANDLE h= c->vio_type == VIO_TYPE_NAMEDPIPE ? c->pipe + : (HANDLE)mysql_socket_getfd(c->sock); + + CHECK_ALLOC_ERROR(io=CreateThreadpoolIo(h, io_completion_callback, this, &callback_environ)); + CHECK_ALLOC_ERROR(timer= CreateThreadpoolTimer(timer_callback, this, &callback_environ)); + CHECK_ALLOC_ERROR(work= CreateThreadpoolWork(work_callback, this, &callback_environ)); +} + +int TP_connection_win::init() +{ + return !io || !timer || !work ; +} + +void TP_connection_win::init_vio(st_vio* vio) +{ + sock.init(vio); +} + +/* + Start asynchronous read +*/ +int TP_connection_win::start_io() +{ + StartThreadpoolIo(io); + if (sock.begin_read()) + { + /* Some error occurred */ + CancelThreadpoolIo(io); + return -1; + } + return 0; +} + +/* + Recalculate wait timeout, maybe reset timer. 
+*/ +void TP_connection_win::set_io_timeout(int timeout_sec) +{ + ulonglong old_timeout= timeout; + ulonglong new_timeout = now() + 10000000LL * timeout_sec; + + if (new_timeout < old_timeout) + { + SetThreadpoolTimer(timer, (PFILETIME)&new_timeout, 0, 1000); + } + /* new_timeout > old_timeout case is handled by expiring timer. */ + timeout = new_timeout; +} + + +TP_connection_win::~TP_connection_win() +{ + if (io) + CloseThreadpoolIo(io); + + if (work) + CloseThreadpoolWork(work); + + if (timer) + { + SetThreadpoolTimer(timer, 0, 0, 0); + WaitForThreadpoolTimerCallbacks(timer, TRUE); + CloseThreadpoolTimer(timer); + } +} + +void TP_connection_win::wait_begin(int type) +{ + /* + Signal to the threadpool whenever callback can run long. Currently, binlog + waits are a good candidate, its waits are really long + */ + if (type == THD_WAIT_BINLOG) + { + if (!long_callback && callback_instance) + { + CallbackMayRunLong(callback_instance); + long_callback= true; + } + } +} + +void TP_connection_win::wait_end() +{ + /* Do we need to do anything ? */ +} + +/* + This function should be called first whenever a callback is invoked in the + threadpool, does my_thread_init() if not yet done +*/ +void tp_win_callback_prolog() +{ + if (FlsGetValue(fls) == NULL) + { + /* Running in new worker thread*/ + FlsSetValue(fls, (void *)1); + thread_created++; + tp_stats.num_worker_threads++; + my_thread_init(); + } +} + +extern ulong thread_created; +static void pre_callback(PVOID context, PTP_CALLBACK_INSTANCE instance) +{ + tp_win_callback_prolog(); + TP_connection_win *c = (TP_connection_win *)context; + c->callback_instance = instance; + c->long_callback = false; +} + + +/* + Decrement number of threads when a thread exits. + On Windows, FlsAlloc() provides the thread destruction callbacks. 
+*/ +static VOID WINAPI thread_destructor(void *data) +{ + if(data) + { + tp_stats.num_worker_threads--; + my_thread_end(); + } +} + + + +static inline void tp_callback(PTP_CALLBACK_INSTANCE instance, PVOID context) +{ + pre_callback(context, instance); + tp_callback((TP_connection *)context); +} + + +/* + Handle read completion/notification. +*/ +static VOID CALLBACK io_completion_callback(PTP_CALLBACK_INSTANCE instance, + PVOID context, PVOID overlapped, ULONG io_result, ULONG_PTR nbytes, PTP_IO io) +{ + TP_connection_win *c= (TP_connection_win *)context; + + /* How many bytes were preread into read buffer */ + c->sock.end_read((ULONG)nbytes, io_result); + + /* + Execute high priority connections immediately. + 'Yield' in case of low priority connections, i.e SubmitThreadpoolWork (with the same callback) + which makes Windows threadpool place the items at the end of its internal work queue. + */ + if (c->priority == TP_PRIORITY_HIGH) + tp_callback(instance, context); + else + SubmitThreadpoolWork(c->work); +} + + +/* + Timer callback. + Invoked when connection times out (wait_timeout) +*/ +static VOID CALLBACK timer_callback(PTP_CALLBACK_INSTANCE instance, + PVOID parameter, PTP_TIMER timer) +{ + TP_connection_win *c = (TP_connection_win *)parameter; + if (c->timeout <= now()) + { + tp_timeout_handler(c); + } + else + { + /* + Reset timer. + There is a tiny possibility of a race condition, since the value of timeout + could have changed to smaller value in the thread doing io callback. + + Given the relative unimportance of the wait timeout, we accept race + condition. 
+ */ + SetThreadpoolTimer(timer, (PFILETIME)&c->timeout, 0, 1000); + } +} + +static void CALLBACK work_callback(PTP_CALLBACK_INSTANCE instance, PVOID context, PTP_WORK work) +{ + tp_callback(instance, context); +} + +TP_pool_win::TP_pool_win() +{} + +int TP_pool_win::init() +{ + fls= FlsAlloc(thread_destructor); + pool= CreateThreadpool(NULL); + + if (!pool) + { + sql_print_error("Can't create threadpool. " + "CreateThreadpool() failed with %d. Likely cause is memory pressure", + GetLastError()); + return -1; + } + + InitializeThreadpoolEnvironment(&callback_environ); + SetThreadpoolCallbackPool(&callback_environ, pool); + + if (IS_SYSVAR_AUTOSIZE(&threadpool_max_threads)) + { + /* + Nr 500 comes from Microsoft documentation, + there is no API for GetThreadpoolThreadMaxThreads() + */ + SYSVAR_AUTOSIZE(threadpool_max_threads,500); + } + else + { + SetThreadpoolThreadMaximum(pool, threadpool_max_threads); + } + + if (IS_SYSVAR_AUTOSIZE(&threadpool_min_threads)) + { + SYSVAR_AUTOSIZE(threadpool_min_threads,1); + } + else + { + if (!SetThreadpoolThreadMinimum(pool, threadpool_min_threads)) + { + tp_log_warning("Can't set threadpool minimum threads", + "SetThreadpoolThreadMinimum"); + } + } + + + if (IS_SYSVAR_AUTOSIZE(&global_system_variables.threadpool_priority)) + { + /* + There is a notable overhead for "auto" priority implementation, + use "high" which handles socket IO callbacks as they come + without rescheduling to work queue. + */ + SYSVAR_AUTOSIZE(global_system_variables.threadpool_priority, + TP_PRIORITY_HIGH); + } + + TP_POOL_STACK_INFORMATION stackinfo; + stackinfo.StackCommit = 0; + stackinfo.StackReserve = (SIZE_T)my_thread_stack_size; + if (!SetThreadpoolStackInformation(pool, &stackinfo)) + { + tp_log_warning("Can't set threadpool stack size", + "SetThreadpoolStackInformation"); + } + return 0; +} + + +/** + Scheduler callback : Destroy the scheduler. 
+*/ +TP_pool_win::~TP_pool_win() +{ + if (!pool) + return; + DestroyThreadpoolEnvironment(&callback_environ); + SetThreadpoolThreadMaximum(pool, 0); + CloseThreadpool(pool); + if (!tp_stats.num_worker_threads) + FlsFree(fls); +} +/** + Sets the number of idle threads the thread pool maintains in anticipation of new + requests. +*/ +int TP_pool_win::set_min_threads(uint val) +{ + SetThreadpoolThreadMinimum(pool, val); + return 0; +} + +int TP_pool_win::set_max_threads(uint val) +{ + SetThreadpoolThreadMaximum(pool, val); + return 0; +} + + +TP_connection *TP_pool_win::new_connection(CONNECT *connect) +{ + TP_connection *c= new (std::nothrow) TP_connection_win(connect); + if (!c ) + return 0; + if (c->init()) + { + delete c; + return 0; + } + return c; +} + diff --git a/sql/threadpool_winsockets.cc b/sql/threadpool_winsockets.cc new file mode 100644 index 00000000..a214cda2 --- /dev/null +++ b/sql/threadpool_winsockets.cc @@ -0,0 +1,268 @@ +/* Copyright (C) 2012 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + */ + +#include +#include +#include +#include "threadpool_winsockets.h" +#include +#include +#include + +/* + A cache for IO buffers for asynchronous socket(or named pipe) reads. + + Considerations on Windows : since Windows locks the AIO buffers in physical memory, + it is important that these buffers are compactly allocated. 
We try to prevent any kinds of memory fragmentation
+ + LIFO policy is implemented, so we do not touch too many + pages (no std::stack though) +*/ +char *AIO_buffer_cache::acquire_buffer() +{ + std::unique_lock lk(m_mtx); + if (m_cache.empty()) + return nullptr; + auto p= m_cache.back(); + m_cache.pop_back(); + return p; +} + +void AIO_buffer_cache::release_buffer(char *v) +{ + std::unique_lock lk(m_mtx); + m_cache.push_back(v); +} + +void AIO_buffer_cache::clear() +{ + if (!m_base) + return; + + std::unique_lock lk(m_mtx, std::defer_lock); + for(;;) + { + if (lk.try_lock()) + { + if (m_cache.size() == m_elements) + break; + lk.unlock(); + } + Sleep(100); + } + VirtualFree(m_base, 0, MEM_RELEASE); + m_cache.clear(); + m_base= 0; + m_elements= 0; +} + +AIO_buffer_cache::~AIO_buffer_cache() { clear(); } + +/* Global variable for the cache buffers.*/ +AIO_buffer_cache read_buffers; + +win_aiosocket::~win_aiosocket() +{ + if (m_buf_ptr) + read_buffers.release_buffer(m_buf_ptr); +} + + +/** Return number of unread bytes.*/ +size_t win_aiosocket::buffer_remaining() +{ + return m_buf_datalen - m_buf_off; +} + +static my_bool my_vio_has_data(st_vio *vio) +{ + auto sock= (win_aiosocket *) vio->tp_ctx; + return sock->buffer_remaining() || sock->m_orig_vio_has_data(vio); +} + +/* + (Half-)buffered read. + + The buffer is filled once, by completion of the async IO. + + We do not refill the buffer once it is read off, + does not make sense. 
+*/ +static size_t my_vio_read(st_vio *vio, uchar *dest, size_t sz) +{ + auto sock= (win_aiosocket *) vio->tp_ctx; + DBUG_ASSERT(sock); + + auto nbytes= std::min(sock->buffer_remaining(), sz); + + if (nbytes > 0) + { + /* Copy to output, adjust the offset.*/ + memcpy(dest, sock->m_buf_ptr + sock->m_buf_off, nbytes); + sock->m_buf_off += nbytes; + return nbytes; + } + + return sock->m_orig_vio_read(vio, dest, sz); +} + +DWORD win_aiosocket::begin_read() +{ + DWORD err = ERROR_SUCCESS; + static char c; + WSABUF buf; + + DBUG_ASSERT(!buffer_remaining()); + + /* + If there is no internal buffer to store data, + we do zero size read, but still need a valid + pointer for the buffer parameter. + */ + if (m_buf_ptr) + buf= {(ULONG)READ_BUFSIZ, m_buf_ptr}; + else + buf= {0, &c}; + + + if (!m_is_pipe) + { + /* Do async io (sockets). */ + DWORD flags= 0; + if (WSARecv((SOCKET) m_handle, &buf, 1, 0, &flags, &m_overlapped, NULL)) + err= WSAGetLastError(); + } + else + { + /* Do async read (named pipe) */ + if (!ReadFile(m_handle, buf.buf, buf.len, 0, &m_overlapped)) + err= GetLastError(); + } + + if (!err || err == ERROR_IO_PENDING) + return 0; + return err; +} + +void win_aiosocket::end_read(ULONG nbytes, DWORD err) +{ + DBUG_ASSERT(!buffer_remaining()); + DBUG_ASSERT(!nbytes || m_buf_ptr); + m_buf_off= 0; + m_buf_datalen= nbytes; +} + +void win_aiosocket::init(Vio *vio) +{ + m_is_pipe= vio->type == VIO_TYPE_NAMEDPIPE; + m_handle= + m_is_pipe ? 
vio->hPipe : (HANDLE) mysql_socket_getfd(vio->mysql_socket); + + SetFileCompletionNotificationModes(m_handle, FILE_SKIP_SET_EVENT_ON_HANDLE); + if (vio->type == VIO_TYPE_SSL) + { + /* + TODO : This requires fixing viossl to call our manipulated VIO + */ + return; + } + + if (!(m_buf_ptr = read_buffers.acquire_buffer())) + { + /* Ran out of buffers, that's fine.*/ + return; + } + + vio->tp_ctx= this; + + m_orig_vio_has_data= vio->has_data; + vio->has_data= my_vio_has_data; + + m_orig_vio_read= vio->read; + vio->read= my_vio_read; +} + +void init_win_aio_buffers(unsigned int n_buffers) +{ + read_buffers.set_size(n_buffers); +} + +extern void destroy_win_aio_buffers() +{ + read_buffers.clear(); +} diff --git a/sql/threadpool_winsockets.h b/sql/threadpool_winsockets.h new file mode 100644 index 00000000..ca2068b7 --- /dev/null +++ b/sql/threadpool_winsockets.h @@ -0,0 +1,80 @@ +/* Copyright (C) 2020 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + */ +#pragma once + +#include +#include + +struct st_vio; + +struct win_aiosocket +{ + /** OVERLAPPED is needed by all Windows AIO*/ + OVERLAPPED m_overlapped{}; + /** Handle to pipe, or socket */ + HANDLE m_handle{}; + /** Whether the m_handle refers to pipe*/ + bool m_is_pipe{}; + + /* Read buffer handling */ + + /** Pointer to buffer of size READ_BUFSIZ. 
Can be NULL.*/ + char *m_buf_ptr{}; + /** Offset to current buffer position*/ + size_t m_buf_off{}; + /** Size of valid data in the buffer*/ + size_t m_buf_datalen{}; + + /* Vio handling */ + /** Pointer to original vio->vio_read/vio->has_data function */ + size_t (*m_orig_vio_read)(st_vio *, unsigned char *, size_t){}; + char (*m_orig_vio_has_data)(st_vio *){}; + + + + /** + Begins asynchronnous reading from socket/pipe. + On IO completion, pre-read some bytes into internal buffer + */ + DWORD begin_read(); + + /** + Update number of bytes returned, and IO error status + + Should be called right after IO is completed + GetQueuedCompletionStatus() , or threadpool IO completion + callback would return nbytes and the error. + + Sets the valid data length in the read buffer. + */ + void end_read(ULONG nbytes, DWORD err); + + /** + Override VIO routines with ours, accounting for + one-shot buffering. + */ + void init(st_vio *vio); + + /** Return number of unread bytes.*/ + size_t buffer_remaining(); + + /* Frees the read buffer.*/ + ~win_aiosocket(); +}; + +/* Functions related to IO buffers caches.*/ +extern void init_win_aio_buffers(unsigned int n_buffers); +extern void destroy_win_aio_buffers(); diff --git a/sql/transaction.cc b/sql/transaction.cc new file mode 100644 index 00000000..a6dbf57c --- /dev/null +++ b/sql/transaction.cc @@ -0,0 +1,728 @@ +/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2009, 2021, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +#include "transaction.h" +#include "debug_sync.h" // DEBUG_SYNC +#include "sql_acl.h" +#include "semisync_master.h" +#include +#include +#ifdef WITH_WSREP +#include "wsrep_trans_observer.h" +#endif /* WITH_WSREP */ + +/** + Helper: Tell tracker (if any) that transaction ended. +*/ +void trans_track_end_trx(THD *thd) +{ +#ifndef EMBEDDED_LIBRARY + if (thd->variables.session_track_transaction_info > TX_TRACK_NONE) + thd->session_tracker.transaction_info.end_trx(thd); +#endif //EMBEDDED_LIBRARY +} + + +/** + Helper: transaction ended, SET TRANSACTION one-shot variables + revert to session values. Let the transaction state tracker know. +*/ +void trans_reset_one_shot_chistics(THD *thd) +{ +#ifndef EMBEDDED_LIBRARY + if (thd->variables.session_track_transaction_info > TX_TRACK_NONE) + { + thd->session_tracker.transaction_info.set_read_flags(thd, TX_READ_INHERIT); + thd->session_tracker.transaction_info.set_isol_level(thd, TX_ISOL_INHERIT); + } +#endif //EMBEDDED_LIBRARY + thd->tx_isolation= (enum_tx_isolation) thd->variables.tx_isolation; + thd->tx_read_only= thd->variables.tx_read_only; +} + + +/* + Conditions under which the transaction state must not change + + @result TRUE Transaction can not commit + @result FALSE Transaction can commit +*/ + +static bool trans_check(THD *thd) +{ + DBUG_ENTER("trans_check"); + + /* + Always commit statement transaction before manipulating with + the normal one. 
+ */ + DBUG_ASSERT(thd->transaction->stmt.is_empty()); + + if (unlikely(thd->in_sub_stmt)) + { + my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0)); + DBUG_RETURN(TRUE); + } + if (likely(!thd->transaction->xid_state.is_explicit_XA())) + DBUG_RETURN(FALSE); + + thd->transaction->xid_state.er_xaer_rmfail(); + DBUG_RETURN(TRUE); +} + + +/** + Begin a new transaction. + + @note Beginning a transaction implicitly commits any current + transaction and releases existing locks. + + @param thd Current thread + @param flags Transaction flags + + @retval FALSE Success + @retval TRUE Failure +*/ + +bool trans_begin(THD *thd, uint flags) +{ + int res= FALSE; + DBUG_ENTER("trans_begin"); + + if (trans_check(thd)) + DBUG_RETURN(TRUE); + + if (thd->locked_tables_list.unlock_locked_tables(thd)) + DBUG_RETURN(true); + + DBUG_ASSERT(!thd->locked_tables_mode); + + if (thd->in_multi_stmt_transaction_mode() || + (thd->variables.option_bits & OPTION_TABLE_LOCK)) + { + thd->variables.option_bits&= ~OPTION_TABLE_LOCK; + thd->server_status&= + ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); + DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); + res= MY_TEST(ha_commit_trans(thd, TRUE)); +#ifdef WITH_WSREP + if (wsrep_thd_is_local(thd)) + { + res= res || wsrep_after_statement(thd); + } +#endif /* WITH_WSREP */ + } + + thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_BINLOG_THIS_TRX); + + /* + The following set should not be needed as transaction state should + already be reset. We should at some point change this to an assert. + */ + thd->transaction->all.reset(); + thd->has_waiter= false; + thd->waiting_on_group_commit= false; + thd->transaction->start_time.reset(thd); + + if (res) + DBUG_RETURN(TRUE); + + /* + Release transactional metadata locks only after the + transaction has been committed. + */ + thd->release_transactional_locks(); + + // The RO/RW options are mutually exclusive. 
+ DBUG_ASSERT(!((flags & MYSQL_START_TRANS_OPT_READ_ONLY) && + (flags & MYSQL_START_TRANS_OPT_READ_WRITE))); + if (flags & MYSQL_START_TRANS_OPT_READ_ONLY) + { + thd->tx_read_only= true; +#ifndef EMBEDDED_LIBRARY + if (thd->variables.session_track_transaction_info > TX_TRACK_NONE) + thd->session_tracker.transaction_info.set_read_flags(thd, TX_READ_ONLY); +#endif //EMBEDDED_LIBRARY + } + else if (flags & MYSQL_START_TRANS_OPT_READ_WRITE) + { + /* + Explicitly starting a RW transaction when the server is in + read-only mode, is not allowed unless the user has SUPER priv. + Implicitly starting a RW transaction is allowed for backward + compatibility. + */ + const bool user_is_super= + MY_TEST(thd->security_ctx->master_access & PRIV_IGNORE_READ_ONLY); + if (opt_readonly && !user_is_super) + { + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only"); + DBUG_RETURN(true); + } + thd->tx_read_only= false; + /* + This flags that tx_read_only was set explicitly, rather than + just from the session's default. + */ +#ifndef EMBEDDED_LIBRARY + if (thd->variables.session_track_transaction_info > TX_TRACK_NONE) + thd->session_tracker.transaction_info.set_read_flags(thd, TX_READ_WRITE); +#endif //EMBEDDED_LIBRARY + } + +#ifdef WITH_WSREP + if (wsrep_thd_is_local(thd)) + { + if (wsrep_sync_wait(thd)) + DBUG_RETURN(TRUE); + if (!thd->tx_read_only && + wsrep_start_transaction(thd, thd->wsrep_next_trx_id())) + DBUG_RETURN(TRUE); + } +#endif /* WITH_WSREP */ + + thd->variables.option_bits|= OPTION_BEGIN; + thd->server_status|= SERVER_STATUS_IN_TRANS; + if (thd->tx_read_only) + thd->server_status|= SERVER_STATUS_IN_TRANS_READONLY; + DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS")); + +#ifndef EMBEDDED_LIBRARY + if (thd->variables.session_track_transaction_info > TX_TRACK_NONE) + thd->session_tracker.transaction_info.add_trx_state(thd, TX_EXPLICIT); +#endif //EMBEDDED_LIBRARY + + /* ha_start_consistent_snapshot() relies on OPTION_BEGIN flag set. 
*/ + if (flags & MYSQL_START_TRANS_OPT_WITH_CONS_SNAPSHOT) + { +#ifndef EMBEDDED_LIBRARY + if (thd->variables.session_track_transaction_info > TX_TRACK_NONE) + thd->session_tracker.transaction_info.add_trx_state(thd, TX_WITH_SNAPSHOT); +#endif //EMBEDDED_LIBRARY + res= ha_start_consistent_snapshot(thd); + } + /* + Register transaction start in performance schema if not done already. + We handle explicitly started transactions here, implicitly started + transactions (and single-statement transactions in autocommit=1 mode) + are handled in trans_register_ha(). + We can't handle explicit transactions in the same way as implicit + because we want to correctly attribute statements which follow + BEGIN but do not touch any transactional tables. + */ + if (thd->m_transaction_psi == NULL) + { + thd->m_transaction_psi= MYSQL_START_TRANSACTION(&thd->m_transaction_state, + NULL, 0, thd->tx_isolation, + thd->tx_read_only, false); + DEBUG_SYNC(thd, "after_set_transaction_psi_before_set_transaction_gtid"); + //gtid_set_performance_schema_values(thd); + } + + DBUG_RETURN(MY_TEST(res)); +} + + +/** + Commit the current transaction, making its changes permanent. + + @param thd Current thread + + @retval FALSE Success + @retval TRUE Failure +*/ + +bool trans_commit(THD *thd) +{ + int res; + DBUG_ENTER("trans_commit"); + + if (trans_check(thd)) + DBUG_RETURN(TRUE); + + thd->server_status&= + ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); + DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); + res= ha_commit_trans(thd, TRUE); + + mysql_mutex_assert_not_owner(&LOCK_prepare_ordered); + mysql_mutex_assert_not_owner(mysql_bin_log.get_log_lock()); + mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync); + mysql_mutex_assert_not_owner(&LOCK_commit_ordered); + + /* + if res is non-zero, then ha_commit_trans has rolled back the + transaction, so the hooks for rollback will be called. 
+ */ +#ifdef HAVE_REPLICATION + if (res) + repl_semisync_master.wait_after_rollback(thd, FALSE); + else + repl_semisync_master.wait_after_commit(thd, FALSE); +#endif + thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_BINLOG_THIS_TRX); + thd->transaction->all.reset(); + thd->lex->start_transaction_opt= 0; + + /* The transaction should be marked as complete in P_S. */ + DBUG_ASSERT(thd->m_transaction_psi == NULL); + trans_track_end_trx(thd); + + DBUG_RETURN(MY_TEST(res)); +} + + +/** + Implicitly commit the current transaction. + + @note A implicit commit does not releases existing table locks. + + @param thd Current thread + + @retval FALSE Success + @retval TRUE Failure +*/ + +bool trans_commit_implicit(THD *thd) +{ + bool res= FALSE; + DBUG_ENTER("trans_commit_implicit"); + + if (trans_check(thd)) + DBUG_RETURN(TRUE); + + if (thd->variables.option_bits & OPTION_GTID_BEGIN) + { + DBUG_PRINT("error", ("OPTION_GTID_BEGIN is set. " + "Master and slave will have different GTID values")); + } + + if (thd->in_multi_stmt_transaction_mode() || + (thd->variables.option_bits & OPTION_TABLE_LOCK)) + { + /* Safety if one did "drop table" on locked tables */ + if (!thd->locked_tables_mode) + thd->variables.option_bits&= ~OPTION_TABLE_LOCK; + thd->server_status&= + ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); + DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); + res= MY_TEST(ha_commit_trans(thd, TRUE)); + } + + thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_BINLOG_THIS_TRX); + thd->transaction->all.reset(); + + /* The transaction should be marked as complete in P_S. */ + DBUG_ASSERT(thd->m_transaction_psi == NULL); + + /* + Upon implicit commit, reset the current transaction + isolation level and access mode. We do not care about + @@session.completion_type since it's documented + to not have any effect on implicit commit. 
+ */ + trans_reset_one_shot_chistics(thd); + + trans_track_end_trx(thd); + + DBUG_RETURN(res); +} + + +/** + Rollback the current transaction, canceling its changes. + + @param thd Current thread + + @retval FALSE Success + @retval TRUE Failure +*/ + +bool trans_rollback(THD *thd) +{ + int res; + DBUG_ENTER("trans_rollback"); + + if (trans_check(thd)) + DBUG_RETURN(TRUE); + + thd->server_status&= + ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); + DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); + res= ha_rollback_trans(thd, TRUE); +#ifdef HAVE_REPLICATION + repl_semisync_master.wait_after_rollback(thd, FALSE); +#endif + /* Reset the binlog transaction marker */ + thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_BINLOG_THIS_TRX | + OPTION_GTID_BEGIN); + thd->transaction->all.reset(); + thd->lex->start_transaction_opt= 0; + + /* The transaction should be marked as complete in P_S. */ + DBUG_ASSERT(thd->m_transaction_psi == NULL); + + trans_track_end_trx(thd); + + DBUG_RETURN(MY_TEST(res)); +} + + +/** + Implicitly rollback the current transaction, typically + after deadlock was discovered. + + @param thd Current thread + + @retval False Success + @retval True Failure + + @note ha_rollback_low() which is indirectly called by this + function will mark XA transaction for rollback by + setting appropriate RM error status if there was + transaction rollback request. +*/ + +bool trans_rollback_implicit(THD *thd) +{ + int res; + DBUG_ENTER("trans_rollback_implict"); + + /* + Always commit/rollback statement transaction before manipulating + with the normal one. + Don't perform rollback in the middle of sub-statement, wait till + its end. 
+ */ + DBUG_ASSERT(thd->transaction->stmt.is_empty() && !thd->in_sub_stmt); + + thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); + DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); + res= ha_rollback_trans(thd, true); + /* + We don't reset OPTION_BEGIN flag below to simulate implicit start + of new transacton in @@autocommit=1 mode. This is necessary to + preserve backward compatibility. + */ + thd->variables.option_bits&= ~(OPTION_BINLOG_THIS_TRX); + thd->transaction->all.reset(); + + /* Rollback should clear transaction_rollback_request flag. */ + DBUG_ASSERT(!thd->transaction_rollback_request); + /* The transaction should be marked as complete in P_S. */ + DBUG_ASSERT(thd->m_transaction_psi == NULL); + + trans_track_end_trx(thd); + + DBUG_RETURN(MY_TEST(res)); +} + + +/** + Commit the single statement transaction. + + @note Note that if the autocommit is on, then the following call + inside InnoDB will commit or rollback the whole transaction + (= the statement). The autocommit mechanism built into InnoDB + is based on counting locks, but if the user has used LOCK + TABLES then that mechanism does not know to do the commit. + + @param thd Current thread + + @retval FALSE Success + @retval TRUE Failure +*/ + +bool trans_commit_stmt(THD *thd) +{ + DBUG_ENTER("trans_commit_stmt"); + int res= FALSE; + /* + We currently don't invoke commit/rollback at end of + a sub-statement. In future, we perhaps should take + a savepoint for each nested statement, and release the + savepoint when statement has succeeded. + */ + DBUG_ASSERT(!(thd->in_sub_stmt)); + + thd->merge_unsafe_rollback_flags(); + + if (thd->transaction->stmt.ha_list) + { + res= ha_commit_trans(thd, FALSE); + if (! 
thd->in_active_multi_stmt_transaction()) + { + trans_reset_one_shot_chistics(thd); + } + } + + mysql_mutex_assert_not_owner(&LOCK_prepare_ordered); + mysql_mutex_assert_not_owner(mysql_bin_log.get_log_lock()); + mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync); + mysql_mutex_assert_not_owner(&LOCK_commit_ordered); + + /* + if res is non-zero, then ha_commit_trans has rolled back the + transaction, so the hooks for rollback will be called. + */ + if (res) + { +#ifdef HAVE_REPLICATION + repl_semisync_master.wait_after_rollback(thd, FALSE); +#endif + } + else + { +#ifdef HAVE_REPLICATION + repl_semisync_master.wait_after_commit(thd, FALSE); +#endif + } + + /* In autocommit=1 mode the transaction should be marked as complete in P_S */ + DBUG_ASSERT(thd->in_active_multi_stmt_transaction() || + thd->m_transaction_psi == NULL); + + thd->transaction->stmt.reset(); + + DBUG_RETURN(MY_TEST(res)); +} + + +/** + Rollback the single statement transaction. + + @param thd Current thread + + @retval FALSE Success + @retval TRUE Failure +*/ +bool trans_rollback_stmt(THD *thd) +{ + DBUG_ENTER("trans_rollback_stmt"); + + /* + We currently don't invoke commit/rollback at end of + a sub-statement. In future, we perhaps should take + a savepoint for each nested statement, and release the + savepoint when statement has succeeded. + */ + DBUG_ASSERT(! thd->in_sub_stmt); + + thd->merge_unsafe_rollback_flags(); + + if (thd->transaction->stmt.ha_list) + { + ha_rollback_trans(thd, FALSE); + if (! thd->in_active_multi_stmt_transaction()) + trans_reset_one_shot_chistics(thd); + } + +#ifdef HAVE_REPLICATION + repl_semisync_master.wait_after_rollback(thd, FALSE); +#endif + + /* In autocommit=1 mode the transaction should be marked as complete in P_S */ + DBUG_ASSERT(thd->in_active_multi_stmt_transaction() || + thd->m_transaction_psi == NULL); + + thd->transaction->stmt.reset(); + + DBUG_RETURN(FALSE); +} + +/* Find a named savepoint in the current transaction. 
*/ +static SAVEPOINT ** +find_savepoint(THD *thd, LEX_CSTRING name) +{ + SAVEPOINT **sv= &thd->transaction->savepoints; + + while (*sv) + { + if (system_charset_info->strnncoll( + (uchar *) name.str, name.length, + (uchar *) (*sv)->name, (*sv)->length) == 0) + break; + sv= &(*sv)->prev; + } + + return sv; +} + + +/** + Set a named transaction savepoint. + + @param thd Current thread + @param name Savepoint name + + @retval FALSE Success + @retval TRUE Failure +*/ + +bool trans_savepoint(THD *thd, LEX_CSTRING name) +{ + SAVEPOINT **sv, *newsv; + DBUG_ENTER("trans_savepoint"); + + if (!(thd->in_multi_stmt_transaction_mode() || thd->in_sub_stmt) || + !opt_using_transactions) + DBUG_RETURN(FALSE); + + if (thd->transaction->xid_state.check_has_uncommitted_xa()) + DBUG_RETURN(TRUE); + + sv= find_savepoint(thd, name); + + if (*sv) /* old savepoint of the same name exists */ + { + newsv= *sv; + ha_release_savepoint(thd, *sv); + *sv= (*sv)->prev; + } + else if ((newsv= (SAVEPOINT *) alloc_root(&thd->transaction->mem_root, + savepoint_alloc_size)) == NULL) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + DBUG_RETURN(TRUE); + } + + newsv->name= strmake_root(&thd->transaction->mem_root, name.str, name.length); + newsv->length= (uint)name.length; + + /* + if we'll get an error here, don't add new savepoint to the list. + we'll lose a little bit of memory in transaction mem_root, but it'll + be free'd when transaction ends anyway + */ + if (unlikely(ha_savepoint(thd, newsv))) + DBUG_RETURN(TRUE); + + newsv->prev= thd->transaction->savepoints; + thd->transaction->savepoints= newsv; + + /* + Remember locks acquired before the savepoint was set. + They are used as a marker to only release locks acquired after + the setting of this savepoint. + Note: this works just fine if we're under LOCK TABLES, + since mdl_savepoint() is guaranteed to be beyond + the last locked table. This allows to release some + locks acquired during LOCK TABLES. 
+ */ + newsv->mdl_savepoint= thd->mdl_context.mdl_savepoint(); + + DBUG_RETURN(FALSE); +} + + +/** + Rollback a transaction to the named savepoint. + + @note Modifications that the current transaction made to + rows after the savepoint was set are undone in the + rollback. + + @note Savepoints that were set at a later time than the + named savepoint are deleted. + + @param thd Current thread + @param name Savepoint name + + @retval FALSE Success + @retval TRUE Failure +*/ + +bool trans_rollback_to_savepoint(THD *thd, LEX_CSTRING name) +{ + int res= FALSE; + SAVEPOINT *sv= *find_savepoint(thd, name); + DBUG_ENTER("trans_rollback_to_savepoint"); + + if (sv == NULL) + { + my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "SAVEPOINT", name.str); + DBUG_RETURN(TRUE); + } + + if (thd->transaction->xid_state.check_has_uncommitted_xa()) + DBUG_RETURN(TRUE); + + if (ha_rollback_to_savepoint(thd, sv)) + res= TRUE; + else if (((thd->variables.option_bits & OPTION_BINLOG_THIS_TRX) || + thd->transaction->all.modified_non_trans_table) && + !thd->slave_thread) + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARNING_NOT_COMPLETE_ROLLBACK, + ER_THD(thd, ER_WARNING_NOT_COMPLETE_ROLLBACK)); + + thd->transaction->savepoints= sv; + + if (res) + /* An error occurred during rollback; we cannot release any MDL */; + else if (thd->variables.sql_log_bin && + (WSREP_EMULATE_BINLOG_NNULL(thd) || mysql_bin_log.is_open())) + /* In some cases (such as with non-transactional tables) we may + choose to preserve events that were added after the SAVEPOINT, + delimiting them by SAVEPOINT and ROLLBACK TO SAVEPOINT statements. + Prematurely releasing MDL on such objects would break replication. */; + else if (ha_rollback_to_savepoint_can_release_mdl(thd)) + thd->mdl_context.rollback_to_savepoint(sv->mdl_savepoint); + + DBUG_RETURN(MY_TEST(res)); +} + + +/** + Remove the named savepoint from the set of savepoints of + the current transaction. + + @note No commit or rollback occurs. 
It is an error if the + savepoint does not exist. + + @param thd Current thread + @param name Savepoint name + + @retval FALSE Success + @retval TRUE Failure +*/ + +bool trans_release_savepoint(THD *thd, LEX_CSTRING name) +{ + int res= FALSE; + SAVEPOINT *sv= *find_savepoint(thd, name); + DBUG_ENTER("trans_release_savepoint"); + + if (sv == NULL) + { + my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "SAVEPOINT", name.str); + DBUG_RETURN(TRUE); + } + + if (ha_release_savepoint(thd, sv)) + res= TRUE; + + thd->transaction->savepoints= sv->prev; + + DBUG_RETURN(MY_TEST(res)); +} diff --git a/sql/transaction.h b/sql/transaction.h new file mode 100644 index 00000000..fe0129fa --- /dev/null +++ b/sql/transaction.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef TRANSACTION_H +#define TRANSACTION_H + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include + +class THD; + +void trans_track_end_trx(THD *thd); + +bool trans_begin(THD *thd, uint flags= 0); +bool trans_commit(THD *thd); +bool trans_commit_implicit(THD *thd); +bool trans_rollback(THD *thd); +bool trans_rollback_implicit(THD *thd); + +bool trans_commit_stmt(THD *thd); +bool trans_rollback_stmt(THD *thd); + +bool trans_savepoint(THD *thd, LEX_CSTRING name); +bool trans_rollback_to_savepoint(THD *thd, LEX_CSTRING name); +bool trans_release_savepoint(THD *thd, LEX_CSTRING name); + +void trans_reset_one_shot_chistics(THD *thd); + +#endif /* TRANSACTION_H */ diff --git a/sql/tzfile.h b/sql/tzfile.h new file mode 100644 index 00000000..aa030864 --- /dev/null +++ b/sql/tzfile.h @@ -0,0 +1,142 @@ +#ifndef TZFILE_INCLUDED +#define TZFILE_INCLUDED + +/* Copyright (c) 2004, 2006, 2007 MySQL AB, 2009 Sun Microsystems, Inc. + Use is subject to license terms. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + This file is based on public domain code from ftp://elsie.ncih.nist.gov/ + Initial source code is in the public domain, so clarified as of + 1996-06-05 by Arthur David Olson (arthur_david_olson@nih.gov). +*/ + +/* + Information about time zone files. +*/ + +#ifndef TZDIR +#define TZDIR "/usr/share/zoneinfo" /* Time zone object file directory */ +#endif /* !defined TZDIR */ + +/* + Each file begins with. . . +*/ + +#define TZ_MAGIC "TZif" + +struct tzhead { + uchar tzh_magic[4]; /* TZ_MAGIC */ + uchar tzh_reserved[16]; /* reserved for future use */ + uchar tzh_ttisgmtcnt[4]; /* coded number of trans. time flags */ + uchar tzh_ttisstdcnt[4]; /* coded number of trans. time flags */ + uchar tzh_leapcnt[4]; /* coded number of leap seconds */ + uchar tzh_timecnt[4]; /* coded number of transition times */ + uchar tzh_typecnt[4]; /* coded number of local time types */ + uchar tzh_charcnt[4]; /* coded number of abbr. chars */ +}; + +/* + . . .followed by. . . 
+ + tzh_timecnt (char [4])s coded transition times a la time(2) + tzh_timecnt (unsigned char)s types of local time starting at above + tzh_typecnt repetitions of + one (char [4]) coded UTC offset in seconds + one (unsigned char) used to set tm_isdst + one (unsigned char) that's an abbreviation list index + tzh_charcnt (char)s '\0'-terminated zone abbreviations + tzh_leapcnt repetitions of + one (char [4]) coded leap second transition times + one (char [4]) total correction after above + tzh_ttisstdcnt (char)s indexed by type; if TRUE, transition + time is standard time, if FALSE, + transition time is wall clock time + if absent, transition times are + assumed to be wall clock time + tzh_ttisgmtcnt (char)s indexed by type; if TRUE, transition + time is UTC, if FALSE, + transition time is local time + if absent, transition times are + assumed to be local time +*/ + +/* + In the current implementation, we refuse to deal with files that + exceed any of the limits below. +*/ + +#ifndef TZ_MAX_TIMES +/* + The TZ_MAX_TIMES value below is enough to handle a bit more than a + year's worth of solar time (corrected daily to the nearest second) or + 138 years of Pacific Presidential Election time + (where there are three time zone transitions every fourth year). +*/ +#define TZ_MAX_TIMES 370 +#endif /* !defined TZ_MAX_TIMES */ + +#ifndef TZ_MAX_TYPES +#ifdef SOLAR +#define TZ_MAX_TYPES 256 /* Limited by what (unsigned char)'s can hold */ +#else +/* + Must be at least 14 for Europe/Riga as of Jan 12 1995, + as noted by Earl Chew . 
+*/ +#define TZ_MAX_TYPES 20 /* Maximum number of local time types */ +#endif /* defined SOLAR */ +#endif /* !defined TZ_MAX_TYPES */ + +#ifndef TZ_MAX_CHARS +#define TZ_MAX_CHARS 50 /* Maximum number of abbreviation characters */ + /* (limited by what unsigned chars can hold) */ +#endif /* !defined TZ_MAX_CHARS */ + +#ifndef TZ_MAX_LEAPS +#define TZ_MAX_LEAPS 50 /* Maximum number of leap second corrections */ +#endif /* !defined TZ_MAX_LEAPS */ + +#ifndef TZ_MAX_REV_RANGES +#ifdef SOLAR +/* Solar (Asia/RiyadhXX) zones need significantly bigger TZ_MAX_REV_RANGES */ +#define TZ_MAX_REV_RANGES (TZ_MAX_TIMES*2+TZ_MAX_LEAPS*2+2) +#else +#define TZ_MAX_REV_RANGES (TZ_MAX_TIMES+TZ_MAX_LEAPS+2) +#endif +#endif + +#define SECS_PER_MIN 60 +#define MINS_PER_HOUR 60 +#define HOURS_PER_DAY 24 +#define DAYS_PER_WEEK 7 +#define DAYS_PER_NYEAR 365 +#define DAYS_PER_LYEAR 366 +#define SECS_PER_HOUR (SECS_PER_MIN * MINS_PER_HOUR) +#define SECS_PER_DAY ((long) SECS_PER_HOUR * HOURS_PER_DAY) +#define MONS_PER_YEAR 12 + +#define TM_YEAR_BASE 1900 + +#define EPOCH_YEAR 1970 + +/* + Accurate only for the past couple of centuries, + that will probably do. +*/ + +#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0)) + +#endif diff --git a/sql/tztime.cc b/sql/tztime.cc new file mode 100644 index 00000000..8ddb9f0e --- /dev/null +++ b/sql/tztime.cc @@ -0,0 +1,3094 @@ +/* + Copyright (c) 2004, 2010, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Most of the following code and structures were derived from + public domain code from ftp://elsie.nci.nih.gov/pub + (We will refer to this code as to elsie-code further.) +*/ + +/* + We should not include sql_priv.h in mysql_tzinfo_to_sql utility since + it creates unsolved link dependencies on some platforms. +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#if !defined(TZINFO2SQL) && !defined(TESTTIME) +#include "sql_priv.h" +#include "unireg.h" +#include "sql_time.h" // localtime_to_TIME +#include "sql_base.h" // open_system_tables_for_read, + // close_system_tables +#else +#include +#include +#include +#include +#endif + +#include "tztime.h" +#include "tzfile.h" +#include +#include +#include +#include "lock.h" // MYSQL_LOCK_IGNORE_FLUSH, + // MYSQL_LOCK_IGNORE_TIMEOUT + +/* + Now we don't use abbreviations in server but we will do this in future. +*/ +#if defined(TZINFO2SQL) || defined(TESTTIME) +#define ABBR_ARE_USED +#else +#if !defined(DBUG_OFF) +/* Let use abbreviations for debug purposes */ +#undef ABBR_ARE_USED +#define ABBR_ARE_USED +#endif /* !defined(DBUG_OFF) */ +#endif /* defined(TZINFO2SQL) || defined(TESTTIME) */ + +#define PROGRAM_VERSION "1.1" + +/* Structure describing local time type (e.g. Moscow summer time (MSD)) */ +typedef struct ttinfo +{ + long tt_gmtoff; // Offset from UTC in seconds + uint tt_isdst; // Is daylight saving time or not. Used to set tm_isdst +#ifdef ABBR_ARE_USED + uint tt_abbrind; // Index of start of abbreviation for this time type. +#endif + /* + We don't use tt_ttisstd and tt_ttisgmt members of original elsie-code + struct since we don't support POSIX-style TZ descriptions in variables. 
+ */ +} TRAN_TYPE_INFO; + +/* Structure describing leap-second corrections. */ +typedef struct lsinfo +{ + my_time_t ls_trans; // Transition time + long ls_corr; // Correction to apply +} LS_INFO; + +/* + Structure with information describing ranges of my_time_t shifted to local + time (my_time_t + offset). Used for local MYSQL_TIME -> my_time_t conversion. + See comments for TIME_to_gmt_sec() for more info. +*/ +typedef struct revtinfo +{ + long rt_offset; // Offset of local time from UTC in seconds + uint rt_type; // Type of period 0 - Normal period. 1 - Spring time-gap +} REVT_INFO; + +#ifdef TZNAME_MAX +#define MY_TZNAME_MAX TZNAME_MAX +#endif +#ifndef TZNAME_MAX +#define MY_TZNAME_MAX 255 +#endif + +/* + Structure which fully describes time zone which is + described in our db or in zoneinfo files. +*/ +typedef struct st_time_zone_info +{ + uint leapcnt; // Number of leap-second corrections + uint timecnt; // Number of transitions between time types + uint typecnt; // Number of local time types + uint charcnt; // Number of characters used for abbreviations + uint revcnt; // Number of transition descr. for TIME->my_time_t conversion + /* The following are dynamical arrays are allocated in MEM_ROOT */ + my_time_t *ats; // Times of transitions between time types + uchar *types; // Local time types for transitions + TRAN_TYPE_INFO *ttis; // Local time types descriptions +#ifdef ABBR_ARE_USED + /* Storage for local time types abbreviations. They are stored as ASCIIZ */ + char *chars; +#endif + /* + Leap seconds corrections descriptions, this array is shared by + all time zones who use leap seconds. + */ + LS_INFO *lsis; + /* + Starting points and descriptions of shifted my_time_t (my_time_t + offset) + ranges on which shifted my_time_t -> my_time_t mapping is linear or undefined. + Used for tm -> my_time_t conversion. 
  */
  my_time_t *revts;
  REVT_INFO *revtis;
  /*
    Time type which is used for times smaller than first transition or if
    there are no transitions at all.
  */
  TRAN_TYPE_INFO *fallback_tti;

} TIME_ZONE_INFO;


static my_bool prepare_tz_info(TIME_ZONE_INFO *sp, MEM_ROOT *storage);

my_bool opt_leap, opt_verbose, opt_skip_write_binlog;

#if defined(TZINFO2SQL) || defined(TESTTIME)

/*
  Load a time zone description from a zoneinfo (TZinfo) binary file.

  SYNOPSIS
    tz_load()
      name    - path to zoneinfo file
      sp      - TIME_ZONE_INFO structure to fill
      storage - MEM_ROOT on which the transition arrays are allocated

  RETURN VALUES
    0 - Ok
    1 - Error
*/
static my_bool
tz_load(const char *name, TIME_ZONE_INFO *sp, MEM_ROOT *storage)
{
  uchar *p;
  ssize_t read_from_file;
  uint i;
  MYSQL_FILE *file;

  if (!(file= mysql_file_fopen(0, name, O_RDONLY|O_BINARY, MYF(MY_WME))))
    return 1;
  {
    /*
      Buffer sized for the worst case allowed by the TZ_MAX_* limits:
      header + transition times + transition types + type descriptions
      (+ abbreviations) + leap second descriptions.
    */
    union
    {
      struct tzhead tzhead;
      uchar buf[sizeof(struct tzhead) + sizeof(my_time_t) * TZ_MAX_TIMES +
                TZ_MAX_TIMES + sizeof(TRAN_TYPE_INFO) * TZ_MAX_TYPES +
#ifdef ABBR_ARE_USED
                MY_MAX(TZ_MAX_CHARS + 1, (2 * (MY_TZNAME_MAX + 1))) +
#endif
                sizeof(LS_INFO) * TZ_MAX_LEAPS];
    } u;
    uint ttisstdcnt;
    uint ttisgmtcnt;
    char *tzinfo_buf;

    read_from_file= (ssize_t)mysql_file_fread(file, u.buf, sizeof(u.buf),
                                              MYF(MY_WME));

    if (mysql_file_fclose(file, MYF(MY_WME)) != 0)
      return 1;

    /* The file must at least contain a complete fixed-size header */
    if (read_from_file < (int)sizeof(struct tzhead))
      return 1;

    /*
      NOTE(review): the two indicator counts below are read from the
      opposite-named header fields (ttisstdcnt from tzh_ttisgmtcnt and
      vice versa).  This looks swapped relative to tzfile(5) naming, but
      appears harmless here: both values are only validated to be 0 or
      equal to typecnt and are otherwise unused -- TODO confirm against
      the upstream elsie-derived code.
    */
    ttisstdcnt= int4net(u.tzhead.tzh_ttisgmtcnt);
    ttisgmtcnt= int4net(u.tzhead.tzh_ttisstdcnt);
    sp->leapcnt= int4net(u.tzhead.tzh_leapcnt);
    sp->timecnt= int4net(u.tzhead.tzh_timecnt);
    sp->typecnt= int4net(u.tzhead.tzh_typecnt);
    sp->charcnt= int4net(u.tzhead.tzh_charcnt);
    /* p now points just past the fixed-size header */
    p= u.tzhead.tzh_charcnt + sizeof(u.tzhead.tzh_charcnt);
    /* Sanity-check all counts against the compile-time limits */
    if (sp->leapcnt > TZ_MAX_LEAPS ||
        sp->typecnt == 0 || sp->typecnt > TZ_MAX_TYPES ||
        sp->timecnt > TZ_MAX_TIMES ||
        sp->charcnt > TZ_MAX_CHARS ||
        (ttisstdcnt != sp->typecnt && ttisstdcnt != 0) ||
        (ttisgmtcnt != sp->typecnt && ttisgmtcnt != 0))
      return 1;
    /* Check that the file really contains all the data it promises */
    if ((uint)(read_from_file - (p - u.buf)) <
        sp->timecnt * 4 +                       /* ats */
        sp->timecnt +                           /* types */
        sp->typecnt * (4 + 2) +                 /* ttinfos */
        sp->charcnt +                           /* chars */
        sp->leapcnt * (4 + 4) +                 /* lsinfos */
        ttisstdcnt +                            /* ttisstds */
        ttisgmtcnt)                             /* ttisgmts */
      return 1;

    /* One allocation for all arrays, carved up below */
    if (!(tzinfo_buf= (char *)alloc_root(storage,
                                         ALIGN_SIZE(sp->timecnt *
                                                    sizeof(my_time_t)) +
                                         ALIGN_SIZE(sp->timecnt) +
                                         ALIGN_SIZE(sp->typecnt *
                                                    sizeof(TRAN_TYPE_INFO)) +
#ifdef ABBR_ARE_USED
                                         ALIGN_SIZE(sp->charcnt+1) +
#endif
                                         sp->leapcnt * sizeof(LS_INFO))))
      return 1;

    sp->ats= (my_time_t *)tzinfo_buf;
    tzinfo_buf+= ALIGN_SIZE(sp->timecnt * sizeof(my_time_t));
    sp->types= (uchar *)tzinfo_buf;
    tzinfo_buf+= ALIGN_SIZE(sp->timecnt);
    sp->ttis= (TRAN_TYPE_INFO *)tzinfo_buf;
    tzinfo_buf+= ALIGN_SIZE(sp->typecnt * sizeof(TRAN_TYPE_INFO));
#ifdef ABBR_ARE_USED
    sp->chars= tzinfo_buf;
    tzinfo_buf+= ALIGN_SIZE(sp->charcnt+1);
#endif
    sp->lsis= (LS_INFO *)tzinfo_buf;

    /* Transition times (network byte order, 4 bytes each) */
    for (i= 0; i < sp->timecnt; i++, p+= 4)
      sp->ats[i]= int4net(p);

    /* Transition types: each must reference an existing local time type */
    for (i= 0; i < sp->timecnt; i++)
    {
      sp->types[i]= (uchar) *p++;
      if (sp->types[i] >= sp->typecnt)
        return 1;
    }
    /* Local time type descriptions: offset, DST flag, abbreviation index */
    for (i= 0; i < sp->typecnt; i++)
    {
      TRAN_TYPE_INFO * ttisp;

      ttisp= &sp->ttis[i];
      ttisp->tt_gmtoff= int4net(p);
      p+= 4;
      ttisp->tt_isdst= (uchar) *p++;
      if (ttisp->tt_isdst != 0 && ttisp->tt_isdst != 1)
        return 1;
      ttisp->tt_abbrind= (uchar) *p++;
      if (ttisp->tt_abbrind > sp->charcnt)
        return 1;
    }
    for (i= 0; i < sp->charcnt; i++)
      sp->chars[i]= *p++;
    sp->chars[i]= '\0';	/* ensure '\0' at end */
    /* Leap second descriptions: transition time and cumulative correction */
    for (i= 0; i < sp->leapcnt; i++)
    {
      LS_INFO *lsisp;

      lsisp= &sp->lsis[i];
      lsisp->ls_trans= int4net(p);
      p+= 4;
      lsisp->ls_corr= int4net(p);
      p+= 4;
    }
    /*
      Since we don't support POSIX style TZ definitions in variables we
      don't read further like glibc or elsie code.
  */
  }

  return prepare_tz_info(sp, storage);
}
#endif /* defined(TZINFO2SQL) || defined(TESTTIME) */


/*
  Finish preparation of time zone description for use in TIME_to_gmt_sec()
  and gmt_sec_to_TIME() functions.

  SYNOPSIS
    prepare_tz_info()
      sp      - pointer to time zone description
      storage - pointer to MEM_ROOT where arrays for the map are allocated

  DESCRIPTION
    First task of this function is to find the fallback time type which will
    be used if there are no transitions, or for moments in time before
    any transitions.
    Second task is to build the "shifted my_time_t" -> my_time_t map used in
    MYSQL_TIME -> my_time_t conversion.
    Note: See description of TIME_to_gmt_sec() function first.
    In order to perform MYSQL_TIME -> my_time_t conversion we need to build
    a table which defines the "my_time_t shifted by tz offset and leap
    seconds" -> my_time_t function, which is almost the same (except ranges
    of ambiguity) as the reverse of the piecewise linear function used for
    my_time_t -> "shifted my_time_t" conversion, which in turn is specified
    as a table in the zoneinfo file or in our db (as start of time type
    ranges and time type offsets). So the basic idea is very simple - we
    iterate through my_time_t space from one point of discontinuity of the
    my_time_t -> "shifted my_time_t" function to another and build our
    approximation of the reverse function. (Actually we iterate through
    ranges on which my_time_t -> "shifted my_time_t" is a linear function.)

  RETURN VALUES
    0	Ok
    1	Error
*/
static my_bool
prepare_tz_info(TIME_ZONE_INFO *sp, MEM_ROOT *storage)
{
  my_time_t cur_t= MY_TIME_T_MIN;
  my_time_t cur_l, end_t, UNINIT_VAR(end_l);
  my_time_t cur_max_seen_l= MY_TIME_T_MIN;
  long cur_offset, cur_corr, cur_off_and_corr;
  uint next_trans_idx, next_leap_idx;
  uint i;
  /*
    Temporary arrays where we will store the tables. Needed because
    we don't know the table sizes ahead of time. (Well, we can estimate
    their upper bound but this would take extra space.)
  */
  my_time_t revts[TZ_MAX_REV_RANGES];
  REVT_INFO revtis[TZ_MAX_REV_RANGES];

  /*
    Set up the fallback time type, which will be used if we have no
    transitions or for moments of time before the first transition.
    We find the first non-DST local time type and use it (or use the first
    local time type if all of them are DST types).
  */
  for (i= 0; i < sp->typecnt && sp->ttis[i].tt_isdst; i++)
    /* no-op */ ;
  if (i == sp->typecnt)
    i= 0;
  sp->fallback_tti= &(sp->ttis[i]);


  /*
    Build the shifted my_time_t -> my_time_t map.
  */
  sp->revcnt= 0;

  /* Find the initial offset */
  if (sp->timecnt == 0 || cur_t < sp->ats[0])
  {
    /*
      If we have no transitions, or t is before the first transition, use
      the already-found fallback time type whose index is already in i.
    */
    next_trans_idx= 0;
  }
  else
  {
    /* cur_t == sp->ats[0] so we found a transition */
    i= sp->types[0];
    next_trans_idx= 1;
  }

  cur_offset= sp->ttis[i].tt_gmtoff;


  /* Find the initial leap correction... improbable, but... */
  for (next_leap_idx= 0; next_leap_idx < sp->leapcnt &&
         cur_t >= sp->lsis[next_leap_idx].ls_trans;
         ++next_leap_idx)
    continue;

  if (next_leap_idx > 0)
    cur_corr= sp->lsis[next_leap_idx - 1].ls_corr;
  else
    cur_corr= 0;

  /* Iterate through t space */
  while (sp->revcnt < TZ_MAX_REV_RANGES - 1)
  {
    cur_off_and_corr= cur_offset - cur_corr;

    /*
      We assume that cur_t could only be overflowed downwards;
      we also assume that end_t won't be overflowed in this case.
    */
    if (cur_off_and_corr < 0 &&
        cur_t < MY_TIME_T_MIN - cur_off_and_corr)
      cur_t= MY_TIME_T_MIN - cur_off_and_corr;

    cur_l= cur_t + cur_off_and_corr;

    /*
      Choose end_t as the point just before the next time type change or
      leap second correction.
    */
    end_t= MY_MIN((next_trans_idx < sp->timecnt) ? sp->ats[next_trans_idx] - 1:
                                                   MY_TIME_T_MAX,
                  (next_leap_idx < sp->leapcnt) ?
                    sp->lsis[next_leap_idx].ls_trans - 1: MY_TIME_T_MAX);
    /*
      Again, assuming that end_t can be overflowed only on the positive
      side; we also assume that end_t won't be overflowed in this case.
    */
    if (cur_off_and_corr > 0 &&
        end_t > MY_TIME_T_MAX - cur_off_and_corr)
      end_t= MY_TIME_T_MAX - cur_off_and_corr;

    end_l= end_t + cur_off_and_corr;


    if (end_l > cur_max_seen_l)
    {
      /* We want special handling in the case of the first range */
      if (cur_max_seen_l == MY_TIME_T_MIN)
      {
        revts[sp->revcnt]= cur_l;
        revtis[sp->revcnt].rt_offset= cur_off_and_corr;
        revtis[sp->revcnt].rt_type= 0;          /* normal period */
        sp->revcnt++;
        cur_max_seen_l= end_l;
      }
      else
      {
        if (cur_l > cur_max_seen_l + 1)
        {
          /* We have a spring time-gap and we are not at the first range */
          revts[sp->revcnt]= cur_max_seen_l + 1;
          revtis[sp->revcnt].rt_offset= revtis[sp->revcnt-1].rt_offset;
          revtis[sp->revcnt].rt_type= 1;        /* spring time-gap */
          sp->revcnt++;
          if (sp->revcnt == TZ_MAX_TIMES + TZ_MAX_LEAPS + 1)
            break; /* That was too much */
          cur_max_seen_l= cur_l - 1;
        }

        /* Assume here end_l > cur_max_seen_l (because end_l >= cur_l) */

        revts[sp->revcnt]= cur_max_seen_l + 1;
        revtis[sp->revcnt].rt_offset= cur_off_and_corr;
        revtis[sp->revcnt].rt_type= 0;
        sp->revcnt++;
        cur_max_seen_l= end_l;
      }
    }

    if (end_t == MY_TIME_T_MAX ||
        ((cur_off_and_corr > 0) &&
        (end_t >= MY_TIME_T_MAX - cur_off_and_corr)))
      /* end of t space */
      break;

    cur_t= end_t + 1;

    /*
      Find the new offset and correction. Because of our choice of end_t,
      cur_t can only be a point where a new time type starts and/or a leap
      correction is performed.
    */
    if (sp->timecnt != 0 && cur_t >= sp->ats[0]) /* else reuse old offset */
      if (next_trans_idx < sp->timecnt &&
          cur_t == sp->ats[next_trans_idx])
      {
        /* We are at an offset point */
        cur_offset= sp->ttis[sp->types[next_trans_idx]].tt_gmtoff;
        ++next_trans_idx;
      }

    if (next_leap_idx < sp->leapcnt &&
        cur_t == sp->lsis[next_leap_idx].ls_trans)
    {
      /* we are at a leap point */
      cur_corr= sp->lsis[next_leap_idx].ls_corr;
      ++next_leap_idx;
    }
  }

  /* check if we had enough space */
  if (sp->revcnt == TZ_MAX_REV_RANGES - 1)
    return 1;

  /* set maximum end_l as finisher */
  revts[sp->revcnt]= end_l;

  /* Allocate arrays of proper size in sp and copy the result there */
  if (!(sp->revts= (my_time_t *)alloc_root(storage,
                                  sizeof(my_time_t) * (sp->revcnt + 1))) ||
      !(sp->revtis= (REVT_INFO *)alloc_root(storage,
                                  sizeof(REVT_INFO) * sp->revcnt)))
    return 1;

  memcpy(sp->revts, revts, sizeof(my_time_t) * (sp->revcnt + 1));
  memcpy(sp->revtis, revtis, sizeof(REVT_INFO) * sp->revcnt);

  return 0;
}


#if !defined(TZINFO2SQL)

/* Days in each month for normal [0] and leap [1] years */
static const uint mon_lengths[2][MONS_PER_YEAR]=
{
  { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
  { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
};

/* Day-of-year at which each month starts, normal [0] and leap [1] years */
static const uint mon_starts[2][MONS_PER_YEAR]=
{
  { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 },
  { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335 }
};

static const uint year_lengths[2]=
{
  DAYS_PER_NYEAR, DAYS_PER_LYEAR
};

/* Number of leap years up to and including year y (proleptic Gregorian) */
#define LEAPS_THRU_END_OF(y)  ((y) / 4 - (y) / 100 + (y) / 400)


/*
  Converts time from my_time_t representation (seconds in UTC since Epoch)
  to broken down representation using given local time zone offset.

  SYNOPSIS
    sec_to_TIME()
      tmp    - pointer to structure for broken down representation
      t      - my_time_t value to be converted
      offset - local time zone offset

  DESCRIPTION
    Convert my_time_t with offset to MYSQL_TIME struct.
 Differs from timesub
    (from elsie code) because it doesn't contain any leap correction or
    TM_GMTOFF and is_dst setting, and contains some MySQL specific
    initialization. Funny, but after removing these we almost have
    glibc's offtime function.
*/
static void
sec_to_TIME(MYSQL_TIME * tmp, my_time_t t, long offset)
{
  long days;
  long rem;
  int y;
  int yleap;
  const uint *ip;

  days= (long) (t / SECS_PER_DAY);
  rem=  (long) (t % SECS_PER_DAY);

  /*
    We do this as a separate step after dividing t, because this
    allows us to handle times near the my_time_t bounds without overflows.
  */
  rem+= offset;
  while (rem < 0)
  {
    rem+= SECS_PER_DAY;
    days--;
  }
  while (rem >= SECS_PER_DAY)
  {
    rem -= SECS_PER_DAY;
    days++;
  }
  tmp->hour= (uint)(rem / SECS_PER_HOUR);
  rem= rem % SECS_PER_HOUR;
  tmp->minute= (uint)(rem / SECS_PER_MIN);
  /*
    A positive leap second requires a special
    representation. This uses "... ??:59:60" et seq.
  */
  tmp->second= (uint)(rem % SECS_PER_MIN);

  /* Walk year by year (in jumps) until days fits inside year y */
  y= EPOCH_YEAR;
  while (days < 0 || days >= (long)year_lengths[yleap= isleap(y)])
  {
    int newy;

    newy= y + days / DAYS_PER_NYEAR;
    if (days < 0)
      newy--;
    days-= (newy - y) * DAYS_PER_NYEAR +
           LEAPS_THRU_END_OF(newy - 1) -
           LEAPS_THRU_END_OF(y - 1);
    y= newy;
  }
  tmp->year= y;

  ip= mon_lengths[yleap];
  for (tmp->month= 0; days >= (long) ip[tmp->month]; tmp->month++)
    days= days - (long) ip[tmp->month];
  tmp->month++;                          /* months are 1-based in MYSQL_TIME */
  tmp->day= (uint)(days + 1);

  /* filling MySQL specific MYSQL_TIME members */
  tmp->neg= 0; tmp->second_part= 0;
  tmp->time_type= MYSQL_TIMESTAMP_DATETIME;
}


/*
  Find the time range which contains a given my_time_t value

  SYNOPSIS
    find_time_range()
      t                - my_time_t value for which we are looking for a range
      range_boundaries - sorted array of range starts.
      higher_bound     - number of ranges

  DESCRIPTION
    Performs binary search for the range which contains the given my_time_t
    value. It makes sense if the number of ranges is greater than zero and
    the my_time_t value is greater than or equal to the beginning of the
    first range. It also assumes that t belongs to some range specified,
    or that the end of the last range is MY_TIME_T_MAX.

    With this, localtime_r on real data may take less time than with linear
    search (I've seen 30% speed up).

  RETURN VALUE
    Index of the range to which t belongs
*/
static uint
find_time_range(my_time_t t, const my_time_t *range_boundaries,
                uint higher_bound)
{
  uint i, lower_bound= 0;

  /*
    Function will work without this assertion but the result would be
    meaningless.
  */
  DBUG_ASSERT(higher_bound > 0 && t >= range_boundaries[0]);

  /*
    Do binary search for the minimal interval which contains t. We preserve
    the invariant:
      range_boundaries[lower_bound] <= t < range_boundaries[higher_bound]
    and halve the higher_bound - lower_bound gap on each step.
  */

  while (higher_bound - lower_bound > 1)
  {
    i= (lower_bound + higher_bound) >> 1;
    if (range_boundaries[i] <= t)
      lower_bound= i;
    else
      higher_bound= i;
  }
  return lower_bound;
}

/*
  Find the local time transition for a given my_time_t.

  SYNOPSIS
    find_transition_type()
      t  - my_time_t value to be converted
      sp - pointer to struct with time zone description

  RETURN VALUE
    Pointer to the structure in the time zone description describing
    the local time type for the given my_time_t.
*/
static
const TRAN_TYPE_INFO *
find_transition_type(my_time_t t, const TIME_ZONE_INFO *sp)
{
  if (unlikely(sp->timecnt == 0 || t < sp->ats[0]))
  {
    /*
      If we have no transitions, or t is before the first transition,
      use the fallback time type.
    */
    return sp->fallback_tti;
  }

  /*
    Do binary search for the minimal interval between transitions which
    contains t. With this, localtime_r on real data may take less
    time than with linear search (I've seen 30% speed up).
  */
  return &(sp->ttis[sp->types[find_time_range(t, sp->ats, sp->timecnt)]]);
}


/*
  Converts time in my_time_t representation (seconds in UTC since Epoch) to
  broken down MYSQL_TIME representation in local time zone.

  SYNOPSIS
    gmt_sec_to_TIME()
      tmp        - pointer to structure for broken down representation
      sec_in_utc - my_time_t value to be converted
      sp         - pointer to struct with time zone description

  TODO
    We can improve this function by creating a joined array of transitions
    and leap corrections. This will require adding an extra field to
    TRAN_TYPE_INFO for storing the number of "extra" seconds added to the
    minute due to correction (60th and 61st second, see how we calculate
    them as "hit" in this function).
    Under realistic assumptions about frequency of transitions the same
    array can be used for MYSQL_TIME -> my_time_t conversion. For this we
    need to implement a tweaked binary search which takes into account that
    some MYSQL_TIME values have two matching my_time_t ranges and some have
    none.
*/
static void
gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t sec_in_utc, const TIME_ZONE_INFO *sp)
{
  const TRAN_TYPE_INFO *ttisp;
  const LS_INFO *lp;
  long corr= 0;
  int hit= 0;
  int i;

  /*
    Find the proper transition (and its local time type) for our sec_in_utc
    value. Funny, but again by separating this step into a function we get
    code which is very close to glibc's. No wonder, since they obviously use
    the same base and all steps are sensible.
  */
  ttisp= find_transition_type(sec_in_utc, sp);

  /*
    Find the leap correction for our sec_in_utc value and the number of
    extra secs to add to this minute.
    This loop is rarely used because most users will use time zones without
    leap seconds, and even when we have such a time zone there won't
    be many iterations (we have about 22 corrections at this moment (2004)).
  */
  for ( i= sp->leapcnt; i-- > 0; )
  {
    lp= &sp->lsis[i];
    if (sec_in_utc >= lp->ls_trans)
    {
      if (sec_in_utc == lp->ls_trans)
      {
        /* hit != 0 means sec_in_utc falls exactly on a leap second */
        hit= ((i == 0 && lp->ls_corr > 0) ||
              lp->ls_corr > sp->lsis[i - 1].ls_corr);
        if (hit)
        {
          while (i > 0 &&
                 sp->lsis[i].ls_trans == sp->lsis[i - 1].ls_trans + 1 &&
                 sp->lsis[i].ls_corr == sp->lsis[i - 1].ls_corr + 1)
          {
            hit++;
            i--;
          }
        }
      }
      corr= lp->ls_corr;
      break;
    }
  }

  sec_to_TIME(tmp, sec_in_utc, ttisp->tt_gmtoff - corr);

  /* May yield second == 60 or 61 for a positive leap second */
  tmp->second+= hit;
}


/*
  Converts local time in broken down representation to the local
  time zone analog of my_time_t representation.

  SYNOPSIS
    sec_since_epoch()
      year, mon, mday, hour, min, sec - broken down representation.

  DESCRIPTION
    Converts time in broken down representation to my_time_t representation
    ignoring time zone. Note that we cannot convert back some valid _local_
    times near the ends of the my_time_t range because of my_time_t
    overflow. But we ignore this fact for now since MySQL will never pass
    such an argument.

  RETURN VALUE
    Seconds since epoch time representation.
*/
static my_time_t
sec_since_epoch(int year, int mon, int mday, int hour, int min ,int sec)
{
  /* Guard against my_time_t overflow (on systems with 32 bit my_time_t) */
  DBUG_ASSERT(!(year == TIMESTAMP_MAX_YEAR && mon == 1 && mday > 17));
#ifndef WE_WANT_TO_HANDLE_UNORMALIZED_DATES
  /*
    It turns out that all that matters here is whether the month is
    normalized or not.
  */
  DBUG_ASSERT(mon > 0 && mon < 13);
  /* Days from Epoch to Jan 1 of 'year' (proleptic Gregorian) */
  long days= year * DAYS_PER_NYEAR - EPOCH_YEAR * DAYS_PER_NYEAR +
             LEAPS_THRU_END_OF(year - 1) -
             LEAPS_THRU_END_OF(EPOCH_YEAR - 1);
  days+= mon_starts[isleap(year)][mon - 1];
#else
  /* Normalize a possibly out-of-range month into a_year/norm_month */
  long norm_month= (mon - 1) % MONS_PER_YEAR;
  long a_year= year + (mon - 1)/MONS_PER_YEAR - (int)(norm_month < 0);
  long days= a_year * DAYS_PER_NYEAR - EPOCH_YEAR * DAYS_PER_NYEAR +
             LEAPS_THRU_END_OF(a_year - 1) -
             LEAPS_THRU_END_OF(EPOCH_YEAR - 1);
  days+= mon_starts[isleap(a_year)]
                   [norm_month + (norm_month < 0 ? MONS_PER_YEAR : 0)];
#endif
  days+= mday - 1;

  return ((days * HOURS_PER_DAY + hour) * MINS_PER_HOUR + min) *
         SECS_PER_MIN + sec;
}

/*
  Converts local time in broken down MYSQL_TIME representation to my_time_t
  representation.

  SYNOPSIS
    TIME_to_gmt_sec()
      t          - pointer to structure for broken down representation
      sp         - pointer to struct with time zone description
      error_code - 0, if the conversion was successful;
                   ER_WARN_DATA_OUT_OF_RANGE, if t contains datetime value
                      which is out of TIMESTAMP range;
                   ER_WARN_INVALID_TIMESTAMP, if t represents value which
                      doesn't exist (falls into the spring time-gap).

  DESCRIPTION
    This is the mktime analog for MySQL. It is essentially different
    from mktime (or a hypothetical my_mktime) because:
      - It has no idea about the tm_isdst member, so if it
        has two answers it will give the smaller one
      - If we are in a spring time gap then it will return the
        beginning of the gap
      - It can give wrong results near the ends of my_time_t due to
        overflows, but we are safe since in MySQL we will never
        call this function for such dates (its restriction to years
        between 1970 and 2038 gives us several days of reserve).
      - By default it doesn't support un-normalized input. But if the
        sec_since_epoch() function supports un-normalized dates
        then this function should handle un-normalized input correctly,
        although it won't normalize the TIME structure.
    Traditional approaches to the problem of converting a broken-down
    representation to time_t are iterative. Both the elsie and the glibc
    implementations try to guess which time_t value should correspond to
    the broken-down value: they run localtime_r() on their guessed value,
    calculate the difference from the target and refine the guess. Elsie's
    code guesses the time_t value bit by bit; glibc's code adds the
    difference between the broken-down value corresponding to the guess
    and the target broken-down value to the current guess, and also caches
    the last found correction. So glibc's approach is essentially faster,
    but it introduces some non-determinism (in the case where the is_dst
    member of the broken-down representation (struct tm) is not known and
    we have two possible answers).

    We use a completely different approach. It is better since it is both
    faster than the iterative implementations and fully deterministic. If
    you look at the my_time_t to MYSQL_TIME conversion you will find that
    it consists of two steps: the first is calculating the shifted
    my_time_t value, and the second is the TIME calculation from the
    shifted my_time_t value (this is a slightly simplified picture). The
    part we are interested in is the my_time_t -> shifted my_time_t
    conversion. It is a piecewise linear function which is defined by the
    combination of transition times as break points and time offsets as
    the changing function parameter. A true inverse of this function would
    be ambiguous, but with MySQL's restrictions we can use a function
    which matches the inverse function on the unambiguous ranges and
    coincides with one of the branches of the inverse function in the
    other ranges. Thus we just need to build a table which determines this
    shifted my_time_t -> my_time_t conversion, similar to the existing
    (my_time_t -> shifted my_time_t) table. We do this in the
    prepare_tz_info() function.

  TODO
    We can improve this function even further.
 For doing this we will need to
    build a joined map of transitions and leap corrections for the
    gmt_sec_to_TIME() function (similar to revts/revtis). Under realistic
    assumptions about the frequency of transitions we can use the same
    array for TIME_to_gmt_sec(). We need to implement a special version of
    binary search for this. Such a step would be beneficial to the CPU
    cache since we would halve the data-set used for conversion.

  RETURN VALUE
    Seconds in UTC since Epoch.
    0 in case of error.
*/

static my_time_t
TIME_to_gmt_sec(const MYSQL_TIME *t, const TIME_ZONE_INFO *sp, uint *error_code)
{
  my_time_t local_t;
  uint saved_seconds;
  uint i;
  int shift= 0;
  DBUG_ENTER("TIME_to_gmt_sec");

  if (!validate_timestamp_range(t))
  {
    *error_code= ER_WARN_DATA_OUT_OF_RANGE;
    DBUG_RETURN(0);
  }

  *error_code= 0;

  /* We need this for correct leap seconds handling */
  if (t->second < SECS_PER_MIN)
    saved_seconds= 0;
  else
    saved_seconds= t->second;           /* a leap second, e.g. :60 or :61 */

  /*
    NOTE: to convert the full my_time_t range we do a shift of the
    boundary dates here to avoid overflow of my_time_t.
    We use a similar approach in my_system_gmt_sec().

    However, in that function we also have to take into account
    overflow near 0 on some platforms. That's because my_system_gmt_sec
    uses localtime_r(), which doesn't work with negative values correctly
    on platforms with unsigned time_t (QNX). Here we don't use localtime()
    => negative values of local_t are ok.
  */

  if ((t->year == TIMESTAMP_MAX_YEAR) && (t->month == 1) && t->day > 4)
  {
    /*
      We will pass (t->day - shift) to sec_since_epoch(), and
      want this value to be a positive number, so we shift
      only dates > 4.01.2038 (to avoid overflow).
    */
    shift= 2;
  }


  local_t= sec_since_epoch(t->year, t->month, (t->day - shift),
                           t->hour, t->minute,
                           saved_seconds ? 0 : t->second);

  /* We have at least one range */
  DBUG_ASSERT(sp->revcnt >= 1);

  if (local_t < sp->revts[0] || local_t > sp->revts[sp->revcnt])
  {
    /*
      This means that the source time can't be represented as my_time_t
      due to the limited my_time_t range.
    */
    *error_code= ER_WARN_DATA_OUT_OF_RANGE;
    DBUG_RETURN(0);
  }

  /* binary search for our range */
  i= find_time_range(local_t, sp->revts, sp->revcnt);

  /*
    As there are no offset switches at the end of the TIMESTAMP range,
    we can simply check for overflow here (and don't need to bother
    about DST gaps etc).
  */
  if (shift)
  {
    if (local_t > (my_time_t) (TIMESTAMP_MAX_VALUE - shift * SECS_PER_DAY +
                               sp->revtis[i].rt_offset - saved_seconds))
    {
      *error_code= ER_WARN_DATA_OUT_OF_RANGE;
      DBUG_RETURN(0);                   /* my_time_t overflow */
    }
    local_t+= shift * SECS_PER_DAY;     /* undo the boundary-date shift */
  }

  if (sp->revtis[i].rt_type)
  {
    /*
      Oops! We are in a spring time gap.
      Maybe we should return an error here?
      For now we return the my_time_t value corresponding to the
      beginning of the gap.
    */
    *error_code= ER_WARN_INVALID_TIMESTAMP;
    local_t= sp->revts[i] - sp->revtis[i].rt_offset + saved_seconds;
  }
  else
    local_t= local_t - sp->revtis[i].rt_offset + saved_seconds;

  /* check for TIMESTAMP_MAX_VALUE was already done above */
  if (local_t < TIMESTAMP_MIN_VALUE)
  {
    local_t= 0;
    *error_code= ER_WARN_DATA_OUT_OF_RANGE;
  }

  DBUG_RETURN(local_t);
}


/*
  End of elsie derived code.
*/
#endif /* !defined(TZINFO2SQL) */


#if !defined(TESTTIME) && !defined(TZINFO2SQL)

/*
  String with the name of the SYSTEM time zone.
*/
static const String tz_SYSTEM_name("SYSTEM", 6, &my_charset_latin1);


/*
  Instance of this class represents the local time zone used on this system
  (specified by the TZ environment variable or via any other system
  mechanism). It uses system functions (localtime_r, my_system_gmt_sec) for
  conversion and is always available.
Because of this it is used by default - if there + were no explicit time zone specified. On the other hand because of this + conversion methods provided by this class is significantly slower and + possibly less multi-threaded-friendly than corresponding Time_zone_db + methods so the latter should be preffered there it is possible. +*/ +class Time_zone_system : public Time_zone +{ +public: + Time_zone_system() = default; /* Remove gcc warning */ + virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const; + virtual void gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const; + virtual const String * get_name() const; +}; + + +/* + Converts local time in system time zone in MYSQL_TIME representation + to its my_time_t representation. + + SYNOPSIS + TIME_to_gmt_sec() + t - pointer to MYSQL_TIME structure with local time in + broken-down representation. + error_code - 0, if the conversion was successful; + ER_WARN_DATA_OUT_OF_RANGE, if t contains datetime value + which is out of TIMESTAMP range; + ER_WARN_INVALID_TIMESTAMP, if t represents value which + doesn't exists (falls into the spring time-gap). + + DESCRIPTION + This method uses system function (localtime_r()) for conversion + local time in system time zone in MYSQL_TIME structure to its my_time_t + representation. Unlike the same function for Time_zone_db class + it it won't handle unnormalized input properly. Still it will + return lowest possible my_time_t in case of ambiguity or if we + provide time corresponding to the time-gap. + + You should call my_init_time() function before using this function. + + RETURN VALUE + Corresponding my_time_t value or 0 in case of error +*/ +my_time_t +Time_zone_system::TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const +{ + long not_used; + return my_system_gmt_sec(t, ¬_used, error_code); +} + + +/* + Converts time from UTC seconds since Epoch (my_time_t) representation + to system local time zone broken-down representation. 
+ + SYNOPSIS + gmt_sec_to_TIME() + tmp - pointer to MYSQL_TIME structure to fill-in + t - my_time_t value to be converted + + NOTE + We assume that value passed to this function will fit into time_t range + supported by localtime_r. This conversion is putting restriction on + TIMESTAMP range in MySQL. If we can get rid of SYSTEM time zone at least + for interaction with client then we can extend TIMESTAMP range down to + the 1902 easily. +*/ +void +Time_zone_system::gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const +{ + struct tm tmp_tm; + time_t tmp_t= (time_t)t; + + localtime_r(&tmp_t, &tmp_tm); + localtime_to_TIME(tmp, &tmp_tm); + tmp->time_type= MYSQL_TIMESTAMP_DATETIME; + adjust_leap_second(tmp); +} + + +/* + Get name of time zone + + SYNOPSIS + get_name() + + RETURN VALUE + Name of time zone as String +*/ +const String * +Time_zone_system::get_name() const +{ + return &tz_SYSTEM_name; +} + + +/* + Instance of this class represents UTC time zone. It uses system gmtime_r + function for conversions and is always available. It is used only for + my_time_t -> MYSQL_TIME conversions in various UTC_... functions, it is not + intended for MYSQL_TIME -> my_time_t conversions and shouldn't be exposed to user. +*/ +class Time_zone_utc : public Time_zone +{ +public: + Time_zone_utc() = default; /* Remove gcc warning */ + virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t, + uint *error_code) const; + virtual void gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const; + virtual const String * get_name() const; +}; + + +/* + Convert UTC time from MYSQL_TIME representation to its my_time_t representation. + + DESCRIPTION + Since Time_zone_utc is used only internally for my_time_t -> TIME + conversions, this function of Time_zone interface is not implemented for + this class and should not be called. 
+ + RETURN VALUE + 0 +*/ +my_time_t +Time_zone_utc::TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const +{ + /* Should be never called */ + DBUG_ASSERT(0); + *error_code= ER_WARN_DATA_OUT_OF_RANGE; + return 0; +} + + +/* + Converts time from UTC seconds since Epoch (my_time_t) representation + to broken-down representation (also in UTC). + + SYNOPSIS + gmt_sec_to_TIME() + tmp - pointer to MYSQL_TIME structure to fill-in + t - my_time_t value to be converted + + NOTE + See note for apropriate Time_zone_system method. +*/ +void +Time_zone_utc::gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const +{ + struct tm tmp_tm; + time_t tmp_t= (time_t)t; + gmtime_r(&tmp_t, &tmp_tm); + localtime_to_TIME(tmp, &tmp_tm); + tmp->time_type= MYSQL_TIMESTAMP_DATETIME; + adjust_leap_second(tmp); +} + + +/* + Get name of time zone + + SYNOPSIS + get_name() + + DESCRIPTION + Since Time_zone_utc is used only internally by SQL's UTC_* functions it + is not accessible directly, and hence this function of Time_zone + interface is not implemented for this class and should not be called. + + RETURN VALUE + 0 +*/ +const String * +Time_zone_utc::get_name() const +{ + /* Should be never called */ + DBUG_ASSERT(0); + return 0; +} + + +/* + Instance of this class represents some time zone which is + described in mysql.time_zone family of tables. +*/ +class Time_zone_db : public Time_zone +{ +public: + Time_zone_db(TIME_ZONE_INFO *tz_info_arg, const String * tz_name_arg); + virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const; + virtual void gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const; + virtual const String * get_name() const; +private: + TIME_ZONE_INFO *tz_info; + const String *tz_name; +}; + + +/* + Initializes object representing time zone described by mysql.time_zone + tables. + + SYNOPSIS + Time_zone_db() + tz_info_arg - pointer to TIME_ZONE_INFO structure which is filled + according to db or other time zone description + (for example by my_tz_init()). 
+ Several Time_zone_db instances can share one + TIME_ZONE_INFO structure. + tz_name_arg - name of time zone. +*/ +Time_zone_db::Time_zone_db(TIME_ZONE_INFO *tz_info_arg, + const String *tz_name_arg): + tz_info(tz_info_arg), tz_name(tz_name_arg) +{ +} + + +/* + Converts local time in time zone described from TIME + representation to its my_time_t representation. + + SYNOPSIS + TIME_to_gmt_sec() + t - pointer to MYSQL_TIME structure with local time + in broken-down representation. + error_code - 0, if the conversion was successful; + ER_WARN_DATA_OUT_OF_RANGE, if t contains datetime value + which is out of TIMESTAMP range; + ER_WARN_INVALID_TIMESTAMP, if t represents value which + doesn't exists (falls into the spring time-gap). + + DESCRIPTION + Please see ::TIME_to_gmt_sec for function description and + parameter restrictions. + + RETURN VALUE + Corresponding my_time_t value or 0 in case of error +*/ +my_time_t +Time_zone_db::TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const +{ + return ::TIME_to_gmt_sec(t, tz_info, error_code); +} + + +/* + Converts time from UTC seconds since Epoch (my_time_t) representation + to local time zone described in broken-down representation. + + SYNOPSIS + gmt_sec_to_TIME() + tmp - pointer to MYSQL_TIME structure to fill-in + t - my_time_t value to be converted +*/ +void +Time_zone_db::gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const +{ + ::gmt_sec_to_TIME(tmp, t, tz_info); + adjust_leap_second(tmp); +} + + +/* + Get name of time zone + + SYNOPSIS + get_name() + + RETURN VALUE + Name of time zone as ASCIIZ-string +*/ +const String * +Time_zone_db::get_name() const +{ + return tz_name; +} + + +/* + Instance of this class represents time zone which + was specified as offset from UTC. 
+*/ +class Time_zone_offset : public Time_zone +{ +public: + Time_zone_offset(long tz_offset_arg); + virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t, + uint *error_code) const; + virtual void gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const; + virtual const String * get_name() const; + /* + This have to be public because we want to be able to access it from + my_offset_tzs_get_key() function + */ + long offset; +private: + /* Extra reserve because of snprintf */ + char name_buff[7+16]; + String name; +}; + + +/* + Initializes object representing time zone described by its offset from UTC. + + SYNOPSIS + Time_zone_offset() + tz_offset_arg - offset from UTC in seconds. + Positive for direction to east. +*/ +Time_zone_offset::Time_zone_offset(long tz_offset_arg): + offset(tz_offset_arg) +{ + uint hours= abs((int)(offset / SECS_PER_HOUR)); + uint minutes= abs((int)(offset % SECS_PER_HOUR / SECS_PER_MIN)); + size_t length= my_snprintf(name_buff, sizeof(name_buff), "%s%02d:%02d", + (offset>=0) ? "+" : "-", hours, minutes); + name.set(name_buff, length, &my_charset_latin1); +} + + +/* + Converts local time in time zone described as offset from UTC + from MYSQL_TIME representation to its my_time_t representation. + + SYNOPSIS + TIME_to_gmt_sec() + t - pointer to MYSQL_TIME structure with local time + in broken-down representation. + error_code - 0, if the conversion was successful; + ER_WARN_DATA_OUT_OF_RANGE, if t contains datetime value + which is out of TIMESTAMP range; + ER_WARN_INVALID_TIMESTAMP, if t represents value which + doesn't exists (falls into the spring time-gap). + + RETURN VALUE + Corresponding my_time_t value or 0 in case of error. +*/ + +my_time_t +Time_zone_offset::TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const +{ + my_time_t local_t; + int shift= 0; + + /* + Check timestamp range.we have to do this as calling function relies on + us to make all validation checks here. 
+ */ + if (!validate_timestamp_range(t)) + { + *error_code= ER_WARN_DATA_OUT_OF_RANGE; + return 0; + } + *error_code= 0; + + /* + Do a temporary shift of the boundary dates to avoid + overflow of my_time_t if the time value is near it's + maximum range + */ + if ((t->year == TIMESTAMP_MAX_YEAR) && (t->month == 1) && t->day > 4) + shift= 2; + + local_t= sec_since_epoch(t->year, t->month, (t->day - shift), + t->hour, t->minute, t->second) - + offset; + + if (shift) + { + /* Add back the shifted time */ + local_t+= shift * SECS_PER_DAY; + } + + if (local_t >= TIMESTAMP_MIN_VALUE && local_t <= TIMESTAMP_MAX_VALUE) + return local_t; + + /* range error*/ + *error_code= ER_WARN_DATA_OUT_OF_RANGE; + return 0; +} + + +/* + Converts time from UTC seconds since Epoch (my_time_t) representation + to local time zone described as offset from UTC and in broken-down + representation. + + SYNOPSIS + gmt_sec_to_TIME() + tmp - pointer to MYSQL_TIME structure to fill-in + t - my_time_t value to be converted +*/ +void +Time_zone_offset::gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const +{ + sec_to_TIME(tmp, t, offset); +} + + +/* + Get name of time zone + + SYNOPSIS + get_name() + + RETURN VALUE + Name of time zone as pointer to String object +*/ +const String * +Time_zone_offset::get_name() const +{ + return &name; +} + + +static Time_zone_utc tz_UTC; +static Time_zone_system tz_SYSTEM; +static Time_zone_offset tz_OFFSET0(0); + +Time_zone *my_tz_OFFSET0= &tz_OFFSET0; +Time_zone *my_tz_UTC= &tz_UTC; +Time_zone *my_tz_SYSTEM= &tz_SYSTEM; + +static HASH tz_names; +static HASH offset_tzs; +static MEM_ROOT tz_storage; + +/* + These mutex protects offset_tzs and tz_storage. + These protection needed only when we are trying to set + time zone which is specified as offset, and searching for existing + time zone in offset_tzs or creating if it didn't existed before in + tz_storage. So contention is low. 
+*/ +static mysql_mutex_t tz_LOCK; +static bool tz_inited= 0; + +/* + This two static variables are inteded for holding info about leap seconds + shared by all time zones. +*/ +static uint tz_leapcnt= 0; +static LS_INFO *tz_lsis= 0; + +/* + Shows whenever we have found time zone tables during start-up. + Used for avoiding of putting those tables to global table list + for queries that use time zone info. +*/ +static bool time_zone_tables_exist= 1; + + +/* + Names of tables (with their lengths) that are needed + for dynamical loading of time zone descriptions. +*/ + +static const LEX_CSTRING tz_tables_names[MY_TZ_TABLES_COUNT]= +{ + { STRING_WITH_LEN("time_zone_name")}, + { STRING_WITH_LEN("time_zone")}, + { STRING_WITH_LEN("time_zone_transition_type")}, + { STRING_WITH_LEN("time_zone_transition")} +}; + +class Tz_names_entry: public Sql_alloc +{ +public: + String name; + Time_zone *tz; +}; + + +/* + We are going to call both of these functions from C code so + they should obey C calling conventions. +*/ + +extern "C" uchar * +my_tz_names_get_key(Tz_names_entry *entry, size_t *length, + my_bool not_used __attribute__((unused))) +{ + *length= entry->name.length(); + return (uchar*) entry->name.ptr(); +} + +extern "C" uchar * +my_offset_tzs_get_key(Time_zone_offset *entry, + size_t *length, + my_bool not_used __attribute__((unused))) +{ + *length= sizeof(long); + return (uchar*) &entry->offset; +} + + +/* + Prepare table list with time zone related tables from preallocated array. + + SYNOPSIS + tz_init_table_list() + tz_tabs - pointer to preallocated array of MY_TZ_TABLES_COUNT + TABLE_LIST objects + + DESCRIPTION + This function prepares list of TABLE_LIST objects which can be used + for opening of time zone tables from preallocated array. 
+*/ + +static void +tz_init_table_list(TABLE_LIST *tz_tabs) +{ + for (int i= 0; i < MY_TZ_TABLES_COUNT; i++) + { + tz_tabs[i].init_one_table(&MYSQL_SCHEMA_NAME, tz_tables_names + i, + NULL, TL_READ); + if (i != MY_TZ_TABLES_COUNT - 1) + tz_tabs[i].next_global= tz_tabs[i].next_local= &tz_tabs[i+1]; + if (i != 0) + tz_tabs[i].prev_global= &tz_tabs[i-1].next_global; + } +} + +static PSI_memory_key key_memory_tz_storage; + +#ifdef HAVE_PSI_INTERFACE +static PSI_mutex_key key_tz_LOCK; + +static PSI_mutex_info all_tz_mutexes[]= +{ + { & key_tz_LOCK, "tz_LOCK", PSI_FLAG_GLOBAL} +}; + +static PSI_memory_info all_tz_memory[]= +{ + { &key_memory_tz_storage, "tz_storage", PSI_FLAG_GLOBAL} +}; + +static void init_tz_psi_keys(void) +{ + const char* category= "sql"; + int count; + + if (PSI_server == NULL) + return; + + count= array_elements(all_tz_mutexes); + PSI_server->register_mutex(category, all_tz_mutexes, count); + + count= array_elements(all_tz_memory); + mysql_memory_register(category, all_tz_memory, count); +} +#endif /* HAVE_PSI_INTERFACE */ + + +/* + Initialize time zone support infrastructure. + + SYNOPSIS + my_tz_init() + thd - current thread object + default_tzname - default time zone or 0 if none. + bootstrap - indicates whenever we are in bootstrap mode + + DESCRIPTION + This function will init memory structures needed for time zone support, + it will register mandatory SYSTEM time zone in them. It will try to open + mysql.time_zone* tables and load information about default time zone and + information which further will be shared among all time zones loaded. + If system tables with time zone descriptions don't exist it won't fail + (unless default_tzname is time zone from tables). If bootstrap parameter + is true then this routine assumes that we are in bootstrap mode and won't + load time zone descriptions unless someone specifies default time zone + which is supposedly stored in those tables. + It'll also set default time zone if it is specified. 
+ + RETURN VALUES + 0 - ok + 1 - Error +*/ +my_bool +my_tz_init(THD *org_thd, const char *default_tzname, my_bool bootstrap) +{ + THD *thd; + TABLE_LIST tz_tables[1+MY_TZ_TABLES_COUNT]; + TABLE *table; + const LEX_CSTRING tmp_table_name= { STRING_WITH_LEN("time_zone_leap_second") }; + Tz_names_entry *tmp_tzname; + my_bool return_val= 1; + int res; + DBUG_ENTER("my_tz_init"); + +#ifdef HAVE_PSI_INTERFACE + init_tz_psi_keys(); +#endif + + /* + To be able to run this from boot, we allocate a temporary THD + */ + if (!(thd= new THD(0))) + DBUG_RETURN(1); + thd->thread_stack= (char*) &thd; + thd->store_globals(); + + /* Init all memory structures that require explicit destruction */ + if (my_hash_init(key_memory_tz_storage, &tz_names, &my_charset_latin1, 20, 0, + 0, (my_hash_get_key) my_tz_names_get_key, 0, 0)) + { + sql_print_error("Fatal error: OOM while initializing time zones"); + goto end; + } + if (my_hash_init(key_memory_tz_storage, &offset_tzs, &my_charset_latin1, 26, + 0, 0, (my_hash_get_key)my_offset_tzs_get_key, 0, 0)) + { + sql_print_error("Fatal error: OOM while initializing time zones"); + my_hash_free(&tz_names); + goto end; + } + init_sql_alloc(key_memory_tz_storage, &tz_storage, 32 * 1024, 0, MYF(0)); + mysql_mutex_init(key_tz_LOCK, &tz_LOCK, MY_MUTEX_INIT_FAST); + tz_inited= 1; + + /* Add 'SYSTEM' time zone to tz_names hash */ + if (!(tmp_tzname= new (&tz_storage) Tz_names_entry())) + { + sql_print_error("Fatal error: OOM while initializing time zones"); + goto end_with_cleanup; + } + tmp_tzname->name.set(STRING_WITH_LEN("SYSTEM"), &my_charset_latin1); + tmp_tzname->tz= my_tz_SYSTEM; + if (my_hash_insert(&tz_names, (const uchar *)tmp_tzname)) + { + sql_print_error("Fatal error: OOM while initializing time zones"); + goto end_with_cleanup; + } + + if (bootstrap) + { + /* If we are in bootstrap mode we should not load time zone tables */ + return_val= time_zone_tables_exist= 0; + goto end_with_cleanup; + } + + /* + After this point all memory structures 
are inited and we even can live + without time zone description tables. Now try to load information about + leap seconds shared by all time zones. + */ + + thd->set_db(&MYSQL_SCHEMA_NAME); + bzero((char*) &tz_tables[0], sizeof(TABLE_LIST)); + tz_tables[0].alias= tz_tables[0].table_name= tmp_table_name; + tz_tables[0].db= MYSQL_SCHEMA_NAME; + tz_tables[0].lock_type= TL_READ; + + tz_init_table_list(tz_tables+1); + tz_tables[0].next_global= tz_tables[0].next_local= &tz_tables[1]; + tz_tables[1].prev_global= &tz_tables[0].next_global; + init_mdl_requests(tz_tables); + + /* + We need to open only mysql.time_zone_leap_second, but we try to + open all time zone tables to see if they exist. + */ + if (open_and_lock_tables(thd, tz_tables, FALSE, + MYSQL_OPEN_IGNORE_FLUSH | MYSQL_LOCK_IGNORE_TIMEOUT)) + { + sql_print_warning("Can't open and lock time zone table: %s " + "trying to live without them", + thd->get_stmt_da()->message()); + /* We will try emulate that everything is ok */ + return_val= time_zone_tables_exist= 0; + goto end_with_setting_default_tz; + } + + for (TABLE_LIST *tl= tz_tables; tl; tl= tl->next_global) + { + tl->table->use_all_columns(); + /* Force close at the end of the function to free memory. */ + tl->table->mark_table_for_reopen(); + } + + /* + Now we are going to load leap seconds descriptions that are shared + between all time zones that use them. We are using index for getting + records in proper order. Since we share the same MEM_ROOT between + all time zones we just allocate enough memory for it first. 
+ */ + if (!(tz_lsis= (LS_INFO*) alloc_root(&tz_storage, + sizeof(LS_INFO) * TZ_MAX_LEAPS))) + { + sql_print_error("Fatal error: Out of memory while loading " + "mysql.time_zone_leap_second table"); + goto end_with_close; + } + + table= tz_tables[0].table; + + if (table->file->ha_index_init(0, 1)) + goto end_with_close; + + table->use_all_columns(); + tz_leapcnt= 0; + + res= table->file->ha_index_first(table->record[0]); + + while (!res) + { + if (tz_leapcnt + 1 > TZ_MAX_LEAPS) + { + sql_print_error("Fatal error: While loading mysql.time_zone_leap_second" + " table: too much leaps"); + table->file->ha_index_end(); + goto end_with_close; + } + + tz_lsis[tz_leapcnt].ls_trans= (my_time_t)table->field[0]->val_int(); + tz_lsis[tz_leapcnt].ls_corr= (long)table->field[1]->val_int(); + + tz_leapcnt++; + + DBUG_PRINT("info", + ("time_zone_leap_second table: tz_leapcnt: %u tt_time: %lu offset: %ld", + tz_leapcnt, (ulong) tz_lsis[tz_leapcnt-1].ls_trans, + tz_lsis[tz_leapcnt-1].ls_corr)); + + res= table->file->ha_index_next(table->record[0]); + } + + (void)table->file->ha_index_end(); + + if (res != HA_ERR_END_OF_FILE) + { + sql_print_error("Fatal error: Error while loading " + "mysql.time_zone_leap_second table"); + goto end_with_close; + } + + /* + Loading of info about leap seconds succeeded + */ + + return_val= 0; + + +end_with_setting_default_tz: + /* If we have default time zone try to load it */ + if (default_tzname) + { + String tmp_tzname2(default_tzname, strlen(default_tzname), + &my_charset_latin1); + /* + Time zone tables may be open here, and my_tz_find() may open + most of them once more, but this is OK for system tables open + for READ. 
+ */ + if (unlikely(!(global_system_variables.time_zone= + my_tz_find(thd, &tmp_tzname2)))) + { + sql_print_error("Fatal error: Illegal or unknown default time zone '%s'", + default_tzname); + return_val= 1; + } + } + +end_with_close: + if (time_zone_tables_exist) + close_mysql_tables(thd); + +end_with_cleanup: + + /* if there were error free time zone describing structs */ + if (unlikely(return_val)) + my_tz_free(); +end: + delete thd; + if (org_thd) + org_thd->store_globals(); /* purecov: inspected */ + + default_tz= default_tz_name ? global_system_variables.time_zone + : my_tz_SYSTEM; + + DBUG_RETURN(return_val); +} + + +/* + Free resources used by time zone support infrastructure. + + SYNOPSIS + my_tz_free() +*/ + +void my_tz_free() +{ + if (tz_inited) + { + tz_inited= 0; + mysql_mutex_destroy(&tz_LOCK); + my_hash_free(&offset_tzs); + my_hash_free(&tz_names); + free_root(&tz_storage, MYF(0)); + } +} + + +/* + Load time zone description from system tables. + + SYNOPSIS + tz_load_from_open_tables() + tz_name - name of time zone that should be loaded. + tz_tables - list of tables from which time zone description + should be loaded + + DESCRIPTION + This function will try to load information about time zone specified + from the list of the already opened and locked tables (first table in + tz_tables should be time_zone_name, next time_zone, then + time_zone_transition_type and time_zone_transition should be last). + It will also update information in hash used for time zones lookup. + + RETURN VALUES + Returns pointer to newly created Time_zone object or 0 in case of error. 
+ +*/ + +static Time_zone* +tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables) +{ + TABLE *table= 0; + TIME_ZONE_INFO *tz_info= NULL; + Tz_names_entry *tmp_tzname; + Time_zone *return_val= 0; + int res; + uint tzid, ttid; + my_time_t ttime; + char buff[MAX_FIELD_WIDTH]; + uchar keybuff[32]; + Field *field; + String abbr(buff, sizeof(buff), &my_charset_latin1); + char *alloc_buff= NULL; + char *tz_name_buff= NULL; + /* + Temporary arrays that are used for loading of data for filling + TIME_ZONE_INFO structure + */ + my_time_t ats[TZ_MAX_TIMES]; + uchar types[TZ_MAX_TIMES]; + TRAN_TYPE_INFO ttis[TZ_MAX_TYPES]; +#ifdef ABBR_ARE_USED + char chars[MY_MAX(TZ_MAX_CHARS + 1, (2 * (MY_TZNAME_MAX + 1)))]; +#endif + /* + Used as a temporary tz_info until we decide that we actually want to + allocate and keep the tz info and tz name in tz_storage. + */ + TIME_ZONE_INFO tmp_tz_info; + memset(&tmp_tz_info, 0, sizeof(TIME_ZONE_INFO)); + + DBUG_ENTER("tz_load_from_open_tables"); + + /* + Let us find out time zone id by its name (there is only one index + and it is specifically for this purpose). + */ + table= tz_tables->table; + tz_tables= tz_tables->next_local; + table->field[0]->store(tz_name->ptr(), tz_name->length(), + &my_charset_latin1); + if (table->file->ha_index_init(0, 1)) + goto end; + + if (table->file->ha_index_read_map(table->record[0], table->field[0]->ptr, + HA_WHOLE_KEY, HA_READ_KEY_EXACT)) + { +#ifdef EXTRA_DEBUG + /* + Most probably user has mistyped time zone name, so no need to bark here + unless we need it for debugging. + */ + sql_print_error("Can't find description of time zone '%.*b'", + tz_name->length(), tz_name->ptr()); +#endif + goto end; + } + + tzid= (uint)table->field[1]->val_int(); + + (void)table->file->ha_index_end(); + + /* + Now we need to lookup record in mysql.time_zone table in order to + understand whenever this timezone uses leap seconds (again we are + using the only index in this table). 
+ */ + table= tz_tables->table; + tz_tables= tz_tables->next_local; + field= table->field[0]; + field->store((longlong) tzid, TRUE); + DBUG_ASSERT(field->key_length() <= sizeof(keybuff)); + field->get_key_image(keybuff, + MY_MIN(field->key_length(), sizeof(keybuff)), + Field::itRAW); + if (table->file->ha_index_init(0, 1)) + goto end; + + if (table->file->ha_index_read_map(table->record[0], keybuff, + HA_WHOLE_KEY, HA_READ_KEY_EXACT)) + { + sql_print_error("Can't find description of time zone '%u'", tzid); + goto end; + } + + /* If Uses_leap_seconds == 'Y' */ + if (table->field[1]->val_int() == 1) + { + tmp_tz_info.leapcnt= tz_leapcnt; + tmp_tz_info.lsis= tz_lsis; + } + + (void)table->file->ha_index_end(); + + /* + Now we will iterate through records for out time zone in + mysql.time_zone_transition_type table. Because we want records + only for our time zone guess what are we doing? + Right - using special index. + */ + table= tz_tables->table; + tz_tables= tz_tables->next_local; + field= table->field[0]; + field->store((longlong) tzid, TRUE); + DBUG_ASSERT(field->key_length() <= sizeof(keybuff)); + field->get_key_image(keybuff, + MY_MIN(field->key_length(), sizeof(keybuff)), + Field::itRAW); + if (table->file->ha_index_init(0, 1)) + goto end; + + res= table->file->ha_index_read_map(table->record[0], keybuff, + (key_part_map)1, HA_READ_KEY_EXACT); + while (!res) + { + ttid= (uint)table->field[1]->val_int(); + + if (ttid >= TZ_MAX_TYPES) + { + sql_print_error("Error while loading time zone description from " + "mysql.time_zone_transition_type table: too big " + "transition type id"); + goto end; + } + + ttis[ttid].tt_gmtoff= (long)table->field[2]->val_int(); + ttis[ttid].tt_isdst= (table->field[3]->val_int() > 0); + +#ifdef ABBR_ARE_USED + // FIXME should we do something with duplicates here ? 
+ table->field[4]->val_str(&abbr, &abbr); + if (tmp_tz_info.charcnt + abbr.length() + 1 > sizeof(chars)) + { + sql_print_error("Error while loading time zone description from " + "mysql.time_zone_transition_type table: not enough " + "room for abbreviations"); + goto end; + } + ttis[ttid].tt_abbrind= tmp_tz_info.charcnt; + memcpy(chars + tmp_tz_info.charcnt, abbr.ptr(), abbr.length()); + tmp_tz_info.charcnt+= abbr.length(); + chars[tmp_tz_info.charcnt]= 0; + tmp_tz_info.charcnt++; + + DBUG_PRINT("info", + ("time_zone_transition_type table: tz_id=%u tt_id=%u tt_gmtoff=%ld " + "abbr='%s' tt_isdst=%u", tzid, ttid, ttis[ttid].tt_gmtoff, + chars + ttis[ttid].tt_abbrind, ttis[ttid].tt_isdst)); +#else + DBUG_PRINT("info", + ("time_zone_transition_type table: tz_id=%u tt_id=%u tt_gmtoff=%ld " + "tt_isdst=%u", tzid, ttid, ttis[ttid].tt_gmtoff, ttis[ttid].tt_isdst)); +#endif + + /* ttid is increasing because we are reading using index */ + if (ttid < tmp_tz_info.typecnt) + { + sql_print_error("mysql.time_zone_transition_type table is incorrectly defined or corrupted"); + goto end; + } + + tmp_tz_info.typecnt= ttid + 1; + + res= table->file->ha_index_next_same(table->record[0], keybuff, 4); + } + + if (res != HA_ERR_END_OF_FILE) + { + sql_print_error("Error while loading time zone description from " + "mysql.time_zone_transition_type table"); + goto end; + } + + (void)table->file->ha_index_end(); + + + /* + At last we are doing the same thing for records in + mysql.time_zone_transition table. Here we additionally need records + in ascending order by index scan also satisfies us. 
+ */ + table= tz_tables->table; + table->field[0]->store((longlong) tzid, TRUE); + if (table->file->ha_index_init(0, 1)) + goto end; + + res= table->file->ha_index_read_map(table->record[0], keybuff, + (key_part_map)1, HA_READ_KEY_EXACT); + while (!res) + { + ttime= (my_time_t)table->field[1]->val_int(); + ttid= (uint)table->field[2]->val_int(); + + if (tmp_tz_info.timecnt + 1 > TZ_MAX_TIMES) + { + sql_print_error("Error while loading time zone description from " + "mysql.time_zone_transition table: " + "too much transitions"); + goto end; + } + if (ttid + 1 > tmp_tz_info.typecnt) + { + sql_print_error("Error while loading time zone description from " + "mysql.time_zone_transition table: " + "bad transition type id"); + goto end; + } + + ats[tmp_tz_info.timecnt]= ttime; + types[tmp_tz_info.timecnt]= ttid; + tmp_tz_info.timecnt++; + + DBUG_PRINT("info", + ("time_zone_transition table: tz_id: %u tt_time: %lu tt_id: %u", + tzid, (ulong) ttime, ttid)); + + res= table->file->ha_index_next_same(table->record[0], keybuff, 4); + } + + /* + We have to allow HA_ERR_KEY_NOT_FOUND because some time zones + for example UTC have no transitons. + */ + if (res != HA_ERR_END_OF_FILE && res != HA_ERR_KEY_NOT_FOUND) + { + sql_print_error("Error while loading time zone description from " + "mysql.time_zone_transition table"); + goto end; + } + + (void)table->file->ha_index_end(); + table= 0; + + /* + Let us check how correct our time zone description is. We don't check for + tz->timecnt < 1 since it is ok for GMT. + */ + if (tmp_tz_info.typecnt < 1) + { + sql_print_error("loading time zone without transition types"); + goto end; + } + + /* Allocate memory for the timezone info and timezone name in tz_storage. 
*/ + if (!(alloc_buff= (char*) alloc_root(&tz_storage, sizeof(TIME_ZONE_INFO) + + tz_name->length() + 1))) + { + sql_print_error("Out of memory while loading time zone description"); + return 0; + } + + /* Move the temporary tz_info into the allocated area */ + tz_info= (TIME_ZONE_INFO *)alloc_buff; + memcpy(tz_info, &tmp_tz_info, sizeof(TIME_ZONE_INFO)); + tz_name_buff= alloc_buff + sizeof(TIME_ZONE_INFO); + /* + By writing zero to the end we guarantee that we can call ptr() + instead of c_ptr() for time zone name. + */ + strmake(tz_name_buff, tz_name->ptr(), tz_name->length()); + + /* + Now we will allocate memory and init TIME_ZONE_INFO structure. + */ + if (!(alloc_buff= (char*) alloc_root(&tz_storage, + ALIGN_SIZE(sizeof(my_time_t) * + tz_info->timecnt) + + ALIGN_SIZE(tz_info->timecnt) + +#ifdef ABBR_ARE_USED + ALIGN_SIZE(tz_info->charcnt) + +#endif + sizeof(TRAN_TYPE_INFO) * + tz_info->typecnt))) + { + sql_print_error("Out of memory while loading time zone description"); + goto end; + } + + tz_info->ats= (my_time_t *) alloc_buff; + memcpy(tz_info->ats, ats, tz_info->timecnt * sizeof(my_time_t)); + alloc_buff+= ALIGN_SIZE(sizeof(my_time_t) * tz_info->timecnt); + tz_info->types= (uchar *)alloc_buff; + memcpy(tz_info->types, types, tz_info->timecnt); + alloc_buff+= ALIGN_SIZE(tz_info->timecnt); +#ifdef ABBR_ARE_USED + tz_info->chars= alloc_buff; + memcpy(tz_info->chars, chars, tz_info->charcnt); + alloc_buff+= ALIGN_SIZE(tz_info->charcnt); +#endif + tz_info->ttis= (TRAN_TYPE_INFO *)alloc_buff; + memcpy(tz_info->ttis, ttis, tz_info->typecnt * sizeof(TRAN_TYPE_INFO)); + + /* Build reversed map. 
*/ + if (prepare_tz_info(tz_info, &tz_storage)) + { + sql_print_error("Unable to build mktime map for time zone"); + goto end; + } + + + if (!(tmp_tzname= new (&tz_storage) Tz_names_entry()) || + !(tmp_tzname->tz= new (&tz_storage) Time_zone_db(tz_info, + &(tmp_tzname->name))) || + (tmp_tzname->name.set(tz_name_buff, tz_name->length(), + &my_charset_latin1), + my_hash_insert(&tz_names, (const uchar *)tmp_tzname))) + { + sql_print_error("Out of memory while loading time zone"); + goto end; + } + + /* + Loading of time zone succeeded + */ + return_val= tmp_tzname->tz; + +end: + + if (table && table->file->inited) + (void) table->file->ha_index_end(); + + DBUG_RETURN(return_val); +} + + +/* + Parse string that specifies time zone as offset from UTC. + + SYNOPSIS + str_to_offset() + str - pointer to string which contains offset + length - length of string + offset - out parameter for storing found offset in seconds. + + DESCRIPTION + This function parses string which contains time zone offset + in form similar to '+10:00' and converts found value to + seconds from UTC form (east is positive). 
+ + RETURN VALUE + 0 - Ok + 1 - String doesn't contain valid time zone offset +*/ +my_bool +str_to_offset(const char *str, uint length, long *offset) +{ + const char *end= str + length; + my_bool negative; + ulong number_tmp; + long offset_tmp; + + if (length < 4) + return 1; + + if (*str == '+') + negative= 0; + else if (*str == '-') + negative= 1; + else + return 1; + str++; + + number_tmp= 0; + + while (str < end && my_isdigit(&my_charset_latin1, *str)) + { + number_tmp= number_tmp*10 + *str - '0'; + str++; + } + + if (str + 1 >= end || *str != ':') + return 1; + str++; + + offset_tmp = number_tmp * MINS_PER_HOUR; number_tmp= 0; + + while (str < end && my_isdigit(&my_charset_latin1, *str)) + { + number_tmp= number_tmp * 10 + *str - '0'; + str++; + } + + if (str != end) + return 1; + + offset_tmp= (offset_tmp + number_tmp) * SECS_PER_MIN; + + if (negative) + offset_tmp= -offset_tmp; + + /* + Check if offset is in range prescribed by standard + (from -12:59 to 13:00). + */ + + if (number_tmp > 59 || offset_tmp < -13 * SECS_PER_HOUR + 1 || + offset_tmp > 13 * SECS_PER_HOUR) + return 1; + + *offset= offset_tmp; + + return 0; +} + + +/* + Get Time_zone object for specified time zone. + + SYNOPSIS + my_tz_find() + thd - pointer to thread THD structure + name - time zone specification + + DESCRIPTION + This function checks if name is one of time zones described in db, + predefined SYSTEM time zone or valid time zone specification as + offset from UTC (In last case it will create proper Time_zone_offset + object if there were not any.). If name is ok it returns corresponding + Time_zone object. + + Clients of this function are not responsible for releasing resources + occupied by returned Time_zone object so they can just forget pointers + to Time_zone object if they are not needed longer. 
+ + Other important property of this function: if some Time_zone found once + it will be for sure found later, so this function can also be used for + checking if proper Time_zone object exists (and if there will be error + it will be reported during first call). + + If name pointer is 0 then this function returns 0 (this allows to pass 0 + values as parameter without additional external check and this property + is used by @@time_zone variable handling code). + + It will perform lookup in system tables (mysql.time_zone*), + opening and locking them, and closing afterwards. It won't perform + such lookup if no time zone describing tables were found during + server start up. + + RETURN VALUE + Pointer to corresponding Time_zone object. 0 - in case of bad time zone + specification or other error. + +*/ +Time_zone * +my_tz_find(THD *thd, const String *name) +{ + Tz_names_entry *tmp_tzname; + Time_zone *result_tz= 0; + long offset; + DBUG_ENTER("my_tz_find"); + DBUG_PRINT("enter", ("time zone name='%s'", + name ? 
((String *)name)->c_ptr_safe() : "NULL")); + + if (!name || name->is_empty()) + DBUG_RETURN(0); + + mysql_mutex_lock(&tz_LOCK); + + if (!str_to_offset(name->ptr(), name->length(), &offset)) + { + if (!(result_tz= (Time_zone_offset *)my_hash_search(&offset_tzs, + (const uchar *)&offset, + sizeof(long)))) + { + DBUG_PRINT("info", ("Creating new Time_zone_offset object")); + + if (!(result_tz= new (&tz_storage) Time_zone_offset(offset)) || + my_hash_insert(&offset_tzs, (const uchar *) result_tz)) + { + result_tz= 0; + sql_print_error("Fatal error: Out of memory " + "while setting new time zone"); + } + } + } + else + { + result_tz= 0; + if ((tmp_tzname= (Tz_names_entry *)my_hash_search(&tz_names, + (const uchar *) + name->ptr(), + name->length()))) + result_tz= tmp_tzname->tz; + else if (time_zone_tables_exist) + { + TABLE_LIST tz_tables[MY_TZ_TABLES_COUNT]; + + /* + Allocate start_new_trans with malloc as it's > 4000 bytes and this + function can be called deep inside a stored procedure + */ + start_new_trans *new_trans= new start_new_trans(thd); + tz_init_table_list(tz_tables); + init_mdl_requests(tz_tables); + if (!open_system_tables_for_read(thd, tz_tables)) + { + result_tz= tz_load_from_open_tables(name, tz_tables); + thd->commit_whole_transaction_and_close_tables(); + } + new_trans->restore_old_transaction(); + delete new_trans; + } + } + + mysql_mutex_unlock(&tz_LOCK); + + if (result_tz && result_tz != my_tz_SYSTEM && result_tz != my_tz_UTC) + status_var_increment(thd->status_var.feature_timezone); + + DBUG_RETURN(result_tz); +} + + +/** + Convert leap seconds into non-leap + + This function will convert the leap seconds added by the OS to + non-leap seconds, e.g. 23:59:59, 23:59:60 -> 23:59:59, 00:00:01 ... + This check is not checking for years on purpose : although it's not a + complete check this way it doesn't require looking (and having installed) + the leap seconds table. 
@param[in,out] t broken-down time structure as filled in by the OS
" " : ","), i, + sp->ttis[i].tt_gmtoff, sp->ttis[i].tt_isdst, + sp->chars + sp->ttis[i].tt_abbrind); + printf(";\n"); +} + + +#define SAVE_ENGINE(e) \ + "'select ENGINE into @" e "_engine" \ + " from information_schema.TABLES" \ + " where TABLE_SCHEMA=DATABASE() and TABLE_NAME=''" e "'''" + +/* + Print info about leap seconds in time zone as SQL statements + populating mysql.time_zone_leap_second table. + + SYNOPSIS + print_tz_leaps_as_sql() + sp - structure describing time zone +*/ +void +print_tz_leaps_as_sql(const TIME_ZONE_INFO *sp) +{ + uint i; + + /* + We are assuming that there are only one list of leap seconds + For all timezones. + */ + if (!opt_skip_write_binlog) + printf( + "execute immediate if(@wsrep_cannot_replicate_tz, " + SAVE_ENGINE("time_zone_leap_second") ", 'do 0');\n" + "execute immediate if(@wsrep_cannot_replicate_tz, " + "'ALTER TABLE time_zone_leap_second ENGINE=InnoDB', 'do 0');\n"); + + printf("TRUNCATE TABLE time_zone_leap_second;\n"); + + if (sp->leapcnt) + { + printf("INSERT INTO time_zone_leap_second \ +(Transition_time, Correction) VALUES\n"); + for (i= 0; i < sp->leapcnt; i++) + printf("%s(%ld, %ld)\n", (i == 0 ? " " : ","), + sp->lsis[i].ls_trans, sp->lsis[i].ls_corr); + printf(";\n"); + } + + if (!opt_skip_write_binlog) + printf( + "execute immediate if(@wsrep_cannot_replicate_tz, " + "concat('ALTER TABLE time_zone_leap_second ENGINE=', " + "@time_zone_leap_second_engine), 'do 0');\n"); + + printf("ALTER TABLE time_zone_leap_second ORDER BY Transition_time;\n"); +} + + +/* + Some variables used as temporary or as parameters + in recursive scan_tz_dir() code. +*/ +TIME_ZONE_INFO tz_info; +MEM_ROOT tz_storage; +char fullname[FN_REFLEN + 1]; +char *root_name_end; + + +/* + Recursively scan zoneinfo directory and print all found time zone + descriptions as SQL. + + SYNOPSIS + scan_tz_dir() + name_end - pointer to end of path to directory to be searched. 
+ symlink_recursion_level How many symlink directory levels are used + verbose >0 if we should print warnings + + DESCRIPTION + This auxiliary recursive function also uses several global + variables as in parameters and for storing temporary values. + + fullname - path to directory that should be scanned. + root_name_end - pointer to place in fullname where part with + path to initial directory ends. + current_tz_id - last used time zone id + + RETURN VALUE + 0 - Ok, 1 - Fatal error + +*/ +my_bool +scan_tz_dir(char * name_end, uint symlink_recursion_level, uint verbose) +{ + MY_DIR *cur_dir; + char *name_end_tmp; + size_t i; + + /* Sort directory data, to pass mtr tests on different platforms. */ + if (!(cur_dir= my_dir(fullname, MYF(MY_WANT_STAT|MY_WANT_SORT)))) + return 1; + + name_end= strmake(name_end, "/", FN_REFLEN - (name_end - fullname)); + + for (i= 0; i < cur_dir->number_of_files; i++) + { + if (cur_dir->dir_entry[i].name[0] != '.' && + strcmp(cur_dir->dir_entry[i].name, "Factory")) + { + name_end_tmp= strmake(name_end, cur_dir->dir_entry[i].name, + FN_REFLEN - (name_end - fullname)); + + if (MY_S_ISDIR(cur_dir->dir_entry[i].mystat->st_mode)) + { + my_bool is_symlink; + if ((is_symlink= my_is_symlink(fullname)) && + symlink_recursion_level > 0) + { + /* + The timezone definition data in some Linux distributions + (e.g. the "timezone-data-2013f" package in Gentoo) + may have synlimks like: + /usr/share/zoneinfo/posix/ -> /usr/share/zoneinfo/, + so the same timezone files are available under two names + (e.g. "CET" and "posix/CET"). + + We allow one level of symlink recursion for backward + compatibility with earlier timezone data packages that have + duplicate copies of the same timezone files inside the root + directory and the "posix" subdirectory (instead of symlinking). 
+ This makes "posix/CET" still available, but helps to avoid + following such symlinks infinitely: + /usr/share/zoneinfo/posix/posix/posix/.../posix/ + */ + + /* + This is a normal case and not critical. only print warning if + verbose mode is chosen. + */ + if (verbose > 0) + { + fflush(stdout); + fprintf(stderr, "Warning: Skipping directory '%s': " + "to avoid infinite symlink recursion.\n", fullname); + } + continue; + } + if (scan_tz_dir(name_end_tmp, symlink_recursion_level + is_symlink, + verbose)) + { + my_dirend(cur_dir); + return 1; + } + } + else if (MY_S_ISREG(cur_dir->dir_entry[i].mystat->st_mode)) + { + init_alloc_root(PSI_INSTRUMENT_ME, &tz_storage, + 32768, 0, MYF(MY_THREAD_SPECIFIC)); + if (!tz_load(fullname, &tz_info, &tz_storage)) + print_tz_as_sql(root_name_end + 1, &tz_info); + else + { + /* + Some systems (like Debian, openSUSE, etc) have non-timezone files: + * iso3166.tab + * leap-seconds.list + * leapseconds + * tzdata.zi + * zone.tab + * zone1970.tab + We skip these silently unless verbose > 0. + */ + const char *current_ext= fn_ext(fullname); + my_bool known_ext= strlen(current_ext) || + !strcmp(my_basename(fullname), "leapseconds"); + + if (verbose > 0 || !known_ext) + { + fflush(stdout); + fprintf(stderr, + "Warning: Unable to load '%s' as time zone. Skipping it.\n", + fullname); + } + } + free_root(&tz_storage, MYF(0)); + } + else + { + fflush(stdout); + fprintf(stderr, "Warning: '%s' is not regular file or directory\n", + fullname); + } + } + } + + my_dirend(cur_dir); + + return 0; +} + + +static const char *load_default_groups[]= +{ "mysql_tzinfo_to_sql", 0}; + +static struct my_option my_long_options[] = +{ + {"help", '?', "Display this help and exit.", 0, 0, 0, GET_NO_ARG, NO_ARG, + 0, 0, 0, 0, 0, 0}, +#ifdef DBUG_OFF + {"debug", '#', "This is a non-debug version. Catch this and exit.", + 0,0, 0, GET_DISABLED, OPT_ARG, 0, 0, 0, 0, 0, 0}, +#else + {"debug", '#', "Output debug log. 
Often this is 'd:t:o,filename'.", + 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"leap", 'l', "Print the leap second information from the given time zone file. By convention, when --leap is used the next argument is the timezonefile.", + &opt_leap, &opt_leap, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Write non critical warnings.", + &opt_verbose, &opt_verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Output version information and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"skip-write-binlog", 'S', "Do not replicate changes to time zone tables to the binary log, or to other nodes in a Galera cluster.", + &opt_skip_write_binlog,&opt_skip_write_binlog, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +static char **default_argv; + +static void free_allocated_data() +{ + free_defaults(default_argv); + my_end(0); +} + + +C_MODE_START +static my_bool get_one_option(const struct my_option *, const char *, + const char *); +C_MODE_END + +static void print_version(void) +{ + printf("%s Ver %s Distrib %s, for %s (%s)\n",my_progname, PROGRAM_VERSION, + MYSQL_SERVER_VERSION,SYSTEM_TYPE,MACHINE_TYPE); +} + +static const char *default_timezone_dir= "/usr/share/zoneinfo/"; + + +static void print_usage(void) +{ + fprintf(stdout, "Create SQL commands for loading system timezeone data for " + "MariaDB\n\n"); + fprintf(stdout, "Usage:\n"); + fprintf(stdout, " %s [options] timezonedir\n", my_progname); + fprintf(stdout, "or\n"); + fprintf(stdout, " %s [options] timezonefile timezonename\n", my_progname); + + fprintf(stdout, "\nA typical place for the system timezone directory is " + "\"%s\"\n", default_timezone_dir); + + print_defaults("my",load_default_groups); + puts(""); + my_print_help(my_long_options); + my_print_variables(my_long_options); +} + + +static my_bool +get_one_option(const struct my_option *opt, const char *argument, const char *) +{ + 
switch(opt->id) { + case '#': +#ifndef DBUG_OFF + DBUG_PUSH(argument ? argument : "d:t:S:i:O,/tmp/mysq_tzinfo_to_sql.trace"); +#endif + break; + case '?': + print_version(); + puts(""); + print_usage(); + free_allocated_data(); + exit(0); + case 'V': + print_version(); + free_allocated_data(); + exit(0); + } + return 0; +} + + +static const char *lock_tables= + "LOCK TABLES time_zone WRITE,\n" + " time_zone_leap_second WRITE,\n" + " time_zone_name WRITE,\n" + " time_zone_transition WRITE,\n" + " time_zone_transition_type WRITE"; +static const char *trunc_tables_const= + "TRUNCATE TABLE time_zone;\n" + "TRUNCATE TABLE time_zone_name;\n" + "TRUNCATE TABLE time_zone_transition;\n" + "TRUNCATE TABLE time_zone_transition_type;\n"; + +/* + These queries need to return FALSE/0 when the 'wsrep*' variables do not + exist at all. + Moving the WHERE clause into the sum(...) seems like the obvious solution + here, but it does not work in bootstrap mode (see MDEV-28782 and + 0e4cf497ca11a7298e2bd896cb594bd52085a1d4). 
+ Thus we use coalesce(..., 0) instead, +*/ +static const char *wsrep_is_on= + "select coalesce(sum(SESSION_VALUE='ON'), 0)" + " from information_schema.SYSTEM_VARIABLES WHERE VARIABLE_NAME='wsrep_on'"; +static const char *wsrep_cannot_replicate_tz= + "select coalesce(sum(GLOBAL_VALUE NOT LIKE @replicate_opt), 0)" + " from information_schema.SYSTEM_VARIABLES WHERE VARIABLE_NAME='wsrep_mode'"; + +int +main(int argc, char **argv) +{ + const char *trunc_tables= ""; + MY_INIT(argv[0]); + + load_defaults_or_exit("my", load_default_groups, &argc, &argv); + default_argv= argv; + + if ((handle_options(&argc, &argv, my_long_options, get_one_option))) + exit(1); + + if ((argc != 1 && argc != 2) || (opt_leap && argc != 1)) + { + print_usage(); + free_allocated_data(); + return 1; + } + + if (argc == 1 && !opt_leap) + trunc_tables= trunc_tables_const; + + printf("set @wsrep_is_on=(%s);\n", wsrep_is_on); + printf("SET STATEMENT SQL_MODE='' FOR " + "SELECT concat('%%', GROUP_CONCAT(OPTION), '%%') INTO @replicate_opt " + " FROM" + " (SELECT DISTINCT concat('REPLICATE_', UPPER(ENGINE)) AS OPTION" + " FROM information_schema.TABLES" + " WHERE TABLE_SCHEMA=DATABASE()" + " AND TABLE_NAME IN ('time_zone'," + " 'time_zone_name'," + " 'time_zone_transition'," + " 'time_zone_transition_type'," + " 'time_zone_leap_second')" + " AND ENGINE in ('MyISAM'," + " 'Aria')) AS o" + " ORDER BY OPTION DESC;\n"); + printf("set @wsrep_cannot_replicate_tz=@wsrep_is_on AND (%s);\n", wsrep_cannot_replicate_tz); + if (opt_skip_write_binlog) + /* We turn off session wsrep if we cannot replicate using galera. + Disable sql_log_bin as the name implies. 
*/ + printf("execute immediate if(@wsrep_is_on, 'SET @save_wsrep_on=@@WSREP_ON, WSREP_ON=OFF', 'do 0');\n" + "SET @save_sql_log_bin=@@SQL_LOG_BIN;\n" + "SET SESSION SQL_LOG_BIN=0;\n" + "SET @wsrep_cannot_replicate_tz=0;\n" + "%s%s;\n", trunc_tables, lock_tables); + else + // Alter time zone tables to InnoDB if wsrep_on is enabled + // to allow changes to them to replicate with Galera + printf( + "execute immediate if(@wsrep_cannot_replicate_tz, " SAVE_ENGINE("time_zone") ", 'do 0');\n" + "execute immediate if(@wsrep_cannot_replicate_tz, 'ALTER TABLE time_zone ENGINE=InnoDB', 'do 0');\n" + "execute immediate if(@wsrep_cannot_replicate_tz, " SAVE_ENGINE("time_zone_name") ", 'do 0');\n" + "execute immediate if(@wsrep_cannot_replicate_tz, 'ALTER TABLE time_zone_name ENGINE=InnoDB', 'do 0');\n" + "execute immediate if(@wsrep_cannot_replicate_tz, " SAVE_ENGINE("time_zone_transition") ", 'do 0');\n" + "execute immediate if(@wsrep_cannot_replicate_tz, 'ALTER TABLE time_zone_transition ENGINE=InnoDB', 'do 0');\n" + "execute immediate if(@wsrep_cannot_replicate_tz, " SAVE_ENGINE("time_zone_transition_type") ", 'do 0');\n" + "execute immediate if(@wsrep_cannot_replicate_tz, 'ALTER TABLE time_zone_transition_type ENGINE=InnoDB', 'do 0');\n" + "%s" + "/*M!100602 execute immediate if(@wsrep_cannot_replicate_tz, 'start transaction', '%s')*/;\n" + , trunc_tables, lock_tables); + + if (argc == 1 && !opt_leap) + { + /* Argument is timezonedir */ + + root_name_end= strmake_buf(fullname, argv[0]); + + if (scan_tz_dir(root_name_end, 0, opt_verbose)) + { + printf("ROLLBACK;\n"); + fflush(stdout); + fprintf(stderr, + "There were fatal errors during processing " + "of zoneinfo directory '%s'\n", fullname); + return 1; + } + + printf("UNLOCK TABLES;\n" + "COMMIT;\n"); + printf( + "execute immediate if(@wsrep_cannot_replicate_tz, 'do 0'," + "'ALTER TABLE time_zone_transition " + "ORDER BY Time_zone_id, Transition_time');\n" + "execute immediate if(@wsrep_cannot_replicate_tz, 'do 0'," + 
"'ALTER TABLE time_zone_transition_type " + "ORDER BY Time_zone_id, Transition_type_id');\n"); + } + else + { + /* + First argument is timezonefile. + The second is timezonename if opt_leap is not given + */ + init_alloc_root(PSI_INSTRUMENT_ME, &tz_storage, 32768, 0, MYF(0)); + + if (tz_load(argv[0], &tz_info, &tz_storage)) + { + fflush(stdout); + fprintf(stderr, "Problems with zoneinfo file '%s'\n", argv[0]); + return 1; + } + if (opt_leap) + print_tz_leaps_as_sql(&tz_info); + else + print_tz_as_sql(argv[1], &tz_info); + printf("UNLOCK TABLES;\n" + "COMMIT;\n"); + free_root(&tz_storage, MYF(0)); + } + + if(opt_skip_write_binlog) + printf("SET SESSION SQL_LOG_BIN=@save_sql_log_bin;\n" + "execute immediate if(@wsrep_is_on, 'SET SESSION WSREP_ON=@save_wsrep_on', 'do 0');\n"); + else + // Change back to what it was before + printf( + "execute immediate if(@wsrep_cannot_replicate_tz, " + "concat('ALTER TABLE time_zone ENGINE=', @time_zone_engine), 'do 0');\n" + "execute immediate if(@wsrep_cannot_replicate_tz, " + "concat('ALTER TABLE time_zone_name ENGINE=', @time_zone_name_engine), 'do 0');\n" + "execute immediate if(@wsrep_cannot_replicate_tz, " + "concat('ALTER TABLE time_zone_transition ENGINE=', " + "@time_zone_transition_engine, ', ORDER BY Time_zone_id, Transition_time'), 'do 0');\n" + "execute immediate if(@wsrep_cannot_replicate_tz, " + "concat('ALTER TABLE time_zone_transition_type ENGINE=', " + "@time_zone_transition_type_engine, ', ORDER BY Time_zone_id, Transition_type_id'), 'do 0');\n"); + + free_allocated_data(); + my_end(0); + return 0; +} + +#endif /* defined(TZINFO2SQL) */ + + +#ifdef TESTTIME + +/* + Some simple brute-force test which allowed to catch a pair of bugs. + Also can provide interesting facts about system's time zone support + implementation. 
+*/ + +#ifndef CHAR_BIT +#define CHAR_BIT 8 +#endif + +#ifndef TYPE_BIT +#define TYPE_BIT(type) (sizeof (type) * CHAR_BIT) +#endif + +#ifndef TYPE_SIGNED +#define TYPE_SIGNED(type) (((type) -1) < 0) +#endif + +my_bool +is_equal_TIME_tm(const TIME* time_arg, const struct tm * tm_arg) +{ + return (time_arg->year == (uint)tm_arg->tm_year+TM_YEAR_BASE) && + (time_arg->month == (uint)tm_arg->tm_mon+1) && + (time_arg->day == (uint)tm_arg->tm_mday) && + (time_arg->hour == (uint)tm_arg->tm_hour) && + (time_arg->minute == (uint)tm_arg->tm_min) && + (time_arg->second == (uint)tm_arg->tm_sec) && + time_arg->second_part == 0; +} + + +int +main(int argc, char **argv) +{ + my_bool localtime_negative; + TIME_ZONE_INFO tz_info; + struct tm tmp; + MYSQL_TIME time_tmp; + time_t t, t1, t2; + char fullname[FN_REFLEN+1]; + char *str_end; + MEM_ROOT tz_storage; + + MY_INIT(argv[0]); + + init_alloc_root(&tz_storage, "timezone_storage", 32768, MYF(0)); + + /* let us set some well known timezone */ + setenv("TZ", "MET", 1); + tzset(); + + /* Some initial time zone related system info */ + printf("time_t: %s %u bit\n", TYPE_SIGNED(time_t) ? "signed" : "unsigned", + (uint)TYPE_BIT(time_t)); + if (TYPE_SIGNED(time_t)) + { + t= -100; + localtime_negative= MY_TEST(localtime_r(&t, &tmp) != 0); + printf("localtime_r %s negative params \ + (time_t=%d is %d-%d-%d %d:%d:%d)\n", + (localtime_negative ? "supports" : "doesn't support"), (int)t, + TM_YEAR_BASE + tmp.tm_year, tmp.tm_mon + 1, tmp.tm_mday, + tmp.tm_hour, tmp.tm_min, tmp.tm_sec); + + printf("mktime %s negative results (%d)\n", + (t == mktime(&tmp) ? "doesn't support" : "supports"), + (int)mktime(&tmp)); + } + + tmp.tm_year= 103; tmp.tm_mon= 2; tmp.tm_mday= 30; + tmp.tm_hour= 2; tmp.tm_min= 30; tmp.tm_sec= 0; tmp.tm_isdst= -1; + t= mktime(&tmp); + printf("mktime returns %s for spring time gap (%d)\n", + (t != (time_t)-1 ? 
"something" : "error"), (int)t); + + tmp.tm_year= 103; tmp.tm_mon= 8; tmp.tm_mday= 1; + tmp.tm_hour= 0; tmp.tm_min= 0; tmp.tm_sec= 0; tmp.tm_isdst= 0; + t= mktime(&tmp); + printf("mktime returns %s for non existing date (%d)\n", + (t != (time_t)-1 ? "something" : "error"), (int)t); + + tmp.tm_year= 103; tmp.tm_mon= 8; tmp.tm_mday= 1; + tmp.tm_hour= 25; tmp.tm_min=0; tmp.tm_sec=0; tmp.tm_isdst=1; + t= mktime(&tmp); + printf("mktime %s unnormalized input (%d)\n", + (t != (time_t)-1 ? "handles" : "doesn't handle"), (int)t); + + tmp.tm_year= 103; tmp.tm_mon= 9; tmp.tm_mday= 26; + tmp.tm_hour= 0; tmp.tm_min= 30; tmp.tm_sec= 0; tmp.tm_isdst= 1; + mktime(&tmp); + tmp.tm_hour= 2; tmp.tm_isdst= -1; + t= mktime(&tmp); + tmp.tm_hour= 4; tmp.tm_isdst= 0; + mktime(&tmp); + tmp.tm_hour= 2; tmp.tm_isdst= -1; + t1= mktime(&tmp); + printf("mktime is %s (%d %d)\n", + (t == t1 ? "determenistic" : "is non-determenistic"), + (int)t, (int)t1); + + /* Let us load time zone description */ + str_end= strmake_buf(fullname, TZDIR); + strmake(str_end, "/MET", FN_REFLEN - (str_end - fullname)); + + if (tz_load(fullname, &tz_info, &tz_storage)) + { + printf("Unable to load time zone info from '%s'\n", fullname); + free_root(&tz_storage, MYF(0)); + return 1; + } + + printf("Testing our implementation\n"); + + if (TYPE_SIGNED(time_t) && localtime_negative) + { + for (t= -40000; t < 20000; t++) + { + localtime_r(&t, &tmp); + gmt_sec_to_TIME(&time_tmp, (my_time_t)t, &tz_info); + if (!is_equal_TIME_tm(&time_tmp, &tmp)) + { + printf("Problem with negative time_t = %d\n", (int)t); + free_root(&tz_storage, MYF(0)); + return 1; + } + } + printf("gmt_sec_to_TIME = localtime for time_t in [-40000,20000) range\n"); + } + + for (t= 1000000000; t < 1100000000; t+= 13) + { + localtime_r(&t,&tmp); + gmt_sec_to_TIME(&time_tmp, (my_time_t)t, &tz_info); + + if (!is_equal_TIME_tm(&time_tmp, &tmp)) + { + printf("Problem with time_t = %d\n", (int)t); + free_root(&tz_storage, MYF(0)); + return 1; + } + } + 
printf("gmt_sec_to_TIME = localtime for time_t in [1000000000,1100000000) range\n"); + + my_init_time(); + + /* + Be careful here! my_system_gmt_sec doesn't fully handle unnormalized + dates. + */ + for (time_tmp.year= 1980; time_tmp.year < 2010; time_tmp.year++) + { + for (time_tmp.month= 1; time_tmp.month < 13; time_tmp.month++) + { + for (time_tmp.day= 1; + time_tmp.day < mon_lengths[isleap(time_tmp.year)][time_tmp.month-1]; + time_tmp.day++) + { + for (time_tmp.hour= 0; time_tmp.hour < 24; time_tmp.hour++) + { + for (time_tmp.minute= 0; time_tmp.minute < 60; time_tmp.minute+= 5) + { + for (time_tmp.second=0; time_tmp.second<60; time_tmp.second+=25) + { + long not_used; + uint not_used_2; + t= (time_t)my_system_gmt_sec(&time_tmp, ¬_used, ¬_used_2); + t1= (time_t)TIME_to_gmt_sec(&time_tmp, &tz_info, ¬_used_2); + if (t != t1) + { + /* + We need special handling during autumn since my_system_gmt_sec + prefers greater time_t values (in MET) for ambiguity. + And BTW that is a bug which should be fixed !!! + */ + tmp.tm_year= time_tmp.year - TM_YEAR_BASE; + tmp.tm_mon= time_tmp.month - 1; + tmp.tm_mday= time_tmp.day; + tmp.tm_hour= time_tmp.hour; + tmp.tm_min= time_tmp.minute; + tmp.tm_sec= time_tmp.second; + tmp.tm_isdst= 1; + + t2= mktime(&tmp); + + if (t1 == t2) + continue; + + printf("Problem: %u/%u/%u %u:%u:%u with times t=%d, t1=%d\n", + time_tmp.year, time_tmp.month, time_tmp.day, + time_tmp.hour, time_tmp.minute, time_tmp.second, + (int)t,(int)t1); + + free_root(&tz_storage, MYF(0)); + return 1; + } + } + } + } + } + } + } + + printf("TIME_to_gmt_sec = my_system_gmt_sec for test range\n"); + + free_root(&tz_storage, MYF(0)); + return 0; +} + +#endif /* defined(TESTTIME) */ diff --git a/sql/tztime.h b/sql/tztime.h new file mode 100644 index 00000000..6d8af62e --- /dev/null +++ b/sql/tztime.h @@ -0,0 +1,93 @@ +#ifndef TZTIME_INCLUDED +#define TZTIME_INCLUDED + +/* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class interface */ +#endif + +#include "my_time.h" /* my_time_t */ +#include "mysql_time.h" /* MYSQL_TIME */ +#include "sql_list.h" /* Sql_alloc */ +#include "sql_string.h" /* String */ + +class THD; + +#if !defined(TESTTIME) && !defined(TZINFO2SQL) + +class THD; + +/** + This class represents abstract time zone and provides + basic interface for MYSQL_TIME <-> my_time_t conversion. + Actual time zones which are specified by DB, or via offset + or use system functions are its descendants. +*/ +class Time_zone: public Sql_alloc +{ +public: + Time_zone() = default; /* Remove gcc warning */ + /** + Converts local time in broken down MYSQL_TIME representation to + my_time_t (UTC seconds since Epoch) represenation. + Returns 0 in case of error. May set error_code to ER_WARN_DATA_OUT_OF_RANGE + or ER_WARN_INVALID_TIMESTAMP, see TIME_to_timestamp()) + */ + virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t, + uint *error_code) const = 0; + /** + Converts time in my_time_t representation to local time in + broken down MYSQL_TIME representation. 
+ */ + virtual void gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const = 0; + /** + Because of constness of String returned by get_name() time zone name + have to be already zeroended to be able to use String::ptr() instead + of c_ptr(). + */ + virtual const String * get_name() const = 0; + + /** + We need this only for surpressing warnings, objects of this type are + allocated on MEM_ROOT and should not require destruction. + */ + virtual ~Time_zone() = default; + +protected: + static inline void adjust_leap_second(MYSQL_TIME *t); +}; + +extern Time_zone * my_tz_UTC; +extern MYSQL_PLUGIN_IMPORT Time_zone * my_tz_SYSTEM; +extern Time_zone * my_tz_OFFSET0; +extern Time_zone * my_tz_find(THD *thd, const String *name); +extern my_bool my_tz_init(THD *org_thd, const char *default_tzname, my_bool bootstrap); +extern void my_tz_free(); +extern my_time_t sec_since_epoch_TIME(MYSQL_TIME *t); + +/** + Number of elements in table list produced by my_tz_get_table_list() + (this table list contains tables which are needed for dynamical loading + of time zone descriptions). Actually it is imlementation detail that + should not be used anywhere outside of tztime.h and tztime.cc. +*/ + +static const int MY_TZ_TABLES_COUNT= 4; + +#endif /* !defined(TESTTIME) && !defined(TZINFO2SQL) */ +#endif /* TZTIME_INCLUDED */ diff --git a/sql/udf_example.c b/sql/udf_example.c new file mode 100644 index 00000000..14c793ee --- /dev/null +++ b/sql/udf_example.c @@ -0,0 +1,1292 @@ +/* + Copyright (c) 2000, 2014, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* +** example file of UDF (user definable functions) that are dynamicly loaded +** into the standard mysqld core. +** +** The functions name, type and shared library is saved in the new system +** table 'func'. To be able to create new functions one must have write +** privilege for the database 'mysql'. If one starts MySQL with +** --skip-grant, then UDF initialization will also be skipped. +** +** Syntax for the new commands are: +** create function returns {string|real|integer} +** soname +** drop function +** +** Each defined function may have a xxxx_init function and a xxxx_deinit +** function. The init function should alloc memory for the function +** and tell the main function about the max length of the result +** (for string functions), number of decimals (for double functions) and +** if the result may be a null value. +** +** If a function sets the 'error' argument to 1 the function will not be +** called anymore and mysqld will return NULL for all calls to this copy +** of the function. +** +** All strings arguments to functions are given as string pointer + length +** to allow handling of binary data. +** Remember that all functions must be thread safe. This means that one is not +** allowed to alloc any global or static variables that changes! +** If one needs memory one should alloc this in the init function and free +** this on the __deinit function. +** +** Note that the init and __deinit functions are only called once per +** SQL statement while the value function may be called many times +** +** Function 'metaphon' returns a metaphon string of the string argument. +** This is something like a soundex string, but it's more tuned for English. 
+** +** Function 'myfunc_double' returns summary of codes of all letters +** of arguments divided by summary length of all its arguments. +** +** Function 'myfunc_int' returns summary length of all its arguments. +** +** Function 'udf_sequence' returns an sequence starting from a certain number. +** +** Function 'myfunc_argument_name' returns name of argument. +** +** On the end is a couple of functions that converts hostnames to ip and +** vice versa. +** +** A dynamicly loadable file should be compiled shared. +** (something like: gcc -shared -o my_func.so myfunc.cc). +** You can easily get all switches right by doing: +** cd sql ; make udf_example.o +** Take the compile line that make writes, remove the '-c' near the end of +** the line and add -shared -o udf_example.so to the end of the compile line. +** The resulting library (udf_example.so) should be copied to some dir +** searched by ld. (/usr/lib ?) +** If you are using gcc, then you should be able to create the udf_example.so +** by simply doing 'make udf_example.so'. +** +** After the library is made one must notify mysqld about the new +** functions with the commands: +** +** CREATE FUNCTION metaphon RETURNS STRING SONAME "udf_example.so"; +** CREATE FUNCTION myfunc_double RETURNS REAL SONAME "udf_example.so"; +** CREATE FUNCTION myfunc_int RETURNS INTEGER SONAME "udf_example.so"; +** CREATE FUNCTION udf_sequence RETURNS INTEGER SONAME "udf_example.so"; +** CREATE FUNCTION lookup RETURNS STRING SONAME "udf_example.so"; +** CREATE FUNCTION reverse_lookup RETURNS STRING SONAME "udf_example.so"; +** CREATE AGGREGATE FUNCTION avgcost RETURNS REAL SONAME "udf_example.so"; +** CREATE FUNCTION myfunc_argument_name RETURNS STRING SONAME "udf_example.so"; +** +** After this the functions will work exactly like native MySQL functions. +** Functions should be created only once. 
+** +** The functions can be deleted by: +** +** DROP FUNCTION metaphon; +** DROP FUNCTION myfunc_double; +** DROP FUNCTION myfunc_int; +** DROP FUNCTION lookup; +** DROP FUNCTION reverse_lookup; +** DROP FUNCTION avgcost; +** DROP FUNCTION myfunc_argument_name; +** +** The CREATE FUNCTION and DROP FUNCTION update the func@mysql table. All +** Active function will be reloaded on every restart of server +** (if --skip-grant-tables is not given) +** +** If you ge problems with undefined symbols when loading the shared +** library, you should verify that mysqld is compiled with the -rdynamic +** option. +** +** If you can't get AGGREGATES to work, check that you have the column +** 'type' in the mysql.func table. If not, run 'mysql_upgrade'. +** +*/ + +#ifdef _WIN32 +/* Silence warning about deprecated functions , gethostbyname etc*/ +#define _WINSOCK_DEPRECATED_NO_WARNINGS +#endif + +#ifdef STANDARD +/* STANDARD is defined, don't use any mysql functions */ +#include +#include +#include +#ifdef _WIN32 +typedef unsigned __int64 ulonglong; /* Microsofts 64 bit types */ +typedef __int64 longlong; +#else +typedef unsigned long long ulonglong; +typedef long long longlong; +#endif /*_WIN32*/ +#else +#include "mariadb.h" +#include +#if defined(MYSQL_SERVER) +#include /* To get strmov() */ +#else +/* when compiled as standalone */ +#include +#define strmov(a,b) stpcpy(a,b) +#define bzero(a,b) memset(a,0,b) +#endif +#endif +#include +#include + + +#ifdef HAVE_DLOPEN + +#if !defined(HAVE_GETHOSTBYADDR_R) || !defined(HAVE_SOLARIS_STYLE_GETHOST) +static pthread_mutex_t LOCK_hostname; +#endif + +/* These must be right or mysqld will not find the symbol! 
*/ + +my_bool metaphon_init(UDF_INIT *initid, UDF_ARGS *args, char *message); +void metaphon_deinit(UDF_INIT *initid); +char *metaphon(UDF_INIT *initid, UDF_ARGS *args, char *result, + unsigned long *length, char *is_null, char *error); +my_bool myfunc_double_init(UDF_INIT *, UDF_ARGS *args, char *message); +double myfunc_double(UDF_INIT *initid, UDF_ARGS *args, char *is_null, + char *error); +my_bool myfunc_int_init(UDF_INIT *initid, UDF_ARGS *args, char *message); +longlong myfunc_int(UDF_INIT *initid, UDF_ARGS *args, char *is_null, + char *error); +my_bool udf_sequence_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + void udf_sequence_deinit(UDF_INIT *initid); +longlong udf_sequence(UDF_INIT *initid, UDF_ARGS *args, char *is_null, + char *error); +my_bool avgcost_init( UDF_INIT* initid, UDF_ARGS* args, char* message ); +void avgcost_deinit( UDF_INIT* initid ); +void avgcost_reset( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error ); +void avgcost_clear( UDF_INIT* initid, char* is_null, char *error ); +void avgcost_add( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error ); +double avgcost( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error ); +my_bool avg2_init( UDF_INIT* initid, UDF_ARGS* args, char* message ); +void avg2_deinit( UDF_INIT* initid ); +void avg2_reset( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error ); +void avg2_clear( UDF_INIT* initid, char* is_null, char *error ); +void avg2_add( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error ); +void avg2_remove( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error ); +double avg2( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error ); +my_bool is_const_init(UDF_INIT *initid, UDF_ARGS *args, char *message); +char *is_const(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long + *length, char *is_null, char *error); + + +/************************************************************************* +** Example of init 
function +** Arguments: +** initid Points to a structure that the init function should fill. +** This argument is given to all other functions. +** my_bool maybe_null 1 if function can return NULL +** Default value is 1 if any of the arguments +** is declared maybe_null. +** unsigned int decimals Number of decimals. +** Default value is max decimals in any of the +** arguments. +** unsigned int max_length Length of string result. +** The default value for integer functions is 21 +** The default value for real functions is 13+ +** default number of decimals. +** The default value for string functions is +** the longest string argument. +** char *ptr; A pointer that the function can use. +** +** args Points to a structure which contains: +** unsigned int arg_count Number of arguments +** enum Item_result *arg_type Types for each argument. +** Types are STRING_RESULT, REAL_RESULT +** and INT_RESULT. +** char **args Pointer to constant arguments. +** Contains 0 for not constant argument. +** unsigned long *lengths; max string length for each argument +** char *maybe_null Information of which arguments +** may be NULL +** +** message Error message that should be passed to the user on fail. +** The message buffer is MYSQL_ERRMSG_SIZE big, but one should +** try to keep the error message less than 80 bytes long! +** +** This function should return 1 if something goes wrong. In this case +** message should contain something useful! +**************************************************************************/ + +#define MAXMETAPH 8 + +my_bool metaphon_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1 || args->arg_type[0] != STRING_RESULT) + { + strcpy(message,"Wrong arguments to metaphon; Use the source"); + return 1; + } + initid->max_length=MAXMETAPH; + return 0; +} + +/**************************************************************************** +** Deinit function. This should free all resources allocated by +** this function. 
+** Arguments: +** initid Return value from xxxx_init +****************************************************************************/ + + +void metaphon_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +/*************************************************************************** +** UDF string function. +** Arguments: +** initid Structure filled by xxx_init +** args The same structure as to xxx_init. This structure +** contains values for all parameters. +** Note that the functions MUST check and convert all +** to the type it wants! Null values are represented by +** a NULL pointer +** result Possible buffer to save result. At least 255 byte long. +** length Pointer to length of the above buffer. In this the function +** should save the result length +** is_null If the result is null, one should store 1 here. +** error If something goes fatally wrong one should store 1 here. +** +** This function should return a pointer to the result string. +** Normally this is 'result' but may also be an alloced string. 
+***************************************************************************/ + +/* Character coding array */ +static char codes[26] = { + 1,16,4,16,9,2,4,16,9,2,0,2,2,2,1,4,0,2,4,4,1,0,0,0,8,0 + /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z*/ + }; + +/*--- Macros to access character coding array -------------*/ + +#define ISVOWEL(x) (codes[(x) - 'A'] & 1) /* AEIOU */ + + /* Following letters are not changed */ +#define NOCHANGE(x) (codes[(x) - 'A'] & 2) /* FJLMNR */ + + /* These form diphthongs when preceding H */ +#define AFFECTH(x) (codes[(x) - 'A'] & 4) /* CGPST */ + + /* These make C and G soft */ +#define MAKESOFT(x) (codes[(x) - 'A'] & 8) /* EIY */ + + /* These prevent GH from becoming F */ +#define NOGHTOF(x) (codes[(x) - 'A'] & 16) /* BDH */ + + +char *metaphon(UDF_INIT *initid __attribute__((unused)), + UDF_ARGS *args, char *result, unsigned long *length, + char *is_null, char *error __attribute__((unused))) +{ + const char *word=args->args[0]; + const char *w_end; + char *org_result; + char *n, *n_start, *n_end; /* pointers to string */ + char *metaph_end; /* pointers to end of metaph */ + char ntrans[32]; /* word with uppercase letters */ + int KSflag; /* state flag for X to KS */ + + if (!word) /* Null argument */ + { + /* The length is expected to be zero when the argument is NULL. */ + assert(args->lengths[0] == 0); + *is_null=1; + return 0; + } + + w_end=word+args->lengths[0]; + org_result=result; + + /*-------------------------------------------------------- + * Copy word to internal buffer, dropping non-alphabetic + * characters and converting to uppercase. 
+ *-------------------------------------------------------*/ + + for (n = ntrans + 1, n_end = ntrans + sizeof(ntrans)-2; + word != w_end && n < n_end; word++ ) + if ( isalpha ( *word )) + *n++ = toupper ( *word ); + + if ( n == ntrans + 1 ) /* return empty string if 0 bytes */ + { + *length=0; + return result; + } + n_end = n; /* set n_end to end of string */ + ntrans[0] = 'Z'; /* ntrans[0] should be a neutral char */ + n[0]=n[1]=0; /* pad with nulls */ + n = ntrans + 1; /* assign pointer to start */ + + /*------------------------------------------------------------ + * check for all prefixes: + * PN KN GN AE WR WH and X at start. + *----------------------------------------------------------*/ + + switch ( *n ) { + case 'P': + case 'K': + case 'G': + if ( n[1] == 'N') + *n++ = 0; + break; + case 'A': + if ( n[1] == 'E') + *n++ = 0; + break; + case 'W': + if ( n[1] == 'R' ) + *n++ = 0; + else + if ( *(n + 1) == 'H') + { + n[1] = *n; + *n++ = 0; + } + break; + case 'X': + *n = 'S'; + break; + } + + /*------------------------------------------------------------ + * Now, loop step through string, stopping at end of string + * or when the computed metaph is MAXMETAPH characters long + *----------------------------------------------------------*/ + + KSflag = 0; /* state flag for KS translation */ + + for (metaph_end = result + MAXMETAPH, n_start = n; + n < n_end && result < metaph_end; n++ ) + { + + if ( KSflag ) + { + KSflag = 0; + *result++ = *n; + } + else + { + /* drop duplicates except for CC */ + if ( *( n - 1 ) == *n && *n != 'C' ) + continue; + + /* check for F J L M N R or first letter vowel */ + if ( NOCHANGE ( *n ) || + ( n == n_start && ISVOWEL ( *n ))) + *result++ = *n; + else + switch ( *n ) { + case 'B': /* check for -MB */ + if ( n < n_end || *( n - 1 ) != 'M' ) + *result++ = *n; + break; + + case 'C': /* C = X ("sh" sound) in CH and CIA */ + /* = S in CE CI and CY */ + /* dropped in SCI SCE SCY */ + /* else K */ + if ( *( n - 1 ) != 'S' || + !MAKESOFT ( 
n[1])) + { + if ( n[1] == 'I' && n[2] == 'A' ) + *result++ = 'X'; + else + if ( MAKESOFT ( n[1])) + *result++ = 'S'; + else + if ( n[1] == 'H' ) + *result++ = (( n == n_start && + !ISVOWEL ( n[2])) || + *( n - 1 ) == 'S' ) ? + (char)'K' : (char)'X'; + else + *result++ = 'K'; + } + break; + + case 'D': /* J before DGE, DGI, DGY, else T */ + *result++ = + ( n[1] == 'G' && + MAKESOFT ( n[2])) ? + (char)'J' : (char)'T'; + break; + + case 'G': /* complicated, see table in text */ + if (( n[1] != 'H' || ISVOWEL ( n[2])) + && ( + n[1] != 'N' || + ( + (n + 1) < n_end && + ( + n[2] != 'E' || + *( n + 3 ) != 'D' + ) + ) + ) + && ( + *( n - 1 ) != 'D' || + !MAKESOFT ( n[1]) + ) + ) + *result++ = + ( MAKESOFT ( *( n + 1 )) && + n[2] != 'G' ) ? + (char)'J' : (char)'K'; + else + if ( n[1] == 'H' && + !NOGHTOF( *( n - 3 )) && + *( n - 4 ) != 'H') + *result++ = 'F'; + break; + + case 'H': /* H if before a vowel and not after */ + /* C, G, P, S, T */ + + if ( !AFFECTH ( *( n - 1 )) && + ( !ISVOWEL ( *( n - 1 )) || + ISVOWEL ( n[1]))) + *result++ = 'H'; + break; + + case 'K': /* K = K, except dropped after C */ + if ( *( n - 1 ) != 'C') + *result++ = 'K'; + break; + + case 'P': /* PH = F, else P = P */ + *result++ = *( n + 1 ) == 'H' + ? (char)'F' : (char)'P'; + break; + case 'Q': /* Q = K (U after Q is already gone */ + *result++ = 'K'; + break; + + case 'S': /* SH, SIO, SIA = X ("sh" sound) */ + *result++ = ( n[1] == 'H' || + ( *(n + 1) == 'I' && + ( n[2] == 'O' || + n[2] == 'A'))) ? 
+ (char)'X' : (char)'S'; + break; + + case 'T': /* TIO, TIA = X ("sh" sound) */ + /* TH = 0, ("th" sound ) */ + if ( *( n + 1 ) == 'I' && ( n[2] == 'O' + || n[2] == 'A') ) + *result++ = 'X'; + else + if ( n[1] == 'H' ) + *result++ = '0'; + else + if ( *( n + 1) != 'C' || n[2] != 'H') + *result++ = 'T'; + break; + + case 'V': /* V = F */ + *result++ = 'F'; + break; + + case 'W': /* only exist if a vowel follows */ + case 'Y': + if ( ISVOWEL ( n[1])) + *result++ = *n; + break; + + case 'X': /* X = KS, except at start */ + if ( n == n_start ) + *result++ = 'S'; + else + { + *result++ = 'K'; /* insert K, then S */ + KSflag = 1; /* this flag will cause S to be + inserted on next pass thru loop */ + } + break; + + case 'Z': + *result++ = 'S'; + break; + } + } + } + *length= (unsigned long) (result - org_result); + return org_result; +} + + +/*************************************************************************** +** UDF double function. +** Arguments: +** initid Structure filled by xxx_init +** args The same structure as to xxx_init. This structure +** contains values for all parameters. +** Note that the functions MUST check and convert all +** to the type it wants! Null values are represented by +** a NULL pointer +** is_null If the result is null, one should store 1 here. +** error If something goes fatally wrong one should store 1 here. +** +** This function should return the result. +***************************************************************************/ + +my_bool myfunc_double_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + uint i; + + if (!args->arg_count) + { + strcpy(message,"myfunc_double must have at least one argument"); + return 1; + } + /* + ** As this function wants to have everything as strings, force all arguments + ** to strings. 
+ */ + for (i=0 ; i < args->arg_count; i++) + args->arg_type[i]=STRING_RESULT; + initid->maybe_null=1; /* The result may be null */ + initid->decimals=2; /* We want 2 decimals in the result */ + initid->max_length=6; /* 3 digits + . + 2 decimals */ + return 0; +} + + +double myfunc_double(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, + char *is_null, char *error __attribute__((unused))) +{ + unsigned long val = 0; + unsigned long v = 0; + uint i, j; + + for (i = 0; i < args->arg_count; i++) + { + if (args->args[i] == NULL) + continue; + val += args->lengths[i]; + for (j=args->lengths[i] ; j-- > 0 ;) + v += args->args[i][j]; + } + if (val) + return (double) v/ (double) val; + *is_null=1; + return 0.0; +} + + +/*************************************************************************** +** UDF long long function. +** Arguments: +** initid Return value from xxxx_init +** args The same structure as to xxx_init. This structure +** contains values for all parameters. +** Note that the functions MUST check and convert all +** to the type it wants! Null values are represented by +** a NULL pointer +** is_null If the result is null, one should store 1 here. +** error If something goes fatally wrong one should store 1 here. 
+** +** This function should return the result as a long long +***************************************************************************/ + +/* This function returns the sum of all arguments */ + +longlong myfunc_int(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, + char *is_null __attribute__((unused)), + char *error __attribute__((unused))) +{ + longlong val = 0; + uint i; + + for (i = 0; i < args->arg_count; i++) + { + if (args->args[i] == NULL) + continue; + switch (args->arg_type[i]) { + case STRING_RESULT: /* Add string lengths */ + val += args->lengths[i]; + break; + case INT_RESULT: /* Add numbers */ + val += *((longlong*) args->args[i]); + break; + case REAL_RESULT: /* Add numers as longlong */ + val += (longlong) *((double*) args->args[i]); + break; + default: + break; + } + } + return val; +} + +/* + At least one of _init/_deinit is needed unless the server is started + with --allow_suspicious_udfs. +*/ +my_bool myfunc_int_init(UDF_INIT *initid __attribute__((unused)), + UDF_ARGS *args __attribute__((unused)), + char *message __attribute__((unused))) +{ + return 0; +} + +/* + Simple example of how to get a sequences starting from the first argument + or 1 if no arguments have been given +*/ + +my_bool udf_sequence_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count > 1) + { + strmov(message,"This function takes none or 1 argument"); + return 1; + } + if (args->arg_count) + args->arg_type[0]= INT_RESULT; /* Force argument to int */ + + if (!(initid->ptr=(char*) malloc(sizeof(longlong)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + bzero(initid->ptr,sizeof(longlong)); + /* + udf_sequence() is a non-deterministic function : it has different value + even if called with the same arguments. 
+ */ + initid->const_item=0; + return 0; +} + +void udf_sequence_deinit(UDF_INIT *initid) +{ + if (initid->ptr) + free(initid->ptr); +} + +longlong udf_sequence(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, + char *is_null __attribute__((unused)), + char *error __attribute__((unused))) +{ + ulonglong val=0; + if (args->arg_count) + val= *((longlong*) args->args[0]); + return ++*((longlong*) initid->ptr) + val; +} + + +/**************************************************************************** +** Some functions that handles IP and hostname conversions +** The orignal function was from Zeev Suraski. +** +** CREATE FUNCTION lookup RETURNS STRING SONAME "udf_example.so"; +** CREATE FUNCTION reverse_lookup RETURNS STRING SONAME "udf_example.so"; +** +****************************************************************************/ + +#ifdef _WIN32 +#include +#else +#include +#include +#include +#include +#endif + +my_bool lookup_init(UDF_INIT *initid, UDF_ARGS *args, char *message); +void lookup_deinit(UDF_INIT *initid); +char *lookup(UDF_INIT *initid, UDF_ARGS *args, char *result, + unsigned long *length, char *null_value, char *error); +my_bool reverse_lookup_init(UDF_INIT *initid, UDF_ARGS *args, char *message); +void reverse_lookup_deinit(UDF_INIT *initid); +char *reverse_lookup(UDF_INIT *initid, UDF_ARGS *args, char *result, + unsigned long *length, char *null_value, char *error); + + +/**************************************************************************** +** lookup IP for an hostname. 
+** +** This code assumes that gethostbyname_r exists and inet_ntoa() is thread +** safe (As it is in Solaris) +****************************************************************************/ + + +my_bool lookup_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1 || args->arg_type[0] != STRING_RESULT) + { + strmov(message,"Wrong arguments to lookup; Use the source"); + return 1; + } + initid->max_length=11; + initid->maybe_null=1; +#if !defined(HAVE_GETHOSTBYADDR_R) || !defined(HAVE_SOLARIS_STYLE_GETHOST) + (void) pthread_mutex_init(&LOCK_hostname,MY_MUTEX_INIT_SLOW); +#endif + return 0; +} + +void lookup_deinit(UDF_INIT *initid __attribute__((unused))) +{ +#if !defined(HAVE_GETHOSTBYADDR_R) || !defined(HAVE_SOLARIS_STYLE_GETHOST) + (void) pthread_mutex_destroy(&LOCK_hostname); +#endif +} + +char *lookup(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, + char *result, unsigned long *res_length, char *null_value, + char *error __attribute__((unused))) +{ + uint length; + char name_buff[256]; + struct hostent *hostent; +#if defined(HAVE_GETHOSTBYADDR_R) && defined(HAVE_SOLARIS_STYLE_GETHOST) + int tmp_errno; + char hostname_buff[2048]; + struct hostent tmp_hostent; +#endif + struct in_addr in; + + if (!args->args[0] || !(length=args->lengths[0])) + { + *null_value=1; + return 0; + } + if (length >= sizeof(name_buff)) + length=sizeof(name_buff)-1; + memcpy(name_buff,args->args[0],length); + name_buff[length]=0; +#if defined(HAVE_GETHOSTBYADDR_R) && defined(HAVE_SOLARIS_STYLE_GETHOST) + if (!(hostent=gethostbyname_r(name_buff,&tmp_hostent,hostname_buff, + sizeof(hostname_buff), &tmp_errno))) + { + *null_value=1; + return 0; + } +#else + pthread_mutex_lock(&LOCK_hostname); + if (!(hostent= gethostbyname((char*) name_buff))) + { + pthread_mutex_unlock(&LOCK_hostname); + *null_value= 1; + return 0; + } + pthread_mutex_unlock(&LOCK_hostname); +#endif + memcpy(&in, *hostent->h_addr_list, sizeof(in.s_addr)); + *res_length= (ulong) 
(strmov(result, inet_ntoa(in)) - result); + return result; +} + + +/**************************************************************************** +** return hostname for an IP number. +** The functions can take as arguments a string "xxx.xxx.xxx.xxx" or +** four numbers. +****************************************************************************/ + +my_bool reverse_lookup_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count == 1) + args->arg_type[0]= STRING_RESULT; + else if (args->arg_count == 4) + args->arg_type[0]=args->arg_type[1]=args->arg_type[2]=args->arg_type[3]= + INT_RESULT; + else + { + strmov(message, + "Wrong number of arguments to reverse_lookup; Use the source"); + return 1; + } + initid->max_length=32; + initid->maybe_null=1; +#if !defined(HAVE_GETHOSTBYADDR_R) || !defined(HAVE_SOLARIS_STYLE_GETHOST) + (void) pthread_mutex_init(&LOCK_hostname,MY_MUTEX_INIT_SLOW); +#endif + return 0; +} + +void reverse_lookup_deinit(UDF_INIT *initid __attribute__((unused))) +{ +#if !defined(HAVE_GETHOSTBYADDR_R) || !defined(HAVE_SOLARIS_STYLE_GETHOST) + (void) pthread_mutex_destroy(&LOCK_hostname); +#endif +} + +char *reverse_lookup(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, + char *result, unsigned long *res_length, + char *null_value, char *error __attribute__((unused))) +{ +#if defined(HAVE_GETHOSTBYADDR_R) && defined(HAVE_SOLARIS_STYLE_GETHOST) + char name_buff[256]; + struct hostent tmp_hostent; + int tmp_errno; +#endif + struct hostent *hp; + unsigned long taddr; + uint length; + + if (args->arg_count == 4) + { + if (!args->args[0] || !args->args[1] ||!args->args[2] ||!args->args[3]) + { + *null_value=1; + return 0; + } + sprintf(result,"%d.%d.%d.%d", + (int) *((longlong*) args->args[0]), + (int) *((longlong*) args->args[1]), + (int) *((longlong*) args->args[2]), + (int) *((longlong*) args->args[3])); + } + else + { /* string argument */ + if (!args->args[0]) /* Return NULL for NULL values */ + { + *null_value=1; + 
return 0; + } + length=args->lengths[0]; + if (length >= (uint) *res_length-1) + length=(uint) *res_length; + memcpy(result,args->args[0],length); + result[length]=0; + } + + taddr = inet_addr(result); + if (taddr == (unsigned long) -1L) + { + *null_value=1; + return 0; + } +#if defined(HAVE_GETHOSTBYADDR_R) && defined(HAVE_SOLARIS_STYLE_GETHOST) + if (!(hp=gethostbyaddr_r((char*) &taddr,sizeof(taddr), AF_INET, + &tmp_hostent, name_buff,sizeof(name_buff), + &tmp_errno))) + { + *null_value=1; + return 0; + } +#else + pthread_mutex_lock(&LOCK_hostname); + if (!(hp= gethostbyaddr((char*) &taddr, sizeof(taddr), AF_INET))) + { + pthread_mutex_unlock(&LOCK_hostname); + *null_value= 1; + return 0; + } + pthread_mutex_unlock(&LOCK_hostname); +#endif + *res_length=(ulong) (strmov(result,hp->h_name) - result); + return result; +} + +/* +** Syntax for the new aggregate commands are: +** create aggregate function returns {string|real|integer} +** soname +** +** Syntax for avgcost: avgcost( t.quantity, t.price ) +** with t.quantity=integer, t.price=double +** (this example is provided by Andreas F. Bobak ) +*/ + + +struct avgcost_data +{ + ulonglong count; + longlong totalquantity; + double totalprice; +}; + + +/* +** Average Cost Aggregate Function. +*/ +my_bool +avgcost_init( UDF_INIT* initid, UDF_ARGS* args, char* message ) +{ + struct avgcost_data* data; + + if (args->arg_count != 2) + { + strcpy( + message, + "wrong number of arguments: AVGCOST() requires two arguments" + ); + return 1; + } + + if ((args->arg_type[0] != INT_RESULT) || (args->arg_type[1] != REAL_RESULT) ) + { + strcpy( + message, + "wrong argument type: AVGCOST() requires an INT and a REAL" + ); + return 1; + } + + /* + ** force arguments to double. + */ + /*args->arg_type[0] = REAL_RESULT; + args->arg_type[1] = REAL_RESULT;*/ + + initid->maybe_null = 0; /* The result may be null */ + initid->decimals = 4; /* We want 4 decimals in the result */ + initid->max_length = 20; /* 6 digits + . 
+ 10 decimals */ + + if (!(data = (struct avgcost_data*) malloc(sizeof(struct avgcost_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->totalquantity = 0; + data->totalprice = 0.0; + + initid->ptr = (char*)data; + + return 0; +} + +void +avgcost_deinit( UDF_INIT* initid ) +{ + free(initid->ptr); +} + + +/* This is only for MySQL 4.0 compatibility */ +void +avgcost_reset(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* message) +{ + avgcost_clear(initid, is_null, message); + avgcost_add(initid, args, is_null, message); +} + +/* This is needed to get things to work in MySQL 4.1.1 and above */ + +void +avgcost_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) +{ + struct avgcost_data* data = (struct avgcost_data*)initid->ptr; + data->totalprice= 0.0; + data->totalquantity= 0; + data->count= 0; +} + + +void +avgcost_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null __attribute__((unused)), + char* message __attribute__((unused))) +{ + if (args->args[0] && args->args[1]) + { + struct avgcost_data* data = (struct avgcost_data*)initid->ptr; + longlong quantity = *((longlong*)args->args[0]); + longlong newquantity = data->totalquantity + quantity; + double price = *((double*)args->args[1]); + + data->count++; + + if ( ((data->totalquantity >= 0) && (quantity < 0)) + || ((data->totalquantity < 0) && (quantity > 0)) ) + { + /* + ** passing from + to - or from - to + + */ + if ( ((quantity < 0) && (newquantity < 0)) + || ((quantity > 0) && (newquantity > 0)) ) + { + data->totalprice = price * (double)newquantity; + } + /* + ** sub q if totalq > 0 + ** add q if totalq < 0 + */ + else + { + price = data->totalprice / (double)data->totalquantity; + data->totalprice = price * (double)newquantity; + } + data->totalquantity = newquantity; + } + else + { + data->totalquantity += quantity; + data->totalprice += price * (double)quantity; + } + + if (data->totalquantity == 0) + 
data->totalprice = 0.0; + } +} + + +double +avgcost( UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) +{ + struct avgcost_data* data = (struct avgcost_data*)initid->ptr; + if (!data->count || !data->totalquantity) + { + *is_null = 1; + return 0.0; + } + + *is_null = 0; + return data->totalprice/(double)data->totalquantity; +} + + +/* +** Average 2 (number, sum)*/ +struct avg2_data +{ + ulonglong count; + double sum; +}; + + +my_bool +avg2_init( UDF_INIT* initid, UDF_ARGS* args, char* message ) +{ + struct avg2_data* data; + + if (args->arg_count != 2) + { + strcpy( + message, + "wrong number of arguments: AVG2() requires two arguments" + ); + return 1; + } + + if ((args->arg_type[0] != INT_RESULT) || (args->arg_type[1] != REAL_RESULT) ) + { + strcpy( + message, + "wrong argument type: AVG2() requires an INT and a REAL" + ); + return 1; + } + + /* + ** force arguments to double. + */ + /*args->arg_type[0] = REAL_RESULT; + args->arg_type[1] = REAL_RESULT;*/ + + initid->maybe_null = 0; /* The result may be null */ + initid->decimals = 4; /* We want 4 decimals in the result */ + initid->max_length = 20; /* 6 digits + . 
+ 10 decimals */ + + if (!(data = (struct avg2_data*) malloc(sizeof(struct avg2_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->count = 0; + data->sum = 0.0; + + initid->ptr = (char*)data; + + return 0; +} + +void +avg2_deinit( UDF_INIT* initid ) +{ + free(initid->ptr); +} + + +/* This is only for MySQL 4.0 compatibility */ +void +avg2_reset(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* message) +{ + avgcost_clear(initid, is_null, message); + avgcost_add(initid, args, is_null, message); +} + +/* This is needed to get things to work in MySQL 4.1.1 and above */ + +void +avg2_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) +{ + struct avg2_data* data = (struct avg2_data*)initid->ptr; + data->sum= 0.0; + data->count= 0; +} + + +void +avg2_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null __attribute__((unused)), + char* message __attribute__((unused))) +{ + if (args->args[0] && args->args[1]) + { + struct avg2_data* data = (struct avg2_data*)initid->ptr; + longlong quantity = *((longlong*)args->args[0]); + double sum = *((double*)args->args[1]); + + data->count += quantity; + data->sum += sum; + } +} + + +void +avg2_remove(UDF_INIT* initid, UDF_ARGS* args, + char* is_null __attribute__((unused)), + char* message __attribute__((unused))) +{ + if (args->args[0] && args->args[1]) + { + struct avg2_data* data = (struct avg2_data*)initid->ptr; + longlong quantity = *((longlong*)args->args[0]); + double sum = *((double*)args->args[1]); + + data->count -= quantity; + data->sum -= sum; + } +} + + +double +avg2( UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) +{ + struct avg2_data* data = (struct avg2_data*)initid->ptr; + if (!data->count) + { + *is_null = 1; + return 0.0; + } + + *is_null = 0; + return data->sum/(double)data->count; +} + +my_bool myfunc_argument_name_init(UDF_INIT *initid, UDF_ARGS *args, + char 
*message); +char *myfunc_argument_name(UDF_INIT *initid, UDF_ARGS *args, char *result, + unsigned long *length, char *null_value, + char *error); + +my_bool myfunc_argument_name_init(UDF_INIT *initid, UDF_ARGS *args, + char *message) +{ + if (args->arg_count != 1) + { + strmov(message,"myfunc_argument_name_init accepts only one argument"); + return 1; + } + initid->max_length= args->attribute_lengths[0]; + initid->maybe_null= 1; + initid->const_item= 1; + return 0; +} + +char *myfunc_argument_name(UDF_INIT *initid __attribute__((unused)), + UDF_ARGS *args, char *result, + unsigned long *length, char *null_value, + char *error __attribute__((unused))) +{ + if (!args->attributes[0]) + { + *null_value= 1; + return 0; + } + (*length)--; /* space for ending \0 (for debugging purposes) */ + if (*length > args->attribute_lengths[0]) + *length= args->attribute_lengths[0]; + memcpy(result, args->attributes[0], *length); + result[*length]= 0; + return result; +} + + + +my_bool is_const_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) + { + strmov(message, "IS_CONST accepts only one argument"); + return 1; + } + initid->ptr= (char*)((args->args[0] != NULL) ? 
(size_t)1 : (size_t)0); + return 0; +} + +char * is_const(UDF_INIT *initid, UDF_ARGS *args __attribute__((unused)), + char *result, unsigned long *length, + char *is_null, char *error __attribute__((unused))) +{ + if (initid->ptr != 0) { + sprintf(result, "const"); + } else { + sprintf(result, "not const"); + } + *is_null= 0; + *length= (uint) strlen(result); + return result; +} + + + +my_bool check_const_len_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) + { + strmov(message, "CHECK_CONST_LEN accepts only one argument"); + return 1; + } + if (args->args[0] == 0) + { + initid->ptr= (char*)"Not constant"; + } + else if(strlen(args->args[0]) == args->lengths[0]) + { + initid->ptr= (char*)"Correct length"; + } + else + { + initid->ptr= (char*)"Wrong length"; + } + initid->max_length = 100; + return 0; +} + +char * check_const_len(UDF_INIT *initid, UDF_ARGS *args __attribute__((unused)), + char *result, unsigned long *length, + char *is_null, char *error __attribute__((unused))) +{ + strmov(result, initid->ptr); + *length= (uint) strlen(result); + *is_null= 0; + return result; +} + + +#endif /* HAVE_DLOPEN */ diff --git a/sql/udf_example.def b/sql/udf_example.def new file mode 100644 index 00000000..903c2b74 --- /dev/null +++ b/sql/udf_example.def @@ -0,0 +1,36 @@ +LIBRARY udf_example +VERSION 1.0 +EXPORTS + lookup + lookup_init + lookup_deinit + reverse_lookup + reverse_lookup_init + reverse_lookup_deinit + metaphon_init + metaphon_deinit + metaphon + myfunc_double_init + myfunc_double + myfunc_int_init + myfunc_int + udf_sequence_init + udf_sequence_deinit + udf_sequence + avgcost_init + avgcost_deinit + avgcost_reset + avgcost_add + avgcost_clear + avgcost + avg2_init + avg2_deinit + avg2_reset + avg2_add + avg2_remove + avg2_clear + avg2 + is_const + is_const_init + check_const_len + check_const_len_init diff --git a/sql/uniques.cc b/sql/uniques.cc new file mode 100644 index 00000000..572d80f0 --- /dev/null +++ b/sql/uniques.cc 
@@ -0,0 +1,833 @@ +/* Copyright (c) 2001, 2010, Oracle and/or its affiliates. + Copyright (c) 2010, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Function to handle quick removal of duplicates + This code is used when doing multi-table deletes to find the rows in + reference tables that needs to be deleted. + + The basic idea is as follows: + + Store first all strings in a binary tree, ignoring duplicates. + When the tree uses more memory than 'max_heap_table_size', + write the tree (in sorted order) out to disk and start with a new tree. + When all data has been generated, merge the trees (removing any found + duplicates). + + The unique entries will be returned in sort order, to ensure that we do the + deletes in disk order. +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "sql_sort.h" +#include "queues.h" // QUEUE +#include "my_tree.h" // element_count +#include "uniques.h" // Unique +#include "sql_sort.h" + +int unique_write_to_file(uchar* key, element_count count, Unique *unique) +{ + /* + Use unique->size (size of element stored in the tree) and not + unique->tree.size_of_element. The latter is different from unique->size + when tree implementation chooses to store pointer to key in TREE_ELEMENT + (instead of storing the element itself there) + */ + return my_b_write(&unique->file, key, unique->size) ? 
1 : 0; +} + +int unique_write_to_file_with_count(uchar* key, element_count count, Unique *unique) +{ + return my_b_write(&unique->file, key, unique->size) || + my_b_write(&unique->file, (uchar*)&count, sizeof(element_count)) ? 1 : 0; +} + +int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique) +{ + memcpy(unique->sort.record_pointers, key, unique->size); + unique->sort.record_pointers+=unique->size; + return 0; +} + +int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *unique) +{ + if (count >= unique->min_dupl_count) + { + memcpy(unique->sort.record_pointers, key, unique->size); + unique->sort.record_pointers+=unique->size; + } + else + unique->filtered_out_elems++; + return 0; +} + + +Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, + uint size_arg, size_t max_in_memory_size_arg, + uint min_dupl_count_arg) + :max_in_memory_size(max_in_memory_size_arg), + size(size_arg), + elements(0) +{ + my_b_clear(&file); + min_dupl_count= min_dupl_count_arg; + full_size= size; + if (min_dupl_count_arg) + full_size+= sizeof(element_count); + with_counters= MY_TEST(min_dupl_count_arg); + init_tree(&tree, (max_in_memory_size / 16), 0, size, comp_func, + NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC)); + /* If the following fail's the next add will also fail */ + my_init_dynamic_array(PSI_INSTRUMENT_ME, &file_ptrs, sizeof(Merge_chunk), 16, + 16, MYF(MY_THREAD_SPECIFIC)); + /* + If you change the following, change it in get_max_elements function, too. + */ + max_elements= (ulong) (max_in_memory_size / + ALIGN_SIZE(sizeof(TREE_ELEMENT)+size)); + if (!max_elements) + max_elements= 1; + + (void) open_cached_file(&file, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE, + MYF(MY_WME)); +} + + +/* + Calculate log2(n!) + + NOTES + Stirling's approximate formula is used: + + n! ~= sqrt(2*M_PI*n) * (n/M_E)^n + + Derivation of formula used for calculations is as follows: + + log2(n!) 
= log(n!)/log(2) = log(sqrt(2*M_PI*n)*(n/M_E)^n) / log(2) = + + = (log(2*M_PI*n)/2 + n*log(n/M_E)) / log(2). +*/ + +inline double log2_n_fact(double x) +{ + return (log(2*M_PI*x)/2 + x*log(x/M_E)) / M_LN2; +} + + +/* + Calculate cost of merge_buffers function call for given sequence of + input stream lengths and store the number of rows in result stream in *last. + + SYNOPSIS + get_merge_buffers_cost() + buff_elems Array of #s of elements in buffers + elem_size Size of element stored in buffer + first Pointer to first merged element size + last Pointer to last merged element size + + RETURN + Cost of merge_buffers operation in disk seeks. + + NOTES + It is assumed that no rows are eliminated during merge. + The cost is calculated as + + cost(read_and_write) + cost(merge_comparisons). + + All bytes in the sequences is read and written back during merge so cost + of disk io is 2*elem_size*total_buf_elems/IO_SIZE (2 is for read + write) + + For comparisons cost calculations we assume that all merged sequences have + the same length, so each of total_buf_size elements will be added to a sort + heap with (n_buffers-1) elements. This gives the comparison cost: + + total_buf_elems* log2(n_buffers) / TIME_FOR_COMPARE_ROWID; +*/ + +static double get_merge_buffers_cost(uint *buff_elems, uint elem_size, + uint *first, uint *last, + double compare_factor) +{ + uint total_buf_elems= 0; + for (uint *pbuf= first; pbuf <= last; pbuf++) + total_buf_elems+= *pbuf; + *last= total_buf_elems; + + size_t n_buffers= last - first + 1; + + /* Using log2(n)=log(n)/log(2) formula */ + return 2*((double)total_buf_elems*elem_size) / IO_SIZE + + total_buf_elems*log((double) n_buffers) / (compare_factor * M_LN2); +} + + +/* + Calculate cost of merging buffers into one in Unique::get, i.e. calculate + how long (in terms of disk seeks) the two calls + merge_many_buffs(...); + merge_buffers(...); + will take. 
+ + SYNOPSIS + get_merge_many_buffs_cost() + buffer buffer space for temporary data, at least + Unique::get_cost_calc_buff_size bytes + maxbuffer # of full buffers + max_n_elems # of elements in first maxbuffer buffers + last_n_elems # of elements in last buffer + elem_size size of buffer element + + NOTES + maxbuffer+1 buffers are merged, where first maxbuffer buffers contain + max_n_elems elements each and last buffer contains last_n_elems elements. + + The current implementation does a dumb simulation of merge_many_buffs + function actions. + + RETURN + Cost of merge in disk seeks. +*/ + +static double get_merge_many_buffs_cost(uint *buffer, + uint maxbuffer, uint max_n_elems, + uint last_n_elems, int elem_size, + double compare_factor) +{ + int i; + double total_cost= 0.0; + uint *buff_elems= buffer; /* #s of elements in each of merged sequences */ + + /* + Set initial state: first maxbuffer sequences contain max_n_elems elements + each, last sequence contains last_n_elems elements. + */ + for (i = 0; i < (int)maxbuffer; i++) + buff_elems[i]= max_n_elems; + buff_elems[maxbuffer]= last_n_elems; + + /* + Do it exactly as merge_many_buff function does, calling + get_merge_buffers_cost to get cost of merge_buffers. + */ + if (maxbuffer >= MERGEBUFF2) + { + while (maxbuffer >= MERGEBUFF2) + { + uint lastbuff= 0; + for (i = 0; i <= (int) maxbuffer - MERGEBUFF*3/2; i += MERGEBUFF) + { + total_cost+=get_merge_buffers_cost(buff_elems, elem_size, + buff_elems + i, + buff_elems + i + MERGEBUFF-1, + compare_factor); + lastbuff++; + } + total_cost+=get_merge_buffers_cost(buff_elems, elem_size, + buff_elems + i, + buff_elems + maxbuffer, + compare_factor); + maxbuffer= lastbuff; + } + } + + /* Simulate final merge_buff call. 
*/ + total_cost += get_merge_buffers_cost(buff_elems, elem_size, + buff_elems, buff_elems + maxbuffer, + compare_factor); + return total_cost; +} + + +/* + Calculate cost of using Unique for processing nkeys elements of size + key_size using max_in_memory_size memory. + + SYNOPSIS + Unique::get_use_cost() + buffer space for temporary data, use Unique::get_cost_calc_buff_size + to get # bytes needed. + nkeys #of elements in Unique + key_size size of each elements in bytes + max_in_memory_size amount of memory Unique will be allowed to use + compare_factor used to calculate cost of one comparison + write_fl if the result must be saved written to disk + in_memory_elems OUT estimate of the number of elements in memory + if disk is not used + + RETURN + Cost in disk seeks. + + NOTES + cost(using_unqiue) = + cost(create_trees) + (see #1) + cost(merge) + (see #2) + cost(read_result) (see #3) + + 1. Cost of trees creation + For each Unique::put operation there will be 2*log2(n+1) elements + comparisons, where n runs from 1 tree_size (we assume that all added + elements are different). Together this gives: + + n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!) + + then cost(tree_creation) = n_compares*ROWID_COMPARE_COST; + + Total cost of creating trees: + (n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost. + + Approximate value of log2(N!) is calculated by log2_n_fact function. + + 2. Cost of merging. + If only one tree is created by Unique no merging will be necessary. + Otherwise, we model execution of merge_many_buff function and count + #of merges. (The reason behind this is that number of buffers is small, + while size of buffers is big and we don't want to loose precision with + O(x)-style formula) + + 3. If only one tree is created by Unique no disk io will happen. + Otherwise, ceil(key_len*n_keys) disk seeks are necessary. We assume + these will be random seeks. 
+*/ + +double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size, + size_t max_in_memory_size, + double compare_factor, + bool intersect_fl, bool *in_memory) +{ + size_t max_elements_in_tree; + size_t last_tree_elems; + size_t n_full_trees; /* number of trees in unique - 1 */ + double result; + + max_elements_in_tree= ((size_t) max_in_memory_size / + ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size)); + + if (max_elements_in_tree == 0) + max_elements_in_tree= 1; + + n_full_trees= nkeys / max_elements_in_tree; + last_tree_elems= nkeys % max_elements_in_tree; + + /* Calculate cost of creating trees */ + result= 2*log2_n_fact(last_tree_elems + 1.0); + if (n_full_trees) + result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0); + result /= compare_factor; + + DBUG_PRINT("info",("unique trees sizes: %u=%u*%u + %u", (uint)nkeys, + (uint)n_full_trees, + (uint)(n_full_trees?max_elements_in_tree:0), + (uint)last_tree_elems)); + + if (in_memory) + *in_memory= !n_full_trees; + + if (!n_full_trees) + return result; + + /* + There is more then one tree and merging is necessary. + First, add cost of writing all trees to disk, assuming that all disk + writes are sequential. + */ + result += DISK_SEEK_BASE_COST * n_full_trees * + ceil(((double) key_size)*max_elements_in_tree / IO_SIZE); + result += DISK_SEEK_BASE_COST * ceil(((double) key_size)*last_tree_elems / IO_SIZE); + + /* Cost of merge */ + if (intersect_fl) + key_size+= sizeof(element_count); + double merge_cost= get_merge_many_buffs_cost(buffer, (uint)n_full_trees, + (uint)max_elements_in_tree, + (uint)last_tree_elems, key_size, + compare_factor); + result += merge_cost; + /* + Add cost of reading the resulting sequence, assuming there were no + duplicate elements. 
+ */ + result += ceil((double)key_size*nkeys/IO_SIZE); + + return result; +} + +Unique::~Unique() +{ + close_cached_file(&file); + delete_tree(&tree, 0); + delete_dynamic(&file_ptrs); +} + + + /* Write tree to disk; clear tree */ +bool Unique::flush() +{ + Merge_chunk file_ptr; + elements+= tree.elements_in_tree; + file_ptr.set_rowcount(tree.elements_in_tree); + file_ptr.set_file_position(my_b_tell(&file)); + + tree_walk_action action= min_dupl_count ? + (tree_walk_action) unique_write_to_file_with_count : + (tree_walk_action) unique_write_to_file; + if (tree_walk(&tree, action, + (void*) this, left_root_right) || + insert_dynamic(&file_ptrs, (uchar*) &file_ptr)) + return 1; + delete_tree(&tree, 0); + return 0; +} + + +/* + Clear the tree and the file. + You must call reset() if you want to reuse Unique after walk(). +*/ + +void +Unique::reset() +{ + reset_tree(&tree); + /* + If elements != 0, some trees were stored in the file (see how + flush() works). Note, that we can not count on my_b_tell(&file) == 0 + here, because it can return 0 right after walk(), and walk() does not + reset any Unique member. + */ + if (elements) + { + reset_dynamic(&file_ptrs); + reinit_io_cache(&file, WRITE_CACHE, 0L, 0, 1); + } + my_free(sort.record_pointers); + elements= 0; + tree.flag= 0; + sort.record_pointers= 0; +} + +/* + The comparison function, passed to queue_init() in merge_walk() and in + merge_buffers() when the latter is called from Uniques::get() must + use comparison function of Uniques::tree, but compare members of struct + BUFFPEK. 
+*/ + +C_MODE_START + +static int buffpek_compare(void *arg, uchar *key_ptr1, uchar *key_ptr2) +{ + BUFFPEK_COMPARE_CONTEXT *ctx= (BUFFPEK_COMPARE_CONTEXT *) arg; + return ctx->key_compare(ctx->key_compare_arg, + *((uchar **) key_ptr1), *((uchar **)key_ptr2)); +} + +C_MODE_END + + +inline +element_count get_counter_from_merged_element(void *ptr, uint ofs) +{ + element_count cnt; + memcpy((uchar *) &cnt, (uchar *) ptr + ofs, sizeof(element_count)); + return cnt; +} + + +inline +void put_counter_into_merged_element(void *ptr, uint ofs, element_count cnt) +{ + memcpy((uchar *) ptr + ofs, (uchar *) &cnt, sizeof(element_count)); +} + + +/* + DESCRIPTION + + Function is very similar to merge_buffers, but instead of writing sorted + unique keys to the output file, it invokes walk_action for each key. + This saves I/O if you need to pass through all unique keys only once. + + SYNOPSIS + merge_walk() + All params are 'IN' (but see comment for begin, end): + merge_buffer buffer to perform cached piece-by-piece loading + of trees; initially the buffer is empty + merge_buffer_size size of merge_buffer. Must be aligned with + key_length + key_length size of tree element; key_length * (end - begin) + must be less or equal than merge_buffer_size. + begin pointer to BUFFPEK struct for the first tree. + end pointer to BUFFPEK struct for the last tree; + end > begin and [begin, end) form a consecutive + range. BUFFPEKs structs in that range are used and + overwritten in merge_walk(). + walk_action element visitor. Action is called for each unique + key. + walk_action_arg argument to walk action. Passed to it on each call. + compare elements comparison function + compare_arg comparison function argument + file file with all trees dumped. Trees in the file + must contain sorted unique values. Cache must be + initialized in read mode. 
+ with counters take into account counters for equal merged + elements + RETURN VALUE + 0 ok + <> 0 error +*/ + +static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, + uint key_length, Merge_chunk *begin, Merge_chunk *end, + tree_walk_action walk_action, void *walk_action_arg, + qsort_cmp2 compare, void *compare_arg, + IO_CACHE *file, bool with_counters) +{ + BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg }; + QUEUE queue; + if (end <= begin || + merge_buffer_size < (size_t) (key_length * (end - begin + 1)) || + init_queue(&queue, (uint) (end - begin), + offsetof(Merge_chunk, m_current_key), 0, + buffpek_compare, &compare_context, 0, 0)) + return 1; + /* we need space for one key when a piece of merge buffer is re-read */ + merge_buffer_size-= key_length; + uchar *save_key_buff= merge_buffer + merge_buffer_size; + uint max_key_count_per_piece= (uint) (merge_buffer_size/(end-begin) / + key_length); + /* if piece_size is aligned reuse_freed_buffer will always hit */ + uint piece_size= max_key_count_per_piece * key_length; + ulong bytes_read; /* to hold return value of read_to_buffer */ + Merge_chunk *top; + int res= 1; + uint cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0); + element_count cnt; + + // read_to_buffer() needs only rec_length. + Sort_param sort_param; + sort_param.rec_length= key_length; + DBUG_ASSERT(!sort_param.using_addon_fields()); + + /* + Invariant: queue must contain top element from each tree, until a tree + is not completely walked through. + Here we're forcing the invariant, inserting one element from each tree + to the queue. 
+ */ + for (top= begin; top != end; ++top) + { + top->set_buffer(merge_buffer + (top - begin) * piece_size, + merge_buffer + (top - begin) * piece_size + piece_size); + top->set_max_keys(max_key_count_per_piece); + bytes_read= read_to_buffer(file, top, &sort_param, false); + if (unlikely(bytes_read == (ulong) -1)) + goto end; + DBUG_ASSERT(bytes_read); + queue_insert(&queue, (uchar *) top); + } + top= (Merge_chunk *) queue_top(&queue); + while (queue.elements > 1) + { + /* + Every iteration one element is removed from the queue, and one is + inserted by the rules of the invariant. If two adjacent elements on + the top of the queue are not equal, biggest one is unique, because all + elements in each tree are unique. Action is applied only to unique + elements. + */ + void *old_key= top->current_key(); + /* + read next key from the cache or from the file and push it to the + queue; this gives new top. + */ + top->advance_current_key(key_length); + top->decrement_mem_count(); + if (top->mem_count()) + queue_replace_top(&queue); + else /* next piece should be read */ + { + /* save old_key not to overwrite it in read_to_buffer */ + memcpy(save_key_buff, old_key, key_length); + old_key= save_key_buff; + bytes_read= read_to_buffer(file, top, &sort_param, false); + if (unlikely(bytes_read == (ulong) -1)) + goto end; + else if (bytes_read) /* top->key, top->mem_count are reset */ + queue_replace_top(&queue); /* in read_to_buffer */ + else + { + /* + Tree for old 'top' element is empty: remove it from the queue and + give all its memory to the nearest tree. + */ + queue_remove_top(&queue); + reuse_freed_buff(&queue, top, key_length); + } + } + top= (Merge_chunk *) queue_top(&queue); + /* new top has been obtained; if old top is unique, apply the action */ + if (compare(compare_arg, old_key, top->current_key())) + { + cnt= with_counters ? 
+ get_counter_from_merged_element(old_key, cnt_ofs) : 1; + if (walk_action(old_key, cnt, walk_action_arg)) + goto end; + } + else if (with_counters) + { + cnt= get_counter_from_merged_element(top->current_key(), cnt_ofs); + cnt+= get_counter_from_merged_element(old_key, cnt_ofs); + put_counter_into_merged_element(top->current_key(), cnt_ofs, cnt); + } + } + /* + Applying walk_action to the tail of the last tree: this is safe because + either we had only one tree in the beginning, either we work with the + last tree in the queue. + */ + do + { + do + { + + cnt= with_counters ? + get_counter_from_merged_element(top->current_key(), cnt_ofs) : 1; + if (walk_action(top->current_key(), cnt, walk_action_arg)) + goto end; + top->advance_current_key(key_length); + } + while (top->decrement_mem_count()); + bytes_read= read_to_buffer(file, top, &sort_param, false); + if (unlikely(bytes_read == (ulong) -1)) + goto end; + } + while (bytes_read); + res= 0; +end: + delete_queue(&queue); + return res; +} + + +/* + DESCRIPTION + Walks consecutively through all unique elements: + if all elements are in memory, then it simply invokes 'tree_walk', else + all flushed trees are loaded to memory piece-by-piece, pieces are + sorted, and action is called for each unique value. + Note: so as merging resets file_ptrs state, this method can change + internal Unique state to undefined: if you want to reuse Unique after + walk() you must call reset() first! 
+ SYNOPSIS + Unique:walk() + All params are 'IN': + table parameter for the call of the merge method + action function-visitor, typed in include/my_tree.h + function is called for each unique element + arg argument for visitor, which is passed to it on each call + RETURN VALUE + 0 OK + <> 0 error + */ + +bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) +{ + int res= 0; + uchar *merge_buffer; + + if (elements == 0) /* the whole tree is in memory */ + return tree_walk(&tree, action, walk_action_arg, left_root_right); + + sort.return_rows= elements+tree.elements_in_tree; + /* flush current tree to the file to have some memory for merge buffer */ + if (flush()) + return 1; + if (flush_io_cache(&file) || reinit_io_cache(&file, READ_CACHE, 0L, 0, 0)) + return 1; + /* + merge_buffer must fit at least MERGEBUFF2 + 1 keys, because + merge_index() can merge that many BUFFPEKs at once. The extra space for one key + is needed when a piece of merge buffer is re-read, see merge_walk() + */ + size_t buff_sz= MY_MAX(MERGEBUFF2+1, max_in_memory_size/full_size+1) * full_size; + if (!(merge_buffer = (uchar *)my_malloc(key_memory_Unique_merge_buffer, + buff_sz, MYF(MY_THREAD_SPECIFIC|MY_WME)))) + return 1; + if (buff_sz < full_size * (file_ptrs.elements + 1UL)) + res= merge(table, merge_buffer, buff_sz, + buff_sz >= full_size * MERGEBUFF2) ; + + if (!res) + { + res= merge_walk(merge_buffer, buff_sz, full_size, + (Merge_chunk *) file_ptrs.buffer, + (Merge_chunk *) file_ptrs.buffer + file_ptrs.elements, + action, walk_action_arg, + tree.compare, tree.custom_arg, &file, with_counters); + } + my_free(merge_buffer); + return res; +} + + +/* + DESCRIPTION + + Perform multi-pass sort merge of the elements using the buffer buff as + the merge buffer. The last pass is not performed if without_last_merge is + TRUE. 
+ + SYNOPSIS + Unique:merge() + All params are 'IN': + table the parameter to access sort context + buff merge buffer + buff_size size of merge buffer + without_last_merge TRUE <=> do not perform the last merge + RETURN VALUE + 0 OK + <> 0 error + */ + +bool Unique::merge(TABLE *table, uchar *buff, size_t buff_size, + bool without_last_merge) +{ + IO_CACHE *outfile= &sort.io_cache; + Merge_chunk *file_ptr= (Merge_chunk*) file_ptrs.buffer; + uint maxbuffer= (uint)file_ptrs.elements - 1; + my_off_t save_pos; + bool error= 1; + Sort_param sort_param; + + /* Open cached file for table records if it isn't open */ + if (! my_b_inited(outfile) && + open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER, + MYF(MY_WME))) + return 1; + + bzero((char*) &sort_param,sizeof(sort_param)); + sort_param.max_rows= elements; + sort_param.sort_form= table; + sort_param.rec_length= sort_param.sort_length= sort_param.ref_length= + full_size; + sort_param.min_dupl_count= min_dupl_count; + sort_param.res_length= 0; + sort_param.max_keys_per_buffer= + (uint) MY_MAX((max_in_memory_size / sort_param.sort_length), MERGEBUFF2); + sort_param.not_killable= 1; + + sort_param.unique_buff= buff +(sort_param.max_keys_per_buffer * + sort_param.sort_length); + + sort_param.compare= (qsort2_cmp) buffpek_compare; + sort_param.cmp_context.key_compare= tree.compare; + sort_param.cmp_context.key_compare_arg= tree.custom_arg; + + /* + We need to remove the size allocated for the unique buffer. + The sort_buffer_size is: + MY_MAX(MERGEBUFF2+1, max_in_memory_size/full_size+1) * full_size; + */ + buff_size-= full_size; + + /* Merge the buffers to one file, removing duplicates */ + if (merge_many_buff(&sort_param, + Bounds_checked_array(buff, buff_size), + file_ptr,&maxbuffer,&file)) + goto err; + if (flush_io_cache(&file) || + reinit_io_cache(&file,READ_CACHE,0L,0,0)) + goto err; + sort_param.res_length= sort_param.rec_length- + (min_dupl_count ? 
sizeof(min_dupl_count) : 0); + if (without_last_merge) + { + file_ptrs.elements= maxbuffer+1; + return 0; + } + if (merge_index(&sort_param, Bounds_checked_array(buff, buff_size), + file_ptr, maxbuffer, &file, outfile)) + goto err; + error= 0; +err: + if (flush_io_cache(outfile)) + error= 1; + + /* Setup io_cache for reading */ + save_pos= outfile->pos_in_file; + if (reinit_io_cache(outfile,READ_CACHE,0L,0,0)) + error= 1; + outfile->end_of_file=save_pos; + return error; +} + + +/* + Allocate memory that can be used with init_records() so that + rows will be read in priority order. +*/ + +bool Unique::get(TABLE *table) +{ + bool rc= 1; + uchar *sort_buffer= NULL; + sort.return_rows= elements+tree.elements_in_tree; + DBUG_ENTER("Unique::get"); + + if (my_b_tell(&file) == 0) + { + /* Whole tree is in memory; Don't use disk if you don't need to */ + if ((sort.record_pointers= (uchar*) + my_malloc(key_memory_Filesort_info_record_pointers, + size * tree.elements_in_tree, MYF(MY_THREAD_SPECIFIC)))) + { + uchar *save_record_pointers= sort.record_pointers; + tree_walk_action action= min_dupl_count ? + (tree_walk_action) unique_intersect_write_to_ptrs : + (tree_walk_action) unique_write_to_ptrs; + filtered_out_elems= 0; + (void) tree_walk(&tree, action, + this, left_root_right); + /* Restore record_pointers that was changed in by 'action' above */ + sort.record_pointers= save_record_pointers; + sort.return_rows-= filtered_out_elems; + DBUG_RETURN(0); + } + } + /* Not enough memory; Save the result to file && free memory used by tree */ + if (flush()) + DBUG_RETURN(1); + /* + merge_buffer must fit at least MERGEBUFF2 + 1 keys, because + merge_index() can merge that many BUFFPEKs at once. 
The extra space for + one key for Sort_param::unique_buff + */ + size_t buff_sz= MY_MAX(MERGEBUFF2+1, max_in_memory_size/full_size+1) * full_size; + + if (!(sort_buffer= (uchar*) my_malloc(key_memory_Unique_sort_buffer, buff_sz, + MYF(MY_THREAD_SPECIFIC|MY_WME)))) + DBUG_RETURN(1); + + if (merge(table, sort_buffer, buff_sz, FALSE)) + goto err; + rc= 0; + +err: + my_free(sort_buffer); + DBUG_RETURN(rc); +} diff --git a/sql/uniques.h b/sql/uniques.h new file mode 100644 index 00000000..7e12a391 --- /dev/null +++ b/sql/uniques.h @@ -0,0 +1,110 @@ +/* Copyright (c) 2016 MariaDB corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef UNIQUE_INCLUDED +#define UNIQUE_INCLUDED + +#include "filesort.h" + +/* + Unique -- class for unique (removing of duplicates). + Puts all values to the TREE. If the tree becomes too big, + it's dumped to the file. User can request sorted values, or + just iterate through them. In the last case tree merging is performed in + memory simultaneously with iteration, so it should be ~2-3x faster. + */ + +class Unique :public Sql_alloc +{ + DYNAMIC_ARRAY file_ptrs; + ulong max_elements; /* Total number of elements that will be stored in-memory */ + size_t max_in_memory_size; + IO_CACHE file; + TREE tree; + /* Number of elements filtered out due to min_dupl_count when storing results + to table. 
See Unique::get */ + ulong filtered_out_elems; + uint size; + + uint full_size; /* Size of element + space needed to store the number of + duplicates found for the element. */ + uint min_dupl_count; /* Minimum number of occurences of element required for + it to be written to record_pointers. + always 0 for unions, > 0 for intersections */ + bool with_counters; + + bool merge(TABLE *table, uchar *buff, size_t size, bool without_last_merge); + bool flush(); + +public: + ulong elements; + SORT_INFO sort; + Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg, + uint size_arg, size_t max_in_memory_size_arg, + uint min_dupl_count_arg= 0); + ~Unique(); + ulong elements_in_tree() { return tree.elements_in_tree; } + inline bool unique_add(void *ptr) + { + DBUG_ENTER("unique_add"); + DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements)); + if (!(tree.flag & TREE_ONLY_DUPS) && + tree.elements_in_tree >= max_elements && flush()) + DBUG_RETURN(1); + DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg)); + } + + bool is_in_memory() { return (my_b_tell(&file) == 0); } + void close_for_expansion() { tree.flag= TREE_ONLY_DUPS; } + + bool get(TABLE *table); + + /* Cost of searching for an element in the tree */ + inline static double get_search_cost(ulonglong tree_elems, + double compare_factor) + { + return log((double) tree_elems) / (compare_factor * M_LN2); + } + + static double get_use_cost(uint *buffer, size_t nkeys, uint key_size, + size_t max_in_memory_size, double compare_factor, + bool intersect_fl, bool *in_memory); + inline static int get_cost_calc_buff_size(size_t nkeys, uint key_size, + size_t max_in_memory_size) + { + size_t max_elems_in_tree= + max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size); + + if (max_elems_in_tree == 0) + max_elems_in_tree= 1; + return (int) (sizeof(uint)*(1 + nkeys/max_elems_in_tree)); + } + + void reset(); + bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg); + + uint get_size() 
const { return size; } + size_t get_max_in_memory_size() const { return max_in_memory_size; } + + friend int unique_write_to_file(uchar* key, element_count count, Unique *unique); + friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique); + + friend int unique_write_to_file_with_count(uchar* key, element_count count, + Unique *unique); + friend int unique_intersect_write_to_ptrs(uchar* key, element_count count, + Unique *unique); +}; + +#endif /* UNIQUE_INCLUDED */ diff --git a/sql/unireg.cc b/sql/unireg.cc new file mode 100644 index 00000000..ea0b94eb --- /dev/null +++ b/sql/unireg.cc @@ -0,0 +1,1259 @@ +/* + Copyright (c) 2000, 2011, Oracle and/or its affiliates. + Copyright (c) 2009, 2021, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/* + Functions to create a unireg form-file from a FIELD and a fieldname-fieldinfo + struct. + In the following functions FIELD * is an ordinary field-structure with + the following exeptions: + sc_length,typepos,row,kol,dtype,regnr and field need not to be set. + str is a (long) to record position where 0 is the first position. 
+*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "sql_partition.h" // struct partition_info +#include "sql_class.h" // THD, Internal_error_handler +#include "create_options.h" +#include "discover.h" +#include + +#define FCOMP 17 /* Bytes for a packed field */ + +/* threshold for safe_alloca */ +#define ALLOCA_THRESHOLD 2048 + +static uint pack_keys(uchar *,uint, KEY *, ulong, uint); +static bool pack_header(THD *, uchar *, List &, HA_CREATE_INFO *, + ulong, handler *); +static bool pack_vcols(THD *thd, String *, + List &, List *); +static uint get_interval_id(uint *,List &, Create_field *); +static bool pack_fields(uchar **, List &, HA_CREATE_INFO*, + ulong); +static size_t packed_fields_length(List &); +static bool make_empty_rec(THD *, uchar *, uint, List &, uint, + ulong); + +/* + write the length as + if ( 0 < length <= 255) one byte + if (256 < length <= 65535) zero byte, then two bytes, low-endian +*/ +static uchar *extra2_write_len(uchar *pos, size_t len) +{ + DBUG_ASSERT(len); + if (len <= 255) + *pos++= (uchar)len; + else + { + /* + At the moment we support options_len up to 64K. + We can easily extend it in the future, if the need arises. 
+ */ + DBUG_ASSERT(len <= 65535); + int2store(pos + 1, len); + pos+= 3; + } + return pos; +} + +static uchar* extra2_write_str(uchar *pos, const LEX_CSTRING &str) +{ + pos= extra2_write_len(pos, str.length); + memcpy(pos, str.str, str.length); + return pos + str.length; +} + +static uchar* extra2_write_str(uchar *pos, const Binary_string *str) +{ + pos= extra2_write_len(pos, str->length()); + memcpy(pos, str->ptr(), str->length()); + return pos + str->length(); +} + +static uchar *extra2_write(uchar *pos, enum extra2_frm_value_type type, + const LEX_CSTRING &str) +{ + *pos++ = type; + return extra2_write_str(pos, str); +} + +static uchar *extra2_write(uchar *pos, enum extra2_frm_value_type type, + const LEX_CUSTRING &str) +{ + return extra2_write(pos, type, *reinterpret_cast(&str)); +} + +static uchar *extra2_write_field_properties(uchar *pos, + List &create_fields) +{ + List_iterator it(create_fields); + *pos++= EXTRA2_FIELD_FLAGS; + /* + always first 2 for field visibility + */ + pos= extra2_write_len(pos, create_fields.elements); + while (Create_field *cf= it++) + { + uchar flags= cf->invisible; + if (cf->flags & VERS_UPDATE_UNVERSIONED_FLAG) + flags|= VERS_OPTIMIZED_UPDATE; + *pos++= flags; + } + return pos; +} + +static uchar *extra2_write_index_properties(uchar *pos, const KEY *keyinfo, + uint keys) +{ + *pos++= EXTRA2_INDEX_FLAGS; + pos= extra2_write_len(pos, keys); + for (uint i=0; i < keys; i++) + { + *pos++= keyinfo[i].is_ignored ? 
+ EXTRA2_IGNORED_KEY : + EXTRA2_DEFAULT_INDEX_FLAGS; + } + return pos; +} + +static field_index_t +get_fieldno_by_name(HA_CREATE_INFO *create_info, + List &create_fields, + const Lex_ident &field_name) +{ + List_iterator it(create_fields); + Create_field *sql_field = NULL; + + DBUG_ASSERT(field_name); + + for (field_index_t field_no= 0; (sql_field = it++); ++field_no) + { + if (field_name.streq(sql_field->field_name)) + { + DBUG_ASSERT(field_no < NO_CACHED_FIELD_INDEX); + return field_no; + } + } + + DBUG_ASSERT(0); /* Not Reachable */ + return NO_CACHED_FIELD_INDEX; +} + +static inline +bool has_extra2_field_flags(List &create_fields) +{ + List_iterator it(create_fields); + while (Create_field *f= it++) + { + if (f->invisible) + return true; + if (f->flags & VERS_UPDATE_UNVERSIONED_FLAG) + return true; + } + return false; +} + +static uint gis_field_options_image(uchar *buff, + List &create_fields) +{ + uint image_size= 0; + List_iterator it(create_fields); + Create_field *field; + while ((field= it++)) + { + if (field->real_field_type() != MYSQL_TYPE_GEOMETRY) + continue; + uchar *cbuf= buff ? 
buff + image_size : NULL; + image_size+= field->type_handler()-> + Column_definition_gis_options_image(cbuf, *field); + } + return image_size; +} + + +class Field_data_type_info_image: public BinaryStringBuffer<512> +{ + static uchar *store_length(uchar *pos, ulonglong length) + { + return net_store_length(pos, length); + } + static uchar *store_string(uchar *pos, const Binary_string *str) + { + pos= store_length(pos, str->length()); + memcpy(pos, str->ptr(), str->length()); + return pos + str->length(); + } + static uint store_length_required_length(ulonglong length) + { + return net_length_size(length); + } +public: + Field_data_type_info_image() { } + bool append(uint fieldnr, const Column_definition &def) + { + BinaryStringBuffer<64> type_info; + if (def.type_handler()-> + Column_definition_data_type_info_image(&type_info, def) || + type_info.length() > 0xFFFF/*Some reasonable limit*/) + return true; // Error + if (!type_info.length()) + return false; + size_t need_length= store_length_required_length(fieldnr) + + store_length_required_length(type_info.length()) + + type_info.length(); + if (reserve(need_length)) + return true; // Error + uchar *pos= (uchar *) end(); + pos= store_length(pos, fieldnr); + pos= store_string(pos, &type_info); + size_t new_length= (const char *) pos - ptr(); + DBUG_ASSERT(new_length < alloced_length()); + length((uint32) new_length); + return false; + } + bool append(List &fields) + { + uint fieldnr= 0; + Create_field *field; + List_iterator it(fields); + for (field= it++; field; field= it++, fieldnr++) + { + if (append(fieldnr, *field)) + return true; // Error + } + return false; + } +}; + + +/** + Create a frm (table definition) file + + @param thd Thread handler + @param table Name of table + @param create_info create info parameters + @param create_fields Fields to create + @param keys number of keys to create + @param key_info Keys to create + @param db_file Handler to use. 
+ + @return the generated frm image as a LEX_CUSTRING, + or null LEX_CUSTRING (str==0) in case of an error. +*/ + +LEX_CUSTRING build_frm_image(THD *thd, const LEX_CSTRING &table, + HA_CREATE_INFO *create_info, + List &create_fields, + uint keys, KEY *key_info, handler *db_file) +{ + LEX_CSTRING str_db_type; + uint reclength, key_info_length, i; + ulong key_buff_length; + size_t filepos; + ulong data_offset; + uint options_len; + uint gis_extra2_len= 0; + size_t period_info_len= create_info->period_info.name + ? extra2_str_size(create_info->period_info.name.length) + + extra2_str_size(create_info->period_info.constr->name.length) + + 2 * frm_fieldno_size + : 0; + size_t without_overlaps_len= frm_keyno_size * (create_info->period_info.unique_keys + 1); + uint e_unique_hash_extra_parts= 0; + uchar fileinfo[FRM_HEADER_SIZE],forminfo[FRM_FORMINFO_SIZE]; + const partition_info *part_info= IF_PARTITIONING(thd->work_part_info, 0); + bool error; + uchar *frm_ptr, *pos; + LEX_CUSTRING frm= {0,0}; + StringBuffer vcols; + Field_data_type_info_image field_data_type_info_image; + DBUG_ENTER("build_frm_image"); + + /* If fixed row records, we need one bit to check for deleted rows */ + if (!(create_info->table_options & HA_OPTION_PACK_RECORD)) + create_info->null_bits++; + data_offset= (create_info->null_bits + 7) / 8; + + error= pack_vcols(thd, &vcols, + create_fields, create_info->check_constraint_list); + + if (unlikely(error)) + DBUG_RETURN(frm); + + if (vcols.length()) + create_info->expression_length= vcols.length() + FRM_VCOL_NEW_BASE_SIZE; + + error= pack_header(thd, forminfo, create_fields, create_info, + (ulong)data_offset, db_file); + if (unlikely(error)) + DBUG_RETURN(frm); + + reclength= uint2korr(forminfo+266); + + /* Calculate extra data segment length */ + str_db_type= *hton_name(create_info->db_type); + /* str_db_type */ + create_info->extra_size= (uint)(2 + str_db_type.length + + 2 + create_info->connect_string.length); + /* + Partition: + Length of partition 
info = 4 byte + Potential NULL byte at end of partition info string = 1 byte + Indicator if auto-partitioned table = 1 byte + => Total 6 byte + */ + create_info->extra_size+= 6; + if (part_info) + create_info->extra_size+= (uint)part_info->part_info_len; + + for (i= 0; i < keys; i++) + { + if (key_info[i].parser_name) + create_info->extra_size+= (uint)key_info[i].parser_name->length + 1; + } + + options_len= engine_table_options_frm_length(create_info->option_list, + create_fields, + keys, key_info); + gis_extra2_len= gis_field_options_image(NULL, create_fields); + DBUG_PRINT("info", ("Options length: %u", options_len)); + + if (field_data_type_info_image.append(create_fields)) + { + my_printf_error(ER_CANT_CREATE_TABLE, + "Cannot create table %`s: " + "Building the field data type info image failed.", + MYF(0), table.str); + DBUG_RETURN(frm); + } + DBUG_PRINT("info", ("Field data type info length: %u", + (uint) field_data_type_info_image.length())); + DBUG_EXECUTE_IF("frm_data_type_info", + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_UNKNOWN_ERROR, + "build_frm_image: Field data type info length: %u", + (uint) field_data_type_info_image.length());); + + if (validate_comment_length(thd, &create_info->comment, TABLE_COMMENT_MAXLEN, + ER_TOO_LONG_TABLE_COMMENT, table.str)) + DBUG_RETURN(frm); + /* + If table comment is longer than TABLE_COMMENT_INLINE_MAXLEN bytes, + store the comment in an extra segment (up to TABLE_COMMENT_MAXLEN bytes). + Pre 5.5, the limit was 60 characters, with no extra segment-handling. + */ + if (create_info->comment.length > TABLE_COMMENT_INLINE_MAXLEN) + { + forminfo[46]=255; + create_info->extra_size+= 2 + (uint)create_info->comment.length; + } + else + { + strmake((char*) forminfo+47, create_info->comment.str ? 
+ create_info->comment.str : "", create_info->comment.length); + forminfo[46]=(uchar) create_info->comment.length; + } + + if (!create_info->tabledef_version.str) + { + uchar *to= (uchar*) thd->alloc(MY_UUID_SIZE); + if (unlikely(!to)) + DBUG_RETURN(frm); + my_uuid(to); + create_info->tabledef_version.str= to; + create_info->tabledef_version.length= MY_UUID_SIZE; + } + DBUG_ASSERT(create_info->tabledef_version.length > 0); + DBUG_ASSERT(create_info->tabledef_version.length <= 255); + + prepare_frm_header(thd, reclength, fileinfo, create_info, keys, key_info); + + /* one byte for a type, one or three for a length */ + size_t extra2_size= 1 + extra2_str_size(create_info->tabledef_version.length); + if (options_len) + extra2_size+= 1 + extra2_str_size(options_len); + + if (part_info) + extra2_size+= 1 + extra2_str_size(hton_name(part_info->default_engine_type)->length); + + if (gis_extra2_len) + extra2_size+= 1 + extra2_str_size(gis_extra2_len); + + if (field_data_type_info_image.length()) + extra2_size+= 1 + extra2_str_size(field_data_type_info_image.length()); + + if (create_info->versioned()) + { + extra2_size+= 1 + extra2_str_size(2 * frm_fieldno_size); + } + + if (create_info->period_info.name) + { + extra2_size+= 2 + extra2_str_size(period_info_len) + + extra2_str_size(without_overlaps_len); + } + + bool has_extra2_field_flags_= has_extra2_field_flags(create_fields); + if (has_extra2_field_flags_) + { + extra2_size+= 1 + extra2_str_size(create_fields.elements); + } + + /* + To store the ignorability flag for each key. + Here 1 bytes is reserved to store the extra index flags for keys. 
+ Currently only 1 bit is used, rest of the bits can be used in the future + */ + if (keys) + extra2_size+= 1 + extra2_str_size(keys); + + for (i= 0; i < keys; i++) + if (key_info[i].algorithm == HA_KEY_ALG_LONG_HASH) + e_unique_hash_extra_parts++; + key_buff_length= uint4korr(fileinfo+47); + + frm.length= FRM_HEADER_SIZE; // fileinfo; + frm.length+= extra2_size + 4; // mariadb extra2 frm segment + + int2store(fileinfo+4, extra2_size); + int2store(fileinfo+6, frm.length); // Position to key information + frm.length+= key_buff_length; + frm.length+= reclength; // row with default values + frm.length+= create_info->extra_size; + + filepos= frm.length; + frm.length+= FRM_FORMINFO_SIZE; // forminfo + frm.length+= packed_fields_length(create_fields); + frm.length+= create_info->expression_length; + + if (frm.length > FRM_MAX_SIZE || + create_info->expression_length > UINT_MAX32) + { + my_error(ER_TABLE_DEFINITION_TOO_BIG, MYF(0), table.str); + DBUG_RETURN(frm); + } + + frm_ptr= (uchar*) my_malloc(PSI_INSTRUMENT_ME, frm.length, + MYF(MY_WME | MY_ZEROFILL | MY_THREAD_SPECIFIC)); + if (!frm_ptr) + DBUG_RETURN(frm); + + /* write the extra2 segment */ + pos = frm_ptr + 64; + compile_time_assert(EXTRA2_TABLEDEF_VERSION != '/'); + pos= extra2_write(pos, EXTRA2_TABLEDEF_VERSION, + create_info->tabledef_version); + + if (part_info) + pos= extra2_write(pos, EXTRA2_DEFAULT_PART_ENGINE, + *hton_name(part_info->default_engine_type)); + + if (options_len) + { + *pos++= EXTRA2_ENGINE_TABLEOPTS; + pos= extra2_write_len(pos, options_len); + pos= engine_table_options_frm_image(pos, create_info->option_list, + create_fields, keys, key_info); + } + + if (gis_extra2_len) + { + *pos= EXTRA2_GIS; + pos= extra2_write_len(pos+1, gis_extra2_len); + pos+= gis_field_options_image(pos, create_fields); + } + + if (field_data_type_info_image.length()) + { + if (field_data_type_info_image.length() > 0xFFFF) + { + my_printf_error(ER_CANT_CREATE_TABLE, + "Cannot create table %`s: " + "field data type 
info image is too large. " + "Decrease the number of columns with " + "extended data types.", + MYF(0), table.str); + goto err; + } + *pos= EXTRA2_FIELD_DATA_TYPE_INFO; + pos= extra2_write_str(pos + 1, &field_data_type_info_image); + } + + // PERIOD + if (create_info->period_info.is_set()) + { + *pos++= EXTRA2_APPLICATION_TIME_PERIOD; + pos= extra2_write_len(pos, period_info_len); + pos= extra2_write_str(pos, create_info->period_info.name); + pos= extra2_write_str(pos, create_info->period_info.constr->name); + + store_frm_fieldno(pos, get_fieldno_by_name(create_info, create_fields, + create_info->period_info.period.start)); + pos+= frm_fieldno_size; + store_frm_fieldno(pos, get_fieldno_by_name(create_info, create_fields, + create_info->period_info.period.end)); + pos+= frm_fieldno_size; + + *pos++= EXTRA2_PERIOD_WITHOUT_OVERLAPS; + pos= extra2_write_len(pos, without_overlaps_len); + store_frm_keyno(pos, create_info->period_info.unique_keys); + pos+= frm_keyno_size; + for (uint key= 0; key < keys; key++) + { + if (key_info[key].without_overlaps) + { + store_frm_keyno(pos, key); + pos+= frm_keyno_size; + } + } + } + + if (create_info->versioned()) + { + *pos++= EXTRA2_PERIOD_FOR_SYSTEM_TIME; + *pos++= 2 * frm_fieldno_size; + store_frm_fieldno(pos, get_fieldno_by_name(create_info, create_fields, + create_info->vers_info.as_row.start)); + pos+= frm_fieldno_size; + store_frm_fieldno(pos, get_fieldno_by_name(create_info, create_fields, + create_info->vers_info.as_row.end)); + pos+= frm_fieldno_size; + } + + if (has_extra2_field_flags_) + pos= extra2_write_field_properties(pos, create_fields); + + + if (keys) + pos= extra2_write_index_properties(pos, key_info, keys); + + int4store(pos, filepos); // end of the extra2 segment + pos+= 4; + + DBUG_ASSERT(pos == frm_ptr + uint2korr(fileinfo+6)); + key_info_length= pack_keys(pos, keys, key_info, data_offset, e_unique_hash_extra_parts); + if (key_info_length > UINT_MAX16) + { + my_printf_error(ER_CANT_CREATE_TABLE, + "Cannot 
create table %`s: index information is too long. " + "Decrease number of indexes or use shorter index names or shorter comments.", + MYF(0), table.str); + goto err; + } + + int2store(forminfo+2, frm.length - filepos); + int4store(fileinfo+10, frm.length); + fileinfo[26]= (uchar) MY_TEST((create_info->max_rows == 1) && + (create_info->min_rows == 1) && (keys == 0)); + int2store(fileinfo+28,key_info_length); + + if (part_info) + { + fileinfo[61]= (uchar) ha_legacy_type(part_info->default_engine_type); + DBUG_PRINT("info", ("part_db_type = %d", fileinfo[61])); + } + + memcpy(frm_ptr, fileinfo, FRM_HEADER_SIZE); + + pos+= key_buff_length; + if (make_empty_rec(thd, pos, create_info->table_options, create_fields, + reclength, data_offset)) + goto err; + + pos+= reclength; + int2store(pos, create_info->connect_string.length); + pos+= 2; + if (create_info->connect_string.length) + memcpy(pos, create_info->connect_string.str, create_info->connect_string.length); + pos+= create_info->connect_string.length; + int2store(pos, str_db_type.length); + pos+= 2; + memcpy(pos, str_db_type.str, str_db_type.length); + pos+= str_db_type.length; + + if (part_info) + { + char auto_partitioned= part_info->is_auto_partitioned ? 
1 : 0; + int4store(pos, part_info->part_info_len); + pos+= 4; + memcpy(pos, part_info->part_info_string, part_info->part_info_len + 1); + pos+= part_info->part_info_len + 1; + *pos++= auto_partitioned; + } + else + { + pos+= 6; + } + + for (i= 0; i < keys; i++) + { + if (key_info[i].parser_name) + { + memcpy(pos, key_info[i].parser_name->str, key_info[i].parser_name->length + 1); + pos+= key_info[i].parser_name->length + 1; + } + } + if (forminfo[46] == (uchar)255) // New style MySQL 5.5 table comment + { + int2store(pos, create_info->comment.length); + pos+=2; + memcpy(pos, create_info->comment.str, create_info->comment.length); + pos+= create_info->comment.length; + } + + memcpy(frm_ptr + filepos, forminfo, FRM_FORMINFO_SIZE); + pos= frm_ptr + filepos + FRM_FORMINFO_SIZE; + if (pack_fields(&pos, create_fields, create_info, data_offset)) + goto err; + + if (vcols.length()) + { + /* Store header for packed fields (extra space for future) */ + bzero(pos, FRM_VCOL_NEW_BASE_SIZE); + pos+= FRM_VCOL_NEW_BASE_SIZE; + memcpy(pos, vcols.ptr(), vcols.length()); + pos+= vcols.length(); + } + + { + /* + Restore all UCS2 intervals. + HEX representation of them is not needed anymore. + */ + List_iterator it(create_fields); + Create_field *field; + while ((field=it++)) + { + if (field->save_interval) + { + field->interval= field->save_interval; + field->save_interval= 0; + } + } + } + + frm.str= frm_ptr; + DBUG_RETURN(frm); + +err: + my_free(frm_ptr); + DBUG_RETURN(frm); +} + + +/* Pack keyinfo and keynames to keybuff for save in form-file. 
*/ + +static uint pack_keys(uchar *keybuff, uint key_count, KEY *keyinfo, + ulong data_offset, uint e_unique_hash_extra_parts) +{ + uint key_parts,length; + uchar *pos, *keyname_pos; + KEY *key,*end; + KEY_PART_INFO *key_part,*key_part_end; + DBUG_ENTER("pack_keys"); + + pos=keybuff+6; + key_parts=0; + for (key=keyinfo,end=keyinfo+key_count ; key != end ; key++) + { + int2store(pos, (key->flags ^ HA_NOSAME)); + int2store(pos+2,key->key_length); + pos[4]= (uchar) key->user_defined_key_parts; + pos[5]= (uchar) key->algorithm; + int2store(pos+6, key->block_size); + pos+=8; + key_parts+=key->user_defined_key_parts; + DBUG_PRINT("loop", ("flags: %lu key_parts: %d key_part: %p", + key->flags, key->user_defined_key_parts, + key->key_part)); + + /* For SPATIAL, FULLTEXT and HASH indexes (anything other than B-tree), + ignore the ASC/DESC attribute of columns. */ + const uchar ha_reverse_sort= + key->algorithm > HA_KEY_ALG_BTREE || key->flags & (HA_FULLTEXT|HA_SPATIAL) + ? 0 : HA_REVERSE_SORT; + + for (key_part=key->key_part,key_part_end=key_part+key->user_defined_key_parts ; + key_part != key_part_end ; + key_part++) + + { + uint offset; + DBUG_PRINT("loop",("field: %d startpos: %lu length: %d", + key_part->fieldnr, key_part->offset + data_offset, + key_part->length)); + int2store(pos,key_part->fieldnr+1+FIELD_NAME_USED); + offset= (uint) (key_part->offset+data_offset+1); + int2store(pos+2, offset); + key_part->key_part_flag &= ha_reverse_sort; + pos[4]= (uchar)(key_part->key_part_flag); + int2store(pos+5,key_part->key_type); + int2store(pos+7,key_part->length); + pos+=9; + } + } + /* Save keynames */ + keyname_pos=pos; + *pos++=(uchar) NAMES_SEP_CHAR; + for (key=keyinfo ; key != end ; key++) + { + uchar *tmp=(uchar*) strmov((char*) pos,key->name.str); + *tmp++= (uchar) NAMES_SEP_CHAR; + *tmp=0; + pos=tmp; + } + *(pos++)=0; + for (key=keyinfo,end=keyinfo+key_count ; key != end ; key++) + { + if (key->flags & HA_USES_COMMENT) + { + int2store(pos, key->comment.length); + 
uchar *tmp= (uchar*)strnmov((char*) pos+2,key->comment.str, + key->comment.length); + pos= tmp; + } + } + + key_parts+= e_unique_hash_extra_parts; + if (key_count > 127 || key_parts > 127) + { + keybuff[0]= (key_count & 0x7f) | 0x80; + keybuff[1]= key_count >> 7; + int2store(keybuff+2,key_parts); + } + else + { + keybuff[0]=(uchar) key_count; + keybuff[1]=(uchar) key_parts; + keybuff[2]= keybuff[3]= 0; + } + length=(uint) (pos-keyname_pos); + int2store(keybuff+4,length); + DBUG_RETURN((uint) (pos-keybuff)); +} /* pack_keys */ + + +/** + Pack the expression (for GENERATED ALWAYS AS, DEFAULT, CHECK) + + The data is stored as: + 1 byte type (enum_vcol_info_type) + 2 bytes field_number + 2 bytes length of expression + 1 byte length of name + name + next bytes column expression (text data) + + @return 0 ok + @return 1 error (out of memory or wrong characters in expression) +*/ + +static bool pack_expression(String *buf, Virtual_column_info *vcol, + uint field_nr, enum_vcol_info_type type) +{ + if (buf->reserve(FRM_VCOL_NEW_HEADER_SIZE + vcol->name.length)) + return 1; + + buf->q_append((char) type); + buf->q_append2b(field_nr); + size_t len_off= buf->length(); + buf->q_append2b(0); // to be added later + buf->q_append((char)vcol->name.length); + buf->q_append(&vcol->name); + size_t expr_start= buf->length(); + vcol->print(buf); + size_t expr_len= buf->length() - expr_start; + if (expr_len >= 65536) + { + my_error(ER_EXPRESSION_IS_TOO_BIG, MYF(0), vcol_type_name(type)); + return 1; + } + int2store(buf->ptr() + len_off, expr_len); + return 0; +} + + +static bool pack_vcols(THD *thd, String *buf, List &create_fields, + List *check_constraint_list) +{ + Sql_mode_save_for_frm_handling sql_mode_save(thd); + List_iterator it(create_fields); + Create_field *field; + + for (uint field_nr=0; (field= it++); field_nr++) + { + if (field->vcol_info && field->vcol_info->expr) + if (pack_expression(buf, field->vcol_info, field_nr, + field->vcol_info->stored_in_db + ? 
VCOL_GENERATED_STORED : VCOL_GENERATED_VIRTUAL)) + return 1; + if (field->has_default_expression() && !field->has_default_now_unireg_check()) + if (pack_expression(buf, field->default_value, field_nr, VCOL_DEFAULT)) + return 1; + if (field->check_constraint) + if (pack_expression(buf, field->check_constraint, field_nr, + VCOL_CHECK_FIELD)) + return 1; + } + + List_iterator cit(*check_constraint_list); + Virtual_column_info *check; + while ((check= cit++)) + if (pack_expression(buf, check, UINT_MAX32, VCOL_CHECK_TABLE)) + return 1; + return 0; +} + + +static uint typelib_values_packed_length(const TYPELIB *t) +{ + uint length= 0; + for (uint i= 0; t->type_names[i]; i++) + { + length+= t->type_lengths[i]; + length++; /* Separator */ + } + return length; +} + + +/* Make formheader */ + +static bool pack_header(THD *thd, uchar *forminfo, + List &create_fields, + HA_CREATE_INFO *create_info, ulong data_offset, + handler *file) +{ + uint int_count,int_length, int_parts; + uint time_stamp_pos,null_fields; + uint table_options= create_info->table_options; + size_t length, reclength, totlength, n_length, com_length; + DBUG_ENTER("pack_header"); + + if (create_fields.elements > MAX_FIELDS) + { + my_message(ER_TOO_MANY_FIELDS, ER_THD(thd, ER_TOO_MANY_FIELDS), MYF(0)); + DBUG_RETURN(1); + } + + totlength= 0L; + reclength= data_offset; + int_count=int_parts=int_length=time_stamp_pos=null_fields=0; + com_length= 0; + n_length=2L; + create_info->field_check_constraints= 0; + + /* Check fields */ + List_iterator it(create_fields); + Create_field *field; + while ((field=it++)) + { + if (validate_comment_length(thd, &field->comment, COLUMN_COMMENT_MAXLEN, + ER_TOO_LONG_FIELD_COMMENT, + field->field_name.str)) + DBUG_RETURN(1); + + totlength+= (size_t)field->length; + com_length+= field->comment.length; + /* + We mark first TIMESTAMP field with NOW() in DEFAULT or ON UPDATE + as auto-update field. 
+ */ + if (field->real_field_type() == MYSQL_TYPE_TIMESTAMP && + field->unireg_check != Field::NONE && + !time_stamp_pos) + time_stamp_pos= (uint) field->offset+ (uint) data_offset + 1; + length=field->pack_length; + if ((uint) field->offset+ (uint) data_offset+ length > reclength) + reclength=(uint) (field->offset+ data_offset + length); + n_length+= field->field_name.length + 1; + field->interval_id=0; + field->save_interval= 0; + if (field->interval) + { + uint old_int_count=int_count; + + if (field->charset->mbminlen > 1) + { + TYPELIB *tmpint; + /* + Escape UCS2 intervals using HEX notation to avoid + problems with delimiters between enum elements. + As the original representation is still needed in + the function make_empty_rec to create a record of + filled with default values it is saved in save_interval + The HEX representation is created from this copy. + */ + uint count= field->interval->count; + field->save_interval= field->interval; + field->interval= tmpint= (TYPELIB*) thd->alloc(sizeof(TYPELIB)); + *tmpint= *field->save_interval; + tmpint->type_names= + (const char **) thd->alloc(sizeof(char*) * + (count + 1)); + tmpint->type_lengths= (uint *) thd->alloc(sizeof(uint) * (count + 1)); + tmpint->type_names[count]= 0; + tmpint->type_lengths[count]= 0; + + for (uint pos= 0; pos < field->interval->count; pos++) + { + char *dst; + const char *src= field->save_interval->type_names[pos]; + size_t hex_length; + length= field->save_interval->type_lengths[pos]; + hex_length= length * 2; + tmpint->type_lengths[pos]= (uint) hex_length; + tmpint->type_names[pos]= dst= (char*) thd->alloc(hex_length + 1); + octet2hex(dst, src, length); + } + } + + field->interval_id=get_interval_id(&int_count,create_fields,field); + if (old_int_count != int_count) + { + int_length+= typelib_values_packed_length(field->interval); + int_parts+= field->interval->count + 1; + } + } + if (f_maybe_null(field->pack_flag)) + null_fields++; + if (field->check_constraint) + 
create_info->field_check_constraints++; + } + int_length+=int_count*2; // 255 prefix + 0 suffix + + /* Save values in forminfo */ + if (reclength > (ulong) file->max_record_length()) + { + my_error(ER_TOO_BIG_ROWSIZE, MYF(0), static_cast(file->max_record_length())); + DBUG_RETURN(1); + } + + /* Hack to avoid bugs with small static rows in MySQL */ + reclength= MY_MAX(file->min_record_length(table_options), reclength); + length= n_length + create_fields.elements*FCOMP + FRM_FORMINFO_SIZE + + int_length + com_length + create_info->expression_length; + if (length > 65535L || int_count > 255) + { + my_message(ER_TOO_MANY_FIELDS, "Table definition is too large", MYF(0)); + DBUG_RETURN(1); + } + + bzero((char*)forminfo,FRM_FORMINFO_SIZE); + int2store(forminfo,length); + int2store(forminfo+258,create_fields.elements); + // bytes 260-261 are unused + int2store(forminfo+262,totlength); + // bytes 264-265 are unused + int2store(forminfo+266,reclength); + int2store(forminfo+268,n_length); + int2store(forminfo+270,int_count); + int2store(forminfo+272,int_parts); + int2store(forminfo+274,int_length); + int2store(forminfo+276,time_stamp_pos); + int2store(forminfo+278,80); /* Columns needed */ + int2store(forminfo+280,22); /* Rows needed */ + int2store(forminfo+282,null_fields); + int2store(forminfo+284,com_length); + int2store(forminfo+286,create_info->expression_length); + DBUG_RETURN(0); +} /* pack_header */ + + +/* get each unique interval each own id */ +static uint get_interval_id(uint *int_count,List &create_fields, + Create_field *last_field) +{ + List_iterator it(create_fields); + Create_field *field; + const TYPELIB *interval= last_field->interval; + + while ((field=it++) != last_field) + { + /* + ENUM/SET columns with equal value lists share a single + copy of the underlying TYPELIB. 
+ Fields with different mbminlen can't reuse TYPELIBs, because: + - mbminlen==1 are written to FRM as is + - mbminlen>1 are written to FRM in hex-encoded format + */ + if (field->interval_id && + field->interval->count == interval->count && + field->charset->mbminlen == last_field->charset->mbminlen) + { + const char **a,**b; + for (a=field->interval->type_names, b=interval->type_names ; + *a && !strcmp(*a,*b); + a++,b++) ; + + if (! *a) + { + return field->interval_id; // Re-use last interval + } + } + } + return ++*int_count; // New unique interval +} + + +static size_t packed_fields_length(List &create_fields) +{ + Create_field *field; + size_t length= 0; + DBUG_ENTER("packed_fields_length"); + + List_iterator it(create_fields); + uint int_count=0; + while ((field=it++)) + { + if (field->interval_id > int_count) + { + int_count= field->interval_id; + length++; + length+= typelib_values_packed_length(field->interval); + length++; + } + + length+= FCOMP; + length+= field->field_name.length + 1; + length+= field->comment.length; + } + length+= 2; + DBUG_RETURN(length); +} + +/* Save fields, fieldnames and intervals */ + +static bool pack_fields(uchar **buff_arg, List &create_fields, + HA_CREATE_INFO *create_info, + ulong data_offset) +{ + uchar *buff= *buff_arg; + uint int_count; + size_t comment_length= 0; + Create_field *field; + DBUG_ENTER("pack_fields"); + + /* Write field info */ + List_iterator it(create_fields); + int_count=0; + while ((field=it++)) + { + uint recpos; + /* The +1 is here becasue the col offset in .frm file have offset 1 */ + recpos= field->offset+1 + (uint) data_offset; + int3store(buff+5,recpos); + buff[12]= (uchar) field->interval_id; + buff[13]= (uchar) field->type_handler()->real_field_type(); + field->type_handler()->Column_definition_attributes_frm_pack(field, buff); + int2store(buff+15, field->comment.length); + comment_length+= field->comment.length; + set_if_bigger(int_count,field->interval_id); + buff+= FCOMP; + } + + /* Write 
fieldnames */ + *buff++= NAMES_SEP_CHAR; + it.rewind(); + while ((field=it++)) + { + buff= (uchar*)strmov((char*) buff, field->field_name.str); + *buff++=NAMES_SEP_CHAR; + } + *buff++= 0; + + /* Write intervals */ + if (int_count) + { + it.rewind(); + int_count=0; + while ((field=it++)) + { + if (field->interval_id > int_count) + { + unsigned char sep= 0; + unsigned char occ[256]; + uint i; + unsigned char *val= NULL; + + bzero(occ, sizeof(occ)); + + for (i=0; (val= (unsigned char*) field->interval->type_names[i]); i++) + for (uint j = 0; j < field->interval->type_lengths[i]; j++) + occ[(unsigned int) (val[j])]= 1; + + if (!occ[(unsigned char)NAMES_SEP_CHAR]) + sep= (unsigned char) NAMES_SEP_CHAR; + else if (!occ[(unsigned int)',']) + sep= ','; + else + { + for (uint i=1; i<256; i++) + { + if(!occ[i]) + { + sep= i; + break; + } + } + + if (!sep) + { + /* disaster, enum uses all characters, none left as separator */ + my_message(ER_WRONG_FIELD_TERMINATORS, + ER(ER_WRONG_FIELD_TERMINATORS), + MYF(0)); + DBUG_RETURN(1); + } + } + + int_count= field->interval_id; + *buff++= sep; + for (int i=0; field->interval->type_names[i]; i++) + { + memcpy(buff, field->interval->type_names[i], field->interval->type_lengths[i]); + buff+= field->interval->type_lengths[i]; + *buff++= sep; + } + *buff++= 0; + } + } + } + if (comment_length) + { + it.rewind(); + while ((field=it++)) + { + if (size_t l= field->comment.length) + { + memcpy(buff, field->comment.str, l); + buff+= l; + } + } + } + *buff_arg= buff; + DBUG_RETURN(0); +} + + +static bool make_empty_rec_store_default(THD *thd, Field *regfield, + Create_field *field) +{ + Virtual_column_info *default_value= field->default_value; + if (!field->vers_sys_field() && default_value && !default_value->flags) + { + Item *expr= default_value->expr; + // may be already fixed if ALTER TABLE + if (expr->fix_fields_if_needed(thd, &expr)) + return true; + DBUG_ASSERT(expr == default_value->expr); // Should not change + if 
(regfield->make_empty_rec_store_default_value(thd, expr)) + { + my_error(ER_INVALID_DEFAULT, MYF(0), regfield->field_name.str); + return true; + } + return false; + } + regfield->make_empty_rec_reset(thd); + return false; +} + + +/* save an empty record on start of formfile */ + +static bool make_empty_rec(THD *thd, uchar *buff, uint table_options, + List &create_fields, + uint reclength, ulong data_offset) +{ + int error= false; + uint null_count; + uchar *null_pos; + TABLE table; + TABLE_SHARE share; + Create_field *field; + Check_level_instant_set old_count_cuted_fields(thd, CHECK_FIELD_WARN); + Abort_on_warning_instant_set old_abort_on_warning(thd, 0); + DBUG_ENTER("make_empty_rec"); + + /* We need a table to generate columns for default values */ + bzero((char*) &table, sizeof(table)); + bzero((char*) &share, sizeof(share)); + table.s= &share; + + table.in_use= thd; + + null_count=0; + if (!(table_options & HA_OPTION_PACK_RECORD)) + { + null_count++; // Need one bit for delete mark + *buff|= 1; + } + null_pos= buff; + + List_iterator it(create_fields); + while ((field=it++)) + { + Record_addr addr(buff + field->offset + data_offset, + null_pos + null_count / 8, null_count & 7); + Column_definition_attributes tmp(*field); + tmp.interval= field->save_interval ? 
+ field->save_interval : field->interval; + /* regfield don't have to be deleted as it's allocated on THD::mem_root */ + Field *regfield= tmp.make_field(&share, thd->mem_root, &addr, + field->type_handler(), + &field->field_name, + field->flags); + if (!regfield) + { + error= true; + goto err; // End of memory + } + + /* save_in_field() will access regfield->table->in_use */ + regfield->init(&table); + + if (!(field->flags & NOT_NULL_FLAG)) + { + *regfield->null_ptr|= regfield->null_bit; + null_count++; + } + + if (field->real_field_type() == MYSQL_TYPE_BIT && + !f_bit_as_char(field->pack_flag)) + null_count+= field->length & 7; + + error= make_empty_rec_store_default(thd, regfield, field); + delete regfield; // Avoid memory leaks + if (error) + goto err; + } + DBUG_ASSERT(data_offset == ((null_count + 7) / 8)); + + /* + We need to set the unused bits to 1. If the number of bits is a multiple + of 8 there are no unused bits. + */ + if (null_count & 7) + *(null_pos + null_count / 8)|= ~(((uchar) 1 << (null_count & 7)) - 1); + +err: + DBUG_RETURN(error); +} /* make_empty_rec */ diff --git a/sql/unireg.h b/sql/unireg.h new file mode 100644 index 00000000..1eec3585 --- /dev/null +++ b/sql/unireg.h @@ -0,0 +1,228 @@ +#ifndef UNIREG_INCLUDED +#define UNIREG_INCLUDED + +/* + Copyright (c) 2000, 2011, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#include /* FRM_VER */ + +/* Extra functions used by unireg library */ + +#ifndef NO_ALARM_LOOP +#define NO_ALARM_LOOP /* lib5 and popen can't use alarm */ +#endif + +/* These paths are converted to other systems (WIN95) before use */ + +#define LANGUAGE "english/" +#define ERRMSG_FILE "errmsg.sys" +#define TEMP_PREFIX "MY" +#define LOG_PREFIX "ML" +#define PROGDIR "bin/" +#ifndef MYSQL_DATADIR +#define MYSQL_DATADIR "data/" +#endif +#ifndef SHAREDIR +#define SHAREDIR "share/" +#endif +#ifndef PLUGINDIR +#define PLUGINDIR "lib/plugin" +#endif + +#define MAX_ERROR_RANGES 4 /* 1000-2000, 2000-3000, 3000-4000, 4000-5000 */ +#define ERRORS_PER_RANGE 1000 + +#define DEFAULT_ERRMSGS my_default_lc_messages->errmsgs->errmsgs +#define CURRENT_THD_ERRMSGS (current_thd)->variables.errmsgs + +#ifndef mysqld_error_find_printf_error_used +#define ER_DEFAULT(X) DEFAULT_ERRMSGS[((X)-ER_ERROR_FIRST) / ERRORS_PER_RANGE][(X)% ERRORS_PER_RANGE] +#define ER_THD(thd,X) ((thd)->variables.errmsgs[((X)-ER_ERROR_FIRST) / ERRORS_PER_RANGE][(X) % ERRORS_PER_RANGE]) +#define ER(X) ER_THD(current_thd, (X)) +#endif +#define ER_THD_OR_DEFAULT(thd,X) ((thd) ? 
ER_THD(thd, (X)) : ER_DEFAULT(X)) + +#define SPECIAL_USE_LOCKS 1 /* Lock used databases */ +#define SPECIAL_NO_NEW_FUNC 2 /* Skip new functions */ +#define SPECIAL_SKIP_SHOW_DB 4 /* Don't allow 'show db' */ +#define SPECIAL_WAIT_IF_LOCKED 8 /* Wait if locked database */ +#define SPECIAL_SAME_DB_NAME 16 /* form name = file name */ +#define SPECIAL_ENGLISH 32 /* English error messages */ +#define SPECIAL_NO_RESOLVE 64 /* Obsolete */ +#define SPECIAL_NO_PRIOR 128 /* Obsolete */ +#define SPECIAL_BIG_SELECTS 256 /* Don't use heap tables */ +#define SPECIAL_NO_HOST_CACHE 512 /* Don't cache hosts */ +#define SPECIAL_SHORT_LOG_FORMAT 1024 +#define SPECIAL_SAFE_MODE 2048 +#define SPECIAL_LOG_QUERIES_NOT_USING_INDEXES 4096 /* Obsolete */ + + /* Extern defines */ +#define store_record(A,B) memcpy((A)->B,(A)->record[0],(size_t) (A)->s->reclength) +#define restore_record(A,B) memcpy((A)->record[0],(A)->B,(size_t) (A)->s->reclength) +#define cmp_record(A,B) memcmp((A)->record[0],(A)->B,(size_t) (A)->s->reclength) +#define empty_record(A) { \ + restore_record((A),s->default_values); \ + if ((A)->s->null_bytes) \ + bfill((A)->null_flags,(A)->s->null_bytes,255); \ + } + + /* Defines for use with openfrm, openprt and openfrd */ + +#define READ_ALL (1 << 0) /* openfrm: Read all parameters */ +#define EXTRA_RECORD (1 << 3) /* Reserve space for an extra record */ +#define DELAYED_OPEN (1 << 12) /* Open table later */ +#define OPEN_VIEW_NO_PARSE (1 << 14) /* Open frm only if it's a view, + but do not parse view itself */ +/** + This flag is used in function get_all_tables() which fills + I_S tables with data which are retrieved from frm files and storage engine + The flag means that we need to open FRM file only to get necessary data. 
+*/ +#define OPEN_FRM_FILE_ONLY (1 << 15) +/** + This flag is used in function get_all_tables() which fills + I_S tables with data which are retrieved from frm files and storage engine + The flag means that we need to process tables only to get necessary data. + Views are not processed. +*/ +#define OPEN_TABLE_ONLY (1 << 16) +/** + This flag is used in function get_all_tables() which fills + I_S tables with data which are retrieved from frm files and storage engine + The flag means that we need to process views only to get necessary data. + Tables are not processed. +*/ +#define OPEN_VIEW_ONLY (1 << 17) +/** + This flag is used in function get_all_tables() which fills + I_S tables with data which are retrieved from frm files and storage engine. + The flag means that we need to open a view using + open_normal_and_derived_tables() function. +*/ +#define OPEN_VIEW_FULL (1 << 18) +/** + This flag is used in function get_all_tables() which fills + I_S tables with data which are retrieved from frm files and storage engine. + The flag means that I_S table uses optimization algorithm. +*/ +#define OPTIMIZE_I_S_TABLE (1 << 19) +/** + This flag is used to instruct tdc_open_view() to check metadata version. +*/ +#define CHECK_METADATA_VERSION (1 << 20) + +/* + The flag means that we need to process trigger files only. +*/ +#define OPEN_TRIGGER_ONLY (1 << 21) + +/* + Minimum length pattern before Turbo Boyer-Moore is used + for SELECT "text" LIKE "%pattern%", excluding the two + wildcards in class Item_func_like. +*/ +#define MIN_TURBOBM_PATTERN_LEN 3 + +/* + Defines for binary logging. + Do not decrease the value of BIN_LOG_HEADER_SIZE. + Do not even increase it before checking code. 
+*/ + +#define BIN_LOG_HEADER_SIZE 4 + +#define DEFAULT_KEY_CACHE_NAME "default" + + +/* Include prototypes for unireg */ + +#include "mysqld_error.h" +#include "structs.h" /* All structs we need */ +#include "sql_list.h" /* List<> */ +#include "field.h" /* Create_field */ + +/* + Types of values in the MariaDB extra2 frm segment. + Each value is written as + type: 1 byte + length: 1 byte (1..255) or \0 and 2 bytes. + binary value of the 'length' bytes. + + Older MariaDB servers can ignore values of unknown types if + the type code is less than 128 (EXTRA2_ENGINE_IMPORTANT). + Otherwise older (but newer than 10.0.1) servers are required + to report an error. +*/ +enum extra2_frm_value_type { + EXTRA2_TABLEDEF_VERSION=0, + EXTRA2_DEFAULT_PART_ENGINE=1, + EXTRA2_GIS=2, + EXTRA2_APPLICATION_TIME_PERIOD=3, + EXTRA2_PERIOD_FOR_SYSTEM_TIME=4, + EXTRA2_INDEX_FLAGS=5, + +#define EXTRA2_ENGINE_IMPORTANT 128 + + EXTRA2_ENGINE_TABLEOPTS=128, + EXTRA2_FIELD_FLAGS=129, + EXTRA2_FIELD_DATA_TYPE_INFO=130, + EXTRA2_PERIOD_WITHOUT_OVERLAPS=131, +}; + +enum extra2_field_flags { + VERS_OPTIMIZED_UPDATE= 1 << INVISIBLE_MAX_BITS, +}; + +enum extra2_index_flags { + EXTRA2_DEFAULT_INDEX_FLAGS, + EXTRA2_IGNORED_KEY +}; + + +static inline size_t extra2_read_len(const uchar **extra2, const uchar *end) +{ + size_t length= *(*extra2)++; + if (length) + return length; + + if ((*extra2) + 2 >= end) + return 0; + length= uint2korr(*extra2); + (*extra2)+= 2; + if (length < 256 || *extra2 + length > end) + return 0; + return length; +} + +LEX_CUSTRING build_frm_image(THD *thd, const LEX_CSTRING &table, + HA_CREATE_INFO *create_info, + List &create_fields, + uint keys, KEY *key_info, handler *db_file); + +#define FRM_HEADER_SIZE 64 +#define FRM_FORMINFO_SIZE 288 +#define FRM_MAX_SIZE (1024*1024) + +static inline bool is_binary_frm_header(uchar *head) +{ + return head[0] == 254 + && head[1] == 1 + && head[2] >= FRM_VER + && head[2] <= FRM_VER_CURRENT; +} + +#endif diff --git 
a/sql/upgrade_conf_file.cc b/sql/upgrade_conf_file.cc new file mode 100644 index 00000000..0d7bc603 --- /dev/null +++ b/sql/upgrade_conf_file.cc @@ -0,0 +1,318 @@ +/* + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ + + +/* + Variables that were present in older releases, but are now removed. + to get the list of variables that are present in current release + execute + + SELECT LOWER(variable_name) from INFORMATION_SCHEMA.GLOBAL_VARIABLES ORDER BY 1 + + Compare the list between releases to figure out which variables have gone. + + Note : the list below only includes the default-compiled server and none of the + loadable plugins. 
+*/ +#include +#include +#include +#include +#include +#include +#include + +static const char *removed_variables[] = +{ +"aria_recover", +"debug_crc_break", +"engine_condition_pushdown", +"have_csv", +"have_innodb", +"have_ndbcluster", +"have_partitioning", +"innodb_adaptive_flushing_method", +"innodb_adaptive_hash_index_partitions", +"innodb_adaptive_max_sleep_delay", +"innodb_additional_mem_pool_size", +"innodb_api_bk_commit_interval", +"innodb_api_disable_rowlock", +"innodb_api_enable_binlog", +"innodb_api_enable_mdl", +"innodb_api_trx_level", +"innodb_background_scrub_data_check_interval", +"innodb_background_scrub_data_compressed", +"innodb_background_scrub_data_interval", +"innodb_background_scrub_data_uncompressed", +"innodb_blocking_buffer_pool_restore", +"innodb_buffer_pool_instances", +"innodb_buffer_pool_populate", +"innodb_buffer_pool_restore_at_startup", +"innodb_buffer_pool_shm_checksum", +"innodb_buffer_pool_shm_key", +"innodb_checkpoint_age_target", +"innodb_checksums", +"innodb_cleaner_eviction_factor", +"innodb_cleaner_flush_chunk_size", +"innodb_cleaner_free_list_lwm", +"innodb_cleaner_lru_chunk_size", +"innodb_cleaner_lsn_age_factor", +"innodb_cleaner_max_flush_time", +"innodb_cleaner_max_lru_time", +"innodb_commit_concurrency", +"innodb_concurrency_tickets", +"innodb_corrupt_table_action", +"innodb_dict_size_limit", +"innodb_doublewrite_file", +"innodb_empty_free_list_algorithm", +"innodb_fake_changes", +"innodb_fast_checksum", +"innodb_file_format", +"innodb_file_format_check", +"innodb_file_format_max", +"innodb_flush_neighbor_pages", +"innodb_force_load_corrupted", +"innodb_foreground_preflush", +"innodb_ibuf_accel_rate", +"innodb_ibuf_active_contract", +"innodb_ibuf_max_size", +"innodb_idle_flush_pct", +"innodb_import_table_from_xtrabackup", +"innodb_instrument_semaphores", +"innodb_kill_idle_transaction", +"innodb_large_prefix", +"innodb_lazy_drop_table", +"innodb_locking_fake_changes", +"innodb_locks_unsafe_for_binlog", 
+"innodb_log_arch_dir", +"innodb_log_arch_expire_sec", +"innodb_log_archive", +"innodb_log_block_size", +"innodb_log_checksum_algorithm", +"innodb_log_checksums", +"innodb_log_compressed_pages", +"innodb_log_files_in_group", +"innodb_log_optimize_ddl", +"innodb_max_bitmap_file_size", +"innodb_max_changed_pages", +"innodb_merge_sort_block_size", +"innodb_mirrored_log_groups", +"innodb_mtflush_threads", +"innodb_page_cleaners", +"innodb_persistent_stats_root_page", +"innodb_print_lock_wait_timeout_info", +"innodb_purge_run_now", +"innodb_purge_stop_now", +"innodb_read_ahead", +"innodb_recovery_stats", +"innodb_recovery_update_relay_log", +"innodb_replication_delay", +"innodb_rollback_segments", +"innodb_scrub_log", +"innodb_scrub_log_speed", +"innodb_show_locks_held", +"innodb_show_verbose_locks", +"innodb_stats_auto_update", +"innodb_stats_sample_pages", +"innodb_stats_update_need_lock", +"innodb_support_xa", +"innodb_sync_array_size", +"innodb_thread_concurrency", +"innodb_thread_concurrency_timer_based", +"innodb_thread_sleep_delay", +"innodb_track_changed_pages", +"innodb_track_redo_log_now", +"innodb_undo_logs", +"innodb_use_fallocate", +"innodb_use_global_flush_log_at_trx_commit", +"innodb_use_mtflush", +"innodb_use_stacktrace", +"innodb_use_sys_malloc", +"innodb_use_sys_stats_table", +"innodb_use_trim", +"log", +"log_slow_queries", +"max_long_data_size", +"multi_range_count", +"rpl_recovery_rank", +"skip_bdb", +"sql_big_tables", +"sql_low_priority_updates", +"sql_max_join_size", +"thread_concurrency", +"timed_mutexes" +}; + + +static int cmp_strings(const void* a, const void *b) +{ + return strcmp((const char *)a, *(const char **)b); +} + + +#define MY_INI_SECTION_SIZE 32 * 1024 + 3 + +static bool is_utf8_str(const char *s) +{ + MY_STRCOPY_STATUS status; + const struct charset_info_st *cs= &my_charset_utf8mb4_bin; + size_t len= strlen(s); + if (!len) + return true; + cs->cset->well_formed_char_length(cs, s, s + len, len, &status); + return 
status.m_well_formed_error_pos == nullptr; +} + + +static UINT get_system_acp() +{ + static DWORD system_acp; + if (system_acp) + return system_acp; + + char str_cp[10]; + int cch= GetLocaleInfo(GetSystemDefaultLCID(), LOCALE_IDEFAULTANSICODEPAGE, + str_cp, sizeof(str_cp)); + + system_acp= cch > 0 ? atoi(str_cp) : 1252; + + return system_acp; +} + + +static char *ansi_to_utf8(const char *s) +{ +#define MAX_STR_LEN MY_INI_SECTION_SIZE + static wchar_t utf16_buf[MAX_STR_LEN]; + static char utf8_buf[MAX_STR_LEN]; + if (MultiByteToWideChar(get_system_acp(), 0, s, -1, utf16_buf, MAX_STR_LEN)) + { + if (WideCharToMultiByte(CP_UTF8, 0, utf16_buf, -1, utf8_buf, MAX_STR_LEN, + 0, 0)) + return utf8_buf; + } + return 0; +} + +int fix_section(const char *myini_path, const char *section_name, + bool is_server) +{ + if (!is_server && GetACP() != CP_UTF8) + return 0; + + static char section_data[MY_INI_SECTION_SIZE]; + DWORD size= GetPrivateProfileSection(section_name, section_data, + MY_INI_SECTION_SIZE, myini_path); + if (size == MY_INI_SECTION_SIZE - 2) + { + return -1; + } + + for (char *keyval= section_data; *keyval; keyval += strlen(keyval)+1) + { + char varname[256]; + char *value; + char *key_end= strchr(keyval, '='); + if (!key_end) + key_end= keyval + strlen(keyval); + + if (key_end - keyval > sizeof(varname)) + continue; + + value= key_end + 1; + if (GetACP() == CP_UTF8 && !is_utf8_str(value)) + { + /*Convert a value, if it is not already UTF-8*/ + char *new_val= ansi_to_utf8(value); + if (new_val) + { + *key_end= 0; + fprintf(stdout, "Fixing variable '%s' charset, value=%s\n", keyval, + new_val); + WritePrivateProfileString(section_name, keyval, new_val, myini_path); + *key_end= '='; + } + } + if (!is_server) + continue; + + // Check if variable should be removed from config. 
+ // First, copy and normalize (convert dash to underscore) to variable + // names + for (char *p= keyval, *q= varname;; p++, q++) + { + if (p == key_end) + { + *q= 0; + break; + } + *q= (*p == '-') ? '_' : *p; + } + const char *v= (const char *) bsearch(varname, removed_variables, sizeof(removed_variables) / sizeof(removed_variables[0]), + sizeof(char *), cmp_strings); + + if (v) + { + fprintf(stdout, "Removing variable '%s' from config file\n", varname); + // delete variable + *key_end= 0; + WritePrivateProfileString(section_name, keyval, 0, myini_path); + } + } + return 0; +} + +static bool is_mariadb_section(const char *name, bool *is_server) +{ + if (strncmp(name, "mysql", 5) + && strncmp(name, "mariadb", 7) + && strcmp(name, "client") + && strcmp(name, "client-server") + && strcmp(name, "server")) + { + return false; + } + + for (const char *section_name : {"mysqld", "server", "mariadb"}) + if (*is_server= !strcmp(section_name, name)) + break; + + return true; +} + + +/** + Convert file from a previous version, by removing obsolete variables + Also, fix values to be UTF8, if MariaDB is running in utf8 mode +*/ +int upgrade_config_file(const char *myini_path) +{ + static char all_sections[MY_INI_SECTION_SIZE]; + int sz= GetPrivateProfileSectionNamesA(all_sections, MY_INI_SECTION_SIZE, + myini_path); + if (!sz) + return 0; + if (sz > MY_INI_SECTION_SIZE - 2) + { + fprintf(stderr, "Too many sections in config file\n"); + return -1; + } + for (char *section= all_sections; *section; section+= strlen(section) + 1) + { + bool is_server_section; + if (is_mariadb_section(section, &is_server_section)) + fix_section(myini_path, section, is_server_section); + } + return 0; +} diff --git a/sql/vers_string.h b/sql/vers_string.h new file mode 100644 index 00000000..c5be9c35 --- /dev/null +++ b/sql/vers_string.h @@ -0,0 +1,84 @@ +/* + Copyright (c) 2018, 2020, MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef VERS_STRING_INCLUDED +#define VERS_STRING_INCLUDED + +#include "lex_string.h" + +/* + LEX_CSTRING with comparison semantics. +*/ + +// db and table names: case sensitive (or insensitive) in table_alias_charset +struct Compare_table_names +{ + int operator()(const LEX_CSTRING& a, const LEX_CSTRING& b) const + { + DBUG_ASSERT(a.str[a.length] == 0); + DBUG_ASSERT(b.str[b.length] == 0); + return table_alias_charset->strnncoll(a.str, a.length, + b.str, b.length); + } +}; + +// column names and other identifiers: case insensitive in system_charset_info +struct Compare_identifiers +{ + int operator()(const LEX_CSTRING& a, const LEX_CSTRING& b) const + { + DBUG_ASSERT(a.str != NULL); + DBUG_ASSERT(b.str != NULL); + DBUG_ASSERT(a.str[a.length] == 0); + DBUG_ASSERT(b.str[b.length] == 0); + return my_strcasecmp(system_charset_info, a.str, b.str); + } +}; + + +template +struct Lex_cstring_with_compare : public Lex_cstring +{ +public: + Lex_cstring_with_compare() = default; + Lex_cstring_with_compare(const char *_str, size_t _len) : + Lex_cstring(_str, _len) + { } + Lex_cstring_with_compare(const LEX_STRING src) : + Lex_cstring(src.str, src.length) + { } + Lex_cstring_with_compare(const LEX_CSTRING src) : Lex_cstring(src.str, src.length) + { } + Lex_cstring_with_compare(const char *_str) : Lex_cstring(_str, strlen(_str)) + { 
} + bool streq(const Lex_cstring_with_compare& b) const + { + return Lex_cstring::length == b.length && 0 == Compare()(*this, b); + } + operator const char* () const + { + return str; + } + operator bool () const + { + return str != NULL; + } +}; + +typedef Lex_cstring_with_compare Lex_ident; +typedef Lex_cstring_with_compare Lex_table_name; + +#endif // VERS_STRING_INCLUDED diff --git a/sql/win_tzname_data.h b/sql/win_tzname_data.h new file mode 100644 index 00000000..8a240118 --- /dev/null +++ b/sql/win_tzname_data.h @@ -0,0 +1,140 @@ +/* This file was generated using gen_win_tzname_data.ps1 */ +{L"Dateline Standard Time","Etc/GMT+12"}, +{L"UTC-11","Etc/GMT+11"}, +{L"Aleutian Standard Time","America/Adak"}, +{L"Hawaiian Standard Time","Pacific/Honolulu"}, +{L"Marquesas Standard Time","Pacific/Marquesas"}, +{L"Alaskan Standard Time","America/Anchorage"}, +{L"UTC-09","Etc/GMT+9"}, +{L"Pacific Standard Time (Mexico)","America/Tijuana"}, +{L"UTC-08","Etc/GMT+8"}, +{L"Pacific Standard Time","America/Los_Angeles"}, +{L"US Mountain Standard Time","America/Phoenix"}, +{L"Mountain Standard Time (Mexico)","America/Chihuahua"}, +{L"Mountain Standard Time","America/Denver"}, +{L"Yukon Standard Time","America/Whitehorse"}, +{L"Central America Standard Time","America/Guatemala"}, +{L"Central Standard Time","America/Chicago"}, +{L"Easter Island Standard Time","Pacific/Easter"}, +{L"Central Standard Time (Mexico)","America/Mexico_City"}, +{L"Canada Central Standard Time","America/Regina"}, +{L"SA Pacific Standard Time","America/Bogota"}, +{L"Eastern Standard Time (Mexico)","America/Cancun"}, +{L"Eastern Standard Time","America/New_York"}, +{L"Haiti Standard Time","America/Port-au-Prince"}, +{L"Cuba Standard Time","America/Havana"}, +{L"US Eastern Standard Time","America/Indianapolis"}, +{L"Turks And Caicos Standard Time","America/Grand_Turk"}, +{L"Paraguay Standard Time","America/Asuncion"}, +{L"Atlantic Standard Time","America/Halifax"}, +{L"Venezuela Standard 
Time","America/Caracas"}, +{L"Central Brazilian Standard Time","America/Cuiaba"}, +{L"SA Western Standard Time","America/La_Paz"}, +{L"Pacific SA Standard Time","America/Santiago"}, +{L"Newfoundland Standard Time","America/St_Johns"}, +{L"Tocantins Standard Time","America/Araguaina"}, +{L"E. South America Standard Time","America/Sao_Paulo"}, +{L"SA Eastern Standard Time","America/Cayenne"}, +{L"Argentina Standard Time","America/Buenos_Aires"}, +{L"Greenland Standard Time","America/Godthab"}, +{L"Montevideo Standard Time","America/Montevideo"}, +{L"Magallanes Standard Time","America/Punta_Arenas"}, +{L"Saint Pierre Standard Time","America/Miquelon"}, +{L"Bahia Standard Time","America/Bahia"}, +{L"UTC-02","Etc/GMT+2"}, +{L"Azores Standard Time","Atlantic/Azores"}, +{L"Cape Verde Standard Time","Atlantic/Cape_Verde"}, +{L"UTC","Etc/UTC"}, +{L"GMT Standard Time","Europe/London"}, +{L"Greenwich Standard Time","Atlantic/Reykjavik"}, +{L"Sao Tome Standard Time","Africa/Sao_Tome"}, +{L"Morocco Standard Time","Africa/Casablanca"}, +{L"W. Europe Standard Time","Europe/Berlin"}, +{L"Central Europe Standard Time","Europe/Budapest"}, +{L"Romance Standard Time","Europe/Paris"}, +{L"Central European Standard Time","Europe/Warsaw"}, +{L"W. Central Africa Standard Time","Africa/Lagos"}, +{L"Jordan Standard Time","Asia/Amman"}, +{L"GTB Standard Time","Europe/Bucharest"}, +{L"Middle East Standard Time","Asia/Beirut"}, +{L"Egypt Standard Time","Africa/Cairo"}, +{L"E. 
Europe Standard Time","Europe/Chisinau"}, +{L"Syria Standard Time","Asia/Damascus"}, +{L"West Bank Standard Time","Asia/Hebron"}, +{L"South Africa Standard Time","Africa/Johannesburg"}, +{L"FLE Standard Time","Europe/Kiev"}, +{L"Israel Standard Time","Asia/Jerusalem"}, +{L"South Sudan Standard Time","Africa/Juba"}, +{L"Kaliningrad Standard Time","Europe/Kaliningrad"}, +{L"Sudan Standard Time","Africa/Khartoum"}, +{L"Libya Standard Time","Africa/Tripoli"}, +{L"Namibia Standard Time","Africa/Windhoek"}, +{L"Arabic Standard Time","Asia/Baghdad"}, +{L"Turkey Standard Time","Europe/Istanbul"}, +{L"Arab Standard Time","Asia/Riyadh"}, +{L"Belarus Standard Time","Europe/Minsk"}, +{L"Russian Standard Time","Europe/Moscow"}, +{L"E. Africa Standard Time","Africa/Nairobi"}, +{L"Iran Standard Time","Asia/Tehran"}, +{L"Arabian Standard Time","Asia/Dubai"}, +{L"Astrakhan Standard Time","Europe/Astrakhan"}, +{L"Azerbaijan Standard Time","Asia/Baku"}, +{L"Russia Time Zone 3","Europe/Samara"}, +{L"Mauritius Standard Time","Indian/Mauritius"}, +{L"Saratov Standard Time","Europe/Saratov"}, +{L"Georgian Standard Time","Asia/Tbilisi"}, +{L"Volgograd Standard Time","Europe/Volgograd"}, +{L"Caucasus Standard Time","Asia/Yerevan"}, +{L"Afghanistan Standard Time","Asia/Kabul"}, +{L"West Asia Standard Time","Asia/Tashkent"}, +{L"Ekaterinburg Standard Time","Asia/Yekaterinburg"}, +{L"Pakistan Standard Time","Asia/Karachi"}, +{L"Qyzylorda Standard Time","Asia/Qyzylorda"}, +{L"India Standard Time","Asia/Calcutta"}, +{L"Sri Lanka Standard Time","Asia/Colombo"}, +{L"Nepal Standard Time","Asia/Katmandu"}, +{L"Central Asia Standard Time","Asia/Almaty"}, +{L"Bangladesh Standard Time","Asia/Dhaka"}, +{L"Omsk Standard Time","Asia/Omsk"}, +{L"Myanmar Standard Time","Asia/Rangoon"}, +{L"SE Asia Standard Time","Asia/Bangkok"}, +{L"Altai Standard Time","Asia/Barnaul"}, +{L"W. Mongolia Standard Time","Asia/Hovd"}, +{L"North Asia Standard Time","Asia/Krasnoyarsk"}, +{L"N. 
Central Asia Standard Time","Asia/Novosibirsk"}, +{L"Tomsk Standard Time","Asia/Tomsk"}, +{L"China Standard Time","Asia/Shanghai"}, +{L"North Asia East Standard Time","Asia/Irkutsk"}, +{L"Singapore Standard Time","Asia/Singapore"}, +{L"W. Australia Standard Time","Australia/Perth"}, +{L"Taipei Standard Time","Asia/Taipei"}, +{L"Ulaanbaatar Standard Time","Asia/Ulaanbaatar"}, +{L"Aus Central W. Standard Time","Australia/Eucla"}, +{L"Transbaikal Standard Time","Asia/Chita"}, +{L"Tokyo Standard Time","Asia/Tokyo"}, +{L"North Korea Standard Time","Asia/Pyongyang"}, +{L"Korea Standard Time","Asia/Seoul"}, +{L"Yakutsk Standard Time","Asia/Yakutsk"}, +{L"Cen. Australia Standard Time","Australia/Adelaide"}, +{L"AUS Central Standard Time","Australia/Darwin"}, +{L"E. Australia Standard Time","Australia/Brisbane"}, +{L"AUS Eastern Standard Time","Australia/Sydney"}, +{L"West Pacific Standard Time","Pacific/Port_Moresby"}, +{L"Tasmania Standard Time","Australia/Hobart"}, +{L"Vladivostok Standard Time","Asia/Vladivostok"}, +{L"Lord Howe Standard Time","Australia/Lord_Howe"}, +{L"Bougainville Standard Time","Pacific/Bougainville"}, +{L"Russia Time Zone 10","Asia/Srednekolymsk"}, +{L"Magadan Standard Time","Asia/Magadan"}, +{L"Norfolk Standard Time","Pacific/Norfolk"}, +{L"Sakhalin Standard Time","Asia/Sakhalin"}, +{L"Central Pacific Standard Time","Pacific/Guadalcanal"}, +{L"Russia Time Zone 11","Asia/Kamchatka"}, +{L"New Zealand Standard Time","Pacific/Auckland"}, +{L"UTC+12","Etc/GMT-12"}, +{L"Fiji Standard Time","Pacific/Fiji"}, +{L"Chatham Islands Standard Time","Pacific/Chatham"}, +{L"UTC+13","Etc/GMT-13"}, +{L"Tonga Standard Time","Pacific/Tongatapu"}, +{L"Samoa Standard Time","Pacific/Apia"}, +{L"Line Islands Standard Time","Pacific/Kiritimati"}, diff --git a/sql/winmain.cc b/sql/winmain.cc new file mode 100644 index 00000000..2ed43130 --- /dev/null +++ b/sql/winmain.cc @@ -0,0 +1,373 @@ +/* Copyright (C) 2020 MariaDB Corporation + + This program is free software; you can 
redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/* + main() function for the server on Windows is implemented here. + The core functionality is implemented elsewhere, in mysqld_main(), and running as + service is done here. + + Main tasks of the service are + + 1. Report current status back to service control manager. Here we're + providing callbacks so code outside of winmain.cc can call it + (via mysqld_set_service_status_callback()) + + 2. React to notification, the only one we care about is the "stop" + notification. we initiate shutdown, when instructed. + + Note that our service might not be too Windows-friendly, as it might take + a while to startup (recovery), and a while to shut down(innodb cleanups). + + Most of the code more of less standard service stuff, taken from Microsoft + docs examples. + + Notable oddity in running services, is that we do not know for sure, + whether we should run as a service or not (there is no --service parameter that + would tell).Heuristics are used, and if the last command line argument is + valid service name, we try to run as service, but fallback to usual process + if this fails. + + As an example, even if mysqld.exe is started with command line like "mysqld.exe --help", + it is entirely possible that mysqld.exe run as service "--help". 
+ + Apart from that, now deprecated and obsolete service registration/removal functionality is + still provided (mysqld.exe --install/--remove) +*/ + +#include +#include +#include + +#include +#include +#include +#include +#include + +static SERVICE_STATUS svc_status{SERVICE_WIN32_OWN_PROCESS}; +static SERVICE_STATUS_HANDLE svc_status_handle; +static char *svc_name; + +static char **save_argv; +static int save_argc; + +static int install_service(int argc, char **argv, const char *name); +static int remove_service(const char *name); + +/* + Report service status to SCM. This function is indirectly invoked + by the server to report state transitions. + + 1. from START_PENDING to SERVICE_RUNNING, when we start accepting user connections + 2. from SERVICE_RUNNING to STOP_PENDING, when we start shutdown + 3. from STOP_PENDING to SERVICE_STOPPED, in mysqld_exit() + sometimes also START_PENDING to SERVICE_STOPPED, on startup errors +*/ +static void report_svc_status(DWORD current_state, DWORD exit_code, DWORD wait_hint) +{ + if (!svc_status_handle) + return; + + static DWORD check_point= 1; + if (current_state != (DWORD)-1) + svc_status.dwCurrentState= current_state; + svc_status.dwWaitHint= wait_hint; + + if (exit_code) + { + svc_status.dwWin32ExitCode= ERROR_SERVICE_SPECIFIC_ERROR; + svc_status.dwServiceSpecificExitCode= exit_code; + } + else + { + svc_status.dwWin32ExitCode= 0; + } + + if (current_state == SERVICE_START_PENDING) + svc_status.dwControlsAccepted= 0; + else + svc_status.dwControlsAccepted= SERVICE_ACCEPT_STOP|SERVICE_ACCEPT_SHUTDOWN; + + if ((current_state == SERVICE_RUNNING) || (current_state == SERVICE_STOPPED)) + svc_status.dwCheckPoint= 0; + else + svc_status.dwCheckPoint= check_point++; + + SetServiceStatus(svc_status_handle, &svc_status); +} + +/* Report unexpected errors. 
*/ +static void svc_report_event(const char *svc_name, const char *command) +{ + char buffer[80]; + sprintf_s(buffer, "mariadb service %s, %s failed with %d", + svc_name, command, GetLastError()); + OutputDebugString(buffer); +} + +/* + Service control function. + Reacts to service stop, initiates shutdown. +*/ +static void WINAPI svc_ctrl_handle(DWORD cntrl) +{ + switch (cntrl) + { + case SERVICE_CONTROL_SHUTDOWN: + case SERVICE_CONTROL_STOP: + sql_print_information( + "Windows service \"%s\": received %s", + svc_name, + cntrl == SERVICE_CONTROL_STOP? "SERVICE_CONTROL_STOP": "SERVICE_CONTROL_SHUTDOWN"); + + /* The below will also set the status to STOP_PENDING. */ + mysqld_win_initiate_shutdown(); + break; + + case SERVICE_CONTROL_INTERROGATE: + default: + break; + } +} + +/* Service main routine, mainly runs mysqld_main() */ +static void WINAPI svc_main(DWORD svc_argc, char **svc_argv) +{ + /* Register the handler function for the service */ + char *name= svc_argv[0]; + + svc_status_handle= RegisterServiceCtrlHandler(name, svc_ctrl_handle); + if (!svc_status_handle) + { + svc_report_event(name, "RegisterServiceCtrlHandler"); + return; + } + report_svc_status(SERVICE_START_PENDING, NO_ERROR, 0); + + /* Make server report service status via our callback.*/ + mysqld_set_service_status_callback(report_svc_status); + + /* This would add service name entry to load_defaults.*/ + mysqld_win_set_service_name(name); + + /* + Do not pass the service name parameter (last on the command line) + to mysqld_main(), it is unaware of it. + */ + save_argv[save_argc - 1]= 0; + mysqld_main(save_argc - 1, save_argv); +} + +/* + This start the service. Sometimes it will fail, because + currently we do not know for sure whether we run as service or not. + If this fails, the fallback is to run as normal process. 
+*/ +static int run_as_service(char *name) +{ + SERVICE_TABLE_ENTRY stb[]= {{name, svc_main}, {0, 0}}; + if (!StartServiceCtrlDispatcher(stb)) + { + assert(GetLastError() == ERROR_FAILED_SERVICE_CONTROLLER_CONNECT); + return -1; + } + return 0; +} + +/* + Check for valid existing service name. + Part of our guesswork, whether we run as service or not. +*/ +static bool is_existing_service(const char *name) +{ + if (strchr(name, '\\') || strchr(name, '/')) + { + /* Invalid characters in service name */ + return false; + } + + SC_HANDLE sc_service= 0, scm= 0; + bool ret= ((scm= OpenSCManager(0, 0, SC_MANAGER_ENUMERATE_SERVICE)) != 0) && + ((sc_service= OpenService(scm, name, SERVICE_QUERY_STATUS)) != 0); + + if (sc_service) + CloseServiceHandle(sc_service); + if (scm) + CloseServiceHandle(scm); + + return ret; +} + +/* + If service name is not given to --install/--remove + it is assumed to be "MySQL" (traditional handling) +*/ +static const char *get_svc_name(const char *arg) +{ + return arg ? arg : "MySQL"; +} + +/* + Main function on Windows. + Runs mysqld as normal process, or as a service. + + Plus, the obsolete functionality to register/remove services. +*/ +__declspec(dllexport) int mysqld_win_main(int argc, char **argv) +{ + save_argv= argv; + save_argc= argc; + + /* + If no special arguments are given, service name is nor present + run as normal program. 
+ */ + if (argc == 1) + return mysqld_main(argc, argv); + + auto cmd= argv[1]; + + /* Handle install/remove */ + if (!strcmp(cmd, "--install") || !strcmp(cmd, "--install-manual")) + return install_service(argc, argv, get_svc_name(argv[2])); + + if (!strcmp(cmd, "--remove")) + return remove_service(get_svc_name(argv[2])); + + /* Try to run as service, and fallback to mysqld_main(), if this fails */ + svc_name= argv[argc - 1]; + if (is_existing_service(svc_name) && !run_as_service(svc_name)) + return 0; + svc_name= 0; + + /* Run as normal program.*/ + return mysqld_main(argc, argv); +} + + +/* + Register/remove services functionality. + This is kept for backward compatibility only, and is + superseeded by much more versatile mysql_install_db.exe + + "mysqld --remove=svc" has no advantage over + OS own "sc delete svc" +*/ +static void ATTRIBUTE_NORETURN die(const char *func, const char *name) +{ + DWORD err= GetLastError(); + fprintf(stderr, "FATAL ERROR : %s failed (%lu)\n", func, err); + switch (err) + { + case ERROR_SERVICE_EXISTS: + fprintf(stderr, "Service %s already exists.\n", name); + break; + case ERROR_SERVICE_DOES_NOT_EXIST: + fprintf(stderr, "Service %s does not exist.\n", name); + break; + case ERROR_ACCESS_DENIED: + fprintf(stderr, "Access is denied. 
" + "Make sure to run as elevated admin user.\n"); + break; + case ERROR_INVALID_NAME: + fprintf(stderr, "Invalid service name '%s'\n", name); + default: + break; + } + exit(1); +} + +static inline std::string quoted(const char *src) +{ + std::string s; + s.append("\"").append(src).append("\""); + return s; +} + +static int install_service(int argc, char **argv, const char *name) +{ + std::string cmdline; + + char path[MAX_PATH]; + auto nSize = GetModuleFileName(0, path, sizeof(path)); + + if (nSize == (DWORD) sizeof(path) && GetLastError() == ERROR_INSUFFICIENT_BUFFER) + die("GetModuleName", name); + + cmdline.append(quoted(path)); + + const char *user= 0; + // mysqld --install[-manual] name ...[--local-service] + if (argc > 2) + { + for (int i= 3; argv[i]; i++) + { + if (!strcmp(argv[i], "--local-service")) + user= "NT AUTHORITY\\LocalService"; + else + { + cmdline.append(" ").append(quoted(argv[i])); + } + } + } + cmdline.append(" ").append(quoted(name)); + + DWORD start_type; + if (!strcmp(argv[1], "--install-manual")) + start_type= SERVICE_DEMAND_START; + else + start_type= SERVICE_AUTO_START; + + SC_HANDLE scm, sc_service; + if (!(scm= OpenSCManager(0, 0, SC_MANAGER_CREATE_SERVICE))) + die("OpenSCManager", name); + + if (!(sc_service= CreateService( + scm, name, name, SERVICE_ALL_ACCESS, + SERVICE_WIN32_OWN_PROCESS, start_type, SERVICE_ERROR_NORMAL, + cmdline.c_str(), 0, 0, 0, user, 0))) + die("CreateService", name); + + char description[]= "MariaDB database server"; + SERVICE_DESCRIPTION sd= {description}; + ChangeServiceConfig2(sc_service, SERVICE_CONFIG_DESCRIPTION, &sd); + + CloseServiceHandle(sc_service); + CloseServiceHandle(scm); + + printf("Service '%s' successfully installed.\n", name); + return 0; +} + +static int remove_service(const char *name) +{ + SC_HANDLE scm, sc_service; + + if (!(scm= OpenSCManager(0, 0, SC_MANAGER_CREATE_SERVICE))) + die("OpenSCManager", name); + + if (!(sc_service= OpenService(scm, name, DELETE))) + die("OpenService", 
name); + + if (!DeleteService(sc_service)) + die("DeleteService", name); + + CloseServiceHandle(sc_service); + CloseServiceHandle(scm); + + printf("Service '%s' successfully deleted.\n", name); + return 0; +} diff --git a/sql/winservice.c b/sql/winservice.c new file mode 100644 index 00000000..d4e3bb09 --- /dev/null +++ b/sql/winservice.c @@ -0,0 +1,338 @@ +/* + Copyright (c) 2011, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Get Properties of an existing mysqld Windows service +*/ + +#include +#include +#include "winservice.h" +#include +#include +#include +#include + +/* + Get version from an executable file +*/ +void get_file_version(const char *path, int *major, int *minor, int *patch) +{ + DWORD version_handle; + char *ver= 0; + VS_FIXEDFILEINFO info; + UINT len; + DWORD size; + void *p; + *major= *minor= *patch= 0; + + size= GetFileVersionInfoSize(path, &version_handle); + if (size == 0) + return; + ver= (char *)malloc(size); + if(!GetFileVersionInfo(path, version_handle, size, ver)) + goto end; + + if(!VerQueryValue(ver,"\\",&p,&len)) + goto end; + memcpy(&info,p ,sizeof(VS_FIXEDFILEINFO)); + + *major= (info.dwFileVersionMS & 0xFFFF0000) >> 16; + *minor= (info.dwFileVersionMS & 0x0000FFFF); + *patch= (info.dwFileVersionLS & 0xFFFF0000) >> 16; +end: + free(ver); +} + +void normalize_path(char *path, size_t size) +{ + char 
buf[MAX_PATH]; + if (*path== '"') + { + char *p; + strcpy_s(buf, MAX_PATH, path+1); + p= strchr(buf, '"'); + if (p) + *p=0; + } + else + strcpy_s(buf, MAX_PATH, path); + GetFullPathName(buf, MAX_PATH, buf, NULL); + strcpy_s(path, size, buf); +} + +/* + Exclusion rules. + + Some hardware manufacturers deliver systems with own preinstalled MySQL copy + and services. We do not want to mess up with these installations. We will + just ignore such services, pretending it is not MySQL. + + We also exclude MySQL5.7+ since we cannot upgrade it (and it is not an upgrade anyway) + + @return + TRUE, if this service should be excluded from UI lists etc + FALSE otherwise. +*/ +BOOL exclude_service(mysqld_service_properties *props) +{ + static const char* exclude_patterns[] = + { + "common files\\dell\\mysql\\bin\\", /* Dell's private installation */ + NULL + }; + int i; + char buf[MAX_PATH]; + + /* Convert mysqld path to lower case, rules for paths are case-insensitive. */ + memcpy(buf, props->mysqld_exe, sizeof(props->mysqld_exe)); + _strlwr(buf); + + for(i= 0; exclude_patterns[i]; i++) + { + if (strstr(buf, exclude_patterns[i])) + return TRUE; + } + if ((props->version_major == 0) || + (props->version_major > 5 && props->version_major < 10) || + (props->version_major == 5 && props->version_minor > 7)) + { + return TRUE; + } + return FALSE; +} + + +static void get_datadir_from_ini(const char *ini, char *service_name, char *datadir, size_t sz) +{ + *datadir= 0; + const char *sections[]= {service_name, "mysqld", "server", "mariadb", + "mariadbd"}; + for (int i= 0; i < sizeof(sections) / sizeof(sections[0]); i++) + { + if (sections[i]) + { + GetPrivateProfileStringA(sections[i], "datadir", NULL, datadir, + (DWORD) sz, ini); + if (*datadir) + return; + } + } +} + + +static int fix_and_check_datadir(mysqld_service_properties *props) +{ + normalize_path(props->datadir, MAX_PATH); + /* Check if datadir really exists */ + if (GetFileAttributes(props->datadir) != 
INVALID_FILE_ATTRIBUTES) + return 0; + /* + It is possible, that datadir contains some unconvertable character. + We just pretend not to know what's the data directory + */ + props->datadir[0]= 0; + return 0; +} + +/* + Retrieve some properties from windows mysqld service binary path. + We're interested in ini file location and datadir, and also in version of + the data. We tolerate missing mysqld.exe. + + Note that this function carefully avoids using mysql libraries (e.g dbug), + since it is used in unusual environments (windows installer, MFC), where we + do not have much control over how threads are created and destroyed, so we + cannot assume MySQL thread initilization here. +*/ +int get_mysql_service_properties(const wchar_t *bin_path, + mysqld_service_properties *props) +{ + int numargs; + wchar_t mysqld_path[MAX_PATH + 4]; + wchar_t *file_part; + wchar_t **args= NULL; + int retval= 1; + BOOL have_inifile; + char service_name[MAX_PATH]; + + props->datadir[0]= 0; + props->inifile[0]= 0; + props->mysqld_exe[0]= 0; + props->version_major= 0; + props->version_minor= 0; + props->version_patch= 0; + + args= CommandLineToArgvW(bin_path, &numargs); + if(numargs == 2) + { + /* + There are rare cases where service config does not have + --defaults-file in the binary path . There services were + registered with plain mysqld --install, the data directory is + next to "bin" in this case. 
+ */ + have_inifile= FALSE; + } + else if(numargs == 3) + { + have_inifile= TRUE; + } + else + { + goto end; + } + + /* Last parameter is the service name*/ + wcstombs(service_name, args[numargs-1], MAX_PATH); + + if(have_inifile && wcsncmp(args[1], L"--defaults-file=", 16) != 0) + goto end; + + GetFullPathNameW(args[0], MAX_PATH, mysqld_path, &file_part); + + if(wcsstr(mysqld_path, L".exe") == NULL) + wcscat(mysqld_path, L".exe"); + + if(wcsicmp(file_part, L"mysqld.exe") != 0 && + wcsicmp(file_part, L"mysqld-debug.exe") != 0 && + wcsicmp(file_part, L"mysqld-nt.exe") != 0 && + wcsicmp(file_part, L"mariadbd.exe") != 0) + { + /* The service executable is not mysqld. */ + goto end; + } + + wcstombs(props->mysqld_exe, mysqld_path, MAX_PATH); + /* If mysqld.exe exists, try to get its version from executable */ + if (GetFileAttributes(props->mysqld_exe) != INVALID_FILE_ATTRIBUTES) + { + get_file_version(props->mysqld_exe, &props->version_major, + &props->version_minor, &props->version_patch); + } + + if (have_inifile) + { + /* We have --defaults-file in service definition. */ + wcstombs(props->inifile, args[1]+16, MAX_PATH); + normalize_path(props->inifile, MAX_PATH); + if (GetFileAttributes(props->inifile) != INVALID_FILE_ATTRIBUTES) + { + get_datadir_from_ini(props->inifile, service_name, props->datadir, + sizeof(props->datadir)); + } + else + { + /* + Service will start even with invalid .ini file, using lookup for + datadir relative to mysqld.exe. This is equivalent to the case no ini + file used. + */ + props->inifile[0]= 0; + have_inifile= FALSE; + } + } + + if(!have_inifile || props->datadir[0] == 0) + { + /* + Hard, although a rare case, we're guessing datadir and defaults-file. + On Windows, defaults-file is traditionally install-root\my.ini + and datadir is install-root\data + */ + char install_root[MAX_PATH]; + int i; + char *p; + + /* + Get the install root(parent of bin directory where mysqld.exe) + is located. 
+ */ + strcpy_s(install_root, MAX_PATH, props->mysqld_exe); + for (i=0; i< 2; i++) + { + p= strrchr(install_root, '\\'); + if(!p) + goto end; + *p= 0; + } + + if (!have_inifile) + { + /* Look for my.ini, my.cnf in the install root */ + sprintf_s(props->inifile, MAX_PATH, "%s\\my.ini", install_root); + if (GetFileAttributes(props->inifile) == INVALID_FILE_ATTRIBUTES) + { + sprintf_s(props->inifile, MAX_PATH, "%s\\my.cnf", install_root); + } + if (GetFileAttributes(props->inifile) != INVALID_FILE_ATTRIBUTES) + { + /* Ini file found, get datadir from there */ + get_datadir_from_ini(props->inifile, service_name, props->datadir, + sizeof(props->datadir)); + } + else + { + /* No ini file */ + props->inifile[0]= 0; + } + } + + /* Try datadir in install directory.*/ + if (props->datadir[0] == 0) + { + sprintf_s(props->datadir, MAX_PATH, "%s\\data", install_root); + } + } + + if (props->datadir[0] == 0 || fix_and_check_datadir(props)) + { + /* There is no datadir in ini file, or non-existing dir, bail out.*/ + goto end; + } + + /* + If version could not be determined so far, try mysql_upgrade_info in + database directory. 
+ */ + if(props->version_major == 0) + { + char buf[MAX_PATH]; + FILE *mysql_upgrade_info; + + sprintf_s(buf, MAX_PATH, "%s\\mysql_upgrade_info", props->datadir); + mysql_upgrade_info= fopen(buf, "r"); + if(mysql_upgrade_info) + { + if (fgets(buf, MAX_PATH, mysql_upgrade_info)) + { + int major,minor,patch; + if (sscanf(buf, "%d.%d.%d", &major, &minor, &patch) == 3) + { + props->version_major= major; + props->version_minor= minor; + props->version_patch= patch; + } + } + } + } + + if (!exclude_service(props)) + retval = 0; +end: + LocalFree((HLOCAL)args); + return retval; +} diff --git a/sql/winservice.h b/sql/winservice.h new file mode 100644 index 00000000..aa0528be --- /dev/null +++ b/sql/winservice.h @@ -0,0 +1,212 @@ +/* + Copyright (c) 2011, 2012, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Extract properties of a windows service binary path +*/ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4995) +#endif +#include +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +typedef struct mysqld_service_properties_st +{ + char mysqld_exe[MAX_PATH]; + char inifile[MAX_PATH]; + char datadir[MAX_PATH]; + int version_major; + int version_minor; + int version_patch; +} mysqld_service_properties; + +extern int get_mysql_service_properties(const wchar_t *bin_path, + mysqld_service_properties *props); + + +#if !defined(UNICODE) +/* + The following wrappers workaround Windows bugs + with CreateService/OpenService with ANSI codepage UTF8. + + Apparently, these function in ANSI mode, for this codepage only + do *not* behave as expected (as-if string parameters were + converted to UTF16 and "wide" function were called) +*/ +#include +static inline wchar_t* awstrdup(const char *str) +{ + if (!str) + return NULL; + size_t len= strlen(str) + 1; + wchar_t *wstr= (wchar_t *) malloc(sizeof(wchar_t)*len); + if (MultiByteToWideChar(GetACP(), 0, str, (int)len, wstr, (int)len) == 0) + { + free(wstr); + return NULL; + } + return wstr; +} + +#define AWSTRDUP(dest, src) \ + dest= awstrdup(src); \ + if (src && !dest) \ + { \ + ok= FALSE; \ + last_error = ERROR_OUTOFMEMORY; \ + goto end; \ + } + +static inline SC_HANDLE my_OpenService(SC_HANDLE hSCManager, LPCSTR lpServiceName, DWORD dwDesiredAccess) +{ + wchar_t *w_ServiceName= NULL; + BOOL ok=TRUE; + DWORD last_error=0; + SC_HANDLE sch=NULL; + + AWSTRDUP(w_ServiceName, lpServiceName); + sch= OpenServiceW(hSCManager, w_ServiceName, dwDesiredAccess); + if (!sch) + { + ok= FALSE; + last_error= GetLastError(); + } + +end: + 
free(w_ServiceName); + if (!ok) + SetLastError(last_error); + return sch; +} + +static inline SC_HANDLE my_CreateService(SC_HANDLE hSCManager, + LPCSTR lpServiceName, LPCSTR lpDisplayName, + DWORD dwDesiredAccess, DWORD dwServiceType, + DWORD dwStartType, DWORD dwErrorControl, + LPCSTR lpBinaryPathName, LPCSTR lpLoadOrderGroup, + LPDWORD lpdwTagId, LPCSTR lpDependencies, + LPCSTR lpServiceStartName, LPCSTR lpPassword) +{ + wchar_t *w_ServiceName= NULL; + wchar_t *w_DisplayName= NULL; + wchar_t *w_BinaryPathName= NULL; + wchar_t *w_LoadOrderGroup= NULL; + wchar_t *w_Dependencies= NULL; + wchar_t *w_ServiceStartName= NULL; + wchar_t *w_Password= NULL; + SC_HANDLE sch = NULL; + DWORD last_error=0; + BOOL ok= TRUE; + + AWSTRDUP(w_ServiceName,lpServiceName); + AWSTRDUP(w_DisplayName,lpDisplayName); + AWSTRDUP(w_BinaryPathName, lpBinaryPathName); + AWSTRDUP(w_LoadOrderGroup, lpLoadOrderGroup); + AWSTRDUP(w_Dependencies, lpDependencies); + AWSTRDUP(w_ServiceStartName, lpServiceStartName); + AWSTRDUP(w_Password, lpPassword); + + sch= CreateServiceW( + hSCManager, w_ServiceName, w_DisplayName, dwDesiredAccess, dwServiceType, + dwStartType, dwErrorControl, w_BinaryPathName, w_LoadOrderGroup, + lpdwTagId, w_Dependencies, w_ServiceStartName, w_Password); + if(!sch) + { + ok= FALSE; + last_error= GetLastError(); + } + +end: + free(w_ServiceName); + free(w_DisplayName); + free(w_BinaryPathName); + free(w_LoadOrderGroup); + free(w_Dependencies); + free(w_ServiceStartName); + free(w_Password); + + if (!ok) + SetLastError(last_error); + return sch; +} + +static inline BOOL my_ChangeServiceConfig(SC_HANDLE hService, DWORD dwServiceType, + DWORD dwStartType, DWORD dwErrorControl, + LPCSTR lpBinaryPathName, LPCSTR lpLoadOrderGroup, + LPDWORD lpdwTagId, LPCSTR lpDependencies, + LPCSTR lpServiceStartName, LPCSTR lpPassword, + LPCSTR lpDisplayName) +{ + wchar_t *w_DisplayName= NULL; + wchar_t *w_BinaryPathName= NULL; + wchar_t *w_LoadOrderGroup= NULL; + wchar_t *w_Dependencies= NULL; + 
wchar_t *w_ServiceStartName= NULL; + wchar_t *w_Password= NULL; + DWORD last_error=0; + BOOL ok= TRUE; + + AWSTRDUP(w_DisplayName, lpDisplayName); + AWSTRDUP(w_BinaryPathName, lpBinaryPathName); + AWSTRDUP(w_LoadOrderGroup, lpLoadOrderGroup); + AWSTRDUP(w_Dependencies, lpDependencies); + AWSTRDUP(w_ServiceStartName, lpServiceStartName); + AWSTRDUP(w_Password, lpPassword); + + ok= ChangeServiceConfigW( + hService, dwServiceType, dwStartType, dwErrorControl, w_BinaryPathName, + w_LoadOrderGroup, lpdwTagId, w_Dependencies, w_ServiceStartName, + w_Password, w_DisplayName); + if (!ok) + { + last_error= GetLastError(); + } + +end: + free(w_DisplayName); + free(w_BinaryPathName); + free(w_LoadOrderGroup); + free(w_Dependencies); + free(w_ServiceStartName); + free(w_Password); + + if (last_error) + SetLastError(last_error); + return ok; +} +#undef AWSTRDUP + +#undef OpenService +#define OpenService my_OpenService +#undef ChangeServiceConfig +#define ChangeServiceConfig my_ChangeServiceConfig +#undef CreateService +#define CreateService my_CreateService +#endif + +#ifdef __cplusplus +} +#endif diff --git a/sql/wsrep_allowlist_service.cc b/sql/wsrep_allowlist_service.cc new file mode 100644 index 00000000..23ade8b3 --- /dev/null +++ b/sql/wsrep_allowlist_service.cc @@ -0,0 +1,56 @@ +/* Copyright 2021-2022 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "wsrep_allowlist_service.h" + +#include "my_global.h" +#include "wsrep_mysqld.h" +#include "wsrep_priv.h" +#include "wsrep_schema.h" + +#include +#include +#include + +class Wsrep_allowlist_service : public wsrep::allowlist_service +{ +public: + bool allowlist_cb(wsrep::allowlist_service::allowlist_key key, + const wsrep::const_buffer& value) WSREP_NOEXCEPT override; +}; + +bool Wsrep_allowlist_service::allowlist_cb ( + wsrep::allowlist_service::allowlist_key key, + const wsrep::const_buffer& value) + WSREP_NOEXCEPT +{ + std::string string_value(value.data()); + bool res= wsrep_schema->allowlist_check(key, string_value); + return res; +} + +std::unique_ptr entrypoint; + +wsrep::allowlist_service* wsrep_allowlist_service_init() +{ + entrypoint = std::unique_ptr(new Wsrep_allowlist_service); + return entrypoint.get(); +} + +void wsrep_allowlist_service_deinit() +{ + entrypoint.reset(); +} + diff --git a/sql/wsrep_allowlist_service.h b/sql/wsrep_allowlist_service.h new file mode 100644 index 00000000..2d96139b --- /dev/null +++ b/sql/wsrep_allowlist_service.h @@ -0,0 +1,29 @@ +/* Copyright 2021 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/* + Implementation of wsrep provider threads instrumentation. + */ + +#ifndef WSREP_PROVIDER_ALLOWLIST_H +#define WSREP_PROVIDER_ALLOWLIST_H + +#include "wsrep/allowlist_service.hpp" + +wsrep::allowlist_service* wsrep_allowlist_service_init(); + +void wsrep_allowlist_service_deinit(); + +#endif /* WSREP_PROVIDER_ALLOWLIST_H */ diff --git a/sql/wsrep_applier.cc b/sql/wsrep_applier.cc new file mode 100644 index 00000000..90ede81a --- /dev/null +++ b/sql/wsrep_applier.cc @@ -0,0 +1,233 @@ +/* Copyright (C) 2013-2019 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA. */ + +#include "mariadb.h" +#include "mysql/service_wsrep.h" +#include "wsrep_applier.h" + +#include "wsrep_priv.h" +#include "wsrep_binlog.h" // wsrep_dump_rbr_buf() +#include "wsrep_xid.h" +#include "wsrep_thd.h" +#include "wsrep_trans_observer.h" + +#include "slave.h" // opt_log_slave_updates +#include "debug_sync.h" + +/* + read the first event from (*buf). The size of the (*buf) is (*buf_len). + At the end (*buf) is shitfed to point to the following event or NULL and + (*buf_len) will be changed to account just being read bytes of the 1st event. 
+*/ +static Log_event* wsrep_read_log_event( + char **arg_buf, size_t *arg_buf_len, + const Format_description_log_event *description_event) +{ + DBUG_ENTER("wsrep_read_log_event"); + char *head= (*arg_buf); + + uint data_len= uint4korr(head + EVENT_LEN_OFFSET); + uchar *buf= (uchar*) (*arg_buf); + const char *error= 0; + Log_event *res= 0; + + res= Log_event::read_log_event(buf, data_len, &error, description_event, + true); + + if (!res) + { + DBUG_ASSERT(error != 0); + sql_print_error("Error in Log_event::read_log_event(): " + "'%s', data_len: %d, event_type: %d", + error,data_len,head[EVENT_TYPE_OFFSET]); + } + (*arg_buf)+= data_len; + (*arg_buf_len)-= data_len; + DBUG_RETURN(res); +} + +#include "transaction.h" // trans_commit(), trans_rollback() + +void wsrep_set_apply_format(THD* thd, Format_description_log_event* ev) +{ + if (thd->wsrep_apply_format) + { + delete (Format_description_log_event*)thd->wsrep_apply_format; + } + thd->wsrep_apply_format= ev; +} + +Format_description_log_event* +wsrep_get_apply_format(THD* thd) +{ + if (thd->wsrep_apply_format) + { + return (Format_description_log_event*) thd->wsrep_apply_format; + } + + DBUG_ASSERT(thd->wsrep_rgi); + + return thd->wsrep_rgi->rli->relay_log.description_event_for_exec; +} + +void wsrep_store_error(const THD* const thd, wsrep::mutable_buffer& dst) +{ + Diagnostics_area::Sql_condition_iterator it= + thd->get_stmt_da()->sql_conditions(); + const Sql_condition* cond; + + static size_t const max_len= 2*MAX_SLAVE_ERRMSG; // 2x so that we have enough + + dst.resize(max_len); + + char* slider= dst.data(); + const char* const buf_end= slider + max_len - 1; // -1: leave space for \0 + + for (cond= it++; cond && slider < buf_end; cond= it++) + { + uint const err_code= cond->get_sql_errno(); + const char* const err_str= cond->get_message_text(); + + slider+= my_snprintf(slider, buf_end - slider, " %s, Error_code: %d;", + err_str, err_code); + } + + if (slider != dst.data()) + { + *slider= '\0'; + slider++; + } 
+ + dst.resize(slider - dst.data()); + + WSREP_DEBUG("Error buffer for thd %llu seqno %lld, %zu bytes: '%s'", + thd->thread_id, (long long)wsrep_thd_trx_seqno(thd), + dst.size(), dst.size() ? dst.data() : "(null)"); +} + +int wsrep_apply_events(THD* thd, + Relay_log_info* rli, + const void* events_buf, + size_t buf_len) +{ + char *buf= (char *)events_buf; + int rcode= 0; + int event= 1; + Log_event_type typ; + + DBUG_ENTER("wsrep_apply_events"); + if (!buf_len) WSREP_DEBUG("empty rbr buffer to apply: %lld", + (long long) wsrep_thd_trx_seqno(thd)); + + thd->variables.gtid_seq_no= 0; + if (wsrep_gtid_mode) + thd->variables.gtid_domain_id= wsrep_gtid_server.domain_id; + else + thd->variables.gtid_domain_id= global_system_variables.gtid_domain_id; + + while (buf_len) + { + int exec_res; + Log_event* ev= wsrep_read_log_event(&buf, &buf_len, + wsrep_get_apply_format(thd)); + if (!ev) + { + WSREP_ERROR("applier could not read binlog event, seqno: %lld, len: %zu", + (long long)wsrep_thd_trx_seqno(thd), buf_len); + rcode= WSREP_ERR_BAD_EVENT; + goto error; + } + + typ= ev->get_type_code(); + + switch (typ) { + case FORMAT_DESCRIPTION_EVENT: + wsrep_set_apply_format(thd, (Format_description_log_event*)ev); + continue; + case GTID_EVENT: + { + Gtid_log_event *gtid_ev= (Gtid_log_event*)ev; + thd->variables.server_id= gtid_ev->server_id; + thd->variables.gtid_domain_id= gtid_ev->domain_id; + if ((gtid_ev->server_id == wsrep_gtid_server.server_id) && + (gtid_ev->domain_id == wsrep_gtid_server.domain_id)) + { + thd->variables.wsrep_gtid_seq_no= gtid_ev->seq_no; + } + else + { + thd->variables.gtid_seq_no= gtid_ev->seq_no; + } + delete ev; + } + continue; + default: + break; + } + + + if (!thd->variables.gtid_seq_no && wsrep_thd_is_toi(thd) && + (ev->get_type_code() == QUERY_EVENT)) + { + uint64 seqno= wsrep_gtid_server.seqno_inc(); + thd->wsrep_current_gtid_seqno= seqno; + if (mysql_bin_log.is_open() && wsrep_gtid_mode) + { + thd->variables.gtid_seq_no= seqno; + } + } + /* Use 
the original server id for logging. */ + thd->set_server_id(ev->server_id); + thd->set_time(); // time the query + thd->transaction->start_time.reset(thd); + thd->lex->current_select= 0; + if (!ev->when) + { + my_hrtime_t hrtime= my_hrtime(); + ev->when= hrtime_to_my_time(hrtime); + ev->when_sec_part= hrtime_sec_part(hrtime); + } + + thd->variables.option_bits= + (thd->variables.option_bits & ~OPTION_SKIP_REPLICATION) | + (ev->flags & LOG_EVENT_SKIP_REPLICATION_F ? OPTION_SKIP_REPLICATION : 0); + + ev->thd= thd; + exec_res= ev->apply_event(thd->wsrep_rgi); + DBUG_PRINT("info", ("exec_event result: %d", exec_res)); + + if (exec_res) + { + WSREP_WARN("Event %d %s apply failed: %d, seqno %lld", + event, ev->get_type_str(), exec_res, + (long long) wsrep_thd_trx_seqno(thd)); + rcode= exec_res; + /* stop processing for the first error */ + delete ev; + goto error; + } + event++; + + delete_or_keep_event_post_apply(thd->wsrep_rgi, typ, ev); + } + +error: + if (thd->killed == KILL_CONNECTION) + WSREP_INFO("applier aborted: %lld", (long long)wsrep_thd_trx_seqno(thd)); + + wsrep_set_apply_format(thd, NULL); + + DBUG_RETURN(rcode); +} diff --git a/sql/wsrep_applier.h b/sql/wsrep_applier.h new file mode 100644 index 00000000..fefca306 --- /dev/null +++ b/sql/wsrep_applier.h @@ -0,0 +1,44 @@ +/* Copyright 2013-2019 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef WSREP_APPLIER_H +#define WSREP_APPLIER_H + +#include "sql_class.h" // THD class +#include "rpl_rli.h" // Relay_log_info +#include "log_event.h" // Format_description_log_event + +int wsrep_apply_events(THD* thd, + Relay_log_info* rli, + const void* events_buf, + size_t buf_len); + +/* Applier error codes, when nothing better is available. */ +#define WSREP_RET_SUCCESS 0 // Success +#define WSREP_ERR_GENERIC 1 // When in doubt (MySQL default error code) +#define WSREP_ERR_BAD_EVENT 2 // Can't parse event +#define WSREP_ERR_NOT_FOUND 3 // Key. table, schema not found +#define WSREP_ERR_EXISTS 4 // Key, table, schema already exists +#define WSREP_ERR_WRONG_TYPE 5 // Incompatible data type +#define WSREP_ERR_FAILED 6 // Operation failed for some internal reason +#define WSREP_ERR_ABORTED 7 // Operation was aborted externally + +void wsrep_store_error(const THD* thd, wsrep::mutable_buffer& buf); + +class Format_description_log_event; +void wsrep_set_apply_format(THD*, Format_description_log_event*); +Format_description_log_event* wsrep_get_apply_format(THD* thd); + +#endif /* WSREP_APPLIER_H */ diff --git a/sql/wsrep_binlog.cc b/sql/wsrep_binlog.cc new file mode 100644 index 00000000..84392fac --- /dev/null +++ b/sql/wsrep_binlog.cc @@ -0,0 +1,400 @@ +/* Copyright (C) 2013 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA. */ + +#include "mariadb.h" +#include "mysql/service_wsrep.h" +#include "wsrep_binlog.h" +#include "log.h" +#include "slave.h" +#include "log_event.h" +#include "wsrep_applier.h" +#include "wsrep_mysqld.h" + +#include "transaction.h" + +extern handlerton *binlog_hton; +/* + Write the contents of a cache to a memory buffer. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + */ +int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len) +{ + *buf= NULL; + *buf_len= 0; + my_off_t const saved_pos(my_b_tell(cache)); + DBUG_ENTER("wsrep_write_cache_buf"); + + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + DBUG_RETURN(ER_ERROR_ON_WRITE); + } + + uint length= my_b_bytes_in_cache(cache); + if (unlikely(0 == length)) length= my_b_fill(cache); + + size_t total_length= 0; + + if (likely(length > 0)) do + { + total_length += length; + /* + Bail out if buffer grows too large. + A temporary fix to avoid allocating indefinitely large buffer, + not a real limit on a writeset size which includes other things + like header and keys. 
+ */ + if (total_length > wsrep_max_ws_size) + { + WSREP_WARN("transaction size limit (%lu) exceeded: %zu", + wsrep_max_ws_size, total_length); + goto error; + } + uchar* tmp= (uchar *)my_realloc(PSI_INSTRUMENT_ME, *buf, total_length, + MYF(MY_ALLOW_ZERO_PTR)); + if (!tmp) + { + WSREP_ERROR("could not (re)allocate buffer: %zu + %u", + *buf_len, length); + goto error; + } + *buf= tmp; + + memcpy(*buf + *buf_len, cache->read_pos, length); + *buf_len= total_length; + + if (cache->file < 0) + { + cache->read_pos= cache->read_end; + break; + } + } while ((length= my_b_fill(cache))); + + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_WARN("failed to initialize io-cache"); + goto cleanup; + } + + DBUG_RETURN(0); + +error: + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_WARN("failed to initialize io-cache"); + } +cleanup: + my_free(*buf); + *buf= NULL; + *buf_len= 0; + DBUG_RETURN(ER_ERROR_ON_WRITE); +} + +#define STACK_SIZE 4096 /* 4K - for buffer preallocated on the stack: + * many transactions would fit in there + * so there is no need to reach for the heap */ + +/* + Write the contents of a cache to wsrep provider. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + + This version uses incremental data appending as it reads it from cache. 
+ */ +static int wsrep_write_cache_inc(THD* const thd, + IO_CACHE* const cache, + size_t* const len) +{ + DBUG_ENTER("wsrep_write_cache_inc"); + my_off_t const saved_pos(my_b_tell(cache)); + + if (reinit_io_cache(cache, READ_CACHE, thd->wsrep_sr().log_position(), 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + DBUG_RETURN(1);; + } + + int ret= 0; + size_t total_length(0); + + uint length(my_b_bytes_in_cache(cache)); + if (unlikely(0 == length)) length= my_b_fill(cache); + + if (likely(length > 0)) + { + do + { + total_length += length; + /* bail out if buffer grows too large + not a real limit on a writeset size which includes other things + like header and keys. + */ + if (unlikely(total_length > wsrep_max_ws_size)) + { + WSREP_WARN("transaction size limit (%lu) exceeded: %zu", + wsrep_max_ws_size, total_length); + ret= 1; + goto cleanup; + } + if (thd->wsrep_cs().append_data(wsrep::const_buffer(cache->read_pos, length))) + goto cleanup; + cache->read_pos= cache->read_end; + } while ((cache->file >= 0) && (length= my_b_fill(cache))); + if (ret == 0) + { + assert(total_length + thd->wsrep_sr().log_position() == saved_pos); + } + } + +cleanup: + *len= total_length; + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_ERROR("failed to reinitialize io-cache"); + } + DBUG_RETURN(ret); +} + +/* + Write the contents of a cache to wsrep provider. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. 
+ */ +int wsrep_write_cache(THD* const thd, + IO_CACHE* const cache, + size_t* const len) +{ + return wsrep_write_cache_inc(thd, cache, len); +} + +void wsrep_dump_rbr_buf(THD *thd, const void* rbr_buf, size_t buf_len) +{ + int len= snprintf(NULL, 0, "%s/GRA_%lld_%lld.log", + wsrep_data_home_dir, (longlong) thd->thread_id, + (longlong) wsrep_thd_trx_seqno(thd)); + if (len < 0) + { + WSREP_ERROR("snprintf error: %d, skipping dump.", len); + return; + } + /* + len doesn't count the \0 end-of-string. Use len+1 below + to alloc and pass as an argument to snprintf. + */ + + char *filename= (char *) my_malloc(key_memory_WSREP, len+1, 0); + int len1= snprintf(filename, len+1, "%s/GRA_%lld_%lld.log", + wsrep_data_home_dir, (longlong) thd->thread_id, + (long long)wsrep_thd_trx_seqno(thd)); + + if (len > len1) + { + WSREP_ERROR("RBR dump path truncated: %d, skipping dump.", len); + my_free(filename); + return; + } + + FILE *of= fopen(filename, "wb"); + + if (of) + { + if (fwrite(rbr_buf, buf_len, 1, of) == 0) + WSREP_ERROR("Failed to write buffer of length %llu to '%s'", + (unsigned long long)buf_len, filename); + + fclose(of); + } + else + { + WSREP_ERROR("Failed to open file '%s': %d (%s)", + filename, errno, strerror(errno)); + } + my_free(filename); +} + +/* Dump replication buffer along with header to a file. */ +void wsrep_dump_rbr_buf_with_header(THD *thd, const void *rbr_buf, + size_t buf_len) +{ + DBUG_ENTER("wsrep_dump_rbr_buf_with_header"); + + File file; + IO_CACHE cache; + Log_event_writer writer(&cache, 0); + Format_description_log_event *ev= 0; + + longlong thd_trx_seqno= (long long)wsrep_thd_trx_seqno(thd); + int len= snprintf(NULL, 0, "%s/GRA_%lld_%lld_v2.log", + wsrep_data_home_dir, (longlong)thd->thread_id, + thd_trx_seqno); + /* + len doesn't count the \0 end-of-string. Use len+1 below + to alloc and pass as an argument to snprintf. 
+ */ + char *filename; + if (len < 0 || !(filename= (char*) my_malloc(key_memory_WSREP, len+1, 0))) + { + WSREP_ERROR("snprintf error: %d, skipping dump.", len); + DBUG_VOID_RETURN; + } + + int len1= snprintf(filename, len+1, "%s/GRA_%lld_%lld_v2.log", + wsrep_data_home_dir, (longlong) thd->thread_id, + thd_trx_seqno); + + if (len > len1) + { + WSREP_ERROR("RBR dump path truncated: %d, skipping dump.", len); + my_free(filename); + DBUG_VOID_RETURN; + } + + if ((file= mysql_file_open(key_file_wsrep_gra_log, filename, + O_RDWR | O_CREAT | O_BINARY, MYF(MY_WME))) < 0) + { + WSREP_ERROR("Failed to open file '%s' : %d (%s)", + filename, errno, strerror(errno)); + goto cleanup1; + } + + if (init_io_cache(&cache, file, 0, WRITE_CACHE, 0, 0, MYF(MY_WME | MY_NABP))) + { + goto cleanup2; + } + + if (my_b_safe_write(&cache, BINLOG_MAGIC, BIN_LOG_HEADER_SIZE)) + { + goto cleanup2; + } + + /* + Instantiate an FDLE object for non-wsrep threads (to be written + to the dump file). + */ + ev= (thd->wsrep_applier) ? 
wsrep_get_apply_format(thd) : + (new Format_description_log_event(4)); + + if (writer.write(ev) || my_b_write(&cache, (uchar*)rbr_buf, buf_len) || + flush_io_cache(&cache)) + { + WSREP_ERROR("Failed to write to '%s'.", filename); + goto cleanup2; + } + +cleanup2: + end_io_cache(&cache); + +cleanup1: + my_free(filename); + mysql_file_close(file, MYF(MY_WME)); + + if (!thd->wsrep_applier) delete ev; + + DBUG_VOID_RETURN; +} + +int wsrep_write_skip_event(THD* thd) +{ + DBUG_ENTER("wsrep_write_skip_event"); + Ignorable_log_event skip_event(thd); + int ret= mysql_bin_log.write_event(&skip_event); + if (ret) + { + WSREP_WARN("wsrep_write_skip_event: write to binlog failed: %d", ret); + } + if (!ret && (ret= trans_commit_stmt(thd))) + { + WSREP_WARN("wsrep_write_skip_event: statt commit failed"); + } + DBUG_RETURN(ret); +} + +int wsrep_write_dummy_event_low(THD *thd, const char *msg) +{ + ::abort(); + return 0; +} + +int wsrep_write_dummy_event(THD *orig_thd, const char *msg) +{ + return 0; +} + +bool wsrep_commit_will_write_binlog(THD *thd) +{ + return (!wsrep_emulate_bin_log && /* binlog enabled*/ + (wsrep_thd_is_local(thd) || /* local thd*/ + (thd->wsrep_applier_service && /* applier and log-slave-updates */ + opt_log_slave_updates))); +} + +/* + The last THD/commit_for_wait registered for group commit. +*/ +static wait_for_commit *commit_order_tail= NULL; + +void wsrep_register_for_group_commit(THD *thd) +{ + DBUG_ENTER("wsrep_register_for_group_commit"); + if (wsrep_emulate_bin_log) + { + /* Binlog is off, no need to maintain group commit queue */ + DBUG_VOID_RETURN; + } + + DBUG_ASSERT(thd->wsrep_trx().ordered()); + + wait_for_commit *wfc= thd->wait_for_commit_ptr= &thd->wsrep_wfc; + + mysql_mutex_lock(&LOCK_wsrep_group_commit); + if (commit_order_tail) + { + wfc->register_wait_for_prior_commit(commit_order_tail); + } + commit_order_tail= thd->wait_for_commit_ptr; + mysql_mutex_unlock(&LOCK_wsrep_group_commit); + + /* + Now we have queued for group commit. 
If the commit will go + through TC log_and_order(), the commit ordering is done + by TC group commit. Otherwise the wait for prior + commits to complete is done in ha_commit_one_phase(). + */ + DBUG_VOID_RETURN; +} + +void wsrep_unregister_from_group_commit(THD *thd) +{ + DBUG_ASSERT(thd->wsrep_trx().ordered()); + wait_for_commit *wfc= thd->wait_for_commit_ptr; + + if (wfc) + { + mysql_mutex_lock(&LOCK_wsrep_group_commit); + wfc->unregister_wait_for_prior_commit(); + thd->wakeup_subsequent_commits(0); + + /* The last one queued for group commit has completed commit, it is + safe to set tail to NULL. */ + if (wfc == commit_order_tail) + commit_order_tail= NULL; + mysql_mutex_unlock(&LOCK_wsrep_group_commit); + thd->wait_for_commit_ptr= NULL; + } +} diff --git a/sql/wsrep_binlog.h b/sql/wsrep_binlog.h new file mode 100644 index 00000000..252fbe60 --- /dev/null +++ b/sql/wsrep_binlog.h @@ -0,0 +1,103 @@ +/* Copyright (C) 2013 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA. */ + +#ifndef WSREP_BINLOG_H +#define WSREP_BINLOG_H + +#include "my_global.h" +#include "sql_class.h" // THD, IO_CACHE + +#define HEAP_PAGE_SIZE 65536 /* 64K */ +#define WSREP_MAX_WS_SIZE 2147483647 /* 2GB */ + +/* + Write the contents of a cache to a memory buffer. 
+ + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + */ +int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len); + +/* + Write the contents of a cache to wsrep provider. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + + @param len total amount of data written + @return wsrep error status + */ +int wsrep_write_cache(THD* thd, + IO_CACHE* cache, + size_t* len); + +/* Dump replication buffer to disk */ +void wsrep_dump_rbr_buf(THD *thd, const void* rbr_buf, size_t buf_len); + +/* Dump replication buffer along with header to a file */ +void wsrep_dump_rbr_buf_with_header(THD *thd, const void *rbr_buf, + size_t buf_len); + +/** + Write a skip event into binlog. + + @param thd Thread object pointer + @return Zero in case of success, non-zero on failure. +*/ +int wsrep_write_skip_event(THD* thd); + +/* + Write dummy event into binlog in place of unused GTID. + The binlog write is done in thd context. +*/ +int wsrep_write_dummy_event_low(THD *thd, const char *msg); +/* + Write dummy event to binlog in place of unused GTID and + commit. The binlog write and commit are done in temporary + thd context, the original thd state is not altered. +*/ +int wsrep_write_dummy_event(THD* thd, const char *msg); + +void wsrep_register_binlog_handler(THD *thd, bool trx); + +/** + Return true if committing THD will write to binlog during commit. + This is the case for: + - Local THD, binlog is open + - Replaying THD, binlog is open + - Applier THD, log-slave-updates is enabled +*/ +bool wsrep_commit_will_write_binlog(THD *thd); + +/** + Register THD for group commit. The wsrep_trx must be in committing state, + i.e. the call must be done after wsrep_before_commit() but before + commit order is released. 
+ + This call will release commit order critical section if it is + determined that the commit will go through binlog group commit. + */ +void wsrep_register_for_group_commit(THD *thd); + +/** + Deregister THD from group commit. The wsrep_trx must be in committing state, + as for wsrep_register_for_group_commit() above. + + This call must be used only for THDs which will not go through + binlog group commit. +*/ +void wsrep_unregister_from_group_commit(THD *thd); + +#endif /* WSREP_BINLOG_H */ diff --git a/sql/wsrep_check_opts.cc b/sql/wsrep_check_opts.cc new file mode 100644 index 00000000..b17a5f16 --- /dev/null +++ b/sql/wsrep_check_opts.cc @@ -0,0 +1,102 @@ +/* Copyright 2011 Codership Oy + Copyright 2014 SkySQL Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "mysqld.h" +#include "sys_vars_shared.h" +#include "wsrep.h" +#include "wsrep_sst.h" +#include "wsrep_mysqld.h" + +extern char *my_bind_addr_str; + +int wsrep_check_opts() +{ + if (wsrep_slave_threads > 1) + { + sys_var *autoinc_lock_mode= + intern_find_sys_var(STRING_WITH_LEN("innodb_autoinc_lock_mode")); + bool is_null; + if (autoinc_lock_mode && + autoinc_lock_mode->val_int(&is_null, 0, OPT_GLOBAL, 0) != 2) + { + WSREP_ERROR("Parallel applying (wsrep_slave_threads > 1) requires" + " innodb_autoinc_lock_mode= 2."); + return 1; + } + } + + if (locked_in_memory) + { + WSREP_ERROR("Memory locking is not supported (locked_in_memory=ON)"); + return 1; + } + + if (!strcasecmp(wsrep_sst_method, "mysqldump")) + { + if (my_bind_addr_str && + (!strcasecmp(my_bind_addr_str, "127.0.0.1") || + !strcasecmp(my_bind_addr_str, "localhost"))) + { + WSREP_WARN("wsrep_sst_method is set to 'mysqldump' yet " + "mysqld bind_address is set to '%s', which makes it " + "impossible to receive state transfer from another " + "node, since mysqld won't accept such connections. " + "If you wish to use mysqldump state transfer method, " + "set bind_address to allow mysql client connections " + "from other cluster members (e.g. 
0.0.0.0).", + my_bind_addr_str); + } + } + else + { + // non-mysqldump SST requires wsrep_cluster_address on startup + if (!wsrep_cluster_address_exists()) + { + WSREP_ERROR ("%s SST method requires wsrep_cluster_address to be " + "configured on startup.", wsrep_sst_method); + return 1; + } + } + + if (strcasecmp(wsrep_sst_receive_address, "AUTO")) + { + if (!strncasecmp(wsrep_sst_receive_address, STRING_WITH_LEN("127.0.0.1")) || + !strncasecmp(wsrep_sst_receive_address, STRING_WITH_LEN("localhost"))) + { + WSREP_WARN("wsrep_sst_receive_address is set to '%s' which " + "makes it impossible for another host to reach this " + "one. Please set it to the address which this node " + "can be connected at by other cluster members.", + wsrep_sst_receive_address); + } + } + + if (strcasecmp(wsrep_provider, "NONE")) + { + if (global_system_variables.binlog_format != BINLOG_FORMAT_ROW) + { + WSREP_ERROR("Only binlog_format= 'ROW' is currently supported. " + "Configured value: '%s'. Please adjust your " + "configuration.", + binlog_format_names[global_system_variables.binlog_format]); + + return 1; + } + } + return 0; +} + diff --git a/sql/wsrep_client_service.cc b/sql/wsrep_client_service.cc new file mode 100644 index 00000000..d3b4a181 --- /dev/null +++ b/sql/wsrep_client_service.cc @@ -0,0 +1,380 @@ +/* Copyright 2018-2023 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "wsrep_client_service.h" +#include "wsrep_high_priority_service.h" +#include "wsrep_binlog.h" /* wsrep_dump_rbr_buf() */ +#include "wsrep_schema.h" /* remove_fragments() */ +#include "wsrep_thd.h" +#include "wsrep_xid.h" +#include "wsrep_trans_observer.h" +#include "wsrep_server_state.h" +#include "wsrep_mysqld.h" + +#include "sql_base.h" /* close_temporary_table() */ +#include "sql_class.h" /* THD */ +#include "sql_parse.h" /* stmt_causes_implicit_commit() */ +#include "rpl_filter.h" /* binlog_filter */ +#include "rpl_rli.h" /* Relay_log_info */ +#include "slave.h" /* opt_log_slave_updates */ +#include "transaction.h" /* trans_commit()... */ +#include "log.h" /* stmt_has_updated_trans_table() */ +#include "mysql/service_debug_sync.h" +#include "mysql/psi/mysql_thread.h" /* mysql_mutex_assert_owner() */ + +namespace +{ + +void debug_sync_caller(THD* thd, const char* sync_point) +{ +#ifdef ENABLED_DEBUG_SYNC_OUT + debug_sync_set_action(thd, sync_point, strlen(sync_point)); +#endif +#ifdef ENABLED_DEBUG_SYNC + if (debug_sync_service) debug_sync_service(thd,sync_point,strlen(sync_point)); +#endif + +} +} + +Wsrep_client_service::Wsrep_client_service(THD* thd, + Wsrep_client_state& client_state) + : wsrep::client_service() + , m_thd(thd) + , m_client_state(client_state) +{ } + +void Wsrep_client_service::store_globals() +{ + wsrep_store_threadvars(m_thd); +} + +void Wsrep_client_service::reset_globals() +{ + wsrep_reset_threadvars(m_thd); +} + +bool Wsrep_client_service::interrupted( + wsrep::unique_lock& lock WSREP_UNUSED) const +{ + DBUG_ASSERT(m_thd == current_thd); + mysql_mutex_assert_owner(static_cast(lock.mutex()->native())); + bool ret= (m_thd->killed != NOT_KILLED); + if (ret) + { + WSREP_DEBUG("wsrep state is interrupted, THD::killed %d trx 
state %d", + m_thd->killed, m_thd->wsrep_trx().state()); + } + return ret; +} + +int Wsrep_client_service::prepare_data_for_replication() +{ + DBUG_ASSERT(m_thd == current_thd); + DBUG_ENTER("Wsrep_client_service::prepare_data_for_replication"); + size_t data_len= 0; + IO_CACHE* transactional_cache= wsrep_get_cache(m_thd, true); + IO_CACHE* stmt_cache= wsrep_get_cache(m_thd, false); + + if (transactional_cache || stmt_cache) + { + m_thd->binlog_flush_pending_rows_event(true); + + size_t transactional_data_len= 0; + size_t stmt_data_len= 0; + + // Write transactional cache + if (transactional_cache && + wsrep_write_cache(m_thd, transactional_cache, &transactional_data_len)) + { + WSREP_ERROR("rbr write fail, data_len: %zu", + data_len); + // wsrep_override_error(m_thd, ER_ERROR_DURING_COMMIT); + DBUG_RETURN(1); + } + + // Write stmt cache + if (stmt_cache && wsrep_write_cache(m_thd, stmt_cache, &stmt_data_len)) + { + WSREP_ERROR("rbr write fail, data_len: %zu", + data_len); + // wsrep_override_error(m_thd, ER_ERROR_DURING_COMMIT); + DBUG_RETURN(1); + } + + // Complete data written from both caches + data_len = transactional_data_len + stmt_data_len; + } + + if (data_len == 0) + { + if (m_thd->get_stmt_da()->is_ok() && + m_thd->get_stmt_da()->affected_rows() > 0 && + !binlog_filter->is_on() && + !m_thd->wsrep_trx().is_streaming()) + { + WSREP_DEBUG("empty rbr buffer, query: %s, " + "affected rows: %llu, " + "changed tables: %d, " + "sql_log_bin: %d", + wsrep_thd_query(m_thd), + m_thd->get_stmt_da()->affected_rows(), + stmt_has_updated_trans_table(m_thd), + m_thd->variables.sql_log_bin); + } + else + { + WSREP_DEBUG("empty rbr buffer, query: %s", wsrep_thd_query(m_thd)); + } + } + DBUG_RETURN(0); +} + + +void Wsrep_client_service::cleanup_transaction() +{ + DBUG_ASSERT(m_thd == current_thd); + if (WSREP_EMULATE_BINLOG(m_thd)) wsrep_thd_binlog_trx_reset(m_thd); + m_thd->wsrep_affected_rows= 0; +} + + +int Wsrep_client_service::prepare_fragment_for_replication( + 
wsrep::mutable_buffer& buffer, size_t& log_position) +{ + DBUG_ASSERT(m_thd == current_thd); + THD* thd= m_thd; + DBUG_ENTER("Wsrep_client_service::prepare_fragment_for_replication"); + IO_CACHE* cache= wsrep_get_cache(thd, true); + thd->binlog_flush_pending_rows_event(true); + + if (!cache) + { + DBUG_RETURN(0); + } + + const my_off_t saved_pos(my_b_tell(cache)); + if (reinit_io_cache(cache, READ_CACHE, thd->wsrep_sr().log_position(), 0, 0)) + { + DBUG_RETURN(1); + } + + int ret= 0; + size_t total_length= 0; + size_t length= my_b_bytes_in_cache(cache); + + if (!length) + { + length= my_b_fill(cache); + } + + if (length > 0) + { + do + { + total_length+= length; + if (total_length > wsrep_max_ws_size) + { + WSREP_WARN("transaction size limit (%lu) exceeded: %zu", + wsrep_max_ws_size, total_length); + ret= 1; + goto cleanup; + } + + buffer.push_back(reinterpret_cast(cache->read_pos), + reinterpret_cast(cache->read_pos + length)); + cache->read_pos= cache->read_end; + } + while (cache->file >= 0 && (length= my_b_fill(cache))); + } + DBUG_ASSERT(total_length == buffer.size()); + log_position= saved_pos; +cleanup: + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_WARN("Failed to reinitialize IO cache"); + ret= 1; + } + DBUG_RETURN(ret); +} + +int Wsrep_client_service::remove_fragments() +{ + DBUG_ENTER("Wsrep_client_service::remove_fragments"); + DEBUG_SYNC(m_thd, "wsrep_before_fragment_removal"); + if (wsrep_schema->remove_fragments(m_thd, + Wsrep_server_state::instance().id(), + m_thd->wsrep_trx().id(), + m_thd->wsrep_sr().fragments())) + { + WSREP_DEBUG("Failed to remove fragments from SR storage for transaction " + "%llu, %llu", + m_thd->thread_id, m_thd->wsrep_trx().id().get()); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + +bool Wsrep_client_service::statement_allowed_for_streaming() const +{ + /* + Todo: Decide if implicit commit is allowed with streaming + replication. 
+ !stmt_causes_implicit_commit(m_thd, CF_IMPLICIT_COMMIT_BEGIN); + */ + return true; +} + +size_t Wsrep_client_service::bytes_generated() const +{ + IO_CACHE* cache= wsrep_get_cache(m_thd, true); + if (cache) + { + size_t pending_rows_event_length= 0; + if (Rows_log_event* ev= m_thd->binlog_get_pending_rows_event(true)) + { + pending_rows_event_length= ev->get_data_size(); + } + return my_b_tell(cache) + pending_rows_event_length; + } + return 0; +} + +void Wsrep_client_service::will_replay() +{ + DBUG_ASSERT(m_thd == current_thd); + mysql_mutex_lock(&LOCK_wsrep_replaying); + ++wsrep_replaying; + mysql_mutex_unlock(&LOCK_wsrep_replaying); +} + +void Wsrep_client_service::signal_replayed() +{ + DBUG_ASSERT(m_thd == current_thd); + mysql_mutex_lock(&LOCK_wsrep_replaying); + --wsrep_replaying; + DBUG_ASSERT(wsrep_replaying >= 0); + mysql_cond_broadcast(&COND_wsrep_replaying); + mysql_mutex_unlock(&LOCK_wsrep_replaying); +} + +enum wsrep::provider::status Wsrep_client_service::replay() +{ + + DBUG_ASSERT(m_thd == current_thd); + DBUG_ENTER("Wsrep_client_service::replay"); + + /* + Allocate separate THD for replaying to avoid tampering + original THD state during replication event applying. 
+ */ + THD *replayer_thd= new THD(true, true); + replayer_thd->thread_stack= m_thd->thread_stack; + replayer_thd->real_id= pthread_self(); + replayer_thd->prior_thr_create_utime= + replayer_thd->start_utime= microsecond_interval_timer(); + replayer_thd->set_command(COM_SLEEP); + replayer_thd->reset_for_next_command(true); + + enum wsrep::provider::status ret; + { + Wsrep_replayer_service replayer_service(replayer_thd, m_thd); + wsrep::provider& provider(replayer_thd->wsrep_cs().provider()); + ret= provider.replay(replayer_thd->wsrep_trx().ws_handle(), + &replayer_service); + replayer_service.replay_status(ret); + } + + delete replayer_thd; + DBUG_RETURN(ret); +} + +enum wsrep::provider::status Wsrep_client_service::replay_unordered() +{ + DBUG_ASSERT(0); + return wsrep::provider::error_not_implemented; +} + +void Wsrep_client_service::wait_for_replayers(wsrep::unique_lock& lock) +{ + DBUG_ASSERT(m_thd == current_thd); + lock.unlock(); + mysql_mutex_lock(&LOCK_wsrep_replaying); + /* We need to check if the THD is BF aborted during condition wait. + Because the aborter does not know which condition this thread is waiting, + use timed wait and check if the THD is BF aborted in the loop. 
*/ + while (wsrep_replaying > 0 && !wsrep_is_bf_aborted(m_thd)) + { + struct timespec wait_time; + set_timespec_nsec(wait_time, 10000000L); + mysql_cond_timedwait(&COND_wsrep_replaying, &LOCK_wsrep_replaying, + &wait_time); + } + mysql_mutex_unlock(&LOCK_wsrep_replaying); + lock.lock(); +} + +enum wsrep::provider::status Wsrep_client_service::commit_by_xid() +{ + DBUG_ASSERT(0); + return wsrep::provider::error_not_implemented; +} + +void Wsrep_client_service::debug_sync(const char* sync_point) +{ + DBUG_ASSERT(m_thd == current_thd); + debug_sync_caller(m_thd, sync_point); +} + +void Wsrep_client_service::debug_crash(const char* crash_point) +{ + // DBUG_ASSERT(m_thd == current_thd); + DBUG_EXECUTE_IF(crash_point, DBUG_SUICIDE(); ); +} + +int Wsrep_client_service::bf_rollback() +{ + DBUG_ASSERT(m_thd == current_thd); + DBUG_ENTER("Wsrep_client_service::bf_rollback"); + + int ret= (trans_rollback_stmt(m_thd) || trans_rollback(m_thd)); + + WSREP_DEBUG("::bf_rollback() thread: %lu, client_state %s " + "client_mode %s trans_state %s killed %d", + thd_get_thread_id(m_thd), + wsrep_thd_client_state_str(m_thd), + wsrep_thd_client_mode_str(m_thd), + wsrep_thd_transaction_state_str(m_thd), + m_thd->killed); + + /* If client is quiting all below will be done in THD::cleanup() + TODO: why we need this any other case? 
*/ + if (m_thd->wsrep_cs().state() != wsrep::client_state::s_quitting) + { + if (m_thd->locked_tables_mode && m_thd->lock) + { + if (m_thd->locked_tables_list.unlock_locked_tables(m_thd)) + ret= 1; + m_thd->variables.option_bits&= ~OPTION_TABLE_LOCK; + } + if (m_thd->global_read_lock.is_acquired()) + { + m_thd->global_read_lock.unlock_global_read_lock(m_thd); + } + m_thd->release_transactional_locks(); + } + + DBUG_RETURN(ret); +} diff --git a/sql/wsrep_client_service.h b/sql/wsrep_client_service.h new file mode 100644 index 00000000..253d2f43 --- /dev/null +++ b/sql/wsrep_client_service.h @@ -0,0 +1,74 @@ +/* Copyright 2018 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/** @file wsrep_client_service.h + + This file provides declaratios for client service implementation. + See wsrep/client_service.hpp for interface documentation. 
+*/ + +#ifndef WSREP_CLIENT_SERVICE_H +#define WSREP_CLIENT_SERVICE_H + +/* wsrep-lib */ +#include "wsrep/client_service.hpp" +#include "wsrep/client_state.hpp" +#include "wsrep/exception.hpp" /* not_implemented_error, remove when finished */ + +class THD; +class Wsrep_client_state; +class Wsrep_high_priority_context; + +class Wsrep_client_service : public wsrep::client_service +{ +public: + Wsrep_client_service(THD*, Wsrep_client_state&); + + bool interrupted(wsrep::unique_lock&) const; + void reset_globals(); + void store_globals(); + int prepare_data_for_replication(); + void cleanup_transaction(); + bool statement_allowed_for_streaming() const; + size_t bytes_generated() const; + int prepare_fragment_for_replication(wsrep::mutable_buffer&, size_t&); + int remove_fragments(); + void emergency_shutdown() + { throw wsrep::not_implemented_error(); } + void will_replay(); + void signal_replayed(); + enum wsrep::provider::status replay(); + enum wsrep::provider::status replay_unordered(); + void wait_for_replayers(wsrep::unique_lock&); + enum wsrep::provider::status commit_by_xid(); + bool is_explicit_xa() + { + return false; + } + bool is_xa_rollback() + { + return false; + } + void debug_sync(const char*); + void debug_crash(const char*); + int bf_rollback(); +private: + friend class Wsrep_server_service; + THD* m_thd; + Wsrep_client_state& m_client_state; +}; + + +#endif /* WSREP_CLIENT_SERVICE_H */ diff --git a/sql/wsrep_client_state.h b/sql/wsrep_client_state.h new file mode 100644 index 00000000..403bfa81 --- /dev/null +++ b/sql/wsrep_client_state.h @@ -0,0 +1,47 @@ +/* Copyright 2018 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef WSREP_CLIENT_STATE_H +#define WSREP_CLIENT_STATE_H + +/* wsrep-lib */ +#include "wsrep/client_state.hpp" +#include "my_global.h" + +class THD; + +class Wsrep_client_state : public wsrep::client_state +{ +public: + Wsrep_client_state(THD* thd, + wsrep::mutex& mutex, + wsrep::condition_variable& cond, + wsrep::server_state& server_state, + wsrep::client_service& client_service, + const wsrep::client_id& id) + : wsrep::client_state(mutex, + cond, + server_state, + client_service, + id, + wsrep::client_state::m_local) + , m_thd(thd) + { } + THD* thd() { return m_thd; } +private: + THD* m_thd; +}; + +#endif /* WSREP_CLIENT_STATE_H */ diff --git a/sql/wsrep_condition_variable.h b/sql/wsrep_condition_variable.h new file mode 100644 index 00000000..d9798bb9 --- /dev/null +++ b/sql/wsrep_condition_variable.h @@ -0,0 +1,53 @@ +/* Copyright 2018 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef WSREP_CONDITION_VARIABLE_H +#define WSREP_CONDITION_VARIABLE_H + +/* wsrep-lib */ +#include "wsrep/condition_variable.hpp" + +/* implementation */ +#include "my_pthread.h" + +class Wsrep_condition_variable : public wsrep::condition_variable +{ +public: + + Wsrep_condition_variable(mysql_cond_t* cond) + : m_cond(cond) + { } + ~Wsrep_condition_variable() = default; + + void notify_one() + { + mysql_cond_signal(m_cond); + } + + void notify_all() + { + mysql_cond_broadcast(m_cond); + } + + void wait(wsrep::unique_lock& lock) + { + mysql_mutex_t* mutex= static_cast(lock.mutex()->native()); + mysql_cond_wait(m_cond, mutex); + } +private: + mysql_cond_t* m_cond; +}; + +#endif /* WSREP_CONDITION_VARIABLE_H */ diff --git a/sql/wsrep_dummy.cc b/sql/wsrep_dummy.cc new file mode 100644 index 00000000..e1508884 --- /dev/null +++ b/sql/wsrep_dummy.cc @@ -0,0 +1,169 @@ +/* Copyright (C) 2014, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA. 
*/ + +#include "mariadb.h" +#include +#include + +my_bool wsrep_thd_is_BF(const THD *, my_bool) +{ return 0; } + +int wsrep_is_wsrep_xid(const void* xid) +{ return 0; } + +long long wsrep_xid_seqno(const XID* x) +{ return -1; } + +const unsigned char* wsrep_xid_uuid(const XID*) +{ + static const unsigned char uuid[16]= {0}; + return uuid; +} + +bool wsrep_prepare_key_for_innodb(THD* thd, const uchar*, size_t, const uchar*, size_t, struct wsrep_buf*, size_t*) +{ return 1; } + +bool wsrep_prepare_key(const uchar*, size_t, const uchar*, size_t, struct wsrep_buf*, size_t*) +{ return 0; } + +struct wsrep *get_wsrep() +{ return 0; } + +my_bool get_wsrep_recovery() +{ return 0; } + +bool wsrep_consistency_check(THD *) +{ return 0; } + +void wsrep_lock_rollback() +{ } + +my_bool wsrep_on(const THD *) +{ return 0; } + +void wsrep_thd_LOCK(const THD *) +{ } + +int wsrep_thd_TRYLOCK(const THD *) +{ + return 0; +} + +void wsrep_thd_UNLOCK(const THD *) +{ } + +void wsrep_thd_kill_LOCK(const THD *) +{ } + +void wsrep_thd_kill_UNLOCK(const THD *) +{ } + +const char *wsrep_thd_conflict_state_str(THD *) +{ return 0; } + +const char *wsrep_thd_exec_mode_str(THD *) +{ return NULL; } + +const char *wsrep_thd_query(const THD *) +{ return "NULL"; } + +const char *wsrep_thd_query_state_str(THD *) +{ return 0; } + +int wsrep_thd_retry_counter(const THD *) +{ return 0; } + +bool wsrep_thd_ignore_table(THD *) +{ return 0; } + +long long wsrep_thd_trx_seqno(const THD *) +{ return -1; } + +my_bool wsrep_thd_is_aborting(const THD *) +{ return 0; } + +void wsrep_set_data_home_dir(const char *) +{ } + +my_bool wsrep_thd_is_local(const THD *) +{ return 0; } + +void wsrep_thd_self_abort(THD *) +{ } + +int wsrep_thd_append_key(THD *, const struct wsrep_key*, int, enum Wsrep_service_key_type) +{ return 0; } + +int wsrep_thd_append_table_key(THD *, const char*, const char*, enum Wsrep_service_key_type) +{ return 0; } + +my_bool wsrep_thd_is_local_transaction(const THD*) +{ return 0; } + +const char* 
wsrep_thd_client_state_str(const THD*) +{ return 0; } + +const char* wsrep_thd_client_mode_str(const THD*) +{ return 0; } + +const char* wsrep_thd_transaction_state_str(const THD*) +{ return 0; } + +query_id_t wsrep_thd_transaction_id(const THD *) +{ return 0; } + +my_bool wsrep_thd_bf_abort(THD *, THD *, my_bool) +{ return 0; } + +my_bool wsrep_thd_order_before(const THD*, const THD *) +{ return 0; } + +void wsrep_handle_SR_rollback(THD*, THD*) +{ } + +my_bool wsrep_thd_skip_locking(const THD*) +{ return 0;} + +const char* wsrep_get_sr_table_name() +{ return 0; } + +my_bool wsrep_get_debug() +{ return 0;} + +void wsrep_commit_ordered(THD* ) +{ } + +void wsrep_log(void (*)(const char *, ...), const char *, ...) +{ +} + +my_bool wsrep_thd_is_applying(const THD*) +{ return 0;} + +my_bool wsrep_thd_has_ignored_error(const THD*) +{ return 0;} + +void wsrep_thd_set_ignored_error(THD*, my_bool) +{ } +ulong wsrep_OSU_method_get(const THD*) +{ return 0;} + +void wsrep_report_bf_lock_wait(const THD*, + unsigned long long) +{} + +void wsrep_thd_set_PA_unsafe(THD*) +{} + diff --git a/sql/wsrep_high_priority_service.cc b/sql/wsrep_high_priority_service.cc new file mode 100644 index 00000000..fb93273d --- /dev/null +++ b/sql/wsrep_high_priority_service.cc @@ -0,0 +1,771 @@ +/* Copyright 2018-2023 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "wsrep_high_priority_service.h" +#include "wsrep_applier.h" +#include "wsrep_binlog.h" +#include "wsrep_schema.h" +#include "wsrep_xid.h" +#include "wsrep_trans_observer.h" +#include "wsrep_server_state.h" + +#include "sql_class.h" /* THD */ +#include "transaction.h" +#include "debug_sync.h" +/* RLI */ +#include "rpl_rli.h" +#define NUMBER_OF_FIELDS_TO_IDENTIFY_COORDINATOR 1 +#define NUMBER_OF_FIELDS_TO_IDENTIFY_WORKER 2 +#include "slave.h" +#include "rpl_mi.h" + +namespace +{ +/* + Scoped mode for applying non-transactional write sets (TOI) + */ +class Wsrep_non_trans_mode +{ +public: + Wsrep_non_trans_mode(THD* thd, const wsrep::ws_meta& ws_meta) + : m_thd(thd) + , m_option_bits(thd->variables.option_bits) + , m_server_status(thd->server_status) + { + m_thd->variables.option_bits&= ~OPTION_BEGIN; + m_thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); + m_thd->wsrep_cs().enter_toi_mode(ws_meta); + } + ~Wsrep_non_trans_mode() + { + m_thd->variables.option_bits= m_option_bits; + m_thd->server_status= m_server_status; + m_thd->wsrep_cs().leave_toi_mode(); + } +private: + Wsrep_non_trans_mode(const Wsrep_non_trans_mode&); + Wsrep_non_trans_mode& operator=(const Wsrep_non_trans_mode&); + THD* m_thd; + ulonglong m_option_bits; + uint m_server_status; +}; +} + +static rpl_group_info* wsrep_relay_group_init(THD* thd, const char* log_fname) +{ + Relay_log_info* rli= new Relay_log_info(false); + + if (!rli->relay_log.description_event_for_exec) + { + rli->relay_log.description_event_for_exec= + new Format_description_log_event(4); + } + + static LEX_CSTRING connection_name= { STRING_WITH_LEN("wsrep") }; + + /* + Master_info's constructor initializes rpl_filter by either an already + constructed Rpl_filter object from global 
'rpl_filters' list if the + specified connection name is same, or it constructs a new Rpl_filter + object and adds it to rpl_filters. This object is later destructed by + Mater_info's destructor by looking it up based on connection name in + rpl_filters list. + + However, since all Master_info objects created here would share same + connection name ("wsrep"), destruction of any of the existing Master_info + objects (in wsrep_return_from_bf_mode()) would free rpl_filter referenced + by any/all existing Master_info objects. + + In order to avoid that, we have added a check in Master_info's destructor + to not free the "wsrep" rpl_filter. It will eventually be freed by + free_all_rpl_filters() when server terminates. + */ + rli->mi= new Master_info(&connection_name, false); + + struct rpl_group_info *rgi= new rpl_group_info(rli); + rgi->thd= rli->sql_driver_thd= thd; + + if ((rgi->deferred_events_collecting= rli->mi->rpl_filter->is_on())) + { + rgi->deferred_events= new Deferred_log_events(rli); + } + + return rgi; +} + +static void wsrep_setup_uk_and_fk_checks(THD* thd) +{ + /* Tune FK and UK checking policy. These are reset back to original + in Wsrep_high_priority_service destructor. 
*/ + if (wsrep_slave_UK_checks == FALSE) + thd->variables.option_bits|= OPTION_RELAXED_UNIQUE_CHECKS; + else + thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS; + + if (wsrep_slave_FK_checks == FALSE) + thd->variables.option_bits|= OPTION_NO_FOREIGN_KEY_CHECKS; + else + thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS; +} + +static int apply_events(THD* thd, + Relay_log_info* rli, + const wsrep::const_buffer& data, + wsrep::mutable_buffer& err) +{ + int const ret= wsrep_apply_events(thd, rli, data.data(), data.size()); + if (ret || wsrep_thd_has_ignored_error(thd)) + { + if (ret) + { + wsrep_store_error(thd, err); + } + wsrep_dump_rbr_buf_with_header(thd, data.data(), data.size()); + } + return ret; +} + +/**************************************************************************** + High priority service +*****************************************************************************/ + +Wsrep_high_priority_service::Wsrep_high_priority_service(THD* thd) + : wsrep::high_priority_service(Wsrep_server_state::instance()) + , wsrep::high_priority_context(thd->wsrep_cs()) + , m_thd(thd) + , m_rli() +{ + LEX_CSTRING db_str= { NULL, 0 }; + m_shadow.option_bits = thd->variables.option_bits; + m_shadow.server_status= thd->server_status; + m_shadow.vio = thd->net.vio; + m_shadow.tx_isolation = thd->variables.tx_isolation; + m_shadow.db = (char *)thd->db.str; + m_shadow.db_length = thd->db.length; + m_shadow.user_time = thd->user_time; + m_shadow.row_count_func= thd->get_row_count_func(); + m_shadow.wsrep_applier= thd->wsrep_applier; + + /* Disable general logging on applier threads */ + thd->variables.option_bits |= OPTION_LOG_OFF; + + /* enable binlogging regardless of log_slave_updates setting + this is for ensuring that both local and applier transaction go through + same commit ordering algorithm in group commit control + */ + thd->variables.option_bits|= OPTION_BIN_LOG; + + thd->net.vio= 0; + thd->reset_db(&db_str); + thd->clear_error(); + 
thd->variables.tx_isolation= ISO_READ_COMMITTED; + thd->tx_isolation = ISO_READ_COMMITTED; + + /* From trans_begin() */ + thd->variables.option_bits|= OPTION_BEGIN; + thd->server_status|= SERVER_STATUS_IN_TRANS; + + /* Make THD wsrep_applier so that it cannot be killed */ + thd->wsrep_applier= true; + + if (!thd->wsrep_rgi) thd->wsrep_rgi= wsrep_relay_group_init(thd, "wsrep_relay"); + + m_rgi= thd->wsrep_rgi; + m_rgi->thd= thd; + m_rli= m_rgi->rli; + thd_proc_info(thd, "wsrep applier idle"); +} + +Wsrep_high_priority_service::~Wsrep_high_priority_service() +{ + THD* thd= m_thd; + thd->variables.option_bits = m_shadow.option_bits; + thd->server_status = m_shadow.server_status; + thd->net.vio = m_shadow.vio; + thd->variables.tx_isolation= m_shadow.tx_isolation; + LEX_CSTRING db_str= { m_shadow.db, m_shadow.db_length }; + thd->reset_db(&db_str); + thd->user_time = m_shadow.user_time; + + if (thd->wsrep_rgi && thd->wsrep_rgi->rli) + delete thd->wsrep_rgi->rli->mi; + if (thd->wsrep_rgi) + delete thd->wsrep_rgi->rli; + delete thd->wsrep_rgi; + thd->wsrep_rgi= NULL; + + thd->set_row_count_func(m_shadow.row_count_func); + thd->wsrep_applier = m_shadow.wsrep_applier; +} + +int Wsrep_high_priority_service::start_transaction( + const wsrep::ws_handle& ws_handle, const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER(" Wsrep_high_priority_service::start_transaction"); + DBUG_RETURN(m_thd->wsrep_cs().start_transaction(ws_handle, ws_meta) || + trans_begin(m_thd)); +} + +const wsrep::transaction& Wsrep_high_priority_service::transaction() const +{ + DBUG_ENTER(" Wsrep_high_priority_service::transaction"); + DBUG_RETURN(m_thd->wsrep_trx()); +} + +int Wsrep_high_priority_service::next_fragment(const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER(" Wsrep_high_priority_service::next_fragment"); + DBUG_RETURN(m_thd->wsrep_cs().next_fragment(ws_meta)); +} + +int Wsrep_high_priority_service::adopt_transaction( + const wsrep::transaction& transaction) +{ + DBUG_ENTER(" 
Wsrep_high_priority_service::adopt_transaction"); + /* Adopt transaction first to set up transaction meta data for + trans begin. If trans_begin() fails for some reason, roll back + the wsrep transaction before return. */ + m_thd->wsrep_cs().adopt_transaction(transaction); + int ret= trans_begin(m_thd); + if (ret) + { + m_thd->wsrep_cs().before_rollback(); + m_thd->wsrep_cs().after_rollback(); + } + DBUG_RETURN(ret); +} + +int Wsrep_high_priority_service::append_fragment_and_commit( + const wsrep::ws_handle& ws_handle, + const wsrep::ws_meta& ws_meta, + const wsrep::const_buffer& data, + const wsrep::xid& xid WSREP_UNUSED) +{ + DBUG_ENTER("Wsrep_high_priority_service::append_fragment_and_commit"); + int ret= start_transaction(ws_handle, ws_meta); + /* + Start transaction explicitly to avoid early commit via + trans_commit_stmt() in append_fragment() + */ + ret= ret || trans_begin(m_thd); + ret= ret || wsrep_schema->append_fragment(m_thd, + ws_meta.server_id(), + ws_meta.transaction_id(), + ws_meta.seqno(), + ws_meta.flags(), + data); + + /* + Note: The commit code below seems to be identical to + Wsrep_storage_service::commit(). Consider implementing + common utility function to deal with commit. + */ + const bool do_binlog_commit= (opt_log_slave_updates && + wsrep_gtid_mode && + m_thd->variables.gtid_seq_no); + /* + Write skip event into binlog if gtid_mode is on. This is to + maintain gtid continuity. 
+ */ + if (do_binlog_commit) + { + ret= wsrep_write_skip_event(m_thd); + } + + if (!ret) + { + ret= m_thd->wsrep_cs().prepare_for_ordering(ws_handle, + ws_meta, true); + } + + ret= ret || trans_commit(m_thd); + ret= ret || (m_thd->wsrep_cs().after_applying(), 0); + + m_thd->release_transactional_locks(); + + free_root(m_thd->mem_root, MYF(MY_KEEP_PREALLOC)); + + thd_proc_info(m_thd, "wsrep applier committed"); + + DBUG_RETURN(ret); +} + +int Wsrep_high_priority_service::remove_fragments(const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER("Wsrep_high_priority_service::remove_fragments"); + int ret= wsrep_schema->remove_fragments(m_thd, + ws_meta.server_id(), + ws_meta.transaction_id(), + m_thd->wsrep_sr().fragments()); + DBUG_RETURN(ret); +} + +int Wsrep_high_priority_service::commit(const wsrep::ws_handle& ws_handle, + const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER("Wsrep_high_priority_service::commit"); + THD* thd= m_thd; + DBUG_ASSERT(thd->wsrep_trx().active()); + thd->wsrep_cs().prepare_for_ordering(ws_handle, ws_meta, true); + thd_proc_info(thd, "committing"); + int ret=0; + + const bool is_ordered= !ws_meta.seqno().is_undefined(); + + if (!thd->transaction->stmt.is_empty()) + ret= trans_commit_stmt(thd); + + if (ret == 0) + ret= trans_commit(thd); + + if (ret == 0) + { + m_rgi->cleanup_context(thd, 0); + } + + m_thd->release_transactional_locks(); + + thd_proc_info(thd, "wsrep applier committed"); + + if (!is_ordered) + { + m_thd->wsrep_cs().before_rollback(); + m_thd->wsrep_cs().after_rollback(); + } + else if (m_thd->wsrep_trx().state() == wsrep::transaction::s_executing) + { + /* + Wsrep commit was ordered but it did not go through commit time + hooks and remains active. Cycle through commit hooks to release + commit order and to make cleanup happen in after_applying() call. + + This is a workaround for CTAS with empty result set. 
+ */ + WSREP_DEBUG("Commit not finished for applier %llu", thd->thread_id); + ret= ret || m_thd->wsrep_cs().before_commit() || + m_thd->wsrep_cs().ordered_commit() || + m_thd->wsrep_cs().after_commit(); + } + + thd->lex->sql_command= SQLCOM_END; + + free_root(thd->mem_root, MYF(MY_KEEP_PREALLOC)); + + must_exit_= check_exit_status(); + DBUG_RETURN(ret); +} + +int Wsrep_high_priority_service::rollback(const wsrep::ws_handle& ws_handle, + const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER("Wsrep_high_priority_service::rollback"); + if (ws_meta.ordered()) + { + m_thd->wsrep_cs().prepare_for_ordering(ws_handle, ws_meta, false); + } + else + { + assert(ws_meta == wsrep::ws_meta()); + assert(ws_handle == wsrep::ws_handle()); + } + int ret= (trans_rollback_stmt(m_thd) || trans_rollback(m_thd)); + + WSREP_DEBUG("::rollback() thread: %lu, client_state %s " + "client_mode %s trans_state %s killed %d", + thd_get_thread_id(m_thd), + wsrep_thd_client_state_str(m_thd), + wsrep_thd_client_mode_str(m_thd), + wsrep_thd_transaction_state_str(m_thd), + m_thd->killed); + + m_thd->release_transactional_locks(); + + free_root(m_thd->mem_root, MYF(MY_KEEP_PREALLOC)); + + DBUG_RETURN(ret); +} + +int Wsrep_high_priority_service::apply_toi(const wsrep::ws_meta& ws_meta, + const wsrep::const_buffer& data, + wsrep::mutable_buffer& err) +{ + DBUG_ENTER("Wsrep_high_priority_service::apply_toi"); + THD* thd= m_thd; + Wsrep_non_trans_mode non_trans_mode(thd, ws_meta); + + wsrep::client_state& client_state(thd->wsrep_cs()); + DBUG_ASSERT(client_state.in_toi()); + + thd_proc_info(thd, "wsrep applier toi"); + + WSREP_DEBUG("Wsrep_high_priority_service::apply_toi: %lld", + client_state.toi_meta().seqno().get()); + +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("sync.wsrep_apply_toi", + { + const char act[]= + "now " + "SIGNAL sync.wsrep_apply_toi_reached " + "WAIT_FOR signal.wsrep_apply_toi"; + DBUG_ASSERT(!debug_sync_set_action(thd, + STRING_WITH_LEN(act))); + };); +#endif + + int ret= apply_events(thd, 
m_rli, data, err); + wsrep_thd_set_ignored_error(thd, false); + trans_commit(thd); + + thd->close_temporary_tables(); + thd->lex->sql_command= SQLCOM_END; + + wsrep_gtid_server.signal_waiters(thd->wsrep_current_gtid_seqno, false); + wsrep_set_SE_checkpoint(client_state.toi_meta().gtid(), wsrep_gtid_server.gtid()); + + must_exit_= check_exit_status(); + + DBUG_RETURN(ret); +} + +void Wsrep_high_priority_service::store_globals() +{ + wsrep_store_threadvars(m_thd); + m_thd->wsrep_cs().acquire_ownership(); +} + +void Wsrep_high_priority_service::reset_globals() +{ + wsrep_reset_threadvars(m_thd); +} + +void Wsrep_high_priority_service::switch_execution_context(wsrep::high_priority_service& orig_high_priority_service) +{ + DBUG_ENTER("Wsrep_high_priority_service::switch_execution_context"); + Wsrep_high_priority_service& + orig_hps= static_cast(orig_high_priority_service); + m_thd->thread_stack= orig_hps.m_thd->thread_stack; + DBUG_VOID_RETURN; +} + +int Wsrep_high_priority_service::log_dummy_write_set(const wsrep::ws_handle& ws_handle, + const wsrep::ws_meta& ws_meta, + wsrep::mutable_buffer& err) +{ + DBUG_ENTER("Wsrep_high_priority_service::log_dummy_write_set"); + int ret= 0; + DBUG_PRINT("info", + ("Wsrep_high_priority_service::log_dummy_write_set: seqno=%lld", + ws_meta.seqno().get())); +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("sync.wsrep_log_dummy_write_set", + { + const char act[]= + "now " + "SIGNAL sync.wsrep_log_dummy_write_set_reached "; + DBUG_ASSERT(!debug_sync_set_action(m_thd, + STRING_WITH_LEN(act))); + };); +#endif + + if (ws_meta.ordered()) + { + wsrep::client_state& cs(m_thd->wsrep_cs()); + if (!cs.transaction().active()) + { + cs.start_transaction(ws_handle, ws_meta); + } + adopt_apply_error(err); + WSREP_DEBUG("Log dummy write set %lld", ws_meta.seqno().get()); + ret= cs.provider().commit_order_enter(ws_handle, ws_meta); + if (!(ret && opt_log_slave_updates && wsrep_gtid_mode && + m_thd->variables.gtid_seq_no)) + { + cs.before_rollback(); + 
cs.after_rollback(); + } + + if (!WSREP_EMULATE_BINLOG(m_thd)) + { + wsrep_register_for_group_commit(m_thd); + /* wait_for_prior_commit() ensures that all preceding transactions + have been committed and seqno has been synced into + storage engine. We don't release commit order here yet to + avoid following transactions to sync seqno before + wsrep_set_SE_checkpoint() below returns. This effectively pauses + group commit for the checkpoint operation, but is the only way to + ensure proper ordering. */ + m_thd->wait_for_prior_commit(); + } + + WSREP_DEBUG("checkpointing dummy write set %lld", ws_meta.seqno().get()); + wsrep_set_SE_checkpoint(ws_meta.gtid(), wsrep_gtid_server.gtid()); + + if (!WSREP_EMULATE_BINLOG(m_thd)) + { + wsrep_unregister_from_group_commit(m_thd); + } + ret= ret || cs.provider().commit_order_leave(ws_handle, ws_meta, err); + cs.after_applying(); + } + DBUG_RETURN(ret); +} + +void Wsrep_high_priority_service::adopt_apply_error(wsrep::mutable_buffer& err) +{ + m_thd->wsrep_cs().adopt_apply_error(err); +} + +void Wsrep_high_priority_service::debug_crash(const char* crash_point) +{ + DBUG_ASSERT(m_thd == current_thd); + DBUG_EXECUTE_IF(crash_point, DBUG_SUICIDE();); +} + +/**************************************************************************** + Applier service +*****************************************************************************/ + +Wsrep_applier_service::Wsrep_applier_service(THD* thd) + : Wsrep_high_priority_service(thd) +{ + thd->wsrep_applier_service= this; + thd->wsrep_cs().open(wsrep::client_id(thd->thread_id)); + thd->wsrep_cs().before_command(); + thd->wsrep_cs().debug_log_level(wsrep_debug); + if (!thd->slave_thread) + thd->system_thread_info.rpl_sql_info= + new rpl_sql_thread_info(thd->wsrep_rgi->rli->mi->rpl_filter); +} + +Wsrep_applier_service::~Wsrep_applier_service() +{ + if (!m_thd->slave_thread) + delete m_thd->system_thread_info.rpl_sql_info; + m_thd->wsrep_cs().after_command_before_result(); + 
m_thd->wsrep_cs().after_command_after_result(); + m_thd->wsrep_cs().close(); + m_thd->wsrep_cs().cleanup(); + m_thd->wsrep_applier_service= NULL; +} + +int Wsrep_applier_service::apply_write_set(const wsrep::ws_meta& ws_meta, + const wsrep::const_buffer& data, + wsrep::mutable_buffer& err) +{ + DBUG_ENTER("Wsrep_applier_service::apply_write_set"); + THD* thd= m_thd; + + thd->variables.option_bits |= OPTION_BEGIN; + thd->variables.option_bits |= OPTION_NOT_AUTOCOMMIT; + DBUG_ASSERT(thd->wsrep_trx().active()); + DBUG_ASSERT(thd->wsrep_trx().state() == wsrep::transaction::s_executing); + + thd_proc_info(thd, "applying write set"); + + /* moved dbug sync point here, after possible THD switch for SR transactions + has ben done + */ +#ifdef ENABLED_DEBUG_SYNC + /* Allow tests to block the applier thread using the DBUG facilities */ + DBUG_EXECUTE_IF("sync.wsrep_apply_cb", + { + const char act[]= + "now " + "SIGNAL sync.wsrep_apply_cb_reached " + "WAIT_FOR signal.wsrep_apply_cb"; + DBUG_ASSERT(!debug_sync_set_action(thd, + STRING_WITH_LEN(act))); + };); +#endif /* ENABLED_DEBUG_SYNC */ + + wsrep_setup_uk_and_fk_checks(thd); + int ret= apply_events(thd, m_rli, data, err); + + thd->close_temporary_tables(); + if (!ret && !(ws_meta.flags() & wsrep::provider::flag::commit)) + { + thd->wsrep_cs().fragment_applied(ws_meta.seqno()); + } + thd_proc_info(thd, "wsrep applied write set"); + DBUG_RETURN(ret); +} + +int Wsrep_applier_service::apply_nbo_begin(const wsrep::ws_meta& ws_meta, + const wsrep::const_buffer& data, + wsrep::mutable_buffer& err) +{ + DBUG_ENTER("Wsrep_applier_service::apply_nbo_begin"); + DBUG_RETURN(0); +} + +void Wsrep_applier_service::after_apply() +{ + DBUG_ENTER("Wsrep_applier_service::after_apply"); + wsrep_after_apply(m_thd); + DBUG_VOID_RETURN; +} + +bool Wsrep_applier_service::check_exit_status() const +{ + bool ret= false; + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + if (wsrep_slave_count_change < 0) + { + ++wsrep_slave_count_change; + ret= true; 
+ } + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + return ret; +} + +/**************************************************************************** + Replayer service +*****************************************************************************/ + +Wsrep_replayer_service::Wsrep_replayer_service(THD* replayer_thd, THD* orig_thd) + : Wsrep_high_priority_service(replayer_thd) + , m_orig_thd(orig_thd) + , m_da_shadow() + , m_replay_status() +{ + /* Response must not have been sent to client */ + DBUG_ASSERT(!orig_thd->get_stmt_da()->is_sent()); + /* PS reprepare observer should have been removed already + open_table() will fail if we have dangling observer here */ + DBUG_ASSERT(!orig_thd->m_reprepare_observer); + /* Replaying should happen always from after_statement() hook + after rollback, which should guarantee that there are no + transactional locks */ + DBUG_ASSERT(!orig_thd->mdl_context.has_transactional_locks()); + + replayer_thd->system_thread_info.rpl_sql_info= + new rpl_sql_thread_info(replayer_thd->wsrep_rgi->rli->mi->rpl_filter); + + /* Make a shadow copy of diagnostics area and reset */ + m_da_shadow.status= orig_thd->get_stmt_da()->status(); + if (m_da_shadow.status == Diagnostics_area::DA_OK) + { + m_da_shadow.affected_rows= orig_thd->get_stmt_da()->affected_rows(); + m_da_shadow.last_insert_id= orig_thd->get_stmt_da()->last_insert_id(); + strmake(m_da_shadow.message, orig_thd->get_stmt_da()->message(), + sizeof(m_da_shadow.message) - 1); + } + orig_thd->get_stmt_da()->reset_diagnostics_area(); + + /* Release explicit locks */ + if (orig_thd->locked_tables_mode && orig_thd->lock) + { + WSREP_WARN("releasing table lock for replaying (%llu)", + orig_thd->thread_id); + orig_thd->locked_tables_list.unlock_locked_tables(orig_thd); + orig_thd->variables.option_bits&= ~(OPTION_TABLE_LOCK); + } + + thd_proc_info(orig_thd, "wsrep replaying trx"); + + /* + Switch execution context to replayer_thd and prepare it for + replay execution. 
+ */ + /* Copy thd vars from orig_thd before reset, otherwise reset + for orig thd clears thread local storage before copy. */ + wsrep_assign_from_threadvars(replayer_thd); + wsrep_reset_threadvars(orig_thd); + wsrep_store_threadvars(replayer_thd); + wsrep_open(replayer_thd); + wsrep_before_command(replayer_thd); + replayer_thd->wsrep_cs().clone_transaction_for_replay(orig_thd->wsrep_trx()); +} + +Wsrep_replayer_service::~Wsrep_replayer_service() +{ + /* Switch execution context back to original. */ + wsrep_after_apply(m_thd); + wsrep_after_command_ignore_result(m_thd); + wsrep_close(m_thd); + wsrep_reset_threadvars(m_thd); + wsrep_store_threadvars(m_orig_thd); + + DBUG_ASSERT(!m_orig_thd->get_stmt_da()->is_sent()); + DBUG_ASSERT(!m_orig_thd->get_stmt_da()->is_set()); + + delete m_thd->system_thread_info.rpl_sql_info; + m_thd->system_thread_info.rpl_sql_info= nullptr; + + if (m_replay_status == wsrep::provider::success) + { + DBUG_ASSERT(m_thd->wsrep_cs().current_error() == wsrep::e_success); + m_orig_thd->reset_kill_query(); + my_ok(m_orig_thd, m_da_shadow.affected_rows, m_da_shadow.last_insert_id); + } + else if (m_replay_status == wsrep::provider::error_certification_failed) + { + wsrep_override_error(m_orig_thd, ER_LOCK_DEADLOCK); + } + else + { + DBUG_ASSERT(0); + WSREP_ERROR("trx_replay failed for: %d, schema: %s, query: %s", + m_replay_status, + m_orig_thd->db.str, wsrep_thd_query(m_orig_thd)); + unireg_abort(1); + } +} + +int Wsrep_replayer_service::apply_write_set(const wsrep::ws_meta& ws_meta, + const wsrep::const_buffer& data, + wsrep::mutable_buffer& err) +{ + DBUG_ENTER("Wsrep_replayer_service::apply_write_set"); + THD* thd= m_thd; + + DBUG_ASSERT(thd->wsrep_trx().active()); + DBUG_ASSERT(thd->wsrep_trx().state() == wsrep::transaction::s_replaying); + +#ifdef ENABLED_DEBUG_SYNC + /* Allow tests to block the replayer thread using the DBUG facilities */ + DBUG_EXECUTE_IF("sync.wsrep_replay_cb", + { + const char act[]= + "now " + "SIGNAL 
sync.wsrep_replay_cb_reached " + "WAIT_FOR signal.wsrep_replay_cb"; + DBUG_ASSERT(!debug_sync_set_action(thd, + STRING_WITH_LEN(act))); + };); +#endif + + wsrep_setup_uk_and_fk_checks(thd); + + int ret= 0; + if (!wsrep::starts_transaction(ws_meta.flags())) + { + DBUG_ASSERT(thd->wsrep_trx().is_streaming()); + ret= wsrep_schema->replay_transaction(thd, + m_rli, + ws_meta, + thd->wsrep_sr().fragments()); + } + ret= ret || apply_events(thd, m_rli, data, err); + thd->close_temporary_tables(); + if (!ret && !(ws_meta.flags() & wsrep::provider::flag::commit)) + { + thd->wsrep_cs().fragment_applied(ws_meta.seqno()); + } + + thd_proc_info(thd, "wsrep replayed write set"); + DBUG_RETURN(ret); +} diff --git a/sql/wsrep_high_priority_service.h b/sql/wsrep_high_priority_service.h new file mode 100644 index 00000000..c275c352 --- /dev/null +++ b/sql/wsrep_high_priority_service.h @@ -0,0 +1,134 @@ +/* Copyright 2018 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef WSREP_HIGH_PRIORITY_SERVICE_H +#define WSREP_HIGH_PRIORITY_SERVICE_H + +#include "wsrep/high_priority_service.hpp" +#include "my_global.h" +#include "sql_error.h" /* Diagnostics area */ +#include "sql_class.h" /* rpl_group_info */ + +class THD; +class Relay_log_info; +class Wsrep_server_service; + +class Wsrep_high_priority_service : + public wsrep::high_priority_service, + public wsrep::high_priority_context +{ +public: + Wsrep_high_priority_service(THD*); + ~Wsrep_high_priority_service(); + int start_transaction(const wsrep::ws_handle&, + const wsrep::ws_meta&); + int next_fragment(const wsrep::ws_meta&); + const wsrep::transaction& transaction() const; + int adopt_transaction(const wsrep::transaction&); + int apply_write_set(const wsrep::ws_meta&, const wsrep::const_buffer&, + wsrep::mutable_buffer&) = 0; + int append_fragment_and_commit(const wsrep::ws_handle&, + const wsrep::ws_meta&, + const wsrep::const_buffer&, + const wsrep::xid&); + int remove_fragments(const wsrep::ws_meta&); + int commit(const wsrep::ws_handle&, const wsrep::ws_meta&); + int rollback(const wsrep::ws_handle&, const wsrep::ws_meta&); + int apply_toi(const wsrep::ws_meta&, const wsrep::const_buffer&, + wsrep::mutable_buffer&); + void store_globals(); + void reset_globals(); + void switch_execution_context(wsrep::high_priority_service&); + int log_dummy_write_set(const wsrep::ws_handle&, + const wsrep::ws_meta&, + wsrep::mutable_buffer&); + void adopt_apply_error(wsrep::mutable_buffer&); + + virtual bool check_exit_status() const = 0; + void debug_crash(const char*); +protected: + friend Wsrep_server_service; + THD* m_thd; + Relay_log_info* m_rli; + rpl_group_info* m_rgi; + struct shadow + { + ulonglong option_bits; + uint server_status; + struct st_vio* vio; + ulong 
tx_isolation; + char* db; + size_t db_length; + //struct timeval user_time; + my_hrtime_t user_time; + longlong row_count_func; + bool wsrep_applier; + } m_shadow; +}; + +class Wsrep_applier_service : public Wsrep_high_priority_service +{ +public: + Wsrep_applier_service(THD*); + ~Wsrep_applier_service(); + int apply_write_set(const wsrep::ws_meta&, const wsrep::const_buffer&, + wsrep::mutable_buffer&); + int apply_nbo_begin(const wsrep::ws_meta&, const wsrep::const_buffer& data, + wsrep::mutable_buffer& err); + void after_apply(); + bool is_replaying() const { return false; } + bool check_exit_status() const; +}; + +class Wsrep_replayer_service : public Wsrep_high_priority_service +{ +public: + Wsrep_replayer_service(THD* replayer_thd, THD* orig_thd); + ~Wsrep_replayer_service(); + int apply_write_set(const wsrep::ws_meta&, const wsrep::const_buffer&, + wsrep::mutable_buffer&); + int apply_nbo_begin(const wsrep::ws_meta&, const wsrep::const_buffer& data, + wsrep::mutable_buffer& err) + { + DBUG_ASSERT(0); /* DDL should never cause replaying */ + return 0; + } + void after_apply() { } + bool is_replaying() const { return true; } + void replay_status(enum wsrep::provider::status status) + { m_replay_status = status; } + enum wsrep::provider::status replay_status() const + { return m_replay_status; } + /* Replayer should never be forced to exit */ + bool check_exit_status() const { return false; } +private: + THD* m_orig_thd; + struct da_shadow + { + enum Diagnostics_area::enum_diagnostics_status status; + ulonglong affected_rows; + ulonglong last_insert_id; + char message[MYSQL_ERRMSG_SIZE]; + da_shadow() + : status() + , affected_rows() + , last_insert_id() + , message() + { } + } m_da_shadow; + enum wsrep::provider::status m_replay_status; +}; + +#endif /* WSREP_HIGH_PRIORITY_SERVICE_H */ diff --git a/sql/wsrep_mutex.h b/sql/wsrep_mutex.h new file mode 100644 index 00000000..50f129c9 --- /dev/null +++ b/sql/wsrep_mutex.h @@ -0,0 +1,51 @@ +/* Copyright 2018 
Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef WSREP_MUTEX_H +#define WSREP_MUTEX_H + +/* wsrep-lib */ +#include "wsrep/mutex.hpp" + +/* implementation */ +#include "my_global.h" +#include "my_pthread.h" + +class Wsrep_mutex : public wsrep::mutex +{ +public: + Wsrep_mutex(mysql_mutex_t* mutex) + : m_mutex(mutex) + { } + + void lock() + { + mysql_mutex_lock(m_mutex); + } + + void unlock() + { + mysql_mutex_unlock(m_mutex); + } + + void* native() + { + return m_mutex; + } +private: + mysql_mutex_t* m_mutex; +}; + +#endif /* WSREP_MUTEX_H */ diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc new file mode 100644 index 00000000..0a615228 --- /dev/null +++ b/sql/wsrep_mysqld.cc @@ -0,0 +1,3913 @@ +/* Copyright (c) 2008, 2023 Codership Oy + Copyright (c) 2020, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License.x1 + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "sql_plugin.h" /* wsrep_plugins_pre_init() */ +#include "my_global.h" +#include "wsrep_server_state.h" +#include "wsrep_status.h" + +#include "mariadb.h" +#include +#include +#include +#include +#include /* find_temporary_table() */ +#include /* is_stat_table() */ +#include "slave.h" +#include "rpl_mi.h" +#include "sql_repl.h" +#include "rpl_filter.h" +#include "sql_callback.h" +#include "sp_head.h" +#include "sql_show.h" +#include "sp.h" +#include "handler.h" +#include "wsrep_priv.h" +#include "wsrep_thd.h" +#include "wsrep_sst.h" +#include "wsrep_utils.h" +#include "wsrep_var.h" +#include "wsrep_binlog.h" +#include "wsrep_applier.h" +#include "wsrep_schema.h" +#include "wsrep_xid.h" +#include "wsrep_trans_observer.h" +#include "mysql/service_wsrep.h" +#include +#include +#include +#include "log_event.h" +#include "sql_connect.h" +#include "thread_cache.h" +#include "debug_sync.h" + +#include + +/* wsrep-lib */ +Wsrep_server_state* Wsrep_server_state::m_instance; + +my_bool wsrep_emulate_bin_log= FALSE; // activating parts of binlog interface +my_bool wsrep_preordered_opt= FALSE; + +/* Streaming Replication */ +const char *wsrep_fragment_units[]= { "bytes", "rows", "statements", NullS }; +const char *wsrep_SR_store_types[]= { "none", "table", NullS }; + +/* + * Begin configuration options + */ + +extern my_bool plugins_are_initialized; + +/* System variables. 
*/ +const char *wsrep_provider; +const char *wsrep_provider_options; +const char *wsrep_cluster_address; +const char *wsrep_cluster_name; +const char *wsrep_node_name; +const char *wsrep_node_address; +const char *wsrep_node_incoming_address; +const char *wsrep_start_position; +const char *wsrep_data_home_dir; +const char *wsrep_dbug_option; +const char *wsrep_notify_cmd; +const char *wsrep_status_file; +const char *wsrep_allowlist; + +ulong wsrep_debug; // Debug level logging +my_bool wsrep_convert_LOCK_to_trx; // Convert locking sessions to trx +my_bool wsrep_auto_increment_control; // Control auto increment variables +my_bool wsrep_drupal_282555_workaround; // Retry autoinc insert after dupkey +my_bool wsrep_certify_nonPK; // Certify, even when no primary key +ulong wsrep_certification_rules = WSREP_CERTIFICATION_RULES_STRICT; +my_bool wsrep_recovery; // Recovery +my_bool wsrep_log_conflicts; +my_bool wsrep_load_data_splitting= 0; // Commit load data every 10K intervals +my_bool wsrep_slave_UK_checks; // Slave thread does UK checks +my_bool wsrep_slave_FK_checks; // Slave thread does FK checks +my_bool wsrep_restart_slave; // Should mysql slave thread be + // restarted, when node joins back? +my_bool wsrep_desync; // De(re)synchronize the node from the + // cluster +ulonglong wsrep_mode; +bool wsrep_service_started; // If Galera was initialized +long wsrep_slave_threads; // No. of slave appliers threads +ulong wsrep_retry_autocommit; // Retry aborted autocommit trx +ulong wsrep_max_ws_size; // Max allowed ws (RBR buffer) size +ulong wsrep_max_ws_rows; // Max number of rows in ws +ulong wsrep_forced_binlog_format= BINLOG_FORMAT_UNSPEC; +ulong wsrep_mysql_replication_bundle; + +bool wsrep_gtid_mode; // Enable WSREP native GTID support +Wsrep_gtid_server wsrep_gtid_server; +uint wsrep_gtid_domain_id=0; // Domain id on above structure + +/* Other configuration variables and their default values. 
*/ +my_bool wsrep_incremental_data_collection= 0; // Incremental data collection +my_bool wsrep_restart_slave_activated= 0; // Node has dropped, and slave + // restart will be needed +bool wsrep_new_cluster= false; // Bootstrap the cluster? +int wsrep_slave_count_change= 0; // No. of appliers to stop/start +int wsrep_to_isolation= 0; // No. of active TO isolation threads +long wsrep_max_protocol_version= 4; // Maximum protocol version to use +long int wsrep_protocol_version= wsrep_max_protocol_version; +ulong wsrep_trx_fragment_unit= WSREP_FRAG_BYTES; + // unit for fragment size +ulong wsrep_SR_store_type= WSREP_SR_STORE_TABLE; +uint wsrep_ignore_apply_errors= 0; + +std::atomic wsrep_thread_create_failed; + +/* + * End configuration options + */ + +/* + * Cached variables + */ + +// Whether the Galera write-set replication provider is set +// wsrep_provider && strcmp(wsrep_provider, WSREP_NONE) +bool WSREP_PROVIDER_EXISTS_; + +// Whether the Galera write-set replication is enabled +// global_system_variables.wsrep_on && WSREP_PROVIDER_EXISTS_ +bool WSREP_ON_; + +/* + * Other wsrep global variables. 
+ */ + +mysql_mutex_t LOCK_wsrep_ready; +mysql_cond_t COND_wsrep_ready; +mysql_mutex_t LOCK_wsrep_sst; +mysql_cond_t COND_wsrep_sst; +mysql_mutex_t LOCK_wsrep_sst_init; +mysql_cond_t COND_wsrep_sst_init; +mysql_mutex_t LOCK_wsrep_replaying; +mysql_cond_t COND_wsrep_replaying; +mysql_mutex_t LOCK_wsrep_slave_threads; +mysql_cond_t COND_wsrep_slave_threads; +mysql_mutex_t LOCK_wsrep_gtid_wait_upto; +mysql_mutex_t LOCK_wsrep_cluster_config; +mysql_mutex_t LOCK_wsrep_desync; +mysql_mutex_t LOCK_wsrep_config_state; +mysql_mutex_t LOCK_wsrep_group_commit; +mysql_mutex_t LOCK_wsrep_SR_pool; +mysql_mutex_t LOCK_wsrep_SR_store; +mysql_mutex_t LOCK_wsrep_joiner_monitor; +mysql_mutex_t LOCK_wsrep_donor_monitor; +mysql_cond_t COND_wsrep_joiner_monitor; +mysql_cond_t COND_wsrep_donor_monitor; + +int wsrep_replaying= 0; +ulong wsrep_running_threads = 0; // # of currently running wsrep + // # threads +ulong wsrep_running_applier_threads = 0; // # of running applier threads +ulong wsrep_running_rollbacker_threads = 0; // # of running + // # rollbacker threads +ulong my_bind_addr; + +#ifdef HAVE_PSI_INTERFACE +PSI_mutex_key + key_LOCK_wsrep_replaying, key_LOCK_wsrep_ready, key_LOCK_wsrep_sst, + key_LOCK_wsrep_sst_thread, key_LOCK_wsrep_sst_init, + key_LOCK_wsrep_slave_threads, key_LOCK_wsrep_gtid_wait_upto, + key_LOCK_wsrep_desync, + key_LOCK_wsrep_config_state, key_LOCK_wsrep_cluster_config, + key_LOCK_wsrep_group_commit, + key_LOCK_wsrep_SR_pool, + key_LOCK_wsrep_SR_store, + key_LOCK_wsrep_thd_queue, + key_LOCK_wsrep_joiner_monitor, + key_LOCK_wsrep_donor_monitor; + +PSI_cond_key key_COND_wsrep_thd, + key_COND_wsrep_replaying, key_COND_wsrep_ready, key_COND_wsrep_sst, + key_COND_wsrep_sst_init, key_COND_wsrep_sst_thread, + key_COND_wsrep_thd_queue, key_COND_wsrep_slave_threads, key_COND_wsrep_gtid_wait_upto, + key_COND_wsrep_joiner_monitor, key_COND_wsrep_donor_monitor; + +PSI_file_key key_file_wsrep_gra_log; + +static PSI_mutex_info wsrep_mutexes[]= +{ + { &key_LOCK_wsrep_ready, 
"LOCK_wsrep_ready", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_sst, "LOCK_wsrep_sst", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_sst_thread, "wsrep_sst_thread", 0}, + { &key_LOCK_wsrep_sst_init, "LOCK_wsrep_sst_init", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_sst, "LOCK_wsrep_sst", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_replaying, "LOCK_wsrep_replaying", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_slave_threads, "LOCK_wsrep_slave_threads", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_gtid_wait_upto, "LOCK_wsrep_gtid_wait_upto", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_cluster_config, "LOCK_wsrep_cluster_config", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_desync, "LOCK_wsrep_desync", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_config_state, "LOCK_wsrep_config_state", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_group_commit, "LOCK_wsrep_group_commit", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_SR_pool, "LOCK_wsrep_SR_pool", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_SR_store, "LOCK_wsrep_SR_store", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_joiner_monitor, "LOCK_wsrep_joiner_monitor", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_donor_monitor, "LOCK_wsrep_donor_monitor", PSI_FLAG_GLOBAL} +}; + +static PSI_cond_info wsrep_conds[]= +{ + { &key_COND_wsrep_ready, "COND_wsrep_ready", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_sst, "COND_wsrep_sst", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_sst_init, "COND_wsrep_sst_init", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_sst_thread, "wsrep_sst_thread", 0}, + { &key_COND_wsrep_thd, "THD::COND_wsrep_thd", 0}, + { &key_COND_wsrep_replaying, "COND_wsrep_replaying", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_slave_threads, "COND_wsrep_wsrep_slave_threads", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_gtid_wait_upto, "COND_wsrep_gtid_wait_upto", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_joiner_monitor, "COND_wsrep_joiner_monitor", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_donor_monitor, "COND_wsrep_donor_monitor", PSI_FLAG_GLOBAL} +}; + +static PSI_file_info wsrep_files[]= +{ + { &key_file_wsrep_gra_log, "wsrep_gra_log", 0} +}; + 
+PSI_thread_key key_wsrep_sst_joiner, key_wsrep_sst_donor, + key_wsrep_rollbacker, key_wsrep_applier, + key_wsrep_sst_joiner_monitor, key_wsrep_sst_donor_monitor; + +static PSI_thread_info wsrep_threads[]= +{ + {&key_wsrep_sst_joiner, "wsrep_sst_joiner_thread", PSI_FLAG_GLOBAL}, + {&key_wsrep_sst_donor, "wsrep_sst_donor_thread", PSI_FLAG_GLOBAL}, + {&key_wsrep_rollbacker, "wsrep_rollbacker_thread", PSI_FLAG_GLOBAL}, + {&key_wsrep_applier, "wsrep_applier_thread", PSI_FLAG_GLOBAL}, + {&key_wsrep_sst_joiner_monitor, "wsrep_sst_joiner_monitor", PSI_FLAG_GLOBAL}, + {&key_wsrep_sst_donor_monitor, "wsrep_sst_donor_monitor", PSI_FLAG_GLOBAL} +}; + +#endif /* HAVE_PSI_INTERFACE */ + +my_bool wsrep_inited= 0; // initialized ? + +static wsrep_uuid_t node_uuid= WSREP_UUID_UNDEFINED; +static char cluster_uuid_str[40]= { 0, }; + +static char provider_name[256]= { 0, }; +static char provider_version[256]= { 0, }; +static char provider_vendor[256]= { 0, }; + +/* + * Wsrep status variables. LOCK_status must be locked When modifying + * these variables, + */ +my_bool wsrep_connected = FALSE; +my_bool wsrep_ready = FALSE; +const char* wsrep_cluster_state_uuid= cluster_uuid_str; +long long wsrep_cluster_conf_id = WSREP_SEQNO_UNDEFINED; +const char* wsrep_cluster_status = "Disconnected"; +long wsrep_cluster_size = 0; +long wsrep_local_index = -1; +long long wsrep_local_bf_aborts = 0; +const char* wsrep_provider_name = provider_name; +const char* wsrep_provider_version = provider_version; +const char* wsrep_provider_vendor = provider_vendor; +char* wsrep_provider_capabilities = NULL; +char* wsrep_cluster_capabilities = NULL; +/* End wsrep status variables */ + +wsp::Config_state *wsrep_config_state; + +void WSREP_LOG(void (*fun)(const char* fmt, ...), const char* fmt, ...) +{ + /* Allocate short buffer from stack. If the vsnprintf() return value + indicates that the message was truncated, a new buffer will be allocated + dynamically and the message will be reprinted. 
*/ + char msg[128] = {'\0'}; + va_list arglist; + va_start(arglist, fmt); + int n= vsnprintf(msg, sizeof(msg), fmt, arglist); + va_end(arglist); + if (n < 0) + { + sql_print_warning("WSREP: Printing message failed"); + } + else if (n < (int)sizeof(msg)) + { + fun("WSREP: %s", msg); + } + else + { + size_t dynbuf_size= std::max(n, 4096); + char* dynbuf= (char*) my_malloc(PSI_NOT_INSTRUMENTED, dynbuf_size, MYF(0)); + if (dynbuf) + { + va_start(arglist, fmt); + (void)vsnprintf(&dynbuf[0], dynbuf_size - 1, fmt, arglist); + va_end(arglist); + dynbuf[dynbuf_size - 1] = '\0'; + fun("WSREP: %s", &dynbuf[0]); + my_free(dynbuf); + } + else + { + /* Memory allocation for vector failed, print truncated message. */ + fun("WSREP: %s", msg); + } + } +} + + +wsrep_uuid_t local_uuid = WSREP_UUID_UNDEFINED; +wsrep_seqno_t local_seqno = WSREP_SEQNO_UNDEFINED; + +/* + */ +Wsrep_schema *wsrep_schema= 0; + +static void wsrep_log_cb(wsrep::log::level level, + const char*, const char *msg) +{ + /* + Silence all wsrep related logging from lib and provider if + wsrep is not enabled. + */ + if (!WSREP_ON) return; + + switch (level) { + case wsrep::log::info: + WSREP_INFO("%s", msg); + break; + case wsrep::log::warning: + WSREP_WARN("%s", msg); + break; + case wsrep::log::error: + WSREP_ERROR("%s", msg); + break; + case wsrep::log::debug: + WSREP_DEBUG("%s", msg); + break; + case wsrep::log::unknown: + WSREP_UNKNOWN("%s", msg); + break; + } +} + +void wsrep_init_gtid() +{ + wsrep_server_gtid_t stored_gtid= wsrep_get_SE_checkpoint(); + // Domain id may have changed, use the one + // received during state transfer. 
+ stored_gtid.domain_id= wsrep_gtid_server.domain_id; + if (stored_gtid.server_id == 0) + { + rpl_gtid wsrep_last_gtid; + if (mysql_bin_log.is_open() && + mysql_bin_log.lookup_domain_in_binlog_state(stored_gtid.domain_id, + &wsrep_last_gtid)) + { + stored_gtid.server_id= wsrep_last_gtid.server_id; + stored_gtid.seqno= wsrep_last_gtid.seq_no; + } + else + { + stored_gtid.server_id= global_system_variables.server_id; + stored_gtid.seqno= 0; + } + } + wsrep_gtid_server.gtid(stored_gtid); +} + +bool wsrep_get_binlog_gtid_seqno(wsrep_server_gtid_t& gtid) +{ + rpl_gtid binlog_gtid; + int ret= 0; + if (mysql_bin_log.is_open() && + mysql_bin_log.find_in_binlog_state(gtid.domain_id, + gtid.server_id, + &binlog_gtid)) + { + gtid.domain_id= binlog_gtid.domain_id; + gtid.server_id= binlog_gtid.server_id; + gtid.seqno= binlog_gtid.seq_no; + ret= 1; + } + return ret; +} + +bool wsrep_check_gtid_seqno(const uint32& domain, const uint32& server, + uint64& seqno) +{ + if (domain == wsrep_gtid_server.domain_id && + server == wsrep_gtid_server.server_id) + { + if (wsrep_gtid_server.seqno_committed() < seqno) return 1; + return 0; + } + return 0; +} + +void wsrep_init_sidno(const wsrep::id& uuid) +{ + /* + Protocol versions starting from 4 use group gtid as it is. + For lesser protocol versions generate new Sid map entry from inverted + uuid. 
+ */ + rpl_gtid sid; + if (wsrep_protocol_version >= 4) + { + memcpy((void*)&sid, (const uchar*)uuid.data(),16); + } + else + { + wsrep_uuid_t ltid_uuid; + for (size_t i= 0; i < sizeof(ltid_uuid.data); ++i) + { + ltid_uuid.data[i]= ~((const uchar*)uuid.data())[i]; + } + memcpy((void*)&sid, (const uchar*)ltid_uuid.data,16); + } +#ifdef GTID_SUPPORT + global_sid_lock->wrlock(); + wsrep_sidno= global_sid_map->add_sid(sid); + WSREP_INFO("Initialized wsrep sidno %d", wsrep_sidno); + global_sid_lock->unlock(); +#endif +} + +void wsrep_init_schema() +{ + DBUG_ASSERT(!wsrep_schema); + + WSREP_INFO("wsrep_init_schema_and_SR %p", wsrep_schema); + if (!wsrep_schema) + { + wsrep_schema= new Wsrep_schema(); + if (wsrep_schema->init()) + { + WSREP_ERROR("Failed to init wsrep schema"); + unireg_abort(1); + } + // If we are bootstraping new cluster we should + // clear allowlist table and populate it from variable + if (wsrep_new_cluster) + { + wsrep_schema->clear_allowlist(); + std::vector ip_allowlist; + if (wsrep_split_allowlist(ip_allowlist)) + { + wsrep_schema->store_allowlist(ip_allowlist); + } + } + } +} + +void wsrep_deinit_schema() +{ + delete wsrep_schema; + wsrep_schema= 0; +} + +void wsrep_recover_sr_from_storage(THD *orig_thd) +{ + switch (wsrep_SR_store_type) + { + case WSREP_SR_STORE_TABLE: + if (!wsrep_schema) + { + WSREP_ERROR("Wsrep schema not initialized when trying to recover " + "streaming transactions: wsrep_on %d", WSREP_ON); + trans_commit(orig_thd); + } + if (wsrep_schema->recover_sr_transactions(orig_thd)) + { + WSREP_ERROR("Failed to recover SR transactions from schema: wsrep_on : %d", WSREP_ON); + trans_commit(orig_thd); + } + break; + default: + /* */ + WSREP_ERROR("Unsupported wsrep SR store type: %lu wsrep_on: %d", + wsrep_SR_store_type, WSREP_ON); + trans_commit(orig_thd); + break; + } +} + +/** Export the WSREP provider's capabilities as a human readable string. 
+ * The result is saved in a dynamically allocated string of the form: + * :cap1:cap2:cap3: + */ +static void wsrep_capabilities_export(wsrep_cap_t const cap, char** str) +{ + static const char* names[] = + { + /* Keep in sync with wsrep/wsrep_api.h WSREP_CAP_* macros. */ + "MULTI_MASTER", + "CERTIFICATION", + "PARALLEL_APPLYING", + "TRX_REPLAY", + "ISOLATION", + "PAUSE", + "CAUSAL_READS", + "CAUSAL_TRX", + "INCREMENTAL_WRITESET", + "SESSION_LOCKS", + "DISTRIBUTED_LOCKS", + "CONSISTENCY_CHECK", + "UNORDERED", + "ANNOTATION", + "PREORDERED", + "STREAMING", + "SNAPSHOT", + "NBO", + }; + + std::string s; + for (size_t i= 0; i < sizeof(names) / sizeof(names[0]); ++i) + { + if (cap & (1ULL << i)) + { + if (s.empty()) + { + s= ":"; + } + s += names[i]; + s += ":"; + } + } + + /* A read from the string pointed to by *str may be started at any time, + * so it must never point to free(3)d memory or non '\0' terminated string. */ + + char* const previous= *str; + + *str= strdup(s.c_str()); + + if (previous != NULL) + { + free(previous); + } +} + +/* Verifies that SE position is consistent with the group position + * and initializes other variables */ +void wsrep_verify_SE_checkpoint(const wsrep_uuid_t& uuid, + wsrep_seqno_t const seqno) +{ +} + +/* + Wsrep is considered ready if + 1) Provider is not loaded (native mode) + 2) Server has reached synced state + 3) Server is in joiner mode and mysqldump SST method has been + specified + See Wsrep_server_service::log_state_change() for further details. 
+ */ +my_bool wsrep_ready_get (void) +{ + if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort(); + my_bool ret= wsrep_ready; + mysql_mutex_unlock (&LOCK_wsrep_ready); + return ret; +} + +int wsrep_show_ready(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_MY_BOOL; + var->value= buff; + *((my_bool *)buff)= wsrep_ready_get(); + return 0; +} + +void wsrep_update_cluster_state_uuid(const char* uuid) +{ + strncpy(cluster_uuid_str, uuid, sizeof(cluster_uuid_str) - 1); +} + +static void wsrep_init_position() +{ +} + +/**************************************************************************** + Helpers for wsrep_init() + ****************************************************************************/ +static std::string wsrep_server_name() +{ + std::string ret(wsrep_node_name ? wsrep_node_name : ""); + return ret; +} + +static std::string wsrep_server_id() +{ + /* using empty server_id, which enables view change handler to + set final server_id later on + */ + std::string ret(""); + return ret; +} + +static std::string wsrep_server_node_address() +{ + + std::string ret; + if (!wsrep_data_home_dir || strlen(wsrep_data_home_dir) == 0) + wsrep_data_home_dir= mysql_real_data_home; + + /* Initialize node address */ + if (!wsrep_node_address || !strcmp(wsrep_node_address, "")) + { + char node_addr[512]= {0, }; + const size_t node_addr_max= sizeof(node_addr) - 1; + size_t guess_ip_ret= wsrep_guess_ip(node_addr, node_addr_max); + if (!(guess_ip_ret > 0 && guess_ip_ret < node_addr_max)) + { + WSREP_WARN("Failed to guess base node address. 
Set it explicitly via " + "wsrep_node_address."); + } + else + { + ret= node_addr; + } + } + else + { + ret= wsrep_node_address; + } + return ret; +} + +static std::string wsrep_server_incoming_address() +{ + std::string ret; + const std::string node_addr(wsrep_server_node_address()); + char inc_addr[512]= { 0, }; + size_t const inc_addr_max= sizeof (inc_addr); + + /* + In case wsrep_node_incoming_address is either not set or set to AUTO, + we need to use mysqld's my_bind_addr_str:mysqld_port, lastly fallback + to wsrep_node_address' value if mysqld's bind-address is not set either. + */ + if ((!wsrep_node_incoming_address || + !strcmp (wsrep_node_incoming_address, WSREP_NODE_INCOMING_AUTO))) + { + bool is_ipv6= false; + unsigned int my_bind_ip= INADDR_ANY; // default if not set + + if (my_bind_addr_str && strlen(my_bind_addr_str) && + strcmp(my_bind_addr_str, "*") != 0) + { + my_bind_ip= wsrep_check_ip(my_bind_addr_str, &is_ipv6); + } + + if (INADDR_ANY != my_bind_ip) + { + /* + If its a not a valid address, leave inc_addr as empty string. mysqld + is not listening for client connections on network interfaces. + */ + if (INADDR_NONE != my_bind_ip && INADDR_LOOPBACK != my_bind_ip) + { + const char *fmt= (is_ipv6) ? "[%s]:%u" : "%s:%u"; + snprintf(inc_addr, inc_addr_max, fmt, my_bind_addr_str, mysqld_port); + } + } + else /* mysqld binds to 0.0.0.0, try taking IP from wsrep_node_address. */ + { + if (node_addr.size()) + { + size_t const ip_len_mdb= wsrep_host_len(node_addr.c_str(), node_addr.size()); + if (ip_len_mdb + 7 /* :55555\0 */ < inc_addr_max) + { + memcpy (inc_addr, node_addr.c_str(), ip_len_mdb); + snprintf(inc_addr + ip_len_mdb, inc_addr_max - ip_len_mdb, ":%u", + (int)mysqld_port); + } + else + { + WSREP_WARN("Guessing address for incoming client connections: " + "address too long."); + inc_addr[0]= '\0'; + } + } + + if (!strlen(inc_addr)) + { + WSREP_WARN("Guessing address for incoming client connections failed. 
" + "Try setting wsrep_node_incoming_address explicitly."); + WSREP_INFO("Node addr: %s", node_addr.c_str()); + } + } + } + else + { + wsp::Address addr(wsrep_node_incoming_address); + + if (!addr.is_valid()) + { + WSREP_WARN("Could not parse wsrep_node_incoming_address : %s", + wsrep_node_incoming_address); + goto done; + } + + /* + In case port is not specified in wsrep_node_incoming_address, we use + mysqld_port. + Note that we might get here before we execute set_ports(). + */ + int local_port= (addr.get_port() > 0) ? addr.get_port() : (int) mysqld_port; + if (!local_port) + local_port= MYSQL_PORT; + const char *fmt= (addr.is_ipv6()) ? "[%s]:%u" : "%s:%u"; + + snprintf(inc_addr, inc_addr_max, fmt, addr.get_address(), local_port); + } + + done: + if (!strlen(inc_addr)) + ret= wsrep_node_incoming_address; + else + ret= inc_addr; + WSREP_DEBUG("wsrep_incoming_address = %s", ret.c_str()); + return ret; +} + +static std::string wsrep_server_working_dir() +{ + std::string ret; + if (!wsrep_data_home_dir || strlen(wsrep_data_home_dir) == 0) + { + ret= mysql_real_data_home; + } + else + { + ret= wsrep_data_home_dir; + } + return ret; +} + +static wsrep::gtid wsrep_server_initial_position() +{ + wsrep::gtid ret; + WSREP_DEBUG("Server initial position: %s", wsrep_start_position); + std::istringstream is(wsrep_start_position); + is >> ret; + return ret; +} + +/* + Intitialize provider specific status variables + */ +static void wsrep_init_provider_status_variables() +{ + wsrep_inited= 1; + const wsrep::provider& provider= + Wsrep_server_state::instance().provider(); + strncpy(provider_name, + provider.name().c_str(), sizeof(provider_name) - 1); + strncpy(provider_version, + provider.version().c_str(), sizeof(provider_version) - 1); + strncpy(provider_vendor, + provider.vendor().c_str(), sizeof(provider_vendor) - 1); +} + +int wsrep_init_server() +{ + wsrep::log::logger_fn(wsrep_log_cb); + try + { + Wsrep_status::init_once(wsrep_status_file); + + std::string server_name; + 
std::string server_id; + std::string node_address; + std::string incoming_address; + std::string working_dir; + wsrep::gtid initial_position; + + server_name= wsrep_server_name(); + server_id= wsrep_server_id(); + node_address= wsrep_server_node_address(); + incoming_address= wsrep_server_incoming_address(); + working_dir= wsrep_server_working_dir(); + initial_position= wsrep_server_initial_position(); + + Wsrep_server_state::init_once(server_name, + incoming_address, + node_address, + working_dir, + initial_position, + wsrep_max_protocol_version); + Wsrep_server_state::instance().debug_log_level(wsrep_debug); + } + catch (const wsrep::runtime_error& e) + { + WSREP_ERROR("Failed to init wsrep server %s", e.what()); + return 1; + } + catch (const std::exception& e) + { + WSREP_ERROR("Failed to init wsrep server %s", e.what()); + } + return 0; +} + +void wsrep_init_globals() +{ + wsrep_init_sidno(Wsrep_server_state::instance().connected_gtid().id()); + /* Recover last written wsrep gtid */ + wsrep_init_gtid(); + if (wsrep_new_cluster) + { + /* Start with provided domain_id & server_id found in configuration */ + wsrep_server_gtid_t new_gtid; + new_gtid.domain_id= wsrep_gtid_domain_id; + new_gtid.server_id= global_system_variables.server_id; + new_gtid.seqno= 0; + /* Try to search for domain_id and server_id combination in binlog if found continue from last seqno */ + wsrep_get_binlog_gtid_seqno(new_gtid); + wsrep_gtid_server.gtid(new_gtid); + } + else + { + if (wsrep_gtid_mode && wsrep_gtid_server.server_id != global_system_variables.server_id) + { + WSREP_WARN("Ignoring server id for non bootstrap node."); + } + } + wsrep_init_schema(); + if (WSREP_ON) + { + Wsrep_server_state::instance().initialized(); + } +} + +void wsrep_deinit_server() +{ + wsrep_deinit_schema(); + Wsrep_server_state::destroy(); + Wsrep_status::destroy(); + wsrep_free_status_vars(); +} + +int wsrep_init() +{ + assert(wsrep_provider); + + wsrep_init_position(); + wsrep_sst_auth_init(); + + if 
(strlen(wsrep_provider)== 0 || + !strcmp(wsrep_provider, WSREP_NONE)) + { + // enable normal operation in case no provider is specified + global_system_variables.wsrep_on= 0; + int err= Wsrep_server_state::instance().load_provider(wsrep_provider, wsrep_provider_options ? wsrep_provider_options : ""); + if (err) + { + DBUG_PRINT("wsrep",("wsrep::init() failed: %d", err)); + WSREP_ERROR("wsrep::init() failed: %d, must shutdown", err); + } + else + wsrep_init_provider_status_variables(); + return err; + } + + if (wsrep_gtid_mode && opt_bin_log && !opt_log_slave_updates) + { + WSREP_ERROR("Option --log-slave-updates is required if " + "binlog is enabled, GTID mode is on and wsrep provider " + "is specified"); + return 1; + } + + if (!wsrep_data_home_dir || strlen(wsrep_data_home_dir) == 0) + wsrep_data_home_dir= mysql_real_data_home; + + Wsrep_server_state::init_provider_services(); + if (Wsrep_server_state::instance().load_provider( + wsrep_provider, + wsrep_provider_options, + Wsrep_server_state::instance().provider_services())) + { + WSREP_ERROR("Failed to load provider"); + Wsrep_server_state::deinit_provider_services(); + return 1; + } + + if (!wsrep_provider_is_SR_capable() && + global_system_variables.wsrep_trx_fragment_size > 0) + { + WSREP_ERROR("The WSREP provider (%s) does not support streaming " + "replication but wsrep_trx_fragment_size is set to a " + "value other than 0 (%llu). Cannot continue. 
Either set " + "wsrep_trx_fragment_size to 0 or use wsrep_provider that " + "supports streaming replication.", + wsrep_provider, global_system_variables.wsrep_trx_fragment_size); + Wsrep_server_state::instance().unload_provider(); + Wsrep_server_state::deinit_provider_services(); + return 1; + } + + /* Now WSREP is fully initialized */ + global_system_variables.wsrep_on= 1; + WSREP_ON_= wsrep_provider && strcmp(wsrep_provider, WSREP_NONE); + wsrep_service_started= 1; + + wsrep_init_provider_status_variables(); + wsrep_capabilities_export(Wsrep_server_state::instance().provider().capabilities(), + &wsrep_provider_capabilities); + + WSREP_DEBUG("SR storage init for: %s", + (wsrep_SR_store_type == WSREP_SR_STORE_TABLE) ? "table" : "void"); + + return 0; +} + +/* Initialize wsrep thread LOCKs and CONDs */ +void wsrep_thr_init() +{ + DBUG_ENTER("wsrep_thr_init"); + wsrep_config_state= new wsp::Config_state; +#ifdef HAVE_PSI_INTERFACE + mysql_mutex_register("sql", wsrep_mutexes, array_elements(wsrep_mutexes)); + mysql_cond_register("sql", wsrep_conds, array_elements(wsrep_conds)); + mysql_file_register("sql", wsrep_files, array_elements(wsrep_files)); + mysql_thread_register("sql", wsrep_threads, array_elements(wsrep_threads)); +#endif + + mysql_mutex_init(key_LOCK_wsrep_ready, &LOCK_wsrep_ready, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_ready, &COND_wsrep_ready, NULL); + mysql_mutex_init(key_LOCK_wsrep_sst, &LOCK_wsrep_sst, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_sst, &COND_wsrep_sst, NULL); + mysql_mutex_init(key_LOCK_wsrep_sst_init, &LOCK_wsrep_sst_init, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_sst_init, &COND_wsrep_sst_init, NULL); + mysql_mutex_init(key_LOCK_wsrep_replaying, &LOCK_wsrep_replaying, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_replaying, &COND_wsrep_replaying, NULL); + mysql_mutex_init(key_LOCK_wsrep_slave_threads, &LOCK_wsrep_slave_threads, MY_MUTEX_INIT_FAST); + 
mysql_cond_init(key_COND_wsrep_slave_threads, &COND_wsrep_slave_threads, NULL); + mysql_mutex_init(key_LOCK_wsrep_gtid_wait_upto, &LOCK_wsrep_gtid_wait_upto, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_wsrep_cluster_config, &LOCK_wsrep_cluster_config, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_wsrep_desync, &LOCK_wsrep_desync, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_wsrep_config_state, &LOCK_wsrep_config_state, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_wsrep_group_commit, &LOCK_wsrep_group_commit, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_wsrep_SR_pool, + &LOCK_wsrep_SR_pool, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_wsrep_SR_store, + &LOCK_wsrep_SR_store, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_wsrep_joiner_monitor, + &LOCK_wsrep_joiner_monitor, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_wsrep_donor_monitor, + &LOCK_wsrep_donor_monitor, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_joiner_monitor, &COND_wsrep_joiner_monitor, NULL); + mysql_cond_init(key_COND_wsrep_donor_monitor, &COND_wsrep_donor_monitor, NULL); + + DBUG_VOID_RETURN; +} + +void wsrep_init_startup (bool sst_first) +{ + if (wsrep_init()) unireg_abort(1); + + /* + Pre-initialize global_system_variables.table_plugin with a dummy engine + (placeholder) required during the initialization of wsrep threads (THDs). + (see: plugin_thdvar_init()) + Note: This only needs to be done for rsync & mariabackup based SST methods. + In case of mysqldump SST method, the wsrep threads are created after the + server plugins & global system variables are initialized. 
+ */ + if (wsrep_before_SE()) + wsrep_plugins_pre_init(); + + /* Skip replication start if dummy wsrep provider is loaded */ + if (!strcmp(wsrep_provider, WSREP_NONE)) return; + + /* Skip replication start if no cluster address */ + if (!wsrep_cluster_address_exists()) return; + + /* + Read value of wsrep_new_cluster before wsrep_start_replication(), + the value is reset to FALSE inside wsrep_start_replication. + */ + if (!wsrep_start_replication(wsrep_cluster_address)) unireg_abort(1); + + wsrep_create_rollbacker(); + wsrep_create_appliers(1); + + Wsrep_server_state& server_state= Wsrep_server_state::instance(); + /* + If the SST happens before server initialization, wait until the server + state reaches initializing. This indicates that + either SST was not necessary or SST has been delivered. + + With mysqldump SST (!sst_first) wait until the server reaches + joiner state and procedd to accepting connections. + */ + int err= 0; + if (sst_first) + { + err= server_state.wait_until_state(Wsrep_server_state::s_initializing); + } + else + { + err= server_state.wait_until_state(Wsrep_server_state::s_joiner); + } + if (err) + { + WSREP_ERROR("Wsrep startup was interrupted"); + unireg_abort(1); + } +} + + +void wsrep_deinit(bool free_options) +{ + DBUG_ASSERT(wsrep_inited == 1); + WSREP_DEBUG("wsrep_deinit"); + + Wsrep_server_state::instance().unload_provider(); + Wsrep_server_state::deinit_provider_services(); + + provider_name[0]= '\0'; + provider_version[0]= '\0'; + provider_vendor[0]= '\0'; + + wsrep_inited= 0; + + if (wsrep_provider_capabilities != NULL) + { + char* p= wsrep_provider_capabilities; + wsrep_provider_capabilities= NULL; + free(p); + } + + if (free_options) + { + wsrep_sst_auth_free(); + } +} + +/* Destroy wsrep thread LOCKs and CONDs */ +void wsrep_thr_deinit() +{ + if (!wsrep_config_state) + return; // Never initialized + WSREP_DEBUG("wsrep_thr_deinit"); + mysql_mutex_destroy(&LOCK_wsrep_ready); + mysql_cond_destroy(&COND_wsrep_ready); + 
mysql_mutex_destroy(&LOCK_wsrep_sst); + mysql_cond_destroy(&COND_wsrep_sst); + mysql_mutex_destroy(&LOCK_wsrep_sst_init); + mysql_cond_destroy(&COND_wsrep_sst_init); + mysql_mutex_destroy(&LOCK_wsrep_replaying); + mysql_cond_destroy(&COND_wsrep_replaying); + mysql_mutex_destroy(&LOCK_wsrep_gtid_wait_upto); + mysql_mutex_destroy(&LOCK_wsrep_slave_threads); + mysql_cond_destroy(&COND_wsrep_slave_threads); + mysql_mutex_destroy(&LOCK_wsrep_cluster_config); + mysql_mutex_destroy(&LOCK_wsrep_desync); + mysql_mutex_destroy(&LOCK_wsrep_config_state); + mysql_mutex_destroy(&LOCK_wsrep_group_commit); + mysql_mutex_destroy(&LOCK_wsrep_SR_pool); + mysql_mutex_destroy(&LOCK_wsrep_SR_store); + mysql_mutex_destroy(&LOCK_wsrep_joiner_monitor); + mysql_mutex_destroy(&LOCK_wsrep_donor_monitor); + mysql_cond_destroy(&COND_wsrep_joiner_monitor); + mysql_cond_destroy(&COND_wsrep_donor_monitor); + + delete wsrep_config_state; + wsrep_config_state= 0; // Safety + + if (wsrep_cluster_capabilities != NULL) + { + char* p= wsrep_cluster_capabilities; + wsrep_cluster_capabilities= NULL; + free(p); + } +} + +void wsrep_recover() +{ + char uuid_str[40]; + + if (wsrep_uuid_compare(&local_uuid, &WSREP_UUID_UNDEFINED) == 0 && + local_seqno == -2) + { + wsrep_uuid_print(&local_uuid, uuid_str, sizeof(uuid_str)); + WSREP_INFO("Position %s:%lld given at startup, skipping position recovery", + uuid_str, (long long)local_seqno); + return; + } + wsrep::gtid gtid= wsrep_get_SE_checkpoint(); + std::ostringstream oss; + oss << gtid; + if (wsrep_gtid_mode) + { + wsrep_server_gtid_t server_gtid= wsrep_get_SE_checkpoint(); + WSREP_INFO("Recovered position: %s,%d-%d-%llu", oss.str().c_str(), server_gtid.domain_id, + server_gtid.server_id, server_gtid.seqno); + } + else + { + WSREP_INFO("Recovered position: %s", oss.str().c_str()); + } +} + + +void wsrep_stop_replication(THD *thd) +{ + WSREP_INFO("Stop replication by %llu", (thd) ? 
thd->thread_id : 0); + if (Wsrep_server_state::instance().state() != + Wsrep_server_state::s_disconnected) + { + WSREP_DEBUG("Disconnect provider"); + Wsrep_server_state::instance().disconnect(); + if (Wsrep_server_state::instance().wait_until_state( + Wsrep_server_state::s_disconnected)) + { + WSREP_WARN("Wsrep interrupted while waiting for disconnected state"); + } + } + + /* my connection, should not terminate with wsrep_close_client_connection(), + make transaction to rollback + */ + if (thd && !thd->wsrep_applier) trans_rollback(thd); + wsrep_close_client_connections(TRUE, thd); + + /* wait until appliers have stopped */ + wsrep_wait_appliers_close(thd); + + node_uuid= WSREP_UUID_UNDEFINED; +} + +void wsrep_shutdown_replication() +{ + WSREP_INFO("Shutdown replication"); + if (Wsrep_server_state::instance().state() != wsrep::server_state::s_disconnected) + { + WSREP_DEBUG("Disconnect provider"); + Wsrep_server_state::instance().disconnect(); + if (Wsrep_server_state::instance().wait_until_state( + Wsrep_server_state::s_disconnected)) + { + WSREP_WARN("Wsrep interrupted while waiting for disconnected state"); + } + } + + wsrep_close_client_connections(TRUE); + + /* wait until appliers have stopped */ + wsrep_wait_appliers_close(NULL); + node_uuid= WSREP_UUID_UNDEFINED; + + /* Undocking the thread specific data. */ + set_current_thd(nullptr); +} + +bool wsrep_start_replication(const char *wsrep_cluster_address) +{ + int rcode; + WSREP_DEBUG("wsrep_start_replication"); + + /* + if provider is trivial, don't even try to connect, + but resume local node operation + */ + if (!WSREP_PROVIDER_EXISTS) + { + // enable normal operation in case no provider is specified + return true; + } + + DBUG_ASSERT(wsrep_cluster_address[0]); + + // --wsrep-new-cluster flag is not used, checking wsrep_cluster_address + // it should match gcomm:// only to be considered as bootstrap node. + // This logic is used in galera. 
+ if (!wsrep_new_cluster && + (strlen(wsrep_cluster_address) == 8) && + !strncmp(wsrep_cluster_address, "gcomm://", 8)) + { + wsrep_new_cluster= true; + } + + bool const bootstrap(TRUE == wsrep_new_cluster); + + WSREP_INFO("Start replication"); + + if ((rcode= Wsrep_server_state::instance().connect( + wsrep_cluster_name, + wsrep_cluster_address, + wsrep_sst_donor, + bootstrap))) + { + DBUG_PRINT("wsrep",("wsrep_ptr->connect(%s) failed: %d", + wsrep_cluster_address, rcode)); + WSREP_ERROR("wsrep::connect(%s) failed: %d", + wsrep_cluster_address, rcode); + return false; + } + else + { + try + { + std::string opts= Wsrep_server_state::instance().provider().options(); + wsrep_provider_options_init(opts.c_str()); + } + catch (const wsrep::runtime_error&) + { + WSREP_WARN("Failed to get wsrep options"); + } + } + + return true; +} + +bool wsrep_check_mode (enum_wsrep_mode mask) +{ + return wsrep_mode & mask; +} + +//seconds after which the limit warnings suppression will be activated +#define WSREP_WARNING_ACTIVATION_TIMEOUT 5*60 +//number of limit warnings after which the suppression will be activated +#define WSREP_WARNING_ACTIVATION_THRESHOLD 10 + +enum wsrep_warning_type { + WSREP_DISABLED = 0, + WSREP_REQUIRE_PRIMARY_KEY= 1, + WSREP_REQUIRE_INNODB= 2, + WSREP_REQUIRE_MAX=3, +}; + +static ulonglong wsrep_warning_start_time=0; +static bool wsrep_warning_active[WSREP_REQUIRE_MAX+1]; +static ulonglong wsrep_warning_count[WSREP_REQUIRE_MAX+1]; +static ulonglong wsrep_total_warnings_count=0; + +/** + Auxiliary function to reset the limit of wsrep warnings. + This is done without mutex protection, but this should be good + enough as it doesn't matter if we loose a couple of suppressed + messages or if this is called multiple times. 
+*/ + +static void wsrep_reset_warnings(ulonglong now) +{ + uint i; + + wsrep_warning_start_time= now; + wsrep_total_warnings_count= 0; + + for (i= 0 ; i < WSREP_REQUIRE_MAX ; i++) + { + wsrep_warning_active[i]= false; + wsrep_warning_count[i]= 0; + } +} + +static const char* wsrep_warning_name(const enum wsrep_warning_type type) +{ + switch(type) + { + case WSREP_REQUIRE_PRIMARY_KEY: + return "WSREP_REQUIRE_PRIMARY_KEY"; break; + case WSREP_REQUIRE_INNODB: + return "WSREP_REQUIRE_INNODB"; break; + default: assert(0); return " "; break; // for compiler + } +} +/** + Auxiliary function to check if the warning statements should be + thrown or suppressed. + + Logic is: + - If we get more than WSREP_WARNING_ACTIVATION_THRESHOLD errors + of one type, that type of errors will be suppressed for + WSREP_WARNING_ACTIVATION_TIMEOUT. + - When the time limit has been reached, all suppressions are reset. + + This means that if one gets many different types of errors, some of them + may be reset less than WSREP_WARNING_ACTIVATION_TIMEOUT. However at + least one error is disabled for this time. + + SYNOPSIS: + @params + warning_type - The type of warning. + + RETURN: + 0 0k to log + 1 Message suppressed +*/ + +static bool wsrep_protect_against_warning_flood( + enum wsrep_warning_type warning_type) +{ + ulonglong count; + ulonglong now= my_interval_timer()/1000000000ULL; + + count= ++wsrep_warning_count[warning_type]; + wsrep_total_warnings_count++; + + /* + INITIALIZING: + If this is the first time this function is called with log warning + enabled, the monitoring the warnings should start. 
+ */ + if (wsrep_warning_start_time == 0) + { + wsrep_reset_warnings(now); + return false; + } + + /* + The following is true if we got too many errors or if the error was + already suppressed + */ + if (count >= WSREP_WARNING_ACTIVATION_THRESHOLD) + { + ulonglong diff_time= (now - wsrep_warning_start_time); + + if (!wsrep_warning_active[warning_type]) + { + /* + ACTIVATION: + We got WSREP_WARNING_ACTIVATION_THRESHOLD warnings in + less than WSREP_WARNING_ACTIVATION_TIMEOUT we activate the + suppression. + */ + if (diff_time <= WSREP_WARNING_ACTIVATION_TIMEOUT) + { + wsrep_warning_active[warning_type]= true; + WSREP_INFO("Suppressing warnings of type '%s' for up to %d seconds because of flooding", + wsrep_warning_name(warning_type), + WSREP_WARNING_ACTIVATION_TIMEOUT); + } + else + { + /* + There is no flooding till now, therefore we restart the monitoring + */ + wsrep_reset_warnings(now); + } + } + else + { + /* This type of warnings was suppressed */ + if (diff_time > WSREP_WARNING_ACTIVATION_TIMEOUT) + { + ulonglong save_count= wsrep_total_warnings_count; + /* Print a suppression note and remove the suppression */ + wsrep_reset_warnings(now); + WSREP_INFO("Suppressed %lu unsafe warnings during " + "the last %d seconds", + save_count, (int) diff_time); + } + } + } + + return wsrep_warning_active[warning_type]; +} + +/** + Auxiliary function to push warning to client and to the error log +*/ +static void wsrep_push_warning(THD *thd, + enum wsrep_warning_type type, + const handlerton *hton, + const TABLE_LIST *tables) +{ + switch(type) + { + case WSREP_REQUIRE_PRIMARY_KEY: + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_OPTION_PREVENTS_STATEMENT, + "WSREP: wsrep_mode = REQUIRED_PRIMARY_KEY enabled. " + "Table '%s'.'%s' should have PRIMARY KEY defined.", + tables->db.str, tables->table_name.str); + if (global_system_variables.log_warnings > 1 && + !wsrep_protect_against_warning_flood(type)) + WSREP_WARN("wsrep_mode = REQUIRED_PRIMARY_KEY enabled. 
" + "Table '%s'.'%s' should have PRIMARY KEY defined", + tables->db.str, tables->table_name.str); + break; + case WSREP_REQUIRE_INNODB: + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_OPTION_PREVENTS_STATEMENT, + "WSREP: wsrep_mode = STRICT_REPLICATION enabled. " + "Storage engine %s for table '%s'.'%s' is " + "not supported in Galera", + ha_resolve_storage_engine_name(hton), + tables->db.str, tables->table_name.str); + if (global_system_variables.log_warnings > 1 && + !wsrep_protect_against_warning_flood(type)) + WSREP_WARN("wsrep_mode = STRICT_REPLICATION enabled. " + "Storage engine %s for table '%s'.'%s' is " + "not supported in Galera", + ha_resolve_storage_engine_name(hton), + tables->db.str, tables->table_name.str); + break; + + default: assert(0); break; + } +} + +bool wsrep_check_mode_after_open_table (THD *thd, + const handlerton *hton, + TABLE_LIST *tables) +{ + enum_sql_command sql_command= thd->lex->sql_command; + bool is_dml_stmt= thd->get_command() != COM_STMT_PREPARE && + (sql_command == SQLCOM_INSERT || + sql_command == SQLCOM_INSERT_SELECT || + sql_command == SQLCOM_REPLACE || + sql_command == SQLCOM_REPLACE_SELECT || + sql_command == SQLCOM_UPDATE || + sql_command == SQLCOM_UPDATE_MULTI || + sql_command == SQLCOM_LOAD || + sql_command == SQLCOM_DELETE); + + if (!is_dml_stmt) + return true; + + const legacy_db_type db_type= hton->db_type; + bool replicate= ((db_type == DB_TYPE_MYISAM && wsrep_check_mode(WSREP_MODE_REPLICATE_MYISAM)) || + (db_type == DB_TYPE_ARIA && wsrep_check_mode(WSREP_MODE_REPLICATE_ARIA))); + TABLE *tbl= tables->table; + + if (replicate) + { + /* It is not recommended to replicate MyISAM as it lacks rollback feature + but if user demands then actions are replicated using TOI. + Following code will kick-start the TOI but this has to be done only once + per statement. + Note: kick-start will take-care of creating isolation key for all tables + involved in the list (provided all of them are MYISAM or Aria tables). 
*/ + if (!is_stat_table(&tables->db, &tables->alias)) + { + if (tbl->s->primary_key == MAX_KEY && + wsrep_check_mode(WSREP_MODE_REQUIRED_PRIMARY_KEY)) + { + /* Other replicated table doesn't have explicit primary-key defined. */ + wsrep_push_warning(thd, WSREP_REQUIRE_PRIMARY_KEY, hton, tables); + } + + wsrep_before_rollback(thd, true); + wsrep_after_rollback(thd, true); + wsrep_after_statement(thd); + WSREP_TO_ISOLATION_BEGIN(NULL, NULL, (tables)); + } + } else if (db_type != DB_TYPE_UNKNOWN && + db_type != DB_TYPE_PERFORMANCE_SCHEMA) + { + bool is_system_db= (tbl && + ((strcmp(tbl->s->db.str, "mysql") == 0) || + (strcmp(tbl->s->db.str, "information_schema") == 0))); + + if (!is_system_db && + !is_temporary_table(tables)) + { + + if (db_type != DB_TYPE_INNODB && + wsrep_check_mode(WSREP_MODE_STRICT_REPLICATION)) + { + /* Table is not an InnoDB table and strict replication is requested*/ + wsrep_push_warning(thd, WSREP_REQUIRE_INNODB, hton, tables); + } + + if (tbl->s->primary_key == MAX_KEY && + db_type == DB_TYPE_INNODB && + wsrep_check_mode(WSREP_MODE_REQUIRED_PRIMARY_KEY)) + { + /* InnoDB table doesn't have explicit primary-key defined. */ + wsrep_push_warning(thd, WSREP_REQUIRE_PRIMARY_KEY, hton, tables); + } + + if (db_type != DB_TYPE_INNODB && + thd->variables.sql_log_bin == 1 && + wsrep_check_mode(WSREP_MODE_DISALLOW_LOCAL_GTID)) + { + /* Table is not an InnoDB table and local GTIDs are disallowed */ + my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0)); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_OPTION_PREVENTS_STATEMENT, + "You can't execute statements that would generate local " + "GTIDs when wsrep_mode = DISALLOW_LOCAL_GTID is set. 
" + "Try disabling binary logging with SET sql_log_bin=0 " + "to execute this statement."); + goto wsrep_error_label; + } + } + } + + return true; + +wsrep_error_label: + return false; +} + +bool wsrep_check_mode_before_cmd_execute (THD *thd) +{ + bool ret= true; + if (wsrep_check_mode(WSREP_MODE_BINLOG_ROW_FORMAT_ONLY) && + !thd->is_current_stmt_binlog_format_row() && is_update_query(thd->lex->sql_command)) + { + my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0)); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_OPTION_PREVENTS_STATEMENT, + "WSREP: wsrep_mode = BINLOG_ROW_FORMAT_ONLY enabled. Only ROW binlog format is supported."); + ret= false; + } + if (wsrep_check_mode(WSREP_MODE_REQUIRED_PRIMARY_KEY) && + thd->lex->sql_command == SQLCOM_CREATE_TABLE) + { + Key *key; + List_iterator key_iterator(thd->lex->alter_info.key_list); + bool primary_key_found= false; + while ((key= key_iterator++)) + { + if (key->type == Key::PRIMARY) + { + primary_key_found= true; + break; + } + } + if (!primary_key_found) + { + my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0)); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_OPTION_PREVENTS_STATEMENT, + "WSREP: wsrep_mode = REQUIRED_PRIMARY_KEY enabled. 
Table should have PRIMARY KEY defined."); + ret= false; + } + } + return ret; +} + +bool wsrep_must_sync_wait (THD* thd, uint mask) +{ + bool ret= 0; + if (thd->variables.wsrep_on) + { + mysql_mutex_lock(&thd->LOCK_thd_data); + ret= (thd->variables.wsrep_sync_wait & mask) && + thd->wsrep_client_thread && + WSREP_ON && + !(thd->variables.wsrep_dirty_reads && + !is_update_query(thd->lex->sql_command)) && + !thd->in_active_multi_stmt_transaction() && + thd->wsrep_trx().state() != + wsrep::transaction::s_replaying && + thd->wsrep_cs().sync_wait_gtid().is_undefined(); + mysql_mutex_unlock(&thd->LOCK_thd_data); + } + return ret; +} + +bool wsrep_sync_wait (THD* thd, uint mask) +{ + if (wsrep_must_sync_wait(thd, mask)) + { + WSREP_DEBUG("wsrep_sync_wait: thd->variables.wsrep_sync_wait= %u, " + "mask= %u, thd->variables.wsrep_on= %d", + thd->variables.wsrep_sync_wait, mask, + thd->variables.wsrep_on); + /* + This allows autocommit SELECTs and a first SELECT after SET AUTOCOMMIT=0 + TODO: modify to check if thd has locked any rows. + */ + if (thd->wsrep_cs().sync_wait(-1)) + { + const char* msg; + int err; + + /* + Possibly relevant error codes: + ER_CHECKREAD, ER_ERROR_ON_READ, ER_INVALID_DEFAULT, ER_EMPTY_QUERY, + ER_FUNCTION_NOT_DEFINED, ER_NOT_ALLOWED_COMMAND, ER_NOT_SUPPORTED_YET, + ER_FEATURE_DISABLED, ER_QUERY_INTERRUPTED + */ + + switch (thd->wsrep_cs().current_error()) + { + case wsrep::e_not_supported_error: + msg= "synchronous reads by wsrep backend. 
" + "Please unset wsrep_causal_reads variable."; + err= ER_NOT_SUPPORTED_YET; + break; + default: + msg= "Synchronous wait failed."; + err= ER_LOCK_WAIT_TIMEOUT; // NOTE: the above msg won't be displayed + // with ER_LOCK_WAIT_TIMEOUT + } + + my_error(err, MYF(0), msg); + + return true; + } + } + + return false; +} + +enum wsrep::provider::status +wsrep_sync_wait_upto (THD* thd, + wsrep_gtid_t* upto, + int timeout) +{ + DBUG_ASSERT(upto); + enum wsrep::provider::status ret; + if (upto) + { + wsrep::gtid upto_gtid(wsrep::id(upto->uuid.data, sizeof(upto->uuid.data)), + wsrep::seqno(upto->seqno)); + ret= Wsrep_server_state::instance().wait_for_gtid(upto_gtid, timeout); + } + else + { + ret= Wsrep_server_state::instance().causal_read(timeout).second; + } + WSREP_DEBUG("wsrep_sync_wait_upto: %d", ret); + return ret; +} + +bool wsrep_is_show_query(enum enum_sql_command command) +{ + DBUG_ASSERT(command >= 0 && command <= SQLCOM_END); + return (sql_command_flags[command] & CF_STATUS_COMMAND) != 0; +} + +static bool wsrep_is_diagnostic_query(enum enum_sql_command command) +{ + assert(command >= 0 && command <= SQLCOM_END); + return (sql_command_flags[command] & CF_DIAGNOSTIC_STMT) != 0; +} + +static enum enum_wsrep_sync_wait +wsrep_sync_wait_mask_for_command(enum enum_sql_command command) +{ + switch (command) + { + case SQLCOM_SELECT: + case SQLCOM_CHECKSUM: + return WSREP_SYNC_WAIT_BEFORE_READ; + case SQLCOM_DELETE: + case SQLCOM_DELETE_MULTI: + case SQLCOM_UPDATE: + case SQLCOM_UPDATE_MULTI: + return WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE; + case SQLCOM_REPLACE: + case SQLCOM_INSERT: + case SQLCOM_REPLACE_SELECT: + case SQLCOM_INSERT_SELECT: + return WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE; + default: + if (wsrep_is_diagnostic_query(command)) + { + return WSREP_SYNC_WAIT_NONE; + } + if (wsrep_is_show_query(command)) + { + switch (command) + { + case SQLCOM_SHOW_PROFILE: + case SQLCOM_SHOW_PROFILES: + case SQLCOM_SHOW_SLAVE_HOSTS: + case SQLCOM_SHOW_RELAYLOG_EVENTS: + case 
SQLCOM_SHOW_SLAVE_STAT: + case SQLCOM_SHOW_BINLOG_STAT: + case SQLCOM_SHOW_ENGINE_STATUS: + case SQLCOM_SHOW_ENGINE_MUTEX: + case SQLCOM_SHOW_ENGINE_LOGS: + case SQLCOM_SHOW_PROCESSLIST: + case SQLCOM_SHOW_PRIVILEGES: + return WSREP_SYNC_WAIT_NONE; + default: + return WSREP_SYNC_WAIT_BEFORE_SHOW; + } + } + } + return WSREP_SYNC_WAIT_NONE; +} + +bool wsrep_sync_wait(THD* thd, enum enum_sql_command command) +{ + bool res = false; + if (WSREP_CLIENT(thd) && thd->variables.wsrep_sync_wait) + res = wsrep_sync_wait(thd, wsrep_sync_wait_mask_for_command(command)); + return res; +} + +void wsrep_keys_free(wsrep_key_arr_t* key_arr) +{ + for (size_t i= 0; i < key_arr->keys_len; ++i) + { + my_free((void*)key_arr->keys[i].key_parts); + } + my_free(key_arr->keys); + key_arr->keys= 0; + key_arr->keys_len= 0; +} + +/*! + * @param thd thread + * @param tables list of tables + * @param keys prepared keys + + * @return true if parent table append was successfull, otherwise false. +*/ +bool +wsrep_append_fk_parent_table(THD* thd, TABLE_LIST* tables, wsrep::key_array* keys) +{ + bool fail= false; + TABLE_LIST *table; + TABLE_LIST *table_last_in_list; + + for (table= tables; table; table= table->next_local) + { + if (is_temporary_table(table)) + { + WSREP_DEBUG("Temporary table %s.%s already opened query=%s", table->db.str, + table->table_name.str, wsrep_thd_query(thd)); + return false; + } + } + + thd->release_transactional_locks(); + uint counter; + MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint(); + + for (table_last_in_list= tables;;table_last_in_list= table_last_in_list->next_local) { + if (!table_last_in_list->next_local) { + break; + } + } + + if (open_tables(thd, &tables, &counter, MYSQL_OPEN_FORCE_SHARED_HIGH_PRIO_MDL)) + { + WSREP_DEBUG("Unable to open table for FK checks for %s", wsrep_thd_query(thd)); + fail= true; + goto exit; + } + + for (table= tables; table; table= table->next_local) + { + if (!is_temporary_table(table) && table->table) + { + 
FOREIGN_KEY_INFO *f_key_info; + List f_key_list; + + table->table->file->get_foreign_key_list(thd, &f_key_list); + List_iterator_fast it(f_key_list); + while ((f_key_info=it++)) + { + WSREP_DEBUG("appended fkey %s", f_key_info->referenced_table->str); + keys->push_back(wsrep_prepare_key_for_toi(f_key_info->referenced_db->str, + f_key_info->referenced_table->str, + wsrep::key::shared)); + } + } + } + +exit: + /* close the table and release MDL locks */ + close_thread_tables(thd); + thd->mdl_context.rollback_to_savepoint(mdl_savepoint); + bool invalidate_next_global= false; + for (table= tables; table; table= table->next_local) + { + table->table= NULL; + table->mdl_request.ticket= NULL; + // We should invalidate `next_global` only for entries that are added + // in this function + if (table == table_last_in_list) { + invalidate_next_global= true; + } + if (invalidate_next_global) { + table->next_global= NULL; + } + } + + return fail; +} + +bool wsrep_reload_ssl() +{ + try + { + std::string opts= Wsrep_server_state::instance().provider().options(); + if (opts.find("socket.ssl_reload") == std::string::npos) + { + WSREP_DEBUG("Option `socket.ssl_reload` not found in parameters."); + return false; + } + const std::string reload_ssl_param("socket.ssl_reload=1"); + enum wsrep::provider::status ret= Wsrep_server_state::instance().provider().options(reload_ssl_param); + if (ret) + { + WSREP_ERROR("Set options returned %d", ret); + return true; + } + return false; + } + catch (...) 
+ { + WSREP_ERROR("Failed to get provider options"); + return true; + } +} + +bool wsrep_split_allowlist(std::vector& allowlist) +{ + if (!wsrep_allowlist || 0 == strlen(wsrep_allowlist)) + { + return false; + } + std::istringstream ss{wsrep_allowlist}; + std::string token; + while (std::getline(ss, token, ',')) + { + if (!token.empty()) + { + struct sockaddr_in sa_4; + struct sockaddr_in6 sa_6; + if ((inet_pton(AF_INET, token.c_str(), &(sa_4.sin_addr)) != 0) || + (inet_pton(AF_INET6, token.c_str(), &(sa_6.sin6_addr)) != 0)) + { + allowlist.push_back(token); + } + else + { + WSREP_WARN("Invalid IP address %s provided in `wsrep_allowlist` variable", token.c_str()); + } + } + } + return allowlist.size(); +} + +/*! + * @param db Database string + * @param table Table string + * @param key Array of wsrep_key_t + * @param key_len In: number of elements in key array, Out: number of + * elements populated + * + * @return true if preparation was successful, otherwise false. + */ + +static bool wsrep_prepare_key_for_isolation(const char* db, + const char* table, + wsrep_buf_t* key, + size_t* key_len) +{ + if (*key_len < 2) return false; + + switch (wsrep_protocol_version) + { + case 0: + *key_len= 0; + break; + case 1: + case 2: + case 3: + case 4: + { + *key_len= 0; + if (db) + { + key[*key_len].ptr= db; + key[*key_len].len= strlen(db); + ++(*key_len); + if (table) + { + key[*key_len].ptr= table; + key[*key_len].len= strlen(table); + ++(*key_len); + } + } + break; + } + default: + assert(0); + WSREP_ERROR("Unsupported protocol version: %ld", wsrep_protocol_version); + unireg_abort(1); + return false; + } + + return true; +} + +static bool wsrep_prepare_key_for_isolation(const char* db, + const char* table, + wsrep_key_arr_t* ka) +{ + wsrep_key_t* tmp; + tmp= (wsrep_key_t*)my_realloc(PSI_INSTRUMENT_ME, ka->keys, + (ka->keys_len + 1) * sizeof(wsrep_key_t), + MYF(MY_ALLOW_ZERO_PTR)); + if (!tmp) + { + WSREP_ERROR("Can't allocate memory for key_array"); + return false; + } + 
ka->keys= tmp; + if (!(ka->keys[ka->keys_len].key_parts= (wsrep_buf_t*) + my_malloc(PSI_INSTRUMENT_ME, sizeof(wsrep_buf_t)*2, MYF(0)))) + { + WSREP_ERROR("Can't allocate memory for key_parts"); + return false; + } + ka->keys[ka->keys_len].key_parts_num= 2; + ++ka->keys_len; + if (!wsrep_prepare_key_for_isolation(db, table, + (wsrep_buf_t*)ka->keys[ka->keys_len - 1].key_parts, + &ka->keys[ka->keys_len - 1].key_parts_num)) + { + WSREP_ERROR("Preparing keys for isolation failed"); + return false; + } + + return true; +} + +static bool wsrep_prepare_keys_for_alter_add_fk(const char* child_table_db, + const Alter_info* alter_info, + wsrep_key_arr_t* ka) +{ + Key *key; + List_iterator key_iterator(const_cast(alter_info)->key_list); + while ((key= key_iterator++)) + { + if (key->type == Key::FOREIGN_KEY) + { + Foreign_key *fk_key= (Foreign_key *)key; + const char *db_name= fk_key->ref_db.str; + const char *table_name= fk_key->ref_table.str; + if (!db_name) + { + db_name= child_table_db; + } + if (!wsrep_prepare_key_for_isolation(db_name, table_name, ka)) + { + return false; + } + } + } + return true; +} + +static bool wsrep_prepare_keys_for_isolation(THD* thd, + const char* db, + const char* table, + const TABLE_LIST* table_list, + Alter_info* alter_info, + wsrep_key_arr_t* ka) +{ + ka->keys= 0; + ka->keys_len= 0; + + if (db || table) + { + if (!wsrep_prepare_key_for_isolation(db, table, ka)) + goto err; + } + + for (const TABLE_LIST* table= table_list; table; table= table->next_global) + { + if (!wsrep_prepare_key_for_isolation(table->db.str, table->table_name.str, ka)) + goto err; + } + + if (alter_info) + { + if (!wsrep_prepare_keys_for_alter_add_fk(table_list->db.str, alter_info, ka)) + goto err; + } + return false; + +err: + wsrep_keys_free(ka); + return true; +} + +/* + * Prepare key list from db/table and table_list + * + * Return zero in case of success, 1 in case of failure. 
+ */ + +bool wsrep_prepare_keys_for_isolation(THD* thd, + const char* db, + const char* table, + const TABLE_LIST* table_list, + wsrep_key_arr_t* ka) +{ + return wsrep_prepare_keys_for_isolation(thd, db, table, table_list, NULL, ka); +} + +bool wsrep_prepare_key(const uchar* cache_key, size_t cache_key_len, + const uchar* row_id, size_t row_id_len, + wsrep_buf_t* key, size_t* key_len) +{ + if (*key_len < 3) return false; + + *key_len= 0; + switch (wsrep_protocol_version) + { + case 0: + { + key[0].ptr= cache_key; + key[0].len= cache_key_len; + + *key_len= 1; + break; + } + case 1: + case 2: + case 3: + case 4: + { + key[0].ptr= cache_key; + key[0].len= strlen( (char*)cache_key ); + + key[1].ptr= cache_key + strlen( (char*)cache_key ) + 1; + key[1].len= strlen( (char*)(key[1].ptr) ); + + *key_len= 2; + break; + } + default: + return false; + } + + key[*key_len].ptr= row_id; + key[*key_len].len= row_id_len; + ++(*key_len); + + return true; +} + +bool wsrep_prepare_key_for_innodb(THD* thd, + const uchar* cache_key, + size_t cache_key_len, + const uchar* row_id, + size_t row_id_len, + wsrep_buf_t* key, + size_t* key_len) +{ + + return wsrep_prepare_key(cache_key, cache_key_len, row_id, row_id_len, key, key_len); +} + +wsrep::key wsrep_prepare_key_for_toi(const char* db, const char* table, + enum wsrep::key::type type) +{ + wsrep::key ret(type); + DBUG_ASSERT(db); + ret.append_key_part(db, strlen(db)); + if (table) ret.append_key_part(table, strlen(table)); + return ret; +} + +wsrep::key_array +wsrep_prepare_keys_for_alter_add_fk(const char* child_table_db, + const Alter_info* alter_info) + +{ + wsrep::key_array ret; + Key *key; + List_iterator key_iterator(const_cast(alter_info)->key_list); + while ((key= key_iterator++)) + { + if (key->type == Key::FOREIGN_KEY) + { + Foreign_key *fk_key= (Foreign_key *)key; + const char *db_name= fk_key->ref_db.str; + const char *table_name= fk_key->ref_table.str; + if (!db_name) + { + db_name= child_table_db; + } + 
ret.push_back(wsrep_prepare_key_for_toi(db_name, table_name, + wsrep::key::exclusive)); + } + } + return ret; +} + +wsrep::key_array wsrep_prepare_keys_for_toi(const char *db, + const char *table, + const TABLE_LIST *table_list, + const Alter_info *alter_info, + const wsrep::key_array *fk_tables) +{ + wsrep::key_array ret; + if (db || table) + { + ret.push_back(wsrep_prepare_key_for_toi(db, table, wsrep::key::exclusive)); + } + for (const TABLE_LIST* table= table_list; table; table= table->next_global) + { + ret.push_back(wsrep_prepare_key_for_toi(table->db.str, table->table_name.str, + wsrep::key::exclusive)); + } + if (alter_info) + { + wsrep::key_array fk(wsrep_prepare_keys_for_alter_add_fk(table_list->db.str, alter_info)); + if (!fk.empty()) + { + ret.insert(ret.end(), fk.begin(), fk.end()); + } + } + if (fk_tables && !fk_tables->empty()) + { + ret.insert(ret.end(), fk_tables->begin(), fk_tables->end()); + } + return ret; +} + +/* + * Construct Query_log_Event from thd query and serialize it + * into buffer. + * + * Return 0 in case of success, 1 in case of error. 
+ */ +int wsrep_to_buf_helper( + THD* thd, const char *query, uint query_len, uchar** buf, size_t* buf_len) +{ + IO_CACHE tmp_io_cache; + Log_event_writer writer(&tmp_io_cache, 0); + if (open_cached_file(&tmp_io_cache, mysql_tmpdir, TEMP_PREFIX, + 65536, MYF(MY_WME))) + return 1; + int ret(0); + enum enum_binlog_checksum_alg current_binlog_check_alg= + (enum_binlog_checksum_alg) binlog_checksum_options; + + Format_description_log_event *tmp_fd= new Format_description_log_event(4); + tmp_fd->checksum_alg= current_binlog_check_alg; + writer.write(tmp_fd); + delete tmp_fd; + +#ifdef GTID_SUPPORT + if (thd->variables.gtid_next.type == GTID_GROUP) + { + Gtid_log_event gtid_ev(thd, FALSE, &thd->variables.gtid_next); + if (!gtid_ev.is_valid()) ret= 0; + if (!ret && writer.write(>id_ev)) ret= 1; + } +#endif /* GTID_SUPPORT */ + /* + * Check if this is applier thread, slave_thread or + * we have set manually WSREP GTID seqno. Add GTID event. + */ + if (thd->slave_thread || wsrep_thd_is_applying(thd) || + thd->variables.wsrep_gtid_seq_no) + { + uint64 seqno= thd->variables.gtid_seq_no; + uint32 domain_id= thd->variables.gtid_domain_id; + uint32 server_id= thd->variables.server_id; + if (!thd->variables.gtid_seq_no && thd->variables.wsrep_gtid_seq_no) + { + seqno= thd->variables.wsrep_gtid_seq_no; + domain_id= wsrep_gtid_server.domain_id; + server_id= wsrep_gtid_server.server_id; + } + Gtid_log_event gtid_event(thd, seqno, domain_id, true, + LOG_EVENT_SUPPRESS_USE_F, true, 0); + gtid_event.server_id= server_id; + if (!gtid_event.is_valid()) ret= 0; + ret= writer.write(>id_event); + } + /* + It's local DDL so in case of possible gtid seqno (SET gtid_seq_no=X) + manipulation, seqno value will be ignored. 
+ */ + else + { + thd->variables.gtid_seq_no= 0; + } + + /* if there is prepare query, add event for it */ + if (!ret && thd->wsrep_TOI_pre_query) + { + Query_log_event ev(thd, thd->wsrep_TOI_pre_query, + thd->wsrep_TOI_pre_query_len, + FALSE, FALSE, FALSE, 0); + ev.checksum_alg= current_binlog_check_alg; + if (writer.write(&ev)) ret= 1; + } + + /* continue to append the actual query */ + Query_log_event ev(thd, query, query_len, FALSE, FALSE, FALSE, 0); + /* WSREP GTID mode, we need to change server_id */ + if (wsrep_gtid_mode && !thd->variables.gtid_seq_no) + ev.server_id= wsrep_gtid_server.server_id; + ev.checksum_alg= current_binlog_check_alg; + if (!ret && writer.write(&ev)) ret= 1; + if (!ret && wsrep_write_cache_buf(&tmp_io_cache, buf, buf_len)) ret= 1; + close_cached_file(&tmp_io_cache); + return ret; +} + +static int +wsrep_alter_query_string(THD *thd, String *buf) +{ + /* Append the "ALTER" part of the query */ + if (buf->append(STRING_WITH_LEN("ALTER "))) + return 1; + /* Append definer */ + append_definer(thd, buf, &(thd->lex->definer->user), &(thd->lex->definer->host)); + /* Append the left part of thd->query after event name part */ + if (buf->append(thd->lex->stmt_definition_begin, + thd->lex->stmt_definition_end - + thd->lex->stmt_definition_begin)) + return 1; + + return 0; +} + +static int wsrep_alter_event_query(THD *thd, uchar** buf, size_t* buf_len) +{ + String log_query; + + if (wsrep_alter_query_string(thd, &log_query)) + { + WSREP_WARN("events alter string failed: schema: %s, query: %s", + thd->get_db(), thd->query()); + return 1; + } + return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); +} + +#include "sql_show.h" +static int +create_view_query(THD *thd, uchar** buf, size_t* buf_len) +{ + LEX *lex= thd->lex; + SELECT_LEX *select_lex= lex->first_select_lex(); + TABLE_LIST *first_table= select_lex->table_list.first; + TABLE_LIST *views= first_table; + LEX_USER *definer; + String buff; + const LEX_CSTRING 
command[3]= + {{ STRING_WITH_LEN("CREATE ") }, + { STRING_WITH_LEN("ALTER ") }, + { STRING_WITH_LEN("CREATE OR REPLACE ") }}; + + buff.append(&command[thd->lex->create_view->mode]); + + if (lex->definer) + definer= get_current_user(thd, lex->definer); + else + { + /* + DEFINER-clause is missing; we have to create default definer in + persistent arena to be PS/SP friendly. + If this is an ALTER VIEW then the current user should be set as + the definer. + */ + definer= create_default_definer(thd, false); + } + + if (definer) + { + views->definer.user= definer->user; + views->definer.host= definer->host; + } else { + WSREP_ERROR("Failed to get DEFINER for VIEW."); + return 1; + } + + views->algorithm = lex->create_view->algorithm; + views->view_suid = lex->create_view->suid; + views->with_check = lex->create_view->check; + + view_store_options(thd, views, &buff); + buff.append(STRING_WITH_LEN("VIEW ")); + /* Test if user supplied a db (ie: we did not use thd->db) */ + if (views->db.str && views->db.str[0] && + (thd->db.str == NULL || cmp(&views->db, &thd->db))) + { + append_identifier(thd, &buff, &views->db); + buff.append('.'); + } + append_identifier(thd, &buff, &views->table_name); + if (lex->view_list.elements) + { + List_iterator_fast names(lex->view_list); + LEX_CSTRING *name; + int i; + + buff.append('('); + for (i= 0; (name= names++); i++) + { + append_identifier(thd, &buff, name); + buff.append(", ", 2); + } + if (i) + buff.length(buff.length()-2); + buff.append(')'); + } + buff.append(STRING_WITH_LEN(" AS ")); + buff.append(thd->lex->create_view->select.str, + thd->lex->create_view->select.length); + return wsrep_to_buf_helper(thd, buff.ptr(), buff.length(), buf, buf_len); +} + +/* + Rewrite DROP TABLE for TOI. Temporary tables are eliminated from + the query as they are visible only to client connection. + + TODO: See comments for sql_base.cc:drop_temporary_table() and refine + the function to deal with transactional locked tables. 
+ */ +static int wsrep_drop_table_query(THD* thd, uchar** buf, size_t* buf_len) +{ + + LEX* lex= thd->lex; + SELECT_LEX* select_lex= lex->first_select_lex(); + TABLE_LIST* first_table= select_lex->table_list.first; + String buff; + + DBUG_ASSERT(!lex->create_info.tmp_table()); + + bool found_temp_table= false; + for (TABLE_LIST* table= first_table; table; table= table->next_global) + { + if (thd->find_temporary_table(table->db.str, table->table_name.str)) + { + found_temp_table= true; + break; + } + } + + if (found_temp_table) + { + buff.append(STRING_WITH_LEN("DROP TABLE ")); + if (lex->check_exists) + buff.append(STRING_WITH_LEN("IF EXISTS ")); + + for (TABLE_LIST* table= first_table; table; table= table->next_global) + { + if (!thd->find_temporary_table(table->db.str, table->table_name.str)) + { + append_identifier(thd, &buff, table->db.str, table->db.length); + buff.append('.'); + append_identifier(thd, &buff, + table->table_name.str, table->table_name.length); + buff.append(','); + } + } + + /* Chop the last comma */ + buff.chop(); + buff.append(STRING_WITH_LEN(" /* generated by wsrep */")); + + WSREP_DEBUG("Rewrote '%s' as '%s'", thd->query(), buff.ptr()); + + return wsrep_to_buf_helper(thd, buff.ptr(), buff.length(), buf, buf_len); + } + else + { + return wsrep_to_buf_helper(thd, thd->query(), thd->query_length(), + buf, buf_len); + } +} + + +/* Forward declarations. 
*/ +int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len); + +bool wsrep_should_replicate_ddl_iterate(THD* thd, const TABLE_LIST* table_list) +{ + if (WSREP(thd)) + { + for (const TABLE_LIST* it= table_list; it; it= it->next_global) + { + if (it->table && + !wsrep_should_replicate_ddl(thd, it->table->s->db_type())) + return false; + } + } + return true; +} + +bool wsrep_should_replicate_ddl(THD* thd, const handlerton *hton) +{ + if (!wsrep_check_mode(WSREP_MODE_STRICT_REPLICATION)) + return true; + + if (!hton) + return true; + + switch (hton->db_type) + { + case DB_TYPE_INNODB: + return true; + break; + case DB_TYPE_MYISAM: + if (wsrep_check_mode(WSREP_MODE_REPLICATE_MYISAM)) + return true; + else + WSREP_DEBUG("wsrep OSU failed for %s", wsrep_thd_query(thd)); + break; + case DB_TYPE_ARIA: + if (wsrep_check_mode(WSREP_MODE_REPLICATE_ARIA)) + return true; + else + WSREP_DEBUG("wsrep OSU failed for %s", wsrep_thd_query(thd)); + break; + default: + WSREP_DEBUG("wsrep OSU failed for %s", wsrep_thd_query(thd)); + break; + } + + /* wsrep_mode = STRICT_REPLICATION, treat as error */ + my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0)); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_ILLEGAL_HA, + "WSREP: wsrep_mode = STRICT_REPLICATION enabled. " + "Storage engine %s not supported.", + ha_resolve_storage_engine_name(hton)); + return false; +} +/* + Decide if statement should run in TOI. + + Look if table or table_list contain temporary tables. If the + statement affects only temporary tables, statement should not run + in TOI. If the table list contains mix of regular and temporary tables + (DROP TABLE, OPTIMIZE, ANALYZE), statement should be run in TOI but + should be rewritten at later time for replication to contain only + non-temporary tables. 
+ */ +bool wsrep_can_run_in_toi(THD *thd, const char *db, const char *table, + const TABLE_LIST *table_list, + const HA_CREATE_INFO* create_info) +{ + DBUG_ASSERT(!table || db); + DBUG_ASSERT(table_list || db); + + LEX* lex= thd->lex; + SELECT_LEX* select_lex= lex->first_select_lex(); + const TABLE_LIST* first_table= select_lex->table_list.first; + + switch (lex->sql_command) + { + case SQLCOM_CREATE_TABLE: + if (thd->lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) + { + return false; + } + if (!wsrep_should_replicate_ddl(thd, create_info->db_type)) + { + return false; + } + /* + If mariadb master has replicated a CTAS, we should not replicate the create table + part separately as TOI, but to replicate both create table and following inserts + as one write set. + Howver, if CTAS creates empty table, we should replicate the create table alone + as TOI. We have to do relay log event lookup to see if row events follow the + create table event. + */ + if (thd->slave_thread && + !(thd->rgi_slave->gtid_ev_flags2 & Gtid_log_event::FL_STANDALONE)) + { + /* this is CTAS, either empty or populated table */ + ulonglong event_size = 0; + enum Log_event_type ev_type= wsrep_peak_event(thd->rgi_slave, &event_size); + switch (ev_type) + { + case QUERY_EVENT: + /* CTAS with empty table, we replicate create table as TOI */ + break; + + case TABLE_MAP_EVENT: + WSREP_DEBUG("replicating CTAS of empty table as TOI"); + // fall through + case WRITE_ROWS_EVENT: + /* CTAS with populated table, we replicate later at commit time */ + WSREP_DEBUG("skipping create table of CTAS replication"); + return false; + + default: + WSREP_WARN("unexpected async replication event: %d", ev_type); + } + return true; + } + /* no next async replication event */ + return true; + break; + case SQLCOM_CREATE_VIEW: + + DBUG_ASSERT(!table_list); + DBUG_ASSERT(first_table); /* First table is view name */ + /* + If any of the remaining tables refer to temporary table error + is returned to client, so TOI can be 
skipped + */ + for (const TABLE_LIST* it= first_table->next_global; it; it= it->next_global) + { + if (thd->find_temporary_table(it)) + { + return false; + } + } + return true; + break; + case SQLCOM_CREATE_TRIGGER: + + DBUG_ASSERT(first_table); + + if (thd->find_temporary_table(first_table)) + { + return false; + } + return true; + break; + case SQLCOM_DROP_TRIGGER: + DBUG_ASSERT(table_list); + if (thd->find_temporary_table(table_list)) + { + return false; + } + return true; + break; + case SQLCOM_ALTER_TABLE: + if (create_info) + { + const handlerton *hton= create_info->db_type; + + if (!hton) + hton= ha_default_handlerton(thd); + if (!wsrep_should_replicate_ddl(thd, hton)) + return false; + } + /* fallthrough */ + default: + if (table && !thd->find_temporary_table(db, table)) + { + return true; + } + + if (table_list) + { + for (const TABLE_LIST* table= first_table; table; table= table->next_global) + { + if (!thd->find_temporary_table(table->db.str, table->table_name.str)) + { + return true; + } + } + } + + return !(table || table_list); + break; + case SQLCOM_CREATE_SEQUENCE: + /* No TOI for temporary sequences as they are + not replicated */ + if (thd->lex->tmp_table()) + { + return false; + } + return true; + + } +} + +static int wsrep_create_sp(THD *thd, uchar** buf, size_t* buf_len) +{ + String log_query; + sp_head *sp= thd->lex->sphead; + sql_mode_t saved_mode= thd->variables.sql_mode; + String retstr(64); + LEX_CSTRING returns= empty_clex_str; + retstr.set_charset(system_charset_info); + + log_query.set_charset(system_charset_info); + + if (sp->m_handler->type() == SP_TYPE_FUNCTION) + { + sp_returns_type(thd, retstr, sp); + retstr.get_value(&returns); + } + if (sp->m_handler-> + show_create_sp(thd, &log_query, + sp->m_explicit_name ? 
sp->m_db : null_clex_str, + sp->m_name, sp->m_params, returns, + sp->m_body, sp->chistics(), + thd->lex->definer[0], + thd->lex->create_info, + saved_mode)) + { + WSREP_WARN("SP create string failed: schema: %s, query: %s", + thd->get_db(), thd->query()); + return 1; + } + + return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); +} + +static int wsrep_TOI_event_buf(THD* thd, uchar** buf, size_t* buf_len) +{ + int err; + switch (thd->lex->sql_command) + { + case SQLCOM_CREATE_VIEW: + err= create_view_query(thd, buf, buf_len); + break; + case SQLCOM_CREATE_PROCEDURE: + case SQLCOM_CREATE_SPFUNCTION: + err= wsrep_create_sp(thd, buf, buf_len); + break; + case SQLCOM_CREATE_TRIGGER: + err= wsrep_create_trigger_query(thd, buf, buf_len); + break; + case SQLCOM_CREATE_EVENT: + err= wsrep_create_event_query(thd, buf, buf_len); + break; + case SQLCOM_ALTER_EVENT: + err= wsrep_alter_event_query(thd, buf, buf_len); + break; + case SQLCOM_DROP_TABLE: + err= wsrep_drop_table_query(thd, buf, buf_len); + break; + case SQLCOM_CREATE_ROLE: + if (sp_process_definer(thd)) + { + WSREP_WARN("Failed to set CREATE ROLE definer for TOI."); + } + /* fallthrough */ + default: + err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(), buf, + buf_len); + break; + } + + return err; +} + +static void wsrep_TOI_begin_failed(THD* thd, const wsrep_buf_t* /* const err */) +{ + if (wsrep_thd_trx_seqno(thd) > 0) + { + /* GTID was granted and TO acquired - need to log event and release TO */ + if (wsrep_emulate_bin_log) wsrep_thd_binlog_trx_reset(thd); + if (wsrep_write_dummy_event(thd, "TOI begin failed")) { goto fail; } + wsrep::client_state& cs(thd->wsrep_cs()); + std::string const err(wsrep::to_c_string(cs.current_error())); + wsrep::mutable_buffer err_buf; + err_buf.push_back(err); + int const ret= cs.leave_toi_local(err_buf); + if (ret) + { + WSREP_ERROR("Leaving critical section for failed TOI failed: thd: %lld, " + "schema: %s, SQL: %s, rcode: %d 
wsrep_error: %s", + (long long)thd->real_id, thd->db.str, + thd->query(), ret, err.c_str()); + goto fail; + } + } + return; +fail: + WSREP_ERROR("Failed to release TOI resources. Need to abort."); + unireg_abort(1); +} + + +/* + returns: + 0: statement was replicated as TOI + 1: TOI replication was skipped + -1: TOI replication failed + */ +static int wsrep_TOI_begin(THD *thd, const char *db, const char *table, + const TABLE_LIST *table_list, + const Alter_info *alter_info, + const wsrep::key_array *fk_tables, + const HA_CREATE_INFO *create_info) +{ + DBUG_ASSERT(wsrep_OSU_method_get(thd) == WSREP_OSU_TOI); + + WSREP_DEBUG("TOI Begin: %s", wsrep_thd_query(thd)); + + if (wsrep_can_run_in_toi(thd, db, table, table_list, create_info) == false) + { + WSREP_DEBUG("No TOI for %s", wsrep_thd_query(thd)); + return 1; + } + + uchar* buf= 0; + size_t buf_len(0); + int buf_err; + int rc; + + buf_err= wsrep_TOI_event_buf(thd, &buf, &buf_len); + + if (buf_err) { + WSREP_ERROR("Failed to create TOI event buf: %d", buf_err); + my_message(ER_UNKNOWN_ERROR, + "WSREP replication failed to prepare TOI event buffer. " + "Check your query.", + MYF(0)); + return -1; + } + + struct wsrep_buf buff= { buf, buf_len }; + + wsrep::key_array key_array= + wsrep_prepare_keys_for_toi(db, table, table_list, alter_info, fk_tables); + + if (thd->has_read_only_protection()) + { + /* non replicated DDL, affecting temporary tables only */ + WSREP_DEBUG("TO isolation skipped, sql: %s." 
+ "Only temporary tables affected.", + wsrep_thd_query(thd)); + if (buf) my_free(buf); + return -1; + } + + thd_proc_info(thd, "acquiring total order isolation"); + WSREP_DEBUG("wsrep_TOI_begin for %s", wsrep_thd_query(thd)); + THD_STAGE_INFO(thd, stage_waiting_isolation); + + wsrep::client_state& cs(thd->wsrep_cs()); + + int ret= cs.enter_toi_local(key_array, + wsrep::const_buffer(buff.ptr, buff.len)); + + if (ret) + { + DBUG_ASSERT(cs.current_error()); + WSREP_DEBUG("to_execute_start() failed for %llu: %s, seqno: %lld", + thd->thread_id, wsrep_thd_query(thd), + (long long)wsrep_thd_trx_seqno(thd)); + + /* jump to error handler in mysql_execute_command() */ + switch (cs.current_error()) + { + case wsrep::e_size_exceeded_error: + WSREP_WARN("TO isolation failed for: %d, schema: %s, sql: %s. " + "Maximum size exceeded.", + ret, + (thd->db.str ? thd->db.str : "(null)"), + wsrep_thd_query(thd)); + my_error(ER_UNKNOWN_ERROR, MYF(0), "Maximum writeset size exceeded"); + break; + case wsrep::e_deadlock_error: + WSREP_WARN("TO isolation failed for: %d, schema: %s, sql: %s. " + "Deadlock error.", + ret, + (thd->db.str ? thd->db.str : "(null)"), + wsrep_thd_query(thd)); + my_error(ER_LOCK_DEADLOCK, MYF(0)); + break; + case wsrep::e_timeout_error: + WSREP_WARN("TO isolation failed for: %d, schema: %s, sql: %s. " + "Operation timed out.", + ret, + (thd->db.str ? thd->db.str : "(null)"), + wsrep_thd_query(thd)); + my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0)); + break; + default: + WSREP_WARN("TO isolation failed for: %d, schema: %s, sql: %s. " + "Check your wsrep connection state and retry the query.", + ret, + (thd->db.str ? thd->db.str : "(null)"), + wsrep_thd_query(thd)); + + if (!thd->is_error()) + { + my_error(ER_LOCK_DEADLOCK, MYF(0), "WSREP replication failed. 
Check " + "your wsrep connection state and retry the query."); + } + } + rc= -1; + } + else { + if (!thd->variables.gtid_seq_no) + { + uint64 seqno= 0; + if (thd->variables.wsrep_gtid_seq_no && + thd->variables.wsrep_gtid_seq_no > wsrep_gtid_server.seqno()) + { + seqno= thd->variables.wsrep_gtid_seq_no; + wsrep_gtid_server.seqno(thd->variables.wsrep_gtid_seq_no); + } + else + { + seqno= wsrep_gtid_server.seqno_inc(); + } + thd->variables.wsrep_gtid_seq_no= 0; + thd->wsrep_current_gtid_seqno= seqno; + if (mysql_bin_log.is_open() && wsrep_gtid_mode) + { + thd->variables.gtid_seq_no= seqno; + thd->variables.gtid_domain_id= wsrep_gtid_server.domain_id; + thd->variables.server_id= wsrep_gtid_server.server_id; + } + } + ++wsrep_to_isolation; + rc= 0; + } + + if (buf) my_free(buf); + + if (rc) wsrep_TOI_begin_failed(thd, NULL); + + return rc; +} + +static void wsrep_TOI_end(THD *thd) { + wsrep_to_isolation--; + wsrep::client_state& client_state(thd->wsrep_cs()); + DBUG_ASSERT(wsrep_thd_is_local_toi(thd)); + WSREP_DEBUG("TO END: %lld: %s", client_state.toi_meta().seqno().get(), + wsrep_thd_query(thd)); + + wsrep_gtid_server.signal_waiters(thd->wsrep_current_gtid_seqno, false); + + if (wsrep_thd_is_local_toi(thd)) + { + wsrep::mutable_buffer err; + + thd->wsrep_last_written_gtid_seqno= thd->wsrep_current_gtid_seqno; + wsrep_set_SE_checkpoint(client_state.toi_meta().gtid(), wsrep_gtid_server.gtid()); + + if (thd->is_error() && !wsrep_must_ignore_error(thd)) + { + wsrep_store_error(thd, err); + } + + int const ret= client_state.leave_toi_local(err); + + if (!ret) + { + WSREP_DEBUG("TO END: %lld", client_state.toi_meta().seqno().get()); + } + else + { + WSREP_WARN("TO isolation end failed for: %d, schema: %s, sql: %s", + ret, (thd->db.str ? 
thd->db.str : "(null)"), wsrep_thd_query(thd)); + } + } +} + +static int wsrep_RSU_begin(THD *thd, const char *db_, const char *table_) +{ + WSREP_DEBUG("RSU BEGIN: %lld, : %s", wsrep_thd_trx_seqno(thd), + wsrep_thd_query(thd)); + + /* For CREATE TEMPORARY SEQUENCE we do not start RSU because + object is local only and actually CREATE TABLE + INSERT + */ + if (thd->lex->sql_command == SQLCOM_CREATE_SEQUENCE && + thd->lex->tmp_table()) + return 1; + + if (thd->variables.wsrep_OSU_method == WSREP_OSU_RSU && + thd->variables.sql_log_bin == 1 && + wsrep_check_mode(WSREP_MODE_DISALLOW_LOCAL_GTID)) + { + /* wsrep_mode = WSREP_MODE_DISALLOW_LOCAL_GTID, treat as error */ + my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0)); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_OPTION_PREVENTS_STATEMENT, + "You can't execute statements that would generate local " + "GTIDs when wsrep_mode = DISALLOW_LOCAL_GTID is set. " + "Try disabling binary logging with SET sql_log_bin=0 " + "to execute this statement."); + + return -1; + } + + if (thd->wsrep_cs().begin_rsu(5000)) + { + WSREP_WARN("RSU begin failed"); + } + else + { + thd->variables.wsrep_on= 0; + } + return 0; +} + +static void wsrep_RSU_end(THD *thd) +{ + WSREP_DEBUG("RSU END: %lld : %s", wsrep_thd_trx_seqno(thd), + wsrep_thd_query(thd)); + if (thd->wsrep_cs().end_rsu()) + { + WSREP_WARN("Failed to end RSU, server may need to be restarted"); + } + thd->variables.wsrep_on= 1; +} + +static inline bool is_replaying_connection(THD *thd) +{ + bool ret; + + mysql_mutex_lock(&thd->LOCK_thd_data); + ret= (thd->wsrep_trx().state() == wsrep::transaction::s_replaying) ? true : false; + mysql_mutex_unlock(&thd->LOCK_thd_data); + + return ret; +} + +int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_, + const TABLE_LIST* table_list, + const Alter_info *alter_info, + const wsrep::key_array *fk_tables, + const HA_CREATE_INFO *create_info) +{ + /* + No isolation for applier or replaying threads. 
+ */ + if (!wsrep_thd_is_local(thd)) + { + if (wsrep_OSU_method_get(thd) == WSREP_OSU_TOI) + WSREP_DEBUG("%s TOI Begin: %s", + is_replaying_connection(thd) ? "Replay" : "Apply", + wsrep_thd_query(thd)); + + return 0; + } + + if (thd->wsrep_parallel_slave_wait_for_prior_commit()) + { + WSREP_WARN("TOI: wait_for_prior_commit() returned error."); + return -1; + } + + int ret= 0; + + mysql_mutex_lock(&thd->LOCK_thd_data); + + if (thd->wsrep_trx().state() == wsrep::transaction::s_must_abort) + { + WSREP_INFO("thread: %lld schema: %s query: %s has been aborted due to multi-master conflict", + (longlong) thd->thread_id, thd->get_db(), thd->query()); + mysql_mutex_unlock(&thd->LOCK_thd_data); + return WSREP_TRX_FAIL; + } + mysql_mutex_unlock(&thd->LOCK_thd_data); + + DBUG_ASSERT(wsrep_thd_is_local(thd)); + DBUG_ASSERT(thd->wsrep_trx().ws_meta().seqno().is_undefined()); + + if (Wsrep_server_state::instance().desynced_on_pause()) + { + my_message(ER_UNKNOWN_COM_ERROR, + "Aborting TOI: Replication paused on node for FTWRL/BACKUP STAGE.", MYF(0)); + WSREP_DEBUG("Aborting TOI: Replication paused on node for FTWRL/BACKUP STAGE.: %s %llu", + wsrep_thd_query(thd), thd->thread_id); + return -1; + } + + /* If we are inside LOCK TABLE we release it and give warning. */ + if (thd->variables.option_bits & OPTION_TABLE_LOCK && + thd->lex->sql_command == SQLCOM_CREATE_SEQUENCE) + { + thd->locked_tables_list.unlock_locked_tables(thd); + thd->variables.option_bits&= ~(OPTION_TABLE_LOCK); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "Galera cluster does not support LOCK TABLE on " + "SEQUENCES. Lock is released."); + } + if (wsrep_debug && thd->mdl_context.has_locks()) + { + WSREP_DEBUG("thread holds MDL locks at TO begin: %s %llu", + wsrep_thd_query(thd), thd->thread_id); + } + + /* + It makes sense to set auto_increment_* to defaults in TOI operations. 
+ Must be done before wsrep_TOI_begin() since Query_log_event encapsulating + TOI statement and auto inc variables for wsrep replication is constructed + there. Variables are reset back in THD::reset_for_next_command() before + processing of next command. + */ + if (wsrep_auto_increment_control) + { + thd->variables.auto_increment_offset= 1; + thd->variables.auto_increment_increment= 1; + } + + if (thd->variables.wsrep_on && wsrep_thd_is_local(thd)) + { + switch (wsrep_OSU_method_get(thd)) { + case WSREP_OSU_TOI: + ret= wsrep_TOI_begin(thd, db_, table_, table_list, alter_info, fk_tables, + create_info); + break; + case WSREP_OSU_RSU: + ret= wsrep_RSU_begin(thd, db_, table_); + break; + default: + WSREP_ERROR("Unsupported OSU method: %lu", + wsrep_OSU_method_get(thd)); + ret= -1; + break; + } + + switch (ret) { + case 0: /* wsrep_TOI_begin should set toi mode */ + if (thd->variables.wsrep_OSU_method == WSREP_OSU_TOI) + { + /* + TOI operations ignore the provided lock_wait_timeout once replicated, + and restore it after operation is done. 
+ */ + thd->variables.saved_lock_wait_timeout= thd->variables.lock_wait_timeout; + thd->variables.lock_wait_timeout= LONG_TIMEOUT; + } + break; + case 1: + /* TOI replication skipped, treat as success */ + ret= 0; + break; + case -1: + /* TOI replication failed, treat as error */ + break; + } + } + + return ret; +} + +void wsrep_to_isolation_end(THD *thd) +{ + if (wsrep_thd_is_local_toi(thd)) + { + thd->variables.lock_wait_timeout= thd->variables.saved_lock_wait_timeout; + DBUG_ASSERT(wsrep_OSU_method_get(thd) == WSREP_OSU_TOI); + wsrep_TOI_end(thd); + } + else if (wsrep_thd_is_in_rsu(thd)) + { + thd->variables.lock_wait_timeout= thd->variables.saved_lock_wait_timeout; + DBUG_ASSERT(wsrep_OSU_method_get(thd) == WSREP_OSU_RSU); + wsrep_RSU_end(thd); + } + else + { + /* Applier or replaying threads just output TO END */ + if (wsrep_debug) + { + wsrep::client_state& client_state(thd->wsrep_cs()); + WSREP_DEBUG("%s TO END: %lld: %s", + is_replaying_connection(thd) ? "Replay" : "Apply", + client_state.toi_meta().seqno().get(), + wsrep_thd_query(thd)); + } + return; + } + + if (wsrep_emulate_bin_log) wsrep_thd_binlog_trx_reset(thd); +} + +#define WSREP_MDL_LOG(severity, msg, schema, schema_len, req, gra) \ + WSREP_##severity( \ + "%s\n" \ + "schema: %.*s\n" \ + "request: (%llu \tseqno %lld \twsrep (%s, %s, %s) cmd %d %d \t%s)\n" \ + "granted: (%llu \tseqno %lld \twsrep (%s, %s, %s) cmd %d %d \t%s)", \ + msg, schema_len, schema, \ + req->thread_id, (long long)wsrep_thd_trx_seqno(req), \ + wsrep_thd_client_mode_str(req), wsrep_thd_client_state_str(req), wsrep_thd_transaction_state_str(req), \ + req->get_command(), req->lex->sql_command, req->query(), \ + gra->thread_id, (long long)wsrep_thd_trx_seqno(gra), \ + wsrep_thd_client_mode_str(gra), wsrep_thd_client_state_str(gra), wsrep_thd_transaction_state_str(gra), \ + gra->get_command(), gra->lex->sql_command, gra->query()); + +/** + Check if request for the metadata lock should be granted to the requester. 
+ + @param requestor_ctx The MDL context of the requestor + @param ticket MDL ticket for the requested lock + + @retval TRUE Lock request can be granted + @retval FALSE Lock request cannot be granted +*/ + +void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, + const MDL_ticket *ticket, + const MDL_key *key) +{ + THD *request_thd= requestor_ctx->get_thd(); + THD *granted_thd= ticket->get_ctx()->get_thd(); + + /* Fallback to the non-wsrep behaviour */ + if (!WSREP(request_thd)) return; + + const char* schema= key->db_name(); + int schema_len= key->db_name_length(); + + mysql_mutex_lock(&request_thd->LOCK_thd_data); + + if (wsrep_thd_is_toi(request_thd) || + wsrep_thd_is_applying(request_thd)) + { + WSREP_DEBUG("wsrep_handle_mdl_conflict request TOI/APPLY for %s", + wsrep_thd_query(request_thd)); + THD_STAGE_INFO(request_thd, stage_waiting_isolation); + mysql_mutex_unlock(&request_thd->LOCK_thd_data); + WSREP_MDL_LOG(DEBUG, "MDL conflict ", schema, schema_len, + request_thd, granted_thd); + ticket->wsrep_report(wsrep_debug); + + DEBUG_SYNC(request_thd, "before_wsrep_thd_abort"); + DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort", { + const char act[]= "now " + "SIGNAL sync.before_wsrep_thd_abort_reached " + "WAIT_FOR signal.before_wsrep_thd_abort"; + DBUG_ASSERT(!debug_sync_set_action(request_thd, STRING_WITH_LEN(act))); + };); + + /* Here we will call wsrep_abort_transaction so we should hold + THD::LOCK_thd_data to protect victim from concurrent usage + and THD::LOCK_thd_kill to protect from disconnect or delete. 
+ + */ + mysql_mutex_lock(&granted_thd->LOCK_thd_kill); + mysql_mutex_lock(&granted_thd->LOCK_thd_data); + + if (wsrep_thd_is_toi(granted_thd) || + wsrep_thd_is_applying(granted_thd)) + { + if (wsrep_thd_is_aborting(granted_thd)) + { + WSREP_DEBUG("BF thread waiting for SR in aborting state for %s", + wsrep_thd_query(request_thd)); + THD_STAGE_INFO(request_thd, stage_waiting_isolation); + ticket->wsrep_report(wsrep_debug); + } + else if (wsrep_thd_is_SR(granted_thd) && !wsrep_thd_is_SR(request_thd)) + { + WSREP_MDL_LOG(INFO, "MDL conflict, DDL vs SR", + schema, schema_len, request_thd, granted_thd); + WSREP_DEBUG("wsrep_handle_mdl_conflict DDL vs SR for %s", + wsrep_thd_query(request_thd)); + THD_STAGE_INFO(request_thd, stage_waiting_isolation); + wsrep_abort_thd(request_thd, granted_thd, 1); + } + else + { + WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", schema, schema_len, + request_thd, granted_thd); + ticket->wsrep_report(true); + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + mysql_mutex_unlock(&granted_thd->LOCK_thd_kill); + unireg_abort(1); + } + } + else if (granted_thd->lex->sql_command == SQLCOM_FLUSH || + granted_thd->mdl_context.has_explicit_locks()) + { + WSREP_DEBUG("BF thread waiting for FLUSH for %s", + wsrep_thd_query(request_thd)); + THD_STAGE_INFO(request_thd, stage_waiting_ddl); + ticket->wsrep_report(wsrep_debug); + if (granted_thd->current_backup_stage != BACKUP_FINISHED && + wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP)) + { + wsrep_abort_thd(request_thd, granted_thd, 1); + } + } + else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE) + { + WSREP_DEBUG("DROP caused BF abort, conf %s for %s", + wsrep_thd_transaction_state_str(granted_thd), + wsrep_thd_query(request_thd)); + THD_STAGE_INFO(request_thd, stage_waiting_isolation); + ticket->wsrep_report(wsrep_debug); + wsrep_abort_thd(request_thd, granted_thd, 1); + } + else + { + WSREP_MDL_LOG(DEBUG, "MDL conflict-> BF abort", schema, schema_len, + request_thd, granted_thd); + 
WSREP_DEBUG("wsrep_handle_mdl_conflict -> BF abort for %s", + wsrep_thd_query(request_thd)); + THD_STAGE_INFO(request_thd, stage_waiting_isolation); + ticket->wsrep_report(wsrep_debug); + + if (granted_thd->wsrep_trx().active()) + { + wsrep_abort_thd(request_thd, granted_thd, 1); + } + else + { + /* + Granted_thd is likely executing with wsrep_on=0. If the requesting + thd is BF, BF abort and wait. + */ + if (wsrep_thd_is_BF(request_thd, FALSE)) + { + granted_thd->awake_no_mutex(KILL_QUERY_HARD); + ha_abort_transaction(request_thd, granted_thd, TRUE); + } + else + { + WSREP_MDL_LOG(INFO, "MDL unknown BF-BF conflict", + schema, schema_len, + request_thd, granted_thd); + ticket->wsrep_report(true); + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + mysql_mutex_unlock(&granted_thd->LOCK_thd_kill); + unireg_abort(1); + } + } + } + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + mysql_mutex_unlock(&granted_thd->LOCK_thd_kill); + } + else + { + mysql_mutex_unlock(&request_thd->LOCK_thd_data); + } +} + +/**/ +static bool abort_replicated(THD *thd) +{ + bool ret_code= false; + mysql_mutex_lock(&thd->LOCK_thd_kill); + mysql_mutex_lock(&thd->LOCK_thd_data); + if (thd->wsrep_trx().state() == wsrep::transaction::s_committing) + { + WSREP_DEBUG("aborting replicated trx: %llu", (ulonglong)(thd->real_id)); + + wsrep_abort_thd(thd, thd, TRUE); + ret_code= true; + } + mysql_mutex_unlock(&thd->LOCK_thd_data); + mysql_mutex_unlock(&thd->LOCK_thd_kill); + return ret_code; +} + +/**/ +static inline bool is_client_connection(THD *thd) +{ + return (thd->wsrep_client_thread && thd->variables.wsrep_on); +} + +static inline bool is_committing_connection(THD *thd) +{ + bool ret; + + mysql_mutex_lock(&thd->LOCK_thd_data); + ret= (thd->wsrep_trx().state() == wsrep::transaction::s_committing) ? 
true : false; + mysql_mutex_unlock(&thd->LOCK_thd_data); + + return ret; +} + +static my_bool have_client_connections(THD *thd, void*) +{ + DBUG_PRINT("quit",("Informing thread %lld that it's time to die", + (longlong) thd->thread_id)); + if (is_client_connection(thd) && + (thd->killed == KILL_CONNECTION || + thd->killed == KILL_CONNECTION_HARD)) + { + (void)abort_replicated(thd); + return 1; + } + return 0; +} + +static void wsrep_close_thread(THD *thd) +{ + thd->set_killed(KILL_CONNECTION_HARD); + MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (thd)); + mysql_mutex_lock(&thd->LOCK_thd_kill); + thd->abort_current_cond_wait(true); + mysql_mutex_unlock(&thd->LOCK_thd_kill); +} + +static my_bool have_committing_connections(THD *thd, void *) +{ + return is_client_connection(thd) && is_committing_connection(thd) ? 1 : 0; +} + +int wsrep_wait_committing_connections_close(int wait_time) +{ + int sleep_time= 100; + + WSREP_DEBUG("wait for committing transaction to close: %d sleep: %d", wait_time, sleep_time); + while (server_threads.iterate(have_committing_connections) && wait_time > 0) + { + WSREP_DEBUG("wait for committing transaction to close: %d", wait_time); + my_sleep(sleep_time); + wait_time -= sleep_time; + } + return server_threads.iterate(have_committing_connections); +} + +static my_bool kill_all_threads(THD *thd, THD *caller_thd) +{ + DBUG_PRINT("quit", ("Informing thread %lld that it's time to die", + (longlong) thd->thread_id)); + /* We skip slave threads & scheduler on this first loop through. 
*/ + if (is_client_connection(thd) && thd != caller_thd) + { + if (is_replaying_connection(thd)) + thd->set_killed(KILL_CONNECTION_HARD); + else if (!abort_replicated(thd)) + { + /* replicated transactions must be skipped */ + WSREP_DEBUG("closing connection %lld", (longlong) thd->thread_id); + /* instead of wsrep_close_thread() we do now soft kill by THD::awake */ + thd->awake(KILL_CONNECTION_HARD); + } + } + return 0; +} + +static my_bool kill_remaining_threads(THD *thd, THD *caller_thd) +{ +#ifndef __bsdi__ // Bug in BSDI kernel + if (is_client_connection(thd) && + !abort_replicated(thd) && + !is_replaying_connection(thd) && + thd_is_connection_alive(thd) && + thd != caller_thd) + { + + WSREP_INFO("killing local connection: %lld", (longlong) thd->thread_id); + close_connection(thd); + } +#endif + return 0; +} + +void wsrep_close_client_connections(my_bool wait_to_end, THD* except_caller_thd) +{ + /* Clear thread cache */ + thread_cache.final_flush(); + + /* + First signal all threads that it's time to die + */ + server_threads.iterate(kill_all_threads, except_caller_thd); + + /* + Force remaining threads to die by closing the connection to the client + */ + server_threads.iterate(kill_remaining_threads, except_caller_thd); + + DBUG_PRINT("quit", ("Waiting for threads to die (count=%u)", THD_count::value())); + WSREP_DEBUG("waiting for client connections to close: %u", THD_count::value()); + + while (wait_to_end && server_threads.iterate(have_client_connections)) + { + sleep(1); + DBUG_PRINT("quit",("One thread died (count=%u)", THD_count::value())); + } + + /* All client connection threads have now been aborted */ +} + + +void wsrep_close_applier(THD *thd) +{ + WSREP_DEBUG("closing applier %lld", (longlong) thd->thread_id); + wsrep_close_thread(thd); +} + +static my_bool wsrep_close_threads_callback(THD *thd, THD *caller_thd) +{ + DBUG_PRINT("quit",("Informing thread %lld that it's time to die", + (longlong) thd->thread_id)); + /* We skip slave threads & 
scheduler on this first loop through. */ + if (thd->wsrep_applier && thd != caller_thd) + { + WSREP_DEBUG("closing wsrep thread %lld", (longlong) thd->thread_id); + wsrep_close_thread(thd); + } + return 0; +} + +void wsrep_close_threads(THD *thd) +{ + server_threads.iterate(wsrep_close_threads_callback, thd); +} + +void wsrep_wait_appliers_close(THD *thd) +{ + /* Wait for wsrep appliers to gracefully exit */ + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + while (wsrep_running_threads > 2) + /* + 2 is for rollbacker thread which needs to be killed explicitly. + This gotta be fixed in a more elegant manner if we gonna have arbitrary + number of non-applier wsrep threads. + */ + { + mysql_cond_wait(&COND_wsrep_slave_threads, &LOCK_wsrep_slave_threads); + } + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + DBUG_PRINT("quit",("applier threads have died (count=%u)", + uint32_t(wsrep_running_threads))); + + /* Now kill remaining wsrep threads: rollbacker */ + wsrep_close_threads (thd); + /* and wait for them to die */ + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + while (wsrep_running_threads > 0) + { + mysql_cond_wait(&COND_wsrep_slave_threads, &LOCK_wsrep_slave_threads); + } + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + DBUG_PRINT("quit",("all wsrep system threads have died")); + + /* All wsrep applier threads have now been aborted. However, if this thread + is also applier, we are still running... 
+ */ +} +int wsrep_must_ignore_error(THD* thd) +{ + const int error= thd->get_stmt_da()->sql_errno(); + const uint flags= sql_command_flags[thd->lex->sql_command]; + + DBUG_ASSERT(error); + DBUG_ASSERT(wsrep_thd_is_toi(thd)); + + if ((wsrep_ignore_apply_errors & WSREP_IGNORE_ERRORS_ON_DDL)) + goto ignore_error; + + if ((flags & CF_WSREP_MAY_IGNORE_ERRORS) && + (wsrep_ignore_apply_errors & WSREP_IGNORE_ERRORS_ON_RECONCILING_DDL)) + { + switch (error) + { + case ER_DB_DROP_EXISTS: + case ER_BAD_TABLE_ERROR: + case ER_CANT_DROP_FIELD_OR_KEY: + goto ignore_error; + } + } + + return 0; + +ignore_error: + WSREP_WARN("Ignoring error '%s' on query. " + "Default database: '%s'. Query: '%s', Error_code: %d", + thd->get_stmt_da()->message(), + print_slave_db_safe(thd->db.str), + thd->query(), + error); + return 1; +} + +int wsrep_ignored_error_code(Log_event* ev, int error) +{ + const THD* thd= ev->thd; + + DBUG_ASSERT(error); + /* Note that binlog events can be executed on master also with + BINLOG '....'; */ + DBUG_ASSERT(!wsrep_thd_is_local_toi(thd)); + + if ((wsrep_ignore_apply_errors & WSREP_IGNORE_ERRORS_ON_RECONCILING_DML)) + { + const int ev_type= ev->get_type_code(); + if ((ev_type == DELETE_ROWS_EVENT || ev_type == DELETE_ROWS_EVENT_V1) + && error == ER_KEY_NOT_FOUND) + goto ignore_error; + } + + return 0; + +ignore_error: + WSREP_WARN("Ignoring error '%s' on %s event. 
Error_code: %d", + thd->get_stmt_da()->message(), + ev->get_type_str(), + error); + return 1; +} + +bool wsrep_provider_is_SR_capable() +{ + return Wsrep_server_state::has_capability(wsrep::provider::capability::streaming); +} + +int wsrep_thd_retry_counter(const THD *thd) +{ + return thd->wsrep_retry_counter; +} + +extern bool wsrep_thd_ignore_table(THD *thd) +{ + return thd->wsrep_ignore_table; +} + +bool wsrep_create_like_table(THD* thd, TABLE_LIST* table, + TABLE_LIST* src_table, + HA_CREATE_INFO *create_info) +{ + if (create_info->tmp_table()) + { + /* CREATE TEMPORARY TABLE LIKE must be skipped from replication */ + WSREP_DEBUG("CREATE TEMPORARY TABLE LIKE... skipped replication\n %s", + thd->query()); + } + else if (!(thd->find_temporary_table(src_table))) + { + /* this is straight CREATE TABLE LIKE... with no tmp tables */ + WSREP_TO_ISOLATION_BEGIN_CREATE(table->db.str, table->table_name.str, table, create_info); + } + else + { + /* Non-MERGE tables ignore this call. */ + if (src_table->table->file->extra(HA_EXTRA_ADD_CHILDREN_LIST)) + return (true); + + char buf[2048]; + String query(buf, sizeof(buf), system_charset_info); + query.length(0); // Have to zero it since constructor doesn't + + int result __attribute__((unused))= + show_create_table(thd, src_table, &query, NULL, WITH_DB_NAME); + WSREP_DEBUG("TMP TABLE: %s ret_code %d", query.ptr(), result); + + thd->wsrep_TOI_pre_query= query.ptr(); + thd->wsrep_TOI_pre_query_len= query.length(); + + WSREP_TO_ISOLATION_BEGIN_CREATE(table->db.str, table->table_name.str, table, create_info); + + thd->wsrep_TOI_pre_query= NULL; + thd->wsrep_TOI_pre_query_len= 0; + + /* Non-MERGE tables ignore this call. 
*/ + src_table->table->file->extra(HA_EXTRA_DETACH_CHILDREN); + } + + return(false); +#ifdef WITH_WSREP +wsrep_error_label: + thd->wsrep_TOI_pre_query= NULL; + return (true); +#endif +} + +int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len) +{ + LEX *lex= thd->lex; + String stmt_query; + + LEX_CSTRING definer_user; + LEX_CSTRING definer_host; + + if (!lex->definer) + { + if (!thd->slave_thread) + { + if (!(lex->definer= create_default_definer(thd, false))) + return 1; + } + } + + if (lex->definer) + { + /* SUID trigger. */ + LEX_USER *d= get_current_user(thd, lex->definer); + + if (!d) + return 1; + + definer_user= d->user; + definer_host= d->host; + } + else + { + /* non-SUID trigger. */ + + definer_user.str= 0; + definer_user.length= 0; + + definer_host.str= 0; + definer_host.length= 0; + } + + const LEX_CSTRING command[2]= + {{ C_STRING_WITH_LEN("CREATE ") }, + { C_STRING_WITH_LEN("CREATE OR REPLACE ") }}; + + if (thd->lex->create_info.or_replace()) + stmt_query.append(command[1]); + else + stmt_query.append(command[0]); + + append_definer(thd, &stmt_query, &definer_user, &definer_host); + + LEX_CSTRING stmt_definition; + stmt_definition.str= (char*) thd->lex->stmt_definition_begin; + stmt_definition.length= thd->lex->stmt_definition_end + - thd->lex->stmt_definition_begin; + trim_whitespace(thd->charset(), &stmt_definition); + + stmt_query.append(stmt_definition.str, stmt_definition.length); + + return wsrep_to_buf_helper(thd, stmt_query.c_ptr(), stmt_query.length(), + buf, buf_len); +} + +void* start_wsrep_THD(void *arg) +{ + THD *thd= NULL; + + Wsrep_thd_args* thd_args= (Wsrep_thd_args*) arg; + + if (my_thread_init() || (!(thd= new THD(next_thread_id(), true)))) + { + goto error; + } + + statistic_increment(thread_created, &LOCK_status); + + thd->real_id=pthread_self(); // Keep purify happy + + my_net_init(&thd->net,(st_vio*) 0, thd, MYF(0)); + + DBUG_PRINT("wsrep",(("creating thread %lld"), (long long)thd->thread_id)); + 
thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer(); + + server_threads.insert(thd); + + /* from bootstrap()... */ + thd->bootstrap=1; + thd->max_client_packet_length= thd->net.max_packet; + thd->security_ctx->master_access= ALL_KNOWN_ACL; + + /* from handle_one_connection... */ + pthread_detach_this_thread(); + + mysql_thread_set_psi_id(thd->thread_id); + thd->thr_create_utime= microsecond_interval_timer(); + + DBUG_EXECUTE_IF("wsrep_simulate_failed_connection_1", goto error; ); +// + /* + handle_one_connection() is normally the only way a thread would + start and would always be on the very high end of the stack , + therefore, the thread stack always starts at the address of the + first local variable of handle_one_connection, which is thd. We + need to know the start of the stack so that we could check for + stack overruns. + */ + DBUG_PRINT("wsrep", ("handle_one_connection called by thread %lld", + (long long)thd->thread_id)); + /* now that we've called my_thread_init(), it is safe to call DBUG_* */ + + thd->thread_stack= (char*) &thd; + wsrep_assign_from_threadvars(thd); + wsrep_store_threadvars(thd); + + thd->system_thread= SYSTEM_THREAD_SLAVE_SQL; + thd->security_ctx->skip_grants(); + + /* handle_one_connection() again... 
*/ + thd->proc_info= 0; + thd->set_command(COM_SLEEP); + thd->init_for_queries(); + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + + wsrep_running_threads++; + + switch (thd_args->thread_type()) { + case WSREP_APPLIER_THREAD: + wsrep_running_applier_threads++; + break; + case WSREP_ROLLBACKER_THREAD: + wsrep_running_rollbacker_threads++; + break; + default: + WSREP_ERROR("Incorrect wsrep thread type: %d", thd_args->thread_type()); + break; + } + + mysql_cond_broadcast(&COND_wsrep_slave_threads); + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + + WSREP_DEBUG("wsrep system thread %llu, %p starting", + thd->thread_id, thd); + thd_args->fun()(thd, static_cast(thd_args)); + + WSREP_DEBUG("wsrep system thread: %llu, %p closing", + thd->thread_id, thd); + + /* Wsrep may reset globals during thread context switches, store globals + before cleanup. */ + wsrep_store_threadvars(thd); + + close_connection(thd, 0); + + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + DBUG_ASSERT(wsrep_running_threads > 0); + wsrep_running_threads--; + + switch (thd_args->thread_type()) { + case WSREP_APPLIER_THREAD: + DBUG_ASSERT(wsrep_running_applier_threads > 0); + wsrep_running_applier_threads--; + break; + case WSREP_ROLLBACKER_THREAD: + DBUG_ASSERT(wsrep_running_rollbacker_threads > 0); + wsrep_running_rollbacker_threads--; + break; + default: + WSREP_ERROR("Incorrect wsrep thread type: %d", thd_args->thread_type()); + break; + } + + delete thd_args; + WSREP_DEBUG("wsrep running threads now: %lu", wsrep_running_threads); + mysql_cond_broadcast(&COND_wsrep_slave_threads); + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + /* + Note: We can't call THD destructor without crashing + if plugins have not been initialized. However, in most of the + cases this means that pre SE initialization SST failed and + we are going to exit anyway. 
+ */ + if (plugins_are_initialized) + { + net_end(&thd->net); + unlink_thd(thd); + } + else + { + /* + TODO: lightweight cleanup to get rid of: + 'Error in my_thread_global_end(): 2 threads didn't exit' + at server shutdown + */ + server_threads.erase(thd); + } + + delete thd; + my_thread_end(); + return(NULL); + +error: + WSREP_ERROR("Failed to create/initialize system thread"); + + if (thd) + { + close_connection(thd, ER_OUT_OF_RESOURCES); + statistic_increment(aborted_connects, &LOCK_status); + server_threads.erase(thd); + delete thd; + my_thread_end(); + } + delete thd_args; + // This will signal error to wsrep_slave_threads_update + wsrep_thread_create_failed.store(true, std::memory_order_relaxed); + + /* Abort if its the first applier/rollbacker thread. */ + if (!mysqld_server_initialized) + unireg_abort(1); + else + return NULL; +} + +enum wsrep::streaming_context::fragment_unit wsrep_fragment_unit(ulong unit) +{ + switch (unit) + { + case WSREP_FRAG_BYTES: return wsrep::streaming_context::bytes; + case WSREP_FRAG_ROWS: return wsrep::streaming_context::row; + case WSREP_FRAG_STATEMENTS: return wsrep::streaming_context::statement; + default: + DBUG_ASSERT(0); + return wsrep::streaming_context::bytes; + } +} + +bool THD::wsrep_parallel_slave_wait_for_prior_commit() +{ + if (rgi_slave && rgi_slave->is_parallel_exec && wait_for_prior_commit()) + { + return 1; + } + return 0; +} + +/***** callbacks for wsrep service ************/ + +my_bool get_wsrep_recovery() +{ + return wsrep_recovery; +} + +bool wsrep_consistency_check(THD *thd) +{ + return thd->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING; +} + +// Wait until wsrep has reached ready state +void wsrep_wait_ready(THD *thd) +{ + mysql_mutex_lock(&LOCK_wsrep_ready); + while(!wsrep_ready) + { + WSREP_INFO("Waiting to reach ready state"); + mysql_cond_wait(&COND_wsrep_ready, &LOCK_wsrep_ready); + } + WSREP_INFO("ready state reached"); + mysql_mutex_unlock(&LOCK_wsrep_ready); +} + +void wsrep_ready_set(bool 
ready_value) +{ + WSREP_DEBUG("Setting wsrep_ready to %d", ready_value); + mysql_mutex_lock(&LOCK_wsrep_ready); + wsrep_ready= ready_value; + // Signal if we have reached ready state + if (wsrep_ready) + mysql_cond_signal(&COND_wsrep_ready); + mysql_mutex_unlock(&LOCK_wsrep_ready); +} + + +/* + Commit an empty transaction. + + If the transaction is real and the wsrep transaction is still active, + the transaction did not generate any rows or keys and is committed + as empty. Here the wsrep transaction is rolled back and after statement + step is performed to leave the wsrep transaction in the state as it + never existed. + + This should not be an inline functions as it requires a lot of stack space + because of WSREP_DBUG() usage. It's also not a function that is + frequently called. +*/ + +void wsrep_commit_empty(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_commit_empty"); + WSREP_DEBUG("wsrep_commit_empty for %llu client_state %s client_mode" + " %s trans_state %s sql %s", + thd_get_thread_id(thd), + wsrep::to_c_string(thd->wsrep_cs().state()), + wsrep::to_c_string(thd->wsrep_cs().mode()), + wsrep::to_c_string(thd->wsrep_cs().transaction().state()), + wsrep_thd_query(thd)); + + if (wsrep_is_real(thd, all) && + wsrep_thd_is_local(thd) && + thd->wsrep_trx().active() && + !thd->internal_transaction() && + thd->wsrep_trx().state() != wsrep::transaction::s_committed) + { + /* Here transaction is either empty (i.e. no changes) or + it was CREATE TABLE with no row binlog format or + we have already aborted transaction e.g. because max writeset size + has been reached. 
*/ + DBUG_ASSERT(!wsrep_has_changes(thd) || + (thd->lex->sql_command == SQLCOM_CREATE_TABLE && + !thd->is_current_stmt_binlog_format_row()) || + thd->wsrep_cs().transaction().state() == wsrep::transaction::s_aborted); + bool have_error= wsrep_current_error(thd); + int ret= wsrep_before_rollback(thd, all) || + wsrep_after_rollback(thd, all) || + wsrep_after_statement(thd); + /* The committing transaction was empty but it held some locks and + got BF aborted. As there were no certified changes in the + data, we ignore the deadlock error and rely on error reporting + by storage engine/server. */ + if (!ret && !have_error && wsrep_current_error(thd)) + { + DBUG_ASSERT(wsrep_current_error(thd) == wsrep::e_deadlock_error); + thd->wsrep_cs().reset_error(); + } + if (ret) + { + WSREP_DEBUG("wsrep_commit_empty failed: %d", wsrep_current_error(thd)); + } + } + DBUG_VOID_RETURN; +} diff --git a/sql/wsrep_mysqld.h b/sql/wsrep_mysqld.h new file mode 100644 index 00000000..3efe3829 --- /dev/null +++ b/sql/wsrep_mysqld.h @@ -0,0 +1,629 @@ +/* Copyright 2008-2023 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef WSREP_MYSQLD_H +#define WSREP_MYSQLD_H + +#include + +#ifdef WITH_WSREP + +#include +#include "mysql/service_wsrep.h" + +#include +#include +#include "log.h" +#include "mysqld.h" + +typedef struct st_mysql_show_var SHOW_VAR; +#include +#include "mdl.h" +#include "sql_table.h" +#include "wsrep_mysqld_c.h" + +#include "wsrep/provider.hpp" +#include "wsrep/streaming_context.hpp" +#include "wsrep_api.h" +#include + +#define WSREP_UNDEFINED_TRX_ID ULONGLONG_MAX + +class THD; + +// Global wsrep parameters + +// MySQL wsrep options +extern const char* wsrep_provider; +extern const char* wsrep_provider_options; +extern const char* wsrep_cluster_name; +extern const char* wsrep_cluster_address; +extern const char* wsrep_node_name; +extern const char* wsrep_node_address; +extern const char* wsrep_node_incoming_address; +extern const char* wsrep_data_home_dir; +extern const char* wsrep_dbug_option; +extern long wsrep_slave_threads; +extern int wsrep_slave_count_change; +extern ulong wsrep_debug; +extern my_bool wsrep_convert_LOCK_to_trx; +extern ulong wsrep_retry_autocommit; +extern my_bool wsrep_auto_increment_control; +extern my_bool wsrep_drupal_282555_workaround; +extern my_bool wsrep_incremental_data_collection; +extern const char* wsrep_start_position; +extern ulong wsrep_max_ws_size; +extern ulong wsrep_max_ws_rows; +extern const char* wsrep_notify_cmd; +extern const char* wsrep_status_file; +extern const char* wsrep_allowlist; +extern my_bool wsrep_certify_nonPK; +extern long int wsrep_protocol_version; +extern my_bool wsrep_desync; +extern ulong wsrep_reject_queries; +extern my_bool wsrep_recovery; +extern my_bool wsrep_log_conflicts; +extern ulong wsrep_mysql_replication_bundle; +extern my_bool wsrep_load_data_splitting; +extern my_bool 
wsrep_restart_slave; +extern my_bool wsrep_restart_slave_activated; +extern my_bool wsrep_slave_FK_checks; +extern my_bool wsrep_slave_UK_checks; +extern ulong wsrep_trx_fragment_unit; +extern ulong wsrep_SR_store_type; +extern uint wsrep_ignore_apply_errors; +extern ulong wsrep_running_threads; +extern ulong wsrep_running_applier_threads; +extern ulong wsrep_running_rollbacker_threads; +extern bool wsrep_new_cluster; +extern bool wsrep_gtid_mode; +extern uint32 wsrep_gtid_domain_id; +extern std::atomic wsrep_thread_create_failed; +extern ulonglong wsrep_mode; + +enum enum_wsrep_reject_types { + WSREP_REJECT_NONE, /* nothing rejected */ + WSREP_REJECT_ALL, /* reject all queries, with UNKNOWN_COMMAND error */ + WSREP_REJECT_ALL_KILL /* kill existing connections and reject all queries*/ +}; + +enum enum_wsrep_OSU_method { + WSREP_OSU_TOI, + WSREP_OSU_RSU, + WSREP_OSU_NONE, +}; + +enum enum_wsrep_sync_wait { + WSREP_SYNC_WAIT_NONE= 0x0, + // select, begin + WSREP_SYNC_WAIT_BEFORE_READ= 0x1, + WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE= 0x2, + WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE= 0x4, + WSREP_SYNC_WAIT_BEFORE_SHOW= 0x8, + WSREP_SYNC_WAIT_MAX= 0xF +}; + +enum enum_wsrep_ignore_apply_error { + WSREP_IGNORE_ERRORS_NONE= 0x0, + WSREP_IGNORE_ERRORS_ON_RECONCILING_DDL= 0x1, + WSREP_IGNORE_ERRORS_ON_RECONCILING_DML= 0x2, + WSREP_IGNORE_ERRORS_ON_DDL= 0x4, + WSREP_IGNORE_ERRORS_MAX= 0x7 +}; + +/* wsrep_mode features */ +enum enum_wsrep_mode { + WSREP_MODE_STRICT_REPLICATION= (1ULL << 0), + WSREP_MODE_BINLOG_ROW_FORMAT_ONLY= (1ULL << 1), + WSREP_MODE_REQUIRED_PRIMARY_KEY= (1ULL << 2), + WSREP_MODE_REPLICATE_MYISAM= (1ULL << 3), + WSREP_MODE_REPLICATE_ARIA= (1ULL << 4), + WSREP_MODE_DISALLOW_LOCAL_GTID= (1ULL << 5), + WSREP_MODE_BF_MARIABACKUP= (1ULL << 6) +}; + +// Streaming Replication +#define WSREP_FRAG_BYTES 0 +#define WSREP_FRAG_ROWS 1 +#define WSREP_FRAG_STATEMENTS 2 + +#define WSREP_SR_STORE_NONE 0 +#define WSREP_SR_STORE_TABLE 1 + +extern const char *wsrep_fragment_units[]; 
+extern const char *wsrep_SR_store_types[]; + +// MySQL status variables +extern my_bool wsrep_connected; +extern my_bool wsrep_ready; +extern const char* wsrep_cluster_state_uuid; +extern long long wsrep_cluster_conf_id; +extern const char* wsrep_cluster_status; +extern long wsrep_cluster_size; +extern long wsrep_local_index; +extern long long wsrep_local_bf_aborts; +extern const char* wsrep_provider_name; +extern const char* wsrep_provider_version; +extern const char* wsrep_provider_vendor; +extern char* wsrep_provider_capabilities; +extern char* wsrep_cluster_capabilities; + +int wsrep_show_status(THD *thd, SHOW_VAR *var, void *buff, + system_status_var *status_var, enum_var_type scope); +int wsrep_show_ready(THD *thd, SHOW_VAR *var, char *buff); +void wsrep_free_status(THD *thd); +void wsrep_update_cluster_state_uuid(const char* str); + +/* Filters out --wsrep-new-cluster oprtion from argv[] + * should be called in the very beginning of main() */ +void wsrep_filter_new_cluster (int* argc, char* argv[]); + +int wsrep_init(); +void wsrep_deinit(bool free_options); + +/* Initialize wsrep thread LOCKs and CONDs */ +void wsrep_thr_init(); +/* Destroy wsrep thread LOCKs and CONDs */ +void wsrep_thr_deinit(); + +void wsrep_recover(); +bool wsrep_before_SE(); // initialize wsrep before storage + // engines (true) or after (false) +/* wsrep initialization sequence at startup + * @param before wsrep_before_SE() value */ +void wsrep_init_startup(bool before); + +/* Recover streaming transactions from fragment storage */ +void wsrep_recover_sr_from_storage(THD *); + +// Other wsrep global variables +extern my_bool wsrep_inited; // whether wsrep is initialized ? 
+extern bool wsrep_service_started; + +extern "C" void wsrep_fire_rollbacker(THD *thd); +extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd); +extern "C" time_t wsrep_thd_query_start(THD *thd); +extern void wsrep_close_client_connections(my_bool wait_to_end, + THD *except_caller_thd= NULL); +extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd); +extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id); + +extern int wsrep_wait_committing_connections_close(int wait_time); +extern void wsrep_close_applier(THD *thd); +extern void wsrep_wait_appliers_close(THD *thd); +extern void wsrep_close_applier_threads(int count); + + +/* new defines */ +extern void wsrep_stop_replication(THD *thd); +extern bool wsrep_start_replication(const char *wsrep_cluster_address); +extern void wsrep_shutdown_replication(); +extern bool wsrep_check_mode (enum_wsrep_mode mask); +extern bool wsrep_check_mode_after_open_table (THD *thd, const handlerton *hton, + TABLE_LIST *tables); +extern bool wsrep_check_mode_before_cmd_execute (THD *thd); +extern bool wsrep_must_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ); +extern bool wsrep_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ); +extern bool wsrep_sync_wait (THD* thd, enum enum_sql_command command); +extern enum wsrep::provider::status +wsrep_sync_wait_upto (THD* thd, wsrep_gtid_t* upto, int timeout); +extern int wsrep_check_opts(); +extern void wsrep_prepend_PATH (const char* path); +extern bool wsrep_append_fk_parent_table(THD* thd, TABLE_LIST* table, wsrep::key_array* keys); +extern bool wsrep_reload_ssl(); +extern bool wsrep_split_allowlist(std::vector& allowlist); + +/* Other global variables */ +extern wsrep_seqno_t wsrep_locked_seqno; + +/* A wrapper function for MySQL log functions. The call will prefix + the log message with WSREP and forward the result buffer to fun. 
*/ +void WSREP_LOG(void (*fun)(const char* fmt, ...), const char* fmt, ...); + +#define WSREP_SYNC_WAIT(thd_, before_) \ + { if (WSREP_CLIENT(thd_) && \ + wsrep_sync_wait(thd_, before_)) goto wsrep_error_label; } + +#define WSREP_MYSQL_DB (char *)"mysql" + +#define WSREP_TO_ISOLATION_BEGIN(db_, table_, table_list_) \ + if (WSREP_ON && WSREP(thd) && wsrep_to_isolation_begin(thd, db_, table_, table_list_)) \ + goto wsrep_error_label; + +#define WSREP_TO_ISOLATION_BEGIN_CREATE(db_, table_, table_list_, create_info_) \ + if (WSREP_ON && WSREP(thd) && \ + wsrep_to_isolation_begin(thd, db_, table_, \ + table_list_, nullptr, nullptr, create_info_))\ + goto wsrep_error_label; + +#define WSREP_TO_ISOLATION_BEGIN_ALTER(db_, table_, table_list_, alter_info_, fk_tables_, create_info_) \ + if (WSREP(thd) && wsrep_thd_is_local(thd) && \ + wsrep_to_isolation_begin(thd, db_, table_, \ + table_list_, alter_info_, fk_tables_, create_info_)) + +#define WSREP_TO_ISOLATION_END \ + if ((WSREP(thd) && wsrep_thd_is_local_toi(thd)) || \ + wsrep_thd_is_in_rsu(thd)) \ + wsrep_to_isolation_end(thd); + +/* + Checks if lex->no_write_to_binlog is set for statements that use LOCAL or + NO_WRITE_TO_BINLOG. 
+*/ +#define WSREP_TO_ISOLATION_BEGIN_WRTCHK(db_, table_, table_list_) \ + if (WSREP(thd) && !thd->lex->no_write_to_binlog \ + && wsrep_to_isolation_begin(thd, db_, table_, table_list_)) \ + goto wsrep_error_label; + + +#define WSREP_PROVIDER_EXISTS (WSREP_PROVIDER_EXISTS_) + +static inline bool wsrep_cluster_address_exists() +{ + if (mysqld_server_started) + mysql_mutex_assert_owner(&LOCK_global_system_variables); + return wsrep_cluster_address && wsrep_cluster_address[0]; +} + +extern my_bool wsrep_ready_get(); +extern void wsrep_ready_wait(); + +extern mysql_mutex_t LOCK_wsrep_ready; +extern mysql_cond_t COND_wsrep_ready; +extern mysql_mutex_t LOCK_wsrep_sst; +extern mysql_cond_t COND_wsrep_sst; +extern mysql_mutex_t LOCK_wsrep_sst_init; +extern mysql_cond_t COND_wsrep_sst_init; +extern int wsrep_replaying; +extern mysql_mutex_t LOCK_wsrep_replaying; +extern mysql_cond_t COND_wsrep_replaying; +extern mysql_mutex_t LOCK_wsrep_slave_threads; +extern mysql_cond_t COND_wsrep_slave_threads; +extern mysql_mutex_t LOCK_wsrep_gtid_wait_upto; +extern mysql_mutex_t LOCK_wsrep_cluster_config; +extern mysql_mutex_t LOCK_wsrep_desync; +extern mysql_mutex_t LOCK_wsrep_SR_pool; +extern mysql_mutex_t LOCK_wsrep_SR_store; +extern mysql_mutex_t LOCK_wsrep_config_state; +extern mysql_mutex_t LOCK_wsrep_group_commit; +extern mysql_mutex_t LOCK_wsrep_joiner_monitor; +extern mysql_mutex_t LOCK_wsrep_donor_monitor; +extern mysql_cond_t COND_wsrep_joiner_monitor; +extern mysql_cond_t COND_wsrep_donor_monitor; + +extern int wsrep_to_isolation; +#ifdef GTID_SUPPORT +extern rpl_sidno wsrep_sidno; +#endif /* GTID_SUPPORT */ +extern my_bool wsrep_preordered_opt; + +#ifdef HAVE_PSI_INTERFACE + +extern PSI_cond_key key_COND_wsrep_thd; + +extern PSI_mutex_key key_LOCK_wsrep_ready; +extern PSI_mutex_key key_COND_wsrep_ready; +extern PSI_mutex_key key_LOCK_wsrep_sst; +extern PSI_cond_key key_COND_wsrep_sst; +extern PSI_mutex_key key_LOCK_wsrep_sst_init; +extern PSI_cond_key 
key_COND_wsrep_sst_init; +extern PSI_mutex_key key_LOCK_wsrep_sst_thread; +extern PSI_cond_key key_COND_wsrep_sst_thread; +extern PSI_mutex_key key_LOCK_wsrep_replaying; +extern PSI_cond_key key_COND_wsrep_replaying; +extern PSI_mutex_key key_LOCK_wsrep_slave_threads; +extern PSI_cond_key key_COND_wsrep_slave_threads; +extern PSI_mutex_key key_LOCK_wsrep_gtid_wait_upto; +extern PSI_cond_key key_COND_wsrep_gtid_wait_upto; +extern PSI_mutex_key key_LOCK_wsrep_cluster_config; +extern PSI_mutex_key key_LOCK_wsrep_desync; +extern PSI_mutex_key key_LOCK_wsrep_SR_pool; +extern PSI_mutex_key key_LOCK_wsrep_SR_store; +extern PSI_mutex_key key_LOCK_wsrep_global_seqno; +extern PSI_mutex_key key_LOCK_wsrep_thd_queue; +extern PSI_cond_key key_COND_wsrep_thd_queue; +extern PSI_mutex_key key_LOCK_wsrep_joiner_monitor; +extern PSI_mutex_key key_LOCK_wsrep_donor_monitor; + +extern PSI_file_key key_file_wsrep_gra_log; + +extern PSI_thread_key key_wsrep_sst_joiner; +extern PSI_thread_key key_wsrep_sst_donor; +extern PSI_thread_key key_wsrep_rollbacker; +extern PSI_thread_key key_wsrep_applier; +extern PSI_thread_key key_wsrep_sst_joiner_monitor; +extern PSI_thread_key key_wsrep_sst_donor_monitor; +#endif /* HAVE_PSI_INTERFACE */ + + +struct TABLE_LIST; +class Alter_info; +int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_, + const TABLE_LIST* table_list, + const Alter_info* alter_info= nullptr, + const wsrep::key_array *fk_tables= nullptr, + const HA_CREATE_INFO* create_info= nullptr); + +bool wsrep_should_replicate_ddl(THD* thd, const handlerton *db_type); +bool wsrep_should_replicate_ddl_iterate(THD* thd, const TABLE_LIST* table_list); + +void wsrep_to_isolation_end(THD *thd); + +bool wsrep_append_SR_keys(THD *thd); +int wsrep_to_buf_helper( + THD* thd, const char *query, uint query_len, uchar** buf, size_t* buf_len); +int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len); +int wsrep_create_event_query(THD *thd, uchar** buf, size_t* 
buf_len); + +void wsrep_init_sidno(const wsrep_uuid_t&); +bool wsrep_node_is_donor(); +bool wsrep_node_is_synced(); + +void wsrep_init_SR(); +void wsrep_verify_SE_checkpoint(const wsrep_uuid_t& uuid, wsrep_seqno_t seqno); +int wsrep_replay_from_SR_store(THD*, const wsrep_trx_meta_t&); + +class Log_event; +int wsrep_ignored_error_code(Log_event* ev, int error); +int wsrep_must_ignore_error(THD* thd); + +struct wsrep_server_gtid_t +{ + uint32 domain_id; + uint32 server_id; + uint64 seqno; +}; +class Wsrep_gtid_server +{ +public: + uint32 domain_id; + uint32 server_id; + Wsrep_gtid_server() + : m_force_signal(false) + , m_seqno(0) + , m_committed_seqno(0) + { } + void gtid(const wsrep_server_gtid_t& gtid) + { + domain_id= gtid.domain_id; + server_id= gtid.server_id; + m_seqno= gtid.seqno; + } + wsrep_server_gtid_t gtid() + { + wsrep_server_gtid_t gtid; + gtid.domain_id= domain_id; + gtid.server_id= server_id; + gtid.seqno= m_seqno; + return gtid; + } + void seqno(const uint64 seqno) { m_seqno= seqno; } + uint64 seqno() const { return m_seqno; } + uint64 seqno_committed() const { return m_committed_seqno; } + uint64 seqno_inc() + { + m_seqno++; + return m_seqno; + } + const wsrep_server_gtid_t& undefined() + { + return m_undefined; + } + int wait_gtid_upto(const uint64_t seqno, uint timeout) + { + int wait_result= 0; + struct timespec wait_time; + int ret= 0; + mysql_cond_t wait_cond; + mysql_cond_init(key_COND_wsrep_gtid_wait_upto, &wait_cond, NULL); + set_timespec(wait_time, timeout); + mysql_mutex_lock(&LOCK_wsrep_gtid_wait_upto); + std::multimap::iterator it; + if (seqno > m_seqno) + { + try + { + it= m_wait_map.insert(std::make_pair(seqno, &wait_cond)); + } + catch (std::bad_alloc& e) + { + ret= ENOMEM; + } + while (!ret && (m_committed_seqno < seqno) && !m_force_signal) + { + wait_result= mysql_cond_timedwait(&wait_cond, + &LOCK_wsrep_gtid_wait_upto, + &wait_time); + if (wait_result == ETIMEDOUT || wait_result == ETIME) + { + ret= wait_result; + break; + } + } + 
if (ret != ENOMEM) + { + m_wait_map.erase(it); + } + } + mysql_mutex_unlock(&LOCK_wsrep_gtid_wait_upto); + mysql_cond_destroy(&wait_cond); + return ret; + } + void signal_waiters(uint64 seqno, bool signal_all) + { + mysql_mutex_lock(&LOCK_wsrep_gtid_wait_upto); + if (!signal_all && (m_committed_seqno >= seqno)) + { + mysql_mutex_unlock(&LOCK_wsrep_gtid_wait_upto); + return; + } + m_force_signal= true; + std::multimap::iterator it_end; + std::multimap::iterator it_begin; + if (signal_all) + { + it_end= m_wait_map.end(); + } + else + { + it_end= m_wait_map.upper_bound(seqno); + } + if (m_committed_seqno < seqno) + { + m_committed_seqno= seqno; + } + for (it_begin = m_wait_map.begin(); it_begin != it_end; ++it_begin) + { + mysql_cond_signal(it_begin->second); + } + m_force_signal= false; + mysql_mutex_unlock(&LOCK_wsrep_gtid_wait_upto); + } +private: + const wsrep_server_gtid_t m_undefined= {0,0,0}; + std::multimap m_wait_map; + bool m_force_signal; + Atomic_counter m_seqno; + Atomic_counter m_committed_seqno; +}; +extern Wsrep_gtid_server wsrep_gtid_server; +void wsrep_init_gtid(); +bool wsrep_check_gtid_seqno(const uint32&, const uint32&, uint64&); +bool wsrep_get_binlog_gtid_seqno(wsrep_server_gtid_t&); + +typedef struct wsrep_key_arr +{ + wsrep_key_t* keys; + size_t keys_len; +} wsrep_key_arr_t; +bool wsrep_prepare_keys_for_isolation(THD* thd, + const char* db, + const char* table, + const TABLE_LIST* table_list, + wsrep_key_arr_t* ka); +void wsrep_keys_free(wsrep_key_arr_t* key_arr); + +extern void +wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, + const MDL_ticket *ticket, + const MDL_key *key); + +enum wsrep_thread_type { + WSREP_APPLIER_THREAD=1, + WSREP_ROLLBACKER_THREAD=2 +}; + +typedef void (*wsrep_thd_processor_fun)(THD*, void *); +class Wsrep_thd_args +{ + public: + Wsrep_thd_args(wsrep_thd_processor_fun fun, + wsrep_thread_type thread_type, + pthread_t thread_id) + : + fun_ (fun), + thread_type_ (thread_type), + thread_id_ (thread_id) + { } + + 
wsrep_thd_processor_fun fun() { return fun_; } + pthread_t* thread_id() {return &thread_id_; } + enum wsrep_thread_type thread_type() {return thread_type_;} + + private: + + Wsrep_thd_args(const Wsrep_thd_args&); + Wsrep_thd_args& operator=(const Wsrep_thd_args&); + + wsrep_thd_processor_fun fun_; + enum wsrep_thread_type thread_type_; + pthread_t thread_id_; +}; + +void* start_wsrep_THD(void*); + +void wsrep_close_threads(THD *thd); +bool wsrep_is_show_query(enum enum_sql_command command); +void wsrep_replay_transaction(THD *thd); +bool wsrep_create_like_table(THD* thd, TABLE_LIST* table, + TABLE_LIST* src_table, + HA_CREATE_INFO *create_info); +bool wsrep_node_is_donor(); +bool wsrep_node_is_synced(); + +/** + * Check if the wsrep provider (ie the Galera library) is capable of + * doing streaming replication. + * @return true if SR capable + */ +bool wsrep_provider_is_SR_capable(); + +/** + * Initialize WSREP server instance. + * + * @return Zero on success, non-zero on error. + */ +int wsrep_init_server(); + +/** + * Initialize WSREP globals. This should be done after server initialization + * is complete and the server has joined to the cluster. + * + */ +void wsrep_init_globals(); + +/** + * Deinit and release WSREP resources. + */ +void wsrep_deinit_server(); + +/** + * Convert streaming fragment unit (WSREP_FRAG_BYTES, WSREP_FRAG_ROWS...) + * to corresponding wsrep-lib fragment_unit + */ +enum wsrep::streaming_context::fragment_unit wsrep_fragment_unit(ulong unit); + +wsrep::key wsrep_prepare_key_for_toi(const char* db, const char* table, + enum wsrep::key::type type); + +void wsrep_wait_ready(THD *thd); +void wsrep_ready_set(bool ready_value); +#else /* !WITH_WSREP */ + +/* These macros are needed to compile MariaDB without WSREP support + * (e.g. 
embedded) */ + +#define WSREP_PROVIDER_EXISTS (0) +#define wsrep_emulate_bin_log (0) +#define wsrep_to_isolation (0) +#define wsrep_before_SE() (0) +#define wsrep_init_startup(X) +#define wsrep_check_opts() (0) +#define wsrep_thr_init() do {} while(0) +#define wsrep_thr_deinit() do {} while(0) +#define wsrep_init_globals() do {} while(0) +#define wsrep_create_appliers(X) do {} while(0) +#define wsrep_should_replicate_ddl(X,Y) (1) +#define wsrep_cluster_address_exists() (false) +#define WSREP_MYSQL_DB (0) +#define WSREP_TO_ISOLATION_BEGIN(db_, table_, table_list_) do { } while(0) +#define WSREP_TO_ISOLATION_BEGIN_ALTER(db_, table_, table_list_, alter_info_, fk_tables_) +#define WSREP_TO_ISOLATION_END +#define WSREP_TO_ISOLATION_BEGIN_WRTCHK(db_, table_, table_list_) +#define WSREP_SYNC_WAIT(thd_, before_) + +#endif /* WITH_WSREP */ + +#endif /* WSREP_MYSQLD_H */ diff --git a/sql/wsrep_mysqld_c.h b/sql/wsrep_mysqld_c.h new file mode 100644 index 00000000..603f2c29 --- /dev/null +++ b/sql/wsrep_mysqld_c.h @@ -0,0 +1,30 @@ +/* Copyright 2018-2018 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef WSREP_MYSQLD_C_H +#define WSREP_MYSQLD_C_H + +enum enum_wsrep_certification_rules { + WSREP_CERTIFICATION_RULES_STRICT, + WSREP_CERTIFICATION_RULES_OPTIMIZED +}; + +/* This is intentionally declared as a weak global symbol, so that +the same ha_innodb.so can be used with the embedded server +(which does not link to the definition of this variable) +and with the regular server built WITH_WSREP. */ +extern ulong wsrep_certification_rules __attribute__((weak)); + +#endif /* WSREP_MYSQLD_C_H */ diff --git a/sql/wsrep_notify.cc b/sql/wsrep_notify.cc new file mode 100644 index 00000000..6cf4fc4c --- /dev/null +++ b/sql/wsrep_notify.cc @@ -0,0 +1,105 @@ +/* Copyright 2010 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include +#include "wsrep_priv.h" +#include "wsrep_utils.h" +#include "wsrep_status.h" + +void wsrep_notify_status(enum wsrep::server_state::state status, + const wsrep::view* view) +{ + + Wsrep_status::report_state(status); + + if (!view) + { + WSREP_DEBUG("wsrep_notify_status server not yet ready : wsrep_ready=%d status %d", + wsrep_ready, (int)status); + return; + } + + if (!wsrep_notify_cmd || 0 == strlen(wsrep_notify_cmd)) + { + WSREP_INFO("wsrep_notify_cmd is not defined, skipping notification."); + return; + } + + const long cmd_len = (1 << 16) - 1; + char* cmd_ptr = (char*) my_malloc(PSI_NOT_INSTRUMENTED, cmd_len + 1, MYF(MY_WME)); + long cmd_off = 0; + + if (!cmd_ptr) + return; // the warning is in the log + + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, "%s", + wsrep_notify_cmd); + + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, " --status %s", + to_c_string(status)); + + if (view != NULL) + { + std::ostringstream uuid; + uuid << view->state_id().id(); + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + " --uuid %s", uuid.str().c_str()); + + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + " --primary %s", view->view_seqno().get() >= 0 ? "yes" : "no"); + + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + " --index %zd", view->own_index()); + + const std::vector& members(view->members()); + if (members.size()) + { + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, " --members"); + + for (unsigned int i= 0; i < members.size(); i++) + { + std::ostringstream id; + id << members[i].id(); + cmd_off += snprintf(cmd_ptr + cmd_off, cmd_len - cmd_off, + "%c%s/%s/%s", i > 0 ? 
',' : ' ', + id.str().c_str(), + members[i].name().c_str(), + members[i].incoming().c_str()); + } + } + } + + if (cmd_off == cmd_len) + { + WSREP_ERROR("Notification buffer too short (%ld). Aborting notification.", + cmd_len); + my_free(cmd_ptr); + return; + } + + wsp::process p(cmd_ptr, "r", NULL); + + p.wait(); + int err= p.error(); + + if (err) + { + WSREP_ERROR("Notification command failed: %d (%s): \"%s\"", + err, strerror(err), cmd_ptr); + } + my_free(cmd_ptr); +} + diff --git a/sql/wsrep_on.h b/sql/wsrep_on.h new file mode 100644 index 00000000..a3ef8345 --- /dev/null +++ b/sql/wsrep_on.h @@ -0,0 +1,58 @@ +/* Copyright 2022 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ + +#ifndef WSREP_ON_H +#define WSREP_ON_H + +#ifdef WITH_WSREP + +extern bool WSREP_ON_; +extern bool WSREP_PROVIDER_EXISTS_; +extern my_bool wsrep_emulate_bin_log; +extern ulong wsrep_forced_binlog_format; + +#define WSREP_ON unlikely(WSREP_ON_) + +/* use xxxxxx_NNULL macros when thd pointer is guaranteed to be non-null to + * avoid compiler warnings (GCC 6 and later) */ + +#define WSREP_NNULL(thd) \ + (WSREP_PROVIDER_EXISTS_ && thd->variables.wsrep_on) + +#define WSREP(thd) \ + (thd && WSREP_NNULL(thd)) + +#define WSREP_CLIENT_NNULL(thd) \ + (WSREP_NNULL(thd) && thd->wsrep_client_thread) + +#define WSREP_CLIENT(thd) \ + (WSREP(thd) && thd->wsrep_client_thread) + +#define WSREP_EMULATE_BINLOG_NNULL(thd) \ + (WSREP_NNULL(thd) && wsrep_emulate_bin_log) + +#define WSREP_EMULATE_BINLOG(thd) \ + (WSREP(thd) && wsrep_emulate_bin_log) + +#else + +#define WSREP_ON false +#define WSREP(T) (0) +#define WSREP_NNULL(T) (0) +#define WSREP_EMULATE_BINLOG(thd) (0) +#define WSREP_EMULATE_BINLOG_NNULL(thd) (0) + +#endif +#endif diff --git a/sql/wsrep_plugin.cc b/sql/wsrep_plugin.cc new file mode 100644 index 00000000..d23c51b1 --- /dev/null +++ b/sql/wsrep_plugin.cc @@ -0,0 +1,53 @@ +/* Copyright 2016 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ + +#include "wsrep_trans_observer.h" +#include "wsrep_mysqld.h" + +#include + +static int wsrep_plugin_init(void *p) +{ + WSREP_DEBUG("wsrep_plugin_init()"); + return 0; +} + +static int wsrep_plugin_deinit(void *p) +{ + WSREP_DEBUG("wsrep_plugin_deinit()"); + return 0; +} + +struct Mysql_replication wsrep_plugin= { + MYSQL_REPLICATION_INTERFACE_VERSION +}; + +maria_declare_plugin(wsrep) +{ + MYSQL_REPLICATION_PLUGIN, + &wsrep_plugin, + "wsrep", + "Codership Oy", + "Wsrep replication plugin", + PLUGIN_LICENSE_GPL, + wsrep_plugin_init, + wsrep_plugin_deinit, + 0x0100, + NULL, /* Status variables */ + NULL, /* System variables */ + "1.0", /* Version (string) */ + MariaDB_PLUGIN_MATURITY_STABLE /* Maturity */ +} +maria_declare_plugin_end; diff --git a/sql/wsrep_priv.h b/sql/wsrep_priv.h new file mode 100644 index 00000000..e707ec79 --- /dev/null +++ b/sql/wsrep_priv.h @@ -0,0 +1,43 @@ +/* Copyright 2010-2023 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + */ + +//! 
@file declares symbols private to wsrep integration layer + +#ifndef WSREP_PRIV_H +#define WSREP_PRIV_H + +#include "wsrep_api.h" +#include "wsrep/server_state.hpp" + +ssize_t wsrep_sst_prepare (void** msg); +wsrep_cb_status wsrep_sst_donate_cb (void* app_ctx, + void* recv_ctx, + const wsrep_buf_t* msg, + const wsrep_gtid_t* state_id, + const wsrep_buf_t* state, + bool bypass); + +extern wsrep_uuid_t local_uuid; +extern wsrep_seqno_t local_seqno; + +// a helper function +bool wsrep_sst_received(THD*, const wsrep_uuid_t&, wsrep_seqno_t, + const void*, size_t); + +void wsrep_notify_status(enum wsrep::server_state::state status, + const wsrep::view* view= 0); + +#endif /* WSREP_PRIV_H */ diff --git a/sql/wsrep_schema.cc b/sql/wsrep_schema.cc new file mode 100644 index 00000000..c6e45340 --- /dev/null +++ b/sql/wsrep_schema.cc @@ -0,0 +1,1701 @@ +/* Copyright (C) 2015-2022 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ + +#include "mariadb.h" + +#include "table.h" +#include "key.h" +#include "sql_base.h" +#include "sql_parse.h" +#include "sql_update.h" +#include "transaction.h" + +#include "mysql/service_wsrep.h" +#include "wsrep_schema.h" +#include "wsrep_applier.h" +#include "wsrep_xid.h" +#include "wsrep_binlog.h" +#include "wsrep_high_priority_service.h" +#include "wsrep_storage_service.h" +#include "wsrep_thd.h" +#include "wsrep_server_state.h" + +#include +#include + +const char* wsrep_sr_table_name_full= WSREP_SCHEMA "/" WSREP_STREAMING_TABLE; + +static const std::string wsrep_schema_str= WSREP_SCHEMA; +static const std::string sr_table_str= WSREP_STREAMING_TABLE; +static const std::string cluster_table_str= WSREP_CLUSTER_TABLE; +static const std::string members_table_str= WSREP_MEMBERS_TABLE; +static const std::string allowlist_table_str= WSREP_ALLOWLIST_TABLE; + +static const std::string create_cluster_table_str= + "CREATE TABLE IF NOT EXISTS " + wsrep_schema_str + "." + cluster_table_str + + "(" + "cluster_uuid CHAR(36) PRIMARY KEY," + "view_id BIGINT NOT NULL," + "view_seqno BIGINT NOT NULL," + "protocol_version INT NOT NULL," + "capabilities INT NOT NULL" + ") ENGINE=InnoDB STATS_PERSISTENT=0 CHARSET=latin1"; + +static const std::string create_members_table_str= + "CREATE TABLE IF NOT EXISTS " + wsrep_schema_str + "." + members_table_str + + "(" + "node_uuid CHAR(36) PRIMARY KEY," + "cluster_uuid CHAR(36) NOT NULL," + "node_name CHAR(32) NOT NULL," + "node_incoming_address VARCHAR(256) NOT NULL" + ") ENGINE=InnoDB STATS_PERSISTENT=0 CHARSET=latin1"; + +#ifdef WSREP_SCHEMA_MEMBERS_HISTORY +static const std::string cluster_member_history_table_str= "wsrep_cluster_member_history"; +static const std::string create_members_history_table_str= + "CREATE TABLE IF NOT EXISTS " + wsrep_schema_str + "." 
+ cluster_member_history_table_str + + "(" + "node_uuid CHAR(36) PRIMARY KEY," + "cluster_uuid CHAR(36) NOT NULL," + "last_view_id BIGINT NOT NULL," + "last_view_seqno BIGINT NOT NULL," + "node_name CHAR(32) NOT NULL," + "node_incoming_address VARCHAR(256) NOT NULL" + ") ENGINE=InnoDB STATS_PERSISTENT=0 CHARSET=latin1"; +#endif /* WSREP_SCHEMA_MEMBERS_HISTORY */ + +static const std::string create_frag_table_str= + "CREATE TABLE IF NOT EXISTS " + wsrep_schema_str + "." + sr_table_str + + "(" + "node_uuid CHAR(36), " + "trx_id BIGINT, " + "seqno BIGINT, " + "flags INT NOT NULL, " + "frag LONGBLOB NOT NULL, " + "PRIMARY KEY (node_uuid, trx_id, seqno)" + ") ENGINE=InnoDB STATS_PERSISTENT=0 CHARSET=latin1"; + +static const std::string create_allowlist_table_str= + "CREATE TABLE IF NOT EXISTS " + wsrep_schema_str + "." + allowlist_table_str + + "(" + "ip CHAR(64) NOT NULL," + "PRIMARY KEY (ip)" + ") ENGINE=InnoDB STATS_PERSISTENT=0"; + +static const std::string delete_from_cluster_table= + "DELETE FROM " + wsrep_schema_str + "." + cluster_table_str; + +static const std::string delete_from_members_table= + "DELETE FROM " + wsrep_schema_str + "." + members_table_str; + +/* For rolling upgrade we need to use ALTER. We do not want +persistent statistics to be collected from these tables. */ +static const std::string alter_cluster_table= + "ALTER TABLE " + wsrep_schema_str + "." + cluster_table_str + + " STATS_PERSISTENT=0 CHARSET=latin1"; + +static const std::string alter_members_table= + "ALTER TABLE " + wsrep_schema_str + "." + members_table_str + + " STATS_PERSISTENT=0 CHARSET=latin1"; + +#ifdef WSREP_SCHEMA_MEMBERS_HISTORY +static const std::string alter_members_history_table= + "ALTER TABLE " + wsrep_schema_str + "." + members_history_table_str + + " STATS_PERSISTENT=0 CHARSET=latin1"; +#endif + +static const std::string alter_frag_table= + "ALTER TABLE " + wsrep_schema_str + "." 
+ sr_table_str + + " STATS_PERSISTENT=0 CHARSET=latin1"; + +namespace Wsrep_schema_impl +{ + +class binlog_off +{ +public: + binlog_off(THD* thd) + : m_thd(thd) + , m_option_bits(thd->variables.option_bits) + , m_sql_log_bin(thd->variables.sql_log_bin) + { + thd->variables.option_bits&= ~OPTION_BIN_LOG; + thd->variables.sql_log_bin= 0; + } + ~binlog_off() + { + m_thd->variables.option_bits= m_option_bits; + m_thd->variables.sql_log_bin= m_sql_log_bin; + } +private: + THD* m_thd; + ulonglong m_option_bits; + my_bool m_sql_log_bin; +}; + +class wsrep_off +{ +public: + wsrep_off(THD* thd) + : m_thd(thd) + , m_wsrep_on(thd->variables.wsrep_on) + { + thd->variables.wsrep_on= 0; + } + ~wsrep_off() + { + m_thd->variables.wsrep_on= m_wsrep_on; + } +private: + THD* m_thd; + my_bool m_wsrep_on; +}; + +class thd_server_status +{ +public: + thd_server_status(THD* thd, uint server_status, bool condition) + : m_thd(thd) + , m_thd_server_status(thd->server_status) + { + if (condition) + thd->server_status= server_status; + } + ~thd_server_status() + { + m_thd->server_status= m_thd_server_status; + } +private: + THD* m_thd; + uint m_thd_server_status; +}; + +class thd_context_switch +{ +public: + thd_context_switch(THD *orig_thd, THD *cur_thd) + : m_orig_thd(orig_thd) + , m_cur_thd(cur_thd) + { + wsrep_reset_threadvars(m_orig_thd); + wsrep_store_threadvars(m_cur_thd); + } + ~thd_context_switch() + { + wsrep_reset_threadvars(m_cur_thd); + wsrep_store_threadvars(m_orig_thd); + } +private: + THD *m_orig_thd; + THD *m_cur_thd; +}; + +class sql_safe_updates +{ +public: + sql_safe_updates(THD* thd) + : m_thd(thd) + , m_option_bits(thd->variables.option_bits) + { + thd->variables.option_bits&= ~OPTION_SAFE_UPDATES; + } + ~sql_safe_updates() + { + m_thd->variables.option_bits= m_option_bits; + } +private: + THD* m_thd; + ulonglong m_option_bits; +}; + +static int execute_SQL(THD* thd, const char* sql, uint length) { + DBUG_ENTER("Wsrep_schema::execute_SQL()"); + int err= 0; + + 
PSI_statement_locker *parent_locker= thd->m_statement_psi; + Parser_state parser_state; + + WSREP_DEBUG("SQL: %d %s thd: %lld", length, sql, (long long)thd->thread_id); + + if (parser_state.init(thd, (char*)sql, length) == 0) { + thd->reset_for_next_command(); + lex_start(thd); + + thd->m_statement_psi= NULL; + + thd->set_query((char*)sql, length); + thd->set_query_id(next_query_id()); + + mysql_parse(thd, (char*)sql, length, & parser_state); + + if (thd->is_error()) { + WSREP_WARN("Wsrep_schema::execute_sql() failed, %d %s\nSQL: %s", + thd->get_stmt_da()->sql_errno(), + thd->get_stmt_da()->message(), + sql); + err= 1; + } + thd->m_statement_psi= parent_locker; + thd->end_statement(); + thd->reset_query(); + close_thread_tables(thd); + delete_explain_query(thd->lex); + } + else { + WSREP_WARN("SR init failure"); + } + thd->cleanup_after_query(); + DBUG_RETURN(err); +} + +/* + Initialize thd for next "statement" + */ +static void init_stmt(THD* thd) { + thd->reset_for_next_command(); +} + +static void finish_stmt(THD* thd) { + trans_commit_stmt(thd); + close_thread_tables(thd); +} + +static int open_table(THD* thd, + const LEX_CSTRING *schema_name, + const LEX_CSTRING *table_name, + enum thr_lock_type const lock_type, + TABLE** table) { + assert(table); + *table= NULL; + + DBUG_ENTER("Wsrep_schema::open_table()"); + + TABLE_LIST tables; + uint flags= (MYSQL_OPEN_IGNORE_GLOBAL_READ_LOCK | + MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY | + MYSQL_OPEN_IGNORE_FLUSH | + MYSQL_LOCK_IGNORE_TIMEOUT); + + tables.init_one_table(schema_name, + table_name, + NULL, lock_type); + thd->lex->query_tables_own_last= 0; + + // No need to open table if the query was bf aborted, + // thd client will get ER_LOCK_DEADLOCK in the end. 
+ const bool interrupted= thd->killed || + (thd->is_error() && + (thd->get_stmt_da()->sql_errno() == ER_QUERY_INTERRUPTED)); + + if (interrupted || + !open_n_lock_single_table(thd, &tables, tables.lock_type, flags)) { + close_thread_tables(thd); + DBUG_RETURN(1); + } + + *table= tables.table; + (*table)->use_all_columns(); + + DBUG_RETURN(0); +} + + +static int open_for_write(THD* thd, const char* table_name, TABLE** table) { + LEX_CSTRING schema_str= { wsrep_schema_str.c_str(), wsrep_schema_str.length() }; + LEX_CSTRING table_str= { table_name, strlen(table_name) }; + if (Wsrep_schema_impl::open_table(thd, &schema_str, &table_str, TL_WRITE, + table)) { + // No need to log an error if the query was bf aborted, + // thd client will get ER_LOCK_DEADLOCK in the end. + const bool interrupted= thd->killed || + (thd->is_error() && + (thd->get_stmt_da()->sql_errno() == ER_QUERY_INTERRUPTED)); + if (!interrupted) { + WSREP_ERROR("Failed to open table %s.%s for writing", + schema_str.str, table_name); + } + return 1; + } + empty_record(*table); + (*table)->use_all_columns(); + restore_record(*table, s->default_values); + return 0; +} + +static void store(TABLE* table, uint field, const Wsrep_id& id) { + assert(field < table->s->fields); + std::ostringstream os; + os << id; + table->field[field]->store(os.str().c_str(), + os.str().size(), + &my_charset_bin); +} + + +template +static void store(TABLE* table, uint field, const INTTYPE val) { + assert(field < table->s->fields); + table->field[field]->store(val); +} + +template +static void store(TABLE* table, uint field, const CHARTYPE* str, size_t str_len) { + assert(field < table->s->fields); + table->field[field]->store((const char*)str, + str_len, + &my_charset_bin); +} + +static void store(TABLE* table, uint field, const std::string& str) +{ + store(table, field, str.c_str(), str.size()); +} + +static int update_or_insert(TABLE* table) { + DBUG_ENTER("Wsrep_schema::update_or_insert()"); + int ret= 0; + char* key; + int 
error; + + /* + Verify that the table has primary key defined. + */ + if (table->s->primary_key >= MAX_KEY || + !table->s->keys_in_use.is_set(table->s->primary_key)) { + WSREP_ERROR("No primary key for %s.%s", + table->s->db.str, table->s->table_name.str); + DBUG_RETURN(1); + } + + /* + Find the record and update or insert a new one if not found. + */ + if (!(key= (char*) my_safe_alloca(table->s->max_unique_length))) { + WSREP_ERROR("Error allocating %ud bytes for key", + table->s->max_unique_length); + DBUG_RETURN(1); + } + + key_copy((uchar*) key, table->record[0], + table->key_info + table->s->primary_key, 0); + + if ((error= table->file->ha_index_read_idx_map(table->record[1], + table->s->primary_key, + (uchar*) key, + HA_WHOLE_KEY, + HA_READ_KEY_EXACT))) { + /* + Row not found, insert a new one. + */ + if ((error= table->file->ha_write_row(table->record[0]))) { + WSREP_ERROR("Error writing into %s.%s: %d", + table->s->db.str, + table->s->table_name.str, + error); + ret= 1; + } + } + else if (!records_are_comparable(table) || compare_record(table)) { + /* + Record has changed + */ + if ((error= table->file->ha_update_row(table->record[1], + table->record[0])) && + error != HA_ERR_RECORD_IS_THE_SAME) { + WSREP_ERROR("Error updating record in %s.%s: %d", + table->s->db.str, + table->s->table_name.str, + error); + ret= 1; + } + } + + my_safe_afree(key, table->s->max_unique_length); + + DBUG_RETURN(ret); +} + +static int insert(TABLE* table) { + DBUG_ENTER("Wsrep_schema::insert()"); + int ret= 0; + int error; + + /* + Verify that the table has primary key defined. 
+ */ + if (table->s->primary_key >= MAX_KEY || + !table->s->keys_in_use.is_set(table->s->primary_key)) { + WSREP_ERROR("No primary key for %s.%s", + table->s->db.str, table->s->table_name.str); + DBUG_RETURN(1); + } + + if ((error= table->file->ha_write_row(table->record[0]))) { + if (error == HA_ERR_FOUND_DUPP_KEY) { + WSREP_WARN("Duplicate key found when writing into %s.%s", + table->s->db.str, + table->s->table_name.str); + ret= HA_ERR_FOUND_DUPP_KEY; + } else { + WSREP_ERROR("Error writing into %s.%s: %d", + table->s->db.str, + table->s->table_name.str, + error); + ret= 1; + } + } + + DBUG_RETURN(ret); +} + +static int delete_row(TABLE* table) { + int error; + int retry= 3; + + do { + error= table->file->ha_delete_row(table->record[0]); + retry--; + } while (error && retry); + + if (error) { + WSREP_ERROR("Error deleting row from %s.%s: %d", + table->s->db.str, + table->s->table_name.str, + error); + return 1; + } + return 0; +} + +static int open_for_read(THD* thd, const char* table_name, TABLE** table) { + + LEX_CSTRING schema_str= { wsrep_schema_str.c_str(), wsrep_schema_str.length() }; + LEX_CSTRING table_str= { table_name, strlen(table_name) }; + if (Wsrep_schema_impl::open_table(thd, &schema_str, &table_str, TL_READ, + table)) { + WSREP_ERROR("Failed to open table %s.%s for reading", + schema_str.str, table_name); + return 1; + } + empty_record(*table); + (*table)->use_all_columns(); + restore_record(*table, s->default_values); + return 0; +} + +/* + Init table for sequential scan. + + @return 0 in case of success, 1 in case of error. + */ +static int init_for_scan(TABLE* table) { + int error; + if ((error= table->file->ha_rnd_init(TRUE))) { + WSREP_ERROR("Failed to init table for scan: %d", error); + return 1; + } + return 0; +} +/* + Scan next record. 
For return codes see handler::ha_rnd_next() + + @return 0 in case of success, error code in case of error + */ +static int next_record(TABLE* table) { + int error; + if ((error= table->file->ha_rnd_next(table->record[0])) && + error != HA_ERR_END_OF_FILE) { + WSREP_ERROR("Failed to read next record: %d", error); + } + return error; +} + +/* + End scan. + + @return 0 in case of success, 1 in case of error. + */ +static int end_scan(TABLE* table) { + int error; + if ((error= table->file->ha_rnd_end())) { + WSREP_ERROR("Failed to end scan: %d", error); + return 1; + } + return 0; +} + +static int scan(TABLE* table, uint field, wsrep::id& id) +{ + assert(field < table->s->fields); + String uuid_str; + (void)table->field[field]->val_str(&uuid_str); + id= wsrep::id(std::string(uuid_str.c_ptr(), uuid_str.length())); + return 0; +} + +template +static int scan(TABLE* table, uint field, INTTYPE& val) +{ + assert(field < table->s->fields); + val= table->field[field]->val_int(); + return 0; +} + +static int scan(TABLE* table, uint field, char* strbuf, uint strbuf_len) +{ + uint len; + StringBuffer str; + (void) table->field[field]->val_str(&str); + len= str.length(); + strmake(strbuf, str.ptr(), MY_MIN(len, strbuf_len-1)); + return 0; +} + +/* + Scan member + TODO: filter members by cluster UUID + */ +static int scan_member(TABLE* table, + const Wsrep_id& cluster_uuid, + std::vector& members) +{ + Wsrep_id member_id; + char member_name[128]= { 0, }; + char member_incoming[128]= { 0, }; + + if (scan(table, 0, member_id) || + scan(table, 2, member_name, sizeof(member_name)) || + scan(table, 3, member_incoming, sizeof(member_incoming))) { + return 1; + } + + if (members.empty() == false) { + assert(members.rbegin()->id() < member_id); + } + + try { + members.push_back(Wsrep_view::member(member_id, + member_name, + member_incoming)); + } + catch (...) 
{ + WSREP_ERROR("Caught exception while scanning members table"); + return 1; + } + return 0; +} + +/* + Init table for index scan and retrieve first record + + @return 0 in case of success, error code in case of error. + */ +static int init_for_index_scan(TABLE* table, const uchar* key, + key_part_map map) { + int error; + if ((error= table->file->ha_index_init(table->s->primary_key, true))) { + WSREP_ERROR("Failed to init table for index scan: %d", error); + return error; + } + + error= table->file->ha_index_read_map(table->record[0], + key, map, HA_READ_KEY_EXACT); + switch(error) { + case 0: + case HA_ERR_END_OF_FILE: + case HA_ERR_KEY_NOT_FOUND: + case HA_ERR_ABORTED_BY_USER: + break; + case -1: + WSREP_DEBUG("init_for_index_scan interrupted"); + break; + default: + WSREP_ERROR("init_for_index_scan failed to read first record, error %d", error); + } + return error; +} + +/* + End index scan. + + @return 0 in case of success, 1 in case of error. + */ +static int end_index_scan(TABLE* table) { + int error; + if (table->file->inited) { + if ((error= table->file->ha_index_end())) { + WSREP_ERROR("Failed to end scan: %d", error); + return 1; + } + } + return 0; +} + +static void make_key(TABLE* table, uchar** key, key_part_map* map, int parts) { + uint prefix_length= 0; + KEY_PART_INFO* key_part= table->key_info->key_part; + + for (int i=0; i < parts; i++) + prefix_length += key_part[i].store_length; + + *map= make_prev_keypart_map(parts); + + if (!(*key= (uchar *) my_malloc(PSI_NOT_INSTRUMENTED, prefix_length + 1, MYF(MY_WME)))) + { + WSREP_ERROR("Failed to allocate memory for key prefix_length %u", prefix_length); + assert(0); + } + + key_copy(*key, table->record[0], table->key_info, prefix_length); +} + +} /* namespace Wsrep_schema_impl */ + + +Wsrep_schema::Wsrep_schema() = default; + +Wsrep_schema::~Wsrep_schema() = default; + +static void wsrep_init_thd_for_schema(THD *thd) +{ + thd->security_ctx->skip_grants(); + thd->system_thread= SYSTEM_THREAD_GENERIC; + 
+ thd->real_id=pthread_self(); // Keep purify happy + + thd->prior_thr_create_utime= thd->start_utime= thd->thr_create_utime; + + /* No Galera replication */ + thd->variables.wsrep_on= 0; + /* No binlogging */ + thd->variables.sql_log_bin= 0; + thd->variables.option_bits&= ~OPTION_BIN_LOG; + /* No safe updates */ + thd->variables.option_bits&= ~OPTION_SAFE_UPDATES; + /* No general log */ + thd->variables.option_bits|= OPTION_LOG_OFF; + /* Read committed isolation to avoid gap locking */ + thd->variables.tx_isolation= ISO_READ_COMMITTED; + wsrep_assign_from_threadvars(thd); + wsrep_store_threadvars(thd); +} + +static bool wsrep_schema_ready= false; + +int Wsrep_schema::init() +{ + DBUG_ENTER("Wsrep_schema::init()"); + int ret; + THD* thd= new THD(next_thread_id()); + if (!thd) { + WSREP_ERROR("Unable to get thd"); + DBUG_RETURN(1); + } + thd->thread_stack= (char*)&thd; + wsrep_init_thd_for_schema(thd); + + if (Wsrep_schema_impl::execute_SQL(thd, create_cluster_table_str.c_str(), + create_cluster_table_str.size()) || + Wsrep_schema_impl::execute_SQL(thd, create_members_table_str.c_str(), + create_members_table_str.size()) || +#ifdef WSREP_SCHEMA_MEMBERS_HISTORY + Wsrep_schema_impl::execute_SQL(thd, + create_members_history_table_str.c_str(), + create_members_history_table_str.size()) || + Wsrep_schema_impl::execute_SQL(thd, + alter_members_history_table.c_str(), + alter_members_history_table.size()) || +#endif /* WSREP_SCHEMA_MEMBERS_HISTORY */ + Wsrep_schema_impl::execute_SQL(thd, + create_frag_table_str.c_str(), + create_frag_table_str.size()) || + Wsrep_schema_impl::execute_SQL(thd, + alter_cluster_table.c_str(), + alter_cluster_table.size()) || + Wsrep_schema_impl::execute_SQL(thd, + alter_members_table.c_str(), + alter_members_table.size()) || + Wsrep_schema_impl::execute_SQL(thd, + alter_frag_table.c_str(), + alter_frag_table.size()) || + Wsrep_schema_impl::execute_SQL(thd, + create_allowlist_table_str.c_str(), + create_allowlist_table_str.size())) + { + ret= 
1; + } + else + { + wsrep_schema_ready= true; + ret= 0; + } + + delete thd; + DBUG_RETURN(ret); +} + +int Wsrep_schema::store_view(THD* thd, const Wsrep_view& view) +{ + DBUG_ENTER("Wsrep_schema::store_view()"); + assert(view.status() == Wsrep_view::primary); + int ret= 1; + int error; + TABLE* cluster_table= 0; + TABLE* members_table= 0; +#ifdef WSREP_SCHEMA_MEMBERS_HISTORY + TABLE* members_history_table= 0; +#endif /* WSREP_SCHEMA_MEMBERS_HISTORY */ + + Wsrep_schema_impl::wsrep_off wsrep_off(thd); + Wsrep_schema_impl::binlog_off binlog_off(thd); + Wsrep_schema_impl::sql_safe_updates sql_safe_updates(thd); + + /* + Clean up cluster table and members table. + */ + if (Wsrep_schema_impl::execute_SQL(thd, + delete_from_cluster_table.c_str(), + delete_from_cluster_table.size()) || + Wsrep_schema_impl::execute_SQL(thd, + delete_from_members_table.c_str(), + delete_from_members_table.size())) { + goto out; + } + + /* + Store cluster view info + */ + Wsrep_schema_impl::init_stmt(thd); + if (Wsrep_schema_impl::open_for_write(thd, cluster_table_str.c_str(), &cluster_table)) + { + goto out; + } + + Wsrep_schema_impl::store(cluster_table, 0, view.state_id().id()); + Wsrep_schema_impl::store(cluster_table, 1, view.view_seqno().get()); + Wsrep_schema_impl::store(cluster_table, 2, view.state_id().seqno().get()); + Wsrep_schema_impl::store(cluster_table, 3, view.protocol_version()); + Wsrep_schema_impl::store(cluster_table, 4, view.capabilities()); + + if ((error= Wsrep_schema_impl::update_or_insert(cluster_table))) + { + WSREP_ERROR("failed to write to cluster table: %d", error); + goto out; + } + + Wsrep_schema_impl::finish_stmt(thd); + + /* + Store info about current members + */ + Wsrep_schema_impl::init_stmt(thd); + if (Wsrep_schema_impl::open_for_write(thd, members_table_str.c_str(), + &members_table)) + { + WSREP_ERROR("failed to open wsrep.members table"); + goto out; + } + + for (size_t i= 0; i < view.members().size(); ++i) + { + Wsrep_schema_impl::store(members_table, 
0, view.members()[i].id()); + Wsrep_schema_impl::store(members_table, 1, view.state_id().id()); + Wsrep_schema_impl::store(members_table, 2, view.members()[i].name()); + Wsrep_schema_impl::store(members_table, 3, view.members()[i].incoming()); + if ((error= Wsrep_schema_impl::update_or_insert(members_table))) + { + WSREP_ERROR("failed to write wsrep.members table: %d", error); + goto out; + } + } + Wsrep_schema_impl::finish_stmt(thd); + +#ifdef WSREP_SCHEMA_MEMBERS_HISTORY + /* + Store members history + */ + Wsrep_schema_impl::init_stmt(thd); + if (Wsrep_schema_impl::open_for_write(thd, cluster_member_history.c_str(), + &members_history_table)) { + WSREP_ERROR("failed to open wsrep.members table"); + goto out; + } + + for (size_t i= 0; i < view.members().size(); ++i) { + Wsrep_schema_impl::store(members_history_table, 0, view.members()[i].id()); + Wsrep_schema_impl::store(members_history_table, 1, view.state_id().id()); + Wsrep_schema_impl::store(members_history_table, 2, view.view_seqno()); + Wsrep_schema_impl::store(members_history_table, 3, view.state_id().seqno()); + Wsrep_schema_impl::store(members_history_table, 4, + view.members()[i].name()); + Wsrep_schema_impl::store(members_history_table, 5, + view.members()[i].incoming()); + if ((error= Wsrep_schema_impl::update_or_insert(members_history_table))) { + WSREP_ERROR("failed to write wsrep_cluster_member_history table: %d", error); + goto out; + } + } + Wsrep_schema_impl::finish_stmt(thd); +#endif /* WSREP_SCHEMA_MEMBERS_HISTORY */ + ret= 0; + out: + + DBUG_RETURN(ret); +} + +Wsrep_view Wsrep_schema::restore_view(THD* thd, const Wsrep_id& own_id) const { + DBUG_ENTER("Wsrep_schema::restore_view()"); + + int ret= 1; + int error; + + TABLE* cluster_table= 0; + bool end_cluster_scan= false; + TABLE* members_table= 0; + bool end_members_scan= false; + + /* variables below need to be initialized in case cluster table is empty */ + Wsrep_id cluster_uuid; + wsrep_seqno_t view_id= -1; + wsrep_seqno_t view_seqno= -1; 
+ int my_idx= -1; + int proto_ver= 0; + wsrep_cap_t capabilities= 0; + std::vector members; + + // we don't want causal waits for reading non-replicated private data + int const wsrep_sync_wait_saved= thd->variables.wsrep_sync_wait; + thd->variables.wsrep_sync_wait= 0; + + if (trans_begin(thd, MYSQL_START_TRANS_OPT_READ_ONLY)) { + WSREP_ERROR("wsrep_schema::restore_view(): Failed to start transaction"); + goto out; + } + + /* + Read cluster info from cluster table + */ + Wsrep_schema_impl::init_stmt(thd); + if (Wsrep_schema_impl::open_for_read(thd, cluster_table_str.c_str(), &cluster_table) || + Wsrep_schema_impl::init_for_scan(cluster_table)) { + goto out; + } + + if (((error= Wsrep_schema_impl::next_record(cluster_table)) != 0 || + Wsrep_schema_impl::scan(cluster_table, 0, cluster_uuid) || + Wsrep_schema_impl::scan(cluster_table, 1, view_id) || + Wsrep_schema_impl::scan(cluster_table, 2, view_seqno) || + Wsrep_schema_impl::scan(cluster_table, 3, proto_ver) || + Wsrep_schema_impl::scan(cluster_table, 4, capabilities)) && + error != HA_ERR_END_OF_FILE) { + end_cluster_scan= true; + goto out; + } + + if (Wsrep_schema_impl::end_scan(cluster_table)) { + goto out; + } + Wsrep_schema_impl::finish_stmt(thd); + + /* + Read members from members table + */ + Wsrep_schema_impl::init_stmt(thd); + if (Wsrep_schema_impl::open_for_read(thd, members_table_str.c_str(), &members_table) || + Wsrep_schema_impl::init_for_scan(members_table)) { + goto out; + } + end_members_scan= true; + + while (true) { + if ((error= Wsrep_schema_impl::next_record(members_table)) == 0) { + if (Wsrep_schema_impl::scan_member(members_table, + cluster_uuid, + members)) { + goto out; + } + } + else if (error == HA_ERR_END_OF_FILE) { + break; + } + else { + goto out; + } + } + + end_members_scan= false; + if (Wsrep_schema_impl::end_scan(members_table)) { + goto out; + } + Wsrep_schema_impl::finish_stmt(thd); + + if (own_id.is_undefined() == false) { + for (uint i= 0; i < members.size(); ++i) { + if 
(members[i].id() == own_id) { + my_idx= i; + break; + } + } + } + + (void)trans_commit(thd); + ret= 0; /* Success*/ + out: + + if (end_cluster_scan) Wsrep_schema_impl::end_scan(cluster_table); + if (end_members_scan) Wsrep_schema_impl::end_scan(members_table); + + if (0 != ret) { + trans_rollback_stmt(thd); + if (!trans_rollback(thd)) { + close_thread_tables(thd); + } + } + thd->release_transactional_locks(); + + thd->variables.wsrep_sync_wait= wsrep_sync_wait_saved; + + if (0 == ret) { + Wsrep_view ret_view( + wsrep::gtid(cluster_uuid, Wsrep_seqno(view_seqno)), + Wsrep_seqno(view_id), + wsrep::view::primary, + capabilities, + my_idx, + proto_ver, + members + ); + + if (wsrep_debug) { + std::ostringstream os; + os << "Restored cluster view:\n" << ret_view; + WSREP_INFO("%s", os.str().c_str()); + } + DBUG_RETURN(ret_view); + } + else + { + WSREP_ERROR("wsrep_schema::restore_view() failed."); + Wsrep_view ret_view; + DBUG_RETURN(ret_view); + } +} + +int Wsrep_schema::append_fragment(THD* thd, + const wsrep::id& server_id, + wsrep::transaction_id transaction_id, + wsrep::seqno seqno, + int flags, + const wsrep::const_buffer& data) +{ + DBUG_ENTER("Wsrep_schema::append_fragment"); + std::ostringstream os; + os << server_id; + WSREP_DEBUG("Append fragment(%llu) %s, %llu", + thd->thread_id, + os.str().c_str(), + transaction_id.get()); + /* use private query table list for the duration of fragment storing, + populated query table list from "parent DML" may cause problems .e.g + for virtual column handling + */ + Query_tables_list query_tables_list_backup; + thd->lex->reset_n_backup_query_tables_list(&query_tables_list_backup); + + Wsrep_schema_impl::binlog_off binlog_off(thd); + Wsrep_schema_impl::sql_safe_updates sql_safe_updates(thd); + Wsrep_schema_impl::init_stmt(thd); + + TABLE* frag_table= 0; + if (Wsrep_schema_impl::open_for_write(thd, sr_table_str.c_str(), &frag_table)) + { + trans_rollback_stmt(thd); + 
thd->lex->restore_backup_query_tables_list(&query_tables_list_backup); + DBUG_RETURN(1); + } + + Wsrep_schema_impl::store(frag_table, 0, server_id); + Wsrep_schema_impl::store(frag_table, 1, transaction_id.get()); + Wsrep_schema_impl::store(frag_table, 2, seqno.get()); + Wsrep_schema_impl::store(frag_table, 3, flags); + Wsrep_schema_impl::store(frag_table, 4, data.data(), data.size()); + + if (Wsrep_schema_impl::insert(frag_table)) { + trans_rollback_stmt(thd); + close_thread_tables(thd); + thd->lex->restore_backup_query_tables_list(&query_tables_list_backup); + DBUG_RETURN(1); + } + Wsrep_schema_impl::finish_stmt(thd); + thd->lex->restore_backup_query_tables_list(&query_tables_list_backup); + DBUG_RETURN(0); +} + +int Wsrep_schema::update_fragment_meta(THD* thd, + const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER("Wsrep_schema::update_fragment_meta"); + std::ostringstream os; + os << ws_meta.server_id(); + WSREP_DEBUG("update_frag_seqno(%llu) %s, %llu, seqno %lld", + thd->thread_id, + os.str().c_str(), + ws_meta.transaction_id().get(), + ws_meta.seqno().get()); + DBUG_ASSERT(ws_meta.seqno().is_undefined() == false); + + /* use private query table list for the duration of fragment storing, + populated query table list from "parent DML" may cause problems .e.g + for virtual column handling + */ + Query_tables_list query_tables_list_backup; + thd->lex->reset_n_backup_query_tables_list(&query_tables_list_backup); + + Wsrep_schema_impl::binlog_off binlog_off(thd); + Wsrep_schema_impl::sql_safe_updates sql_safe_updates(thd); + int error; + uchar *key=NULL; + key_part_map key_map= 0; + TABLE* frag_table= 0; + + Wsrep_schema_impl::init_stmt(thd); + if (Wsrep_schema_impl::open_for_write(thd, sr_table_str.c_str(), &frag_table)) + { + thd->lex->restore_backup_query_tables_list(&query_tables_list_backup); + DBUG_RETURN(1); + } + + /* Find record with the given uuid, trx id, and seqno -1 */ + Wsrep_schema_impl::store(frag_table, 0, ws_meta.server_id()); + 
Wsrep_schema_impl::store(frag_table, 1, ws_meta.transaction_id().get()); + Wsrep_schema_impl::store(frag_table, 2, -1); + Wsrep_schema_impl::make_key(frag_table, &key, &key_map, 3); + + if ((error= Wsrep_schema_impl::init_for_index_scan(frag_table, + key, key_map))) + { + if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND) + { + WSREP_WARN("Record not found in %s.%s: %d", + frag_table->s->db.str, + frag_table->s->table_name.str, + error); + } + Wsrep_schema_impl::finish_stmt(thd); + thd->lex->restore_backup_query_tables_list(&query_tables_list_backup); + my_free(key); + DBUG_RETURN(1); + } + + my_free(key); + /* Copy the original record to frag_table->record[1] */ + store_record(frag_table, record[1]); + + /* Store seqno in frag_table->record[0] and update the row */ + Wsrep_schema_impl::store(frag_table, 2, ws_meta.seqno().get()); + if ((error= frag_table->file->ha_update_row(frag_table->record[1], + frag_table->record[0]))) { + WSREP_ERROR("Error updating record in %s.%s: %d", + frag_table->s->db.str, + frag_table->s->table_name.str, + error); + Wsrep_schema_impl::finish_stmt(thd); + thd->lex->restore_backup_query_tables_list(&query_tables_list_backup); + DBUG_RETURN(1); + } + + int ret= Wsrep_schema_impl::end_index_scan(frag_table); + Wsrep_schema_impl::finish_stmt(thd); + thd->lex->restore_backup_query_tables_list(&query_tables_list_backup); + DBUG_RETURN(ret); +} + +static int remove_fragment(THD* thd, + TABLE* frag_table, + const wsrep::id& server_id, + wsrep::transaction_id transaction_id, + wsrep::seqno seqno) +{ + WSREP_DEBUG("remove_fragment(%llu) trx %llu, seqno %lld", + thd->thread_id, + transaction_id.get(), + seqno.get()); + int ret= 0; + int error; + uchar *key= NULL; + key_part_map key_map= 0; + + DBUG_ASSERT(server_id.is_undefined() == false); + DBUG_ASSERT(transaction_id.is_undefined() == false); + DBUG_ASSERT(seqno.is_undefined() == false); + + /* + Remove record with the given uuid, trx id, and seqno. 
+ Using a complete key here avoids gap locks. + */ + Wsrep_schema_impl::store(frag_table, 0, server_id); + Wsrep_schema_impl::store(frag_table, 1, transaction_id.get()); + Wsrep_schema_impl::store(frag_table, 2, seqno.get()); + Wsrep_schema_impl::make_key(frag_table, &key, &key_map, 3); + + if ((error= Wsrep_schema_impl::init_for_index_scan(frag_table, + key, + key_map))) + { + if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND) + { + WSREP_DEBUG("Record not found in %s.%s:trx %llu, seqno %lld, error %d", + frag_table->s->db.str, + frag_table->s->table_name.str, + transaction_id.get(), + seqno.get(), + error); + } + ret= error; + } + else if (Wsrep_schema_impl::delete_row(frag_table)) + { + ret= 1; + } + + if (key) + my_free(key); + Wsrep_schema_impl::end_index_scan(frag_table); + return ret; +} + +int Wsrep_schema::remove_fragments(THD* thd, + const wsrep::id& server_id, + wsrep::transaction_id transaction_id, + const std::vector& fragments) +{ + DBUG_ENTER("Wsrep_schema::remove_fragments"); + int ret= 0; + + WSREP_DEBUG("Removing %zu fragments", fragments.size()); + Wsrep_schema_impl::wsrep_off wsrep_off(thd); + Wsrep_schema_impl::binlog_off binlog_off(thd); + Wsrep_schema_impl::sql_safe_updates sql_safe_updates(thd); + + Query_tables_list query_tables_list_backup; + Open_tables_backup open_tables_backup; + thd->lex->reset_n_backup_query_tables_list(&query_tables_list_backup); + thd->reset_n_backup_open_tables_state(&open_tables_backup); + + TABLE* frag_table= 0; + if (Wsrep_schema_impl::open_for_write(thd, sr_table_str.c_str(), &frag_table)) + { + ret= 1; + } + else + { + for (std::vector::const_iterator i= fragments.begin(); + i != fragments.end(); ++i) + { + if (remove_fragment(thd, + frag_table, + server_id, + transaction_id, *i)) + { + ret= 1; + break; + } + } + } + close_thread_tables(thd); + thd->restore_backup_open_tables_state(&open_tables_backup); + thd->lex->restore_backup_query_tables_list(&query_tables_list_backup); + + if 
(thd->wsrep_cs().mode() == wsrep::client_state::m_local && + !thd->in_multi_stmt_transaction_mode()) + { + /* + The ugly part: Locally executing autocommit statement is + committing and it has removed a fragment from stable storage. + Now calling finish_stmt() will call trans_commit_stmt(), which will + actually commit the transaction, what we really don't want + to do at this point. + + Doing nothing at this point seems to work ok, this block is + intentionally no-op and for documentation purposes only. + */ + } + else + { + Wsrep_schema_impl::thd_server_status + thd_server_status(thd, thd->server_status | SERVER_STATUS_IN_TRANS, + thd->in_multi_stmt_transaction_mode()); + Wsrep_schema_impl::finish_stmt(thd); + } + + DBUG_RETURN(ret); +} + +int Wsrep_schema::replay_transaction(THD* orig_thd, + Relay_log_info* rli, + const wsrep::ws_meta& ws_meta, + const std::vector& fragments) +{ + DBUG_ENTER("Wsrep_schema::replay_transaction"); + DBUG_ASSERT(!fragments.empty()); + + THD thd(next_thread_id(), true); + thd.thread_stack= (orig_thd ? 
orig_thd->thread_stack : + (char*) &thd); + wsrep_assign_from_threadvars(&thd); + + Wsrep_schema_impl::wsrep_off wsrep_off(&thd); + Wsrep_schema_impl::binlog_off binlog_off(&thd); + Wsrep_schema_impl::sql_safe_updates sql_safe_updates(&thd); + Wsrep_schema_impl::thd_context_switch thd_context_switch(orig_thd, &thd); + + int ret= 1; + int error; + TABLE* frag_table= 0; + uchar *key=NULL; + key_part_map key_map= 0; + + for (std::vector::const_iterator i= fragments.begin(); + i != fragments.end(); ++i) + { + Wsrep_schema_impl::init_stmt(&thd); + if ((error= Wsrep_schema_impl::open_for_read(&thd, sr_table_str.c_str(), &frag_table))) + { + WSREP_WARN("Could not open SR table for read: %d", error); + Wsrep_schema_impl::finish_stmt(&thd); + DBUG_RETURN(1); + } + + Wsrep_schema_impl::store(frag_table, 0, ws_meta.server_id()); + Wsrep_schema_impl::store(frag_table, 1, ws_meta.transaction_id().get()); + Wsrep_schema_impl::store(frag_table, 2, i->get()); + Wsrep_schema_impl::make_key(frag_table, &key, &key_map, 3); + + int error= Wsrep_schema_impl::init_for_index_scan(frag_table, + key, + key_map); + if (error) + { + WSREP_WARN("Failed to init streaming log table for index scan: %d", + error); + Wsrep_schema_impl::end_index_scan(frag_table); + ret= 1; + break; + } + + int flags; + Wsrep_schema_impl::scan(frag_table, 3, flags); + WSREP_DEBUG("replay_fragment(%llu): seqno: %lld flags: %x", + ws_meta.transaction_id().get(), + i->get(), + flags); + String buf; + frag_table->field[4]->val_str(&buf); + + { + Wsrep_schema_impl::thd_context_switch thd_context_switch(&thd, orig_thd); + + ret= wsrep_apply_events(orig_thd, rli, buf.ptr(), buf.length()); + if (ret) + { + WSREP_WARN("Wsrep_schema::replay_transaction: failed to apply fragments"); + break; + } + } + + Wsrep_schema_impl::end_index_scan(frag_table); + Wsrep_schema_impl::finish_stmt(&thd); + + Wsrep_schema_impl::init_stmt(&thd); + + if ((error= Wsrep_schema_impl::open_for_write(&thd, + sr_table_str.c_str(), + &frag_table))) + 
{ + WSREP_WARN("Could not open SR table for write: %d", error); + Wsrep_schema_impl::finish_stmt(&thd); + DBUG_RETURN(1); + } + + error= Wsrep_schema_impl::init_for_index_scan(frag_table, + key, + key_map); + if (error) + { + WSREP_WARN("Failed to init streaming log table for index scan: %d", + error); + Wsrep_schema_impl::end_index_scan(frag_table); + ret= 1; + break; + } + + error= Wsrep_schema_impl::delete_row(frag_table); + + if (error) + { + WSREP_WARN("Could not delete row from streaming log table: %d", error); + Wsrep_schema_impl::end_index_scan(frag_table); + ret= 1; + break; + } + Wsrep_schema_impl::end_index_scan(frag_table); + Wsrep_schema_impl::finish_stmt(&thd); + my_free(key); + key= NULL; + } + + if (key) + my_free(key); + DBUG_RETURN(ret); +} + +int Wsrep_schema::recover_sr_transactions(THD *orig_thd) +{ + DBUG_ENTER("Wsrep_schema::recover_sr_transactions"); + THD storage_thd(next_thread_id(), true); + storage_thd.thread_stack= (orig_thd ? orig_thd->thread_stack : + (char*) &storage_thd); + wsrep_assign_from_threadvars(&storage_thd); + TABLE* frag_table= 0; + TABLE* cluster_table= 0; + Wsrep_storage_service storage_service(&storage_thd); + Wsrep_schema_impl::binlog_off binlog_off(&storage_thd); + Wsrep_schema_impl::wsrep_off wsrep_off(&storage_thd); + Wsrep_schema_impl::sql_safe_updates sql_safe_updates(&storage_thd); + Wsrep_schema_impl::thd_context_switch thd_context_switch(orig_thd, + &storage_thd); + Wsrep_server_state& server_state(Wsrep_server_state::instance()); + + int ret= 1; + int error; + wsrep::id cluster_id; + + Wsrep_schema_impl::init_stmt(&storage_thd); + storage_thd.wsrep_skip_locking= FALSE; + if (Wsrep_schema_impl::open_for_read(&storage_thd, + cluster_table_str.c_str(), + &cluster_table) || + Wsrep_schema_impl::init_for_scan(cluster_table)) + { + Wsrep_schema_impl::finish_stmt(&storage_thd); + DBUG_RETURN(1); + } + + if ((error= Wsrep_schema_impl::next_record(cluster_table))) + { + Wsrep_schema_impl::end_scan(cluster_table); + 
Wsrep_schema_impl::finish_stmt(&storage_thd); + trans_commit(&storage_thd); + if (error == HA_ERR_END_OF_FILE) + { + WSREP_INFO("Cluster table is empty, not recovering transactions"); + DBUG_RETURN(0); + } + else + { + WSREP_ERROR("Failed to read cluster table: %d", error); + DBUG_RETURN(1); + } + } + + Wsrep_schema_impl::scan(cluster_table, 0, cluster_id); + Wsrep_schema_impl::end_scan(cluster_table); + Wsrep_schema_impl::finish_stmt(&storage_thd); + + std::ostringstream os; + os << cluster_id; + WSREP_INFO("Recovered cluster id %s", os.str().c_str()); + + storage_thd.wsrep_skip_locking= TRUE; + Wsrep_schema_impl::init_stmt(&storage_thd); + + /* + Open the table for reading and writing so that fragments without + valid seqno can be deleted. + */ + if (Wsrep_schema_impl::open_for_write(&storage_thd, sr_table_str.c_str(), &frag_table) || + Wsrep_schema_impl::init_for_scan(frag_table)) + { + WSREP_ERROR("Failed to open SR table for write"); + goto out; + } + + while (0 == error) + { + if ((error= Wsrep_schema_impl::next_record(frag_table)) == 0) + { + wsrep::id server_id; + Wsrep_schema_impl::scan(frag_table, 0, server_id); + wsrep::client_id client_id; + unsigned long long transaction_id_ull; + Wsrep_schema_impl::scan(frag_table, 1, transaction_id_ull); + wsrep::transaction_id transaction_id(transaction_id_ull); + long long seqno_ll; + Wsrep_schema_impl::scan(frag_table, 2, seqno_ll); + wsrep::seqno seqno(seqno_ll); + + /* This is possible if the server crashes between inserting the + fragment into table and updating the fragment seqno after + certification. 
*/ + if (seqno.is_undefined()) + { + Wsrep_schema_impl::delete_row(frag_table); + continue; + } + + wsrep::gtid gtid(cluster_id, seqno); + int flags; + Wsrep_schema_impl::scan(frag_table, 3, flags); + String data_str; + + (void)frag_table->field[4]->val_str(&data_str); + wsrep::const_buffer data(data_str.ptr(), data_str.length()); + wsrep::ws_meta ws_meta(gtid, + wsrep::stid(server_id, + transaction_id, + client_id), + wsrep::seqno::undefined(), + flags); + + wsrep::high_priority_service* applier; + if (!(applier= server_state.find_streaming_applier(server_id, + transaction_id))) + { + DBUG_ASSERT(wsrep::starts_transaction(flags)); + applier = wsrep_create_streaming_applier(&storage_thd, "recovery"); + server_state.start_streaming_applier(server_id, transaction_id, + applier); + applier->start_transaction(wsrep::ws_handle(transaction_id, 0), + ws_meta); + } + applier->store_globals(); + wsrep::mutable_buffer unused; + if ((ret= applier->apply_write_set(ws_meta, data, unused)) != 0) + { + WSREP_ERROR("SR trx recovery applying returned %d", ret); + } + else + { + applier->after_apply(); + } + storage_service.store_globals(); + } + else if (error == HA_ERR_END_OF_FILE) + { + ret= 0; + } + else + { + WSREP_ERROR("SR table scan returned error %d", error); + } + } + Wsrep_schema_impl::end_scan(frag_table); + Wsrep_schema_impl::finish_stmt(&storage_thd); + trans_commit(&storage_thd); + storage_thd.set_mysys_var(0); +out: + DBUG_RETURN(ret); +} + +void Wsrep_schema::clear_allowlist() +{ + THD* thd= new THD(next_thread_id()); + if (!thd) + { + WSREP_ERROR("Unable to get thd"); + return; + } + + thd->thread_stack= (char*)&thd; + wsrep_init_thd_for_schema(thd); + TABLE* allowlist_table= 0; + int error= 0; + + Wsrep_schema_impl::init_stmt(thd); + + if (Wsrep_schema_impl::open_for_write(thd, allowlist_table_str.c_str(), + &allowlist_table) || + Wsrep_schema_impl::init_for_scan(allowlist_table)) + { + WSREP_ERROR("Failed to open mysql.wsrep_allowlist table"); + goto out; + } + + 
while (0 == error) + { + if ((error= Wsrep_schema_impl::next_record(allowlist_table)) == 0) + { + Wsrep_schema_impl::delete_row(allowlist_table); + } + else if (error == HA_ERR_END_OF_FILE) + { + continue; + } + else + { + WSREP_ERROR("Allowlist table scan returned error %d", error); + } + } + + Wsrep_schema_impl::end_scan(allowlist_table); + Wsrep_schema_impl::finish_stmt(thd); +out: + delete thd; +} + +void Wsrep_schema::store_allowlist(std::vector& ip_allowlist) +{ + THD* thd= new THD(next_thread_id()); + if (!thd) + { + WSREP_ERROR("Unable to get thd"); + return; + } + + thd->thread_stack= (char*)&thd; + wsrep_init_thd_for_schema(thd); + TABLE* allowlist_table= 0; + int error; + Wsrep_schema_impl::init_stmt(thd); + if (Wsrep_schema_impl::open_for_write(thd, allowlist_table_str.c_str(), + &allowlist_table)) + { + WSREP_ERROR("Failed to open mysql.wsrep_allowlist table"); + goto out; + } + for (size_t i= 0; i < ip_allowlist.size(); ++i) + { + Wsrep_schema_impl::store(allowlist_table, 0, ip_allowlist[i]); + if ((error= Wsrep_schema_impl::insert(allowlist_table))) + { + if (error == HA_ERR_FOUND_DUPP_KEY) + { + WSREP_WARN("Duplicate entry (%s) found in `wsrep_allowlist` list", ip_allowlist[i].c_str()); + } + else + { + WSREP_ERROR("Failed to write mysql.wsrep_allowlist table: %d", error); + goto out; + } + } + } + Wsrep_schema_impl::finish_stmt(thd); +out: + delete thd; +} + +typedef struct Allowlist_check_arg +{ + Allowlist_check_arg(const std::string& value) + : value(value) + , response(false) + { + } + std::string value; + bool response; +} Allowlist_check_arg; + +static void *allowlist_check_thread(void *param) +{ + Allowlist_check_arg *arg= (Allowlist_check_arg *) param; + + my_thread_init(); + THD thd(0); + thd.thread_stack= (char *) &thd; + wsrep_init_thd_for_schema(&thd); + + int error; + TABLE *allowlist_table= 0; + bool match_found_or_empty= false; + bool table_have_rows= false; + char row[64]= { + 0, + }; + + /* + * Read allowlist table + */ + 
Wsrep_schema_impl::init_stmt(&thd); + if (Wsrep_schema_impl::open_for_read(&thd, allowlist_table_str.c_str(), + &allowlist_table) || + Wsrep_schema_impl::init_for_scan(allowlist_table)) + { + goto out; + } + while (true) + { + if ((error= Wsrep_schema_impl::next_record(allowlist_table)) == 0) + { + if (Wsrep_schema_impl::scan(allowlist_table, 0, row, sizeof(row))) + { + goto out; + } + table_have_rows= true; + if (!arg->value.compare(row)) + { + match_found_or_empty= true; + break; + } + } + else if (error == HA_ERR_END_OF_FILE) + { + if (!table_have_rows) + { + WSREP_DEBUG("allowlist table empty, allowing all connections."); + // If table is empty we are allowing all connections + match_found_or_empty= true; + } + break; + } + else + { + goto out; + } + } + if (Wsrep_schema_impl::end_scan(allowlist_table)) + { + goto out; + } + Wsrep_schema_impl::finish_stmt(&thd); + (void) trans_commit(&thd); +out: + my_thread_end(); + arg->response = match_found_or_empty; + return 0; +} + +bool Wsrep_schema::allowlist_check(Wsrep_allowlist_key key, + const std::string &value) +{ + // We don't have wsrep schema initialized at this point + if (wsrep_schema_ready == false) + { + return true; + } + pthread_t allowlist_check_thd; + int ret; + Allowlist_check_arg arg(value); + ret= mysql_thread_create(0, /* Not instrumented */ + &allowlist_check_thd, NULL, + allowlist_check_thread, &arg); + if (ret) + { + WSREP_ERROR("allowlist_check(): mysql_thread_create() failed: %d (%s)", + ret, strerror(ret)); + return false; + } + pthread_join(allowlist_check_thd, NULL); + return arg.response; +} diff --git a/sql/wsrep_schema.h b/sql/wsrep_schema.h new file mode 100644 index 00000000..81816bbc --- /dev/null +++ b/sql/wsrep_schema.h @@ -0,0 +1,172 @@ +/* Copyright (C) 2015-2019 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + + +#ifndef WSREP_SCHEMA_H +#define WSREP_SCHEMA_H + +/* wsrep-lib */ +#include "wsrep_types.h" + +#include "mysqld.h" +#include "wsrep_mysqld.h" + +/* + Forward decls +*/ +class THD; +class Relay_log_info; +struct TABLE; +struct TABLE_LIST; +struct st_mysql_lex_string; +typedef struct st_mysql_lex_string LEX_STRING; + +#define WSREP_SCHEMA "mysql" +#define WSREP_STREAMING_TABLE "wsrep_streaming_log" +#define WSREP_CLUSTER_TABLE "wsrep_cluster" +#define WSREP_MEMBERS_TABLE "wsrep_cluster_members" +#define WSREP_ALLOWLIST_TABLE "wsrep_allowlist" + +/** Name of the table in `wsrep_schema_str` used for storing streaming +replication data. In an InnoDB full format, e.g. "database/tablename". */ +extern const char* wsrep_sr_table_name_full; + +class Wsrep_schema +{ + public: + + Wsrep_schema(); + ~Wsrep_schema(); + + /* + Initialize wsrep schema. Storage engines must be running before + calling this function. + */ + int init(); + + /* + Store wsrep view info into wsrep schema. + */ + int store_view(THD*, const Wsrep_view& view); + + /* + Restore view info from stable storage. + */ + Wsrep_view restore_view(THD* thd, const Wsrep_id& own_id) const; + + /** + Append transaction fragment to fragment storage. + Transaction must have been started for THD before this call. + In order to make changes durable, transaction must be committed + separately after this call. 
+ + @param thd THD object + @param server_id Wsrep server identifier + @param transaction_id Transaction identifier + @param flags Flags for the fragment + @param data Fragment data buffer + + @return Zero in case of success, non-zero on failure. + */ + int append_fragment(THD* thd, + const wsrep::id& server_id, + wsrep::transaction_id transaction_id, + wsrep::seqno seqno, + int flags, + const wsrep::const_buffer& data); + /** + Update existing fragment meta data. The fragment must have been + inserted before using append_fragment(). + + @param thd THD object + @param ws_meta Wsrep meta data + + @return Zero in case of success, non-zero on failure. + */ + int update_fragment_meta(THD* thd, + const wsrep::ws_meta& ws_meta); + + /** + Remove fragments from storage. This method must be called + inside active transaction. Fragment removal will be committed + once the transaction commits. + + @param thd Pointer to THD object + @param server_id Identifier of the running server + @param transaction_id Identifier of the current transaction + @param fragments Vector of fragment seqnos to be removed + */ + int remove_fragments(THD* thd, + const wsrep::id& server_id, + wsrep::transaction_id transaction_id, + const std::vector& fragments); + + /** + Replay a transaction from stored fragments. The caller must have + started a transaction for a thd. + + @param thd Pointer to THD object + @param ws_meta Write set meta data for commit fragment. + @param fragments Vector of fragments to be replayed + + @return Zero on success, non-zero on failure. + */ + int replay_transaction(THD* thd, + Relay_log_info* rli, + const wsrep::ws_meta& ws_meta, + const std::vector& fragments); + + /** + Recover streaming transactions from SR table. + This method should be called after storage enignes are initialized. + It will scan SR table and replay found streaming transactions. + + @param orig_thd The THD object of the calling thread. + + @return Zero on success, non-zero on failure. 
+ */ + int recover_sr_transactions(THD* orig_thd); + + + /** + Delete all rows on bootstrap from `wsrep_allowlist` variable + */ + void clear_allowlist(); + + /** + Store allowlist ip on bootstrap from `wsrep_allowlist` variable + */ + void store_allowlist(std::vector& ip_allowlist); + + /** + Scan white list table against accepted connection. Allow if ip + is found in table or if table is empty. + + @param key Which allowlist column to compare + @param value Value to be checked against allowlist + + @return True if found or empty table, false on not found + */ + bool allowlist_check(Wsrep_allowlist_key key, const std::string& val); + + private: + /* Non-copyable */ + Wsrep_schema(const Wsrep_schema&); + Wsrep_schema& operator=(const Wsrep_schema&); +}; + +extern Wsrep_schema* wsrep_schema; + +#endif /* !WSREP_SCHEMA_H */ diff --git a/sql/wsrep_server_service.cc b/sql/wsrep_server_service.cc new file mode 100644 index 00000000..6f902130 --- /dev/null +++ b/sql/wsrep_server_service.cc @@ -0,0 +1,411 @@ +/* Copyright 2018 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "my_global.h" +#include "wsrep_server_service.h" +#include "wsrep_server_state.h" +#include "wsrep_client_state.h" +#include "wsrep_client_service.h" +#include "wsrep_storage_service.h" +#include "wsrep_high_priority_service.h" + +#include "wsrep_sst.h" +#include "wsrep_xid.h" +#include "wsrep_mysqld.h" +#include "wsrep_schema.h" +#include "wsrep_utils.h" +#include "wsrep_thd.h" + +#include "log.h" /* sql_print_xxx() */ +#include "sql_class.h" /* system variables */ +#include "transaction.h" /* trans_xxx */ +#include "sql_base.h" /* close_thread_tables */ +#include "debug_sync.h" + +static void init_service_thd(THD* thd, char* thread_stack) +{ + thd->thread_stack= thread_stack; + thd->real_id= pthread_self(); + thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer(); + thd->set_command(COM_SLEEP); + thd->reset_for_next_command(true); + server_threads.insert(thd); // as wsrep_innobase_kill_one_trx() uses find_thread_by_id() +} + +Wsrep_storage_service* +wsrep_create_storage_service(THD* orig_THD, const char* ctx) +{ + THD* thd= new THD(true, true); + init_service_thd(thd, orig_THD->thread_stack); + WSREP_DEBUG("Created storage service in %s context with thread id %llu", + ctx, thd->thread_id); + /* Use variables from the current thd attached to client_service. + This is because we need to be able to BF abort storage access + operations. 
*/ + wsrep_assign_from_threadvars(thd); + return new Wsrep_storage_service(thd); +} + +wsrep::storage_service* Wsrep_server_service::storage_service( + wsrep::client_service& client_service) +{ + Wsrep_client_service& cs= + static_cast(client_service); + return wsrep_create_storage_service(cs.m_thd, "local"); +} + +wsrep::storage_service* Wsrep_server_service::storage_service( + wsrep::high_priority_service& high_priority_service) +{ + Wsrep_high_priority_service& hps= + static_cast(high_priority_service); + return wsrep_create_storage_service(hps.m_thd, "high priority"); +} + +void Wsrep_server_service::release_storage_service( + wsrep::storage_service* storage_service) +{ + Wsrep_storage_service* ss= + static_cast(storage_service); + THD* thd= ss->m_thd; + wsrep_reset_threadvars(thd); + server_threads.erase(thd); + delete ss; + delete thd; +} + +Wsrep_applier_service* +wsrep_create_streaming_applier(THD *orig_thd, const char *ctx) +{ + /* Reset variables to allow creating new variables in thread local + storage for new THD if needed. Note that reset must be done for + current_thd, as orig_thd may not be in effect. This may be the case when + streaming transaction is BF aborted and streaming applier + is created from BF aborter context. */ + Wsrep_threadvars saved_threadvars(wsrep_save_threadvars()); + if (saved_threadvars.cur_thd) + wsrep_reset_threadvars(saved_threadvars.cur_thd); + THD *thd= 0; + Wsrep_applier_service *ret= 0; + if (!wsrep_create_threadvars() && + (thd= new THD(next_thread_id(), true))) + { + init_service_thd(thd, orig_thd->thread_stack); + wsrep_assign_from_threadvars(thd); + WSREP_DEBUG("Created streaming applier service in %s context with " + "thread id %llu", ctx, thd->thread_id); + if (!(ret= new (std::nothrow) Wsrep_applier_service(thd))) + { + delete thd; + } + } + /* Restore original thread local storage state before returning. 
*/ + wsrep_restore_threadvars(saved_threadvars); + if (saved_threadvars.cur_thd) + wsrep_store_threadvars(saved_threadvars.cur_thd); + return ret; +} + +wsrep::high_priority_service* +Wsrep_server_service::streaming_applier_service( + wsrep::client_service& orig_client_service) +{ + Wsrep_client_service& orig_cs= + static_cast(orig_client_service); + return wsrep_create_streaming_applier(orig_cs.m_thd, "local"); +} + +wsrep::high_priority_service* +Wsrep_server_service::streaming_applier_service( + wsrep::high_priority_service& orig_high_priority_service) +{ + Wsrep_high_priority_service& + orig_hps(static_cast(orig_high_priority_service)); + return wsrep_create_streaming_applier(orig_hps.m_thd, "high priority"); +} + +void Wsrep_server_service::release_high_priority_service(wsrep::high_priority_service* high_priority_service) +{ + Wsrep_high_priority_service* hps= + static_cast(high_priority_service); + THD* thd= hps->m_thd; + delete hps; + wsrep_store_threadvars(thd); + server_threads.erase(thd); + delete thd; + wsrep_delete_threadvars(); +} + +void Wsrep_server_service::background_rollback( + wsrep::unique_lock &lock WSREP_UNUSED, + wsrep::client_state &client_state) +{ + DBUG_ASSERT(lock.owns_lock()); + Wsrep_client_state &cs= static_cast(client_state); + mysql_mutex_assert_owner(&cs.thd()->LOCK_thd_data); + wsrep_fire_rollbacker(cs.thd()); +} + +void Wsrep_server_service::bootstrap() +{ + wsrep::log_info() + << "Bootstrapping a new cluster, setting initial position to " + << wsrep::gtid::undefined(); + wsrep_set_SE_checkpoint(wsrep::gtid::undefined(), wsrep_gtid_server.undefined()); +} + +void Wsrep_server_service::log_message(enum wsrep::log::level level, + const char* message) +{ + switch (level) + { + case wsrep::log::debug: + WSREP_DEBUG("%s", message); + break; + case wsrep::log::info: + WSREP_INFO("%s", message); + break; + case wsrep::log::warning: + WSREP_WARN("%s", message); + break; + case wsrep::log::error: + WSREP_ERROR("%s", message); + break; + 
case wsrep::log::unknown: + WSREP_UNKNOWN("%s", message); + break; + } +} + +void Wsrep_server_service::log_view( + wsrep::high_priority_service* high_priority_service, + const wsrep::view& view) +{ + Wsrep_high_priority_service* applier= + static_cast(high_priority_service); + /* Update global system variables */ + mysql_mutex_lock(&LOCK_global_system_variables); + if (wsrep_auto_increment_control && view.own_index() >= 0) + { + global_system_variables.auto_increment_offset= view.own_index() + 1; + global_system_variables.auto_increment_increment= view.members().size(); + wsrep_protocol_version= view.protocol_version(); + } + mysql_mutex_unlock(&LOCK_global_system_variables); + + /* Update wsrep status variables */ + mysql_mutex_lock(&LOCK_status); + wsrep_cluster_size= view.members().size(); + wsrep_local_index= view.own_index(); + std::ostringstream os; + os << view.state_id().id(); + wsrep_update_cluster_state_uuid(os.str().c_str()); + mysql_mutex_unlock(&LOCK_status); + wsrep_config_state->set(view); + wsrep_cluster_conf_id= view.view_seqno().get(); + + if (view.status() == wsrep::view::primary) + { + if (applier) + { + Wsrep_id id; + Wsrep_view prev_view= wsrep_schema->restore_view(applier->m_thd, id); + bool checkpoint_was_reset= false; + if (prev_view.state_id().id() != view.state_id().id()) + { + WSREP_DEBUG("New cluster UUID was generated, resetting position info"); + wsrep_set_SE_checkpoint(wsrep::gtid::undefined(), wsrep_gtid_server.undefined()); + checkpoint_was_reset= true; + } + + if (wsrep_debug) + { + std::ostringstream os; + os << "Storing cluster view:\n" << view; + WSREP_INFO("%s", os.str().c_str()); + DBUG_ASSERT(prev_view.state_id().id() != view.state_id().id() || + view.state_id().seqno().get() >= prev_view.state_id().seqno().get()); + } + + if (trans_begin(applier->m_thd, MYSQL_START_TRANS_OPT_READ_WRITE)) + { + WSREP_WARN("Failed to start transaction for store view"); + } + else + { + if (wsrep_schema->store_view(applier->m_thd, view)) + { 
+ WSREP_WARN("Failed to store view"); + trans_rollback_stmt(applier->m_thd); + if (!trans_rollback(applier->m_thd)) + { + close_thread_tables(applier->m_thd); + } + } + else + { + if (trans_commit(applier->m_thd)) + { + WSREP_WARN("Failed to commit transaction for store view"); + } + } + applier->m_thd->release_transactional_locks(); + } + + /* + Backwards compatibility: When running in mixed cluster with + Galera 3.x, the provider does not generate unique sequence numbers + for views. This condition can be checked by inspecting last + committed as returned by the provider. If the last_committed + matches to view state_id seqno, the cluster runs in backwards + compatibility mode and we skip setting the checkpoint for + view. + */ + wsrep::seqno last_committed= + Wsrep_server_state::instance().provider().last_committed_gtid().seqno(); + if (checkpoint_was_reset || last_committed != view.state_id().seqno()) + { + wsrep_set_SE_checkpoint(view.state_id(), wsrep_gtid_server.gtid()); + } + DBUG_ASSERT(wsrep_get_SE_checkpoint().id() == view.state_id().id()); + } + else + { + WSREP_DEBUG("No applier in Wsrep_server_service::log_view(), " + "skipping write to wsrep_schema"); + } + } +} + +void Wsrep_server_service::recover_streaming_appliers(wsrep::client_service& cs) +{ + Wsrep_client_service& client_service= static_cast(cs); + wsrep_recover_sr_from_storage(client_service.m_thd); +} + +void Wsrep_server_service::recover_streaming_appliers( + wsrep::high_priority_service& hs) +{ + Wsrep_high_priority_service& high_priority_service= + static_cast(hs); + wsrep_recover_sr_from_storage(high_priority_service.m_thd); +} + +wsrep::view Wsrep_server_service::get_view(wsrep::client_service& c, + const wsrep::id& own_id) +{ + Wsrep_client_service& cs(static_cast(c)); + wsrep::view v(wsrep_schema->restore_view(cs.m_thd, own_id)); + return v; +} + +wsrep::gtid Wsrep_server_service::get_position(wsrep::client_service&) +{ + return wsrep_get_SE_checkpoint(); +} + +void 
Wsrep_server_service::set_position(wsrep::client_service& c WSREP_UNUSED, + const wsrep::gtid& gtid) +{ + Wsrep_client_service& cs WSREP_UNUSED (static_cast(c)); + DBUG_ASSERT(cs.m_client_state.transaction().state() + == wsrep::transaction::s_aborted); + // Wait until all prior committers have finished. + wsrep::gtid wait_for(gtid.id(), + wsrep::seqno(gtid.seqno().get() - 1)); + if (auto err = Wsrep_server_state::instance().provider() + .wait_for_gtid(wait_for, std::numeric_limits::max())) + { + WSREP_WARN("Wait for gtid returned error %d while waiting for " + "prior transactions to commit before setting position", err); + } + wsrep_set_SE_checkpoint(gtid, wsrep_gtid_server.gtid()); +} + +void Wsrep_server_service::log_state_change( + enum Wsrep_server_state::state prev_state, + enum Wsrep_server_state::state current_state) +{ + WSREP_INFO("Server status change %s -> %s", + wsrep::to_c_string(prev_state), + wsrep::to_c_string(current_state)); + mysql_mutex_lock(&LOCK_status); + switch (current_state) + { + case Wsrep_server_state::s_synced: + wsrep_ready= TRUE; + WSREP_INFO("Synchronized with group, ready for connections"); + wsrep_ready_set(true); + /* fall through */ + case Wsrep_server_state::s_joined: + case Wsrep_server_state::s_donor: + wsrep_cluster_status= "Primary"; + break; + case Wsrep_server_state::s_connected: + wsrep_cluster_status= "non-Primary"; + wsrep_ready_set(false); + wsrep_connected= TRUE; + break; + case Wsrep_server_state::s_disconnected: + wsrep_ready_set(false); + wsrep_connected= FALSE; + wsrep_cluster_status= "Disconnected"; + break; + default: + wsrep_ready_set(false); + wsrep_cluster_status= "non-Primary"; + break; + } + mysql_mutex_unlock(&LOCK_status); + wsrep_config_state->set(current_state); +} + +bool Wsrep_server_service::sst_before_init() const +{ + return wsrep_before_SE(); +} + +std::string Wsrep_server_service::sst_request() +{ + return wsrep_sst_prepare(); +} + +int Wsrep_server_service::start_sst(const std::string& 
sst_request, + const wsrep::gtid& gtid, + bool bypass) +{ + return wsrep_sst_donate(sst_request, gtid, bypass); +} + +int Wsrep_server_service::wait_committing_transactions(int timeout) +{ + return wsrep_wait_committing_connections_close(timeout); +} + +void Wsrep_server_service::debug_sync(const char* sync_point) +{ +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF(sync_point, { + std::stringstream dbug_action; + dbug_action << "now " + << "SIGNAL " << sync_point << "_reached " + << "WAIT_FOR " << sync_point << "_continue"; + const std::string& action(dbug_action.str()); + DBUG_ASSERT(!debug_sync_set_action(current_thd, + action.c_str(), + action.length())); + };); +#endif +} diff --git a/sql/wsrep_server_service.h b/sql/wsrep_server_service.h new file mode 100644 index 00000000..0fc48402 --- /dev/null +++ b/sql/wsrep_server_service.h @@ -0,0 +1,102 @@ +/* Copyright 2018 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef WSREP_SERVER_SERVICE_H +#define WSREP_SERVER_SERVICE_H + +/* wsrep-lib */ +#include "wsrep/server_service.hpp" +#include "wsrep/exception.hpp" // not_impemented_error(), remove when finished +#include "wsrep/storage_service.hpp" + +class Wsrep_server_state; + + +/* wsrep::server_service interface implementation */ +class Wsrep_server_service : public wsrep::server_service +{ +public: + Wsrep_server_service(Wsrep_server_state& server_state) + : m_server_state(server_state) + { } + + wsrep::storage_service* storage_service(wsrep::client_service&); + + wsrep::storage_service* storage_service(wsrep::high_priority_service&); + + void release_storage_service(wsrep::storage_service*); + + wsrep::high_priority_service* + streaming_applier_service(wsrep::client_service&); + + wsrep::high_priority_service* + streaming_applier_service(wsrep::high_priority_service&); + + void release_high_priority_service(wsrep::high_priority_service*); + + void background_rollback(wsrep::unique_lock &, + wsrep::client_state &); + + void bootstrap(); + void log_message(enum wsrep::log::level, const char*); + + void log_dummy_write_set(wsrep::client_state&, const wsrep::ws_meta&) + { throw wsrep::not_implemented_error(); } + + void log_view(wsrep::high_priority_service*, const wsrep::view&); + + void recover_streaming_appliers(wsrep::client_service&); + void recover_streaming_appliers(wsrep::high_priority_service&); + wsrep::view get_view(wsrep::client_service&, const wsrep::id& own_id); + + wsrep::gtid get_position(wsrep::client_service&); + void set_position(wsrep::client_service&, const wsrep::gtid&); + + void log_state_change(enum wsrep::server_state::state, + enum wsrep::server_state::state); + + bool sst_before_init() const; + + std::string sst_request(); + int 
start_sst(const std::string&, const wsrep::gtid&, bool); + + int wait_committing_transactions(int); + + void debug_sync(const char*); +private: + Wsrep_server_state& m_server_state; +}; + +/** + Helper method to create new streaming applier. + + @param orig_thd Original thd context to copy operation context from. + @param ctx Context string for debug logging. + */ +class Wsrep_applier_service; +Wsrep_applier_service* +wsrep_create_streaming_applier(THD *orig_thd, const char *ctx); + +/** + Helper method to create new storage service. + + @param orig_thd Original thd context to copy operation context from. + @param ctx Context string for debug logging. +*/ +class Wsrep_storage_service; +Wsrep_storage_service* +wsrep_create_storage_service(THD *orig_thd, const char *ctx); + +#endif /* WSREP_SERVER_SERVICE */ diff --git a/sql/wsrep_server_state.cc b/sql/wsrep_server_state.cc new file mode 100644 index 00000000..e1730423 --- /dev/null +++ b/sql/wsrep_server_state.cc @@ -0,0 +1,99 @@ +/* Copyright 2018 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "my_global.h" +#include "wsrep_api.h" +#include "wsrep_server_state.h" +#include "wsrep_allowlist_service.h" +#include "wsrep_binlog.h" /* init/deinit group commit */ + +mysql_mutex_t LOCK_wsrep_server_state; +mysql_cond_t COND_wsrep_server_state; + +#ifdef HAVE_PSI_INTERFACE +PSI_mutex_key key_LOCK_wsrep_server_state; +PSI_cond_key key_COND_wsrep_server_state; +#endif + +wsrep::provider::services Wsrep_server_state::m_provider_services; + +Wsrep_server_state::Wsrep_server_state(const std::string& name, + const std::string& incoming_address, + const std::string& address, + const std::string& working_dir, + const wsrep::gtid& initial_position, + int max_protocol_version) + : wsrep::server_state(m_mutex, + m_cond, + m_service, + NULL, + name, + incoming_address, + address, + working_dir, + initial_position, + max_protocol_version, + wsrep::server_state::rm_sync) + , m_mutex(&LOCK_wsrep_server_state) + , m_cond(&COND_wsrep_server_state) + , m_service(*this) +{ } + +Wsrep_server_state::~Wsrep_server_state() = default; + +void Wsrep_server_state::init_once(const std::string& name, + const std::string& incoming_address, + const std::string& address, + const std::string& working_dir, + const wsrep::gtid& initial_position, + int max_protocol_version) +{ + if (m_instance == 0) + { + mysql_mutex_init(key_LOCK_wsrep_server_state, &LOCK_wsrep_server_state, + MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_server_state, &COND_wsrep_server_state, 0); + m_instance = new Wsrep_server_state(name, + incoming_address, + address, + working_dir, + initial_position, + max_protocol_version); + } +} + +void Wsrep_server_state::destroy() +{ + if (m_instance) + { + delete m_instance; + m_instance= 0; + mysql_mutex_destroy(&LOCK_wsrep_server_state); + 
mysql_cond_destroy(&COND_wsrep_server_state); + } +} + +void Wsrep_server_state::init_provider_services() +{ + m_provider_services.allowlist_service= wsrep_allowlist_service_init(); +} + +void Wsrep_server_state::deinit_provider_services() +{ + if (m_provider_services.allowlist_service) + wsrep_allowlist_service_deinit(); + m_provider_services= wsrep::provider::services(); +} + diff --git a/sql/wsrep_server_state.h b/sql/wsrep_server_state.h new file mode 100644 index 00000000..8759f7a9 --- /dev/null +++ b/sql/wsrep_server_state.h @@ -0,0 +1,83 @@ +/* Copyright 2018 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef WSREP_SERVER_STATE_H +#define WSREP_SERVER_STATE_H + +/* wsrep-lib */ +#include "wsrep/server_state.hpp" +#include "wsrep/provider.hpp" + +/* implementation */ +#include "wsrep_server_service.h" +#include "wsrep_mutex.h" +#include "wsrep_condition_variable.h" + +class Wsrep_server_state : public wsrep::server_state +{ +public: + static void init_once(const std::string& name, + const std::string& incoming_address, + const std::string& address, + const std::string& working_dir, + const wsrep::gtid& initial_position, + int max_protocol_version); + static void destroy(); + + static Wsrep_server_state& instance() + { + return *m_instance; + } + + static bool is_inited() + { + return (m_instance != NULL); + } + + static wsrep::provider& get_provider() + { + return instance().provider(); + } + + static bool has_capability(int capability) + { + return (get_provider().capabilities() & capability); + } + + static void init_provider_services(); + static void deinit_provider_services(); + + static const wsrep::provider::services& provider_services() + { + return m_provider_services; + } + +private: + Wsrep_server_state(const std::string& name, + const std::string& incoming_address, + const std::string& address, + const std::string& working_dir, + const wsrep::gtid& initial_position, + int max_protocol_version); + ~Wsrep_server_state(); + Wsrep_mutex m_mutex; + Wsrep_condition_variable m_cond; + Wsrep_server_service m_service; + static wsrep::provider::services m_provider_services; + static Wsrep_server_state* m_instance; + +}; + +#endif // WSREP_SERVER_STATE_H diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc new file mode 100644 index 00000000..db138f25 --- /dev/null +++ b/sql/wsrep_sst.cc @@ -0,0 +1,2161 @@ +/* Copyright 2008-2022 Codership Oy + Copyright (c) 
2008, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "wsrep_sst.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "wsrep_priv.h" +#include "wsrep_utils.h" +#include "wsrep_xid.h" +#include "wsrep_thd.h" +#include "wsrep_server_state.h" + +#include +#include +#include "debug_sync.h" + +#include + +static char wsrep_defaults_file[FN_REFLEN * 2 + 10 + 30 + + sizeof(WSREP_SST_OPT_CONF) + + sizeof(WSREP_SST_OPT_CONF_SUFFIX) + + sizeof(WSREP_SST_OPT_CONF_EXTRA)]= {0}; + +const char* wsrep_sst_method = WSREP_SST_DEFAULT; +const char* wsrep_sst_receive_address= WSREP_SST_ADDRESS_AUTO; +const char* wsrep_sst_donor = ""; +const char* wsrep_sst_auth = NULL; + +// container for real auth string +static const char* sst_auth_real = NULL; +my_bool wsrep_sst_donor_rejects_queries= FALSE; + +#define WSREP_EXTEND_TIMEOUT_INTERVAL 60 +#define WSREP_TIMEDWAIT_SECONDS 30 + +bool sst_joiner_completed = false; +bool sst_donor_completed = false; + +struct sst_thread_arg +{ + const char* cmd; + char** env; + char* ret_str; + int err; + mysql_mutex_t lock; + mysql_cond_t cond; + + sst_thread_arg (const char* c, char** e) + : cmd(c), env(e), ret_str(0), err(-1) + { + mysql_mutex_init(key_LOCK_wsrep_sst_thread, &lock, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_sst_thread, 
&cond, NULL); + } + + ~sst_thread_arg() + { + mysql_cond_destroy (&cond); + mysql_mutex_unlock (&lock); + mysql_mutex_destroy (&lock); + } +}; + +static void wsrep_donor_monitor_end(void) +{ + mysql_mutex_lock(&LOCK_wsrep_donor_monitor); + sst_donor_completed= true; + mysql_cond_signal(&COND_wsrep_donor_monitor); + mysql_mutex_unlock(&LOCK_wsrep_donor_monitor); +} + +static void wsrep_joiner_monitor_end(void) +{ + mysql_mutex_lock(&LOCK_wsrep_joiner_monitor); + sst_joiner_completed= true; + mysql_cond_signal(&COND_wsrep_joiner_monitor); + mysql_mutex_unlock(&LOCK_wsrep_joiner_monitor); +} + +static void* wsrep_sst_donor_monitor_thread(void *arg __attribute__((unused))) +{ + int ret= 0; + unsigned long time_waited= 0; + + mysql_mutex_lock(&LOCK_wsrep_donor_monitor); + + WSREP_INFO("Donor monitor thread started to monitor"); + + wsp::thd thd(FALSE); // we turn off wsrep_on for this THD so that it can + // operate with wsrep_ready == OFF + + while (!sst_donor_completed) + { + timespec ts; + set_timespec(ts, WSREP_TIMEDWAIT_SECONDS); + time_t start_time= time(NULL); + ret= mysql_cond_timedwait(&COND_wsrep_donor_monitor, &LOCK_wsrep_donor_monitor, &ts); + time_t end_time= time(NULL); + time_waited+= difftime(end_time, start_time); + + if (ret == ETIMEDOUT && !sst_donor_completed) + { + WSREP_DEBUG("Donor waited %lu sec, extending systemd startup timeout as SST" + "is not completed", + time_waited); + service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL, + "WSREP state transfer ongoing..."); + } + } + + WSREP_INFO("Donor monitor thread ended with total time %lu sec", time_waited); + mysql_mutex_unlock(&LOCK_wsrep_donor_monitor); + + return NULL; +} + +static void* wsrep_sst_joiner_monitor_thread(void *arg __attribute__((unused))) +{ + int ret= 0; + unsigned long time_waited= 0; + + mysql_mutex_lock(&LOCK_wsrep_joiner_monitor); + + WSREP_INFO("Joiner monitor thread started to monitor"); + + wsp::thd thd(FALSE); // we turn off wsrep_on for this THD so that it can + 
// operate with wsrep_ready == OFF + + while (!sst_joiner_completed) + { + timespec ts; + set_timespec(ts, WSREP_TIMEDWAIT_SECONDS); + time_t start_time= time(NULL); + ret= mysql_cond_timedwait(&COND_wsrep_joiner_monitor, &LOCK_wsrep_joiner_monitor, &ts); + time_t end_time= time(NULL); + time_waited+= difftime(end_time, start_time); + + if (ret == ETIMEDOUT && !sst_joiner_completed) + { + WSREP_DEBUG("Joiner waited %lu sec, extending systemd startup timeout as SST" + "is not completed", + time_waited); + service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL, + "WSREP state transfer ongoing..."); + } + } + + WSREP_INFO("Joiner monitor thread ended with total time %lu sec", time_waited); + mysql_mutex_unlock(&LOCK_wsrep_joiner_monitor); + + return NULL; +} + +/* return true if character can be a part of a filename */ +static bool filename_char(int const c) +{ + return isalnum(c) || (c == '-') || (c == '_') || (c == '.'); +} + +/* return true if character can be a part of an address string */ +static bool address_char(int const c) +{ + return filename_char(c) || + (c == ':') || (c == '[') || (c == ']') || (c == '/'); +} + +static bool check_request_str(const char* const str, + bool (*check) (int c), + bool log_warn = true) +{ + for (size_t i(0); str[i] != '\0'; ++i) + { + if (!check(str[i])) + { + if (log_warn) WSREP_WARN("Illegal character in state transfer request: %i (%c).", + str[i], str[i]); + return true; + } + } + + return false; +} + +bool wsrep_sst_method_check (sys_var *self, THD* thd, set_var* var) +{ + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length == 0 )) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? 
+ var->save_result.string_value.str : "NULL"); + return 1; + } + + /* check also that method name is alphanumeric string */ + if (check_request_str(var->save_result.string_value.str, + filename_char, false)) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; + } + + return 0; +} + +static const char* data_home_dir; + +void wsrep_set_data_home_dir(const char *data_dir) +{ + data_home_dir= (data_dir && *data_dir) ? data_dir : NULL; +} + +static void make_wsrep_defaults_file() +{ + if (!wsrep_defaults_file[0]) + { + char *ptr= wsrep_defaults_file; + char *end= ptr + sizeof(wsrep_defaults_file); + if (my_defaults_file) + ptr= strxnmov(ptr, end - ptr, + WSREP_SST_OPT_CONF, " '", my_defaults_file, "' ", NULL); + + if (my_defaults_extra_file) + ptr= strxnmov(ptr, end - ptr, + WSREP_SST_OPT_CONF_EXTRA, " '", my_defaults_extra_file, "' ", NULL); + + if (my_defaults_group_suffix) + ptr= strxnmov(ptr, end - ptr, + WSREP_SST_OPT_CONF_SUFFIX, " '", my_defaults_group_suffix, "' ", NULL); + } +} + + +bool wsrep_sst_receive_address_check (sys_var *self, THD* thd, set_var* var) +{ + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + { + goto err; + } + + return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? 
+ var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_sst_receive_address_update (sys_var *self, THD* thd, + enum_var_type type) +{ + return 0; +} + +bool wsrep_sst_auth_check (sys_var *self, THD* thd, set_var* var) +{ + return 0; +} + +static bool sst_auth_real_set (const char* value) +{ + const char* v= NULL; + + if (value) + { + v= my_strdup(PSI_INSTRUMENT_ME, value, MYF(0)); + } + else // its NULL + { + wsrep_sst_auth_free(); + return 0; + } + + if (v) + { + // set sst_auth_real + if (sst_auth_real) { my_free((void *) sst_auth_real); } + sst_auth_real= v; + + // mask wsrep_sst_auth + if (strlen(sst_auth_real)) + { + if (wsrep_sst_auth) { my_free((void*) wsrep_sst_auth); } + wsrep_sst_auth= my_strdup(PSI_INSTRUMENT_ME, WSREP_SST_AUTH_MASK, MYF(0)); + } + else + { + if (wsrep_sst_auth) { my_free((void*) wsrep_sst_auth); } + wsrep_sst_auth= NULL; + } + + return 0; + } + return 1; +} + +void wsrep_sst_auth_free() +{ + if (wsrep_sst_auth) { my_free((void *) wsrep_sst_auth); } + if (sst_auth_real) { my_free((void *) sst_auth_real); } + wsrep_sst_auth= NULL; + sst_auth_real= NULL; +} + +bool wsrep_sst_auth_update (sys_var *self, THD* thd, enum_var_type type) +{ + return sst_auth_real_set (wsrep_sst_auth); +} + +void wsrep_sst_auth_init () +{ + sst_auth_real_set(wsrep_sst_auth); +} + +bool wsrep_sst_donor_check (sys_var *self, THD* thd, set_var* var) +{ + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN -1))) // safety + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? 
+ var->save_result.string_value.str : "NULL"); + return 1; + } + + return 0; +} + +bool wsrep_sst_donor_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + + +bool wsrep_before_SE() +{ + return (wsrep_provider != NULL + && strcmp (wsrep_provider, WSREP_NONE) + && strcmp (wsrep_sst_method, WSREP_SST_SKIP) + && strcmp (wsrep_sst_method, WSREP_SST_MYSQLDUMP)); +} + +// Signal end of SST +static bool wsrep_sst_complete (THD* thd, + int const rcode, + wsrep::gtid const sst_gtid) +{ + Wsrep_client_service client_service(thd, thd->wsrep_cs()); + Wsrep_server_state& server_state= Wsrep_server_state::instance(); + enum wsrep::server_state::state state= server_state.state(); + bool failed= false; + char start_pos_buf[FN_REFLEN]; + ssize_t len= wsrep::print_to_c_str(sst_gtid, start_pos_buf, FN_REFLEN-1); + start_pos_buf[len]='\0'; + + // Do not call sst_received if we are not in joiner or + // initialized state on server. This is because it + // assumes we are on those states. Give error if we are + // in incorrect state. + if ((state == Wsrep_server_state::s_joiner || + state == Wsrep_server_state::s_initialized)) + { + if (Wsrep_server_state::instance().sst_received(client_service, rcode)) + { + failed= true; + } + else + { + WSREP_INFO("SST succeeded for position %s", start_pos_buf); + } + } + else + { + WSREP_ERROR("SST failed for position %s initialized %d server_state %s", + start_pos_buf, + server_state.is_initialized(), + wsrep::to_c_string(state)); + failed= true; + } + + wsrep_joiner_monitor_end(); + return failed; +} + + /* + If wsrep provider is loaded, inform that the new state snapshot + has been received. Also update the local checkpoint. + + @param thd [IN] + @param uuid [IN] Initial state UUID + @param seqno [IN] Initial state sequence number + @param state [IN] Always NULL, also ignored by wsrep provider (?) + @param state_len [IN] Always 0, also ignored by wsrep provider (?) 
+ @return true when successful, false if error +*/ +bool wsrep_sst_received (THD* thd, + const wsrep_uuid_t& uuid, + wsrep_seqno_t const seqno, + const void* const state, + size_t const state_len) +{ + bool error= false; + /* + To keep track of whether the local uuid:seqno should be updated. Also, note + that local state (uuid:seqno) is updated/checkpointed only after we get an + OK from wsrep provider. By doing so, the values remain consistent across + the server & wsrep provider. + */ + /* + TODO: Handle backwards compatibility. WSREP API v25 does not have + wsrep schema. + */ + /* + Logical SST methods (mysqldump etc) don't update InnoDB sys header. + Reset the SE checkpoint before recovering view in order to avoid + sanity check failure. + */ + wsrep::gtid const sst_gtid(wsrep::id(uuid.data, sizeof(uuid.data)), + wsrep::seqno(seqno)); + + if (!wsrep_before_SE()) { + wsrep_set_SE_checkpoint(wsrep::gtid::undefined(), wsrep_gtid_server.undefined()); + wsrep_set_SE_checkpoint(sst_gtid, wsrep_gtid_server.gtid()); + } + wsrep_verify_SE_checkpoint(uuid, seqno); + + /* + Both wsrep_init_SR() and wsrep_recover_view() may use + wsrep thread pool. Restore original thd context before returning. + */ + if (thd) { + wsrep_store_threadvars(thd); + } + else { + set_current_thd(nullptr); + } + + /* During sst WSREP(thd) is not yet set for joiner. */ + if (WSREP_ON) + { + int const rcode(seqno < 0 ? 
seqno : 0); + error= wsrep_sst_complete(thd,rcode, sst_gtid); + } + + return error; +} + +static int sst_scan_uuid_seqno (const char* str, + wsrep_uuid_t* uuid, wsrep_seqno_t* seqno) +{ + int offt= wsrep_uuid_scan (str, strlen(str), uuid); + errno= 0; /* Reset the errno */ + if (offt > 0 && strlen(str) > (unsigned int)offt && ':' == str[offt]) + { + *seqno= strtoll (str + offt + 1, NULL, 10); + if (*seqno != LLONG_MAX || errno != ERANGE) + { + return 0; + } + } + + WSREP_ERROR("Failed to parse uuid:seqno pair: '%s'", str); + return -EINVAL; +} + +// get rid of trailing \n +static char* my_fgets (char* buf, size_t buf_len, FILE* stream) +{ + char* ret= fgets (buf, buf_len, stream); + + if (ret) + { + size_t len= strlen(ret); + if (len > 0 && ret[len - 1] == '\n') ret[len - 1]= '\0'; + } + + return ret; +} + +/* + Generate "name 'value'" string. +*/ +static char* generate_name_value(const char* name, const char* value) +{ + size_t name_len= strlen(name); + size_t value_len= strlen(value); + char* buf= + (char*) my_malloc(PSI_INSTRUMENT_ME, (name_len + value_len + 5), MYF(0)); + if (buf) + { + char* ref= buf; + *ref++ = ' '; + memcpy(ref, name, name_len * sizeof(char)); + ref += name_len; + *ref++ = ' '; + *ref++ = '\''; + memcpy(ref, value, value_len * sizeof(char)); + ref += value_len; + *ref++ = '\''; + *ref = 0; + } + return buf; +} +/* + Generate binlog option string for sst_donate_other(), sst_prepare_other(). + + Returns zero on success, negative error code otherwise. + + String containing binlog name is stored in param ret if binlog is enabled + and GTID mode is on, otherwise empty string. Returned string should be + freed with my_free(). + */ +static int generate_binlog_opt_val(char** ret) +{ + DBUG_ASSERT(ret); + *ret= NULL; + if (opt_bin_log) + { + assert(opt_bin_logname); + *ret= strcmp(opt_bin_logname, "0") ? 
+ generate_name_value(WSREP_SST_OPT_BINLOG, + opt_bin_logname) : + my_strdup(PSI_INSTRUMENT_ME, "", MYF(0)); + } + else + { + *ret= my_strdup(PSI_INSTRUMENT_ME, "", MYF(0)); + } + if (!*ret) return -ENOMEM; + return 0; +} + +static int generate_binlog_index_opt_val(char** ret) +{ + DBUG_ASSERT(ret); + *ret= NULL; + if (opt_binlog_index_name) + { + *ret= strcmp(opt_binlog_index_name, "0") ? + generate_name_value(WSREP_SST_OPT_BINLOG_INDEX, + opt_binlog_index_name) : + my_strdup(PSI_INSTRUMENT_ME, "", MYF(0)); + } + else + { + *ret= my_strdup(PSI_INSTRUMENT_ME, "", MYF(0)); + } + if (!*ret) return -ENOMEM; + return 0; +} + +// report progress event +static void sst_report_progress(int const from, + long long const total_prev, + long long const total, + long long const complete) +{ + static char buf[128] = { '\0', }; + static size_t const buf_len= sizeof(buf) - 1; + snprintf(buf, buf_len, + "{ \"from\": %d, \"to\": %d, \"total\": %lld, \"done\": %lld, " + "\"indefinite\": -1 }", + from, WSREP_MEMBER_JOINED, total_prev + total, total_prev +complete); + WSREP_DEBUG("REPORTING SST PROGRESS: '%s'", buf); +} + +// process "complete" event from SST script feedback +static void sst_handle_complete(const char* const input, + long long const total_prev, + long long* total, + long long* complete, + int const from) +{ + long long x; + int n= sscanf(input, " %lld", &x); + if (n > 0 && x > *complete) + { + *complete= x; + if (*complete > *total) *total= *complete; + sst_report_progress(from, total_prev, *total, *complete); + } +} + +// process "total" event from SST script feedback +static void sst_handle_total(const char* const input, + long long* total_prev, + long long* total, + long long* complete, + int const from) +{ + long long x; + int n= sscanf(input, " %lld", &x); + if (n > 0) + { + // new stage starts, update total_prev + *total_prev+= *total; + *total= x; + *complete= 0; + sst_report_progress(from, *total_prev, *total, *complete); + } +} + +static void* 
sst_joiner_thread (void* a) +{ + sst_thread_arg* arg= (sst_thread_arg*) a; + int err= 1; + + { + THD* thd; + static const char magic[]= "ready"; + static const size_t magic_len= sizeof(magic) - 1; + const size_t out_len= 512; + char out[out_len]; + + WSREP_INFO("Running: '%s'", arg->cmd); + + wsp::process proc (arg->cmd, "r", arg->env); + + if (proc.pipe() && !proc.error()) + { + const char* tmp= my_fgets (out, out_len, proc.pipe()); + + if (!tmp || strlen(tmp) < (magic_len + 2) || + strncasecmp (tmp, magic, magic_len)) + { + WSREP_ERROR("Failed to read '%s ' from: %s\n\tRead: '%s'", + magic, arg->cmd, tmp); + proc.wait(); + if (proc.error()) err= proc.error(); + } + else + { + err= 0; + } + } + else + { + err= proc.error(); + WSREP_ERROR("Failed to execute: %s : %d (%s)", + arg->cmd, err, strerror(err)); + } + + /* + signal sst_prepare thread with ret code, + it will go on sending SST request + */ + mysql_mutex_lock (&arg->lock); + if (!err) + { + arg->ret_str= strdup (out + magic_len + 1); + if (!arg->ret_str) err= ENOMEM; + } + arg->err= -err; + mysql_cond_signal (&arg->cond); + mysql_mutex_unlock (&arg->lock); //! @note arg is unusable after that. + + if (err) return NULL; /* lp:808417 - return immediately, don't signal + * initializer thread to ensure single thread of + * shutdown. 
*/ + + wsrep_uuid_t ret_uuid = WSREP_UUID_UNDEFINED; + wsrep_seqno_t ret_seqno= WSREP_SEQNO_UNDEFINED; + + // current stage progress + long long total= 0; + long long complete= 0; + // previous stages cumulative progress + long long total_prev= 0; + + // in case of successful receiver start, wait for SST completion/end + const char* tmp= NULL; + err= EINVAL; + + wait_signal: + tmp= my_fgets (out, out_len, proc.pipe()); + + if (tmp) + { + static const char magic_total[]= "total"; + static const size_t total_len=strlen(magic_total); + static const char magic_complete[]= "complete"; + static const size_t complete_len=strlen(magic_complete); + static const int from= WSREP_MEMBER_JOINER; + + if (!strncasecmp (tmp, magic_complete, complete_len)) + { + sst_handle_complete(tmp + complete_len, total_prev, &total, &complete, + from); + goto wait_signal; + } + else if (!strncasecmp (tmp, magic_total, total_len)) + { + sst_handle_total(tmp + total_len, &total_prev, &total, &complete, from); + goto wait_signal; + } + } + else + { + WSREP_ERROR("Failed to read uuid:seqno and wsrep_gtid_domain_id from " + "joiner script."); + proc.wait(); + if (proc.error()) err= proc.error(); + } + + // this should be the final script output with GTID + if (tmp) + { + proc.wait(); + // Read state ID (UUID:SEQNO) followed by wsrep_gtid_domain_id (if any). + const char *pos= strchr(out, ' '); + + if (!pos) { + + if (wsrep_gtid_mode) + { + // There is no wsrep_gtid_domain_id (some older version SST script?). + WSREP_WARN("Did not find domain ID from SST script output '%s'. " + "Domain ID must be set manually to keep binlog consistent", + out); + } + err= sst_scan_uuid_seqno (out, &ret_uuid, &ret_seqno); + + } else { + // Scan state ID first followed by wsrep_gtid_domain_id. + unsigned long int domain_id; + + // Null-terminate the state-id. 
+ out[pos - out]= 0; + err= sst_scan_uuid_seqno (out, &ret_uuid, &ret_seqno); + + if (err) + { + goto err; + } + else if (wsrep_gtid_mode) + { + errno= 0; /* Reset the errno */ + domain_id= strtoul(pos + 1, NULL, 10); + err= errno; + + /* Check if we received a valid gtid_domain_id. */ + if (err == EINVAL || err == ERANGE) + { + WSREP_ERROR("Failed to get donor wsrep_gtid_domain_id."); + err= EINVAL; + goto err; + } else { + wsrep_gtid_server.domain_id= (uint32) domain_id; + wsrep_gtid_domain_id= (uint32)domain_id; + } + } + } + } + +err: + + wsrep::gtid ret_gtid; + + if (err) + { + ret_gtid= wsrep::gtid::undefined(); + } + else + { + ret_gtid= wsrep::gtid(wsrep::id(ret_uuid.data, sizeof(ret_uuid.data)), + wsrep::seqno(ret_seqno)); + } + + /* + Tell initializer thread that SST is complete + For that initialize a THD + */ + if (my_thread_init()) + { + WSREP_ERROR("my_thread_init() failed, can't signal end of SST. " + "Aborting."); + unireg_abort(1); + } + + thd= new THD(next_thread_id()); + + if (!thd) + { + WSREP_ERROR("Failed to allocate THD to restore view from local state, " + "can't signal end of SST. 
Aborting."); + unireg_abort(1); + } + + thd->thread_stack= (char*) &thd; + thd->security_ctx->skip_grants(); + thd->system_thread= SYSTEM_THREAD_GENERIC; + thd->real_id= pthread_self(); + + wsrep_assign_from_threadvars(thd); + wsrep_store_threadvars(thd); + + /* */ + thd->variables.wsrep_on = 0; + /* No binlogging */ + thd->variables.sql_log_bin = 0; + thd->variables.option_bits &= ~OPTION_BIN_LOG; + /* No general log */ + thd->variables.option_bits |= OPTION_LOG_OFF; + /* Read committed isolation to avoid gap locking */ + thd->variables.tx_isolation= ISO_READ_COMMITTED; + + wsrep_sst_complete (thd, -err, ret_gtid); + + delete thd; + my_thread_end(); + } + + return NULL; +} + +#define WSREP_SST_AUTH_ENV "WSREP_SST_OPT_AUTH" +#define WSREP_SST_REMOTE_AUTH_ENV "WSREP_SST_OPT_REMOTE_AUTH" +#define DATA_HOME_DIR_ENV "INNODB_DATA_HOME_DIR" + +static int sst_append_env_var(wsp::env& env, + const char* const var, + const char* const val) +{ + int const env_str_size= strlen(var) + 1 /* = */ + + (val ? strlen(val) : 0) + 1 /* \0 */; + + wsp::string env_str(env_str_size); // for automatic cleanup on return + if (!env_str()) return -ENOMEM; + + int ret= snprintf(env_str(), env_str_size, "%s=%s", var, val ? val : ""); + + if (ret < 0 || ret >= env_str_size) + { + WSREP_ERROR("sst_append_env_var(): snprintf(%s=%s) failed: %d", + var, val, ret); + return (ret < 0 ? ret : -EMSGSIZE); + } + + env.append(env_str()); + return -env.error(); +} + +#ifdef _WIN32 +/* + Space, single quote, ampersand, backquote, I/O redirection + characters, caret, all brackets, plus, exclamation and comma + characters require text to be enclosed in double quotes: +*/ +#define IS_SPECIAL(c) \ + (isspace(c) || c == '\'' || c == '&' || c == '`' || c == '|' || \ + c == '>' || c == '<' || c == ';' || c == '^' || \ + c == '[' || c == ']' || c == '{' || c == '}' || \ + c == '(' || c == ')' || c == '+' || c == '!' 
|| \ + c == ',') +/* + Inside values, equals character are interpreted as special + character and requires quotation: +*/ +#define IS_SPECIAL_V(c) (IS_SPECIAL(c) || c == '=') +/* + Double quotation mark and percent characters require escaping: +*/ +#define IS_REQ_ESCAPING(c) (c == '""' || c == '%') +#else +/* + Space, single quote, ampersand, backquote, and I/O redirection + characters require text to be enclosed in double quotes. The + semicolon is used to separate shell commands, so it must be + enclosed in double quotes as well: +*/ +#define IS_SPECIAL(c) \ + (isspace(c) || c == '\'' || c == '&' || c == '`' || c == '|' || \ + c == '>' || c == '<' || c == ';') +/* + Inside values, characters are interpreted as in parameter names: +*/ +#define IS_SPECIAL_V(c) IS_SPECIAL(c) +/* + Double quotation mark and backslash characters require + backslash prefixing, the dollar symbol is used to substitute + a variable value, therefore it also requires escaping: +*/ +#define IS_REQ_ESCAPING(c) (c == '"' || c == '\\' || c == '$') +#endif + +static size_t estimate_cmd_len (bool* extra_args) +{ + /* + The length of the area reserved for the control parameters + of the SST script (excluding the copying of the original + mysqld arguments): + */ + size_t cmd_len= 4096; + bool extra= false; + /* + If mysqld was started with arguments, add them all: + */ + if (orig_argc > 1) + { + for (int i = 1; i < orig_argc; i++) + { + const char* arg= orig_argv[i]; + size_t n= strlen(arg); + if (n == 0) continue; + cmd_len += n; + bool quotation= false; + char c; + while ((c = *arg++) != 0) + { + if (IS_SPECIAL(c)) + { + quotation= true; + } + else if (IS_REQ_ESCAPING(c)) + { + cmd_len++; +#ifdef _WIN32 + quotation= true; +#endif + } + /* + If the equals symbol is encountered, then we need to separately + process the right side: + */ + else if (c == '=') + { + /* Perhaps we need to quote the left part of the argument: */ + if (quotation) + { + cmd_len += 2; + /* + Reset the quotation flag, since 
now the status for + the right side of the expression will be saved here: + */ + quotation= false; + } + while ((c = *arg++) != 0) + { + if (IS_SPECIAL_V(c)) + { + quotation= true; + } + else if (IS_REQ_ESCAPING(c)) + { + cmd_len++; +#ifdef _WIN32 + quotation= true; +#endif + } + } + break; + } + } + /* Perhaps we need to quote the entire argument or its right part: */ + if (quotation) + { + cmd_len += 2; + } + } + extra = true; + cmd_len += strlen(WSREP_SST_OPT_MYSQLD); + /* + Add the separating spaces between arguments, + and one additional space before "--mysqld-args": + */ + cmd_len += orig_argc; + } + *extra_args= extra; + return cmd_len; +} + +static void copy_orig_argv (char* cmd_str) +{ + /* + If mysqld was started with arguments, copy them all: + */ + if (orig_argc > 1) + { + size_t n = strlen(WSREP_SST_OPT_MYSQLD); + *cmd_str++ = ' '; + memcpy(cmd_str, WSREP_SST_OPT_MYSQLD, n * sizeof(char)); + cmd_str += n; + for (int i = 1; i < orig_argc; i++) + { + char* arg= orig_argv[i]; + n = strlen(arg); + if (n == 0) continue; + *cmd_str++ = ' '; + bool quotation= false; + bool plain= true; + char *arg_scan= arg; + char c; + while ((c = *arg_scan++) != 0) + { + if (IS_SPECIAL(c)) + { + quotation= true; + } + else if (IS_REQ_ESCAPING(c)) + { + plain= false; +#ifdef _WIN32 + quotation= true; +#endif + } + /* + If the equals symbol is encountered, then we need to separately + process the right side: + */ + else if (c == '=') + { + /* Calculate length of the Left part of the argument: */ + size_t m = (size_t) (arg_scan - arg) - 1; + if (m) + { + /* Perhaps we need to quote the left part of the argument: */ + if (quotation) + { + *cmd_str++ = '"'; + } + /* + If there were special characters inside, then we can use + the fast memcpy function: + */ + if (plain) + { + memcpy(cmd_str, arg, m * sizeof(char)); + cmd_str += m; + /* Left part of the argument has already been processed: */ + n -= m; + arg += m; + } + /* Otherwise we need to prefix individual characters: */ + 
else + { + n -= m; + while (m) + { + c = *arg++; + if (IS_REQ_ESCAPING(c)) + { +#ifdef _WIN32 + *cmd_str++ = c; +#else + *cmd_str++ = '\\'; +#endif + } + *cmd_str++ = c; + m--; + } + /* + Reset the plain string flag, since now the status for + the right side of the expression will be saved here: + */ + plain= true; + } + /* Perhaps we need to quote the left part of the argument: */ + if (quotation) + { + *cmd_str++ = '"'; + /* + Reset the quotation flag, since now the status for + the right side of the expression will be saved here: + */ + quotation= false; + } + } + /* Copy equals symbol: */ + *cmd_str++ = '='; + arg++; + n--; + /* Let's deal with the right side of the expression: */ + while ((c = *arg_scan++) != 0) + { + if (IS_SPECIAL_V(c)) + { + quotation= true; + } + else if (IS_REQ_ESCAPING(c)) + { + plain= false; +#ifdef _WIN32 + quotation= true; +#endif + } + } + break; + } + } + if (n) + { + /* Perhaps we need to quote the entire argument or its right part: */ + if (quotation) + { + *cmd_str++ = '"'; + } + /* + If there were no special characters inside, then we can use + the fast memcpy function: + */ + if (plain) + { + memcpy(cmd_str, arg, n * sizeof(char)); + cmd_str += n; + } + /* Otherwise we need to prefix individual characters: */ + else + { + while ((c = *arg++) != 0) + { + if (IS_REQ_ESCAPING(c)) + { +#ifdef _WIN32 + *cmd_str++ = c; +#else + *cmd_str++ = '\\'; +#endif + } + *cmd_str++ = c; + } + } + /* Perhaps we need to quote the entire argument or its right part: */ + if (quotation) + { + *cmd_str++ = '"'; + } + } + } + /* + Add a terminating null character (not counted in the length, + since we've overwritten the original null character which + was previously added by snprintf): + */ + *cmd_str = 0; + } +} + +static ssize_t sst_prepare_other (const char* method, + const char* sst_auth, + const char* addr_in, + const char** addr_out) +{ + bool extra_args; + size_t const cmd_len= estimate_cmd_len(&extra_args); + wsp::string cmd_str(cmd_len); + + if 
(!cmd_str()) + { + WSREP_ERROR("sst_prepare_other(): could not allocate cmd buffer of %zd bytes", + cmd_len); + return -ENOMEM; + } + + char* binlog_opt_val= NULL; + char* binlog_index_opt_val= NULL; + + int ret; + if ((ret= generate_binlog_opt_val(&binlog_opt_val))) + { + WSREP_ERROR("sst_prepare_other(): generate_binlog_opt_val() failed: %d", + ret); + return ret; + } + + if ((ret= generate_binlog_index_opt_val(&binlog_index_opt_val))) + { + WSREP_ERROR("sst_prepare_other(): generate_binlog_index_opt_val() failed %d", + ret); + if (binlog_opt_val) my_free(binlog_opt_val); + return ret; + } + + make_wsrep_defaults_file(); + + ret= snprintf (cmd_str(), cmd_len, + "wsrep_sst_%s " + WSREP_SST_OPT_ROLE " 'joiner' " + WSREP_SST_OPT_ADDR " '%s' " + WSREP_SST_OPT_DATA " '%s' " + "%s" + WSREP_SST_OPT_PARENT " %d " + WSREP_SST_OPT_PROGRESS " %d" + "%s" + "%s", + method, addr_in, mysql_real_data_home, + wsrep_defaults_file, + (int)getpid(), + wsrep_debug ? 1 : 0, + binlog_opt_val, binlog_index_opt_val); + + my_free(binlog_opt_val); + my_free(binlog_index_opt_val); + + if (ret < 0 || size_t(ret) >= cmd_len) + { + WSREP_ERROR("sst_prepare_other(): snprintf() failed: %d", ret); + return (ret < 0 ? ret : -EMSGSIZE); + } + + if (extra_args) + copy_orig_argv(cmd_str() + ret); + + wsp::env env(NULL); + if (env.error()) + { + WSREP_ERROR("sst_prepare_other(): env. 
var ctor failed: %d", -env.error()); + return -env.error(); + } + + if ((ret= sst_append_env_var(env, WSREP_SST_AUTH_ENV, sst_auth))) + { + WSREP_ERROR("sst_prepare_other(): appending auth failed: %d", ret); + return ret; + } + + if (data_home_dir) + { + if ((ret= sst_append_env_var(env, DATA_HOME_DIR_ENV, data_home_dir))) + { + WSREP_ERROR("sst_prepare_other(): appending data " + "directory failed: %d", ret); + return ret; + } + } + + pthread_t tmp, monitor; + sst_thread_arg arg(cmd_str(), env()); + + mysql_mutex_lock (&arg.lock); + + ret = mysql_thread_create (key_wsrep_sst_joiner_monitor, &monitor, NULL, wsrep_sst_joiner_monitor_thread, NULL); + + if (ret) + { + WSREP_ERROR("sst_prepare_other(): mysql_thread_create() failed: %d (%s)", + ret, strerror(ret)); + return -ret; + } + + sst_joiner_completed= false; + + ret= mysql_thread_create (key_wsrep_sst_joiner, &tmp, NULL, sst_joiner_thread, &arg); + + if (ret) + { + WSREP_ERROR("sst_prepare_other(): mysql_thread_create() failed: %d (%s)", + ret, strerror(ret)); + + pthread_detach(monitor); + return -ret; + } + + mysql_cond_wait (&arg.cond, &arg.lock); + + *addr_out= arg.ret_str; + + if (!arg.err) + ret= strlen(*addr_out); + else + { + assert (arg.err < 0); + ret= arg.err; + } + + pthread_detach (tmp); + pthread_detach (monitor); + + return ret; +} + +extern uint mysqld_port; + +/*! 
Just tells donor where to send mysqldump */ +static ssize_t sst_prepare_mysqldump (const char* addr_in, + const char** addr_out) +{ + ssize_t ret= strlen (addr_in); + + if (!strrchr(addr_in, ':')) + { + ssize_t s= ret + 7; + char* tmp= (char*) malloc (s); + + if (tmp) + { + ret= snprintf (tmp, s, "%s:%u", addr_in, mysqld_port); + + if (ret > 0 && ret < s) + { + *addr_out= tmp; + return ret; + } + if (ret > 0) /* buffer too short */ ret= -EMSGSIZE; + free (tmp); + } + else { + ret= -ENOMEM; + } + + WSREP_ERROR ("Could not prepare state transfer request: " + "adding default port failed: %zd.", ret); + } + else { + *addr_out= addr_in; + } + + pthread_t monitor; + ret = mysql_thread_create (key_wsrep_sst_joiner_monitor, &monitor, NULL, wsrep_sst_joiner_monitor_thread, NULL); + + if (ret) + { + WSREP_ERROR("sst_prepare_other(): mysql_thread_create() failed: %d (%s)", + ret, strerror(ret)); + return -ret; + } + + sst_joiner_completed= false; + pthread_detach (monitor); + + return ret; +} + +std::string wsrep_sst_prepare() +{ + const ssize_t ip_max= 256; + char ip_buf[ip_max]; + const char* addr_in= NULL; + const char* addr_out= NULL; + const char* method; + + if (!strcmp(wsrep_sst_method, WSREP_SST_SKIP)) + { + return WSREP_STATE_TRANSFER_TRIVIAL; + } + + /* + Figure out SST receive address. Common for all SST methods. 
+ */ + wsp::Address* addr_in_parser= NULL; + + // Attempt 1: wsrep_sst_receive_address + if (wsrep_sst_receive_address && + strcmp(wsrep_sst_receive_address, WSREP_SST_ADDRESS_AUTO)) + { + addr_in_parser = new wsp::Address(wsrep_sst_receive_address); + + if (!addr_in_parser->is_valid()) + { + WSREP_ERROR("Could not parse wsrep_sst_receive_address : %s", + wsrep_sst_receive_address); + unireg_abort(1); + } + } + //Attempt 2: wsrep_node_address + else if (wsrep_node_address && *wsrep_node_address) + { + addr_in_parser = new wsp::Address(wsrep_node_address); + + if (addr_in_parser->is_valid()) + { + // we must not inherit the port number from this address: + addr_in_parser->set_port(0); + } + else + { + WSREP_ERROR("Could not parse wsrep_node_address : %s", + wsrep_node_address); + throw wsrep::runtime_error("Failed to prepare for SST. Unrecoverable"); + } + } + // Attempt 3: Try to get the IP from the list of available interfaces. + else + { + ssize_t ret= wsrep_guess_ip(ip_buf, ip_max); + + if (ret && ret < ip_max) + { + addr_in_parser = new wsp::Address(ip_buf); + } + else + { + WSREP_ERROR("Failed to guess address to accept state transfer. " + "wsrep_sst_receive_address must be set manually."); + throw wsrep::runtime_error("Could not prepare state transfer request"); + } + } + + assert(addr_in_parser); + + size_t len= addr_in_parser->get_address_len(); + bool is_ipv6= addr_in_parser->is_ipv6(); + const char* address= addr_in_parser->get_address(); + + if (len > (is_ipv6 ? 
ip_max - 2 : ip_max)) + { + WSREP_ERROR("Address to accept state transfer is too long: '%s'", + address); + unireg_abort(1); + } + + if (is_ipv6) + { + /* wsrep_sst_*.sh scripts requite ipv6 addreses to be in square breackets */ + ip_buf[0] = '['; + /* the length (len) already includes the null byte: */ + memcpy(ip_buf + 1, address, len - 1); + ip_buf[len] = ']'; + ip_buf[len + 1] = 0; + len += 2; + } + else + { + memcpy(ip_buf, address, len); + } + + int port= addr_in_parser->get_port(); + if (port) + { + size_t space= ip_max - len; + ip_buf[len - 1] = ':'; + int ret= snprintf(ip_buf + len, ip_max - len, "%d", port); + if (ret <= 0 || (size_t) ret > space) + { + WSREP_ERROR("Address to accept state transfer is too long: '%s:%d'", + address, port); + unireg_abort(1); + } + } + + delete addr_in_parser; + addr_in = ip_buf; + + ssize_t addr_len= -ENOSYS; + method = wsrep_sst_method; + if (!strcmp(method, WSREP_SST_MYSQLDUMP)) + { + addr_len= sst_prepare_mysqldump (addr_in, &addr_out); + if (addr_len < 0) + { + throw wsrep::runtime_error("Could not prepare mysqldimp address"); + } + } + else + { + /*! A heuristic workaround until we learn how to stop and start engines */ + if (Wsrep_server_state::instance().is_initialized() && + Wsrep_server_state::instance().state() == Wsrep_server_state::s_joiner) + { + if (!strcmp(method, WSREP_SST_XTRABACKUP) || + !strcmp(method, WSREP_SST_XTRABACKUPV2)) + { + WSREP_WARN("The %s SST method is deprecated, so it is automatically " + "replaced by %s", method, WSREP_SST_MARIABACKUP); + method = WSREP_SST_MARIABACKUP; + } + // we already did SST at initializaiton, now engines are running + // sql_print_information() is here because the message is too long + // for WSREP_INFO. + sql_print_information ("WSREP: " + "You have configured '%s' state snapshot transfer method " + "which cannot be performed on a running server. " + "Wsrep provider won't be able to fall back to it " + "if other means of state transfer are unavailable. 
" + "In that case you will need to restart the server.", + method); + return ""; + } + + addr_len = sst_prepare_other (method, sst_auth_real, + addr_in, &addr_out); + if (addr_len < 0) + { + WSREP_ERROR("Failed to prepare for '%s' SST. Unrecoverable.", + method); + throw wsrep::runtime_error("Failed to prepare for SST. Unrecoverable"); + } + } + + std::string ret; + ret += method; + ret.push_back('\0'); + ret += addr_out; + + const char* method_ptr(ret.data()); + const char* addr_ptr(ret.data() + strlen(method_ptr) + 1); + WSREP_DEBUG("Prepared SST request: %s|%s", method_ptr, addr_ptr); + + if (addr_out != addr_in) /* malloc'ed */ free ((char*)addr_out); + + return ret; +} + +// helper method for donors +static int sst_run_shell (const char* cmd_str, char** env, int max_tries) +{ + int ret= 0; + + for (int tries=1; tries <= max_tries; tries++) + { + wsp::process proc (cmd_str, "r", env); + + if (NULL != proc.pipe()) + { + proc.wait(); + } + + if ((ret= proc.error())) + { + WSREP_ERROR("Try %d/%d: '%s' failed: %d (%s)", + tries, max_tries, proc.cmd(), ret, strerror(ret)); + sleep (1); + } + else + { + WSREP_DEBUG("SST script successfully completed."); + break; + } + } + + return -ret; +} + +static void sst_reject_queries(my_bool close_conn) +{ + WSREP_INFO("Rejecting client queries for the duration of SST."); + if (TRUE == close_conn) wsrep_close_client_connections(FALSE); +} + +static int sst_donate_mysqldump (const char* addr, + const wsrep::gtid& gtid, + bool bypass, + char** env) // carries auth info +{ + char host[256]; + wsp::Address address(addr); + if (!address.is_valid()) + { + WSREP_ERROR("Could not parse SST address : %s", addr); + return 0; + } + memcpy(host, address.get_address(), address.get_address_len()); + int port= address.get_port(); + bool extra_args; + size_t const cmd_len= estimate_cmd_len(&extra_args); + wsp::string cmd_str(cmd_len); + + if (!cmd_str()) + { + WSREP_ERROR("sst_donate_mysqldump(): " + "could not allocate cmd buffer of %zd 
bytes", cmd_len); + return -ENOMEM; + } + + /* + we enable new client connections so that mysqldump donation can connect in, + but we reject local connections from modifyingcdata during SST, to keep + data intact + */ + if (!bypass && wsrep_sst_donor_rejects_queries) sst_reject_queries(TRUE); + + make_wsrep_defaults_file(); + + std::ostringstream uuid_oss; + uuid_oss << gtid.id(); + int ret= snprintf (cmd_str(), cmd_len, + "wsrep_sst_mysqldump " + WSREP_SST_OPT_ADDR " '%s' " + WSREP_SST_OPT_PORT " '%u' " + WSREP_SST_OPT_LPORT " '%u' " + WSREP_SST_OPT_SOCKET " '%s' " + "%s" + WSREP_SST_OPT_GTID " '%s:%lld,%d-%d-%llu' " + WSREP_SST_OPT_GTID_DOMAIN_ID " '%d'" + "%s", + addr, port, mysqld_port, mysqld_unix_port, + wsrep_defaults_file, + uuid_oss.str().c_str(), gtid.seqno().get(), + wsrep_gtid_server.domain_id, wsrep_gtid_server.server_id, + wsrep_gtid_server.seqno(), + wsrep_gtid_server.domain_id, + bypass ? " " WSREP_SST_OPT_BYPASS : ""); + + if (ret < 0 || size_t(ret) >= cmd_len) + { + WSREP_ERROR("sst_donate_mysqldump(): snprintf() failed: %d", ret); + return (ret < 0 ? ret : -EMSGSIZE); + } + + if (extra_args) + copy_orig_argv(cmd_str() + ret); + + WSREP_DEBUG("Running: '%s'", cmd_str()); + + ret= sst_run_shell (cmd_str(), env, 3); + + wsrep::gtid sst_sent_gtid(ret == 0 ? + gtid : + wsrep::gtid(gtid.id(), + wsrep::seqno::undefined())); + Wsrep_server_state::instance().sst_sent(sst_sent_gtid, ret); + + wsrep_donor_monitor_end(); + + return ret; +} + +wsrep_seqno_t wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + +/* + Create a file under data directory. 
+*/ +static int sst_create_file(const char *name, const char *content) +{ + int err= 0; + char *real_name; + char *tmp_name; + ssize_t len; + FILE *file; + + len= strlen(mysql_real_data_home) + strlen(name) + 2; + real_name= (char *) alloca(len); + + snprintf(real_name, (size_t) len, "%s/%s", mysql_real_data_home, name); + + tmp_name= (char *) alloca(len + 4); + snprintf(tmp_name, (size_t) len + 4, "%s.tmp", real_name); + + file= fopen(tmp_name, "w+"); + + if (0 == file) + { + err= errno; + WSREP_ERROR("Failed to open '%s': %d (%s)", tmp_name, err, strerror(err)); + } + else + { + // Write the specified content into the file. + if (content != NULL) + { + fprintf(file, "%s\n", content); + fsync(fileno(file)); + } + + fclose(file); + + if (rename(tmp_name, real_name) == -1) + { + err= errno; + WSREP_ERROR("Failed to rename '%s' to '%s': %d (%s)", tmp_name, + real_name, err, strerror(err)); + } + } + + return err; +} + +static int run_sql_command(THD *thd, const char *query) +{ + thd->set_query((char *)query, strlen(query)); + + Parser_state ps; + if (ps.init(thd, thd->query(), thd->query_length())) + { + WSREP_ERROR("SST query: %s failed", query); + return -1; + } + + mysql_parse(thd, thd->query(), thd->query_length(), &ps); + if (thd->is_error()) + { + int const err= thd->get_stmt_da()->sql_errno(); + WSREP_WARN ("Error executing '%s': %d (%s)", + query, err, thd->get_stmt_da()->message()); + thd->clear_error(); + return -1; + } + return 0; +} + +static int sst_flush_tables(THD* thd) +{ + WSREP_INFO("Flushing tables for SST..."); + + int err= 0; + int not_used; + /* + Files created to notify the SST script about the outcome of table flush + operation. 
+ */ + const char *flush_success= "tables_flushed"; + const char *flush_error= "sst_error"; + + CHARSET_INFO *current_charset= thd->variables.character_set_client; + + if (!is_supported_parser_charset(current_charset)) + { + /* Do not use non-supported parser character sets */ + WSREP_WARN("Current client character set is non-supported parser character set: %s", current_charset->cs_name.str); + thd->variables.character_set_client= &my_charset_latin1; + WSREP_WARN("For SST temporally setting character set to : %s", + my_charset_latin1.cs_name.str); + } + + if (run_sql_command(thd, "FLUSH TABLES WITH READ LOCK")) + { + err= -1; + } + else + { + /* + Make sure logs are flushed after global read lock acquired. In case + reload fails, we must also release the acquired FTWRL. + */ + if (reload_acl_and_cache(thd, REFRESH_ENGINE_LOG | REFRESH_BINARY_LOG, + (TABLE_LIST*) 0, ¬_used)) + { + thd->global_read_lock.unlock_global_read_lock(thd); + err= -1; + } + } + + thd->variables.character_set_client= current_charset; + + if (err) + { + WSREP_ERROR("Failed to flush and lock tables"); + + /* + The SST must be aborted as the flush tables failed. Notify this to SST + script by creating the error file. + */ + int tmp; + if ((tmp= sst_create_file(flush_error, NULL))) { + err= tmp; + } + } + else + { + ha_disable_internal_writes(true); + + WSREP_INFO("Tables flushed."); + + // Create a file with cluster state ID and wsrep_gtid_domain_id. 
+ char content[100]; + snprintf(content, sizeof(content), "%s:%lld %d\n", wsrep_cluster_state_uuid, + (long long)wsrep_locked_seqno, wsrep_gtid_server.domain_id); + err= sst_create_file(flush_success, content); + + if (err) + WSREP_INFO("Creating file for flush_success failed %d",err); + + const char base_name[]= "tables_flushed"; + ssize_t const full_len= strlen(mysql_real_data_home) + strlen(base_name)+2; + char *real_name= (char*) my_malloc(key_memory_WSREP, full_len, 0); + sprintf(real_name, "%s/%s", mysql_real_data_home, base_name); + char *tmp_name= (char*) my_malloc(key_memory_WSREP, full_len + 4, 0); + sprintf(tmp_name, "%s.tmp", real_name); + + FILE* file= fopen(tmp_name, "w+"); + if (0 == file) + { + err= errno; + WSREP_ERROR("Failed to open '%s': %d (%s)", tmp_name, err,strerror(err)); + } + else + { + Wsrep_server_state& server_state= Wsrep_server_state::instance(); + std::ostringstream uuid_oss; + + uuid_oss << server_state.current_view().state_id().id(); + + fprintf(file, "%s:%lld %u\n", + uuid_oss.str().c_str(), server_state.pause_seqno().get(), + wsrep_gtid_server.domain_id); + fsync(fileno(file)); + fclose(file); + if (rename(tmp_name, real_name) == -1) + { + err= errno; + WSREP_ERROR("Failed to rename '%s' to '%s': %d (%s)", + tmp_name, real_name, err,strerror(err)); + } + } + my_free(real_name); + my_free(tmp_name); + if (err) + ha_disable_internal_writes(false); + } + + return err; +} + +static void* sst_donor_thread (void* a) +{ + sst_thread_arg* arg= (sst_thread_arg*)a; + + WSREP_INFO("Running: '%s'", arg->cmd); + + int err= 1; + bool locked= false; + + const char* out= NULL; + const size_t out_len= 128; + char out_buf[out_len]; + + wsrep_uuid_t ret_uuid= WSREP_UUID_UNDEFINED; + // seqno of complete SST + wsrep_seqno_t ret_seqno= WSREP_SEQNO_UNDEFINED; + + // We turn off wsrep_on for this THD so that it can + // operate with wsrep_ready == OFF + // We also set this SST thread THD as system thread + wsp::thd thd(FALSE, true); + wsp::process 
proc(arg->cmd, "r", arg->env); + + err= -proc.error(); + +/* Inform server about SST script startup and release TO isolation */ + mysql_mutex_lock (&arg->lock); + arg->err= -err; + mysql_cond_signal (&arg->cond); + mysql_mutex_unlock (&arg->lock); //! @note arg is unusable after that. + + if (proc.pipe() && !err) + { + long long total= 0; + long long complete= 0; + // total form previous stages + long long total_prev= 0; + +wait_signal: + out= my_fgets (out_buf, out_len, proc.pipe()); + + if (out) + { + static const char magic_flush[]= "flush tables"; + static const char magic_cont[]= "continue"; + static const char magic_done[]= "done"; + static const size_t done_len=strlen(magic_done); + static const char magic_total[]= "total"; + static const size_t total_len=strlen(magic_total); + static const char magic_complete[]= "complete"; + static const size_t complete_len=strlen(magic_complete); + static const int from= WSREP_MEMBER_DONOR; + + if (!strncasecmp (out, magic_complete, complete_len)) + { + sst_handle_complete(out + complete_len, total_prev, &total, &complete, + from); + goto wait_signal; + } + else if (!strncasecmp (out, magic_total, total_len)) + { + sst_handle_total(out + total_len, &total_prev, &total, &complete, from); + goto wait_signal; + } + else if (!strcasecmp (out, magic_flush)) + { + err= sst_flush_tables (thd.ptr); + + if (!err) + { + locked= true; + /* + Lets also keep statements that modify binary logs (like RESET LOGS, + RESET MASTER) from proceeding until the files have been transferred + to the joiner node. 
+ */ + if (mysql_bin_log.is_open()) + mysql_mutex_lock(mysql_bin_log.get_log_lock()); + + WSREP_INFO("Donor state reached"); + +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("sync.wsrep_donor_state", + { + const char act[]= + "now " + "SIGNAL sync.wsrep_donor_state_reached " + "WAIT_FOR signal.wsrep_donor_state"; + assert(!debug_sync_set_action(thd.ptr, + STRING_WITH_LEN(act))); + };); +#endif + + goto wait_signal; + } + } + else if (!strcasecmp (out, magic_cont)) + { + if (locked) + { + locked= false; + ha_disable_internal_writes(false); + if (mysql_bin_log.is_open()) + mysql_mutex_unlock(mysql_bin_log.get_log_lock()); + thd.ptr->global_read_lock.unlock_global_read_lock(thd.ptr); + } + err= 0; + goto wait_signal; + } + else if (!strncasecmp (out, magic_done, done_len)) + { + err= sst_scan_uuid_seqno (out + strlen(magic_done) + 1, + &ret_uuid, &ret_seqno); + } + else + { + WSREP_WARN("Received unknown signal: '%s'", out); + err = -EINVAL; + proc.wait(); + } + } + else + { + WSREP_ERROR("Failed to read from: %s", proc.cmd()); + proc.wait(); + } + if (!err && proc.error()) err= -proc.error(); + } + else + { + WSREP_ERROR("Failed to execute: %s : %d (%s)", + proc.cmd(), err, strerror(err)); + } + + if (locked) // don't forget to unlock server before return + { + ha_disable_internal_writes(false); + if (mysql_bin_log.is_open()) + { + mysql_mutex_assert_owner(mysql_bin_log.get_log_lock()); + mysql_mutex_unlock(mysql_bin_log.get_log_lock()); + } + thd.ptr->global_read_lock.unlock_global_read_lock(thd.ptr); + } + + wsrep::gtid gtid(wsrep::id(ret_uuid.data, sizeof(ret_uuid.data)), + wsrep::seqno(err ? 
wsrep::seqno::undefined() : + wsrep::seqno(ret_seqno))); + + Wsrep_server_state::instance().sst_sent(gtid, err); + + proc.wait(); + + wsrep_donor_monitor_end(); + return nullptr; +} + +static int sst_donate_other (const char* method, + const char* addr, + const wsrep::gtid& gtid, + bool bypass, + char** env) // carries auth info +{ + bool extra_args; + size_t const cmd_len= estimate_cmd_len(&extra_args); + wsp::string cmd_str(cmd_len); + + if (!cmd_str()) + { + WSREP_ERROR("sst_donate_other(): " + "could not allocate cmd buffer of %zd bytes", cmd_len); + return -ENOMEM; + } + + char* binlog_opt_val= NULL; + char* binlog_index_opt_val= NULL; + + int ret; + if ((ret= generate_binlog_opt_val(&binlog_opt_val))) + { + WSREP_ERROR("sst_donate_other(): generate_binlog_opt_val() failed: %d",ret); + return ret; + } + + if ((ret= generate_binlog_index_opt_val(&binlog_index_opt_val))) + { + WSREP_ERROR("sst_prepare_other(): generate_binlog_index_opt_val() failed %d", + ret); + if (binlog_opt_val) my_free(binlog_opt_val); + return ret; + } + + make_wsrep_defaults_file(); + + std::ostringstream uuid_oss; + uuid_oss << gtid.id(); + ret= snprintf (cmd_str(), cmd_len, + "wsrep_sst_%s " + WSREP_SST_OPT_ROLE " 'donor' " + WSREP_SST_OPT_ADDR " '%s' " + WSREP_SST_OPT_LPORT " %u " + WSREP_SST_OPT_SOCKET " '%s' " + WSREP_SST_OPT_PROGRESS " %d " + WSREP_SST_OPT_DATA " '%s' " + "%s" + WSREP_SST_OPT_GTID " '%s:%lld' " + WSREP_SST_OPT_GTID_DOMAIN_ID " %d" + "%s" + "%s" + "%s", + method, addr, mysqld_port, mysqld_unix_port, + wsrep_debug ? 1 : 0, + mysql_real_data_home, + wsrep_defaults_file, + uuid_oss.str().c_str(), gtid.seqno().get(), wsrep_gtid_server.domain_id, + binlog_opt_val, binlog_index_opt_val, + bypass ? " " WSREP_SST_OPT_BYPASS : ""); + + my_free(binlog_opt_val); + my_free(binlog_index_opt_val); + + if (ret < 0 || size_t(ret) >= cmd_len) + { + WSREP_ERROR("sst_donate_other(): snprintf() failed: %d", ret); + return (ret < 0 ? 
ret : -EMSGSIZE); + } + + if (extra_args) + copy_orig_argv(cmd_str() + ret); + + if (!bypass && wsrep_sst_donor_rejects_queries) sst_reject_queries(FALSE); + + pthread_t tmp; + sst_thread_arg arg(cmd_str(), env); + + mysql_mutex_lock (&arg.lock); + + ret= mysql_thread_create (key_wsrep_sst_donor, &tmp, NULL, sst_donor_thread, &arg); + + if (ret) + { + WSREP_ERROR("sst_donate_other(): mysql_thread_create() failed: %d (%s)", + ret, strerror(ret)); + return ret; + } + + mysql_cond_wait (&arg.cond, &arg.lock); + + WSREP_INFO("sst_donor_thread signaled with %d", arg.err); + return arg.err; +} + +int wsrep_sst_donate(const std::string& msg, + const wsrep::gtid& current_gtid, + const bool bypass) +{ + const char* method= msg.data(); + size_t method_len= strlen (method); + + if (check_request_str(method, filename_char, true)) + { + WSREP_ERROR("Bad SST method name. SST canceled."); + return WSREP_CB_FAILURE; + } + + const char* data= method + method_len + 1; + + /* check for auth@addr separator */ + const char* addr= strrchr(data, '@'); + wsp::string remote_auth; + if (addr) + { + remote_auth.set(strndup(data, addr - data)); + addr++; + } + else + { + // no auth part + addr= data; + } + + if (check_request_str(addr, address_char, true)) + { + WSREP_ERROR("Bad SST address string. 
SST canceled."); + return WSREP_CB_FAILURE; + } + + wsp::env env(NULL); + if (env.error()) + { + WSREP_ERROR("wsrep_sst_donate_cb(): env var ctor failed: %d", -env.error()); + return WSREP_CB_FAILURE; + } + + int ret; + if ((ret= sst_append_env_var(env, WSREP_SST_AUTH_ENV, sst_auth_real))) + { + WSREP_ERROR("wsrep_sst_donate_cb(): appending auth env failed: %d", ret); + return WSREP_CB_FAILURE; + } + + if (remote_auth()) + { + if ((ret= sst_append_env_var(env, WSREP_SST_REMOTE_AUTH_ENV,remote_auth()))) + { + WSREP_ERROR("wsrep_sst_donate_cb(): appending remote auth env failed: " + "%d", ret); + return WSREP_CB_FAILURE; + } + } + + if (data_home_dir) + { + if ((ret= sst_append_env_var(env, DATA_HOME_DIR_ENV, data_home_dir))) + { + WSREP_ERROR("wsrep_sst_donate_cb(): appending data " + "directory failed: %d", ret); + return WSREP_CB_FAILURE; + } + } + + sst_donor_completed= false; + pthread_t monitor; + + ret= mysql_thread_create (key_wsrep_sst_donor_monitor, &monitor, NULL, wsrep_sst_donor_monitor_thread, NULL); + + if (ret) + { + WSREP_ERROR("sst_donate: mysql_thread_create() failed: %d (%s)", + ret, strerror(ret)); + return WSREP_CB_FAILURE; + } + + if (!strcmp (WSREP_SST_MYSQLDUMP, method)) + { + ret= sst_donate_mysqldump(addr, current_gtid, bypass, env()); + } + else + { + ret= sst_donate_other(method, addr, current_gtid, bypass, env()); + } + + return (ret >= 0 ? 0 : 1); +} diff --git a/sql/wsrep_sst.h b/sql/wsrep_sst.h new file mode 100644 index 00000000..462db7a1 --- /dev/null +++ b/sql/wsrep_sst.h @@ -0,0 +1,107 @@ +/* Copyright (C) 2013-2018 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA. */ + +#ifndef WSREP_SST_H +#define WSREP_SST_H + +#include + +#include "wsrep/gtid.hpp" +#include +#include + +#define WSREP_SST_OPT_ROLE "--role" +#define WSREP_SST_OPT_ADDR "--address" +#define WSREP_SST_OPT_AUTH "--auth" +#define WSREP_SST_OPT_DATA "--datadir" +#define WSREP_SST_OPT_CONF "--defaults-file" +#define WSREP_SST_OPT_CONF_SUFFIX "--defaults-group-suffix" +#define WSREP_SST_OPT_CONF_EXTRA "--defaults-extra-file" +#define WSREP_SST_OPT_PARENT "--parent" +#define WSREP_SST_OPT_BINLOG "--binlog" +#define WSREP_SST_OPT_BINLOG_INDEX "--binlog-index" +#define WSREP_SST_OPT_PROGRESS "--progress" +#define WSREP_SST_OPT_MYSQLD "--mysqld-args" + +// mysqldump-specific options +#define WSREP_SST_OPT_USER "--user" +#define WSREP_SST_OPT_PSWD "--password" +#define WSREP_SST_OPT_HOST "--host" +#define WSREP_SST_OPT_PORT "--port" +#define WSREP_SST_OPT_LPORT "--local-port" + +// donor-specific +#define WSREP_SST_OPT_SOCKET "--socket" +#define WSREP_SST_OPT_GTID "--gtid" +#define WSREP_SST_OPT_BYPASS "--bypass" +#define WSREP_SST_OPT_GTID_DOMAIN_ID "--gtid-domain-id" + +#define WSREP_SST_MYSQLDUMP "mysqldump" +#define WSREP_SST_RSYNC "rsync" +#define WSREP_SST_SKIP "skip" +#define WSREP_SST_MARIABACKUP "mariabackup" +#define WSREP_SST_XTRABACKUP "xtrabackup" +#define WSREP_SST_XTRABACKUPV2 "xtrabackupv2" +#define WSREP_SST_DEFAULT WSREP_SST_RSYNC +#define WSREP_SST_ADDRESS_AUTO "AUTO" +#define WSREP_SST_AUTH_MASK "********" + +/* system variables */ +extern const char* wsrep_sst_method; +extern const char* wsrep_sst_receive_address; +extern const char* wsrep_sst_donor; +extern const char* wsrep_sst_auth; +extern my_bool wsrep_sst_donor_rejects_queries; + +/*! 
Synchronizes applier thread start with init thread */ +extern void wsrep_sst_grab(); +/*! Init thread waits for SST completion */ +extern bool wsrep_sst_wait(); +/*! Signals wsrep that initialization is complete, writesets can be applied */ +extern bool wsrep_sst_continue(); +extern void wsrep_sst_auth_init(); +extern void wsrep_sst_auth_free(); + +extern void wsrep_SE_init_grab(); /*! grab init critical section */ +extern void wsrep_SE_init_wait(); /*! wait for SE init to complete */ +extern void wsrep_SE_init_done(); /*! signal that SE init is complte */ +extern void wsrep_SE_initialized(); /*! mark SE initialization complete */ + +/** + Return a string containing the state transfer request string. + Note that the string may contain a '\0' in the middle. +*/ +std::string wsrep_sst_prepare(); + +/** + Donate a SST. + + @param request SST request string received from the joiner. Note that + the string may contain a '\0' in the middle. + @param gtid Current position of the donor + @param bypass If true, full SST is not needed. Joiner needs to be + notified that it can continue starting from gtid. + */ +int wsrep_sst_donate(const std::string& request, + const wsrep::gtid& gtid, + bool bypass); + +#else +#define wsrep_SE_initialized() do { } while(0) +#define wsrep_SE_init_grab() do { } while(0) +#define wsrep_SE_init_done() do { } while(0) +#define wsrep_sst_continue() (0) + +#endif /* WSREP_SST_H */ diff --git a/sql/wsrep_status.cc b/sql/wsrep_status.cc new file mode 100644 index 00000000..eacd7080 --- /dev/null +++ b/sql/wsrep_status.cc @@ -0,0 +1,60 @@ +/* Copyright 2021 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "wsrep_status.h" + +mysql_mutex_t LOCK_wsrep_status; + +#ifdef HAVE_PSI_INTERFACE +PSI_mutex_key key_LOCK_wsrep_status; +#endif + +Wsrep_mutex* Wsrep_status::m_mutex = 0; +wsrep::reporter* Wsrep_status::m_instance = 0; + +void Wsrep_status::report_log_msg(wsrep::reporter::log_level const level, + const char* const tag, size_t const tag_len, + const char* const buf, size_t const buf_len, + double const tstamp) +{ + if (!Wsrep_status::m_instance) return; + + Wsrep_status::m_instance->report_log_msg(level, + std::string(tag, tag_len) + std::string(buf, buf_len), + tstamp); +} + +void Wsrep_status::init_once(const std::string& file_name) +{ + if (file_name.length() > 0 && m_instance == 0) + { + mysql_mutex_init(key_LOCK_wsrep_status, &LOCK_wsrep_status, + MY_MUTEX_INIT_FAST); + m_mutex = new Wsrep_mutex(&LOCK_wsrep_status); + m_instance = new wsrep::reporter(*m_mutex, file_name, 4); + } +} + +void Wsrep_status::destroy() +{ + if (m_instance) + { + delete m_instance; + m_instance= 0; + delete m_mutex; + m_mutex= 0; + mysql_mutex_destroy(&LOCK_wsrep_status); + } +} diff --git a/sql/wsrep_status.h b/sql/wsrep_status.h new file mode 100644 index 00000000..dd83dda2 --- /dev/null +++ b/sql/wsrep_status.h @@ -0,0 +1,62 @@ +/* Copyright 2021 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef WSREP_STATUS_H +#define WSREP_STATUS_H + +/* wsrep-lib */ +#include "wsrep/reporter.hpp" + +/* implementation */ +#include "wsrep_mutex.h" + +class Wsrep_status +{ +public: + static void init_once(const std::string& file_name); + static void destroy(); + + static void report_state(enum wsrep::server_state::state const state) + { + if (!Wsrep_status::m_instance) return; + + Wsrep_status::m_instance->report_state(state); + } + + static void report_progress(const std::string& progress) + { + if (!Wsrep_status::m_instance) return; + + Wsrep_status::m_instance->report_progress(progress); + } + + static void report_log_msg(wsrep::reporter::log_level level, + const char* tag, size_t tag_len, + const char* buf, size_t buf_len, + double const tstamp = wsrep::reporter::undefined); + + static bool is_instance_initialized() + { + return m_instance; + } + +private: + Wsrep_status(const std::string& file_name); + + static Wsrep_mutex* m_mutex; + static wsrep::reporter* m_instance; +}; + +#endif /* WSREP_STATUS_H */ diff --git a/sql/wsrep_storage_service.cc b/sql/wsrep_storage_service.cc new file mode 100644 index 00000000..4885fd9f --- /dev/null +++ b/sql/wsrep_storage_service.cc @@ -0,0 +1,206 @@ +/* Copyright 2018 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "my_global.h" +#include "wsrep_storage_service.h" +#include "wsrep_trans_observer.h" /* wsrep_open() */ +#include "wsrep_schema.h" +#include "wsrep_binlog.h" + +#include "sql_class.h" +#include "mysqld.h" /* next_query_id() */ +#include "slave.h" /* opt_log_slave_updates() */ +#include "transaction.h" /* trans_commit(), trans_rollback() */ + +/* + Temporarily enable wsrep on thd + */ +class Wsrep_on +{ +public: + Wsrep_on(THD* thd) + : m_thd(thd) + , m_wsrep_on(thd->variables.wsrep_on) + { + thd->variables.wsrep_on= TRUE; + } + ~Wsrep_on() + { + m_thd->variables.wsrep_on= m_wsrep_on; + } +private: + THD* m_thd; + my_bool m_wsrep_on; +}; + +Wsrep_storage_service::Wsrep_storage_service(THD* thd) + : wsrep::storage_service() + , wsrep::high_priority_context(thd->wsrep_cs()) + , m_thd(thd) +{ + thd->security_ctx->skip_grants(); + thd->system_thread= SYSTEM_THREAD_SLAVE_SQL; + + /* No binlogging */ + + /* No general log */ + thd->variables.option_bits |= OPTION_LOG_OFF; + + /* Read committed isolation to avoid gap locking */ + thd->variables.tx_isolation = ISO_READ_COMMITTED; + + /* Keep wsrep on to enter commit ordering hooks */ + thd->variables.wsrep_on= 1; + thd->wsrep_skip_locking= true; + + wsrep_open(thd); + wsrep_before_command(thd); +} + +Wsrep_storage_service::~Wsrep_storage_service() +{ + wsrep_after_command_ignore_result(m_thd); + wsrep_close(m_thd); + m_thd->wsrep_skip_locking= false; +} + +int Wsrep_storage_service::start_transaction(const wsrep::ws_handle& ws_handle) +{ + 
DBUG_ENTER("Wsrep_storage_service::start_transaction"); + DBUG_ASSERT(m_thd == current_thd); + DBUG_PRINT("info", ("Wsrep_storage_service::start_transcation(%llu, %p)", + m_thd->thread_id, m_thd)); + m_thd->set_wsrep_next_trx_id(ws_handle.transaction_id().get()); + DBUG_RETURN(m_thd->wsrep_cs().start_transaction( + wsrep::transaction_id(m_thd->wsrep_next_trx_id())) || + trans_begin(m_thd, MYSQL_START_TRANS_OPT_READ_WRITE)); +} + +void Wsrep_storage_service::adopt_transaction(const wsrep::transaction& transaction) +{ + DBUG_ENTER("Wsrep_Storage_server::adopt_transaction"); + DBUG_ASSERT(m_thd == current_thd); + m_thd->wsrep_cs().adopt_transaction(transaction); + trans_begin(m_thd, MYSQL_START_TRANS_OPT_READ_WRITE); + DBUG_VOID_RETURN; +} + +int Wsrep_storage_service::append_fragment(const wsrep::id& server_id, + wsrep::transaction_id transaction_id, + int flags, + const wsrep::const_buffer& data, + const wsrep::xid& xid WSREP_UNUSED) +{ + DBUG_ENTER("Wsrep_storage_service::append_fragment"); + DBUG_ASSERT(m_thd == current_thd); + DBUG_PRINT("info", ("Wsrep_storage_service::append_fragment(%llu, %p)", + m_thd->thread_id, m_thd)); + int ret= wsrep_schema->append_fragment(m_thd, + server_id, + transaction_id, + wsrep::seqno(-1), + flags, + data); + DBUG_RETURN(ret); +} + +int Wsrep_storage_service::update_fragment_meta(const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER("Wsrep_storage_service::update_fragment_meta"); + DBUG_ASSERT(m_thd == current_thd); + DBUG_PRINT("info", ("Wsrep_storage_service::update_fragment_meta(%llu, %p)", + m_thd->thread_id, m_thd)); + int ret= wsrep_schema->update_fragment_meta(m_thd, ws_meta); + DBUG_RETURN(ret); +} + +int Wsrep_storage_service::remove_fragments() +{ + DBUG_ENTER("Wsrep_storage_service::remove_fragments"); + DBUG_ASSERT(m_thd == current_thd); + + int ret= wsrep_schema->remove_fragments(m_thd, + m_thd->wsrep_trx().server_id(), + m_thd->wsrep_trx().id(), + m_thd->wsrep_sr().fragments()); + DBUG_RETURN(ret); +} + +int 
Wsrep_storage_service::commit(const wsrep::ws_handle& ws_handle, + const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER("Wsrep_storage_service::commit"); + DBUG_ASSERT(m_thd == current_thd); + DBUG_PRINT("info", ("Wsrep_storage_service::commit(%llu, %p)", + m_thd->thread_id, m_thd)); + WSREP_DEBUG("Storage service commit: %llu, %lld", + ws_meta.transaction_id().get(), ws_meta.seqno().get()); + int ret= 0; + const bool is_ordered= !ws_meta.seqno().is_undefined(); + + if (is_ordered) + { + ret= m_thd->wsrep_cs().prepare_for_ordering(ws_handle, ws_meta, true); + } + + ret= ret || trans_commit(m_thd); + + if (!is_ordered) + { + /* Wsrep commit was not ordered so it does not go through commit time + hooks and remains active. Roll it back to make cleanup happen + in after_applying() call. */ + m_thd->wsrep_cs().before_rollback(); + m_thd->wsrep_cs().after_rollback(); + } + else if (ret) + { + /* Commit failed, this probably means that the parent SR transaction + was BF aborted. Roll back out of order, the parent + transaction will release commit order after it has rolled back. 
*/ + m_thd->wsrep_cs().prepare_for_ordering(wsrep::ws_handle(), + wsrep::ws_meta(), + false); + trans_rollback(m_thd); + } + m_thd->wsrep_cs().after_applying(); + m_thd->release_transactional_locks(); + DBUG_RETURN(ret); +} + +int Wsrep_storage_service::rollback(const wsrep::ws_handle& ws_handle, + const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER("Wsrep_storage_service::rollback"); + DBUG_ASSERT(m_thd == current_thd); + DBUG_PRINT("info", ("Wsrep_storage_service::rollback(%llu, %p)", + m_thd->thread_id, m_thd)); + int ret= (m_thd->wsrep_cs().prepare_for_ordering( + ws_handle, ws_meta, false) || + trans_rollback(m_thd)); + m_thd->wsrep_cs().after_applying(); + m_thd->release_transactional_locks(); + DBUG_RETURN(ret); +} + +void Wsrep_storage_service::store_globals() +{ + wsrep_store_threadvars(m_thd); +} + +void Wsrep_storage_service::reset_globals() +{ + wsrep_reset_threadvars(m_thd); +} diff --git a/sql/wsrep_storage_service.h b/sql/wsrep_storage_service.h new file mode 100644 index 00000000..f39543a8 --- /dev/null +++ b/sql/wsrep_storage_service.h @@ -0,0 +1,49 @@ +/* Copyright 2018 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef WSREP_STORAGE_SERVICE_H +#define WSREP_STORAGE_SERVICE_H + +#include "wsrep/storage_service.hpp" +#include "wsrep/client_state.hpp" + +class THD; +class Wsrep_server_service; +class Wsrep_storage_service : + public wsrep::storage_service, + public wsrep::high_priority_context +{ +public: + Wsrep_storage_service(THD*); + ~Wsrep_storage_service(); + int start_transaction(const wsrep::ws_handle&); + void adopt_transaction(const wsrep::transaction&); + int append_fragment(const wsrep::id&, + wsrep::transaction_id, + int flags, + const wsrep::const_buffer&, + const wsrep::xid&); + int update_fragment_meta(const wsrep::ws_meta&); + int remove_fragments(); + int commit(const wsrep::ws_handle&, const wsrep::ws_meta&); + int rollback(const wsrep::ws_handle&, const wsrep::ws_meta&); + void store_globals(); + void reset_globals(); +private: + friend class Wsrep_server_service; + THD* m_thd; +}; + +#endif /* WSREP_STORAGE_SERVICE_H */ diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc new file mode 100644 index 00000000..682e6485 --- /dev/null +++ b/sql/wsrep_thd.cc @@ -0,0 +1,586 @@ +/* Copyright (C) 2013-2023 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA. */ + +#include "mariadb.h" +#include "wsrep_thd.h" +#include "wsrep_trans_observer.h" +#include "wsrep_high_priority_service.h" +#include "wsrep_storage_service.h" +#include "wsrep_server_state.h" +#include "transaction.h" +#include "rpl_rli.h" +#include "log_event.h" +#include "sql_parse.h" +#include "wsrep_mysqld.h" // start_wsrep_THD(); +#include "mysql/service_wsrep.h" +#include "debug_sync.h" +#include "slave.h" +#include "rpl_rli.h" +#include "rpl_mi.h" + +extern "C" pthread_key(struct st_my_thread_var*, THR_KEY_mysys); + +static Wsrep_thd_queue* wsrep_rollback_queue= 0; +static Atomic_counter wsrep_bf_aborts_counter; + + +int wsrep_show_bf_aborts (THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope) +{ + wsrep_local_bf_aborts= wsrep_bf_aborts_counter; + var->type= SHOW_LONGLONG; + var->value= (char*)&wsrep_local_bf_aborts; + return 0; +} + +static void wsrep_replication_process(THD *thd, + void* arg __attribute__((unused))) +{ + DBUG_ENTER("wsrep_replication_process"); + + Wsrep_applier_service applier_service(thd); + + WSREP_INFO("Starting applier thread %llu", thd->thread_id); + enum wsrep::provider::status + ret= Wsrep_server_state::get_provider().run_applier(&applier_service); + + WSREP_INFO("Applier thread exiting ret: %d thd: %llu", ret, thd->thread_id); + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + wsrep_close_applier(thd); + mysql_cond_broadcast(&COND_wsrep_slave_threads); + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + + delete thd->wsrep_rgi->rli->mi; + delete thd->wsrep_rgi->rli; + + thd->wsrep_rgi->cleanup_after_session(); + delete thd->wsrep_rgi; + thd->wsrep_rgi= NULL; + + + if(thd->has_thd_temporary_tables()) + { + WSREP_WARN("Applier %lld has temporary tables at exit.", + thd->thread_id); + } + DBUG_VOID_RETURN; +} + 
+static bool create_wsrep_THD(Wsrep_thd_args* args, bool mutex_protected) +{ + if (!mutex_protected) + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + + ulong old_wsrep_running_threads= wsrep_running_threads; + + DBUG_ASSERT(args->thread_type() == WSREP_APPLIER_THREAD || + args->thread_type() == WSREP_ROLLBACKER_THREAD); + + bool res= mysql_thread_create(args->thread_type() == WSREP_APPLIER_THREAD + ? key_wsrep_applier : key_wsrep_rollbacker, + args->thread_id(), &connection_attrib, + start_wsrep_THD, (void*)args); + + if (res) + WSREP_ERROR("Can't create wsrep thread"); + + /* + if starting a thread on server startup, wait until the this thread's THD + is fully initialized (otherwise a THD initialization code might + try to access a partially initialized server data structure - MDEV-8208). + */ + if (!mysqld_server_initialized) + { + while (old_wsrep_running_threads == wsrep_running_threads) + { + mysql_cond_wait(&COND_wsrep_slave_threads, &LOCK_wsrep_slave_threads); + } + } + + if (!mutex_protected) + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + + return res; +} + +bool wsrep_create_appliers(long threads, bool mutex_protected) +{ + /* Dont' start slave threads if wsrep-provider or wsrep-cluster-address + is not set. 
+ */ + if (!WSREP_PROVIDER_EXISTS) + { + return false; + } + + DBUG_ASSERT(wsrep_cluster_address[0]); + + long wsrep_threads=0; + + while (wsrep_threads++ < threads) + { + Wsrep_thd_args* args(new Wsrep_thd_args(wsrep_replication_process, + WSREP_APPLIER_THREAD, + pthread_self())); + if (create_wsrep_THD(args, mutex_protected)) + { + WSREP_WARN("Can't create thread to manage wsrep replication"); + return true; + } + } + + return false; +} + +static void wsrep_remove_streaming_fragments(THD* thd, const char* ctx) +{ + wsrep::transaction_id transaction_id(thd->wsrep_trx().id()); + Wsrep_storage_service* storage_service= wsrep_create_storage_service(thd, ctx); + storage_service->store_globals(); + storage_service->adopt_transaction(thd->wsrep_trx()); + storage_service->remove_fragments(); + storage_service->commit(wsrep::ws_handle(transaction_id, 0), + wsrep::ws_meta()); + Wsrep_server_state::instance().server_service() + .release_storage_service(storage_service); + wsrep_store_threadvars(thd); +} + +static void wsrep_rollback_high_priority(THD *thd, THD *rollbacker) +{ + WSREP_DEBUG("Rollbacker aborting SR applier thd (%llu %lu)", + thd->thread_id, thd->real_id); + char* orig_thread_stack= thd->thread_stack; + thd->thread_stack= rollbacker->thread_stack; + DBUG_ASSERT(thd->wsrep_cs().mode() == Wsrep_client_state::m_high_priority); + /* Must be streaming and must have been removed from the + server state streaming appliers map. */ + DBUG_ASSERT(thd->wsrep_trx().is_streaming()); + DBUG_ASSERT(!Wsrep_server_state::instance().find_streaming_applier( + thd->wsrep_trx().server_id(), + thd->wsrep_trx().id())); + DBUG_ASSERT(thd->wsrep_applier_service); + + /* Fragment removal should happen before rollback to make + the transaction non-observable in SR table after the rollback + completes. For correctness the order does not matter here, + but currently it is mandated by checks in some MTR tests. 
*/ + wsrep_remove_streaming_fragments(thd, "high priority"); + thd->wsrep_applier_service->rollback(wsrep::ws_handle(), + wsrep::ws_meta()); + thd->wsrep_applier_service->after_apply(); + thd->thread_stack= orig_thread_stack; + WSREP_DEBUG("rollbacker aborted thd: (%llu %lu)", + thd->thread_id, thd->real_id); + /* Will free THD */ + Wsrep_server_state::instance().server_service() + .release_high_priority_service(thd->wsrep_applier_service); +} + +static void wsrep_rollback_local(THD *thd, THD *rollbacker) +{ + WSREP_DEBUG("Rollbacker aborting local thd (%llu %lu)", + thd->thread_id, thd->real_id); + char* orig_thread_stack= thd->thread_stack; + thd->thread_stack= rollbacker->thread_stack; + if (thd->wsrep_trx().is_streaming()) + { + wsrep_remove_streaming_fragments(thd, "local"); + } + /* Set thd->event_scheduler.data temporarily to NULL to avoid + callbacks to threadpool wait_begin() during rollback. */ + auto saved_esd= thd->event_scheduler.data; + thd->event_scheduler.data= 0; + mysql_mutex_lock(&thd->LOCK_thd_data); + /* prepare THD for rollback processing */ + thd->reset_for_next_command(); + thd->lex->sql_command= SQLCOM_ROLLBACK; + mysql_mutex_unlock(&thd->LOCK_thd_data); + /* Perform a client rollback, restore globals and signal + the victim only when all the resources have been + released */ + thd->wsrep_cs().client_service().bf_rollback(); + wsrep_reset_threadvars(thd); + /* Assign saved event_scheduler.data back before letting + client to continue. 
*/ + thd->event_scheduler.data= saved_esd; + thd->thread_stack= orig_thread_stack; + thd->wsrep_cs().sync_rollback_complete(); + WSREP_DEBUG("rollbacker aborted thd: (%llu %lu)", + thd->thread_id, thd->real_id); +} + +static void wsrep_rollback_process(THD *rollbacker, + void *arg __attribute__((unused))) +{ + DBUG_ENTER("wsrep_rollback_process"); + + THD* thd= NULL; + DBUG_ASSERT(!wsrep_rollback_queue); + wsrep_rollback_queue= new Wsrep_thd_queue(rollbacker); + WSREP_INFO("Starting rollbacker thread %llu", rollbacker->thread_id); + + thd_proc_info(rollbacker, "wsrep aborter idle"); + while ((thd= wsrep_rollback_queue->pop_front()) != NULL) + { + mysql_mutex_lock(&thd->LOCK_thd_data); + wsrep::client_state& cs(thd->wsrep_cs()); + const wsrep::transaction& tx(cs.transaction()); + if (tx.state() == wsrep::transaction::s_aborted) + { + WSREP_DEBUG("rollbacker thd already aborted: %llu state: %d", + (long long)thd->real_id, + tx.state()); + mysql_mutex_unlock(&thd->LOCK_thd_data); + continue; + } + mysql_mutex_unlock(&thd->LOCK_thd_data); + + wsrep_reset_threadvars(rollbacker); + wsrep_store_threadvars(thd); + thd->wsrep_cs().acquire_ownership(); + + thd_proc_info(rollbacker, "wsrep aborter active"); + + /* Rollback methods below may free thd pointer. Do not try + to access it after method returns. 
*/ + if (wsrep_thd_is_applying(thd)) + { + wsrep_rollback_high_priority(thd, rollbacker); + } + else + { + wsrep_rollback_local(thd, rollbacker); + } + wsrep_store_threadvars(rollbacker); + thd_proc_info(rollbacker, "wsrep aborter idle"); + } + + delete wsrep_rollback_queue; + wsrep_rollback_queue= NULL; + + WSREP_INFO("rollbacker thread exiting %llu", rollbacker->thread_id); + + DBUG_ASSERT(rollbacker->killed != NOT_KILLED); + DBUG_PRINT("wsrep",("wsrep rollbacker thread exiting")); + DBUG_VOID_RETURN; +} + +void wsrep_create_rollbacker() +{ + DBUG_ASSERT(wsrep_cluster_address[0]); + Wsrep_thd_args* args(new Wsrep_thd_args(wsrep_rollback_process, + WSREP_ROLLBACKER_THREAD, + pthread_self())); + + /* create rollbacker */ + if (create_wsrep_THD(args, false)) + WSREP_WARN("Can't create thread to manage wsrep rollback"); +} + +/* + Start async rollback process + + Asserts thd->LOCK_thd_data ownership + */ +void wsrep_fire_rollbacker(THD *thd) +{ + DBUG_ASSERT(thd->wsrep_trx().state() == wsrep::transaction::s_aborting); + DBUG_PRINT("wsrep",("enqueuing trx abort for %llu", thd->thread_id)); + WSREP_DEBUG("enqueuing trx abort for (%llu)", thd->thread_id); + if (wsrep_rollback_queue->push_back(thd)) + { + WSREP_WARN("duplicate thd %llu for rollbacker", + thd->thread_id); + } +} + +static bool wsrep_bf_abort_low(THD *bf_thd, THD *victim_thd) +{ + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("sync.wsrep_bf_abort", + { + const char act[]= + "now " + "SIGNAL sync.wsrep_bf_abort_reached " + "WAIT_FOR signal.wsrep_bf_abort"; + DBUG_ASSERT(!debug_sync_set_action(bf_thd, + STRING_WITH_LEN(act))); + };); +#endif + + wsrep::seqno bf_seqno(bf_thd->wsrep_trx().ws_meta().seqno()); + bool ret; + + { + /* Adopt the lock, it is being held by the caller. 
*/ + Wsrep_mutex wsm{&victim_thd->LOCK_thd_data}; + wsrep::unique_lock lock{wsm, std::adopt_lock}; + + if (wsrep_thd_is_toi(bf_thd)) + { + ret= victim_thd->wsrep_cs().total_order_bf_abort(lock, bf_seqno); + } + else + { + DBUG_ASSERT(WSREP(victim_thd) ? victim_thd->wsrep_trx().active() : 1); + ret= victim_thd->wsrep_cs().bf_abort(lock, bf_seqno); + } + if (ret) + { + /* BF abort should be allowed only once by wsrep-lib.*/ + DBUG_ASSERT(victim_thd->wsrep_aborter == 0); + victim_thd->wsrep_aborter= bf_thd->thread_id; + wsrep_bf_aborts_counter++; + } + lock.release(); /* No unlock at the end of the scope. */ + } + + /* Sanity check for wsrep-lib calls to return with LOCK_thd_data held. */ + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + + return ret; +} + +void wsrep_abort_thd(THD *bf_thd, + THD *victim_thd, + my_bool signal) +{ + DBUG_ENTER("wsrep_abort_thd"); + + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + + /* Note that when you use RSU node is desynced from cluster, thus WSREP(thd) + might not be true. 
+ */ + if ((WSREP(bf_thd) + || ((WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) + && wsrep_thd_is_toi(bf_thd)) + || bf_thd->lex->sql_command == SQLCOM_KILL) + && !wsrep_thd_is_aborting(victim_thd) && + wsrep_bf_abort_low(bf_thd, victim_thd) && + !victim_thd->wsrep_cs().is_rollbacker_active()) + { + WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", + (long long)bf_thd->real_id, (long long)victim_thd->real_id); + victim_thd->awake_no_mutex(KILL_QUERY_HARD); + ha_abort_transaction(bf_thd, victim_thd, signal); + } + else + { + WSREP_DEBUG("wsrep_abort_thd not effective: bf %llu victim %llu " + "wsrep %d wsrep_on %d RSU %d TOI %d aborting %d", + (long long)bf_thd->real_id, (long long)victim_thd->real_id, + WSREP_NNULL(bf_thd), WSREP_ON, + bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU, + wsrep_thd_is_toi(bf_thd), + wsrep_thd_is_aborting(victim_thd)); + } + + DBUG_VOID_RETURN; +} + +bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd) +{ + WSREP_LOG_THD(bf_thd, "BF aborter before"); + WSREP_LOG_THD(victim_thd, "victim before"); + + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + + if (WSREP(victim_thd) && !victim_thd->wsrep_trx().active()) + { + WSREP_DEBUG("wsrep_bf_abort, BF abort for non active transaction." + " Victim state %s bf state %s", + wsrep::to_c_string(victim_thd->wsrep_trx().state()), + wsrep::to_c_string(bf_thd->wsrep_trx().state())); + + switch (victim_thd->wsrep_trx().state()) { + case wsrep::transaction::s_aborting: /* fall through */ + case wsrep::transaction::s_aborted: + WSREP_DEBUG("victim is aborting or has aborted"); + break; + default: break; + } + /* victim may not have started transaction yet in wsrep context, but it may + have acquired MDL locks (due to DDL execution), and this has caused BF conflict. + such case does not require aborting in wsrep or replication provider state. 
+ */ + if (victim_thd->current_backup_stage != BACKUP_FINISHED && + wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP)) + { + WSREP_DEBUG("killing connection for non wsrep session"); + victim_thd->awake_no_mutex(KILL_CONNECTION); + } + return false; + } + + return wsrep_bf_abort_low(bf_thd, victim_thd); +} + +uint wsrep_kill_thd(THD *thd, THD *victim_thd, killed_state kill_signal) +{ + DBUG_ENTER("wsrep_kill_thd"); + DBUG_ASSERT(WSREP(victim_thd)); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + using trans= wsrep::transaction; + auto trx_state= victim_thd->wsrep_trx().state(); +#ifndef DBUG_OFF + victim_thd->wsrep_killed_state= trx_state; +#endif /* DBUG_OFF */ + /* + Already killed or in commit codepath. Mark the victim as killed, + the killed status will be restored in wsrep_after_commit() and + will be processed after the commit is over. In case of multiple + KILLs happened on commit codepath, the last one will be effective. + */ + if (victim_thd->wsrep_abort_by_kill || + trx_state == trans::s_preparing || + trx_state == trans::s_committing || + trx_state == trans::s_ordered_commit) + { + victim_thd->wsrep_abort_by_kill= kill_signal; + DBUG_RETURN(0); + } + /* + Mark killed victim_thd with kill_signal so that awake_no_mutex does + not dive into storage engine. We use ha_abort_transaction() + to do the storage engine part for wsrep THDs. + */ + DEBUG_SYNC(thd, "wsrep_kill_before_awake_no_mutex"); + victim_thd->wsrep_abort_by_kill= kill_signal; + victim_thd->awake_no_mutex(kill_signal); + /* ha_abort_transaction() releases tmp->LOCK_thd_kill, so tmp + is not safe to access anymore. 
*/ + ha_abort_transaction(thd, victim_thd, 1); + DBUG_RETURN(0); +} + +void wsrep_backup_kill_for_commit(THD *thd) +{ + DBUG_ASSERT(WSREP(thd)); + mysql_mutex_assert_owner(&thd->LOCK_thd_kill); + DBUG_ASSERT(thd->killed != NOT_KILLED); + mysql_mutex_lock(&thd->LOCK_thd_data); + /* If the transaction will roll back, keep the killed state. + For must replay, the replay will happen in different THD context + which is high priority and cannot be killed. The owning thread will + pick the killed state in after statement processing. */ + if (thd->wsrep_trx().state() != wsrep::transaction::s_cert_failed && + thd->wsrep_trx().state() != wsrep::transaction::s_must_abort && + thd->wsrep_trx().state() != wsrep::transaction::s_aborting && + thd->wsrep_trx().state() != wsrep::transaction::s_must_replay) + { + thd->wsrep_abort_by_kill= thd->killed; + thd->wsrep_abort_by_kill_err= thd->killed_err; + thd->killed= NOT_KILLED; + thd->killed_err= 0; + } + mysql_mutex_unlock(&thd->LOCK_thd_data); +} + +void wsrep_restore_kill_after_commit(THD *thd) +{ + DBUG_ASSERT(WSREP(thd)); + mysql_mutex_assert_owner(&thd->LOCK_thd_kill); + thd->killed= thd->wsrep_abort_by_kill; + thd->killed_err= thd->wsrep_abort_by_kill_err; + thd->wsrep_abort_by_kill= NOT_KILLED; + thd->wsrep_abort_by_kill_err= 0; +} + +int wsrep_create_threadvars() +{ + int ret= 0; + if (thread_handling == SCHEDULER_TYPES_COUNT) + { + /* Caller should have called wsrep_reset_threadvars() before this + method. */ + DBUG_ASSERT(!pthread_getspecific(THR_KEY_mysys)); + pthread_setspecific(THR_KEY_mysys, 0); + ret= my_thread_init(); + } + return ret; +} + +void wsrep_delete_threadvars() +{ + if (thread_handling == SCHEDULER_TYPES_COUNT) + { + /* The caller should have called wsrep_store_threadvars() before + this method. */ + DBUG_ASSERT(pthread_getspecific(THR_KEY_mysys)); + /* Reset psi state to avoid deallocating applier thread + psi_thread. 
*/ +#ifdef HAVE_PSI_INTERFACE + PSI_thread *psi_thread= PSI_CALL_get_thread(); + if (PSI_server) + { + PSI_server->set_thread(0); + } +#endif /* HAVE_PSI_INTERFACE */ + my_thread_end(); + PSI_CALL_set_thread(psi_thread); + pthread_setspecific(THR_KEY_mysys, 0); + } +} + +void wsrep_assign_from_threadvars(THD *thd) +{ + if (thread_handling == SCHEDULER_TYPES_COUNT) + { + st_my_thread_var *mysys_var= (st_my_thread_var *)pthread_getspecific(THR_KEY_mysys); + DBUG_ASSERT(mysys_var); + thd->set_mysys_var(mysys_var); + } +} + +Wsrep_threadvars wsrep_save_threadvars() +{ + return Wsrep_threadvars{ + current_thd, + (st_my_thread_var*) pthread_getspecific(THR_KEY_mysys) + }; +} + +void wsrep_restore_threadvars(const Wsrep_threadvars& globals) +{ + set_current_thd(globals.cur_thd); + pthread_setspecific(THR_KEY_mysys, globals.mysys_var); +} + +void wsrep_store_threadvars(THD *thd) +{ + if (thread_handling == SCHEDULER_TYPES_COUNT) + { + pthread_setspecific(THR_KEY_mysys, thd->mysys_var); + } + thd->store_globals(); +} + +void wsrep_reset_threadvars(THD *thd) +{ + if (thread_handling == SCHEDULER_TYPES_COUNT) + { + pthread_setspecific(THR_KEY_mysys, 0); + } + else + { + thd->reset_globals(); + } +} diff --git a/sql/wsrep_thd.h b/sql/wsrep_thd.h new file mode 100644 index 00000000..f3790887 --- /dev/null +++ b/sql/wsrep_thd.h @@ -0,0 +1,325 @@ +/* Copyright (C) 2013-2023 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA. */ + +#ifndef WSREP_THD_H +#define WSREP_THD_H + +#include + +#include "mysql/service_wsrep.h" +#include "wsrep/client_state.hpp" +#include "sql_class.h" +#include "wsrep_utils.h" +#include +class Wsrep_thd_queue +{ +public: + Wsrep_thd_queue(THD* t) : thd(t) + { + mysql_mutex_init(key_LOCK_wsrep_thd_queue, + &LOCK_wsrep_thd_queue, + MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_thd_queue, &COND_wsrep_thd_queue, NULL); + } + ~Wsrep_thd_queue() + { + mysql_mutex_destroy(&LOCK_wsrep_thd_queue); + mysql_cond_destroy(&COND_wsrep_thd_queue); + } + bool push_back(THD* thd) + { + DBUG_ASSERT(thd); + wsp::auto_lock lock(&LOCK_wsrep_thd_queue); + std::deque::iterator it = queue.begin(); + while (it != queue.end()) + { + if (*it == thd) + { + return true; + } + it++; + } + queue.push_back(thd); + mysql_cond_signal(&COND_wsrep_thd_queue); + return false; + } + THD* pop_front() + { + wsp::auto_lock lock(&LOCK_wsrep_thd_queue); + while (queue.empty()) + { + if (thd->killed != NOT_KILLED) + return NULL; + + thd->mysys_var->current_mutex= &LOCK_wsrep_thd_queue; + thd->mysys_var->current_cond= &COND_wsrep_thd_queue; + + mysql_cond_wait(&COND_wsrep_thd_queue, &LOCK_wsrep_thd_queue); + + thd->mysys_var->current_mutex= 0; + thd->mysys_var->current_cond= 0; + } + THD* ret= queue.front(); + queue.pop_front(); + return ret; + } +private: + THD* thd; + std::deque queue; + mysql_mutex_t LOCK_wsrep_thd_queue; + mysql_cond_t COND_wsrep_thd_queue; +}; + +int wsrep_show_bf_aborts (THD *thd, SHOW_VAR *var, char *buff, + enum enum_var_type scope); +bool wsrep_create_appliers(long threads, bool mutex_protected=false); +void wsrep_create_rollbacker(); + +bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd); +/* + Abort transaction for victim_thd. 
This function is called from + MDL BF abort codepath. +*/ +void wsrep_abort_thd(THD *bf_thd, + THD *victim_thd, + my_bool signal) __attribute__((nonnull(1,2))); + +/** + Kill wsrep connection with kill_signal. Object thd is not + guaranteed to exist anymore when this function returns. + + Asserts that the caller holds victim_thd->LOCK_thd_kill, + victim_thd->LOCK_thd_data. + + @param thd THD object for connection that executes the KILL. + @param victim_thd THD object for connection to be killed. + @param kill_signal Kill signal. + + @return Zero if the kill was successful, otherwise non-zero error code. + */ +uint wsrep_kill_thd(THD *thd, THD *victim_thd, killed_state kill_signal); + +/* + Backup kill status for commit. + */ +void wsrep_backup_kill_for_commit(THD *); + +/* + Restore KILL status after commit. + */ +void wsrep_restore_kill_after_commit(THD *); + +/* + Helper methods to deal with thread local storage. + The purpose of these methods is to hide the details of thread + local storage handling when operating with wsrep storage access + and streaming applier THDs + + With one-thread-per-connection thread handling thread specific + variables are allocated when the thread is started and deallocated + before thread exits (my_thread_init(), my_thread_end()). However, + with pool-of-threads thread handling new thread specific variables + are allocated for each THD separately (see threadpool_add_connection()), + and the variables in thread local storage are assigned from + currently active thread (see thread_attach()). This must be taken into + account when storing/resetting thread local storage and when creating + streaming applier THDs. +*/ + +/** + Create new variables for thread local storage. With + one-thread-per-connection thread handling this is a no op, + with pool-of-threads new variables are created via my_thread_init(). + It is assumed that the caller has called wsrep_reset_threadvars() to clear + the thread local storage before this call. 
+ + @return Zero in case of success, non-zero otherwise. +*/ +int wsrep_create_threadvars(); + +/** + Delete variables which were created by wsrep_create_threadvars(). + The caller must store variables into thread local storage before + this call via wsrep_store_threadvars(). +*/ +void wsrep_delete_threadvars(); + +/** + Assign variables from current thread local storage into THD. + This should be called for THDs whose lifetime is limited to single + thread execution or which may share the operation context with some + parent THD (e.g. storage access) and thus don't require separately + allocated globals. + + With one-thread-per-connection thread handling this is a no-op, + with pool-of-threads the variables which are currently stored into + thread local storage are assigned to THD. +*/ +void wsrep_assign_from_threadvars(THD *); + +/** + Helper struct to save variables from thread local storage. + */ +struct Wsrep_threadvars +{ + THD* cur_thd; + st_my_thread_var* mysys_var; +}; + +/** + Save variables from thread local storage into Wsrep_threadvars struct. + */ +Wsrep_threadvars wsrep_save_threadvars(); + +/** + Restore variables into thread local storage from Wsrep_threadvars struct. +*/ +void wsrep_restore_threadvars(const Wsrep_threadvars&); + +/** + Store variables into thread local storage. +*/ +void wsrep_store_threadvars(THD *); + +/** + Reset thread local storage. +*/ +void wsrep_reset_threadvars(THD *); + +/** + Helper functions to override error status + + In many contexts it is desirable to mask the original error status + set for THD or it is necessary to change OK status to error. + This function implements the common logic for the most + of the cases. 
+ + Rules: + * If the diagnostics are has OK or EOF status, override it unconditionally + * If the error is either ER_ERROR_DURING_COMMIT or ER_LOCK_DEADLOCK + it is usually the correct error status to be returned to client, + so don't override those by default + */ + +static inline void wsrep_override_error(THD *thd, uint error, const char *format= 0, ...) +{ + Diagnostics_area *da= thd->get_stmt_da(); + if (da->is_ok() || + da->is_eof() || + !da->is_set() || + (da->is_error() && + da->sql_errno() != error && + da->sql_errno() != ER_ERROR_DURING_COMMIT && + da->sql_errno() != ER_LOCK_DEADLOCK)) + { + da->reset_diagnostics_area(); + va_list args; + va_start(args, format); + if (!format) format= ER_THD(thd, error); + my_printv_error(error, format, MYF(0), args); + va_end(args); + } +} + +static inline void wsrep_override_error(THD* thd, + wsrep::client_error ce, + enum wsrep::provider::status status) +{ + DBUG_ASSERT(ce != wsrep::e_success); + switch (ce) + { + case wsrep::e_error_during_commit: + if (status == wsrep::provider::error_size_exceeded) + wsrep_override_error(thd, ER_UNKNOWN_ERROR, "Maximum writeset size exceeded"); + else + wsrep_override_error(thd, ER_ERROR_DURING_COMMIT, 0, status); + break; + case wsrep::e_deadlock_error: + wsrep_override_error(thd, ER_LOCK_DEADLOCK); + break; + case wsrep::e_interrupted_error: + wsrep_override_error(thd, ER_QUERY_INTERRUPTED); + break; + case wsrep::e_size_exceeded_error: + wsrep_override_error(thd, ER_UNKNOWN_ERROR, "Maximum writeset size exceeded"); + break; + case wsrep::e_append_fragment_error: + /* TODO: Figure out better error number */ + if (status) + wsrep_override_error(thd, ER_ERROR_DURING_COMMIT, + "Error while appending streaming replication fragment" + "(provider status: %s)", + wsrep::provider::to_string(status).c_str()); + else + wsrep_override_error(thd, ER_ERROR_DURING_COMMIT, + "Error while appending streaming replication fragment"); + break; + case wsrep::e_not_supported_error: + 
wsrep_override_error(thd, ER_NOT_SUPPORTED_YET); + break; + case wsrep::e_timeout_error: + wsrep_override_error(thd, ER_LOCK_WAIT_TIMEOUT); + break; + default: + wsrep_override_error(thd, ER_UNKNOWN_ERROR); + break; + } +} + +/** + Helper function to log THD wsrep context. + + @param thd Pointer to THD + @param message Optional message + @param function Function where the call was made from + */ +static inline void wsrep_log_thd(const THD *thd, + const char *message, + const char *function) +{ + WSREP_DEBUG("%s %s\n" + " thd: %llu thd_ptr: %p client_mode: %s client_state: %s trx_state: %s\n" + " next_trx_id: %lld trx_id: %lld seqno: %lld\n" + " is_streaming: %d fragments: %zu\n" + " sql_errno: %u message: %s\n" +#define WSREP_THD_LOG_QUERIES +#ifdef WSREP_THD_LOG_QUERIES + " command: %d query: %.72s" +#endif /* WSREP_OBSERVER_LOG_QUERIES */ + , + function, + message ? message : "", + thd->thread_id, + thd, + wsrep_thd_client_mode_str(thd), + wsrep_thd_client_state_str(thd), + wsrep_thd_transaction_state_str(thd), + (long long)thd->wsrep_next_trx_id(), + (long long)thd->wsrep_trx_id(), + (long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_trx().is_streaming(), + thd->wsrep_sr().fragments().size(), + (thd->get_stmt_da()->is_error() ? thd->get_stmt_da()->sql_errno() : 0), + (thd->get_stmt_da()->is_error() ? 
thd->get_stmt_da()->message() : "") +#ifdef WSREP_THD_LOG_QUERIES + , thd->lex->sql_command, + wsrep_thd_query(thd) +#endif /* WSREP_OBSERVER_LOG_QUERIES */ + ); +} + +#define WSREP_LOG_THD(thd_, message_) wsrep_log_thd(thd_, message_, __FUNCTION__) + +#endif /* WSREP_THD_H */ diff --git a/sql/wsrep_trans_observer.h b/sql/wsrep_trans_observer.h new file mode 100644 index 00000000..a963a2b1 --- /dev/null +++ b/sql/wsrep_trans_observer.h @@ -0,0 +1,602 @@ +/* Copyright 2016-2023 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ + +#ifndef WSREP_TRANS_OBSERVER_H +#define WSREP_TRANS_OBSERVER_H + +#include "my_global.h" +#include "mysql/service_wsrep.h" +#include "wsrep_applier.h" /* wsrep_apply_error */ +#include "wsrep_xid.h" +#include "wsrep_thd.h" +#include "wsrep_binlog.h" /* register/deregister group commit */ +#include "my_dbug.h" + +class THD; + +void wsrep_commit_empty(THD* thd, bool all); + +/* + Return true if THD has active wsrep transaction. + */ +static inline bool wsrep_is_active(THD* thd) +{ + return (thd->wsrep_cs().state() != wsrep::client_state::s_none && + thd->wsrep_cs().transaction().active() && + !thd->internal_transaction()); +} + +/* + Return true if transaction is ordered. 
+ */ +static inline bool wsrep_is_ordered(THD* thd) +{ + return thd->wsrep_trx().ordered(); +} + +/* + Return true if transaction has been BF aborted but has not been + rolled back yet. + + It is required that the caller holds thd->LOCK_thd_data. +*/ +static inline bool wsrep_must_abort(THD* thd) +{ + mysql_mutex_assert_owner(&thd->LOCK_thd_data); + return (thd->wsrep_trx().state() == wsrep::transaction::s_must_abort); +} + +/* + Return true if the transaction must be replayed. + */ +static inline bool wsrep_must_replay(THD* thd) +{ + return (thd->wsrep_trx().state() == wsrep::transaction::s_must_replay); +} +/* + Return true if transaction has not been committed. + + Note that we don't require thd->LOCK_thd_data here. Calling this method + makes sense only from codepaths which are past ordered_commit state + and the wsrep transaction is immune to BF aborts at that point. +*/ +static inline bool wsrep_not_committed(THD* thd) +{ + return (thd->wsrep_trx().state() != wsrep::transaction::s_committed); +} + +/* + Return true if THD is either committing a transaction or statement + is autocommit. + */ +static inline bool wsrep_is_real(THD* thd, bool all) +{ + return (all || thd->transaction->all.ha_list == 0); +} + +/* + Check if a transaction has generated changes. + */ +static inline bool wsrep_has_changes(THD* thd) +{ + return (thd->wsrep_trx().is_empty() == false); +} + +/* + Check if an active transaction has been BF aborted. 
+ */ +static inline bool wsrep_is_bf_aborted(THD* thd) +{ + return (thd->wsrep_trx().active() && thd->wsrep_trx().bf_aborted()); +} + +static inline int wsrep_check_pk(THD* thd) +{ + if (!wsrep_certify_nonPK) + { + for (TABLE* table= thd->open_tables; table != NULL; table= table->next) + { + if (table->key_info == NULL || table->s->primary_key == MAX_KEY) + { + WSREP_DEBUG("No primary key found for table %s.%s", + table->s->db.str, table->s->table_name.str); + wsrep_override_error(thd, ER_LOCK_DEADLOCK); + return 1; + } + } + } + return 0; +} + +static inline bool wsrep_streaming_enabled(THD* thd) +{ + return (thd->wsrep_sr().fragment_size() > 0); +} + +/* + Return number of fragments successfully certified for the + current statement. + */ +static inline size_t wsrep_fragments_certified_for_stmt(THD* thd) +{ + return thd->wsrep_trx().fragments_certified_for_statement(); +} + +static inline int wsrep_start_transaction(THD* thd, wsrep_trx_id_t trx_id) +{ + if (thd->wsrep_cs().state() != wsrep::client_state::s_none) { + if (wsrep_is_active(thd) == false) + return thd->wsrep_cs().start_transaction(wsrep::transaction_id(trx_id)); + } + return 0; +} + +/**/ +static inline int wsrep_start_trx_if_not_started(THD* thd) +{ + int ret= 0; + DBUG_ASSERT(thd->wsrep_next_trx_id() != WSREP_UNDEFINED_TRX_ID); + DBUG_ASSERT(thd->wsrep_cs().mode() == Wsrep_client_state::m_local); + if (thd->wsrep_trx().active() == false) + { + ret= wsrep_start_transaction(thd, thd->wsrep_next_trx_id()); + } + return ret; +} + +/* + Called after each row operation. + + Return zero on succes, non-zero on failure. 
+ */ +static inline int wsrep_after_row_internal(THD* thd) +{ + if (thd->wsrep_cs().state() != wsrep::client_state::s_none && + wsrep_thd_is_local(thd)) + { + if (wsrep_check_pk(thd)) + { + return 1; + } + else if (wsrep_streaming_enabled(thd)) + { + return thd->wsrep_cs().after_row(); + } + } + return 0; +} + +/* + Helper method to determine whether commit time hooks + should be run for the transaction. + + Commit hooks must be run in the following cases: + - The transaction is local and has generated write set and is committing. + - The transaction has been BF aborted + - Is running in high priority mode and is ordered. This can be replayer, + applier or storage access. + */ +static inline bool wsrep_run_commit_hook(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_run_commit_hook"); + DBUG_PRINT("wsrep", ("Is_active: %d is_real %d has_changes %d is_applying %d " + "is_ordered: %d", + wsrep_is_active(thd), wsrep_is_real(thd, all), + wsrep_has_changes(thd), wsrep_thd_is_applying(thd), + wsrep_is_ordered(thd))); + + /* skipping non-wsrep threads */ + if (!WSREP(thd)) + DBUG_RETURN(false); + + /* Is MST commit or autocommit? 
*/ + bool ret= wsrep_is_active(thd) && wsrep_is_real(thd, all); + /* Do not commit if we are aborting */ + ret= ret && (thd->wsrep_trx().state() != wsrep::transaction::s_aborting); + if (ret && !(wsrep_has_changes(thd) || /* Has generated write set */ + /* Is high priority (replay, applier, storage) and the + transaction is scheduled for commit ordering */ + (wsrep_thd_is_applying(thd) && wsrep_is_ordered(thd)))) + { + mysql_mutex_lock(&thd->LOCK_thd_data); + DBUG_PRINT("wsrep", ("state: %s", + wsrep::to_c_string(thd->wsrep_trx().state()))); + /* Transaction is local but has no changes, the commit hooks will + be skipped and the wsrep transaction is terminated in + wsrep_commit_empty() */ + if (thd->wsrep_trx().state() == wsrep::transaction::s_executing) + { + ret= false; + } + mysql_mutex_unlock(&thd->LOCK_thd_data); + } + + mysql_mutex_lock(&thd->LOCK_thd_data); + /* Transaction creating sequence is TOI or RSU, + CREATE SEQUENCE = CREATE + INSERT (initial value) + and replicated using statement based replication, thus + the commit hooks will be skipped. + + For TEMPORARY SEQUENCES commit hooks will be done as + CREATE + INSERT is not replicated and needs to be + committed locally. */ + if (ret && + (thd->wsrep_cs().mode() == wsrep::client_state::m_toi || + thd->wsrep_cs().mode() == wsrep::client_state::m_rsu) && + thd->lex->sql_command == SQLCOM_CREATE_SEQUENCE && + !thd->lex->tmp_table()) + ret= false; + mysql_mutex_unlock(&thd->LOCK_thd_data); + + DBUG_PRINT("wsrep", ("return: %d", ret)); + DBUG_RETURN(ret); +} + +/* + Called before the transaction is prepared. + + Return zero on succes, non-zero on failure. 
+ */ +static inline int wsrep_before_prepare(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_before_prepare"); + WSREP_DEBUG("wsrep_before_prepare: %d", wsrep_is_real(thd, all)); + int ret= 0; + DBUG_ASSERT(wsrep_run_commit_hook(thd, all)); + if ((ret= thd->wsrep_parallel_slave_wait_for_prior_commit())) + { + DBUG_RETURN(ret); + } + if ((ret= thd->wsrep_cs().before_prepare()) == 0) + { + DBUG_ASSERT(!thd->wsrep_trx().ws_meta().gtid().is_undefined()); + wsrep_xid_init(&thd->wsrep_xid, + thd->wsrep_trx().ws_meta().gtid(), + wsrep_gtid_server.gtid()); + } + + mysql_mutex_lock(&thd->LOCK_thd_kill); + if (thd->killed) wsrep_backup_kill_for_commit(thd); + mysql_mutex_unlock(&thd->LOCK_thd_kill); + + DBUG_RETURN(ret); +} + +/* + Called after the transaction has been prepared. + + Return zero on succes, non-zero on failure. + */ +static inline int wsrep_after_prepare(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_after_prepare"); + WSREP_DEBUG("wsrep_after_prepare: %d", wsrep_is_real(thd, all)); + DBUG_ASSERT(wsrep_run_commit_hook(thd, all)); + int ret= thd->wsrep_cs().after_prepare(); + DBUG_ASSERT(ret == 0 || thd->wsrep_cs().current_error() || + thd->wsrep_cs().transaction().state() == wsrep::transaction::s_must_replay); + DBUG_RETURN(ret); +} + +/* + Called before the transaction is committed. + + This function must be called from both client and + applier contexts before commit. + + Return zero on succes, non-zero on failure. 
+ */ +static inline int wsrep_before_commit(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_before_commit"); + WSREP_DEBUG("wsrep_before_commit: %d, %lld", + wsrep_is_real(thd, all), + (long long)wsrep_thd_trx_seqno(thd)); + THD_STAGE_INFO(thd, stage_waiting_certification); + int ret= 0; + DBUG_ASSERT(wsrep_run_commit_hook(thd, all)); + + if ((ret= thd->wsrep_cs().before_commit()) == 0) + { + DBUG_ASSERT(!thd->wsrep_trx().ws_meta().gtid().is_undefined()); + if (!thd->variables.gtid_seq_no && + (thd->wsrep_trx().ws_meta().flags() & wsrep::provider::flag::commit)) + { + uint64 seqno= 0; + if (thd->variables.wsrep_gtid_seq_no && + thd->variables.wsrep_gtid_seq_no > wsrep_gtid_server.seqno()) + { + seqno= thd->variables.wsrep_gtid_seq_no; + wsrep_gtid_server.seqno(thd->variables.wsrep_gtid_seq_no); + } + else + { + seqno= wsrep_gtid_server.seqno_inc(); + } + thd->variables.wsrep_gtid_seq_no= 0; + thd->wsrep_current_gtid_seqno= seqno; + if (mysql_bin_log.is_open() && wsrep_gtid_mode) + { + thd->variables.gtid_seq_no= seqno; + thd->variables.gtid_domain_id= wsrep_gtid_server.domain_id; + thd->variables.server_id= wsrep_gtid_server.server_id; + } + } + + wsrep_xid_init(&thd->wsrep_xid, + thd->wsrep_trx().ws_meta().gtid(), + wsrep_gtid_server.gtid()); + wsrep_register_for_group_commit(thd); + } + + mysql_mutex_lock(&thd->LOCK_thd_kill); + if (thd->killed) wsrep_backup_kill_for_commit(thd); + mysql_mutex_unlock(&thd->LOCK_thd_kill); + + DBUG_RETURN(ret); +} + +/* + Called after the transaction has been ordered for commit. + + This function must be called from both client and + applier contexts after the commit has been ordered. + + @param thd Pointer to THD + @param all + @param err Error buffer in case of applying error + + Return zero on succes, non-zero on failure. 
+ */ +static inline int wsrep_ordered_commit(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_ordered_commit"); + WSREP_DEBUG("wsrep_ordered_commit: %d %lld", wsrep_is_real(thd, all), + (long long) wsrep_thd_trx_seqno(thd)); + DBUG_ASSERT(wsrep_run_commit_hook(thd, all)); + DBUG_RETURN(thd->wsrep_cs().ordered_commit()); +} + +/* + Called after the transaction has been committed. + + Return zero on succes, non-zero on failure. + */ +static inline int wsrep_after_commit(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_after_commit"); + WSREP_DEBUG("wsrep_after_commit: %d, %d, %lld, %d", + wsrep_is_real(thd, all), + wsrep_is_active(thd), + (long long)wsrep_thd_trx_seqno(thd), + wsrep_has_changes(thd)); + DBUG_ASSERT(wsrep_run_commit_hook(thd, all)); + if (thd->internal_transaction()) + DBUG_RETURN(0); + int ret= 0; + if (thd->wsrep_trx().state() == wsrep::transaction::s_committing) + { + ret= thd->wsrep_cs().ordered_commit(); + } + wsrep_unregister_from_group_commit(thd); + thd->wsrep_xid.null(); + DBUG_RETURN(ret || thd->wsrep_cs().after_commit()); +} + +/* + Called before the transaction is rolled back. + + Return zero on succes, non-zero on failure. + */ +static inline int wsrep_before_rollback(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_before_rollback"); + int ret= 0; + if (wsrep_is_active(thd)) + { + if (!all && thd->in_active_multi_stmt_transaction()) + { + if (wsrep_emulate_bin_log) + { + wsrep_thd_binlog_stmt_rollback(thd); + } + + if (thd->wsrep_trx().is_streaming() && + (wsrep_fragments_certified_for_stmt(thd) > 0)) + { + /* Non-safe statement rollback during SR multi statement + transaction. A statement rollback is considered unsafe, if + the same statement has already replicated one or more fragments. + Self abort the transaction, the actual rollback and error + handling will be done in after statement phase. 
*/ + WSREP_DEBUG("statement rollback is not safe for streaming replication"); + wsrep_thd_self_abort(thd); + ret= 0; + } + } + else if (wsrep_is_real(thd, all) && + thd->wsrep_trx().state() != wsrep::transaction::s_aborted) + { + /* Real transaction rolling back and wsrep abort not completed + yet */ + /* Reset XID so that it does not trigger writing serialization + history in InnoDB. This needs to be avoided because rollback + may happen out of order and replay may follow. */ + thd->wsrep_xid.null(); + ret= thd->wsrep_cs().before_rollback(); + } + } + DBUG_RETURN(ret); +} + +/* + Called after the transaction has been rolled back. + + Return zero on succes, non-zero on failure. + */ +static inline int wsrep_after_rollback(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_after_rollback"); + DBUG_RETURN((wsrep_is_real(thd, all) && wsrep_is_active(thd) && + thd->wsrep_cs().transaction().state() != + wsrep::transaction::s_aborted) ? + thd->wsrep_cs().after_rollback() : 0); +} + +static inline int wsrep_before_statement(THD* thd) +{ + return (thd->wsrep_cs().state() != wsrep::client_state::s_none && + !thd->internal_transaction() ? + thd->wsrep_cs().before_statement() : 0); +} + +static inline +int wsrep_after_statement(THD* thd) +{ + DBUG_ENTER("wsrep_after_statement"); + WSREP_DEBUG("wsrep_after_statement for %lu client_state %s " + " client_mode %s trans_state %s", + thd_get_thread_id(thd), + wsrep::to_c_string(thd->wsrep_cs().state()), + wsrep::to_c_string(thd->wsrep_cs().mode()), + wsrep::to_c_string(thd->wsrep_cs().transaction().state())); + int ret= ((thd->wsrep_cs().state() != wsrep::client_state::s_none && + thd->wsrep_cs().mode() == Wsrep_client_state::m_local) && + !thd->internal_transaction() ? 
+ thd->wsrep_cs().after_statement() : 0); + + if (wsrep_is_active(thd)) + { + mysql_mutex_lock(&thd->LOCK_thd_kill); + wsrep_restore_kill_after_commit(thd); + mysql_mutex_unlock(&thd->LOCK_thd_kill); + } + DBUG_RETURN(ret); +} + +static inline void wsrep_after_apply(THD* thd) +{ + DBUG_ASSERT(wsrep_thd_is_applying(thd)); + WSREP_DEBUG("wsrep_after_apply %lld", thd->thread_id); + if (!thd->internal_transaction()) + thd->wsrep_cs().after_applying(); +} + +static inline void wsrep_open(THD* thd) +{ + DBUG_ENTER("wsrep_open"); + if (WSREP_ON_) + { + /* WSREP_PROVIDER_EXISTS_ cannot be set if WSREP_ON_ is not set */ + DBUG_ASSERT(WSREP_PROVIDER_EXISTS_); + thd->wsrep_cs().open(wsrep::client_id(thd->thread_id)); + thd->wsrep_cs().debug_log_level(wsrep_debug); + if (!thd->wsrep_applier && thd->variables.wsrep_trx_fragment_size) + { + thd->wsrep_cs().enable_streaming( + wsrep_fragment_unit(thd->variables.wsrep_trx_fragment_unit), + size_t(thd->variables.wsrep_trx_fragment_size)); + } + } + DBUG_VOID_RETURN; +} + +static inline void wsrep_close(THD* thd) +{ + DBUG_ENTER("wsrep_close"); + if (thd->wsrep_cs().state() != wsrep::client_state::s_none && + !thd->internal_transaction()) + { + thd->wsrep_cs().close(); + } + DBUG_VOID_RETURN; +} + +static inline void wsrep_cleanup(THD* thd) +{ + DBUG_ENTER("wsrep_cleanup"); + if (thd->wsrep_cs().state() != wsrep::client_state::s_none) + { + thd->wsrep_cs().cleanup(); + } + DBUG_VOID_RETURN; +} + +static inline void +wsrep_wait_rollback_complete_and_acquire_ownership(THD *thd) +{ + DBUG_ENTER("wsrep_wait_rollback_complete_and_acquire_ownership"); + if (thd->wsrep_cs().state() != wsrep::client_state::s_none && + !thd->internal_transaction()) + { + thd->wsrep_cs().wait_rollback_complete_and_acquire_ownership(); + } + DBUG_VOID_RETURN; +} + +static inline int wsrep_before_command(THD* thd, bool keep_command_error) +{ + return (thd->wsrep_cs().state() != wsrep::client_state::s_none && + !thd->internal_transaction() ? 
+ thd->wsrep_cs().before_command(keep_command_error) : 0); +} + +static inline int wsrep_before_command(THD* thd) +{ + return wsrep_before_command(thd, false); +} + +/* + Called after each command. + + Return zero on success, non-zero on failure. +*/ +static inline void wsrep_after_command_before_result(THD* thd) +{ + if (thd->wsrep_cs().state() != wsrep::client_state::s_none && + !thd->internal_transaction()) + { + thd->wsrep_cs().after_command_before_result(); + } +} + +static inline void wsrep_after_command_after_result(THD* thd) +{ + if (thd->wsrep_cs().state() != wsrep::client_state::s_none && + !thd->internal_transaction()) + { + thd->wsrep_cs().after_command_after_result(); + } +} + +static inline void wsrep_after_command_ignore_result(THD* thd) +{ + wsrep_after_command_before_result(thd); + DBUG_ASSERT(!thd->wsrep_cs().current_error()); + wsrep_after_command_after_result(thd); +} + +static inline enum wsrep::client_error wsrep_current_error(THD* thd) +{ + return thd->wsrep_cs().current_error(); +} + +static inline enum wsrep::provider::status +wsrep_current_error_status(THD* thd) +{ + return thd->wsrep_cs().current_error_status(); +} + +#endif /* WSREP_TRANS_OBSERVER */ diff --git a/sql/wsrep_types.h b/sql/wsrep_types.h new file mode 100644 index 00000000..cd53ab95 --- /dev/null +++ b/sql/wsrep_types.h @@ -0,0 +1,31 @@ +/* Copyright 2018 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/* + Wsrep typedefs to better conform to coding style. + */ +#ifndef WSREP_TYPES_H +#define WSREP_TYPES_H + +#include "wsrep/seqno.hpp" +#include "wsrep/view.hpp" +#include "wsrep/allowlist_service.hpp" + +typedef wsrep::id Wsrep_id; +typedef wsrep::seqno Wsrep_seqno; +typedef wsrep::view Wsrep_view; +typedef enum wsrep::allowlist_service::allowlist_key Wsrep_allowlist_key; + +#endif /* WSREP_TYPES_H */ diff --git a/sql/wsrep_utils.cc b/sql/wsrep_utils.cc new file mode 100644 index 00000000..0e4e2638 --- /dev/null +++ b/sql/wsrep_utils.cc @@ -0,0 +1,607 @@ +/* Copyright 2010-2015 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + */ + +//! 
@file some utility functions and classes not directly related to replication + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE // POSIX_SPAWN_USEVFORK flag +#endif + +#include "mariadb.h" +#include "my_global.h" +#include "wsrep_api.h" +#include "wsrep_utils.h" +#include "wsrep_mysqld.h" +#include "wsrep_thd.h" + +#include + +#include // posix_spawn() +#include // pipe() +#include // errno +#include // strerror() +#include // waitpid() +#include +#include +#include // getaddrinfo() + +#ifdef HAVE_GETIFADDRS +#include +#include +#endif /* HAVE_GETIFADDRS */ + +extern char** environ; // environment variables + +static wsp::string wsrep_PATH; + +void +wsrep_prepend_PATH (const char* path) +{ + int count= 0; + + while (environ[count]) + { + if (strncmp (environ[count], "PATH=", 5)) + { + count++; + continue; + } + + char* const old_path (environ[count]); + + if (strstr (old_path, path)) return; // path already there + + size_t const new_path_len(strlen(old_path) + strlen(":") + + strlen(path) + 1); + + char* const new_path (static_cast(malloc(new_path_len))); + + if (new_path) + { + snprintf (new_path, new_path_len, "PATH=%s:%s", path, + old_path + strlen("PATH=")); + + wsrep_PATH.set (new_path); + environ[count]= new_path; + } + else + { + WSREP_ERROR ("Failed to allocate 'PATH' environment variable " + "buffer of size %zu.", new_path_len); + } + + return; + } + + WSREP_ERROR ("Failed to find 'PATH' environment variable. " + "State snapshot transfer may not be working."); +} + +namespace wsp +{ + +bool +env::ctor_common(char** e) +{ + env_= static_cast(my_malloc(key_memory_WSREP, + (len_ + 1) * sizeof(char*), + 0)); + + if (env_) + { + for (size_t i(0); i < len_; ++i) + { + assert(e[i]); // caller should make sure about len_ + env_[i]= my_strdup(key_memory_WSREP, e[i], MYF(0)); + if (!env_[i]) + { + errno_= errno; + WSREP_ERROR("Failed to allocate env. 
var: %s", e[i]); + return true; + } + } + + env_[len_]= NULL; + return false; + } + else + { + errno_= errno; + WSREP_ERROR("Failed to allocate env. var vector of length: %zu", len_); + return true; + } +} + +void +env::dtor() +{ + if (env_) + { + /* don't need to go beyond the first NULL */ + for (size_t i(0); env_[i] != NULL; ++i) { my_free(env_[i]); } + my_free(env_); + env_= NULL; + } + len_= 0; +} + +env::env(char** e) + : len_(0), env_(NULL), errno_(0) +{ + if (!e) { e= environ; } + /* count the size of the vector */ + while (e[len_]) { ++len_; } + + if (ctor_common(e)) dtor(); +} + +env::env(const env& e) + : len_(e.len_), env_(0), errno_(0) +{ + if (ctor_common(e.env_)) dtor(); +} + +env::~env() { dtor(); } + +int +env::append(const char* val) +{ + char** tmp= static_cast(my_realloc(key_memory_WSREP, + env_, (len_ + 2)*sizeof(char*), + 0)); + if (tmp) + { + env_= tmp; + env_[len_]= my_strdup(key_memory_WSREP, val, 0); + + if (env_[len_]) + { + ++len_; + env_[len_]= NULL; + } + else errno_= errno; + } + else errno_= errno; + + return errno_; +} + + +#define PIPE_READ 0 +#define PIPE_WRITE 1 +#define STDIN_FD 0 +#define STDOUT_FD 1 + +#ifndef POSIX_SPAWN_USEVFORK +# define POSIX_SPAWN_USEVFORK 0 +#endif + +process::process (const char* cmd, const char* type, char** env) + : str_(cmd ? 
strdup(cmd) : strdup("")), io_(NULL), err_(EINVAL), pid_(0) +{ + if (0 == str_) + { + WSREP_ERROR ("Can't allocate command line of size: %zu", strlen(cmd)); + err_= ENOMEM; + return; + } + + if (0 == strlen(str_)) + { + WSREP_ERROR ("Can't start a process: null or empty command line."); + return; + } + + if (NULL == type || (strcmp (type, "w") && strcmp(type, "r"))) + { + WSREP_ERROR ("type argument should be either \"r\" or \"w\"."); + return; + } + + if (NULL == env) { env= environ; } // default to global environment + + int pipe_fds[2]= { -1, }; + if (::pipe(pipe_fds)) + { + err_= errno; + WSREP_ERROR ("pipe() failed: %d (%s)", err_, strerror(err_)); + return; + } + + // which end of pipe will be returned to parent + int const parent_end (strcmp(type,"w") ? PIPE_READ : PIPE_WRITE); + int const child_end (parent_end == PIPE_READ ? PIPE_WRITE : PIPE_READ); + int const close_fd (parent_end == PIPE_READ ? STDOUT_FD : STDIN_FD); + + char* const pargv[4]= { strdup("sh"), strdup("-c"), strdup(str_), NULL }; + if (!(pargv[0] && pargv[1] && pargv[2])) + { + err_= ENOMEM; + WSREP_ERROR ("Failed to allocate pargv[] array."); + goto cleanup_pipe; + } + + posix_spawnattr_t attr; + err_= posix_spawnattr_init (&attr); + if (err_) + { + WSREP_ERROR ("posix_spawnattr_init() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_pipe; + } + + /* make sure that no signlas are masked in child process */ + sigset_t sigmask_empty; sigemptyset(&sigmask_empty); + err_= posix_spawnattr_setsigmask(&attr, &sigmask_empty); + if (err_) + { + WSREP_ERROR ("posix_spawnattr_setsigmask() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_attr; + } + + /* make sure the following signals are not ignored in child process */ + sigset_t default_signals; sigemptyset(&default_signals); + sigaddset(&default_signals, SIGHUP); + sigaddset(&default_signals, SIGINT); + sigaddset(&default_signals, SIGQUIT); + sigaddset(&default_signals, SIGPIPE); + sigaddset(&default_signals, SIGTERM); + 
sigaddset(&default_signals, SIGCHLD); + err_= posix_spawnattr_setsigdefault(&attr, &default_signals); + if (err_) + { + WSREP_ERROR ("posix_spawnattr_setsigdefault() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_attr; + } + + err_= posix_spawnattr_setflags (&attr, POSIX_SPAWN_SETSIGDEF | + POSIX_SPAWN_SETSIGMASK | + POSIX_SPAWN_USEVFORK); + if (err_) + { + WSREP_ERROR ("posix_spawnattr_setflags() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_attr; + } + + posix_spawn_file_actions_t fact; + err_= posix_spawn_file_actions_init (&fact); + if (err_) + { + WSREP_ERROR ("posix_spawn_file_actions_init() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_attr; + } + + // close child's stdout|stdin depending on what we returning + err_= posix_spawn_file_actions_addclose (&fact, close_fd); + if (err_) + { + WSREP_ERROR ("posix_spawn_file_actions_addclose() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_fact; + } + + // substitute our pipe descriptor in place of the closed one + err_= posix_spawn_file_actions_adddup2 (&fact, + pipe_fds[child_end], close_fd); + if (err_) + { + WSREP_ERROR ("posix_spawn_file_actions_addup2() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_fact; + } + + err_= posix_spawnp (&pid_, pargv[0], &fact, &attr, pargv, env); + if (err_) + { + WSREP_ERROR ("posix_spawnp(%s) failed: %d (%s)", + pargv[2], err_, strerror(err_)); + pid_= 0; // just to make sure it was not messed up in the call + goto cleanup_fact; + } + + io_= fdopen (pipe_fds[parent_end], type); + + if (io_) + { + pipe_fds[parent_end]= -1; // skip close on cleanup + } + else + { + err_= errno; + WSREP_ERROR ("fdopen() failed: %d (%s)", err_, strerror(err_)); + } + +cleanup_fact: + int err; // to preserve err_ code + err= posix_spawn_file_actions_destroy (&fact); + if (err) + { + WSREP_ERROR ("posix_spawn_file_actions_destroy() failed: %d (%s)\n", + err, strerror(err)); + } + +cleanup_attr: + err= posix_spawnattr_destroy (&attr); + if (err) 
+ { + WSREP_ERROR ("posix_spawnattr_destroy() failed: %d (%s)", + err, strerror(err)); + } + +cleanup_pipe: + if (pipe_fds[0] >= 0) close (pipe_fds[0]); + if (pipe_fds[1] >= 0) close (pipe_fds[1]); + + free (pargv[0]); + free (pargv[1]); + free (pargv[2]); +} + +process::~process () +{ + if (io_) + { + assert (pid_); + assert (str_); + + WSREP_WARN("Closing pipe to child process: %s, PID(%ld) " + "which might still be running.", str_, (long)pid_); + + if (fclose (io_) == -1) + { + err_= errno; + WSREP_ERROR("fclose() failed: %d (%s)", err_, strerror(err_)); + } + } + + if (str_) free (const_cast(str_)); +} + +int +process::wait () +{ + if (pid_) + { + int status; + if (-1 == waitpid(pid_, &status, 0)) + { + err_= errno; assert (err_); + WSREP_ERROR("Waiting for process failed: %s, PID(%ld): %d (%s)", + str_, (long)pid_, err_, strerror (err_)); + } + else + { // command completed, check exit status + if (WIFEXITED (status)) { + err_= WEXITSTATUS (status); + } + else { // command didn't complete with exit() + WSREP_ERROR("Process was aborted."); + err_= errno ? 
errno : ECHILD; + } + + if (err_) { + switch (err_) /* Translate error codes to more meaningful */ + { + case 126: err_= EACCES; break; /* Permission denied */ + case 127: err_= ENOENT; break; /* No such file or directory */ + case 143: err_= EINTR; break; /* Subprocess killed */ + } + WSREP_ERROR("Process completed with error: %s: %d (%s)", + str_, err_, strerror(err_)); + } + + pid_= 0; + if (io_) fclose (io_); + io_= NULL; + } + } + else { + assert (NULL == io_); + WSREP_ERROR("Command did not run: %s", str_); + } + + return err_; +} + +thd::thd (my_bool won, bool system_thread) : init(), ptr(new THD(0)) +{ + if (ptr) + { + ptr->thread_stack= (char*) &ptr; + wsrep_assign_from_threadvars(ptr); + wsrep_store_threadvars(ptr); + ptr->variables.option_bits&= ~OPTION_BIN_LOG; // disable binlog + ptr->variables.wsrep_on= won; + if (system_thread) + ptr->system_thread= SYSTEM_THREAD_GENERIC; + ptr->security_ctx->master_access= ALL_KNOWN_ACL; + lex_start(ptr); + } +} + +thd::~thd () +{ + if (ptr) + { + delete ptr; + set_current_thd(nullptr); + } +} + +} // namespace wsp + +/* Returns INADDR_NONE, INADDR_ANY, INADDR_LOOPBACK or something else */ +unsigned int wsrep_check_ip (const char* const addr, bool *is_ipv6) +{ + unsigned int ret= INADDR_NONE; + struct addrinfo *res, hints; + + memset (&hints, 0, sizeof(hints)); + hints.ai_flags= AI_PASSIVE/*|AI_ADDRCONFIG*/; + hints.ai_socktype= SOCK_STREAM; + hints.ai_family= AF_UNSPEC; + + *is_ipv6= false; + + char *end; + char address[INET6_ADDRSTRLEN]; + + end= strcend(addr, ','); + strmake(address, addr, (uint) (end - addr)); + + int gai_ret= getaddrinfo(address, NULL, &hints, &res); + if (0 == gai_ret) + { + if (AF_INET == res->ai_family) /* IPv4 */ + { + struct sockaddr_in* a= (struct sockaddr_in*)res->ai_addr; + ret= htonl(a->sin_addr.s_addr); + } + else /* IPv6 */ + { + struct sockaddr_in6* a= (struct sockaddr_in6*)res->ai_addr; + if (IN6_IS_ADDR_UNSPECIFIED(&a->sin6_addr)) + ret= INADDR_ANY; + else if 
(IN6_IS_ADDR_LOOPBACK(&a->sin6_addr)) + ret= INADDR_LOOPBACK; + else + ret= 0xdeadbeef; + + *is_ipv6= true; + } + freeaddrinfo (res); + } + else { + WSREP_ERROR ("getaddrinfo() failed on '%s': %d (%s)", + addr, gai_ret, gai_strerror(gai_ret)); + } + + return ret; +} + +extern char* my_bind_addr_str; + +size_t wsrep_guess_ip (char* buf, size_t buf_len) +{ + size_t ret= 0; + + // Attempt 1: Try to get the IP from bind-address. + // Skip if empty or bind-address=* + if (my_bind_addr_str && my_bind_addr_str[0] != '\0' && + strcmp(my_bind_addr_str, "*") != 0) + { + bool unused; + unsigned int const ip_type= wsrep_check_ip(my_bind_addr_str, &unused); + + if (INADDR_NONE == ip_type) { + WSREP_ERROR("Networking not configured, cannot receive state " + "transfer."); + ret= 0; + goto done; + } else if (INADDR_ANY != ip_type) { + strncpy (buf, my_bind_addr_str, buf_len); + ret= strlen(buf); + goto done; + } + } + + // Attempt 2: mysqld binds to all interfaces, try IP from wsrep_node_address. + if (wsrep_node_address && wsrep_node_address[0] != '\0') { + wsp::Address addr(wsrep_node_address); + if (!addr.is_valid()) + { + WSREP_WARN("Could not parse wsrep_node_address : %s", + wsrep_node_address); + ret= 0; + goto done; + } + + /* Safety check: Buffer length should be sufficiently large. */ + DBUG_ASSERT(buf_len >= addr.get_address_len()); + + memcpy(buf, addr.get_address(), addr.get_address_len()); + ret= addr.get_address_len(); + goto done; + } + + /* + Attempt 3: Try to get the IP from the list of available interfaces. + + getifaddrs() is avaiable at least on Linux since glib 2.3, FreeBSD, + MAC OSX, OpenSolaris, Solaris. + + On platforms which do not support getifaddrs() this function returns + a failure and user is prompted to do manual configuration. 
+ */ +#if HAVE_GETIFADDRS + struct ifaddrs *ifaddr, *ifa; + int family; + + if (getifaddrs(&ifaddr) == 0) + { + for (ifa= ifaddr; ifa != NULL; ifa= ifa->ifa_next) + { + if (!ifa->ifa_addr) + continue; + + family= ifa->ifa_addr->sa_family; + + if ((family != AF_INET) && (family != AF_INET6)) + continue; + + // Skip loopback interfaces (like lo:127.0.0.1) + if (ifa->ifa_flags & IFF_LOOPBACK) + continue; + + /* + Get IP address from the socket address. The resulting address may have + zone ID appended for IPv6 addresses (
%). + */ + if (vio_getnameinfo(ifa->ifa_addr, buf, buf_len, NULL, 0, NI_NUMERICHOST)) + continue; + + freeifaddrs(ifaddr); + + ret= strlen(buf); + goto done; + } + freeifaddrs(ifaddr); + } +#endif /* HAVE_GETIFADDRS */ + +done: + WSREP_DEBUG("wsrep_guess_ip() : %s", (ret > 0) ? buf : "????"); + return ret; +} + +/* returns the length of the host part of the address string */ +size_t wsrep_host_len(const char* const addr, size_t const addr_len) +{ + // check for IPv6 notation first + const char* const bracket= ('[' == addr[0] ? strchr(addr, ']') : NULL); + + if (bracket) { // IPv6 + return (bracket - addr + 1); + } + else { // host part ends at ':' or end of string + const char* const colon= strchr(addr, ':'); + return (colon ? colon - addr : addr_len); + } +} diff --git a/sql/wsrep_utils.h b/sql/wsrep_utils.h new file mode 100644 index 00000000..a1629b03 --- /dev/null +++ b/sql/wsrep_utils.h @@ -0,0 +1,447 @@ +/* Copyright (C) 2013-2015 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA. 
*/ + +#ifndef WSREP_UTILS_H +#define WSREP_UTILS_H + +#include "wsrep_priv.h" +#include "wsrep_mysqld.h" + +unsigned int wsrep_check_ip (const char* const addr, bool *is_ipv6); +size_t wsrep_guess_ip (char* buf, size_t buf_len); +namespace wsp { +class node_status +{ +public: + node_status() : status(wsrep::server_state::s_disconnected) {} + void set(enum wsrep::server_state::state new_status, + const wsrep::view* view= 0) + { + if (status != new_status || 0 != view) + { + wsrep_notify_status(new_status, view); + status= new_status; + } + } + enum wsrep::server_state::state get() const { return status; } +private: + enum wsrep::server_state::state status; +}; +} /* namespace wsp */ + +extern wsp::node_status local_status; + +/* returns the length of the host part of the address string */ +size_t wsrep_host_len(const char* addr, size_t addr_len); + +namespace wsp { + +class Address { +public: + Address() + : m_address_len(0), m_family(UNSPEC), m_port(0), m_valid(false) + { + memset(m_address, 0, sizeof(m_address)); + } + Address(const char *addr_in) + : m_address_len(0), m_family(UNSPEC), m_port(0), m_valid(false) + { + memset(m_address, 0, sizeof(m_address)); + parse_addr(addr_in); + } + bool is_valid() { return m_valid; } + bool is_ipv6() { return (m_family == INET6); } + + const char* get_address() { return m_address; } + size_t get_address_len() { return m_address_len; } + int get_port() { return m_port; } + void set_port(int port) { m_port= port; } + +private: + enum family { + UNSPEC= 0, + INET, /* IPv4 */ + INET6, /* IPv6 */ + }; + + char m_address[256]; + size_t m_address_len; + family m_family; + int m_port; + bool m_valid; + + void parse_addr(const char *addr_in) { + const char *start; + const char *end; + const char *port; + const char* open_bracket= strchr(const_cast(addr_in), '['); + const char* close_bracket= strchr(const_cast(addr_in), ']'); + const char* colon= strchr(const_cast(addr_in), ':'); + const char* dot= strchr(const_cast(addr_in), '.'); + + 
int cc= colon_count(addr_in); + + if (open_bracket != NULL || + dot == NULL || + (colon != NULL && (dot == NULL || colon < dot))) + { + // This could be an IPv6 address or a hostname + if (open_bracket != NULL) { + /* Sanity check: Address with '[' must include ']' */ + if (close_bracket == NULL && + open_bracket < close_bracket) /* Error: malformed address */ + { + m_valid= false; + return; + } + + start= open_bracket + 1; + end= close_bracket; + + /* Check for port */ + port= strchr(close_bracket, ':'); + if ((port != NULL) && parse_port(port + 1)) + { + return; /* Error: invalid port */ + } + m_family= INET6; + } + else + { + switch (cc) { + case 0: + /* Hostname with no port */ + start= addr_in; + end= addr_in + strlen(addr_in); + break; + case 1: + /* Hostname with port (host:port) */ + start= addr_in; + end= colon; + if (parse_port(colon + 1)) + return; /* Error: invalid port */ + break; + default: + /* IPv6 address */ + start= addr_in; + end= addr_in + strlen(addr_in); + m_family= INET6; + break; + } + } + } else { /* IPv4 address or hostname */ + start= addr_in; + if (colon != NULL) { /* Port */ + end= colon; + if (parse_port(colon + 1)) + return; /* Error: invalid port */ + } else { + end= addr_in + strlen(addr_in); + } + } + + size_t len= end - start; + + /* Safety */ + if (len >= sizeof(m_address)) + { + // The supplied address is too large to fit into the internal buffer. 
+ m_valid= false; + return; + } + + memcpy(m_address, start, len); + m_address[len]= '\0'; + m_address_len= ++ len; + m_valid= true; + return; + } + + int colon_count(const char *addr) { + int count= 0, i= 0; + + while(addr[i] != '\0') + { + if (addr[i] == ':') ++count; + ++ i; + } + return count; + } + + bool parse_port(const char *port) { + errno= 0; /* Reset the errno */ + m_port= strtol(port, NULL, 10); + if (errno == EINVAL || errno == ERANGE) + { + m_port= 0; /* Error: invalid port */ + m_valid= false; + return true; + } + return false; + } +}; + +class Config_state +{ +public: + Config_state() : view_(), status_(wsrep::server_state::s_disconnected) + {} + + void set(const wsrep::view& view) + { + wsrep_notify_status(status_, &view); + + lock(); + view_= view; + unlock(); + } + + void set(enum wsrep::server_state::state status) + { + if (status == wsrep::server_state::s_donor || + status == wsrep::server_state::s_synced) + wsrep_notify_status(status, &view_); + else + wsrep_notify_status(status); + + lock(); + status_= status; + unlock(); + } + + const wsrep::view& get_view_info() const + { + return view_; + } + + enum wsrep::server_state::state get_status() const + { + return status_; + } + + int lock() + { + return mysql_mutex_lock(&LOCK_wsrep_config_state); + } + + int unlock() + { + return mysql_mutex_unlock(&LOCK_wsrep_config_state); + } + +private: + wsrep::view view_; + enum wsrep::server_state::state status_; +}; + +} /* namespace wsp */ + +extern wsp::Config_state *wsrep_config_state; + +namespace wsp { +/* a class to manage env vars array */ +class env +{ +private: + size_t len_; + char** env_; + int errno_; + bool ctor_common(char** e); + void dtor(); + env& operator =(env); +public: + explicit env(char** env); + explicit env(const env&); + ~env(); + int append(const char* var); /* add a new env. var */ + int error() const { return errno_; } + char** operator()() { return env_; } +}; + +/* A small class to run external programs. 
*/ +class process +{ +private: + const char* const str_; + FILE* io_; + int err_; + pid_t pid_; + +public: +/*! @arg type is a pointer to a null-terminated string which must contain + either the letter 'r' for reading or the letter 'w' for writing. + @arg env optional null-terminated vector of environment variables + */ + process (const char* cmd, const char* type, char** env); + ~process (); + + FILE* pipe () { return io_; } + int error() { return err_; } + int wait (); + const char* cmd() { return str_; } +}; + +class thd +{ + class thd_init + { + public: + thd_init() { my_thread_init(); } + ~thd_init() { my_thread_end(); } + } + init; + + thd (const thd&); + thd& operator= (const thd&); + +public: + + thd(my_bool wsrep_on, bool system_thread=false); + ~thd(); + THD* const ptr; +}; + +class string +{ +public: + string() : string_(0) {} + explicit string(size_t s) : string_(static_cast(malloc(s))) {} + char* operator()() { return string_; } + void set(char* str) { if (string_) free (string_); string_= str; } + ~string() { set (0); } +private: + char* string_; +}; + +/* scope level lock */ +class auto_lock +{ +public: + auto_lock(mysql_mutex_t* m) : m_(m) { mysql_mutex_lock(m_); } + ~auto_lock() { mysql_mutex_unlock(m_); } +private: + mysql_mutex_t& operator =(mysql_mutex_t&); + mysql_mutex_t* const m_; +}; + +#ifdef REMOVED +class lock +{ + pthread_mutex_t* const mtx_; + +public: + + lock (pthread_mutex_t* mtx) : mtx_(mtx) + { + int err= pthread_mutex_lock (mtx_); + + if (err) + { + WSREP_ERROR("Mutex lock failed: %s", strerror(err)); + abort(); + } + } + + virtual ~lock () + { + int err= pthread_mutex_unlock (mtx_); + + if (err) + { + WSREP_ERROR("Mutex unlock failed: %s", strerror(err)); + abort(); + } + } + + inline void wait (pthread_cond_t* cond) + { + pthread_cond_wait (cond, mtx_); + } + +private: + + lock (const lock&); + lock& operator=(const lock&); + +}; + +class monitor +{ + int mutable refcnt; + pthread_mutex_t mutable mtx; + pthread_cond_t mutable 
cond; + +public: + + monitor() : refcnt(0) + { + pthread_mutex_init (&mtx, NULL); + pthread_cond_init (&cond, NULL); + } + + ~monitor() + { + pthread_mutex_destroy (&mtx); + pthread_cond_destroy (&cond); + } + + void enter() const + { + lock l(&mtx); + + while (refcnt) + { + l.wait(&cond); + } + refcnt++; + } + + void leave() const + { + lock l(&mtx); + + refcnt--; + if (refcnt == 0) + { + pthread_cond_signal (&cond); + } + } + +private: + + monitor (const monitor&); + monitor& operator= (const monitor&); +}; + +class critical +{ + const monitor& mon; + +public: + + critical(const monitor& m) : mon(m) { mon.enter(); } + + ~critical() { mon.leave(); } + +private: + + critical (const critical&); + critical& operator= (const critical&); +}; +#endif + +} // namespace wsrep + +#endif /* WSREP_UTILS_H */ diff --git a/sql/wsrep_var.cc b/sql/wsrep_var.cc new file mode 100644 index 00000000..d4af7a95 --- /dev/null +++ b/sql/wsrep_var.cc @@ -0,0 +1,1132 @@ +/* Copyright 2008-2022 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "wsrep_var.h" + +#include +#include +#include +#include +#include "wsrep_priv.h" +#include "wsrep_thd.h" +#include "wsrep_xid.h" +#include +#include +#include +#include "wsrep_trans_observer.h" +#include "wsrep_server_state.h" + +ulong wsrep_reject_queries; + +int wsrep_init_vars() +{ + wsrep_provider = my_strdup(PSI_INSTRUMENT_ME, WSREP_NONE, MYF(MY_WME)); + wsrep_provider_options= my_strdup(PSI_INSTRUMENT_ME, "", MYF(MY_WME)); + wsrep_cluster_address = my_strdup(PSI_INSTRUMENT_ME, "", MYF(MY_WME)); + wsrep_cluster_name = my_strdup(PSI_INSTRUMENT_ME, WSREP_CLUSTER_NAME, MYF(MY_WME)); + wsrep_node_name = my_strdup(PSI_INSTRUMENT_ME, "", MYF(MY_WME)); + wsrep_node_address = my_strdup(PSI_INSTRUMENT_ME, "", MYF(MY_WME)); + wsrep_node_incoming_address= my_strdup(PSI_INSTRUMENT_ME, WSREP_NODE_INCOMING_AUTO, MYF(MY_WME)); + if (wsrep_gtid_mode) + wsrep_start_position = my_strdup(PSI_INSTRUMENT_ME, WSREP_START_POSITION_ZERO_GTID, MYF(MY_WME)); + else + wsrep_start_position = my_strdup(PSI_INSTRUMENT_ME, WSREP_START_POSITION_ZERO, MYF(MY_WME)); + return 0; +} + +static int get_provider_option_value(const char* opts, + const char* opt_name, + ulong* opt_value) +{ + int ret= 1; + ulong opt_value_tmp; + char *opt_value_str, *s, *opts_copy= my_strdup(PSI_INSTRUMENT_ME, opts, MYF(MY_WME)); + + if ((opt_value_str= strstr(opts_copy, opt_name)) == NULL) + goto end; + opt_value_str= strtok_r(opt_value_str, "=", &s); + if (opt_value_str == NULL) goto end; + opt_value_str= strtok_r(NULL, ";", &s); + if (opt_value_str == NULL) goto end; + + opt_value_tmp= strtoul(opt_value_str, NULL, 10); + if (errno == ERANGE) goto end; + + *opt_value= opt_value_tmp; + ret= 0; + +end: + my_free(opts_copy); + return ret; +} + +static bool refresh_provider_options() +{ + 
WSREP_DEBUG("refresh_provider_options: %s", + (wsrep_provider_options) ? wsrep_provider_options : "null"); + + try + { + std::string opts= Wsrep_server_state::instance().provider().options(); + wsrep_provider_options_init(opts.c_str()); + get_provider_option_value(wsrep_provider_options, + (char*)"repl.max_ws_size", + &wsrep_max_ws_size); + return false; + } + catch (...) + { + WSREP_ERROR("Failed to get provider options"); + return true; + } +} + +void wsrep_set_wsrep_on(THD* thd) +{ + if (thd) + thd->wsrep_was_on= WSREP_ON_; + WSREP_PROVIDER_EXISTS_= wsrep_provider && + strncasecmp(wsrep_provider, WSREP_NONE, FN_REFLEN); + WSREP_ON_= global_system_variables.wsrep_on && WSREP_PROVIDER_EXISTS_; +} + +bool wsrep_on_update (sys_var *self, THD* thd, enum_var_type var_type) +{ + if (var_type == OPT_GLOBAL) + { + my_bool saved_wsrep_on= global_system_variables.wsrep_on; + + thd->variables.wsrep_on= global_system_variables.wsrep_on; + + // If wsrep has not been inited we need to do it now + if (global_system_variables.wsrep_on && wsrep_provider && !wsrep_inited) + { + // wsrep_init() rewrites provide if it fails + char* tmp= strdup(wsrep_provider); + mysql_mutex_unlock(&LOCK_global_system_variables); + + if (wsrep_init()) + { + my_error(ER_CANT_OPEN_LIBRARY, MYF(0), tmp, errno, "wsrep_init failed"); + saved_wsrep_on= false; + } + + free(tmp); + mysql_mutex_lock(&LOCK_global_system_variables); + } + + thd->variables.wsrep_on= global_system_variables.wsrep_on= saved_wsrep_on; + } + + wsrep_set_wsrep_on(thd); + + if (var_type == OPT_GLOBAL) + { + if (thd->variables.wsrep_on && + thd->wsrep_cs().state() == wsrep::client_state::s_none) + { + wsrep_open(thd); + wsrep_before_command(thd); + } + } + + return false; +} + +bool wsrep_on_check(sys_var *self, THD* thd, set_var* var) +{ + bool new_wsrep_on= (bool)var->save_result.ulonglong_value; + + if (check_has_super(self, thd, var)) + return true; + + if (new_wsrep_on) + { + if (!WSREP_PROVIDER_EXISTS) + { + 
my_message(ER_WRONG_ARGUMENTS, "WSREP (galera) can't be enabled " + "if the wsrep_provider is unset or set to 'none'", MYF(0)); + return true; + } + + if (var->type == OPT_SESSION && + !global_system_variables.wsrep_on) + { + my_message(ER_WRONG_ARGUMENTS, + "Can't enable @@session.wsrep_on, " + "while @@global.wsrep_on is disabled", MYF(0)); + return true; + } + } + + if (thd->in_active_multi_stmt_transaction()) + { + my_error(ER_CANT_DO_THIS_DURING_AN_TRANSACTION, MYF(0)); + return true; + } + + if (var->type == OPT_GLOBAL) + { + /* + The global value is about to change. Cleanup + the transaction state and close the client + state. wsrep_on_update() will take care of + reopening it should wsrep_on be re-enabled. + */ + if (global_system_variables.wsrep_on && !new_wsrep_on) + { + wsrep_commit_empty(thd, true); + wsrep_after_statement(thd); + wsrep_after_command_ignore_result(thd); + wsrep_close(thd); + wsrep_cleanup(thd); + } + } + + return false; +} + +bool wsrep_causal_reads_update (sys_var *self, THD* thd, enum_var_type var_type) +{ + if (thd->variables.wsrep_causal_reads) { + thd->variables.wsrep_sync_wait |= WSREP_SYNC_WAIT_BEFORE_READ; + } else { + thd->variables.wsrep_sync_wait &= ~WSREP_SYNC_WAIT_BEFORE_READ; + } + + // update global settings too. 
+ if (global_system_variables.wsrep_causal_reads) { + global_system_variables.wsrep_sync_wait |= WSREP_SYNC_WAIT_BEFORE_READ; + } else { + global_system_variables.wsrep_sync_wait &= ~WSREP_SYNC_WAIT_BEFORE_READ; + } + + return false; +} + +bool wsrep_sync_wait_update (sys_var* self, THD* thd, enum_var_type var_type) +{ + thd->variables.wsrep_causal_reads= thd->variables.wsrep_sync_wait & + WSREP_SYNC_WAIT_BEFORE_READ; + + // update global settings too + global_system_variables.wsrep_causal_reads= global_system_variables.wsrep_sync_wait & + WSREP_SYNC_WAIT_BEFORE_READ; + + return false; +} + +template +static T parse_value(char** startptr, char** endptr) +{ + T val= strtoll(*startptr, *&endptr, 10); + *startptr= *endptr; + return val; +} + +/* + Verify the format of the given UUID:seqno. + + @return + true Fail + false Pass +*/ +static +bool wsrep_start_position_verify (const char* start_str) +{ + size_t start_len; + wsrep_uuid_t uuid; + ssize_t uuid_len; + + // Check whether it has minimum acceptable length. + start_len= strlen (start_str); + if (start_len < 34) + return true; + + /* + Parse the input to check whether UUID length is acceptable + and seqno has been provided. + */ + uuid_len= wsrep_uuid_scan (start_str, start_len, &uuid); + if (uuid_len < 0 || (start_len - uuid_len) < 2) + return true; + + // Separator must follow the UUID. 
+ if (start_str[uuid_len] != ':') + return true; + + char* endptr; + char* startptr= (char *)start_str + uuid_len + 1; + wsrep_seqno_t const seqno(parse_value(&startptr, &endptr)); + + // Do not allow seqno < -1 + if (seqno < -1) + return true; + + // Start parsing native GTID part + if (*startptr == ',') + { + startptr++; + uint32_t domain __attribute__((unused)) + (parse_value(&startptr, &endptr)); + if (*endptr != '-') return true; + startptr++; + uint32_t server __attribute__((unused)) + (parse_value(&startptr, &endptr)); + if (*endptr != '-') return true; + startptr++; + uint64_t seq __attribute__((unused)) + (parse_value(&startptr, &endptr)); + } + + // Remaining string was seqno. + if (*endptr == '\0') return false; + + return true; +} + + +static +bool wsrep_set_local_position(THD* thd, const char* const value, + size_t length, bool const sst) +{ + char* endptr; + char* startptr; + wsrep_uuid_t uuid; + size_t const uuid_len= wsrep_uuid_scan(value, length, &uuid); + startptr= (char *)value + uuid_len + 1; + wsrep_seqno_t const seqno= parse_value(&startptr, &endptr); + + if (*startptr == ',') + { + startptr++; + wsrep_gtid_server.domain_id= parse_value(&startptr, &endptr); + startptr++; + wsrep_gtid_server.server_id= parse_value(&startptr, &endptr); + startptr++; + wsrep_gtid_server.seqno(parse_value(&startptr, &endptr)); + } + + char start_pos_buf[FN_REFLEN]; + memcpy(start_pos_buf, value, length); + start_pos_buf[length]='\0'; + + // If both are same as WSREP_START_POSITION_ZERO just set local + if (!strcmp(start_pos_buf, WSREP_START_POSITION_ZERO) && + !strcmp(wsrep_start_position, WSREP_START_POSITION_ZERO)) + goto set; + else + WSREP_INFO("SST setting local position to %s current %s", start_pos_buf, wsrep_start_position); + + if (sst) + return (wsrep_sst_received (thd, uuid, seqno, NULL, 0)); + +set: + local_uuid= uuid; + local_seqno= seqno; + + return false; +} + + +bool wsrep_start_position_check (sys_var *self, THD* thd, set_var* var) +{ + char 
start_pos_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + goto err; + + memcpy(start_pos_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + start_pos_buf[var->save_result.string_value.length]= 0; + + + WSREP_DEBUG("SST wsrep_start_position check for new position %s old %s", + start_pos_buf, wsrep_start_position); + + // Verify the format. + if (wsrep_start_position_verify(start_pos_buf)) return true; + + + // Give error if position is updated when wsrep is not enabled or + // provider is not loaded. + if ((!WSREP_ON || !Wsrep_server_state::instance().is_provider_loaded()) + && strcmp(start_pos_buf, WSREP_START_POSITION_ZERO)) + { + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "Cannot set 'wsrep_start_position' because " + "wsrep is switched off or provider is not loaded"); + goto err; + } + + /* + As part of further verification, we try to update the value and catch + errors (if any) only when value actually has been changed. + */ + if (wsrep_set_local_position(thd, var->save_result.string_value.str, + var->save_result.string_value.length, + true)) + goto err; + + return false; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return true; +} + +bool wsrep_start_position_update (sys_var *self, THD* thd, enum_var_type type) +{ + // Print a confirmation that wsrep_start_position has been updated. + WSREP_INFO ("wsrep_start_position set to '%s'", wsrep_start_position); + return false; +} + +bool wsrep_start_position_init (const char* val) +{ + if (NULL == val || wsrep_start_position_verify (val)) + { + WSREP_ERROR("Bad initial value for wsrep_start_position: %s", + (val ? 
val : "")); + return true; + } + + if (wsrep_set_local_position (NULL, val, strlen(val), false)) + { + WSREP_ERROR("Failed to set initial wsep_start_position: %s", val); + return true; + } + + return false; +} + +static int wsrep_provider_verify (const char* provider_str) +{ + MY_STAT f_stat; + char path[FN_REFLEN]; + + if (!provider_str || strlen(provider_str)== 0) + return 1; + + if (!strcmp(provider_str, WSREP_NONE)) + return 0; + + if (!unpack_filename(path, provider_str)) + return 1; + + /* check that provider file exists */ + memset(&f_stat, 0, sizeof(MY_STAT)); + if (!my_stat(path, &f_stat, MYF(0))) + { + return 1; + } + + if (MY_S_ISDIR(f_stat.st_mode)) + { + return 1; + } + + return 0; +} + +bool wsrep_provider_check (sys_var *self, THD* thd, set_var* var) +{ + char wsrep_provider_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + goto err; + + memcpy(wsrep_provider_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + wsrep_provider_buf[var->save_result.string_value.length]= 0; + + if (!wsrep_provider_verify(wsrep_provider_buf)) return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_provider_update (sys_var *self, THD* thd, enum_var_type type) +{ + bool rcode= false; + + + WSREP_DEBUG("wsrep_provider_update: %s", wsrep_provider); + + /* stop replication is heavy operation, and includes closing all client + connections. Closing clients may need to get LOCK_global_system_variables + at least in MariaDB. 
+ + Note: releasing LOCK_global_system_variables may cause race condition, if + there can be several concurrent clients changing wsrep_provider + */ + mysql_mutex_unlock(&LOCK_global_system_variables); + wsrep_stop_replication(thd); + + /* provider status variables are allocated in provider library + and need to freed here, otherwise a dangling reference to + wsrep_status_vars would remain in THD + */ + wsrep_free_status(thd); + + if (wsrep_inited == 1) + wsrep_deinit(false); + + char* tmp= strdup(wsrep_provider); // wsrep_init() rewrites provider + //when fails + + if (wsrep_init()) + { + my_error(ER_CANT_OPEN_LIBRARY, MYF(0), tmp, my_error, "wsrep_init failed"); + rcode= true; + } + free(tmp); + + // we sure don't want to use old address with new provider + wsrep_cluster_address_init(NULL); + wsrep_provider_options_init(NULL); + if (!rcode) + refresh_provider_options(); + + wsrep_set_wsrep_on(thd); + mysql_mutex_lock(&LOCK_global_system_variables); + + return rcode; +} + +void wsrep_provider_init (const char* value) +{ + WSREP_DEBUG("wsrep_provider_init: %s -> %s", + (wsrep_provider) ? wsrep_provider : "null", + (value) ? value : "null"); + if (NULL == value || wsrep_provider_verify (value)) + { + WSREP_ERROR("Bad initial value for wsrep_provider: %s", + (value ? 
value : "")); + return; + } + + if (wsrep_provider) my_free((void *)wsrep_provider); + wsrep_provider= my_strdup(PSI_INSTRUMENT_MEM, value, MYF(0)); + wsrep_set_wsrep_on(NULL); +} + +bool wsrep_provider_options_check(sys_var *self, THD* thd, set_var* var) +{ + if (!WSREP_ON) + { + my_message(ER_WRONG_ARGUMENTS, "WSREP (galera) not started", MYF(0)); + return true; + } + return false; +} + +bool wsrep_provider_options_update(sys_var *self, THD* thd, enum_var_type type) +{ + if (wsrep_provider_options) + { + enum wsrep::provider::status ret= + Wsrep_server_state::instance().provider().options(wsrep_provider_options); + if (ret) + { + WSREP_ERROR("Set options returned %d", ret); + goto err; + } + + return refresh_provider_options(); + } +err: + refresh_provider_options(); + return true; +} + +void wsrep_provider_options_init(const char* value) +{ + if (wsrep_provider_options && wsrep_provider_options != value) + my_free((void *)wsrep_provider_options); + wsrep_provider_options= value ? my_strdup(PSI_INSTRUMENT_MEM, value, MYF(0)) : NULL; +} + +bool wsrep_reject_queries_update(sys_var *self, THD* thd, enum_var_type type) +{ + switch (wsrep_reject_queries) { + case WSREP_REJECT_NONE: + WSREP_INFO("Allowing client queries due to manual setting"); + break; + case WSREP_REJECT_ALL: + WSREP_INFO("Rejecting client queries due to manual setting"); + break; + case WSREP_REJECT_ALL_KILL: + /* close all client connections, but this one */ + wsrep_close_client_connections(FALSE, thd); + WSREP_INFO("Rejecting client queries and killing connections due to manual setting"); + break; + default: + WSREP_INFO("Unknown value for wsrep_reject_queries: %lu", + wsrep_reject_queries); + return true; + } + return false; +} + +bool wsrep_debug_update(sys_var *self, THD* thd, enum_var_type type) +{ + // Give warnings if wsrep_debug is set and wsrep is disabled or + // provider is not loaded, it will not have any effect + if ((!WSREP_ON || !Wsrep_server_state::instance().is_provider_loaded()) + 
&& wsrep_debug) + { + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "Setting 'wsrep_debug' has no effect because " + "wsrep is switched off"); + wsrep_debug= 0; + } + else + Wsrep_server_state::instance().debug_log_level(wsrep_debug); + + return false; +} + +bool +wsrep_gtid_seq_no_check(sys_var *self, THD *thd, set_var *var) +{ + ulonglong new_wsrep_gtid_seq_no= var->save_result.ulonglong_value; + if (wsrep_gtid_mode && new_wsrep_gtid_seq_no > wsrep_gtid_server.seqno()) + return false; + return true; +} + +static int wsrep_cluster_address_verify (const char* cluster_address_str) +{ + /* There is no predefined address format, it depends on provider. */ + return 0; +} + +bool wsrep_cluster_address_check (sys_var *self, THD* thd, set_var* var) +{ + char addr_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length >= sizeof(addr_buf))) // safety + goto err; + + strmake(addr_buf, var->save_result.string_value.str, + MY_MIN(sizeof(addr_buf)-1, var->save_result.string_value.length)); + + if (!wsrep_cluster_address_verify(addr_buf)) + return 0; + + err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type) +{ + if (!Wsrep_server_state::instance().is_provider_loaded()) + { + WSREP_INFO("WSREP (galera) provider is not loaded, can't re(start) replication."); + return false; + } + + /* stop replication is heavy operation, and includes closing all client + connections. Closing clients may need to get LOCK_global_system_variables + at least in MariaDB. 
+ */ + char *tmp= my_strdup(PSI_INSTRUMENT_ME, wsrep_cluster_address, MYF(MY_WME)); + WSREP_DEBUG("wsrep_cluster_address_update: %s", wsrep_cluster_address); + mysql_mutex_unlock(&LOCK_global_system_variables); + + mysql_mutex_lock(&LOCK_wsrep_cluster_config); + wsrep_stop_replication(thd); + + if (*tmp && wsrep_start_replication(tmp)) + { + wsrep_create_rollbacker(); + WSREP_DEBUG("Cluster address update creating %ld applier threads running %lu", + wsrep_slave_threads, wsrep_running_applier_threads); + wsrep_create_appliers(wsrep_slave_threads); + } + mysql_mutex_unlock(&LOCK_wsrep_cluster_config); + + mysql_mutex_lock(&LOCK_global_system_variables); + if (strcmp(tmp, wsrep_cluster_address)) + { + my_free((void*)wsrep_cluster_address); + wsrep_cluster_address= tmp; + } + else + my_free(tmp); + + return false; +} + +void wsrep_cluster_address_init (const char* value) +{ + WSREP_DEBUG("wsrep_cluster_address_init: %s -> %s", + (wsrep_cluster_address) ? wsrep_cluster_address : "null", + (value) ? value : "null"); + + my_free(const_cast(wsrep_cluster_address)); + wsrep_cluster_address= my_strdup(PSI_INSTRUMENT_MEM, safe_str(value), MYF(0)); +} + +/* wsrep_cluster_name cannot be NULL or an empty string. */ +bool wsrep_cluster_name_check (sys_var *self, THD* thd, set_var* var) +{ + if (!var->save_result.string_value.str || + (var->save_result.string_value.length == 0)) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + (var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL")); + return 1; + } + return 0; +} + +bool wsrep_cluster_name_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +bool wsrep_node_name_check (sys_var *self, THD* thd, set_var* var) +{ + // TODO: for now 'allow' 0-length string to be valid (default) + if (!var->save_result.string_value.str) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + (var->save_result.string_value.str ? 
+ var->save_result.string_value.str : "NULL")); + return 1; + } + return 0; +} + +bool wsrep_node_name_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +// TODO: do something more elaborate, like checking connectivity +bool wsrep_node_address_check (sys_var *self, THD* thd, set_var* var) +{ + char addr_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + goto err; + + memcpy(addr_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + addr_buf[var->save_result.string_value.length]= 0; + + // TODO: for now 'allow' 0-length string to be valid (default) + return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_node_address_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +void wsrep_node_address_init (const char* value) +{ + if (wsrep_node_address && strcmp(wsrep_node_address, value)) + my_free ((void*)wsrep_node_address); + + wsrep_node_address= value ? 
my_strdup(PSI_INSTRUMENT_MEM, value, MYF(0)) : NULL; +} + +static void wsrep_slave_count_change_update () +{ + wsrep_slave_count_change = (wsrep_slave_threads - wsrep_running_applier_threads); + WSREP_DEBUG("Change on slave threads: New %ld old %lu difference %d", + wsrep_slave_threads, wsrep_running_applier_threads, wsrep_slave_count_change); +} + +bool wsrep_slave_threads_update (sys_var *self, THD* thd, enum_var_type type) +{ + if (!wsrep_cluster_address_exists()) + return false; + + mysql_mutex_unlock(&LOCK_global_system_variables); + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + mysql_mutex_lock(&LOCK_global_system_variables); + bool res= false; + + wsrep_slave_count_change_update(); + + if (wsrep_slave_count_change > 0) + { + WSREP_DEBUG("Creating %d applier threads, total %ld", wsrep_slave_count_change, wsrep_slave_threads); + wsrep_thread_create_failed.store(false, std::memory_order_relaxed); + res= wsrep_create_appliers(wsrep_slave_count_change, true); + mysql_mutex_unlock(&LOCK_global_system_variables); + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + // Thread creation and execution is asyncronous, therefore we need + // wait them to be started or error produced + while (wsrep_running_applier_threads != (ulong)wsrep_slave_threads && + !wsrep_thread_create_failed.load(std::memory_order_relaxed)) + { + my_sleep(1000); + } + + mysql_mutex_lock(&LOCK_global_system_variables); + + if (wsrep_thread_create_failed.load(std::memory_order_relaxed)) { + wsrep_slave_threads= wsrep_running_applier_threads; + return true; + } + + WSREP_DEBUG("Running %lu applier threads", wsrep_running_applier_threads); + wsrep_slave_count_change = 0; + } + else + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + + return res; +} + +bool wsrep_desync_check (sys_var *self, THD* thd, set_var* var) +{ + if (!WSREP_ON) + { + my_message(ER_WRONG_ARGUMENTS, "WSREP (galera) not started", MYF(0)); + return true; + } + + if (thd->global_read_lock.is_acquired()) + { + my_message 
(ER_CANNOT_USER, "Global read lock acquired. Can't set 'wsrep_desync'", MYF(0)); + return true; + } + + bool new_wsrep_desync= (bool) var->save_result.ulonglong_value; + if (wsrep_desync == new_wsrep_desync) { + if (new_wsrep_desync) { + push_warning (thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "'wsrep_desync' is already ON."); + } else { + push_warning (thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "'wsrep_desync' is already OFF."); + } + return false; + } + int ret= 1; + if (new_wsrep_desync) { + ret= Wsrep_server_state::instance().provider().desync(); + if (ret) { + WSREP_WARN ("SET desync failed %d for schema: %s, query: %s", ret, + thd->db.str, wsrep_thd_query(thd)); + my_error (ER_CANNOT_USER, MYF(0), "'desync'", thd->query()); + return true; + } + } else { + THD_STAGE_INFO(thd, stage_waiting_flow); + ret= Wsrep_server_state::instance().provider().resync(); + if (ret != WSREP_OK) { + WSREP_WARN ("SET resync failed %d for schema: %s, query: %s", ret, + thd->get_db(), wsrep_thd_query(thd)); + my_error (ER_CANNOT_USER, MYF(0), "'resync'", thd->query()); + return true; + } + } + return false; +} + +bool wsrep_desync_update (sys_var *self, THD* thd, enum_var_type type) +{ + return false; +} + +bool wsrep_trx_fragment_size_check (sys_var *self, THD* thd, set_var* var) +{ + if (var->value == NULL) { + return false; + } + + const ulong new_trx_fragment_size= var->value->val_uint(); + + if (!WSREP(thd) && new_trx_fragment_size > 0) { + push_warning (thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "Cannot set 'wsrep_trx_fragment_size' to a value other than " + "0 because wsrep is switched off."); + return true; + } + + if (new_trx_fragment_size > 0 && !wsrep_provider_is_SR_capable()) { + push_warning (thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "Cannot set 'wsrep_trx_fragment_size' to a value other than " + "0 because the wsrep_provider does not support streaming " + "replication."); + return 
true; + } + + if (wsrep_protocol_version < 4 && new_trx_fragment_size > 0) { + push_warning (thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "Cannot set 'wsrep_trx_fragment_size' to a value other than " + "0 because cluster is not yet operating in Galera 4 mode."); + return true; + } + + return false; +} + +bool wsrep_trx_fragment_size_update(sys_var* self, THD *thd, enum_var_type) +{ + WSREP_DEBUG("wsrep_trx_fragment_size_update: %llu", + thd->variables.wsrep_trx_fragment_size); + + // Give error if wsrep_trx_fragment_size is set and wsrep is disabled or + // provider is not loaded + if (!WSREP_ON || !Wsrep_server_state::instance().is_provider_loaded()) + { + push_warning (thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "Cannot set 'wsrep_trx_fragment_size' because " + "wsrep is switched off"); + return true; + } + + if (thd->variables.wsrep_trx_fragment_size) + { + return thd->wsrep_cs().enable_streaming( + wsrep_fragment_unit(thd->variables.wsrep_trx_fragment_unit), + size_t(thd->variables.wsrep_trx_fragment_size)); + } + else + { + thd->wsrep_cs().disable_streaming(); + return false; + } +} + +bool wsrep_trx_fragment_unit_update(sys_var* self, THD *thd, enum_var_type) +{ + WSREP_DEBUG("wsrep_trx_fragment_unit_update: %lu", + thd->variables.wsrep_trx_fragment_unit); + + // Give error if wsrep_trx_fragment_unit is set and wsrep is disabled or + // provider is not loaded + if (!WSREP_ON || !Wsrep_server_state::instance().is_provider_loaded()) + { + push_warning (thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "Cannot set 'wsrep_trx_fragment_unit' because " + "wsrep is switched off"); + return true; + } + + if (thd->variables.wsrep_trx_fragment_size) + { + return thd->wsrep_cs().enable_streaming( + wsrep_fragment_unit(thd->variables.wsrep_trx_fragment_unit), + size_t(thd->variables.wsrep_trx_fragment_size)); + } + return false; +} + +bool wsrep_max_ws_size_check(sys_var *self, THD* thd, set_var* var) +{ + if 
(!WSREP_ON) + { + my_message(ER_WRONG_ARGUMENTS, "WSREP (galera) not started", MYF(0)); + return true; + } + return false; +} + +bool wsrep_max_ws_size_update(sys_var *self, THD *thd, enum_var_type) +{ + char max_ws_size_opt[128]; + my_snprintf(max_ws_size_opt, sizeof(max_ws_size_opt), + "repl.max_ws_size=%d", wsrep_max_ws_size); + enum wsrep::provider::status ret= Wsrep_server_state::instance().provider().options(max_ws_size_opt); + if (ret) + { + WSREP_ERROR("Set options returned %d", ret); + return true; + } + return refresh_provider_options(); +} + +bool wsrep_mode_check(sys_var *self, THD* thd, set_var* var) +{ + return false; +} + +#if UNUSED /* eaec266eb16c (Sergei Golubchik 2014-09-28) */ +static SHOW_VAR wsrep_status_vars[]= +{ + {"connected", (char*) &wsrep_connected, SHOW_BOOL}, + {"ready", (char*) &wsrep_show_ready, SHOW_FUNC}, + {"cluster_state_uuid",(char*) &wsrep_cluster_state_uuid,SHOW_CHAR_PTR}, + {"cluster_conf_id", (char*) &wsrep_cluster_conf_id, SHOW_LONGLONG}, + {"cluster_status", (char*) &wsrep_cluster_status, SHOW_CHAR_PTR}, + {"cluster_size", (char*) &wsrep_cluster_size, SHOW_LONG_NOFLUSH}, + {"local_index", (char*) &wsrep_local_index, SHOW_LONG_NOFLUSH}, + {"local_bf_aborts", (char*) &wsrep_show_bf_aborts, SHOW_FUNC}, + {"provider_name", (char*) &wsrep_provider_name, SHOW_CHAR_PTR}, + {"provider_version", (char*) &wsrep_provider_version, SHOW_CHAR_PTR}, + {"provider_vendor", (char*) &wsrep_provider_vendor, SHOW_CHAR_PTR}, + {"provider_capabilities", (char*) &wsrep_provider_capabilities, SHOW_CHAR_PTR}, + {"thread_count", (char*) &wsrep_running_threads, SHOW_LONG_NOFLUSH}, + {"applier_thread_count", (char*)&wsrep_running_applier_threads, SHOW_LONG_NOFLUSH}, + {"rollbacker_thread_count", (char *)&wsrep_running_rollbacker_threads, SHOW_LONG_NOFLUSH}, +}; + +static int show_var_cmp(const void *var1, const void *var2) +{ + return strcasecmp(((SHOW_VAR*)var1)->name, ((SHOW_VAR*)var2)->name); +} + +/* + * Status variables stuff below + */ +static 
inline void +wsrep_assign_to_mysql (SHOW_VAR* mysql, wsrep_stats_var* wsrep_var) +{ + mysql->name= wsrep_var->name; + switch (wsrep_var->type) { + case WSREP_VAR_INT64: + mysql->value= (char*) &wsrep_var->value._int64; + mysql->type= SHOW_LONGLONG; + break; + case WSREP_VAR_STRING: + mysql->value= (char*) &wsrep_var->value._string; + mysql->type= SHOW_CHAR_PTR; + break; + case WSREP_VAR_DOUBLE: + mysql->value= (char*) &wsrep_var->value._double; + mysql->type= SHOW_DOUBLE; + break; + } +} +#endif /* UNUSED */ + +#if DYNAMIC +// somehow this mysql status thing works only with statically allocated arrays. +static SHOW_VAR* mysql_status_vars= NULL; +static int mysql_status_len= -1; +#else +static SHOW_VAR mysql_status_vars[512 + 1]; +static const int mysql_status_len= 512; +#endif + +static void export_wsrep_status_to_mysql(THD* thd) +{ + int wsrep_status_len, i; + + thd->wsrep_status_vars= Wsrep_server_state::instance().status(); + + wsrep_status_len= thd->wsrep_status_vars.size(); + +#if DYNAMIC + if (wsrep_status_len != mysql_status_len) { + void* tmp= my_realloc(key_memory_WSREP, + mysql_status_vars, + (wsrep_status_len + 1) * sizeof(SHOW_VAR), + MYF(MY_ALLOW_ZERO_PTR)); + if (!tmp) { + + sql_print_error ("Out of memory for wsrep status variables." 
+ "Number of variables: %d", wsrep_status_len); + return; + } + + mysql_status_len= wsrep_status_len; + mysql_status_vars= (SHOW_VAR*)tmp; + } + /* @TODO: fix this: */ +#else + if (mysql_status_len < wsrep_status_len) wsrep_status_len= mysql_status_len; +#endif + + for (i= 0; i < wsrep_status_len; i++) + { + mysql_status_vars[i].name= (char*)thd->wsrep_status_vars[i].name().c_str(); + mysql_status_vars[i].value= (char*)thd->wsrep_status_vars[i].value().c_str(); + mysql_status_vars[i].type= SHOW_CHAR; + } + + mysql_status_vars[wsrep_status_len].name = NullS; + mysql_status_vars[wsrep_status_len].value = NullS; + mysql_status_vars[wsrep_status_len].type = SHOW_LONG; +} + +int wsrep_show_status (THD *thd, SHOW_VAR *var, void *, + system_status_var *, enum_var_type) +{ + /* Note that we should allow show status like 'wsrep%' even + when WSREP(thd) is false. */ + if (WSREP_ON) + { + export_wsrep_status_to_mysql(thd); + var->type= SHOW_ARRAY; + var->value= (char *) &mysql_status_vars; + } + else + { + var->type= SHOW_CHAR; + var->value= (char*) "0"; + } + return 0; +} + +void wsrep_free_status (THD* thd) +{ + thd->wsrep_status_vars.clear(); +} + +void wsrep_free_status_vars() +{ +#if DYNAMIC + my_free(mysql_status_vars); + mysql_status_vars= NULL; + mysql_status_len= 0; +#endif +} + +bool wsrep_gtid_domain_id_update(sys_var* self, THD *thd, enum_var_type) +{ + WSREP_DEBUG("wsrep_gtid_domain_id_update: %llu", + wsrep_gtid_domain_id); + wsrep_gtid_server.domain_id= wsrep_gtid_domain_id; + return false; +} + diff --git a/sql/wsrep_var.h b/sql/wsrep_var.h new file mode 100644 index 00000000..4939b097 --- /dev/null +++ b/sql/wsrep_var.h @@ -0,0 +1,120 @@ +/* Copyright (C) 2013-2021 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA. */ + +#ifndef WSREP_VAR_H +#define WSREP_VAR_H + +#include + +#ifdef WITH_WSREP + +#define WSREP_CLUSTER_NAME "my_wsrep_cluster" +#define WSREP_NODE_INCOMING_AUTO "AUTO" +#define WSREP_START_POSITION_ZERO "00000000-0000-0000-0000-000000000000:-1" +#define WSREP_START_POSITION_ZERO_GTID "00000000-0000-0000-0000-000000000000:-1,0-0-0" + +// MySQL variables funcs + +#include "sql_priv.h" +#include +#include + +class sys_var; +class set_var; +class THD; + +int wsrep_init_vars(); +void wsrep_set_wsrep_on(THD *thd); +void wsrep_free_status_vars(); + +#define CHECK_ARGS (sys_var *self, THD* thd, set_var *var) +#define UPDATE_ARGS (sys_var *self, THD* thd, enum_var_type type) +#define DEFAULT_ARGS (THD* thd, enum_var_type var_type) +#define INIT_ARGS (const char* opt) + +extern bool wsrep_causal_reads_update UPDATE_ARGS; +extern bool wsrep_on_check CHECK_ARGS; +extern bool wsrep_on_update UPDATE_ARGS; +extern bool wsrep_sync_wait_update UPDATE_ARGS; +extern bool wsrep_start_position_check CHECK_ARGS; +extern bool wsrep_start_position_update UPDATE_ARGS; +extern bool wsrep_start_position_init INIT_ARGS; + +extern bool wsrep_provider_check CHECK_ARGS; +extern bool wsrep_provider_update UPDATE_ARGS; +extern void wsrep_provider_init INIT_ARGS; + +extern bool wsrep_provider_options_check CHECK_ARGS; +extern bool wsrep_provider_options_update UPDATE_ARGS; +extern void wsrep_provider_options_init INIT_ARGS; + +extern bool wsrep_cluster_address_check CHECK_ARGS; +extern bool wsrep_cluster_address_update UPDATE_ARGS; +extern void 
wsrep_cluster_address_init INIT_ARGS; + +extern bool wsrep_cluster_name_check CHECK_ARGS; +extern bool wsrep_cluster_name_update UPDATE_ARGS; + +extern bool wsrep_node_name_check CHECK_ARGS; +extern bool wsrep_node_name_update UPDATE_ARGS; + +extern bool wsrep_node_address_check CHECK_ARGS; +extern bool wsrep_node_address_update UPDATE_ARGS; +extern void wsrep_node_address_init INIT_ARGS; + +extern bool wsrep_sst_method_check CHECK_ARGS; +extern bool wsrep_sst_method_update UPDATE_ARGS; +extern void wsrep_sst_method_init INIT_ARGS; + +extern bool wsrep_sst_receive_address_check CHECK_ARGS; +extern bool wsrep_sst_receive_address_update UPDATE_ARGS; + +extern bool wsrep_sst_auth_check CHECK_ARGS; +extern bool wsrep_sst_auth_update UPDATE_ARGS; + +extern bool wsrep_sst_donor_check CHECK_ARGS; +extern bool wsrep_sst_donor_update UPDATE_ARGS; + +extern bool wsrep_slave_threads_check CHECK_ARGS; +extern bool wsrep_slave_threads_update UPDATE_ARGS; + +extern bool wsrep_desync_check CHECK_ARGS; +extern bool wsrep_desync_update UPDATE_ARGS; + +extern bool wsrep_trx_fragment_size_check CHECK_ARGS; +extern bool wsrep_trx_fragment_size_update UPDATE_ARGS; + +extern bool wsrep_trx_fragment_unit_update UPDATE_ARGS; + +extern bool wsrep_max_ws_size_check CHECK_ARGS; +extern bool wsrep_max_ws_size_update UPDATE_ARGS; + +extern bool wsrep_reject_queries_update UPDATE_ARGS; + +extern bool wsrep_debug_update UPDATE_ARGS; + +extern bool wsrep_gtid_seq_no_check CHECK_ARGS; + +extern bool wsrep_gtid_domain_id_update UPDATE_ARGS; + +extern bool wsrep_mode_check CHECK_ARGS; +#else /* WITH_WSREP */ + +#define wsrep_provider_init(X) +#define wsrep_init_vars() (0) +#define wsrep_start_position_init(X) + +#endif /* WITH_WSREP */ +#endif /* WSREP_VAR_H */ diff --git a/sql/wsrep_xid.cc b/sql/wsrep_xid.cc new file mode 100644 index 00000000..34eafe9c --- /dev/null +++ b/sql/wsrep_xid.cc @@ -0,0 +1,254 @@ +/* Copyright 2015 Codership Oy + + This program is free software; you can redistribute it 
and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + */ + +//! @file some utility functions and classes not directly related to replication + +#include "mariadb.h" +#include "wsrep_xid.h" +#include "sql_class.h" +#include "wsrep_mysqld.h" // for logging macros + +#include + +#include /* std::sort() */ +/* + * WSREPXid + */ + +#define WSREP_XID_PREFIX "WSREPXi" +#define WSREP_XID_PREFIX_LEN 7 +#define WSREP_XID_VERSION_OFFSET WSREP_XID_PREFIX_LEN +#define WSREP_XID_VERSION_1 'd' +#define WSREP_XID_VERSION_2 'e' +#define WSREP_XID_VERSION_3 'f' +#define WSREP_XID_UUID_OFFSET 8 +#define WSREP_XID_SEQNO_OFFSET (WSREP_XID_UUID_OFFSET + sizeof(wsrep_uuid_t)) +#define WSREP_XID_GTRID_LEN_V_1_2 (WSREP_XID_SEQNO_OFFSET + sizeof(wsrep_seqno_t)) +#define WSREP_XID_RPL_GTID_OFFSET (WSREP_XID_SEQNO_OFFSET + sizeof(wsrep_seqno_t)) +#define WSREP_XID_GTRID_LEN_V_3 (WSREP_XID_RPL_GTID_OFFSET + sizeof(wsrep_server_gtid_t)) + +void wsrep_xid_init(XID* xid, const wsrep::gtid& wsgtid, const wsrep_server_gtid_t& gtid) +{ + xid->formatID= 1; + xid->gtrid_length= WSREP_XID_GTRID_LEN_V_3; + xid->bqual_length= 0; + memset(xid->data, 0, sizeof(xid->data)); + memcpy(xid->data, WSREP_XID_PREFIX, WSREP_XID_PREFIX_LEN); + xid->data[WSREP_XID_VERSION_OFFSET]= WSREP_XID_VERSION_3; + memcpy(xid->data + WSREP_XID_UUID_OFFSET, wsgtid.id().data(),sizeof(wsrep::id)); + int8store(xid->data + WSREP_XID_SEQNO_OFFSET, wsgtid.seqno().get()); + memcpy(xid->data + 
WSREP_XID_RPL_GTID_OFFSET, >id, sizeof(wsrep_server_gtid_t)); +} + +extern "C" +int wsrep_is_wsrep_xid(const void* xid_ptr) +{ + const XID* xid= static_cast(xid_ptr); + return (xid->formatID == 1 && + xid->bqual_length == 0 && + xid->gtrid_length >= static_cast(WSREP_XID_GTRID_LEN_V_1_2) && + !memcmp(xid->data, WSREP_XID_PREFIX, WSREP_XID_PREFIX_LEN) && + (((xid->data[WSREP_XID_VERSION_OFFSET] == WSREP_XID_VERSION_1 || + xid->data[WSREP_XID_VERSION_OFFSET] == WSREP_XID_VERSION_2) && + xid->gtrid_length == WSREP_XID_GTRID_LEN_V_1_2) || + (xid->data[WSREP_XID_VERSION_OFFSET] == WSREP_XID_VERSION_3 && + xid->gtrid_length == WSREP_XID_GTRID_LEN_V_3))); +} + +const unsigned char* wsrep_xid_uuid(const xid_t* xid) +{ + DBUG_ASSERT(xid); + static wsrep::id const undefined; + if (wsrep_is_wsrep_xid(xid)) + return reinterpret_cast + (xid->data + WSREP_XID_UUID_OFFSET); + else + return static_cast(wsrep::id::undefined().data()); +} + +const wsrep::id& wsrep_xid_uuid(const XID& xid) +{ + compile_time_assert(sizeof(wsrep::id) == sizeof(wsrep_uuid_t)); + return *reinterpret_cast(wsrep_xid_uuid(&xid)); +} + +long long wsrep_xid_seqno(const xid_t* xid) +{ + DBUG_ASSERT(xid); + long long ret= wsrep::seqno::undefined().get(); + if (wsrep_is_wsrep_xid(xid)) + { + switch (xid->data[WSREP_XID_VERSION_OFFSET]) + { + case WSREP_XID_VERSION_1: + memcpy(&ret, xid->data + WSREP_XID_SEQNO_OFFSET, sizeof ret); + break; + case WSREP_XID_VERSION_2: + case WSREP_XID_VERSION_3: + ret= sint8korr(xid->data + WSREP_XID_SEQNO_OFFSET); + break; + default: + break; + } + } + return ret; +} + +wsrep::seqno wsrep_xid_seqno(const XID& xid) +{ + return wsrep::seqno(wsrep_xid_seqno(&xid)); +} + +static my_bool set_SE_checkpoint(THD* unused, plugin_ref plugin, void* arg) +{ + XID* xid= static_cast(arg); + handlerton* hton= plugin_data(plugin, handlerton *); + + if (hton->set_checkpoint) + { + const unsigned char* uuid= wsrep_xid_uuid(xid); + char uuid_str[40]= {0, }; + wsrep_uuid_print((const 
wsrep_uuid_t*)uuid, uuid_str, sizeof(uuid_str)); + WSREP_DEBUG("Set WSREPXid for InnoDB: %s:%lld", + uuid_str, (long long)wsrep_xid_seqno(xid)); + hton->set_checkpoint(hton, xid); + } + return FALSE; +} + +bool wsrep_set_SE_checkpoint(XID& xid) +{ + return plugin_foreach(NULL, set_SE_checkpoint, MYSQL_STORAGE_ENGINE_PLUGIN, + &xid); +} + +bool wsrep_set_SE_checkpoint(const wsrep::gtid& wsgtid, const wsrep_server_gtid_t& gtid) +{ + XID xid; + wsrep_xid_init(&xid, wsgtid, gtid); + return wsrep_set_SE_checkpoint(xid); +} + +static my_bool get_SE_checkpoint(THD* unused, plugin_ref plugin, void* arg) +{ + XID* xid= reinterpret_cast(arg); + handlerton* hton= plugin_data(plugin, handlerton *); + + if (hton->get_checkpoint) + { + hton->get_checkpoint(hton, xid); + wsrep_uuid_t uuid; + memcpy(&uuid, wsrep_xid_uuid(xid), sizeof(uuid)); + char uuid_str[40]= {0, }; + wsrep_uuid_print(&uuid, uuid_str, sizeof(uuid_str)); + WSREP_DEBUG("Read WSREPXid from InnoDB: %s:%lld", + uuid_str, (long long)wsrep_xid_seqno(xid)); + } + return FALSE; +} + +bool wsrep_get_SE_checkpoint(XID& xid) +{ + return plugin_foreach(NULL, get_SE_checkpoint, MYSQL_STORAGE_ENGINE_PLUGIN, + &xid); +} + +static bool wsrep_get_SE_checkpoint_common(XID& xid) +{ + xid.null(); + + if (wsrep_get_SE_checkpoint(xid)) + { + return FALSE; + } + + if (xid.is_null()) + { + return FALSE; + } + + if (!wsrep_is_wsrep_xid(&xid)) + { + WSREP_WARN("Read non-wsrep XID from storage engines."); + return FALSE; + } + + return TRUE; +} + +template<> +wsrep::gtid wsrep_get_SE_checkpoint() +{ + XID xid; + + if (!wsrep_get_SE_checkpoint_common(xid)) + { + return wsrep::gtid(); + } + + return wsrep::gtid(wsrep_xid_uuid(xid),wsrep_xid_seqno(xid)); +} + +template<> +wsrep_server_gtid_t wsrep_get_SE_checkpoint() +{ + XID xid; + wsrep_server_gtid_t gtid= {0,0,0}; + + if (!wsrep_get_SE_checkpoint_common(xid)) + { + return gtid; + } + + if (xid.data[WSREP_XID_VERSION_OFFSET] == WSREP_XID_VERSION_3) + { + memcpy(>id, 
&xid.data[WSREP_XID_RPL_GTID_OFFSET], sizeof(wsrep_server_gtid_t)); + } + + return gtid; +} + +/* + Sort order for XIDs. Wsrep XIDs are sorted according to + seqno in ascending order. Non-wsrep XIDs are considered + equal among themselves and greater than with respect + to wsrep XIDs. + */ +struct Wsrep_xid_cmp +{ + bool operator()(const XID& left, const XID& right) const + { + const bool left_is_wsrep= wsrep_is_wsrep_xid(&left); + const bool right_is_wsrep= wsrep_is_wsrep_xid(&right); + if (left_is_wsrep && right_is_wsrep) + { + return (wsrep_xid_seqno(&left) < wsrep_xid_seqno(&right)); + } + else if (left_is_wsrep) + { + return true; + } + else + { + return false; + } + } +}; + +void wsrep_sort_xid_array(XID *array, int len) +{ + std::sort(array, array + len, Wsrep_xid_cmp()); +} diff --git a/sql/wsrep_xid.h b/sql/wsrep_xid.h new file mode 100644 index 00000000..45ba6ffe --- /dev/null +++ b/sql/wsrep_xid.h @@ -0,0 +1,39 @@ +/* Copyright (C) 2015 Codership Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA. 
*/ + +#ifndef WSREP_XID_H +#define WSREP_XID_H + +#include + +#ifdef WITH_WSREP + +#include "wsrep_mysqld.h" +#include "wsrep/gtid.hpp" +#include "handler.h" // XID typedef + +void wsrep_xid_init(xid_t*, const wsrep::gtid&, const wsrep_server_gtid_t&); +const wsrep::id& wsrep_xid_uuid(const XID&); +wsrep::seqno wsrep_xid_seqno(const XID&); + +template T wsrep_get_SE_checkpoint(); +bool wsrep_set_SE_checkpoint(const wsrep::gtid& gtid, const wsrep_server_gtid_t&); +//void wsrep_get_SE_checkpoint(XID&); /* uncomment if needed */ +//void wsrep_set_SE_checkpoint(XID&); /* uncomment if needed */ + +void wsrep_sort_xid_array(XID *array, int len); + +#endif /* WITH_WSREP */ +#endif /* WSREP_UTILS_H */ diff --git a/sql/xa.cc b/sql/xa.cc new file mode 100644 index 00000000..49be7fb5 --- /dev/null +++ b/sql/xa.cc @@ -0,0 +1,1171 @@ +/* + Copyright (c) 2000, 2016, Oracle and/or its affiliates. + Copyright (c) 2009, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +*/ + +#include "mariadb.h" +#include "sql_class.h" +#include "transaction.h" +#include "my_cpu.h" +#include +#include + +static bool slave_applier_reset_xa_trans(THD *thd); + +/*************************************************************************** + Handling of XA id caching +***************************************************************************/ +struct XID_cache_insert_element +{ + enum xa_states xa_state; + XID *xid; + XID_cache_element *xid_cache_element; + + XID_cache_insert_element(enum xa_states xa_state_arg, XID *xid_arg): + xa_state(xa_state_arg), xid(xid_arg) {} +}; + + +class XID_cache_element +{ + /* + m_state is used to prevent elements from being deleted while XA RECOVER + iterates xid cache and to prevent recovered elments from being acquired by + multiple threads. + + bits 1..29 are reference counter + bit 30 is RECOVERED flag + bit 31 is ACQUIRED flag (thread owns this xid) + bit 32 is unused + + Newly allocated and deleted elements have m_state set to 0. + + On lock() m_state is atomically incremented. It also creates load-ACQUIRE + memory barrier to make sure m_state is actually updated before furhter + memory accesses. Attempting to lock an element that has neither ACQUIRED + nor RECOVERED flag set returns failure and further accesses to element + memory are forbidden. + + On unlock() m_state is decremented. It also creates store-RELEASE memory + barrier to make sure m_state is actually updated after preceding memory + accesses. + + ACQUIRED flag is set when thread registers it's xid or when thread acquires + recovered xid. + + RECOVERED flag is set for elements found during crash recovery. + + ACQUIRED and RECOVERED flags are cleared before element is deleted from + hash in a spin loop, after last reference is released. 
+ */ + std::atomic m_state; +public: + static const int32 ACQUIRED= 1 << 30; + static const int32 RECOVERED= 1 << 29; + /* Error reported by the Resource Manager (RM) to the Transaction Manager. */ + uint rm_error; + enum xa_states xa_state; + XID xid; + bool is_set(int32_t flag) + { return m_state.load(std::memory_order_relaxed) & flag; } + void set(int32_t flag) + { + DBUG_ASSERT(!is_set(ACQUIRED | RECOVERED)); + m_state.fetch_add(flag, std::memory_order_relaxed); + } + bool lock() + { + int32_t old= m_state.fetch_add(1, std::memory_order_acquire); + if (old & (ACQUIRED | RECOVERED)) + return true; + unlock(); + return false; + } + void unlock() + { m_state.fetch_sub(1, std::memory_order_release); } + void mark_uninitialized() + { + int32_t old= ACQUIRED; + while (!m_state.compare_exchange_weak(old, 0, + std::memory_order_relaxed, + std::memory_order_relaxed)) + { + old&= ACQUIRED | RECOVERED; + (void) LF_BACKOFF(); + } + } + void acquired_to_recovered() + { + m_state.fetch_or(RECOVERED, std::memory_order_relaxed); + m_state.fetch_and(~ACQUIRED, std::memory_order_release); + } + bool acquire_recovered() + { + int32_t old= RECOVERED; + while (!m_state.compare_exchange_weak(old, ACQUIRED | RECOVERED, + std::memory_order_acquire, + std::memory_order_relaxed)) + { + if (!(old & RECOVERED) || (old & ACQUIRED)) + return false; + old= RECOVERED; + (void) LF_BACKOFF(); + } + return true; + } + static void lf_hash_initializer(LF_HASH *hash __attribute__((unused)), + XID_cache_element *element, + XID_cache_insert_element *new_element) + { + DBUG_ASSERT(!element->is_set(ACQUIRED | RECOVERED)); + element->rm_error= 0; + element->xa_state= new_element->xa_state; + element->xid.set(new_element->xid); + new_element->xid_cache_element= element; + } + static void lf_alloc_constructor(uchar *ptr) + { + XID_cache_element *element= (XID_cache_element*) (ptr + LF_HASH_OVERHEAD); + element->m_state= 0; + } + static void lf_alloc_destructor(uchar *ptr) + { + 
DBUG_ASSERT(!reinterpret_cast(ptr + LF_HASH_OVERHEAD) + ->is_set(ACQUIRED)); + } + static uchar *key(const XID_cache_element *element, size_t *length, + my_bool not_used __attribute__((unused))) + { + *length= element->xid.key_length(); + return element->xid.key(); + } +}; + + +static LF_HASH xid_cache; +static bool xid_cache_inited; + + +enum xa_states XID_STATE::get_state_code() const +{ + return xid_cache_element ? xid_cache_element->xa_state : XA_NO_STATE; +} + + +bool THD::fix_xid_hash_pins() +{ + if (!xid_hash_pins) + xid_hash_pins= lf_hash_get_pins(&xid_cache); + return !xid_hash_pins; +} + + +void XID_STATE::set_error(uint error) +{ + if (is_explicit_XA()) + xid_cache_element->rm_error= error; +} + + +void XID_STATE::er_xaer_rmfail() const +{ + static const char *xa_state_names[]= + { "ACTIVE", "IDLE", "PREPARED", "ROLLBACK ONLY", "NON-EXISTING"}; + my_error(ER_XAER_RMFAIL, MYF(0), xa_state_names[get_state_code()]); +} + + +/** + Check that XA transaction has an uncommitted work. Report an error + to the user in case when there is an uncommitted work for XA transaction. + + @return result of check + @retval false XA transaction is NOT in state IDLE, PREPARED + or ROLLBACK_ONLY. + @retval true XA transaction is in state IDLE or PREPARED + or ROLLBACK_ONLY. 
+*/ + +bool XID_STATE::check_has_uncommitted_xa() const +{ + if (is_explicit_XA() && xid_cache_element->xa_state != XA_ACTIVE) + { + er_xaer_rmfail(); + return true; + } + return false; +} + + +XID *XID_STATE::get_xid() const +{ + DBUG_ASSERT(is_explicit_XA()); + return &xid_cache_element->xid; +} + + +void xid_cache_init() +{ + xid_cache_inited= true; + lf_hash_init(&xid_cache, sizeof(XID_cache_element), LF_HASH_UNIQUE, 0, 0, + (my_hash_get_key) XID_cache_element::key, &my_charset_bin); + xid_cache.alloc.constructor= XID_cache_element::lf_alloc_constructor; + xid_cache.alloc.destructor= XID_cache_element::lf_alloc_destructor; + xid_cache.initializer= + (lf_hash_initializer) XID_cache_element::lf_hash_initializer; +} + + +void xid_cache_free() +{ + if (xid_cache_inited) + { + lf_hash_destroy(&xid_cache); + xid_cache_inited= false; + } +} + + +/** + Find recovered XA transaction by XID. +*/ + +static XID_cache_element *xid_cache_search(THD *thd, XID *xid) +{ + DBUG_ASSERT(thd->xid_hash_pins); + XID_cache_element *element= + (XID_cache_element*) lf_hash_search(&xid_cache, thd->xid_hash_pins, + xid->key(), xid->key_length()); + if (element) + { + /* The element can be removed from lf_hash by other thread, but + element->acquire_recovered() will return false in this case. */ + if (!element->acquire_recovered()) + element= 0; + lf_hash_search_unpin(thd->xid_hash_pins); + /* Once the element is acquired (i.e. got the ACQUIRED bit) by this thread, + only this thread can delete it. The deletion happens in xid_cache_delete(). + See also the XID_cache_element documentation. 
*/ + DEBUG_SYNC(thd, "xa_after_search"); + } + return element; +} + + +bool xid_cache_insert(XID *xid) +{ + XID_cache_insert_element new_element(XA_PREPARED, xid); + LF_PINS *pins; + + if (!(pins= lf_hash_get_pins(&xid_cache))) + return true; + + int res= lf_hash_insert(&xid_cache, pins, &new_element); + switch (res) + { + case 0: + new_element.xid_cache_element->set(XID_cache_element::RECOVERED); + break; + case 1: + res= 0; + } + lf_hash_put_pins(pins); + return res; +} + + +bool xid_cache_insert(THD *thd, XID_STATE *xid_state, XID *xid) +{ + XID_cache_insert_element new_element(XA_ACTIVE, xid); + + if (thd->fix_xid_hash_pins()) + return true; + + int res= lf_hash_insert(&xid_cache, thd->xid_hash_pins, &new_element); + switch (res) + { + case 0: + xid_state->xid_cache_element= new_element.xid_cache_element; + xid_state->xid_cache_element->set(XID_cache_element::ACQUIRED); + break; + case 1: + my_error(ER_XAER_DUPID, MYF(0)); + } + return res; +} + + +static void xid_cache_delete(THD *thd, XID_cache_element *&element) +{ + DBUG_ASSERT(thd->xid_hash_pins); + element->mark_uninitialized(); + lf_hash_delete(&xid_cache, thd->xid_hash_pins, + element->xid.key(), element->xid.key_length()); +} + + +void xid_cache_delete(THD *thd, XID_STATE *xid_state) +{ + DBUG_ASSERT(xid_state->is_explicit_XA()); + xid_cache_delete(thd, xid_state->xid_cache_element); + xid_state->xid_cache_element= 0; +} + + +struct xid_cache_iterate_arg +{ + my_hash_walk_action action; + void *argument; +}; + +static my_bool xid_cache_iterate_callback(XID_cache_element *element, + xid_cache_iterate_arg *arg) +{ + my_bool res= FALSE; + if (element->lock()) + { + res= arg->action(element, arg->argument); + element->unlock(); + } + return res; +} + +static int xid_cache_iterate(THD *thd, my_hash_walk_action action, void *arg) +{ + xid_cache_iterate_arg argument= { action, arg }; + return thd->fix_xid_hash_pins() ? 
-1 : + lf_hash_iterate(&xid_cache, thd->xid_hash_pins, + (my_hash_walk_action) xid_cache_iterate_callback, + &argument); +} + + +/** + Mark a XA transaction as rollback-only if the RM unilaterally + rolled back the transaction branch. + + @note If a rollback was requested by the RM, this function sets + the appropriate rollback error code and transits the state + to XA_ROLLBACK_ONLY. + + @return TRUE if transaction was rolled back or if the transaction + state is XA_ROLLBACK_ONLY. FALSE otherwise. +*/ +static bool xa_trans_rolled_back(XID_cache_element *element) +{ + if (element->rm_error) + { + switch (element->rm_error) { + case ER_LOCK_WAIT_TIMEOUT: + my_error(ER_XA_RBTIMEOUT, MYF(0)); + break; + case ER_LOCK_DEADLOCK: + my_error(ER_XA_RBDEADLOCK, MYF(0)); + break; + default: + my_error(ER_XA_RBROLLBACK, MYF(0)); + } + element->xa_state= XA_ROLLBACK_ONLY; + } + + return element->xa_state == XA_ROLLBACK_ONLY; +} + + +/** + Rollback the active XA transaction. + + @return TRUE if the rollback failed, FALSE otherwise. +*/ + +bool xa_trans_force_rollback(THD *thd) +{ + bool rc= false; + + if (ha_rollback_trans(thd, true)) + { + my_error(ER_XAER_RMERR, MYF(0)); + rc= true; + } + thd->variables.option_bits&= + ~(OPTION_BEGIN | OPTION_BINLOG_THIS_TRX | OPTION_GTID_BEGIN); + thd->transaction->all.reset(); + thd->server_status&= + ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); + DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); + xid_cache_delete(thd, &thd->transaction->xid_state); + + trans_track_end_trx(thd); + thd->mdl_context.release_transactional_locks(thd); + + return rc; +} + + +/** + Starts an XA transaction with the given xid value. 
+ + @param thd Current thread + + @retval FALSE Success + @retval TRUE Failure +*/ + +bool trans_xa_start(THD *thd) +{ + DBUG_ENTER("trans_xa_start"); + + if (thd->transaction->xid_state.is_explicit_XA() && + thd->transaction->xid_state.xid_cache_element->xa_state == XA_IDLE && + thd->lex->xa_opt == XA_RESUME) + { + bool not_equal= + !thd->transaction->xid_state.xid_cache_element->xid.eq(thd->lex->xid); + if (not_equal) + my_error(ER_XAER_NOTA, MYF(0)); + else + { + thd->transaction->xid_state.xid_cache_element->xa_state= XA_ACTIVE; + MYSQL_SET_TRANSACTION_XA_STATE(thd->m_transaction_psi, XA_ACTIVE); + } + DBUG_RETURN(not_equal); + } + + /* TODO: JOIN is not supported yet. */ + if (thd->lex->xa_opt != XA_NONE) + my_error(ER_XAER_INVAL, MYF(0)); + else if (!thd->lex->xid->gtrid_length) + my_error(ER_XAER_INVAL, MYF(0)); + else if (thd->transaction->xid_state.is_explicit_XA()) + thd->transaction->xid_state.er_xaer_rmfail(); + else if (thd->locked_tables_mode || thd->in_active_multi_stmt_transaction()) + my_error(ER_XAER_OUTSIDE, MYF(0)); + else if (!trans_begin(thd)) + { + MYSQL_SET_TRANSACTION_XID(thd->m_transaction_psi, thd->lex->xid, XA_ACTIVE); + if (xid_cache_insert(thd, &thd->transaction->xid_state, thd->lex->xid)) + { + trans_rollback(thd); + DBUG_RETURN(true); + } + DBUG_RETURN(FALSE); + } + + DBUG_RETURN(TRUE); +} + + +/** + Put a XA transaction in the IDLE state. + + @param thd Current thread + + @retval FALSE Success + @retval TRUE Failure +*/ + +bool trans_xa_end(THD *thd) +{ + DBUG_ENTER("trans_xa_end"); + + /* TODO: SUSPEND and FOR MIGRATE are not supported yet. 
*/ + if (thd->lex->xa_opt != XA_NONE) + my_error(ER_XAER_INVAL, MYF(0)); + else if (!thd->transaction->xid_state.is_explicit_XA() || + thd->transaction->xid_state.xid_cache_element->xa_state != XA_ACTIVE) + thd->transaction->xid_state.er_xaer_rmfail(); + else if (!thd->transaction->xid_state.xid_cache_element->xid.eq(thd->lex->xid)) + my_error(ER_XAER_NOTA, MYF(0)); + else if (!xa_trans_rolled_back(thd->transaction->xid_state.xid_cache_element)) + { + thd->transaction->xid_state.xid_cache_element->xa_state= XA_IDLE; + MYSQL_SET_TRANSACTION_XA_STATE(thd->m_transaction_psi, XA_IDLE); + } + + DBUG_RETURN(thd->is_error() || + thd->transaction->xid_state.xid_cache_element->xa_state != XA_IDLE); +} + + +/** + Put a XA transaction in the PREPARED state. + + @param thd Current thread + + @retval FALSE Success + @retval TRUE Failure +*/ + +bool trans_xa_prepare(THD *thd) +{ + int res= 1; + + DBUG_ENTER("trans_xa_prepare"); + + if (!thd->transaction->xid_state.is_explicit_XA() || + thd->transaction->xid_state.xid_cache_element->xa_state != XA_IDLE) + thd->transaction->xid_state.er_xaer_rmfail(); + else if (!thd->transaction->xid_state.xid_cache_element->xid.eq(thd->lex->xid)) + my_error(ER_XAER_NOTA, MYF(0)); + else + { + /* + Acquire metadata lock which will ensure that COMMIT is blocked + by active FLUSH TABLES WITH READ LOCK (and vice versa COMMIT in + progress blocks FTWRL). + + We allow FLUSHer to COMMIT; we assume FLUSHer knows what it does. 
+ */ + MDL_request mdl_request; + MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, + MDL_STATEMENT); + if (thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout) || + ha_prepare(thd)) + { + if (!mdl_request.ticket) + ha_rollback_trans(thd, TRUE); + thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_BINLOG_THIS_TRX); + thd->transaction->all.reset(); + thd->server_status&= + ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); + xid_cache_delete(thd, &thd->transaction->xid_state); + my_error(ER_XA_RBROLLBACK, MYF(0)); + } + else + { + thd->transaction->xid_state.xid_cache_element->xa_state= XA_PREPARED; + MYSQL_SET_TRANSACTION_XA_STATE(thd->m_transaction_psi, XA_PREPARED); + res= thd->variables.pseudo_slave_mode || thd->slave_thread ? + slave_applier_reset_xa_trans(thd) : 0; + } + } + + DBUG_RETURN(res); +} + + +/** + Commit and terminate the a XA transaction. + Transactional locks are released if transaction ended + + @param thd Current thread + + @retval FALSE Success + @retval TRUE Failure + +*/ + +bool trans_xa_commit(THD *thd) +{ + bool res= true; + XID_STATE &xid_state= thd->transaction->xid_state; + + DBUG_ENTER("trans_xa_commit"); + + if (!xid_state.is_explicit_XA() || + !xid_state.xid_cache_element->xid.eq(thd->lex->xid)) + { + if (thd->in_multi_stmt_transaction_mode()) + { + /* + Not allow to commit from inside an not-"native" to xid + ongoing transaction: the commit effect can't be reversed. + */ + my_error(ER_XAER_OUTSIDE, MYF(0)); + DBUG_RETURN(TRUE); + } + if (thd->lex->xa_opt != XA_NONE) + { + /* + Not allow to commit with one phase a prepared xa out of compatibility + with the native commit branch's error out. 
+ */ + my_error(ER_XAER_INVAL, MYF(0)); + DBUG_RETURN(TRUE); + } + if (thd->fix_xid_hash_pins()) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + DBUG_RETURN(TRUE); + } + + if (auto xs= xid_cache_search(thd, thd->lex->xid)) + { + bool xid_deleted= false; + MDL_request mdl_request; + bool rw_trans= (xs->rm_error != ER_XA_RBROLLBACK); + + if (rw_trans && thd->is_read_only_ctx()) + { + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only"); + res= 1; + goto _end_external_xid; + } + + res= xa_trans_rolled_back(xs); + /* + Acquire metadata lock which will ensure that COMMIT is blocked + by active FLUSH TABLES WITH READ LOCK (and vice versa COMMIT in + progress blocks FTWRL). + + We allow FLUSHer to COMMIT; we assume FLUSHer knows what it does. + */ + MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, + MDL_EXPLICIT); + if (thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout)) + { + /* + We can't rollback an XA transaction on lock failure due to + Innodb redo log and bin log update is involved in rollback. + Return error to user for a retry. + */ + DBUG_ASSERT(thd->is_error()); + + res= true; + goto _end_external_xid; + } + else + { + thd->backup_commit_lock= &mdl_request; + } + DBUG_ASSERT(!xid_state.xid_cache_element); + + xid_state.xid_cache_element= xs; + ha_commit_or_rollback_by_xid(thd->lex->xid, !res); + if (!res && thd->is_error()) + { + // hton completion error retains xs/xid in the cache, + // unless there had been already one as reflected by `res`. 
+ res= true; + goto _end_external_xid; + } + xid_cache_delete(thd, xs); + xid_deleted= true; + + _end_external_xid: + xid_state.xid_cache_element= 0; + res= res || thd->is_error(); + if (!xid_deleted) + xs->acquired_to_recovered(); + if (mdl_request.ticket) + { + thd->mdl_context.release_lock(mdl_request.ticket); + thd->backup_commit_lock= 0; + } + } + else + my_error(ER_XAER_NOTA, MYF(0)); + DBUG_RETURN(res); + } + + if (thd->transaction->all.is_trx_read_write() && thd->is_read_only_ctx()) + { + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only"); + DBUG_RETURN(TRUE); + } else if (xa_trans_rolled_back(xid_state.xid_cache_element)) + { + xa_trans_force_rollback(thd); + DBUG_RETURN(thd->is_error()); + } + else if (xid_state.xid_cache_element->xa_state == XA_IDLE && + thd->lex->xa_opt == XA_ONE_PHASE) + { + int r= ha_commit_trans(thd, TRUE); + if ((res= MY_TEST(r))) + my_error(r == 1 ? ER_XA_RBROLLBACK : ER_XAER_RMERR, MYF(0)); + } + else if (thd->transaction->xid_state.xid_cache_element->xa_state == XA_PREPARED) + { + MDL_request mdl_request; + if (thd->lex->xa_opt != XA_NONE) + { + my_error(ER_XAER_INVAL, MYF(0)); + DBUG_RETURN(TRUE); + } + + /* + Acquire metadata lock which will ensure that COMMIT is blocked + by active FLUSH TABLES WITH READ LOCK (and vice versa COMMIT in + progress blocks FTWRL). + + We allow FLUSHer to COMMIT; we assume FLUSHer knows what it does. + */ + MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, + MDL_TRANSACTION); + + if (thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout)) + { + /* + We can't rollback an XA transaction on lock failure due to + Innodb redo log and bin log update is involved in rollback. + Return error to user for a retry. 
+ */ + my_error(ER_XAER_RMERR, MYF(0)); + DBUG_RETURN(true); + } + else + { + DEBUG_SYNC(thd, "trans_xa_commit_after_acquire_commit_lock"); + + res= MY_TEST(ha_commit_one_phase(thd, 1)); + if (res) + my_error(ER_XAER_RMERR, MYF(0)); + else + { + /* + Since we don't call ha_commit_trans() for prepared transactions, + we need to explicitly mark the transaction as committed. + */ + MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi); + } + + thd->m_transaction_psi= NULL; + } + } + else + { + xid_state.er_xaer_rmfail(); + DBUG_RETURN(TRUE); + } + + thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_BINLOG_THIS_TRX); + thd->transaction->all.reset(); + thd->server_status&= + ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); + DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); + xid_cache_delete(thd, &xid_state); + + trans_track_end_trx(thd); + thd->mdl_context.release_transactional_locks(thd); + + /* The transaction should be marked as complete in P_S. */ + DBUG_ASSERT(thd->m_transaction_psi == NULL || res); + DBUG_RETURN(res); +} + + +/** + Roll back and terminate a XA transaction. 
+ Transactional locks are released if transaction ended + + @param thd Current thread + + @retval FALSE Success + @retval TRUE Failure +*/ + +bool trans_xa_rollback(THD *thd) +{ + XID_STATE &xid_state= thd->transaction->xid_state; + + DBUG_ENTER("trans_xa_rollback"); + + if (!xid_state.is_explicit_XA() || + !xid_state.xid_cache_element->xid.eq(thd->lex->xid)) + { + if (thd->in_multi_stmt_transaction_mode()) + { + my_error(ER_XAER_OUTSIDE, MYF(0)); + DBUG_RETURN(TRUE); + } + if (thd->fix_xid_hash_pins()) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + DBUG_RETURN(TRUE); + } + + if (auto xs= xid_cache_search(thd, thd->lex->xid)) + { + bool res; + bool xid_deleted= false; + MDL_request mdl_request; + bool rw_trans= (xs->rm_error != ER_XA_RBROLLBACK); + + if (rw_trans && thd->is_read_only_ctx()) + { + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only"); + res= 1; + goto _end_external_xid; + } + + MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, + MDL_EXPLICIT); + if (thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout)) + { + /* + We can't rollback an XA transaction on lock failure due to + Innodb redo log and bin log update is involved in rollback. + Return error to user for a retry. 
+ */ + DBUG_ASSERT(thd->is_error()); + + goto _end_external_xid; + } + else + { + thd->backup_commit_lock= &mdl_request; + } + res= xa_trans_rolled_back(xs); + DBUG_ASSERT(!xid_state.xid_cache_element); + + xid_state.xid_cache_element= xs; + ha_commit_or_rollback_by_xid(thd->lex->xid, 0); + if (!res && thd->is_error()) + { + goto _end_external_xid; + } + xid_cache_delete(thd, xs); + xid_deleted= true; + + _end_external_xid: + xid_state.xid_cache_element= 0; + if (!xid_deleted) + xs->acquired_to_recovered(); + if (mdl_request.ticket) + { + thd->mdl_context.release_lock(mdl_request.ticket); + thd->backup_commit_lock= 0; + } + } + else + my_error(ER_XAER_NOTA, MYF(0)); + DBUG_RETURN(thd->get_stmt_da()->is_error()); + } + + if (thd->transaction->all.is_trx_read_write() && thd->is_read_only_ctx()) + { + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only"); + DBUG_RETURN(TRUE); + } else if (xid_state.xid_cache_element->xa_state == XA_ACTIVE) + { + xid_state.er_xaer_rmfail(); + DBUG_RETURN(TRUE); + } + + MDL_request mdl_request; + MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, + MDL_STATEMENT); + if (thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout)) + { + /* + We can't rollback an XA transaction on lock failure due to + Innodb redo log and bin log update is involved in rollback. + Return error to user for a retry. 
+ */ + my_error(ER_XAER_RMERR, MYF(0)); + DBUG_RETURN(true); + } + + DBUG_RETURN(xa_trans_force_rollback(thd)); +} + + +bool trans_xa_detach(THD *thd) +{ + DBUG_ASSERT(thd->transaction->xid_state.is_explicit_XA()); + + if (thd->transaction->xid_state.xid_cache_element->xa_state != XA_PREPARED) + return xa_trans_force_rollback(thd); + else if (!thd->transaction->all.is_trx_read_write()) + { + thd->transaction->xid_state.set_error(ER_XA_RBROLLBACK); + ha_rollback_trans(thd, true); + } + + thd->transaction->xid_state.xid_cache_element->acquired_to_recovered(); + thd->transaction->xid_state.xid_cache_element= 0; + thd->transaction->cleanup(); + + Ha_trx_info *ha_info, *ha_info_next; + for (ha_info= thd->transaction->all.ha_list; + ha_info; + ha_info= ha_info_next) + { + ha_info_next= ha_info->next(); + ha_info->reset(); /* keep it conveniently zero-filled */ + } + + thd->transaction->all.ha_list= 0; + thd->transaction->all.no_2pc= 0; + thd->m_transaction_psi= 0; + thd->server_status&= ~(SERVER_STATUS_IN_TRANS | + SERVER_STATUS_IN_TRANS_READONLY); + thd->mdl_context.release_transactional_locks(thd); + + return false; +} + + +/** + return the XID as it appears in the SQL function's arguments. + So this string can be passed to XA START, XA PREPARE etc... + + @note + the 'buf' has to have space for at least SQL_XIDSIZE bytes. +*/ + + +/* + 'a'..'z' 'A'..'Z', '0'..'9' + and '-' '_' ' ' symbols don't have to be + converted. +*/ + +static const char xid_needs_conv[128]= +{ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1, + 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1 +}; + +/* + The size of XID string representation in the form + 'gtrid', 'bqual', formatID + see xid_t::get_sql_string() for details. 
+*/ +#define SQL_XIDSIZE (XIDDATASIZE * 2 + 8 + MY_INT64_NUM_DECIMAL_DIGITS) + +/* The 'buf' has to have space for at least SQL_XIDSIZE bytes. */ +static uint get_sql_xid(XID *xid, char *buf) +{ + int tot_len= xid->gtrid_length + xid->bqual_length; + int i; + const char *orig_buf= buf; + + for (i=0; idata)[i]; + if (c >= 128 || xid_needs_conv[c]) + break; + } + + if (i >= tot_len) + { + /* No need to convert characters to hexadecimals. */ + *buf++= '\''; + memcpy(buf, xid->data, xid->gtrid_length); + buf+= xid->gtrid_length; + *buf++= '\''; + if (xid->bqual_length > 0 || xid->formatID != 1) + { + *buf++= ','; + *buf++= '\''; + memcpy(buf, xid->data+xid->gtrid_length, xid->bqual_length); + buf+= xid->bqual_length; + *buf++= '\''; + } + } + else + { + *buf++= 'X'; + *buf++= '\''; + for (i= 0; i < xid->gtrid_length; i++) + { + *buf++=_dig_vec_lower[((uchar*) xid->data)[i] >> 4]; + *buf++=_dig_vec_lower[((uchar*) xid->data)[i] & 0x0f]; + } + *buf++= '\''; + if (xid->bqual_length > 0 || xid->formatID != 1) + { + *buf++= ','; + *buf++= 'X'; + *buf++= '\''; + for (; i < tot_len; i++) + { + *buf++=_dig_vec_lower[((uchar*) xid->data)[i] >> 4]; + *buf++=_dig_vec_lower[((uchar*) xid->data)[i] & 0x0f]; + } + *buf++= '\''; + } + } + + if (xid->formatID != 1) + { + *buf++= ','; + buf+= my_longlong10_to_str_8bit(&my_charset_bin, buf, + MY_INT64_NUM_DECIMAL_DIGITS, -10, xid->formatID); + } + + return (uint)(buf - orig_buf); +} + + +/** + return the list of XID's to a client, the same way SHOW commands do. + + @note + I didn't find in XA specs that an RM cannot return the same XID twice, + so mysql_xa_recover does not filter XID's to ensure uniqueness. + It can be easily fixed later, if necessary. 
+*/ + +static my_bool xa_recover_callback(XID_cache_element *xs, Protocol *protocol, + char *data, uint data_len, CHARSET_INFO *data_cs) +{ + if (xs->xa_state == XA_PREPARED) + { + protocol->prepare_for_resend(); + protocol->store_longlong((longlong) xs->xid.formatID, FALSE); + protocol->store_longlong((longlong) xs->xid.gtrid_length, FALSE); + protocol->store_longlong((longlong) xs->xid.bqual_length, FALSE); + protocol->store(data, data_len, data_cs); + if (protocol->write()) + return TRUE; + } + return FALSE; +} + + +static my_bool xa_recover_callback_short(XID_cache_element *xs, + Protocol *protocol) +{ + return xa_recover_callback(xs, protocol, xs->xid.data, + xs->xid.gtrid_length + xs->xid.bqual_length, &my_charset_bin); +} + + +static my_bool xa_recover_callback_verbose(XID_cache_element *xs, + Protocol *protocol) +{ + char buf[SQL_XIDSIZE]; + uint len= get_sql_xid(&xs->xid, buf); + return xa_recover_callback(xs, protocol, buf, len, + &my_charset_utf8mb3_general_ci); +} + + +/** + Collect field names of result set that will be sent to a client in result of + handling XA RECOVER statement. 
+ + @param thd Thread data object + @param[out] fields List of fields whose metadata should be collected for + sending to client +*/ + +void xa_recover_get_fields(THD *thd, List *field_list, + my_hash_walk_action *action) +{ + MEM_ROOT *mem_root= thd->mem_root; + + field_list->push_back(new (mem_root) + Item_int(thd, "formatID", 0, + MY_INT32_NUM_DECIMAL_DIGITS), mem_root); + field_list->push_back(new (mem_root) + Item_int(thd, "gtrid_length", 0, + MY_INT32_NUM_DECIMAL_DIGITS), mem_root); + field_list->push_back(new (mem_root) + Item_int(thd, "bqual_length", 0, + MY_INT32_NUM_DECIMAL_DIGITS), mem_root); + { + uint len; + CHARSET_INFO *cs; + + if (thd->lex->verbose) + { + len= SQL_XIDSIZE; + cs= &my_charset_utf8mb3_general_ci; + if (action) + *action= (my_hash_walk_action) xa_recover_callback_verbose; + } + else + { + len= XIDDATASIZE; + cs= &my_charset_bin; + if (action) + *action= (my_hash_walk_action) xa_recover_callback_short; + } + + field_list->push_back(new (mem_root) + Item_empty_string(thd, "data", len, cs), mem_root); + } +} + +bool mysql_xa_recover(THD *thd) +{ + List field_list; + Protocol *protocol= thd->protocol; + my_hash_walk_action action; + DBUG_ENTER("mysql_xa_recover"); + + xa_recover_get_fields(thd, &field_list, &action); + + if (protocol->send_result_set_metadata(&field_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) + DBUG_RETURN(1); + + if (xid_cache_iterate(thd, action, protocol)) + DBUG_RETURN(1); + my_eof(thd); + DBUG_RETURN(0); +} + + +/** + This is a specific to (pseudo-) slave applier collection of standard cleanup + actions to reset XA transaction state sim to @c ha_commit_one_phase. + THD of the slave applier is dissociated from a transaction object in engine + that continues to exist there. 
+ + @param THD current thread + @return the value of is_error() +*/ + +static bool slave_applier_reset_xa_trans(THD *thd) +{ + thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_BINLOG_THIS_TRX); + thd->server_status&= + ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); + DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); + + if (thd->variables.pseudo_slave_mode && + !thd->transaction->all.is_trx_read_write()) + { + thd->transaction->xid_state.set_error(ER_XA_RBROLLBACK); + } + thd->transaction->xid_state.xid_cache_element->acquired_to_recovered(); + thd->transaction->xid_state.xid_cache_element= 0; + + for (Ha_trx_info *ha_info= thd->transaction->all.ha_list, *ha_info_next; + ha_info; ha_info= ha_info_next) + { + ha_info_next= ha_info->next(); + ha_info->reset(); + } + thd->transaction->all.ha_list= 0; + + ha_close_connection(thd); + thd->transaction->cleanup(); + thd->transaction->all.reset(); + + DBUG_ASSERT(!thd->transaction->all.ha_list); + DBUG_ASSERT(!thd->transaction->all.no_2pc); + + thd->has_waiter= false; + MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi); // TODO/Fixme: commit? + thd->m_transaction_psi= NULL; + return thd->is_error(); +} diff --git a/sql/xa.h b/sql/xa.h new file mode 100644 index 00000000..a9b06e42 --- /dev/null +++ b/sql/xa.h @@ -0,0 +1,59 @@ +#ifndef XA_INCLUDED +#define XA_INCLUDED +/* + Copyright (c) 2000, 2016, Oracle and/or its affiliates. + Copyright (c) 2009, 2019, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +*/ + +class XID_cache_element; +enum xa_states +{ + XA_ACTIVE= 0, + XA_IDLE, + XA_PREPARED, + XA_ROLLBACK_ONLY, + XA_NO_STATE +}; + +struct XID_STATE { + XID_cache_element *xid_cache_element; + + bool check_has_uncommitted_xa() const; + bool is_explicit_XA() const { return xid_cache_element != 0; } + void set_error(uint error); + void er_xaer_rmfail() const; + XID *get_xid() const; + enum xa_states get_state_code() const; +}; + +void xid_cache_init(void); +void xid_cache_free(void); +bool xid_cache_insert(XID *xid); +bool xid_cache_insert(THD *thd, XID_STATE *xid_state, XID *xid); +void xid_cache_delete(THD *thd, XID_STATE *xid_state); + +bool trans_xa_start(THD *thd); +bool trans_xa_end(THD *thd); +bool trans_xa_prepare(THD *thd); +bool trans_xa_commit(THD *thd); +bool trans_xa_rollback(THD *thd); +bool trans_xa_detach(THD *thd); +bool mysql_xa_recover(THD *thd); + +void xa_recover_get_fields(THD *thd, List *field_list, + my_hash_walk_action *action); + +#endif /* XA_INCLUDED */ -- cgit v1.2.3